1/* Copyright (C) 2016-2021 Free Software Foundation, Inc.
2
3 This file is free software; you can redistribute it and/or modify it under
4 the terms of the GNU General Public License as published by the Free
5 Software Foundation; either version 3 of the License, or (at your option)
6 any later version.
7
8 This file is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11 for more details.
12
13 You should have received a copy of the GNU General Public License
14 along with GCC; see the file COPYING3. If not see
15 <http://www.gnu.org/licenses/>. */
16
17/* {{{ Includes. */
18
19/* We want GET_MODE_SIZE et al to return integers, please. */
20#define IN_TARGET_CODE 1
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
25#include "backend.h"
26#include "target.h"
27#include "memmodel.h"
28#include "rtl.h"
29#include "tree.h"
30#include "df.h"
31#include "tm_p.h"
32#include "stringpool.h"
33#include "optabs.h"
34#include "regs.h"
35#include "emit-rtl.h"
36#include "recog.h"
37#include "diagnostic-core.h"
38#include "insn-attr.h"
39#include "fold-const.h"
40#include "calls.h"
41#include "explow.h"
42#include "expr.h"
43#include "output.h"
44#include "cfgrtl.h"
45#include "langhooks.h"
46#include "builtins.h"
47#include "omp-general.h"
48#include "print-rtl.h"
49#include "attribs.h"
50#include "varasm.h"
51#include "intl.h"
52#include "rtl-iter.h"
53
54/* This file should be included last. */
55#include "target-def.h"
56
57/* }}} */
58/* {{{ Global variables. */
59
60/* Constants used by FP instructions. */
61
62static REAL_VALUE_TYPE dconst4, dconst1over2pi;
63static bool ext_gcn_constants_init = 0;
64
65/* Holds the ISA variant, derived from the command line parameters. */
66
67int gcn_isa = 3; /* Default to GCN3. */
68
69/* Reserve this much space for LDS (for propagating variables from
70 worker-single mode to worker-partitioned mode), per workgroup. Global
71 analysis could calculate an exact bound, but we don't do that yet.
72
73 We want to permit full occupancy, so size accordingly. */
74
75#define OMP_LDS_SIZE 0x600 /* 0x600 is 1/40 total, rounded down. */
76#define ACC_LDS_SIZE 32768 /* Half of the total should be fine. */
77#define OTHER_LDS_SIZE 65536 /* If in doubt, reserve all of it. */
78
79#define LDS_SIZE (flag_openacc ? ACC_LDS_SIZE \
80 : flag_openmp ? OMP_LDS_SIZE \
81 : OTHER_LDS_SIZE)
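/* As a rough sanity check on these sizes, assuming the 64KiB of LDS that
   GCN hardware provides per compute unit: 0x600 is 1536 bytes, i.e.
   65536/40 rounded down to a convenient size, so up to 40 resident
   workgroups can each keep a broadcast buffer; ACC_LDS_SIZE leaves half
   of LDS for other uses; OTHER_LDS_SIZE simply claims the whole 64KiB. */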
82
83/* The number of registers usable by normal non-kernel functions.
84 The SGPR count includes any special extra registers such as VCC. */
85
86#define MAX_NORMAL_SGPR_COUNT 62 // i.e. 64 with VCC
87#define MAX_NORMAL_VGPR_COUNT 24
88
89/* }}} */
90/* {{{ Initialization and options. */
91
92/* Initialize machine_function. */
93
94static struct machine_function *
95gcn_init_machine_status (void)
96{
97 struct machine_function *f;
98
99 f = ggc_cleared_alloc<machine_function> ();
100
101 /* Set up LDS allocation for broadcasting for this function. */
102 f->lds_allocated = 32;
103 f->lds_allocs = hash_map<tree, int>::create_ggc (64);
104
105 /* And LDS temporary decls for worker reductions. */
106 vec_alloc (f->reduc_decls, 0);
107
108 if (TARGET_GCN3)
109 f->use_flat_addressing = true;
110
111 return f;
112}
113
114/* Implement TARGET_OPTION_OVERRIDE.
115
116 Override option settings where defaults are variable, or we have specific
117 needs to consider. */
118
119static void
120gcn_option_override (void)
121{
122 init_machine_status = gcn_init_machine_status;
123
124 /* The HSA runtime does not respect ELF load addresses, so force PIE. */
125 if (!flag_pie)
126 flag_pie = 2;
127 if (!flag_pic)
128 flag_pic = flag_pie;
129
130 gcn_isa = gcn_arch == PROCESSOR_FIJI ? 3 : 5;
131
132 /* The default stack size needs to be small for offload kernels because
133 there may be many, many threads. Also, a smaller stack gives a
134 measurable performance boost. But a small stack is insufficient
135 for running the testsuite, so we use a larger default for the
136 stand-alone case. */
137 if (stack_size_opt == -1)
138 {
139 if (flag_openacc || flag_openmp)
140 /* 512 bytes per work item = 32kB total. */
141 stack_size_opt = 512 * 64;
142 else
143 /* 1MB total. */
144 stack_size_opt = 1048576;
145 }
146}
147
148/* }}} */
149/* {{{ Attributes. */
150
151/* This table defines the arguments that are permitted in
152 __attribute__ ((amdgpu_hsa_kernel (...))).
153
154 The names and values correspond to the HSA metadata that is encoded
155 into the assembler file and binary. */
156
157static const struct gcn_kernel_arg_type
158{
159 const char *name;
160 const char *header_pseudo;
161 machine_mode mode;
162
163 /* This should be set to -1 or -2 for a dynamically allocated register
164 number. Use -1 if this argument contributes to the user_sgpr_count,
165 -2 otherwise. */
166 int fixed_regno;
167} gcn_kernel_arg_types[] = {
168 {"exec", NULL, DImode, EXEC_REG},
169#define PRIVATE_SEGMENT_BUFFER_ARG 1
170 {"private_segment_buffer",
171 ".amdhsa_user_sgpr_private_segment_buffer", TImode, -1},
172#define DISPATCH_PTR_ARG 2
173 {"dispatch_ptr", ".amdhsa_user_sgpr_dispatch_ptr", DImode, -1},
174#define QUEUE_PTR_ARG 3
175 {"queue_ptr", ".amdhsa_user_sgpr_queue_ptr", DImode, -1},
176#define KERNARG_SEGMENT_PTR_ARG 4
177 {"kernarg_segment_ptr", ".amdhsa_user_sgpr_kernarg_segment_ptr", DImode, -1},
178 {"dispatch_id", ".amdhsa_user_sgpr_dispatch_id", DImode, -1},
179#define FLAT_SCRATCH_INIT_ARG 6
180 {"flat_scratch_init", ".amdhsa_user_sgpr_flat_scratch_init", DImode, -1},
181#define FLAT_SCRATCH_SEGMENT_SIZE_ARG 7
182 {"private_segment_size", ".amdhsa_user_sgpr_private_segment_size", SImode, -1},
183#define WORKGROUP_ID_X_ARG 8
184 {"workgroup_id_X", ".amdhsa_system_sgpr_workgroup_id_x", SImode, -2},
185 {"workgroup_id_Y", ".amdhsa_system_sgpr_workgroup_id_y", SImode, -2},
186 {"workgroup_id_Z", ".amdhsa_system_sgpr_workgroup_id_z", SImode, -2},
187 {"workgroup_info", ".amdhsa_system_sgpr_workgroup_info", SImode, -1},
188#define PRIVATE_SEGMENT_WAVE_OFFSET_ARG 12
189 {"private_segment_wave_offset",
190 ".amdhsa_system_sgpr_private_segment_wavefront_offset", SImode, -2},
191#define WORK_ITEM_ID_X_ARG 13
192 {"work_item_id_X", NULL, V64SImode, FIRST_VGPR_REG},
193#define WORK_ITEM_ID_Y_ARG 14
194 {"work_item_id_Y", NULL, V64SImode, FIRST_VGPR_REG + 1},
195#define WORK_ITEM_ID_Z_ARG 15
196 {"work_item_id_Z", NULL, V64SImode, FIRST_VGPR_REG + 2}
197};
198
199static const long default_requested_args
200 = (1 << PRIVATE_SEGMENT_BUFFER_ARG)
201 | (1 << DISPATCH_PTR_ARG)
202 | (1 << QUEUE_PTR_ARG)
203 | (1 << KERNARG_SEGMENT_PTR_ARG)
204 | (1 << PRIVATE_SEGMENT_WAVE_OFFSET_ARG)
205 | (1 << WORKGROUP_ID_X_ARG)
206 | (1 << WORK_ITEM_ID_X_ARG)
207 | (1 << WORK_ITEM_ID_Y_ARG)
208 | (1 << WORK_ITEM_ID_Z_ARG);
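/* As an illustration of how these requests surface in user code, a kernel
   wanting more than the defaults above could be declared roughly like
   this (the function name is only a placeholder):

     void example_kernel (void *args)
       __attribute__ ((amdgpu_hsa_kernel ("private_segment_buffer",
                                          "dispatch_ptr",
                                          "kernarg_segment_ptr",
                                          "flat_scratch_init")));

   Each string must match a .name entry in gcn_kernel_arg_types;
   gcn_parse_amdgpu_hsa_kernel_attribute below rejects unknown or
   duplicated names and ORs the matching bits into args->requested.  */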
209
210/* Extract parameter settings from __attribute__((amdgpu_hsa_kernel ())).
211 This function also sets the default values for some arguments.
212
213 Return true on error, false on success, with ARGS populated. */
214
215static bool
216gcn_parse_amdgpu_hsa_kernel_attribute (struct gcn_kernel_args *args,
217 tree list)
218{
219 bool err = false;
220 args->requested = default_requested_args;
221 args->nargs = 0;
222
223 for (int a = 0; a < GCN_KERNEL_ARG_TYPES; a++)
224 args->reg[a] = -1;
225
226 for (; list; list = TREE_CHAIN (list))
227 {
228 const char *str;
229 if (TREE_CODE (TREE_VALUE (list)) != STRING_CST)
230 {
231 error ("%<amdgpu_hsa_kernel%> attribute requires string constant "
232 "arguments");
233 break;
234 }
235 str = TREE_STRING_POINTER (TREE_VALUE (list));
236 int a;
237 for (a = 0; a < GCN_KERNEL_ARG_TYPES; a++)
238 {
239 if (!strcmp (str, gcn_kernel_arg_types[a].name))
240 break;
241 }
242 if (a == GCN_KERNEL_ARG_TYPES)
243 {
244 error ("unknown specifier %qs in %<amdgpu_hsa_kernel%> attribute",
245 str);
246 err = true;
247 break;
248 }
249 if (args->requested & (1 << a))
250 {
251 error ("duplicated parameter specifier %qs in %<amdgpu_hsa_kernel%> "
252 "attribute", str);
253 err = true;
254 break;
255 }
256 args->requested |= (1 << a);
257 args->order[args->nargs++] = a;
258 }
259
260 /* Requesting WORK_ITEM_ID_Z_ARG implies requesting WORK_ITEM_ID_X_ARG and
261 WORK_ITEM_ID_Y_ARG. Similarly, requesting WORK_ITEM_ID_Y_ARG implies
262 requesting WORK_ITEM_ID_X_ARG. */
263 if (args->requested & (1 << WORK_ITEM_ID_Z_ARG))
264 args->requested |= (1 << WORK_ITEM_ID_Y_ARG);
265 if (args->requested & (1 << WORK_ITEM_ID_Y_ARG))
266 args->requested |= (1 << WORK_ITEM_ID_X_ARG);
267
268 int sgpr_regno = FIRST_SGPR_REG;
269 args->nsgprs = 0;
270 for (int a = 0; a < GCN_KERNEL_ARG_TYPES; a++)
271 {
272 if (!(args->requested & (1 << a)))
273 continue;
274
275 if (gcn_kernel_arg_types[a].fixed_regno >= 0)
276 args->reg[a] = gcn_kernel_arg_types[a].fixed_regno;
277 else
278 {
279 int reg_count;
280
281 switch (gcn_kernel_arg_types[a].mode)
282 {
283 case E_SImode:
284 reg_count = 1;
285 break;
286 case E_DImode:
287 reg_count = 2;
288 break;
289 case E_TImode:
290 reg_count = 4;
291 break;
292 default:
293 gcc_unreachable ();
294 }
295 args->reg[a] = sgpr_regno;
296 sgpr_regno += reg_count;
297 if (gcn_kernel_arg_types[a].fixed_regno == -1)
298 args->nsgprs += reg_count;
299 }
300 }
301 if (sgpr_regno > FIRST_SGPR_REG + 16)
302 {
303 error ("too many arguments passed in sgpr registers");
304 }
305 return err;
306}
307
308/* Referenced by TARGET_ATTRIBUTE_TABLE.
309
310 Validates target specific attributes. */
311
312static tree
313gcn_handle_amdgpu_hsa_kernel_attribute (tree *node, tree name,
314 tree args, int, bool *no_add_attrs)
315{
316 if (!FUNC_OR_METHOD_TYPE_P (*node))
317 {
318 warning (OPT_Wattributes, "%qE attribute only applies to functions",
319 name);
320 *no_add_attrs = true;
321 return NULL_TREE;
322 }
323
324 /* Check the validity of the arguments given to the attribute. */
325 if (is_attribute_p ("gcnhsa_kernel", name))
326 {
327 struct gcn_kernel_args kernelarg;
328
329 if (gcn_parse_amdgpu_hsa_kernel_attribute (&kernelarg, args))
330 *no_add_attrs = true;
331
332 return NULL_TREE;
333 }
334
335 return NULL_TREE;
336}
337
338/* Implement TARGET_ATTRIBUTE_TABLE.
339
340 Create target-specific __attribute__ types. */
341
342static const struct attribute_spec gcn_attribute_table[] = {
343 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
344 affects_type_identity } */
345 {"amdgpu_hsa_kernel", 0, GCN_KERNEL_ARG_TYPES, false, true,
346 true, true, gcn_handle_amdgpu_hsa_kernel_attribute, NULL},
347 /* End element. */
348 {NULL, 0, 0, false, false, false, false, NULL, NULL}
349};
350
351/* }}} */
352/* {{{ Registers and modes. */
353
354/* Implement TARGET_SCALAR_MODE_SUPPORTED_P. */
355
356bool
357gcn_scalar_mode_supported_p (scalar_mode mode)
358{
359 return (mode == BImode
360 || mode == QImode
361 || mode == HImode /* || mode == HFmode */
362 || mode == SImode || mode == SFmode
363 || mode == DImode || mode == DFmode
364 || mode == TImode);
365}
366
367/* Implement TARGET_CLASS_MAX_NREGS.
368
369 Return the number of hard registers needed to hold a value of MODE in
370 a register of class RCLASS. */
371
372static unsigned char
373gcn_class_max_nregs (reg_class_t rclass, machine_mode mode)
374{
375 /* Scalar registers are 32bit, vector registers are in fact tuples of
376 64 lanes. */
377 if (rclass == VGPR_REGS)
378 {
379 if (vgpr_1reg_mode_p (mode))
380 return 1;
381 if (vgpr_2reg_mode_p (mode))
382 return 2;
383 /* TImode is used by DImode compare_and_swap. */
384 if (mode == TImode)
385 return 4;
386 }
387 else if (rclass == VCC_CONDITIONAL_REG && mode == BImode)
388 return 2;
389 return CEIL (GET_MODE_SIZE (mode), 4);
390}
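/* For example, under these rules an SImode value needs one register in
   any class, DImode needs two and TImode four; a V64SImode vector fits
   in a single VGPR (one 32-bit lane per work item) while V64DImode
   takes a VGPR pair; and a BImode value in VCC_CONDITIONAL_REG covers
   both VCC_LO and VCC_HI, hence the count of two. */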
391
392/* Implement TARGET_HARD_REGNO_NREGS.
393
394 Return the number of hard registers needed to hold a value of MODE in
395 REGNO. */
396
397unsigned int
398gcn_hard_regno_nregs (unsigned int regno, machine_mode mode)
399{
400 return gcn_class_max_nregs (REGNO_REG_CLASS (regno), mode);
401}
402
403/* Implement TARGET_HARD_REGNO_MODE_OK.
404
405 Return true if REGNO can hold value in MODE. */
406
407bool
408gcn_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
409{
410 /* Treat a complex mode as if it were a scalar mode of the same overall
411 size for the purposes of allocating hard registers. */
412 if (COMPLEX_MODE_P (mode))
413 switch (mode)
414 {
415 case E_CQImode:
416 case E_CHImode:
417 mode = SImode;
418 break;
419 case E_CSImode:
420 mode = DImode;
421 break;
422 case E_CDImode:
423 mode = TImode;
424 break;
425 case E_HCmode:
426 mode = SFmode;
427 break;
428 case E_SCmode:
429 mode = DFmode;
430 break;
431 default:
432 /* Not supported. */
433 return false;
434 }
435
436 switch (regno)
437 {
438 case FLAT_SCRATCH_LO_REG:
439 case XNACK_MASK_LO_REG:
440 case TBA_LO_REG:
441 case TMA_LO_REG:
442 return (mode == SImode || mode == DImode);
443 case VCC_LO_REG:
444 case EXEC_LO_REG:
445 return (mode == BImode || mode == SImode || mode == DImode);
446 case M0_REG:
447 case FLAT_SCRATCH_HI_REG:
448 case XNACK_MASK_HI_REG:
449 case TBA_HI_REG:
450 case TMA_HI_REG:
451 return mode == SImode;
452 case VCC_HI_REG:
453 return false;
454 case EXEC_HI_REG:
455 return mode == SImode /*|| mode == V32BImode */ ;
456 case SCC_REG:
457 case VCCZ_REG:
458 case EXECZ_REG:
459 return mode == BImode;
460 }
461 if (regno == ARG_POINTER_REGNUM || regno == FRAME_POINTER_REGNUM)
462 return true;
463 if (SGPR_REGNO_P (regno))
464 /* We restrict double register values to aligned registers. */
465 return (sgpr_1reg_mode_p (mode)
466 || (!((regno - FIRST_SGPR_REG) & 1) && sgpr_2reg_mode_p (mode))
467 || (((regno - FIRST_SGPR_REG) & 3) == 0 && mode == TImode));
468 if (VGPR_REGNO_P (regno))
469 /* Vector instructions do not care about the alignment of register
470 pairs, but where there is no 64-bit instruction, many of the
471 define_split do not work if the input and output registers partially
472 overlap. We tried to fix this with early clobber and match
473 constraints, but it was bug prone, added complexity, and conflicts
474 with the 'U0' constraints on vec_merge.
475 Therefore, we restrict ourselves to aligned registers. */
476 return (vgpr_1reg_mode_p (mode)
477 || (!((regno - FIRST_VGPR_REG) & 1) && vgpr_2reg_mode_p (mode))
478 /* TImode is used by DImode compare_and_swap. */
479 || (mode == TImode
480 && !((regno - FIRST_VGPR_REG) & 3)));
481 return false;
482}
483
484/* Implement REGNO_REG_CLASS via gcn.h.
485
486 Return smallest class containing REGNO. */
487
488enum reg_class
489gcn_regno_reg_class (int regno)
490{
491 switch (regno)
492 {
493 case SCC_REG:
494 return SCC_CONDITIONAL_REG;
495 case VCC_LO_REG:
496 case VCC_HI_REG:
497 return VCC_CONDITIONAL_REG;
498 case VCCZ_REG:
499 return VCCZ_CONDITIONAL_REG;
500 case EXECZ_REG:
501 return EXECZ_CONDITIONAL_REG;
502 case EXEC_LO_REG:
503 case EXEC_HI_REG:
504 return EXEC_MASK_REG;
505 }
506 if (VGPR_REGNO_P (regno))
507 return VGPR_REGS;
508 if (SGPR_REGNO_P (regno))
509 return SGPR_REGS;
510 if (regno < FIRST_VGPR_REG)
511 return GENERAL_REGS;
512 if (regno == ARG_POINTER_REGNUM || regno == FRAME_POINTER_REGNUM)
513 return AFP_REGS;
514 return ALL_REGS;
515}
516
517/* Implement TARGET_CAN_CHANGE_MODE_CLASS.
518
519 GCC assumes that lowpart contains first part of value as stored in memory.
520 This is not the case for vector registers. */
521
522bool
523gcn_can_change_mode_class (machine_mode from, machine_mode to,
524 reg_class_t regclass)
525{
526 if (!vgpr_vector_mode_p (from) && !vgpr_vector_mode_p (to))
527 return true;
528 return (gcn_class_max_nregs (regclass, from)
529 == gcn_class_max_nregs (regclass, to));
530}
531
532/* Implement TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P.
533
534 When this hook returns true for MODE, the compiler allows
535 registers explicitly used in the rtl to be used as spill registers
536 but prevents the compiler from extending the lifetime of these
537 registers. */
538
539bool
540gcn_small_register_classes_for_mode_p (machine_mode mode)
541{
542 /* We allocate into exec and vcc regs. Those form small register classes. */
543 return mode == DImode || mode == SImode;
544}
545
546/* Implement TARGET_CLASS_LIKELY_SPILLED_P.
547
548 Returns true if pseudos that have been assigned to registers of class RCLASS
549 would likely be spilled because registers of RCLASS are needed for spill
550 registers. */
551
552static bool
553gcn_class_likely_spilled_p (reg_class_t rclass)
554{
555 return (rclass == EXEC_MASK_REG
556 || reg_classes_intersect_p (ALL_CONDITIONAL_REGS, rclass));
557}
558
559/* Implement TARGET_MODES_TIEABLE_P.
560
561 Returns true if a value of MODE1 is accessible in MODE2 without
562 copying. */
563
564bool
565gcn_modes_tieable_p (machine_mode mode1, machine_mode mode2)
566{
567 return (GET_MODE_BITSIZE (mode1) <= MAX_FIXED_MODE_SIZE
568 && GET_MODE_BITSIZE (mode2) <= MAX_FIXED_MODE_SIZE);
569}
570
571/* Implement TARGET_TRULY_NOOP_TRUNCATION.
572
573 Returns true if it is safe to “convert” a value of INPREC bits to one of
574 OUTPREC bits (where OUTPREC is smaller than INPREC) by merely operating on
575 it as if it had only OUTPREC bits. */
576
577bool
578gcn_truly_noop_truncation (poly_uint64 outprec, poly_uint64 inprec)
579{
580 return ((inprec <= 32) && (outprec <= inprec));
581}
582
583/* Return N-th part of value occupying multiple registers. */
584
585rtx
586gcn_operand_part (machine_mode mode, rtx op, int n)
587{
588 if (GET_MODE_SIZE (mode) >= 256)
589 {
590 /*gcc_assert (GET_MODE_SIZE (mode) == 256 || n == 0); */
591
592 if (REG_P (op))
593 {
594 gcc_assert (REGNO (op) + n < FIRST_PSEUDO_REGISTER);
595 return gen_rtx_REG (V64SImode, REGNO (op) + n);
596 }
597 if (GET_CODE (op) == CONST_VECTOR)
598 {
599 int units = GET_MODE_NUNITS (mode);
600 rtvec v = rtvec_alloc (units);
601
602 for (int i = 0; i < units; ++i)
603 RTVEC_ELT (v, i) = gcn_operand_part (GET_MODE_INNER (mode),
604 CONST_VECTOR_ELT (op, i), n);
605
606 return gen_rtx_CONST_VECTOR (V64SImode, v);
607 }
608 if (GET_CODE (op) == UNSPEC && XINT (op, 1) == UNSPEC_VECTOR)
609 return gcn_gen_undef (V64SImode);
610 gcc_unreachable ();
611 }
612 else if (GET_MODE_SIZE (mode) == 8 && REG_P (op))
613 {
614 gcc_assert (REGNO (op) + n < FIRST_PSEUDO_REGISTER);
615 return gen_rtx_REG (SImode, REGNO (op) + n);
616 }
617 else
618 {
619 if (GET_CODE (op) == UNSPEC && XINT (op, 1) == UNSPEC_VECTOR)
620 return gcn_gen_undef (SImode);
621
622 /* If it's a constant then let's assume it is of the largest mode
623 available, otherwise simplify_gen_subreg will fail. */
624 if (mode == VOIDmode && CONST_INT_P (op))
625 mode = DImode;
626 return simplify_gen_subreg (SImode, op, mode, n * 4);
627 }
628}
629
630/* Return N-th part of value occupying multiple registers. */
631
632rtx
633gcn_operand_doublepart (machine_mode mode, rtx op, int n)
634{
635 return simplify_gen_subreg (DImode, op, mode, n * 8);
636}
637
638/* Return true if OP can be split into subregs or high/low parts.
639 This is always true for scalars, but not normally true for vectors.
640 However, for vectors in hardregs we can use the low and high registers. */
641
642bool
643gcn_can_split_p (machine_mode, rtx op)
644{
645 if (vgpr_vector_mode_p (GET_MODE (op)))
646 {
647 if (GET_CODE (op) == SUBREG)
648 op = SUBREG_REG (op);
649 if (!REG_P (op))
650 return true;
651 return REGNO (op) <= FIRST_PSEUDO_REGISTER;
652 }
653 return true;
654}
655
656/* Implement TARGET_SPILL_CLASS.
657
658 Return class of registers which could be used for pseudo of MODE
659 and of class RCLASS for spilling instead of memory. Return NO_REGS
660 if it is not possible or non-profitable. */
661
662static reg_class_t
663gcn_spill_class (reg_class_t c, machine_mode /*mode */ )
664{
665 if (reg_classes_intersect_p (ALL_CONDITIONAL_REGS, c)
666 || c == VCC_CONDITIONAL_REG)
667 return SGPR_REGS;
668 else
669 return NO_REGS;
670}
671
672/* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
673
674 Change allocno class for given pseudo from allocno and best class
675 calculated by IRA. */
676
677static reg_class_t
678gcn_ira_change_pseudo_allocno_class (int regno, reg_class_t cl,
679 reg_class_t best_cl)
680{
681 /* Avoid returning classes that contain both vgpr and sgpr registers. */
682 if (cl != ALL_REGS && cl != SRCDST_REGS && cl != ALL_GPR_REGS)
683 return cl;
684 if (best_cl != ALL_REGS && best_cl != SRCDST_REGS
685 && best_cl != ALL_GPR_REGS)
686 return best_cl;
687
688 machine_mode mode = PSEUDO_REGNO_MODE (regno);
689 if (vgpr_vector_mode_p (mode))
690 return VGPR_REGS;
691
692 return GENERAL_REGS;
693}
694
695/* Create a new DImode pseudo reg and emit an instruction to initialize
696 it to VAL. */
697
698static rtx
699get_exec (int64_t val)
700{
701 rtx reg = gen_reg_rtx (DImode);
702 emit_insn (gen_rtx_SET (reg, gen_int_mode (val, DImode)));
703 return reg;
704}
705
706/* Return value of scalar exec register. */
707
708rtx
709gcn_scalar_exec ()
710{
711 return const1_rtx;
712}
713
714/* Return pseudo holding scalar exec register. */
715
716rtx
717gcn_scalar_exec_reg ()
718{
719 return get_exec (1);
720}
721
722/* Return value of full exec register. */
723
724rtx
725gcn_full_exec ()
726{
727 return constm1_rtx;
728}
729
730/* Return pseudo holding full exec register. */
731
732rtx
733gcn_full_exec_reg ()
734{
735 return get_exec (-1);
736}
737
738/* }}} */
739/* {{{ Immediate constants. */
740
741/* Initialize shared numeric constants. */
742
743static void
744init_ext_gcn_constants (void)
745{
746 real_from_integer (&dconst4, DFmode, 4, SIGNED);
747
748 /* FIXME: this constant probably does not match what hardware really loads.
749 Reality check it eventually. */
750 real_from_string (&dconst1over2pi,
751 "0.1591549430918953357663423455968866839");
752 real_convert (&dconst1over2pi, SFmode, &dconst1over2pi);
753
754 ext_gcn_constants_init = 1;
755}
756
757/* Return non-zero if X is a constant that can appear as an inline operand.
758 This is 0, 0.5, -0.5, 1, -1, 2, -2, 4, -4, 1/(2*pi),
759 or a vector of those.
760 The value returned should be the encoding of this constant. */
761
762int
763gcn_inline_fp_constant_p (rtx x, bool allow_vector)
764{
765 machine_mode mode = GET_MODE (x);
766
767 if ((mode == V64HFmode || mode == V64SFmode || mode == V64DFmode)
768 && allow_vector)
769 {
770 int n;
771 if (GET_CODE (x) != CONST_VECTOR)
772 return 0;
773 n = gcn_inline_fp_constant_p (CONST_VECTOR_ELT (x, 0), false);
774 if (!n)
775 return 0;
776 for (int i = 1; i < 64; i++)
777 if (CONST_VECTOR_ELT (x, i) != CONST_VECTOR_ELT (x, 0))
778 return 0;
779 return 1;
780 }
781
782 if (mode != HFmode && mode != SFmode && mode != DFmode)
783 return 0;
784
785 const REAL_VALUE_TYPE *r;
786
787 if (x == CONST0_RTX (mode))
788 return 128;
789 if (x == CONST1_RTX (mode))
790 return 242;
791
792 r = CONST_DOUBLE_REAL_VALUE (x);
793
794 if (real_identical (r, &dconstm1))
795 return 243;
796
797 if (real_identical (r, &dconsthalf))
798 return 240;
799 if (real_identical (r, &dconstm1))
800 return 243;
801 if (real_identical (r, &dconst2))
802 return 244;
803 if (real_identical (r, &dconst4))
804 return 246;
805 if (real_identical (r, &dconst1over2pi))
806 return 248;
807 if (!ext_gcn_constants_init)
808 init_ext_gcn_constants ();
809 real_value_negate (r);
810 if (real_identical (r, &dconsthalf))
811 return 241;
812 if (real_identical (r, &dconst2))
813 return 245;
814 if (real_identical (r, &dconst4))
815 return 247;
816
817 /* FIXME: add 4, -4 and 1/(2*PI). */
818
819 return 0;
820}
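/* The return values above follow the hardware's inline-constant encoding
   for floating-point source operands, approximately:

     128   0.0       242   1.0       245  -2.0
     240   0.5       243  -1.0       246   4.0
     241  -0.5       244   2.0       247  -4.0
                                     248   1/(2*pi)

   so the encoding can be used directly when emitting the operand. */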
821
822/* Return true if X is a constant that can appear as an immediate operand.
823 This is 0, 0.5, -0.5, 1, -1, 2, -2, 4, -4, 1/(2*pi),
824 or a vector of those.
825 Unlike gcn_inline_fp_constant_p, the exact encoding is not returned. */
826
827bool
828gcn_fp_constant_p (rtx x, bool allow_vector)
829{
830 machine_mode mode = GET_MODE (x);
831
832 if ((mode == V64HFmode || mode == V64SFmode || mode == V64DFmode)
833 && allow_vector)
834 {
835 int n;
836 if (GET_CODE (x) != CONST_VECTOR)
837 return false;
838 n = gcn_fp_constant_p (CONST_VECTOR_ELT (x, 0), false);
839 if (!n)
840 return false;
841 for (int i = 1; i < 64; i++)
842 if (CONST_VECTOR_ELT (x, i) != CONST_VECTOR_ELT (x, 0))
843 return false;
844 return true;
845 }
846 if (mode != HFmode && mode != SFmode && mode != DFmode)
847 return false;
848
849 if (gcn_inline_fp_constant_p (x, false))
850 return true;
851 /* FIXME: It is not clear how 32bit immediates are interpreted here. */
852 return (mode != DFmode);
853}
854
855/* Return true if X is a constant representable as an inline immediate
856 constant in a 32-bit instruction encoding. */
857
858bool
859gcn_inline_constant_p (rtx x)
860{
861 if (GET_CODE (x) == CONST_INT)
862 return INTVAL (x) >= -16 && INTVAL (x) <= 64;
863 if (GET_CODE (x) == CONST_DOUBLE)
864 return gcn_inline_fp_constant_p (x, false);
865 if (GET_CODE (x) == CONST_VECTOR)
866 {
867 int n;
868 if (!vgpr_vector_mode_p (GET_MODE (x)))
869 return false;
870 n = gcn_inline_constant_p (CONST_VECTOR_ELT (x, 0));
871 if (!n)
872 return false;
873 for (int i = 1; i < 64; i++)
874 if (CONST_VECTOR_ELT (x, i) != CONST_VECTOR_ELT (x, 0))
875 return false;
876 return 1;
877 }
878 return false;
879}
880
881/* Return true if X is a constant representable as an immediate constant
882 in a 32 or 64-bit instruction encoding. */
883
884bool
885gcn_constant_p (rtx x)
886{
887 switch (GET_CODE (x))
888 {
889 case CONST_INT:
890 return true;
891
892 case CONST_DOUBLE:
893 return gcn_fp_constant_p (x, false);
894
895 case CONST_VECTOR:
896 {
897 int n;
898 if (!vgpr_vector_mode_p (GET_MODE (x)))
899 return false;
900 n = gcn_constant_p (CONST_VECTOR_ELT (x, 0));
901 if (!n)
902 return false;
903 for (int i = 1; i < 64; i++)
904 if (CONST_VECTOR_ELT (x, i) != CONST_VECTOR_ELT (x, 0))
905 return false;
906 return true;
907 }
908
909 case SYMBOL_REF:
910 case LABEL_REF:
911 return true;
912
913 default:
914 ;
915 }
916
917 return false;
918}
919
920/* Return true if X is a constant representable as two inline immediate
921 constants in a 64-bit instruction that is split into two 32-bit
922 instructions.
923 When MIXED is set, the low-part is permitted to use the full 32-bits. */
924
925bool
926gcn_inline_constant64_p (rtx x, bool mixed)
927{
928 if (GET_CODE (x) == CONST_VECTOR)
929 {
930 if (!vgpr_vector_mode_p (GET_MODE (x)))
931 return false;
932 if (!gcn_inline_constant64_p (CONST_VECTOR_ELT (x, 0), mixed))
933 return false;
934 for (int i = 1; i < 64; i++)
935 if (CONST_VECTOR_ELT (x, i) != CONST_VECTOR_ELT (x, 0))
936 return false;
937
938 return true;
939 }
940
941 if (GET_CODE (x) != CONST_INT)
942 return false;
943
944 rtx val_lo = gcn_operand_part (DImode, x, 0);
945 rtx val_hi = gcn_operand_part (DImode, x, 1);
946 return ((mixed || gcn_inline_constant_p (val_lo))
947 && gcn_inline_constant_p (val_hi));
948}
949
950/* Return true if X is a constant representable as an immediate constant
951 in a 32 or 64-bit instruction encoding where the hardware will
952 extend the immediate to 64-bits. */
953
954bool
955gcn_constant64_p (rtx x)
956{
957 if (!gcn_constant_p (x))
958 return false;
959
960 if (GET_CODE (x) != CONST_INT)
961 return true;
962
963 /* Negative numbers are only allowed if they can be encoded within src0,
964 because the 32-bit immediates do not get sign-extended.
965 Unsigned numbers must not be encodable as 32-bit -1..-16, because the
966 assembler will use a src0 inline immediate and that will get
967 sign-extended. */
968 HOST_WIDE_INT val = INTVAL (x);
969 return (((val & 0xffffffff) == val /* Positive 32-bit. */
970 && (val & 0xfffffff0) != 0xfffffff0) /* Not -1..-16. */
971 || gcn_inline_constant_p (x)); /* Src0. */
972}
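/* Worked examples of the rule above, assuming a 64-bit HOST_WIDE_INT:
   -5 is accepted because it fits the src0 inline range -16..64 and will
   be sign-extended correctly; 100 is accepted as a positive 32-bit
   literal; 0xfffffff8 is rejected because the assembler would encode it
   as the inline constant -8 and sign-extend it to 64 bits; and
   0x100000000 is rejected because it does not fit in 32 bits at all. */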
973
974/* Implement TARGET_LEGITIMATE_CONSTANT_P.
975
976 Returns true if X is a legitimate constant for a MODE immediate operand. */
977
978bool
979gcn_legitimate_constant_p (machine_mode, rtx x)
980{
981 return gcn_constant_p (x);
982}
983
984/* Return true if X is a CONST_VECTOR whose elements are all the same constant. */
985
986static bool
987single_cst_vector_p (rtx x)
988{
989 if (GET_CODE (x) != CONST_VECTOR)
990 return false;
991 for (int i = 1; i < 64; i++)
992 if (CONST_VECTOR_ELT (x, i) != CONST_VECTOR_ELT (x, 0))
993 return false;
994 return true;
995}
996
997/* Create a CONST_VECTOR of duplicated value A. */
998
999rtx
1000gcn_vec_constant (machine_mode mode, int a)
1001{
1002 /*if (!a)
1003 return CONST0_RTX (mode);
1004 if (a == -1)
1005 return CONSTM1_RTX (mode);
1006 if (a == 1)
1007 return CONST1_RTX (mode);
1008 if (a == 2)
1009 return CONST2_RTX (mode);*/
1010
1011 int units = GET_MODE_NUNITS (mode);
1012 machine_mode innermode = GET_MODE_INNER (mode);
1013
1014 rtx tem;
1015 if (FLOAT_MODE_P (innermode))
1016 {
1017 REAL_VALUE_TYPE rv;
1018 real_from_integer (&rv, NULL, a, SIGNED);
1019 tem = const_double_from_real_value (rv, innermode);
1020 }
1021 else
1022 tem = gen_int_mode (a, innermode);
1023
1024 rtvec v = rtvec_alloc (units);
1025 for (int i = 0; i < units; ++i)
1026 RTVEC_ELT (v, i) = tem;
1027
1028 return gen_rtx_CONST_VECTOR (mode, v);
1029}
1030
1031/* Create a CONST_VECTOR of duplicated value A. */
1032
1033rtx
1034gcn_vec_constant (machine_mode mode, rtx a)
1035{
1036 int units = GET_MODE_NUNITS (mode);
1037 rtvec v = rtvec_alloc (units);
1038
1039 for (int i = 0; i < units; ++i)
1040 RTVEC_ELT (v, i) = a;
1041
1042 return gen_rtx_CONST_VECTOR (mode, v);
1043}
1044
1045/* Create an undefined vector value, used where an insn operand is
1046 optional. */
1047
1048rtx
1049gcn_gen_undef (machine_mode mode)
1050{
1051 return gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), UNSPEC_VECTOR);
1052}
1053
1054/* }}} */
1055/* {{{ Addresses, pointers and moves. */
1056
1057/* Return true if REG is a valid place to store a pointer,
1058 for instructions that require an SGPR.
1059 FIXME rename. */
1060
1061static bool
1062gcn_address_register_p (rtx reg, machine_mode mode, bool strict)
1063{
1064 if (GET_CODE (reg) == SUBREG)
1065 reg = SUBREG_REG (reg);
1066
1067 if (!REG_P (reg))
1068 return false;
1069
1070 if (GET_MODE (reg) != mode)
1071 return false;
1072
1073 int regno = REGNO (reg);
1074
1075 if (regno >= FIRST_PSEUDO_REGISTER)
1076 {
1077 if (!strict)
1078 return true;
1079
1080 if (!reg_renumber)
1081 return false;
1082
1083 regno = reg_renumber[regno];
1084 }
1085
1086 return (SGPR_REGNO_P (regno) || regno == M0_REG
1087 || regno == ARG_POINTER_REGNUM || regno == FRAME_POINTER_REGNUM);
1088}
1089
1090/* Return true if REG is a valid place to store a pointer,
1091 for instructions that require a VGPR. */
1092
1093static bool
1094gcn_vec_address_register_p (rtx reg, machine_mode mode, bool strict)
1095{
1096 if (GET_CODE (reg) == SUBREG)
1097 reg = SUBREG_REG (reg);
1098
1099 if (!REG_P (reg))
1100 return false;
1101
1102 if (GET_MODE (reg) != mode)
1103 return false;
1104
1105 int regno = REGNO (reg);
1106
1107 if (regno >= FIRST_PSEUDO_REGISTER)
1108 {
1109 if (!strict)
1110 return true;
1111
1112 if (!reg_renumber)
1113 return false;
1114
1115 regno = reg_renumber[regno];
1116 }
1117
1118 return VGPR_REGNO_P (regno);
1119}
1120
1121/* Return true if X would be valid inside a MEM using the Flat address
1122 space. */
1123
1124bool
1125gcn_flat_address_p (rtx x, machine_mode mode)
1126{
1127 bool vec_mode = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1128 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
1129
1130 if (vec_mode && gcn_address_register_p (x, DImode, false))
1131 return true;
1132
1133 if (!vec_mode && gcn_vec_address_register_p (x, DImode, false))
1134 return true;
1135
1136 if (TARGET_GCN5_PLUS
1137 && GET_CODE (x) == PLUS
1138 && gcn_vec_address_register_p (XEXP (x, 0), DImode, false)
1139 && CONST_INT_P (XEXP (x, 1)))
1140 return true;
1141
1142 return false;
1143}
1144
1145/* Return true if X would be valid inside a MEM using the Scalar Flat
1146 address space. */
1147
1148bool
1149gcn_scalar_flat_address_p (rtx x)
1150{
1151 if (gcn_address_register_p (x, DImode, false))
1152 return true;
1153
1154 if (GET_CODE (x) == PLUS
1155 && gcn_address_register_p (XEXP (x, 0), DImode, false)
1156 && CONST_INT_P (XEXP (x, 1)))
1157 return true;
1158
1159 return false;
1160}
1161
1162/* Return true if MEM X would be valid for the Scalar Flat address space. */
1163
1164bool
1165gcn_scalar_flat_mem_p (rtx x)
1166{
1167 if (!MEM_P (x))
1168 return false;
1169
1170 if (GET_MODE_SIZE (GET_MODE (x)) < 4)
1171 return false;
1172
1173 return gcn_scalar_flat_address_p (XEXP (x, 0));
1174}
1175
1176/* Return true if X would be valid inside a MEM using the LDS or GDS
1177 address spaces. */
1178
1179bool
1180gcn_ds_address_p (rtx x)
1181{
1182 if (gcn_vec_address_register_p (x, SImode, false))
1183 return true;
1184
1185 if (GET_CODE (x) == PLUS
1186 && gcn_vec_address_register_p (XEXP (x, 0), SImode, false)
1187 && CONST_INT_P (XEXP (x, 1)))
1188 return true;
1189
1190 return false;
1191}
1192
1193/* Return true if ADDR would be valid inside a MEM using the Global
1194 address space. */
1195
1196bool
1197gcn_global_address_p (rtx addr)
1198{
1199 if (gcn_address_register_p (addr, DImode, false)
1200 || gcn_vec_address_register_p (addr, DImode, false))
1201 return true;
1202
1203 if (GET_CODE (addr) == PLUS)
1204 {
1205 rtx base = XEXP (addr, 0);
1206 rtx offset = XEXP (addr, 1);
1207 bool immediate_p = (CONST_INT_P (offset)
1208 && INTVAL (offset) >= -(1 << 12)
1209 && INTVAL (offset) < (1 << 12));
1210
1211 if ((gcn_address_register_p (base, DImode, false)
1212 || gcn_vec_address_register_p (base, DImode, false))
1213 && immediate_p)
1214 /* SGPR + CONST or VGPR + CONST */
1215 return true;
1216
1217 if (gcn_address_register_p (base, DImode, false)
1218 && gcn_vgpr_register_operand (offset, SImode))
1219 /* SGPR + VGPR */
1220 return true;
1221
1222 if (GET_CODE (base) == PLUS
1223 && gcn_address_register_p (XEXP (base, 0), DImode, false)
1224 && gcn_vgpr_register_operand (XEXP (base, 1), SImode)
1225 && immediate_p)
1226 /* (SGPR + VGPR) + CONST */
1227 return true;
1228 }
1229
1230 return false;
1231}
1232
1233/* Implement TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P.
1234
1235 Recognizes RTL expressions that are valid memory addresses for an
1236 instruction. The MODE argument is the machine mode for the MEM
1237 expression that wants to use this address.
1238
1239 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
1240 convert common non-canonical forms to canonical form so that they will
1241 be recognized. */
1242
1243static bool
1244gcn_addr_space_legitimate_address_p (machine_mode mode, rtx x, bool strict,
1245 addr_space_t as)
1246{
1247 /* All vector instructions need to work on addresses in registers. */
1248 if (!TARGET_GCN5_PLUS && (vgpr_vector_mode_p (mode) && !REG_P (x)))
1249 return false;
1250
1251 if (AS_SCALAR_FLAT_P (as))
1252 {
1253 if (mode == QImode || mode == HImode)
1254 return 0;
1255
1256 switch (GET_CODE (x))
1257 {
1258 case REG:
1259 return gcn_address_register_p (x, DImode, strict);
1260 /* Addresses are in the form BASE+OFFSET
1261 OFFSET is either 20bit unsigned immediate, SGPR or M0.
1262 Writes and atomics do not accept SGPR. */
1263 case PLUS:
1264 {
1265 rtx x0 = XEXP (x, 0);
1266 rtx x1 = XEXP (x, 1);
1267 if (!gcn_address_register_p (x0, DImode, strict))
1268 return false;
1269 /* FIXME: This is disabled because of the mode mismatch between
1270 SImode (for the address or m0 register) and the DImode PLUS.
1271 We'll need a zero_extend or similar.
1272
1273 if (gcn_m0_register_p (x1, SImode, strict)
1274 || gcn_address_register_p (x1, SImode, strict))
1275 return true;
1276 else*/
1277 if (GET_CODE (x1) == CONST_INT)
1278 {
1279 if (INTVAL (x1) >= 0 && INTVAL (x1) < (1 << 20)
1280 /* The low bits of the offset are ignored, even when
1281 they're meant to realign the pointer. */
1282 && !(INTVAL (x1) & 0x3))
1283 return true;
1284 }
1285 return false;
1286 }
1287
1288 default:
1289 break;
1290 }
1291 }
1292 else if (AS_SCRATCH_P (as))
1293 return gcn_address_register_p (x, SImode, strict);
1294 else if (AS_FLAT_P (as) || AS_FLAT_SCRATCH_P (as))
1295 {
1296 if (TARGET_GCN3 || GET_CODE (x) == REG)
1297 return ((GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1298 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
1299 ? gcn_address_register_p (x, DImode, strict)
1300 : gcn_vec_address_register_p (x, DImode, strict));
1301 else
1302 {
1303 gcc_assert (TARGET_GCN5_PLUS);
1304
1305 if (GET_CODE (x) == PLUS)
1306 {
1307 rtx x1 = XEXP (x, 1);
1308
1309 if (VECTOR_MODE_P (mode)
1310 ? !gcn_address_register_p (x, DImode, strict)
1311 : !gcn_vec_address_register_p (x, DImode, strict))
1312 return false;
1313
1314 if (GET_CODE (x1) == CONST_INT)
1315 {
1316 if (INTVAL (x1) >= 0 && INTVAL (x1) < (1 << 12)
1317 /* The low bits of the offset are ignored, even when
1318 they're meant to realign the pointer. */
1319 && !(INTVAL (x1) & 0x3))
1320 return true;
1321 }
1322 }
1323 return false;
1324 }
1325 }
1326 else if (AS_GLOBAL_P (as))
1327 {
1328 gcc_assert (TARGET_GCN5_PLUS);
1329
1330 if (GET_CODE (x) == REG)
1331 return (gcn_address_register_p (x, DImode, strict)
1332 || (!VECTOR_MODE_P (mode)
1333 && gcn_vec_address_register_p (x, DImode, strict)));
1334 else if (GET_CODE (x) == PLUS)
1335 {
1336 rtx base = XEXP (x, 0);
1337 rtx offset = XEXP (x, 1);
1338
1339 bool immediate_p = (GET_CODE (offset) == CONST_INT
1340 /* Signed 13-bit immediate. */
1341 && INTVAL (offset) >= -(1 << 12)
1342 && INTVAL (offset) < (1 << 12)
1343 /* The low bits of the offset are ignored, even
1344 when they're meant to realign the pointer. */
1345 && !(INTVAL (offset) & 0x3));
1346
1347 if (!VECTOR_MODE_P (mode))
1348 {
1349 if ((gcn_address_register_p (base, DImode, strict)
1350 || gcn_vec_address_register_p (base, DImode, strict))
1351 && immediate_p)
1352 /* SGPR + CONST or VGPR + CONST */
1353 return true;
1354
1355 if (gcn_address_register_p (base, DImode, strict)
1356 && gcn_vgpr_register_operand (offset, SImode))
1357 /* SGPR + VGPR */
1358 return true;
1359
1360 if (GET_CODE (base) == PLUS
1361 && gcn_address_register_p (XEXP (base, 0), DImode, strict)
1362 && gcn_vgpr_register_operand (XEXP (base, 1), SImode)
1363 && immediate_p)
1364 /* (SGPR + VGPR) + CONST */
1365 return true;
1366 }
1367 else
1368 {
1369 if (gcn_address_register_p (base, DImode, strict)
1370 && immediate_p)
1371 /* SGPR + CONST */
1372 return true;
1373 }
1374 }
1375 else
1376 return false;
1377 }
1378 else if (AS_ANY_DS_P (as))
1379 switch (GET_CODE (x))
1380 {
1381 case REG:
1382 return (VECTOR_MODE_P (mode)
1383 ? gcn_address_register_p (x, SImode, strict)
1384 : gcn_vec_address_register_p (x, SImode, strict));
1385 /* Addresses are in the form BASE+OFFSET
1386 OFFSET is either 20bit unsigned immediate, SGPR or M0.
1387 Writes and atomics do not accept SGPR. */
1388 case PLUS:
1389 {
1390 rtx x0 = XEXP (x, 0);
1391 rtx x1 = XEXP (x, 1);
1392 if (!gcn_vec_address_register_p (x0, DImode, strict))
1393 return false;
1394 if (GET_CODE (x1) == REG)
1395 {
1396 if (GET_CODE (x1) != REG
1397 || (REGNO (x1) <= FIRST_PSEUDO_REGISTER
1398 && !gcn_ssrc_register_operand (x1, DImode)))
1399 return false;
1400 }
1401 else if (GET_CODE (x1) == CONST_VECTOR
1402 && GET_CODE (CONST_VECTOR_ELT (x1, 0)) == CONST_INT
1403 && single_cst_vector_p (x1))
1404 {
1405 x1 = CONST_VECTOR_ELT (x1, 0);
1406 if (INTVAL (x1) >= 0 && INTVAL (x1) < (1 << 20))
1407 return true;
1408 }
1409 return false;
1410 }
1411
1412 default:
1413 break;
1414 }
1415 else
1416 gcc_unreachable ();
1417 return false;
1418}
1419
1420/* Implement TARGET_ADDR_SPACE_POINTER_MODE.
1421
1422 Return the appropriate mode for a named address pointer. */
1423
1424static scalar_int_mode
1425gcn_addr_space_pointer_mode (addr_space_t addrspace)
1426{
1427 switch (addrspace)
1428 {
1429 case ADDR_SPACE_SCRATCH:
1430 case ADDR_SPACE_LDS:
1431 case ADDR_SPACE_GDS:
1432 return SImode;
1433 case ADDR_SPACE_DEFAULT:
1434 case ADDR_SPACE_FLAT:
1435 case ADDR_SPACE_FLAT_SCRATCH:
1436 case ADDR_SPACE_SCALAR_FLAT:
1437 return DImode;
1438 default:
1439 gcc_unreachable ();
1440 }
1441}
1442
1443/* Implement TARGET_ADDR_SPACE_ADDRESS_MODE.
1444
1445 Return the appropriate mode for a named address space address. */
1446
1447static scalar_int_mode
1448gcn_addr_space_address_mode (addr_space_t addrspace)
1449{
1450 return gcn_addr_space_pointer_mode (addrspace);
1451}
1452
1453/* Implement TARGET_ADDR_SPACE_SUBSET_P.
1454
1455 Determine if one named address space is a subset of another. */
1456
1457static bool
1458gcn_addr_space_subset_p (addr_space_t subset, addr_space_t superset)
1459{
1460 if (subset == superset)
1461 return true;
1462 /* FIXME is this true? */
1463 if (AS_FLAT_P (superset) || AS_SCALAR_FLAT_P (superset))
1464 return true;
1465 return false;
1466}
1467
1468/* Convert from one address space to another. */
1469
1470static rtx
1471gcn_addr_space_convert (rtx op, tree from_type, tree to_type)
1472{
1473 gcc_assert (POINTER_TYPE_P (from_type));
1474 gcc_assert (POINTER_TYPE_P (to_type));
1475
1476 addr_space_t as_from = TYPE_ADDR_SPACE (TREE_TYPE (from_type));
1477 addr_space_t as_to = TYPE_ADDR_SPACE (TREE_TYPE (to_type));
1478
1479 if (AS_LDS_P (as_from) && AS_FLAT_P (as_to))
1480 {
1481 rtx queue = gen_rtx_REG (DImode,
1482 cfun->machine->args.reg[QUEUE_PTR_ARG]);
1483 rtx group_seg_aperture_hi = gen_rtx_MEM (SImode,
1484 gen_rtx_PLUS (DImode, queue,
1485 gen_int_mode (64, SImode)));
1486 rtx tmp = gen_reg_rtx (DImode);
1487
1488 emit_move_insn (gen_lowpart (SImode, tmp), op);
1489 emit_move_insn (gen_highpart_mode (SImode, DImode, tmp),
1490 group_seg_aperture_hi);
1491
1492 return tmp;
1493 }
1494 else if (as_from == as_to)
1495 return op;
1496 else
1497 gcc_unreachable ();
1498}
1499
1500
1501/* Implement REGNO_MODE_CODE_OK_FOR_BASE_P via gcn.h
1502
1503 Return true if REGNO is OK for memory addressing. */
1504
1505bool
1506gcn_regno_mode_code_ok_for_base_p (int regno,
1507 machine_mode, addr_space_t as, int, int)
1508{
1509 if (regno >= FIRST_PSEUDO_REGISTER)
1510 {
1511 if (reg_renumber)
1512 regno = reg_renumber[regno];
1513 else
1514 return true;
1515 }
1516 if (AS_FLAT_P (as))
1517 return (VGPR_REGNO_P (regno)
1518 || regno == ARG_POINTER_REGNUM || regno == FRAME_POINTER_REGNUM);
1519 else if (AS_SCALAR_FLAT_P (as))
1520 return (SGPR_REGNO_P (regno)
1521 || regno == ARG_POINTER_REGNUM || regno == FRAME_POINTER_REGNUM);
1522 else if (AS_GLOBAL_P (as))
1523 {
1524 return (SGPR_REGNO_P (regno)
1525 || VGPR_REGNO_P (regno)
1526 || regno == ARG_POINTER_REGNUM
1527 || regno == FRAME_POINTER_REGNUM);
1528 }
1529 else
1530 /* For now. */
1531 return false;
1532}
1533
1534/* Implement MODE_CODE_BASE_REG_CLASS via gcn.h.
1535
1536 Return a suitable register class for memory addressing. */
1537
1538reg_class
1539gcn_mode_code_base_reg_class (machine_mode mode, addr_space_t as, int oc,
1540 int ic)
1541{
1542 switch (as)
1543 {
1544 case ADDR_SPACE_DEFAULT:
1545 return gcn_mode_code_base_reg_class (mode, DEFAULT_ADDR_SPACE, oc, ic);
1546 case ADDR_SPACE_SCALAR_FLAT:
1547 case ADDR_SPACE_SCRATCH:
1548 return SGPR_REGS;
1549 break;
1550 case ADDR_SPACE_FLAT:
1551 case ADDR_SPACE_FLAT_SCRATCH:
1552 case ADDR_SPACE_LDS:
1553 case ADDR_SPACE_GDS:
1554 return ((GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1555 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
1556 ? SGPR_REGS : VGPR_REGS);
1557 case ADDR_SPACE_GLOBAL:
1558 return ((GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1559 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
1560 ? SGPR_REGS : ALL_GPR_REGS);
1561 }
1562 gcc_unreachable ();
1563}
1564
1565/* Implement REGNO_OK_FOR_INDEX_P via gcn.h.
1566
1567 Return true if REGNO is OK for index of memory addressing. */
1568
1569bool
1570regno_ok_for_index_p (int regno)
1571{
1572 if (regno >= FIRST_PSEUDO_REGISTER)
1573 {
1574 if (reg_renumber)
1575 regno = reg_renumber[regno];
1576 else
1577 return true;
1578 }
1579 return regno == M0_REG || VGPR_REGNO_P (regno);
1580}
1581
1582/* Generate move which uses the exec flags. If EXEC is NULL, then it is
1583 assumed that all lanes normally relevant to the mode of the move are
1584 affected. If PREV is NULL, then a sensible default is supplied for
1585 the inactive lanes. */
1586
1587static rtx
1588gen_mov_with_exec (rtx op0, rtx op1, rtx exec = NULL, rtx prev = NULL)
1589{
1590 machine_mode mode = GET_MODE (op0);
1591
1592 if (vgpr_vector_mode_p (mode))
1593 {
1594 if (exec && exec != CONSTM1_RTX (DImode))
1595 {
1596 if (!prev)
1597 prev = op0;
1598 }
1599 else
1600 {
1601 if (!prev)
1602 prev = gcn_gen_undef (mode);
1603 exec = gcn_full_exec_reg ();
1604 }
1605
1606 rtx set = gen_rtx_SET (op0, gen_rtx_VEC_MERGE (mode, op1, prev, exec));
1607
1608 return gen_rtx_PARALLEL (VOIDmode,
1609 gen_rtvec (2, set,
1610 gen_rtx_CLOBBER (VOIDmode,
1611 gen_rtx_SCRATCH (V64DImode))));
1612 }
1613
1614 return (gen_rtx_PARALLEL
1615 (VOIDmode,
1616 gen_rtvec (2, gen_rtx_SET (op0, op1),
1617 gen_rtx_USE (VOIDmode,
1618 exec ? exec : gcn_scalar_exec ()))));
1619}
1620
1621/* Generate masked move. */
1622
1623static rtx
1624gen_duplicate_load (rtx op0, rtx op1, rtx op2 = NULL, rtx exec = NULL)
1625{
1626 if (exec)
1627 return (gen_rtx_SET (op0,
1628 gen_rtx_VEC_MERGE (GET_MODE (op0),
1629 gen_rtx_VEC_DUPLICATE (GET_MODE
1630 (op0), op1),
1631 op2, exec)));
1632 else
1633 return (gen_rtx_SET (op0, gen_rtx_VEC_DUPLICATE (GET_MODE (op0), op1)));
1634}
1635
1636/* Expand vector init of OP0 by VEC.
1637 Implements vec_init instruction pattern. */
1638
1639void
1640gcn_expand_vector_init (rtx op0, rtx vec)
1641{
1642 int64_t initialized_mask = 0;
1643 int64_t curr_mask = 1;
1644 machine_mode mode = GET_MODE (op0);
1645
1646 rtx val = XVECEXP (vec, 0, 0);
1647
1648 for (int i = 1; i < 64; i++)
1649 if (rtx_equal_p (val, XVECEXP (vec, 0, i)))
1650 curr_mask |= (int64_t) 1 << i;
1651
1652 if (gcn_constant_p (val))
1653 emit_move_insn (op0, gcn_vec_constant (mode, val));
1654 else
1655 {
1656 val = force_reg (GET_MODE_INNER (mode), val);
1657 emit_insn (gen_duplicate_load (op0, val));
1658 }
1659 initialized_mask |= curr_mask;
1660 for (int i = 1; i < 64; i++)
1661 if (!(initialized_mask & ((int64_t) 1 << i)))
1662 {
1663 curr_mask = (int64_t) 1 << i;
1664 rtx val = XVECEXP (vec, 0, i);
1665
1666 for (int j = i + 1; j < 64; j++)
1667 if (rtx_equal_p (val, XVECEXP (vec, 0, j)))
1668 curr_mask |= (int64_t) 1 << j;
1669 if (gcn_constant_p (val))
1670 emit_insn (gen_mov_with_exec (op0, gcn_vec_constant (mode, val),
1671 get_exec (curr_mask)));
1672 else
1673 {
1674 val = force_reg (GET_MODE_INNER (mode), val);
1675 emit_insn (gen_duplicate_load (op0, val, op0,
1676 get_exec (curr_mask)));
1677 }
1678 initialized_mask |= curr_mask;
1679 }
1680}
1681
1682/* Load vector constant where n-th lane contains BASE+n*VAL. */
1683
1684static rtx
1685strided_constant (machine_mode mode, int base, int val)
1686{
1687 rtx x = gen_reg_rtx (mode);
1688 emit_move_insn (x, gcn_vec_constant (mode, base));
1689 emit_insn (gen_addv64si3_exec (x, x, gcn_vec_constant (mode, val * 32),
1690 x, get_exec (0xffffffff00000000)));
1691 emit_insn (gen_addv64si3_exec (x, x, gcn_vec_constant (mode, val * 16),
1692 x, get_exec (0xffff0000ffff0000)));
1693 emit_insn (gen_addv64si3_exec (x, x, gcn_vec_constant (mode, val * 8),
1694 x, get_exec (0xff00ff00ff00ff00)));
1695 emit_insn (gen_addv64si3_exec (x, x, gcn_vec_constant (mode, val * 4),
1696 x, get_exec (0xf0f0f0f0f0f0f0f0)));
1697 emit_insn (gen_addv64si3_exec (x, x, gcn_vec_constant (mode, val * 2),
1698 x, get_exec (0xcccccccccccccccc)));
1699 emit_insn (gen_addv64si3_exec (x, x, gcn_vec_constant (mode, val * 1),
1700 x, get_exec (0xaaaaaaaaaaaaaaaa)));
1701 return x;
1702}
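/* For example, strided_constant (V64SImode, 0, 4) produces the lane
   values {0, 4, 8, ..., 252} in six masked additions: each step adds
   val*32, val*16, ..., val*1 to the appropriate half of every block of
   64, 32, ..., 2 lanes, doubling the number of distinct values each
   time, so lane n ends up holding base + n*val. */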
1703
1704/* Implement TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS. */
1705
1706static rtx
1707gcn_addr_space_legitimize_address (rtx x, rtx old, machine_mode mode,
1708 addr_space_t as)
1709{
1710 switch (as)
1711 {
1712 case ADDR_SPACE_DEFAULT:
1713 return gcn_addr_space_legitimize_address (x, old, mode,
1714 DEFAULT_ADDR_SPACE);
1715 case ADDR_SPACE_SCALAR_FLAT:
1716 case ADDR_SPACE_SCRATCH:
1717 /* Instructions working on vectors need the address to be in
1718 a register. */
1719 if (vgpr_vector_mode_p (mode))
1720 return force_reg (GET_MODE (x), x);
1721
1722 return x;
1723 case ADDR_SPACE_FLAT:
1724 case ADDR_SPACE_FLAT_SCRATCH:
1725 case ADDR_SPACE_GLOBAL:
1726 return TARGET_GCN3 ? force_reg (DImode, x) : x;
1727 case ADDR_SPACE_LDS:
1728 case ADDR_SPACE_GDS:
1729 /* FIXME: LDS supports offsets, handle them! */
1730 if (vgpr_vector_mode_p (mode) && GET_MODE (x) != V64SImode)
1731 {
1732 rtx addrs = gen_reg_rtx (V64SImode);
1733 rtx base = force_reg (SImode, x);
1734 rtx offsets = strided_constant (V64SImode, 0,
1735 GET_MODE_UNIT_SIZE (mode));
1736
1737 emit_insn (gen_vec_duplicatev64si (addrs, base));
1738 emit_insn (gen_addv64si3 (addrs, offsets, addrs));
1739 return addrs;
1740 }
1741 return x;
1742 }
1743 gcc_unreachable ();
1744}
1745
1746/* Convert a (mem:<MODE> (reg:DI)) to (mem:<MODE> (reg:V64DI)) with the
1747 proper vector of stepped addresses.
1748
1749 MEM will be a DImode address of a vector in an SGPR.
1750 TMP will be a V64DImode VGPR pair or (scratch:V64DI). */
1751
1752rtx
1753gcn_expand_scalar_to_vector_address (machine_mode mode, rtx exec, rtx mem,
1754 rtx tmp)
1755{
1756 gcc_assert (MEM_P (mem));
1757 rtx mem_base = XEXP (mem, 0);
1758 rtx mem_index = NULL_RTX;
1759
1760 if (!TARGET_GCN5_PLUS)
1761 {
1762 /* gcn_addr_space_legitimize_address should have put the address in a
1763 register. If not, it is too late to do anything about it. */
1764 gcc_assert (REG_P (mem_base));
1765 }
1766
1767 if (GET_CODE (mem_base) == PLUS)
1768 {
1769 mem_index = XEXP (mem_base, 1);
1770 mem_base = XEXP (mem_base, 0);
1771 }
1772
1773 /* RF and RM base registers for vector modes should always be an SGPR. */
1774 gcc_assert (SGPR_REGNO_P (REGNO (mem_base))
1775 || REGNO (mem_base) >= FIRST_PSEUDO_REGISTER);
1776
1777 machine_mode inner = GET_MODE_INNER (mode);
1778 int shift = exact_log2 (GET_MODE_SIZE (inner));
1779 rtx ramp = gen_rtx_REG (V64SImode, VGPR_REGNO (1));
1780 rtx undef_v64si = gcn_gen_undef (V64SImode);
1781 rtx new_base = NULL_RTX;
1782 addr_space_t as = MEM_ADDR_SPACE (mem);
1783
1784 rtx tmplo = (REG_P (tmp)
1785 ? gcn_operand_part (V64DImode, tmp, 0)
1786 : gen_reg_rtx (V64SImode));
1787
1788 /* tmplo[:] = ramp[:] << shift */
1789 if (exec)
1790 emit_insn (gen_ashlv64si3_exec (tmplo, ramp,
1791 gen_int_mode (shift, SImode),
1792 undef_v64si, exec));
1793 else
1794 emit_insn (gen_ashlv64si3 (tmplo, ramp, gen_int_mode (shift, SImode)));
1795
1796 if (AS_FLAT_P (as))
1797 {
1798 rtx vcc = gen_rtx_REG (DImode, CC_SAVE_REG);
1799
1800 if (REG_P (tmp))
1801 {
1802 rtx mem_base_lo = gcn_operand_part (DImode, mem_base, 0);
1803 rtx mem_base_hi = gcn_operand_part (DImode, mem_base, 1);
1804 rtx tmphi = gcn_operand_part (V64DImode, tmp, 1);
1805
1806 /* tmphi[:] = mem_base_hi */
1807 if (exec)
1808 emit_insn (gen_vec_duplicatev64si_exec (tmphi, mem_base_hi,
1809 undef_v64si, exec));
1810 else
1811 emit_insn (gen_vec_duplicatev64si (tmphi, mem_base_hi));
1812
1813 /* tmp[:] += zext (mem_base) */
1814 if (exec)
1815 {
1816 emit_insn (gen_addv64si3_vcc_dup_exec (tmplo, mem_base_lo, tmplo,
1817 vcc, undef_v64si, exec));
1818 emit_insn (gen_addcv64si3_exec (tmphi, tmphi, const0_rtx,
1819 vcc, vcc, undef_v64si, exec));
1820 }
1821 else
1822 emit_insn (gen_addv64di3_vcc_zext_dup (tmp, mem_base_lo, tmp, vcc));
1823 }
1824 else
1825 {
1826 tmp = gen_reg_rtx (V64DImode);
1827 if (exec)
1828 emit_insn (gen_addv64di3_vcc_zext_dup2_exec
1829 (tmp, tmplo, mem_base, vcc, gcn_gen_undef (V64DImode),
1830 exec));
1831 else
1832 emit_insn (gen_addv64di3_vcc_zext_dup2 (tmp, tmplo, mem_base, vcc));
1833 }
1834
1835 new_base = tmp;
1836 }
1837 else if (AS_ANY_DS_P (as))
1838 {
1839 if (!exec)
1840 emit_insn (gen_addv64si3_dup (tmplo, tmplo, mem_base));
1841 else
1842 emit_insn (gen_addv64si3_dup_exec (tmplo, tmplo, mem_base,
1843 gcn_gen_undef (V64SImode), exec));
1844 new_base = tmplo;
1845 }
1846 else
1847 {
1848 mem_base = gen_rtx_VEC_DUPLICATE (V64DImode, mem_base);
1849 new_base = gen_rtx_PLUS (V64DImode, mem_base,
1850 gen_rtx_SIGN_EXTEND (V64DImode, tmplo));
1851 }
1852
1853 return gen_rtx_PLUS (GET_MODE (new_base), new_base,
1854 gen_rtx_VEC_DUPLICATE (GET_MODE (new_base),
1855 (mem_index ? mem_index
1856 : const0_rtx)));
1857}
1858
1859/* Convert a BASE address, a vector of OFFSETS, and a SCALE, to addresses
1860 suitable for the given address space. This is intended for use in
1861 gather/scatter patterns.
1862
1863 The offsets may be signed or unsigned, according to UNSIGNED_P.
1864 If EXEC is set then _exec patterns will be used, otherwise plain.
1865
1866 Return values.
1867 ADDR_SPACE_FLAT - return V64DImode vector of absolute addresses.
1868 ADDR_SPACE_GLOBAL - return V64SImode vector of offsets. */
1869
1870rtx
1871gcn_expand_scaled_offsets (addr_space_t as, rtx base, rtx offsets, rtx scale,
1872 bool unsigned_p, rtx exec)
1873{
1874 rtx tmpsi = gen_reg_rtx (V64SImode);
1875 rtx tmpdi = gen_reg_rtx (V64DImode);
1876 rtx undefsi = exec ? gcn_gen_undef (V64SImode) : NULL;
1877 rtx undefdi = exec ? gcn_gen_undef (V64DImode) : NULL;
1878
1879 if (CONST_INT_P (scale)
1880 && INTVAL (scale) > 0
1881 && exact_log2 (INTVAL (scale)) >= 0)
1882 emit_insn (gen_ashlv64si3 (tmpsi, offsets,
1883 GEN_INT (exact_log2 (INTVAL (scale)))));
1884 else
1885 (exec
1886 ? emit_insn (gen_mulv64si3_dup_exec (tmpsi, offsets, scale, undefsi,
1887 exec))
1888 : emit_insn (gen_mulv64si3_dup (tmpsi, offsets, scale)));
1889
1890 /* "Global" instructions do not support negative register offsets. */
1891 if (as == ADDR_SPACE_FLAT || !unsigned_p)
1892 {
1893 if (unsigned_p)
1894 (exec
1895 ? emit_insn (gen_addv64di3_zext_dup2_exec (tmpdi, tmpsi, base,
1896 undefdi, exec))
1897 : emit_insn (gen_addv64di3_zext_dup2 (tmpdi, tmpsi, base)));
1898 else
1899 (exec
1900 ? emit_insn (gen_addv64di3_sext_dup2_exec (tmpdi, tmpsi, base,
1901 undefdi, exec))
1902 : emit_insn (gen_addv64di3_sext_dup2 (tmpdi, tmpsi, base)));
1903 return tmpdi;
1904 }
1905 else if (as == ADDR_SPACE_GLOBAL)
1906 return tmpsi;
1907
1908 gcc_unreachable ();
1909}
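/* For instance, a gather with a constant SCALE of 4 shifts the offsets
   left by two bits; for ADDR_SPACE_FLAT (or for signed offsets) the
   result is then widened and added to a broadcast of BASE, giving a
   V64DImode vector of absolute addresses, whereas for ADDR_SPACE_GLOBAL
   with unsigned offsets the V64SImode offsets are returned unchanged and
   BASE remains a separate scalar operand of the instruction. */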
1910
1911/* Return true if move from OP0 to OP1 is known to be executed in vector
1912 unit. */
1913
1914bool
1915gcn_vgpr_move_p (rtx op0, rtx op1)
1916{
1917 if (MEM_P (op0) && AS_SCALAR_FLAT_P (MEM_ADDR_SPACE (op0)))
1918 return true;
1919 if (MEM_P (op1) && AS_SCALAR_FLAT_P (MEM_ADDR_SPACE (op1)))
1920 return true;
1921 return ((REG_P (op0) && VGPR_REGNO_P (REGNO (op0)))
1922 || (REG_P (op1) && VGPR_REGNO_P (REGNO (op1)))
1923 || vgpr_vector_mode_p (GET_MODE (op0)));
1924}
1925
1926/* Return true if move from OP0 to OP1 is known to be executed in scalar
1927 unit. Used in the machine description. */
1928
1929bool
1930gcn_sgpr_move_p (rtx op0, rtx op1)
1931{
1932 if (MEM_P (op0) && AS_SCALAR_FLAT_P (MEM_ADDR_SPACE (op0)))
1933 return true;
1934 if (MEM_P (op1) && AS_SCALAR_FLAT_P (MEM_ADDR_SPACE (op1)))
1935 return true;
1936 if (!REG_P (op0) || REGNO (op0) >= FIRST_PSEUDO_REGISTER
1937 || VGPR_REGNO_P (REGNO (op0)))
1938 return false;
1939 if (REG_P (op1)
1940 && REGNO (op1) < FIRST_PSEUDO_REGISTER
1941 && !VGPR_REGNO_P (REGNO (op1)))
1942 return true;
1943 return immediate_operand (op1, VOIDmode) || memory_operand (op1, VOIDmode);
1944}
1945
1946/* Implement TARGET_SECONDARY_RELOAD.
1947
1948 The address space determines which registers can be used for loads and
1949 stores. */
1950
1951static reg_class_t
1952gcn_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
1953 machine_mode reload_mode, secondary_reload_info *sri)
1954{
1955 reg_class_t result = NO_REGS;
1956 bool spilled_pseudo =
1957 (REG_P (x) || GET_CODE (x) == SUBREG) && true_regnum (x) == -1;
1958
1959 if (dump_file && (dump_flags & TDF_DETAILS))
1960 {
1961 fprintf (dump_file, "gcn_secondary_reload: ");
1962 dump_value_slim (dump_file, x, 1);
1963 fprintf (dump_file, " %s %s:%s", (in_p ? "->" : "<-"),
1964 reg_class_names[rclass], GET_MODE_NAME (reload_mode));
1965 if (REG_P (x) || GET_CODE (x) == SUBREG)
1966 fprintf (dump_file, " (true regnum: %d \"%s\")", true_regnum (x),
1967 (true_regnum (x) >= 0
1968 && true_regnum (x) < FIRST_PSEUDO_REGISTER
1969 ? reg_names[true_regnum (x)]
1970 : (spilled_pseudo ? "stack spill" : "??")));
1971 fprintf (dump_file, "\n");
1972 }
1973
1974 /* Some callers don't use or initialize icode. */
1975 sri->icode = CODE_FOR_nothing;
1976
1977 if (MEM_P (x) || spilled_pseudo)
1978 {
1979 addr_space_t as = DEFAULT_ADDR_SPACE;
1980
1981 /* If we have a spilled pseudo, we can't find the address space
1982 directly, but we know it's in ADDR_SPACE_FLAT space for GCN3 or
1983 ADDR_SPACE_GLOBAL for GCN5. */
1984 if (MEM_P (x))
1985 as = MEM_ADDR_SPACE (x);
1986
1987 if (as == ADDR_SPACE_DEFAULT)
1988 as = DEFAULT_ADDR_SPACE;
1989
1990 switch (as)
1991 {
1992 case ADDR_SPACE_SCALAR_FLAT:
1993 result =
1994 ((!MEM_P (x) || rclass == SGPR_REGS) ? NO_REGS : SGPR_REGS);
1995 break;
1996 case ADDR_SPACE_FLAT:
1997 case ADDR_SPACE_FLAT_SCRATCH:
1998 case ADDR_SPACE_GLOBAL:
1999 if (GET_MODE_CLASS (reload_mode) == MODE_VECTOR_INT
2000 || GET_MODE_CLASS (reload_mode) == MODE_VECTOR_FLOAT)
2001 {
2002 if (in_p)
2003 switch (reload_mode)
2004 {
2005 case E_V64SImode:
2006 sri->icode = CODE_FOR_reload_inv64si;
2007 break;
2008 case E_V64SFmode:
2009 sri->icode = CODE_FOR_reload_inv64sf;
2010 break;
2011 case E_V64HImode:
2012 sri->icode = CODE_FOR_reload_inv64hi;
2013 break;
2014 case E_V64HFmode:
2015 sri->icode = CODE_FOR_reload_inv64hf;
2016 break;
2017 case E_V64QImode:
2018 sri->icode = CODE_FOR_reload_inv64qi;
2019 break;
2020 case E_V64DImode:
2021 sri->icode = CODE_FOR_reload_inv64di;
2022 break;
2023 case E_V64DFmode:
2024 sri->icode = CODE_FOR_reload_inv64df;
2025 break;
2026 default:
2027 gcc_unreachable ();
2028 }
2029 else
2030 switch (reload_mode)
2031 {
2032 case E_V64SImode:
2033 sri->icode = CODE_FOR_reload_outv64si;
2034 break;
2035 case E_V64SFmode:
2036 sri->icode = CODE_FOR_reload_outv64sf;
2037 break;
2038 case E_V64HImode:
2039 sri->icode = CODE_FOR_reload_outv64hi;
2040 break;
2041 case E_V64HFmode:
2042 sri->icode = CODE_FOR_reload_outv64hf;
2043 break;
2044 case E_V64QImode:
2045 sri->icode = CODE_FOR_reload_outv64qi;
2046 break;
2047 case E_V64DImode:
2048 sri->icode = CODE_FOR_reload_outv64di;
2049 break;
2050 case E_V64DFmode:
2051 sri->icode = CODE_FOR_reload_outv64df;
2052 break;
2053 default:
2054 gcc_unreachable ();
2055 }
2056 break;
2057 }
2058 /* Fallthrough. */
2059 case ADDR_SPACE_LDS:
2060 case ADDR_SPACE_GDS:
2061 case ADDR_SPACE_SCRATCH:
2062 result = (rclass == VGPR_REGS ? NO_REGS : VGPR_REGS);
2063 break;
2064 }
2065 }
2066
2067 if (dump_file && (dump_flags & TDF_DETAILS))
2068 fprintf (dump_file, " <= %s (icode: %s)\n", reg_class_names[result],
2069 get_insn_name (sri->icode));
2070
2071 return result;
2072}
2073
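/* Editorial illustration, not part of the original source: reloading a
   V64SImode value from GLOBAL-space memory (in_p) lands in the vector
   branch above, which only records sri->icode = CODE_FOR_reload_inv64si
   and leaves the returned class as NO_REGS, whereas a scalar SImode
   reload from the same address space falls through and requests a
   VGPR_REGS scratch register instead.  */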
2074/* Update register usage after having seen the compiler flags and kernel
2075 attributes. We typically want to fix registers that contain values
2076 set by the HSA runtime. */
2077
2078static void
2079gcn_conditional_register_usage (void)
2080{
342f9464
KCY
2081 if (!cfun || !cfun->machine)
2082 return;
5326695a 2083
342f9464
KCY
2084 if (cfun->machine->normal_function)
2085 {
2086 /* Restrict the set of SGPRs and VGPRs used by non-kernel functions. */
f062c3f1 2087 for (int i = SGPR_REGNO (MAX_NORMAL_SGPR_COUNT);
87fdbe69 2088 i <= LAST_SGPR_REG; i++)
342f9464 2089 fixed_regs[i] = 1, call_used_regs[i] = 1;
5326695a 2090
87fdbe69
KCY
2091 for (int i = VGPR_REGNO (MAX_NORMAL_VGPR_COUNT);
2092 i <= LAST_VGPR_REG; i++)
342f9464 2093 fixed_regs[i] = 1, call_used_regs[i] = 1;
5326695a 2094
5326695a
AS
2095 return;
2096 }
2097
342f9464
KCY
2098 /* If the set of requested args is the default set, nothing more needs to
2099 be done. */
2100 if (cfun->machine->args.requested == default_requested_args)
2101 return;
2102
2103 /* Requesting a set of args different from the default violates the ABI. */
2104 if (!leaf_function_p ())
2105 warning (0, "a non-default set of initial values has been requested, "
55308fc2 2106 "which violates the ABI");
342f9464
KCY
2107
2108 for (int i = SGPR_REGNO (0); i < SGPR_REGNO (14); i++)
2109 fixed_regs[i] = 0;
2110
5326695a
AS
2111 /* Fix the runtime argument registers containing values that may be
2112 needed later. DISPATCH_PTR_ARG and FLAT_SCRATCH_* should not be
2113 needed after the prologue so there's no need to fix them. */
2114 if (cfun->machine->args.reg[PRIVATE_SEGMENT_WAVE_OFFSET_ARG] >= 0)
2115 fixed_regs[cfun->machine->args.reg[PRIVATE_SEGMENT_WAVE_OFFSET_ARG]] = 1;
2116 if (cfun->machine->args.reg[PRIVATE_SEGMENT_BUFFER_ARG] >= 0)
2117 {
342f9464
KCY
2118 /* The upper 32-bits of the 64-bit descriptor are not used, so allow
2119 the containing registers to be used for other purposes. */
5326695a
AS
2120 fixed_regs[cfun->machine->args.reg[PRIVATE_SEGMENT_BUFFER_ARG]] = 1;
2121 fixed_regs[cfun->machine->args.reg[PRIVATE_SEGMENT_BUFFER_ARG] + 1] = 1;
5326695a
AS
2122 }
2123 if (cfun->machine->args.reg[KERNARG_SEGMENT_PTR_ARG] >= 0)
2124 {
2125 fixed_regs[cfun->machine->args.reg[KERNARG_SEGMENT_PTR_ARG]] = 1;
2126 fixed_regs[cfun->machine->args.reg[KERNARG_SEGMENT_PTR_ARG] + 1] = 1;
2127 }
2128 if (cfun->machine->args.reg[DISPATCH_PTR_ARG] >= 0)
2129 {
2130 fixed_regs[cfun->machine->args.reg[DISPATCH_PTR_ARG]] = 1;
2131 fixed_regs[cfun->machine->args.reg[DISPATCH_PTR_ARG] + 1] = 1;
2132 }
2133 if (cfun->machine->args.reg[WORKGROUP_ID_X_ARG] >= 0)
2134 fixed_regs[cfun->machine->args.reg[WORKGROUP_ID_X_ARG]] = 1;
2135 if (cfun->machine->args.reg[WORK_ITEM_ID_X_ARG] >= 0)
2136 fixed_regs[cfun->machine->args.reg[WORK_ITEM_ID_X_ARG]] = 1;
2137 if (cfun->machine->args.reg[WORK_ITEM_ID_Y_ARG] >= 0)
2138 fixed_regs[cfun->machine->args.reg[WORK_ITEM_ID_Y_ARG]] = 1;
2139 if (cfun->machine->args.reg[WORK_ITEM_ID_Z_ARG] >= 0)
2140 fixed_regs[cfun->machine->args.reg[WORK_ITEM_ID_Z_ARG]] = 1;
5326695a
AS
2141}
2142
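/* Editorial note, not part of the original source: the net effect of the
   code above is that an ordinary (non-kernel) function may only allocate
   the first MAX_NORMAL_SGPR_COUNT scalar and MAX_NORMAL_VGPR_COUNT vector
   registers; everything beyond those limits is marked both fixed and
   call-used, so the register allocator never touches it.  */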
2143/* Determine if a load or store is valid, according to the register classes
2144 and address space. Used primarily by the machine description to decide
2145 when to split a move into two steps. */
2146
2147bool
2148gcn_valid_move_p (machine_mode mode, rtx dest, rtx src)
2149{
2150 if (!MEM_P (dest) && !MEM_P (src))
2151 return true;
2152
2153 if (MEM_P (dest)
2154 && AS_FLAT_P (MEM_ADDR_SPACE (dest))
2155 && (gcn_flat_address_p (XEXP (dest, 0), mode)
2156 || GET_CODE (XEXP (dest, 0)) == SYMBOL_REF
2157 || GET_CODE (XEXP (dest, 0)) == LABEL_REF)
2158 && gcn_vgpr_register_operand (src, mode))
2159 return true;
2160 else if (MEM_P (src)
2161 && AS_FLAT_P (MEM_ADDR_SPACE (src))
2162 && (gcn_flat_address_p (XEXP (src, 0), mode)
2163 || GET_CODE (XEXP (src, 0)) == SYMBOL_REF
2164 || GET_CODE (XEXP (src, 0)) == LABEL_REF)
2165 && gcn_vgpr_register_operand (dest, mode))
2166 return true;
2167
2168 if (MEM_P (dest)
2169 && AS_GLOBAL_P (MEM_ADDR_SPACE (dest))
2170 && (gcn_global_address_p (XEXP (dest, 0))
2171 || GET_CODE (XEXP (dest, 0)) == SYMBOL_REF
2172 || GET_CODE (XEXP (dest, 0)) == LABEL_REF)
2173 && gcn_vgpr_register_operand (src, mode))
2174 return true;
2175 else if (MEM_P (src)
2176 && AS_GLOBAL_P (MEM_ADDR_SPACE (src))
2177 && (gcn_global_address_p (XEXP (src, 0))
2178 || GET_CODE (XEXP (src, 0)) == SYMBOL_REF
2179 || GET_CODE (XEXP (src, 0)) == LABEL_REF)
2180 && gcn_vgpr_register_operand (dest, mode))
2181 return true;
2182
2183 if (MEM_P (dest)
2184 && MEM_ADDR_SPACE (dest) == ADDR_SPACE_SCALAR_FLAT
2185 && (gcn_scalar_flat_address_p (XEXP (dest, 0))
2186 || GET_CODE (XEXP (dest, 0)) == SYMBOL_REF
2187 || GET_CODE (XEXP (dest, 0)) == LABEL_REF)
2188 && gcn_ssrc_register_operand (src, mode))
2189 return true;
2190 else if (MEM_P (src)
2191 && MEM_ADDR_SPACE (src) == ADDR_SPACE_SCALAR_FLAT
2192 && (gcn_scalar_flat_address_p (XEXP (src, 0))
2193 || GET_CODE (XEXP (src, 0)) == SYMBOL_REF
2194 || GET_CODE (XEXP (src, 0)) == LABEL_REF)
2195 && gcn_sdst_register_operand (dest, mode))
2196 return true;
2197
2198 if (MEM_P (dest)
2199 && AS_ANY_DS_P (MEM_ADDR_SPACE (dest))
2200 && gcn_ds_address_p (XEXP (dest, 0))
2201 && gcn_vgpr_register_operand (src, mode))
2202 return true;
2203 else if (MEM_P (src)
2204 && AS_ANY_DS_P (MEM_ADDR_SPACE (src))
2205 && gcn_ds_address_p (XEXP (src, 0))
2206 && gcn_vgpr_register_operand (dest, mode))
2207 return true;
2208
2209 return false;
2210}
2211
2212/* }}} */
2213/* {{{ Functions and ABI. */
2214
2215/* Implement TARGET_FUNCTION_VALUE.
2216
2217 Define how to find the value returned by a function.
2218 The register location is always the same, but the mode depends on
2219 VALTYPE. */
2220
2221static rtx
2222gcn_function_value (const_tree valtype, const_tree, bool)
2223{
2224 machine_mode mode = TYPE_MODE (valtype);
2225
2226 if (INTEGRAL_TYPE_P (valtype)
2227 && GET_MODE_CLASS (mode) == MODE_INT
2228 && GET_MODE_SIZE (mode) < 4)
2229 mode = SImode;
2230
2231 return gen_rtx_REG (mode, SGPR_REGNO (RETURN_VALUE_REG));
2232}
2233
2234/* Implement TARGET_FUNCTION_VALUE_REGNO_P.
2235
2236 Return true if N is a possible register number for the function return
2237 value. */
2238
2239static bool
2240gcn_function_value_regno_p (const unsigned int n)
2241{
2242 return n == RETURN_VALUE_REG;
2243}
2244
0ffef200
RS
2245/* Calculate the number of registers required to hold function argument
2246 ARG. */
5326695a
AS
2247
2248static int
0ffef200 2249num_arg_regs (const function_arg_info &arg)
5326695a 2250{
0ffef200 2251 if (targetm.calls.must_pass_in_stack (arg))
5326695a
AS
2252 return 0;
2253
0ffef200 2254 int size = arg.promoted_size_in_bytes ();
5326695a
AS
2255 return (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2256}
2257
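/* Editorial worked example, not part of the original source, assuming
   UNITS_PER_WORD is 4 on this target: a 6-byte argument needs
   (6 + 4 - 1) / 4 == 2 registers, while an argument that must be passed
   on the stack contributes 0.  */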
2258/* Implement TARGET_STRICT_ARGUMENT_NAMING.
2259
2260 Return true if the location where a function argument is passed
2261 depends on whether or not it is a named argument.
2262
2263 For gcn, we know how to handle functions declared as stdarg: by
2264 passing an extra pointer to the unnamed arguments. However, the
2265 Fortran frontend can produce a different situation, where a
2266 function pointer is declared with no arguments, but the actual
2267 function and calls to it take more arguments. In that case, we
2268 want to ensure the call matches the definition of the function. */
2269
2270static bool
2271gcn_strict_argument_naming (cumulative_args_t cum_v)
2272{
2273 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
2274
2275 return cum->fntype == NULL_TREE || stdarg_p (cum->fntype);
2276}
2277
2278/* Implement TARGET_PRETEND_OUTGOING_VARARGS_NAMED.
2279
2280 See comment on gcn_strict_argument_naming. */
2281
2282static bool
2283gcn_pretend_outgoing_varargs_named (cumulative_args_t cum_v)
2284{
2285 return !gcn_strict_argument_naming (cum_v);
2286}
2287
2288/* Implement TARGET_FUNCTION_ARG.
2289
2290 Return an RTX indicating whether a function argument is passed in a register
2291 and if so, which register. */
2292
2293static rtx
6783fdb7 2294gcn_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
5326695a
AS
2295{
2296 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
2297 if (cum->normal_function)
2298 {
6783fdb7 2299 if (!arg.named || arg.end_marker_p ())
5326695a
AS
2300 return 0;
2301
0ffef200 2302 if (targetm.calls.must_pass_in_stack (arg))
5326695a
AS
2303 return 0;
2304
d3249526
AS
2305 /* Vector parameters are not supported yet. */
2306 if (VECTOR_MODE_P (arg.mode))
2307 return 0;
2308
5326695a 2309 int reg_num = FIRST_PARM_REG + cum->num;
0ffef200 2310 int num_regs = num_arg_regs (arg);
5326695a
AS
2311 if (num_regs > 0)
2312 while (reg_num % num_regs != 0)
2313 reg_num++;
2314 if (reg_num + num_regs <= FIRST_PARM_REG + NUM_PARM_REGS)
6783fdb7 2315 return gen_rtx_REG (arg.mode, reg_num);
5326695a
AS
2316 }
2317 else
2318 {
2319 if (cum->num >= cum->args.nargs)
2320 {
6783fdb7
RS
2321 cum->offset = (cum->offset + TYPE_ALIGN (arg.type) / 8 - 1)
2322 & -(TYPE_ALIGN (arg.type) / 8);
5326695a
AS
2323 cfun->machine->kernarg_segment_alignment
2324 = MAX ((unsigned) cfun->machine->kernarg_segment_alignment,
6783fdb7 2325 TYPE_ALIGN (arg.type) / 8);
5326695a
AS
2326 rtx addr = gen_rtx_REG (DImode,
2327 cum->args.reg[KERNARG_SEGMENT_PTR_ARG]);
2328 if (cum->offset)
2329 addr = gen_rtx_PLUS (DImode, addr,
2330 gen_int_mode (cum->offset, DImode));
6783fdb7
RS
2331 rtx mem = gen_rtx_MEM (arg.mode, addr);
2332 set_mem_attributes (mem, arg.type, 1);
5326695a
AS
2333 set_mem_addr_space (mem, ADDR_SPACE_SCALAR_FLAT);
2334 MEM_READONLY_P (mem) = 1;
2335 return mem;
2336 }
2337
2338 int a = cum->args.order[cum->num];
6783fdb7 2339 if (arg.mode != gcn_kernel_arg_types[a].mode)
5326695a
AS
2340 {
2341 error ("wrong type of argument %s", gcn_kernel_arg_types[a].name);
2342 return 0;
2343 }
2344 return gen_rtx_REG ((machine_mode) gcn_kernel_arg_types[a].mode,
2345 cum->args.reg[a]);
2346 }
2347 return 0;
2348}
2349
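/* Editorial worked example, not part of the original source: for a
   kernel argument with 8-byte alignment arriving when cum->offset is 12,
   the rounding above computes (12 + 8 - 1) & -8 == 16, so the
   scalar-flat load from the kernarg segment is formed at offset 16.  */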
2350/* Implement TARGET_FUNCTION_ARG_ADVANCE.
2351
2352 Updates the summarizer variable pointed to by CUM_V to advance past an
2353 argument in the argument list. */
2354
2355static void
6930c98c
RS
2356gcn_function_arg_advance (cumulative_args_t cum_v,
2357 const function_arg_info &arg)
5326695a
AS
2358{
2359 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
2360
2361 if (cum->normal_function)
2362 {
6930c98c 2363 if (!arg.named)
5326695a
AS
2364 return;
2365
0ffef200 2366 int num_regs = num_arg_regs (arg);
5326695a
AS
2367 if (num_regs > 0)
2368 while ((FIRST_PARM_REG + cum->num) % num_regs != 0)
2369 cum->num++;
2370 cum->num += num_regs;
2371 }
2372 else
2373 {
2374 if (cum->num < cum->args.nargs)
2375 cum->num++;
2376 else
2377 {
6930c98c 2378 cum->offset += tree_to_uhwi (TYPE_SIZE_UNIT (arg.type));
5326695a
AS
2379 cfun->machine->kernarg_segment_byte_size = cum->offset;
2380 }
2381 }
2382}
2383
2384/* Implement TARGET_ARG_PARTIAL_BYTES.
2385
2386 Returns the number of bytes at the beginning of an argument that must be put
2387 in registers. The value must be zero for arguments that are passed entirely
2388 in registers or that are entirely pushed on the stack. */
2389
2390static int
a7c81bc1 2391gcn_arg_partial_bytes (cumulative_args_t cum_v, const function_arg_info &arg)
5326695a
AS
2392{
2393 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
2394
a7c81bc1 2395 if (!arg.named)
5326695a
AS
2396 return 0;
2397
0ffef200 2398 if (targetm.calls.must_pass_in_stack (arg))
5326695a
AS
2399 return 0;
2400
2401 if (cum->num >= NUM_PARM_REGS)
2402 return 0;
2403
2404 /* If the argument fits entirely in registers, return 0. */
0ffef200 2405 if (cum->num + num_arg_regs (arg) <= NUM_PARM_REGS)
5326695a
AS
2406 return 0;
2407
2408 return (NUM_PARM_REGS - cum->num) * UNITS_PER_WORD;
2409}
2410
2411/* A normal function which takes a pointer argument (to a scalar) may be
2412 passed a pointer to LDS space (via a high-bits-set aperture), and that only
2413 works with FLAT addressing, not GLOBAL. Force FLAT addressing if the
2414 function has an incoming pointer-to-scalar parameter. */
2415
2416static void
2417gcn_detect_incoming_pointer_arg (tree fndecl)
2418{
2419 gcc_assert (cfun && cfun->machine);
2420
2421 for (tree arg = TYPE_ARG_TYPES (TREE_TYPE (fndecl));
2422 arg;
2423 arg = TREE_CHAIN (arg))
2424 if (POINTER_TYPE_P (TREE_VALUE (arg))
2425 && !AGGREGATE_TYPE_P (TREE_TYPE (TREE_VALUE (arg))))
2426 cfun->machine->use_flat_addressing = true;
2427}
2428
2429/* Implement INIT_CUMULATIVE_ARGS, via gcn.h.
2430
2431 Initialize a variable CUM of type CUMULATIVE_ARGS for a call to a function
2432 whose data type is FNTYPE. For a library call, FNTYPE is 0. */
2433
2434void
2435gcn_init_cumulative_args (CUMULATIVE_ARGS *cum /* Argument info to init */ ,
2436 tree fntype /* tree ptr for function decl */ ,
2437 rtx libname /* SYMBOL_REF of library name or 0 */ ,
2438 tree fndecl, int caller)
2439{
2440 memset (cum, 0, sizeof (*cum));
2441 cum->fntype = fntype;
2442 if (libname)
2443 {
2444 gcc_assert (cfun && cfun->machine);
2445 cum->normal_function = true;
2446 if (!caller)
2447 {
2448 cfun->machine->normal_function = true;
2449 gcn_detect_incoming_pointer_arg (fndecl);
2450 }
2451 return;
2452 }
2453 tree attr = NULL;
2454 if (fndecl)
2455 attr = lookup_attribute ("amdgpu_hsa_kernel", DECL_ATTRIBUTES (fndecl));
2456 if (fndecl && !attr)
2457 attr = lookup_attribute ("amdgpu_hsa_kernel",
2458 TYPE_ATTRIBUTES (TREE_TYPE (fndecl)));
2459 if (!attr && fntype)
2460 attr = lookup_attribute ("amdgpu_hsa_kernel", TYPE_ATTRIBUTES (fntype));
2461 /* Handle main () as a kernel, so we can run the testsuite.
2462 Handle OpenACC kernels similarly to main. */
2463 if (!attr && !caller && fndecl
2464 && (MAIN_NAME_P (DECL_NAME (fndecl))
2465 || lookup_attribute ("omp target entrypoint",
2466 DECL_ATTRIBUTES (fndecl)) != NULL_TREE))
2467 gcn_parse_amdgpu_hsa_kernel_attribute (&cum->args, NULL_TREE);
2468 else
2469 {
2470 if (!attr || caller)
2471 {
2472 gcc_assert (cfun && cfun->machine);
2473 cum->normal_function = true;
2474 if (!caller)
2475 cfun->machine->normal_function = true;
2476 }
2477 gcn_parse_amdgpu_hsa_kernel_attribute
2478 (&cum->args, attr ? TREE_VALUE (attr) : NULL_TREE);
2479 }
2480 cfun->machine->args = cum->args;
2481 if (!caller && cfun->machine->normal_function)
2482 gcn_detect_incoming_pointer_arg (fndecl);
3ed8f692
KCY
2483
2484 reinit_regs ();
5326695a
AS
2485}
2486
2487static bool
2488gcn_return_in_memory (const_tree type, const_tree ARG_UNUSED (fntype))
2489{
2490 machine_mode mode = TYPE_MODE (type);
2491 HOST_WIDE_INT size = int_size_in_bytes (type);
2492
2493 if (AGGREGATE_TYPE_P (type))
2494 return true;
2495
d3249526
AS
2496 /* Vector return values are not supported yet. */
2497 if (VECTOR_TYPE_P (type))
2498 return true;
2499
5326695a
AS
2500 if (mode == BLKmode)
2501 return true;
2502
2503 if (size > 2 * UNITS_PER_WORD)
2504 return true;
2505
2506 return false;
2507}
2508
2509/* Implement TARGET_PROMOTE_FUNCTION_MODE.
2510
2511 Return the mode to use for outgoing function arguments. */
2512
2513machine_mode
2514gcn_promote_function_mode (const_tree ARG_UNUSED (type), machine_mode mode,
2515 int *ARG_UNUSED (punsignedp),
2516 const_tree ARG_UNUSED (funtype),
2517 int ARG_UNUSED (for_return))
2518{
2519 if (GET_MODE_CLASS (mode) == MODE_INT && GET_MODE_SIZE (mode) < 4)
2520 return SImode;
2521
2522 return mode;
2523}
2524
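/* Editorial illustration, not part of the original source: an outgoing
   "char" or "short" argument (an integer mode narrower than 4 bytes) is
   therefore widened and passed as a full 32-bit SImode value, matching
   the natural width of the scalar and vector registers.  */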
2525/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.
2526
2527 Derived from hppa_gimplify_va_arg_expr. The generic routine doesn't handle
2528 ARGS_GROW_DOWNWARDS. */
2529
2530static tree
2531gcn_gimplify_va_arg_expr (tree valist, tree type,
2532 gimple_seq *ARG_UNUSED (pre_p),
2533 gimple_seq *ARG_UNUSED (post_p))
2534{
2535 tree ptr = build_pointer_type (type);
2536 tree valist_type;
2537 tree t, u;
2538 bool indirect;
2539
fde65a89 2540 indirect = pass_va_arg_by_reference (type);
5326695a
AS
2541 if (indirect)
2542 {
2543 type = ptr;
2544 ptr = build_pointer_type (type);
2545 }
2546 valist_type = TREE_TYPE (valist);
2547
2548 /* Args grow down. Not handled by generic routines. */
2549
2550 u = fold_convert (sizetype, size_in_bytes (type));
2551 u = fold_build1 (NEGATE_EXPR, sizetype, u);
2552 t = fold_build_pointer_plus (valist, u);
2553
2554 /* Align to 8 byte boundary. */
2555
2556 u = build_int_cst (TREE_TYPE (t), -8);
2557 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, u);
2558 t = fold_convert (valist_type, t);
2559
2560 t = build2 (MODIFY_EXPR, valist_type, valist, t);
2561
2562 t = fold_convert (ptr, t);
2563 t = build_va_arg_indirect_ref (t);
2564
2565 if (indirect)
2566 t = build_va_arg_indirect_ref (t);
2567
2568 return t;
2569}
2570
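/* Editorial worked example, not part of the original source: because
   args grow downwards here, fetching an object of size S from a va_list
   currently pointing at P first computes P - S and then rounds that down
   to an 8-byte boundary with "& -8"; the rounded address is both where
   the object is read from and the new value stored back into the
   va_list.  */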
955cd057
TB
2571/* Return 1 if TRAIT NAME is present in the OpenMP context's
2572 device trait set, return 0 if not present in any OpenMP context in the
2573 whole translation unit, or -1 if not present in the current OpenMP context
2574 but might be present in another OpenMP context in the same TU. */
2575
2576int
2577gcn_omp_device_kind_arch_isa (enum omp_device_kind_arch_isa trait,
2578 const char *name)
2579{
2580 switch (trait)
2581 {
2582 case omp_device_kind:
2583 return strcmp (name, "gpu") == 0;
2584 case omp_device_arch:
2585 return strcmp (name, "gcn") == 0;
2586 case omp_device_isa:
955cd057
TB
2587 if (strcmp (name, "fiji") == 0)
2588 return gcn_arch == PROCESSOR_FIJI;
2589 if (strcmp (name, "gfx900") == 0)
f062c3f1 2590 return gcn_arch == PROCESSOR_VEGA10;
955cd057 2591 if (strcmp (name, "gfx906") == 0)
f062c3f1 2592 return gcn_arch == PROCESSOR_VEGA20;
3535402e
AS
2593 if (strcmp (name, "gfx908") == 0)
2594 return gcn_arch == PROCESSOR_GFX908;
955cd057
TB
2595 return 0;
2596 default:
2597 gcc_unreachable ();
2598 }
2599}
2600
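/* Editorial illustration, not part of the original source: when
   compiling for gfx906, an OpenMP context selector such as
   device={isa("gfx906")} matches (the hook returns 1),
   device={isa("fiji")} does not (returns 0), and device={kind("gpu")}
   always matches on this target.  */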
5326695a
AS
2601/* Calculate stack offsets needed to create prologues and epilogues. */
2602
2603static struct machine_function *
2604gcn_compute_frame_offsets (void)
2605{
2606 machine_function *offsets = cfun->machine;
2607
2608 if (reload_completed)
2609 return offsets;
2610
2611 offsets->need_frame_pointer = frame_pointer_needed;
2612
2613 offsets->outgoing_args_size = crtl->outgoing_args_size;
2614 offsets->pretend_size = crtl->args.pretend_args_size;
2615
2616 offsets->local_vars = get_frame_size ();
2617
2618 offsets->lr_needs_saving = (!leaf_function_p ()
2619 || df_regs_ever_live_p (LR_REGNUM)
2620 || df_regs_ever_live_p (LR_REGNUM + 1));
2621
2622 offsets->callee_saves = offsets->lr_needs_saving ? 8 : 0;
2623
2624 for (int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
a365fa06 2625 if ((df_regs_ever_live_p (regno) && !call_used_or_fixed_reg_p (regno))
5326695a
AS
2626 || ((regno & ~1) == HARD_FRAME_POINTER_REGNUM
2627 && frame_pointer_needed))
2628 offsets->callee_saves += (VGPR_REGNO_P (regno) ? 256 : 4);
2629
2630 /* Round up to 64-bit boundary to maintain stack alignment. */
2631 offsets->callee_saves = (offsets->callee_saves + 7) & ~7;
2632
2633 return offsets;
2634}
2635
2636/* Insert code into the prologue or epilogue to store or load any
2637 callee-save register to/from the stack.
2638
2639 Helper function for gcn_expand_prologue and gcn_expand_epilogue. */
2640
2641static void
2642move_callee_saved_registers (rtx sp, machine_function *offsets,
2643 bool prologue)
2644{
2645 int regno, offset, saved_scalars;
2646 rtx exec = gen_rtx_REG (DImode, EXEC_REG);
2647 rtx vcc = gen_rtx_REG (DImode, VCC_LO_REG);
2648 rtx offreg = gen_rtx_REG (SImode, SGPR_REGNO (22));
2649 rtx as = gen_rtx_CONST_INT (VOIDmode, STACK_ADDR_SPACE);
2650 HOST_WIDE_INT exec_set = 0;
2651 int offreg_set = 0;
2652
2653 start_sequence ();
2654
2655 /* Move scalars into two vector registers. */
2656 for (regno = 0, saved_scalars = 0; regno < FIRST_VGPR_REG; regno++)
a365fa06 2657 if ((df_regs_ever_live_p (regno) && !call_used_or_fixed_reg_p (regno))
5326695a
AS
2658 || ((regno & ~1) == LINK_REGNUM && offsets->lr_needs_saving)
2659 || ((regno & ~1) == HARD_FRAME_POINTER_REGNUM
2660 && offsets->need_frame_pointer))
2661 {
2662 rtx reg = gen_rtx_REG (SImode, regno);
2663 rtx vreg = gen_rtx_REG (V64SImode,
2664 VGPR_REGNO (6 + (saved_scalars / 64)));
2665 int lane = saved_scalars % 64;
2666
2667 if (prologue)
2668 emit_insn (gen_vec_setv64si (vreg, reg, GEN_INT (lane)));
2669 else
2670 emit_insn (gen_vec_extractv64sisi (reg, vreg, GEN_INT (lane)));
2671
2672 saved_scalars++;
2673 }
2674
2675 rtx move_scalars = get_insns ();
2676 end_sequence ();
2677 start_sequence ();
2678
2679 /* Ensure that all vector lanes are moved. */
2680 exec_set = -1;
2681 emit_move_insn (exec, GEN_INT (exec_set));
2682
2683 /* Set up a vector stack pointer. */
2684 rtx _0_1_2_3 = gen_rtx_REG (V64SImode, VGPR_REGNO (1));
2685 rtx _0_4_8_12 = gen_rtx_REG (V64SImode, VGPR_REGNO (3));
2686 emit_insn (gen_ashlv64si3_exec (_0_4_8_12, _0_1_2_3, GEN_INT (2),
2687 gcn_gen_undef (V64SImode), exec));
2688 rtx vsp = gen_rtx_REG (V64DImode, VGPR_REGNO (4));
2689 emit_insn (gen_vec_duplicatev64di_exec (vsp, sp, gcn_gen_undef (V64DImode),
2690 exec));
2691 emit_insn (gen_addv64si3_vcc_exec (gcn_operand_part (V64SImode, vsp, 0),
2692 gcn_operand_part (V64SImode, vsp, 0),
2693 _0_4_8_12, vcc, gcn_gen_undef (V64SImode),
2694 exec));
2695 emit_insn (gen_addcv64si3_exec (gcn_operand_part (V64SImode, vsp, 1),
2696 gcn_operand_part (V64SImode, vsp, 1),
2697 const0_rtx, vcc, vcc,
2698 gcn_gen_undef (V64SImode), exec));
2699
2700 /* Move vectors. */
2701 for (regno = FIRST_VGPR_REG, offset = offsets->pretend_size;
2702 regno < FIRST_PSEUDO_REGISTER; regno++)
a365fa06 2703 if ((df_regs_ever_live_p (regno) && !call_used_or_fixed_reg_p (regno))
5326695a
AS
2704 || (regno == VGPR_REGNO (6) && saved_scalars > 0)
2705 || (regno == VGPR_REGNO (7) && saved_scalars > 63))
2706 {
2707 rtx reg = gen_rtx_REG (V64SImode, regno);
2708 int size = 256;
2709
2710 if (regno == VGPR_REGNO (6) && saved_scalars < 64)
2711 size = saved_scalars * 4;
2712 else if (regno == VGPR_REGNO (7) && saved_scalars < 128)
2713 size = (saved_scalars - 64) * 4;
2714
2715 if (size != 256 || exec_set != -1)
2716 {
2717 exec_set = ((unsigned HOST_WIDE_INT) 1 << (size / 4)) - 1;
2718 emit_move_insn (exec, gen_int_mode (exec_set, DImode));
2719 }
2720
2721 if (prologue)
2722 emit_insn (gen_scatterv64si_insn_1offset_exec (vsp, const0_rtx, reg,
2723 as, const0_rtx, exec));
2724 else
2725 emit_insn (gen_gatherv64si_insn_1offset_exec
2726 (reg, vsp, const0_rtx, as, const0_rtx,
2727 gcn_gen_undef (V64SImode), exec));
2728
2729 /* Move our VSP to the next stack entry. */
2730 if (offreg_set != size)
2731 {
2732 offreg_set = size;
2733 emit_move_insn (offreg, GEN_INT (size));
2734 }
2735 if (exec_set != -1)
2736 {
2737 exec_set = -1;
2738 emit_move_insn (exec, GEN_INT (exec_set));
2739 }
2740 emit_insn (gen_addv64si3_vcc_dup_exec
2741 (gcn_operand_part (V64SImode, vsp, 0),
2742 offreg, gcn_operand_part (V64SImode, vsp, 0),
2743 vcc, gcn_gen_undef (V64SImode), exec));
2744 emit_insn (gen_addcv64si3_exec
2745 (gcn_operand_part (V64SImode, vsp, 1),
2746 gcn_operand_part (V64SImode, vsp, 1),
2747 const0_rtx, vcc, vcc, gcn_gen_undef (V64SImode), exec));
2748
2749 offset += size;
2750 }
2751
2752 rtx move_vectors = get_insns ();
2753 end_sequence ();
2754
2755 if (prologue)
2756 {
2757 emit_insn (move_scalars);
2758 emit_insn (move_vectors);
2759 }
2760 else
2761 {
2762 emit_insn (move_vectors);
2763 emit_insn (move_scalars);
2764 }
2765}
2766
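/* Editorial note, not part of the original source: the scheme above
   packs up to 64 saved SGPRs into the lanes of v6 and the next 64 into
   v7, so a function saving, say, three scalar registers places them in
   lanes 0-2 of v6 and then writes only those lanes to the stack by
   narrowing EXEC to 0x7.  */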
2767/* Generate prologue. Called from gen_prologue during pro_and_epilogue pass.
2768
2769 For a non-kernel function, the stack layout looks like this (interim),
2770 growing *upwards*:
2771
2772 hi | + ...
2773 |__________________| <-- current SP
2774 | outgoing args |
2775 |__________________|
2776 | (alloca space) |
2777 |__________________|
2778 | local vars |
2779 |__________________| <-- FP/hard FP
2780 | callee-save regs |
2781 |__________________| <-- soft arg pointer
2782 | pretend args |
2783 |__________________| <-- incoming SP
2784 | incoming args |
2785 lo |..................|
2786
2787 This implies arguments (beyond the first N in registers) must grow
2788 downwards (as, apparently, PA has them do).
2789
2790 For a kernel function we have the simpler:
2791
2792 hi | + ...
2793 |__________________| <-- current SP
2794 | outgoing args |
2795 |__________________|
2796 | (alloca space) |
2797 |__________________|
2798 | local vars |
2799 lo |__________________| <-- FP/hard FP
2800
2801*/
2802
2803void
2804gcn_expand_prologue ()
2805{
2806 machine_function *offsets = gcn_compute_frame_offsets ();
2807
2808 if (!cfun || !cfun->machine || cfun->machine->normal_function)
2809 {
2810 rtx sp = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
eff23b79
AS
2811 rtx sp_hi = gcn_operand_part (Pmode, sp, 1);
2812 rtx sp_lo = gcn_operand_part (Pmode, sp, 0);
5326695a 2813 rtx fp = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
eff23b79
AS
2814 rtx fp_hi = gcn_operand_part (Pmode, fp, 1);
2815 rtx fp_lo = gcn_operand_part (Pmode, fp, 0);
5326695a
AS
2816
2817 start_sequence ();
2818
2819 if (offsets->pretend_size > 0)
2820 {
2821 /* FIXME: Do the actual saving of register pretend args to the stack.
2822 Register order needs consideration. */
2823 }
2824
2825 /* Save callee-save regs. */
2826 move_callee_saved_registers (sp, offsets, true);
2827
2828 HOST_WIDE_INT sp_adjust = offsets->pretend_size
2829 + offsets->callee_saves
2830 + offsets->local_vars + offsets->outgoing_args_size;
2831 if (sp_adjust > 0)
eff23b79
AS
2832 {
2833 /* Adding RTX_FRAME_RELATED_P effectively disables splitting, so
2834 we use a split add explicitly, and specify the DImode add in
2835 the note. */
2836 rtx scc = gen_rtx_REG (BImode, SCC_REG);
2837 rtx adjustment = gen_int_mode (sp_adjust, SImode);
2838 rtx insn = emit_insn (gen_addsi3_scalar_carry (sp_lo, sp_lo,
2839 adjustment, scc));
2840 RTX_FRAME_RELATED_P (insn) = 1;
2841 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
2842 gen_rtx_SET (sp,
2843 gen_rtx_PLUS (DImode, sp, adjustment)));
2844 emit_insn (gen_addcsi3_scalar_zero (sp_hi, sp_hi, scc));
2845 }
5326695a
AS
2846
2847 if (offsets->need_frame_pointer)
eff23b79
AS
2848 {
2849 /* Adding RTX_FRAME_RELATED_P effectively disables spliting, so
2850 we use split add explictly, and specify the DImode add in
2851 the note. */
2852 rtx scc = gen_rtx_REG (BImode, SCC_REG);
2853 int fp_adjust = -(offsets->local_vars + offsets->outgoing_args_size);
2854 rtx adjustment = gen_int_mode (fp_adjust, SImode);
2855 rtx insn = emit_insn (gen_addsi3_scalar_carry(fp_lo, sp_lo,
2856 adjustment, scc));
2857 RTX_FRAME_RELATED_P (insn) = 1;
2858 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
2859 gen_rtx_SET (fp,
2860 gen_rtx_PLUS (DImode, sp, adjustment)));
2861 emit_insn (gen_addcsi3_scalar (fp_hi, sp_hi,
2862 (fp_adjust < 0 ? GEN_INT (-1)
2863 : const0_rtx),
2864 scc, scc));
2865 }
5326695a
AS
2866
2867 rtx_insn *seq = get_insns ();
2868 end_sequence ();
2869
2870 /* FIXME: Prologue insns should have this flag set for debug output, etc.
2871 but it causes issues for now.
2872 for (insn = seq; insn; insn = NEXT_INSN (insn))
2873 if (INSN_P (insn))
2874 RTX_FRAME_RELATED_P (insn) = 1;*/
2875
2876 emit_insn (seq);
2877 }
2878 else
2879 {
2880 rtx wave_offset = gen_rtx_REG (SImode,
2881 cfun->machine->args.
2882 reg[PRIVATE_SEGMENT_WAVE_OFFSET_ARG]);
2883
5326695a
AS
2884 if (cfun->machine->args.requested & (1 << FLAT_SCRATCH_INIT_ARG))
2885 {
2886 rtx fs_init_lo =
2887 gen_rtx_REG (SImode,
2888 cfun->machine->args.reg[FLAT_SCRATCH_INIT_ARG]);
2889 rtx fs_init_hi =
2890 gen_rtx_REG (SImode,
2891 cfun->machine->args.reg[FLAT_SCRATCH_INIT_ARG] + 1);
2892 rtx fs_reg_lo = gen_rtx_REG (SImode, FLAT_SCRATCH_REG);
2893 rtx fs_reg_hi = gen_rtx_REG (SImode, FLAT_SCRATCH_REG + 1);
2894
2895 /*rtx queue = gen_rtx_REG(DImode,
2896 cfun->machine->args.reg[QUEUE_PTR_ARG]);
2897 rtx aperture = gen_rtx_MEM (SImode,
2898 gen_rtx_PLUS (DImode, queue,
2899 gen_int_mode (68, SImode)));
2900 set_mem_addr_space (aperture, ADDR_SPACE_SCALAR_FLAT);*/
2901
2902 /* Set up flat_scratch. */
2903 emit_insn (gen_addsi3_scc (fs_reg_hi, fs_init_lo, wave_offset));
2904 emit_insn (gen_lshrsi3_scc (fs_reg_hi, fs_reg_hi,
2905 gen_int_mode (8, SImode)));
2906 emit_move_insn (fs_reg_lo, fs_init_hi);
2907 }
2908
2909 /* Set up frame pointer and stack pointer. */
2910 rtx sp = gen_rtx_REG (DImode, STACK_POINTER_REGNUM);
eff23b79
AS
2911 rtx sp_hi = simplify_gen_subreg (SImode, sp, DImode, 4);
2912 rtx sp_lo = simplify_gen_subreg (SImode, sp, DImode, 0);
5326695a
AS
2913 rtx fp = gen_rtx_REG (DImode, HARD_FRAME_POINTER_REGNUM);
2914 rtx fp_hi = simplify_gen_subreg (SImode, fp, DImode, 4);
2915 rtx fp_lo = simplify_gen_subreg (SImode, fp, DImode, 0);
2916
2917 HOST_WIDE_INT sp_adjust = (offsets->local_vars
2918 + offsets->outgoing_args_size);
2919
2920 /* Initialise FP and SP from the buffer descriptor in s[0:3]. */
2921 emit_move_insn (fp_lo, gen_rtx_REG (SImode, 0));
2922 emit_insn (gen_andsi3_scc (fp_hi, gen_rtx_REG (SImode, 1),
2923 gen_int_mode (0xffff, SImode)));
3258c2d6
AS
2924 rtx scc = gen_rtx_REG (BImode, SCC_REG);
2925 emit_insn (gen_addsi3_scalar_carry (fp_lo, fp_lo, wave_offset, scc));
2926 emit_insn (gen_addcsi3_scalar_zero (fp_hi, fp_hi, scc));
5326695a 2927
eff23b79
AS
2928 /* Adding RTX_FRAME_RELATED_P effectively disables splitting, so we use
2929 a split add explicitly, and specify the DImode add in the note.
2930 The DWARF info expects that the callee-save data is in the frame,
2931 even though it isn't (because this is the entry point), so we
2932 make a notional adjustment to the DWARF frame offset here. */
2933 rtx dbg_adjustment = gen_int_mode (sp_adjust + offsets->callee_saves,
2934 DImode);
2935 rtx insn;
5326695a 2936 if (sp_adjust > 0)
eff23b79
AS
2937 {
2938 rtx scc = gen_rtx_REG (BImode, SCC_REG);
2939 rtx adjustment = gen_int_mode (sp_adjust, DImode);
2940 insn = emit_insn (gen_addsi3_scalar_carry(sp_lo, fp_lo, adjustment,
2941 scc));
2942 emit_insn (gen_addcsi3_scalar_zero (sp_hi, fp_hi, scc));
2943 }
5326695a 2944 else
eff23b79
AS
2945 insn = emit_move_insn (sp, fp);
2946 RTX_FRAME_RELATED_P (insn) = 1;
2947 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
2948 gen_rtx_SET (sp, gen_rtx_PLUS (DImode, sp,
2949 dbg_adjustment)));
5326695a
AS
2950
2951 /* Make sure the flat scratch reg doesn't get optimised away. */
2952 emit_insn (gen_prologue_use (gen_rtx_REG (DImode, FLAT_SCRATCH_REG)));
2953 }
2954
2955 /* Ensure that the scheduler doesn't do anything unexpected. */
2956 emit_insn (gen_blockage ());
2957
86b0eb81
AS
2958 /* m0 is initialized for the usual LDS DS and FLAT memory case.
2959 The low-part is the address of the topmost addressable byte, which is
2960 size-1. The high-part is an offset and should be zero. */
5326695a 2961 emit_move_insn (gen_rtx_REG (SImode, M0_REG),
86b0eb81 2962 gen_int_mode (LDS_SIZE-1, SImode));
5326695a
AS
2963
2964 emit_insn (gen_prologue_use (gen_rtx_REG (SImode, M0_REG)));
5326695a
AS
2965
2966 if (cfun && cfun->machine && !cfun->machine->normal_function && flag_openmp)
2967 {
2968 /* OpenMP kernels have an implicit call to gomp_gcn_enter_kernel. */
2969 rtx fn_reg = gen_rtx_REG (Pmode, FIRST_PARM_REG);
2970 emit_move_insn (fn_reg, gen_rtx_SYMBOL_REF (Pmode,
2971 "gomp_gcn_enter_kernel"));
2972 emit_call_insn (gen_gcn_indirect_call (fn_reg, const0_rtx));
2973 }
2974}
2975
2976/* Generate epilogue. Called from gen_epilogue during pro_and_epilogue pass.
2977
2978 See gcn_expand_prologue for stack details. */
2979
2980void
2981gcn_expand_epilogue (void)
2982{
2983 /* Ensure that the scheduler doesn't do anything unexpected. */
2984 emit_insn (gen_blockage ());
2985
2986 if (!cfun || !cfun->machine || cfun->machine->normal_function)
2987 {
2988 machine_function *offsets = gcn_compute_frame_offsets ();
2989 rtx sp = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
2990 rtx fp = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
2991
2992 HOST_WIDE_INT sp_adjust = offsets->callee_saves + offsets->pretend_size;
2993
2994 if (offsets->need_frame_pointer)
2995 {
2996 /* Restore old SP from the frame pointer. */
2997 if (sp_adjust > 0)
2998 emit_insn (gen_subdi3 (sp, fp, gen_int_mode (sp_adjust, DImode)));
2999 else
3000 emit_move_insn (sp, fp);
3001 }
3002 else
3003 {
3004 /* Restore old SP from current SP. */
3005 sp_adjust += offsets->outgoing_args_size + offsets->local_vars;
3006
3007 if (sp_adjust > 0)
3008 emit_insn (gen_subdi3 (sp, sp, gen_int_mode (sp_adjust, DImode)));
3009 }
3010
3011 move_callee_saved_registers (sp, offsets, false);
3012
3013 /* There's no explicit use of the link register on the return insn. Emit
3014 one here instead. */
3015 if (offsets->lr_needs_saving)
3016 emit_use (gen_rtx_REG (DImode, LINK_REGNUM));
3017
3018 /* Similar for frame pointer. */
3019 if (offsets->need_frame_pointer)
3020 emit_use (gen_rtx_REG (DImode, HARD_FRAME_POINTER_REGNUM));
3021 }
3022 else if (flag_openmp)
3023 {
3024 /* OpenMP kernels have an implicit call to gomp_gcn_exit_kernel. */
3025 rtx fn_reg = gen_rtx_REG (Pmode, FIRST_PARM_REG);
3026 emit_move_insn (fn_reg,
3027 gen_rtx_SYMBOL_REF (Pmode, "gomp_gcn_exit_kernel"));
3028 emit_call_insn (gen_gcn_indirect_call (fn_reg, const0_rtx));
3029 }
3030 else if (TREE_CODE (TREE_TYPE (DECL_RESULT (cfun->decl))) != VOID_TYPE)
3031 {
3032 /* Assume that an exit value compatible with gcn-run is expected.
3033 That is, the third input parameter is an int*.
3034
3035 We can't allocate any new registers, but the kernarg_reg is
3036 dead after this, so we'll use that. */
3037 rtx kernarg_reg = gen_rtx_REG (DImode, cfun->machine->args.reg
3038 [KERNARG_SEGMENT_PTR_ARG]);
3039 rtx retptr_mem = gen_rtx_MEM (DImode,
3040 gen_rtx_PLUS (DImode, kernarg_reg,
3041 GEN_INT (16)));
3042 set_mem_addr_space (retptr_mem, ADDR_SPACE_SCALAR_FLAT);
3043 emit_move_insn (kernarg_reg, retptr_mem);
3044
3045 rtx retval_mem = gen_rtx_MEM (SImode, kernarg_reg);
3046 set_mem_addr_space (retval_mem, ADDR_SPACE_SCALAR_FLAT);
3047 emit_move_insn (retval_mem,
3048 gen_rtx_REG (SImode, SGPR_REGNO (RETURN_VALUE_REG)));
3049 }
3050
3051 emit_jump_insn (gen_gcn_return ());
3052}
3053
3054/* Implement TARGET_CAN_ELIMINATE.
3055
3056 Return true if the compiler is allowed to try to replace register number
3057 FROM_REG with register number TO_REG.
3058
3059 FIXME: is the default "true" not enough? Should this be a negative set? */
3060
3061bool
3062gcn_can_eliminate_p (int /*from_reg */ , int to_reg)
3063{
3064 return (to_reg == HARD_FRAME_POINTER_REGNUM
3065 || to_reg == STACK_POINTER_REGNUM);
3066}
3067
3068/* Implement INITIAL_ELIMINATION_OFFSET.
3069
3070 Returns the initial difference between the specified pair of registers, in
3071 terms of stack position. */
3072
3073HOST_WIDE_INT
3074gcn_initial_elimination_offset (int from, int to)
3075{
3076 machine_function *offsets = gcn_compute_frame_offsets ();
3077
3078 switch (from)
3079 {
3080 case ARG_POINTER_REGNUM:
3081 if (to == STACK_POINTER_REGNUM)
3082 return -(offsets->callee_saves + offsets->local_vars
3083 + offsets->outgoing_args_size);
3084 else if (to == FRAME_POINTER_REGNUM || to == HARD_FRAME_POINTER_REGNUM)
3085 return -offsets->callee_saves;
3086 else
3087 gcc_unreachable ();
3088 break;
3089
3090 case FRAME_POINTER_REGNUM:
3091 if (to == STACK_POINTER_REGNUM)
3092 return -(offsets->local_vars + offsets->outgoing_args_size);
3093 else if (to == HARD_FRAME_POINTER_REGNUM)
3094 return 0;
3095 else
3096 gcc_unreachable ();
3097 break;
3098
3099 default:
3100 gcc_unreachable ();
3101 }
3102}
3103
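/* Editorial worked example, not part of the original source: with 8
   bytes of callee saves, 16 bytes of local variables and 32 bytes of
   outgoing arguments, eliminating ARG_POINTER_REGNUM to
   STACK_POINTER_REGNUM yields -(8 + 16 + 32) == -56, while eliminating
   it to the (hard) frame pointer yields -8, matching the upward-growing
   layout pictured above gcn_expand_prologue.  */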
3104/* Implement HARD_REGNO_RENAME_OK.
3105
3106 Return true if it is permissible to rename a hard register from
3107 FROM_REG to TO_REG. */
3108
3109bool
3110gcn_hard_regno_rename_ok (unsigned int from_reg, unsigned int to_reg)
3111{
3112 if (from_reg == SCC_REG
3113 || from_reg == VCC_LO_REG || from_reg == VCC_HI_REG
3114 || from_reg == EXEC_LO_REG || from_reg == EXEC_HI_REG
3115 || to_reg == SCC_REG
3116 || to_reg == VCC_LO_REG || to_reg == VCC_HI_REG
3117 || to_reg == EXEC_LO_REG || to_reg == EXEC_HI_REG)
3118 return false;
3119
3120 /* Allow the link register to be used if it was saved. */
3121 if ((to_reg & ~1) == LINK_REGNUM)
3122 return !cfun || cfun->machine->lr_needs_saving;
3123
3124 /* Allow the registers used for the static chain to be used if the chain is
3125 not in active use. */
3126 if ((to_reg & ~1) == STATIC_CHAIN_REGNUM)
3127 return !cfun
3128 || !(cfun->static_chain_decl
3129 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
3130 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM + 1));
3131
3132 return true;
3133}
3134
3135/* Implement HARD_REGNO_CALLER_SAVE_MODE.
3136
3137 Which mode is required for saving NREGS of a pseudo-register in
3138 call-clobbered hard register REGNO. */
3139
3140machine_mode
3141gcn_hard_regno_caller_save_mode (unsigned int regno, unsigned int nregs,
3142 machine_mode regmode)
3143{
737d6a1a 3144 machine_mode result = choose_hard_reg_mode (regno, nregs, NULL);
5326695a
AS
3145
3146 if (VECTOR_MODE_P (result) && !VECTOR_MODE_P (regmode))
3147 result = (nregs == 1 ? SImode : DImode);
3148
3149 return result;
3150}
3151
3152/* Implement TARGET_ASM_TRAMPOLINE_TEMPLATE.
3153
3154 Output assembler code for a block containing the constant parts
3155 of a trampoline, leaving space for the variable parts. */
3156
3157static void
3158gcn_asm_trampoline_template (FILE *f)
3159{
3160 /* The source operand of the move instructions must be a 32-bit
3161 constant following the opcode. */
3162 asm_fprintf (f, "\ts_mov_b32\ts%i, 0xffff\n", STATIC_CHAIN_REGNUM);
3163 asm_fprintf (f, "\ts_mov_b32\ts%i, 0xffff\n", STATIC_CHAIN_REGNUM + 1);
3164 asm_fprintf (f, "\ts_mov_b32\ts%i, 0xffff\n", CC_SAVE_REG);
3165 asm_fprintf (f, "\ts_mov_b32\ts%i, 0xffff\n", CC_SAVE_REG + 1);
3166 asm_fprintf (f, "\ts_setpc_b64\ts[%i:%i]\n", CC_SAVE_REG, CC_SAVE_REG + 1);
3167}
3168
3169/* Implement TARGET_TRAMPOLINE_INIT.
3170
3171 Emit RTL insns to initialize the variable parts of a trampoline.
3172 FNDECL is the decl of the target address, M_TRAMP is a MEM for
3173 the trampoline, and CHAIN_VALUE is an RTX for the static chain
3174 to be passed to the target function. */
3175
3176static void
3177gcn_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3178{
b7c28a47
AS
3179 if (TARGET_GCN5_PLUS)
3180 sorry ("nested function trampolines not supported on GCN5 due to"
3181 " non-executable stacks");
3182
5326695a
AS
3183 emit_block_move (m_tramp, assemble_trampoline_template (),
3184 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3185
3186 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
3187 rtx chain_value_reg = copy_to_reg (chain_value);
3188 rtx fnaddr_reg = copy_to_reg (fnaddr);
3189
3190 for (int i = 0; i < 4; i++)
3191 {
3192 rtx mem = adjust_address (m_tramp, SImode, i * 8 + 4);
3193 rtx reg = i < 2 ? chain_value_reg : fnaddr_reg;
3194 emit_move_insn (mem, gen_rtx_SUBREG (SImode, reg, (i % 2) * 4));
3195 }
3196
3197 rtx tramp_addr = XEXP (m_tramp, 0);
3198 emit_insn (gen_clear_icache (tramp_addr,
3199 plus_constant (ptr_mode, tramp_addr,
3200 TRAMPOLINE_SIZE)));
3201}
3202
3203/* }}} */
3204/* {{{ Miscellaneous. */
3205
3206/* Implement TARGET_CANNOT_COPY_INSN_P.
3207
3208 Return true if INSN must not be duplicated. */
3209
3210static bool
3211gcn_cannot_copy_insn_p (rtx_insn *insn)
3212{
3213 if (recog_memoized (insn) == CODE_FOR_gcn_wavefront_barrier)
3214 return true;
3215
3216 return false;
3217}
3218
3219/* Implement TARGET_DEBUG_UNWIND_INFO.
3220
3221 Defines the mechanism that will be used for describing frame unwind
3222 information to the debugger. */
3223
3224static enum unwind_info_type
3225gcn_debug_unwind_info ()
3226{
3227 /* No support for debug info, yet. */
3228 return UI_NONE;
3229}
3230
3231/* Determine if there is a suitable hardware conversion instruction.
3232 Used primarily by the machine description. */
3233
3234bool
3235gcn_valid_cvt_p (machine_mode from, machine_mode to, enum gcn_cvt_t op)
3236{
3237 if (VECTOR_MODE_P (from) != VECTOR_MODE_P (to))
3238 return false;
3239
3240 if (VECTOR_MODE_P (from))
3241 {
3242 from = GET_MODE_INNER (from);
3243 to = GET_MODE_INNER (to);
3244 }
3245
3246 switch (op)
3247 {
3248 case fix_trunc_cvt:
3249 case fixuns_trunc_cvt:
3250 if (GET_MODE_CLASS (from) != MODE_FLOAT
3251 || GET_MODE_CLASS (to) != MODE_INT)
3252 return false;
3253 break;
3254 case float_cvt:
3255 case floatuns_cvt:
3256 if (GET_MODE_CLASS (from) != MODE_INT
3257 || GET_MODE_CLASS (to) != MODE_FLOAT)
3258 return false;
3259 break;
3260 case extend_cvt:
3261 if (GET_MODE_CLASS (from) != MODE_FLOAT
3262 || GET_MODE_CLASS (to) != MODE_FLOAT
3263 || GET_MODE_SIZE (from) >= GET_MODE_SIZE (to))
3264 return false;
3265 break;
3266 case trunc_cvt:
3267 if (GET_MODE_CLASS (from) != MODE_FLOAT
3268 || GET_MODE_CLASS (to) != MODE_FLOAT
3269 || GET_MODE_SIZE (from) <= GET_MODE_SIZE (to))
3270 return false;
3271 break;
3272 }
3273
3274 return ((to == HImode && from == HFmode)
3275 || (to == SImode && (from == SFmode || from == DFmode))
3276 || (to == HFmode && (from == HImode || from == SFmode))
3277 || (to == SFmode && (from == SImode || from == HFmode
3278 || from == DFmode))
3279 || (to == DFmode && (from == SImode || from == SFmode)));
3280}
3281
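/* Editorial illustration, not part of the original source: under the
   checks above, an SImode to SFmode float_cvt is valid (integer to
   float, and SF<-SI appears in the final list), whereas a DFmode to
   HFmode trunc_cvt is rejected because HF<-DF is not listed, even though
   it passes the class and size checks.  */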
76d46331
KCY
3282/* Implement TARGET_EMUTLS_VAR_INIT.
3283
3284 Disable emutls (gthr-gcn.h does not support it, yet). */
3285
3286tree
3287gcn_emutls_var_init (tree, tree decl, tree)
3288{
3289 sorry_at (DECL_SOURCE_LOCATION (decl), "TLS is not implemented for GCN");
6349e4f0 3290 return NULL_TREE;
76d46331
KCY
3291}
3292
5326695a
AS
3293/* }}} */
3294/* {{{ Costs. */
3295
3296/* Implement TARGET_RTX_COSTS.
3297
3298 Compute a (partial) cost for rtx X. Return true if the complete
3299 cost has been computed, and false if subexpressions should be
3300 scanned. In either case, *TOTAL contains the cost result. */
3301
3302static bool
3303gcn_rtx_costs (rtx x, machine_mode, int, int, int *total, bool)
3304{
3305 enum rtx_code code = GET_CODE (x);
3306 switch (code)
3307 {
3308 case CONST:
3309 case CONST_DOUBLE:
3310 case CONST_VECTOR:
3311 case CONST_INT:
3312 if (gcn_inline_constant_p (x))
3313 *total = 0;
3314 else if (code == CONST_INT
3315 && ((unsigned HOST_WIDE_INT) INTVAL (x) + 0x8000) < 0x10000)
3316 *total = 1;
3317 else if (gcn_constant_p (x))
3318 *total = 2;
3319 else
3320 *total = vgpr_vector_mode_p (GET_MODE (x)) ? 64 : 4;
3321 return true;
3322
3323 case DIV:
3324 *total = 100;
3325 return false;
3326
3327 default:
3328 *total = 3;
3329 return false;
3330 }
3331}
3332
3333/* Implement TARGET_MEMORY_MOVE_COST.
3334
3335 Return the cost of moving data of mode M between a
3336 register and memory. A value of 2 is the default; this cost is
3337 relative to those in `REGISTER_MOVE_COST'.
3338
3339 This function is used extensively by register_move_cost, which is used to
3340 build tables at startup. Make it inline in this case.
3341 When IN is 2, return the maximum of the in and out move costs.
3342
3343 If moving between registers and memory is more expensive than
3344 between two registers, you should define this macro to express the
3345 relative cost.
3346
3347 Model also increased moving costs of QImode registers in non
3348 Q_REGS classes. */
3349
3350#define LOAD_COST 32
3351#define STORE_COST 32
3352static int
3353gcn_memory_move_cost (machine_mode mode, reg_class_t regclass, bool in)
3354{
3355 int nregs = CEIL (GET_MODE_SIZE (mode), 4);
3356 switch (regclass)
3357 {
3358 case SCC_CONDITIONAL_REG:
3359 case VCCZ_CONDITIONAL_REG:
3360 case VCC_CONDITIONAL_REG:
3361 case EXECZ_CONDITIONAL_REG:
3362 case ALL_CONDITIONAL_REGS:
3363 case SGPR_REGS:
3364 case SGPR_EXEC_REGS:
3365 case EXEC_MASK_REG:
3366 case SGPR_VOP_SRC_REGS:
3367 case SGPR_MEM_SRC_REGS:
3368 case SGPR_SRC_REGS:
3369 case SGPR_DST_REGS:
3370 case GENERAL_REGS:
3371 case AFP_REGS:
3372 if (!in)
3373 return (STORE_COST + 2) * nregs;
3374 return LOAD_COST * nregs;
3375 case VGPR_REGS:
3376 if (in)
3377 return (LOAD_COST + 2) * nregs;
3378 return STORE_COST * nregs;
3379 case ALL_REGS:
3380 case ALL_GPR_REGS:
3381 case SRCDST_REGS:
3382 if (in)
3383 return (LOAD_COST + 2) * nregs;
3384 return (STORE_COST + 2) * nregs;
3385 default:
3386 gcc_unreachable ();
3387 }
3388}
3389
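/* Editorial worked example, not part of the original source: for a
   DImode value (nregs == 2), a load into one of the scalar classes costs
   32 * 2 == 64 and a store from it (32 + 2) * 2 == 68, while for
   VGPR_REGS the bias is reversed: a load costs (32 + 2) * 2 == 68 and a
   store 32 * 2 == 64.  */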
3390/* Implement TARGET_REGISTER_MOVE_COST.
3391
3392 Return the cost of moving data from a register in class CLASS1 to
3393 one in class CLASS2. Base value is 2. */
3394
3395static int
3396gcn_register_move_cost (machine_mode, reg_class_t dst, reg_class_t src)
3397{
3398 /* Increase cost of moving from and to vector registers. While this is
3399 fast in hardware (I think), it has hidden cost of setting up the exec
3400 flags. */
3401 if ((src < VGPR_REGS) != (dst < VGPR_REGS))
3402 return 4;
3403 return 2;
3404}
3405
3406/* }}} */
3407/* {{{ Builtins. */
3408
3409/* Type codes used by GCN built-in definitions. */
3410
3411enum gcn_builtin_type_index
3412{
3413 GCN_BTI_END_OF_PARAMS,
3414
3415 GCN_BTI_VOID,
3416 GCN_BTI_BOOL,
3417 GCN_BTI_INT,
3418 GCN_BTI_UINT,
3419 GCN_BTI_SIZE_T,
3420 GCN_BTI_LLINT,
3421 GCN_BTI_LLUINT,
3422 GCN_BTI_EXEC,
3423
3424 GCN_BTI_SF,
3425 GCN_BTI_V64SI,
3426 GCN_BTI_V64SF,
3427 GCN_BTI_V64PTR,
3428 GCN_BTI_SIPTR,
3429 GCN_BTI_SFPTR,
3430 GCN_BTI_VOIDPTR,
3431
3432 GCN_BTI_LDS_VOIDPTR,
3433
3434 GCN_BTI_MAX
3435};
3436
3437static GTY(()) tree gcn_builtin_types[GCN_BTI_MAX];
3438
3439#define exec_type_node (gcn_builtin_types[GCN_BTI_EXEC])
3440#define sf_type_node (gcn_builtin_types[GCN_BTI_SF])
3441#define v64si_type_node (gcn_builtin_types[GCN_BTI_V64SI])
3442#define v64sf_type_node (gcn_builtin_types[GCN_BTI_V64SF])
3443#define v64ptr_type_node (gcn_builtin_types[GCN_BTI_V64PTR])
3444#define siptr_type_node (gcn_builtin_types[GCN_BTI_SIPTR])
3445#define sfptr_type_node (gcn_builtin_types[GCN_BTI_SFPTR])
3446#define voidptr_type_node (gcn_builtin_types[GCN_BTI_VOIDPTR])
3447#define size_t_type_node (gcn_builtin_types[GCN_BTI_SIZE_T])
3448
3449static rtx gcn_expand_builtin_1 (tree, rtx, rtx, machine_mode, int,
3450 struct gcn_builtin_description *);
3451static rtx gcn_expand_builtin_binop (tree, rtx, rtx, machine_mode, int,
3452 struct gcn_builtin_description *);
3453
3454struct gcn_builtin_description;
3455typedef rtx (*gcn_builtin_expander) (tree, rtx, rtx, machine_mode, int,
3456 struct gcn_builtin_description *);
3457
3458enum gcn_builtin_type
3459{
3460 B_UNIMPLEMENTED, /* Sorry out */
3461 B_INSN, /* Emit a pattern */
3462 B_OVERLOAD /* Placeholder for an overloaded function */
3463};
3464
3465struct gcn_builtin_description
3466{
3467 int fcode;
3468 int icode;
3469 const char *name;
3470 enum gcn_builtin_type type;
3471 /* The first element of parm is always the return type. The rest
3472 are a zero terminated list of parameters. */
3473 int parm[6];
3474 gcn_builtin_expander expander;
3475};
3476
3477/* Read in the GCN builtins from gcn-builtins.def. */
3478
3479extern GTY(()) struct gcn_builtin_description gcn_builtins[GCN_BUILTIN_MAX];
3480
3481struct gcn_builtin_description gcn_builtins[] = {
3482#define DEF_BUILTIN(fcode, icode, name, type, params, expander) \
3483 {GCN_BUILTIN_ ## fcode, icode, name, type, params, expander},
3484
3485#define DEF_BUILTIN_BINOP_INT_FP(fcode, ic, name) \
3486 {GCN_BUILTIN_ ## fcode ## _V64SI, \
3487 CODE_FOR_ ## ic ##v64si3_exec, name "_v64int", B_INSN, \
3488 {GCN_BTI_V64SI, GCN_BTI_EXEC, GCN_BTI_V64SI, GCN_BTI_V64SI, \
3489 GCN_BTI_V64SI, GCN_BTI_END_OF_PARAMS}, gcn_expand_builtin_binop}, \
3490 {GCN_BUILTIN_ ## fcode ## _V64SI_unspec, \
3491 CODE_FOR_ ## ic ##v64si3_exec, name "_v64int_unspec", B_INSN, \
3492 {GCN_BTI_V64SI, GCN_BTI_EXEC, GCN_BTI_V64SI, GCN_BTI_V64SI, \
3493 GCN_BTI_END_OF_PARAMS}, gcn_expand_builtin_binop},
3494
3495#include "gcn-builtins.def"
3496#undef DEF_BUILTIN_BINOP_INT_FP
3497#undef DEF_BUILTIN
3498};
3499
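/* Editorial illustration, not part of the original source: a
   hypothetical entry DEF_BUILTIN_BINOP_INT_FP (ADD, add, "add") would
   expand to two table rows, "add_v64int" taking (exec, v64si, v64si,
   v64si) and "add_v64int_unspec" taking (exec, v64si, v64si), both
   returning V64SI, wired to CODE_FOR_addv64si3_exec and expanded by
   gcn_expand_builtin_binop.  */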
3500static GTY(()) tree gcn_builtin_decls[GCN_BUILTIN_MAX];
3501
3502/* Implement TARGET_BUILTIN_DECL.
3503
3504 Return the GCN builtin for CODE. */
3505
3506tree
3507gcn_builtin_decl (unsigned code, bool ARG_UNUSED (initialize_p))
3508{
3509 if (code >= GCN_BUILTIN_MAX)
3510 return error_mark_node;
3511
3512 return gcn_builtin_decls[code];
3513}
3514
3515/* Helper function for gcn_init_builtins. */
3516
3517static void
3518gcn_init_builtin_types (void)
3519{
3520 gcn_builtin_types[GCN_BTI_VOID] = void_type_node;
3521 gcn_builtin_types[GCN_BTI_BOOL] = boolean_type_node;
3522 gcn_builtin_types[GCN_BTI_INT] = intSI_type_node;
3523 gcn_builtin_types[GCN_BTI_UINT] = unsigned_type_for (intSI_type_node);
3524 gcn_builtin_types[GCN_BTI_SIZE_T] = size_type_node;
3525 gcn_builtin_types[GCN_BTI_LLINT] = intDI_type_node;
3526 gcn_builtin_types[GCN_BTI_LLUINT] = unsigned_type_for (intDI_type_node);
3527
3528 exec_type_node = unsigned_intDI_type_node;
3529 sf_type_node = float32_type_node;
3530 v64si_type_node = build_vector_type (intSI_type_node, 64);
3531 v64sf_type_node = build_vector_type (float_type_node, 64);
3532 v64ptr_type_node = build_vector_type (unsigned_intDI_type_node
3533 /*build_pointer_type
3534 (integer_type_node) */
3535 , 64);
3536 tree tmp = build_distinct_type_copy (intSI_type_node);
3537 TYPE_ADDR_SPACE (tmp) = ADDR_SPACE_FLAT;
3538 siptr_type_node = build_pointer_type (tmp);
3539
3540 tmp = build_distinct_type_copy (float_type_node);
3541 TYPE_ADDR_SPACE (tmp) = ADDR_SPACE_FLAT;
3542 sfptr_type_node = build_pointer_type (tmp);
3543
3544 tmp = build_distinct_type_copy (void_type_node);
3545 TYPE_ADDR_SPACE (tmp) = ADDR_SPACE_FLAT;
3546 voidptr_type_node = build_pointer_type (tmp);
3547
3548 tmp = build_distinct_type_copy (void_type_node);
3549 TYPE_ADDR_SPACE (tmp) = ADDR_SPACE_LDS;
3550 gcn_builtin_types[GCN_BTI_LDS_VOIDPTR] = build_pointer_type (tmp);
3551}
3552
3553/* Implement TARGET_INIT_BUILTINS.
3554
3555 Set up all builtin functions for this target. */
3556
3557static void
3558gcn_init_builtins (void)
3559{
3560 gcn_init_builtin_types ();
3561
3562 struct gcn_builtin_description *d;
3563 unsigned int i;
3564 for (i = 0, d = gcn_builtins; i < GCN_BUILTIN_MAX; i++, d++)
3565 {
3566 tree p;
3567 char name[64]; /* build_function will make a copy. */
3568 int parm;
3569
3570 /* FIXME: Is this necessary/useful? */
3571 if (d->name == 0)
3572 continue;
3573
3574 /* Find last parm. */
3575 for (parm = 1; d->parm[parm] != GCN_BTI_END_OF_PARAMS; parm++)
3576 ;
3577
3578 p = void_list_node;
3579 while (parm > 1)
3580 p = tree_cons (NULL_TREE, gcn_builtin_types[d->parm[--parm]], p);
3581
3582 p = build_function_type (gcn_builtin_types[d->parm[0]], p);
3583
3584 sprintf (name, "__builtin_gcn_%s", d->name);
3585 gcn_builtin_decls[i]
3586 = add_builtin_function (name, p, i, BUILT_IN_MD, NULL, NULL_TREE);
3587
3588 /* These builtins don't throw. */
3589 TREE_NOTHROW (gcn_builtin_decls[i]) = 1;
3590 }
3591
3592/* FIXME: remove the ifdef once OpenACC support is merged upstream. */
3593#ifdef BUILT_IN_GOACC_SINGLE_START
3594 /* These builtins need to take/return an LDS pointer: override the generic
3595 versions here. */
3596
3597 set_builtin_decl (BUILT_IN_GOACC_SINGLE_START,
3598 gcn_builtin_decls[GCN_BUILTIN_ACC_SINGLE_START], false);
3599
3600 set_builtin_decl (BUILT_IN_GOACC_SINGLE_COPY_START,
3601 gcn_builtin_decls[GCN_BUILTIN_ACC_SINGLE_COPY_START],
3602 false);
3603
3604 set_builtin_decl (BUILT_IN_GOACC_SINGLE_COPY_END,
3605 gcn_builtin_decls[GCN_BUILTIN_ACC_SINGLE_COPY_END],
3606 false);
3607
3608 set_builtin_decl (BUILT_IN_GOACC_BARRIER,
3609 gcn_builtin_decls[GCN_BUILTIN_ACC_BARRIER], false);
3610#endif
3611}
3612
3613/* Expand the CMP_SWAP GCN builtins. We have our own versions that do
3614 not require taking the address of any object, other than the memory
3615 cell being operated on.
3616
3617 Helper function for gcn_expand_builtin_1. */
3618
3619static rtx
3620gcn_expand_cmp_swap (tree exp, rtx target)
3621{
3622 machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
3623 addr_space_t as
3624 = TYPE_ADDR_SPACE (TREE_TYPE (TREE_TYPE (CALL_EXPR_ARG (exp, 0))));
3625 machine_mode as_mode = gcn_addr_space_address_mode (as);
3626
3627 if (!target)
3628 target = gen_reg_rtx (mode);
3629
3630 rtx addr = expand_expr (CALL_EXPR_ARG (exp, 0),
3631 NULL_RTX, as_mode, EXPAND_NORMAL);
3632 rtx cmp = expand_expr (CALL_EXPR_ARG (exp, 1),
3633 NULL_RTX, mode, EXPAND_NORMAL);
3634 rtx src = expand_expr (CALL_EXPR_ARG (exp, 2),
3635 NULL_RTX, mode, EXPAND_NORMAL);
3636 rtx pat;
3637
3638 rtx mem = gen_rtx_MEM (mode, force_reg (as_mode, addr));
3639 set_mem_addr_space (mem, as);
3640
3641 if (!REG_P (cmp))
3642 cmp = copy_to_mode_reg (mode, cmp);
3643 if (!REG_P (src))
3644 src = copy_to_mode_reg (mode, src);
3645
3646 if (mode == SImode)
3647 pat = gen_sync_compare_and_swapsi (target, mem, cmp, src);
3648 else
3649 pat = gen_sync_compare_and_swapdi (target, mem, cmp, src);
3650
3651 emit_insn (pat);
3652
3653 return target;
3654}
3655
3656/* Expand many different builtins.
3657
3658 Intended for use in gcn-builtins.def. */
3659
3660static rtx
3661gcn_expand_builtin_1 (tree exp, rtx target, rtx /*subtarget */ ,
3662 machine_mode /*mode */ , int ignore,
3663 struct gcn_builtin_description *)
3664{
3665 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
4d732405 3666 switch (DECL_MD_FUNCTION_CODE (fndecl))
5326695a
AS
3667 {
3668 case GCN_BUILTIN_FLAT_LOAD_INT32:
3669 {
3670 if (ignore)
3671 return target;
3672 /*rtx exec = */
3673 force_reg (DImode,
3674 expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX, DImode,
3675 EXPAND_NORMAL));
3676 /*rtx ptr = */
3677 force_reg (V64DImode,
3678 expand_expr (CALL_EXPR_ARG (exp, 1), NULL_RTX, V64DImode,
3679 EXPAND_NORMAL));
3680 /*emit_insn (gen_vector_flat_loadv64si
3681 (target, gcn_gen_undef (V64SImode), ptr, exec)); */
3682 return target;
3683 }
3684 case GCN_BUILTIN_FLAT_LOAD_PTR_INT32:
3685 case GCN_BUILTIN_FLAT_LOAD_PTR_FLOAT:
3686 {
3687 if (ignore)
3688 return target;
3689 rtx exec = force_reg (DImode,
3690 expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
3691 DImode,
3692 EXPAND_NORMAL));
3693 rtx ptr = force_reg (DImode,
3694 expand_expr (CALL_EXPR_ARG (exp, 1), NULL_RTX,
3695 V64DImode,
3696 EXPAND_NORMAL));
3697 rtx offsets = force_reg (V64SImode,
3698 expand_expr (CALL_EXPR_ARG (exp, 2),
3699 NULL_RTX, V64DImode,
3700 EXPAND_NORMAL));
3701 rtx addrs = gen_reg_rtx (V64DImode);
3702 rtx tmp = gen_reg_rtx (V64SImode);
3703 emit_insn (gen_ashlv64si3_exec (tmp, offsets,
3704 GEN_INT (2),
3705 gcn_gen_undef (V64SImode), exec));
3706 emit_insn (gen_addv64di3_zext_dup2_exec (addrs, tmp, ptr,
3707 gcn_gen_undef (V64DImode),
3708 exec));
3709 rtx mem = gen_rtx_MEM (GET_MODE (target), addrs);
3710 /*set_mem_addr_space (mem, ADDR_SPACE_FLAT); */
3711 /* FIXME: set attributes. */
3712 emit_insn (gen_mov_with_exec (target, mem, exec));
3713 return target;
3714 }
3715 case GCN_BUILTIN_FLAT_STORE_PTR_INT32:
3716 case GCN_BUILTIN_FLAT_STORE_PTR_FLOAT:
3717 {
3718 rtx exec = force_reg (DImode,
3719 expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
3720 DImode,
3721 EXPAND_NORMAL));
3722 rtx ptr = force_reg (DImode,
3723 expand_expr (CALL_EXPR_ARG (exp, 1), NULL_RTX,
3724 V64DImode,
3725 EXPAND_NORMAL));
3726 rtx offsets = force_reg (V64SImode,
3727 expand_expr (CALL_EXPR_ARG (exp, 2),
3728 NULL_RTX, V64DImode,
3729 EXPAND_NORMAL));
3730 machine_mode vmode = TYPE_MODE (TREE_TYPE (CALL_EXPR_ARG (exp,
3731 3)));
3732 rtx val = force_reg (vmode,
3733 expand_expr (CALL_EXPR_ARG (exp, 3), NULL_RTX,
3734 vmode,
3735 EXPAND_NORMAL));
3736 rtx addrs = gen_reg_rtx (V64DImode);
3737 rtx tmp = gen_reg_rtx (V64SImode);
3738 emit_insn (gen_ashlv64si3_exec (tmp, offsets,
3739 GEN_INT (2),
3740 gcn_gen_undef (V64SImode), exec));
3741 emit_insn (gen_addv64di3_zext_dup2_exec (addrs, tmp, ptr,
3742 gcn_gen_undef (V64DImode),
3743 exec));
3744 rtx mem = gen_rtx_MEM (vmode, addrs);
3745 /*set_mem_addr_space (mem, ADDR_SPACE_FLAT); */
3746 /* FIXME: set attributes. */
3747 emit_insn (gen_mov_with_exec (mem, val, exec));
3748 return target;
3749 }
3750 case GCN_BUILTIN_SQRTVF:
3751 {
3752 if (ignore)
3753 return target;
3754 rtx exec = gcn_full_exec_reg ();
3755 rtx arg = force_reg (V64SFmode,
3756 expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
3757 V64SFmode,
3758 EXPAND_NORMAL));
3759 emit_insn (gen_sqrtv64sf2_exec
3760 (target, arg, gcn_gen_undef (V64SFmode), exec));
3761 return target;
3762 }
3763 case GCN_BUILTIN_SQRTF:
3764 {
3765 if (ignore)
3766 return target;
3767 rtx arg = force_reg (SFmode,
3768 expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
3769 SFmode,
3770 EXPAND_NORMAL));
3771 emit_insn (gen_sqrtsf2 (target, arg));
3772 return target;
3773 }
3774 case GCN_BUILTIN_OMP_DIM_SIZE:
3775 {
3776 if (ignore)
3777 return target;
3778 emit_insn (gen_oacc_dim_size (target,
3779 expand_expr (CALL_EXPR_ARG (exp, 0),
3780 NULL_RTX, SImode,
3781 EXPAND_NORMAL)));
3782 return target;
3783 }
3784 case GCN_BUILTIN_OMP_DIM_POS:
3785 {
3786 if (ignore)
3787 return target;
3788 emit_insn (gen_oacc_dim_pos (target,
3789 expand_expr (CALL_EXPR_ARG (exp, 0),
3790 NULL_RTX, SImode,
3791 EXPAND_NORMAL)));
3792 return target;
3793 }
3794 case GCN_BUILTIN_CMP_SWAP:
3795 case GCN_BUILTIN_CMP_SWAPLL:
3796 return gcn_expand_cmp_swap (exp, target);
3797
3798 case GCN_BUILTIN_ACC_SINGLE_START:
3799 {
3800 if (ignore)
3801 return target;
3802
3803 rtx wavefront = gcn_oacc_dim_pos (1);
3804 rtx cond = gen_rtx_EQ (VOIDmode, wavefront, const0_rtx);
3805 rtx cc = (target && REG_P (target)) ? target : gen_reg_rtx (BImode);
3806 emit_insn (gen_cstoresi4 (cc, cond, wavefront, const0_rtx));
3807 return cc;
3808 }
3809
3810 case GCN_BUILTIN_ACC_SINGLE_COPY_START:
3811 {
3812 rtx blk = force_reg (SImode,
3813 expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
3814 SImode, EXPAND_NORMAL));
3815 rtx wavefront = gcn_oacc_dim_pos (1);
3816 rtx cond = gen_rtx_NE (VOIDmode, wavefront, const0_rtx);
3817 rtx not_zero = gen_label_rtx ();
3818 emit_insn (gen_cbranchsi4 (cond, wavefront, const0_rtx, not_zero));
3819 emit_move_insn (blk, const0_rtx);
3820 emit_label (not_zero);
3821 return blk;
3822 }
3823
3824 case GCN_BUILTIN_ACC_SINGLE_COPY_END:
3825 return target;
3826
3827 case GCN_BUILTIN_ACC_BARRIER:
3828 emit_insn (gen_gcn_wavefront_barrier ());
3829 return target;
3830
3831 default:
3832 gcc_unreachable ();
3833 }
3834}
3835
3836/* Expansion of simple arithmetic and bit binary operation builtins.
3837
3838 Intended for use with gcn_builtins table. */
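   These builtins take an explicit DImode EXEC mask as their first
   argument, the two operands next, and optionally a fourth "previous
   value" operand that supplies the lanes the mask leaves unwritten.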
3839
3840static rtx
3841gcn_expand_builtin_binop (tree exp, rtx target, rtx /*subtarget */ ,
3842 machine_mode /*mode */ , int ignore,
3843 struct gcn_builtin_description *d)
3844{
3845 int icode = d->icode;
3846 if (ignore)
3847 return target;
3848
3849 rtx exec = force_reg (DImode,
3850 expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX, DImode,
3851 EXPAND_NORMAL));
3852
3853 machine_mode m1 = insn_data[icode].operand[1].mode;
3854 rtx arg1 = expand_expr (CALL_EXPR_ARG (exp, 1), NULL_RTX, m1,
3855 EXPAND_NORMAL);
3856 if (!insn_data[icode].operand[1].predicate (arg1, m1))
3857 arg1 = force_reg (m1, arg1);
3858
3859 machine_mode m2 = insn_data[icode].operand[2].mode;
3860 rtx arg2 = expand_expr (CALL_EXPR_ARG (exp, 2), NULL_RTX, m2,
3861 EXPAND_NORMAL);
3862 if (!insn_data[icode].operand[2].predicate (arg2, m2))
3863 arg2 = force_reg (m2, arg2);
3864
3865 rtx arg_prev;
3866 if (call_expr_nargs (exp) == 4)
3867 {
3868 machine_mode m_prev = insn_data[icode].operand[4].mode;
3869 arg_prev = force_reg (m_prev,
3870 expand_expr (CALL_EXPR_ARG (exp, 3), NULL_RTX,
3871 m_prev, EXPAND_NORMAL));
3872 }
3873 else
3874 arg_prev = gcn_gen_undef (GET_MODE (target));
3875
3876 rtx pat = GEN_FCN (icode) (target, arg1, arg2, exec, arg_prev);
3877 emit_insn (pat);
3878 return target;
3879}
3880
3881/* Implement TARGET_EXPAND_BUILTIN.
3882
3883 Expand an expression EXP that calls a built-in function, with result going
3884 to TARGET if that's convenient (and in mode MODE if that's convenient).
3885 SUBTARGET may be used as the target for computing one of EXP's operands.
3886 IGNORE is nonzero if the value is to be ignored. */
3887
3888rtx
3889gcn_expand_builtin (tree exp, rtx target, rtx subtarget, machine_mode mode,
3890 int ignore)
3891{
3892 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
4d732405 3893 unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl);
5326695a
AS
3894 struct gcn_builtin_description *d;
3895
3896 gcc_assert (fcode < GCN_BUILTIN_MAX);
3897 d = &gcn_builtins[fcode];
3898
3899 if (d->type == B_UNIMPLEMENTED)
 3900 sorry ("builtin not implemented");
3901
3902 return d->expander (exp, target, subtarget, mode, ignore, d);
3903}
3904
3905/* }}} */
3906/* {{{ Vectorization. */
3907
3908/* Implement TARGET_VECTORIZE_GET_MASK_MODE.
3909
3910 A vector mask is a value that holds one boolean result for every element in
3911 a vector. */
3912
3913opt_machine_mode
10116ec1 3914gcn_vectorize_get_mask_mode (machine_mode)
5326695a
AS
3915{
3916 /* GCN uses a DImode bit-mask. */
3917 return DImode;
3918}
3919
3920/* Return an RTX that references a vector with the i-th lane containing
3921 PERM[i]*4.
3922
3923 Helper function for gcn_vectorize_vec_perm_const. */
3924
3925static rtx
3926gcn_make_vec_perm_address (unsigned int *perm)
3927{
3928 rtx x = gen_reg_rtx (V64SImode);
3929 emit_move_insn (x, gcn_vec_constant (V64SImode, 0));
3930
3931 /* Permutation addresses use byte addressing. With each vector lane being
3932 4 bytes wide, and with 64 lanes in total, only bits 2..7 are significant,
3933 so only set those.
3934
 3935 The permutations given to the vec_perm* patterns range from 0 to 2N-1 to
3936 select between lanes in two vectors, but as the DS_BPERMUTE* instructions
3937 only take one source vector, the most-significant bit can be ignored
3938 here. Instead, we can use EXEC masking to select the relevant part of
3939 each source vector after they are permuted separately. */
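 /* Sketch of the loop below: for each address bit 2..7, build an EXEC mask
    of the lanes whose byte address (perm[j] * 4) has that bit set, and add
    the bit's value to just those lanes.  E.g. for an identity permutation,
    bit 2 (value 4) is added to every odd-numbered lane.  */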
3940 uint64_t bit_mask = 1 << 2;
3941 for (int i = 2; i < 8; i++, bit_mask <<= 1)
3942 {
3943 uint64_t exec_mask = 0;
3944 uint64_t lane_mask = 1;
3945 for (int j = 0; j < 64; j++, lane_mask <<= 1)
3946 if ((perm[j] * 4) & bit_mask)
3947 exec_mask |= lane_mask;
3948
3949 if (exec_mask)
3950 emit_insn (gen_addv64si3_exec (x, x,
3951 gcn_vec_constant (V64SImode,
3952 bit_mask),
3953 x, get_exec (exec_mask)));
3954 }
3955
3956 return x;
3957}
3958
3959/* Implement TARGET_VECTORIZE_VEC_PERM_CONST.
3960
3961 Return true if permutation with SEL is possible.
3962
3963 If DST/SRC0/SRC1 are non-null, emit the instructions to perform the
3964 permutations. */
3965
3966static bool
3967gcn_vectorize_vec_perm_const (machine_mode vmode, rtx dst,
3968 rtx src0, rtx src1,
3969 const vec_perm_indices & sel)
3970{
3971 unsigned int nelt = GET_MODE_NUNITS (vmode);
3972
3973 gcc_assert (VECTOR_MODE_P (vmode));
3974 gcc_assert (nelt <= 64);
3975 gcc_assert (sel.length () == nelt);
3976
3977 if (!dst)
3978 {
3979 /* All vector permutations are possible on this architecture,
3980 with varying degrees of efficiency depending on the permutation. */
3981 return true;
3982 }
3983
3984 unsigned int perm[64];
3985 for (unsigned int i = 0; i < nelt; ++i)
3986 perm[i] = sel[i] & (2 * nelt - 1);
55308fc2
AS
3987 for (unsigned int i = nelt; i < 64; ++i)
3988 perm[i] = 0;
5326695a 3989
b1d1e2b5
JJ
3990 src0 = force_reg (vmode, src0);
3991 src1 = force_reg (vmode, src1);
3992
5326695a
AS
3993 /* Make life a bit easier by swapping operands if necessary so that
3994 the first element always comes from src0. */
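 /* E.g. for a hypothetical 4-lane selector {5, 1, 6, 2}, src0 and src1 are
    exchanged and the selector becomes {1, 5, 2, 6}, so perm[0] indexes src0.  */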
3995 if (perm[0] >= nelt)
3996 {
b1d1e2b5 3997 std::swap (src0, src1);
5326695a
AS
3998
3999 for (unsigned int i = 0; i < nelt; ++i)
4000 if (perm[i] < nelt)
4001 perm[i] += nelt;
4002 else
4003 perm[i] -= nelt;
4004 }
4005
4006 /* TODO: There are more efficient ways to implement certain permutations
4007 using ds_swizzle_b32 and/or DPP. Test for and expand them here, before
4008 this more inefficient generic approach is used. */
4009
4010 int64_t src1_lanes = 0;
4011 int64_t lane_bit = 1;
4012
4013 for (unsigned int i = 0; i < nelt; ++i, lane_bit <<= 1)
4014 {
4015 /* Set the bits for lanes from src1. */
4016 if (perm[i] >= nelt)
4017 src1_lanes |= lane_bit;
4018 }
4019
4020 rtx addr = gcn_make_vec_perm_address (perm);
4021 rtx (*ds_bpermute) (rtx, rtx, rtx, rtx);
4022
4023 switch (vmode)
4024 {
4025 case E_V64QImode:
4026 ds_bpermute = gen_ds_bpermutev64qi;
4027 break;
4028 case E_V64HImode:
4029 ds_bpermute = gen_ds_bpermutev64hi;
4030 break;
4031 case E_V64SImode:
4032 ds_bpermute = gen_ds_bpermutev64si;
4033 break;
4034 case E_V64HFmode:
4035 ds_bpermute = gen_ds_bpermutev64hf;
4036 break;
4037 case E_V64SFmode:
4038 ds_bpermute = gen_ds_bpermutev64sf;
4039 break;
4040 case E_V64DImode:
4041 ds_bpermute = gen_ds_bpermutev64di;
4042 break;
4043 case E_V64DFmode:
4044 ds_bpermute = gen_ds_bpermutev64df;
4045 break;
4046 default:
4047 gcc_assert (false);
4048 }
4049
4050 /* Load elements from src0 to dst. */
4051 gcc_assert (~src1_lanes);
4052 emit_insn (ds_bpermute (dst, addr, src0, gcn_full_exec_reg ()));
4053
4054 /* Load elements from src1 to dst. */
4055 if (src1_lanes)
4056 {
4057 /* Masking a lane masks both the destination and source lanes for
4058 DS_BPERMUTE, so we need to have all lanes enabled for the permute,
4059 then add an extra masked move to merge the results of permuting
4060 the two source vectors together.
4061 */
4062 rtx tmp = gen_reg_rtx (vmode);
4063 emit_insn (ds_bpermute (tmp, addr, src1, gcn_full_exec_reg ()));
4064 emit_insn (gen_mov_with_exec (dst, tmp, get_exec (src1_lanes)));
4065 }
4066
4067 return true;
4068}
4069
4070/* Implements TARGET_VECTOR_MODE_SUPPORTED_P.
4071
4072 Return nonzero if vector MODE is supported with at least move
4073 instructions. */
4074
4075static bool
4076gcn_vector_mode_supported_p (machine_mode mode)
4077{
2b99bed8
AS
4078 return (mode == V64QImode || mode == V64HImode
4079 || mode == V64SImode || mode == V64DImode
5326695a
AS
4080 || mode == V64SFmode || mode == V64DFmode);
4081}
4082
4083/* Implement TARGET_VECTORIZE_PREFERRED_SIMD_MODE.
4084
4085 Enables autovectorization for all supported modes. */
4086
4087static machine_mode
4088gcn_vectorize_preferred_simd_mode (scalar_mode mode)
4089{
4090 switch (mode)
4091 {
4092 case E_QImode:
4093 return V64QImode;
4094 case E_HImode:
4095 return V64HImode;
4096 case E_SImode:
4097 return V64SImode;
4098 case E_DImode:
4099 return V64DImode;
4100 case E_SFmode:
4101 return V64SFmode;
4102 case E_DFmode:
4103 return V64DFmode;
4104 default:
4105 return word_mode;
4106 }
4107}
4108
2b99bed8
AS
4109/* Implement TARGET_VECTORIZE_RELATED_MODE.
4110
 4111 All GCN vectors are 64-lane, so this is simpler than on other architectures.
4112 In particular, we do *not* want to match vector bit-size. */
4113
4114static opt_machine_mode
dd455df7
TB
4115gcn_related_vector_mode (machine_mode ARG_UNUSED (vector_mode),
4116 scalar_mode element_mode, poly_uint64 nunits)
2b99bed8
AS
4117{
4118 if (known_ne (nunits, 0U) && known_ne (nunits, 64U))
4119 return VOIDmode;
4120
4121 machine_mode pref_mode = gcn_vectorize_preferred_simd_mode (element_mode);
4122 if (!VECTOR_MODE_P (pref_mode))
4123 return VOIDmode;
4124
4125 return pref_mode;
4126}
4127
5326695a
AS
4128/* Implement TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT.
4129
 4130 Returns the preferred alignment in bits for accesses to vectors of type TYPE
4131 in vectorized code. This might be less than or greater than the ABI-defined
4132 value returned by TARGET_VECTOR_ALIGNMENT. It can be equal to the alignment
4133 of a single element, in which case the vectorizer will not try to optimize
4134 for alignment. */
4135
4136static poly_uint64
4137gcn_preferred_vector_alignment (const_tree type)
4138{
4139 return TYPE_ALIGN (TREE_TYPE (type));
4140}
4141
4142/* Implement TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT.
4143
4144 Return true if the target supports misaligned vector store/load of a
4145 specific factor denoted in the misalignment parameter. */
4146
4147static bool
4148gcn_vectorize_support_vector_misalignment (machine_mode ARG_UNUSED (mode),
4149 const_tree type, int misalignment,
4150 bool is_packed)
4151{
4152 if (is_packed)
4153 return false;
4154
4155 /* If the misalignment is unknown, we should be able to handle the access
4156 so long as it is not to a member of a packed data structure. */
4157 if (misalignment == -1)
4158 return true;
4159
4160 /* Return true if the misalignment is a multiple of the natural alignment
4161 of the vector's element type. This is probably always going to be
4162 true in practice, since we've already established that this isn't a
4163 packed access. */
4164 return misalignment % TYPE_ALIGN_UNIT (type) == 0;
4165}
4166
4167/* Implement TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE.
4168
4169 Return true if vector alignment is reachable (by peeling N iterations) for
4170 the given scalar type TYPE. */
4171
4172static bool
4173gcn_vector_alignment_reachable (const_tree ARG_UNUSED (type), bool is_packed)
4174{
4175 /* Vectors which aren't in packed structures will not be less aligned than
4176 the natural alignment of their element type, so this is safe. */
4177 return !is_packed;
4178}
4179
4180/* Generate DPP instructions used for vector reductions.
4181
4182 The opcode is given by INSN.
4183 The first operand of the operation is shifted right by SHIFT vector lanes.
4184 SHIFT must be a power of 2. If SHIFT is 16, the 15th lane of each row is
 4185 broadcast to the next row (thereby acting like a shift of 16 for the end of
4186 each row). If SHIFT is 32, lane 31 is broadcast to all the
4187 following lanes (thereby acting like a shift of 32 for lane 63). */
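 /* For illustration (mnemonic chosen arbitrarily): with INSN "v_add_f32" and
    SHIFT 1, the returned template reads roughly

      v_add_f32	%0, %1, %2 row_shr:1 bound_ctrl:0

    while SHIFT 16 and 32 use the row_bcast DPP controls instead.  */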
4188
4189char *
4190gcn_expand_dpp_shr_insn (machine_mode mode, const char *insn,
4191 int unspec, int shift)
4192{
a5879399 4193 static char buf[128];
5326695a
AS
4194 const char *dpp;
4195 const char *vcc_in = "";
4196 const char *vcc_out = "";
4197
4198 /* Add the vcc operand if needed. */
4199 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
4200 {
4201 if (unspec == UNSPEC_PLUS_CARRY_IN_DPP_SHR)
4202 vcc_in = ", vcc";
4203
4204 if (unspec == UNSPEC_PLUS_CARRY_DPP_SHR
4205 || unspec == UNSPEC_PLUS_CARRY_IN_DPP_SHR)
4206 vcc_out = ", vcc";
4207 }
4208
4209 /* Add the DPP modifiers. */
4210 switch (shift)
4211 {
4212 case 1:
4213 dpp = "row_shr:1 bound_ctrl:0";
4214 break;
4215 case 2:
4216 dpp = "row_shr:2 bound_ctrl:0";
4217 break;
4218 case 4:
4219 dpp = "row_shr:4 bank_mask:0xe";
4220 break;
4221 case 8:
4222 dpp = "row_shr:8 bank_mask:0xc";
4223 break;
4224 case 16:
4225 dpp = "row_bcast:15 row_mask:0xa";
4226 break;
4227 case 32:
4228 dpp = "row_bcast:31 row_mask:0xc";
4229 break;
4230 default:
4231 gcc_unreachable ();
4232 }
4233
a5879399
AS
4234 if (unspec == UNSPEC_MOV_DPP_SHR && vgpr_2reg_mode_p (mode))
4235 sprintf (buf, "%s\t%%L0, %%L1 %s\n\t%s\t%%H0, %%H1 %s",
4236 insn, dpp, insn, dpp);
4237 else if (unspec == UNSPEC_MOV_DPP_SHR)
4238 sprintf (buf, "%s\t%%0, %%1 %s", insn, dpp);
4239 else
4240 sprintf (buf, "%s\t%%0%s, %%1, %%2%s %s", insn, vcc_out, vcc_in, dpp);
5326695a
AS
4241
4242 return buf;
4243}
4244
4245/* Generate vector reductions in terms of DPP instructions.
4246
4247 The vector register SRC of mode MODE is reduced using the operation given
4248 by UNSPEC, and the scalar result is returned in lane 63 of a vector
4249 register. */
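   Roughly: six DPP steps with shifts of 1, 2, 4, 8, 16 and 32 combine lanes
   pairwise, then in groups of four, and so on, so that after the final step
   lane 63 holds the reduction of all 64 lanes.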
4250
4251rtx
4252gcn_expand_reduc_scalar (machine_mode mode, rtx src, int unspec)
4253{
a5879399
AS
4254 machine_mode orig_mode = mode;
4255 bool use_moves = (((unspec == UNSPEC_SMIN_DPP_SHR
4256 || unspec == UNSPEC_SMAX_DPP_SHR
4257 || unspec == UNSPEC_UMIN_DPP_SHR
4258 || unspec == UNSPEC_UMAX_DPP_SHR)
d9f50366
AS
4259 && (mode == V64DImode
4260 || mode == V64DFmode))
a5879399
AS
4261 || (unspec == UNSPEC_PLUS_DPP_SHR
4262 && mode == V64DFmode));
4263 rtx_code code = (unspec == UNSPEC_SMIN_DPP_SHR ? SMIN
4264 : unspec == UNSPEC_SMAX_DPP_SHR ? SMAX
4265 : unspec == UNSPEC_UMIN_DPP_SHR ? UMIN
4266 : unspec == UNSPEC_UMAX_DPP_SHR ? UMAX
4267 : unspec == UNSPEC_PLUS_DPP_SHR ? PLUS
4268 : UNKNOWN);
4269 bool use_extends = ((unspec == UNSPEC_SMIN_DPP_SHR
4270 || unspec == UNSPEC_SMAX_DPP_SHR
4271 || unspec == UNSPEC_UMIN_DPP_SHR
4272 || unspec == UNSPEC_UMAX_DPP_SHR)
4273 && (mode == V64QImode
4274 || mode == V64HImode));
4275 bool unsignedp = (unspec == UNSPEC_UMIN_DPP_SHR
4276 || unspec == UNSPEC_UMAX_DPP_SHR);
5326695a
AS
4277 bool use_plus_carry = unspec == UNSPEC_PLUS_DPP_SHR
4278 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT
4279 && (TARGET_GCN3 || mode == V64DImode);
4280
4281 if (use_plus_carry)
4282 unspec = UNSPEC_PLUS_CARRY_DPP_SHR;
4283
a5879399
AS
4284 if (use_extends)
4285 {
4286 rtx tmp = gen_reg_rtx (V64SImode);
4287 convert_move (tmp, src, unsignedp);
4288 src = tmp;
4289 mode = V64SImode;
4290 }
4291
5326695a
AS
4292 /* Perform reduction by first performing the reduction operation on every
4293 pair of lanes, then on every pair of results from the previous
4294 iteration (thereby effectively reducing every 4 lanes) and so on until
4295 all lanes are reduced. */
a5879399 4296 rtx in, out = src;
5326695a
AS
4297 for (int i = 0, shift = 1; i < 6; i++, shift <<= 1)
4298 {
4299 rtx shift_val = gen_rtx_CONST_INT (VOIDmode, shift);
a5879399
AS
4300 in = out;
4301 out = gen_reg_rtx (mode);
4302
4303 if (use_moves)
5326695a 4304 {
a5879399
AS
4305 rtx tmp = gen_reg_rtx (mode);
4306 emit_insn (gen_dpp_move (mode, tmp, in, shift_val));
4307 emit_insn (gen_rtx_SET (out, gen_rtx_fmt_ee (code, mode, tmp, in)));
5326695a 4308 }
a5879399
AS
4309 else
4310 {
4311 rtx insn = gen_rtx_SET (out,
4312 gen_rtx_UNSPEC (mode,
4313 gen_rtvec (3, in, in,
4314 shift_val),
4315 unspec));
4316
4317 /* Add clobber for instructions that set the carry flags. */
4318 if (use_plus_carry)
4319 {
4320 rtx clobber = gen_rtx_CLOBBER (VOIDmode,
4321 gen_rtx_REG (DImode, VCC_REG));
4322 insn = gen_rtx_PARALLEL (VOIDmode,
4323 gen_rtvec (2, insn, clobber));
4324 }
5326695a 4325
a5879399
AS
4326 emit_insn (insn);
4327 }
4328 }
5326695a 4329
a5879399
AS
4330 if (use_extends)
4331 {
4332 rtx tmp = gen_reg_rtx (orig_mode);
4333 convert_move (tmp, out, unsignedp);
4334 out = tmp;
5326695a
AS
4335 }
4336
a5879399 4337 return out;
5326695a
AS
4338}
4339
4340/* Implement TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST. */
4341
4342int
4343gcn_vectorization_cost (enum vect_cost_for_stmt ARG_UNUSED (type_of_cost),
4344 tree ARG_UNUSED (vectype), int ARG_UNUSED (misalign))
4345{
4346 /* Always vectorize. */
4347 return 1;
4348}
4349
4350/* }}} */
4351/* {{{ md_reorg pass. */
4352
4353/* Identify VMEM instructions from their "type" attribute. */
4354
4355static bool
4356gcn_vmem_insn_p (attr_type type)
4357{
4358 switch (type)
4359 {
4360 case TYPE_MUBUF:
4361 case TYPE_MTBUF:
4362 case TYPE_FLAT:
4363 return true;
4364 case TYPE_UNKNOWN:
4365 case TYPE_SOP1:
4366 case TYPE_SOP2:
4367 case TYPE_SOPK:
4368 case TYPE_SOPC:
4369 case TYPE_SOPP:
4370 case TYPE_SMEM:
4371 case TYPE_DS:
4372 case TYPE_VOP2:
4373 case TYPE_VOP1:
4374 case TYPE_VOPC:
4375 case TYPE_VOP3A:
4376 case TYPE_VOP3B:
4377 case TYPE_VOP_SDWA:
4378 case TYPE_VOP_DPP:
4379 case TYPE_MULT:
4380 case TYPE_VMULT:
4381 return false;
4382 }
4383 gcc_unreachable ();
4384 return false;
4385}
4386
4387/* If INSN sets the EXEC register to a constant value, return the value,
4388 otherwise return zero. */
4389
4390static int64_t
4391gcn_insn_exec_value (rtx_insn *insn)
4392{
4393 if (!NONDEBUG_INSN_P (insn))
4394 return 0;
4395
4396 rtx pattern = PATTERN (insn);
4397
4398 if (GET_CODE (pattern) == SET)
4399 {
4400 rtx dest = XEXP (pattern, 0);
4401 rtx src = XEXP (pattern, 1);
4402
4403 if (GET_MODE (dest) == DImode
4404 && REG_P (dest) && REGNO (dest) == EXEC_REG
4405 && CONST_INT_P (src))
4406 return INTVAL (src);
4407 }
4408
4409 return 0;
4410}
4411
4412/* Sets the EXEC register before INSN to the value that it had after
4413 LAST_EXEC_DEF. The constant value of the EXEC register is returned if
 4414 known, otherwise zero is returned. */
4415
4416static int64_t
4417gcn_restore_exec (rtx_insn *insn, rtx_insn *last_exec_def, int64_t curr_exec,
4418 bool curr_exec_known, bool &last_exec_def_saved)
4419{
4420 rtx exec_reg = gen_rtx_REG (DImode, EXEC_REG);
4421 rtx exec;
4422
4423 int64_t exec_value = gcn_insn_exec_value (last_exec_def);
4424
4425 if (exec_value)
4426 {
4427 /* If the EXEC value is a constant and it happens to be the same as the
4428 current EXEC value, the restore can be skipped. */
4429 if (curr_exec_known && exec_value == curr_exec)
4430 return exec_value;
4431
4432 exec = GEN_INT (exec_value);
4433 }
4434 else
4435 {
4436 /* If the EXEC value is not a constant, save it in a register after the
4437 point of definition. */
4438 rtx exec_save_reg = gen_rtx_REG (DImode, EXEC_SAVE_REG);
4439
4440 if (!last_exec_def_saved)
4441 {
4442 start_sequence ();
4443 emit_move_insn (exec_save_reg, exec_reg);
4444 rtx_insn *seq = get_insns ();
4445 end_sequence ();
4446
4447 emit_insn_after (seq, last_exec_def);
4448 if (dump_file && (dump_flags & TDF_DETAILS))
4449 fprintf (dump_file, "Saving EXEC after insn %d.\n",
4450 INSN_UID (last_exec_def));
4451
4452 last_exec_def_saved = true;
4453 }
4454
4455 exec = exec_save_reg;
4456 }
4457
4458 /* Restore EXEC register before the usage. */
4459 start_sequence ();
4460 emit_move_insn (exec_reg, exec);
4461 rtx_insn *seq = get_insns ();
4462 end_sequence ();
4463 emit_insn_before (seq, insn);
4464
4465 if (dump_file && (dump_flags & TDF_DETAILS))
4466 {
4467 if (exec_value)
4468 fprintf (dump_file, "Restoring EXEC to %ld before insn %d.\n",
4469 exec_value, INSN_UID (insn));
4470 else
4471 fprintf (dump_file,
4472 "Restoring EXEC from saved value before insn %d.\n",
4473 INSN_UID (insn));
4474 }
4475
4476 return exec_value;
4477}
4478
4479/* Implement TARGET_MACHINE_DEPENDENT_REORG.
4480
4481 Ensure that pipeline dependencies and lane masking are set correctly. */
4482
4483static void
4484gcn_md_reorg (void)
4485{
4486 basic_block bb;
4487 rtx exec_reg = gen_rtx_REG (DImode, EXEC_REG);
5326695a
AS
4488 regset_head live;
4489
4490 INIT_REG_SET (&live);
4491
4492 compute_bb_for_insn ();
4493
4494 if (!optimize)
4495 {
4496 split_all_insns ();
4497 if (dump_file && (dump_flags & TDF_DETAILS))
4498 {
4499 fprintf (dump_file, "After split:\n");
4500 print_rtl_with_bb (dump_file, get_insns (), dump_flags);
4501 }
4502
4503 /* Update data-flow information for split instructions. */
4504 df_insn_rescan_all ();
4505 }
4506
3df6fac0
JB
4507 df_live_add_problem ();
4508 df_live_set_all_dirty ();
5326695a
AS
4509 df_analyze ();
4510
4511 /* This pass ensures that the EXEC register is set correctly, according
4512 to the "exec" attribute. However, care must be taken so that the
4513 value that reaches explicit uses of the EXEC register remains the
4514 same as before.
4515 */
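 /* Concretely, the pass emits SETs of EXEC to the required constant, e.g.
    (assembler spelling shown for illustration only)

      s_mov_b64	exec, -1	; all 64 lanes, before a full vector insn
      s_mov_b64	exec, 1		; lane 0 only, before a single-lane insn

    while values written by explicit EXEC definitions are preserved for
    their explicit uses.  */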
4516
4517 FOR_EACH_BB_FN (bb, cfun)
4518 {
4519 if (dump_file && (dump_flags & TDF_DETAILS))
4520 fprintf (dump_file, "BB %d:\n", bb->index);
4521
4522 rtx_insn *insn, *curr;
4523 rtx_insn *last_exec_def = BB_HEAD (bb);
4524 bool last_exec_def_saved = false;
4525 bool curr_exec_explicit = true;
4526 bool curr_exec_known = true;
4527 int64_t curr_exec = 0; /* 0 here means 'the value is that of EXEC
4528 after last_exec_def is executed'. */
4529
3df6fac0
JB
4530 bitmap live_in = DF_LR_IN (bb);
4531 bool exec_live_on_entry = false;
4532 if (bitmap_bit_p (live_in, EXEC_LO_REG)
4533 || bitmap_bit_p (live_in, EXEC_HI_REG))
4534 {
4535 if (dump_file)
4536 fprintf (dump_file, "EXEC reg is live on entry to block %d\n",
4537 (int) bb->index);
4538 exec_live_on_entry = true;
4539 }
4540
5326695a
AS
4541 FOR_BB_INSNS_SAFE (bb, insn, curr)
4542 {
4543 if (!NONDEBUG_INSN_P (insn))
4544 continue;
4545
4546 if (GET_CODE (PATTERN (insn)) == USE
4547 || GET_CODE (PATTERN (insn)) == CLOBBER)
4548 continue;
4549
4550 HARD_REG_SET defs, uses;
4551 CLEAR_HARD_REG_SET (defs);
4552 CLEAR_HARD_REG_SET (uses);
e8448ba5 4553 note_stores (insn, record_hard_reg_sets, &defs);
5326695a
AS
4554 note_uses (&PATTERN (insn), record_hard_reg_uses, &uses);
4555
4556 bool exec_lo_def_p = TEST_HARD_REG_BIT (defs, EXEC_LO_REG);
4557 bool exec_hi_def_p = TEST_HARD_REG_BIT (defs, EXEC_HI_REG);
4558 bool exec_used = (hard_reg_set_intersect_p
4559 (uses, reg_class_contents[(int) EXEC_MASK_REG])
4560 || TEST_HARD_REG_BIT (uses, EXECZ_REG));
4561
4562 /* Check the instruction for implicit setting of EXEC via an
4563 attribute. */
4564 attr_exec exec_attr = get_attr_exec (insn);
4565 int64_t new_exec;
4566
4567 switch (exec_attr)
4568 {
4569 case EXEC_NONE:
4570 new_exec = 0;
4571 break;
4572
4573 case EXEC_SINGLE:
 4574 /* Memory access instructions need EXEC set to exactly 1 (lane 0
 4575 only); other instructions merely require bit 0 of EXEC to be set. */
4576 if (gcn_vmem_insn_p (get_attr_type (insn))
4577 || get_attr_type (insn) == TYPE_DS)
4578 new_exec = 1;
4579 else
4580 new_exec = curr_exec | 1;
4581 break;
4582
4583 case EXEC_FULL:
4584 new_exec = -1;
4585 break;
4586
4587 default: /* Auto-detect what setting is appropriate. */
4588 {
4589 new_exec = 0;
4590
4591 /* If EXEC is referenced explicitly then we don't need to do
4592 anything to set it, so we're done. */
4593 if (exec_used)
4594 break;
4595
4596 /* Scan the insn for VGPRs defs or uses. The mode determines
4597 what kind of exec is needed. */
4598 subrtx_iterator::array_type array;
4599 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
4600 {
4601 const_rtx x = *iter;
4602 if (REG_P (x) && VGPR_REGNO_P (REGNO (x)))
4603 {
4604 if (VECTOR_MODE_P (GET_MODE (x)))
4605 {
4606 new_exec = -1;
4607 break;
4608 }
4609 else
4610 new_exec = 1;
4611 }
4612 }
4613 }
4614 break;
4615 }
4616
4617 if (new_exec && (!curr_exec_known || new_exec != curr_exec))
4618 {
4619 start_sequence ();
4620 emit_move_insn (exec_reg, GEN_INT (new_exec));
4621 rtx_insn *seq = get_insns ();
4622 end_sequence ();
4623 emit_insn_before (seq, insn);
4624
4625 if (dump_file && (dump_flags & TDF_DETAILS))
4626 fprintf (dump_file, "Setting EXEC to %ld before insn %d.\n",
4627 new_exec, INSN_UID (insn));
4628
4629 curr_exec = new_exec;
4630 curr_exec_explicit = false;
4631 curr_exec_known = true;
4632 }
4633 else if (new_exec && dump_file && (dump_flags & TDF_DETAILS))
4634 {
4635 fprintf (dump_file, "Exec already is %ld before insn %d.\n",
4636 new_exec, INSN_UID (insn));
4637 }
4638
4639 /* The state of the EXEC register is unknown after a
4640 function call. */
4641 if (CALL_P (insn))
4642 curr_exec_known = false;
4643
4644 /* Handle explicit uses of EXEC. If the instruction is a partial
4645 explicit definition of EXEC, then treat it as an explicit use of
4646 EXEC as well. */
4647 if (exec_used || exec_lo_def_p != exec_hi_def_p)
4648 {
4649 /* An instruction that explicitly uses EXEC should not also
4650 implicitly define it. */
4651 gcc_assert (!exec_used || !new_exec);
4652
4653 if (!curr_exec_known || !curr_exec_explicit)
4654 {
4655 /* Restore the previous explicitly defined value. */
4656 curr_exec = gcn_restore_exec (insn, last_exec_def,
4657 curr_exec, curr_exec_known,
4658 last_exec_def_saved);
4659 curr_exec_explicit = true;
4660 curr_exec_known = true;
4661 }
4662 }
4663
4664 /* Handle explicit definitions of EXEC. */
4665 if (exec_lo_def_p || exec_hi_def_p)
4666 {
4667 last_exec_def = insn;
4668 last_exec_def_saved = false;
4669 curr_exec = gcn_insn_exec_value (insn);
4670 curr_exec_explicit = true;
4671 curr_exec_known = true;
4672
4673 if (dump_file && (dump_flags & TDF_DETAILS))
4674 fprintf (dump_file,
4675 "Found %s definition of EXEC at insn %d.\n",
4676 exec_lo_def_p == exec_hi_def_p ? "full" : "partial",
4677 INSN_UID (insn));
4678 }
3df6fac0
JB
4679
4680 exec_live_on_entry = false;
5326695a
AS
4681 }
4682
4683 COPY_REG_SET (&live, DF_LR_OUT (bb));
4684 df_simulate_initialize_backwards (bb, &live);
4685
4686 /* If EXEC is live after the basic block, restore the value of EXEC
4687 at the end of the block. */
4688 if ((REGNO_REG_SET_P (&live, EXEC_LO_REG)
4689 || REGNO_REG_SET_P (&live, EXEC_HI_REG))
3df6fac0 4690 && (!curr_exec_known || !curr_exec_explicit || exec_live_on_entry))
5326695a
AS
4691 {
4692 rtx_insn *end_insn = BB_END (bb);
4693
4694 /* If the instruction is not a jump instruction, do the restore
4695 after the last instruction in the basic block. */
4696 if (NONJUMP_INSN_P (end_insn))
4697 end_insn = NEXT_INSN (end_insn);
4698
4699 gcn_restore_exec (end_insn, last_exec_def, curr_exec,
4700 curr_exec_known, last_exec_def_saved);
4701 }
4702 }
4703
4704 CLEAR_REG_SET (&live);
4705
4706 /* "Manually Inserted Wait States (NOPs)."
4707
4708 GCN hardware detects most kinds of register dependencies, but there
4709 are some exceptions documented in the ISA manual. This pass
4710 detects the missed cases, and inserts the documented number of NOPs
4711 required for correct execution. */
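 /* For example (one of the hazards handled below): a VALU instruction that
    writes an SGPR followed by a VMEM instruction reading that SGPR needs up
    to five intervening wait states, which this pass satisfies by inserting
    NOPs between the two.  */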
4712
4713 const int max_waits = 5;
4714 struct ilist
4715 {
4716 rtx_insn *insn;
4717 attr_unit unit;
930c5599 4718 attr_delayeduse delayeduse;
5326695a 4719 HARD_REG_SET writes;
930c5599 4720 HARD_REG_SET reads;
5326695a
AS
4721 int age;
4722 } back[max_waits];
4723 int oldest = 0;
4724 for (int i = 0; i < max_waits; i++)
4725 back[i].insn = NULL;
4726
4727 rtx_insn *insn, *last_insn = NULL;
4728 for (insn = get_insns (); insn != 0; insn = NEXT_INSN (insn))
4729 {
4730 if (!NONDEBUG_INSN_P (insn))
4731 continue;
4732
4733 if (GET_CODE (PATTERN (insn)) == USE
4734 || GET_CODE (PATTERN (insn)) == CLOBBER)
4735 continue;
4736
4737 attr_type itype = get_attr_type (insn);
4738 attr_unit iunit = get_attr_unit (insn);
930c5599 4739 attr_delayeduse idelayeduse = get_attr_delayeduse (insn);
5326695a
AS
4740 HARD_REG_SET ireads, iwrites;
4741 CLEAR_HARD_REG_SET (ireads);
4742 CLEAR_HARD_REG_SET (iwrites);
e8448ba5 4743 note_stores (insn, record_hard_reg_sets, &iwrites);
5326695a
AS
4744 note_uses (&PATTERN (insn), record_hard_reg_uses, &ireads);
4745
4746 /* Scan recent previous instructions for dependencies not handled in
4747 hardware. */
4748 int nops_rqd = 0;
4749 for (int i = oldest; i < oldest + max_waits; i++)
4750 {
4751 struct ilist *prev_insn = &back[i % max_waits];
4752
4753 if (!prev_insn->insn)
4754 continue;
4755
4756 /* VALU writes SGPR followed by VMEM reading the same SGPR
4757 requires 5 wait states. */
4758 if ((prev_insn->age + nops_rqd) < 5
4759 && prev_insn->unit == UNIT_VECTOR
4760 && gcn_vmem_insn_p (itype))
4761 {
dc333d8f 4762 HARD_REG_SET regs = prev_insn->writes & ireads;
5326695a
AS
4763 if (hard_reg_set_intersect_p
4764 (regs, reg_class_contents[(int) SGPR_REGS]))
4765 nops_rqd = 5 - prev_insn->age;
4766 }
4767
4768 /* VALU sets VCC/EXEC followed by VALU uses VCCZ/EXECZ
4769 requires 5 wait states. */
4770 if ((prev_insn->age + nops_rqd) < 5
4771 && prev_insn->unit == UNIT_VECTOR
4772 && iunit == UNIT_VECTOR
4773 && ((hard_reg_set_intersect_p
4774 (prev_insn->writes,
4775 reg_class_contents[(int) EXEC_MASK_REG])
4776 && TEST_HARD_REG_BIT (ireads, EXECZ_REG))
4777 ||
4778 (hard_reg_set_intersect_p
4779 (prev_insn->writes,
4780 reg_class_contents[(int) VCC_CONDITIONAL_REG])
4781 && TEST_HARD_REG_BIT (ireads, VCCZ_REG))))
4782 nops_rqd = 5 - prev_insn->age;
4783
4784 /* VALU writes SGPR/VCC followed by v_{read,write}lane using
4785 SGPR/VCC as lane select requires 4 wait states. */
4786 if ((prev_insn->age + nops_rqd) < 4
4787 && prev_insn->unit == UNIT_VECTOR
4788 && get_attr_laneselect (insn) == LANESELECT_YES)
4789 {
dc333d8f 4790 HARD_REG_SET regs = prev_insn->writes & ireads;
5326695a
AS
4791 if (hard_reg_set_intersect_p
4792 (regs, reg_class_contents[(int) SGPR_REGS])
4793 || hard_reg_set_intersect_p
4794 (regs, reg_class_contents[(int) VCC_CONDITIONAL_REG]))
4795 nops_rqd = 4 - prev_insn->age;
4796 }
4797
4798 /* VALU writes VGPR followed by VALU_DPP reading that VGPR
4799 requires 2 wait states. */
4800 if ((prev_insn->age + nops_rqd) < 2
4801 && prev_insn->unit == UNIT_VECTOR
4802 && itype == TYPE_VOP_DPP)
4803 {
dc333d8f 4804 HARD_REG_SET regs = prev_insn->writes & ireads;
5326695a
AS
4805 if (hard_reg_set_intersect_p
4806 (regs, reg_class_contents[(int) VGPR_REGS]))
4807 nops_rqd = 2 - prev_insn->age;
4808 }
930c5599
AS
4809
 4810 /* Stores that require their input registers not to be overwritten by
 4811 the following instruction. */
4812 if ((prev_insn->age + nops_rqd) < 1
4813 && prev_insn->delayeduse == DELAYEDUSE_YES
4814 && ((hard_reg_set_intersect_p
4815 (prev_insn->reads, iwrites))))
4816 nops_rqd = 1 - prev_insn->age;
5326695a
AS
4817 }
4818
4819 /* Insert the required number of NOPs. */
4820 for (int i = nops_rqd; i > 0; i--)
4821 emit_insn_after (gen_nop (), last_insn);
4822
4823 /* Age the previous instructions. We can also ignore writes to
4824 registers subsequently overwritten. */
4825 HARD_REG_SET written;
4826 CLEAR_HARD_REG_SET (written);
4827 for (int i = oldest + max_waits - 1; i > oldest; i--)
4828 {
4829 struct ilist *prev_insn = &back[i % max_waits];
4830
4831 /* Assume all instructions are equivalent to one "wait", the same
4832 as s_nop. This is probably true for SALU, but not VALU (which
4833 may take longer), so this is not optimal. However, AMD do
4834 not publish the cycle times for instructions. */
4835 prev_insn->age += 1 + nops_rqd;
4836
44942965 4837 written |= iwrites;
d15e5131 4838 prev_insn->writes &= ~written;
5326695a
AS
4839 }
4840
4841 /* Track the current instruction as a previous instruction. */
4842 back[oldest].insn = insn;
4843 back[oldest].unit = iunit;
930c5599 4844 back[oldest].delayeduse = idelayeduse;
6576d245
RS
4845 back[oldest].writes = iwrites;
4846 back[oldest].reads = ireads;
5326695a
AS
4847 back[oldest].age = 0;
4848 oldest = (oldest + 1) % max_waits;
4849
4850 last_insn = insn;
4851 }
4852}
4853
4854/* }}} */
4855/* {{{ OpenACC / OpenMP. */
4856
4857#define GCN_DEFAULT_GANGS 0 /* Choose at runtime. */
4858#define GCN_DEFAULT_WORKERS 0 /* Choose at runtime. */
4859#define GCN_DEFAULT_VECTORS 1 /* Use autovectorization only, for now. */
4860
4861/* Implement TARGET_GOACC_VALIDATE_DIMS.
4862
4863 Check the launch dimensions provided for an OpenACC compute
4864 region, or routine. */
4865
4866static bool
4867gcn_goacc_validate_dims (tree decl, int dims[], int fn_level,
4868 unsigned /*used*/)
4869{
4870 bool changed = false;
4871
4872 /* FIXME: remove -facc-experimental-workers when they're ready. */
4873 int max_workers = flag_worker_partitioning ? 16 : 1;
4874
fe22e0d4
AS
4875 gcc_assert (!flag_worker_partitioning);
4876
5326695a
AS
4877 /* The vector size must appear to be 64, to the user, unless this is a
4878 SEQ routine. The real, internal value is always 1, which means use
4879 autovectorization, but the user should not see that. */
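 /* So, for illustration, "vector_length (32)" in user code draws the warning
    below and behaves as vector_length (64), while dims[GOMP_DIM_VECTOR] is
    forced to 1 internally.  */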
4880 if (fn_level <= GOMP_DIM_VECTOR && fn_level >= -1
4881 && dims[GOMP_DIM_VECTOR] >= 0)
4882 {
4883 if (fn_level < 0 && dims[GOMP_DIM_VECTOR] >= 0
4884 && dims[GOMP_DIM_VECTOR] != 64)
4885 warning_at (decl ? DECL_SOURCE_LOCATION (decl) : UNKNOWN_LOCATION,
4886 OPT_Wopenacc_dims,
4887 (dims[GOMP_DIM_VECTOR]
55308fc2
AS
4888 ? G_("using %<vector_length (64)%>, ignoring %d")
4889 : G_("using %<vector_length (64)%>, "
5326695a
AS
4890 "ignoring runtime setting")),
4891 dims[GOMP_DIM_VECTOR]);
4892 dims[GOMP_DIM_VECTOR] = 1;
4893 changed = true;
4894 }
4895
4896 /* Check the num workers is not too large. */
4897 if (dims[GOMP_DIM_WORKER] > max_workers)
4898 {
4899 warning_at (decl ? DECL_SOURCE_LOCATION (decl) : UNKNOWN_LOCATION,
4900 OPT_Wopenacc_dims,
55308fc2 4901 "using %<num_workers (%d)%>, ignoring %d",
5326695a
AS
4902 max_workers, dims[GOMP_DIM_WORKER]);
4903 dims[GOMP_DIM_WORKER] = max_workers;
4904 changed = true;
4905 }
4906
4907 /* Set global defaults. */
4908 if (!decl)
4909 {
4910 dims[GOMP_DIM_VECTOR] = GCN_DEFAULT_VECTORS;
4911 if (dims[GOMP_DIM_WORKER] < 0)
4912 dims[GOMP_DIM_WORKER] = (flag_worker_partitioning
4913 ? GCN_DEFAULT_WORKERS : 1);
4914 if (dims[GOMP_DIM_GANG] < 0)
4915 dims[GOMP_DIM_GANG] = GCN_DEFAULT_GANGS;
4916 changed = true;
4917 }
4918
4919 return changed;
4920}
4921
4922/* Helper function for oacc_dim_size instruction.
4923 Also used for OpenMP, via builtin_gcn_dim_size, and the omp_gcn pass. */
4924
4925rtx
4926gcn_oacc_dim_size (int dim)
4927{
4928 if (dim < 0 || dim > 2)
4929 error ("offload dimension out of range (%d)", dim);
4930
4931 /* Vectors are a special case. */
4932 if (dim == 2)
4933 return const1_rtx; /* Think of this as 1 times 64. */
4934
4935 static int offset[] = {
4936 /* Offsets into dispatch packet. */
4937 12, /* X dim = Gang / Team / Work-group. */
4938 20, /* Z dim = Worker / Thread / Wavefront. */
4939 16 /* Y dim = Vector / SIMD / Work-item. */
4940 };
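 /* Assumption, based on the layout of the HSA kernel dispatch packet: these
    are the byte offsets of its 32-bit grid_size_x/y/z fields, reached via
    the DISPATCH_PTR kernel argument.  */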
4941 rtx addr = gen_rtx_PLUS (DImode,
4942 gen_rtx_REG (DImode,
4943 cfun->machine->args.
4944 reg[DISPATCH_PTR_ARG]),
4945 GEN_INT (offset[dim]));
4946 return gen_rtx_MEM (SImode, addr);
4947}
4948
4949/* Helper function for oacc_dim_pos instruction.
4950 Also used for OpenMP, via builtin_gcn_dim_pos, and the omp_gcn pass. */
4951
4952rtx
4953gcn_oacc_dim_pos (int dim)
4954{
4955 if (dim < 0 || dim > 2)
4956 error ("offload dimension out of range (%d)", dim);
4957
4958 static const int reg[] = {
4959 WORKGROUP_ID_X_ARG, /* Gang / Team / Work-group. */
4960 WORK_ITEM_ID_Z_ARG, /* Worker / Thread / Wavefront. */
4961 WORK_ITEM_ID_Y_ARG /* Vector / SIMD / Work-item. */
4962 };
4963
4964 int reg_num = cfun->machine->args.reg[reg[dim]];
4965
4966 /* The information must have been requested by the kernel. */
4967 gcc_assert (reg_num >= 0);
4968
4969 return gen_rtx_REG (SImode, reg_num);
4970}
4971
4972/* Implement TARGET_GOACC_FORK_JOIN. */
4973
4974static bool
4975gcn_fork_join (gcall *ARG_UNUSED (call), const int *ARG_UNUSED (dims),
4976 bool ARG_UNUSED (is_fork))
4977{
4978 /* GCN does not use the fork/join concept invented for NVPTX.
4979 Instead we use standard autovectorization. */
4980 return false;
4981}
4982
4983/* Implement ???????
4984 FIXME make this a real hook.
4985
4986 Adjust FNDECL such that options inherited from the host compiler
4987 are made appropriate for the accelerator compiler. */
4988
4989void
4990gcn_fixup_accel_lto_options (tree fndecl)
4991{
4992 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4993 if (!func_optimize)
4994 return;
4995
ba948b37
JJ
4996 tree old_optimize
4997 = build_optimization_node (&global_options, &global_options_set);
5326695a
AS
4998 tree new_optimize;
4999
5000 /* If the function changed the optimization levels as well as
5001 setting target options, start with the optimizations
5002 specified. */
5003 if (func_optimize != old_optimize)
ba948b37 5004 cl_optimization_restore (&global_options, &global_options_set,
5326695a
AS
5005 TREE_OPTIMIZATION (func_optimize));
5006
5007 gcn_option_override ();
5008
5009 /* The target attributes may also change some optimization flags,
5010 so update the optimization options if necessary. */
ba948b37
JJ
5011 new_optimize = build_optimization_node (&global_options,
5012 &global_options_set);
5326695a
AS
5013
5014 if (old_optimize != new_optimize)
5015 {
5016 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
ba948b37 5017 cl_optimization_restore (&global_options, &global_options_set,
5326695a
AS
5018 TREE_OPTIMIZATION (old_optimize));
5019 }
5020}
5021
5022/* }}} */
5023/* {{{ ASM Output. */
5024
5025/* Implement TARGET_ASM_FILE_START.
5026
5027 Print assembler file header text. */
5028
5029static void
5030output_file_start (void)
5031{
dd455df7 5032 const char *cpu;
f062c3f1
AS
5033 switch (gcn_arch)
5034 {
5035 case PROCESSOR_FIJI: cpu = "gfx803"; break;
5036 case PROCESSOR_VEGA10: cpu = "gfx900"; break;
5037 case PROCESSOR_VEGA20: cpu = "gfx906"; break;
3535402e 5038 case PROCESSOR_GFX908: cpu = "gfx908+sram-ecc"; break;
f062c3f1
AS
5039 default: gcc_unreachable ();
5040 }
5041
 5042 fprintf (asm_out_file, "\t.amdgcn_target \"amdgcn-unknown-amdhsa--%s\"\n", cpu);
5326695a
AS
5043}
5044
5045/* Implement ASM_DECLARE_FUNCTION_NAME via gcn-hsa.h.
5046
5047 Print the initial definition of a function name.
5048
5049 For GCN kernel entry points this includes all the HSA meta-data, special
5050 alignment constraints that don't apply to regular functions, and magic
5051 comments that pass information to mkoffload. */
5052
5053void
5054gcn_hsa_declare_function_name (FILE *file, const char *name, tree)
5055{
5056 int sgpr, vgpr;
5057 bool xnack_enabled = false;
f062c3f1
AS
5058
5059 fputs ("\n\n", file);
5326695a
AS
5060
5061 if (cfun && cfun->machine && cfun->machine->normal_function)
5062 {
5063 fputs ("\t.type\t", file);
5064 assemble_name (file, name);
5065 fputs (",@function\n", file);
5066 assemble_name (file, name);
5067 fputs (":\n", file);
5068 return;
5069 }
5070
5071 /* Determine count of sgpr/vgpr registers by looking for last
5072 one used. */
5073 for (sgpr = 101; sgpr >= 0; sgpr--)
5074 if (df_regs_ever_live_p (FIRST_SGPR_REG + sgpr))
5075 break;
5076 sgpr++;
5077 for (vgpr = 255; vgpr >= 0; vgpr--)
5078 if (df_regs_ever_live_p (FIRST_VGPR_REG + vgpr))
5079 break;
5080 vgpr++;
5081
5326695a
AS
5082 if (!leaf_function_p ())
5083 {
5084 /* We can't know how many registers function calls might use. */
87fdbe69
KCY
5085 if (vgpr < MAX_NORMAL_VGPR_COUNT)
5086 vgpr = MAX_NORMAL_VGPR_COUNT;
f062c3f1
AS
5087 if (sgpr < MAX_NORMAL_SGPR_COUNT)
5088 sgpr = MAX_NORMAL_SGPR_COUNT;
5326695a
AS
5089 }
5090
f062c3f1
AS
5091 fputs ("\t.rodata\n"
5092 "\t.p2align\t6\n"
5093 "\t.amdhsa_kernel\t", file);
5326695a
AS
5094 assemble_name (file, name);
5095 fputs ("\n", file);
5326695a
AS
5096 int reg = FIRST_SGPR_REG;
5097 for (int a = 0; a < GCN_KERNEL_ARG_TYPES; a++)
5098 {
5099 int reg_first = -1;
5100 int reg_last;
5101 if ((cfun->machine->args.requested & (1 << a))
5102 && (gcn_kernel_arg_types[a].fixed_regno < 0))
5103 {
5104 reg_first = reg;
5105 reg_last = (reg_first
5106 + (GET_MODE_SIZE (gcn_kernel_arg_types[a].mode)
5107 / UNITS_PER_WORD) - 1);
5108 reg = reg_last + 1;
5109 }
5110
5111 if (gcn_kernel_arg_types[a].header_pseudo)
5112 {
f062c3f1
AS
5113 fprintf (file, "\t %s%s\t%i",
5114 (cfun->machine->args.requested & (1 << a)) != 0 ? "" : ";",
5326695a
AS
5115 gcn_kernel_arg_types[a].header_pseudo,
5116 (cfun->machine->args.requested & (1 << a)) != 0);
5117 if (reg_first != -1)
5118 {
5119 fprintf (file, " ; (");
5120 for (int i = reg_first; i <= reg_last; ++i)
5121 {
5122 if (i != reg_first)
5123 fprintf (file, ", ");
5124 fprintf (file, "%s", reg_names[i]);
5125 }
5126 fprintf (file, ")");
5127 }
5128 fprintf (file, "\n");
5129 }
5130 else if (gcn_kernel_arg_types[a].fixed_regno >= 0
5131 && cfun->machine->args.requested & (1 << a))
f062c3f1 5132 fprintf (file, "\t ; %s\t%i (%s)\n",
5326695a
AS
5133 gcn_kernel_arg_types[a].name,
5134 (cfun->machine->args.requested & (1 << a)) != 0,
5135 reg_names[gcn_kernel_arg_types[a].fixed_regno]);
5136 }
f062c3f1 5137 fprintf (file, "\t .amdhsa_system_vgpr_workitem_id\t%i\n",
5326695a
AS
5138 (cfun->machine->args.requested & (1 << WORK_ITEM_ID_Z_ARG))
5139 ? 2
5140 : cfun->machine->args.requested & (1 << WORK_ITEM_ID_Y_ARG)
5141 ? 1 : 0);
f062c3f1
AS
5142 fprintf (file,
5143 "\t .amdhsa_next_free_vgpr\t%i\n"
5144 "\t .amdhsa_next_free_sgpr\t%i\n"
5145 "\t .amdhsa_reserve_vcc\t1\n"
5146 "\t .amdhsa_reserve_flat_scratch\t0\n"
5147 "\t .amdhsa_reserve_xnack_mask\t%i\n"
5148 "\t .amdhsa_private_segment_fixed_size\t%i\n"
5149 "\t .amdhsa_group_segment_fixed_size\t%u\n"
5150 "\t .amdhsa_float_denorm_mode_32\t3\n"
5151 "\t .amdhsa_float_denorm_mode_16_64\t3\n",
5152 vgpr,
5153 sgpr,
5154 xnack_enabled,
5326695a
AS
5155 /* workitem_private_segment_bytes_size needs to be
5156 one 64th the wave-front stack size. */
5157 stack_size_opt / 64,
f062c3f1
AS
5158 LDS_SIZE);
5159 fputs ("\t.end_amdhsa_kernel\n", file);
5160
5161#if 1
5162 /* The following is YAML embedded in assembler; tabs are not allowed. */
5163 fputs (" .amdgpu_metadata\n"
5164 " amdhsa.version:\n"
5165 " - 1\n"
5166 " - 0\n"
5167 " amdhsa.kernels:\n"
5168 " - .name: ", file);
5169 assemble_name (file, name);
5170 fputs ("\n .symbol: ", file);
5171 assemble_name (file, name);
5172 fprintf (file,
5173 ".kd\n"
5174 " .kernarg_segment_size: %i\n"
5175 " .kernarg_segment_align: %i\n"
5176 " .group_segment_fixed_size: %u\n"
5177 " .private_segment_fixed_size: %i\n"
5178 " .wavefront_size: 64\n"
5179 " .sgpr_count: %i\n"
5180 " .vgpr_count: %i\n"
5181 " .max_flat_workgroup_size: 1024\n",
5182 cfun->machine->kernarg_segment_byte_size,
5326695a 5183 cfun->machine->kernarg_segment_alignment,
f062c3f1
AS
5184 LDS_SIZE,
5185 stack_size_opt / 64,
5186 sgpr, vgpr);
5187 fputs (" .end_amdgpu_metadata\n", file);
5188#endif
5189
5190 fputs ("\t.text\n", file);
5191 fputs ("\t.align\t256\n", file);
5192 fputs ("\t.type\t", file);
5193 assemble_name (file, name);
5194 fputs (",@function\n", file);
5195 assemble_name (file, name);
5196 fputs (":\n", file);
5326695a
AS
5197
5198 /* This comment is read by mkoffload. */
5199 if (flag_openacc)
5200 fprintf (file, "\t;; OPENACC-DIMS: %d, %d, %d : %s\n",
5201 oacc_get_fn_dim_size (cfun->decl, GOMP_DIM_GANG),
5202 oacc_get_fn_dim_size (cfun->decl, GOMP_DIM_WORKER),
5203 oacc_get_fn_dim_size (cfun->decl, GOMP_DIM_VECTOR), name);
5204}
5205
5206/* Implement TARGET_ASM_SELECT_SECTION.
5207
5208 Return the section into which EXP should be placed. */
5209
5210static section *
5211gcn_asm_select_section (tree exp, int reloc, unsigned HOST_WIDE_INT align)
5212{
5213 if (TREE_TYPE (exp) != error_mark_node
5214 && TYPE_ADDR_SPACE (TREE_TYPE (exp)) == ADDR_SPACE_LDS)
5215 {
5216 if (!DECL_P (exp))
5217 return get_section (".lds_bss",
5218 SECTION_WRITE | SECTION_BSS | SECTION_DEBUG,
5219 NULL);
5220
5221 return get_named_section (exp, ".lds_bss", reloc);
5222 }
5223
5224 return default_elf_select_section (exp, reloc, align);
5225}
5226
5227/* Implement TARGET_ASM_FUNCTION_PROLOGUE.
5228
5229 Emits custom text into the assembler file at the head of each function. */
5230
5231static void
5232gcn_target_asm_function_prologue (FILE *file)
5233{
5234 machine_function *offsets = gcn_compute_frame_offsets ();
5235
5236 asm_fprintf (file, "\t; using %s addressing in function\n",
5237 offsets->use_flat_addressing ? "flat" : "global");
5238
5239 if (offsets->normal_function)
5240 {
5241 asm_fprintf (file, "\t; frame pointer needed: %s\n",
5242 offsets->need_frame_pointer ? "true" : "false");
5243 asm_fprintf (file, "\t; lr needs saving: %s\n",
5244 offsets->lr_needs_saving ? "true" : "false");
5245 asm_fprintf (file, "\t; outgoing args size: %wd\n",
5246 offsets->outgoing_args_size);
5247 asm_fprintf (file, "\t; pretend size: %wd\n", offsets->pretend_size);
5248 asm_fprintf (file, "\t; local vars size: %wd\n", offsets->local_vars);
5249 asm_fprintf (file, "\t; callee save size: %wd\n",
5250 offsets->callee_saves);
5251 }
5252 else
5253 {
5254 asm_fprintf (file, "\t; HSA kernel entry point\n");
5255 asm_fprintf (file, "\t; local vars size: %wd\n", offsets->local_vars);
5256 asm_fprintf (file, "\t; outgoing args size: %wd\n",
5257 offsets->outgoing_args_size);
5326695a
AS
5258 }
5259}
5260
5261/* Helper function for print_operand and print_operand_address.
5262
5263 Print a register as the assembler requires, according to mode and name. */
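   E.g. a DImode value in an SGPR pair starting at relative register 4
   prints as "s[4:5]", a TImode value starting at VGPR 8 prints as
   "v[8:11]", and the special registers print by name ("exec", "vcc",
   "flat_scratch").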
5264
5265static void
5266print_reg (FILE *file, rtx x)
5267{
5268 machine_mode mode = GET_MODE (x);
5269 if (mode == BImode || mode == QImode || mode == HImode || mode == SImode
5270 || mode == HFmode || mode == SFmode
5271 || mode == V64SFmode || mode == V64SImode
5272 || mode == V64QImode || mode == V64HImode)
5273 fprintf (file, "%s", reg_names[REGNO (x)]);
5274 else if (mode == DImode || mode == V64DImode
5275 || mode == DFmode || mode == V64DFmode)
5276 {
5277 if (SGPR_REGNO_P (REGNO (x)))
5278 fprintf (file, "s[%i:%i]", REGNO (x) - FIRST_SGPR_REG,
5279 REGNO (x) - FIRST_SGPR_REG + 1);
5280 else if (VGPR_REGNO_P (REGNO (x)))
5281 fprintf (file, "v[%i:%i]", REGNO (x) - FIRST_VGPR_REG,
5282 REGNO (x) - FIRST_VGPR_REG + 1);
5283 else if (REGNO (x) == FLAT_SCRATCH_REG)
5284 fprintf (file, "flat_scratch");
5285 else if (REGNO (x) == EXEC_REG)
5286 fprintf (file, "exec");
5287 else if (REGNO (x) == VCC_LO_REG)
5288 fprintf (file, "vcc");
5289 else
5290 fprintf (file, "[%s:%s]",
5291 reg_names[REGNO (x)], reg_names[REGNO (x) + 1]);
5292 }
5293 else if (mode == TImode)
5294 {
5295 if (SGPR_REGNO_P (REGNO (x)))
5296 fprintf (file, "s[%i:%i]", REGNO (x) - FIRST_SGPR_REG,
5297 REGNO (x) - FIRST_SGPR_REG + 3);
5298 else if (VGPR_REGNO_P (REGNO (x)))
5299 fprintf (file, "v[%i:%i]", REGNO (x) - FIRST_VGPR_REG,
5300 REGNO (x) - FIRST_VGPR_REG + 3);
5301 else
5302 gcc_unreachable ();
5303 }
5304 else
5305 gcc_unreachable ();
5306}
5307
5308/* Implement TARGET_SECTION_TYPE_FLAGS.
5309
5310 Return a set of section attributes for use by TARGET_ASM_NAMED_SECTION. */
5311
5312static unsigned int
5313gcn_section_type_flags (tree decl, const char *name, int reloc)
5314{
5315 if (strcmp (name, ".lds_bss") == 0)
5316 return SECTION_WRITE | SECTION_BSS | SECTION_DEBUG;
5317
5318 return default_section_type_flags (decl, name, reloc);
5319}
5320
5321/* Helper function for gcn_asm_output_symbol_ref.
5322
5323 FIXME: If we want to have propagation blocks allocated separately and
5324 statically like this, it would be better done via symbol refs and the
5325 assembler/linker. This is a temporary hack. */
5326
5327static void
5328gcn_print_lds_decl (FILE *f, tree var)
5329{
5330 int *offset;
5331 machine_function *machfun = cfun->machine;
5332
5333 if ((offset = machfun->lds_allocs->get (var)))
5334 fprintf (f, "%u", (unsigned) *offset);
5335 else
5336 {
5337 unsigned HOST_WIDE_INT align = DECL_ALIGN_UNIT (var);
5338 tree type = TREE_TYPE (var);
5339 unsigned HOST_WIDE_INT size = tree_to_uhwi (TYPE_SIZE_UNIT (type));
5340 if (size > align && size > 4 && align < 8)
5341 align = 8;
5342
5343 machfun->lds_allocated = ((machfun->lds_allocated + align - 1)
5344 & ~(align - 1));
5345
5346 machfun->lds_allocs->put (var, machfun->lds_allocated);
5347 fprintf (f, "%u", machfun->lds_allocated);
5348 machfun->lds_allocated += size;
5349 if (machfun->lds_allocated > LDS_SIZE)
5350 error ("local data-share memory exhausted");
5351 }
5352}
5353
5354/* Implement ASM_OUTPUT_SYMBOL_REF via gcn-hsa.h. */
5355
5356void
5357gcn_asm_output_symbol_ref (FILE *file, rtx x)
5358{
5359 tree decl;
9200b53a
JB
5360 if (cfun
5361 && (decl = SYMBOL_REF_DECL (x)) != 0
5326695a
AS
5362 && TREE_CODE (decl) == VAR_DECL
5363 && AS_LDS_P (TYPE_ADDR_SPACE (TREE_TYPE (decl))))
5364 {
5365 /* LDS symbols (emitted using this hook) are only used at present
5366 to propagate worker values from an active thread to neutered
5367 threads. Use the same offset for each such block, but don't
5368 use zero because null pointers are used to identify the active
5369 thread in GOACC_single_copy_start calls. */
5370 gcn_print_lds_decl (file, decl);
5371 }
5372 else
5373 {
5374 assemble_name (file, XSTR (x, 0));
5375 /* FIXME: See above -- this condition is unreachable. */
9200b53a
JB
5376 if (cfun
5377 && (decl = SYMBOL_REF_DECL (x)) != 0
5326695a
AS
5378 && TREE_CODE (decl) == VAR_DECL
5379 && AS_LDS_P (TYPE_ADDR_SPACE (TREE_TYPE (decl))))
5380 fputs ("@abs32", file);
5381 }
5382}
5383
5384/* Implement TARGET_CONSTANT_ALIGNMENT.
5385
5386 Returns the alignment in bits of a constant that is being placed in memory.
5387 CONSTANT is the constant and BASIC_ALIGN is the alignment that the object
5388 would ordinarily have. */
5389
5390static HOST_WIDE_INT
5391gcn_constant_alignment (const_tree ARG_UNUSED (constant),
5392 HOST_WIDE_INT basic_align)
5393{
5394 return basic_align > 128 ? basic_align : 128;
5395}
5396
5397/* Implement PRINT_OPERAND_ADDRESS via gcn.h. */
5398
5399void
5400print_operand_address (FILE *file, rtx mem)
5401{
5402 gcc_assert (MEM_P (mem));
5403
5404 rtx reg;
5405 rtx offset;
5406 addr_space_t as = MEM_ADDR_SPACE (mem);
5407 rtx addr = XEXP (mem, 0);
5408 gcc_assert (REG_P (addr) || GET_CODE (addr) == PLUS);
5409
5410 if (AS_SCRATCH_P (as))
5411 switch (GET_CODE (addr))
5412 {
5413 case REG:
5414 print_reg (file, addr);
5415 break;
5416
5417 case PLUS:
5418 reg = XEXP (addr, 0);
5419 offset = XEXP (addr, 1);
5420 print_reg (file, reg);
5421 if (GET_CODE (offset) == CONST_INT)
5422 fprintf (file, " offset:" HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
5423 else
5424 abort ();
5425 break;
5426
5427 default:
5428 debug_rtx (addr);
5429 abort ();
5430 }
5431 else if (AS_ANY_FLAT_P (as))
5432 {
5433 if (GET_CODE (addr) == REG)
5434 print_reg (file, addr);
5435 else
5436 {
5437 gcc_assert (TARGET_GCN5_PLUS);
5438 print_reg (file, XEXP (addr, 0));
5439 }
5440 }
5441 else if (AS_GLOBAL_P (as))
5442 {
5443 gcc_assert (TARGET_GCN5_PLUS);
5444
5445 rtx base = addr;
5446 rtx vgpr_offset = NULL_RTX;
5447
5448 if (GET_CODE (addr) == PLUS)
5449 {
5450 base = XEXP (addr, 0);
5451
5452 if (GET_CODE (base) == PLUS)
5453 {
5454 /* (SGPR + VGPR) + CONST */
5455 vgpr_offset = XEXP (base, 1);
5456 base = XEXP (base, 0);
5457 }
5458 else
5459 {
5460 rtx offset = XEXP (addr, 1);
5461
5462 if (REG_P (offset))
5463 /* SGPR + VGPR */
5464 vgpr_offset = offset;
5465 else if (CONST_INT_P (offset))
5466 /* VGPR + CONST or SGPR + CONST */
5467 ;
5468 else
5469 output_operand_lossage ("bad ADDR_SPACE_GLOBAL address");
5470 }
5471 }
5472
5473 if (REG_P (base))
5474 {
5475 if (VGPR_REGNO_P (REGNO (base)))
5476 print_reg (file, base);
5477 else if (SGPR_REGNO_P (REGNO (base)))
5478 {
5479 /* The assembler requires a 64-bit VGPR pair here, even though
5480 the offset should be only 32-bit. */
5481 if (vgpr_offset == NULL_RTX)
f6e20012
KCY
5482 /* In this case, the vector offset is zero, so we use the first
5483 lane of v1, which is initialized to zero. */
5484 fprintf (file, "v[1:2]");
5326695a
AS
5485 else if (REG_P (vgpr_offset)
5486 && VGPR_REGNO_P (REGNO (vgpr_offset)))
5487 {
5488 fprintf (file, "v[%d:%d]",
5489 REGNO (vgpr_offset) - FIRST_VGPR_REG,
5490 REGNO (vgpr_offset) - FIRST_VGPR_REG + 1);
5491 }
5492 else
5493 output_operand_lossage ("bad ADDR_SPACE_GLOBAL address");
5494 }
5495 }
5496 else
5497 output_operand_lossage ("bad ADDR_SPACE_GLOBAL address");
5498 }
5499 else if (AS_ANY_DS_P (as))
5500 switch (GET_CODE (addr))
5501 {
5502 case REG:
5503 print_reg (file, addr);
5504 break;
5505
5506 case PLUS:
5507 reg = XEXP (addr, 0);
5508 print_reg (file, reg);
5509 break;
5510
5511 default:
5512 debug_rtx (addr);
5513 abort ();
5514 }
5515 else
5516 switch (GET_CODE (addr))
5517 {
5518 case REG:
5519 print_reg (file, addr);
5520 fprintf (file, ", 0");
5521 break;
5522
5523 case PLUS:
5524 reg = XEXP (addr, 0);
5525 offset = XEXP (addr, 1);
5526 print_reg (file, reg);
5527 fprintf (file, ", ");
5528 if (GET_CODE (offset) == REG)
 5529 print_reg (file, offset);
5530 else if (GET_CODE (offset) == CONST_INT)
5531 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
5532 else
5533 abort ();
5534 break;
5535
5536 default:
5537 debug_rtx (addr);
5538 abort ();
5539 }
5540}
5541
5542/* Implement PRINT_OPERAND via gcn.h.
5543
5544 b - print operand size as untyped operand (b8/b16/b32/b64)
5545 B - print operand size as SI/DI untyped operand (b32/b32/b32/b64)
5546 i - print operand size as untyped operand (i16/b32/i64)
a5879399 5547 I - print operand size as SI/DI untyped operand (i32/b32/i64)
5326695a 5548 u - print operand size as untyped operand (u16/u32/u64)
a5879399 5549 U - print operand size as SI/DI untyped operand(u32/u64)
5326695a
AS
5550 o - print operand size as memory access size for loads
5551 (ubyte/ushort/dword/dwordx2/wordx3/dwordx4)
5552 s - print operand size as memory access size for stores
5553 (byte/short/dword/dwordx2/wordx3/dwordx4)
5554 C - print conditional code for s_cbranch (_sccz/_sccnz/_vccz/_vccnz...)
5555 c - print inverse conditional code for s_cbranch
5556 D - print conditional code for s_cmp (eq_u64/lg_u64...)
5557 E - print conditional code for v_cmp (eq_u64/ne_u64...)
5558 A - print address in formatting suitable for given address space.
5559 O - print offset:n for data share operations.
5560 ^ - print "_co" suffix for GCN5 mnemonics
5561 g - print "glc", if appropriate for given MEM
5562 */
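/* Illustration (not in the original source): in an insn output template,
   "%" followed by one of the codes above and an operand number invokes
   print_operand on that operand, so "%A1" prints operand 1's address,
   "%O1" its offset clause, "%s0" the store width of operand 0, and "%g1"
   appends " glc" for a volatile MEM.  A hypothetical template combining
   them could read "flat_store%s1\t%A0, %1%O0%g0"; the real templates live
   in the machine description (.md) files.  */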
5563
5564void
5565print_operand (FILE *file, rtx x, int code)
5566{
5567 int xcode = x ? GET_CODE (x) : 0;
5568 bool invert = false;
5569 switch (code)
5570 {
5571 /* Instructions have the following suffixes.
5572 If there are two suffixes, the first is the destination type,
5573 and the second is the source type.
5574
5575 B32 Bitfield (untyped data) 32-bit
5576 B64 Bitfield (untyped data) 64-bit
5577 F16 floating-point 16-bit
5578 F32 floating-point 32-bit (IEEE 754 single-precision float)
5579 F64 floating-point 64-bit (IEEE 754 double-precision float)
 5580 I16 signed 16-bit integer
5581 I32 signed 32-bit integer
5582 I64 signed 64-bit integer
 5583 U16 unsigned 16-bit integer
5584 U32 unsigned 32-bit integer
5585 U64 unsigned 64-bit integer */
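 /* Illustration (not in the original source): an SImode operand printed
    with the 'u' code yields "_u32", and '^' adds "_co" on GCN5, so a
    hypothetical template "v_add%^%u0" would expand to "v_add_u32" on GCN3
    and "v_add_co_u32" on GCN5.  */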
5586
5587 /* Print operand size as untyped suffix. */
5588 case 'b':
5589 {
5590 const char *s = "";
5591 machine_mode mode = GET_MODE (x);
5592 if (VECTOR_MODE_P (mode))
5593 mode = GET_MODE_INNER (mode);
5594 switch (GET_MODE_SIZE (mode))
5595 {
5596 case 1:
5597 s = "_b8";
5598 break;
5599 case 2:
5600 s = "_b16";
5601 break;
5602 case 4:
5603 s = "_b32";
5604 break;
5605 case 8:
5606 s = "_b64";
5607 break;
5608 default:
5609 output_operand_lossage ("invalid operand %%xn code");
5610 return;
5611 }
5612 fputs (s, file);
5613 }
5614 return;
5615 case 'B':
5616 {
5617 const char *s = "";
5618 machine_mode mode = GET_MODE (x);
5619 if (VECTOR_MODE_P (mode))
5620 mode = GET_MODE_INNER (mode);
5621 switch (GET_MODE_SIZE (mode))
5622 {
5623 case 1:
5624 case 2:
5625 case 4:
5626 s = "_b32";
5627 break;
5628 case 8:
5629 s = "_b64";
5630 break;
5631 default:
5632 output_operand_lossage ("invalid operand %%xn code");
5633 return;
5634 }
5635 fputs (s, file);
5636 }
5637 return;
5638 case 'e':
5639 fputs ("sext(", file);
5640 print_operand (file, x, 0);
5641 fputs (")", file);
5642 return;
5643 case 'i':
a5879399 5644 case 'I':
5326695a 5645 case 'u':
a5879399 5646 case 'U':
5647 {
5648 bool signed_p = code == 'i';
a5879399 5649 bool min32_p = code == 'I' || code == 'U';
5650 const char *s = "";
5651 machine_mode mode = GET_MODE (x);
5652 if (VECTOR_MODE_P (mode))
5653 mode = GET_MODE_INNER (mode);
5654 if (mode == VOIDmode)
5655 switch (GET_CODE (x))
5656 {
5657 case CONST_INT:
5658 s = signed_p ? "_i32" : "_u32";
5659 break;
5660 case CONST_DOUBLE:
5661 s = "_f64";
5662 break;
5663 default:
5664 output_operand_lossage ("invalid operand %%xn code");
5665 return;
5666 }
5667 else if (FLOAT_MODE_P (mode))
5668 switch (GET_MODE_SIZE (mode))
5669 {
5670 case 2:
5671 s = "_f16";
5672 break;
5673 case 4:
5674 s = "_f32";
5675 break;
5676 case 8:
5677 s = "_f64";
5678 break;
5679 default:
5680 output_operand_lossage ("invalid operand %%xn code");
5681 return;
5682 }
5683 else if (min32_p)
5684 switch (GET_MODE_SIZE (mode))
5685 {
5686 case 1:
5687 case 2:
5688 case 4:
5689 s = signed_p ? "_i32" : "_u32";
5690 break;
5691 case 8:
5692 s = signed_p ? "_i64" : "_u64";
5693 break;
5694 default:
5695 output_operand_lossage ("invalid operand %%xn code");
5696 return;
5697 }
5698 else
5699 switch (GET_MODE_SIZE (mode))
5700 {
5701 case 1:
5702 s = signed_p ? "_i8" : "_u8";
5703 break;
5704 case 2:
5705 s = signed_p ? "_i16" : "_u16";
5706 break;
5707 case 4:
5708 s = signed_p ? "_i32" : "_u32";
5709 break;
5710 case 8:
5711 s = signed_p ? "_i64" : "_u64";
5712 break;
5713 default:
5714 output_operand_lossage ("invalid operand %%xn code");
5715 return;
5716 }
5717 fputs (s, file);
5718 }
5719 return;
 5720 /* Print operand size as a memory access width suffix (loads). */
5721 case 'o':
5722 {
5723 const char *s = 0;
5724 switch (GET_MODE_SIZE (GET_MODE (x)))
5725 {
5726 case 1:
5727 s = "_ubyte";
5728 break;
5729 case 2:
5730 s = "_ushort";
5731 break;
5732 /* The following are full-vector variants. */
5733 case 64:
5734 s = "_ubyte";
5735 break;
5736 case 128:
5737 s = "_ushort";
5738 break;
5739 }
5740
5741 if (s)
5742 {
5743 fputs (s, file);
5744 return;
5745 }
5746
5747 /* Fall-through - the other cases for 'o' are the same as for 's'. */
5748 gcc_fallthrough();
5749 }
5750 case 's':
5751 {
5752 const char *s = "";
5753 switch (GET_MODE_SIZE (GET_MODE (x)))
5754 {
5755 case 1:
5756 s = "_byte";
5757 break;
5758 case 2:
5759 s = "_short";
5760 break;
5761 case 4:
5762 s = "_dword";
5763 break;
5764 case 8:
5765 s = "_dwordx2";
5766 break;
5767 case 12:
5768 s = "_dwordx3";
5769 break;
5770 case 16:
5771 s = "_dwordx4";
5772 break;
5773 case 32:
5774 s = "_dwordx8";
5775 break;
5776 case 64:
5777 s = VECTOR_MODE_P (GET_MODE (x)) ? "_byte" : "_dwordx16";
5778 break;
5779 /* The following are full-vector variants. */
5780 case 128:
5781 s = "_short";
5782 break;
5783 case 256:
5784 s = "_dword";
5785 break;
5786 case 512:
5787 s = "_dwordx2";
5788 break;
5789 default:
5790 output_operand_lossage ("invalid operand %%xn code");
5791 return;
5792 }
5793 fputs (s, file);
5794 }
5795 return;
5796 case 'A':
5797 if (xcode != MEM)
5798 {
5799 output_operand_lossage ("invalid %%xn code");
5800 return;
5801 }
5802 print_operand_address (file, x);
5803 return;
5804 case 'O':
5805 {
5806 if (xcode != MEM)
5807 {
5808 output_operand_lossage ("invalid %%xn code");
5809 return;
5810 }
5811 if (AS_GDS_P (MEM_ADDR_SPACE (x)))
5812 fprintf (file, " gds");
5813
5814 rtx x0 = XEXP (x, 0);
5815 if (AS_GLOBAL_P (MEM_ADDR_SPACE (x)))
5816 {
5817 gcc_assert (TARGET_GCN5_PLUS);
5818
5819 fprintf (file, ", ");
5820
5821 rtx base = x0;
5822 rtx const_offset = NULL_RTX;
5823
5824 if (GET_CODE (base) == PLUS)
5825 {
5826 rtx offset = XEXP (x0, 1);
5827 base = XEXP (x0, 0);
5828
5829 if (GET_CODE (base) == PLUS)
5830 /* (SGPR + VGPR) + CONST */
5831 /* Ignore the VGPR offset for this operand. */
5832 base = XEXP (base, 0);
5833
5834 if (CONST_INT_P (offset))
5835 const_offset = XEXP (x0, 1);
5836 else if (REG_P (offset))
5837 /* SGPR + VGPR */
5838 /* Ignore the VGPR offset for this operand. */
5839 ;
5840 else
5841 output_operand_lossage ("bad ADDR_SPACE_GLOBAL address");
5842 }
5843
5844 if (REG_P (base))
5845 {
5846 if (VGPR_REGNO_P (REGNO (base)))
5847 /* The VGPR address is specified in the %A operand. */
5848 fprintf (file, "off");
5849 else if (SGPR_REGNO_P (REGNO (base)))
5850 print_reg (file, base);
5851 else
5852 output_operand_lossage ("bad ADDR_SPACE_GLOBAL address");
5853 }
5854 else
5855 output_operand_lossage ("bad ADDR_SPACE_GLOBAL address");
5856
5857 if (const_offset != NULL_RTX)
5858 fprintf (file, " offset:" HOST_WIDE_INT_PRINT_DEC,
5859 INTVAL (const_offset));
5860
5861 return;
5862 }
5863
5864 if (GET_CODE (x0) == REG)
5865 return;
5866 if (GET_CODE (x0) != PLUS)
5867 {
5868 output_operand_lossage ("invalid %%xn code");
5869 return;
5870 }
5871 rtx val = XEXP (x0, 1);
5872 if (GET_CODE (val) == CONST_VECTOR)
5873 val = CONST_VECTOR_ELT (val, 0);
5874 if (GET_CODE (val) != CONST_INT)
5875 {
5876 output_operand_lossage ("invalid %%xn code");
5877 return;
5878 }
5879 fprintf (file, " offset:" HOST_WIDE_INT_PRINT_DEC, INTVAL (val));
5880
5881 }
5882 return;
5883 case 'c':
5884 invert = true;
5885 /* Fall through. */
5886 case 'C':
5887 {
5888 const char *s;
5889 bool num = false;
5890 if ((xcode != EQ && xcode != NE) || !REG_P (XEXP (x, 0)))
5891 {
5892 output_operand_lossage ("invalid %%xn code");
5893 return;
5894 }
5895 switch (REGNO (XEXP (x, 0)))
5896 {
5897 case VCC_REG:
5898 case VCCZ_REG:
5899 s = "_vcc";
5900 break;
5901 case SCC_REG:
5902 /* For some reason llvm-mc insists on scc0 instead of sccz. */
5903 num = true;
5904 s = "_scc";
5905 break;
5906 case EXECZ_REG:
5907 s = "_exec";
5908 break;
5909 default:
5910 output_operand_lossage ("invalid %%xn code");
5911 return;
5912 }
5913 fputs (s, file);
5914 if (xcode == (invert ? NE : EQ))
5915 fputc (num ? '0' : 'z', file);
5916 else
5917 fputs (num ? "1" : "nz", file);
5918 return;
5919 }
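 /* Illustration (not in the original source): for the comparison
    (eq (reg vcc) (const_int 0)) the 'C' code prints "_vccz" and the
    inverted 'c' code prints "_vccnz"; with SCC the digits are used
    instead ("_scc0"/"_scc1"), so a hypothetical branch template might
    read "s_cbranch%C1\t%0".  */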
5920 case 'D':
5921 {
5922 const char *s;
5923 bool cmp_signed = false;
5924 switch (xcode)
5925 {
5926 case EQ:
5927 s = "_eq_";
5928 break;
5929 case NE:
5930 s = "_lg_";
5931 break;
5932 case LT:
5933 s = "_lt_";
5934 cmp_signed = true;
5935 break;
5936 case LE:
5937 s = "_le_";
5938 cmp_signed = true;
5939 break;
5940 case GT:
5941 s = "_gt_";
5942 cmp_signed = true;
5943 break;
5944 case GE:
5945 s = "_ge_";
5946 cmp_signed = true;
5947 break;
5948 case LTU:
5949 s = "_lt_";
5950 break;
5951 case LEU:
5952 s = "_le_";
5953 break;
5954 case GTU:
5955 s = "_gt_";
5956 break;
5957 case GEU:
5958 s = "_ge_";
5959 break;
5960 default:
5961 output_operand_lossage ("invalid %%xn code");
5962 return;
5963 }
5964 fputs (s, file);
5965 fputc (cmp_signed ? 'i' : 'u', file);
5966
5967 machine_mode mode = GET_MODE (XEXP (x, 0));
5968
5969 if (mode == VOIDmode)
5970 mode = GET_MODE (XEXP (x, 1));
5971
5972 /* If both sides are constants, then assume the instruction is in
5973 SImode since s_cmp can only do integer compares. */
5974 if (mode == VOIDmode)
5975 mode = SImode;
5976
5977 switch (GET_MODE_SIZE (mode))
5978 {
5979 case 4:
5980 s = "32";
5981 break;
5982 case 8:
5983 s = "64";
5984 break;
5985 default:
5986 output_operand_lossage ("invalid operand %%xn code");
5987 return;
5988 }
5989 fputs (s, file);
5990 return;
5991 }
5992 case 'E':
5993 {
5994 const char *s;
5995 bool cmp_signed = false;
5996 machine_mode mode = GET_MODE (XEXP (x, 0));
5997
5998 if (mode == VOIDmode)
5999 mode = GET_MODE (XEXP (x, 1));
6000
6001 /* If both sides are constants, assume the instruction is in SFmode
6002 if either operand is floating point, otherwise assume SImode. */
6003 if (mode == VOIDmode)
6004 {
6005 if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
6006 || GET_CODE (XEXP (x, 1)) == CONST_DOUBLE)
6007 mode = SFmode;
6008 else
6009 mode = SImode;
6010 }
6011
6012 /* Use the same format code for vector comparisons. */
6013 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
6014 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
6015 mode = GET_MODE_INNER (mode);
6016
6017 bool float_p = GET_MODE_CLASS (mode) == MODE_FLOAT;
6018
6019 switch (xcode)
6020 {
6021 case EQ:
6022 s = "_eq_";
6023 break;
6024 case NE:
6025 s = float_p ? "_neq_" : "_ne_";
6026 break;
6027 case LT:
6028 s = "_lt_";
6029 cmp_signed = true;
6030 break;
6031 case LE:
6032 s = "_le_";
6033 cmp_signed = true;
6034 break;
6035 case GT:
6036 s = "_gt_";
6037 cmp_signed = true;
6038 break;
6039 case GE:
6040 s = "_ge_";
6041 cmp_signed = true;
6042 break;
6043 case LTU:
6044 s = "_lt_";
6045 break;
6046 case LEU:
6047 s = "_le_";
6048 break;
6049 case GTU:
6050 s = "_gt_";
6051 break;
6052 case GEU:
6053 s = "_ge_";
6054 break;
6055 case ORDERED:
6056 s = "_o_";
6057 break;
6058 case UNORDERED:
6059 s = "_u_";
6060 break;
6061 case UNEQ:
6062 s = "_nlg_";
6063 break;
6064 case UNGE:
6065 s = "_nlt_";
6066 break;
6067 case UNGT:
6068 s = "_nle_";
6069 break;
6070 case UNLE:
6071 s = "_ngt_";
6072 break;
6073 case UNLT:
6074 s = "_nge_";
6075 break;
6076 case LTGT:
6077 s = "_lg_";
6078 break;
6079 default:
6080 output_operand_lossage ("invalid %%xn code");
6081 return;
6082 }
6083 fputs (s, file);
6084 fputc (float_p ? 'f' : cmp_signed ? 'i' : 'u', file);
6085
6086 switch (GET_MODE_SIZE (mode))
6087 {
6088 case 1:
6089 output_operand_lossage ("operand %%xn code invalid for QImode");
6090 return;
5326695a 6091 case 2:
0e159efc 6092 s = "16";
6093 break;
6094 case 4:
6095 s = "32";
6096 break;
6097 case 8:
6098 s = "64";
6099 break;
6100 default:
6101 output_operand_lossage ("invalid operand %%xn code");
6102 return;
6103 }
6104 fputs (s, file);
6105 return;
6106 }
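 /* Illustration (not in the original source): with SFmode operands an LT
    comparison prints "_lt_f32" and NE prints "_neq_f32"; with unsigned
    SImode operands LTU prints "_lt_u32", matching v_cmp mnemonics such as
    v_cmp_lt_f32.  */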
6107 case 'L':
6108 print_operand (file, gcn_operand_part (GET_MODE (x), x, 0), 0);
6109 return;
6110 case 'H':
6111 print_operand (file, gcn_operand_part (GET_MODE (x), x, 1), 0);
6112 return;
6113 case 'R':
6114 /* Print a scalar register number as an integer. Temporary hack. */
6115 gcc_assert (REG_P (x));
6116 fprintf (file, "%u", (int) REGNO (x));
6117 return;
6118 case 'V':
6119 /* Print a vector register number as an integer. Temporary hack. */
6120 gcc_assert (REG_P (x));
6121 fprintf (file, "%u", (int) REGNO (x) - FIRST_VGPR_REG);
6122 return;
6123 case 0:
6124 if (xcode == REG)
6125 print_reg (file, x);
6126 else if (xcode == MEM)
6127 output_address (GET_MODE (x), x);
6128 else if (xcode == CONST_INT)
6129 fprintf (file, "%i", (int) INTVAL (x));
6130 else if (xcode == CONST_VECTOR)
6131 print_operand (file, CONST_VECTOR_ELT (x, 0), code);
6132 else if (xcode == CONST_DOUBLE)
6133 {
6134 const char *str;
6135 switch (gcn_inline_fp_constant_p (x, false))
6136 {
6137 case 240:
6138 str = "0.5";
6139 break;
6140 case 241:
6141 str = "-0.5";
6142 break;
6143 case 242:
6144 str = "1.0";
6145 break;
6146 case 243:
6147 str = "-1.0";
6148 break;
6149 case 244:
6150 str = "2.0";
6151 break;
6152 case 245:
6153 str = "-2.0";
6154 break;
6155 case 246:
6156 str = "4.0";
6157 break;
6158 case 247:
6159 str = "-4.0";
6160 break;
6161 case 248:
6162 str = "1/pi";
6163 break;
6164 default:
6165 rtx ix = simplify_gen_subreg (GET_MODE (x) == DFmode
6166 ? DImode : SImode,
6167 x, GET_MODE (x), 0);
 6168 if (ix)
6169 print_operand (file, ix, code);
6170 else
a94d5170 6171 output_operand_lossage ("invalid fp constant");
6172 return;
6173 break;
6174 }
6175 fprintf (file, str);
6176 return;
6177 }
6178 else
6179 output_addr_const (file, x);
6180 return;
6181 case '^':
6182 if (TARGET_GCN5_PLUS)
6183 fputs ("_co", file);
6184 return;
6185 case 'g':
6186 gcc_assert (xcode == MEM);
6187 if (MEM_VOLATILE_P (x))
6188 fputs (" glc", file);
6189 return;
6190 default:
6191 output_operand_lossage ("invalid %%xn code");
6192 }
6193 gcc_unreachable ();
6194}
6195
6196/* Implement DBX_REGISTER_NUMBER macro.
6197
6198 Return the DWARF register number that corresponds to the GCC internal
6199 REGNO. */
6200
6201unsigned int
6202gcn_dwarf_register_number (unsigned int regno)
6203{
6204 /* Registers defined in DWARF. */
6205 if (regno == EXEC_LO_REG)
6206 return 17;
6207 /* We need to use a more complex DWARF expression for this
6208 else if (regno == EXEC_HI_REG)
6209 return 17; */
6210 else if (regno == VCC_LO_REG)
6211 return 768;
6212 /* We need to use a more complex DWARF expression for this
6213 else if (regno == VCC_HI_REG)
6214 return 768; */
6215 else if (regno == SCC_REG)
6216 return 128;
6217 else if (SGPR_REGNO_P (regno))
6218 {
6219 if (regno - FIRST_SGPR_REG < 64)
6220 return (regno - FIRST_SGPR_REG + 32);
6221 else
6222 return (regno - FIRST_SGPR_REG + 1024);
6223 }
6224 else if (VGPR_REGNO_P (regno))
6225 return (regno - FIRST_VGPR_REG + 2560);
6226
6227 /* Otherwise, there's nothing sensible to do. */
6228 return regno + 100000;
6229}
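/* Worked examples of the mapping above (illustrative, derived from the code):
   EXEC_LO -> 17, SCC -> 128, VCC_LO -> 768; the first 64 SGPRs map to DWARF
   registers 32..95, further SGPRs to 1088 upwards, and VGPRs to 2560
   upwards.  */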
6230
6231/* Implement TARGET_DWARF_REGISTER_SPAN.
6232
6233 DImode and Vector DImode require additional registers. */
6234
6235static rtx
6236gcn_dwarf_register_span (rtx rtl)
6237{
6238 machine_mode mode = GET_MODE (rtl);
6239
6240 if (VECTOR_MODE_P (mode))
6241 mode = GET_MODE_INNER (mode);
6242
6243 if (GET_MODE_SIZE (mode) != 8)
6244 return NULL_RTX;
6245
6246 rtx p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
6247 unsigned regno = REGNO (rtl);
6248 XVECEXP (p, 0, 0) = gen_rtx_REG (SImode, regno);
6249 XVECEXP (p, 0, 1) = gen_rtx_REG (SImode, regno + 1);
6250
6251 return p;
6252}
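/* For example (illustrative only): a DImode or DFmode value held in an SGPR
   pair such as s[4:5] is described to DWARF as a PARALLEL of the two SImode
   registers s4 and s5; values smaller than 8 bytes need no span and return
   NULL_RTX above.  */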
6253
6254/* }}} */
6255/* {{{ TARGET hook overrides. */
6256
6257#undef TARGET_ADDR_SPACE_ADDRESS_MODE
6258#define TARGET_ADDR_SPACE_ADDRESS_MODE gcn_addr_space_address_mode
6259#undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
6260#define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
6261 gcn_addr_space_legitimate_address_p
6262#undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
6263#define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS gcn_addr_space_legitimize_address
6264#undef TARGET_ADDR_SPACE_POINTER_MODE
6265#define TARGET_ADDR_SPACE_POINTER_MODE gcn_addr_space_pointer_mode
6266#undef TARGET_ADDR_SPACE_SUBSET_P
6267#define TARGET_ADDR_SPACE_SUBSET_P gcn_addr_space_subset_p
6268#undef TARGET_ADDR_SPACE_CONVERT
6269#define TARGET_ADDR_SPACE_CONVERT gcn_addr_space_convert
6270#undef TARGET_ARG_PARTIAL_BYTES
6271#define TARGET_ARG_PARTIAL_BYTES gcn_arg_partial_bytes
6272#undef TARGET_ASM_ALIGNED_DI_OP
6273#define TARGET_ASM_ALIGNED_DI_OP "\t.8byte\t"
6274#undef TARGET_ASM_FILE_START
6275#define TARGET_ASM_FILE_START output_file_start
6276#undef TARGET_ASM_FUNCTION_PROLOGUE
6277#define TARGET_ASM_FUNCTION_PROLOGUE gcn_target_asm_function_prologue
6278#undef TARGET_ASM_SELECT_SECTION
6279#define TARGET_ASM_SELECT_SECTION gcn_asm_select_section
6280#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
6281#define TARGET_ASM_TRAMPOLINE_TEMPLATE gcn_asm_trampoline_template
6282#undef TARGET_ATTRIBUTE_TABLE
6283#define TARGET_ATTRIBUTE_TABLE gcn_attribute_table
6284#undef TARGET_BUILTIN_DECL
6285#define TARGET_BUILTIN_DECL gcn_builtin_decl
6286#undef TARGET_CAN_CHANGE_MODE_CLASS
6287#define TARGET_CAN_CHANGE_MODE_CLASS gcn_can_change_mode_class
6288#undef TARGET_CAN_ELIMINATE
6289#define TARGET_CAN_ELIMINATE gcn_can_eliminate_p
6290#undef TARGET_CANNOT_COPY_INSN_P
6291#define TARGET_CANNOT_COPY_INSN_P gcn_cannot_copy_insn_p
6292#undef TARGET_CLASS_LIKELY_SPILLED_P
6293#define TARGET_CLASS_LIKELY_SPILLED_P gcn_class_likely_spilled_p
6294#undef TARGET_CLASS_MAX_NREGS
6295#define TARGET_CLASS_MAX_NREGS gcn_class_max_nregs
6296#undef TARGET_CONDITIONAL_REGISTER_USAGE
6297#define TARGET_CONDITIONAL_REGISTER_USAGE gcn_conditional_register_usage
6298#undef TARGET_CONSTANT_ALIGNMENT
6299#define TARGET_CONSTANT_ALIGNMENT gcn_constant_alignment
6300#undef TARGET_DEBUG_UNWIND_INFO
6301#define TARGET_DEBUG_UNWIND_INFO gcn_debug_unwind_info
6302#undef TARGET_DWARF_REGISTER_SPAN
6303#define TARGET_DWARF_REGISTER_SPAN gcn_dwarf_register_span
6304#undef TARGET_EMUTLS_VAR_INIT
6305#define TARGET_EMUTLS_VAR_INIT gcn_emutls_var_init
6306#undef TARGET_EXPAND_BUILTIN
6307#define TARGET_EXPAND_BUILTIN gcn_expand_builtin
6308#undef TARGET_FUNCTION_ARG
6309#undef TARGET_FUNCTION_ARG_ADVANCE
6310#define TARGET_FUNCTION_ARG_ADVANCE gcn_function_arg_advance
6311#define TARGET_FUNCTION_ARG gcn_function_arg
6312#undef TARGET_FUNCTION_VALUE
6313#define TARGET_FUNCTION_VALUE gcn_function_value
6314#undef TARGET_FUNCTION_VALUE_REGNO_P
6315#define TARGET_FUNCTION_VALUE_REGNO_P gcn_function_value_regno_p
6316#undef TARGET_GIMPLIFY_VA_ARG_EXPR
6317#define TARGET_GIMPLIFY_VA_ARG_EXPR gcn_gimplify_va_arg_expr
6318#undef TARGET_OMP_DEVICE_KIND_ARCH_ISA
6319#define TARGET_OMP_DEVICE_KIND_ARCH_ISA gcn_omp_device_kind_arch_isa
6320#undef TARGET_GOACC_ADJUST_PROPAGATION_RECORD
6321#define TARGET_GOACC_ADJUST_PROPAGATION_RECORD \
6322 gcn_goacc_adjust_propagation_record
6323#undef TARGET_GOACC_ADJUST_GANGPRIVATE_DECL
6324#define TARGET_GOACC_ADJUST_GANGPRIVATE_DECL gcn_goacc_adjust_gangprivate_decl
6325#undef TARGET_GOACC_FORK_JOIN
6326#define TARGET_GOACC_FORK_JOIN gcn_fork_join
6327#undef TARGET_GOACC_REDUCTION
6328#define TARGET_GOACC_REDUCTION gcn_goacc_reduction
6329#undef TARGET_GOACC_VALIDATE_DIMS
6330#define TARGET_GOACC_VALIDATE_DIMS gcn_goacc_validate_dims
6331#undef TARGET_HARD_REGNO_MODE_OK
6332#define TARGET_HARD_REGNO_MODE_OK gcn_hard_regno_mode_ok
6333#undef TARGET_HARD_REGNO_NREGS
6334#define TARGET_HARD_REGNO_NREGS gcn_hard_regno_nregs
6335#undef TARGET_HAVE_SPECULATION_SAFE_VALUE
6336#define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed
6337#undef TARGET_INIT_BUILTINS
6338#define TARGET_INIT_BUILTINS gcn_init_builtins
6339#undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
6340#define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
6341 gcn_ira_change_pseudo_allocno_class
6342#undef TARGET_LEGITIMATE_CONSTANT_P
6343#define TARGET_LEGITIMATE_CONSTANT_P gcn_legitimate_constant_p
6344#undef TARGET_LRA_P
6345#define TARGET_LRA_P hook_bool_void_true
6346#undef TARGET_MACHINE_DEPENDENT_REORG
6347#define TARGET_MACHINE_DEPENDENT_REORG gcn_md_reorg
6348#undef TARGET_MEMORY_MOVE_COST
6349#define TARGET_MEMORY_MOVE_COST gcn_memory_move_cost
6350#undef TARGET_MODES_TIEABLE_P
6351#define TARGET_MODES_TIEABLE_P gcn_modes_tieable_p
6352#undef TARGET_OPTION_OVERRIDE
6353#define TARGET_OPTION_OVERRIDE gcn_option_override
6354#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
6355#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED \
6356 gcn_pretend_outgoing_varargs_named
6357#undef TARGET_PROMOTE_FUNCTION_MODE
6358#define TARGET_PROMOTE_FUNCTION_MODE gcn_promote_function_mode
6359#undef TARGET_REGISTER_MOVE_COST
6360#define TARGET_REGISTER_MOVE_COST gcn_register_move_cost
6361#undef TARGET_RETURN_IN_MEMORY
6362#define TARGET_RETURN_IN_MEMORY gcn_return_in_memory
6363#undef TARGET_RTX_COSTS
6364#define TARGET_RTX_COSTS gcn_rtx_costs
6365#undef TARGET_SECONDARY_RELOAD
6366#define TARGET_SECONDARY_RELOAD gcn_secondary_reload
6367#undef TARGET_SECTION_TYPE_FLAGS
6368#define TARGET_SECTION_TYPE_FLAGS gcn_section_type_flags
6369#undef TARGET_SCALAR_MODE_SUPPORTED_P
6370#define TARGET_SCALAR_MODE_SUPPORTED_P gcn_scalar_mode_supported_p
6371#undef TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P
6372#define TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P \
6373 gcn_small_register_classes_for_mode_p
6374#undef TARGET_SPILL_CLASS
6375#define TARGET_SPILL_CLASS gcn_spill_class
6376#undef TARGET_STRICT_ARGUMENT_NAMING
6377#define TARGET_STRICT_ARGUMENT_NAMING gcn_strict_argument_naming
6378#undef TARGET_TRAMPOLINE_INIT
6379#define TARGET_TRAMPOLINE_INIT gcn_trampoline_init
6380#undef TARGET_TRULY_NOOP_TRUNCATION
6381#define TARGET_TRULY_NOOP_TRUNCATION gcn_truly_noop_truncation
6382#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
6383#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST gcn_vectorization_cost
6384#undef TARGET_VECTORIZE_GET_MASK_MODE
6385#define TARGET_VECTORIZE_GET_MASK_MODE gcn_vectorize_get_mask_mode
6386#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
6387#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE gcn_vectorize_preferred_simd_mode
6388#undef TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT
6389#define TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT \
6390 gcn_preferred_vector_alignment
6391#undef TARGET_VECTORIZE_RELATED_MODE
6392#define TARGET_VECTORIZE_RELATED_MODE gcn_related_vector_mode
6393#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
6394#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
6395 gcn_vectorize_support_vector_misalignment
6396#undef TARGET_VECTORIZE_VEC_PERM_CONST
6397#define TARGET_VECTORIZE_VEC_PERM_CONST gcn_vectorize_vec_perm_const
6398#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
6399#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
6400 gcn_vector_alignment_reachable
6401#undef TARGET_VECTOR_MODE_SUPPORTED_P
6402#define TARGET_VECTOR_MODE_SUPPORTED_P gcn_vector_mode_supported_p
6403
6404struct gcc_target targetm = TARGET_INITIALIZER;
6405
6406#include "gt-gcn.h"
6407/* }}} */