/* Copyright (C) 2016-2024 Free Software Foundation, Inc.

   This file is free software; you can redistribute it and/or modify it under
   the terms of the GNU General Public License as published by the Free
   Software Foundation; either version 3 of the License, or (at your option)
   any later version.

   This file is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

/* {{{ Includes.  */

/* We want GET_MODE_SIZE et al to return integers, please.  */
#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "memmodel.h"
#include "rtl.h"
#include "tree.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "diagnostic-core.h"
#include "insn-attr.h"
#include "fold-const.h"
#include "calls.h"
#include "explow.h"
#include "expr.h"
#include "output.h"
#include "cfgrtl.h"
#include "langhooks.h"
#include "builtins.h"
#include "omp-general.h"
#include "print-rtl.h"
#include "attribs.h"
#include "varasm.h"
#include "intl.h"
#include "rtl-iter.h"
#include "dwarf2.h"
#include "gimple.h"
#include "cgraph.h"
#include "case-cfn-macros.h"

/* This file should be included last.  */
#include "target-def.h"

/* }}} */
/* {{{ Global variables.  */

/* Constants used by FP instructions.  */

static REAL_VALUE_TYPE dconst4, dconst1over2pi;
static bool ext_gcn_constants_init = false;

/* Holds the ISA variant, derived from the command line parameters.  */

enum gcn_isa gcn_isa = ISA_GCN3;  /* Default to GCN3.  */

/* Reserve this much space for LDS (for propagating variables from
   worker-single mode to worker-partitioned mode), per workgroup.  Global
   analysis could calculate an exact bound, but we don't do that yet.

   We want to permit full occupancy, so size accordingly.  */

/* Use this as a default, but allow it to grow if the user requests a large
   amount of gang-private shared-memory space.  */
static int acc_lds_size = 0x600;

#define OMP_LDS_SIZE 0x600    /* 0x600 is 1/40 total, rounded down.  */
#define ACC_LDS_SIZE acc_lds_size
#define OTHER_LDS_SIZE 65536  /* If in doubt, reserve all of it.  */

#define LDS_SIZE (flag_openacc ? ACC_LDS_SIZE \
		  : flag_openmp ? OMP_LDS_SIZE \
		  : OTHER_LDS_SIZE)
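
/* For illustration: 0x600 is 1536 bytes, so forty workgroups each reserving
   OMP_LDS_SIZE consume 40 * 1536 = 61440 of the 65536 bytes of LDS; that is
   the "1/40 total, rounded down" noted above.  */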

static int gang_private_hwm = 32;
static hash_map<tree, int> lds_allocs;

/* The number of registers usable by normal non-kernel functions.
   The SGPR count includes any special extra registers such as VCC.  */

#define MAX_NORMAL_SGPR_COUNT	62  // i.e. 64 with VCC
#define MAX_NORMAL_VGPR_COUNT	24
#define MAX_NORMAL_AVGPR_COUNT	24

/* }}} */
/* {{{ Initialization and options.  */

/* Initialize machine_function.  */

static struct machine_function *
gcn_init_machine_status (void)
{
  struct machine_function *f;

  f = ggc_cleared_alloc<machine_function> ();

  // FIXME: re-enable global addressing with safety for LDS-flat addresses
  //if (TARGET_GCN3)
  f->use_flat_addressing = true;

  return f;
}

/* Implement TARGET_OPTION_OVERRIDE.

   Override option settings where defaults are variable, or we have specific
   needs to consider.  */

static void
gcn_option_override (void)
{
  init_machine_status = gcn_init_machine_status;

  /* The HSA runtime does not respect ELF load addresses, so force PIE.  */
  if (!flag_pie)
    flag_pie = 2;
  if (!flag_pic)
    flag_pic = flag_pie;

  gcn_isa = (gcn_arch == PROCESSOR_FIJI ? ISA_GCN3
	     : gcn_arch == PROCESSOR_VEGA10 ? ISA_GCN5
	     : gcn_arch == PROCESSOR_VEGA20 ? ISA_GCN5
	     : gcn_arch == PROCESSOR_GFX908 ? ISA_CDNA1
	     : gcn_arch == PROCESSOR_GFX90a ? ISA_CDNA2
	     : gcn_arch == PROCESSOR_GFX1030 ? ISA_RDNA2
	     : ISA_UNKNOWN);
  gcc_assert (gcn_isa != ISA_UNKNOWN);

  /* Reserve 1KiB (somewhat arbitrarily) of LDS space for reduction results
     and worker broadcasts.  */
  if (gang_private_size_opt == -1)
    gang_private_size_opt = 512;
  else if (gang_private_size_opt < gang_private_hwm)
    gang_private_size_opt = gang_private_hwm;
  else if (gang_private_size_opt >= acc_lds_size - 1024)
    {
      /* We need some space for reductions and worker broadcasting.  If the
	 user requests a large amount of gang-private LDS space, we might not
	 have enough left for the former.  Increase the LDS allocation in
	 that case, although this may reduce the maximum occupancy on the
	 hardware.  */
      acc_lds_size = gang_private_size_opt + 1024;
      if (acc_lds_size > 32768)
	acc_lds_size = 32768;
    }

  /* gfx803 "Fiji" and gfx1030 do not support XNACK.  */
  if (gcn_arch == PROCESSOR_FIJI
      || gcn_arch == PROCESSOR_GFX1030)
    {
      if (flag_xnack == HSACO_ATTR_ON)
	error ("-mxnack=on is incompatible with -march=%s",
	       (gcn_arch == PROCESSOR_FIJI ? "fiji"
		: gcn_arch == PROCESSOR_GFX1030 ? "gfx1030"
		: NULL));
      /* Allow HSACO_ATTR_ANY silently because that's the default.  */
      flag_xnack = HSACO_ATTR_OFF;
    }

  /* There's no need for XNACK on devices without USM, and there are register
     allocation problems caused by the early-clobber when AVGPR spills are
     not available.
     FIXME: can the regalloc mean the default can be really "any"?  */
  if (flag_xnack == HSACO_ATTR_DEFAULT)
    switch (gcn_arch)
      {
      case PROCESSOR_FIJI:
      case PROCESSOR_VEGA10:
      case PROCESSOR_VEGA20:
      case PROCESSOR_GFX908:
	flag_xnack = HSACO_ATTR_OFF;
	break;
      case PROCESSOR_GFX90a:
	flag_xnack = HSACO_ATTR_ANY;
	break;
      default:
	gcc_unreachable ();
      }

  if (flag_sram_ecc == HSACO_ATTR_DEFAULT)
    flag_sram_ecc = HSACO_ATTR_ANY;
}

/* }}} */
/* {{{ Attributes.  */

/* This table defines the arguments that are permitted in
   __attribute__ ((amdgpu_hsa_kernel (...))).

   The names and values correspond to the HSA metadata that is encoded
   into the assembler file and binary.  */

static const struct gcn_kernel_arg_type
{
  const char *name;
  const char *header_pseudo;
  machine_mode mode;

  /* This should be set to -1 or -2 for a dynamically allocated register
     number.  Use -1 if this argument contributes to the user_sgpr_count,
     -2 otherwise.  */
  int fixed_regno;
} gcn_kernel_arg_types[] = {
  {"exec", NULL, DImode, EXEC_REG},
#define PRIVATE_SEGMENT_BUFFER_ARG 1
  {"private_segment_buffer",
   ".amdhsa_user_sgpr_private_segment_buffer", TImode, -1},
#define DISPATCH_PTR_ARG 2
  {"dispatch_ptr", ".amdhsa_user_sgpr_dispatch_ptr", DImode, -1},
#define QUEUE_PTR_ARG 3
  {"queue_ptr", ".amdhsa_user_sgpr_queue_ptr", DImode, -1},
#define KERNARG_SEGMENT_PTR_ARG 4
  {"kernarg_segment_ptr", ".amdhsa_user_sgpr_kernarg_segment_ptr", DImode, -1},
  {"dispatch_id", ".amdhsa_user_sgpr_dispatch_id", DImode, -1},
#define FLAT_SCRATCH_INIT_ARG 6
  {"flat_scratch_init", ".amdhsa_user_sgpr_flat_scratch_init", DImode, -1},
#define FLAT_SCRATCH_SEGMENT_SIZE_ARG 7
  {"private_segment_size", ".amdhsa_user_sgpr_private_segment_size", SImode,
   -1},
#define WORKGROUP_ID_X_ARG 8
  {"workgroup_id_X", ".amdhsa_system_sgpr_workgroup_id_x", SImode, -2},
  {"workgroup_id_Y", ".amdhsa_system_sgpr_workgroup_id_y", SImode, -2},
  {"workgroup_id_Z", ".amdhsa_system_sgpr_workgroup_id_z", SImode, -2},
  {"workgroup_info", ".amdhsa_system_sgpr_workgroup_info", SImode, -1},
#define PRIVATE_SEGMENT_WAVE_OFFSET_ARG 12
  {"private_segment_wave_offset",
   ".amdhsa_system_sgpr_private_segment_wavefront_offset", SImode, -2},
#define WORK_ITEM_ID_X_ARG 13
  {"work_item_id_X", NULL, V64SImode, FIRST_VGPR_REG},
#define WORK_ITEM_ID_Y_ARG 14
  {"work_item_id_Y", NULL, V64SImode, FIRST_VGPR_REG + 1},
#define WORK_ITEM_ID_Z_ARG 15
  {"work_item_id_Z", NULL, V64SImode, FIRST_VGPR_REG + 2}
};

static const long default_requested_args
  = (1 << DISPATCH_PTR_ARG)
    | (1 << QUEUE_PTR_ARG)
    | (1 << KERNARG_SEGMENT_PTR_ARG)
    | (1 << WORKGROUP_ID_X_ARG)
    | (1 << WORK_ITEM_ID_X_ARG)
    | (1 << WORK_ITEM_ID_Y_ARG)
    | (1 << WORK_ITEM_ID_Z_ARG);

/* Extract parameter settings from __attribute__((amdgpu_hsa_kernel ())).
   This function also sets the default values for some arguments.

   Return true if a parse error was found, with ARGS populated as far as
   possible.  */

static bool
gcn_parse_amdgpu_hsa_kernel_attribute (struct gcn_kernel_args *args,
				       tree list)
{
  bool err = false;
  args->requested = default_requested_args;
  args->nargs = 0;

  for (int a = 0; a < GCN_KERNEL_ARG_TYPES; a++)
    args->reg[a] = -1;

  for (; list; list = TREE_CHAIN (list))
    {
      const char *str;
      if (TREE_CODE (TREE_VALUE (list)) != STRING_CST)
	{
	  error ("%<amdgpu_hsa_kernel%> attribute requires string constant "
		 "arguments");
	  break;
	}
      str = TREE_STRING_POINTER (TREE_VALUE (list));
      int a;
      for (a = 0; a < GCN_KERNEL_ARG_TYPES; a++)
	{
	  if (!strcmp (str, gcn_kernel_arg_types[a].name))
	    break;
	}
      if (a == GCN_KERNEL_ARG_TYPES)
	{
	  error ("unknown specifier %qs in %<amdgpu_hsa_kernel%> attribute",
		 str);
	  err = true;
	  break;
	}
      if (args->requested & (1 << a))
	{
	  error ("duplicated parameter specifier %qs in %<amdgpu_hsa_kernel%> "
		 "attribute", str);
	  err = true;
	  break;
	}
      args->requested |= (1 << a);
      args->order[args->nargs++] = a;
    }

  /* Requesting WORK_ITEM_ID_Z_ARG implies requesting WORK_ITEM_ID_X_ARG and
     WORK_ITEM_ID_Y_ARG.  Similarly, requesting WORK_ITEM_ID_Y_ARG implies
     requesting WORK_ITEM_ID_X_ARG.  */
  if (args->requested & (1 << WORK_ITEM_ID_Z_ARG))
    args->requested |= (1 << WORK_ITEM_ID_Y_ARG);
  if (args->requested & (1 << WORK_ITEM_ID_Y_ARG))
    args->requested |= (1 << WORK_ITEM_ID_X_ARG);

  int sgpr_regno = FIRST_SGPR_REG;
  args->nsgprs = 0;
  for (int a = 0; a < GCN_KERNEL_ARG_TYPES; a++)
    {
      if (!(args->requested & (1 << a)))
	continue;

      if (gcn_kernel_arg_types[a].fixed_regno >= 0)
	args->reg[a] = gcn_kernel_arg_types[a].fixed_regno;
      else
	{
	  int reg_count;

	  switch (gcn_kernel_arg_types[a].mode)
	    {
	    case E_SImode:
	      reg_count = 1;
	      break;
	    case E_DImode:
	      reg_count = 2;
	      break;
	    case E_TImode:
	      reg_count = 4;
	      break;
	    default:
	      gcc_unreachable ();
	    }
	  args->reg[a] = sgpr_regno;
	  sgpr_regno += reg_count;
	  if (gcn_kernel_arg_types[a].fixed_regno == -1)
	    args->nsgprs += reg_count;
	}
    }
  if (sgpr_regno > FIRST_SGPR_REG + 16)
    error ("too many arguments passed in sgpr registers");

  return err;
}
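
/* A hypothetical use of the attribute, for illustration (the argument
   strings must match the .name fields in gcn_kernel_arg_types, and the
   defaults in default_requested_args are always implied):

     void f (void) __attribute__ ((amdgpu_hsa_kernel ("exec",
						       "dispatch_ptr")));  */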

/* Referenced by TARGET_ATTRIBUTE_TABLE.

   Validates target specific attributes.  */

static tree
gcn_handle_amdgpu_hsa_kernel_attribute (tree *node, tree name,
					tree args, int, bool *no_add_attrs)
{
  if (!FUNC_OR_METHOD_TYPE_P (*node))
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  /* Check the argument strings against the known kernel argument names.  */
  if (is_attribute_p ("gcnhsa_kernel", name))
    {
      struct gcn_kernel_args kernelarg;

      if (gcn_parse_amdgpu_hsa_kernel_attribute (&kernelarg, args))
	*no_add_attrs = true;

      return NULL_TREE;
    }

  return NULL_TREE;
}

/* Implement TARGET_ATTRIBUTE_TABLE.

   Create target-specific __attribute__ types.  */

TARGET_GNU_ATTRIBUTES (gcn_attribute_table, {
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  {"amdgpu_hsa_kernel", 0, GCN_KERNEL_ARG_TYPES, false, true,
   true, true, gcn_handle_amdgpu_hsa_kernel_attribute, NULL}
});

/* }}} */
/* {{{ Registers and modes.  */

/* Implement TARGET_SCALAR_MODE_SUPPORTED_P.  */

bool
gcn_scalar_mode_supported_p (scalar_mode mode)
{
  return (mode == BImode
	  || mode == QImode
	  || mode == HImode /* || mode == HFmode  */
	  || mode == SImode || mode == SFmode
	  || mode == DImode || mode == DFmode
	  || mode == TImode);
}

/* Return a vector mode with N lanes of MODE.  */

static machine_mode
VnMODE (int n, machine_mode mode)
{
  switch (mode)
    {
    case E_QImode:
      switch (n)
	{
	case 2: return V2QImode;
	case 4: return V4QImode;
	case 8: return V8QImode;
	case 16: return V16QImode;
	case 32: return V32QImode;
	case 64: return V64QImode;
	}
      break;
    case E_HImode:
      switch (n)
	{
	case 2: return V2HImode;
	case 4: return V4HImode;
	case 8: return V8HImode;
	case 16: return V16HImode;
	case 32: return V32HImode;
	case 64: return V64HImode;
	}
      break;
    case E_HFmode:
      switch (n)
	{
	case 2: return V2HFmode;
	case 4: return V4HFmode;
	case 8: return V8HFmode;
	case 16: return V16HFmode;
	case 32: return V32HFmode;
	case 64: return V64HFmode;
	}
      break;
    case E_SImode:
      switch (n)
	{
	case 2: return V2SImode;
	case 4: return V4SImode;
	case 8: return V8SImode;
	case 16: return V16SImode;
	case 32: return V32SImode;
	case 64: return V64SImode;
	}
      break;
    case E_SFmode:
      switch (n)
	{
	case 2: return V2SFmode;
	case 4: return V4SFmode;
	case 8: return V8SFmode;
	case 16: return V16SFmode;
	case 32: return V32SFmode;
	case 64: return V64SFmode;
	}
      break;
    case E_DImode:
      switch (n)
	{
	case 2: return V2DImode;
	case 4: return V4DImode;
	case 8: return V8DImode;
	case 16: return V16DImode;
	case 32: return V32DImode;
	case 64: return V64DImode;
	}
      break;
    case E_DFmode:
      switch (n)
	{
	case 2: return V2DFmode;
	case 4: return V4DFmode;
	case 8: return V8DFmode;
	case 16: return V16DFmode;
	case 32: return V32DFmode;
	case 64: return V64DFmode;
	}
      break;
    default:
      break;
    }

  return VOIDmode;
}
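
/* For example, VnMODE (64, SImode) is V64SImode and VnMODE (2, DFmode) is
   V2DFmode; unsupported combinations, such as VnMODE (3, SImode), yield
   VOIDmode.  */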

/* Implement TARGET_CLASS_MAX_NREGS.

   Return the number of hard registers needed to hold a value of MODE in
   a register of class RCLASS.  */

static unsigned char
gcn_class_max_nregs (reg_class_t rclass, machine_mode mode)
{
  /* Scalar registers are 32bit, vector registers are in fact tuples of
     64 lanes.  */
  if (rclass == VGPR_REGS || rclass == AVGPR_REGS
      || rclass == ALL_VGPR_REGS)
    {
      if (vgpr_1reg_mode_p (mode))
	return 1;
      if (vgpr_2reg_mode_p (mode))
	return 2;
      /* TImode is used by DImode compare_and_swap.  */
      if (vgpr_4reg_mode_p (mode))
	return 4;
    }
  else if (rclass == VCC_CONDITIONAL_REG && mode == BImode)
    return 2;

  /* Vector modes in SGPRs are not supposed to happen (disallowed by
     gcn_hard_regno_mode_ok), but there are some patterns that have an "Sv"
     constraint and are used by splitters, post-reload.
     This ensures that we don't accidentally mark the following 63 scalar
     registers as "live".  */
  if (rclass == SGPR_REGS && VECTOR_MODE_P (mode))
    return CEIL (GET_MODE_SIZE (GET_MODE_INNER (mode)), 4);

  return CEIL (GET_MODE_SIZE (mode), 4);
}
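
/* For example, DImode in SGPR_REGS occupies CEIL (8, 4) = 2 scalar
   registers, whereas a 64-bit vector mode such as V64DImode in VGPR_REGS
   occupies 2 vector registers, one 64-lane register per 32-bit half.  */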

/* Implement TARGET_HARD_REGNO_NREGS.

   Return the number of hard registers needed to hold a value of MODE in
   REGNO.  */

unsigned int
gcn_hard_regno_nregs (unsigned int regno, machine_mode mode)
{
  return gcn_class_max_nregs (REGNO_REG_CLASS (regno), mode);
}

/* Implement TARGET_HARD_REGNO_MODE_OK.

   Return true if REGNO can hold value in MODE.  */

bool
gcn_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
{
  /* Treat a complex mode as if it were a scalar mode of the same overall
     size for the purposes of allocating hard registers.  */
  if (COMPLEX_MODE_P (mode))
    switch (mode)
      {
      case E_CQImode:
      case E_CHImode:
	mode = SImode;
	break;
      case E_CSImode:
	mode = DImode;
	break;
      case E_CDImode:
	mode = TImode;
	break;
      case E_HCmode:
	mode = SFmode;
	break;
      case E_SCmode:
	mode = DFmode;
	break;
      default:
	/* Not supported.  */
	return false;
      }

  switch (regno)
    {
    case FLAT_SCRATCH_LO_REG:
    case XNACK_MASK_LO_REG:
    case TBA_LO_REG:
    case TMA_LO_REG:
      return (mode == SImode || mode == DImode);
    case VCC_LO_REG:
    case EXEC_LO_REG:
      return (mode == BImode || mode == SImode || mode == DImode);
    case M0_REG:
    case FLAT_SCRATCH_HI_REG:
    case XNACK_MASK_HI_REG:
    case TBA_HI_REG:
    case TMA_HI_REG:
      return mode == SImode;
    case VCC_HI_REG:
      return false;
    case EXEC_HI_REG:
      return mode == SImode /*|| mode == V32BImode */;
    case SCC_REG:
    case VCCZ_REG:
    case EXECZ_REG:
      return mode == BImode;
    }
  if (regno == ARG_POINTER_REGNUM || regno == FRAME_POINTER_REGNUM)
    return true;
  if (SGPR_REGNO_P (regno))
    /* We restrict double register values to aligned registers.  */
    return (sgpr_1reg_mode_p (mode)
	    || (!((regno - FIRST_SGPR_REG) & 1) && sgpr_2reg_mode_p (mode))
	    || (((regno - FIRST_SGPR_REG) & 3) == 0 && mode == TImode));
  if (VGPR_REGNO_P (regno) || (AVGPR_REGNO_P (regno) && TARGET_CDNA1_PLUS))
    /* Vector instructions do not care about the alignment of register
       pairs, but where there is no 64-bit instruction, many of the
       define_split do not work if the input and output registers partially
       overlap.  We tried to fix this with early clobber and match
       constraints, but it was bug prone, added complexity, and conflicts
       with the 'U0' constraints on vec_merge.
       Therefore, we restrict ourselves to aligned registers.  */
    return (vgpr_1reg_mode_p (mode)
	    || (!((regno - FIRST_VGPR_REG) & 1) && vgpr_2reg_mode_p (mode))
	    /* TImode is used by DImode compare_and_swap,
	       and by DIVMOD V64DImode libfuncs.  */
	    || (!((regno - FIRST_VGPR_REG) & 3) && vgpr_4reg_mode_p (mode)));
  return false;
}

/* Implement REGNO_REG_CLASS via gcn.h.

   Return smallest class containing REGNO.  */

enum reg_class
gcn_regno_reg_class (int regno)
{
  switch (regno)
    {
    case SCC_REG:
      return SCC_CONDITIONAL_REG;
    case VCC_LO_REG:
    case VCC_HI_REG:
      return VCC_CONDITIONAL_REG;
    case VCCZ_REG:
      return VCCZ_CONDITIONAL_REG;
    case EXECZ_REG:
      return EXECZ_CONDITIONAL_REG;
    case EXEC_LO_REG:
    case EXEC_HI_REG:
      return EXEC_MASK_REG;
    }
  if (VGPR_REGNO_P (regno))
    return VGPR_REGS;
  if (AVGPR_REGNO_P (regno))
    return AVGPR_REGS;
  if (SGPR_REGNO_P (regno))
    return SGPR_REGS;
  if (regno < FIRST_VGPR_REG)
    return GENERAL_REGS;
  if (regno == ARG_POINTER_REGNUM || regno == FRAME_POINTER_REGNUM)
    return AFP_REGS;
  return ALL_REGS;
}

/* Implement TARGET_CAN_CHANGE_MODE_CLASS.

   GCC assumes that lowpart contains first part of value as stored in memory.
   This is not the case for vector registers.  */

bool
gcn_can_change_mode_class (machine_mode from, machine_mode to,
			   reg_class_t regclass)
{
  if (!vgpr_vector_mode_p (from) && !vgpr_vector_mode_p (to))
    return true;

  /* Vector conversions are only valid when changing mode with a fixed number
     of lanes, or changing number of lanes with a fixed mode.  Anything else
     would require actual data movement.  */
  if (VECTOR_MODE_P (from) && VECTOR_MODE_P (to)
      && GET_MODE_NUNITS (from) != GET_MODE_NUNITS (to)
      && GET_MODE_INNER (from) != GET_MODE_INNER (to))
    return false;

  /* Vector/scalar conversions are only permitted when the scalar mode
     is the same or smaller than the inner vector mode.  */
  if ((VECTOR_MODE_P (from) && !VECTOR_MODE_P (to)
       && GET_MODE_SIZE (to) >= GET_MODE_SIZE (GET_MODE_INNER (from)))
      || (VECTOR_MODE_P (to) && !VECTOR_MODE_P (from)
	  && GET_MODE_SIZE (from) >= GET_MODE_SIZE (GET_MODE_INNER (to))))
    return false;

  return (gcn_class_max_nregs (regclass, from)
	  == gcn_class_max_nregs (regclass, to));
}

/* Implement TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P.

   When this hook returns true for MODE, the compiler allows
   registers explicitly used in the rtl to be used as spill registers
   but prevents the compiler from extending the lifetime of these
   registers.  */

bool
gcn_small_register_classes_for_mode_p (machine_mode mode)
{
  /* We allocate into exec and vcc regs.  Those make a small register
     class.  */
  return mode == DImode || mode == SImode;
}

/* Implement TARGET_CLASS_LIKELY_SPILLED_P.

   Returns true if pseudos that have been assigned to registers of class
   RCLASS would likely be spilled because registers of RCLASS are needed
   for spill registers.  */

static bool
gcn_class_likely_spilled_p (reg_class_t rclass)
{
  return (rclass == EXEC_MASK_REG
	  || reg_classes_intersect_p (ALL_CONDITIONAL_REGS, rclass));
}

/* Implement TARGET_MODES_TIEABLE_P.

   Returns true if a value of MODE1 is accessible in MODE2 without
   copying.  */

bool
gcn_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
  if (VECTOR_MODE_P (mode1) || VECTOR_MODE_P (mode2))
    {
      int vf1 = (VECTOR_MODE_P (mode1) ? GET_MODE_NUNITS (mode1) : 1);
      int vf2 = (VECTOR_MODE_P (mode2) ? GET_MODE_NUNITS (mode2) : 1);
      machine_mode inner1 = (vf1 > 1 ? GET_MODE_INNER (mode1) : mode1);
      machine_mode inner2 = (vf2 > 1 ? GET_MODE_INNER (mode2) : mode2);

      return (vf1 == vf2 || (inner1 == inner2 && vf2 <= vf1));
    }

  return (GET_MODE_BITSIZE (mode1) <= MAX_FIXED_MODE_SIZE
	  && GET_MODE_BITSIZE (mode2) <= MAX_FIXED_MODE_SIZE);
}

/* Implement TARGET_TRULY_NOOP_TRUNCATION.

   Returns true if it is safe to "convert" a value of INPREC bits to one of
   OUTPREC bits (where OUTPREC is smaller than INPREC) by merely operating on
   it as if it had only OUTPREC bits.  */

bool
gcn_truly_noop_truncation (poly_uint64 outprec, poly_uint64 inprec)
{
  return ((inprec <= 32) && (outprec <= inprec));
}
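
/* For example, truncating SImode to QImode is a no-op (instructions simply
   ignore the upper bits), but truncating DImode to SImode is not, because
   the 64-bit value occupies two 32-bit registers.  */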

/* Return the N-th part of a value occupying multiple registers.  */

rtx
gcn_operand_part (machine_mode mode, rtx op, int n)
{
  int vf = VECTOR_MODE_P (mode) ? GET_MODE_NUNITS (mode) : 1;

  if (vf > 1)
    {
      machine_mode vsimode = VnMODE (vf, SImode);

      if (REG_P (op))
	{
	  gcc_assert (REGNO (op) + n < FIRST_PSEUDO_REGISTER);
	  return gen_rtx_REG (vsimode, REGNO (op) + n);
	}
      if (GET_CODE (op) == CONST_VECTOR)
	{
	  int units = GET_MODE_NUNITS (mode);
	  rtvec v = rtvec_alloc (units);

	  for (int i = 0; i < units; ++i)
	    RTVEC_ELT (v, i) = gcn_operand_part (GET_MODE_INNER (mode),
						 CONST_VECTOR_ELT (op, i), n);

	  return gen_rtx_CONST_VECTOR (vsimode, v);
	}
      if (GET_CODE (op) == UNSPEC && XINT (op, 1) == UNSPEC_VECTOR)
	return gcn_gen_undef (vsimode);
      gcc_unreachable ();
    }
  else if (GET_MODE_SIZE (mode) == 8 && REG_P (op))
    {
      gcc_assert (REGNO (op) + n < FIRST_PSEUDO_REGISTER);
      return gen_rtx_REG (SImode, REGNO (op) + n);
    }
  else
    {
      if (GET_CODE (op) == UNSPEC && XINT (op, 1) == UNSPEC_VECTOR)
	return gcn_gen_undef (SImode);

      /* If it's a constant then let's assume it is of the largest mode
	 available, otherwise simplify_gen_subreg will fail.  */
      if (mode == VOIDmode && CONST_INT_P (op))
	mode = DImode;
      return simplify_gen_subreg (SImode, op, mode, n * 4);
    }
}
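
/* As an illustration, if OP is a DImode hard register pair then part 0 is
   an SImode reg aliasing the low register and part 1 aliases the high one;
   for a V64DImode register, the parts are the two V64SImode register
   halves.  */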

/* Return the N-th doubleword part of a value occupying multiple
   registers.  */

rtx
gcn_operand_doublepart (machine_mode mode, rtx op, int n)
{
  return simplify_gen_subreg (DImode, op, mode, n * 8);
}

/* Return true if OP can be split into subregs or high/low parts.
   This is always true for scalars, but not normally true for vectors.
   However, for vectors in hardregs we can use the low and high registers.  */

bool
gcn_can_split_p (machine_mode, rtx op)
{
  if (vgpr_vector_mode_p (GET_MODE (op)))
    {
      if (GET_CODE (op) == SUBREG)
	op = SUBREG_REG (op);
      if (!REG_P (op))
	return true;
      return REGNO (op) <= FIRST_PSEUDO_REGISTER;
    }
  return true;
}

/* Implement TARGET_SPILL_CLASS.

   Return class of registers which could be used for pseudo of MODE
   and of class RCLASS for spilling instead of memory.  Return NO_REGS
   if it is not possible or non-profitable.  */

static reg_class_t
gcn_spill_class (reg_class_t c, machine_mode /*mode*/)
{
  if (reg_classes_intersect_p (ALL_CONDITIONAL_REGS, c)
      || c == VCC_CONDITIONAL_REG || c == EXEC_MASK_REG)
    return SGPR_REGS;
  else
    return c == VGPR_REGS && TARGET_CDNA1_PLUS ? AVGPR_REGS : NO_REGS;
}

/* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.

   Change allocno class for given pseudo from allocno and best class
   calculated by IRA.  */

static reg_class_t
gcn_ira_change_pseudo_allocno_class (int regno, reg_class_t cl,
				     reg_class_t best_cl)
{
  /* Avoid returning classes that contain both vgpr and sgpr registers.  */
  if (cl != ALL_REGS && cl != SRCDST_REGS && cl != ALL_GPR_REGS)
    return cl;
  if (best_cl != ALL_REGS && best_cl != SRCDST_REGS
      && best_cl != ALL_GPR_REGS)
    return best_cl;

  machine_mode mode = PSEUDO_REGNO_MODE (regno);
  if (vgpr_vector_mode_p (mode))
    return VGPR_REGS;

  return GENERAL_REGS;
}

/* Create a new DImode pseudo reg and emit an instruction to initialize
   it to VAL.  */

rtx
get_exec (int64_t val)
{
  rtx reg = gen_reg_rtx (DImode);
  emit_insn (gen_rtx_SET (reg, gen_int_mode (val, DImode)));
  return reg;
}

rtx
get_exec (machine_mode mode)
{
  int vf = (VECTOR_MODE_P (mode) ? GET_MODE_NUNITS (mode) : 1);
  return get_exec (0xffffffffffffffffUL >> (64 - vf));
}
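
/* For example, get_exec (V16SImode) initializes the result to 0xffff
   (sixteen active lanes), while get_exec (SImode) yields 0x1 (a single
   active lane).  */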

/* }}} */
/* {{{ Immediate constants.  */

/* Initialize shared numeric constants.  */

static void
init_ext_gcn_constants (void)
{
  real_from_integer (&dconst4, DFmode, 4, SIGNED);

  /* FIXME: this constant probably does not match what hardware really loads.
     Reality check it eventually.  */
  real_from_string (&dconst1over2pi,
		    "0.15915494309189532");
  real_convert (&dconst1over2pi, SFmode, &dconst1over2pi);

  ext_gcn_constants_init = true;
}

REAL_VALUE_TYPE
gcn_dconst1over2pi (void)
{
  if (!ext_gcn_constants_init)
    init_ext_gcn_constants ();
  return dconst1over2pi;
}
5326695a
AS
920/* Return non-zero if X is a constant that can appear as an inline operand.
921 This is 0, 0.5, -0.5, 1, -1, 2, -2, 4,-4, 1/(2*pi)
922 Or a vector of those.
923 The value returned should be the encoding of this constant. */
924
925int
926gcn_inline_fp_constant_p (rtx x, bool allow_vector)
927{
928 machine_mode mode = GET_MODE (x);
45381d6f 929 int vf = VECTOR_MODE_P (mode) ? GET_MODE_NUNITS (mode) : 1;
5326695a 930
45381d6f
AS
931 if (vf > 1)
932 mode = GET_MODE_INNER (mode);
933
934 if (vf > 1
935 && (mode == HFmode || mode == SFmode || mode == DFmode)
5326695a
AS
936 && allow_vector)
937 {
938 int n;
939 if (GET_CODE (x) != CONST_VECTOR)
940 return 0;
941 n = gcn_inline_fp_constant_p (CONST_VECTOR_ELT (x, 0), false);
942 if (!n)
943 return 0;
45381d6f 944 for (int i = 1; i < vf; i++)
5326695a
AS
945 if (CONST_VECTOR_ELT (x, i) != CONST_VECTOR_ELT (x, 0))
946 return 0;
947 return 1;
948 }
949
950 if (mode != HFmode && mode != SFmode && mode != DFmode)
951 return 0;
952
953 const REAL_VALUE_TYPE *r;
954
955 if (x == CONST0_RTX (mode))
956 return 128;
957 if (x == CONST1_RTX (mode))
958 return 242;
959
960 r = CONST_DOUBLE_REAL_VALUE (x);
961
962 if (real_identical (r, &dconstm1))
963 return 243;
964
965 if (real_identical (r, &dconsthalf))
966 return 240;
967 if (real_identical (r, &dconstm1))
968 return 243;
969 if (real_identical (r, &dconst2))
970 return 244;
971 if (real_identical (r, &dconst4))
972 return 246;
973 if (real_identical (r, &dconst1over2pi))
974 return 248;
975 if (!ext_gcn_constants_init)
976 init_ext_gcn_constants ();
977 real_value_negate (r);
978 if (real_identical (r, &dconsthalf))
979 return 241;
980 if (real_identical (r, &dconst2))
981 return 245;
982 if (real_identical (r, &dconst4))
983 return 247;
984
985 /* FIXME: add 4, -4 and 1/(2*PI). */
986
987 return 0;
988}

/* Return true if X is a constant that can appear as an immediate operand.
   This is 0, 0.5, -0.5, 1, -1, 2, -2, 4, -4, 1/(2*pi), a vector of those,
   or (except in DFmode) any other constant that fits a 32-bit literal.  */

bool
gcn_fp_constant_p (rtx x, bool allow_vector)
{
  machine_mode mode = GET_MODE (x);
  int vf = VECTOR_MODE_P (mode) ? GET_MODE_NUNITS (mode) : 1;

  if (vf > 1)
    mode = GET_MODE_INNER (mode);

  if (vf > 1
      && (mode == HFmode || mode == SFmode || mode == DFmode)
      && allow_vector)
    {
      int n;
      if (GET_CODE (x) != CONST_VECTOR)
	return false;
      n = gcn_fp_constant_p (CONST_VECTOR_ELT (x, 0), false);
      if (!n)
	return false;
      for (int i = 1; i < vf; i++)
	if (CONST_VECTOR_ELT (x, i) != CONST_VECTOR_ELT (x, 0))
	  return false;
      return true;
    }
  if (mode != HFmode && mode != SFmode && mode != DFmode)
    return false;

  if (gcn_inline_fp_constant_p (x, false))
    return true;
  /* FIXME: It is not clear how 32bit immediates are interpreted here.  */
  return (mode != DFmode);
}

/* Return true if X is a constant representable as an inline immediate
   constant in a 32-bit instruction encoding.  */

bool
gcn_inline_constant_p (rtx x)
{
  if (GET_CODE (x) == CONST_INT)
    return INTVAL (x) >= -16 && INTVAL (x) <= 64;
  if (GET_CODE (x) == CONST_DOUBLE)
    return gcn_inline_fp_constant_p (x, false);
  if (GET_CODE (x) == CONST_VECTOR)
    {
      int n;
      if (!vgpr_vector_mode_p (GET_MODE (x)))
	return false;
      n = gcn_inline_constant_p (CONST_VECTOR_ELT (x, 0));
      if (!n)
	return false;
      for (int i = 1; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
	if (CONST_VECTOR_ELT (x, i) != CONST_VECTOR_ELT (x, 0))
	  return false;
      return true;
    }
  return false;
}
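
/* Thus GEN_INT (64) is inline-encodable, GEN_INT (65) is not (it needs a
   32-bit literal), and a vector splat of an inline-encodable value is
   itself inline-encodable.  */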

/* Return true if X is a constant representable as an immediate constant
   in a 32 or 64-bit instruction encoding.  */

bool
gcn_constant_p (rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST_INT:
      return true;

    case CONST_DOUBLE:
      return gcn_fp_constant_p (x, false);

    case CONST_VECTOR:
      {
	int n;
	if (!vgpr_vector_mode_p (GET_MODE (x)))
	  return false;
	n = gcn_constant_p (CONST_VECTOR_ELT (x, 0));
	if (!n)
	  return false;
	for (int i = 1; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
	  if (CONST_VECTOR_ELT (x, i) != CONST_VECTOR_ELT (x, 0))
	    return false;
	return true;
      }

    case SYMBOL_REF:
    case LABEL_REF:
      return true;

    default:
      ;
    }

  return false;
}

/* Return true if X is a constant representable as two inline immediate
   constants in a 64-bit instruction that is split into two 32-bit
   instructions.
   When MIXED is set, the low-part is permitted to use the full 32-bits.  */

bool
gcn_inline_constant64_p (rtx x, bool mixed)
{
  if (GET_CODE (x) == CONST_VECTOR)
    {
      if (!vgpr_vector_mode_p (GET_MODE (x)))
	return false;
      if (!gcn_inline_constant64_p (CONST_VECTOR_ELT (x, 0), mixed))
	return false;
      for (int i = 1; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
	if (CONST_VECTOR_ELT (x, i) != CONST_VECTOR_ELT (x, 0))
	  return false;

      return true;
    }

  if (GET_CODE (x) != CONST_INT)
    return false;

  rtx val_lo = gcn_operand_part (DImode, x, 0);
  rtx val_hi = gcn_operand_part (DImode, x, 1);
  return ((mixed || gcn_inline_constant_p (val_lo))
	  && gcn_inline_constant_p (val_hi));
}

/* Return true if X is a constant representable as an immediate constant
   in a 32 or 64-bit instruction encoding where the hardware will
   extend the immediate to 64-bits.  */

bool
gcn_constant64_p (rtx x)
{
  if (!gcn_constant_p (x))
    return false;

  if (GET_CODE (x) != CONST_INT)
    return true;

  /* Negative numbers are only allowed if they can be encoded within src0,
     because the 32-bit immediates do not get sign-extended.
     Unsigned numbers must not be encodable as 32-bit -1..-16, because the
     assembler will use a src0 inline immediate and that will get
     sign-extended.  */
  HOST_WIDE_INT val = INTVAL (x);
  return (((val & 0xffffffff) == val	   /* Positive 32-bit.  */
	   && (val & 0xfffffff0) != 0xfffffff0)	/* Not -1..-16.  */
	  || gcn_inline_constant_p (x));   /* Src0.  */
}
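
/* For example, 0x7fffffff qualifies (a positive 32-bit value, zero-extended
   by the hardware) and -5 qualifies (inline src0, sign-extended), but
   0xfffffff5 does not: the assembler would emit it as inline -11 and the
   hardware would sign-extend it.  */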

/* Implement TARGET_LEGITIMATE_CONSTANT_P.

   Returns true if X is a legitimate constant for a MODE immediate
   operand.  */

bool
gcn_legitimate_constant_p (machine_mode, rtx x)
{
  return gcn_constant_p (x);
}

/* Return true if X is a CONST_VECTOR whose elements are all the same.  */

static bool
single_cst_vector_p (rtx x)
{
  if (GET_CODE (x) != CONST_VECTOR)
    return false;
  for (int i = 1; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
    if (CONST_VECTOR_ELT (x, i) != CONST_VECTOR_ELT (x, 0))
      return false;
  return true;
}

/* Create a CONST_VECTOR of duplicated value A.  */

rtx
gcn_vec_constant (machine_mode mode, int a)
{
  /*if (!a)
    return CONST0_RTX (mode);
  if (a == -1)
    return CONSTM1_RTX (mode);
  if (a == 1)
    return CONST1_RTX (mode);
  if (a == 2)
    return CONST2_RTX (mode);*/

  int units = GET_MODE_NUNITS (mode);
  machine_mode innermode = GET_MODE_INNER (mode);

  rtx tem;
  if (FLOAT_MODE_P (innermode))
    {
      REAL_VALUE_TYPE rv;
      real_from_integer (&rv, NULL, a, SIGNED);
      tem = const_double_from_real_value (rv, innermode);
    }
  else
    tem = gen_int_mode (a, innermode);

  rtvec v = rtvec_alloc (units);
  for (int i = 0; i < units; ++i)
    RTVEC_ELT (v, i) = tem;

  return gen_rtx_CONST_VECTOR (mode, v);
}

/* Create a CONST_VECTOR of duplicated rtx value A.  */

rtx
gcn_vec_constant (machine_mode mode, rtx a)
{
  int units = GET_MODE_NUNITS (mode);
  rtvec v = rtvec_alloc (units);

  for (int i = 0; i < units; ++i)
    RTVEC_ELT (v, i) = a;

  return gen_rtx_CONST_VECTOR (mode, v);
}

/* Create an undefined vector value, used where an insn operand is
   optional.  */

rtx
gcn_gen_undef (machine_mode mode)
{
  return gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), UNSPEC_VECTOR);
}

/* }}} */
/* {{{ Utility functions.  */

/* Generalised accessor functions for instruction patterns.
   The machine description '@' prefix does something similar, but as of
   GCC 10 is incompatible with define_subst, and anyway it doesn't
   auto-handle the exec feature.

   Four macros are provided; each function only needs one:

   GEN_VN	  - create accessor functions for all sizes of one mode
   GEN_VNM	  - create accessor functions for all sizes of all modes
   GEN_VN_NOEXEC  - for insns without "_exec" variants
   GEN_VNM_NOEXEC - likewise

   E.g.  add<mode>3
     GEN_VNM (add, 3, A(rtx dest, rtx s1, rtx s2), A(dest, s1, s2))

     gen_addvNsi3 (dst, a, b)
       -> calls gen_addv64si3, or gen_addv32si3, etc.

     gen_addvNm3 (dst, a, b)
       -> calls gen_addv64qi3, or gen_addv2di3, etc.

   The mode is determined from the first parameter, which must be called
   "dest" (or else the macro doesn't work).

   Each function has two optional parameters at the end: merge_src and exec.
   If exec is non-null, the function will call the "_exec" variant of the
   insn.  If exec is non-null but merge_src is null then an undef unspec
   will be created.

   E.g. cont.
     gen_addvNsi3 (v64sidst, a, b, oldval, exec)
       -> calls gen_addv64si3_exec (v64sidst, a, b, oldval, exec)

     gen_addvNm3 (v2qidst, a, b, NULL, exec)
       -> calls gen_addv2qi3_exec (v2qidst, a, b,
				   gcn_gen_undef (V2QImode), exec)
 */

#define A(...) __VA_ARGS__
#define GEN_VN_NOEXEC(PREFIX, SUFFIX, PARAMS, ARGS) \
static rtx \
gen_##PREFIX##vN##SUFFIX (PARAMS) \
{ \
  machine_mode mode = GET_MODE (dest); \
  int n = GET_MODE_NUNITS (mode); \
 \
  switch (n) \
    { \
    case 2: return gen_##PREFIX##v2##SUFFIX (ARGS); \
    case 4: return gen_##PREFIX##v4##SUFFIX (ARGS); \
    case 8: return gen_##PREFIX##v8##SUFFIX (ARGS); \
    case 16: return gen_##PREFIX##v16##SUFFIX (ARGS); \
    case 32: return gen_##PREFIX##v32##SUFFIX (ARGS); \
    case 64: return gen_##PREFIX##v64##SUFFIX (ARGS); \
    } \
 \
  gcc_unreachable (); \
  return NULL_RTX; \
}

#define GEN_VNM_NOEXEC(PREFIX, SUFFIX, PARAMS, ARGS) \
GEN_VN_NOEXEC (PREFIX, qi##SUFFIX, A(PARAMS), A(ARGS)) \
GEN_VN_NOEXEC (PREFIX, hi##SUFFIX, A(PARAMS), A(ARGS)) \
GEN_VN_NOEXEC (PREFIX, hf##SUFFIX, A(PARAMS), A(ARGS)) \
GEN_VN_NOEXEC (PREFIX, si##SUFFIX, A(PARAMS), A(ARGS)) \
GEN_VN_NOEXEC (PREFIX, sf##SUFFIX, A(PARAMS), A(ARGS)) \
GEN_VN_NOEXEC (PREFIX, di##SUFFIX, A(PARAMS), A(ARGS)) \
GEN_VN_NOEXEC (PREFIX, df##SUFFIX, A(PARAMS), A(ARGS)) \
static rtx \
gen_##PREFIX##vNm##SUFFIX (PARAMS) \
{ \
  machine_mode mode = GET_MODE_INNER (GET_MODE (dest)); \
 \
  switch (mode) \
    { \
    case E_QImode: return gen_##PREFIX##vNqi##SUFFIX (ARGS); \
    case E_HImode: return gen_##PREFIX##vNhi##SUFFIX (ARGS); \
    case E_HFmode: return gen_##PREFIX##vNhf##SUFFIX (ARGS); \
    case E_SImode: return gen_##PREFIX##vNsi##SUFFIX (ARGS); \
    case E_SFmode: return gen_##PREFIX##vNsf##SUFFIX (ARGS); \
    case E_DImode: return gen_##PREFIX##vNdi##SUFFIX (ARGS); \
    case E_DFmode: return gen_##PREFIX##vNdf##SUFFIX (ARGS); \
    default: \
      break; \
    } \
 \
  gcc_unreachable (); \
  return NULL_RTX; \
}

#define GEN_VN(PREFIX, SUFFIX, PARAMS, ARGS) \
static rtx \
gen_##PREFIX##vN##SUFFIX (PARAMS, rtx merge_src=NULL, rtx exec=NULL) \
{ \
  machine_mode mode = GET_MODE (dest); \
  int n = GET_MODE_NUNITS (mode); \
 \
  if (exec && !merge_src) \
    merge_src = gcn_gen_undef (mode); \
 \
  if (exec) \
    switch (n) \
      { \
      case 2: return gen_##PREFIX##v2##SUFFIX##_exec (ARGS, merge_src, exec); \
      case 4: return gen_##PREFIX##v4##SUFFIX##_exec (ARGS, merge_src, exec); \
      case 8: return gen_##PREFIX##v8##SUFFIX##_exec (ARGS, merge_src, exec); \
      case 16: return gen_##PREFIX##v16##SUFFIX##_exec (ARGS, merge_src, exec); \
      case 32: return gen_##PREFIX##v32##SUFFIX##_exec (ARGS, merge_src, exec); \
      case 64: return gen_##PREFIX##v64##SUFFIX##_exec (ARGS, merge_src, exec); \
      } \
  else \
    switch (n) \
      { \
      case 2: return gen_##PREFIX##v2##SUFFIX (ARGS); \
      case 4: return gen_##PREFIX##v4##SUFFIX (ARGS); \
      case 8: return gen_##PREFIX##v8##SUFFIX (ARGS); \
      case 16: return gen_##PREFIX##v16##SUFFIX (ARGS); \
      case 32: return gen_##PREFIX##v32##SUFFIX (ARGS); \
      case 64: return gen_##PREFIX##v64##SUFFIX (ARGS); \
      } \
 \
  gcc_unreachable (); \
  return NULL_RTX; \
}

#define GEN_VNM(PREFIX, SUFFIX, PARAMS, ARGS) \
GEN_VN (PREFIX, qi##SUFFIX, A(PARAMS), A(ARGS)) \
GEN_VN (PREFIX, hi##SUFFIX, A(PARAMS), A(ARGS)) \
GEN_VN (PREFIX, hf##SUFFIX, A(PARAMS), A(ARGS)) \
GEN_VN (PREFIX, si##SUFFIX, A(PARAMS), A(ARGS)) \
GEN_VN (PREFIX, sf##SUFFIX, A(PARAMS), A(ARGS)) \
GEN_VN (PREFIX, di##SUFFIX, A(PARAMS), A(ARGS)) \
GEN_VN (PREFIX, df##SUFFIX, A(PARAMS), A(ARGS)) \
USE_TI (GEN_VN (PREFIX, ti##SUFFIX, A(PARAMS), A(ARGS))) \
static rtx \
gen_##PREFIX##vNm##SUFFIX (PARAMS, rtx merge_src=NULL, rtx exec=NULL) \
{ \
  machine_mode mode = GET_MODE_INNER (GET_MODE (dest)); \
 \
  switch (mode) \
    { \
    case E_QImode: return gen_##PREFIX##vNqi##SUFFIX (ARGS, merge_src, exec); \
    case E_HImode: return gen_##PREFIX##vNhi##SUFFIX (ARGS, merge_src, exec); \
    case E_HFmode: return gen_##PREFIX##vNhf##SUFFIX (ARGS, merge_src, exec); \
    case E_SImode: return gen_##PREFIX##vNsi##SUFFIX (ARGS, merge_src, exec); \
    case E_SFmode: return gen_##PREFIX##vNsf##SUFFIX (ARGS, merge_src, exec); \
    case E_DImode: return gen_##PREFIX##vNdi##SUFFIX (ARGS, merge_src, exec); \
    case E_DFmode: return gen_##PREFIX##vNdf##SUFFIX (ARGS, merge_src, exec); \
    case E_TImode: \
      USE_TI (return gen_##PREFIX##vNti##SUFFIX (ARGS, merge_src, exec);) \
    default: \
      break; \
    } \
 \
  gcc_unreachable (); \
  return NULL_RTX; \
}

/* These have TImode support.  */
#define USE_TI(ARGS) ARGS
GEN_VNM (mov,, A(rtx dest, rtx src), A(dest, src))
GEN_VNM (vec_duplicate,, A(rtx dest, rtx src), A(dest, src))

/* These do not have TImode support.  */
#undef USE_TI
#define USE_TI(ARGS)
GEN_VNM (add,3, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2))
GEN_VN (add,si3_dup, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2))
GEN_VN (add,si3_vcc_dup, A(rtx dest, rtx src1, rtx src2, rtx vcc),
	A(dest, src1, src2, vcc))
GEN_VN (add,di3_sext_dup2, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2))
GEN_VN (add,di3_vcc_zext_dup, A(rtx dest, rtx src1, rtx src2, rtx vcc),
	A(dest, src1, src2, vcc))
GEN_VN (add,di3_zext_dup2, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2))
GEN_VN (add,di3_vcc_zext_dup2, A(rtx dest, rtx src1, rtx src2, rtx vcc),
	A(dest, src1, src2, vcc))
GEN_VN (addc,si3, A(rtx dest, rtx src1, rtx src2, rtx vccout, rtx vccin),
	A(dest, src1, src2, vccout, vccin))
GEN_VN (and,si3, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2))
GEN_VN (ashl,si3, A(rtx dest, rtx src, rtx shift), A(dest, src, shift))
GEN_VNM_NOEXEC (ds_bpermute,, A(rtx dest, rtx addr, rtx src, rtx exec),
		A(dest, addr, src, exec))
GEN_VNM (gather,_expr, A(rtx dest, rtx addr, rtx as, rtx vol),
	 A(dest, addr, as, vol))
GEN_VN (mul,si3_dup, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2))
GEN_VN (sub,si3, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2))
GEN_VN_NOEXEC (vec_series,si, A(rtx dest, rtx x, rtx c), A(dest, x, c))

#undef USE_TI
#undef GEN_VNM
#undef GEN_VN
#undef GET_VN_FN
#undef A

/* Return true if OP is a PARALLEL of CONST_INTs that form a linear
   series with step STEP, starting at zero.  */

bool
gcn_stepped_zero_int_parallel_p (rtx op, int step)
{
  if (GET_CODE (op) != PARALLEL || !CONST_INT_P (XVECEXP (op, 0, 0)))
    return false;

  unsigned HOST_WIDE_INT base = 0;
  for (int i = 0; i < XVECLEN (op, 0); ++i)
    if (!CONST_INT_P (XVECEXP (op, 0, i))
	|| UINTVAL (XVECEXP (op, 0, i)) != base + i * step)
      return false;

  return true;
}
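
/* For example, (parallel [(const_int 0) (const_int 2) (const_int 4) ...])
   satisfies this predicate with STEP equal to 2; the series must start
   at zero.  */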

/* }}} */
/* {{{ Addresses, pointers and moves.  */

/* Return true if REG is a valid place to store a pointer,
   for instructions that require an SGPR.
   FIXME rename.  */

static bool
gcn_address_register_p (rtx reg, machine_mode mode, bool strict)
{
  if (GET_CODE (reg) == SUBREG)
    reg = SUBREG_REG (reg);

  if (!REG_P (reg))
    return false;

  if (GET_MODE (reg) != mode)
    return false;

  int regno = REGNO (reg);

  if (regno >= FIRST_PSEUDO_REGISTER)
    {
      if (!strict)
	return true;

      if (!reg_renumber)
	return false;

      regno = reg_renumber[regno];
    }

  return (SGPR_REGNO_P (regno) || regno == M0_REG
	  || regno == ARG_POINTER_REGNUM || regno == FRAME_POINTER_REGNUM);
}

/* Return true if REG is a valid place to store a pointer,
   for instructions that require a VGPR.  */

static bool
gcn_vec_address_register_p (rtx reg, machine_mode mode, bool strict)
{
  if (GET_CODE (reg) == SUBREG)
    reg = SUBREG_REG (reg);

  if (!REG_P (reg))
    return false;

  if (GET_MODE (reg) != mode)
    return false;

  int regno = REGNO (reg);

  if (regno >= FIRST_PSEUDO_REGISTER)
    {
      if (!strict)
	return true;

      if (!reg_renumber)
	return false;

      regno = reg_renumber[regno];
    }

  return VGPR_REGNO_P (regno);
}

/* Return true if X would be valid inside a MEM using the Flat address
   space.  */

bool
gcn_flat_address_p (rtx x, machine_mode mode)
{
  bool vec_mode = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
		   || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);

  if (vec_mode && gcn_address_register_p (x, DImode, false))
    return true;

  if (!vec_mode && gcn_vec_address_register_p (x, DImode, false))
    return true;

  if (TARGET_GCN5_PLUS
      && GET_CODE (x) == PLUS
      && gcn_vec_address_register_p (XEXP (x, 0), DImode, false)
      && CONST_INT_P (XEXP (x, 1)))
    return true;

  return false;
}

/* Return true if X would be valid inside a MEM using the Scalar Flat
   address space.  */

bool
gcn_scalar_flat_address_p (rtx x)
{
  if (gcn_address_register_p (x, DImode, false))
    return true;

  if (GET_CODE (x) == PLUS
      && gcn_address_register_p (XEXP (x, 0), DImode, false)
      && CONST_INT_P (XEXP (x, 1)))
    return true;

  return false;
}

/* Return true if MEM X would be valid for the Scalar Flat address space.  */

bool
gcn_scalar_flat_mem_p (rtx x)
{
  if (!MEM_P (x))
    return false;

  if (GET_MODE_SIZE (GET_MODE (x)) < 4)
    return false;

  return gcn_scalar_flat_address_p (XEXP (x, 0));
}

/* Return true if X would be valid inside a MEM using the LDS or GDS
   address spaces.  */

bool
gcn_ds_address_p (rtx x)
{
  if (gcn_vec_address_register_p (x, SImode, false))
    return true;

  if (GET_CODE (x) == PLUS
      && gcn_vec_address_register_p (XEXP (x, 0), SImode, false)
      && CONST_INT_P (XEXP (x, 1)))
    return true;

  return false;
}

/* Return true if ADDR would be valid inside a MEM using the Global
   address space.  */

bool
gcn_global_address_p (rtx addr)
{
  if (gcn_address_register_p (addr, DImode, false)
      || gcn_vec_address_register_p (addr, DImode, false))
    return true;

  if (GET_CODE (addr) == PLUS)
    {
      rtx base = XEXP (addr, 0);
      rtx offset = XEXP (addr, 1);
      int offsetbits = (TARGET_RDNA2 ? 11 : 12);
      bool immediate_p = (CONST_INT_P (offset)
			  && INTVAL (offset) >= -(1 << offsetbits)
			  && INTVAL (offset) < (1 << offsetbits));

      if ((gcn_address_register_p (base, DImode, false)
	   || gcn_vec_address_register_p (base, DImode, false))
	  && immediate_p)
	/* SGPR + CONST or VGPR + CONST  */
	return true;

      if (gcn_address_register_p (base, DImode, false)
	  && gcn_vgpr_register_operand (offset, SImode))
	/* SGPR + VGPR  */
	return true;

      if (GET_CODE (base) == PLUS
	  && gcn_address_register_p (XEXP (base, 0), DImode, false)
	  && gcn_vgpr_register_operand (XEXP (base, 1), SImode)
	  && immediate_p)
	/* (SGPR + VGPR) + CONST  */
	return true;
    }

  return false;
}

/* Implement TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P.

   Recognizes RTL expressions that are valid memory addresses for an
   instruction.  The MODE argument is the machine mode for the MEM
   expression that wants to use this address.

   It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
   convert common non-canonical forms to canonical form so that they will
   be recognized.  */

static bool
gcn_addr_space_legitimate_address_p (machine_mode mode, rtx x, bool strict,
				     addr_space_t as, code_helper = ERROR_MARK)
{
  /* All vector instructions need to work on addresses in registers.  */
  if (!TARGET_GCN5_PLUS && (vgpr_vector_mode_p (mode) && !REG_P (x)))
    return false;

  if (AS_SCALAR_FLAT_P (as))
    {
      if (mode == QImode || mode == HImode)
	return false;

      switch (GET_CODE (x))
	{
	case REG:
	  return gcn_address_register_p (x, DImode, strict);
	/* Addresses are in the form BASE+OFFSET.
	   OFFSET is either 20bit unsigned immediate, SGPR or M0.
	   Writes and atomics do not accept SGPR.  */
	case PLUS:
	  {
	    rtx x0 = XEXP (x, 0);
	    rtx x1 = XEXP (x, 1);
	    if (!gcn_address_register_p (x0, DImode, strict))
	      return false;
	    /* FIXME: This is disabled because of the mode mismatch between
	       SImode (for the address or m0 register) and the DImode PLUS.
	       We'll need a zero_extend or similar.

	    if (gcn_m0_register_p (x1, SImode, strict)
		|| gcn_address_register_p (x1, SImode, strict))
	      return true;
	    else*/
	    if (GET_CODE (x1) == CONST_INT)
	      {
		if (INTVAL (x1) >= 0 && INTVAL (x1) < (1 << 20)
		    /* The low bits of the offset are ignored, even when
		       they're meant to realign the pointer.  */
		    && !(INTVAL (x1) & 0x3))
		  return true;
	      }
	    return false;
	  }

	default:
	  break;
	}
    }
  else if (AS_SCRATCH_P (as))
    return gcn_address_register_p (x, SImode, strict);
  else if (AS_FLAT_P (as) || AS_FLAT_SCRATCH_P (as))
    {
      if (TARGET_GCN3 || GET_CODE (x) == REG)
	return ((GET_MODE_CLASS (mode) == MODE_VECTOR_INT
		 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
		? gcn_address_register_p (x, DImode, strict)
		: gcn_vec_address_register_p (x, DImode, strict));
      else
	{
	  gcc_assert (TARGET_GCN5_PLUS);

	  if (GET_CODE (x) == PLUS)
	    {
	      rtx x1 = XEXP (x, 1);

	      if (VECTOR_MODE_P (mode)
		  ? !gcn_address_register_p (x, DImode, strict)
		  : !gcn_vec_address_register_p (x, DImode, strict))
		return false;

	      if (GET_CODE (x1) == CONST_INT)
		{
		  if (INTVAL (x1) >= 0 && INTVAL (x1) < (1 << 12)
		      /* The low bits of the offset are ignored, even when
			 they're meant to realign the pointer.  */
		      && !(INTVAL (x1) & 0x3))
		    return true;
		}
	    }
	  return false;
	}
    }
  else if (AS_GLOBAL_P (as))
    {
      gcc_assert (TARGET_GCN5_PLUS);

      if (GET_CODE (x) == REG)
	return (gcn_address_register_p (x, DImode, strict)
		|| (!VECTOR_MODE_P (mode)
		    && gcn_vec_address_register_p (x, DImode, strict)));
      else if (GET_CODE (x) == PLUS)
	{
	  rtx base = XEXP (x, 0);
	  rtx offset = XEXP (x, 1);

	  int offsetbits = (TARGET_RDNA2 ? 11 : 12);
	  bool immediate_p = (GET_CODE (offset) == CONST_INT
			      /* Signed 12/13-bit immediate.  */
			      && INTVAL (offset) >= -(1 << offsetbits)
			      && INTVAL (offset) < (1 << offsetbits)
			      /* The low bits of the offset are ignored, even
				 when they're meant to realign the
				 pointer.  */
			      && !(INTVAL (offset) & 0x3));

	  if (!VECTOR_MODE_P (mode))
	    {
	      if ((gcn_address_register_p (base, DImode, strict)
		   || gcn_vec_address_register_p (base, DImode, strict))
		  && immediate_p)
		/* SGPR + CONST or VGPR + CONST  */
		return true;

	      if (gcn_address_register_p (base, DImode, strict)
		  && gcn_vgpr_register_operand (offset, SImode))
		/* SGPR + VGPR  */
		return true;

	      if (GET_CODE (base) == PLUS
		  && gcn_address_register_p (XEXP (base, 0), DImode, strict)
		  && gcn_vgpr_register_operand (XEXP (base, 1), SImode)
		  && immediate_p)
		/* (SGPR + VGPR) + CONST  */
		return true;
	    }
	  else
	    {
	      if (gcn_address_register_p (base, DImode, strict)
		  && immediate_p)
		/* SGPR + CONST  */
		return true;
	    }
	}
      else
	return false;
    }
  else if (AS_ANY_DS_P (as))
    switch (GET_CODE (x))
      {
      case REG:
	return (VECTOR_MODE_P (mode)
		? gcn_address_register_p (x, SImode, strict)
		: gcn_vec_address_register_p (x, SImode, strict));
      /* Addresses are in the form BASE+OFFSET.
	 OFFSET is either 20bit unsigned immediate, SGPR or M0.
	 Writes and atomics do not accept SGPR.  */
      case PLUS:
	{
	  rtx x0 = XEXP (x, 0);
	  rtx x1 = XEXP (x, 1);
	  if (!gcn_vec_address_register_p (x0, DImode, strict))
	    return false;
	  if (GET_CODE (x1) == REG)
	    {
	      if (GET_CODE (x1) != REG
		  || (REGNO (x1) <= FIRST_PSEUDO_REGISTER
		      && !gcn_ssrc_register_operand (x1, DImode)))
		return false;
	    }
	  else if (GET_CODE (x1) == CONST_VECTOR
		   && GET_CODE (CONST_VECTOR_ELT (x1, 0)) == CONST_INT
		   && single_cst_vector_p (x1))
	    {
	      x1 = CONST_VECTOR_ELT (x1, 0);
	      if (INTVAL (x1) >= 0 && INTVAL (x1) < (1 << 20))
		return true;
	    }
	  return false;
	}

      default:
	break;
      }
  else
    gcc_unreachable ();
  return false;
}

/* Implement TARGET_ADDR_SPACE_POINTER_MODE.

   Return the appropriate mode for a named address pointer.  */

static scalar_int_mode
gcn_addr_space_pointer_mode (addr_space_t addrspace)
{
  switch (addrspace)
    {
    case ADDR_SPACE_SCRATCH:
    case ADDR_SPACE_LDS:
    case ADDR_SPACE_GDS:
      return SImode;
    case ADDR_SPACE_DEFAULT:
    case ADDR_SPACE_FLAT:
    case ADDR_SPACE_FLAT_SCRATCH:
    case ADDR_SPACE_SCALAR_FLAT:
      return DImode;
    default:
      gcc_unreachable ();
    }
}

/* Implement TARGET_ADDR_SPACE_ADDRESS_MODE.

   Return the appropriate mode for a named address space address.  */

static scalar_int_mode
gcn_addr_space_address_mode (addr_space_t addrspace)
{
  return gcn_addr_space_pointer_mode (addrspace);
}

/* Implement TARGET_ADDR_SPACE_SUBSET_P.

   Determine if one named address space is a subset of another.  */

static bool
gcn_addr_space_subset_p (addr_space_t subset, addr_space_t superset)
{
  if (subset == superset)
    return true;
  /* FIXME is this true?  */
  if (AS_FLAT_P (superset) || AS_SCALAR_FLAT_P (superset))
    return true;
  return false;
}
1857
1858/* Convert from one address space to another. */
1859
1860static rtx
1861gcn_addr_space_convert (rtx op, tree from_type, tree to_type)
1862{
1863 gcc_assert (POINTER_TYPE_P (from_type));
1864 gcc_assert (POINTER_TYPE_P (to_type));
1865
1866 addr_space_t as_from = TYPE_ADDR_SPACE (TREE_TYPE (from_type));
1867 addr_space_t as_to = TYPE_ADDR_SPACE (TREE_TYPE (to_type));
1868
1869 if (AS_LDS_P (as_from) && AS_FLAT_P (as_to))
1870 {
f6fff8a6
AS
1871 /* The high bits of the QUEUE_PTR_ARG register are used by
1872 GCN_BUILTIN_FIRST_CALL_THIS_THREAD_P, so mask them out. */
1873 rtx queue_reg = gen_rtx_REG (DImode,
1874 cfun->machine->args.reg[QUEUE_PTR_ARG]);
1875 rtx queue_ptr = gen_reg_rtx (DImode);
1876 emit_insn (gen_anddi3 (queue_ptr, queue_reg, GEN_INT (0xffffffffffff)));
5326695a 1877 rtx group_seg_aperture_hi = gen_rtx_MEM (SImode,
f6fff8a6 1878 gen_rtx_PLUS (DImode, queue_ptr,
5326695a
AS
1879 gen_int_mode (64, SImode)));
1880 rtx tmp = gen_reg_rtx (DImode);
1881
1882 emit_move_insn (gen_lowpart (SImode, tmp), op);
1883 emit_move_insn (gen_highpart_mode (SImode, DImode, tmp),
1884 group_seg_aperture_hi);
1885
1886 return tmp;
1887 }
1888 else if (as_from == as_to)
1889 return op;
1890 else
1891 gcc_unreachable ();
1892}
1893
b5bb7f32
HAQ
1894/* Implement TARGET_ADDR_SPACE_DEBUG.
1895
1896 Return the dwarf address space class for each hardware address space. */
1897
1898static int
1899gcn_addr_space_debug (addr_space_t as)
1900{
1901 switch (as)
1902 {
1903 case ADDR_SPACE_DEFAULT:
1904 case ADDR_SPACE_FLAT:
1905 case ADDR_SPACE_SCALAR_FLAT:
1906 case ADDR_SPACE_FLAT_SCRATCH:
1907 return DW_ADDR_none;
1908 case ADDR_SPACE_GLOBAL:
1909 return 1; // DW_ADDR_LLVM_global
1910 case ADDR_SPACE_LDS:
1911 return 3; // DW_ADDR_LLVM_group
1912 case ADDR_SPACE_SCRATCH:
1913 return 4; // DW_ADDR_LLVM_private
1914 case ADDR_SPACE_GDS:
1915 return 0x8000; // DW_ADDR_AMDGPU_region
1916 }
1917 gcc_unreachable ();
1918}
1919
5326695a
AS
1920
1921/* Implement REGNO_MODE_CODE_OK_FOR_BASE_P via gcn.h
1922
 1923   Return true if REGNO is OK for memory addressing.  */
1924
1925bool
1926gcn_regno_mode_code_ok_for_base_p (int regno,
1927 machine_mode, addr_space_t as, int, int)
1928{
1929 if (regno >= FIRST_PSEUDO_REGISTER)
1930 {
1931 if (reg_renumber)
1932 regno = reg_renumber[regno];
1933 else
1934 return true;
1935 }
1936 if (AS_FLAT_P (as))
1937 return (VGPR_REGNO_P (regno)
1938 || regno == ARG_POINTER_REGNUM || regno == FRAME_POINTER_REGNUM);
1939 else if (AS_SCALAR_FLAT_P (as))
1940 return (SGPR_REGNO_P (regno)
1941 || regno == ARG_POINTER_REGNUM || regno == FRAME_POINTER_REGNUM);
1942 else if (AS_GLOBAL_P (as))
1943 {
1944 return (SGPR_REGNO_P (regno)
1945 || VGPR_REGNO_P (regno)
1946 || regno == ARG_POINTER_REGNUM
1947 || regno == FRAME_POINTER_REGNUM);
1948 }
1949 else
1950 /* For now. */
1951 return false;
1952}
1953
1954/* Implement MODE_CODE_BASE_REG_CLASS via gcn.h.
1955
1956 Return a suitable register class for memory addressing. */
1957
1958reg_class
1959gcn_mode_code_base_reg_class (machine_mode mode, addr_space_t as, int oc,
1960 int ic)
1961{
1962 switch (as)
1963 {
1964 case ADDR_SPACE_DEFAULT:
1965 return gcn_mode_code_base_reg_class (mode, DEFAULT_ADDR_SPACE, oc, ic);
1966 case ADDR_SPACE_SCALAR_FLAT:
1967 case ADDR_SPACE_SCRATCH:
1968 return SGPR_REGS;
1969 break;
1970 case ADDR_SPACE_FLAT:
1971 case ADDR_SPACE_FLAT_SCRATCH:
1972 case ADDR_SPACE_LDS:
1973 case ADDR_SPACE_GDS:
1974 return ((GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1975 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
1976 ? SGPR_REGS : VGPR_REGS);
1977 case ADDR_SPACE_GLOBAL:
1978 return ((GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1979 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
1980 ? SGPR_REGS : ALL_GPR_REGS);
1981 }
1982 gcc_unreachable ();
1983}
1984
1985/* Implement REGNO_OK_FOR_INDEX_P via gcn.h.
1986
1987 Return true if REGNO is OK for index of memory addressing. */
1988
1989bool
1990regno_ok_for_index_p (int regno)
1991{
1992 if (regno >= FIRST_PSEUDO_REGISTER)
1993 {
1994 if (reg_renumber)
1995 regno = reg_renumber[regno];
1996 else
1997 return true;
1998 }
1999 return regno == M0_REG || VGPR_REGNO_P (regno);
2000}
2001
5326695a
AS
2002/* Expand vector init of OP0 by VEC.
2003 Implements vec_init instruction pattern. */
2004
2005void
2006gcn_expand_vector_init (rtx op0, rtx vec)
2007{
769a10d0 2008 rtx val[64];
5326695a 2009 machine_mode mode = GET_MODE (op0);
45381d6f 2010 int vf = GET_MODE_NUNITS (mode);
769a10d0
AS
2011 machine_mode addrmode = VnMODE (vf, DImode);
2012 machine_mode offsetmode = VnMODE (vf, SImode);
5326695a 2013
769a10d0
AS
2014 int64_t mem_mask = 0;
2015 int64_t item_mask[64];
2016 rtx ramp = gen_reg_rtx (offsetmode);
2017 rtx addr = gen_reg_rtx (addrmode);
5326695a 2018
769a10d0
AS
2019 int unit_size = GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op0)));
2020 emit_insn (gen_mulvNsi3_dup (ramp, gen_rtx_REG (offsetmode, VGPR_REGNO (1)),
2021 GEN_INT (unit_size)));
5326695a 2022
769a10d0
AS
2023 bool simple_repeat = true;
2024
2025 /* Expand nested vectors into one vector. */
2026 int item_count = XVECLEN (vec, 0);
2027 for (int i = 0, j = 0; i < item_count; i++)
5326695a 2028 {
769a10d0
AS
2029 rtx item = XVECEXP (vec, 0, i);
2030 machine_mode mode = GET_MODE (item);
2031 int units = VECTOR_MODE_P (mode) ? GET_MODE_NUNITS (mode) : 1;
 2032      item_mask[j] = (((uint64_t) -1) >> (64 - units)) << j;
2033
2034 if (simple_repeat && i != 0)
2035 simple_repeat = item == XVECEXP (vec, 0, i-1);
2036
 2037      /* If it's a vector of values, copy them into the final location.  */
2038 if (GET_CODE (item) == CONST_VECTOR)
2039 {
2040 for (int k = 0; k < units; k++)
2041 val[j++] = XVECEXP (item, 0, k);
2042 continue;
2043 }
2044 /* Otherwise, we have a scalar or an expression that expands... */
2045
2046 if (MEM_P (item))
2047 {
2048 rtx base = XEXP (item, 0);
2049 if (MEM_ADDR_SPACE (item) == DEFAULT_ADDR_SPACE
2050 && REG_P (base))
2051 {
2052 /* We have a simple vector load. We can put the addresses in
2053 the vector, combine it with any other such MEMs, and load it
2054 all with a single gather at the end. */
2055 int64_t mask = ((0xffffffffffffffffUL
2056 >> (64-GET_MODE_NUNITS (mode)))
2057 << j);
2058 rtx exec = get_exec (mask);
2059 emit_insn (gen_subvNsi3
2060 (ramp, ramp,
2061 gcn_vec_constant (offsetmode, j*unit_size),
2062 ramp, exec));
2063 emit_insn (gen_addvNdi3_zext_dup2
2064 (addr, ramp, base,
2065 (mem_mask ? addr : gcn_gen_undef (addrmode)),
2066 exec));
2067 mem_mask |= mask;
2068 }
2069 else
2070 /* The MEM is non-trivial, so let's load it independently. */
2071 item = force_reg (mode, item);
2072 }
2073 else if (!CONST_INT_P (item) && !CONST_DOUBLE_P (item))
2074 /* The item may be a symbol_ref, or something else non-trivial. */
2075 item = force_reg (mode, item);
2076
2077 /* Duplicate the vector across each item.
2078 It is either a smaller vector register that needs shifting,
2079 or a MEM that needs loading. */
2080 val[j] = item;
2081 j += units;
5326695a 2082 }
769a10d0
AS
2083
2084 int64_t initialized_mask = 0;
2085 rtx prev = NULL;
2086
2087 if (mem_mask)
2088 {
2089 emit_insn (gen_gathervNm_expr
2090 (op0, gen_rtx_PLUS (addrmode, addr,
2091 gen_rtx_VEC_DUPLICATE (addrmode,
2092 const0_rtx)),
2093 GEN_INT (DEFAULT_ADDR_SPACE), GEN_INT (0),
2094 NULL, get_exec (mem_mask)));
2095 prev = op0;
2096 initialized_mask = mem_mask;
2097 }
2098
2099 if (simple_repeat && item_count > 1 && !prev)
2100 {
 2101      /* Special case for instances of {A, B, A, B, A, B, ...}, etc.  */
2102 rtx src = gen_rtx_SUBREG (mode, val[0], 0);
2103 rtx input_vf_mask = GEN_INT (GET_MODE_NUNITS (GET_MODE (val[0]))-1);
2104
2105 rtx permutation = gen_reg_rtx (VnMODE (vf, SImode));
2106 emit_insn (gen_vec_seriesvNsi (permutation, GEN_INT (0), GEN_INT (1)));
2107 rtx mask_dup = gen_reg_rtx (VnMODE (vf, SImode));
2108 emit_insn (gen_vec_duplicatevNsi (mask_dup, input_vf_mask));
2109 emit_insn (gen_andvNsi3 (permutation, permutation, mask_dup));
2110 emit_insn (gen_ashlvNsi3 (permutation, permutation, GEN_INT (2)));
2111 emit_insn (gen_ds_bpermutevNm (op0, permutation, src, get_exec (mode)));
2112 return;
2113 }
2114
2115 /* Write each value, elementwise, but coalesce matching values into one
2116 instruction, where possible. */
2117 for (int i = 0; i < vf; i++)
5326695a
AS
2118 if (!(initialized_mask & ((int64_t) 1 << i)))
2119 {
769a10d0
AS
2120 if (gcn_constant_p (val[i]))
2121 emit_insn (gen_movvNm (op0, gcn_vec_constant (mode, val[i]), prev,
2122 get_exec (item_mask[i])));
2123 else if (VECTOR_MODE_P (GET_MODE (val[i]))
2124 && (GET_MODE_NUNITS (GET_MODE (val[i])) == vf
2125 || i == 0))
2126 emit_insn (gen_movvNm (op0, gen_rtx_SUBREG (mode, val[i], 0), prev,
2127 get_exec (item_mask[i])));
2128 else if (VECTOR_MODE_P (GET_MODE (val[i])))
2129 {
2130 rtx permutation = gen_reg_rtx (VnMODE (vf, SImode));
2131 emit_insn (gen_vec_seriesvNsi (permutation, GEN_INT (-i*4),
2132 GEN_INT (4)));
2133 rtx tmp = gen_reg_rtx (mode);
2134 emit_insn (gen_ds_bpermutevNm (tmp, permutation,
2135 gen_rtx_SUBREG (mode, val[i], 0),
2136 get_exec (-1)));
2137 emit_insn (gen_movvNm (op0, tmp, prev, get_exec (item_mask[i])));
2138 }
5326695a
AS
2139 else
2140 {
769a10d0
AS
2141 rtx reg = force_reg (GET_MODE_INNER (mode), val[i]);
2142 emit_insn (gen_vec_duplicatevNm (op0, reg, prev,
2143 get_exec (item_mask[i])));
5326695a 2144 }
769a10d0
AS
2145
2146 initialized_mask |= item_mask[i];
2147 prev = op0;
5326695a
AS
2148 }
2149}
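/* Illustration (values assumed, not taken from the source): for an init
   vector { A, B, A, B, ... } built from a two-lane source, the
   simple_repeat path above computes permutation[n] = (n & 1) << 2,
   i.e. the byte address of source lane n & 1, and one ds_bpermute then
   broadcasts both source lanes across all lanes of op0 instead of
   emitting a masked move per element.  */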
2150
2151/* Load vector constant where n-th lane contains BASE+n*VAL. */
2152
2153static rtx
2154strided_constant (machine_mode mode, int base, int val)
2155{
2156 rtx x = gen_reg_rtx (mode);
2157 emit_move_insn (x, gcn_vec_constant (mode, base));
45381d6f
AS
2158 emit_insn (gen_addvNm3 (x, x, gcn_vec_constant (mode, val * 32),
2159 x, get_exec (0xffffffff00000000)));
2160 emit_insn (gen_addvNm3 (x, x, gcn_vec_constant (mode, val * 16),
2161 x, get_exec (0xffff0000ffff0000)));
2162 emit_insn (gen_addvNm3 (x, x, gcn_vec_constant (mode, val * 8),
2163 x, get_exec (0xff00ff00ff00ff00)));
2164 emit_insn (gen_addvNm3 (x, x, gcn_vec_constant (mode, val * 4),
2165 x, get_exec (0xf0f0f0f0f0f0f0f0)));
2166 emit_insn (gen_addvNm3 (x, x, gcn_vec_constant (mode, val * 2),
2167 x, get_exec (0xcccccccccccccccc)));
2168 emit_insn (gen_addvNm3 (x, x, gcn_vec_constant (mode, val * 1),
2169 x, get_exec (0xaaaaaaaaaaaaaaaa)));
5326695a
AS
2170 return x;
2171}
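/* Worked example (illustrative): strided_constant (V64SImode, 16, 4)
   produces lane values 16, 20, 24, ..., 268.  Each masked add above
   contributes val * 2^k to exactly those lanes whose lane-index bit k
   is set (mask 0xaaaa...aaaa covers bit 0, 0xcccc...cccc bit 1, and so
   on), so the six steps sum to base + n * val without a multiply.  */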
2172
2173/* Implement TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS. */
2174
2175static rtx
2176gcn_addr_space_legitimize_address (rtx x, rtx old, machine_mode mode,
2177 addr_space_t as)
2178{
2179 switch (as)
2180 {
2181 case ADDR_SPACE_DEFAULT:
2182 return gcn_addr_space_legitimize_address (x, old, mode,
2183 DEFAULT_ADDR_SPACE);
2184 case ADDR_SPACE_SCALAR_FLAT:
2185 case ADDR_SPACE_SCRATCH:
2186 /* Instructions working on vectors need the address to be in
2187 a register. */
2188 if (vgpr_vector_mode_p (mode))
2189 return force_reg (GET_MODE (x), x);
2190
2191 return x;
2192 case ADDR_SPACE_FLAT:
2193 case ADDR_SPACE_FLAT_SCRATCH:
2194 case ADDR_SPACE_GLOBAL:
2195 return TARGET_GCN3 ? force_reg (DImode, x) : x;
2196 case ADDR_SPACE_LDS:
2197 case ADDR_SPACE_GDS:
 2198      /* FIXME: LDS supports offsets; handle them!  */
45381d6f
AS
2199 if (vgpr_vector_mode_p (mode)
2200 && GET_MODE_INNER (GET_MODE (x)) != SImode)
5326695a 2201 {
45381d6f
AS
2202 machine_mode simode = VnMODE (GET_MODE_NUNITS (mode), SImode);
2203 rtx addrs = gen_reg_rtx (simode);
5326695a 2204 rtx base = force_reg (SImode, x);
45381d6f 2205 rtx offsets = strided_constant (simode, 0,
5326695a
AS
2206 GET_MODE_UNIT_SIZE (mode));
2207
45381d6f
AS
2208 emit_insn (gen_vec_duplicatevNsi (addrs, base));
2209 emit_insn (gen_addvNsi3 (addrs, offsets, addrs));
5326695a
AS
2210 return addrs;
2211 }
2212 return x;
2213 }
2214 gcc_unreachable ();
2215}
2216
45381d6f 2217/* Convert a (mem:<MODE> (reg:DI)) to (mem:<MODE> (reg:VnDI)) with the
5326695a
AS
2218 proper vector of stepped addresses.
2219
2220 MEM will be a DImode address of a vector in an SGPR.
45381d6f 2221 TMP will be a VnDImode VGPR pair or (scratch:VnDI). */
5326695a
AS
2222
2223rtx
2224gcn_expand_scalar_to_vector_address (machine_mode mode, rtx exec, rtx mem,
2225 rtx tmp)
2226{
45381d6f
AS
2227 machine_mode pmode = VnMODE (GET_MODE_NUNITS (mode), DImode);
2228 machine_mode offmode = VnMODE (GET_MODE_NUNITS (mode), SImode);
5326695a
AS
2229 gcc_assert (MEM_P (mem));
2230 rtx mem_base = XEXP (mem, 0);
2231 rtx mem_index = NULL_RTX;
2232
2233 if (!TARGET_GCN5_PLUS)
2234 {
2235 /* gcn_addr_space_legitimize_address should have put the address in a
2236 register. If not, it is too late to do anything about it. */
2237 gcc_assert (REG_P (mem_base));
2238 }
2239
2240 if (GET_CODE (mem_base) == PLUS)
2241 {
2242 mem_index = XEXP (mem_base, 1);
2243 mem_base = XEXP (mem_base, 0);
2244 }
2245
 2246  /* RF and RM base registers for vector modes should always be an SGPR.  */
2247 gcc_assert (SGPR_REGNO_P (REGNO (mem_base))
2248 || REGNO (mem_base) >= FIRST_PSEUDO_REGISTER);
2249
2250 machine_mode inner = GET_MODE_INNER (mode);
2251 int shift = exact_log2 (GET_MODE_SIZE (inner));
45381d6f 2252 rtx ramp = gen_rtx_REG (offmode, VGPR_REGNO (1));
5326695a
AS
2253 rtx new_base = NULL_RTX;
2254 addr_space_t as = MEM_ADDR_SPACE (mem);
2255
2256 rtx tmplo = (REG_P (tmp)
45381d6f
AS
2257 ? gcn_operand_part (pmode, tmp, 0)
2258 : gen_reg_rtx (offmode));
5326695a
AS
2259
2260 /* tmplo[:] = ramp[:] << shift */
45381d6f
AS
2261 emit_insn (gen_ashlvNsi3 (tmplo, ramp,
2262 gen_int_mode (shift, SImode),
2263 NULL, exec));
5326695a
AS
2264
2265 if (AS_FLAT_P (as))
2266 {
75d0b3d7
AS
2267 rtx vcc = gen_rtx_REG (DImode, CC_SAVE_REG);
2268
5326695a
AS
2269 if (REG_P (tmp))
2270 {
5326695a
AS
2271 rtx mem_base_lo = gcn_operand_part (DImode, mem_base, 0);
2272 rtx mem_base_hi = gcn_operand_part (DImode, mem_base, 1);
45381d6f 2273 rtx tmphi = gcn_operand_part (pmode, tmp, 1);
5326695a
AS
2274
2275 /* tmphi[:] = mem_base_hi */
45381d6f 2276 emit_insn (gen_vec_duplicatevNsi (tmphi, mem_base_hi, NULL, exec));
5326695a
AS
2277
2278 /* tmp[:] += zext (mem_base) */
2279 if (exec)
2280 {
45381d6f
AS
2281 emit_insn (gen_addvNsi3_vcc_dup (tmplo, mem_base_lo, tmplo,
2282 vcc, NULL, exec));
2283 emit_insn (gen_addcvNsi3 (tmphi, tmphi, const0_rtx,
2284 vcc, vcc, NULL, exec));
5326695a
AS
2285 }
2286 else
45381d6f 2287 emit_insn (gen_addvNdi3_vcc_zext_dup (tmp, mem_base_lo, tmp, vcc));
5326695a
AS
2288 }
2289 else
2290 {
45381d6f
AS
2291 tmp = gen_reg_rtx (pmode);
2292 emit_insn (gen_addvNdi3_vcc_zext_dup2 (tmp, tmplo, mem_base, vcc,
2293 NULL, exec));
5326695a
AS
2294 }
2295
2296 new_base = tmp;
2297 }
2298 else if (AS_ANY_DS_P (as))
2299 {
45381d6f 2300 emit_insn (gen_addvNsi3_dup (tmplo, tmplo, mem_base, NULL, exec));
5326695a
AS
2301 new_base = tmplo;
2302 }
2303 else
2304 {
45381d6f
AS
2305 mem_base = gen_rtx_VEC_DUPLICATE (pmode, mem_base);
2306 new_base = gen_rtx_PLUS (pmode, mem_base,
2307 gen_rtx_SIGN_EXTEND (pmode, tmplo));
5326695a
AS
2308 }
2309
2310 return gen_rtx_PLUS (GET_MODE (new_base), new_base,
2311 gen_rtx_VEC_DUPLICATE (GET_MODE (new_base),
2312 (mem_index ? mem_index
2313 : const0_rtx)));
2314}
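/* Illustration (operands assumed): expanding (mem:V64SI (reg:DI sb))
   yields, per lane n,

     addr[n] = sb + (n << 2) + index

   where the lane numbers come from the pre-initialized ramp in v1, the
   shift amount is log2 of the element size, and for FLAT address
   spaces the carry into the high 32 bits is propagated explicitly.  */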
2315
2316/* Convert a BASE address, a vector of OFFSETS, and a SCALE, to addresses
 2317   suitable for the given address space.  This is intended for use in
2318 gather/scatter patterns.
2319
2320 The offsets may be signed or unsigned, according to UNSIGNED_P.
2321 If EXEC is set then _exec patterns will be used, otherwise plain.
2322
2323 Return values.
45381d6f
AS
2324 ADDR_SPACE_FLAT - return VnDImode vector of absolute addresses.
2325 ADDR_SPACE_GLOBAL - return VnSImode vector of offsets. */
5326695a
AS
2326
2327rtx
2328gcn_expand_scaled_offsets (addr_space_t as, rtx base, rtx offsets, rtx scale,
2329 bool unsigned_p, rtx exec)
2330{
45381d6f
AS
2331 int vf = GET_MODE_NUNITS (GET_MODE (offsets));
2332 rtx tmpsi = gen_reg_rtx (VnMODE (vf, SImode));
2333 rtx tmpdi = gen_reg_rtx (VnMODE (vf, DImode));
5326695a
AS
2334
2335 if (CONST_INT_P (scale)
2336 && INTVAL (scale) > 0
2337 && exact_log2 (INTVAL (scale)) >= 0)
45381d6f
AS
2338 emit_insn (gen_ashlvNsi3 (tmpsi, offsets,
2339 GEN_INT (exact_log2 (INTVAL (scale))),
2340 NULL, exec));
5326695a 2341 else
45381d6f 2342 emit_insn (gen_mulvNsi3_dup (tmpsi, offsets, scale, NULL, exec));
5326695a
AS
2343
2344 /* "Global" instructions do not support negative register offsets. */
2345 if (as == ADDR_SPACE_FLAT || !unsigned_p)
2346 {
2347 if (unsigned_p)
45381d6f 2348 emit_insn (gen_addvNdi3_zext_dup2 (tmpdi, tmpsi, base, NULL, exec));
5326695a 2349 else
45381d6f 2350 emit_insn (gen_addvNdi3_sext_dup2 (tmpdi, tmpsi, base, NULL, exec));
5326695a
AS
2351 return tmpdi;
2352 }
2353 else if (as == ADDR_SPACE_GLOBAL)
2354 return tmpsi;
2355
2356 gcc_unreachable ();
2357}
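/* Illustration (values assumed): a gather with SCALE 4 computes either

     addr[n] = base + (u/s)ext (offsets[n]) * 4   -- FLAT, or any signed case
     off[n]  = offsets[n] << 2                    -- GLOBAL, unsigned offsets

   using the shift form whenever SCALE is a positive power of two and
   the multiply-by-broadcast form otherwise.  */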
2358
 2359/* Return true if a move from OP0 to OP1 is known to be executed in the
 2360   vector unit.  */
2361
2362bool
2363gcn_vgpr_move_p (rtx op0, rtx op1)
2364{
2365 if (MEM_P (op0) && AS_SCALAR_FLAT_P (MEM_ADDR_SPACE (op0)))
2366 return true;
2367 if (MEM_P (op1) && AS_SCALAR_FLAT_P (MEM_ADDR_SPACE (op1)))
2368 return true;
2369 return ((REG_P (op0) && VGPR_REGNO_P (REGNO (op0)))
2370 || (REG_P (op1) && VGPR_REGNO_P (REGNO (op1)))
2371 || vgpr_vector_mode_p (GET_MODE (op0)));
2372}
2373
 2374/* Return true if a move from OP0 to OP1 is known to be executed in the
 2375   scalar unit.  Used in the machine description.  */
2376
2377bool
2378gcn_sgpr_move_p (rtx op0, rtx op1)
2379{
2380 if (MEM_P (op0) && AS_SCALAR_FLAT_P (MEM_ADDR_SPACE (op0)))
2381 return true;
2382 if (MEM_P (op1) && AS_SCALAR_FLAT_P (MEM_ADDR_SPACE (op1)))
2383 return true;
ae0d2c24
AS
2384 if (!REG_P (op0)
2385 || REGNO (op0) >= FIRST_PSEUDO_REGISTER
2386 || VGPR_REGNO_P (REGNO (op0))
2387 || AVGPR_REGNO_P (REGNO (op0)))
5326695a
AS
2388 return false;
2389 if (REG_P (op1)
2390 && REGNO (op1) < FIRST_PSEUDO_REGISTER
ae0d2c24
AS
2391 && !VGPR_REGNO_P (REGNO (op1))
2392 && !AVGPR_REGNO_P (REGNO (op1)))
5326695a
AS
2393 return true;
2394 return immediate_operand (op1, VOIDmode) || memory_operand (op1, VOIDmode);
2395}
2396
2397/* Implement TARGET_SECONDARY_RELOAD.
2398
2399 The address space determines which registers can be used for loads and
2400 stores. */
2401
2402static reg_class_t
2403gcn_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
2404 machine_mode reload_mode, secondary_reload_info *sri)
2405{
2406 reg_class_t result = NO_REGS;
2407 bool spilled_pseudo =
2408 (REG_P (x) || GET_CODE (x) == SUBREG) && true_regnum (x) == -1;
2409
2410 if (dump_file && (dump_flags & TDF_DETAILS))
2411 {
2412 fprintf (dump_file, "gcn_secondary_reload: ");
2413 dump_value_slim (dump_file, x, 1);
2414 fprintf (dump_file, " %s %s:%s", (in_p ? "->" : "<-"),
2415 reg_class_names[rclass], GET_MODE_NAME (reload_mode));
2416 if (REG_P (x) || GET_CODE (x) == SUBREG)
2417 fprintf (dump_file, " (true regnum: %d \"%s\")", true_regnum (x),
2418 (true_regnum (x) >= 0
2419 && true_regnum (x) < FIRST_PSEUDO_REGISTER
2420 ? reg_names[true_regnum (x)]
2421 : (spilled_pseudo ? "stack spill" : "??")));
2422 fprintf (dump_file, "\n");
2423 }
2424
2425 /* Some callers don't use or initialize icode. */
2426 sri->icode = CODE_FOR_nothing;
2427
2428 if (MEM_P (x) || spilled_pseudo)
2429 {
2430 addr_space_t as = DEFAULT_ADDR_SPACE;
2431
2432 /* If we have a spilled pseudo, we can't find the address space
2433 directly, but we know it's in ADDR_SPACE_FLAT space for GCN3 or
2434 ADDR_SPACE_GLOBAL for GCN5. */
2435 if (MEM_P (x))
2436 as = MEM_ADDR_SPACE (x);
2437
2438 if (as == ADDR_SPACE_DEFAULT)
2439 as = DEFAULT_ADDR_SPACE;
2440
2441 switch (as)
2442 {
2443 case ADDR_SPACE_SCALAR_FLAT:
2444 result =
2445 ((!MEM_P (x) || rclass == SGPR_REGS) ? NO_REGS : SGPR_REGS);
2446 break;
2447 case ADDR_SPACE_FLAT:
2448 case ADDR_SPACE_FLAT_SCRATCH:
2449 case ADDR_SPACE_GLOBAL:
2450 if (GET_MODE_CLASS (reload_mode) == MODE_VECTOR_INT
2451 || GET_MODE_CLASS (reload_mode) == MODE_VECTOR_FLOAT)
2452 {
a0e6306b 2453 sri->icode = code_for_mov_sgprbase (reload_mode);
5326695a
AS
2454 break;
2455 }
2456 /* Fallthrough. */
2457 case ADDR_SPACE_LDS:
2458 case ADDR_SPACE_GDS:
2459 case ADDR_SPACE_SCRATCH:
2460 result = (rclass == VGPR_REGS ? NO_REGS : VGPR_REGS);
2461 break;
2462 }
ae0d2c24
AS
2463
2464 /* CDNA1 doesn't have an instruction for going between the accumulator
2465 registers and memory. Go via a VGPR in this case. */
2466 if (TARGET_CDNA1 && rclass == AVGPR_REGS && result != VGPR_REGS)
2467 result = VGPR_REGS;
5326695a
AS
2468 }
2469
2470 if (dump_file && (dump_flags & TDF_DETAILS))
2471 fprintf (dump_file, " <= %s (icode: %s)\n", reg_class_names[result],
2472 get_insn_name (sri->icode));
2473
2474 return result;
2475}
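/* Note (illustrative): on CDNA1 a spill of an accumulator register thus
   takes two steps, AVGPR -> VGPR -> memory and back, because there is
   no direct accumulator load/store instruction; the VGPR_REGS result
   above is what requests the intermediate register.  */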
2476
2477/* Update register usage after having seen the compiler flags and kernel
2478 attributes. We typically want to fix registers that contain values
2479 set by the HSA runtime. */
2480
2481static void
2482gcn_conditional_register_usage (void)
2483{
342f9464
KCY
2484 if (!cfun || !cfun->machine)
2485 return;
5326695a 2486
342f9464
KCY
2487 if (cfun->machine->normal_function)
2488 {
ae0d2c24
AS
2489 /* Restrict the set of SGPRs, VGPRs and AVGPRs used by non-kernel
2490 functions. */
f062c3f1 2491 for (int i = SGPR_REGNO (MAX_NORMAL_SGPR_COUNT);
87fdbe69 2492 i <= LAST_SGPR_REG; i++)
342f9464 2493 fixed_regs[i] = 1, call_used_regs[i] = 1;
5326695a 2494
87fdbe69
KCY
2495 for (int i = VGPR_REGNO (MAX_NORMAL_VGPR_COUNT);
2496 i <= LAST_VGPR_REG; i++)
342f9464 2497 fixed_regs[i] = 1, call_used_regs[i] = 1;
5326695a 2498
ae0d2c24
AS
2499 for (int i = AVGPR_REGNO (MAX_NORMAL_AVGPR_COUNT);
2500 i <= LAST_AVGPR_REG; i++)
2501 fixed_regs[i] = 1, call_used_regs[i] = 1;
5326695a
AS
2502 return;
2503 }
2504
342f9464
KCY
2505 /* If the set of requested args is the default set, nothing more needs to
2506 be done. */
2507 if (cfun->machine->args.requested == default_requested_args)
2508 return;
2509
2510 /* Requesting a set of args different from the default violates the ABI. */
2511 if (!leaf_function_p ())
2512 warning (0, "A non-default set of initial values has been requested, "
55308fc2 2513 "which violates the ABI");
342f9464
KCY
2514
2515 for (int i = SGPR_REGNO (0); i < SGPR_REGNO (14); i++)
2516 fixed_regs[i] = 0;
2517
5326695a
AS
2518 /* Fix the runtime argument register containing values that may be
2519 needed later. DISPATCH_PTR_ARG and FLAT_SCRATCH_* should not be
2520 needed after the prologue so there's no need to fix them. */
2521 if (cfun->machine->args.reg[PRIVATE_SEGMENT_WAVE_OFFSET_ARG] >= 0)
2522 fixed_regs[cfun->machine->args.reg[PRIVATE_SEGMENT_WAVE_OFFSET_ARG]] = 1;
2523 if (cfun->machine->args.reg[PRIVATE_SEGMENT_BUFFER_ARG] >= 0)
2524 {
342f9464
KCY
2525 /* The upper 32-bits of the 64-bit descriptor are not used, so allow
2526 the containing registers to be used for other purposes. */
5326695a
AS
2527 fixed_regs[cfun->machine->args.reg[PRIVATE_SEGMENT_BUFFER_ARG]] = 1;
2528 fixed_regs[cfun->machine->args.reg[PRIVATE_SEGMENT_BUFFER_ARG] + 1] = 1;
5326695a
AS
2529 }
2530 if (cfun->machine->args.reg[KERNARG_SEGMENT_PTR_ARG] >= 0)
2531 {
2532 fixed_regs[cfun->machine->args.reg[KERNARG_SEGMENT_PTR_ARG]] = 1;
2533 fixed_regs[cfun->machine->args.reg[KERNARG_SEGMENT_PTR_ARG] + 1] = 1;
2534 }
2535 if (cfun->machine->args.reg[DISPATCH_PTR_ARG] >= 0)
2536 {
2537 fixed_regs[cfun->machine->args.reg[DISPATCH_PTR_ARG]] = 1;
2538 fixed_regs[cfun->machine->args.reg[DISPATCH_PTR_ARG] + 1] = 1;
2539 }
f6fff8a6
AS
2540 if (cfun->machine->args.reg[QUEUE_PTR_ARG] >= 0)
2541 {
2542 fixed_regs[cfun->machine->args.reg[QUEUE_PTR_ARG]] = 1;
2543 fixed_regs[cfun->machine->args.reg[QUEUE_PTR_ARG] + 1] = 1;
2544 }
5326695a
AS
2545 if (cfun->machine->args.reg[WORKGROUP_ID_X_ARG] >= 0)
2546 fixed_regs[cfun->machine->args.reg[WORKGROUP_ID_X_ARG]] = 1;
2547 if (cfun->machine->args.reg[WORK_ITEM_ID_X_ARG] >= 0)
2548 fixed_regs[cfun->machine->args.reg[WORK_ITEM_ID_X_ARG]] = 1;
2549 if (cfun->machine->args.reg[WORK_ITEM_ID_Y_ARG] >= 0)
2550 fixed_regs[cfun->machine->args.reg[WORK_ITEM_ID_Y_ARG]] = 1;
2551 if (cfun->machine->args.reg[WORK_ITEM_ID_Z_ARG] >= 0)
2552 fixed_regs[cfun->machine->args.reg[WORK_ITEM_ID_Z_ARG]] = 1;
5326695a
AS
2553}
2554
ae0d2c24
AS
2555static bool
2556gcn_vgpr_equivalent_register_operand (rtx x, machine_mode mode)
2557{
2558 if (gcn_vgpr_register_operand (x, mode))
2559 return true;
2560 if (TARGET_CDNA2_PLUS && gcn_avgpr_register_operand (x, mode))
2561 return true;
2562 return false;
2563}
2564
5326695a
AS
2565/* Determine if a load or store is valid, according to the register classes
2566 and address space. Used primarily by the machine description to decide
2567 when to split a move into two steps. */
2568
2569bool
2570gcn_valid_move_p (machine_mode mode, rtx dest, rtx src)
2571{
2572 if (!MEM_P (dest) && !MEM_P (src))
ae0d2c24
AS
2573 {
2574 if (gcn_vgpr_register_operand (src, mode)
2575 && gcn_avgpr_register_operand (dest, mode))
2576 return true;
2577 if (gcn_avgpr_register_operand (src, mode)
2578 && gcn_vgpr_register_operand (dest, mode))
2579 return true;
2580 if (TARGET_CDNA2_PLUS
2581 && gcn_avgpr_register_operand (src, mode)
2582 && gcn_avgpr_register_operand (dest, mode))
2583 return true;
2584 if (gcn_avgpr_hard_register_operand (src, mode)
2585 || gcn_avgpr_hard_register_operand (dest, mode))
2586 return false;
2587 return true;
2588 }
5326695a
AS
2589
2590 if (MEM_P (dest)
2591 && AS_FLAT_P (MEM_ADDR_SPACE (dest))
2592 && (gcn_flat_address_p (XEXP (dest, 0), mode)
2593 || GET_CODE (XEXP (dest, 0)) == SYMBOL_REF
2594 || GET_CODE (XEXP (dest, 0)) == LABEL_REF)
ae0d2c24 2595 && gcn_vgpr_equivalent_register_operand (src, mode))
5326695a
AS
2596 return true;
2597 else if (MEM_P (src)
2598 && AS_FLAT_P (MEM_ADDR_SPACE (src))
2599 && (gcn_flat_address_p (XEXP (src, 0), mode)
2600 || GET_CODE (XEXP (src, 0)) == SYMBOL_REF
2601 || GET_CODE (XEXP (src, 0)) == LABEL_REF)
ae0d2c24 2602 && gcn_vgpr_equivalent_register_operand (dest, mode))
5326695a
AS
2603 return true;
2604
2605 if (MEM_P (dest)
2606 && AS_GLOBAL_P (MEM_ADDR_SPACE (dest))
2607 && (gcn_global_address_p (XEXP (dest, 0))
2608 || GET_CODE (XEXP (dest, 0)) == SYMBOL_REF
2609 || GET_CODE (XEXP (dest, 0)) == LABEL_REF)
ae0d2c24 2610 && gcn_vgpr_equivalent_register_operand (src, mode))
5326695a
AS
2611 return true;
2612 else if (MEM_P (src)
2613 && AS_GLOBAL_P (MEM_ADDR_SPACE (src))
2614 && (gcn_global_address_p (XEXP (src, 0))
2615 || GET_CODE (XEXP (src, 0)) == SYMBOL_REF
2616 || GET_CODE (XEXP (src, 0)) == LABEL_REF)
ae0d2c24 2617 && gcn_vgpr_equivalent_register_operand (dest, mode))
5326695a
AS
2618 return true;
2619
2620 if (MEM_P (dest)
2621 && MEM_ADDR_SPACE (dest) == ADDR_SPACE_SCALAR_FLAT
2622 && (gcn_scalar_flat_address_p (XEXP (dest, 0))
2623 || GET_CODE (XEXP (dest, 0)) == SYMBOL_REF
2624 || GET_CODE (XEXP (dest, 0)) == LABEL_REF)
2625 && gcn_ssrc_register_operand (src, mode))
2626 return true;
2627 else if (MEM_P (src)
2628 && MEM_ADDR_SPACE (src) == ADDR_SPACE_SCALAR_FLAT
2629 && (gcn_scalar_flat_address_p (XEXP (src, 0))
2630 || GET_CODE (XEXP (src, 0)) == SYMBOL_REF
2631 || GET_CODE (XEXP (src, 0)) == LABEL_REF)
2632 && gcn_sdst_register_operand (dest, mode))
2633 return true;
2634
2635 if (MEM_P (dest)
2636 && AS_ANY_DS_P (MEM_ADDR_SPACE (dest))
2637 && gcn_ds_address_p (XEXP (dest, 0))
ae0d2c24 2638 && gcn_vgpr_equivalent_register_operand (src, mode))
5326695a
AS
2639 return true;
2640 else if (MEM_P (src)
2641 && AS_ANY_DS_P (MEM_ADDR_SPACE (src))
2642 && gcn_ds_address_p (XEXP (src, 0))
ae0d2c24 2643 && gcn_vgpr_equivalent_register_operand (dest, mode))
5326695a
AS
2644 return true;
2645
2646 return false;
2647}
2648
2649/* }}} */
2650/* {{{ Functions and ABI. */
2651
2652/* Implement TARGET_FUNCTION_VALUE.
2653
2654 Define how to find the value returned by a function.
2655 The register location is always the same, but the mode depends on
2656 VALTYPE. */
2657
2658static rtx
2659gcn_function_value (const_tree valtype, const_tree, bool)
2660{
2661 machine_mode mode = TYPE_MODE (valtype);
2662
2663 if (INTEGRAL_TYPE_P (valtype)
2664 && GET_MODE_CLASS (mode) == MODE_INT
2665 && GET_MODE_SIZE (mode) < 4)
2666 mode = SImode;
2667
4e191462 2668 return gen_rtx_REG (mode, RETURN_VALUE_REG);
5326695a
AS
2669}
2670
2671/* Implement TARGET_FUNCTION_VALUE_REGNO_P.
2672
2673 Return true if N is a possible register number for the function return
2674 value. */
2675
2676static bool
2677gcn_function_value_regno_p (const unsigned int n)
2678{
2679 return n == RETURN_VALUE_REG;
2680}
2681
0ffef200
RS
2682/* Calculate the number of registers required to hold function argument
2683 ARG. */
5326695a
AS
2684
2685static int
0ffef200 2686num_arg_regs (const function_arg_info &arg)
5326695a 2687{
0ffef200 2688 if (targetm.calls.must_pass_in_stack (arg))
5326695a
AS
2689 return 0;
2690
0ffef200 2691 int size = arg.promoted_size_in_bytes ();
4e191462
AS
2692 int regsize = UNITS_PER_WORD * (VECTOR_MODE_P (arg.mode)
2693 ? GET_MODE_NUNITS (arg.mode) : 1);
2694 return (size + regsize - 1) / regsize;
5326695a
AS
2695}
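/* Worked example (illustrative): with UNITS_PER_WORD == 4, a DImode
   argument needs (8 + 3) / 4 == 2 registers, while a V64SImode
   argument needs (256 + 255) / 256 == 1 register, since the register
   size scales with the number of vector lanes.  */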
2696
2697/* Implement TARGET_STRICT_ARGUMENT_NAMING.
2698
2699 Return true if the location where a function argument is passed
 2700   depends on whether or not it is a named argument.
2701
2702 For gcn, we know how to handle functions declared as stdarg: by
2703 passing an extra pointer to the unnamed arguments. However, the
2704 Fortran frontend can produce a different situation, where a
2705 function pointer is declared with no arguments, but the actual
2706 function and calls to it take more arguments. In that case, we
2707 want to ensure the call matches the definition of the function. */
2708
2709static bool
2710gcn_strict_argument_naming (cumulative_args_t cum_v)
2711{
2712 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
2713
2714 return cum->fntype == NULL_TREE || stdarg_p (cum->fntype);
2715}
2716
2717/* Implement TARGET_PRETEND_OUTGOING_VARARGS_NAMED.
2718
2719 See comment on gcn_strict_argument_naming. */
2720
2721static bool
2722gcn_pretend_outgoing_varargs_named (cumulative_args_t cum_v)
2723{
2724 return !gcn_strict_argument_naming (cum_v);
2725}
2726
2727/* Implement TARGET_FUNCTION_ARG.
2728
2729 Return an RTX indicating whether a function argument is passed in a register
2730 and if so, which register. */
2731
2732static rtx
6783fdb7 2733gcn_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
5326695a
AS
2734{
2735 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
2736 if (cum->normal_function)
2737 {
6783fdb7 2738 if (!arg.named || arg.end_marker_p ())
5326695a
AS
2739 return 0;
2740
0ffef200 2741 if (targetm.calls.must_pass_in_stack (arg))
5326695a
AS
2742 return 0;
2743
4e191462
AS
2744 int first_reg = (VECTOR_MODE_P (arg.mode)
2745 ? FIRST_VPARM_REG : FIRST_PARM_REG);
2746 int cum_num = (VECTOR_MODE_P (arg.mode)
2747 ? cum->vnum : cum->num);
2748 int reg_num = first_reg + cum_num;
0ffef200 2749 int num_regs = num_arg_regs (arg);
5326695a
AS
2750 if (num_regs > 0)
2751 while (reg_num % num_regs != 0)
2752 reg_num++;
4e191462 2753 if (reg_num + num_regs <= first_reg + NUM_PARM_REGS)
6783fdb7 2754 return gen_rtx_REG (arg.mode, reg_num);
5326695a
AS
2755 }
2756 else
2757 {
2758 if (cum->num >= cum->args.nargs)
2759 {
6783fdb7
RS
2760 cum->offset = (cum->offset + TYPE_ALIGN (arg.type) / 8 - 1)
2761 & -(TYPE_ALIGN (arg.type) / 8);
5326695a
AS
2762 cfun->machine->kernarg_segment_alignment
2763 = MAX ((unsigned) cfun->machine->kernarg_segment_alignment,
6783fdb7 2764 TYPE_ALIGN (arg.type) / 8);
5326695a
AS
2765 rtx addr = gen_rtx_REG (DImode,
2766 cum->args.reg[KERNARG_SEGMENT_PTR_ARG]);
2767 if (cum->offset)
2768 addr = gen_rtx_PLUS (DImode, addr,
2769 gen_int_mode (cum->offset, DImode));
6783fdb7
RS
2770 rtx mem = gen_rtx_MEM (arg.mode, addr);
2771 set_mem_attributes (mem, arg.type, 1);
5326695a
AS
2772 set_mem_addr_space (mem, ADDR_SPACE_SCALAR_FLAT);
2773 MEM_READONLY_P (mem) = 1;
2774 return mem;
2775 }
2776
2777 int a = cum->args.order[cum->num];
6783fdb7 2778 if (arg.mode != gcn_kernel_arg_types[a].mode)
5326695a
AS
2779 {
2780 error ("wrong type of argument %s", gcn_kernel_arg_types[a].name);
2781 return 0;
2782 }
2783 return gen_rtx_REG ((machine_mode) gcn_kernel_arg_types[a].mode,
2784 cum->args.reg[a]);
2785 }
2786 return 0;
2787}
2788
2789/* Implement TARGET_FUNCTION_ARG_ADVANCE.
2790
2791 Updates the summarizer variable pointed to by CUM_V to advance past an
2792 argument in the argument list. */
2793
2794static void
6930c98c
RS
2795gcn_function_arg_advance (cumulative_args_t cum_v,
2796 const function_arg_info &arg)
5326695a
AS
2797{
2798 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
2799
2800 if (cum->normal_function)
2801 {
6930c98c 2802 if (!arg.named)
5326695a
AS
2803 return;
2804
4e191462
AS
2805 int first_reg = (VECTOR_MODE_P (arg.mode)
2806 ? FIRST_VPARM_REG : FIRST_PARM_REG);
2807 int *cum_num = (VECTOR_MODE_P (arg.mode)
2808 ? &cum->vnum : &cum->num);
0ffef200 2809 int num_regs = num_arg_regs (arg);
5326695a 2810 if (num_regs > 0)
4e191462
AS
2811 while ((first_reg + *cum_num) % num_regs != 0)
2812 (*cum_num)++;
2813 *cum_num += num_regs;
5326695a
AS
2814 }
2815 else
2816 {
2817 if (cum->num < cum->args.nargs)
2818 cum->num++;
2819 else
2820 {
6930c98c 2821 cum->offset += tree_to_uhwi (TYPE_SIZE_UNIT (arg.type));
5326695a
AS
2822 cfun->machine->kernarg_segment_byte_size = cum->offset;
2823 }
2824 }
2825}
2826
2827/* Implement TARGET_ARG_PARTIAL_BYTES.
2828
2829 Returns the number of bytes at the beginning of an argument that must be put
2830 in registers. The value must be zero for arguments that are passed entirely
2831 in registers or that are entirely pushed on the stack. */
2832
2833static int
a7c81bc1 2834gcn_arg_partial_bytes (cumulative_args_t cum_v, const function_arg_info &arg)
5326695a
AS
2835{
2836 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
2837
a7c81bc1 2838 if (!arg.named)
5326695a
AS
2839 return 0;
2840
0ffef200 2841 if (targetm.calls.must_pass_in_stack (arg))
5326695a
AS
2842 return 0;
2843
4e191462
AS
2844 int cum_num = (VECTOR_MODE_P (arg.mode) ? cum->vnum : cum->num);
2845 int regsize = UNITS_PER_WORD * (VECTOR_MODE_P (arg.mode)
2846 ? GET_MODE_NUNITS (arg.mode) : 1);
2847
2848 if (cum_num >= NUM_PARM_REGS)
5326695a
AS
2849 return 0;
2850
2851 /* If the argument fits entirely in registers, return 0. */
4e191462 2852 if (cum_num + num_arg_regs (arg) <= NUM_PARM_REGS)
5326695a
AS
2853 return 0;
2854
4e191462 2855 return (NUM_PARM_REGS - cum_num) * regsize;
5326695a
AS
2856}
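/* Illustration (counts assumed for the example): if an argument would
   need four words but only two parameter registers remain below
   NUM_PARM_REGS, the return value is 2 * UNITS_PER_WORD: two words
   travel in registers and the remainder is pushed on the stack.  */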
2857
7c55755d
JB
2858/* A normal function which takes a pointer argument may be passed a pointer to
2859 LDS space (via a high-bits-set aperture), and that only works with FLAT
2860 addressing, not GLOBAL. Force FLAT addressing if the function has an
2861 incoming pointer parameter. NOTE: This is a heuristic that works in the
2862 offloading case, but in general, a function might read global pointer
2863 variables, etc. that may refer to LDS space or other special memory areas
2864 not supported by GLOBAL instructions, and then this argument check would not
2865 suffice. */
5326695a
AS
2866
2867static void
2868gcn_detect_incoming_pointer_arg (tree fndecl)
2869{
2870 gcc_assert (cfun && cfun->machine);
2871
2872 for (tree arg = TYPE_ARG_TYPES (TREE_TYPE (fndecl));
2873 arg;
2874 arg = TREE_CHAIN (arg))
7c55755d 2875 if (POINTER_TYPE_P (TREE_VALUE (arg)))
5326695a
AS
2876 cfun->machine->use_flat_addressing = true;
2877}
2878
2879/* Implement INIT_CUMULATIVE_ARGS, via gcn.h.
2880
2881 Initialize a variable CUM of type CUMULATIVE_ARGS for a call to a function
2882 whose data type is FNTYPE. For a library call, FNTYPE is 0. */
2883
2884void
2885gcn_init_cumulative_args (CUMULATIVE_ARGS *cum /* Argument info to init */ ,
2886 tree fntype /* tree ptr for function decl */ ,
2887 rtx libname /* SYMBOL_REF of library name or 0 */ ,
2888 tree fndecl, int caller)
2889{
2890 memset (cum, 0, sizeof (*cum));
2891 cum->fntype = fntype;
2892 if (libname)
2893 {
2894 gcc_assert (cfun && cfun->machine);
2895 cum->normal_function = true;
2896 if (!caller)
2897 {
2898 cfun->machine->normal_function = true;
2899 gcn_detect_incoming_pointer_arg (fndecl);
2900 }
2901 return;
2902 }
2903 tree attr = NULL;
2904 if (fndecl)
2905 attr = lookup_attribute ("amdgpu_hsa_kernel", DECL_ATTRIBUTES (fndecl));
2906 if (fndecl && !attr)
2907 attr = lookup_attribute ("amdgpu_hsa_kernel",
2908 TYPE_ATTRIBUTES (TREE_TYPE (fndecl)));
2909 if (!attr && fntype)
2910 attr = lookup_attribute ("amdgpu_hsa_kernel", TYPE_ATTRIBUTES (fntype));
 2911  /* Handle main () as a kernel, so we can run the testsuite.
2912 Handle OpenACC kernels similarly to main. */
2913 if (!attr && !caller && fndecl
2914 && (MAIN_NAME_P (DECL_NAME (fndecl))
2915 || lookup_attribute ("omp target entrypoint",
2916 DECL_ATTRIBUTES (fndecl)) != NULL_TREE))
2917 gcn_parse_amdgpu_hsa_kernel_attribute (&cum->args, NULL_TREE);
2918 else
2919 {
2920 if (!attr || caller)
2921 {
2922 gcc_assert (cfun && cfun->machine);
2923 cum->normal_function = true;
2924 if (!caller)
2925 cfun->machine->normal_function = true;
2926 }
2927 gcn_parse_amdgpu_hsa_kernel_attribute
2928 (&cum->args, attr ? TREE_VALUE (attr) : NULL_TREE);
2929 }
2930 cfun->machine->args = cum->args;
2931 if (!caller && cfun->machine->normal_function)
2932 gcn_detect_incoming_pointer_arg (fndecl);
3ed8f692
KCY
2933
2934 reinit_regs ();
5326695a
AS
2935}
2936
2937static bool
2938gcn_return_in_memory (const_tree type, const_tree ARG_UNUSED (fntype))
2939{
2940 machine_mode mode = TYPE_MODE (type);
2941 HOST_WIDE_INT size = int_size_in_bytes (type);
2942
2943 if (AGGREGATE_TYPE_P (type))
2944 return true;
2945
2946 if (mode == BLKmode)
2947 return true;
2948
4e191462
AS
2949 if ((!VECTOR_TYPE_P (type) && size > 2 * UNITS_PER_WORD)
2950 || size > 2 * UNITS_PER_WORD * 64)
5326695a
AS
2951 return true;
2952
2953 return false;
2954}
2955
2956/* Implement TARGET_PROMOTE_FUNCTION_MODE.
2957
2958 Return the mode to use for outgoing function arguments. */
2959
2960machine_mode
2961gcn_promote_function_mode (const_tree ARG_UNUSED (type), machine_mode mode,
2962 int *ARG_UNUSED (punsignedp),
2963 const_tree ARG_UNUSED (funtype),
2964 int ARG_UNUSED (for_return))
2965{
2966 if (GET_MODE_CLASS (mode) == MODE_INT && GET_MODE_SIZE (mode) < 4)
2967 return SImode;
2968
2969 return mode;
2970}
2971
2972/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.
2973
2974 Derived from hppa_gimplify_va_arg_expr. The generic routine doesn't handle
2975 ARGS_GROW_DOWNWARDS. */
2976
2977static tree
2978gcn_gimplify_va_arg_expr (tree valist, tree type,
2979 gimple_seq *ARG_UNUSED (pre_p),
2980 gimple_seq *ARG_UNUSED (post_p))
2981{
2982 tree ptr = build_pointer_type (type);
2983 tree valist_type;
2984 tree t, u;
2985 bool indirect;
2986
fde65a89 2987 indirect = pass_va_arg_by_reference (type);
5326695a
AS
2988 if (indirect)
2989 {
2990 type = ptr;
2991 ptr = build_pointer_type (type);
2992 }
2993 valist_type = TREE_TYPE (valist);
2994
2995 /* Args grow down. Not handled by generic routines. */
2996
2997 u = fold_convert (sizetype, size_in_bytes (type));
2998 u = fold_build1 (NEGATE_EXPR, sizetype, u);
2999 t = fold_build_pointer_plus (valist, u);
3000
3001 /* Align to 8 byte boundary. */
3002
3003 u = build_int_cst (TREE_TYPE (t), -8);
3004 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, u);
3005 t = fold_convert (valist_type, t);
3006
3007 t = build2 (MODIFY_EXPR, valist_type, valist, t);
3008
3009 t = fold_convert (ptr, t);
3010 t = build_va_arg_indirect_ref (t);
3011
3012 if (indirect)
3013 t = build_va_arg_indirect_ref (t);
3014
3015 return t;
3016}
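/* Sketch of the pointer arithmetic built above, as plain C for a
   4-byte int (hypothetical names; args grow downwards here):

     valist = (char *) (((uintptr_t) valist - sizeof (int)) & -8);
     result = *(int *) valist;
*/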
3017
955cd057
TB
3018/* Return 1 if TRAIT NAME is present in the OpenMP context's
3019 device trait set, return 0 if not present in any OpenMP context in the
3020 whole translation unit, or -1 if not present in the current OpenMP context
3021 but might be present in another OpenMP context in the same TU. */
3022
3023int
3024gcn_omp_device_kind_arch_isa (enum omp_device_kind_arch_isa trait,
3025 const char *name)
3026{
3027 switch (trait)
3028 {
3029 case omp_device_kind:
3030 return strcmp (name, "gpu") == 0;
3031 case omp_device_arch:
ebe5dace 3032 return strcmp (name, "amdgcn") == 0 || strcmp (name, "gcn") == 0;
955cd057 3033 case omp_device_isa:
1fd50874 3034 if (strcmp (name, "fiji") == 0 || strcmp (name, "gfx803") == 0)
955cd057
TB
3035 return gcn_arch == PROCESSOR_FIJI;
3036 if (strcmp (name, "gfx900") == 0)
f062c3f1 3037 return gcn_arch == PROCESSOR_VEGA10;
955cd057 3038 if (strcmp (name, "gfx906") == 0)
f062c3f1 3039 return gcn_arch == PROCESSOR_VEGA20;
3535402e
AS
3040 if (strcmp (name, "gfx908") == 0)
3041 return gcn_arch == PROCESSOR_GFX908;
cde52d3a
AS
3042 if (strcmp (name, "gfx90a") == 0)
3043 return gcn_arch == PROCESSOR_GFX90a;
c7ec7bd1
AS
3044 if (strcmp (name, "gfx1030") == 0)
3045 return gcn_arch == PROCESSOR_GFX1030;
955cd057
TB
3046 return 0;
3047 default:
3048 gcc_unreachable ();
3049 }
3050}
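/* These values back OpenMP context selectors, e.g. (illustrative):

     #pragma omp metadirective \
	 when (device={arch("amdgcn"), isa("gfx90a")} : ...) \
	 default (...)
*/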
3051
5326695a
AS
3052/* Calculate stack offsets needed to create prologues and epilogues. */
3053
3054static struct machine_function *
3055gcn_compute_frame_offsets (void)
3056{
3057 machine_function *offsets = cfun->machine;
3058
3059 if (reload_completed)
3060 return offsets;
3061
3062 offsets->need_frame_pointer = frame_pointer_needed;
3063
3064 offsets->outgoing_args_size = crtl->outgoing_args_size;
3065 offsets->pretend_size = crtl->args.pretend_args_size;
3066
3067 offsets->local_vars = get_frame_size ();
3068
3069 offsets->lr_needs_saving = (!leaf_function_p ()
3070 || df_regs_ever_live_p (LR_REGNUM)
3071 || df_regs_ever_live_p (LR_REGNUM + 1));
3072
3073 offsets->callee_saves = offsets->lr_needs_saving ? 8 : 0;
3074
3075 for (int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
a365fa06 3076 if ((df_regs_ever_live_p (regno) && !call_used_or_fixed_reg_p (regno))
5326695a
AS
3077 || ((regno & ~1) == HARD_FRAME_POINTER_REGNUM
3078 && frame_pointer_needed))
ae0d2c24
AS
3079 offsets->callee_saves += (VGPR_REGNO_P (regno)
3080 || AVGPR_REGNO_P (regno) ? 256 : 4);
5326695a
AS
3081
3082 /* Round up to 64-bit boundary to maintain stack alignment. */
3083 offsets->callee_saves = (offsets->callee_saves + 7) & ~7;
3084
3085 return offsets;
3086}
3087
3088/* Insert code into the prologue or epilogue to store or load any
3089 callee-save register to/from the stack.
3090
3091 Helper function for gcn_expand_prologue and gcn_expand_epilogue. */
3092
3093static void
3094move_callee_saved_registers (rtx sp, machine_function *offsets,
3095 bool prologue)
3096{
3097 int regno, offset, saved_scalars;
3098 rtx exec = gen_rtx_REG (DImode, EXEC_REG);
3099 rtx vcc = gen_rtx_REG (DImode, VCC_LO_REG);
3100 rtx offreg = gen_rtx_REG (SImode, SGPR_REGNO (22));
3101 rtx as = gen_rtx_CONST_INT (VOIDmode, STACK_ADDR_SPACE);
3102 HOST_WIDE_INT exec_set = 0;
3103 int offreg_set = 0;
251697a6 3104 auto_vec<int> saved_sgprs;
5326695a
AS
3105
3106 start_sequence ();
3107
3108 /* Move scalars into two vector registers. */
3109 for (regno = 0, saved_scalars = 0; regno < FIRST_VGPR_REG; regno++)
a365fa06 3110 if ((df_regs_ever_live_p (regno) && !call_used_or_fixed_reg_p (regno))
5326695a
AS
3111 || ((regno & ~1) == LINK_REGNUM && offsets->lr_needs_saving)
3112 || ((regno & ~1) == HARD_FRAME_POINTER_REGNUM
3113 && offsets->need_frame_pointer))
3114 {
3115 rtx reg = gen_rtx_REG (SImode, regno);
3116 rtx vreg = gen_rtx_REG (V64SImode,
3117 VGPR_REGNO (6 + (saved_scalars / 64)));
3118 int lane = saved_scalars % 64;
3119
3120 if (prologue)
251697a6
HAQ
3121 {
3122 emit_insn (gen_vec_setv64si (vreg, reg, GEN_INT (lane)));
3123 saved_sgprs.safe_push (regno);
3124 }
5326695a
AS
3125 else
3126 emit_insn (gen_vec_extractv64sisi (reg, vreg, GEN_INT (lane)));
3127
3128 saved_scalars++;
3129 }
3130
3131 rtx move_scalars = get_insns ();
3132 end_sequence ();
3133 start_sequence ();
3134
3135 /* Ensure that all vector lanes are moved. */
3136 exec_set = -1;
3137 emit_move_insn (exec, GEN_INT (exec_set));
3138
3139 /* Set up a vector stack pointer. */
3140 rtx _0_1_2_3 = gen_rtx_REG (V64SImode, VGPR_REGNO (1));
3141 rtx _0_4_8_12 = gen_rtx_REG (V64SImode, VGPR_REGNO (3));
3142 emit_insn (gen_ashlv64si3_exec (_0_4_8_12, _0_1_2_3, GEN_INT (2),
3143 gcn_gen_undef (V64SImode), exec));
3144 rtx vsp = gen_rtx_REG (V64DImode, VGPR_REGNO (4));
3145 emit_insn (gen_vec_duplicatev64di_exec (vsp, sp, gcn_gen_undef (V64DImode),
3146 exec));
3147 emit_insn (gen_addv64si3_vcc_exec (gcn_operand_part (V64SImode, vsp, 0),
3148 gcn_operand_part (V64SImode, vsp, 0),
3149 _0_4_8_12, vcc, gcn_gen_undef (V64SImode),
3150 exec));
3151 emit_insn (gen_addcv64si3_exec (gcn_operand_part (V64SImode, vsp, 1),
3152 gcn_operand_part (V64SImode, vsp, 1),
3153 const0_rtx, vcc, vcc,
3154 gcn_gen_undef (V64SImode), exec));
3155
3156 /* Move vectors. */
251697a6 3157 for (regno = FIRST_VGPR_REG, offset = 0;
5326695a 3158 regno < FIRST_PSEUDO_REGISTER; regno++)
a365fa06 3159 if ((df_regs_ever_live_p (regno) && !call_used_or_fixed_reg_p (regno))
5326695a
AS
3160 || (regno == VGPR_REGNO (6) && saved_scalars > 0)
3161 || (regno == VGPR_REGNO (7) && saved_scalars > 63))
3162 {
3163 rtx reg = gen_rtx_REG (V64SImode, regno);
3164 int size = 256;
3165
3166 if (regno == VGPR_REGNO (6) && saved_scalars < 64)
3167 size = saved_scalars * 4;
3168 else if (regno == VGPR_REGNO (7) && saved_scalars < 128)
3169 size = (saved_scalars - 64) * 4;
3170
3171 if (size != 256 || exec_set != -1)
3172 {
3173 exec_set = ((unsigned HOST_WIDE_INT) 1 << (size / 4)) - 1;
3174 emit_move_insn (exec, gen_int_mode (exec_set, DImode));
3175 }
3176
3177 if (prologue)
251697a6
HAQ
3178 {
3179 rtx insn = emit_insn (gen_scatterv64si_insn_1offset_exec
3180 (vsp, const0_rtx, reg, as, const0_rtx,
3181 exec));
3182
3183 /* Add CFI metadata. */
3184 rtx note;
3185 if (regno == VGPR_REGNO (6) || regno == VGPR_REGNO (7))
3186 {
3187 int start = (regno == VGPR_REGNO (7) ? 64 : 0);
3188 int count = MIN (saved_scalars - start, 64);
3189 int add_lr = (regno == VGPR_REGNO (6)
58d50a5d 3190 && offsets->lr_needs_saving);
251697a6
HAQ
3191 int lrdest = -1;
3192 rtvec seq = rtvec_alloc (count + add_lr);
3193
3194 /* Add an REG_FRAME_RELATED_EXPR entry for each scalar
3195 register that was saved in this batch. */
3196 for (int idx = 0; idx < count; idx++)
3197 {
3198 int stackaddr = offset + idx * 4;
3199 rtx dest = gen_rtx_MEM (SImode,
3200 gen_rtx_PLUS
3201 (DImode, sp,
3202 GEN_INT (stackaddr)));
3203 rtx src = gen_rtx_REG (SImode, saved_sgprs[start + idx]);
3204 rtx set = gen_rtx_SET (dest, src);
3205 RTX_FRAME_RELATED_P (set) = 1;
3206 RTVEC_ELT (seq, idx) = set;
3207
3208 if (saved_sgprs[start + idx] == LINK_REGNUM)
3209 lrdest = stackaddr;
3210 }
3211
3212 /* Add an additional expression for DWARF_LINK_REGISTER if
3213 LINK_REGNUM was saved. */
3214 if (lrdest != -1)
3215 {
3216 rtx dest = gen_rtx_MEM (DImode,
3217 gen_rtx_PLUS
3218 (DImode, sp,
3219 GEN_INT (lrdest)));
3220 rtx src = gen_rtx_REG (DImode, DWARF_LINK_REGISTER);
3221 rtx set = gen_rtx_SET (dest, src);
3222 RTX_FRAME_RELATED_P (set) = 1;
3223 RTVEC_ELT (seq, count) = set;
3224 }
3225
3226 note = gen_rtx_SEQUENCE (VOIDmode, seq);
3227 }
3228 else
3229 {
3230 rtx dest = gen_rtx_MEM (V64SImode,
3231 gen_rtx_PLUS (DImode, sp,
3232 GEN_INT (offset)));
3233 rtx src = gen_rtx_REG (V64SImode, regno);
3234 note = gen_rtx_SET (dest, src);
3235 }
3236 RTX_FRAME_RELATED_P (insn) = 1;
3237 add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
3238 }
5326695a
AS
3239 else
3240 emit_insn (gen_gatherv64si_insn_1offset_exec
3241 (reg, vsp, const0_rtx, as, const0_rtx,
3242 gcn_gen_undef (V64SImode), exec));
3243
3244 /* Move our VSP to the next stack entry. */
3245 if (offreg_set != size)
3246 {
3247 offreg_set = size;
3248 emit_move_insn (offreg, GEN_INT (size));
3249 }
3250 if (exec_set != -1)
3251 {
3252 exec_set = -1;
3253 emit_move_insn (exec, GEN_INT (exec_set));
3254 }
3255 emit_insn (gen_addv64si3_vcc_dup_exec
3256 (gcn_operand_part (V64SImode, vsp, 0),
3257 offreg, gcn_operand_part (V64SImode, vsp, 0),
3258 vcc, gcn_gen_undef (V64SImode), exec));
3259 emit_insn (gen_addcv64si3_exec
3260 (gcn_operand_part (V64SImode, vsp, 1),
3261 gcn_operand_part (V64SImode, vsp, 1),
3262 const0_rtx, vcc, vcc, gcn_gen_undef (V64SImode), exec));
3263
3264 offset += size;
3265 }
3266
3267 rtx move_vectors = get_insns ();
3268 end_sequence ();
3269
3270 if (prologue)
3271 {
3272 emit_insn (move_scalars);
3273 emit_insn (move_vectors);
3274 }
3275 else
3276 {
3277 emit_insn (move_vectors);
3278 emit_insn (move_scalars);
3279 }
3b97715a
AS
3280
3281 /* This happens when a new register becomes "live" after reload.
3282 Check your splitters! */
3283 gcc_assert (offset <= offsets->callee_saves);
5326695a
AS
3284}
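/* Save-layout illustration for the code above: live SGPRs are packed
   into the lanes of v6 (and v7 once more than 64 scalars are live),
   and those vectors are then scattered to the stack alongside any live
   VGPRs, each full vector occupying a 256-byte (64 lanes x 4 bytes)
   slot.  */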
3285
3286/* Generate prologue. Called from gen_prologue during pro_and_epilogue pass.
3287
3288 For a non-kernel function, the stack layout looks like this (interim),
3289 growing *upwards*:
3290
3291 hi | + ...
3292 |__________________| <-- current SP
3293 | outgoing args |
3294 |__________________|
3295 | (alloca space) |
3296 |__________________|
3297 | local vars |
3298 |__________________| <-- FP/hard FP
3299 | callee-save regs |
3300 |__________________| <-- soft arg pointer
3301 | pretend args |
3302 |__________________| <-- incoming SP
3303 | incoming args |
3304 lo |..................|
3305
3306 This implies arguments (beyond the first N in registers) must grow
3307 downwards (as, apparently, PA has them do).
3308
3309 For a kernel function we have the simpler:
3310
3311 hi | + ...
3312 |__________________| <-- current SP
3313 | outgoing args |
3314 |__________________|
3315 | (alloca space) |
3316 |__________________|
3317 | local vars |
3318 lo |__________________| <-- FP/hard FP
3319
3320*/
3321
3322void
3323gcn_expand_prologue ()
3324{
3325 machine_function *offsets = gcn_compute_frame_offsets ();
3326
3327 if (!cfun || !cfun->machine || cfun->machine->normal_function)
3328 {
3329 rtx sp = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
eff23b79
AS
3330 rtx sp_hi = gcn_operand_part (Pmode, sp, 1);
3331 rtx sp_lo = gcn_operand_part (Pmode, sp, 0);
5326695a 3332 rtx fp = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
eff23b79
AS
3333 rtx fp_hi = gcn_operand_part (Pmode, fp, 1);
3334 rtx fp_lo = gcn_operand_part (Pmode, fp, 0);
5326695a
AS
3335
3336 start_sequence ();
3337
3338 if (offsets->pretend_size > 0)
3339 {
3340 /* FIXME: Do the actual saving of register pretend args to the stack.
3341 Register order needs consideration. */
3342 }
3343
3344 /* Save callee-save regs. */
3345 move_callee_saved_registers (sp, offsets, true);
3346
3347 HOST_WIDE_INT sp_adjust = offsets->pretend_size
3348 + offsets->callee_saves
3349 + offsets->local_vars + offsets->outgoing_args_size;
3350 if (sp_adjust > 0)
eff23b79
AS
3351 {
 3352	  /* Adding RTX_FRAME_RELATED_P effectively disables splitting, so
 3353	     we use a split add explicitly, and specify the DImode add in
 3354	     the note.  */
3355 rtx scc = gen_rtx_REG (BImode, SCC_REG);
3356 rtx adjustment = gen_int_mode (sp_adjust, SImode);
3357 rtx insn = emit_insn (gen_addsi3_scalar_carry (sp_lo, sp_lo,
3358 adjustment, scc));
22f201e4
HAQ
3359 if (!offsets->need_frame_pointer)
3360 {
3361 RTX_FRAME_RELATED_P (insn) = 1;
3362 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3363 gen_rtx_SET (sp,
3364 gen_rtx_PLUS (DImode, sp,
3365 adjustment)));
3366 }
eff23b79
AS
3367 emit_insn (gen_addcsi3_scalar_zero (sp_hi, sp_hi, scc));
3368 }
5326695a
AS
3369
3370 if (offsets->need_frame_pointer)
eff23b79
AS
3371 {
3372 /* Adding RTX_FRAME_RELATED_P effectively disables spliting, so
3373 we use split add explictly, and specify the DImode add in
3374 the note. */
3375 rtx scc = gen_rtx_REG (BImode, SCC_REG);
3376 int fp_adjust = -(offsets->local_vars + offsets->outgoing_args_size);
3377 rtx adjustment = gen_int_mode (fp_adjust, SImode);
 3378	  rtx insn = emit_insn (gen_addsi3_scalar_carry (fp_lo, sp_lo,
3379 adjustment, scc));
eff23b79
AS
3380 emit_insn (gen_addcsi3_scalar (fp_hi, sp_hi,
3381 (fp_adjust < 0 ? GEN_INT (-1)
3382 : const0_rtx),
3383 scc, scc));
22f201e4
HAQ
3384
3385 /* Set the CFA to the entry stack address, as an offset from the
3386 frame pointer. This is preferred because the frame pointer is
3387 saved in each frame, whereas the stack pointer is not. */
3388 RTX_FRAME_RELATED_P (insn) = 1;
3389 add_reg_note (insn, REG_CFA_DEF_CFA,
3390 gen_rtx_PLUS (DImode, fp,
3391 GEN_INT (-(offsets->pretend_size
3392 + offsets->callee_saves))));
eff23b79 3393 }
5326695a
AS
3394
3395 rtx_insn *seq = get_insns ();
3396 end_sequence ();
3397
5326695a
AS
3398 emit_insn (seq);
3399 }
3400 else
3401 {
f6fff8a6
AS
3402 if (TARGET_PACKED_WORK_ITEMS)
3403 {
 3404	  /* v0 contains the X, Y and Z dimensions all in one.
 3405	     Expand them out for ABI compatibility.  */
3406 /* TODO: implement and use zero_extract. */
3407 rtx v1 = gen_rtx_REG (V64SImode, VGPR_REGNO (1));
3408 emit_insn (gen_andv64si3 (v1, gen_rtx_REG (V64SImode, VGPR_REGNO (0)),
3409 gen_rtx_CONST_INT (VOIDmode, 0x3FF << 10)));
3410 emit_insn (gen_lshrv64si3 (v1, v1, gen_rtx_CONST_INT (VOIDmode, 10)));
3411 emit_insn (gen_prologue_use (v1));
3412
3413 rtx v2 = gen_rtx_REG (V64SImode, VGPR_REGNO (2));
3414 emit_insn (gen_andv64si3 (v2, gen_rtx_REG (V64SImode, VGPR_REGNO (0)),
3415 gen_rtx_CONST_INT (VOIDmode, 0x3FF << 20)));
3416 emit_insn (gen_lshrv64si3 (v2, v2, gen_rtx_CONST_INT (VOIDmode, 20)));
3417 emit_insn (gen_prologue_use (v2));
3418 }
3419
 3420      /* We no longer use the private segment for the stack (it's not
 3421	  accessible to reverse offload), so we must calculate a wave offset
 3422	  from the grid dimensions and the stack size, both of which are
 3423	  calculated on the host and passed in the kernargs region.
3424 See libgomp-gcn.h for details. */
3425 rtx wave_offset = gen_rtx_REG (SImode, FIRST_PARM_REG);
3426
3427 rtx num_waves_mem = gcn_oacc_dim_size (1);
3428 rtx num_waves = gen_rtx_REG (SImode, FIRST_PARM_REG+1);
3429 set_mem_addr_space (num_waves_mem, ADDR_SPACE_SCALAR_FLAT);
3430 emit_move_insn (num_waves, num_waves_mem);
3431
3432 rtx workgroup_num = gcn_oacc_dim_pos (0);
3433 rtx wave_num = gen_rtx_REG (SImode, FIRST_PARM_REG+2);
 3434      emit_move_insn (wave_num, gcn_oacc_dim_pos (1));
5326695a 3435
f6fff8a6
AS
3436 rtx thread_id = gen_rtx_REG (SImode, FIRST_PARM_REG+3);
3437 emit_insn (gen_mulsi3 (thread_id, num_waves, workgroup_num));
3438 emit_insn (gen_addsi3_scc (thread_id, thread_id, wave_num));
3439
3440 rtx kernarg_reg = gen_rtx_REG (DImode, cfun->machine->args.reg
3441 [KERNARG_SEGMENT_PTR_ARG]);
3442 rtx stack_size_mem = gen_rtx_MEM (SImode,
3443 gen_rtx_PLUS (DImode, kernarg_reg,
3444 GEN_INT (52)));
3445 set_mem_addr_space (stack_size_mem, ADDR_SPACE_SCALAR_FLAT);
3446 emit_move_insn (wave_offset, stack_size_mem);
3447
3448 emit_insn (gen_mulsi3 (wave_offset, wave_offset, thread_id));
3449
3450 /* The FLAT_SCRATCH_INIT is not usually needed, but can be enabled
3451 via the function attributes. */
5326695a
AS
3452 if (cfun->machine->args.requested & (1 << FLAT_SCRATCH_INIT_ARG))
3453 {
3454 rtx fs_init_lo =
3455 gen_rtx_REG (SImode,
3456 cfun->machine->args.reg[FLAT_SCRATCH_INIT_ARG]);
3457 rtx fs_init_hi =
3458 gen_rtx_REG (SImode,
3459 cfun->machine->args.reg[FLAT_SCRATCH_INIT_ARG] + 1);
3460 rtx fs_reg_lo = gen_rtx_REG (SImode, FLAT_SCRATCH_REG);
3461 rtx fs_reg_hi = gen_rtx_REG (SImode, FLAT_SCRATCH_REG + 1);
3462
3463 /*rtx queue = gen_rtx_REG(DImode,
3464 cfun->machine->args.reg[QUEUE_PTR_ARG]);
3465 rtx aperture = gen_rtx_MEM (SImode,
3466 gen_rtx_PLUS (DImode, queue,
3467 gen_int_mode (68, SImode)));
3468 set_mem_addr_space (aperture, ADDR_SPACE_SCALAR_FLAT);*/
3469
3470 /* Set up flat_scratch. */
3471 emit_insn (gen_addsi3_scc (fs_reg_hi, fs_init_lo, wave_offset));
3472 emit_insn (gen_lshrsi3_scc (fs_reg_hi, fs_reg_hi,
3473 gen_int_mode (8, SImode)));
3474 emit_move_insn (fs_reg_lo, fs_init_hi);
3475 }
3476
3477 /* Set up frame pointer and stack pointer. */
3478 rtx sp = gen_rtx_REG (DImode, STACK_POINTER_REGNUM);
eff23b79
AS
3479 rtx sp_hi = simplify_gen_subreg (SImode, sp, DImode, 4);
3480 rtx sp_lo = simplify_gen_subreg (SImode, sp, DImode, 0);
5326695a
AS
3481 rtx fp = gen_rtx_REG (DImode, HARD_FRAME_POINTER_REGNUM);
3482 rtx fp_hi = simplify_gen_subreg (SImode, fp, DImode, 4);
3483 rtx fp_lo = simplify_gen_subreg (SImode, fp, DImode, 0);
3484
3485 HOST_WIDE_INT sp_adjust = (offsets->local_vars
3486 + offsets->outgoing_args_size);
3487
f6fff8a6
AS
3488 /* Initialize FP and SP from space allocated on the host. */
3489 rtx stack_addr_mem = gen_rtx_MEM (DImode,
3490 gen_rtx_PLUS (DImode, kernarg_reg,
3491 GEN_INT (40)));
3492 set_mem_addr_space (stack_addr_mem, ADDR_SPACE_SCALAR_FLAT);
3493 emit_move_insn (fp, stack_addr_mem);
3258c2d6
AS
3494 rtx scc = gen_rtx_REG (BImode, SCC_REG);
3495 emit_insn (gen_addsi3_scalar_carry (fp_lo, fp_lo, wave_offset, scc));
3496 emit_insn (gen_addcsi3_scalar_zero (fp_hi, fp_hi, scc));
3497
3498	  /* Adding RTX_FRAME_RELATED_P effectively disables splitting, so we use
3499	     an explicit split add, and specify the DImode add in the note.
3500 The DWARF info expects that the callee-save data is in the frame,
3501 even though it isn't (because this is the entry point), so we
3502 make a notional adjustment to the DWARF frame offset here. */
3503 rtx dbg_adjustment = gen_int_mode (sp_adjust + offsets->callee_saves,
3504 DImode);
3505 rtx insn;
3506	  if (sp_adjust > 0)
3507 {
3508 rtx scc = gen_rtx_REG (BImode, SCC_REG);
3509 rtx adjustment = gen_int_mode (sp_adjust, DImode);
3510 insn = emit_insn (gen_addsi3_scalar_carry(sp_lo, fp_lo, adjustment,
3511 scc));
3512 emit_insn (gen_addcsi3_scalar_zero (sp_hi, fp_hi, scc));
3513 }
3514	  else
3515 insn = emit_move_insn (sp, fp);
3516 RTX_FRAME_RELATED_P (insn) = 1;
3517 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3518 gen_rtx_SET (sp, gen_rtx_PLUS (DImode, sp,
3519 dbg_adjustment)));
3520
3521 if (offsets->need_frame_pointer)
3522 {
3523 /* Set the CFA to the entry stack address, as an offset from the
3524 frame pointer. This is necessary when alloca is used, and
3525 harmless otherwise. */
3526 rtx neg_adjust = gen_int_mode (-offsets->callee_saves, DImode);
3527 add_reg_note (insn, REG_CFA_DEF_CFA,
3528 gen_rtx_PLUS (DImode, fp, neg_adjust));
3529 }
3530
3531 /* Make sure the flat scratch reg doesn't get optimised away. */
3532 emit_insn (gen_prologue_use (gen_rtx_REG (DImode, FLAT_SCRATCH_REG)));
3533 }
3534
3535 /* Ensure that the scheduler doesn't do anything unexpected. */
3536 emit_insn (gen_blockage ());
3537
3538 if (TARGET_M0_LDS_LIMIT)
3539 {
3540 /* m0 is initialized for the usual LDS DS and FLAT memory case.
3541 The low-part is the address of the topmost addressable byte, which is
3542 size-1. The high-part is an offset and should be zero. */
3543 emit_move_insn (gen_rtx_REG (SImode, M0_REG),
3544 gen_int_mode (LDS_SIZE, SImode));
3545
3546 emit_insn (gen_prologue_use (gen_rtx_REG (SImode, M0_REG)));
3547 }
3548
3549 if (cfun && cfun->machine && !cfun->machine->normal_function && flag_openmp)
3550 {
3551 /* OpenMP kernels have an implicit call to gomp_gcn_enter_kernel. */
3552 rtx fn_reg = gen_rtx_REG (Pmode, FIRST_PARM_REG);
3553 emit_move_insn (fn_reg, gen_rtx_SYMBOL_REF (Pmode,
3554 "gomp_gcn_enter_kernel"));
3555 emit_call_insn (gen_gcn_indirect_call (fn_reg, const0_rtx));
3556 }
3557}
3558
3559/* Generate epilogue. Called from gen_epilogue during pro_and_epilogue pass.
3560
3561 See gcn_expand_prologue for stack details. */
3562
3563void
3564gcn_expand_epilogue (void)
3565{
3566 /* Ensure that the scheduler doesn't do anything unexpected. */
3567 emit_insn (gen_blockage ());
3568
3569 if (!cfun || !cfun->machine || cfun->machine->normal_function)
3570 {
3571 machine_function *offsets = gcn_compute_frame_offsets ();
3572 rtx sp = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
3573 rtx fp = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
3574
3575 HOST_WIDE_INT sp_adjust = offsets->callee_saves + offsets->pretend_size;
3576
3577 if (offsets->need_frame_pointer)
3578 {
3579 /* Restore old SP from the frame pointer. */
3580 if (sp_adjust > 0)
3581 emit_insn (gen_subdi3 (sp, fp, gen_int_mode (sp_adjust, DImode)));
3582 else
3583 emit_move_insn (sp, fp);
3584 }
3585 else
3586 {
3587 /* Restore old SP from current SP. */
3588 sp_adjust += offsets->outgoing_args_size + offsets->local_vars;
3589
3590 if (sp_adjust > 0)
3591 emit_insn (gen_subdi3 (sp, sp, gen_int_mode (sp_adjust, DImode)));
3592 }
3593
3594 move_callee_saved_registers (sp, offsets, false);
3595
3596 /* There's no explicit use of the link register on the return insn. Emit
3597 one here instead. */
3598 if (offsets->lr_needs_saving)
3599 emit_use (gen_rtx_REG (DImode, LINK_REGNUM));
3600
3601 /* Similar for frame pointer. */
3602 if (offsets->need_frame_pointer)
3603 emit_use (gen_rtx_REG (DImode, HARD_FRAME_POINTER_REGNUM));
3604 }
3605 else if (flag_openmp)
3606 {
3607 /* OpenMP kernels have an implicit call to gomp_gcn_exit_kernel. */
3608 rtx fn_reg = gen_rtx_REG (Pmode, FIRST_PARM_REG);
3609 emit_move_insn (fn_reg,
3610 gen_rtx_SYMBOL_REF (Pmode, "gomp_gcn_exit_kernel"));
3611 emit_call_insn (gen_gcn_indirect_call (fn_reg, const0_rtx));
3612 }
3613 else if (TREE_CODE (TREE_TYPE (DECL_RESULT (cfun->decl))) != VOID_TYPE)
3614 {
3615 /* Assume that an exit value compatible with gcn-run is expected.
3616 That is, the third input parameter is an int*.
3617
3618 We can't allocate any new registers, but the dispatch_ptr and
3619 kernarg_reg are dead after this, so we'll use those. */
3620 rtx dispatch_ptr_reg = gen_rtx_REG (DImode, cfun->machine->args.reg
3621 [DISPATCH_PTR_ARG]);
3622 rtx kernarg_reg = gen_rtx_REG (DImode, cfun->machine->args.reg
3623 [KERNARG_SEGMENT_PTR_ARG]);
3624 rtx retptr_mem = gen_rtx_MEM (DImode,
3625 gen_rtx_PLUS (DImode, kernarg_reg,
3626 GEN_INT (16)));
3627 set_mem_addr_space (retptr_mem, ADDR_SPACE_SCALAR_FLAT);
3628	      emit_move_insn (dispatch_ptr_reg, retptr_mem);
3629
3630	      rtx retval_addr = gen_rtx_REG (DImode, FIRST_VPARM_REG + 2);
3631	      emit_move_insn (retval_addr, dispatch_ptr_reg);
3632	      rtx retval_mem = gen_rtx_MEM (SImode, retval_addr);
3633	      set_mem_addr_space (retval_mem, ADDR_SPACE_FLAT);
3634	      emit_move_insn (retval_mem, gen_rtx_REG (SImode, RETURN_VALUE_REG));
3635 }
3636
3637 emit_jump_insn (gen_gcn_return ());
3638}
3639
3640/* Implement TARGET_FRAME_POINTER_REQUIRED.
3641
3642 Return true if the frame pointer should not be eliminated. */
3643
3644bool
3645gcn_frame_pointer_rqd (void)
3646{
3647 /* GDB needs the frame pointer in order to unwind properly,
3648 but that's not important for the entry point, unless alloca is used.
3649	   It's not important for code execution, so we should respect the
3650 -fomit-frame-pointer flag. */
3651 return (!flag_omit_frame_pointer
3652 && cfun
3653 && (cfun->calls_alloca
3654 || (cfun->machine && cfun->machine->normal_function)));
3655}
3656
3657/* Implement TARGET_CAN_ELIMINATE.
3658
3659 Return true if the compiler is allowed to try to replace register number
3660 FROM_REG with register number TO_REG.
3661
3662 FIXME: is the default "true" not enough? Should this be a negative set? */
3663
3664bool
3665gcn_can_eliminate_p (int /*from_reg */ , int to_reg)
3666{
3667 return (to_reg == HARD_FRAME_POINTER_REGNUM
3668 || to_reg == STACK_POINTER_REGNUM);
3669}
3670
3671/* Implement INITIAL_ELIMINATION_OFFSET.
3672
3673 Returns the initial difference between the specified pair of registers, in
3674 terms of stack position. */
3675
3676HOST_WIDE_INT
3677gcn_initial_elimination_offset (int from, int to)
3678{
3679 machine_function *offsets = gcn_compute_frame_offsets ();
3680
3681 switch (from)
3682 {
3683 case ARG_POINTER_REGNUM:
3684 if (to == STACK_POINTER_REGNUM)
3685 return -(offsets->callee_saves + offsets->local_vars
3686 + offsets->outgoing_args_size);
3687 else if (to == FRAME_POINTER_REGNUM || to == HARD_FRAME_POINTER_REGNUM)
3688 return -offsets->callee_saves;
3689 else
3690 gcc_unreachable ();
3691 break;
3692
3693 case FRAME_POINTER_REGNUM:
3694 if (to == STACK_POINTER_REGNUM)
3695 return -(offsets->local_vars + offsets->outgoing_args_size);
3696 else if (to == HARD_FRAME_POINTER_REGNUM)
3697 return 0;
3698 else
3699 gcc_unreachable ();
3700 break;
3701
3702 default:
3703 gcc_unreachable ();
3704 }
3705}
3706
3707/* Implement HARD_REGNO_RENAME_OK.
3708
3709 Return true if it is permissible to rename a hard register from
3710 FROM_REG to TO_REG. */
3711
3712bool
3713gcn_hard_regno_rename_ok (unsigned int from_reg, unsigned int to_reg)
3714{
3715 if (from_reg == SCC_REG
3716 || from_reg == VCC_LO_REG || from_reg == VCC_HI_REG
3717 || from_reg == EXEC_LO_REG || from_reg == EXEC_HI_REG
3718 || to_reg == SCC_REG
3719 || to_reg == VCC_LO_REG || to_reg == VCC_HI_REG
3720 || to_reg == EXEC_LO_REG || to_reg == EXEC_HI_REG)
3721 return false;
3722
3723 /* Allow the link register to be used if it was saved. */
3724 if ((to_reg & ~1) == LINK_REGNUM)
3725 return !cfun || cfun->machine->lr_needs_saving;
3726
3727 /* Allow the registers used for the static chain to be used if the chain is
3728 not in active use. */
3729 if ((to_reg & ~1) == STATIC_CHAIN_REGNUM)
3730 return !cfun
3731 || !(cfun->static_chain_decl
3732 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
3733 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM + 1));
3734
3735 return true;
3736}
3737
3738/* Implement HARD_REGNO_CALLER_SAVE_MODE.
3739
3740 Which mode is required for saving NREGS of a pseudo-register in
3741 call-clobbered hard register REGNO. */
3742
3743machine_mode
3744gcn_hard_regno_caller_save_mode (unsigned int regno, unsigned int nregs,
3745 machine_mode regmode)
3746{
3747	  machine_mode result = choose_hard_reg_mode (regno, nregs, NULL);
3748
3749 if (VECTOR_MODE_P (result) && !VECTOR_MODE_P (regmode))
3750 result = (nregs == 1 ? SImode : DImode);
3751
3752 return result;
3753}
3754
3755/* Implement TARGET_ASM_TRAMPOLINE_TEMPLATE.
3756
3757 Output assembler code for a block containing the constant parts
3758 of a trampoline, leaving space for the variable parts. */
3759
3760static void
3761gcn_asm_trampoline_template (FILE *f)
3762{
3763 /* The source operand of the move instructions must be a 32-bit
3764 constant following the opcode. */
3765 asm_fprintf (f, "\ts_mov_b32\ts%i, 0xffff\n", STATIC_CHAIN_REGNUM);
3766 asm_fprintf (f, "\ts_mov_b32\ts%i, 0xffff\n", STATIC_CHAIN_REGNUM + 1);
3767 asm_fprintf (f, "\ts_mov_b32\ts%i, 0xffff\n", CC_SAVE_REG);
3768 asm_fprintf (f, "\ts_mov_b32\ts%i, 0xffff\n", CC_SAVE_REG + 1);
3769 asm_fprintf (f, "\ts_setpc_b64\ts[%i:%i]\n", CC_SAVE_REG, CC_SAVE_REG + 1);
3770}
3771
3772/* Implement TARGET_TRAMPOLINE_INIT.
3773
3774 Emit RTL insns to initialize the variable parts of a trampoline.
3775 FNDECL is the decl of the target address, M_TRAMP is a MEM for
3776 the trampoline, and CHAIN_VALUE is an RTX for the static chain
3777 to be passed to the target function. */
3778
3779static void
3780gcn_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3781{
3782 if (TARGET_GCN5_PLUS)
3783 sorry ("nested function trampolines not supported on GCN5 due to"
3784 " non-executable stacks");
3785
3786 emit_block_move (m_tramp, assemble_trampoline_template (),
3787 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3788
3789 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
3790 rtx chain_value_reg = copy_to_reg (chain_value);
3791 rtx fnaddr_reg = copy_to_reg (fnaddr);
3792
3793 for (int i = 0; i < 4; i++)
3794 {
3795 rtx mem = adjust_address (m_tramp, SImode, i * 8 + 4);
3796 rtx reg = i < 2 ? chain_value_reg : fnaddr_reg;
3797 emit_move_insn (mem, gen_rtx_SUBREG (SImode, reg, (i % 2) * 4));
3798 }
3799
3800 rtx tramp_addr = XEXP (m_tramp, 0);
3801 emit_insn (gen_clear_icache (tramp_addr,
3802 plus_constant (ptr_mode, tramp_addr,
3803 TRAMPOLINE_SIZE)));
3804}
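/* Layout sketch for the two functions above: each "s_mov_b32 sN, 0xffff"
   placeholder occupies 8 bytes -- a 4-byte encoding followed by its
   4-byte literal -- so the literal of move I sits at byte I*8 + 4, which
   is the offset adjust_address targets in the loop.  Moves 0-1 are
   patched with the static chain value (lo/hi) and moves 2-3 with the
   target function address, which the final s_setpc_b64 jumps through.  */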
3805
3806/* Implement TARGET_EXPAND_DIVMOD_LIBFUNC.
3807
3808 There are divmod libfuncs for all modes except TImode. They return the
3809 two values packed into a larger integer/vector. */
3810
3811void
3812gcn_expand_divmod_libfunc (rtx libfunc, machine_mode mode, rtx op0, rtx op1,
3813 rtx *quot, rtx *rem)
3814{
3815 machine_mode innermode = (VECTOR_MODE_P (mode)
3816 ? GET_MODE_INNER (mode) : mode);
3817 machine_mode wideinnermode = VOIDmode;
3818 machine_mode widemode = VOIDmode;
3819
3820 switch (innermode)
3821 {
3822 case E_QImode:
3823 case E_HImode:
3824 case E_SImode:
3825 wideinnermode = DImode;
3826 break;
3827 case E_DImode:
3828 wideinnermode = TImode;
3829 break;
3830 default:
3831 gcc_unreachable ();
3832 }
3833
3834 if (VECTOR_MODE_P (mode))
3835 widemode = VnMODE (GET_MODE_NUNITS (mode), wideinnermode);
3836 else
3837 widemode = wideinnermode;
3838
3839 emit_library_call_value (libfunc, gen_rtx_REG (widemode, RETURN_VALUE_REG),
3840 LCT_NORMAL, widemode, op0, mode, op1, mode);
3841
3842 *quot = gen_rtx_REG (mode, RETURN_VALUE_REG);
3843 *rem = gen_rtx_REG (mode,
3844 RETURN_VALUE_REG + (wideinnermode == TImode ? 2 : 1));
3845}
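/* Packing example, scalar SImode: the call lands on __divmodsi4
   (registered in gcn_init_libfuncs below), whose DImode return value
   carries the quotient in the low half (RETURN_VALUE_REG) and the
   remainder in the high half (RETURN_VALUE_REG + 1) -- which is exactly
   how *QUOT and *REM are recovered above.  */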
3846
3847/* }}} */
3848/* {{{ Miscellaneous. */
3849
3850/* Implement TARGET_CANNOT_COPY_INSN_P.
3851
3852 Return true if INSN must not be duplicated. */
3853
3854static bool
3855gcn_cannot_copy_insn_p (rtx_insn *insn)
3856{
3857 if (recog_memoized (insn) == CODE_FOR_gcn_wavefront_barrier)
3858 return true;
3859
3860 return false;
3861}
3862
3863/* Implement TARGET_DEBUG_UNWIND_INFO.
3864
3865 Defines the mechanism that will be used for describing frame unwind
3866 information to the debugger. */
3867
3868static enum unwind_info_type
3869gcn_debug_unwind_info ()
3870{
3871	  return UI_DWARF2;
3872}
3873
3874/* Determine if there is a suitable hardware conversion instruction.
3875 Used primarily by the machine description. */
3876
3877bool
3878gcn_valid_cvt_p (machine_mode from, machine_mode to, enum gcn_cvt_t op)
3879{
3880 if (VECTOR_MODE_P (from) != VECTOR_MODE_P (to))
3881 return false;
3882
3883 if (VECTOR_MODE_P (from))
3884 {
3885 if (GET_MODE_NUNITS (from) != GET_MODE_NUNITS (to))
3886 return false;
3887
3888 from = GET_MODE_INNER (from);
3889 to = GET_MODE_INNER (to);
3890 }
3891
3892 switch (op)
3893 {
3894 case fix_trunc_cvt:
3895 case fixuns_trunc_cvt:
3896 if (GET_MODE_CLASS (from) != MODE_FLOAT
3897 || GET_MODE_CLASS (to) != MODE_INT)
3898 return false;
3899 break;
3900 case float_cvt:
3901 case floatuns_cvt:
3902 if (GET_MODE_CLASS (from) != MODE_INT
3903 || GET_MODE_CLASS (to) != MODE_FLOAT)
3904 return false;
3905 break;
3906 case extend_cvt:
3907 if (GET_MODE_CLASS (from) != MODE_FLOAT
3908 || GET_MODE_CLASS (to) != MODE_FLOAT
3909 || GET_MODE_SIZE (from) >= GET_MODE_SIZE (to))
3910 return false;
3911 break;
3912 case trunc_cvt:
3913 if (GET_MODE_CLASS (from) != MODE_FLOAT
3914 || GET_MODE_CLASS (to) != MODE_FLOAT
3915 || GET_MODE_SIZE (from) <= GET_MODE_SIZE (to))
3916 return false;
3917 break;
3918 }
3919
3920 return ((to == HImode && from == HFmode)
3921 || (to == SImode && (from == SFmode || from == DFmode))
3922 || (to == HFmode && (from == HImode || from == SFmode))
3923 || (to == SFmode && (from == SImode || from == HFmode
3924 || from == DFmode))
3925 || (to == DFmode && (from == SImode || from == SFmode)));
3926}
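/* For instance, gcn_valid_cvt_p (SFmode, SImode, fix_trunc_cvt) is true
   (float to int, and SImode <- SFmode appears in the list above), whereas
   extend_cvt from HFmode to DFmode passes the class and size checks but
   is absent from the final list, so no single hardware conversion is
   reported for it.  */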
3927
3928/* Implement TARGET_EMUTLS_VAR_INIT.
3929
3930 Disable emutls (gthr-gcn.h does not support it, yet). */
3931
3932tree
3933gcn_emutls_var_init (tree, tree decl, tree)
3934{
3935 sorry_at (DECL_SOURCE_LOCATION (decl), "TLS is not implemented for GCN.");
3936	  return NULL_TREE;
3937}
3938
3939/* }}} */
3940/* {{{ Costs. */
3941
3942/* Implement TARGET_RTX_COSTS.
3943
3944 Compute a (partial) cost for rtx X. Return true if the complete
3945 cost has been computed, and false if subexpressions should be
3946 scanned. In either case, *TOTAL contains the cost result. */
3947
3948static bool
3949gcn_rtx_costs (rtx x, machine_mode, int, int, int *total, bool)
3950{
3951 enum rtx_code code = GET_CODE (x);
3952 switch (code)
3953 {
3954 case CONST:
3955 case CONST_DOUBLE:
3956 case CONST_VECTOR:
3957 case CONST_INT:
3958 if (gcn_inline_constant_p (x))
3959 *total = 0;
3960 else if (code == CONST_INT
3961 && ((unsigned HOST_WIDE_INT) INTVAL (x) + 0x8000) < 0x10000)
3962 *total = 1;
3963 else if (gcn_constant_p (x))
3964 *total = 2;
3965 else
3966 *total = vgpr_vector_mode_p (GET_MODE (x)) ? 64 : 4;
3967 return true;
3968
3969 case DIV:
3970 *total = 100;
3971 return false;
3972
3973 default:
3974 *total = 3;
3975 return false;
3976 }
3977}
3978
3979/* Implement TARGET_MEMORY_MOVE_COST.
3980
3981 Return the cost of moving data of mode M between a
3982 register and memory. A value of 2 is the default; this cost is
3983 relative to those in `REGISTER_MOVE_COST'.
3984
3985 This function is used extensively by register_move_cost that is used to
3986 build tables at startup. Make it inline in this case.
3987 When IN is 2, return maximum of in and out move cost.
3988
3989 If moving between registers and memory is more expensive than
3990 between two registers, you should define this macro to express the
3991 relative cost.
3992
3993	   Also model the increased cost of moving QImode registers in
3994	   non-Q_REGS classes.  */
3995
3996#define LOAD_COST 32
3997#define STORE_COST 32
3998static int
3999gcn_memory_move_cost (machine_mode mode, reg_class_t regclass, bool in)
4000{
4001 int nregs = CEIL (GET_MODE_SIZE (mode), 4);
4002 switch (regclass)
4003 {
4004 case SCC_CONDITIONAL_REG:
4005 case VCCZ_CONDITIONAL_REG:
4006 case VCC_CONDITIONAL_REG:
4007 case EXECZ_CONDITIONAL_REG:
4008 case ALL_CONDITIONAL_REGS:
4009 case SGPR_REGS:
4010 case SGPR_EXEC_REGS:
4011 case EXEC_MASK_REG:
4012 case SGPR_VOP_SRC_REGS:
4013 case SGPR_MEM_SRC_REGS:
4014 case SGPR_SRC_REGS:
4015 case SGPR_DST_REGS:
4016 case GENERAL_REGS:
4017 case AFP_REGS:
4018 if (!in)
4019 return (STORE_COST + 2) * nregs;
4020 return LOAD_COST * nregs;
4021 case VGPR_REGS:
4022 if (in)
4023 return (LOAD_COST + 2) * nregs;
4024 return STORE_COST * nregs;
4025 case AVGPR_REGS:
4026 case ALL_VGPR_REGS:
4027 if (in)
4028 return (LOAD_COST + (TARGET_CDNA2_PLUS ? 2 : 4)) * nregs;
4029 return (STORE_COST + (TARGET_CDNA2_PLUS ? 0 : 2)) * nregs;
4030 case ALL_REGS:
4031 case ALL_GPR_REGS:
4032 case SRCDST_REGS:
4033 if (in)
4034 return (LOAD_COST + 2) * nregs;
4035 return (STORE_COST + 2) * nregs;
4036 default:
4037 gcc_unreachable ();
4038 }
4039}
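/* Worked example: a DImode value occupies CEIL (8, 4) = 2 registers, so
   a load into SGPR_REGS costs LOAD_COST * 2 = 64, while a store costs
   (STORE_COST + 2) * 2 = 68; the small +2 bias disfavours one direction
   per register class.  */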
4040
4041/* Implement TARGET_REGISTER_MOVE_COST.
4042
4043 Return the cost of moving data from a register in class CLASS1 to
4044 one in class CLASS2. Base value is 2. */
4045
4046static int
4047gcn_register_move_cost (machine_mode, reg_class_t dst, reg_class_t src)
4048{
4049 if (src == AVGPR_REGS)
4050 {
4051 if (dst == AVGPR_REGS)
4052 return TARGET_CDNA1 ? 6 : 2;
4053 if (dst != VGPR_REGS)
4054 return 6;
4055 }
4056 if (dst == AVGPR_REGS && src != VGPR_REGS)
4057 return 6;
4058 /* Increase cost of moving from and to vector registers. While this is
4059 fast in hardware (I think), it has hidden cost of setting up the exec
4060 flags. */
4061 if ((src < VGPR_REGS) != (dst < VGPR_REGS))
4062 return 4;
4063 return 2;
4064}
4065
4066/* }}} */
4067/* {{{ Builtins. */
4068
4069/* Type codes used by GCN built-in definitions. */
4070
4071enum gcn_builtin_type_index
4072{
4073 GCN_BTI_END_OF_PARAMS,
4074
4075 GCN_BTI_VOID,
4076 GCN_BTI_BOOL,
4077 GCN_BTI_INT,
4078 GCN_BTI_UINT,
4079 GCN_BTI_SIZE_T,
4080 GCN_BTI_LLINT,
4081 GCN_BTI_LLUINT,
4082 GCN_BTI_EXEC,
4083
4084 GCN_BTI_SF,
4085 GCN_BTI_V64SI,
4086 GCN_BTI_V64SF,
4087	  GCN_BTI_V64DF,
4088 GCN_BTI_V64PTR,
4089 GCN_BTI_SIPTR,
4090 GCN_BTI_SFPTR,
4091 GCN_BTI_VOIDPTR,
4092
4093 GCN_BTI_LDS_VOIDPTR,
4094
4095 GCN_BTI_MAX
4096};
4097
4098static GTY(()) tree gcn_builtin_types[GCN_BTI_MAX];
4099
4100#define exec_type_node (gcn_builtin_types[GCN_BTI_EXEC])
4101#define sf_type_node (gcn_builtin_types[GCN_BTI_SF])
4102#define v64si_type_node (gcn_builtin_types[GCN_BTI_V64SI])
4103#define v64sf_type_node (gcn_builtin_types[GCN_BTI_V64SF])
4104	#define v64df_type_node (gcn_builtin_types[GCN_BTI_V64DF])
4105#define v64ptr_type_node (gcn_builtin_types[GCN_BTI_V64PTR])
4106#define siptr_type_node (gcn_builtin_types[GCN_BTI_SIPTR])
4107#define sfptr_type_node (gcn_builtin_types[GCN_BTI_SFPTR])
4108#define voidptr_type_node (gcn_builtin_types[GCN_BTI_VOIDPTR])
4109#define size_t_type_node (gcn_builtin_types[GCN_BTI_SIZE_T])
4110
4111static rtx gcn_expand_builtin_1 (tree, rtx, rtx, machine_mode, int,
4112 struct gcn_builtin_description *);
4113static rtx gcn_expand_builtin_binop (tree, rtx, rtx, machine_mode, int,
4114 struct gcn_builtin_description *);
4115
4116struct gcn_builtin_description;
4117typedef rtx (*gcn_builtin_expander) (tree, rtx, rtx, machine_mode, int,
4118 struct gcn_builtin_description *);
4119
4120enum gcn_builtin_type
4121{
4122	  B_UNIMPLEMENTED,		/* Emit a "sorry, not implemented" diagnostic.  */
4123 B_INSN, /* Emit a pattern */
4124 B_OVERLOAD /* Placeholder for an overloaded function */
4125};
4126
4127struct gcn_builtin_description
4128{
4129 int fcode;
4130 int icode;
4131 const char *name;
4132 enum gcn_builtin_type type;
4133 /* The first element of parm is always the return type. The rest
4134 are a zero terminated list of parameters. */
4135 int parm[6];
4136 gcn_builtin_expander expander;
4137};
4138
4139/* Read in the GCN builtins from gcn-builtins.def. */
4140
4141extern GTY(()) struct gcn_builtin_description gcn_builtins[GCN_BUILTIN_MAX];
4142
4143struct gcn_builtin_description gcn_builtins[] = {
4144#define DEF_BUILTIN(fcode, icode, name, type, params, expander) \
4145 {GCN_BUILTIN_ ## fcode, icode, name, type, params, expander},
4146
4147#define DEF_BUILTIN_BINOP_INT_FP(fcode, ic, name) \
4148 {GCN_BUILTIN_ ## fcode ## _V64SI, \
4149 CODE_FOR_ ## ic ##v64si3_exec, name "_v64int", B_INSN, \
4150 {GCN_BTI_V64SI, GCN_BTI_EXEC, GCN_BTI_V64SI, GCN_BTI_V64SI, \
4151 GCN_BTI_V64SI, GCN_BTI_END_OF_PARAMS}, gcn_expand_builtin_binop}, \
4152 {GCN_BUILTIN_ ## fcode ## _V64SI_unspec, \
4153 CODE_FOR_ ## ic ##v64si3_exec, name "_v64int_unspec", B_INSN, \
4154 {GCN_BTI_V64SI, GCN_BTI_EXEC, GCN_BTI_V64SI, GCN_BTI_V64SI, \
4155 GCN_BTI_END_OF_PARAMS}, gcn_expand_builtin_binop},
4156
4157#include "gcn-builtins.def"
4158#undef DEF_BUILTIN_BINOP_INT_FP
4159#undef DEF_BUILTIN
4160};
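/* For illustration (hypothetical invocation), a line such as
   DEF_BUILTIN_BINOP_INT_FP (ADD, add, "add") in gcn-builtins.def would
   expand to two table entries mapping to CODE_FOR_addv64si3_exec: one
   named "add_v64int" whose last argument is the previous-value vector,
   and one named "add_v64int_unspec" without it; gcn_init_builtins later
   prefixes each name with "__builtin_gcn_".  */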
4161
4162static GTY(()) tree gcn_builtin_decls[GCN_BUILTIN_MAX];
4163
4164/* Implement TARGET_BUILTIN_DECL.
4165
4166 Return the GCN builtin for CODE. */
4167
4168tree
4169gcn_builtin_decl (unsigned code, bool ARG_UNUSED (initialize_p))
4170{
4171 if (code >= GCN_BUILTIN_MAX)
4172 return error_mark_node;
4173
4174 return gcn_builtin_decls[code];
4175}
4176
4177/* Helper function for gcn_init_builtins. */
4178
4179static void
4180gcn_init_builtin_types (void)
4181{
4182 gcn_builtin_types[GCN_BTI_VOID] = void_type_node;
4183 gcn_builtin_types[GCN_BTI_BOOL] = boolean_type_node;
4184 gcn_builtin_types[GCN_BTI_INT] = intSI_type_node;
4185 gcn_builtin_types[GCN_BTI_UINT] = unsigned_type_for (intSI_type_node);
4186 gcn_builtin_types[GCN_BTI_SIZE_T] = size_type_node;
4187 gcn_builtin_types[GCN_BTI_LLINT] = intDI_type_node;
4188 gcn_builtin_types[GCN_BTI_LLUINT] = unsigned_type_for (intDI_type_node);
4189
4190 exec_type_node = unsigned_intDI_type_node;
4191 sf_type_node = float32_type_node;
4192 v64si_type_node = build_vector_type (intSI_type_node, 64);
4193 v64sf_type_node = build_vector_type (float_type_node, 64);
4194	  v64df_type_node = build_vector_type (double_type_node, 64);
4195 v64ptr_type_node = build_vector_type (unsigned_intDI_type_node
4196 /*build_pointer_type
4197 (integer_type_node) */
4198 , 64);
4199 tree tmp = build_distinct_type_copy (intSI_type_node);
4200	  TYPE_ADDR_SPACE (tmp) = ADDR_SPACE_DEFAULT;
4201 siptr_type_node = build_pointer_type (tmp);
4202
4203 tmp = build_distinct_type_copy (float_type_node);
4204	  TYPE_ADDR_SPACE (tmp) = ADDR_SPACE_DEFAULT;
4205 sfptr_type_node = build_pointer_type (tmp);
4206
4207 tmp = build_distinct_type_copy (void_type_node);
4208	  TYPE_ADDR_SPACE (tmp) = ADDR_SPACE_DEFAULT;
4209 voidptr_type_node = build_pointer_type (tmp);
4210
4211 tmp = build_distinct_type_copy (void_type_node);
4212 TYPE_ADDR_SPACE (tmp) = ADDR_SPACE_LDS;
4213 gcn_builtin_types[GCN_BTI_LDS_VOIDPTR] = build_pointer_type (tmp);
4214}
4215
4216/* Implement TARGET_INIT_BUILTINS.
4217
4218 Set up all builtin functions for this target. */
4219
4220static void
4221gcn_init_builtins (void)
4222{
4223 gcn_init_builtin_types ();
4224
4225 struct gcn_builtin_description *d;
4226 unsigned int i;
4227 for (i = 0, d = gcn_builtins; i < GCN_BUILTIN_MAX; i++, d++)
4228 {
4229 tree p;
4230 char name[64]; /* build_function will make a copy. */
4231 int parm;
4232
4233 /* FIXME: Is this necessary/useful? */
4234 if (d->name == 0)
4235 continue;
4236
4237 /* Find last parm. */
4238 for (parm = 1; d->parm[parm] != GCN_BTI_END_OF_PARAMS; parm++)
4239 ;
4240
4241 p = void_list_node;
4242 while (parm > 1)
4243 p = tree_cons (NULL_TREE, gcn_builtin_types[d->parm[--parm]], p);
4244
4245 p = build_function_type (gcn_builtin_types[d->parm[0]], p);
4246
4247 sprintf (name, "__builtin_gcn_%s", d->name);
4248 gcn_builtin_decls[i]
4249 = add_builtin_function (name, p, i, BUILT_IN_MD, NULL, NULL_TREE);
4250
4251 /* These builtins don't throw. */
4252 TREE_NOTHROW (gcn_builtin_decls[i]) = 1;
4253 }
4254
4255 /* These builtins need to take/return an LDS pointer: override the generic
4256 versions here. */
4257
4258 set_builtin_decl (BUILT_IN_GOACC_SINGLE_START,
4259 gcn_builtin_decls[GCN_BUILTIN_ACC_SINGLE_START], false);
4260
4261 set_builtin_decl (BUILT_IN_GOACC_SINGLE_COPY_START,
4262 gcn_builtin_decls[GCN_BUILTIN_ACC_SINGLE_COPY_START],
4263 false);
4264
4265 set_builtin_decl (BUILT_IN_GOACC_SINGLE_COPY_END,
4266 gcn_builtin_decls[GCN_BUILTIN_ACC_SINGLE_COPY_END],
4267 false);
4268
4269 set_builtin_decl (BUILT_IN_GOACC_BARRIER,
4270 gcn_builtin_decls[GCN_BUILTIN_ACC_BARRIER], false);
4271}
4272
4273/* Implement TARGET_INIT_LIBFUNCS. */
4274
4275static void
4276gcn_init_libfuncs (void)
4277{
4278 /* BITS_PER_UNIT * 2 is 64 bits, which causes
4279	     optabs-libfuncs.cc:gen_int_libfunc to omit TImode (i.e. 128 bits)
4280 libcalls that we need to support operations for that type. Initialise
4281 them here instead. */
4282 set_optab_libfunc (udiv_optab, TImode, "__udivti3");
4283 set_optab_libfunc (umod_optab, TImode, "__umodti3");
4284 set_optab_libfunc (sdiv_optab, TImode, "__divti3");
4285 set_optab_libfunc (smod_optab, TImode, "__modti3");
4286 set_optab_libfunc (smul_optab, TImode, "__multi3");
4287 set_optab_libfunc (addv_optab, TImode, "__addvti3");
4288 set_optab_libfunc (subv_optab, TImode, "__subvti3");
4289 set_optab_libfunc (negv_optab, TImode, "__negvti2");
4290 set_optab_libfunc (absv_optab, TImode, "__absvti2");
4291 set_optab_libfunc (smulv_optab, TImode, "__mulvti3");
4292 set_optab_libfunc (ffs_optab, TImode, "__ffsti2");
4293 set_optab_libfunc (clz_optab, TImode, "__clzti2");
4294 set_optab_libfunc (ctz_optab, TImode, "__ctzti2");
4295 set_optab_libfunc (clrsb_optab, TImode, "__clrsbti2");
4296 set_optab_libfunc (popcount_optab, TImode, "__popcountti2");
4297 set_optab_libfunc (parity_optab, TImode, "__parityti2");
4298 set_optab_libfunc (bswap_optab, TImode, "__bswapti2");
4299
4300 set_optab_libfunc (sdivmod_optab, SImode, "__divmodsi4");
4301 set_optab_libfunc (udivmod_optab, SImode, "__udivmodsi4");
4302 set_optab_libfunc (sdivmod_optab, DImode, "__divmoddi4");
4303 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
4304
4305 set_optab_libfunc (sdiv_optab, V2QImode, "__divv2qi3");
4306 set_optab_libfunc (udiv_optab, V2QImode, "__udivv2qi3");
4307 set_optab_libfunc (smod_optab, V2QImode, "__modv2qi3");
4308 set_optab_libfunc (umod_optab, V2QImode, "__umodv2qi3");
4309#if 0
4310 set_optab_libfunc (sdivmod_optab, V2QImode, "__divmodv2qi4");
4311 set_optab_libfunc (udivmod_optab, V2QImode, "__udivmodv2qi4");
4312#endif
4313 set_optab_libfunc (sdiv_optab, V4QImode, "__divv4qi3");
4314 set_optab_libfunc (udiv_optab, V4QImode, "__udivv4qi3");
4315 set_optab_libfunc (smod_optab, V4QImode, "__modv4qi3");
4316 set_optab_libfunc (umod_optab, V4QImode, "__umodv4qi3");
4317#if 0
4318 set_optab_libfunc (sdivmod_optab, V4QImode, "__divmodv4qi4");
4319 set_optab_libfunc (udivmod_optab, V4QImode, "__udivmodv4qi4");
4320#endif
4321 set_optab_libfunc (sdiv_optab, V8QImode, "__divv8qi3");
4322 set_optab_libfunc (udiv_optab, V8QImode, "__udivv8qi3");
4323 set_optab_libfunc (smod_optab, V8QImode, "__modv8qi3");
4324 set_optab_libfunc (umod_optab, V8QImode, "__umodv8qi3");
4325#if 0
4326 set_optab_libfunc (sdivmod_optab, V8QImode, "__divmodv8qi4");
4327 set_optab_libfunc (udivmod_optab, V8QImode, "__udivmodv8qi4");
4328#endif
4329 set_optab_libfunc (sdiv_optab, V16QImode, "__divv16qi3");
4330 set_optab_libfunc (udiv_optab, V16QImode, "__udivv16qi3");
4331 set_optab_libfunc (smod_optab, V16QImode, "__modv16qi3");
4332 set_optab_libfunc (umod_optab, V16QImode, "__umodv16qi3");
4333#if 0
4334 set_optab_libfunc (sdivmod_optab, V16QImode, "__divmodv16qi4");
4335 set_optab_libfunc (udivmod_optab, V16QImode, "__udivmodv16qi4");
4336#endif
4337 set_optab_libfunc (sdiv_optab, V32QImode, "__divv32qi3");
4338 set_optab_libfunc (udiv_optab, V32QImode, "__udivv32qi3");
4339 set_optab_libfunc (smod_optab, V32QImode, "__modv32qi3");
4340 set_optab_libfunc (umod_optab, V32QImode, "__umodv32qi3");
4341#if 0
4342 set_optab_libfunc (sdivmod_optab, V32QImode, "__divmodv32qi4");
4343 set_optab_libfunc (udivmod_optab, V32QImode, "__udivmodv32qi4");
4344#endif
4345 set_optab_libfunc (sdiv_optab, V64QImode, "__divv64qi3");
4346 set_optab_libfunc (udiv_optab, V64QImode, "__udivv64qi3");
4347 set_optab_libfunc (smod_optab, V64QImode, "__modv64qi3");
4348 set_optab_libfunc (umod_optab, V64QImode, "__umodv64qi3");
4349#if 0
4350 set_optab_libfunc (sdivmod_optab, V64QImode, "__divmodv64qi4");
4351 set_optab_libfunc (udivmod_optab, V64QImode, "__udivmodv64qi4");
4352#endif
4353
4354 set_optab_libfunc (sdiv_optab, V2HImode, "__divv2hi3");
4355 set_optab_libfunc (udiv_optab, V2HImode, "__udivv2hi3");
4356 set_optab_libfunc (smod_optab, V2HImode, "__modv2hi3");
4357 set_optab_libfunc (umod_optab, V2HImode, "__umodv2hi3");
4358#if 0
4359 set_optab_libfunc (sdivmod_optab, V2HImode, "__divmodv2hi4");
4360 set_optab_libfunc (udivmod_optab, V2HImode, "__udivmodv2hi4");
4361#endif
4362 set_optab_libfunc (sdiv_optab, V4HImode, "__divv4hi3");
4363 set_optab_libfunc (udiv_optab, V4HImode, "__udivv4hi3");
4364 set_optab_libfunc (smod_optab, V4HImode, "__modv4hi3");
4365 set_optab_libfunc (umod_optab, V4HImode, "__umodv4hi3");
4366#if 0
4367 set_optab_libfunc (sdivmod_optab, V4HImode, "__divmodv4hi4");
4368 set_optab_libfunc (udivmod_optab, V4HImode, "__udivmodv4hi4");
4369#endif
4370 set_optab_libfunc (sdiv_optab, V8HImode, "__divv8hi3");
4371 set_optab_libfunc (udiv_optab, V8HImode, "__udivv8hi3");
4372 set_optab_libfunc (smod_optab, V8HImode, "__modv8hi3");
4373 set_optab_libfunc (umod_optab, V8HImode, "__umodv8hi3");
4374#if 0
4375 set_optab_libfunc (sdivmod_optab, V8HImode, "__divmodv8hi4");
4376 set_optab_libfunc (udivmod_optab, V8HImode, "__udivmodv8hi4");
4377#endif
4378 set_optab_libfunc (sdiv_optab, V16HImode, "__divv16hi3");
4379 set_optab_libfunc (udiv_optab, V16HImode, "__udivv16hi3");
4380 set_optab_libfunc (smod_optab, V16HImode, "__modv16hi3");
4381 set_optab_libfunc (umod_optab, V16HImode, "__umodv16hi3");
4382#if 0
4383 set_optab_libfunc (sdivmod_optab, V16HImode, "__divmodv16hi4");
4384 set_optab_libfunc (udivmod_optab, V16HImode, "__udivmodv16hi4");
4385#endif
4386 set_optab_libfunc (sdiv_optab, V32HImode, "__divv32hi3");
4387 set_optab_libfunc (udiv_optab, V32HImode, "__udivv32hi3");
4388 set_optab_libfunc (smod_optab, V32HImode, "__modv32hi3");
4389 set_optab_libfunc (umod_optab, V32HImode, "__umodv32hi3");
4390#if 0
4391 set_optab_libfunc (sdivmod_optab, V32HImode, "__divmodv32hi4");
4392 set_optab_libfunc (udivmod_optab, V32HImode, "__udivmodv32hi4");
4393#endif
4394 set_optab_libfunc (sdiv_optab, V64HImode, "__divv64hi3");
4395 set_optab_libfunc (udiv_optab, V64HImode, "__udivv64hi3");
4396 set_optab_libfunc (smod_optab, V64HImode, "__modv64hi3");
4397 set_optab_libfunc (umod_optab, V64HImode, "__umodv64hi3");
4398#if 0
4399 set_optab_libfunc (sdivmod_optab, V64HImode, "__divmodv64hi4");
4400 set_optab_libfunc (udivmod_optab, V64HImode, "__udivmodv64hi4");
4401#endif
4402
4403 set_optab_libfunc (sdiv_optab, V2SImode, "__divv2si3");
4404 set_optab_libfunc (udiv_optab, V2SImode, "__udivv2si3");
4405 set_optab_libfunc (smod_optab, V2SImode, "__modv2si3");
4406 set_optab_libfunc (umod_optab, V2SImode, "__umodv2si3");
4407#if 0
4408 set_optab_libfunc (sdivmod_optab, V2SImode, "__divmodv2si4");
4409 set_optab_libfunc (udivmod_optab, V2SImode, "__udivmodv2si4");
4410#endif
4411 set_optab_libfunc (sdiv_optab, V4SImode, "__divv4si3");
4412 set_optab_libfunc (udiv_optab, V4SImode, "__udivv4si3");
4413 set_optab_libfunc (smod_optab, V4SImode, "__modv4si3");
4414 set_optab_libfunc (umod_optab, V4SImode, "__umodv4si3");
4415#if 0
4416 set_optab_libfunc (sdivmod_optab, V4SImode, "__divmodv4si4");
4417 set_optab_libfunc (udivmod_optab, V4SImode, "__udivmodv4si4");
4418#endif
4419 set_optab_libfunc (sdiv_optab, V8SImode, "__divv8si3");
4420 set_optab_libfunc (udiv_optab, V8SImode, "__udivv8si3");
4421 set_optab_libfunc (smod_optab, V8SImode, "__modv8si3");
4422 set_optab_libfunc (umod_optab, V8SImode, "__umodv8si3");
4423#if 0
4424 set_optab_libfunc (sdivmod_optab, V8SImode, "__divmodv8si4");
4425 set_optab_libfunc (udivmod_optab, V8SImode, "__udivmodv8si4");
4426#endif
4427 set_optab_libfunc (sdiv_optab, V16SImode, "__divv16si3");
4428 set_optab_libfunc (udiv_optab, V16SImode, "__udivv16si3");
4429 set_optab_libfunc (smod_optab, V16SImode, "__modv16si3");
4430 set_optab_libfunc (umod_optab, V16SImode, "__umodv16si3");
4431#if 0
4432 set_optab_libfunc (sdivmod_optab, V16SImode, "__divmodv16si4");
4433 set_optab_libfunc (udivmod_optab, V16SImode, "__udivmodv16si4");
4434#endif
4435 set_optab_libfunc (sdiv_optab, V32SImode, "__divv32si3");
4436 set_optab_libfunc (udiv_optab, V32SImode, "__udivv32si3");
4437 set_optab_libfunc (smod_optab, V32SImode, "__modv32si3");
4438 set_optab_libfunc (umod_optab, V32SImode, "__umodv32si3");
4439#if 0
4440 set_optab_libfunc (sdivmod_optab, V32SImode, "__divmodv32si4");
4441 set_optab_libfunc (udivmod_optab, V32SImode, "__udivmodv32si4");
4442#endif
4443 set_optab_libfunc (sdiv_optab, V64SImode, "__divv64si3");
4444 set_optab_libfunc (udiv_optab, V64SImode, "__udivv64si3");
4445 set_optab_libfunc (smod_optab, V64SImode, "__modv64si3");
4446 set_optab_libfunc (umod_optab, V64SImode, "__umodv64si3");
4447#if 0
4448 set_optab_libfunc (sdivmod_optab, V64SImode, "__divmodv64si4");
4449 set_optab_libfunc (udivmod_optab, V64SImode, "__udivmodv64si4");
4450#endif
4451
4452 set_optab_libfunc (sdiv_optab, V2DImode, "__divv2di3");
4453 set_optab_libfunc (udiv_optab, V2DImode, "__udivv2di3");
4454 set_optab_libfunc (smod_optab, V2DImode, "__modv2di3");
4455 set_optab_libfunc (umod_optab, V2DImode, "__umodv2di3");
4456#if 0
4457 set_optab_libfunc (sdivmod_optab, V2DImode, "__divmodv2di4");
4458 set_optab_libfunc (udivmod_optab, V2DImode, "__udivmodv2di4");
4459#endif
4460 set_optab_libfunc (sdiv_optab, V4DImode, "__divv4di3");
4461 set_optab_libfunc (udiv_optab, V4DImode, "__udivv4di3");
4462 set_optab_libfunc (smod_optab, V4DImode, "__modv4di3");
4463 set_optab_libfunc (umod_optab, V4DImode, "__umodv4di3");
4464#if 0
4465 set_optab_libfunc (sdivmod_optab, V4DImode, "__divmodv4di4");
4466 set_optab_libfunc (udivmod_optab, V4DImode, "__udivmodv4di4");
4467#endif
4468 set_optab_libfunc (sdiv_optab, V8DImode, "__divv8di3");
4469 set_optab_libfunc (udiv_optab, V8DImode, "__udivv8di3");
4470 set_optab_libfunc (smod_optab, V8DImode, "__modv8di3");
4471 set_optab_libfunc (umod_optab, V8DImode, "__umodv8di3");
4472#if 0
4473 set_optab_libfunc (sdivmod_optab, V8DImode, "__divmodv8di4");
4474 set_optab_libfunc (udivmod_optab, V8DImode, "__udivmodv8di4");
4475#endif
4476 set_optab_libfunc (sdiv_optab, V16DImode, "__divv16di3");
4477 set_optab_libfunc (udiv_optab, V16DImode, "__udivv16di3");
4478 set_optab_libfunc (smod_optab, V16DImode, "__modv16di3");
4479 set_optab_libfunc (umod_optab, V16DImode, "__umodv16di3");
4480#if 0
4481 set_optab_libfunc (sdivmod_optab, V16DImode, "__divmodv16di4");
4482 set_optab_libfunc (udivmod_optab, V16DImode, "__udivmodv16di4");
4483#endif
4484 set_optab_libfunc (sdiv_optab, V32DImode, "__divv32di3");
4485 set_optab_libfunc (udiv_optab, V32DImode, "__udivv32di3");
4486 set_optab_libfunc (smod_optab, V32DImode, "__modv32di3");
4487 set_optab_libfunc (umod_optab, V32DImode, "__umodv32di3");
4488#if 0
4489 set_optab_libfunc (sdivmod_optab, V32DImode, "__divmodv32di4");
4490 set_optab_libfunc (udivmod_optab, V32DImode, "__udivmodv32di4");
4491#endif
4492 set_optab_libfunc (sdiv_optab, V64DImode, "__divv64di3");
4493 set_optab_libfunc (udiv_optab, V64DImode, "__udivv64di3");
4494 set_optab_libfunc (smod_optab, V64DImode, "__modv64di3");
4495 set_optab_libfunc (umod_optab, V64DImode, "__umodv64di3");
4496#if 0
4497 set_optab_libfunc (sdivmod_optab, V64DImode, "__divmodv64di4");
4498 set_optab_libfunc (udivmod_optab, V64DImode, "__udivmodv64di4");
4499#endif
4500}
4501
4502/* Expand the CMP_SWAP GCN builtins. We have our own versions that do
4503 not require taking the address of any object, other than the memory
4504 cell being operated on.
4505
4506 Helper function for gcn_expand_builtin_1. */
4507
4508static rtx
4509gcn_expand_cmp_swap (tree exp, rtx target)
4510{
4511 machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
4512 addr_space_t as
4513 = TYPE_ADDR_SPACE (TREE_TYPE (TREE_TYPE (CALL_EXPR_ARG (exp, 0))));
4514 machine_mode as_mode = gcn_addr_space_address_mode (as);
4515
4516 if (!target)
4517 target = gen_reg_rtx (mode);
4518
4519 rtx addr = expand_expr (CALL_EXPR_ARG (exp, 0),
4520 NULL_RTX, as_mode, EXPAND_NORMAL);
4521 rtx cmp = expand_expr (CALL_EXPR_ARG (exp, 1),
4522 NULL_RTX, mode, EXPAND_NORMAL);
4523 rtx src = expand_expr (CALL_EXPR_ARG (exp, 2),
4524 NULL_RTX, mode, EXPAND_NORMAL);
4525 rtx pat;
4526
4527 rtx mem = gen_rtx_MEM (mode, force_reg (as_mode, addr));
4528 set_mem_addr_space (mem, as);
4529
4530 if (!REG_P (cmp))
4531 cmp = copy_to_mode_reg (mode, cmp);
4532 if (!REG_P (src))
4533 src = copy_to_mode_reg (mode, src);
4534
4535 if (mode == SImode)
4536 pat = gen_sync_compare_and_swapsi (target, mem, cmp, src);
4537 else
4538 pat = gen_sync_compare_and_swapdi (target, mem, cmp, src);
4539
4540 emit_insn (pat);
4541
4542 return target;
4543}
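/* Usage sketch (the "cmp_swap" builtin spelling from gcn-builtins.def is
   assumed here): a source-level call along the lines of
     old = __builtin_gcn_cmp_swap (ptr, expected, desired);
   maps argument 0 to ADDR, argument 1 to CMP and argument 2 to SRC above,
   returning the value previously held in *ptr via the
   sync_compare_and_swap{si,di} pattern.  */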
4544
4545/* Expand many different builtins.
4546
4547 Intended for use in gcn-builtins.def. */
4548
4549static rtx
4550gcn_expand_builtin_1 (tree exp, rtx target, rtx /*subtarget */ ,
4551 machine_mode /*mode */ , int ignore,
4552 struct gcn_builtin_description *)
4553{
4554 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
4d732405 4555 switch (DECL_MD_FUNCTION_CODE (fndecl))
4556 {
4557 case GCN_BUILTIN_FLAT_LOAD_INT32:
4558 {
4559 if (ignore)
4560 return target;
4561 /*rtx exec = */
4562 force_reg (DImode,
4563 expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX, DImode,
4564 EXPAND_NORMAL));
4565 /*rtx ptr = */
4566 force_reg (V64DImode,
4567 expand_expr (CALL_EXPR_ARG (exp, 1), NULL_RTX, V64DImode,
4568 EXPAND_NORMAL));
4569 /*emit_insn (gen_vector_flat_loadv64si
4570 (target, gcn_gen_undef (V64SImode), ptr, exec)); */
4571 return target;
4572 }
4573 case GCN_BUILTIN_FLAT_LOAD_PTR_INT32:
4574 case GCN_BUILTIN_FLAT_LOAD_PTR_FLOAT:
4575 {
4576 if (ignore)
4577 return target;
4578 rtx exec = force_reg (DImode,
4579 expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
4580 DImode,
4581 EXPAND_NORMAL));
4582 rtx ptr = force_reg (DImode,
4583 expand_expr (CALL_EXPR_ARG (exp, 1), NULL_RTX,
4584 V64DImode,
4585 EXPAND_NORMAL));
4586 rtx offsets = force_reg (V64SImode,
4587 expand_expr (CALL_EXPR_ARG (exp, 2),
4588 NULL_RTX, V64DImode,
4589 EXPAND_NORMAL));
4590 rtx addrs = gen_reg_rtx (V64DImode);
4591 rtx tmp = gen_reg_rtx (V64SImode);
4592 emit_insn (gen_ashlv64si3_exec (tmp, offsets,
4593 GEN_INT (2),
4594 gcn_gen_undef (V64SImode), exec));
4595 emit_insn (gen_addv64di3_zext_dup2_exec (addrs, tmp, ptr,
4596 gcn_gen_undef (V64DImode),
4597 exec));
4598 rtx mem = gen_rtx_MEM (GET_MODE (target), addrs);
4599 /*set_mem_addr_space (mem, ADDR_SPACE_FLAT); */
4600 /* FIXME: set attributes. */
4601	      emit_insn (gen_movvNm (target, mem, NULL, exec));
4602 return target;
4603 }
4604 case GCN_BUILTIN_FLAT_STORE_PTR_INT32:
4605 case GCN_BUILTIN_FLAT_STORE_PTR_FLOAT:
4606 {
4607 rtx exec = force_reg (DImode,
4608 expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
4609 DImode,
4610 EXPAND_NORMAL));
4611 rtx ptr = force_reg (DImode,
4612 expand_expr (CALL_EXPR_ARG (exp, 1), NULL_RTX,
4613 V64DImode,
4614 EXPAND_NORMAL));
4615 rtx offsets = force_reg (V64SImode,
4616 expand_expr (CALL_EXPR_ARG (exp, 2),
4617 NULL_RTX, V64DImode,
4618 EXPAND_NORMAL));
4619 machine_mode vmode = TYPE_MODE (TREE_TYPE (CALL_EXPR_ARG (exp,
4620 3)));
4621 rtx val = force_reg (vmode,
4622 expand_expr (CALL_EXPR_ARG (exp, 3), NULL_RTX,
4623 vmode,
4624 EXPAND_NORMAL));
4625 rtx addrs = gen_reg_rtx (V64DImode);
4626 rtx tmp = gen_reg_rtx (V64SImode);
4627 emit_insn (gen_ashlv64si3_exec (tmp, offsets,
4628 GEN_INT (2),
4629 gcn_gen_undef (V64SImode), exec));
4630 emit_insn (gen_addv64di3_zext_dup2_exec (addrs, tmp, ptr,
4631 gcn_gen_undef (V64DImode),
4632 exec));
4633 rtx mem = gen_rtx_MEM (vmode, addrs);
4634 /*set_mem_addr_space (mem, ADDR_SPACE_FLAT); */
4635 /* FIXME: set attributes. */
4636	      emit_insn (gen_movvNm (mem, val, NULL, exec));
4637 return target;
4638 }
4639 case GCN_BUILTIN_SQRTVF:
4640 {
4641 if (ignore)
4642 return target;
4643 rtx arg = force_reg (V64SFmode,
4644 expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
4645 V64SFmode,
4646 EXPAND_NORMAL));
4647	      emit_insn (gen_sqrtv64sf2 (target, arg));
4648 return target;
4649 }
4650 case GCN_BUILTIN_SQRTF:
4651 {
4652 if (ignore)
4653 return target;
4654 rtx arg = force_reg (SFmode,
4655 expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
4656 SFmode,
4657 EXPAND_NORMAL));
4658 emit_insn (gen_sqrtsf2 (target, arg));
4659 return target;
4660 }
4661 case GCN_BUILTIN_FABSVF:
4662 {
4663 if (ignore)
4664 return target;
4665 rtx arg = force_reg (V64SFmode,
4666 expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
4667 V64SFmode,
4668 EXPAND_NORMAL));
4669	      emit_insn (gen_absv64sf2 (target, arg));
4670 return target;
4671 }
4672 case GCN_BUILTIN_FABSV:
4673 {
4674 if (ignore)
4675 return target;
4676 rtx arg = force_reg (V64DFmode,
4677 expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
4678 V64DFmode,
4679 EXPAND_NORMAL));
4680 emit_insn (gen_absv64df2 (target, arg));
4681 return target;
4682 }
4683 case GCN_BUILTIN_FLOORVF:
4684 {
4685 if (ignore)
4686 return target;
4687 rtx arg = force_reg (V64SFmode,
4688 expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
4689 V64SFmode,
4690 EXPAND_NORMAL));
4691 emit_insn (gen_floorv64sf2 (target, arg));
4692 return target;
4693 }
4694 case GCN_BUILTIN_FLOORV:
4695 {
4696 if (ignore)
4697 return target;
4698 rtx arg = force_reg (V64DFmode,
4699 expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
4700 V64DFmode,
4701 EXPAND_NORMAL));
4702 emit_insn (gen_floorv64df2 (target, arg));
4703 return target;
4704 }
4705 case GCN_BUILTIN_LDEXPVF:
4706 {
4707 if (ignore)
4708 return target;
4709 rtx arg1 = force_reg (V64SFmode,
4710 expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
4711 V64SFmode,
4712 EXPAND_NORMAL));
4713 rtx arg2 = force_reg (V64SImode,
4714 expand_expr (CALL_EXPR_ARG (exp, 1), NULL_RTX,
4715 V64SImode,
4716 EXPAND_NORMAL));
4717	      emit_insn (gen_ldexpv64sf3 (target, arg1, arg2));
4718 return target;
4719 }
4720 case GCN_BUILTIN_LDEXPV:
4721 {
4722 if (ignore)
4723 return target;
4724 rtx arg1 = force_reg (V64DFmode,
4725 expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
4726	                           V64DFmode,
4727 EXPAND_NORMAL));
4728 rtx arg2 = force_reg (V64SImode,
4729 expand_expr (CALL_EXPR_ARG (exp, 1), NULL_RTX,
4730 V64SImode,
4731 EXPAND_NORMAL));
4732	      emit_insn (gen_ldexpv64df3 (target, arg1, arg2));
4733 return target;
4734 }
4735 case GCN_BUILTIN_FREXPVF_EXP:
4736 {
4737 if (ignore)
4738 return target;
4739 rtx arg = force_reg (V64SFmode,
4740 expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
4741 V64SFmode,
4742 EXPAND_NORMAL));
4743	      emit_insn (gen_frexpv64sf_exp2 (target, arg));
4744 return target;
4745 }
4746 case GCN_BUILTIN_FREXPVF_MANT:
4747 {
4748 if (ignore)
4749 return target;
4750 rtx arg = force_reg (V64SFmode,
4751 expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
4752 V64SFmode,
4753 EXPAND_NORMAL));
4754	      emit_insn (gen_frexpv64sf_mant2 (target, arg));
4755 return target;
4756 }
4757 case GCN_BUILTIN_FREXPV_EXP:
4758 {
4759 if (ignore)
4760 return target;
4761 rtx arg = force_reg (V64DFmode,
4762 expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
4763 V64DFmode,
4764 EXPAND_NORMAL));
4765	      emit_insn (gen_frexpv64df_exp2 (target, arg));
4766 return target;
4767 }
4768 case GCN_BUILTIN_FREXPV_MANT:
4769 {
4770 if (ignore)
4771 return target;
4772 rtx arg = force_reg (V64DFmode,
4773 expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
4774 V64DFmode,
4775 EXPAND_NORMAL));
4776	      emit_insn (gen_frexpv64df_mant2 (target, arg));
4777 return target;
4778 }
4779 case GCN_BUILTIN_OMP_DIM_SIZE:
4780 {
4781 if (ignore)
4782 return target;
4783 emit_insn (gen_oacc_dim_size (target,
4784 expand_expr (CALL_EXPR_ARG (exp, 0),
4785 NULL_RTX, SImode,
4786 EXPAND_NORMAL)));
4787 return target;
4788 }
4789 case GCN_BUILTIN_OMP_DIM_POS:
4790 {
4791 if (ignore)
4792 return target;
4793 emit_insn (gen_oacc_dim_pos (target,
4794 expand_expr (CALL_EXPR_ARG (exp, 0),
4795 NULL_RTX, SImode,
4796 EXPAND_NORMAL)));
4797 return target;
4798 }
4799 case GCN_BUILTIN_CMP_SWAP:
4800 case GCN_BUILTIN_CMP_SWAPLL:
4801 return gcn_expand_cmp_swap (exp, target);
4802
4803 case GCN_BUILTIN_ACC_SINGLE_START:
4804 {
4805 if (ignore)
4806 return target;
4807
4808 rtx wavefront = gcn_oacc_dim_pos (1);
4809 rtx cond = gen_rtx_EQ (VOIDmode, wavefront, const0_rtx);
4810 rtx cc = (target && REG_P (target)) ? target : gen_reg_rtx (BImode);
4811 emit_insn (gen_cstoresi4 (cc, cond, wavefront, const0_rtx));
4812 return cc;
4813 }
4814
4815 case GCN_BUILTIN_ACC_SINGLE_COPY_START:
4816 {
4817 rtx blk = force_reg (SImode,
4818 expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
4819 SImode, EXPAND_NORMAL));
4820 rtx wavefront = gcn_oacc_dim_pos (1);
4821 rtx cond = gen_rtx_NE (VOIDmode, wavefront, const0_rtx);
4822 rtx not_zero = gen_label_rtx ();
4823 emit_insn (gen_cbranchsi4 (cond, wavefront, const0_rtx, not_zero));
4824 emit_move_insn (blk, const0_rtx);
4825 emit_label (not_zero);
4826 return blk;
4827 }
4828
4829 case GCN_BUILTIN_ACC_SINGLE_COPY_END:
4830 return target;
4831
4832 case GCN_BUILTIN_ACC_BARRIER:
4833 emit_insn (gen_gcn_wavefront_barrier ());
4834 return target;
4835
4836 case GCN_BUILTIN_GET_STACK_LIMIT:
4837 {
4838 /* stackbase = (stack_segment_decr & 0x0000ffffffffffff)
4839 + stack_wave_offset);
4840 seg_size = dispatch_ptr->private_segment_size;
4841 stacklimit = stackbase + seg_size*64;
4842	   with seg_size = *(uint32_t *) ((char *) dispatch_ptr
4843 + 6*sizeof(int16_t) + 3*sizeof(int32_t));
4844 cf. struct hsa_kernel_dispatch_packet_s in the HSA doc. */
4845 rtx ptr;
4846 if (cfun->machine->args.reg[DISPATCH_PTR_ARG] >= 0
4847	    && cfun->machine->args.reg[KERNARG_SEGMENT_PTR_ARG] >= 0)
4848	  {
4849 rtx num_waves_mem = gcn_oacc_dim_size (1);
4850 rtx num_waves = gen_reg_rtx (SImode);
4851 set_mem_addr_space (num_waves_mem, ADDR_SPACE_SCALAR_FLAT);
4852 emit_move_insn (num_waves, num_waves_mem);
4853
4854 rtx workgroup_num = gcn_oacc_dim_pos (0);
4855 rtx wave_num = gen_reg_rtx (SImode);
4856	    emit_move_insn (wave_num, gcn_oacc_dim_pos (1));
4857
4858 rtx thread_id = gen_reg_rtx (SImode);
4859 emit_insn (gen_mulsi3 (thread_id, num_waves, workgroup_num));
4860 emit_insn (gen_addsi3_scc (thread_id, thread_id, wave_num));
4861
4862 rtx kernarg_reg = gen_rtx_REG (DImode, cfun->machine->args.reg
4863 [KERNARG_SEGMENT_PTR_ARG]);
4864 rtx stack_size_mem = gen_rtx_MEM (SImode,
4865 gen_rtx_PLUS (DImode,
4866 kernarg_reg,
4867 GEN_INT (52)));
4868 set_mem_addr_space (stack_size_mem, ADDR_SPACE_SCALAR_FLAT);
4869 rtx stack_size = gen_reg_rtx (SImode);
4870 emit_move_insn (stack_size, stack_size_mem);
4871
4872 rtx wave_offset = gen_reg_rtx (SImode);
4873 emit_insn (gen_mulsi3 (wave_offset, stack_size, thread_id));
4874
4875 rtx stack_limit_offset = gen_reg_rtx (SImode);
4876 emit_insn (gen_addsi3 (stack_limit_offset, wave_offset,
4877 stack_size));
4878
4879 rtx stack_limit_offset_di = gen_reg_rtx (DImode);
4880 emit_move_insn (gen_rtx_SUBREG (SImode, stack_limit_offset_di, 4),
4881 const0_rtx);
4882 emit_move_insn (gen_rtx_SUBREG (SImode, stack_limit_offset_di, 0),
4883 stack_limit_offset);
4884
4885 rtx stack_addr_mem = gen_rtx_MEM (DImode,
4886 gen_rtx_PLUS (DImode,
4887 kernarg_reg,
4888 GEN_INT (40)));
4889 set_mem_addr_space (stack_addr_mem, ADDR_SPACE_SCALAR_FLAT);
4890 rtx stack_addr = gen_reg_rtx (DImode);
4891 emit_move_insn (stack_addr, stack_addr_mem);
4892
4893 ptr = gen_rtx_PLUS (DImode, stack_addr, stack_limit_offset_di);
4894 }
4895 else
4896 {
4897 ptr = gen_reg_rtx (DImode);
4898 emit_move_insn (ptr, const0_rtx);
4899 }
4900 return ptr;
4901 }
4902 case GCN_BUILTIN_KERNARG_PTR:
4903 {
4904 rtx ptr;
4905 if (cfun->machine->args.reg[KERNARG_SEGMENT_PTR_ARG] >= 0)
4906 ptr = gen_rtx_REG (DImode,
4907 cfun->machine->args.reg[KERNARG_SEGMENT_PTR_ARG]);
4908 else
4909 {
4910 ptr = gen_reg_rtx (DImode);
4911 emit_move_insn (ptr, const0_rtx);
4912 }
4913 return ptr;
4914 }
4915 case GCN_BUILTIN_DISPATCH_PTR:
4916 {
4917 rtx ptr;
4918 if (cfun->machine->args.reg[DISPATCH_PTR_ARG] >= 0)
4919 ptr = gen_rtx_REG (DImode,
4920 cfun->machine->args.reg[DISPATCH_PTR_ARG]);
4921 else
4922 {
4923 ptr = gen_reg_rtx (DImode);
4924 emit_move_insn (ptr, const0_rtx);
4925 }
4926 return ptr;
4927 }
4928 case GCN_BUILTIN_FIRST_CALL_THIS_THREAD_P:
4929 {
4930 /* Stash a marker in the unused upper 16 bits of s[0:1] to indicate
4931 whether it was the first call. */
4932 rtx result = gen_reg_rtx (BImode);
4933 emit_move_insn (result, const0_rtx);
4934	if (cfun->machine->args.reg[QUEUE_PTR_ARG] >= 0)
4935 {
4936 rtx not_first = gen_label_rtx ();
4937 rtx reg = gen_rtx_REG (DImode,
4938	                           cfun->machine->args.reg[QUEUE_PTR_ARG]);
4939 reg = gcn_operand_part (DImode, reg, 1);
4940 rtx cmp = force_reg (SImode,
4941 gen_rtx_LSHIFTRT (SImode, reg, GEN_INT (16)));
4942 emit_insn (gen_cstoresi4 (result, gen_rtx_NE (BImode, cmp,
4943	                                              GEN_INT (12345)),
4944	                                cmp, GEN_INT (12345)));
4945 emit_jump_insn (gen_cjump (not_first, gen_rtx_EQ (BImode, result,
4946 const0_rtx),
4947 result));
4948 emit_move_insn (reg,
4949 force_reg (SImode,
4950 gen_rtx_IOR (SImode,
4951 gen_rtx_AND (SImode, reg, GEN_INT (0x0000ffff)),
4952 GEN_INT (12345L << 16))));
4953 emit_insn (gen_rtx_USE (VOIDmode, reg));
4954 emit_label (not_first);
4955 }
4956 return result;
4957 }
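/* Marker arithmetic: 12345 is 0x3039.  Queue addresses fit in the low
   48 bits, so bits 48-63 of the QUEUE_PTR SGPR pair are free; the code
   above reads them via (high_word >> 16), returns 1 and stamps 0x3039
   into those bits on the first call, and on later calls finds the
   stamp, so RESULT stays 0 and the store is skipped.  */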
4958 default:
4959 gcc_unreachable ();
4960 }
4961}
4962
4963/* Expansion of simple arithmetic and bit binary operation builtins.
4964
4965 Intended for use with gcn_builtins table. */
4966
4967static rtx
4968gcn_expand_builtin_binop (tree exp, rtx target, rtx /*subtarget */ ,
4969 machine_mode /*mode */ , int ignore,
4970 struct gcn_builtin_description *d)
4971{
4972 int icode = d->icode;
4973 if (ignore)
4974 return target;
4975
4976 rtx exec = force_reg (DImode,
4977 expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX, DImode,
4978 EXPAND_NORMAL));
4979
4980 machine_mode m1 = insn_data[icode].operand[1].mode;
4981 rtx arg1 = expand_expr (CALL_EXPR_ARG (exp, 1), NULL_RTX, m1,
4982 EXPAND_NORMAL);
4983 if (!insn_data[icode].operand[1].predicate (arg1, m1))
4984 arg1 = force_reg (m1, arg1);
4985
4986 machine_mode m2 = insn_data[icode].operand[2].mode;
4987 rtx arg2 = expand_expr (CALL_EXPR_ARG (exp, 2), NULL_RTX, m2,
4988 EXPAND_NORMAL);
4989 if (!insn_data[icode].operand[2].predicate (arg2, m2))
4990 arg2 = force_reg (m2, arg2);
4991
4992 rtx arg_prev;
4993 if (call_expr_nargs (exp) == 4)
4994 {
4995 machine_mode m_prev = insn_data[icode].operand[4].mode;
4996 arg_prev = force_reg (m_prev,
4997 expand_expr (CALL_EXPR_ARG (exp, 3), NULL_RTX,
4998 m_prev, EXPAND_NORMAL));
4999 }
5000 else
5001 arg_prev = gcn_gen_undef (GET_MODE (target));
5002
5003 rtx pat = GEN_FCN (icode) (target, arg1, arg2, exec, arg_prev);
5004 emit_insn (pat);
5005 return target;
5006}
5007
5008/* Implement TARGET_EXPAND_BUILTIN.
5009
5010 Expand an expression EXP that calls a built-in function, with result going
5011 to TARGET if that's convenient (and in mode MODE if that's convenient).
5012 SUBTARGET may be used as the target for computing one of EXP's operands.
5013 IGNORE is nonzero if the value is to be ignored. */
5014
5015rtx
5016gcn_expand_builtin (tree exp, rtx target, rtx subtarget, machine_mode mode,
5017 int ignore)
5018{
5019 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
5020	  unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl);
5021 struct gcn_builtin_description *d;
5022
5023 gcc_assert (fcode < GCN_BUILTIN_MAX);
5024 d = &gcn_builtins[fcode];
5025
5026 if (d->type == B_UNIMPLEMENTED)
5027 sorry ("Builtin not implemented");
5028
5029 return d->expander (exp, target, subtarget, mode, ignore, d);
5030}
5031
5032/* }}} */
5033/* {{{ Vectorization. */
5034
5035/* Implement TARGET_VECTORIZE_GET_MASK_MODE.
5036
5037 A vector mask is a value that holds one boolean result for every element in
5038 a vector. */
5039
5040opt_machine_mode
5041	gcn_vectorize_get_mask_mode (machine_mode)
5042{
5043 /* GCN uses a DImode bit-mask. */
5044 return DImode;
5045}
5046
5047/* Return an RTX that references a vector with the i-th lane containing
5048 PERM[i]*4.
5049
5050 Helper function for gcn_vectorize_vec_perm_const. */
5051
5052static rtx
5053	gcn_make_vec_perm_address (unsigned int *perm, int nelt)
5054	{
5055 machine_mode mode = VnMODE (nelt, SImode);
5056 rtx x = gen_reg_rtx (mode);
5057 emit_move_insn (x, gcn_vec_constant (mode, 0));
5058
5059 /* Permutation addresses use byte addressing. With each vector lane being
5060 4 bytes wide, and with 64 lanes in total, only bits 2..7 are significant,
5061 so only set those.
5062
5063 The permutation given to the vec_perm* patterns range from 0 to 2N-1 to
5064 select between lanes in two vectors, but as the DS_BPERMUTE* instructions
5065 only take one source vector, the most-significant bit can be ignored
5066 here. Instead, we can use EXEC masking to select the relevant part of
5067 each source vector after they are permuted separately. */
5068 uint64_t bit_mask = 1 << 2;
5069 for (int i = 2; i < 8; i++, bit_mask <<= 1)
5070 {
5071 uint64_t exec_mask = 0;
5072 uint64_t lane_mask = 1;
5073 for (int j = 0; j < nelt; j++, lane_mask <<= 1)
5074 if (((perm[j] % nelt) * 4) & bit_mask)
5075 exec_mask |= lane_mask;
5076
5077 if (exec_mask)
5078 emit_insn (gen_addvNsi3 (x, x, gcn_vec_constant (mode, bit_mask),
5079 x, get_exec (exec_mask)));
5080 }
5081
5082 return x;
5083}
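/* Worked example (nelt = 4, so only bits 2-3 vary): perm = {2,3,0,1}
   needs byte addresses {8,12,0,4}.  Bit 2 (value 4) is set for lanes 1
   and 3, and bit 3 (value 8) for lanes 0 and 1, so the loop above does
   two masked adds on a zeroed vector, giving {0+8, 4+8, 0+0, 4+0} =
   {8,12,0,4} as required.  */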
5084
5085/* Implement TARGET_VECTORIZE_VEC_PERM_CONST.
5086
5087 Return true if permutation with SEL is possible.
5088
5089 If DST/SRC0/SRC1 are non-null, emit the instructions to perform the
5090 permutations. */
5091
5092static bool
5093gcn_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
5094 rtx dst, rtx src0, rtx src1,
5095 const vec_perm_indices & sel)
5096{
5097 if (vmode != op_mode
5098 || !VECTOR_MODE_P (vmode)
5099 || GET_MODE_INNER (vmode) == TImode)
5100 return false;
5101
5102 unsigned int nelt = GET_MODE_NUNITS (vmode);
5103
5104 gcc_assert (VECTOR_MODE_P (vmode));
5105 gcc_assert (nelt <= 64);
5106 gcc_assert (sel.length () == nelt);
5107
5108 if (!dst)
5109 {
5110 /* All vector permutations are possible on this architecture,
5111 with varying degrees of efficiency depending on the permutation. */
5112 return true;
5113 }
5114
5115 unsigned int perm[64];
5116 for (unsigned int i = 0; i < nelt; ++i)
5117 perm[i] = sel[i] & (2 * nelt - 1);
5118 for (unsigned int i = nelt; i < 64; ++i)
5119 perm[i] = 0;
5120
5121 src0 = force_reg (vmode, src0);
5122 src1 = force_reg (vmode, src1);
5123
5124 /* Make life a bit easier by swapping operands if necessary so that
5125 the first element always comes from src0. */
5126 if (perm[0] >= nelt)
5127 {
5128	      std::swap (src0, src1);
5129
5130 for (unsigned int i = 0; i < nelt; ++i)
5131 if (perm[i] < nelt)
5132 perm[i] += nelt;
5133 else
5134 perm[i] -= nelt;
5135 }
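  /* E.g. (illustrative): for nelt == 4 and sel == {5, 1, 6, 2}, element 0
     comes from src1, so the sources are swapped and perm becomes
     {1, 5, 2, 6}, restoring the invariant perm[0] < nelt.  */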
5136
5137 /* TODO: There are more efficient ways to implement certain permutations
5138 using ds_swizzle_b32 and/or DPP. Test for and expand them here, before
5139 this more inefficient generic approach is used. */
5140
5141 int64_t src1_lanes = 0;
5142 int64_t lane_bit = 1;
5143
5144 for (unsigned int i = 0; i < nelt; ++i, lane_bit <<= 1)
5145 {
5146 /* Set the bits for lanes from src1. */
5147 if (perm[i] >= nelt)
5148 src1_lanes |= lane_bit;
5149 }
5150
45381d6f 5151 rtx addr = gcn_make_vec_perm_address (perm, nelt);
5326695a
AS
5152
5153 /* Load elements from src0 to dst. */
45381d6f
AS
5154  gcc_assert ((~src1_lanes) & (0xffffffffffffffffUL >> (64-nelt)));
5155 emit_insn (gen_ds_bpermutevNm (dst, addr, src0, get_exec (vmode)));
5326695a
AS
5156
5157 /* Load elements from src1 to dst. */
5158 if (src1_lanes)
5159 {
5160 /* Masking a lane masks both the destination and source lanes for
5161 DS_BPERMUTE, so we need to have all lanes enabled for the permute,
5162 then add an extra masked move to merge the results of permuting
5163 the two source vectors together.
5164 */
5165 rtx tmp = gen_reg_rtx (vmode);
45381d6f
AS
5166 emit_insn (gen_ds_bpermutevNm (tmp, addr, src1, get_exec (vmode)));
5167 emit_insn (gen_movvNm (dst, tmp, dst, get_exec (src1_lanes)));
5326695a
AS
5168 }
5169
5170 return true;
5171}
5172
5173/* Implements TARGET_VECTOR_MODE_SUPPORTED_P.
5174
5175 Return nonzero if vector MODE is supported with at least move
5176 instructions. */
5177
5178static bool
5179gcn_vector_mode_supported_p (machine_mode mode)
5180{
2b99bed8
AS
5181 return (mode == V64QImode || mode == V64HImode
5182 || mode == V64SImode || mode == V64DImode
45381d6f
AS
5183 || mode == V64SFmode || mode == V64DFmode
5184 || mode == V32QImode || mode == V32HImode
5185 || mode == V32SImode || mode == V32DImode
5186 || mode == V32SFmode || mode == V32DFmode
5187 || mode == V16QImode || mode == V16HImode
5188 || mode == V16SImode || mode == V16DImode
5189 || mode == V16SFmode || mode == V16DFmode
5190 || mode == V8QImode || mode == V8HImode
5191 || mode == V8SImode || mode == V8DImode
5192 || mode == V8SFmode || mode == V8DFmode
5193 || mode == V4QImode || mode == V4HImode
5194 || mode == V4SImode || mode == V4DImode
5195 || mode == V4SFmode || mode == V4DFmode
5196 || mode == V2QImode || mode == V2HImode
5197 || mode == V2SImode || mode == V2DImode
8aeabd9f
AS
5198 || mode == V2SFmode || mode == V2DFmode
5199 /* TImode vectors are allowed to exist for divmod, but there
5200 are almost no instructions defined for them, and the
5201 autovectorizer does not use them. */
5202 || mode == V64TImode || mode == V32TImode
5203 || mode == V16TImode || mode == V8TImode
5204 || mode == V4TImode || mode == V2TImode);
5326695a
AS
5205}
5206
5207/* Implement TARGET_VECTORIZE_PREFERRED_SIMD_MODE.
5208
5209 Enables autovectorization for all supported modes. */
5210
5211static machine_mode
5212gcn_vectorize_preferred_simd_mode (scalar_mode mode)
5213{
5214 switch (mode)
5215 {
5216 case E_QImode:
5217 return V64QImode;
5218 case E_HImode:
5219 return V64HImode;
5220 case E_SImode:
5221 return V64SImode;
5222 case E_DImode:
5223 return V64DImode;
5224 case E_SFmode:
5225 return V64SFmode;
5226 case E_DFmode:
5227 return V64DFmode;
5228 default:
5229 return word_mode;
5230 }
5231}
5232
45381d6f
AS
5233/* Implement TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES.
5234
5235 Try all the vector modes. */
5236
5237unsigned int gcn_autovectorize_vector_modes (vector_modes *modes,
5238 bool ARG_UNUSED (all))
5239{
5240 modes->safe_push (V64QImode);
5241 modes->safe_push (V64HImode);
5242 modes->safe_push (V64SImode);
5243 modes->safe_push (V64SFmode);
5244 modes->safe_push (V64DImode);
5245 modes->safe_push (V64DFmode);
5246
5247 modes->safe_push (V32QImode);
5248 modes->safe_push (V32HImode);
5249 modes->safe_push (V32SImode);
5250 modes->safe_push (V32SFmode);
5251 modes->safe_push (V32DImode);
5252 modes->safe_push (V32DFmode);
5253
5254 modes->safe_push (V16QImode);
5255 modes->safe_push (V16HImode);
5256 modes->safe_push (V16SImode);
5257 modes->safe_push (V16SFmode);
5258 modes->safe_push (V16DImode);
5259 modes->safe_push (V16DFmode);
5260
5261 modes->safe_push (V8QImode);
5262 modes->safe_push (V8HImode);
5263 modes->safe_push (V8SImode);
5264 modes->safe_push (V8SFmode);
5265 modes->safe_push (V8DImode);
5266 modes->safe_push (V8DFmode);
5267
5268 modes->safe_push (V4QImode);
5269 modes->safe_push (V4HImode);
5270 modes->safe_push (V4SImode);
5271 modes->safe_push (V4SFmode);
5272 modes->safe_push (V4DImode);
5273 modes->safe_push (V4DFmode);
5274
5275 modes->safe_push (V2QImode);
5276 modes->safe_push (V2HImode);
5277 modes->safe_push (V2SImode);
5278 modes->safe_push (V2SFmode);
5279 modes->safe_push (V2DImode);
5280 modes->safe_push (V2DFmode);
5281
5282 /* We shouldn't need VECT_COMPARE_COSTS as they should all cost the same. */
5283 return 0;
5284}
5285
2b99bed8
AS
5286/* Implement TARGET_VECTORIZE_RELATED_MODE.
5287
5288   GCN vector modes are matched by lane count, not bit size, so this is
5289   simpler than other architectures.
5289 In particular, we do *not* want to match vector bit-size. */
5290
5291static opt_machine_mode
45381d6f 5292gcn_related_vector_mode (machine_mode vector_mode,
dd455df7 5293 scalar_mode element_mode, poly_uint64 nunits)
2b99bed8 5294{
45381d6f 5295 int n = nunits.to_constant ();
2b99bed8 5296
45381d6f
AS
5297 if (n == 0)
5298 n = GET_MODE_NUNITS (vector_mode);
2b99bed8 5299
45381d6f 5300 return VnMODE (n, element_mode);
2b99bed8
AS
5301}
5302
5326695a
AS
5303/* Implement TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT.
5304
5305   Returns the preferred alignment in bits for accesses to vectors of type TYPE
5306 in vectorized code. This might be less than or greater than the ABI-defined
5307 value returned by TARGET_VECTOR_ALIGNMENT. It can be equal to the alignment
5308 of a single element, in which case the vectorizer will not try to optimize
5309 for alignment. */
5310
5311static poly_uint64
5312gcn_preferred_vector_alignment (const_tree type)
5313{
5314 return TYPE_ALIGN (TREE_TYPE (type));
5315}
5316
5317/* Implement TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT.
5318
5319 Return true if the target supports misaligned vector store/load of a
5320 specific factor denoted in the misalignment parameter. */
5321
5322static bool
5323gcn_vectorize_support_vector_misalignment (machine_mode ARG_UNUSED (mode),
5324 const_tree type, int misalignment,
5325 bool is_packed)
5326{
5327 if (is_packed)
5328 return false;
5329
5330 /* If the misalignment is unknown, we should be able to handle the access
5331 so long as it is not to a member of a packed data structure. */
5332 if (misalignment == -1)
5333 return true;
5334
5335 /* Return true if the misalignment is a multiple of the natural alignment
5336 of the vector's element type. This is probably always going to be
5337 true in practice, since we've already established that this isn't a
5338 packed access. */
5339 return misalignment % TYPE_ALIGN_UNIT (type) == 0;
5340}
5341
5342/* Implement TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE.
5343
5344 Return true if vector alignment is reachable (by peeling N iterations) for
5345 the given scalar type TYPE. */
5346
5347static bool
5348gcn_vector_alignment_reachable (const_tree ARG_UNUSED (type), bool is_packed)
5349{
5350 /* Vectors which aren't in packed structures will not be less aligned than
5351 the natural alignment of their element type, so this is safe. */
5352 return !is_packed;
5353}
5354
1bde3ace
AJ
5355/* Generate DPP pairwise swap instruction.
5356 This instruction swaps the values in each even lane with the value in the
5357 next one:
5358 a, b, c, d -> b, a, d, c.
5359 The opcode is given by INSN. */
5360
5361char *
5362gcn_expand_dpp_swap_pairs_insn (machine_mode mode, const char *insn,
5363 int ARG_UNUSED (unspec))
5364{
5365 static char buf[128];
5366 const char *dpp;
5367
5368 /* Add the DPP modifiers. */
5369 dpp = "quad_perm:[1,0,3,2]";
5370
5371 if (vgpr_2reg_mode_p (mode))
5372 sprintf (buf, "%s\t%%L0, %%L1 %s\n\t%s\t%%H0, %%H1 %s",
5373 insn, dpp, insn, dpp);
5374 else
5375 sprintf (buf, "%s\t%%0, %%1 %s", insn, dpp);
5376
5377 return buf;
5378}
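/* For example, assuming the caller passes "v_mov_b32" as INSN, the template
   returned above for a single-register mode is:
     v_mov_b32	%0, %1 quad_perm:[1,0,3,2]
   The two variants below differ only in the quad_perm selector.  */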
5379
5380/* Generate DPP distribute even instruction.
5381 This instruction copies the value in each even lane to the next one:
5382 a, b, c, d -> a, a, c, c.
5383 The opcode is given by INSN. */
5384
5385char *
5386gcn_expand_dpp_distribute_even_insn (machine_mode mode, const char *insn,
5387 int ARG_UNUSED (unspec))
5388{
5389 static char buf[128];
5390 const char *dpp;
5391
5392 /* Add the DPP modifiers. */
5393 dpp = "quad_perm:[0,0,2,2]";
5394
5395 if (vgpr_2reg_mode_p (mode))
5396 sprintf (buf, "%s\t%%L0, %%L1 %s\n\t%s\t%%H0, %%H1 %s",
5397 insn, dpp, insn, dpp);
5398 else
5399 sprintf (buf, "%s\t%%0, %%1 %s", insn, dpp);
5400
5401 return buf;
5402}
5403
5404/* Generate DPP distribute odd instruction.
5405   This instruction copies the value in each odd lane to the previous one:
5406 a, b, c, d -> b, b, d, d.
5407 The opcode is given by INSN. */
5408
5409char *
5410gcn_expand_dpp_distribute_odd_insn (machine_mode mode, const char *insn,
5411 int ARG_UNUSED (unspec))
5412{
5413 static char buf[128];
5414 const char *dpp;
5415
5416 /* Add the DPP modifiers. */
5417 dpp = "quad_perm:[1,1,3,3]";
5418
5419 if (vgpr_2reg_mode_p (mode))
5420 sprintf (buf, "%s\t%%L0, %%L1 %s\n\t%s\t%%H0, %%H1 %s",
5421 insn, dpp, insn, dpp);
5422 else
5423 sprintf (buf, "%s\t%%0, %%1 %s", insn, dpp);
5424
5425 return buf;
5426}
5427
5326695a
AS
5428/* Generate DPP instructions used for vector reductions.
5429
5430 The opcode is given by INSN.
5431 The first operand of the operation is shifted right by SHIFT vector lanes.
5432   SHIFT must be a power of 2.  If SHIFT is 16, the 15th lane of each row is
5433   broadcast to the next row (thereby acting like a shift of 16 for the end of
5434 each row). If SHIFT is 32, lane 31 is broadcast to all the
5435 following lanes (thereby acting like a shift of 32 for lane 63). */
5436
5437char *
5438gcn_expand_dpp_shr_insn (machine_mode mode, const char *insn,
5439 int unspec, int shift)
5440{
a5879399 5441 static char buf[128];
5326695a
AS
5442 const char *dpp;
5443 const char *vcc_in = "";
5444 const char *vcc_out = "";
5445
5446 /* Add the vcc operand if needed. */
5447 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
5448 {
5449 if (unspec == UNSPEC_PLUS_CARRY_IN_DPP_SHR)
5450 vcc_in = ", vcc";
5451
5452 if (unspec == UNSPEC_PLUS_CARRY_DPP_SHR
5453 || unspec == UNSPEC_PLUS_CARRY_IN_DPP_SHR)
5454 vcc_out = ", vcc";
5455 }
5456
5457 /* Add the DPP modifiers. */
5458 switch (shift)
5459 {
5460 case 1:
5461 dpp = "row_shr:1 bound_ctrl:0";
5462 break;
5463 case 2:
5464 dpp = "row_shr:2 bound_ctrl:0";
5465 break;
5466 case 4:
5467 dpp = "row_shr:4 bank_mask:0xe";
5468 break;
5469 case 8:
5470 dpp = "row_shr:8 bank_mask:0xc";
5471 break;
5472 case 16:
5473 dpp = "row_bcast:15 row_mask:0xa";
5474 break;
5475 case 32:
5476 dpp = "row_bcast:31 row_mask:0xc";
5477 break;
5478 default:
5479 gcc_unreachable ();
5480 }
5481
a5879399
AS
5482 if (unspec == UNSPEC_MOV_DPP_SHR && vgpr_2reg_mode_p (mode))
5483 sprintf (buf, "%s\t%%L0, %%L1 %s\n\t%s\t%%H0, %%H1 %s",
5484 insn, dpp, insn, dpp);
5485 else if (unspec == UNSPEC_MOV_DPP_SHR)
5486 sprintf (buf, "%s\t%%0, %%1 %s", insn, dpp);
5487 else
5488 sprintf (buf, "%s\t%%0%s, %%1, %%2%s %s", insn, vcc_out, vcc_in, dpp);
5326695a
AS
5489
5490 return buf;
5491}
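/* For illustration (hypothetical arguments): called with INSN "v_add_co_u32",
   UNSPEC_PLUS_CARRY_DPP_SHR and SHIFT 1, the function above produces
     v_add_co_u32	%0, vcc, %1, %2 row_shr:1 bound_ctrl:0
   i.e. vcc appears as an output operand but not as an input.  */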
5492
5493/* Generate vector reductions in terms of DPP instructions.
5494
5495 The vector register SRC of mode MODE is reduced using the operation given
5496 by UNSPEC, and the scalar result is returned in lane 63 of a vector
f539029c 5497 register (or lane 31, 15, 7, 3, 1 for partial vectors). */
5326695a
AS
5498
5499rtx
5500gcn_expand_reduc_scalar (machine_mode mode, rtx src, int unspec)
5501{
a5879399 5502 machine_mode orig_mode = mode;
f539029c
AS
5503 machine_mode scalar_mode = GET_MODE_INNER (mode);
5504 int vf = GET_MODE_NUNITS (mode);
a5879399
AS
5505 bool use_moves = (((unspec == UNSPEC_SMIN_DPP_SHR
5506 || unspec == UNSPEC_SMAX_DPP_SHR
5507 || unspec == UNSPEC_UMIN_DPP_SHR
5508 || unspec == UNSPEC_UMAX_DPP_SHR)
f539029c
AS
5509 && (scalar_mode == DImode
5510 || scalar_mode == DFmode))
a5879399 5511 || (unspec == UNSPEC_PLUS_DPP_SHR
f539029c 5512 && scalar_mode == DFmode));
a5879399
AS
5513 rtx_code code = (unspec == UNSPEC_SMIN_DPP_SHR ? SMIN
5514 : unspec == UNSPEC_SMAX_DPP_SHR ? SMAX
5515 : unspec == UNSPEC_UMIN_DPP_SHR ? UMIN
5516 : unspec == UNSPEC_UMAX_DPP_SHR ? UMAX
5517 : unspec == UNSPEC_PLUS_DPP_SHR ? PLUS
5518 : UNKNOWN);
5519 bool use_extends = ((unspec == UNSPEC_SMIN_DPP_SHR
5520 || unspec == UNSPEC_SMAX_DPP_SHR
5521 || unspec == UNSPEC_UMIN_DPP_SHR
5522 || unspec == UNSPEC_UMAX_DPP_SHR)
f539029c
AS
5523 && (scalar_mode == QImode
5524 || scalar_mode == HImode));
a5879399
AS
5525 bool unsignedp = (unspec == UNSPEC_UMIN_DPP_SHR
5526 || unspec == UNSPEC_UMAX_DPP_SHR);
5326695a
AS
5527 bool use_plus_carry = unspec == UNSPEC_PLUS_DPP_SHR
5528 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT
f539029c 5529 && (TARGET_GCN3 || scalar_mode == DImode);
5326695a
AS
5530
5531 if (use_plus_carry)
5532 unspec = UNSPEC_PLUS_CARRY_DPP_SHR;
5533
a5879399
AS
5534 if (use_extends)
5535 {
f539029c
AS
5536 mode = VnMODE (vf, SImode);
5537 rtx tmp = gen_reg_rtx (mode);
a5879399
AS
5538 convert_move (tmp, src, unsignedp);
5539 src = tmp;
a5879399
AS
5540 }
5541
5326695a
AS
5542 /* Perform reduction by first performing the reduction operation on every
5543 pair of lanes, then on every pair of results from the previous
5544 iteration (thereby effectively reducing every 4 lanes) and so on until
5545 all lanes are reduced. */
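  /* A worked example (illustrative): reducing the 4-lane vector
     {a, b, c, d} with PLUS, the shift-1 step gives {a, a+b, b+c, c+d}
     and the shift-2 step leaves a+b+c+d in lane 3, i.e. log2(vf)
     steps in total.  */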
d51cad0b 5546 rtx in, out = force_reg (mode, src);
f539029c
AS
5547 int iterations = exact_log2 (vf);
5548 for (int i = 0, shift = 1; i < iterations; i++, shift <<= 1)
5326695a
AS
5549 {
5550 rtx shift_val = gen_rtx_CONST_INT (VOIDmode, shift);
a5879399
AS
5551 in = out;
5552 out = gen_reg_rtx (mode);
5553
5554 if (use_moves)
5326695a 5555 {
a5879399
AS
5556 rtx tmp = gen_reg_rtx (mode);
5557 emit_insn (gen_dpp_move (mode, tmp, in, shift_val));
8da8b922
AS
5558 rtx insn = gen_rtx_SET (out, gen_rtx_fmt_ee (code, mode, tmp, in));
5559 if (scalar_mode == DImode)
5560 {
5561 rtx clobber = gen_rtx_CLOBBER (VOIDmode,
5562 gen_rtx_REG (DImode, VCC_REG));
5563 insn = gen_rtx_PARALLEL (VOIDmode,
5564 gen_rtvec (2, insn, clobber));
5565 }
5566 emit_insn (insn);
5326695a 5567 }
a5879399
AS
5568 else
5569 {
5570 rtx insn = gen_rtx_SET (out,
5571 gen_rtx_UNSPEC (mode,
5572 gen_rtvec (3, in, in,
5573 shift_val),
5574 unspec));
5575
5576 /* Add clobber for instructions that set the carry flags. */
5577 if (use_plus_carry)
5578 {
5579 rtx clobber = gen_rtx_CLOBBER (VOIDmode,
5580 gen_rtx_REG (DImode, VCC_REG));
5581 insn = gen_rtx_PARALLEL (VOIDmode,
5582 gen_rtvec (2, insn, clobber));
5583 }
5326695a 5584
a5879399
AS
5585 emit_insn (insn);
5586 }
5587 }
5326695a 5588
a5879399
AS
5589 if (use_extends)
5590 {
5591 rtx tmp = gen_reg_rtx (orig_mode);
5592 convert_move (tmp, out, unsignedp);
5593 out = tmp;
5326695a
AS
5594 }
5595
a5879399 5596 return out;
5326695a
AS
5597}
5598
5599/* Implement TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST. */
5600
5601int
5602gcn_vectorization_cost (enum vect_cost_for_stmt ARG_UNUSED (type_of_cost),
5603 tree ARG_UNUSED (vectype), int ARG_UNUSED (misalign))
5604{
5605 /* Always vectorize. */
5606 return 1;
5607}
5608
b73c49f6
AS
5609/* Implement TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN. */
5610
5611static int
5612gcn_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *ARG_UNUSED (node),
5613 struct cgraph_simd_clone *clonei,
12a10856 5614 tree ARG_UNUSED (base_type),
309e2d95
SL
5615 int ARG_UNUSED (num),
5616 bool explicit_p)
b73c49f6 5617{
b73c49f6
AS
5618 if (known_eq (clonei->simdlen, 0U))
5619 clonei->simdlen = 64;
5620 else if (maybe_ne (clonei->simdlen, 64U))
5621 {
5622 /* Note that x86 has a similar message that is likely to trigger on
5623 sizes that are OK for gcn; the user can't win. */
309e2d95
SL
5624 if (explicit_p)
5625 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
5626 "unsupported simdlen %wd (amdgcn)",
5627 clonei->simdlen.to_constant ());
b73c49f6
AS
5628 return 0;
5629 }
5630
5631 clonei->vecsize_mangle = 'n';
5632 clonei->vecsize_int = 0;
5633 clonei->vecsize_float = 0;
5634
5635 /* DImode ought to be more natural here, but VOIDmode produces better code,
5636 at present, due to the shift-and-test steps not being optimized away
5637 inside the in-branch clones. */
5638 clonei->mask_mode = VOIDmode;
5639
5640 return 1;
5641}
5642
5643/* Implement TARGET_SIMD_CLONE_ADJUST. */
5644
5645static void
5646gcn_simd_clone_adjust (struct cgraph_node *ARG_UNUSED (node))
5647{
5648 /* This hook has to be defined when
5649 TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN is defined, but we don't
5650 need it to do anything yet. */
5651}
5652
5653/* Implement TARGET_SIMD_CLONE_USABLE. */
5654
5655static int
5656gcn_simd_clone_usable (struct cgraph_node *ARG_UNUSED (node))
5657{
5658 /* We don't need to do anything here because
5659 gcn_simd_clone_compute_vecsize_and_simdlen currently only returns one
5660 possibility. */
5661 return 0;
5662}
5663
ce9cd725
KCY
5664tree mathfn_built_in_explicit (tree, combined_fn);
5665
5666/* Implement TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION.
5667 Return the function declaration of the vectorized version of the builtin
5668 in the math library if available. */
5669
5670tree
5671gcn_vectorize_builtin_vectorized_function (unsigned int fn, tree type_out,
5672 tree type_in)
5673{
5674 if (TREE_CODE (type_out) != VECTOR_TYPE
5675 || TREE_CODE (type_in) != VECTOR_TYPE)
5676 return NULL_TREE;
5677
5678 machine_mode out_mode = TYPE_MODE (TREE_TYPE (type_out));
5679 int out_n = TYPE_VECTOR_SUBPARTS (type_out);
ce9cd725
KCY
5680 combined_fn cfn = combined_fn (fn);
5681
5682 /* Keep this consistent with the list of vectorized math routines. */
5683 int implicit_p;
5684 switch (fn)
5685 {
5686 CASE_CFN_ACOS:
5687 CASE_CFN_ACOSH:
5688 CASE_CFN_ASIN:
5689 CASE_CFN_ASINH:
5690 CASE_CFN_ATAN:
5691 CASE_CFN_ATAN2:
5692 CASE_CFN_ATANH:
5693 CASE_CFN_COPYSIGN:
5694 CASE_CFN_COS:
5695 CASE_CFN_COSH:
5696 CASE_CFN_ERF:
5697 CASE_CFN_EXP:
5698 CASE_CFN_EXP2:
5699 CASE_CFN_FINITE:
5700 CASE_CFN_FMOD:
5701 CASE_CFN_GAMMA:
5702 CASE_CFN_HYPOT:
5703 CASE_CFN_ISNAN:
5704 CASE_CFN_LGAMMA:
5705 CASE_CFN_LOG:
5706 CASE_CFN_LOG10:
5707 CASE_CFN_LOG2:
5708 CASE_CFN_POW:
5709 CASE_CFN_REMAINDER:
5710 CASE_CFN_RINT:
5711 CASE_CFN_SIN:
5712 CASE_CFN_SINH:
5713 CASE_CFN_SQRT:
5714 CASE_CFN_TAN:
5715 CASE_CFN_TANH:
5716 CASE_CFN_TGAMMA:
5717 implicit_p = 1;
5718 break;
5719
5720 CASE_CFN_SCALB:
5721 CASE_CFN_SIGNIFICAND:
5722 implicit_p = 0;
5723 break;
5724
5725 default:
5726 return NULL_TREE;
5727 }
5728
5729 tree out_t_node = (out_mode == DFmode) ? double_type_node : float_type_node;
5730 tree fndecl = implicit_p ? mathfn_built_in (out_t_node, cfn)
5731 : mathfn_built_in_explicit (out_t_node, cfn);
5732
5733 const char *bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
5734 char name[20];
5735 sprintf (name, out_mode == DFmode ? "v%ddf_%s" : "v%dsf_%s",
5736 out_n, bname + 10);
5737
5738 unsigned arity = 0;
5739 for (tree args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
5740 arity++;
5741
5742 tree fntype = (arity == 1)
5743 ? build_function_type_list (type_out, type_in, NULL)
5744 : build_function_type_list (type_out, type_in, type_in, NULL);
5745
5746 /* Build a function declaration for the vectorized function. */
5747 tree new_fndecl = build_decl (BUILTINS_LOCATION,
5748 FUNCTION_DECL, get_identifier (name), fntype);
5749 TREE_PUBLIC (new_fndecl) = 1;
5750 DECL_EXTERNAL (new_fndecl) = 1;
5751 DECL_IS_NOVOPS (new_fndecl) = 1;
5752 TREE_READONLY (new_fndecl) = 1;
5753
5754 return new_fndecl;
5755}
5756
5757/* Implement TARGET_LIBC_HAS_FUNCTION. */
5758
5759bool
5760gcn_libc_has_function (enum function_class fn_class,
5761 tree type)
5762{
5763 return bsd_libc_has_function (fn_class, type);
5764}
5765
5326695a
AS
5766/* }}} */
5767/* {{{ md_reorg pass. */
5768
5769/* Identify VMEM instructions from their "type" attribute. */
5770
5771static bool
5772gcn_vmem_insn_p (attr_type type)
5773{
5774 switch (type)
5775 {
5776 case TYPE_MUBUF:
5777 case TYPE_MTBUF:
5778 case TYPE_FLAT:
ae0d2c24 5779 case TYPE_VOP3P_MAI:
5326695a
AS
5780 return true;
5781 case TYPE_UNKNOWN:
5782 case TYPE_SOP1:
5783 case TYPE_SOP2:
5784 case TYPE_SOPK:
5785 case TYPE_SOPC:
5786 case TYPE_SOPP:
5787 case TYPE_SMEM:
5788 case TYPE_DS:
5789 case TYPE_VOP2:
5790 case TYPE_VOP1:
5791 case TYPE_VOPC:
5792 case TYPE_VOP3A:
5793 case TYPE_VOP3B:
5794 case TYPE_VOP_SDWA:
5795 case TYPE_VOP_DPP:
5796 case TYPE_MULT:
5797 case TYPE_VMULT:
5798 return false;
5799 }
5800 gcc_unreachable ();
5801 return false;
5802}
5803
5804/* If INSN sets the EXEC register to a constant value, return the value,
5805 otherwise return zero. */
5806
5807static int64_t
5808gcn_insn_exec_value (rtx_insn *insn)
5809{
5810 if (!NONDEBUG_INSN_P (insn))
5811 return 0;
5812
5813 rtx pattern = PATTERN (insn);
5814
5815 if (GET_CODE (pattern) == SET)
5816 {
5817 rtx dest = XEXP (pattern, 0);
5818 rtx src = XEXP (pattern, 1);
5819
5820 if (GET_MODE (dest) == DImode
5821 && REG_P (dest) && REGNO (dest) == EXEC_REG
5822 && CONST_INT_P (src))
5823 return INTVAL (src);
5824 }
5825
5826 return 0;
5827}
5828
5829/* Sets the EXEC register before INSN to the value that it had after
5830 LAST_EXEC_DEF. The constant value of the EXEC register is returned if
5831 known, otherwise it returns zero. */
5832
5833static int64_t
5834gcn_restore_exec (rtx_insn *insn, rtx_insn *last_exec_def, int64_t curr_exec,
5835 bool curr_exec_known, bool &last_exec_def_saved)
5836{
5837 rtx exec_reg = gen_rtx_REG (DImode, EXEC_REG);
5838 rtx exec;
5839
5840 int64_t exec_value = gcn_insn_exec_value (last_exec_def);
5841
5842 if (exec_value)
5843 {
5844 /* If the EXEC value is a constant and it happens to be the same as the
5845 current EXEC value, the restore can be skipped. */
5846 if (curr_exec_known && exec_value == curr_exec)
5847 return exec_value;
5848
5849 exec = GEN_INT (exec_value);
5850 }
5851 else
5852 {
5853 /* If the EXEC value is not a constant, save it in a register after the
5854 point of definition. */
5855 rtx exec_save_reg = gen_rtx_REG (DImode, EXEC_SAVE_REG);
5856
5857 if (!last_exec_def_saved)
5858 {
5859 start_sequence ();
5860 emit_move_insn (exec_save_reg, exec_reg);
5861 rtx_insn *seq = get_insns ();
5862 end_sequence ();
5863
5864 emit_insn_after (seq, last_exec_def);
5865 if (dump_file && (dump_flags & TDF_DETAILS))
5866 fprintf (dump_file, "Saving EXEC after insn %d.\n",
5867 INSN_UID (last_exec_def));
5868
5869 last_exec_def_saved = true;
5870 }
5871
5872 exec = exec_save_reg;
5873 }
5874
5875 /* Restore EXEC register before the usage. */
5876 start_sequence ();
5877 emit_move_insn (exec_reg, exec);
5878 rtx_insn *seq = get_insns ();
5879 end_sequence ();
5880 emit_insn_before (seq, insn);
5881
5882 if (dump_file && (dump_flags & TDF_DETAILS))
5883 {
5884 if (exec_value)
5885 fprintf (dump_file, "Restoring EXEC to %ld before insn %d.\n",
5886 exec_value, INSN_UID (insn));
5887 else
5888 fprintf (dump_file,
5889 "Restoring EXEC from saved value before insn %d.\n",
5890 INSN_UID (insn));
5891 }
5892
5893 return exec_value;
5894}
5895
5896/* Implement TARGET_MACHINE_DEPENDENT_REORG.
5897
5898 Ensure that pipeline dependencies and lane masking are set correctly. */
5899
5900static void
5901gcn_md_reorg (void)
5902{
5903 basic_block bb;
5904 rtx exec_reg = gen_rtx_REG (DImode, EXEC_REG);
5326695a
AS
5905 regset_head live;
5906
5907 INIT_REG_SET (&live);
5908
5909 compute_bb_for_insn ();
5910
5911 if (!optimize)
5912 {
5913 split_all_insns ();
5914 if (dump_file && (dump_flags & TDF_DETAILS))
5915 {
5916 fprintf (dump_file, "After split:\n");
5917 print_rtl_with_bb (dump_file, get_insns (), dump_flags);
5918 }
5919
5920 /* Update data-flow information for split instructions. */
5921 df_insn_rescan_all ();
5922 }
5923
3df6fac0
JB
5924 df_live_add_problem ();
5925 df_live_set_all_dirty ();
5326695a
AS
5926 df_analyze ();
5927
5928 /* This pass ensures that the EXEC register is set correctly, according
5929 to the "exec" attribute. However, care must be taken so that the
5930 value that reaches explicit uses of the EXEC register remains the
5931 same as before.
5932 */
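  /* An illustrative case: a V4SImode VALU instruction with no explicit
     EXEC reference gets new_exec == 0xf below, so if the current EXEC
     value is unknown the pass emits a move of 0xf into EXEC immediately
     before it.  */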
5933
5934 FOR_EACH_BB_FN (bb, cfun)
5935 {
5936 if (dump_file && (dump_flags & TDF_DETAILS))
5937 fprintf (dump_file, "BB %d:\n", bb->index);
5938
5939 rtx_insn *insn, *curr;
5940 rtx_insn *last_exec_def = BB_HEAD (bb);
5941 bool last_exec_def_saved = false;
5942 bool curr_exec_explicit = true;
5943 bool curr_exec_known = true;
5944 int64_t curr_exec = 0; /* 0 here means 'the value is that of EXEC
5945 after last_exec_def is executed'. */
5946
3df6fac0
JB
5947 bitmap live_in = DF_LR_IN (bb);
5948 bool exec_live_on_entry = false;
5949 if (bitmap_bit_p (live_in, EXEC_LO_REG)
5950 || bitmap_bit_p (live_in, EXEC_HI_REG))
5951 {
5952 if (dump_file)
5953 fprintf (dump_file, "EXEC reg is live on entry to block %d\n",
5954 (int) bb->index);
5955 exec_live_on_entry = true;
5956 }
5957
5326695a
AS
5958 FOR_BB_INSNS_SAFE (bb, insn, curr)
5959 {
5960 if (!NONDEBUG_INSN_P (insn))
5961 continue;
5962
5963 if (GET_CODE (PATTERN (insn)) == USE
5964 || GET_CODE (PATTERN (insn)) == CLOBBER)
5965 continue;
5966
5967 HARD_REG_SET defs, uses;
5968 CLEAR_HARD_REG_SET (defs);
5969 CLEAR_HARD_REG_SET (uses);
e8448ba5 5970 note_stores (insn, record_hard_reg_sets, &defs);
5326695a
AS
5971 note_uses (&PATTERN (insn), record_hard_reg_uses, &uses);
5972
5973 bool exec_lo_def_p = TEST_HARD_REG_BIT (defs, EXEC_LO_REG);
5974 bool exec_hi_def_p = TEST_HARD_REG_BIT (defs, EXEC_HI_REG);
5975 bool exec_used = (hard_reg_set_intersect_p
5976 (uses, reg_class_contents[(int) EXEC_MASK_REG])
5977 || TEST_HARD_REG_BIT (uses, EXECZ_REG));
5978
5979 /* Check the instruction for implicit setting of EXEC via an
5980 attribute. */
5981 attr_exec exec_attr = get_attr_exec (insn);
5982 int64_t new_exec;
5983
5984 switch (exec_attr)
5985 {
5986 case EXEC_NONE:
5987 new_exec = 0;
5988 break;
5989
5990 case EXEC_SINGLE:
5991 /* Instructions that do not involve memory accesses only require
5992 bit 0 of EXEC to be set. */
5993 if (gcn_vmem_insn_p (get_attr_type (insn))
5994 || get_attr_type (insn) == TYPE_DS)
5995 new_exec = 1;
5996 else
5997 new_exec = curr_exec | 1;
5998 break;
5999
6000 case EXEC_FULL:
6001 new_exec = -1;
6002 break;
6003
6004 default: /* Auto-detect what setting is appropriate. */
6005 {
6006 new_exec = 0;
6007
6008 /* If EXEC is referenced explicitly then we don't need to do
6009 anything to set it, so we're done. */
6010 if (exec_used)
6011 break;
6012
6013 /* Scan the insn for VGPRs defs or uses. The mode determines
6014 what kind of exec is needed. */
6015 subrtx_iterator::array_type array;
6016 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
6017 {
6018 const_rtx x = *iter;
ae0d2c24
AS
6019 if (REG_P (x) && (VGPR_REGNO_P (REGNO (x))
6020 || AVGPR_REGNO_P (REGNO (x))))
5326695a
AS
6021 {
6022 if (VECTOR_MODE_P (GET_MODE (x)))
6023 {
45381d6f
AS
6024 int vf = GET_MODE_NUNITS (GET_MODE (x));
6025 new_exec = MAX ((uint64_t)new_exec,
6026 0xffffffffffffffffUL >> (64-vf));
5326695a 6027 }
45381d6f 6028 else if (new_exec == 0)
5326695a
AS
6029 new_exec = 1;
6030 }
6031 }
6032 }
6033 break;
6034 }
6035
6036 if (new_exec && (!curr_exec_known || new_exec != curr_exec))
6037 {
6038 start_sequence ();
6039 emit_move_insn (exec_reg, GEN_INT (new_exec));
6040 rtx_insn *seq = get_insns ();
6041 end_sequence ();
6042 emit_insn_before (seq, insn);
6043
6044 if (dump_file && (dump_flags & TDF_DETAILS))
6045 fprintf (dump_file, "Setting EXEC to %ld before insn %d.\n",
6046 new_exec, INSN_UID (insn));
6047
6048 curr_exec = new_exec;
6049 curr_exec_explicit = false;
6050 curr_exec_known = true;
6051 }
6052 else if (new_exec && dump_file && (dump_flags & TDF_DETAILS))
6053 {
6054 fprintf (dump_file, "Exec already is %ld before insn %d.\n",
6055 new_exec, INSN_UID (insn));
6056 }
6057
6058 /* The state of the EXEC register is unknown after a
6059 function call. */
6060 if (CALL_P (insn))
6061 curr_exec_known = false;
6062
6063 /* Handle explicit uses of EXEC. If the instruction is a partial
6064 explicit definition of EXEC, then treat it as an explicit use of
6065 EXEC as well. */
6066 if (exec_used || exec_lo_def_p != exec_hi_def_p)
6067 {
6068 /* An instruction that explicitly uses EXEC should not also
6069 implicitly define it. */
6070 gcc_assert (!exec_used || !new_exec);
6071
6072 if (!curr_exec_known || !curr_exec_explicit)
6073 {
6074 /* Restore the previous explicitly defined value. */
6075 curr_exec = gcn_restore_exec (insn, last_exec_def,
6076 curr_exec, curr_exec_known,
6077 last_exec_def_saved);
6078 curr_exec_explicit = true;
6079 curr_exec_known = true;
6080 }
6081 }
6082
6083 /* Handle explicit definitions of EXEC. */
6084 if (exec_lo_def_p || exec_hi_def_p)
6085 {
6086 last_exec_def = insn;
6087 last_exec_def_saved = false;
6088 curr_exec = gcn_insn_exec_value (insn);
6089 curr_exec_explicit = true;
6090 curr_exec_known = true;
6091
6092 if (dump_file && (dump_flags & TDF_DETAILS))
6093 fprintf (dump_file,
6094 "Found %s definition of EXEC at insn %d.\n",
6095 exec_lo_def_p == exec_hi_def_p ? "full" : "partial",
6096 INSN_UID (insn));
6097 }
3df6fac0
JB
6098
6099 exec_live_on_entry = false;
5326695a
AS
6100 }
6101
6102 COPY_REG_SET (&live, DF_LR_OUT (bb));
6103 df_simulate_initialize_backwards (bb, &live);
6104
6105 /* If EXEC is live after the basic block, restore the value of EXEC
6106 at the end of the block. */
6107 if ((REGNO_REG_SET_P (&live, EXEC_LO_REG)
6108 || REGNO_REG_SET_P (&live, EXEC_HI_REG))
3df6fac0 6109 && (!curr_exec_known || !curr_exec_explicit || exec_live_on_entry))
5326695a
AS
6110 {
6111 rtx_insn *end_insn = BB_END (bb);
6112
6113 /* If the instruction is not a jump instruction, do the restore
6114 after the last instruction in the basic block. */
6115 if (NONJUMP_INSN_P (end_insn))
6116 end_insn = NEXT_INSN (end_insn);
6117
6118 gcn_restore_exec (end_insn, last_exec_def, curr_exec,
6119 curr_exec_known, last_exec_def_saved);
6120 }
6121 }
6122
6123 CLEAR_REG_SET (&live);
6124
6125 /* "Manually Inserted Wait States (NOPs)."
6126
6127 GCN hardware detects most kinds of register dependencies, but there
6128 are some exceptions documented in the ISA manual. This pass
6129 detects the missed cases, and inserts the documented number of NOPs
6130 required for correct execution. */
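  /* For example: if a VALU instruction writes an SGPR and the very next
     instruction is a VMEM load reading that same SGPR, the scan below
     sees age == 0 against the 5-wait-state rule and emits five NOPs.  */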
6131
6132 const int max_waits = 5;
6133 struct ilist
6134 {
6135 rtx_insn *insn;
6136 attr_unit unit;
930c5599 6137 attr_delayeduse delayeduse;
5326695a 6138 HARD_REG_SET writes;
930c5599 6139 HARD_REG_SET reads;
5326695a
AS
6140 int age;
6141 } back[max_waits];
6142 int oldest = 0;
6143 for (int i = 0; i < max_waits; i++)
6144 back[i].insn = NULL;
6145
6146 rtx_insn *insn, *last_insn = NULL;
6147 for (insn = get_insns (); insn != 0; insn = NEXT_INSN (insn))
6148 {
6149 if (!NONDEBUG_INSN_P (insn))
6150 continue;
6151
6152 if (GET_CODE (PATTERN (insn)) == USE
6153 || GET_CODE (PATTERN (insn)) == CLOBBER)
6154 continue;
6155
6156 attr_type itype = get_attr_type (insn);
6157 attr_unit iunit = get_attr_unit (insn);
930c5599 6158 attr_delayeduse idelayeduse = get_attr_delayeduse (insn);
cfdc45f7 6159 int ivccwait = get_attr_vccwait (insn);
5326695a
AS
6160 HARD_REG_SET ireads, iwrites;
6161 CLEAR_HARD_REG_SET (ireads);
6162 CLEAR_HARD_REG_SET (iwrites);
e8448ba5 6163 note_stores (insn, record_hard_reg_sets, &iwrites);
5326695a
AS
6164 note_uses (&PATTERN (insn), record_hard_reg_uses, &ireads);
6165
6166 /* Scan recent previous instructions for dependencies not handled in
6167 hardware. */
6168 int nops_rqd = 0;
6169 for (int i = oldest; i < oldest + max_waits; i++)
6170 {
6171 struct ilist *prev_insn = &back[i % max_waits];
6172
6173 if (!prev_insn->insn)
6174 continue;
6175
ae0d2c24
AS
6176 HARD_REG_SET depregs = prev_insn->writes & ireads;
6177
5326695a
AS
6178 /* VALU writes SGPR followed by VMEM reading the same SGPR
6179 requires 5 wait states. */
6180 if ((prev_insn->age + nops_rqd) < 5
6181 && prev_insn->unit == UNIT_VECTOR
ae0d2c24
AS
6182 && gcn_vmem_insn_p (itype)
6183 && hard_reg_set_intersect_p
6184 (depregs, reg_class_contents[(int) SGPR_REGS]))
6185 nops_rqd = 5 - prev_insn->age;
5326695a
AS
6186
6187 /* VALU sets VCC/EXEC followed by VALU uses VCCZ/EXECZ
6188 requires 5 wait states. */
6189 if ((prev_insn->age + nops_rqd) < 5
6190 && prev_insn->unit == UNIT_VECTOR
6191 && iunit == UNIT_VECTOR
6192 && ((hard_reg_set_intersect_p
6193 (prev_insn->writes,
6194 reg_class_contents[(int) EXEC_MASK_REG])
6195 && TEST_HARD_REG_BIT (ireads, EXECZ_REG))
6196 ||
6197 (hard_reg_set_intersect_p
6198 (prev_insn->writes,
6199 reg_class_contents[(int) VCC_CONDITIONAL_REG])
6200 && TEST_HARD_REG_BIT (ireads, VCCZ_REG))))
6201 nops_rqd = 5 - prev_insn->age;
6202
6203 /* VALU writes SGPR/VCC followed by v_{read,write}lane using
6204 SGPR/VCC as lane select requires 4 wait states. */
6205 if ((prev_insn->age + nops_rqd) < 4
6206 && prev_insn->unit == UNIT_VECTOR
ae0d2c24
AS
6207 && get_attr_laneselect (insn) == LANESELECT_YES
6208 && (hard_reg_set_intersect_p
6209 (depregs, reg_class_contents[(int) SGPR_REGS])
5326695a 6210 || hard_reg_set_intersect_p
ae0d2c24
AS
6211 (depregs, reg_class_contents[(int) VCC_CONDITIONAL_REG])))
6212 nops_rqd = 4 - prev_insn->age;
5326695a
AS
6213
6214 /* VALU writes VGPR followed by VALU_DPP reading that VGPR
6215 requires 2 wait states. */
6216 if ((prev_insn->age + nops_rqd) < 2
6217 && prev_insn->unit == UNIT_VECTOR
6218 && itype == TYPE_VOP_DPP)
6219 {
5326695a 6220 if (hard_reg_set_intersect_p
ae0d2c24 6221 (depregs, reg_class_contents[(int) VGPR_REGS]))
5326695a
AS
6222 nops_rqd = 2 - prev_insn->age;
6223 }
930c5599
AS
6224
6225	  /* A store may require that its input registers are not overwritten
6226	     by the following instruction.  */
6227 if ((prev_insn->age + nops_rqd) < 1
6228 && prev_insn->delayeduse == DELAYEDUSE_YES
6229 && ((hard_reg_set_intersect_p
6230 (prev_insn->reads, iwrites))))
6231 nops_rqd = 1 - prev_insn->age;
cfdc45f7
AS
6232
6233	  /* An instruction that requires VCC must not use it too soon after
6234	     VCC is written.  */
6235 if (prev_insn->age < ivccwait
6236 && (hard_reg_set_intersect_p
6237 (prev_insn->writes,
6238 reg_class_contents[(int)VCC_CONDITIONAL_REG])))
6239 nops_rqd = ivccwait - prev_insn->age;
ae0d2c24
AS
6240
6241 /* CDNA1: write VGPR before v_accvgpr_write reads it. */
6242 if (TARGET_CDNA1
6243 && (prev_insn->age + nops_rqd) < 2
6244 && hard_reg_set_intersect_p
6245 (depregs, reg_class_contents[(int) VGPR_REGS])
6246 && hard_reg_set_intersect_p
6247 (iwrites, reg_class_contents[(int) AVGPR_REGS]))
6248 nops_rqd = 2 - prev_insn->age;
6249
6250 /* CDNA1: v_accvgpr_write writes AVGPR before v_accvgpr_read. */
6251 if (TARGET_CDNA1
6252 && (prev_insn->age + nops_rqd) < 3
6253 && hard_reg_set_intersect_p
6254 (depregs, reg_class_contents[(int) AVGPR_REGS])
6255 && hard_reg_set_intersect_p
6256 (iwrites, reg_class_contents[(int) VGPR_REGS]))
6257 nops_rqd = 3 - prev_insn->age;
6258
6259 /* CDNA1: Undocumented(?!) read-after-write when restoring values
6260	     from AVGPRs to VGPRs.  The observed problem was for the address
6261	     register of a flat_load instruction, but others may be affected?  */
6262 if (TARGET_CDNA1
6263 && (prev_insn->age + nops_rqd) < 2
6264 && hard_reg_set_intersect_p
6265 (prev_insn->reads, reg_class_contents[(int) AVGPR_REGS])
6266 && hard_reg_set_intersect_p
6267 (depregs, reg_class_contents[(int) VGPR_REGS]))
6268 nops_rqd = 2 - prev_insn->age;
5326695a
AS
6269 }
6270
6271 /* Insert the required number of NOPs. */
6272 for (int i = nops_rqd; i > 0; i--)
6273 emit_insn_after (gen_nop (), last_insn);
6274
6275 /* Age the previous instructions. We can also ignore writes to
6276 registers subsequently overwritten. */
6277 HARD_REG_SET written;
6278 CLEAR_HARD_REG_SET (written);
6279 for (int i = oldest + max_waits - 1; i > oldest; i--)
6280 {
6281 struct ilist *prev_insn = &back[i % max_waits];
6282
6283 /* Assume all instructions are equivalent to one "wait", the same
6284 as s_nop. This is probably true for SALU, but not VALU (which
6285 may take longer), so this is not optimal. However, AMD do
6286 not publish the cycle times for instructions. */
6287 prev_insn->age += 1 + nops_rqd;
6288
44942965 6289 written |= iwrites;
d15e5131 6290 prev_insn->writes &= ~written;
5326695a
AS
6291 }
6292
6293 /* Track the current instruction as a previous instruction. */
6294 back[oldest].insn = insn;
6295 back[oldest].unit = iunit;
930c5599 6296 back[oldest].delayeduse = idelayeduse;
6576d245
RS
6297 back[oldest].writes = iwrites;
6298 back[oldest].reads = ireads;
5326695a
AS
6299 back[oldest].age = 0;
6300 oldest = (oldest + 1) % max_waits;
6301
6302 last_insn = insn;
6303 }
6304}
6305
6306/* }}} */
6307/* {{{ OpenACC / OpenMP. */
6308
6309#define GCN_DEFAULT_GANGS 0 /* Choose at runtime. */
6310#define GCN_DEFAULT_WORKERS 0 /* Choose at runtime. */
6311#define GCN_DEFAULT_VECTORS 1 /* Use autovectorization only, for now. */
6312
6313/* Implement TARGET_GOACC_VALIDATE_DIMS.
6314
6315 Check the launch dimensions provided for an OpenACC compute
6316 region, or routine. */
6317
6318static bool
6319gcn_goacc_validate_dims (tree decl, int dims[], int fn_level,
6320 unsigned /*used*/)
6321{
6322 bool changed = false;
c408512e 6323 const int max_workers = 16;
fe22e0d4 6324
5326695a
AS
6325 /* The vector size must appear to be 64, to the user, unless this is a
6326 SEQ routine. The real, internal value is always 1, which means use
6327 autovectorization, but the user should not see that. */
6328 if (fn_level <= GOMP_DIM_VECTOR && fn_level >= -1
6329 && dims[GOMP_DIM_VECTOR] >= 0)
6330 {
6331 if (fn_level < 0 && dims[GOMP_DIM_VECTOR] >= 0
6332 && dims[GOMP_DIM_VECTOR] != 64)
6333 warning_at (decl ? DECL_SOURCE_LOCATION (decl) : UNKNOWN_LOCATION,
6334 OPT_Wopenacc_dims,
6335 (dims[GOMP_DIM_VECTOR]
55308fc2
AS
6336 ? G_("using %<vector_length (64)%>, ignoring %d")
6337 : G_("using %<vector_length (64)%>, "
5326695a
AS
6338 "ignoring runtime setting")),
6339 dims[GOMP_DIM_VECTOR]);
6340 dims[GOMP_DIM_VECTOR] = 1;
6341 changed = true;
6342 }
6343
6344 /* Check the num workers is not too large. */
6345 if (dims[GOMP_DIM_WORKER] > max_workers)
6346 {
6347 warning_at (decl ? DECL_SOURCE_LOCATION (decl) : UNKNOWN_LOCATION,
6348 OPT_Wopenacc_dims,
55308fc2 6349 "using %<num_workers (%d)%>, ignoring %d",
5326695a
AS
6350 max_workers, dims[GOMP_DIM_WORKER]);
6351 dims[GOMP_DIM_WORKER] = max_workers;
6352 changed = true;
6353 }
6354
6355 /* Set global defaults. */
6356 if (!decl)
6357 {
6358 dims[GOMP_DIM_VECTOR] = GCN_DEFAULT_VECTORS;
6359 if (dims[GOMP_DIM_WORKER] < 0)
c408512e 6360 dims[GOMP_DIM_WORKER] = GCN_DEFAULT_WORKERS;
5326695a
AS
6361 if (dims[GOMP_DIM_GANG] < 0)
6362 dims[GOMP_DIM_GANG] = GCN_DEFAULT_GANGS;
6363 changed = true;
6364 }
6365
6366 return changed;
6367}
6368
6369/* Helper function for oacc_dim_size instruction.
6370 Also used for OpenMP, via builtin_gcn_dim_size, and the omp_gcn pass. */
6371
6372rtx
6373gcn_oacc_dim_size (int dim)
6374{
6375 if (dim < 0 || dim > 2)
6376 error ("offload dimension out of range (%d)", dim);
6377
6378 /* Vectors are a special case. */
6379 if (dim == 2)
6380 return const1_rtx; /* Think of this as 1 times 64. */
6381
6382 static int offset[] = {
6383 /* Offsets into dispatch packet. */
6384 12, /* X dim = Gang / Team / Work-group. */
6385 20, /* Z dim = Worker / Thread / Wavefront. */
6386 16 /* Y dim = Vector / SIMD / Work-item. */
6387 };
6388 rtx addr = gen_rtx_PLUS (DImode,
6389 gen_rtx_REG (DImode,
6390 cfun->machine->args.
6391 reg[DISPATCH_PTR_ARG]),
6392 GEN_INT (offset[dim]));
6f83861c
TB
6393 rtx mem = gen_rtx_MEM (SImode, addr);
6394 set_mem_addr_space (mem, ADDR_SPACE_SCALAR_FLAT);
6395 return mem;
5326695a
AS
6396}
6397
6398/* Helper function for oacc_dim_pos instruction.
6399 Also used for OpenMP, via builtin_gcn_dim_pos, and the omp_gcn pass. */
6400
6401rtx
6402gcn_oacc_dim_pos (int dim)
6403{
6404 if (dim < 0 || dim > 2)
6405 error ("offload dimension out of range (%d)", dim);
6406
6407 static const int reg[] = {
6408 WORKGROUP_ID_X_ARG, /* Gang / Team / Work-group. */
6409 WORK_ITEM_ID_Z_ARG, /* Worker / Thread / Wavefront. */
6410 WORK_ITEM_ID_Y_ARG /* Vector / SIMD / Work-item. */
6411 };
6412
6413 int reg_num = cfun->machine->args.reg[reg[dim]];
6414
6415 /* The information must have been requested by the kernel. */
6416 gcc_assert (reg_num >= 0);
6417
6418 return gen_rtx_REG (SImode, reg_num);
6419}
6420
6421/* Implement TARGET_GOACC_FORK_JOIN. */
6422
6423static bool
2961ac45 6424gcn_fork_join (gcall *call, const int dims[], bool is_fork)
5326695a 6425{
2961ac45
JB
6426 tree arg = gimple_call_arg (call, 2);
6427 unsigned axis = TREE_INT_CST_LOW (arg);
6428
6429 if (!is_fork && axis == GOMP_DIM_WORKER && dims[axis] != 1)
6430 return true;
6431
5326695a
AS
6432 return false;
6433}
6434
6435/* Implement ???????
6436 FIXME make this a real hook.
6437
6438 Adjust FNDECL such that options inherited from the host compiler
6439 are made appropriate for the accelerator compiler. */
6440
6441void
6442gcn_fixup_accel_lto_options (tree fndecl)
6443{
6444 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
6445 if (!func_optimize)
6446 return;
6447
ba948b37
JJ
6448 tree old_optimize
6449 = build_optimization_node (&global_options, &global_options_set);
5326695a
AS
6450 tree new_optimize;
6451
6452 /* If the function changed the optimization levels as well as
6453 setting target options, start with the optimizations
6454 specified. */
6455 if (func_optimize != old_optimize)
ba948b37 6456 cl_optimization_restore (&global_options, &global_options_set,
5326695a
AS
6457 TREE_OPTIMIZATION (func_optimize));
6458
6459 gcn_option_override ();
6460
6461 /* The target attributes may also change some optimization flags,
6462 so update the optimization options if necessary. */
ba948b37
JJ
6463 new_optimize = build_optimization_node (&global_options,
6464 &global_options_set);
5326695a
AS
6465
6466 if (old_optimize != new_optimize)
6467 {
6468 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
ba948b37 6469 cl_optimization_restore (&global_options, &global_options_set,
5326695a
AS
6470 TREE_OPTIMIZATION (old_optimize));
6471 }
6472}
6473
2a3f9f65
JB
6474/* Implement TARGET_GOACC_SHARED_MEM_LAYOUT hook. */
6475
6476static void
6477gcn_shared_mem_layout (unsigned HOST_WIDE_INT *lo,
6478 unsigned HOST_WIDE_INT *hi,
6479 int ARG_UNUSED (dims[GOMP_DIM_MAX]),
6480 unsigned HOST_WIDE_INT
6481 ARG_UNUSED (private_size[GOMP_DIM_MAX]),
6482 unsigned HOST_WIDE_INT reduction_size[GOMP_DIM_MAX])
6483{
6484 *lo = gang_private_size_opt + reduction_size[GOMP_DIM_WORKER];
6485 /* !!! We can maybe use dims[] to estimate the maximum number of work
6486 groups/wavefronts/etc. we will launch, and therefore tune the maximum
6487 amount of LDS we should use. For now, use a minimal amount to try to
6488 maximise occupancy. */
6489 *hi = acc_lds_size;
6490 machine_function *machfun = cfun->machine;
6491 machfun->reduction_base = gang_private_size_opt;
6492 machfun->reduction_limit
6493 = gang_private_size_opt + reduction_size[GOMP_DIM_WORKER];
6494}
6495
5326695a
AS
6496/* }}} */
6497/* {{{ ASM Output. */
6498
6499/* Implement TARGET_ASM_FILE_START.
6500
6501 Print assembler file header text. */
6502
6503static void
6504output_file_start (void)
6505{
8086230e 6506 /* In HSACOv4 no attribute setting means the binary supports "any" hardware
366e3d30
TB
6507 configuration. */
6508 const char *xnack = (flag_xnack == HSACO_ATTR_ON ? ":xnack+"
6509 : flag_xnack == HSACO_ATTR_OFF ? ":xnack-"
6510 : "");
6511 const char *sram_ecc = (flag_sram_ecc == HSACO_ATTR_ON ? ":sramecc+"
6512 : flag_sram_ecc == HSACO_ATTR_OFF ? ":sramecc-"
8086230e
AS
6513 : "");
6514
dd455df7 6515 const char *cpu;
f062c3f1
AS
6516 switch (gcn_arch)
6517 {
1af16666
AS
6518 case PROCESSOR_FIJI:
6519 cpu = "gfx803";
8086230e
AS
6520 xnack = "";
6521 sram_ecc = "";
1af16666
AS
6522 break;
6523 case PROCESSOR_VEGA10:
6524 cpu = "gfx900";
8086230e 6525 sram_ecc = "";
1af16666
AS
6526 break;
6527 case PROCESSOR_VEGA20:
6528 cpu = "gfx906";
8086230e 6529 sram_ecc = "";
1af16666
AS
6530 break;
6531 case PROCESSOR_GFX908:
6532 cpu = "gfx908";
1af16666 6533 break;
cde52d3a
AS
6534 case PROCESSOR_GFX90a:
6535 cpu = "gfx90a";
6536 break;
c7ec7bd1
AS
6537 case PROCESSOR_GFX1030:
6538 cpu = "gfx1030";
6539 xnack = "";
6540 sram_ecc = "";
6541 break;
f062c3f1
AS
6542 default: gcc_unreachable ();
6543 }
6544
aad32a00 6545 fprintf(asm_out_file, "\t.amdgcn_target \"amdgcn-unknown-amdhsa--%s%s%s\"\n",
8086230e 6546 cpu, sram_ecc, xnack);
5326695a
AS
6547}
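/* For illustration (hypothetical flags): gfx90a with -mxnack=off and
   -msram-ecc=on emits
     .amdgcn_target "amdgcn-unknown-amdhsa--gfx90a:sramecc+:xnack-"  */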
6548
6549/* Implement ASM_DECLARE_FUNCTION_NAME via gcn-hsa.h.
6550
6551 Print the initial definition of a function name.
6552
6553 For GCN kernel entry points this includes all the HSA meta-data, special
6554 alignment constraints that don't apply to regular functions, and magic
6555 comments that pass information to mkoffload. */
6556
6557void
6558gcn_hsa_declare_function_name (FILE *file, const char *name, tree)
6559{
ae0d2c24 6560 int sgpr, vgpr, avgpr;
366e3d30 6561 bool xnack_enabled = TARGET_XNACK;
f062c3f1
AS
6562
6563 fputs ("\n\n", file);
5326695a
AS
6564
6565 if (cfun && cfun->machine && cfun->machine->normal_function)
6566 {
6567 fputs ("\t.type\t", file);
6568 assemble_name (file, name);
6569 fputs (",@function\n", file);
6570 assemble_name (file, name);
6571 fputs (":\n", file);
6572 return;
6573 }
6574
6575 /* Determine count of sgpr/vgpr registers by looking for last
6576 one used. */
6577 for (sgpr = 101; sgpr >= 0; sgpr--)
6578 if (df_regs_ever_live_p (FIRST_SGPR_REG + sgpr))
6579 break;
6580 sgpr++;
6581 for (vgpr = 255; vgpr >= 0; vgpr--)
6582 if (df_regs_ever_live_p (FIRST_VGPR_REG + vgpr))
6583 break;
6584 vgpr++;
ae0d2c24
AS
6585 for (avgpr = 255; avgpr >= 0; avgpr--)
6586 if (df_regs_ever_live_p (FIRST_AVGPR_REG + avgpr))
6587 break;
6588 avgpr++;
6589 vgpr = (vgpr + 3) & ~3;
6590 avgpr = (avgpr + 3) & ~3;
5326695a 6591
5326695a
AS
6592 if (!leaf_function_p ())
6593 {
6594 /* We can't know how many registers function calls might use. */
87fdbe69
KCY
6595 if (vgpr < MAX_NORMAL_VGPR_COUNT)
6596 vgpr = MAX_NORMAL_VGPR_COUNT;
f062c3f1
AS
6597 if (sgpr < MAX_NORMAL_SGPR_COUNT)
6598 sgpr = MAX_NORMAL_SGPR_COUNT;
ae0d2c24
AS
6599 if (avgpr < MAX_NORMAL_AVGPR_COUNT)
6600 avgpr = MAX_NORMAL_AVGPR_COUNT;
5326695a
AS
6601 }
6602
cde52d3a
AS
6603 /* The gfx90a accum_offset field can't represent 0 registers. */
6604 if (gcn_arch == PROCESSOR_GFX90a && vgpr < 4)
6605 vgpr = 4;
6606
f062c3f1
AS
6607 fputs ("\t.rodata\n"
6608 "\t.p2align\t6\n"
6609 "\t.amdhsa_kernel\t", file);
5326695a
AS
6610 assemble_name (file, name);
6611 fputs ("\n", file);
5326695a
AS
6612 int reg = FIRST_SGPR_REG;
6613 for (int a = 0; a < GCN_KERNEL_ARG_TYPES; a++)
6614 {
6615 int reg_first = -1;
6616 int reg_last;
6617 if ((cfun->machine->args.requested & (1 << a))
6618 && (gcn_kernel_arg_types[a].fixed_regno < 0))
6619 {
6620 reg_first = reg;
6621 reg_last = (reg_first
6622 + (GET_MODE_SIZE (gcn_kernel_arg_types[a].mode)
6623 / UNITS_PER_WORD) - 1);
6624 reg = reg_last + 1;
6625 }
6626
6627 if (gcn_kernel_arg_types[a].header_pseudo)
6628 {
f062c3f1
AS
6629 fprintf (file, "\t %s%s\t%i",
6630 (cfun->machine->args.requested & (1 << a)) != 0 ? "" : ";",
5326695a
AS
6631 gcn_kernel_arg_types[a].header_pseudo,
6632 (cfun->machine->args.requested & (1 << a)) != 0);
6633 if (reg_first != -1)
6634 {
6635 fprintf (file, " ; (");
6636 for (int i = reg_first; i <= reg_last; ++i)
6637 {
6638 if (i != reg_first)
6639 fprintf (file, ", ");
6640 fprintf (file, "%s", reg_names[i]);
6641 }
6642 fprintf (file, ")");
6643 }
6644 fprintf (file, "\n");
6645 }
6646 else if (gcn_kernel_arg_types[a].fixed_regno >= 0
6647 && cfun->machine->args.requested & (1 << a))
f062c3f1 6648 fprintf (file, "\t ; %s\t%i (%s)\n",
5326695a
AS
6649 gcn_kernel_arg_types[a].name,
6650 (cfun->machine->args.requested & (1 << a)) != 0,
6651 reg_names[gcn_kernel_arg_types[a].fixed_regno]);
6652 }
f062c3f1 6653 fprintf (file, "\t .amdhsa_system_vgpr_workitem_id\t%i\n",
5326695a
AS
6654 (cfun->machine->args.requested & (1 << WORK_ITEM_ID_Z_ARG))
6655 ? 2
6656 : cfun->machine->args.requested & (1 << WORK_ITEM_ID_Y_ARG)
6657 ? 1 : 0);
ae0d2c24
AS
6658 int next_free_vgpr = vgpr;
6659 if (TARGET_CDNA1 && avgpr > vgpr)
6660 next_free_vgpr = avgpr;
6661 if (TARGET_CDNA2_PLUS)
6662 next_free_vgpr += avgpr;
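  /* Illustrative numbers: with vgpr == 24 and avgpr == 8, this reports
     next_free_vgpr == 24 (the maximum) on CDNA1 but 32 (the sum) on
     CDNA2+, matching how each generation accounts for the AGPRs.  */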
f062c3f1
AS
6663 fprintf (file,
6664 "\t .amdhsa_next_free_vgpr\t%i\n"
6665 "\t .amdhsa_next_free_sgpr\t%i\n"
6666 "\t .amdhsa_reserve_vcc\t1\n"
6667 "\t .amdhsa_reserve_flat_scratch\t0\n"
6668 "\t .amdhsa_reserve_xnack_mask\t%i\n"
f6fff8a6 6669 "\t .amdhsa_private_segment_fixed_size\t0\n"
f062c3f1
AS
6670 "\t .amdhsa_group_segment_fixed_size\t%u\n"
6671 "\t .amdhsa_float_denorm_mode_32\t3\n"
6672 "\t .amdhsa_float_denorm_mode_16_64\t3\n",
ae0d2c24 6673 next_free_vgpr,
f062c3f1
AS
6674 sgpr,
6675 xnack_enabled,
f062c3f1 6676 LDS_SIZE);
cde52d3a
AS
6677 if (gcn_arch == PROCESSOR_GFX90a)
6678 fprintf (file,
6679 "\t .amdhsa_accum_offset\t%i\n"
6680 "\t .amdhsa_tg_split\t0\n",
ae0d2c24 6681 vgpr); /* The AGPRs come after the VGPRs. */
f062c3f1
AS
6682 fputs ("\t.end_amdhsa_kernel\n", file);
6683
6684#if 1
6685 /* The following is YAML embedded in assembler; tabs are not allowed. */
6686 fputs (" .amdgpu_metadata\n"
6687 " amdhsa.version:\n"
6688 " - 1\n"
6689 " - 0\n"
6690 " amdhsa.kernels:\n"
6691 " - .name: ", file);
6692 assemble_name (file, name);
6693 fputs ("\n .symbol: ", file);
6694 assemble_name (file, name);
6695 fprintf (file,
6696 ".kd\n"
6697 " .kernarg_segment_size: %i\n"
6698 " .kernarg_segment_align: %i\n"
6699 " .group_segment_fixed_size: %u\n"
f6fff8a6 6700 " .private_segment_fixed_size: 0\n"
f062c3f1
AS
6701 " .wavefront_size: 64\n"
6702 " .sgpr_count: %i\n"
6703 " .vgpr_count: %i\n"
6704 " .max_flat_workgroup_size: 1024\n",
6705 cfun->machine->kernarg_segment_byte_size,
5326695a 6706 cfun->machine->kernarg_segment_alignment,
f062c3f1 6707 LDS_SIZE,
ae0d2c24
AS
6708 sgpr, next_free_vgpr);
6709 if (gcn_arch == PROCESSOR_GFX90a || gcn_arch == PROCESSOR_GFX908)
6710 fprintf (file, " .agpr_count: %i\n", avgpr);
f062c3f1
AS
6711 fputs (" .end_amdgpu_metadata\n", file);
6712#endif
6713
6714 fputs ("\t.text\n", file);
6715 fputs ("\t.align\t256\n", file);
6716 fputs ("\t.type\t", file);
6717 assemble_name (file, name);
6718 fputs (",@function\n", file);
6719 assemble_name (file, name);
6720 fputs (":\n", file);
5326695a
AS
6721
6722 /* This comment is read by mkoffload. */
6723 if (flag_openacc)
6724 fprintf (file, "\t;; OPENACC-DIMS: %d, %d, %d : %s\n",
6725 oacc_get_fn_dim_size (cfun->decl, GOMP_DIM_GANG),
6726 oacc_get_fn_dim_size (cfun->decl, GOMP_DIM_WORKER),
6727 oacc_get_fn_dim_size (cfun->decl, GOMP_DIM_VECTOR), name);
6728}
6729
6730/* Implement TARGET_ASM_SELECT_SECTION.
6731
6732 Return the section into which EXP should be placed. */
6733
6734static section *
6735gcn_asm_select_section (tree exp, int reloc, unsigned HOST_WIDE_INT align)
6736{
6737 if (TREE_TYPE (exp) != error_mark_node
6738 && TYPE_ADDR_SPACE (TREE_TYPE (exp)) == ADDR_SPACE_LDS)
6739 {
6740 if (!DECL_P (exp))
6741 return get_section (".lds_bss",
6742 SECTION_WRITE | SECTION_BSS | SECTION_DEBUG,
6743 NULL);
6744
6745 return get_named_section (exp, ".lds_bss", reloc);
6746 }
6747
6748 return default_elf_select_section (exp, reloc, align);
6749}
6750
6751/* Implement TARGET_ASM_FUNCTION_PROLOGUE.
6752
6753 Emits custom text into the assembler file at the head of each function. */
6754
6755static void
6756gcn_target_asm_function_prologue (FILE *file)
6757{
6758 machine_function *offsets = gcn_compute_frame_offsets ();
6759
6760 asm_fprintf (file, "\t; using %s addressing in function\n",
6761 offsets->use_flat_addressing ? "flat" : "global");
6762
6763 if (offsets->normal_function)
6764 {
6765 asm_fprintf (file, "\t; frame pointer needed: %s\n",
6766 offsets->need_frame_pointer ? "true" : "false");
6767 asm_fprintf (file, "\t; lr needs saving: %s\n",
6768 offsets->lr_needs_saving ? "true" : "false");
6769 asm_fprintf (file, "\t; outgoing args size: %wd\n",
6770 offsets->outgoing_args_size);
6771 asm_fprintf (file, "\t; pretend size: %wd\n", offsets->pretend_size);
6772 asm_fprintf (file, "\t; local vars size: %wd\n", offsets->local_vars);
6773 asm_fprintf (file, "\t; callee save size: %wd\n",
6774 offsets->callee_saves);
6775 }
6776 else
6777 {
6778 asm_fprintf (file, "\t; HSA kernel entry point\n");
6779 asm_fprintf (file, "\t; local vars size: %wd\n", offsets->local_vars);
6780 asm_fprintf (file, "\t; outgoing args size: %wd\n",
6781 offsets->outgoing_args_size);
5326695a
AS
6782 }
6783}
6784
6785/* Helper function for print_operand and print_operand_address.
6786
6787 Print a register as the assembler requires, according to mode and name. */
6788
6789static void
6790print_reg (FILE *file, rtx x)
6791{
6792 machine_mode mode = GET_MODE (x);
45381d6f
AS
6793 if (VECTOR_MODE_P (mode))
6794 mode = GET_MODE_INNER (mode);
5326695a 6795 if (mode == BImode || mode == QImode || mode == HImode || mode == SImode
45381d6f 6796 || mode == HFmode || mode == SFmode)
5326695a 6797 fprintf (file, "%s", reg_names[REGNO (x)]);
45381d6f 6798 else if (mode == DImode || mode == DFmode)
5326695a
AS
6799 {
6800 if (SGPR_REGNO_P (REGNO (x)))
6801 fprintf (file, "s[%i:%i]", REGNO (x) - FIRST_SGPR_REG,
6802 REGNO (x) - FIRST_SGPR_REG + 1);
6803 else if (VGPR_REGNO_P (REGNO (x)))
6804 fprintf (file, "v[%i:%i]", REGNO (x) - FIRST_VGPR_REG,
6805 REGNO (x) - FIRST_VGPR_REG + 1);
ae0d2c24
AS
6806 else if (AVGPR_REGNO_P (REGNO (x)))
6807 fprintf (file, "a[%i:%i]", REGNO (x) - FIRST_AVGPR_REG,
6808 REGNO (x) - FIRST_AVGPR_REG + 1);
5326695a
AS
6809 else if (REGNO (x) == FLAT_SCRATCH_REG)
6810 fprintf (file, "flat_scratch");
6811 else if (REGNO (x) == EXEC_REG)
6812 fprintf (file, "exec");
6813 else if (REGNO (x) == VCC_LO_REG)
6814 fprintf (file, "vcc");
6815 else
6816 fprintf (file, "[%s:%s]",
6817 reg_names[REGNO (x)], reg_names[REGNO (x) + 1]);
6818 }
6819 else if (mode == TImode)
6820 {
6821 if (SGPR_REGNO_P (REGNO (x)))
6822 fprintf (file, "s[%i:%i]", REGNO (x) - FIRST_SGPR_REG,
6823 REGNO (x) - FIRST_SGPR_REG + 3);
6824 else if (VGPR_REGNO_P (REGNO (x)))
6825 fprintf (file, "v[%i:%i]", REGNO (x) - FIRST_VGPR_REG,
6826 REGNO (x) - FIRST_VGPR_REG + 3);
ae0d2c24
AS
6827 else if (AVGPR_REGNO_P (REGNO (x)))
6828 fprintf (file, "a[%i:%i]", REGNO (x) - FIRST_AVGPR_REG,
6829 REGNO (x) - FIRST_AVGPR_REG + 3);
5326695a
AS
6830 else
6831 gcc_unreachable ();
6832 }
6833 else
6834 gcc_unreachable ();
6835}
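/* Some illustrative renderings: an SImode value in s2 prints as "s2",
   a DImode value in s2 as "s[2:3]", and a TImode value in v8 as
   "v[8:11]".  */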
6836
6837/* Implement TARGET_SECTION_TYPE_FLAGS.
6838
6839 Return a set of section attributes for use by TARGET_ASM_NAMED_SECTION. */
6840
6841static unsigned int
6842gcn_section_type_flags (tree decl, const char *name, int reloc)
6843{
6844 if (strcmp (name, ".lds_bss") == 0)
6845 return SECTION_WRITE | SECTION_BSS | SECTION_DEBUG;
6846
6847 return default_section_type_flags (decl, name, reloc);
6848}
6849
6850/* Helper function for gcn_asm_output_symbol_ref.
6851
2a3f9f65
JB
6852 FIXME: This function is used to lay out gang-private variables in LDS
6853 on a per-CU basis.
6854 There may be cases in which gang-private variables in different compilation
6855 units could clobber each other. In that case we should be relying on the
6856 linker to lay out gang-private LDS space, but that doesn't appear to be
6857 possible at present. */
5326695a
AS
6858
6859static void
6860gcn_print_lds_decl (FILE *f, tree var)
6861{
6862 int *offset;
2a3f9f65 6863 if ((offset = lds_allocs.get (var)))
5326695a
AS
6864 fprintf (f, "%u", (unsigned) *offset);
6865 else
6866 {
6867 unsigned HOST_WIDE_INT align = DECL_ALIGN_UNIT (var);
6868 tree type = TREE_TYPE (var);
6869 unsigned HOST_WIDE_INT size = tree_to_uhwi (TYPE_SIZE_UNIT (type));
6870 if (size > align && size > 4 && align < 8)
6871 align = 8;
6872
2a3f9f65 6873 gang_private_hwm = ((gang_private_hwm + align - 1) & ~(align - 1));
5326695a 6874
2a3f9f65
JB
6875 lds_allocs.put (var, gang_private_hwm);
6876 fprintf (f, "%u", gang_private_hwm);
6877 gang_private_hwm += size;
6878 if (gang_private_hwm > gang_private_size_opt)
2579d612
TS
6879 error ("%d bytes of gang-private data-share memory exhausted"
6880 " (increase with %<-mgang-private-size=%d%>, for example)",
6881 gang_private_size_opt, gang_private_hwm);
5326695a
AS
6882 }
6883}
6884
6885/* Implement ASM_OUTPUT_SYMBOL_REF via gcn-hsa.h. */
6886
6887void
6888gcn_asm_output_symbol_ref (FILE *file, rtx x)
6889{
6890 tree decl;
6891 if (cfun
6892 && (decl = SYMBOL_REF_DECL (x)) != 0
9907413a 6893 && VAR_P (decl)
6894 && AS_LDS_P (TYPE_ADDR_SPACE (TREE_TYPE (decl))))
6895 {
6896 /* LDS symbols (emitted using this hook) are only used at present
6897 to propagate worker values from an active thread to neutered
6898 threads. Use the same offset for each such block, but don't
6899 use zero because null pointers are used to identify the active
6900 thread in GOACC_single_copy_start calls. */
6901 gcn_print_lds_decl (file, decl);
6902 }
6903 else
6904 {
6905 assemble_name (file, XSTR (x, 0));
6906 /* FIXME: See above -- this condition is unreachable. */
6907 if (cfun
6908 && (decl = SYMBOL_REF_DECL (x)) != 0
9907413a 6909 && VAR_P (decl)
6910 && AS_LDS_P (TYPE_ADDR_SPACE (TREE_TYPE (decl))))
6911 fputs ("@abs32", file);
6912 }
6913}
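
/* For example (illustrative): a gang-private LDS variable resolves to
   its numeric LDS offset via gcn_print_lds_decl, so the symbol prints
   as, say, "40"; any other symbol prints as its assembler name.  */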
6914
6915/* Implement TARGET_CONSTANT_ALIGNMENT.
6916
6917 Returns the alignment in bits of a constant that is being placed in memory.
6918 CONSTANT is the constant and BASIC_ALIGN is the alignment that the object
6919 would ordinarily have. */
6920
6921static HOST_WIDE_INT
6922gcn_constant_alignment (const_tree ARG_UNUSED (constant),
6923 HOST_WIDE_INT basic_align)
6924{
6925 return basic_align > 128 ? basic_align : 128;
6926}
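
/* For example, a constant that would ordinarily be 32-bit aligned is
   placed with 128-bit alignment; alignments already above 128 bits are
   kept unchanged.  */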
6927
6928/* Implement PRINT_OPERAND_ADDRESS via gcn.h. */
6929
6930void
6931print_operand_address (FILE *file, rtx mem)
6932{
6933 gcc_assert (MEM_P (mem));
6934
6935 rtx reg;
6936 rtx offset;
6937 addr_space_t as = MEM_ADDR_SPACE (mem);
6938 rtx addr = XEXP (mem, 0);
6939 gcc_assert (REG_P (addr) || GET_CODE (addr) == PLUS);
6940
6941 if (AS_SCRATCH_P (as))
6942 switch (GET_CODE (addr))
6943 {
6944 case REG:
6945 print_reg (file, addr);
6946 break;
6947
6948 case PLUS:
6949 reg = XEXP (addr, 0);
6950 offset = XEXP (addr, 1);
6951 print_reg (file, reg);
6952 if (GET_CODE (offset) == CONST_INT)
6953 fprintf (file, " offset:" HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6954 else
6955 abort ();
6956 break;
6957
6958 default:
6959 debug_rtx (addr);
6960 abort ();
6961 }
6962 else if (AS_ANY_FLAT_P (as))
6963 {
6964 if (GET_CODE (addr) == REG)
6965 print_reg (file, addr);
6966 else
6967 {
6968 gcc_assert (TARGET_GCN5_PLUS);
6969 print_reg (file, XEXP (addr, 0));
6970 }
6971 }
6972 else if (AS_GLOBAL_P (as))
6973 {
6974 gcc_assert (TARGET_GCN5_PLUS);
6975
6976 rtx base = addr;
6977 rtx vgpr_offset = NULL_RTX;
6978
6979 if (GET_CODE (addr) == PLUS)
6980 {
6981 base = XEXP (addr, 0);
6982
6983 if (GET_CODE (base) == PLUS)
6984 {
6985 /* (SGPR + VGPR) + CONST */
6986 vgpr_offset = XEXP (base, 1);
6987 base = XEXP (base, 0);
6988 }
6989 else
6990 {
6991 rtx offset = XEXP (addr, 1);
6992
6993 if (REG_P (offset))
6994 /* SGPR + VGPR */
6995 vgpr_offset = offset;
6996 else if (CONST_INT_P (offset))
6997 /* VGPR + CONST or SGPR + CONST */
6998 ;
6999 else
7000 output_operand_lossage ("bad ADDR_SPACE_GLOBAL address");
7001 }
7002 }
7003
7004 if (REG_P (base))
7005 {
7006 if (VGPR_REGNO_P (REGNO (base)))
7007 print_reg (file, base);
7008 else if (SGPR_REGNO_P (REGNO (base)))
7009 {
7010 /* The assembler requires a 64-bit VGPR pair here, even though
7011 the offset should be only 32-bit. */
7012 if (vgpr_offset == NULL_RTX)
7013 /* In this case, the vector offset is zero, so we use the first
7014 lane of v1, which is initialized to zero. */
8086230e 7015 fprintf (file, "v1");
7016 else if (REG_P (vgpr_offset)
7017 && VGPR_REGNO_P (REGNO (vgpr_offset)))
8086230e 7018 fprintf (file, "v%d", REGNO (vgpr_offset) - FIRST_VGPR_REG);
7019 else
7020 output_operand_lossage ("bad ADDR_SPACE_GLOBAL address");
7021 }
7022 }
7023 else
7024 output_operand_lossage ("bad ADDR_SPACE_GLOBAL address");
7025 }
7026 else if (AS_ANY_DS_P (as))
7027 switch (GET_CODE (addr))
7028 {
7029 case REG:
7030 print_reg (file, addr);
7031 break;
7032
7033 case PLUS:
7034 reg = XEXP (addr, 0);
7035 print_reg (file, reg);
7036 break;
7037
7038 default:
7039 debug_rtx (addr);
7040 abort ();
7041 }
7042 else
7043 switch (GET_CODE (addr))
7044 {
7045 case REG:
7046 print_reg (file, addr);
7047 fprintf (file, ", 0");
7048 break;
7049
7050 case PLUS:
7051 reg = XEXP (addr, 0);
7052 offset = XEXP (addr, 1);
7053 print_reg (file, reg);
7054 fprintf (file, ", ");
7055 if (GET_CODE (offset) == REG)
7056 print_reg (file, offset);
7057 else if (GET_CODE (offset) == CONST_INT)
7058 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7059 else
7060 abort ();
7061 break;
7062
7063 default:
7064 debug_rtx (addr);
7065 abort ();
7066 }
7067}
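
/* Illustrative outputs (register numbers are hypothetical): a scratch
   PLUS address prints as "s8 offset:16"; a GLOBAL access with an SGPR
   base prints only the VGPR offset component, e.g. "v3", falling back
   to the zero-initialized "v1" when there is no VGPR offset (the SGPR
   base and any constant offset are emitted via the 'O' operand code);
   the final, scalar-memory case prints "s[6:7], 0" or "s[6:7], 16".  */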
7068
7069/* Implement PRINT_OPERAND via gcn.h.
7070
7071 b - print operand size as untyped operand (b8/b16/b32/b64)
7072 B - print operand size as SI/DI untyped operand (b32/b32/b32/b64)
7073 i - print operand size as untyped operand (i8/i16/i32/i64)
a5879399 7074 I - print operand size as SI/DI untyped operand (i32/i64)
5326695a 7075 u - print operand size as untyped operand (u8/u16/u32/u64)
a5879399 7076 U - print operand size as SI/DI untyped operand (u32/u64)
7077 o - print operand size as memory access size for loads
7078 (ubyte/ushort/dword/dwordx2/dwordx3/dwordx4)
7079 s - print operand size as memory access size for stores
7080 (byte/short/dword/dwordx2/dwordx3/dwordx4)
7081 C - print conditional code for s_cbranch (_sccz/_sccnz/_vccz/_vccnz...)
7082 c - print inverse conditional code for s_cbranch
7083 D - print conditional code for s_cmp (eq_u64/lg_u64...)
7084 E - print conditional code for v_cmp (eq_u64/ne_u64...)
7085 A - print address in formatting suitable for given address space.
7086 O - print offset:n for data share operations.
7087 ^ - print "_co" suffix for GCN5 mnemonics
7088 g - print "glc", if appropriate for given MEM
7089 L - print low-part of a multi-reg value
7090 H - print second part of a multi-reg value (high-part of 2-reg value)
7091 J - print third part of a multi-reg value
7092 K - print fourth part of a multi-reg value
7093 */
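
/* For example (hypothetical templates, not taken from gcn.md): a
   template "s_load%s0\t%0, %A1%g1" could emit
   "s_load_dword s4, s[6:7], 0 glc" for a volatile SImode scalar load,
   and "v_cmp%E0\tvcc, %1, %2" could emit "v_cmp_gt_i32 vcc, v0, v1"
   when operand 0 is a signed SImode greater-than comparison.  */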
7094
7095void
7096print_operand (FILE *file, rtx x, int code)
7097{
eb239c7f 7098 rtx_code xcode = x ? GET_CODE (x) : UNKNOWN;
7099 bool invert = false;
7100 switch (code)
7101 {
7102 /* Instructions have the following suffixes.
7103 If there are two suffixes, the first is the destination type,
7104 and the second is the source type.
7105
7106 B32 Bitfield (untyped data) 32-bit
7107 B64 Bitfield (untyped data) 64-bit
7108 F16 floating-point 16-bit
7109 F32 floating-point 32-bit (IEEE 754 single-precision float)
7110 F64 floating-point 64-bit (IEEE 754 double-precision float)
7111 I16 signed 16-bit integer
7112 I32 signed 32-bit integer
7113 I64 signed 64-bit integer
7114 U16 unsigned 16-bit integer
7115 U32 unsigned 32-bit integer
7116 U64 unsigned 64-bit integer */
7117
7118 /* Print operand size as untyped suffix. */
7119 case 'b':
7120 {
7121 const char *s = "";
7122 machine_mode mode = GET_MODE (x);
7123 if (VECTOR_MODE_P (mode))
7124 mode = GET_MODE_INNER (mode);
7125 switch (GET_MODE_SIZE (mode))
7126 {
7127 case 1:
7128 s = "_b8";
7129 break;
7130 case 2:
7131 s = "_b16";
7132 break;
7133 case 4:
7134 s = "_b32";
7135 break;
7136 case 8:
7137 s = "_b64";
7138 break;
7139 default:
7140 output_operand_lossage ("invalid operand %%xn code");
7141 return;
7142 }
7143 fputs (s, file);
7144 }
7145 return;
7146 case 'B':
7147 {
7148 const char *s = "";
7149 machine_mode mode = GET_MODE (x);
7150 if (VECTOR_MODE_P (mode))
7151 mode = GET_MODE_INNER (mode);
7152 switch (GET_MODE_SIZE (mode))
7153 {
7154 case 1:
7155 case 2:
7156 case 4:
7157 s = "_b32";
7158 break;
7159 case 8:
7160 s = "_b64";
7161 break;
7162 default:
7163 output_operand_lossage ("invalid operand %%xn code");
7164 return;
7165 }
7166 fputs (s, file);
7167 }
7168 return;
7169 case 'e':
7170 fputs ("sext(", file);
7171 print_operand (file, x, 0);
7172 fputs (")", file);
7173 return;
7174 case 'i':
a5879399 7175 case 'I':
5326695a 7176 case 'u':
a5879399 7177 case 'U':
7178 {
7179 bool signed_p = code == 'i';
a5879399 7180 bool min32_p = code == 'I' || code == 'U';
7181 const char *s = "";
7182 machine_mode mode = GET_MODE (x);
7183 if (VECTOR_MODE_P (mode))
7184 mode = GET_MODE_INNER (mode);
7185 if (mode == VOIDmode)
7186 switch (GET_CODE (x))
7187 {
7188 case CONST_INT:
7189 s = signed_p ? "_i32" : "_u32";
7190 break;
7191 case CONST_DOUBLE:
7192 s = "_f64";
7193 break;
7194 default:
7195 output_operand_lossage ("invalid operand %%xn code");
7196 return;
7197 }
7198 else if (FLOAT_MODE_P (mode))
7199 switch (GET_MODE_SIZE (mode))
7200 {
7201 case 2:
7202 s = "_f16";
7203 break;
7204 case 4:
7205 s = "_f32";
7206 break;
7207 case 8:
7208 s = "_f64";
7209 break;
7210 default:
7211 output_operand_lossage ("invalid operand %%xn code");
7212 return;
7213 }
7214 else if (min32_p)
7215 switch (GET_MODE_SIZE (mode))
7216 {
7217 case 1:
7218 case 2:
7219 case 4:
7220 s = signed_p ? "_i32" : "_u32";
7221 break;
7222 case 8:
7223 s = signed_p ? "_i64" : "_u64";
7224 break;
7225 default:
7226 output_operand_lossage ("invalid operand %%xn code");
7227 return;
7228 }
7229 else
7230 switch (GET_MODE_SIZE (mode))
7231 {
7232 case 1:
7233 s = signed_p ? "_i8" : "_u8";
7234 break;
7235 case 2:
7236 s = signed_p ? "_i16" : "_u16";
7237 break;
7238 case 4:
7239 s = signed_p ? "_i32" : "_u32";
7240 break;
7241 case 8:
7242 s = signed_p ? "_i64" : "_u64";
7243 break;
7244 default:
7245 output_operand_lossage ("invalid operand %%xn code");
7246 return;
7247 }
7248 fputs (s, file);
7249 }
7250 return;
7251 /* Print operand size as untyped suffix. */
7252 case 'o':
7253 {
7254 const char *s = 0;
7255 machine_mode mode = GET_MODE (x);
7256 if (VECTOR_MODE_P (mode))
7257 mode = GET_MODE_INNER (mode);
7258
7259 switch (mode)
5326695a 7260 {
45381d6f 7261 case E_QImode:
7262 s = "_ubyte";
7263 break;
7264 case E_HImode:
7265 case E_HFmode:
7266 s = "_ushort";
7267 break;
45381d6f 7268 default:
7269 break;
7270 }
7271
7272 if (s)
7273 {
7274 fputs (s, file);
7275 return;
7276 }
7277
7278 /* Fall-through - the other cases for 'o' are the same as for 's'. */
7279 gcc_fallthrough ();
7280 }
7281 case 's':
7282 {
7283 const char *s;
7284 machine_mode mode = GET_MODE (x);
7285 if (VECTOR_MODE_P (mode))
7286 mode = GET_MODE_INNER (mode);
7287
7288 switch (mode)
5326695a 7289 {
45381d6f 7290 case E_QImode:
7291 s = "_byte";
7292 break;
7293 case E_HImode:
7294 case E_HFmode:
7295 s = "_short";
7296 break;
7297 case E_SImode:
7298 case E_SFmode:
7299 s = "_dword";
7300 break;
7301 case E_DImode:
7302 case E_DFmode:
7303 s = "_dwordx2";
7304 break;
45381d6f 7305 case E_TImode:
7306 s = "_dwordx4";
7307 break;
7308 default:
7309 output_operand_lossage ("invalid operand %%xn code");
7310 return;
7311 }
7312 fputs (s, file);
7313 }
7314 return;
7315 case 'A':
7316 if (xcode != MEM)
7317 {
7318 output_operand_lossage ("invalid %%xn code");
7319 return;
7320 }
7321 print_operand_address (file, x);
7322 return;
7323 case 'O':
7324 {
7325 if (xcode != MEM)
7326 {
7327 output_operand_lossage ("invalid %%xn code");
7328 return;
7329 }
7330 if (AS_GDS_P (MEM_ADDR_SPACE (x)))
7331 fprintf (file, " gds");
7332
7333 rtx x0 = XEXP (x, 0);
7334 if (AS_GLOBAL_P (MEM_ADDR_SPACE (x)))
7335 {
7336 gcc_assert (TARGET_GCN5_PLUS);
7337
7338 fprintf (file, ", ");
7339
7340 rtx base = x0;
7341 rtx const_offset = NULL_RTX;
7342
7343 if (GET_CODE (base) == PLUS)
7344 {
7345 rtx offset = XEXP (x0, 1);
7346 base = XEXP (x0, 0);
7347
7348 if (GET_CODE (base) == PLUS)
7349 /* (SGPR + VGPR) + CONST */
7350 /* Ignore the VGPR offset for this operand. */
7351 base = XEXP (base, 0);
7352
7353 if (CONST_INT_P (offset))
7354 const_offset = XEXP (x0, 1);
7355 else if (REG_P (offset))
7356 /* SGPR + VGPR */
7357 /* Ignore the VGPR offset for this operand. */
7358 ;
7359 else
7360 output_operand_lossage ("bad ADDR_SPACE_GLOBAL address");
7361 }
7362
7363 if (REG_P (base))
7364 {
7365 if (VGPR_REGNO_P (REGNO (base)))
7366 /* The VGPR address is specified in the %A operand. */
7367 fprintf (file, "off");
7368 else if (SGPR_REGNO_P (REGNO (base)))
7369 print_reg (file, base);
7370 else
7371 output_operand_lossage ("bad ADDR_SPACE_GLOBAL address");
7372 }
7373 else
7374 output_operand_lossage ("bad ADDR_SPACE_GLOBAL address");
7375
7376 if (const_offset != NULL_RTX)
7377 fprintf (file, " offset:" HOST_WIDE_INT_PRINT_DEC,
7378 INTVAL (const_offset));
7379
7380 return;
7381 }
7382
7383 if (GET_CODE (x0) == REG)
7384 return;
7385 if (GET_CODE (x0) != PLUS)
7386 {
7387 output_operand_lossage ("invalid %%xn code");
7388 return;
7389 }
7390 rtx val = XEXP (x0, 1);
7391 if (GET_CODE (val) == CONST_VECTOR)
7392 val = CONST_VECTOR_ELT (val, 0);
7393 if (GET_CODE (val) != CONST_INT)
7394 {
7395 output_operand_lossage ("invalid %%xn code");
7396 return;
7397 }
7398 fprintf (file, " offset:" HOST_WIDE_INT_PRINT_DEC, INTVAL (val));
7399
7400 }
7401 return;
7402 case 'c':
7403 invert = true;
7404 /* Fall through. */
7405 case 'C':
7406 {
7407 const char *s;
7408 bool num = false;
7409 if ((xcode != EQ && xcode != NE) || !REG_P (XEXP (x, 0)))
7410 {
7411 output_operand_lossage ("invalid %%xn code");
7412 return;
7413 }
7414 switch (REGNO (XEXP (x, 0)))
7415 {
7416 case VCC_REG:
7417 case VCCZ_REG:
7418 s = "_vcc";
7419 break;
7420 case SCC_REG:
7421 /* For some reason llvm-mc insists on scc0 instead of sccz. */
7422 num = true;
7423 s = "_scc";
7424 break;
7425 case EXECZ_REG:
7426 s = "_exec";
7427 break;
7428 default:
7429 output_operand_lossage ("invalid %%xn code");
7430 return;
7431 }
7432 fputs (s, file);
7433 if (xcode == (invert ? NE : EQ))
7434 fputc (num ? '0' : 'z', file);
7435 else
7436 fputs (num ? "1" : "nz", file);
7437 return;
7438 }
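/* For example, "s_cbranch%C1" with operand 1 a (ne ...) test of VCC
   against zero prints "s_cbranch_vccnz"; the inverted code 'c' would
   print "s_cbranch_vccz".  */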
7439 case 'D':
7440 {
7441 const char *s;
7442 bool cmp_signed = false;
7443 switch (xcode)
7444 {
7445 case EQ:
7446 s = "_eq_";
7447 break;
7448 case NE:
7449 s = "_lg_";
7450 break;
7451 case LT:
7452 s = "_lt_";
7453 cmp_signed = true;
7454 break;
7455 case LE:
7456 s = "_le_";
7457 cmp_signed = true;
7458 break;
7459 case GT:
7460 s = "_gt_";
7461 cmp_signed = true;
7462 break;
7463 case GE:
7464 s = "_ge_";
7465 cmp_signed = true;
7466 break;
7467 case LTU:
7468 s = "_lt_";
7469 break;
7470 case LEU:
7471 s = "_le_";
7472 break;
7473 case GTU:
7474 s = "_gt_";
7475 break;
7476 case GEU:
7477 s = "_ge_";
7478 break;
7479 default:
7480 output_operand_lossage ("invalid %%xn code");
7481 return;
7482 }
7483 fputs (s, file);
7484 fputc (cmp_signed ? 'i' : 'u', file);
7485
7486 machine_mode mode = GET_MODE (XEXP (x, 0));
7487
7488 if (mode == VOIDmode)
7489 mode = GET_MODE (XEXP (x, 1));
7490
7491 /* If both sides are constants, then assume the instruction is in
7492 SImode since s_cmp can only do integer compares. */
7493 if (mode == VOIDmode)
7494 mode = SImode;
7495
7496 switch (GET_MODE_SIZE (mode))
7497 {
7498 case 4:
7499 s = "32";
7500 break;
7501 case 8:
7502 s = "64";
7503 break;
7504 default:
7505 output_operand_lossage ("invalid operand %%xn code");
7506 return;
7507 }
7508 fputs (s, file);
7509 return;
7510 }
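/* For example, a signed SImode greater-than comparison yields
   "_gt_i32", so "s_cmp%D1" would emit "s_cmp_gt_i32"; an unsigned
   DImode comparison yields "_gt_u64".  */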
7511 case 'E':
7512 {
7513 const char *s;
7514 bool cmp_signed = false;
7515 machine_mode mode = GET_MODE (XEXP (x, 0));
7516
7517 if (mode == VOIDmode)
7518 mode = GET_MODE (XEXP (x, 1));
7519
7520 /* If both sides are constants, assume the instruction is in SFmode
7521 if either operand is floating point, otherwise assume SImode. */
7522 if (mode == VOIDmode)
7523 {
7524 if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
7525 || GET_CODE (XEXP (x, 1)) == CONST_DOUBLE)
7526 mode = SFmode;
7527 else
7528 mode = SImode;
7529 }
7530
7531 /* Use the same format code for vector comparisons. */
7532 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
7533 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
7534 mode = GET_MODE_INNER (mode);
7535
7536 bool float_p = GET_MODE_CLASS (mode) == MODE_FLOAT;
7537
7538 switch (xcode)
7539 {
7540 case EQ:
7541 s = "_eq_";
7542 break;
7543 case NE:
7544 s = float_p ? "_neq_" : "_ne_";
7545 break;
7546 case LT:
7547 s = "_lt_";
7548 cmp_signed = true;
7549 break;
7550 case LE:
7551 s = "_le_";
7552 cmp_signed = true;
7553 break;
7554 case GT:
7555 s = "_gt_";
7556 cmp_signed = true;
7557 break;
7558 case GE:
7559 s = "_ge_";
7560 cmp_signed = true;
7561 break;
7562 case LTU:
7563 s = "_lt_";
7564 break;
7565 case LEU:
7566 s = "_le_";
7567 break;
7568 case GTU:
7569 s = "_gt_";
7570 break;
7571 case GEU:
7572 s = "_ge_";
7573 break;
7574 case ORDERED:
7575 s = "_o_";
7576 break;
7577 case UNORDERED:
7578 s = "_u_";
7579 break;
7580 case UNEQ:
7581 s = "_nlg_";
7582 break;
7583 case UNGE:
7584 s = "_nlt_";
7585 break;
7586 case UNGT:
7587 s = "_nle_";
7588 break;
7589 case UNLE:
7590 s = "_ngt_";
7591 break;
7592 case UNLT:
7593 s = "_nge_";
7594 break;
7595 case LTGT:
7596 s = "_lg_";
7597 break;
7598 default:
7599 output_operand_lossage ("invalid %%xn code");
7600 return;
7601 }
7602 fputs (s, file);
7603 fputc (float_p ? 'f' : cmp_signed ? 'i' : 'u', file);
7604
7605 switch (GET_MODE_SIZE (mode))
7606 {
7607 case 1:
7608 output_operand_lossage ("operand %%xn code invalid for QImode");
7609 return;
5326695a 7610 case 2:
0e159efc 7611 s = "16";
7612 break;
7613 case 4:
7614 s = "32";
7615 break;
7616 case 8:
7617 s = "64";
7618 break;
7619 default:
7620 output_operand_lossage ("invalid operand %%xn code");
7621 return;
7622 }
7623 fputs (s, file);
7624 return;
7625 }
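/* For example, an UNGT comparison of SFmode operands yields "_nle_f32",
   and (ne ...) on V64SI operands yields "_ne_u32", since vector
   comparisons use the element mode and NE is treated as unsigned
   here.  */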
7626 case 'L':
7627 print_operand (file, gcn_operand_part (GET_MODE (x), x, 0), 0);
7628 return;
7629 case 'H':
7630 print_operand (file, gcn_operand_part (GET_MODE (x), x, 1), 0);
7631 return;
7632 case 'J':
7633 print_operand (file, gcn_operand_part (GET_MODE (x), x, 2), 0);
7634 return;
7635 case 'K':
7636 print_operand (file, gcn_operand_part (GET_MODE (x), x, 3), 0);
7637 return;
7638 case 'R':
7639 /* Print a scalar register number as an integer. Temporary hack. */
7640 gcc_assert (REG_P (x));
7641 fprintf (file, "%u", (int) REGNO (x));
7642 return;
7643 case 'V':
7644 /* Print a vector register number as an integer. Temporary hack. */
7645 gcc_assert (REG_P (x));
7646 fprintf (file, "%u", (int) REGNO (x) - FIRST_VGPR_REG);
7647 return;
7648 case 0:
7649 if (xcode == REG)
7650 print_reg (file, x);
7651 else if (xcode == MEM)
7652 output_address (GET_MODE (x), x);
7653 else if (xcode == CONST_INT)
7654 fprintf (file, "%i", (int) INTVAL (x));
7655 else if (xcode == CONST_VECTOR)
7656 print_operand (file, CONST_VECTOR_ELT (x, 0), code);
7657 else if (xcode == CONST_DOUBLE)
7658 {
7659 const char *str;
7660 switch (gcn_inline_fp_constant_p (x, false))
7661 {
7662 case 240:
7663 str = "0.5";
7664 break;
7665 case 241:
7666 str = "-0.5";
7667 break;
7668 case 242:
7669 str = "1.0";
7670 break;
7671 case 243:
7672 str = "-1.0";
7673 break;
7674 case 244:
7675 str = "2.0";
7676 break;
7677 case 245:
7678 str = "-2.0";
7679 break;
7680 case 246:
7681 str = "4.0";
7682 break;
7683 case 247:
7684 str = "-4.0";
7685 break;
7686 case 248:
eff73c10 7687 str = "0.15915494";
7688 break;
7689 default:
7690 rtx ix = simplify_gen_subreg (GET_MODE (x) == DFmode
7691 ? DImode : SImode,
7692 x, GET_MODE (x), 0);
7693 if (ix)
7694 print_operand (file, ix, code);
7695 else
a94d5170 7696 output_operand_lossage ("invalid fp constant");
7697 return;
7698 break;
7699 }
7700 fputs (str, file);
7701 return;
7702 }
7703 else
7704 output_addr_const (file, x);
7705 return;
7706 case '^':
7707 if (TARGET_GCN5_PLUS)
7708 fputs ("_co", file);
7709 return;
7710 case 'g':
7711 gcc_assert (xcode == MEM);
7712 if (MEM_VOLATILE_P (x))
7713 fputs (" glc", file);
7714 return;
7715 default:
7716 output_operand_lossage ("invalid %%xn code");
7717 }
7718 gcc_unreachable ();
7719}
7720
ca60bd93 7721/* Implement DEBUGGER_REGNO macro.
7722
7723 Return the DWARF register number that corresponds to the GCC internal
7724 REGNO. */
7725
7726unsigned int
7727gcn_dwarf_register_number (unsigned int regno)
7728{
7729 /* Registers defined in DWARF. */
7730 if (regno == EXEC_LO_REG)
7731 return 17;
7732 /* We need to use a more complex DWARF expression for this
7733 else if (regno == EXEC_HI_REG)
7734 return 17; */
7735 else if (regno == VCC_LO_REG)
7736 return 768;
7737 /* We need to use a more complex DWARF expression for this
7738 else if (regno == VCC_HI_REG)
7739 return 768; */
7740 else if (regno == SCC_REG)
7741 return 128;
7742 else if (regno == DWARF_LINK_REGISTER)
7743 return 16;
7744 else if (SGPR_REGNO_P (regno))
7745 {
7746 if (regno - FIRST_SGPR_REG < 64)
7747 return (regno - FIRST_SGPR_REG + 32);
7748 else
7749 return (regno - FIRST_SGPR_REG + 1024);
7750 }
7751 else if (VGPR_REGNO_P (regno))
7752 return (regno - FIRST_VGPR_REG + 2560);
7753 else if (AVGPR_REGNO_P (regno))
7754 return (regno - FIRST_AVGPR_REG + 3072);
7755
7756 /* Otherwise, there's nothing sensible to do. */
7757 return regno + 100000;
7758}
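
/* For example, under the mapping above: s0 -> DWARF register 32,
   s63 -> 95, s64 -> 1088, v0 -> 2560, a0 -> 3072; EXEC_LO maps to 17
   and the link register to 16.  */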
7759
7760/* Implement TARGET_DWARF_REGISTER_SPAN.
7761
7762 DImode and Vector DImode require additional registers. */
7763
7764static rtx
7765gcn_dwarf_register_span (rtx rtl)
7766{
7767 machine_mode mode = GET_MODE (rtl);
7768
7769 if (VECTOR_MODE_P (mode))
7770 mode = GET_MODE_INNER (mode);
7771
7772 if (GET_MODE_SIZE (mode) != 8)
7773 return NULL_RTX;
7774
eff23b79 7775 unsigned regno = REGNO (rtl);
7776
7777 if (regno == DWARF_LINK_REGISTER)
7778 return NULL_RTX;
7779
7780 rtx p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
7781 XVECEXP (p, 0, 0) = gen_rtx_REG (SImode, regno);
7782 XVECEXP (p, 0, 1) = gen_rtx_REG (SImode, regno + 1);
7783
7784 return p;
7785}
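
/* For example, a DImode value in the SGPR pair s[20:21] is described
   as a PARALLEL of (reg:SI s20) and (reg:SI s21), so DWARF sees the
   two 32-bit halves; 32-bit values and the link register need no
   span.  */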
7786
7787/* }}} */
7788/* {{{ TARGET hook overrides. */
7789
7790#undef TARGET_ADDR_SPACE_ADDRESS_MODE
7791#define TARGET_ADDR_SPACE_ADDRESS_MODE gcn_addr_space_address_mode
7792#undef TARGET_ADDR_SPACE_DEBUG
7793#define TARGET_ADDR_SPACE_DEBUG gcn_addr_space_debug
7794#undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
7795#define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
7796 gcn_addr_space_legitimate_address_p
7797#undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
7798#define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS gcn_addr_space_legitimize_address
7799#undef TARGET_ADDR_SPACE_POINTER_MODE
7800#define TARGET_ADDR_SPACE_POINTER_MODE gcn_addr_space_pointer_mode
7801#undef TARGET_ADDR_SPACE_SUBSET_P
7802#define TARGET_ADDR_SPACE_SUBSET_P gcn_addr_space_subset_p
7803#undef TARGET_ADDR_SPACE_CONVERT
7804#define TARGET_ADDR_SPACE_CONVERT gcn_addr_space_convert
7805#undef TARGET_ARG_PARTIAL_BYTES
7806#define TARGET_ARG_PARTIAL_BYTES gcn_arg_partial_bytes
7807#undef TARGET_ASM_ALIGNED_DI_OP
7808#define TARGET_ASM_ALIGNED_DI_OP "\t.8byte\t"
7809#undef TARGET_ASM_FILE_START
7810#define TARGET_ASM_FILE_START output_file_start
7811#undef TARGET_ASM_FUNCTION_PROLOGUE
7812#define TARGET_ASM_FUNCTION_PROLOGUE gcn_target_asm_function_prologue
7813#undef TARGET_ASM_SELECT_SECTION
7814#define TARGET_ASM_SELECT_SECTION gcn_asm_select_section
7815#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
7816#define TARGET_ASM_TRAMPOLINE_TEMPLATE gcn_asm_trampoline_template
7817#undef TARGET_ATTRIBUTE_TABLE
7818#define TARGET_ATTRIBUTE_TABLE gcn_attribute_table
7819#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
7820#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
7821 gcn_autovectorize_vector_modes
7822#undef TARGET_BUILTIN_DECL
7823#define TARGET_BUILTIN_DECL gcn_builtin_decl
7824#undef TARGET_CAN_CHANGE_MODE_CLASS
7825#define TARGET_CAN_CHANGE_MODE_CLASS gcn_can_change_mode_class
7826#undef TARGET_CAN_ELIMINATE
7827#define TARGET_CAN_ELIMINATE gcn_can_eliminate_p
7828#undef TARGET_CANNOT_COPY_INSN_P
7829#define TARGET_CANNOT_COPY_INSN_P gcn_cannot_copy_insn_p
7830#undef TARGET_CLASS_LIKELY_SPILLED_P
7831#define TARGET_CLASS_LIKELY_SPILLED_P gcn_class_likely_spilled_p
7832#undef TARGET_CLASS_MAX_NREGS
7833#define TARGET_CLASS_MAX_NREGS gcn_class_max_nregs
7834#undef TARGET_CONDITIONAL_REGISTER_USAGE
7835#define TARGET_CONDITIONAL_REGISTER_USAGE gcn_conditional_register_usage
7836#undef TARGET_CONSTANT_ALIGNMENT
7837#define TARGET_CONSTANT_ALIGNMENT gcn_constant_alignment
7838#undef TARGET_DEBUG_UNWIND_INFO
7839#define TARGET_DEBUG_UNWIND_INFO gcn_debug_unwind_info
7840#undef TARGET_DWARF_REGISTER_SPAN
7841#define TARGET_DWARF_REGISTER_SPAN gcn_dwarf_register_span
7842#undef TARGET_EMUTLS_VAR_INIT
7843#define TARGET_EMUTLS_VAR_INIT gcn_emutls_var_init
7844#undef TARGET_EXPAND_BUILTIN
7845#define TARGET_EXPAND_BUILTIN gcn_expand_builtin
7846#undef TARGET_EXPAND_DIVMOD_LIBFUNC
7847#define TARGET_EXPAND_DIVMOD_LIBFUNC gcn_expand_divmod_libfunc
7848#undef TARGET_FRAME_POINTER_REQUIRED
7849#define TARGET_FRAME_POINTER_REQUIRED gcn_frame_pointer_rqd
7850#undef TARGET_FUNCTION_ARG
7851#define TARGET_FUNCTION_ARG gcn_function_arg
7852#undef TARGET_FUNCTION_ARG_ADVANCE
7853#define TARGET_FUNCTION_ARG_ADVANCE gcn_function_arg_advance
7854#undef TARGET_FUNCTION_VALUE
7855#define TARGET_FUNCTION_VALUE gcn_function_value
7856#undef TARGET_FUNCTION_VALUE_REGNO_P
7857#define TARGET_FUNCTION_VALUE_REGNO_P gcn_function_value_regno_p
7858#undef TARGET_GIMPLIFY_VA_ARG_EXPR
7859#define TARGET_GIMPLIFY_VA_ARG_EXPR gcn_gimplify_va_arg_expr
7860#undef TARGET_OMP_DEVICE_KIND_ARCH_ISA
7861#define TARGET_OMP_DEVICE_KIND_ARCH_ISA gcn_omp_device_kind_arch_isa
7862#undef TARGET_GOACC_ADJUST_PRIVATE_DECL
7863#define TARGET_GOACC_ADJUST_PRIVATE_DECL gcn_goacc_adjust_private_decl
7864#undef TARGET_GOACC_CREATE_WORKER_BROADCAST_RECORD
7865#define TARGET_GOACC_CREATE_WORKER_BROADCAST_RECORD \
7866 gcn_goacc_create_worker_broadcast_record
7867#undef TARGET_GOACC_FORK_JOIN
7868#define TARGET_GOACC_FORK_JOIN gcn_fork_join
7869#undef TARGET_GOACC_REDUCTION
7870#define TARGET_GOACC_REDUCTION gcn_goacc_reduction
7871#undef TARGET_GOACC_VALIDATE_DIMS
7872#define TARGET_GOACC_VALIDATE_DIMS gcn_goacc_validate_dims
7873#undef TARGET_GOACC_SHARED_MEM_LAYOUT
7874#define TARGET_GOACC_SHARED_MEM_LAYOUT gcn_shared_mem_layout
7875#undef TARGET_HARD_REGNO_MODE_OK
7876#define TARGET_HARD_REGNO_MODE_OK gcn_hard_regno_mode_ok
7877#undef TARGET_HARD_REGNO_NREGS
7878#define TARGET_HARD_REGNO_NREGS gcn_hard_regno_nregs
7879#undef TARGET_HAVE_SPECULATION_SAFE_VALUE
7880#define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed
7881#undef TARGET_INIT_BUILTINS
7882#define TARGET_INIT_BUILTINS gcn_init_builtins
7883#undef TARGET_INIT_LIBFUNCS
7884#define TARGET_INIT_LIBFUNCS gcn_init_libfuncs
7885#undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
7886#define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
7887 gcn_ira_change_pseudo_allocno_class
7888#undef TARGET_LEGITIMATE_CONSTANT_P
7889#define TARGET_LEGITIMATE_CONSTANT_P gcn_legitimate_constant_p
7890#undef TARGET_LIBC_HAS_FUNCTION
7891#define TARGET_LIBC_HAS_FUNCTION gcn_libc_has_function
7892#undef TARGET_LRA_P
7893#define TARGET_LRA_P hook_bool_void_true
7894#undef TARGET_MACHINE_DEPENDENT_REORG
7895#define TARGET_MACHINE_DEPENDENT_REORG gcn_md_reorg
7896#undef TARGET_MEMORY_MOVE_COST
7897#define TARGET_MEMORY_MOVE_COST gcn_memory_move_cost
7898#undef TARGET_MODES_TIEABLE_P
7899#define TARGET_MODES_TIEABLE_P gcn_modes_tieable_p
7900#undef TARGET_OPTION_OVERRIDE
7901#define TARGET_OPTION_OVERRIDE gcn_option_override
7902#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
7903#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED \
7904 gcn_pretend_outgoing_varargs_named
7905#undef TARGET_PROMOTE_FUNCTION_MODE
7906#define TARGET_PROMOTE_FUNCTION_MODE gcn_promote_function_mode
7907#undef TARGET_REGISTER_MOVE_COST
7908#define TARGET_REGISTER_MOVE_COST gcn_register_move_cost
7909#undef TARGET_RETURN_IN_MEMORY
7910#define TARGET_RETURN_IN_MEMORY gcn_return_in_memory
7911#undef TARGET_RTX_COSTS
7912#define TARGET_RTX_COSTS gcn_rtx_costs
7913#undef TARGET_SECONDARY_RELOAD
7914#define TARGET_SECONDARY_RELOAD gcn_secondary_reload
7915#undef TARGET_SECTION_TYPE_FLAGS
7916#define TARGET_SECTION_TYPE_FLAGS gcn_section_type_flags
7917#undef TARGET_SCALAR_MODE_SUPPORTED_P
7918#define TARGET_SCALAR_MODE_SUPPORTED_P gcn_scalar_mode_supported_p
7919#undef TARGET_SIMD_CLONE_ADJUST
7920#define TARGET_SIMD_CLONE_ADJUST gcn_simd_clone_adjust
7921#undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
7922#define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
7923 gcn_simd_clone_compute_vecsize_and_simdlen
7924#undef TARGET_SIMD_CLONE_USABLE
7925#define TARGET_SIMD_CLONE_USABLE gcn_simd_clone_usable
7926#undef TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P
7927#define TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P \
7928 gcn_small_register_classes_for_mode_p
7929#undef TARGET_SPILL_CLASS
7930#define TARGET_SPILL_CLASS gcn_spill_class
7931#undef TARGET_STRICT_ARGUMENT_NAMING
7932#define TARGET_STRICT_ARGUMENT_NAMING gcn_strict_argument_naming
7933#undef TARGET_TRAMPOLINE_INIT
7934#define TARGET_TRAMPOLINE_INIT gcn_trampoline_init
7935#undef TARGET_TRULY_NOOP_TRUNCATION
7936#define TARGET_TRULY_NOOP_TRUNCATION gcn_truly_noop_truncation
7937#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
7938#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST gcn_vectorization_cost
7939#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
7940#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
7941 gcn_vectorize_builtin_vectorized_function
7942#undef TARGET_VECTORIZE_GET_MASK_MODE
7943#define TARGET_VECTORIZE_GET_MASK_MODE gcn_vectorize_get_mask_mode
7944#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
7945#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE gcn_vectorize_preferred_simd_mode
7946#undef TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT
7947#define TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT \
7948 gcn_preferred_vector_alignment
7949#undef TARGET_VECTORIZE_RELATED_MODE
7950#define TARGET_VECTORIZE_RELATED_MODE gcn_related_vector_mode
7951#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
7952#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
7953 gcn_vectorize_support_vector_misalignment
7954#undef TARGET_VECTORIZE_VEC_PERM_CONST
7955#define TARGET_VECTORIZE_VEC_PERM_CONST gcn_vectorize_vec_perm_const
7956#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
7957#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
7958 gcn_vector_alignment_reachable
7959#undef TARGET_VECTOR_MODE_SUPPORTED_P
7960#define TARGET_VECTOR_MODE_SUPPORTED_P gcn_vector_mode_supported_p
7961
7962struct gcc_target targetm = TARGET_INITIALIZER;
7963
7964#include "gt-gcn.h"
7965/* }}} */