1 /* Copyright (C) 2016-2023 Free Software Foundation, Inc.
3 This file is free software; you can redistribute it and/or modify it under
4 the terms of the GNU General Public License as published by the Free
5 Software Foundation; either version 3 of the License, or (at your option)
8 This file is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 You should have received a copy of the GNU General Public License
14 along with GCC; see the file COPYING3. If not see
15 <http://www.gnu.org/licenses/>. */
19 /* We want GET_MODE_SIZE et al to return integers, please. */
20 #define IN_TARGET_CODE 1
24 #include "coretypes.h"
32 #include "stringpool.h"
37 #include "diagnostic-core.h"
38 #include "insn-attr.h"
39 #include "fold-const.h"
45 #include "langhooks.h"
47 #include "omp-general.h"
48 #include "print-rtl.h"
56 #include "case-cfn-macros.h"
58 /* This file should be included last. */
59 #include "target-def.h"
62 /* {{{ Global variables. */
64 /* Constants used by FP instructions. */
66 static REAL_VALUE_TYPE dconst4
, dconst1over2pi
;
67 static bool ext_gcn_constants_init
= 0;
69 /* Holds the ISA variant, derived from the command line parameters. */
71 enum gcn_isa gcn_isa
= ISA_GCN3
; /* Default to GCN3. */
73 /* Reserve this much space for LDS (for propagating variables from
74 worker-single mode to worker-partitioned mode), per workgroup. Global
75 analysis could calculate an exact bound, but we don't do that yet.
77 We want to permit full occupancy, so size accordingly. */
79 /* Use this as a default, but allow it to grow if the user requests a large
80 amount of gang-private shared-memory space. */
81 static int acc_lds_size
= 0x600;
83 #define OMP_LDS_SIZE 0x600 /* 0x600 is 1/40 total, rounded down. */
84 #define ACC_LDS_SIZE acc_lds_size
85 #define OTHER_LDS_SIZE 65536 /* If in doubt, reserve all of it. */
87 #define LDS_SIZE (flag_openacc ? ACC_LDS_SIZE \
88 : flag_openmp ? OMP_LDS_SIZE \
91 static int gang_private_hwm
= 32;
92 static hash_map
<tree
, int> lds_allocs
;
94 /* The number of registers usable by normal non-kernel functions.
95 The SGPR count includes any special extra registers such as VCC. */
97 #define MAX_NORMAL_SGPR_COUNT 62 // i.e. 64 with VCC
98 #define MAX_NORMAL_VGPR_COUNT 24
101 /* {{{ Initialization and options. */
103 /* Initialize machine_function. */
105 static struct machine_function
*
106 gcn_init_machine_status (void)
108 struct machine_function
*f
;
110 f
= ggc_cleared_alloc
<machine_function
> ();
113 f
->use_flat_addressing
= true;
118 /* Implement TARGET_OPTION_OVERRIDE.
120 Override option settings where defaults are variable, or we have specific
121 needs to consider. */
124 gcn_option_override (void)
126 init_machine_status
= gcn_init_machine_status
;
128 /* The HSA runtime does not respect ELF load addresses, so force PIE. */
134 gcn_isa
= (gcn_arch
== PROCESSOR_FIJI
? ISA_GCN3
135 : gcn_arch
== PROCESSOR_VEGA10
? ISA_GCN5
136 : gcn_arch
== PROCESSOR_VEGA20
? ISA_GCN5
137 : gcn_arch
== PROCESSOR_GFX908
? ISA_CDNA1
138 : gcn_arch
== PROCESSOR_GFX90a
? ISA_CDNA2
140 gcc_assert (gcn_isa
!= ISA_UNKNOWN
);
142 /* Reserve 1Kb (somewhat arbitrarily) of LDS space for reduction results and
143 worker broadcasts. */
144 if (gang_private_size_opt
== -1)
145 gang_private_size_opt
= 512;
146 else if (gang_private_size_opt
< gang_private_hwm
)
147 gang_private_size_opt
= gang_private_hwm
;
148 else if (gang_private_size_opt
>= acc_lds_size
- 1024)
150 /* We need some space for reductions and worker broadcasting. If the
151 user requests a large amount of gang-private LDS space, we might not
152 have enough left for the former. Increase the LDS allocation in that
153 case, although this may reduce the maximum occupancy on the
155 acc_lds_size
= gang_private_size_opt
+ 1024;
156 if (acc_lds_size
> 32768)
157 acc_lds_size
= 32768;
160 /* The xnack option is a placeholder, for now. Before removing, update
161 gcn-hsa.h's XNACKOPT, gcn.opt's mxnack= default init+descr, and
162 invoke.texi's default description. */
163 if (flag_xnack
!= HSACO_ATTR_OFF
)
164 sorry ("XNACK support");
168 /* {{{ Attributes. */
170 /* This table defines the arguments that are permitted in
171 __attribute__ ((amdgpu_hsa_kernel (...))).
173 The names and values correspond to the HSA metadata that is encoded
174 into the assembler file and binary. */
176 static const struct gcn_kernel_arg_type
179 const char *header_pseudo
;
182 /* This should be set to -1 or -2 for a dynamically allocated register
183 number. Use -1 if this argument contributes to the user_sgpr_count,
186 } gcn_kernel_arg_types
[] = {
187 {"exec", NULL
, DImode
, EXEC_REG
},
188 #define PRIVATE_SEGMENT_BUFFER_ARG 1
189 {"private_segment_buffer",
190 ".amdhsa_user_sgpr_private_segment_buffer", TImode
, -1},
191 #define DISPATCH_PTR_ARG 2
192 {"dispatch_ptr", ".amdhsa_user_sgpr_dispatch_ptr", DImode
, -1},
193 #define QUEUE_PTR_ARG 3
194 {"queue_ptr", ".amdhsa_user_sgpr_queue_ptr", DImode
, -1},
195 #define KERNARG_SEGMENT_PTR_ARG 4
196 {"kernarg_segment_ptr", ".amdhsa_user_sgpr_kernarg_segment_ptr", DImode
, -1},
197 {"dispatch_id", ".amdhsa_user_sgpr_dispatch_id", DImode
, -1},
198 #define FLAT_SCRATCH_INIT_ARG 6
199 {"flat_scratch_init", ".amdhsa_user_sgpr_flat_scratch_init", DImode
, -1},
200 #define FLAT_SCRATCH_SEGMENT_SIZE_ARG 7
201 {"private_segment_size", ".amdhsa_user_sgpr_private_segment_size", SImode
, -1},
202 #define WORKGROUP_ID_X_ARG 8
203 {"workgroup_id_X", ".amdhsa_system_sgpr_workgroup_id_x", SImode
, -2},
204 {"workgroup_id_Y", ".amdhsa_system_sgpr_workgroup_id_y", SImode
, -2},
205 {"workgroup_id_Z", ".amdhsa_system_sgpr_workgroup_id_z", SImode
, -2},
206 {"workgroup_info", ".amdhsa_system_sgpr_workgroup_info", SImode
, -1},
207 #define PRIVATE_SEGMENT_WAVE_OFFSET_ARG 12
208 {"private_segment_wave_offset",
209 ".amdhsa_system_sgpr_private_segment_wavefront_offset", SImode
, -2},
210 #define WORK_ITEM_ID_X_ARG 13
211 {"work_item_id_X", NULL
, V64SImode
, FIRST_VGPR_REG
},
212 #define WORK_ITEM_ID_Y_ARG 14
213 {"work_item_id_Y", NULL
, V64SImode
, FIRST_VGPR_REG
+ 1},
214 #define WORK_ITEM_ID_Z_ARG 15
215 {"work_item_id_Z", NULL
, V64SImode
, FIRST_VGPR_REG
+ 2}
218 static const long default_requested_args
219 = (1 << DISPATCH_PTR_ARG
)
220 | (1 << QUEUE_PTR_ARG
)
221 | (1 << KERNARG_SEGMENT_PTR_ARG
)
222 | (1 << WORKGROUP_ID_X_ARG
)
223 | (1 << WORK_ITEM_ID_X_ARG
)
224 | (1 << WORK_ITEM_ID_Y_ARG
)
225 | (1 << WORK_ITEM_ID_Z_ARG
);
227 /* Extract parameter settings from __attribute__((amdgpu_hsa_kernel ())).
228 This function also sets the default values for some arguments.
230 Return true on success, with ARGS populated. */
233 gcn_parse_amdgpu_hsa_kernel_attribute (struct gcn_kernel_args
*args
,
237 args
->requested
= default_requested_args
;
240 for (int a
= 0; a
< GCN_KERNEL_ARG_TYPES
; a
++)
243 for (; list
; list
= TREE_CHAIN (list
))
246 if (TREE_CODE (TREE_VALUE (list
)) != STRING_CST
)
248 error ("%<amdgpu_hsa_kernel%> attribute requires string constant "
252 str
= TREE_STRING_POINTER (TREE_VALUE (list
));
254 for (a
= 0; a
< GCN_KERNEL_ARG_TYPES
; a
++)
256 if (!strcmp (str
, gcn_kernel_arg_types
[a
].name
))
259 if (a
== GCN_KERNEL_ARG_TYPES
)
261 error ("unknown specifier %qs in %<amdgpu_hsa_kernel%> attribute",
266 if (args
->requested
& (1 << a
))
268 error ("duplicated parameter specifier %qs in %<amdgpu_hsa_kernel%> "
273 args
->requested
|= (1 << a
);
274 args
->order
[args
->nargs
++] = a
;
277 /* Requesting WORK_ITEM_ID_Z_ARG implies requesting WORK_ITEM_ID_X_ARG and
278 WORK_ITEM_ID_Y_ARG. Similarly, requesting WORK_ITEM_ID_Y_ARG implies
279 requesting WORK_ITEM_ID_X_ARG. */
280 if (args
->requested
& (1 << WORK_ITEM_ID_Z_ARG
))
281 args
->requested
|= (1 << WORK_ITEM_ID_Y_ARG
);
282 if (args
->requested
& (1 << WORK_ITEM_ID_Y_ARG
))
283 args
->requested
|= (1 << WORK_ITEM_ID_X_ARG
);
285 int sgpr_regno
= FIRST_SGPR_REG
;
287 for (int a
= 0; a
< GCN_KERNEL_ARG_TYPES
; a
++)
289 if (!(args
->requested
& (1 << a
)))
292 if (gcn_kernel_arg_types
[a
].fixed_regno
>= 0)
293 args
->reg
[a
] = gcn_kernel_arg_types
[a
].fixed_regno
;
298 switch (gcn_kernel_arg_types
[a
].mode
)
312 args
->reg
[a
] = sgpr_regno
;
313 sgpr_regno
+= reg_count
;
314 if (gcn_kernel_arg_types
[a
].fixed_regno
== -1)
315 args
->nsgprs
+= reg_count
;
318 if (sgpr_regno
> FIRST_SGPR_REG
+ 16)
320 error ("too many arguments passed in sgpr registers");
325 /* Referenced by TARGET_ATTRIBUTE_TABLE.
327 Validates target specific attributes. */
330 gcn_handle_amdgpu_hsa_kernel_attribute (tree
*node
, tree name
,
331 tree args
, int, bool *no_add_attrs
)
333 if (!FUNC_OR_METHOD_TYPE_P (*node
))
335 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
337 *no_add_attrs
= true;
341 /* Can combine regparm with all attributes but fastcall, and thiscall. */
342 if (is_attribute_p ("gcnhsa_kernel", name
))
344 struct gcn_kernel_args kernelarg
;
346 if (gcn_parse_amdgpu_hsa_kernel_attribute (&kernelarg
, args
))
347 *no_add_attrs
= true;
355 /* Implement TARGET_ATTRIBUTE_TABLE.
357 Create target-specific __attribute__ types. */
359 static const struct attribute_spec gcn_attribute_table
[] = {
360 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
361 affects_type_identity } */
362 {"amdgpu_hsa_kernel", 0, GCN_KERNEL_ARG_TYPES
, false, true,
363 true, true, gcn_handle_amdgpu_hsa_kernel_attribute
, NULL
},
365 {NULL
, 0, 0, false, false, false, false, NULL
, NULL
}
369 /* {{{ Registers and modes. */
371 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P. */
374 gcn_scalar_mode_supported_p (scalar_mode mode
)
376 return (mode
== BImode
378 || mode
== HImode
/* || mode == HFmode */
379 || mode
== SImode
|| mode
== SFmode
380 || mode
== DImode
|| mode
== DFmode
384 /* Return a vector mode with N lanes of MODE. */
387 VnMODE (int n
, machine_mode mode
)
394 case 2: return V2QImode
;
395 case 4: return V4QImode
;
396 case 8: return V8QImode
;
397 case 16: return V16QImode
;
398 case 32: return V32QImode
;
399 case 64: return V64QImode
;
405 case 2: return V2HImode
;
406 case 4: return V4HImode
;
407 case 8: return V8HImode
;
408 case 16: return V16HImode
;
409 case 32: return V32HImode
;
410 case 64: return V64HImode
;
416 case 2: return V2HFmode
;
417 case 4: return V4HFmode
;
418 case 8: return V8HFmode
;
419 case 16: return V16HFmode
;
420 case 32: return V32HFmode
;
421 case 64: return V64HFmode
;
427 case 2: return V2SImode
;
428 case 4: return V4SImode
;
429 case 8: return V8SImode
;
430 case 16: return V16SImode
;
431 case 32: return V32SImode
;
432 case 64: return V64SImode
;
438 case 2: return V2SFmode
;
439 case 4: return V4SFmode
;
440 case 8: return V8SFmode
;
441 case 16: return V16SFmode
;
442 case 32: return V32SFmode
;
443 case 64: return V64SFmode
;
449 case 2: return V2DImode
;
450 case 4: return V4DImode
;
451 case 8: return V8DImode
;
452 case 16: return V16DImode
;
453 case 32: return V32DImode
;
454 case 64: return V64DImode
;
460 case 2: return V2DFmode
;
461 case 4: return V4DFmode
;
462 case 8: return V8DFmode
;
463 case 16: return V16DFmode
;
464 case 32: return V32DFmode
;
465 case 64: return V64DFmode
;
475 /* Implement TARGET_CLASS_MAX_NREGS.
477 Return the number of hard registers needed to hold a value of MODE in
478 a register of class RCLASS. */
481 gcn_class_max_nregs (reg_class_t rclass
, machine_mode mode
)
483 /* Scalar registers are 32bit, vector registers are in fact tuples of
485 if (rclass
== VGPR_REGS
)
487 if (vgpr_1reg_mode_p (mode
))
489 if (vgpr_2reg_mode_p (mode
))
491 /* TImode is used by DImode compare_and_swap. */
492 if (vgpr_4reg_mode_p (mode
))
495 else if (rclass
== VCC_CONDITIONAL_REG
&& mode
== BImode
)
498 /* Vector modes in SGPRs are not supposed to happen (disallowed by
499 gcn_hard_regno_mode_ok), but there are some patterns that have an "Sv"
500 constraint and are used by splitters, post-reload.
501 This ensures that we don't accidentally mark the following 63 scalar
502 registers as "live". */
503 if (rclass
== SGPR_REGS
&& VECTOR_MODE_P (mode
))
504 return CEIL (GET_MODE_SIZE (GET_MODE_INNER (mode
)), 4);
506 return CEIL (GET_MODE_SIZE (mode
), 4);
509 /* Implement TARGET_HARD_REGNO_NREGS.
511 Return the number of hard registers needed to hold a value of MODE in
515 gcn_hard_regno_nregs (unsigned int regno
, machine_mode mode
)
517 return gcn_class_max_nregs (REGNO_REG_CLASS (regno
), mode
);
520 /* Implement TARGET_HARD_REGNO_MODE_OK.
522 Return true if REGNO can hold value in MODE. */
525 gcn_hard_regno_mode_ok (unsigned int regno
, machine_mode mode
)
527 /* Treat a complex mode as if it were a scalar mode of the same overall
528 size for the purposes of allocating hard registers. */
529 if (COMPLEX_MODE_P (mode
))
555 case FLAT_SCRATCH_LO_REG
:
556 case XNACK_MASK_LO_REG
:
559 return (mode
== SImode
|| mode
== DImode
);
562 return (mode
== BImode
|| mode
== SImode
|| mode
== DImode
);
564 case FLAT_SCRATCH_HI_REG
:
565 case XNACK_MASK_HI_REG
:
568 return mode
== SImode
;
572 return mode
== SImode
/*|| mode == V32BImode */ ;
576 return mode
== BImode
;
578 if (regno
== ARG_POINTER_REGNUM
|| regno
== FRAME_POINTER_REGNUM
)
580 if (SGPR_REGNO_P (regno
))
581 /* We restrict double register values to aligned registers. */
582 return (sgpr_1reg_mode_p (mode
)
583 || (!((regno
- FIRST_SGPR_REG
) & 1) && sgpr_2reg_mode_p (mode
))
584 || (((regno
- FIRST_SGPR_REG
) & 3) == 0 && mode
== TImode
));
585 if (VGPR_REGNO_P (regno
))
586 /* Vector instructions do not care about the alignment of register
587 pairs, but where there is no 64-bit instruction, many of the
588 define_split do not work if the input and output registers partially
589 overlap. We tried to fix this with early clobber and match
590 constraints, but it was bug prone, added complexity, and conflicts
591 with the 'U0' constraints on vec_merge.
592 Therefore, we restrict ourselved to aligned registers. */
593 return (vgpr_1reg_mode_p (mode
)
594 || (!((regno
- FIRST_VGPR_REG
) & 1) && vgpr_2reg_mode_p (mode
))
595 /* TImode is used by DImode compare_and_swap,
596 and by DIVMOD V64DImode libfuncs. */
597 || (!((regno
- FIRST_VGPR_REG
) & 3) && vgpr_4reg_mode_p (mode
)));
601 /* Implement REGNO_REG_CLASS via gcn.h.
603 Return smallest class containing REGNO. */
606 gcn_regno_reg_class (int regno
)
611 return SCC_CONDITIONAL_REG
;
614 return VCC_CONDITIONAL_REG
;
616 return VCCZ_CONDITIONAL_REG
;
618 return EXECZ_CONDITIONAL_REG
;
621 return EXEC_MASK_REG
;
623 if (VGPR_REGNO_P (regno
))
625 if (SGPR_REGNO_P (regno
))
627 if (regno
< FIRST_VGPR_REG
)
629 if (regno
== ARG_POINTER_REGNUM
|| regno
== FRAME_POINTER_REGNUM
)
634 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.
636 GCC assumes that lowpart contains first part of value as stored in memory.
637 This is not the case for vector registers. */
640 gcn_can_change_mode_class (machine_mode from
, machine_mode to
,
641 reg_class_t regclass
)
643 if (!vgpr_vector_mode_p (from
) && !vgpr_vector_mode_p (to
))
646 /* Vector conversions are only valid when changing mode with a fixed number
647 of lanes, or changing number of lanes with a fixed mode. Anything else
648 would require actual data movement. */
649 if (VECTOR_MODE_P (from
) && VECTOR_MODE_P (to
)
650 && GET_MODE_NUNITS (from
) != GET_MODE_NUNITS (to
)
651 && GET_MODE_INNER (from
) != GET_MODE_INNER (to
))
654 /* Vector/scalar conversions are only permitted when the scalar mode
655 is the same or smaller than the inner vector mode. */
656 if ((VECTOR_MODE_P (from
) && !VECTOR_MODE_P (to
)
657 && GET_MODE_SIZE (to
) >= GET_MODE_SIZE (GET_MODE_INNER (from
)))
658 || (VECTOR_MODE_P (to
) && !VECTOR_MODE_P (from
)
659 && GET_MODE_SIZE (from
) >= GET_MODE_SIZE (GET_MODE_INNER (to
))))
662 return (gcn_class_max_nregs (regclass
, from
)
663 == gcn_class_max_nregs (regclass
, to
));
666 /* Implement TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P.
668 When this hook returns true for MODE, the compiler allows
669 registers explicitly used in the rtl to be used as spill registers
670 but prevents the compiler from extending the lifetime of these
674 gcn_small_register_classes_for_mode_p (machine_mode mode
)
676 /* We allocate into exec and vcc regs. Those make small register class. */
677 return mode
== DImode
|| mode
== SImode
;
680 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
682 Returns true if pseudos that have been assigned to registers of class RCLASS
683 would likely be spilled because registers of RCLASS are needed for spill
687 gcn_class_likely_spilled_p (reg_class_t rclass
)
689 return (rclass
== EXEC_MASK_REG
690 || reg_classes_intersect_p (ALL_CONDITIONAL_REGS
, rclass
));
693 /* Implement TARGET_MODES_TIEABLE_P.
695 Returns true if a value of MODE1 is accessible in MODE2 without
699 gcn_modes_tieable_p (machine_mode mode1
, machine_mode mode2
)
701 if (VECTOR_MODE_P (mode1
) || VECTOR_MODE_P (mode2
))
703 int vf1
= (VECTOR_MODE_P (mode1
) ? GET_MODE_NUNITS (mode1
) : 1);
704 int vf2
= (VECTOR_MODE_P (mode2
) ? GET_MODE_NUNITS (mode2
) : 1);
705 machine_mode inner1
= (vf1
> 1 ? GET_MODE_INNER (mode1
) : mode1
);
706 machine_mode inner2
= (vf2
> 1 ? GET_MODE_INNER (mode2
) : mode2
);
708 return (vf1
== vf2
|| (inner1
== inner2
&& vf2
<= vf1
));
711 return (GET_MODE_BITSIZE (mode1
) <= MAX_FIXED_MODE_SIZE
712 && GET_MODE_BITSIZE (mode2
) <= MAX_FIXED_MODE_SIZE
);
715 /* Implement TARGET_TRULY_NOOP_TRUNCATION.
717 Returns true if it is safe to “convert” a value of INPREC bits to one of
718 OUTPREC bits (where OUTPREC is smaller than INPREC) by merely operating on
719 it as if it had only OUTPREC bits. */
722 gcn_truly_noop_truncation (poly_uint64 outprec
, poly_uint64 inprec
)
724 return ((inprec
<= 32) && (outprec
<= inprec
));
727 /* Return N-th part of value occupying multiple registers. */
730 gcn_operand_part (machine_mode mode
, rtx op
, int n
)
732 int vf
= VECTOR_MODE_P (mode
) ? GET_MODE_NUNITS (mode
) : 1;
736 machine_mode vsimode
= VnMODE (vf
, SImode
);
740 gcc_assert (REGNO (op
) + n
< FIRST_PSEUDO_REGISTER
);
741 return gen_rtx_REG (vsimode
, REGNO (op
) + n
);
743 if (GET_CODE (op
) == CONST_VECTOR
)
745 int units
= GET_MODE_NUNITS (mode
);
746 rtvec v
= rtvec_alloc (units
);
748 for (int i
= 0; i
< units
; ++i
)
749 RTVEC_ELT (v
, i
) = gcn_operand_part (GET_MODE_INNER (mode
),
750 CONST_VECTOR_ELT (op
, i
), n
);
752 return gen_rtx_CONST_VECTOR (vsimode
, v
);
754 if (GET_CODE (op
) == UNSPEC
&& XINT (op
, 1) == UNSPEC_VECTOR
)
755 return gcn_gen_undef (vsimode
);
758 else if (GET_MODE_SIZE (mode
) == 8 && REG_P (op
))
760 gcc_assert (REGNO (op
) + n
< FIRST_PSEUDO_REGISTER
);
761 return gen_rtx_REG (SImode
, REGNO (op
) + n
);
765 if (GET_CODE (op
) == UNSPEC
&& XINT (op
, 1) == UNSPEC_VECTOR
)
766 return gcn_gen_undef (SImode
);
768 /* If it's a constant then let's assume it is of the largest mode
769 available, otherwise simplify_gen_subreg will fail. */
770 if (mode
== VOIDmode
&& CONST_INT_P (op
))
772 return simplify_gen_subreg (SImode
, op
, mode
, n
* 4);
776 /* Return N-th part of value occupying multiple registers. */
779 gcn_operand_doublepart (machine_mode mode
, rtx op
, int n
)
781 return simplify_gen_subreg (DImode
, op
, mode
, n
* 8);
784 /* Return true if OP can be split into subregs or high/low parts.
785 This is always true for scalars, but not normally true for vectors.
786 However, for vectors in hardregs we can use the low and high registers. */
789 gcn_can_split_p (machine_mode
, rtx op
)
791 if (vgpr_vector_mode_p (GET_MODE (op
)))
793 if (GET_CODE (op
) == SUBREG
)
794 op
= SUBREG_REG (op
);
797 return REGNO (op
) <= FIRST_PSEUDO_REGISTER
;
802 /* Implement TARGET_SPILL_CLASS.
804 Return class of registers which could be used for pseudo of MODE
805 and of class RCLASS for spilling instead of memory. Return NO_REGS
806 if it is not possible or non-profitable. */
809 gcn_spill_class (reg_class_t c
, machine_mode
/*mode */ )
811 if (reg_classes_intersect_p (ALL_CONDITIONAL_REGS
, c
)
812 || c
== VCC_CONDITIONAL_REG
|| c
== EXEC_MASK_REG
)
818 /* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
820 Change allocno class for given pseudo from allocno and best class
821 calculated by IRA. */
824 gcn_ira_change_pseudo_allocno_class (int regno
, reg_class_t cl
,
827 /* Avoid returning classes that contain both vgpr and sgpr registers. */
828 if (cl
!= ALL_REGS
&& cl
!= SRCDST_REGS
&& cl
!= ALL_GPR_REGS
)
830 if (best_cl
!= ALL_REGS
&& best_cl
!= SRCDST_REGS
831 && best_cl
!= ALL_GPR_REGS
)
834 machine_mode mode
= PSEUDO_REGNO_MODE (regno
);
835 if (vgpr_vector_mode_p (mode
))
841 /* Create a new DImode pseudo reg and emit an instruction to initialize
845 get_exec (int64_t val
)
847 rtx reg
= gen_reg_rtx (DImode
);
848 emit_insn (gen_rtx_SET (reg
, gen_int_mode (val
, DImode
)));
853 get_exec (machine_mode mode
)
855 int vf
= (VECTOR_MODE_P (mode
) ? GET_MODE_NUNITS (mode
) : 1);
856 return get_exec (0xffffffffffffffffUL
>> (64-vf
));
860 /* {{{ Immediate constants. */
862 /* Initialize shared numeric constants. */
865 init_ext_gcn_constants (void)
867 real_from_integer (&dconst4
, DFmode
, 4, SIGNED
);
869 /* FIXME: this constant probably does not match what hardware really loads.
870 Reality check it eventually. */
871 real_from_string (&dconst1over2pi
,
872 "0.15915494309189532");
873 real_convert (&dconst1over2pi
, SFmode
, &dconst1over2pi
);
875 ext_gcn_constants_init
= 1;
879 gcn_dconst1over2pi (void)
881 if (!ext_gcn_constants_init
)
882 init_ext_gcn_constants ();
883 return dconst1over2pi
;
886 /* Return non-zero if X is a constant that can appear as an inline operand.
887 This is 0, 0.5, -0.5, 1, -1, 2, -2, 4,-4, 1/(2*pi)
888 Or a vector of those.
889 The value returned should be the encoding of this constant. */
892 gcn_inline_fp_constant_p (rtx x
, bool allow_vector
)
894 machine_mode mode
= GET_MODE (x
);
895 int vf
= VECTOR_MODE_P (mode
) ? GET_MODE_NUNITS (mode
) : 1;
898 mode
= GET_MODE_INNER (mode
);
901 && (mode
== HFmode
|| mode
== SFmode
|| mode
== DFmode
)
905 if (GET_CODE (x
) != CONST_VECTOR
)
907 n
= gcn_inline_fp_constant_p (CONST_VECTOR_ELT (x
, 0), false);
910 for (int i
= 1; i
< vf
; i
++)
911 if (CONST_VECTOR_ELT (x
, i
) != CONST_VECTOR_ELT (x
, 0))
916 if (mode
!= HFmode
&& mode
!= SFmode
&& mode
!= DFmode
)
919 const REAL_VALUE_TYPE
*r
;
921 if (x
== CONST0_RTX (mode
))
923 if (x
== CONST1_RTX (mode
))
926 r
= CONST_DOUBLE_REAL_VALUE (x
);
928 if (real_identical (r
, &dconstm1
))
931 if (real_identical (r
, &dconsthalf
))
933 if (real_identical (r
, &dconstm1
))
935 if (real_identical (r
, &dconst2
))
937 if (real_identical (r
, &dconst4
))
939 if (real_identical (r
, &dconst1over2pi
))
941 if (!ext_gcn_constants_init
)
942 init_ext_gcn_constants ();
943 real_value_negate (r
);
944 if (real_identical (r
, &dconsthalf
))
946 if (real_identical (r
, &dconst2
))
948 if (real_identical (r
, &dconst4
))
951 /* FIXME: add 4, -4 and 1/(2*PI). */
956 /* Return non-zero if X is a constant that can appear as an immediate operand.
957 This is 0, 0.5, -0.5, 1, -1, 2, -2, 4,-4, 1/(2*pi)
958 Or a vector of those.
959 The value returned should be the encoding of this constant. */
962 gcn_fp_constant_p (rtx x
, bool allow_vector
)
964 machine_mode mode
= GET_MODE (x
);
965 int vf
= VECTOR_MODE_P (mode
) ? GET_MODE_NUNITS (mode
) : 1;
968 mode
= GET_MODE_INNER (mode
);
971 && (mode
== HFmode
|| mode
== SFmode
|| mode
== DFmode
)
975 if (GET_CODE (x
) != CONST_VECTOR
)
977 n
= gcn_fp_constant_p (CONST_VECTOR_ELT (x
, 0), false);
980 for (int i
= 1; i
< vf
; i
++)
981 if (CONST_VECTOR_ELT (x
, i
) != CONST_VECTOR_ELT (x
, 0))
985 if (mode
!= HFmode
&& mode
!= SFmode
&& mode
!= DFmode
)
988 if (gcn_inline_fp_constant_p (x
, false))
990 /* FIXME: It is not clear how 32bit immediates are interpreted here. */
991 return (mode
!= DFmode
);
994 /* Return true if X is a constant representable as an inline immediate
995 constant in a 32-bit instruction encoding. */
998 gcn_inline_constant_p (rtx x
)
1000 if (GET_CODE (x
) == CONST_INT
)
1001 return INTVAL (x
) >= -16 && INTVAL (x
) <= 64;
1002 if (GET_CODE (x
) == CONST_DOUBLE
)
1003 return gcn_inline_fp_constant_p (x
, false);
1004 if (GET_CODE (x
) == CONST_VECTOR
)
1007 if (!vgpr_vector_mode_p (GET_MODE (x
)))
1009 n
= gcn_inline_constant_p (CONST_VECTOR_ELT (x
, 0));
1012 for (int i
= 1; i
< 64; i
++)
1013 if (CONST_VECTOR_ELT (x
, i
) != CONST_VECTOR_ELT (x
, 0))
1020 /* Return true if X is a constant representable as an immediate constant
1021 in a 32 or 64-bit instruction encoding. */
1024 gcn_constant_p (rtx x
)
1026 switch (GET_CODE (x
))
1032 return gcn_fp_constant_p (x
, false);
1037 if (!vgpr_vector_mode_p (GET_MODE (x
)))
1039 n
= gcn_constant_p (CONST_VECTOR_ELT (x
, 0));
1042 for (int i
= 1; i
< 64; i
++)
1043 if (CONST_VECTOR_ELT (x
, i
) != CONST_VECTOR_ELT (x
, 0))
1059 /* Return true if X is a constant representable as two inline immediate
1060 constants in a 64-bit instruction that is split into two 32-bit
1062 When MIXED is set, the low-part is permitted to use the full 32-bits. */
1065 gcn_inline_constant64_p (rtx x
, bool mixed
)
1067 if (GET_CODE (x
) == CONST_VECTOR
)
1069 if (!vgpr_vector_mode_p (GET_MODE (x
)))
1071 if (!gcn_inline_constant64_p (CONST_VECTOR_ELT (x
, 0), mixed
))
1073 for (int i
= 1; i
< 64; i
++)
1074 if (CONST_VECTOR_ELT (x
, i
) != CONST_VECTOR_ELT (x
, 0))
1080 if (GET_CODE (x
) != CONST_INT
)
1083 rtx val_lo
= gcn_operand_part (DImode
, x
, 0);
1084 rtx val_hi
= gcn_operand_part (DImode
, x
, 1);
1085 return ((mixed
|| gcn_inline_constant_p (val_lo
))
1086 && gcn_inline_constant_p (val_hi
));
1089 /* Return true if X is a constant representable as an immediate constant
1090 in a 32 or 64-bit instruction encoding where the hardware will
1091 extend the immediate to 64-bits. */
1094 gcn_constant64_p (rtx x
)
1096 if (!gcn_constant_p (x
))
1099 if (GET_CODE (x
) != CONST_INT
)
1102 /* Negative numbers are only allowed if they can be encoded within src0,
1103 because the 32-bit immediates do not get sign-extended.
1104 Unsigned numbers must not be encodable as 32-bit -1..-16, because the
1105 assembler will use a src0 inline immediate and that will get
1107 HOST_WIDE_INT val
= INTVAL (x
);
1108 return (((val
& 0xffffffff) == val
/* Positive 32-bit. */
1109 && (val
& 0xfffffff0) != 0xfffffff0) /* Not -1..-16. */
1110 || gcn_inline_constant_p (x
)); /* Src0. */
1113 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
1115 Returns true if X is a legitimate constant for a MODE immediate operand. */
1118 gcn_legitimate_constant_p (machine_mode
, rtx x
)
1120 return gcn_constant_p (x
);
1123 /* Return true if X is a CONST_VECTOR of single constant. */
1126 single_cst_vector_p (rtx x
)
1128 if (GET_CODE (x
) != CONST_VECTOR
)
1130 for (int i
= 1; i
< 64; i
++)
1131 if (CONST_VECTOR_ELT (x
, i
) != CONST_VECTOR_ELT (x
, 0))
1136 /* Create a CONST_VECTOR of duplicated value A. */
1139 gcn_vec_constant (machine_mode mode
, int a
)
1142 return CONST0_RTX (mode);
1144 return CONSTM1_RTX (mode);
1146 return CONST1_RTX (mode);
1148 return CONST2_RTX (mode);*/
1150 int units
= GET_MODE_NUNITS (mode
);
1151 machine_mode innermode
= GET_MODE_INNER (mode
);
1154 if (FLOAT_MODE_P (innermode
))
1157 real_from_integer (&rv
, NULL
, a
, SIGNED
);
1158 tem
= const_double_from_real_value (rv
, innermode
);
1161 tem
= gen_int_mode (a
, innermode
);
1163 rtvec v
= rtvec_alloc (units
);
1164 for (int i
= 0; i
< units
; ++i
)
1165 RTVEC_ELT (v
, i
) = tem
;
1167 return gen_rtx_CONST_VECTOR (mode
, v
);
1170 /* Create a CONST_VECTOR of duplicated value A. */
1173 gcn_vec_constant (machine_mode mode
, rtx a
)
1175 int units
= GET_MODE_NUNITS (mode
);
1176 rtvec v
= rtvec_alloc (units
);
1178 for (int i
= 0; i
< units
; ++i
)
1179 RTVEC_ELT (v
, i
) = a
;
1181 return gen_rtx_CONST_VECTOR (mode
, v
);
1184 /* Create an undefined vector value, used where an insn operand is
1188 gcn_gen_undef (machine_mode mode
)
1190 return gen_rtx_UNSPEC (mode
, gen_rtvec (1, const0_rtx
), UNSPEC_VECTOR
);
1194 /* {{{ Utility functions. */
1196 /* Generalised accessor functions for instruction patterns.
1197 The machine desription '@' prefix does something similar, but as of
1198 GCC 10 is incompatible with define_subst, and anyway it doesn't
1199 auto-handle the exec feature.
1201 Four macros are provided; each function only needs one:
1203 GEN_VN - create accessor functions for all sizes of one mode
1204 GEN_VNM - create accessor functions for all sizes of all modes
1205 GEN_VN_NOEXEC - for insns without "_exec" variants
1206 GEN_VNM_NOEXEC - likewise
1209 GEN_VNM (add, 3, A(rtx dest, rtx s1, rtx s2), A(dest, s1, s2)
1211 gen_addvNsi3 (dst, a, b)
1212 -> calls gen_addv64si3, or gen_addv32si3, etc.
1214 gen_addvNm3 (dst, a, b)
1215 -> calls gen_addv64qi3, or gen_addv2di3, etc.
1217 The mode is determined from the first parameter, which must be called
1218 "dest" (or else the macro doesn't work).
1220 Each function has two optional parameters at the end: merge_src and exec.
1221 If exec is non-null, the function will call the "_exec" variant of the
1222 insn. If exec is non-null but merge_src is null then an undef unspec
1226 gen_addvNsi3 (v64sidst, a, b, oldval, exec)
1227 -> calls gen_addv64si3_exec (v64sidst, a, b, oldval, exec)
1229 gen_addvNm3 (v2qidst, a, b, NULL, exec)
1230 -> calls gen_addv2qi3_exec (v2qidst, a, b,
1231 gcn_gen_undef (V2QImode), exec)
1234 #define A(...) __VA_ARGS__
1235 #define GEN_VN_NOEXEC(PREFIX, SUFFIX, PARAMS, ARGS) \
1237 gen_##PREFIX##vN##SUFFIX (PARAMS) \
1239 machine_mode mode = GET_MODE (dest); \
1240 int n = GET_MODE_NUNITS (mode); \
1244 case 2: return gen_##PREFIX##v2##SUFFIX (ARGS); \
1245 case 4: return gen_##PREFIX##v4##SUFFIX (ARGS); \
1246 case 8: return gen_##PREFIX##v8##SUFFIX (ARGS); \
1247 case 16: return gen_##PREFIX##v16##SUFFIX (ARGS); \
1248 case 32: return gen_##PREFIX##v32##SUFFIX (ARGS); \
1249 case 64: return gen_##PREFIX##v64##SUFFIX (ARGS); \
1252 gcc_unreachable (); \
1256 #define GEN_VNM_NOEXEC(PREFIX, SUFFIX, PARAMS, ARGS) \
1257 GEN_VN_NOEXEC (PREFIX, qi##SUFFIX, A(PARAMS), A(ARGS)) \
1258 GEN_VN_NOEXEC (PREFIX, hi##SUFFIX, A(PARAMS), A(ARGS)) \
1259 GEN_VN_NOEXEC (PREFIX, hf##SUFFIX, A(PARAMS), A(ARGS)) \
1260 GEN_VN_NOEXEC (PREFIX, si##SUFFIX, A(PARAMS), A(ARGS)) \
1261 GEN_VN_NOEXEC (PREFIX, sf##SUFFIX, A(PARAMS), A(ARGS)) \
1262 GEN_VN_NOEXEC (PREFIX, di##SUFFIX, A(PARAMS), A(ARGS)) \
1263 GEN_VN_NOEXEC (PREFIX, df##SUFFIX, A(PARAMS), A(ARGS)) \
1265 gen_##PREFIX##vNm##SUFFIX (PARAMS) \
1267 machine_mode mode = GET_MODE_INNER (GET_MODE (dest)); \
1271 case E_QImode: return gen_##PREFIX##vNqi##SUFFIX (ARGS); \
1272 case E_HImode: return gen_##PREFIX##vNhi##SUFFIX (ARGS); \
1273 case E_HFmode: return gen_##PREFIX##vNhf##SUFFIX (ARGS); \
1274 case E_SImode: return gen_##PREFIX##vNsi##SUFFIX (ARGS); \
1275 case E_SFmode: return gen_##PREFIX##vNsf##SUFFIX (ARGS); \
1276 case E_DImode: return gen_##PREFIX##vNdi##SUFFIX (ARGS); \
1277 case E_DFmode: return gen_##PREFIX##vNdf##SUFFIX (ARGS); \
1282 gcc_unreachable (); \
1286 #define GEN_VN(PREFIX, SUFFIX, PARAMS, ARGS) \
1288 gen_##PREFIX##vN##SUFFIX (PARAMS, rtx merge_src=NULL, rtx exec=NULL) \
1290 machine_mode mode = GET_MODE (dest); \
1291 int n = GET_MODE_NUNITS (mode); \
1293 if (exec && !merge_src) \
1294 merge_src = gcn_gen_undef (mode); \
1299 case 2: return gen_##PREFIX##v2##SUFFIX##_exec (ARGS, merge_src, exec); \
1300 case 4: return gen_##PREFIX##v4##SUFFIX##_exec (ARGS, merge_src, exec); \
1301 case 8: return gen_##PREFIX##v8##SUFFIX##_exec (ARGS, merge_src, exec); \
1302 case 16: return gen_##PREFIX##v16##SUFFIX##_exec (ARGS, merge_src, exec); \
1303 case 32: return gen_##PREFIX##v32##SUFFIX##_exec (ARGS, merge_src, exec); \
1304 case 64: return gen_##PREFIX##v64##SUFFIX##_exec (ARGS, merge_src, exec); \
1309 case 2: return gen_##PREFIX##v2##SUFFIX (ARGS); \
1310 case 4: return gen_##PREFIX##v4##SUFFIX (ARGS); \
1311 case 8: return gen_##PREFIX##v8##SUFFIX (ARGS); \
1312 case 16: return gen_##PREFIX##v16##SUFFIX (ARGS); \
1313 case 32: return gen_##PREFIX##v32##SUFFIX (ARGS); \
1314 case 64: return gen_##PREFIX##v64##SUFFIX (ARGS); \
1317 gcc_unreachable (); \
1321 #define GEN_VNM(PREFIX, SUFFIX, PARAMS, ARGS) \
1322 GEN_VN (PREFIX, qi##SUFFIX, A(PARAMS), A(ARGS)) \
1323 GEN_VN (PREFIX, hi##SUFFIX, A(PARAMS), A(ARGS)) \
1324 GEN_VN (PREFIX, hf##SUFFIX, A(PARAMS), A(ARGS)) \
1325 GEN_VN (PREFIX, si##SUFFIX, A(PARAMS), A(ARGS)) \
1326 GEN_VN (PREFIX, sf##SUFFIX, A(PARAMS), A(ARGS)) \
1327 GEN_VN (PREFIX, di##SUFFIX, A(PARAMS), A(ARGS)) \
1328 GEN_VN (PREFIX, df##SUFFIX, A(PARAMS), A(ARGS)) \
1329 USE_TI (GEN_VN (PREFIX, ti##SUFFIX, A(PARAMS), A(ARGS))) \
1331 gen_##PREFIX##vNm##SUFFIX (PARAMS, rtx merge_src=NULL, rtx exec=NULL) \
1333 machine_mode mode = GET_MODE_INNER (GET_MODE (dest)); \
1337 case E_QImode: return gen_##PREFIX##vNqi##SUFFIX (ARGS, merge_src, exec); \
1338 case E_HImode: return gen_##PREFIX##vNhi##SUFFIX (ARGS, merge_src, exec); \
1339 case E_HFmode: return gen_##PREFIX##vNhf##SUFFIX (ARGS, merge_src, exec); \
1340 case E_SImode: return gen_##PREFIX##vNsi##SUFFIX (ARGS, merge_src, exec); \
1341 case E_SFmode: return gen_##PREFIX##vNsf##SUFFIX (ARGS, merge_src, exec); \
1342 case E_DImode: return gen_##PREFIX##vNdi##SUFFIX (ARGS, merge_src, exec); \
1343 case E_DFmode: return gen_##PREFIX##vNdf##SUFFIX (ARGS, merge_src, exec); \
1345 USE_TI (return gen_##PREFIX##vNti##SUFFIX (ARGS, merge_src, exec);) \
1350 gcc_unreachable (); \
1354 /* These have TImode support. */
1355 #define USE_TI(ARGS) ARGS
1356 GEN_VNM (mov
,, A(rtx dest
, rtx src
), A(dest
, src
))
1357 GEN_VNM (vec_duplicate
,, A(rtx dest
, rtx src
), A(dest
, src
))
1359 /* These do not have TImode support. */
1361 #define USE_TI(ARGS)
1362 GEN_VNM (add
,3, A(rtx dest
, rtx src1
, rtx src2
), A(dest
, src1
, src2
))
1363 GEN_VN (add
,si3_dup
, A(rtx dest
, rtx src1
, rtx src2
), A(dest
, src1
, src2
))
1364 GEN_VN (add
,si3_vcc_dup
, A(rtx dest
, rtx src1
, rtx src2
, rtx vcc
),
1365 A(dest
, src1
, src2
, vcc
))
1366 GEN_VN (add
,di3_sext_dup2
, A(rtx dest
, rtx src1
, rtx src2
), A(dest
, src1
, src2
))
1367 GEN_VN (add
,di3_vcc_zext_dup
, A(rtx dest
, rtx src1
, rtx src2
, rtx vcc
),
1368 A(dest
, src1
, src2
, vcc
))
1369 GEN_VN (add
,di3_zext_dup2
, A(rtx dest
, rtx src1
, rtx src2
), A(dest
, src1
, src2
))
1370 GEN_VN (add
,di3_vcc_zext_dup2
, A(rtx dest
, rtx src1
, rtx src2
, rtx vcc
),
1371 A(dest
, src1
, src2
, vcc
))
1372 GEN_VN (addc
,si3
, A(rtx dest
, rtx src1
, rtx src2
, rtx vccout
, rtx vccin
),
1373 A(dest
, src1
, src2
, vccout
, vccin
))
1374 GEN_VN (and,si3
, A(rtx dest
, rtx src1
, rtx src2
), A(dest
, src1
, src2
))
1375 GEN_VN (ashl
,si3
, A(rtx dest
, rtx src
, rtx shift
), A(dest
, src
, shift
))
1376 GEN_VNM_NOEXEC (ds_bpermute
,, A(rtx dest
, rtx addr
, rtx src
, rtx exec
),
1377 A(dest
, addr
, src
, exec
))
1378 GEN_VNM (gather
,_expr
, A(rtx dest
, rtx addr
, rtx as
, rtx vol
),
1379 A(dest
, addr
, as
, vol
))
1380 GEN_VN (mul
,si3_dup
, A(rtx dest
, rtx src1
, rtx src2
), A(dest
, src1
, src2
))
1381 GEN_VN (sub
,si3
, A(rtx dest
, rtx src1
, rtx src2
), A(dest
, src1
, src2
))
1382 GEN_VN_NOEXEC (vec_series
,si
, A(rtx dest
, rtx x
, rtx c
), A(dest
, x
, c
))
1390 /* Get icode for vector instructions without an optab. */
1392 #define CODE_FOR(PREFIX, SUFFIX) \
1394 get_code_for_##PREFIX##vN##SUFFIX (int nunits) \
1398 case 2: return CODE_FOR_##PREFIX##v2##SUFFIX; \
1399 case 4: return CODE_FOR_##PREFIX##v4##SUFFIX; \
1400 case 8: return CODE_FOR_##PREFIX##v8##SUFFIX; \
1401 case 16: return CODE_FOR_##PREFIX##v16##SUFFIX; \
1402 case 32: return CODE_FOR_##PREFIX##v32##SUFFIX; \
1403 case 64: return CODE_FOR_##PREFIX##v64##SUFFIX; \
1406 gcc_unreachable (); \
1407 return CODE_FOR_nothing; \
1410 #define CODE_FOR_OP(PREFIX) \
1411 CODE_FOR (PREFIX, qi) \
1412 CODE_FOR (PREFIX, hi) \
1413 CODE_FOR (PREFIX, hf) \
1414 CODE_FOR (PREFIX, si) \
1415 CODE_FOR (PREFIX, sf) \
1416 CODE_FOR (PREFIX, di) \
1417 CODE_FOR (PREFIX, df) \
1418 CODE_FOR (PREFIX, ti) \
1420 get_code_for_##PREFIX (machine_mode mode) \
1422 int vf = GET_MODE_NUNITS (mode); \
1423 machine_mode smode = GET_MODE_INNER (mode); \
1427 case E_QImode: return get_code_for_##PREFIX##vNqi (vf); \
1428 case E_HImode: return get_code_for_##PREFIX##vNhi (vf); \
1429 case E_HFmode: return get_code_for_##PREFIX##vNhf (vf); \
1430 case E_SImode: return get_code_for_##PREFIX##vNsi (vf); \
1431 case E_SFmode: return get_code_for_##PREFIX##vNsf (vf); \
1432 case E_DImode: return get_code_for_##PREFIX##vNdi (vf); \
1433 case E_DFmode: return get_code_for_##PREFIX##vNdf (vf); \
1434 case E_TImode: return get_code_for_##PREFIX##vNti (vf); \
1438 gcc_unreachable (); \
1439 return CODE_FOR_nothing; \
1442 CODE_FOR_OP (reload_in
)
1443 CODE_FOR_OP (reload_out
)
1448 /* Return true if OP is a PARALLEL of CONST_INTs that form a linear
1449 series with step STEP. */
1452 gcn_stepped_zero_int_parallel_p (rtx op
, int step
)
1454 if (GET_CODE (op
) != PARALLEL
|| !CONST_INT_P (XVECEXP (op
, 0, 0)))
1457 unsigned HOST_WIDE_INT base
= 0;
1458 for (int i
= 0; i
< XVECLEN (op
, 0); ++i
)
1459 if (!CONST_INT_P (XVECEXP (op
, 0, i
))
1460 || UINTVAL (XVECEXP (op
, 0, i
)) != base
+ i
* step
)
1467 /* {{{ Addresses, pointers and moves. */
1469 /* Return true is REG is a valid place to store a pointer,
1470 for instructions that require an SGPR.
1474 gcn_address_register_p (rtx reg
, machine_mode mode
, bool strict
)
1476 if (GET_CODE (reg
) == SUBREG
)
1477 reg
= SUBREG_REG (reg
);
1482 if (GET_MODE (reg
) != mode
)
1485 int regno
= REGNO (reg
);
1487 if (regno
>= FIRST_PSEUDO_REGISTER
)
1495 regno
= reg_renumber
[regno
];
1498 return (SGPR_REGNO_P (regno
) || regno
== M0_REG
1499 || regno
== ARG_POINTER_REGNUM
|| regno
== FRAME_POINTER_REGNUM
);
1502 /* Return true is REG is a valid place to store a pointer,
1503 for instructions that require a VGPR. */
1506 gcn_vec_address_register_p (rtx reg
, machine_mode mode
, bool strict
)
1508 if (GET_CODE (reg
) == SUBREG
)
1509 reg
= SUBREG_REG (reg
);
1514 if (GET_MODE (reg
) != mode
)
1517 int regno
= REGNO (reg
);
1519 if (regno
>= FIRST_PSEUDO_REGISTER
)
1527 regno
= reg_renumber
[regno
];
1530 return VGPR_REGNO_P (regno
);
1533 /* Return true if X would be valid inside a MEM using the Flat address
1537 gcn_flat_address_p (rtx x
, machine_mode mode
)
1539 bool vec_mode
= (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
1540 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
);
1542 if (vec_mode
&& gcn_address_register_p (x
, DImode
, false))
1545 if (!vec_mode
&& gcn_vec_address_register_p (x
, DImode
, false))
1548 if (TARGET_GCN5_PLUS
1549 && GET_CODE (x
) == PLUS
1550 && gcn_vec_address_register_p (XEXP (x
, 0), DImode
, false)
1551 && CONST_INT_P (XEXP (x
, 1)))
1557 /* Return true if X would be valid inside a MEM using the Scalar Flat
1561 gcn_scalar_flat_address_p (rtx x
)
1563 if (gcn_address_register_p (x
, DImode
, false))
1566 if (GET_CODE (x
) == PLUS
1567 && gcn_address_register_p (XEXP (x
, 0), DImode
, false)
1568 && CONST_INT_P (XEXP (x
, 1)))
1574 /* Return true if MEM X would be valid for the Scalar Flat address space. */
1577 gcn_scalar_flat_mem_p (rtx x
)
1582 if (GET_MODE_SIZE (GET_MODE (x
)) < 4)
1585 return gcn_scalar_flat_address_p (XEXP (x
, 0));
1588 /* Return true if X would be valid inside a MEM using the LDS or GDS
1592 gcn_ds_address_p (rtx x
)
1594 if (gcn_vec_address_register_p (x
, SImode
, false))
1597 if (GET_CODE (x
) == PLUS
1598 && gcn_vec_address_register_p (XEXP (x
, 0), SImode
, false)
1599 && CONST_INT_P (XEXP (x
, 1)))
1605 /* Return true if ADDR would be valid inside a MEM using the Global
1609 gcn_global_address_p (rtx addr
)
1611 if (gcn_address_register_p (addr
, DImode
, false)
1612 || gcn_vec_address_register_p (addr
, DImode
, false))
1615 if (GET_CODE (addr
) == PLUS
)
1617 rtx base
= XEXP (addr
, 0);
1618 rtx offset
= XEXP (addr
, 1);
1619 bool immediate_p
= (CONST_INT_P (offset
)
1620 && INTVAL (offset
) >= -(1 << 12)
1621 && INTVAL (offset
) < (1 << 12));
1623 if ((gcn_address_register_p (base
, DImode
, false)
1624 || gcn_vec_address_register_p (base
, DImode
, false))
1626 /* SGPR + CONST or VGPR + CONST */
1629 if (gcn_address_register_p (base
, DImode
, false)
1630 && gcn_vgpr_register_operand (offset
, SImode
))
1634 if (GET_CODE (base
) == PLUS
1635 && gcn_address_register_p (XEXP (base
, 0), DImode
, false)
1636 && gcn_vgpr_register_operand (XEXP (base
, 1), SImode
)
1638 /* (SGPR + VGPR) + CONST */
1645 /* Implement TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P.
1647 Recognizes RTL expressions that are valid memory addresses for an
1648 instruction. The MODE argument is the machine mode for the MEM
1649 expression that wants to use this address.
1651 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
1652 convert common non-canonical forms to canonical form so that they will
1656 gcn_addr_space_legitimate_address_p (machine_mode mode
, rtx x
, bool strict
,
1659 /* All vector instructions need to work on addresses in registers. */
1660 if (!TARGET_GCN5_PLUS
&& (vgpr_vector_mode_p (mode
) && !REG_P (x
)))
1663 if (AS_SCALAR_FLAT_P (as
))
1665 if (mode
== QImode
|| mode
== HImode
)
1668 switch (GET_CODE (x
))
1671 return gcn_address_register_p (x
, DImode
, strict
);
1672 /* Addresses are in the form BASE+OFFSET
1673 OFFSET is either 20bit unsigned immediate, SGPR or M0.
1674 Writes and atomics do not accept SGPR. */
1677 rtx x0
= XEXP (x
, 0);
1678 rtx x1
= XEXP (x
, 1);
1679 if (!gcn_address_register_p (x0
, DImode
, strict
))
1681 /* FIXME: This is disabled because of the mode mismatch between
1682 SImode (for the address or m0 register) and the DImode PLUS.
1683 We'll need a zero_extend or similar.
1685 if (gcn_m0_register_p (x1, SImode, strict)
1686 || gcn_address_register_p (x1, SImode, strict))
1689 if (GET_CODE (x1
) == CONST_INT
)
1691 if (INTVAL (x1
) >= 0 && INTVAL (x1
) < (1 << 20)
1692 /* The low bits of the offset are ignored, even when
1693 they're meant to realign the pointer. */
1694 && !(INTVAL (x1
) & 0x3))
1704 else if (AS_SCRATCH_P (as
))
1705 return gcn_address_register_p (x
, SImode
, strict
);
1706 else if (AS_FLAT_P (as
) || AS_FLAT_SCRATCH_P (as
))
1708 if (TARGET_GCN3
|| GET_CODE (x
) == REG
)
1709 return ((GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
1710 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
1711 ? gcn_address_register_p (x
, DImode
, strict
)
1712 : gcn_vec_address_register_p (x
, DImode
, strict
));
1715 gcc_assert (TARGET_GCN5_PLUS
);
1717 if (GET_CODE (x
) == PLUS
)
1719 rtx x1
= XEXP (x
, 1);
1721 if (VECTOR_MODE_P (mode
)
1722 ? !gcn_address_register_p (x
, DImode
, strict
)
1723 : !gcn_vec_address_register_p (x
, DImode
, strict
))
1726 if (GET_CODE (x1
) == CONST_INT
)
1728 if (INTVAL (x1
) >= 0 && INTVAL (x1
) < (1 << 12)
1729 /* The low bits of the offset are ignored, even when
1730 they're meant to realign the pointer. */
1731 && !(INTVAL (x1
) & 0x3))
1738 else if (AS_GLOBAL_P (as
))
1740 gcc_assert (TARGET_GCN5_PLUS
);
1742 if (GET_CODE (x
) == REG
)
1743 return (gcn_address_register_p (x
, DImode
, strict
)
1744 || (!VECTOR_MODE_P (mode
)
1745 && gcn_vec_address_register_p (x
, DImode
, strict
)));
1746 else if (GET_CODE (x
) == PLUS
)
1748 rtx base
= XEXP (x
, 0);
1749 rtx offset
= XEXP (x
, 1);
1751 bool immediate_p
= (GET_CODE (offset
) == CONST_INT
1752 /* Signed 13-bit immediate. */
1753 && INTVAL (offset
) >= -(1 << 12)
1754 && INTVAL (offset
) < (1 << 12)
1755 /* The low bits of the offset are ignored, even
1756 when they're meant to realign the pointer. */
1757 && !(INTVAL (offset
) & 0x3));
1759 if (!VECTOR_MODE_P (mode
))
1761 if ((gcn_address_register_p (base
, DImode
, strict
)
1762 || gcn_vec_address_register_p (base
, DImode
, strict
))
1764 /* SGPR + CONST or VGPR + CONST */
1767 if (gcn_address_register_p (base
, DImode
, strict
)
1768 && gcn_vgpr_register_operand (offset
, SImode
))
1772 if (GET_CODE (base
) == PLUS
1773 && gcn_address_register_p (XEXP (base
, 0), DImode
, strict
)
1774 && gcn_vgpr_register_operand (XEXP (base
, 1), SImode
)
1776 /* (SGPR + VGPR) + CONST */
1781 if (gcn_address_register_p (base
, DImode
, strict
)
1790 else if (AS_ANY_DS_P (as
))
1791 switch (GET_CODE (x
))
1794 return (VECTOR_MODE_P (mode
)
1795 ? gcn_address_register_p (x
, SImode
, strict
)
1796 : gcn_vec_address_register_p (x
, SImode
, strict
));
1797 /* Addresses are in the form BASE+OFFSET
1798 OFFSET is either 20bit unsigned immediate, SGPR or M0.
1799 Writes and atomics do not accept SGPR. */
1802 rtx x0
= XEXP (x
, 0);
1803 rtx x1
= XEXP (x
, 1);
1804 if (!gcn_vec_address_register_p (x0
, DImode
, strict
))
1806 if (GET_CODE (x1
) == REG
)
1808 if (GET_CODE (x1
) != REG
1809 || (REGNO (x1
) <= FIRST_PSEUDO_REGISTER
1810 && !gcn_ssrc_register_operand (x1
, DImode
)))
1813 else if (GET_CODE (x1
) == CONST_VECTOR
1814 && GET_CODE (CONST_VECTOR_ELT (x1
, 0)) == CONST_INT
1815 && single_cst_vector_p (x1
))
1817 x1
= CONST_VECTOR_ELT (x1
, 0);
1818 if (INTVAL (x1
) >= 0 && INTVAL (x1
) < (1 << 20))
1832 /* Implement TARGET_ADDR_SPACE_POINTER_MODE.
1834 Return the appropriate mode for a named address pointer. */
1836 static scalar_int_mode
1837 gcn_addr_space_pointer_mode (addr_space_t addrspace
)
1841 case ADDR_SPACE_SCRATCH
:
1842 case ADDR_SPACE_LDS
:
1843 case ADDR_SPACE_GDS
:
1845 case ADDR_SPACE_DEFAULT
:
1846 case ADDR_SPACE_FLAT
:
1847 case ADDR_SPACE_FLAT_SCRATCH
:
1848 case ADDR_SPACE_SCALAR_FLAT
:
1855 /* Implement TARGET_ADDR_SPACE_ADDRESS_MODE.
1857 Return the appropriate mode for a named address space address. */
1859 static scalar_int_mode
1860 gcn_addr_space_address_mode (addr_space_t addrspace
)
1862 return gcn_addr_space_pointer_mode (addrspace
);
1865 /* Implement TARGET_ADDR_SPACE_SUBSET_P.
1867 Determine if one named address space is a subset of another. */
1870 gcn_addr_space_subset_p (addr_space_t subset
, addr_space_t superset
)
1872 if (subset
== superset
)
1874 /* FIXME is this true? */
1875 if (AS_FLAT_P (superset
) || AS_SCALAR_FLAT_P (superset
))
1880 /* Convert from one address space to another. */
1883 gcn_addr_space_convert (rtx op
, tree from_type
, tree to_type
)
1885 gcc_assert (POINTER_TYPE_P (from_type
));
1886 gcc_assert (POINTER_TYPE_P (to_type
));
1888 addr_space_t as_from
= TYPE_ADDR_SPACE (TREE_TYPE (from_type
));
1889 addr_space_t as_to
= TYPE_ADDR_SPACE (TREE_TYPE (to_type
));
1891 if (AS_LDS_P (as_from
) && AS_FLAT_P (as_to
))
1893 /* The high bits of the QUEUE_PTR_ARG register are used by
1894 GCN_BUILTIN_FIRST_CALL_THIS_THREAD_P, so mask them out. */
1895 rtx queue_reg
= gen_rtx_REG (DImode
,
1896 cfun
->machine
->args
.reg
[QUEUE_PTR_ARG
]);
1897 rtx queue_ptr
= gen_reg_rtx (DImode
);
1898 emit_insn (gen_anddi3 (queue_ptr
, queue_reg
, GEN_INT (0xffffffffffff)));
1899 rtx group_seg_aperture_hi
= gen_rtx_MEM (SImode
,
1900 gen_rtx_PLUS (DImode
, queue_ptr
,
1901 gen_int_mode (64, SImode
)));
1902 rtx tmp
= gen_reg_rtx (DImode
);
1904 emit_move_insn (gen_lowpart (SImode
, tmp
), op
);
1905 emit_move_insn (gen_highpart_mode (SImode
, DImode
, tmp
),
1906 group_seg_aperture_hi
);
1910 else if (as_from
== as_to
)
1916 /* Implement TARGET_ADDR_SPACE_DEBUG.
1918 Return the dwarf address space class for each hardware address space. */
1921 gcn_addr_space_debug (addr_space_t as
)
1925 case ADDR_SPACE_DEFAULT
:
1926 case ADDR_SPACE_FLAT
:
1927 case ADDR_SPACE_SCALAR_FLAT
:
1928 case ADDR_SPACE_FLAT_SCRATCH
:
1929 return DW_ADDR_none
;
1930 case ADDR_SPACE_GLOBAL
:
1931 return 1; // DW_ADDR_LLVM_global
1932 case ADDR_SPACE_LDS
:
1933 return 3; // DW_ADDR_LLVM_group
1934 case ADDR_SPACE_SCRATCH
:
1935 return 4; // DW_ADDR_LLVM_private
1936 case ADDR_SPACE_GDS
:
1937 return 0x8000; // DW_ADDR_AMDGPU_region
1943 /* Implement REGNO_MODE_CODE_OK_FOR_BASE_P via gcn.h
1945 Retun true if REGNO is OK for memory adressing. */
1948 gcn_regno_mode_code_ok_for_base_p (int regno
,
1949 machine_mode
, addr_space_t as
, int, int)
1951 if (regno
>= FIRST_PSEUDO_REGISTER
)
1954 regno
= reg_renumber
[regno
];
1959 return (VGPR_REGNO_P (regno
)
1960 || regno
== ARG_POINTER_REGNUM
|| regno
== FRAME_POINTER_REGNUM
);
1961 else if (AS_SCALAR_FLAT_P (as
))
1962 return (SGPR_REGNO_P (regno
)
1963 || regno
== ARG_POINTER_REGNUM
|| regno
== FRAME_POINTER_REGNUM
);
1964 else if (AS_GLOBAL_P (as
))
1966 return (SGPR_REGNO_P (regno
)
1967 || VGPR_REGNO_P (regno
)
1968 || regno
== ARG_POINTER_REGNUM
1969 || regno
== FRAME_POINTER_REGNUM
);
1976 /* Implement MODE_CODE_BASE_REG_CLASS via gcn.h.
1978 Return a suitable register class for memory addressing. */
1981 gcn_mode_code_base_reg_class (machine_mode mode
, addr_space_t as
, int oc
,
1986 case ADDR_SPACE_DEFAULT
:
1987 return gcn_mode_code_base_reg_class (mode
, DEFAULT_ADDR_SPACE
, oc
, ic
);
1988 case ADDR_SPACE_SCALAR_FLAT
:
1989 case ADDR_SPACE_SCRATCH
:
1992 case ADDR_SPACE_FLAT
:
1993 case ADDR_SPACE_FLAT_SCRATCH
:
1994 case ADDR_SPACE_LDS
:
1995 case ADDR_SPACE_GDS
:
1996 return ((GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
1997 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
1998 ? SGPR_REGS
: VGPR_REGS
);
1999 case ADDR_SPACE_GLOBAL
:
2000 return ((GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
2001 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
2002 ? SGPR_REGS
: ALL_GPR_REGS
);
2007 /* Implement REGNO_OK_FOR_INDEX_P via gcn.h.
2009 Return true if REGNO is OK for index of memory addressing. */
2012 regno_ok_for_index_p (int regno
)
2014 if (regno
>= FIRST_PSEUDO_REGISTER
)
2017 regno
= reg_renumber
[regno
];
2021 return regno
== M0_REG
|| VGPR_REGNO_P (regno
);
2024 /* Expand vector init of OP0 by VEC.
2025 Implements vec_init instruction pattern. */
2028 gcn_expand_vector_init (rtx op0
, rtx vec
)
2031 machine_mode mode
= GET_MODE (op0
);
2032 int vf
= GET_MODE_NUNITS (mode
);
2033 machine_mode addrmode
= VnMODE (vf
, DImode
);
2034 machine_mode offsetmode
= VnMODE (vf
, SImode
);
2036 int64_t mem_mask
= 0;
2037 int64_t item_mask
[64];
2038 rtx ramp
= gen_reg_rtx (offsetmode
);
2039 rtx addr
= gen_reg_rtx (addrmode
);
2041 int unit_size
= GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op0
)));
2042 emit_insn (gen_mulvNsi3_dup (ramp
, gen_rtx_REG (offsetmode
, VGPR_REGNO (1)),
2043 GEN_INT (unit_size
)));
2045 bool simple_repeat
= true;
2047 /* Expand nested vectors into one vector. */
2048 int item_count
= XVECLEN (vec
, 0);
2049 for (int i
= 0, j
= 0; i
< item_count
; i
++)
2051 rtx item
= XVECEXP (vec
, 0, i
);
2052 machine_mode mode
= GET_MODE (item
);
2053 int units
= VECTOR_MODE_P (mode
) ? GET_MODE_NUNITS (mode
) : 1;
2054 item_mask
[j
] = (((uint64_t)-1)>>(64-units
)) << j
;
2056 if (simple_repeat
&& i
!= 0)
2057 simple_repeat
= item
== XVECEXP (vec
, 0, i
-1);
2059 /* If its a vector of values then copy them into the final location. */
2060 if (GET_CODE (item
) == CONST_VECTOR
)
2062 for (int k
= 0; k
< units
; k
++)
2063 val
[j
++] = XVECEXP (item
, 0, k
);
2066 /* Otherwise, we have a scalar or an expression that expands... */
2070 rtx base
= XEXP (item
, 0);
2071 if (MEM_ADDR_SPACE (item
) == DEFAULT_ADDR_SPACE
2074 /* We have a simple vector load. We can put the addresses in
2075 the vector, combine it with any other such MEMs, and load it
2076 all with a single gather at the end. */
2077 int64_t mask
= ((0xffffffffffffffffUL
2078 >> (64-GET_MODE_NUNITS (mode
)))
2080 rtx exec
= get_exec (mask
);
2081 emit_insn (gen_subvNsi3
2083 gcn_vec_constant (offsetmode
, j
*unit_size
),
2085 emit_insn (gen_addvNdi3_zext_dup2
2087 (mem_mask
? addr
: gcn_gen_undef (addrmode
)),
2092 /* The MEM is non-trivial, so let's load it independently. */
2093 item
= force_reg (mode
, item
);
2095 else if (!CONST_INT_P (item
) && !CONST_DOUBLE_P (item
))
2096 /* The item may be a symbol_ref, or something else non-trivial. */
2097 item
= force_reg (mode
, item
);
2099 /* Duplicate the vector across each item.
2100 It is either a smaller vector register that needs shifting,
2101 or a MEM that needs loading. */
2106 int64_t initialized_mask
= 0;
2111 emit_insn (gen_gathervNm_expr
2112 (op0
, gen_rtx_PLUS (addrmode
, addr
,
2113 gen_rtx_VEC_DUPLICATE (addrmode
,
2115 GEN_INT (DEFAULT_ADDR_SPACE
), GEN_INT (0),
2116 NULL
, get_exec (mem_mask
)));
2118 initialized_mask
= mem_mask
;
2121 if (simple_repeat
&& item_count
> 1 && !prev
)
2123 /* Special case for instances of {A, B, A, B, A, B, ....}, etc. */
2124 rtx src
= gen_rtx_SUBREG (mode
, val
[0], 0);
2125 rtx input_vf_mask
= GEN_INT (GET_MODE_NUNITS (GET_MODE (val
[0]))-1);
2127 rtx permutation
= gen_reg_rtx (VnMODE (vf
, SImode
));
2128 emit_insn (gen_vec_seriesvNsi (permutation
, GEN_INT (0), GEN_INT (1)));
2129 rtx mask_dup
= gen_reg_rtx (VnMODE (vf
, SImode
));
2130 emit_insn (gen_vec_duplicatevNsi (mask_dup
, input_vf_mask
));
2131 emit_insn (gen_andvNsi3 (permutation
, permutation
, mask_dup
));
2132 emit_insn (gen_ashlvNsi3 (permutation
, permutation
, GEN_INT (2)));
2133 emit_insn (gen_ds_bpermutevNm (op0
, permutation
, src
, get_exec (mode
)));
2137 /* Write each value, elementwise, but coalesce matching values into one
2138 instruction, where possible. */
2139 for (int i
= 0; i
< vf
; i
++)
2140 if (!(initialized_mask
& ((int64_t) 1 << i
)))
2142 if (gcn_constant_p (val
[i
]))
2143 emit_insn (gen_movvNm (op0
, gcn_vec_constant (mode
, val
[i
]), prev
,
2144 get_exec (item_mask
[i
])));
2145 else if (VECTOR_MODE_P (GET_MODE (val
[i
]))
2146 && (GET_MODE_NUNITS (GET_MODE (val
[i
])) == vf
2148 emit_insn (gen_movvNm (op0
, gen_rtx_SUBREG (mode
, val
[i
], 0), prev
,
2149 get_exec (item_mask
[i
])));
2150 else if (VECTOR_MODE_P (GET_MODE (val
[i
])))
2152 rtx permutation
= gen_reg_rtx (VnMODE (vf
, SImode
));
2153 emit_insn (gen_vec_seriesvNsi (permutation
, GEN_INT (-i
*4),
2155 rtx tmp
= gen_reg_rtx (mode
);
2156 emit_insn (gen_ds_bpermutevNm (tmp
, permutation
,
2157 gen_rtx_SUBREG (mode
, val
[i
], 0),
2159 emit_insn (gen_movvNm (op0
, tmp
, prev
, get_exec (item_mask
[i
])));
2163 rtx reg
= force_reg (GET_MODE_INNER (mode
), val
[i
]);
2164 emit_insn (gen_vec_duplicatevNm (op0
, reg
, prev
,
2165 get_exec (item_mask
[i
])));
2168 initialized_mask
|= item_mask
[i
];
2173 /* Load vector constant where n-th lane contains BASE+n*VAL. */
2176 strided_constant (machine_mode mode
, int base
, int val
)
2178 rtx x
= gen_reg_rtx (mode
);
2179 emit_move_insn (x
, gcn_vec_constant (mode
, base
));
2180 emit_insn (gen_addvNm3 (x
, x
, gcn_vec_constant (mode
, val
* 32),
2181 x
, get_exec (0xffffffff00000000)));
2182 emit_insn (gen_addvNm3 (x
, x
, gcn_vec_constant (mode
, val
* 16),
2183 x
, get_exec (0xffff0000ffff0000)));
2184 emit_insn (gen_addvNm3 (x
, x
, gcn_vec_constant (mode
, val
* 8),
2185 x
, get_exec (0xff00ff00ff00ff00)));
2186 emit_insn (gen_addvNm3 (x
, x
, gcn_vec_constant (mode
, val
* 4),
2187 x
, get_exec (0xf0f0f0f0f0f0f0f0)));
2188 emit_insn (gen_addvNm3 (x
, x
, gcn_vec_constant (mode
, val
* 2),
2189 x
, get_exec (0xcccccccccccccccc)));
2190 emit_insn (gen_addvNm3 (x
, x
, gcn_vec_constant (mode
, val
* 1),
2191 x
, get_exec (0xaaaaaaaaaaaaaaaa)));
2195 /* Implement TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS. */
2198 gcn_addr_space_legitimize_address (rtx x
, rtx old
, machine_mode mode
,
2203 case ADDR_SPACE_DEFAULT
:
2204 return gcn_addr_space_legitimize_address (x
, old
, mode
,
2205 DEFAULT_ADDR_SPACE
);
2206 case ADDR_SPACE_SCALAR_FLAT
:
2207 case ADDR_SPACE_SCRATCH
:
2208 /* Instructions working on vectors need the address to be in
2210 if (vgpr_vector_mode_p (mode
))
2211 return force_reg (GET_MODE (x
), x
);
2214 case ADDR_SPACE_FLAT
:
2215 case ADDR_SPACE_FLAT_SCRATCH
:
2216 case ADDR_SPACE_GLOBAL
:
2217 return TARGET_GCN3
? force_reg (DImode
, x
) : x
;
2218 case ADDR_SPACE_LDS
:
2219 case ADDR_SPACE_GDS
:
2220 /* FIXME: LDS support offsets, handle them!. */
2221 if (vgpr_vector_mode_p (mode
)
2222 && GET_MODE_INNER (GET_MODE (x
)) != SImode
)
2224 machine_mode simode
= VnMODE (GET_MODE_NUNITS (mode
), SImode
);
2225 rtx addrs
= gen_reg_rtx (simode
);
2226 rtx base
= force_reg (SImode
, x
);
2227 rtx offsets
= strided_constant (simode
, 0,
2228 GET_MODE_UNIT_SIZE (mode
));
2230 emit_insn (gen_vec_duplicatevNsi (addrs
, base
));
2231 emit_insn (gen_addvNsi3 (addrs
, offsets
, addrs
));
2239 /* Convert a (mem:<MODE> (reg:DI)) to (mem:<MODE> (reg:VnDI)) with the
2240 proper vector of stepped addresses.
2242 MEM will be a DImode address of a vector in an SGPR.
2243 TMP will be a VnDImode VGPR pair or (scratch:VnDI). */
2246 gcn_expand_scalar_to_vector_address (machine_mode mode
, rtx exec
, rtx mem
,
2249 machine_mode pmode
= VnMODE (GET_MODE_NUNITS (mode
), DImode
);
2250 machine_mode offmode
= VnMODE (GET_MODE_NUNITS (mode
), SImode
);
2251 gcc_assert (MEM_P (mem
));
2252 rtx mem_base
= XEXP (mem
, 0);
2253 rtx mem_index
= NULL_RTX
;
2255 if (!TARGET_GCN5_PLUS
)
2257 /* gcn_addr_space_legitimize_address should have put the address in a
2258 register. If not, it is too late to do anything about it. */
2259 gcc_assert (REG_P (mem_base
));
2262 if (GET_CODE (mem_base
) == PLUS
)
2264 mem_index
= XEXP (mem_base
, 1);
2265 mem_base
= XEXP (mem_base
, 0);
2268 /* RF and RM base registers for vector modes should be always an SGPR. */
2269 gcc_assert (SGPR_REGNO_P (REGNO (mem_base
))
2270 || REGNO (mem_base
) >= FIRST_PSEUDO_REGISTER
);
2272 machine_mode inner
= GET_MODE_INNER (mode
);
2273 int shift
= exact_log2 (GET_MODE_SIZE (inner
));
2274 rtx ramp
= gen_rtx_REG (offmode
, VGPR_REGNO (1));
2275 rtx new_base
= NULL_RTX
;
2276 addr_space_t as
= MEM_ADDR_SPACE (mem
);
2278 rtx tmplo
= (REG_P (tmp
)
2279 ? gcn_operand_part (pmode
, tmp
, 0)
2280 : gen_reg_rtx (offmode
));
2282 /* tmplo[:] = ramp[:] << shift */
2283 emit_insn (gen_ashlvNsi3 (tmplo
, ramp
,
2284 gen_int_mode (shift
, SImode
),
2289 rtx vcc
= gen_rtx_REG (DImode
, CC_SAVE_REG
);
2293 rtx mem_base_lo
= gcn_operand_part (DImode
, mem_base
, 0);
2294 rtx mem_base_hi
= gcn_operand_part (DImode
, mem_base
, 1);
2295 rtx tmphi
= gcn_operand_part (pmode
, tmp
, 1);
2297 /* tmphi[:] = mem_base_hi */
2298 emit_insn (gen_vec_duplicatevNsi (tmphi
, mem_base_hi
, NULL
, exec
));
2300 /* tmp[:] += zext (mem_base) */
2303 emit_insn (gen_addvNsi3_vcc_dup (tmplo
, mem_base_lo
, tmplo
,
2305 emit_insn (gen_addcvNsi3 (tmphi
, tmphi
, const0_rtx
,
2306 vcc
, vcc
, NULL
, exec
));
2309 emit_insn (gen_addvNdi3_vcc_zext_dup (tmp
, mem_base_lo
, tmp
, vcc
));
2313 tmp
= gen_reg_rtx (pmode
);
2314 emit_insn (gen_addvNdi3_vcc_zext_dup2 (tmp
, tmplo
, mem_base
, vcc
,
2320 else if (AS_ANY_DS_P (as
))
2322 emit_insn (gen_addvNsi3_dup (tmplo
, tmplo
, mem_base
, NULL
, exec
));
2327 mem_base
= gen_rtx_VEC_DUPLICATE (pmode
, mem_base
);
2328 new_base
= gen_rtx_PLUS (pmode
, mem_base
,
2329 gen_rtx_SIGN_EXTEND (pmode
, tmplo
));
2332 return gen_rtx_PLUS (GET_MODE (new_base
), new_base
,
2333 gen_rtx_VEC_DUPLICATE (GET_MODE (new_base
),
2334 (mem_index
? mem_index
2338 /* Convert a BASE address, a vector of OFFSETS, and a SCALE, to addresses
2339 suitable for the given address space. This is indented for use in
2340 gather/scatter patterns.
2342 The offsets may be signed or unsigned, according to UNSIGNED_P.
2343 If EXEC is set then _exec patterns will be used, otherwise plain.
2346 ADDR_SPACE_FLAT - return VnDImode vector of absolute addresses.
2347 ADDR_SPACE_GLOBAL - return VnSImode vector of offsets. */
2350 gcn_expand_scaled_offsets (addr_space_t as
, rtx base
, rtx offsets
, rtx scale
,
2351 bool unsigned_p
, rtx exec
)
2353 int vf
= GET_MODE_NUNITS (GET_MODE (offsets
));
2354 rtx tmpsi
= gen_reg_rtx (VnMODE (vf
, SImode
));
2355 rtx tmpdi
= gen_reg_rtx (VnMODE (vf
, DImode
));
2357 if (CONST_INT_P (scale
)
2358 && INTVAL (scale
) > 0
2359 && exact_log2 (INTVAL (scale
)) >= 0)
2360 emit_insn (gen_ashlvNsi3 (tmpsi
, offsets
,
2361 GEN_INT (exact_log2 (INTVAL (scale
))),
2364 emit_insn (gen_mulvNsi3_dup (tmpsi
, offsets
, scale
, NULL
, exec
));
2366 /* "Global" instructions do not support negative register offsets. */
2367 if (as
== ADDR_SPACE_FLAT
|| !unsigned_p
)
2370 emit_insn (gen_addvNdi3_zext_dup2 (tmpdi
, tmpsi
, base
, NULL
, exec
));
2372 emit_insn (gen_addvNdi3_sext_dup2 (tmpdi
, tmpsi
, base
, NULL
, exec
));
2375 else if (as
== ADDR_SPACE_GLOBAL
)
2381 /* Return true if move from OP0 to OP1 is known to be executed in vector
2385 gcn_vgpr_move_p (rtx op0
, rtx op1
)
2387 if (MEM_P (op0
) && AS_SCALAR_FLAT_P (MEM_ADDR_SPACE (op0
)))
2389 if (MEM_P (op1
) && AS_SCALAR_FLAT_P (MEM_ADDR_SPACE (op1
)))
2391 return ((REG_P (op0
) && VGPR_REGNO_P (REGNO (op0
)))
2392 || (REG_P (op1
) && VGPR_REGNO_P (REGNO (op1
)))
2393 || vgpr_vector_mode_p (GET_MODE (op0
)));
2396 /* Return true if move from OP0 to OP1 is known to be executed in scalar
2397 unit. Used in the machine description. */
2400 gcn_sgpr_move_p (rtx op0
, rtx op1
)
2402 if (MEM_P (op0
) && AS_SCALAR_FLAT_P (MEM_ADDR_SPACE (op0
)))
2404 if (MEM_P (op1
) && AS_SCALAR_FLAT_P (MEM_ADDR_SPACE (op1
)))
2406 if (!REG_P (op0
) || REGNO (op0
) >= FIRST_PSEUDO_REGISTER
2407 || VGPR_REGNO_P (REGNO (op0
)))
2410 && REGNO (op1
) < FIRST_PSEUDO_REGISTER
2411 && !VGPR_REGNO_P (REGNO (op1
)))
2413 return immediate_operand (op1
, VOIDmode
) || memory_operand (op1
, VOIDmode
);
2416 /* Implement TARGET_SECONDARY_RELOAD.
2418 The address space determines which registers can be used for loads and
2422 gcn_secondary_reload (bool in_p
, rtx x
, reg_class_t rclass
,
2423 machine_mode reload_mode
, secondary_reload_info
*sri
)
2425 reg_class_t result
= NO_REGS
;
2426 bool spilled_pseudo
=
2427 (REG_P (x
) || GET_CODE (x
) == SUBREG
) && true_regnum (x
) == -1;
2429 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2431 fprintf (dump_file
, "gcn_secondary_reload: ");
2432 dump_value_slim (dump_file
, x
, 1);
2433 fprintf (dump_file
, " %s %s:%s", (in_p
? "->" : "<-"),
2434 reg_class_names
[rclass
], GET_MODE_NAME (reload_mode
));
2435 if (REG_P (x
) || GET_CODE (x
) == SUBREG
)
2436 fprintf (dump_file
, " (true regnum: %d \"%s\")", true_regnum (x
),
2437 (true_regnum (x
) >= 0
2438 && true_regnum (x
) < FIRST_PSEUDO_REGISTER
2439 ? reg_names
[true_regnum (x
)]
2440 : (spilled_pseudo
? "stack spill" : "??")));
2441 fprintf (dump_file
, "\n");
2444 /* Some callers don't use or initialize icode. */
2445 sri
->icode
= CODE_FOR_nothing
;
2447 if (MEM_P (x
) || spilled_pseudo
)
2449 addr_space_t as
= DEFAULT_ADDR_SPACE
;
2451 /* If we have a spilled pseudo, we can't find the address space
2452 directly, but we know it's in ADDR_SPACE_FLAT space for GCN3 or
2453 ADDR_SPACE_GLOBAL for GCN5. */
2455 as
= MEM_ADDR_SPACE (x
);
2457 if (as
== ADDR_SPACE_DEFAULT
)
2458 as
= DEFAULT_ADDR_SPACE
;
2462 case ADDR_SPACE_SCALAR_FLAT
:
2464 ((!MEM_P (x
) || rclass
== SGPR_REGS
) ? NO_REGS
: SGPR_REGS
);
2466 case ADDR_SPACE_FLAT
:
2467 case ADDR_SPACE_FLAT_SCRATCH
:
2468 case ADDR_SPACE_GLOBAL
:
2469 if (GET_MODE_CLASS (reload_mode
) == MODE_VECTOR_INT
2470 || GET_MODE_CLASS (reload_mode
) == MODE_VECTOR_FLOAT
)
2473 sri
->icode
= get_code_for_reload_in (reload_mode
);
2475 sri
->icode
= get_code_for_reload_out (reload_mode
);
2479 case ADDR_SPACE_LDS
:
2480 case ADDR_SPACE_GDS
:
2481 case ADDR_SPACE_SCRATCH
:
2482 result
= (rclass
== VGPR_REGS
? NO_REGS
: VGPR_REGS
);
2487 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2488 fprintf (dump_file
, " <= %s (icode: %s)\n", reg_class_names
[result
],
2489 get_insn_name (sri
->icode
));
2494 /* Update register usage after having seen the compiler flags and kernel
2495 attributes. We typically want to fix registers that contain values
2496 set by the HSA runtime. */
2499 gcn_conditional_register_usage (void)
2501 if (!cfun
|| !cfun
->machine
)
2504 if (cfun
->machine
->normal_function
)
2506 /* Restrict the set of SGPRs and VGPRs used by non-kernel functions. */
2507 for (int i
= SGPR_REGNO (MAX_NORMAL_SGPR_COUNT
);
2508 i
<= LAST_SGPR_REG
; i
++)
2509 fixed_regs
[i
] = 1, call_used_regs
[i
] = 1;
2511 for (int i
= VGPR_REGNO (MAX_NORMAL_VGPR_COUNT
);
2512 i
<= LAST_VGPR_REG
; i
++)
2513 fixed_regs
[i
] = 1, call_used_regs
[i
] = 1;
2518 /* If the set of requested args is the default set, nothing more needs to
2520 if (cfun
->machine
->args
.requested
== default_requested_args
)
2523 /* Requesting a set of args different from the default violates the ABI. */
2524 if (!leaf_function_p ())
2525 warning (0, "A non-default set of initial values has been requested, "
2526 "which violates the ABI");
2528 for (int i
= SGPR_REGNO (0); i
< SGPR_REGNO (14); i
++)
2531 /* Fix the runtime argument register containing values that may be
2532 needed later. DISPATCH_PTR_ARG and FLAT_SCRATCH_* should not be
2533 needed after the prologue so there's no need to fix them. */
2534 if (cfun
->machine
->args
.reg
[PRIVATE_SEGMENT_WAVE_OFFSET_ARG
] >= 0)
2535 fixed_regs
[cfun
->machine
->args
.reg
[PRIVATE_SEGMENT_WAVE_OFFSET_ARG
]] = 1;
2536 if (cfun
->machine
->args
.reg
[PRIVATE_SEGMENT_BUFFER_ARG
] >= 0)
2538 /* The upper 32-bits of the 64-bit descriptor are not used, so allow
2539 the containing registers to be used for other purposes. */
2540 fixed_regs
[cfun
->machine
->args
.reg
[PRIVATE_SEGMENT_BUFFER_ARG
]] = 1;
2541 fixed_regs
[cfun
->machine
->args
.reg
[PRIVATE_SEGMENT_BUFFER_ARG
] + 1] = 1;
2543 if (cfun
->machine
->args
.reg
[KERNARG_SEGMENT_PTR_ARG
] >= 0)
2545 fixed_regs
[cfun
->machine
->args
.reg
[KERNARG_SEGMENT_PTR_ARG
]] = 1;
2546 fixed_regs
[cfun
->machine
->args
.reg
[KERNARG_SEGMENT_PTR_ARG
] + 1] = 1;
2548 if (cfun
->machine
->args
.reg
[DISPATCH_PTR_ARG
] >= 0)
2550 fixed_regs
[cfun
->machine
->args
.reg
[DISPATCH_PTR_ARG
]] = 1;
2551 fixed_regs
[cfun
->machine
->args
.reg
[DISPATCH_PTR_ARG
] + 1] = 1;
2553 if (cfun
->machine
->args
.reg
[QUEUE_PTR_ARG
] >= 0)
2555 fixed_regs
[cfun
->machine
->args
.reg
[QUEUE_PTR_ARG
]] = 1;
2556 fixed_regs
[cfun
->machine
->args
.reg
[QUEUE_PTR_ARG
] + 1] = 1;
2558 if (cfun
->machine
->args
.reg
[WORKGROUP_ID_X_ARG
] >= 0)
2559 fixed_regs
[cfun
->machine
->args
.reg
[WORKGROUP_ID_X_ARG
]] = 1;
2560 if (cfun
->machine
->args
.reg
[WORK_ITEM_ID_X_ARG
] >= 0)
2561 fixed_regs
[cfun
->machine
->args
.reg
[WORK_ITEM_ID_X_ARG
]] = 1;
2562 if (cfun
->machine
->args
.reg
[WORK_ITEM_ID_Y_ARG
] >= 0)
2563 fixed_regs
[cfun
->machine
->args
.reg
[WORK_ITEM_ID_Y_ARG
]] = 1;
2564 if (cfun
->machine
->args
.reg
[WORK_ITEM_ID_Z_ARG
] >= 0)
2565 fixed_regs
[cfun
->machine
->args
.reg
[WORK_ITEM_ID_Z_ARG
]] = 1;
2568 /* Determine if a load or store is valid, according to the register classes
2569 and address space. Used primarily by the machine description to decide
2570 when to split a move into two steps. */
2573 gcn_valid_move_p (machine_mode mode
, rtx dest
, rtx src
)
2575 if (!MEM_P (dest
) && !MEM_P (src
))
2579 && AS_FLAT_P (MEM_ADDR_SPACE (dest
))
2580 && (gcn_flat_address_p (XEXP (dest
, 0), mode
)
2581 || GET_CODE (XEXP (dest
, 0)) == SYMBOL_REF
2582 || GET_CODE (XEXP (dest
, 0)) == LABEL_REF
)
2583 && gcn_vgpr_register_operand (src
, mode
))
2585 else if (MEM_P (src
)
2586 && AS_FLAT_P (MEM_ADDR_SPACE (src
))
2587 && (gcn_flat_address_p (XEXP (src
, 0), mode
)
2588 || GET_CODE (XEXP (src
, 0)) == SYMBOL_REF
2589 || GET_CODE (XEXP (src
, 0)) == LABEL_REF
)
2590 && gcn_vgpr_register_operand (dest
, mode
))
2594 && AS_GLOBAL_P (MEM_ADDR_SPACE (dest
))
2595 && (gcn_global_address_p (XEXP (dest
, 0))
2596 || GET_CODE (XEXP (dest
, 0)) == SYMBOL_REF
2597 || GET_CODE (XEXP (dest
, 0)) == LABEL_REF
)
2598 && gcn_vgpr_register_operand (src
, mode
))
2600 else if (MEM_P (src
)
2601 && AS_GLOBAL_P (MEM_ADDR_SPACE (src
))
2602 && (gcn_global_address_p (XEXP (src
, 0))
2603 || GET_CODE (XEXP (src
, 0)) == SYMBOL_REF
2604 || GET_CODE (XEXP (src
, 0)) == LABEL_REF
)
2605 && gcn_vgpr_register_operand (dest
, mode
))
2609 && MEM_ADDR_SPACE (dest
) == ADDR_SPACE_SCALAR_FLAT
2610 && (gcn_scalar_flat_address_p (XEXP (dest
, 0))
2611 || GET_CODE (XEXP (dest
, 0)) == SYMBOL_REF
2612 || GET_CODE (XEXP (dest
, 0)) == LABEL_REF
)
2613 && gcn_ssrc_register_operand (src
, mode
))
2615 else if (MEM_P (src
)
2616 && MEM_ADDR_SPACE (src
) == ADDR_SPACE_SCALAR_FLAT
2617 && (gcn_scalar_flat_address_p (XEXP (src
, 0))
2618 || GET_CODE (XEXP (src
, 0)) == SYMBOL_REF
2619 || GET_CODE (XEXP (src
, 0)) == LABEL_REF
)
2620 && gcn_sdst_register_operand (dest
, mode
))
2624 && AS_ANY_DS_P (MEM_ADDR_SPACE (dest
))
2625 && gcn_ds_address_p (XEXP (dest
, 0))
2626 && gcn_vgpr_register_operand (src
, mode
))
2628 else if (MEM_P (src
)
2629 && AS_ANY_DS_P (MEM_ADDR_SPACE (src
))
2630 && gcn_ds_address_p (XEXP (src
, 0))
2631 && gcn_vgpr_register_operand (dest
, mode
))
2638 /* {{{ Functions and ABI. */
2640 /* Implement TARGET_FUNCTION_VALUE.
2642 Define how to find the value returned by a function.
2643 The register location is always the same, but the mode depends on
2647 gcn_function_value (const_tree valtype
, const_tree
, bool)
2649 machine_mode mode
= TYPE_MODE (valtype
);
2651 if (INTEGRAL_TYPE_P (valtype
)
2652 && GET_MODE_CLASS (mode
) == MODE_INT
2653 && GET_MODE_SIZE (mode
) < 4)
2656 return gen_rtx_REG (mode
, RETURN_VALUE_REG
);
2659 /* Implement TARGET_FUNCTION_VALUE_REGNO_P.
2661 Return true if N is a possible register number for the function return
2665 gcn_function_value_regno_p (const unsigned int n
)
2667 return n
== RETURN_VALUE_REG
;
2670 /* Calculate the number of registers required to hold function argument
2674 num_arg_regs (const function_arg_info
&arg
)
2676 if (targetm
.calls
.must_pass_in_stack (arg
))
2679 int size
= arg
.promoted_size_in_bytes ();
2680 int regsize
= UNITS_PER_WORD
* (VECTOR_MODE_P (arg
.mode
)
2681 ? GET_MODE_NUNITS (arg
.mode
) : 1);
2682 return (size
+ regsize
- 1) / regsize
;
2685 /* Implement TARGET_STRICT_ARGUMENT_NAMING.
2687 Return true if the location where a function argument is passed
2688 depends on whether or not it is a named argument
2690 For gcn, we know how to handle functions declared as stdarg: by
2691 passing an extra pointer to the unnamed arguments. However, the
2692 Fortran frontend can produce a different situation, where a
2693 function pointer is declared with no arguments, but the actual
2694 function and calls to it take more arguments. In that case, we
2695 want to ensure the call matches the definition of the function. */
2698 gcn_strict_argument_naming (cumulative_args_t cum_v
)
2700 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
2702 return cum
->fntype
== NULL_TREE
|| stdarg_p (cum
->fntype
);
2705 /* Implement TARGET_PRETEND_OUTGOING_VARARGS_NAMED.
2707 See comment on gcn_strict_argument_naming. */
2710 gcn_pretend_outgoing_varargs_named (cumulative_args_t cum_v
)
2712 return !gcn_strict_argument_naming (cum_v
);
2715 /* Implement TARGET_FUNCTION_ARG.
2717 Return an RTX indicating whether a function argument is passed in a register
2718 and if so, which register. */
2721 gcn_function_arg (cumulative_args_t cum_v
, const function_arg_info
&arg
)
2723 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
2724 if (cum
->normal_function
)
2726 if (!arg
.named
|| arg
.end_marker_p ())
2729 if (targetm
.calls
.must_pass_in_stack (arg
))
2732 int first_reg
= (VECTOR_MODE_P (arg
.mode
)
2733 ? FIRST_VPARM_REG
: FIRST_PARM_REG
);
2734 int cum_num
= (VECTOR_MODE_P (arg
.mode
)
2735 ? cum
->vnum
: cum
->num
);
2736 int reg_num
= first_reg
+ cum_num
;
2737 int num_regs
= num_arg_regs (arg
);
2739 while (reg_num
% num_regs
!= 0)
2741 if (reg_num
+ num_regs
<= first_reg
+ NUM_PARM_REGS
)
2742 return gen_rtx_REG (arg
.mode
, reg_num
);
2746 if (cum
->num
>= cum
->args
.nargs
)
2748 cum
->offset
= (cum
->offset
+ TYPE_ALIGN (arg
.type
) / 8 - 1)
2749 & -(TYPE_ALIGN (arg
.type
) / 8);
2750 cfun
->machine
->kernarg_segment_alignment
2751 = MAX ((unsigned) cfun
->machine
->kernarg_segment_alignment
,
2752 TYPE_ALIGN (arg
.type
) / 8);
2753 rtx addr
= gen_rtx_REG (DImode
,
2754 cum
->args
.reg
[KERNARG_SEGMENT_PTR_ARG
]);
2756 addr
= gen_rtx_PLUS (DImode
, addr
,
2757 gen_int_mode (cum
->offset
, DImode
));
2758 rtx mem
= gen_rtx_MEM (arg
.mode
, addr
);
2759 set_mem_attributes (mem
, arg
.type
, 1);
2760 set_mem_addr_space (mem
, ADDR_SPACE_SCALAR_FLAT
);
2761 MEM_READONLY_P (mem
) = 1;
2765 int a
= cum
->args
.order
[cum
->num
];
2766 if (arg
.mode
!= gcn_kernel_arg_types
[a
].mode
)
2768 error ("wrong type of argument %s", gcn_kernel_arg_types
[a
].name
);
2771 return gen_rtx_REG ((machine_mode
) gcn_kernel_arg_types
[a
].mode
,
2777 /* Implement TARGET_FUNCTION_ARG_ADVANCE.
2779 Updates the summarizer variable pointed to by CUM_V to advance past an
2780 argument in the argument list. */
2783 gcn_function_arg_advance (cumulative_args_t cum_v
,
2784 const function_arg_info
&arg
)
2786 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
2788 if (cum
->normal_function
)
2793 int first_reg
= (VECTOR_MODE_P (arg
.mode
)
2794 ? FIRST_VPARM_REG
: FIRST_PARM_REG
);
2795 int *cum_num
= (VECTOR_MODE_P (arg
.mode
)
2796 ? &cum
->vnum
: &cum
->num
);
2797 int num_regs
= num_arg_regs (arg
);
2799 while ((first_reg
+ *cum_num
) % num_regs
!= 0)
2801 *cum_num
+= num_regs
;
2805 if (cum
->num
< cum
->args
.nargs
)
2809 cum
->offset
+= tree_to_uhwi (TYPE_SIZE_UNIT (arg
.type
));
2810 cfun
->machine
->kernarg_segment_byte_size
= cum
->offset
;
2815 /* Implement TARGET_ARG_PARTIAL_BYTES.
2817 Returns the number of bytes at the beginning of an argument that must be put
2818 in registers. The value must be zero for arguments that are passed entirely
2819 in registers or that are entirely pushed on the stack. */
2822 gcn_arg_partial_bytes (cumulative_args_t cum_v
, const function_arg_info
&arg
)
2824 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
2829 if (targetm
.calls
.must_pass_in_stack (arg
))
2832 int cum_num
= (VECTOR_MODE_P (arg
.mode
) ? cum
->vnum
: cum
->num
);
2833 int regsize
= UNITS_PER_WORD
* (VECTOR_MODE_P (arg
.mode
)
2834 ? GET_MODE_NUNITS (arg
.mode
) : 1);
2836 if (cum_num
>= NUM_PARM_REGS
)
2839 /* If the argument fits entirely in registers, return 0. */
2840 if (cum_num
+ num_arg_regs (arg
) <= NUM_PARM_REGS
)
2843 return (NUM_PARM_REGS
- cum_num
) * regsize
;
2846 /* A normal function which takes a pointer argument may be passed a pointer to
2847 LDS space (via a high-bits-set aperture), and that only works with FLAT
2848 addressing, not GLOBAL. Force FLAT addressing if the function has an
2849 incoming pointer parameter. NOTE: This is a heuristic that works in the
2850 offloading case, but in general, a function might read global pointer
2851 variables, etc. that may refer to LDS space or other special memory areas
2852 not supported by GLOBAL instructions, and then this argument check would not
2856 gcn_detect_incoming_pointer_arg (tree fndecl
)
2858 gcc_assert (cfun
&& cfun
->machine
);
2860 for (tree arg
= TYPE_ARG_TYPES (TREE_TYPE (fndecl
));
2862 arg
= TREE_CHAIN (arg
))
2863 if (POINTER_TYPE_P (TREE_VALUE (arg
)))
2864 cfun
->machine
->use_flat_addressing
= true;
2867 /* Implement INIT_CUMULATIVE_ARGS, via gcn.h.
2869 Initialize a variable CUM of type CUMULATIVE_ARGS for a call to a function
2870 whose data type is FNTYPE. For a library call, FNTYPE is 0. */
2873 gcn_init_cumulative_args (CUMULATIVE_ARGS
*cum
/* Argument info to init */ ,
2874 tree fntype
/* tree ptr for function decl */ ,
2875 rtx libname
/* SYMBOL_REF of library name or 0 */ ,
2876 tree fndecl
, int caller
)
2878 memset (cum
, 0, sizeof (*cum
));
2879 cum
->fntype
= fntype
;
2882 gcc_assert (cfun
&& cfun
->machine
);
2883 cum
->normal_function
= true;
2886 cfun
->machine
->normal_function
= true;
2887 gcn_detect_incoming_pointer_arg (fndecl
);
2893 attr
= lookup_attribute ("amdgpu_hsa_kernel", DECL_ATTRIBUTES (fndecl
));
2894 if (fndecl
&& !attr
)
2895 attr
= lookup_attribute ("amdgpu_hsa_kernel",
2896 TYPE_ATTRIBUTES (TREE_TYPE (fndecl
)));
2897 if (!attr
&& fntype
)
2898 attr
= lookup_attribute ("amdgpu_hsa_kernel", TYPE_ATTRIBUTES (fntype
));
2899 /* Handle main () as kernel, so we can run testsuite.
2900 Handle OpenACC kernels similarly to main. */
2901 if (!attr
&& !caller
&& fndecl
2902 && (MAIN_NAME_P (DECL_NAME (fndecl
))
2903 || lookup_attribute ("omp target entrypoint",
2904 DECL_ATTRIBUTES (fndecl
)) != NULL_TREE
))
2905 gcn_parse_amdgpu_hsa_kernel_attribute (&cum
->args
, NULL_TREE
);
2908 if (!attr
|| caller
)
2910 gcc_assert (cfun
&& cfun
->machine
);
2911 cum
->normal_function
= true;
2913 cfun
->machine
->normal_function
= true;
2915 gcn_parse_amdgpu_hsa_kernel_attribute
2916 (&cum
->args
, attr
? TREE_VALUE (attr
) : NULL_TREE
);
2918 cfun
->machine
->args
= cum
->args
;
2919 if (!caller
&& cfun
->machine
->normal_function
)
2920 gcn_detect_incoming_pointer_arg (fndecl
);
2926 gcn_return_in_memory (const_tree type
, const_tree
ARG_UNUSED (fntype
))
2928 machine_mode mode
= TYPE_MODE (type
);
2929 HOST_WIDE_INT size
= int_size_in_bytes (type
);
2931 if (AGGREGATE_TYPE_P (type
))
2934 if (mode
== BLKmode
)
2937 if ((!VECTOR_TYPE_P (type
) && size
> 2 * UNITS_PER_WORD
)
2938 || size
> 2 * UNITS_PER_WORD
* 64)
2944 /* Implement TARGET_PROMOTE_FUNCTION_MODE.
2946 Return the mode to use for outgoing function arguments. */
2949 gcn_promote_function_mode (const_tree
ARG_UNUSED (type
), machine_mode mode
,
2950 int *ARG_UNUSED (punsignedp
),
2951 const_tree
ARG_UNUSED (funtype
),
2952 int ARG_UNUSED (for_return
))
2954 if (GET_MODE_CLASS (mode
) == MODE_INT
&& GET_MODE_SIZE (mode
) < 4)
2960 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.
2962 Derived from hppa_gimplify_va_arg_expr. The generic routine doesn't handle
2963 ARGS_GROW_DOWNWARDS. */
2966 gcn_gimplify_va_arg_expr (tree valist
, tree type
,
2967 gimple_seq
*ARG_UNUSED (pre_p
),
2968 gimple_seq
*ARG_UNUSED (post_p
))
2970 tree ptr
= build_pointer_type (type
);
2975 indirect
= pass_va_arg_by_reference (type
);
2979 ptr
= build_pointer_type (type
);
2981 valist_type
= TREE_TYPE (valist
);
2983 /* Args grow down. Not handled by generic routines. */
2985 u
= fold_convert (sizetype
, size_in_bytes (type
));
2986 u
= fold_build1 (NEGATE_EXPR
, sizetype
, u
);
2987 t
= fold_build_pointer_plus (valist
, u
);
2989 /* Align to 8 byte boundary. */
2991 u
= build_int_cst (TREE_TYPE (t
), -8);
2992 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
, u
);
2993 t
= fold_convert (valist_type
, t
);
2995 t
= build2 (MODIFY_EXPR
, valist_type
, valist
, t
);
2997 t
= fold_convert (ptr
, t
);
2998 t
= build_va_arg_indirect_ref (t
);
3001 t
= build_va_arg_indirect_ref (t
);
3006 /* Return 1 if TRAIT NAME is present in the OpenMP context's
3007 device trait set, return 0 if not present in any OpenMP context in the
3008 whole translation unit, or -1 if not present in the current OpenMP context
3009 but might be present in another OpenMP context in the same TU. */
3012 gcn_omp_device_kind_arch_isa (enum omp_device_kind_arch_isa trait
,
3017 case omp_device_kind
:
3018 return strcmp (name
, "gpu") == 0;
3019 case omp_device_arch
:
3020 return strcmp (name
, "amdgcn") == 0 || strcmp (name
, "gcn") == 0;
3021 case omp_device_isa
:
3022 if (strcmp (name
, "fiji") == 0 || strcmp (name
, "gfx803") == 0)
3023 return gcn_arch
== PROCESSOR_FIJI
;
3024 if (strcmp (name
, "gfx900") == 0)
3025 return gcn_arch
== PROCESSOR_VEGA10
;
3026 if (strcmp (name
, "gfx906") == 0)
3027 return gcn_arch
== PROCESSOR_VEGA20
;
3028 if (strcmp (name
, "gfx908") == 0)
3029 return gcn_arch
== PROCESSOR_GFX908
;
3030 if (strcmp (name
, "gfx90a") == 0)
3031 return gcn_arch
== PROCESSOR_GFX90a
;
3038 /* Calculate stack offsets needed to create prologues and epilogues. */
3040 static struct machine_function
*
3041 gcn_compute_frame_offsets (void)
3043 machine_function
*offsets
= cfun
->machine
;
3045 if (reload_completed
)
3048 offsets
->need_frame_pointer
= frame_pointer_needed
;
3050 offsets
->outgoing_args_size
= crtl
->outgoing_args_size
;
3051 offsets
->pretend_size
= crtl
->args
.pretend_args_size
;
3053 offsets
->local_vars
= get_frame_size ();
3055 offsets
->lr_needs_saving
= (!leaf_function_p ()
3056 || df_regs_ever_live_p (LR_REGNUM
)
3057 || df_regs_ever_live_p (LR_REGNUM
+ 1));
3059 offsets
->callee_saves
= offsets
->lr_needs_saving
? 8 : 0;
3061 for (int regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
3062 if ((df_regs_ever_live_p (regno
) && !call_used_or_fixed_reg_p (regno
))
3063 || ((regno
& ~1) == HARD_FRAME_POINTER_REGNUM
3064 && frame_pointer_needed
))
3065 offsets
->callee_saves
+= (VGPR_REGNO_P (regno
) ? 256 : 4);
3067 /* Round up to 64-bit boundary to maintain stack alignment. */
3068 offsets
->callee_saves
= (offsets
->callee_saves
+ 7) & ~7;
3073 /* Insert code into the prologue or epilogue to store or load any
3074 callee-save register to/from the stack.
3076 Helper function for gcn_expand_prologue and gcn_expand_epilogue. */
3079 move_callee_saved_registers (rtx sp
, machine_function
*offsets
,
3082 int regno
, offset
, saved_scalars
;
3083 rtx exec
= gen_rtx_REG (DImode
, EXEC_REG
);
3084 rtx vcc
= gen_rtx_REG (DImode
, VCC_LO_REG
);
3085 rtx offreg
= gen_rtx_REG (SImode
, SGPR_REGNO (22));
3086 rtx as
= gen_rtx_CONST_INT (VOIDmode
, STACK_ADDR_SPACE
);
3087 HOST_WIDE_INT exec_set
= 0;
3089 auto_vec
<int> saved_sgprs
;
3093 /* Move scalars into two vector registers. */
3094 for (regno
= 0, saved_scalars
= 0; regno
< FIRST_VGPR_REG
; regno
++)
3095 if ((df_regs_ever_live_p (regno
) && !call_used_or_fixed_reg_p (regno
))
3096 || ((regno
& ~1) == LINK_REGNUM
&& offsets
->lr_needs_saving
)
3097 || ((regno
& ~1) == HARD_FRAME_POINTER_REGNUM
3098 && offsets
->need_frame_pointer
))
3100 rtx reg
= gen_rtx_REG (SImode
, regno
);
3101 rtx vreg
= gen_rtx_REG (V64SImode
,
3102 VGPR_REGNO (6 + (saved_scalars
/ 64)));
3103 int lane
= saved_scalars
% 64;
3107 emit_insn (gen_vec_setv64si (vreg
, reg
, GEN_INT (lane
)));
3108 saved_sgprs
.safe_push (regno
);
3111 emit_insn (gen_vec_extractv64sisi (reg
, vreg
, GEN_INT (lane
)));
3116 rtx move_scalars
= get_insns ();
3120 /* Ensure that all vector lanes are moved. */
3122 emit_move_insn (exec
, GEN_INT (exec_set
));
3124 /* Set up a vector stack pointer. */
3125 rtx _0_1_2_3
= gen_rtx_REG (V64SImode
, VGPR_REGNO (1));
3126 rtx _0_4_8_12
= gen_rtx_REG (V64SImode
, VGPR_REGNO (3));
3127 emit_insn (gen_ashlv64si3_exec (_0_4_8_12
, _0_1_2_3
, GEN_INT (2),
3128 gcn_gen_undef (V64SImode
), exec
));
3129 rtx vsp
= gen_rtx_REG (V64DImode
, VGPR_REGNO (4));
3130 emit_insn (gen_vec_duplicatev64di_exec (vsp
, sp
, gcn_gen_undef (V64DImode
),
3132 emit_insn (gen_addv64si3_vcc_exec (gcn_operand_part (V64SImode
, vsp
, 0),
3133 gcn_operand_part (V64SImode
, vsp
, 0),
3134 _0_4_8_12
, vcc
, gcn_gen_undef (V64SImode
),
3136 emit_insn (gen_addcv64si3_exec (gcn_operand_part (V64SImode
, vsp
, 1),
3137 gcn_operand_part (V64SImode
, vsp
, 1),
3138 const0_rtx
, vcc
, vcc
,
3139 gcn_gen_undef (V64SImode
), exec
));
3142 for (regno
= FIRST_VGPR_REG
, offset
= 0;
3143 regno
< FIRST_PSEUDO_REGISTER
; regno
++)
3144 if ((df_regs_ever_live_p (regno
) && !call_used_or_fixed_reg_p (regno
))
3145 || (regno
== VGPR_REGNO (6) && saved_scalars
> 0)
3146 || (regno
== VGPR_REGNO (7) && saved_scalars
> 63))
3148 rtx reg
= gen_rtx_REG (V64SImode
, regno
);
3151 if (regno
== VGPR_REGNO (6) && saved_scalars
< 64)
3152 size
= saved_scalars
* 4;
3153 else if (regno
== VGPR_REGNO (7) && saved_scalars
< 128)
3154 size
= (saved_scalars
- 64) * 4;
3156 if (size
!= 256 || exec_set
!= -1)
3158 exec_set
= ((unsigned HOST_WIDE_INT
) 1 << (size
/ 4)) - 1;
3159 emit_move_insn (exec
, gen_int_mode (exec_set
, DImode
));
3164 rtx insn
= emit_insn (gen_scatterv64si_insn_1offset_exec
3165 (vsp
, const0_rtx
, reg
, as
, const0_rtx
,
3168 /* Add CFI metadata. */
3170 if (regno
== VGPR_REGNO (6) || regno
== VGPR_REGNO (7))
3172 int start
= (regno
== VGPR_REGNO (7) ? 64 : 0);
3173 int count
= MIN (saved_scalars
- start
, 64);
3174 int add_lr
= (regno
== VGPR_REGNO (6)
3175 && offsets
->lr_needs_saving
);
3177 rtvec seq
= rtvec_alloc (count
+ add_lr
);
3179 /* Add an REG_FRAME_RELATED_EXPR entry for each scalar
3180 register that was saved in this batch. */
3181 for (int idx
= 0; idx
< count
; idx
++)
3183 int stackaddr
= offset
+ idx
* 4;
3184 rtx dest
= gen_rtx_MEM (SImode
,
3187 GEN_INT (stackaddr
)));
3188 rtx src
= gen_rtx_REG (SImode
, saved_sgprs
[start
+ idx
]);
3189 rtx set
= gen_rtx_SET (dest
, src
);
3190 RTX_FRAME_RELATED_P (set
) = 1;
3191 RTVEC_ELT (seq
, idx
) = set
;
3193 if (saved_sgprs
[start
+ idx
] == LINK_REGNUM
)
3197 /* Add an additional expression for DWARF_LINK_REGISTER if
3198 LINK_REGNUM was saved. */
3201 rtx dest
= gen_rtx_MEM (DImode
,
3205 rtx src
= gen_rtx_REG (DImode
, DWARF_LINK_REGISTER
);
3206 rtx set
= gen_rtx_SET (dest
, src
);
3207 RTX_FRAME_RELATED_P (set
) = 1;
3208 RTVEC_ELT (seq
, count
) = set
;
3211 note
= gen_rtx_SEQUENCE (VOIDmode
, seq
);
3215 rtx dest
= gen_rtx_MEM (V64SImode
,
3216 gen_rtx_PLUS (DImode
, sp
,
3218 rtx src
= gen_rtx_REG (V64SImode
, regno
);
3219 note
= gen_rtx_SET (dest
, src
);
3221 RTX_FRAME_RELATED_P (insn
) = 1;
3222 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, note
);
3225 emit_insn (gen_gatherv64si_insn_1offset_exec
3226 (reg
, vsp
, const0_rtx
, as
, const0_rtx
,
3227 gcn_gen_undef (V64SImode
), exec
));
3229 /* Move our VSP to the next stack entry. */
3230 if (offreg_set
!= size
)
3233 emit_move_insn (offreg
, GEN_INT (size
));
3238 emit_move_insn (exec
, GEN_INT (exec_set
));
3240 emit_insn (gen_addv64si3_vcc_dup_exec
3241 (gcn_operand_part (V64SImode
, vsp
, 0),
3242 offreg
, gcn_operand_part (V64SImode
, vsp
, 0),
3243 vcc
, gcn_gen_undef (V64SImode
), exec
));
3244 emit_insn (gen_addcv64si3_exec
3245 (gcn_operand_part (V64SImode
, vsp
, 1),
3246 gcn_operand_part (V64SImode
, vsp
, 1),
3247 const0_rtx
, vcc
, vcc
, gcn_gen_undef (V64SImode
), exec
));
3252 rtx move_vectors
= get_insns ();
3257 emit_insn (move_scalars
);
3258 emit_insn (move_vectors
);
3262 emit_insn (move_vectors
);
3263 emit_insn (move_scalars
);
3266 /* This happens when a new register becomes "live" after reload.
3267 Check your splitters! */
3268 gcc_assert (offset
<= offsets
->callee_saves
);
3271 /* Generate prologue. Called from gen_prologue during pro_and_epilogue pass.
3273 For a non-kernel function, the stack layout looks like this (interim),
3277 |__________________| <-- current SP
3279 |__________________|
3281 |__________________|
3283 |__________________| <-- FP/hard FP
3284 | callee-save regs |
3285 |__________________| <-- soft arg pointer
3287 |__________________| <-- incoming SP
3289 lo |..................|
3291 This implies arguments (beyond the first N in registers) must grow
3292 downwards (as, apparently, PA has them do).
3294 For a kernel function we have the simpler:
3297 |__________________| <-- current SP
3299 |__________________|
3301 |__________________|
3303 lo |__________________| <-- FP/hard FP
3308 gcn_expand_prologue ()
3310 machine_function
*offsets
= gcn_compute_frame_offsets ();
3312 if (!cfun
|| !cfun
->machine
|| cfun
->machine
->normal_function
)
3314 rtx sp
= gen_rtx_REG (Pmode
, STACK_POINTER_REGNUM
);
3315 rtx sp_hi
= gcn_operand_part (Pmode
, sp
, 1);
3316 rtx sp_lo
= gcn_operand_part (Pmode
, sp
, 0);
3317 rtx fp
= gen_rtx_REG (Pmode
, HARD_FRAME_POINTER_REGNUM
);
3318 rtx fp_hi
= gcn_operand_part (Pmode
, fp
, 1);
3319 rtx fp_lo
= gcn_operand_part (Pmode
, fp
, 0);
3323 if (offsets
->pretend_size
> 0)
3325 /* FIXME: Do the actual saving of register pretend args to the stack.
3326 Register order needs consideration. */
3329 /* Save callee-save regs. */
3330 move_callee_saved_registers (sp
, offsets
, true);
3332 HOST_WIDE_INT sp_adjust
= offsets
->pretend_size
3333 + offsets
->callee_saves
3334 + offsets
->local_vars
+ offsets
->outgoing_args_size
;
3337 /* Adding RTX_FRAME_RELATED_P effectively disables spliting, so
3338 we use split add explictly, and specify the DImode add in
3340 rtx scc
= gen_rtx_REG (BImode
, SCC_REG
);
3341 rtx adjustment
= gen_int_mode (sp_adjust
, SImode
);
3342 rtx insn
= emit_insn (gen_addsi3_scalar_carry (sp_lo
, sp_lo
,
3344 if (!offsets
->need_frame_pointer
)
3346 RTX_FRAME_RELATED_P (insn
) = 1;
3347 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
3349 gen_rtx_PLUS (DImode
, sp
,
3352 emit_insn (gen_addcsi3_scalar_zero (sp_hi
, sp_hi
, scc
));
3355 if (offsets
->need_frame_pointer
)
3357 /* Adding RTX_FRAME_RELATED_P effectively disables spliting, so
3358 we use split add explictly, and specify the DImode add in
3360 rtx scc
= gen_rtx_REG (BImode
, SCC_REG
);
3361 int fp_adjust
= -(offsets
->local_vars
+ offsets
->outgoing_args_size
);
3362 rtx adjustment
= gen_int_mode (fp_adjust
, SImode
);
3363 rtx insn
= emit_insn (gen_addsi3_scalar_carry(fp_lo
, sp_lo
,
3365 emit_insn (gen_addcsi3_scalar (fp_hi
, sp_hi
,
3366 (fp_adjust
< 0 ? GEN_INT (-1)
3370 /* Set the CFA to the entry stack address, as an offset from the
3371 frame pointer. This is preferred because the frame pointer is
3372 saved in each frame, whereas the stack pointer is not. */
3373 RTX_FRAME_RELATED_P (insn
) = 1;
3374 add_reg_note (insn
, REG_CFA_DEF_CFA
,
3375 gen_rtx_PLUS (DImode
, fp
,
3376 GEN_INT (-(offsets
->pretend_size
3377 + offsets
->callee_saves
))));
3380 rtx_insn
*seq
= get_insns ();
3387 if (TARGET_PACKED_WORK_ITEMS
)
3389 /* v0 conatins the X, Y and Z dimensions all in one.
3390 Expand them out for ABI compatibility. */
3391 /* TODO: implement and use zero_extract. */
3392 rtx v1
= gen_rtx_REG (V64SImode
, VGPR_REGNO (1));
3393 emit_insn (gen_andv64si3 (v1
, gen_rtx_REG (V64SImode
, VGPR_REGNO (0)),
3394 gen_rtx_CONST_INT (VOIDmode
, 0x3FF << 10)));
3395 emit_insn (gen_lshrv64si3 (v1
, v1
, gen_rtx_CONST_INT (VOIDmode
, 10)));
3396 emit_insn (gen_prologue_use (v1
));
3398 rtx v2
= gen_rtx_REG (V64SImode
, VGPR_REGNO (2));
3399 emit_insn (gen_andv64si3 (v2
, gen_rtx_REG (V64SImode
, VGPR_REGNO (0)),
3400 gen_rtx_CONST_INT (VOIDmode
, 0x3FF << 20)));
3401 emit_insn (gen_lshrv64si3 (v2
, v2
, gen_rtx_CONST_INT (VOIDmode
, 20)));
3402 emit_insn (gen_prologue_use (v2
));
3405 /* We no longer use the private segment for the stack (it's not
3406 accessible to reverse offload), so we must calculate a wave offset
3407 from the grid dimensions and stack size, which is calculated on the
3408 host, and passed in the kernargs region.
3409 See libgomp-gcn.h for details. */
3410 rtx wave_offset
= gen_rtx_REG (SImode
, FIRST_PARM_REG
);
3412 rtx num_waves_mem
= gcn_oacc_dim_size (1);
3413 rtx num_waves
= gen_rtx_REG (SImode
, FIRST_PARM_REG
+1);
3414 set_mem_addr_space (num_waves_mem
, ADDR_SPACE_SCALAR_FLAT
);
3415 emit_move_insn (num_waves
, num_waves_mem
);
3417 rtx workgroup_num
= gcn_oacc_dim_pos (0);
3418 rtx wave_num
= gen_rtx_REG (SImode
, FIRST_PARM_REG
+2);
3419 emit_move_insn(wave_num
, gcn_oacc_dim_pos (1));
3421 rtx thread_id
= gen_rtx_REG (SImode
, FIRST_PARM_REG
+3);
3422 emit_insn (gen_mulsi3 (thread_id
, num_waves
, workgroup_num
));
3423 emit_insn (gen_addsi3_scc (thread_id
, thread_id
, wave_num
));
3425 rtx kernarg_reg
= gen_rtx_REG (DImode
, cfun
->machine
->args
.reg
3426 [KERNARG_SEGMENT_PTR_ARG
]);
3427 rtx stack_size_mem
= gen_rtx_MEM (SImode
,
3428 gen_rtx_PLUS (DImode
, kernarg_reg
,
3430 set_mem_addr_space (stack_size_mem
, ADDR_SPACE_SCALAR_FLAT
);
3431 emit_move_insn (wave_offset
, stack_size_mem
);
3433 emit_insn (gen_mulsi3 (wave_offset
, wave_offset
, thread_id
));
3435 /* The FLAT_SCRATCH_INIT is not usually needed, but can be enabled
3436 via the function attributes. */
3437 if (cfun
->machine
->args
.requested
& (1 << FLAT_SCRATCH_INIT_ARG
))
3440 gen_rtx_REG (SImode
,
3441 cfun
->machine
->args
.reg
[FLAT_SCRATCH_INIT_ARG
]);
3443 gen_rtx_REG (SImode
,
3444 cfun
->machine
->args
.reg
[FLAT_SCRATCH_INIT_ARG
] + 1);
3445 rtx fs_reg_lo
= gen_rtx_REG (SImode
, FLAT_SCRATCH_REG
);
3446 rtx fs_reg_hi
= gen_rtx_REG (SImode
, FLAT_SCRATCH_REG
+ 1);
3448 /*rtx queue = gen_rtx_REG(DImode,
3449 cfun->machine->args.reg[QUEUE_PTR_ARG]);
3450 rtx aperture = gen_rtx_MEM (SImode,
3451 gen_rtx_PLUS (DImode, queue,
3452 gen_int_mode (68, SImode)));
3453 set_mem_addr_space (aperture, ADDR_SPACE_SCALAR_FLAT);*/
3455 /* Set up flat_scratch. */
3456 emit_insn (gen_addsi3_scc (fs_reg_hi
, fs_init_lo
, wave_offset
));
3457 emit_insn (gen_lshrsi3_scc (fs_reg_hi
, fs_reg_hi
,
3458 gen_int_mode (8, SImode
)));
3459 emit_move_insn (fs_reg_lo
, fs_init_hi
);
3462 /* Set up frame pointer and stack pointer. */
3463 rtx sp
= gen_rtx_REG (DImode
, STACK_POINTER_REGNUM
);
3464 rtx sp_hi
= simplify_gen_subreg (SImode
, sp
, DImode
, 4);
3465 rtx sp_lo
= simplify_gen_subreg (SImode
, sp
, DImode
, 0);
3466 rtx fp
= gen_rtx_REG (DImode
, HARD_FRAME_POINTER_REGNUM
);
3467 rtx fp_hi
= simplify_gen_subreg (SImode
, fp
, DImode
, 4);
3468 rtx fp_lo
= simplify_gen_subreg (SImode
, fp
, DImode
, 0);
3470 HOST_WIDE_INT sp_adjust
= (offsets
->local_vars
3471 + offsets
->outgoing_args_size
);
3473 /* Initialize FP and SP from space allocated on the host. */
3474 rtx stack_addr_mem
= gen_rtx_MEM (DImode
,
3475 gen_rtx_PLUS (DImode
, kernarg_reg
,
3477 set_mem_addr_space (stack_addr_mem
, ADDR_SPACE_SCALAR_FLAT
);
3478 emit_move_insn (fp
, stack_addr_mem
);
3479 rtx scc
= gen_rtx_REG (BImode
, SCC_REG
);
3480 emit_insn (gen_addsi3_scalar_carry (fp_lo
, fp_lo
, wave_offset
, scc
));
3481 emit_insn (gen_addcsi3_scalar_zero (fp_hi
, fp_hi
, scc
));
3483 /* Adding RTX_FRAME_RELATED_P effectively disables spliting, so we use
3484 split add explictly, and specify the DImode add in the note.
3485 The DWARF info expects that the callee-save data is in the frame,
3486 even though it isn't (because this is the entry point), so we
3487 make a notional adjustment to the DWARF frame offset here. */
3488 rtx dbg_adjustment
= gen_int_mode (sp_adjust
+ offsets
->callee_saves
,
3493 rtx scc
= gen_rtx_REG (BImode
, SCC_REG
);
3494 rtx adjustment
= gen_int_mode (sp_adjust
, DImode
);
3495 insn
= emit_insn (gen_addsi3_scalar_carry(sp_lo
, fp_lo
, adjustment
,
3497 emit_insn (gen_addcsi3_scalar_zero (sp_hi
, fp_hi
, scc
));
3500 insn
= emit_move_insn (sp
, fp
);
3501 RTX_FRAME_RELATED_P (insn
) = 1;
3502 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
3503 gen_rtx_SET (sp
, gen_rtx_PLUS (DImode
, sp
,
3506 if (offsets
->need_frame_pointer
)
3508 /* Set the CFA to the entry stack address, as an offset from the
3509 frame pointer. This is necessary when alloca is used, and
3510 harmless otherwise. */
3511 rtx neg_adjust
= gen_int_mode (-offsets
->callee_saves
, DImode
);
3512 add_reg_note (insn
, REG_CFA_DEF_CFA
,
3513 gen_rtx_PLUS (DImode
, fp
, neg_adjust
));
3516 /* Make sure the flat scratch reg doesn't get optimised away. */
3517 emit_insn (gen_prologue_use (gen_rtx_REG (DImode
, FLAT_SCRATCH_REG
)));
3520 /* Ensure that the scheduler doesn't do anything unexpected. */
3521 emit_insn (gen_blockage ());
3523 if (TARGET_M0_LDS_LIMIT
)
3525 /* m0 is initialized for the usual LDS DS and FLAT memory case.
3526 The low-part is the address of the topmost addressable byte, which is
3527 size-1. The high-part is an offset and should be zero. */
3528 emit_move_insn (gen_rtx_REG (SImode
, M0_REG
),
3529 gen_int_mode (LDS_SIZE
, SImode
));
3531 emit_insn (gen_prologue_use (gen_rtx_REG (SImode
, M0_REG
)));
3534 if (cfun
&& cfun
->machine
&& !cfun
->machine
->normal_function
&& flag_openmp
)
3536 /* OpenMP kernels have an implicit call to gomp_gcn_enter_kernel. */
3537 rtx fn_reg
= gen_rtx_REG (Pmode
, FIRST_PARM_REG
);
3538 emit_move_insn (fn_reg
, gen_rtx_SYMBOL_REF (Pmode
,
3539 "gomp_gcn_enter_kernel"));
3540 emit_call_insn (gen_gcn_indirect_call (fn_reg
, const0_rtx
));
3544 /* Generate epilogue. Called from gen_epilogue during pro_and_epilogue pass.
3546 See gcn_expand_prologue for stack details. */
3549 gcn_expand_epilogue (void)
3551 /* Ensure that the scheduler doesn't do anything unexpected. */
3552 emit_insn (gen_blockage ());
3554 if (!cfun
|| !cfun
->machine
|| cfun
->machine
->normal_function
)
3556 machine_function
*offsets
= gcn_compute_frame_offsets ();
3557 rtx sp
= gen_rtx_REG (Pmode
, STACK_POINTER_REGNUM
);
3558 rtx fp
= gen_rtx_REG (Pmode
, HARD_FRAME_POINTER_REGNUM
);
3560 HOST_WIDE_INT sp_adjust
= offsets
->callee_saves
+ offsets
->pretend_size
;
3562 if (offsets
->need_frame_pointer
)
3564 /* Restore old SP from the frame pointer. */
3566 emit_insn (gen_subdi3 (sp
, fp
, gen_int_mode (sp_adjust
, DImode
)));
3568 emit_move_insn (sp
, fp
);
3572 /* Restore old SP from current SP. */
3573 sp_adjust
+= offsets
->outgoing_args_size
+ offsets
->local_vars
;
3576 emit_insn (gen_subdi3 (sp
, sp
, gen_int_mode (sp_adjust
, DImode
)));
3579 move_callee_saved_registers (sp
, offsets
, false);
3581 /* There's no explicit use of the link register on the return insn. Emit
3582 one here instead. */
3583 if (offsets
->lr_needs_saving
)
3584 emit_use (gen_rtx_REG (DImode
, LINK_REGNUM
));
3586 /* Similar for frame pointer. */
3587 if (offsets
->need_frame_pointer
)
3588 emit_use (gen_rtx_REG (DImode
, HARD_FRAME_POINTER_REGNUM
));
3590 else if (flag_openmp
)
3592 /* OpenMP kernels have an implicit call to gomp_gcn_exit_kernel. */
3593 rtx fn_reg
= gen_rtx_REG (Pmode
, FIRST_PARM_REG
);
3594 emit_move_insn (fn_reg
,
3595 gen_rtx_SYMBOL_REF (Pmode
, "gomp_gcn_exit_kernel"));
3596 emit_call_insn (gen_gcn_indirect_call (fn_reg
, const0_rtx
));
3598 else if (TREE_CODE (TREE_TYPE (DECL_RESULT (cfun
->decl
))) != VOID_TYPE
)
3600 /* Assume that an exit value compatible with gcn-run is expected.
3601 That is, the third input parameter is an int*.
3603 We can't allocate any new registers, but the kernarg_reg is
3604 dead after this, so we'll use that. */
3605 rtx kernarg_reg
= gen_rtx_REG (DImode
, cfun
->machine
->args
.reg
3606 [KERNARG_SEGMENT_PTR_ARG
]);
3607 rtx retptr_mem
= gen_rtx_MEM (DImode
,
3608 gen_rtx_PLUS (DImode
, kernarg_reg
,
3610 set_mem_addr_space (retptr_mem
, ADDR_SPACE_SCALAR_FLAT
);
3611 emit_move_insn (kernarg_reg
, retptr_mem
);
3613 rtx retval_mem
= gen_rtx_MEM (SImode
, kernarg_reg
);
3614 rtx scalar_retval
= gen_rtx_REG (SImode
, FIRST_PARM_REG
);
3615 set_mem_addr_space (retval_mem
, ADDR_SPACE_SCALAR_FLAT
);
3616 emit_move_insn (scalar_retval
, gen_rtx_REG (SImode
, RETURN_VALUE_REG
));
3617 emit_move_insn (retval_mem
, scalar_retval
);
3620 emit_jump_insn (gen_gcn_return ());
3623 /* Implement TARGET_FRAME_POINTER_REQUIRED.
3625 Return true if the frame pointer should not be eliminated. */
3628 gcn_frame_pointer_rqd (void)
3630 /* GDB needs the frame pointer in order to unwind properly,
3631 but that's not important for the entry point, unless alloca is used.
3632 It's not important for code execution, so we should repect the
3633 -fomit-frame-pointer flag. */
3634 return (!flag_omit_frame_pointer
3636 && (cfun
->calls_alloca
3637 || (cfun
->machine
&& cfun
->machine
->normal_function
)));
3640 /* Implement TARGET_CAN_ELIMINATE.
3642 Return true if the compiler is allowed to try to replace register number
3643 FROM_REG with register number TO_REG.
3645 FIXME: is the default "true" not enough? Should this be a negative set? */
3648 gcn_can_eliminate_p (int /*from_reg */ , int to_reg
)
3650 return (to_reg
== HARD_FRAME_POINTER_REGNUM
3651 || to_reg
== STACK_POINTER_REGNUM
);
3654 /* Implement INITIAL_ELIMINATION_OFFSET.
3656 Returns the initial difference between the specified pair of registers, in
3657 terms of stack position. */
3660 gcn_initial_elimination_offset (int from
, int to
)
3662 machine_function
*offsets
= gcn_compute_frame_offsets ();
3666 case ARG_POINTER_REGNUM
:
3667 if (to
== STACK_POINTER_REGNUM
)
3668 return -(offsets
->callee_saves
+ offsets
->local_vars
3669 + offsets
->outgoing_args_size
);
3670 else if (to
== FRAME_POINTER_REGNUM
|| to
== HARD_FRAME_POINTER_REGNUM
)
3671 return -offsets
->callee_saves
;
3676 case FRAME_POINTER_REGNUM
:
3677 if (to
== STACK_POINTER_REGNUM
)
3678 return -(offsets
->local_vars
+ offsets
->outgoing_args_size
);
3679 else if (to
== HARD_FRAME_POINTER_REGNUM
)
3690 /* Implement HARD_REGNO_RENAME_OK.
3692 Return true if it is permissible to rename a hard register from
3693 FROM_REG to TO_REG. */
3696 gcn_hard_regno_rename_ok (unsigned int from_reg
, unsigned int to_reg
)
3698 if (from_reg
== SCC_REG
3699 || from_reg
== VCC_LO_REG
|| from_reg
== VCC_HI_REG
3700 || from_reg
== EXEC_LO_REG
|| from_reg
== EXEC_HI_REG
3701 || to_reg
== SCC_REG
3702 || to_reg
== VCC_LO_REG
|| to_reg
== VCC_HI_REG
3703 || to_reg
== EXEC_LO_REG
|| to_reg
== EXEC_HI_REG
)
3706 /* Allow the link register to be used if it was saved. */
3707 if ((to_reg
& ~1) == LINK_REGNUM
)
3708 return !cfun
|| cfun
->machine
->lr_needs_saving
;
3710 /* Allow the registers used for the static chain to be used if the chain is
3711 not in active use. */
3712 if ((to_reg
& ~1) == STATIC_CHAIN_REGNUM
)
3714 || !(cfun
->static_chain_decl
3715 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM
)
3716 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM
+ 1));
3721 /* Implement HARD_REGNO_CALLER_SAVE_MODE.
3723 Which mode is required for saving NREGS of a pseudo-register in
3724 call-clobbered hard register REGNO. */
3727 gcn_hard_regno_caller_save_mode (unsigned int regno
, unsigned int nregs
,
3728 machine_mode regmode
)
3730 machine_mode result
= choose_hard_reg_mode (regno
, nregs
, NULL
);
3732 if (VECTOR_MODE_P (result
) && !VECTOR_MODE_P (regmode
))
3733 result
= (nregs
== 1 ? SImode
: DImode
);
3738 /* Implement TARGET_ASM_TRAMPOLINE_TEMPLATE.
3740 Output assembler code for a block containing the constant parts
3741 of a trampoline, leaving space for the variable parts. */
3744 gcn_asm_trampoline_template (FILE *f
)
3746 /* The source operand of the move instructions must be a 32-bit
3747 constant following the opcode. */
3748 asm_fprintf (f
, "\ts_mov_b32\ts%i, 0xffff\n", STATIC_CHAIN_REGNUM
);
3749 asm_fprintf (f
, "\ts_mov_b32\ts%i, 0xffff\n", STATIC_CHAIN_REGNUM
+ 1);
3750 asm_fprintf (f
, "\ts_mov_b32\ts%i, 0xffff\n", CC_SAVE_REG
);
3751 asm_fprintf (f
, "\ts_mov_b32\ts%i, 0xffff\n", CC_SAVE_REG
+ 1);
3752 asm_fprintf (f
, "\ts_setpc_b64\ts[%i:%i]\n", CC_SAVE_REG
, CC_SAVE_REG
+ 1);
3755 /* Implement TARGET_TRAMPOLINE_INIT.
3757 Emit RTL insns to initialize the variable parts of a trampoline.
3758 FNDECL is the decl of the target address, M_TRAMP is a MEM for
3759 the trampoline, and CHAIN_VALUE is an RTX for the static chain
3760 to be passed to the target function. */
3763 gcn_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
3765 if (TARGET_GCN5_PLUS
)
3766 sorry ("nested function trampolines not supported on GCN5 due to"
3767 " non-executable stacks");
3769 emit_block_move (m_tramp
, assemble_trampoline_template (),
3770 GEN_INT (TRAMPOLINE_SIZE
), BLOCK_OP_NORMAL
);
3772 rtx fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
3773 rtx chain_value_reg
= copy_to_reg (chain_value
);
3774 rtx fnaddr_reg
= copy_to_reg (fnaddr
);
3776 for (int i
= 0; i
< 4; i
++)
3778 rtx mem
= adjust_address (m_tramp
, SImode
, i
* 8 + 4);
3779 rtx reg
= i
< 2 ? chain_value_reg
: fnaddr_reg
;
3780 emit_move_insn (mem
, gen_rtx_SUBREG (SImode
, reg
, (i
% 2) * 4));
3783 rtx tramp_addr
= XEXP (m_tramp
, 0);
3784 emit_insn (gen_clear_icache (tramp_addr
,
3785 plus_constant (ptr_mode
, tramp_addr
,
3789 /* Implement TARGET_EXPAND_DIVMOD_LIBFUNC.
3791 There are divmod libfuncs for all modes except TImode. They return the
3792 two values packed into a larger integer/vector. */
3795 gcn_expand_divmod_libfunc (rtx libfunc
, machine_mode mode
, rtx op0
, rtx op1
,
3796 rtx
*quot
, rtx
*rem
)
3798 machine_mode innermode
= (VECTOR_MODE_P (mode
)
3799 ? GET_MODE_INNER (mode
) : mode
);
3800 machine_mode wideinnermode
= VOIDmode
;
3801 machine_mode widemode
= VOIDmode
;
3808 wideinnermode
= DImode
;
3811 wideinnermode
= TImode
;
3817 if (VECTOR_MODE_P (mode
))
3818 widemode
= VnMODE (GET_MODE_NUNITS (mode
), wideinnermode
);
3820 widemode
= wideinnermode
;
3822 emit_library_call_value (libfunc
, gen_rtx_REG (widemode
, RETURN_VALUE_REG
),
3823 LCT_NORMAL
, widemode
, op0
, mode
, op1
, mode
);
3825 *quot
= gen_rtx_REG (mode
, RETURN_VALUE_REG
);
3826 *rem
= gen_rtx_REG (mode
,
3827 RETURN_VALUE_REG
+ (wideinnermode
== TImode
? 2 : 1));
3831 /* {{{ Miscellaneous. */
3833 /* Implement TARGET_CANNOT_COPY_INSN_P.
3835 Return true if INSN must not be duplicated. */
3838 gcn_cannot_copy_insn_p (rtx_insn
*insn
)
3840 if (recog_memoized (insn
) == CODE_FOR_gcn_wavefront_barrier
)
3846 /* Implement TARGET_DEBUG_UNWIND_INFO.
3848 Defines the mechanism that will be used for describing frame unwind
3849 information to the debugger. */
3851 static enum unwind_info_type
3852 gcn_debug_unwind_info ()
3857 /* Determine if there is a suitable hardware conversion instruction.
3858 Used primarily by the machine description. */
3861 gcn_valid_cvt_p (machine_mode from
, machine_mode to
, enum gcn_cvt_t op
)
3863 if (VECTOR_MODE_P (from
) != VECTOR_MODE_P (to
))
3866 if (VECTOR_MODE_P (from
))
3868 if (GET_MODE_NUNITS (from
) != GET_MODE_NUNITS (to
))
3871 from
= GET_MODE_INNER (from
);
3872 to
= GET_MODE_INNER (to
);
3878 case fixuns_trunc_cvt
:
3879 if (GET_MODE_CLASS (from
) != MODE_FLOAT
3880 || GET_MODE_CLASS (to
) != MODE_INT
)
3885 if (GET_MODE_CLASS (from
) != MODE_INT
3886 || GET_MODE_CLASS (to
) != MODE_FLOAT
)
3890 if (GET_MODE_CLASS (from
) != MODE_FLOAT
3891 || GET_MODE_CLASS (to
) != MODE_FLOAT
3892 || GET_MODE_SIZE (from
) >= GET_MODE_SIZE (to
))
3896 if (GET_MODE_CLASS (from
) != MODE_FLOAT
3897 || GET_MODE_CLASS (to
) != MODE_FLOAT
3898 || GET_MODE_SIZE (from
) <= GET_MODE_SIZE (to
))
3903 return ((to
== HImode
&& from
== HFmode
)
3904 || (to
== SImode
&& (from
== SFmode
|| from
== DFmode
))
3905 || (to
== HFmode
&& (from
== HImode
|| from
== SFmode
))
3906 || (to
== SFmode
&& (from
== SImode
|| from
== HFmode
3908 || (to
== DFmode
&& (from
== SImode
|| from
== SFmode
)));
3911 /* Implement TARGET_EMUTLS_VAR_INIT.
3913 Disable emutls (gthr-gcn.h does not support it, yet). */
3916 gcn_emutls_var_init (tree
, tree decl
, tree
)
3918 sorry_at (DECL_SOURCE_LOCATION (decl
), "TLS is not implemented for GCN.");
3925 /* Implement TARGET_RTX_COSTS.
3927 Compute a (partial) cost for rtx X. Return true if the complete
3928 cost has been computed, and false if subexpressions should be
3929 scanned. In either case, *TOTAL contains the cost result. */
3932 gcn_rtx_costs (rtx x
, machine_mode
, int, int, int *total
, bool)
3934 enum rtx_code code
= GET_CODE (x
);
3941 if (gcn_inline_constant_p (x
))
3943 else if (code
== CONST_INT
3944 && ((unsigned HOST_WIDE_INT
) INTVAL (x
) + 0x8000) < 0x10000)
3946 else if (gcn_constant_p (x
))
3949 *total
= vgpr_vector_mode_p (GET_MODE (x
)) ? 64 : 4;
3962 /* Implement TARGET_MEMORY_MOVE_COST.
3964 Return the cost of moving data of mode M between a
3965 register and memory. A value of 2 is the default; this cost is
3966 relative to those in `REGISTER_MOVE_COST'.
3968 This function is used extensively by register_move_cost that is used to
3969 build tables at startup. Make it inline in this case.
3970 When IN is 2, return maximum of in and out move cost.
3972 If moving between registers and memory is more expensive than
3973 between two registers, you should define this macro to express the
3976 Model also increased moving costs of QImode registers in non
3979 #define LOAD_COST 32
3980 #define STORE_COST 32
3982 gcn_memory_move_cost (machine_mode mode
, reg_class_t regclass
, bool in
)
3984 int nregs
= CEIL (GET_MODE_SIZE (mode
), 4);
3987 case SCC_CONDITIONAL_REG
:
3988 case VCCZ_CONDITIONAL_REG
:
3989 case VCC_CONDITIONAL_REG
:
3990 case EXECZ_CONDITIONAL_REG
:
3991 case ALL_CONDITIONAL_REGS
:
3993 case SGPR_EXEC_REGS
:
3995 case SGPR_VOP_SRC_REGS
:
3996 case SGPR_MEM_SRC_REGS
:
4002 return (STORE_COST
+ 2) * nregs
;
4003 return LOAD_COST
* nregs
;
4006 return (LOAD_COST
+ 2) * nregs
;
4007 return STORE_COST
* nregs
;
4012 return (LOAD_COST
+ 2) * nregs
;
4013 return (STORE_COST
+ 2) * nregs
;
4019 /* Implement TARGET_REGISTER_MOVE_COST.
4021 Return the cost of moving data from a register in class CLASS1 to
4022 one in class CLASS2. Base value is 2. */
4025 gcn_register_move_cost (machine_mode
, reg_class_t dst
, reg_class_t src
)
4027 /* Increase cost of moving from and to vector registers. While this is
4028 fast in hardware (I think), it has hidden cost of setting up the exec
4030 if ((src
< VGPR_REGS
) != (dst
< VGPR_REGS
))
4038 /* Type codes used by GCN built-in definitions. */
4040 enum gcn_builtin_type_index
4042 GCN_BTI_END_OF_PARAMS
,
4062 GCN_BTI_LDS_VOIDPTR
,
4067 static GTY(()) tree gcn_builtin_types
[GCN_BTI_MAX
];
4069 #define exec_type_node (gcn_builtin_types[GCN_BTI_EXEC])
4070 #define sf_type_node (gcn_builtin_types[GCN_BTI_SF])
4071 #define v64si_type_node (gcn_builtin_types[GCN_BTI_V64SI])
4072 #define v64sf_type_node (gcn_builtin_types[GCN_BTI_V64SF])
4073 #define v64df_type_node (gcn_builtin_types[GCN_BTI_V64DF])
4074 #define v64ptr_type_node (gcn_builtin_types[GCN_BTI_V64PTR])
4075 #define siptr_type_node (gcn_builtin_types[GCN_BTI_SIPTR])
4076 #define sfptr_type_node (gcn_builtin_types[GCN_BTI_SFPTR])
4077 #define voidptr_type_node (gcn_builtin_types[GCN_BTI_VOIDPTR])
4078 #define size_t_type_node (gcn_builtin_types[GCN_BTI_SIZE_T])
4080 static rtx
gcn_expand_builtin_1 (tree
, rtx
, rtx
, machine_mode
, int,
4081 struct gcn_builtin_description
*);
4082 static rtx
gcn_expand_builtin_binop (tree
, rtx
, rtx
, machine_mode
, int,
4083 struct gcn_builtin_description
*);
4085 struct gcn_builtin_description
;
4086 typedef rtx (*gcn_builtin_expander
) (tree
, rtx
, rtx
, machine_mode
, int,
4087 struct gcn_builtin_description
*);
4089 enum gcn_builtin_type
4091 B_UNIMPLEMENTED
, /* Sorry out */
4092 B_INSN
, /* Emit a pattern */
4093 B_OVERLOAD
/* Placeholder for an overloaded function */
4096 struct gcn_builtin_description
4101 enum gcn_builtin_type type
;
4102 /* The first element of parm is always the return type. The rest
4103 are a zero terminated list of parameters. */
4105 gcn_builtin_expander expander
;
4108 /* Read in the GCN builtins from gcn-builtins.def. */
4110 extern GTY(()) struct gcn_builtin_description gcn_builtins
[GCN_BUILTIN_MAX
];
4112 struct gcn_builtin_description gcn_builtins
[] = {
4113 #define DEF_BUILTIN(fcode, icode, name, type, params, expander) \
4114 {GCN_BUILTIN_ ## fcode, icode, name, type, params, expander},
4116 #define DEF_BUILTIN_BINOP_INT_FP(fcode, ic, name) \
4117 {GCN_BUILTIN_ ## fcode ## _V64SI, \
4118 CODE_FOR_ ## ic ##v64si3_exec, name "_v64int", B_INSN, \
4119 {GCN_BTI_V64SI, GCN_BTI_EXEC, GCN_BTI_V64SI, GCN_BTI_V64SI, \
4120 GCN_BTI_V64SI, GCN_BTI_END_OF_PARAMS}, gcn_expand_builtin_binop}, \
4121 {GCN_BUILTIN_ ## fcode ## _V64SI_unspec, \
4122 CODE_FOR_ ## ic ##v64si3_exec, name "_v64int_unspec", B_INSN, \
4123 {GCN_BTI_V64SI, GCN_BTI_EXEC, GCN_BTI_V64SI, GCN_BTI_V64SI, \
4124 GCN_BTI_END_OF_PARAMS}, gcn_expand_builtin_binop},
4126 #include "gcn-builtins.def"
4127 #undef DEF_BUILTIN_BINOP_INT_FP
4131 static GTY(()) tree gcn_builtin_decls
[GCN_BUILTIN_MAX
];
4133 /* Implement TARGET_BUILTIN_DECL.
4135 Return the GCN builtin for CODE. */
4138 gcn_builtin_decl (unsigned code
, bool ARG_UNUSED (initialize_p
))
4140 if (code
>= GCN_BUILTIN_MAX
)
4141 return error_mark_node
;
4143 return gcn_builtin_decls
[code
];
4146 /* Helper function for gcn_init_builtins. */
4149 gcn_init_builtin_types (void)
4151 gcn_builtin_types
[GCN_BTI_VOID
] = void_type_node
;
4152 gcn_builtin_types
[GCN_BTI_BOOL
] = boolean_type_node
;
4153 gcn_builtin_types
[GCN_BTI_INT
] = intSI_type_node
;
4154 gcn_builtin_types
[GCN_BTI_UINT
] = unsigned_type_for (intSI_type_node
);
4155 gcn_builtin_types
[GCN_BTI_SIZE_T
] = size_type_node
;
4156 gcn_builtin_types
[GCN_BTI_LLINT
] = intDI_type_node
;
4157 gcn_builtin_types
[GCN_BTI_LLUINT
] = unsigned_type_for (intDI_type_node
);
4159 exec_type_node
= unsigned_intDI_type_node
;
4160 sf_type_node
= float32_type_node
;
4161 v64si_type_node
= build_vector_type (intSI_type_node
, 64);
4162 v64sf_type_node
= build_vector_type (float_type_node
, 64);
4163 v64df_type_node
= build_vector_type (double_type_node
, 64);
4164 v64ptr_type_node
= build_vector_type (unsigned_intDI_type_node
4165 /*build_pointer_type
4166 (integer_type_node) */
4168 tree tmp
= build_distinct_type_copy (intSI_type_node
);
4169 TYPE_ADDR_SPACE (tmp
) = ADDR_SPACE_DEFAULT
;
4170 siptr_type_node
= build_pointer_type (tmp
);
4172 tmp
= build_distinct_type_copy (float_type_node
);
4173 TYPE_ADDR_SPACE (tmp
) = ADDR_SPACE_DEFAULT
;
4174 sfptr_type_node
= build_pointer_type (tmp
);
4176 tmp
= build_distinct_type_copy (void_type_node
);
4177 TYPE_ADDR_SPACE (tmp
) = ADDR_SPACE_DEFAULT
;
4178 voidptr_type_node
= build_pointer_type (tmp
);
4180 tmp
= build_distinct_type_copy (void_type_node
);
4181 TYPE_ADDR_SPACE (tmp
) = ADDR_SPACE_LDS
;
4182 gcn_builtin_types
[GCN_BTI_LDS_VOIDPTR
] = build_pointer_type (tmp
);
4185 /* Implement TARGET_INIT_BUILTINS.
4187 Set up all builtin functions for this target. */
4190 gcn_init_builtins (void)
4192 gcn_init_builtin_types ();
4194 struct gcn_builtin_description
*d
;
4196 for (i
= 0, d
= gcn_builtins
; i
< GCN_BUILTIN_MAX
; i
++, d
++)
4199 char name
[64]; /* build_function will make a copy. */
4202 /* FIXME: Is this necessary/useful? */
4206 /* Find last parm. */
4207 for (parm
= 1; d
->parm
[parm
] != GCN_BTI_END_OF_PARAMS
; parm
++)
4212 p
= tree_cons (NULL_TREE
, gcn_builtin_types
[d
->parm
[--parm
]], p
);
4214 p
= build_function_type (gcn_builtin_types
[d
->parm
[0]], p
);
4216 sprintf (name
, "__builtin_gcn_%s", d
->name
);
4217 gcn_builtin_decls
[i
]
4218 = add_builtin_function (name
, p
, i
, BUILT_IN_MD
, NULL
, NULL_TREE
);
4220 /* These builtins don't throw. */
4221 TREE_NOTHROW (gcn_builtin_decls
[i
]) = 1;
4224 /* These builtins need to take/return an LDS pointer: override the generic
4227 set_builtin_decl (BUILT_IN_GOACC_SINGLE_START
,
4228 gcn_builtin_decls
[GCN_BUILTIN_ACC_SINGLE_START
], false);
4230 set_builtin_decl (BUILT_IN_GOACC_SINGLE_COPY_START
,
4231 gcn_builtin_decls
[GCN_BUILTIN_ACC_SINGLE_COPY_START
],
4234 set_builtin_decl (BUILT_IN_GOACC_SINGLE_COPY_END
,
4235 gcn_builtin_decls
[GCN_BUILTIN_ACC_SINGLE_COPY_END
],
4238 set_builtin_decl (BUILT_IN_GOACC_BARRIER
,
4239 gcn_builtin_decls
[GCN_BUILTIN_ACC_BARRIER
], false);
4242 /* Implement TARGET_INIT_LIBFUNCS. */
4245 gcn_init_libfuncs (void)
4247 /* BITS_PER_UNIT * 2 is 64 bits, which causes
4248 optabs-libfuncs.cc:gen_int_libfunc to omit TImode (i.e 128 bits)
4249 libcalls that we need to support operations for that type. Initialise
4250 them here instead. */
4251 set_optab_libfunc (udiv_optab
, TImode
, "__udivti3");
4252 set_optab_libfunc (umod_optab
, TImode
, "__umodti3");
4253 set_optab_libfunc (sdiv_optab
, TImode
, "__divti3");
4254 set_optab_libfunc (smod_optab
, TImode
, "__modti3");
4255 set_optab_libfunc (smul_optab
, TImode
, "__multi3");
4256 set_optab_libfunc (addv_optab
, TImode
, "__addvti3");
4257 set_optab_libfunc (subv_optab
, TImode
, "__subvti3");
4258 set_optab_libfunc (negv_optab
, TImode
, "__negvti2");
4259 set_optab_libfunc (absv_optab
, TImode
, "__absvti2");
4260 set_optab_libfunc (smulv_optab
, TImode
, "__mulvti3");
4261 set_optab_libfunc (ffs_optab
, TImode
, "__ffsti2");
4262 set_optab_libfunc (clz_optab
, TImode
, "__clzti2");
4263 set_optab_libfunc (ctz_optab
, TImode
, "__ctzti2");
4264 set_optab_libfunc (clrsb_optab
, TImode
, "__clrsbti2");
4265 set_optab_libfunc (popcount_optab
, TImode
, "__popcountti2");
4266 set_optab_libfunc (parity_optab
, TImode
, "__parityti2");
4267 set_optab_libfunc (bswap_optab
, TImode
, "__bswapti2");
4269 set_optab_libfunc (sdivmod_optab
, SImode
, "__divmodsi4");
4270 set_optab_libfunc (udivmod_optab
, SImode
, "__udivmodsi4");
4271 set_optab_libfunc (sdivmod_optab
, DImode
, "__divmoddi4");
4272 set_optab_libfunc (udivmod_optab
, DImode
, "__udivmoddi4");
4274 set_optab_libfunc (sdiv_optab
, V2QImode
, "__divv2qi3");
4275 set_optab_libfunc (udiv_optab
, V2QImode
, "__udivv2qi3");
4276 set_optab_libfunc (smod_optab
, V2QImode
, "__modv2qi3");
4277 set_optab_libfunc (umod_optab
, V2QImode
, "__umodv2qi3");
4279 set_optab_libfunc (sdivmod_optab
, V2QImode
, "__divmodv2qi4");
4280 set_optab_libfunc (udivmod_optab
, V2QImode
, "__udivmodv2qi4");
4282 set_optab_libfunc (sdiv_optab
, V4QImode
, "__divv4qi3");
4283 set_optab_libfunc (udiv_optab
, V4QImode
, "__udivv4qi3");
4284 set_optab_libfunc (smod_optab
, V4QImode
, "__modv4qi3");
4285 set_optab_libfunc (umod_optab
, V4QImode
, "__umodv4qi3");
4287 set_optab_libfunc (sdivmod_optab
, V4QImode
, "__divmodv4qi4");
4288 set_optab_libfunc (udivmod_optab
, V4QImode
, "__udivmodv4qi4");
4290 set_optab_libfunc (sdiv_optab
, V8QImode
, "__divv8qi3");
4291 set_optab_libfunc (udiv_optab
, V8QImode
, "__udivv8qi3");
4292 set_optab_libfunc (smod_optab
, V8QImode
, "__modv8qi3");
4293 set_optab_libfunc (umod_optab
, V8QImode
, "__umodv8qi3");
4295 set_optab_libfunc (sdivmod_optab
, V8QImode
, "__divmodv8qi4");
4296 set_optab_libfunc (udivmod_optab
, V8QImode
, "__udivmodv8qi4");
4298 set_optab_libfunc (sdiv_optab
, V16QImode
, "__divv16qi3");
4299 set_optab_libfunc (udiv_optab
, V16QImode
, "__udivv16qi3");
4300 set_optab_libfunc (smod_optab
, V16QImode
, "__modv16qi3");
4301 set_optab_libfunc (umod_optab
, V16QImode
, "__umodv16qi3");
4303 set_optab_libfunc (sdivmod_optab
, V16QImode
, "__divmodv16qi4");
4304 set_optab_libfunc (udivmod_optab
, V16QImode
, "__udivmodv16qi4");
4306 set_optab_libfunc (sdiv_optab
, V32QImode
, "__divv32qi3");
4307 set_optab_libfunc (udiv_optab
, V32QImode
, "__udivv32qi3");
4308 set_optab_libfunc (smod_optab
, V32QImode
, "__modv32qi3");
4309 set_optab_libfunc (umod_optab
, V32QImode
, "__umodv32qi3");
4311 set_optab_libfunc (sdivmod_optab
, V32QImode
, "__divmodv32qi4");
4312 set_optab_libfunc (udivmod_optab
, V32QImode
, "__udivmodv32qi4");
4314 set_optab_libfunc (sdiv_optab
, V64QImode
, "__divv64qi3");
4315 set_optab_libfunc (udiv_optab
, V64QImode
, "__udivv64qi3");
4316 set_optab_libfunc (smod_optab
, V64QImode
, "__modv64qi3");
4317 set_optab_libfunc (umod_optab
, V64QImode
, "__umodv64qi3");
4319 set_optab_libfunc (sdivmod_optab
, V64QImode
, "__divmodv64qi4");
4320 set_optab_libfunc (udivmod_optab
, V64QImode
, "__udivmodv64qi4");
4323 set_optab_libfunc (sdiv_optab
, V2HImode
, "__divv2hi3");
4324 set_optab_libfunc (udiv_optab
, V2HImode
, "__udivv2hi3");
4325 set_optab_libfunc (smod_optab
, V2HImode
, "__modv2hi3");
4326 set_optab_libfunc (umod_optab
, V2HImode
, "__umodv2hi3");
4328 set_optab_libfunc (sdivmod_optab
, V2HImode
, "__divmodv2hi4");
4329 set_optab_libfunc (udivmod_optab
, V2HImode
, "__udivmodv2hi4");
4331 set_optab_libfunc (sdiv_optab
, V4HImode
, "__divv4hi3");
4332 set_optab_libfunc (udiv_optab
, V4HImode
, "__udivv4hi3");
4333 set_optab_libfunc (smod_optab
, V4HImode
, "__modv4hi3");
4334 set_optab_libfunc (umod_optab
, V4HImode
, "__umodv4hi3");
4336 set_optab_libfunc (sdivmod_optab
, V4HImode
, "__divmodv4hi4");
4337 set_optab_libfunc (udivmod_optab
, V4HImode
, "__udivmodv4hi4");
4339 set_optab_libfunc (sdiv_optab
, V8HImode
, "__divv8hi3");
4340 set_optab_libfunc (udiv_optab
, V8HImode
, "__udivv8hi3");
4341 set_optab_libfunc (smod_optab
, V8HImode
, "__modv8hi3");
4342 set_optab_libfunc (umod_optab
, V8HImode
, "__umodv8hi3");
4344 set_optab_libfunc (sdivmod_optab
, V8HImode
, "__divmodv8hi4");
4345 set_optab_libfunc (udivmod_optab
, V8HImode
, "__udivmodv8hi4");
4347 set_optab_libfunc (sdiv_optab
, V16HImode
, "__divv16hi3");
4348 set_optab_libfunc (udiv_optab
, V16HImode
, "__udivv16hi3");
4349 set_optab_libfunc (smod_optab
, V16HImode
, "__modv16hi3");
4350 set_optab_libfunc (umod_optab
, V16HImode
, "__umodv16hi3");
4352 set_optab_libfunc (sdivmod_optab
, V16HImode
, "__divmodv16hi4");
4353 set_optab_libfunc (udivmod_optab
, V16HImode
, "__udivmodv16hi4");
4355 set_optab_libfunc (sdiv_optab
, V32HImode
, "__divv32hi3");
4356 set_optab_libfunc (udiv_optab
, V32HImode
, "__udivv32hi3");
4357 set_optab_libfunc (smod_optab
, V32HImode
, "__modv32hi3");
4358 set_optab_libfunc (umod_optab
, V32HImode
, "__umodv32hi3");
4360 set_optab_libfunc (sdivmod_optab
, V32HImode
, "__divmodv32hi4");
4361 set_optab_libfunc (udivmod_optab
, V32HImode
, "__udivmodv32hi4");
4363 set_optab_libfunc (sdiv_optab
, V64HImode
, "__divv64hi3");
4364 set_optab_libfunc (udiv_optab
, V64HImode
, "__udivv64hi3");
4365 set_optab_libfunc (smod_optab
, V64HImode
, "__modv64hi3");
4366 set_optab_libfunc (umod_optab
, V64HImode
, "__umodv64hi3");
4368 set_optab_libfunc (sdivmod_optab
, V64HImode
, "__divmodv64hi4");
4369 set_optab_libfunc (udivmod_optab
, V64HImode
, "__udivmodv64hi4");
4372 set_optab_libfunc (sdiv_optab
, V2SImode
, "__divv2si3");
4373 set_optab_libfunc (udiv_optab
, V2SImode
, "__udivv2si3");
4374 set_optab_libfunc (smod_optab
, V2SImode
, "__modv2si3");
4375 set_optab_libfunc (umod_optab
, V2SImode
, "__umodv2si3");
4377 set_optab_libfunc (sdivmod_optab
, V2SImode
, "__divmodv2si4");
4378 set_optab_libfunc (udivmod_optab
, V2SImode
, "__udivmodv2si4");
4380 set_optab_libfunc (sdiv_optab
, V4SImode
, "__divv4si3");
4381 set_optab_libfunc (udiv_optab
, V4SImode
, "__udivv4si3");
4382 set_optab_libfunc (smod_optab
, V4SImode
, "__modv4si3");
4383 set_optab_libfunc (umod_optab
, V4SImode
, "__umodv4si3");
4385 set_optab_libfunc (sdivmod_optab
, V4SImode
, "__divmodv4si4");
4386 set_optab_libfunc (udivmod_optab
, V4SImode
, "__udivmodv4si4");
4388 set_optab_libfunc (sdiv_optab
, V8SImode
, "__divv8si3");
4389 set_optab_libfunc (udiv_optab
, V8SImode
, "__udivv8si3");
4390 set_optab_libfunc (smod_optab
, V8SImode
, "__modv8si3");
4391 set_optab_libfunc (umod_optab
, V8SImode
, "__umodv8si3");
4393 set_optab_libfunc (sdivmod_optab
, V8SImode
, "__divmodv8si4");
4394 set_optab_libfunc (udivmod_optab
, V8SImode
, "__udivmodv8si4");
4396 set_optab_libfunc (sdiv_optab
, V16SImode
, "__divv16si3");
4397 set_optab_libfunc (udiv_optab
, V16SImode
, "__udivv16si3");
4398 set_optab_libfunc (smod_optab
, V16SImode
, "__modv16si3");
4399 set_optab_libfunc (umod_optab
, V16SImode
, "__umodv16si3");
4401 set_optab_libfunc (sdivmod_optab
, V16SImode
, "__divmodv16si4");
4402 set_optab_libfunc (udivmod_optab
, V16SImode
, "__udivmodv16si4");
4404 set_optab_libfunc (sdiv_optab
, V32SImode
, "__divv32si3");
4405 set_optab_libfunc (udiv_optab
, V32SImode
, "__udivv32si3");
4406 set_optab_libfunc (smod_optab
, V32SImode
, "__modv32si3");
4407 set_optab_libfunc (umod_optab
, V32SImode
, "__umodv32si3");
4409 set_optab_libfunc (sdivmod_optab
, V32SImode
, "__divmodv32si4");
4410 set_optab_libfunc (udivmod_optab
, V32SImode
, "__udivmodv32si4");
4412 set_optab_libfunc (sdiv_optab
, V64SImode
, "__divv64si3");
4413 set_optab_libfunc (udiv_optab
, V64SImode
, "__udivv64si3");
4414 set_optab_libfunc (smod_optab
, V64SImode
, "__modv64si3");
4415 set_optab_libfunc (umod_optab
, V64SImode
, "__umodv64si3");
4417 set_optab_libfunc (sdivmod_optab
, V64SImode
, "__divmodv64si4");
4418 set_optab_libfunc (udivmod_optab
, V64SImode
, "__udivmodv64si4");
4421 set_optab_libfunc (sdiv_optab
, V2DImode
, "__divv2di3");
4422 set_optab_libfunc (udiv_optab
, V2DImode
, "__udivv2di3");
4423 set_optab_libfunc (smod_optab
, V2DImode
, "__modv2di3");
4424 set_optab_libfunc (umod_optab
, V2DImode
, "__umodv2di3");
4426 set_optab_libfunc (sdivmod_optab
, V2DImode
, "__divmodv2di4");
4427 set_optab_libfunc (udivmod_optab
, V2DImode
, "__udivmodv2di4");
4429 set_optab_libfunc (sdiv_optab
, V4DImode
, "__divv4di3");
4430 set_optab_libfunc (udiv_optab
, V4DImode
, "__udivv4di3");
4431 set_optab_libfunc (smod_optab
, V4DImode
, "__modv4di3");
4432 set_optab_libfunc (umod_optab
, V4DImode
, "__umodv4di3");
4434 set_optab_libfunc (sdivmod_optab
, V4DImode
, "__divmodv4di4");
4435 set_optab_libfunc (udivmod_optab
, V4DImode
, "__udivmodv4di4");
4437 set_optab_libfunc (sdiv_optab
, V8DImode
, "__divv8di3");
4438 set_optab_libfunc (udiv_optab
, V8DImode
, "__udivv8di3");
4439 set_optab_libfunc (smod_optab
, V8DImode
, "__modv8di3");
4440 set_optab_libfunc (umod_optab
, V8DImode
, "__umodv8di3");
4442 set_optab_libfunc (sdivmod_optab
, V8DImode
, "__divmodv8di4");
4443 set_optab_libfunc (udivmod_optab
, V8DImode
, "__udivmodv8di4");
4445 set_optab_libfunc (sdiv_optab
, V16DImode
, "__divv16di3");
4446 set_optab_libfunc (udiv_optab
, V16DImode
, "__udivv16di3");
4447 set_optab_libfunc (smod_optab
, V16DImode
, "__modv16di3");
4448 set_optab_libfunc (umod_optab
, V16DImode
, "__umodv16di3");
4450 set_optab_libfunc (sdivmod_optab
, V16DImode
, "__divmodv16di4");
4451 set_optab_libfunc (udivmod_optab
, V16DImode
, "__udivmodv16di4");
4453 set_optab_libfunc (sdiv_optab
, V32DImode
, "__divv32di3");
4454 set_optab_libfunc (udiv_optab
, V32DImode
, "__udivv32di3");
4455 set_optab_libfunc (smod_optab
, V32DImode
, "__modv32di3");
4456 set_optab_libfunc (umod_optab
, V32DImode
, "__umodv32di3");
4458 set_optab_libfunc (sdivmod_optab
, V32DImode
, "__divmodv32di4");
4459 set_optab_libfunc (udivmod_optab
, V32DImode
, "__udivmodv32di4");
4461 set_optab_libfunc (sdiv_optab
, V64DImode
, "__divv64di3");
4462 set_optab_libfunc (udiv_optab
, V64DImode
, "__udivv64di3");
4463 set_optab_libfunc (smod_optab
, V64DImode
, "__modv64di3");
4464 set_optab_libfunc (umod_optab
, V64DImode
, "__umodv64di3");
4466 set_optab_libfunc (sdivmod_optab
, V64DImode
, "__divmodv64di4");
4467 set_optab_libfunc (udivmod_optab
, V64DImode
, "__udivmodv64di4");
4471 /* Expand the CMP_SWAP GCN builtins. We have our own versions that do
4472 not require taking the address of any object, other than the memory
4473 cell being operated on.
4475 Helper function for gcn_expand_builtin_1. */
4478 gcn_expand_cmp_swap (tree exp
, rtx target
)
4480 machine_mode mode
= TYPE_MODE (TREE_TYPE (exp
));
4482 = TYPE_ADDR_SPACE (TREE_TYPE (TREE_TYPE (CALL_EXPR_ARG (exp
, 0))));
4483 machine_mode as_mode
= gcn_addr_space_address_mode (as
);
4486 target
= gen_reg_rtx (mode
);
4488 rtx addr
= expand_expr (CALL_EXPR_ARG (exp
, 0),
4489 NULL_RTX
, as_mode
, EXPAND_NORMAL
);
4490 rtx cmp
= expand_expr (CALL_EXPR_ARG (exp
, 1),
4491 NULL_RTX
, mode
, EXPAND_NORMAL
);
4492 rtx src
= expand_expr (CALL_EXPR_ARG (exp
, 2),
4493 NULL_RTX
, mode
, EXPAND_NORMAL
);
4496 rtx mem
= gen_rtx_MEM (mode
, force_reg (as_mode
, addr
));
4497 set_mem_addr_space (mem
, as
);
4500 cmp
= copy_to_mode_reg (mode
, cmp
);
4502 src
= copy_to_mode_reg (mode
, src
);
4505 pat
= gen_sync_compare_and_swapsi (target
, mem
, cmp
, src
);
4507 pat
= gen_sync_compare_and_swapdi (target
, mem
, cmp
, src
);
4514 /* Expand many different builtins.
4516 Intended for use in gcn-builtins.def. */
4519 gcn_expand_builtin_1 (tree exp
, rtx target
, rtx
/*subtarget */ ,
4520 machine_mode
/*mode */ , int ignore
,
4521 struct gcn_builtin_description
*)
4523 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
4524 switch (DECL_MD_FUNCTION_CODE (fndecl
))
4526 case GCN_BUILTIN_FLAT_LOAD_INT32
:
4532 expand_expr (CALL_EXPR_ARG (exp
, 0), NULL_RTX
, DImode
,
4535 force_reg (V64DImode
,
4536 expand_expr (CALL_EXPR_ARG (exp
, 1), NULL_RTX
, V64DImode
,
4538 /*emit_insn (gen_vector_flat_loadv64si
4539 (target, gcn_gen_undef (V64SImode), ptr, exec)); */
4542 case GCN_BUILTIN_FLAT_LOAD_PTR_INT32
:
4543 case GCN_BUILTIN_FLAT_LOAD_PTR_FLOAT
:
4547 rtx exec
= force_reg (DImode
,
4548 expand_expr (CALL_EXPR_ARG (exp
, 0), NULL_RTX
,
4551 rtx ptr
= force_reg (DImode
,
4552 expand_expr (CALL_EXPR_ARG (exp
, 1), NULL_RTX
,
4555 rtx offsets
= force_reg (V64SImode
,
4556 expand_expr (CALL_EXPR_ARG (exp
, 2),
4557 NULL_RTX
, V64DImode
,
4559 rtx addrs
= gen_reg_rtx (V64DImode
);
4560 rtx tmp
= gen_reg_rtx (V64SImode
);
4561 emit_insn (gen_ashlv64si3_exec (tmp
, offsets
,
4563 gcn_gen_undef (V64SImode
), exec
));
4564 emit_insn (gen_addv64di3_zext_dup2_exec (addrs
, tmp
, ptr
,
4565 gcn_gen_undef (V64DImode
),
4567 rtx mem
= gen_rtx_MEM (GET_MODE (target
), addrs
);
4568 /*set_mem_addr_space (mem, ADDR_SPACE_FLAT); */
4569 /* FIXME: set attributes. */
4570 emit_insn (gen_movvNm (target
, mem
, NULL
, exec
));
4573 case GCN_BUILTIN_FLAT_STORE_PTR_INT32
:
4574 case GCN_BUILTIN_FLAT_STORE_PTR_FLOAT
:
4576 rtx exec
= force_reg (DImode
,
4577 expand_expr (CALL_EXPR_ARG (exp
, 0), NULL_RTX
,
4580 rtx ptr
= force_reg (DImode
,
4581 expand_expr (CALL_EXPR_ARG (exp
, 1), NULL_RTX
,
4584 rtx offsets
= force_reg (V64SImode
,
4585 expand_expr (CALL_EXPR_ARG (exp
, 2),
4586 NULL_RTX
, V64DImode
,
4588 machine_mode vmode
= TYPE_MODE (TREE_TYPE (CALL_EXPR_ARG (exp
,
4590 rtx val
= force_reg (vmode
,
4591 expand_expr (CALL_EXPR_ARG (exp
, 3), NULL_RTX
,
4594 rtx addrs
= gen_reg_rtx (V64DImode
);
4595 rtx tmp
= gen_reg_rtx (V64SImode
);
4596 emit_insn (gen_ashlv64si3_exec (tmp
, offsets
,
4598 gcn_gen_undef (V64SImode
), exec
));
4599 emit_insn (gen_addv64di3_zext_dup2_exec (addrs
, tmp
, ptr
,
4600 gcn_gen_undef (V64DImode
),
4602 rtx mem
= gen_rtx_MEM (vmode
, addrs
);
4603 /*set_mem_addr_space (mem, ADDR_SPACE_FLAT); */
4604 /* FIXME: set attributes. */
4605 emit_insn (gen_movvNm (mem
, val
, NULL
, exec
));
4608 case GCN_BUILTIN_SQRTVF
:
4612 rtx arg
= force_reg (V64SFmode
,
4613 expand_expr (CALL_EXPR_ARG (exp
, 0), NULL_RTX
,
4616 emit_insn (gen_sqrtv64sf2 (target
, arg
));
4619 case GCN_BUILTIN_SQRTF
:
4623 rtx arg
= force_reg (SFmode
,
4624 expand_expr (CALL_EXPR_ARG (exp
, 0), NULL_RTX
,
4627 emit_insn (gen_sqrtsf2 (target
, arg
));
4630 case GCN_BUILTIN_FABSVF
:
4634 rtx arg
= force_reg (V64SFmode
,
4635 expand_expr (CALL_EXPR_ARG (exp
, 0), NULL_RTX
,
4638 emit_insn (gen_absv64sf2 (target
, arg
));
4641 case GCN_BUILTIN_FABSV
:
4645 rtx arg
= force_reg (V64DFmode
,
4646 expand_expr (CALL_EXPR_ARG (exp
, 0), NULL_RTX
,
4649 emit_insn (gen_absv64df2 (target
, arg
));
4652 case GCN_BUILTIN_FLOORVF
:
4656 rtx arg
= force_reg (V64SFmode
,
4657 expand_expr (CALL_EXPR_ARG (exp
, 0), NULL_RTX
,
4660 emit_insn (gen_floorv64sf2 (target
, arg
));
4663 case GCN_BUILTIN_FLOORV
:
4667 rtx arg
= force_reg (V64DFmode
,
4668 expand_expr (CALL_EXPR_ARG (exp
, 0), NULL_RTX
,
4671 emit_insn (gen_floorv64df2 (target
, arg
));
4674 case GCN_BUILTIN_LDEXPVF
:
4678 rtx arg1
= force_reg (V64SFmode
,
4679 expand_expr (CALL_EXPR_ARG (exp
, 0), NULL_RTX
,
4682 rtx arg2
= force_reg (V64SImode
,
4683 expand_expr (CALL_EXPR_ARG (exp
, 1), NULL_RTX
,
4686 emit_insn (gen_ldexpv64sf3 (target
, arg1
, arg2
));
4689 case GCN_BUILTIN_LDEXPV
:
4693 rtx arg1
= force_reg (V64DFmode
,
4694 expand_expr (CALL_EXPR_ARG (exp
, 0), NULL_RTX
,
4697 rtx arg2
= force_reg (V64SImode
,
4698 expand_expr (CALL_EXPR_ARG (exp
, 1), NULL_RTX
,
4701 emit_insn (gen_ldexpv64df3 (target
, arg1
, arg2
));
4704 case GCN_BUILTIN_FREXPVF_EXP
:
4708 rtx arg
= force_reg (V64SFmode
,
4709 expand_expr (CALL_EXPR_ARG (exp
, 0), NULL_RTX
,
4712 emit_insn (gen_frexpv64sf_exp2 (target
, arg
));
4715 case GCN_BUILTIN_FREXPVF_MANT
:
4719 rtx arg
= force_reg (V64SFmode
,
4720 expand_expr (CALL_EXPR_ARG (exp
, 0), NULL_RTX
,
4723 emit_insn (gen_frexpv64sf_mant2 (target
, arg
));
4726 case GCN_BUILTIN_FREXPV_EXP
:
4730 rtx arg
= force_reg (V64DFmode
,
4731 expand_expr (CALL_EXPR_ARG (exp
, 0), NULL_RTX
,
4734 emit_insn (gen_frexpv64df_exp2 (target
, arg
));
4737 case GCN_BUILTIN_FREXPV_MANT
:
4741 rtx arg
= force_reg (V64DFmode
,
4742 expand_expr (CALL_EXPR_ARG (exp
, 0), NULL_RTX
,
4745 emit_insn (gen_frexpv64df_mant2 (target
, arg
));
4748 case GCN_BUILTIN_OMP_DIM_SIZE
:
4752 emit_insn (gen_oacc_dim_size (target
,
4753 expand_expr (CALL_EXPR_ARG (exp
, 0),
4758 case GCN_BUILTIN_OMP_DIM_POS
:
4762 emit_insn (gen_oacc_dim_pos (target
,
4763 expand_expr (CALL_EXPR_ARG (exp
, 0),
4768 case GCN_BUILTIN_CMP_SWAP
:
4769 case GCN_BUILTIN_CMP_SWAPLL
:
4770 return gcn_expand_cmp_swap (exp
, target
);
4772 case GCN_BUILTIN_ACC_SINGLE_START
:
4777 rtx wavefront
= gcn_oacc_dim_pos (1);
4778 rtx cond
= gen_rtx_EQ (VOIDmode
, wavefront
, const0_rtx
);
4779 rtx cc
= (target
&& REG_P (target
)) ? target
: gen_reg_rtx (BImode
);
4780 emit_insn (gen_cstoresi4 (cc
, cond
, wavefront
, const0_rtx
));
4784 case GCN_BUILTIN_ACC_SINGLE_COPY_START
:
4786 rtx blk
= force_reg (SImode
,
4787 expand_expr (CALL_EXPR_ARG (exp
, 0), NULL_RTX
,
4788 SImode
, EXPAND_NORMAL
));
4789 rtx wavefront
= gcn_oacc_dim_pos (1);
4790 rtx cond
= gen_rtx_NE (VOIDmode
, wavefront
, const0_rtx
);
4791 rtx not_zero
= gen_label_rtx ();
4792 emit_insn (gen_cbranchsi4 (cond
, wavefront
, const0_rtx
, not_zero
));
4793 emit_move_insn (blk
, const0_rtx
);
4794 emit_label (not_zero
);
4798 case GCN_BUILTIN_ACC_SINGLE_COPY_END
:
4801 case GCN_BUILTIN_ACC_BARRIER
:
4802 emit_insn (gen_gcn_wavefront_barrier ());
4805 case GCN_BUILTIN_GET_STACK_LIMIT
:
4807 /* stackbase = (stack_segment_decr & 0x0000ffffffffffff)
4808 + stack_wave_offset);
4809 seg_size = dispatch_ptr->private_segment_size;
4810 stacklimit = stackbase + seg_size*64;
4811 with segsize = *(uint32_t *) ((char *) dispatch_ptr
4812 + 6*sizeof(int16_t) + 3*sizeof(int32_t));
4813 cf. struct hsa_kernel_dispatch_packet_s in the HSA doc. */
4815 if (cfun
->machine
->args
.reg
[DISPATCH_PTR_ARG
] >= 0
4816 && cfun
->machine
->args
.reg
[KERNARG_SEGMENT_PTR_ARG
] >= 0)
4818 rtx num_waves_mem
= gcn_oacc_dim_size (1);
4819 rtx num_waves
= gen_reg_rtx (SImode
);
4820 set_mem_addr_space (num_waves_mem
, ADDR_SPACE_SCALAR_FLAT
);
4821 emit_move_insn (num_waves
, num_waves_mem
);
4823 rtx workgroup_num
= gcn_oacc_dim_pos (0);
4824 rtx wave_num
= gen_reg_rtx (SImode
);
4825 emit_move_insn(wave_num
, gcn_oacc_dim_pos (1));
4827 rtx thread_id
= gen_reg_rtx (SImode
);
4828 emit_insn (gen_mulsi3 (thread_id
, num_waves
, workgroup_num
));
4829 emit_insn (gen_addsi3_scc (thread_id
, thread_id
, wave_num
));
4831 rtx kernarg_reg
= gen_rtx_REG (DImode
, cfun
->machine
->args
.reg
4832 [KERNARG_SEGMENT_PTR_ARG
]);
4833 rtx stack_size_mem
= gen_rtx_MEM (SImode
,
4834 gen_rtx_PLUS (DImode
,
4837 set_mem_addr_space (stack_size_mem
, ADDR_SPACE_SCALAR_FLAT
);
4838 rtx stack_size
= gen_reg_rtx (SImode
);
4839 emit_move_insn (stack_size
, stack_size_mem
);
4841 rtx wave_offset
= gen_reg_rtx (SImode
);
4842 emit_insn (gen_mulsi3 (wave_offset
, stack_size
, thread_id
));
4844 rtx stack_limit_offset
= gen_reg_rtx (SImode
);
4845 emit_insn (gen_addsi3 (stack_limit_offset
, wave_offset
,
4848 rtx stack_limit_offset_di
= gen_reg_rtx (DImode
);
4849 emit_move_insn (gen_rtx_SUBREG (SImode
, stack_limit_offset_di
, 4),
4851 emit_move_insn (gen_rtx_SUBREG (SImode
, stack_limit_offset_di
, 0),
4852 stack_limit_offset
);
4854 rtx stack_addr_mem
= gen_rtx_MEM (DImode
,
4855 gen_rtx_PLUS (DImode
,
4858 set_mem_addr_space (stack_addr_mem
, ADDR_SPACE_SCALAR_FLAT
);
4859 rtx stack_addr
= gen_reg_rtx (DImode
);
4860 emit_move_insn (stack_addr
, stack_addr_mem
);
4862 ptr
= gen_rtx_PLUS (DImode
, stack_addr
, stack_limit_offset_di
);
4866 ptr
= gen_reg_rtx (DImode
);
4867 emit_move_insn (ptr
, const0_rtx
);
4871 case GCN_BUILTIN_KERNARG_PTR
:
4874 if (cfun
->machine
->args
.reg
[KERNARG_SEGMENT_PTR_ARG
] >= 0)
4875 ptr
= gen_rtx_REG (DImode
,
4876 cfun
->machine
->args
.reg
[KERNARG_SEGMENT_PTR_ARG
]);
4879 ptr
= gen_reg_rtx (DImode
);
4880 emit_move_insn (ptr
, const0_rtx
);
4884 case GCN_BUILTIN_FIRST_CALL_THIS_THREAD_P
:
4886 /* Stash a marker in the unused upper 16 bits of s[0:1] to indicate
4887 whether it was the first call. */
4888 rtx result
= gen_reg_rtx (BImode
);
4889 emit_move_insn (result
, const0_rtx
);
4890 if (cfun
->machine
->args
.reg
[QUEUE_PTR_ARG
] >= 0)
4892 rtx not_first
= gen_label_rtx ();
4893 rtx reg
= gen_rtx_REG (DImode
,
4894 cfun
->machine
->args
.reg
[QUEUE_PTR_ARG
]);
4895 reg
= gcn_operand_part (DImode
, reg
, 1);
4896 rtx cmp
= force_reg (SImode
,
4897 gen_rtx_LSHIFTRT (SImode
, reg
, GEN_INT (16)));
4898 emit_insn (gen_cstoresi4 (result
, gen_rtx_NE (BImode
, cmp
,
4900 cmp
, GEN_INT(12345)));
4901 emit_jump_insn (gen_cjump (not_first
, gen_rtx_EQ (BImode
, result
,
4904 emit_move_insn (reg
,
4906 gen_rtx_IOR (SImode
,
4907 gen_rtx_AND (SImode
, reg
, GEN_INT (0x0000ffff)),
4908 GEN_INT (12345L << 16))));
4909 emit_insn (gen_rtx_USE (VOIDmode
, reg
));
4910 emit_label (not_first
);
4919 /* Expansion of simple arithmetic and bit binary operation builtins.
4921 Intended for use with gcn_builtins table. */
4924 gcn_expand_builtin_binop (tree exp
, rtx target
, rtx
/*subtarget */ ,
4925 machine_mode
/*mode */ , int ignore
,
4926 struct gcn_builtin_description
*d
)
4928 int icode
= d
->icode
;
4932 rtx exec
= force_reg (DImode
,
4933 expand_expr (CALL_EXPR_ARG (exp
, 0), NULL_RTX
, DImode
,
4936 machine_mode m1
= insn_data
[icode
].operand
[1].mode
;
4937 rtx arg1
= expand_expr (CALL_EXPR_ARG (exp
, 1), NULL_RTX
, m1
,
4939 if (!insn_data
[icode
].operand
[1].predicate (arg1
, m1
))
4940 arg1
= force_reg (m1
, arg1
);
4942 machine_mode m2
= insn_data
[icode
].operand
[2].mode
;
4943 rtx arg2
= expand_expr (CALL_EXPR_ARG (exp
, 2), NULL_RTX
, m2
,
4945 if (!insn_data
[icode
].operand
[2].predicate (arg2
, m2
))
4946 arg2
= force_reg (m2
, arg2
);
4949 if (call_expr_nargs (exp
) == 4)
4951 machine_mode m_prev
= insn_data
[icode
].operand
[4].mode
;
4952 arg_prev
= force_reg (m_prev
,
4953 expand_expr (CALL_EXPR_ARG (exp
, 3), NULL_RTX
,
4954 m_prev
, EXPAND_NORMAL
));
4957 arg_prev
= gcn_gen_undef (GET_MODE (target
));
4959 rtx pat
= GEN_FCN (icode
) (target
, arg1
, arg2
, exec
, arg_prev
);
4964 /* Implement TARGET_EXPAND_BUILTIN.
4966 Expand an expression EXP that calls a built-in function, with result going
4967 to TARGET if that's convenient (and in mode MODE if that's convenient).
4968 SUBTARGET may be used as the target for computing one of EXP's operands.
4969 IGNORE is nonzero if the value is to be ignored. */
4972 gcn_expand_builtin (tree exp
, rtx target
, rtx subtarget
, machine_mode mode
,
4975 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
4976 unsigned int fcode
= DECL_MD_FUNCTION_CODE (fndecl
);
4977 struct gcn_builtin_description
*d
;
4979 gcc_assert (fcode
< GCN_BUILTIN_MAX
);
4980 d
= &gcn_builtins
[fcode
];
4982 if (d
->type
== B_UNIMPLEMENTED
)
4983 sorry ("Builtin not implemented");
4985 return d
->expander (exp
, target
, subtarget
, mode
, ignore
, d
);
4989 /* {{{ Vectorization. */
4991 /* Implement TARGET_VECTORIZE_GET_MASK_MODE.
4993 A vector mask is a value that holds one boolean result for every element in
4997 gcn_vectorize_get_mask_mode (machine_mode
)
4999 /* GCN uses a DImode bit-mask. */
5003 /* Return an RTX that references a vector with the i-th lane containing
5006 Helper function for gcn_vectorize_vec_perm_const. */
5009 gcn_make_vec_perm_address (unsigned int *perm
, int nelt
)
5011 machine_mode mode
= VnMODE (nelt
, SImode
);
5012 rtx x
= gen_reg_rtx (mode
);
5013 emit_move_insn (x
, gcn_vec_constant (mode
, 0));
5015 /* Permutation addresses use byte addressing. With each vector lane being
5016 4 bytes wide, and with 64 lanes in total, only bits 2..7 are significant,
5019 The permutation given to the vec_perm* patterns range from 0 to 2N-1 to
5020 select between lanes in two vectors, but as the DS_BPERMUTE* instructions
5021 only take one source vector, the most-significant bit can be ignored
5022 here. Instead, we can use EXEC masking to select the relevant part of
5023 each source vector after they are permuted separately. */
5024 uint64_t bit_mask
= 1 << 2;
5025 for (int i
= 2; i
< 8; i
++, bit_mask
<<= 1)
5027 uint64_t exec_mask
= 0;
5028 uint64_t lane_mask
= 1;
5029 for (int j
= 0; j
< nelt
; j
++, lane_mask
<<= 1)
5030 if (((perm
[j
] % nelt
) * 4) & bit_mask
)
5031 exec_mask
|= lane_mask
;
5034 emit_insn (gen_addvNsi3 (x
, x
, gcn_vec_constant (mode
, bit_mask
),
5035 x
, get_exec (exec_mask
)));
5041 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST.
5043 Return true if permutation with SEL is possible.
5045 If DST/SRC0/SRC1 are non-null, emit the instructions to perform the
5049 gcn_vectorize_vec_perm_const (machine_mode vmode
, machine_mode op_mode
,
5050 rtx dst
, rtx src0
, rtx src1
,
5051 const vec_perm_indices
& sel
)
5053 if (vmode
!= op_mode
)
5056 unsigned int nelt
= GET_MODE_NUNITS (vmode
);
5058 gcc_assert (VECTOR_MODE_P (vmode
));
5059 gcc_assert (nelt
<= 64);
5060 gcc_assert (sel
.length () == nelt
);
5064 /* All vector permutations are possible on this architecture,
5065 with varying degrees of efficiency depending on the permutation. */
5069 unsigned int perm
[64];
5070 for (unsigned int i
= 0; i
< nelt
; ++i
)
5071 perm
[i
] = sel
[i
] & (2 * nelt
- 1);
5072 for (unsigned int i
= nelt
; i
< 64; ++i
)
5075 src0
= force_reg (vmode
, src0
);
5076 src1
= force_reg (vmode
, src1
);
5078 /* Make life a bit easier by swapping operands if necessary so that
5079 the first element always comes from src0. */
5080 if (perm
[0] >= nelt
)
5082 std::swap (src0
, src1
);
5084 for (unsigned int i
= 0; i
< nelt
; ++i
)
5091 /* TODO: There are more efficient ways to implement certain permutations
5092 using ds_swizzle_b32 and/or DPP. Test for and expand them here, before
5093 this more inefficient generic approach is used. */
5095 int64_t src1_lanes
= 0;
5096 int64_t lane_bit
= 1;
5098 for (unsigned int i
= 0; i
< nelt
; ++i
, lane_bit
<<= 1)
5100 /* Set the bits for lanes from src1. */
5101 if (perm
[i
] >= nelt
)
5102 src1_lanes
|= lane_bit
;
5105 rtx addr
= gcn_make_vec_perm_address (perm
, nelt
);
5107 /* Load elements from src0 to dst. */
5108 gcc_assert ((~src1_lanes
) & (0xffffffffffffffffUL
> (64-nelt
)));
5109 emit_insn (gen_ds_bpermutevNm (dst
, addr
, src0
, get_exec (vmode
)));
5111 /* Load elements from src1 to dst. */
5114 /* Masking a lane masks both the destination and source lanes for
5115 DS_BPERMUTE, so we need to have all lanes enabled for the permute,
5116 then add an extra masked move to merge the results of permuting
5117 the two source vectors together.
5119 rtx tmp
= gen_reg_rtx (vmode
);
5120 emit_insn (gen_ds_bpermutevNm (tmp
, addr
, src1
, get_exec (vmode
)));
5121 emit_insn (gen_movvNm (dst
, tmp
, dst
, get_exec (src1_lanes
)));
5127 /* Implements TARGET_VECTOR_MODE_SUPPORTED_P.
5129 Return nonzero if vector MODE is supported with at least move
5133 gcn_vector_mode_supported_p (machine_mode mode
)
5135 return (mode
== V64QImode
|| mode
== V64HImode
5136 || mode
== V64SImode
|| mode
== V64DImode
5137 || mode
== V64SFmode
|| mode
== V64DFmode
5138 || mode
== V32QImode
|| mode
== V32HImode
5139 || mode
== V32SImode
|| mode
== V32DImode
5140 || mode
== V32SFmode
|| mode
== V32DFmode
5141 || mode
== V16QImode
|| mode
== V16HImode
5142 || mode
== V16SImode
|| mode
== V16DImode
5143 || mode
== V16SFmode
|| mode
== V16DFmode
5144 || mode
== V8QImode
|| mode
== V8HImode
5145 || mode
== V8SImode
|| mode
== V8DImode
5146 || mode
== V8SFmode
|| mode
== V8DFmode
5147 || mode
== V4QImode
|| mode
== V4HImode
5148 || mode
== V4SImode
|| mode
== V4DImode
5149 || mode
== V4SFmode
|| mode
== V4DFmode
5150 || mode
== V2QImode
|| mode
== V2HImode
5151 || mode
== V2SImode
|| mode
== V2DImode
5152 || mode
== V2SFmode
|| mode
== V2DFmode
5153 /* TImode vectors are allowed to exist for divmod, but there
5154 are almost no instructions defined for them, and the
5155 autovectorizer does not use them. */
5156 || mode
== V64TImode
|| mode
== V32TImode
5157 || mode
== V16TImode
|| mode
== V8TImode
5158 || mode
== V4TImode
|| mode
== V2TImode
);
5161 /* Implement TARGET_VECTORIZE_PREFERRED_SIMD_MODE.
5163 Enables autovectorization for all supported modes. */
5166 gcn_vectorize_preferred_simd_mode (scalar_mode mode
)
5187 /* Implement TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES.
5189 Try all the vector modes. */
5191 unsigned int gcn_autovectorize_vector_modes (vector_modes
*modes
,
5192 bool ARG_UNUSED (all
))
5194 modes
->safe_push (V64QImode
);
5195 modes
->safe_push (V64HImode
);
5196 modes
->safe_push (V64SImode
);
5197 modes
->safe_push (V64SFmode
);
5198 modes
->safe_push (V64DImode
);
5199 modes
->safe_push (V64DFmode
);
5201 modes
->safe_push (V32QImode
);
5202 modes
->safe_push (V32HImode
);
5203 modes
->safe_push (V32SImode
);
5204 modes
->safe_push (V32SFmode
);
5205 modes
->safe_push (V32DImode
);
5206 modes
->safe_push (V32DFmode
);
5208 modes
->safe_push (V16QImode
);
5209 modes
->safe_push (V16HImode
);
5210 modes
->safe_push (V16SImode
);
5211 modes
->safe_push (V16SFmode
);
5212 modes
->safe_push (V16DImode
);
5213 modes
->safe_push (V16DFmode
);
5215 modes
->safe_push (V8QImode
);
5216 modes
->safe_push (V8HImode
);
5217 modes
->safe_push (V8SImode
);
5218 modes
->safe_push (V8SFmode
);
5219 modes
->safe_push (V8DImode
);
5220 modes
->safe_push (V8DFmode
);
5222 modes
->safe_push (V4QImode
);
5223 modes
->safe_push (V4HImode
);
5224 modes
->safe_push (V4SImode
);
5225 modes
->safe_push (V4SFmode
);
5226 modes
->safe_push (V4DImode
);
5227 modes
->safe_push (V4DFmode
);
5229 modes
->safe_push (V2QImode
);
5230 modes
->safe_push (V2HImode
);
5231 modes
->safe_push (V2SImode
);
5232 modes
->safe_push (V2SFmode
);
5233 modes
->safe_push (V2DImode
);
5234 modes
->safe_push (V2DFmode
);
5236 /* We shouldn't need VECT_COMPARE_COSTS as they should all cost the same. */
5240 /* Implement TARGET_VECTORIZE_RELATED_MODE.
5242 All GCN vectors are 64-lane, so this is simpler than other architectures.
5243 In particular, we do *not* want to match vector bit-size. */
5245 static opt_machine_mode
5246 gcn_related_vector_mode (machine_mode vector_mode
,
5247 scalar_mode element_mode
, poly_uint64 nunits
)
5249 int n
= nunits
.to_constant ();
5252 n
= GET_MODE_NUNITS (vector_mode
);
5254 return VnMODE (n
, element_mode
);
5257 /* Implement TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT.
5259 Returns the preferred alignment in bits for accesses to vectors of type type
5260 in vectorized code. This might be less than or greater than the ABI-defined
5261 value returned by TARGET_VECTOR_ALIGNMENT. It can be equal to the alignment
5262 of a single element, in which case the vectorizer will not try to optimize
5266 gcn_preferred_vector_alignment (const_tree type
)
5268 return TYPE_ALIGN (TREE_TYPE (type
));
5271 /* Implement TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT.
5273 Return true if the target supports misaligned vector store/load of a
5274 specific factor denoted in the misalignment parameter. */
5277 gcn_vectorize_support_vector_misalignment (machine_mode
ARG_UNUSED (mode
),
5278 const_tree type
, int misalignment
,
5284 /* If the misalignment is unknown, we should be able to handle the access
5285 so long as it is not to a member of a packed data structure. */
5286 if (misalignment
== -1)
5289 /* Return true if the misalignment is a multiple of the natural alignment
5290 of the vector's element type. This is probably always going to be
5291 true in practice, since we've already established that this isn't a
5293 return misalignment
% TYPE_ALIGN_UNIT (type
) == 0;
5296 /* Implement TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE.
5298 Return true if vector alignment is reachable (by peeling N iterations) for
5299 the given scalar type TYPE. */
5302 gcn_vector_alignment_reachable (const_tree
ARG_UNUSED (type
), bool is_packed
)
5304 /* Vectors which aren't in packed structures will not be less aligned than
5305 the natural alignment of their element type, so this is safe. */
5309 /* Generate DPP pairwise swap instruction.
5310 This instruction swaps the values in each even lane with the value in the
5312 a, b, c, d -> b, a, d, c.
5313 The opcode is given by INSN. */
5316 gcn_expand_dpp_swap_pairs_insn (machine_mode mode
, const char *insn
,
5317 int ARG_UNUSED (unspec
))
5319 static char buf
[128];
5322 /* Add the DPP modifiers. */
5323 dpp
= "quad_perm:[1,0,3,2]";
5325 if (vgpr_2reg_mode_p (mode
))
5326 sprintf (buf
, "%s\t%%L0, %%L1 %s\n\t%s\t%%H0, %%H1 %s",
5327 insn
, dpp
, insn
, dpp
);
5329 sprintf (buf
, "%s\t%%0, %%1 %s", insn
, dpp
);
5334 /* Generate DPP distribute even instruction.
5335 This instruction copies the value in each even lane to the next one:
5336 a, b, c, d -> a, a, c, c.
5337 The opcode is given by INSN. */
5340 gcn_expand_dpp_distribute_even_insn (machine_mode mode
, const char *insn
,
5341 int ARG_UNUSED (unspec
))
5343 static char buf
[128];
5346 /* Add the DPP modifiers. */
5347 dpp
= "quad_perm:[0,0,2,2]";
5349 if (vgpr_2reg_mode_p (mode
))
5350 sprintf (buf
, "%s\t%%L0, %%L1 %s\n\t%s\t%%H0, %%H1 %s",
5351 insn
, dpp
, insn
, dpp
);
5353 sprintf (buf
, "%s\t%%0, %%1 %s", insn
, dpp
);
5358 /* Generate DPP distribute odd instruction.
5359 This isntruction copies the value in each odd lane to the previous one:
5360 a, b, c, d -> b, b, d, d.
5361 The opcode is given by INSN. */
5364 gcn_expand_dpp_distribute_odd_insn (machine_mode mode
, const char *insn
,
5365 int ARG_UNUSED (unspec
))
5367 static char buf
[128];
5370 /* Add the DPP modifiers. */
5371 dpp
= "quad_perm:[1,1,3,3]";
5373 if (vgpr_2reg_mode_p (mode
))
5374 sprintf (buf
, "%s\t%%L0, %%L1 %s\n\t%s\t%%H0, %%H1 %s",
5375 insn
, dpp
, insn
, dpp
);
5377 sprintf (buf
, "%s\t%%0, %%1 %s", insn
, dpp
);
5382 /* Generate DPP instructions used for vector reductions.
5384 The opcode is given by INSN.
5385 The first operand of the operation is shifted right by SHIFT vector lanes.
5386 SHIFT must be a power of 2. If SHIFT is 16, the 15th lane of each row is
5387 broadcast the next row (thereby acting like a shift of 16 for the end of
5388 each row). If SHIFT is 32, lane 31 is broadcast to all the
5389 following lanes (thereby acting like a shift of 32 for lane 63). */
5392 gcn_expand_dpp_shr_insn (machine_mode mode
, const char *insn
,
5393 int unspec
, int shift
)
5395 static char buf
[128];
5397 const char *vcc_in
= "";
5398 const char *vcc_out
= "";
5400 /* Add the vcc operand if needed. */
5401 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
5403 if (unspec
== UNSPEC_PLUS_CARRY_IN_DPP_SHR
)
5406 if (unspec
== UNSPEC_PLUS_CARRY_DPP_SHR
5407 || unspec
== UNSPEC_PLUS_CARRY_IN_DPP_SHR
)
5411 /* Add the DPP modifiers. */
5415 dpp
= "row_shr:1 bound_ctrl:0";
5418 dpp
= "row_shr:2 bound_ctrl:0";
5421 dpp
= "row_shr:4 bank_mask:0xe";
5424 dpp
= "row_shr:8 bank_mask:0xc";
5427 dpp
= "row_bcast:15 row_mask:0xa";
5430 dpp
= "row_bcast:31 row_mask:0xc";
5436 if (unspec
== UNSPEC_MOV_DPP_SHR
&& vgpr_2reg_mode_p (mode
))
5437 sprintf (buf
, "%s\t%%L0, %%L1 %s\n\t%s\t%%H0, %%H1 %s",
5438 insn
, dpp
, insn
, dpp
);
5439 else if (unspec
== UNSPEC_MOV_DPP_SHR
)
5440 sprintf (buf
, "%s\t%%0, %%1 %s", insn
, dpp
);
5442 sprintf (buf
, "%s\t%%0%s, %%1, %%2%s %s", insn
, vcc_out
, vcc_in
, dpp
);
5447 /* Generate vector reductions in terms of DPP instructions.
5449 The vector register SRC of mode MODE is reduced using the operation given
5450 by UNSPEC, and the scalar result is returned in lane 63 of a vector
5451 register (or lane 31, 15, 7, 3, 1 for partial vectors). */
5454 gcn_expand_reduc_scalar (machine_mode mode
, rtx src
, int unspec
)
5456 machine_mode orig_mode
= mode
;
5457 machine_mode scalar_mode
= GET_MODE_INNER (mode
);
5458 int vf
= GET_MODE_NUNITS (mode
);
5459 bool use_moves
= (((unspec
== UNSPEC_SMIN_DPP_SHR
5460 || unspec
== UNSPEC_SMAX_DPP_SHR
5461 || unspec
== UNSPEC_UMIN_DPP_SHR
5462 || unspec
== UNSPEC_UMAX_DPP_SHR
)
5463 && (scalar_mode
== DImode
5464 || scalar_mode
== DFmode
))
5465 || (unspec
== UNSPEC_PLUS_DPP_SHR
5466 && scalar_mode
== DFmode
));
5467 rtx_code code
= (unspec
== UNSPEC_SMIN_DPP_SHR
? SMIN
5468 : unspec
== UNSPEC_SMAX_DPP_SHR
? SMAX
5469 : unspec
== UNSPEC_UMIN_DPP_SHR
? UMIN
5470 : unspec
== UNSPEC_UMAX_DPP_SHR
? UMAX
5471 : unspec
== UNSPEC_PLUS_DPP_SHR
? PLUS
5473 bool use_extends
= ((unspec
== UNSPEC_SMIN_DPP_SHR
5474 || unspec
== UNSPEC_SMAX_DPP_SHR
5475 || unspec
== UNSPEC_UMIN_DPP_SHR
5476 || unspec
== UNSPEC_UMAX_DPP_SHR
)
5477 && (scalar_mode
== QImode
5478 || scalar_mode
== HImode
));
5479 bool unsignedp
= (unspec
== UNSPEC_UMIN_DPP_SHR
5480 || unspec
== UNSPEC_UMAX_DPP_SHR
);
5481 bool use_plus_carry
= unspec
== UNSPEC_PLUS_DPP_SHR
5482 && GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
5483 && (TARGET_GCN3
|| scalar_mode
== DImode
);
5486 unspec
= UNSPEC_PLUS_CARRY_DPP_SHR
;
5490 mode
= VnMODE (vf
, SImode
);
5491 rtx tmp
= gen_reg_rtx (mode
);
5492 convert_move (tmp
, src
, unsignedp
);
5496 /* Perform reduction by first performing the reduction operation on every
5497 pair of lanes, then on every pair of results from the previous
5498 iteration (thereby effectively reducing every 4 lanes) and so on until
5499 all lanes are reduced. */
5500 rtx in
, out
= force_reg (mode
, src
);
5501 int iterations
= exact_log2 (vf
);
5502 for (int i
= 0, shift
= 1; i
< iterations
; i
++, shift
<<= 1)
5504 rtx shift_val
= gen_rtx_CONST_INT (VOIDmode
, shift
);
5506 out
= gen_reg_rtx (mode
);
5510 rtx tmp
= gen_reg_rtx (mode
);
5511 emit_insn (gen_dpp_move (mode
, tmp
, in
, shift_val
));
5512 emit_insn (gen_rtx_SET (out
, gen_rtx_fmt_ee (code
, mode
, tmp
, in
)));
5516 rtx insn
= gen_rtx_SET (out
,
5517 gen_rtx_UNSPEC (mode
,
5518 gen_rtvec (3, in
, in
,
5522 /* Add clobber for instructions that set the carry flags. */
5525 rtx clobber
= gen_rtx_CLOBBER (VOIDmode
,
5526 gen_rtx_REG (DImode
, VCC_REG
));
5527 insn
= gen_rtx_PARALLEL (VOIDmode
,
5528 gen_rtvec (2, insn
, clobber
));
5537 rtx tmp
= gen_reg_rtx (orig_mode
);
5538 convert_move (tmp
, out
, unsignedp
);
5545 /* Implement TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST. */
5548 gcn_vectorization_cost (enum vect_cost_for_stmt
ARG_UNUSED (type_of_cost
),
5549 tree
ARG_UNUSED (vectype
), int ARG_UNUSED (misalign
))
5551 /* Always vectorize. */
5555 /* Implement TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN. */
5558 gcn_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node
*ARG_UNUSED (node
),
5559 struct cgraph_simd_clone
*clonei
,
5560 tree
ARG_UNUSED (base_type
),
5561 int ARG_UNUSED (num
),
5564 if (known_eq (clonei
->simdlen
, 0U))
5565 clonei
->simdlen
= 64;
5566 else if (maybe_ne (clonei
->simdlen
, 64U))
5568 /* Note that x86 has a similar message that is likely to trigger on
5569 sizes that are OK for gcn; the user can't win. */
5571 warning_at (DECL_SOURCE_LOCATION (node
->decl
), 0,
5572 "unsupported simdlen %wd (amdgcn)",
5573 clonei
->simdlen
.to_constant ());
5577 clonei
->vecsize_mangle
= 'n';
5578 clonei
->vecsize_int
= 0;
5579 clonei
->vecsize_float
= 0;
5581 /* DImode ought to be more natural here, but VOIDmode produces better code,
5582 at present, due to the shift-and-test steps not being optimized away
5583 inside the in-branch clones. */
5584 clonei
->mask_mode
= VOIDmode
;
5589 /* Implement TARGET_SIMD_CLONE_ADJUST. */
5592 gcn_simd_clone_adjust (struct cgraph_node
*ARG_UNUSED (node
))
5594 /* This hook has to be defined when
5595 TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN is defined, but we don't
5596 need it to do anything yet. */
5599 /* Implement TARGET_SIMD_CLONE_USABLE. */
5602 gcn_simd_clone_usable (struct cgraph_node
*ARG_UNUSED (node
))
5604 /* We don't need to do anything here because
5605 gcn_simd_clone_compute_vecsize_and_simdlen currently only returns one
5610 tree
mathfn_built_in_explicit (tree
, combined_fn
);
5612 /* Implement TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION.
5613 Return the function declaration of the vectorized version of the builtin
5614 in the math library if available. */
5617 gcn_vectorize_builtin_vectorized_function (unsigned int fn
, tree type_out
,
5620 if (TREE_CODE (type_out
) != VECTOR_TYPE
5621 || TREE_CODE (type_in
) != VECTOR_TYPE
)
5624 machine_mode out_mode
= TYPE_MODE (TREE_TYPE (type_out
));
5625 int out_n
= TYPE_VECTOR_SUBPARTS (type_out
);
5626 combined_fn cfn
= combined_fn (fn
);
5628 /* Keep this consistent with the list of vectorized math routines. */
5667 CASE_CFN_SIGNIFICAND
:
5675 tree out_t_node
= (out_mode
== DFmode
) ? double_type_node
: float_type_node
;
5676 tree fndecl
= implicit_p
? mathfn_built_in (out_t_node
, cfn
)
5677 : mathfn_built_in_explicit (out_t_node
, cfn
);
5679 const char *bname
= IDENTIFIER_POINTER (DECL_NAME (fndecl
));
5681 sprintf (name
, out_mode
== DFmode
? "v%ddf_%s" : "v%dsf_%s",
5685 for (tree args
= DECL_ARGUMENTS (fndecl
); args
; args
= TREE_CHAIN (args
))
5688 tree fntype
= (arity
== 1)
5689 ? build_function_type_list (type_out
, type_in
, NULL
)
5690 : build_function_type_list (type_out
, type_in
, type_in
, NULL
);
5692 /* Build a function declaration for the vectorized function. */
5693 tree new_fndecl
= build_decl (BUILTINS_LOCATION
,
5694 FUNCTION_DECL
, get_identifier (name
), fntype
);
5695 TREE_PUBLIC (new_fndecl
) = 1;
5696 DECL_EXTERNAL (new_fndecl
) = 1;
5697 DECL_IS_NOVOPS (new_fndecl
) = 1;
5698 TREE_READONLY (new_fndecl
) = 1;
5703 /* Implement TARGET_LIBC_HAS_FUNCTION. */
5706 gcn_libc_has_function (enum function_class fn_class
,
5709 return bsd_libc_has_function (fn_class
, type
);
5713 /* {{{ md_reorg pass. */
5715 /* Identify VMEM instructions from their "type" attribute. */
5718 gcn_vmem_insn_p (attr_type type
)
5749 /* If INSN sets the EXEC register to a constant value, return the value,
5750 otherwise return zero. */
5753 gcn_insn_exec_value (rtx_insn
*insn
)
5755 if (!NONDEBUG_INSN_P (insn
))
5758 rtx pattern
= PATTERN (insn
);
5760 if (GET_CODE (pattern
) == SET
)
5762 rtx dest
= XEXP (pattern
, 0);
5763 rtx src
= XEXP (pattern
, 1);
5765 if (GET_MODE (dest
) == DImode
5766 && REG_P (dest
) && REGNO (dest
) == EXEC_REG
5767 && CONST_INT_P (src
))
5768 return INTVAL (src
);
5774 /* Sets the EXEC register before INSN to the value that it had after
5775 LAST_EXEC_DEF. The constant value of the EXEC register is returned if
5776 known, otherwise it returns zero. */
5779 gcn_restore_exec (rtx_insn
*insn
, rtx_insn
*last_exec_def
, int64_t curr_exec
,
5780 bool curr_exec_known
, bool &last_exec_def_saved
)
5782 rtx exec_reg
= gen_rtx_REG (DImode
, EXEC_REG
);
5785 int64_t exec_value
= gcn_insn_exec_value (last_exec_def
);
5789 /* If the EXEC value is a constant and it happens to be the same as the
5790 current EXEC value, the restore can be skipped. */
5791 if (curr_exec_known
&& exec_value
== curr_exec
)
5794 exec
= GEN_INT (exec_value
);
5798 /* If the EXEC value is not a constant, save it in a register after the
5799 point of definition. */
5800 rtx exec_save_reg
= gen_rtx_REG (DImode
, EXEC_SAVE_REG
);
5802 if (!last_exec_def_saved
)
5805 emit_move_insn (exec_save_reg
, exec_reg
);
5806 rtx_insn
*seq
= get_insns ();
5809 emit_insn_after (seq
, last_exec_def
);
5810 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5811 fprintf (dump_file
, "Saving EXEC after insn %d.\n",
5812 INSN_UID (last_exec_def
));
5814 last_exec_def_saved
= true;
5817 exec
= exec_save_reg
;
5820 /* Restore EXEC register before the usage. */
5822 emit_move_insn (exec_reg
, exec
);
5823 rtx_insn
*seq
= get_insns ();
5825 emit_insn_before (seq
, insn
);
5827 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5830 fprintf (dump_file
, "Restoring EXEC to %ld before insn %d.\n",
5831 exec_value
, INSN_UID (insn
));
5834 "Restoring EXEC from saved value before insn %d.\n",
5841 /* Implement TARGET_MACHINE_DEPENDENT_REORG.
5843 Ensure that pipeline dependencies and lane masking are set correctly. */
5849 rtx exec_reg
= gen_rtx_REG (DImode
, EXEC_REG
);
5852 INIT_REG_SET (&live
);
5854 compute_bb_for_insn ();
5859 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5861 fprintf (dump_file
, "After split:\n");
5862 print_rtl_with_bb (dump_file
, get_insns (), dump_flags
);
5865 /* Update data-flow information for split instructions. */
5866 df_insn_rescan_all ();
5869 df_live_add_problem ();
5870 df_live_set_all_dirty ();
5873 /* This pass ensures that the EXEC register is set correctly, according
5874 to the "exec" attribute. However, care must be taken so that the
5875 value that reaches explicit uses of the EXEC register remains the
5879 FOR_EACH_BB_FN (bb
, cfun
)
5881 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5882 fprintf (dump_file
, "BB %d:\n", bb
->index
);
5884 rtx_insn
*insn
, *curr
;
5885 rtx_insn
*last_exec_def
= BB_HEAD (bb
);
5886 bool last_exec_def_saved
= false;
5887 bool curr_exec_explicit
= true;
5888 bool curr_exec_known
= true;
5889 int64_t curr_exec
= 0; /* 0 here means 'the value is that of EXEC
5890 after last_exec_def is executed'. */
5892 bitmap live_in
= DF_LR_IN (bb
);
5893 bool exec_live_on_entry
= false;
5894 if (bitmap_bit_p (live_in
, EXEC_LO_REG
)
5895 || bitmap_bit_p (live_in
, EXEC_HI_REG
))
5898 fprintf (dump_file
, "EXEC reg is live on entry to block %d\n",
5900 exec_live_on_entry
= true;
5903 FOR_BB_INSNS_SAFE (bb
, insn
, curr
)
5905 if (!NONDEBUG_INSN_P (insn
))
5908 if (GET_CODE (PATTERN (insn
)) == USE
5909 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
5912 HARD_REG_SET defs
, uses
;
5913 CLEAR_HARD_REG_SET (defs
);
5914 CLEAR_HARD_REG_SET (uses
);
5915 note_stores (insn
, record_hard_reg_sets
, &defs
);
5916 note_uses (&PATTERN (insn
), record_hard_reg_uses
, &uses
);
5918 bool exec_lo_def_p
= TEST_HARD_REG_BIT (defs
, EXEC_LO_REG
);
5919 bool exec_hi_def_p
= TEST_HARD_REG_BIT (defs
, EXEC_HI_REG
);
5920 bool exec_used
= (hard_reg_set_intersect_p
5921 (uses
, reg_class_contents
[(int) EXEC_MASK_REG
])
5922 || TEST_HARD_REG_BIT (uses
, EXECZ_REG
));
5924 /* Check the instruction for implicit setting of EXEC via an
5926 attr_exec exec_attr
= get_attr_exec (insn
);
5936 /* Instructions that do not involve memory accesses only require
5937 bit 0 of EXEC to be set. */
5938 if (gcn_vmem_insn_p (get_attr_type (insn
))
5939 || get_attr_type (insn
) == TYPE_DS
)
5942 new_exec
= curr_exec
| 1;
5949 default: /* Auto-detect what setting is appropriate. */
5953 /* If EXEC is referenced explicitly then we don't need to do
5954 anything to set it, so we're done. */
5958 /* Scan the insn for VGPRs defs or uses. The mode determines
5959 what kind of exec is needed. */
5960 subrtx_iterator::array_type array
;
5961 FOR_EACH_SUBRTX (iter
, array
, PATTERN (insn
), NONCONST
)
5963 const_rtx x
= *iter
;
5964 if (REG_P (x
) && VGPR_REGNO_P (REGNO (x
)))
5966 if (VECTOR_MODE_P (GET_MODE (x
)))
5968 int vf
= GET_MODE_NUNITS (GET_MODE (x
));
5969 new_exec
= MAX ((uint64_t)new_exec
,
5970 0xffffffffffffffffUL
>> (64-vf
));
5972 else if (new_exec
== 0)
5980 if (new_exec
&& (!curr_exec_known
|| new_exec
!= curr_exec
))
5983 emit_move_insn (exec_reg
, GEN_INT (new_exec
));
5984 rtx_insn
*seq
= get_insns ();
5986 emit_insn_before (seq
, insn
);
5988 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5989 fprintf (dump_file
, "Setting EXEC to %ld before insn %d.\n",
5990 new_exec
, INSN_UID (insn
));
5992 curr_exec
= new_exec
;
5993 curr_exec_explicit
= false;
5994 curr_exec_known
= true;
5996 else if (new_exec
&& dump_file
&& (dump_flags
& TDF_DETAILS
))
5998 fprintf (dump_file
, "Exec already is %ld before insn %d.\n",
5999 new_exec
, INSN_UID (insn
));
6002 /* The state of the EXEC register is unknown after a
6005 curr_exec_known
= false;
6007 /* Handle explicit uses of EXEC. If the instruction is a partial
6008 explicit definition of EXEC, then treat it as an explicit use of
6010 if (exec_used
|| exec_lo_def_p
!= exec_hi_def_p
)
6012 /* An instruction that explicitly uses EXEC should not also
6013 implicitly define it. */
6014 gcc_assert (!exec_used
|| !new_exec
);
6016 if (!curr_exec_known
|| !curr_exec_explicit
)
6018 /* Restore the previous explicitly defined value. */
6019 curr_exec
= gcn_restore_exec (insn
, last_exec_def
,
6020 curr_exec
, curr_exec_known
,
6021 last_exec_def_saved
);
6022 curr_exec_explicit
= true;
6023 curr_exec_known
= true;
6027 /* Handle explicit definitions of EXEC. */
6028 if (exec_lo_def_p
|| exec_hi_def_p
)
6030 last_exec_def
= insn
;
6031 last_exec_def_saved
= false;
6032 curr_exec
= gcn_insn_exec_value (insn
);
6033 curr_exec_explicit
= true;
6034 curr_exec_known
= true;
6036 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6038 "Found %s definition of EXEC at insn %d.\n",
6039 exec_lo_def_p
== exec_hi_def_p
? "full" : "partial",
6043 exec_live_on_entry
= false;
6046 COPY_REG_SET (&live
, DF_LR_OUT (bb
));
6047 df_simulate_initialize_backwards (bb
, &live
);
6049 /* If EXEC is live after the basic block, restore the value of EXEC
6050 at the end of the block. */
6051 if ((REGNO_REG_SET_P (&live
, EXEC_LO_REG
)
6052 || REGNO_REG_SET_P (&live
, EXEC_HI_REG
))
6053 && (!curr_exec_known
|| !curr_exec_explicit
|| exec_live_on_entry
))
6055 rtx_insn
*end_insn
= BB_END (bb
);
6057 /* If the instruction is not a jump instruction, do the restore
6058 after the last instruction in the basic block. */
6059 if (NONJUMP_INSN_P (end_insn
))
6060 end_insn
= NEXT_INSN (end_insn
);
6062 gcn_restore_exec (end_insn
, last_exec_def
, curr_exec
,
6063 curr_exec_known
, last_exec_def_saved
);
6067 CLEAR_REG_SET (&live
);
6069 /* "Manually Inserted Wait States (NOPs)."
6071 GCN hardware detects most kinds of register dependencies, but there
6072 are some exceptions documented in the ISA manual. This pass
6073 detects the missed cases, and inserts the documented number of NOPs
6074 required for correct execution. */
6076 const int max_waits
= 5;
6081 attr_delayeduse delayeduse
;
6082 HARD_REG_SET writes
;
6087 for (int i
= 0; i
< max_waits
; i
++)
6088 back
[i
].insn
= NULL
;
6090 rtx_insn
*insn
, *last_insn
= NULL
;
6091 for (insn
= get_insns (); insn
!= 0; insn
= NEXT_INSN (insn
))
6093 if (!NONDEBUG_INSN_P (insn
))
6096 if (GET_CODE (PATTERN (insn
)) == USE
6097 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
6100 attr_type itype
= get_attr_type (insn
);
6101 attr_unit iunit
= get_attr_unit (insn
);
6102 attr_delayeduse idelayeduse
= get_attr_delayeduse (insn
);
6103 int ivccwait
= get_attr_vccwait (insn
);
6104 HARD_REG_SET ireads
, iwrites
;
6105 CLEAR_HARD_REG_SET (ireads
);
6106 CLEAR_HARD_REG_SET (iwrites
);
6107 note_stores (insn
, record_hard_reg_sets
, &iwrites
);
6108 note_uses (&PATTERN (insn
), record_hard_reg_uses
, &ireads
);
6110 /* Scan recent previous instructions for dependencies not handled in
6113 for (int i
= oldest
; i
< oldest
+ max_waits
; i
++)
6115 struct ilist
*prev_insn
= &back
[i
% max_waits
];
6117 if (!prev_insn
->insn
)
6120 /* VALU writes SGPR followed by VMEM reading the same SGPR
6121 requires 5 wait states. */
6122 if ((prev_insn
->age
+ nops_rqd
) < 5
6123 && prev_insn
->unit
== UNIT_VECTOR
6124 && gcn_vmem_insn_p (itype
))
6126 HARD_REG_SET regs
= prev_insn
->writes
& ireads
;
6127 if (hard_reg_set_intersect_p
6128 (regs
, reg_class_contents
[(int) SGPR_REGS
]))
6129 nops_rqd
= 5 - prev_insn
->age
;
6132 /* VALU sets VCC/EXEC followed by VALU uses VCCZ/EXECZ
6133 requires 5 wait states. */
6134 if ((prev_insn
->age
+ nops_rqd
) < 5
6135 && prev_insn
->unit
== UNIT_VECTOR
6136 && iunit
== UNIT_VECTOR
6137 && ((hard_reg_set_intersect_p
6139 reg_class_contents
[(int) EXEC_MASK_REG
])
6140 && TEST_HARD_REG_BIT (ireads
, EXECZ_REG
))
6142 (hard_reg_set_intersect_p
6144 reg_class_contents
[(int) VCC_CONDITIONAL_REG
])
6145 && TEST_HARD_REG_BIT (ireads
, VCCZ_REG
))))
6146 nops_rqd
= 5 - prev_insn
->age
;
6148 /* VALU writes SGPR/VCC followed by v_{read,write}lane using
6149 SGPR/VCC as lane select requires 4 wait states. */
6150 if ((prev_insn
->age
+ nops_rqd
) < 4
6151 && prev_insn
->unit
== UNIT_VECTOR
6152 && get_attr_laneselect (insn
) == LANESELECT_YES
)
6154 HARD_REG_SET regs
= prev_insn
->writes
& ireads
;
6155 if (hard_reg_set_intersect_p
6156 (regs
, reg_class_contents
[(int) SGPR_REGS
])
6157 || hard_reg_set_intersect_p
6158 (regs
, reg_class_contents
[(int) VCC_CONDITIONAL_REG
]))
6159 nops_rqd
= 4 - prev_insn
->age
;
6162 /* VALU writes VGPR followed by VALU_DPP reading that VGPR
6163 requires 2 wait states. */
6164 if ((prev_insn
->age
+ nops_rqd
) < 2
6165 && prev_insn
->unit
== UNIT_VECTOR
6166 && itype
== TYPE_VOP_DPP
)
6168 HARD_REG_SET regs
= prev_insn
->writes
& ireads
;
6169 if (hard_reg_set_intersect_p
6170 (regs
, reg_class_contents
[(int) VGPR_REGS
]))
6171 nops_rqd
= 2 - prev_insn
->age
;
6174 /* Store that requires input registers are not overwritten by
6175 following instruction. */
6176 if ((prev_insn
->age
+ nops_rqd
) < 1
6177 && prev_insn
->delayeduse
== DELAYEDUSE_YES
6178 && ((hard_reg_set_intersect_p
6179 (prev_insn
->reads
, iwrites
))))
6180 nops_rqd
= 1 - prev_insn
->age
;
6182 /* Instruction that requires VCC is not written too close before
6184 if (prev_insn
->age
< ivccwait
6185 && (hard_reg_set_intersect_p
6187 reg_class_contents
[(int)VCC_CONDITIONAL_REG
])))
6188 nops_rqd
= ivccwait
- prev_insn
->age
;
6191 /* Insert the required number of NOPs. */
6192 for (int i
= nops_rqd
; i
> 0; i
--)
6193 emit_insn_after (gen_nop (), last_insn
);
6195 /* Age the previous instructions. We can also ignore writes to
6196 registers subsequently overwritten. */
6197 HARD_REG_SET written
;
6198 CLEAR_HARD_REG_SET (written
);
6199 for (int i
= oldest
+ max_waits
- 1; i
> oldest
; i
--)
6201 struct ilist
*prev_insn
= &back
[i
% max_waits
];
6203 /* Assume all instructions are equivalent to one "wait", the same
6204 as s_nop. This is probably true for SALU, but not VALU (which
6205 may take longer), so this is not optimal. However, AMD do
6206 not publish the cycle times for instructions. */
6207 prev_insn
->age
+= 1 + nops_rqd
;
6210 prev_insn
->writes
&= ~written
;
6213 /* Track the current instruction as a previous instruction. */
6214 back
[oldest
].insn
= insn
;
6215 back
[oldest
].unit
= iunit
;
6216 back
[oldest
].delayeduse
= idelayeduse
;
6217 back
[oldest
].writes
= iwrites
;
6218 back
[oldest
].reads
= ireads
;
6219 back
[oldest
].age
= 0;
6220 oldest
= (oldest
+ 1) % max_waits
;
6227 /* {{{ OpenACC / OpenMP. */
6229 #define GCN_DEFAULT_GANGS 0 /* Choose at runtime. */
6230 #define GCN_DEFAULT_WORKERS 0 /* Choose at runtime. */
6231 #define GCN_DEFAULT_VECTORS 1 /* Use autovectorization only, for now. */
6233 /* Implement TARGET_GOACC_VALIDATE_DIMS.
6235 Check the launch dimensions provided for an OpenACC compute
6236 region, or routine. */
6239 gcn_goacc_validate_dims (tree decl
, int dims
[], int fn_level
,
6242 bool changed
= false;
6243 const int max_workers
= 16;
6245 /* The vector size must appear to be 64, to the user, unless this is a
6246 SEQ routine. The real, internal value is always 1, which means use
6247 autovectorization, but the user should not see that. */
6248 if (fn_level
<= GOMP_DIM_VECTOR
&& fn_level
>= -1
6249 && dims
[GOMP_DIM_VECTOR
] >= 0)
6251 if (fn_level
< 0 && dims
[GOMP_DIM_VECTOR
] >= 0
6252 && dims
[GOMP_DIM_VECTOR
] != 64)
6253 warning_at (decl
? DECL_SOURCE_LOCATION (decl
) : UNKNOWN_LOCATION
,
6255 (dims
[GOMP_DIM_VECTOR
]
6256 ? G_("using %<vector_length (64)%>, ignoring %d")
6257 : G_("using %<vector_length (64)%>, "
6258 "ignoring runtime setting")),
6259 dims
[GOMP_DIM_VECTOR
]);
6260 dims
[GOMP_DIM_VECTOR
] = 1;
6264 /* Check the num workers is not too large. */
6265 if (dims
[GOMP_DIM_WORKER
] > max_workers
)
6267 warning_at (decl
? DECL_SOURCE_LOCATION (decl
) : UNKNOWN_LOCATION
,
6269 "using %<num_workers (%d)%>, ignoring %d",
6270 max_workers
, dims
[GOMP_DIM_WORKER
]);
6271 dims
[GOMP_DIM_WORKER
] = max_workers
;
6275 /* Set global defaults. */
6278 dims
[GOMP_DIM_VECTOR
] = GCN_DEFAULT_VECTORS
;
6279 if (dims
[GOMP_DIM_WORKER
] < 0)
6280 dims
[GOMP_DIM_WORKER
] = GCN_DEFAULT_WORKERS
;
6281 if (dims
[GOMP_DIM_GANG
] < 0)
6282 dims
[GOMP_DIM_GANG
] = GCN_DEFAULT_GANGS
;
6289 /* Helper function for oacc_dim_size instruction.
6290 Also used for OpenMP, via builtin_gcn_dim_size, and the omp_gcn pass. */
6293 gcn_oacc_dim_size (int dim
)
6295 if (dim
< 0 || dim
> 2)
6296 error ("offload dimension out of range (%d)", dim
);
6298 /* Vectors are a special case. */
6300 return const1_rtx
; /* Think of this as 1 times 64. */
6302 static int offset
[] = {
6303 /* Offsets into dispatch packet. */
6304 12, /* X dim = Gang / Team / Work-group. */
6305 20, /* Z dim = Worker / Thread / Wavefront. */
6306 16 /* Y dim = Vector / SIMD / Work-item. */
6308 rtx addr
= gen_rtx_PLUS (DImode
,
6309 gen_rtx_REG (DImode
,
6310 cfun
->machine
->args
.
6311 reg
[DISPATCH_PTR_ARG
]),
6312 GEN_INT (offset
[dim
]));
6313 rtx mem
= gen_rtx_MEM (SImode
, addr
);
6314 set_mem_addr_space (mem
, ADDR_SPACE_SCALAR_FLAT
);
6318 /* Helper function for oacc_dim_pos instruction.
6319 Also used for OpenMP, via builtin_gcn_dim_pos, and the omp_gcn pass. */
6322 gcn_oacc_dim_pos (int dim
)
6324 if (dim
< 0 || dim
> 2)
6325 error ("offload dimension out of range (%d)", dim
);
6327 static const int reg
[] = {
6328 WORKGROUP_ID_X_ARG
, /* Gang / Team / Work-group. */
6329 WORK_ITEM_ID_Z_ARG
, /* Worker / Thread / Wavefront. */
6330 WORK_ITEM_ID_Y_ARG
/* Vector / SIMD / Work-item. */
6333 int reg_num
= cfun
->machine
->args
.reg
[reg
[dim
]];
6335 /* The information must have been requested by the kernel. */
6336 gcc_assert (reg_num
>= 0);
6338 return gen_rtx_REG (SImode
, reg_num
);
6341 /* Implement TARGET_GOACC_FORK_JOIN. */
6344 gcn_fork_join (gcall
*call
, const int dims
[], bool is_fork
)
6346 tree arg
= gimple_call_arg (call
, 2);
6347 unsigned axis
= TREE_INT_CST_LOW (arg
);
6349 if (!is_fork
&& axis
== GOMP_DIM_WORKER
&& dims
[axis
] != 1)
6355 /* Implement ???????
6356 FIXME make this a real hook.
6358 Adjust FNDECL such that options inherited from the host compiler
6359 are made appropriate for the accelerator compiler. */
6362 gcn_fixup_accel_lto_options (tree fndecl
)
6364 tree func_optimize
= DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
);
6369 = build_optimization_node (&global_options
, &global_options_set
);
6372 /* If the function changed the optimization levels as well as
6373 setting target options, start with the optimizations
6375 if (func_optimize
!= old_optimize
)
6376 cl_optimization_restore (&global_options
, &global_options_set
,
6377 TREE_OPTIMIZATION (func_optimize
));
6379 gcn_option_override ();
6381 /* The target attributes may also change some optimization flags,
6382 so update the optimization options if necessary. */
6383 new_optimize
= build_optimization_node (&global_options
,
6384 &global_options_set
);
6386 if (old_optimize
!= new_optimize
)
6388 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
) = new_optimize
;
6389 cl_optimization_restore (&global_options
, &global_options_set
,
6390 TREE_OPTIMIZATION (old_optimize
));
6394 /* Implement TARGET_GOACC_SHARED_MEM_LAYOUT hook. */
6397 gcn_shared_mem_layout (unsigned HOST_WIDE_INT
*lo
,
6398 unsigned HOST_WIDE_INT
*hi
,
6399 int ARG_UNUSED (dims
[GOMP_DIM_MAX
]),
6400 unsigned HOST_WIDE_INT
6401 ARG_UNUSED (private_size
[GOMP_DIM_MAX
]),
6402 unsigned HOST_WIDE_INT reduction_size
[GOMP_DIM_MAX
])
6404 *lo
= gang_private_size_opt
+ reduction_size
[GOMP_DIM_WORKER
];
6405 /* !!! We can maybe use dims[] to estimate the maximum number of work
6406 groups/wavefronts/etc. we will launch, and therefore tune the maximum
6407 amount of LDS we should use. For now, use a minimal amount to try to
6408 maximise occupancy. */
6410 machine_function
*machfun
= cfun
->machine
;
6411 machfun
->reduction_base
= gang_private_size_opt
;
6412 machfun
->reduction_limit
6413 = gang_private_size_opt
+ reduction_size
[GOMP_DIM_WORKER
];
6417 /* {{{ ASM Output. */
6419 /* Implement TARGET_ASM_FILE_START.
6421 Print assembler file header text. */
6424 output_file_start (void)
6426 /* In HSACOv4 no attribute setting means the binary supports "any" hardware
6428 const char *xnack
= (flag_xnack
== HSACO_ATTR_ON
? ":xnack+"
6429 : flag_xnack
== HSACO_ATTR_OFF
? ":xnack-"
6431 const char *sram_ecc
= (flag_sram_ecc
== HSACO_ATTR_ON
? ":sramecc+"
6432 : flag_sram_ecc
== HSACO_ATTR_OFF
? ":sramecc-"
6438 case PROCESSOR_FIJI
:
6443 case PROCESSOR_VEGA10
:
6447 case PROCESSOR_VEGA20
:
6451 case PROCESSOR_GFX908
:
6454 case PROCESSOR_GFX90a
:
6457 default: gcc_unreachable ();
6460 fprintf(asm_out_file
, "\t.amdgcn_target \"amdgcn-unknown-amdhsa--%s%s%s\"\n",
6461 cpu
, sram_ecc
, xnack
);
6464 /* Implement ASM_DECLARE_FUNCTION_NAME via gcn-hsa.h.
6466 Print the initial definition of a function name.
6468 For GCN kernel entry points this includes all the HSA meta-data, special
6469 alignment constraints that don't apply to regular functions, and magic
6470 comments that pass information to mkoffload. */
6473 gcn_hsa_declare_function_name (FILE *file
, const char *name
, tree
)
6476 bool xnack_enabled
= TARGET_XNACK
;
6478 fputs ("\n\n", file
);
6480 if (cfun
&& cfun
->machine
&& cfun
->machine
->normal_function
)
6482 fputs ("\t.type\t", file
);
6483 assemble_name (file
, name
);
6484 fputs (",@function\n", file
);
6485 assemble_name (file
, name
);
6486 fputs (":\n", file
);
6490 /* Determine count of sgpr/vgpr registers by looking for last
6492 for (sgpr
= 101; sgpr
>= 0; sgpr
--)
6493 if (df_regs_ever_live_p (FIRST_SGPR_REG
+ sgpr
))
6496 for (vgpr
= 255; vgpr
>= 0; vgpr
--)
6497 if (df_regs_ever_live_p (FIRST_VGPR_REG
+ vgpr
))
6501 if (!leaf_function_p ())
6503 /* We can't know how many registers function calls might use. */
6504 if (vgpr
< MAX_NORMAL_VGPR_COUNT
)
6505 vgpr
= MAX_NORMAL_VGPR_COUNT
;
6506 if (sgpr
< MAX_NORMAL_SGPR_COUNT
)
6507 sgpr
= MAX_NORMAL_SGPR_COUNT
;
6510 /* The gfx90a accum_offset field can't represent 0 registers. */
6511 if (gcn_arch
== PROCESSOR_GFX90a
&& vgpr
< 4)
6514 fputs ("\t.rodata\n"
6516 "\t.amdhsa_kernel\t", file
);
6517 assemble_name (file
, name
);
6519 int reg
= FIRST_SGPR_REG
;
6520 for (int a
= 0; a
< GCN_KERNEL_ARG_TYPES
; a
++)
6524 if ((cfun
->machine
->args
.requested
& (1 << a
))
6525 && (gcn_kernel_arg_types
[a
].fixed_regno
< 0))
6528 reg_last
= (reg_first
6529 + (GET_MODE_SIZE (gcn_kernel_arg_types
[a
].mode
)
6530 / UNITS_PER_WORD
) - 1);
6534 if (gcn_kernel_arg_types
[a
].header_pseudo
)
6536 fprintf (file
, "\t %s%s\t%i",
6537 (cfun
->machine
->args
.requested
& (1 << a
)) != 0 ? "" : ";",
6538 gcn_kernel_arg_types
[a
].header_pseudo
,
6539 (cfun
->machine
->args
.requested
& (1 << a
)) != 0);
6540 if (reg_first
!= -1)
6542 fprintf (file
, " ; (");
6543 for (int i
= reg_first
; i
<= reg_last
; ++i
)
6546 fprintf (file
, ", ");
6547 fprintf (file
, "%s", reg_names
[i
]);
6549 fprintf (file
, ")");
6551 fprintf (file
, "\n");
6553 else if (gcn_kernel_arg_types
[a
].fixed_regno
>= 0
6554 && cfun
->machine
->args
.requested
& (1 << a
))
6555 fprintf (file
, "\t ; %s\t%i (%s)\n",
6556 gcn_kernel_arg_types
[a
].name
,
6557 (cfun
->machine
->args
.requested
& (1 << a
)) != 0,
6558 reg_names
[gcn_kernel_arg_types
[a
].fixed_regno
]);
6560 fprintf (file
, "\t .amdhsa_system_vgpr_workitem_id\t%i\n",
6561 (cfun
->machine
->args
.requested
& (1 << WORK_ITEM_ID_Z_ARG
))
6563 : cfun
->machine
->args
.requested
& (1 << WORK_ITEM_ID_Y_ARG
)
6566 "\t .amdhsa_next_free_vgpr\t%i\n"
6567 "\t .amdhsa_next_free_sgpr\t%i\n"
6568 "\t .amdhsa_reserve_vcc\t1\n"
6569 "\t .amdhsa_reserve_flat_scratch\t0\n"
6570 "\t .amdhsa_reserve_xnack_mask\t%i\n"
6571 "\t .amdhsa_private_segment_fixed_size\t0\n"
6572 "\t .amdhsa_group_segment_fixed_size\t%u\n"
6573 "\t .amdhsa_float_denorm_mode_32\t3\n"
6574 "\t .amdhsa_float_denorm_mode_16_64\t3\n",
6579 if (gcn_arch
== PROCESSOR_GFX90a
)
6581 "\t .amdhsa_accum_offset\t%i\n"
6582 "\t .amdhsa_tg_split\t0\n",
6583 (vgpr
+3)&~3); // I think this means the AGPRs come after the VGPRs
6584 fputs ("\t.end_amdhsa_kernel\n", file
);
6587 /* The following is YAML embedded in assembler; tabs are not allowed. */
6588 fputs (" .amdgpu_metadata\n"
6589 " amdhsa.version:\n"
6592 " amdhsa.kernels:\n"
6593 " - .name: ", file
);
6594 assemble_name (file
, name
);
6595 fputs ("\n .symbol: ", file
);
6596 assemble_name (file
, name
);
6599 " .kernarg_segment_size: %i\n"
6600 " .kernarg_segment_align: %i\n"
6601 " .group_segment_fixed_size: %u\n"
6602 " .private_segment_fixed_size: 0\n"
6603 " .wavefront_size: 64\n"
6604 " .sgpr_count: %i\n"
6605 " .vgpr_count: %i\n"
6606 " .max_flat_workgroup_size: 1024\n",
6607 cfun
->machine
->kernarg_segment_byte_size
,
6608 cfun
->machine
->kernarg_segment_alignment
,
6611 if (gcn_arch
== PROCESSOR_GFX90a
)
6612 fprintf (file
, " .agpr_count: 0\n"); // AGPRs are not used, yet
6613 fputs (" .end_amdgpu_metadata\n", file
);
6616 fputs ("\t.text\n", file
);
6617 fputs ("\t.align\t256\n", file
);
6618 fputs ("\t.type\t", file
);
6619 assemble_name (file
, name
);
6620 fputs (",@function\n", file
);
6621 assemble_name (file
, name
);
6622 fputs (":\n", file
);
6624 /* This comment is read by mkoffload. */
6626 fprintf (file
, "\t;; OPENACC-DIMS: %d, %d, %d : %s\n",
6627 oacc_get_fn_dim_size (cfun
->decl
, GOMP_DIM_GANG
),
6628 oacc_get_fn_dim_size (cfun
->decl
, GOMP_DIM_WORKER
),
6629 oacc_get_fn_dim_size (cfun
->decl
, GOMP_DIM_VECTOR
), name
);
6632 /* Implement TARGET_ASM_SELECT_SECTION.
6634 Return the section into which EXP should be placed. */
6637 gcn_asm_select_section (tree exp
, int reloc
, unsigned HOST_WIDE_INT align
)
6639 if (TREE_TYPE (exp
) != error_mark_node
6640 && TYPE_ADDR_SPACE (TREE_TYPE (exp
)) == ADDR_SPACE_LDS
)
6643 return get_section (".lds_bss",
6644 SECTION_WRITE
| SECTION_BSS
| SECTION_DEBUG
,
6647 return get_named_section (exp
, ".lds_bss", reloc
);
6650 return default_elf_select_section (exp
, reloc
, align
);
6653 /* Implement TARGET_ASM_FUNCTION_PROLOGUE.
6655 Emits custom text into the assembler file at the head of each function. */
6658 gcn_target_asm_function_prologue (FILE *file
)
6660 machine_function
*offsets
= gcn_compute_frame_offsets ();
6662 asm_fprintf (file
, "\t; using %s addressing in function\n",
6663 offsets
->use_flat_addressing
? "flat" : "global");
6665 if (offsets
->normal_function
)
6667 asm_fprintf (file
, "\t; frame pointer needed: %s\n",
6668 offsets
->need_frame_pointer
? "true" : "false");
6669 asm_fprintf (file
, "\t; lr needs saving: %s\n",
6670 offsets
->lr_needs_saving
? "true" : "false");
6671 asm_fprintf (file
, "\t; outgoing args size: %wd\n",
6672 offsets
->outgoing_args_size
);
6673 asm_fprintf (file
, "\t; pretend size: %wd\n", offsets
->pretend_size
);
6674 asm_fprintf (file
, "\t; local vars size: %wd\n", offsets
->local_vars
);
6675 asm_fprintf (file
, "\t; callee save size: %wd\n",
6676 offsets
->callee_saves
);
6680 asm_fprintf (file
, "\t; HSA kernel entry point\n");
6681 asm_fprintf (file
, "\t; local vars size: %wd\n", offsets
->local_vars
);
6682 asm_fprintf (file
, "\t; outgoing args size: %wd\n",
6683 offsets
->outgoing_args_size
);
6687 /* Helper function for print_operand and print_operand_address.
6689 Print a register as the assembler requires, according to mode and name. */
6692 print_reg (FILE *file
, rtx x
)
6694 machine_mode mode
= GET_MODE (x
);
6695 if (VECTOR_MODE_P (mode
))
6696 mode
= GET_MODE_INNER (mode
);
6697 if (mode
== BImode
|| mode
== QImode
|| mode
== HImode
|| mode
== SImode
6698 || mode
== HFmode
|| mode
== SFmode
)
6699 fprintf (file
, "%s", reg_names
[REGNO (x
)]);
6700 else if (mode
== DImode
|| mode
== DFmode
)
6702 if (SGPR_REGNO_P (REGNO (x
)))
6703 fprintf (file
, "s[%i:%i]", REGNO (x
) - FIRST_SGPR_REG
,
6704 REGNO (x
) - FIRST_SGPR_REG
+ 1);
6705 else if (VGPR_REGNO_P (REGNO (x
)))
6706 fprintf (file
, "v[%i:%i]", REGNO (x
) - FIRST_VGPR_REG
,
6707 REGNO (x
) - FIRST_VGPR_REG
+ 1);
6708 else if (REGNO (x
) == FLAT_SCRATCH_REG
)
6709 fprintf (file
, "flat_scratch");
6710 else if (REGNO (x
) == EXEC_REG
)
6711 fprintf (file
, "exec");
6712 else if (REGNO (x
) == VCC_LO_REG
)
6713 fprintf (file
, "vcc");
6715 fprintf (file
, "[%s:%s]",
6716 reg_names
[REGNO (x
)], reg_names
[REGNO (x
) + 1]);
6718 else if (mode
== TImode
)
6720 if (SGPR_REGNO_P (REGNO (x
)))
6721 fprintf (file
, "s[%i:%i]", REGNO (x
) - FIRST_SGPR_REG
,
6722 REGNO (x
) - FIRST_SGPR_REG
+ 3);
6723 else if (VGPR_REGNO_P (REGNO (x
)))
6724 fprintf (file
, "v[%i:%i]", REGNO (x
) - FIRST_VGPR_REG
,
6725 REGNO (x
) - FIRST_VGPR_REG
+ 3);
6733 /* Implement TARGET_SECTION_TYPE_FLAGS.
6735 Return a set of section attributes for use by TARGET_ASM_NAMED_SECTION. */
6738 gcn_section_type_flags (tree decl
, const char *name
, int reloc
)
6740 if (strcmp (name
, ".lds_bss") == 0)
6741 return SECTION_WRITE
| SECTION_BSS
| SECTION_DEBUG
;
6743 return default_section_type_flags (decl
, name
, reloc
);
6746 /* Helper function for gcn_asm_output_symbol_ref.
6748 FIXME: This function is used to lay out gang-private variables in LDS
6750 There may be cases in which gang-private variables in different compilation
6751 units could clobber each other. In that case we should be relying on the
6752 linker to lay out gang-private LDS space, but that doesn't appear to be
6753 possible at present. */
6756 gcn_print_lds_decl (FILE *f
, tree var
)
6759 if ((offset
= lds_allocs
.get (var
)))
6760 fprintf (f
, "%u", (unsigned) *offset
);
6763 unsigned HOST_WIDE_INT align
= DECL_ALIGN_UNIT (var
);
6764 tree type
= TREE_TYPE (var
);
6765 unsigned HOST_WIDE_INT size
= tree_to_uhwi (TYPE_SIZE_UNIT (type
));
6766 if (size
> align
&& size
> 4 && align
< 8)
6769 gang_private_hwm
= ((gang_private_hwm
+ align
- 1) & ~(align
- 1));
6771 lds_allocs
.put (var
, gang_private_hwm
);
6772 fprintf (f
, "%u", gang_private_hwm
);
6773 gang_private_hwm
+= size
;
6774 if (gang_private_hwm
> gang_private_size_opt
)
6775 error ("%d bytes of gang-private data-share memory exhausted"
6776 " (increase with %<-mgang-private-size=%d%>, for example)",
6777 gang_private_size_opt
, gang_private_hwm
);
6781 /* Implement ASM_OUTPUT_SYMBOL_REF via gcn-hsa.h. */
6784 gcn_asm_output_symbol_ref (FILE *file
, rtx x
)
6788 && (decl
= SYMBOL_REF_DECL (x
)) != 0
6790 && AS_LDS_P (TYPE_ADDR_SPACE (TREE_TYPE (decl
))))
6792 /* LDS symbols (emitted using this hook) are only used at present
6793 to propagate worker values from an active thread to neutered
6794 threads. Use the same offset for each such block, but don't
6795 use zero because null pointers are used to identify the active
6796 thread in GOACC_single_copy_start calls. */
6797 gcn_print_lds_decl (file
, decl
);
6801 assemble_name (file
, XSTR (x
, 0));
6802 /* FIXME: See above -- this condition is unreachable. */
6804 && (decl
= SYMBOL_REF_DECL (x
)) != 0
6806 && AS_LDS_P (TYPE_ADDR_SPACE (TREE_TYPE (decl
))))
6807 fputs ("@abs32", file
);
6811 /* Implement TARGET_CONSTANT_ALIGNMENT.
6813 Returns the alignment in bits of a constant that is being placed in memory.
6814 CONSTANT is the constant and BASIC_ALIGN is the alignment that the object
6815 would ordinarily have. */
6817 static HOST_WIDE_INT
6818 gcn_constant_alignment (const_tree
ARG_UNUSED (constant
),
6819 HOST_WIDE_INT basic_align
)
6821 return basic_align
> 128 ? basic_align
: 128;
6824 /* Implement PRINT_OPERAND_ADDRESS via gcn.h. */
6827 print_operand_address (FILE *file
, rtx mem
)
6829 gcc_assert (MEM_P (mem
));
6833 addr_space_t as
= MEM_ADDR_SPACE (mem
);
6834 rtx addr
= XEXP (mem
, 0);
6835 gcc_assert (REG_P (addr
) || GET_CODE (addr
) == PLUS
);
6837 if (AS_SCRATCH_P (as
))
6838 switch (GET_CODE (addr
))
6841 print_reg (file
, addr
);
6845 reg
= XEXP (addr
, 0);
6846 offset
= XEXP (addr
, 1);
6847 print_reg (file
, reg
);
6848 if (GET_CODE (offset
) == CONST_INT
)
6849 fprintf (file
, " offset:" HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
6858 else if (AS_ANY_FLAT_P (as
))
6860 if (GET_CODE (addr
) == REG
)
6861 print_reg (file
, addr
);
6864 gcc_assert (TARGET_GCN5_PLUS
);
6865 print_reg (file
, XEXP (addr
, 0));
6868 else if (AS_GLOBAL_P (as
))
6870 gcc_assert (TARGET_GCN5_PLUS
);
6873 rtx vgpr_offset
= NULL_RTX
;
6875 if (GET_CODE (addr
) == PLUS
)
6877 base
= XEXP (addr
, 0);
6879 if (GET_CODE (base
) == PLUS
)
6881 /* (SGPR + VGPR) + CONST */
6882 vgpr_offset
= XEXP (base
, 1);
6883 base
= XEXP (base
, 0);
6887 rtx offset
= XEXP (addr
, 1);
6891 vgpr_offset
= offset
;
6892 else if (CONST_INT_P (offset
))
6893 /* VGPR + CONST or SGPR + CONST */
6896 output_operand_lossage ("bad ADDR_SPACE_GLOBAL address");
6902 if (VGPR_REGNO_P (REGNO (base
)))
6903 print_reg (file
, base
);
6904 else if (SGPR_REGNO_P (REGNO (base
)))
6906 /* The assembler requires a 64-bit VGPR pair here, even though
6907 the offset should be only 32-bit. */
6908 if (vgpr_offset
== NULL_RTX
)
6909 /* In this case, the vector offset is zero, so we use the first
6910 lane of v1, which is initialized to zero. */
6911 fprintf (file
, "v1");
6912 else if (REG_P (vgpr_offset
)
6913 && VGPR_REGNO_P (REGNO (vgpr_offset
)))
6914 fprintf (file
, "v%d", REGNO (vgpr_offset
) - FIRST_VGPR_REG
);
6916 output_operand_lossage ("bad ADDR_SPACE_GLOBAL address");
6920 output_operand_lossage ("bad ADDR_SPACE_GLOBAL address");
6922 else if (AS_ANY_DS_P (as
))
6923 switch (GET_CODE (addr
))
6926 print_reg (file
, addr
);
6930 reg
= XEXP (addr
, 0);
6931 print_reg (file
, reg
);
6939 switch (GET_CODE (addr
))
6942 print_reg (file
, addr
);
6943 fprintf (file
, ", 0");
6947 reg
= XEXP (addr
, 0);
6948 offset
= XEXP (addr
, 1);
6949 print_reg (file
, reg
);
6950 fprintf (file
, ", ");
6951 if (GET_CODE (offset
) == REG
)
6952 print_reg (file
, reg
);
6953 else if (GET_CODE (offset
) == CONST_INT
)
6954 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
6965 /* Implement PRINT_OPERAND via gcn.h.
6967 b - print operand size as untyped operand (b8/b16/b32/b64)
6968 B - print operand size as SI/DI untyped operand (b32/b32/b32/b64)
6969 i - print operand size as untyped operand (i16/b32/i64)
6970 I - print operand size as SI/DI untyped operand(i32/b32/i64)
6971 u - print operand size as untyped operand (u16/u32/u64)
6972 U - print operand size as SI/DI untyped operand(u32/u64)
6973 o - print operand size as memory access size for loads
6974 (ubyte/ushort/dword/dwordx2/wordx3/dwordx4)
6975 s - print operand size as memory access size for stores
6976 (byte/short/dword/dwordx2/wordx3/dwordx4)
6977 C - print conditional code for s_cbranch (_sccz/_sccnz/_vccz/_vccnz...)
6978 c - print inverse conditional code for s_cbranch
6979 D - print conditional code for s_cmp (eq_u64/lg_u64...)
6980 E - print conditional code for v_cmp (eq_u64/ne_u64...)
6981 A - print address in formatting suitable for given address space.
6982 O - print offset:n for data share operations.
6983 ^ - print "_co" suffix for GCN5 mnemonics
6984 g - print "glc", if appropriate for given MEM
6985 L - print low-part of a multi-reg value
6986 H - print second part of a multi-reg value (high-part of 2-reg value)
6987 J - print third part of a multi-reg value
6988 K - print fourth part of a multi-reg value
6992 print_operand (FILE *file
, rtx x
, int code
)
6994 int xcode
= x
? GET_CODE (x
) : 0;
6995 bool invert
= false;
6998 /* Instructions have the following suffixes.
6999 If there are two suffixes, the first is the destination type,
7000 and the second is the source type.
7002 B32 Bitfield (untyped data) 32-bit
7003 B64 Bitfield (untyped data) 64-bit
7004 F16 floating-point 16-bit
7005 F32 floating-point 32-bit (IEEE 754 single-precision float)
7006 F64 floating-point 64-bit (IEEE 754 double-precision float)
7007 I16 signed 32-bit integer
7008 I32 signed 32-bit integer
7009 I64 signed 64-bit integer
7010 U16 unsigned 32-bit integer
7011 U32 unsigned 32-bit integer
7012 U64 unsigned 64-bit integer */
7014 /* Print operand size as untyped suffix. */
7018 machine_mode mode
= GET_MODE (x
);
7019 if (VECTOR_MODE_P (mode
))
7020 mode
= GET_MODE_INNER (mode
);
7021 switch (GET_MODE_SIZE (mode
))
7036 output_operand_lossage ("invalid operand %%xn code");
7045 machine_mode mode
= GET_MODE (x
);
7046 if (VECTOR_MODE_P (mode
))
7047 mode
= GET_MODE_INNER (mode
);
7048 switch (GET_MODE_SIZE (mode
))
7059 output_operand_lossage ("invalid operand %%xn code");
7066 fputs ("sext(", file
);
7067 print_operand (file
, x
, 0);
7075 bool signed_p
= code
== 'i';
7076 bool min32_p
= code
== 'I' || code
== 'U';
7078 machine_mode mode
= GET_MODE (x
);
7079 if (VECTOR_MODE_P (mode
))
7080 mode
= GET_MODE_INNER (mode
);
7081 if (mode
== VOIDmode
)
7082 switch (GET_CODE (x
))
7085 s
= signed_p
? "_i32" : "_u32";
7091 output_operand_lossage ("invalid operand %%xn code");
7094 else if (FLOAT_MODE_P (mode
))
7095 switch (GET_MODE_SIZE (mode
))
7107 output_operand_lossage ("invalid operand %%xn code");
7111 switch (GET_MODE_SIZE (mode
))
7116 s
= signed_p
? "_i32" : "_u32";
7119 s
= signed_p
? "_i64" : "_u64";
7122 output_operand_lossage ("invalid operand %%xn code");
7126 switch (GET_MODE_SIZE (mode
))
7129 s
= signed_p
? "_i8" : "_u8";
7132 s
= signed_p
? "_i16" : "_u16";
7135 s
= signed_p
? "_i32" : "_u32";
7138 s
= signed_p
? "_i64" : "_u64";
7141 output_operand_lossage ("invalid operand %%xn code");
7147 /* Print operand size as untyped suffix. */
7151 machine_mode mode
= GET_MODE (x
);
7152 if (VECTOR_MODE_P (mode
))
7153 mode
= GET_MODE_INNER (mode
);
7174 /* Fall-through - the other cases for 'o' are the same as for 's'. */
7180 machine_mode mode
= GET_MODE (x
);
7181 if (VECTOR_MODE_P (mode
))
7182 mode
= GET_MODE_INNER (mode
);
7205 output_operand_lossage ("invalid operand %%xn code");
7214 output_operand_lossage ("invalid %%xn code");
7217 print_operand_address (file
, x
);
7223 output_operand_lossage ("invalid %%xn code");
7226 if (AS_GDS_P (MEM_ADDR_SPACE (x
)))
7227 fprintf (file
, " gds");
7229 rtx x0
= XEXP (x
, 0);
7230 if (AS_GLOBAL_P (MEM_ADDR_SPACE (x
)))
7232 gcc_assert (TARGET_GCN5_PLUS
);
7234 fprintf (file
, ", ");
7237 rtx const_offset
= NULL_RTX
;
7239 if (GET_CODE (base
) == PLUS
)
7241 rtx offset
= XEXP (x0
, 1);
7242 base
= XEXP (x0
, 0);
7244 if (GET_CODE (base
) == PLUS
)
7245 /* (SGPR + VGPR) + CONST */
7246 /* Ignore the VGPR offset for this operand. */
7247 base
= XEXP (base
, 0);
7249 if (CONST_INT_P (offset
))
7250 const_offset
= XEXP (x0
, 1);
7251 else if (REG_P (offset
))
7253 /* Ignore the VGPR offset for this operand. */
7256 output_operand_lossage ("bad ADDR_SPACE_GLOBAL address");
7261 if (VGPR_REGNO_P (REGNO (base
)))
7262 /* The VGPR address is specified in the %A operand. */
7263 fprintf (file
, "off");
7264 else if (SGPR_REGNO_P (REGNO (base
)))
7265 print_reg (file
, base
);
7267 output_operand_lossage ("bad ADDR_SPACE_GLOBAL address");
7270 output_operand_lossage ("bad ADDR_SPACE_GLOBAL address");
7272 if (const_offset
!= NULL_RTX
)
7273 fprintf (file
, " offset:" HOST_WIDE_INT_PRINT_DEC
,
7274 INTVAL (const_offset
));
7279 if (GET_CODE (x0
) == REG
)
7281 if (GET_CODE (x0
) != PLUS
)
7283 output_operand_lossage ("invalid %%xn code");
7286 rtx val
= XEXP (x0
, 1);
7287 if (GET_CODE (val
) == CONST_VECTOR
)
7288 val
= CONST_VECTOR_ELT (val
, 0);
7289 if (GET_CODE (val
) != CONST_INT
)
7291 output_operand_lossage ("invalid %%xn code");
7294 fprintf (file
, " offset:" HOST_WIDE_INT_PRINT_DEC
, INTVAL (val
));
7305 if ((xcode
!= EQ
&& xcode
!= NE
) || !REG_P (XEXP (x
, 0)))
7307 output_operand_lossage ("invalid %%xn code");
7310 switch (REGNO (XEXP (x
, 0)))
7317 /* For some reason llvm-mc insists on scc0 instead of sccz. */
7325 output_operand_lossage ("invalid %%xn code");
7329 if (xcode
== (invert
? NE
: EQ
))
7330 fputc (num
? '0' : 'z', file
);
7332 fputs (num
? "1" : "nz", file
);
7338 bool cmp_signed
= false;
7376 output_operand_lossage ("invalid %%xn code");
7380 fputc (cmp_signed
? 'i' : 'u', file
);
7382 machine_mode mode
= GET_MODE (XEXP (x
, 0));
7384 if (mode
== VOIDmode
)
7385 mode
= GET_MODE (XEXP (x
, 1));
7387 /* If both sides are constants, then assume the instruction is in
7388 SImode since s_cmp can only do integer compares. */
7389 if (mode
== VOIDmode
)
7392 switch (GET_MODE_SIZE (mode
))
7401 output_operand_lossage ("invalid operand %%xn code");
7410 bool cmp_signed
= false;
7411 machine_mode mode
= GET_MODE (XEXP (x
, 0));
7413 if (mode
== VOIDmode
)
7414 mode
= GET_MODE (XEXP (x
, 1));
7416 /* If both sides are constants, assume the instruction is in SFmode
7417 if either operand is floating point, otherwise assume SImode. */
7418 if (mode
== VOIDmode
)
7420 if (GET_CODE (XEXP (x
, 0)) == CONST_DOUBLE
7421 || GET_CODE (XEXP (x
, 1)) == CONST_DOUBLE
)
7427 /* Use the same format code for vector comparisons. */
7428 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
7429 || GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
7430 mode
= GET_MODE_INNER (mode
);
7432 bool float_p
= GET_MODE_CLASS (mode
) == MODE_FLOAT
;
7440 s
= float_p
? "_neq_" : "_ne_";
7495 output_operand_lossage ("invalid %%xn code");
7499 fputc (float_p
? 'f' : cmp_signed
? 'i' : 'u', file
);
7501 switch (GET_MODE_SIZE (mode
))
7504 output_operand_lossage ("operand %%xn code invalid for QImode");
7516 output_operand_lossage ("invalid operand %%xn code");
7523 print_operand (file
, gcn_operand_part (GET_MODE (x
), x
, 0), 0);
7526 print_operand (file
, gcn_operand_part (GET_MODE (x
), x
, 1), 0);
7529 print_operand (file
, gcn_operand_part (GET_MODE (x
), x
, 2), 0);
7532 print_operand (file
, gcn_operand_part (GET_MODE (x
), x
, 3), 0);
7535 /* Print a scalar register number as an integer. Temporary hack. */
7536 gcc_assert (REG_P (x
));
7537 fprintf (file
, "%u", (int) REGNO (x
));
7540 /* Print a vector register number as an integer. Temporary hack. */
7541 gcc_assert (REG_P (x
));
7542 fprintf (file
, "%u", (int) REGNO (x
) - FIRST_VGPR_REG
);
7546 print_reg (file
, x
);
7547 else if (xcode
== MEM
)
7548 output_address (GET_MODE (x
), x
);
7549 else if (xcode
== CONST_INT
)
7550 fprintf (file
, "%i", (int) INTVAL (x
));
7551 else if (xcode
== CONST_VECTOR
)
7552 print_operand (file
, CONST_VECTOR_ELT (x
, 0), code
);
7553 else if (xcode
== CONST_DOUBLE
)
7556 switch (gcn_inline_fp_constant_p (x
, false))
7586 rtx ix
= simplify_gen_subreg (GET_MODE (x
) == DFmode
7588 x
, GET_MODE (x
), 0);
7590 print_operand (file
, ix
, code
);
7592 output_operand_lossage ("invalid fp constant");
7596 fprintf (file
, str
);
7600 output_addr_const (file
, x
);
7603 if (TARGET_GCN5_PLUS
)
7604 fputs ("_co", file
);
7607 gcc_assert (xcode
== MEM
);
7608 if (MEM_VOLATILE_P (x
))
7609 fputs (" glc", file
);
7612 output_operand_lossage ("invalid %%xn code");
7617 /* Implement DEBUGGER_REGNO macro.
7619 Return the DWARF register number that corresponds to the GCC internal
7623 gcn_dwarf_register_number (unsigned int regno
)
7625 /* Registers defined in DWARF. */
7626 if (regno
== EXEC_LO_REG
)
7628 /* We need to use a more complex DWARF expression for this
7629 else if (regno == EXEC_HI_REG)
7631 else if (regno
== VCC_LO_REG
)
7633 /* We need to use a more complex DWARF expression for this
7634 else if (regno == VCC_HI_REG)
7636 else if (regno
== SCC_REG
)
7638 else if (regno
== DWARF_LINK_REGISTER
)
7640 else if (SGPR_REGNO_P (regno
))
7642 if (regno
- FIRST_SGPR_REG
< 64)
7643 return (regno
- FIRST_SGPR_REG
+ 32);
7645 return (regno
- FIRST_SGPR_REG
+ 1024);
7647 else if (VGPR_REGNO_P (regno
))
7648 return (regno
- FIRST_VGPR_REG
+ 2560);
7650 /* Otherwise, there's nothing sensible to do. */
7651 return regno
+ 100000;
7654 /* Implement TARGET_DWARF_REGISTER_SPAN.
7656 DImode and Vector DImode require additional registers. */
7659 gcn_dwarf_register_span (rtx rtl
)
7661 machine_mode mode
= GET_MODE (rtl
);
7663 if (VECTOR_MODE_P (mode
))
7664 mode
= GET_MODE_INNER (mode
);
7666 if (GET_MODE_SIZE (mode
) != 8)
7669 unsigned regno
= REGNO (rtl
);
7671 if (regno
== DWARF_LINK_REGISTER
)
7674 rtx p
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
7675 XVECEXP (p
, 0, 0) = gen_rtx_REG (SImode
, regno
);
7676 XVECEXP (p
, 0, 1) = gen_rtx_REG (SImode
, regno
+ 1);
7682 /* {{{ TARGET hook overrides. */
7684 #undef TARGET_ADDR_SPACE_ADDRESS_MODE
7685 #define TARGET_ADDR_SPACE_ADDRESS_MODE gcn_addr_space_address_mode
7686 #undef TARGET_ADDR_SPACE_DEBUG
7687 #define TARGET_ADDR_SPACE_DEBUG gcn_addr_space_debug
7688 #undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
7689 #define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
7690 gcn_addr_space_legitimate_address_p
7691 #undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
7692 #define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS gcn_addr_space_legitimize_address
7693 #undef TARGET_ADDR_SPACE_POINTER_MODE
7694 #define TARGET_ADDR_SPACE_POINTER_MODE gcn_addr_space_pointer_mode
7695 #undef TARGET_ADDR_SPACE_SUBSET_P
7696 #define TARGET_ADDR_SPACE_SUBSET_P gcn_addr_space_subset_p
7697 #undef TARGET_ADDR_SPACE_CONVERT
7698 #define TARGET_ADDR_SPACE_CONVERT gcn_addr_space_convert
7699 #undef TARGET_ARG_PARTIAL_BYTES
7700 #define TARGET_ARG_PARTIAL_BYTES gcn_arg_partial_bytes
7701 #undef TARGET_ASM_ALIGNED_DI_OP
7702 #define TARGET_ASM_ALIGNED_DI_OP "\t.8byte\t"
7703 #undef TARGET_ASM_FILE_START
7704 #define TARGET_ASM_FILE_START output_file_start
7705 #undef TARGET_ASM_FUNCTION_PROLOGUE
7706 #define TARGET_ASM_FUNCTION_PROLOGUE gcn_target_asm_function_prologue
7707 #undef TARGET_ASM_SELECT_SECTION
7708 #define TARGET_ASM_SELECT_SECTION gcn_asm_select_section
7709 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
7710 #define TARGET_ASM_TRAMPOLINE_TEMPLATE gcn_asm_trampoline_template
7711 #undef TARGET_ATTRIBUTE_TABLE
7712 #define TARGET_ATTRIBUTE_TABLE gcn_attribute_table
7713 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
7714 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
7715 gcn_autovectorize_vector_modes
7716 #undef TARGET_BUILTIN_DECL
7717 #define TARGET_BUILTIN_DECL gcn_builtin_decl
7718 #undef TARGET_CAN_CHANGE_MODE_CLASS
7719 #define TARGET_CAN_CHANGE_MODE_CLASS gcn_can_change_mode_class
7720 #undef TARGET_CAN_ELIMINATE
7721 #define TARGET_CAN_ELIMINATE gcn_can_eliminate_p
7722 #undef TARGET_CANNOT_COPY_INSN_P
7723 #define TARGET_CANNOT_COPY_INSN_P gcn_cannot_copy_insn_p
7724 #undef TARGET_CLASS_LIKELY_SPILLED_P
7725 #define TARGET_CLASS_LIKELY_SPILLED_P gcn_class_likely_spilled_p
7726 #undef TARGET_CLASS_MAX_NREGS
7727 #define TARGET_CLASS_MAX_NREGS gcn_class_max_nregs
7728 #undef TARGET_CONDITIONAL_REGISTER_USAGE
7729 #define TARGET_CONDITIONAL_REGISTER_USAGE gcn_conditional_register_usage
7730 #undef TARGET_CONSTANT_ALIGNMENT
7731 #define TARGET_CONSTANT_ALIGNMENT gcn_constant_alignment
7732 #undef TARGET_DEBUG_UNWIND_INFO
7733 #define TARGET_DEBUG_UNWIND_INFO gcn_debug_unwind_info
7734 #undef TARGET_DWARF_REGISTER_SPAN
7735 #define TARGET_DWARF_REGISTER_SPAN gcn_dwarf_register_span
7736 #undef TARGET_EMUTLS_VAR_INIT
7737 #define TARGET_EMUTLS_VAR_INIT gcn_emutls_var_init
7738 #undef TARGET_EXPAND_BUILTIN
7739 #define TARGET_EXPAND_BUILTIN gcn_expand_builtin
7740 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
7741 #define TARGET_EXPAND_DIVMOD_LIBFUNC gcn_expand_divmod_libfunc
7742 #undef TARGET_FRAME_POINTER_REQUIRED
7743 #define TARGET_FRAME_POINTER_REQUIRED gcn_frame_pointer_rqd
7744 #undef TARGET_FUNCTION_ARG
7745 #undef TARGET_FUNCTION_ARG_ADVANCE
7746 #define TARGET_FUNCTION_ARG_ADVANCE gcn_function_arg_advance
7747 #define TARGET_FUNCTION_ARG gcn_function_arg
7748 #undef TARGET_FUNCTION_VALUE
7749 #define TARGET_FUNCTION_VALUE gcn_function_value
7750 #undef TARGET_FUNCTION_VALUE_REGNO_P
7751 #define TARGET_FUNCTION_VALUE_REGNO_P gcn_function_value_regno_p
7752 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
7753 #define TARGET_GIMPLIFY_VA_ARG_EXPR gcn_gimplify_va_arg_expr
7754 #undef TARGET_OMP_DEVICE_KIND_ARCH_ISA
7755 #define TARGET_OMP_DEVICE_KIND_ARCH_ISA gcn_omp_device_kind_arch_isa
7756 #undef TARGET_GOACC_ADJUST_PRIVATE_DECL
7757 #define TARGET_GOACC_ADJUST_PRIVATE_DECL gcn_goacc_adjust_private_decl
7758 #undef TARGET_GOACC_CREATE_WORKER_BROADCAST_RECORD
7759 #define TARGET_GOACC_CREATE_WORKER_BROADCAST_RECORD \
7760 gcn_goacc_create_worker_broadcast_record
7761 #undef TARGET_GOACC_FORK_JOIN
7762 #define TARGET_GOACC_FORK_JOIN gcn_fork_join
7763 #undef TARGET_GOACC_REDUCTION
7764 #define TARGET_GOACC_REDUCTION gcn_goacc_reduction
7765 #undef TARGET_GOACC_VALIDATE_DIMS
7766 #define TARGET_GOACC_VALIDATE_DIMS gcn_goacc_validate_dims
7767 #undef TARGET_GOACC_SHARED_MEM_LAYOUT
7768 #define TARGET_GOACC_SHARED_MEM_LAYOUT gcn_shared_mem_layout
7769 #undef TARGET_HARD_REGNO_MODE_OK
7770 #define TARGET_HARD_REGNO_MODE_OK gcn_hard_regno_mode_ok
7771 #undef TARGET_HARD_REGNO_NREGS
7772 #define TARGET_HARD_REGNO_NREGS gcn_hard_regno_nregs
7773 #undef TARGET_HAVE_SPECULATION_SAFE_VALUE
7774 #define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed
7775 #undef TARGET_INIT_BUILTINS
7776 #define TARGET_INIT_BUILTINS gcn_init_builtins
7777 #undef TARGET_INIT_LIBFUNCS
7778 #define TARGET_INIT_LIBFUNCS gcn_init_libfuncs
7779 #undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
7780 #define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
7781 gcn_ira_change_pseudo_allocno_class
7782 #undef TARGET_LEGITIMATE_CONSTANT_P
7783 #define TARGET_LEGITIMATE_CONSTANT_P gcn_legitimate_constant_p
7784 #undef TARGET_LIBC_HAS_FUNCTION
7785 #define TARGET_LIBC_HAS_FUNCTION gcn_libc_has_function
7787 #define TARGET_LRA_P hook_bool_void_true
7788 #undef TARGET_MACHINE_DEPENDENT_REORG
7789 #define TARGET_MACHINE_DEPENDENT_REORG gcn_md_reorg
7790 #undef TARGET_MEMORY_MOVE_COST
7791 #define TARGET_MEMORY_MOVE_COST gcn_memory_move_cost
7792 #undef TARGET_MODES_TIEABLE_P
7793 #define TARGET_MODES_TIEABLE_P gcn_modes_tieable_p
7794 #undef TARGET_OPTION_OVERRIDE
7795 #define TARGET_OPTION_OVERRIDE gcn_option_override
7796 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
7797 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED \
7798 gcn_pretend_outgoing_varargs_named
7799 #undef TARGET_PROMOTE_FUNCTION_MODE
7800 #define TARGET_PROMOTE_FUNCTION_MODE gcn_promote_function_mode
7801 #undef TARGET_REGISTER_MOVE_COST
7802 #define TARGET_REGISTER_MOVE_COST gcn_register_move_cost
7803 #undef TARGET_RETURN_IN_MEMORY
7804 #define TARGET_RETURN_IN_MEMORY gcn_return_in_memory
7805 #undef TARGET_RTX_COSTS
7806 #define TARGET_RTX_COSTS gcn_rtx_costs
7807 #undef TARGET_SECONDARY_RELOAD
7808 #define TARGET_SECONDARY_RELOAD gcn_secondary_reload
7809 #undef TARGET_SECTION_TYPE_FLAGS
7810 #define TARGET_SECTION_TYPE_FLAGS gcn_section_type_flags
7811 #undef TARGET_SCALAR_MODE_SUPPORTED_P
7812 #define TARGET_SCALAR_MODE_SUPPORTED_P gcn_scalar_mode_supported_p
7813 #undef TARGET_SIMD_CLONE_ADJUST
7814 #define TARGET_SIMD_CLONE_ADJUST gcn_simd_clone_adjust
7815 #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
7816 #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
7817 gcn_simd_clone_compute_vecsize_and_simdlen
7818 #undef TARGET_SIMD_CLONE_USABLE
7819 #define TARGET_SIMD_CLONE_USABLE gcn_simd_clone_usable
7820 #undef TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P
7821 #define TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P \
7822 gcn_small_register_classes_for_mode_p
7823 #undef TARGET_SPILL_CLASS
7824 #define TARGET_SPILL_CLASS gcn_spill_class
7825 #undef TARGET_STRICT_ARGUMENT_NAMING
7826 #define TARGET_STRICT_ARGUMENT_NAMING gcn_strict_argument_naming
7827 #undef TARGET_TRAMPOLINE_INIT
7828 #define TARGET_TRAMPOLINE_INIT gcn_trampoline_init
7829 #undef TARGET_TRULY_NOOP_TRUNCATION
7830 #define TARGET_TRULY_NOOP_TRUNCATION gcn_truly_noop_truncation
7831 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
7832 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST gcn_vectorization_cost
7833 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
7834 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
7835 gcn_vectorize_builtin_vectorized_function
7836 #undef TARGET_VECTORIZE_GET_MASK_MODE
7837 #define TARGET_VECTORIZE_GET_MASK_MODE gcn_vectorize_get_mask_mode
7838 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
7839 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE gcn_vectorize_preferred_simd_mode
7840 #undef TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT
7841 #define TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT \
7842 gcn_preferred_vector_alignment
7843 #undef TARGET_VECTORIZE_RELATED_MODE
7844 #define TARGET_VECTORIZE_RELATED_MODE gcn_related_vector_mode
7845 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
7846 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
7847 gcn_vectorize_support_vector_misalignment
7848 #undef TARGET_VECTORIZE_VEC_PERM_CONST
7849 #define TARGET_VECTORIZE_VEC_PERM_CONST gcn_vectorize_vec_perm_const
7850 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
7851 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
7852 gcn_vector_alignment_reachable
7853 #undef TARGET_VECTOR_MODE_SUPPORTED_P
7854 #define TARGET_VECTOR_MODE_SUPPORTED_P gcn_vector_mode_supported_p
7856 struct gcc_target targetm
= TARGET_INITIALIZER
;