2019-11-07 Kwok Cheung Yeung <kcy@codesourcery.com>
gcc/
* ira.c (setup_alloc_regs): Setup no_unit_alloc_regs for
frame pointer in multiple registers.
(ira_setup_eliminable_regset): Setup eliminable_regset,
ira_no_alloc_regs and regs_ever_live for frame pointer in
multiple registers.
2019-11-10 Kwok Cheung Yeung <kcy@codesourcery.com>
gcc/
* lra-spills.c (assign_spill_hard_regs): Do not spill into
registers in eliminable_regset.
2019-11-14 Kwok Cheung Yeung <kcy@codesourcery.com>
gcc/
* lra-spills.c (assign_spill_hard_regs): Check that the spill
register is suitable for the mode.
2019-11-15 Kwok Cheung Yeung <kcy@codesourcery.com>
gcc/
* config/gcn/gcn.c (gcn_regno_reg_class): Return VCC_CONDITIONAL_REG
register class for VCC_LO and VCC_HI.
(gcn_spill_class): Use SGPR_REGS to spill registers in
VCC_CONDITIONAL_REG.
2019-11-15 Kwok Cheung Yeung <kcy@codesourcery.com>
gcc/
* config/gcn/gcn.c (gcn_expand_prologue): Remove initialization and
prologue use of v0.
(print_operand_address): Use v1 for zero vector offset.
2019-11-15 Kwok Cheung Yeung <kcy@codesourcery.com>
gcc/
* config/gcn/gcn.c (gcn_init_cumulative_args): Call reinit_regs.
2019-11-15 Kwok Cheung Yeung <kcy@codesourcery.com>
gcc/
* config/gcn/gcn.c (default_requested_args): New.
(gcn_parse_amdgpu_hsa_kernel_attribute): Initialize requested args
set with default_requested_args.
(gcn_conditional_register_usage): Limit register usage of non-kernel
functions. Reassign fixed registers if a non-standard set of args is
requested.
* config/gcn/gcn.h (FIXED_REGISTERS): Fix registers according to ABI.
2019-11-15 Kwok Cheung Yeung <kcy@codesourcery.com>
gcc/
* config/gcn/gcn.c (MAX_NORMAL_SGPR_COUNT, MAX_NORMAL_VGPR_COUNT): New.
(gcn_conditional_register_usage): Use constants in place of hard-coded
values.
(gcn_hsa_declare_function_name): Set lower bound for number of
SGPRs/VGPRs in non-leaf kernels to MAX_NORMAL_SGPR_COUNT and
MAX_NORMAL_VGPR_COUNT.
2019-11-15 Kwok Cheung Yeung <kcy@codesourcery.com>
gcc/
* config/gcn/gcn.h (FIXED_REGISTERS): Unfix frame pointer.
(CALL_USED_REGISTERS): Make frame pointer callee-saved.
(cherry picked from openacc-gcc-9-branch commit
20245bf2e0b8ac258f86e6495b3be3e09edd0181, and commit
e4005e490429a9595aee002528e0bf71281cb188)
+2019-11-18 Kwok Cheung Yeung <kcy@codesourcery.com>
+
+ Backport from mainline:
+
+ 2019-11-07 Kwok Cheung Yeung <kcy@codesourcery.com>
+
+ * ira.c (setup_alloc_regs): Setup no_unit_alloc_regs for
+ frame pointer in multiple registers.
+ (ira_setup_eliminable_regset): Setup eliminable_regset,
+ ira_no_alloc_regs and regs_ever_live for frame pointer in
+ multiple registers.
+
+ 2019-11-10 Kwok Cheung Yeung <kcy@codesourcery.com>
+
+ * lra-spills.c (assign_spill_hard_regs): Do not spill into
+ registers in eliminable_regset.
+
+ 2019-11-14 Kwok Cheung Yeung <kcy@codesourcery.com>
+
+ * lra-spills.c (assign_spill_hard_regs): Check that the spill
+ register is suitable for the mode.
+
+ 2019-11-15 Kwok Cheung Yeung <kcy@codesourcery.com>
+
+ * config/gcn/gcn.c (gcn_regno_reg_class): Return VCC_CONDITIONAL_REG
+ register class for VCC_LO and VCC_HI.
+ (gcn_spill_class): Use SGPR_REGS to spill registers in
+ VCC_CONDITIONAL_REG.
+
+ 2019-11-15 Kwok Cheung Yeung <kcy@codesourcery.com>
+
+ * config/gcn/gcn.c (gcn_expand_prologue): Remove initialization and
+ prologue use of v0.
+ (print_operand_address): Use v1 for zero vector offset.
+
+ 2019-11-15 Kwok Cheung Yeung <kcy@codesourcery.com>
+
+ * config/gcn/gcn.c (gcn_init_cumulative_args): Call reinit_regs.
+
+ 2019-11-15 Kwok Cheung Yeung <kcy@codesourcery.com>
+
+ * config/gcn/gcn.c (default_requested_args): New.
+ (gcn_parse_amdgpu_hsa_kernel_attribute): Initialize requested args
+ set with default_requested_args.
+ (gcn_conditional_register_usage): Limit register usage of non-kernel
+ functions. Reassign fixed registers if a non-standard set of args is
+ requested.
+ * config/gcn/gcn.h (FIXED_REGISTERS): Fix registers according to ABI.
+
+ 2019-11-15 Kwok Cheung Yeung <kcy@codesourcery.com>
+
+ * config/gcn/gcn.c (MAX_NORMAL_SGPR_COUNT, MAX_NORMAL_VGPR_COUNT): New.
+ (gcn_conditional_register_usage): Use constants in place of hard-coded
+ values.
+ (gcn_hsa_declare_function_name): Set lower bound for number of
+ SGPRs/VGPRs in non-leaf kernels to MAX_NORMAL_SGPR_COUNT and
+ MAX_NORMAL_VGPR_COUNT.
+
+ 2019-11-15 Kwok Cheung Yeung <kcy@codesourcery.com>
+
+ * config/gcn/gcn.h (FIXED_REGISTERS): Unfix frame pointer.
+ (CALL_USED_REGISTERS): Make frame pointer callee-saved.
+
2019-10-16 Julian Brown <julian@codesourcery.com>
* config/gcn/gcn-protos.h (gcn_goacc_adjust_gangprivate_decl): Rename
#define LDS_SIZE 65536
+/* The number of registers usable by normal non-kernel functions.
+ The SGPR count includes any special extra registers such as VCC. */
+
+#define MAX_NORMAL_SGPR_COUNT 64
+#define MAX_NORMAL_VGPR_COUNT 24
+
/* }}} */
/* {{{ Initialization and options. */
{"work_item_id_Z", NULL, V64SImode, FIRST_VGPR_REG + 2}
};
+static const long default_requested_args
+ = (1 << PRIVATE_SEGMENT_BUFFER_ARG)
+ | (1 << DISPATCH_PTR_ARG)
+ | (1 << QUEUE_PTR_ARG)
+ | (1 << KERNARG_SEGMENT_PTR_ARG)
+ | (1 << PRIVATE_SEGMENT_WAVE_OFFSET_ARG)
+ | (1 << WORKGROUP_ID_X_ARG)
+ | (1 << WORK_ITEM_ID_X_ARG)
+ | (1 << WORK_ITEM_ID_Y_ARG)
+ | (1 << WORK_ITEM_ID_Z_ARG);
+
/* Extract parameter settings from __attribute__((amdgpu_hsa_kernel ())).
This function also sets the default values for some arguments.
tree list)
{
bool err = false;
- args->requested = ((1 << PRIVATE_SEGMENT_BUFFER_ARG)
- | (1 << QUEUE_PTR_ARG)
- | (1 << KERNARG_SEGMENT_PTR_ARG)
- | (1 << PRIVATE_SEGMENT_WAVE_OFFSET_ARG));
+ args->requested = default_requested_args;
args->nargs = 0;
for (int a = 0; a < GCN_KERNEL_ARG_TYPES; a++)
args->requested |= (1 << a);
args->order[args->nargs++] = a;
}
- args->requested |= (1 << WORKGROUP_ID_X_ARG);
- args->requested |= (1 << WORK_ITEM_ID_Z_ARG);
/* Requesting WORK_ITEM_ID_Z_ARG implies requesting WORK_ITEM_ID_X_ARG and
WORK_ITEM_ID_Y_ARG. Similarly, requesting WORK_ITEM_ID_Y_ARG implies
if (args->requested & (1 << WORK_ITEM_ID_Y_ARG))
args->requested |= (1 << WORK_ITEM_ID_X_ARG);
- /* Always enable this so that kernargs is in a predictable place for
- gomp_print, etc. */
- args->requested |= (1 << DISPATCH_PTR_ARG);
-
int sgpr_regno = FIRST_SGPR_REG;
args->nsgprs = 0;
for (int a = 0; a < GCN_KERNEL_ARG_TYPES; a++)
{
case SCC_REG:
return SCC_CONDITIONAL_REG;
+ case VCC_LO_REG:
+ case VCC_HI_REG:
+ return VCC_CONDITIONAL_REG;
case VCCZ_REG:
return VCCZ_CONDITIONAL_REG;
case EXECZ_REG:
static reg_class_t
gcn_spill_class (reg_class_t c, machine_mode /*mode */ )
{
- if (reg_classes_intersect_p (ALL_CONDITIONAL_REGS, c))
+ if (reg_classes_intersect_p (ALL_CONDITIONAL_REGS, c)
+ || c == VCC_CONDITIONAL_REG)
return SGPR_REGS;
else
return NO_REGS;
static void
gcn_conditional_register_usage (void)
{
- int i;
+ if (!cfun || !cfun->machine)
+ return;
- /* FIXME: Do we need to reset fixed_regs? */
+ if (cfun->machine->normal_function)
+ {
+ /* Restrict the set of SGPRs and VGPRs used by non-kernel functions. */
+ for (int i = SGPR_REGNO (MAX_NORMAL_SGPR_COUNT - 2);
+ i <= LAST_SGPR_REG; i++)
+ fixed_regs[i] = 1, call_used_regs[i] = 1;
-/* Limit ourselves to 1/16 the register file for maximimum sized workgroups.
- There are enough SGPRs not to limit those.
- TODO: Adjust this more dynamically. */
- for (i = FIRST_VGPR_REG + 64; i <= LAST_VGPR_REG; i++)
- fixed_regs[i] = 1, call_used_regs[i] = 1;
+ for (int i = VGPR_REGNO (MAX_NORMAL_VGPR_COUNT);
+ i <= LAST_VGPR_REG; i++)
+ fixed_regs[i] = 1, call_used_regs[i] = 1;
- if (!cfun || !cfun->machine || cfun->machine->normal_function)
- {
- /* Normal functions can't know what kernel argument registers are
- live, so just fix the bottom 16 SGPRs, and bottom 3 VGPRs. */
- for (i = 0; i < 16; i++)
- fixed_regs[FIRST_SGPR_REG + i] = 1;
- for (i = 0; i < 3; i++)
- fixed_regs[FIRST_VGPR_REG + i] = 1;
return;
}
+ /* If the set of requested args is the default set, nothing more needs to
+ be done. */
+ if (cfun->machine->args.requested == default_requested_args)
+ return;
+
+ /* Requesting a set of args different from the default violates the ABI. */
+ if (!leaf_function_p ())
+ warning (0, "A non-default set of initial values has been requested, "
+ "which violates the ABI!");
+
+ for (int i = SGPR_REGNO (0); i < SGPR_REGNO (14); i++)
+ fixed_regs[i] = 0;
+
/* Fix the runtime argument register containing values that may be
needed later. DISPATCH_PTR_ARG and FLAT_SCRATCH_* should not be
needed after the prologue so there's no need to fix them. */
fixed_regs[cfun->machine->args.reg[PRIVATE_SEGMENT_WAVE_OFFSET_ARG]] = 1;
if (cfun->machine->args.reg[PRIVATE_SEGMENT_BUFFER_ARG] >= 0)
{
+ /* The upper 32-bits of the 64-bit descriptor are not used, so allow
+ the containing registers to be used for other purposes. */
fixed_regs[cfun->machine->args.reg[PRIVATE_SEGMENT_BUFFER_ARG]] = 1;
fixed_regs[cfun->machine->args.reg[PRIVATE_SEGMENT_BUFFER_ARG] + 1] = 1;
- fixed_regs[cfun->machine->args.reg[PRIVATE_SEGMENT_BUFFER_ARG] + 2] = 1;
- fixed_regs[cfun->machine->args.reg[PRIVATE_SEGMENT_BUFFER_ARG] + 3] = 1;
}
if (cfun->machine->args.reg[KERNARG_SEGMENT_PTR_ARG] >= 0)
{
cfun->machine->args = cum->args;
if (!caller && cfun->machine->normal_function)
gcn_detect_incoming_pointer_arg (fndecl);
+
+ reinit_regs ();
}
static bool
cfun->machine->args.
reg[PRIVATE_SEGMENT_WAVE_OFFSET_ARG]);
- if (TARGET_GCN5_PLUS)
- {
- /* v0 is reserved for constant zero so that "global"
- memory instructions can have a nul-offset without
- causing reloads. */
- emit_insn (gen_vec_duplicatev64si
- (gen_rtx_REG (V64SImode, VGPR_REGNO (0)), const0_rtx));
- }
-
if (cfun->machine->args.requested & (1 << FLAT_SCRATCH_INIT_ARG))
{
rtx fs_init_lo =
gen_int_mode (LDS_SIZE, SImode));
emit_insn (gen_prologue_use (gen_rtx_REG (SImode, M0_REG)));
- if (TARGET_GCN5_PLUS)
- emit_insn (gen_prologue_use (gen_rtx_REG (SImode, VGPR_REGNO (0))));
if (cfun && cfun->machine && !cfun->machine->normal_function && flag_openmp)
{
if (!leaf_function_p ())
{
/* We can't know how many registers function calls might use. */
- if (vgpr < 64)
- vgpr = 64;
- if (sgpr + extra_regs < 102)
- sgpr = 102 - extra_regs;
+ if (vgpr < MAX_NORMAL_VGPR_COUNT)
+ vgpr = MAX_NORMAL_VGPR_COUNT;
+ if (sgpr + extra_regs < MAX_NORMAL_SGPR_COUNT)
+ sgpr = MAX_NORMAL_SGPR_COUNT - extra_regs;
}
/* GFX8 allocates SGPRs in blocks of 8.
/* The assembler requires a 64-bit VGPR pair here, even though
the offset should be only 32-bit. */
if (vgpr_offset == NULL_RTX)
- /* In this case, the vector offset is zero, so we use v0,
- which is initialized by the kernel prologue to zero. */
- fprintf (file, "v[0:1]");
+ /* In this case, the vector offset is zero, so we use the first
+ lane of v1, which is initialized to zero. */
+ fprintf (file, "v[1:2]");
else if (REG_P (vgpr_offset)
&& VGPR_REGNO_P (REGNO (vgpr_offset)))
{
\f
#define FIXED_REGISTERS { \
/* Scalars. */ \
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, \
/* fp sp lr. */ \
- 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, \
+ 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, \
/* exec_save, cc_save */ \
1, 1, 1, 1, 0, 0, 0, 0, 0, 0, \
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
/* VGRPs */ \
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
#define CALL_USED_REGISTERS { \
/* Scalars. */ \
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, \
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
1, 1, 0, 0, 0, 0, 0, 0, 0, 0, \
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
#endif
COPY_HARD_REG_SET (no_unit_alloc_regs, fixed_nonglobal_reg_set);
if (! use_hard_frame_p)
- SET_HARD_REG_BIT (no_unit_alloc_regs, HARD_FRAME_POINTER_REGNUM);
+ add_to_hard_reg_set (&no_unit_alloc_regs, Pmode,
+ HARD_FRAME_POINTER_REGNUM);
setup_class_hard_regs ();
}
{
int i;
static const struct {const int from, to; } eliminables[] = ELIMINABLE_REGS;
+ int fp_reg_count = hard_regno_nregs (HARD_FRAME_POINTER_REGNUM, Pmode);
/* Setup is_leaf as frame_pointer_required may use it. This function
is called by sched_init before ira if scheduling is enabled. */
frame pointer in LRA. */
if (frame_pointer_needed)
- df_set_regs_ever_live (HARD_FRAME_POINTER_REGNUM, true);
+ for (i = 0; i < fp_reg_count; i++)
+ df_set_regs_ever_live (HARD_FRAME_POINTER_REGNUM + i, true);
COPY_HARD_REG_SET (ira_no_alloc_regs, no_unit_alloc_regs);
CLEAR_HARD_REG_SET (eliminable_regset);
}
if (!HARD_FRAME_POINTER_IS_FRAME_POINTER)
{
- if (!TEST_HARD_REG_BIT (crtl->asm_clobbers, HARD_FRAME_POINTER_REGNUM))
- {
- SET_HARD_REG_BIT (eliminable_regset, HARD_FRAME_POINTER_REGNUM);
- if (frame_pointer_needed)
- SET_HARD_REG_BIT (ira_no_alloc_regs, HARD_FRAME_POINTER_REGNUM);
- }
- else if (frame_pointer_needed)
- error ("%s cannot be used in asm here",
- reg_names[HARD_FRAME_POINTER_REGNUM]);
- else
- df_set_regs_ever_live (HARD_FRAME_POINTER_REGNUM, true);
+ for (i = 0; i < fp_reg_count; i++)
+ if (!TEST_HARD_REG_BIT (crtl->asm_clobbers,
+ HARD_FRAME_POINTER_REGNUM + i))
+ {
+ SET_HARD_REG_BIT (eliminable_regset,
+ HARD_FRAME_POINTER_REGNUM + i);
+ if (frame_pointer_needed)
+ SET_HARD_REG_BIT (ira_no_alloc_regs,
+ HARD_FRAME_POINTER_REGNUM + i);
+ }
+ else if (frame_pointer_needed)
+ error ("%s cannot be used in asm here",
+ reg_names[HARD_FRAME_POINTER_REGNUM + i]);
+ else
+ df_set_regs_ever_live (HARD_FRAME_POINTER_REGNUM + i, true);
}
}
for (k = 0; k < spill_class_size; k++)
{
hard_regno = ira_class_hard_regs[spill_class][k];
+ if (TEST_HARD_REG_BIT (eliminable_regset, hard_regno)
+ || !targetm.hard_regno_mode_ok (hard_regno, mode))
+ continue;
if (! overlaps_hard_reg_set_p (conflict_hard_regs, mode, hard_regno))
break;
}