for which in arch tune; do
eval "val=\$with_$which"
case ${val} in
- "" | fiji | gfx900 | gfx906 | gfx908 | gfx90a | gfx1030)
+ "" | fiji | gfx900 | gfx906 | gfx908 | gfx90a | gfx1030 | gfx1100)
# OK
;;
*)
supported for gcn. */
#define GOMP_SELF_SPECS ""
-#define NO_XNACK "march=fiji:;march=gfx1030:;" \
+#define NO_XNACK "march=fiji:;march=gfx1030:;march=gfx1100:;" \
/* These match the defaults set in gcn.cc. */ \
"!mxnack*|mxnack=default:%{march=gfx900|march=gfx906|march=gfx908:-mattr=-xnack};"
#define NO_SRAM_ECC "!march=*:;march=fiji:;march=gfx900:;march=gfx906:;"
"%{!march=*|march=fiji:--amdhsa-code-object-version=3} " \
"%{" NO_XNACK XNACKOPT "}" \
"%{" NO_SRAM_ECC SRAMOPT "} " \
- "%{march=gfx1030:-mattr=+wavefrontsize64} " \
+ "%{march=gfx1030|march=gfx1100:-mattr=+wavefrontsize64} " \
"-filetype=obj"
#define LINK_SPEC "--pie --export-dynamic"
#define LIB_SPEC "-lc"
PROCESSOR_VEGA20, // gfx906
PROCESSOR_GFX908,
PROCESSOR_GFX90a,
- PROCESSOR_GFX1030
+ PROCESSOR_GFX1030,
+ PROCESSOR_GFX1100
};
#define TARGET_FIJI (gcn_arch == PROCESSOR_FIJI)
#define TARGET_GFX908 (gcn_arch == PROCESSOR_GFX908)
#define TARGET_GFX90a (gcn_arch == PROCESSOR_GFX90a)
#define TARGET_GFX1030 (gcn_arch == PROCESSOR_GFX1030)
+#define TARGET_GFX1100 (gcn_arch == PROCESSOR_GFX1100)
/* Set in gcn_option_override. */
extern enum gcn_isa {
ISA_GCN3,
ISA_GCN5,
ISA_RDNA2,
+ ISA_RDNA3,
ISA_CDNA1,
ISA_CDNA2
} gcn_isa;
#define TARGET_CDNA2 (gcn_isa == ISA_CDNA2)
#define TARGET_CDNA2_PLUS (gcn_isa >= ISA_CDNA2)
#define TARGET_RDNA2 (gcn_isa == ISA_RDNA2)
+#define TARGET_RDNA2_PLUS (gcn_isa >= ISA_RDNA2 && gcn_isa < ISA_CDNA1)
+#define TARGET_RDNA3 (gcn_isa == ISA_RDNA3)
#define TARGET_M0_LDS_LIMIT (TARGET_GCN3)
[(match_operand:V_noHI 1 "register_operand" " v")
(match_operand:SI 2 "const_int_operand" " n")]
UNSPEC_MOV_DPP_SHR))]
- "!TARGET_RDNA2"
+ "!TARGET_RDNA2_PLUS"
{
return gcn_expand_dpp_shr_insn (<MODE>mode, "v_mov_b32",
UNSPEC_MOV_DPP_SHR, INTVAL (operands[2]));
(unspec:<SCALAR_MODE>
[(match_operand:V_ALL 1 "register_operand")]
REDUC_UNSPEC))]
- "!TARGET_RDNA2"
+ "!TARGET_RDNA2_PLUS"
{
rtx tmp = gcn_expand_reduc_scalar (<MODE>mode, operands[1],
<reduc_unspec>);
; GCN3 requires a carry out, GCN5 not
"!(TARGET_GCN3 && SCALAR_INT_MODE_P (<SCALAR_MODE>mode)
&& <reduc_unspec> == UNSPEC_PLUS_DPP_SHR)
- && !TARGET_RDNA2"
+ && !TARGET_RDNA2_PLUS"
{
return gcn_expand_dpp_shr_insn (<MODE>mode, "<reduc_insn>",
<reduc_unspec>, INTVAL (operands[3]));
(match_operand:SI 3 "const_int_operand" "n")]
UNSPEC_PLUS_CARRY_DPP_SHR))
(clobber (reg:DI VCC_REG))]
- "!TARGET_RDNA2"
+ "!TARGET_RDNA2_PLUS"
{
return gcn_expand_dpp_shr_insn (<VnSI>mode, "v_add%^_u32",
UNSPEC_PLUS_CARRY_DPP_SHR,
(match_operand:DI 4 "register_operand" "cV")]
UNSPEC_PLUS_CARRY_IN_DPP_SHR))
(clobber (reg:DI VCC_REG))]
- "!TARGET_RDNA2"
+ "!TARGET_RDNA2_PLUS"
{
return gcn_expand_dpp_shr_insn (<MODE>mode, "v_addc%^_u32",
UNSPEC_PLUS_CARRY_IN_DPP_SHR,
: gcn_arch == PROCESSOR_GFX908 ? ISA_CDNA1
: gcn_arch == PROCESSOR_GFX90a ? ISA_CDNA2
: gcn_arch == PROCESSOR_GFX1030 ? ISA_RDNA2
+ : gcn_arch == PROCESSOR_GFX1100 ? ISA_RDNA3
: ISA_UNKNOWN);
gcc_assert (gcn_isa != ISA_UNKNOWN);
acc_lds_size = 32768;
}
- /* gfx803 "Fiji" and gfx1030 do not support XNACK. */
+ /* gfx803 "Fiji", gfx1030 and gfx1100 do not support XNACK. */
if (gcn_arch == PROCESSOR_FIJI
- || gcn_arch == PROCESSOR_GFX1030)
+ || gcn_arch == PROCESSOR_GFX1030
+ || gcn_arch == PROCESSOR_GFX1100)
{
if (flag_xnack == HSACO_ATTR_ON)
- error ("-mxnack=on is incompatible with -march=%s",
+ error ("%<-mxnack=on%> is incompatible with %<-march=%s%>",
(gcn_arch == PROCESSOR_FIJI ? "fiji"
- : gcn_arch == PROCESSOR_GFX1030 ? "gfx1030"
- : NULL));
+ : gcn_arch == PROCESSOR_GFX1030 ? "gfx1030"
+ : gcn_arch == PROCESSOR_GFX1100 ? "gfx1100"
+ : NULL));
/* Allow HSACO_ATTR_ANY silently because that's the default. */
flag_xnack = HSACO_ATTR_OFF;
}
{
rtx base = XEXP (addr, 0);
rtx offset = XEXP (addr, 1);
- int offsetbits = (TARGET_RDNA2 ? 11 : 12);
+ int offsetbits = (TARGET_RDNA2_PLUS ? 11 : 12);
bool immediate_p = (CONST_INT_P (offset)
&& INTVAL (offset) >= -(1 << 12)
&& INTVAL (offset) < (1 << 12));
rtx base = XEXP (x, 0);
rtx offset = XEXP (x, 1);
- int offsetbits = (TARGET_RDNA2 ? 11 : 12);
+ int offsetbits = (TARGET_RDNA2_PLUS ? 11 : 12);
bool immediate_p = (GET_CODE (offset) == CONST_INT
/* Signed 12/13-bit immediate. */
&& INTVAL (offset) >= -(1 << offsetbits)
return gcn_arch == PROCESSOR_GFX90a;
if (strcmp (name, "gfx1030") == 0)
return gcn_arch == PROCESSOR_GFX1030;
+ if (strcmp (name, "gfx1100") == 0)
+ return gcn_arch == PROCESSOR_GFX1100;
return 0;
default:
gcc_unreachable ();
xnack = "";
sram_ecc = "";
break;
+ case PROCESSOR_GFX1100:
+ cpu = "gfx1100";
+ xnack = "";
+ sram_ecc = "";
+ break;
default: gcc_unreachable ();
}
"\t .amdhsa_next_free_vgpr\t%i\n"
"\t .amdhsa_next_free_sgpr\t%i\n"
"\t .amdhsa_reserve_vcc\t1\n"
- "\t .amdhsa_reserve_flat_scratch\t0\n"
"\t .amdhsa_reserve_xnack_mask\t%i\n"
"\t .amdhsa_private_segment_fixed_size\t0\n"
"\t .amdhsa_group_segment_fixed_size\t%u\n"
sgpr,
xnack_enabled,
LDS_SIZE);
+ /* Not supported with 'architected flat scratch'. */
+ if (gcn_arch != PROCESSOR_GFX1100)
+ fprintf (file,
+ "\t .amdhsa_reserve_flat_scratch\t0\n");
if (gcn_arch == PROCESSOR_GFX90a)
fprintf (file,
"\t .amdhsa_accum_offset\t%i\n"
builtin_define ("__CDNA2__"); \
else if (TARGET_RDNA2) \
builtin_define ("__RDNA2__"); \
+ else if (TARGET_RDNA3) \
+ builtin_define ("__RDNA3__"); \
if (TARGET_FIJI) \
{ \
builtin_define ("__fiji__"); \
builtin_define ("__gfx906__"); \
else if (TARGET_GFX908) \
builtin_define ("__gfx908__"); \
else if (TARGET_GFX90a) \
builtin_define ("__gfx90a__"); \
+ else if (TARGET_GFX1030) \
+ builtin_define ("__gfx1030__"); \
+ else if (TARGET_GFX1100) \
+ builtin_define ("__gfx1100__"); \
} while (0)
-#define ASSEMBLER_DIALECT (TARGET_RDNA2 ? 1 : 0)
+#define ASSEMBLER_DIALECT (TARGET_RDNA2_PLUS ? 1 : 0)
/* Support for a compile-time default architecture and tuning.
The rules are:
(define_attr "enabled" ""
(cond [(and (eq_attr "rdna" "no")
- (ne (symbol_ref "TARGET_RDNA2") (const_int 0)))
+ (ne (symbol_ref "TARGET_RDNA2_PLUS") (const_int 0)))
(const_int 0)
(and (eq_attr "rdna" "yes")
- (eq (symbol_ref "TARGET_RDNA2") (const_int 0)))
+ (eq (symbol_ref "TARGET_RDNA2_PLUS") (const_int 0)))
(const_int 0)
(and (eq_attr "gcn_version" "gcn5")
(eq (symbol_ref "TARGET_GCN5_PLUS") (const_int 0)))
return "s_load%o0\t%0, %A1 glc\;s_waitcnt\tlgkmcnt(0)\;"
"s_dcache_wb_vol";
case 1:
- return (TARGET_RDNA2
+ return (TARGET_RDNA2_PLUS
? "flat_load%o0\t%0, %A1%O1 glc\;s_waitcnt\t0\;"
"buffer_gl0_inv"
: "flat_load%o0\t%0, %A1%O1 glc\;s_waitcnt\t0\;"
"buffer_wbinvl1_vol");
case 2:
- return (TARGET_RDNA2
+ return (TARGET_RDNA2_PLUS
? "global_load%o0\t%0, %A1%O1 glc\;s_waitcnt\tvmcnt(0)\;"
"buffer_gl0_inv"
: "global_load%o0\t%0, %A1%O1 glc\;s_waitcnt\tvmcnt(0)\;"
return "s_dcache_wb_vol\;s_load%o0\t%0, %A1 glc\;"
"s_waitcnt\tlgkmcnt(0)\;s_dcache_inv_vol";
case 1:
- return (TARGET_RDNA2
+ return (TARGET_RDNA2_PLUS
? "buffer_gl0_inv\;flat_load%o0\t%0, %A1%O1 glc\;"
"s_waitcnt\t0\;buffer_gl0_inv"
: "buffer_wbinvl1_vol\;flat_load%o0\t%0, %A1%O1 glc\;"
"s_waitcnt\t0\;buffer_wbinvl1_vol");
case 2:
- return (TARGET_RDNA2
+ return (TARGET_RDNA2_PLUS
? "buffer_gl0_inv\;global_load%o0\t%0, %A1%O1 glc\;"
"s_waitcnt\tvmcnt(0)\;buffer_gl0_inv"
: "buffer_wbinvl1_vol\;global_load%o0\t%0, %A1%O1 glc\;"
case 0:
return "s_dcache_wb_vol\;s_store%o1\t%1, %A0 glc";
case 1:
- return (TARGET_RDNA2
+ return (TARGET_RDNA2_PLUS
? "buffer_gl0_inv\;flat_store%o1\t%A0, %1%O0 glc"
: "buffer_wbinvl1_vol\;flat_store%o1\t%A0, %1%O0 glc");
case 2:
- return (TARGET_RDNA2
+ return (TARGET_RDNA2_PLUS
? "buffer_gl0_inv\;global_store%o1\t%A0, %1%O0 glc"
: "buffer_wbinvl1_vol\;global_store%o1\t%A0, %1%O0 glc");
}
return "s_dcache_wb_vol\;s_store%o1\t%1, %A0 glc\;"
"s_waitcnt\tlgkmcnt(0)\;s_dcache_inv_vol";
case 1:
- return (TARGET_RDNA2
+ return (TARGET_RDNA2_PLUS
? "buffer_gl0_inv\;flat_store%o1\t%A0, %1%O0 glc\;"
"s_waitcnt\t0\;buffer_gl0_inv"
: "buffer_wbinvl1_vol\;flat_store%o1\t%A0, %1%O0 glc\;"
"s_waitcnt\t0\;buffer_wbinvl1_vol");
case 2:
- return (TARGET_RDNA2
+ return (TARGET_RDNA2_PLUS
? "buffer_gl0_inv\;global_store%o1\t%A0, %1%O0 glc\;"
"s_waitcnt\tvmcnt(0)\;buffer_gl0_inv"
: "buffer_wbinvl1_vol\;global_store%o1\t%A0, %1%O0 glc\;"
return "s_atomic_swap<X>\t%0, %1, %2 glc\;s_waitcnt\tlgkmcnt(0)\;"
"s_dcache_wb_vol\;s_dcache_inv_vol";
case 1:
- return (TARGET_RDNA2
+ return (TARGET_RDNA2_PLUS
? "flat_atomic_swap<X>\t%0, %1, %2 glc\;s_waitcnt\t0\;"
"buffer_gl0_inv"
: "flat_atomic_swap<X>\t%0, %1, %2 glc\;s_waitcnt\t0\;"
"buffer_wbinvl1_vol");
case 2:
- return (TARGET_RDNA2
+ return (TARGET_RDNA2_PLUS
? "global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;"
"s_waitcnt\tvmcnt(0)\;buffer_gl0_inv"
: "global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;"
return "s_dcache_wb_vol\;s_atomic_swap<X>\t%0, %1, %2 glc\;"
"s_waitcnt\tlgkmcnt(0)";
case 1:
- return (TARGET_RDNA2
+ return (TARGET_RDNA2_PLUS
? "buffer_gl0_inv\;flat_atomic_swap<X>\t%0, %1, %2 glc\;"
"s_waitcnt\t0"
: "buffer_wbinvl1_vol\;flat_atomic_swap<X>\t%0, %1, %2 glc\;"
"s_waitcnt\t0");
case 2:
- return (TARGET_RDNA2
+ return (TARGET_RDNA2_PLUS
? "buffer_gl0_inv\;"
"global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;"
"s_waitcnt\tvmcnt(0)"
return "s_dcache_wb_vol\;s_atomic_swap<X>\t%0, %1, %2 glc\;"
"s_waitcnt\tlgkmcnt(0)\;s_dcache_inv_vol";
case 1:
- return (TARGET_RDNA2
+ return (TARGET_RDNA2_PLUS
? "buffer_gl0_inv\;flat_atomic_swap<X>\t%0, %1, %2 glc\;"
"s_waitcnt\t0\;buffer_gl0_inv"
: "buffer_wbinvl1_vol\;flat_atomic_swap<X>\t%0, %1, %2 glc\;"
"s_waitcnt\t0\;buffer_wbinvl1_vol");
case 2:
- return (TARGET_RDNA2
+ return (TARGET_RDNA2_PLUS
? "buffer_gl0_inv\;"
"global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;"
"s_waitcnt\tvmcnt(0)\;buffer_gl0_inv"
EnumValue
Enum(gpu_type) String(gfx1030) Value(PROCESSOR_GFX1030)
+EnumValue
+Enum(gpu_type) String(gfx1100) Value(PROCESSOR_GFX1100)
+
march=
Target RejectNegative Negative(march=) Joined ToLower Enum(gpu_type) Var(gcn_arch) Init(PROCESSOR_FIJI)
Specify the name of the target GPU.
#define EF_AMDGPU_MACH_AMDGCN_GFX90a 0x3f
#undef EF_AMDGPU_MACH_AMDGCN_GFX1030
#define EF_AMDGPU_MACH_AMDGCN_GFX1030 0x36
+#undef EF_AMDGPU_MACH_AMDGCN_GFX1100
+#define EF_AMDGPU_MACH_AMDGCN_GFX1100 0x41
#define EF_AMDGPU_FEATURE_XNACK_V4 0x300 /* Mask. */
#define EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4 0x000
elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX90a;
else if (strcmp (argv[i], "-march=gfx1030") == 0)
elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX1030;
+ else if (strcmp (argv[i], "-march=gfx1100") == 0)
+ elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX1100;
#define STR "-mstack-size="
else if (startswith (argv[i], STR))
gcn_stack_size = atoi (argv[i] + strlen (STR));
omp-device-properties-gcn: $(srcdir)/config/gcn/gcn.cc
echo kind: gpu > $@
echo arch: amdgcn gcn >> $@
- echo isa: fiji gfx803 gfx900 gfx906 gfx908 gfx90a gfx1030 >> $@
+ echo isa: fiji gfx803 gfx900 gfx906 gfx908 gfx90a gfx1030 gfx1100 >> $@
const static char *gcn_gfx908_s = "gfx908";
const static char *gcn_gfx90a_s = "gfx90a";
const static char *gcn_gfx1030_s = "gfx1030";
-const static int gcn_isa_name_len = 6;
+const static char *gcn_gfx1100_s = "gfx1100";
+const static int gcn_isa_name_len = 7;
/* Returns the name that the HSA runtime uses for the ISA or NULL if we do not
support the ISA. */
return gcn_gfx90a_s;
case EF_AMDGPU_MACH_AMDGCN_GFX1030:
return gcn_gfx1030_s;
+ case EF_AMDGPU_MACH_AMDGCN_GFX1100:
+ return gcn_gfx1100_s;
}
return NULL;
}
if (!strncmp (isa, gcn_gfx1030_s, gcn_isa_name_len))
return EF_AMDGPU_MACH_AMDGCN_GFX1030;
+ if (!strncmp (isa, gcn_gfx1100_s, gcn_isa_name_len))
+ return EF_AMDGPU_MACH_AMDGCN_GFX1100;
+
return -1;
}
case EF_AMDGPU_MACH_AMDGCN_GFX906:
case EF_AMDGPU_MACH_AMDGCN_GFX908:
case EF_AMDGPU_MACH_AMDGCN_GFX1030:
+ case EF_AMDGPU_MACH_AMDGCN_GFX1100:
return 256;
case EF_AMDGPU_MACH_AMDGCN_GFX90a:
return 512;