for which in arch tune; do
eval "val=\$with_$which"
case ${val} in
- "" | fiji | gfx900 | gfx906 | gfx908 | gfx90a)
+ "" | fiji | gfx900 | gfx906 | gfx908 | gfx90a | gfx1030)
# OK
;;
*)
supported for gcn. */
#define GOMP_SELF_SPECS ""
-#define NO_XNACK "!march=*:;march=fiji:;"
+#define NO_XNACK "!march=*:;march=fiji:;march=gfx1030:;"
#define NO_SRAM_ECC "!march=*:;march=fiji:;march=gfx900:;march=gfx906:;"
/* In HSACOv4 no attribute setting means the binary supports "any" hardware
"%{!march=*|march=fiji:--amdhsa-code-object-version=3} " \
"%{" NO_XNACK XNACKOPT "}" \
"%{" NO_SRAM_ECC SRAMOPT "} " \
+ "%{march=gfx1030:-mattr=+wavefrontsize64} " \
"-filetype=obj"
#define LINK_SPEC "--pie --export-dynamic"
#define LIB_SPEC "-lc"
PROCESSOR_VEGA10, // gfx900
PROCESSOR_VEGA20, // gfx906
PROCESSOR_GFX908,
- PROCESSOR_GFX90a
+ PROCESSOR_GFX90a,
+ PROCESSOR_GFX1030
};
#define TARGET_FIJI (gcn_arch == PROCESSOR_FIJI)
#define TARGET_VEGA20 (gcn_arch == PROCESSOR_VEGA20)
#define TARGET_GFX908 (gcn_arch == PROCESSOR_GFX908)
#define TARGET_GFX90a (gcn_arch == PROCESSOR_GFX90a)
+#define TARGET_GFX1030 (gcn_arch == PROCESSOR_GFX1030)
/* Set in gcn_option_override. */
extern enum gcn_isa {
ISA_UNKNOWN,
ISA_GCN3,
ISA_GCN5,
+ ISA_RDNA2,
ISA_CDNA1,
ISA_CDNA2
} gcn_isa;
#define TARGET_CDNA1_PLUS (gcn_isa >= ISA_CDNA1)
#define TARGET_CDNA2 (gcn_isa == ISA_CDNA2)
#define TARGET_CDNA2_PLUS (gcn_isa >= ISA_CDNA2)
+#define TARGET_RDNA2 (gcn_isa == ISA_RDNA2)
+
#define TARGET_M0_LDS_LIMIT (TARGET_GCN3)
#define TARGET_PACKED_WORK_ITEMS (TARGET_CDNA2_PLUS)
[(match_operand:V_noHI 1 "register_operand" " v")
(match_operand:SI 2 "const_int_operand" " n")]
UNSPEC_MOV_DPP_SHR))]
- ""
+ "!TARGET_RDNA2"
{
return gcn_expand_dpp_shr_insn (<MODE>mode, "v_mov_b32",
UNSPEC_MOV_DPP_SHR, INTVAL (operands[2]));
(match_dup 1))
(match_dup 1))))]
""
- "v_addc%^_u32\t%0, %4, %2, %1, %3"
+ "{v_addc%^_u32|v_add_co_ci_u32}\t%0, %4, %2, %1, %3"
[(set_attr "type" "vop2,vop3b")
(set_attr "length" "4,8")])
(match_dup 1))))]
""
"@
- v_subb%^_u32\t%0, %4, %1, %2, %3
- v_subb%^_u32\t%0, %4, %1, %2, %3
- v_subbrev%^_u32\t%0, %4, %2, %1, %3
- v_subbrev%^_u32\t%0, %4, %2, %1, %3"
+ {v_subb%^_u32|v_sub_co_ci_u32}\t%0, %4, %1, %2, %3
+ {v_subb%^_u32|v_sub_co_ci_u32}\t%0, %4, %1, %2, %3
+ {v_subbrev%^_u32|v_subrev_co_ci_u32}\t%0, %4, %2, %1, %3
+ {v_subbrev%^_u32|v_subrev_co_ci_u32}\t%0, %4, %2, %1, %3"
[(set_attr "type" "vop2,vop3b,vop2,vop3b")
(set_attr "length" "4,8,4,8")])
;; {{{ Vector comparison/merge
(define_insn "vec_cmp<mode>di"
- [(set (match_operand:DI 0 "register_operand" "=cV,cV, e, e,Sg,Sg")
+ [(set (match_operand:DI 0 "register_operand" "=cV,cV, e, e,Sg,Sg, e, e")
(match_operator:DI 1 "gcn_fp_compare_operator"
- [(match_operand:V_noQI 2 "gcn_alu_operand" "vSv, B,vSv, B, v,vA")
- (match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v, v,vA, v")]))
- (clobber (match_scratch:DI 4 "= X, X, cV,cV, X, X"))]
+ [(match_operand:V_noQI 2 "gcn_alu_operand" "vSv, B,vSv, B, v,vA,vSv, B")
+ (match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v, v,vA, v, v, v")]))
+ (clobber (match_scratch:DI 4 "= X, X, cV,cV, X, X, X, X"))]
""
"@
v_cmp%E1\tvcc, %2, %3
v_cmpx%E1\tvcc, %2, %3
v_cmpx%E1\tvcc, %2, %3
v_cmp%E1\t%0, %2, %3
- v_cmp%E1\t%0, %2, %3"
- [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a")
- (set_attr "length" "4,8,4,8,8,8")])
+ v_cmp%E1\t%0, %2, %3
+ v_cmpx%E1\t%2, %3
+ v_cmpx%E1\t%2, %3"
+ [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a,vopc,vopc")
+ (set_attr "length" "4,8,4,8,8,8,4,8")
+ (set_attr "rdna" "*,*,no,no,*,*,yes,yes")])
(define_expand "vec_cmpu<mode>di"
[(match_operand:DI 0 "register_operand")
})
(define_insn "vec_cmp<mode>di_exec"
- [(set (match_operand:DI 0 "register_operand" "=cV,cV, e, e,Sg,Sg")
+ [(set (match_operand:DI 0 "register_operand" "=cV,cV, e, e,Sg,Sg, e, e")
(and:DI
(match_operator 1 "gcn_fp_compare_operator"
- [(match_operand:V_noQI 2 "gcn_alu_operand" "vSv, B,vSv, B, v,vA")
- (match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v, v,vA, v")])
- (match_operand:DI 4 "gcn_exec_reg_operand" " e, e, e, e, e, e")))
- (clobber (match_scratch:DI 5 "= X, X, cV,cV, X, X"))]
+ [(match_operand:V_noQI 2 "gcn_alu_operand" "vSv, B,vSv, B, v,vA,vSv, B")
+ (match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v, v,vA, v, v, v")])
+ (match_operand:DI 4 "gcn_exec_reg_operand" " e, e, e, e, e, e, e, e")))
+ (clobber (match_scratch:DI 5 "= X, X, cV,cV, X, X, X, X"))]
""
"@
v_cmp%E1\tvcc, %2, %3
v_cmpx%E1\tvcc, %2, %3
v_cmpx%E1\tvcc, %2, %3
v_cmp%E1\t%0, %2, %3
- v_cmp%E1\t%0, %2, %3"
- [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a")
- (set_attr "length" "4,8,4,8,8,8")])
+ v_cmp%E1\t%0, %2, %3
+ v_cmpx%E1\t%2, %3
+ v_cmpx%E1\t%2, %3"
+ [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a,vopc,vopc")
+ (set_attr "length" "4,8,4,8,8,8,4,8")
+ (set_attr "rdna" "*,*,no,no,*,*,yes,yes")])
(define_expand "vec_cmpu<mode>di_exec"
[(match_operand:DI 0 "register_operand")
})
(define_insn "vec_cmp<mode>di_dup"
- [(set (match_operand:DI 0 "register_operand" "=cV,cV, e,e,Sg")
+ [(set (match_operand:DI 0 "register_operand" "=cV,cV, e,e,Sg, e,e")
(match_operator:DI 1 "gcn_fp_compare_operator"
[(vec_duplicate:V_noQI
(match_operand:<SCALAR_MODE> 2 "gcn_alu_operand"
- " Sv, B,Sv,B, A"))
- (match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v,v, v")]))
- (clobber (match_scratch:DI 4 "= X,X,cV,cV, X"))]
+ " Sv, B,Sv,B, A,Sv,B"))
+ (match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v,v, v, v,v")]))
+ (clobber (match_scratch:DI 4 "= X,X,cV,cV, X, X,X"))]
""
"@
v_cmp%E1\tvcc, %2, %3
v_cmp%E1\tvcc, %2, %3
v_cmpx%E1\tvcc, %2, %3
v_cmpx%E1\tvcc, %2, %3
- v_cmp%E1\t%0, %2, %3"
- [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a")
- (set_attr "length" "4,8,4,8,8")])
+ v_cmp%E1\t%0, %2, %3
+ v_cmpx%E1\t%2, %3
+ v_cmpx%E1\t%2, %3"
+ [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vopc,vopc")
+ (set_attr "length" "4,8,4,8,8,4,8")
+ (set_attr "rdna" "*,*,no,no,*,yes,yes")])
(define_insn "vec_cmp<mode>di_dup_exec"
- [(set (match_operand:DI 0 "register_operand" "=cV,cV, e,e,Sg")
+ [(set (match_operand:DI 0 "register_operand" "=cV,cV, e,e,Sg, e,e")
(and:DI
(match_operator 1 "gcn_fp_compare_operator"
[(vec_duplicate:V_noQI
(match_operand:<SCALAR_MODE> 2 "gcn_alu_operand"
- " Sv, B,Sv,B, A"))
- (match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v,v, v")])
- (match_operand:DI 4 "gcn_exec_reg_operand" " e, e, e,e, e")))
- (clobber (match_scratch:DI 5 "= X,X,cV,cV, X"))]
+ " Sv, B,Sv,B, A,Sv,B"))
+ (match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v,v, v, v,v")])
+ (match_operand:DI 4 "gcn_exec_reg_operand" " e, e, e,e, e, e,e")))
+ (clobber (match_scratch:DI 5 "= X,X,cV,cV, X, X,X"))]
""
"@
v_cmp%E1\tvcc, %2, %3
v_cmp%E1\tvcc, %2, %3
v_cmpx%E1\tvcc, %2, %3
v_cmpx%E1\tvcc, %2, %3
- v_cmp%E1\t%0, %2, %3"
- [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a")
- (set_attr "length" "4,8,4,8,8")])
+ v_cmp%E1\t%0, %2, %3
+ v_cmpx%E1\t%2, %3
+ v_cmpx%E1\t%2, %3"
+ [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vopc,vopc")
+ (set_attr "length" "4,8,4,8,8,4,8")
+ (set_attr "rdna" "*,*,no,no,*,yes,yes")])
(define_expand "vcond_mask_<mode>di"
[(parallel
(unspec:<SCALAR_MODE>
[(match_operand:V_ALL 1 "register_operand")]
REDUC_UNSPEC))]
- ""
+ "!TARGET_RDNA2"
{
rtx tmp = gcn_expand_reduc_scalar (<MODE>mode, operands[1],
<reduc_unspec>);
REDUC_UNSPEC))]
; GCN3 requires a carry out, GCN5 not
"!(TARGET_GCN3 && SCALAR_INT_MODE_P (<SCALAR_MODE>mode)
- && <reduc_unspec> == UNSPEC_PLUS_DPP_SHR)"
+ && <reduc_unspec> == UNSPEC_PLUS_DPP_SHR)
+ && !TARGET_RDNA2"
{
return gcn_expand_dpp_shr_insn (<MODE>mode, "<reduc_insn>",
<reduc_unspec>, INTVAL (operands[3]));
(match_operand:SI 3 "const_int_operand" "n")]
UNSPEC_PLUS_CARRY_DPP_SHR))
(clobber (reg:DI VCC_REG))]
- ""
+ "!TARGET_RDNA2"
{
return gcn_expand_dpp_shr_insn (<VnSI>mode, "v_add%^_u32",
UNSPEC_PLUS_CARRY_DPP_SHR,
(match_operand:DI 4 "register_operand" "cV")]
UNSPEC_PLUS_CARRY_IN_DPP_SHR))
(clobber (reg:DI VCC_REG))]
- ""
+ "!TARGET_RDNA2"
{
return gcn_expand_dpp_shr_insn (<MODE>mode, "v_addc%^_u32",
UNSPEC_PLUS_CARRY_IN_DPP_SHR,
: gcn_arch == PROCESSOR_VEGA20 ? ISA_GCN5
: gcn_arch == PROCESSOR_GFX908 ? ISA_CDNA1
: gcn_arch == PROCESSOR_GFX90a ? ISA_CDNA2
+ : gcn_arch == PROCESSOR_GFX1030 ? ISA_RDNA2
: ISA_UNKNOWN);
gcc_assert (gcn_isa != ISA_UNKNOWN);
{
rtx base = XEXP (addr, 0);
rtx offset = XEXP (addr, 1);
+ int offsetbits = (TARGET_RDNA2 ? 11 : 12);
bool immediate_p = (CONST_INT_P (offset)
&& INTVAL (offset) >= -(1 << 12)
&& INTVAL (offset) < (1 << 12));
rtx base = XEXP (x, 0);
rtx offset = XEXP (x, 1);
+ int offsetbits = (TARGET_RDNA2 ? 11 : 12);
bool immediate_p = (GET_CODE (offset) == CONST_INT
- /* Signed 13-bit immediate. */
- && INTVAL (offset) >= -(1 << 12)
- && INTVAL (offset) < (1 << 12)
+ /* Signed 12/13-bit immediate. */
+ && INTVAL (offset) >= -(1 << offsetbits)
+ && INTVAL (offset) < (1 << offsetbits)
/* The low bits of the offset are ignored, even
when they're meant to realign the pointer. */
&& !(INTVAL (offset) & 0x3));
return gcn_arch == PROCESSOR_GFX908;
if (strcmp (name, "gfx90a") == 0)
return gcn_arch == PROCESSOR_GFX90a;
+ if (strcmp (name, "gfx1030") == 0)
+ return gcn_arch == PROCESSOR_GFX1030;
return 0;
default:
gcc_unreachable ();
set_mem_addr_space (retptr_mem, ADDR_SPACE_SCALAR_FLAT);
emit_move_insn (kernarg_reg, retptr_mem);
- rtx retval_mem = gen_rtx_MEM (SImode, kernarg_reg);
- rtx scalar_retval = gen_rtx_REG (SImode, FIRST_PARM_REG);
- set_mem_addr_space (retval_mem, ADDR_SPACE_SCALAR_FLAT);
+ rtx retval_addr = gen_rtx_REG (DImode, FIRST_VPARM_REG);
+ emit_move_insn (retval_addr, kernarg_reg);
+ rtx retval_mem = gen_rtx_MEM (SImode, retval_addr);
+ rtx scalar_retval = gen_rtx_REG (SImode, FIRST_VPARM_REG + 2);
+ set_mem_addr_space (retval_mem, ADDR_SPACE_FLAT);
emit_move_insn (scalar_retval, gen_rtx_REG (SImode, RETURN_VALUE_REG));
emit_move_insn (retval_mem, scalar_retval);
}
case PROCESSOR_GFX90a:
cpu = "gfx90a";
break;
+ case PROCESSOR_GFX1030:
+ cpu = "gfx1030";
+ xnack = "";
+ sram_ecc = "";
+ break;
default: gcc_unreachable ();
}
builtin_define ("__CDNA1__"); \
else if (TARGET_CDNA2) \
builtin_define ("__CDNA2__"); \
+ else if (TARGET_RDNA2) \
+ builtin_define ("__RDNA2__"); \
if (TARGET_FIJI) \
{ \
builtin_define ("__fiji__"); \
builtin_define ("__gfx90a__"); \
} while (0)
+#define ASSEMBLER_DIALECT (TARGET_RDNA2 ? 1 : 0)
+
/* Support for a compile-time default architecture and tuning.
The rules are:
--with-arch is ignored if -march is specified.
; Disable alternatives that only apply to specific ISA variants.
(define_attr "gcn_version" "gcn3,gcn5" (const_string "gcn3"))
+(define_attr "rdna" "any,no,yes" (const_string "any"))
(define_attr "enabled" ""
- (cond [(eq_attr "gcn_version" "gcn3") (const_int 1)
+ (cond [(and (eq_attr "rdna" "no")
+ (ne (symbol_ref "TARGET_RDNA2") (const_int 0)))
+ (const_int 0)
+ (and (eq_attr "rdna" "yes")
+ (eq (symbol_ref "TARGET_RDNA2") (const_int 0)))
+ (const_int 0)
+ (eq_attr "gcn_version" "gcn3") (const_int 1)
(and (eq_attr "gcn_version" "gcn5")
(ne (symbol_ref "TARGET_GCN5_PLUS") (const_int 0)))
(const_int 1)]
if (cfun && cfun->machine && cfun->machine->normal_function)
return "s_setpc_b64\ts[18:19]";
else
- return "s_waitcnt\tlgkmcnt(0)\;s_dcache_wb\;s_endpgm";
+ return "s_waitcnt\tlgkmcnt(0)\;s_endpgm";
}
[(set_attr "type" "sop1")
(set_attr "length" "12")])
""
"@
s_addc_u32\t%0, %1, %2
- v_addc%^_u32\t%0, vcc, %2, %1, vcc"
+ {v_addc%^_u32|v_add_co_ci_u32}\t%0, vcc, %2, %1, vcc"
[(set_attr "type" "sop2,vop2")
(set_attr "length" "8,4")])
""
"@
s_addc_u32\t%0, %1, 0
- v_addc%^_u32\t%0, vcc, 0, %1, vcc"
+ {v_addc%^_u32|v_add_co_ci_u32}\t%0, vcc, 0, %1, vcc"
[(set_attr "type" "sop2,vop2")
(set_attr "length" "4")])
gen_rtx_REG (DImode, CC_SAVE_REG) };
output_asm_insn ("v_add%^_u32\t%L0, %3, %L2, %L1", new_operands);
- output_asm_insn ("v_addc%^_u32\t%H0, %3, %H2, %H1, %3", new_operands);
+ output_asm_insn ("{v_addc%^_u32|v_add_co_ci_u32}\t%H0, %3, %H2, %H1, %3",
+ new_operands);
}
else
{
s_mul_i32\t%0, %1, %2
s_mulk_i32\t%0, %2
s_mul_i32\t%0, %1, %2
- v_mul_lo_i32\t%0, %1, %2"
+ v_mul_lo_u32\t%0, %1, %2"
[(set_attr "type" "sop2,sopk,sop2,vop3a")
(set_attr "length" "4,4,8,4")])
[(set (match_operand:BLK 0)
(unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BARRIER))]
""
- "buffer_wbinvl1_vol"
+ "{buffer_wbinvl1_vol|buffer_gl0_inv}"
[(set_attr "type" "mubuf")
(set_attr "length" "4")])
(use (match_operand:SIDI 2 "immediate_operand" " i, i, i"))]
""
{
+ /* FIXME: RDNA cache instructions may be too conservative? */
switch (INTVAL (operands[2]))
{
case MEMMODEL_RELAXED:
return "s_load%o0\t%0, %A1 glc\;s_waitcnt\tlgkmcnt(0)\;"
"s_dcache_wb_vol";
case 1:
- return "flat_load%o0\t%0, %A1%O1 glc\;s_waitcnt\t0\;"
- "buffer_wbinvl1_vol";
+ return (TARGET_RDNA2
+ ? "flat_load%o0\t%0, %A1%O1 glc\;s_waitcnt\t0\;"
+ "buffer_gl0_inv"
+ : "flat_load%o0\t%0, %A1%O1 glc\;s_waitcnt\t0\;"
+ "buffer_wbinvl1_vol");
case 2:
- return "global_load%o0\t%0, %A1%O1 glc\;s_waitcnt\tvmcnt(0)\;"
- "buffer_wbinvl1_vol";
+ return (TARGET_RDNA2
+ ? "global_load%o0\t%0, %A1%O1 glc\;s_waitcnt\tvmcnt(0)\;"
+ "buffer_gl0_inv"
+ : "global_load%o0\t%0, %A1%O1 glc\;s_waitcnt\tvmcnt(0)\;"
+ "buffer_wbinvl1_vol");
}
break;
case MEMMODEL_ACQ_REL:
return "s_dcache_wb_vol\;s_load%o0\t%0, %A1 glc\;"
"s_waitcnt\tlgkmcnt(0)\;s_dcache_inv_vol";
case 1:
- return "buffer_wbinvl1_vol\;flat_load%o0\t%0, %A1%O1 glc\;"
- "s_waitcnt\t0\;buffer_wbinvl1_vol";
+ return (TARGET_RDNA2
+ ? "buffer_gl0_inv\;flat_load%o0\t%0, %A1%O1 glc\;"
+ "s_waitcnt\t0\;buffer_gl0_inv"
+ : "buffer_wbinvl1_vol\;flat_load%o0\t%0, %A1%O1 glc\;"
+ "s_waitcnt\t0\;buffer_wbinvl1_vol");
case 2:
- return "buffer_wbinvl1_vol\;global_load%o0\t%0, %A1%O1 glc\;"
- "s_waitcnt\tvmcnt(0)\;buffer_wbinvl1_vol";
+ return (TARGET_RDNA2
+ ? "buffer_gl0_inv\;global_load%o0\t%0, %A1%O1 glc\;"
+ "s_waitcnt\tvmcnt(0)\;buffer_gl0_inv"
+ : "buffer_wbinvl1_vol\;global_load%o0\t%0, %A1%O1 glc\;"
+ "s_waitcnt\tvmcnt(0)\;buffer_wbinvl1_vol");
}
break;
}
}
[(set_attr "type" "smem,flat,flat")
(set_attr "length" "20")
- (set_attr "gcn_version" "gcn5,*,gcn5")])
+ (set_attr "gcn_version" "gcn5,*,gcn5")
+ (set_attr "rdna" "no,*,*")])
(define_insn "atomic_store<mode>"
[(set (match_operand:SIDI 0 "memory_operand" "=RS,RF,RM")
case 0:
return "s_dcache_wb_vol\;s_store%o1\t%1, %A0 glc";
case 1:
- return "buffer_wbinvl1_vol\;flat_store%o1\t%A0, %1%O0 glc";
+ return (TARGET_RDNA2
+ ? "buffer_gl0_inv\;flat_store%o1\t%A0, %1%O0 glc"
+ : "buffer_wbinvl1_vol\;flat_store%o1\t%A0, %1%O0 glc");
case 2:
- return "buffer_wbinvl1_vol\;global_store%o1\t%A0, %1%O0 glc";
+ return (TARGET_RDNA2
+ ? "buffer_gl0_inv\;global_store%o1\t%A0, %1%O0 glc"
+ : "buffer_wbinvl1_vol\;global_store%o1\t%A0, %1%O0 glc");
}
break;
case MEMMODEL_ACQ_REL:
return "s_dcache_wb_vol\;s_store%o1\t%1, %A0 glc\;"
"s_waitcnt\tlgkmcnt(0)\;s_dcache_inv_vol";
case 1:
- return "buffer_wbinvl1_vol\;flat_store%o1\t%A0, %1%O0 glc\;"
- "s_waitcnt\t0\;buffer_wbinvl1_vol";
+ return (TARGET_RDNA2
+ ? "buffer_gl0_inv\;flat_store%o1\t%A0, %1%O0 glc\;"
+ "s_waitcnt\t0\;buffer_gl0_inv"
+ : "buffer_wbinvl1_vol\;flat_store%o1\t%A0, %1%O0 glc\;"
+ "s_waitcnt\t0\;buffer_wbinvl1_vol");
case 2:
- return "buffer_wbinvl1_vol\;global_store%o1\t%A0, %1%O0 glc\;"
- "s_waitcnt\tvmcnt(0)\;buffer_wbinvl1_vol";
+ return (TARGET_RDNA2
+ ? "buffer_gl0_inv\;global_store%o1\t%A0, %1%O0 glc\;"
+ "s_waitcnt\tvmcnt(0)\;buffer_gl0_inv"
+ : "buffer_wbinvl1_vol\;global_store%o1\t%A0, %1%O0 glc\;"
+ "s_waitcnt\tvmcnt(0)\;buffer_wbinvl1_vol");
}
break;
}
}
[(set_attr "type" "smem,flat,flat")
(set_attr "length" "20")
- (set_attr "gcn_version" "gcn5,*,gcn5")])
+ (set_attr "gcn_version" "gcn5,*,gcn5")
+ (set_attr "rdna" "no,*,*")])
(define_insn "atomic_exchange<mode>"
[(set (match_operand:SIDI 0 "register_operand" "=Sm, v, v")
return "s_atomic_swap<X>\t%0, %1, %2 glc\;s_waitcnt\tlgkmcnt(0)\;"
"s_dcache_wb_vol\;s_dcache_inv_vol";
case 1:
- return "flat_atomic_swap<X>\t%0, %1, %2 glc\;s_waitcnt\t0\;"
- "buffer_wbinvl1_vol";
+ return (TARGET_RDNA2
+ ? "flat_atomic_swap<X>\t%0, %1, %2 glc\;s_waitcnt\t0\;"
+ "buffer_gl0_inv"
+ : "flat_atomic_swap<X>\t%0, %1, %2 glc\;s_waitcnt\t0\;"
+ "buffer_wbinvl1_vol");
case 2:
- return "global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;"
- "s_waitcnt\tvmcnt(0)\;buffer_wbinvl1_vol";
+ return (TARGET_RDNA2
+ ? "global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;"
+ "s_waitcnt\tvmcnt(0)\;buffer_gl0_inv"
+ : "global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;"
+ "s_waitcnt\tvmcnt(0)\;buffer_wbinvl1_vol");
}
break;
case MEMMODEL_RELEASE:
return "s_dcache_wb_vol\;s_atomic_swap<X>\t%0, %1, %2 glc\;"
"s_waitcnt\tlgkmcnt(0)";
case 1:
- return "buffer_wbinvl1_vol\;flat_atomic_swap<X>\t%0, %1, %2 glc\;"
- "s_waitcnt\t0";
+ return (TARGET_RDNA2
+ ? "buffer_gl0_inv\;flat_atomic_swap<X>\t%0, %1, %2 glc\;"
+ "s_waitcnt\t0"
+ : "buffer_wbinvl1_vol\;flat_atomic_swap<X>\t%0, %1, %2 glc\;"
+ "s_waitcnt\t0");
case 2:
- return "buffer_wbinvl1_vol\;"
- "global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;"
- "s_waitcnt\tvmcnt(0)";
+ return (TARGET_RDNA2
+ ? "buffer_gl0_inv\;"
+ "global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;"
+ "s_waitcnt\tvmcnt(0)"
+ : "buffer_wbinvl1_vol\;"
+ "global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;"
+ "s_waitcnt\tvmcnt(0)");
}
break;
case MEMMODEL_ACQ_REL:
return "s_dcache_wb_vol\;s_atomic_swap<X>\t%0, %1, %2 glc\;"
"s_waitcnt\tlgkmcnt(0)\;s_dcache_inv_vol";
case 1:
- return "buffer_wbinvl1_vol\;flat_atomic_swap<X>\t%0, %1, %2 glc\;"
- "s_waitcnt\t0\;buffer_wbinvl1_vol";
+ return (TARGET_RDNA2
+ ? "buffer_gl0_inv\;flat_atomic_swap<X>\t%0, %1, %2 glc\;"
+ "s_waitcnt\t0\;buffer_gl0_inv"
+ : "buffer_wbinvl1_vol\;flat_atomic_swap<X>\t%0, %1, %2 glc\;"
+ "s_waitcnt\t0\;buffer_wbinvl1_vol");
case 2:
- return "buffer_wbinvl1_vol\;"
- "global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;"
- "s_waitcnt\tvmcnt(0)\;buffer_wbinvl1_vol";
+ return (TARGET_RDNA2
+ ? "buffer_gl0_inv\;"
+ "global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;"
+ "s_waitcnt\tvmcnt(0)\;buffer_gl0_inv"
+ : "buffer_wbinvl1_vol\;"
+ "global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;"
+ "s_waitcnt\tvmcnt(0)\;buffer_wbinvl1_vol");
}
break;
}
}
[(set_attr "type" "smem,flat,flat")
(set_attr "length" "20")
- (set_attr "gcn_version" "gcn5,*,gcn5")])
+ (set_attr "gcn_version" "gcn5,*,gcn5")
+ (set_attr "rdna" "no,*,*")])
;; }}}
;; {{{ OpenACC / OpenMP
EnumValue
Enum(gpu_type) String(gfx90a) Value(PROCESSOR_GFX90a)
+EnumValue
+Enum(gpu_type) String(gfx1030) Value(PROCESSOR_GFX1030)
+
march=
Target RejectNegative Joined ToLower Enum(gpu_type) Var(gcn_arch) Init(PROCESSOR_FIJI)
Specify the name of the target GPU.
#define EF_AMDGPU_MACH_AMDGCN_GFX908 0x30
#undef EF_AMDGPU_MACH_AMDGCN_GFX90a
#define EF_AMDGPU_MACH_AMDGCN_GFX90a 0x3f
+#undef EF_AMDGPU_MACH_AMDGCN_GFX1030
+#define EF_AMDGPU_MACH_AMDGCN_GFX1030 0x36
#define EF_AMDGPU_FEATURE_XNACK_V4 0x300 /* Mask. */
#define EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4 0x000
elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX908;
else if (strcmp (argv[i], "-march=gfx90a") == 0)
elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX90a;
+ else if (strcmp (argv[i], "-march=gfx1030") == 0)
+ elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX1030;
#define STR "-mstack-size="
else if (startswith (argv[i], STR))
gcn_stack_size = atoi (argv[i] + strlen (STR));
omp-device-properties-gcn: $(srcdir)/config/gcn/gcn.cc
echo kind: gpu > $@
echo arch: amdgcn gcn >> $@
- echo isa: fiji gfx803 gfx900 gfx906 gfx908 gfx90a >> $@
+ echo isa: fiji gfx803 gfx900 gfx906 gfx908 gfx90a gfx1030 >> $@
#if defined (__GCN3__) || defined (__GCN5__) \
- || defined (__CDNA1__) || defined (__CDNA2__)
+ || defined (__CDNA1__) || defined (__CDNA2__) \
+ || defined (__RDNA2__)
#define CDNA3_PLUS 0
#else
#define CDNA3_PLUS 1
EF_AMDGPU_MACH_AMDGCN_GFX900 = 0x02c,
EF_AMDGPU_MACH_AMDGCN_GFX906 = 0x02f,
EF_AMDGPU_MACH_AMDGCN_GFX908 = 0x030,
- EF_AMDGPU_MACH_AMDGCN_GFX90a = 0x03f
+ EF_AMDGPU_MACH_AMDGCN_GFX90a = 0x03f,
+ EF_AMDGPU_MACH_AMDGCN_GFX1030 = 0x036
} EF_AMDGPU_MACH;
const static int EF_AMDGPU_MACH_MASK = 0x000000ff;
const static char *gcn_gfx906_s = "gfx906";
const static char *gcn_gfx908_s = "gfx908";
const static char *gcn_gfx90a_s = "gfx90a";
+const static char *gcn_gfx1030_s = "gfx1030";
const static int gcn_isa_name_len = 6;
/* Returns the name that the HSA runtime uses for the ISA or NULL if we do not
return gcn_gfx908_s;
case EF_AMDGPU_MACH_AMDGCN_GFX90a:
return gcn_gfx90a_s;
+ case EF_AMDGPU_MACH_AMDGCN_GFX1030:
+ return gcn_gfx1030_s;
}
return NULL;
}
if (!strncmp (isa, gcn_gfx90a_s, gcn_isa_name_len))
return EF_AMDGPU_MACH_AMDGCN_GFX90a;
+ /* "gfx1030" is one character longer than gcn_isa_name_len, so compare
+ its full length; otherwise any name starting with "gfx103" would be
+ reported as gfx1030. */
+ if (!strncmp (isa, gcn_gfx1030_s, strlen (gcn_gfx1030_s)))
+ return EF_AMDGPU_MACH_AMDGCN_GFX1030;
+
return -1;
}
#elif defined(__nvptx__)
asm ("exit;");
#elif defined(__AMDGCN__)
- asm ("s_dcache_wb\n\t"
- "s_endpgm");
+ asm ("s_endpgm");
#else
#error gomp_free_pool_helper must terminate the thread
#endif