From 773446ee035a09e20f73e7b50dbf20e35d7efd7d Mon Sep 17 00:00:00 2001 From: Tobias Burnus Date: Mon, 28 Jul 2025 15:15:24 +0200 Subject: [PATCH] gcn: Add more s_nop for MI300 Implement another case where the CDNA3 ISA documentation requires s_nop, add a comment why another case does not need to be handled. And add one case where an s_nop is required by MI300A hardware but seems to be not mentioned in the CDNA3 ISA documentation. gcc/ChangeLog: * config/gcn/gcn.md (define_attr "vcmp"): Add with values vcmp/vcmpx/no. (*movbi, cstoredi4.., cstore4): Set it. * config/gcn/gcn-valu.md (vec_cmp...): Likewise. * config/gcn/gcn.cc (gcn_cmpx_insn_p): Remove. (gcn_md_reorg): Add two new conditions for MI300. --- gcc/config/gcn/gcn-valu.md | 4 ++ gcc/config/gcn/gcn.cc | 80 ++++++++++++++++++++------------------ gcc/config/gcn/gcn.md | 9 +++++ 3 files changed, 55 insertions(+), 38 deletions(-) diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md index 09943293293..a34d2e30c97 100644 --- a/gcc/config/gcn/gcn-valu.md +++ b/gcc/config/gcn/gcn-valu.md @@ -3938,6 +3938,7 @@ v_cmpx%E1\t%2, %3 v_cmpx%E1\t%2, %3" [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a,vopc,vopc") + (set_attr "vcmp" "vcmp,vcmp,vcmpx,vcmpx,vcmp,vcmp,vcmpx,vcmpx") (set_attr "length" "4,8,4,8,8,8,4,8") (set_attr "rdna" "*,*,no,no,*,*,yes,yes")]) @@ -3992,6 +3993,7 @@ v_cmpx%E1\t%2, %3 v_cmpx%E1\t%2, %3" [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a,vopc,vopc") + (set_attr "vcmp" "vcmp,vcmp,vcmpx,vcmpx,vcmp,vcmp,vcmpx,vcmpx") (set_attr "length" "4,8,4,8,8,8,4,8") (set_attr "rdna" "*,*,no,no,*,*,yes,yes")]) @@ -4050,6 +4052,7 @@ v_cmpx%E1\t%2, %3 v_cmpx%E1\t%2, %3" [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vopc,vopc") + (set_attr "vcmp" "vcmp,vcmp,vcmpx,vcmpx,vcmp,vcmpx,vcmpx") (set_attr "length" "4,8,4,8,8,4,8") (set_attr "rdna" "*,*,no,no,*,yes,yes")]) @@ -4073,6 +4076,7 @@ v_cmpx%E1\t%2, %3 v_cmpx%E1\t%2, %3" [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vopc,vopc") + (set_attr "vcmp" "vcmp,vcmp,vcmpx,vcmpx,vcmp,vcmpx,vcmpx") (set_attr "length" "4,8,4,8,8,4,8") (set_attr "rdna" "*,*,no,no,*,yes,yes")]) diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc index 75b0936dce8..557568c38c3 100644 --- a/gcc/config/gcn/gcn.cc +++ b/gcc/config/gcn/gcn.cc @@ -5792,42 +5792,6 @@ gcn_libc_has_function (enum function_class fn_class, /* }}} */ /* {{{ md_reorg pass. */ -/* Identify V_CMPX from the "type" attribute; - note: this will also match 'v_cmp %E1 vcc'. */ - -static bool -gcn_cmpx_insn_p (attr_type type) -{ - switch (type) - { - case TYPE_VOPC: - return true; - case TYPE_MUBUF: - case TYPE_MTBUF: - case TYPE_FLAT: - case TYPE_VOP3P_MAI: - case TYPE_UNKNOWN: - case TYPE_SOP1: - case TYPE_SOP2: - case TYPE_SOPK: - case TYPE_SOPC: - case TYPE_SOPP: - case TYPE_SMEM: - case TYPE_DS: - case TYPE_VOP2: - case TYPE_VOP1: - case TYPE_VOP3A: - case TYPE_VOP3B: - case TYPE_VOP_SDWA: - case TYPE_VOP_DPP: - case TYPE_MULT: - case TYPE_VMULT: - return false; - } - gcc_unreachable (); - return false; -} - /* Identify VMEM instructions from their "type" attribute. */ static bool @@ -6356,19 +6320,59 @@ gcn_md_reorg (void) reg_class_contents[(int)VCC_CONDITIONAL_REG]))) nops_rqd = ivccwait - prev_insn->age; + /* NOTE: The following condition for adding wait state exists, but + GCC does not access the special registers using their SGPR#. + Thus, no action is required here. The following wait-state + condition exists at least for VEGA/gfx900+ to CDNA3: + Mixed use of VCC: alias vs. SGPR# - v_readlane, + v_readfirstlane, v_cmp, v_add_*i/u, v_sub_*i/u, v_div_*scale + followed by VALU reads VCC as constant requires 1 wait state. + (As carry-in, it requires none.) + [VCC can be accessed by name or logical SGPR that holds it.] */ + + /* Testing indicates that CDNA3 requires an s_nop between + e.g. 'v_cmp_eq_u64 vcc, v[4:5], v[8:9]' and 'v_mov_b32 v0, vcc_lo'. + Thus: add it between v_cmp writing VCC and VALU read of VCC. */ + if (TARGET_CDNA3_NOPS + && (prev_insn->age + nops_rqd) < 1 + && iunit == UNIT_VECTOR + && (hard_reg_set_intersect_p + (depregs, reg_class_contents[(int)VCC_CONDITIONAL_REG])) + && get_attr_vcmp (prev_insn->insn) == VCMP_VCMP) + nops_rqd = 1 - prev_insn->age; + + /* CDNA3: VALU writes SGPR/VCC: v_readlane, v_readfirstlane, v_cmp, + v_add_*i/u, v_sub_*i/u, v_div_*scale - followed by: + - VALU reads SGPR as constant requires 1 waite state + - VALU reads SGPR as carry-in requires no waite state + - v_readlane/v_writelane reads SGPR as lane select requires 4 wait + states. */ + if (TARGET_CDNA3_NOPS + && (prev_insn->age + nops_rqd) < 4 + && iunit == UNIT_VECTOR + && prev_insn->unit == UNIT_VECTOR + && hard_reg_set_intersect_p + (depregs, reg_class_contents[(int) SGPR_SRC_REGS])) + { + if (get_attr_laneselect (insn) != LANESELECT_NO) + nops_rqd = 4 - prev_insn->age; + else if ((prev_insn->age + nops_rqd) < 1) + nops_rqd = 1 - prev_insn->age; + } + /* CDNA3: v_cmpx followed by - V_readlane, v_readfirstlane, v_writelane requires 4 wait states - VALU reads EXEC as constant requires 2 wait states - other VALU requires no wait state */ if (TARGET_CDNA3_NOPS && (prev_insn->age + nops_rqd) < 4 - && gcn_cmpx_insn_p (prev_insn->type) + && get_attr_vcmp (prev_insn->insn) == VCMP_VCMPX && get_attr_laneselect (insn) != LANESELECT_NO) nops_rqd = 4 - prev_insn->age; else if (TARGET_CDNA3_NOPS && (prev_insn->age + nops_rqd) < 2 && iunit == UNIT_VECTOR - && gcn_cmpx_insn_p (prev_insn->type) + && get_attr_vcmp (prev_insn->insn) == VCMP_VCMPX && TEST_HARD_REG_BIT (ireads, EXECZ_REG)) nops_rqd = 2 - prev_insn->age; diff --git a/gcc/config/gcn/gcn.md b/gcc/config/gcn/gcn.md index 9172db08b2e..67a45edba36 100644 --- a/gcc/config/gcn/gcn.md +++ b/gcc/config/gcn/gcn.md @@ -324,6 +324,11 @@ "store,storex34,load,atomic,atomicwait,cmpswapx2,no" (const_string "no")) +; Identify v_cmp and v_cmpx instructions for "Manually Inserted Wait State" +; handling. + +(define_attr "vcmp" "vcmp,vcmpx,no" (const_string "no")) + ; Identify instructions that require "Manually Inserted Wait State" if ; a previous instruction writes to VCC. The number gives the number of NOPs. @@ -575,6 +580,7 @@ [(set_attr "type" "sop1,vop1,vop3a,sopk,vopc,mult,smem,smem,smem,flat,flat, flat,flat,flat,flat") (set_attr "flatmemaccess" "*,*,*,*,*,*,*,*,*,load,load,store,load,load,store") + (set_attr "vcmp" "*,*,*,*,vcmp,*,*,*,*,*,*,*,*,*,*") (set_attr "exec" "*,*,none,*,*,*,*,*,*,*,*,*,*,*,*") (set_attr "length" "4,4,4,4,4,8,12,12,12,12,12,12,12,12,12") (set_attr "xnack" "*,*,*,*,*,*,off,on,*,off,on,*,off,on,*") @@ -1098,6 +1104,7 @@ s_cmp%D1\t%2, %3 v_cmp%E1\tvcc, %2, %3" [(set_attr "type" "sopc,vopc") + (set_attr "vcmp" "vcmp") (set_attr "length" "8")]) (define_insn "cstoredi4_vector" @@ -1108,6 +1115,7 @@ "" "v_cmp%E1\tvcc, %2, %3" [(set_attr "type" "vopc") + (set_attr "vcmp" "vcmp") (set_attr "length" "8")]) (define_expand "cbranchdi4" @@ -1134,6 +1142,7 @@ "" "v_cmp%E1\tvcc, %2, %3" [(set_attr "type" "vopc") + (set_attr "vcmp" "vcmp") (set_attr "length" "8")]) (define_expand "cbranch4" -- 2.47.2