]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
gcn: Add more s_nop for MI300
authorTobias Burnus <tburnus@baylibre.com>
Mon, 28 Jul 2025 13:15:24 +0000 (15:15 +0200)
committerTobias Burnus <tburnus@baylibre.com>
Mon, 28 Jul 2025 13:15:24 +0000 (15:15 +0200)
Implement another case where the CDNA3 ISA documentation requires s_nop,
add a comment why another case does not need to be handled. And add one
case where an s_nop is required by MI300A hardware but seems to be not
mentioned in the CDNA3 ISA documentation.

gcc/ChangeLog:

* config/gcn/gcn.md  (define_attr "vcmp"): Add with values
vcmp/vcmpx/no.
(*movbi, cstoredi4.., cstore<mode>4): Set it.
* config/gcn/gcn-valu.md (vec_cmp<mode>...): Likewise.
* config/gcn/gcn.cc (gcn_cmpx_insn_p): Remove.
(gcn_md_reorg): Add two new conditions for MI300.

gcc/config/gcn/gcn-valu.md
gcc/config/gcn/gcn.cc
gcc/config/gcn/gcn.md

index 099432932938b9737c55c3213abf06c4b5336dce..a34d2e30c971d8651a56882cc08ddb445c58a09f 100644 (file)
    v_cmpx%E1\t%2, %3
    v_cmpx%E1\t%2, %3"
   [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a,vopc,vopc")
+   (set_attr "vcmp" "vcmp,vcmp,vcmpx,vcmpx,vcmp,vcmp,vcmpx,vcmpx")
    (set_attr "length" "4,8,4,8,8,8,4,8")
    (set_attr "rdna" "*,*,no,no,*,*,yes,yes")])
 
    v_cmpx%E1\t%2, %3
    v_cmpx%E1\t%2, %3"
   [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a,vopc,vopc")
+   (set_attr "vcmp" "vcmp,vcmp,vcmpx,vcmpx,vcmp,vcmp,vcmpx,vcmpx")
    (set_attr "length" "4,8,4,8,8,8,4,8")
    (set_attr "rdna" "*,*,no,no,*,*,yes,yes")])
 
    v_cmpx%E1\t%2, %3
    v_cmpx%E1\t%2, %3"
   [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vopc,vopc")
+   (set_attr "vcmp" "vcmp,vcmp,vcmpx,vcmpx,vcmp,vcmpx,vcmpx")
    (set_attr "length" "4,8,4,8,8,4,8")
    (set_attr "rdna" "*,*,no,no,*,yes,yes")])
 
    v_cmpx%E1\t%2, %3
    v_cmpx%E1\t%2, %3"
   [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vopc,vopc")
+   (set_attr "vcmp" "vcmp,vcmp,vcmpx,vcmpx,vcmp,vcmpx,vcmpx")
    (set_attr "length" "4,8,4,8,8,4,8")
    (set_attr "rdna" "*,*,no,no,*,yes,yes")])
 
index 75b0936dce8d16e4f3edc02b7009a654a9c3c3ed..557568c38c33eb58ec5df6c97c24fe055e7e7c65 100644 (file)
@@ -5792,42 +5792,6 @@ gcn_libc_has_function (enum function_class fn_class,
 /* }}}  */
 /* {{{ md_reorg pass.  */
 
-/* Identify V_CMPX from the "type" attribute;
-   note: this will also match 'v_cmp %E1 vcc'.  */
-
-static bool
-gcn_cmpx_insn_p (attr_type type)
-{
-  switch (type)
-    {
-    case TYPE_VOPC:
-      return true;
-    case TYPE_MUBUF:
-    case TYPE_MTBUF:
-    case TYPE_FLAT:
-    case TYPE_VOP3P_MAI:
-    case TYPE_UNKNOWN:
-    case TYPE_SOP1:
-    case TYPE_SOP2:
-    case TYPE_SOPK:
-    case TYPE_SOPC:
-    case TYPE_SOPP:
-    case TYPE_SMEM:
-    case TYPE_DS:
-    case TYPE_VOP2:
-    case TYPE_VOP1:
-    case TYPE_VOP3A:
-    case TYPE_VOP3B:
-    case TYPE_VOP_SDWA:
-    case TYPE_VOP_DPP:
-    case TYPE_MULT:
-    case TYPE_VMULT:
-      return false;
-    }
-  gcc_unreachable ();
-  return false;
-}
-
 /* Identify VMEM instructions from their "type" attribute.  */
 
 static bool
@@ -6356,19 +6320,59 @@ gcn_md_reorg (void)
                   reg_class_contents[(int)VCC_CONDITIONAL_REG])))
            nops_rqd = ivccwait - prev_insn->age;
 
+         /* NOTE: The following condition for adding wait state exists, but
+            GCC does not access the special registers using their SGPR#.
+            Thus, no action is required here.  The following wait-state
+            condition exists at least for VEGA/gfx900+ to CDNA3:
+               Mixed use of VCC: alias vs. SGPR# - v_readlane,
+               v_readfirstlane, v_cmp, v_add_*i/u, v_sub_*i/u, v_div_*scale
+               followed by VALU reads VCC as constant requires 1 wait state.
+               (As carry-in, it requires none.)
+               [VCC can be accessed by name or logical SGPR that holds it.]  */
+
+         /* Testing indicates that CDNA3 requires an s_nop between
+            e.g. 'v_cmp_eq_u64 vcc, v[4:5], v[8:9]' and 'v_mov_b32 v0, vcc_lo'.
+            Thus: add it between v_cmp writing VCC and VALU read of VCC.  */
+         if (TARGET_CDNA3_NOPS
+             && (prev_insn->age + nops_rqd) < 1
+             && iunit == UNIT_VECTOR
+             && (hard_reg_set_intersect_p
+                 (depregs, reg_class_contents[(int)VCC_CONDITIONAL_REG]))
+             && get_attr_vcmp (prev_insn->insn) == VCMP_VCMP)
+           nops_rqd = 1 - prev_insn->age;
+
+         /* CDNA3: VALU writes SGPR/VCC: v_readlane, v_readfirstlane, v_cmp,
+            v_add_*i/u, v_sub_*i/u, v_div_*scale - followed by:
+            - VALU reads SGPR as constant requires 1 waite state
+            - VALU reads SGPR as carry-in requires no waite state
+            - v_readlane/v_writelane reads SGPR as lane select requires 4 wait
+              states.  */
+         if (TARGET_CDNA3_NOPS
+             && (prev_insn->age + nops_rqd) < 4
+             && iunit == UNIT_VECTOR
+             && prev_insn->unit == UNIT_VECTOR
+             && hard_reg_set_intersect_p
+                  (depregs, reg_class_contents[(int) SGPR_SRC_REGS]))
+           {
+             if (get_attr_laneselect (insn) != LANESELECT_NO)
+               nops_rqd = 4 - prev_insn->age;
+             else if ((prev_insn->age + nops_rqd) < 1)
+               nops_rqd = 1 - prev_insn->age;
+           }
+
          /* CDNA3: v_cmpx followed by
             - V_readlane, v_readfirstlane, v_writelane requires 4 wait states
             - VALU reads EXEC as constant requires 2 wait states
             - other VALU requires no wait state  */
          if (TARGET_CDNA3_NOPS
              && (prev_insn->age + nops_rqd) < 4
-             && gcn_cmpx_insn_p (prev_insn->type)
+             && get_attr_vcmp (prev_insn->insn) == VCMP_VCMPX
              && get_attr_laneselect (insn) != LANESELECT_NO)
            nops_rqd = 4 - prev_insn->age;
          else if (TARGET_CDNA3_NOPS
                   && (prev_insn->age + nops_rqd) < 2
                   && iunit == UNIT_VECTOR
-                  && gcn_cmpx_insn_p (prev_insn->type)
+                  && get_attr_vcmp (prev_insn->insn) == VCMP_VCMPX
                   && TEST_HARD_REG_BIT (ireads, EXECZ_REG))
            nops_rqd = 2 - prev_insn->age;
 
index 9172db08b2ebd72871c1ce665d57726ffdbe30a0..67a45edba363674ac0a6f015b0371ffb37a4c932 100644 (file)
              "store,storex34,load,atomic,atomicwait,cmpswapx2,no"
              (const_string "no"))
 
+; Identify v_cmp and v_cmpx instructions for "Manually Inserted Wait State"
+; handling.
+
+(define_attr "vcmp" "vcmp,vcmpx,no" (const_string "no"))
+
 ; Identify instructions that require "Manually Inserted Wait State" if
 ; a previous instruction writes to VCC.  The number gives the number of NOPs.
 
   [(set_attr "type" "sop1,vop1,vop3a,sopk,vopc,mult,smem,smem,smem,flat,flat,
                     flat,flat,flat,flat")
    (set_attr "flatmemaccess" "*,*,*,*,*,*,*,*,*,load,load,store,load,load,store")
+   (set_attr "vcmp" "*,*,*,*,vcmp,*,*,*,*,*,*,*,*,*,*")
    (set_attr "exec" "*,*,none,*,*,*,*,*,*,*,*,*,*,*,*")
    (set_attr "length" "4,4,4,4,4,8,12,12,12,12,12,12,12,12,12")
    (set_attr "xnack" "*,*,*,*,*,*,off,on,*,off,on,*,off,on,*")
    s_cmp%D1\t%2, %3
    v_cmp%E1\tvcc, %2, %3"
   [(set_attr "type" "sopc,vopc")
+   (set_attr "vcmp" "vcmp")
    (set_attr "length" "8")])
 
 (define_insn "cstoredi4_vector"
   ""
   "v_cmp%E1\tvcc, %2, %3"
   [(set_attr "type" "vopc")
+   (set_attr "vcmp" "vcmp")
    (set_attr "length" "8")])
 
 (define_expand "cbranchdi4"
   ""
   "v_cmp%E1\tvcc, %2, %3"
   [(set_attr "type" "vopc")
+   (set_attr "vcmp" "vcmp")
    (set_attr "length" "8")])
 
 (define_expand "cbranch<mode>4"