git.ipfire.org Git - thirdparty/gcc.git/commitdiff
amdgcn: Add waitcnt after LDS write instructions
author Julian Brown <julian@codesourcery.com>
Mon, 10 Feb 2020 20:26:57 +0000 (12:26 -0800)
committer Julian Brown <julian@codesourcery.com>
Wed, 15 Jul 2020 17:20:02 +0000 (10:20 -0700)
Data-share write (ds_write) instructions do not necessarily complete
the write to LDS immediately. When a write completes, LGKM_CNT is
decremented. For now, we wait until LGKM_CNT reaches zero after each
ds_write instruction.

This fixes a race condition in the case where LDS is read immediately
after being written. This can happen with broadcast operations.

2020-07-15  Julian Brown  <julian@codesourcery.com>

gcc/
* config/gcn/gcn-valu.md (scatter<mode>_insn_1offset_ds<exec_scatter>):
Add waitcnt.
(*mov<mode>_insn, *movti_insn): Add waitcnt to ds_write alternatives.

gcc/ChangeLog.omp
gcc/config/gcn/gcn-valu.md
gcc/config/gcn/gcn.md

index 04b69877de051a76acb391aa46d7a687edbcd7da..21c5b355f208a32f71204cb47385b577c27b8f3f 100644 (file)
@@ -1,3 +1,9 @@
+2020-07-15  Julian Brown  <julian@codesourcery.com>
+
+       * config/gcn/gcn-valu.md (scatter<mode>_insn_1offset_ds<exec_scatter>):
+       Add waitcnt.
+       (*mov<mode>_insn, *movti_insn): Add waitcnt to ds_write alternatives.
+
 2020-07-15  Julian Brown  <julian@codesourcery.com>
 
        * config/gcn/gcn-protos.h (gcn_goacc_adjust_private_decl): Update
index 26559ff765e24bdeefa58e485d30a2fec1fc561e..e4d7f2a0f495f937cddf4498173d25e895b93fe5 100644 (file)
   {
     addr_space_t as = INTVAL (operands[3]);
     static char buf[200];
-    sprintf (buf, "ds_write%%b2\t%%0, %%2 offset:%%1%s",
+    sprintf (buf, "ds_write%%b2\t%%0, %%2 offset:%%1%s\;s_waitcnt\tlgkmcnt(0)",
             (AS_GDS_P (as) ? " gds" : ""));
     return buf;
   }
index 8cfb3a85d2561f3b1eb527277ab675bf39009377..e58669240c670ec52756ebe17b412ecc35809f74 100644 (file)
   flat_load_dword\t%0, %A1%O1%g1\;s_waitcnt\t0
   flat_store_dword\t%A0, %1%O0%g0
   v_mov_b32\t%0, %1
-  ds_write_b32\t%A0, %1%O0
+  ds_write_b32\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
   ds_read_b32\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
   s_mov_b32\t%0, %1
   global_load_dword\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
   flat_load%o1\t%0, %A1%O1%g1\;s_waitcnt\t0
   flat_store%s0\t%A0, %1%O0%g0
   v_mov_b32\t%0, %1
-  ds_write%b0\t%A0, %1%O0
+  ds_write%b0\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
   ds_read%u1\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
   global_load%o1\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
   global_store%s0\t%A0, %1%O0%g0"
   #
   flat_load_dwordx2\t%0, %A1%O1%g1\;s_waitcnt\t0
   flat_store_dwordx2\t%A0, %1%O0%g0
-  ds_write_b64\t%A0, %1%O0
+  ds_write_b64\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
   ds_read_b64\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
   global_load_dwordx2\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
   global_store_dwordx2\t%A0, %1%O0%g0"
   #
   global_store_dwordx4\t%A0, %1%O0%g0
   global_load_dwordx4\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
-  ds_write_b128\t%A0, %1%O0
+  ds_write_b128\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
   ds_read_b128\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)"
   "reload_completed
    && REG_P (operands[0])