From: Julian Brown Date: Mon, 10 Feb 2020 20:26:57 +0000 (-0800) Subject: amdgcn: Add waitcnt after LDS write instructions X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=4e7d831e31e3fc179177da824fd98fd6e3121419;p=thirdparty%2Fgcc.git amdgcn: Add waitcnt after LDS write instructions Data-share write (ds_write) instructions do not necessarily complete the write to LDS immediately. When a write completes, LGKM_CNT is decremented. For now, we wait until LGKM_CNT reaches zero after each ds_write instruction. This fixes a race condition in the case where LDS is read immediately after being written. This can happen with broadcast operations. 2020-07-15 Julian Brown gcc/ * config/gcn/gcn-valu.md (scatter_insn_1offset_ds): Add waitcnt. (*mov_insn, *movti_insn): Add waitcnt to ds_write alternatives. --- diff --git a/gcc/ChangeLog.omp b/gcc/ChangeLog.omp index 04b69877de05..21c5b355f208 100644 --- a/gcc/ChangeLog.omp +++ b/gcc/ChangeLog.omp @@ -1,3 +1,9 @@ +2020-07-15 Julian Brown + + * config/gcn/gcn-valu.md (scatter_insn_1offset_ds): + Add waitcnt. + (*mov_insn, *movti_insn): Add waitcnt to ds_write alternatives. + 2020-07-15 Julian Brown * config/gcn/gcn-protos.h (gcn_goacc_adjust_private_decl): Update diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md index 26559ff765e2..e4d7f2a0f495 100644 --- a/gcc/config/gcn/gcn-valu.md +++ b/gcc/config/gcn/gcn-valu.md @@ -923,7 +923,7 @@ { addr_space_t as = INTVAL (operands[3]); static char buf[200]; - sprintf (buf, "ds_write%%b2\t%%0, %%2 offset:%%1%s", + sprintf (buf, "ds_write%%b2\t%%0, %%2 offset:%%1%s\;s_waitcnt\tlgkmcnt(0)", (AS_GDS_P (as) ? " gds" : "")); return buf; } diff --git a/gcc/config/gcn/gcn.md b/gcc/config/gcn/gcn.md index 8cfb3a85d256..e58669240c67 100644 --- a/gcc/config/gcn/gcn.md +++ b/gcc/config/gcn/gcn.md @@ -554,7 +554,7 @@ flat_load_dword\t%0, %A1%O1%g1\;s_waitcnt\t0 flat_store_dword\t%A0, %1%O0%g0 v_mov_b32\t%0, %1 - ds_write_b32\t%A0, %1%O0 + ds_write_b32\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0) ds_read_b32\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0) s_mov_b32\t%0, %1 global_load_dword\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0) @@ -582,7 +582,7 @@ flat_load%o1\t%0, %A1%O1%g1\;s_waitcnt\t0 flat_store%s0\t%A0, %1%O0%g0 v_mov_b32\t%0, %1 - ds_write%b0\t%A0, %1%O0 + ds_write%b0\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0) ds_read%u1\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0) global_load%o1\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0) global_store%s0\t%A0, %1%O0%g0" @@ -611,7 +611,7 @@ # flat_load_dwordx2\t%0, %A1%O1%g1\;s_waitcnt\t0 flat_store_dwordx2\t%A0, %1%O0%g0 - ds_write_b64\t%A0, %1%O0 + ds_write_b64\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0) ds_read_b64\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0) global_load_dwordx2\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0) global_store_dwordx2\t%A0, %1%O0%g0" @@ -667,7 +667,7 @@ # global_store_dwordx4\t%A0, %1%O0%g0 global_load_dwordx4\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0) - ds_write_b128\t%A0, %1%O0 + ds_write_b128\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0) ds_read_b128\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)" "reload_completed && REG_P (operands[0])