From: Tom de Vries Date: Thu, 3 Feb 2022 13:00:02 +0000 (+0100) Subject: [nvptx] Fix .local atomic regressions X-Git-Tag: basepoints/gcc-13~1221 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=04b54cc486cc6fcc40380445e500eaf46d7901dc;p=thirdparty%2Fgcc.git [nvptx] Fix .local atomic regressions In PR target/104364, two problems were reported: - in muniform-simt mode, an atom.cas insn is no longer executed in the "master lane" only. - in msoft-stack mode, an __atomic_compare_exchange_n on stack memory is translated assuming it accesses local memory, while that's not the case. Fix these by: - ensuring that all insns with atomic attribute are also predicable, such that the validate_change in nvptx_reorg_uniform_simt will succeed, and asserting that it does, and - guarding the local atomics implementation with a new function nvptx_mem_local_p that correctly handles msoft-stack. Tested on x86_64 with nvptx accelerator. gcc/ChangeLog: 2022-02-04 Tom de Vries PR target/104364 * config/nvptx/nvptx-protos.h (nvptx_mem_local_p): Declare. * config/nvptx/nvptx.cc (nvptx_reorg_uniform_simt): Assert that change is validated. (nvptx_mem_local_p): New function. * config/nvptx/nvptx.md: Use nvptx_mem_local_p. (define_c_enum "unspecv"): Add UNSPECV_CAS_LOCAL. (define_insn "atomic_compare_and_swap_1_local"): New non-atomic, non-predicable define_insn, factored out of ... (define_insn "atomic_compare_and_swap_1"): ... here. Make predicable again. (define_expand "atomic_compare_and_swap"): Use atomic_compare_and_swap_1_local. gcc/testsuite/ChangeLog: 2022-02-04 Tom de Vries PR target/104364 * gcc.target/nvptx/softstack-2.c: New test. * gcc.target/nvptx/uniform-simt-1.c: New test. --- diff --git a/gcc/config/nvptx/nvptx-protos.h b/gcc/config/nvptx/nvptx-protos.h index 3d6ad148cb46..a846e3419177 100644 --- a/gcc/config/nvptx/nvptx-protos.h +++ b/gcc/config/nvptx/nvptx-protos.h @@ -59,5 +59,6 @@ extern const char *nvptx_output_simt_enter (rtx, rtx, rtx); extern const char *nvptx_output_simt_exit (rtx); extern const char *nvptx_output_red_partition (rtx, rtx); extern const char *nvptx_output_atomic_insn (const char *, rtx *, int, int); +extern bool nvptx_mem_local_p (rtx); #endif #endif diff --git a/gcc/config/nvptx/nvptx.cc b/gcc/config/nvptx/nvptx.cc index b3bb97c3c14d..2a694926b7a7 100644 --- a/gcc/config/nvptx/nvptx.cc +++ b/gcc/config/nvptx/nvptx.cc @@ -3150,7 +3150,8 @@ nvptx_reorg_uniform_simt () rtx pred = nvptx_get_unisimt_predicate (); pred = gen_rtx_NE (BImode, pred, const0_rtx); pat = gen_rtx_COND_EXEC (VOIDmode, pred, pat); - validate_change (insn, &PATTERN (insn), pat, false); + bool changed_p = validate_change (insn, &PATTERN (insn), pat, false); + gcc_assert (changed_p); } } @@ -6894,6 +6895,28 @@ nvptx_libc_has_function (enum function_class fn_class, tree type) return default_libc_has_function (fn_class, type); } +bool +nvptx_mem_local_p (rtx mem) +{ + gcc_assert (GET_CODE (mem) == MEM); + + struct address_info info; + decompose_mem_address (&info, mem); + + if (info.base != NULL && REG_P (*info.base) + && REGNO_PTR_FRAME_P (REGNO (*info.base))) + { + if (TARGET_SOFT_STACK) + { + /* Frame-related doesn't mean local. */ + } + else + return true; + } + + return false; +} + #undef TARGET_OPTION_OVERRIDE #define TARGET_OPTION_OVERRIDE nvptx_option_override diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md index 92768dd9e956..d64dbfd8b33e 100644 --- a/gcc/config/nvptx/nvptx.md +++ b/gcc/config/nvptx/nvptx.md @@ -54,6 +54,7 @@ (define_c_enum "unspecv" [ UNSPECV_LOCK UNSPECV_CAS + UNSPECV_CAS_LOCAL UNSPECV_XCHG UNSPECV_BARSYNC UNSPECV_WARPSYNC @@ -1771,8 +1772,14 @@ (match_operand:SI 7 "const_int_operand")] ;; failure model "" { - emit_insn (gen_atomic_compare_and_swap_1 - (operands[1], operands[2], operands[3], operands[4], operands[6])); + if (nvptx_mem_local_p (operands[2])) + emit_insn (gen_atomic_compare_and_swap_1_local + (operands[1], operands[2], operands[3], operands[4], + operands[6])); + else + emit_insn (gen_atomic_compare_and_swap_1 + (operands[1], operands[2], operands[3], operands[4], + operands[6])); rtx cond = gen_reg_rtx (BImode); emit_move_insn (cond, gen_rtx_EQ (BImode, operands[1], operands[3])); @@ -1780,23 +1787,18 @@ DONE; }) -(define_insn "atomic_compare_and_swap_1" +(define_insn "atomic_compare_and_swap_1_local" [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R") (unspec_volatile:SDIM [(match_operand:SDIM 1 "memory_operand" "+m") (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri") (match_operand:SDIM 3 "nvptx_nonmemory_operand" "Ri") (match_operand:SI 4 "const_int_operand")] - UNSPECV_CAS)) + UNSPECV_CAS_LOCAL)) (set (match_dup 1) - (unspec_volatile:SDIM [(const_int 0)] UNSPECV_CAS))] + (unspec_volatile:SDIM [(const_int 0)] UNSPECV_CAS_LOCAL))] "" { - struct address_info info; - decompose_mem_address (&info, operands[1]); - if (info.base != NULL && REG_P (*info.base) - && REGNO_PTR_FRAME_P (REGNO (*info.base))) - { output_asm_insn ("{", NULL); output_asm_insn ("\\t" ".reg.pred" "\\t" "%%eq_p;", NULL); output_asm_insn ("\\t" ".reg%t0" "\\t" "%%val;", operands); @@ -1807,13 +1809,26 @@ output_asm_insn ("\\t" "mov%t0" "\\t" "%0,%%val;", operands); output_asm_insn ("}", NULL); return ""; - } + } + [(set_attr "predicable" "false")]) + +(define_insn "atomic_compare_and_swap_1" + [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R") + (unspec_volatile:SDIM + [(match_operand:SDIM 1 "memory_operand" "+m") + (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri") + (match_operand:SDIM 3 "nvptx_nonmemory_operand" "Ri") + (match_operand:SI 4 "const_int_operand")] + UNSPECV_CAS)) + (set (match_dup 1) + (unspec_volatile:SDIM [(const_int 0)] UNSPECV_CAS))] + "" + { const char *t - = "\\tatom%A1.cas.b%T0\\t%0, %1, %2, %3;"; + = "%.\\tatom%A1.cas.b%T0\\t%0, %1, %2, %3;"; return nvptx_output_atomic_insn (t, operands, 1, 4); } - [(set_attr "atomic" "true") - (set_attr "predicable" "false")]) + [(set_attr "atomic" "true")]) (define_insn "atomic_exchange" [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R") ;; output @@ -1825,10 +1840,7 @@ (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri"))] ;; input "" { - struct address_info info; - decompose_mem_address (&info, operands[1]); - if (info.base != NULL && REG_P (*info.base) - && REGNO_PTR_FRAME_P (REGNO (*info.base))) + if (nvptx_mem_local_p (operands[1])) { output_asm_insn ("{", NULL); output_asm_insn ("\\t" ".reg%t0" "\\t" "%%val;", operands); @@ -1855,10 +1867,7 @@ (match_dup 1))] "" { - struct address_info info; - decompose_mem_address (&info, operands[1]); - if (info.base != NULL && REG_P (*info.base) - && REGNO_PTR_FRAME_P (REGNO (*info.base))) + if (nvptx_mem_local_p (operands[1])) { output_asm_insn ("{", NULL); output_asm_insn ("\\t" ".reg%t0" "\\t" "%%val;", operands); @@ -1888,10 +1897,7 @@ (match_dup 1))] "" { - struct address_info info; - decompose_mem_address (&info, operands[1]); - if (info.base != NULL && REG_P (*info.base) - && REGNO_PTR_FRAME_P (REGNO (*info.base))) + if (nvptx_mem_local_p (operands[1])) { output_asm_insn ("{", NULL); output_asm_insn ("\\t" ".reg%t0" "\\t" "%%val;", operands); @@ -1924,10 +1930,7 @@ (match_dup 1))] "mode == SImode || TARGET_SM35" { - struct address_info info; - decompose_mem_address (&info, operands[1]); - if (info.base != NULL && REG_P (*info.base) - && REGNO_PTR_FRAME_P (REGNO (*info.base))) + if (nvptx_mem_local_p (operands[1])) { output_asm_insn ("{", NULL); output_asm_insn ("\\t" ".reg.b%T0" "\\t" "%%val;", operands); diff --git a/gcc/testsuite/gcc.target/nvptx/softstack-2.c b/gcc/testsuite/gcc.target/nvptx/softstack-2.c new file mode 100644 index 000000000000..cccfda947d7d --- /dev/null +++ b/gcc/testsuite/gcc.target/nvptx/softstack-2.c @@ -0,0 +1,11 @@ +/* { dg-options "-O2 -msoft-stack" } */ + +int +f (void) +{ + int a = 0; + return __sync_lock_test_and_set (&a, 1); +} + +/* { dg-final { scan-assembler-times "atom.exch" 1 } } */ + diff --git a/gcc/testsuite/gcc.target/nvptx/uniform-simt-1.c b/gcc/testsuite/gcc.target/nvptx/uniform-simt-1.c new file mode 100644 index 000000000000..1bc0adae0143 --- /dev/null +++ b/gcc/testsuite/gcc.target/nvptx/uniform-simt-1.c @@ -0,0 +1,18 @@ +/* { dg-options "-O2 -muniform-simt" } */ + +enum memmodel +{ + MEMMODEL_RELAXED = 0, +}; + +int a = 0; + +int +f (void) +{ + int expected = 1; + return __atomic_compare_exchange_n (&a, &expected, 0, 0, MEMMODEL_RELAXED, + MEMMODEL_RELAXED); +} + +/* { dg-final { scan-assembler-times "@%r\[0-9\]*\tatom.global.cas" 1 } } */