From: Peter Bergner Date: Tue, 14 Dec 2021 20:50:41 +0000 (-0600) Subject: rs6000: Do not allow combining of multiple assemble quads [PR103548] X-Git-Tag: basepoints/gcc-13~2344 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=15c02ab2569b3c4e27d6f133c013b15a9fa70177;p=thirdparty%2Fgcc.git rs6000: Do not allow combining of multiple assemble quads [PR103548] The compiler will gladly CSE the result of two __builtin_mma_build_acc calls with the same four vector arguments, leading to illegal MMA code being generated. The fix here is to make the mma_assemble_acc pattern use an unspec_volatile to stop the CSE from happening. 2021-12-14 Peter Bergner gcc/ PR target/103548 * config/rs6000/mma.md (UNSPEC_MMA_ASSEMBLE): Rename unspec from this... (UNSPEC_VSX_ASSEMBLE): ...to this. (UNSPECV_MMA_ASSEMBLE): New unspecv. (vsx_assemble_pair): Use UNSPEC_VSX_ASSEMBLE. (*vsx_assemble_pair): Likewise. (mma_assemble_acc): Use UNSPECV_MMA_ASSEMBLE. (*mma_assemble_acc): Likewise. * config/rs6000/rs6000.c (rs6000_split_multireg_move): Handle UNSPEC_VOLATILE. Use UNSPEC_VSX_ASSEMBLE and UNSPECV_MMA_ASSEMBLE. gcc/testsuite/ PR target/103548 * gcc.target/powerpc/mma-builtin-10-pair.c: New test. * gcc.target/powerpc/mma-builtin-10-quad.c: New test. 
--- diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md index fa081608c4c8..8a262054d5f2 100644 --- a/gcc/config/rs6000/mma.md +++ b/gcc/config/rs6000/mma.md @@ -29,7 +29,7 @@ ;; Constants for creating unspecs (define_c_enum "unspec" - [UNSPEC_MMA_ASSEMBLE + [UNSPEC_VSX_ASSEMBLE UNSPEC_MMA_EXTRACT UNSPEC_MMA_PMXVBF16GER2 UNSPEC_MMA_PMXVBF16GER2NN @@ -94,7 +94,8 @@ ]) (define_c_enum "unspecv" - [UNSPECV_MMA_XXSETACCZ + [UNSPECV_MMA_ASSEMBLE + UNSPECV_MMA_XXSETACCZ ]) ;; MMA instructions with 1 accumulator argument @@ -333,7 +334,7 @@ { rtx src = gen_rtx_UNSPEC (OOmode, gen_rtvec (2, operands[1], operands[2]), - UNSPEC_MMA_ASSEMBLE); + UNSPEC_VSX_ASSEMBLE); emit_move_insn (operands[0], src); DONE; }) @@ -345,7 +346,7 @@ [(set (match_operand:OO 0 "vsx_register_operand" "=&wa") (unspec:OO [(match_operand:V16QI 1 "mma_assemble_input_operand" "mwa") (match_operand:V16QI 2 "mma_assemble_input_operand" "mwa")] - UNSPEC_MMA_ASSEMBLE))] + UNSPEC_VSX_ASSEMBLE))] "TARGET_MMA" "#" "&& reload_completed" @@ -353,7 +354,7 @@ { rtx src = gen_rtx_UNSPEC (OOmode, gen_rtvec (2, operands[1], operands[2]), - UNSPEC_MMA_ASSEMBLE); + UNSPEC_VSX_ASSEMBLE); rs6000_split_multireg_move (operands[0], src); DONE; }) @@ -399,10 +400,10 @@ (match_operand:V16QI 4 "mma_assemble_input_operand")] "TARGET_MMA" { - rtx src = gen_rtx_UNSPEC (XOmode, - gen_rtvec (4, operands[1], operands[2], - operands[3], operands[4]), - UNSPEC_MMA_ASSEMBLE); + rtx src = gen_rtx_UNSPEC_VOLATILE (XOmode, + gen_rtvec (4, operands[1], operands[2], + operands[3], operands[4]), + UNSPECV_MMA_ASSEMBLE); emit_move_insn (operands[0], src); DONE; }) @@ -412,21 +413,22 @@ (define_insn_and_split "*mma_assemble_acc" [(set (match_operand:XO 0 "fpr_reg_operand" "=&d") - (unspec:XO [(match_operand:V16QI 1 "mma_assemble_input_operand" "mwa") - (match_operand:V16QI 2 "mma_assemble_input_operand" "mwa") - (match_operand:V16QI 3 "mma_assemble_input_operand" "mwa") - (match_operand:V16QI 4 "mma_assemble_input_operand" "mwa")] 
- UNSPEC_MMA_ASSEMBLE))] + (unspec_volatile:XO + [(match_operand:V16QI 1 "mma_assemble_input_operand" "mwa") + (match_operand:V16QI 2 "mma_assemble_input_operand" "mwa") + (match_operand:V16QI 3 "mma_assemble_input_operand" "mwa") + (match_operand:V16QI 4 "mma_assemble_input_operand" "mwa")] + UNSPECV_MMA_ASSEMBLE))] "TARGET_MMA && fpr_reg_operand (operands[0], XOmode)" "#" "&& reload_completed" [(const_int 0)] { - rtx src = gen_rtx_UNSPEC (XOmode, - gen_rtvec (4, operands[1], operands[2], - operands[3], operands[4]), - UNSPEC_MMA_ASSEMBLE); + rtx src = gen_rtx_UNSPEC_VOLATILE (XOmode, + gen_rtvec (4, operands[1], operands[2], + operands[3], operands[4]), + UNSPECV_MMA_ASSEMBLE); rs6000_split_multireg_move (operands[0], src); DONE; }) diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 70df511ff981..9fc1577be402 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -27071,9 +27071,11 @@ rs6000_split_multireg_move (rtx dst, rtx src) return; } - if (GET_CODE (src) == UNSPEC) + if (GET_CODE (src) == UNSPEC + || GET_CODE (src) == UNSPEC_VOLATILE) { - gcc_assert (XINT (src, 1) == UNSPEC_MMA_ASSEMBLE); + gcc_assert (XINT (src, 1) == UNSPEC_VSX_ASSEMBLE + || XINT (src, 1) == UNSPECV_MMA_ASSEMBLE); gcc_assert (REG_P (dst)); if (GET_MODE (src) == XOmode) gcc_assert (FP_REGNO_P (REGNO (dst))); diff --git a/gcc/testsuite/gcc.target/powerpc/mma-builtin-10-pair.c b/gcc/testsuite/gcc.target/powerpc/mma-builtin-10-pair.c new file mode 100644 index 000000000000..d8748d8e7d0a --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/mma-builtin-10-pair.c @@ -0,0 +1,21 @@ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ + +typedef unsigned char vec_t __attribute__((vector_size(16))); + +void +foo (__vector_pair *dst, vec_t *src) +{ + __vector_pair pair0, pair1; + /* Adjacent loads should be combined into one lxvp instruction + and identical build pairs should be combined. 
*/ + __builtin_vsx_build_pair (&pair0, src[0], src[1]); + __builtin_vsx_build_pair (&pair1, src[0], src[1]); + dst[0] = pair0; + dst[2] = pair1; +} + +/* { dg-final { scan-assembler-not {\mlxv\M} } } */ +/* { dg-final { scan-assembler-not {\mstxv\M} } } */ +/* { dg-final { scan-assembler-times {\mlxvp\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mstxvp\M} 2 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/mma-builtin-10-quad.c b/gcc/testsuite/gcc.target/powerpc/mma-builtin-10-quad.c new file mode 100644 index 000000000000..02342c76f5f5 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/mma-builtin-10-quad.c @@ -0,0 +1,23 @@ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ + +typedef unsigned char vec_t __attribute__((vector_size(16))); + +void +foo (__vector_quad *dst, vec_t *src) +{ + __vector_quad quad0, quad1; + /* Adjacent loads should be combined into two lxvp instructions + and identical build accs should not be combined. */ + __builtin_mma_build_acc (&quad0, src[0], src[1], src[2], src[3]); + __builtin_mma_build_acc (&quad1, src[0], src[1], src[2], src[3]); + dst[0] = quad0; + dst[2] = quad1; +} + +/* { dg-final { scan-assembler-not {\mlxv\M} } } */ +/* { dg-final { scan-assembler-not {\mstxv\M} } } */ +/* { dg-final { scan-assembler-times {\mlxvp\M} 4 } } */ +/* { dg-final { scan-assembler-times {\mxxmtacc\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mxxmfacc\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mstxvp\M} 4 } } */