From 3bb91d31a272d7fd9f02301df101e3041d5aeb5d Mon Sep 17 00:00:00 2001 From: Michael Meissner Date: Mon, 22 May 2023 11:08:13 -0400 Subject: [PATCH] Do not generate vmaddfp and vnmsubfp This is version 3 of the patch. This is essentially version 1 with the removal of changes to altivec.md, and cleanup of the comments. Version 2 generated the vmaddfp and vnmsubfp instructions if -Ofast was used, and those changes are deleted in this patch. The Altivec instructions vmaddfp and vnmsubfp have different rounding behaviors than the VSX xvmaddsp and xvnmsubsp instructions. In particular, generating these instructions seems to break Eigen on big endian systems. I have done bootstrap builds on power9 little endian (with both IEEE long double and IBM long double). I have also done the builds and test on a power8 big endian system (testing both 32-bit and 64-bit code generation). Chip has verified that it fixes the problem that Eigen encountered. Can I check this into the master GCC branch? After a burn-in period, can I check this patch into the active GCC branches? Thanks in advance. 2023-05-22 Michael Meissner gcc/ PR target/70243 * config/rs6000/vsx.md (vsx_fmav4sf4): Do not generate vmaddfp. (vsx_nfmsv4sf4): Do not generate vnmsubfp. Back port from master 04/10/2023 change. gcc/testsuite/ PR target/70243 * gcc.target/powerpc/pr70243.c: New test. Back port from master 04/10/2023 change. --- gcc/config/rs6000/vsx.md | 31 ++++++++-------- gcc/testsuite/gcc.target/powerpc/pr70243.c | 41 ++++++++++++++++++++++ 2 files changed, 55 insertions(+), 17 deletions(-) create mode 100644 gcc/testsuite/gcc.target/powerpc/pr70243.c diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index 068abc1d62ea..0b69b60dc14c 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -2018,22 +2018,20 @@ "xtsqrtp %0,%x1" [(set_attr "type" "")]) -;; Fused vector multiply/add instructions. 
Support the classical Altivec -;; versions of fma, which allows the target to be a separate register from the -;; 3 inputs. Under VSX, the target must be either the addend or the first -;; multiply. - +;; Fused vector multiply/add instructions. Do not generate the Altivec versions +;; of fma (vmaddfp and vnmsubfp). These instructions allow the target to be a +;; separate register from the 3 inputs, but they have different rounding +;; behaviors than the VSX instructions. (define_insn "*vsx_fmav4sf4" - [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,v") + [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa") (fma:V4SF - (match_operand:V4SF 1 "vsx_register_operand" "%wa,wa,v") - (match_operand:V4SF 2 "vsx_register_operand" "wa,0,v") - (match_operand:V4SF 3 "vsx_register_operand" "0,wa,v")))] + (match_operand:V4SF 1 "vsx_register_operand" "%wa,wa") + (match_operand:V4SF 2 "vsx_register_operand" "wa,0") + (match_operand:V4SF 3 "vsx_register_operand" "0,wa")))] "VECTOR_UNIT_VSX_P (V4SFmode)" "@ xvmaddasp %x0,%x1,%x2 - xvmaddmsp %x0,%x1,%x3 - vmaddfp %0,%1,%2,%3" + xvmaddmsp %x0,%x1,%x3" [(set_attr "type" "vecfloat")]) (define_insn "*vsx_fmav2df4" @@ -2075,18 +2073,17 @@ [(set_attr "type" "")]) (define_insn "*vsx_nfmsv4sf4" - [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,v") + [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa") (neg:V4SF (fma:V4SF - (match_operand:V4SF 1 "vsx_register_operand" "%wa,wa,v") - (match_operand:V4SF 2 "vsx_register_operand" "wa,0,v") + (match_operand:V4SF 1 "vsx_register_operand" "%wa,wa") + (match_operand:V4SF 2 "vsx_register_operand" "wa,0") (neg:V4SF - (match_operand:V4SF 3 "vsx_register_operand" "0,wa,v")))))] + (match_operand:V4SF 3 "vsx_register_operand" "0,wa")))))] "VECTOR_UNIT_VSX_P (V4SFmode)" "@ xvnmsubasp %x0,%x1,%x2 - xvnmsubmsp %x0,%x1,%x3 - vnmsubfp %0,%1,%2,%3" + xvnmsubmsp %x0,%x1,%x3" [(set_attr "type" "vecfloat")]) (define_insn "*vsx_nfmsv2df4" diff --git 
a/gcc/testsuite/gcc.target/powerpc/pr70243.c b/gcc/testsuite/gcc.target/powerpc/pr70243.c new file mode 100644 index 000000000000..18a5ce787921 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr70243.c @@ -0,0 +1,41 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target powerpc_vsx_ok } */ +/* { dg-options "-O2 -mvsx" } */ + +/* PR 70243, Make sure we don't generate vmaddfp or vnmsubfp. These + instructions have different rounding modes than the VSX instructions + xvmaddsp and xvnmsubsp. These tests are written where the 3 inputs and + target are all separate registers. Because vmaddfp and vnmsubfp are no + longer generated the compiler will have to generate an xvmaddsp or xvnmsubsp + instruction followed by a move operation. */ + +#include <altivec.h> + +vector float +do_add1 (vector float dummy, vector float a, vector float b, vector float c) +{ + return (a * b) + c; +} + +vector float +do_nsub1 (vector float dummy, vector float a, vector float b, vector float c) +{ + return -((a * b) - c); +} + +vector float +do_add2 (vector float dummy, vector float a, vector float b, vector float c) +{ + return vec_madd (a, b, c); +} + +vector float +do_nsub2 (vector float dummy, vector float a, vector float b, vector float c) +{ + return vec_nmsub (a, b, c); +} + +/* { dg-final { scan-assembler {\mxvmadd[am]sp\M} } } */ +/* { dg-final { scan-assembler {\mxvnmsub[am]sp\M} } } */ +/* { dg-final { scan-assembler-not {\mvmaddfp\M} } } */ +/* { dg-final { scan-assembler-not {\mvnmsubfp\M} } } */ -- 2.47.3