From: Hongyu Wang Date: Fri, 18 Mar 2022 17:16:29 +0000 (+0800) Subject: AVX512FP16: Fix wrong code for _mm_mask_f[c]madd.*sch [PR 104978] X-Git-Tag: basepoints/gcc-13~566 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=7bce0be03b857eefe5990c3ef0af06ea8f8ae04e;p=thirdparty%2Fgcc.git AVX512FP16: Fix wrong code for _mm_mask_f[c]madd.*sch [PR 104978] For complex scalar intrinsic like _mm_mask_fcmadd_sch, the mask should be and by 1 to ensure the mask is bind to lowest byte. Use masked vmovss to perform same operation which omits higher bits of mask. gcc/ChangeLog: PR target/104978 * config/i386/sse.md (avx512fp16_fmaddcsh_v8hf_mask1" { - rtx op0, op1; + rtx op0, op1, dest; if () emit_insn (gen_avx512fp16_fmaddcsh_v8hf_mask ( @@ -6586,26 +6586,15 @@ emit_insn (gen_avx512fp16_fmaddcsh_v8hf_mask (operands[0], operands[1], operands[2], operands[3], operands[4])); - if (TARGET_AVX512VL) - { - op0 = lowpart_subreg (V4SFmode, operands[0], V8HFmode); - op1 = lowpart_subreg (V4SFmode, operands[1], V8HFmode); - emit_insn (gen_avx512vl_loadv4sf_mask (op0, op0, op1, operands[4])); - } - else - { - rtx mask, tmp, vec_mask; - mask = lowpart_subreg (SImode, operands[4], QImode), - tmp = gen_reg_rtx (SImode); - emit_insn (gen_ashlsi3 (tmp, mask, GEN_INT (31))); - vec_mask = gen_reg_rtx (V4SImode); - emit_insn (gen_rtx_SET (vec_mask, CONST0_RTX (V4SImode))); - emit_insn (gen_vec_setv4si_0 (vec_mask, vec_mask, tmp)); - vec_mask = lowpart_subreg (V4SFmode, vec_mask, V4SImode); - op0 = lowpart_subreg (V4SFmode, operands[0], V8HFmode); - op1 = lowpart_subreg (V4SFmode, operands[1], V8HFmode); - emit_insn (gen_sse4_1_blendvps (op0, op1, op0, vec_mask)); - } + op0 = lowpart_subreg (V4SFmode, force_reg (V8HFmode, operands[0]), + V8HFmode); + if (!MEM_P (operands[1])) + operands[1] = force_reg (V8HFmode, operands[1]); + op1 = lowpart_subreg (V4SFmode, operands[1], V8HFmode); + dest = gen_reg_rtx (V4SFmode); + emit_insn (gen_avx512f_movsf_mask (dest, op1, op0, op1, operands[4])); + emit_move_insn (operands[0], lowpart_subreg (V8HFmode, dest, + V4SFmode)); DONE; }) @@ -6631,7 +6620,7 @@ (match_operand:QI 4 "register_operand")] "TARGET_AVX512FP16 && " { - rtx op0, op1; + rtx op0, op1, dest; if () emit_insn (gen_avx512fp16_fcmaddcsh_v8hf_mask ( @@ -6641,26 +6630,15 @@ emit_insn (gen_avx512fp16_fcmaddcsh_v8hf_mask (operands[0], operands[1], operands[2], operands[3], operands[4])); - if (TARGET_AVX512VL) - { - op0 = lowpart_subreg (V4SFmode, operands[0], V8HFmode); - op1 = lowpart_subreg (V4SFmode, operands[1], V8HFmode); - emit_insn (gen_avx512vl_loadv4sf_mask (op0, op0, op1, operands[4])); - } - else - { - rtx mask, tmp, vec_mask; - mask = lowpart_subreg (SImode, operands[4], QImode), - tmp = gen_reg_rtx (SImode); - emit_insn (gen_ashlsi3 (tmp, mask, GEN_INT (31))); - vec_mask = gen_reg_rtx (V4SImode); - emit_insn (gen_rtx_SET (vec_mask, CONST0_RTX (V4SImode))); - emit_insn (gen_vec_setv4si_0 (vec_mask, vec_mask, tmp)); - vec_mask = lowpart_subreg (V4SFmode, vec_mask, V4SImode); - op0 = lowpart_subreg (V4SFmode, operands[0], V8HFmode); - op1 = lowpart_subreg (V4SFmode, operands[1], V8HFmode); - emit_insn (gen_sse4_1_blendvps (op0, op1, op0, vec_mask)); - } + op0 = lowpart_subreg (V4SFmode, force_reg (V8HFmode, operands[0]), + V8HFmode); + if (!MEM_P (operands[1])) + operands[1] = force_reg (V8HFmode, operands[1]); + op1 = lowpart_subreg (V4SFmode, operands[1], V8HFmode); + dest = gen_reg_rtx (V4SFmode); + emit_insn (gen_avx512f_movsf_mask (dest, op1, op0, op1, operands[4])); + emit_move_insn (operands[0], lowpart_subreg (V8HFmode, dest, + V4SFmode)); DONE; }) diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vfcmaddcsh-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vfcmaddcsh-1a.c index eb96588df39b..0f87861f09b2 100644 --- a/gcc/testsuite/gcc.target/i386/avx512fp16-vfcmaddcsh-1a.c +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vfcmaddcsh-1a.c @@ -1,13 +1,13 @@ /* { dg-do compile } */ -/* { dg-options "-mavx512fp16 -mno-avx512vl -O2" } */ +/* { dg-options "-mavx512fp16 -O2" } */ /* { dg-final { scan-assembler-times "vfcmaddcsh\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vfcmaddcsh\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 2 } } */ /* { dg-final { scan-assembler-times "vfcmaddcsh\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vfcmaddcsh\[ \\t\]+\{rn-sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vfcmaddcsh\[ \\t\]+\{rn-sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 2 } } */ /* { dg-final { scan-assembler-times "vfcmaddcsh\[ \\t\]+\{rz-sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vblendvps\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 2 } } */ /* { dg-final { scan-assembler-times "vmovss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 2 } } */ +/* { dg-final { scan-assembler-times "vmovss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}+(?:\n|\[ \\t\]+#)" 2 } } */ #include diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vfcmaddcsh-1c.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vfcmaddcsh-1c.c deleted file mode 100644 index 79a295f722cf..000000000000 --- a/gcc/testsuite/gcc.target/i386/avx512fp16-vfcmaddcsh-1c.c +++ /dev/null @@ -1,13 +0,0 @@ -/* { dg-do compile } */ -/* { dg-options "-mavx512fp16 -mavx512vl -O2" } */ -/* { dg-final { scan-assembler-times "vfcmaddcsh\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vfcmaddcsh\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 2 } } */ -/* { dg-final { scan-assembler-times "vfcmaddcsh\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vfcmaddcsh\[ \\t\]+\{rn-sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vfcmaddcsh\[ \\t\]+\{rn-sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 2 } } */ -/* { dg-final { scan-assembler-times "vfcmaddcsh\[ \\t\]+\{rz-sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vblendmps\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 2 } } */ -/* { dg-final { scan-assembler-times "vmovss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 2 } } */ - -#include "avx512fp16-vfcmaddcsh-1a.c" - diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vfmaddcsh-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vfmaddcsh-1a.c index 288d1c12a100..6b07957a8bbc 100644 --- a/gcc/testsuite/gcc.target/i386/avx512fp16-vfmaddcsh-1a.c +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vfmaddcsh-1a.c @@ -1,13 +1,13 @@ /* { dg-do compile } */ -/* { dg-options "-mavx512fp16 -mno-avx512vl -O2" } */ +/* { dg-options "-mavx512fp16 -O2" } */ /* { dg-final { scan-assembler-times "vfmaddcsh\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vfmaddcsh\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 2 } } */ /* { dg-final { scan-assembler-times "vfmaddcsh\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vfmaddcsh\[ \\t\]+\{rn-sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vfmaddcsh\[ \\t\]+\{rn-sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 2 } } */ /* { dg-final { scan-assembler-times "vfmaddcsh\[ \\t\]+\{rz-sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vblendvps\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 2 } } */ /* { dg-final { scan-assembler-times "vmovss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 2 } } */ +/* { dg-final { scan-assembler-times "vmovss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}+(?:\n|\[ \\t\]+#)" 2 } } */ #include diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vfmaddcsh-1c.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vfmaddcsh-1c.c deleted file mode 100644 index 7863f8f9af96..000000000000 --- a/gcc/testsuite/gcc.target/i386/avx512fp16-vfmaddcsh-1c.c +++ /dev/null @@ -1,13 +0,0 @@ -/* { dg-do compile } */ -/* { dg-options "-mavx512fp16 -mavx512vl -O2" } */ -/* { dg-final { scan-assembler-times "vfmaddcsh\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vfmaddcsh\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 2 } } */ -/* { dg-final { scan-assembler-times "vfmaddcsh\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vfmaddcsh\[ \\t\]+\{rn-sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vfmaddcsh\[ \\t\]+\{rn-sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 2 } } */ -/* { dg-final { scan-assembler-times "vfmaddcsh\[ \\t\]+\{rz-sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vblendmps\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\[^\{\n\r]*(?:\n|\[ \\t\]+#)" 2 } } */ -/* { dg-final { scan-assembler-times "vmovss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 2 } } */ - -#include "avx512fp16-vfmaddcsh-1a.c" - diff --git a/gcc/testsuite/gcc.target/i386/pr104978.c b/gcc/testsuite/gcc.target/i386/pr104978.c new file mode 100644 index 000000000000..54788171affd --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr104978.c @@ -0,0 +1,18 @@ +/* PR target/104978 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512fp16" } */ +/* { dg-final { scan-assembler-times "vmovss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}+(?:\n|\[ \\t\]+#)" 2 } } */ + +#include + +__m128h +foo (__m128h a, __m128h b, __m128h c, __mmask8 m) +{ + return _mm_mask_fmadd_round_sch (a, m, b, c, 8); +} + +__m128h +foo2 (__m128h a, __m128h b, __m128h c, __mmask8 m) +{ + return _mm_mask_fcmadd_round_sch (a, m, b, c, 8); +}