i386: Add vec_fm{addsub,subadd}v2sf4 patterns [PR116979]

author Jakub Jelinek <jakub@redhat.com>

Fri, 13 Dec 2024 09:31:04 +0000 (10:31 +0100)

committer Jakub Jelinek <jakub@gcc.gnu.org>

Fri, 13 Dec 2024 09:32:57 +0000 (10:32 +0100)
author Jakub Jelinek <jakub@redhat.com>
Fri, 13 Dec 2024 09:31:04 +0000 (10:31 +0100)
committer Jakub Jelinek <jakub@gcc.gnu.org>
Fri, 13 Dec 2024 09:32:57 +0000 (10:32 +0100)
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md

index 4daaa2baa25d1bb0df9c14655872e63b841d07e8..d9725a4e5764133658bbacb8fb61a754e901d9e4 100644 (file)
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1132,6 +1132,54 @@
    DONE;
  })
  
+(define_expand "vec_fmaddsubv2sf4"  ; V2SF fmaddsub, carried out on V4SF temporaries
+  [(match_operand:V2SF 0 "register_operand")  ; result
+   (match_operand:V2SF 1 "nonimmediate_operand")  ; inputs 1-3 are passed, in this order,
+   (match_operand:V2SF 2 "nonimmediate_operand")  ; to the V4SF expander in the body
+   (match_operand:V2SF 3 "nonimmediate_operand")]
+  "(TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL)
+   && TARGET_MMX_WITH_SSE
+   && ix86_partial_vec_fp_math"
+{
+  rtx op3 = gen_reg_rtx (V4SFmode);  /* V4SF temporaries standing in for operands 3, 2, 1 ...  */
+  rtx op2 = gen_reg_rtx (V4SFmode);
+  rtx op1 = gen_reg_rtx (V4SFmode);
+  rtx op0 = gen_reg_rtx (V4SFmode);  /* ... and for the V4SF result.  */
+
+  emit_insn (gen_movq_v2sf_to_sse (op3, operands[3]));  /* Move each V2SF input into its V4SF temporary.  */
+  emit_insn (gen_movq_v2sf_to_sse (op2, operands[2]));  /* NOTE(review): upper-lane contents are set by movq_v2sf_to_sse,  */
+  emit_insn (gen_movq_v2sf_to_sse (op1, operands[1]));  /* which is not visible here; only the low half is read back.  */
+
+  emit_insn (gen_vec_fmaddsubv4sf4 (op0, op1, op2, op3));  /* Do the operation with the V4SF expander.  */
+
+  emit_move_insn (operands[0], lowpart_subreg (V2SFmode, op0, V4SFmode));  /* Result is the low V2SF part of op0.  */
+  DONE;  /* The insns emitted above are the whole expansion.  */
+})
+
+(define_expand "vec_fmsubaddv2sf4"  ; V2SF fmsubadd, carried out on V4SF temporaries
+  [(match_operand:V2SF 0 "register_operand")  ; result
+   (match_operand:V2SF 1 "nonimmediate_operand")  ; inputs 1-3 are passed, in this order,
+   (match_operand:V2SF 2 "nonimmediate_operand")  ; to the V4SF expander in the body
+   (match_operand:V2SF 3 "nonimmediate_operand")]
+  "(TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL)
+   && TARGET_MMX_WITH_SSE
+   && ix86_partial_vec_fp_math"
+{
+  rtx op3 = gen_reg_rtx (V4SFmode);  /* V4SF temporaries standing in for operands 3, 2, 1 ...  */
+  rtx op2 = gen_reg_rtx (V4SFmode);
+  rtx op1 = gen_reg_rtx (V4SFmode);
+  rtx op0 = gen_reg_rtx (V4SFmode);  /* ... and for the V4SF result.  */
+
+  emit_insn (gen_movq_v2sf_to_sse (op3, operands[3]));  /* Move each V2SF input into its V4SF temporary.  */
+  emit_insn (gen_movq_v2sf_to_sse (op2, operands[2]));  /* NOTE(review): upper-lane contents are set by movq_v2sf_to_sse,  */
+  emit_insn (gen_movq_v2sf_to_sse (op1, operands[1]));  /* which is not visible here; only the low half is read back.  */
+
+  emit_insn (gen_vec_fmsubaddv4sf4 (op0, op1, op2, op3));  /* Do the operation with the V4SF expander.  */
+
+  emit_move_insn (operands[0], lowpart_subreg (V2SFmode, op0, V4SFmode));  /* Result is the low V2SF part of op0.  */
+  DONE;  /* The insns emitted above are the whole expansion.  */
+})
+
  ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  ;;
  ;; Parallel single-precision floating point comparisons
diff --git a/gcc/testsuite/gcc.target/i386/pr116979.c b/gcc/testsuite/gcc.target/i386/pr116979.c

new file mode 100644 (file)

index 0000000..0d2a958
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr116979.c
@@ -0,0 +1,24 @@
+/* PR target/116979 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mfma -fvect-cost-model=unlimited" } */
+/* { dg-final { scan-assembler "vfmaddsub(?:132|213|231)pd" } } */
+/* { dg-final { scan-assembler "vfmaddsub(?:132|213|231)ps" { target { ! ia32 } } } } */
+
+struct S { __complex__ float f; };  /* Wrapper around a single-precision complex value (used by foo).  */
+struct T { __complex__ double f; };  /* Wrapper around a double-precision complex value (used by bar).  */
+
+struct S
+foo (const struct S *a, const struct S *b)  /* Returns the product of the complex floats in *a and *b.  */
+{
+  struct S r;
+  r.f = a->f * b->f;  /* Complex multiply; presumably what the "ps" scan-assembler directive targets -- confirm.  */
+  return r;
+}
+
+struct T
+bar (const struct T *a, const struct T *b)  /* Returns the product of the complex doubles in *a and *b.  */
+{
+  struct T r;
+  r.f = a->f * b->f;  /* Complex multiply; presumably what the "pd" scan-assembler directive targets -- confirm.  */
+  return r;
+}
author	Jakub Jelinek <jakub@redhat.com>
	Fri, 13 Dec 2024 09:31:04 +0000 (10:31 +0100)
committer	Jakub Jelinek <jakub@gcc.gnu.org>
	Fri, 13 Dec 2024 09:32:57 +0000 (10:32 +0100)
gcc/config/i386/mmx.md		patch \| blob \| blame \| history
gcc/testsuite/gcc.target/i386/pr116979.c	[new file with mode: 0644]	patch \| blob