tree-optimization/109892 - SLP reduction of fma

author Richard Biener <rguenther@suse.de>

Wed, 25 Jun 2025 08:36:59 +0000 (10:36 +0200)

committer Richard Biener <rguenth@gcc.gnu.org>

Wed, 25 Jun 2025 13:02:01 +0000 (15:02 +0200)
author Richard Biener <rguenther@suse.de>
Wed, 25 Jun 2025 08:36:59 +0000 (10:36 +0200)
committer Richard Biener <rguenth@gcc.gnu.org>
Wed, 25 Jun 2025 13:02:01 +0000 (15:02 +0200)
diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-fma-1.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-fma-1.c

new file mode 100644 (file)

index 0000000..e958b43
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-fma-1.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-mfma" { target { x86_64-*-* i?86-*-* } } } */
+
+double f(double x[], long n)
+{
+    double r0 = 0, r1 = 0;
+    for (; n; x += 2, n--) {
+        r0 = __builtin_fma(x[0], x[0], r0);
+        r1 = __builtin_fma(x[1], x[1], r1);
+    }
+    return r0 + r1;
+}
+
+/* We should vectorize this as SLP reduction.  */
+/* { dg-final { scan-tree-dump "loop vectorized using 16 byte vectors and unroll factor 1" "vect" { target { x86_64-*-* i?86-*-* } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-fma-2.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-fma-2.c

new file mode 100644 (file)

index 0000000..ea1ca97
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-fma-2.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-ffp-contract=on" } */
+/* { dg-additional-options "-mfma" { target { x86_64-*-* i?86-*-* } } } */
+
+static double muladd(double x, double y, double z)
+{
+    return x * y + z;
+}
+double g(double x[], long n)
+{
+    double r0 = 0, r1 = 0;
+    for (; n; x += 2, n--) {
+        r0 = muladd(x[0], x[0], r0);
+        r1 = muladd(x[1], x[1], r1);
+    }
+    return r0 + r1;
+}
+
+/* We should vectorize this as SLP reduction.  */
+/* { dg-final { scan-tree-dump "loop vectorized using 16 byte vectors and unroll factor 1" "vect" { target { x86_64-*-* i?86-*-* } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-fma-3.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-fma-3.c

new file mode 100644 (file)

index 0000000..10ceced
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-fma-3.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-ffast-math" } */
+/* { dg-additional-options "-mfma" { target { x86_64-*-* i?86-*-* } } } */
+
+double f(double x[], long n)
+{
+    double r0 = 0, r1 = 0;
+    for (; n; x += 2, n--) {
+        r0 = __builtin_fma(x[0], x[0], r0);
+        r1 = __builtin_fma(x[1], x[1], r1);
+    }
+    return r0 + r1;
+}
+
+/* We should vectorize this as SLP reduction, higher VF possible.  */
+/* { dg-final { scan-tree-dump "optimized: loop vectorized" "vect" { target { x86_64-*-* i?86-*-* } } } } */
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc

index 9ee8e50ee75ac4eb3795c8d0c1f4dc5de14825ff..5b6769af31c305c9ae3d9405cd8ba13796da220a 100644 (file)
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -4126,6 +4126,10 @@ pop:
           if (op.ops[2] == op.ops[opi])
             neg = ! neg;
         }
+      /* For an FMA the reduction code is the PLUS if the addition chain
+        is the reduction.  */
+      else if (op.code == IFN_FMA && opi == 2)
+       op.code = PLUS_EXPR;
        if (CONVERT_EXPR_CODE_P (op.code)
           && tree_nop_conversion_p (op.type, TREE_TYPE (op.ops[0])))
         ;
@@ -8070,6 +8074,19 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
                                  "in-order reduction chain without SLP.\n");
               return false;
             }
+         /* Code generation doesn't support function calls other
+            than .COND_*.  */
+         if (!op.code.is_tree_code ()
+             && !(op.code.is_internal_fn ()
+                  && conditional_internal_fn_code (internal_fn (op.code))
+                       != ERROR_MARK))
+           {
+             if (dump_enabled_p ())
+               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                                "in-order reduction chain operation not "
+                                "supported.\n");
+             return false;
+           }
           STMT_VINFO_REDUC_TYPE (reduc_info)
             = reduction_type = FOLD_LEFT_REDUCTION;
         }
author	Richard Biener <rguenther@suse.de>
	Wed, 25 Jun 2025 08:36:59 +0000 (10:36 +0200)
committer	Richard Biener <rguenth@gcc.gnu.org>
	Wed, 25 Jun 2025 13:02:01 +0000 (15:02 +0200)
gcc/testsuite/gcc.dg/vect/vect-reduc-fma-1.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.dg/vect/vect-reduc-fma-2.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.dg/vect/vect-reduc-fma-3.c	[new file with mode: 0644]	patch \| blob
gcc/tree-vect-loop.cc		patch \| blob \| blame \| history