]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
tree-optimization/109892 - SLP reduction of fma
authorRichard Biener <rguenther@suse.de>
Wed, 25 Jun 2025 08:36:59 +0000 (10:36 +0200)
committerRichard Biener <rguenth@gcc.gnu.org>
Wed, 25 Jun 2025 13:02:01 +0000 (15:02 +0200)
The following adds the ability to vectorize a fma reduction pair
as SLP reduction (we cannot yet handle ternary association in
reduction vectorization).

PR tree-optimization/109892
* tree-vect-loop.cc (check_reduction_path): Handle fma.
(vectorizable_reduction): Apply FOLD_LEFT_REDUCTION code
generation constraints.

* gcc.dg/vect/vect-reduc-fma-1.c: New testcase.
* gcc.dg/vect/vect-reduc-fma-2.c: Likewise.
* gcc.dg/vect/vect-reduc-fma-3.c: Likewise.

gcc/testsuite/gcc.dg/vect/vect-reduc-fma-1.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/vect/vect-reduc-fma-2.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/vect/vect-reduc-fma-3.c [new file with mode: 0644]
gcc/tree-vect-loop.cc

diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-fma-1.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-fma-1.c
new file mode 100644 (file)
index 0000000..e958b43
--- /dev/null
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-mfma" { target { x86_64-*-* i?86-*-* } } } */
+
+double f(double x[], long n)  /* Returns the sum of two interleaved fma accumulations over x.  */
+{
+    double r0 = 0, r1 = 0;  /* Two independent accumulators, both starting at zero.  */
+    for (; n; x += 2, n--) {  /* Each iteration consumes one pair x[0], x[1]; n counts pairs.  */
+        r0 = __builtin_fma(x[0], x[0], r0);  /* The accumulator is the addend (third operand).  */
+        r1 = __builtin_fma(x[1], x[1], r1);  /* Same pattern on the second element of the pair.  */
+    }
+    return r0 + r1;  /* Combine both accumulators into the result.  */
+}
+
+/* We should vectorize this as SLP reduction.  */
+/* { dg-final { scan-tree-dump "loop vectorized using 16 byte vectors and unroll factor 1" "vect" { target { x86_64-*-* i?86-*-* } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-fma-2.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-fma-2.c
new file mode 100644 (file)
index 0000000..ea1ca97
--- /dev/null
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-ffp-contract=on" } */
+/* { dg-additional-options "-mfma" { target { x86_64-*-* i?86-*-* } } } */
+
+static double muladd(double x, double y, double z)  /* Returns x * y + z as one expression.  */
+{
+    return x * y + z;  /* NOTE(review): presumably contracted to an fma by the -ffp-contract=on option requested above - confirm.  */
+}
+double g(double x[], long n)  /* Returns the sum of two interleaved muladd accumulations over x.  */
+{
+    double r0 = 0, r1 = 0;  /* Two independent accumulators, both starting at zero.  */
+    for (; n; x += 2, n--) {  /* Each iteration consumes one pair x[0], x[1]; n counts pairs.  */
+        r0 = muladd(x[0], x[0], r0);  /* The accumulator is passed as the addend z.  */
+        r1 = muladd(x[1], x[1], r1);  /* Same pattern on the second element of the pair.  */
+    }
+    return r0 + r1;  /* Combine both accumulators into the result.  */
+}
+
+/* We should vectorize this as SLP reduction.  */
+/* { dg-final { scan-tree-dump "loop vectorized using 16 byte vectors and unroll factor 1" "vect" { target { x86_64-*-* i?86-*-* } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-fma-3.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-fma-3.c
new file mode 100644 (file)
index 0000000..10ceced
--- /dev/null
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-ffast-math" } */
+/* { dg-additional-options "-mfma" { target { x86_64-*-* i?86-*-* } } } */
+
+double f(double x[], long n)  /* Returns the sum of two interleaved fma accumulations over x.  */
+{
+    double r0 = 0, r1 = 0;  /* Two independent accumulators, both starting at zero.  */
+    for (; n; x += 2, n--) {  /* Each iteration consumes one pair x[0], x[1]; n counts pairs.  */
+        r0 = __builtin_fma(x[0], x[0], r0);  /* The accumulator is the addend (third operand).  */
+        r1 = __builtin_fma(x[1], x[1], r1);  /* Same pattern on the second element of the pair.  */
+    }
+    return r0 + r1;  /* Combine both accumulators into the result.  */
+}
+
+/* We should vectorize this as SLP reduction, higher VF possible.  */
+/* { dg-final { scan-tree-dump "optimized: loop vectorized" "vect" { target { x86_64-*-* i?86-*-* } } } } */
index 9ee8e50ee75ac4eb3795c8d0c1f4dc5de14825ff..5b6769af31c305c9ae3d9405cd8ba13796da220a 100644 (file)
@@ -4126,6 +4126,10 @@ pop:
          if (op.ops[2] == op.ops[opi])
            neg = ! neg;
        }
+      /* For an FMA the reduction code is the PLUS if the addition chain
+        is the reduction.  */
+      else if (op.code == IFN_FMA && opi == 2)
+       op.code = PLUS_EXPR;
       if (CONVERT_EXPR_CODE_P (op.code)
          && tree_nop_conversion_p (op.type, TREE_TYPE (op.ops[0])))
        ;
@@ -8070,6 +8074,19 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
                                 "in-order reduction chain without SLP.\n");
              return false;
            }
+         /* Code generation doesn't support function calls other
+            than .COND_*.  */
+         if (!op.code.is_tree_code ()
+             && !(op.code.is_internal_fn ()
+                  && conditional_internal_fn_code (internal_fn (op.code))
+                       != ERROR_MARK))
+           {
+             if (dump_enabled_p ())
+               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                                "in-order reduction chain operation not "
+                                "supported.\n");
+             return false;
+           }
          STMT_VINFO_REDUC_TYPE (reduc_info)
            = reduction_type = FOLD_LEFT_REDUCTION;
        }