git.ipfire.org Git - thirdparty/gcc.git/commitdiff
tree-optimization/120808 - SLP build with mixed .FMA/.FMS
author Richard Biener <rguenther@suse.de>
Wed, 25 Jun 2025 07:24:41 +0000 (09:24 +0200)
committer Richard Biener <rguenth@gcc.gnu.org>
Wed, 25 Jun 2025 12:45:27 +0000 (14:45 +0200)
The following allows SLP build to succeed when mixing .FMA/.FMS
in different lanes, the same way we handle mixed plus/minus.  This
does not yet address SLP pattern matching being unable to form
a FMADDSUB from this.

PR tree-optimization/120808
* tree-vectorizer.h (compatible_calls_p): Add flag to
indicate a FMA/FMS pair is allowed.
* tree-vect-slp.cc (compatible_calls_p): Likewise.
(vect_build_slp_tree_1): Allow mixed .FMA/.FMS as two-operator.
(vect_build_slp_tree_2): Handle calls in two-operator SLP build.
* tree-vect-slp-patterns.cc (compatible_complex_nodes_p):
Adjust.

* gcc.dg/vect/bb-slp-pr120808.c: New testcase.

gcc/testsuite/gcc.dg/vect/bb-slp-pr120808.c [new file with mode: 0644]
gcc/tree-vect-slp-patterns.cc
gcc/tree-vect-slp.cc
gcc/tree-vectorizer.h

diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pr120808.c b/gcc/testsuite/gcc.dg/vect/bb-slp-pr120808.c
new file mode 100644 (file)
index 0000000..c334d6a
--- /dev/null
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-ffp-contract=on" } */
+/* { dg-additional-options "-mfma" { target { x86_64-*-* i?86-*-* } } } */
+
+void f(double x[restrict], double *y, double *z)
+{
+    x[0] = x[0] * y[0] + z[0];
+    x[1] = x[1] * y[1] - z[1];
+}
+
+/* The following should check for SLP build covering the loads.  */
+/* { dg-final { scan-tree-dump "transform load" "slp2" { target { x86_64-*-* i?86-*-* } } } } */
index c0dff90d9baf5ffaf0902326acc00ca78e1b563c..24ae203e6ffe66e39a7eac24fee2a9357bf17695 100644 (file)
@@ -786,7 +786,7 @@ compatible_complex_nodes_p (slp_compat_nodes_map_t *compat_cache,
   if (is_gimple_call (a_stmt))
     {
        if (!compatible_calls_p (dyn_cast <gcall *> (a_stmt),
-                                dyn_cast <gcall *> (b_stmt)))
+                                dyn_cast <gcall *> (b_stmt), false))
          return false;
     }
   else if (!is_gimple_assign (a_stmt))
index dc89da3bf1777b1665faab3b720ba2830e892d07..603dfc0d4b2dba9418dd11f98b03322f7e1b400e 100644 (file)
@@ -986,13 +986,18 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char swap,
    to be combined into the same SLP group.  */
 
 bool
-compatible_calls_p (gcall *call1, gcall *call2)
+compatible_calls_p (gcall *call1, gcall *call2, bool allow_two_operators)
 {
   unsigned int nargs = gimple_call_num_args (call1);
   if (nargs != gimple_call_num_args (call2))
     return false;
 
-  if (gimple_call_combined_fn (call1) != gimple_call_combined_fn (call2))
+  auto cfn1 = gimple_call_combined_fn (call1);
+  auto cfn2 = gimple_call_combined_fn (call2);
+  if (cfn1 != cfn2
+      && (!allow_two_operators
+         || !((cfn1 == CFN_FMA || cfn1 == CFN_FMS)
+              && (cfn2 == CFN_FMA || cfn2 == CFN_FMS))))
     return false;
 
   if (gimple_call_internal_p (call1))
@@ -1354,10 +1359,14 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
                   || rhs_code != IMAGPART_EXPR)
               /* Handle mismatches in plus/minus by computing both
                  and merging the results.  */
-              && !((first_stmt_code == PLUS_EXPR
-                    || first_stmt_code == MINUS_EXPR)
-                   && (alt_stmt_code == PLUS_EXPR
-                       || alt_stmt_code == MINUS_EXPR)
+              && !((((first_stmt_code == PLUS_EXPR
+                      || first_stmt_code == MINUS_EXPR)
+                     && (alt_stmt_code == PLUS_EXPR
+                         || alt_stmt_code == MINUS_EXPR))
+                    || ((first_stmt_code == CFN_FMA
+                         || first_stmt_code == CFN_FMS)
+                        && (alt_stmt_code == CFN_FMA
+                            || alt_stmt_code == CFN_FMS)))
                    && rhs_code == alt_stmt_code)
               && !(first_stmt_code.is_tree_code ()
                    && rhs_code.is_tree_code ()
@@ -1406,7 +1415,7 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
            {
              if (!is_a <gcall *> (stmts[0]->stmt)
                  || !compatible_calls_p (as_a <gcall *> (stmts[0]->stmt),
-                                         call_stmt))
+                                         call_stmt, true))
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -3055,24 +3064,35 @@ fail:
       SLP_TREE_CODE (node) = VEC_PERM_EXPR;
       SLP_TREE_CHILDREN (node).quick_push (one);
       SLP_TREE_CHILDREN (node).quick_push (two);
-      gassign *stmt = as_a <gassign *> (stmts[0]->stmt);
-      enum tree_code code0 = gimple_assign_rhs_code (stmt);
+      enum tree_code code0 = ERROR_MARK;
       enum tree_code ocode = ERROR_MARK;
+      if (gassign *stmt = dyn_cast <gassign *> (stmts[0]->stmt))
+       code0 = gimple_assign_rhs_code (stmt);
       stmt_vec_info ostmt_info;
       unsigned j = 0;
       FOR_EACH_VEC_ELT (stmts, i, ostmt_info)
        {
-         gassign *ostmt = as_a <gassign *> (ostmt_info->stmt);
-         if (gimple_assign_rhs_code (ostmt) != code0)
+         int op = 0;
+         if (gassign *ostmt = dyn_cast <gassign *> (ostmt_info->stmt))
            {
-             SLP_TREE_LANE_PERMUTATION (node).safe_push (std::make_pair (1, i));
-             ocode = gimple_assign_rhs_code (ostmt);
-             j = i;
+             if (gimple_assign_rhs_code (ostmt) != code0)
+               {
+                 ocode = gimple_assign_rhs_code (ostmt);
+                 op = 1;
+                 j = i;
+               }
            }
          else
-           SLP_TREE_LANE_PERMUTATION (node).safe_push (std::make_pair (0, i));
+           {
+             if (gimple_call_combined_fn (stmts[0]->stmt)
+                 != gimple_call_combined_fn (ostmt_info->stmt))
+               {
+                 op = 1;
+                 j = i;
+               }
+           }
+         SLP_TREE_LANE_PERMUTATION (node).safe_push (std::make_pair (op, i));
        }
-
       SLP_TREE_CODE (one) = code0;
       SLP_TREE_CODE (two) = ocode;
       SLP_TREE_LANES (one) = stmts.length ();
index dc60b4184ee087f579f47e37fe05a34f1493e3c5..0444adf5322da8b19e587f1e47e2d3aeeffa68ca 100644 (file)
@@ -2701,7 +2701,7 @@ extern void duplicate_and_interleave (vec_info *, gimple_seq *, tree,
 extern int vect_get_place_in_interleaving_chain (stmt_vec_info, stmt_vec_info);
 extern slp_tree vect_create_new_slp_node (unsigned, tree_code);
 extern void vect_free_slp_tree (slp_tree);
-extern bool compatible_calls_p (gcall *, gcall *);
+extern bool compatible_calls_p (gcall *, gcall *, bool);
 extern int vect_slp_child_index_for_operand (const gimple *, int op, bool);
 
 extern tree prepare_vec_mask (loop_vec_info, tree, tree, tree,