git.ipfire.org Git - thirdparty/gcc.git/commitdiff
tree-optimization/117874 - missed vectorization that's formerly hybrid
author Richard Biener <rguenther@suse.de>
Mon, 2 Dec 2024 13:59:00 +0000 (14:59 +0100)
committer Richard Biener <rguenth@gcc.gnu.org>
Tue, 3 Dec 2024 06:53:28 +0000 (07:53 +0100)
With SLP forced we fail to consider using single-lane SLP for a case
that we still end up discovering as hybrid (in the PR in question
this is because we run into the SLP discovery limit due to excessive
association).

PR tree-optimization/117874
* tree-vect-loop.cc (vect_analyze_loop_2): When non-SLP
analysis fails, try single-lane SLP.

* gcc.dg/vect/pr117874.c: New testcase.

gcc/testsuite/gcc.dg/vect/pr117874.c [new file with mode: 0644]
gcc/tree-vect-loop.cc

diff --git a/gcc/testsuite/gcc.dg/vect/pr117874.c b/gcc/testsuite/gcc.dg/vect/pr117874.c
new file mode 100644 (file)
index 0000000..27e5f8c
--- /dev/null
@@ -0,0 +1,50 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_double } */
+
+typedef struct {
+    double real;
+    double imag;
+} complex;
+
+typedef struct { complex e[3][3]; } su3_matrix;
+
+void mult_su3_an(su3_matrix *a, su3_matrix *b, su3_matrix *c)
+{
+  int j;
+  double a0r,a0i,a1r,a1i,a2r,a2i;
+  double b0r,b0i,b1r,b1i,b2r,b2i;
+  for(j=0;j<3;j++)
+    {
+      a0r=a->e[0][0].real; a0i=a->e[0][0].imag;
+      b0r=b->e[0][j].real; b0i=b->e[0][j].imag;
+      a1r=a->e[1][0].real; a1i=a->e[1][0].imag;
+      b1r=b->e[1][j].real; b1i=b->e[1][j].imag;
+      a2r=a->e[2][0].real; a2i=a->e[2][0].imag;
+      b2r=b->e[2][j].real; b2i=b->e[2][j].imag;
+
+      c->e[0][j].real = a0r*b0r + a0i*b0i + a1r*b1r + a1i*b1i + a2r*b2r + a2i*b2i;
+      c->e[0][j].imag = a0r*b0i - a0i*b0r + a1r*b1i - a1i*b1r + a2r*b2i - a2i*b2r;
+
+      a0r=a->e[0][1].real; a0i=a->e[0][1].imag;
+      b0r=b->e[0][j].real; b0i=b->e[0][j].imag;
+      a1r=a->e[1][1].real; a1i=a->e[1][1].imag;
+      b1r=b->e[1][j].real; b1i=b->e[1][j].imag;
+      a2r=a->e[2][1].real; a2i=a->e[2][1].imag;
+      b2r=b->e[2][j].real; b2i=b->e[2][j].imag;
+
+      c->e[1][j].real = a0r*b0r + a0i*b0i + a1r*b1r + a1i*b1i + a2r*b2r + a2i*b2i;
+      c->e[1][j].imag = a0r*b0i - a0i*b0r + a1r*b1i - a1i*b1r + a2r*b2i - a2i*b2r;
+
+      a0r=a->e[0][2].real; a0i=a->e[0][2].imag;
+      b0r=b->e[0][j].real; b0i=b->e[0][j].imag;
+      a1r=a->e[1][2].real; a1i=a->e[1][2].imag;
+      b1r=b->e[1][j].real; b1i=b->e[1][j].imag;
+      a2r=a->e[2][2].real; a2i=a->e[2][2].imag;
+      b2r=b->e[2][j].real; b2i=b->e[2][j].imag;
+
+      c->e[2][j].real = a0r*b0r + a0i*b0i + a1r*b1r + a1i*b1i + a2r*b2r + a2i*b2i;
+      c->e[2][j].imag = a0r*b0i - a0i*b0r + a1r*b1i - a1i*b1r + a2r*b2i - a2i*b2r;
+    }
+}
+
+/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { target vect_hw_misalign } } } */
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 5a24fb8bf4c8139d8dfa2f9959003a797382b3e3..85209604486ae37d080f9a2bdc0891705daa6983 100644 (file)
@@ -3005,10 +3005,9 @@ start_over:
   ok = vect_analyze_loop_operations (loop_vinfo);
   if (!ok)
     {
-      if (dump_enabled_p ())
-       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                        "bad operation or unsupported loop bound.\n");
-      return ok;
+      ok = opt_result::failure_at (vect_location,
+                                  "bad operation or unsupported loop bound\n");
+      goto again;
     }
 
   /* For now, we don't expect to mix both masking and length approaches for one