The vectorizer dependence analysis is confused by invariant loads
when determining whether vectorization preserves the scalar stmt
execution order. The following rectifies this.
PR tree-optimization/113431
* tree-vect-data-refs.cc (vect_preserves_scalar_order_p):
When there is an invariant load we might not preserve
scalar order.
* gcc.dg/vect/pr113431.c: New testcase.
--- /dev/null
+/* { dg-additional-options "-O3 -fdump-tree-slp1-details" } */
+
+#include "tree-vect.h"
+
+int a[2][9];
+int b;
+int main() /* Runtime check for PR113431: invariant load vs. in-loop store. */
+{
+ check_vect ();
+ for (b = 0; b < 2; b++)
+ for (long e = 8; e > 0; e--) /* e runs 8 down to 1, so a[b][1] is stored last. */
+ a[b][e] = a[0][1] == 0; /* a[0][1] is re-read every iteration and is itself overwritten when b == 0, e == 1. */
+ if (a[1][1] != 0) /* In scalar order a[0][1] is 1 once b == 1, so every a[1][e] must be 0. */
+ __builtin_abort ();
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: basic block part vectorized" 2 "slp1" { target vect_int } } } */
&& !STMT_VINFO_GROUPED_ACCESS (stmtinfo_b))
return true;
+ /* If there is a loop invariant read involved we might vectorize it in
+ the prologue, breaking scalar order with respect to the in-loop store. */
+ if ((DR_IS_READ (dr_info_a->dr) && integer_zerop (DR_STEP (dr_info_a->dr)))
+ || (DR_IS_READ (dr_info_b->dr) && integer_zerop (DR_STEP (dr_info_b->dr))))
+ return false;
+
/* STMT_A and STMT_B belong to overlapping groups. All loads are
emitted at the position of the first scalar load.
Stores in a group are emitted at the position of the last scalar store.