for (unsigned k = 0; k < SLP_TREE_SCALAR_STMTS (store).length (); ++k)
gimple_set_visited (SLP_TREE_SCALAR_STMTS (store)[k]->stmt, false);
+ /* If this is an SLP instance with a store, check if there's a dependent
+ load that cannot be forwarded from a previous iteration of a loop
+ both are in. This is to avoid situations like the one in PR115777. */
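+ /* This is done by analyzing the store and each grouped load in the
+ same loop as data references and checking whether the load of the
+ next iteration overlaps the store at a distance store-to-load
+ forwarding is not expected to handle. */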
+ if (res && store)
+ {
+ stmt_vec_info store_info
+ = DR_GROUP_FIRST_ELEMENT (SLP_TREE_SCALAR_STMTS (store)[0]);
+ class loop *store_loop = gimple_bb (store_info->stmt)->loop_father;
+ if (! loop_outer (store_loop))
+ return res;
+ vec<loop_p> loop_nest;
+ loop_nest.create (1);
+ loop_nest.quick_push (store_loop);
+ data_reference *drs = nullptr;
+ for (slp_tree &load : SLP_INSTANCE_LOADS (instance))
+ {
+ if (! STMT_VINFO_GROUPED_ACCESS (SLP_TREE_SCALAR_STMTS (load)[0]))
+ continue;
+ stmt_vec_info load_info
+ = DR_GROUP_FIRST_ELEMENT (SLP_TREE_SCALAR_STMTS (load)[0]);
+ if (gimple_bb (load_info->stmt)->loop_father != store_loop)
+ continue;
+
+ /* For now concern ourselves with write-after-read as we also
+ only look for re-use of the store within the same SLP instance.
+ We can still get a RAW here when the instance contains a PHI
+ with a backedge, though, thus this test. */
+ if (! vect_stmt_dominates_stmt_p (STMT_VINFO_STMT (load_info),
+ STMT_VINFO_STMT (store_info)))
+ continue;
+
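+ /* Analyze the store as a data reference once; punt if its base
+ address or its step within the loop is not known at compile time. */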
+ if (! drs)
+ {
+ drs = create_data_ref (loop_preheader_edge (store_loop),
+ store_loop,
+ DR_REF (STMT_VINFO_DATA_REF (store_info)),
+ store_info->stmt, false, false);
+ if (! DR_BASE_ADDRESS (drs)
+ || TREE_CODE (DR_STEP (drs)) != INTEGER_CST)
+ break;
+ }
+ data_reference *drl
+ = create_data_ref (loop_preheader_edge (store_loop),
+ store_loop,
+ DR_REF (STMT_VINFO_DATA_REF (load_info)),
+ load_info->stmt, true, false);
+
+ /* See whether the DRs have a known constant distance throughout
+ the containing loop iteration. */
+ if (! DR_BASE_ADDRESS (drl)
+ || ! operand_equal_p (DR_STEP (drs), DR_STEP (drl))
+ || ! operand_equal_p (DR_BASE_ADDRESS (drs),
+ DR_BASE_ADDRESS (drl))
+ || ! operand_equal_p (DR_OFFSET (drs), DR_OFFSET (drl)))
+ {
+ free_data_ref (drl);
+ continue;
+ }
+
+ /* If the next iteration's load overlaps the store at a non-power-of-two
+ offset we will surely fail any STLF attempt. */
+ HOST_WIDE_INT step = TREE_INT_CST_LOW (DR_STEP (drl));
+ unsigned HOST_WIDE_INT sizes
+ = (TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drs))))
+ * DR_GROUP_SIZE (store_info));
+ unsigned HOST_WIDE_INT sizel
+ = (TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drl))))
+ * DR_GROUP_SIZE (load_info));
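+ /* Compare the store range of iteration i with the load range of
+ iteration i + 1 by advancing the load's start by one step. */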
+ if (ranges_overlap_p (TREE_INT_CST_LOW (DR_INIT (drl)) + step, sizel,
+ TREE_INT_CST_LOW (DR_INIT (drs)), sizes))
+ {
+ unsigned HOST_WIDE_INT dist
+ = absu_hwi (TREE_INT_CST_LOW (DR_INIT (drl)) + step
+ - TREE_INT_CST_LOW (DR_INIT (drs)));
+ poly_uint64 loadsz = tree_to_poly_uint64
+ (TYPE_SIZE_UNIT (SLP_TREE_VECTYPE (load)));
+ poly_uint64 storesz = tree_to_poly_uint64
+ (TYPE_SIZE_UNIT (SLP_TREE_VECTYPE (store)));
+ /* When the overlap aligns with the vector size used for the loads
+ and the vector stores are larger than or equal to the loads,
+ forwarding should work. */
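+ /* E.g. (hypothetical numbers) a next-iteration load starting 4 bytes
+ after the start of the stored bytes with 16-byte vector loads does
+ not line up within a single vector store, so forwarding is expected
+ to fail. */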
+ if (maybe_gt (loadsz, storesz) || ! multiple_p (dist, loadsz))
+ load->avoid_stlf_fail = true;
+ }
+ free_data_ref (drl);
+ }
+ if (drs)
+ free_data_ref (drs);
+ loop_nest.release ();
+ }
+
return res;
}
SLP_TREE_DEF_TYPE (this) = vect_uninitialized_def;
SLP_TREE_CODE (this) = ERROR_MARK;
this->ldst_lanes = false;
+ this->avoid_stlf_fail = false;
SLP_TREE_VECTYPE (this) = NULL_TREE;
SLP_TREE_REPRESENTATIVE (this) = NULL;
SLP_TREE_MEMORY_ACCESS_TYPE (this) = VMAT_INVARIANT;
SLP_TREE_REF_COUNT (node));
if (SLP_TREE_VECTYPE (node))
dump_printf (metadata, " %T", SLP_TREE_VECTYPE (node));
- dump_printf (metadata, "\n");
+ dump_printf (metadata, "%s\n",
+ node->avoid_stlf_fail ? " (avoid-stlf-fail)" : "");
if (SLP_TREE_DEF_TYPE (node) == vect_internal_def)
{
if (SLP_TREE_CODE (node) == VEC_PERM_EXPR)