--- /dev/null
+/* { dg-do compile } */
+/* { dg-additional-options "-Ofast" } */
+/* { dg-additional-options "-march=bdver2" { target x86_64-*-* i?86-*-* } } */
+
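+/* Compile-only reproducer.  cross() reads the x/y/z fields of its arguments
+   in rotated order, and generateNormals() stores the resulting
+   three-component vectors in a loop, a pattern intended to exercise the SLP
+   layout code changed later in this patch.  */
+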
+template <class T> struct Vector3 {
+  Vector3();
+  Vector3(T, T, T);
+  T length() const;
+  T x, y, z;
+};
+template <class T>
+Vector3<T>::Vector3(T _x, T _y, T _z) : x(_x), y(_y), z(_z) {}
+Vector3<float> cross(Vector3<float> a, Vector3<float> b) {
+  return Vector3<float>(a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z,
+                        a.x * b.y - a.y * b.x);
+}
+template <class T> T Vector3<T>::length() const { return z; }
+int generateNormals_i;
+float generateNormals_p2_0, generateNormals_p0_0;
+struct SphereMesh {
+  void generateNormals();
+  float vertices;
+};
+void SphereMesh::generateNormals() {
+  Vector3<float> *faceNormals = new Vector3<float>;
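+  /* j is uninitialized; that is harmless here because this is a compile-only
+     test and the loop is never executed.  */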
+  for (int j; j; j++) {
+    float *p0 = &vertices + 3, *p1 = &vertices + j * 3, *p2 = &vertices + 3,
+          *p3 = &vertices + generateNormals_i + j * 3;
+    Vector3<float> v0(p1[0] - generateNormals_p0_0, p1[1] - 1, p1[2] - 2),
+        v1(0, 1, 2);
+    if (v0.length())
+      v1 = Vector3<float>(p3[0] - generateNormals_p2_0, p3[1] - p2[1],
+                          p3[2] - p2[2]);
+    else
+      v1 = Vector3<float>(generateNormals_p0_0 - p3[0], p0[1] - p3[1],
+                          p0[2] - p3[2]);
+    Vector3<float> faceNormal = cross(v0, v1);
+    faceNormals[j] = faceNormal;
+  }
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-additional-options "-fprofile-generate" } */
+/* { dg-additional-options "-mavx512vl" { target x86_64-*-* i?86-*-* } } */
+
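+/* Compile-only reproducer.  Each loop iteration stores to two adjacent
+   elements of mask_slp_int64_t_8_2_x, with the loads from
+   mask_slp_int64_t_8_2_z guarded by mask_slp_int64_t_8_2_y; with -mavx512vl
+   such conditional accesses can be vectorized using masked loads, the case
+   discussed in the comments added later in this patch.  */
+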
+int *mask_slp_int64_t_8_2_x, *mask_slp_int64_t_8_2_y, *mask_slp_int64_t_8_2_z;
+
+void
+__attribute__mask_slp_int64_t_8_2() {
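+  /* i is left uninitialized; the test only needs to compile, not run.  */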
+  for (int i; i; i += 8) {
+    mask_slp_int64_t_8_2_x[i + 6] =
+        mask_slp_int64_t_8_2_y[i + 6] ? mask_slp_int64_t_8_2_z[i] : 1;
+    mask_slp_int64_t_8_2_x[i + 7] =
+        mask_slp_int64_t_8_2_y[i + 7] ? mask_slp_int64_t_8_2_z[i + 7] : 2;
+  }
+}
stmt_vec_info rep = SLP_TREE_REPRESENTATIVE (node);
if (rep
&& STMT_VINFO_DATA_REF (rep)
- && DR_IS_READ (STMT_VINFO_DATA_REF (rep)))
+ && DR_IS_READ (STMT_VINFO_DATA_REF (rep))
+ && SLP_TREE_LOAD_PERMUTATION (node).exists ())
{
auto_load_permutation_t tmp_perm;
tmp_perm.safe_splice (SLP_TREE_LOAD_PERMUTATION (node));
if (SLP_TREE_LOAD_PERMUTATION (node).exists ())
{
/* If splitting out a SLP_TREE_LANE_PERMUTATION can make the node
- unpermuted, record a layout that reverses this permutation. */
- gcc_assert (partition.layout == 0);
+ unpermuted, record a layout that reverses this permutation.
+
+ We would need more work to cope with loads that are internally
+ permuted and also have inputs (such as masks for
+ IFN_MASK_LOADs). */
+ gcc_assert (partition.layout == 0 && !m_slpg->vertices[node_i].succ);
if (!STMT_VINFO_GROUPED_ACCESS (dr_stmt))
continue;
dr_stmt = DR_GROUP_FIRST_ELEMENT (dr_stmt);
vertex.weight = vect_slp_node_weight (node);
/* We do not handle stores with a permutation, so all
- incoming permutations must have been materialized. */
+ incoming permutations must have been materialized.
+
+ We also don't handle masked grouped loads, which lack a
+ permutation vector. In this case the memory locations
+ form an implicit second input to the loads, on top of the
+ explicit mask input, and the memory input's layout cannot
+ be changed.
+
+ On the other hand, we do support permuting gather loads and
+ masked gather loads, where each scalar load is independent
+ of the others. This can be useful if the address/index input
+ benefits from permutation. */
if (STMT_VINFO_DATA_REF (rep)
- && DR_IS_WRITE (STMT_VINFO_DATA_REF (rep)))
- /* ??? We're forcing materialization in place
- of the child here, we'd need special handling
- in materialization to leave layout -1 here. */
+ && STMT_VINFO_GROUPED_ACCESS (rep)
+ && !SLP_TREE_LOAD_PERMUTATION (node).exists ())
partition.layout = 0;
/* We cannot change the layout of an operation that is