git.ipfire.org Git - thirdparty/gcc.git/commitdiff
re PR tree-optimization/56270 (loop over array of struct float causes compiler error...
authorRichard Biener <rguenther@suse.de>
Fri, 19 Apr 2013 13:37:12 +0000 (13:37 +0000)
committerRichard Biener <rguenth@gcc.gnu.org>
Fri, 19 Apr 2013 13:37:12 +0000 (13:37 +0000)
2013-04-19  Richard Biener  <rguenther@suse.de>

* tree-vectorizer.h (struct _slp_instance): Move load_permutation
member ...
(struct _slp_tree): ... here.  Make it a vector of unsigned ints.
(SLP_INSTANCE_LOAD_PERMUTATION): Remove.
(SLP_TREE_LOAD_PERMUTATION): Add.
(vect_transform_slp_perm_load): Adjust prototype.
* tree-vect-slp.c (vect_free_slp_tree): Adjust.
(vect_free_slp_instance): Likewise.
(vect_create_new_slp_node): Likewise.
(vect_supported_slp_permutation_p): Remove.
(vect_slp_rearrange_stmts): Adjust.
(vect_supported_load_permutation_p): Likewise.  Inline
vect_supported_slp_permutation_p here.
(vect_analyze_slp_instance): Compute load permutations per
slp node instead of per instance.
(vect_get_slp_defs): Adjust.
(vect_transform_slp_perm_load): Likewise.
(vect_schedule_slp_instance): Remove redundant code.
(vect_schedule_slp): Remove hack for PR56270, add it ...
* tree-vect-stmts.c (vectorizable_load): ... here, do not
CSE loads for SLP.  Adjust.

From-SVN: r198095

gcc/ChangeLog
gcc/tree-vect-slp.c
gcc/tree-vect-stmts.c
gcc/tree-vectorizer.h

index e395bd172d10a86e9f5c8800e6fc9c6869e1aab9..8bdf2a9cf76b74487c734aea580040ca6f51e16d 100644 (file)
@@ -1,3 +1,27 @@
+2013-04-19  Richard Biener  <rguenther@suse.de>
+
+       * tree-vectorizer.h (struct _slp_instance): Move load_permutation
+       member ...
+       (struct _slp_tree): ... here.  Make it a vector of unsigned ints.
+       (SLP_INSTANCE_LOAD_PERMUTATION): Remove.
+       (SLP_TREE_LOAD_PERMUTATION): Add.
+       (vect_transform_slp_perm_load): Adjust prototype.
+       * tree-vect-slp.c (vect_free_slp_tree): Adjust.
+       (vect_free_slp_instance): Likewise.
+       (vect_create_new_slp_node): Likewise.
+       (vect_supported_slp_permutation_p): Remove.
+       (vect_slp_rearrange_stmts): Adjust.
+       (vect_supported_load_permutation_p): Likewise.  Inline
+       vect_supported_slp_permutation_p here.
+       (vect_analyze_slp_instance): Compute load permutations per
+       slp node instead of per instance.
+       (vect_get_slp_defs): Adjust.
+       (vect_transform_slp_perm_load): Likewise.
+       (vect_schedule_slp_instance): Remove redundant code.
+       (vect_schedule_slp): Remove hack for PR56270, add it ...
+       * tree-vect-stmts.c (vectorizable_load): ... here, do not
+       CSE loads for SLP.  Adjust.
+
 2013-04-19  Greta Yorsh  <Greta.Yorsh@arm.com>
 
        * config/arm/arm.c (load_multiple_sequence, ldm_stm_operation_p): Fix
index 4dc79f408402b3a0eced71767e019595f588ee9a..b83e64041308a9b71965143f091dd7c9a6a9c63f 100644 (file)
@@ -78,6 +78,7 @@ vect_free_slp_tree (slp_tree node)
   SLP_TREE_CHILDREN (node).release ();
   SLP_TREE_SCALAR_STMTS (node).release ();
   SLP_TREE_VEC_STMTS (node).release ();
+  SLP_TREE_LOAD_PERMUTATION (node).release ();
 
   free (node);
 }
@@ -89,7 +90,6 @@ void
 vect_free_slp_instance (slp_instance instance)
 {
   vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
-  SLP_INSTANCE_LOAD_PERMUTATION (instance).release ();
   SLP_INSTANCE_LOADS (instance).release ();
   SLP_INSTANCE_BODY_COST_VEC (instance).release ();
   free (instance);
@@ -120,6 +120,7 @@ vect_create_new_slp_node (vec<gimple> scalar_stmts)
   SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
   SLP_TREE_VEC_STMTS (node).create (0);
   SLP_TREE_CHILDREN (node).create (nops);
+  SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
 
   return node;
 }
@@ -1026,73 +1027,11 @@ vect_mark_slp_stmts_relevant (slp_tree node)
 }
 
 
-/* Check if the permutation required by the SLP INSTANCE is supported.
-   Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed.  */
-
-static bool
-vect_supported_slp_permutation_p (slp_instance instance)
-{
-  slp_tree node = SLP_INSTANCE_LOADS (instance)[0];
-  gimple stmt = SLP_TREE_SCALAR_STMTS (node)[0];
-  gimple first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
-  vec<slp_tree> sorted_loads = vNULL;
-  int index;
-  slp_tree *tmp_loads = NULL;
-  int group_size = SLP_INSTANCE_GROUP_SIZE (instance), i, j;
-  slp_tree load;
-
-  /* FORNOW: The only supported loads permutation is loads from the same
-     location in all the loads in the node, when the data-refs in
-     nodes of LOADS constitute an interleaving chain.
-     Sort the nodes according to the order of accesses in the chain.  */
-  tmp_loads = (slp_tree *) xmalloc (sizeof (slp_tree) * group_size);
-  for (i = 0, j = 0;
-       SLP_INSTANCE_LOAD_PERMUTATION (instance).iterate (i, &index)
-       && SLP_INSTANCE_LOADS (instance).iterate (j, &load);
-       i += group_size, j++)
-    {
-      gimple scalar_stmt = SLP_TREE_SCALAR_STMTS (load)[0];
-      /* Check that the loads are all in the same interleaving chain.  */
-      if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (scalar_stmt)) != first_load)
-        {
-          if (dump_enabled_p ())
-            {
-              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                              "Build SLP failed: unsupported data "
-                              "permutation ");
-              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
-                               scalar_stmt, 0);
-            }
-
-          free (tmp_loads);
-          return false;
-        }
-
-      tmp_loads[index] = load;
-    }
-
-  sorted_loads.create (group_size);
-  for (i = 0; i < group_size; i++)
-     sorted_loads.safe_push (tmp_loads[i]);
-
-  SLP_INSTANCE_LOADS (instance).release ();
-  SLP_INSTANCE_LOADS (instance) = sorted_loads;
-  free (tmp_loads);
-
-  if (!vect_transform_slp_perm_load (stmt, vNULL, NULL,
-                                     SLP_INSTANCE_UNROLLING_FACTOR (instance),
-                                     instance, true))
-    return false;
-
-  return true;
-}
-
-
 /* Rearrange the statements of NODE according to PERMUTATION.  */
 
 static void
 vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
-                          vec<int> permutation)
+                          vec<unsigned> permutation)
 {
   gimple stmt;
   vec<gimple> tmp_stmts;
@@ -1114,32 +1053,29 @@ vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
 }
 
 
-/* Check if the required load permutation is supported.
-   LOAD_PERMUTATION contains a list of indices of the loads.
-   In SLP this permutation is relative to the order of grouped stores that are
-   the base of the SLP instance.  */
+/* Check if the required load permutations in the SLP instance
+   SLP_INSTN are supported.  */
 
 static bool
-vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
-                                   vec<int> load_permutation)
+vect_supported_load_permutation_p (slp_instance slp_instn)
 {
-  int i = 0, j, prev = -1, next, k, number_of_groups;
-  bool supported, bad_permutation = false;
+  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
+  unsigned int i, j, k, next;
   sbitmap load_index;
   slp_tree node;
   gimple stmt, load, next_load, first_load;
   struct data_reference *dr;
-  bb_vec_info bb_vinfo;
-
-  /* FORNOW: permutations are only supported in SLP.  */
-  if (!slp_instn)
-    return false;
 
   if (dump_enabled_p ())
     {
       dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
-      FOR_EACH_VEC_ELT (load_permutation, i, next)
-        dump_printf (MSG_NOTE, "%d ", next);
+      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
+       if (node->load_permutation.exists ())
+         FOR_EACH_VEC_ELT (node->load_permutation, j, next)
+           dump_printf (MSG_NOTE, "%d ", next);
+       else
+         for (i = 0; i < group_size; ++i)
+           dump_printf (MSG_NOTE, "%d ", i);
     }
 
   /* In case of reduction every load permutation is allowed, since the order
@@ -1150,209 +1086,161 @@ vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
      permutation).  */
 
   /* Check that all the load nodes are of the same size.  */
+  /* ???  Can't we assert this? */
   FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
     if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
       return false;
 
   node = SLP_INSTANCE_TREE (slp_instn);
   stmt = SLP_TREE_SCALAR_STMTS (node)[0];
-  /* LOAD_PERMUTATION is a list of indices of all the loads of the SLP
-     instance, not all the loads belong to the same node or interleaving
-     group.  Hence, we need to divide them into groups according to
-     GROUP_SIZE.  */
-  number_of_groups = load_permutation.length () / group_size;
 
   /* Reduction (there are no data-refs in the root).
      In reduction chain the order of the loads is important.  */
   if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
       && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
     {
-      int first_group_load_index;
-
-      /* Compare all the permutation sequences to the first one.  */
-      for (i = 1; i < number_of_groups; i++)
-        {
-          k = 0;
-          for (j = i * group_size; j < i * group_size + group_size; j++)
-            {
-              next = load_permutation[j];
-              first_group_load_index = load_permutation[k];
+      slp_tree load;
+      unsigned int lidx;
 
-              if (next != first_group_load_index)
-                {
-                  bad_permutation = true;
-                  break;
-                }
-
-              k++;
-            }
-
-          if (bad_permutation)
-            break;
-        }
-
-      if (!bad_permutation)
-        {
-          /* Check that the loads in the first sequence are different and there
-             are no gaps between them.  */
-          load_index = sbitmap_alloc (group_size);
-          bitmap_clear (load_index);
-          for (k = 0; k < group_size; k++)
-            {
-              first_group_load_index = load_permutation[k];
-              if (bitmap_bit_p (load_index, first_group_load_index))
-                {
-                  bad_permutation = true;
-                  break;
-                }
-
-              bitmap_set_bit (load_index, first_group_load_index);
-            }
-
-          if (!bad_permutation)
-            for (k = 0; k < group_size; k++)
-              if (!bitmap_bit_p (load_index, k))
-                {
-                  bad_permutation = true;
-                  break;
-                }
-
-          sbitmap_free (load_index);
-        }
+      /* Compare all the permutation sequences to the first one.  We know
+         that at least one load is permuted.  */
+      node = SLP_INSTANCE_LOADS (slp_instn)[0];
+      if (!node->load_permutation.exists ())
+       return false;
+      for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
+       {
+         if (!load->load_permutation.exists ())
+           return false;
+         FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
+           if (lidx != node->load_permutation[j])
+             return false;
+       }
 
-      if (!bad_permutation)
-        {
-          /* This permutation is valid for reduction.  Since the order of the
-             statements in the nodes is not important unless they are memory
-             accesses, we can rearrange the statements in all the nodes 
-             according to the order of the loads.  */
-          vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
-                                    load_permutation);
-          SLP_INSTANCE_LOAD_PERMUTATION (slp_instn).release ();
-          return true;
-        }
+      /* Check that the loads in the first sequence are different and there
+        are no gaps between them.  */
+      load_index = sbitmap_alloc (group_size);
+      bitmap_clear (load_index);
+      FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
+       {
+         if (bitmap_bit_p (load_index, lidx))
+           {
+             sbitmap_free (load_index);
+             return false;
+           }
+         bitmap_set_bit (load_index, lidx);
+       }
+      for (i = 0; i < group_size; i++)
+       if (!bitmap_bit_p (load_index, i))
+         {
+           sbitmap_free (load_index);
+           return false;
+         }
+      sbitmap_free (load_index);
+
+      /* This permutation is valid for reduction.  Since the order of the
+        statements in the nodes is not important unless they are memory
+        accesses, we can rearrange the statements in all the nodes
+        according to the order of the loads.  */
+      vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
+                               node->load_permutation);
+
+      /* We are done, no actual permutations need to be generated.  */
+      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
+       SLP_TREE_LOAD_PERMUTATION (node).release ();
+      return true;
     }
 
   /* In basic block vectorization we allow any subchain of an interleaving
      chain.
      FORNOW: not supported in loop SLP because of realignment compications.  */
-  bb_vinfo = STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt));
-  bad_permutation = false;
-  /* Check that for every node in the instance the loads form a subchain.  */
-  if (bb_vinfo)
+  if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)))
     {
+      /* Check that for every node in the instance the loads
+        form a subchain.  */
       FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
         {
           next_load = NULL;
-          first_load = NULL;
           FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)
             {
-              if (!first_load)
-                first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (load));
-              else if (first_load
-                         != GROUP_FIRST_ELEMENT (vinfo_for_stmt (load)))
-                {
-                  bad_permutation = true;
-                 break;
-               }
-
               if (j != 0 && next_load != load)
-                {
-                  bad_permutation = true;
-                  break;
-                }
-
+               return false;
               next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
             }
-
-          if (bad_permutation)
-            break;
         }
 
       /* Check that the alignment of the first load in every subchain, i.e.,
-         the first statement in every load node, is supported.  */
-      if (!bad_permutation)
-        {
-          FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
-            {
-              first_load = SLP_TREE_SCALAR_STMTS (node)[0];
-              if (first_load
-                    != GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_load)))
-                {
-                  dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load));
-                  if (vect_supportable_dr_alignment (dr, false)
-                      == dr_unaligned_unsupported)
-                    {
-                     if (dump_enabled_p ())
-                       {
-                         dump_printf_loc (MSG_MISSED_OPTIMIZATION,
-                                          vect_location, 
-                                          "unsupported unaligned load ");
-                          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
-                                           first_load, 0);
-                        }
-                     bad_permutation = true;
-                      break;
-                    }
-               }
-            }
+         the first statement in every load node, is supported.
+        ???  This belongs in alignment checking.  */
+      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
+       {
+         first_load = SLP_TREE_SCALAR_STMTS (node)[0];
+         if (first_load != GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_load)))
+           {
+             dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load));
+             if (vect_supportable_dr_alignment (dr, false)
+                 == dr_unaligned_unsupported)
+               {
+                 if (dump_enabled_p ())
+                   {
+                     dump_printf_loc (MSG_MISSED_OPTIMIZATION,
+                                      vect_location,
+                                      "unsupported unaligned load ");
+                     dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
+                                       first_load, 0);
+                   }
+                 return false;
+               }
+           }
+       }
 
-          if (!bad_permutation)
-            {
-              SLP_INSTANCE_LOAD_PERMUTATION (slp_instn).release ();
-              return true;
-           }
-        }
+      /* We are done, no actual permutations need to be generated.  */
+      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
+       SLP_TREE_LOAD_PERMUTATION (node).release ();
+      return true;
     }
 
   /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
      GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
      well (unless it's reduction).  */
-  if (load_permutation.length ()
-      != (unsigned int) (group_size * group_size))
+  if (SLP_INSTANCE_LOADS (slp_instn).length () != group_size)
     return false;
+  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
+    if (!node->load_permutation.exists ())
+      return false;
 
-  supported = true;
   load_index = sbitmap_alloc (group_size);
   bitmap_clear (load_index);
-  for (j = 0; j < group_size; j++)
-    {
-      for (i = j * group_size, k = 0;
-           load_permutation.iterate (i, &next) && k < group_size;
-           i++, k++)
-       {
-         if (i != j * group_size && next != prev)
-          {
-            supported = false;
-            break;
-          }
-
-         prev = next;
-       }
-
-      if (bitmap_bit_p (load_index, prev))
-        {
-          supported = false;
-          break;
-        }
-
-      bitmap_set_bit (load_index, prev);
+  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
+    {
+      unsigned int lidx = node->load_permutation[0];
+      if (bitmap_bit_p (load_index, lidx))
+       {
+         sbitmap_free (load_index);
+         return false;
+       }
+      bitmap_set_bit (load_index, lidx);
+      FOR_EACH_VEC_ELT (node->load_permutation, j, k)
+       if (k != lidx)
+         {
+           sbitmap_free (load_index);
+           return false;
+         }
     }
-  for (j = 0; j < group_size; j++)
-    if (!bitmap_bit_p (load_index, j))
+  for (i = 0; i < group_size; i++)
+    if (!bitmap_bit_p (load_index, i))
       {
        sbitmap_free (load_index);
        return false;
       }
-
   sbitmap_free (load_index);
 
-  if (supported && i == group_size * group_size
-      && vect_supported_slp_permutation_p (slp_instn))
-    return true;
-
-  return false;
+  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
+    if (node->load_permutation.exists ()
+       && !vect_transform_slp_perm_load
+             (node, vNULL, NULL,
+              SLP_INSTANCE_UNROLLING_FACTOR (slp_instn), slp_instn, true))
+      return false;
+  return true;
 }
 
 
@@ -1642,17 +1530,17 @@ vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
       SLP_INSTANCE_BODY_COST_VEC (new_instance) = vNULL;
       SLP_INSTANCE_LOADS (new_instance) = loads;
       SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
-      SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = vNULL;
 
       /* Compute the load permutation.  */
       slp_tree load_node;
       bool loads_permuted = false;
-      vec<int> load_permutation;
-      load_permutation.create (group_size * group_size);
       FOR_EACH_VEC_ELT (loads, i, load_node)
        {
+         vec<unsigned> load_permutation;
          int j;
          gimple load, first_stmt;
+         bool this_load_permuted = false;
+         load_permutation.create (group_size);
          first_stmt = GROUP_FIRST_ELEMENT
              (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
@@ -1661,16 +1549,21 @@ vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                = vect_get_place_in_interleaving_chain (load, first_stmt);
              gcc_assert (load_place != -1);
              if (load_place != j)
-               loads_permuted = true;
+               this_load_permuted = true;
              load_permutation.safe_push (load_place);
            }
+         if (!this_load_permuted)
+           {
+             load_permutation.release ();
+             continue;
+           }
+         SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
+         loads_permuted = true;
        }
 
       if (loads_permuted)
         {
-         SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;
-          if (!vect_supported_load_permutation_p (new_instance, group_size,
-                                                  load_permutation))
+          if (!vect_supported_load_permutation_p (new_instance))
             {
               if (dump_enabled_p ())
                 {
@@ -1679,16 +1572,13 @@ vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                                   "permutation ");
                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                 }
-
               vect_free_slp_instance (new_instance);
               return false;
             }
 
           SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
-             = vect_find_first_load_in_slp_instance (new_instance);
+           = vect_find_first_load_in_slp_instance (new_instance);
         }
-      else
-        load_permutation.release ();
 
       /* Compute the costs of this SLP instance.  */
       vect_analyze_slp_cost (loop_vinfo, bb_vinfo,
@@ -2653,7 +2543,7 @@ vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
       vectorized_defs = false;
       if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
         {
-          child = (slp_tree) SLP_TREE_CHILDREN (slp_node)[child_index];
+          child = SLP_TREE_CHILDREN (slp_node)[child_index];
 
          /* We have to check both pattern and original def, if available.  */
          gimple first_def = SLP_TREE_SCALAR_STMTS (child)[0];
@@ -2854,16 +2744,18 @@ vect_get_mask_element (gimple stmt, int first_mask_element, int m,
 
 /* Generate vector permute statements from a list of loads in DR_CHAIN.
    If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
-   permute statements for SLP_NODE_INSTANCE.  */
+   permute statements for the SLP node NODE of the SLP instance
+   SLP_NODE_INSTANCE.  */
+
 bool
-vect_transform_slp_perm_load (gimple stmt, vec<tree> dr_chain,
+vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
                               gimple_stmt_iterator *gsi, int vf,
                               slp_instance slp_node_instance, bool analyze_only)
 {
+  gimple stmt = SLP_TREE_SCALAR_STMTS (node)[0];
   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
   tree mask_element_type = NULL_TREE, mask_type;
   int i, j, k, nunits, vec_index = 0, scalar_index;
-  slp_tree node;
   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
   gimple next_scalar_stmt;
   int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
@@ -2910,6 +2802,9 @@ vect_transform_slp_perm_load (gimple stmt, vec<tree> dr_chain,
      relatively to SLP_NODE_INSTANCE unrolling factor.  */
   ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
 
+  if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
+    return false;
+
   /* Generate permutation masks for every NODE. Number of masks for each NODE
      is equal to GROUP_SIZE.
      E.g., we have a group of three nodes with three loads from the same
@@ -2928,7 +2823,6 @@ vect_transform_slp_perm_load (gimple stmt, vec<tree> dr_chain,
      we need the second and the third vectors: {b1,c1,a2,b2} and
      {c2,a3,b3,c3}.  */
 
-  FOR_EACH_VEC_ELT  (SLP_INSTANCE_LOADS (slp_node_instance), i, node)
     {
       scalar_index = 0;
       index = 0;
@@ -2944,6 +2838,7 @@ vect_transform_slp_perm_load (gimple stmt, vec<tree> dr_chain,
         {
           for (k = 0; k < group_size; k++)
             {
+             i = SLP_TREE_LOAD_PERMUTATION (node)[k];
               first_mask_element = i + j * group_size;
               if (!vect_get_mask_element (stmt, first_mask_element, 0,
                                          nunits, only_one_vec, index,
@@ -2956,9 +2851,7 @@ vect_transform_slp_perm_load (gimple stmt, vec<tree> dr_chain,
 
               if (index == nunits)
                 {
-                 tree mask_vec, *mask_elts;
-                 int l;
-
+                 index = 0;
                  if (!can_vec_perm_p (mode, false, mask))
                    {
                      if (dump_enabled_p ())
@@ -2974,15 +2867,17 @@ vect_transform_slp_perm_load (gimple stmt, vec<tree> dr_chain,
                      return false;
                    }
 
-                 mask_elts = XALLOCAVEC (tree, nunits);
-                 for (l = 0; l < nunits; ++l)
-                   mask_elts[l] = build_int_cst (mask_element_type, mask[l]);
-                 mask_vec = build_vector (mask_type, mask_elts);
-                 index = 0;
-
                   if (!analyze_only)
                     {
-                      if (need_next_vector)
+                     int l;
+                     tree mask_vec, *mask_elts;
+                     mask_elts = XALLOCAVEC (tree, nunits);
+                     for (l = 0; l < nunits; ++l)
+                       mask_elts[l] = build_int_cst (mask_element_type,
+                                                     mask[l]);
+                     mask_vec = build_vector (mask_type, mask_elts);
+
+                     if (need_next_vector)
                         {
                           first_vec_index = second_vec_index;
                           second_vec_index = vec_index;
@@ -3019,7 +2914,6 @@ vect_schedule_slp_instance (slp_tree node, slp_instance instance,
   unsigned int vec_stmts_size, nunits, group_size;
   tree vectype;
   int i;
-  slp_tree loads_node;
   slp_tree child;
 
   if (!node)
@@ -3043,20 +2937,6 @@ vect_schedule_slp_instance (slp_tree node, slp_instance instance,
      size.  */
   vec_stmts_size = (vectorization_factor * group_size) / nunits;
 
-  /* In case of load permutation we have to allocate vectorized statements for
-     all the nodes that participate in that permutation.  */
-  if (SLP_INSTANCE_LOAD_PERMUTATION (instance).exists ())
-    {
-      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, loads_node)
-        {
-          if (!SLP_TREE_VEC_STMTS (loads_node).exists ())
-            {
-              SLP_TREE_VEC_STMTS (loads_node).create (vec_stmts_size);
-              SLP_TREE_NUMBER_OF_VEC_STMTS (loads_node) = vec_stmts_size;
-            }
-        }
-    }
-
   if (!SLP_TREE_VEC_STMTS (node).exists ())
     {
       SLP_TREE_VEC_STMTS (node).create (vec_stmts_size);
@@ -3074,7 +2954,7 @@ vect_schedule_slp_instance (slp_tree node, slp_instance instance,
   if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
       && STMT_VINFO_GROUPED_ACCESS (stmt_info)
       && !REFERENCE_CLASS_P (gimple_get_lhs (stmt))
-      && SLP_INSTANCE_LOAD_PERMUTATION (instance).exists ())
+      && SLP_TREE_LOAD_PERMUTATION (node).exists ())
     si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
   else if (is_pattern_stmt_p (stmt_info))
     si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
@@ -3153,8 +3033,7 @@ vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
 {
   vec<slp_instance> slp_instances;
   slp_instance instance;
-  slp_tree loads_node;
-  unsigned int i, j, vf;
+  unsigned int i, vf;
   bool is_store = false;
 
   if (loop_vinfo)
@@ -3173,14 +3052,6 @@ vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
       /* Schedule the tree of INSTANCE.  */
       is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
                                              instance, vf);
-
-      /* Clear STMT_VINFO_VEC_STMT of all loads.  With shared loads
-         between SLP instances we fail to properly initialize the
-        vectorized SLP stmts and confuse different load permutations.  */
-      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), j, loads_node)
-       STMT_VINFO_VEC_STMT
-         (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (loads_node)[0])) = NULL;
-
       if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                          "vectorizing stmts using SLP.");
index ca474c136d8218b724167315a1ebcdf733a22377..28b80bbe8bcb83c4cd2024d9920fecdeb6d440ce 100644 (file)
@@ -4754,12 +4754,21 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
     {
       first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
       if (slp
-          && !SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance).exists ()
+          && !SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
          && first_stmt != SLP_TREE_SCALAR_STMTS (slp_node)[0])
         first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
 
       /* Check if the chain of loads is already vectorized.  */
-      if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
+      if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
+         /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
+            ???  But we can only do so if there is exactly one
+            as we have no way to get at the rest.  Leave the CSE
+            opportunity alone.
+            ???  With the group load eventually participating
+            in multiple different permutations (having multiple
+            slp nodes which refer to the same group) the CSE
+            is even wrong code.  See PR56270.  */
+         && !slp)
        {
          *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
          return true;
@@ -4772,7 +4781,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
        {
          grouped_load = false;
          vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
-          if (SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance).exists ())
+          if (SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
             slp_perm = true;
          group_gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
        }
@@ -5163,7 +5172,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
 
       if (slp_perm)
         {
-          if (!vect_transform_slp_perm_load (stmt, dr_chain, gsi, vf,
+          if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
                                              slp_node_instance, false))
             {
               dr_chain.release ();
index 807114995203bc53e8d183abde252a71f37efeb2..2f0374d760c9a8cafc94ebd7ae243fdb58175f29 100644 (file)
@@ -106,6 +106,9 @@ struct _slp_tree {
   vec<slp_tree> children;
   /* A group of scalar stmts to be vectorized together.  */
   vec<gimple> stmts;
+  /* Load permutation relative to the stores, NULL if there is no
+     permutation.  */
+  vec<unsigned> load_permutation;
   /* Vectorized stmt/s.  */
   vec<gimple> vec_stmts;
   /* Number of vector stmts that are created to replace the group of scalar
@@ -131,10 +134,6 @@ typedef struct _slp_instance {
   /* Vectorization costs associated with SLP instance.  */
   stmt_vector_for_cost body_cost_vec;
 
-  /* Loads permutation relatively to the stores, NULL if there is no
-     permutation.  */
-  vec<int> load_permutation;
-
   /* The group of nodes that contain loads of this SLP instance.  */
   vec<slp_tree> loads;
 
@@ -149,7 +148,6 @@ typedef struct _slp_instance {
 #define SLP_INSTANCE_GROUP_SIZE(S)               (S)->group_size
 #define SLP_INSTANCE_UNROLLING_FACTOR(S)         (S)->unrolling_factor
 #define SLP_INSTANCE_BODY_COST_VEC(S)            (S)->body_cost_vec
-#define SLP_INSTANCE_LOAD_PERMUTATION(S)         (S)->load_permutation
 #define SLP_INSTANCE_LOADS(S)                    (S)->loads
 #define SLP_INSTANCE_FIRST_LOAD_STMT(S)          (S)->first_load
 
@@ -157,6 +155,7 @@ typedef struct _slp_instance {
 #define SLP_TREE_SCALAR_STMTS(S)                 (S)->stmts
 #define SLP_TREE_VEC_STMTS(S)                    (S)->vec_stmts
 #define SLP_TREE_NUMBER_OF_VEC_STMTS(S)          (S)->vec_stmts_size
+#define SLP_TREE_LOAD_PERMUTATION(S)             (S)->load_permutation
 
 /* This structure is used in creation of an SLP tree.  Each instance
    corresponds to the same operand in a group of scalar stmts in an SLP
@@ -961,7 +960,7 @@ extern int vect_get_single_scalar_iteration_cost (loop_vec_info);
 
 /* In tree-vect-slp.c.  */
 extern void vect_free_slp_instance (slp_instance);
-extern bool vect_transform_slp_perm_load (gimple, vec<tree> ,
+extern bool vect_transform_slp_perm_load (slp_tree, vec<tree> ,
                                           gimple_stmt_iterator *, int,
                                           slp_instance, bool);
 extern bool vect_schedule_slp (loop_vec_info, bb_vec_info);