]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
tree-vect-stmts.c (vectorizable_load): For SLP without permutation treat the first...
authorIra Rosen <ira.rosen@linaro.org>
Sun, 16 Oct 2011 10:47:12 +0000 (10:47 +0000)
committerIra Rosen <irar@gcc.gnu.org>
Sun, 16 Oct 2011 10:47:12 +0000 (10:47 +0000)
        * tree-vect-stmts.c (vectorizable_load): For SLP without
        permutation treat the first load of the node as the first
        element in its interleaving chain.
        * tree-vect-slp.c (vect_get_and_check_slp_defs): Swap the
        operands if necessary and possible.
        (vect_build_slp_tree): Add new argument.  Allow load groups of
        any size in basic blocks.  Keep all the loads for further
        permutation check.  Use the new argument to determine if there
        is a permutation.  Update the recursive calls.
        (vect_supported_load_permutation_p): Allow subchains of
        interleaving chains in basic block vectorization.
        (vect_analyze_slp_instance): Update the call to
        vect_build_slp_tree.  Check load permutation based on the new
        parameter.
        (vect_schedule_slp_instance): Don't start from the first element
        in interleaving chain unless the loads are permuted.

From-SVN: r180055

gcc/ChangeLog
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.dg/vect/bb-slp-29.c [new file with mode: 0644]
gcc/tree-vect-slp.c
gcc/tree-vect-stmts.c

index 333460880ce88740df5b6ace6ab9ececb12eb728..969bc1e2abc7ba89913d5375e38b6f6d83598ef1 100644 (file)
@@ -1,3 +1,21 @@
+2011-10-16 Ira Rosen  <ira.rosen@linaro.org>
+
+       * tree-vect-stmts.c (vectorizable_load): For SLP without permutation
+       treat the first load of the node as the first element in its
+       interleaving chain.
+       * tree-vect-slp.c (vect_get_and_check_slp_defs): Swap the operands if
+       necessary and possible.
+       (vect_build_slp_tree): Add new argument.  Allow load groups of any size
+       in basic blocks.  Keep all the loads for further permutation check.
+       Use the new argument to determine if there is a permutation.  Update
+       the recursive calls.
+       (vect_supported_load_permutation_p): Allow subchains of interleaving
+       chains in basic block vectorization.
+       (vect_analyze_slp_instance): Update the call to vect_build_slp_tree.
+       Check load permutation based on the new parameter.
+       (vect_schedule_slp_instance): Don't start from the first element in
+       interleaving chain unless the loads are permuted.
+
 2011-10-15  Jan Hubicka  <jh@suse.cz>
 
        PR target/48668
index ce5705ec7ff7e9bf18ab7149d6b3f4d428ebaaf8..dbe22fed2f921ec76813fd0b008aee2f189d0a99 100644 (file)
@@ -1,3 +1,7 @@
+2011-10-16 Ira Rosen  <ira.rosen@linaro.org>
+
+       * gcc.dg/vect/bb-slp-29.c: New test.
+
 2011-10-15  Paolo Carlini  <paolo.carlini@oracle.com>
 
        PR c++/50732
diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-29.c b/gcc/testsuite/gcc.dg/vect/bb-slp-29.c
new file mode 100644 (file)
index 0000000..e37b96d
--- /dev/null
@@ -0,0 +1,59 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define A 3
+#define B 4
+#define N 256
+
+short src[N], dst[N];
+
+void foo (short * __restrict__ dst, short * __restrict__ src, int h, int stride, int dummy)
+{
+  int i;
+  h /= 16;
+  for (i = 0; i < h; i++)
+    {
+      dst[0] = A*src[0] + B*src[1];
+      dst[1] = A*src[1] + B*src[2];
+      dst[2] = A*src[2] + B*src[3];
+      dst[3] = A*src[3] + B*src[4];
+      dst[4] = A*src[4] + B*src[5];
+      dst[5] = A*src[5] + B*src[6];
+      dst[6] = A*src[6] + B*src[7];
+      dst[7] = A*src[7] + B*src[8];
+      dst += stride;
+      src += stride;
+      if (dummy == 32)
+        abort ();
+   }
+}
+
+
+int main (void)
+{
+  int i;
+
+  check_vect ();
+
+  for (i = 0; i < N; i++)
+    {
+       dst[i] = 0;
+       src[i] = i;
+    }
+
+  foo (dst, src, N, 8, 0);
+
+  for (i = 0; i < N/2; i++)
+    {
+      if (dst[i] != A * src[i] + B * src[i+1])
+        abort ();
+    }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp"  { target { vect_int_mult &&  vect_element_align } } } } */
+/* { dg-final { cleanup-tree-dump "slp" } } */
+
index 852d94c543db178dfac28702f4aa1976f84d1f8d..4fdc854db9c2e723a7ce5ee24f80db087eba7f41 100644 (file)
@@ -116,13 +116,15 @@ vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
 {
   tree oprnd;
   unsigned int i, number_of_oprnds;
-  tree def;
+  tree def[2];
   gimple def_stmt;
   enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
   stmt_vec_info stmt_info =
     vinfo_for_stmt (VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0));
   enum gimple_rhs_class rhs_class;
   struct loop *loop = NULL;
+  enum tree_code rhs_code;
+  bool different_types = false;
 
   if (loop_vinfo)
     loop = LOOP_VINFO_LOOP (loop_vinfo);
@@ -134,7 +136,7 @@ vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
     {
       oprnd = gimple_op (stmt, i + 1);
 
-      if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def,
+      if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def[i],
                                &dt[i])
          || (!def_stmt && dt[i] != vect_constant_def))
        {
@@ -189,11 +191,11 @@ vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
           switch (gimple_code (def_stmt))
             {
               case GIMPLE_PHI:
-                def = gimple_phi_result (def_stmt);
+                def[i] = gimple_phi_result (def_stmt);
                 break;
 
               case GIMPLE_ASSIGN:
-                def = gimple_assign_lhs (def_stmt);
+                def[i] = gimple_assign_lhs (def_stmt);
                 break;
 
               default:
@@ -207,8 +209,8 @@ vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
        {
          /* op0 of the first stmt of the group - store its info.  */
          *first_stmt_dt0 = dt[i];
-         if (def)
-           *first_stmt_def0_type = TREE_TYPE (def);
+         if (def[i])
+           *first_stmt_def0_type = TREE_TYPE (def[i]);
          else
            *first_stmt_const_oprnd = oprnd;
 
@@ -228,8 +230,8 @@ vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
            {
              /* op1 of the first stmt of the group - store its info.  */
              *first_stmt_dt1 = dt[i];
-             if (def)
-               *first_stmt_def1_type = TREE_TYPE (def);
+             if (def[i])
+               *first_stmt_def1_type = TREE_TYPE (def[i]);
              else
                {
                  /* We assume that the stmt contains only one constant
@@ -255,24 +257,56 @@ vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                   && ((*first_stmt_dt0 != dt[i]
                         && !(*first_stmt_dt0 == vect_reduction_def
                              && dt[i] == vect_internal_def))
-                      || (*first_stmt_def0_type && def
+                      || (*first_stmt_def0_type && def[0]
                           && !types_compatible_p (*first_stmt_def0_type,
-                                                  TREE_TYPE (def)))))
+                                                  TREE_TYPE (def[0])))))
                  || (i == 1
                      && ((*first_stmt_dt1 != dt[i]
                            && !(*first_stmt_dt1 == vect_reduction_def
                                 && dt[i] == vect_internal_def))
-                         || (*first_stmt_def1_type && def
+                         || (*first_stmt_def1_type && def[1]
                              && !types_compatible_p (*first_stmt_def1_type,
-                                                     TREE_TYPE (def)))))
-                 || (!def
+                                                     TREE_TYPE (def[1])))))
+                 || (!def[i]
                      && !types_compatible_p (TREE_TYPE (*first_stmt_const_oprnd),
-                                             TREE_TYPE (oprnd))))
+                                             TREE_TYPE (oprnd)))
+                 || different_types)
                {
-                 if (vect_print_dump_info (REPORT_SLP))
-                   fprintf (vect_dump, "Build SLP failed: different types ");
+                  if (i != number_of_oprnds - 1)
+                    different_types = true;
+                  else
+                   {
+                      if (is_gimple_assign (stmt)
+                          && (rhs_code = gimple_assign_rhs_code (stmt))
+                         && TREE_CODE_CLASS (rhs_code) == tcc_binary
+                         && commutative_tree_code (rhs_code)
+                         && *first_stmt_dt0 == dt[1]
+                         && *first_stmt_dt1 == dt[0]
+                         && def[0] && def[1]
+                         && !(*first_stmt_def0_type
+                              && !types_compatible_p (*first_stmt_def0_type,
+                                                      TREE_TYPE (def[1])))
+                          && !(*first_stmt_def1_type
+                               && !types_compatible_p (*first_stmt_def1_type,
+                                                       TREE_TYPE (def[0]))))
+                        {
+                           if (vect_print_dump_info (REPORT_SLP))
+                            {
+                              fprintf (vect_dump, "Swapping operands of ");
+                              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
+                            }
+
+                          swap_tree_operands (stmt, gimple_assign_rhs1_ptr (stmt),
+                                             gimple_assign_rhs2_ptr (stmt));
+                       }
+                      else
+                        {
+                         if (vect_print_dump_info (REPORT_SLP))
+                           fprintf (vect_dump, "Build SLP failed: different types ");
 
-                 return false;
+                         return false;
+                       }
+                   }
                }
            }
        }
@@ -286,10 +320,10 @@ vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
 
        case vect_internal_def:
         case vect_reduction_def:
-         if (i == 0)
+         if ((i == 0 && !different_types) || (i == 1 && different_types))
            VEC_safe_push (gimple, heap, *def_stmts0, def_stmt);
          else
-           VEC_safe_push (gimple, heap, *def_stmts1, def_stmt);
+           VEC_safe_push (gimple, heap, *def_stmts1, def_stmt);
          break;
 
        default:
@@ -297,7 +331,7 @@ vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump, "Build SLP failed: illegal type of def ");
-             print_generic_expr (vect_dump, def, TDF_SLIM);
+             print_generic_expr (vect_dump, def[i], TDF_SLIM);
            }
 
          return false;
@@ -320,7 +354,7 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                      int ncopies_for_cost, unsigned int *max_nunits,
                      VEC (int, heap) **load_permutation,
                      VEC (slp_tree, heap) **loads,
-                     unsigned int vectorization_factor)
+                     unsigned int vectorization_factor, bool *loads_permuted)
 {
   VEC (gimple, heap) *def_stmts0 = VEC_alloc (gimple, heap, group_size);
   VEC (gimple, heap) *def_stmts1 =  VEC_alloc (gimple, heap, group_size);
@@ -531,7 +565,8 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
 
               /* Check that the size of interleaved loads group is not
                  greater than the SLP group size.  */
-              if (GROUP_SIZE (vinfo_for_stmt (stmt)) > ncopies * group_size)
+              if (loop_vinfo
+                  && GROUP_SIZE (vinfo_for_stmt (stmt)) > ncopies * group_size)
                 {
                   if (vect_print_dump_info (REPORT_SLP))
                     {
@@ -652,19 +687,22 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
   /* Strided loads were reached - stop the recursion.  */
   if (stop_recursion)
     {
+      VEC_safe_push (slp_tree, heap, *loads, *node);
       if (permutation)
         {
-          VEC_safe_push (slp_tree, heap, *loads, *node);
+
+          *loads_permuted = true;
           *inside_cost 
             += targetm.vectorize.builtin_vectorization_cost (vec_perm, NULL, 0) 
                * group_size;
         }
       else
-        { 
-          /* We don't check here complex numbers chains, so we keep them in
-            LOADS for further check in vect_supported_load_permutation_p.  */ 
+        {
+          /* We don't check here complex numbers chains, so we set
+             LOADS_PERMUTED for further check in
+             vect_supported_load_permutation_p.  */
           if (rhs_code == REALPART_EXPR || rhs_code == IMAGPART_EXPR)
-            VEC_safe_push (slp_tree, heap, *loads, *node);
+            *loads_permuted = true;
         }
 
       return true;
@@ -683,7 +721,7 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
       if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &left_node, group_size,
                                inside_cost, outside_cost, ncopies_for_cost,
                                max_nunits, load_permutation, loads,
-                               vectorization_factor))
+                               vectorization_factor, loads_permuted))
        return false;
 
       SLP_TREE_LEFT (*node) = left_node;
@@ -701,7 +739,7 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
       if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &right_node, group_size,
                                inside_cost, outside_cost, ncopies_for_cost,
                                max_nunits, load_permutation, loads,
-                               vectorization_factor))
+                               vectorization_factor, loads_permuted))
        return false;
 
       SLP_TREE_RIGHT (*node) = right_node;
@@ -887,8 +925,10 @@ vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
   bool supported, bad_permutation = false;
   sbitmap load_index;
   slp_tree node, other_complex_node;
-  gimple stmt, first = NULL, other_node_first;
+  gimple stmt, first = NULL, other_node_first, load, next_load, first_load;
   unsigned complex_numbers = 0;
+  struct data_reference *dr;
+  bb_vec_info bb_vinfo;
 
   /* FORNOW: permutations are only supported in SLP.  */
   if (!slp_instn)
@@ -1050,6 +1090,76 @@ vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
         }
     }
 
+  /* In basic block vectorization we allow any subchain of an interleaving
+     chain.
+     FORNOW: not supported in loop SLP because of realignment compications.  */
+  bb_vinfo = STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt));
+  bad_permutation = false;
+  /* Check that for every node in the instance teh loads form a subchain.  */
+  if (bb_vinfo)
+    {
+      FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
+        {
+          next_load = NULL;
+          first_load = NULL;
+          FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), j, load)
+            {
+              if (!first_load)
+                first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (load));
+              else if (first_load
+                         != GROUP_FIRST_ELEMENT (vinfo_for_stmt (load)))
+                {
+                  bad_permutation = true;
+                 break;
+               }
+
+              if (j != 0 && next_load != load)
+                {
+                  bad_permutation = true;
+                  break;
+                }
+
+              next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
+            }
+
+          if (bad_permutation)
+            break;
+        }
+
+      /* Check that the alignment of the first load in every subchain, i.e.,
+         the first statement in every load node, is supported.  */
+      if (!bad_permutation)
+        {
+          FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
+            {
+              first_load = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
+              if (first_load
+                    != GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_load)))
+                {
+                  dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load));
+                  if (vect_supportable_dr_alignment (dr, false)
+                      == dr_unaligned_unsupported)
+                    {
+                     if (vect_print_dump_info (REPORT_SLP))
+                       {
+                         fprintf (vect_dump, "unsupported unaligned load ");
+                          print_gimple_stmt (vect_dump, first_load, 0,
+                                            TDF_SLIM);
+                        }
+                     bad_permutation = true;
+                      break;
+                    }
+               }
+            }
+
+          if (!bad_permutation)
+            {
+              VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
+              return true;
+           }
+        }
+    }
+
   /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
      GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
      well (unless it's reduction).  */
@@ -1159,6 +1269,7 @@ vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
   VEC (int, heap) *load_permutation;
   VEC (slp_tree, heap) *loads;
   struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
+  bool loads_permuted = false;
 
   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
     {
@@ -1250,7 +1361,7 @@ vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
   if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
                            &inside_cost, &outside_cost, ncopies_for_cost,
                           &max_nunits, &load_permutation, &loads,
-                          vectorization_factor))
+                          vectorization_factor, &loads_permuted))
     {
       /* Calculate the unrolling factor based on the smallest type.  */
       if (max_nunits > nunits)
@@ -1275,7 +1386,8 @@ vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
       SLP_INSTANCE_LOADS (new_instance) = loads;
       SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
       SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;
-      if (VEC_length (slp_tree, loads))
+
+      if (loads_permuted)
         {
           if (!vect_supported_load_permutation_p (new_instance, group_size,
                                                   load_permutation))
@@ -2567,10 +2679,11 @@ vect_schedule_slp_instance (slp_tree node, slp_instance instance,
   /* Loads should be inserted before the first load.  */
   if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
       && STMT_VINFO_STRIDED_ACCESS (stmt_info)
-      && !REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
+      && !REFERENCE_CLASS_P (gimple_get_lhs (stmt))
+      && SLP_INSTANCE_LOAD_PERMUTATION (instance))
     si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
   else if (is_pattern_stmt_p (stmt_info))
-     si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
+    si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
   else
     si = gsi_for_stmt (stmt);
 
index d986ff8a63c08b80cd9c62a8d18ed535f6fab4db..0df3c0c412eeaf0fc8c8a670ae334e91f8b05538 100644 (file)
@@ -4241,6 +4241,11 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
   if (strided_load)
     {
       first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
+      if (slp
+          && !SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance)
+         && first_stmt != VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0))
+        first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
+
       /* Check if the chain of loads is already vectorized.  */
       if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
        {