git.ipfire.org Git - thirdparty/gcc.git/commitdiff
re PR tree-optimization/50031 (Sphinx3 has a 10% regression going from GCC 4.5 to...
author Bill Schmidt <wschmidt@linux.vnet.ibm.com>
Fri, 2 Mar 2012 14:51:58 +0000 (14:51 +0000)
committer William Schmidt <wschmidt@gcc.gnu.org>
Fri, 2 Mar 2012 14:51:58 +0000 (14:51 +0000)
2012-03-02  Bill Schmidt <wschmidt@linux.vnet.ibm.com>
    Ira Rosen <irar@il.ibm.com>

PR tree-optimization/50031
PR tree-optimization/50969
* targhooks.c (default_builtin_vectorization_cost): Handle
vec_promote_demote.
* target.h (enum vect_cost_for_stmt): Add vec_promote_demote.
* tree-vect-loop.c (vect_get_single_scalar_iteraion_cost): Handle
all types of reduction and pattern statements.
(vect_estimate_min_profitable_iters): Likewise.
* tree-vect-stmts.c (vect_model_promotion_demotion_cost): New function.
(vect_model_store_cost): Use vec_perm rather than vector_stmt for
statement cost.
(vect_model_load_cost): Likewise.
(vect_get_load_cost): Likewise; add dump logic for explicit realigns.
(vectorizable_type_demotion): Call vect_model_promotion_demotion_cost.
(vectorizable_type_promotion): Likewise.
* config/spu/spu.c (spu_builtin_vectorization_cost): Handle
vec_promote_demote.
* config/i386/i386.c (ix86_builtin_vectorization_cost): Likewise.
* config/rs6000/rs6000.c (rs6000_builtin_vectorization_cost): Update
vec_perm for VSX and handle vec_promote_demote.

Co-Authored-By: Ira Rosen <irar@il.ibm.com>
From-SVN: r184787

gcc/ChangeLog
gcc/config/i386/i386.c
gcc/config/rs6000/rs6000.c
gcc/config/spu/spu.c
gcc/target.h
gcc/targhooks.c
gcc/tree-vect-loop.c
gcc/tree-vect-stmts.c

index 4828be18f2f59473866283812f662218cf16f470..22a1359dd964bb2a2324af6200a698b08487709e 100644 (file)
@@ -1,3 +1,27 @@
+2012-03-02  Bill Schmidt <wschmidt@linux.vnet.ibm.com>
+           Ira Rosen <irar@il.ibm.com>
+
+       PR tree-optimization/50031
+       PR tree-optimization/50969
+       * targhooks.c (default_builtin_vectorization_cost): Handle
+       vec_promote_demote.
+       * target.h (enum vect_cost_for_stmt): Add vec_promote_demote.
+       * tree-vect-loop.c (vect_get_single_scalar_iteraion_cost): Handle
+       all types of reduction and pattern statements.
+       (vect_estimate_min_profitable_iters): Likewise.
+       * tree-vect-stmts.c (vect_model_promotion_demotion_cost): New function.
+       (vect_model_store_cost): Use vec_perm rather than vector_stmt for
+       statement cost.
+       (vect_model_load_cost): Likewise.
+       (vect_get_load_cost): Likewise; add dump logic for explicit realigns.
+       (vectorizable_type_demotion): Call vect_model_promotion_demotion_cost.
+       (vectorizable_type_promotion): Likewise.
+       * config/spu/spu.c (spu_builtin_vectorization_cost): Handle
+       vec_promote_demote.
+       * config/i386/i386.c (ix86_builtin_vectorization_cost): Likewise.
+       * config/rs6000/rs6000.c (rs6000_builtin_vectorization_cost): Update
+       vec_perm for VSX and handle vec_promote_demote.
+
 2012-03-01  Jakub Jelinek  <jakub@redhat.com>
 
        * BASE-VER: Set to 4.6.4.
index 2ee8df99bde89ebd6449ec008ee1922eec0dc5c4..a58a8cac2a57d8b3d83a26e2f3ebfc1c426d33b5 100644 (file)
@@ -32823,7 +32823,8 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
         return ix86_cost->cond_not_taken_branch_cost;
 
       case vec_perm:
-        return 1;
+      case vec_promote_demote:
+        return ix86_cost->vec_stmt_cost;
 
       default:
         gcc_unreachable ();
index 2f2f342e7920b594f8e7659dd4ffd0d16f5593b7..742ec237aae63e4c9ce92b617adac7d68e43788e 100644 (file)
@@ -3695,12 +3695,23 @@ rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
       case vec_to_scalar:
       case scalar_to_vec:
       case cond_branch_not_taken:
-      case vec_perm:
         return 1;
 
       case cond_branch_taken:
         return 3;
 
+      case vec_perm:
+       if (TARGET_VSX)
+         return 4;
+       else
+         return 1;
+
+      case vec_promote_demote:
+       if (TARGET_VSX)
+         return 5;
+       else
+         return 1;
+
       case unaligned_load:
         if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
           {
index 8901162a34139ae39cdda2e29c122f2412afde20..dffca84b040ee61c6a063c4a4a4b14f7432c3de7 100644 (file)
@@ -6794,6 +6794,7 @@ spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
       case scalar_to_vec:
       case cond_branch_not_taken:
       case vec_perm:
+      case vec_promote_demote:
         return 1;
 
       case scalar_store:
index eaf7aadd707a682e3dd9f6b4827f24bc452757f6..5ccd7fd67a3e1a5905292fb5930881c9fdacaeab 100644 (file)
@@ -128,7 +128,8 @@ enum vect_cost_for_stmt
   scalar_to_vec,
   cond_branch_not_taken,
   cond_branch_taken,
-  vec_perm
+  vec_perm,
+  vec_promote_demote
 };
 
 /* Sets of optimization levels at which an option may be enabled by
index 225831b9515693775b866fbbd659364cf1872f30..c1bd118179ca246f2ba226ba72ad968cbc0eb19e 100644 (file)
@@ -529,6 +529,7 @@ default_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
       case scalar_to_vec:
       case cond_branch_not_taken:
       case vec_perm:
+      case vec_promote_demote:
         return 1;
 
       case unaligned_load:
index 44c1ecddd1d8f16f58b373ebd70e4f21018d1ba0..dd9aef4174f0a4b3d762a5f4f221fd64e340b9b1 100644 (file)
@@ -2104,7 +2104,8 @@ vect_get_single_scalar_iteraion_cost (loop_vec_info loop_vinfo)
           if (stmt_info
               && !STMT_VINFO_RELEVANT_P (stmt_info)
               && (!STMT_VINFO_LIVE_P (stmt_info)
-                  || STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def))
+                  || !VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
+             && !STMT_VINFO_IN_PATTERN_P (stmt_info))
             continue;
 
           if (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
@@ -2251,11 +2252,19 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
        {
          gimple stmt = gsi_stmt (si);
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+
+         if (STMT_VINFO_IN_PATTERN_P (stmt_info))
+           {
+             stmt = STMT_VINFO_RELATED_STMT (stmt_info);
+             stmt_info = vinfo_for_stmt (stmt);
+           }
+
          /* Skip stmts that are not vectorized inside the loop.  */
          if (!STMT_VINFO_RELEVANT_P (stmt_info)
              && (!STMT_VINFO_LIVE_P (stmt_info)
-                 || STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def))
+                 || !VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info))))
            continue;
+
          vec_inside_cost += STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) * factor;
          /* FIXME: for stmts in the inner-loop in outer-loop vectorization,
             some of the "outside" costs are generated inside the outer-loop.  */
index b5ecd3f24eee25f11f236c4090a800df17bdb2a9..7a263785e9da418ed7ff24a470154e79a4015921 100644 (file)
@@ -623,6 +623,46 @@ vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
 }
 
 
+/* Model cost for type demotion and promotion operations.  PWR is normally
+   zero for single-step promotions and demotions.  It will be one if 
+   two-step promotion/demotion is required, and so on.  Each additional
+   step doubles the number of instructions required.  */
+
+static void
+vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
+                                   enum vect_def_type *dt, int pwr)
+{
+  int i, tmp;
+  int inside_cost = 0, outside_cost = 0, single_stmt_cost;
+
+  /* The SLP costs were already calculated during SLP tree build.  */
+  if (PURE_SLP_STMT (stmt_info))
+    return;
+
+  single_stmt_cost = vect_get_stmt_cost (vec_promote_demote);
+  for (i = 0; i < pwr + 1; i++)
+    {
+      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
+       (i + 1) : i;
+      inside_cost += vect_pow2 (tmp) * single_stmt_cost;
+    }
+
+  /* FORNOW: Assuming maximum 2 args per stmts.  */
+  for (i = 0; i < 2; i++)
+    {
+      if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
+        outside_cost += vect_get_stmt_cost (vector_stmt);
+    }
+
+  if (vect_print_dump_info (REPORT_COST))
+    fprintf (vect_dump, "vect_model_promotion_demotion_cost: inside_cost = %d, "
+             "outside_cost = %d .", inside_cost, outside_cost);
+
+  /* Set the costs in STMT_INFO.  */
+  stmt_vinfo_set_inside_of_loop_cost (stmt_info, NULL, inside_cost);
+  stmt_vinfo_set_outside_of_loop_cost (stmt_info, NULL, outside_cost);
+}
+
 /* Function vect_cost_strided_group_size
 
    For strided load or store, return the group_size only if it is the first
@@ -691,7 +731,7 @@ vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
     {
       /* Uses a high and low interleave operation for each needed permute.  */
       inside_cost = ncopies * exact_log2(group_size) * group_size
-        * vect_get_stmt_cost (vector_stmt);
+        * vect_get_stmt_cost (vec_perm);
 
       if (vect_print_dump_info (REPORT_COST))
         fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
@@ -795,7 +835,7 @@ vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, slp_tree slp_node)
     {
       /* Uses an even and odd extract operations for each needed permute.  */
       inside_cost = ncopies * exact_log2(group_size) * group_size
-       * vect_get_stmt_cost (vector_stmt);
+       * vect_get_stmt_cost (vec_perm);
 
       if (vect_print_dump_info (REPORT_COST))
         fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
@@ -855,7 +895,7 @@ vect_get_load_cost (struct data_reference *dr, int ncopies,
     case dr_explicit_realign:
       {
         *inside_cost += ncopies * (2 * vect_get_stmt_cost (vector_load)
-           + vect_get_stmt_cost (vector_stmt));
+                                  + vect_get_stmt_cost (vec_perm));
 
         /* FIXME: If the misalignment remains fixed across the iterations of
            the containing loop, the following cost should be added to the
@@ -863,6 +903,9 @@ vect_get_load_cost (struct data_reference *dr, int ncopies,
         if (targetm.vectorize.builtin_mask_for_load)
           *inside_cost += vect_get_stmt_cost (vector_stmt);
 
+        if (vect_print_dump_info (REPORT_COST))
+          fprintf (vect_dump, "vect_model_load_cost: explicit realign");
+
         break;
       }
     case dr_explicit_realign_optimized:
@@ -886,7 +929,12 @@ vect_get_load_cost (struct data_reference *dr, int ncopies,
           }
 
         *inside_cost += ncopies * (vect_get_stmt_cost (vector_load)
-          + vect_get_stmt_cost (vector_stmt));
+                                  + vect_get_stmt_cost (vec_perm));
+
+        if (vect_print_dump_info (REPORT_COST))
+          fprintf (vect_dump,
+                  "vect_model_load_cost: explicit realign optimized");
+
         break;
       }
 
@@ -2919,7 +2967,7 @@ vectorizable_type_demotion (gimple stmt, gimple_stmt_iterator *gsi,
       STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
       if (vect_print_dump_info (REPORT_DETAILS))
         fprintf (vect_dump, "=== vectorizable_demotion ===");
-      vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
+      vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
       return true;
     }
 
@@ -3217,7 +3265,7 @@ vectorizable_type_promotion (gimple stmt, gimple_stmt_iterator *gsi,
       STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
       if (vect_print_dump_info (REPORT_DETAILS))
         fprintf (vect_dump, "=== vectorizable_promotion ===");
-      vect_model_simple_cost (stmt_info, 2*ncopies, dt, NULL);
+      vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
       return true;
     }