git.ipfire.org Git - thirdparty/gcc.git/commitdiff
Add support for vector permute cost since various permutes can expand into a complex...
author Wilco Dijkstra <wdijkstr@arm.com>
Thu, 28 Jan 2016 11:52:08 +0000 (11:52 +0000)
committer Wilco Dijkstra <wilco@gcc.gnu.org>
Thu, 28 Jan 2016 11:52:08 +0000 (11:52 +0000)
Add support for a vector permute cost, since various permutes can expand
into a complex sequence of instructions.  This fixes major performance
regressions due to recent changes in the SLP vectorizer (which now
vectorizes more aggressively and emits many complex permutes).  Set the
cost to a value greater than 1 for all microarchitectures so that the
vectorizer usually emits no permutes and the regressions disappear.

2016-01-28  Wilco Dijkstra  <wdijkstr@arm.com>

* config/aarch64/aarch64.c (generic_vector_cost):
Set vec_permute_cost.
(cortexa57_vector_cost): Likewise.
(exynosm1_vector_cost): Likewise.
(xgene1_vector_cost): Likewise.
(aarch64_builtin_vectorization_cost): Use vec_permute_cost.
* config/aarch64/aarch64-protos.h (cpu_vector_cost):
Add vec_permute_cost entry.

From-SVN: r232922

gcc/ChangeLog
gcc/config/aarch64/aarch64-protos.h
gcc/config/aarch64/aarch64.c

index 99f2bdb32d75b4d54a72d30504b1850326dcee49..1967e92279834cf66a33a14632724a527e8d6f31 100644 (file)
@@ -1,3 +1,14 @@
+2016-01-28  Wilco Dijkstra  <wdijkstr@arm.com>
+
+       * config/aarch64/aarch64.c (generic_vector_cost):
+       Set vec_permute_cost.
+       (cortexa57_vector_cost): Likewise.
+       (exynosm1_vector_cost): Likewise.
+       (xgene1_vector_cost): Likewise.
+       (aarch64_builtin_vectorization_cost): Use vec_permute_cost.
+       * config/aarch64/aarch64-protos.h (cpu_vector_cost):
+       Add vec_permute_cost entry.
+
 2016-01-28  Wilco Dijkstra  <wdijkstr@arm.com>
 
        * config/aarch64/aarch64.md (ccmp<mode>): Disassemble
index 15fc37deb9a99eeee19896b10d6d9ec9b2a340c2..bd900c6269f992d810c73e63d336964242b917ff 100644 (file)
@@ -156,9 +156,10 @@ struct cpu_vector_cost
   const int scalar_load_cost;           /* Cost of scalar load.  */
   const int scalar_store_cost;          /* Cost of scalar store.  */
   const int vec_stmt_cost;              /* Cost of any vector operation,
-                                           excluding load, store,
+                                           excluding load, store, permute,
                                            vector-to-scalar and
                                            scalar-to-vector operation.  */
+  const int vec_permute_cost;           /* Cost of permute operation.  */
   const int vec_to_scalar_cost;                 /* Cost of vec-to-scalar operation.  */
   const int scalar_to_vec_cost;                 /* Cost of scalar-to-vector
                                            operation.  */
index df3dec0a72b9d6b81d0b739a1343b430718aec12..5b3771eca6744c3dce7549468817d971e84091c4 100644 (file)
@@ -315,6 +315,7 @@ static const struct cpu_vector_cost generic_vector_cost =
   1, /* scalar_load_cost  */
   1, /* scalar_store_cost  */
   1, /* vec_stmt_cost  */
+  2, /* vec_permute_cost  */
   1, /* vec_to_scalar_cost  */
   1, /* scalar_to_vec_cost  */
   1, /* vec_align_load_cost  */
@@ -332,6 +333,7 @@ static const struct cpu_vector_cost cortexa57_vector_cost =
   4, /* scalar_load_cost  */
   1, /* scalar_store_cost  */
   3, /* vec_stmt_cost  */
+  3, /* vec_permute_cost  */
   8, /* vec_to_scalar_cost  */
   8, /* scalar_to_vec_cost  */
   5, /* vec_align_load_cost  */
@@ -348,6 +350,7 @@ static const struct cpu_vector_cost exynosm1_vector_cost =
   5, /* scalar_load_cost  */
   1, /* scalar_store_cost  */
   3, /* vec_stmt_cost  */
+  3, /* vec_permute_cost  */
   3, /* vec_to_scalar_cost  */
   3, /* scalar_to_vec_cost  */
   5, /* vec_align_load_cost  */
@@ -365,6 +368,7 @@ static const struct cpu_vector_cost xgene1_vector_cost =
   5, /* scalar_load_cost  */
   1, /* scalar_store_cost  */
   2, /* vec_stmt_cost  */
+  2, /* vec_permute_cost  */
   4, /* vec_to_scalar_cost  */
   4, /* scalar_to_vec_cost  */
   10, /* vec_align_load_cost  */
@@ -7574,6 +7578,8 @@ aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
        return aarch64_tune_params.vec_costs->cond_not_taken_branch_cost;
 
       case vec_perm:
+       return aarch64_tune_params.vec_costs->vec_permute_cost;
+
       case vec_promote_demote:
        return aarch64_tune_params.vec_costs->vec_stmt_cost;