From: Wilco Dijkstra Date: Thu, 28 Jan 2016 11:52:08 +0000 (+0000) Subject: Add support for vector permute cost since various permutes can expand into a complex... X-Git-Tag: basepoints/gcc-7~1255 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=c428f91cb752d9b831df83f057b73e2815f2adad;p=thirdparty%2Fgcc.git Add support for vector permute cost since various permutes can expand into a complex sequence of instructions. Add support for vector permute cost since various permutes can expand into a complex sequence of instructions. This fixes major performance regressions due to recent changes in the SLP vectorizer (which now vectorizes more aggressively and emits many complex permutes). Set the cost to > 1 for all microarchitectures so that the number of permutes is usually zero and the regressions disappear. 2016-01-28 Wilco Dijkstra * config/aarch64/aarch64.c (generic_vector_cost): Set vec_permute_cost. (cortexa57_vector_cost): Likewise. (exynosm1_vector_cost): Likewise. (xgene1_vector_cost): Likewise. (aarch64_builtin_vectorization_cost): Use vec_permute_cost. * config/aarch64/aarch64-protos.h (cpu_vector_cost): Add vec_permute_cost entry. From-SVN: r232922 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 99f2bdb32d75..1967e9227983 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,14 @@ +2016-01-28 Wilco Dijkstra + + * config/aarch64/aarch64.c (generic_vector_cost): + Set vec_permute_cost. + (cortexa57_vector_cost): Likewise. + (exynosm1_vector_cost): Likewise. + (xgene1_vector_cost): Likewise. + (aarch64_builtin_vectorization_cost): Use vec_permute_cost. + * config/aarch64/aarch64-protos.h (cpu_vector_cost): + Add vec_permute_cost entry. 
+ 2016-01-28 Wilco Dijkstra * config/aarch64/aarch64.md (ccmp): Disassemble diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index 15fc37deb9a9..bd900c6269f9 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -156,9 +156,10 @@ struct cpu_vector_cost const int scalar_load_cost; /* Cost of scalar load. */ const int scalar_store_cost; /* Cost of scalar store. */ const int vec_stmt_cost; /* Cost of any vector operation, - excluding load, store, + excluding load, store, permute, vector-to-scalar and scalar-to-vector operation. */ + const int vec_permute_cost; /* Cost of permute operation. */ const int vec_to_scalar_cost; /* Cost of vec-to-scalar operation. */ const int scalar_to_vec_cost; /* Cost of scalar-to-vector operation. */ diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index df3dec0a72b9..5b3771eca674 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -315,6 +315,7 @@ static const struct cpu_vector_cost generic_vector_cost = 1, /* scalar_load_cost */ 1, /* scalar_store_cost */ 1, /* vec_stmt_cost */ + 2, /* vec_permute_cost */ 1, /* vec_to_scalar_cost */ 1, /* scalar_to_vec_cost */ 1, /* vec_align_load_cost */ @@ -332,6 +333,7 @@ static const struct cpu_vector_cost cortexa57_vector_cost = 4, /* scalar_load_cost */ 1, /* scalar_store_cost */ 3, /* vec_stmt_cost */ + 3, /* vec_permute_cost */ 8, /* vec_to_scalar_cost */ 8, /* scalar_to_vec_cost */ 5, /* vec_align_load_cost */ @@ -348,6 +350,7 @@ static const struct cpu_vector_cost exynosm1_vector_cost = 5, /* scalar_load_cost */ 1, /* scalar_store_cost */ 3, /* vec_stmt_cost */ + 3, /* vec_permute_cost */ 3, /* vec_to_scalar_cost */ 3, /* scalar_to_vec_cost */ 5, /* vec_align_load_cost */ @@ -365,6 +368,7 @@ static const struct cpu_vector_cost xgene1_vector_cost = 5, /* scalar_load_cost */ 1, /* scalar_store_cost */ 2, /* vec_stmt_cost */ + 2, /* vec_permute_cost */ 4, /* 
vec_to_scalar_cost */ 4, /* scalar_to_vec_cost */ 10, /* vec_align_load_cost */ @@ -7574,6 +7578,8 @@ aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, return aarch64_tune_params.vec_costs->cond_not_taken_branch_cost; case vec_perm: + return aarch64_tune_params.vec_costs->vec_permute_cost; + case vec_promote_demote: return aarch64_tune_params.vec_costs->vec_stmt_cost;