aarch64: Add costs for storing one element of a vector

author Richard Sandiford <richard.sandiford@arm.com>
Fri, 26 Mar 2021 16:08:31 +0000 (16:08 +0000)
committer Richard Sandiford <richard.sandiford@arm.com>
Fri, 26 Mar 2021 16:08:31 +0000 (16:08 +0000)
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h

index 3d152754981016380c028893c1f49f532a7a68f5..fabe3df70716bfcebff0b871321bf73365d03269 100644 (file)
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -224,6 +224,10 @@ struct simd_vec_cost
    const int reduc_f32_cost;
    const int reduc_f64_cost;
  
+  /* Additional cost of storing a single vector element, on top of the
+     normal cost of a scalar store.  */
+  const int store_elt_extra_cost;
+
    /* Cost of a vector-to-scalar operation.  */
    const int vec_to_scalar_cost;
  
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c

index 8fb723dabd263122758cd5a2e24c85110db947c9..20bb75bd56c6cdf8f09deee56fb7b316024b070a 100644 (file)
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -601,6 +601,7 @@ static const advsimd_vec_cost generic_advsimd_vector_cost =
    2, /* reduc_f16_cost  */
    2, /* reduc_f32_cost  */
    2, /* reduc_f64_cost  */
+  2, /* store_elt_extra_cost  */
    2, /* vec_to_scalar_cost  */
    1, /* scalar_to_vec_cost  */
    1, /* align_load_cost  */
@@ -626,6 +627,7 @@ static const sve_vec_cost generic_sve_vector_cost =
      2, /* reduc_f16_cost  */
      2, /* reduc_f32_cost  */
      2, /* reduc_f64_cost  */
+    2, /* store_elt_extra_cost  */
      2, /* vec_to_scalar_cost  */
      1, /* scalar_to_vec_cost  */
      1, /* align_load_cost  */
@@ -667,6 +669,7 @@ static const advsimd_vec_cost a64fx_advsimd_vector_cost =
    13, /* reduc_f16_cost  */
    13, /* reduc_f32_cost  */
    13, /* reduc_f64_cost  */
+  13, /* store_elt_extra_cost  */
    13, /* vec_to_scalar_cost  */
    4, /* scalar_to_vec_cost  */
    6, /* align_load_cost  */
@@ -691,6 +694,7 @@ static const sve_vec_cost a64fx_sve_vector_cost =
      13, /* reduc_f16_cost  */
      13, /* reduc_f32_cost  */
      13, /* reduc_f64_cost  */
+    13, /* store_elt_extra_cost  */
      13, /* vec_to_scalar_cost  */
      4, /* scalar_to_vec_cost  */
      6, /* align_load_cost  */
@@ -731,6 +735,7 @@ static const advsimd_vec_cost qdf24xx_advsimd_vector_cost =
    1, /* reduc_f16_cost  */
    1, /* reduc_f32_cost  */
    1, /* reduc_f64_cost  */
+  1, /* store_elt_extra_cost  */
    1, /* vec_to_scalar_cost  */
    1, /* scalar_to_vec_cost  */
    1, /* align_load_cost  */
@@ -768,6 +773,7 @@ static const advsimd_vec_cost thunderx_advsimd_vector_cost =
    2, /* reduc_f16_cost  */
    2, /* reduc_f32_cost  */
    2, /* reduc_f64_cost  */
+  2, /* store_elt_extra_cost  */
    2, /* vec_to_scalar_cost  */
    2, /* scalar_to_vec_cost  */
    3, /* align_load_cost  */
@@ -804,6 +810,7 @@ static const advsimd_vec_cost tsv110_advsimd_vector_cost =
    3, /* reduc_f16_cost  */
    3, /* reduc_f32_cost  */
    3, /* reduc_f64_cost  */
+  3, /* store_elt_extra_cost  */
    3, /* vec_to_scalar_cost  */
    2, /* scalar_to_vec_cost  */
    5, /* align_load_cost  */
@@ -839,6 +846,7 @@ static const advsimd_vec_cost cortexa57_advsimd_vector_cost =
    8, /* reduc_f16_cost  */
    8, /* reduc_f32_cost  */
    8, /* reduc_f64_cost  */
+  8, /* store_elt_extra_cost  */
    8, /* vec_to_scalar_cost  */
    8, /* scalar_to_vec_cost  */
    4, /* align_load_cost  */
@@ -875,6 +883,7 @@ static const advsimd_vec_cost exynosm1_advsimd_vector_cost =
    3, /* reduc_f16_cost  */
    3, /* reduc_f32_cost  */
    3, /* reduc_f64_cost  */
+  3, /* store_elt_extra_cost  */
    3, /* vec_to_scalar_cost  */
    3, /* scalar_to_vec_cost  */
    5, /* align_load_cost  */
@@ -910,6 +919,7 @@ static const advsimd_vec_cost xgene1_advsimd_vector_cost =
    4, /* reduc_f16_cost  */
    4, /* reduc_f32_cost  */
    4, /* reduc_f64_cost  */
+  4, /* store_elt_extra_cost  */
    4, /* vec_to_scalar_cost  */
    4, /* scalar_to_vec_cost  */
    10, /* align_load_cost  */
@@ -946,6 +956,7 @@ static const advsimd_vec_cost thunderx2t99_advsimd_vector_cost =
    6, /* reduc_f16_cost  */
    6, /* reduc_f32_cost  */
    6, /* reduc_f64_cost  */
+  6, /* store_elt_extra_cost  */
    6, /* vec_to_scalar_cost  */
    5, /* scalar_to_vec_cost  */
    4, /* align_load_cost  */
@@ -982,6 +993,7 @@ static const advsimd_vec_cost thunderx3t110_advsimd_vector_cost =
    5, /* reduc_f16_cost  */
    5, /* reduc_f32_cost  */
    5, /* reduc_f64_cost  */
+  5, /* store_elt_extra_cost  */
    5, /* vec_to_scalar_cost  */
    5, /* scalar_to_vec_cost  */
    4, /* align_load_cost  */
@@ -14259,6 +14271,14 @@ aarch64_detect_vector_stmt_subtype (vec_info *vinfo, vect_cost_for_stmt kind,
    if (aarch64_sve_mode_p (TYPE_MODE (vectype)))
      sve_costs = aarch64_tune_params.vec_costs->sve;
  
+  /* Detect cases in which vec_to_scalar is describing the extraction of a
+     vector element in preparation for a scalar store.  The store itself is
+     costed separately.  */
+  if (kind == vec_to_scalar
+      && STMT_VINFO_DATA_REF (stmt_info)
+      && DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)))
+    return simd_costs->store_elt_extra_cost;
+
    /* Detect cases in which vec_to_scalar represents an in-loop reduction.  */
    if (kind == vec_to_scalar
        && where == vect_body
author	Richard Sandiford <richard.sandiford@arm.com>
	Fri, 26 Mar 2021 16:08:31 +0000 (16:08 +0000)
committer	Richard Sandiford <richard.sandiford@arm.com>
	Fri, 26 Mar 2021 16:08:31 +0000 (16:08 +0000)
gcc/config/aarch64/aarch64-protos.h		patch | blob | blame | history
gcc/config/aarch64/aarch64.c		patch | blob | blame | history