2, /* reduc_f16_cost */
2, /* reduc_f32_cost */
2, /* reduc_f64_cost */
+ 2, /* store_elt_extra_cost */
2, /* vec_to_scalar_cost */
1, /* scalar_to_vec_cost */
1, /* align_load_cost */
2, /* reduc_f16_cost */
2, /* reduc_f32_cost */
2, /* reduc_f64_cost */
+ 2, /* store_elt_extra_cost */
2, /* vec_to_scalar_cost */
1, /* scalar_to_vec_cost */
1, /* align_load_cost */
13, /* reduc_f16_cost */
13, /* reduc_f32_cost */
13, /* reduc_f64_cost */
+ 13, /* store_elt_extra_cost */
13, /* vec_to_scalar_cost */
4, /* scalar_to_vec_cost */
6, /* align_load_cost */
13, /* reduc_f16_cost */
13, /* reduc_f32_cost */
13, /* reduc_f64_cost */
+ 13, /* store_elt_extra_cost */
13, /* vec_to_scalar_cost */
4, /* scalar_to_vec_cost */
6, /* align_load_cost */
1, /* reduc_f16_cost */
1, /* reduc_f32_cost */
1, /* reduc_f64_cost */
+ 1, /* store_elt_extra_cost */
1, /* vec_to_scalar_cost */
1, /* scalar_to_vec_cost */
1, /* align_load_cost */
2, /* reduc_f16_cost */
2, /* reduc_f32_cost */
2, /* reduc_f64_cost */
+ 2, /* store_elt_extra_cost */
2, /* vec_to_scalar_cost */
2, /* scalar_to_vec_cost */
3, /* align_load_cost */
3, /* reduc_f16_cost */
3, /* reduc_f32_cost */
3, /* reduc_f64_cost */
+ 3, /* store_elt_extra_cost */
3, /* vec_to_scalar_cost */
2, /* scalar_to_vec_cost */
5, /* align_load_cost */
8, /* reduc_f16_cost */
8, /* reduc_f32_cost */
8, /* reduc_f64_cost */
+ 8, /* store_elt_extra_cost */
8, /* vec_to_scalar_cost */
8, /* scalar_to_vec_cost */
4, /* align_load_cost */
3, /* reduc_f16_cost */
3, /* reduc_f32_cost */
3, /* reduc_f64_cost */
+ 3, /* store_elt_extra_cost */
3, /* vec_to_scalar_cost */
3, /* scalar_to_vec_cost */
5, /* align_load_cost */
4, /* reduc_f16_cost */
4, /* reduc_f32_cost */
4, /* reduc_f64_cost */
+ 4, /* store_elt_extra_cost */
4, /* vec_to_scalar_cost */
4, /* scalar_to_vec_cost */
10, /* align_load_cost */
6, /* reduc_f16_cost */
6, /* reduc_f32_cost */
6, /* reduc_f64_cost */
+ 6, /* store_elt_extra_cost */
6, /* vec_to_scalar_cost */
5, /* scalar_to_vec_cost */
4, /* align_load_cost */
5, /* reduc_f16_cost */
5, /* reduc_f32_cost */
5, /* reduc_f64_cost */
+ 5, /* store_elt_extra_cost */
5, /* vec_to_scalar_cost */
5, /* scalar_to_vec_cost */
4, /* align_load_cost */
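For orientation, here is a minimal sketch of the idea behind the new entry; the struct, enum, helper and cost values below are simplified stand-ins, not GCC's actual declarations. Each per-ISA cost table gains a store_elt_extra_cost field, and the cost hook returns that value instead of the generic vec_to_scalar_cost when the element extraction only exists to feed a scalar store (the store itself being costed separately, as the hunk below does):

    #include <stdio.h>
    #include <stdbool.h>

    /* Simplified stand-in for one per-ISA vector cost table.  */
    struct vec_cost_table
    {
      int vec_to_scalar_cost;    /* Generic vector-to-scalar move.  */
      int store_elt_extra_cost;  /* Extracting an element for a scalar store.  */
    };

    /* Simplified stand-in for the statement kinds the vectorizer costs.  */
    enum stmt_kind { VEC_TO_SCALAR, SCALAR_STORE };

    /* Cost a vec_to_scalar statement.  If the extraction feeds a scalar
       store, use store_elt_extra_cost; otherwise fall back to the generic
       vec_to_scalar_cost.  */
    static int
    vec_to_scalar_stmt_cost (const struct vec_cost_table *costs,
                             enum stmt_kind kind, bool feeds_scalar_store)
    {
      if (kind == VEC_TO_SCALAR && feeds_scalar_store)
        return costs->store_elt_extra_cost;
      return costs->vec_to_scalar_cost;
    }

    int
    main (void)
    {
      /* Illustrative numbers only.  */
      struct vec_cost_table generic = { .vec_to_scalar_cost = 4,
                                        .store_elt_extra_cost = 2 };
      printf ("plain extract: %d\n",
              vec_to_scalar_stmt_cost (&generic, VEC_TO_SCALAR, false));
      printf ("extract for store: %d\n",
              vec_to_scalar_stmt_cost (&generic, VEC_TO_SCALAR, true));
      return 0;
    }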
if (aarch64_sve_mode_p (TYPE_MODE (vectype)))
  sve_costs = aarch64_tune_params.vec_costs->sve;
+ /* Detect cases in which vec_to_scalar is describing the extraction of a
+    vector element in preparation for a scalar store.  The store itself is
+    costed separately.  */
+ if (kind == vec_to_scalar
+     && STMT_VINFO_DATA_REF (stmt_info)
+     && DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)))
+   return simd_costs->store_elt_extra_cost;
+
/* Detect cases in which vec_to_scalar represents an in-loop reduction.  */
if (kind == vec_to_scalar
    && where == vect_body