From: Richard Sandiford Date: Tue, 3 Aug 2021 12:00:47 +0000 (+0100) Subject: aarch64: Tweak MLA vector costs X-Git-Tag: basepoints/gcc-13~5620 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=028059b46ec9aef7dd447792c579f35396751068;p=thirdparty%2Fgcc.git aarch64: Tweak MLA vector costs The issue-based vector costs currently assume that a multiply-add sequence can be implemented using a single instruction. This is generally true for scalars (which have a 4-operand instruction) and SVE (which allows the output to be tied to any input). However, for Advanced SIMD, multiplying two values and adding an invariant will end up being a move and an MLA. The only target to use the issue-based vector costs is Neoverse V1, which would generally prefer SVE in this case anyway. I therefore don't have a self-contained testcase. However, the distinction becomes more important with a later patch. gcc/ * config/aarch64/aarch64.c (aarch64_multiply_add_p): Add a vec_flags parameter. Detect cases in which an Advanced SIMD MLA would almost certainly require a MOV. (aarch64_count_ops): Update accordingly. --- diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 084f8caa0daf..19045ef69448 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -14767,9 +14767,12 @@ aarch64_integer_truncation_p (stmt_vec_info stmt_info) /* Return true if STMT_INFO is the second part of a two-statement multiply-add or multiply-subtract sequence that might be suitable for fusing into a - single instruction. */ + single instruction. If VEC_FLAGS is zero, analyze the operation as + a scalar one, otherwise analyze it as an operation on vectors with those + VEC_* flags. 
*/ static bool -aarch64_multiply_add_p (vec_info *vinfo, stmt_vec_info stmt_info) +aarch64_multiply_add_p (vec_info *vinfo, stmt_vec_info stmt_info, + unsigned int vec_flags) { gassign *assign = dyn_cast<gassign *> (stmt_info->stmt); if (!assign) @@ -14797,6 +14800,22 @@ aarch64_multiply_add_p (vec_info *vinfo, stmt_vec_info stmt_info) if (!rhs_assign || gimple_assign_rhs_code (rhs_assign) != MULT_EXPR) continue; + if (vec_flags & VEC_ADVSIMD) + { + /* Scalar and SVE code can tie the result to any FMLA input (or none, + although that requires a MOVPRFX for SVE). However, Advanced SIMD + only supports MLA forms, so will require a move if the result + cannot be tied to the accumulator. The most important case in + which this is true is when the accumulator input is invariant. */ + rhs = gimple_op (assign, 3 - i); + if (TREE_CODE (rhs) != SSA_NAME) + return false; + def_stmt_info = vinfo->lookup_def (rhs); + if (!def_stmt_info + || STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_external_def) + return false; + } + return true; } return false; @@ -15232,7 +15251,7 @@ aarch64_count_ops (class vec_info *vinfo, aarch64_vector_costs *costs, } /* Assume that multiply-adds will become a single operation. */ - if (stmt_info && aarch64_multiply_add_p (vinfo, stmt_info)) + if (stmt_info && aarch64_multiply_add_p (vinfo, stmt_info, vec_flags)) return; /* When costing scalar statements in vector code, the count already