From: Jennifer Schmitz Date: Mon, 4 Nov 2024 15:56:09 +0000 (-0800) Subject: match.pd: Fold vec_perm with view_convert X-Git-Tag: basepoints/gcc-16~4229 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=c83e2d47574fd9a21f257e0f0d7e350c3f1b0618;p=thirdparty%2Fgcc.git match.pd: Fold vec_perm with view_convert This patch improves the codegen for the following test case: uint64x2_t foo (uint64x2_t r) { uint32x4_t a = vreinterpretq_u32_u64 (r); uint32_t t; t = a[0]; a[0] = a[1]; a[1] = t; t = a[2]; a[2] = a[3]; a[3] = t; return vreinterpretq_u64_u32 (a); } from (-O1): foo: mov v31.16b, v0.16b ins v0.s[0], v0.s[1] ins v0.s[1], v31.s[0] ins v0.s[2], v31.s[3] ins v0.s[3], v31.s[2] ret to: foo: rev64 v0.4s, v0.4s ret This is achieved by extending the following match.pd pattern to account for type differences between @0 and @1 due to view converts. /* Simplify vector inserts of other vector extracts to a permute. */ (simplify (bit_insert @0 (BIT_FIELD_REF@2 @1 @rsize @rpos) @ipos) The patch was bootstrapped and regtested on aarch64-linux-gnu and x86_64-linux-gnu, no regression. OK for mainline? Signed-off-by: Jennifer Schmitz Co-authored-by: Richard Biener gcc/ PR tree-optimization/117093 * match.pd: Extend (bit_insert @0 (BIT_FIELD_REF@2 @1 @rsize @rpos) @ipos) to allow type differences between @0 and @1 due to view converts. gcc/testsuite/ PR tree-optimization/117093 * gcc.dg/tree-ssa/pr117093.c: New test. 
--- diff --git a/gcc/match.pd b/gcc/match.pd index 0ac5674f24be..753bf811f67a 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -9583,7 +9583,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (if (VECTOR_TYPE_P (type) && (VECTOR_MODE_P (TYPE_MODE (type)) || optimize_vectors_before_lowering_p ()) - && types_match (@0, @1) + && operand_equal_p (TYPE_SIZE (TREE_TYPE (@0)), + TYPE_SIZE (TREE_TYPE (@1)), 0) && types_match (TREE_TYPE (TREE_TYPE (@0)), TREE_TYPE (@2)) && TYPE_VECTOR_SUBPARTS (type).is_constant () && multiple_p (wi::to_poly_offset (@rpos), @@ -9591,7 +9592,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (with { unsigned HOST_WIDE_INT elsz - = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (TREE_TYPE (@1)))); + = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (TREE_TYPE (@0)))); poly_uint64 relt = exact_div (tree_to_poly_uint64 (@rpos), elsz); poly_uint64 ielt = exact_div (tree_to_poly_uint64 (@ipos), elsz); unsigned nunits = TYPE_VECTOR_SUBPARTS (type).to_constant (); @@ -9602,9 +9603,11 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) vec_perm_indices sel (builder, 2, nunits); } (if (!VECTOR_MODE_P (TYPE_MODE (type)) - || can_vec_perm_const_p (TYPE_MODE (type), TYPE_MODE (type), sel, false)) - (vec_perm @0 @1 { vec_perm_indices_to_tree - (build_vector_type (ssizetype, nunits), sel); }))))) + || can_vec_perm_const_p (TYPE_MODE (type), + TYPE_MODE (type), sel, false)) + (vec_perm @0 (view_convert @1) + { vec_perm_indices_to_tree (build_vector_type (ssizetype, nunits), + sel); }))))) (if (canonicalize_math_after_vectorization_p ()) (for fmas (FMA) diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr117093.c b/gcc/testsuite/gcc.dg/tree-ssa/pr117093.c new file mode 100644 index 000000000000..0fea32919dd0 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr117093.c @@ -0,0 +1,17 @@ +/* { dg-final { check-function-bodies "**" "" } } */ +/* { dg-options "-O1" } */ + +#include <arm_neon.h> + +/* +** foo: +** rev64 v0\.4s, v0\.4s +** ret +*/ +uint64x2_t foo (uint64x2_t r) { + uint32x4_t a = vreinterpretq_u32_u64 (r); + uint32_t t; + 
t = a[0]; a[0] = a[1]; a[1] = t; + t = a[2]; a[2] = a[3]; a[3] = t; + return vreinterpretq_u64_u32 (a); +}