From: Artemiy Volkov Date: Sat, 1 Nov 2025 17:17:15 +0000 (+0000) Subject: forwprop: allow subvectors in simplify_vector_constructor () X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=470411f44f51d9ef85bfcf3a8f9cb25344dd243f;p=thirdparty%2Fgcc.git forwprop: allow subvectors in simplify_vector_constructor () This is an attempt to fix https://gcc.gnu.org/pipermail/gcc-patches/2025-October/697879.html in the middle-end; the motivation in that patch was to teach gcc to compile: int16x8_t foo (int16x8_t x) { return vcombine_s16 (vget_high_s16 (x), vget_low_s16 (x)); } into one instruction: foo: ext v0.16b, v0.16b, v0.16b, #8 ret rather than the two we are generating now: foo: dup d31, v0.d[1] uzp1 v0.2d, v31.2d, v0.2d ret Instead of adding a define_insn in the backend, this patch relaxes the precondition of tree-ssa-forwprop.cc:simplify_vector_constructor () to accept subvectors as constructor elements. During initial argument processing (ll. 3817-3916), subvectors are decomposed into individual elements before populating the ELTS array; this allows the rest of the function to remain unchanged. Special handling is also implemented for constant and splat subvector elements of a constructor (the latter with the use of ssa_uniform_vector_p () from tree-vect-generic.cc, which this patch moves to tree.cc). Add GIMPLE tests to gcc.dg/tree-ssa demonstrating the intended behavior with various combinations of subvectors as constructor arguments, including constant and splat subvectors; also add some aarch64-specific tests to show that the change leads to us picking the "ext" instruction for the resulting VEC_PERM_EXPR. Bootstrapped and regtested on aarch64 and x86_64, regtested on aarch64_be. gcc/ChangeLog: * tree-ssa-forwprop.cc (simplify_vector_constructor): Support vector constructor elements. * tree-vect-generic.cc (ssa_uniform_vector_p): Make non-static and move ... * tree.cc (ssa_uniform_vector_p): ... here. * tree.h (ssa_uniform_vector_p): Declare it. 
gcc/testsuite/ChangeLog: * gcc.dg/tree-ssa/forwprop-43.c: New test. * gcc.target/aarch64/simd/combine_ext.c: New test. --- diff --git a/gcc/testsuite/gcc.dg/tree-ssa/forwprop-43.c b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-43.c new file mode 100644 index 00000000000..f0f6170648a --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-43.c @@ -0,0 +1,169 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-forwprop1" } */ +/* { dg-additional-options "-fgimple" } */ + +#include <stdint.h> + +typedef int32_t int32x4_t __attribute__((vector_size(16))); +typedef int32_t int32x2_t __attribute__((vector_size(8))); +typedef int32_t int32x1_t __attribute__((vector_size(4))); + +int32x4_t __GIMPLE (ssa) +foo (int32x4_t x) +{ + int32x2_t _1; + int32x2_t _2; + int32x4_t _6; + +__BB(2): + _1 = __BIT_FIELD_REF <int32x2_t> (x, 64, 64); + _2 = __BIT_FIELD_REF <int32x2_t> (x, 64, 0); + _6 = _Literal (int32x4_t) { _1, _2 }; + return _6; +} + +int32x4_t __GIMPLE (ssa) +foo2 (int32x4_t x) +{ + int32x1_t _1; + int32x1_t _2; + int32x1_t _3; + int32x1_t _4; + int32x4_t _6; + +__BB(2): + _1 = __BIT_FIELD_REF <int32x1_t> (x, 32, 64); + _2 = __BIT_FIELD_REF <int32x1_t> (x, 32, 96); + _3 = __BIT_FIELD_REF <int32x1_t> (x, 32, 0); + _4 = __BIT_FIELD_REF <int32x1_t> (x, 32, 32); + _6 = _Literal (int32x4_t) { _1, _2, _3, _4 }; + return _6; +} + +int32x4_t __GIMPLE (ssa) +foo3 (int32x4_t x, int32x4_t y) +{ + int32x2_t _1; + int32x2_t _2; + int32x4_t _6; + +__BB(2): + _1 = __BIT_FIELD_REF <int32x2_t> (x, 64, 64); + _2 = __BIT_FIELD_REF <int32x2_t> (y, 64, 0); + _6 = _Literal (int32x4_t) { _1, _2 }; + return _6; +} + +int32x4_t __GIMPLE (ssa) +foo4 (int32x4_t x, int32x4_t y) +{ + int32x1_t _1; + int32x1_t _2; + int32x1_t _3; + int32x1_t _4; + int32x4_t _6; + +__BB(2): + _1 = __BIT_FIELD_REF <int32x1_t> (x, 32, 64); + _2 = __BIT_FIELD_REF <int32x1_t> (y, 32, 96); + _3 = __BIT_FIELD_REF <int32x1_t> (x, 32, 0); + _4 = __BIT_FIELD_REF <int32x1_t> (y, 32, 32); + _6 = _Literal (int32x4_t) { _1, _2, _3, _4 }; + return _6; +} + +int32x4_t __GIMPLE (ssa) +foo5 (int32x4_t x) +{ + int32x2_t _1; + int32x2_t _2; + int32x4_t _6; + +__BB(2): + _1 = 
__BIT_FIELD_REF <int32x2_t> (x, 64, 64); + _2 = _Literal (int32x2_t) { 1, 2 }; + _6 = _Literal (int32x4_t) { _1, _2 }; + return _6; +} + +int32x4_t __GIMPLE (ssa) +foo6 (int32x4_t x, int32_t y) +{ + int32x2_t _1; + int32x2_t _2; + int32x4_t _6; + +__BB(2): + _1 = __BIT_FIELD_REF <int32x2_t> (x, 64, 64); + _2 = _Literal (int32x2_t) { y, y }; + _6 = _Literal (int32x4_t) { _1, _2 }; + return _6; +} + +int32x4_t __GIMPLE (ssa) +foo7 (int32x4_t x) +{ + int32x2_t _1; + int32x2_t _2; + int32x4_t _6; + +__BB(2): + _1 = __BIT_FIELD_REF <int32x2_t> (x, 64, 64); + _2 = _Literal (int32x2_t) { 1, 2 }; + _6 = _Literal (int32x4_t) { _2, _1 }; + return _6; +} + +int32x4_t __GIMPLE (ssa) +foo8 (int32x4_t x, int32_t y) +{ + int32x2_t _1; + int32x2_t _2; + int32x4_t _6; + +__BB(2): + _1 = __BIT_FIELD_REF <int32x2_t> (x, 64, 64); + _2 = _Literal (int32x2_t) { y, y }; + _6 = _Literal (int32x4_t) { _2, _1 }; + return _6; +} + +int32x4_t __GIMPLE (ssa) +foo9 (int32x4_t x) +{ + int32x1_t _1; + int32x1_t _2; + int32x1_t _3; + int32x1_t _4; + int32x4_t _6; + +__BB(2): + _1 = __BIT_FIELD_REF <int32x1_t> (x, 32, 96); + _2 = __BIT_FIELD_REF <int32x1_t> (x, 32, 64); + _3 = _Literal (int32x1_t) { 1 }; + _4 = _Literal (int32x1_t) { 1 }; + _6 = _Literal (int32x4_t) { _3, _4, _1, _2 }; + return _6; +} + +int32x4_t __GIMPLE (ssa) +foo10 (int32x4_t x, int32_t y) +{ + int32x1_t _1; + int32x1_t _2; + int32x1_t _3; + int32x1_t _4; + int32x4_t _6; + +__BB(2): + _1 = __BIT_FIELD_REF <int32x1_t> (x, 32, 96); + _2 = __BIT_FIELD_REF <int32x1_t> (x, 32, 64); + _3 = _Literal (int32x1_t) { y }; + _4 = _Literal (int32x1_t) { y }; + _6 = _Literal (int32x4_t) { _3, _4, _1, _2 }; + + return _6; +} + + +/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 10 "forwprop1" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/combine_ext.c b/gcc/testsuite/gcc.target/aarch64/simd/combine_ext.c new file mode 100644 index 00000000000..f10a2c6ff24 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/combine_ext.c @@ -0,0 +1,46 @@ +/* { dg-do compile } */ +/* { dg-options "-O1 -fdump-tree-optimized" } */ + 
+#include <arm_neon.h> + +#ifndef TEST_COMBINE_HIGH_LOW_1 +#define TEST_COMBINE_HIGH_LOW_1(TYPE, SUFF) \ + TYPE rev_##TYPE##_1 (TYPE x) \ + { \ + return vcombine_##SUFF (vget_high_##SUFF (x), vget_low_##SUFF (x)); \ + } +#endif + +#ifndef TEST_COMBINE_HIGH_LOW_2 +#define TEST_COMBINE_HIGH_LOW_2(TYPE, SUFF) \ + TYPE rev_##TYPE##_2 (TYPE x, TYPE y) \ + { \ + return vcombine_##SUFF (vget_high_##SUFF (x), vget_low_##SUFF (y)); \ + } +#endif + +TEST_COMBINE_HIGH_LOW_1 (int8x16_t, s8) +TEST_COMBINE_HIGH_LOW_1 (int16x8_t, s16) +TEST_COMBINE_HIGH_LOW_1 (int32x4_t, s32) +TEST_COMBINE_HIGH_LOW_1 (int64x2_t, s64) +TEST_COMBINE_HIGH_LOW_1 (uint8x16_t, u8) +TEST_COMBINE_HIGH_LOW_1 (uint16x8_t, u16) +TEST_COMBINE_HIGH_LOW_1 (uint32x4_t, u32) +TEST_COMBINE_HIGH_LOW_1 (uint64x2_t, u64) +TEST_COMBINE_HIGH_LOW_1 (float16x8_t, f16) +TEST_COMBINE_HIGH_LOW_1 (float32x4_t, f32) + +TEST_COMBINE_HIGH_LOW_2 (int8x16_t, s8) +TEST_COMBINE_HIGH_LOW_2 (int16x8_t, s16) +TEST_COMBINE_HIGH_LOW_2 (int32x4_t, s32) +TEST_COMBINE_HIGH_LOW_2 (int64x2_t, s64) +TEST_COMBINE_HIGH_LOW_2 (uint8x16_t, u8) +TEST_COMBINE_HIGH_LOW_2 (uint16x8_t, u16) +TEST_COMBINE_HIGH_LOW_2 (uint32x4_t, u32) +TEST_COMBINE_HIGH_LOW_2 (uint64x2_t, u64) +TEST_COMBINE_HIGH_LOW_2 (float16x8_t, f16) +TEST_COMBINE_HIGH_LOW_2 (float32x4_t, f32) + +/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 20 "optimized" } } */ +/* { dg-final { scan-assembler-times {ext\tv0.16b, v0.16b, v0.16b, #8} 10 } } */ +/* { dg-final { scan-assembler-times {ext\tv0.16b, v0.16b, v1.16b, #8} 10 } } */ diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc index 67deecaf004..ae7f0e770ba 100644 --- a/gcc/tree-ssa-forwprop.cc +++ b/gcc/tree-ssa-forwprop.cc @@ -3807,13 +3807,16 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi) bool maybe_blend[2] = { true, true }; tree one_constant = NULL_TREE; tree one_nonconstant = NULL_TREE; + tree subelt; auto_vec<tree> constants; constants.safe_grow_cleared (nelts, true); auto_vec<std::pair<unsigned, unsigned>, 64> elts; + unsigned int tsubelts = 
0; FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (op), i, elt) { tree ref, op1; - unsigned int elem; + unsigned int elem, src_elem_size; + unsigned HOST_WIDE_INT nsubelts = 1; if (i >= nelts) return false; @@ -3824,10 +3827,16 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi) if (op1 && TREE_CODE ((ref = TREE_OPERAND (op1, 0))) == SSA_NAME && VECTOR_TYPE_P (TREE_TYPE (ref)) - && useless_type_conversion_p (TREE_TYPE (op1), + && (useless_type_conversion_p (TREE_TYPE (op1), TREE_TYPE (TREE_TYPE (ref))) - && constant_multiple_p (bit_field_offset (op1), - bit_field_size (op1), &elem) + || (VECTOR_TYPE_P (TREE_TYPE (op1)) + && useless_type_conversion_p (TREE_TYPE (TREE_TYPE (op1)), + TREE_TYPE (TREE_TYPE (ref))) + && TYPE_VECTOR_SUBPARTS (TREE_TYPE (op1)) + .is_constant (&nsubelts))) + && constant_multiple_p (bit_field_size (op1), nsubelts, + &src_elem_size) + && constant_multiple_p (bit_field_offset (op1), src_elem_size, &elem) && TYPE_VECTOR_SUBPARTS (TREE_TYPE (ref)).is_constant (&refnelts)) { unsigned int j; @@ -3851,7 +3860,9 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi) maybe_ident = false; if (elem != i) maybe_blend[j] = false; - elts.safe_push (std::make_pair (j, elem)); + for (unsigned int k = 0; k < nsubelts; ++k) + elts.safe_push (std::make_pair (j, elem + k)); + tsubelts += nsubelts; continue; } /* Else fallthru. */ @@ -3863,27 +3874,47 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi) && orig[1] != error_mark_node) return false; orig[1] = error_mark_node; + if (VECTOR_TYPE_P (TREE_TYPE (elt->value)) + && !TYPE_VECTOR_SUBPARTS (TREE_TYPE (elt->value)) + .is_constant (&nsubelts)) + return false; if (CONSTANT_CLASS_P (elt->value)) { if (one_nonconstant) return false; if (!one_constant) - one_constant = elt->value; - constants[i] = elt->value; + one_constant = TREE_CODE (elt->value) == VECTOR_CST + ? 
VECTOR_CST_ELT (elt->value, 0) + : elt->value; + if (TREE_CODE (elt->value) == VECTOR_CST) + { + for (unsigned int k = 0; k < nsubelts; k++) + constants[tsubelts + k] = VECTOR_CST_ELT (elt->value, k); + } + else + constants[tsubelts] = elt->value; } else { if (one_constant) return false; + subelt = VECTOR_TYPE_P (TREE_TYPE (elt->value)) + ? ssa_uniform_vector_p (elt->value) + : elt->value; + if (!subelt) + return false; if (!one_nonconstant) - one_nonconstant = elt->value; - else if (!operand_equal_p (one_nonconstant, elt->value, 0)) + one_nonconstant = subelt; + else if (!operand_equal_p (one_nonconstant, subelt, 0)) return false; } - elts.safe_push (std::make_pair (1, i)); + for (unsigned int k = 0; k < nsubelts; ++k) + elts.safe_push (std::make_pair (1, tsubelts + k)); + tsubelts += nsubelts; maybe_ident = false; } - if (i < nelts) + + if (elts.length () < nelts) return false; if (! orig[0] diff --git a/gcc/tree-vect-generic.cc b/gcc/tree-vect-generic.cc index b8e6a7168ff..29d97cff815 100644 --- a/gcc/tree-vect-generic.cc +++ b/gcc/tree-vect-generic.cc @@ -1619,24 +1619,6 @@ lower_vec_perm (gimple_stmt_iterator *gsi) update_stmt (gsi_stmt (*gsi)); } -/* If OP is a uniform vector return the element it is a splat from. */ - -static tree -ssa_uniform_vector_p (tree op) -{ - if (TREE_CODE (op) == VECTOR_CST - || TREE_CODE (op) == VEC_DUPLICATE_EXPR - || TREE_CODE (op) == CONSTRUCTOR) - return uniform_vector_p (op); - if (TREE_CODE (op) == SSA_NAME) - { - gimple *def_stmt = SSA_NAME_DEF_STMT (op); - if (gimple_assign_single_p (def_stmt)) - return uniform_vector_p (gimple_assign_rhs1 (def_stmt)); - } - return NULL_TREE; -} - /* Return the type that should be used to implement OP on type TYPE. This is TYPE itself if the target can do the operation directly, otherwise it is a scalar type or a smaller vector type. 
*/ diff --git a/gcc/tree.cc b/gcc/tree.cc index 446261a8a8c..298784e6960 100644 --- a/gcc/tree.cc +++ b/gcc/tree.cc @@ -10823,6 +10823,24 @@ uniform_vector_p (const_tree vec) return NULL_TREE; } +/* If OP is a uniform vector return the element it is a splat from. */ + +tree +ssa_uniform_vector_p (tree op) +{ + if (TREE_CODE (op) == VECTOR_CST + || TREE_CODE (op) == VEC_DUPLICATE_EXPR + || TREE_CODE (op) == CONSTRUCTOR) + return uniform_vector_p (op); + if (TREE_CODE (op) == SSA_NAME) + { + gimple *def_stmt = SSA_NAME_DEF_STMT (op); + if (gimple_assign_single_p (def_stmt)) + return uniform_vector_p (gimple_assign_rhs1 (def_stmt)); + } + return NULL_TREE; +} + /* If the argument is INTEGER_CST, return it. If the argument is vector with all elements the same INTEGER_CST, return that INTEGER_CST. Otherwise return NULL_TREE. diff --git a/gcc/tree.h b/gcc/tree.h index 4a4b8ef7f0a..70a5ece48ef 100644 --- a/gcc/tree.h +++ b/gcc/tree.h @@ -5303,6 +5303,10 @@ extern tree vector_cst_elt (const_tree, unsigned int); extern tree uniform_vector_p (const_tree); +/* Same as above, but if VEC is an SSA_NAME, inspect its definition. */ + +extern tree ssa_uniform_vector_p (tree); + /* If the argument is INTEGER_CST, return it. If the argument is vector with all elements the same INTEGER_CST, return that INTEGER_CST. Otherwise return NULL_TREE. */