--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-forwprop1" } */
+/* { dg-additional-options "-fgimple" } */
+
+#include <stdint.h>
+
+typedef int32_t int32x4_t __attribute__((vector_size(16)));
+typedef int32_t int32x2_t __attribute__((vector_size(8)));
+typedef int32_t int32x1_t __attribute__((vector_size(4)));
+
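+/* Swapping the two 64-bit halves of X should fold to one VEC_PERM_EXPR.  */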
+int32x4_t __GIMPLE (ssa)
+foo (int32x4_t x)
+{
+ int32x2_t _1;
+ int32x2_t _2;
+ int32x4_t _6;
+
+__BB(2):
+  _1 = __BIT_FIELD_REF <int32x2_t> (x_7(D), 64, 64);
+  _2 = __BIT_FIELD_REF <int32x2_t> (x_7(D), 64, 0);
+ _6 = _Literal (int32x4_t) { _1, _2 };
+ return _6;
+}
+
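+/* The same swap, built from four single-element sub-vectors.  */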
+int32x4_t __GIMPLE (ssa)
+foo2 (int32x4_t x)
+{
+ int32x1_t _1;
+ int32x1_t _2;
+ int32x1_t _3;
+ int32x1_t _4;
+ int32x4_t _6;
+
+__BB(2):
+  _1 = __BIT_FIELD_REF <int32x1_t> (x_7(D), 32, 64);
+  _2 = __BIT_FIELD_REF <int32x1_t> (x_7(D), 32, 96);
+  _3 = __BIT_FIELD_REF <int32x1_t> (x_7(D), 32, 0);
+  _4 = __BIT_FIELD_REF <int32x1_t> (x_7(D), 32, 32);
+ _6 = _Literal (int32x4_t) { _1, _2, _3, _4 };
+ return _6;
+}
+
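+/* Two-input case: the high half of X followed by the low half of Y.  */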
+int32x4_t __GIMPLE (ssa)
+foo3 (int32x4_t x, int32x4_t y)
+{
+ int32x2_t _1;
+ int32x2_t _2;
+ int32x4_t _6;
+
+__BB(2):
+  _1 = __BIT_FIELD_REF <int32x2_t> (x_7(D), 64, 64);
+  _2 = __BIT_FIELD_REF <int32x2_t> (y_8(D), 64, 0);
+ _6 = _Literal (int32x4_t) { _1, _2 };
+ return _6;
+}
+
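+/* Two-input case built from single-element pieces of X and Y.  */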
+int32x4_t __GIMPLE (ssa)
+foo4 (int32x4_t x, int32x4_t y)
+{
+ int32x1_t _1;
+ int32x1_t _2;
+ int32x1_t _3;
+ int32x1_t _4;
+ int32x4_t _6;
+
+__BB(2):
+  _1 = __BIT_FIELD_REF <int32x1_t> (x_7(D), 32, 64);
+  _2 = __BIT_FIELD_REF <int32x1_t> (y_8(D), 32, 96);
+  _3 = __BIT_FIELD_REF <int32x1_t> (x_7(D), 32, 0);
+  _4 = __BIT_FIELD_REF <int32x1_t> (y_8(D), 32, 32);
+ _6 = _Literal (int32x4_t) { _1, _2, _3, _4 };
+ return _6;
+}
+
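+/* High half of X blended with a constant vector.  */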
+int32x4_t __GIMPLE (ssa)
+foo5 (int32x4_t x)
+{
+ int32x2_t _1;
+ int32x2_t _2;
+ int32x4_t _6;
+
+__BB(2):
+  _1 = __BIT_FIELD_REF <int32x2_t> (x_7(D), 64, 64);
+ _2 = _Literal (int32x2_t) { 1, 2 };
+ _6 = _Literal (int32x4_t) { _1, _2 };
+ return _6;
+}
+
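+/* High half of X blended with a uniform non-constant vector.  */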
+int32x4_t __GIMPLE (ssa)
+foo6 (int32x4_t x, int32_t y)
+{
+ int32x2_t _1;
+ int32x2_t _2;
+ int32x4_t _6;
+
+__BB(2):
+  _1 = __BIT_FIELD_REF <int32x2_t> (x_7(D), 64, 64);
+  _2 = _Literal (int32x2_t) { y_8(D), y_8(D) };
+ _6 = _Literal (int32x4_t) { _1, _2 };
+ return _6;
+}
+
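+/* As foo5, with the constant vector first.  */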
+int32x4_t __GIMPLE (ssa)
+foo7 (int32x4_t x)
+{
+ int32x2_t _1;
+ int32x2_t _2;
+ int32x4_t _6;
+
+__BB(2):
+  _1 = __BIT_FIELD_REF <int32x2_t> (x_7(D), 64, 64);
+ _2 = _Literal (int32x2_t) { 1, 2 };
+ _6 = _Literal (int32x4_t) { _2, _1 };
+ return _6;
+}
+
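+/* As foo6, with the uniform vector first.  */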
+int32x4_t __GIMPLE (ssa)
+foo8 (int32x4_t x, int32_t y)
+{
+ int32x2_t _1;
+ int32x2_t _2;
+ int32x4_t _6;
+
+__BB(2):
+  _1 = __BIT_FIELD_REF <int32x2_t> (x_7(D), 64, 64);
+  _2 = _Literal (int32x2_t) { y_8(D), y_8(D) };
+ _6 = _Literal (int32x4_t) { _2, _1 };
+ return _6;
+}
+
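+/* Constant single-element vectors followed by lanes 3 and 2 of X.  */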
+int32x4_t __GIMPLE (ssa)
+foo9 (int32x4_t x)
+{
+ int32x1_t _1;
+ int32x1_t _2;
+ int32x1_t _3;
+ int32x1_t _4;
+ int32x4_t _6;
+
+__BB(2):
+  _1 = __BIT_FIELD_REF <int32x1_t> (x_7(D), 32, 96);
+  _2 = __BIT_FIELD_REF <int32x1_t> (x_7(D), 32, 64);
+ _3 = _Literal (int32x1_t) { 1 };
+ _4 = _Literal (int32x1_t) { 1 };
+ _6 = _Literal (int32x4_t) { _3, _4, _1, _2 };
+ return _6;
+}
+
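+/* Uniform single-element vectors followed by lanes 3 and 2 of X.  */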
+int32x4_t __GIMPLE (ssa)
+foo10 (int32x4_t x, int32_t y)
+{
+ int32x1_t _1;
+ int32x1_t _2;
+ int32x1_t _3;
+ int32x1_t _4;
+ int32x4_t _6;
+
+__BB(2):
+  _1 = __BIT_FIELD_REF <int32x1_t> (x_7(D), 32, 96);
+  _2 = __BIT_FIELD_REF <int32x1_t> (x_7(D), 32, 64);
+  _3 = _Literal (int32x1_t) { y_8(D) };
+  _4 = _Literal (int32x1_t) { y_8(D) };
+  _6 = _Literal (int32x4_t) { _3, _4, _1, _2 };
+  return _6;
+}
+
+/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 10 "forwprop1" } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O1 -fdump-tree-optimized" } */
+
+#include <arm_neon.h>
+
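+/* vcombine (vget_high (x), vget_low (x)) swaps the 64-bit halves of X
+   and should become a single EXT instruction.  */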
+#ifndef TEST_COMBINE_HIGH_LOW_1
+#define TEST_COMBINE_HIGH_LOW_1(TYPE, SUFF) \
+ TYPE rev_##TYPE##_1 (TYPE x) \
+ { \
+ return vcombine_##SUFF (vget_high_##SUFF (x), vget_low_##SUFF (x)); \
+ }
+#endif
+
+#ifndef TEST_COMBINE_HIGH_LOW_2
+#define TEST_COMBINE_HIGH_LOW_2(TYPE, SUFF) \
+ TYPE rev_##TYPE##_2 (TYPE x, TYPE y) \
+ { \
+ return vcombine_##SUFF (vget_high_##SUFF (x), vget_low_##SUFF (y)); \
+ }
+#endif
+
+TEST_COMBINE_HIGH_LOW_1 (int8x16_t, s8)
+TEST_COMBINE_HIGH_LOW_1 (int16x8_t, s16)
+TEST_COMBINE_HIGH_LOW_1 (int32x4_t, s32)
+TEST_COMBINE_HIGH_LOW_1 (int64x2_t, s64)
+TEST_COMBINE_HIGH_LOW_1 (uint8x16_t, u8)
+TEST_COMBINE_HIGH_LOW_1 (uint16x8_t, u16)
+TEST_COMBINE_HIGH_LOW_1 (uint32x4_t, u32)
+TEST_COMBINE_HIGH_LOW_1 (uint64x2_t, u64)
+TEST_COMBINE_HIGH_LOW_1 (float16x8_t, f16)
+TEST_COMBINE_HIGH_LOW_1 (float32x4_t, f32)
+
+TEST_COMBINE_HIGH_LOW_2 (int8x16_t, s8)
+TEST_COMBINE_HIGH_LOW_2 (int16x8_t, s16)
+TEST_COMBINE_HIGH_LOW_2 (int32x4_t, s32)
+TEST_COMBINE_HIGH_LOW_2 (int64x2_t, s64)
+TEST_COMBINE_HIGH_LOW_2 (uint8x16_t, u8)
+TEST_COMBINE_HIGH_LOW_2 (uint16x8_t, u16)
+TEST_COMBINE_HIGH_LOW_2 (uint32x4_t, u32)
+TEST_COMBINE_HIGH_LOW_2 (uint64x2_t, u64)
+TEST_COMBINE_HIGH_LOW_2 (float16x8_t, f16)
+TEST_COMBINE_HIGH_LOW_2 (float32x4_t, f32)
+
+/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 20 "optimized" } } */
+/* { dg-final { scan-assembler-times {ext\tv0.16b, v0.16b, v0.16b, #8} 10 } } */
+/* { dg-final { scan-assembler-times {ext\tv0.16b, v0.16b, v1.16b, #8} 10 } } */
   bool maybe_blend[2] = { true, true };
   tree one_constant = NULL_TREE;
   tree one_nonconstant = NULL_TREE;
   auto_vec<tree> constants;
   constants.safe_grow_cleared (nelts, true);
   auto_vec<std::pair<unsigned, unsigned>, 64> elts;
+  unsigned int tsubelts = 0;
   FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (op), i, elt)
     {
       tree ref, op1;
-      unsigned int elem;
+      unsigned int elem, src_elem_size;
+      unsigned HOST_WIDE_INT nsubelts = 1;
       if (i >= nelts)
 	return false;
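+      /* The extracted piece may itself be a sub-vector, in which case
+	 NSUBELTS is its lane count and ELEM is counted in scalar lanes.  */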
       if (op1
 	  && TREE_CODE ((ref = TREE_OPERAND (op1, 0))) == SSA_NAME
 	  && VECTOR_TYPE_P (TREE_TYPE (ref))
-	  && useless_type_conversion_p (TREE_TYPE (op1),
-					TREE_TYPE (TREE_TYPE (ref)))
-	  && constant_multiple_p (bit_field_offset (op1),
-				  bit_field_size (op1), &elem)
+	  && (useless_type_conversion_p (TREE_TYPE (op1),
+					 TREE_TYPE (TREE_TYPE (ref)))
+	      || (VECTOR_TYPE_P (TREE_TYPE (op1))
+		  && useless_type_conversion_p (TREE_TYPE (TREE_TYPE (op1)),
+						TREE_TYPE (TREE_TYPE (ref)))
+		  && TYPE_VECTOR_SUBPARTS (TREE_TYPE (op1))
+		       .is_constant (&nsubelts)))
+	  && constant_multiple_p (bit_field_size (op1), nsubelts,
+				  &src_elem_size)
+	  && constant_multiple_p (bit_field_offset (op1), src_elem_size,
+				  &elem)
 	  && TYPE_VECTOR_SUBPARTS (TREE_TYPE (ref)).is_constant (&refnelts))
 	{
 	  unsigned int j;
-	  if (elem != i || j != 0)
-	    maybe_ident = false;
-	  if (elem != i)
-	    maybe_blend[j] = false;
+	  if (elem != tsubelts || j != 0)
+	    maybe_ident = false;
+	  if (elem != tsubelts)
+	    maybe_blend[j] = false;
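+	  /* A sub-vector of NSUBELTS lanes fills NSUBELTS consecutive
+	     slots of the permutation.  */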
-	  elts.safe_push (std::make_pair (j, elem));
+	  for (unsigned int k = 0; k < nsubelts; ++k)
+	    elts.safe_push (std::make_pair (j, elem + k));
+	  tsubelts += nsubelts;
 	  continue;
 	}
       /* Else fallthru.  */
       if (orig[1]
 	  && orig[1] != error_mark_node)
 	return false;
       orig[1] = error_mark_node;
+      if (VECTOR_TYPE_P (TREE_TYPE (elt->value))
+	  && !TYPE_VECTOR_SUBPARTS (TREE_TYPE (elt->value))
+	       .is_constant (&nsubelts))
+	return false;
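+      /* Record constant lanes; a VECTOR_CST contributes NSUBELTS of them.  */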
       if (CONSTANT_CLASS_P (elt->value))
 	{
 	  if (one_nonconstant)
 	    return false;
 	  if (!one_constant)
-	    one_constant = elt->value;
-	  constants[i] = elt->value;
+	    one_constant = (TREE_CODE (elt->value) == VECTOR_CST
+			    ? VECTOR_CST_ELT (elt->value, 0)
+			    : elt->value);
+	  if (TREE_CODE (elt->value) == VECTOR_CST)
+	    {
+	      for (unsigned int k = 0; k < nsubelts; k++)
+		constants[tsubelts + k] = VECTOR_CST_ELT (elt->value, k);
+	    }
+	  else
+	    constants[tsubelts] = elt->value;
 	}
       else
 	{
 	  if (one_constant)
 	    return false;
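+	  /* Non-constant sub-vectors must all be splats of one value.  */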
+	  tree subelt = (VECTOR_TYPE_P (TREE_TYPE (elt->value))
+			 ? ssa_uniform_vector_p (elt->value)
+			 : elt->value);
+	  if (!subelt)
+	    return false;
 	  if (!one_nonconstant)
-	    one_nonconstant = elt->value;
-	  else if (!operand_equal_p (one_nonconstant, elt->value, 0))
+	    one_nonconstant = subelt;
+	  else if (!operand_equal_p (one_nonconstant, subelt, 0))
 	    return false;
 	}
-      elts.safe_push (std::make_pair (1, i));
+      for (unsigned int k = 0; k < nsubelts; ++k)
+	elts.safe_push (std::make_pair (1, tsubelts + k));
+      tsubelts += nsubelts;
       maybe_ident = false;
     }
-  if (i < nelts)
+  if (elts.length () < nelts)
     return false;
if (! orig[0]
update_stmt (gsi_stmt (*gsi));
}
-/* If OP is a uniform vector return the element it is a splat from. */
-
-static tree
-ssa_uniform_vector_p (tree op)
-{
- if (TREE_CODE (op) == VECTOR_CST
- || TREE_CODE (op) == VEC_DUPLICATE_EXPR
- || TREE_CODE (op) == CONSTRUCTOR)
- return uniform_vector_p (op);
- if (TREE_CODE (op) == SSA_NAME)
- {
- gimple *def_stmt = SSA_NAME_DEF_STMT (op);
- if (gimple_assign_single_p (def_stmt))
- return uniform_vector_p (gimple_assign_rhs1 (def_stmt));
- }
- return NULL_TREE;
-}
-
/* Return the type that should be used to implement OP on type TYPE.
This is TYPE itself if the target can do the operation directly,
otherwise it is a scalar type or a smaller vector type. */
return NULL_TREE;
}
+/* If OP is a uniform vector, return the element it is a splat from.  */
+
+tree
+ssa_uniform_vector_p (tree op)
+{
+ if (TREE_CODE (op) == VECTOR_CST
+ || TREE_CODE (op) == VEC_DUPLICATE_EXPR
+ || TREE_CODE (op) == CONSTRUCTOR)
+ return uniform_vector_p (op);
+ if (TREE_CODE (op) == SSA_NAME)
+ {
+ gimple *def_stmt = SSA_NAME_DEF_STMT (op);
+ if (gimple_assign_single_p (def_stmt))
+ return uniform_vector_p (gimple_assign_rhs1 (def_stmt));
+ }
+ return NULL_TREE;
+}
+
/* If the argument is INTEGER_CST, return it. If the argument is vector
with all elements the same INTEGER_CST, return that INTEGER_CST. Otherwise
return NULL_TREE.
extern tree uniform_vector_p (const_tree);
+/* Same as above, but if VEC is an SSA_NAME, inspect its definition. */
+
+extern tree ssa_uniform_vector_p (tree);
+
/* If the argument is INTEGER_CST, return it. If the argument is vector
with all elements the same INTEGER_CST, return that INTEGER_CST. Otherwise
return NULL_TREE. */