(view_convert (rshift (view_convert:ntype @0) @1))
(convert (rshift (convert:ntype @0) @1))))))
+#if GIMPLE
+ /* Fold ((x + y) >> 1) into IFN_AVG_FLOOR (x, y) if x and y are vectors in
+ which each element is known to have at least one leading zero bit. */
+(simplify
+ (rshift (plus:cs @0 @1) integer_onep)
+ (if (VECTOR_TYPE_P (type)
+ && direct_internal_fn_supported_p (IFN_AVG_FLOOR, type, OPTIMIZE_FOR_BOTH)
+ && wi::clz (get_nonzero_bits (@0)) > 0
+ && wi::clz (get_nonzero_bits (@1)) > 0)
+ (IFN_AVG_FLOOR @0 @1)))
+#endif
+
/* Try to fold (type) X op CST -> (type) (X op ((type-x) CST))
when profitable.
For bitwise binary operations apply operand conversions to the
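The precondition the new rule checks, wi::clz (get_nonzero_bits (...)) > 0 on each addend, is what guarantees the element-wide addition cannot wrap, so the narrow shift really computes the flooring average. A standalone check of that arithmetic fact for 8-bit elements (not part of the patch):

  #include <assert.h>
  #include <stdint.h>

  int
  main (void)
  {
    /* When both operands have their top bit clear, x + y fits in 8 bits,
       so the narrow (x + y) >> 1 equals the flooring average.  */
    for (unsigned x = 0; x <= 0x7f; x++)
      for (unsigned y = 0; y <= 0x7f; y++)
        assert ((uint8_t) ((uint8_t) (x + y) >> 1) == (x + y) / 2);
    return 0;
  }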
--- /dev/null
+/* Test if SIMD fused unsigned halving adds are generated */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include <arm_neon.h>
+
+#define FUSED_SIMD_UHADD(vectype, q, ts, mask) \
+ vectype simd_uhadd ## q ## _ ## ts ## _1 (vectype a) \
+ { \
+ vectype v1 = vand ## q ## _ ## ts (a, vdup ## q ## _n_ ## ts (mask)); \
+ vectype v2 = vdup ## q ## _n_ ## ts (mask); \
+ return vshr ## q ## _n_ ## ts (vadd ## q ## _ ## ts (v1, v2), 1); \
+ } \
+ \
+ vectype simd_uhadd ## q ## _ ## ts ## _2 (vectype a, vectype b) \
+ { \
+ vectype v1 = vand ## q ## _ ## ts (a, vdup ## q ## _n_ ## ts (mask)); \
+ vectype v2 = vand ## q ## _ ## ts (b, vdup ## q ## _n_ ## ts (mask)); \
+ return vshr ## q ## _n_ ## ts (vadd ## q ## _ ## ts (v1, v2), 1); \
+ }
+
+FUSED_SIMD_UHADD (uint8x8_t, , u8, 0x7f)
+FUSED_SIMD_UHADD (uint8x16_t, q, u8, 0x7f)
+FUSED_SIMD_UHADD (uint16x4_t, , u16, 0x7fff)
+FUSED_SIMD_UHADD (uint16x8_t, q, u16, 0x7fff)
+FUSED_SIMD_UHADD (uint32x2_t, , u32, 0x7fffffff)
+FUSED_SIMD_UHADD (uint32x4_t, q, u32, 0x7fffffff)
+
+/* { dg-final { scan-assembler-times {\tuhadd\tv[0-9]+\.8b,} 2 } } */
+/* { dg-final { scan-assembler-times {\tuhadd\tv[0-9]+\.16b,} 2 } } */
+/* { dg-final { scan-assembler-times {\tuhadd\tv[0-9]+\.4h,} 2 } } */
+/* { dg-final { scan-assembler-times {\tuhadd\tv[0-9]+\.8h,} 2 } } */
+/* { dg-final { scan-assembler-times {\tuhadd\tv[0-9]+\.2s,} 2 } } */
+/* { dg-final { scan-assembler-times {\tuhadd\tv[0-9]+\.4s,} 2 } } */
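For readability, the first function produced by FUSED_SIMD_UHADD (uint8x8_t, , u8, 0x7f) expands to roughly the following; both inputs of the addition are known to have a leading zero bit in every element, so each function is expected to emit a single uhadd:

  uint8x8_t simd_uhadd_u8_1 (uint8x8_t a)
  {
    uint8x8_t v1 = vand_u8 (a, vdup_n_u8 (0x7f));
    uint8x8_t v2 = vdup_n_u8 (0x7f);
    return vshr_n_u8 (vadd_u8 (v1, v2), 1);
  }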
{
val.lattice_val = VARYING;
val.mask = -1;
- if (flag_tree_bit_ccp)
+ if (flag_tree_bit_ccp && !VECTOR_TYPE_P (TREE_TYPE (var)))
{
wide_int nonzero_bits = get_nonzero_bits (var);
tree value;
is_constant = (val.lattice_val == CONSTANT);
}
+ tree lhs = gimple_get_lhs (stmt);
if (flag_tree_bit_ccp
+ && lhs && TREE_CODE (lhs) == SSA_NAME && !VECTOR_TYPE_P (TREE_TYPE (lhs))
&& ((is_constant && TREE_CODE (val.value) == INTEGER_CST)
- || !is_constant)
- && gimple_get_lhs (stmt)
- && TREE_CODE (gimple_get_lhs (stmt)) == SSA_NAME)
+ || !is_constant))
{
- tree lhs = gimple_get_lhs (stmt);
wide_int nonzero_bits = get_nonzero_bits (lhs);
/* Use element_precision instead of TYPE_PRECISION so complex and
vector types get a non-zero precision. */
unsigned int precision = element_precision (TREE_TYPE (name));
+
+ if (VECTOR_TYPE_P (TREE_TYPE (name)))
+ {
+ tree elem = uniform_vector_p (name);
+ if (elem)
+ return get_nonzero_bits_1 (elem);
+ }
+
if (TREE_CODE (name) != SSA_NAME)
return wi::shwi (-1, precision);
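As an end-to-end illustration (a sketch, not taken from the patch or its testsuite), the same shape can also be written with GCC's generic vector extensions; whether it is folded to IFN_AVG_FLOOR depends on get_nonzero_bits being able to prove the leading zero bit of both addends and on the target supporting the internal function:

  typedef unsigned char v16u8 __attribute__ ((vector_size (16)));

  v16u8
  vec_avg_floor (v16u8 a, v16u8 b)
  {
    v16u8 x = a & 0x7f;   /* top bit of every element known zero */
    v16u8 y = b & 0x7f;
    return (x + y) >> 1;  /* candidate for the new IFN_AVG_FLOOR fold */
  }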