This file is consumed by genmatch which produces gimple-match.c
and generic-match.c from it.
- Copyright (C) 2014-2019 Free Software Foundation, Inc.
+ Copyright (C) 2014-2020 Free Software Foundation, Inc.
Contributed by Richard Biener <rguenther@suse.de>
and Prathamesh Kulkarni <bilbotheelffriend@gmail.com>
(define_operator_list COND_TERNARY
IFN_COND_FMA IFN_COND_FMS IFN_COND_FNMA IFN_COND_FNMS)
-/* As opposed to convert?, this still creates a single pattern, so
- it is not a suitable replacement for convert? in all cases. */
+/* nop_convert? combines convert? and view_convert? into a single pattern,
+   additionally conditioned on the conversion being tree_nop_conversion_p.  */
(match (nop_convert @0)
(convert @0)
(if (tree_nop_conversion_p (type, TREE_TYPE (@0)))))
&& known_eq (TYPE_VECTOR_SUBPARTS (type),
TYPE_VECTOR_SUBPARTS (TREE_TYPE (@0)))
&& tree_nop_conversion_p (TREE_TYPE (type), TREE_TYPE (TREE_TYPE (@0))))))
-/* This one has to be last, or it shadows the others. */
-(match (nop_convert @0)
- @0)
/* Transform likes of (char) ABS_EXPR <(int) x> into (char) ABSU_EXPR <x>
ABSU_EXPR returns unsigned absolute value of the operand and the operand
/* Convert - (~A) to A + 1. */
(simplify
- (negate (nop_convert (bit_not @0)))
+ (negate (nop_convert? (bit_not @0)))
(plus (view_convert @0) { build_each_one_cst (type); }))
/* Convert ~ (A - 1) or ~ (A + -1) to -A. */
/* Otherwise prefer ~(X ^ Y) to ~X ^ Y as more canonical. */
(simplify
- (bit_xor:c (nop_convert:s (bit_not:s @0)) @1)
+ (bit_xor:c (nop_convert?:s (bit_not:s @0)) @1)
(if (tree_nop_conversion_p (type, TREE_TYPE (@0)))
(bit_not (bit_xor (view_convert @0) @1))))
/* For equality, this is also true with wrapping overflow. */
(for op (eq ne)
(simplify
- (op:c (nop_convert@3 (plus:c@2 @0 (convert1? @1))) (convert2? @1))
+ (op:c (nop_convert?@3 (plus:c@2 @0 (convert1? @1))) (convert2? @1))
(if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0))
&& (TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@0))
|| TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0)))
&& tree_nop_conversion_p (TREE_TYPE (@3), TREE_TYPE (@1)))
(op @0 { build_zero_cst (TREE_TYPE (@0)); })))
(simplify
- (op:c (nop_convert@3 (pointer_plus@2 (convert1? @0) @1)) (convert2? @0))
+ (op:c (nop_convert?@3 (pointer_plus@2 (convert1? @0) @1)) (convert2? @0))
(if (tree_nop_conversion_p (TREE_TYPE (@2), TREE_TYPE (@0))
&& tree_nop_conversion_p (TREE_TYPE (@3), TREE_TYPE (@0))
&& (CONSTANT_CLASS_P (@1) || (single_use (@2) && single_use (@3))))
|| !HONOR_SIGN_DEPENDENT_ROUNDING (type)))
(convert (negate @1))))
(simplify
- (negate (nop_convert (negate @1)))
+ (negate (nop_convert? (negate @1)))
(if (!TYPE_OVERFLOW_SANITIZED (type)
&& !TYPE_OVERFLOW_SANITIZED (TREE_TYPE (@1)))
(view_convert @1)))
/* A - (A +- B) -> -+ B */
/* A +- (B -+ A) -> +- B */
(simplify
- (minus (nop_convert (plus:c (nop_convert @0) @1)) @0)
+ (minus (nop_convert1? (plus:c (nop_convert2? @0) @1)) @0)
(view_convert @1))
(simplify
- (minus (nop_convert (minus (nop_convert @0) @1)) @0)
+ (minus (nop_convert1? (minus (nop_convert2? @0) @1)) @0)
(if (!ANY_INTEGRAL_TYPE_P (type)
|| TYPE_OVERFLOW_WRAPS (type))
(negate (view_convert @1))
(view_convert (negate @1))))
(simplify
- (plus:c (nop_convert (minus @0 (nop_convert @1))) @1)
+ (plus:c (nop_convert1? (minus @0 (nop_convert2? @1))) @1)
(view_convert @0))
(simplify
- (minus @0 (nop_convert (plus:c (nop_convert @0) @1)))
+ (minus @0 (nop_convert1? (plus:c (nop_convert2? @0) @1)))
(if (!ANY_INTEGRAL_TYPE_P (type)
|| TYPE_OVERFLOW_WRAPS (type))
(negate (view_convert @1))
(view_convert (negate @1))))
(simplify
- (minus @0 (nop_convert (minus (nop_convert @0) @1)))
+ (minus @0 (nop_convert1? (minus (nop_convert2? @0) @1)))
(view_convert @1))
/* (A +- B) + (C - A) -> C +- B */
/* (A + B) - (A - C) -> B + C */
(for inner_op (plus minus)
neg_inner_op (minus plus)
(simplify
- (outer_op (nop_convert (inner_op @0 CONSTANT_CLASS_P@1))
+ (outer_op (nop_convert? (inner_op @0 CONSTANT_CLASS_P@1))
CONSTANT_CLASS_P@2)
/* If one of the types wraps, use that one. */
(if (!ANY_INTEGRAL_TYPE_P (type) || TYPE_OVERFLOW_WRAPS (type))
/* (CST1 - A) +- CST2 -> CST3 - A */
(for outer_op (plus minus)
(simplify
- (outer_op (nop_convert (minus CONSTANT_CLASS_P@1 @0)) CONSTANT_CLASS_P@2)
+ (outer_op (nop_convert? (minus CONSTANT_CLASS_P@1 @0)) CONSTANT_CLASS_P@2)
/* If one of the types wraps, use that one. */
(if (!ANY_INTEGRAL_TYPE_P (type) || TYPE_OVERFLOW_WRAPS (type))
/* If all 3 captures are CONSTANT_CLASS_P, punt, as we might recurse
Use view_convert because it is safe for vectors and equivalent for
scalars. */
(simplify
- (minus CONSTANT_CLASS_P@1 (nop_convert (minus CONSTANT_CLASS_P@2 @0)))
+ (minus CONSTANT_CLASS_P@1 (nop_convert? (minus CONSTANT_CLASS_P@2 @0)))
/* If one of the types wraps, use that one. */
(if (!ANY_INTEGRAL_TYPE_P (type) || TYPE_OVERFLOW_WRAPS (type))
/* If all 3 captures are CONSTANT_CLASS_P, punt, as we might recurse
(cmp (minmax @0 INTEGER_CST@1) INTEGER_CST@2)
(comb (cmp @0 @2) (cmp @1 @2))))
+/* Undo fancy ways of writing max/min or other ?: expressions, e.g.
+   a - ((a - b) & -(a < b)) becomes (a < b) ? b : a.
+   People normally use ?: and that is what we actually try to optimize.  */
+(for cmp (simple_comparison)
+ (simplify
+ (minus @0 (bit_and:c (minus @0 @1)
+ (convert? (negate@4 (convert? (cmp@5 @2 @3))))))
+ (if (INTEGRAL_TYPE_P (type)
+ && INTEGRAL_TYPE_P (TREE_TYPE (@4))
+ && TREE_CODE (TREE_TYPE (@4)) != BOOLEAN_TYPE
+ && INTEGRAL_TYPE_P (TREE_TYPE (@5))
+ && (TYPE_PRECISION (TREE_TYPE (@4)) >= TYPE_PRECISION (type)
+ || !TYPE_UNSIGNED (TREE_TYPE (@4))))
+ (cond (cmp @2 @3) @1 @0)))
+ (simplify
+ (plus:c @0 (bit_and:c (minus @1 @0)
+ (convert? (negate@4 (convert? (cmp@5 @2 @3))))))
+ (if (INTEGRAL_TYPE_P (type)
+ && INTEGRAL_TYPE_P (TREE_TYPE (@4))
+ && TREE_CODE (TREE_TYPE (@4)) != BOOLEAN_TYPE
+ && INTEGRAL_TYPE_P (TREE_TYPE (@5))
+ && (TYPE_PRECISION (TREE_TYPE (@4)) >= TYPE_PRECISION (type)
+ || !TYPE_UNSIGNED (TREE_TYPE (@4))))
+ (cond (cmp @2 @3) @1 @0))))
+
/* Simplifications of shift and rotates. */
(for rotate (lrotate rrotate)
return (x * 0x01010101) >> 24;
} */
(simplify
- (rshift
- (mult
- (bit_and
- (plus:c
- (rshift @8 INTEGER_CST@5)
- (plus:c@8
- (bit_and @6 INTEGER_CST@7)
- (bit_and
- (rshift
- (minus@6
- @0
- (bit_and
- (rshift @0 INTEGER_CST@4)
- INTEGER_CST@11))
- INTEGER_CST@10)
- INTEGER_CST@9)))
- INTEGER_CST@3)
- INTEGER_CST@2)
- INTEGER_CST@1)
+ (rshift
+ (mult
+ (bit_and
+ (plus:c
+ (rshift @8 INTEGER_CST@5)
+ (plus:c@8
+ (bit_and @6 INTEGER_CST@7)
+ (bit_and
+ (rshift
+ (minus@6 @0
+ (bit_and (rshift @0 INTEGER_CST@4) INTEGER_CST@11))
+ INTEGER_CST@10)
+ INTEGER_CST@9)))
+ INTEGER_CST@3)
+ INTEGER_CST@2)
+ INTEGER_CST@1)
/* Check constants and optab. */
- (with
- {
- unsigned prec = TYPE_PRECISION (type);
- int shift = 64 - prec;
- const unsigned HOST_WIDE_INT c1 = 0x0101010101010101ULL >> shift,
- c2 = 0x0F0F0F0F0F0F0F0FULL >> shift,
- c3 = 0x3333333333333333ULL >> shift,
- c4 = 0x5555555555555555ULL >> shift;
- }
- (if (prec <= 64 && TYPE_UNSIGNED (type) && tree_to_uhwi (@4) == 1
- && tree_to_uhwi (@10) == 2 && tree_to_uhwi (@5) == 4
- && tree_to_uhwi (@1) == prec - 8 && tree_to_uhwi (@2) == c1
- && tree_to_uhwi (@3) == c2 && tree_to_uhwi (@9) == c3
- && tree_to_uhwi (@7) == c3 && tree_to_uhwi (@11) == c4
- && direct_internal_fn_supported_p (IFN_POPCOUNT, type,
- OPTIMIZE_FOR_BOTH))
- (convert (IFN_POPCOUNT:type @0)))))
+ (with { unsigned prec = TYPE_PRECISION (type);
+ int shift = (64 - prec) & 63;
+ unsigned HOST_WIDE_INT c1
+ = HOST_WIDE_INT_UC (0x0101010101010101) >> shift;
+ unsigned HOST_WIDE_INT c2
+ = HOST_WIDE_INT_UC (0x0F0F0F0F0F0F0F0F) >> shift;
+ unsigned HOST_WIDE_INT c3
+ = HOST_WIDE_INT_UC (0x3333333333333333) >> shift;
+ unsigned HOST_WIDE_INT c4
+ = HOST_WIDE_INT_UC (0x5555555555555555) >> shift;
+ }
+ (if (prec >= 16
+ && prec <= 64
+ && pow2p_hwi (prec)
+ && TYPE_UNSIGNED (type)
+ && integer_onep (@4)
+ && wi::to_widest (@10) == 2
+ && wi::to_widest (@5) == 4
+ && wi::to_widest (@1) == prec - 8
+ && tree_to_uhwi (@2) == c1
+ && tree_to_uhwi (@3) == c2
+ && tree_to_uhwi (@9) == c3
+ && tree_to_uhwi (@7) == c3
+ && tree_to_uhwi (@11) == c4
+ && direct_internal_fn_supported_p (IFN_POPCOUNT, type,
+ OPTIMIZE_FOR_BOTH))
+ (convert (IFN_POPCOUNT:type @0)))))
#endif
/* Simplify:
|| TREE_CODE (cop1) == VECTOR_CST
|| TREE_CODE (cop1) == CONSTRUCTOR))
{
- if (sel.series_p (1, 1, nelts + 1, 1))
+ bool insert_first_p = sel.series_p (1, 1, nelts + 1, 1);
+ if (insert_first_p)
{
/* After canonicalizing the first elt to come from the
first vector we only can insert the first elt from
if ((ins = fold_read_from_vector (cop0, sel[0])))
op0 = op1;
}
- else
+ /* The above can fail for two-element vectors which always
+ appear to insert the first element, so try inserting
+ into the second lane as well. For more than two
+ elements that's wasted time. */
+ if (!insert_first_p || (!ins && maybe_eq (nelts, 2u)))
{
unsigned int encoded_nelts = sel.encoding ().encoded_nelts ();
for (at = 0; at < encoded_nelts; ++at)
if (maybe_ne (sel[at], at))
break;
- if (at < encoded_nelts && sel.series_p (at + 1, 1, at + 1, 1))
+ if (at < encoded_nelts
+ && (known_eq (at + 1, nelts)
+ || sel.series_p (at + 1, 1, at + 1, 1)))
{
- if (known_lt (at, nelts))
+ if (known_lt (poly_uint64 (sel[at]), nelts))
ins = fold_read_from_vector (cop0, sel[at]);
else
ins = fold_read_from_vector (cop1, sel[at] - nelts);