/* RTL simplification functions for GNU compiler.
- Copyright (C) 1987-2017 Free Software Foundation, Inc.
+ Copyright (C) 1987-2020 Free Software Foundation, Inc.
This file is part of GCC.
#include "flags.h"
#include "selftest.h"
#include "selftest-rtl.h"
+#include "rtx-vector-builder.h"
/* Simplification and canonicalization of RTL. */
#define HWI_SIGN_EXTEND(low) \
((((HOST_WIDE_INT) low) < 0) ? HOST_WIDE_INT_M1 : HOST_WIDE_INT_0)
-static rtx neg_const_int (machine_mode, const_rtx);
static bool plus_minus_operand_p (const_rtx);
static rtx simplify_plus_minus (enum rtx_code, machine_mode, rtx, rtx);
static rtx simplify_associative_operation (enum rtx_code, machine_mode,
static rtx simplify_binary_operation_1 (enum rtx_code, machine_mode,
rtx, rtx, rtx, rtx);
\f
-/* Negate a CONST_INT rtx. */
+/* Negate I, which satisfies poly_int_rtx_p. MODE is the mode of I. */
+
static rtx
-neg_const_int (machine_mode mode, const_rtx i)
+neg_poly_int_rtx (machine_mode mode, const_rtx i)
{
- unsigned HOST_WIDE_INT val = -UINTVAL (i);
-
- if (!HWI_COMPUTABLE_MODE_P (mode)
- && val == UINTVAL (i))
- return simplify_const_unary_operation (NEG, mode, CONST_CAST_RTX (i),
- mode);
- return gen_int_mode (val, mode);
+ return immed_wide_int_const (-wi::to_poly_wide (i, mode), mode);
}
/* Test whether expression, X, is an immediate constant that represents
{
rtx c, tmp, addr;
machine_mode cmode;
- HOST_WIDE_INT offset = 0;
+ poly_int64 offset = 0;
switch (GET_CODE (x))
{
addr = targetm.delegitimize_address (addr);
/* Split the address into a base and integer offset. */
- if (GET_CODE (addr) == CONST
- && GET_CODE (XEXP (addr, 0)) == PLUS
- && CONST_INT_P (XEXP (XEXP (addr, 0), 1)))
- {
- offset = INTVAL (XEXP (XEXP (addr, 0), 1));
- addr = XEXP (XEXP (addr, 0), 0);
- }
+ addr = strip_offset (addr, &offset);
if (GET_CODE (addr) == LO_SUM)
addr = XEXP (addr, 1);
/* If we're accessing the constant in a different mode than it was
originally stored, attempt to fix that up via subreg simplifications.
If that fails we have no choice but to return the original memory. */
- if (offset == 0 && cmode == GET_MODE (x))
+ if (known_eq (offset, 0) && cmode == GET_MODE (x))
return c;
- else if (offset >= 0 && offset < GET_MODE_SIZE (cmode))
+ else if (known_in_range_p (offset, 0, GET_MODE_SIZE (cmode)))
{
rtx tem = simplify_subreg (GET_MODE (x), c, cmode, offset);
if (tem && CONSTANT_P (tem))
case IMAGPART_EXPR:
case VIEW_CONVERT_EXPR:
{
- HOST_WIDE_INT bitsize, bitpos;
+ poly_int64 bitsize, bitpos, bytepos, toffset_val = 0;
tree toffset;
int unsignedp, reversep, volatilep = 0;
decl
= get_inner_reference (decl, &bitsize, &bitpos, &toffset, &mode,
&unsignedp, &reversep, &volatilep);
- if (bitsize != GET_MODE_BITSIZE (mode)
- || (bitpos % BITS_PER_UNIT)
- || (toffset && !tree_fits_shwi_p (toffset)))
+ if (maybe_ne (bitsize, GET_MODE_BITSIZE (mode))
+ || !multiple_p (bitpos, BITS_PER_UNIT, &bytepos)
+ || (toffset && !poly_int_tree_p (toffset, &toffset_val)))
decl = NULL;
else
- {
- offset += bitpos / BITS_PER_UNIT;
- if (toffset)
- offset += tree_to_shwi (toffset);
- }
+ offset += bytepos + toffset_val;
break;
}
}
&& (INTVAL (XEXP (op, 1)) & (precision - 1)) == 0
&& UINTVAL (XEXP (op, 1)) < op_precision)
{
- int byte = subreg_lowpart_offset (mode, op_mode);
+ poly_int64 byte = subreg_lowpart_offset (mode, op_mode);
int shifted_bytes = INTVAL (XEXP (op, 1)) / BITS_PER_UNIT;
return simplify_gen_subreg (mode, XEXP (op, 0), op_mode,
(WORDS_BIG_ENDIAN
&& (GET_MODE_SIZE (int_mode) >= UNITS_PER_WORD
|| WORDS_BIG_ENDIAN == BYTES_BIG_ENDIAN))
{
- int byte = subreg_lowpart_offset (int_mode, int_op_mode);
+ poly_int64 byte = subreg_lowpart_offset (int_mode, int_op_mode);
int shifted_bytes = INTVAL (XEXP (op, 1)) / BITS_PER_UNIT;
return adjust_address_nv (XEXP (op, 0), int_mode,
(WORDS_BIG_ENDIAN
/* If we know that the value is already truncated, we can
replace the TRUNCATE with a SUBREG. */
- if (GET_MODE_NUNITS (mode) == 1
+ if (known_eq (GET_MODE_NUNITS (mode), 1)
&& (TRULY_NOOP_TRUNCATION_MODES_P (mode, GET_MODE (op))
|| truncated_to_mode (mode, op)))
{
&& SUBREG_PROMOTED_SIGNED_P (op)
&& !paradoxical_subreg_p (mode, GET_MODE (SUBREG_REG (op))))
{
- temp = rtl_hooks.gen_lowpart_no_emit (mode, op);
+ temp = rtl_hooks.gen_lowpart_no_emit (mode, SUBREG_REG (op));
if (temp)
return temp;
}
&& CONST_INT_P (XEXP (op, 1))
&& XEXP (XEXP (op, 0), 1) == XEXP (op, 1)
&& (op_mode = as_a <scalar_int_mode> (GET_MODE (op)),
- GET_MODE_BITSIZE (op_mode) > INTVAL (XEXP (op, 1))))
+ GET_MODE_PRECISION (op_mode) > INTVAL (XEXP (op, 1))))
{
scalar_int_mode tmode;
- gcc_assert (GET_MODE_BITSIZE (int_mode)
- > GET_MODE_BITSIZE (op_mode));
- if (int_mode_for_size (GET_MODE_BITSIZE (op_mode)
+ gcc_assert (GET_MODE_PRECISION (int_mode)
+ > GET_MODE_PRECISION (op_mode));
+ if (int_mode_for_size (GET_MODE_PRECISION (op_mode)
- INTVAL (XEXP (op, 1)), 1).exists (&tmode))
{
rtx inner =
&& SUBREG_PROMOTED_UNSIGNED_P (op)
&& !paradoxical_subreg_p (mode, GET_MODE (SUBREG_REG (op))))
{
- temp = rtl_hooks.gen_lowpart_no_emit (mode, op);
+ temp = rtl_hooks.gen_lowpart_no_emit (mode, SUBREG_REG (op));
if (temp)
return temp;
}
break;
}
- if (VECTOR_MODE_P (mode) && vec_duplicate_p (op, &elt))
+ if (VECTOR_MODE_P (mode)
+ && vec_duplicate_p (op, &elt)
+ && code != VEC_DUPLICATE)
{
/* Try applying the operator to ELT and see if that simplifies.
We can duplicate the result if so.
}
if (CONST_SCALAR_INT_P (op) || CONST_DOUBLE_AS_FLOAT_P (op))
return gen_const_vec_duplicate (mode, op);
- if (GET_CODE (op) == CONST_VECTOR)
- {
- unsigned int n_elts = GET_MODE_NUNITS (mode);
- unsigned int in_n_elts = CONST_VECTOR_NUNITS (op);
- gcc_assert (in_n_elts < n_elts);
- gcc_assert ((n_elts % in_n_elts) == 0);
- rtvec v = rtvec_alloc (n_elts);
- for (unsigned i = 0; i < n_elts; i++)
- RTVEC_ELT (v, i) = CONST_VECTOR_ELT (op, i % in_n_elts);
- return gen_rtx_CONST_VECTOR (mode, v);
+ if (GET_CODE (op) == CONST_VECTOR
+ && (CONST_VECTOR_DUPLICATE_P (op)
+ || CONST_VECTOR_NUNITS (op).is_constant ()))
+ {
+ unsigned int npatterns = (CONST_VECTOR_DUPLICATE_P (op)
+ ? CONST_VECTOR_NPATTERNS (op)
+ : CONST_VECTOR_NUNITS (op).to_constant ());
+ gcc_assert (multiple_p (GET_MODE_NUNITS (mode), npatterns));
+ rtx_vector_builder builder (mode, npatterns, 1);
+ for (unsigned i = 0; i < npatterns; i++)
+ builder.quick_push (CONST_VECTOR_ELT (op, i));
+ return builder.build ();
}
}
- if (VECTOR_MODE_P (mode) && GET_CODE (op) == CONST_VECTOR)
+ if (VECTOR_MODE_P (mode)
+ && GET_CODE (op) == CONST_VECTOR
+ && known_eq (GET_MODE_NUNITS (mode), CONST_VECTOR_NUNITS (op)))
{
- int elt_size = GET_MODE_UNIT_SIZE (mode);
- unsigned n_elts = (GET_MODE_SIZE (mode) / elt_size);
- machine_mode opmode = GET_MODE (op);
- int op_elt_size = GET_MODE_UNIT_SIZE (opmode);
- unsigned op_n_elts = (GET_MODE_SIZE (opmode) / op_elt_size);
- rtvec v = rtvec_alloc (n_elts);
- unsigned int i;
+ gcc_assert (GET_MODE (op) == op_mode);
+
+ rtx_vector_builder builder;
+ if (!builder.new_unary_operation (mode, op, false))
+ return 0;
- gcc_assert (op_n_elts == n_elts);
- for (i = 0; i < n_elts; i++)
+ unsigned int count = builder.encoded_nelts ();
+ for (unsigned int i = 0; i < count; i++)
{
rtx x = simplify_unary_operation (code, GET_MODE_INNER (mode),
CONST_VECTOR_ELT (op, i),
- GET_MODE_INNER (opmode));
- if (!x)
+ GET_MODE_INNER (op_mode));
+ if (!x || !valid_for_const_vector_p (mode, x))
return 0;
- RTVEC_ELT (v, i) = x;
+ builder.quick_push (x);
}
- return gen_rtx_CONST_VECTOR (mode, v);
+ return builder.build ();
}
/* The order of these tests is critical so that, for example, we don't
if (CONST_SCALAR_INT_P (op) && is_a <scalar_int_mode> (mode, &result_mode))
{
unsigned int width = GET_MODE_PRECISION (result_mode);
+ if (width > MAX_BITSIZE_MODE_ANY_INT)
+ return 0;
+
wide_int result;
scalar_int_mode imode = (op_mode == VOIDmode
? result_mode
if (wi::ne_p (op0, 0))
int_value = wi::clz (op0);
else if (! CLZ_DEFINED_VALUE_AT_ZERO (imode, int_value))
- int_value = GET_MODE_PRECISION (imode);
+ return NULL_RTX;
result = wi::shwi (int_value, result_mode);
break;
if (wi::ne_p (op0, 0))
int_value = wi::ctz (op0);
else if (! CTZ_DEFINED_VALUE_AT_ZERO (imode, int_value))
- int_value = GET_MODE_PRECISION (imode);
+ return NULL_RTX;
result = wi::shwi (int_value, result_mode);
break;
&& is_int_mode (mode, &result_mode))
{
unsigned int width = GET_MODE_PRECISION (result_mode);
+ if (width > MAX_BITSIZE_MODE_ANY_INT)
+ return 0;
+
/* Although the overflow semantics of RTL's FIX and UNSIGNED_FIX
operators are intentionally left unspecified (to ease implementation
by target backends), for consistency, this routine implements the
return 0;
}
+/* Return a mask describing the COMPARISON. */
+static int
+comparison_to_mask (enum rtx_code comparison)
+{
+ switch (comparison)
+ {
+ case LT:
+ return 8;
+ case GT:
+ return 4;
+ case EQ:
+ return 2;
+ case UNORDERED:
+ return 1;
+
+ case LTGT:
+ return 12;
+ case LE:
+ return 10;
+ case GE:
+ return 6;
+ case UNLT:
+ return 9;
+ case UNGT:
+ return 5;
+ case UNEQ:
+ return 3;
+
+ case ORDERED:
+ return 14;
+ case NE:
+ return 13;
+ case UNLE:
+ return 11;
+ case UNGE:
+ return 7;
+
+ default:
+ gcc_unreachable ();
+ }
+}
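+
+/* In this encoding, bit 3 of the mask stands for LT, bit 2 for GT, bit 1
+   for EQ and bit 0 for UNORDERED, so e.g. LE == LT | EQ == 8 | 2 == 10
+   and NE == LT | GT | UNORDERED == 8 | 4 | 1 == 13.  ORing the masks of
+   two comparisons therefore yields the mask of their union.  */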
+
+/* Return a comparison corresponding to the MASK. */
+static enum rtx_code
+mask_to_comparison (int mask)
+{
+ switch (mask)
+ {
+ case 8:
+ return LT;
+ case 4:
+ return GT;
+ case 2:
+ return EQ;
+ case 1:
+ return UNORDERED;
+
+ case 12:
+ return LTGT;
+ case 10:
+ return LE;
+ case 6:
+ return GE;
+ case 9:
+ return UNLT;
+ case 5:
+ return UNGT;
+ case 3:
+ return UNEQ;
+
+ case 14:
+ return ORDERED;
+ case 13:
+ return NE;
+ case 11:
+ return UNLE;
+ case 7:
+ return UNGE;
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Return true if CODE is valid for comparisons of mode MODE, false
+ otherwise.
+
+ It is always safe to return false, even if the code was valid for the
+ given mode, as that will merely suppress optimizations.  */
+
+static bool
+comparison_code_valid_for_mode (enum rtx_code code, enum machine_mode mode)
+{
+ switch (code)
+ {
+ /* These are valid for integral, floating and vector modes. */
+ case NE:
+ case EQ:
+ case GE:
+ case GT:
+ case LE:
+ case LT:
+ return (INTEGRAL_MODE_P (mode)
+ || FLOAT_MODE_P (mode)
+ || VECTOR_MODE_P (mode));
+
+ /* These are valid for floating point modes. */
+ case LTGT:
+ case UNORDERED:
+ case ORDERED:
+ case UNEQ:
+ case UNGE:
+ case UNGT:
+ case UNLE:
+ case UNLT:
+ return FLOAT_MODE_P (mode);
+
+ /* These are filtered out in simplify_logical_relational_operation, but
+ we check for them too as a matter of safety. They are valid
+ for integral and vector modes. */
+ case GEU:
+ case GTU:
+ case LEU:
+ case LTU:
+ return INTEGRAL_MODE_P (mode) || VECTOR_MODE_P (mode);
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Simplify a logical operation CODE with result mode MODE, operating on OP0
+ and OP1, both of which should be relational operations.  Return 0 if
+ no such simplification is possible. */
+rtx
+simplify_logical_relational_operation (enum rtx_code code, machine_mode mode,
+ rtx op0, rtx op1)
+{
+ /* We only handle IOR of two relational operations. */
+ if (code != IOR)
+ return 0;
+
+ if (!(COMPARISON_P (op0) && COMPARISON_P (op1)))
+ return 0;
+
+ if (!(rtx_equal_p (XEXP (op0, 0), XEXP (op1, 0))
+ && rtx_equal_p (XEXP (op0, 1), XEXP (op1, 1))))
+ return 0;
+
+ enum rtx_code code0 = GET_CODE (op0);
+ enum rtx_code code1 = GET_CODE (op1);
+
+ /* We don't handle unsigned comparisons currently. */
+ if (code0 == LTU || code0 == GTU || code0 == LEU || code0 == GEU)
+ return 0;
+ if (code1 == LTU || code1 == GTU || code1 == LEU || code1 == GEU)
+ return 0;
+
+ int mask0 = comparison_to_mask (code0);
+ int mask1 = comparison_to_mask (code1);
+
+ int mask = mask0 | mask1;
+
+ if (mask == 15)
+ return const_true_rtx;
+
+ code = mask_to_comparison (mask);
+
+ /* Many comparison codes are only valid for certain mode classes. */
+ if (!comparison_code_valid_for_mode (code, mode))
+ return 0;
+
+ op0 = XEXP (op1, 0);
+ op1 = XEXP (op1, 1);
+
+ return simplify_gen_relational (code, mode, VOIDmode, op0, op1);
+}
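+
+/* As a worked example of the above: for (ior (lt x y) (eq x y)) the masks
+   are 8 and 2, giving a combined mask of 10, which maps back to LE, so the
+   result is (le x y).  A combined mask of 15, e.g. from
+   (ior (lt x y) (unge x y)), yields const_true_rtx directly.  If the
+   combined mask maps to a float-only code such as LTGT while MODE is an
+   integer mode, comparison_code_valid_for_mode rejects it and no
+   simplification is made.  */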
/* Simplify a binary operation CODE with result mode MODE, operating on OP0
and OP1. Return 0 if no simplification is possible.
if ((GET_CODE (op0) == CONST
|| GET_CODE (op0) == SYMBOL_REF
|| GET_CODE (op0) == LABEL_REF)
- && CONST_INT_P (op1))
- return plus_constant (mode, op0, INTVAL (op1));
+ && poly_int_rtx_p (op1, &offset))
+ return plus_constant (mode, op0, offset);
else if ((GET_CODE (op1) == CONST
|| GET_CODE (op1) == SYMBOL_REF
|| GET_CODE (op1) == LABEL_REF)
- && CONST_INT_P (op0))
- return plus_constant (mode, op1, INTVAL (op0));
+ && poly_int_rtx_p (op0, &offset))
+ return plus_constant (mode, op1, offset);
/* See if this is something like X * C - X or vice versa or
if the multiplication is written as a shift. If so, we can
return plus_constant (mode, op0, trunc_int_for_mode (-offset, mode));
/* Don't let a relocatable value get a negative coeff. */
- if (CONST_INT_P (op1) && GET_MODE (op0) != VOIDmode)
+ if (poly_int_rtx_p (op1) && GET_MODE (op0) != VOIDmode)
return simplify_gen_binary (PLUS, mode,
op0,
- neg_const_int (mode, op1));
+ neg_poly_int_rtx (mode, op1));
/* (x - (x & y)) -> (x & ~y) */
if (INTEGRAL_MODE_P (mode) && GET_CODE (op1) == AND)
&& GET_CODE (SUBREG_REG (opleft)) == ASHIFT
&& GET_CODE (opright) == LSHIFTRT
&& GET_CODE (XEXP (opright, 0)) == SUBREG
- && SUBREG_BYTE (opleft) == SUBREG_BYTE (XEXP (opright, 0))
+ && known_eq (SUBREG_BYTE (opleft), SUBREG_BYTE (XEXP (opright, 0)))
&& GET_MODE_SIZE (int_mode) < GET_MODE_SIZE (inner_mode)
&& rtx_equal_p (XEXP (SUBREG_REG (opleft), 0),
SUBREG_REG (XEXP (opright, 0)))
XEXP (op0, 1));
}
+ /* The following happens with bitfield merging.
+ (X & C) | ((X | Y) & ~C) -> X | (Y & ~C) */
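+ /* For instance, with C == 0x0f the left-hand side
+    (X & 0x0f) | ((X | Y) & ~0x0f)
+    expands to (X & 0x0f) | (X & ~0x0f) | (Y & ~0x0f), which is
+    X | (Y & ~0x0f).  */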
+ if (GET_CODE (op0) == AND
+ && GET_CODE (op1) == AND
+ && CONST_INT_P (XEXP (op0, 1))
+ && CONST_INT_P (XEXP (op1, 1))
+ && (INTVAL (XEXP (op0, 1))
+ == ~INTVAL (XEXP (op1, 1))))
+ {
+ /* The IOR may be on either side. */
+ rtx top0 = NULL_RTX, top1 = NULL_RTX;
+ if (GET_CODE (XEXP (op1, 0)) == IOR)
+ top0 = op0, top1 = op1;
+ else if (GET_CODE (XEXP (op0, 0)) == IOR)
+ top0 = op1, top1 = op0;
+ if (top0 && top1)
+ {
+ /* X may be on either side of the inner IOR. */
+ rtx tem = NULL_RTX;
+ if (rtx_equal_p (XEXP (top0, 0),
+ XEXP (XEXP (top1, 0), 0)))
+ tem = XEXP (XEXP (top1, 0), 1);
+ else if (rtx_equal_p (XEXP (top0, 0),
+ XEXP (XEXP (top1, 0), 1)))
+ tem = XEXP (XEXP (top1, 0), 0);
+ if (tem)
+ return simplify_gen_binary (IOR, mode, XEXP (top0, 0),
+ simplify_gen_binary
+ (AND, mode, tem, XEXP (top1, 1)));
+ }
+ }
+
tem = simplify_byte_swapping_operation (code, mode, op0, op1);
if (tem)
return tem;
tem = simplify_associative_operation (code, mode, op0, op1);
+ if (tem)
+ return tem;
+
+ tem = simplify_logical_relational_operation (code, mode, op0, op1);
if (tem)
return tem;
break;
if (CONST_INT_P (trueop1)
&& exact_log2 (UINTVAL (trueop1)) > 0)
return simplify_gen_binary (AND, mode, op0,
- gen_int_mode (INTVAL (op1) - 1, mode));
+ gen_int_mode (UINTVAL (trueop1) - 1,
+ mode));
break;
case MOD:
{
rtx tmp = gen_int_shift_amount
(inner_mode, INTVAL (XEXP (SUBREG_REG (op0), 1)) + INTVAL (op1));
- tmp = simplify_gen_binary (code, inner_mode,
- XEXP (SUBREG_REG (op0), 0),
- tmp);
+
+ /* Combine would usually zero out the value when combining two shifts
+    whose combined count is greater than or equal to the mode width.
+    However, since we fold one of the shifts away here, combine won't see
+    the pair, so we must zero the result ourselves if the combined count
+    is out of range.  */
+ if (code == LSHIFTRT
+ && INTVAL (tmp) >= GET_MODE_BITSIZE (inner_mode))
+ tmp = const0_rtx;
+ else
+ tmp = simplify_gen_binary (code,
+ inner_mode,
+ XEXP (SUBREG_REG (op0), 0),
+ tmp);
+
return lowpart_subreg (int_mode, tmp, inner_mode);
}
case VEC_SERIES:
if (op1 == CONST0_RTX (GET_MODE_INNER (mode)))
return gen_vec_duplicate (mode, op0);
- if (CONSTANT_P (op0) && CONSTANT_P (op1))
+ if (valid_for_const_vector_p (mode, op0)
+ && valid_for_const_vector_p (mode, op1))
return gen_const_vec_series (mode, op0, op1);
return 0;
gcc_assert (mode == GET_MODE_INNER (GET_MODE (trueop0)));
gcc_assert (GET_CODE (trueop1) == PARALLEL);
gcc_assert (XVECLEN (trueop1, 0) == 1);
- gcc_assert (CONST_INT_P (XVECEXP (trueop1, 0, 0)));
+
+ /* We can't reason about selections made at runtime. */
+ if (!CONST_INT_P (XVECEXP (trueop1, 0, 0)))
+ return 0;
if (vec_duplicate_p (trueop0, &elt0))
return elt0;
nested VEC_SELECT expressions. When the input operand is a memory
operand, this operation can be simplified to a simple scalar
load from an offset memory address. */
- if (GET_CODE (trueop0) == VEC_SELECT)
+ int n_elts;
+ if (GET_CODE (trueop0) == VEC_SELECT
+ && (GET_MODE_NUNITS (GET_MODE (XEXP (trueop0, 0)))
+ .is_constant (&n_elts)))
{
rtx op0 = XEXP (trueop0, 0);
rtx op1 = XEXP (trueop0, 1);
- int n_elts = GET_MODE_NUNITS (GET_MODE (op0));
-
int i = INTVAL (XVECEXP (trueop1, 0, 0));
int elem;
mode00 = GET_MODE (op00);
mode01 = GET_MODE (op01);
- /* Find out number of elements of each operand. */
- n_elts00 = GET_MODE_NUNITS (mode00);
- n_elts01 = GET_MODE_NUNITS (mode01);
+ /* Find out the number of elements of each operand.
+ Since the concatenated result has a constant number
+ of elements, the operands must too. */
+ n_elts00 = GET_MODE_NUNITS (mode00).to_constant ();
+ n_elts01 = GET_MODE_NUNITS (mode01).to_constant ();
gcc_assert (n_elts == n_elts00 + n_elts01);
if (GET_CODE (trueop0) == CONST_VECTOR)
{
- int elt_size = GET_MODE_UNIT_SIZE (mode);
- unsigned n_elts = (GET_MODE_SIZE (mode) / elt_size);
+ unsigned n_elts = XVECLEN (trueop1, 0);
rtvec v = rtvec_alloc (n_elts);
unsigned int i;
- gcc_assert (XVECLEN (trueop1, 0) == (int) n_elts);
+ gcc_assert (known_eq (n_elts, GET_MODE_NUNITS (mode)));
for (i = 0; i < n_elts; i++)
{
rtx x = XVECEXP (trueop1, 0, i);
- gcc_assert (CONST_INT_P (x));
+ if (!CONST_INT_P (x))
+ return 0;
+
RTVEC_ELT (v, i) = CONST_VECTOR_ELT (trueop0,
INTVAL (x));
}
}
/* If we select one half of a vec_concat, return that. */
+ int l0, l1;
if (GET_CODE (trueop0) == VEC_CONCAT
+ && (GET_MODE_NUNITS (GET_MODE (XEXP (trueop0, 0)))
+ .is_constant (&l0))
+ && (GET_MODE_NUNITS (GET_MODE (XEXP (trueop0, 1)))
+ .is_constant (&l1))
&& CONST_INT_P (XVECEXP (trueop1, 0, 0)))
{
rtx subop0 = XEXP (trueop0, 0);
rtx subop1 = XEXP (trueop0, 1);
machine_mode mode0 = GET_MODE (subop0);
machine_mode mode1 = GET_MODE (subop1);
- int l0 = GET_MODE_NUNITS (mode0);
- int l1 = GET_MODE_NUNITS (mode1);
int i0 = INTVAL (XVECEXP (trueop1, 0, 0));
if (i0 == 0 && !side_effects_p (op1) && mode == mode0)
{
&& GET_CODE (trueop0) == VEC_CONCAT)
{
rtx vec = trueop0;
- int offset = INTVAL (XVECEXP (trueop1, 0, 0)) * GET_MODE_SIZE (mode);
+ offset = INTVAL (XVECEXP (trueop1, 0, 0)) * GET_MODE_SIZE (mode);
/* Try to find the element in the VEC_CONCAT. */
while (GET_MODE (vec) != mode
&& GET_CODE (vec) == VEC_CONCAT)
{
- HOST_WIDE_INT vec_size;
+ poly_int64 vec_size;
if (CONST_INT_P (XEXP (vec, 0)))
{
else
vec_size = GET_MODE_SIZE (GET_MODE (XEXP (vec, 0)));
- if (offset < vec_size)
+ if (known_lt (offset, vec_size))
vec = XEXP (vec, 0);
- else
+ else if (known_ge (offset, vec_size))
{
offset -= vec_size;
vec = XEXP (vec, 1);
}
+ else
+ break;
vec = avoid_constant_pool_reference (vec);
}
{
rtx op0_subop1 = XEXP (trueop0, 1);
gcc_assert (GET_CODE (op0_subop1) == PARALLEL);
- gcc_assert (XVECLEN (trueop1, 0) == GET_MODE_NUNITS (mode));
+ gcc_assert (known_eq (XVECLEN (trueop1, 0), GET_MODE_NUNITS (mode)));
/* Apply the outer ordering vector to the inner one. (The inner
ordering vector is expressly permitted to be of a different
: GET_MODE_INNER (mode));
gcc_assert (VECTOR_MODE_P (mode));
- gcc_assert (GET_MODE_SIZE (op0_mode) + GET_MODE_SIZE (op1_mode)
- == GET_MODE_SIZE (mode));
+ gcc_assert (known_eq (GET_MODE_SIZE (op0_mode)
+ + GET_MODE_SIZE (op1_mode),
+ GET_MODE_SIZE (mode)));
if (VECTOR_MODE_P (op0_mode))
gcc_assert (GET_MODE_INNER (mode)
else
gcc_assert (GET_MODE_INNER (mode) == op1_mode);
+ unsigned int n_elts, in_n_elts;
if ((GET_CODE (trueop0) == CONST_VECTOR
|| CONST_SCALAR_INT_P (trueop0)
|| CONST_DOUBLE_AS_FLOAT_P (trueop0))
&& (GET_CODE (trueop1) == CONST_VECTOR
|| CONST_SCALAR_INT_P (trueop1)
- || CONST_DOUBLE_AS_FLOAT_P (trueop1)))
+ || CONST_DOUBLE_AS_FLOAT_P (trueop1))
+ && GET_MODE_NUNITS (mode).is_constant (&n_elts)
+ && GET_MODE_NUNITS (op0_mode).is_constant (&in_n_elts))
{
- unsigned n_elts = GET_MODE_NUNITS (mode);
- unsigned in_n_elts = GET_MODE_NUNITS (op0_mode);
rtvec v = rtvec_alloc (n_elts);
unsigned int i;
for (i = 0; i < n_elts; i++)
return 0;
}
+/* Return true if binary operation OP distributes over addition in operand
+ OPNO, with the other operand being held constant. OPNO counts from 1. */
+
+static bool
+distributes_over_addition_p (rtx_code op, int opno)
+{
+ switch (op)
+ {
+ case PLUS:
+ case MINUS:
+ case MULT:
+ return true;
+
+ case ASHIFT:
+ return opno == 1;
+
+ default:
+ return false;
+ }
+}
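+
+/* For example, MULT distributes in both operands, since
+   (a + b) * c == a * c + b * c and c * (a + b) == c * a + c * b,
+   whereas ASHIFT only distributes in operand 1:
+   (a + b) << c == (a << c) + (b << c), but in general
+   c << (a + b) != (c << a) + (c << b).  */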
+
rtx
simplify_const_binary_operation (enum rtx_code code, machine_mode mode,
rtx op0, rtx op1)
&& GET_CODE (op0) == CONST_VECTOR
&& GET_CODE (op1) == CONST_VECTOR)
{
- unsigned int n_elts = CONST_VECTOR_NUNITS (op0);
- gcc_assert (n_elts == (unsigned int) CONST_VECTOR_NUNITS (op1));
- gcc_assert (n_elts == GET_MODE_NUNITS (mode));
- rtvec v = rtvec_alloc (n_elts);
- unsigned int i;
+ bool step_ok_p;
+ if (CONST_VECTOR_STEPPED_P (op0)
+ && CONST_VECTOR_STEPPED_P (op1))
+ /* We can operate directly on the encoding if:
+
+ a3 - a2 == a2 - a1 && b3 - b2 == b2 - b1
+ implies
+ (a3 op b3) - (a2 op b2) == (a2 op b2) - (a1 op b1)
+
+ Addition and subtraction are the supported operators
+ for which this is true. */
+ step_ok_p = (code == PLUS || code == MINUS);
+ else if (CONST_VECTOR_STEPPED_P (op0))
+ /* We can operate directly on stepped encodings if:
+
+ a3 - a2 == a2 - a1
+ implies:
+ (a3 op c) - (a2 op c) == (a2 op c) - (a1 op c)
+
+ which is true if (x -> x op c) distributes over addition. */
+ step_ok_p = distributes_over_addition_p (code, 1);
+ else
+ /* Similarly in reverse. */
+ step_ok_p = distributes_over_addition_p (code, 2);
+ rtx_vector_builder builder;
+ if (!builder.new_binary_operation (mode, op0, op1, step_ok_p))
+ return 0;
- for (i = 0; i < n_elts; i++)
+ unsigned int count = builder.encoded_nelts ();
+ for (unsigned int i = 0; i < count; i++)
{
rtx x = simplify_binary_operation (code, GET_MODE_INNER (mode),
CONST_VECTOR_ELT (op0, i),
CONST_VECTOR_ELT (op1, i));
- if (!x)
+ if (!x || !valid_for_const_vector_p (mode, x))
return 0;
- RTVEC_ELT (v, i) = x;
+ builder.quick_push (x);
}
-
- return gen_rtx_CONST_VECTOR (mode, v);
+ return builder.build ();
}
if (VECTOR_MODE_P (mode)
&& code == VEC_CONCAT
&& (CONST_SCALAR_INT_P (op0)
- || GET_CODE (op0) == CONST_FIXED
+ || CONST_FIXED_P (op0)
|| CONST_DOUBLE_AS_FLOAT_P (op0))
&& (CONST_SCALAR_INT_P (op1)
|| CONST_DOUBLE_AS_FLOAT_P (op1)
- || GET_CODE (op1) == CONST_FIXED))
+ || CONST_FIXED_P (op1)))
{
- unsigned n_elts = GET_MODE_NUNITS (mode);
+ /* Both inputs have a constant number of elements, so the result
+ must too. */
+ unsigned n_elts = GET_MODE_NUNITS (mode).to_constant ();
rtvec v = rtvec_alloc (n_elts);
gcc_assert (n_elts >= 2);
}
else
{
- unsigned op0_n_elts = GET_MODE_NUNITS (GET_MODE (op0));
- unsigned op1_n_elts = GET_MODE_NUNITS (GET_MODE (op1));
+ unsigned op0_n_elts = GET_MODE_NUNITS (GET_MODE (op0)).to_constant ();
+ unsigned op1_n_elts = GET_MODE_NUNITS (GET_MODE (op1)).to_constant ();
unsigned i;
gcc_assert (GET_CODE (op0) == CONST_VECTOR);
gcc_assert (op0_n_elts + op1_n_elts == n_elts);
for (i = 0; i < op0_n_elts; ++i)
- RTVEC_ELT (v, i) = XVECEXP (op0, 0, i);
+ RTVEC_ELT (v, i) = CONST_VECTOR_ELT (op0, i);
for (i = 0; i < op1_n_elts; ++i)
- RTVEC_ELT (v, op0_n_elts+i) = XVECEXP (op1, 0, i);
+ RTVEC_ELT (v, op0_n_elts+i) = CONST_VECTOR_ELT (op1, i);
}
return gen_rtx_CONST_VECTOR (mode, v);
scalar_int_mode int_mode;
if (is_a <scalar_int_mode> (mode, &int_mode)
&& CONST_SCALAR_INT_P (op0)
- && CONST_SCALAR_INT_P (op1))
+ && CONST_SCALAR_INT_P (op1)
+ && GET_MODE_PRECISION (int_mode) <= MAX_BITSIZE_MODE_ANY_INT)
{
wide_int result;
- bool overflow;
+ wi::overflow_type overflow;
rtx_mode_t pop0 = rtx_mode_t (op0, int_mode);
rtx_mode_t pop1 = rtx_mode_t (op1, int_mode);
}
break;
- case CONST_INT:
+ CASE_CONST_SCALAR_INT:
+ case CONST_POLY_INT:
n_constants++;
if (this_neg)
{
- ops[i].op = neg_const_int (mode, this_op);
+ ops[i].op = neg_poly_int_rtx (mode, this_op);
ops[i].neg = 0;
changed = 1;
canonicalized = 1;
lneg &= rneg;
if (GET_CODE (tem) == NEG)
tem = XEXP (tem, 0), lneg = !lneg;
- if (CONST_INT_P (tem) && lneg)
- tem = neg_const_int (mode, tem), lneg = 0;
+ if (poly_int_rtx_p (tem) && lneg)
+ tem = neg_poly_int_rtx (mode, tem), lneg = 0;
ops[i].op = tem;
ops[i].neg = lneg;
in the array and that any other constant will be next-to-last. */
if (n_ops > 1
- && CONST_INT_P (ops[n_ops - 1].op)
+ && poly_int_rtx_p (ops[n_ops - 1].op)
&& CONSTANT_P (ops[n_ops - 2].op))
{
rtx value = ops[n_ops - 1].op;
if (ops[n_ops - 1].neg ^ ops[n_ops - 2].neg)
- value = neg_const_int (mode, value);
+ value = neg_poly_int_rtx (mode, value);
if (CONST_INT_P (value))
{
ops[n_ops - 2].op = plus_constant (mode, ops[n_ops - 2].op,
return NULL_RTX;
#endif
}
+ /* For a vector comparison with a scalar int result, it is unknown
+    whether the target means a comparison into an integral bitmask, a
+    comparison whose whole result is const_true_rtx only when all element
+    comparisons are true, or one whose whole result is const_true_rtx when
+    any element comparison is true.  For const0_rtx all the cases are the
+    same. */
+ if (VECTOR_MODE_P (cmp_mode)
+ && SCALAR_INT_MODE_P (mode)
+ && tem == const_true_rtx)
+ return NULL_RTX;
return tem;
}
simplify_gen_binary (XOR, cmp_mode,
XEXP (op0, 1), op1));
- /* (eq/ne (and x y) x) simplifies to (eq/ne (and (not y) x) 0), which
- can be implemented with a BICS instruction on some targets, or
- constant-folded if y is a constant. */
+ /* Simplify (eq/ne (and/ior x y) x/y) for targets with a BICS instruction,
+    or by constant folding if x/y is a constant. */
if ((code == EQ || code == NE)
- && op0code == AND
- && rtx_equal_p (XEXP (op0, 0), op1)
+ && (op0code == AND || op0code == IOR)
&& !side_effects_p (op1)
&& op1 != CONST0_RTX (cmp_mode))
{
- rtx not_y = simplify_gen_unary (NOT, cmp_mode, XEXP (op0, 1), cmp_mode);
- rtx lhs = simplify_gen_binary (AND, cmp_mode, not_y, XEXP (op0, 0));
+ /* Both (eq/ne (and x y) x) and (eq/ne (ior x y) y) simplify to
+ (eq/ne (and (not y) x) 0). */
+ if ((op0code == AND && rtx_equal_p (XEXP (op0, 0), op1))
+ || (op0code == IOR && rtx_equal_p (XEXP (op0, 1), op1)))
+ {
+ rtx not_y = simplify_gen_unary (NOT, cmp_mode, XEXP (op0, 1),
+ cmp_mode);
+ rtx lhs = simplify_gen_binary (AND, cmp_mode, not_y, XEXP (op0, 0));
- return simplify_gen_relational (code, mode, cmp_mode, lhs,
- CONST0_RTX (cmp_mode));
- }
+ return simplify_gen_relational (code, mode, cmp_mode, lhs,
+ CONST0_RTX (cmp_mode));
+ }
- /* Likewise for (eq/ne (and x y) y). */
- if ((code == EQ || code == NE)
- && op0code == AND
- && rtx_equal_p (XEXP (op0, 1), op1)
- && !side_effects_p (op1)
- && op1 != CONST0_RTX (cmp_mode))
- {
- rtx not_x = simplify_gen_unary (NOT, cmp_mode, XEXP (op0, 0), cmp_mode);
- rtx lhs = simplify_gen_binary (AND, cmp_mode, not_x, XEXP (op0, 1));
+ /* Both (eq/ne (and x y) y) and (eq/ne (ior x y) x) simplify to
+ (eq/ne (and (not x) y) 0). */
+ if ((op0code == AND && rtx_equal_p (XEXP (op0, 1), op1))
+ || (op0code == IOR && rtx_equal_p (XEXP (op0, 0), op1)))
+ {
+ rtx not_x = simplify_gen_unary (NOT, cmp_mode, XEXP (op0, 0),
+ cmp_mode);
+ rtx lhs = simplify_gen_binary (AND, cmp_mode, not_x, XEXP (op0, 1));
- return simplify_gen_relational (code, mode, cmp_mode, lhs,
- CONST0_RTX (cmp_mode));
+ return simplify_gen_relational (code, mode, cmp_mode, lhs,
+ CONST0_RTX (cmp_mode));
+ }
}
/* (eq/ne (bswap x) C1) simplifies to (eq/ne x C2) with C2 swapped. */
}
/* Check if the given comparison (done in the given MODE) is actually
- a tautology or a contradiction. If the mode is VOID_mode, the
+ a tautology or a contradiction. If the mode is VOIDmode, the
comparison is done in "infinite precision". If no simplification
is possible, this function returns zero. Otherwise, it returns
either const_true_rtx or const0_rtx. */
return NULL_RTX;
}
+/* Try to simplify X given that it appears within operand OP of a
+ VEC_MERGE operation whose mask is MASK. X need not use the same
+ vector mode as the VEC_MERGE, but it must have the same number of
+ elements.
+
+ Return the simplified X on success, otherwise return NULL_RTX. */
+
+rtx
+simplify_merge_mask (rtx x, rtx mask, int op)
+{
+ gcc_assert (VECTOR_MODE_P (GET_MODE (x)));
+ poly_uint64 nunits = GET_MODE_NUNITS (GET_MODE (x));
+ if (GET_CODE (x) == VEC_MERGE && rtx_equal_p (XEXP (x, 2), mask))
+ {
+ if (side_effects_p (XEXP (x, 1 - op)))
+ return NULL_RTX;
+
+ return XEXP (x, op);
+ }
+ if (UNARY_P (x)
+ && VECTOR_MODE_P (GET_MODE (XEXP (x, 0)))
+ && known_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 0))), nunits))
+ {
+ rtx top0 = simplify_merge_mask (XEXP (x, 0), mask, op);
+ if (top0)
+ return simplify_gen_unary (GET_CODE (x), GET_MODE (x), top0,
+ GET_MODE (XEXP (x, 0)));
+ }
+ if (BINARY_P (x)
+ && VECTOR_MODE_P (GET_MODE (XEXP (x, 0)))
+ && known_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 0))), nunits)
+ && VECTOR_MODE_P (GET_MODE (XEXP (x, 1)))
+ && known_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 1))), nunits))
+ {
+ rtx top0 = simplify_merge_mask (XEXP (x, 0), mask, op);
+ rtx top1 = simplify_merge_mask (XEXP (x, 1), mask, op);
+ if (top0 || top1)
+ {
+ if (COMPARISON_P (x))
+ return simplify_gen_relational (GET_CODE (x), GET_MODE (x),
+ GET_MODE (XEXP (x, 0)) != VOIDmode
+ ? GET_MODE (XEXP (x, 0))
+ : GET_MODE (XEXP (x, 1)),
+ top0 ? top0 : XEXP (x, 0),
+ top1 ? top1 : XEXP (x, 1));
+ else
+ return simplify_gen_binary (GET_CODE (x), GET_MODE (x),
+ top0 ? top0 : XEXP (x, 0),
+ top1 ? top1 : XEXP (x, 1));
+ }
+ }
+ if (GET_RTX_CLASS (GET_CODE (x)) == RTX_TERNARY
+ && VECTOR_MODE_P (GET_MODE (XEXP (x, 0)))
+ && known_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 0))), nunits)
+ && VECTOR_MODE_P (GET_MODE (XEXP (x, 1)))
+ && known_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 1))), nunits)
+ && VECTOR_MODE_P (GET_MODE (XEXP (x, 2)))
+ && known_eq (GET_MODE_NUNITS (GET_MODE (XEXP (x, 2))), nunits))
+ {
+ rtx top0 = simplify_merge_mask (XEXP (x, 0), mask, op);
+ rtx top1 = simplify_merge_mask (XEXP (x, 1), mask, op);
+ rtx top2 = simplify_merge_mask (XEXP (x, 2), mask, op);
+ if (top0 || top1 || top2)
+ return simplify_gen_ternary (GET_CODE (x), GET_MODE (x),
+ GET_MODE (XEXP (x, 0)),
+ top0 ? top0 : XEXP (x, 0),
+ top1 ? top1 : XEXP (x, 1),
+ top2 ? top2 : XEXP (x, 2));
+ }
+ return NULL_RTX;
+}
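+
+/* For example, if X is (plus:V4SI (vec_merge:V4SI A B MASK) C), calling
+   simplify_merge_mask with the same MASK and OP == 0 gives
+   (plus:V4SI A C): in the lanes selected from operand 0 by MASK, the
+   nested VEC_MERGE behaves exactly like A, so it can be dropped as long
+   as B has no side effects.  */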
+
\f
/* Simplify CODE, an operation with result mode MODE and three operands,
OP0, OP1, and OP2. OP0_MODE was the mode of OP0 before it became
bool any_change = false;
rtx tem, trueop2;
scalar_int_mode int_mode, int_op0_mode;
+ unsigned int n_elts;
switch (code)
{
&& GET_CODE (XEXP (op0, 1)) == CONST_VECTOR)
{
rtx cv = XEXP (op0, 1);
- int nunits = CONST_VECTOR_NUNITS (cv);
+ int nunits;
bool ok = true;
- for (int i = 0; i < nunits; ++i)
- if (CONST_VECTOR_ELT (cv, i) != const0_rtx)
- {
- ok = false;
- break;
- }
+ if (!CONST_VECTOR_NUNITS (cv).is_constant (&nunits))
+ ok = false;
+ else
+ for (int i = 0; i < nunits; ++i)
+ if (CONST_VECTOR_ELT (cv, i) != const0_rtx)
+ {
+ ok = false;
+ break;
+ }
if (ok)
{
rtx new_op0 = gen_rtx_NE (GET_MODE (op0),
gcc_assert (GET_MODE (op1) == mode);
gcc_assert (VECTOR_MODE_P (mode));
trueop2 = avoid_constant_pool_reference (op2);
- if (CONST_INT_P (trueop2))
+ if (CONST_INT_P (trueop2)
+ && GET_MODE_NUNITS (mode).is_constant (&n_elts))
{
- unsigned n_elts = GET_MODE_NUNITS (mode);
unsigned HOST_WIDE_INT sel = UINTVAL (trueop2);
unsigned HOST_WIDE_INT mask;
if (n_elts == HOST_BITS_PER_WIDE_INT)
if (GET_CODE (op0) == VEC_DUPLICATE
&& GET_CODE (XEXP (op0, 0)) == VEC_SELECT
&& GET_CODE (XEXP (XEXP (op0, 0), 1)) == PARALLEL
- && mode_nunits[GET_MODE (XEXP (op0, 0))] == 1)
+ && known_eq (GET_MODE_NUNITS (GET_MODE (XEXP (op0, 0))), 1))
{
tem = XVECEXP ((XEXP (XEXP (op0, 0), 1)), 0, 0);
if (CONST_INT_P (tem) && CONST_INT_P (op2))
(vec_concat (A) (X)) if N == 2. */
if (GET_CODE (op0) == VEC_DUPLICATE
&& GET_CODE (op1) == CONST_VECTOR
- && CONST_VECTOR_NUNITS (op1) == 2
- && GET_MODE_NUNITS (GET_MODE (op0)) == 2
+ && known_eq (CONST_VECTOR_NUNITS (op1), 2)
+ && known_eq (GET_MODE_NUNITS (GET_MODE (op0)), 2)
&& IN_RANGE (sel, 1, 2))
{
rtx newop0 = XEXP (op0, 0);
Only applies for vectors of two elements. */
if (GET_CODE (op0) == VEC_DUPLICATE
&& GET_CODE (op1) == VEC_CONCAT
- && GET_MODE_NUNITS (GET_MODE (op0)) == 2
- && GET_MODE_NUNITS (GET_MODE (op1)) == 2
+ && known_eq (GET_MODE_NUNITS (GET_MODE (op0)), 2)
+ && known_eq (GET_MODE_NUNITS (GET_MODE (op1)), 2)
&& IN_RANGE (sel, 1, 2))
{
rtx newop0 = XEXP (op0, 0);
return simplify_gen_binary (VEC_CONCAT, mode, newop0, newop1);
}
+ /* Replace:
+
+ (vec_merge:outer (vec_duplicate:outer x:inner)
+ (subreg:outer y:inner 0)
+ (const_int N))
+
+ with (vec_concat:outer x:inner y:inner) if N == 1,
+ or (vec_concat:outer y:inner x:inner) if N == 2.
+
+ Implicitly, this means we have a paradoxical subreg, but such
+ a check is cheap, so make it anyway.
+
+ Only applies for vectors of two elements. */
+ if (GET_CODE (op0) == VEC_DUPLICATE
+ && GET_CODE (op1) == SUBREG
+ && GET_MODE (op1) == GET_MODE (op0)
+ && GET_MODE (SUBREG_REG (op1)) == GET_MODE (XEXP (op0, 0))
+ && paradoxical_subreg_p (op1)
+ && subreg_lowpart_p (op1)
+ && known_eq (GET_MODE_NUNITS (GET_MODE (op0)), 2)
+ && known_eq (GET_MODE_NUNITS (GET_MODE (op1)), 2)
+ && IN_RANGE (sel, 1, 2))
+ {
+ rtx newop0 = XEXP (op0, 0);
+ rtx newop1 = SUBREG_REG (op1);
+ if (sel == 2)
+ std::swap (newop0, newop1);
+ return simplify_gen_binary (VEC_CONCAT, mode, newop0, newop1);
+ }
+
+ /* Same as above but with switched operands:
+ Replace (vec_merge:outer (subreg:outer x:inner 0)
+ (vec_duplicate:outer y:inner)
+ (const_int N))
+
+ with (vec_concat:outer x:inner y:inner) if N == 1,
+ or (vec_concat:outer y:inner x:inner) if N == 2. */
+ if (GET_CODE (op1) == VEC_DUPLICATE
+ && GET_CODE (op0) == SUBREG
+ && GET_MODE (op0) == GET_MODE (op1)
+ && GET_MODE (SUBREG_REG (op0)) == GET_MODE (XEXP (op1, 0))
+ && paradoxical_subreg_p (op0)
+ && subreg_lowpart_p (op0)
+ && known_eq (GET_MODE_NUNITS (GET_MODE (op1)), 2)
+ && known_eq (GET_MODE_NUNITS (GET_MODE (op0)), 2)
+ && IN_RANGE (sel, 1, 2))
+ {
+ rtx newop0 = SUBREG_REG (op0);
+ rtx newop1 = XEXP (op1, 0);
+ if (sel == 2)
+ std::swap (newop0, newop1);
+ return simplify_gen_binary (VEC_CONCAT, mode, newop0, newop1);
+ }
+
/* Replace (vec_merge (vec_duplicate x) (vec_duplicate y)
(const_int n))
with (vec_concat x y) or (vec_concat y x) depending on value
of N. */
if (GET_CODE (op0) == VEC_DUPLICATE
&& GET_CODE (op1) == VEC_DUPLICATE
- && GET_MODE_NUNITS (GET_MODE (op0)) == 2
- && GET_MODE_NUNITS (GET_MODE (op1)) == 2
+ && known_eq (GET_MODE_NUNITS (GET_MODE (op0)), 2)
+ && known_eq (GET_MODE_NUNITS (GET_MODE (op1)), 2)
&& IN_RANGE (sel, 1, 2))
{
rtx newop0 = XEXP (op0, 0);
&& !side_effects_p (op2) && !side_effects_p (op1))
return op0;
+ if (!side_effects_p (op2))
+ {
+ rtx top0
+ = may_trap_p (op0) ? NULL_RTX : simplify_merge_mask (op0, op2, 0);
+ rtx top1
+ = may_trap_p (op1) ? NULL_RTX : simplify_merge_mask (op1, op2, 1);
+ if (top0 || top1)
+ return simplify_gen_ternary (code, mode, mode,
+ top0 ? top0 : op0,
+ top1 ? top1 : op1, op2);
+ }
+
break;
default:
return 0;
}
-/* Evaluate a SUBREG of a CONST_INT or CONST_WIDE_INT or CONST_DOUBLE
- or CONST_FIXED or CONST_VECTOR, returning another CONST_INT or
- CONST_WIDE_INT or CONST_DOUBLE or CONST_FIXED or CONST_VECTOR.
+/* Try to calculate NUM_BYTES bytes of the target memory image of X,
+ starting at byte FIRST_BYTE. Return true on success and add the
+ bytes to BYTES, such that each byte has BITS_PER_UNIT bits and such
+ that the bytes follow target memory order. Leave BYTES unmodified
+ on failure.
- Works by unpacking OP into a collection of 8-bit values
- represented as a little-endian array of 'unsigned char', selecting by BYTE,
- and then repacking them again for OUTERMODE. */
+ MODE is the mode of X. The caller must reserve NUM_BYTES bytes in
+ BYTES before calling this function. */
-static rtx
-simplify_immed_subreg (fixed_size_mode outermode, rtx op,
- fixed_size_mode innermode, unsigned int byte)
+bool
+native_encode_rtx (machine_mode mode, rtx x, vec<target_unit> &bytes,
+ unsigned int first_byte, unsigned int num_bytes)
{
- enum {
- value_bit = 8,
- value_mask = (1 << value_bit) - 1
- };
- unsigned char value[MAX_BITSIZE_MODE_ANY_MODE / value_bit];
- int value_start;
- int i;
- int elem;
-
- int num_elem;
- rtx * elems;
- int elem_bitsize;
- rtx result_s = NULL;
- rtvec result_v = NULL;
- enum mode_class outer_class;
- scalar_mode outer_submode;
- int max_bitsize;
+ /* Check the mode is sensible. */
+ gcc_assert (GET_MODE (x) == VOIDmode
+ ? is_a <scalar_int_mode> (mode)
+ : mode == GET_MODE (x));
- /* Some ports misuse CCmode. */
- if (GET_MODE_CLASS (outermode) == MODE_CC && CONST_INT_P (op))
- return op;
+ if (GET_CODE (x) == CONST_VECTOR)
+ {
+ /* CONST_VECTOR_ELT follows target memory order, so no shuffling
+ is necessary. The only complication is that MODE_VECTOR_BOOL
+ vectors can have several elements per byte. */
+ unsigned int elt_bits = vector_element_size (GET_MODE_BITSIZE (mode),
+ GET_MODE_NUNITS (mode));
+ unsigned int elt = first_byte * BITS_PER_UNIT / elt_bits;
+ if (elt_bits < BITS_PER_UNIT)
+ {
+ /* This is the only case in which elements can be smaller than
+ a byte. */
+ gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL);
+ for (unsigned int i = 0; i < num_bytes; ++i)
+ {
+ target_unit value = 0;
+ for (unsigned int j = 0; j < BITS_PER_UNIT; j += elt_bits)
+ {
+ value |= (INTVAL (CONST_VECTOR_ELT (x, elt)) & 1) << j;
+ elt += 1;
+ }
+ bytes.quick_push (value);
+ }
+ return true;
+ }
- /* We have no way to represent a complex constant at the rtl level. */
- if (COMPLEX_MODE_P (outermode))
- return NULL_RTX;
+ unsigned int start = bytes.length ();
+ unsigned int elt_bytes = GET_MODE_UNIT_SIZE (mode);
+ /* Make FIRST_BYTE relative to ELT. */
+ first_byte %= elt_bytes;
+ while (num_bytes > 0)
+ {
+ /* Work out how many bytes we want from element ELT. */
+ unsigned int chunk_bytes = MIN (num_bytes, elt_bytes - first_byte);
+ if (!native_encode_rtx (GET_MODE_INNER (mode),
+ CONST_VECTOR_ELT (x, elt), bytes,
+ first_byte, chunk_bytes))
+ {
+ bytes.truncate (start);
+ return false;
+ }
+ elt += 1;
+ first_byte = 0;
+ num_bytes -= chunk_bytes;
+ }
+ return true;
+ }
- /* We support any size mode. */
- max_bitsize = MAX (GET_MODE_BITSIZE (outermode),
- GET_MODE_BITSIZE (innermode));
+ /* All subsequent cases are limited to scalars. */
+ scalar_mode smode;
+ if (!is_a <scalar_mode> (mode, &smode))
+ return false;
- /* Unpack the value. */
+ /* Make sure that the region is in range. */
+ unsigned int end_byte = first_byte + num_bytes;
+ unsigned int mode_bytes = GET_MODE_SIZE (smode);
+ gcc_assert (end_byte <= mode_bytes);
- if (GET_CODE (op) == CONST_VECTOR)
+ if (CONST_SCALAR_INT_P (x))
{
- num_elem = CONST_VECTOR_NUNITS (op);
- elems = &CONST_VECTOR_ELT (op, 0);
- elem_bitsize = GET_MODE_UNIT_BITSIZE (innermode);
+ /* The target memory layout is affected by both BYTES_BIG_ENDIAN
+ and WORDS_BIG_ENDIAN. Use the subreg machinery to get the lsb
+ position of each byte. */
+ rtx_mode_t value (x, smode);
+ wide_int_ref value_wi (value);
+ for (unsigned int byte = first_byte; byte < end_byte; ++byte)
+ {
+ /* Always constant because the inputs are. */
+ unsigned int lsb
+ = subreg_size_lsb (1, mode_bytes, byte).to_constant ();
+ /* Operate directly on the encoding rather than using
+ wi::extract_uhwi, so that we preserve the sign or zero
+ extension for modes that are not a whole number of bits in
+ size. (Zero extension is only used for the combination of
+ innermode == BImode && STORE_FLAG_VALUE == 1). */
+ unsigned int elt = lsb / HOST_BITS_PER_WIDE_INT;
+ unsigned int shift = lsb % HOST_BITS_PER_WIDE_INT;
+ unsigned HOST_WIDE_INT uhwi = value_wi.elt (elt);
+ bytes.quick_push (uhwi >> shift);
+ }
+ return true;
}
- else
+
+ if (CONST_DOUBLE_P (x))
{
- num_elem = 1;
- elems = &op;
- elem_bitsize = max_bitsize;
+ /* real_to_target produces an array of integers in target memory order.
+ All integers before the last one have 32 bits; the last one may
+ have 32 bits or fewer, depending on whether the mode bitsize
+ is divisible by 32. Each of these integers is then laid out
+ in target memory as any other integer would be. */
+ long el32[MAX_BITSIZE_MODE_ANY_MODE / 32];
+ real_to_target (el32, CONST_DOUBLE_REAL_VALUE (x), smode);
+
+ /* The (maximum) number of target bytes per element of el32. */
+ unsigned int bytes_per_el32 = 32 / BITS_PER_UNIT;
+ gcc_assert (bytes_per_el32 != 0);
+
+ /* Build up the integers in a similar way to the CONST_SCALAR_INT_P
+ handling above. */
+ for (unsigned int byte = first_byte; byte < end_byte; ++byte)
+ {
+ unsigned int index = byte / bytes_per_el32;
+ unsigned int subbyte = byte % bytes_per_el32;
+ unsigned int int_bytes = MIN (bytes_per_el32,
+ mode_bytes - index * bytes_per_el32);
+ /* Always constant because the inputs are. */
+ unsigned int lsb
+ = subreg_size_lsb (1, int_bytes, subbyte).to_constant ();
+ bytes.quick_push ((unsigned long) el32[index] >> lsb);
+ }
+ return true;
}
- /* If this asserts, it is too complicated; reducing value_bit may help. */
- gcc_assert (BITS_PER_UNIT % value_bit == 0);
- /* I don't know how to handle endianness of sub-units. */
- gcc_assert (elem_bitsize % BITS_PER_UNIT == 0);
- for (elem = 0; elem < num_elem; elem++)
+ if (GET_CODE (x) == CONST_FIXED)
{
- unsigned char * vp;
- rtx el = elems[elem];
+ for (unsigned int byte = first_byte; byte < end_byte; ++byte)
+ {
+ /* Always constant because the inputs are. */
+ unsigned int lsb
+ = subreg_size_lsb (1, mode_bytes, byte).to_constant ();
+ unsigned HOST_WIDE_INT piece = CONST_FIXED_VALUE_LOW (x);
+ if (lsb >= HOST_BITS_PER_WIDE_INT)
+ {
+ lsb -= HOST_BITS_PER_WIDE_INT;
+ piece = CONST_FIXED_VALUE_HIGH (x);
+ }
+ bytes.quick_push (piece >> lsb);
+ }
+ return true;
+ }
- /* Vectors are kept in target memory order. (This is probably
- a mistake.) */
- {
- unsigned byte = (elem * elem_bitsize) / BITS_PER_UNIT;
- unsigned ibyte = (((num_elem - 1 - elem) * elem_bitsize)
- / BITS_PER_UNIT);
- unsigned word_byte = WORDS_BIG_ENDIAN ? ibyte : byte;
- unsigned subword_byte = BYTES_BIG_ENDIAN ? ibyte : byte;
- unsigned bytele = (subword_byte % UNITS_PER_WORD
- + (word_byte / UNITS_PER_WORD) * UNITS_PER_WORD);
- vp = value + (bytele * BITS_PER_UNIT) / value_bit;
- }
+ return false;
+}
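+
+/* For example, encoding all four bytes of (const_int 0x01020304) in SImode
+   pushes the bytes 0x04, 0x03, 0x02, 0x01 when BYTES_BIG_ENDIAN and
+   WORDS_BIG_ENDIAN are both false, and 0x01, 0x02, 0x03, 0x04 when both
+   are true, since BYTES follows target memory order.  */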
- switch (GET_CODE (el))
- {
- case CONST_INT:
- for (i = 0;
- i < HOST_BITS_PER_WIDE_INT && i < elem_bitsize;
- i += value_bit)
- *vp++ = INTVAL (el) >> i;
- /* CONST_INTs are always logically sign-extended. */
- for (; i < elem_bitsize; i += value_bit)
- *vp++ = INTVAL (el) < 0 ? -1 : 0;
- break;
+/* Read a vector of mode MODE from the target memory image given by BYTES,
+ starting at byte FIRST_BYTE. The vector is known to be encodable using
+ NPATTERNS interleaved patterns with NELTS_PER_PATTERN elements each,
+ and BYTES is known to have enough bytes to supply NPATTERNS *
+ NELTS_PER_PATTERN vector elements. Each element of BYTES contains
+ BITS_PER_UNIT bits and the bytes are in target memory order.
- case CONST_WIDE_INT:
- {
- rtx_mode_t val = rtx_mode_t (el, GET_MODE_INNER (innermode));
- unsigned char extend = wi::sign_mask (val);
- int prec = wi::get_precision (val);
-
- for (i = 0; i < prec && i < elem_bitsize; i += value_bit)
- *vp++ = wi::extract_uhwi (val, i, value_bit);
- for (; i < elem_bitsize; i += value_bit)
- *vp++ = extend;
- }
- break;
+ Return the vector on success, otherwise return NULL_RTX. */
- case CONST_DOUBLE:
- if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (el) == VOIDmode)
- {
- unsigned char extend = 0;
- /* If this triggers, someone should have generated a
- CONST_INT instead. */
- gcc_assert (elem_bitsize > HOST_BITS_PER_WIDE_INT);
-
- for (i = 0; i < HOST_BITS_PER_WIDE_INT; i += value_bit)
- *vp++ = CONST_DOUBLE_LOW (el) >> i;
- while (i < HOST_BITS_PER_DOUBLE_INT && i < elem_bitsize)
- {
- *vp++
- = CONST_DOUBLE_HIGH (el) >> (i - HOST_BITS_PER_WIDE_INT);
- i += value_bit;
- }
+rtx
+native_decode_vector_rtx (machine_mode mode, vec<target_unit> bytes,
+ unsigned int first_byte, unsigned int npatterns,
+ unsigned int nelts_per_pattern)
+{
+ rtx_vector_builder builder (mode, npatterns, nelts_per_pattern);
- if (CONST_DOUBLE_HIGH (el) >> (HOST_BITS_PER_WIDE_INT - 1))
- extend = -1;
- for (; i < elem_bitsize; i += value_bit)
- *vp++ = extend;
- }
- else
- {
- /* This is big enough for anything on the platform. */
- long tmp[MAX_BITSIZE_MODE_ANY_MODE / 32];
- scalar_float_mode el_mode;
+ unsigned int elt_bits = vector_element_size (GET_MODE_BITSIZE (mode),
+ GET_MODE_NUNITS (mode));
+ if (elt_bits < BITS_PER_UNIT)
+ {
+ /* This is the only case in which elements can be smaller than a byte.
+ Element 0 is always in the lsb of the containing byte. */
+ gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL);
+ for (unsigned int i = 0; i < builder.encoded_nelts (); ++i)
+ {
+ unsigned int bit_index = first_byte * BITS_PER_UNIT + i * elt_bits;
+ unsigned int byte_index = bit_index / BITS_PER_UNIT;
+ unsigned int lsb = bit_index % BITS_PER_UNIT;
+ builder.quick_push (bytes[byte_index] & (1 << lsb)
+ ? CONST1_RTX (BImode)
+ : CONST0_RTX (BImode));
+ }
+ }
+ else
+ {
+ for (unsigned int i = 0; i < builder.encoded_nelts (); ++i)
+ {
+ rtx x = native_decode_rtx (GET_MODE_INNER (mode), bytes, first_byte);
+ if (!x)
+ return NULL_RTX;
+ builder.quick_push (x);
+ first_byte += elt_bits / BITS_PER_UNIT;
+ }
+ }
+ return builder.build ();
+}
- el_mode = as_a <scalar_float_mode> (GET_MODE (el));
- int bitsize = GET_MODE_BITSIZE (el_mode);
+/* Read an rtx of mode MODE from the target memory image given by BYTES,
+ starting at byte FIRST_BYTE. Each element of BYTES contains BITS_PER_UNIT
+ bits and the bytes are in target memory order. The image has enough
+ values to specify all bytes of MODE.
- gcc_assert (bitsize <= elem_bitsize);
- gcc_assert (bitsize % value_bit == 0);
+ Return the rtx on success, otherwise return NULL_RTX. */
- real_to_target (tmp, CONST_DOUBLE_REAL_VALUE (el),
- GET_MODE (el));
+rtx
+native_decode_rtx (machine_mode mode, vec<target_unit> bytes,
+ unsigned int first_byte)
+{
+ if (VECTOR_MODE_P (mode))
+ {
+ /* If we know at compile time how many elements there are,
+ pull each element directly from BYTES. */
+ unsigned int nelts;
+ if (GET_MODE_NUNITS (mode).is_constant (&nelts))
+ return native_decode_vector_rtx (mode, bytes, first_byte, nelts, 1);
+ return NULL_RTX;
+ }
- /* real_to_target produces its result in words affected by
- FLOAT_WORDS_BIG_ENDIAN. However, we ignore this,
- and use WORDS_BIG_ENDIAN instead; see the documentation
- of SUBREG in rtl.texi. */
- for (i = 0; i < bitsize; i += value_bit)
- {
- int ibase;
- if (WORDS_BIG_ENDIAN)
- ibase = bitsize - 1 - i;
- else
- ibase = i;
- *vp++ = tmp[ibase / 32] >> i % 32;
- }
+ scalar_int_mode imode;
+ if (is_a <scalar_int_mode> (mode, &imode)
+ && GET_MODE_PRECISION (imode) <= MAX_BITSIZE_MODE_ANY_INT)
+ {
+ /* Pull the bytes msb first, so that we can use simple
+ shift-and-insert wide_int operations. */
+ unsigned int size = GET_MODE_SIZE (imode);
+ wide_int result (wi::zero (GET_MODE_PRECISION (imode)));
+ for (unsigned int i = 0; i < size; ++i)
+ {
+ unsigned int lsb = (size - i - 1) * BITS_PER_UNIT;
+ /* Always constant because the inputs are. */
+ unsigned int subbyte
+ = subreg_size_offset_from_lsb (1, size, lsb).to_constant ();
+ result <<= BITS_PER_UNIT;
+ result |= bytes[first_byte + subbyte];
+ }
+ return immed_wide_int_const (result, imode);
+ }
- /* It shouldn't matter what's done here, so fill it with
- zero. */
- for (; i < elem_bitsize; i += value_bit)
- *vp++ = 0;
- }
- break;
+ scalar_float_mode fmode;
+ if (is_a <scalar_float_mode> (mode, &fmode))
+ {
+ /* We need to build an array of integers in target memory order.
+ All integers before the last one have 32 bits; the last one may
+ have 32 bits or fewer, depending on whether the mode bitsize
+ is divisible by 32. */
+ long el32[MAX_BITSIZE_MODE_ANY_MODE / 32];
+ unsigned int num_el32 = CEIL (GET_MODE_BITSIZE (fmode), 32);
+ memset (el32, 0, num_el32 * sizeof (long));
+
+ /* The (maximum) number of target bytes per element of el32. */
+ unsigned int bytes_per_el32 = 32 / BITS_PER_UNIT;
+ gcc_assert (bytes_per_el32 != 0);
+
+ unsigned int mode_bytes = GET_MODE_SIZE (fmode);
+ for (unsigned int byte = 0; byte < mode_bytes; ++byte)
+ {
+ unsigned int index = byte / bytes_per_el32;
+ unsigned int subbyte = byte % bytes_per_el32;
+ unsigned int int_bytes = MIN (bytes_per_el32,
+ mode_bytes - index * bytes_per_el32);
+ /* Always constant because the inputs are. */
+ unsigned int lsb
+ = subreg_size_lsb (1, int_bytes, subbyte).to_constant ();
+ el32[index] |= (unsigned long) bytes[first_byte + byte] << lsb;
+ }
+ REAL_VALUE_TYPE r;
+ real_from_target (&r, el32, fmode);
+ return const_double_from_real_value (r, fmode);
+ }
- case CONST_FIXED:
- if (elem_bitsize <= HOST_BITS_PER_WIDE_INT)
- {
- for (i = 0; i < elem_bitsize; i += value_bit)
- *vp++ = CONST_FIXED_VALUE_LOW (el) >> i;
- }
+ if (ALL_SCALAR_FIXED_POINT_MODE_P (mode))
+ {
+ scalar_mode smode = as_a <scalar_mode> (mode);
+ FIXED_VALUE_TYPE f;
+ f.data.low = 0;
+ f.data.high = 0;
+ f.mode = smode;
+
+ unsigned int mode_bytes = GET_MODE_SIZE (smode);
+ for (unsigned int byte = 0; byte < mode_bytes; ++byte)
+ {
+ /* Always constant because the inputs are. */
+ unsigned int lsb
+ = subreg_size_lsb (1, mode_bytes, byte).to_constant ();
+ unsigned HOST_WIDE_INT unit = bytes[first_byte + byte];
+ if (lsb >= HOST_BITS_PER_WIDE_INT)
+ f.data.high |= unit << (lsb - HOST_BITS_PER_WIDE_INT);
else
- {
- for (i = 0; i < HOST_BITS_PER_WIDE_INT; i += value_bit)
- *vp++ = CONST_FIXED_VALUE_LOW (el) >> i;
- for (; i < HOST_BITS_PER_DOUBLE_INT && i < elem_bitsize;
- i += value_bit)
- *vp++ = CONST_FIXED_VALUE_HIGH (el)
- >> (i - HOST_BITS_PER_WIDE_INT);
- for (; i < elem_bitsize; i += value_bit)
- *vp++ = 0;
- }
- break;
-
- default:
- gcc_unreachable ();
+ f.data.low |= unit << lsb;
}
+ return CONST_FIXED_FROM_FIXED_VALUE (f, mode);
}
- /* Now, pick the right byte to start with. */
- /* Renumber BYTE so that the least-significant byte is byte 0. A special
- case is paradoxical SUBREGs, which shouldn't be adjusted since they
- will already have offset 0. */
- if (GET_MODE_SIZE (innermode) >= GET_MODE_SIZE (outermode))
+ return NULL_RTX;
+}
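+
+/* native_decode_rtx is the inverse of native_encode_rtx for complete
+   values: e.g. the SImode bytes { 0x04, 0x03, 0x02, 0x01 } decode to
+   (const_int 0x01020304) when BYTES_BIG_ENDIAN and WORDS_BIG_ENDIAN are
+   both false, matching the encoding example above.  */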
+
+/* Simplify a byte offset BYTE into CONST_VECTOR X. The main purpose
+ is to convert a runtime BYTE value into a constant one. */
+
+static poly_uint64
+simplify_const_vector_byte_offset (rtx x, poly_uint64 byte)
+{
+ /* Cope with MODE_VECTOR_BOOL by operating on bits rather than bytes. */
+ machine_mode mode = GET_MODE (x);
+ unsigned int elt_bits = vector_element_size (GET_MODE_BITSIZE (mode),
+ GET_MODE_NUNITS (mode));
+ /* The number of bits needed to encode one element from each pattern. */
+ unsigned int sequence_bits = CONST_VECTOR_NPATTERNS (x) * elt_bits;
+
+ /* Identify the start point in terms of a sequence number and a byte offset
+ within that sequence. */
+ poly_uint64 first_sequence;
+ unsigned HOST_WIDE_INT subbit;
+ if (can_div_trunc_p (byte * BITS_PER_UNIT, sequence_bits,
+ &first_sequence, &subbit))
{
- unsigned ibyte = (GET_MODE_SIZE (innermode) - GET_MODE_SIZE (outermode)
- - byte);
- unsigned word_byte = WORDS_BIG_ENDIAN ? ibyte : byte;
- unsigned subword_byte = BYTES_BIG_ENDIAN ? ibyte : byte;
- byte = (subword_byte % UNITS_PER_WORD
- + (word_byte / UNITS_PER_WORD) * UNITS_PER_WORD);
+ unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (x);
+ if (nelts_per_pattern == 1)
+ /* This is a duplicated vector, so the value of FIRST_SEQUENCE
+ doesn't matter. */
+ byte = subbit / BITS_PER_UNIT;
+ else if (nelts_per_pattern == 2 && known_gt (first_sequence, 0U))
+ {
+ /* The subreg drops the first element from each pattern and
+ only uses the second element. Find the first sequence
+ that starts on a byte boundary. */
+ subbit += least_common_multiple (sequence_bits, BITS_PER_UNIT);
+ byte = subbit / BITS_PER_UNIT;
+ }
}
+ return byte;
+}
- /* BYTE should still be inside OP. (Note that BYTE is unsigned,
- so if it's become negative it will instead be very large.) */
- gcc_assert (byte < GET_MODE_SIZE (innermode));
+/* Subroutine of simplify_subreg in which:
- /* Convert from bytes to chunks of size value_bit. */
- value_start = byte * (BITS_PER_UNIT / value_bit);
+ - X is known to be a CONST_VECTOR
+ - OUTERMODE is known to be a vector mode
- /* Re-pack the value. */
- num_elem = GET_MODE_NUNITS (outermode);
+ Try to handle the subreg by operating on the CONST_VECTOR encoding
+ rather than on each individual element of the CONST_VECTOR.
- if (VECTOR_MODE_P (outermode))
+ Return the simplified subreg on success, otherwise return NULL_RTX. */
+
+static rtx
+simplify_const_vector_subreg (machine_mode outermode, rtx x,
+ machine_mode innermode, unsigned int first_byte)
+{
+ /* Paradoxical subregs of vectors have dubious semantics. */
+ if (paradoxical_subreg_p (outermode, innermode))
+ return NULL_RTX;
+
+ /* We can only preserve the semantics of a stepped pattern if the new
+ vector element is the same as the original one. */
+ if (CONST_VECTOR_STEPPED_P (x)
+ && GET_MODE_INNER (outermode) != GET_MODE_INNER (innermode))
+ return NULL_RTX;
+
+ /* Cope with MODE_VECTOR_BOOL by operating on bits rather than bytes. */
+ unsigned int x_elt_bits
+ = vector_element_size (GET_MODE_BITSIZE (innermode),
+ GET_MODE_NUNITS (innermode));
+ unsigned int out_elt_bits
+ = vector_element_size (GET_MODE_BITSIZE (outermode),
+ GET_MODE_NUNITS (outermode));
+
+ /* The number of bits needed to encode one element from every pattern
+ of the original vector. */
+ unsigned int x_sequence_bits = CONST_VECTOR_NPATTERNS (x) * x_elt_bits;
+
+ /* The number of bits needed to encode one element from every pattern
+ of the result. */
+ unsigned int out_sequence_bits
+ = least_common_multiple (x_sequence_bits, out_elt_bits);
+
+ /* Work out the number of interleaved patterns in the output vector
+ and the number of encoded elements per pattern. */
+ unsigned int out_npatterns = out_sequence_bits / out_elt_bits;
+ unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (x);
+
+ /* The encoding scheme requires the number of elements to be a multiple
+ of the number of patterns, so that each pattern appears at least once
+ and so that the same number of elements appear from each pattern. */
+ bool ok_p = multiple_p (GET_MODE_NUNITS (outermode), out_npatterns);
+ unsigned int const_nunits;
+ if (GET_MODE_NUNITS (outermode).is_constant (&const_nunits)
+ && (!ok_p || out_npatterns * nelts_per_pattern > const_nunits))
{
- result_v = rtvec_alloc (num_elem);
- elems = &RTVEC_ELT (result_v, 0);
+ /* Either the encoding is invalid, or applying it would give us
+ more elements than we need. Just encode each element directly. */
+ out_npatterns = const_nunits;
+ nelts_per_pattern = 1;
}
- else
- elems = &result_s;
+ else if (!ok_p)
+ return NULL_RTX;
- outer_submode = GET_MODE_INNER (outermode);
- outer_class = GET_MODE_CLASS (outer_submode);
- elem_bitsize = GET_MODE_BITSIZE (outer_submode);
+ /* Get enough bytes of X to form the new encoding. */
+ unsigned int buffer_bits = out_npatterns * nelts_per_pattern * out_elt_bits;
+ unsigned int buffer_bytes = CEIL (buffer_bits, BITS_PER_UNIT);
+ auto_vec<target_unit, 128> buffer (buffer_bytes);
+ if (!native_encode_rtx (innermode, x, buffer, first_byte, buffer_bytes))
+ return NULL_RTX;
- gcc_assert (elem_bitsize % value_bit == 0);
- gcc_assert (elem_bitsize + value_start * value_bit <= max_bitsize);
+ /* Reencode the bytes as OUTERMODE. */
+ return native_decode_vector_rtx (outermode, buffer, 0, out_npatterns,
+ nelts_per_pattern);
+}
- for (elem = 0; elem < num_elem; elem++)
- {
- unsigned char *vp;
+/* Try to simplify a subreg of a constant by encoding the subreg region
+ as a sequence of target bytes and reading them back in the new mode.
+ Return the new value on success, otherwise return null.
- /* Vectors are stored in target memory order. (This is probably
- a mistake.) */
- {
- unsigned byte = (elem * elem_bitsize) / BITS_PER_UNIT;
- unsigned ibyte = (((num_elem - 1 - elem) * elem_bitsize)
- / BITS_PER_UNIT);
- unsigned word_byte = WORDS_BIG_ENDIAN ? ibyte : byte;
- unsigned subword_byte = BYTES_BIG_ENDIAN ? ibyte : byte;
- unsigned bytele = (subword_byte % UNITS_PER_WORD
- + (word_byte / UNITS_PER_WORD) * UNITS_PER_WORD);
- vp = value + value_start + (bytele * BITS_PER_UNIT) / value_bit;
- }
+ The subreg has outer mode OUTERMODE, inner mode INNERMODE, inner value X
+ and byte offset FIRST_BYTE. */
- switch (outer_class)
- {
- case MODE_INT:
- case MODE_PARTIAL_INT:
- {
- int u;
- int base = 0;
- int units
- = (GET_MODE_BITSIZE (outer_submode) + HOST_BITS_PER_WIDE_INT - 1)
- / HOST_BITS_PER_WIDE_INT;
- HOST_WIDE_INT tmp[MAX_BITSIZE_MODE_ANY_INT / HOST_BITS_PER_WIDE_INT];
- wide_int r;
-
- if (GET_MODE_PRECISION (outer_submode) > MAX_BITSIZE_MODE_ANY_INT)
- return NULL_RTX;
- for (u = 0; u < units; u++)
- {
- unsigned HOST_WIDE_INT buf = 0;
- for (i = 0;
- i < HOST_BITS_PER_WIDE_INT && base + i < elem_bitsize;
- i += value_bit)
- buf |= (unsigned HOST_WIDE_INT)(*vp++ & value_mask) << i;
-
- tmp[u] = buf;
- base += HOST_BITS_PER_WIDE_INT;
- }
- r = wide_int::from_array (tmp, units,
- GET_MODE_PRECISION (outer_submode));
-#if TARGET_SUPPORTS_WIDE_INT == 0
- /* Make sure r will fit into CONST_INT or CONST_DOUBLE. */
- if (wi::min_precision (r, SIGNED) > HOST_BITS_PER_DOUBLE_INT)
- return NULL_RTX;
-#endif
- elems[elem] = immed_wide_int_const (r, outer_submode);
- }
- break;
+static rtx
+simplify_immed_subreg (fixed_size_mode outermode, rtx x,
+ machine_mode innermode, unsigned int first_byte)
+{
+ unsigned int buffer_bytes = GET_MODE_SIZE (outermode);
+ auto_vec<target_unit, 128> buffer (buffer_bytes);
- case MODE_FLOAT:
- case MODE_DECIMAL_FLOAT:
- {
- REAL_VALUE_TYPE r;
- long tmp[MAX_BITSIZE_MODE_ANY_MODE / 32] = { 0 };
-
- /* real_from_target wants its input in words affected by
- FLOAT_WORDS_BIG_ENDIAN. However, we ignore this,
- and use WORDS_BIG_ENDIAN instead; see the documentation
- of SUBREG in rtl.texi. */
- for (i = 0; i < elem_bitsize; i += value_bit)
- {
- int ibase;
- if (WORDS_BIG_ENDIAN)
- ibase = elem_bitsize - 1 - i;
- else
- ibase = i;
- tmp[ibase / 32] |= (*vp++ & value_mask) << i % 32;
- }
+ /* Some ports misuse CCmode. */
+ if (GET_MODE_CLASS (outermode) == MODE_CC && CONST_INT_P (x))
+ return x;
- real_from_target (&r, tmp, outer_submode);
- elems[elem] = const_double_from_real_value (r, outer_submode);
- }
- break;
+ /* Paradoxical subregs read undefined values for bytes outside of the
+ inner value. However, we have traditionally always sign-extended
+ integer constants and zero-extended others. */
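+  /* For example (illustrative): taking a paradoxical HImode subreg of the
+     QImode constant -1 pads the buffer with 0xff bytes, so the result is
+     still -1; non-integer constants are padded with zero bytes instead.  */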
+ unsigned int inner_bytes = buffer_bytes;
+ if (paradoxical_subreg_p (outermode, innermode))
+ {
+ if (!GET_MODE_SIZE (innermode).is_constant (&inner_bytes))
+ return NULL_RTX;
- case MODE_FRACT:
- case MODE_UFRACT:
- case MODE_ACCUM:
- case MODE_UACCUM:
- {
- FIXED_VALUE_TYPE f;
- f.data.low = 0;
- f.data.high = 0;
- f.mode = outer_submode;
-
- for (i = 0;
- i < HOST_BITS_PER_WIDE_INT && i < elem_bitsize;
- i += value_bit)
- f.data.low |= (unsigned HOST_WIDE_INT)(*vp++ & value_mask) << i;
- for (; i < elem_bitsize; i += value_bit)
- f.data.high |= ((unsigned HOST_WIDE_INT)(*vp++ & value_mask)
- << (i - HOST_BITS_PER_WIDE_INT));
-
- elems[elem] = CONST_FIXED_FROM_FIXED_VALUE (f, outer_submode);
- }
- break;
+ target_unit filler = 0;
+ if (CONST_SCALAR_INT_P (x) && wi::neg_p (rtx_mode_t (x, innermode)))
+ filler = -1;
- default:
- gcc_unreachable ();
- }
+ /* Add any leading bytes due to big-endian layout. The number of
+ bytes must be constant because both modes have constant size. */
+ unsigned int leading_bytes
+ = -byte_lowpart_offset (outermode, innermode).to_constant ();
+ for (unsigned int i = 0; i < leading_bytes; ++i)
+ buffer.quick_push (filler);
+
+ if (!native_encode_rtx (innermode, x, buffer, first_byte, inner_bytes))
+ return NULL_RTX;
+
+ /* Add any trailing bytes due to little-endian layout. */
+ while (buffer.length () < buffer_bytes)
+ buffer.quick_push (filler);
}
- if (VECTOR_MODE_P (outermode))
- return gen_rtx_CONST_VECTOR (outermode, result_v);
else
- return result_s;
+ {
+ if (!native_encode_rtx (innermode, x, buffer, first_byte, inner_bytes))
+ return NULL_RTX;
+ }
+ return native_decode_rtx (outermode, buffer, 0);
}
/* Simplify SUBREG:OUTERMODE(OP:INNERMODE, BYTE)
Return 0 if no simplifications are possible. */
rtx
simplify_subreg (machine_mode outermode, rtx op,
- machine_mode innermode, unsigned int byte)
+ machine_mode innermode, poly_uint64 byte)
{
/* Little bit of sanity checking. */
gcc_assert (innermode != VOIDmode);
gcc_assert (GET_MODE (op) == innermode
|| GET_MODE (op) == VOIDmode);
- if ((byte % GET_MODE_SIZE (outermode)) != 0)
+ poly_uint64 outersize = GET_MODE_SIZE (outermode);
+ if (!multiple_p (byte, outersize))
return NULL_RTX;
- if (byte >= GET_MODE_SIZE (innermode))
+ poly_uint64 innersize = GET_MODE_SIZE (innermode);
+ if (maybe_ge (byte, innersize))
return NULL_RTX;
- if (outermode == innermode && !byte)
+ if (outermode == innermode && known_eq (byte, 0U))
return op;
- if (byte % GET_MODE_UNIT_SIZE (innermode) == 0)
+ if (GET_CODE (op) == CONST_VECTOR)
+ byte = simplify_const_vector_byte_offset (op, byte);
+
+ if (multiple_p (byte, GET_MODE_UNIT_SIZE (innermode)))
{
rtx elt;
if (CONST_SCALAR_INT_P (op)
|| CONST_DOUBLE_AS_FLOAT_P (op)
- || GET_CODE (op) == CONST_FIXED
+ || CONST_FIXED_P (op)
|| GET_CODE (op) == CONST_VECTOR)
{
- /* simplify_immed_subreg deconstructs OP into bytes and constructs
- the result from bytes, so it only works if the sizes of the modes
- are known at compile time. Cases that apply to general modes
- should be handled here before calling simplify_immed_subreg. */
- fixed_size_mode fs_outermode, fs_innermode;
- if (is_a <fixed_size_mode> (outermode, &fs_outermode)
- && is_a <fixed_size_mode> (innermode, &fs_innermode))
- return simplify_immed_subreg (fs_outermode, op, fs_innermode, byte);
+ unsigned HOST_WIDE_INT cbyte;
+ if (byte.is_constant (&cbyte))
+ {
+ if (GET_CODE (op) == CONST_VECTOR && VECTOR_MODE_P (outermode))
+ {
+ rtx tmp = simplify_const_vector_subreg (outermode, op,
+ innermode, cbyte);
+ if (tmp)
+ return tmp;
+ }
- return NULL_RTX;
+ fixed_size_mode fs_outermode;
+ if (is_a <fixed_size_mode> (outermode, &fs_outermode))
+ return simplify_immed_subreg (fs_outermode, op, innermode, cbyte);
+ }
}
/* Changing mode twice with SUBREG => just change it once,
if (GET_CODE (op) == SUBREG)
{
machine_mode innermostmode = GET_MODE (SUBREG_REG (op));
+ poly_uint64 innermostsize = GET_MODE_SIZE (innermostmode);
rtx newx;
if (outermode == innermostmode
- && byte == 0 && SUBREG_BYTE (op) == 0)
+ && known_eq (byte, 0U)
+ && known_eq (SUBREG_BYTE (op), 0))
return SUBREG_REG (op);
/* Work out the memory offset of the final OUTERMODE value relative
to the inner value of OP. */
- HOST_WIDE_INT mem_offset = subreg_memory_offset (outermode,
- innermode, byte);
- HOST_WIDE_INT op_mem_offset = subreg_memory_offset (op);
- HOST_WIDE_INT final_offset = mem_offset + op_mem_offset;
+ poly_int64 mem_offset = subreg_memory_offset (outermode,
+ innermode, byte);
+ poly_int64 op_mem_offset = subreg_memory_offset (op);
+ poly_int64 final_offset = mem_offset + op_mem_offset;
/* See whether resulting subreg will be paradoxical. */
if (!paradoxical_subreg_p (outermode, innermostmode))
{
- /* In nonparadoxical subregs we can't handle negative offsets. */
- if (final_offset < 0)
- return NULL_RTX;
/* Bail out in case resulting subreg would be incorrect. */
- if (final_offset % GET_MODE_SIZE (outermode)
- || (unsigned) final_offset >= GET_MODE_SIZE (innermostmode))
+ if (maybe_lt (final_offset, 0)
+ || maybe_ge (poly_uint64 (final_offset), innermostsize)
+ || !multiple_p (final_offset, outersize))
return NULL_RTX;
}
else
{
- HOST_WIDE_INT required_offset
- = subreg_memory_offset (outermode, innermostmode, 0);
- if (final_offset != required_offset)
+ poly_int64 required_offset = subreg_memory_offset (outermode,
+ innermostmode, 0);
+ if (maybe_ne (final_offset, required_offset))
return NULL_RTX;
/* Paradoxical subregs always have byte offset 0. */
final_offset = 0;
if (SUBREG_PROMOTED_VAR_P (op)
&& SUBREG_PROMOTED_SIGN (op) >= 0
&& GET_MODE_CLASS (outermode) == MODE_INT
- && IN_RANGE (GET_MODE_SIZE (outermode),
- GET_MODE_SIZE (innermode),
- GET_MODE_SIZE (innermostmode))
+ && known_ge (outersize, innersize)
+ && known_le (outersize, innermostsize)
&& subreg_lowpart_p (newx))
{
SUBREG_PROMOTED_VAR_P (newx) = 1;
/* Propagate original regno. We don't have any way to specify
the offset inside original regno, so do so only for lowpart.
- The information is used only by alias analysis that can not
+ The information is used only by alias analysis that cannot
grok partial register anyway. */
- if (subreg_lowpart_offset (outermode, innermode) == byte)
+ if (known_eq (subreg_lowpart_offset (outermode, innermode), byte))
ORIGINAL_REGNO (x) = ORIGINAL_REGNO (op);
return x;
}
have instruction to move the whole thing. */
&& (! MEM_VOLATILE_P (op)
|| ! have_insn_for (SET, innermode))
- && GET_MODE_SIZE (outermode) <= GET_MODE_SIZE (GET_MODE (op)))
+ && known_le (outersize, innersize))
return adjust_address_nv (op, outermode, byte);
/* Handle complex or vector values represented as CONCAT or VEC_CONCAT
if (GET_CODE (op) == CONCAT
|| GET_CODE (op) == VEC_CONCAT)
{
- unsigned int part_size, final_offset;
+ poly_uint64 final_offset;
rtx part, res;
machine_mode part_mode = GET_MODE (XEXP (op, 0));
if (part_mode == VOIDmode)
part_mode = GET_MODE_INNER (GET_MODE (op));
- part_size = GET_MODE_SIZE (part_mode);
- if (byte < part_size)
+ poly_uint64 part_size = GET_MODE_SIZE (part_mode);
+ if (known_lt (byte, part_size))
{
part = XEXP (op, 0);
final_offset = byte;
}
- else
+ else if (known_ge (byte, part_size))
{
part = XEXP (op, 1);
final_offset = byte - part_size;
}
+ else
+ return NULL_RTX;
- if (final_offset + GET_MODE_SIZE (outermode) > part_size)
+ if (maybe_gt (final_offset + outersize, part_size))
return NULL_RTX;
part_mode = GET_MODE (part);
return NULL_RTX;
}
+ /* Simplify
+ (subreg (vec_merge (X)
+ (vector)
+ (const_int ((1 << N) | M)))
+ (N * sizeof (outermode)))
+ to
+ (subreg (X) (N * sizeof (outermode)))
+ */
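+  /* For instance (illustrative, with R1 and R2 standing for arbitrary
+     vector registers), OUTERMODE == SImode and N == 1 gives
+	(subreg:SI (vec_merge:V4SI (reg:V4SI R1)
+				   (reg:V4SI R2)
+				   (const_int 2)) 4)
+     -> (subreg:SI (reg:V4SI R1) 4),
+     since bit 1 of the mask selects the first vec_merge operand.  */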
+ unsigned int idx;
+ if (constant_multiple_p (byte, GET_MODE_SIZE (outermode), &idx)
+ && idx < HOST_BITS_PER_WIDE_INT
+ && GET_CODE (op) == VEC_MERGE
+ && GET_MODE_INNER (innermode) == outermode
+ && CONST_INT_P (XEXP (op, 2))
+ && (UINTVAL (XEXP (op, 2)) & (HOST_WIDE_INT_1U << idx)) != 0)
+ return simplify_gen_subreg (outermode, XEXP (op, 0), innermode, byte);
+
/* A SUBREG resulting from a zero extension may fold to zero if
it extracts higher bits than the ZERO_EXTEND's source bits.
if (GET_CODE (op) == ZERO_EXTEND && SCALAR_INT_MODE_P (innermode))
{
- unsigned int bitpos = subreg_lsb_1 (outermode, innermode, byte);
- if (bitpos >= GET_MODE_PRECISION (GET_MODE (XEXP (op, 0))))
+ poly_uint64 bitpos = subreg_lsb_1 (outermode, innermode, byte);
+ if (known_ge (bitpos, GET_MODE_PRECISION (GET_MODE (XEXP (op, 0)))))
return CONST0_RTX (outermode);
}
scalar_int_mode int_outermode, int_innermode;
if (is_a <scalar_int_mode> (outermode, &int_outermode)
&& is_a <scalar_int_mode> (innermode, &int_innermode)
- && byte == subreg_lowpart_offset (int_outermode, int_innermode))
+ && known_eq (byte, subreg_lowpart_offset (int_outermode, int_innermode)))
{
/* Handle polynomial integers. The upper bits of a paradoxical
subreg are undefined, so this is safe regardless of whether
}
}
+ /* If OP is a vector comparison and the subreg is not changing the
+ number of elements or the size of the elements, change the result
+ of the comparison to the new mode. */
+ if (COMPARISON_P (op)
+ && VECTOR_MODE_P (outermode)
+ && VECTOR_MODE_P (innermode)
+ && known_eq (GET_MODE_NUNITS (outermode), GET_MODE_NUNITS (innermode))
+ && known_eq (GET_MODE_UNIT_SIZE (outermode),
+ GET_MODE_UNIT_SIZE (innermode)))
+ return simplify_gen_relational (GET_CODE (op), outermode, innermode,
+ XEXP (op, 0), XEXP (op, 1));
return NULL_RTX;
}
rtx
simplify_gen_subreg (machine_mode outermode, rtx op,
- machine_mode innermode, unsigned int byte)
+ machine_mode innermode, poly_uint64 byte)
{
rtx newx;
{
scalar_mode inner_mode = GET_MODE_INNER (mode);
rtx duplicate = gen_rtx_VEC_DUPLICATE (mode, scalar_reg);
- unsigned int nunits = GET_MODE_NUNITS (mode);
+ poly_uint64 nunits = GET_MODE_NUNITS (mode);
if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
{
/* Test some simple unary cases with VEC_DUPLICATE arguments. */
simplify_binary_operation (VEC_SELECT, inner_mode,
duplicate, zero_par));
- /* And again with the final element. */
- rtx last_index = gen_int_mode (GET_MODE_NUNITS (mode) - 1, word_mode);
- rtx last_par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, last_index));
- ASSERT_RTX_PTR_EQ (scalar_reg,
- simplify_binary_operation (VEC_SELECT, inner_mode,
- duplicate, last_par));
+ unsigned HOST_WIDE_INT const_nunits;
+ if (nunits.is_constant (&const_nunits))
+ {
+ /* And again with the final element. */
+ rtx last_index = gen_int_mode (const_nunits - 1, word_mode);
+ rtx last_par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, last_index));
+ ASSERT_RTX_PTR_EQ (scalar_reg,
+ simplify_binary_operation (VEC_SELECT, inner_mode,
+ duplicate, last_par));
+
+ /* Test a scalar subreg of a VEC_MERGE of a VEC_DUPLICATE. */
+ rtx vector_reg = make_test_reg (mode);
+ for (unsigned HOST_WIDE_INT i = 0; i < const_nunits; i++)
+ {
+ if (i >= HOST_BITS_PER_WIDE_INT)
+ break;
+ rtx mask = GEN_INT ((HOST_WIDE_INT_1U << i) | (i + 1));
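+	  /* Bit I of the mask is always set, so lane I of VM is taken from
+	     DUPLICATE and the subreg below should fold to SCALAR_REG.  */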
+ rtx vm = gen_rtx_VEC_MERGE (mode, duplicate, vector_reg, mask);
+ poly_uint64 offset = i * GET_MODE_SIZE (inner_mode);
+ ASSERT_RTX_EQ (scalar_reg,
+ simplify_gen_subreg (inner_mode, vm,
+ mode, offset));
+ }
+ }
/* Test a scalar subreg of a VEC_DUPLICATE. */
- unsigned int offset = subreg_lowpart_offset (inner_mode, mode);
+ poly_uint64 offset = subreg_lowpart_offset (inner_mode, mode);
ASSERT_RTX_EQ (scalar_reg,
simplify_gen_subreg (inner_mode, duplicate,
mode, offset));
machine_mode narrower_mode;
- if (nunits > 2
+ if (maybe_ne (nunits, 2U)
+ && multiple_p (nunits, 2)
&& mode_for_vector (inner_mode, 2).exists (&narrower_mode)
&& VECTOR_MODE_P (narrower_mode))
{
+ /* Test VEC_DUPLICATE of a vector. */
+ rtx_vector_builder nbuilder (narrower_mode, 2, 1);
+ nbuilder.quick_push (const0_rtx);
+ nbuilder.quick_push (const1_rtx);
+ rtx_vector_builder builder (mode, 2, 1);
+ builder.quick_push (const0_rtx);
+ builder.quick_push (const1_rtx);
+ ASSERT_RTX_EQ (builder.build (),
+ simplify_unary_operation (VEC_DUPLICATE, mode,
+ nbuilder.build (),
+ narrower_mode));
+
/* Test VEC_SELECT of a vector. */
rtx vec_par
= gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, const1_rtx, const0_rtx));
duplicate, vec_par));
/* Test a vector subreg of a VEC_DUPLICATE. */
- unsigned int offset = subreg_lowpart_offset (narrower_mode, mode);
+ poly_uint64 offset = subreg_lowpart_offset (narrower_mode, mode);
ASSERT_RTX_EQ (narrower_duplicate,
simplify_gen_subreg (narrower_mode, duplicate,
mode, offset));
ASSERT_RTX_EQ (series_0_m1,
simplify_binary_operation (VEC_SERIES, mode, const0_rtx,
constm1_rtx));
+
+ /* Test NEG on constant vector series. */
+ ASSERT_RTX_EQ (series_0_m1,
+ simplify_unary_operation (NEG, mode, series_0_1, mode));
+ ASSERT_RTX_EQ (series_0_1,
+ simplify_unary_operation (NEG, mode, series_0_m1, mode));
+
+ /* Test PLUS and MINUS on constant vector series. */
+ rtx scalar2 = gen_int_mode (2, inner_mode);
+ rtx scalar3 = gen_int_mode (3, inner_mode);
+ rtx series_1_1 = gen_const_vec_series (mode, const1_rtx, const1_rtx);
+ rtx series_0_2 = gen_const_vec_series (mode, const0_rtx, scalar2);
+ rtx series_1_3 = gen_const_vec_series (mode, const1_rtx, scalar3);
+ ASSERT_RTX_EQ (series_1_1,
+ simplify_binary_operation (PLUS, mode, series_0_1,
+ CONST1_RTX (mode)));
+ ASSERT_RTX_EQ (series_0_m1,
+ simplify_binary_operation (PLUS, mode, CONST0_RTX (mode),
+ series_0_m1));
+ ASSERT_RTX_EQ (series_1_3,
+ simplify_binary_operation (PLUS, mode, series_1_1,
+ series_0_2));
+ ASSERT_RTX_EQ (series_0_1,
+ simplify_binary_operation (MINUS, mode, series_1_1,
+ CONST1_RTX (mode)));
+ ASSERT_RTX_EQ (series_1_1,
+ simplify_binary_operation (MINUS, mode, CONST1_RTX (mode),
+ series_0_m1));
+ ASSERT_RTX_EQ (series_1_1,
+ simplify_binary_operation (MINUS, mode, series_1_3,
+ series_0_2));
+
+ /* Test MULT between constant vectors. */
+ rtx vec2 = gen_const_vec_duplicate (mode, scalar2);
+ rtx vec3 = gen_const_vec_duplicate (mode, scalar3);
+ rtx scalar9 = gen_int_mode (9, inner_mode);
+ rtx series_3_9 = gen_const_vec_series (mode, scalar3, scalar9);
+ ASSERT_RTX_EQ (series_0_2,
+ simplify_binary_operation (MULT, mode, series_0_1, vec2));
+ ASSERT_RTX_EQ (series_3_9,
+ simplify_binary_operation (MULT, mode, vec3, series_1_3));
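+  /* series_0_1 * series_0_1 would be { 0, 1, 4, 9, ... }, which is not
+     itself a VEC_SERIES, so the fold must fail for variable-length
+     vectors (fixed-length vectors can be expanded elementwise instead).  */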
+ if (!GET_MODE_NUNITS (mode).is_constant ())
+ ASSERT_FALSE (simplify_binary_operation (MULT, mode, series_0_1,
+ series_0_1));
+
+ /* Test ASHIFT between constant vectors. */
+ ASSERT_RTX_EQ (series_0_2,
+ simplify_binary_operation (ASHIFT, mode, series_0_1,
+ CONST1_RTX (mode)));
+ if (!GET_MODE_NUNITS (mode).is_constant ())
+ ASSERT_FALSE (simplify_binary_operation (ASHIFT, mode, CONST1_RTX (mode),
+ series_0_1));
+}
+
+/* Verify simplify_merge_mask works correctly. */
+
+static void
+test_vec_merge (machine_mode mode)
+{
+ rtx op0 = make_test_reg (mode);
+ rtx op1 = make_test_reg (mode);
+ rtx op2 = make_test_reg (mode);
+ rtx op3 = make_test_reg (mode);
+ rtx op4 = make_test_reg (mode);
+ rtx op5 = make_test_reg (mode);
+ rtx mask1 = make_test_reg (SImode);
+ rtx mask2 = make_test_reg (SImode);
+ rtx vm1 = gen_rtx_VEC_MERGE (mode, op0, op1, mask1);
+ rtx vm2 = gen_rtx_VEC_MERGE (mode, op2, op3, mask1);
+ rtx vm3 = gen_rtx_VEC_MERGE (mode, op4, op5, mask1);
+
+ /* Simple vec_merge. */
+ ASSERT_EQ (op0, simplify_merge_mask (vm1, mask1, 0));
+ ASSERT_EQ (op1, simplify_merge_mask (vm1, mask1, 1));
+ ASSERT_EQ (NULL_RTX, simplify_merge_mask (vm1, mask2, 0));
+ ASSERT_EQ (NULL_RTX, simplify_merge_mask (vm1, mask2, 1));
+
+ /* Nested vec_merge.
+ It's tempting to make this simplify right down to opN, but we don't
+ because all the simplify_* functions assume that the operands have
+ already been simplified. */
+ rtx nvm = gen_rtx_VEC_MERGE (mode, vm1, vm2, mask1);
+ ASSERT_EQ (vm1, simplify_merge_mask (nvm, mask1, 0));
+ ASSERT_EQ (vm2, simplify_merge_mask (nvm, mask1, 1));
+
+ /* Intermediate unary op. */
+ rtx unop = gen_rtx_NOT (mode, vm1);
+ ASSERT_RTX_EQ (gen_rtx_NOT (mode, op0),
+ simplify_merge_mask (unop, mask1, 0));
+ ASSERT_RTX_EQ (gen_rtx_NOT (mode, op1),
+ simplify_merge_mask (unop, mask1, 1));
+
+ /* Intermediate binary op. */
+ rtx binop = gen_rtx_PLUS (mode, vm1, vm2);
+ ASSERT_RTX_EQ (gen_rtx_PLUS (mode, op0, op2),
+ simplify_merge_mask (binop, mask1, 0));
+ ASSERT_RTX_EQ (gen_rtx_PLUS (mode, op1, op3),
+ simplify_merge_mask (binop, mask1, 1));
+
+ /* Intermediate ternary op. */
+ rtx tenop = gen_rtx_FMA (mode, vm1, vm2, vm3);
+ ASSERT_RTX_EQ (gen_rtx_FMA (mode, op0, op2, op4),
+ simplify_merge_mask (tenop, mask1, 0));
+ ASSERT_RTX_EQ (gen_rtx_FMA (mode, op1, op3, op5),
+ simplify_merge_mask (tenop, mask1, 1));
+
+ /* Side effects. */
+ rtx badop0 = gen_rtx_PRE_INC (mode, op0);
+ rtx badvm = gen_rtx_VEC_MERGE (mode, badop0, op1, mask1);
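+  /* Selecting operand 0 simply returns the PRE_INC expression, but
+     selecting operand 1 would discard its side effect, so that case
+     is expected to fail.  */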
+ ASSERT_EQ (badop0, simplify_merge_mask (badvm, mask1, 0));
+ ASSERT_EQ (NULL_RTX, simplify_merge_mask (badvm, mask1, 1));
+
+ /* Called indirectly. */
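+  /* The shared MASK1 picks OP0 out of VM1 and OP3 out of VM2, so the
+     nested merges fold to a single VEC_MERGE.  */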
+ ASSERT_RTX_EQ (gen_rtx_VEC_MERGE (mode, op0, op3, mask1),
+ simplify_rtx (nvm));
+}
+
+/* Test subregs of integer vector constant X, trying elements in
+ the range [ELT_BIAS, ELT_BIAS + constant_lower_bound (NELTS)),
+ where NELTS is the number of elements in X. Subregs involving
+ elements [ELT_BIAS, ELT_BIAS + FIRST_VALID) are expected to fail. */
+
+static void
+test_vector_subregs_modes (rtx x, poly_uint64 elt_bias = 0,
+ unsigned int first_valid = 0)
+{
+ machine_mode inner_mode = GET_MODE (x);
+ scalar_mode int_mode = GET_MODE_INNER (inner_mode);
+
+ for (unsigned int modei = 0; modei < NUM_MACHINE_MODES; ++modei)
+ {
+ machine_mode outer_mode = (machine_mode) modei;
+ if (!VECTOR_MODE_P (outer_mode))
+ continue;
+
+ unsigned int outer_nunits;
+ if (GET_MODE_INNER (outer_mode) == int_mode
+ && GET_MODE_NUNITS (outer_mode).is_constant (&outer_nunits)
+ && multiple_p (GET_MODE_NUNITS (inner_mode), outer_nunits))
+ {
+ /* Test subregs in which the outer mode is a smaller,
+ constant-sized vector of the same element type. */
+ unsigned int limit
+ = constant_lower_bound (GET_MODE_NUNITS (inner_mode));
+ for (unsigned int elt = 0; elt < limit; elt += outer_nunits)
+ {
+ rtx expected = NULL_RTX;
+ if (elt >= first_valid)
+ {
+ rtx_vector_builder builder (outer_mode, outer_nunits, 1);
+ for (unsigned int i = 0; i < outer_nunits; ++i)
+ builder.quick_push (CONST_VECTOR_ELT (x, elt + i));
+ expected = builder.build ();
+ }
+ poly_uint64 byte = (elt_bias + elt) * GET_MODE_SIZE (int_mode);
+ ASSERT_RTX_EQ (expected,
+ simplify_subreg (outer_mode, x,
+ inner_mode, byte));
+ }
+ }
+ else if (known_eq (GET_MODE_SIZE (outer_mode),
+ GET_MODE_SIZE (inner_mode))
+ && known_eq (elt_bias, 0U)
+ && (GET_MODE_CLASS (outer_mode) != MODE_VECTOR_BOOL
+ || known_eq (GET_MODE_BITSIZE (outer_mode),
+ GET_MODE_NUNITS (outer_mode)))
+ && (!FLOAT_MODE_P (outer_mode)
+ || (FLOAT_MODE_FORMAT (outer_mode)->ieee_bits
+ == GET_MODE_UNIT_PRECISION (outer_mode)))
+ && (GET_MODE_SIZE (inner_mode).is_constant ()
+ || !CONST_VECTOR_STEPPED_P (x)))
+ {
+ /* Try converting to OUTER_MODE and back. */
+ rtx outer_x = simplify_subreg (outer_mode, x, inner_mode, 0);
+ ASSERT_TRUE (outer_x != NULL_RTX);
+ ASSERT_RTX_EQ (x, simplify_subreg (inner_mode, outer_x,
+ outer_mode, 0));
+ }
+ }
+
+ if (BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN)
+ {
+ /* Test each byte in the element range. */
+ unsigned int limit
+ = constant_lower_bound (GET_MODE_SIZE (inner_mode));
+ for (unsigned int i = 0; i < limit; ++i)
+ {
+ unsigned int elt = i / GET_MODE_SIZE (int_mode);
+ rtx expected = NULL_RTX;
+ if (elt >= first_valid)
+ {
+ unsigned int byte_shift = i % GET_MODE_SIZE (int_mode);
+ if (BYTES_BIG_ENDIAN)
+ byte_shift = GET_MODE_SIZE (int_mode) - byte_shift - 1;
+ rtx_mode_t vec_elt (CONST_VECTOR_ELT (x, elt), int_mode);
+ wide_int shifted_elt
+ = wi::lrshift (vec_elt, byte_shift * BITS_PER_UNIT);
+ expected = immed_wide_int_const (shifted_elt, QImode);
+ }
+ poly_uint64 byte = elt_bias * GET_MODE_SIZE (int_mode) + i;
+ ASSERT_RTX_EQ (expected,
+ simplify_subreg (QImode, x, inner_mode, byte));
+ }
+ }
+}
+
+/* Test constant subregs of integer vector mode INNER_MODE, using 1
+ element per pattern. */
+
+static void
+test_vector_subregs_repeating (machine_mode inner_mode)
+{
+ poly_uint64 nunits = GET_MODE_NUNITS (inner_mode);
+ unsigned int min_nunits = constant_lower_bound (nunits);
+ scalar_mode int_mode = GET_MODE_INNER (inner_mode);
+ unsigned int count = gcd (min_nunits, 8);
+
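+  /* Build a repeating constant, e.g. { 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, ... }
+     when COUNT == 8.  */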
+ rtx_vector_builder builder (inner_mode, count, 1);
+ for (unsigned int i = 0; i < count; ++i)
+ builder.quick_push (gen_int_mode (8 - i, int_mode));
+ rtx x = builder.build ();
+
+ test_vector_subregs_modes (x);
+ if (!nunits.is_constant ())
+ test_vector_subregs_modes (x, nunits - min_nunits);
+}
+
+/* Test constant subregs of integer vector mode INNER_MODE, using 2
+ elements per pattern. */
+
+static void
+test_vector_subregs_fore_back (machine_mode inner_mode)
+{
+ poly_uint64 nunits = GET_MODE_NUNITS (inner_mode);
+ unsigned int min_nunits = constant_lower_bound (nunits);
+ scalar_mode int_mode = GET_MODE_INNER (inner_mode);
+ unsigned int count = gcd (min_nunits, 4);
+
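+  /* Build a vector whose first COUNT elements count up from 0 and whose
+     remaining elements repeat { 0, -1, ..., -(COUNT-1) }, e.g.
+     { 0, 1, 2, 3, 0, -1, -2, -3, 0, -1, -2, -3, ... } for COUNT == 4.  */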
+ rtx_vector_builder builder (inner_mode, count, 2);
+ for (unsigned int i = 0; i < count; ++i)
+ builder.quick_push (gen_int_mode (i, int_mode));
+ for (unsigned int i = 0; i < count; ++i)
+ builder.quick_push (gen_int_mode (-(int) i, int_mode));
+ rtx x = builder.build ();
+
+ test_vector_subregs_modes (x);
+ if (!nunits.is_constant ())
+ test_vector_subregs_modes (x, nunits - min_nunits, count);
+}
+
+/* Test constant subregs of integer vector mode INNER_MODE, using 3
+ elements per pattern. */
+
+static void
+test_vector_subregs_stepped (machine_mode inner_mode)
+{
+ /* Build { 0, 1, 2, 3, ... }. */
+ scalar_mode int_mode = GET_MODE_INNER (inner_mode);
+ rtx_vector_builder builder (inner_mode, 1, 3);
+ for (unsigned int i = 0; i < 3; ++i)
+ builder.quick_push (gen_int_mode (i, int_mode));
+ rtx x = builder.build ();
+
+ test_vector_subregs_modes (x);
+}
+
+/* Test constant subregs of integer vector mode INNER_MODE. */
+
+static void
+test_vector_subregs (machine_mode inner_mode)
+{
+ test_vector_subregs_repeating (inner_mode);
+ test_vector_subregs_fore_back (inner_mode);
+ test_vector_subregs_stepped (inner_mode);
}
/* Verify some simplifications involving vectors. */
rtx scalar_reg = make_test_reg (GET_MODE_INNER (mode));
test_vector_ops_duplicate (mode, scalar_reg);
if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
- && GET_MODE_NUNITS (mode) > 2)
- test_vector_ops_series (mode, scalar_reg);
+ && maybe_gt (GET_MODE_NUNITS (mode), 2))
+ {
+ test_vector_ops_series (mode, scalar_reg);
+ test_vector_subregs (mode);
+ }
+ test_vec_merge (mode);
}
}
}
rtx x10 = gen_int_mode (poly_int64 (-31, -24), HImode);
rtx two = GEN_INT (2);
rtx six = GEN_INT (6);
- HOST_WIDE_INT offset = subreg_lowpart_offset (QImode, HImode);
+ poly_uint64 offset = subreg_lowpart_offset (QImode, HImode);
/* These tests only try limited operation combinations. Fuller arithmetic
testing is done directly on poly_ints. */