machine_mode, int, bool);
extern void neon_pairwise_reduce (rtx, rtx, machine_mode,
rtx (*) (rtx, rtx, rtx));
-extern rtx neon_make_constant (rtx);
+extern rtx neon_make_constant (rtx, bool generate = true);
extern tree arm_builtin_vectorized_function (unsigned int, tree, tree);
extern void neon_expand_vector_init (rtx, rtx);
extern void neon_lane_bounds (rtx, HOST_WIDE_INT, HOST_WIDE_INT, const_tree);
static bool
arm_legitimate_constant_p_1 (machine_mode, rtx x)
{
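+  /* Reject vector constants we have no way of materialising: with
+     GENERATE false, neon_make_constant only checks whether a VMOV,
+     VDUP or literal-pool load could build X, without emitting code.  */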
+ if (GET_CODE (x) == CONST_VECTOR && !neon_make_constant (x, false))
+ return false;
+
return flag_pic || !label_mentioned_p (x);
}
}
}
-/* If VALS is a vector constant that can be loaded into a register
- using VDUP, generate instructions to do so and return an RTX to
- assign to the register. Otherwise return NULL_RTX. */
+/* Return a non-NULL RTX iff VALS is a vector constant that can be
+ loaded into a register using VDUP.
+
+ If this is the case, and GENERATE is set, we also generate
+ instructions to do this and return an RTX to assign to the register. */
static rtx
-neon_vdup_constant (rtx vals)
+neon_vdup_constant (rtx vals, bool generate)
{
machine_mode mode = GET_MODE (vals);
machine_mode inner_mode = GET_MODE_INNER (mode);
vdup.i16). */
return NULL_RTX;
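+  /* At this point X holds the duplicated element, so it is a valid
+     non-NULL result; in query-only mode return it without emitting
+     the VDUP.  */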
+ if (!generate)
+ return x;
+
/* We can load this constant by using VDUP and a constant in a
single ARM register. This will be cheaper than a vector
load. */
return gen_vec_duplicate (mode, x);
}
-/* Generate code to load VALS, which is a PARALLEL containing only
- constants (for vec_init) or CONST_VECTOR, efficiently into a
- register. Returns an RTX to copy into the register, or NULL_RTX
- for a PARALLEL that cannot be converted into a CONST_VECTOR. */
+/* Return a non-NULL RTX iff VALS, which is a PARALLEL containing only
+ constants (for vec_init) or CONST_VECTOR, can be efficiently loaded
+ into a register.
+
+ If this is the case, and GENERATE is set, we also generate code to do
+ this and return an RTX to copy into the register. */
rtx
-neon_make_constant (rtx vals)
+neon_make_constant (rtx vals, bool generate)
{
machine_mode mode = GET_MODE (vals);
rtx target;
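+  /* Try the cheapest strategies first: a VMOV immediate, then a VDUP
+     of a scalar, and only then a load from the literal pool.  */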
&& simd_immediate_valid_for_move (const_vec, mode, NULL, NULL))
/* Load using VMOV. On Cortex-A8 this takes one cycle. */
return const_vec;
- else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
+ else if ((target = neon_vdup_constant (vals, generate)) != NULL_RTX)
/* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
pipeline cycle; creating the constant takes one or two ARM
pipeline cycles. */
(for either double or quad vectors). We cannot take advantage
of single-cycle VLD1 because we need a PC-relative addressing
mode. */
- return const_vec;
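+  /* When the literal pool is disabled (arm_disable_literal_pool, as
+     with -mpure-code), this fallback is unavailable, so report
+     failure instead.  */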
+ return arm_disable_literal_pool ? NULL_RTX : const_vec;
else
/* A PARALLEL containing something not valid inside CONST_VECTOR.
We cannot construct an initializer. */
(define_insn "*mve_mov<mode>"
[(set (match_operand:MVE_types 0 "nonimmediate_operand" "=w,w,r,w,w,r,w,Ux,w")
- (match_operand:MVE_types 1 "general_operand" "w,r,w,Dn,Uxi,r,Dm,w,Ul"))]
+ (match_operand:MVE_types 1 "general_operand" "w,r,w,Dn,UxUi,r,Dm,w,Ul"))]
"TARGET_HAVE_MVE || TARGET_HAVE_MVE_FLOAT"
{
if (which_alternative == 3 || which_alternative == 6)