/* The mode of the elements. */
scalar_mode elt_mode;
+ /* If nonzero, the vector width in bits to use when printing the
+ AdvSIMD immediate, overriding the width that would otherwise be used. */
+ unsigned int width = 0;
+
/* The instruction to use to move the immediate into a vector. */
insn_type insn;
ELT_MODE_IN and value VALUE_IN. */
inline simd_immediate_info
::simd_immediate_info (scalar_float_mode elt_mode_in, rtx value_in)
- : elt_mode (elt_mode_in), insn (MOV)
+ : elt_mode (elt_mode_in), width (0), insn (MOV)
{
u.mov.value = value_in;
u.mov.modifier = LSL;
unsigned HOST_WIDE_INT value_in,
insn_type insn_in, modifier_type modifier_in,
unsigned int shift_in)
- : elt_mode (elt_mode_in), insn (insn_in)
+ : elt_mode (elt_mode_in), width (0), insn (insn_in)
{
u.mov.value = gen_int_mode (value_in, elt_mode_in);
u.mov.modifier = modifier_in;
and where element I is equal to BASE_IN + I * STEP_IN. */
inline simd_immediate_info
::simd_immediate_info (scalar_mode elt_mode_in, rtx base_in, rtx step_in)
- : elt_mode (elt_mode_in), insn (INDEX)
+ : elt_mode (elt_mode_in), width (0), insn (INDEX)
{
u.index.base = base_in;
u.index.step = step_in;
inline simd_immediate_info
::simd_immediate_info (scalar_int_mode elt_mode_in,
aarch64_svpattern pattern_in)
- : elt_mode (elt_mode_in), insn (PTRUE)
+ : elt_mode (elt_mode_in), width (0), insn (PTRUE)
{
u.pattern = pattern_in;
}
}
}
- /* The immediate must repeat every eight bytes. */
+ /* The immediate must normally repeat every eight bytes. For MOV
+ also allow a 128-bit AdvSIMD constant whose high 64 bits are zero
+ since it can be materialized using a 64-bit MOVI. */
unsigned int nbytes = bytes.length ();
- for (unsigned i = 8; i < nbytes; ++i)
+ unsigned int output_width = 0;
+ bool repeats_every_8_bytes = true;
+
+ for (unsigned int i = 8; i < nbytes; ++i)
if (bytes[i] != bytes[i - 8])
- return false;
+ {
+ repeats_every_8_bytes = false;
+ break;
+ }
+
+ if (!repeats_every_8_bytes)
+ {
+ if (which != AARCH64_CHECK_MOV || !(vec_flags & VEC_ADVSIMD)
+ || aarch64_sve_mode_p (mode) || nbytes != 16)
+ return false;
+
+ for (unsigned int i = 8; i < nbytes; ++i)
+ if (bytes[i] != 0)
+ return false;
+
+ output_width = 64;
+ }
/* Get the repeating 8-byte value as an integer. No endian correction
is needed here because bytes is already in lsb-first order. */
{
rtx float_val = const_double_from_real_value (r, fmode);
*info = simd_immediate_info (fmode, float_val);
+ info->width = output_width;
}
return true;
}
return aarch64_sve_valid_immediate (ival, imode, info, which);
if (aarch64_advsimd_valid_immediate (val64, imode, info, which))
- return true;
+ {
+ if (info)
+ info->width = output_width;
+ return true;
+ }
- if (TARGET_SVE)
+ /* aarch64_sve_valid_immediate only sees the element value IVAL, so it
+ cannot tell that the high 64 bits differ from the low 64 bits. Only
+ fall back to it when the constant really repeats every eight bytes. */
+ if (TARGET_SVE && output_width == 0)
return aarch64_sve_valid_immediate (ival, imode, info, which);
is_valid = aarch64_simd_valid_imm (const_vector, &info, which);
gcc_assert (is_valid);
+ if (info.width != 0)
+ width = info.width;
+
element_char = sizetochar (GET_MODE_BITSIZE (info.elt_mode));
lane_count = width / GET_MODE_BITSIZE (info.elt_mode);
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+typedef signed char v16qi __attribute__((vector_size(16)));
+typedef short v8hi __attribute__((vector_size(16)));
+typedef int v4si __attribute__((vector_size(16)));
+
+v16qi
+f_qi (void)
+{
+ return (v16qi)
+ { 3, 3, 3, 3, 3, 3, 3, 3,
+ 0, 0, 0, 0, 0, 0, 0, 0 };
+}
+
+v8hi
+f_hi (void)
+{
+ return (v8hi)
+ { 2, 2, 2, 2,
+ 0, 0, 0, 0 };
+}
+
+v4si
+f_si (void)
+{
+ return (v4si)
+ { 1, 1, 0, 0 };
+}
+
+/* { dg-final { scan-assembler-times {\tmovi\tv[0-9]+\.8b, 0x3} 1 } } */
+/* { dg-final { scan-assembler-times {\tmovi\tv[0-9]+\.4h, 0x2} 1 } } */
+/* { dg-final { scan-assembler-times {\tmovi\tv[0-9]+\.2s, 0x1} 1 } } */
+/* { dg-final { scan-assembler-not {\tldr\tq[0-9]+,} } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv8.2-a+fp16" } */
+
+typedef float v2sf __attribute__((vector_size(8)));
+typedef float v4sf __attribute__((vector_size(16)));
+typedef double v2df __attribute__((vector_size(16)));
+typedef __fp16 v4hf __attribute__((vector_size(8)));
+typedef __fp16 v8hf __attribute__((vector_size(16)));
+
+v2sf
+f_v2sf (void)
+{
+ return (v2sf){ 1.0f, 1.0f };
+}
+
+v4sf
+f_v4sf (void)
+{
+ return (v4sf){ 1.0f, 1.0f, 0.0f, 0.0f };
+}
+
+v2df
+f_v2df (void)
+{
+ return (v2df){ 1.0, 0.0 };
+}
+
+v4hf
+f_v4hf (void)
+{
+ return (v4hf){ (__fp16)1.0, (__fp16)1.0, (__fp16)1.0, (__fp16)1.0 };
+}
+
+v8hf
+f_v8hf (void)
+{
+ return (v8hf){ (__fp16)1.0, (__fp16)1.0, (__fp16)1.0, (__fp16)1.0, 0, 0, 0, 0 };
+}
+
+/* Each function should use fmov, not a literal pool load. */
+/* { dg-final { scan-assembler-times {fmov[ \t]+v[0-9]+[.]2s,[ \t]+1[.]0e[+]0} 2 } } */
+/* { dg-final { scan-assembler-times {fmov[ \t]+v[0-9]+[.]4h,[ \t]+1[.]0e[+]0} 2 } } */
+/* { dg-final { scan-assembler-times {\tfmov\td[0-9]+,} 1 } } */
+/* Literal pool loads are emitted as loads from a .LC label, not with the
+ assembler's "=value" syntax, so reject any load into an FP/SIMD register. */
+/* { dg-final { scan-assembler-not {\tldr\t[sdq][0-9]+,} } } */