[AArch64]: Use MOVI for low-64-bit integer SIMD constant vectors [PR113926]

author Naveen <naveen.siddegowda@oss.qualcomm.com>

Thu, 11 Jun 2026 13:26:23 +0000 (06:26 -0700)

committer Naveen <naveen.siddegowda@oss.qualcomm.com>

Thu, 11 Jun 2026 13:30:20 +0000 (06:30 -0700)
author Naveen <naveen.siddegowda@oss.qualcomm.com>
Thu, 11 Jun 2026 13:26:23 +0000 (06:26 -0700)
committer Naveen <naveen.siddegowda@oss.qualcomm.com>
Thu, 11 Jun 2026 13:30:20 +0000 (06:30 -0700)
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc

index 983cef6d68114281e1d4388e4d6f3f1272c3920b..69530ea9d3477ef03e6483699744bcff1b3d9013 100644 (file)
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -169,6 +169,9 @@ struct simd_immediate_info
    /* The mode of the elements.  */
    scalar_mode elt_mode;
  
+  /* If nonzero, the vector width to print the AdvSIMD immediate.  */
+  unsigned int width = 0;
+
    /* The instruction to use to move the immediate into a vector.  */
    insn_type insn;
  
@@ -203,7 +206,7 @@ struct simd_immediate_info
     ELT_MODE_IN and value VALUE_IN.  */
  inline simd_immediate_info
  ::simd_immediate_info (scalar_float_mode elt_mode_in, rtx value_in)
-  : elt_mode (elt_mode_in), insn (MOV)
+  : elt_mode (elt_mode_in), width (0), insn (MOV)
  {
    u.mov.value = value_in;
    u.mov.modifier = LSL;
@@ -218,7 +221,7 @@ inline simd_immediate_info
                        unsigned HOST_WIDE_INT value_in,
                        insn_type insn_in, modifier_type modifier_in,
                        unsigned int shift_in)
-  : elt_mode (elt_mode_in), insn (insn_in)
+  : elt_mode (elt_mode_in), width (0), insn (insn_in)
  {
    u.mov.value = gen_int_mode (value_in, elt_mode_in);
    u.mov.modifier = modifier_in;
@@ -229,7 +232,7 @@ inline simd_immediate_info
     and where element I is equal to BASE_IN + I * STEP_IN.  */
  inline simd_immediate_info
  ::simd_immediate_info (scalar_mode elt_mode_in, rtx base_in, rtx step_in)
-  : elt_mode (elt_mode_in), insn (INDEX)
+  : elt_mode (elt_mode_in), width (0), insn (INDEX)
  {
    u.index.base = base_in;
    u.index.step = step_in;
@@ -240,7 +243,7 @@ inline simd_immediate_info
  inline simd_immediate_info
  ::simd_immediate_info (scalar_int_mode elt_mode_in,
                        aarch64_svpattern pattern_in)
-  : elt_mode (elt_mode_in), insn (PTRUE)
+  : elt_mode (elt_mode_in), width (0), insn (PTRUE)
  {
    u.pattern = pattern_in;
  }
@@ -24635,11 +24638,32 @@ aarch64_simd_valid_imm (rtx op, simd_immediate_info *info,
         }
      }
  
-  /* The immediate must repeat every eight bytes.  */
+  /* The immediate must normally repeat every eight bytes.  For MOV
+     also allow a 128-bit AdvSIMD constant whose high 64 bits are zero
+     since it can be materialized using a 64-bit MOVI.  */
    unsigned int nbytes = bytes.length ();
-  for (unsigned i = 8; i < nbytes; ++i)
+  unsigned int output_width = 0;
+  bool repeats_every_8_bytes = true;
+
+  for (unsigned int i = 8; i < nbytes; ++i)
      if (bytes[i] != bytes[i - 8])
-      return false;
+      {
+       repeats_every_8_bytes = false;
+       break;
+      }
+
+  if (!repeats_every_8_bytes)
+    {
+      if (which != AARCH64_CHECK_MOV || !(vec_flags & VEC_ADVSIMD)
+         || aarch64_sve_mode_p (mode) || nbytes != 16)
+       return false;
+
+      for (unsigned int i = 8; i < nbytes; ++i)
+       if (bytes[i] != 0)
+         return false;
+
+      output_width = 64;
+    }
  
    /* Get the repeating 8-byte value as an integer.  No endian correction
       is needed here because bytes is already in lsb-first order.  */
@@ -24692,6 +24716,7 @@ aarch64_simd_valid_imm (rtx op, simd_immediate_info *info,
             {
               rtx float_val = const_double_from_real_value (r, fmode);
               *info = simd_immediate_info (fmode, float_val);
+             info->width = output_width;
             }
           return true;
         }
@@ -24701,7 +24726,13 @@ aarch64_simd_valid_imm (rtx op, simd_immediate_info *info,
      return aarch64_sve_valid_immediate (ival, imode, info, which);
  
    if (aarch64_advsimd_valid_immediate (val64, imode, info, which))
-    return true;
+    {
+      if (info)
+       info->width = output_width;
+      return true;
+    }
  
-  if (TARGET_SVE)
+  /* An SVE immediate is replicated into every element, so it cannot
+     describe a constant whose high 64 bits must be zero.  */
+  if (TARGET_SVE && output_width == 0)
      return aarch64_sve_valid_immediate (ival, imode, info, which);
@@ -27235,6 +27266,10 @@ aarch64_output_simd_imm (rtx const_vector, unsigned width,
    is_valid = aarch64_simd_valid_imm (const_vector, &info, which);
    gcc_assert (is_valid);
  
+  /* Honor the narrower width chosen by aarch64_simd_valid_imm, if any.  */
+  if (info.width != 0)
+    width = info.width;
+
    element_char = sizetochar (GET_MODE_BITSIZE (info.elt_mode));
    lane_count = width / GET_MODE_BITSIZE (info.elt_mode);
  
diff --git a/gcc/testsuite/gcc.target/aarch64/pr113926.c b/gcc/testsuite/gcc.target/aarch64/pr113926.c

new file mode 100644 (file)

index 0000000..d55ec77
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr113926.c
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+typedef signed char v16qi __attribute__((vector_size(16)));
+typedef short v8hi __attribute__((vector_size(16)));
+typedef int v4si __attribute__((vector_size(16)));
+
+v16qi
+f_qi (void)
+{
+  return (v16qi)
+    { 3, 3, 3, 3, 3, 3, 3, 3,
+      0, 0, 0, 0, 0, 0, 0, 0 };
+}
+
+v8hi
+f_hi (void)
+{
+  return (v8hi)
+    { 2, 2, 2, 2,
+      0, 0, 0, 0 };
+}
+
+v4si
+f_si (void)
+{
+  return (v4si)
+    { 1, 1, 0, 0 };
+}
+
+/* { dg-final { scan-assembler-times {\tmovi\tv[0-9]+\.8b, 0x3} 1 } } */
+/* { dg-final { scan-assembler-times {\tmovi\tv[0-9]+\.4h, 0x2} 1 } } */
+/* { dg-final { scan-assembler-times {\tmovi\tv[0-9]+\.2s, 0x1} 1 } } */
+/* { dg-final { scan-assembler-not {\tldr\tq[0-9]+,} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/pr113926_1.c b/gcc/testsuite/gcc.target/aarch64/pr113926_1.c

new file mode 100644 (file)

index 0000000..8e76ca5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr113926_1.c
@@ -0,0 +1,44 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv8.2-a+fp16" } */
+
+typedef float  v2sf  __attribute__((vector_size(8)));
+typedef float  v4sf  __attribute__((vector_size(16)));
+typedef double v2df  __attribute__((vector_size(16)));
+typedef __fp16 v4hf  __attribute__((vector_size(8)));
+typedef __fp16 v8hf  __attribute__((vector_size(16)));
+
+v2sf
+f_v2sf (void)
+{
+  return (v2sf){ 1.0f, 1.0f };
+}
+
+v4sf
+f_v4sf (void)
+{
+  return (v4sf){ 1.0f, 1.0f, 0.0f, 0.0f };
+}
+
+v2df
+f_v2df (void)
+{
+  return (v2df){ 1.0, 0.0 };
+}
+
+v4hf
+f_v4hf (void)
+{
+  return (v4hf){ (__fp16)1.0, (__fp16)1.0, (__fp16)1.0, (__fp16)1.0 };
+}
+
+v8hf
+f_v8hf (void)
+{
+  return (v8hf){ (__fp16)1.0, (__fp16)1.0, (__fp16)1.0, (__fp16)1.0, 0, 0, 0, 0 };
+}
+
+/* Each function should use fmov, not a literal pool load.  */
+/* { dg-final { scan-assembler-times {fmov[ \t]+v[0-9]+[.]2s,[ \t]+1[.]0e[+]0} 2 } } */
+/* { dg-final { scan-assembler-times {fmov[ \t]+v[0-9]+[.]4h,[ \t]+1[.]0e[+]0} 2 } } */
+/* { dg-final { scan-assembler-times {\tfmov\td[0-9]+,} 1 } } */
+/* { dg-final { scan-assembler-not  {\tldr\t[sdq][0-9]+,} } } */
author	Naveen <naveen.siddegowda@oss.qualcomm.com>
	Thu, 11 Jun 2026 13:26:23 +0000 (06:26 -0700)
committer	Naveen <naveen.siddegowda@oss.qualcomm.com>
	Thu, 11 Jun 2026 13:30:20 +0000 (06:30 -0700)
gcc/config/aarch64/aarch64.cc		patch \| blob \| blame \| history
gcc/testsuite/gcc.target/aarch64/pr113926.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/aarch64/pr113926_1.c	[new file with mode: 0644]	patch \| blob