aarch64: PR124908 Fix ICE in svld1rq fold with -msve-vector-bits=128

author Kyrylo Tkachov <ktkachov@nvidia.com>

Mon, 20 Apr 2026 07:56:01 +0000 (00:56 -0700)

committer Kyrylo Tkachov <ktkachov@nvidia.com>

Wed, 22 Apr 2026 13:50:50 +0000 (15:50 +0200)
author Kyrylo Tkachov <ktkachov@nvidia.com>
Mon, 20 Apr 2026 07:56:01 +0000 (00:56 -0700)
committer Kyrylo Tkachov <ktkachov@nvidia.com>
Wed, 22 Apr 2026 13:50:50 +0000 (15:50 +0200)
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc

index 918642a45c192692610559475e607f60881ce2c7..9661a031fa0106b5fbad47055e4db6da9547ff63 100644 (file)
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
@@ -1854,6 +1854,20 @@ public:
         gimple_seq_add_stmt_without_update (&stmts, mem_ref_stmt);
  
         int source_nelts = TYPE_VECTOR_SUBPARTS (access_type).to_constant ();
+
+       /* When the SVE vector has the same number of elements as the
+          128-bit quadword (i.e. VL == 128), the load fills the entire
+          register and no replication is needed.  Just convert the
+          loaded value from the Advanced SIMD type to the SVE type.  */
+       if (known_eq (lhs_len, (unsigned int) source_nelts))
+         {
+           gimple *g
+             = gimple_build_assign (lhs, build1 (VIEW_CONVERT_EXPR,
+                                                 lhs_type, mem_ref_lhs));
+           gimple_seq_add_stmt_without_update (&stmts, g);
+           gsi_replace_with_seq_vops (f.gsi, stmts);
+           return g;
+         }
         vec_perm_builder sel (lhs_len, source_nelts, 1);
         for (int i = 0; i < source_nelts; i++)
           sel.quick_push (i);
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ld1rq_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ld1rq_2.c

new file mode 100644 (file)

index 0000000..84bf773
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ld1rq_2.c
@@ -0,0 +1,37 @@
+/* PR target/124908 */
+/* { dg-options "-O2 -msve-vector-bits=128" } */
+
+#include <arm_sve.h>
+
+/* Verify that folding svld1rq does not ICE with -msve-vector-bits=128.  */
+
+svuint8_t
+f_u8 (const uint8_t *p)
+{
+  return svld1rq_u8 (svptrue_b8 (), p);
+}
+
+svint8_t
+f_s8 (const int8_t *p)
+{
+  return svld1rq_s8 (svptrue_b8 (), p);
+}
+
+svuint16_t
+f_u16 (const uint16_t *p)
+{
+  return svld1rq_u16 (svptrue_b16 (), p);
+}
+
+svuint32_t
+f_u32 (const uint32_t *p)
+{
+  return svld1rq_u32 (svptrue_b32 (), p);
+}
+
+svfloat64_t
+f_f64 (const float64_t *p)
+{
+  return svld1rq_f64 (svptrue_b64 (), p);
+}
+
author	Kyrylo Tkachov <ktkachov@nvidia.com>
	Mon, 20 Apr 2026 07:56:01 +0000 (00:56 -0700)
committer	Kyrylo Tkachov <ktkachov@nvidia.com>
	Wed, 22 Apr 2026 13:50:50 +0000 (15:50 +0200)
gcc/config/aarch64/aarch64-sve-builtins-base.cc		patch | blob | blame | history
gcc/testsuite/gcc.target/aarch64/sve/acle/general/ld1rq_2.c	[new file with mode: 0644]	patch | blob