AArch64: fix the SVE->SIMD lowering optimization [PR125148]

author Tamar Christina <tamar.christina@arm.com>

Wed, 27 May 2026 09:50:05 +0000 (10:50 +0100)

committer Tamar Christina <tamar.christina@arm.com>

Wed, 27 May 2026 09:50:05 +0000 (10:50 +0100)
author Tamar Christina <tamar.christina@arm.com>
Wed, 27 May 2026 09:50:05 +0000 (10:50 +0100)
committer Tamar Christina <tamar.christina@arm.com>
Wed, 27 May 2026 09:50:05 +0000 (10:50 +0100)
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md

index 019630eb8d21941b0ca718838ae6ef23e8aaedab..e7d98c3754f157efc7711395364e0533775eebdc 100644 (file)
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -3004,6 +3004,51 @@
    [(set_attr "sve_type" "sve_int_general")]
  )
  
+;; Shift an SVE vector left and insert a scalar into element 0 of a zero
+;; register.
+(define_insn "*aarch64_vec_shl_insert_into_zero_<mode>"
+  [(set (match_operand:SVE_FULL_HSD 0 "register_operand")
+       (unspec:SVE_FULL_HSD
+         [(match_operand:SVE_FULL_HSD 1 "aarch64_simd_imm_zero")
+          (match_operand:<VEL> 2 "register_operand")]
+         UNSPEC_INSR))]
+  "TARGET_SVE"
+  {@ [ cons: =0 , 1  , 2 ]
+     [ w        , Dz , w ] fmov\t%<Vetype>0, %<Vetype>2
+  }
+  [(set_attr "type" "neon_move")]
+)
+
+;; Shift an SVE vector left and insert a scalar into element 0 of a zero
+;; register for bytes.
+(define_insn "*aarch64_vec_shl_insert_into_zero_vnx16qi"
+  [(set (match_operand:VNx16QI 0 "register_operand")
+       (unspec:VNx16QI
+         [(match_operand:VNx16QI 1 "aarch64_simd_imm_zero")
+          (match_operand:QI 2 "register_operand")]
+         UNSPEC_INSR))]
+  "TARGET_SVE"
+  {@ [ cons: =0 , 1  , 2 ; attrs: length ]
+     [ w        , Dz , w ; 8              ] fmov\t%h0, %h2\;and\t%0.h, %0.h, #0xff
+  }
+  [(set_attr "type" "neon_move")]
+)
+
+;; Shift an SVE vector left and insert a scalar into element 0 from a memory
+;; load.
+(define_insn "*aarch64_vec_shl_insert_from_load_<mode>"
+  [(set (match_operand:SVE_FULL 0 "register_operand")
+       (unspec:SVE_FULL
+         [(match_operand:SVE_FULL 1 "aarch64_simd_imm_zero")
+          (match_operand:<VEL> 2 "memory_operand")]
+         UNSPEC_INSR))]
+  "TARGET_SVE"
+  {@ [ cons: =0 , 1  , 2 ]
+     [ w        , Dz , m ] ldr\t%<Vetype>0, %2
+  }
+  [(set_attr "type" "neon_move")]
+)
+
  ;; -------------------------------------------------------------------------
  ;; ---- [INT] Linear series
  ;; -------------------------------------------------------------------------
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc

index 7b1dcbdbcfa20ea00d0d374050a3e60500f60301..abd6eb5fd1ee84ec9c4b1f0edaabb0015186adef 100644 (file)
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -6887,29 +6887,6 @@ aarch64_stack_protect_canary_mem (machine_mode mode, rtx decl_rtl,
    return gen_rtx_MEM (mode, force_reg (Pmode, addr));
  }
  
-/* Emit a load/store from a subreg of SRC to a subreg of DEST.
-   The subregs have mode NEW_MODE. Use only for reg<->mem moves.  */
-void
-aarch64_emit_load_store_through_mode (rtx dest, rtx src, machine_mode new_mode)
-{
-  gcc_assert ((MEM_P (dest) && register_operand (src, VOIDmode))
-             || (MEM_P (src) && register_operand (dest, VOIDmode)));
-  auto mode = GET_MODE (dest);
-  auto int_mode = aarch64_sve_int_mode (mode);
-  if (MEM_P (src))
-    {
-      rtx tmp = force_reg (new_mode, adjust_address (src, new_mode, 0));
-      tmp = force_lowpart_subreg (int_mode, tmp, new_mode);
-      emit_move_insn (dest, force_lowpart_subreg (mode, tmp, int_mode));
-    }
-  else
-    {
-      src = force_lowpart_subreg (int_mode, src, mode);
-      emit_move_insn (adjust_address (dest, new_mode, 0),
-                     force_lowpart_subreg (new_mode, src, int_mode));
-    }
-}
-
  /* PRED is a predicate that is known to contain PTRUE.
     For 128-bit VLS loads/stores, emit LDR/STR.
     Else, emit an SVE predicated move from SRC to DEST.  */
@@ -26312,6 +26289,47 @@ aarch64_sve_expand_vector_init_subvector (rtx target, rtx vals)
    return;
  }
  
+/* Emit a load/store from a subreg of SRC to a subreg of DEST.
+   The subregs have mode NEW_MODE. Use only for reg<->mem moves.  */
+void
+aarch64_emit_load_store_through_mode (rtx dest, rtx src, machine_mode new_mode)
+{
+  gcc_assert ((MEM_P (dest) && register_operand (src, VOIDmode))
+             || (MEM_P (src) && register_operand (dest, VOIDmode)));
+  auto mode = GET_MODE (dest);
+  auto int_mode = aarch64_sve_int_mode (mode);
+  rtx tmp_reg;
+  if (MEM_P (src))
+    {
+      rtx tmp = force_reg (new_mode, adjust_address (src, new_mode, 0));
+      if (!VECTOR_MODE_P (new_mode))
+       {
+         machine_mode full_mode = int_mode;
+         auto vmode = aarch64_classify_vector_mode (int_mode);
+         /* Partial vectors have to go through a full mode insert since we
+            don't support inserting an partial vectors.  */
+         if (GET_MODE_INNER (int_mode) != new_mode || (vmode & VEC_PARTIAL))
+           full_mode
+             = aarch64_full_sve_mode (as_a <scalar_mode> (new_mode)).require ();
+
+         /* Create an SVE register with the top bits explicitly zero'd.  */
+         tmp_reg = force_reg (full_mode, CONST0_RTX (full_mode));
+         emit_insr (tmp_reg, tmp);
+         if (full_mode != int_mode)
+           tmp_reg = force_lowpart_subreg (int_mode, tmp_reg, full_mode);
+       }
+      else
+       tmp_reg = force_lowpart_subreg (int_mode, tmp, new_mode);
+      emit_move_insn (dest, force_lowpart_subreg (mode, tmp_reg, int_mode));
+    }
+  else
+    {
+      src = force_lowpart_subreg (int_mode, src, mode);
+      emit_move_insn (adjust_address (dest, new_mode, 0),
+                     force_lowpart_subreg (new_mode, src, int_mode));
+    }
+}
+
  /* Check whether VALUE is a vector constant in which every element
     is either a power of 2 or a negated power of 2.  If so, return
     a constant vector of log2s, and flip CODE between PLUS and MINUS
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/highway_run.c b/gcc/testsuite/gcc.target/aarch64/sve/highway_run.c

new file mode 100644 (file)

index 0000000..b73fd51
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/highway_run.c
@@ -0,0 +1,55 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O2" } */
+
+#include <arm_sve.h>
+
+extern void abort (void) __attribute__ ((noreturn));
+
+volatile int cond = 1;
+
+int __attribute__ ((noipa))
+a (void)
+{
+  return cond;
+}
+
+#define TEST_LOAD(TYPE, NAME, BITS)                                    \
+  int __attribute__ ((noipa))                                          \
+  test_##NAME (void)                                                   \
+  {                                                                    \
+    TYPE *g = __builtin_malloc (sizeof (TYPE));                                \
+    int c = 0;                                                         \
+    if (!g)                                                            \
+      abort ();                                                                \
+    g[0] = 0;                                                          \
+    if (a ())                                                          \
+      {                                                                        \
+       g[0] = 1;                                                       \
+       c = 2;                                                          \
+      }                                                                        \
+    svint##BITS##_t d                                          \
+      = svld1_s##BITS (svptrue_pat_b##BITS (SV_VL1), g);               \
+    svbool_t e = svcmpgt_s##BITS (svptrue_b##BITS (), d,               \
+                                 svdup_n_s##BITS (0));         \
+    int f = svptest_any (svptrue_pat_b##BITS (SV_VL1), e);             \
+    if (f && c != 2)                                                   \
+      abort ();                                                                \
+    return f;                                                          \
+  }
+
+TEST_LOAD (signed char, byte, 8)
+TEST_LOAD (short, short, 16)
+TEST_LOAD (int, int, 32)
+TEST_LOAD (long, long, 64)
+
+int
+main (void)
+{
+  if (!test_byte ()
+      || !test_short ()
+      || !test_int ()
+      || !test_long ())
+    abort ();
+  return 0;
+}
author	Tamar Christina <tamar.christina@arm.com>
	Wed, 27 May 2026 09:50:05 +0000 (10:50 +0100)
committer	Tamar Christina <tamar.christina@arm.com>
	Wed, 27 May 2026 09:50:05 +0000 (10:50 +0100)
gcc/config/aarch64/aarch64-sve.md		patch \| blob \| blame \| history
gcc/config/aarch64/aarch64.cc		patch \| blob \| blame \| history
gcc/testsuite/gcc.target/aarch64/sve/highway_run.c	[new file with mode: 0644]	patch \| blob