aarch64: Add vec_set/extract for tuple modes [PR113027]

author Richard Sandiford <richard.sandiford@arm.com>

Tue, 17 Jun 2025 10:43:51 +0000 (11:43 +0100)

committer Richard Sandiford <richard.sandiford@arm.com>

Tue, 17 Jun 2025 10:43:51 +0000 (11:43 +0100)
author Richard Sandiford <richard.sandiford@arm.com>
Tue, 17 Jun 2025 10:43:51 +0000 (11:43 +0100)
committer Richard Sandiford <richard.sandiford@arm.com>
Tue, 17 Jun 2025 10:43:51 +0000 (11:43 +0100)
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h

index 40088db2b4230602c2d578856372985e765008ec..31f2f5b8bd2e342b9ae5a37aa907108cf0bfadc3 100644 (file)
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -1039,6 +1039,7 @@ bool aarch64_maybe_expand_sve_subreg_move (rtx, rtx);
  rtx aarch64_replace_reg_mode (rtx, machine_mode);
  void aarch64_split_sve_subreg_move (rtx, rtx, rtx);
  void aarch64_expand_prologue (void);
+void aarch64_decompose_vec_struct_index (machine_mode, rtx *, rtx *, bool);
  void aarch64_expand_vector_init (rtx, rtx);
  void aarch64_sve_expand_vector_init_subvector (rtx, rtx);
  void aarch64_sve_expand_vector_init (rtx, rtx);
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md

index 6e30dc48934c3094afaf70d99d0fc0f4fb771f8d..e771defc73ff6f272d4f4cbe763998d5d2ac7068 100644 (file)
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1628,6 +1628,24 @@
    }
  )
  
+(define_expand "vec_set<mode>"
+  [(match_operand:VSTRUCT_QD 0 "register_operand")
+   (match_operand:<VEL> 1 "aarch64_simd_nonimmediate_operand")
+   (match_operand:SI 2 "immediate_operand")]
+  "TARGET_SIMD"
+{
+  aarch64_decompose_vec_struct_index (<VSTRUCT_ELT>mode, &operands[0],
+                                     &operands[2], true);
+  /* For tuples of 64-bit modes, <vstruct_elt> is the 64-bit scalar mode.
+     Allow gen_vec_set<vstruct_elt> to cope with those cases too.  */
+  auto gen_vec_setdi ATTRIBUTE_UNUSED = [](rtx x0, rtx x1, rtx)
+    {
+      return gen_move_insn (x0, x1);
+    };
+  auto gen_vec_setdf ATTRIBUTE_UNUSED = gen_vec_setdi;
+  emit_insn (gen_vec_set<vstruct_elt> (operands[0], operands[1], operands[2]));
+  DONE;
+})
  
  (define_insn "aarch64_mla<mode><vczle><vczbe>"
   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
@@ -8883,6 +8901,26 @@
      DONE;
  })
  
+(define_expand "vec_extract<mode><Vel>"
+  [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand")
+   (match_operand:VSTRUCT_QD 1 "register_operand")
+   (match_operand:SI 2 "immediate_operand")]
+  "TARGET_SIMD"
+{
+  aarch64_decompose_vec_struct_index (<VSTRUCT_ELT>mode, &operands[1],
+                                     &operands[2], false);
+  /* For tuples of 64-bit modes, <vstruct_elt> is the 64-bit scalar mode.
+     Allow gen_vec_extract<vstruct_elt><Vel> to cope with those cases too.  */
+  auto gen_vec_extractdidi ATTRIBUTE_UNUSED = [](rtx x0, rtx x1, rtx)
+    {
+      return gen_move_insn (x0, x1);
+    };
+  auto gen_vec_extractdfdf ATTRIBUTE_UNUSED = gen_vec_extractdidi;
+  emit_insn (gen_vec_extract<vstruct_elt><Vel> (operands[0], operands[1],
+                                               operands[2]));
+  DONE;
+})
+
  ;; Extract a 64-bit vector from one half of a 128-bit vector.
  (define_expand "vec_extract<mode><Vhalf>"
    [(match_operand:<VHALF> 0 "register_operand")
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc

index 5540946eac718c27c4f669a3f636bcd10b301b9f..af8415c29a97054da2d4169aea7a2ae4c4b1848f 100644 (file)
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -24721,6 +24721,28 @@ seq_cost_ignoring_scalar_moves (const rtx_insn *seq, bool speed)
    return cost;
  }
  
+/* *VECTOR is an Advanced SIMD structure mode and *INDEX is a constant index
+   into it.  Narrow *VECTOR and *INDEX so that they reference a single vector
+   of mode SUBVEC_MODE.  IS_DEST is true if *VECTOR is a destination operand,
+   false if it is a source operand.  */
+
+void
+aarch64_decompose_vec_struct_index (machine_mode subvec_mode,
+                                   rtx *vector, rtx *index, bool is_dest)
+{
+  auto elts_per_vector = GET_MODE_NUNITS (subvec_mode).to_constant ();
+  auto subvec = UINTVAL (*index) / elts_per_vector;
+  auto subelt = UINTVAL (*index) % elts_per_vector;
+  auto subvec_byte = subvec * GET_MODE_SIZE (subvec_mode);
+  if (is_dest)
+    *vector = simplify_gen_subreg (subvec_mode, *vector, GET_MODE (*vector),
+                                  subvec_byte);
+  else
+    *vector = force_subreg (subvec_mode, *vector, GET_MODE (*vector),
+                           subvec_byte);
+  *index = gen_int_mode (subelt, SImode);
+}
+
  /* Expand a vector initialization sequence, such that TARGET is
     initialized to contain VALS.  */
  
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md

index 2700392db5fae2b0b269f017a3574366b6549838..a8957681357360f807ee5e6453a533084f9510da 100644 (file)
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -1705,6 +1705,30 @@
                        (SI   "SI") (HI    "HI")
                        (QI   "QI")
                        (V4BF "BF") (V8BF "BF")
+                      (V2x8QI "QI") (V2x4HI "HI")
+                      (V2x2SI "SI") (V2x1DI "DI")
+                      (V2x4HF "HF") (V2x2SF "SF")
+                      (V2x1DF "DF") (V2x4BF "BF")
+                      (V3x8QI "QI") (V3x4HI "HI")
+                      (V3x2SI "SI") (V3x1DI "DI")
+                      (V3x4HF "HF") (V3x2SF "SF")
+                      (V3x1DF "DF") (V3x4BF "BF")
+                      (V4x8QI "QI") (V4x4HI "HI")
+                      (V4x2SI "SI") (V4x1DI "DI")
+                      (V4x4HF "HF") (V4x2SF "SF")
+                      (V4x1DF "DF") (V4x4BF "BF")
+                      (V2x16QI "QI") (V2x8HI "HI")
+                      (V2x4SI "SI") (V2x2DI "DI")
+                      (V2x8HF "HF") (V2x4SF "SF")
+                      (V2x2DF "DF") (V2x8BF "BF")
+                      (V3x16QI "QI") (V3x8HI "HI")
+                      (V3x4SI "SI") (V3x2DI "DI")
+                      (V3x8HF "HF") (V3x4SF "SF")
+                      (V3x2DF "DF") (V3x8BF "BF")
+                      (V4x16QI "QI") (V4x8HI "HI")
+                      (V4x4SI "SI") (V4x2DI "DI")
+                      (V4x8HF "HF") (V4x4SF "SF")
+                      (V4x2DF "DF") (V4x8BF "BF")
                        (VNx16QI "QI") (VNx8QI "QI") (VNx4QI "QI") (VNx2QI "QI")
                        (VNx8HI "HI") (VNx4HI "HI") (VNx2HI "HI")
                        (VNx8HF "HF") (VNx4HF "HF") (VNx2HF "HF")
@@ -1726,6 +1750,30 @@
                        (DF   "df") (SI   "si")
                        (HI   "hi") (QI   "qi")
                        (V4BF "bf") (V8BF "bf")
+                      (V2x8QI "qi") (V2x4HI "hi")
+                      (V2x2SI "si") (V2x1DI "di")
+                      (V2x4HF "hf") (V2x2SF "sf")
+                      (V2x1DF "df") (V2x4BF "bf")
+                      (V3x8QI "qi") (V3x4HI "hi")
+                      (V3x2SI "si") (V3x1DI "di")
+                      (V3x4HF "hf") (V3x2SF "sf")
+                      (V3x1DF "df") (V3x4BF "bf")
+                      (V4x8QI "qi") (V4x4HI "hi")
+                      (V4x2SI "si") (V4x1DI "di")
+                      (V4x4HF "hf") (V4x2SF "sf")
+                      (V4x1DF "df") (V4x4BF "bf")
+                      (V2x16QI "qi") (V2x8HI "hi")
+                      (V2x4SI "si") (V2x2DI "di")
+                      (V2x8HF "hf") (V2x4SF "sf")
+                      (V2x2DF "df") (V2x8BF "bf")
+                      (V3x16QI "qi") (V3x8HI "hi")
+                      (V3x4SI "si") (V3x2DI "di")
+                      (V3x8HF "hf") (V3x4SF "sf")
+                      (V3x2DF "df") (V3x8BF "bf")
+                      (V4x16QI "qi") (V4x8HI "hi")
+                      (V4x4SI "si") (V4x2DI "di")
+                      (V4x8HF "hf") (V4x4SF "sf")
+                      (V4x2DF "df") (V4x8BF "bf")
                        (VNx16QI "qi") (VNx8QI "qi") (VNx4QI "qi") (VNx2QI "qi")
                        (VNx8HI "hi") (VNx4HI "hi") (VNx2HI "hi")
                        (VNx8HF "hf") (VNx4HF "hf") (VNx2HF "hf")
diff --git a/gcc/testsuite/gcc.target/aarch64/pr113027-1.c b/gcc/testsuite/gcc.target/aarch64/pr113027-1.c

new file mode 100644 (file)

index 0000000..6d9a51f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr113027-1.c
@@ -0,0 +1,27 @@
+/* { dg-options "-O2" } */
+
+#include <arm_neon.h>
+
+float64x2x2_t
+f1 (float64x2x2_t x)
+{
+  x.val[0][1] += 1.0;
+  return x;
+}
+
+float64x2x3_t
+f2 (float64x2x3_t x)
+{
+  x.val[0][0] = x.val[1][1] + x.val[2][0];
+  return x;
+}
+
+float64x2x4_t
+f3 (float64x2x4_t x)
+{
+  x.val[0][0] = x.val[1][1] + x.val[2][0] - x.val[3][1];
+  return x;
+}
+
+/* { dg-final { scan-assembler-not {\tmov\t} } } */
+/* { dg-final { scan-assembler-not {\[sp,} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/pr113027-2.c b/gcc/testsuite/gcc.target/aarch64/pr113027-2.c

new file mode 100644 (file)

index 0000000..ec756ec
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr113027-2.c
@@ -0,0 +1,268 @@
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" "" { target aarch64_little_endian } } } */
+
+#include <arm_neon.h>
+
+#define TEST(TYPE, A, B, C, D)                 \
+  TYPE                                         \
+  test_##TYPE (TYPE a)                         \
+  {                                            \
+    a.val[A][B] = a.val[C][D];                 \
+    return a;                                  \
+  }
+
+/*
+** test_bfloat16x4x2_t:
+**     ins     v1\.h\[3\], v0\.h\[2\]
+**     ret
+*/
+TEST (bfloat16x4x2_t, 1, 3, 0, 2)
+
+/*
+** test_float16x4x2_t:
+**     ins     v1\.h\[1\], v0\.h\[3\]
+**     ret
+*/
+TEST (float16x4x2_t, 1, 1, 0, 3)
+
+/*
+** test_float32x2x2_t:
+**     ins     v1\.s\[0\], v0\.s\[1\]
+**     ret
+*/
+TEST (float32x2x2_t, 1, 0, 0, 1)
+
+/*
+** test_float64x1x2_t:
+**     fmov    d1, d0
+**     ret
+*/
+TEST (float64x1x2_t, 1, 0, 0, 0)
+
+/*
+** test_int8x8x2_t:
+**     ins     v0\.b\[5\], v1\.b\[7\]
+**     ret
+*/
+TEST (int8x8x2_t, 0, 5, 1, 7)
+
+/*
+** test_int16x4x2_t:
+**     ins     v0\.h\[2\], v1\.h\[2\]
+**     ret
+*/
+TEST (int16x4x2_t, 0, 2, 1, 2)
+
+/*
+** test_int32x2x2_t:
+**     ins     v0\.s\[0\], v1\.s\[1\]
+**     ret
+*/
+TEST (int32x2x2_t, 0, 0, 1, 1)
+
+/*
+** test_int64x1x2_t:
+**     fmov    d0, d1
+**     ret
+*/
+TEST (int64x1x2_t, 0, 0, 1, 0)
+
+/*
+** test_uint8x8x2_t:
+**     ins     v1\.b\[6\], v0\.b\[3\]
+**     ret
+*/
+TEST (uint8x8x2_t, 1, 6, 0, 3)
+
+/*
+** test_uint16x4x2_t:
+**     ins     v1\.h\[2\], v1\.h\[0\]
+**     ret
+*/
+TEST (uint16x4x2_t, 1, 2, 1, 0)
+
+/*
+** test_uint32x2x2_t:
+**     ins     v1\.s\[0\], v1\.s\[1\]
+**     ret
+*/
+TEST (uint32x2x2_t, 1, 0, 1, 1)
+
+/*
+** test_uint64x1x2_t:
+**     fmov    d1, d0
+**     ret
+*/
+TEST (uint64x1x2_t, 1, 0, 0, 0)
+
+//--------------------------------------------------------------
+
+/*
+** test_bfloat16x4x3_t:
+**     ins     v2\.h\[3\], v0\.h\[2\]
+**     ret
+*/
+TEST (bfloat16x4x3_t, 2, 3, 0, 2)
+
+/*
+** test_float16x4x3_t:
+**     ins     v0\.h\[1\], v1\.h\[3\]
+**     ret
+*/
+TEST (float16x4x3_t, 0, 1, 1, 3)
+
+/*
+** test_float32x2x3_t:
+**     ins     v1\.s\[0\], v2\.s\[1\]
+**     ret
+*/
+TEST (float32x2x3_t, 1, 0, 2, 1)
+
+/*
+** test_float64x1x3_t:
+**     fmov    d1, d2
+**     ret
+*/
+TEST (float64x1x3_t, 1, 0, 2, 0)
+
+/*
+** test_int8x8x3_t:
+**     ins     v0\.b\[5\], v2\.b\[6\]
+**     ret
+*/
+TEST (int8x8x3_t, 0, 5, 2, 6)
+
+/*
+** test_int16x4x3_t:
+**     ins     v2\.h\[2\], v1\.h\[1\]
+**     ret
+*/
+TEST (int16x4x3_t, 2, 2, 1, 1)
+
+/*
+** test_int32x2x3_t:
+**     ins     v1\.s\[0\], v1\.s\[1\]
+**     ret
+*/
+TEST (int32x2x3_t, 1, 0, 1, 1)
+
+/*
+** test_int64x1x3_t:
+**     fmov    d2, d1
+**     ret
+*/
+TEST (int64x1x3_t, 2, 0, 1, 0)
+
+/*
+** test_uint8x8x3_t:
+**     ins     v1\.b\[6\], v2\.b\[7\]
+**     ret
+*/
+TEST (uint8x8x3_t, 1, 6, 2, 7)
+
+/*
+** test_uint16x4x3_t:
+**     ins     v2\.h\[2\], v1\.h\[3\]
+**     ret
+*/
+TEST (uint16x4x3_t, 2, 2, 1, 3)
+
+/*
+** test_uint32x2x3_t:
+**     ins     v2\.s\[0\], v0\.s\[1\]
+**     ret
+*/
+TEST (uint32x2x3_t, 2, 0, 0, 1)
+
+/*
+** test_uint64x1x3_t:
+**     fmov    d1, d2
+**     ret
+*/
+TEST (uint64x1x3_t, 1, 0, 2, 0)
+
+//--------------------------------------------------------------
+
+/*
+** test_bfloat16x4x4_t:
+**     ins     v2\.h\[3\], v3\.h\[2\]
+**     ret
+*/
+TEST (bfloat16x4x4_t, 2, 3, 3, 2)
+
+/*
+** test_float16x4x4_t:
+**     ins     v0\.h\[2\], v3\.h\[1\]
+**     ret
+*/
+TEST (float16x4x4_t, 0, 2, 3, 1)
+
+/*
+** test_float32x2x4_t:
+**     ins     v3\.s\[0\], v2\.s\[1\]
+**     ret
+*/
+TEST (float32x2x4_t, 3, 0, 2, 1)
+
+/*
+** test_float64x1x4_t:
+**     fmov    d1, d3
+**     ret
+*/
+TEST (float64x1x4_t, 1, 0, 3, 0)
+
+/*
+** test_int8x8x4_t:
+**     ins     v0\.b\[4\], v3\.b\[7\]
+**     ret
+*/
+TEST (int8x8x4_t, 0, 4, 3, 7)
+
+/*
+** test_int16x4x4_t:
+**     ins     v3\.h\[3\], v1\.h\[1\]
+**     ret
+*/
+TEST (int16x4x4_t, 3, 3, 1, 1)
+
+/*
+** test_int32x2x4_t:
+**     ins     v1\.s\[0\], v3\.s\[1\]
+**     ret
+*/
+TEST (int32x2x4_t, 1, 0, 3, 1)
+
+/*
+** test_int64x1x4_t:
+**     fmov    d3, d1
+**     ret
+*/
+TEST (int64x1x4_t, 3, 0, 1, 0)
+
+/*
+** test_uint8x8x4_t:
+**     ins     v3\.b\[6\], v2\.b\[4\]
+**     ret
+*/
+TEST (uint8x8x4_t, 3, 6, 2, 4)
+
+/*
+** test_uint16x4x4_t:
+**     ins     v3\.h\[1\], v1\.h\[3\]
+**     ret
+*/
+TEST (uint16x4x4_t, 3, 1, 1, 3)
+
+/*
+** test_uint32x2x4_t:
+**     ins     v0\.s\[0\], v3\.s\[1\]
+**     ret
+*/
+TEST (uint32x2x4_t, 0, 0, 3, 1)
+
+/*
+** test_uint64x1x4_t:
+**     fmov    d1, d3
+**     ret
+*/
+TEST (uint64x1x4_t, 1, 0, 3, 0)
diff --git a/gcc/testsuite/gcc.target/aarch64/pr113027-3.c b/gcc/testsuite/gcc.target/aarch64/pr113027-3.c

new file mode 100644 (file)

index 0000000..561e672
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr113027-3.c
@@ -0,0 +1,268 @@
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" "" { target aarch64_little_endian } } } */
+
+#include <arm_neon.h>
+
+#define TEST(TYPE, A, B, C, D)                 \
+  TYPE                                         \
+  test_##TYPE (TYPE a)                         \
+  {                                            \
+    a.val[A][B] = a.val[C][D];                 \
+    return a;                                  \
+  }
+
+/*
+** test_bfloat16x8x2_t:
+**     ins     v1\.h\[6\], v0\.h\[5\]
+**     ret
+*/
+TEST (bfloat16x8x2_t, 1, 6, 0, 5)
+
+/*
+** test_float16x8x2_t:
+**     ins     v1\.h\[2\], v0\.h\[7\]
+**     ret
+*/
+TEST (float16x8x2_t, 1, 2, 0, 7)
+
+/*
+** test_float32x4x2_t:
+**     ins     v1\.s\[3\], v0\.s\[1\]
+**     ret
+*/
+TEST (float32x4x2_t, 1, 3, 0, 1)
+
+/*
+** test_float64x2x2_t:
+**     ins     v1\.d\[0\], v0\.d\[0\]
+**     ret
+*/
+TEST (float64x2x2_t, 1, 0, 0, 0)
+
+/*
+** test_int8x16x2_t:
+**     ins     v0\.b\[15\], v1\.b\[13\]
+**     ret
+*/
+TEST (int8x16x2_t, 0, 15, 1, 13)
+
+/*
+** test_int16x8x2_t:
+**     ins     v0\.h\[2\], v1\.h\[7\]
+**     ret
+*/
+TEST (int16x8x2_t, 0, 2, 1, 7)
+
+/*
+** test_int32x4x2_t:
+**     ins     v0\.s\[3\], v1\.s\[1\]
+**     ret
+*/
+TEST (int32x4x2_t, 0, 3, 1, 1)
+
+/*
+** test_int64x2x2_t:
+**     ins     v0\.d\[0\], v1\.d\[1\]
+**     ret
+*/
+TEST (int64x2x2_t, 0, 0, 1, 1)
+
+/*
+** test_uint8x16x2_t:
+**     ins     v1\.b\[13\], v0\.b\[11\]
+**     ret
+*/
+TEST (uint8x16x2_t, 1, 13, 0, 11)
+
+/*
+** test_uint16x8x2_t:
+**     ins     v1\.h\[6\], v1\.h\[3\]
+**     ret
+*/
+TEST (uint16x8x2_t, 1, 6, 1, 3)
+
+/*
+** test_uint32x4x2_t:
+**     ins     v1\.s\[3\], v1\.s\[1\]
+**     ret
+*/
+TEST (uint32x4x2_t, 1, 3, 1, 1)
+
+/*
+** test_uint64x2x2_t:
+**     ins     v1\.d\[0\], v1\.d\[1\]
+**     ret
+*/
+TEST (uint64x2x2_t, 1, 0, 1, 1)
+
+//--------------------------------------------------------------
+
+/*
+** test_bfloat16x8x3_t:
+**     ins     v2\.h\[3\], v0\.h\[7\]
+**     ret
+*/
+TEST (bfloat16x8x3_t, 2, 3, 0, 7)
+
+/*
+** test_float16x8x3_t:
+**     ins     v0\.h\[4\], v1\.h\[6\]
+**     ret
+*/
+TEST (float16x8x3_t, 0, 4, 1, 6)
+
+/*
+** test_float32x4x3_t:
+**     ins     v1\.s\[2\], v2\.s\[1\]
+**     ret
+*/
+TEST (float32x4x3_t, 1, 2, 2, 1)
+
+/*
+** test_float64x2x3_t:
+**     ins     v1\.d\[0\], v2\.d\[1\]
+**     ret
+*/
+TEST (float64x2x3_t, 1, 0, 2, 1)
+
+/*
+** test_int8x16x3_t:
+**     ins     v0\.b\[9\], v2\.b\[14\]
+**     ret
+*/
+TEST (int8x16x3_t, 0, 9, 2, 14)
+
+/*
+** test_int16x8x3_t:
+**     ins     v2\.h\[6\], v1\.h\[3\]
+**     ret
+*/
+TEST (int16x8x3_t, 2, 6, 1, 3)
+
+/*
+** test_int32x4x3_t:
+**     ins     v1\.s\[3\], v1\.s\[1\]
+**     ret
+*/
+TEST (int32x4x3_t, 1, 3, 1, 1)
+
+/*
+** test_int64x2x3_t:
+**     ins     v2\.d\[1\], v1\.d\[0\]
+**     ret
+*/
+TEST (int64x2x3_t, 2, 1, 1, 0)
+
+/*
+** test_uint8x16x3_t:
+**     ins     v1\.b\[10\], v2\.b\[8\]
+**     ret
+*/
+TEST (uint8x16x3_t, 1, 10, 2, 8)
+
+/*
+** test_uint16x8x3_t:
+**     ins     v2\.h\[5\], v1\.h\[2\]
+**     ret
+*/
+TEST (uint16x8x3_t, 2, 5, 1, 2)
+
+/*
+** test_uint32x4x3_t:
+**     ins     v2\.s\[3\], v0\.s\[1\]
+**     ret
+*/
+TEST (uint32x4x3_t, 2, 3, 0, 1)
+
+/*
+** test_uint64x2x3_t:
+**     ins     v1\.d\[0\], v2\.d\[1\]
+**     ret
+*/
+TEST (uint64x2x3_t, 1, 0, 2, 1)
+
+//--------------------------------------------------------------
+
+/*
+** test_bfloat16x8x4_t:
+**     ins     v2\.h\[5\], v3\.h\[6\]
+**     ret
+*/
+TEST (bfloat16x8x4_t, 2, 5, 3, 6)
+
+/*
+** test_float16x8x4_t:
+**     ins     v0\.h\[3\], v3\.h\[5\]
+**     ret
+*/
+TEST (float16x8x4_t, 0, 3, 3, 5)
+
+/*
+** test_float32x4x4_t:
+**     ins     v3\.s\[2\], v2\.s\[1\]
+**     ret
+*/
+TEST (float32x4x4_t, 3, 2, 2, 1)
+
+/*
+** test_float64x2x4_t:
+**     ins     v1\.d\[1\], v3\.d\[0\]
+**     ret
+*/
+TEST (float64x2x4_t, 1, 1, 3, 0)
+
+/*
+** test_int8x16x4_t:
+**     ins     v0\.b\[14\], v3\.b\[10\]
+**     ret
+*/
+TEST (int8x16x4_t, 0, 14, 3, 10)
+
+/*
+** test_int16x8x4_t:
+**     ins     v3\.h\[4\], v1\.h\[6\]
+**     ret
+*/
+TEST (int16x8x4_t, 3, 4, 1, 6)
+
+/*
+** test_int32x4x4_t:
+**     ins     v1\.s\[3\], v3\.s\[1\]
+**     ret
+*/
+TEST (int32x4x4_t, 1, 3, 3, 1)
+
+/*
+** test_int64x2x4_t:
+**     ins     v3\.d\[0\], v2\.d\[0\]
+**     ret
+*/
+TEST (int64x2x4_t, 3, 0, 2, 0)
+
+/*
+** test_uint8x16x4_t:
+**     ins     v3\.b\[13\], v2\.b\[6\]
+**     ret
+*/
+TEST (uint8x16x4_t, 3, 13, 2, 6)
+
+/*
+** test_uint16x8x4_t:
+**     ins     v3\.h\[2\], v1\.h\[7\]
+**     ret
+*/
+TEST (uint16x8x4_t, 3, 2, 1, 7)
+
+/*
+** test_uint32x4x4_t:
+**     ins     v0\.s\[3\], v3\.s\[2\]
+**     ret
+*/
+TEST (uint32x4x4_t, 0, 3, 3, 2)
+
+/*
+** test_uint64x2x4_t:
+**     ins     v1\.d\[0\], v3\.d\[1\]
+**     ret
+*/
+TEST (uint64x2x4_t, 1, 0, 3, 1)
diff --git a/gcc/testsuite/gcc.target/aarch64/pr113027-4.c b/gcc/testsuite/gcc.target/aarch64/pr113027-4.c

new file mode 100644 (file)

index 0000000..67f45df
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr113027-4.c
@@ -0,0 +1,268 @@
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" "" { target aarch64_little_endian } } } */
+
+#include <arm_neon.h>
+
+#define TEST(TYPE, A, B)               \
+  TYPE                                 \
+  test_##TYPE (TYPE a, TYPE *ptr)      \
+  {                                    \
+    a.val[A][B] = ptr->val[0][0];      \
+    return a;                          \
+  }
+
+/*
+** test_bfloat16x4x2_t:
+**     ld1     \{v1\.h\}\[3\], \[x0\]
+**     ret
+*/
+TEST (bfloat16x4x2_t, 1, 3)
+
+/*
+** test_float16x4x2_t:
+**     ld1     \{v1\.h\}\[1\], \[x0\]
+**     ret
+*/
+TEST (float16x4x2_t, 1, 1)
+
+/*
+** test_float32x2x2_t:
+**     ld1     \{v1\.s\}\[0\], \[x0\]
+**     ret
+*/
+TEST (float32x2x2_t, 1, 0)
+
+/*
+** test_float64x1x2_t:
+**     ldr     d1, \[x0\]
+**     ret
+*/
+TEST (float64x1x2_t, 1, 0)
+
+/*
+** test_int8x8x2_t:
+**     ld1     \{v0\.b\}\[5\], \[x0\]
+**     ret
+*/
+TEST (int8x8x2_t, 0, 5)
+
+/*
+** test_int16x4x2_t:
+**     ld1     \{v0\.h\}\[2\], \[x0\]
+**     ret
+*/
+TEST (int16x4x2_t, 0, 2)
+
+/*
+** test_int32x2x2_t:
+**     ld1     \{v0\.s\}\[0\], \[x0\]
+**     ret
+*/
+TEST (int32x2x2_t, 0, 0)
+
+/*
+** test_int64x1x2_t:
+**     ldr     d0, \[x0\]
+**     ret
+*/
+TEST (int64x1x2_t, 0, 0)
+
+/*
+** test_uint8x8x2_t:
+**     ld1     \{v1\.b\}\[6\], \[x0\]
+**     ret
+*/
+TEST (uint8x8x2_t, 1, 6)
+
+/*
+** test_uint16x4x2_t:
+**     ld1     \{v1\.h\}\[2\], \[x0\]
+**     ret
+*/
+TEST (uint16x4x2_t, 1, 2)
+
+/*
+** test_uint32x2x2_t:
+**     ld1     \{v1\.s\}\[0\], \[x0\]
+**     ret
+*/
+TEST (uint32x2x2_t, 1, 0)
+
+/*
+** test_uint64x1x2_t:
+**     ldr     d1, \[x0\]
+**     ret
+*/
+TEST (uint64x1x2_t, 1, 0)
+
+//--------------------------------------------------------------
+
+/*
+** test_bfloat16x4x3_t:
+**     ld1     \{v2\.h\}\[3\], \[x0\]
+**     ret
+*/
+TEST (bfloat16x4x3_t, 2, 3)
+
+/*
+** test_float16x4x3_t:
+**     ld1     \{v0\.h\}\[1\], \[x0\]
+**     ret
+*/
+TEST (float16x4x3_t, 0, 1)
+
+/*
+** test_float32x2x3_t:
+**     ld1     \{v1\.s\}\[0\], \[x0\]
+**     ret
+*/
+TEST (float32x2x3_t, 1, 0)
+
+/*
+** test_float64x1x3_t:
+**     ldr     d1, \[x0\]
+**     ret
+*/
+TEST (float64x1x3_t, 1, 0)
+
+/*
+** test_int8x8x3_t:
+**     ld1     \{v0\.b\}\[5\], \[x0\]
+**     ret
+*/
+TEST (int8x8x3_t, 0, 5)
+
+/*
+** test_int16x4x3_t:
+**     ld1     \{v2\.h\}\[2\], \[x0\]
+**     ret
+*/
+TEST (int16x4x3_t, 2, 2)
+
+/*
+** test_int32x2x3_t:
+**     ld1     \{v1\.s\}\[0\], \[x0\]
+**     ret
+*/
+TEST (int32x2x3_t, 1, 0)
+
+/*
+** test_int64x1x3_t:
+**     ldr     d2, \[x0\]
+**     ret
+*/
+TEST (int64x1x3_t, 2, 0)
+
+/*
+** test_uint8x8x3_t:
+**     ld1     \{v1\.b\}\[6\], \[x0\]
+**     ret
+*/
+TEST (uint8x8x3_t, 1, 6)
+
+/*
+** test_uint16x4x3_t:
+**     ld1     \{v2\.h\}\[2\], \[x0\]
+**     ret
+*/
+TEST (uint16x4x3_t, 2, 2)
+
+/*
+** test_uint32x2x3_t:
+**     ld1     \{v2\.s\}\[0\], \[x0\]
+**     ret
+*/
+TEST (uint32x2x3_t, 2, 0)
+
+/*
+** test_uint64x1x3_t:
+**     ldr     d1, \[x0\]
+**     ret
+*/
+TEST (uint64x1x3_t, 1, 0)
+
+//--------------------------------------------------------------
+
+/*
+** test_bfloat16x4x4_t:
+**     ld1     \{v2\.h\}\[3\], \[x0\]
+**     ret
+*/
+TEST (bfloat16x4x4_t, 2, 3)
+
+/*
+** test_float16x4x4_t:
+**     ld1     \{v0\.h\}\[2\], \[x0\]
+**     ret
+*/
+TEST (float16x4x4_t, 0, 2)
+
+/*
+** test_float32x2x4_t:
+**     ld1     \{v3\.s\}\[0\], \[x0\]
+**     ret
+*/
+TEST (float32x2x4_t, 3, 0)
+
+/*
+** test_float64x1x4_t:
+**     ldr     d1, \[x0\]
+**     ret
+*/
+TEST (float64x1x4_t, 1, 0)
+
+/*
+** test_int8x8x4_t:
+**     ld1     \{v0\.b\}\[4\], \[x0\]
+**     ret
+*/
+TEST (int8x8x4_t, 0, 4)
+
+/*
+** test_int16x4x4_t:
+**     ld1     \{v3\.h\}\[3\], \[x0\]
+**     ret
+*/
+TEST (int16x4x4_t, 3, 3)
+
+/*
+** test_int32x2x4_t:
+**     ld1     \{v1\.s\}\[0\], \[x0\]
+**     ret
+*/
+TEST (int32x2x4_t, 1, 0)
+
+/*
+** test_int64x1x4_t:
+**     ldr     d3, \[x0\]
+**     ret
+*/
+TEST (int64x1x4_t, 3, 0)
+
+/*
+** test_uint8x8x4_t:
+**     ld1     \{v3\.b\}\[6\], \[x0\]
+**     ret
+*/
+TEST (uint8x8x4_t, 3, 6)
+
+/*
+** test_uint16x4x4_t:
+**     ld1     \{v3\.h\}\[1\], \[x0\]
+**     ret
+*/
+TEST (uint16x4x4_t, 3, 1)
+
+/*
+** test_uint32x2x4_t:
+**     ld1     \{v0\.s\}\[0\], \[x0\]
+**     ret
+*/
+TEST (uint32x2x4_t, 0, 0)
+
+/*
+** test_uint64x1x4_t:
+**     ldr     d1, \[x0\]
+**     ret
+*/
+TEST (uint64x1x4_t, 1, 0)
diff --git a/gcc/testsuite/gcc.target/aarch64/pr113027-5.c b/gcc/testsuite/gcc.target/aarch64/pr113027-5.c

new file mode 100644 (file)

index 0000000..5695eca
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr113027-5.c
@@ -0,0 +1,268 @@
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" "" { target aarch64_little_endian } } } */
+
+#include <arm_neon.h>
+
+#define TEST(TYPE, A, B)                       \
+  TYPE                                         \
+  test_##TYPE (TYPE a, TYPE *ptr)              \
+  {                                            \
+    a.val[A][B] = ptr->val[0][0];              \
+    return a;                                  \
+  }
+
+/*
+** test_bfloat16x8x2_t:
+**     ld1     \{v1\.h\}\[6\], \[x0\]
+**     ret
+*/
+TEST (bfloat16x8x2_t, 1, 6)
+
+/*
+** test_float16x8x2_t:
+**     ld1     \{v1\.h\}\[2\], \[x0\]
+**     ret
+*/
+TEST (float16x8x2_t, 1, 2)
+
+/*
+** test_float32x4x2_t:
+**     ld1     \{v1\.s\}\[3\], \[x0\]
+**     ret
+*/
+TEST (float32x4x2_t, 1, 3)
+
+/*
+** test_float64x2x2_t:
+**     ld1     \{v1\.d\}\[0\], \[x0\]
+**     ret
+*/
+TEST (float64x2x2_t, 1, 0)
+
+/*
+** test_int8x16x2_t:
+**     ld1     \{v0\.b\}\[15\], \[x0\]
+**     ret
+*/
+TEST (int8x16x2_t, 0, 15)
+
+/*
+** test_int16x8x2_t:
+**     ld1     \{v0\.h\}\[2\], \[x0\]
+**     ret
+*/
+TEST (int16x8x2_t, 0, 2)
+
+/*
+** test_int32x4x2_t:
+**     ld1     \{v0\.s\}\[3\], \[x0\]
+**     ret
+*/
+TEST (int32x4x2_t, 0, 3)
+
+/*
+** test_int64x2x2_t:
+**     ld1     \{v0\.d\}\[0\], \[x0\]
+**     ret
+*/
+TEST (int64x2x2_t, 0, 0)
+
+/*
+** test_uint8x16x2_t:
+**     ld1     \{v1\.b\}\[13\], \[x0\]
+**     ret
+*/
+TEST (uint8x16x2_t, 1, 13)
+
+/*
+** test_uint16x8x2_t:
+**     ld1     \{v1\.h\}\[6\], \[x0\]
+**     ret
+*/
+TEST (uint16x8x2_t, 1, 6)
+
+/*
+** test_uint32x4x2_t:
+**     ld1     \{v1\.s\}\[3\], \[x0\]
+**     ret
+*/
+TEST (uint32x4x2_t, 1, 3)
+
+/*
+** test_uint64x2x2_t:
+**     ld1     \{v1\.d\}\[0\], \[x0\]
+**     ret
+*/
+TEST (uint64x2x2_t, 1, 0)
+
+//--------------------------------------------------------------
+
+/*
+** test_bfloat16x8x3_t:
+**     ld1     \{v2\.h\}\[3\], \[x0\]
+**     ret
+*/
+TEST (bfloat16x8x3_t, 2, 3)
+
+/*
+** test_float16x8x3_t:
+**     ld1     \{v0\.h\}\[4\], \[x0\]
+**     ret
+*/
+TEST (float16x8x3_t, 0, 4)
+
+/*
+** test_float32x4x3_t:
+**     ld1     \{v1\.s\}\[2\], \[x0\]
+**     ret
+*/
+TEST (float32x4x3_t, 1, 2)
+
+/*
+** test_float64x2x3_t:
+**     ld1     \{v1\.d\}\[0\], \[x0\]
+**     ret
+*/
+TEST (float64x2x3_t, 1, 0)
+
+/*
+** test_int8x16x3_t:
+**     ld1     \{v0\.b\}\[9\], \[x0\]
+**     ret
+*/
+TEST (int8x16x3_t, 0, 9)
+
+/*
+** test_int16x8x3_t:
+**     ld1     \{v2\.h\}\[6\], \[x0\]
+**     ret
+*/
+TEST (int16x8x3_t, 2, 6)
+
+/*
+** test_int32x4x3_t:
+**     ld1     \{v1\.s\}\[3\], \[x0\]
+**     ret
+*/
+TEST (int32x4x3_t, 1, 3)
+
+/*
+** test_int64x2x3_t:
+**     ld1     \{v2\.d\}\[1\], \[x0\]
+**     ret
+*/
+TEST (int64x2x3_t, 2, 1)
+
+/*
+** test_uint8x16x3_t:
+**     ld1     \{v1\.b\}\[10\], \[x0\]
+**     ret
+*/
+TEST (uint8x16x3_t, 1, 10)
+
+/*
+** test_uint16x8x3_t:
+**     ld1     \{v2\.h\}\[5\], \[x0\]
+**     ret
+*/
+TEST (uint16x8x3_t, 2, 5)
+
+/*
+** test_uint32x4x3_t:
+**     ld1     \{v2\.s\}\[3\], \[x0\]
+**     ret
+*/
+TEST (uint32x4x3_t, 2, 3)
+
+/*
+** test_uint64x2x3_t:
+**     ld1     \{v1\.d\}\[0\], \[x0\]
+**     ret
+*/
+TEST (uint64x2x3_t, 1, 0)
+
+//--------------------------------------------------------------
+
+/*
+** test_bfloat16x8x4_t:
+**     ld1     \{v2\.h\}\[5\], \[x0\]
+**     ret
+*/
+TEST (bfloat16x8x4_t, 2, 5)
+
+/*
+** test_float16x8x4_t:
+**     ld1     \{v0\.h\}\[3\], \[x0\]
+**     ret
+*/
+TEST (float16x8x4_t, 0, 3)
+
+/*
+** test_float32x4x4_t:
+**     ld1     \{v3\.s\}\[2\], \[x0\]
+**     ret
+*/
+TEST (float32x4x4_t, 3, 2)
+
+/*
+** test_float64x2x4_t:
+**     ld1     \{v1\.d\}\[1\], \[x0\]
+**     ret
+*/
+TEST (float64x2x4_t, 1, 1)
+
+/*
+** test_int8x16x4_t:
+**     ld1     \{v0\.b\}\[14\], \[x0\]
+**     ret
+*/
+TEST (int8x16x4_t, 0, 14)
+
+/*
+** test_int16x8x4_t:
+**     ld1     \{v3\.h\}\[4\], \[x0\]
+**     ret
+*/
+TEST (int16x8x4_t, 3, 4)
+
+/*
+** test_int32x4x4_t:
+**     ld1     \{v1\.s\}\[3\], \[x0\]
+**     ret
+*/
+TEST (int32x4x4_t, 1, 3)
+
+/*
+** test_int64x2x4_t:
+**     ld1     \{v3\.d\}\[0\], \[x0\]
+**     ret
+*/
+TEST (int64x2x4_t, 3, 0)
+
+/*
+** test_uint8x16x4_t:
+**     ld1     \{v3\.b\}\[13\], \[x0\]
+**     ret
+*/
+TEST (uint8x16x4_t, 3, 13)
+
+/*
+** test_uint16x8x4_t:
+**     ld1     \{v3\.h\}\[2\], \[x0\]
+**     ret
+*/
+TEST (uint16x8x4_t, 3, 2)
+
+/*
+** test_uint32x4x4_t:
+**     ld1     \{v0\.s\}\[3\], \[x0\]
+**     ret
+*/
+TEST (uint32x4x4_t, 0, 3)
+
+/*
+** test_uint64x2x4_t:
+**     ld1     \{v1\.d\}\[0\], \[x0\]
+**     ret
+*/
+TEST (uint64x2x4_t, 1, 0)
diff --git a/gcc/testsuite/gcc.target/aarch64/pr113027-6.c b/gcc/testsuite/gcc.target/aarch64/pr113027-6.c

new file mode 100644 (file)

index 0000000..12d3a38
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr113027-6.c
@@ -0,0 +1,267 @@
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" "" { target aarch64_little_endian } } } */
+
+#include <arm_neon.h>
+
+#define TEST(TYPE, A, B)               \
+  void                                 \
+  test_##TYPE (TYPE a, TYPE *ptr)      \
+  {                                    \
+    ptr->val[0][0] = a.val[A][B];      \
+  }
+
+/*
+** test_bfloat16x4x2_t:
+**     st1     \{v1\.h\}\[3\], \[x0\]
+**     ret
+*/
+TEST (bfloat16x4x2_t, 1, 3)
+
+/*
+** test_float16x4x2_t:
+**     st1     \{v1\.h\}\[1\], \[x0\]
+**     ret
+*/
+TEST (float16x4x2_t, 1, 1)
+
+/*
+** test_float32x2x2_t:
+**     str     s1, \[x0\]
+**     ret
+*/
+TEST (float32x2x2_t, 1, 0)
+
+/*
+** test_float64x1x2_t:
+**     str     d1, \[x0\]
+**     ret
+*/
+TEST (float64x1x2_t, 1, 0)
+
+/*
+** test_int8x8x2_t:
+**     st1     \{v0\.b\}\[5\], \[x0\]
+**     ret
+*/
+TEST (int8x8x2_t, 0, 5)
+
+/*
+** test_int16x4x2_t:
+**     st1     \{v0\.h\}\[2\], \[x0\]
+**     ret
+*/
+TEST (int16x4x2_t, 0, 2)
+
+/*
+** test_int32x2x2_t:
+**     str     s0, \[x0\]
+**     ret
+*/
+TEST (int32x2x2_t, 0, 0)
+
+/*
+** test_int64x1x2_t:
+**     str     d0, \[x0\]
+**     ret
+*/
+TEST (int64x1x2_t, 0, 0)
+
+/*
+** test_uint8x8x2_t:
+**     st1     \{v1\.b\}\[6\], \[x0\]
+**     ret
+*/
+TEST (uint8x8x2_t, 1, 6)
+
+/*
+** test_uint16x4x2_t:
+**     st1     \{v1\.h\}\[2\], \[x0\]
+**     ret
+*/
+TEST (uint16x4x2_t, 1, 2)
+
+/*
+** test_uint32x2x2_t:
+**     str     s1, \[x0\]
+**     ret
+*/
+TEST (uint32x2x2_t, 1, 0)
+
+/*
+** test_uint64x1x2_t:
+**     str     d1, \[x0\]
+**     ret
+*/
+TEST (uint64x1x2_t, 1, 0)
+
+//--------------------------------------------------------------
+
+/*
+** test_bfloat16x4x3_t:
+**     st1     \{v2\.h\}\[3\], \[x0\]
+**     ret
+*/
+TEST (bfloat16x4x3_t, 2, 3)
+
+/*
+** test_float16x4x3_t:
+**     st1     \{v0\.h\}\[1\], \[x0\]
+**     ret
+*/
+TEST (float16x4x3_t, 0, 1)
+
+/*
+** test_float32x2x3_t:
+**     str     s1, \[x0\]
+**     ret
+*/
+TEST (float32x2x3_t, 1, 0)
+
+/*
+** test_float64x1x3_t:
+**     str     d1, \[x0\]
+**     ret
+*/
+TEST (float64x1x3_t, 1, 0)
+
+/*
+** test_int8x8x3_t:
+**     st1     \{v0\.b\}\[5\], \[x0\]
+**     ret
+*/
+TEST (int8x8x3_t, 0, 5)
+
+/*
+** test_int16x4x3_t:
+**     st1     \{v2\.h\}\[2\], \[x0\]
+**     ret
+*/
+TEST (int16x4x3_t, 2, 2)
+
+/*
+** test_int32x2x3_t:
+**     str     s1, \[x0\]
+**     ret
+*/
+TEST (int32x2x3_t, 1, 0)
+
+/*
+** test_int64x1x3_t:
+**     str     d2, \[x0\]
+**     ret
+*/
+TEST (int64x1x3_t, 2, 0)
+
+/*
+** test_uint8x8x3_t:
+**     st1     \{v1\.b\}\[6\], \[x0\]
+**     ret
+*/
+TEST (uint8x8x3_t, 1, 6)
+
+/*
+** test_uint16x4x3_t:
+**     st1     \{v2\.h\}\[2\], \[x0\]
+**     ret
+*/
+TEST (uint16x4x3_t, 2, 2)
+
+/*
+** test_uint32x2x3_t:
+**     str     s2, \[x0\]
+**     ret
+*/
+TEST (uint32x2x3_t, 2, 0)
+
+/*
+** test_uint64x1x3_t:
+**     str     d1, \[x0\]
+**     ret
+*/
+TEST (uint64x1x3_t, 1, 0)
+
+//--------------------------------------------------------------
+
+/*
+** test_bfloat16x4x4_t:
+**     st1     \{v2\.h\}\[3\], \[x0\]
+**     ret
+*/
+TEST (bfloat16x4x4_t, 2, 3)
+
+/*
+** test_float16x4x4_t:
+**     st1     \{v0\.h\}\[2\], \[x0\]
+**     ret
+*/
+TEST (float16x4x4_t, 0, 2)
+
+/*
+** test_float32x2x4_t:
+**     str     s3, \[x0\]
+**     ret
+*/
+TEST (float32x2x4_t, 3, 0)
+
+/*
+** test_float64x1x4_t:
+**     str     d1, \[x0\]
+**     ret
+*/
+TEST (float64x1x4_t, 1, 0)
+
+/*
+** test_int8x8x4_t:
+**     st1     \{v0\.b\}\[4\], \[x0\]
+**     ret
+*/
+TEST (int8x8x4_t, 0, 4)
+
+/*
+** test_int16x4x4_t:
+**     st1     \{v3\.h\}\[3\], \[x0\]
+**     ret
+*/
+TEST (int16x4x4_t, 3, 3)
+
+/*
+** test_int32x2x4_t:
+**     str     s1, \[x0\]
+**     ret
+*/
+TEST (int32x2x4_t, 1, 0)
+
+/*
+** test_int64x1x4_t:
+**     str     d3, \[x0\]
+**     ret
+*/
+TEST (int64x1x4_t, 3, 0)
+
+/*
+** test_uint8x8x4_t:
+**     st1     \{v3\.b\}\[6\], \[x0\]
+**     ret
+*/
+TEST (uint8x8x4_t, 3, 6)
+
+/*
+** test_uint16x4x4_t:
+**     st1     \{v3\.h\}\[1\], \[x0\]
+**     ret
+*/
+TEST (uint16x4x4_t, 3, 1)
+
+/*
+** test_uint32x2x4_t:
+**     str     s0, \[x0\]
+**     ret
+*/
+TEST (uint32x2x4_t, 0, 0)
+
+/*
+** test_uint64x1x4_t:
+**     str     d1, \[x0\]
+**     ret
+*/
+TEST (uint64x1x4_t, 1, 0)
diff --git a/gcc/testsuite/gcc.target/aarch64/pr113027-7.c b/gcc/testsuite/gcc.target/aarch64/pr113027-7.c

new file mode 100644 (file)

index 0000000..b3ae1a7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr113027-7.c
@@ -0,0 +1,267 @@
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" "" { target aarch64_little_endian } } } */
+
+#include <arm_neon.h>
+
+#define TEST(TYPE, A, B)                       \
+  void                                         \
+  test_##TYPE (TYPE a, TYPE *ptr)              \
+  {                                            \
+    ptr->val[0][0] = a.val[A][B];              \
+  }
+
+/*
+** test_bfloat16x8x2_t:
+**     st1     \{v1\.h\}\[6\], \[x0\]
+**     ret
+*/
+TEST (bfloat16x8x2_t, 1, 6)
+
+/*
+** test_float16x8x2_t:
+**     st1     \{v1\.h\}\[2\], \[x0\]
+**     ret
+*/
+TEST (float16x8x2_t, 1, 2)
+
+/*
+** test_float32x4x2_t:
+**     st1     \{v1\.s\}\[3\], \[x0\]
+**     ret
+*/
+TEST (float32x4x2_t, 1, 3)
+
+/*
+** test_float64x2x2_t:
+**     str     d1, \[x0\]
+**     ret
+*/
+TEST (float64x2x2_t, 1, 0)
+
+/*
+** test_int8x16x2_t:
+**     st1     \{v0\.b\}\[15\], \[x0\]
+**     ret
+*/
+TEST (int8x16x2_t, 0, 15)
+
+/*
+** test_int16x8x2_t:
+**     st1     \{v0\.h\}\[2\], \[x0\]
+**     ret
+*/
+TEST (int16x8x2_t, 0, 2)
+
+/*
+** test_int32x4x2_t:
+**     st1     \{v0\.s\}\[3\], \[x0\]
+**     ret
+*/
+TEST (int32x4x2_t, 0, 3)
+
+/*
+** test_int64x2x2_t:
+**     str     d0, \[x0\]
+**     ret
+*/
+TEST (int64x2x2_t, 0, 0)
+
+/*
+** test_uint8x16x2_t:
+**     st1     \{v1\.b\}\[13\], \[x0\]
+**     ret
+*/
+TEST (uint8x16x2_t, 1, 13)
+
+/*
+** test_uint16x8x2_t:
+**     st1     \{v1\.h\}\[6\], \[x0\]
+**     ret
+*/
+TEST (uint16x8x2_t, 1, 6)
+
+/*
+** test_uint32x4x2_t:
+**     st1     \{v1\.s\}\[3\], \[x0\]
+**     ret
+*/
+TEST (uint32x4x2_t, 1, 3)
+
+/*
+** test_uint64x2x2_t:
+**     str     d1, \[x0\]
+**     ret
+*/
+TEST (uint64x2x2_t, 1, 0)
+
+//--------------------------------------------------------------
+
+/*
+** test_bfloat16x8x3_t:
+**     st1     \{v2\.h\}\[3\], \[x0\]
+**     ret
+*/
+TEST (bfloat16x8x3_t, 2, 3)
+
+/*
+** test_float16x8x3_t:
+**     st1     \{v0\.h\}\[4\], \[x0\]
+**     ret
+*/
+TEST (float16x8x3_t, 0, 4)
+
+/*
+** test_float32x4x3_t:
+**     st1     \{v1\.s\}\[2\], \[x0\]
+**     ret
+*/
+TEST (float32x4x3_t, 1, 2)
+
+/*
+** test_float64x2x3_t:
+**     str     d1, \[x0\]
+**     ret
+*/
+TEST (float64x2x3_t, 1, 0)
+
+/*
+** test_int8x16x3_t:
+**     st1     \{v0\.b\}\[9\], \[x0\]
+**     ret
+*/
+TEST (int8x16x3_t, 0, 9)
+
+/*
+** test_int16x8x3_t:
+**     st1     \{v2\.h\}\[6\], \[x0\]
+**     ret
+*/
+TEST (int16x8x3_t, 2, 6)
+
+/*
+** test_int32x4x3_t:
+**     st1     \{v1\.s\}\[3\], \[x0\]
+**     ret
+*/
+TEST (int32x4x3_t, 1, 3)
+
+/*
+** test_int64x2x3_t:
+**     st1     \{v2\.d\}\[1\], \[x0\]
+**     ret
+*/
+TEST (int64x2x3_t, 2, 1)
+
+/*
+** test_uint8x16x3_t:
+**     st1     \{v1\.b\}\[10\], \[x0\]
+**     ret
+*/
+TEST (uint8x16x3_t, 1, 10)
+
+/*
+** test_uint16x8x3_t:
+**     st1     \{v2\.h\}\[5\], \[x0\]
+**     ret
+*/
+TEST (uint16x8x3_t, 2, 5)
+
+/*
+** test_uint32x4x3_t:
+**     st1     \{v2\.s\}\[3\], \[x0\]
+**     ret
+*/
+TEST (uint32x4x3_t, 2, 3)
+
+/*
+** test_uint64x2x3_t:
+**     str     d1, \[x0\]
+**     ret
+*/
+TEST (uint64x2x3_t, 1, 0)
+
+//--------------------------------------------------------------
+
+/*
+** test_bfloat16x8x4_t:
+**     st1     \{v2\.h\}\[5\], \[x0\]
+**     ret
+*/
+TEST (bfloat16x8x4_t, 2, 5)
+
+/*
+** test_float16x8x4_t:
+**     st1     \{v0\.h\}\[3\], \[x0\]
+**     ret
+*/
+TEST (float16x8x4_t, 0, 3)
+
+/*
+** test_float32x4x4_t:
+**     st1     \{v3\.s\}\[2\], \[x0\]
+**     ret
+*/
+TEST (float32x4x4_t, 3, 2)
+
+/*
+** test_float64x2x4_t:
+**     st1     \{v1\.d\}\[1\], \[x0\]
+**     ret
+*/
+TEST (float64x2x4_t, 1, 1)
+
+/*
+** test_int8x16x4_t:
+**     st1     \{v0\.b\}\[14\], \[x0\]
+**     ret
+*/
+TEST (int8x16x4_t, 0, 14)
+
+/*
+** test_int16x8x4_t:
+**     st1     \{v3\.h\}\[4\], \[x0\]
+**     ret
+*/
+TEST (int16x8x4_t, 3, 4)
+
+/*
+** test_int32x4x4_t:
+**     st1     \{v1\.s\}\[3\], \[x0\]
+**     ret
+*/
+TEST (int32x4x4_t, 1, 3)
+
+/*
+** test_int64x2x4_t:
+**     str     d3, \[x0\]
+**     ret
+*/
+TEST (int64x2x4_t, 3, 0)
+
+/*
+** test_uint8x16x4_t:
+**     st1     \{v3\.b\}\[13\], \[x0\]
+**     ret
+*/
+TEST (uint8x16x4_t, 3, 13)
+
+/*
+** test_uint16x8x4_t:
+**     st1     \{v3\.h\}\[2\], \[x0\]
+**     ret
+*/
+TEST (uint16x8x4_t, 3, 2)
+
+/*
+** test_uint32x4x4_t:
+**     st1     \{v0\.s\}\[3\], \[x0\]
+**     ret
+*/
+TEST (uint32x4x4_t, 0, 3)
+
+/*
+** test_uint64x2x4_t:
+**     str     d1, \[x0\]
+**     ret
+*/
+TEST (uint64x2x4_t, 1, 0)
author	Richard Sandiford <richard.sandiford@arm.com>
	Tue, 17 Jun 2025 10:43:51 +0000 (11:43 +0100)
committer	Richard Sandiford <richard.sandiford@arm.com>
	Tue, 17 Jun 2025 10:43:51 +0000 (11:43 +0100)
gcc/config/aarch64/aarch64-protos.h		patch \| blob \| blame \| history
gcc/config/aarch64/aarch64-simd.md		patch \| blob \| blame \| history
gcc/config/aarch64/aarch64.cc		patch \| blob \| blame \| history
gcc/config/aarch64/iterators.md		patch \| blob \| blame \| history
gcc/testsuite/gcc.target/aarch64/pr113027-1.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/aarch64/pr113027-2.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/aarch64/pr113027-3.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/aarch64/pr113027-4.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/aarch64/pr113027-5.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/aarch64/pr113027-6.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/aarch64/pr113027-7.c	[new file with mode: 0644]	patch \| blob