rtx aarch64_replace_reg_mode (rtx, machine_mode);
void aarch64_split_sve_subreg_move (rtx, rtx, rtx);
void aarch64_expand_prologue (void);
+void aarch64_decompose_vec_struct_index (machine_mode, rtx *, rtx *, bool);
void aarch64_expand_vector_init (rtx, rtx);
void aarch64_sve_expand_vector_init_subvector (rtx, rtx);
void aarch64_sve_expand_vector_init (rtx, rtx);
}
)
+;; Set element number operand 2 of the Advanced SIMD structure (tuple)
+;; in operand 0 to the scalar value in operand 1.
+(define_expand "vec_set<mode>"
+ [(match_operand:VSTRUCT_QD 0 "register_operand")
+ (match_operand:<VEL> 1 "aarch64_simd_nonimmediate_operand")
+ (match_operand:SI 2 "immediate_operand")]
+ "TARGET_SIMD"
+{
+ /* Narrow the tuple operand to the single constituent vector that
+ contains the element, rewriting the index to be relative to that
+ vector.  The rest then reduces to an ordinary single-vector set.  */
+ aarch64_decompose_vec_struct_index (<VSTRUCT_ELT>mode, &operands[0],
+ &operands[2], true);
+ /* For tuples of 64-bit modes, <vstruct_elt> is the 64-bit scalar mode.
+ Allow gen_vec_set<vstruct_elt> to cope with those cases too. */
+ auto gen_vec_setdi ATTRIBUTE_UNUSED = [](rtx x0, rtx x1, rtx)
+ {
+ return gen_move_insn (x0, x1);
+ };
+ auto gen_vec_setdf ATTRIBUTE_UNUSED = gen_vec_setdi;
+ emit_insn (gen_vec_set<vstruct_elt> (operands[0], operands[1], operands[2]));
+ DONE;
+})
(define_insn "aarch64_mla<mode><vczle><vczbe>"
[(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
DONE;
})
+;; Extract element number operand 2 of the Advanced SIMD structure (tuple)
+;; in operand 1 into the scalar destination operand 0.
+(define_expand "vec_extract<mode><Vel>"
+ [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand")
+ (match_operand:VSTRUCT_QD 1 "register_operand")
+ (match_operand:SI 2 "immediate_operand")]
+ "TARGET_SIMD"
+{
+ /* Narrow the tuple source to the single constituent vector that
+ contains the element, rewriting the index to be relative to that
+ vector.  The rest then reduces to an ordinary single-vector extract.  */
+ aarch64_decompose_vec_struct_index (<VSTRUCT_ELT>mode, &operands[1],
+ &operands[2], false);
+ /* For tuples of 64-bit modes, <vstruct_elt> is the 64-bit scalar mode.
+ Allow gen_vec_extract<vstruct_elt><Vel> to cope with those cases too. */
+ auto gen_vec_extractdidi ATTRIBUTE_UNUSED = [](rtx x0, rtx x1, rtx)
+ {
+ return gen_move_insn (x0, x1);
+ };
+ auto gen_vec_extractdfdf ATTRIBUTE_UNUSED = gen_vec_extractdidi;
+ emit_insn (gen_vec_extract<vstruct_elt><Vel> (operands[0], operands[1],
+ operands[2]));
+ DONE;
+})
+
;; Extract a 64-bit vector from one half of a 128-bit vector.
(define_expand "vec_extract<mode><Vhalf>"
[(match_operand:<VHALF> 0 "register_operand")
return cost;
}
+/* *VECTOR is an Advanced SIMD structure mode and *INDEX is a constant index
+ into it. Narrow *VECTOR and *INDEX so that they reference a single vector
+ of mode SUBVEC_MODE. IS_DEST is true if *VECTOR is a destination operand,
+ false if it is a source operand. */
+
+void
+aarch64_decompose_vec_struct_index (machine_mode subvec_mode,
+ rtx *vector, rtx *index, bool is_dest)
+{
+ /* Number of elements in each constituent vector; Advanced SIMD modes
+ always have a constant element count, so to_constant is safe here.  */
+ auto elts_per_vector = GET_MODE_NUNITS (subvec_mode).to_constant ();
+ /* Split the flat tuple index into (vector number, lane within vector).  */
+ auto subvec = UINTVAL (*index) / elts_per_vector;
+ auto subelt = UINTVAL (*index) % elts_per_vector;
+ /* Byte offset of the selected constituent vector within the tuple.  */
+ auto subvec_byte = subvec * GET_MODE_SIZE (subvec_mode);
+ /* NOTE(review): a destination must remain an in-place subreg so it can
+ stand as an lvalue in the emitted pattern; a source can presumably be
+ copied into a fresh register by force_subreg when a direct subreg is
+ not representable — confirm against force_subreg's contract.  */
+ if (is_dest)
+ *vector = simplify_gen_subreg (subvec_mode, *vector, GET_MODE (*vector),
+ subvec_byte);
+ else
+ *vector = force_subreg (subvec_mode, *vector, GET_MODE (*vector),
+ subvec_byte);
+ /* vec_set/vec_extract index operands are SImode immediates.  */
+ *index = gen_int_mode (subelt, SImode);
+}
+
/* Expand a vector initialization sequence, such that TARGET is
initialized to contain VALS. */
(SI "SI") (HI "HI")
(QI "QI")
(V4BF "BF") (V8BF "BF")
+ (V2x8QI "QI") (V2x4HI "HI")
+ (V2x2SI "SI") (V2x1DI "DI")
+ (V2x4HF "HF") (V2x2SF "SF")
+ (V2x1DF "DF") (V2x4BF "BF")
+ (V3x8QI "QI") (V3x4HI "HI")
+ (V3x2SI "SI") (V3x1DI "DI")
+ (V3x4HF "HF") (V3x2SF "SF")
+ (V3x1DF "DF") (V3x4BF "BF")
+ (V4x8QI "QI") (V4x4HI "HI")
+ (V4x2SI "SI") (V4x1DI "DI")
+ (V4x4HF "HF") (V4x2SF "SF")
+ (V4x1DF "DF") (V4x4BF "BF")
+ (V2x16QI "QI") (V2x8HI "HI")
+ (V2x4SI "SI") (V2x2DI "DI")
+ (V2x8HF "HF") (V2x4SF "SF")
+ (V2x2DF "DF") (V2x8BF "BF")
+ (V3x16QI "QI") (V3x8HI "HI")
+ (V3x4SI "SI") (V3x2DI "DI")
+ (V3x8HF "HF") (V3x4SF "SF")
+ (V3x2DF "DF") (V3x8BF "BF")
+ (V4x16QI "QI") (V4x8HI "HI")
+ (V4x4SI "SI") (V4x2DI "DI")
+ (V4x8HF "HF") (V4x4SF "SF")
+ (V4x2DF "DF") (V4x8BF "BF")
(VNx16QI "QI") (VNx8QI "QI") (VNx4QI "QI") (VNx2QI "QI")
(VNx8HI "HI") (VNx4HI "HI") (VNx2HI "HI")
(VNx8HF "HF") (VNx4HF "HF") (VNx2HF "HF")
(DF "df") (SI "si")
(HI "hi") (QI "qi")
(V4BF "bf") (V8BF "bf")
+ (V2x8QI "qi") (V2x4HI "hi")
+ (V2x2SI "si") (V2x1DI "di")
+ (V2x4HF "hf") (V2x2SF "sf")
+ (V2x1DF "df") (V2x4BF "bf")
+ (V3x8QI "qi") (V3x4HI "hi")
+ (V3x2SI "si") (V3x1DI "di")
+ (V3x4HF "hf") (V3x2SF "sf")
+ (V3x1DF "df") (V3x4BF "bf")
+ (V4x8QI "qi") (V4x4HI "hi")
+ (V4x2SI "si") (V4x1DI "di")
+ (V4x4HF "hf") (V4x2SF "sf")
+ (V4x1DF "df") (V4x4BF "bf")
+ (V2x16QI "qi") (V2x8HI "hi")
+ (V2x4SI "si") (V2x2DI "di")
+ (V2x8HF "hf") (V2x4SF "sf")
+ (V2x2DF "df") (V2x8BF "bf")
+ (V3x16QI "qi") (V3x8HI "hi")
+ (V3x4SI "si") (V3x2DI "di")
+ (V3x8HF "hf") (V3x4SF "sf")
+ (V3x2DF "df") (V3x8BF "bf")
+ (V4x16QI "qi") (V4x8HI "hi")
+ (V4x4SI "si") (V4x2DI "di")
+ (V4x8HF "hf") (V4x4SF "sf")
+ (V4x2DF "df") (V4x8BF "bf")
(VNx16QI "qi") (VNx8QI "qi") (VNx4QI "qi") (VNx2QI "qi")
(VNx8HI "hi") (VNx4HI "hi") (VNx2HI "hi")
(VNx8HF "hf") (VNx4HF "hf") (VNx2HF "hf")
--- /dev/null
+/* { dg-options "-O2" } */
+
+#include <arm_neon.h>
+
+/* Update one lane of a tuple element in place; the dg-final checks below
+   require this to stay entirely in registers (no moves, no stack).  */
+float64x2x2_t
+f1 (float64x2x2_t x)
+{
+ x.val[0][1] += 1.0;
+ return x;
+}
+
+/* Combine lanes from two tuple elements into a third, in place.  */
+float64x2x3_t
+f2 (float64x2x3_t x)
+{
+ x.val[0][0] = x.val[1][1] + x.val[2][0];
+ return x;
+}
+
+/* As f2, but reading lanes from all four constituent vectors.  */
+float64x2x4_t
+f3 (float64x2x4_t x)
+{
+ x.val[0][0] = x.val[1][1] + x.val[2][0] - x.val[3][1];
+ return x;
+}
+
+/* { dg-final { scan-assembler-not {\tmov\t} } } */
+/* { dg-final { scan-assembler-not {\[sp,} } } */
--- /dev/null
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" "" { target aarch64_little_endian } } } */
+
+#include <arm_neon.h>
+
+/* Define test_TYPE, which copies lane D of constituent vector C to
+   lane B of constituent vector A within the same tuple argument.  */
+#define TEST(TYPE, A, B, C, D) \
+ TYPE \
+ test_##TYPE (TYPE a) \
+ { \
+ a.val[A][B] = a.val[C][D]; \
+ return a; \
+ }
+
+/*
+** test_bfloat16x4x2_t:
+** ins v1\.h\[3\], v0\.h\[2\]
+** ret
+*/
+TEST (bfloat16x4x2_t, 1, 3, 0, 2)
+
+/*
+** test_float16x4x2_t:
+** ins v1\.h\[1\], v0\.h\[3\]
+** ret
+*/
+TEST (float16x4x2_t, 1, 1, 0, 3)
+
+/*
+** test_float32x2x2_t:
+** ins v1\.s\[0\], v0\.s\[1\]
+** ret
+*/
+TEST (float32x2x2_t, 1, 0, 0, 1)
+
+/*
+** test_float64x1x2_t:
+** fmov d1, d0
+** ret
+*/
+TEST (float64x1x2_t, 1, 0, 0, 0)
+
+/*
+** test_int8x8x2_t:
+** ins v0\.b\[5\], v1\.b\[7\]
+** ret
+*/
+TEST (int8x8x2_t, 0, 5, 1, 7)
+
+/*
+** test_int16x4x2_t:
+** ins v0\.h\[2\], v1\.h\[2\]
+** ret
+*/
+TEST (int16x4x2_t, 0, 2, 1, 2)
+
+/*
+** test_int32x2x2_t:
+** ins v0\.s\[0\], v1\.s\[1\]
+** ret
+*/
+TEST (int32x2x2_t, 0, 0, 1, 1)
+
+/*
+** test_int64x1x2_t:
+** fmov d0, d1
+** ret
+*/
+TEST (int64x1x2_t, 0, 0, 1, 0)
+
+/*
+** test_uint8x8x2_t:
+** ins v1\.b\[6\], v0\.b\[3\]
+** ret
+*/
+TEST (uint8x8x2_t, 1, 6, 0, 3)
+
+/*
+** test_uint16x4x2_t:
+** ins v1\.h\[2\], v1\.h\[0\]
+** ret
+*/
+TEST (uint16x4x2_t, 1, 2, 1, 0)
+
+/*
+** test_uint32x2x2_t:
+** ins v1\.s\[0\], v1\.s\[1\]
+** ret
+*/
+TEST (uint32x2x2_t, 1, 0, 1, 1)
+
+/*
+** test_uint64x1x2_t:
+** fmov d1, d0
+** ret
+*/
+TEST (uint64x1x2_t, 1, 0, 0, 0)
+
+//--------------------------------------------------------------
+
+/*
+** test_bfloat16x4x3_t:
+** ins v2\.h\[3\], v0\.h\[2\]
+** ret
+*/
+TEST (bfloat16x4x3_t, 2, 3, 0, 2)
+
+/*
+** test_float16x4x3_t:
+** ins v0\.h\[1\], v1\.h\[3\]
+** ret
+*/
+TEST (float16x4x3_t, 0, 1, 1, 3)
+
+/*
+** test_float32x2x3_t:
+** ins v1\.s\[0\], v2\.s\[1\]
+** ret
+*/
+TEST (float32x2x3_t, 1, 0, 2, 1)
+
+/*
+** test_float64x1x3_t:
+** fmov d1, d2
+** ret
+*/
+TEST (float64x1x3_t, 1, 0, 2, 0)
+
+/*
+** test_int8x8x3_t:
+** ins v0\.b\[5\], v2\.b\[6\]
+** ret
+*/
+TEST (int8x8x3_t, 0, 5, 2, 6)
+
+/*
+** test_int16x4x3_t:
+** ins v2\.h\[2\], v1\.h\[1\]
+** ret
+*/
+TEST (int16x4x3_t, 2, 2, 1, 1)
+
+/*
+** test_int32x2x3_t:
+** ins v1\.s\[0\], v1\.s\[1\]
+** ret
+*/
+TEST (int32x2x3_t, 1, 0, 1, 1)
+
+/*
+** test_int64x1x3_t:
+** fmov d2, d1
+** ret
+*/
+TEST (int64x1x3_t, 2, 0, 1, 0)
+
+/*
+** test_uint8x8x3_t:
+** ins v1\.b\[6\], v2\.b\[7\]
+** ret
+*/
+TEST (uint8x8x3_t, 1, 6, 2, 7)
+
+/*
+** test_uint16x4x3_t:
+** ins v2\.h\[2\], v1\.h\[3\]
+** ret
+*/
+TEST (uint16x4x3_t, 2, 2, 1, 3)
+
+/*
+** test_uint32x2x3_t:
+** ins v2\.s\[0\], v0\.s\[1\]
+** ret
+*/
+TEST (uint32x2x3_t, 2, 0, 0, 1)
+
+/*
+** test_uint64x1x3_t:
+** fmov d1, d2
+** ret
+*/
+TEST (uint64x1x3_t, 1, 0, 2, 0)
+
+//--------------------------------------------------------------
+
+/*
+** test_bfloat16x4x4_t:
+** ins v2\.h\[3\], v3\.h\[2\]
+** ret
+*/
+TEST (bfloat16x4x4_t, 2, 3, 3, 2)
+
+/*
+** test_float16x4x4_t:
+** ins v0\.h\[2\], v3\.h\[1\]
+** ret
+*/
+TEST (float16x4x4_t, 0, 2, 3, 1)
+
+/*
+** test_float32x2x4_t:
+** ins v3\.s\[0\], v2\.s\[1\]
+** ret
+*/
+TEST (float32x2x4_t, 3, 0, 2, 1)
+
+/*
+** test_float64x1x4_t:
+** fmov d1, d3
+** ret
+*/
+TEST (float64x1x4_t, 1, 0, 3, 0)
+
+/*
+** test_int8x8x4_t:
+** ins v0\.b\[4\], v3\.b\[7\]
+** ret
+*/
+TEST (int8x8x4_t, 0, 4, 3, 7)
+
+/*
+** test_int16x4x4_t:
+** ins v3\.h\[3\], v1\.h\[1\]
+** ret
+*/
+TEST (int16x4x4_t, 3, 3, 1, 1)
+
+/*
+** test_int32x2x4_t:
+** ins v1\.s\[0\], v3\.s\[1\]
+** ret
+*/
+TEST (int32x2x4_t, 1, 0, 3, 1)
+
+/*
+** test_int64x1x4_t:
+** fmov d3, d1
+** ret
+*/
+TEST (int64x1x4_t, 3, 0, 1, 0)
+
+/*
+** test_uint8x8x4_t:
+** ins v3\.b\[6\], v2\.b\[4\]
+** ret
+*/
+TEST (uint8x8x4_t, 3, 6, 2, 4)
+
+/*
+** test_uint16x4x4_t:
+** ins v3\.h\[1\], v1\.h\[3\]
+** ret
+*/
+TEST (uint16x4x4_t, 3, 1, 1, 3)
+
+/*
+** test_uint32x2x4_t:
+** ins v0\.s\[0\], v3\.s\[1\]
+** ret
+*/
+TEST (uint32x2x4_t, 0, 0, 3, 1)
+
+/*
+** test_uint64x1x4_t:
+** fmov d1, d3
+** ret
+*/
+TEST (uint64x1x4_t, 1, 0, 3, 0)
--- /dev/null
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" "" { target aarch64_little_endian } } } */
+
+#include <arm_neon.h>
+
+/* Define test_TYPE, which copies lane D of constituent vector C to
+   lane B of constituent vector A within the same tuple argument.  */
+#define TEST(TYPE, A, B, C, D) \
+ TYPE \
+ test_##TYPE (TYPE a) \
+ { \
+ a.val[A][B] = a.val[C][D]; \
+ return a; \
+ }
+
+/*
+** test_bfloat16x8x2_t:
+** ins v1\.h\[6\], v0\.h\[5\]
+** ret
+*/
+TEST (bfloat16x8x2_t, 1, 6, 0, 5)
+
+/*
+** test_float16x8x2_t:
+** ins v1\.h\[2\], v0\.h\[7\]
+** ret
+*/
+TEST (float16x8x2_t, 1, 2, 0, 7)
+
+/*
+** test_float32x4x2_t:
+** ins v1\.s\[3\], v0\.s\[1\]
+** ret
+*/
+TEST (float32x4x2_t, 1, 3, 0, 1)
+
+/*
+** test_float64x2x2_t:
+** ins v1\.d\[0\], v0\.d\[0\]
+** ret
+*/
+TEST (float64x2x2_t, 1, 0, 0, 0)
+
+/*
+** test_int8x16x2_t:
+** ins v0\.b\[15\], v1\.b\[13\]
+** ret
+*/
+TEST (int8x16x2_t, 0, 15, 1, 13)
+
+/*
+** test_int16x8x2_t:
+** ins v0\.h\[2\], v1\.h\[7\]
+** ret
+*/
+TEST (int16x8x2_t, 0, 2, 1, 7)
+
+/*
+** test_int32x4x2_t:
+** ins v0\.s\[3\], v1\.s\[1\]
+** ret
+*/
+TEST (int32x4x2_t, 0, 3, 1, 1)
+
+/*
+** test_int64x2x2_t:
+** ins v0\.d\[0\], v1\.d\[1\]
+** ret
+*/
+TEST (int64x2x2_t, 0, 0, 1, 1)
+
+/*
+** test_uint8x16x2_t:
+** ins v1\.b\[13\], v0\.b\[11\]
+** ret
+*/
+TEST (uint8x16x2_t, 1, 13, 0, 11)
+
+/*
+** test_uint16x8x2_t:
+** ins v1\.h\[6\], v1\.h\[3\]
+** ret
+*/
+TEST (uint16x8x2_t, 1, 6, 1, 3)
+
+/*
+** test_uint32x4x2_t:
+** ins v1\.s\[3\], v1\.s\[1\]
+** ret
+*/
+TEST (uint32x4x2_t, 1, 3, 1, 1)
+
+/*
+** test_uint64x2x2_t:
+** ins v1\.d\[0\], v1\.d\[1\]
+** ret
+*/
+TEST (uint64x2x2_t, 1, 0, 1, 1)
+
+//--------------------------------------------------------------
+
+/*
+** test_bfloat16x8x3_t:
+** ins v2\.h\[3\], v0\.h\[7\]
+** ret
+*/
+TEST (bfloat16x8x3_t, 2, 3, 0, 7)
+
+/*
+** test_float16x8x3_t:
+** ins v0\.h\[4\], v1\.h\[6\]
+** ret
+*/
+TEST (float16x8x3_t, 0, 4, 1, 6)
+
+/*
+** test_float32x4x3_t:
+** ins v1\.s\[2\], v2\.s\[1\]
+** ret
+*/
+TEST (float32x4x3_t, 1, 2, 2, 1)
+
+/*
+** test_float64x2x3_t:
+** ins v1\.d\[0\], v2\.d\[1\]
+** ret
+*/
+TEST (float64x2x3_t, 1, 0, 2, 1)
+
+/*
+** test_int8x16x3_t:
+** ins v0\.b\[9\], v2\.b\[14\]
+** ret
+*/
+TEST (int8x16x3_t, 0, 9, 2, 14)
+
+/*
+** test_int16x8x3_t:
+** ins v2\.h\[6\], v1\.h\[3\]
+** ret
+*/
+TEST (int16x8x3_t, 2, 6, 1, 3)
+
+/*
+** test_int32x4x3_t:
+** ins v1\.s\[3\], v1\.s\[1\]
+** ret
+*/
+TEST (int32x4x3_t, 1, 3, 1, 1)
+
+/*
+** test_int64x2x3_t:
+** ins v2\.d\[1\], v1\.d\[0\]
+** ret
+*/
+TEST (int64x2x3_t, 2, 1, 1, 0)
+
+/*
+** test_uint8x16x3_t:
+** ins v1\.b\[10\], v2\.b\[8\]
+** ret
+*/
+TEST (uint8x16x3_t, 1, 10, 2, 8)
+
+/*
+** test_uint16x8x3_t:
+** ins v2\.h\[5\], v1\.h\[2\]
+** ret
+*/
+TEST (uint16x8x3_t, 2, 5, 1, 2)
+
+/*
+** test_uint32x4x3_t:
+** ins v2\.s\[3\], v0\.s\[1\]
+** ret
+*/
+TEST (uint32x4x3_t, 2, 3, 0, 1)
+
+/*
+** test_uint64x2x3_t:
+** ins v1\.d\[0\], v2\.d\[1\]
+** ret
+*/
+TEST (uint64x2x3_t, 1, 0, 2, 1)
+
+//--------------------------------------------------------------
+
+/*
+** test_bfloat16x8x4_t:
+** ins v2\.h\[5\], v3\.h\[6\]
+** ret
+*/
+TEST (bfloat16x8x4_t, 2, 5, 3, 6)
+
+/*
+** test_float16x8x4_t:
+** ins v0\.h\[3\], v3\.h\[5\]
+** ret
+*/
+TEST (float16x8x4_t, 0, 3, 3, 5)
+
+/*
+** test_float32x4x4_t:
+** ins v3\.s\[2\], v2\.s\[1\]
+** ret
+*/
+TEST (float32x4x4_t, 3, 2, 2, 1)
+
+/*
+** test_float64x2x4_t:
+** ins v1\.d\[1\], v3\.d\[0\]
+** ret
+*/
+TEST (float64x2x4_t, 1, 1, 3, 0)
+
+/*
+** test_int8x16x4_t:
+** ins v0\.b\[14\], v3\.b\[10\]
+** ret
+*/
+TEST (int8x16x4_t, 0, 14, 3, 10)
+
+/*
+** test_int16x8x4_t:
+** ins v3\.h\[4\], v1\.h\[6\]
+** ret
+*/
+TEST (int16x8x4_t, 3, 4, 1, 6)
+
+/*
+** test_int32x4x4_t:
+** ins v1\.s\[3\], v3\.s\[1\]
+** ret
+*/
+TEST (int32x4x4_t, 1, 3, 3, 1)
+
+/*
+** test_int64x2x4_t:
+** ins v3\.d\[0\], v2\.d\[0\]
+** ret
+*/
+TEST (int64x2x4_t, 3, 0, 2, 0)
+
+/*
+** test_uint8x16x4_t:
+** ins v3\.b\[13\], v2\.b\[6\]
+** ret
+*/
+TEST (uint8x16x4_t, 3, 13, 2, 6)
+
+/*
+** test_uint16x8x4_t:
+** ins v3\.h\[2\], v1\.h\[7\]
+** ret
+*/
+TEST (uint16x8x4_t, 3, 2, 1, 7)
+
+/*
+** test_uint32x4x4_t:
+** ins v0\.s\[3\], v3\.s\[2\]
+** ret
+*/
+TEST (uint32x4x4_t, 0, 3, 3, 2)
+
+/*
+** test_uint64x2x4_t:
+** ins v1\.d\[0\], v3\.d\[1\]
+** ret
+*/
+TEST (uint64x2x4_t, 1, 0, 3, 1)
--- /dev/null
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" "" { target aarch64_little_endian } } } */
+
+#include <arm_neon.h>
+
+/* Define test_TYPE, which loads a scalar from memory into lane B of
+   constituent vector A of the tuple argument (expected to use LD1).  */
+#define TEST(TYPE, A, B) \
+ TYPE \
+ test_##TYPE (TYPE a, TYPE *ptr) \
+ { \
+ a.val[A][B] = ptr->val[0][0]; \
+ return a; \
+ }
+
+/*
+** test_bfloat16x4x2_t:
+** ld1 \{v1\.h\}\[3\], \[x0\]
+** ret
+*/
+TEST (bfloat16x4x2_t, 1, 3)
+
+/*
+** test_float16x4x2_t:
+** ld1 \{v1\.h\}\[1\], \[x0\]
+** ret
+*/
+TEST (float16x4x2_t, 1, 1)
+
+/*
+** test_float32x2x2_t:
+** ld1 \{v1\.s\}\[0\], \[x0\]
+** ret
+*/
+TEST (float32x2x2_t, 1, 0)
+
+/*
+** test_float64x1x2_t:
+** ldr d1, \[x0\]
+** ret
+*/
+TEST (float64x1x2_t, 1, 0)
+
+/*
+** test_int8x8x2_t:
+** ld1 \{v0\.b\}\[5\], \[x0\]
+** ret
+*/
+TEST (int8x8x2_t, 0, 5)
+
+/*
+** test_int16x4x2_t:
+** ld1 \{v0\.h\}\[2\], \[x0\]
+** ret
+*/
+TEST (int16x4x2_t, 0, 2)
+
+/*
+** test_int32x2x2_t:
+** ld1 \{v0\.s\}\[0\], \[x0\]
+** ret
+*/
+TEST (int32x2x2_t, 0, 0)
+
+/*
+** test_int64x1x2_t:
+** ldr d0, \[x0\]
+** ret
+*/
+TEST (int64x1x2_t, 0, 0)
+
+/*
+** test_uint8x8x2_t:
+** ld1 \{v1\.b\}\[6\], \[x0\]
+** ret
+*/
+TEST (uint8x8x2_t, 1, 6)
+
+/*
+** test_uint16x4x2_t:
+** ld1 \{v1\.h\}\[2\], \[x0\]
+** ret
+*/
+TEST (uint16x4x2_t, 1, 2)
+
+/*
+** test_uint32x2x2_t:
+** ld1 \{v1\.s\}\[0\], \[x0\]
+** ret
+*/
+TEST (uint32x2x2_t, 1, 0)
+
+/*
+** test_uint64x1x2_t:
+** ldr d1, \[x0\]
+** ret
+*/
+TEST (uint64x1x2_t, 1, 0)
+
+//--------------------------------------------------------------
+
+/*
+** test_bfloat16x4x3_t:
+** ld1 \{v2\.h\}\[3\], \[x0\]
+** ret
+*/
+TEST (bfloat16x4x3_t, 2, 3)
+
+/*
+** test_float16x4x3_t:
+** ld1 \{v0\.h\}\[1\], \[x0\]
+** ret
+*/
+TEST (float16x4x3_t, 0, 1)
+
+/*
+** test_float32x2x3_t:
+** ld1 \{v1\.s\}\[0\], \[x0\]
+** ret
+*/
+TEST (float32x2x3_t, 1, 0)
+
+/*
+** test_float64x1x3_t:
+** ldr d1, \[x0\]
+** ret
+*/
+TEST (float64x1x3_t, 1, 0)
+
+/*
+** test_int8x8x3_t:
+** ld1 \{v0\.b\}\[5\], \[x0\]
+** ret
+*/
+TEST (int8x8x3_t, 0, 5)
+
+/*
+** test_int16x4x3_t:
+** ld1 \{v2\.h\}\[2\], \[x0\]
+** ret
+*/
+TEST (int16x4x3_t, 2, 2)
+
+/*
+** test_int32x2x3_t:
+** ld1 \{v1\.s\}\[0\], \[x0\]
+** ret
+*/
+TEST (int32x2x3_t, 1, 0)
+
+/*
+** test_int64x1x3_t:
+** ldr d2, \[x0\]
+** ret
+*/
+TEST (int64x1x3_t, 2, 0)
+
+/*
+** test_uint8x8x3_t:
+** ld1 \{v1\.b\}\[6\], \[x0\]
+** ret
+*/
+TEST (uint8x8x3_t, 1, 6)
+
+/*
+** test_uint16x4x3_t:
+** ld1 \{v2\.h\}\[2\], \[x0\]
+** ret
+*/
+TEST (uint16x4x3_t, 2, 2)
+
+/*
+** test_uint32x2x3_t:
+** ld1 \{v2\.s\}\[0\], \[x0\]
+** ret
+*/
+TEST (uint32x2x3_t, 2, 0)
+
+/*
+** test_uint64x1x3_t:
+** ldr d1, \[x0\]
+** ret
+*/
+TEST (uint64x1x3_t, 1, 0)
+
+//--------------------------------------------------------------
+
+/*
+** test_bfloat16x4x4_t:
+** ld1 \{v2\.h\}\[3\], \[x0\]
+** ret
+*/
+TEST (bfloat16x4x4_t, 2, 3)
+
+/*
+** test_float16x4x4_t:
+** ld1 \{v0\.h\}\[2\], \[x0\]
+** ret
+*/
+TEST (float16x4x4_t, 0, 2)
+
+/*
+** test_float32x2x4_t:
+** ld1 \{v3\.s\}\[0\], \[x0\]
+** ret
+*/
+TEST (float32x2x4_t, 3, 0)
+
+/*
+** test_float64x1x4_t:
+** ldr d1, \[x0\]
+** ret
+*/
+TEST (float64x1x4_t, 1, 0)
+
+/*
+** test_int8x8x4_t:
+** ld1 \{v0\.b\}\[4\], \[x0\]
+** ret
+*/
+TEST (int8x8x4_t, 0, 4)
+
+/*
+** test_int16x4x4_t:
+** ld1 \{v3\.h\}\[3\], \[x0\]
+** ret
+*/
+TEST (int16x4x4_t, 3, 3)
+
+/*
+** test_int32x2x4_t:
+** ld1 \{v1\.s\}\[0\], \[x0\]
+** ret
+*/
+TEST (int32x2x4_t, 1, 0)
+
+/*
+** test_int64x1x4_t:
+** ldr d3, \[x0\]
+** ret
+*/
+TEST (int64x1x4_t, 3, 0)
+
+/*
+** test_uint8x8x4_t:
+** ld1 \{v3\.b\}\[6\], \[x0\]
+** ret
+*/
+TEST (uint8x8x4_t, 3, 6)
+
+/*
+** test_uint16x4x4_t:
+** ld1 \{v3\.h\}\[1\], \[x0\]
+** ret
+*/
+TEST (uint16x4x4_t, 3, 1)
+
+/*
+** test_uint32x2x4_t:
+** ld1 \{v0\.s\}\[0\], \[x0\]
+** ret
+*/
+TEST (uint32x2x4_t, 0, 0)
+
+/*
+** test_uint64x1x4_t:
+** ldr d1, \[x0\]
+** ret
+*/
+TEST (uint64x1x4_t, 1, 0)
--- /dev/null
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" "" { target aarch64_little_endian } } } */
+
+#include <arm_neon.h>
+
+/* Define test_TYPE, which loads a scalar from memory into lane B of
+   constituent vector A of the tuple argument (expected to use LD1).  */
+#define TEST(TYPE, A, B) \
+ TYPE \
+ test_##TYPE (TYPE a, TYPE *ptr) \
+ { \
+ a.val[A][B] = ptr->val[0][0]; \
+ return a; \
+ }
+
+/*
+** test_bfloat16x8x2_t:
+** ld1 \{v1\.h\}\[6\], \[x0\]
+** ret
+*/
+TEST (bfloat16x8x2_t, 1, 6)
+
+/*
+** test_float16x8x2_t:
+** ld1 \{v1\.h\}\[2\], \[x0\]
+** ret
+*/
+TEST (float16x8x2_t, 1, 2)
+
+/*
+** test_float32x4x2_t:
+** ld1 \{v1\.s\}\[3\], \[x0\]
+** ret
+*/
+TEST (float32x4x2_t, 1, 3)
+
+/*
+** test_float64x2x2_t:
+** ld1 \{v1\.d\}\[0\], \[x0\]
+** ret
+*/
+TEST (float64x2x2_t, 1, 0)
+
+/*
+** test_int8x16x2_t:
+** ld1 \{v0\.b\}\[15\], \[x0\]
+** ret
+*/
+TEST (int8x16x2_t, 0, 15)
+
+/*
+** test_int16x8x2_t:
+** ld1 \{v0\.h\}\[2\], \[x0\]
+** ret
+*/
+TEST (int16x8x2_t, 0, 2)
+
+/*
+** test_int32x4x2_t:
+** ld1 \{v0\.s\}\[3\], \[x0\]
+** ret
+*/
+TEST (int32x4x2_t, 0, 3)
+
+/*
+** test_int64x2x2_t:
+** ld1 \{v0\.d\}\[0\], \[x0\]
+** ret
+*/
+TEST (int64x2x2_t, 0, 0)
+
+/*
+** test_uint8x16x2_t:
+** ld1 \{v1\.b\}\[13\], \[x0\]
+** ret
+*/
+TEST (uint8x16x2_t, 1, 13)
+
+/*
+** test_uint16x8x2_t:
+** ld1 \{v1\.h\}\[6\], \[x0\]
+** ret
+*/
+TEST (uint16x8x2_t, 1, 6)
+
+/*
+** test_uint32x4x2_t:
+** ld1 \{v1\.s\}\[3\], \[x0\]
+** ret
+*/
+TEST (uint32x4x2_t, 1, 3)
+
+/*
+** test_uint64x2x2_t:
+** ld1 \{v1\.d\}\[0\], \[x0\]
+** ret
+*/
+TEST (uint64x2x2_t, 1, 0)
+
+//--------------------------------------------------------------
+
+/*
+** test_bfloat16x8x3_t:
+** ld1 \{v2\.h\}\[3\], \[x0\]
+** ret
+*/
+TEST (bfloat16x8x3_t, 2, 3)
+
+/*
+** test_float16x8x3_t:
+** ld1 \{v0\.h\}\[4\], \[x0\]
+** ret
+*/
+TEST (float16x8x3_t, 0, 4)
+
+/*
+** test_float32x4x3_t:
+** ld1 \{v1\.s\}\[2\], \[x0\]
+** ret
+*/
+TEST (float32x4x3_t, 1, 2)
+
+/*
+** test_float64x2x3_t:
+** ld1 \{v1\.d\}\[0\], \[x0\]
+** ret
+*/
+TEST (float64x2x3_t, 1, 0)
+
+/*
+** test_int8x16x3_t:
+** ld1 \{v0\.b\}\[9\], \[x0\]
+** ret
+*/
+TEST (int8x16x3_t, 0, 9)
+
+/*
+** test_int16x8x3_t:
+** ld1 \{v2\.h\}\[6\], \[x0\]
+** ret
+*/
+TEST (int16x8x3_t, 2, 6)
+
+/*
+** test_int32x4x3_t:
+** ld1 \{v1\.s\}\[3\], \[x0\]
+** ret
+*/
+TEST (int32x4x3_t, 1, 3)
+
+/*
+** test_int64x2x3_t:
+** ld1 \{v2\.d\}\[1\], \[x0\]
+** ret
+*/
+TEST (int64x2x3_t, 2, 1)
+
+/*
+** test_uint8x16x3_t:
+** ld1 \{v1\.b\}\[10\], \[x0\]
+** ret
+*/
+TEST (uint8x16x3_t, 1, 10)
+
+/*
+** test_uint16x8x3_t:
+** ld1 \{v2\.h\}\[5\], \[x0\]
+** ret
+*/
+TEST (uint16x8x3_t, 2, 5)
+
+/*
+** test_uint32x4x3_t:
+** ld1 \{v2\.s\}\[3\], \[x0\]
+** ret
+*/
+TEST (uint32x4x3_t, 2, 3)
+
+/*
+** test_uint64x2x3_t:
+** ld1 \{v1\.d\}\[0\], \[x0\]
+** ret
+*/
+TEST (uint64x2x3_t, 1, 0)
+
+//--------------------------------------------------------------
+
+/*
+** test_bfloat16x8x4_t:
+** ld1 \{v2\.h\}\[5\], \[x0\]
+** ret
+*/
+TEST (bfloat16x8x4_t, 2, 5)
+
+/*
+** test_float16x8x4_t:
+** ld1 \{v0\.h\}\[3\], \[x0\]
+** ret
+*/
+TEST (float16x8x4_t, 0, 3)
+
+/*
+** test_float32x4x4_t:
+** ld1 \{v3\.s\}\[2\], \[x0\]
+** ret
+*/
+TEST (float32x4x4_t, 3, 2)
+
+/*
+** test_float64x2x4_t:
+** ld1 \{v1\.d\}\[1\], \[x0\]
+** ret
+*/
+TEST (float64x2x4_t, 1, 1)
+
+/*
+** test_int8x16x4_t:
+** ld1 \{v0\.b\}\[14\], \[x0\]
+** ret
+*/
+TEST (int8x16x4_t, 0, 14)
+
+/*
+** test_int16x8x4_t:
+** ld1 \{v3\.h\}\[4\], \[x0\]
+** ret
+*/
+TEST (int16x8x4_t, 3, 4)
+
+/*
+** test_int32x4x4_t:
+** ld1 \{v1\.s\}\[3\], \[x0\]
+** ret
+*/
+TEST (int32x4x4_t, 1, 3)
+
+/*
+** test_int64x2x4_t:
+** ld1 \{v3\.d\}\[0\], \[x0\]
+** ret
+*/
+TEST (int64x2x4_t, 3, 0)
+
+/*
+** test_uint8x16x4_t:
+** ld1 \{v3\.b\}\[13\], \[x0\]
+** ret
+*/
+TEST (uint8x16x4_t, 3, 13)
+
+/*
+** test_uint16x8x4_t:
+** ld1 \{v3\.h\}\[2\], \[x0\]
+** ret
+*/
+TEST (uint16x8x4_t, 3, 2)
+
+/*
+** test_uint32x4x4_t:
+** ld1 \{v0\.s\}\[3\], \[x0\]
+** ret
+*/
+TEST (uint32x4x4_t, 0, 3)
+
+/*
+** test_uint64x2x4_t:
+** ld1 \{v1\.d\}\[0\], \[x0\]
+** ret
+*/
+TEST (uint64x2x4_t, 1, 0)
--- /dev/null
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" "" { target aarch64_little_endian } } } */
+
+#include <arm_neon.h>
+
+/* Define test_TYPE, which stores lane B of constituent vector A of the
+   tuple argument to memory (expected to use ST1 or a scalar store).  */
+#define TEST(TYPE, A, B) \
+ void \
+ test_##TYPE (TYPE a, TYPE *ptr) \
+ { \
+ ptr->val[0][0] = a.val[A][B]; \
+ }
+
+/*
+** test_bfloat16x4x2_t:
+** st1 \{v1\.h\}\[3\], \[x0\]
+** ret
+*/
+TEST (bfloat16x4x2_t, 1, 3)
+
+/*
+** test_float16x4x2_t:
+** st1 \{v1\.h\}\[1\], \[x0\]
+** ret
+*/
+TEST (float16x4x2_t, 1, 1)
+
+/*
+** test_float32x2x2_t:
+** str s1, \[x0\]
+** ret
+*/
+TEST (float32x2x2_t, 1, 0)
+
+/*
+** test_float64x1x2_t:
+** str d1, \[x0\]
+** ret
+*/
+TEST (float64x1x2_t, 1, 0)
+
+/*
+** test_int8x8x2_t:
+** st1 \{v0\.b\}\[5\], \[x0\]
+** ret
+*/
+TEST (int8x8x2_t, 0, 5)
+
+/*
+** test_int16x4x2_t:
+** st1 \{v0\.h\}\[2\], \[x0\]
+** ret
+*/
+TEST (int16x4x2_t, 0, 2)
+
+/*
+** test_int32x2x2_t:
+** str s0, \[x0\]
+** ret
+*/
+TEST (int32x2x2_t, 0, 0)
+
+/*
+** test_int64x1x2_t:
+** str d0, \[x0\]
+** ret
+*/
+TEST (int64x1x2_t, 0, 0)
+
+/*
+** test_uint8x8x2_t:
+** st1 \{v1\.b\}\[6\], \[x0\]
+** ret
+*/
+TEST (uint8x8x2_t, 1, 6)
+
+/*
+** test_uint16x4x2_t:
+** st1 \{v1\.h\}\[2\], \[x0\]
+** ret
+*/
+TEST (uint16x4x2_t, 1, 2)
+
+/*
+** test_uint32x2x2_t:
+** str s1, \[x0\]
+** ret
+*/
+TEST (uint32x2x2_t, 1, 0)
+
+/*
+** test_uint64x1x2_t:
+** str d1, \[x0\]
+** ret
+*/
+TEST (uint64x1x2_t, 1, 0)
+
+//--------------------------------------------------------------
+
+/*
+** test_bfloat16x4x3_t:
+** st1 \{v2\.h\}\[3\], \[x0\]
+** ret
+*/
+TEST (bfloat16x4x3_t, 2, 3)
+
+/*
+** test_float16x4x3_t:
+** st1 \{v0\.h\}\[1\], \[x0\]
+** ret
+*/
+TEST (float16x4x3_t, 0, 1)
+
+/*
+** test_float32x2x3_t:
+** str s1, \[x0\]
+** ret
+*/
+TEST (float32x2x3_t, 1, 0)
+
+/*
+** test_float64x1x3_t:
+** str d1, \[x0\]
+** ret
+*/
+TEST (float64x1x3_t, 1, 0)
+
+/*
+** test_int8x8x3_t:
+** st1 \{v0\.b\}\[5\], \[x0\]
+** ret
+*/
+TEST (int8x8x3_t, 0, 5)
+
+/*
+** test_int16x4x3_t:
+** st1 \{v2\.h\}\[2\], \[x0\]
+** ret
+*/
+TEST (int16x4x3_t, 2, 2)
+
+/*
+** test_int32x2x3_t:
+** str s1, \[x0\]
+** ret
+*/
+TEST (int32x2x3_t, 1, 0)
+
+/*
+** test_int64x1x3_t:
+** str d2, \[x0\]
+** ret
+*/
+TEST (int64x1x3_t, 2, 0)
+
+/*
+** test_uint8x8x3_t:
+** st1 \{v1\.b\}\[6\], \[x0\]
+** ret
+*/
+TEST (uint8x8x3_t, 1, 6)
+
+/*
+** test_uint16x4x3_t:
+** st1 \{v2\.h\}\[2\], \[x0\]
+** ret
+*/
+TEST (uint16x4x3_t, 2, 2)
+
+/*
+** test_uint32x2x3_t:
+** str s2, \[x0\]
+** ret
+*/
+TEST (uint32x2x3_t, 2, 0)
+
+/*
+** test_uint64x1x3_t:
+** str d1, \[x0\]
+** ret
+*/
+TEST (uint64x1x3_t, 1, 0)
+
+//--------------------------------------------------------------
+
+/*
+** test_bfloat16x4x4_t:
+** st1 \{v2\.h\}\[3\], \[x0\]
+** ret
+*/
+TEST (bfloat16x4x4_t, 2, 3)
+
+/*
+** test_float16x4x4_t:
+** st1 \{v0\.h\}\[2\], \[x0\]
+** ret
+*/
+TEST (float16x4x4_t, 0, 2)
+
+/*
+** test_float32x2x4_t:
+** str s3, \[x0\]
+** ret
+*/
+TEST (float32x2x4_t, 3, 0)
+
+/*
+** test_float64x1x4_t:
+** str d1, \[x0\]
+** ret
+*/
+TEST (float64x1x4_t, 1, 0)
+
+/*
+** test_int8x8x4_t:
+** st1 \{v0\.b\}\[4\], \[x0\]
+** ret
+*/
+TEST (int8x8x4_t, 0, 4)
+
+/*
+** test_int16x4x4_t:
+** st1 \{v3\.h\}\[3\], \[x0\]
+** ret
+*/
+TEST (int16x4x4_t, 3, 3)
+
+/*
+** test_int32x2x4_t:
+** str s1, \[x0\]
+** ret
+*/
+TEST (int32x2x4_t, 1, 0)
+
+/*
+** test_int64x1x4_t:
+** str d3, \[x0\]
+** ret
+*/
+TEST (int64x1x4_t, 3, 0)
+
+/*
+** test_uint8x8x4_t:
+** st1 \{v3\.b\}\[6\], \[x0\]
+** ret
+*/
+TEST (uint8x8x4_t, 3, 6)
+
+/*
+** test_uint16x4x4_t:
+** st1 \{v3\.h\}\[1\], \[x0\]
+** ret
+*/
+TEST (uint16x4x4_t, 3, 1)
+
+/*
+** test_uint32x2x4_t:
+** str s0, \[x0\]
+** ret
+*/
+TEST (uint32x2x4_t, 0, 0)
+
+/*
+** test_uint64x1x4_t:
+** str d1, \[x0\]
+** ret
+*/
+TEST (uint64x1x4_t, 1, 0)
--- /dev/null
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" "" { target aarch64_little_endian } } } */
+
+#include <arm_neon.h>
+
+/* Define test_TYPE, which stores lane B of constituent vector A of the
+   tuple argument to memory (expected to use ST1 or a scalar store).  */
+#define TEST(TYPE, A, B) \
+ void \
+ test_##TYPE (TYPE a, TYPE *ptr) \
+ { \
+ ptr->val[0][0] = a.val[A][B]; \
+ }
+
+/*
+** test_bfloat16x8x2_t:
+** st1 \{v1\.h\}\[6\], \[x0\]
+** ret
+*/
+TEST (bfloat16x8x2_t, 1, 6)
+
+/*
+** test_float16x8x2_t:
+** st1 \{v1\.h\}\[2\], \[x0\]
+** ret
+*/
+TEST (float16x8x2_t, 1, 2)
+
+/*
+** test_float32x4x2_t:
+** st1 \{v1\.s\}\[3\], \[x0\]
+** ret
+*/
+TEST (float32x4x2_t, 1, 3)
+
+/*
+** test_float64x2x2_t:
+** str d1, \[x0\]
+** ret
+*/
+TEST (float64x2x2_t, 1, 0)
+
+/*
+** test_int8x16x2_t:
+** st1 \{v0\.b\}\[15\], \[x0\]
+** ret
+*/
+TEST (int8x16x2_t, 0, 15)
+
+/*
+** test_int16x8x2_t:
+** st1 \{v0\.h\}\[2\], \[x0\]
+** ret
+*/
+TEST (int16x8x2_t, 0, 2)
+
+/*
+** test_int32x4x2_t:
+** st1 \{v0\.s\}\[3\], \[x0\]
+** ret
+*/
+TEST (int32x4x2_t, 0, 3)
+
+/*
+** test_int64x2x2_t:
+** str d0, \[x0\]
+** ret
+*/
+TEST (int64x2x2_t, 0, 0)
+
+/*
+** test_uint8x16x2_t:
+** st1 \{v1\.b\}\[13\], \[x0\]
+** ret
+*/
+TEST (uint8x16x2_t, 1, 13)
+
+/*
+** test_uint16x8x2_t:
+** st1 \{v1\.h\}\[6\], \[x0\]
+** ret
+*/
+TEST (uint16x8x2_t, 1, 6)
+
+/*
+** test_uint32x4x2_t:
+** st1 \{v1\.s\}\[3\], \[x0\]
+** ret
+*/
+TEST (uint32x4x2_t, 1, 3)
+
+/*
+** test_uint64x2x2_t:
+** str d1, \[x0\]
+** ret
+*/
+TEST (uint64x2x2_t, 1, 0)
+
+//--------------------------------------------------------------
+
+/*
+** test_bfloat16x8x3_t:
+** st1 \{v2\.h\}\[3\], \[x0\]
+** ret
+*/
+TEST (bfloat16x8x3_t, 2, 3)
+
+/*
+** test_float16x8x3_t:
+** st1 \{v0\.h\}\[4\], \[x0\]
+** ret
+*/
+TEST (float16x8x3_t, 0, 4)
+
+/*
+** test_float32x4x3_t:
+** st1 \{v1\.s\}\[2\], \[x0\]
+** ret
+*/
+TEST (float32x4x3_t, 1, 2)
+
+/*
+** test_float64x2x3_t:
+** str d1, \[x0\]
+** ret
+*/
+TEST (float64x2x3_t, 1, 0)
+
+/*
+** test_int8x16x3_t:
+** st1 \{v0\.b\}\[9\], \[x0\]
+** ret
+*/
+TEST (int8x16x3_t, 0, 9)
+
+/*
+** test_int16x8x3_t:
+** st1 \{v2\.h\}\[6\], \[x0\]
+** ret
+*/
+TEST (int16x8x3_t, 2, 6)
+
+/*
+** test_int32x4x3_t:
+** st1 \{v1\.s\}\[3\], \[x0\]
+** ret
+*/
+TEST (int32x4x3_t, 1, 3)
+
+/*
+** test_int64x2x3_t:
+** st1 \{v2\.d\}\[1\], \[x0\]
+** ret
+*/
+TEST (int64x2x3_t, 2, 1)
+
+/*
+** test_uint8x16x3_t:
+** st1 \{v1\.b\}\[10\], \[x0\]
+** ret
+*/
+TEST (uint8x16x3_t, 1, 10)
+
+/*
+** test_uint16x8x3_t:
+** st1 \{v2\.h\}\[5\], \[x0\]
+** ret
+*/
+TEST (uint16x8x3_t, 2, 5)
+
+/*
+** test_uint32x4x3_t:
+** st1 \{v2\.s\}\[3\], \[x0\]
+** ret
+*/
+TEST (uint32x4x3_t, 2, 3)
+
+/*
+** test_uint64x2x3_t:
+** str d1, \[x0\]
+** ret
+*/
+TEST (uint64x2x3_t, 1, 0)
+
+//--------------------------------------------------------------
+
+/*
+** test_bfloat16x8x4_t:
+** st1 \{v2\.h\}\[5\], \[x0\]
+** ret
+*/
+TEST (bfloat16x8x4_t, 2, 5)
+
+/*
+** test_float16x8x4_t:
+** st1 \{v0\.h\}\[3\], \[x0\]
+** ret
+*/
+TEST (float16x8x4_t, 0, 3)
+
+/*
+** test_float32x4x4_t:
+** st1 \{v3\.s\}\[2\], \[x0\]
+** ret
+*/
+TEST (float32x4x4_t, 3, 2)
+
+/*
+** test_float64x2x4_t:
+** st1 \{v1\.d\}\[1\], \[x0\]
+** ret
+*/
+TEST (float64x2x4_t, 1, 1)
+
+/*
+** test_int8x16x4_t:
+** st1 \{v0\.b\}\[14\], \[x0\]
+** ret
+*/
+TEST (int8x16x4_t, 0, 14)
+
+/*
+** test_int16x8x4_t:
+** st1 \{v3\.h\}\[4\], \[x0\]
+** ret
+*/
+TEST (int16x8x4_t, 3, 4)
+
+/*
+** test_int32x4x4_t:
+** st1 \{v1\.s\}\[3\], \[x0\]
+** ret
+*/
+TEST (int32x4x4_t, 1, 3)
+
+/*
+** test_int64x2x4_t:
+** str d3, \[x0\]
+** ret
+*/
+TEST (int64x2x4_t, 3, 0)
+
+/*
+** test_uint8x16x4_t:
+** st1 \{v3\.b\}\[13\], \[x0\]
+** ret
+*/
+TEST (uint8x16x4_t, 3, 13)
+
+/*
+** test_uint16x8x4_t:
+** st1 \{v3\.h\}\[2\], \[x0\]
+** ret
+*/
+TEST (uint16x8x4_t, 3, 2)
+
+/*
+** test_uint32x4x4_t:
+** st1 \{v0\.s\}\[3\], \[x0\]
+** ret
+*/
+TEST (uint32x4x4_t, 0, 3)
+
+/*
+** test_uint64x2x4_t:
+** str d1, \[x0\]
+** ret
+*/
+TEST (uint64x2x4_t, 1, 0)