]>
/* Source: git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_vstore_scatter_offset_p.c */
/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
/* { dg-add-options arm_v8_1m_mve_fp } */
/* { dg-additional-options "-O2" } */
9 foobu8( uint8_t * pDataSrc
, uint8_t * pDataDest
)
11 const uint8x16_t vecOffs1
= { 0, 3, 6, 1, 4, 7, 2, 5, 9, 11, 13, 10, 12, 15, 8, 14};
12 const uint8x16_t vecOffs2
= { 31, 29, 27, 25, 23, 28, 21, 26, 19, 24, 17, 22, 16, 20, 18, 30};
13 uint32x4_t vecIn1
= vldrwq_u32 ((uint32_t const *) pDataSrc
);
14 uint32x4_t vecIn2
= vldrwq_u32 ((uint32_t const *) &pDataSrc
[16]);
15 vstrbq_scatter_offset_p_u8(pDataDest
, vecOffs1
, (uint8x16_t
) vecIn1
, __p
);
16 vstrbq_scatter_offset_p_u8(pDataDest
, vecOffs2
, (uint8x16_t
) vecIn2
, __p
);
17 pDataDest
[32] = pDataSrc
[32];
22 foobu16( uint8_t * pDataSrc
, uint8_t * pDataDest
)
24 const uint16x8_t vecOffs1
= { 0, 3, 6, 1, 4, 7, 2, 5};
25 const uint16x8_t vecOffs2
= { 11, 13, 10, 12, 15, 8, 14, 9};
26 uint32x4_t vecIn1
= vldrwq_u32 ((uint32_t const *) pDataSrc
);
27 uint32x4_t vecIn2
= vldrwq_u32 ((uint32_t const *) &pDataSrc
[8]);
28 vstrbq_scatter_offset_p_u16 (pDataDest
, vecOffs1
, (uint16x8_t
) vecIn1
, __p
);
29 vstrbq_scatter_offset_p_u16 (pDataDest
, vecOffs2
, (uint16x8_t
) vecIn2
, __p
);
30 pDataDest
[16] = pDataSrc
[16];
35 foobu32( uint8_t * pDataSrc
, uint8_t * pDataDest
)
37 const uint32x4_t vecOffs1
= { 0, 3, 6, 1};
38 const uint32x4_t vecOffs2
= { 4, 7, 2, 5};
39 uint32x4_t vecIn1
= vldrwq_u32 ((uint32_t const *) pDataSrc
);
40 uint32x4_t vecIn2
= vldrwq_u32 ((uint32_t const *) &pDataSrc
[4]);
41 vstrbq_scatter_offset_p_u32 (pDataDest
, vecOffs1
, (uint32x4_t
) vecIn1
, __p
);
42 vstrbq_scatter_offset_p_u32 (pDataDest
, vecOffs2
, (uint32x4_t
) vecIn2
, __p
);
43 pDataDest
[8] = pDataSrc
[8];
48 foobs8( int8_t * pDataSrc
, int8_t * pDataDest
)
50 const uint8x16_t vecOffs1
= { 0, 3, 6, 1, 4, 7, 2, 5, 9, 11, 13, 10, 12, 15, 8, 14};
51 const uint8x16_t vecOffs2
= { 31, 29, 27, 25, 23, 28, 21, 26, 19, 24, 17, 22, 16, 20, 18, 30};
52 int32x4_t vecIn1
= vldrwq_s32 ((int32_t const *) pDataSrc
);
53 int32x4_t vecIn2
= vldrwq_s32 ((int32_t const *) &pDataSrc
[16]);
54 vstrbq_scatter_offset_p_s8 (pDataDest
, vecOffs1
, (int8x16_t
) vecIn1
, __p
);
55 vstrbq_scatter_offset_p_s8 (pDataDest
, vecOffs2
, (int8x16_t
) vecIn2
, __p
);
56 pDataDest
[32] = pDataSrc
[32];
61 foobs16( int8_t * pDataSrc
, int8_t * pDataDest
)
63 const uint16x8_t vecOffs1
= { 0, 3, 6, 1, 4, 7, 2, 5};
64 const uint16x8_t vecOffs2
= { 11, 13, 10, 12, 15, 8, 14, 9};
65 int32x4_t vecIn1
= vldrwq_s32 ((int32_t const *) pDataSrc
);
66 int32x4_t vecIn2
= vldrwq_s32 ((int32_t const *) &pDataSrc
[8]);
67 vstrbq_scatter_offset_p_s16 (pDataDest
, vecOffs1
, (int16x8_t
) vecIn1
, __p
);
68 vstrbq_scatter_offset_p_s16 (pDataDest
, vecOffs2
, (int16x8_t
) vecIn2
, __p
);
69 pDataDest
[16] = pDataSrc
[16];
74 foobs32( uint8_t * pDataSrc
, int8_t * pDataDest
)
76 const uint32x4_t vecOffs1
= { 0, 3, 6, 1};
77 const uint32x4_t vecOffs2
= { 4, 7, 2, 5};
78 int32x4_t vecIn1
= vldrwq_s32 ((int32_t const *) pDataSrc
);
79 int32x4_t vecIn2
= vldrwq_s32 ((int32_t const *) &pDataSrc
[4]);
80 vstrbq_scatter_offset_p_s32 (pDataDest
, vecOffs1
, (int32x4_t
) vecIn1
, __p
);
81 vstrbq_scatter_offset_p_s32 (pDataDest
, vecOffs2
, (int32x4_t
) vecIn2
, __p
);
82 pDataDest
[8] = pDataSrc
[8];
87 foohu16( uint16_t * pDataSrc
, uint16_t * pDataDest
)
89 const uint16x8_t vecOffs1
= { 0, 3, 6, 1, 4, 7, 2, 5};
90 const uint16x8_t vecOffs2
= { 11, 13, 10, 12, 15, 8, 14, 9};
91 uint32x4_t vecIn1
= vldrwq_u32 ((uint32_t const *) pDataSrc
);
92 uint32x4_t vecIn2
= vldrwq_u32 ((uint32_t const *) &pDataSrc
[8]);
93 vstrhq_scatter_offset_p_u16 (pDataDest
, vecOffs1
, (uint16x8_t
) vecIn1
, __p
);
94 vstrhq_scatter_offset_p_u16 (pDataDest
, vecOffs2
, (uint16x8_t
) vecIn2
, __p
);
95 pDataDest
[16] = pDataSrc
[16];
100 foohu32( uint16_t * pDataSrc
, uint16_t * pDataDest
)
102 const uint32x4_t vecOffs1
= { 0, 3, 6, 1};
103 const uint32x4_t vecOffs2
= { 4, 7, 2, 5};
104 uint32x4_t vecIn1
= vldrwq_u32 ((uint32_t const *) pDataSrc
);
105 uint32x4_t vecIn2
= vldrwq_u32 ((uint32_t const *) &pDataSrc
[4]);
106 vstrhq_scatter_offset_p_u32 (pDataDest
, vecOffs1
, (uint32x4_t
) vecIn1
, __p
);
107 vstrhq_scatter_offset_p_u32 (pDataDest
, vecOffs2
, (uint32x4_t
) vecIn2
, __p
);
108 pDataDest
[8] = pDataSrc
[8];
113 foohs16( int16_t * pDataSrc
, int16_t * pDataDest
)
115 const uint16x8_t vecOffs1
= { 0, 3, 6, 1, 4, 7, 2, 5};
116 const uint16x8_t vecOffs2
= { 11, 13, 10, 12, 15, 8, 14, 9};
117 int32x4_t vecIn1
= vldrwq_s32 ((int32_t const *) pDataSrc
);
118 int32x4_t vecIn2
= vldrwq_s32 ((int32_t const *) &pDataSrc
[8]);
119 vstrhq_scatter_offset_p_s16 (pDataDest
, vecOffs1
, (int16x8_t
) vecIn1
, __p
);
120 vstrhq_scatter_offset_p_s16 (pDataDest
, vecOffs2
, (int16x8_t
) vecIn2
, __p
);
121 pDataDest
[16] = pDataSrc
[16];
126 foohs32( uint16_t * pDataSrc
, int16_t * pDataDest
)
128 const uint32x4_t vecOffs1
= { 0, 3, 6, 1};
129 const uint32x4_t vecOffs2
= { 4, 7, 2, 5};
130 int32x4_t vecIn1
= vldrwq_s32 ((int32_t const *) pDataSrc
);
131 int32x4_t vecIn2
= vldrwq_s32 ((int32_t const *) &pDataSrc
[4]);
132 vstrhq_scatter_offset_p_s32 (pDataDest
, vecOffs1
, (int32x4_t
) vecIn1
, __p
);
133 vstrhq_scatter_offset_p_s32 (pDataDest
, vecOffs2
, (int32x4_t
) vecIn2
, __p
);
134 pDataDest
[8] = pDataSrc
[8];
139 foohf16( float16_t
* pDataSrc
, float16_t
* pDataDest
)
141 const uint16x8_t vecOffs1
= { 0, 3, 6, 1, 4, 7, 2, 5};
142 const uint16x8_t vecOffs2
= { 11, 13, 10, 12, 15, 8, 14, 9};
143 uint32x4_t vecIn1
= vldrwq_u32 ((uint32_t const *) pDataSrc
);
144 uint32x4_t vecIn2
= vldrwq_u32 ((uint32_t const *) &pDataSrc
[8]);
145 vstrhq_scatter_offset_p_f16 (pDataDest
, vecOffs1
, (float16x8_t
) vecIn1
, __p
);
146 vstrhq_scatter_offset_p_f16 (pDataDest
, vecOffs2
, (float16x8_t
) vecIn2
, __p
);
147 pDataDest
[16] = pDataSrc
[16];
152 foowu32( uint32_t * pDataSrc
, uint32_t * pDataDest
)
154 const uint32x4_t vecOffs1
= { 0, 3, 6, 1};
155 const uint32x4_t vecOffs2
= { 4, 7, 2, 5};
156 uint32x4_t vecIn1
= vldrwq_u32 ((uint32_t const *) pDataSrc
);
157 uint32x4_t vecIn2
= vldrwq_u32 ((uint32_t const *) &pDataSrc
[4]);
158 vstrwq_scatter_offset_p_u32 (pDataDest
, vecOffs1
, (uint32x4_t
) vecIn1
, __p
);
159 vstrwq_scatter_offset_p_u32 (pDataDest
, vecOffs2
, (uint32x4_t
) vecIn2
, __p
);
160 pDataDest
[8] = pDataSrc
[8];
165 foows32( int32_t * pDataSrc
, int32_t * pDataDest
)
167 const uint32x4_t vecOffs1
= { 0, 3, 6, 1};
168 const uint32x4_t vecOffs2
= { 4, 7, 2, 5};
169 uint32x4_t vecIn1
= vldrwq_u32 ((uint32_t const *) pDataSrc
);
170 uint32x4_t vecIn2
= vldrwq_u32 ((uint32_t const *) &pDataSrc
[4]);
171 vstrwq_scatter_offset_p_s32 (pDataDest
, vecOffs1
, (int32x4_t
) vecIn1
, __p
);
172 vstrwq_scatter_offset_p_s32 (pDataDest
, vecOffs2
, (int32x4_t
) vecIn2
, __p
);
173 pDataDest
[8] = pDataSrc
[8];
178 foowf32( float32_t
* pDataSrc
, float32_t
* pDataDest
)
180 const uint32x4_t vecOffs1
= { 0, 3, 6, 1};
181 const uint32x4_t vecOffs2
= { 4, 7, 2, 5};
182 uint32x4_t vecIn1
= vldrwq_u32 ((uint32_t const *) pDataSrc
);
183 uint32x4_t vecIn2
= vldrwq_u32 ((uint32_t const *) &pDataSrc
[8]);
184 vstrwq_scatter_offset_p_f32 (pDataDest
, vecOffs1
, (float32x4_t
) vecIn1
, __p
);
185 vstrwq_scatter_offset_p_f32 (pDataDest
, vecOffs2
, (float32x4_t
) vecIn2
, __p
);
186 pDataDest
[8] = pDataSrc
[8];
191 foowu64( uint64_t * pDataSrc
, uint64_t * pDataDest
)
193 const uint64x2_t vecOffs1
= { 0, 3};
194 const uint64x2_t vecOffs2
= { 1, 2};
195 uint32x4_t vecIn1
= vldrwq_u32 ((uint32_t const *) pDataSrc
);
196 uint32x4_t vecIn2
= vldrwq_u32 ((uint32_t const *) &pDataSrc
[2]);
197 vstrdq_scatter_offset_p_u64 (pDataDest
, vecOffs1
, (uint64x2_t
) vecIn1
, __p
);
198 vstrdq_scatter_offset_p_u64 (pDataDest
, vecOffs2
, (uint64x2_t
) vecIn2
, __p
);
199 pDataDest
[4] = pDataSrc
[4];
204 foows64( int64_t * pDataSrc
, int64_t * pDataDest
)
206 const uint64x2_t vecOffs1
= { 0, 3};
207 const uint64x2_t vecOffs2
= { 1, 2};
208 uint32x4_t vecIn1
= vldrwq_u32 ((uint32_t const *) pDataSrc
);
209 uint32x4_t vecIn2
= vldrwq_u32 ((uint32_t const *) &pDataSrc
[2]);
210 vstrdq_scatter_offset_p_s64 (pDataDest
, vecOffs1
, (int64x2_t
) vecIn1
, __p
);
211 vstrdq_scatter_offset_p_s64 (pDataDest
, vecOffs2
, (int64x2_t
) vecIn2
, __p
);
212 pDataDest
[4] = pDataSrc
[4];
/* { dg-final { scan-assembler-times "vstr\[a-z\]t" 32 } } */