git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_vstore_scatter_offset_p.c
[ARM]: Correct the grouping of operands in MVE vector scatter store intrinsics (PR94735).
[thirdparty/gcc.git] / gcc / testsuite / gcc.target / arm / mve / intrinsics / mve_vstore_scatter_offset_p.c
1 /* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
2 /* { dg-add-options arm_v8_1m_mve_fp } */
3 /* { dg-additional-options "-O2" } */
4
5 #include "arm_mve.h"
6
/* Predicate value passed as the final argument to every predicated scatter
   store in this file.  NOTE(review): presumably left as a non-static,
   uninitialised global so the compiler cannot treat the predicate as a
   known constant -- confirm.  */
7 mve_pred16_t __p;
8 int
9 foobu8( uint8_t * pDataSrc, uint8_t * pDataDest)
10 {
11 const uint8x16_t vecOffs1 = { 0, 3, 6, 1, 4, 7, 2, 5, 9, 11, 13, 10, 12, 15, 8, 14};
12 const uint8x16_t vecOffs2 = { 31, 29, 27, 25, 23, 28, 21, 26, 19, 24, 17, 22, 16, 20, 18, 30};
13 uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc);
14 uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[16]);
15 vstrbq_scatter_offset_p_u8(pDataDest, vecOffs1, (uint8x16_t) vecIn1, __p);
16 vstrbq_scatter_offset_p_u8(pDataDest, vecOffs2, (uint8x16_t) vecIn2, __p);
17 pDataDest[32] = pDataSrc[32];
18 return 0;
19 }
20
21 int
22 foobu16( uint8_t * pDataSrc, uint8_t * pDataDest)
23 {
24 const uint16x8_t vecOffs1 = { 0, 3, 6, 1, 4, 7, 2, 5};
25 const uint16x8_t vecOffs2 = { 11, 13, 10, 12, 15, 8, 14, 9};
26 uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc);
27 uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[8]);
28 vstrbq_scatter_offset_p_u16 (pDataDest, vecOffs1, (uint16x8_t) vecIn1, __p);
29 vstrbq_scatter_offset_p_u16 (pDataDest, vecOffs2, (uint16x8_t) vecIn2, __p);
30 pDataDest[16] = pDataSrc[16];
31 return 0;
32 }
33
34 int
35 foobu32( uint8_t * pDataSrc, uint8_t * pDataDest)
36 {
37 const uint32x4_t vecOffs1 = { 0, 3, 6, 1};
38 const uint32x4_t vecOffs2 = { 4, 7, 2, 5};
39 uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc);
40 uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[4]);
41 vstrbq_scatter_offset_p_u32 (pDataDest, vecOffs1, (uint32x4_t) vecIn1, __p);
42 vstrbq_scatter_offset_p_u32 (pDataDest, vecOffs2, (uint32x4_t) vecIn2, __p);
43 pDataDest[8] = pDataSrc[8];
44 return 0;
45 }
46
47 int
48 foobs8( int8_t * pDataSrc, int8_t * pDataDest)
49 {
50 const uint8x16_t vecOffs1 = { 0, 3, 6, 1, 4, 7, 2, 5, 9, 11, 13, 10, 12, 15, 8, 14};
51 const uint8x16_t vecOffs2 = { 31, 29, 27, 25, 23, 28, 21, 26, 19, 24, 17, 22, 16, 20, 18, 30};
52 int32x4_t vecIn1 = vldrwq_s32 ((int32_t const *) pDataSrc);
53 int32x4_t vecIn2 = vldrwq_s32 ((int32_t const *) &pDataSrc[16]);
54 vstrbq_scatter_offset_p_s8 (pDataDest, vecOffs1, (int8x16_t) vecIn1, __p);
55 vstrbq_scatter_offset_p_s8 (pDataDest, vecOffs2, (int8x16_t) vecIn2, __p);
56 pDataDest[32] = pDataSrc[32];
57 return 0;
58 }
59
60 int
61 foobs16( int8_t * pDataSrc, int8_t * pDataDest)
62 {
63 const uint16x8_t vecOffs1 = { 0, 3, 6, 1, 4, 7, 2, 5};
64 const uint16x8_t vecOffs2 = { 11, 13, 10, 12, 15, 8, 14, 9};
65 int32x4_t vecIn1 = vldrwq_s32 ((int32_t const *) pDataSrc);
66 int32x4_t vecIn2 = vldrwq_s32 ((int32_t const *) &pDataSrc[8]);
67 vstrbq_scatter_offset_p_s16 (pDataDest, vecOffs1, (int16x8_t) vecIn1, __p);
68 vstrbq_scatter_offset_p_s16 (pDataDest, vecOffs2, (int16x8_t) vecIn2, __p);
69 pDataDest[16] = pDataSrc[16];
70 return 0;
71 }
72
73 int
74 foobs32( uint8_t * pDataSrc, int8_t * pDataDest)
75 {
76 const uint32x4_t vecOffs1 = { 0, 3, 6, 1};
77 const uint32x4_t vecOffs2 = { 4, 7, 2, 5};
78 int32x4_t vecIn1 = vldrwq_s32 ((int32_t const *) pDataSrc);
79 int32x4_t vecIn2 = vldrwq_s32 ((int32_t const *) &pDataSrc[4]);
80 vstrbq_scatter_offset_p_s32 (pDataDest, vecOffs1, (int32x4_t) vecIn1, __p);
81 vstrbq_scatter_offset_p_s32 (pDataDest, vecOffs2, (int32x4_t) vecIn2, __p);
82 pDataDest[8] = pDataSrc[8];
83 return 0;
84 }
85
86 int
87 foohu16( uint16_t * pDataSrc, uint16_t * pDataDest)
88 {
89 const uint16x8_t vecOffs1 = { 0, 3, 6, 1, 4, 7, 2, 5};
90 const uint16x8_t vecOffs2 = { 11, 13, 10, 12, 15, 8, 14, 9};
91 uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc);
92 uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[8]);
93 vstrhq_scatter_offset_p_u16 (pDataDest, vecOffs1, (uint16x8_t) vecIn1, __p);
94 vstrhq_scatter_offset_p_u16 (pDataDest, vecOffs2, (uint16x8_t) vecIn2, __p);
95 pDataDest[16] = pDataSrc[16];
96 return 0;
97 }
98
99 int
100 foohu32( uint16_t * pDataSrc, uint16_t * pDataDest)
101 {
102 const uint32x4_t vecOffs1 = { 0, 3, 6, 1};
103 const uint32x4_t vecOffs2 = { 4, 7, 2, 5};
104 uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc);
105 uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[4]);
106 vstrhq_scatter_offset_p_u32 (pDataDest, vecOffs1, (uint32x4_t) vecIn1, __p);
107 vstrhq_scatter_offset_p_u32 (pDataDest, vecOffs2, (uint32x4_t) vecIn2, __p);
108 pDataDest[8] = pDataSrc[8];
109 return 0;
110 }
111
112 int
113 foohs16( int16_t * pDataSrc, int16_t * pDataDest)
114 {
115 const uint16x8_t vecOffs1 = { 0, 3, 6, 1, 4, 7, 2, 5};
116 const uint16x8_t vecOffs2 = { 11, 13, 10, 12, 15, 8, 14, 9};
117 int32x4_t vecIn1 = vldrwq_s32 ((int32_t const *) pDataSrc);
118 int32x4_t vecIn2 = vldrwq_s32 ((int32_t const *) &pDataSrc[8]);
119 vstrhq_scatter_offset_p_s16 (pDataDest, vecOffs1, (int16x8_t) vecIn1, __p);
120 vstrhq_scatter_offset_p_s16 (pDataDest, vecOffs2, (int16x8_t) vecIn2, __p);
121 pDataDest[16] = pDataSrc[16];
122 return 0;
123 }
124
125 int
126 foohs32( uint16_t * pDataSrc, int16_t * pDataDest)
127 {
128 const uint32x4_t vecOffs1 = { 0, 3, 6, 1};
129 const uint32x4_t vecOffs2 = { 4, 7, 2, 5};
130 int32x4_t vecIn1 = vldrwq_s32 ((int32_t const *) pDataSrc);
131 int32x4_t vecIn2 = vldrwq_s32 ((int32_t const *) &pDataSrc[4]);
132 vstrhq_scatter_offset_p_s32 (pDataDest, vecOffs1, (int32x4_t) vecIn1, __p);
133 vstrhq_scatter_offset_p_s32 (pDataDest, vecOffs2, (int32x4_t) vecIn2, __p);
134 pDataDest[8] = pDataSrc[8];
135 return 0;
136 }
137
138 int
139 foohf16( float16_t * pDataSrc, float16_t * pDataDest)
140 {
141 const uint16x8_t vecOffs1 = { 0, 3, 6, 1, 4, 7, 2, 5};
142 const uint16x8_t vecOffs2 = { 11, 13, 10, 12, 15, 8, 14, 9};
143 uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc);
144 uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[8]);
145 vstrhq_scatter_offset_p_f16 (pDataDest, vecOffs1, (float16x8_t) vecIn1, __p);
146 vstrhq_scatter_offset_p_f16 (pDataDest, vecOffs2, (float16x8_t) vecIn2, __p);
147 pDataDest[16] = pDataSrc[16];
148 return 0;
149 }
150
151 int
152 foowu32( uint32_t * pDataSrc, uint32_t * pDataDest)
153 {
154 const uint32x4_t vecOffs1 = { 0, 3, 6, 1};
155 const uint32x4_t vecOffs2 = { 4, 7, 2, 5};
156 uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc);
157 uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[4]);
158 vstrwq_scatter_offset_p_u32 (pDataDest, vecOffs1, (uint32x4_t) vecIn1, __p);
159 vstrwq_scatter_offset_p_u32 (pDataDest, vecOffs2, (uint32x4_t) vecIn2, __p);
160 pDataDest[8] = pDataSrc[8];
161 return 0;
162 }
163
164 int
165 foows32( int32_t * pDataSrc, int32_t * pDataDest)
166 {
167 const uint32x4_t vecOffs1 = { 0, 3, 6, 1};
168 const uint32x4_t vecOffs2 = { 4, 7, 2, 5};
169 uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc);
170 uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[4]);
171 vstrwq_scatter_offset_p_s32 (pDataDest, vecOffs1, (int32x4_t) vecIn1, __p);
172 vstrwq_scatter_offset_p_s32 (pDataDest, vecOffs2, (int32x4_t) vecIn2, __p);
173 pDataDest[8] = pDataSrc[8];
174 return 0;
175 }
176
177 int
178 foowf32( float32_t * pDataSrc, float32_t * pDataDest)
179 {
180 const uint32x4_t vecOffs1 = { 0, 3, 6, 1};
181 const uint32x4_t vecOffs2 = { 4, 7, 2, 5};
182 uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc);
183 uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[8]);
184 vstrwq_scatter_offset_p_f32 (pDataDest, vecOffs1, (float32x4_t) vecIn1, __p);
185 vstrwq_scatter_offset_p_f32 (pDataDest, vecOffs2, (float32x4_t) vecIn2, __p);
186 pDataDest[8] = pDataSrc[8];
187 return 0;
188 }
189
190 int
191 foowu64( uint64_t * pDataSrc, uint64_t * pDataDest)
192 {
193 const uint64x2_t vecOffs1 = { 0, 3};
194 const uint64x2_t vecOffs2 = { 1, 2};
195 uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc);
196 uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[2]);
197 vstrdq_scatter_offset_p_u64 (pDataDest, vecOffs1, (uint64x2_t) vecIn1, __p);
198 vstrdq_scatter_offset_p_u64 (pDataDest, vecOffs2, (uint64x2_t) vecIn2, __p);
199 pDataDest[4] = pDataSrc[4];
200 return 0;
201 }
202
203 int
204 foows64( int64_t * pDataSrc, int64_t * pDataDest)
205 {
206 const uint64x2_t vecOffs1 = { 0, 3};
207 const uint64x2_t vecOffs2 = { 1, 2};
208 uint32x4_t vecIn1 = vldrwq_u32 ((uint32_t const *) pDataSrc);
209 uint32x4_t vecIn2 = vldrwq_u32 ((uint32_t const *) &pDataSrc[2]);
210 vstrdq_scatter_offset_p_s64 (pDataDest, vecOffs1, (int64x2_t) vecIn1, __p);
211 vstrdq_scatter_offset_p_s64 (pDataDest, vecOffs2, (int64x2_t) vecIn2, __p);
212 pDataDest[4] = pDataSrc[4];
213 return 0;
214 }
215
216 /* { dg-final { scan-assembler-times "vstr\[a-z\]t" 32 } } */