]>
Commit | Line | Data |
---|---|---|
8fa7f434 | 1 | ;; Machine description for AArch64 SVE. |
2 | ;; Copyright (C) 2009-2016 Free Software Foundation, Inc. | |
3 | ;; Contributed by ARM Ltd. | |
4 | ;; | |
5 | ;; This file is part of GCC. | |
6 | ;; | |
7 | ;; GCC is free software; you can redistribute it and/or modify it | |
8 | ;; under the terms of the GNU General Public License as published by | |
9 | ;; the Free Software Foundation; either version 3, or (at your option) | |
10 | ;; any later version. | |
11 | ;; | |
12 | ;; GCC is distributed in the hope that it will be useful, but | |
13 | ;; WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 | ;; General Public License for more details. | |
16 | ;; | |
17 | ;; You should have received a copy of the GNU General Public License | |
18 | ;; along with GCC; see the file COPYING3. If not see | |
19 | ;; <http://www.gnu.org/licenses/>. | |
20 | ||
21 | ;; Note on the handling of big-endian SVE | |
22 | ;; -------------------------------------- | |
23 | ;; | |
24 | ;; On big-endian systems, Advanced SIMD mov<mode> patterns act in the | |
25 | ;; same way as movdi or movti would: the first byte of memory goes | |
26 | ;; into the most significant byte of the register and the last byte | |
27 | ;; of memory goes into the least significant byte of the register. | |
28 | ;; This is the most natural ordering for Advanced SIMD and matches | |
29 | ;; the ABI layout for 64-bit and 128-bit vector types. | |
30 | ;; | |
31 | ;; As a result, the order of bytes within the register is what GCC | |
32 | ;; expects for a big-endian target, and subreg offsets therefore work | |
33 | ;; as expected, with the first element in memory having subreg offset 0 | |
34 | ;; and the last element in memory having the subreg offset associated | |
35 | ;; with a big-endian lowpart. However, this ordering also means that | |
36 | ;; GCC's lane numbering does not match the architecture's numbering: | |
37 | ;; GCC always treats the element at the lowest address in memory | |
38 | ;; (subreg offset 0) as element 0, while the architecture treats | |
39 | ;; the least significant end of the register as element 0. | |
40 | ;; | |
41 | ;; The situation for SVE is different. We want the layout of the | |
42 | ;; SVE register to be same for mov<mode> as it is for maskload<mode>: | |
43 | ;; logically, a mov<mode> load must be indistinguishable from a | |
44 | ;; maskload<mode> whose mask is all true. We therefore need the | |
45 | ;; register layout to match LD1 rather than LDR. The ABI layout of | |
46 | ;; SVE types also matches LD1 byte ordering rather than LDR byte ordering. | |
47 | ;; | |
48 | ;; As a result, the architecture lane numbering matches GCC's lane | |
49 | ;; numbering, with element 0 always being the first in memory. | |
50 | ;; However: | |
51 | ;; | |
52 | ;; - Applying a subreg offset to a register does not give the element | |
53 | ;; that GCC expects: the first element in memory has the subreg offset | |
54 | ;; associated with a big-endian lowpart while the last element in memory | |
55 | ;; has subreg offset 0. We handle this via TARGET_CAN_CHANGE_MODE_CLASS. | |
56 | ;; | |
57 | ;; - We cannot use LDR and STR for spill slots that might be accessed | |
58 | ;; via subregs, since although the elements have the order GCC expects, | |
59 | ;; the order of the bytes within the elements is different. We instead | |
60 | ;; access spill slots via LD1 and ST1, using secondary reloads to | |
61 | ;; reserve a predicate register. | |
62 | ||
63 | ||
64 | ;; SVE data moves. | |
65 | (define_expand "mov<mode>" | |
66 | [(set (match_operand:SVE_ALL 0 "nonimmediate_operand") | |
67 | (match_operand:SVE_ALL 1 "general_operand"))] | |
68 | "TARGET_SVE" | |
69 | { | |
70 | /* Use the predicated load and store patterns where possible. | |
71 | This is required for big-endian targets (see the comment at the | |
72 | head of the file) and increases the addressing choices for | |
73 | little-endian. */ | |
74 | if ((MEM_P (operands[0]) || MEM_P (operands[1])) | |
75 | && can_create_pseudo_p ()) | |
76 | { | |
77 | aarch64_expand_sve_mem_move (operands[0], operands[1], <VPRED>mode); | |
78 | DONE; | |
79 | } | |
80 | ||
81 | if (CONSTANT_P (operands[1])) | |
82 | { | |
83 | aarch64_expand_mov_immediate (operands[0], operands[1], | |
84 | gen_vec_duplicate<mode>); | |
85 | DONE; | |
86 | } | |
70857087 | 87 | |
88 | /* Optimize subregs on big-endian targets: we can use REV[BHW] | |
89 | instead of going through memory. */ | |
90 | if (BYTES_BIG_ENDIAN | |
91 | && aarch64_maybe_expand_sve_subreg_move (operands[0], operands[1])) | |
92 | DONE; | |
93 | } | |
94 | ) | |
95 | ||
96 | ;; A pattern for optimizing SUBREGs that have a reinterpreting effect | |
97 | ;; on big-endian targets; see aarch64_maybe_expand_sve_subreg_move | |
98 | ;; for details. We use a special predicate for operand 2 to reduce | |
99 | ;; the number of patterns. | |
100 | (define_insn_and_split "*aarch64_sve_mov<mode>_subreg_be" | |
101 | [(set (match_operand:SVE_ALL 0 "aarch64_sve_nonimmediate_operand" "=w") | |
102 | (unspec:SVE_ALL | |
103 | [(match_operand:VNx16BI 1 "register_operand" "Upl") | |
104 | (match_operand 2 "aarch64_any_register_operand" "w")] | |
105 | UNSPEC_REV_SUBREG))] | |
106 | "TARGET_SVE && BYTES_BIG_ENDIAN" | |
107 | "#" | |
108 | "&& reload_completed" | |
109 | [(const_int 0)] | |
110 | { | |
111 | aarch64_split_sve_subreg_move (operands[0], operands[1], operands[2]); | |
112 | DONE; | |
8fa7f434 | 113 | } |
114 | ) | |
115 | ||
116 | ;; Unpredicated moves (little-endian). Only allow memory operations | |
117 | ;; during and after RA; before RA we want the predicated load and | |
118 | ;; store patterns to be used instead. | |
119 | (define_insn "*aarch64_sve_mov<mode>_le" | |
120 | [(set (match_operand:SVE_ALL 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w") | |
121 | (match_operand:SVE_ALL 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))] | |
122 | "TARGET_SVE | |
123 | && !BYTES_BIG_ENDIAN | |
124 | && ((lra_in_progress || reload_completed) | |
125 | || (register_operand (operands[0], <MODE>mode) | |
126 | && nonmemory_operand (operands[1], <MODE>mode)))" | |
127 | "@ | |
128 | ldr\t%0, %1 | |
129 | str\t%1, %0 | |
130 | mov\t%0.d, %1.d | |
131 | * return aarch64_output_sve_mov_immediate (operands[1]);" | |
132 | ) | |
133 | ||
134 | ;; Unpredicated moves (big-endian). Memory accesses require secondary | |
135 | ;; reloads. | |
136 | (define_insn "*aarch64_sve_mov<mode>_be" | |
137 | [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w") | |
138 | (match_operand:SVE_ALL 1 "aarch64_nonmemory_operand" "w, Dn"))] | |
139 | "TARGET_SVE && BYTES_BIG_ENDIAN" | |
140 | "@ | |
141 | mov\t%0.d, %1.d | |
142 | * return aarch64_output_sve_mov_immediate (operands[1]);" | |
143 | ) | |
144 | ||
145 | ;; Handle big-endian memory reloads. We use byte PTRUE for all modes | |
146 | ;; to try to encourage reuse. | |
147 | (define_expand "aarch64_sve_reload_be" | |
148 | [(parallel | |
149 | [(set (match_operand 0) | |
150 | (match_operand 1)) | |
151 | (clobber (match_operand:VNx16BI 2 "register_operand" "=Upl"))])] | |
152 | "TARGET_SVE && BYTES_BIG_ENDIAN" | |
153 | { | |
154 | /* Create a PTRUE. */ | |
155 | emit_move_insn (operands[2], CONSTM1_RTX (VNx16BImode)); | |
156 | ||
157 | /* Refer to the PTRUE in the appropriate mode for this move. */ | |
158 | machine_mode mode = GET_MODE (operands[0]); | |
159 | machine_mode pred_mode | |
160 | = aarch64_sve_pred_mode (GET_MODE_UNIT_SIZE (mode)).require (); | |
161 | rtx pred = gen_lowpart (pred_mode, operands[2]); | |
162 | ||
163 | /* Emit a predicated load or store. */ | |
164 | aarch64_emit_sve_pred_move (operands[0], pred, operands[1]); | |
165 | DONE; | |
166 | } | |
167 | ) | |
168 | ||
169 | ;; A predicated load or store for which the predicate is known to be | |
170 | ;; all-true. Note that this pattern is generated directly by | |
171 | ;; aarch64_emit_sve_pred_move, so changes to this pattern will | |
172 | ;; need changes there as well. | |
173 | (define_insn "*pred_mov<mode>" | |
174 | [(set (match_operand:SVE_ALL 0 "nonimmediate_operand" "=w, m") | |
175 | (unspec:SVE_ALL | |
176 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
177 | (match_operand:SVE_ALL 2 "nonimmediate_operand" "m, w")] | |
178 | UNSPEC_MERGE_PTRUE))] | |
179 | "TARGET_SVE | |
180 | && (register_operand (operands[0], <MODE>mode) | |
181 | || register_operand (operands[2], <MODE>mode))" | |
182 | "@ | |
183 | ld1<Vesize>\t%0.<Vetype>, %1/z, %2 | |
184 | st1<Vesize>\t%2.<Vetype>, %1, %0" | |
185 | ) | |
186 | ||
187 | (define_expand "movmisalign<mode>" | |
188 | [(set (match_operand:SVE_ALL 0 "nonimmediate_operand") | |
189 | (match_operand:SVE_ALL 1 "general_operand"))] | |
190 | "TARGET_SVE" | |
191 | { | |
192 | /* Equivalent to a normal move for our purpooses. */ | |
193 | emit_move_insn (operands[0], operands[1]); | |
194 | DONE; | |
195 | } | |
196 | ) | |
197 | ||
198 | (define_insn "maskload<mode><vpred>" | |
199 | [(set (match_operand:SVE_ALL 0 "register_operand" "=w") | |
200 | (unspec:SVE_ALL | |
201 | [(match_operand:<VPRED> 2 "register_operand" "Upl") | |
202 | (match_operand:SVE_ALL 1 "memory_operand" "m")] | |
203 | UNSPEC_LD1_SVE))] | |
204 | "TARGET_SVE" | |
205 | "ld1<Vesize>\t%0.<Vetype>, %2/z, %1" | |
206 | ) | |
207 | ||
208 | (define_insn "maskstore<mode><vpred>" | |
209 | [(set (match_operand:SVE_ALL 0 "memory_operand" "+m") | |
210 | (unspec:SVE_ALL [(match_operand:<VPRED> 2 "register_operand" "Upl") | |
211 | (match_operand:SVE_ALL 1 "register_operand" "w") | |
212 | (match_dup 0)] | |
213 | UNSPEC_ST1_SVE))] | |
214 | "TARGET_SVE" | |
215 | "st1<Vesize>\t%1.<Vetype>, %2, %0" | |
216 | ) | |
217 | ||
1619606c | 218 | ;; Unpredicated gather loads. |
219 | (define_expand "gather_load<mode>" | |
220 | [(set (match_operand:SVE_SD 0 "register_operand") | |
221 | (unspec:SVE_SD | |
222 | [(match_dup 5) | |
223 | (match_operand:DI 1 "aarch64_reg_or_zero") | |
224 | (match_operand:<V_INT_EQUIV> 2 "register_operand") | |
225 | (match_operand:DI 3 "const_int_operand") | |
226 | (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>") | |
227 | (mem:BLK (scratch))] | |
228 | UNSPEC_LD1_GATHER))] | |
229 | "TARGET_SVE" | |
230 | { | |
231 | operands[5] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
232 | } | |
233 | ) | |
234 | ||
235 | ;; Predicated gather loads for 32-bit elements. Operand 3 is true for | |
236 | ;; unsigned extension and false for signed extension. | |
237 | (define_insn "mask_gather_load<mode>" | |
238 | [(set (match_operand:SVE_S 0 "register_operand" "=w, w, w, w, w") | |
239 | (unspec:SVE_S | |
240 | [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl, Upl, Upl") | |
241 | (match_operand:DI 1 "aarch64_reg_or_zero" "Z, rk, rk, rk, rk") | |
242 | (match_operand:<V_INT_EQUIV> 2 "register_operand" "w, w, w, w, w") | |
243 | (match_operand:DI 3 "const_int_operand" "i, Z, Ui1, Z, Ui1") | |
244 | (match_operand:DI 4 "aarch64_gather_scale_operand_w" "Ui1, Ui1, Ui1, i, i") | |
245 | (mem:BLK (scratch))] | |
246 | UNSPEC_LD1_GATHER))] | |
247 | "TARGET_SVE" | |
248 | "@ | |
249 | ld1w\t%0.s, %5/z, [%2.s] | |
250 | ld1w\t%0.s, %5/z, [%1, %2.s, sxtw] | |
251 | ld1w\t%0.s, %5/z, [%1, %2.s, uxtw] | |
252 | ld1w\t%0.s, %5/z, [%1, %2.s, sxtw %p4] | |
253 | ld1w\t%0.s, %5/z, [%1, %2.s, uxtw %p4]" | |
254 | ) | |
255 | ||
256 | ;; Predicated gather loads for 64-bit elements. The value of operand 3 | |
257 | ;; doesn't matter in this case. | |
258 | (define_insn "mask_gather_load<mode>" | |
259 | [(set (match_operand:SVE_D 0 "register_operand" "=w, w, w") | |
260 | (unspec:SVE_D | |
261 | [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl") | |
262 | (match_operand:DI 1 "aarch64_reg_or_zero" "Z, rk, rk") | |
263 | (match_operand:<V_INT_EQUIV> 2 "register_operand" "w, w, w") | |
264 | (match_operand:DI 3 "const_int_operand") | |
265 | (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, Ui1, i") | |
266 | (mem:BLK (scratch))] | |
267 | UNSPEC_LD1_GATHER))] | |
268 | "TARGET_SVE" | |
269 | "@ | |
270 | ld1d\t%0.d, %5/z, [%2.d] | |
271 | ld1d\t%0.d, %5/z, [%1, %2.d] | |
272 | ld1d\t%0.d, %5/z, [%1, %2.d, lsl %p4]" | |
273 | ) | |
274 | ||
0bf8b382 | 275 | ;; Unpredicated scatter store. |
276 | (define_expand "scatter_store<mode>" | |
277 | [(set (mem:BLK (scratch)) | |
278 | (unspec:BLK | |
279 | [(match_dup 5) | |
280 | (match_operand:DI 0 "aarch64_reg_or_zero") | |
281 | (match_operand:<V_INT_EQUIV> 1 "register_operand") | |
282 | (match_operand:DI 2 "const_int_operand") | |
283 | (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>") | |
284 | (match_operand:SVE_SD 4 "register_operand")] | |
285 | UNSPEC_ST1_SCATTER))] | |
286 | "TARGET_SVE" | |
287 | { | |
288 | operands[5] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
289 | } | |
290 | ) | |
291 | ||
292 | ;; Predicated scatter stores for 32-bit elements. Operand 2 is true for | |
293 | ;; unsigned extension and false for signed extension. | |
294 | (define_insn "mask_scatter_store<mode>" | |
295 | [(set (mem:BLK (scratch)) | |
296 | (unspec:BLK | |
297 | [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl, Upl, Upl") | |
298 | (match_operand:DI 0 "aarch64_reg_or_zero" "Z, rk, rk, rk, rk") | |
299 | (match_operand:<V_INT_EQUIV> 1 "register_operand" "w, w, w, w, w") | |
300 | (match_operand:DI 2 "const_int_operand" "i, Z, Ui1, Z, Ui1") | |
301 | (match_operand:DI 3 "aarch64_gather_scale_operand_w" "Ui1, Ui1, Ui1, i, i") | |
302 | (match_operand:SVE_S 4 "register_operand" "w, w, w, w, w")] | |
303 | UNSPEC_ST1_SCATTER))] | |
304 | "TARGET_SVE" | |
305 | "@ | |
306 | st1w\t%4.s, %5, [%1.s] | |
307 | st1w\t%4.s, %5, [%0, %1.s, sxtw] | |
308 | st1w\t%4.s, %5, [%0, %1.s, uxtw] | |
309 | st1w\t%4.s, %5, [%0, %1.s, sxtw %p3] | |
310 | st1w\t%4.s, %5, [%0, %1.s, uxtw %p3]" | |
311 | ) | |
312 | ||
313 | ;; Predicated scatter stores for 64-bit elements. The value of operand 2 | |
314 | ;; doesn't matter in this case. | |
315 | (define_insn "mask_scatter_store<mode>" | |
316 | [(set (mem:BLK (scratch)) | |
317 | (unspec:BLK | |
318 | [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl") | |
319 | (match_operand:DI 0 "aarch64_reg_or_zero" "Z, rk, rk") | |
320 | (match_operand:<V_INT_EQUIV> 1 "register_operand" "w, w, w") | |
321 | (match_operand:DI 2 "const_int_operand") | |
322 | (match_operand:DI 3 "aarch64_gather_scale_operand_d" "Ui1, Ui1, i") | |
323 | (match_operand:SVE_D 4 "register_operand" "w, w, w")] | |
324 | UNSPEC_ST1_SCATTER))] | |
325 | "TARGET_SVE" | |
326 | "@ | |
327 | st1d\t%4.d, %5, [%1.d] | |
328 | st1d\t%4.d, %5, [%0, %1.d] | |
329 | st1d\t%4.d, %5, [%0, %1.d, lsl %p3]" | |
330 | ) | |
331 | ||
0ac5a51b | 332 | ;; SVE structure moves. |
333 | (define_expand "mov<mode>" | |
334 | [(set (match_operand:SVE_STRUCT 0 "nonimmediate_operand") | |
335 | (match_operand:SVE_STRUCT 1 "general_operand"))] | |
336 | "TARGET_SVE" | |
337 | { | |
338 | /* Big-endian loads and stores need to be done via LD1 and ST1; | |
339 | see the comment at the head of the file for details. */ | |
340 | if ((MEM_P (operands[0]) || MEM_P (operands[1])) | |
341 | && BYTES_BIG_ENDIAN) | |
342 | { | |
343 | gcc_assert (can_create_pseudo_p ()); | |
344 | aarch64_expand_sve_mem_move (operands[0], operands[1], <VPRED>mode); | |
345 | DONE; | |
346 | } | |
347 | ||
348 | if (CONSTANT_P (operands[1])) | |
349 | { | |
350 | aarch64_expand_mov_immediate (operands[0], operands[1]); | |
351 | DONE; | |
352 | } | |
353 | } | |
354 | ) | |
355 | ||
356 | ;; Unpredicated structure moves (little-endian). | |
357 | (define_insn "*aarch64_sve_mov<mode>_le" | |
358 | [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w") | |
359 | (match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))] | |
360 | "TARGET_SVE && !BYTES_BIG_ENDIAN" | |
361 | "#" | |
362 | [(set_attr "length" "<insn_length>")] | |
363 | ) | |
364 | ||
365 | ;; Unpredicated structure moves (big-endian). Memory accesses require | |
366 | ;; secondary reloads. | |
367 | (define_insn "*aarch64_sve_mov<mode>_le" | |
368 | [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w, w") | |
369 | (match_operand:SVE_STRUCT 1 "aarch64_nonmemory_operand" "w, Dn"))] | |
370 | "TARGET_SVE && BYTES_BIG_ENDIAN" | |
371 | "#" | |
372 | [(set_attr "length" "<insn_length>")] | |
373 | ) | |
374 | ||
375 | ;; Split unpredicated structure moves into pieces. This is the same | |
376 | ;; for both big-endian and little-endian code, although it only needs | |
377 | ;; to handle memory operands for little-endian code. | |
378 | (define_split | |
379 | [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand") | |
380 | (match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand"))] | |
381 | "TARGET_SVE && reload_completed" | |
382 | [(const_int 0)] | |
383 | { | |
384 | rtx dest = operands[0]; | |
385 | rtx src = operands[1]; | |
386 | if (REG_P (dest) && REG_P (src)) | |
387 | aarch64_simd_emit_reg_reg_move (operands, <VSINGLE>mode, <vector_count>); | |
388 | else | |
389 | for (unsigned int i = 0; i < <vector_count>; ++i) | |
390 | { | |
391 | rtx subdest = simplify_gen_subreg (<VSINGLE>mode, dest, <MODE>mode, | |
392 | i * BYTES_PER_SVE_VECTOR); | |
393 | rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, src, <MODE>mode, | |
394 | i * BYTES_PER_SVE_VECTOR); | |
395 | emit_insn (gen_rtx_SET (subdest, subsrc)); | |
396 | } | |
397 | DONE; | |
398 | } | |
399 | ) | |
400 | ||
401 | ;; Predicated structure moves. This works for both endiannesses but in | |
402 | ;; practice is only useful for big-endian. | |
403 | (define_insn_and_split "pred_mov<mode>" | |
404 | [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_struct_nonimmediate_operand" "=w, Utx") | |
405 | (unspec:SVE_STRUCT | |
406 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
407 | (match_operand:SVE_STRUCT 2 "aarch64_sve_struct_nonimmediate_operand" "Utx, w")] | |
408 | UNSPEC_MERGE_PTRUE))] | |
409 | "TARGET_SVE | |
410 | && (register_operand (operands[0], <MODE>mode) | |
411 | || register_operand (operands[2], <MODE>mode))" | |
412 | "#" | |
413 | "&& reload_completed" | |
414 | [(const_int 0)] | |
415 | { | |
416 | for (unsigned int i = 0; i < <vector_count>; ++i) | |
417 | { | |
418 | rtx subdest = simplify_gen_subreg (<VSINGLE>mode, operands[0], | |
419 | <MODE>mode, | |
420 | i * BYTES_PER_SVE_VECTOR); | |
421 | rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, operands[2], | |
422 | <MODE>mode, | |
423 | i * BYTES_PER_SVE_VECTOR); | |
424 | aarch64_emit_sve_pred_move (subdest, operands[1], subsrc); | |
425 | } | |
426 | DONE; | |
427 | } | |
428 | [(set_attr "length" "<insn_length>")] | |
429 | ) | |
430 | ||
8fa7f434 | 431 | (define_expand "mov<mode>" |
432 | [(set (match_operand:PRED_ALL 0 "nonimmediate_operand") | |
433 | (match_operand:PRED_ALL 1 "general_operand"))] | |
434 | "TARGET_SVE" | |
435 | { | |
436 | if (GET_CODE (operands[0]) == MEM) | |
437 | operands[1] = force_reg (<MODE>mode, operands[1]); | |
438 | } | |
439 | ) | |
440 | ||
441 | (define_insn "*aarch64_sve_mov<mode>" | |
442 | [(set (match_operand:PRED_ALL 0 "nonimmediate_operand" "=Upa, m, Upa, Upa, Upa") | |
443 | (match_operand:PRED_ALL 1 "general_operand" "Upa, Upa, m, Dz, Dm"))] | |
444 | "TARGET_SVE | |
445 | && (register_operand (operands[0], <MODE>mode) | |
446 | || register_operand (operands[1], <MODE>mode))" | |
447 | "@ | |
448 | mov\t%0.b, %1.b | |
449 | str\t%1, %0 | |
450 | ldr\t%0, %1 | |
451 | pfalse\t%0.b | |
452 | * return aarch64_output_ptrue (<MODE>mode, '<Vetype>');" | |
453 | ) | |
454 | ||
455 | ;; Handle extractions from a predicate by converting to an integer vector | |
456 | ;; and extracting from there. | |
457 | (define_expand "vec_extract<vpred><Vel>" | |
458 | [(match_operand:<VEL> 0 "register_operand") | |
459 | (match_operand:<VPRED> 1 "register_operand") | |
460 | (match_operand:SI 2 "nonmemory_operand") | |
461 | ;; Dummy operand to which we can attach the iterator. | |
462 | (reg:SVE_I V0_REGNUM)] | |
463 | "TARGET_SVE" | |
464 | { | |
465 | rtx tmp = gen_reg_rtx (<MODE>mode); | |
466 | emit_insn (gen_aarch64_sve_dup<mode>_const (tmp, operands[1], | |
467 | CONST1_RTX (<MODE>mode), | |
468 | CONST0_RTX (<MODE>mode))); | |
469 | emit_insn (gen_vec_extract<mode><Vel> (operands[0], tmp, operands[2])); | |
470 | DONE; | |
471 | } | |
472 | ) | |
473 | ||
474 | (define_expand "vec_extract<mode><Vel>" | |
475 | [(set (match_operand:<VEL> 0 "register_operand") | |
476 | (vec_select:<VEL> | |
477 | (match_operand:SVE_ALL 1 "register_operand") | |
478 | (parallel [(match_operand:SI 2 "nonmemory_operand")])))] | |
479 | "TARGET_SVE" | |
480 | { | |
481 | poly_int64 val; | |
482 | if (poly_int_rtx_p (operands[2], &val) | |
483 | && known_eq (val, GET_MODE_NUNITS (<MODE>mode) - 1)) | |
484 | { | |
485 | /* The last element can be extracted with a LASTB and a false | |
486 | predicate. */ | |
487 | rtx sel = force_reg (<VPRED>mode, CONST0_RTX (<VPRED>mode)); | |
384eaff1 | 488 | emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1])); |
8fa7f434 | 489 | DONE; |
490 | } | |
491 | if (!CONST_INT_P (operands[2])) | |
492 | { | |
493 | /* Create an index with operand[2] as the base and -1 as the step. | |
494 | It will then be zero for the element we care about. */ | |
495 | rtx index = gen_lowpart (<VEL_INT>mode, operands[2]); | |
496 | index = force_reg (<VEL_INT>mode, index); | |
497 | rtx series = gen_reg_rtx (<V_INT_EQUIV>mode); | |
498 | emit_insn (gen_vec_series<v_int_equiv> (series, index, constm1_rtx)); | |
499 | ||
500 | /* Get a predicate that is true for only that element. */ | |
501 | rtx zero = CONST0_RTX (<V_INT_EQUIV>mode); | |
502 | rtx cmp = gen_rtx_EQ (<V_INT_EQUIV>mode, series, zero); | |
503 | rtx sel = gen_reg_rtx (<VPRED>mode); | |
504 | emit_insn (gen_vec_cmp<v_int_equiv><vpred> (sel, cmp, series, zero)); | |
505 | ||
506 | /* Select the element using LASTB. */ | |
384eaff1 | 507 | emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1])); |
8fa7f434 | 508 | DONE; |
509 | } | |
510 | } | |
511 | ) | |
512 | ||
7ec0c7f5 | 513 | ;; Extract element zero. This is a special case because we want to force |
514 | ;; the registers to be the same for the second alternative, and then | |
515 | ;; split the instruction into nothing after RA. | |
516 | (define_insn_and_split "*vec_extract<mode><Vel>_0" | |
517 | [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv") | |
518 | (vec_select:<VEL> | |
519 | (match_operand:SVE_ALL 1 "register_operand" "w, 0, w") | |
520 | (parallel [(const_int 0)])))] | |
521 | "TARGET_SVE" | |
522 | { | |
523 | operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1])); | |
524 | switch (which_alternative) | |
525 | { | |
526 | case 0: | |
527 | return "umov\\t%<vwcore>0, %1.<Vetype>[0]"; | |
528 | case 1: | |
529 | return "#"; | |
530 | case 2: | |
531 | return "st1\\t{%1.<Vetype>}[0], %0"; | |
532 | default: | |
533 | gcc_unreachable (); | |
534 | } | |
535 | } | |
536 | "&& reload_completed | |
537 | && REG_P (operands[0]) | |
538 | && REGNO (operands[0]) == REGNO (operands[1])" | |
539 | [(const_int 0)] | |
540 | { | |
541 | emit_note (NOTE_INSN_DELETED); | |
542 | DONE; | |
543 | } | |
544 | [(set_attr "type" "neon_to_gp_q, untyped, neon_store1_one_lane_q")] | |
545 | ) | |
546 | ||
8fa7f434 | 547 | ;; Extract an element from the Advanced SIMD portion of the register. |
548 | ;; We don't just reuse the aarch64-simd.md pattern because we don't | |
7ec0c7f5 | 549 | ;; want any change in lane number on big-endian targets. |
8fa7f434 | 550 | (define_insn "*vec_extract<mode><Vel>_v128" |
551 | [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv") | |
552 | (vec_select:<VEL> | |
553 | (match_operand:SVE_ALL 1 "register_operand" "w, w, w") | |
554 | (parallel [(match_operand:SI 2 "const_int_operand")])))] | |
555 | "TARGET_SVE | |
7ec0c7f5 | 556 | && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 1, 15)" |
8fa7f434 | 557 | { |
7ec0c7f5 | 558 | operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1])); |
8fa7f434 | 559 | switch (which_alternative) |
560 | { | |
561 | case 0: | |
562 | return "umov\\t%<vwcore>0, %1.<Vetype>[%2]"; | |
563 | case 1: | |
564 | return "dup\\t%<Vetype>0, %1.<Vetype>[%2]"; | |
565 | case 2: | |
566 | return "st1\\t{%1.<Vetype>}[%2], %0"; | |
567 | default: | |
568 | gcc_unreachable (); | |
569 | } | |
570 | } | |
571 | [(set_attr "type" "neon_to_gp_q, neon_dup_q, neon_store1_one_lane_q")] | |
572 | ) | |
573 | ||
574 | ;; Extract an element in the range of DUP. This pattern allows the | |
575 | ;; source and destination to be different. | |
576 | (define_insn "*vec_extract<mode><Vel>_dup" | |
577 | [(set (match_operand:<VEL> 0 "register_operand" "=w") | |
578 | (vec_select:<VEL> | |
579 | (match_operand:SVE_ALL 1 "register_operand" "w") | |
580 | (parallel [(match_operand:SI 2 "const_int_operand")])))] | |
581 | "TARGET_SVE | |
582 | && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 16, 63)" | |
583 | { | |
584 | operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0])); | |
585 | return "dup\t%0.<Vetype>, %1.<Vetype>[%2]"; | |
586 | } | |
587 | ) | |
588 | ||
589 | ;; Extract an element outside the range of DUP. This pattern requires the | |
590 | ;; source and destination to be the same. | |
591 | (define_insn "*vec_extract<mode><Vel>_ext" | |
592 | [(set (match_operand:<VEL> 0 "register_operand" "=w") | |
593 | (vec_select:<VEL> | |
594 | (match_operand:SVE_ALL 1 "register_operand" "0") | |
595 | (parallel [(match_operand:SI 2 "const_int_operand")])))] | |
596 | "TARGET_SVE && INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode) >= 64" | |
597 | { | |
598 | operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0])); | |
599 | operands[2] = GEN_INT (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode)); | |
600 | return "ext\t%0.b, %0.b, %0.b, #%2"; | |
601 | } | |
602 | ) | |
603 | ||
604 | ;; Extract the last active element of operand 1 into operand 0. | |
605 | ;; If no elements are active, extract the last inactive element instead. | |
384eaff1 | 606 | (define_insn "extract_last_<mode>" |
8fa7f434 | 607 | [(set (match_operand:<VEL> 0 "register_operand" "=r, w") |
608 | (unspec:<VEL> | |
609 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
610 | (match_operand:SVE_ALL 2 "register_operand" "w, w")] | |
611 | UNSPEC_LASTB))] | |
612 | "TARGET_SVE" | |
613 | "@ | |
614 | lastb\t%<vwcore>0, %1, %2.<Vetype> | |
615 | lastb\t%<Vetype>0, %1, %2.<Vetype>" | |
616 | ) | |
617 | ||
618 | (define_expand "vec_duplicate<mode>" | |
619 | [(parallel | |
620 | [(set (match_operand:SVE_ALL 0 "register_operand") | |
621 | (vec_duplicate:SVE_ALL | |
622 | (match_operand:<VEL> 1 "aarch64_sve_dup_operand"))) | |
623 | (clobber (scratch:<VPRED>))])] | |
624 | "TARGET_SVE" | |
625 | { | |
626 | if (MEM_P (operands[1])) | |
627 | { | |
628 | rtx ptrue = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
629 | emit_insn (gen_sve_ld1r<mode> (operands[0], ptrue, operands[1], | |
630 | CONST0_RTX (<MODE>mode))); | |
631 | DONE; | |
632 | } | |
633 | } | |
634 | ) | |
635 | ||
636 | ;; Accept memory operands for the benefit of combine, and also in case | |
637 | ;; the scalar input gets spilled to memory during RA. We want to split | |
638 | ;; the load at the first opportunity in order to allow the PTRUE to be | |
639 | ;; optimized with surrounding code. | |
640 | (define_insn_and_split "*vec_duplicate<mode>_reg" | |
641 | [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w, w") | |
642 | (vec_duplicate:SVE_ALL | |
643 | (match_operand:<VEL> 1 "aarch64_sve_dup_operand" "r, w, Uty"))) | |
644 | (clobber (match_scratch:<VPRED> 2 "=X, X, Upl"))] | |
645 | "TARGET_SVE" | |
646 | "@ | |
647 | mov\t%0.<Vetype>, %<vwcore>1 | |
648 | mov\t%0.<Vetype>, %<Vetype>1 | |
649 | #" | |
650 | "&& MEM_P (operands[1])" | |
651 | [(const_int 0)] | |
652 | { | |
653 | if (GET_CODE (operands[2]) == SCRATCH) | |
654 | operands[2] = gen_reg_rtx (<VPRED>mode); | |
655 | emit_move_insn (operands[2], CONSTM1_RTX (<VPRED>mode)); | |
656 | emit_insn (gen_sve_ld1r<mode> (operands[0], operands[2], operands[1], | |
657 | CONST0_RTX (<MODE>mode))); | |
658 | DONE; | |
659 | } | |
660 | [(set_attr "length" "4,4,8")] | |
661 | ) | |
662 | ||
663 | ;; This is used for vec_duplicate<mode>s from memory, but can also | |
664 | ;; be used by combine to optimize selects of a a vec_duplicate<mode> | |
665 | ;; with zero. | |
666 | (define_insn "sve_ld1r<mode>" | |
667 | [(set (match_operand:SVE_ALL 0 "register_operand" "=w") | |
668 | (unspec:SVE_ALL | |
669 | [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
670 | (vec_duplicate:SVE_ALL | |
671 | (match_operand:<VEL> 2 "aarch64_sve_ld1r_operand" "Uty")) | |
672 | (match_operand:SVE_ALL 3 "aarch64_simd_imm_zero")] | |
673 | UNSPEC_SEL))] | |
674 | "TARGET_SVE" | |
675 | "ld1r<Vesize>\t%0.<Vetype>, %1/z, %2" | |
676 | ) | |
677 | ||
678 | ;; Load 128 bits from memory and duplicate to fill a vector. Since there | |
679 | ;; are so few operations on 128-bit "elements", we don't define a VNx1TI | |
680 | ;; and simply use vectors of bytes instead. | |
4a5920b6 | 681 | (define_insn "*sve_ld1rq<Vesize>" |
682 | [(set (match_operand:SVE_ALL 0 "register_operand" "=w") | |
683 | (unspec:SVE_ALL | |
684 | [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
8fa7f434 | 685 | (match_operand:TI 2 "aarch64_sve_ld1r_operand" "Uty")] |
686 | UNSPEC_LD1RQ))] | |
687 | "TARGET_SVE" | |
4a5920b6 | 688 | "ld1rq<Vesize>\t%0.<Vetype>, %1/z, %2" |
8fa7f434 | 689 | ) |
690 | ||
691 | ;; Implement a predicate broadcast by shifting the low bit of the scalar | |
692 | ;; input into the top bit and using a WHILELO. An alternative would be to | |
693 | ;; duplicate the input and do a compare with zero. | |
694 | (define_expand "vec_duplicate<mode>" | |
695 | [(set (match_operand:PRED_ALL 0 "register_operand") | |
696 | (vec_duplicate:PRED_ALL (match_operand 1 "register_operand")))] | |
697 | "TARGET_SVE" | |
698 | { | |
699 | rtx tmp = gen_reg_rtx (DImode); | |
700 | rtx op1 = gen_lowpart (DImode, operands[1]); | |
701 | emit_insn (gen_ashldi3 (tmp, op1, gen_int_mode (63, DImode))); | |
702 | emit_insn (gen_while_ultdi<mode> (operands[0], const0_rtx, tmp)); | |
703 | DONE; | |
704 | } | |
705 | ) | |
706 | ||
707 | (define_insn "vec_series<mode>" | |
708 | [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w") | |
709 | (vec_series:SVE_I | |
710 | (match_operand:<VEL> 1 "aarch64_sve_index_operand" "Usi, r, r") | |
711 | (match_operand:<VEL> 2 "aarch64_sve_index_operand" "r, Usi, r")))] | |
712 | "TARGET_SVE" | |
713 | "@ | |
714 | index\t%0.<Vetype>, #%1, %<vw>2 | |
715 | index\t%0.<Vetype>, %<vw>1, #%2 | |
716 | index\t%0.<Vetype>, %<vw>1, %<vw>2" | |
717 | ) | |
718 | ||
719 | ;; Optimize {x, x, x, x, ...} + {0, n, 2*n, 3*n, ...} if n is in range | |
720 | ;; of an INDEX instruction. | |
721 | (define_insn "*vec_series<mode>_plus" | |
722 | [(set (match_operand:SVE_I 0 "register_operand" "=w") | |
723 | (plus:SVE_I | |
724 | (vec_duplicate:SVE_I | |
725 | (match_operand:<VEL> 1 "register_operand" "r")) | |
726 | (match_operand:SVE_I 2 "immediate_operand")))] | |
727 | "TARGET_SVE && aarch64_check_zero_based_sve_index_immediate (operands[2])" | |
728 | { | |
729 | operands[2] = aarch64_check_zero_based_sve_index_immediate (operands[2]); | |
730 | return "index\t%0.<Vetype>, %<vw>1, #%2"; | |
731 | } | |
732 | ) | |
733 | ||
0ac5a51b | 734 | ;; Unpredicated LD[234]. |
735 | (define_expand "vec_load_lanes<mode><vsingle>" | |
736 | [(set (match_operand:SVE_STRUCT 0 "register_operand") | |
737 | (unspec:SVE_STRUCT | |
738 | [(match_dup 2) | |
739 | (match_operand:SVE_STRUCT 1 "memory_operand")] | |
740 | UNSPEC_LDN))] | |
741 | "TARGET_SVE" | |
742 | { | |
743 | operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
744 | } | |
745 | ) | |
746 | ||
747 | ;; Predicated LD[234]. | |
748 | (define_insn "vec_mask_load_lanes<mode><vsingle>" | |
749 | [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w") | |
750 | (unspec:SVE_STRUCT | |
751 | [(match_operand:<VPRED> 2 "register_operand" "Upl") | |
752 | (match_operand:SVE_STRUCT 1 "memory_operand" "m")] | |
753 | UNSPEC_LDN))] | |
754 | "TARGET_SVE" | |
755 | "ld<vector_count><Vesize>\t%0, %2/z, %1" | |
756 | ) | |
757 | ||
758 | ;; Unpredicated ST[234]. This is always a full update, so the dependence | |
759 | ;; on the old value of the memory location (via (match_dup 0)) is redundant. | |
760 | ;; There doesn't seem to be any obvious benefit to treating the all-true | |
761 | ;; case differently though. In particular, it's very unlikely that we'll | |
762 | ;; only find out during RTL that a store_lanes is dead. | |
763 | (define_expand "vec_store_lanes<mode><vsingle>" | |
764 | [(set (match_operand:SVE_STRUCT 0 "memory_operand") | |
765 | (unspec:SVE_STRUCT | |
766 | [(match_dup 2) | |
767 | (match_operand:SVE_STRUCT 1 "register_operand") | |
768 | (match_dup 0)] | |
769 | UNSPEC_STN))] | |
770 | "TARGET_SVE" | |
771 | { | |
772 | operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
773 | } | |
774 | ) | |
775 | ||
776 | ;; Predicated ST[234]. | |
777 | (define_insn "vec_mask_store_lanes<mode><vsingle>" | |
778 | [(set (match_operand:SVE_STRUCT 0 "memory_operand" "+m") | |
779 | (unspec:SVE_STRUCT | |
780 | [(match_operand:<VPRED> 2 "register_operand" "Upl") | |
781 | (match_operand:SVE_STRUCT 1 "register_operand" "w") | |
782 | (match_dup 0)] | |
783 | UNSPEC_STN))] | |
784 | "TARGET_SVE" | |
785 | "st<vector_count><Vesize>\t%1, %2, %0" | |
786 | ) | |
787 | ||
8fa7f434 | 788 | (define_expand "vec_perm<mode>" |
789 | [(match_operand:SVE_ALL 0 "register_operand") | |
790 | (match_operand:SVE_ALL 1 "register_operand") | |
791 | (match_operand:SVE_ALL 2 "register_operand") | |
792 | (match_operand:<V_INT_EQUIV> 3 "aarch64_sve_vec_perm_operand")] | |
793 | "TARGET_SVE && GET_MODE_NUNITS (<MODE>mode).is_constant ()" | |
794 | { | |
795 | aarch64_expand_sve_vec_perm (operands[0], operands[1], | |
796 | operands[2], operands[3]); | |
797 | DONE; | |
798 | } | |
799 | ) | |
800 | ||
801 | (define_insn "*aarch64_sve_tbl<mode>" | |
802 | [(set (match_operand:SVE_ALL 0 "register_operand" "=w") | |
803 | (unspec:SVE_ALL | |
804 | [(match_operand:SVE_ALL 1 "register_operand" "w") | |
805 | (match_operand:<V_INT_EQUIV> 2 "register_operand" "w")] | |
806 | UNSPEC_TBL))] | |
807 | "TARGET_SVE" | |
808 | "tbl\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>" | |
809 | ) | |
810 | ||
811 | (define_insn "*aarch64_sve_<perm_insn><perm_hilo><mode>" | |
812 | [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") | |
813 | (unspec:PRED_ALL [(match_operand:PRED_ALL 1 "register_operand" "Upa") | |
814 | (match_operand:PRED_ALL 2 "register_operand" "Upa")] | |
815 | PERMUTE))] | |
816 | "TARGET_SVE" | |
817 | "<perm_insn><perm_hilo>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>" | |
818 | ) | |
819 | ||
88e81b08 | 820 | (define_insn "aarch64_sve_<perm_insn><perm_hilo><mode>" |
8fa7f434 | 821 | [(set (match_operand:SVE_ALL 0 "register_operand" "=w") |
822 | (unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "w") | |
823 | (match_operand:SVE_ALL 2 "register_operand" "w")] | |
824 | PERMUTE))] | |
825 | "TARGET_SVE" | |
826 | "<perm_insn><perm_hilo>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>" | |
827 | ) | |
828 | ||
829 | (define_insn "*aarch64_sve_rev64<mode>" | |
830 | [(set (match_operand:SVE_BHS 0 "register_operand" "=w") | |
831 | (unspec:SVE_BHS | |
832 | [(match_operand:VNx2BI 1 "register_operand" "Upl") | |
833 | (unspec:SVE_BHS [(match_operand:SVE_BHS 2 "register_operand" "w")] | |
834 | UNSPEC_REV64)] | |
835 | UNSPEC_MERGE_PTRUE))] | |
836 | "TARGET_SVE" | |
837 | "rev<Vesize>\t%0.d, %1/m, %2.d" | |
838 | ) | |
839 | ||
840 | (define_insn "*aarch64_sve_rev32<mode>" | |
841 | [(set (match_operand:SVE_BH 0 "register_operand" "=w") | |
842 | (unspec:SVE_BH | |
843 | [(match_operand:VNx4BI 1 "register_operand" "Upl") | |
844 | (unspec:SVE_BH [(match_operand:SVE_BH 2 "register_operand" "w")] | |
845 | UNSPEC_REV32)] | |
846 | UNSPEC_MERGE_PTRUE))] | |
847 | "TARGET_SVE" | |
848 | "rev<Vesize>\t%0.s, %1/m, %2.s" | |
849 | ) | |
850 | ||
851 | (define_insn "*aarch64_sve_rev16vnx16qi" | |
852 | [(set (match_operand:VNx16QI 0 "register_operand" "=w") | |
853 | (unspec:VNx16QI | |
854 | [(match_operand:VNx8BI 1 "register_operand" "Upl") | |
855 | (unspec:VNx16QI [(match_operand:VNx16QI 2 "register_operand" "w")] | |
856 | UNSPEC_REV16)] | |
857 | UNSPEC_MERGE_PTRUE))] | |
858 | "TARGET_SVE" | |
859 | "revb\t%0.h, %1/m, %2.h" | |
860 | ) | |
861 | ||
862 | (define_insn "*aarch64_sve_rev<mode>" | |
863 | [(set (match_operand:SVE_ALL 0 "register_operand" "=w") | |
864 | (unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "w")] | |
865 | UNSPEC_REV))] | |
866 | "TARGET_SVE" | |
867 | "rev\t%0.<Vetype>, %1.<Vetype>") | |
868 | ||
869 | (define_insn "*aarch64_sve_dup_lane<mode>" | |
870 | [(set (match_operand:SVE_ALL 0 "register_operand" "=w") | |
871 | (vec_duplicate:SVE_ALL | |
872 | (vec_select:<VEL> | |
873 | (match_operand:SVE_ALL 1 "register_operand" "w") | |
874 | (parallel [(match_operand:SI 2 "const_int_operand")]))))] | |
875 | "TARGET_SVE | |
876 | && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 0, 63)" | |
877 | "dup\t%0.<Vetype>, %1.<Vetype>[%2]" | |
878 | ) | |
879 | ||
880 | ;; Note that the immediate (third) operand is the lane index not | |
881 | ;; the byte index. | |
882 | (define_insn "*aarch64_sve_ext<mode>" | |
883 | [(set (match_operand:SVE_ALL 0 "register_operand" "=w") | |
884 | (unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "0") | |
885 | (match_operand:SVE_ALL 2 "register_operand" "w") | |
886 | (match_operand:SI 3 "const_int_operand")] | |
887 | UNSPEC_EXT))] | |
888 | "TARGET_SVE | |
889 | && IN_RANGE (INTVAL (operands[3]) * GET_MODE_SIZE (<VEL>mode), 0, 255)" | |
890 | { | |
891 | operands[3] = GEN_INT (INTVAL (operands[3]) * GET_MODE_SIZE (<VEL>mode)); | |
892 | return "ext\\t%0.b, %0.b, %2.b, #%3"; | |
893 | } | |
894 | ) | |
895 | ||
896 | (define_insn "add<mode>3" | |
897 | [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w, w") | |
898 | (plus:SVE_I | |
899 | (match_operand:SVE_I 1 "register_operand" "%0, 0, 0, w") | |
900 | (match_operand:SVE_I 2 "aarch64_sve_add_operand" "vsa, vsn, vsi, w")))] | |
901 | "TARGET_SVE" | |
902 | "@ | |
903 | add\t%0.<Vetype>, %0.<Vetype>, #%D2 | |
904 | sub\t%0.<Vetype>, %0.<Vetype>, #%N2 | |
905 | * return aarch64_output_sve_inc_dec_immediate (\"%0.<Vetype>\", operands[2]); | |
906 | add\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>" | |
907 | ) | |
908 | ||
909 | (define_insn "sub<mode>3" | |
910 | [(set (match_operand:SVE_I 0 "register_operand" "=w, w") | |
911 | (minus:SVE_I | |
912 | (match_operand:SVE_I 1 "aarch64_sve_arith_operand" "w, vsa") | |
913 | (match_operand:SVE_I 2 "register_operand" "w, 0")))] | |
914 | "TARGET_SVE" | |
915 | "@ | |
916 | sub\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype> | |
917 | subr\t%0.<Vetype>, %0.<Vetype>, #%D1" | |
918 | ) | |
919 | ||
920 | ;; Unpredicated multiplication. | |
921 | (define_expand "mul<mode>3" | |
922 | [(set (match_operand:SVE_I 0 "register_operand") | |
923 | (unspec:SVE_I | |
924 | [(match_dup 3) | |
925 | (mult:SVE_I | |
926 | (match_operand:SVE_I 1 "register_operand") | |
927 | (match_operand:SVE_I 2 "aarch64_sve_mul_operand"))] | |
928 | UNSPEC_MERGE_PTRUE))] | |
929 | "TARGET_SVE" | |
930 | { | |
931 | operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
932 | } | |
933 | ) | |
934 | ||
935 | ;; Multiplication predicated with a PTRUE. We don't actually need the | |
936 | ;; predicate for the first alternative, but using Upa or X isn't likely | |
937 | ;; to gain much and would make the instruction seem less uniform to the | |
938 | ;; register allocator. | |
0fd5a0b4 | 939 | (define_insn_and_split "*mul<mode>3" |
83d1ca63 | 940 | [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w") |
8fa7f434 | 941 | (unspec:SVE_I |
83d1ca63 | 942 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") |
8fa7f434 | 943 | (mult:SVE_I |
83d1ca63 | 944 | (match_operand:SVE_I 2 "register_operand" "%0, 0, w") |
945 | (match_operand:SVE_I 3 "aarch64_sve_mul_operand" "vsm, w, w"))] | |
8fa7f434 | 946 | UNSPEC_MERGE_PTRUE))] |
947 | "TARGET_SVE" | |
948 | "@ | |
0fd5a0b4 | 949 | # |
83d1ca63 | 950 | mul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> |
951 | movprfx\t%0, %2\;mul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
0fd5a0b4 | 952 | ; Split the unpredicated form after reload, so that we don't have |
953 | ; the unnecessary PTRUE. | |
954 | "&& reload_completed | |
955 | && !register_operand (operands[3], <MODE>mode)" | |
956 | [(set (match_dup 0) (mult:SVE_I (match_dup 2) (match_dup 3)))] | |
957 | "" | |
83d1ca63 | 958 | [(set_attr "movprfx" "*,*,yes")] |
8fa7f434 | 959 | ) |
960 | ||
0fd5a0b4 | 961 | ;; Unpredicated multiplications by a constant (post-RA only). |
962 | ;; These are generated by splitting a predicated instruction whose | |
963 | ;; predicate is unused. | |
964 | (define_insn "*post_ra_mul<mode>3" | |
965 | [(set (match_operand:SVE_I 0 "register_operand" "=w") | |
966 | (mult:SVE_I | |
967 | (match_operand:SVE_I 1 "register_operand" "0") | |
968 | (match_operand:SVE_I 2 "aarch64_sve_mul_immediate")))] | |
969 | "TARGET_SVE && reload_completed" | |
970 | "mul\t%0.<Vetype>, %0.<Vetype>, #%2" | |
971 | ) | |
972 | ||
8fa7f434 | 973 | (define_insn "*madd<mode>" |
83d1ca63 | 974 | [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w") |
8fa7f434 | 975 | (plus:SVE_I |
976 | (unspec:SVE_I | |
83d1ca63 | 977 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") |
978 | (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w, w") | |
979 | (match_operand:SVE_I 3 "register_operand" "w, w, w"))] | |
8fa7f434 | 980 | UNSPEC_MERGE_PTRUE) |
83d1ca63 | 981 | (match_operand:SVE_I 4 "register_operand" "w, 0, w")))] |
8fa7f434 | 982 | "TARGET_SVE" |
983 | "@ | |
984 | mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype> | |
83d1ca63 | 985 | mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> |
986 | movprfx\t%0, %4\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>" | |
987 | [(set_attr "movprfx" "*,*,yes")] | |
8fa7f434 | 988 | ) |
989 | ||
990 | (define_insn "*msub<mode>3" | |
83d1ca63 | 991 | [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w") |
8fa7f434 | 992 | (minus:SVE_I |
83d1ca63 | 993 | (match_operand:SVE_I 4 "register_operand" "w, 0, w") |
8fa7f434 | 994 | (unspec:SVE_I |
83d1ca63 | 995 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") |
996 | (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w, w") | |
997 | (match_operand:SVE_I 3 "register_operand" "w, w, w"))] | |
8fa7f434 | 998 | UNSPEC_MERGE_PTRUE)))] |
999 | "TARGET_SVE" | |
1000 | "@ | |
1001 | msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype> | |
83d1ca63 | 1002 | mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> |
1003 | movprfx\t%0, %4\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>" | |
1004 | [(set_attr "movprfx" "*,*,yes")] | |
8fa7f434 | 1005 | ) |
1006 | ||
dea784df | 1007 | ;; Unpredicated highpart multiplication. |
1008 | (define_expand "<su>mul<mode>3_highpart" | |
1009 | [(set (match_operand:SVE_I 0 "register_operand") | |
1010 | (unspec:SVE_I | |
1011 | [(match_dup 3) | |
1012 | (unspec:SVE_I [(match_operand:SVE_I 1 "register_operand") | |
1013 | (match_operand:SVE_I 2 "register_operand")] | |
1014 | MUL_HIGHPART)] | |
1015 | UNSPEC_MERGE_PTRUE))] | |
1016 | "TARGET_SVE" | |
1017 | { | |
1018 | operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
1019 | } | |
1020 | ) | |
1021 | ||
1022 | ;; Predicated highpart multiplication. | |
1023 | (define_insn "*<su>mul<mode>3_highpart" | |
83d1ca63 | 1024 | [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") |
dea784df | 1025 | (unspec:SVE_I |
83d1ca63 | 1026 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") |
1027 | (unspec:SVE_I [(match_operand:SVE_I 2 "register_operand" "%0, w") | |
1028 | (match_operand:SVE_I 3 "register_operand" "w, w")] | |
dea784df | 1029 | MUL_HIGHPART)] |
1030 | UNSPEC_MERGE_PTRUE))] | |
1031 | "TARGET_SVE" | |
83d1ca63 | 1032 | "@ |
1033 | <su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
1034 | movprfx\t%0, %2\;<su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
1035 | [(set_attr "movprfx" "*,yes")] | |
dea784df | 1036 | ) |
1037 | ||
0bbf725c | 1038 | ;; Unpredicated division. |
1039 | (define_expand "<optab><mode>3" | |
1040 | [(set (match_operand:SVE_SDI 0 "register_operand") | |
1041 | (unspec:SVE_SDI | |
1042 | [(match_dup 3) | |
1043 | (SVE_INT_BINARY_SD:SVE_SDI | |
1044 | (match_operand:SVE_SDI 1 "register_operand") | |
1045 | (match_operand:SVE_SDI 2 "register_operand"))] | |
1046 | UNSPEC_MERGE_PTRUE))] | |
1047 | "TARGET_SVE" | |
1048 | { | |
1049 | operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
1050 | } | |
1051 | ) | |
1052 | ||
1053 | ;; Division predicated with a PTRUE. | |
1054 | (define_insn "*<optab><mode>3" | |
83d1ca63 | 1055 | [(set (match_operand:SVE_SDI 0 "register_operand" "=w, w, ?&w") |
0bbf725c | 1056 | (unspec:SVE_SDI |
83d1ca63 | 1057 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") |
0bbf725c | 1058 | (SVE_INT_BINARY_SD:SVE_SDI |
83d1ca63 | 1059 | (match_operand:SVE_SDI 2 "register_operand" "0, w, w") |
1060 | (match_operand:SVE_SDI 3 "aarch64_sve_mul_operand" "w, 0, w"))] | |
0bbf725c | 1061 | UNSPEC_MERGE_PTRUE))] |
1062 | "TARGET_SVE" | |
1063 | "@ | |
1064 | <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
83d1ca63 | 1065 | <sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> |
1066 | movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
1067 | [(set_attr "movprfx" "*,*,yes")] | |
0bbf725c | 1068 | ) |
1069 | ||
8fa7f434 | 1070 | ;; Unpredicated NEG, NOT and POPCOUNT. |
1071 | (define_expand "<optab><mode>2" | |
1072 | [(set (match_operand:SVE_I 0 "register_operand") | |
1073 | (unspec:SVE_I | |
1074 | [(match_dup 2) | |
1075 | (SVE_INT_UNARY:SVE_I (match_operand:SVE_I 1 "register_operand"))] | |
1076 | UNSPEC_MERGE_PTRUE))] | |
1077 | "TARGET_SVE" | |
1078 | { | |
1079 | operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
1080 | } | |
1081 | ) | |
1082 | ||
1083 | ;; NEG, NOT and POPCOUNT predicated with a PTRUE. | |
1084 | (define_insn "*<optab><mode>2" | |
1085 | [(set (match_operand:SVE_I 0 "register_operand" "=w") | |
1086 | (unspec:SVE_I | |
1087 | [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
1088 | (SVE_INT_UNARY:SVE_I | |
1089 | (match_operand:SVE_I 2 "register_operand" "w"))] | |
1090 | UNSPEC_MERGE_PTRUE))] | |
1091 | "TARGET_SVE" | |
1092 | "<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>" | |
1093 | ) | |
1094 | ||
1095 | ;; Vector AND, ORR and XOR. | |
1096 | (define_insn "<optab><mode>3" | |
1097 | [(set (match_operand:SVE_I 0 "register_operand" "=w, w") | |
1098 | (LOGICAL:SVE_I | |
1099 | (match_operand:SVE_I 1 "register_operand" "%0, w") | |
1100 | (match_operand:SVE_I 2 "aarch64_sve_logical_operand" "vsl, w")))] | |
1101 | "TARGET_SVE" | |
1102 | "@ | |
1103 | <logical>\t%0.<Vetype>, %0.<Vetype>, #%C2 | |
1104 | <logical>\t%0.d, %1.d, %2.d" | |
1105 | ) | |
1106 | ||
1107 | ;; Vector AND, ORR and XOR on floating-point modes. We avoid subregs | |
1108 | ;; by providing this, but we need to use UNSPECs since rtx logical ops | |
1109 | ;; aren't defined for floating-point modes. | |
1110 | (define_insn "*<optab><mode>3" | |
1111 | [(set (match_operand:SVE_F 0 "register_operand" "=w") | |
1112 | (unspec:SVE_F [(match_operand:SVE_F 1 "register_operand" "w") | |
1113 | (match_operand:SVE_F 2 "register_operand" "w")] | |
1114 | LOGICALF))] | |
1115 | "TARGET_SVE" | |
1116 | "<logicalf_op>\t%0.d, %1.d, %2.d" | |
1117 | ) | |
1118 | ||
1119 | ;; REG_EQUAL notes on "not<mode>3" should ensure that we can generate | |
1120 | ;; this pattern even though the NOT instruction itself is predicated. | |
1121 | (define_insn "bic<mode>3" | |
1122 | [(set (match_operand:SVE_I 0 "register_operand" "=w") | |
1123 | (and:SVE_I | |
1124 | (not:SVE_I (match_operand:SVE_I 1 "register_operand" "w")) | |
1125 | (match_operand:SVE_I 2 "register_operand" "w")))] | |
1126 | "TARGET_SVE" | |
1127 | "bic\t%0.d, %2.d, %1.d" | |
1128 | ) | |
1129 | ||
1130 | ;; Predicate AND. We can reuse one of the inputs as the GP. | |
1131 | (define_insn "and<mode>3" | |
1132 | [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") | |
1133 | (and:PRED_ALL (match_operand:PRED_ALL 1 "register_operand" "Upa") | |
1134 | (match_operand:PRED_ALL 2 "register_operand" "Upa")))] | |
1135 | "TARGET_SVE" | |
1136 | "and\t%0.b, %1/z, %1.b, %2.b" | |
1137 | ) | |
1138 | ||
1139 | ;; Unpredicated predicate ORR and XOR. | |
1140 | (define_expand "<optab><mode>3" | |
1141 | [(set (match_operand:PRED_ALL 0 "register_operand") | |
1142 | (and:PRED_ALL | |
1143 | (LOGICAL_OR:PRED_ALL | |
1144 | (match_operand:PRED_ALL 1 "register_operand") | |
1145 | (match_operand:PRED_ALL 2 "register_operand")) | |
1146 | (match_dup 3)))] | |
1147 | "TARGET_SVE" | |
1148 | { | |
1149 | operands[3] = force_reg (<MODE>mode, CONSTM1_RTX (<MODE>mode)); | |
1150 | } | |
1151 | ) | |
1152 | ||
1153 | ;; Predicated predicate ORR and XOR. | |
1154 | (define_insn "pred_<optab><mode>3" | |
1155 | [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") | |
1156 | (and:PRED_ALL | |
1157 | (LOGICAL:PRED_ALL | |
1158 | (match_operand:PRED_ALL 2 "register_operand" "Upa") | |
1159 | (match_operand:PRED_ALL 3 "register_operand" "Upa")) | |
1160 | (match_operand:PRED_ALL 1 "register_operand" "Upa")))] | |
1161 | "TARGET_SVE" | |
1162 | "<logical>\t%0.b, %1/z, %2.b, %3.b" | |
1163 | ) | |
1164 | ||
1165 | ;; Perform a logical operation on operands 2 and 3, using operand 1 as | |
1166 | ;; the GP (which is known to be a PTRUE). Store the result in operand 0 | |
1167 | ;; and set the flags in the same way as for PTEST. The (and ...) in the | |
1168 | ;; UNSPEC_PTEST_PTRUE is logically redundant, but means that the tested | |
1169 | ;; value is structurally equivalent to rhs of the second set. | |
1170 | (define_insn "*<optab><mode>3_cc" | |
1171 | [(set (reg:CC CC_REGNUM) | |
1172 | (compare:CC | |
1173 | (unspec:SI [(match_operand:PRED_ALL 1 "register_operand" "Upa") | |
1174 | (and:PRED_ALL | |
1175 | (LOGICAL:PRED_ALL | |
1176 | (match_operand:PRED_ALL 2 "register_operand" "Upa") | |
1177 | (match_operand:PRED_ALL 3 "register_operand" "Upa")) | |
1178 | (match_dup 1))] | |
1179 | UNSPEC_PTEST_PTRUE) | |
1180 | (const_int 0))) | |
1181 | (set (match_operand:PRED_ALL 0 "register_operand" "=Upa") | |
1182 | (and:PRED_ALL (LOGICAL:PRED_ALL (match_dup 2) (match_dup 3)) | |
1183 | (match_dup 1)))] | |
1184 | "TARGET_SVE" | |
1185 | "<logical>s\t%0.b, %1/z, %2.b, %3.b" | |
1186 | ) | |
1187 | ||
1188 | ;; Unpredicated predicate inverse. | |
1189 | (define_expand "one_cmpl<mode>2" | |
1190 | [(set (match_operand:PRED_ALL 0 "register_operand") | |
1191 | (and:PRED_ALL | |
1192 | (not:PRED_ALL (match_operand:PRED_ALL 1 "register_operand")) | |
1193 | (match_dup 2)))] | |
1194 | "TARGET_SVE" | |
1195 | { | |
1196 | operands[2] = force_reg (<MODE>mode, CONSTM1_RTX (<MODE>mode)); | |
1197 | } | |
1198 | ) | |
1199 | ||
1200 | ;; Predicated predicate inverse. | |
1201 | (define_insn "*one_cmpl<mode>3" | |
1202 | [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") | |
1203 | (and:PRED_ALL | |
1204 | (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa")) | |
1205 | (match_operand:PRED_ALL 1 "register_operand" "Upa")))] | |
1206 | "TARGET_SVE" | |
1207 | "not\t%0.b, %1/z, %2.b" | |
1208 | ) | |
1209 | ||
1210 | ;; Predicated predicate BIC and ORN. | |
1211 | (define_insn "*<nlogical><mode>3" | |
1212 | [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") | |
1213 | (and:PRED_ALL | |
1214 | (NLOGICAL:PRED_ALL | |
1215 | (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa")) | |
1216 | (match_operand:PRED_ALL 3 "register_operand" "Upa")) | |
1217 | (match_operand:PRED_ALL 1 "register_operand" "Upa")))] | |
1218 | "TARGET_SVE" | |
1219 | "<nlogical>\t%0.b, %1/z, %3.b, %2.b" | |
1220 | ) | |
1221 | ||
1222 | ;; Predicated predicate NAND and NOR. | |
1223 | (define_insn "*<logical_nn><mode>3" | |
1224 | [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") | |
1225 | (and:PRED_ALL | |
1226 | (NLOGICAL:PRED_ALL | |
1227 | (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa")) | |
1228 | (not:PRED_ALL (match_operand:PRED_ALL 3 "register_operand" "Upa"))) | |
1229 | (match_operand:PRED_ALL 1 "register_operand" "Upa")))] | |
1230 | "TARGET_SVE" | |
1231 | "<logical_nn>\t%0.b, %1/z, %2.b, %3.b" | |
1232 | ) | |
1233 | ||
1234 | ;; Unpredicated LSL, LSR and ASR by a vector. | |
1235 | (define_expand "v<optab><mode>3" | |
1236 | [(set (match_operand:SVE_I 0 "register_operand") | |
1237 | (unspec:SVE_I | |
1238 | [(match_dup 3) | |
1239 | (ASHIFT:SVE_I | |
1240 | (match_operand:SVE_I 1 "register_operand") | |
1241 | (match_operand:SVE_I 2 "aarch64_sve_<lr>shift_operand"))] | |
1242 | UNSPEC_MERGE_PTRUE))] | |
1243 | "TARGET_SVE" | |
1244 | { | |
1245 | operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
1246 | } | |
1247 | ) | |
1248 | ||
1249 | ;; LSL, LSR and ASR by a vector, predicated with a PTRUE. We don't | |
1250 | ;; actually need the predicate for the first alternative, but using Upa | |
1251 | ;; or X isn't likely to gain much and would make the instruction seem | |
1252 | ;; less uniform to the register allocator. | |
0fd5a0b4 | 1253 | (define_insn_and_split "*v<optab><mode>3" |
83d1ca63 | 1254 | [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w") |
8fa7f434 | 1255 | (unspec:SVE_I |
83d1ca63 | 1256 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") |
8fa7f434 | 1257 | (ASHIFT:SVE_I |
83d1ca63 | 1258 | (match_operand:SVE_I 2 "register_operand" "w, 0, w") |
1259 | (match_operand:SVE_I 3 "aarch64_sve_<lr>shift_operand" "D<lr>, w, w"))] | |
8fa7f434 | 1260 | UNSPEC_MERGE_PTRUE))] |
1261 | "TARGET_SVE" | |
1262 | "@ | |
0fd5a0b4 | 1263 | # |
83d1ca63 | 1264 | <shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> |
1265 | movprfx\t%0, %2\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
0fd5a0b4 | 1266 | "&& reload_completed |
1267 | && !register_operand (operands[3], <MODE>mode)" | |
1268 | [(set (match_dup 0) (ASHIFT:SVE_I (match_dup 2) (match_dup 3)))] | |
1269 | "" | |
83d1ca63 | 1270 | [(set_attr "movprfx" "*,*,yes")] |
8fa7f434 | 1271 | ) |
1272 | ||
0fd5a0b4 | 1273 | ;; Unpredicated shift operations by a constant (post-RA only). |
1274 | ;; These are generated by splitting a predicated instruction whose | |
1275 | ;; predicate is unused. | |
1276 | (define_insn "*post_ra_v<optab><mode>3" | |
1277 | [(set (match_operand:SVE_I 0 "register_operand" "=w") | |
1278 | (ASHIFT:SVE_I | |
1279 | (match_operand:SVE_I 1 "register_operand" "w") | |
1280 | (match_operand:SVE_I 2 "aarch64_simd_<lr>shift_imm")))] | |
1281 | "TARGET_SVE && reload_completed" | |
1282 | "<shift>\t%0.<Vetype>, %1.<Vetype>, #%2" | |
1283 | ) | |
1284 | ||
8fa7f434 | 1285 | ;; LSL, LSR and ASR by a scalar, which expands into one of the vector |
1286 | ;; shifts above. | |
1287 | (define_expand "<ASHIFT:optab><mode>3" | |
1288 | [(set (match_operand:SVE_I 0 "register_operand") | |
1289 | (ASHIFT:SVE_I (match_operand:SVE_I 1 "register_operand") | |
1290 | (match_operand:<VEL> 2 "general_operand")))] | |
1291 | "TARGET_SVE" | |
1292 | { | |
1293 | rtx amount; | |
1294 | if (CONST_INT_P (operands[2])) | |
1295 | { | |
1296 | amount = gen_const_vec_duplicate (<MODE>mode, operands[2]); | |
1297 | if (!aarch64_sve_<lr>shift_operand (operands[2], <MODE>mode)) | |
1298 | amount = force_reg (<MODE>mode, amount); | |
1299 | } | |
1300 | else | |
1301 | { | |
1302 | amount = gen_reg_rtx (<MODE>mode); | |
1303 | emit_insn (gen_vec_duplicate<mode> (amount, | |
1304 | convert_to_mode (<VEL>mode, | |
1305 | operands[2], 0))); | |
1306 | } | |
1307 | emit_insn (gen_v<optab><mode>3 (operands[0], operands[1], amount)); | |
1308 | DONE; | |
1309 | } | |
1310 | ) | |
1311 | ||
1312 | ;; Test all bits of operand 1. Operand 0 is a GP that is known to hold PTRUE. | |
1313 | ;; | |
1314 | ;; Using UNSPEC_PTEST_PTRUE allows combine patterns to assume that the GP | |
1315 | ;; is a PTRUE even if the optimizers haven't yet been able to propagate | |
1316 | ;; the constant. We would use a separate unspec code for PTESTs involving | |
1317 | ;; GPs that might not be PTRUEs. | |
1318 | (define_insn "ptest_ptrue<mode>" | |
1319 | [(set (reg:CC CC_REGNUM) | |
1320 | (compare:CC | |
1321 | (unspec:SI [(match_operand:PRED_ALL 0 "register_operand" "Upa") | |
1322 | (match_operand:PRED_ALL 1 "register_operand" "Upa")] | |
1323 | UNSPEC_PTEST_PTRUE) | |
1324 | (const_int 0)))] | |
1325 | "TARGET_SVE" | |
1326 | "ptest\t%0, %1.b" | |
1327 | ) | |
1328 | ||
1329 | ;; Set element I of the result if operand1 + J < operand2 for all J in [0, I]. | |
1330 | ;; with the comparison being unsigned. | |
1331 | (define_insn "while_ult<GPI:mode><PRED_ALL:mode>" | |
1332 | [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") | |
1333 | (unspec:PRED_ALL [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ") | |
1334 | (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")] | |
1335 | UNSPEC_WHILE_LO)) | |
1336 | (clobber (reg:CC CC_REGNUM))] | |
1337 | "TARGET_SVE" | |
1338 | "whilelo\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2" | |
1339 | ) | |
1340 | ||
1341 | ;; WHILELO sets the flags in the same way as a PTEST with a PTRUE GP. | |
1342 | ;; Handle the case in which both results are useful. The GP operand | |
1343 | ;; to the PTEST isn't needed, so we allow it to be anything. | |
1344 | (define_insn_and_split "while_ult<GPI:mode><PRED_ALL:mode>_cc" | |
1345 | [(set (reg:CC CC_REGNUM) | |
1346 | (compare:CC | |
1347 | (unspec:SI [(match_operand:PRED_ALL 1) | |
1348 | (unspec:PRED_ALL | |
1349 | [(match_operand:GPI 2 "aarch64_reg_or_zero" "rZ") | |
1350 | (match_operand:GPI 3 "aarch64_reg_or_zero" "rZ")] | |
1351 | UNSPEC_WHILE_LO)] | |
1352 | UNSPEC_PTEST_PTRUE) | |
1353 | (const_int 0))) | |
1354 | (set (match_operand:PRED_ALL 0 "register_operand" "=Upa") | |
1355 | (unspec:PRED_ALL [(match_dup 2) | |
1356 | (match_dup 3)] | |
1357 | UNSPEC_WHILE_LO))] | |
1358 | "TARGET_SVE" | |
1359 | "whilelo\t%0.<PRED_ALL:Vetype>, %<w>2, %<w>3" | |
1360 | ;; Force the compiler to drop the unused predicate operand, so that we | |
1361 | ;; don't have an unnecessary PTRUE. | |
1362 | "&& !CONSTANT_P (operands[1])" | |
1363 | [(const_int 0)] | |
1364 | { | |
1365 | emit_insn (gen_while_ult<GPI:mode><PRED_ALL:mode>_cc | |
1366 | (operands[0], CONSTM1_RTX (<MODE>mode), | |
1367 | operands[2], operands[3])); | |
1368 | DONE; | |
1369 | } | |
1370 | ) | |
1371 | ||
b293e19f | 1372 | ;; Integer comparisons predicated with a PTRUE. |
1373 | (define_insn "*cmp<cmp_op><mode>" | |
8fa7f434 | 1374 | [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa") |
1375 | (unspec:<VPRED> | |
1376 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
b293e19f | 1377 | (SVE_INT_CMP:<VPRED> |
1378 | (match_operand:SVE_I 2 "register_operand" "w, w") | |
1379 | (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))] | |
1380 | UNSPEC_MERGE_PTRUE)) | |
8fa7f434 | 1381 | (clobber (reg:CC CC_REGNUM))] |
1382 | "TARGET_SVE" | |
1383 | "@ | |
1384 | cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3 | |
1385 | cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>" | |
1386 | ) | |
1387 | ||
b293e19f | 1388 | ;; Integer comparisons predicated with a PTRUE in which only the flags result |
1389 | ;; is interesting. | |
1390 | (define_insn "*cmp<cmp_op><mode>_ptest" | |
8fa7f434 | 1391 | [(set (reg:CC CC_REGNUM) |
1392 | (compare:CC | |
1393 | (unspec:SI | |
1394 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
1395 | (unspec:<VPRED> | |
1396 | [(match_dup 1) | |
b293e19f | 1397 | (SVE_INT_CMP:<VPRED> |
1398 | (match_operand:SVE_I 2 "register_operand" "w, w") | |
1399 | (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))] | |
1400 | UNSPEC_MERGE_PTRUE)] | |
8fa7f434 | 1401 | UNSPEC_PTEST_PTRUE) |
1402 | (const_int 0))) | |
1403 | (clobber (match_scratch:<VPRED> 0 "=Upa, Upa"))] | |
1404 | "TARGET_SVE" | |
1405 | "@ | |
1406 | cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3 | |
1407 | cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>" | |
1408 | ) | |
1409 | ||
b293e19f | 1410 | ;; Integer comparisons predicated with a PTRUE in which both the flag and |
1411 | ;; predicate results are interesting. | |
1412 | (define_insn "*cmp<cmp_op><mode>_cc" | |
8fa7f434 | 1413 | [(set (reg:CC CC_REGNUM) |
1414 | (compare:CC | |
1415 | (unspec:SI | |
1416 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
1417 | (unspec:<VPRED> | |
1418 | [(match_dup 1) | |
b293e19f | 1419 | (SVE_INT_CMP:<VPRED> |
1420 | (match_operand:SVE_I 2 "register_operand" "w, w") | |
1421 | (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))] | |
1422 | UNSPEC_MERGE_PTRUE)] | |
8fa7f434 | 1423 | UNSPEC_PTEST_PTRUE) |
1424 | (const_int 0))) | |
1425 | (set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa") | |
1426 | (unspec:<VPRED> | |
1427 | [(match_dup 1) | |
b293e19f | 1428 | (SVE_INT_CMP:<VPRED> |
1429 | (match_dup 2) | |
1430 | (match_dup 3))] | |
1431 | UNSPEC_MERGE_PTRUE))] | |
8fa7f434 | 1432 | "TARGET_SVE" |
1433 | "@ | |
1434 | cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3 | |
1435 | cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>" | |
1436 | ) | |
1437 | ||
30dd727b | 1438 | ;; Predicated integer comparisons, formed by combining a PTRUE-predicated |
1439 | ;; comparison with an AND. Split the instruction into its preferred form | |
1440 | ;; (below) at the earliest opportunity, in order to get rid of the | |
1441 | ;; redundant operand 1. | |
1442 | (define_insn_and_split "*pred_cmp<cmp_op><mode>_combine" | |
1443 | [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa") | |
1444 | (and:<VPRED> | |
1445 | (unspec:<VPRED> | |
1446 | [(match_operand:<VPRED> 1) | |
1447 | (SVE_INT_CMP:<VPRED> | |
1448 | (match_operand:SVE_I 2 "register_operand" "w, w") | |
1449 | (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))] | |
1450 | UNSPEC_MERGE_PTRUE) | |
1451 | (match_operand:<VPRED> 4 "register_operand" "Upl, Upl"))) | |
1452 | (clobber (reg:CC CC_REGNUM))] | |
1453 | "TARGET_SVE" | |
1454 | "#" | |
1455 | "&& 1" | |
1456 | [(parallel | |
1457 | [(set (match_dup 0) | |
1458 | (and:<VPRED> | |
1459 | (SVE_INT_CMP:<VPRED> | |
1460 | (match_dup 2) | |
1461 | (match_dup 3)) | |
1462 | (match_dup 4))) | |
1463 | (clobber (reg:CC CC_REGNUM))])] | |
1464 | ) | |
1465 | ||
1466 | ;; Predicated integer comparisons. | |
1467 | (define_insn "*pred_cmp<cmp_op><mode>" | |
1468 | [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa") | |
1469 | (and:<VPRED> | |
1470 | (SVE_INT_CMP:<VPRED> | |
1471 | (match_operand:SVE_I 2 "register_operand" "w, w") | |
1472 | (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w")) | |
1473 | (match_operand:<VPRED> 1 "register_operand" "Upl, Upl"))) | |
1474 | (clobber (reg:CC CC_REGNUM))] | |
1475 | "TARGET_SVE" | |
1476 | "@ | |
1477 | cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3 | |
1478 | cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>" | |
1479 | ) | |
1480 | ||
b293e19f | 1481 | ;; Floating-point comparisons predicated with a PTRUE. |
1482 | (define_insn "*fcm<cmp_op><mode>" | |
8fa7f434 | 1483 | [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa") |
1484 | (unspec:<VPRED> | |
1485 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
b293e19f | 1486 | (SVE_FP_CMP:<VPRED> |
1487 | (match_operand:SVE_F 2 "register_operand" "w, w") | |
1488 | (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w"))] | |
1489 | UNSPEC_MERGE_PTRUE))] | |
8fa7f434 | 1490 | "TARGET_SVE" |
1491 | "@ | |
1492 | fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #0.0 | |
1493 | fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>" | |
1494 | ) | |
1495 | ||
b293e19f | 1496 | (define_insn "*fcmuo<mode>" |
8fa7f434 | 1497 | [(set (match_operand:<VPRED> 0 "register_operand" "=Upa") |
1498 | (unspec:<VPRED> | |
1499 | [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
b293e19f | 1500 | (unordered:<VPRED> |
1501 | (match_operand:SVE_F 2 "register_operand" "w") | |
1502 | (match_operand:SVE_F 3 "register_operand" "w"))] | |
1503 | UNSPEC_MERGE_PTRUE))] | |
8fa7f434 | 1504 | "TARGET_SVE" |
1505 | "fcmuo\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>" | |
1506 | ) | |
1507 | ||
30dd727b | 1508 | ;; Floating-point comparisons predicated on a PTRUE, with the results ANDed |
1509 | ;; with another predicate P. This does not have the same trapping behavior | |
1510 | ;; as predicating the comparison itself on P, but it's a legitimate fold, | |
1511 | ;; since we can drop any potentially-trapping operations whose results | |
1512 | ;; are not needed. | |
1513 | ;; | |
1514 | ;; Split the instruction into its preferred form (below) at the earliest | |
1515 | ;; opportunity, in order to get rid of the redundant operand 1. | |
1516 | (define_insn_and_split "*fcm<cmp_op><mode>_and_combine" | |
1517 | [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa") | |
1518 | (and:<VPRED> | |
1519 | (unspec:<VPRED> | |
1520 | [(match_operand:<VPRED> 1) | |
1521 | (SVE_FP_CMP | |
1522 | (match_operand:SVE_F 2 "register_operand" "w, w") | |
1523 | (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w"))] | |
1524 | UNSPEC_MERGE_PTRUE) | |
1525 | (match_operand:<VPRED> 4 "register_operand" "Upl, Upl")))] | |
1526 | "TARGET_SVE" | |
1527 | "#" | |
1528 | "&& 1" | |
1529 | [(set (match_dup 0) | |
1530 | (and:<VPRED> | |
1531 | (SVE_FP_CMP:<VPRED> | |
1532 | (match_dup 2) | |
1533 | (match_dup 3)) | |
1534 | (match_dup 4)))] | |
1535 | ) | |
1536 | ||
1537 | (define_insn_and_split "*fcmuo<mode>_and_combine" | |
1538 | [(set (match_operand:<VPRED> 0 "register_operand" "=Upa") | |
1539 | (and:<VPRED> | |
1540 | (unspec:<VPRED> | |
1541 | [(match_operand:<VPRED> 1) | |
1542 | (unordered | |
1543 | (match_operand:SVE_F 2 "register_operand" "w") | |
1544 | (match_operand:SVE_F 3 "register_operand" "w"))] | |
1545 | UNSPEC_MERGE_PTRUE) | |
1546 | (match_operand:<VPRED> 4 "register_operand" "Upl")))] | |
1547 | "TARGET_SVE" | |
1548 | "#" | |
1549 | "&& 1" | |
1550 | [(set (match_dup 0) | |
1551 | (and:<VPRED> | |
1552 | (unordered:<VPRED> | |
1553 | (match_dup 2) | |
1554 | (match_dup 3)) | |
1555 | (match_dup 4)))] | |
1556 | ) | |
1557 | ||
1558 | ;; Unpredicated floating-point comparisons, with the results ANDed | |
1559 | ;; with another predicate. This is a valid fold for the same reasons | |
1560 | ;; as above. | |
1561 | (define_insn "*fcm<cmp_op><mode>_and" | |
1562 | [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa") | |
1563 | (and:<VPRED> | |
1564 | (SVE_FP_CMP:<VPRED> | |
1565 | (match_operand:SVE_F 2 "register_operand" "w, w") | |
1566 | (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w")) | |
1567 | (match_operand:<VPRED> 1 "register_operand" "Upl, Upl")))] | |
1568 | "TARGET_SVE" | |
1569 | "@ | |
1570 | fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #0.0 | |
1571 | fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>" | |
1572 | ) | |
1573 | ||
1574 | (define_insn "*fcmuo<mode>_and" | |
1575 | [(set (match_operand:<VPRED> 0 "register_operand" "=Upa") | |
1576 | (and:<VPRED> | |
1577 | (unordered:<VPRED> | |
1578 | (match_operand:SVE_F 2 "register_operand" "w") | |
1579 | (match_operand:SVE_F 3 "register_operand" "w")) | |
1580 | (match_operand:<VPRED> 1 "register_operand" "Upl")))] | |
1581 | "TARGET_SVE" | |
1582 | "fcmuo\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>" | |
1583 | ) | |
1584 | ||
b293e19f | 1585 | ;; Predicated floating-point comparisons. We don't need a version |
1586 | ;; of this for unordered comparisons. | |
1587 | (define_insn "*pred_fcm<cmp_op><mode>" | |
1588 | [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa") | |
1589 | (unspec:<VPRED> | |
1590 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
1591 | (match_operand:SVE_F 2 "register_operand" "w, w") | |
1592 | (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w")] | |
1593 | SVE_COND_FP_CMP))] | |
1594 | "TARGET_SVE" | |
1595 | "@ | |
1596 | fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #0.0 | |
1597 | fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>" | |
1598 | ) | |
1599 | ||
8fa7f434 | 1600 | ;; vcond_mask operand order: true, false, mask |
1601 | ;; UNSPEC_SEL operand order: mask, true, false (as for VEC_COND_EXPR) | |
1602 | ;; SEL operand order: mask, true, false | |
1603 | (define_insn "vcond_mask_<mode><vpred>" | |
1604 | [(set (match_operand:SVE_ALL 0 "register_operand" "=w") | |
1605 | (unspec:SVE_ALL | |
1606 | [(match_operand:<VPRED> 3 "register_operand" "Upa") | |
1607 | (match_operand:SVE_ALL 1 "register_operand" "w") | |
1608 | (match_operand:SVE_ALL 2 "register_operand" "w")] | |
1609 | UNSPEC_SEL))] | |
1610 | "TARGET_SVE" | |
1611 | "sel\t%0.<Vetype>, %3, %1.<Vetype>, %2.<Vetype>" | |
1612 | ) | |
1613 | ||
1614 | ;; Selects between a duplicated immediate and zero. | |
1615 | (define_insn "aarch64_sve_dup<mode>_const" | |
1616 | [(set (match_operand:SVE_I 0 "register_operand" "=w") | |
1617 | (unspec:SVE_I | |
1618 | [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
1619 | (match_operand:SVE_I 2 "aarch64_sve_dup_immediate") | |
1620 | (match_operand:SVE_I 3 "aarch64_simd_imm_zero")] | |
1621 | UNSPEC_SEL))] | |
1622 | "TARGET_SVE" | |
1623 | "mov\t%0.<Vetype>, %1/z, #%2" | |
1624 | ) | |
1625 | ||
1626 | ;; Integer (signed) vcond. Don't enforce an immediate range here, since it | |
1627 | ;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead. | |
1628 | (define_expand "vcond<mode><v_int_equiv>" | |
1629 | [(set (match_operand:SVE_ALL 0 "register_operand") | |
1630 | (if_then_else:SVE_ALL | |
1631 | (match_operator 3 "comparison_operator" | |
1632 | [(match_operand:<V_INT_EQUIV> 4 "register_operand") | |
1633 | (match_operand:<V_INT_EQUIV> 5 "nonmemory_operand")]) | |
1634 | (match_operand:SVE_ALL 1 "register_operand") | |
1635 | (match_operand:SVE_ALL 2 "register_operand")))] | |
1636 | "TARGET_SVE" | |
1637 | { | |
1638 | aarch64_expand_sve_vcond (<MODE>mode, <V_INT_EQUIV>mode, operands); | |
1639 | DONE; | |
1640 | } | |
1641 | ) | |
1642 | ||
1643 | ;; Integer vcondu. Don't enforce an immediate range here, since it | |
1644 | ;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead. | |
1645 | (define_expand "vcondu<mode><v_int_equiv>" | |
1646 | [(set (match_operand:SVE_ALL 0 "register_operand") | |
1647 | (if_then_else:SVE_ALL | |
1648 | (match_operator 3 "comparison_operator" | |
1649 | [(match_operand:<V_INT_EQUIV> 4 "register_operand") | |
1650 | (match_operand:<V_INT_EQUIV> 5 "nonmemory_operand")]) | |
1651 | (match_operand:SVE_ALL 1 "register_operand") | |
1652 | (match_operand:SVE_ALL 2 "register_operand")))] | |
1653 | "TARGET_SVE" | |
1654 | { | |
1655 | aarch64_expand_sve_vcond (<MODE>mode, <V_INT_EQUIV>mode, operands); | |
1656 | DONE; | |
1657 | } | |
1658 | ) | |
1659 | ||
1660 | ;; Floating-point vcond. All comparisons except FCMUO allow a zero | |
1661 | ;; operand; aarch64_expand_sve_vcond handles the case of an FCMUO | |
1662 | ;; with zero. | |
1663 | (define_expand "vcond<mode><v_fp_equiv>" | |
1664 | [(set (match_operand:SVE_SD 0 "register_operand") | |
1665 | (if_then_else:SVE_SD | |
1666 | (match_operator 3 "comparison_operator" | |
1667 | [(match_operand:<V_FP_EQUIV> 4 "register_operand") | |
1668 | (match_operand:<V_FP_EQUIV> 5 "aarch64_simd_reg_or_zero")]) | |
1669 | (match_operand:SVE_SD 1 "register_operand") | |
1670 | (match_operand:SVE_SD 2 "register_operand")))] | |
1671 | "TARGET_SVE" | |
1672 | { | |
1673 | aarch64_expand_sve_vcond (<MODE>mode, <V_FP_EQUIV>mode, operands); | |
1674 | DONE; | |
1675 | } | |
1676 | ) | |
1677 | ||
1678 | ;; Signed integer comparisons. Don't enforce an immediate range here, since | |
1679 | ;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int | |
1680 | ;; instead. | |
1681 | (define_expand "vec_cmp<mode><vpred>" | |
1682 | [(parallel | |
1683 | [(set (match_operand:<VPRED> 0 "register_operand") | |
1684 | (match_operator:<VPRED> 1 "comparison_operator" | |
1685 | [(match_operand:SVE_I 2 "register_operand") | |
1686 | (match_operand:SVE_I 3 "nonmemory_operand")])) | |
1687 | (clobber (reg:CC CC_REGNUM))])] | |
1688 | "TARGET_SVE" | |
1689 | { | |
1690 | aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]), | |
1691 | operands[2], operands[3]); | |
1692 | DONE; | |
1693 | } | |
1694 | ) | |
1695 | ||
1696 | ;; Unsigned integer comparisons. Don't enforce an immediate range here, since | |
1697 | ;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int | |
1698 | ;; instead. | |
1699 | (define_expand "vec_cmpu<mode><vpred>" | |
1700 | [(parallel | |
1701 | [(set (match_operand:<VPRED> 0 "register_operand") | |
1702 | (match_operator:<VPRED> 1 "comparison_operator" | |
1703 | [(match_operand:SVE_I 2 "register_operand") | |
1704 | (match_operand:SVE_I 3 "nonmemory_operand")])) | |
1705 | (clobber (reg:CC CC_REGNUM))])] | |
1706 | "TARGET_SVE" | |
1707 | { | |
1708 | aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]), | |
1709 | operands[2], operands[3]); | |
1710 | DONE; | |
1711 | } | |
1712 | ) | |
1713 | ||
1714 | ;; Floating-point comparisons. All comparisons except FCMUO allow a zero | |
1715 | ;; operand; aarch64_expand_sve_vec_cmp_float handles the case of an FCMUO | |
1716 | ;; with zero. | |
1717 | (define_expand "vec_cmp<mode><vpred>" | |
1718 | [(set (match_operand:<VPRED> 0 "register_operand") | |
1719 | (match_operator:<VPRED> 1 "comparison_operator" | |
1720 | [(match_operand:SVE_F 2 "register_operand") | |
1721 | (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero")]))] | |
1722 | "TARGET_SVE" | |
1723 | { | |
1724 | aarch64_expand_sve_vec_cmp_float (operands[0], GET_CODE (operands[1]), | |
1725 | operands[2], operands[3], false); | |
1726 | DONE; | |
1727 | } | |
1728 | ) | |
1729 | ||
1730 | ;; Branch based on predicate equality or inequality. | |
1731 | (define_expand "cbranch<mode>4" | |
1732 | [(set (pc) | |
1733 | (if_then_else | |
1734 | (match_operator 0 "aarch64_equality_operator" | |
1735 | [(match_operand:PRED_ALL 1 "register_operand") | |
1736 | (match_operand:PRED_ALL 2 "aarch64_simd_reg_or_zero")]) | |
1737 | (label_ref (match_operand 3 "")) | |
1738 | (pc)))] | |
1739 | "" | |
1740 | { | |
1741 | rtx ptrue = force_reg (<MODE>mode, CONSTM1_RTX (<MODE>mode)); | |
1742 | rtx pred; | |
1743 | if (operands[2] == CONST0_RTX (<MODE>mode)) | |
1744 | pred = operands[1]; | |
1745 | else | |
1746 | { | |
1747 | pred = gen_reg_rtx (<MODE>mode); | |
1748 | emit_insn (gen_pred_xor<mode>3 (pred, ptrue, operands[1], | |
1749 | operands[2])); | |
1750 | } | |
1751 | emit_insn (gen_ptest_ptrue<mode> (ptrue, pred)); | |
1752 | operands[1] = gen_rtx_REG (CCmode, CC_REGNUM); | |
1753 | operands[2] = const0_rtx; | |
1754 | } | |
1755 | ) | |
1756 | ||
1757 | ;; Unpredicated integer MIN/MAX. | |
1758 | (define_expand "<su><maxmin><mode>3" | |
1759 | [(set (match_operand:SVE_I 0 "register_operand") | |
1760 | (unspec:SVE_I | |
1761 | [(match_dup 3) | |
1762 | (MAXMIN:SVE_I (match_operand:SVE_I 1 "register_operand") | |
1763 | (match_operand:SVE_I 2 "register_operand"))] | |
1764 | UNSPEC_MERGE_PTRUE))] | |
1765 | "TARGET_SVE" | |
1766 | { | |
1767 | operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
1768 | } | |
1769 | ) | |
1770 | ||
1771 | ;; Integer MIN/MAX predicated with a PTRUE. | |
1772 | (define_insn "*<su><maxmin><mode>3" | |
83d1ca63 | 1773 | [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") |
8fa7f434 | 1774 | (unspec:SVE_I |
83d1ca63 | 1775 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") |
1776 | (MAXMIN:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w") | |
1777 | (match_operand:SVE_I 3 "register_operand" "w, w"))] | |
8fa7f434 | 1778 | UNSPEC_MERGE_PTRUE))] |
1779 | "TARGET_SVE" | |
83d1ca63 | 1780 | "@ |
1781 | <su><maxmin>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
1782 | movprfx\t%0, %2\;<su><maxmin>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
1783 | [(set_attr "movprfx" "*,yes")] | |
8fa7f434 | 1784 | ) |
1785 | ||
1786 | ;; Unpredicated floating-point MIN/MAX. | |
1787 | (define_expand "<su><maxmin><mode>3" | |
1788 | [(set (match_operand:SVE_F 0 "register_operand") | |
1789 | (unspec:SVE_F | |
1790 | [(match_dup 3) | |
1791 | (FMAXMIN:SVE_F (match_operand:SVE_F 1 "register_operand") | |
1792 | (match_operand:SVE_F 2 "register_operand"))] | |
1793 | UNSPEC_MERGE_PTRUE))] | |
1794 | "TARGET_SVE" | |
1795 | { | |
1796 | operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
1797 | } | |
1798 | ) | |
1799 | ||
1800 | ;; Floating-point MIN/MAX predicated with a PTRUE. | |
1801 | (define_insn "*<su><maxmin><mode>3" | |
83d1ca63 | 1802 | [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w") |
8fa7f434 | 1803 | (unspec:SVE_F |
83d1ca63 | 1804 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") |
1805 | (FMAXMIN:SVE_F (match_operand:SVE_F 2 "register_operand" "%0, w") | |
1806 | (match_operand:SVE_F 3 "register_operand" "w, w"))] | |
8fa7f434 | 1807 | UNSPEC_MERGE_PTRUE))] |
1808 | "TARGET_SVE" | |
83d1ca63 | 1809 | "@ |
1810 | f<maxmin>nm\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
1811 | movprfx\t%0, %2\;f<maxmin>nm\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
1812 | [(set_attr "movprfx" "*,yes")] | |
8fa7f434 | 1813 | ) |
1814 | ||
1815 | ;; Unpredicated fmin/fmax. | |
1816 | (define_expand "<maxmin_uns><mode>3" | |
1817 | [(set (match_operand:SVE_F 0 "register_operand") | |
1818 | (unspec:SVE_F | |
1819 | [(match_dup 3) | |
1820 | (unspec:SVE_F [(match_operand:SVE_F 1 "register_operand") | |
1821 | (match_operand:SVE_F 2 "register_operand")] | |
1822 | FMAXMIN_UNS)] | |
1823 | UNSPEC_MERGE_PTRUE))] | |
1824 | "TARGET_SVE" | |
1825 | { | |
1826 | operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
1827 | } | |
1828 | ) | |
1829 | ||
1830 | ;; fmin/fmax predicated with a PTRUE. | |
1831 | (define_insn "*<maxmin_uns><mode>3" | |
83d1ca63 | 1832 | [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w") |
8fa7f434 | 1833 | (unspec:SVE_F |
83d1ca63 | 1834 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") |
1835 | (unspec:SVE_F [(match_operand:SVE_F 2 "register_operand" "%0, w") | |
1836 | (match_operand:SVE_F 3 "register_operand" "w, w")] | |
8fa7f434 | 1837 | FMAXMIN_UNS)] |
1838 | UNSPEC_MERGE_PTRUE))] | |
1839 | "TARGET_SVE" | |
83d1ca63 | 1840 | "@ |
1841 | <maxmin_uns_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
1842 | movprfx\t%0, %2\;<maxmin_uns_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
1843 | [(set_attr "movprfx" "*,yes")] | |
8fa7f434 | 1844 | ) |
1845 | ||
47c52435 | 1846 | ;; Predicated integer operations with select. |
1847 | (define_expand "cond_<optab><mode>" | |
1848 | [(set (match_operand:SVE_I 0 "register_operand") | |
1849 | (unspec:SVE_I | |
1850 | [(match_operand:<VPRED> 1 "register_operand") | |
1851 | (SVE_INT_BINARY:SVE_I | |
1852 | (match_operand:SVE_I 2 "register_operand") | |
1853 | (match_operand:SVE_I 3 "register_operand")) | |
83d1ca63 | 1854 | (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero")] |
47c52435 | 1855 | UNSPEC_SEL))] |
1856 | "TARGET_SVE" | |
83d1ca63 | 1857 | ) |
47c52435 | 1858 | |
cdb4d5d0 | 1859 | (define_expand "cond_<optab><mode>" |
1860 | [(set (match_operand:SVE_SDI 0 "register_operand") | |
1861 | (unspec:SVE_SDI | |
1862 | [(match_operand:<VPRED> 1 "register_operand") | |
1863 | (SVE_INT_BINARY_SD:SVE_SDI | |
1864 | (match_operand:SVE_SDI 2 "register_operand") | |
1865 | (match_operand:SVE_SDI 3 "register_operand")) | |
83d1ca63 | 1866 | (match_operand:SVE_SDI 4 "aarch64_simd_reg_or_zero")] |
cdb4d5d0 | 1867 | UNSPEC_SEL))] |
1868 | "TARGET_SVE" | |
83d1ca63 | 1869 | ) |
cdb4d5d0 | 1870 | |
83d1ca63 | 1871 | ;; Predicated integer operations with select matching the output operand. |
1872 | (define_insn "*cond_<optab><mode>_0" | |
1873 | [(set (match_operand:SVE_I 0 "register_operand" "+w, w, ?&w") | |
1874 | (unspec:SVE_I | |
1875 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") | |
1876 | (SVE_INT_BINARY:SVE_I | |
1877 | (match_operand:SVE_I 2 "register_operand" "0, w, w") | |
1878 | (match_operand:SVE_I 3 "register_operand" "w, 0, w")) | |
1879 | (match_dup 0)] | |
1880 | UNSPEC_SEL))] | |
1881 | "TARGET_SVE" | |
1882 | "@ | |
1883 | <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
1884 | <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> | |
1885 | movprfx\t%0, %1/m, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
1886 | [(set_attr "movprfx" "*,*,yes")] | |
1887 | ) | |
1888 | ||
1889 | (define_insn "*cond_<optab><mode>_0" | |
1890 | [(set (match_operand:SVE_SDI 0 "register_operand" "+w, w, ?&w") | |
1891 | (unspec:SVE_SDI | |
1892 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") | |
1893 | (SVE_INT_BINARY_SD:SVE_SDI | |
1894 | (match_operand:SVE_SDI 2 "register_operand" "0, w, w") | |
1895 | (match_operand:SVE_SDI 3 "register_operand" "w, 0, w")) | |
1896 | (match_dup 0)] | |
1897 | UNSPEC_SEL))] | |
1898 | "TARGET_SVE" | |
1899 | "@ | |
1900 | <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
1901 | <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> | |
1902 | movprfx\t%0, %1/m, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
1903 | [(set_attr "movprfx" "*,*,yes")] | |
1904 | ) | |
1905 | ||
1906 | ;; Predicated integer operations with select matching the first operand. | |
1907 | (define_insn "*cond_<optab><mode>_2" | |
1908 | [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") | |
1909 | (unspec:SVE_I | |
1910 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
1911 | (SVE_INT_BINARY:SVE_I | |
1912 | (match_operand:SVE_I 2 "register_operand" "0, w") | |
1913 | (match_operand:SVE_I 3 "register_operand" "w, w")) | |
1914 | (match_dup 2)] | |
1915 | UNSPEC_SEL))] | |
1916 | "TARGET_SVE" | |
1917 | "@ | |
1918 | <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
1919 | movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
1920 | [(set_attr "movprfx" "*,yes")] | |
1921 | ) | |
1922 | ||
1923 | (define_insn "*cond_<optab><mode>_2" | |
1924 | [(set (match_operand:SVE_SDI 0 "register_operand" "=w, ?&w") | |
1925 | (unspec:SVE_SDI | |
1926 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
1927 | (SVE_INT_BINARY_SD:SVE_SDI | |
1928 | (match_operand:SVE_SDI 2 "register_operand" "0, w") | |
1929 | (match_operand:SVE_SDI 3 "register_operand" "w, w")) | |
1930 | (match_dup 2)] | |
1931 | UNSPEC_SEL))] | |
1932 | "TARGET_SVE" | |
1933 | "@ | |
1934 | <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
1935 | movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
1936 | [(set_attr "movprfx" "*,yes")] | |
1937 | ) | |
1938 | ||
1939 | ;; Predicated integer operations with select matching the second operand. | |
1940 | (define_insn "*cond_<optab><mode>_3" | |
1941 | [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") | |
1942 | (unspec:SVE_I | |
1943 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
1944 | (SVE_INT_BINARY:SVE_I | |
1945 | (match_operand:SVE_I 2 "register_operand" "w, w") | |
1946 | (match_operand:SVE_I 3 "register_operand" "0, w")) | |
1947 | (match_dup 3)] | |
1948 | UNSPEC_SEL))] | |
1949 | "TARGET_SVE" | |
1950 | "@ | |
1951 | <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> | |
1952 | movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>" | |
1953 | [(set_attr "movprfx" "*,yes")] | |
1954 | ) | |
1955 | ||
1956 | (define_insn "*cond_<optab><mode>_3" | |
1957 | [(set (match_operand:SVE_SDI 0 "register_operand" "=w, ?&w") | |
1958 | (unspec:SVE_SDI | |
1959 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
1960 | (SVE_INT_BINARY_SD:SVE_SDI | |
1961 | (match_operand:SVE_SDI 2 "register_operand" "w, w") | |
1962 | (match_operand:SVE_SDI 3 "register_operand" "0, w")) | |
1963 | (match_dup 3)] | |
1964 | UNSPEC_SEL))] | |
1965 | "TARGET_SVE" | |
1966 | "@ | |
1967 | <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> | |
1968 | movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>" | |
1969 | [(set_attr "movprfx" "*,yes")] | |
1970 | ) | |
1971 | ||
1972 | ;; Predicated integer operations with select matching zero. | |
1973 | (define_insn "*cond_<optab><mode>_z" | |
1974 | [(set (match_operand:SVE_I 0 "register_operand" "=&w") | |
88fefa8f | 1975 | (unspec:SVE_I |
1976 | [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
47c52435 | 1977 | (SVE_INT_BINARY:SVE_I |
83d1ca63 | 1978 | (match_operand:SVE_I 2 "register_operand" "w") |
47c52435 | 1979 | (match_operand:SVE_I 3 "register_operand" "w")) |
83d1ca63 | 1980 | (match_operand:SVE_I 4 "aarch64_simd_imm_zero")] |
47c52435 | 1981 | UNSPEC_SEL))] |
88fefa8f | 1982 | "TARGET_SVE" |
83d1ca63 | 1983 | "movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" |
1984 | [(set_attr "movprfx" "yes")] | |
88fefa8f | 1985 | ) |
1986 | ||
83d1ca63 | 1987 | (define_insn "*cond_<optab><mode>_z" |
1988 | [(set (match_operand:SVE_SDI 0 "register_operand" "=&w") | |
cdb4d5d0 | 1989 | (unspec:SVE_SDI |
1990 | [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
1991 | (SVE_INT_BINARY_SD:SVE_SDI | |
83d1ca63 | 1992 | (match_operand:SVE_SDI 2 "register_operand" "w") |
cdb4d5d0 | 1993 | (match_operand:SVE_SDI 3 "register_operand" "w")) |
83d1ca63 | 1994 | (match_operand:SVE_SDI 4 "aarch64_simd_imm_zero")] |
cdb4d5d0 | 1995 | UNSPEC_SEL))] |
1996 | "TARGET_SVE" | |
83d1ca63 | 1997 | "movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" |
1998 | [(set_attr "movprfx" "yes")] | |
cdb4d5d0 | 1999 | ) |
2000 | ||
83d1ca63 | 2001 | ;; Synthetic predications with select unmatched. |
2002 | (define_insn "*cond_<optab><mode>_any" | |
2003 | [(set (match_operand:SVE_I 0 "register_operand" "=&w") | |
dbc7e6ae | 2004 | (unspec:SVE_I |
2005 | [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
83d1ca63 | 2006 | (SVE_INT_BINARY:SVE_I |
dbc7e6ae | 2007 | (match_operand:SVE_I 2 "register_operand" "w") |
83d1ca63 | 2008 | (match_operand:SVE_I 3 "register_operand" "w")) |
2009 | (match_operand:SVE_I 4 "register_operand" "w")] | |
dbc7e6ae | 2010 | UNSPEC_SEL))] |
2011 | "TARGET_SVE" | |
83d1ca63 | 2012 | "#" |
dbc7e6ae | 2013 | ) |
2014 | ||
83d1ca63 | 2015 | (define_insn "*cond_<optab><mode>_any" |
2016 | [(set (match_operand:SVE_SDI 0 "register_operand" "=&w") | |
cdb4d5d0 | 2017 | (unspec:SVE_SDI |
2018 | [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
83d1ca63 | 2019 | (SVE_INT_BINARY_SD:SVE_I |
cdb4d5d0 | 2020 | (match_operand:SVE_SDI 2 "register_operand" "w") |
83d1ca63 | 2021 | (match_operand:SVE_SDI 3 "register_operand" "w")) |
2022 | (match_operand:SVE_SDI 4 "register_operand" "w")] | |
cdb4d5d0 | 2023 | UNSPEC_SEL))] |
2024 | "TARGET_SVE" | |
83d1ca63 | 2025 | "#" |
2026 | ) | |
2027 | ||
2028 | (define_split | |
2029 | [(set (match_operand:SVE_I 0 "register_operand") | |
2030 | (unspec:SVE_I | |
2031 | [(match_operand:<VPRED> 1 "register_operand") | |
2032 | (match_operator:SVE_I 5 "aarch64_sve_any_binary_operator" | |
2033 | [(match_operand:SVE_I 2 "register_operand") | |
2034 | (match_operand:SVE_I 3 "register_operand")]) | |
2035 | (match_operand:SVE_I 4 "register_operand")] | |
2036 | UNSPEC_SEL))] | |
2037 | "TARGET_SVE && reload_completed | |
2038 | && !(rtx_equal_p (operands[0], operands[4]) | |
2039 | || rtx_equal_p (operands[2], operands[4]) | |
2040 | || rtx_equal_p (operands[3], operands[4]))" | |
2041 | ; Not matchable by any one insn or movprfx insn. We need a separate select. | |
2042 | [(set (match_dup 0) | |
2043 | (unspec:SVE_I [(match_dup 1) (match_dup 2) (match_dup 4)] | |
2044 | UNSPEC_SEL)) | |
2045 | (set (match_dup 0) | |
2046 | (unspec:SVE_I | |
2047 | [(match_dup 1) | |
2048 | (match_op_dup 5 [(match_dup 0) (match_dup 3)]) | |
2049 | (match_dup 0)] | |
2050 | UNSPEC_SEL))] | |
cdb4d5d0 | 2051 | ) |
2052 | ||
3bf95150 | 2053 | ;; Set operand 0 to the last active element in operand 3, or to tied |
2054 | ;; operand 1 if no elements are active. | |
2055 | (define_insn "fold_extract_last_<mode>" | |
2056 | [(set (match_operand:<VEL> 0 "register_operand" "=r, w") | |
2057 | (unspec:<VEL> | |
2058 | [(match_operand:<VEL> 1 "register_operand" "0, 0") | |
2059 | (match_operand:<VPRED> 2 "register_operand" "Upl, Upl") | |
2060 | (match_operand:SVE_ALL 3 "register_operand" "w, w")] | |
2061 | UNSPEC_CLASTB))] | |
2062 | "TARGET_SVE" | |
2063 | "@ | |
2064 | clastb\t%<vwcore>0, %2, %<vwcore>0, %3.<Vetype> | |
2065 | clastb\t%<vw>0, %2, %<vw>0, %3.<Vetype>" | |
2066 | ) | |
2067 | ||
8fa7f434 | 2068 | ;; Unpredicated integer add reduction. |
2069 | (define_expand "reduc_plus_scal_<mode>" | |
2070 | [(set (match_operand:<VEL> 0 "register_operand") | |
2071 | (unspec:<VEL> [(match_dup 2) | |
2072 | (match_operand:SVE_I 1 "register_operand")] | |
2073 | UNSPEC_ADDV))] | |
2074 | "TARGET_SVE" | |
2075 | { | |
2076 | operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
2077 | } | |
2078 | ) | |
2079 | ||
2080 | ;; Predicated integer add reduction. The result is always 64-bits. | |
2081 | (define_insn "*reduc_plus_scal_<mode>" | |
2082 | [(set (match_operand:<VEL> 0 "register_operand" "=w") | |
2083 | (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
2084 | (match_operand:SVE_I 2 "register_operand" "w")] | |
2085 | UNSPEC_ADDV))] | |
2086 | "TARGET_SVE" | |
2087 | "uaddv\t%d0, %1, %2.<Vetype>" | |
2088 | ) | |
2089 | ||
2090 | ;; Unpredicated floating-point add reduction. | |
2091 | (define_expand "reduc_plus_scal_<mode>" | |
2092 | [(set (match_operand:<VEL> 0 "register_operand") | |
2093 | (unspec:<VEL> [(match_dup 2) | |
2094 | (match_operand:SVE_F 1 "register_operand")] | |
2095 | UNSPEC_FADDV))] | |
2096 | "TARGET_SVE" | |
2097 | { | |
2098 | operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
2099 | } | |
2100 | ) | |
2101 | ||
2102 | ;; Predicated floating-point add reduction. | |
2103 | (define_insn "*reduc_plus_scal_<mode>" | |
2104 | [(set (match_operand:<VEL> 0 "register_operand" "=w") | |
2105 | (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
2106 | (match_operand:SVE_F 2 "register_operand" "w")] | |
2107 | UNSPEC_FADDV))] | |
2108 | "TARGET_SVE" | |
2109 | "faddv\t%<Vetype>0, %1, %2.<Vetype>" | |
2110 | ) | |
2111 | ||
2112 | ;; Unpredicated integer MIN/MAX reduction. | |
2113 | (define_expand "reduc_<maxmin_uns>_scal_<mode>" | |
2114 | [(set (match_operand:<VEL> 0 "register_operand") | |
2115 | (unspec:<VEL> [(match_dup 2) | |
2116 | (match_operand:SVE_I 1 "register_operand")] | |
2117 | MAXMINV))] | |
2118 | "TARGET_SVE" | |
2119 | { | |
2120 | operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
2121 | } | |
2122 | ) | |
2123 | ||
2124 | ;; Predicated integer MIN/MAX reduction. | |
2125 | (define_insn "*reduc_<maxmin_uns>_scal_<mode>" | |
2126 | [(set (match_operand:<VEL> 0 "register_operand" "=w") | |
2127 | (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
2128 | (match_operand:SVE_I 2 "register_operand" "w")] | |
2129 | MAXMINV))] | |
2130 | "TARGET_SVE" | |
2131 | "<maxmin_uns_op>v\t%<Vetype>0, %1, %2.<Vetype>" | |
2132 | ) | |
2133 | ||
2134 | ;; Unpredicated floating-point MIN/MAX reduction. | |
2135 | (define_expand "reduc_<maxmin_uns>_scal_<mode>" | |
2136 | [(set (match_operand:<VEL> 0 "register_operand") | |
2137 | (unspec:<VEL> [(match_dup 2) | |
2138 | (match_operand:SVE_F 1 "register_operand")] | |
2139 | FMAXMINV))] | |
2140 | "TARGET_SVE" | |
2141 | { | |
2142 | operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
2143 | } | |
2144 | ) | |
2145 | ||
2146 | ;; Predicated floating-point MIN/MAX reduction. | |
2147 | (define_insn "*reduc_<maxmin_uns>_scal_<mode>" | |
2148 | [(set (match_operand:<VEL> 0 "register_operand" "=w") | |
2149 | (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
2150 | (match_operand:SVE_F 2 "register_operand" "w")] | |
2151 | FMAXMINV))] | |
2152 | "TARGET_SVE" | |
2153 | "<maxmin_uns_op>v\t%<Vetype>0, %1, %2.<Vetype>" | |
2154 | ) | |
2155 | ||
216934f9 | 2156 | (define_expand "reduc_<optab>_scal_<mode>" |
2157 | [(set (match_operand:<VEL> 0 "register_operand") | |
2158 | (unspec:<VEL> [(match_dup 2) | |
2159 | (match_operand:SVE_I 1 "register_operand")] | |
2160 | BITWISEV))] | |
2161 | "TARGET_SVE" | |
2162 | { | |
2163 | operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
2164 | } | |
2165 | ) | |
2166 | ||
2167 | (define_insn "*reduc_<optab>_scal_<mode>" | |
2168 | [(set (match_operand:<VEL> 0 "register_operand" "=w") | |
2169 | (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
2170 | (match_operand:SVE_I 2 "register_operand" "w")] | |
2171 | BITWISEV))] | |
2172 | "TARGET_SVE" | |
2173 | "<bit_reduc_op>\t%<Vetype>0, %1, %2.<Vetype>" | |
2174 | ) | |
2175 | ||
d77809a4 | 2176 | ;; Unpredicated in-order FP reductions. |
2177 | (define_expand "fold_left_plus_<mode>" | |
2178 | [(set (match_operand:<VEL> 0 "register_operand") | |
2179 | (unspec:<VEL> [(match_dup 3) | |
2180 | (match_operand:<VEL> 1 "register_operand") | |
2181 | (match_operand:SVE_F 2 "register_operand")] | |
2182 | UNSPEC_FADDA))] | |
2183 | "TARGET_SVE" | |
2184 | { | |
2185 | operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
2186 | } | |
2187 | ) | |
2188 | ||
2189 | ;; In-order FP reductions predicated with PTRUE. | |
2190 | (define_insn "*fold_left_plus_<mode>" | |
2191 | [(set (match_operand:<VEL> 0 "register_operand" "=w") | |
2192 | (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
2193 | (match_operand:<VEL> 2 "register_operand" "0") | |
2194 | (match_operand:SVE_F 3 "register_operand" "w")] | |
2195 | UNSPEC_FADDA))] | |
2196 | "TARGET_SVE" | |
2197 | "fadda\t%<Vetype>0, %1, %<Vetype>0, %3.<Vetype>" | |
2198 | ) | |
2199 | ||
2200 | ;; Predicated form of the above in-order reduction. | |
2201 | (define_insn "*pred_fold_left_plus_<mode>" | |
2202 | [(set (match_operand:<VEL> 0 "register_operand" "=w") | |
2203 | (unspec:<VEL> | |
2204 | [(match_operand:<VEL> 1 "register_operand" "0") | |
2205 | (unspec:SVE_F | |
2206 | [(match_operand:<VPRED> 2 "register_operand" "Upl") | |
2207 | (match_operand:SVE_F 3 "register_operand" "w") | |
2208 | (match_operand:SVE_F 4 "aarch64_simd_imm_zero")] | |
2209 | UNSPEC_SEL)] | |
2210 | UNSPEC_FADDA))] | |
2211 | "TARGET_SVE" | |
2212 | "fadda\t%<Vetype>0, %2, %<Vetype>0, %3.<Vetype>" | |
2213 | ) | |
2214 | ||
8fa7f434 | 2215 | ;; Unpredicated floating-point addition. |
2216 | (define_expand "add<mode>3" | |
2217 | [(set (match_operand:SVE_F 0 "register_operand") | |
2218 | (unspec:SVE_F | |
2219 | [(match_dup 3) | |
2220 | (plus:SVE_F | |
2221 | (match_operand:SVE_F 1 "register_operand") | |
2222 | (match_operand:SVE_F 2 "aarch64_sve_float_arith_with_sub_operand"))] | |
2223 | UNSPEC_MERGE_PTRUE))] | |
2224 | "TARGET_SVE" | |
2225 | { | |
2226 | operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
2227 | } | |
2228 | ) | |
2229 | ||
2230 | ;; Floating-point addition predicated with a PTRUE. | |
9e089468 | 2231 | (define_insn_and_split "*add<mode>3" |
8fa7f434 | 2232 | [(set (match_operand:SVE_F 0 "register_operand" "=w, w, w") |
2233 | (unspec:SVE_F | |
2234 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") | |
2235 | (plus:SVE_F | |
2236 | (match_operand:SVE_F 2 "register_operand" "%0, 0, w") | |
2237 | (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_operand" "vsA, vsN, w"))] | |
2238 | UNSPEC_MERGE_PTRUE))] | |
2239 | "TARGET_SVE" | |
2240 | "@ | |
2241 | fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 | |
2242 | fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3 | |
9e089468 | 2243 | #" |
2244 | ; Split the unpredicated form after reload, so that we don't have | |
2245 | ; the unnecessary PTRUE. | |
2246 | "&& reload_completed | |
2247 | && register_operand (operands[3], <MODE>mode)" | |
2248 | [(set (match_dup 0) (plus:SVE_F (match_dup 2) (match_dup 3)))] | |
8fa7f434 | 2249 | ) |
2250 | ||
2251 | ;; Unpredicated floating-point subtraction. | |
2252 | (define_expand "sub<mode>3" | |
2253 | [(set (match_operand:SVE_F 0 "register_operand") | |
2254 | (unspec:SVE_F | |
2255 | [(match_dup 3) | |
2256 | (minus:SVE_F | |
2257 | (match_operand:SVE_F 1 "aarch64_sve_float_arith_operand") | |
2258 | (match_operand:SVE_F 2 "register_operand"))] | |
2259 | UNSPEC_MERGE_PTRUE))] | |
2260 | "TARGET_SVE" | |
2261 | { | |
2262 | operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
2263 | } | |
2264 | ) | |
2265 | ||
2266 | ;; Floating-point subtraction predicated with a PTRUE. | |
9e089468 | 2267 | (define_insn_and_split "*sub<mode>3" |
8fa7f434 | 2268 | [(set (match_operand:SVE_F 0 "register_operand" "=w, w, w, w") |
2269 | (unspec:SVE_F | |
2270 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl") | |
2271 | (minus:SVE_F | |
2272 | (match_operand:SVE_F 2 "aarch64_sve_float_arith_operand" "0, 0, vsA, w") | |
2273 | (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_operand" "vsA, vsN, 0, w"))] | |
2274 | UNSPEC_MERGE_PTRUE))] | |
2275 | "TARGET_SVE | |
2276 | && (register_operand (operands[2], <MODE>mode) | |
2277 | || register_operand (operands[3], <MODE>mode))" | |
2278 | "@ | |
2279 | fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 | |
2280 | fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3 | |
2281 | fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2 | |
9e089468 | 2282 | #" |
2283 | ; Split the unpredicated form after reload, so that we don't have | |
2284 | ; the unnecessary PTRUE. | |
2285 | "&& reload_completed | |
2286 | && register_operand (operands[2], <MODE>mode) | |
2287 | && register_operand (operands[3], <MODE>mode)" | |
2288 | [(set (match_dup 0) (minus:SVE_F (match_dup 2) (match_dup 3)))] | |
8fa7f434 | 2289 | ) |
2290 | ||
2291 | ;; Unpredicated floating-point multiplication. | |
2292 | (define_expand "mul<mode>3" | |
2293 | [(set (match_operand:SVE_F 0 "register_operand") | |
2294 | (unspec:SVE_F | |
2295 | [(match_dup 3) | |
2296 | (mult:SVE_F | |
2297 | (match_operand:SVE_F 1 "register_operand") | |
2298 | (match_operand:SVE_F 2 "aarch64_sve_float_mul_operand"))] | |
2299 | UNSPEC_MERGE_PTRUE))] | |
2300 | "TARGET_SVE" | |
2301 | { | |
2302 | operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
2303 | } | |
2304 | ) | |
2305 | ||
2306 | ;; Floating-point multiplication predicated with a PTRUE. | |
9e089468 | 2307 | (define_insn_and_split "*mul<mode>3" |
8fa7f434 | 2308 | [(set (match_operand:SVE_F 0 "register_operand" "=w, w") |
2309 | (unspec:SVE_F | |
2310 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
2311 | (mult:SVE_F | |
2312 | (match_operand:SVE_F 2 "register_operand" "%0, w") | |
2313 | (match_operand:SVE_F 3 "aarch64_sve_float_mul_operand" "vsM, w"))] | |
2314 | UNSPEC_MERGE_PTRUE))] | |
2315 | "TARGET_SVE" | |
2316 | "@ | |
2317 | fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 | |
9e089468 | 2318 | #" |
2319 | ; Split the unpredicated form after reload, so that we don't have | |
2320 | ; the unnecessary PTRUE. | |
2321 | "&& reload_completed | |
2322 | && register_operand (operands[3], <MODE>mode)" | |
2323 | [(set (match_dup 0) (mult:SVE_F (match_dup 2) (match_dup 3)))] | |
8fa7f434 | 2324 | ) |
2325 | ||
9e089468 | 2326 | ;; Unpredicated floating-point binary operations (post-RA only). |
2327 | ;; These are generated by splitting a predicated instruction whose | |
2328 | ;; predicate is unused. | |
2329 | (define_insn "*post_ra_<sve_fp_op><mode>3" | |
2330 | [(set (match_operand:SVE_F 0 "register_operand" "=w") | |
2331 | (SVE_UNPRED_FP_BINARY:SVE_F | |
2332 | (match_operand:SVE_F 1 "register_operand" "w") | |
2333 | (match_operand:SVE_F 2 "register_operand" "w")))] | |
2334 | "TARGET_SVE && reload_completed" | |
2335 | "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>") | |
2336 | ||
8fa7f434 | 2337 | ;; Unpredicated fma (%0 = (%1 * %2) + %3). |
2338 | (define_expand "fma<mode>4" | |
2339 | [(set (match_operand:SVE_F 0 "register_operand") | |
2340 | (unspec:SVE_F | |
2341 | [(match_dup 4) | |
2342 | (fma:SVE_F (match_operand:SVE_F 1 "register_operand") | |
2343 | (match_operand:SVE_F 2 "register_operand") | |
2344 | (match_operand:SVE_F 3 "register_operand"))] | |
2345 | UNSPEC_MERGE_PTRUE))] | |
2346 | "TARGET_SVE" | |
2347 | { | |
2348 | operands[4] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
2349 | } | |
2350 | ) | |
2351 | ||
2352 | ;; fma predicated with a PTRUE. | |
2353 | (define_insn "*fma<mode>4" | |
83d1ca63 | 2354 | [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w") |
8fa7f434 | 2355 | (unspec:SVE_F |
83d1ca63 | 2356 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") |
2357 | (fma:SVE_F (match_operand:SVE_F 3 "register_operand" "%0, w, w") | |
2358 | (match_operand:SVE_F 4 "register_operand" "w, w, w") | |
2359 | (match_operand:SVE_F 2 "register_operand" "w, 0, w"))] | |
8fa7f434 | 2360 | UNSPEC_MERGE_PTRUE))] |
2361 | "TARGET_SVE" | |
2362 | "@ | |
2363 | fmad\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype> | |
83d1ca63 | 2364 | fmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype> |
2365 | movprfx\t%0, %2\;fmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>" | |
2366 | [(set_attr "movprfx" "*,*,yes")] | |
8fa7f434 | 2367 | ) |
2368 | ||
2369 | ;; Unpredicated fnma (%0 = (-%1 * %2) + %3). | |
2370 | (define_expand "fnma<mode>4" | |
2371 | [(set (match_operand:SVE_F 0 "register_operand") | |
2372 | (unspec:SVE_F | |
2373 | [(match_dup 4) | |
2374 | (fma:SVE_F (neg:SVE_F | |
2375 | (match_operand:SVE_F 1 "register_operand")) | |
2376 | (match_operand:SVE_F 2 "register_operand") | |
2377 | (match_operand:SVE_F 3 "register_operand"))] | |
2378 | UNSPEC_MERGE_PTRUE))] | |
2379 | "TARGET_SVE" | |
2380 | { | |
2381 | operands[4] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
2382 | } | |
2383 | ) | |
2384 | ||
2385 | ;; fnma predicated with a PTRUE. | |
2386 | (define_insn "*fnma<mode>4" | |
83d1ca63 | 2387 | [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w") |
8fa7f434 | 2388 | (unspec:SVE_F |
83d1ca63 | 2389 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") |
8fa7f434 | 2390 | (fma:SVE_F (neg:SVE_F |
83d1ca63 | 2391 | (match_operand:SVE_F 3 "register_operand" "%0, w, w")) |
2392 | (match_operand:SVE_F 4 "register_operand" "w, w, w") | |
2393 | (match_operand:SVE_F 2 "register_operand" "w, 0, w"))] | |
8fa7f434 | 2394 | UNSPEC_MERGE_PTRUE))] |
2395 | "TARGET_SVE" | |
2396 | "@ | |
2397 | fmsb\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype> | |
83d1ca63 | 2398 | fmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype> |
2399 | movprfx\t%0, %2\;fmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>" | |
2400 | [(set_attr "movprfx" "*,*,yes")] | |
8fa7f434 | 2401 | ) |
2402 | ||
2403 | ;; Unpredicated fms (%0 = (%1 * %2) - %3). | |
2404 | (define_expand "fms<mode>4" | |
2405 | [(set (match_operand:SVE_F 0 "register_operand") | |
2406 | (unspec:SVE_F | |
2407 | [(match_dup 4) | |
2408 | (fma:SVE_F (match_operand:SVE_F 1 "register_operand") | |
2409 | (match_operand:SVE_F 2 "register_operand") | |
2410 | (neg:SVE_F | |
2411 | (match_operand:SVE_F 3 "register_operand")))] | |
2412 | UNSPEC_MERGE_PTRUE))] | |
2413 | "TARGET_SVE" | |
2414 | { | |
2415 | operands[4] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
2416 | } | |
2417 | ) | |
2418 | ||
2419 | ;; fms predicated with a PTRUE. | |
2420 | (define_insn "*fms<mode>4" | |
83d1ca63 | 2421 | [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w") |
8fa7f434 | 2422 | (unspec:SVE_F |
83d1ca63 | 2423 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") |
2424 | (fma:SVE_F (match_operand:SVE_F 3 "register_operand" "%0, w, w") | |
2425 | (match_operand:SVE_F 4 "register_operand" "w, w, w") | |
8fa7f434 | 2426 | (neg:SVE_F |
83d1ca63 | 2427 | (match_operand:SVE_F 2 "register_operand" "w, 0, w")))] |
8fa7f434 | 2428 | UNSPEC_MERGE_PTRUE))] |
2429 | "TARGET_SVE" | |
2430 | "@ | |
2431 | fnmsb\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype> | |
83d1ca63 | 2432 | fnmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype> |
2433 | movprfx\t%0, %2\;fnmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>" | |
2434 | [(set_attr "movprfx" "*,*,yes")] | |
8fa7f434 | 2435 | ) |
2436 | ||
2437 | ;; Unpredicated fnms (%0 = (-%1 * %2) - %3). | |
2438 | (define_expand "fnms<mode>4" | |
2439 | [(set (match_operand:SVE_F 0 "register_operand") | |
2440 | (unspec:SVE_F | |
2441 | [(match_dup 4) | |
2442 | (fma:SVE_F (neg:SVE_F | |
2443 | (match_operand:SVE_F 1 "register_operand")) | |
2444 | (match_operand:SVE_F 2 "register_operand") | |
2445 | (neg:SVE_F | |
2446 | (match_operand:SVE_F 3 "register_operand")))] | |
2447 | UNSPEC_MERGE_PTRUE))] | |
2448 | "TARGET_SVE" | |
2449 | { | |
2450 | operands[4] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
2451 | } | |
2452 | ) | |
2453 | ||
2454 | ;; fnms predicated with a PTRUE. | |
2455 | (define_insn "*fnms<mode>4" | |
83d1ca63 | 2456 | [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w") |
8fa7f434 | 2457 | (unspec:SVE_F |
83d1ca63 | 2458 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") |
8fa7f434 | 2459 | (fma:SVE_F (neg:SVE_F |
83d1ca63 | 2460 | (match_operand:SVE_F 3 "register_operand" "%0, w, w")) |
2461 | (match_operand:SVE_F 4 "register_operand" "w, w, w") | |
8fa7f434 | 2462 | (neg:SVE_F |
83d1ca63 | 2463 | (match_operand:SVE_F 2 "register_operand" "w, 0, w")))] |
8fa7f434 | 2464 | UNSPEC_MERGE_PTRUE))] |
2465 | "TARGET_SVE" | |
2466 | "@ | |
2467 | fnmad\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype> | |
83d1ca63 | 2468 | fnmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype> |
2469 | movprfx\t%0, %2\;fnmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>" | |
2470 | [(set_attr "movprfx" "*,*,yes")] | |
8fa7f434 | 2471 | ) |
2472 | ||
2473 | ;; Unpredicated floating-point division. | |
2474 | (define_expand "div<mode>3" | |
2475 | [(set (match_operand:SVE_F 0 "register_operand") | |
2476 | (unspec:SVE_F | |
2477 | [(match_dup 3) | |
2478 | (div:SVE_F (match_operand:SVE_F 1 "register_operand") | |
2479 | (match_operand:SVE_F 2 "register_operand"))] | |
2480 | UNSPEC_MERGE_PTRUE))] | |
2481 | "TARGET_SVE" | |
2482 | { | |
2483 | operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
2484 | } | |
2485 | ) | |
2486 | ||
2487 | ;; Floating-point division predicated with a PTRUE. | |
2488 | (define_insn "*div<mode>3" | |
83d1ca63 | 2489 | [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w") |
8fa7f434 | 2490 | (unspec:SVE_F |
83d1ca63 | 2491 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") |
2492 | (div:SVE_F (match_operand:SVE_F 2 "register_operand" "0, w, w") | |
2493 | (match_operand:SVE_F 3 "register_operand" "w, 0, w"))] | |
8fa7f434 | 2494 | UNSPEC_MERGE_PTRUE))] |
2495 | "TARGET_SVE" | |
2496 | "@ | |
2497 | fdiv\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
83d1ca63 | 2498 | fdivr\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> |
2499 | movprfx\t%0, %2\;fdiv\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
2500 | [(set_attr "movprfx" "*,*,yes")] | |
8fa7f434 | 2501 | ) |
2502 | ||
2503 | ;; Unpredicated FNEG, FABS and FSQRT. | |
2504 | (define_expand "<optab><mode>2" | |
2505 | [(set (match_operand:SVE_F 0 "register_operand") | |
2506 | (unspec:SVE_F | |
2507 | [(match_dup 2) | |
2508 | (SVE_FP_UNARY:SVE_F (match_operand:SVE_F 1 "register_operand"))] | |
2509 | UNSPEC_MERGE_PTRUE))] | |
2510 | "TARGET_SVE" | |
2511 | { | |
2512 | operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
2513 | } | |
2514 | ) | |
2515 | ||
2516 | ;; FNEG, FABS and FSQRT predicated with a PTRUE. | |
2517 | (define_insn "*<optab><mode>2" | |
2518 | [(set (match_operand:SVE_F 0 "register_operand" "=w") | |
2519 | (unspec:SVE_F | |
2520 | [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
2521 | (SVE_FP_UNARY:SVE_F (match_operand:SVE_F 2 "register_operand" "w"))] | |
2522 | UNSPEC_MERGE_PTRUE))] | |
2523 | "TARGET_SVE" | |
2524 | "<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>" | |
2525 | ) | |
2526 | ||
2527 | ;; Unpredicated FRINTy. | |
2528 | (define_expand "<frint_pattern><mode>2" | |
2529 | [(set (match_operand:SVE_F 0 "register_operand") | |
2530 | (unspec:SVE_F | |
2531 | [(match_dup 2) | |
2532 | (unspec:SVE_F [(match_operand:SVE_F 1 "register_operand")] | |
2533 | FRINT)] | |
2534 | UNSPEC_MERGE_PTRUE))] | |
2535 | "TARGET_SVE" | |
2536 | { | |
2537 | operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
2538 | } | |
2539 | ) | |
2540 | ||
2541 | ;; FRINTy predicated with a PTRUE. | |
2542 | (define_insn "*<frint_pattern><mode>2" | |
2543 | [(set (match_operand:SVE_F 0 "register_operand" "=w") | |
2544 | (unspec:SVE_F | |
2545 | [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
2546 | (unspec:SVE_F [(match_operand:SVE_F 2 "register_operand" "w")] | |
2547 | FRINT)] | |
2548 | UNSPEC_MERGE_PTRUE))] | |
2549 | "TARGET_SVE" | |
2550 | "frint<frint_suffix>\t%0.<Vetype>, %1/m, %2.<Vetype>" | |
2551 | ) | |
2552 | ||
2553 | ;; Unpredicated conversion of floats to integers of the same size (HF to HI, | |
2554 | ;; SF to SI or DF to DI). | |
2555 | (define_expand "<fix_trunc_optab><mode><v_int_equiv>2" | |
2556 | [(set (match_operand:<V_INT_EQUIV> 0 "register_operand") | |
2557 | (unspec:<V_INT_EQUIV> | |
2558 | [(match_dup 2) | |
2559 | (FIXUORS:<V_INT_EQUIV> | |
2560 | (match_operand:SVE_F 1 "register_operand"))] | |
2561 | UNSPEC_MERGE_PTRUE))] | |
2562 | "TARGET_SVE" | |
2563 | { | |
2564 | operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
2565 | } | |
2566 | ) | |
2567 | ||
2568 | ;; Conversion of SF to DI, SI or HI, predicated with a PTRUE. | |
2569 | (define_insn "*<fix_trunc_optab>v16hsf<mode>2" | |
2570 | [(set (match_operand:SVE_HSDI 0 "register_operand" "=w") | |
2571 | (unspec:SVE_HSDI | |
2572 | [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
2573 | (FIXUORS:SVE_HSDI | |
2574 | (match_operand:VNx8HF 2 "register_operand" "w"))] | |
2575 | UNSPEC_MERGE_PTRUE))] | |
2576 | "TARGET_SVE" | |
2577 | "fcvtz<su>\t%0.<Vetype>, %1/m, %2.h" | |
2578 | ) | |
2579 | ||
2580 | ;; Conversion of SF to DI or SI, predicated with a PTRUE. | |
2581 | (define_insn "*<fix_trunc_optab>vnx4sf<mode>2" | |
2582 | [(set (match_operand:SVE_SDI 0 "register_operand" "=w") | |
2583 | (unspec:SVE_SDI | |
2584 | [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
2585 | (FIXUORS:SVE_SDI | |
2586 | (match_operand:VNx4SF 2 "register_operand" "w"))] | |
2587 | UNSPEC_MERGE_PTRUE))] | |
2588 | "TARGET_SVE" | |
2589 | "fcvtz<su>\t%0.<Vetype>, %1/m, %2.s" | |
2590 | ) | |
2591 | ||
2592 | ;; Conversion of DF to DI or SI, predicated with a PTRUE. | |
2593 | (define_insn "*<fix_trunc_optab>vnx2df<mode>2" | |
2594 | [(set (match_operand:SVE_SDI 0 "register_operand" "=w") | |
2595 | (unspec:SVE_SDI | |
2596 | [(match_operand:VNx2BI 1 "register_operand" "Upl") | |
2597 | (FIXUORS:SVE_SDI | |
2598 | (match_operand:VNx2DF 2 "register_operand" "w"))] | |
2599 | UNSPEC_MERGE_PTRUE))] | |
2600 | "TARGET_SVE" | |
2601 | "fcvtz<su>\t%0.<Vetype>, %1/m, %2.d" | |
2602 | ) | |
2603 | ||
2604 | ;; Unpredicated conversion of integers to floats of the same size | |
2605 | ;; (HI to HF, SI to SF or DI to DF). | |
2606 | (define_expand "<optab><v_int_equiv><mode>2" | |
2607 | [(set (match_operand:SVE_F 0 "register_operand") | |
2608 | (unspec:SVE_F | |
2609 | [(match_dup 2) | |
2610 | (FLOATUORS:SVE_F | |
2611 | (match_operand:<V_INT_EQUIV> 1 "register_operand"))] | |
2612 | UNSPEC_MERGE_PTRUE))] | |
2613 | "TARGET_SVE" | |
2614 | { | |
2615 | operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
2616 | } | |
2617 | ) | |
2618 | ||
2619 | ;; Conversion of DI, SI or HI to the same number of HFs, predicated | |
2620 | ;; with a PTRUE. | |
2621 | (define_insn "*<optab><mode>vnx8hf2" | |
2622 | [(set (match_operand:VNx8HF 0 "register_operand" "=w") | |
2623 | (unspec:VNx8HF | |
2624 | [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
2625 | (FLOATUORS:VNx8HF | |
2626 | (match_operand:SVE_HSDI 2 "register_operand" "w"))] | |
2627 | UNSPEC_MERGE_PTRUE))] | |
2628 | "TARGET_SVE" | |
2629 | "<su_optab>cvtf\t%0.h, %1/m, %2.<Vetype>" | |
2630 | ) | |
2631 | ||
2632 | ;; Conversion of DI or SI to the same number of SFs, predicated with a PTRUE. | |
2633 | (define_insn "*<optab><mode>vnx4sf2" | |
2634 | [(set (match_operand:VNx4SF 0 "register_operand" "=w") | |
2635 | (unspec:VNx4SF | |
2636 | [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
2637 | (FLOATUORS:VNx4SF | |
2638 | (match_operand:SVE_SDI 2 "register_operand" "w"))] | |
2639 | UNSPEC_MERGE_PTRUE))] | |
2640 | "TARGET_SVE" | |
2641 | "<su_optab>cvtf\t%0.s, %1/m, %2.<Vetype>" | |
2642 | ) | |
2643 | ||
2644 | ;; Conversion of DI or SI to DF, predicated with a PTRUE. | |
88e81b08 | 2645 | (define_insn "aarch64_sve_<optab><mode>vnx2df2" |
8fa7f434 | 2646 | [(set (match_operand:VNx2DF 0 "register_operand" "=w") |
2647 | (unspec:VNx2DF | |
2648 | [(match_operand:VNx2BI 1 "register_operand" "Upl") | |
2649 | (FLOATUORS:VNx2DF | |
2650 | (match_operand:SVE_SDI 2 "register_operand" "w"))] | |
2651 | UNSPEC_MERGE_PTRUE))] | |
2652 | "TARGET_SVE" | |
2653 | "<su_optab>cvtf\t%0.d, %1/m, %2.<Vetype>" | |
2654 | ) | |
2655 | ||
2656 | ;; Conversion of DFs to the same number of SFs, or SFs to the same number | |
2657 | ;; of HFs. | |
2658 | (define_insn "*trunc<Vwide><mode>2" | |
2659 | [(set (match_operand:SVE_HSF 0 "register_operand" "=w") | |
2660 | (unspec:SVE_HSF | |
2661 | [(match_operand:<VWIDE_PRED> 1 "register_operand" "Upl") | |
2662 | (unspec:SVE_HSF | |
2663 | [(match_operand:<VWIDE> 2 "register_operand" "w")] | |
2664 | UNSPEC_FLOAT_CONVERT)] | |
2665 | UNSPEC_MERGE_PTRUE))] | |
2666 | "TARGET_SVE" | |
2667 | "fcvt\t%0.<Vetype>, %1/m, %2.<Vewtype>" | |
2668 | ) | |
2669 | ||
2670 | ;; Conversion of SFs to the same number of DFs, or HFs to the same number | |
2671 | ;; of SFs. | |
88e81b08 | 2672 | (define_insn "aarch64_sve_extend<mode><Vwide>2" |
8fa7f434 | 2673 | [(set (match_operand:<VWIDE> 0 "register_operand" "=w") |
2674 | (unspec:<VWIDE> | |
2675 | [(match_operand:<VWIDE_PRED> 1 "register_operand" "Upl") | |
2676 | (unspec:<VWIDE> | |
2677 | [(match_operand:SVE_HSF 2 "register_operand" "w")] | |
2678 | UNSPEC_FLOAT_CONVERT)] | |
2679 | UNSPEC_MERGE_PTRUE))] | |
2680 | "TARGET_SVE" | |
2681 | "fcvt\t%0.<Vewtype>, %1/m, %2.<Vetype>" | |
2682 | ) | |
2683 | ||
88e81b08 | 2684 | ;; Unpack the low or high half of a predicate, where "high" refers to |
2685 | ;; the low-numbered lanes for big-endian and the high-numbered lanes | |
2686 | ;; for little-endian. | |
2687 | (define_expand "vec_unpack<su>_<perm_hilo>_<mode>" | |
2688 | [(match_operand:<VWIDE> 0 "register_operand") | |
2689 | (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand")] | |
2690 | UNPACK)] | |
2691 | "TARGET_SVE" | |
2692 | { | |
2693 | emit_insn ((<hi_lanes_optab> | |
2694 | ? gen_aarch64_sve_punpkhi_<PRED_BHS:mode> | |
2695 | : gen_aarch64_sve_punpklo_<PRED_BHS:mode>) | |
2696 | (operands[0], operands[1])); | |
2697 | DONE; | |
2698 | } | |
2699 | ) | |
2700 | ||
8fa7f434 | 2701 | ;; PUNPKHI and PUNPKLO. |
88e81b08 | 2702 | (define_insn "aarch64_sve_punpk<perm_hilo>_<mode>" |
8fa7f434 | 2703 | [(set (match_operand:<VWIDE> 0 "register_operand" "=Upa") |
2704 | (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand" "Upa")] | |
88e81b08 | 2705 | UNPACK_UNSIGNED))] |
8fa7f434 | 2706 | "TARGET_SVE" |
2707 | "punpk<perm_hilo>\t%0.h, %1.b" | |
2708 | ) | |
2709 | ||
88e81b08 | 2710 | ;; Unpack the low or high half of a vector, where "high" refers to |
2711 | ;; the low-numbered lanes for big-endian and the high-numbered lanes | |
2712 | ;; for little-endian. | |
2713 | (define_expand "vec_unpack<su>_<perm_hilo>_<SVE_BHSI:mode>" | |
2714 | [(match_operand:<VWIDE> 0 "register_operand") | |
2715 | (unspec:<VWIDE> [(match_operand:SVE_BHSI 1 "register_operand")] UNPACK)] | |
2716 | "TARGET_SVE" | |
2717 | { | |
2718 | emit_insn ((<hi_lanes_optab> | |
2719 | ? gen_aarch64_sve_<su>unpkhi_<SVE_BHSI:mode> | |
2720 | : gen_aarch64_sve_<su>unpklo_<SVE_BHSI:mode>) | |
2721 | (operands[0], operands[1])); | |
2722 | DONE; | |
2723 | } | |
2724 | ) | |
2725 | ||
8fa7f434 | 2726 | ;; SUNPKHI, UUNPKHI, SUNPKLO and UUNPKLO. |
88e81b08 | 2727 | (define_insn "aarch64_sve_<su>unpk<perm_hilo>_<SVE_BHSI:mode>" |
8fa7f434 | 2728 | [(set (match_operand:<VWIDE> 0 "register_operand" "=w") |
2729 | (unspec:<VWIDE> [(match_operand:SVE_BHSI 1 "register_operand" "w")] | |
2730 | UNPACK))] | |
2731 | "TARGET_SVE" | |
2732 | "<su>unpk<perm_hilo>\t%0.<Vewtype>, %1.<Vetype>" | |
2733 | ) | |
2734 | ||
8fa7f434 | 2735 | ;; Unpack one half of a VNx4SF to VNx2DF, or one half of a VNx8HF to VNx4SF. |
2736 | ;; First unpack the source without conversion, then float-convert the | |
2737 | ;; unpacked source. | |
2738 | (define_expand "vec_unpacks_<perm_hilo>_<mode>" | |
88e81b08 | 2739 | [(match_operand:<VWIDE> 0 "register_operand") |
2740 | (unspec:SVE_HSF [(match_operand:SVE_HSF 1 "register_operand")] | |
2741 | UNPACK_UNSIGNED)] | |
8fa7f434 | 2742 | "TARGET_SVE" |
2743 | { | |
88e81b08 | 2744 | /* Use ZIP to do the unpack, since we don't care about the upper halves |
2745 | and since it has the nice property of not needing any subregs. | |
2746 | If using UUNPK* turns out to be preferable, we could model it as | |
2747 | a ZIP whose first operand is zero. */ | |
2748 | rtx temp = gen_reg_rtx (<MODE>mode); | |
2749 | emit_insn ((<hi_lanes_optab> | |
2750 | ? gen_aarch64_sve_zip2<mode> | |
2751 | : gen_aarch64_sve_zip1<mode>) | |
2752 | (temp, operands[1], operands[1])); | |
2753 | rtx ptrue = force_reg (<VWIDE_PRED>mode, CONSTM1_RTX (<VWIDE_PRED>mode)); | |
2754 | emit_insn (gen_aarch64_sve_extend<mode><Vwide>2 (operands[0], | |
2755 | ptrue, temp)); | |
2756 | DONE; | |
8fa7f434 | 2757 | } |
2758 | ) | |
2759 | ||
2760 | ;; Unpack one half of a VNx4SI to VNx2DF. First unpack from VNx4SI | |
2761 | ;; to VNx2DI, reinterpret the VNx2DI as a VNx4SI, then convert the | |
2762 | ;; unpacked VNx4SI to VNx2DF. | |
2763 | (define_expand "vec_unpack<su_optab>_float_<perm_hilo>_vnx4si" | |
88e81b08 | 2764 | [(match_operand:VNx2DF 0 "register_operand") |
2765 | (FLOATUORS:VNx2DF | |
2766 | (unspec:VNx2DI [(match_operand:VNx4SI 1 "register_operand")] | |
2767 | UNPACK_UNSIGNED))] | |
2768 | "TARGET_SVE" | |
2769 | { | |
2770 | /* Use ZIP to do the unpack, since we don't care about the upper halves | |
2771 | and since it has the nice property of not needing any subregs. | |
2772 | If using UUNPK* turns out to be preferable, we could model it as | |
2773 | a ZIP whose first operand is zero. */ | |
2774 | rtx temp = gen_reg_rtx (VNx4SImode); | |
2775 | emit_insn ((<hi_lanes_optab> | |
2776 | ? gen_aarch64_sve_zip2vnx4si | |
2777 | : gen_aarch64_sve_zip1vnx4si) | |
2778 | (temp, operands[1], operands[1])); | |
2779 | rtx ptrue = force_reg (VNx2BImode, CONSTM1_RTX (VNx2BImode)); | |
2780 | emit_insn (gen_aarch64_sve_<FLOATUORS:optab>vnx4sivnx2df2 (operands[0], | |
2781 | ptrue, temp)); | |
2782 | DONE; | |
8fa7f434 | 2783 | } |
2784 | ) | |
2785 | ||
2786 | ;; Predicate pack. Use UZP1 on the narrower type, which discards | |
2787 | ;; the high part of each wide element. | |
2788 | (define_insn "vec_pack_trunc_<Vwide>" | |
2789 | [(set (match_operand:PRED_BHS 0 "register_operand" "=Upa") | |
2790 | (unspec:PRED_BHS | |
2791 | [(match_operand:<VWIDE> 1 "register_operand" "Upa") | |
2792 | (match_operand:<VWIDE> 2 "register_operand" "Upa")] | |
2793 | UNSPEC_PACK))] | |
2794 | "TARGET_SVE" | |
2795 | "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>" | |
2796 | ) | |
2797 | ||
2798 | ;; Integer pack. Use UZP1 on the narrower type, which discards | |
2799 | ;; the high part of each wide element. | |
2800 | (define_insn "vec_pack_trunc_<Vwide>" | |
2801 | [(set (match_operand:SVE_BHSI 0 "register_operand" "=w") | |
2802 | (unspec:SVE_BHSI | |
2803 | [(match_operand:<VWIDE> 1 "register_operand" "w") | |
2804 | (match_operand:<VWIDE> 2 "register_operand" "w")] | |
2805 | UNSPEC_PACK))] | |
2806 | "TARGET_SVE" | |
2807 | "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>" | |
2808 | ) | |
2809 | ||
2810 | ;; Convert two vectors of DF to SF, or two vectors of SF to HF, and pack | |
2811 | ;; the results into a single vector. | |
2812 | (define_expand "vec_pack_trunc_<Vwide>" | |
2813 | [(set (match_dup 4) | |
2814 | (unspec:SVE_HSF | |
2815 | [(match_dup 3) | |
2816 | (unspec:SVE_HSF [(match_operand:<VWIDE> 1 "register_operand")] | |
2817 | UNSPEC_FLOAT_CONVERT)] | |
2818 | UNSPEC_MERGE_PTRUE)) | |
2819 | (set (match_dup 5) | |
2820 | (unspec:SVE_HSF | |
2821 | [(match_dup 3) | |
2822 | (unspec:SVE_HSF [(match_operand:<VWIDE> 2 "register_operand")] | |
2823 | UNSPEC_FLOAT_CONVERT)] | |
2824 | UNSPEC_MERGE_PTRUE)) | |
2825 | (set (match_operand:SVE_HSF 0 "register_operand") | |
2826 | (unspec:SVE_HSF [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))] | |
2827 | "TARGET_SVE" | |
2828 | { | |
2829 | operands[3] = force_reg (<VWIDE_PRED>mode, CONSTM1_RTX (<VWIDE_PRED>mode)); | |
2830 | operands[4] = gen_reg_rtx (<MODE>mode); | |
2831 | operands[5] = gen_reg_rtx (<MODE>mode); | |
2832 | } | |
2833 | ) | |
2834 | ||
2835 | ;; Convert two vectors of DF to SI and pack the results into a single vector. | |
2836 | (define_expand "vec_pack_<su>fix_trunc_vnx2df" | |
2837 | [(set (match_dup 4) | |
2838 | (unspec:VNx4SI | |
2839 | [(match_dup 3) | |
2840 | (FIXUORS:VNx4SI (match_operand:VNx2DF 1 "register_operand"))] | |
2841 | UNSPEC_MERGE_PTRUE)) | |
2842 | (set (match_dup 5) | |
2843 | (unspec:VNx4SI | |
2844 | [(match_dup 3) | |
2845 | (FIXUORS:VNx4SI (match_operand:VNx2DF 2 "register_operand"))] | |
2846 | UNSPEC_MERGE_PTRUE)) | |
2847 | (set (match_operand:VNx4SI 0 "register_operand") | |
2848 | (unspec:VNx4SI [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))] | |
2849 | "TARGET_SVE" | |
2850 | { | |
2851 | operands[3] = force_reg (VNx2BImode, CONSTM1_RTX (VNx2BImode)); | |
2852 | operands[4] = gen_reg_rtx (VNx4SImode); | |
2853 | operands[5] = gen_reg_rtx (VNx4SImode); | |
2854 | } | |
2855 | ) | |
633af029 | 2856 | |
47c52435 | 2857 | ;; Predicated floating-point operations with select. |
2858 | (define_expand "cond_<optab><mode>" | |
2859 | [(set (match_operand:SVE_F 0 "register_operand") | |
2860 | (unspec:SVE_F | |
2861 | [(match_operand:<VPRED> 1 "register_operand") | |
2862 | (unspec:SVE_F | |
83d1ca63 | 2863 | [(match_operand:SVE_F 2 "register_operand") |
47c52435 | 2864 | (match_operand:SVE_F 3 "register_operand")] |
2865 | SVE_COND_FP_BINARY) | |
83d1ca63 | 2866 | (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")] |
47c52435 | 2867 | UNSPEC_SEL))] |
2868 | "TARGET_SVE" | |
83d1ca63 | 2869 | ) |
47c52435 | 2870 | |
83d1ca63 | 2871 | ;; Predicated floating-point operations with select matching output. |
2872 | (define_insn "*cond_<optab><mode>_0" | |
2873 | [(set (match_operand:SVE_F 0 "register_operand" "+w, w, ?&w") | |
2874 | (unspec:SVE_F | |
2875 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") | |
2876 | (unspec:SVE_F | |
2877 | [(match_operand:SVE_F 2 "register_operand" "0, w, w") | |
2878 | (match_operand:SVE_F 3 "register_operand" "w, 0, w")] | |
2879 | SVE_COND_FP_BINARY) | |
2880 | (match_dup 0)] | |
2881 | UNSPEC_SEL))] | |
2882 | "TARGET_SVE" | |
2883 | "@ | |
2884 | <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
2885 | <sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> | |
2886 | movprfx\t%0, %1/m, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
2887 | [(set_attr "movprfx" "*,*,yes")] | |
2888 | ) | |
2889 | ||
2890 | ;; Predicated floating-point operations with select matching first operand. | |
2891 | (define_insn "*cond_<optab><mode>_2" | |
2892 | [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w") | |
2893 | (unspec:SVE_F | |
2894 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
2895 | (unspec:SVE_F | |
2896 | [(match_operand:SVE_F 2 "register_operand" "0, w") | |
2897 | (match_operand:SVE_F 3 "register_operand" "w, w")] | |
2898 | SVE_COND_FP_BINARY) | |
2899 | (match_dup 2)] | |
2900 | UNSPEC_SEL))] | |
2901 | "TARGET_SVE" | |
2902 | "@ | |
2903 | <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
2904 | movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
2905 | [(set_attr "movprfx" "*,yes")] | |
2906 | ) | |
2907 | ||
2908 | ;; Predicated floating-point operations with select matching second operand. | |
2909 | (define_insn "*cond_<optab><mode>_3" | |
2910 | [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w") | |
2911 | (unspec:SVE_F | |
2912 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
2913 | (unspec:SVE_F | |
2914 | [(match_operand:SVE_F 2 "register_operand" "w, w") | |
2915 | (match_operand:SVE_F 3 "register_operand" "0, w")] | |
2916 | SVE_COND_FP_BINARY) | |
2917 | (match_dup 3)] | |
2918 | UNSPEC_SEL))] | |
2919 | "TARGET_SVE" | |
2920 | "@ | |
2921 | <sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> | |
2922 | movprfx\t%0, %3\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>" | |
2923 | [(set_attr "movprfx" "*,yes")] | |
2924 | ) | |
2925 | ||
2926 | ;; Predicated floating-point operations with select matching zero. | |
2927 | (define_insn "*cond_<optab><mode>_z" | |
2928 | [(set (match_operand:SVE_F 0 "register_operand" "=&w") | |
88fefa8f | 2929 | (unspec:SVE_F |
2930 | [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
47c52435 | 2931 | (unspec:SVE_F |
83d1ca63 | 2932 | [(match_operand:SVE_F 2 "register_operand" "w") |
47c52435 | 2933 | (match_operand:SVE_F 3 "register_operand" "w")] |
2934 | SVE_COND_FP_BINARY) | |
83d1ca63 | 2935 | (match_operand:SVE_F 4 "aarch64_simd_imm_zero")] |
47c52435 | 2936 | UNSPEC_SEL))] |
88fefa8f | 2937 | "TARGET_SVE" |
83d1ca63 | 2938 | "movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" |
2939 | [(set_attr "movprfx" "yes")] | |
88fefa8f | 2940 | ) |
2941 | ||
83d1ca63 | 2942 | ;; Synthetic predication of floating-point operations with select unmatched. |
2943 | (define_insn_and_split "*cond_<optab><mode>_any" | |
2944 | [(set (match_operand:SVE_F 0 "register_operand" "=&w") | |
dbc7e6ae | 2945 | (unspec:SVE_F |
2946 | [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
2947 | (unspec:SVE_F | |
83d1ca63 | 2948 | [(match_operand:SVE_F 2 "register_operand" "w") |
2949 | (match_operand:SVE_F 3 "register_operand" "w")] | |
dbc7e6ae | 2950 | SVE_COND_FP_BINARY) |
83d1ca63 | 2951 | (match_operand:SVE_F 4 "register_operand" "w")] |
dbc7e6ae | 2952 | UNSPEC_SEL))] |
2953 | "TARGET_SVE" | |
83d1ca63 | 2954 | "#" |
2955 | "&& reload_completed | |
2956 | && !(rtx_equal_p (operands[0], operands[4]) | |
2957 | || rtx_equal_p (operands[2], operands[4]) | |
2958 | || rtx_equal_p (operands[3], operands[4]))" | |
2959 | ; Not matchable by any one insn or movprfx insn. We need a separate select. | |
2960 | [(set (match_dup 0) | |
2961 | (unspec:SVE_F [(match_dup 1) (match_dup 2) (match_dup 4)] UNSPEC_SEL)) | |
2962 | (set (match_dup 0) | |
2963 | (unspec:SVE_F | |
2964 | [(match_dup 1) | |
2965 | (unspec:SVE_F [(match_dup 0) (match_dup 3)] SVE_COND_FP_BINARY) | |
2966 | (match_dup 0)] | |
2967 | UNSPEC_SEL))] | |
dbc7e6ae | 2968 | ) |
2969 | ||
6682fc02 | 2970 | ;; Predicated floating-point ternary operations with select. |
2971 | (define_expand "cond_<optab><mode>" | |
2972 | [(set (match_operand:SVE_F 0 "register_operand") | |
2973 | (unspec:SVE_F | |
2974 | [(match_operand:<VPRED> 1 "register_operand") | |
2975 | (unspec:SVE_F | |
2976 | [(match_operand:SVE_F 2 "register_operand") | |
2977 | (match_operand:SVE_F 3 "register_operand") | |
2978 | (match_operand:SVE_F 4 "register_operand")] | |
2979 | SVE_COND_FP_TERNARY) | |
2980 | (match_operand:SVE_F 5 "aarch64_simd_reg_or_zero")] | |
2981 | UNSPEC_SEL))] | |
2982 | "TARGET_SVE" | |
2983 | { | |
2984 | /* Swap the multiplication operands if the fallback value is the | |
2985 | second of the two. */ | |
2986 | if (rtx_equal_p (operands[3], operands[5])) | |
2987 | std::swap (operands[2], operands[3]); | |
2988 | }) | |
2989 | ||
2990 | ;; Predicated floating-point ternary operations using the FMAD-like form. | |
2991 | (define_insn "*cond_<optab><mode>_2" | |
2992 | [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w") | |
2993 | (unspec:SVE_F | |
2994 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
2995 | (unspec:SVE_F | |
2996 | [(match_operand:SVE_F 2 "register_operand" "0, w") | |
2997 | (match_operand:SVE_F 3 "register_operand" "w, w") | |
2998 | (match_operand:SVE_F 4 "register_operand" "w, w")] | |
2999 | SVE_COND_FP_TERNARY) | |
3000 | (match_dup 2)] | |
3001 | UNSPEC_SEL))] | |
3002 | "TARGET_SVE" | |
3003 | "@ | |
3004 | <sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype> | |
3005 | movprfx\t%0, %2\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>" | |
3006 | [(set_attr "movprfx" "*,yes")] | |
3007 | ) | |
3008 | ||
3009 | ;; Predicated floating-point ternary operations using the FMLA-like form. | |
3010 | (define_insn "*cond_<optab><mode>_4" | |
3011 | [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w") | |
3012 | (unspec:SVE_F | |
3013 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
3014 | (unspec:SVE_F | |
3015 | [(match_operand:SVE_F 2 "register_operand" "w, w") | |
3016 | (match_operand:SVE_F 3 "register_operand" "w, w") | |
3017 | (match_operand:SVE_F 4 "register_operand" "0, w")] | |
3018 | SVE_COND_FP_TERNARY) | |
3019 | (match_dup 4)] | |
3020 | UNSPEC_SEL))] | |
3021 | "TARGET_SVE" | |
3022 | "@ | |
3023 | <sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> | |
0f958fe3 | 3024 | movprfx\t%0, %4\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>" |
6682fc02 | 3025 | [(set_attr "movprfx" "*,yes")] |
3026 | ) | |
3027 | ||
3028 | ;; Predicated floating-point ternary operations in which the value for | |
3029 | ;; inactive lanes is distinct from the other inputs. | |
3030 | (define_insn_and_split "*cond_<optab><mode>_any" | |
3031 | [(set (match_operand:SVE_F 0 "register_operand" "=&w, &w, ?&w") | |
3032 | (unspec:SVE_F | |
3033 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") | |
3034 | (unspec:SVE_F | |
3035 | [(match_operand:SVE_F 2 "register_operand" "w, w, w") | |
3036 | (match_operand:SVE_F 3 "register_operand" "w, w, w") | |
3037 | (match_operand:SVE_F 4 "register_operand" "w, w, w")] | |
3038 | SVE_COND_FP_TERNARY) | |
3039 | (match_operand:SVE_F 5 "aarch64_simd_reg_or_zero" "Dz, 0, w")] | |
3040 | UNSPEC_SEL))] | |
3041 | "TARGET_SVE | |
3042 | && !rtx_equal_p (operands[2], operands[5]) | |
3043 | && !rtx_equal_p (operands[3], operands[5]) | |
3044 | && !rtx_equal_p (operands[4], operands[5])" | |
3045 | "@ | |
3046 | movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> | |
3047 | movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> | |
3048 | #" | |
3049 | "&& reload_completed | |
3050 | && !CONSTANT_P (operands[5]) | |
3051 | && !rtx_equal_p (operands[0], operands[5])" | |
3052 | [(set (match_dup 0) | |
3053 | (unspec:SVE_F [(match_dup 1) (match_dup 4) (match_dup 5)] UNSPEC_SEL)) | |
3054 | (set (match_dup 0) | |
3055 | (unspec:SVE_F | |
3056 | [(match_dup 1) | |
3057 | (unspec:SVE_F [(match_dup 2) (match_dup 3) (match_dup 0)] | |
3058 | SVE_COND_FP_TERNARY) | |
3059 | (match_dup 0)] | |
3060 | UNSPEC_SEL))] | |
3061 | "" | |
3062 | [(set_attr "movprfx" "yes")] | |
3063 | ) | |
3064 | ||
633af029 | 3065 | ;; Shift an SVE vector left and insert a scalar into element 0. |
3066 | (define_insn "vec_shl_insert_<mode>" | |
3067 | [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w") | |
3068 | (unspec:SVE_ALL | |
3069 | [(match_operand:SVE_ALL 1 "register_operand" "0, 0") | |
3070 | (match_operand:<VEL> 2 "register_operand" "rZ, w")] | |
3071 | UNSPEC_INSR))] | |
3072 | "TARGET_SVE" | |
3073 | "@ | |
3074 | insr\t%0.<Vetype>, %<vwcore>2 | |
3075 | insr\t%0.<Vetype>, %<Vetype>2" | |
3076 | ) |