]>
Commit | Line | Data |
---|---|---|
8fa7f434 | 1 | ;; Machine description for AArch64 SVE. |
fbd26352 | 2 | ;; Copyright (C) 2009-2019 Free Software Foundation, Inc. |
8fa7f434 | 3 | ;; Contributed by ARM Ltd. |
4 | ;; | |
5 | ;; This file is part of GCC. | |
6 | ;; | |
7 | ;; GCC is free software; you can redistribute it and/or modify it | |
8 | ;; under the terms of the GNU General Public License as published by | |
9 | ;; the Free Software Foundation; either version 3, or (at your option) | |
10 | ;; any later version. | |
11 | ;; | |
12 | ;; GCC is distributed in the hope that it will be useful, but | |
13 | ;; WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 | ;; General Public License for more details. | |
16 | ;; | |
17 | ;; You should have received a copy of the GNU General Public License | |
18 | ;; along with GCC; see the file COPYING3. If not see | |
19 | ;; <http://www.gnu.org/licenses/>. | |
20 | ||
;; Note on the handling of big-endian SVE
;; --------------------------------------
;;
;; On big-endian systems, Advanced SIMD mov<mode> patterns act in the
;; same way as movdi or movti would: the first byte of memory goes
;; into the most significant byte of the register and the last byte
;; of memory goes into the least significant byte of the register.
;; This is the most natural ordering for Advanced SIMD and matches
;; the ABI layout for 64-bit and 128-bit vector types.
;;
;; As a result, the order of bytes within the register is what GCC
;; expects for a big-endian target, and subreg offsets therefore work
;; as expected, with the first element in memory having subreg offset 0
;; and the last element in memory having the subreg offset associated
;; with a big-endian lowpart.  However, this ordering also means that
;; GCC's lane numbering does not match the architecture's numbering:
;; GCC always treats the element at the lowest address in memory
;; (subreg offset 0) as element 0, while the architecture treats
;; the least significant end of the register as element 0.
;;
;; The situation for SVE is different.  We want the layout of the
;; SVE register to be same for mov<mode> as it is for maskload<mode>:
;; logically, a mov<mode> load must be indistinguishable from a
;; maskload<mode> whose mask is all true.  We therefore need the
;; register layout to match LD1 rather than LDR.  The ABI layout of
;; SVE types also matches LD1 byte ordering rather than LDR byte ordering.
;;
;; As a result, the architecture lane numbering matches GCC's lane
;; numbering, with element 0 always being the first in memory.
;; However:
;;
;; - Applying a subreg offset to a register does not give the element
;;   that GCC expects: the first element in memory has the subreg offset
;;   associated with a big-endian lowpart while the last element in memory
;;   has subreg offset 0.  We handle this via TARGET_CAN_CHANGE_MODE_CLASS.
;;
;; - We cannot use LDR and STR for spill slots that might be accessed
;;   via subregs, since although the elements have the order GCC expects,
;;   the order of the bytes within the elements is different.  We instead
;;   access spill slots via LD1 and ST1, using secondary reloads to
;;   reserve a predicate register.
63 | ||
;; SVE data moves (single-vector modes).
(define_expand "mov<mode>"
  [(set (match_operand:SVE_ALL 0 "nonimmediate_operand")
        (match_operand:SVE_ALL 1 "general_operand"))]
  "TARGET_SVE"
  {
    /* Use the predicated load and store patterns where possible.
       This is required for big-endian targets (see the comment at the
       head of the file) and increases the addressing choices for
       little-endian.  */
    if ((MEM_P (operands[0]) || MEM_P (operands[1]))
        && can_create_pseudo_p ())
      {
        aarch64_expand_sve_mem_move (operands[0], operands[1], <VPRED>mode);
        DONE;
      }

    if (CONSTANT_P (operands[1]))
      {
        aarch64_expand_mov_immediate (operands[0], operands[1],
                                      gen_vec_duplicate<mode>);
        DONE;
      }

    /* Optimize subregs on big-endian targets: we can use REV[BHW]
       instead of going through memory.  */
    if (BYTES_BIG_ENDIAN
        && aarch64_maybe_expand_sve_subreg_move (operands[0], operands[1]))
      DONE;
  }
)
95 | ||
;; A pattern for optimizing SUBREGs that have a reinterpreting effect
;; on big-endian targets; see aarch64_maybe_expand_sve_subreg_move
;; for details.  We use a special predicate for operand 2 to reduce
;; the number of patterns.
(define_insn_and_split "*aarch64_sve_mov<mode>_subreg_be"
  [(set (match_operand:SVE_ALL 0 "aarch64_sve_nonimmediate_operand" "=w")
        (unspec:SVE_ALL
          [(match_operand:VNx16BI 1 "register_operand" "Upl")
           (match_operand 2 "aarch64_any_register_operand" "w")]
          UNSPEC_REV_SUBREG))]
  "TARGET_SVE && BYTES_BIG_ENDIAN"
  "#"
  "&& reload_completed"
  [(const_int 0)]
  {
    /* Split after reload into the appropriate REV[BHW] sequence.  */
    aarch64_split_sve_subreg_move (operands[0], operands[1], operands[2]);
    DONE;
  }
)
115 | ||
;; Unpredicated moves (little-endian).  Only allow memory operations
;; during and after RA; before RA we want the predicated load and
;; store patterns to be used instead.
(define_insn "*aarch64_sve_mov<mode>_le"
  [(set (match_operand:SVE_ALL 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w")
        (match_operand:SVE_ALL 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))]
  "TARGET_SVE
   && !BYTES_BIG_ENDIAN
   && ((lra_in_progress || reload_completed)
       || (register_operand (operands[0], <MODE>mode)
           && nonmemory_operand (operands[1], <MODE>mode)))"
  "@
   ldr\t%0, %1
   str\t%1, %0
   mov\t%0.d, %1.d
   * return aarch64_output_sve_mov_immediate (operands[1]);"
)

;; Unpredicated moves (big-endian).  Memory accesses require secondary
;; reloads (see aarch64_sve_reload_be below), so only register and
;; immediate alternatives are provided here.
(define_insn "*aarch64_sve_mov<mode>_be"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w")
        (match_operand:SVE_ALL 1 "aarch64_nonmemory_operand" "w, Dn"))]
  "TARGET_SVE && BYTES_BIG_ENDIAN"
  "@
   mov\t%0.d, %1.d
   * return aarch64_output_sve_mov_immediate (operands[1]);"
)
144 | ||
;; Handle big-endian memory reloads.  We use byte PTRUE for all modes
;; to try to encourage reuse.
(define_expand "aarch64_sve_reload_be"
  [(parallel
     [(set (match_operand 0)
           (match_operand 1))
      (clobber (match_operand:VNx16BI 2 "register_operand" "=Upl"))])]
  "TARGET_SVE && BYTES_BIG_ENDIAN"
  {
    /* Create a PTRUE.  */
    emit_move_insn (operands[2], CONSTM1_RTX (VNx16BImode));

    /* Refer to the PTRUE in the appropriate mode for this move.  */
    machine_mode mode = GET_MODE (operands[0]);
    machine_mode pred_mode
      = aarch64_sve_pred_mode (GET_MODE_UNIT_SIZE (mode)).require ();
    rtx pred = gen_lowpart (pred_mode, operands[2]);

    /* Emit a predicated load or store.  */
    aarch64_emit_sve_pred_move (operands[0], pred, operands[1]);
    DONE;
  }
)
168 | ||
;; A predicated load or store for which the predicate is known to be
;; all-true.  Note that this pattern is generated directly by
;; aarch64_emit_sve_pred_move, so changes to this pattern will
;; need changes there as well.
(define_insn_and_split "@aarch64_pred_mov<mode>"
  [(set (match_operand:SVE_ALL 0 "nonimmediate_operand" "=w, w, m")
        (unspec:SVE_ALL
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
           (match_operand:SVE_ALL 2 "nonimmediate_operand" "w, m, w")]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[2], <MODE>mode))"
  "@
   #
   ld1<Vesize>\t%0.<Vetype>, %1/z, %2
   st1<Vesize>\t%2.<Vetype>, %1, %0"
  ;; A register-to-register move no longer needs the predicate, so
  ;; drop the unspec and split to a plain move.
  "&& register_operand (operands[0], <MODE>mode)
   && register_operand (operands[2], <MODE>mode)"
  [(set (match_dup 0) (match_dup 2))]
)
190 | ||
;; Misaligned moves: SVE LD1/ST1 and LDR/STR tolerate any element
;; alignment, so this is just a normal move.
(define_expand "movmisalign<mode>"
  [(set (match_operand:SVE_ALL 0 "nonimmediate_operand")
        (match_operand:SVE_ALL 1 "general_operand"))]
  "TARGET_SVE"
  {
    /* Equivalent to a normal move for our purposes.  */
    emit_move_insn (operands[0], operands[1]);
    DONE;
  }
)
201 | ||
;; Predicated load: LD1 with a general predicate (inactive elements
;; are zeroed).
(define_insn "maskload<mode><vpred>"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
        (unspec:SVE_ALL
          [(match_operand:<VPRED> 2 "register_operand" "Upl")
           (match_operand:SVE_ALL 1 "memory_operand" "m")]
          UNSPEC_LD1_SVE))]
  "TARGET_SVE"
  "ld1<Vesize>\t%0.<Vetype>, %2/z, %1"
)

;; Predicated store: ST1 with a general predicate.  The (match_dup 0)
;; records the dependence on the old memory contents, since inactive
;; elements are left untouched.
(define_insn "maskstore<mode><vpred>"
  [(set (match_operand:SVE_ALL 0 "memory_operand" "+m")
        (unspec:SVE_ALL [(match_operand:<VPRED> 2 "register_operand" "Upl")
                         (match_operand:SVE_ALL 1 "register_operand" "w")
                         (match_dup 0)]
                        UNSPEC_ST1_SVE))]
  "TARGET_SVE"
  "st1<Vesize>\t%1.<Vetype>, %2, %0"
)
221 | ||
1619606c | 222 | ;; Unpredicated gather loads. |
223 | (define_expand "gather_load<mode>" | |
224 | [(set (match_operand:SVE_SD 0 "register_operand") | |
225 | (unspec:SVE_SD | |
226 | [(match_dup 5) | |
227 | (match_operand:DI 1 "aarch64_reg_or_zero") | |
228 | (match_operand:<V_INT_EQUIV> 2 "register_operand") | |
229 | (match_operand:DI 3 "const_int_operand") | |
230 | (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>") | |
231 | (mem:BLK (scratch))] | |
232 | UNSPEC_LD1_GATHER))] | |
233 | "TARGET_SVE" | |
234 | { | |
f8e1b9c1 | 235 | operands[5] = aarch64_ptrue_reg (<VPRED>mode); |
1619606c | 236 | } |
237 | ) | |
238 | ||
239 | ;; Predicated gather loads for 32-bit elements. Operand 3 is true for | |
240 | ;; unsigned extension and false for signed extension. | |
241 | (define_insn "mask_gather_load<mode>" | |
242 | [(set (match_operand:SVE_S 0 "register_operand" "=w, w, w, w, w") | |
243 | (unspec:SVE_S | |
244 | [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl, Upl, Upl") | |
245 | (match_operand:DI 1 "aarch64_reg_or_zero" "Z, rk, rk, rk, rk") | |
246 | (match_operand:<V_INT_EQUIV> 2 "register_operand" "w, w, w, w, w") | |
247 | (match_operand:DI 3 "const_int_operand" "i, Z, Ui1, Z, Ui1") | |
248 | (match_operand:DI 4 "aarch64_gather_scale_operand_w" "Ui1, Ui1, Ui1, i, i") | |
249 | (mem:BLK (scratch))] | |
250 | UNSPEC_LD1_GATHER))] | |
251 | "TARGET_SVE" | |
252 | "@ | |
253 | ld1w\t%0.s, %5/z, [%2.s] | |
254 | ld1w\t%0.s, %5/z, [%1, %2.s, sxtw] | |
255 | ld1w\t%0.s, %5/z, [%1, %2.s, uxtw] | |
256 | ld1w\t%0.s, %5/z, [%1, %2.s, sxtw %p4] | |
257 | ld1w\t%0.s, %5/z, [%1, %2.s, uxtw %p4]" | |
258 | ) | |
259 | ||
260 | ;; Predicated gather loads for 64-bit elements. The value of operand 3 | |
261 | ;; doesn't matter in this case. | |
262 | (define_insn "mask_gather_load<mode>" | |
263 | [(set (match_operand:SVE_D 0 "register_operand" "=w, w, w") | |
264 | (unspec:SVE_D | |
265 | [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl") | |
266 | (match_operand:DI 1 "aarch64_reg_or_zero" "Z, rk, rk") | |
267 | (match_operand:<V_INT_EQUIV> 2 "register_operand" "w, w, w") | |
268 | (match_operand:DI 3 "const_int_operand") | |
269 | (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, Ui1, i") | |
270 | (mem:BLK (scratch))] | |
271 | UNSPEC_LD1_GATHER))] | |
272 | "TARGET_SVE" | |
273 | "@ | |
274 | ld1d\t%0.d, %5/z, [%2.d] | |
275 | ld1d\t%0.d, %5/z, [%1, %2.d] | |
276 | ld1d\t%0.d, %5/z, [%1, %2.d, lsl %p4]" | |
277 | ) | |
278 | ||
0bf8b382 | 279 | ;; Unpredicated scatter store. |
280 | (define_expand "scatter_store<mode>" | |
281 | [(set (mem:BLK (scratch)) | |
282 | (unspec:BLK | |
283 | [(match_dup 5) | |
284 | (match_operand:DI 0 "aarch64_reg_or_zero") | |
285 | (match_operand:<V_INT_EQUIV> 1 "register_operand") | |
286 | (match_operand:DI 2 "const_int_operand") | |
287 | (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>") | |
288 | (match_operand:SVE_SD 4 "register_operand")] | |
289 | UNSPEC_ST1_SCATTER))] | |
290 | "TARGET_SVE" | |
291 | { | |
f8e1b9c1 | 292 | operands[5] = aarch64_ptrue_reg (<VPRED>mode); |
0bf8b382 | 293 | } |
294 | ) | |
295 | ||
296 | ;; Predicated scatter stores for 32-bit elements. Operand 2 is true for | |
297 | ;; unsigned extension and false for signed extension. | |
298 | (define_insn "mask_scatter_store<mode>" | |
299 | [(set (mem:BLK (scratch)) | |
300 | (unspec:BLK | |
301 | [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl, Upl, Upl") | |
302 | (match_operand:DI 0 "aarch64_reg_or_zero" "Z, rk, rk, rk, rk") | |
303 | (match_operand:<V_INT_EQUIV> 1 "register_operand" "w, w, w, w, w") | |
304 | (match_operand:DI 2 "const_int_operand" "i, Z, Ui1, Z, Ui1") | |
305 | (match_operand:DI 3 "aarch64_gather_scale_operand_w" "Ui1, Ui1, Ui1, i, i") | |
306 | (match_operand:SVE_S 4 "register_operand" "w, w, w, w, w")] | |
307 | UNSPEC_ST1_SCATTER))] | |
308 | "TARGET_SVE" | |
309 | "@ | |
310 | st1w\t%4.s, %5, [%1.s] | |
311 | st1w\t%4.s, %5, [%0, %1.s, sxtw] | |
312 | st1w\t%4.s, %5, [%0, %1.s, uxtw] | |
313 | st1w\t%4.s, %5, [%0, %1.s, sxtw %p3] | |
314 | st1w\t%4.s, %5, [%0, %1.s, uxtw %p3]" | |
315 | ) | |
316 | ||
317 | ;; Predicated scatter stores for 64-bit elements. The value of operand 2 | |
318 | ;; doesn't matter in this case. | |
319 | (define_insn "mask_scatter_store<mode>" | |
320 | [(set (mem:BLK (scratch)) | |
321 | (unspec:BLK | |
322 | [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl") | |
323 | (match_operand:DI 0 "aarch64_reg_or_zero" "Z, rk, rk") | |
324 | (match_operand:<V_INT_EQUIV> 1 "register_operand" "w, w, w") | |
325 | (match_operand:DI 2 "const_int_operand") | |
326 | (match_operand:DI 3 "aarch64_gather_scale_operand_d" "Ui1, Ui1, i") | |
327 | (match_operand:SVE_D 4 "register_operand" "w, w, w")] | |
328 | UNSPEC_ST1_SCATTER))] | |
329 | "TARGET_SVE" | |
330 | "@ | |
331 | st1d\t%4.d, %5, [%1.d] | |
332 | st1d\t%4.d, %5, [%0, %1.d] | |
333 | st1d\t%4.d, %5, [%0, %1.d, lsl %p3]" | |
334 | ) | |
335 | ||
0ac5a51b | 336 | ;; SVE structure moves. |
337 | (define_expand "mov<mode>" | |
338 | [(set (match_operand:SVE_STRUCT 0 "nonimmediate_operand") | |
339 | (match_operand:SVE_STRUCT 1 "general_operand"))] | |
340 | "TARGET_SVE" | |
341 | { | |
342 | /* Big-endian loads and stores need to be done via LD1 and ST1; | |
343 | see the comment at the head of the file for details. */ | |
344 | if ((MEM_P (operands[0]) || MEM_P (operands[1])) | |
345 | && BYTES_BIG_ENDIAN) | |
346 | { | |
347 | gcc_assert (can_create_pseudo_p ()); | |
348 | aarch64_expand_sve_mem_move (operands[0], operands[1], <VPRED>mode); | |
349 | DONE; | |
350 | } | |
351 | ||
352 | if (CONSTANT_P (operands[1])) | |
353 | { | |
354 | aarch64_expand_mov_immediate (operands[0], operands[1]); | |
355 | DONE; | |
356 | } | |
357 | } | |
358 | ) | |
359 | ||
;; Unpredicated structure moves (little-endian).  Emitted as "#" and
;; split into per-vector moves after RA; see the define_split below.
(define_insn "*aarch64_sve_mov<mode>_le"
  [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w")
        (match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))]
  "TARGET_SVE && !BYTES_BIG_ENDIAN"
  "#"
  [(set_attr "length" "<insn_length>")]
)
368 | ||
;; Unpredicated structure moves (big-endian).  Memory accesses require
;; secondary reloads, so only register and immediate operands are
;; allowed here.  Emitted as "#" and split into per-vector moves
;; after RA; see the define_split below.
(define_insn "*aarch64_sve_mov<mode>_be"
  [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w, w")
        (match_operand:SVE_STRUCT 1 "aarch64_nonmemory_operand" "w, Dn"))]
  "TARGET_SVE && BYTES_BIG_ENDIAN"
  "#"
  [(set_attr "length" "<insn_length>")]
)
378 | ||
;; Split unpredicated structure moves into pieces.  This is the same
;; for both big-endian and little-endian code, although it only needs
;; to handle memory operands for little-endian code.
(define_split
  [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand")
        (match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand"))]
  "TARGET_SVE && reload_completed"
  [(const_int 0)]
  {
    rtx dest = operands[0];
    rtx src = operands[1];
    if (REG_P (dest) && REG_P (src))
      /* Register-to-register copies can reuse the Advanced SIMD helper,
         which also handles overlapping source/destination ranges.  */
      aarch64_simd_emit_reg_reg_move (operands, <VSINGLE>mode, <vector_count>);
    else
      for (unsigned int i = 0; i < <vector_count>; ++i)
        {
          rtx subdest = simplify_gen_subreg (<VSINGLE>mode, dest, <MODE>mode,
                                             i * BYTES_PER_SVE_VECTOR);
          rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, src, <MODE>mode,
                                            i * BYTES_PER_SVE_VECTOR);
          emit_insn (gen_rtx_SET (subdest, subsrc));
        }
    DONE;
  }
)
404 | ||
;; Predicated structure moves.  This works for both endiannesses but in
;; practice is only useful for big-endian.
(define_insn_and_split "@aarch64_pred_mov<mode>"
  [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_struct_nonimmediate_operand" "=w, w, Utx")
        (unspec:SVE_STRUCT
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
           (match_operand:SVE_STRUCT 2 "aarch64_sve_struct_nonimmediate_operand" "w, Utx, w")]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[2], <MODE>mode))"
  "#"
  "&& reload_completed"
  [(const_int 0)]
  {
    /* Split into one predicated single-vector move per constituent
       vector, all sharing the same governing predicate.  */
    for (unsigned int i = 0; i < <vector_count>; ++i)
      {
        rtx subdest = simplify_gen_subreg (<VSINGLE>mode, operands[0],
                                           <MODE>mode,
                                           i * BYTES_PER_SVE_VECTOR);
        rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, operands[2],
                                          <MODE>mode,
                                          i * BYTES_PER_SVE_VECTOR);
        aarch64_emit_sve_pred_move (subdest, operands[1], subsrc);
      }
    DONE;
  }
  [(set_attr "length" "<insn_length>")]
)
434 | ||
8fa7f434 | 435 | (define_expand "mov<mode>" |
436 | [(set (match_operand:PRED_ALL 0 "nonimmediate_operand") | |
437 | (match_operand:PRED_ALL 1 "general_operand"))] | |
438 | "TARGET_SVE" | |
439 | { | |
440 | if (GET_CODE (operands[0]) == MEM) | |
441 | operands[1] = force_reg (<MODE>mode, operands[1]); | |
442 | } | |
443 | ) | |
444 | ||
;; Predicate moves: register copy, LDR/STR spill/reload, and the
;; PFALSE/PTRUE constant forms.
(define_insn "*aarch64_sve_mov<mode>"
  [(set (match_operand:PRED_ALL 0 "nonimmediate_operand" "=Upa, m, Upa, Upa, Upa")
        (match_operand:PRED_ALL 1 "general_operand" "Upa, Upa, m, Dz, Dm"))]
  "TARGET_SVE
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
  "@
   mov\t%0.b, %1.b
   str\t%1, %0
   ldr\t%0, %1
   pfalse\t%0.b
   * return aarch64_output_ptrue (<MODE>mode, '<Vetype>');"
)
458 | ||
;; Handle extractions from a predicate by converting to an integer vector
;; and extracting from there.
(define_expand "vec_extract<vpred><Vel>"
  [(match_operand:<VEL> 0 "register_operand")
   (match_operand:<VPRED> 1 "register_operand")
   (match_operand:SI 2 "nonmemory_operand")
   ;; Dummy operand to which we can attach the iterator.
   (reg:SVE_I V0_REGNUM)]
  "TARGET_SVE"
  {
    /* Materialize the predicate as a 0/1 integer vector, then extract
       the requested lane from that vector.  */
    rtx tmp = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_aarch64_sve_dup<mode>_const (tmp, operands[1],
                                                CONST1_RTX (<MODE>mode),
                                                CONST0_RTX (<MODE>mode)));
    emit_insn (gen_vec_extract<mode><Vel> (operands[0], tmp, operands[2]));
    DONE;
  }
)

;; Extract an element from a data vector.  Constant indices that the
;; special cases below don't handle fall through to the vec_select
;; insn patterns that follow.
(define_expand "vec_extract<mode><Vel>"
  [(set (match_operand:<VEL> 0 "register_operand")
        (vec_select:<VEL>
          (match_operand:SVE_ALL 1 "register_operand")
          (parallel [(match_operand:SI 2 "nonmemory_operand")])))]
  "TARGET_SVE"
  {
    poly_int64 val;
    if (poly_int_rtx_p (operands[2], &val)
        && known_eq (val, GET_MODE_NUNITS (<MODE>mode) - 1))
      {
        /* The last element can be extracted with a LASTB and a false
           predicate.  */
        rtx sel = force_reg (<VPRED>mode, CONST0_RTX (<VPRED>mode));
        emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1]));
        DONE;
      }
    if (!CONST_INT_P (operands[2]))
      {
        /* Create an index with operand[2] as the base and -1 as the step.
           It will then be zero for the element we care about.  */
        rtx index = gen_lowpart (<VEL_INT>mode, operands[2]);
        index = force_reg (<VEL_INT>mode, index);
        rtx series = gen_reg_rtx (<V_INT_EQUIV>mode);
        emit_insn (gen_vec_series<v_int_equiv> (series, index, constm1_rtx));

        /* Get a predicate that is true for only that element.  */
        rtx zero = CONST0_RTX (<V_INT_EQUIV>mode);
        rtx cmp = gen_rtx_EQ (<V_INT_EQUIV>mode, series, zero);
        rtx sel = gen_reg_rtx (<VPRED>mode);
        emit_insn (gen_vec_cmp<v_int_equiv><vpred> (sel, cmp, series, zero));

        /* Select the element using LASTB.  */
        emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1]));
        DONE;
      }
  }
)
516 | ||
7ec0c7f5 | 517 | ;; Extract element zero. This is a special case because we want to force |
518 | ;; the registers to be the same for the second alternative, and then | |
519 | ;; split the instruction into nothing after RA. | |
520 | (define_insn_and_split "*vec_extract<mode><Vel>_0" | |
521 | [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv") | |
522 | (vec_select:<VEL> | |
523 | (match_operand:SVE_ALL 1 "register_operand" "w, 0, w") | |
524 | (parallel [(const_int 0)])))] | |
525 | "TARGET_SVE" | |
526 | { | |
527 | operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1])); | |
528 | switch (which_alternative) | |
529 | { | |
530 | case 0: | |
531 | return "umov\\t%<vwcore>0, %1.<Vetype>[0]"; | |
532 | case 1: | |
533 | return "#"; | |
534 | case 2: | |
535 | return "st1\\t{%1.<Vetype>}[0], %0"; | |
536 | default: | |
537 | gcc_unreachable (); | |
538 | } | |
539 | } | |
540 | "&& reload_completed | |
541 | && REG_P (operands[0]) | |
542 | && REGNO (operands[0]) == REGNO (operands[1])" | |
543 | [(const_int 0)] | |
544 | { | |
545 | emit_note (NOTE_INSN_DELETED); | |
546 | DONE; | |
547 | } | |
548 | [(set_attr "type" "neon_to_gp_q, untyped, neon_store1_one_lane_q")] | |
549 | ) | |
550 | ||
8fa7f434 | 551 | ;; Extract an element from the Advanced SIMD portion of the register. |
552 | ;; We don't just reuse the aarch64-simd.md pattern because we don't | |
7ec0c7f5 | 553 | ;; want any change in lane number on big-endian targets. |
8fa7f434 | 554 | (define_insn "*vec_extract<mode><Vel>_v128" |
555 | [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv") | |
556 | (vec_select:<VEL> | |
557 | (match_operand:SVE_ALL 1 "register_operand" "w, w, w") | |
558 | (parallel [(match_operand:SI 2 "const_int_operand")])))] | |
559 | "TARGET_SVE | |
7ec0c7f5 | 560 | && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 1, 15)" |
8fa7f434 | 561 | { |
7ec0c7f5 | 562 | operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1])); |
8fa7f434 | 563 | switch (which_alternative) |
564 | { | |
565 | case 0: | |
566 | return "umov\\t%<vwcore>0, %1.<Vetype>[%2]"; | |
567 | case 1: | |
568 | return "dup\\t%<Vetype>0, %1.<Vetype>[%2]"; | |
569 | case 2: | |
570 | return "st1\\t{%1.<Vetype>}[%2], %0"; | |
571 | default: | |
572 | gcc_unreachable (); | |
573 | } | |
574 | } | |
575 | [(set_attr "type" "neon_to_gp_q, neon_dup_q, neon_store1_one_lane_q")] | |
576 | ) | |
577 | ||
578 | ;; Extract an element in the range of DUP. This pattern allows the | |
579 | ;; source and destination to be different. | |
580 | (define_insn "*vec_extract<mode><Vel>_dup" | |
581 | [(set (match_operand:<VEL> 0 "register_operand" "=w") | |
582 | (vec_select:<VEL> | |
583 | (match_operand:SVE_ALL 1 "register_operand" "w") | |
584 | (parallel [(match_operand:SI 2 "const_int_operand")])))] | |
585 | "TARGET_SVE | |
586 | && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 16, 63)" | |
587 | { | |
588 | operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0])); | |
589 | return "dup\t%0.<Vetype>, %1.<Vetype>[%2]"; | |
590 | } | |
591 | ) | |
592 | ||
593 | ;; Extract an element outside the range of DUP. This pattern requires the | |
594 | ;; source and destination to be the same. | |
595 | (define_insn "*vec_extract<mode><Vel>_ext" | |
596 | [(set (match_operand:<VEL> 0 "register_operand" "=w") | |
597 | (vec_select:<VEL> | |
598 | (match_operand:SVE_ALL 1 "register_operand" "0") | |
599 | (parallel [(match_operand:SI 2 "const_int_operand")])))] | |
600 | "TARGET_SVE && INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode) >= 64" | |
601 | { | |
602 | operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0])); | |
603 | operands[2] = GEN_INT (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode)); | |
604 | return "ext\t%0.b, %0.b, %0.b, #%2"; | |
605 | } | |
606 | ) | |
607 | ||
;; Extract the last active element of operand 1 into operand 0.
;; If no elements are active, extract the last inactive element instead.
(define_insn "extract_last_<mode>"
  [(set (match_operand:<VEL> 0 "register_operand" "=r, w")
        (unspec:<VEL>
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (match_operand:SVE_ALL 2 "register_operand" "w, w")]
          UNSPEC_LASTB))]
  "TARGET_SVE"
  "@
   lastb\t%<vwcore>0, %1, %2.<Vetype>
   lastb\t%<Vetype>0, %1, %2.<Vetype>"
)
621 | ||
;; Duplicate a scalar into every element of a vector.  Memory inputs
;; go straight to LD1R; register inputs match the insn pattern below.
(define_expand "vec_duplicate<mode>"
  [(parallel
     [(set (match_operand:SVE_ALL 0 "register_operand")
           (vec_duplicate:SVE_ALL
             (match_operand:<VEL> 1 "aarch64_sve_dup_operand")))
      (clobber (scratch:<VPRED>))])]
  "TARGET_SVE"
  {
    if (MEM_P (operands[1]))
      {
        rtx ptrue = aarch64_ptrue_reg (<VPRED>mode);
        emit_insn (gen_sve_ld1r<mode> (operands[0], ptrue, operands[1],
                                       CONST0_RTX (<MODE>mode)));
        DONE;
      }
  }
)

;; Accept memory operands for the benefit of combine, and also in case
;; the scalar input gets spilled to memory during RA.  We want to split
;; the load at the first opportunity in order to allow the PTRUE to be
;; optimized with surrounding code.
(define_insn_and_split "*vec_duplicate<mode>_reg"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w, w")
        (vec_duplicate:SVE_ALL
          (match_operand:<VEL> 1 "aarch64_sve_dup_operand" "r, w, Uty"))
   (clobber (match_scratch:<VPRED> 2 "=X, X, Upl"))]
  "TARGET_SVE"
  "@
   mov\t%0.<Vetype>, %<vwcore>1
   mov\t%0.<Vetype>, %<Vetype>1
   #"
  "&& MEM_P (operands[1])"
  [(const_int 0)]
  {
    /* Before RA the scratch is still a SCRATCH; allocate a pseudo for
       the governing predicate in that case.  */
    if (GET_CODE (operands[2]) == SCRATCH)
      operands[2] = gen_reg_rtx (<VPRED>mode);
    emit_move_insn (operands[2], CONSTM1_RTX (<VPRED>mode));
    emit_insn (gen_sve_ld1r<mode> (operands[0], operands[2], operands[1],
                                   CONST0_RTX (<MODE>mode)));
    DONE;
  }
  [(set_attr "length" "4,4,8")]
)
666 | ||
;; This is used for vec_duplicate<mode>s from memory, but can also
;; be used by combine to optimize selects of a vec_duplicate<mode>
;; with zero.
(define_insn "sve_ld1r<mode>"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
        (unspec:SVE_ALL
          [(match_operand:<VPRED> 1 "register_operand" "Upl")
           (vec_duplicate:SVE_ALL
             (match_operand:<VEL> 2 "aarch64_sve_ld1r_operand" "Uty"))
           (match_operand:SVE_ALL 3 "aarch64_simd_imm_zero")]
          UNSPEC_SEL))]
  "TARGET_SVE"
  "ld1r<Vesize>\t%0.<Vetype>, %1/z, %2"
)

;; Load 128 bits from memory and duplicate to fill a vector.  Since there
;; are so few operations on 128-bit "elements", we don't define a VNx1TI
;; and simply use vectors of bytes instead.
(define_insn "*sve_ld1rq<Vesize>"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
        (unspec:SVE_ALL
          [(match_operand:<VPRED> 1 "register_operand" "Upl")
           (match_operand:TI 2 "aarch64_sve_ld1r_operand" "Uty")]
          UNSPEC_LD1RQ))]
  "TARGET_SVE"
  "ld1rq<Vesize>\t%0.<Vetype>, %1/z, %2"
)
694 | ||
;; Implement a predicate broadcast by shifting the low bit of the scalar
;; input into the top bit and using a WHILELO.  An alternative would be to
;; duplicate the input and do a compare with zero.
(define_expand "vec_duplicate<mode>"
  [(set (match_operand:PRED_ALL 0 "register_operand")
        (vec_duplicate:PRED_ALL (match_operand 1 "register_operand")))]
  "TARGET_SVE"
  {
    /* Shift bit 0 to bit 63: WHILELO (0, tmp) is then all-true when
       bit 0 was set (tmp is a large unsigned value) and all-false
       when it was clear (tmp == 0).  */
    rtx tmp = gen_reg_rtx (DImode);
    rtx op1 = gen_lowpart (DImode, operands[1]);
    emit_insn (gen_ashldi3 (tmp, op1, gen_int_mode (63, DImode)));
    emit_insn (gen_while_ultdi<mode> (operands[0], const0_rtx, tmp));
    DONE;
  }
)
710 | ||
;; Linear series {base, base+step, base+2*step, ...} via INDEX, with
;; immediate forms for either the base or the step.
(define_insn "vec_series<mode>"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w")
        (vec_series:SVE_I
          (match_operand:<VEL> 1 "aarch64_sve_index_operand" "Usi, r, r")
          (match_operand:<VEL> 2 "aarch64_sve_index_operand" "r, Usi, r")))]
  "TARGET_SVE"
  "@
   index\t%0.<Vetype>, #%1, %<vw>2
   index\t%0.<Vetype>, %<vw>1, #%2
   index\t%0.<Vetype>, %<vw>1, %<vw>2"
)

;; Optimize {x, x, x, x, ...} + {0, n, 2*n, 3*n, ...} if n is in range
;; of an INDEX instruction.
(define_insn "*vec_series<mode>_plus"
  [(set (match_operand:SVE_I 0 "register_operand" "=w")
        (plus:SVE_I
          (vec_duplicate:SVE_I
            (match_operand:<VEL> 1 "register_operand" "r"))
          (match_operand:SVE_I 2 "immediate_operand")))]
  "TARGET_SVE && aarch64_check_zero_based_sve_index_immediate (operands[2])"
  {
    /* Replace the vector constant with the scalar step it encodes.  */
    operands[2] = aarch64_check_zero_based_sve_index_immediate (operands[2]);
    return "index\t%0.<Vetype>, %<vw>1, #%2";
  }
)
737 | ||
0ac5a51b | 738 | ;; Unpredicated LD[234]. |
739 | (define_expand "vec_load_lanes<mode><vsingle>" | |
740 | [(set (match_operand:SVE_STRUCT 0 "register_operand") | |
741 | (unspec:SVE_STRUCT | |
742 | [(match_dup 2) | |
743 | (match_operand:SVE_STRUCT 1 "memory_operand")] | |
744 | UNSPEC_LDN))] | |
745 | "TARGET_SVE" | |
746 | { | |
f8e1b9c1 | 747 | operands[2] = aarch64_ptrue_reg (<VPRED>mode); |
0ac5a51b | 748 | } |
749 | ) | |
750 | ||
;; Predicated LD[234].  Operand 2 is the governing predicate; inactive
;; elements of the destination are zeroed (/z).
(define_insn "vec_mask_load_lanes<mode><vsingle>"
  [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w")
	(unspec:SVE_STRUCT
	  [(match_operand:<VPRED> 2 "register_operand" "Upl")
	   (match_operand:SVE_STRUCT 1 "memory_operand" "m")]
	  UNSPEC_LDN))]
  "TARGET_SVE"
  "ld<vector_count><Vesize>\t%0, %2/z, %1"
)
761 | ||
;; Unpredicated ST[234].  This is always a full update, so the dependence
;; on the old value of the memory location (via (match_dup 0)) is redundant.
;; There doesn't seem to be any obvious benefit to treating the all-true
;; case differently though.  In particular, it's very unlikely that we'll
;; only find out during RTL that a store_lanes is dead.
(define_expand "vec_store_lanes<mode><vsingle>"
  [(set (match_operand:SVE_STRUCT 0 "memory_operand")
	(unspec:SVE_STRUCT
	  [(match_dup 2)
	   (match_operand:SVE_STRUCT 1 "register_operand")
	   (match_dup 0)]
	  UNSPEC_STN))]
  "TARGET_SVE"
  {
    ;; Operand 2 becomes the all-true governing predicate.
    operands[2] = aarch64_ptrue_reg (<VPRED>mode);
  }
)
779 | ||
;; Predicated ST[234].  The (match_dup 0) input models the merge with the
;; unwritten (inactive) parts of the memory destination.
(define_insn "vec_mask_store_lanes<mode><vsingle>"
  [(set (match_operand:SVE_STRUCT 0 "memory_operand" "+m")
	(unspec:SVE_STRUCT
	  [(match_operand:<VPRED> 2 "register_operand" "Upl")
	   (match_operand:SVE_STRUCT 1 "register_operand" "w")
	   (match_dup 0)]
	  UNSPEC_STN))]
  "TARGET_SVE"
  "st<vector_count><Vesize>\t%1, %2, %0"
)
791 | ||
8fa7f434 | 792 | (define_expand "vec_perm<mode>" |
793 | [(match_operand:SVE_ALL 0 "register_operand") | |
794 | (match_operand:SVE_ALL 1 "register_operand") | |
795 | (match_operand:SVE_ALL 2 "register_operand") | |
796 | (match_operand:<V_INT_EQUIV> 3 "aarch64_sve_vec_perm_operand")] | |
797 | "TARGET_SVE && GET_MODE_NUNITS (<MODE>mode).is_constant ()" | |
798 | { | |
799 | aarch64_expand_sve_vec_perm (operands[0], operands[1], | |
800 | operands[2], operands[3]); | |
801 | DONE; | |
802 | } | |
803 | ) | |
804 | ||
;; Table lookup: select elements of operand 1 using the indices in
;; operand 2.
(define_insn "*aarch64_sve_tbl<mode>"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
	(unspec:SVE_ALL
	  [(match_operand:SVE_ALL 1 "register_operand" "w")
	   (match_operand:<V_INT_EQUIV> 2 "register_operand" "w")]
	  UNSPEC_TBL))]
  "TARGET_SVE"
  "tbl\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)
814 | ||
;; ZIP/UZP/TRN-style permutes (the PERMUTE iterator) on predicates.
(define_insn "*aarch64_sve_<perm_insn><perm_hilo><mode>"
  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
	(unspec:PRED_ALL [(match_operand:PRED_ALL 1 "register_operand" "Upa")
			  (match_operand:PRED_ALL 2 "register_operand" "Upa")]
			 PERMUTE))]
  "TARGET_SVE"
  "<perm_insn><perm_hilo>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)
823 | ||
88e81b08 | 824 | (define_insn "aarch64_sve_<perm_insn><perm_hilo><mode>" |
8fa7f434 | 825 | [(set (match_operand:SVE_ALL 0 "register_operand" "=w") |
826 | (unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "w") | |
827 | (match_operand:SVE_ALL 2 "register_operand" "w")] | |
828 | PERMUTE))] | |
829 | "TARGET_SVE" | |
830 | "<perm_insn><perm_hilo>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>" | |
831 | ) | |
832 | ||
;; Reverse the order of elements within each 64-bit doubleword.
(define_insn "*aarch64_sve_rev64<mode>"
  [(set (match_operand:SVE_BHS 0 "register_operand" "=w")
	(unspec:SVE_BHS
	  [(match_operand:VNx2BI 1 "register_operand" "Upl")
	   (unspec:SVE_BHS [(match_operand:SVE_BHS 2 "register_operand" "w")]
			   UNSPEC_REV64)]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "rev<Vesize>\t%0.d, %1/m, %2.d"
)
843 | ||
;; Reverse the order of elements within each 32-bit word.
(define_insn "*aarch64_sve_rev32<mode>"
  [(set (match_operand:SVE_BH 0 "register_operand" "=w")
	(unspec:SVE_BH
	  [(match_operand:VNx4BI 1 "register_operand" "Upl")
	   (unspec:SVE_BH [(match_operand:SVE_BH 2 "register_operand" "w")]
			  UNSPEC_REV32)]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "rev<Vesize>\t%0.s, %1/m, %2.s"
)
854 | ||
;; Reverse the order of bytes within each 16-bit halfword.
(define_insn "*aarch64_sve_rev16vnx16qi"
  [(set (match_operand:VNx16QI 0 "register_operand" "=w")
	(unspec:VNx16QI
	  [(match_operand:VNx8BI 1 "register_operand" "Upl")
	   (unspec:VNx16QI [(match_operand:VNx16QI 2 "register_operand" "w")]
			   UNSPEC_REV16)]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "revb\t%0.h, %1/m, %2.h"
)
865 | ||
08e92dcc | 866 | (define_insn "@aarch64_sve_rev<mode>" |
8fa7f434 | 867 | [(set (match_operand:SVE_ALL 0 "register_operand" "=w") |
868 | (unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "w")] | |
869 | UNSPEC_REV))] | |
870 | "TARGET_SVE" | |
871 | "rev\t%0.<Vetype>, %1.<Vetype>") | |
872 | ||
;; Broadcast lane 2 of operand 1 to all elements of operand 0.
;; The lane's byte offset must fit in DUP's immediate range (0-63 bytes).
(define_insn "*aarch64_sve_dup_lane<mode>"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
	(vec_duplicate:SVE_ALL
	  (vec_select:<VEL>
	    (match_operand:SVE_ALL 1 "register_operand" "w")
	    (parallel [(match_operand:SI 2 "const_int_operand")]))))]
  "TARGET_SVE
   && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 0, 63)"
  "dup\t%0.<Vetype>, %1.<Vetype>[%2]"
)
883 | ||
;; Note that the immediate (third) operand is the lane index not
;; the byte index.  The output routine scales it to EXT's byte
;; immediate, which must be in the range [0, 255].
(define_insn "*aarch64_sve_ext<mode>"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
	(unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "0")
			 (match_operand:SVE_ALL 2 "register_operand" "w")
			 (match_operand:SI 3 "const_int_operand")]
			UNSPEC_EXT))]
  "TARGET_SVE
   && IN_RANGE (INTVAL (operands[3]) * GET_MODE_SIZE (<VEL>mode), 0, 255)"
  {
    operands[3] = GEN_INT (INTVAL (operands[3]) * GET_MODE_SIZE (<VEL>mode));
    return "ext\\t%0.b, %0.b, %2.b, #%3";
  }
)
899 | ||
;; Unpredicated integer addition.  The immediate alternatives use
;; ADD #imm, SUB #imm (for negated immediates) and INC/DEC-style
;; vector-length immediates respectively.
(define_insn "add<mode>3"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w, w")
	(plus:SVE_I
	  (match_operand:SVE_I 1 "register_operand" "%0, 0, 0, w")
	  (match_operand:SVE_I 2 "aarch64_sve_add_operand" "vsa, vsn, vsi, w")))]
  "TARGET_SVE"
  "@
   add\t%0.<Vetype>, %0.<Vetype>, #%D2
   sub\t%0.<Vetype>, %0.<Vetype>, #%N2
   * return aarch64_output_sve_inc_dec_immediate (\"%0.<Vetype>\", operands[2]);
   add\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)
912 | ||
;; Unpredicated integer subtraction.  The second alternative handles an
;; immediate minuend via SUBR (reversed subtract).
(define_insn "sub<mode>3"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, w")
	(minus:SVE_I
	  (match_operand:SVE_I 1 "aarch64_sve_arith_operand" "w, vsa")
	  (match_operand:SVE_I 2 "register_operand" "w, 0")))]
  "TARGET_SVE"
  "@
   sub\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>
   subr\t%0.<Vetype>, %0.<Vetype>, #%D1"
)
923 | ||
924 | ;; Unpredicated multiplication. | |
925 | (define_expand "mul<mode>3" | |
926 | [(set (match_operand:SVE_I 0 "register_operand") | |
927 | (unspec:SVE_I | |
928 | [(match_dup 3) | |
929 | (mult:SVE_I | |
930 | (match_operand:SVE_I 1 "register_operand") | |
931 | (match_operand:SVE_I 2 "aarch64_sve_mul_operand"))] | |
932 | UNSPEC_MERGE_PTRUE))] | |
933 | "TARGET_SVE" | |
934 | { | |
f8e1b9c1 | 935 | operands[3] = aarch64_ptrue_reg (<VPRED>mode); |
8fa7f434 | 936 | } |
937 | ) | |
938 | ||
;; Multiplication predicated with a PTRUE.  We don't actually need the
;; predicate for the first alternative, but using Upa or X isn't likely
;; to gain much and would make the instruction seem less uniform to the
;; register allocator.
;;
;; Alternatives: (1) multiply by immediate, emitted as "#" and split
;; below; (2) register MUL with the destination tied to operand 2;
;; (3) untied destination via MOVPRFX + MUL.
(define_insn_and_split "*mul<mode>3"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
	(unspec:SVE_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
	   (mult:SVE_I
	     (match_operand:SVE_I 2 "register_operand" "%0, 0, w")
	     (match_operand:SVE_I 3 "aarch64_sve_mul_operand" "vsm, w, w"))]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "@
   #
   mul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;mul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  ; Split the unpredicated form after reload, so that we don't have
  ; the unnecessary PTRUE.
  "&& reload_completed
   && !register_operand (operands[3], <MODE>mode)"
  [(set (match_dup 0) (mult:SVE_I (match_dup 2) (match_dup 3)))]
  ""
  [(set_attr "movprfx" "*,*,yes")]
)
964 | ||
0fd5a0b4 | 965 | ;; Unpredicated multiplications by a constant (post-RA only). |
966 | ;; These are generated by splitting a predicated instruction whose | |
967 | ;; predicate is unused. | |
968 | (define_insn "*post_ra_mul<mode>3" | |
969 | [(set (match_operand:SVE_I 0 "register_operand" "=w") | |
970 | (mult:SVE_I | |
971 | (match_operand:SVE_I 1 "register_operand" "0") | |
972 | (match_operand:SVE_I 2 "aarch64_sve_mul_immediate")))] | |
973 | "TARGET_SVE && reload_completed" | |
974 | "mul\t%0.<Vetype>, %0.<Vetype>, #%2" | |
975 | ) | |
976 | ||
8fa7f434 | 977 | (define_insn "*madd<mode>" |
83d1ca63 | 978 | [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w") |
8fa7f434 | 979 | (plus:SVE_I |
980 | (unspec:SVE_I | |
83d1ca63 | 981 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") |
982 | (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w, w") | |
983 | (match_operand:SVE_I 3 "register_operand" "w, w, w"))] | |
8fa7f434 | 984 | UNSPEC_MERGE_PTRUE) |
83d1ca63 | 985 | (match_operand:SVE_I 4 "register_operand" "w, 0, w")))] |
8fa7f434 | 986 | "TARGET_SVE" |
987 | "@ | |
988 | mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype> | |
83d1ca63 | 989 | mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> |
990 | movprfx\t%0, %4\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>" | |
991 | [(set_attr "movprfx" "*,*,yes")] | |
8fa7f434 | 992 | ) |
993 | ||
;; Multiply-subtract: operand 0 = operand 4 - operand 2 * operand 3.
;; MSB overwrites a multiplicand, MLS overwrites the subtrahend's
;; accumulator; the third alternative uses MOVPRFX.
(define_insn "*msub<mode>3"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
	(minus:SVE_I
	  (match_operand:SVE_I 4 "register_operand" "w, 0, w")
	  (unspec:SVE_I
	    [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
	     (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w, w")
			 (match_operand:SVE_I 3 "register_operand" "w, w, w"))]
	    UNSPEC_MERGE_PTRUE)))]
  "TARGET_SVE"
  "@
   msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
   mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
   movprfx\t%0, %4\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,*,yes")]
)
1010 | ||
dea784df | 1011 | ;; Unpredicated highpart multiplication. |
1012 | (define_expand "<su>mul<mode>3_highpart" | |
1013 | [(set (match_operand:SVE_I 0 "register_operand") | |
1014 | (unspec:SVE_I | |
1015 | [(match_dup 3) | |
1016 | (unspec:SVE_I [(match_operand:SVE_I 1 "register_operand") | |
1017 | (match_operand:SVE_I 2 "register_operand")] | |
1018 | MUL_HIGHPART)] | |
1019 | UNSPEC_MERGE_PTRUE))] | |
1020 | "TARGET_SVE" | |
1021 | { | |
f8e1b9c1 | 1022 | operands[3] = aarch64_ptrue_reg (<VPRED>mode); |
dea784df | 1023 | } |
1024 | ) | |
1025 | ||
;; Predicated highpart multiplication.  The destination is tied to the
;; first (commutative) input; the second alternative frees it via MOVPRFX.
(define_insn "*<su>mul<mode>3_highpart"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
	(unspec:SVE_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (unspec:SVE_I [(match_operand:SVE_I 2 "register_operand" "%0, w")
			  (match_operand:SVE_I 3 "register_operand" "w, w")]
			 MUL_HIGHPART)]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "@
   <su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;<su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)
1041 | ||
0bbf725c | 1042 | ;; Unpredicated division. |
1043 | (define_expand "<optab><mode>3" | |
1044 | [(set (match_operand:SVE_SDI 0 "register_operand") | |
1045 | (unspec:SVE_SDI | |
1046 | [(match_dup 3) | |
1047 | (SVE_INT_BINARY_SD:SVE_SDI | |
1048 | (match_operand:SVE_SDI 1 "register_operand") | |
1049 | (match_operand:SVE_SDI 2 "register_operand"))] | |
1050 | UNSPEC_MERGE_PTRUE))] | |
1051 | "TARGET_SVE" | |
1052 | { | |
f8e1b9c1 | 1053 | operands[3] = aarch64_ptrue_reg (<VPRED>mode); |
0bbf725c | 1054 | } |
1055 | ) | |
1056 | ||
;; Division predicated with a PTRUE.  The second alternative uses the
;; reversed form (<sve_int_op>r) when the destination is tied to the
;; second input; the third uses MOVPRFX for an untied destination.
(define_insn "*<optab><mode>3"
  [(set (match_operand:SVE_SDI 0 "register_operand" "=w, w, ?&w")
	(unspec:SVE_SDI
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
	   (SVE_INT_BINARY_SD:SVE_SDI
	     (match_operand:SVE_SDI 2 "register_operand" "0, w, w")
	     (match_operand:SVE_SDI 3 "aarch64_sve_mul_operand" "w, 0, w"))]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "@
   <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   <sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
   movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,*,yes")]
)
1073 | ||
8fa7f434 | 1074 | ;; Unpredicated NEG, NOT and POPCOUNT. |
1075 | (define_expand "<optab><mode>2" | |
1076 | [(set (match_operand:SVE_I 0 "register_operand") | |
1077 | (unspec:SVE_I | |
1078 | [(match_dup 2) | |
1079 | (SVE_INT_UNARY:SVE_I (match_operand:SVE_I 1 "register_operand"))] | |
1080 | UNSPEC_MERGE_PTRUE))] | |
1081 | "TARGET_SVE" | |
1082 | { | |
f8e1b9c1 | 1083 | operands[2] = aarch64_ptrue_reg (<VPRED>mode); |
8fa7f434 | 1084 | } |
1085 | ) | |
1086 | ||
;; NEG, NOT and POPCOUNT predicated with a PTRUE.
(define_insn "*<optab><mode>2"
  [(set (match_operand:SVE_I 0 "register_operand" "=w")
	(unspec:SVE_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
	   (SVE_INT_UNARY:SVE_I
	     (match_operand:SVE_I 2 "register_operand" "w"))]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
)
1098 | ||
;; Vector AND, ORR and XOR.  The immediate alternative uses a bitmask
;; immediate (vsl); the register form is element-size agnostic, hence .d.
(define_insn "<optab><mode>3"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, w")
	(LOGICAL:SVE_I
	  (match_operand:SVE_I 1 "register_operand" "%0, w")
	  (match_operand:SVE_I 2 "aarch64_sve_logical_operand" "vsl, w")))]
  "TARGET_SVE"
  "@
   <logical>\t%0.<Vetype>, %0.<Vetype>, #%C2
   <logical>\t%0.d, %1.d, %2.d"
)
1110 | ||
;; Vector AND, ORR and XOR on floating-point modes.  We avoid subregs
;; by providing this, but we need to use UNSPECs since rtx logical ops
;; aren't defined for floating-point modes.
(define_insn "*<optab><mode>3"
  [(set (match_operand:SVE_F 0 "register_operand" "=w")
	(unspec:SVE_F [(match_operand:SVE_F 1 "register_operand" "w")
		       (match_operand:SVE_F 2 "register_operand" "w")]
		      LOGICALF))]
  "TARGET_SVE"
  "<logicalf_op>\t%0.d, %1.d, %2.d"
)
1122 | ||
;; REG_EQUAL notes on "not<mode>3" should ensure that we can generate
;; this pattern even though the NOT instruction itself is predicated.
;; Note that BIC's operand order is (second input, inverted input).
(define_insn "bic<mode>3"
  [(set (match_operand:SVE_I 0 "register_operand" "=w")
	(and:SVE_I
	  (not:SVE_I (match_operand:SVE_I 1 "register_operand" "w"))
	  (match_operand:SVE_I 2 "register_operand" "w")))]
  "TARGET_SVE"
  "bic\t%0.d, %2.d, %1.d"
)
1133 | ||
;; Predicate AND.  We can reuse one of the inputs as the GP, since
;; ANDing with the GP leaves the result unchanged.
(define_insn "and<mode>3"
  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
	(and:PRED_ALL (match_operand:PRED_ALL 1 "register_operand" "Upa")
		      (match_operand:PRED_ALL 2 "register_operand" "Upa")))]
  "TARGET_SVE"
  "and\t%0.b, %1/z, %1.b, %2.b"
)
1142 | ||
;; Unpredicated predicate ORR and XOR.  The outer AND with an all-true
;; predicate (operand 3) matches the predicated pattern below.
(define_expand "<optab><mode>3"
  [(set (match_operand:PRED_ALL 0 "register_operand")
	(and:PRED_ALL
	  (LOGICAL_OR:PRED_ALL
	    (match_operand:PRED_ALL 1 "register_operand")
	    (match_operand:PRED_ALL 2 "register_operand"))
	  (match_dup 3)))]
  "TARGET_SVE"
  {
    operands[3] = aarch64_ptrue_reg (<MODE>mode);
  }
)
1156 | ||
;; Predicated predicate ORR and XOR: operand 0 = (op2 <logical> op3) & op1,
;; where operand 1 acts as the governing predicate (/z form).
(define_insn "pred_<optab><mode>3"
  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
	(and:PRED_ALL
	  (LOGICAL:PRED_ALL
	    (match_operand:PRED_ALL 2 "register_operand" "Upa")
	    (match_operand:PRED_ALL 3 "register_operand" "Upa"))
	  (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
  "TARGET_SVE"
  "<logical>\t%0.b, %1/z, %2.b, %3.b"
)
1168 | ||
;; Perform a logical operation on operands 2 and 3, using operand 1 as
;; the GP (which is known to be a PTRUE).  Store the result in operand 0
;; and set the flags in the same way as for PTEST.  The (and ...) in the
;; UNSPEC_PTEST_PTRUE is logically redundant, but means that the tested
;; value is structurally equivalent to rhs of the second set.
(define_insn "*<optab><mode>3_cc"
  [(set (reg:CC CC_REGNUM)
	(compare:CC
	  (unspec:SI [(match_operand:PRED_ALL 1 "register_operand" "Upa")
		      (and:PRED_ALL
			(LOGICAL:PRED_ALL
			  (match_operand:PRED_ALL 2 "register_operand" "Upa")
			  (match_operand:PRED_ALL 3 "register_operand" "Upa"))
			(match_dup 1))]
		     UNSPEC_PTEST_PTRUE)
	  (const_int 0)))
   (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
	(and:PRED_ALL (LOGICAL:PRED_ALL (match_dup 2) (match_dup 3))
		      (match_dup 1)))]
  "TARGET_SVE"
  "<logical>s\t%0.b, %1/z, %2.b, %3.b"
)
1191 | ||
;; Unpredicated predicate inverse.  The AND with the all-true predicate
;; (operand 2) matches the predicated NOT pattern below.
(define_expand "one_cmpl<mode>2"
  [(set (match_operand:PRED_ALL 0 "register_operand")
	(and:PRED_ALL
	  (not:PRED_ALL (match_operand:PRED_ALL 1 "register_operand"))
	  (match_dup 2)))]
  "TARGET_SVE"
  {
    operands[2] = aarch64_ptrue_reg (<MODE>mode);
  }
)
1203 | ||
;; Predicated predicate inverse: operand 0 = ~operand 2 & operand 1.
(define_insn "*one_cmpl<mode>3"
  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
	(and:PRED_ALL
	  (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
	  (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
  "TARGET_SVE"
  "not\t%0.b, %1/z, %2.b"
)
1213 | ||
;; Predicated predicate BIC and ORN.  Note that in the output template
;; the non-inverted operand (3) comes first and the inverted one (2) last,
;; matching the instructions' operand order.
(define_insn "*<nlogical><mode>3"
  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
	(and:PRED_ALL
	  (NLOGICAL:PRED_ALL
	    (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
	    (match_operand:PRED_ALL 3 "register_operand" "Upa"))
	  (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
  "TARGET_SVE"
  "<nlogical>\t%0.b, %1/z, %3.b, %2.b"
)
1225 | ||
1226 | ;; Predicated predicate NAND and NOR. | |
1227 | (define_insn "*<logical_nn><mode>3" | |
1228 | [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") | |
1229 | (and:PRED_ALL | |
1230 | (NLOGICAL:PRED_ALL | |
1231 | (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa")) | |
1232 | (not:PRED_ALL (match_operand:PRED_ALL 3 "register_operand" "Upa"))) | |
1233 | (match_operand:PRED_ALL 1 "register_operand" "Upa")))] | |
1234 | "TARGET_SVE" | |
1235 | "<logical_nn>\t%0.b, %1/z, %2.b, %3.b" | |
1236 | ) | |
1237 | ||
;; Unpredicated LSL, LSR and ASR by a vector.  Expands to the
;; PTRUE-predicated form below.
(define_expand "v<optab><mode>3"
  [(set (match_operand:SVE_I 0 "register_operand")
	(unspec:SVE_I
	  [(match_dup 3)
	   (ASHIFT:SVE_I
	     (match_operand:SVE_I 1 "register_operand")
	     (match_operand:SVE_I 2 "aarch64_sve_<lr>shift_operand"))]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[3] = aarch64_ptrue_reg (<VPRED>mode);
  }
)
1252 | ||
;; LSL, LSR and ASR by a vector, predicated with a PTRUE.  We don't
;; actually need the predicate for the first alternative, but using Upa
;; or X isn't likely to gain much and would make the instruction seem
;; less uniform to the register allocator.
;;
;; The first alternative (immediate shift amount) is emitted as "#" and
;; split after reload into the unpredicated post-RA pattern below.
(define_insn_and_split "*v<optab><mode>3"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
	(unspec:SVE_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
	   (ASHIFT:SVE_I
	     (match_operand:SVE_I 2 "register_operand" "w, 0, w")
	     (match_operand:SVE_I 3 "aarch64_sve_<lr>shift_operand" "D<lr>, w, w"))]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "@
   #
   <shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  "&& reload_completed
   && !register_operand (operands[3], <MODE>mode)"
  [(set (match_dup 0) (ASHIFT:SVE_I (match_dup 2) (match_dup 3)))]
  ""
  [(set_attr "movprfx" "*,*,yes")]
)
1276 | ||
0fd5a0b4 | 1277 | ;; Unpredicated shift operations by a constant (post-RA only). |
1278 | ;; These are generated by splitting a predicated instruction whose | |
1279 | ;; predicate is unused. | |
1280 | (define_insn "*post_ra_v<optab><mode>3" | |
1281 | [(set (match_operand:SVE_I 0 "register_operand" "=w") | |
1282 | (ASHIFT:SVE_I | |
1283 | (match_operand:SVE_I 1 "register_operand" "w") | |
1284 | (match_operand:SVE_I 2 "aarch64_simd_<lr>shift_imm")))] | |
1285 | "TARGET_SVE && reload_completed" | |
1286 | "<shift>\t%0.<Vetype>, %1.<Vetype>, #%2" | |
1287 | ) | |
1288 | ||
8fa7f434 | 1289 | ;; LSL, LSR and ASR by a scalar, which expands into one of the vector |
1290 | ;; shifts above. | |
1291 | (define_expand "<ASHIFT:optab><mode>3" | |
1292 | [(set (match_operand:SVE_I 0 "register_operand") | |
1293 | (ASHIFT:SVE_I (match_operand:SVE_I 1 "register_operand") | |
1294 | (match_operand:<VEL> 2 "general_operand")))] | |
1295 | "TARGET_SVE" | |
1296 | { | |
1297 | rtx amount; | |
1298 | if (CONST_INT_P (operands[2])) | |
1299 | { | |
1300 | amount = gen_const_vec_duplicate (<MODE>mode, operands[2]); | |
1301 | if (!aarch64_sve_<lr>shift_operand (operands[2], <MODE>mode)) | |
1302 | amount = force_reg (<MODE>mode, amount); | |
1303 | } | |
1304 | else | |
1305 | { | |
1306 | amount = gen_reg_rtx (<MODE>mode); | |
1307 | emit_insn (gen_vec_duplicate<mode> (amount, | |
1308 | convert_to_mode (<VEL>mode, | |
1309 | operands[2], 0))); | |
1310 | } | |
1311 | emit_insn (gen_v<optab><mode>3 (operands[0], operands[1], amount)); | |
1312 | DONE; | |
1313 | } | |
1314 | ) | |
1315 | ||
;; Test all bits of operand 1.  Operand 0 is a GP that is known to hold PTRUE.
;;
;; Using UNSPEC_PTEST_PTRUE allows combine patterns to assume that the GP
;; is a PTRUE even if the optimizers haven't yet been able to propagate
;; the constant.  We would use a separate unspec code for PTESTs involving
;; GPs that might not be PTRUEs.
(define_insn "ptest_ptrue<mode>"
  [(set (reg:CC CC_REGNUM)
	(compare:CC
	  (unspec:SI [(match_operand:PRED_ALL 0 "register_operand" "Upa")
		      (match_operand:PRED_ALL 1 "register_operand" "Upa")]
		     UNSPEC_PTEST_PTRUE)
	  (const_int 0)))]
  "TARGET_SVE"
  "ptest\t%0, %1.b"
)
1332 | ||
;; Set element I of the result if operand1 + J < operand2 for all J in [0, I],
;; with the comparison being unsigned.  WHILELO also sets the flags, hence
;; the CC clobber; the flag-using variant is below.
(define_insn "while_ult<GPI:mode><PRED_ALL:mode>"
  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
	(unspec:PRED_ALL [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
			  (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
			 UNSPEC_WHILE_LO))
   (clobber (reg:CC CC_REGNUM))]
  "TARGET_SVE"
  "whilelo\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2"
)
1344 | ||
;; WHILELO sets the flags in the same way as a PTEST with a PTRUE GP.
;; Handle the case in which both results are useful.  The GP operand
;; to the PTEST isn't needed, so we allow it to be anything.
(define_insn_and_rewrite "*while_ult<GPI:mode><PRED_ALL:mode>_cc"
  [(set (reg:CC CC_REGNUM)
	(compare:CC
	  (unspec:SI [(match_operand:PRED_ALL 1)
		      (unspec:PRED_ALL
			[(match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")
			 (match_operand:GPI 3 "aarch64_reg_or_zero" "rZ")]
			UNSPEC_WHILE_LO)]
		     UNSPEC_PTEST_PTRUE)
	  (const_int 0)))
   (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
	(unspec:PRED_ALL [(match_dup 2)
			  (match_dup 3)]
			 UNSPEC_WHILE_LO))]
  "TARGET_SVE"
  "whilelo\t%0.<PRED_ALL:Vetype>, %<w>2, %<w>3"
  ;; Force the compiler to drop the unused predicate operand, so that we
  ;; don't have an unnecessary PTRUE.
  "&& !CONSTANT_P (operands[1])"
  {
    operands[1] = CONSTM1_RTX (<MODE>mode);
  }
)
1371 | ||
b293e19f | 1372 | ;; Integer comparisons predicated with a PTRUE. |
1373 | (define_insn "*cmp<cmp_op><mode>" | |
8fa7f434 | 1374 | [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa") |
1375 | (unspec:<VPRED> | |
1376 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
b293e19f | 1377 | (SVE_INT_CMP:<VPRED> |
1378 | (match_operand:SVE_I 2 "register_operand" "w, w") | |
1379 | (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))] | |
1380 | UNSPEC_MERGE_PTRUE)) | |
8fa7f434 | 1381 | (clobber (reg:CC CC_REGNUM))] |
1382 | "TARGET_SVE" | |
1383 | "@ | |
1384 | cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3 | |
1385 | cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>" | |
1386 | ) | |
1387 | ||
b293e19f | 1388 | ;; Integer comparisons predicated with a PTRUE in which only the flags result |
1389 | ;; is interesting. | |
1390 | (define_insn "*cmp<cmp_op><mode>_ptest" | |
8fa7f434 | 1391 | [(set (reg:CC CC_REGNUM) |
1392 | (compare:CC | |
1393 | (unspec:SI | |
1394 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
1395 | (unspec:<VPRED> | |
1396 | [(match_dup 1) | |
b293e19f | 1397 | (SVE_INT_CMP:<VPRED> |
1398 | (match_operand:SVE_I 2 "register_operand" "w, w") | |
1399 | (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))] | |
1400 | UNSPEC_MERGE_PTRUE)] | |
8fa7f434 | 1401 | UNSPEC_PTEST_PTRUE) |
1402 | (const_int 0))) | |
1403 | (clobber (match_scratch:<VPRED> 0 "=Upa, Upa"))] | |
1404 | "TARGET_SVE" | |
1405 | "@ | |
1406 | cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3 | |
1407 | cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>" | |
1408 | ) | |
1409 | ||
b293e19f | 1410 | ;; Integer comparisons predicated with a PTRUE in which both the flag and |
1411 | ;; predicate results are interesting. | |
1412 | (define_insn "*cmp<cmp_op><mode>_cc" | |
8fa7f434 | 1413 | [(set (reg:CC CC_REGNUM) |
1414 | (compare:CC | |
1415 | (unspec:SI | |
1416 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
1417 | (unspec:<VPRED> | |
1418 | [(match_dup 1) | |
b293e19f | 1419 | (SVE_INT_CMP:<VPRED> |
1420 | (match_operand:SVE_I 2 "register_operand" "w, w") | |
1421 | (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))] | |
1422 | UNSPEC_MERGE_PTRUE)] | |
8fa7f434 | 1423 | UNSPEC_PTEST_PTRUE) |
1424 | (const_int 0))) | |
1425 | (set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa") | |
1426 | (unspec:<VPRED> | |
1427 | [(match_dup 1) | |
b293e19f | 1428 | (SVE_INT_CMP:<VPRED> |
1429 | (match_dup 2) | |
1430 | (match_dup 3))] | |
1431 | UNSPEC_MERGE_PTRUE))] | |
8fa7f434 | 1432 | "TARGET_SVE" |
1433 | "@ | |
1434 | cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3 | |
1435 | cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>" | |
1436 | ) | |
1437 | ||
30dd727b | 1438 | ;; Predicated integer comparisons, formed by combining a PTRUE-predicated |
1439 | ;; comparison with an AND. Split the instruction into its preferred form | |
1440 | ;; (below) at the earliest opportunity, in order to get rid of the | |
1441 | ;; redundant operand 1. | |
1442 | (define_insn_and_split "*pred_cmp<cmp_op><mode>_combine" | |
1443 | [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa") | |
1444 | (and:<VPRED> | |
1445 | (unspec:<VPRED> | |
1446 | [(match_operand:<VPRED> 1) | |
1447 | (SVE_INT_CMP:<VPRED> | |
1448 | (match_operand:SVE_I 2 "register_operand" "w, w") | |
1449 | (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))] | |
1450 | UNSPEC_MERGE_PTRUE) | |
1451 | (match_operand:<VPRED> 4 "register_operand" "Upl, Upl"))) | |
1452 | (clobber (reg:CC CC_REGNUM))] | |
1453 | "TARGET_SVE" | |
1454 | "#" | |
1455 | "&& 1" | |
1456 | [(parallel | |
1457 | [(set (match_dup 0) | |
1458 | (and:<VPRED> | |
1459 | (SVE_INT_CMP:<VPRED> | |
1460 | (match_dup 2) | |
1461 | (match_dup 3)) | |
1462 | (match_dup 4))) | |
1463 | (clobber (reg:CC CC_REGNUM))])] | |
1464 | ) | |
1465 | ||
;; Predicated integer comparisons: operand 1 is the governing predicate,
;; ANDed with the raw comparison result.
(define_insn "*pred_cmp<cmp_op><mode>"
  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
	(and:<VPRED>
	  (SVE_INT_CMP:<VPRED>
	    (match_operand:SVE_I 2 "register_operand" "w, w")
	    (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))
	  (match_operand:<VPRED> 1 "register_operand" "Upl, Upl")))
   (clobber (reg:CC CC_REGNUM))]
  "TARGET_SVE"
  "@
   cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
   cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
)
1480 | ||
b293e19f | 1481 | ;; Floating-point comparisons predicated with a PTRUE. |
1482 | (define_insn "*fcm<cmp_op><mode>" | |
8fa7f434 | 1483 | [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa") |
1484 | (unspec:<VPRED> | |
1485 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
b293e19f | 1486 | (SVE_FP_CMP:<VPRED> |
1487 | (match_operand:SVE_F 2 "register_operand" "w, w") | |
1488 | (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w"))] | |
1489 | UNSPEC_MERGE_PTRUE))] | |
8fa7f434 | 1490 | "TARGET_SVE" |
1491 | "@ | |
1492 | fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #0.0 | |
1493 | fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>" | |
1494 | ) | |
1495 | ||
b293e19f | 1496 | (define_insn "*fcmuo<mode>" |
8fa7f434 | 1497 | [(set (match_operand:<VPRED> 0 "register_operand" "=Upa") |
1498 | (unspec:<VPRED> | |
1499 | [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
b293e19f | 1500 | (unordered:<VPRED> |
1501 | (match_operand:SVE_F 2 "register_operand" "w") | |
1502 | (match_operand:SVE_F 3 "register_operand" "w"))] | |
1503 | UNSPEC_MERGE_PTRUE))] | |
8fa7f434 | 1504 | "TARGET_SVE" |
1505 | "fcmuo\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>" | |
1506 | ) | |
1507 | ||
30dd727b | 1508 | ;; Floating-point comparisons predicated on a PTRUE, with the results ANDed |
1509 | ;; with another predicate P. This does not have the same trapping behavior | |
1510 | ;; as predicating the comparison itself on P, but it's a legitimate fold, | |
1511 | ;; since we can drop any potentially-trapping operations whose results | |
1512 | ;; are not needed. | |
1513 | ;; | |
1514 | ;; Split the instruction into its preferred form (below) at the earliest | |
1515 | ;; opportunity, in order to get rid of the redundant operand 1. | |
1516 | (define_insn_and_split "*fcm<cmp_op><mode>_and_combine" | |
1517 | [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa") | |
1518 | (and:<VPRED> | |
1519 | (unspec:<VPRED> | |
1520 | [(match_operand:<VPRED> 1) | |
1521 | (SVE_FP_CMP | |
1522 | (match_operand:SVE_F 2 "register_operand" "w, w") | |
1523 | (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w"))] | |
1524 | UNSPEC_MERGE_PTRUE) | |
1525 | (match_operand:<VPRED> 4 "register_operand" "Upl, Upl")))] | |
1526 | "TARGET_SVE" | |
1527 | "#" | |
1528 | "&& 1" | |
1529 | [(set (match_dup 0) | |
1530 | (and:<VPRED> | |
1531 | (SVE_FP_CMP:<VPRED> | |
1532 | (match_dup 2) | |
1533 | (match_dup 3)) | |
1534 | (match_dup 4)))] | |
1535 | ) | |
1536 | ||
1537 | (define_insn_and_split "*fcmuo<mode>_and_combine" | |
1538 | [(set (match_operand:<VPRED> 0 "register_operand" "=Upa") | |
1539 | (and:<VPRED> | |
1540 | (unspec:<VPRED> | |
1541 | [(match_operand:<VPRED> 1) | |
1542 | (unordered | |
1543 | (match_operand:SVE_F 2 "register_operand" "w") | |
1544 | (match_operand:SVE_F 3 "register_operand" "w"))] | |
1545 | UNSPEC_MERGE_PTRUE) | |
1546 | (match_operand:<VPRED> 4 "register_operand" "Upl")))] | |
1547 | "TARGET_SVE" | |
1548 | "#" | |
1549 | "&& 1" | |
1550 | [(set (match_dup 0) | |
1551 | (and:<VPRED> | |
1552 | (unordered:<VPRED> | |
1553 | (match_dup 2) | |
1554 | (match_dup 3)) | |
1555 | (match_dup 4)))] | |
1556 | ) | |
1557 | ||
1558 | ;; Unpredicated floating-point comparisons, with the results ANDed | |
1559 | ;; with another predicate. This is a valid fold for the same reasons | |
1560 | ;; as above. | |
1561 | (define_insn "*fcm<cmp_op><mode>_and" | |
1562 | [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa") | |
1563 | (and:<VPRED> | |
1564 | (SVE_FP_CMP:<VPRED> | |
1565 | (match_operand:SVE_F 2 "register_operand" "w, w") | |
1566 | (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w")) | |
1567 | (match_operand:<VPRED> 1 "register_operand" "Upl, Upl")))] | |
1568 | "TARGET_SVE" | |
1569 | "@ | |
1570 | fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #0.0 | |
1571 | fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>" | |
1572 | ) | |
1573 | ||
1574 | (define_insn "*fcmuo<mode>_and" | |
1575 | [(set (match_operand:<VPRED> 0 "register_operand" "=Upa") | |
1576 | (and:<VPRED> | |
1577 | (unordered:<VPRED> | |
1578 | (match_operand:SVE_F 2 "register_operand" "w") | |
1579 | (match_operand:SVE_F 3 "register_operand" "w")) | |
1580 | (match_operand:<VPRED> 1 "register_operand" "Upl")))] | |
1581 | "TARGET_SVE" | |
1582 | "fcmuo\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>" | |
1583 | ) | |
1584 | ||
b293e19f | 1585 | ;; Predicated floating-point comparisons. We don't need a version |
1586 | ;; of this for unordered comparisons. | |
1587 | (define_insn "*pred_fcm<cmp_op><mode>" | |
1588 | [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa") | |
1589 | (unspec:<VPRED> | |
1590 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
1591 | (match_operand:SVE_F 2 "register_operand" "w, w") | |
1592 | (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w")] | |
1593 | SVE_COND_FP_CMP))] | |
1594 | "TARGET_SVE" | |
1595 | "@ | |
1596 | fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #0.0 | |
1597 | fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>" | |
1598 | ) | |
1599 | ||
8fa7f434 | 1600 | ;; vcond_mask operand order: true, false, mask |
1601 | ;; UNSPEC_SEL operand order: mask, true, false (as for VEC_COND_EXPR) | |
1602 | ;; SEL operand order: mask, true, false | |
1603 | (define_insn "vcond_mask_<mode><vpred>" | |
1604 | [(set (match_operand:SVE_ALL 0 "register_operand" "=w") | |
1605 | (unspec:SVE_ALL | |
1606 | [(match_operand:<VPRED> 3 "register_operand" "Upa") | |
1607 | (match_operand:SVE_ALL 1 "register_operand" "w") | |
1608 | (match_operand:SVE_ALL 2 "register_operand" "w")] | |
1609 | UNSPEC_SEL))] | |
1610 | "TARGET_SVE" | |
1611 | "sel\t%0.<Vetype>, %3, %1.<Vetype>, %2.<Vetype>" | |
1612 | ) | |
1613 | ||
1614 | ;; Selects between a duplicated immediate and zero. | |
1615 | (define_insn "aarch64_sve_dup<mode>_const" | |
1616 | [(set (match_operand:SVE_I 0 "register_operand" "=w") | |
1617 | (unspec:SVE_I | |
1618 | [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
1619 | (match_operand:SVE_I 2 "aarch64_sve_dup_immediate") | |
1620 | (match_operand:SVE_I 3 "aarch64_simd_imm_zero")] | |
1621 | UNSPEC_SEL))] | |
1622 | "TARGET_SVE" | |
1623 | "mov\t%0.<Vetype>, %1/z, #%2" | |
1624 | ) | |
1625 | ||
1626 | ;; Integer (signed) vcond. Don't enforce an immediate range here, since it | |
1627 | ;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead. | |
1628 | (define_expand "vcond<mode><v_int_equiv>" | |
1629 | [(set (match_operand:SVE_ALL 0 "register_operand") | |
1630 | (if_then_else:SVE_ALL | |
1631 | (match_operator 3 "comparison_operator" | |
1632 | [(match_operand:<V_INT_EQUIV> 4 "register_operand") | |
1633 | (match_operand:<V_INT_EQUIV> 5 "nonmemory_operand")]) | |
1634 | (match_operand:SVE_ALL 1 "register_operand") | |
1635 | (match_operand:SVE_ALL 2 "register_operand")))] | |
1636 | "TARGET_SVE" | |
1637 | { | |
1638 | aarch64_expand_sve_vcond (<MODE>mode, <V_INT_EQUIV>mode, operands); | |
1639 | DONE; | |
1640 | } | |
1641 | ) | |
1642 | ||
1643 | ;; Integer vcondu. Don't enforce an immediate range here, since it | |
1644 | ;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead. | |
1645 | (define_expand "vcondu<mode><v_int_equiv>" | |
1646 | [(set (match_operand:SVE_ALL 0 "register_operand") | |
1647 | (if_then_else:SVE_ALL | |
1648 | (match_operator 3 "comparison_operator" | |
1649 | [(match_operand:<V_INT_EQUIV> 4 "register_operand") | |
1650 | (match_operand:<V_INT_EQUIV> 5 "nonmemory_operand")]) | |
1651 | (match_operand:SVE_ALL 1 "register_operand") | |
1652 | (match_operand:SVE_ALL 2 "register_operand")))] | |
1653 | "TARGET_SVE" | |
1654 | { | |
1655 | aarch64_expand_sve_vcond (<MODE>mode, <V_INT_EQUIV>mode, operands); | |
1656 | DONE; | |
1657 | } | |
1658 | ) | |
1659 | ||
1660 | ;; Floating-point vcond. All comparisons except FCMUO allow a zero | |
1661 | ;; operand; aarch64_expand_sve_vcond handles the case of an FCMUO | |
1662 | ;; with zero. | |
1663 | (define_expand "vcond<mode><v_fp_equiv>" | |
1664 | [(set (match_operand:SVE_SD 0 "register_operand") | |
1665 | (if_then_else:SVE_SD | |
1666 | (match_operator 3 "comparison_operator" | |
1667 | [(match_operand:<V_FP_EQUIV> 4 "register_operand") | |
1668 | (match_operand:<V_FP_EQUIV> 5 "aarch64_simd_reg_or_zero")]) | |
1669 | (match_operand:SVE_SD 1 "register_operand") | |
1670 | (match_operand:SVE_SD 2 "register_operand")))] | |
1671 | "TARGET_SVE" | |
1672 | { | |
1673 | aarch64_expand_sve_vcond (<MODE>mode, <V_FP_EQUIV>mode, operands); | |
1674 | DONE; | |
1675 | } | |
1676 | ) | |
1677 | ||
1678 | ;; Signed integer comparisons. Don't enforce an immediate range here, since | |
1679 | ;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int | |
1680 | ;; instead. | |
1681 | (define_expand "vec_cmp<mode><vpred>" | |
1682 | [(parallel | |
1683 | [(set (match_operand:<VPRED> 0 "register_operand") | |
1684 | (match_operator:<VPRED> 1 "comparison_operator" | |
1685 | [(match_operand:SVE_I 2 "register_operand") | |
1686 | (match_operand:SVE_I 3 "nonmemory_operand")])) | |
1687 | (clobber (reg:CC CC_REGNUM))])] | |
1688 | "TARGET_SVE" | |
1689 | { | |
1690 | aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]), | |
1691 | operands[2], operands[3]); | |
1692 | DONE; | |
1693 | } | |
1694 | ) | |
1695 | ||
1696 | ;; Unsigned integer comparisons. Don't enforce an immediate range here, since | |
1697 | ;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int | |
1698 | ;; instead. | |
1699 | (define_expand "vec_cmpu<mode><vpred>" | |
1700 | [(parallel | |
1701 | [(set (match_operand:<VPRED> 0 "register_operand") | |
1702 | (match_operator:<VPRED> 1 "comparison_operator" | |
1703 | [(match_operand:SVE_I 2 "register_operand") | |
1704 | (match_operand:SVE_I 3 "nonmemory_operand")])) | |
1705 | (clobber (reg:CC CC_REGNUM))])] | |
1706 | "TARGET_SVE" | |
1707 | { | |
1708 | aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]), | |
1709 | operands[2], operands[3]); | |
1710 | DONE; | |
1711 | } | |
1712 | ) | |
1713 | ||
1714 | ;; Floating-point comparisons. All comparisons except FCMUO allow a zero | |
1715 | ;; operand; aarch64_expand_sve_vec_cmp_float handles the case of an FCMUO | |
1716 | ;; with zero. | |
1717 | (define_expand "vec_cmp<mode><vpred>" | |
1718 | [(set (match_operand:<VPRED> 0 "register_operand") | |
1719 | (match_operator:<VPRED> 1 "comparison_operator" | |
1720 | [(match_operand:SVE_F 2 "register_operand") | |
1721 | (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero")]))] | |
1722 | "TARGET_SVE" | |
1723 | { | |
1724 | aarch64_expand_sve_vec_cmp_float (operands[0], GET_CODE (operands[1]), | |
1725 | operands[2], operands[3], false); | |
1726 | DONE; | |
1727 | } | |
1728 | ) | |
1729 | ||
1730 | ;; Branch based on predicate equality or inequality. | |
1731 | (define_expand "cbranch<mode>4" | |
1732 | [(set (pc) | |
1733 | (if_then_else | |
1734 | (match_operator 0 "aarch64_equality_operator" | |
1735 | [(match_operand:PRED_ALL 1 "register_operand") | |
1736 | (match_operand:PRED_ALL 2 "aarch64_simd_reg_or_zero")]) | |
1737 | (label_ref (match_operand 3 "")) | |
1738 | (pc)))] | |
1739 | "" | |
1740 | { | |
f8e1b9c1 | 1741 | rtx ptrue = aarch64_ptrue_reg (<MODE>mode); |
8fa7f434 | 1742 | rtx pred; |
1743 | if (operands[2] == CONST0_RTX (<MODE>mode)) | |
1744 | pred = operands[1]; | |
1745 | else | |
1746 | { | |
1747 | pred = gen_reg_rtx (<MODE>mode); | |
1748 | emit_insn (gen_pred_xor<mode>3 (pred, ptrue, operands[1], | |
1749 | operands[2])); | |
1750 | } | |
1751 | emit_insn (gen_ptest_ptrue<mode> (ptrue, pred)); | |
1752 | operands[1] = gen_rtx_REG (CCmode, CC_REGNUM); | |
1753 | operands[2] = const0_rtx; | |
1754 | } | |
1755 | ) | |
1756 | ||
1757 | ;; Unpredicated integer MIN/MAX. | |
1758 | (define_expand "<su><maxmin><mode>3" | |
1759 | [(set (match_operand:SVE_I 0 "register_operand") | |
1760 | (unspec:SVE_I | |
1761 | [(match_dup 3) | |
1762 | (MAXMIN:SVE_I (match_operand:SVE_I 1 "register_operand") | |
1763 | (match_operand:SVE_I 2 "register_operand"))] | |
1764 | UNSPEC_MERGE_PTRUE))] | |
1765 | "TARGET_SVE" | |
1766 | { | |
f8e1b9c1 | 1767 | operands[3] = aarch64_ptrue_reg (<VPRED>mode); |
8fa7f434 | 1768 | } |
1769 | ) | |
1770 | ||
1771 | ;; Integer MIN/MAX predicated with a PTRUE. | |
1772 | (define_insn "*<su><maxmin><mode>3" | |
83d1ca63 | 1773 | [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") |
8fa7f434 | 1774 | (unspec:SVE_I |
83d1ca63 | 1775 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") |
1776 | (MAXMIN:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w") | |
1777 | (match_operand:SVE_I 3 "register_operand" "w, w"))] | |
8fa7f434 | 1778 | UNSPEC_MERGE_PTRUE))] |
1779 | "TARGET_SVE" | |
83d1ca63 | 1780 | "@ |
1781 | <su><maxmin>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
1782 | movprfx\t%0, %2\;<su><maxmin>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
1783 | [(set_attr "movprfx" "*,yes")] | |
8fa7f434 | 1784 | ) |
1785 | ||
1786 | ;; Unpredicated floating-point MIN/MAX. | |
1787 | (define_expand "<su><maxmin><mode>3" | |
1788 | [(set (match_operand:SVE_F 0 "register_operand") | |
1789 | (unspec:SVE_F | |
1790 | [(match_dup 3) | |
1791 | (FMAXMIN:SVE_F (match_operand:SVE_F 1 "register_operand") | |
1792 | (match_operand:SVE_F 2 "register_operand"))] | |
1793 | UNSPEC_MERGE_PTRUE))] | |
1794 | "TARGET_SVE" | |
1795 | { | |
f8e1b9c1 | 1796 | operands[3] = aarch64_ptrue_reg (<VPRED>mode); |
8fa7f434 | 1797 | } |
1798 | ) | |
1799 | ||
1800 | ;; Floating-point MIN/MAX predicated with a PTRUE. | |
1801 | (define_insn "*<su><maxmin><mode>3" | |
83d1ca63 | 1802 | [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w") |
8fa7f434 | 1803 | (unspec:SVE_F |
83d1ca63 | 1804 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") |
1805 | (FMAXMIN:SVE_F (match_operand:SVE_F 2 "register_operand" "%0, w") | |
1806 | (match_operand:SVE_F 3 "register_operand" "w, w"))] | |
8fa7f434 | 1807 | UNSPEC_MERGE_PTRUE))] |
1808 | "TARGET_SVE" | |
83d1ca63 | 1809 | "@ |
1810 | f<maxmin>nm\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
1811 | movprfx\t%0, %2\;f<maxmin>nm\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
1812 | [(set_attr "movprfx" "*,yes")] | |
8fa7f434 | 1813 | ) |
1814 | ||
1815 | ;; Unpredicated fmin/fmax. | |
1816 | (define_expand "<maxmin_uns><mode>3" | |
1817 | [(set (match_operand:SVE_F 0 "register_operand") | |
1818 | (unspec:SVE_F | |
1819 | [(match_dup 3) | |
1820 | (unspec:SVE_F [(match_operand:SVE_F 1 "register_operand") | |
1821 | (match_operand:SVE_F 2 "register_operand")] | |
1822 | FMAXMIN_UNS)] | |
1823 | UNSPEC_MERGE_PTRUE))] | |
1824 | "TARGET_SVE" | |
1825 | { | |
f8e1b9c1 | 1826 | operands[3] = aarch64_ptrue_reg (<VPRED>mode); |
8fa7f434 | 1827 | } |
1828 | ) | |
1829 | ||
1830 | ;; fmin/fmax predicated with a PTRUE. | |
1831 | (define_insn "*<maxmin_uns><mode>3" | |
83d1ca63 | 1832 | [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w") |
8fa7f434 | 1833 | (unspec:SVE_F |
83d1ca63 | 1834 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") |
1835 | (unspec:SVE_F [(match_operand:SVE_F 2 "register_operand" "%0, w") | |
1836 | (match_operand:SVE_F 3 "register_operand" "w, w")] | |
8fa7f434 | 1837 | FMAXMIN_UNS)] |
1838 | UNSPEC_MERGE_PTRUE))] | |
1839 | "TARGET_SVE" | |
83d1ca63 | 1840 | "@ |
1841 | <maxmin_uns_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
1842 | movprfx\t%0, %2\;<maxmin_uns_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
1843 | [(set_attr "movprfx" "*,yes")] | |
8fa7f434 | 1844 | ) |
1845 | ||
47c52435 | 1846 | ;; Predicated integer operations with select. |
1847 | (define_expand "cond_<optab><mode>" | |
1848 | [(set (match_operand:SVE_I 0 "register_operand") | |
1849 | (unspec:SVE_I | |
1850 | [(match_operand:<VPRED> 1 "register_operand") | |
1851 | (SVE_INT_BINARY:SVE_I | |
1852 | (match_operand:SVE_I 2 "register_operand") | |
1853 | (match_operand:SVE_I 3 "register_operand")) | |
83d1ca63 | 1854 | (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero")] |
47c52435 | 1855 | UNSPEC_SEL))] |
1856 | "TARGET_SVE" | |
83d1ca63 | 1857 | ) |
47c52435 | 1858 | |
cdb4d5d0 | 1859 | (define_expand "cond_<optab><mode>" |
1860 | [(set (match_operand:SVE_SDI 0 "register_operand") | |
1861 | (unspec:SVE_SDI | |
1862 | [(match_operand:<VPRED> 1 "register_operand") | |
1863 | (SVE_INT_BINARY_SD:SVE_SDI | |
1864 | (match_operand:SVE_SDI 2 "register_operand") | |
1865 | (match_operand:SVE_SDI 3 "register_operand")) | |
83d1ca63 | 1866 | (match_operand:SVE_SDI 4 "aarch64_simd_reg_or_zero")] |
cdb4d5d0 | 1867 | UNSPEC_SEL))] |
1868 | "TARGET_SVE" | |
83d1ca63 | 1869 | ) |
cdb4d5d0 | 1870 | |
83d1ca63 | 1871 | ;; Predicated integer operations with select matching the first operand. |
1872 | (define_insn "*cond_<optab><mode>_2" | |
1873 | [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") | |
1874 | (unspec:SVE_I | |
1875 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
1876 | (SVE_INT_BINARY:SVE_I | |
1877 | (match_operand:SVE_I 2 "register_operand" "0, w") | |
1878 | (match_operand:SVE_I 3 "register_operand" "w, w")) | |
1879 | (match_dup 2)] | |
1880 | UNSPEC_SEL))] | |
1881 | "TARGET_SVE" | |
1882 | "@ | |
1883 | <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
1884 | movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
1885 | [(set_attr "movprfx" "*,yes")] | |
1886 | ) | |
1887 | ||
1888 | (define_insn "*cond_<optab><mode>_2" | |
1889 | [(set (match_operand:SVE_SDI 0 "register_operand" "=w, ?&w") | |
1890 | (unspec:SVE_SDI | |
1891 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
1892 | (SVE_INT_BINARY_SD:SVE_SDI | |
1893 | (match_operand:SVE_SDI 2 "register_operand" "0, w") | |
1894 | (match_operand:SVE_SDI 3 "register_operand" "w, w")) | |
1895 | (match_dup 2)] | |
1896 | UNSPEC_SEL))] | |
1897 | "TARGET_SVE" | |
1898 | "@ | |
1899 | <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
1900 | movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
1901 | [(set_attr "movprfx" "*,yes")] | |
1902 | ) | |
1903 | ||
1904 | ;; Predicated integer operations with select matching the second operand. | |
1905 | (define_insn "*cond_<optab><mode>_3" | |
1906 | [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") | |
1907 | (unspec:SVE_I | |
1908 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
1909 | (SVE_INT_BINARY:SVE_I | |
1910 | (match_operand:SVE_I 2 "register_operand" "w, w") | |
1911 | (match_operand:SVE_I 3 "register_operand" "0, w")) | |
1912 | (match_dup 3)] | |
1913 | UNSPEC_SEL))] | |
1914 | "TARGET_SVE" | |
1915 | "@ | |
1916 | <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> | |
1917 | movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>" | |
1918 | [(set_attr "movprfx" "*,yes")] | |
1919 | ) | |
1920 | ||
1921 | (define_insn "*cond_<optab><mode>_3" | |
1922 | [(set (match_operand:SVE_SDI 0 "register_operand" "=w, ?&w") | |
1923 | (unspec:SVE_SDI | |
1924 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
1925 | (SVE_INT_BINARY_SD:SVE_SDI | |
1926 | (match_operand:SVE_SDI 2 "register_operand" "w, w") | |
1927 | (match_operand:SVE_SDI 3 "register_operand" "0, w")) | |
1928 | (match_dup 3)] | |
1929 | UNSPEC_SEL))] | |
1930 | "TARGET_SVE" | |
1931 | "@ | |
1932 | <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> | |
1933 | movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>" | |
1934 | [(set_attr "movprfx" "*,yes")] | |
1935 | ) | |
1936 | ||
05049c9b | 1937 | ;; Predicated integer binary operations in which the values of inactive |
1938 | ;; lanes are distinct from the other inputs. | |
e8c17f8e | 1939 | (define_insn_and_rewrite "*cond_<optab><mode>_any" |
05049c9b | 1940 | [(set (match_operand:SVE_I 0 "register_operand" "=&w, &w, &w, &w, ?&w") |
dbc7e6ae | 1941 | (unspec:SVE_I |
05049c9b | 1942 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl") |
83d1ca63 | 1943 | (SVE_INT_BINARY:SVE_I |
05049c9b | 1944 | (match_operand:SVE_I 2 "register_operand" "0, w, w, w, w") |
1945 | (match_operand:SVE_I 3 "register_operand" "w, 0, w, w, w")) | |
1946 | (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")] | |
dbc7e6ae | 1947 | UNSPEC_SEL))] |
e8c17f8e | 1948 | "TARGET_SVE |
05049c9b | 1949 | && !rtx_equal_p (operands[2], operands[4]) |
1950 | && !rtx_equal_p (operands[3], operands[4])" | |
1951 | "@ | |
1952 | movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
1953 | movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> | |
1954 | movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
1955 | movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
1956 | #" | |
1957 | "&& reload_completed | |
1958 | && register_operand (operands[4], <MODE>mode) | |
1959 | && !rtx_equal_p (operands[0], operands[4])" | |
e8c17f8e | 1960 | { |
1961 | emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2], | |
1962 | operands[4], operands[1])); | |
1963 | operands[4] = operands[2] = operands[0]; | |
1964 | } | |
05049c9b | 1965 | [(set_attr "movprfx" "yes")] |
dbc7e6ae | 1966 | ) |
1967 | ||
e8c17f8e | 1968 | (define_insn_and_rewrite "*cond_<optab><mode>_any" |
05049c9b | 1969 | [(set (match_operand:SVE_SDI 0 "register_operand" "=&w, &w, &w, &w, ?&w") |
cdb4d5d0 | 1970 | (unspec:SVE_SDI |
05049c9b | 1971 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl") |
1972 | (SVE_INT_BINARY_SD:SVE_SDI | |
1973 | (match_operand:SVE_SDI 2 "register_operand" "0, w, w, w, w") | |
1974 | (match_operand:SVE_SDI 3 "register_operand" "w, 0, w, w, w")) | |
1975 | (match_operand:SVE_SDI 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")] | |
cdb4d5d0 | 1976 | UNSPEC_SEL))] |
e8c17f8e | 1977 | "TARGET_SVE |
05049c9b | 1978 | && !rtx_equal_p (operands[2], operands[4]) |
1979 | && !rtx_equal_p (operands[3], operands[4])" | |
1980 | "@ | |
1981 | movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
1982 | movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> | |
1983 | movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
1984 | movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
1985 | #" | |
1986 | "&& reload_completed | |
1987 | && register_operand (operands[4], <MODE>mode) | |
1988 | && !rtx_equal_p (operands[0], operands[4])" | |
e8c17f8e | 1989 | { |
1990 | emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2], | |
1991 | operands[4], operands[1])); | |
1992 | operands[4] = operands[2] = operands[0]; | |
1993 | } | |
05049c9b | 1994 | [(set_attr "movprfx" "yes")] |
cdb4d5d0 | 1995 | ) |
1996 | ||
3bf95150 | 1997 | ;; Set operand 0 to the last active element in operand 3, or to tied |
1998 | ;; operand 1 if no elements are active. | |
1999 | (define_insn "fold_extract_last_<mode>" | |
2000 | [(set (match_operand:<VEL> 0 "register_operand" "=r, w") | |
2001 | (unspec:<VEL> | |
2002 | [(match_operand:<VEL> 1 "register_operand" "0, 0") | |
2003 | (match_operand:<VPRED> 2 "register_operand" "Upl, Upl") | |
2004 | (match_operand:SVE_ALL 3 "register_operand" "w, w")] | |
2005 | UNSPEC_CLASTB))] | |
2006 | "TARGET_SVE" | |
2007 | "@ | |
2008 | clastb\t%<vwcore>0, %2, %<vwcore>0, %3.<Vetype> | |
2009 | clastb\t%<vw>0, %2, %<vw>0, %3.<Vetype>" | |
2010 | ) | |
2011 | ||
8fa7f434 | 2012 | ;; Unpredicated integer add reduction. |
2013 | (define_expand "reduc_plus_scal_<mode>" | |
2014 | [(set (match_operand:<VEL> 0 "register_operand") | |
2015 | (unspec:<VEL> [(match_dup 2) | |
2016 | (match_operand:SVE_I 1 "register_operand")] | |
2017 | UNSPEC_ADDV))] | |
2018 | "TARGET_SVE" | |
2019 | { | |
f8e1b9c1 | 2020 | operands[2] = aarch64_ptrue_reg (<VPRED>mode); |
8fa7f434 | 2021 | } |
2022 | ) | |
2023 | ||
2024 | ;; Predicated integer add reduction. The result is always 64-bits. | |
2025 | (define_insn "*reduc_plus_scal_<mode>" | |
2026 | [(set (match_operand:<VEL> 0 "register_operand" "=w") | |
2027 | (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
2028 | (match_operand:SVE_I 2 "register_operand" "w")] | |
2029 | UNSPEC_ADDV))] | |
2030 | "TARGET_SVE" | |
2031 | "uaddv\t%d0, %1, %2.<Vetype>" | |
2032 | ) | |
2033 | ||
2034 | ;; Unpredicated floating-point add reduction. | |
2035 | (define_expand "reduc_plus_scal_<mode>" | |
2036 | [(set (match_operand:<VEL> 0 "register_operand") | |
2037 | (unspec:<VEL> [(match_dup 2) | |
2038 | (match_operand:SVE_F 1 "register_operand")] | |
2039 | UNSPEC_FADDV))] | |
2040 | "TARGET_SVE" | |
2041 | { | |
f8e1b9c1 | 2042 | operands[2] = aarch64_ptrue_reg (<VPRED>mode); |
8fa7f434 | 2043 | } |
2044 | ) | |
2045 | ||
2046 | ;; Predicated floating-point add reduction. | |
2047 | (define_insn "*reduc_plus_scal_<mode>" | |
2048 | [(set (match_operand:<VEL> 0 "register_operand" "=w") | |
2049 | (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
2050 | (match_operand:SVE_F 2 "register_operand" "w")] | |
2051 | UNSPEC_FADDV))] | |
2052 | "TARGET_SVE" | |
2053 | "faddv\t%<Vetype>0, %1, %2.<Vetype>" | |
2054 | ) | |
2055 | ||
2056 | ;; Unpredicated integer MIN/MAX reduction. | |
2057 | (define_expand "reduc_<maxmin_uns>_scal_<mode>" | |
2058 | [(set (match_operand:<VEL> 0 "register_operand") | |
2059 | (unspec:<VEL> [(match_dup 2) | |
2060 | (match_operand:SVE_I 1 "register_operand")] | |
2061 | MAXMINV))] | |
2062 | "TARGET_SVE" | |
2063 | { | |
f8e1b9c1 | 2064 | operands[2] = aarch64_ptrue_reg (<VPRED>mode); |
8fa7f434 | 2065 | } |
2066 | ) | |
2067 | ||
2068 | ;; Predicated integer MIN/MAX reduction. | |
2069 | (define_insn "*reduc_<maxmin_uns>_scal_<mode>" | |
2070 | [(set (match_operand:<VEL> 0 "register_operand" "=w") | |
2071 | (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
2072 | (match_operand:SVE_I 2 "register_operand" "w")] | |
2073 | MAXMINV))] | |
2074 | "TARGET_SVE" | |
2075 | "<maxmin_uns_op>v\t%<Vetype>0, %1, %2.<Vetype>" | |
2076 | ) | |
2077 | ||
2078 | ;; Unpredicated floating-point MIN/MAX reduction. | |
2079 | (define_expand "reduc_<maxmin_uns>_scal_<mode>" | |
2080 | [(set (match_operand:<VEL> 0 "register_operand") | |
2081 | (unspec:<VEL> [(match_dup 2) | |
2082 | (match_operand:SVE_F 1 "register_operand")] | |
2083 | FMAXMINV))] | |
2084 | "TARGET_SVE" | |
2085 | { | |
f8e1b9c1 | 2086 | operands[2] = aarch64_ptrue_reg (<VPRED>mode); |
8fa7f434 | 2087 | } |
2088 | ) | |
2089 | ||
2090 | ;; Predicated floating-point MIN/MAX reduction. | |
2091 | (define_insn "*reduc_<maxmin_uns>_scal_<mode>" | |
2092 | [(set (match_operand:<VEL> 0 "register_operand" "=w") | |
2093 | (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
2094 | (match_operand:SVE_F 2 "register_operand" "w")] | |
2095 | FMAXMINV))] | |
2096 | "TARGET_SVE" | |
2097 | "<maxmin_uns_op>v\t%<Vetype>0, %1, %2.<Vetype>" | |
2098 | ) | |
2099 | ||
216934f9 | 2100 | (define_expand "reduc_<optab>_scal_<mode>" |
2101 | [(set (match_operand:<VEL> 0 "register_operand") | |
2102 | (unspec:<VEL> [(match_dup 2) | |
2103 | (match_operand:SVE_I 1 "register_operand")] | |
2104 | BITWISEV))] | |
2105 | "TARGET_SVE" | |
2106 | { | |
f8e1b9c1 | 2107 | operands[2] = aarch64_ptrue_reg (<VPRED>mode); |
216934f9 | 2108 | } |
2109 | ) | |
2110 | ||
2111 | (define_insn "*reduc_<optab>_scal_<mode>" | |
2112 | [(set (match_operand:<VEL> 0 "register_operand" "=w") | |
2113 | (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
2114 | (match_operand:SVE_I 2 "register_operand" "w")] | |
2115 | BITWISEV))] | |
2116 | "TARGET_SVE" | |
2117 | "<bit_reduc_op>\t%<Vetype>0, %1, %2.<Vetype>" | |
2118 | ) | |
2119 | ||
d77809a4 | 2120 | ;; Unpredicated in-order FP reductions. |
2121 | (define_expand "fold_left_plus_<mode>" | |
2122 | [(set (match_operand:<VEL> 0 "register_operand") | |
2123 | (unspec:<VEL> [(match_dup 3) | |
2124 | (match_operand:<VEL> 1 "register_operand") | |
2125 | (match_operand:SVE_F 2 "register_operand")] | |
2126 | UNSPEC_FADDA))] | |
2127 | "TARGET_SVE" | |
2128 | { | |
f8e1b9c1 | 2129 | operands[3] = aarch64_ptrue_reg (<VPRED>mode); |
d77809a4 | 2130 | } |
2131 | ) | |
2132 | ||
2133 | ;; In-order FP reductions predicated with PTRUE. | |
26fb5106 | 2134 | (define_insn "mask_fold_left_plus_<mode>" |
d77809a4 | 2135 | [(set (match_operand:<VEL> 0 "register_operand" "=w") |
26fb5106 | 2136 | (unspec:<VEL> [(match_operand:<VPRED> 3 "register_operand" "Upl") |
2137 | (match_operand:<VEL> 1 "register_operand" "0") | |
2138 | (match_operand:SVE_F 2 "register_operand" "w")] | |
d77809a4 | 2139 | UNSPEC_FADDA))] |
2140 | "TARGET_SVE" | |
26fb5106 | 2141 | "fadda\t%<Vetype>0, %3, %<Vetype>0, %2.<Vetype>" |
d77809a4 | 2142 | ) |
2143 | ||
2144 | ;; Predicated form of the above in-order reduction. | |
2145 | (define_insn "*pred_fold_left_plus_<mode>" | |
2146 | [(set (match_operand:<VEL> 0 "register_operand" "=w") | |
2147 | (unspec:<VEL> | |
2148 | [(match_operand:<VEL> 1 "register_operand" "0") | |
2149 | (unspec:SVE_F | |
2150 | [(match_operand:<VPRED> 2 "register_operand" "Upl") | |
2151 | (match_operand:SVE_F 3 "register_operand" "w") | |
2152 | (match_operand:SVE_F 4 "aarch64_simd_imm_zero")] | |
2153 | UNSPEC_SEL)] | |
2154 | UNSPEC_FADDA))] | |
2155 | "TARGET_SVE" | |
2156 | "fadda\t%<Vetype>0, %2, %<Vetype>0, %3.<Vetype>" | |
2157 | ) | |
2158 | ||
8fa7f434 | 2159 | ;; Unpredicated floating-point addition. |
2160 | (define_expand "add<mode>3" | |
2161 | [(set (match_operand:SVE_F 0 "register_operand") | |
2162 | (unspec:SVE_F | |
2163 | [(match_dup 3) | |
2164 | (plus:SVE_F | |
2165 | (match_operand:SVE_F 1 "register_operand") | |
2166 | (match_operand:SVE_F 2 "aarch64_sve_float_arith_with_sub_operand"))] | |
2167 | UNSPEC_MERGE_PTRUE))] | |
2168 | "TARGET_SVE" | |
2169 | { | |
f8e1b9c1 | 2170 | operands[3] = aarch64_ptrue_reg (<VPRED>mode); |
8fa7f434 | 2171 | } |
2172 | ) | |
2173 | ||
2174 | ;; Floating-point addition predicated with a PTRUE. | |
9e089468 | 2175 | (define_insn_and_split "*add<mode>3" |
8fa7f434 | 2176 | [(set (match_operand:SVE_F 0 "register_operand" "=w, w, w") |
2177 | (unspec:SVE_F | |
2178 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") | |
2179 | (plus:SVE_F | |
2180 | (match_operand:SVE_F 2 "register_operand" "%0, 0, w") | |
2181 | (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_operand" "vsA, vsN, w"))] | |
2182 | UNSPEC_MERGE_PTRUE))] | |
2183 | "TARGET_SVE" | |
2184 | "@ | |
2185 | fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 | |
2186 | fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3 | |
9e089468 | 2187 | #" |
2188 | ; Split the unpredicated form after reload, so that we don't have | |
2189 | ; the unnecessary PTRUE. | |
2190 | "&& reload_completed | |
2191 | && register_operand (operands[3], <MODE>mode)" | |
2192 | [(set (match_dup 0) (plus:SVE_F (match_dup 2) (match_dup 3)))] | |
8fa7f434 | 2193 | ) |
2194 | ||
2195 | ;; Unpredicated floating-point subtraction. | |
2196 | (define_expand "sub<mode>3" | |
2197 | [(set (match_operand:SVE_F 0 "register_operand") | |
2198 | (unspec:SVE_F | |
2199 | [(match_dup 3) | |
2200 | (minus:SVE_F | |
2201 | (match_operand:SVE_F 1 "aarch64_sve_float_arith_operand") | |
2202 | (match_operand:SVE_F 2 "register_operand"))] | |
2203 | UNSPEC_MERGE_PTRUE))] | |
2204 | "TARGET_SVE" | |
2205 | { | |
f8e1b9c1 | 2206 | operands[3] = aarch64_ptrue_reg (<VPRED>mode); |
8fa7f434 | 2207 | } |
2208 | ) | |
2209 | ||
;; Floating-point subtraction predicated with a PTRUE.
;; At most one of operands 2 and 3 can be an immediate; the insn
;; condition enforces that at least one of them is a register.
(define_insn_and_split "*sub<mode>3"
  [(set (match_operand:SVE_F 0 "register_operand" "=w, w, w, w")
        (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
           (minus:SVE_F
             (match_operand:SVE_F 2 "aarch64_sve_float_arith_operand" "0, 0, vsA, w")
             (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_operand" "vsA, vsN, 0, w"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE
   && (register_operand (operands[2], <MODE>mode)
       || register_operand (operands[3], <MODE>mode))"
  "@
   fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
   fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
   #"
  ; Split the unpredicated form after reload, so that we don't have
  ; the unnecessary PTRUE.
  "&& reload_completed
   && register_operand (operands[2], <MODE>mode)
   && register_operand (operands[3], <MODE>mode)"
  [(set (match_dup 0) (minus:SVE_F (match_dup 2) (match_dup 3)))]
)
2234 | ||
;; Unpredicated floating-point multiplication.
;; Operand 2 may be an FMUL immediate (0.5 or 2.0) or a register.
(define_expand "mul<mode>3"
  [(set (match_operand:SVE_F 0 "register_operand")
        (unspec:SVE_F
          [(match_dup 3)
           (mult:SVE_F
             (match_operand:SVE_F 1 "register_operand")
             (match_operand:SVE_F 2 "aarch64_sve_float_mul_operand"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[3] = aarch64_ptrue_reg (<VPRED>mode);
  }
)
2249 | ||
;; Floating-point multiplication predicated with a PTRUE.
;; The immediate alternative (vsM) handles the FMUL-with-constant form;
;; the register-register form is split to an unpredicated FMUL below.
(define_insn_and_split "*mul<mode>3"
  [(set (match_operand:SVE_F 0 "register_operand" "=w, w")
        (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (mult:SVE_F
             (match_operand:SVE_F 2 "register_operand" "%0, w")
             (match_operand:SVE_F 3 "aarch64_sve_float_mul_operand" "vsM, w"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "@
   fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   #"
  ; Split the unpredicated form after reload, so that we don't have
  ; the unnecessary PTRUE.
  "&& reload_completed
   && register_operand (operands[3], <MODE>mode)"
  [(set (match_dup 0) (mult:SVE_F (match_dup 2) (match_dup 3)))]
)
2269 | ||
;; Unpredicated floating-point binary operations (post-RA only).
;; These are generated by splitting a predicated instruction whose
;; predicate is unused.  They match the unpredicated FADD/FSUB/FMUL
;; forms, which take no governing predicate.
(define_insn "*post_ra_<sve_fp_op><mode>3"
  [(set (match_operand:SVE_F 0 "register_operand" "=w")
        (SVE_UNPRED_FP_BINARY:SVE_F
          (match_operand:SVE_F 1 "register_operand" "w")
          (match_operand:SVE_F 2 "register_operand" "w")))]
  "TARGET_SVE && reload_completed"
  "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>")
2280 | ||
;; Unpredicated fma (%0 = (%1 * %2) + %3).
(define_expand "fma<mode>4"
  [(set (match_operand:SVE_F 0 "register_operand")
        (unspec:SVE_F
          [(match_dup 4)
           (fma:SVE_F (match_operand:SVE_F 1 "register_operand")
                      (match_operand:SVE_F 2 "register_operand")
                      (match_operand:SVE_F 3 "register_operand"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[4] = aarch64_ptrue_reg (<VPRED>mode);
  }
)
2295 | ||
;; fma predicated with a PTRUE.
;; Note that the multiplication operands are 3 and 4 and the addend is
;; operand 2, so that the constraint alternatives line up with FMAD
;; (accumulate into a multiplicand) and FMLA (accumulate into the addend).
;; The third alternative uses MOVPRFX when no input is tied to the output.
(define_insn "*fma<mode>4"
  [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
        (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
           (fma:SVE_F (match_operand:SVE_F 3 "register_operand" "%0, w, w")
                      (match_operand:SVE_F 4 "register_operand" "w, w, w")
                      (match_operand:SVE_F 2 "register_operand" "w, 0, w"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "@
   fmad\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
   fmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
   movprfx\t%0, %2\;fmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
  [(set_attr "movprfx" "*,*,yes")]
)
2312 | ||
;; Unpredicated fnma (%0 = (-%1 * %2) + %3).
(define_expand "fnma<mode>4"
  [(set (match_operand:SVE_F 0 "register_operand")
        (unspec:SVE_F
          [(match_dup 4)
           (fma:SVE_F (neg:SVE_F
                        (match_operand:SVE_F 1 "register_operand"))
                      (match_operand:SVE_F 2 "register_operand")
                      (match_operand:SVE_F 3 "register_operand"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[4] = aarch64_ptrue_reg (<VPRED>mode);
  }
)
2328 | ||
;; fnma predicated with a PTRUE.
;; Same operand layout as *fma<mode>4: operands 3 and 4 are multiplied
;; (with 3 negated) and operand 2 is the addend, giving FMSB/FMLS forms.
(define_insn "*fnma<mode>4"
  [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
        (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
           (fma:SVE_F (neg:SVE_F
                        (match_operand:SVE_F 3 "register_operand" "%0, w, w"))
                      (match_operand:SVE_F 4 "register_operand" "w, w, w")
                      (match_operand:SVE_F 2 "register_operand" "w, 0, w"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "@
   fmsb\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
   fmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
   movprfx\t%0, %2\;fmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
  [(set_attr "movprfx" "*,*,yes")]
)
2346 | ||
;; Unpredicated fms (%0 = (%1 * %2) - %3).
(define_expand "fms<mode>4"
  [(set (match_operand:SVE_F 0 "register_operand")
        (unspec:SVE_F
          [(match_dup 4)
           (fma:SVE_F (match_operand:SVE_F 1 "register_operand")
                      (match_operand:SVE_F 2 "register_operand")
                      (neg:SVE_F
                        (match_operand:SVE_F 3 "register_operand")))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[4] = aarch64_ptrue_reg (<VPRED>mode);
  }
)
2362 | ||
;; fms predicated with a PTRUE.
;; Operands 3 and 4 are multiplied and the negated operand 2 is added,
;; matching the FNMSB (tied multiplicand) and FNMLS (tied addend) forms.
(define_insn "*fms<mode>4"
  [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
        (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
           (fma:SVE_F (match_operand:SVE_F 3 "register_operand" "%0, w, w")
                      (match_operand:SVE_F 4 "register_operand" "w, w, w")
                      (neg:SVE_F
                        (match_operand:SVE_F 2 "register_operand" "w, 0, w")))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "@
   fnmsb\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
   fnmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
   movprfx\t%0, %2\;fnmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
  [(set_attr "movprfx" "*,*,yes")]
)
2380 | ||
;; Unpredicated fnms (%0 = (-%1 * %2) - %3).
(define_expand "fnms<mode>4"
  [(set (match_operand:SVE_F 0 "register_operand")
        (unspec:SVE_F
          [(match_dup 4)
           (fma:SVE_F (neg:SVE_F
                        (match_operand:SVE_F 1 "register_operand"))
                      (match_operand:SVE_F 2 "register_operand")
                      (neg:SVE_F
                        (match_operand:SVE_F 3 "register_operand")))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[4] = aarch64_ptrue_reg (<VPRED>mode);
  }
)
2397 | ||
;; fnms predicated with a PTRUE.
;; Both the product (operands 3 * 4) and the addend (operand 2) are
;; negated, matching the FNMAD/FNMLA forms.
(define_insn "*fnms<mode>4"
  [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
        (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
           (fma:SVE_F (neg:SVE_F
                        (match_operand:SVE_F 3 "register_operand" "%0, w, w"))
                      (match_operand:SVE_F 4 "register_operand" "w, w, w")
                      (neg:SVE_F
                        (match_operand:SVE_F 2 "register_operand" "w, 0, w")))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "@
   fnmad\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
   fnmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
   movprfx\t%0, %2\;fnmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
  [(set_attr "movprfx" "*,*,yes")]
)
2416 | ||
;; Unpredicated floating-point division.
(define_expand "div<mode>3"
  [(set (match_operand:SVE_F 0 "register_operand")
        (unspec:SVE_F
          [(match_dup 3)
           (div:SVE_F (match_operand:SVE_F 1 "register_operand")
                      (match_operand:SVE_F 2 "register_operand"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[3] = aarch64_ptrue_reg (<VPRED>mode);
  }
)
2430 | ||
;; Floating-point division predicated with a PTRUE.
;; FDIVR handles the case in which the divisor is tied to the
;; destination; the last alternative uses MOVPRFX when neither
;; input is tied.
(define_insn "*div<mode>3"
  [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
        (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
           (div:SVE_F (match_operand:SVE_F 2 "register_operand" "0, w, w")
                      (match_operand:SVE_F 3 "register_operand" "w, 0, w"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "@
   fdiv\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   fdivr\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
   movprfx\t%0, %2\;fdiv\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,*,yes")]
)
2446 | ||
;; Unpredicated FNEG, FABS and FSQRT.
(define_expand "<optab><mode>2"
  [(set (match_operand:SVE_F 0 "register_operand")
        (unspec:SVE_F
          [(match_dup 2)
           (SVE_FP_UNARY:SVE_F (match_operand:SVE_F 1 "register_operand"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[2] = aarch64_ptrue_reg (<VPRED>mode);
  }
)
2459 | ||
;; FNEG, FABS and FSQRT predicated with a PTRUE.
(define_insn "*<optab><mode>2"
  [(set (match_operand:SVE_F 0 "register_operand" "=w")
        (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl")
           (SVE_FP_UNARY:SVE_F (match_operand:SVE_F 2 "register_operand" "w"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
)
2470 | ||
;; Floating-point absolute difference, abs (%2 - %3), predicated with
;; a PTRUE.  Matches combine's fold of FABS around FSUB into FABD.
(define_insn "*fabd<mode>3"
  [(set (match_operand:SVE_F 0 "register_operand" "=w")
        (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl")
           (abs:SVE_F
             (minus:SVE_F
               (match_operand:SVE_F 2 "register_operand" "0")
               (match_operand:SVE_F 3 "register_operand" "w")))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "fabd\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
)
2483 | ||
;; Unpredicated FRINTy (floating-point round to integral value,
;; one pattern per rounding mode in the FRINT iterator).
(define_expand "<frint_pattern><mode>2"
  [(set (match_operand:SVE_F 0 "register_operand")
        (unspec:SVE_F
          [(match_dup 2)
           (unspec:SVE_F [(match_operand:SVE_F 1 "register_operand")]
                         FRINT)]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[2] = aarch64_ptrue_reg (<VPRED>mode);
  }
)
2497 | ||
;; FRINTy predicated with a PTRUE.
(define_insn "*<frint_pattern><mode>2"
  [(set (match_operand:SVE_F 0 "register_operand" "=w")
        (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl")
           (unspec:SVE_F [(match_operand:SVE_F 2 "register_operand" "w")]
                         FRINT)]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "frint<frint_suffix>\t%0.<Vetype>, %1/m, %2.<Vetype>"
)
2509 | ||
;; Unpredicated conversion of floats to integers of the same size (HF to HI,
;; SF to SI or DF to DI).
(define_expand "<fix_trunc_optab><mode><v_int_equiv>2"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
        (unspec:<V_INT_EQUIV>
          [(match_dup 2)
           (FIXUORS:<V_INT_EQUIV>
             (match_operand:SVE_F 1 "register_operand"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[2] = aarch64_ptrue_reg (<VPRED>mode);
  }
)
2524 | ||
;; Conversion of HF to DI, SI or HI, predicated with a PTRUE.
;; (The source operand is VNx8HF, i.e. half precision; the destination
;; iterator SVE_HSDI supplies the integer element size.)
(define_insn "*<fix_trunc_optab>vnx8hf<mode>2"
  [(set (match_operand:SVE_HSDI 0 "register_operand" "=w")
        (unspec:SVE_HSDI
          [(match_operand:<VPRED> 1 "register_operand" "Upl")
           (FIXUORS:SVE_HSDI
             (match_operand:VNx8HF 2 "register_operand" "w"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "fcvtz<su>\t%0.<Vetype>, %1/m, %2.h"
)
2536 | ||
;; Conversion of SF to DI or SI, predicated with a PTRUE.
(define_insn "*<fix_trunc_optab>vnx4sf<mode>2"
  [(set (match_operand:SVE_SDI 0 "register_operand" "=w")
        (unspec:SVE_SDI
          [(match_operand:<VPRED> 1 "register_operand" "Upl")
           (FIXUORS:SVE_SDI
             (match_operand:VNx4SF 2 "register_operand" "w"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "fcvtz<su>\t%0.<Vetype>, %1/m, %2.s"
)
2548 | ||
;; Conversion of DF to DI or SI, predicated with a PTRUE.
;; The predicate mode is fixed at VNx2BI, since only the 64-bit
;; containers of the DF input are governed.
(define_insn "*<fix_trunc_optab>vnx2df<mode>2"
  [(set (match_operand:SVE_SDI 0 "register_operand" "=w")
        (unspec:SVE_SDI
          [(match_operand:VNx2BI 1 "register_operand" "Upl")
           (FIXUORS:SVE_SDI
             (match_operand:VNx2DF 2 "register_operand" "w"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "fcvtz<su>\t%0.<Vetype>, %1/m, %2.d"
)
2560 | ||
;; Unpredicated conversion of integers to floats of the same size
;; (HI to HF, SI to SF or DI to DF).
(define_expand "<optab><v_int_equiv><mode>2"
  [(set (match_operand:SVE_F 0 "register_operand")
        (unspec:SVE_F
          [(match_dup 2)
           (FLOATUORS:SVE_F
             (match_operand:<V_INT_EQUIV> 1 "register_operand"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[2] = aarch64_ptrue_reg (<VPRED>mode);
  }
)
2575 | ||
;; Conversion of DI, SI or HI to the same number of HFs, predicated
;; with a PTRUE.
(define_insn "*<optab><mode>vnx8hf2"
  [(set (match_operand:VNx8HF 0 "register_operand" "=w")
        (unspec:VNx8HF
          [(match_operand:<VPRED> 1 "register_operand" "Upl")
           (FLOATUORS:VNx8HF
             (match_operand:SVE_HSDI 2 "register_operand" "w"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "<su_optab>cvtf\t%0.h, %1/m, %2.<Vetype>"
)
2588 | ||
;; Conversion of DI or SI to the same number of SFs, predicated with a PTRUE.
(define_insn "*<optab><mode>vnx4sf2"
  [(set (match_operand:VNx4SF 0 "register_operand" "=w")
        (unspec:VNx4SF
          [(match_operand:<VPRED> 1 "register_operand" "Upl")
           (FLOATUORS:VNx4SF
             (match_operand:SVE_SDI 2 "register_operand" "w"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "<su_optab>cvtf\t%0.s, %1/m, %2.<Vetype>"
)
2600 | ||
;; Conversion of DI or SI to DF, predicated with a PTRUE.
;; This pattern is named (no "*" prefix) because it is generated
;; directly by the vec_unpack<su_optab>_float_* expander below.
(define_insn "aarch64_sve_<optab><mode>vnx2df2"
  [(set (match_operand:VNx2DF 0 "register_operand" "=w")
        (unspec:VNx2DF
          [(match_operand:VNx2BI 1 "register_operand" "Upl")
           (FLOATUORS:VNx2DF
             (match_operand:SVE_SDI 2 "register_operand" "w"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "<su_optab>cvtf\t%0.d, %1/m, %2.<Vetype>"
)
2612 | ||
;; Conversion of DFs to the same number of SFs, or SFs to the same number
;; of HFs (narrowing FCVT).
(define_insn "*trunc<Vwide><mode>2"
  [(set (match_operand:SVE_HSF 0 "register_operand" "=w")
        (unspec:SVE_HSF
          [(match_operand:<VWIDE_PRED> 1 "register_operand" "Upl")
           (unspec:SVE_HSF
             [(match_operand:<VWIDE> 2 "register_operand" "w")]
             UNSPEC_FLOAT_CONVERT)]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "fcvt\t%0.<Vetype>, %1/m, %2.<Vewtype>"
)
2626 | ||
;; Conversion of SFs to the same number of DFs, or HFs to the same number
;; of SFs (widening FCVT).  Named so that the vec_unpacks_* expander
;; below can generate it directly.
(define_insn "aarch64_sve_extend<mode><Vwide>2"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (unspec:<VWIDE>
          [(match_operand:<VWIDE_PRED> 1 "register_operand" "Upl")
           (unspec:<VWIDE>
             [(match_operand:SVE_HSF 2 "register_operand" "w")]
             UNSPEC_FLOAT_CONVERT)]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "fcvt\t%0.<Vewtype>, %1/m, %2.<Vetype>"
)
2640 | ||
;; Unpack the low or high half of a predicate, where "high" refers to
;; the low-numbered lanes for big-endian and the high-numbered lanes
;; for little-endian.  <hi_lanes_optab> resolves that endian-dependent
;; choice at expand time.
(define_expand "vec_unpack<su>_<perm_hilo>_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand")]
                   UNPACK)]
  "TARGET_SVE"
  {
    emit_insn ((<hi_lanes_optab>
                ? gen_aarch64_sve_punpkhi_<PRED_BHS:mode>
                : gen_aarch64_sve_punpklo_<PRED_BHS:mode>)
               (operands[0], operands[1]));
    DONE;
  }
)
2657 | ||
;; PUNPKHI and PUNPKLO.  Predicate unpack always operates on the
;; byte-granularity view (%0.h, %1.b) regardless of element size.
(define_insn "aarch64_sve_punpk<perm_hilo>_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=Upa")
        (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand" "Upa")]
                        UNPACK_UNSIGNED))]
  "TARGET_SVE"
  "punpk<perm_hilo>\t%0.h, %1.b"
)
2666 | ||
;; Unpack the low or high half of a vector, where "high" refers to
;; the low-numbered lanes for big-endian and the high-numbered lanes
;; for little-endian.
(define_expand "vec_unpack<su>_<perm_hilo>_<SVE_BHSI:mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (unspec:<VWIDE> [(match_operand:SVE_BHSI 1 "register_operand")] UNPACK)]
  "TARGET_SVE"
  {
    emit_insn ((<hi_lanes_optab>
                ? gen_aarch64_sve_<su>unpkhi_<SVE_BHSI:mode>
                : gen_aarch64_sve_<su>unpklo_<SVE_BHSI:mode>)
               (operands[0], operands[1]));
    DONE;
  }
)
2682 | ||
;; SUNPKHI, UUNPKHI, SUNPKLO and UUNPKLO.
(define_insn "aarch64_sve_<su>unpk<perm_hilo>_<SVE_BHSI:mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (unspec:<VWIDE> [(match_operand:SVE_BHSI 1 "register_operand" "w")]
                        UNPACK))]
  "TARGET_SVE"
  "<su>unpk<perm_hilo>\t%0.<Vewtype>, %1.<Vetype>"
)
2691 | ||
;; Unpack one half of a VNx4SF to VNx2DF, or one half of a VNx8HF to VNx4SF.
;; First unpack the source without conversion, then float-convert the
;; unpacked source.
(define_expand "vec_unpacks_<perm_hilo>_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (unspec:SVE_HSF [(match_operand:SVE_HSF 1 "register_operand")]
                   UNPACK_UNSIGNED)]
  "TARGET_SVE"
  {
    /* Use ZIP to do the unpack, since we don't care about the upper halves
       and since it has the nice property of not needing any subregs.
       If using UUNPK* turns out to be preferable, we could model it as
       a ZIP whose first operand is zero.  */
    rtx temp = gen_reg_rtx (<MODE>mode);
    emit_insn ((<hi_lanes_optab>
                ? gen_aarch64_sve_zip2<mode>
                : gen_aarch64_sve_zip1<mode>)
               (temp, operands[1], operands[1]));
    rtx ptrue = aarch64_ptrue_reg (<VWIDE_PRED>mode);
    emit_insn (gen_aarch64_sve_extend<mode><Vwide>2 (operands[0],
                                                     ptrue, temp));
    DONE;
  }
)
2716 | ||
;; Unpack one half of a VNx4SI to VNx2DF.  First unpack from VNx4SI
;; to VNx2DI, reinterpret the VNx2DI as a VNx4SI, then convert the
;; unpacked VNx4SI to VNx2DF.
(define_expand "vec_unpack<su_optab>_float_<perm_hilo>_vnx4si"
  [(match_operand:VNx2DF 0 "register_operand")
   (FLOATUORS:VNx2DF
     (unspec:VNx2DI [(match_operand:VNx4SI 1 "register_operand")]
                    UNPACK_UNSIGNED))]
  "TARGET_SVE"
  {
    /* Use ZIP to do the unpack, since we don't care about the upper halves
       and since it has the nice property of not needing any subregs.
       If using UUNPK* turns out to be preferable, we could model it as
       a ZIP whose first operand is zero.  */
    rtx temp = gen_reg_rtx (VNx4SImode);
    emit_insn ((<hi_lanes_optab>
                ? gen_aarch64_sve_zip2vnx4si
                : gen_aarch64_sve_zip1vnx4si)
               (temp, operands[1], operands[1]));
    rtx ptrue = aarch64_ptrue_reg (VNx2BImode);
    emit_insn (gen_aarch64_sve_<FLOATUORS:optab>vnx4sivnx2df2 (operands[0],
                                                               ptrue, temp));
    DONE;
  }
)
2742 | ||
;; Predicate pack.  Use UZP1 on the narrower type, which discards
;; the high part of each wide element.
(define_insn "vec_pack_trunc_<Vwide>"
  [(set (match_operand:PRED_BHS 0 "register_operand" "=Upa")
        (unspec:PRED_BHS
          [(match_operand:<VWIDE> 1 "register_operand" "Upa")
           (match_operand:<VWIDE> 2 "register_operand" "Upa")]
          UNSPEC_PACK))]
  "TARGET_SVE"
  "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)
2754 | ||
;; Integer pack.  Use UZP1 on the narrower type, which discards
;; the high part of each wide element.
(define_insn "vec_pack_trunc_<Vwide>"
  [(set (match_operand:SVE_BHSI 0 "register_operand" "=w")
        (unspec:SVE_BHSI
          [(match_operand:<VWIDE> 1 "register_operand" "w")
           (match_operand:<VWIDE> 2 "register_operand" "w")]
          UNSPEC_PACK))]
  "TARGET_SVE"
  "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)
2766 | ||
;; Convert two vectors of DF to SF, or two vectors of SF to HF, and pack
;; the results into a single vector.  Each input is narrowed with a
;; ptrue-predicated FCVT into a fresh temporary, then the temporaries
;; are interleaved with UZP1.
(define_expand "vec_pack_trunc_<Vwide>"
  [(set (match_dup 4)
        (unspec:SVE_HSF
          [(match_dup 3)
           (unspec:SVE_HSF [(match_operand:<VWIDE> 1 "register_operand")]
                           UNSPEC_FLOAT_CONVERT)]
          UNSPEC_MERGE_PTRUE))
   (set (match_dup 5)
        (unspec:SVE_HSF
          [(match_dup 3)
           (unspec:SVE_HSF [(match_operand:<VWIDE> 2 "register_operand")]
                           UNSPEC_FLOAT_CONVERT)]
          UNSPEC_MERGE_PTRUE))
   (set (match_operand:SVE_HSF 0 "register_operand")
        (unspec:SVE_HSF [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))]
  "TARGET_SVE"
  {
    operands[3] = aarch64_ptrue_reg (<VWIDE_PRED>mode);
    operands[4] = gen_reg_rtx (<MODE>mode);
    operands[5] = gen_reg_rtx (<MODE>mode);
  }
)
2791 | ||
;; Convert two vectors of DF to SI and pack the results into a single vector.
;; Same shape as the float pack above: two ptrue-predicated FCVTZ
;; conversions into temporaries, then a UZP1 interleave.
(define_expand "vec_pack_<su>fix_trunc_vnx2df"
  [(set (match_dup 4)
        (unspec:VNx4SI
          [(match_dup 3)
           (FIXUORS:VNx4SI (match_operand:VNx2DF 1 "register_operand"))]
          UNSPEC_MERGE_PTRUE))
   (set (match_dup 5)
        (unspec:VNx4SI
          [(match_dup 3)
           (FIXUORS:VNx4SI (match_operand:VNx2DF 2 "register_operand"))]
          UNSPEC_MERGE_PTRUE))
   (set (match_operand:VNx4SI 0 "register_operand")
        (unspec:VNx4SI [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))]
  "TARGET_SVE"
  {
    operands[3] = aarch64_ptrue_reg (VNx2BImode);
    operands[4] = gen_reg_rtx (VNx4SImode);
    operands[5] = gen_reg_rtx (VNx4SImode);
  }
)
633af029 | 2813 | |
;; Predicated floating-point operations with select.
;; Operand 4 supplies the value of inactive lanes; the insn patterns
;; below pick the best form depending on which operand (if any) it
;; is equal to.
(define_expand "cond_<optab><mode>"
  [(set (match_operand:SVE_F 0 "register_operand")
        (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_F
             [(match_operand:SVE_F 2 "register_operand")
              (match_operand:SVE_F 3 "register_operand")]
             SVE_COND_FP_BINARY)
           (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
  "TARGET_SVE"
)
47c52435 | 2827 | |
;; Predicated floating-point operations with select matching first operand.
;; Inactive lanes keep the value of operand 2, so the merging form of
;; the instruction can be used directly.
(define_insn "*cond_<optab><mode>_2"
  [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
        (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (unspec:SVE_F
             [(match_operand:SVE_F 2 "register_operand" "0, w")
              (match_operand:SVE_F 3 "register_operand" "w, w")]
             SVE_COND_FP_BINARY)
           (match_dup 2)]
          UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)
2845 | ||
;; Predicated floating-point operations with select matching second operand.
;; Inactive lanes keep the value of operand 3, so the reversed form
;; (e.g. FSUBR for FSUB) is used with operand 3 tied to the destination.
(define_insn "*cond_<optab><mode>_3"
  [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
        (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (unspec:SVE_F
             [(match_operand:SVE_F 2 "register_operand" "w, w")
              (match_operand:SVE_F 3 "register_operand" "0, w")]
             SVE_COND_FP_BINARY)
           (match_dup 3)]
          UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   <sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
   movprfx\t%0, %3\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)
2863 | ||
;; Predicated floating-point binary operations in which the values of
;; inactive lanes are distinct from the other inputs.
;; Alternatives 1-3 zero the inactive lanes (Dz), alternative 4 merges
;; with operand 4 tied to the destination, and the final alternative is
;; rewritten after reload into a separate VCOND_MASK select followed by
;; the merging operation.
(define_insn_and_rewrite "*cond_<optab><mode>_any"
  [(set (match_operand:SVE_F 0 "register_operand" "=&w, &w, &w, &w, ?&w")
        (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
           (unspec:SVE_F
             [(match_operand:SVE_F 2 "register_operand" "0, w, w, w, w")
              (match_operand:SVE_F 3 "register_operand" "w, 0, w, w, w")]
             SVE_COND_FP_BINARY)
           (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
          UNSPEC_SEL))]
  "TARGET_SVE
   && !rtx_equal_p (operands[2], operands[4])
   && !rtx_equal_p (operands[3], operands[4])"
  "@
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   #"
  "&& reload_completed
   && register_operand (operands[4], <MODE>mode)
   && !rtx_equal_p (operands[0], operands[4])"
  {
    emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
                                             operands[4], operands[1]));
    operands[4] = operands[2] = operands[0];
  }
  [(set_attr "movprfx" "yes")]
)
2895 | ||
;; Predicated floating-point ternary operations with select.
;; Operand 5 supplies the value of inactive lanes.
(define_expand "cond_<optab><mode>"
  [(set (match_operand:SVE_F 0 "register_operand")
        (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_F
             [(match_operand:SVE_F 2 "register_operand")
              (match_operand:SVE_F 3 "register_operand")
              (match_operand:SVE_F 4 "register_operand")]
             SVE_COND_FP_TERNARY)
           (match_operand:SVE_F 5 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
  "TARGET_SVE"
  {
    /* Swap the multiplication operands if the fallback value is the
       second of the two.  */
    if (rtx_equal_p (operands[3], operands[5]))
      std::swap (operands[2], operands[3]);
  })
2915 | ||
;; Predicated floating-point ternary operations using the FMAD-like form,
;; in which inactive lanes keep the first multiplication operand
;; (operand 2).
(define_insn "*cond_<optab><mode>_2"
  [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
        (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (unspec:SVE_F
             [(match_operand:SVE_F 2 "register_operand" "0, w")
              (match_operand:SVE_F 3 "register_operand" "w, w")
              (match_operand:SVE_F 4 "register_operand" "w, w")]
             SVE_COND_FP_TERNARY)
           (match_dup 2)]
          UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   <sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
   movprfx\t%0, %2\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)
2934 | ||
;; Predicated floating-point ternary operations using the FMLA-like form,
;; in which inactive lanes keep the accumulator (operand 4).
(define_insn "*cond_<optab><mode>_4"
  [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
        (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (unspec:SVE_F
             [(match_operand:SVE_F 2 "register_operand" "w, w")
              (match_operand:SVE_F 3 "register_operand" "w, w")
              (match_operand:SVE_F 4 "register_operand" "0, w")]
             SVE_COND_FP_TERNARY)
           (match_dup 4)]
          UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   <sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
   movprfx\t%0, %4\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)
2953 | ||
;; Predicated floating-point ternary operations in which the value for
;; inactive lanes is distinct from the other inputs (enforced by the
;; !rtx_equal_p conditions below; the tied cases are handled by the
;; _2 and _4 patterns above).
;;
;; Alternatives:
;;   - operand 5 is zero: zeroing MOVPRFX of the accumulator, then the
;;     FMLA-form instruction;
;;   - operand 5 is already in the destination: merging MOVPRFX of the
;;     accumulator, then the FMLA-form instruction;
;;   - general case: emitted as "#" and rewritten after reload into a
;;     separate select followed by the tied _4 form.
(define_insn_and_rewrite "*cond_<optab><mode>_any"
  [(set (match_operand:SVE_F 0 "register_operand" "=&w, &w, ?&w")
	(unspec:SVE_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
	   (unspec:SVE_F
	     [(match_operand:SVE_F 2 "register_operand" "w, w, w")
	      (match_operand:SVE_F 3 "register_operand" "w, w, w")
	      (match_operand:SVE_F 4 "register_operand" "w, w, w")]
	     SVE_COND_FP_TERNARY)
	   (match_operand:SVE_F 5 "aarch64_simd_reg_or_zero" "Dz, 0, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE
   && !rtx_equal_p (operands[2], operands[5])
   && !rtx_equal_p (operands[3], operands[5])
   && !rtx_equal_p (operands[4], operands[5])"
  "@
   movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
   #"
  "&& reload_completed
   && !CONSTANT_P (operands[5])
   && !rtx_equal_p (operands[0], operands[5])"
  {
    /* Select between the accumulator and the fallback value into the
       destination, then perform the operation with the destination as
       both accumulator and fallback (the _4 form).  */
    emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
					     operands[5], operands[1]));
    operands[5] = operands[4] = operands[0];
  }
  [(set_attr "movprfx" "yes")]
)
2985 | ||
;; Shift an SVE vector left by one element and insert a scalar into
;; element 0 (the INSR instruction).  The scalar can come from a
;; general register (including the zero register, via "rZ") or from
;; an FP/SIMD register.
(define_insn "vec_shl_insert_<mode>"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w")
	(unspec:SVE_ALL
	  [(match_operand:SVE_ALL 1 "register_operand" "0, 0")
	   (match_operand:<VEL> 2 "register_operand" "rZ, w")]
	  UNSPEC_INSR))]
  "TARGET_SVE"
  "@
   insr\t%0.<Vetype>, %<vwcore>2
   insr\t%0.<Vetype>, %<Vetype>2"
)
fd87572b | 2998 | |
;; copysign (x, y): give x the sign of y.  Implemented with integer
;; bitwise operations on the same-sized integer vector mode:
;; extract y's sign bits, mask them out of x, and OR the two together.
(define_expand "copysign<mode>3"
  [(match_operand:SVE_F 0 "register_operand")
   (match_operand:SVE_F 1 "register_operand")
   (match_operand:SVE_F 2 "register_operand")]
  "TARGET_SVE"
  {
    rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode);
    rtx mant = gen_reg_rtx (<V_INT_EQUIV>mode);
    rtx int_res = gen_reg_rtx (<V_INT_EQUIV>mode);
    /* Position of the sign bit within each element.  */
    int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;

    /* View both FP inputs as integer vectors of the same size.  */
    rtx arg1 = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
    rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode);

    /* sign = arg2 & sign-bit mask.  */
    emit_insn (gen_and<v_int_equiv>3
	       (sign, arg2,
		aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
						   HOST_WIDE_INT_M1U
						   << bits)));
    /* mant = arg1 with its sign bit cleared.  */
    emit_insn (gen_and<v_int_equiv>3
	       (mant, arg1,
		aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
						   ~(HOST_WIDE_INT_M1U
						     << bits))));
    emit_insn (gen_ior<v_int_equiv>3 (int_res, sign, mant));
    emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
    DONE;
  }
)
3028 | ||
;; xorsign (x, y): x with its sign flipped by the sign of y, i.e.
;; x ^ (y & sign-bit mask), done on the equivalent integer vector mode.
(define_expand "xorsign<mode>3"
  [(match_operand:SVE_F 0 "register_operand")
   (match_operand:SVE_F 1 "register_operand")
   (match_operand:SVE_F 2 "register_operand")]
  "TARGET_SVE"
  {
    rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode);
    rtx int_res = gen_reg_rtx (<V_INT_EQUIV>mode);
    /* Position of the sign bit within each element.  */
    int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;

    /* View both FP inputs as integer vectors of the same size.  */
    rtx arg1 = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
    rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode);

    /* sign = arg2 & sign-bit mask.  */
    emit_insn (gen_and<v_int_equiv>3
	       (sign, arg2,
		aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
						   HOST_WIDE_INT_M1U
						   << bits)));
    emit_insn (gen_xor<v_int_equiv>3 (int_res, arg1, sign));
    emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
    DONE;
  }
)
d3a7159f | 3052 | |
;; Unpredicated DOT product: accumulate four-way dot products of the
;; quarter-width inputs (operands 1 and 2) into operand 3.  The second
;; alternative uses MOVPRFX when the accumulator cannot be tied to the
;; destination.
(define_insn "<sur>dot_prod<vsi2qi>"
  [(set (match_operand:SVE_SDI 0 "register_operand" "=w, ?&w")
	(plus:SVE_SDI
	  (unspec:SVE_SDI
	    [(match_operand:<VSI2QI> 1 "register_operand" "w, w")
	     (match_operand:<VSI2QI> 2 "register_operand" "w, w")]
	    DOTPROD)
	  (match_operand:SVE_SDI 3 "register_operand" "0, w")))]
  "TARGET_SVE"
  "@
   <sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>
   movprfx\t%0, %3\;<sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>"
  [(set_attr "movprfx" "*,yes")]
)
2cbc1ad8 | 3068 | |
;; Unpredicated integer absolute difference.  Expanded by creating an
;; all-true predicate and deferring to the predicated pattern below.
(define_expand "<su>abd<mode>_3"
  [(use (match_operand:SVE_I 0 "register_operand"))
   (USMAX:SVE_I (match_operand:SVE_I 1 "register_operand")
		(match_operand:SVE_I 2 "register_operand"))]
  "TARGET_SVE"
  {
    rtx pred = aarch64_ptrue_reg (<VPRED>mode);
    emit_insn (gen_aarch64_<su>abd<mode>_3 (operands[0], pred, operands[1],
					    operands[2]));
    DONE;
  }
)
3082 | ||
;; Predicated integer absolute difference, represented as
;; max (x, y) - min (x, y) (<max_opp> is the opposite of the USMAX
;; operation) under an all-true predicate.  The second alternative
;; uses MOVPRFX when operand 2 cannot be tied to the destination.
(define_insn "aarch64_<su>abd<mode>_3"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
	(unspec:SVE_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (minus:SVE_I
	     (USMAX:SVE_I
	       (match_operand:SVE_I 2 "register_operand" "0, w")
	       (match_operand:SVE_I 3 "register_operand" "w, w"))
	     (<max_opp>:SVE_I
	       (match_dup 2)
	       (match_dup 3)))]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "@
   <su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)
3102 | ||
;; Emit a sequence to produce a sum-of-absolute-differences of the inputs in
;; operands 1 and 2.  The sequence also has to perform a widening reduction of
;; the difference into a vector and accumulate that into operand 3 before
;; copying that into the result operand 0.
;; Perform that with a sequence of:
;; MOV ones.b, #1
;; [SU]ABD diff.b, p0/m, op1.b, op2.b
;; MOVPRFX op0, op3 // If necessary
;; UDOT op0.s, diff.b, ones.b
;;
;; The dot product with an all-ones vector sums each group of four
;; absolute differences into one wider element.  UDOT is used for both
;; signed and unsigned inputs because the absolute differences are
;; always non-negative.
(define_expand "<sur>sad<vsi2qi>"
  [(use (match_operand:SVE_SDI 0 "register_operand"))
   (unspec:<VSI2QI> [(use (match_operand:<VSI2QI> 1 "register_operand"))
		     (use (match_operand:<VSI2QI> 2 "register_operand"))] ABAL)
   (use (match_operand:SVE_SDI 3 "register_operand"))]
  "TARGET_SVE"
  {
    rtx ones = force_reg (<VSI2QI>mode, CONST1_RTX (<VSI2QI>mode));
    rtx diff = gen_reg_rtx (<VSI2QI>mode);
    emit_insn (gen_<sur>abd<vsi2qi>_3 (diff, operands[1], operands[2]));
    emit_insn (gen_udot_prod<vsi2qi> (operands[0], diff, ones, operands[3]));
    DONE;
  }
)
08e92dcc | 3127 | |
3128 | ;; Standard pattern name vec_init<mode><Vel>. | |
3129 | (define_expand "vec_init<mode><Vel>" | |
3130 | [(match_operand:SVE_ALL 0 "register_operand" "") | |
3131 | (match_operand 1 "" "")] | |
3132 | "TARGET_SVE" | |
3133 | { | |
3134 | aarch64_sve_expand_vector_init (operands[0], operands[1]); | |
3135 | DONE; | |
3136 | } | |
3137 | ) |