]>
Commit | Line | Data |
---|---|---|
43cacb12 | 1 | ;; Machine description for AArch64 SVE. |
a5544970 | 2 | ;; Copyright (C) 2009-2019 Free Software Foundation, Inc. |
43cacb12 RS |
3 | ;; Contributed by ARM Ltd. |
4 | ;; | |
5 | ;; This file is part of GCC. | |
6 | ;; | |
7 | ;; GCC is free software; you can redistribute it and/or modify it | |
8 | ;; under the terms of the GNU General Public License as published by | |
9 | ;; the Free Software Foundation; either version 3, or (at your option) | |
10 | ;; any later version. | |
11 | ;; | |
12 | ;; GCC is distributed in the hope that it will be useful, but | |
13 | ;; WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 | ;; General Public License for more details. | |
16 | ;; | |
17 | ;; You should have received a copy of the GNU General Public License | |
18 | ;; along with GCC; see the file COPYING3. If not see | |
19 | ;; <http://www.gnu.org/licenses/>. | |
20 | ||
915d28fe RS |
21 | ;; The file is organised into the following sections (search for the full |
22 | ;; line): | |
23 | ;; | |
24 | ;; == General notes | |
25 | ;; ---- Note on the handling of big-endian SVE | |
34467289 | 26 | ;; ---- Description of UNSPEC_PTEST |
915d28fe RS |
27 | ;; |
28 | ;; == Moves | |
29 | ;; ---- Moves of single vectors | |
30 | ;; ---- Moves of multiple vectors | |
31 | ;; ---- Moves of predicates | |
32 | ;; | |
33 | ;; == Loads | |
34 | ;; ---- Normal contiguous loads | |
35 | ;; ---- Normal gather loads | |
36 | ;; | |
37 | ;; == Stores | |
38 | ;; ---- Normal contiguous stores | |
39 | ;; ---- Normal scatter stores | |
40 | ;; | |
41 | ;; == Vector creation | |
42 | ;; ---- [INT,FP] Duplicate element | |
43 | ;; ---- [INT,FP] Initialize from individual elements | |
44 | ;; ---- [INT] Linear series | |
45 | ;; ---- [PRED] Duplicate element | |
46 | ;; | |
47 | ;; == Vector decomposition | |
48 | ;; ---- [INT,FP] Extract index | |
49 | ;; ---- [INT,FP] Extract active element | |
50 | ;; ---- [PRED] Extract index | |
51 | ;; | |
52 | ;; == Unary arithmetic | |
53 | ;; ---- [INT] General unary arithmetic corresponding to rtx codes | |
d45b20a5 | 54 | ;; ---- [FP] General unary arithmetic corresponding to unspecs |
915d28fe RS |
55 | ;; ---- [PRED] Inverse |
56 | ;; | |
57 | ;; == Binary arithmetic | |
58 | ;; ---- [INT] General binary arithmetic corresponding to rtx codes | |
59 | ;; ---- [INT] Addition | |
60 | ;; ---- [INT] Subtraction | |
61 | ;; ---- [INT] Absolute difference | |
62 | ;; ---- [INT] Multiplication | |
63 | ;; ---- [INT] Highpart multiplication | |
64 | ;; ---- [INT] Division | |
65 | ;; ---- [INT] Binary logical operations | |
66 | ;; ---- [INT] Binary logical operations (inverted second input) | |
67 | ;; ---- [INT] Shifts | |
68 | ;; ---- [INT] Maximum and minimum | |
69 | ;; ---- [FP] General binary arithmetic corresponding to rtx codes | |
70 | ;; ---- [FP] General binary arithmetic corresponding to unspecs | |
71 | ;; ---- [FP] Addition | |
72 | ;; ---- [FP] Subtraction | |
73 | ;; ---- [FP] Absolute difference | |
74 | ;; ---- [FP] Multiplication | |
75 | ;; ---- [FP] Division | |
76 | ;; ---- [FP] Binary logical operations | |
77 | ;; ---- [FP] Sign copying | |
78 | ;; ---- [FP] Maximum and minimum | |
79 | ;; ---- [PRED] Binary logical operations | |
80 | ;; ---- [PRED] Binary logical operations (inverted second input) | |
81 | ;; ---- [PRED] Binary logical operations (inverted result) | |
82 | ;; | |
83 | ;; == Ternary arithmetic | |
84 | ;; ---- [INT] MLA and MAD | |
85 | ;; ---- [INT] MLS and MSB | |
86 | ;; ---- [INT] Dot product | |
87 | ;; ---- [INT] Sum of absolute differences | |
88 | ;; ---- [FP] General ternary arithmetic corresponding to unspecs | |
915d28fe RS |
89 | ;; |
90 | ;; == Comparisons and selects | |
91 | ;; ---- [INT,FP] Select based on predicates | |
92 | ;; ---- [INT,FP] Compare and select | |
93 | ;; ---- [INT] Comparisons | |
94 | ;; ---- [INT] While tests | |
95 | ;; ---- [FP] Comparisons | |
96 | ;; ---- [PRED] Test bits | |
97 | ;; | |
98 | ;; == Reductions | |
99 | ;; ---- [INT,FP] Conditional reductions | |
100 | ;; ---- [INT] Tree reductions | |
101 | ;; ---- [FP] Tree reductions | |
102 | ;; ---- [FP] Left-to-right reductions | |
103 | ;; | |
104 | ;; == Permutes | |
105 | ;; ---- [INT,FP] General permutes | |
106 | ;; ---- [INT,FP] Special-purpose unary permutes | |
107 | ;; ---- [INT,FP] Special-purpose binary permutes | |
108 | ;; ---- [PRED] Special-purpose binary permutes | |
109 | ;; | |
110 | ;; == Conversions | |
111 | ;; ---- [INT<-INT] Packs | |
112 | ;; ---- [INT<-INT] Unpacks | |
113 | ;; ---- [INT<-FP] Conversions | |
114 | ;; ---- [INT<-FP] Packs | |
115 | ;; ---- [INT<-FP] Unpacks | |
116 | ;; ---- [FP<-INT] Conversions | |
117 | ;; ---- [FP<-INT] Packs | |
118 | ;; ---- [FP<-INT] Unpacks | |
119 | ;; ---- [FP<-FP] Packs | |
120 | ;; ---- [FP<-FP] Unpacks | |
121 | ;; ---- [PRED<-PRED] Packs | |
122 | ;; ---- [PRED<-PRED] Unpacks | |
123 | ||
124 | ;; ========================================================================= | |
125 | ;; == General notes | |
126 | ;; ========================================================================= | |
127 | ;; | |
128 | ;; ------------------------------------------------------------------------- | |
129 | ;; ---- Note on the handling of big-endian SVE | |
130 | ;; ------------------------------------------------------------------------- | |
43cacb12 RS |
131 | ;; |
132 | ;; On big-endian systems, Advanced SIMD mov<mode> patterns act in the | |
133 | ;; same way as movdi or movti would: the first byte of memory goes | |
134 | ;; into the most significant byte of the register and the last byte | |
135 | ;; of memory goes into the least significant byte of the register. | |
136 | ;; This is the most natural ordering for Advanced SIMD and matches | |
137 | ;; the ABI layout for 64-bit and 128-bit vector types. | |
138 | ;; | |
139 | ;; As a result, the order of bytes within the register is what GCC | |
140 | ;; expects for a big-endian target, and subreg offsets therefore work | |
141 | ;; as expected, with the first element in memory having subreg offset 0 | |
142 | ;; and the last element in memory having the subreg offset associated | |
143 | ;; with a big-endian lowpart. However, this ordering also means that | |
144 | ;; GCC's lane numbering does not match the architecture's numbering: | |
145 | ;; GCC always treats the element at the lowest address in memory | |
146 | ;; (subreg offset 0) as element 0, while the architecture treats | |
147 | ;; the least significant end of the register as element 0. | |
148 | ;; | |
149 | ;; The situation for SVE is different. We want the layout of the | |
150 | ;; SVE register to be the same for mov<mode> as it is for maskload<mode>: | |
151 | ;; logically, a mov<mode> load must be indistinguishable from a | |
152 | ;; maskload<mode> whose mask is all true. We therefore need the | |
153 | ;; register layout to match LD1 rather than LDR. The ABI layout of | |
154 | ;; SVE types also matches LD1 byte ordering rather than LDR byte ordering. | |
155 | ;; | |
156 | ;; As a result, the architecture lane numbering matches GCC's lane | |
157 | ;; numbering, with element 0 always being the first in memory. | |
158 | ;; However: | |
159 | ;; | |
160 | ;; - Applying a subreg offset to a register does not give the element | |
161 | ;; that GCC expects: the first element in memory has the subreg offset | |
162 | ;; associated with a big-endian lowpart while the last element in memory | |
163 | ;; has subreg offset 0. We handle this via TARGET_CAN_CHANGE_MODE_CLASS. | |
164 | ;; | |
165 | ;; - We cannot use LDR and STR for spill slots that might be accessed | |
166 | ;; via subregs, since although the elements have the order GCC expects, | |
167 | ;; the order of the bytes within the elements is different. We instead | |
168 | ;; access spill slots via LD1 and ST1, using secondary reloads to | |
169 | ;; reserve a predicate register. | |
34467289 RS |
170 | ;; |
171 | ;; ------------------------------------------------------------------------- | |
172 | ;; ---- Description of UNSPEC_PTEST | |
173 | ;; ------------------------------------------------------------------------- | |
174 | ;; | |
175 | ;; SVE provides a PTEST instruction for testing the active lanes of a | |
176 | ;; predicate and setting the flags based on the result. The associated | |
177 | ;; condition code tests are: | |
178 | ;; | |
179 | ;; - any (= ne): at least one active bit is set | |
180 | ;; - none (= eq): all active bits are clear (*) | |
181 | ;; - first (= mi): the first active bit is set | |
182 | ;; - nfrst (= pl): the first active bit is clear (*) | |
183 | ;; - last (= cc): the last active bit is set | |
184 | ;; - nlast (= cs): the last active bit is clear (*) | |
185 | ;; | |
186 | ;; where the conditions marked (*) are also true when there are no active | |
187 | ;; lanes (i.e. when the governing predicate is a PFALSE). The flags results | |
188 | ;; of a PTEST use the condition code mode CC_NZC. | |
189 | ;; | |
190 | ;; PTEST is always a .B operation (i.e. it always operates on VNx16BI). | |
191 | ;; This means that for other predicate modes, we need a governing predicate | |
192 | ;; in which all bits are defined. | |
193 | ;; | |
194 | ;; For example, most predicated .H operations ignore the odd bits of the | |
195 | ;; governing predicate, so that an active lane is represented by the | |
196 | ;; bits "1x" and an inactive lane by the bits "0x", where "x" can be | |
197 | ;; any value. To test a .H predicate, we instead need "10" and "00" | |
198 | ;; respectively, so that the condition only tests the even bits of the | |
199 | ;; predicate. | |
200 | ;; | |
201 | ;; Several instructions set the flags as a side-effect, in the same way | |
202 | ;; that a separate PTEST would. It's important for code quality that we | |
203 | ;; use these flags results as often as possible, particularly in the case | |
204 | ;; of WHILE* and RDFFR. | |
205 | ;; | |
206 | ;; Also, some of the instructions that set the flags are unpredicated | |
207 | ;; and instead implicitly test all .B, .H, .S or .D elements, as though | |
208 | ;; they were predicated on a PTRUE of that size. For example, a .S | |
209 | ;; WHILELO sets the flags in the same way as a PTEST with a .S PTRUE | |
210 | ;; would. | |
211 | ;; | |
212 | ;; We therefore need to represent PTEST operations in a way that | |
213 | ;; makes it easy to combine them with both predicated and unpredicated | |
214 | ;; operations, while using a VNx16BI governing predicate for all | |
215 | ;; predicate modes. We do this using: | |
216 | ;; | |
217 | ;; (unspec:CC_NZC [gp cast_gp ptrue_flag op] UNSPEC_PTEST) | |
218 | ;; | |
219 | ;; where: | |
220 | ;; | |
221 | ;; - GP is the real VNx16BI governing predicate | |
222 | ;; | |
223 | ;; - CAST_GP is GP cast to the mode of OP. All bits dropped by casting | |
224 | ;; GP to CAST_GP are guaranteed to be clear in GP. | |
225 | ;; | |
226 | ;; - PTRUE_FLAG is a CONST_INT (conceptually of mode SI) that has the value | |
227 | ;; SVE_KNOWN_PTRUE if we know that CAST_GP (rather than GP) is all-true and | |
228 | ;; SVE_MAYBE_NOT_PTRUE otherwise. | |
229 | ;; | |
230 | ;; - OP is the predicate we want to test, of the same mode as CAST_GP. | |
43cacb12 | 231 | |
915d28fe RS |
232 | ;; ========================================================================= |
233 | ;; == Moves | |
234 | ;; ========================================================================= | |
235 | ||
236 | ;; ------------------------------------------------------------------------- | |
237 | ;; ---- Moves of single vectors | |
238 | ;; ------------------------------------------------------------------------- | |
239 | ;; Includes: | |
240 | ;; - MOV (including aliases) | |
241 | ;; - LD1B (contiguous form) | |
242 | ;; - LD1D ( " " ) | |
243 | ;; - LD1H ( " " ) | |
244 | ;; - LD1W ( " " ) | |
245 | ;; - LDR | |
246 | ;; - ST1B (contiguous form) | |
247 | ;; - ST1D ( " " ) | |
248 | ;; - ST1H ( " " ) | |
249 | ;; - ST1W ( " " ) | |
250 | ;; - STR | |
251 | ;; ------------------------------------------------------------------------- | |
252 | ||
43cacb12 RS |
;; Expander for moves of single SVE data vectors (all SVE_ALL modes).
;; The C body tries three special cases in order:
;;  - if either operand is a MEM and new pseudos can still be created,
;;    the move is handed to aarch64_expand_sve_mem_move;
;;  - if the source is a constant, it is handed to
;;    aarch64_expand_mov_immediate;
;;  - on big-endian targets, aarch64_maybe_expand_sve_subreg_move is given
;;    a chance to handle the move.
;; If none of these emits the move, the plain (set ...) template is used.
253 | (define_expand "mov<mode>" |
254 | [(set (match_operand:SVE_ALL 0 "nonimmediate_operand") | |
255 | (match_operand:SVE_ALL 1 "general_operand"))] | |
256 | "TARGET_SVE" | |
257 | { | |
258 | /* Use the predicated load and store patterns where possible. | |
259 | This is required for big-endian targets (see the comment at the | |
260 | head of the file) and increases the addressing choices for | |
261 | little-endian. */ | |
262 | if ((MEM_P (operands[0]) || MEM_P (operands[1])) | |
ea403d8b | 263 | && can_create_pseudo_p ()) |
43cacb12 RS |
264 | { |
265 | aarch64_expand_sve_mem_move (operands[0], operands[1], <VPRED>mode); | |
266 | DONE; | |
267 | } | |
268 | ||
269 | if (CONSTANT_P (operands[1])) | |
270 | { | |
4aeb1ba7 | 271 | aarch64_expand_mov_immediate (operands[0], operands[1]); |
43cacb12 RS |
272 | DONE; |
273 | } | |
002092be RS |
274 | 
275 | /* Optimize subregs on big-endian targets: we can use REV[BHW] | |
276 | instead of going through memory. */ | |
277 | if (BYTES_BIG_ENDIAN | |
ea403d8b | 278 | && aarch64_maybe_expand_sve_subreg_move (operands[0], operands[1])) |
002092be RS |
279 | DONE; |
280 | } | |
281 | ) | |
282 | ||
915d28fe RS |
;; Misaligned moves of single SVE data vectors.  The expander simply
;; forwards both operands to emit_move_insn.
283 | (define_expand "movmisalign<mode>" |
284 | [(set (match_operand:SVE_ALL 0 "nonimmediate_operand") | |
285 | (match_operand:SVE_ALL 1 "general_operand"))] | |
286 | "TARGET_SVE" | |
002092be | 287 | { |
915d28fe RS |
288 | /* Equivalent to a normal move for our purposes. */ |
289 | emit_move_insn (operands[0], operands[1]); | |
002092be | 290 | DONE; |
43cacb12 RS |
291 | } |
292 | ) | |
293 | ||
294 | ;; Unpredicated moves (little-endian). Only allow memory operations | |
295 | ;; during and after RA; before RA we want the predicated load and | |
296 | ;; store patterns to be used instead. | |
;; The insn condition enforces this: outside LRA/reload the pattern only
;; matches a register destination with a non-memory source.
;; Alternatives, in order: LDR from memory, STR to memory,
;; register-to-register MOV, and a "Dn" immediate whose assembly is
;; produced by aarch64_output_sve_mov_immediate.
297 | (define_insn "*aarch64_sve_mov<mode>_le" | |
298 | [(set (match_operand:SVE_ALL 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w") | |
299 | (match_operand:SVE_ALL 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))] | |
300 | "TARGET_SVE | |
301 | && !BYTES_BIG_ENDIAN | |
302 | && ((lra_in_progress || reload_completed) | |
303 | || (register_operand (operands[0], <MODE>mode) | |
304 | && nonmemory_operand (operands[1], <MODE>mode)))" | |
305 | "@ | |
306 | ldr\t%0, %1 | |
307 | str\t%1, %0 | |
308 | mov\t%0.d, %1.d | |
309 | * return aarch64_output_sve_mov_immediate (operands[1]);" | |
310 | ) | |
311 | ||
312 | ;; Unpredicated moves (big-endian). Memory accesses require secondary | |
313 | ;; reloads. | |
;; Consequently this pattern has no memory alternatives: the destination
;; must be a register and the source is either a register (MOV) or a "Dn"
;; immediate printed by aarch64_output_sve_mov_immediate.
314 | (define_insn "*aarch64_sve_mov<mode>_be" | |
315 | [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w") | |
316 | (match_operand:SVE_ALL 1 "aarch64_nonmemory_operand" "w, Dn"))] | |
317 | "TARGET_SVE && BYTES_BIG_ENDIAN" | |
318 | "@ | |
319 | mov\t%0.d, %1.d | |
320 | * return aarch64_output_sve_mov_immediate (operands[1]);" | |
321 | ) | |
322 | ||
323 | ;; Handle big-endian memory reloads. We use byte PTRUE for all modes | |
324 | ;; to try to encourage reuse. | |
1bbffb87 | 325 | ;; This pattern needs constraints due to the TARGET_SECONDARY_RELOAD hook. |
;; Operand 2 is the clobbered VNx16BI predicate register.  The expander
;; sets it to all-ones, views it (via gen_lowpart) in the predicate mode
;; that matches the element size of operand 0's mode, and then emits the
;; move of operand 1 to operand 0 through aarch64_emit_sve_pred_move.
43cacb12 RS |
326 | (define_expand "aarch64_sve_reload_be" |
327 | [(parallel | |
328 | [(set (match_operand 0) | |
ea403d8b | 329 | (match_operand 1)) |
43cacb12 RS |
330 | (clobber (match_operand:VNx16BI 2 "register_operand" "=Upl"))])] |
331 | "TARGET_SVE && BYTES_BIG_ENDIAN" | |
332 | { | |
333 | /* Create a PTRUE. */ | |
334 | emit_move_insn (operands[2], CONSTM1_RTX (VNx16BImode)); | |
335 | ||
336 | /* Refer to the PTRUE in the appropriate mode for this move. */ | |
337 | machine_mode mode = GET_MODE (operands[0]); | |
338 | machine_mode pred_mode | |
339 | = aarch64_sve_pred_mode (GET_MODE_UNIT_SIZE (mode)).require (); | |
340 | rtx pred = gen_lowpart (pred_mode, operands[2]); | |
341 | ||
342 | /* Emit a predicated load or store. */ | |
343 | aarch64_emit_sve_pred_move (operands[0], pred, operands[1]); | |
344 | DONE; | |
345 | } | |
346 | ) | |
347 | ||
915d28fe RS |
348 | ;; A predicated move in which the predicate is known to be all-true. |
349 | ;; Note that this pattern is generated directly by aarch64_emit_sve_pred_move, | |
350 | ;; so changes to this pattern will need changes there as well. | |
;; Operand 1 is the predicate and operand 2 the source; at least one of
;; operands 0 and 2 must be a register.  The register-to-register
;; alternative has no assembly of its own ("#"): whenever both are
;; registers the insn is split into a plain (set (match_dup 0) (match_dup 2)).
;; The remaining alternatives are an LD1 (register <- memory) and an
;; ST1 (memory <- register).
0c63a8ee | 351 | (define_insn_and_split "@aarch64_pred_mov<mode>" |
9c6b4601 | 352 | [(set (match_operand:SVE_ALL 0 "nonimmediate_operand" "=w, w, m") |
43cacb12 | 353 | (unspec:SVE_ALL |
9c6b4601 RS |
354 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") |
355 | (match_operand:SVE_ALL 2 "nonimmediate_operand" "w, m, w")] | |
43cacb12 RS |
356 | UNSPEC_MERGE_PTRUE))] |
357 | "TARGET_SVE | |
358 | && (register_operand (operands[0], <MODE>mode) | |
359 | || register_operand (operands[2], <MODE>mode))" | |
360 | "@ | |
9c6b4601 | 361 | # |
43cacb12 RS |
362 | ld1<Vesize>\t%0.<Vetype>, %1/z, %2 |
363 | st1<Vesize>\t%2.<Vetype>, %1, %0" | |
9c6b4601 RS |
364 | "&& register_operand (operands[0], <MODE>mode) |
365 | && register_operand (operands[2], <MODE>mode)" | |
366 | [(set (match_dup 0) (match_dup 2))] | |
43cacb12 RS |
367 | ) |
368 | ||
915d28fe RS |
369 | ;; A pattern for optimizing SUBREGs that have a reinterpreting effect |
370 | ;; on big-endian targets; see aarch64_maybe_expand_sve_subreg_move | |
371 | ;; for details. We use a special predicate for operand 2 to reduce | |
372 | ;; the number of patterns. | |
;; Operand 1 is a VNx16BI predicate register and operand 2 the source
;; register (matched without a mode).  The insn is always output as "#";
;; after reload it is split by calling aarch64_split_sve_subreg_move.
373 | (define_insn_and_split "*aarch64_sve_mov<mode>_subreg_be" | |
374 | [(set (match_operand:SVE_ALL 0 "aarch64_sve_nonimmediate_operand" "=w") | |
43cacb12 | 375 | (unspec:SVE_ALL |
915d28fe RS |
376 | [(match_operand:VNx16BI 1 "register_operand" "Upl") |
377 | (match_operand 2 "aarch64_any_register_operand" "w")] | |
378 | UNSPEC_REV_SUBREG))] | |
379 | "TARGET_SVE && BYTES_BIG_ENDIAN" | |
380 | "#" | |
381 | "&& reload_completed" | |
382 | [(const_int 0)] | |
f307441a | 383 | { |
915d28fe RS |
384 | aarch64_split_sve_subreg_move (operands[0], operands[1], operands[2]); |
385 | DONE; | |
f307441a RS |
386 | } |
387 | ) | |
388 | ||
4aeb1ba7 RS |
389 | ;; Reinterpret operand 1 in operand 0's mode, without changing its contents. |
390 | ;; This is equivalent to a subreg on little-endian targets but not for | |
391 | ;; big-endian; see the comment at the head of the file for details. | |
;; Accordingly, on little-endian targets the expander emits a move from the
;; lowpart of operand 1 and is done; on big-endian targets it falls through
;; and emits the UNSPEC_REINTERPRET set given in the template.
392 | (define_expand "@aarch64_sve_reinterpret<mode>" | |
393 | [(set (match_operand:SVE_ALL 0 "register_operand") | |
394 | (unspec:SVE_ALL [(match_operand 1 "aarch64_any_register_operand")] | |
395 | UNSPEC_REINTERPRET))] | |
396 | "TARGET_SVE" | |
397 | { | |
398 | if (!BYTES_BIG_ENDIAN) | |
399 | { | |
400 | emit_move_insn (operands[0], gen_lowpart (<MODE>mode, operands[1])); | |
401 | DONE; | |
402 | } | |
403 | } | |
404 | ) | |
405 | ||
406 | ;; A pattern for handling type punning on big-endian targets. We use a | |
407 | ;; special predicate for operand 1 to reduce the number of patterns. | |
;; Operand 1 is tied to operand 0 by the "0" constraint, so source and
;; destination occupy the same register.  The insn is output as "#"; after
;; reload the split body emits only a NOTE_INSN_DELETED note and finishes
;; with DONE, so the (set ...) in the split template is not emitted.
408 | (define_insn_and_split "*aarch64_sve_reinterpret<mode>" | |
409 | [(set (match_operand:SVE_ALL 0 "register_operand" "=w") | |
410 | (unspec:SVE_ALL [(match_operand 1 "aarch64_any_register_operand" "0")] | |
411 | UNSPEC_REINTERPRET))] | |
412 | "TARGET_SVE" | |
413 | "#" | |
414 | "&& reload_completed" | |
415 | [(set (match_dup 0) (match_dup 1))] | |
416 | { | |
417 | emit_note (NOTE_INSN_DELETED); | |
418 | DONE; | |
419 | } | |
420 | ) | |
421 | ||
915d28fe RS |
422 | ;; ------------------------------------------------------------------------- |
423 | ;; ---- Moves of multiple vectors | |
424 | ;; ------------------------------------------------------------------------- | |
425 | ;; All patterns in this section are synthetic and split to real | |
426 | ;; instructions after reload. | |
427 | ;; ------------------------------------------------------------------------- | |
f307441a | 428 | |
9f4cbab8 RS |
;; Expander for moves of SVE structure (multi-vector) modes.
;; On big-endian targets, a move with a MEM operand is handed to
;; aarch64_expand_sve_mem_move; the body asserts that pseudos can still be
;; created at that point.  Constant sources are handed to
;; aarch64_expand_mov_immediate.  Otherwise the plain (set ...) is emitted.
429 | (define_expand "mov<mode>" |
430 | [(set (match_operand:SVE_STRUCT 0 "nonimmediate_operand") | |
431 | (match_operand:SVE_STRUCT 1 "general_operand"))] | |
432 | "TARGET_SVE" | |
433 | { | |
434 | /* Big-endian loads and stores need to be done via LD1 and ST1; | |
435 | see the comment at the head of the file for details. */ | |
436 | if ((MEM_P (operands[0]) || MEM_P (operands[1])) | |
437 | && BYTES_BIG_ENDIAN) | |
438 | { | |
439 | gcc_assert (can_create_pseudo_p ()); | |
440 | aarch64_expand_sve_mem_move (operands[0], operands[1], <VPRED>mode); | |
441 | DONE; | |
442 | } | |
443 | ||
444 | if (CONSTANT_P (operands[1])) | |
445 | { | |
446 | aarch64_expand_mov_immediate (operands[0], operands[1]); | |
447 | DONE; | |
448 | } | |
449 | } | |
450 | ) | |
451 | ||
452 | ;; Unpredicated structure moves (little-endian). | |
;; The alternatives mirror the single-vector pattern (load, store,
;; register move, "Dn" immediate), but the output template is "#", so the
;; insn has no assembly of its own and must be split before output.  Its
;; "length" attribute is taken from <insn_length>.
453 | (define_insn "*aarch64_sve_mov<mode>_le" | |
454 | [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w") | |
455 | (match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))] | |
456 | "TARGET_SVE && !BYTES_BIG_ENDIAN" | |
457 | "#" | |
458 | [(set_attr "length" "<insn_length>")] | |
459 | ) | |
460 | ||
461 | ;; Unpredicated structure moves (big-endian). Memory accesses require | |
462 | ;; secondary reloads. | |
;; Only register destinations with register or "Dn" immediate sources are
;; accepted.  The output template is "#", so the insn must be split before
;; output; its "length" attribute is taken from <insn_length>.
915d28fe | 463 | (define_insn "*aarch64_sve_mov<mode>_be" |
9f4cbab8 RS |
464 | [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w, w") |
465 | (match_operand:SVE_STRUCT 1 "aarch64_nonmemory_operand" "w, Dn"))] | |
466 | "TARGET_SVE && BYTES_BIG_ENDIAN" | |
467 | "#" | |
468 | [(set_attr "length" "<insn_length>")] | |
469 | ) | |
470 | ||
471 | ;; Split unpredicated structure moves into pieces. This is the same | |
472 | ;; for both big-endian and little-endian code, although it only needs | |
473 | ;; to handle memory operands for little-endian code. | |
;; The split only applies after reload.  A register-to-register move is
;; delegated to aarch64_simd_emit_reg_reg_move; every other case is
;; emitted as <vector_count> separate <VSINGLE>mode sets, taking the
;; subreg of each operand at byte offset i * BYTES_PER_SVE_VECTOR.
474 | (define_split | |
475 | [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand") | |
476 | (match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand"))] | |
477 | "TARGET_SVE && reload_completed" | |
478 | [(const_int 0)] | |
479 | { | |
480 | rtx dest = operands[0]; | |
481 | rtx src = operands[1]; | |
482 | if (REG_P (dest) && REG_P (src)) | |
483 | aarch64_simd_emit_reg_reg_move (operands, <VSINGLE>mode, <vector_count>); | |
484 | else | |
485 | for (unsigned int i = 0; i < <vector_count>; ++i) | |
486 | { | |
487 | rtx subdest = simplify_gen_subreg (<VSINGLE>mode, dest, <MODE>mode, | |
488 | i * BYTES_PER_SVE_VECTOR); | |
489 | rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, src, <MODE>mode, | |
490 | i * BYTES_PER_SVE_VECTOR); | |
491 | emit_insn (gen_rtx_SET (subdest, subsrc)); | |
492 | } | |
493 | DONE; | |
494 | } | |
495 | ) | |
496 | ||
497 | ;; Predicated structure moves. This works for both endiannesses but in | |
498 | ;; practice is only useful for big-endian. | |
;; Operand 1 is the predicate and operand 2 the source; at least one of
;; operands 0 and 2 must be a register.  The insn is output as "#" and,
;; after reload, is split into <vector_count> single-vector moves: each
;; <VSINGLE>mode piece (at byte offset i * BYTES_PER_SVE_VECTOR) is moved
;; with aarch64_emit_sve_pred_move using the same predicate.
0c63a8ee | 499 | (define_insn_and_split "@aarch64_pred_mov<mode>" |
9c6b4601 | 500 | [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_struct_nonimmediate_operand" "=w, w, Utx") |
9f4cbab8 | 501 | (unspec:SVE_STRUCT |
9c6b4601 RS |
502 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") |
503 | (match_operand:SVE_STRUCT 2 "aarch64_sve_struct_nonimmediate_operand" "w, Utx, w")] | |
9f4cbab8 RS |
504 | UNSPEC_MERGE_PTRUE))] |
505 | "TARGET_SVE | |
506 | && (register_operand (operands[0], <MODE>mode) | |
507 | || register_operand (operands[2], <MODE>mode))" | |
508 | "#" | |
509 | "&& reload_completed" | |
510 | [(const_int 0)] | |
511 | { | |
512 | for (unsigned int i = 0; i < <vector_count>; ++i) | |
513 | { | |
514 | rtx subdest = simplify_gen_subreg (<VSINGLE>mode, operands[0], | |
515 | <MODE>mode, | |
516 | i * BYTES_PER_SVE_VECTOR); | |
517 | rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, operands[2], | |
518 | <MODE>mode, | |
519 | i * BYTES_PER_SVE_VECTOR); | |
520 | aarch64_emit_sve_pred_move (subdest, operands[1], subsrc); | |
521 | } | |
522 | DONE; | |
523 | } | |
524 | [(set_attr "length" "<insn_length>")] | |
525 | ) | |
526 | ||
915d28fe RS |
527 | ;; ------------------------------------------------------------------------- |
528 | ;; ---- Moves of predicates | |
529 | ;; ------------------------------------------------------------------------- | |
530 | ;; Includes: | |
531 | ;; - MOV | |
532 | ;; - LDR | |
533 | ;; - PFALSE | |
534 | ;; - PTRUE | |
535 | ;; - STR | |
536 | ;; ------------------------------------------------------------------------- | |
537 | ||
43cacb12 RS |
;; Expander for moves of SVE predicates (all PRED_ALL modes).
;; A store to memory first forces the source into a register, since the
;; matching insn only stores from a register.  Any source that is still a
;; constant afterwards is expanded by aarch64_expand_mov_immediate.
;; Otherwise the plain (set ...) template is emitted.
;; MEM_P is used for the memory test, matching the SVE_ALL and SVE_STRUCT
;; move expanders.
538 | (define_expand "mov<mode>" |
539 | [(set (match_operand:PRED_ALL 0 "nonimmediate_operand") | |
540 | (match_operand:PRED_ALL 1 "general_operand"))] | |
541 | "TARGET_SVE" | |
542 | { | |
543 | if (MEM_P (operands[0])) | |
544 | operands[1] = force_reg (<MODE>mode, operands[1]); | |
0b1fe8cf RS |
545 | 
546 | if (CONSTANT_P (operands[1])) | |
547 | { | |
548 | aarch64_expand_mov_immediate (operands[0], operands[1]); | |
549 | DONE; | |
550 | } | |
43cacb12 RS |
551 | } |
552 | ) | |
553 | ||
;; Moves of SVE predicates.  At least one of the two operands must be a
;; register.  Alternatives, in order: predicate-to-predicate MOV, STR to
;; memory, LDR from memory, and a "Dn" immediate whose assembly is produced
;; by aarch64_output_sve_mov_immediate.
554 | (define_insn "*aarch64_sve_mov<mode>" | |
1044fa32 | 555 | [(set (match_operand:PRED_ALL 0 "nonimmediate_operand" "=Upa, m, Upa, Upa") |
0b1fe8cf | 556 | (match_operand:PRED_ALL 1 "aarch64_mov_operand" "Upa, Upa, m, Dn"))] |
43cacb12 RS |
557 | "TARGET_SVE |
558 | && (register_operand (operands[0], <MODE>mode) | |
559 | || register_operand (operands[1], <MODE>mode))" | |
560 | "@ | |
561 | mov\t%0.b, %1.b | |
562 | str\t%1, %0 | |
563 | ldr\t%0, %1 | |
1044fa32 | 564 | * return aarch64_output_sve_mov_immediate (operands[1]);" |
43cacb12 RS |
565 | ) |
566 | ||
915d28fe RS |
567 | ;; ========================================================================= |
568 | ;; == Loads | |
569 | ;; ========================================================================= | |
570 | ||
571 | ;; ------------------------------------------------------------------------- | |
572 | ;; ---- Normal contiguous loads | |
573 | ;; ------------------------------------------------------------------------- | |
574 | ;; Includes contiguous forms of: | |
575 | ;; - LD1B | |
576 | ;; - LD1D | |
577 | ;; - LD1H | |
578 | ;; - LD1W | |
579 | ;; - LD2B | |
580 | ;; - LD2D | |
581 | ;; - LD2H | |
582 | ;; - LD2W | |
583 | ;; - LD3B | |
584 | ;; - LD3D | |
585 | ;; - LD3H | |
586 | ;; - LD3W | |
587 | ;; - LD4B | |
588 | ;; - LD4D | |
589 | ;; - LD4H | |
590 | ;; - LD4W | |
591 | ;; ------------------------------------------------------------------------- | |
592 | ||
593 | ;; Predicated LD1. | |
;; Operand 0 is the destination vector register, operand 1 the memory
;; source and operand 2 the governing predicate, which the instruction
;; uses with the /z qualifier.
594 | (define_insn "maskload<mode><vpred>" | |
595 | [(set (match_operand:SVE_ALL 0 "register_operand" "=w") | |
596 | (unspec:SVE_ALL | |
597 | [(match_operand:<VPRED> 2 "register_operand" "Upl") | |
598 | (match_operand:SVE_ALL 1 "memory_operand" "m")] | |
599 | UNSPEC_LD1_SVE))] | |
43cacb12 | 600 | "TARGET_SVE" |
915d28fe | 601 | "ld1<Vesize>\t%0.<Vetype>, %2/z, %1" |
43cacb12 RS |
602 | ) |
603 | ||
915d28fe RS |
604 | ;; Unpredicated LD[234]. |
;; Expands to the same UNSPEC_LDN form as the predicated pattern, with the
;; predicate (operand 2) supplied by aarch64_ptrue_reg (<VPRED>mode).
605 | (define_expand "vec_load_lanes<mode><vsingle>" | |
606 | [(set (match_operand:SVE_STRUCT 0 "register_operand") | |
607 | (unspec:SVE_STRUCT | |
608 | [(match_dup 2) | |
609 | (match_operand:SVE_STRUCT 1 "memory_operand")] | |
610 | UNSPEC_LDN))] | |
43cacb12 RS |
611 | "TARGET_SVE" |
612 | { | |
915d28fe | 613 | operands[2] = aarch64_ptrue_reg (<VPRED>mode); |
43cacb12 RS |
614 | } |
615 | ) | |
616 | ||
915d28fe RS |
617 | ;; Predicated LD[234]. |
;; Operand 0 is the destination structure register, operand 1 the memory
;; source and operand 2 the governing predicate (used with /z).  The
;; mnemonic is formed from <vector_count> and <Vesize>.
618 | (define_insn "vec_mask_load_lanes<mode><vsingle>" | |
619 | [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w") | |
620 | (unspec:SVE_STRUCT | |
621 | [(match_operand:<VPRED> 2 "register_operand" "Upl") | |
622 | (match_operand:SVE_STRUCT 1 "memory_operand" "m")] | |
623 | UNSPEC_LDN))] | |
8711e791 | 624 | "TARGET_SVE" |
915d28fe | 625 | "ld<vector_count><Vesize>\t%0, %2/z, %1" |
8711e791 RS |
626 | ) |
627 | ||
915d28fe RS |
628 | ;; ------------------------------------------------------------------------- |
629 | ;; ---- Normal gather loads | |
630 | ;; ------------------------------------------------------------------------- | |
631 | ;; Includes gather forms of: | |
632 | ;; - LD1D | |
633 | ;; - LD1W | |
634 | ;; ------------------------------------------------------------------------- | |
635 | ||
636 | ;; Unpredicated gather loads. | |
;; Expands to the same UNSPEC_LD1_GATHER form as the predicated patterns,
;; with the predicate (operand 5) supplied by
;; aarch64_ptrue_reg (<VPRED>mode).  Operands 1-4 are passed through
;; unchanged: a DI base (register or zero), a vector of offsets, and two
;; constant DI operands (an extension flag and a scale, going by the
;; predicated patterns' comments and the scale predicate's name).
637 | (define_expand "gather_load<mode>" | |
638 | [(set (match_operand:SVE_SD 0 "register_operand") | |
639 | (unspec:SVE_SD | |
640 | [(match_dup 5) | |
641 | (match_operand:DI 1 "aarch64_reg_or_zero") | |
642 | (match_operand:<V_INT_EQUIV> 2 "register_operand") | |
643 | (match_operand:DI 3 "const_int_operand") | |
644 | (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>") | |
645 | (mem:BLK (scratch))] | |
646 | UNSPEC_LD1_GATHER))] | |
647 | "TARGET_SVE" | |
43cacb12 | 648 | { |
915d28fe | 649 | operands[5] = aarch64_ptrue_reg (<VPRED>mode); |
43cacb12 | 650 | } |
43cacb12 RS |
651 | ) |
652 | ||
915d28fe RS |
653 | ;; Predicated gather loads for 32-bit elements. Operand 3 is true for |
654 | ;; unsigned extension and false for signed extension. | |
;; Operand 5 is the governing predicate, operand 1 the scalar base,
;; operand 2 the vector of offsets and operand 4 the scale.
;; Alternatives, in order:
;;  1. zero base ("Z"): operand 2 is used directly as the address vector;
;;  2. base plus offsets with SXTW (operand 3 is "Z");
;;  3. base plus offsets with UXTW (operand 3 is "Ui1");
;;  4. as 2, with operand 4 also printed through %p4;
;;  5. as 3, with operand 4 also printed through %p4.
655 | (define_insn "mask_gather_load<mode>" | |
656 | [(set (match_operand:SVE_S 0 "register_operand" "=w, w, w, w, w") | |
657 | (unspec:SVE_S | |
658 | [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl, Upl, Upl") | |
659 | (match_operand:DI 1 "aarch64_reg_or_zero" "Z, rk, rk, rk, rk") | |
660 | (match_operand:<V_INT_EQUIV> 2 "register_operand" "w, w, w, w, w") | |
661 | (match_operand:DI 3 "const_int_operand" "i, Z, Ui1, Z, Ui1") | |
662 | (match_operand:DI 4 "aarch64_gather_scale_operand_w" "Ui1, Ui1, Ui1, i, i") | |
663 | (mem:BLK (scratch))] | |
664 | UNSPEC_LD1_GATHER))] | |
665 | "TARGET_SVE" | |
666 | "@ | |
667 | ld1w\t%0.s, %5/z, [%2.s] | |
668 | ld1w\t%0.s, %5/z, [%1, %2.s, sxtw] | |
669 | ld1w\t%0.s, %5/z, [%1, %2.s, uxtw] | |
670 | ld1w\t%0.s, %5/z, [%1, %2.s, sxtw %p4] | |
671 | ld1w\t%0.s, %5/z, [%1, %2.s, uxtw %p4]" | |
672 | ) | |
673 | ||
674 | ;; Predicated gather loads for 64-bit elements. The value of operand 3 | |
675 | ;; doesn't matter in this case. | |
;; Operand 5 is the governing predicate, operand 1 the scalar base,
;; operand 2 the vector of offsets and operand 4 the scale.
;; Alternatives, in order:
;;  1. zero base ("Z"): operand 2 is used directly as the address vector;
;;  2. base plus offsets, with no modifier printed;
;;  3. base plus offsets with "lsl" and operand 4 printed through %p4.
676 | (define_insn "mask_gather_load<mode>" | |
677 | [(set (match_operand:SVE_D 0 "register_operand" "=w, w, w") | |
678 | (unspec:SVE_D | |
679 | [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl") | |
680 | (match_operand:DI 1 "aarch64_reg_or_zero" "Z, rk, rk") | |
681 | (match_operand:<V_INT_EQUIV> 2 "register_operand" "w, w, w") | |
682 | (match_operand:DI 3 "const_int_operand") | |
683 | (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, Ui1, i") | |
684 | (mem:BLK (scratch))] | |
685 | UNSPEC_LD1_GATHER))] | |
686 | "TARGET_SVE" | |
687 | "@ | |
688 | ld1d\t%0.d, %5/z, [%2.d] | |
689 | ld1d\t%0.d, %5/z, [%1, %2.d] | |
690 | ld1d\t%0.d, %5/z, [%1, %2.d, lsl %p4]" | |
691 | ) | |
692 | ||
693 | ;; ========================================================================= | |
694 | ;; == Stores | |
695 | ;; ========================================================================= | |
696 | ||
697 | ;; ------------------------------------------------------------------------- | |
698 | ;; ---- Normal contiguous stores | |
699 | ;; ------------------------------------------------------------------------- | |
700 | ;; Includes contiguous forms of: | |
701 | ;; - ST1B | |
702 | ;; - ST1D | |
703 | ;; - ST1H | |
704 | ;; - ST1W | |
705 | ;; - ST2B | |
706 | ;; - ST2D | |
707 | ;; - ST2H | |
708 | ;; - ST2W | |
709 | ;; - ST3B | |
710 | ;; - ST3D | |
711 | ;; - ST3H | |
712 | ;; - ST3W | |
713 | ;; - ST4B | |
714 | ;; - ST4D | |
715 | ;; - ST4H | |
716 | ;; - ST4W | |
717 | ;; ------------------------------------------------------------------------- | |
718 | ||
719 | ;; Predicated ST1. | |
;; Operand 0 is the memory destination, operand 1 the source vector
;; register and operand 2 the governing predicate.  The destination is
;; read-write ("+m"): its old contents are also an input to the unspec,
;; via (match_dup 0).
720 | (define_insn "maskstore<mode><vpred>" | |
721 | [(set (match_operand:SVE_ALL 0 "memory_operand" "+m") | |
722 | (unspec:SVE_ALL [(match_operand:<VPRED> 2 "register_operand" "Upl") | |
723 | (match_operand:SVE_ALL 1 "register_operand" "w") | |
724 | (match_dup 0)] | |
725 | UNSPEC_ST1_SVE))] | |
726 | "TARGET_SVE" | |
727 | "st1<Vesize>\t%1.<Vetype>, %2, %0" | |
728 | ) | |
729 | ||
730 | ;; Unpredicated ST[234]. This is always a full update, so the dependence | |
731 | ;; on the old value of the memory location (via (match_dup 0)) is redundant. | |
732 | ;; There doesn't seem to be any obvious benefit to treating the all-true | |
733 | ;; case differently though. In particular, it's very unlikely that we'll | |
734 | ;; only find out during RTL that a store_lanes is dead. | |
;; Callers supply only the memory (operand 0) and the SVE_STRUCT register
;; (operand 1); operand 2 is filled in here with aarch64_ptrue_reg for the
;; predicate mode, giving the same UNSPEC_STN form as the predicated insn.
735 | (define_expand "vec_store_lanes<mode><vsingle>" | |
736 | [(set (match_operand:SVE_STRUCT 0 "memory_operand") | |
737 | (unspec:SVE_STRUCT | |
738 | [(match_dup 2) | |
739 | (match_operand:SVE_STRUCT 1 "register_operand") | |
740 | (match_dup 0)] | |
741 | UNSPEC_STN))] | |
742 | "TARGET_SVE" | |
43cacb12 | 743 | { |
915d28fe | 744 | operands[2] = aarch64_ptrue_reg (<VPRED>mode); |
43cacb12 RS |
745 | } | |
746 | ) | |
747 | ||
915d28fe RS |
748 | ;; Predicated ST[234]. | |
;; Operand 2 is the governing predicate and operand 1 the SVE_STRUCT
;; register being stored.  The mnemonic is assembled from <vector_count>
;; and <Vesize>.  As with ST1, (match_dup 0) records a read of the old
;; memory contents.
749 | (define_insn "vec_mask_store_lanes<mode><vsingle>" | |
750 | [(set (match_operand:SVE_STRUCT 0 "memory_operand" "+m") | |
751 | (unspec:SVE_STRUCT | |
752 | [(match_operand:<VPRED> 2 "register_operand" "Upl") | |
753 | (match_operand:SVE_STRUCT 1 "register_operand" "w") | |
754 | (match_dup 0)] | |
755 | UNSPEC_STN))] | |
756 | "TARGET_SVE" | |
757 | "st<vector_count><Vesize>\t%1, %2, %0" | |
758 | ) | |
759 | ||
760 | ;; ------------------------------------------------------------------------- | |
761 | ;; ---- Normal scatter stores | |
762 | ;; ------------------------------------------------------------------------- | |
763 | ;; Includes scatter forms of: | |
764 | ;; - ST1D | |
765 | ;; - ST1W | |
766 | ;; ------------------------------------------------------------------------- | |
767 | ||
768 | ;; Unpredicated scatter stores. | |
;; Operands 0-4 (base, offsets, extension flag, scale and data) come from
;; the caller.  Operand 5, the predicate, is filled in here with
;; aarch64_ptrue_reg, so the result uses the same UNSPEC_ST1_SCATTER form
;; as the mask_scatter_store<mode> patterns.
769 | (define_expand "scatter_store<mode>" | |
770 | [(set (mem:BLK (scratch)) | |
771 | (unspec:BLK | |
772 | [(match_dup 5) | |
773 | (match_operand:DI 0 "aarch64_reg_or_zero") | |
774 | (match_operand:<V_INT_EQUIV> 1 "register_operand") | |
775 | (match_operand:DI 2 "const_int_operand") | |
776 | (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>") | |
777 | (match_operand:SVE_SD 4 "register_operand")] | |
778 | UNSPEC_ST1_SCATTER))] | |
779 | "TARGET_SVE" | |
43cacb12 | 780 | { |
915d28fe | 781 | operands[5] = aarch64_ptrue_reg (<VPRED>mode); |
43cacb12 RS |
782 | } | |
783 | ) | |
784 | ||
915d28fe RS |
785 | ;; Predicated scatter stores for 32-bit elements. Operand 2 is true for | |
786 | ;; unsigned extension and false for signed extension. | |
;; Operand 5 is the governing predicate, operand 0 the scalar base,
;; operand 1 the vector of offsets, operand 3 the scale and operand 4 the
;; data.  The alternatives are, in order: vector-only address (base
;; constrained to Z); SXTW then UXTW offsets with scale constraint Ui1;
;; SXTW then UXTW offsets with the shift printed by %p3.
787 | (define_insn "mask_scatter_store<mode>" | |
788 | [(set (mem:BLK (scratch)) | |
789 | (unspec:BLK | |
790 | [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl, Upl, Upl") | |
791 | (match_operand:DI 0 "aarch64_reg_or_zero" "Z, rk, rk, rk, rk") | |
792 | (match_operand:<V_INT_EQUIV> 1 "register_operand" "w, w, w, w, w") | |
793 | (match_operand:DI 2 "const_int_operand" "i, Z, Ui1, Z, Ui1") | |
794 | (match_operand:DI 3 "aarch64_gather_scale_operand_w" "Ui1, Ui1, Ui1, i, i") | |
795 | (match_operand:SVE_S 4 "register_operand" "w, w, w, w, w")] | |
796 | UNSPEC_ST1_SCATTER))] | |
43cacb12 RS |
797 | "TARGET_SVE" | |
798 | "@ | |
915d28fe RS |
799 | st1w\t%4.s, %5, [%1.s] | |
800 | st1w\t%4.s, %5, [%0, %1.s, sxtw] | |
801 | st1w\t%4.s, %5, [%0, %1.s, uxtw] | |
802 | st1w\t%4.s, %5, [%0, %1.s, sxtw %p3] | |
803 | st1w\t%4.s, %5, [%0, %1.s, uxtw %p3]" | |
804 | ) | |
805 | ||
806 | ;; Predicated scatter stores for 64-bit elements. The value of operand 2 | |
807 | ;; doesn't matter in this case. | |
;; Operand 5 is the governing predicate, operand 0 the scalar base,
;; operand 1 the vector of offsets, operand 3 the scale and operand 4 the
;; data.  The alternatives are, in order: vector-only address (base
;; constrained to Z), base plus offset with scale constraint Ui1, and
;; base plus offset shifted left by the amount printed with %p3.
808 | (define_insn "mask_scatter_store<mode>" | |
809 | [(set (mem:BLK (scratch)) | |
810 | (unspec:BLK | |
811 | [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl") | |
812 | (match_operand:DI 0 "aarch64_reg_or_zero" "Z, rk, rk") | |
813 | (match_operand:<V_INT_EQUIV> 1 "register_operand" "w, w, w") | |
814 | (match_operand:DI 2 "const_int_operand") | |
815 | (match_operand:DI 3 "aarch64_gather_scale_operand_d" "Ui1, Ui1, i") | |
816 | (match_operand:SVE_D 4 "register_operand" "w, w, w")] | |
817 | UNSPEC_ST1_SCATTER))] | |
818 | "TARGET_SVE" | |
819 | "@ | |
820 | st1d\t%4.d, %5, [%1.d] | |
821 | st1d\t%4.d, %5, [%0, %1.d] | |
822 | st1d\t%4.d, %5, [%0, %1.d, lsl %p3]" | |
43cacb12 RS |
823 | ) | |
824 | ||
915d28fe RS |
825 | ;; ========================================================================= |
826 | ;; == Vector creation | |
827 | ;; ========================================================================= | |
828 | ||
829 | ;; ------------------------------------------------------------------------- | |
830 | ;; ---- [INT,FP] Duplicate element | |
831 | ;; ------------------------------------------------------------------------- | |
832 | ;; Includes: | |
833 | ;; - MOV | |
834 | ;; - LD1RB | |
835 | ;; - LD1RD | |
836 | ;; - LD1RH | |
837 | ;; - LD1RW | |
838 | ;; - LD1RQB | |
839 | ;; - LD1RQD | |
840 | ;; - LD1RQH | |
841 | ;; - LD1RQW | |
842 | ;; ------------------------------------------------------------------------- | |
843 | ||
43cacb12 RS |
;; Duplicate scalar operand 1 into every element of vector operand 0.
;; A memory operand is expanded directly to sve_ld1r<mode>, using a PTRUE
;; predicate and a zero vector as the final operand.  Any other operand
;; falls through to the parallel below, whose VNx16BI scratch clobber has
;; the same shape as *vec_duplicate<mode>_reg.
844 | (define_expand "vec_duplicate<mode>" | |
845 | [(parallel | |
846 | [(set (match_operand:SVE_ALL 0 "register_operand") | |
847 | (vec_duplicate:SVE_ALL | |
848 | (match_operand:<VEL> 1 "aarch64_sve_dup_operand"))) | |
678faefc | 849 | (clobber (scratch:VNx16BI))])] |
43cacb12 RS |
850 | "TARGET_SVE" | |
851 | { | |
852 | if (MEM_P (operands[1])) | |
853 | { | |
16de3637 | 854 | rtx ptrue = aarch64_ptrue_reg (<VPRED>mode); |
43cacb12 RS |
855 | emit_insn (gen_sve_ld1r<mode> (operands[0], ptrue, operands[1], | |
856 | CONST0_RTX (<MODE>mode))); | |
857 | DONE; | |
858 | } | |
859 | } | |
860 | ) | |
861 | ||
862 | ;; Accept memory operands for the benefit of combine, and also in case | |
863 | ;; the scalar input gets spilled to memory during RA. We want to split | |
864 | ;; the load at the first opportunity in order to allow the PTRUE to be | |
865 | ;; optimized with surrounding code. | |
;; The first two alternatives emit a MOV from an "r" or "w" register.
;; The third (memory) alternative emits "#" and is split whenever operand 1
;; is a MEM: all-ones is moved into the VNx16BI scratch, its <VPRED>mode
;; lowpart is taken as the predicate, and sve_ld1r<mode> is emitted.  If
;; operand 2 is still a SCRATCH at split time, a fresh pseudo is allocated
;; for it.
866 | (define_insn_and_split "*vec_duplicate<mode>_reg" | |
867 | [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w, w") | |
868 | (vec_duplicate:SVE_ALL | |
869 | (match_operand:<VEL> 1 "aarch64_sve_dup_operand" "r, w, Uty"))) | |
678faefc | 870 | (clobber (match_scratch:VNx16BI 2 "=X, X, Upl"))] |
43cacb12 RS |
871 | "TARGET_SVE" | |
872 | "@ | |
873 | mov\t%0.<Vetype>, %<vwcore>1 | |
874 | mov\t%0.<Vetype>, %<Vetype>1 | |
875 | #" | |
876 | "&& MEM_P (operands[1])" | |
877 | [(const_int 0)] | |
878 | { | |
879 | if (GET_CODE (operands[2]) == SCRATCH) | |
678faefc RS |
880 | operands[2] = gen_reg_rtx (VNx16BImode); | |
881 | emit_move_insn (operands[2], CONSTM1_RTX (VNx16BImode)); | |
882 | rtx gp = gen_lowpart (<VPRED>mode, operands[2]); | |
883 | emit_insn (gen_sve_ld1r<mode> (operands[0], gp, operands[1], | |
43cacb12 RS |
884 | CONST0_RTX (<MODE>mode))); | |
885 | DONE; | |
886 | } | |
887 | [(set_attr "length" "4,4,8")] | |
888 | ) | |
889 | ||
4aeb1ba7 RS |
890 | ;; Duplicate an Advanced SIMD vector to fill an SVE vector (LE version). | |
;; Operand 1 is re-expressed as a <MODE>mode register with the same REGNO
;; before printing, and the output is a DUP of quadword element 0.
891 | (define_insn "@aarch64_vec_duplicate_vq<mode>_le" | |
892 | [(set (match_operand:SVE_ALL 0 "register_operand" "=w") | |
893 | (vec_duplicate:SVE_ALL | |
894 | (match_operand:<V128> 1 "register_operand" "w")))] | |
895 | "TARGET_SVE && !BYTES_BIG_ENDIAN" | |
896 | { | |
897 | operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1])); | |
898 | return "dup\t%0.q, %1.q[0]"; | |
899 | } | |
900 | ) | |
901 | ||
902 | ;; Duplicate an Advanced SIMD vector to fill an SVE vector (BE version). | |
903 | ;; The SVE register layout puts memory lane N into (architectural) | |
904 | ;; register lane N, whereas the Advanced SIMD layout puts the memory | |
905 | ;; lsb into the register lsb. We therefore have to describe this in rtl | |
906 | ;; terms as a reverse of the V128 vector followed by a duplicate. | |
;; Besides the descending_int_parallel predicate on operand 2, the insn
;; condition checks that the first selected index is the last lane of
;; <V128>mode.  The instruction emitted is the same DUP as in the
;; little-endian pattern.
907 | (define_insn "@aarch64_vec_duplicate_vq<mode>_be" | |
908 | [(set (match_operand:SVE_ALL 0 "register_operand" "=w") | |
909 | (vec_duplicate:SVE_ALL | |
910 | (vec_select:<V128> | |
911 | (match_operand:<V128> 1 "register_operand" "w") | |
912 | (match_operand 2 "descending_int_parallel"))))] | |
913 | "TARGET_SVE | |
914 | && BYTES_BIG_ENDIAN | |
915 | && known_eq (INTVAL (XVECEXP (operands[2], 0, 0)), | |
916 | GET_MODE_NUNITS (<V128>mode) - 1)" | |
917 | { | |
918 | operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1])); | |
919 | return "dup\t%0.q, %1.q[0]"; | |
920 | } | |
921 | ) | |
922 | ||
43cacb12 RS |
923 | ;; This is used for vec_duplicate<mode>s from memory, but can also |
924 | ;; be used by combine to optimize selects of a vec_duplicate<mode> | |
925 | ;; with zero. | |
;; Operand 1 is the governing predicate, operand 2 the scalar memory
;; location being broadcast and operand 3 a zero vector, which is the
;; other arm of the UNSPEC_SEL; the load is printed in its zeroing (/z)
;; form.
926 | (define_insn "sve_ld1r<mode>" | |
927 | [(set (match_operand:SVE_ALL 0 "register_operand" "=w") | |
928 | (unspec:SVE_ALL | |
929 | [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
930 | (vec_duplicate:SVE_ALL | |
931 | (match_operand:<VEL> 2 "aarch64_sve_ld1r_operand" "Uty")) | |
932 | (match_operand:SVE_ALL 3 "aarch64_simd_imm_zero")] | |
933 | UNSPEC_SEL))] | |
934 | "TARGET_SVE" | |
935 | "ld1r<Vesize>\t%0.<Vetype>, %1/z, %2" | |
936 | ) | |
937 | ||
4aeb1ba7 RS |
938 | ;; Load 128 bits from memory under predicate control and duplicate to | |
939 | ;; fill a vector. | |
;; Operand 2 is the governing predicate and operand 1 the <V128> memory
;; location.  Before printing, the memory operand is rebuilt in <VEL>mode
;; from its address.
940 | (define_insn "@aarch64_sve_ld1rq<mode>" | |
947b1372 RS |
941 | [(set (match_operand:SVE_ALL 0 "register_operand" "=w") | |
942 | (unspec:SVE_ALL | |
4aeb1ba7 RS |
943 | [(match_operand:<VPRED> 2 "register_operand" "Upl") | |
944 | (match_operand:<V128> 1 "aarch64_sve_ld1rq_operand" "UtQ")] | |
43cacb12 RS |
945 | UNSPEC_LD1RQ))] | |
946 | "TARGET_SVE" | |
4aeb1ba7 RS |
947 | { | |
948 | operands[1] = gen_rtx_MEM (<VEL>mode, XEXP (operands[1], 0)); | |
949 | return "ld1rq<Vesize>\t%0.<Vetype>, %2/z, %1"; | |
950 | } | |
43cacb12 RS |
951 | ) | |
952 | ||
915d28fe RS |
953 | ;; ------------------------------------------------------------------------- |
954 | ;; ---- [INT,FP] Initialize from individual elements | |
955 | ;; ------------------------------------------------------------------------- | |
956 | ;; Includes: | |
957 | ;; - INSR | |
958 | ;; ------------------------------------------------------------------------- | |
959 | ||
;; Initialize vector operand 0 from operand 1.  All of the work is
;; delegated to aarch64_sve_expand_vector_init, so the RTL template here
;; only declares the operands.
960 | (define_expand "vec_init<mode><Vel>" | |
961 | [(match_operand:SVE_ALL 0 "register_operand") | |
962 | (match_operand 1 "")] | |
43cacb12 RS |
963 | "TARGET_SVE" | |
964 | { | |
915d28fe | 965 | aarch64_sve_expand_vector_init (operands[0], operands[1]); |
43cacb12 RS |
966 | DONE; | |
967 | } | |
968 | ) | |
969 | ||
915d28fe RS |
970 | ;; Shift an SVE vector left and insert a scalar into element 0. | |
;; Operand 1 is the input vector and operand 2 the scalar, which may be in
;; an "rZ" or a "w" location.  In the first two alternatives operand 1 is
;; tied to the destination.  In the last two it is not, so the vector is
;; copied with MOVPRFX before the INSR (these are the alternatives with
;; the "movprfx" attribute set to yes).
971 | (define_insn "vec_shl_insert_<mode>" | |
61ee25b9 | 972 | [(set (match_operand:SVE_ALL 0 "register_operand" "=?w, w, ??&w, ?&w") |
915d28fe | 973 | (unspec:SVE_ALL |
61ee25b9 RS |
974 | [(match_operand:SVE_ALL 1 "register_operand" "0, 0, w, w") | |
975 | (match_operand:<VEL> 2 "aarch64_reg_or_zero" "rZ, w, rZ, w")] | |
915d28fe RS |
976 | UNSPEC_INSR))] | |
977 | "TARGET_SVE" | |
978 | "@ | |
979 | insr\t%0.<Vetype>, %<vwcore>2 | |
61ee25b9 RS |
980 | insr\t%0.<Vetype>, %<Vetype>2 | |
981 | movprfx\t%0, %1\;insr\t%0.<Vetype>, %<vwcore>2 | |
982 | movprfx\t%0, %1\;insr\t%0.<Vetype>, %<Vetype>2" | |
983 | [(set_attr "movprfx" "*,*,yes,yes")] | |
915d28fe RS |
984 | ) | |
985 | ||
986 | ;; ------------------------------------------------------------------------- | |
987 | ;; ---- [INT] Linear series | |
988 | ;; ------------------------------------------------------------------------- | |
989 | ;; Includes: | |
990 | ;; - INDEX | |
991 | ;; ------------------------------------------------------------------------- | |
992 | ||
;; Set operand 0 to the linear series with base operand 1 and step
;; operand 2, using INDEX.  The alternatives are: Usi-constrained
;; immediate base with register step, register base with Usi-constrained
;; immediate step, and both in registers.
993 | (define_insn "vec_series<mode>" | |
994 | [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w") | |
995 | (vec_series:SVE_I | |
996 | (match_operand:<VEL> 1 "aarch64_sve_index_operand" "Usi, r, r") | |
997 | (match_operand:<VEL> 2 "aarch64_sve_index_operand" "r, Usi, r")))] | |
998 | "TARGET_SVE" | |
999 | "@ | |
1000 | index\t%0.<Vetype>, #%1, %<vw>2 | |
43cacb12 RS |
1001 | index\t%0.<Vetype>, %<vw>1, #%2 | |
1002 | index\t%0.<Vetype>, %<vw>1, %<vw>2" | |
1003 | ) | |
1004 | ||
1005 | ;; Optimize {x, x, x, x, ...} + {0, n, 2*n, 3*n, ...} if n is in range | |
1006 | ;; of an INDEX instruction. | |
;; aarch64_check_zero_based_sve_index_immediate is called twice: its
;; result gates the pattern in the insn condition, and in the output code
;; it replaces operand 2, which is then printed as the immediate step.
;; Operand 1 is the duplicated scalar and becomes the INDEX base.
1007 | (define_insn "*vec_series<mode>_plus" | |
1008 | [(set (match_operand:SVE_I 0 "register_operand" "=w") | |
1009 | (plus:SVE_I | |
1010 | (vec_duplicate:SVE_I | |
1011 | (match_operand:<VEL> 1 "register_operand" "r")) | |
1012 | (match_operand:SVE_I 2 "immediate_operand")))] | |
1013 | "TARGET_SVE && aarch64_check_zero_based_sve_index_immediate (operands[2])" | |
1014 | { | |
1015 | operands[2] = aarch64_check_zero_based_sve_index_immediate (operands[2]); | |
1016 | return "index\t%0.<Vetype>, %<vw>1, #%2"; | |
1017 | } | |
1018 | ) | |
1019 | ||
915d28fe RS |
1020 | ;; ------------------------------------------------------------------------- |
1021 | ;; ---- [PRED] Duplicate element | |
1022 | ;; ------------------------------------------------------------------------- | |
1023 | ;; The patterns in this section are synthetic. | |
1024 | ;; ------------------------------------------------------------------------- | |
1025 | ||
1026 | ;; Implement a predicate broadcast by shifting the low bit of the scalar | |
1027 | ;; input into the top bit and using a WHILELO. An alternative would be to | |
1028 | ;; duplicate the input and do a compare with zero. | |
;; Operand 1 is taken as a DImode lowpart and shifted left by 63, so the
;; temporary is either 0 or 1 << 63.  gen_while_ultdi<mode> is then given
;; const0_rtx as its first input and the temporary as its second.
1029 | (define_expand "vec_duplicate<mode>" | |
1030 | [(set (match_operand:PRED_ALL 0 "register_operand") | |
1031 | (vec_duplicate:PRED_ALL (match_operand 1 "register_operand")))] | |
9f4cbab8 RS |
1032 | "TARGET_SVE" | |
1033 | { | |
915d28fe RS |
1034 | rtx tmp = gen_reg_rtx (DImode); | |
1035 | rtx op1 = gen_lowpart (DImode, operands[1]); | |
1036 | emit_insn (gen_ashldi3 (tmp, op1, gen_int_mode (63, DImode))); | |
1037 | emit_insn (gen_while_ultdi<mode> (operands[0], const0_rtx, tmp)); | |
1038 | DONE; | |
9f4cbab8 RS |
1039 | } | |
1040 | ) | |
1041 | ||
915d28fe RS |
1042 | ;; ========================================================================= |
1043 | ;; == Vector decomposition | |
1044 | ;; ========================================================================= | |
9f4cbab8 | 1045 | |
915d28fe RS |
1046 | ;; ------------------------------------------------------------------------- |
1047 | ;; ---- [INT,FP] Extract index | |
1048 | ;; ------------------------------------------------------------------------- | |
1049 | ;; Includes: | |
1050 | ;; - DUP (Advanced SIMD) | |
1051 | ;; - DUP (SVE) | |
1052 | ;; - EXT (SVE) | |
1053 | ;; - ST1 (Advanced SIMD) | |
1054 | ;; - UMOV (Advanced SIMD) | |
1055 | ;; ------------------------------------------------------------------------- | |
1056 | ||
;; Extract element operand 2 of vector operand 1 into scalar operand 0.
;; Two cases are expanded by hand: the last element (LASTB with a PFALSE
;; predicate) and a non-constant index (build an index series, compare it
;; with zero to get a one-element predicate, then LASTB).  Any other
;; constant index falls through and emits the vec_select below, which the
;; *vec_extract<mode><Vel>_* insns match.
1057 | (define_expand "vec_extract<mode><Vel>" | |
1058 | [(set (match_operand:<VEL> 0 "register_operand") | |
1059 | (vec_select:<VEL> | |
1060 | (match_operand:SVE_ALL 1 "register_operand") | |
1061 | (parallel [(match_operand:SI 2 "nonmemory_operand")])))] | |
9f4cbab8 RS |
1062 | "TARGET_SVE" | |
1063 | { | |
915d28fe RS |
1064 | poly_int64 val; | |
1065 | if (poly_int_rtx_p (operands[2], &val) | |
1066 | && known_eq (val, GET_MODE_NUNITS (<MODE>mode) - 1)) | |
1067 | { | |
1068 | /* The last element can be extracted with a LASTB and a false | |
1069 | predicate. */ | |
1070 | rtx sel = aarch64_pfalse_reg (<VPRED>mode); | |
1071 | emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1])); | |
1072 | DONE; | |
1073 | } | |
1074 | if (!CONST_INT_P (operands[2])) | |
1075 | { | |
1076 | /* Create an index with operand[2] as the base and -1 as the step. | |
1077 | It will then be zero for the element we care about. */ | |
1078 | rtx index = gen_lowpart (<VEL_INT>mode, operands[2]); | |
1079 | index = force_reg (<VEL_INT>mode, index); | |
1080 | rtx series = gen_reg_rtx (<V_INT_EQUIV>mode); | |
1081 | emit_insn (gen_vec_series<v_int_equiv> (series, index, constm1_rtx)); | |
1082 | ||
1083 | /* Get a predicate that is true for only that element. */ | |
1084 | rtx zero = CONST0_RTX (<V_INT_EQUIV>mode); | |
1085 | rtx cmp = gen_rtx_EQ (<V_INT_EQUIV>mode, series, zero); | |
1086 | rtx sel = gen_reg_rtx (<VPRED>mode); | |
1087 | emit_insn (gen_vec_cmp<v_int_equiv><vpred> (sel, cmp, series, zero)); | |
1088 | ||
1089 | /* Select the element using LASTB. */ | |
1090 | emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1])); | |
1091 | DONE; | |
1092 | } | |
9f4cbab8 RS |
1093 | } | |
1094 | ) | |
1095 | ||
915d28fe RS |
1096 | ;; Extract element zero. This is a special case because we want to force | |
1097 | ;; the registers to be the same for the second alternative, and then | |
1098 | ;; split the instruction into nothing after RA. | |
;; Operand 1 is printed as a <V128>mode register.  The alternatives are
;; UMOV to an "r" register, the tied "w" case (output "#") and ST1 to
;; memory.  The split fires only after reload, when operand 0 is a
;; register with the same REGNO as operand 1, and leaves just a
;; NOTE_INSN_DELETED note.
1099 | (define_insn_and_split "*vec_extract<mode><Vel>_0" | |
1100 | [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv") | |
1101 | (vec_select:<VEL> | |
1102 | (match_operand:SVE_ALL 1 "register_operand" "w, 0, w") | |
1103 | (parallel [(const_int 0)])))] | |
9f4cbab8 | 1104 | "TARGET_SVE" |
915d28fe RS |
1105 | { | |
1106 | operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1])); | |
1107 | switch (which_alternative) | |
1108 | { | |
1109 | case 0: | |
1110 | return "umov\\t%<vwcore>0, %1.<Vetype>[0]"; | |
1111 | case 1: | |
1112 | return "#"; | |
1113 | case 2: | |
1114 | return "st1\\t{%1.<Vetype>}[0], %0"; | |
1115 | default: | |
1116 | gcc_unreachable (); | |
1117 | } | |
1118 | } | |
1119 | "&& reload_completed | |
1120 | && REG_P (operands[0]) | |
1121 | && REGNO (operands[0]) == REGNO (operands[1])" | |
1122 | [(const_int 0)] | |
1123 | { | |
1124 | emit_note (NOTE_INSN_DELETED); | |
1125 | DONE; | |
1126 | } | |
1127 | [(set_attr "type" "neon_to_gp_q, untyped, neon_store1_one_lane_q")] | |
9f4cbab8 RS |
1128 | ) | |
1129 | ||
915d28fe RS |
1130 | ;; Extract an element from the Advanced SIMD portion of the register. | |
1131 | ;; We don't just reuse the aarch64-simd.md pattern because we don't | |
1132 | ;; want any change in lane number on big-endian targets. | |
;; The insn condition limits the element's byte offset (index times
;; element size) to the range 1-15, i.e. inside the first 16 bytes but
;; not element 0.  Operand 1 is printed as a <V128>mode register; the
;; alternatives are UMOV to an "r" register, DUP to a "w" register and
;; ST1 to memory.
1133 | (define_insn "*vec_extract<mode><Vel>_v128" | |
1134 | [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv") | |
1135 | (vec_select:<VEL> | |
1136 | (match_operand:SVE_ALL 1 "register_operand" "w, w, w") | |
1137 | (parallel [(match_operand:SI 2 "const_int_operand")])))] | |
1138 | "TARGET_SVE | |
1139 | && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 1, 15)" | |
43cacb12 | 1140 | { |
915d28fe RS |
1141 | operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1])); | |
1142 | switch (which_alternative) | |
1143 | { | |
1144 | case 0: | |
1145 | return "umov\\t%<vwcore>0, %1.<Vetype>[%2]"; | |
1146 | case 1: | |
1147 | return "dup\\t%<Vetype>0, %1.<Vetype>[%2]"; | |
1148 | case 2: | |
1149 | return "st1\\t{%1.<Vetype>}[%2], %0"; | |
1150 | default: | |
1151 | gcc_unreachable (); | |
1152 | } | |
43cacb12 | 1153 | } |
915d28fe | 1154 | [(set_attr "type" "neon_to_gp_q, neon_dup_q, neon_store1_one_lane_q")] |
43cacb12 RS |
1155 | ) | |
1156 | ||
915d28fe RS |
1157 | ;; Extract an element in the range of DUP. This pattern allows the | |
1158 | ;; source and destination to be different. | |
;; Applies when the element's byte offset (index times element size) is
;; in the range 16-63.  Operand 0 is re-expressed in <MODE>mode with the
;; same REGNO so that it can be printed as %0.<Vetype>.
1159 | (define_insn "*vec_extract<mode><Vel>_dup" | |
1160 | [(set (match_operand:<VEL> 0 "register_operand" "=w") | |
1161 | (vec_select:<VEL> | |
1162 | (match_operand:SVE_ALL 1 "register_operand" "w") | |
1163 | (parallel [(match_operand:SI 2 "const_int_operand")])))] | |
1164 | "TARGET_SVE | |
1165 | && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 16, 63)" | |
1166 | { | |
1167 | operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0])); | |
1168 | return "dup\t%0.<Vetype>, %1.<Vetype>[%2]"; | |
1169 | } | |
43cacb12 RS |
1170 | ) | |
1171 | ||
915d28fe RS |
1172 | ;; Extract an element outside the range of DUP. This pattern requires the | |
1173 | ;; source and destination to be the same. | |
;; Applies when the element's byte offset (index times element size) is
;; 64 or more.  Operand 2 is converted from an element index to that byte
;; offset before being printed as the EXT immediate, and operand 0 is
;; re-expressed in <MODE>mode so that it can be printed as %0.b.
1174 | (define_insn "*vec_extract<mode><Vel>_ext" | |
1175 | [(set (match_operand:<VEL> 0 "register_operand" "=w") | |
1176 | (vec_select:<VEL> | |
1177 | (match_operand:SVE_ALL 1 "register_operand" "0") | |
1178 | (parallel [(match_operand:SI 2 "const_int_operand")])))] | |
1179 | "TARGET_SVE && INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode) >= 64" | |
1180 | { | |
1181 | operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0])); | |
1182 | operands[2] = GEN_INT (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode)); | |
1183 | return "ext\t%0.b, %0.b, %0.b, #%2"; | |
1184 | } | |
43cacb12 RS |
1185 | ) | |
1186 | ||
915d28fe RS |
1187 | ;; ------------------------------------------------------------------------- |
1188 | ;; ---- [INT,FP] Extract active element | |
1189 | ;; ------------------------------------------------------------------------- | |
1190 | ;; Includes: | |
1191 | ;; - LASTB | |
1192 | ;; ------------------------------------------------------------------------- | |
1193 | ||
1194 | ;; Extract the last active element of operand 1 into operand 0. | |
1195 | ;; If no elements are active, extract the last inactive element instead. | |
;; NOTE(review): in the pattern below the predicate is operand 1 and the
;; vector being read is operand 2.  The two alternatives write the result
;; to an "r" register or a "w" register respectively.
1196 | (define_insn "extract_last_<mode>" | |
1197 | [(set (match_operand:<VEL> 0 "register_operand" "=r, w") | |
1198 | (unspec:<VEL> | |
1199 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
1200 | (match_operand:SVE_ALL 2 "register_operand" "w, w")] | |
1201 | UNSPEC_LASTB))] | |
43cacb12 | 1202 | "TARGET_SVE" |
915d28fe RS |
1203 | "@ | |
1204 | lastb\t%<vwcore>0, %1, %2.<Vetype> | |
1205 | lastb\t%<Vetype>0, %1, %2.<Vetype>" | |
43cacb12 RS |
1206 | ) | |
1207 | ||
915d28fe RS |
1208 | ;; ------------------------------------------------------------------------- |
1209 | ;; ---- [PRED] Extract index | |
1210 | ;; ------------------------------------------------------------------------- | |
1211 | ;; The patterns in this section are synthetic. | |
1212 | ;; ------------------------------------------------------------------------- | |
1213 | ||
1214 | ;; Handle extractions from a predicate by converting to an integer vector | |
1215 | ;; and extracting from there. | |
;; The integer vector is built with gen_aarch64_sve_dup<mode>_const from
;; the predicate (operand 1) and the constant vectors 1 and 0.  It is then
;; passed, with the original index (operand 2), to the
;; vec_extract<mode><Vel> expander.
1216 | (define_expand "vec_extract<vpred><Vel>" | |
1217 | [(match_operand:<VEL> 0 "register_operand") | |
1218 | (match_operand:<VPRED> 1 "register_operand") | |
1219 | (match_operand:SI 2 "nonmemory_operand") | |
1220 | ;; Dummy operand to which we can attach the iterator. | |
1221 | (reg:SVE_I V0_REGNUM)] | |
43cacb12 | 1222 | "TARGET_SVE" |
915d28fe RS |
1223 | { | |
1224 | rtx tmp = gen_reg_rtx (<MODE>mode); | |
1225 | emit_insn (gen_aarch64_sve_dup<mode>_const (tmp, operands[1], | |
1226 | CONST1_RTX (<MODE>mode), | |
1227 | CONST0_RTX (<MODE>mode))); | |
1228 | emit_insn (gen_vec_extract<mode><Vel> (operands[0], tmp, operands[2])); | |
1229 | DONE; | |
1230 | } | |
43cacb12 RS |
1231 | ) | |
1232 | ||
915d28fe RS |
1233 | ;; ========================================================================= |
1234 | ;; == Unary arithmetic | |
1235 | ;; ========================================================================= | |
1236 | ||
1237 | ;; ------------------------------------------------------------------------- | |
1238 | ;; ---- [INT] General unary arithmetic corresponding to rtx codes | |
1239 | ;; ------------------------------------------------------------------------- | |
1240 | ;; Includes: | |
1241 | ;; - ABS | |
1242 | ;; - CNT (= popcount) | |
1243 | ;; - NEG | |
1244 | ;; - NOT | |
1245 | ;; ------------------------------------------------------------------------- | |
1246 | ||
1247 | ;; Unpredicated integer unary arithmetic. | |
;; The operation on operand 1 is wrapped in UNSPEC_MERGE_PTRUE.  Operand 2,
;; the predicate, is filled in here with a PTRUE register of the
;; corresponding predicate mode.
1248 | (define_expand "<optab><mode>2" | |
1249 | [(set (match_operand:SVE_I 0 "register_operand") | |
1250 | (unspec:SVE_I | |
1251 | [(match_dup 2) | |
1252 | (SVE_INT_UNARY:SVE_I (match_operand:SVE_I 1 "register_operand"))] | |
43cacb12 RS |
1253 | UNSPEC_MERGE_PTRUE))] | |
1254 | "TARGET_SVE" | |
915d28fe RS |
1255 | { | |
1256 | operands[2] = aarch64_ptrue_reg (<VPRED>mode); | |
1257 | } | |
43cacb12 RS |
1258 | ) | |
1259 | ||
915d28fe RS |
1260 | ;; Integer unary arithmetic predicated with a PTRUE. | |
;; Operand 1 is the predicate and operand 2 the input vector.  The
;; instruction is printed in its merging (/m) form.
1261 | (define_insn "*<optab><mode>2" | |
1262 | [(set (match_operand:SVE_I 0 "register_operand" "=w") | |
1263 | (unspec:SVE_I | |
1264 | [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
1265 | (SVE_INT_UNARY:SVE_I | |
1266 | (match_operand:SVE_I 2 "register_operand" "w"))] | |
43cacb12 RS |
1267 | UNSPEC_MERGE_PTRUE))] | |
1268 | "TARGET_SVE" | |
915d28fe | 1269 | "<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>" |
43cacb12 RS |
1270 | ) | |
1271 | ||
915d28fe | 1272 | ;; ------------------------------------------------------------------------- |
d45b20a5 | 1273 | ;; ---- [FP] General unary arithmetic corresponding to unspecs |
915d28fe RS |
1274 | ;; ------------------------------------------------------------------------- |
1275 | ;; Includes: | |
1276 | ;; - FABS | |
1277 | ;; - FNEG | |
915d28fe RS |
1278 | ;; - FRINTA |
1279 | ;; - FRINTI | |
1280 | ;; - FRINTM | |
1281 | ;; - FRINTN | |
1282 | ;; - FRINTP | |
1283 | ;; - FRINTX | |
1284 | ;; - FRINTZ | |
d45b20a5 | 1285 | ;; - FSQRT |
915d28fe RS |
1286 | ;; ------------------------------------------------------------------------- |
1287 | ||
d45b20a5 RS |
1288 | ;; Unpredicated floating-point unary operations. | |
;; The operation is selected by the SVE_COND_FP_UNARY unspec rather than
;; by an rtx code.  Operand 2, the predicate, is filled in here with a
;; PTRUE register of the corresponding predicate mode.
1289 | (define_expand "<optab><mode>2" | |
915d28fe RS |
1290 | [(set (match_operand:SVE_F 0 "register_operand") | |
1291 | (unspec:SVE_F | |
1292 | [(match_dup 2) | |
d45b20a5 RS |
1293 | (match_operand:SVE_F 1 "register_operand")] | |
1294 | SVE_COND_FP_UNARY))] | |
915d28fe RS |
1295 | "TARGET_SVE" | |
1296 | { | |
1297 | operands[2] = aarch64_ptrue_reg (<VPRED>mode); | |
1298 | } | |
1299 | ) | |
1300 | ||
d45b20a5 RS |
1301 | ;; Predicated floating-point unary operations. | |
;; Operand 1 is the predicate and operand 2 the input vector.  The
;; instruction named by <sve_fp_op> is printed in its merging (/m) form.
1302 | (define_insn "*<optab><mode>2" | |
915d28fe RS |
1303 | [(set (match_operand:SVE_F 0 "register_operand" "=w") | |
1304 | (unspec:SVE_F | |
1305 | [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
d45b20a5 RS |
1306 | (match_operand:SVE_F 2 "register_operand" "w")] | |
1307 | SVE_COND_FP_UNARY))] | |
915d28fe | 1308 | "TARGET_SVE" |
d45b20a5 | 1309 | "<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>" |
915d28fe RS |
1310 | ) | |
1311 | ||
1312 | ;; ------------------------------------------------------------------------- | |
1313 | ;; ---- [PRED] Inverse | |
1314 | ;; ------------------------------------------------------------------------- | |
1315 | ;; Includes: | |
1316 | ;; - NOT | |
1317 | ;; ------------------------------------------------------------------------- | |
1318 | ||
1319 | ;; Unpredicated predicate inverse. | |
;; Expressed as (and (not operand 1) operand 2), with operand 2 filled in
;; here as a PTRUE register of the same predicate mode.
1320 | (define_expand "one_cmpl<mode>2" | |
1321 | [(set (match_operand:PRED_ALL 0 "register_operand") | |
1322 | (and:PRED_ALL | |
1323 | (not:PRED_ALL (match_operand:PRED_ALL 1 "register_operand")) | |
1324 | (match_dup 2)))] | |
1325 | "TARGET_SVE" | |
1326 | { | |
1327 | operands[2] = aarch64_ptrue_reg (<MODE>mode); | |
1328 | } | |
1329 | ) | |
1330 | ||
1331 | ;; Predicated predicate inverse. | |
;; Operand 2 is the predicate being inverted and operand 1 the governing
;; predicate, which is printed as %1/z.
1332 | (define_insn "*one_cmpl<mode>3" | |
1333 | [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") | |
1334 | (and:PRED_ALL | |
1335 | (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa")) | |
1336 | (match_operand:PRED_ALL 1 "register_operand" "Upa")))] | |
1337 | "TARGET_SVE" | |
1338 | "not\t%0.b, %1/z, %2.b" | |
1339 | ) | |
1340 | ||
1341 | ;; ========================================================================= | |
1342 | ;; == Binary arithmetic | |
1343 | ;; ========================================================================= | |
1344 | ||
1345 | ;; ------------------------------------------------------------------------- | |
1346 | ;; ---- [INT] General binary arithmetic corresponding to rtx codes | |
1347 | ;; ------------------------------------------------------------------------- | |
1348 | ;; Includes merging patterns for: | |
1349 | ;; - ADD | |
1350 | ;; - AND | |
1351 | ;; - EOR | |
1352 | ;; - MUL | |
1353 | ;; - ORR | |
1354 | ;; - SMAX | |
1355 | ;; - SMIN | |
1356 | ;; - SUB | |
1357 | ;; - UMAX | |
1358 | ;; - UMIN | |
1359 | ;; ------------------------------------------------------------------------- | |
1360 | ||
1361 | ;; Predicated integer operations with merging. | |
;; Operand 1 is the predicate, operands 2 and 3 the inputs to the
;; operation, and operand 4 (a register or zero) the value to merge with.
;; There is no preparation code, so the pattern is emitted as written and
;; matched by one of the *cond_<optab><mode>_* insns.
1362 | (define_expand "cond_<optab><mode>" | |
1363 | [(set (match_operand:SVE_I 0 "register_operand") | |
1364 | (unspec:SVE_I | |
1365 | [(match_operand:<VPRED> 1 "register_operand") | |
1366 | (SVE_INT_BINARY:SVE_I | |
1367 | (match_operand:SVE_I 2 "register_operand") | |
1368 | (match_operand:SVE_I 3 "register_operand")) | |
1369 | (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero")] | |
1370 | UNSPEC_SEL))] | |
1371 | "TARGET_SVE" | |
1372 | ) | |
1373 | ||
1374 | ;; Predicated integer operations, merging with the first input. | |
;; The (match_dup 2) makes the merge value the same rtx as the first
;; input.  The first alternative ties operand 2 to the destination; the
;; second allows a different destination by copying operand 2 with
;; MOVPRFX first.
1375 | (define_insn "*cond_<optab><mode>_2" | |
1376 | [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") | |
1377 | (unspec:SVE_I | |
1378 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
1379 | (SVE_INT_BINARY:SVE_I | |
1380 | (match_operand:SVE_I 2 "register_operand" "0, w") | |
1381 | (match_operand:SVE_I 3 "register_operand" "w, w")) | |
1382 | (match_dup 2)] | |
1383 | UNSPEC_SEL))] | |
1384 | "TARGET_SVE" | |
1385 | "@ | |
1386 | <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
1387 | movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
1388 | [(set_attr "movprfx" "*,yes")] | |
1389 | ) | |
1390 | ||
1391 | ;; Predicated integer operations, merging with the second input. | |
;; Here the tied input is operand 3, so the output uses <sve_int_op_rev>
;; with operand 2 as the other source.  The second alternative copies
;; operand 3 into the destination with MOVPRFX first.
1392 | (define_insn "*cond_<optab><mode>_3" | |
1393 | [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") | |
1394 | (unspec:SVE_I | |
1395 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
1396 | (SVE_INT_BINARY:SVE_I | |
1397 | (match_operand:SVE_I 2 "register_operand" "w, w") | |
1398 | (match_operand:SVE_I 3 "register_operand" "0, w")) | |
1399 | (match_dup 3)] | |
1400 | UNSPEC_SEL))] | |
1401 | "TARGET_SVE" | |
1402 | "@ | |
1403 | <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> | |
1404 | movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>" | |
1405 | [(set_attr "movprfx" "*,yes")] | |
1406 | ) | |
1407 | ||
1408 | ;; Predicated integer operations, merging with an independent value. | |
;; The insn condition excludes the cases where operand 4 is the same rtx
;; as operand 2 or operand 3.  The first three alternatives merge with
;; zero (Dz) using a zeroing MOVPRFX.  The fourth has operand 4 tied to
;; the destination and uses a merging MOVPRFX from operand 2.  The fifth
;; emits "#" and is rewritten after reload: a vcond_mask first selects
;; between operands 2 and 4 into the destination, and operands 2 and 4 are
;; then both replaced by operand 0.
1409 | (define_insn_and_rewrite "*cond_<optab><mode>_any" | |
1410 | [(set (match_operand:SVE_I 0 "register_operand" "=&w, &w, &w, &w, ?&w") | |
1411 | (unspec:SVE_I | |
1412 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl") | |
1413 | (SVE_INT_BINARY:SVE_I | |
1414 | (match_operand:SVE_I 2 "register_operand" "0, w, w, w, w") | |
1415 | (match_operand:SVE_I 3 "register_operand" "w, 0, w, w, w")) | |
1416 | (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")] | |
1417 | UNSPEC_SEL))] | |
43cacb12 | 1418 | "TARGET_SVE |
915d28fe RS |
1419 | && !rtx_equal_p (operands[2], operands[4]) | |
1420 | && !rtx_equal_p (operands[3], operands[4])" | |
1421 | "@ | |
1422 | movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
1423 | movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> | |
1424 | movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
1425 | movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
1426 | #" | |
1427 | "&& reload_completed | |
1428 | && register_operand (operands[4], <MODE>mode) | |
1429 | && !rtx_equal_p (operands[0], operands[4])" | |
43cacb12 | 1430 | { |
915d28fe RS |
1431 | emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2], | |
1432 | operands[4], operands[1])); | |
1433 | operands[4] = operands[2] = operands[0]; | |
43cacb12 | 1434 | } |
915d28fe | 1435 | [(set_attr "movprfx" "yes")] |
43cacb12 RS |
1436 | ) | |
1437 | ||
915d28fe RS |
1438 | ;; ------------------------------------------------------------------------- |
1439 | ;; ---- [INT] Addition | |
1440 | ;; ------------------------------------------------------------------------- | |
1441 | ;; Includes: | |
1442 | ;; - ADD | |
1443 | ;; - DECB | |
1444 | ;; - DECD | |
1445 | ;; - DECH | |
1446 | ;; - DECW | |
1447 | ;; - INCB | |
1448 | ;; - INCD | |
1449 | ;; - INCH | |
1450 | ;; - INCW | |
1451 | ;; - SUB | |
1452 | ;; ------------------------------------------------------------------------- | |
1453 | ||
43cacb12 RS |
;; Unpredicated integer addition.  The four alternatives are: ADD of an
;; immediate, SUB of the negated immediate, an immediate handled by
;; aarch64_output_sve_inc_dec_immediate, and ADD of two registers.
(define_insn "add<mode>3"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w, w")
        (plus:SVE_I
          (match_operand:SVE_I 1 "register_operand" "%0, 0, 0, w")
          (match_operand:SVE_I 2 "aarch64_sve_add_operand" "vsa, vsn, vsi, w")))]
  "TARGET_SVE"
  "@
   add\t%0.<Vetype>, %0.<Vetype>, #%D2
   sub\t%0.<Vetype>, %0.<Vetype>, #%N2
   * return aarch64_output_sve_inc_dec_immediate (\"%0.<Vetype>\", operands[2]);
   add\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)
1466 | ||
915d28fe RS |
1467 | ;; Merging forms are handled through SVE_INT_BINARY. |
1468 | ||
1469 | ;; ------------------------------------------------------------------------- | |
1470 | ;; ---- [INT] Subtraction | |
1471 | ;; ------------------------------------------------------------------------- | |
1472 | ;; Includes: | |
1473 | ;; - SUB | |
1474 | ;; - SUBR | |
1475 | ;; ------------------------------------------------------------------------- | |
1476 | ||
43cacb12 RS |
;; Unpredicated integer subtraction.  The first alternative subtracts
;; two registers; the second subtracts the tied register operand 2
;; from an immediate operand 1 using SUBR.
(define_insn "sub<mode>3"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, w")
        (minus:SVE_I
          (match_operand:SVE_I 1 "aarch64_sve_arith_operand" "w, vsa")
          (match_operand:SVE_I 2 "register_operand" "w, 0")))]
  "TARGET_SVE"
  "@
   sub\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>
   subr\t%0.<Vetype>, %0.<Vetype>, #%D1"
)
1487 | ||
915d28fe RS |
1488 | ;; Merging forms are handled through SVE_INT_BINARY. |
1489 | ||
1490 | ;; ------------------------------------------------------------------------- | |
1491 | ;; ---- [INT] Absolute difference | |
1492 | ;; ------------------------------------------------------------------------- | |
1493 | ;; Includes: | |
1494 | ;; - SABD | |
1495 | ;; - UABD | |
1496 | ;; ------------------------------------------------------------------------- | |
1497 | ||
;; Unpredicated integer absolute difference.  This expands to the
;; predicated pattern, using the register returned by aarch64_ptrue_reg
;; as the predicate.
(define_expand "<su>abd<mode>_3"
  [(use (match_operand:SVE_I 0 "register_operand"))
   (USMAX:SVE_I (match_operand:SVE_I 1 "register_operand")
                (match_operand:SVE_I 2 "register_operand"))]
  "TARGET_SVE"
  {
    rtx ptrue = aarch64_ptrue_reg (<VPRED>mode);
    emit_insn (gen_aarch64_<su>abd<mode>_3 (operands[0], ptrue, operands[1],
                                            operands[2]));
    DONE;
  }
)
1511 | ||
;; Predicated integer absolute difference, expressed as the difference
;; between the USMAX code and the <max_opp> code applied to the same
;; two inputs.  The second alternative copies operand 2 into the
;; destination with MOVPRFX first.
(define_insn "aarch64_<su>abd<mode>_3"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
        (unspec:SVE_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (minus:SVE_I
             (USMAX:SVE_I
               (match_operand:SVE_I 2 "register_operand" "0, w")
               (match_operand:SVE_I 3 "register_operand" "w, w"))
             (<max_opp>:SVE_I
               (match_dup 2)
               (match_dup 3)))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "@
   <su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)
1531 | ||
1532 | ;; ------------------------------------------------------------------------- | |
1533 | ;; ---- [INT] Multiplication | |
1534 | ;; ------------------------------------------------------------------------- | |
1535 | ;; Includes: | |
1536 | ;; - MUL | |
1537 | ;; ------------------------------------------------------------------------- | |
1538 | ||
43cacb12 RS |
;; Unpredicated integer multiplication.  The expander wraps the MULT in
;; UNSPEC_MERGE_PTRUE and fills in operand 3 with the register returned
;; by aarch64_ptrue_reg.
(define_expand "mul<mode>3"
  [(set (match_operand:SVE_I 0 "register_operand")
        (unspec:SVE_I
          [(match_dup 3)
           (mult:SVE_I
             (match_operand:SVE_I 1 "register_operand")
             (match_operand:SVE_I 2 "aarch64_sve_mul_operand"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[3] = aarch64_ptrue_reg (<VPRED>mode);
  }
)
1553 | ||
;; Integer multiplication governed by a PTRUE.  The immediate
;; alternative (the first one) has no real use for the predicate, but
;; switching it to Upa or X is unlikely to gain much and would make the
;; instruction look less uniform to the register allocator.
(define_insn_and_split "*mul<mode>3"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
        (unspec:SVE_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
           (mult:SVE_I
             (match_operand:SVE_I 2 "register_operand" "%0, 0, w")
             (match_operand:SVE_I 3 "aarch64_sve_mul_operand" "vsm, w, w"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "@
   #
   mul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;mul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  ; After reload, turn the immediate form into a plain MULT so that
  ; the unnecessary PTRUE is dropped.
  "&& reload_completed
   && !register_operand (operands[3], <MODE>mode)"
  [(set (match_dup 0) (mult:SVE_I (match_dup 2) (match_dup 3)))]
  ""
  [(set_attr "movprfx" "*,*,yes")]
)
1579 | ||
26004f51 RS |
;; Unpredicated multiplication by an immediate.  This form only exists
;; after register allocation: it is produced by splitting a predicated
;; multiplication whose predicate is unused.
(define_insn "*post_ra_mul<mode>3"
  [(set (match_operand:SVE_I 0 "register_operand" "=w")
        (mult:SVE_I
          (match_operand:SVE_I 1 "register_operand" "0")
          (match_operand:SVE_I 2 "aarch64_sve_mul_immediate")))]
  "TARGET_SVE && reload_completed"
  "mul\t%0.<Vetype>, %0.<Vetype>, #%2"
)
1591 | ||
915d28fe | 1592 | ;; Merging forms are handled through SVE_INT_BINARY. |
43cacb12 | 1593 | |
915d28fe RS |
1594 | ;; ------------------------------------------------------------------------- |
1595 | ;; ---- [INT] Highpart multiplication | |
1596 | ;; ------------------------------------------------------------------------- | |
1597 | ;; Includes: | |
1598 | ;; - SMULH | |
1599 | ;; - UMULH | |
1600 | ;; ------------------------------------------------------------------------- | |
43cacb12 | 1601 | |
11e9443f RS |
;; Unpredicated highpart multiplication.  The expander wraps the
;; MUL_HIGHPART unspec in UNSPEC_MERGE_PTRUE and fills in operand 3
;; with the register returned by aarch64_ptrue_reg.
(define_expand "<su>mul<mode>3_highpart"
  [(set (match_operand:SVE_I 0 "register_operand")
        (unspec:SVE_I
          [(match_dup 3)
           (unspec:SVE_I
             [(match_operand:SVE_I 1 "register_operand")
              (match_operand:SVE_I 2 "register_operand")]
             MUL_HIGHPART)]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[3] = aarch64_ptrue_reg (<VPRED>mode);
  }
)
1616 | ||
;; Predicated highpart multiplication.  The first alternative ties
;; operand 2 to the destination; the second copies operand 2 into the
;; destination with MOVPRFX first.
(define_insn "*<su>mul<mode>3_highpart"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
        (unspec:SVE_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (unspec:SVE_I
             [(match_operand:SVE_I 2 "register_operand" "%0, w")
              (match_operand:SVE_I 3 "register_operand" "w, w")]
             MUL_HIGHPART)]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "@
   <su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;<su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)
1632 | ||
915d28fe RS |
1633 | ;; ------------------------------------------------------------------------- |
1634 | ;; ---- [INT] Division | |
1635 | ;; ------------------------------------------------------------------------- | |
1636 | ;; Includes: | |
1637 | ;; - SDIV | |
1638 | ;; - SDIVR | |
1639 | ;; - UDIV | |
1640 | ;; - UDIVR | |
1641 | ;; ------------------------------------------------------------------------- | |
1642 | ||
;; Unpredicated integer division for the SVE_SDI modes.  The expander
;; wraps the operation in UNSPEC_MERGE_PTRUE and fills in operand 3
;; with the register returned by aarch64_ptrue_reg.
(define_expand "<optab><mode>3"
  [(set (match_operand:SVE_SDI 0 "register_operand")
        (unspec:SVE_SDI
          [(match_dup 3)
           (SVE_INT_BINARY_SD:SVE_SDI
             (match_operand:SVE_SDI 1 "register_operand")
             (match_operand:SVE_SDI 2 "register_operand"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[3] = aarch64_ptrue_reg (<VPRED>mode);
  }
)
1657 | ||
;; Integer division predicated with a PTRUE.  Both inputs are
;; registers: every constraint alternative for operand 3 is a register,
;; so its predicate is "register_operand" to agree with the constraints
;; and with the expander.  The second alternative ties operand 3 to the
;; destination and uses the reversed form of the instruction; the third
;; copies operand 2 into the destination with MOVPRFX first.
(define_insn "*<optab><mode>3"
  [(set (match_operand:SVE_SDI 0 "register_operand" "=w, w, ?&w")
        (unspec:SVE_SDI
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
           (SVE_INT_BINARY_SD:SVE_SDI
             (match_operand:SVE_SDI 2 "register_operand" "0, w, w")
             (match_operand:SVE_SDI 3 "register_operand" "w, 0, w"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "@
   <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
   movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,*,yes")]
)
1674 | ||
915d28fe RS |
;; Predicated integer division with merging.  Operand 1 is the
;; predicate, operands 2 and 3 are the inputs, and operand 4 is the
;; fallback value (a register or zero) of the UNSPEC_SEL.
(define_expand "cond_<optab><mode>"
  [(set (match_operand:SVE_SDI 0 "register_operand")
        (unspec:SVE_SDI
          [(match_operand:<VPRED> 1 "register_operand")
           (SVE_INT_BINARY_SD:SVE_SDI
             (match_operand:SVE_SDI 2 "register_operand")
             (match_operand:SVE_SDI 3 "register_operand"))
           (match_operand:SVE_SDI 4 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
  "TARGET_SVE"
)
1687 | ||
915d28fe RS |
;; Predicated integer division in which the fallback value is the
;; first input (operand 2).  The second alternative copies operand 2
;; into the destination with MOVPRFX first.
(define_insn "*cond_<optab><mode>_2"
  [(set (match_operand:SVE_SDI 0 "register_operand" "=w, ?&w")
        (unspec:SVE_SDI
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (SVE_INT_BINARY_SD:SVE_SDI
             (match_operand:SVE_SDI 2 "register_operand" "0, w")
             (match_operand:SVE_SDI 3 "register_operand" "w, w"))
           (match_dup 2)]
          UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)
1704 | ||
;; Predicated integer division in which the fallback value is the
;; second input (operand 3).  Both alternatives use the reversed form
;; of the instruction; the second copies operand 3 into the destination
;; with MOVPRFX first.
(define_insn "*cond_<optab><mode>_3"
  [(set (match_operand:SVE_SDI 0 "register_operand" "=w, ?&w")
        (unspec:SVE_SDI
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (SVE_INT_BINARY_SD:SVE_SDI
             (match_operand:SVE_SDI 2 "register_operand" "w, w")
             (match_operand:SVE_SDI 3 "register_operand" "0, w"))
           (match_dup 3)]
          UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
   movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)
1721 | ||
;; Predicated integer division whose fallback value (operand 4) is
;; distinct from both inputs.  Operand 4 is zero in the first three
;; alternatives, tied to the destination in the fourth, and an unrelated
;; register in the fifth.  The fifth alternative is output as "#" and is
;; rewritten after reload.
(define_insn_and_rewrite "*cond_<optab><mode>_any"
  [(set (match_operand:SVE_SDI 0 "register_operand" "=&w, &w, &w, &w, ?&w")
        (unspec:SVE_SDI
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
           (SVE_INT_BINARY_SD:SVE_SDI
             (match_operand:SVE_SDI 2 "register_operand" "0, w, w, w, w")
             (match_operand:SVE_SDI 3 "register_operand" "w, 0, w, w, w"))
           (match_operand:SVE_SDI 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
          UNSPEC_SEL))]
  "TARGET_SVE
   && !rtx_equal_p (operands[2], operands[4])
   && !rtx_equal_p (operands[3], operands[4])"
  "@
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   #"
  ; Only the register fallback that is not already the destination
  ; needs rewriting.
  "&& reload_completed
   && register_operand (operands[4], <MODE>mode)
   && !rtx_equal_p (operands[0], operands[4])"
  {
    /* Emit a vcond_mask of operands 2 and 4 under predicate operand 1
       into the destination, then make both operand 2 and operand 4
       refer to the destination.  */
    emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
                                             operands[4], operands[1]));
    operands[4] = operands[2] = operands[0];
  }
  [(set_attr "movprfx" "yes")]
)
1751 | ||
915d28fe RS |
1752 | ;; ------------------------------------------------------------------------- |
1753 | ;; ---- [INT] Binary logical operations | |
1754 | ;; ------------------------------------------------------------------------- | |
1755 | ;; Includes: | |
1756 | ;; - AND | |
1757 | ;; - EOR | |
1758 | ;; - ORR | |
1759 | ;; ------------------------------------------------------------------------- | |
1760 | ||
;; Unpredicated integer binary logical operations.  The immediate
;; alternative uses the mode's own element suffix; the register
;; alternative is always output with .d operands.
(define_insn "<optab><mode>3"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, w")
        (LOGICAL:SVE_I
          (match_operand:SVE_I 1 "register_operand" "%0, w")
          (match_operand:SVE_I 2 "aarch64_sve_logical_operand" "vsl, w")))]
  "TARGET_SVE"
  "@
   <logical>\t%0.<Vetype>, %0.<Vetype>, #%C2
   <logical>\t%0.d, %1.d, %2.d"
)
1772 | ||
915d28fe RS |
1773 | ;; Merging forms are handled through SVE_INT_BINARY. |
1774 | ||
1775 | ;; ------------------------------------------------------------------------- | |
1776 | ;; ---- [INT] Binary logical operations (inverted second input) | |
1777 | ;; ------------------------------------------------------------------------- | |
1778 | ;; Includes: | |
1779 | ;; - BIC | |
1780 | ;; ------------------------------------------------------------------------- | |
43cacb12 RS |
1781 | |
;; AND of operand 2 with the inverse of operand 1.  Although the NOT
;; instruction itself is predicated, REG_EQUAL notes on "not<mode>3"
;; should ensure that this pattern can still be generated.
(define_insn "bic<mode>3"
  [(set (match_operand:SVE_I 0 "register_operand" "=w")
        (and:SVE_I
          (not:SVE_I (match_operand:SVE_I 1 "register_operand" "w"))
          (match_operand:SVE_I 2 "register_operand" "w")))]
  "TARGET_SVE"
  "bic\t%0.d, %2.d, %1.d"
)
1792 | ||
915d28fe RS |
1793 | ;; ------------------------------------------------------------------------- |
1794 | ;; ---- [INT] Shifts | |
1795 | ;; ------------------------------------------------------------------------- | |
1796 | ;; Includes: | |
1797 | ;; - ASR | |
1798 | ;; - LSL | |
1799 | ;; - LSR | |
1800 | ;; ------------------------------------------------------------------------- | |
43cacb12 | 1801 | |
915d28fe RS |
;; Unpredicated shift by a scalar, which expands into one of the vector
;; shifts below.  The scalar amount (operand 2) is first turned into a
;; vector: a constant becomes a constant duplicate, anything else is
;; broadcast into a new register with vec_duplicate.
(define_expand "<ASHIFT:optab><mode>3"
  [(set (match_operand:SVE_I 0 "register_operand")
        (ASHIFT:SVE_I (match_operand:SVE_I 1 "register_operand")
                      (match_operand:<VEL> 2 "general_operand")))]
  "TARGET_SVE"
  {
    rtx amount;
    if (CONST_INT_P (operands[2]))
      {
        amount = gen_const_vec_duplicate (<MODE>mode, operands[2]);
        /* Test the duplicated vector, not the scalar: the predicate is
           the one that operand 2 of v<optab><mode>3 uses, and that
           operand is the vector amount.  Constants it rejects go in a
           register.  */
        if (!aarch64_sve_<lr>shift_operand (amount, <MODE>mode))
          amount = force_reg (<MODE>mode, amount);
      }
    else
      {
        amount = gen_reg_rtx (<MODE>mode);
        emit_insn (gen_vec_duplicate<mode> (amount,
                                            convert_to_mode (<VEL>mode,
                                                             operands[2], 0)));
      }
    emit_insn (gen_v<optab><mode>3 (operands[0], operands[1], amount));
    DONE;
  }
)
1828 | ||
;; Unpredicated shift by a vector.  The expander wraps the shift in
;; UNSPEC_MERGE_PTRUE and fills in operand 3 with the register returned
;; by aarch64_ptrue_reg.
(define_expand "v<optab><mode>3"
  [(set (match_operand:SVE_I 0 "register_operand")
        (unspec:SVE_I
          [(match_dup 3)
           (ASHIFT:SVE_I
             (match_operand:SVE_I 1 "register_operand")
             (match_operand:SVE_I 2 "aarch64_sve_<lr>shift_operand"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[3] = aarch64_ptrue_reg (<VPRED>mode);
  }
)
1843 | ||
915d28fe RS |
;; Shift by a vector, governed by a PTRUE.  The immediate alternative
;; (the first one) has no real use for the predicate, but switching it
;; to Upa or X is unlikely to gain much and would make the instruction
;; look less uniform to the register allocator.
(define_insn_and_split "*v<optab><mode>3"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
        (unspec:SVE_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
           (ASHIFT:SVE_I
             (match_operand:SVE_I 2 "register_operand" "w, 0, w")
             (match_operand:SVE_I 3 "aarch64_sve_<lr>shift_operand" "D<lr>, w, w"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "@
   #
   <shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  ; After reload, turn the immediate form into a plain shift rtx.
  "&& reload_completed
   && !register_operand (operands[3], <MODE>mode)"
  [(set (match_dup 0) (ASHIFT:SVE_I (match_dup 2) (match_dup 3)))]
  ""
  [(set_attr "movprfx" "*,*,yes")]
)
1867 | ||
26004f51 RS |
;; Unpredicated shift by an immediate.  This form only exists after
;; register allocation: it is produced by splitting a predicated shift
;; whose predicate is unused.
(define_insn "*post_ra_v<optab><mode>3"
  [(set (match_operand:SVE_I 0 "register_operand" "=w")
        (ASHIFT:SVE_I
          (match_operand:SVE_I 1 "register_operand" "w")
          (match_operand:SVE_I 2 "aarch64_simd_<lr>shift_imm")))]
  "TARGET_SVE && reload_completed"
  "<shift>\t%0.<Vetype>, %1.<Vetype>, #%2"
)
1879 | ||
915d28fe RS |
1880 | ;; ------------------------------------------------------------------------- |
1881 | ;; ---- [INT] Maximum and minimum | |
1882 | ;; ------------------------------------------------------------------------- | |
1883 | ;; Includes: | |
1884 | ;; - SMAX | |
1885 | ;; - SMIN | |
1886 | ;; - UMAX | |
1887 | ;; - UMIN | |
1888 | ;; ------------------------------------------------------------------------- | |
1889 | ||
;; Unpredicated integer MAX/MIN.  The expander wraps the operation in
;; UNSPEC_MERGE_PTRUE and fills in operand 3 with the register returned
;; by aarch64_ptrue_reg.
(define_expand "<su><maxmin><mode>3"
  [(set (match_operand:SVE_I 0 "register_operand")
        (unspec:SVE_I
          [(match_dup 3)
           (MAXMIN:SVE_I
             (match_operand:SVE_I 1 "register_operand")
             (match_operand:SVE_I 2 "register_operand"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[3] = aarch64_ptrue_reg (<VPRED>mode);
  }
)
1903 | ||
915d28fe RS |
;; Integer MAX/MIN governed by a PTRUE.  The first alternative ties
;; operand 2 to the destination; the second copies operand 2 into the
;; destination with MOVPRFX first.
(define_insn "*<su><maxmin><mode>3"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
        (unspec:SVE_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (MAXMIN:SVE_I
             (match_operand:SVE_I 2 "register_operand" "%0, w")
             (match_operand:SVE_I 3 "register_operand" "w, w"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "@
   <su><maxmin>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;<su><maxmin>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)
1918 | ||
915d28fe | 1919 | ;; Merging forms are handled through SVE_INT_BINARY. |
43cacb12 | 1920 | |
915d28fe RS |
1921 | ;; ------------------------------------------------------------------------- |
1922 | ;; ---- [FP] General binary arithmetic corresponding to rtx codes | |
1923 | ;; ------------------------------------------------------------------------- | |
1924 | ;; Includes post-RA forms of: | |
1925 | ;; - FADD | |
1926 | ;; - FMUL | |
1927 | ;; - FSUB | |
1928 | ;; ------------------------------------------------------------------------- | |
43cacb12 | 1929 | |
915d28fe RS |
;; Unpredicated floating-point binary operations on registers.  These
;; forms only exist after register allocation: they are produced by
;; splitting a predicated instruction whose predicate is unused.
(define_insn "*post_ra_<sve_fp_op><mode>3"
  [(set (match_operand:SVE_F 0 "register_operand" "=w")
        (SVE_UNPRED_FP_BINARY:SVE_F
          (match_operand:SVE_F 1 "register_operand" "w")
          (match_operand:SVE_F 2 "register_operand" "w")))]
  "TARGET_SVE && reload_completed"
  "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>")
1940 | ||
1941 | ;; ------------------------------------------------------------------------- | |
1942 | ;; ---- [FP] General binary arithmetic corresponding to unspecs | |
1943 | ;; ------------------------------------------------------------------------- | |
1944 | ;; Includes merging forms of: | |
1945 | ;; - FADD | |
1946 | ;; - FDIV | |
1947 | ;; - FDIVR | |
1948 | ;; - FMAXNM | |
1949 | ;; - FMINNM | |
1950 | ;; - FMUL | |
1951 | ;; - FSUB | |
1952 | ;; - FSUBR | |
1953 | ;; ------------------------------------------------------------------------- | |
1954 | ||
;; Predicated floating-point operations with merging.  Operand 1 is the
;; predicate, operands 2 and 3 are the inputs of the SVE_COND_FP_BINARY
;; unspec, and operand 4 is the fallback value (a register or zero) of
;; the UNSPEC_SEL.
(define_expand "cond_<optab><mode>"
  [(set (match_operand:SVE_F 0 "register_operand")
        (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_F
             [(match_operand:SVE_F 2 "register_operand")
              (match_operand:SVE_F 3 "register_operand")]
             SVE_COND_FP_BINARY)
           (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
  "TARGET_SVE"
)
1968 | ||
915d28fe RS |
;; Predicated floating-point operations in which the fallback value is
;; the first input (operand 2).  The second alternative copies operand 2
;; into the destination with MOVPRFX first.
(define_insn "*cond_<optab><mode>_2"
  [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
        (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (unspec:SVE_F
             [(match_operand:SVE_F 2 "register_operand" "0, w")
              (match_operand:SVE_F 3 "register_operand" "w, w")]
             SVE_COND_FP_BINARY)
           (match_dup 2)]
          UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)
1986 | ||
915d28fe RS |
;; Predicated floating-point operations in which the fallback value is
;; the second input (operand 3).  Both alternatives use the reversed
;; form of the instruction; the second copies operand 3 into the
;; destination with MOVPRFX first.
(define_insn "*cond_<optab><mode>_3"
  [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
        (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (unspec:SVE_F
             [(match_operand:SVE_F 2 "register_operand" "w, w")
              (match_operand:SVE_F 3 "register_operand" "0, w")]
             SVE_COND_FP_BINARY)
           (match_dup 3)]
          UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   <sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
   movprfx\t%0, %3\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)
2004 | ||
915d28fe RS |
;; Predicated floating-point operations whose fallback value
;; (operand 4) is distinct from both inputs.  Operand 4 is zero in the
;; first three alternatives, tied to the destination in the fourth, and
;; an unrelated register in the fifth.  The fifth alternative is output
;; as "#" and is rewritten after reload.
(define_insn_and_rewrite "*cond_<optab><mode>_any"
  [(set (match_operand:SVE_F 0 "register_operand" "=&w, &w, &w, &w, ?&w")
        (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
           (unspec:SVE_F
             [(match_operand:SVE_F 2 "register_operand" "0, w, w, w, w")
              (match_operand:SVE_F 3 "register_operand" "w, 0, w, w, w")]
             SVE_COND_FP_BINARY)
           (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
          UNSPEC_SEL))]
  "TARGET_SVE
   && !rtx_equal_p (operands[2], operands[4])
   && !rtx_equal_p (operands[3], operands[4])"
  "@
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   #"
  ; Only the register fallback that is not already the destination
  ; needs rewriting.
  "&& reload_completed
   && register_operand (operands[4], <MODE>mode)
   && !rtx_equal_p (operands[0], operands[4])"
  {
    /* Emit a vcond_mask of operands 2 and 4 under predicate operand 1
       into the destination, then make both operand 2 and operand 4
       refer to the destination.  */
    emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
                                             operands[4], operands[1]));
    operands[4] = operands[2] = operands[0];
  }
  [(set_attr "movprfx" "yes")]
)
2035 | ||
915d28fe RS |
2036 | ;; ------------------------------------------------------------------------- |
2037 | ;; ---- [FP] Addition | |
2038 | ;; ------------------------------------------------------------------------- | |
2039 | ;; Includes: | |
2040 | ;; - FADD | |
2041 | ;; - FSUB | |
2042 | ;; ------------------------------------------------------------------------- | |
43cacb12 | 2043 | |
915d28fe RS |
;; Unpredicated floating-point addition.  The expander wraps the PLUS
;; in UNSPEC_MERGE_PTRUE and fills in operand 3 with the register
;; returned by aarch64_ptrue_reg.
(define_expand "add<mode>3"
  [(set (match_operand:SVE_F 0 "register_operand")
        (unspec:SVE_F
          [(match_dup 3)
           (plus:SVE_F
             (match_operand:SVE_F 1 "register_operand")
             (match_operand:SVE_F 2 "aarch64_sve_float_arith_with_sub_operand"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[3] = aarch64_ptrue_reg (<VPRED>mode);
  }
)
2058 | ||
915d28fe RS |
;; Floating-point addition governed by a PTRUE.  The two immediate
;; alternatives are output as predicated FADD and FSUB (the latter with
;; the negated immediate); the register alternative is output as "#"
;; and split after reload.
(define_insn_and_split "*add<mode>3"
  [(set (match_operand:SVE_F 0 "register_operand" "=w, w, w")
        (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
           (plus:SVE_F
             (match_operand:SVE_F 2 "register_operand" "%0, 0, w")
             (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_operand" "vsA, vsN, w"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "@
   fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
   #"
  ; After reload, turn the register form into a plain PLUS so that the
  ; unnecessary PTRUE is dropped.
  "&& reload_completed
   && register_operand (operands[3], <MODE>mode)"
  [(set (match_dup 0) (plus:SVE_F (match_dup 2) (match_dup 3)))]
)
2079 | ||
915d28fe | 2080 | ;; Merging forms are handled through SVE_COND_FP_BINARY. |
cee99fa0 | 2081 | |
915d28fe RS |
2082 | ;; ------------------------------------------------------------------------- |
2083 | ;; ---- [FP] Subtraction | |
2084 | ;; ------------------------------------------------------------------------- | |
2085 | ;; Includes: | |
2086 | ;; - FADD | |
2087 | ;; - FSUB | |
2088 | ;; - FSUBR | |
2089 | ;; ------------------------------------------------------------------------- | |
cee99fa0 | 2090 | |
915d28fe RS |
;; Unpredicated floating-point subtraction.  The expander wraps the
;; MINUS in UNSPEC_MERGE_PTRUE and fills in operand 3 with the register
;; returned by aarch64_ptrue_reg.
(define_expand "sub<mode>3"
  [(set (match_operand:SVE_F 0 "register_operand")
        (unspec:SVE_F
          [(match_dup 3)
           (minus:SVE_F
             (match_operand:SVE_F 1 "aarch64_sve_float_arith_operand")
             (match_operand:SVE_F 2 "register_operand"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[3] = aarch64_ptrue_reg (<VPRED>mode);
  }
)
2105 | ||
915d28fe RS |
;; Floating-point subtraction governed by a PTRUE.  At least one of the
;; two inputs must be a register.  The immediate alternatives are
;; output as predicated FSUB, FADD (with the negated immediate) and
;; FSUBR; the all-register alternative is output as "#" and split
;; after reload.
(define_insn_and_split "*sub<mode>3"
  [(set (match_operand:SVE_F 0 "register_operand" "=w, w, w, w")
        (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
           (minus:SVE_F
             (match_operand:SVE_F 2 "aarch64_sve_float_arith_operand" "0, 0, vsA, w")
             (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_operand" "vsA, vsN, 0, w"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE
   && (register_operand (operands[2], <MODE>mode)
       || register_operand (operands[3], <MODE>mode))"
  "@
   fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
   fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
   #"
  ; After reload, turn the all-register form into a plain MINUS so
  ; that the unnecessary PTRUE is dropped.
  "&& reload_completed
   && register_operand (operands[2], <MODE>mode)
   && register_operand (operands[3], <MODE>mode)"
  [(set (match_dup 0) (minus:SVE_F (match_dup 2) (match_dup 3)))]
)
2130 | ||
915d28fe | 2131 | ;; Merging forms are handled through SVE_COND_FP_BINARY. |
43cacb12 | 2132 | |
915d28fe RS |
2133 | ;; ------------------------------------------------------------------------- |
2134 | ;; ---- [FP] Absolute difference | |
2135 | ;; ------------------------------------------------------------------------- | |
2136 | ;; Includes: | |
2137 | ;; - FABD | |
2138 | ;; ------------------------------------------------------------------------- | |
2139 | ||
2140 | ;; Predicated floating-point absolute difference. | |
;; The MINUS of operands 2 and 3 is wrapped, together with predicate
;; operand 1, in UNSPEC_COND_FABS.  Operand 2 is tied to the output
;; register (constraint "0"), so %2 in the template names the same
;; register as %0.
2141 | (define_insn "*fabd<mode>3" | |
2142 | [(set (match_operand:SVE_F 0 "register_operand" "=w") | |
2143 | (unspec:SVE_F | |
43cacb12 | 2144 | [(match_operand:<VPRED> 1 "register_operand" "Upl") |
d45b20a5 RS |
2145 | (minus:SVE_F |
2146 | (match_operand:SVE_F 2 "register_operand" "0") | |
2147 | (match_operand:SVE_F 3 "register_operand" "w"))] | |
2148 | UNSPEC_COND_FABS))] | |
43cacb12 | 2149 | "TARGET_SVE" |
915d28fe | 2150 | "fabd\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>" |
43cacb12 RS |
2151 | ) |
2152 | ||
915d28fe RS |
2153 | ;; ------------------------------------------------------------------------- |
2154 | ;; ---- [FP] Multiplication | |
2155 | ;; ------------------------------------------------------------------------- | |
2156 | ;; Includes: | |
2157 | ;; - FMUL | |
2158 | ;; ------------------------------------------------------------------------- | |
2159 | ||
2160 | ;; Unpredicated floating-point multiplication. | |
;; Operand 3 is not supplied by the caller: the preparation statement
;; sets it to aarch64_ptrue_reg (<VPRED>mode) and it becomes the first
;; element of the UNSPEC_MERGE_PTRUE.  Operand 2 is matched by
;; aarch64_sve_float_mul_operand rather than register_operand.
2161 | (define_expand "mul<mode>3" | |
2162 | [(set (match_operand:SVE_F 0 "register_operand") | |
2163 | (unspec:SVE_F | |
2164 | [(match_dup 3) | |
2165 | (mult:SVE_F | |
2166 | (match_operand:SVE_F 1 "register_operand") | |
2167 | (match_operand:SVE_F 2 "aarch64_sve_float_mul_operand"))] | |
2168 | UNSPEC_MERGE_PTRUE))] | |
43cacb12 RS |
2169 | "TARGET_SVE" |
2170 | { | |
915d28fe | 2171 | operands[3] = aarch64_ptrue_reg (<VPRED>mode); |
43cacb12 RS |
2172 | } |
2173 | ) | |
2174 | ||
915d28fe RS |
2175 | ;; Floating-point multiplication predicated with a PTRUE. |
;; Alternative 1 ties operand 2 to the output (marked commutative with
;; "%") and prints operand 3, constrained by "vsM", as an immediate.
;; Alternative 2 takes two "w" registers, is output as "#" and is split
;; after reload into a plain MULT set that no longer mentions operand 1.
2176 | (define_insn_and_split "*mul<mode>3" | |
2177 | [(set (match_operand:SVE_F 0 "register_operand" "=w, w") | |
2178 | (unspec:SVE_F | |
2179 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
2180 | (mult:SVE_F | |
2181 | (match_operand:SVE_F 2 "register_operand" "%0, w") | |
2182 | (match_operand:SVE_F 3 "aarch64_sve_float_mul_operand" "vsM, w"))] | |
2183 | UNSPEC_MERGE_PTRUE))] | |
43cacb12 | 2184 | "TARGET_SVE" |
915d28fe RS |
2185 | "@ |
2186 | fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 | |
2187 | #" | |
2188 | ; Split the unpredicated form after reload, so that we don't have | |
2189 | ; the unnecessary PTRUE. | |
2190 | "&& reload_completed | |
2191 | && register_operand (operands[3], <MODE>mode)" | |
2192 | [(set (match_dup 0) (mult:SVE_F (match_dup 2) (match_dup 3)))] | |
43cacb12 RS |
2193 | ) |
2194 | ||
915d28fe RS |
2195 | ;; Merging forms are handled through SVE_COND_FP_BINARY. |
2196 | ||
2197 | ;; ------------------------------------------------------------------------- | |
2198 | ;; ---- [FP] Division | |
2199 | ;; ------------------------------------------------------------------------- | |
2200 | ;; Includes: | |
2201 | ;; - FDIV | |
2202 | ;; - FDIVR | |
2203 | ;; ------------------------------------------------------------------------- | |
2204 | ||
2205 | ;; Unpredicated floating-point division. | |
;; Both inputs must be registers.  Operand 3 is set by the preparation
;; statement to aarch64_ptrue_reg (<VPRED>mode) and becomes the first
;; element of the UNSPEC_MERGE_PTRUE wrapped around the DIV.
2206 | (define_expand "div<mode>3" | |
2207 | [(set (match_operand:SVE_F 0 "register_operand") | |
2208 | (unspec:SVE_F | |
2209 | [(match_dup 3) | |
2210 | (div:SVE_F (match_operand:SVE_F 1 "register_operand") | |
2211 | (match_operand:SVE_F 2 "register_operand"))] | |
2212 | UNSPEC_MERGE_PTRUE))] | |
43cacb12 RS |
2213 | "TARGET_SVE" |
2214 | { | |
915d28fe | 2215 | operands[3] = aarch64_ptrue_reg (<VPRED>mode); |
43cacb12 RS |
2216 | } |
2217 | ) | |
2218 | ||
915d28fe RS |
2219 | ;; Floating-point division predicated with a PTRUE. |
;; Three alternatives:
;;   1. dividend (operand 2) tied to the output: FDIV
;;   2. divisor (operand 3) tied to the output: FDIVR
;;   3. neither tied; the output is earlyclobber and the dividend is
;;      first copied with MOVPRFX, then FDIV is used.  Only this
;;      alternative sets the "movprfx" attribute to "yes".
2220 | (define_insn "*div<mode>3" | |
2221 | [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w") | |
2222 | (unspec:SVE_F | |
2223 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") | |
2224 | (div:SVE_F (match_operand:SVE_F 2 "register_operand" "0, w, w") | |
2225 | (match_operand:SVE_F 3 "register_operand" "w, 0, w"))] | |
2226 | UNSPEC_MERGE_PTRUE))] | |
43cacb12 | 2227 | "TARGET_SVE" |
915d28fe RS |
2228 | "@ |
2229 | fdiv\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
2230 | fdivr\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> | |
2231 | movprfx\t%0, %2\;fdiv\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
2232 | [(set_attr "movprfx" "*,*,yes")] | |
43cacb12 RS |
2233 | ) |
2234 | ||
915d28fe RS |
2235 | ;; Merging forms are handled through SVE_COND_FP_BINARY. |
2236 | ||
2237 | ;; ------------------------------------------------------------------------- | |
2238 | ;; ---- [FP] Binary logical operations | |
2239 | ;; ------------------------------------------------------------------------- | |
2240 | ;; Includes: | |
2241 | ;; - AND | |
2242 | ;; - EOR | |
2243 | ;; - ORR | |
2244 | ;; ------------------------------------------------------------------------- | |
2245 | ||
2246 | ;; Binary logical operations on floating-point modes. We avoid subregs | |
2247 | ;; by providing this, but we need to use UNSPECs since rtx logical ops | |
2248 | ;; aren't defined for floating-point modes. | |
;; The pattern is unpredicated and iterates over LOGICALF.  The output
;; template always uses the .d suffix, whichever SVE_F mode is matched.
2249 | (define_insn "*<optab><mode>3" | |
2250 | [(set (match_operand:SVE_F 0 "register_operand" "=w") | |
2251 | (unspec:SVE_F [(match_operand:SVE_F 1 "register_operand" "w") | |
2252 | (match_operand:SVE_F 2 "register_operand" "w")] | |
2253 | LOGICALF))] | |
43cacb12 | 2254 | "TARGET_SVE" |
915d28fe | 2255 | "<logicalf_op>\t%0.d, %1.d, %2.d" |
43cacb12 RS |
2256 | ) |
2257 | ||
915d28fe RS |
2258 | ;; ------------------------------------------------------------------------- |
2259 | ;; ---- [FP] Sign copying | |
2260 | ;; ------------------------------------------------------------------------- | |
2261 | ;; The patterns in this section are synthetic. | |
2262 | ;; ------------------------------------------------------------------------- | |
2263 | ||
;; Expand copysign using integer operations on the <V_INT_EQUIV> view of
;; the inputs (obtained with lowpart_subreg):
;;   sign    = operand 2 AND (HOST_WIDE_INT_M1U << (element bits - 1))
;;   mant    = operand 1 AND ~(HOST_WIDE_INT_M1U << (element bits - 1))
;;   int_res = sign IOR mant
;; The result is then moved into operand 0 via gen_lowpart in <MODE>.
;; The expander emits everything itself and finishes with DONE.
2264 | (define_expand "copysign<mode>3" | |
2265 | [(match_operand:SVE_F 0 "register_operand") | |
2266 | (match_operand:SVE_F 1 "register_operand") | |
2267 | (match_operand:SVE_F 2 "register_operand")] | |
43cacb12 RS |
2268 | "TARGET_SVE" |
2269 | { | |
915d28fe RS |
2270 | rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode); |
2271 | rtx mant = gen_reg_rtx (<V_INT_EQUIV>mode); | |
2272 | rtx int_res = gen_reg_rtx (<V_INT_EQUIV>mode); | |
2273 | int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1; | |
43cacb12 | 2274 | |
915d28fe RS |
2275 | rtx arg1 = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode); |
2276 | rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode); | |
2277 | ||
2278 | emit_insn (gen_and<v_int_equiv>3 | |
2279 | (sign, arg2, | |
2280 | aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode, | |
2281 | HOST_WIDE_INT_M1U | |
2282 | << bits))); | |
2283 | emit_insn (gen_and<v_int_equiv>3 | |
2284 | (mant, arg1, | |
2285 | aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode, | |
2286 | ~(HOST_WIDE_INT_M1U | |
2287 | << bits)))); | |
2288 | emit_insn (gen_ior<v_int_equiv>3 (int_res, sign, mant)); | |
2289 | emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res)); | |
2290 | DONE; | |
43cacb12 RS |
2291 | } |
2292 | ) | |
2293 | ||
915d28fe RS |
;; Expand xorsign using integer operations on the <V_INT_EQUIV> view of
;; the inputs (obtained with lowpart_subreg):
;;   sign    = operand 2 AND (HOST_WIDE_INT_M1U << (element bits - 1))
;;   int_res = operand 1 XOR sign
;; The result is then moved into operand 0 via gen_lowpart in <MODE>.
;; The expander emits everything itself and finishes with DONE.
2294 | (define_expand "xorsign<mode>3" |
2295 | [(match_operand:SVE_F 0 "register_operand") | |
2296 | (match_operand:SVE_F 1 "register_operand") | |
2297 | (match_operand:SVE_F 2 "register_operand")] | |
43cacb12 RS |
2298 | "TARGET_SVE" |
2299 | { | |
915d28fe RS |
2300 | rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode); |
2301 | rtx int_res = gen_reg_rtx (<V_INT_EQUIV>mode); | |
2302 | int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1; | |
2303 | ||
2304 | rtx arg1 = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode); | |
2305 | rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode); | |
2306 | ||
2307 | emit_insn (gen_and<v_int_equiv>3 | |
2308 | (sign, arg2, | |
2309 | aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode, | |
2310 | HOST_WIDE_INT_M1U | |
2311 | << bits))); | |
2312 | emit_insn (gen_xor<v_int_equiv>3 (int_res, arg1, sign)); | |
2313 | emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res)); | |
2314 | DONE; | |
43cacb12 RS |
2315 | } |
2316 | ) | |
2317 | ||
915d28fe RS |
2318 | ;; ------------------------------------------------------------------------- |
2319 | ;; ---- [FP] Maximum and minimum | |
2320 | ;; ------------------------------------------------------------------------- | |
2321 | ;; Includes: | |
915d28fe | 2322 | ;; - FMAXNM |
915d28fe RS |
2323 | ;; - FMINNM |
2324 | ;; ------------------------------------------------------------------------- | |
43cacb12 | 2325 | |
214c42fa RS |
2326 | ;; Unpredicated floating-point MAX/MIN (the rtx codes). These are more |
2327 | ;; relaxed than fmax/fmin, but we implement them in the same way. | |
;; Operand 3 is not supplied by the caller: the preparation statement
;; sets it to aarch64_ptrue_reg (<VPRED>mode) and it is used as the
;; first element of the SVE_COND_FP_MAXMIN_PUBLIC unspec.
2328 | (define_expand "<optab><mode>3" | |
43cacb12 RS |
2329 | [(set (match_operand:SVE_F 0 "register_operand") |
2330 | (unspec:SVE_F | |
2331 | [(match_dup 3) | |
214c42fa RS |
2332 | (match_operand:SVE_F 1 "register_operand") |
2333 | (match_operand:SVE_F 2 "register_operand")] | |
2334 | SVE_COND_FP_MAXMIN_PUBLIC))] | |
43cacb12 RS |
2335 | "TARGET_SVE" |
2336 | { | |
16de3637 | 2337 | operands[3] = aarch64_ptrue_reg (<VPRED>mode); |
43cacb12 RS |
2338 | } |
2339 | ) | |
2340 | ||
214c42fa | 2341 | ;; Unpredicated fmax/fmin (the libm functions). |
;; The RTL template is the same as for the MAX/MIN expander; only the
;; pattern name differs (<maxmin_uns> instead of <optab>).  Operand 3 is
;; set by the preparation statement to aarch64_ptrue_reg (<VPRED>mode).
43cacb12 RS |
2342 | (define_expand "<maxmin_uns><mode>3" |
2343 | [(set (match_operand:SVE_F 0 "register_operand") | |
2344 | (unspec:SVE_F | |
2345 | [(match_dup 3) | |
214c42fa RS |
2346 | (match_operand:SVE_F 1 "register_operand") |
2347 | (match_operand:SVE_F 2 "register_operand")] | |
2348 | SVE_COND_FP_MAXMIN_PUBLIC))] | |
43cacb12 RS |
2349 | "TARGET_SVE" |
2350 | { | |
16de3637 | 2351 | operands[3] = aarch64_ptrue_reg (<VPRED>mode); |
43cacb12 RS |
2352 | } |
2353 | ) | |
2354 | ||
214c42fa RS |
2355 | ;; Predicated floating-point maximum/minimum. |
;; Alternative 1 ties operand 2 to the output; the "%" marks operands 2
;; and 3 as commutative.  Alternative 2 uses an earlyclobber output and
;; copies operand 2 into it with MOVPRFX before the operation, and is
;; the only alternative with the "movprfx" attribute set to "yes".
2356 | (define_insn "*<optab><mode>3" | |
a08acce8 | 2357 | [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w") |
43cacb12 | 2358 | (unspec:SVE_F |
a08acce8 | 2359 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") |
214c42fa RS |
2360 | (match_operand:SVE_F 2 "register_operand" "%0, w") |
2361 | (match_operand:SVE_F 3 "register_operand" "w, w")] | |
2362 | SVE_COND_FP_MAXMIN_PUBLIC))] | |
43cacb12 | 2363 | "TARGET_SVE" |
a08acce8 | 2364 | "@ |
214c42fa RS |
2365 | <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> |
2366 | movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
a08acce8 | 2367 | [(set_attr "movprfx" "*,yes")] |
43cacb12 RS |
2368 | ) |
2369 | ||
915d28fe RS |
2370 | ;; Merging forms are handled through SVE_COND_FP_BINARY. |
2371 | ||
2372 | ;; ------------------------------------------------------------------------- | |
2373 | ;; ---- [PRED] Binary logical operations | |
2374 | ;; ------------------------------------------------------------------------- | |
2375 | ;; Includes: | |
2376 | ;; - AND | |
2377 | ;; - ANDS | |
2378 | ;; - EOR | |
2379 | ;; - EORS | |
2380 | ;; - ORR | |
2381 | ;; - ORRS | |
2382 | ;; ------------------------------------------------------------------------- | |
2383 | ||
2384 | ;; Predicate AND. We can reuse one of the inputs as the GP. | |
;; In the output template operand 1 appears twice: once as the /z
;; governing predicate and once as the first source; operand 2 is the
;; second source.  All predicates are printed with the .b suffix.
2385 | (define_insn "and<mode>3" | |
2386 | [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") | |
2387 | (and:PRED_ALL (match_operand:PRED_ALL 1 "register_operand" "Upa") | |
2388 | (match_operand:PRED_ALL 2 "register_operand" "Upa")))] | |
9d4ac06e | 2389 | "TARGET_SVE" |
915d28fe | 2390 | "and\t%0.b, %1/z, %1.b, %2.b" |
a08acce8 | 2391 | ) |
9d4ac06e | 2392 | |
915d28fe RS |
2393 | ;; Unpredicated predicate EOR and ORR. |
;; The LOGICAL_OR of operands 1 and 2 is ANDed with operand 3, which is
;; not supplied by the caller: the preparation statement sets it to
;; aarch64_ptrue_reg (<MODE>mode).
2394 | (define_expand "<optab><mode>3" | |
2395 | [(set (match_operand:PRED_ALL 0 "register_operand") | |
2396 | (and:PRED_ALL | |
2397 | (LOGICAL_OR:PRED_ALL | |
2398 | (match_operand:PRED_ALL 1 "register_operand") | |
2399 | (match_operand:PRED_ALL 2 "register_operand")) | |
2400 | (match_dup 3)))] | |
6c4fd4a9 | 2401 | "TARGET_SVE" |
915d28fe RS |
2402 | { |
2403 | operands[3] = aarch64_ptrue_reg (<MODE>mode); | |
2404 | } | |
a08acce8 | 2405 | ) |
6c4fd4a9 | 2406 | |
915d28fe | 2407 | ;; Predicated predicate AND, EOR and ORR. |
;; Operands 2 and 3 are the inputs to the LOGICAL operation.  Operand 1
;; is the predicate ANDed with that result and is printed as the /z
;; governing predicate.
34467289 | 2408 | (define_insn "@aarch64_pred_<optab><mode>_z" |
915d28fe RS |
2409 | [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") |
2410 | (and:PRED_ALL | |
2411 | (LOGICAL:PRED_ALL | |
2412 | (match_operand:PRED_ALL 2 "register_operand" "Upa") | |
2413 | (match_operand:PRED_ALL 3 "register_operand" "Upa")) | |
2414 | (match_operand:PRED_ALL 1 "register_operand" "Upa")))] | |
2415 | "TARGET_SVE" | |
2416 | "<logical>\t%0.b, %1/z, %2.b, %3.b" | |
2417 | ) | |
2418 | ||
2419 | ;; Perform a logical operation on operands 2 and 3, using operand 1 as | |
34467289 RS |
2420 | ;; the GP. Store the result in operand 0 and set the flags in the same |
2421 | ;; way as for PTEST. | |
;; The pattern contains two sets: the CC_NZC register from an
;; UNSPEC_PTEST, and operand 0 from the ANDed logical result.  Operand 4
;; is matched without a mode or predicate and is reused via match_dup as
;; the value ANDed with the logical result in both sets.  Operand 5 must
;; satisfy aarch64_sve_ptrue_flag.  Operand 1 is matched in VNx16BI mode
;; regardless of the PRED_ALL mode and is the predicate printed as /z.
915d28fe RS |
2422 | (define_insn "*<optab><mode>3_cc" |
2423 | [(set (reg:CC_NZC CC_REGNUM) | |
2424 | (unspec:CC_NZC | |
34467289 RS |
2425 | [(match_operand:VNx16BI 1 "register_operand" "Upa") |
2426 | (match_operand 4) | |
2427 | (match_operand:SI 5 "aarch64_sve_ptrue_flag") | |
915d28fe RS |
2428 | (and:PRED_ALL |
2429 | (LOGICAL:PRED_ALL | |
2430 | (match_operand:PRED_ALL 2 "register_operand" "Upa") | |
2431 | (match_operand:PRED_ALL 3 "register_operand" "Upa")) | |
34467289 RS |
2432 | (match_dup 4))] |
2433 | UNSPEC_PTEST)) | |
915d28fe RS |
2434 | (set (match_operand:PRED_ALL 0 "register_operand" "=Upa") |
2435 | (and:PRED_ALL (LOGICAL:PRED_ALL (match_dup 2) (match_dup 3)) | |
34467289 | 2436 | (match_dup 4)))] |
915d28fe RS |
2437 | "TARGET_SVE" |
2438 | "<logical>s\t%0.b, %1/z, %2.b, %3.b" | |
2439 | ) | |
2440 | ||
2441 | ;; ------------------------------------------------------------------------- | |
2442 | ;; ---- [PRED] Binary logical operations (inverted second input) | |
2443 | ;; ------------------------------------------------------------------------- | |
2444 | ;; Includes: | |
2445 | ;; - BIC | |
2446 | ;; - ORN | |
2447 | ;; ------------------------------------------------------------------------- | |
2448 | ||
2449 | ;; Predicated predicate BIC and ORN. | |
;; The inverted input is operand 2 (wrapped in NOT in the RTL).  It is
;; printed last in the output template, after the non-inverted
;; operand 3.  Operand 1 is the predicate ANDed with the result and
;; printed as /z.
2450 | (define_insn "*<nlogical><mode>3" | |
2451 | [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") | |
2452 | (and:PRED_ALL | |
2453 | (NLOGICAL:PRED_ALL | |
2454 | (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa")) | |
2455 | (match_operand:PRED_ALL 3 "register_operand" "Upa")) | |
2456 | (match_operand:PRED_ALL 1 "register_operand" "Upa")))] | |
2457 | "TARGET_SVE" | |
2458 | "<nlogical>\t%0.b, %1/z, %3.b, %2.b" | |
2459 | ) | |
2460 | ||
2461 | ;; ------------------------------------------------------------------------- | |
2462 | ;; ---- [PRED] Binary logical operations (inverted result) | |
2463 | ;; ------------------------------------------------------------------------- | |
2464 | ;; Includes: | |
2465 | ;; - NAND | |
2466 | ;; - NOR | |
2467 | ;; ------------------------------------------------------------------------- | |
2468 | ||
2469 | ;; Predicated predicate NAND and NOR. | |
;; The RTL expresses the inverted result as NLOGICAL applied to the NOT
;; of both operand 2 and operand 3; the mnemonic comes from the
;; <logical_nn> attribute.  Operand 1 is the predicate ANDed with the
;; result and printed as /z.
2470 | (define_insn "*<logical_nn><mode>3" | |
2471 | [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") | |
2472 | (and:PRED_ALL | |
2473 | (NLOGICAL:PRED_ALL | |
2474 | (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa")) | |
2475 | (not:PRED_ALL (match_operand:PRED_ALL 3 "register_operand" "Upa"))) | |
2476 | (match_operand:PRED_ALL 1 "register_operand" "Upa")))] | |
2477 | "TARGET_SVE" | |
2478 | "<logical_nn>\t%0.b, %1/z, %2.b, %3.b" | |
2479 | ) | |
2480 | ||
2481 | ;; ========================================================================= | |
2482 | ;; == Ternary arithmetic | |
2483 | ;; ========================================================================= | |
2484 | ||
2485 | ;; ------------------------------------------------------------------------- | |
2486 | ;; ---- [INT] MLA and MAD | |
2487 | ;; ------------------------------------------------------------------------- | |
2488 | ;; Includes: | |
2489 | ;; - MAD | |
2490 | ;; - MLA | |
2491 | ;; ------------------------------------------------------------------------- | |
2492 | ||
2493 | ;; Predicated integer addition of product. | |
;; Computes operand 4 + (operand 2 * operand 3), with the MULT wrapped in
;; UNSPEC_MERGE_PTRUE.  Three alternatives:
;;   1. a multiplication input tied to the output ("%0"): MAD
;;   2. the addend (operand 4) tied to the output: MLA
;;   3. nothing tied; earlyclobber output, MOVPRFX from operand 4, then
;;      MLA.  Only this alternative sets "movprfx" to "yes".
2494 | (define_insn "*madd<mode>" | |
2495 | [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w") | |
2496 | (plus:SVE_I | |
2497 | (unspec:SVE_I | |
2498 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") | |
2499 | (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w, w") | |
2500 | (match_operand:SVE_I 3 "register_operand" "w, w, w"))] | |
2501 | UNSPEC_MERGE_PTRUE) | |
2502 | (match_operand:SVE_I 4 "register_operand" "w, 0, w")))] | |
a08acce8 RH |
2503 | "TARGET_SVE" |
2504 | "@ | |
915d28fe RS |
2505 | mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype> |
2506 | mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> | |
2507 | movprfx\t%0, %4\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>" | |
2508 | [(set_attr "movprfx" "*,*,yes")] | |
a08acce8 RH |
2509 | ) |
2510 | ||
915d28fe RS |
2511 | ;; ------------------------------------------------------------------------- |
2512 | ;; ---- [INT] MLS and MSB | |
2513 | ;; ------------------------------------------------------------------------- | |
2514 | ;; Includes: | |
2515 | ;; - MLS | |
2516 | ;; - MSB | |
2517 | ;; ------------------------------------------------------------------------- | |
2518 | ||
2519 | ;; Predicated integer subtraction of product. | |
;; Computes operand 4 - (operand 2 * operand 3), with the MULT wrapped in
;; UNSPEC_MERGE_PTRUE.  Three alternatives:
;;   1. a multiplication input tied to the output ("%0"): MSB
;;   2. the minuend (operand 4) tied to the output: MLS
;;   3. nothing tied; earlyclobber output, MOVPRFX from operand 4, then
;;      MLS.  Only this alternative sets "movprfx" to "yes".
2520 | (define_insn "*msub<mode>3" | |
2521 | [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w") | |
2522 | (minus:SVE_I | |
2523 | (match_operand:SVE_I 4 "register_operand" "w, 0, w") | |
2524 | (unspec:SVE_I | |
2525 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") | |
2526 | (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w, w") | |
2527 | (match_operand:SVE_I 3 "register_operand" "w, w, w"))] | |
2528 | UNSPEC_MERGE_PTRUE)))] | |
2529 | "TARGET_SVE" | |
2530 | "@ | |
2531 | msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype> | |
2532 | mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> | |
2533 | movprfx\t%0, %4\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>" | |
2534 | [(set_attr "movprfx" "*,*,yes")] | |
2535 | ) | |
2536 | ||
2537 | ;; ------------------------------------------------------------------------- | |
2538 | ;; ---- [INT] Dot product | |
2539 | ;; ------------------------------------------------------------------------- | |
2540 | ;; Includes: | |
2541 | ;; - SDOT | |
2542 | ;; - UDOT | |
2543 | ;; ------------------------------------------------------------------------- | |
2544 | ||
2545 | ;; Four-element integer dot-product with accumulation. | |
;; Operands 1 and 2 have mode <VSI2QI>; operand 3 is the SVE_SDI
;; accumulator that is added to the DOTPROD unspec.  Alternative 1 ties
;; the accumulator to the output.  Alternative 2 uses an earlyclobber
;; output and first copies the accumulator into it with MOVPRFX.  The
;; pattern is unpredicated.
2546 | (define_insn "<sur>dot_prod<vsi2qi>" | |
a08acce8 | 2547 | [(set (match_operand:SVE_SDI 0 "register_operand" "=w, ?&w") |
915d28fe RS |
2548 | (plus:SVE_SDI |
2549 | (unspec:SVE_SDI | |
2550 | [(match_operand:<VSI2QI> 1 "register_operand" "w, w") | |
2551 | (match_operand:<VSI2QI> 2 "register_operand" "w, w")] | |
2552 | DOTPROD) | |
2553 | (match_operand:SVE_SDI 3 "register_operand" "0, w")))] | |
a08acce8 RH |
2554 | "TARGET_SVE" |
2555 | "@ | |
915d28fe RS |
2556 | <sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth> |
2557 | movprfx\t%0, %3\;<sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>" | |
a08acce8 RH |
2558 | [(set_attr "movprfx" "*,yes")] |
2559 | ) | |
2560 | ||
915d28fe RS |
2561 | ;; ------------------------------------------------------------------------- |
2562 | ;; ---- [INT] Sum of absolute differences | |
2563 | ;; ------------------------------------------------------------------------- | |
2564 | ;; The patterns in this section are synthetic. | |
2565 | ;; ------------------------------------------------------------------------- | |
2566 | ||
2567 | ;; Emit a sequence to produce a sum-of-absolute-differences of the inputs in | |
2568 | ;; operands 1 and 2. The sequence also has to perform a widening reduction of | |
2569 | ;; the difference into a vector and accumulate that into operand 3 before | |
2570 | ;; copying that into the result operand 0. | |
2571 | ;; Perform that with a sequence of: | |
2572 | ;; MOV ones.b, #1 | |
2573 | ;; [SU]ABD diff.b, p0/m, op1.b, op2.b | |
2574 | ;; MOVPRFX op0, op3 // If necessary | |
2575 | ;; UDOT op0.s, diff.b, ones.b | |
;; Only the absolute-difference step depends on <sur>: the accumulation
;; always goes through gen_udot_prod<vsi2qi>.  "ones" is CONST1_RTX of
;; the <VSI2QI> mode forced into a register.  The expander emits both
;; instructions itself and finishes with DONE.
2576 | (define_expand "<sur>sad<vsi2qi>" | |
2577 | [(use (match_operand:SVE_SDI 0 "register_operand")) | |
2578 | (unspec:<VSI2QI> [(use (match_operand:<VSI2QI> 1 "register_operand")) | |
2579 | (use (match_operand:<VSI2QI> 2 "register_operand"))] ABAL) | |
2580 | (use (match_operand:SVE_SDI 3 "register_operand"))] | |
2581 | "TARGET_SVE" | |
2582 | { | |
2583 | rtx ones = force_reg (<VSI2QI>mode, CONST1_RTX (<VSI2QI>mode)); | |
2584 | rtx diff = gen_reg_rtx (<VSI2QI>mode); | |
2585 | emit_insn (gen_<sur>abd<vsi2qi>_3 (diff, operands[1], operands[2])); | |
2586 | emit_insn (gen_udot_prod<vsi2qi> (operands[0], diff, ones, operands[3])); | |
2587 | DONE; | |
2588 | } | |
2589 | ) | |
2590 | ||
2591 | ;; ------------------------------------------------------------------------- | |
2592 | ;; ---- [FP] General ternary arithmetic corresponding to unspecs | |
2593 | ;; ------------------------------------------------------------------------- | |
2594 | ;; Includes merging patterns for: | |
2595 | ;; - FMAD | |
2596 | ;; - FMLA | |
2597 | ;; - FMLS | |
2598 | ;; - FMSB | |
2599 | ;; - FNMAD | |
2600 | ;; - FNMLA | |
2601 | ;; - FNMLS | |
2602 | ;; - FNMSB | |
2603 | ;; ------------------------------------------------------------------------- | |
2604 | ||
0d80d083 RS |
2605 | ;; Unpredicated floating-point ternary operations. |
;; The caller supplies operands 0-3, all registers.  Operand 4 is set by
;; the preparation statement to aarch64_ptrue_reg (<VPRED>mode) and is
;; used as the first element of the SVE_COND_FP_TERNARY unspec.
2606 | (define_expand "<optab><mode>4" | |
2607 | [(set (match_operand:SVE_F 0 "register_operand") | |
2608 | (unspec:SVE_F | |
2609 | [(match_dup 4) | |
2610 | (match_operand:SVE_F 1 "register_operand") | |
2611 | (match_operand:SVE_F 2 "register_operand") | |
2612 | (match_operand:SVE_F 3 "register_operand")] | |
2613 | SVE_COND_FP_TERNARY))] | |
2614 | "TARGET_SVE" | |
2615 | { | |
2616 | operands[4] = aarch64_ptrue_reg (<VPRED>mode); | |
2617 | } | |
2618 | ) | |
2619 | ||
2620 | ;; Predicated floating-point ternary operations. | |
;; Three alternatives:
;;   1. operand 4 tied to the output: <sve_fmla_op> on operands 2 and 3
;;   2. operand 2 tied to the output: <sve_fmad_op> on operands 3 and 4
;;   3. nothing tied; earlyclobber output, MOVPRFX from operand 4, then
;;      <sve_fmla_op>.  Only this alternative sets "movprfx" to "yes".
;; The "%" on operand 2 marks operands 2 and 3 as commutative.
2621 | (define_insn "*<optab><mode>4" | |
2622 | [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w") | |
2623 | (unspec:SVE_F | |
2624 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") | |
2625 | (match_operand:SVE_F 2 "register_operand" "%w, 0, w") | |
2626 | (match_operand:SVE_F 3 "register_operand" "w, w, w") | |
2627 | (match_operand:SVE_F 4 "register_operand" "0, w, w")] | |
2628 | SVE_COND_FP_TERNARY))] | |
2629 | "TARGET_SVE" | |
2630 | "@ | |
2631 | <sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> | |
2632 | <sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype> | |
2633 | movprfx\t%0, %4\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>" | |
2634 | [(set_attr "movprfx" "*,*,yes")] | |
2635 | ) | |
2636 | ||
915d28fe RS |
2637 | ;; Predicated floating-point ternary operations with merging. |
;; The same predicate (operand 1) is used, via match_dup, inside the
;; SVE_COND_FP_TERNARY unspec and as the selector of the outer
;; UNSPEC_SEL.  Operand 5 is the fallback value of the select and is
;; matched by aarch64_simd_reg_or_zero.  The preparation statement only
;; reorders operands 2 and 3; the RTL template is then emitted as usual
;; (there is no DONE).
2638 | (define_expand "cond_<optab><mode>" | |
2639 | [(set (match_operand:SVE_F 0 "register_operand") | |
2640 | (unspec:SVE_F | |
2641 | [(match_operand:<VPRED> 1 "register_operand") | |
2642 | (unspec:SVE_F | |
0d80d083 RS |
2643 | [(match_dup 1) |
2644 | (match_operand:SVE_F 2 "register_operand") | |
915d28fe RS |
2645 | (match_operand:SVE_F 3 "register_operand") |
2646 | (match_operand:SVE_F 4 "register_operand")] | |
2647 | SVE_COND_FP_TERNARY) | |
2648 | (match_operand:SVE_F 5 "aarch64_simd_reg_or_zero")] | |
2649 | UNSPEC_SEL))] | |
2650 | "TARGET_SVE" | |
2651 | { | |
2652 | /* Swap the multiplication operands if the fallback value is the | |
2653 | second of the two. */ | |
2654 | if (rtx_equal_p (operands[3], operands[5])) | |
2655 | std::swap (operands[2], operands[3]); | |
2656 | }) | |
2657 | ||
2658 | ;; Predicated floating-point ternary operations, merging with the | |
2659 | ;; first input. | |
;; The fallback value of the UNSPEC_SEL is (match_dup 2).  Alternative 1
;; ties operand 2 to the output.  Alternative 2 uses an earlyclobber
;; output and first copies operand 2 into it with MOVPRFX.  Both
;; alternatives then emit <sve_fmad_op> on operands 3 and 4.
2660 | (define_insn "*cond_<optab><mode>_2" | |
2661 | [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w") | |
2662 | (unspec:SVE_F | |
a08acce8 | 2663 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") |
915d28fe | 2664 | (unspec:SVE_F |
0d80d083 RS |
2665 | [(match_dup 1) |
2666 | (match_operand:SVE_F 2 "register_operand" "0, w") | |
915d28fe RS |
2667 | (match_operand:SVE_F 3 "register_operand" "w, w") |
2668 | (match_operand:SVE_F 4 "register_operand" "w, w")] | |
2669 | SVE_COND_FP_TERNARY) | |
2670 | (match_dup 2)] | |
a08acce8 RH |
2671 | UNSPEC_SEL))] |
2672 | "TARGET_SVE" | |
2673 | "@ | |
915d28fe RS |
2674 | <sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype> |
2675 | movprfx\t%0, %2\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>" | |
a08acce8 RH |
2676 | [(set_attr "movprfx" "*,yes")] |
2677 | ) | |
2678 | ||
915d28fe RS |
2679 | ;; Predicated floating-point ternary operations, merging with the |
2680 | ;; third input. | |
;; The fallback value of the UNSPEC_SEL is (match_dup 4).  Alternative 1
;; ties operand 4 to the output.  Alternative 2 uses an earlyclobber
;; output and first copies operand 4 into it with MOVPRFX.  Both
;; alternatives then emit <sve_fmla_op> on operands 2 and 3.
2681 | (define_insn "*cond_<optab><mode>_4" | |
2682 | [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w") | |
2683 | (unspec:SVE_F | |
a08acce8 | 2684 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") |
915d28fe | 2685 | (unspec:SVE_F |
0d80d083 RS |
2686 | [(match_dup 1) |
2687 | (match_operand:SVE_F 2 "register_operand" "w, w") | |
915d28fe RS |
2688 | (match_operand:SVE_F 3 "register_operand" "w, w") |
2689 | (match_operand:SVE_F 4 "register_operand" "0, w")] | |
2690 | SVE_COND_FP_TERNARY) | |
2691 | (match_dup 4)] | |
a08acce8 RH |
2692 | UNSPEC_SEL))] |
2693 | "TARGET_SVE" | |
2694 | "@ | |
915d28fe RS |
2695 | <sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> |
2696 | movprfx\t%0, %4\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>" | |
a08acce8 RH |
2697 | [(set_attr "movprfx" "*,yes")] |
2698 | ) | |
2699 | ||
915d28fe RS |
2700 | ;; Predicated floating-point ternary operations, merging with an |
2701 | ;; independent value. | |
;; The insn condition rejects a fallback (operand 5) that is rtx_equal_p
;; to any of operands 2, 3 or 4.  The output is earlyclobber in every
;; alternative:
;;   1. fallback "Dz": zeroing MOVPRFX (%1/z) of operand 4, then
;;      <sve_fmla_op>
;;   2. fallback tied to the output: merging MOVPRFX (%1/m) of operand 4,
;;      then <sve_fmla_op>
;;   3. fallback in another register: output as "#".
;; The rewrite applies after reload when operand 5 is not a constant and
;; differs from operand 0.  It emits vcond_mask into operand 0, selecting
;; between operands 4 and 5 under predicate operand 1, and then makes
;; operands 4 and 5 both refer to operand 0.
f4fde1b3 | 2702 | (define_insn_and_rewrite "*cond_<optab><mode>_any" |
915d28fe RS |
2703 | [(set (match_operand:SVE_F 0 "register_operand" "=&w, &w, ?&w") |
2704 | (unspec:SVE_F | |
2705 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") | |
2706 | (unspec:SVE_F | |
0d80d083 RS |
2707 | [(match_dup 1) |
2708 | (match_operand:SVE_F 2 "register_operand" "w, w, w") | |
915d28fe RS |
2709 | (match_operand:SVE_F 3 "register_operand" "w, w, w") |
2710 | (match_operand:SVE_F 4 "register_operand" "w, w, w")] | |
2711 | SVE_COND_FP_TERNARY) | |
2712 | (match_operand:SVE_F 5 "aarch64_simd_reg_or_zero" "Dz, 0, w")] | |
0d2b3bca | 2713 | UNSPEC_SEL))] |
f4fde1b3 | 2714 | "TARGET_SVE |
915d28fe RS |
2715 | && !rtx_equal_p (operands[2], operands[5]) |
2716 | && !rtx_equal_p (operands[3], operands[5]) | |
2717 | && !rtx_equal_p (operands[4], operands[5])" | |
32cf949c | 2718 | "@ |
915d28fe RS |
2719 | movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> |
2720 | movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> | |
32cf949c RS |
2721 | #" |
2722 | "&& reload_completed | |
915d28fe RS |
2723 | && !CONSTANT_P (operands[5]) |
2724 | && !rtx_equal_p (operands[0], operands[5])" | |
f4fde1b3 | 2725 | { |
915d28fe RS |
2726 | emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4], |
2727 | operands[5], operands[1])); | |
2728 | operands[5] = operands[4] = operands[0]; | |
f4fde1b3 | 2729 | } |
32cf949c | 2730 | [(set_attr "movprfx" "yes")] |
0d2b3bca RS |
2731 | ) |
2732 | ||
915d28fe RS |
2733 | ;; ========================================================================= |
2734 | ;; == Comparisons and selects | |
2735 | ;; ========================================================================= | |
2736 | ||
2737 | ;; ------------------------------------------------------------------------- | |
2738 | ;; ---- [INT,FP] Select based on predicates | |
2739 | ;; ------------------------------------------------------------------------- | |
2740 | ;; Includes merging patterns for: | |
2741 | ;; - MOV | |
2742 | ;; - SEL | |
2743 | ;; ------------------------------------------------------------------------- | |
2744 | ||
2745 | ;; vcond_mask operand order: true, false, mask | |
2746 | ;; UNSPEC_SEL operand order: mask, true, false (as for VEC_COND_EXPR) | |
2747 | ;; SEL operand order: mask, true, false | |
;; The operand numbers follow the vcond_mask order, so the predicate is
;; operand 3 even though it is listed first in the UNSPEC_SEL vector and
;; printed first among the sources of the SEL instruction.
2748 | (define_insn "vcond_mask_<mode><vpred>" | |
2749 | [(set (match_operand:SVE_ALL 0 "register_operand" "=w") | |
2750 | (unspec:SVE_ALL | |
2751 | [(match_operand:<VPRED> 3 "register_operand" "Upa") | |
2752 | (match_operand:SVE_ALL 1 "register_operand" "w") | |
2753 | (match_operand:SVE_ALL 2 "register_operand" "w")] | |
2754 | UNSPEC_SEL))] | |
2755 | "TARGET_SVE" | |
2756 | "sel\t%0.<Vetype>, %3, %1.<Vetype>, %2.<Vetype>" | |
2757 | ) | |
2758 | ||
2759 | ;; Selects between a duplicated immediate and zero. | |
;; Operands 2 and 3 have predicates but no constraints.  Operand 2
;; (aarch64_sve_dup_immediate) is printed as the immediate of a zeroing
;; MOV governed by operand 1.  Operand 3 (aarch64_simd_imm_zero) does not
;; appear in the output template.
2760 | (define_insn "aarch64_sve_dup<mode>_const" | |
2761 | [(set (match_operand:SVE_I 0 "register_operand" "=w") | |
2762 | (unspec:SVE_I | |
2763 | [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
2764 | (match_operand:SVE_I 2 "aarch64_sve_dup_immediate") | |
2765 | (match_operand:SVE_I 3 "aarch64_simd_imm_zero")] | |
2766 | UNSPEC_SEL))] | |
43cacb12 | 2767 | "TARGET_SVE" |
915d28fe | 2768 | "mov\t%0.<Vetype>, %1/z, #%2" |
43cacb12 RS |
2769 | ) |
2770 | ||
915d28fe RS |
2771 | ;; ------------------------------------------------------------------------- |
2772 | ;; ---- [INT,FP] Compare and select | |
2773 | ;; ------------------------------------------------------------------------- | |
2774 | ;; The patterns in this section are synthetic. | |
2775 | ;; ------------------------------------------------------------------------- | |
43cacb12 | 2776 | |
915d28fe RS |
2777 | ;; Integer (signed) vcond. Don't enforce an immediate range here, since it |
2778 | ;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead. | |
;; Operands 1 and 2 are the values selected between, operand 3 is the
;; comparison operator and operands 4 and 5 are its inputs in
;; <V_INT_EQUIV> mode.  The expansion is delegated entirely to
;; aarch64_expand_sve_vcond, which receives the data mode, the comparison
;; mode and the operand array; the preparation statement ends with DONE.
2779 | (define_expand "vcond<mode><v_int_equiv>" | |
2780 | [(set (match_operand:SVE_ALL 0 "register_operand") | |
2781 | (if_then_else:SVE_ALL | |
2782 | (match_operator 3 "comparison_operator" | |
2783 | [(match_operand:<V_INT_EQUIV> 4 "register_operand") | |
2784 | (match_operand:<V_INT_EQUIV> 5 "nonmemory_operand")]) | |
2785 | (match_operand:SVE_ALL 1 "register_operand") | |
2786 | (match_operand:SVE_ALL 2 "register_operand")))] | |
898f07b0 RS |
2787 | "TARGET_SVE" |
2788 | { | |
915d28fe RS |
2789 | aarch64_expand_sve_vcond (<MODE>mode, <V_INT_EQUIV>mode, operands); |
2790 | DONE; | |
898f07b0 RS |
2791 | } |
2792 | ) | |
2793 | ||
915d28fe RS |
2794 | ;; Integer vcondu. Don't enforce an immediate range here, since it |
2795 | ;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead. | |
;; The RTL template and the call to aarch64_expand_sve_vcond are the same
;; as for the vcond<mode><v_int_equiv> expander; only the pattern name
;; differs.  The preparation statement ends with DONE.
2796 | (define_expand "vcondu<mode><v_int_equiv>" | |
2797 | [(set (match_operand:SVE_ALL 0 "register_operand") | |
2798 | (if_then_else:SVE_ALL | |
2799 | (match_operator 3 "comparison_operator" | |
2800 | [(match_operand:<V_INT_EQUIV> 4 "register_operand") | |
2801 | (match_operand:<V_INT_EQUIV> 5 "nonmemory_operand")]) | |
2802 | (match_operand:SVE_ALL 1 "register_operand") | |
2803 | (match_operand:SVE_ALL 2 "register_operand")))] | |
898f07b0 | 2804 | "TARGET_SVE" |
915d28fe RS |
2805 | { |
2806 | aarch64_expand_sve_vcond (<MODE>mode, <V_INT_EQUIV>mode, operands); | |
2807 | DONE; | |
2808 | } | |
898f07b0 RS |
2809 | ) |
2810 | ||
915d28fe RS |
2811 | ;; Floating-point vcond. All comparisons except FCMUO allow a zero operand; |
2812 | ;; aarch64_expand_sve_vcond handles the case of an FCMUO with zero. | |
;; The selected values use the SVE_SD iterator and the comparison inputs
;; (operands 4 and 5) use <V_FP_EQUIV> mode; operand 5 is matched by
;; aarch64_simd_reg_or_zero.  The expansion is delegated entirely to
;; aarch64_expand_sve_vcond and the preparation statement ends with DONE.
2813 | (define_expand "vcond<mode><v_fp_equiv>" | |
2814 | [(set (match_operand:SVE_SD 0 "register_operand") | |
2815 | (if_then_else:SVE_SD | |
2816 | (match_operator 3 "comparison_operator" | |
2817 | [(match_operand:<V_FP_EQUIV> 4 "register_operand") | |
2818 | (match_operand:<V_FP_EQUIV> 5 "aarch64_simd_reg_or_zero")]) | |
2819 | (match_operand:SVE_SD 1 "register_operand") | |
2820 | (match_operand:SVE_SD 2 "register_operand")))] | |
b781a135 RS |
2821 | "TARGET_SVE" |
2822 | { | |
915d28fe RS |
2823 | aarch64_expand_sve_vcond (<MODE>mode, <V_FP_EQUIV>mode, operands); |
2824 | DONE; | |
b781a135 RS |
2825 | } |
2826 | ) | |
2827 | ||
915d28fe RS |
2828 | ;; ------------------------------------------------------------------------- |
2829 | ;; ---- [INT] Comparisons | |
2830 | ;; ------------------------------------------------------------------------- | |
2831 | ;; Includes merging patterns for: | |
2832 | ;; - CMPEQ | |
2833 | ;; - CMPGE | |
2834 | ;; - CMPGT | |
2835 | ;; - CMPHI | |
2836 | ;; - CMPHS | |
2837 | ;; - CMPLE | |
2838 | ;; - CMPLO | |
2839 | ;; - CMPLS | |
2840 | ;; - CMPLT | |
2841 | ;; - CMPNE | |
2842 | ;; ------------------------------------------------------------------------- | |
b781a135 | 2843 | |
915d28fe RS |
;; Signed integer comparisons, producing a predicate in operand 0.
;; Operand 3 is not range-checked here, since the valid immediates depend
;; on the comparison code; aarch64_expand_sve_vec_cmp_int takes care of it.
;; The pattern also records a clobber of the condition-code register.
(define_expand "vec_cmp<mode><vpred>"
  [(parallel
     [(set (match_operand:<VPRED> 0 "register_operand")
           (match_operator:<VPRED> 1 "comparison_operator"
             [(match_operand:SVE_I 2 "register_operand")
              (match_operand:SVE_I 3 "nonmemory_operand")]))
      (clobber (reg:CC_NZC CC_REGNUM))])]
  "TARGET_SVE"
  {
    /* Only the code of operand 1 matters; its operands are passed
       separately.  */
    enum rtx_code cmp_code = GET_CODE (operands[1]);
    aarch64_expand_sve_vec_cmp_int (operands[0], cmp_code,
                                    operands[2], operands[3]);
    DONE;
  }
)
2861 | ||
915d28fe RS |
;; Unsigned integer comparisons, producing a predicate in operand 0.
;; Operand 3 is not range-checked here, since the valid immediates depend
;; on the comparison code; aarch64_expand_sve_vec_cmp_int takes care of it.
;; The pattern also records a clobber of the condition-code register.
(define_expand "vec_cmpu<mode><vpred>"
  [(parallel
     [(set (match_operand:<VPRED> 0 "register_operand")
           (match_operator:<VPRED> 1 "comparison_operator"
             [(match_operand:SVE_I 2 "register_operand")
              (match_operand:SVE_I 3 "nonmemory_operand")]))
      (clobber (reg:CC_NZC CC_REGNUM))])]
  "TARGET_SVE"
  {
    /* Only the code of operand 1 matters; its operands are passed
       separately.  */
    enum rtx_code cmp_code = GET_CODE (operands[1]);
    aarch64_expand_sve_vec_cmp_int (operands[0], cmp_code,
                                    operands[2], operands[3]);
    DONE;
  }
)
2879 | ||
915d28fe RS |
;; Integer comparisons predicated with a PTRUE (operand 1).
;; Alternative 0 compares operand 2 against an immediate, alternative 1
;; against a vector register.  The flags are clobbered.
(define_insn "*cmp<cmp_op><mode>"
  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
        (unspec:<VPRED>
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (SVE_INT_CMP:<VPRED>
             (match_operand:SVE_I 2 "register_operand" "w, w")
             (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
          UNSPEC_MERGE_PTRUE))
   (clobber (reg:CC_NZC CC_REGNUM))]
  "TARGET_SVE"
  "@
   cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
   cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
)
2895 | ||
915d28fe RS |
;; Integer comparisons predicated with a PTRUE, for the case in which both
;; the flag and predicate results are interesting.  The first set describes
;; the flags as an UNSPEC_PTEST of the comparison result; the second set
;; describes the predicate result (operand 0) itself.
(define_insn "*cmp<cmp_op><mode>_cc"
  [(set (reg:CC_NZC CC_REGNUM)
        (unspec:CC_NZC
          [(match_operand:VNx16BI 1 "register_operand" "Upl, Upl")
           (match_operand 4)
           (match_operand:SI 5 "aarch64_sve_ptrue_flag")
           (unspec:<VPRED>
             [(match_dup 4)
              (SVE_INT_CMP:<VPRED>
                (match_operand:SVE_I 2 "register_operand" "w, w")
                (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
             UNSPEC_MERGE_PTRUE)]
          UNSPEC_PTEST))
   (set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
        (unspec:<VPRED>
          [(match_dup 4)
           (SVE_INT_CMP:<VPRED>
             (match_dup 2)
             (match_dup 3))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "@
   cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
   cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
)
2923 | ||
915d28fe RS |
;; Integer comparisons predicated with a PTRUE, for the case in which only
;; the flags result is interesting.  The predicate destination (operand 0)
;; is just a clobbered scratch register.
(define_insn "*cmp<cmp_op><mode>_ptest"
  [(set (reg:CC_NZC CC_REGNUM)
        (unspec:CC_NZC
          [(match_operand:VNx16BI 1 "register_operand" "Upl, Upl")
           (match_operand 4)
           (match_operand:SI 5 "aarch64_sve_ptrue_flag")
           (unspec:<VPRED>
             [(match_dup 4)
              (SVE_INT_CMP:<VPRED>
                (match_operand:SVE_I 2 "register_operand" "w, w")
                (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
             UNSPEC_MERGE_PTRUE)]
          UNSPEC_PTEST))
   (clobber (match_scratch:<VPRED> 0 "=Upa, Upa"))]
  "TARGET_SVE"
  "@
   cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
   cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
)
2945 | ||
915d28fe RS |
;; Predicated integer comparisons, formed by combining a PTRUE-predicated
;; comparison with an AND.  The insn is never output directly ("#"); it is
;; split unconditionally into the preferred form, which has no reference
;; to the redundant operand 1.
(define_insn_and_split "*pred_cmp<cmp_op><mode>_combine"
  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
        (and:<VPRED>
          (unspec:<VPRED>
            [(match_operand:<VPRED> 1)
             (SVE_INT_CMP:<VPRED>
               (match_operand:SVE_I 2 "register_operand" "w, w")
               (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
            UNSPEC_MERGE_PTRUE)
          (match_operand:<VPRED> 4 "register_operand" "Upl, Upl")))
   (clobber (reg:CC_NZC CC_REGNUM))]
  "TARGET_SVE"
  "#"
  "&& 1"
  [(parallel
     [(set (match_dup 0)
           (and:<VPRED>
             (SVE_INT_CMP:<VPRED>
               (match_dup 2)
               (match_dup 3))
             (match_dup 4)))
      (clobber (reg:CC_NZC CC_REGNUM))])]
)
2973 | ||
;; Predicated integer comparisons: the comparison of operands 2 and 3 is
;; ANDed with the governing predicate in operand 1.  Alternative 0 takes
;; an immediate for operand 3, alternative 1 a vector register.
(define_insn "@aarch64_pred_cmp<cmp_op><mode>"
  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
        (and:<VPRED>
          (SVE_INT_CMP:<VPRED>
            (match_operand:SVE_I 2 "register_operand" "w, w")
            (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))
          (match_operand:<VPRED> 1 "register_operand" "Upl, Upl")))
   (clobber (reg:CC_NZC CC_REGNUM))]
  "TARGET_SVE"
  "@
   cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
   cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
)
2988 | ||
915d28fe RS |
2989 | ;; ------------------------------------------------------------------------- |
2990 | ;; ---- [INT] While tests | |
2991 | ;; ------------------------------------------------------------------------- | |
2992 | ;; Includes: | |
2993 | ;; - WHILELO | |
2994 | ;; ------------------------------------------------------------------------- | |
740c1ed7 | 2995 | |
915d28fe RS |
;; Set element I of the result if operand1 + J < operand2 for all J in
;; [0, I], with the comparison being unsigned.  Each scalar operand may be
;; a register or zero.  The flags are clobbered.
(define_insn "@while_ult<GPI:mode><PRED_ALL:mode>"
  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
        (unspec:PRED_ALL
          [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
           (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
          UNSPEC_WHILE_LO))
   (clobber (reg:CC_NZC CC_REGNUM))]
  "TARGET_SVE"
  "whilelo\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2"
)
3007 | ||
;; WHILELO sets the flags in the same way as a PTEST with a PTRUE GP.
;; This pattern handles the case in which both the flags and the predicate
;; result are useful.  The GP operands of the PTEST (operands 3 and 4)
;; aren't needed, so they are allowed to be anything.
(define_insn_and_rewrite "*while_ult<GPI:mode><PRED_ALL:mode>_cc"
  [(set (reg:CC_NZC CC_REGNUM)
        (unspec:CC_NZC
          [(match_operand 3)
           (match_operand 4)
           (const_int SVE_KNOWN_PTRUE)
           (unspec:PRED_ALL
             [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
              (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
             UNSPEC_WHILE_LO)]
          UNSPEC_PTEST))
   (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
        (unspec:PRED_ALL
          [(match_dup 1)
           (match_dup 2)]
          UNSPEC_WHILE_LO))]
  "TARGET_SVE"
  "whilelo\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2"
  ;; Force the compiler to drop the unused predicate operand, so that we
  ;; don't have an unnecessary PTRUE.  The rewrite only fires while at
  ;; least one of the two operands is still non-constant.
  "&& (!CONSTANT_P (operands[3]) || !CONSTANT_P (operands[4]))"
  {
    operands[3] = CONSTM1_RTX (VNx16BImode);
    operands[4] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
  }
)
3036 | ||
915d28fe RS |
3037 | ;; ------------------------------------------------------------------------- |
3038 | ;; ---- [FP] Comparisons | |
3039 | ;; ------------------------------------------------------------------------- | |
3040 | ;; Includes: | |
3041 | ;; - FCMEQ | |
3042 | ;; - FCMGE | |
3043 | ;; - FCMGT | |
3044 | ;; - FCMLE | |
3045 | ;; - FCMLT | |
3046 | ;; - FCMNE | |
3047 | ;; - FCMUO | |
3048 | ;; ------------------------------------------------------------------------- | |
3049 | ||
;; Floating-point comparisons, producing a predicate in operand 0.
;; Operand 3 may be a register or zero: all comparisons except FCMUO allow
;; a zero operand, and aarch64_expand_sve_vec_cmp_float handles the case
;; of an FCMUO with zero.
(define_expand "vec_cmp<mode><vpred>"
  [(set (match_operand:<VPRED> 0 "register_operand")
        (match_operator:<VPRED> 1 "comparison_operator"
          [(match_operand:SVE_F 2 "register_operand")
           (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero")]))]
  "TARGET_SVE"
  {
    /* Only the code of operand 1 matters; its operands are passed
       separately.  */
    enum rtx_code cmp_code = GET_CODE (operands[1]);
    aarch64_expand_sve_vec_cmp_float (operands[0], cmp_code,
                                      operands[2], operands[3], false);
    DONE;
  }
)
3065 | ||
915d28fe RS |
;; Floating-point comparisons predicated with a PTRUE (operand 1).
;; Alternative 0 compares operand 2 against zero, alternative 1 against
;; a vector register.
(define_insn "*fcm<cmp_op><mode>"
  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
        (unspec:<VPRED>
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (SVE_FP_CMP:<VPRED>
             (match_operand:SVE_F 2 "register_operand" "w, w")
             (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "@
   fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #0.0
   fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
)
3080 | ||
915d28fe RS |
;; Unordered floating-point comparison predicated with a PTRUE
;; (operand 1).  Both inputs must be registers; there is no zero
;; alternative here.
(define_insn "*fcmuo<mode>"
  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
        (unspec:<VPRED>
          [(match_operand:<VPRED> 1 "register_operand" "Upl")
           (unordered:<VPRED>
             (match_operand:SVE_F 2 "register_operand" "w")
             (match_operand:SVE_F 3 "register_operand" "w"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "fcmuo\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
)
3093 | ||
915d28fe RS |
;; Floating-point comparisons predicated on a PTRUE, with the results ANDed
;; with another predicate P.  This does not have the same trapping behavior
;; as predicating the comparison itself on P, but it's a legitimate fold,
;; since we can drop any potentially-trapping operations whose results
;; are not needed.
;;
;; Split the instruction into its preferred form (below) at the earliest
;; opportunity, in order to get rid of the redundant operand 1.
;;
;; The comparison rtx is given mode <VPRED> in the match template so that
;; it agrees with the split output and with the PTRUE-predicated
;; "*fcm<cmp_op><mode>" pattern whose result this insn is combined from.
(define_insn_and_split "*fcm<cmp_op><mode>_and_combine"
  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
	(and:<VPRED>
	  (unspec:<VPRED>
	    [(match_operand:<VPRED> 1)
	     (SVE_FP_CMP:<VPRED>
	       (match_operand:SVE_F 2 "register_operand" "w, w")
	       (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w"))]
	    UNSPEC_MERGE_PTRUE)
	  (match_operand:<VPRED> 4 "register_operand" "Upl, Upl")))]
  "TARGET_SVE"
  "#"
  "&& 1"
  [(set (match_dup 0)
	(and:<VPRED>
	  (SVE_FP_CMP:<VPRED>
	    (match_dup 2)
	    (match_dup 3))
	  (match_dup 4)))]
)
3122 | ||
915d28fe RS |
;; Unordered floating-point comparison predicated on a PTRUE, with the
;; result ANDed with another predicate (operand 4).  The insn is never
;; output directly ("#"); it is split unconditionally into a form that
;; has no reference to the redundant operand 1.
;;
;; The comparison rtx is given mode <VPRED> in the match template so that
;; it agrees with the split output and with the PTRUE-predicated
;; "*fcmuo<mode>" pattern whose result this insn is combined from.
(define_insn_and_split "*fcmuo<mode>_and_combine"
  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
	(and:<VPRED>
	  (unspec:<VPRED>
	    [(match_operand:<VPRED> 1)
	     (unordered:<VPRED>
	       (match_operand:SVE_F 2 "register_operand" "w")
	       (match_operand:SVE_F 3 "register_operand" "w"))]
	    UNSPEC_MERGE_PTRUE)
	  (match_operand:<VPRED> 4 "register_operand" "Upl")))]
  "TARGET_SVE"
  "#"
  "&& 1"
  [(set (match_dup 0)
	(and:<VPRED>
	  (unordered:<VPRED>
	    (match_dup 2)
	    (match_dup 3))
	  (match_dup 4)))]
)
3144 | ||
915d28fe RS |
;; Unpredicated floating-point comparisons whose result is ANDed with
;; another predicate (operand 1).  The instruction is emitted with that
;; predicate as its governing predicate.  Alternative 0 compares against
;; zero, alternative 1 against a vector register.
(define_insn "*fcm<cmp_op><mode>_and"
  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
        (and:<VPRED>
          (SVE_FP_CMP:<VPRED>
            (match_operand:SVE_F 2 "register_operand" "w, w")
            (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w"))
          (match_operand:<VPRED> 1 "register_operand" "Upl, Upl")))]
  "TARGET_SVE"
  "@
   fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #0.0
   fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
)
3159 | ||
915d28fe RS |
;; Unpredicated unordered floating-point comparison whose result is ANDed
;; with another predicate (operand 1).  The instruction is emitted with
;; that predicate as its governing predicate.
(define_insn "*fcmuo<mode>_and"
  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
        (and:<VPRED>
          (unordered:<VPRED>
            (match_operand:SVE_F 2 "register_operand" "w")
            (match_operand:SVE_F 3 "register_operand" "w"))
          (match_operand:<VPRED> 1 "register_operand" "Upl")))]
  "TARGET_SVE"
  "fcmuo\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
)
3171 | ||
915d28fe RS |
;; Predicated floating-point comparisons, expressed as an unspec from the
;; SVE_COND_FP_CMP iterator over (predicate, input, input).  We don't need
;; a version of this for unordered comparisons.  Alternative 0 compares
;; against zero, alternative 1 against a vector register.
(define_insn "*pred_fcm<cmp_op><mode>"
  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
        (unspec:<VPRED>
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (match_operand:SVE_F 2 "register_operand" "w, w")
           (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w")]
          SVE_COND_FP_CMP))]
  "TARGET_SVE"
  "@
   fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #0.0
   fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
)
3186 | ||
915d28fe RS |
3187 | ;; ------------------------------------------------------------------------- |
3188 | ;; ---- [PRED] Test bits | |
3189 | ;; ------------------------------------------------------------------------- | |
3190 | ;; Includes: | |
3191 | ;; - PTEST | |
3192 | ;; ------------------------------------------------------------------------- | |
3193 | ||
;; Branch based on predicate equality or inequality.
;;
;; The expander emits a PTEST and then rewrites the branch condition as a
;; comparison of the CC register against zero.  When operand 2 is zero,
;; operand 1 is tested directly; otherwise the value tested is the result
;; of aarch64_pred_xor<mode>_z applied to operands 1 and 2.
;;
;; NOTE(review): the insn condition is empty rather than "TARGET_SVE" like
;; the neighbouring patterns; presumably the predicate modes are only
;; usable with SVE anyway -- confirm.
(define_expand "cbranch<mode>4"
  [(set (pc)
        (if_then_else
          (match_operator 0 "aarch64_equality_operator"
            [(match_operand:PRED_ALL 1 "register_operand")
             (match_operand:PRED_ALL 2 "aarch64_simd_reg_or_zero")])
          (label_ref (match_operand 3 ""))
          (pc)))]
  ""
  {
    rtx ptrue = force_reg (VNx16BImode, aarch64_ptrue_all (<data_bytes>));
    rtx cast_ptrue = gen_lowpart (<MODE>mode, ptrue);
    rtx ptrue_flag = gen_int_mode (SVE_KNOWN_PTRUE, SImode);

    /* Comparing against zero needs no extra instruction.  */
    rtx tested = operands[1];
    if (operands[2] != CONST0_RTX (<MODE>mode))
      {
        tested = gen_reg_rtx (<MODE>mode);
        emit_insn (gen_aarch64_pred_xor<mode>_z (tested, cast_ptrue,
                                                 operands[1], operands[2]));
      }

    emit_insn (gen_aarch64_ptest<mode> (ptrue, cast_ptrue, ptrue_flag,
                                        tested));

    /* The branch itself now compares the flags register with zero.  */
    operands[1] = gen_rtx_REG (CC_NZCmode, CC_REGNUM);
    operands[2] = const0_rtx;
  }
)
3222 | ||
34467289 RS |
;; Set the flags from a predicate test.  See "Description of UNSPEC_PTEST"
;; above for details of the operands.  Only operand 0 (the VNx16BI
;; governing predicate) and operand 3 (the predicate being tested) appear
;; in the output; operands 1 and 2 are part of the RTL description only.
(define_insn "aarch64_ptest<mode>"
  [(set (reg:CC_NZC CC_REGNUM)
        (unspec:CC_NZC
          [(match_operand:VNx16BI 0 "register_operand" "Upa")
           (match_operand 1)
           (match_operand:SI 2 "aarch64_sve_ptrue_flag")
           (match_operand:PRED_ALL 3 "register_operand" "Upa")]
          UNSPEC_PTEST))]
  "TARGET_SVE"
  "ptest\t%0, %3.b"
)
3234 | ||
915d28fe RS |
3235 | ;; ========================================================================= |
3236 | ;; == Reductions | |
3237 | ;; ========================================================================= | |
3238 | ||
3239 | ;; ------------------------------------------------------------------------- | |
3240 | ;; ---- [INT,FP] Conditional reductions | |
3241 | ;; ------------------------------------------------------------------------- | |
3242 | ;; Includes: | |
3243 | ;; - CLASTB | |
3244 | ;; ------------------------------------------------------------------------- | |
3245 | ||
;; Set operand 0 to the last active element in operand 3, or to tied
;; operand 1 if no elements are active.  Operand 2 is the predicate.
;; Alternative 0 uses an "r" register for the scalar (slightly disparaged
;; with "?"), alternative 1 a "w" register.
(define_insn "fold_extract_last_<mode>"
  [(set (match_operand:<VEL> 0 "register_operand" "=?r, w")
        (unspec:<VEL>
          [(match_operand:<VEL> 1 "register_operand" "0, 0")
           (match_operand:<VPRED> 2 "register_operand" "Upl, Upl")
           (match_operand:SVE_ALL 3 "register_operand" "w, w")]
          UNSPEC_CLASTB))]
  "TARGET_SVE"
  "@
   clastb\t%<vwcore>0, %2, %<vwcore>0, %3.<Vetype>
   clastb\t%<Vetype>0, %2, %<Vetype>0, %3.<Vetype>"
)
3260 | ||
915d28fe RS |
3261 | ;; ------------------------------------------------------------------------- |
3262 | ;; ---- [INT] Tree reductions | |
3263 | ;; ------------------------------------------------------------------------- | |
3264 | ;; Includes: | |
3265 | ;; - ANDV | |
3266 | ;; - EORV | |
3267 | ;; - ORV | |
3268 | ;; - SMAXV | |
3269 | ;; - SMINV | |
3270 | ;; - UADDV | |
3271 | ;; - UMAXV | |
3272 | ;; - UMINV | |
3273 | ;; ------------------------------------------------------------------------- | |
3274 | ||
;; Unpredicated integer add reduction.  The expander supplies the missing
;; predicate (operand 2) from aarch64_ptrue_reg.
(define_expand "reduc_plus_scal_<mode>"
  [(set (match_operand:<VEL> 0 "register_operand")
        (unspec:<VEL>
          [(match_dup 2)
           (match_operand:SVE_I 1 "register_operand")]
          UNSPEC_ADDV))]
  "TARGET_SVE"
  {
    operands[2] = aarch64_ptrue_reg (<VPRED>mode);
  }
)
3286 | ||
915d28fe RS |
;; Predicated integer add reduction of operand 2 under predicate
;; operand 1.  The result is always 64-bits, so the destination is printed
;; with the "%d" modifier whatever the element size.
(define_insn "*reduc_plus_scal_<mode>"
  [(set (match_operand:<VEL> 0 "register_operand" "=w")
        (unspec:<VEL>
          [(match_operand:<VPRED> 1 "register_operand" "Upl")
           (match_operand:SVE_I 2 "register_operand" "w")]
          UNSPEC_ADDV))]
  "TARGET_SVE"
  "uaddv\t%d0, %1, %2.<Vetype>"
)
3296 | ||
;; Unpredicated integer reductions, one per member of SVE_INT_REDUCTION.
;; The expander supplies the missing predicate (operand 2) from
;; aarch64_ptrue_reg.
(define_expand "reduc_<optab>_scal_<mode>"
  [(set (match_operand:<VEL> 0 "register_operand")
        (unspec:<VEL>
          [(match_dup 2)
           (match_operand:SVE_I 1 "register_operand")]
          SVE_INT_REDUCTION))]
  "TARGET_SVE"
  {
    operands[2] = aarch64_ptrue_reg (<VPRED>mode);
  }
)
3308 | ||
;; Predicated integer reductions: reduce operand 2 under predicate
;; operand 1 into a scalar of the element mode.
(define_insn "*reduc_<optab>_scal_<mode>"
  [(set (match_operand:<VEL> 0 "register_operand" "=w")
        (unspec:<VEL>
          [(match_operand:<VPRED> 1 "register_operand" "Upl")
           (match_operand:SVE_I 2 "register_operand" "w")]
          SVE_INT_REDUCTION))]
  "TARGET_SVE"
  "<sve_int_op>\t%<Vetype>0, %1, %2.<Vetype>"
)
3318 | ||
915d28fe RS |
3319 | ;; ------------------------------------------------------------------------- |
3320 | ;; ---- [FP] Tree reductions | |
3321 | ;; ------------------------------------------------------------------------- | |
3322 | ;; Includes: | |
3323 | ;; - FADDV | |
3324 | ;; - FMAXNMV | |
3325 | ;; - FMAXV | |
3326 | ;; - FMINNMV | |
3327 | ;; - FMINV | |
3328 | ;; ------------------------------------------------------------------------- | |
3329 | ||
b0760a40 RS |
;; Unpredicated floating-point tree reductions, one per member of
;; SVE_FP_REDUCTION.  The expander supplies the missing predicate
;; (operand 2) from aarch64_ptrue_reg.
(define_expand "reduc_<optab>_scal_<mode>"
  [(set (match_operand:<VEL> 0 "register_operand")
        (unspec:<VEL>
          [(match_dup 2)
           (match_operand:SVE_F 1 "register_operand")]
          SVE_FP_REDUCTION))]
  "TARGET_SVE"
  {
    operands[2] = aarch64_ptrue_reg (<VPRED>mode);
  }
)
3341 | ||
b0760a40 RS |
;; Predicated floating-point tree reductions: reduce operand 2 under
;; predicate operand 1 into a scalar of the element mode.
(define_insn "*reduc_<optab>_scal_<mode>"
  [(set (match_operand:<VEL> 0 "register_operand" "=w")
        (unspec:<VEL>
          [(match_operand:<VPRED> 1 "register_operand" "Upl")
           (match_operand:SVE_F 2 "register_operand" "w")]
          SVE_FP_REDUCTION))]
  "TARGET_SVE"
  "<sve_fp_op>\t%<Vetype>0, %1, %2.<Vetype>"
)
3351 | ||
915d28fe RS |
3352 | ;; ------------------------------------------------------------------------- |
3353 | ;; ---- [FP] Left-to-right reductions | |
3354 | ;; ------------------------------------------------------------------------- | |
3355 | ;; Includes: | |
3356 | ;; - FADDA | |
3357 | ;; ------------------------------------------------------------------------- | |
3358 | ||
;; Unpredicated in-order FP reductions.  Operand 1 is the scalar input and
;; operand 2 the vector; the expander supplies the missing predicate
;; (operand 3) from aarch64_ptrue_reg.
(define_expand "fold_left_plus_<mode>"
  [(set (match_operand:<VEL> 0 "register_operand")
        (unspec:<VEL>
          [(match_dup 3)
           (match_operand:<VEL> 1 "register_operand")
           (match_operand:SVE_F 2 "register_operand")]
          UNSPEC_FADDA))]
  "TARGET_SVE"
  {
    operands[3] = aarch64_ptrue_reg (<VPRED>mode);
  }
)
3371 | ||
915d28fe RS |
;; Predicated in-order FP reductions.  Operand 3 is the predicate,
;; operand 1 the scalar input (tied to the destination) and operand 2 the
;; vector being accumulated.
(define_insn "mask_fold_left_plus_<mode>"
  [(set (match_operand:<VEL> 0 "register_operand" "=w")
        (unspec:<VEL>
          [(match_operand:<VPRED> 3 "register_operand" "Upl")
           (match_operand:<VEL> 1 "register_operand" "0")
           (match_operand:SVE_F 2 "register_operand" "w")]
          UNSPEC_FADDA))]
  "TARGET_SVE"
  "fadda\t%<Vetype>0, %3, %<Vetype>0, %2.<Vetype>"
)
3382 | ||
915d28fe RS |
3383 | ;; ========================================================================= |
3384 | ;; == Permutes | |
3385 | ;; ========================================================================= | |
3386 | ||
3387 | ;; ------------------------------------------------------------------------- | |
3388 | ;; ---- [INT,FP] General permutes | |
3389 | ;; ------------------------------------------------------------------------- | |
3390 | ;; Includes: | |
3391 | ;; - TBL | |
3392 | ;; ------------------------------------------------------------------------- | |
3393 | ||
;; General two-input permute: operand 0 is the result, operands 1 and 2
;; the input vectors and operand 3 the integer selector vector.  Only
;; available when the number of units in the mode is a compile-time
;; constant; aarch64_expand_sve_vec_perm does the expansion.
(define_expand "vec_perm<mode>"
  [(match_operand:SVE_ALL 0 "register_operand")
   (match_operand:SVE_ALL 1 "register_operand")
   (match_operand:SVE_ALL 2 "register_operand")
   (match_operand:<V_INT_EQUIV> 3 "aarch64_sve_vec_perm_operand")]
  "TARGET_SVE && GET_MODE_NUNITS (<MODE>mode).is_constant ()"
  {
    aarch64_expand_sve_vec_perm (operands[0], operands[1],
                                 operands[2], operands[3]);
    DONE;
  }
)
3406 | ||
915d28fe RS |
;; TBL: operand 1 is the data vector and operand 2 the integer vector of
;; the equivalent mode that is passed as the second TBL source.
(define_insn "*aarch64_sve_tbl<mode>"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
        (unspec:SVE_ALL
          [(match_operand:SVE_ALL 1 "register_operand" "w")
           (match_operand:<V_INT_EQUIV> 2 "register_operand" "w")]
          UNSPEC_TBL))]
  "TARGET_SVE"
  "tbl\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)
3416 | ||
915d28fe RS |
3417 | ;; ------------------------------------------------------------------------- |
3418 | ;; ---- [INT,FP] Special-purpose unary permutes | |
3419 | ;; ------------------------------------------------------------------------- | |
3420 | ;; Includes: | |
3421 | ;; - DUP | |
3422 | ;; - REV | |
3423 | ;; - REVB | |
3424 | ;; - REVH | |
3425 | ;; - REVW | |
3426 | ;; ------------------------------------------------------------------------- | |
3427 | ||
;; Duplicate one element of a vector.  Operand 2 is a constant lane index;
;; the pattern only matches when the lane's byte offset (index times
;; element size) lies in the range [0, 63].
(define_insn "*aarch64_sve_dup_lane<mode>"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
        (vec_duplicate:SVE_ALL
          (vec_select:<VEL>
            (match_operand:SVE_ALL 1 "register_operand" "w")
            (parallel [(match_operand:SI 2 "const_int_operand")]))))]
  "TARGET_SVE
   && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 0, 63)"
  "dup\t%0.<Vetype>, %1.<Vetype>[%2]"
)
3439 | ||
;; Reverse the order of elements within a full vector.
(define_insn "@aarch64_sve_rev<mode>"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
        (unspec:SVE_ALL
          [(match_operand:SVE_ALL 1 "register_operand" "w")]
          UNSPEC_REV))]
  "TARGET_SVE"
  "rev\t%0.<Vetype>, %1.<Vetype>"
)
3447 | ||
;; Reverse the order of elements within a 64-bit container.  The
;; instruction operates on .d containers under a VNx2BI predicate
;; (operand 1); the mnemonic suffix comes from the element size.
(define_insn "*aarch64_sve_rev64<mode>"
  [(set (match_operand:SVE_BHS 0 "register_operand" "=w")
        (unspec:SVE_BHS
          [(match_operand:VNx2BI 1 "register_operand" "Upl")
           (unspec:SVE_BHS
             [(match_operand:SVE_BHS 2 "register_operand" "w")]
             UNSPEC_REV64)]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "rev<Vesize>\t%0.d, %1/m, %2.d"
)
3459 | ||
915d28fe RS |
;; Reverse the order of elements within a 32-bit container.  The
;; instruction operates on .s containers under a VNx4BI predicate
;; (operand 1); the mnemonic suffix comes from the element size.
(define_insn "*aarch64_sve_rev32<mode>"
  [(set (match_operand:SVE_BH 0 "register_operand" "=w")
        (unspec:SVE_BH
          [(match_operand:VNx4BI 1 "register_operand" "Upl")
           (unspec:SVE_BH
             [(match_operand:SVE_BH 2 "register_operand" "w")]
             UNSPEC_REV32)]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "rev<Vesize>\t%0.s, %1/m, %2.s"
)
3471 | ||
915d28fe RS |
;; Reverse the order of elements within a 16-bit container.  Only byte
;; vectors (VNx16QI) need this; the instruction is REVB on .h containers
;; under a VNx8BI predicate (operand 1).
(define_insn "*aarch64_sve_rev16vnx16qi"
  [(set (match_operand:VNx16QI 0 "register_operand" "=w")
        (unspec:VNx16QI
          [(match_operand:VNx8BI 1 "register_operand" "Upl")
           (unspec:VNx16QI
             [(match_operand:VNx16QI 2 "register_operand" "w")]
             UNSPEC_REV16)]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "revb\t%0.h, %1/m, %2.h"
)
3483 | ||
915d28fe RS |
3484 | ;; ------------------------------------------------------------------------- |
3485 | ;; ---- [INT,FP] Special-purpose binary permutes | |
3486 | ;; ------------------------------------------------------------------------- | |
3487 | ;; Includes: | |
3488 | ;; - TRN1 | |
3489 | ;; - TRN2 | |
3490 | ;; - UZP1 | |
3491 | ;; - UZP2 | |
3492 | ;; - ZIP1 | |
3493 | ;; - ZIP2 | |
3494 | ;; ------------------------------------------------------------------------- | |
3495 | ||
;; Data-vector permutes that take half the elements from one vector and
;; half the elements from the other.  One insn is generated per member of
;; the PERMUTE iterator.
(define_insn "aarch64_sve_<perm_insn><mode>"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
        (unspec:SVE_ALL
          [(match_operand:SVE_ALL 1 "register_operand" "w")
           (match_operand:SVE_ALL 2 "register_operand" "w")]
          PERMUTE))]
  "TARGET_SVE"
  "<perm_insn>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)
3506 | ||
;; Concatenate two vectors and extract a subvector.  Note that the
;; immediate (third) operand is the lane index not the byte index: the
;; insn condition requires the corresponding byte offset to be in the
;; range [0, 255], and the output code rewrites operand 3 to that byte
;; offset before printing.  Operand 1 is tied to the destination.
(define_insn "*aarch64_sve_ext<mode>"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
        (unspec:SVE_ALL
          [(match_operand:SVE_ALL 1 "register_operand" "0")
           (match_operand:SVE_ALL 2 "register_operand" "w")
           (match_operand:SI 3 "const_int_operand")]
          UNSPEC_EXT))]
  "TARGET_SVE
   && IN_RANGE (INTVAL (operands[3]) * GET_MODE_SIZE (<VEL>mode), 0, 255)"
  {
    /* Convert the lane index into the byte index that EXT expects.  */
    operands[3] = GEN_INT (INTVAL (operands[3]) * GET_MODE_SIZE (<VEL>mode));
    return "ext\\t%0.b, %0.b, %2.b, #%3";
  }
)
3522 | ||
915d28fe RS |
3523 | ;; ------------------------------------------------------------------------- |
3524 | ;; ---- [PRED] Special-purpose binary permutes | |
3525 | ;; ------------------------------------------------------------------------- | |
3526 | ;; Includes: | |
3527 | ;; - TRN1 | |
3528 | ;; - TRN2 | |
3529 | ;; - UZP1 | |
3530 | ;; - UZP2 | |
3531 | ;; - ZIP1 | |
3532 | ;; - ZIP2 | |
3533 | ;; ------------------------------------------------------------------------- | |
3534 | ||
3535 | ;; Permutes that take half the elements from one predicate and half the | |
3536 | ;; elements from the other; <perm_insn> expands to the mnemonic. | |
3e2751ce | 3537 | (define_insn "*aarch64_sve_<perm_insn><mode>" |
915d28fe RS |
3538 | [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") |
3539 | (unspec:PRED_ALL [(match_operand:PRED_ALL 1 "register_operand" "Upa") | |
3540 | (match_operand:PRED_ALL 2 "register_operand" "Upa")] | |
3541 | PERMUTE))] | |
43cacb12 | 3542 | "TARGET_SVE" |
3e2751ce | 3543 | "<perm_insn>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>" |
43cacb12 RS |
3544 | ) |
3545 | ||
915d28fe RS |
3546 | ;; ========================================================================= |
3547 | ;; == Conversions | |
3548 | ;; ========================================================================= | |
3549 | ||
3550 | ;; ------------------------------------------------------------------------- | |
3551 | ;; ---- [INT<-INT] Packs | |
3552 | ;; ------------------------------------------------------------------------- | |
3553 | ;; Includes: | |
3554 | ;; - UZP1 | |
3555 | ;; ------------------------------------------------------------------------- | |
3556 | ||
43cacb12 RS |
3557 | ;; Integer pack: narrow two <VWIDE> inputs into one vector. UZP1 on the |
3558 | ;; narrower type discards the high part of each wide element. | |
3559 | (define_insn "vec_pack_trunc_<Vwide>" | |
3560 | [(set (match_operand:SVE_BHSI 0 "register_operand" "=w") | |
3561 | (unspec:SVE_BHSI | |
3562 | [(match_operand:<VWIDE> 1 "register_operand" "w") | |
3563 | (match_operand:<VWIDE> 2 "register_operand" "w")] | |
3564 | UNSPEC_PACK))] | |
3565 | "TARGET_SVE" | |
3566 | "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>" | |
3567 | ) | |
3568 | ||
915d28fe RS |
3569 | ;; ------------------------------------------------------------------------- |
3570 | ;; ---- [INT<-INT] Unpacks | |
3571 | ;; ------------------------------------------------------------------------- | |
3572 | ;; Includes: | |
3573 | ;; - SUNPKHI | |
3574 | ;; - SUNPKLO | |
3575 | ;; - UUNPKHI | |
3576 | ;; - UUNPKLO | |
3577 | ;; ------------------------------------------------------------------------- | |
3578 | ||
3579 | ;; Unpack the low or high half of a vector, where "high" refers to | |
3580 | ;; the low-numbered lanes for big-endian and the high-numbered lanes for | |
3581 | ;; little-endian; <hi_lanes_optab> picks the <su>UNPKHI or <su>UNPKLO insn. | |
3582 | (define_expand "vec_unpack<su>_<perm_hilo>_<SVE_BHSI:mode>" | |
3583 | [(match_operand:<VWIDE> 0 "register_operand") | |
3584 | (unspec:<VWIDE> [(match_operand:SVE_BHSI 1 "register_operand")] UNPACK)] | |
43cacb12 RS |
3585 | "TARGET_SVE" |
3586 | { | |
915d28fe RS |
3587 | emit_insn ((<hi_lanes_optab> |
3588 | ? gen_aarch64_sve_<su>unpkhi_<SVE_BHSI:mode> | |
3589 | : gen_aarch64_sve_<su>unpklo_<SVE_BHSI:mode>) | |
3590 | (operands[0], operands[1])); | |
3591 | DONE; | |
3592 | } | |
3593 | ) | |
3594 | ||
3595 | (define_insn "aarch64_sve_<su>unpk<perm_hilo>_<SVE_BHSI:mode>" | |
3596 | [(set (match_operand:<VWIDE> 0 "register_operand" "=w") | |
3597 | (unspec:<VWIDE> [(match_operand:SVE_BHSI 1 "register_operand" "w")] | |
3598 | UNPACK))] | |
3599 | "TARGET_SVE" | |
3600 | "<su>unpk<perm_hilo>\t%0.<Vewtype>, %1.<Vetype>" | |
3601 | ) | |
3602 | ||
3603 | ;; ------------------------------------------------------------------------- | |
3604 | ;; ---- [INT<-FP] Conversions | |
3605 | ;; ------------------------------------------------------------------------- | |
3606 | ;; Includes: | |
3607 | ;; - FCVTZS | |
3608 | ;; - FCVTZU | |
3609 | ;; ------------------------------------------------------------------------- | |
3610 | ||
3611 | ;; Unpredicated conversion of floats to integers of the same size (HF to HI, | |
3612 | ;; SF to SI or DF to DI). The expander supplies the PTRUE as operand 2. | |
3613 | (define_expand "<fix_trunc_optab><mode><v_int_equiv>2" | |
3614 | [(set (match_operand:<V_INT_EQUIV> 0 "register_operand") | |
3615 | (unspec:<V_INT_EQUIV> | |
3616 | [(match_dup 2) | |
3617 | (FIXUORS:<V_INT_EQUIV> | |
3618 | (match_operand:SVE_F 1 "register_operand"))] | |
3619 | UNSPEC_MERGE_PTRUE))] | |
3620 | "TARGET_SVE" | |
3621 | { | |
3622 | operands[2] = aarch64_ptrue_reg (<VPRED>mode); | |
43cacb12 RS |
3623 | } |
3624 | ) | |
3625 | ||
915d28fe RS |
3626 | ;; Conversion of HF to DI, SI or HI (FCVTZS/FCVTZU), predicated with a PTRUE. |
3627 | (define_insn "*<fix_trunc_optab>v16hsf<mode>2" | |
3628 | [(set (match_operand:SVE_HSDI 0 "register_operand" "=w") | |
3629 | (unspec:SVE_HSDI | |
3630 | [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
3631 | (FIXUORS:SVE_HSDI | |
3632 | (match_operand:VNx8HF 2 "register_operand" "w"))] | |
3633 | UNSPEC_MERGE_PTRUE))] | |
3634 | "TARGET_SVE" | |
3635 | "fcvtz<su>\t%0.<Vetype>, %1/m, %2.h" | |
3636 | ) | |
3637 | ||
3638 | ;; Conversion of SF to DI or SI (FCVTZS/FCVTZU), predicated with a PTRUE. | |
3639 | (define_insn "*<fix_trunc_optab>vnx4sf<mode>2" | |
3640 | [(set (match_operand:SVE_SDI 0 "register_operand" "=w") | |
3641 | (unspec:SVE_SDI | |
3642 | [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
3643 | (FIXUORS:SVE_SDI | |
3644 | (match_operand:VNx4SF 2 "register_operand" "w"))] | |
3645 | UNSPEC_MERGE_PTRUE))] | |
3646 | "TARGET_SVE" | |
3647 | "fcvtz<su>\t%0.<Vetype>, %1/m, %2.s" | |
3648 | ) | |
3649 | ||
3650 | ;; Conversion of DF to DI or SI (FCVTZS/FCVTZU), predicated with a PTRUE. | |
3651 | (define_insn "*<fix_trunc_optab>vnx2df<mode>2" | |
3652 | [(set (match_operand:SVE_SDI 0 "register_operand" "=w") | |
3653 | (unspec:SVE_SDI | |
3654 | [(match_operand:VNx2BI 1 "register_operand" "Upl") | |
3655 | (FIXUORS:SVE_SDI | |
3656 | (match_operand:VNx2DF 2 "register_operand" "w"))] | |
3657 | UNSPEC_MERGE_PTRUE))] | |
3658 | "TARGET_SVE" | |
3659 | "fcvtz<su>\t%0.<Vetype>, %1/m, %2.d" | |
3660 | ) | |
3661 | ||
3662 | ;; ------------------------------------------------------------------------- | |
3663 | ;; ---- [INT<-FP] Packs | |
3664 | ;; ------------------------------------------------------------------------- | |
3665 | ;; The patterns in this section are synthetic. | |
3666 | ;; ------------------------------------------------------------------------- | |
3667 | ||
43cacb12 RS |
3668 | ;; Convert two vectors of DF to SI and pack the results (UZP1) into one vector. |
3669 | (define_expand "vec_pack_<su>fix_trunc_vnx2df" | |
3670 | [(set (match_dup 4) | |
3671 | (unspec:VNx4SI | |
3672 | [(match_dup 3) | |
3673 | (FIXUORS:VNx4SI (match_operand:VNx2DF 1 "register_operand"))] | |
3674 | UNSPEC_MERGE_PTRUE)) | |
3675 | (set (match_dup 5) | |
3676 | (unspec:VNx4SI | |
3677 | [(match_dup 3) | |
3678 | (FIXUORS:VNx4SI (match_operand:VNx2DF 2 "register_operand"))] | |
3679 | UNSPEC_MERGE_PTRUE)) | |
3680 | (set (match_operand:VNx4SI 0 "register_operand") | |
3681 | (unspec:VNx4SI [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))] | |
3682 | "TARGET_SVE" | |
3683 | { | |
16de3637 | 3684 | operands[3] = aarch64_ptrue_reg (VNx2BImode); |
43cacb12 RS |
3685 | operands[4] = gen_reg_rtx (VNx4SImode); |
3686 | operands[5] = gen_reg_rtx (VNx4SImode); | |
3687 | } | |
3688 | ) | |
f1739b48 | 3689 | |
915d28fe RS |
3690 | ;; ------------------------------------------------------------------------- |
3691 | ;; ---- [INT<-FP] Unpacks | |
3692 | ;; ------------------------------------------------------------------------- | |
3693 | ;; No patterns here yet! | |
3694 | ;; ------------------------------------------------------------------------- | |
9d4ac06e | 3695 | |
915d28fe RS |
3696 | ;; ------------------------------------------------------------------------- |
3697 | ;; ---- [FP<-INT] Conversions | |
3698 | ;; ------------------------------------------------------------------------- | |
3699 | ;; Includes: | |
3700 | ;; - SCVTF | |
3701 | ;; - UCVTF | |
3702 | ;; ------------------------------------------------------------------------- | |
a08acce8 | 3703 | |
915d28fe RS |
3704 | ;; Unpredicated conversion of integers to floats of the same size (HI to HF, |
3705 | ;; SI to SF or DI to DF). The expander supplies the PTRUE as operand 2. | |
3706 | (define_expand "<optab><v_int_equiv><mode>2" | |
3707 | [(set (match_operand:SVE_F 0 "register_operand") | |
a08acce8 | 3708 | (unspec:SVE_F |
915d28fe RS |
3709 | [(match_dup 2) |
3710 | (FLOATUORS:SVE_F | |
3711 | (match_operand:<V_INT_EQUIV> 1 "register_operand"))] | |
3712 | UNSPEC_MERGE_PTRUE))] | |
a08acce8 | 3713 | "TARGET_SVE" |
f4fde1b3 | 3714 | { |
915d28fe | 3715 | operands[2] = aarch64_ptrue_reg (<VPRED>mode); |
f4fde1b3 | 3716 | } |
b41d1f6e RS |
3717 | ) |
3718 | ||
915d28fe RS |
3719 | ;; Conversion of DI, SI or HI to the same number of HFs, predicated |
3720 | ;; with a PTRUE (operand 1). <su_optab> selects SCVTF or UCVTF. | |
3721 | (define_insn "*<optab><mode>vnx8hf2" | |
3722 | [(set (match_operand:VNx8HF 0 "register_operand" "=w") | |
3723 | (unspec:VNx8HF | |
3724 | [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
3725 | (FLOATUORS:VNx8HF | |
3726 | (match_operand:SVE_HSDI 2 "register_operand" "w"))] | |
3727 | UNSPEC_MERGE_PTRUE))] | |
3728 | "TARGET_SVE" | |
3729 | "<su_optab>cvtf\t%0.h, %1/m, %2.<Vetype>" | |
b41d1f6e RS |
3730 | ) |
3731 | ||
915d28fe RS |
3732 | ;; Conversion of DI or SI to the same number of SFs (SCVTF/UCVTF) under a PTRUE. |
3733 | (define_insn "*<optab><mode>vnx4sf2" | |
3734 | [(set (match_operand:VNx4SF 0 "register_operand" "=w") | |
3735 | (unspec:VNx4SF | |
3736 | [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
3737 | (FLOATUORS:VNx4SF | |
3738 | (match_operand:SVE_SDI 2 "register_operand" "w"))] | |
3739 | UNSPEC_MERGE_PTRUE))] | |
f1739b48 | 3740 | "TARGET_SVE" |
915d28fe | 3741 | "<su_optab>cvtf\t%0.s, %1/m, %2.<Vetype>" |
f1739b48 | 3742 | ) |
6c9c7b73 | 3743 | |
915d28fe RS |
3744 | ;; Conversion of DI or SI to DF (SCVTF/UCVTF), predicated with a PTRUE. |
3745 | (define_insn "aarch64_sve_<optab><mode>vnx2df2" | |
3746 | [(set (match_operand:VNx2DF 0 "register_operand" "=w") | |
3747 | (unspec:VNx2DF | |
3748 | [(match_operand:VNx2BI 1 "register_operand" "Upl") | |
3749 | (FLOATUORS:VNx2DF | |
3750 | (match_operand:SVE_SDI 2 "register_operand" "w"))] | |
3751 | UNSPEC_MERGE_PTRUE))] | |
6c9c7b73 | 3752 | "TARGET_SVE" |
915d28fe RS |
3753 | "<su_optab>cvtf\t%0.d, %1/m, %2.<Vetype>" |
3754 | ) | |
6c9c7b73 | 3755 | |
915d28fe RS |
3756 | ;; ------------------------------------------------------------------------- |
3757 | ;; ---- [FP<-INT] Packs | |
3758 | ;; ------------------------------------------------------------------------- | |
3759 | ;; No patterns here yet! | |
3760 | ;; ------------------------------------------------------------------------- | |
6c9c7b73 | 3761 | |
915d28fe RS |
3762 | ;; ------------------------------------------------------------------------- |
3763 | ;; ---- [FP<-INT] Unpacks | |
3764 | ;; ------------------------------------------------------------------------- | |
3765 | ;; The patterns in this section are synthetic. | |
3766 | ;; ------------------------------------------------------------------------- | |
3767 | ||
3768 | ;; Unpack one half of a VNx4SI to VNx2DF. First ZIP1/ZIP2 the input with | |
3769 | ;; itself to get the unpacked values as a VNx4SI, then convert the | |
3770 | ;; unpacked VNx4SI to VNx2DF under a VNx2BI PTRUE. | |
3771 | (define_expand "vec_unpack<su_optab>_float_<perm_hilo>_vnx4si" | |
3772 | [(match_operand:VNx2DF 0 "register_operand") | |
3773 | (FLOATUORS:VNx2DF | |
3774 | (unspec:VNx2DI [(match_operand:VNx4SI 1 "register_operand")] | |
3775 | UNPACK_UNSIGNED))] | |
3776 | "TARGET_SVE" | |
3777 | { | |
3778 | /* Use ZIP to do the unpack, since we don't care about the upper halves | |
3779 | and since it has the nice property of not needing any subregs. | |
3780 | If using UUNPK* turns out to be preferable, we could model it as | |
3781 | a ZIP whose first operand is zero. */ | |
3782 | rtx temp = gen_reg_rtx (VNx4SImode); | |
3783 | emit_insn ((<hi_lanes_optab> | |
3784 | ? gen_aarch64_sve_zip2vnx4si | |
3785 | : gen_aarch64_sve_zip1vnx4si) | |
3786 | (temp, operands[1], operands[1])); | |
3787 | rtx ptrue = aarch64_ptrue_reg (VNx2BImode); | |
3788 | emit_insn (gen_aarch64_sve_<FLOATUORS:optab>vnx4sivnx2df2 (operands[0], | |
3789 | ptrue, temp)); | |
6c9c7b73 AM |
3790 | DONE; |
3791 | } | |
3792 | ) | |
3793 | ||
915d28fe RS |
3794 | ;; ------------------------------------------------------------------------- |
3795 | ;; ---- [FP<-FP] Packs | |
3796 | ;; ------------------------------------------------------------------------- | |
3797 | ;; Includes: | |
3798 | ;; - FCVT | |
3799 | ;; ------------------------------------------------------------------------- | |
3800 | ||
3801 | ;; Convert two vectors of DF to SF, or two vectors of SF to HF, and pack | |
3802 | ;; the results into a single vector with UZP1 (operand 3 is the PTRUE). | |
3803 | (define_expand "vec_pack_trunc_<Vwide>" | |
3804 | [(set (match_dup 4) | |
3805 | (unspec:SVE_HSF | |
3806 | [(match_dup 3) | |
3807 | (unspec:SVE_HSF [(match_operand:<VWIDE> 1 "register_operand")] | |
3808 | UNSPEC_FLOAT_CONVERT)] | |
3809 | UNSPEC_MERGE_PTRUE)) | |
3810 | (set (match_dup 5) | |
3811 | (unspec:SVE_HSF | |
3812 | [(match_dup 3) | |
3813 | (unspec:SVE_HSF [(match_operand:<VWIDE> 2 "register_operand")] | |
3814 | UNSPEC_FLOAT_CONVERT)] | |
3815 | UNSPEC_MERGE_PTRUE)) | |
3816 | (set (match_operand:SVE_HSF 0 "register_operand") | |
3817 | (unspec:SVE_HSF [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))] | |
6c9c7b73 AM |
3818 | "TARGET_SVE" |
3819 | { | |
915d28fe RS |
3820 | operands[3] = aarch64_ptrue_reg (<VWIDE_PRED>mode); |
3821 | operands[4] = gen_reg_rtx (<MODE>mode); | |
3822 | operands[5] = gen_reg_rtx (<MODE>mode); | |
6c9c7b73 AM |
3823 | } |
3824 | ) | |
9feeafd7 | 3825 | |
915d28fe RS |
3826 | ;; Conversion of DFs to the same number of SFs, or SFs to the same number |
3827 | ;; of HFs, predicated with a <VWIDE_PRED> PTRUE (operand 1). | |
3828 | (define_insn "*trunc<Vwide><mode>2" | |
3829 | [(set (match_operand:SVE_HSF 0 "register_operand" "=w") | |
3830 | (unspec:SVE_HSF | |
3831 | [(match_operand:<VWIDE_PRED> 1 "register_operand" "Upl") | |
3832 | (unspec:SVE_HSF | |
3833 | [(match_operand:<VWIDE> 2 "register_operand" "w")] | |
3834 | UNSPEC_FLOAT_CONVERT)] | |
3835 | UNSPEC_MERGE_PTRUE))] | |
9feeafd7 | 3836 | "TARGET_SVE" |
915d28fe | 3837 | "fcvt\t%0.<Vetype>, %1/m, %2.<Vewtype>" |
9feeafd7 | 3838 | ) |
a9fad8fe | 3839 | |
915d28fe RS |
3840 | ;; ------------------------------------------------------------------------- |
3841 | ;; ---- [FP<-FP] Unpacks | |
3842 | ;; ------------------------------------------------------------------------- | |
3843 | ;; Includes: | |
3844 | ;; - FCVT | |
3845 | ;; ------------------------------------------------------------------------- | |
3846 | ||
3847 | ;; Unpack one half of a VNx4SF to VNx2DF, or one half of a VNx8HF to VNx4SF. | |
3848 | ;; First unpack the source without conversion (ZIP1/ZIP2 of the source with | |
3849 | ;; itself), then float-convert the unpacked source under a PTRUE. | |
3850 | (define_expand "vec_unpacks_<perm_hilo>_<mode>" | |
3851 | [(match_operand:<VWIDE> 0 "register_operand") | |
3852 | (unspec:SVE_HSF [(match_operand:SVE_HSF 1 "register_operand")] | |
3853 | UNPACK_UNSIGNED)] | |
a9fad8fe AM |
3854 | "TARGET_SVE" |
3855 | { | |
915d28fe RS |
3856 | /* Use ZIP to do the unpack, since we don't care about the upper halves |
3857 | and since it has the nice property of not needing any subregs. | |
3858 | If using UUNPK* turns out to be preferable, we could model it as | |
3859 | a ZIP whose first operand is zero. */ | |
3860 | rtx temp = gen_reg_rtx (<MODE>mode); | |
3861 | emit_insn ((<hi_lanes_optab> | |
3862 | ? gen_aarch64_sve_zip2<mode> | |
3863 | : gen_aarch64_sve_zip1<mode>) | |
3864 | (temp, operands[1], operands[1])); | |
3865 | rtx ptrue = aarch64_ptrue_reg (<VWIDE_PRED>mode); | |
3866 | emit_insn (gen_aarch64_sve_extend<mode><Vwide>2 (operands[0], | |
3867 | ptrue, temp)); | |
a9fad8fe AM |
3868 | DONE; |
3869 | } | |
3870 | ) | |
3871 | ||
915d28fe RS |
3872 | ;; Conversion of SFs to the same number of DFs, or HFs to the same number |
3873 | ;; of SFs, predicated with a <VWIDE_PRED> PTRUE (operand 1). | |
3874 | (define_insn "aarch64_sve_extend<mode><Vwide>2" | |
3875 | [(set (match_operand:<VWIDE> 0 "register_operand" "=w") | |
3876 | (unspec:<VWIDE> | |
3877 | [(match_operand:<VWIDE_PRED> 1 "register_operand" "Upl") | |
3878 | (unspec:<VWIDE> | |
3879 | [(match_operand:SVE_HSF 2 "register_operand" "w")] | |
3880 | UNSPEC_FLOAT_CONVERT)] | |
a9fad8fe AM |
3881 | UNSPEC_MERGE_PTRUE))] |
3882 | "TARGET_SVE" | |
915d28fe | 3883 | "fcvt\t%0.<Vewtype>, %1/m, %2.<Vetype>" |
a9fad8fe AM |
3884 | ) |
3885 | ||
915d28fe RS |
3886 | ;; ------------------------------------------------------------------------- |
3887 | ;; ---- [PRED<-PRED] Packs | |
3888 | ;; ------------------------------------------------------------------------- | |
3889 | ;; Includes: | |
3890 | ;; - UZP1 | |
3891 | ;; ------------------------------------------------------------------------- | |
a9fad8fe | 3892 | |
915d28fe RS |
3893 | ;; Predicate pack: narrow two <VWIDE> predicates into one. UZP1 on the |
3894 | ;; narrower type discards the high part of each wide element. | |
3895 | (define_insn "vec_pack_trunc_<Vwide>" | |
3896 | [(set (match_operand:PRED_BHS 0 "register_operand" "=Upa") | |
3897 | (unspec:PRED_BHS | |
3898 | [(match_operand:<VWIDE> 1 "register_operand" "Upa") | |
3899 | (match_operand:<VWIDE> 2 "register_operand" "Upa")] | |
3900 | UNSPEC_PACK))] | |
a9fad8fe | 3901 | "TARGET_SVE" |
915d28fe | 3902 | "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>" |
a9fad8fe | 3903 | ) |
3a0afad0 | 3904 | |
915d28fe RS |
3905 | ;; ------------------------------------------------------------------------- |
3906 | ;; ---- [PRED<-PRED] Unpacks | |
3907 | ;; ------------------------------------------------------------------------- | |
3908 | ;; Includes: | |
3909 | ;; - PUNPKHI | |
3910 | ;; - PUNPKLO | |
3911 | ;; ------------------------------------------------------------------------- | |
3912 | ||
3913 | ;; Unpack the low or high half of a predicate, where "high" refers to | |
3914 | ;; the low-numbered lanes for big-endian and the high-numbered lanes for | |
3915 | ;; little-endian. Every <su> variant expands to the same PUNPKHI/PUNPKLO. | |
3916 | (define_expand "vec_unpack<su>_<perm_hilo>_<mode>" | |
3917 | [(match_operand:<VWIDE> 0 "register_operand") | |
3918 | (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand")] | |
3919 | UNPACK)] | |
3a0afad0 PK |
3920 | "TARGET_SVE" |
3921 | { | |
915d28fe RS |
3922 | emit_insn ((<hi_lanes_optab> |
3923 | ? gen_aarch64_sve_punpkhi_<PRED_BHS:mode> | |
3924 | : gen_aarch64_sve_punpklo_<PRED_BHS:mode>) | |
3925 | (operands[0], operands[1])); | |
3a0afad0 PK |
3926 | DONE; |
3927 | } | |
3928 | ) | |
915d28fe RS |
3929 |
3930 | (define_insn "aarch64_sve_punpk<perm_hilo>_<mode>" | |
3931 | [(set (match_operand:<VWIDE> 0 "register_operand" "=Upa") | |
3932 | (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand" "Upa")] | |
3933 | UNPACK_UNSIGNED))] | |
3934 | "TARGET_SVE" | |
3935 | "punpk<perm_hilo>\t%0.h, %1.b" | |
3936 | ) |