]>
Commit | Line | Data |
---|---|---|
43cacb12 | 1 | ;; Machine description for AArch64 SVE. |
7adcbafe | 2 | ;; Copyright (C) 2009-2022 Free Software Foundation, Inc. |
43cacb12 RS |
3 | ;; Contributed by ARM Ltd. |
4 | ;; | |
5 | ;; This file is part of GCC. | |
6 | ;; | |
7 | ;; GCC is free software; you can redistribute it and/or modify it | |
8 | ;; under the terms of the GNU General Public License as published by | |
9 | ;; the Free Software Foundation; either version 3, or (at your option) | |
10 | ;; any later version. | |
11 | ;; | |
12 | ;; GCC is distributed in the hope that it will be useful, but | |
13 | ;; WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 | ;; General Public License for more details. | |
16 | ;; | |
17 | ;; You should have received a copy of the GNU General Public License | |
18 | ;; along with GCC; see the file COPYING3. If not see | |
19 | ;; <http://www.gnu.org/licenses/>. | |
20 | ||
915d28fe RS |
21 | ;; The file is organised into the following sections (search for the full |
22 | ;; line): | |
23 | ;; | |
24 | ;; == General notes | |
25 | ;; ---- Note on the handling of big-endian SVE | |
34467289 | 26 | ;; ---- Description of UNSPEC_PTEST |
00fa90d9 | 27 | ;; ---- Description of UNSPEC_PRED_Z |
06308276 | 28 | ;; ---- Note on predicated integer arithmetic and UNSPEC_PRED_X
c9c5a809 | 29 | ;; ---- Note on predicated FP arithmetic patterns and GP "strictness" |
624d0f07 | 30 | ;; ---- Note on FFR handling |
915d28fe RS |
31 | ;; |
32 | ;; == Moves | |
33 | ;; ---- Moves of single vectors | |
34 | ;; ---- Moves of multiple vectors | |
35 | ;; ---- Moves of predicates | |
624d0f07 | 36 | ;; ---- Moves relating to the FFR |
915d28fe RS |
37 | ;; |
38 | ;; == Loads | |
39 | ;; ---- Normal contiguous loads | |
624d0f07 RS |
40 | ;; ---- Extending contiguous loads |
41 | ;; ---- First-faulting contiguous loads | |
42 | ;; ---- First-faulting extending contiguous loads | |
43 | ;; ---- Non-temporal contiguous loads | |
915d28fe | 44 | ;; ---- Normal gather loads |
624d0f07 RS |
45 | ;; ---- Extending gather loads |
46 | ;; ---- First-faulting gather loads | |
47 | ;; ---- First-faulting extending gather loads | |
48 | ;; | |
49 | ;; == Prefetches | |
50 | ;; ---- Contiguous prefetches | |
51 | ;; ---- Gather prefetches | |
915d28fe RS |
52 | ;; |
53 | ;; == Stores | |
54 | ;; ---- Normal contiguous stores | |
624d0f07 RS |
55 | ;; ---- Truncating contiguous stores |
56 | ;; ---- Non-temporal contiguous stores | |
915d28fe | 57 | ;; ---- Normal scatter stores |
624d0f07 | 58 | ;; ---- Truncating scatter stores |
915d28fe RS |
59 | ;; |
60 | ;; == Vector creation | |
61 | ;; ---- [INT,FP] Duplicate element | |
62 | ;; ---- [INT,FP] Initialize from individual elements | |
63 | ;; ---- [INT] Linear series | |
64 | ;; ---- [PRED] Duplicate element | |
65 | ;; | |
66 | ;; == Vector decomposition | |
67 | ;; ---- [INT,FP] Extract index | |
68 | ;; ---- [INT,FP] Extract active element | |
69 | ;; ---- [PRED] Extract index | |
70 | ;; | |
71 | ;; == Unary arithmetic | |
72 | ;; ---- [INT] General unary arithmetic corresponding to rtx codes | |
d7a09c44 | 73 | ;; ---- [INT] General unary arithmetic corresponding to unspecs |
e58703e2 | 74 | ;; ---- [INT] Sign and zero extension |
2d56600c | 75 | ;; ---- [INT] Truncation |
e0a0be93 | 76 | ;; ---- [INT] Logical inverse |
624d0f07 | 77 | ;; ---- [FP<-INT] General unary arithmetic that maps to unspecs |
d45b20a5 | 78 | ;; ---- [FP] General unary arithmetic corresponding to unspecs |
a0ee8352 RS |
79 | ;; ---- [FP] Square root |
80 | ;; ---- [FP] Reciprocal square root | |
915d28fe RS |
81 | ;; ---- [PRED] Inverse |
82 | ||
83 | ;; == Binary arithmetic | |
84 | ;; ---- [INT] General binary arithmetic corresponding to rtx codes | |
85 | ;; ---- [INT] Addition | |
86 | ;; ---- [INT] Subtraction | |
a229966c | 87 | ;; ---- [INT] Take address |
915d28fe | 88 | ;; ---- [INT] Absolute difference |
624d0f07 | 89 | ;; ---- [INT] Saturating addition and subtraction |
915d28fe RS |
90 | ;; ---- [INT] Highpart multiplication |
91 | ;; ---- [INT] Division | |
92 | ;; ---- [INT] Binary logical operations | |
93 | ;; ---- [INT] Binary logical operations (inverted second input) | |
624d0f07 | 94 | ;; ---- [INT] Shifts (rounding towards -Inf) |
c0c2f013 | 95 | ;; ---- [INT] Shifts (rounding towards 0) |
624d0f07 | 96 | ;; ---- [FP<-INT] General binary arithmetic corresponding to unspecs |
915d28fe RS |
97 | ;; ---- [FP] General binary arithmetic corresponding to rtx codes |
98 | ;; ---- [FP] General binary arithmetic corresponding to unspecs | |
99 | ;; ---- [FP] Addition | |
624d0f07 | 100 | ;; ---- [FP] Complex addition |
915d28fe RS |
101 | ;; ---- [FP] Subtraction |
102 | ;; ---- [FP] Absolute difference | |
103 | ;; ---- [FP] Multiplication | |
04f307cb | 104 | ;; ---- [FP] Division |
915d28fe RS |
105 | ;; ---- [FP] Binary logical operations |
106 | ;; ---- [FP] Sign copying | |
107 | ;; ---- [FP] Maximum and minimum | |
108 | ;; ---- [PRED] Binary logical operations | |
109 | ;; ---- [PRED] Binary logical operations (inverted second input) | |
110 | ;; ---- [PRED] Binary logical operations (inverted result) | |
111 | ;; | |
112 | ;; == Ternary arithmetic | |
113 | ;; ---- [INT] MLA and MAD | |
114 | ;; ---- [INT] MLS and MSB | |
115 | ;; ---- [INT] Dot product | |
116 | ;; ---- [INT] Sum of absolute differences | |
36696774 | 117 | ;; ---- [INT] Matrix multiply-accumulate |
915d28fe | 118 | ;; ---- [FP] General ternary arithmetic corresponding to unspecs |
624d0f07 RS |
119 | ;; ---- [FP] Complex multiply-add |
120 | ;; ---- [FP] Trigonometric multiply-add | |
896dff99 | 121 | ;; ---- [FP] Bfloat16 long ternary arithmetic (SF,BF,BF) |
36696774 | 122 | ;; ---- [FP] Matrix multiply-accumulate |
915d28fe RS |
123 | ;; |
124 | ;; == Comparisons and selects | |
125 | ;; ---- [INT,FP] Select based on predicates | |
126 | ;; ---- [INT,FP] Compare and select | |
127 | ;; ---- [INT] Comparisons | |
128 | ;; ---- [INT] While tests | |
42b4e87d RS |
129 | ;; ---- [FP] Direct comparisons |
130 | ;; ---- [FP] Absolute comparisons | |
624d0f07 | 131 | ;; ---- [PRED] Select |
915d28fe RS |
132 | ;; ---- [PRED] Test bits |
133 | ;; | |
134 | ;; == Reductions | |
135 | ;; ---- [INT,FP] Conditional reductions | |
136 | ;; ---- [INT] Tree reductions | |
137 | ;; ---- [FP] Tree reductions | |
138 | ;; ---- [FP] Left-to-right reductions | |
139 | ;; | |
140 | ;; == Permutes | |
141 | ;; ---- [INT,FP] General permutes | |
142 | ;; ---- [INT,FP] Special-purpose unary permutes | |
143 | ;; ---- [INT,FP] Special-purpose binary permutes | |
28350fd1 | 144 | ;; ---- [PRED] Special-purpose unary permutes |
915d28fe RS |
145 | ;; ---- [PRED] Special-purpose binary permutes |
146 | ;; | |
147 | ;; == Conversions | |
148 | ;; ---- [INT<-INT] Packs | |
149 | ;; ---- [INT<-INT] Unpacks | |
150 | ;; ---- [INT<-FP] Conversions | |
151 | ;; ---- [INT<-FP] Packs | |
152 | ;; ---- [INT<-FP] Unpacks | |
153 | ;; ---- [FP<-INT] Conversions | |
154 | ;; ---- [FP<-INT] Packs | |
155 | ;; ---- [FP<-INT] Unpacks | |
156 | ;; ---- [FP<-FP] Packs | |
896dff99 | 157 | ;; ---- [FP<-FP] Packs (bfloat16) |
915d28fe RS |
158 | ;; ---- [FP<-FP] Unpacks |
159 | ;; ---- [PRED<-PRED] Packs | |
160 | ;; ---- [PRED<-PRED] Unpacks | |
624d0f07 RS |
161 | ;; |
162 | ;; == Vector partitioning | |
163 | ;; ---- [PRED] Unary partitioning | |
164 | ;; ---- [PRED] Binary partitioning | |
165 | ;; ---- [PRED] Scalarization | |
166 | ;; | |
167 | ;; == Counting elements | |
168 | ;; ---- [INT] Count elements in a pattern (scalar) | |
169 | ;; ---- [INT] Increment by the number of elements in a pattern (scalar) | |
170 | ;; ---- [INT] Increment by the number of elements in a pattern (vector) | |
171 | ;; ---- [INT] Decrement by the number of elements in a pattern (scalar) | |
172 | ;; ---- [INT] Decrement by the number of elements in a pattern (vector) | |
173 | ;; ---- [INT] Count elements in a predicate (scalar) | |
174 | ;; ---- [INT] Increment by the number of elements in a predicate (scalar) | |
175 | ;; ---- [INT] Increment by the number of elements in a predicate (vector) | |
176 | ;; ---- [INT] Decrement by the number of elements in a predicate (scalar) | |
177 | ;; ---- [INT] Decrement by the number of elements in a predicate (vector) | |
915d28fe RS |
178 | |
179 | ;; ========================================================================= | |
180 | ;; == General notes | |
181 | ;; ========================================================================= | |
182 | ;; | |
183 | ;; ------------------------------------------------------------------------- | |
184 | ;; ---- Note on the handling of big-endian SVE | |
185 | ;; ------------------------------------------------------------------------- | |
43cacb12 RS |
186 | ;; |
187 | ;; On big-endian systems, Advanced SIMD mov<mode> patterns act in the | |
188 | ;; same way as movdi or movti would: the first byte of memory goes | |
189 | ;; into the most significant byte of the register and the last byte | |
190 | ;; of memory goes into the least significant byte of the register. | |
191 | ;; This is the most natural ordering for Advanced SIMD and matches | |
192 | ;; the ABI layout for 64-bit and 128-bit vector types. | |
193 | ;; | |
194 | ;; As a result, the order of bytes within the register is what GCC | |
195 | ;; expects for a big-endian target, and subreg offsets therefore work | |
196 | ;; as expected, with the first element in memory having subreg offset 0 | |
197 | ;; and the last element in memory having the subreg offset associated | |
198 | ;; with a big-endian lowpart. However, this ordering also means that | |
199 | ;; GCC's lane numbering does not match the architecture's numbering: | |
200 | ;; GCC always treats the element at the lowest address in memory | |
201 | ;; (subreg offset 0) as element 0, while the architecture treats | |
202 | ;; the least significant end of the register as element 0. | |
203 | ;; | |
204 | ;; The situation for SVE is different. We want the layout of the | |
205 | ;; SVE register to be the same for mov<mode> as it is for maskload<mode>: | |
206 | ;; logically, a mov<mode> load must be indistinguishable from a | |
207 | ;; maskload<mode> whose mask is all true. We therefore need the | |
208 | ;; register layout to match LD1 rather than LDR. The ABI layout of | |
209 | ;; SVE types also matches LD1 byte ordering rather than LDR byte ordering. | |
210 | ;; | |
211 | ;; As a result, the architecture lane numbering matches GCC's lane | |
212 | ;; numbering, with element 0 always being the first in memory. | |
213 | ;; However: | |
214 | ;; | |
215 | ;; - Applying a subreg offset to a register does not give the element | |
216 | ;; that GCC expects: the first element in memory has the subreg offset | |
217 | ;; associated with a big-endian lowpart while the last element in memory | |
218 | ;; has subreg offset 0. We handle this via TARGET_CAN_CHANGE_MODE_CLASS. | |
219 | ;; | |
220 | ;; - We cannot use LDR and STR for spill slots that might be accessed | |
221 | ;; via subregs, since although the elements have the order GCC expects, | |
222 | ;; the order of the bytes within the elements is different. We instead | |
223 | ;; access spill slots via LD1 and ST1, using secondary reloads to | |
224 | ;; reserve a predicate register. | |
34467289 RS |
225 | ;; |
226 | ;; ------------------------------------------------------------------------- | |
227 | ;; ---- Description of UNSPEC_PTEST | |
228 | ;; ------------------------------------------------------------------------- | |
229 | ;; | |
230 | ;; SVE provides a PTEST instruction for testing the active lanes of a | |
231 | ;; predicate and setting the flags based on the result. The associated | |
232 | ;; condition code tests are: | |
233 | ;; | |
234 | ;; - any (= ne): at least one active bit is set | |
235 | ;; - none (= eq): all active bits are clear (*) | |
236 | ;; - first (= mi): the first active bit is set | |
237 | ;; - nfrst (= pl): the first active bit is clear (*) | |
238 | ;; - last (= cc): the last active bit is set | |
239 | ;; - nlast (= cs): the last active bit is clear (*) | |
240 | ;; | |
241 | ;; where the conditions marked (*) are also true when there are no active | |
242 | ;; lanes (i.e. when the governing predicate is a PFALSE). The flags results | |
243 | ;; of a PTEST use the condition code mode CC_NZC. | |
244 | ;; | |
245 | ;; PTEST is always a .B operation (i.e. it always operates on VNx16BI). | |
246 | ;; This means that for other predicate modes, we need a governing predicate | |
247 | ;; in which all bits are defined. | |
248 | ;; | |
249 | ;; For example, most predicated .H operations ignore the odd bits of the | |
250 | ;; governing predicate, so that an active lane is represented by the | |
251 | ;; bits "1x" and an inactive lane by the bits "0x", where "x" can be | |
252 | ;; any value. To test a .H predicate, we instead need "10" and "00" | |
253 | ;; respectively, so that the condition only tests the even bits of the | |
254 | ;; predicate. | |
255 | ;; | |
256 | ;; Several instructions set the flags as a side-effect, in the same way | |
257 | ;; that a separate PTEST would. It's important for code quality that we | |
258 | ;; use these flags results as often as possible, particularly in the case | |
259 | ;; of WHILE* and RDFFR. | |
260 | ;; | |
261 | ;; Also, some of the instructions that set the flags are unpredicated | |
262 | ;; and instead implicitly test all .B, .H, .S or .D elements, as though | |
263 | ;; they were predicated on a PTRUE of that size. For example, a .S | |
264 | ;; WHILELO sets the flags in the same way as a PTEST with a .S PTRUE | |
265 | ;; would. | |
266 | ;; | |
267 | ;; We therefore need to represent PTEST operations in a way that | |
268 | ;; makes it easy to combine them with both predicated and unpredicated | |
269 | ;; operations, while using a VNx16BI governing predicate for all | |
270 | ;; predicate modes. We do this using: | |
271 | ;; | |
272 | ;; (unspec:CC_NZC [gp cast_gp ptrue_flag op] UNSPEC_PTEST) | |
273 | ;; | |
274 | ;; where: | |
275 | ;; | |
276 | ;; - GP is the real VNx16BI governing predicate | |
277 | ;; | |
278 | ;; - CAST_GP is GP cast to the mode of OP. All bits dropped by casting | |
279 | ;; GP to CAST_GP are guaranteed to be clear in GP. | |
280 | ;; | |
281 | ;; - PTRUE_FLAG is a CONST_INT (conceptually of mode SI) that has the value | |
282 | ;; SVE_KNOWN_PTRUE if we know that CAST_GP (rather than GP) is all-true and | |
283 | ;; SVE_MAYBE_NOT_PTRUE otherwise. | |
284 | ;; | |
285 | ;; - OP is the predicate we want to test, of the same mode as CAST_GP. | |
c9c5a809 RS |
286 | ;; |
287 | ;; ------------------------------------------------------------------------- | |
00fa90d9 RS |
288 | ;; ---- Description of UNSPEC_PRED_Z |
289 | ;; ------------------------------------------------------------------------- | |
290 | ;; | |
291 | ;; SVE integer comparisons are predicated and return zero for inactive | |
292 | ;; lanes. Sometimes we use them with predicates that are all-true and | |
293 | ;; sometimes we use them with general predicates. | |
294 | ;; | |
295 | ;; The integer comparisons also set the flags and so build in the effect | |
296 | ;; of a PTEST. We therefore want to be able to combine integer comparison | |
297 | ;; patterns with PTESTs of the result. One difficulty with doing this is | |
298 | ;; that (as noted above) the PTEST is always a .B operation and so can place | |
299 | ;; stronger requirements on the governing predicate than the comparison does. | |
300 | ;; | |
301 | ;; For example, when applying a separate PTEST to the result of a full-vector | |
302 | ;; .H comparison, the PTEST must be predicated on a .H PTRUE instead of a | |
303 | ;; .B PTRUE. In contrast, the comparison might be predicated on either | |
304 | ;; a .H PTRUE or a .B PTRUE, since the values of odd-indexed predicate | |
305 | ;; bits don't matter for .H operations. | |
306 | ;; | |
307 | ;; We therefore can't rely on a full-vector comparison using the same | |
308 | ;; predicate register as a following PTEST. We instead need to remember | |
309 | ;; whether a comparison is known to be a full-vector comparison and use | |
310 | ;; this information in addition to a check for equal predicate registers. | |
311 | ;; At the same time, it's useful to have a common representation for all | |
312 | ;; integer comparisons, so that they can be handled by a single set of | |
313 | ;; patterns. | |
314 | ;; | |
315 | ;; We therefore take a similar approach to UNSPEC_PTEST above and use: | |
316 | ;; | |
317 | ;; (unspec:<M:VPRED> [gp ptrue_flag (code:M op0 op1)] UNSPEC_PRED_Z) | |
318 | ;; | |
319 | ;; where: | |
320 | ;; | |
321 | ;; - GP is the governing predicate, of mode <M:VPRED> | |
322 | ;; | |
323 | ;; - PTRUE_FLAG is a CONST_INT (conceptually of mode SI) that has the value | |
324 | ;; SVE_KNOWN_PTRUE if we know that GP is all-true and SVE_MAYBE_NOT_PTRUE | |
325 | ;; otherwise | |
326 | ;; | |
327 | ;; - CODE is the comparison code | |
328 | ;; | |
329 | ;; - OP0 and OP1 are the values being compared, of mode M | |
330 | ;; | |
331 | ;; The "Z" in UNSPEC_PRED_Z indicates that inactive lanes are zero. | |
332 | ;; | |
333 | ;; ------------------------------------------------------------------------- | |
06308276 RS |
334 | ;; ---- Note on predicated integer arithmetic and UNSPEC_PRED_X
335 | ;; ------------------------------------------------------------------------- | |
336 | ;; | |
337 | ;; Many SVE integer operations are predicated. We can generate them | |
338 | ;; from four sources: | |
339 | ;; | |
340 | ;; (1) Using normal unpredicated optabs. In this case we need to create | |
341 | ;; an all-true predicate register to act as the governing predicate | |
342 | ;; for the SVE instruction. There are no inactive lanes, and thus | |
343 | ;; the values of inactive lanes don't matter. | |
344 | ;; | |
345 | ;; (2) Using _x ACLE functions. In this case the function provides a | |
346 | ;; specific predicate and some lanes might be inactive. However, | |
347 | ;; as for (1), the values of the inactive lanes don't matter. | |
348 | ;; We can make extra lanes active without changing the behavior | |
349 | ;; (although for code-quality reasons we should avoid doing so | |
350 | ;; needlessly). | |
351 | ;; | |
352 | ;; (3) Using cond_* optabs that correspond to IFN_COND_* internal functions. | |
353 | ;; These optabs have a predicate operand that specifies which lanes are | |
354 | ;; active and another operand that provides the values of inactive lanes. | |
355 | ;; | |
356 | ;; (4) Using _m and _z ACLE functions. These functions map to the same | |
357 | ;; patterns as (3), with the _z functions setting inactive lanes to zero | |
358 | ;; and the _m functions setting the inactive lanes to one of the function | |
359 | ;; arguments. | |
360 | ;; | |
361 | ;; For (1) and (2) we need a way of attaching the predicate to a normal | |
362 | ;; unpredicated integer operation. We do this using: | |
363 | ;; | |
364 | ;; (unspec:M [pred (code:M (op0 op1 ...))] UNSPEC_PRED_X) | |
365 | ;; | |
366 | ;; where (code:M (op0 op1 ...)) is the normal integer operation and PRED | |
367 | ;; is a predicate of mode <M:VPRED>. PRED might or might not be a PTRUE; | |
368 | ;; it always is for (1), but might not be for (2). | |
369 | ;; | |
370 | ;; The unspec as a whole has the same value as (code:M ...) when PRED is | |
371 | ;; all-true. It is always semantically valid to replace PRED with a PTRUE, | |
372 | ;; but as noted above, we should only do so if there's a specific benefit. | |
373 | ;; | |
374 | ;; (The "_X" in the unspec is named after the ACLE functions in (2).) | |
375 | ;; | |
376 | ;; For (3) and (4) we can simply use the SVE port's normal representation | |
377 | ;; of a predicate-based select: | |
378 | ;; | |
379 | ;; (unspec:M [pred (code:M (op0 op1 ...)) inactive] UNSPEC_SEL) | |
380 | ;; | |
381 | ;; where INACTIVE specifies the values of inactive lanes. | |
382 | ;; | |
383 | ;; We can also use the UNSPEC_PRED_X wrapper in the UNSPEC_SEL rather | |
384 | ;; than inserting the integer operation directly. This is mostly useful | |
385 | ;; if we want the combine pass to merge an integer operation with an explicit | |
386 | ;; vcond_mask (in other words, with a following SEL instruction). However, | |
387 | ;; it's generally better to merge such operations at the gimple level | |
388 | ;; using (3). | |
389 | ;; | |
390 | ;; ------------------------------------------------------------------------- | |
c9c5a809 RS |
391 | ;; ---- Note on predicated FP arithmetic patterns and GP "strictness" |
392 | ;; ------------------------------------------------------------------------- | |
393 | ;; | |
394 | ;; Most SVE floating-point operations are predicated. We can generate | |
395 | ;; them from four sources: | |
396 | ;; | |
397 | ;; (1) Using normal unpredicated optabs. In this case we need to create | |
398 | ;; an all-true predicate register to act as the governing predicate | |
399 | ;; for the SVE instruction. There are no inactive lanes, and thus | |
400 | ;; the values of inactive lanes don't matter. | |
401 | ;; | |
402 | ;; (2) Using _x ACLE functions. In this case the function provides a | |
403 | ;; specific predicate and some lanes might be inactive. However, | |
404 | ;; as for (1), the values of the inactive lanes don't matter. | |
405 | ;; | |
406 | ;; The instruction must have the same exception behavior as the | |
407 | ;; function call unless things like command-line flags specifically | |
408 | ;; allow otherwise. For example, with -ffast-math, it is OK to | |
409 | ;; raise exceptions for inactive lanes, but normally it isn't. | |
410 | ;; | |
411 | ;; (3) Using cond_* optabs that correspond to IFN_COND_* internal functions. | |
412 | ;; These optabs have a predicate operand that specifies which lanes are | |
413 | ;; active and another operand that provides the values of inactive lanes. | |
414 | ;; | |
415 | ;; (4) Using _m and _z ACLE functions. These functions map to the same | |
416 | ;; patterns as (3), with the _z functions setting inactive lanes to zero | |
417 | ;; and the _m functions setting the inactive lanes to one of the function | |
418 | ;; arguments. | |
419 | ;; | |
420 | ;; So: | |
421 | ;; | |
422 | ;; - In (1), the predicate is known to be all true and the pattern can use | |
423 | ;; unpredicated operations where available. | |
424 | ;; | |
425 | ;; - In (2), the predicate might or might not be all true. The pattern can | |
426 | ;; use unpredicated instructions if the predicate is all-true or if things | |
427 | ;; like command-line flags allow exceptions for inactive lanes. | |
428 | ;; | |
429 | ;; - (3) and (4) represent a native SVE predicated operation. Some lanes | |
430 | ;; might be inactive and inactive lanes of the result must have specific | |
431 | ;; values. There is no scope for using unpredicated instructions (and no | |
432 | ;; reason to want to), so the question about command-line flags doesn't | |
433 | ;; arise. | |
434 | ;; | |
435 | ;; It would be inaccurate to model (2) as an rtx code like (sqrt ...) | |
436 | ;; in combination with a separate predicate operand, e.g. | |
437 | ;; | |
438 | ;; (unspec [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
f75cdd2c | 439 | ;; (sqrt:SVE_FULL_F 2 "register_operand" "w")] |
c9c5a809 RS |
440 | ;; ....) |
441 | ;; | |
442 | ;; because (sqrt ...) can raise an exception for any lane, including | |
443 | ;; inactive ones. We therefore need to use an unspec instead. | |
444 | ;; | |
445 | ;; Also, (2) requires some way of distinguishing the case in which the | |
446 | ;; predicate might have inactive lanes and cannot be changed from the | |
447 | ;; case in which the predicate has no inactive lanes or can be changed. | |
448 | ;; This information is also useful when matching combined FP patterns | |
449 | ;; in which the predicates might not be equal. | |
450 | ;; | |
451 | ;; We therefore model FP operations as an unspec of the form: | |
452 | ;; | |
453 | ;; (unspec [pred strictness op0 op1 ...] UNSPEC_COND_<MNEMONIC>) | |
454 | ;; | |
455 | ;; where: | |
456 | ;; | |
457 | ;; - PRED is the governing predicate. | |
458 | ;; | |
459 | ;; - STRICTNESS is a CONST_INT that conceptually has mode SI. It has the | |
460 | ;; value SVE_STRICT_GP if PRED might have inactive lanes and if those | |
461 | ;; lanes must remain inactive. It has the value SVE_RELAXED_GP otherwise. | |
462 | ;; | |
463 | ;; - OP0 OP1 ... are the normal input operands to the operation. | |
464 | ;; | |
465 | ;; - MNEMONIC is the mnemonic of the associated SVE instruction. | |
624d0f07 | 466 | ;; |
0eb5e901 RS |
467 | ;; For (3) and (4), we combine these operations with an UNSPEC_SEL |
468 | ;; that selects between the result of the FP operation and the "else" | |
469 | ;; value. (This else value is a merge input for _m ACLE functions | |
470 | ;; and zero for _z ACLE functions.) The outer pattern then has the form: | |
471 | ;; | |
472 | ;; (unspec [pred fp_operation else_value] UNSPEC_SEL) | |
473 | ;; | |
474 | ;; This means that the patterns for (3) and (4) have two predicates: | |
475 | ;; one for the FP operation itself and one for the UNSPEC_SEL. | |
476 | ;; This pattern is equivalent to the result of combining an instance | |
477 | ;; of (1) or (2) with a separate vcond instruction, so these patterns | |
478 | ;; are useful as combine targets too. | |
479 | ;; | |
480 | ;; However, in the combine case, the instructions that we want to | |
481 | ;; combine might use different predicates. Then: | |
482 | ;; | |
483 | ;; - Some of the active lanes of the FP operation might be discarded | |
484 | ;; by the UNSPEC_SEL. It's OK to drop the FP operation on those lanes, | |
485 | ;; even for SVE_STRICT_GP, since the operations on those lanes are | |
486 | ;; effectively dead code. | |
487 | ;; | |
488 | ;; - Some of the inactive lanes of the FP operation might be selected | |
489 | ;; by the UNSPEC_SEL, giving unspecified values for those lanes. | |
490 | ;; SVE_RELAXED_GP lets us extend the FP operation to cover these | |
491 | ;; extra lanes, but SVE_STRICT_GP does not. | |
492 | ;; | |
493 | ;; Thus SVE_RELAXED_GP allows us to ignore the predicate on the FP operation | |
494 | ;; and operate on exactly the lanes selected by the UNSPEC_SEL predicate. | |
495 | ;; This typically leads to patterns like: | |
496 | ;; | |
497 | ;; (unspec [(match_operand 1 "register_operand" "Upl") | |
498 | ;; (unspec [(match_operand N) | |
499 | ;; (const_int SVE_RELAXED_GP) | |
500 | ;; ...] | |
501 | ;; UNSPEC_COND_<MNEMONIC>) | |
502 | ;; ...]) | |
503 | ;; | |
504 | ;; where operand N is allowed to be anything. These instructions then | |
505 | ;; have rewrite rules to replace operand N with operand 1, which gives the | |
506 | ;; instructions a canonical form and means that the original operand N is | |
507 | ;; not kept live unnecessarily. | |
508 | ;; | |
509 | ;; In contrast, SVE_STRICT_GP only allows the UNSPEC_SEL predicate to be | |
510 | ;; a subset of the FP operation predicate. This case isn't interesting | |
511 | ;; for FP operations that have an all-true predicate, since such operations | |
512 | ;; use SVE_RELAXED_GP instead. And it is not possible for instruction | |
513 | ;; conditions to track the subset relationship for arbitrary registers. | |
514 | ;; So in practice, the only useful case for SVE_STRICT_GP is the one | |
515 | ;; in which the predicates match: | |
516 | ;; | |
517 | ;; (unspec [(match_operand 1 "register_operand" "Upl") | |
518 | ;; (unspec [(match_dup 1) | |
519 | ;; (const_int SVE_STRICT_GP) | |
520 | ;; ...] | |
521 | ;; UNSPEC_COND_<MNEMONIC>) | |
522 | ;; ...]) | |
523 | ;; | |
524 | ;; This pattern would also be correct for SVE_RELAXED_GP, but it would | |
525 | ;; be redundant with the one above. However, if the combine pattern | |
526 | ;; has multiple FP operations, using a match_operand allows combinations | |
527 | ;; of SVE_STRICT_GP and SVE_RELAXED_GP in the same operation, provided | |
528 | ;; that the predicates are the same: | |
529 | ;; | |
530 | ;; (unspec [(match_operand 1 "register_operand" "Upl") | |
531 | ;; (... | |
532 | ;; (unspec [(match_dup 1) | |
533 | ;; (match_operand:SI N "aarch64_sve_gp_strictness") | |
534 | ;; ...] | |
535 | ;; UNSPEC_COND_<MNEMONIC1>) | |
536 | ;; (unspec [(match_dup 1) | |
537 | ;; (match_operand:SI M "aarch64_sve_gp_strictness") | |
538 | ;; ...] | |
539 | ;; UNSPEC_COND_<MNEMONIC2>) ...) | |
540 | ;; ...]) | |
541 | ;; | |
542 | ;; The fully-relaxed version of this pattern is: | |
543 | ;; | |
544 | ;; (unspec [(match_operand 1 "register_operand" "Upl") | |
545 | ;; (... | |
546 | ;; (unspec [(match_operand:SI N) | |
547 | ;; (const_int SVE_RELAXED_GP) | |
548 | ;; ...] | |
549 | ;; UNSPEC_COND_<MNEMONIC1>) | |
550 | ;; (unspec [(match_operand:SI M) | |
551 | ;; (const_int SVE_RELAXED_GP) | |
552 | ;; ...] | |
553 | ;; UNSPEC_COND_<MNEMONIC2>) ...) | |
554 | ;; ...]) | |
555 | ;; | |
624d0f07 RS |
556 | ;; ------------------------------------------------------------------------- |
557 | ;; ---- Note on FFR handling | |
558 | ;; ------------------------------------------------------------------------- | |
559 | ;; | |
560 | ;; Logically we want to divide FFR-related instructions into regions | |
561 | ;; that contain exactly one of: | |
562 | ;; | |
563 | ;; - a single write to the FFR | |
564 | ;; - any number of reads from the FFR (but only one read is likely) | |
565 | ;; - any number of LDFF1 and LDNF1 instructions | |
566 | ;; | |
567 | ;; However, LDFF1 and LDNF1 instructions should otherwise behave like | |
568 | ;; normal loads as far as possible. This means that they should be | |
569 | ;; schedulable within a region in the same way that LD1 would be, | |
570 | ;; and they should be deleted as dead if the result is unused. The loads | |
571 | ;; should therefore not write to the FFR, since that would both serialize | |
572 | ;; the loads with respect to each other and keep the loads live for any | |
573 | ;; later RDFFR. | |
574 | ;; | |
575 | ;; We get around this by using a fake "FFR token" (FFRT) to help describe | |
576 | ;; the dependencies. Writing to the FFRT starts a new "FFRT region", | |
577 | ;; while using the FFRT keeps the instruction within its region. | |
578 | ;; Specifically: | |
579 | ;; | |
580 | ;; - Writes start a new FFRT region as well as setting the FFR: | |
581 | ;; | |
582 | ;; W1: parallel (FFRT = <new value>, FFR = <actual FFR value>) | |
583 | ;; | |
584 | ;; - Loads use an LD1-like instruction that also uses the FFRT, so that the | |
585 | ;; loads stay within the same FFRT region: | |
586 | ;; | |
587 | ;; L1: load data while using the FFRT | |
588 | ;; | |
589 | ;; In addition, any FFRT region that includes a load also has at least one | |
590 | ;; instance of: | |
591 | ;; | |
592 | ;; L2: FFR = update(FFR, FFRT) [type == no_insn] | |
593 | ;; | |
594 | ;; to make it clear that the region both reads from and writes to the FFR. | |
595 | ;; | |
596 | ;; - Reads do the following: | |
597 | ;; | |
598 | ;; R1: FFRT = FFR [type == no_insn] | |
599 | ;; R2: read from the FFRT | |
600 | ;; R3: FFRT = update(FFRT) [type == no_insn] | |
601 | ;; | |
602 | ;; R1 and R3 both create new FFRT regions, so that previous LDFF1s and | |
603 | ;; LDNF1s cannot move forwards across R1 and later LDFF1s and LDNF1s | |
604 | ;; cannot move backwards across R3. | |
605 | ;; | |
606 | ;; This way, writes are only kept alive by later loads or reads, | |
607 | ;; and write/read pairs fold normally. For two consecutive reads, | |
608 | ;; the first R3 is made dead by the second R1, which in turn becomes | |
609 | ;; redundant with the first R1. We then have: | |
610 | ;; | |
611 | ;; first R1: FFRT = FFR | |
612 | ;; first read from the FFRT | |
613 | ;; second read from the FFRT | |
614 | ;; second R3: FFRT = update(FFRT) | |
615 | ;; | |
616 | ;; i.e. the two FFRT regions collapse into a single one with two | |
617 | ;; independent reads. | |
618 | ;; | |
619 | ;; The model still prevents some valid optimizations though. For example, | |
620 | ;; if all loads in an FFRT region are deleted as dead, nothing would remove | |
621 | ;; the L2 instructions. | |
43cacb12 | 622 | |
915d28fe RS |
623 | ;; ========================================================================= |
624 | ;; == Moves | |
625 | ;; ========================================================================= | |
626 | ||
627 | ;; ------------------------------------------------------------------------- | |
628 | ;; ---- Moves of single vectors | |
629 | ;; ------------------------------------------------------------------------- | |
630 | ;; Includes: | |
631 | ;; - MOV (including aliases) | |
632 | ;; - LD1B (contiguous form) | |
633 | ;; - LD1D ( " " ) | |
634 | ;; - LD1H ( " " ) | |
635 | ;; - LD1W ( " " ) | |
636 | ;; - LDR | |
637 | ;; - ST1B (contiguous form) | |
638 | ;; - ST1D ( " " ) | |
639 | ;; - ST1H ( " " ) | |
640 | ;; - ST1W ( " " ) | |
641 | ;; - STR | |
642 | ;; ------------------------------------------------------------------------- | |
643 | ||
43cacb12 | 644 | (define_expand "mov<mode>" |
cc68f7c2 RS |
645 | [(set (match_operand:SVE_ALL 0 "nonimmediate_operand") |
646 | (match_operand:SVE_ALL 1 "general_operand"))] | |
43cacb12 RS |
647 | "TARGET_SVE" |
648 | { | |
649 | /* Use the predicated load and store patterns where possible. | |
650 | This is required for big-endian targets (see the comment at the | |
651 | head of the file) and increases the addressing choices for | |
652 | little-endian. */ | |
653 | if ((MEM_P (operands[0]) || MEM_P (operands[1])) | |
ea403d8b | 654 | && can_create_pseudo_p ()) |
43cacb12 RS |
655 | { |
656 | aarch64_expand_sve_mem_move (operands[0], operands[1], <VPRED>mode); | |
657 | DONE; | |
658 | } | |
659 | ||
660 | if (CONSTANT_P (operands[1])) | |
661 | { | |
4aeb1ba7 | 662 | aarch64_expand_mov_immediate (operands[0], operands[1]); |
43cacb12 RS |
663 | DONE; |
664 | } | |
002092be RS |
665 | |
666 | /* Optimize subregs on big-endian targets: we can use REV[BHW] | |
667 | instead of going through memory. */ | |
668 | if (BYTES_BIG_ENDIAN | |
ea403d8b | 669 | && aarch64_maybe_expand_sve_subreg_move (operands[0], operands[1])) |
002092be RS |
670 | DONE; |
671 | } | |
672 | ) | |
673 | ||
915d28fe | 674 | (define_expand "movmisalign<mode>" |
cc68f7c2 RS |
675 | [(set (match_operand:SVE_ALL 0 "nonimmediate_operand") |
676 | (match_operand:SVE_ALL 1 "general_operand"))] | |
915d28fe | 677 | "TARGET_SVE" |
002092be | 678 | { |
915d28fe RS |
679 | ;; Equivalent to a normal move for our purposes. */
680 | emit_move_insn (operands[0], operands[1]); | |
002092be | 681 | DONE; |
43cacb12 RS |
682 | } |
683 | ) | |
684 | ||
cc68f7c2 RS |
685 | ;; Unpredicated moves that can use LDR and STR, i.e. full vectors for which |
686 | ;; little-endian ordering is acceptable. Only allow memory operations during | |
687 | ;; and after RA; before RA we want the predicated load and store patterns to | |
688 | ;; be used instead. | |
689 | (define_insn "*aarch64_sve_mov<mode>_ldr_str" | |
f75cdd2c RS |
690 | [(set (match_operand:SVE_FULL 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w") |
691 | (match_operand:SVE_FULL 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))] | |
43cacb12 | 692 | "TARGET_SVE |
c600df9a | 693 | && (<MODE>mode == VNx16QImode || !BYTES_BIG_ENDIAN) |
43cacb12 RS |
694 | && ((lra_in_progress || reload_completed) |
695 | || (register_operand (operands[0], <MODE>mode) | |
696 | && nonmemory_operand (operands[1], <MODE>mode)))" | |
697 | "@ | |
698 | ldr\t%0, %1 | |
699 | str\t%1, %0 | |
700 | mov\t%0.d, %1.d | |
701 | * return aarch64_output_sve_mov_immediate (operands[1]);" | |
702 | ) | |
703 | ||
cc68f7c2 RS |
704 | ;; Unpredicated moves that cannot use LDR and STR, i.e. partial vectors |
705 | ;; or vectors for which little-endian ordering isn't acceptable. Memory | |
706 | ;; accesses require secondary reloads. | |
707 | (define_insn "*aarch64_sve_mov<mode>_no_ldr_str" | |
708 | [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w") | |
709 | (match_operand:SVE_ALL 1 "aarch64_nonmemory_operand" "w, Dn"))] | |
710 | "TARGET_SVE | |
711 | && <MODE>mode != VNx16QImode | |
712 | && (BYTES_BIG_ENDIAN | |
713 | || maybe_ne (BYTES_PER_SVE_VECTOR, GET_MODE_SIZE (<MODE>mode)))" | |
43cacb12 RS |
714 | "@ |
715 | mov\t%0.d, %1.d | |
716 | * return aarch64_output_sve_mov_immediate (operands[1]);" | |
717 | ) | |
718 | ||
cc68f7c2 RS |
719 | ;; Handle memory reloads for modes that can't use LDR and STR. We use |
720 | ;; byte PTRUE for all modes to try to encourage reuse. This pattern | |
721 | ;; needs constraints because it is returned by TARGET_SECONDARY_RELOAD. | |
722 | (define_expand "aarch64_sve_reload_mem" | |
43cacb12 RS |
723 | [(parallel |
724 | [(set (match_operand 0) | |
ea403d8b | 725 | (match_operand 1)) |
43cacb12 | 726 | (clobber (match_operand:VNx16BI 2 "register_operand" "=Upl"))])] |
cc68f7c2 | 727 | "TARGET_SVE" |
43cacb12 RS |
728 | { |
729 | /* Create a PTRUE. */ | |
730 | emit_move_insn (operands[2], CONSTM1_RTX (VNx16BImode)); | |
731 | ||
732 | /* Refer to the PTRUE in the appropriate mode for this move. */ | |
733 | machine_mode mode = GET_MODE (operands[0]); | |
cc68f7c2 | 734 | rtx pred = gen_lowpart (aarch64_sve_pred_mode (mode), operands[2]); |
43cacb12 RS |
735 | |
736 | /* Emit a predicated load or store. */ | |
737 | aarch64_emit_sve_pred_move (operands[0], pred, operands[1]); | |
738 | DONE; | |
739 | } | |
740 | ) | |
741 | ||
915d28fe RS |
742 | ;; A predicated move in which the predicate is known to be all-true. |
743 | ;; Note that this pattern is generated directly by aarch64_emit_sve_pred_move, | |
744 | ;; so changes to this pattern will need changes there as well. | |
0c63a8ee | 745 | (define_insn_and_split "@aarch64_pred_mov<mode>" |
cc68f7c2 RS |
746 | [(set (match_operand:SVE_ALL 0 "nonimmediate_operand" "=w, w, m") |
747 | (unspec:SVE_ALL | |
9c6b4601 | 748 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") |
cc68f7c2 | 749 | (match_operand:SVE_ALL 2 "nonimmediate_operand" "w, m, w")] |
06308276 | 750 | UNSPEC_PRED_X))] |
43cacb12 RS |
751 | "TARGET_SVE |
752 | && (register_operand (operands[0], <MODE>mode) | |
753 | || register_operand (operands[2], <MODE>mode))" | |
754 | "@ | |
9c6b4601 | 755 | # |
cc68f7c2 RS |
756 | ld1<Vesize>\t%0.<Vctype>, %1/z, %2 |
757 | st1<Vesize>\t%2.<Vctype>, %1, %0" | |
9c6b4601 RS |
758 | "&& register_operand (operands[0], <MODE>mode) |
759 | && register_operand (operands[2], <MODE>mode)" | |
760 | [(set (match_dup 0) (match_dup 2))] | |
43cacb12 RS |
761 | ) |
762 | ||
915d28fe RS |
763 | ;; A pattern for optimizing SUBREGs that have a reinterpreting effect |
764 | ;; on big-endian targets; see aarch64_maybe_expand_sve_subreg_move | |
765 | ;; for details. We use a special predicate for operand 2 to reduce | |
766 | ;; the number of patterns. | |
767 | (define_insn_and_split "*aarch64_sve_mov<mode>_subreg_be" | |
cc68f7c2 RS |
768 | [(set (match_operand:SVE_ALL 0 "aarch64_sve_nonimmediate_operand" "=w") |
769 | (unspec:SVE_ALL | |
915d28fe RS |
770 | [(match_operand:VNx16BI 1 "register_operand" "Upl") |
771 | (match_operand 2 "aarch64_any_register_operand" "w")] | |
772 | UNSPEC_REV_SUBREG))] | |
773 | "TARGET_SVE && BYTES_BIG_ENDIAN" | |
774 | "#" | |
775 | "&& reload_completed" | |
776 | [(const_int 0)] | |
f307441a | 777 | { |
915d28fe RS |
778 | aarch64_split_sve_subreg_move (operands[0], operands[1], operands[2]); |
779 | DONE; | |
f307441a RS |
780 | } |
781 | ) | |
782 | ||
4aeb1ba7 RS |
783 | ;; Reinterpret operand 1 in operand 0's mode, without changing its contents. |
784 | ;; This is equivalent to a subreg on little-endian targets but not for | |
785 | ;; big-endian; see the comment at the head of the file for details. | |
786 | (define_expand "@aarch64_sve_reinterpret<mode>" | |
cc68f7c2 RS |
787 | [(set (match_operand:SVE_ALL 0 "register_operand") |
788 | (unspec:SVE_ALL | |
f75cdd2c RS |
789 | [(match_operand 1 "aarch64_any_register_operand")] |
790 | UNSPEC_REINTERPRET))] | |
4aeb1ba7 RS |
791 | "TARGET_SVE" |
792 | { | |
b23c6a2c RS |
793 | machine_mode src_mode = GET_MODE (operands[1]); |
794 | if (targetm.can_change_mode_class (<MODE>mode, src_mode, FP_REGS)) | |
4aeb1ba7 RS |
795 | { |
796 | emit_move_insn (operands[0], gen_lowpart (<MODE>mode, operands[1])); | |
797 | DONE; | |
798 | } | |
799 | } | |
800 | ) | |
801 | ||
802 | ;; A pattern for handling type punning on big-endian targets. We use a | |
803 | ;; special predicate for operand 1 to reduce the number of patterns. | |
804 | (define_insn_and_split "*aarch64_sve_reinterpret<mode>" | |
cc68f7c2 RS |
805 | [(set (match_operand:SVE_ALL 0 "register_operand" "=w") |
806 | (unspec:SVE_ALL | |
f75cdd2c RS |
807 | [(match_operand 1 "aarch64_any_register_operand" "w")] |
808 | UNSPEC_REINTERPRET))] | |
4aeb1ba7 RS |
809 | "TARGET_SVE" |
810 | "#" | |
811 | "&& reload_completed" | |
812 | [(set (match_dup 0) (match_dup 1))] | |
813 | { | |
624d0f07 | 814 | operands[1] = aarch64_replace_reg_mode (operands[1], <MODE>mode); |
4aeb1ba7 RS |
815 | } |
816 | ) | |
817 | ||
915d28fe RS |
818 | ;; ------------------------------------------------------------------------- |
819 | ;; ---- Moves of multiple vectors | |
820 | ;; ------------------------------------------------------------------------- | |
821 | ;; All patterns in this section are synthetic and split to real | |
822 | ;; instructions after reload. | |
823 | ;; ------------------------------------------------------------------------- | |
f307441a | 824 | |
9f4cbab8 RS |
825 | (define_expand "mov<mode>" |
826 | [(set (match_operand:SVE_STRUCT 0 "nonimmediate_operand") | |
827 | (match_operand:SVE_STRUCT 1 "general_operand"))] | |
828 | "TARGET_SVE" | |
829 | { | |
830 | /* Big-endian loads and stores need to be done via LD1 and ST1; | |
831 | see the comment at the head of the file for details. */ | |
832 | if ((MEM_P (operands[0]) || MEM_P (operands[1])) | |
833 | && BYTES_BIG_ENDIAN) | |
834 | { | |
835 | gcc_assert (can_create_pseudo_p ()); | |
836 | aarch64_expand_sve_mem_move (operands[0], operands[1], <VPRED>mode); | |
837 | DONE; | |
838 | } | |
839 | ||
840 | if (CONSTANT_P (operands[1])) | |
841 | { | |
842 | aarch64_expand_mov_immediate (operands[0], operands[1]); | |
843 | DONE; | |
844 | } | |
845 | } | |
846 | ) | |
847 | ||
848 | ;; Unpredicated structure moves (little-endian). | |
849 | (define_insn "*aarch64_sve_mov<mode>_le" | |
850 | [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w") | |
851 | (match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))] | |
852 | "TARGET_SVE && !BYTES_BIG_ENDIAN" | |
853 | "#" | |
854 | [(set_attr "length" "<insn_length>")] | |
855 | ) | |
856 | ||
857 | ;; Unpredicated structure moves (big-endian). Memory accesses require | |
858 | ;; secondary reloads. | |
915d28fe | 859 | (define_insn "*aarch64_sve_mov<mode>_be" |
9f4cbab8 RS |
860 | [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w, w") |
861 | (match_operand:SVE_STRUCT 1 "aarch64_nonmemory_operand" "w, Dn"))] | |
862 | "TARGET_SVE && BYTES_BIG_ENDIAN" | |
863 | "#" | |
864 | [(set_attr "length" "<insn_length>")] | |
865 | ) | |
866 | ||
867 | ;; Split unpredicated structure moves into pieces. This is the same | |
868 | ;; for both big-endian and little-endian code, although it only needs | |
869 | ;; to handle memory operands for little-endian code. | |
870 | (define_split | |
871 | [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand") | |
872 | (match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand"))] | |
873 | "TARGET_SVE && reload_completed" | |
874 | [(const_int 0)] | |
875 | { | |
876 | rtx dest = operands[0]; | |
877 | rtx src = operands[1]; | |
878 | if (REG_P (dest) && REG_P (src)) | |
879 | aarch64_simd_emit_reg_reg_move (operands, <VSINGLE>mode, <vector_count>); | |
880 | else | |
881 | for (unsigned int i = 0; i < <vector_count>; ++i) | |
882 | { | |
883 | rtx subdest = simplify_gen_subreg (<VSINGLE>mode, dest, <MODE>mode, | |
884 | i * BYTES_PER_SVE_VECTOR); | |
885 | rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, src, <MODE>mode, | |
886 | i * BYTES_PER_SVE_VECTOR); | |
887 | emit_insn (gen_rtx_SET (subdest, subsrc)); | |
888 | } | |
889 | DONE; | |
890 | } | |
891 | ) | |
892 | ||
893 | ;; Predicated structure moves. This works for both endiannesses but in | |
894 | ;; practice is only useful for big-endian. | |
0c63a8ee | 895 | (define_insn_and_split "@aarch64_pred_mov<mode>" |
9c6b4601 | 896 | [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_struct_nonimmediate_operand" "=w, w, Utx") |
9f4cbab8 | 897 | (unspec:SVE_STRUCT |
9c6b4601 RS |
898 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") |
899 | (match_operand:SVE_STRUCT 2 "aarch64_sve_struct_nonimmediate_operand" "w, Utx, w")] | |
06308276 | 900 | UNSPEC_PRED_X))] |
9f4cbab8 RS |
901 | "TARGET_SVE |
902 | && (register_operand (operands[0], <MODE>mode) | |
903 | || register_operand (operands[2], <MODE>mode))" | |
904 | "#" | |
905 | "&& reload_completed" | |
906 | [(const_int 0)] | |
907 | { | |
908 | for (unsigned int i = 0; i < <vector_count>; ++i) | |
909 | { | |
910 | rtx subdest = simplify_gen_subreg (<VSINGLE>mode, operands[0], | |
911 | <MODE>mode, | |
912 | i * BYTES_PER_SVE_VECTOR); | |
913 | rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, operands[2], | |
914 | <MODE>mode, | |
915 | i * BYTES_PER_SVE_VECTOR); | |
916 | aarch64_emit_sve_pred_move (subdest, operands[1], subsrc); | |
917 | } | |
918 | DONE; | |
919 | } | |
920 | [(set_attr "length" "<insn_length>")] | |
921 | ) | |
922 | ||
915d28fe RS |
923 | ;; ------------------------------------------------------------------------- |
924 | ;; ---- Moves of predicates | |
925 | ;; ------------------------------------------------------------------------- | |
926 | ;; Includes: | |
927 | ;; - MOV | |
928 | ;; - LDR | |
929 | ;; - PFALSE | |
930 | ;; - PTRUE | |
624d0f07 | 931 | ;; - PTRUES |
915d28fe RS |
932 | ;; - STR |
933 | ;; ------------------------------------------------------------------------- | |
934 | ||
43cacb12 RS |
935 | (define_expand "mov<mode>" |
936 | [(set (match_operand:PRED_ALL 0 "nonimmediate_operand") | |
937 | (match_operand:PRED_ALL 1 "general_operand"))] | |
938 | "TARGET_SVE" | |
939 | { | |
940 | if (GET_CODE (operands[0]) == MEM) | |
941 | operands[1] = force_reg (<MODE>mode, operands[1]); | |
0b1fe8cf RS |
942 | |
943 | if (CONSTANT_P (operands[1])) | |
944 | { | |
945 | aarch64_expand_mov_immediate (operands[0], operands[1]); | |
946 | DONE; | |
947 | } | |
43cacb12 RS |
948 | } |
949 | ) | |
950 | ||
951 | (define_insn "*aarch64_sve_mov<mode>" | |
1044fa32 | 952 | [(set (match_operand:PRED_ALL 0 "nonimmediate_operand" "=Upa, m, Upa, Upa") |
0b1fe8cf | 953 | (match_operand:PRED_ALL 1 "aarch64_mov_operand" "Upa, Upa, m, Dn"))] |
43cacb12 RS |
954 | "TARGET_SVE |
955 | && (register_operand (operands[0], <MODE>mode) | |
956 | || register_operand (operands[1], <MODE>mode))" | |
957 | "@ | |
958 | mov\t%0.b, %1.b | |
959 | str\t%1, %0 | |
960 | ldr\t%0, %1 | |
1044fa32 | 961 | * return aarch64_output_sve_mov_immediate (operands[1]);" |
43cacb12 RS |
962 | ) |
963 | ||
624d0f07 RS |
964 | ;; Match PTRUES Pn.B when both the predicate and flags are useful. |
965 | (define_insn_and_rewrite "*aarch64_sve_ptruevnx16bi_cc" | |
966 | [(set (reg:CC_NZC CC_REGNUM) | |
967 | (unspec:CC_NZC | |
968 | [(match_operand 2) | |
969 | (match_operand 3) | |
970 | (const_int SVE_KNOWN_PTRUE) | |
971 | (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate" | |
972 | [(unspec:VNx16BI | |
973 | [(match_operand:SI 4 "const_int_operand") | |
974 | (match_operand:VNx16BI 5 "aarch64_simd_imm_zero")] | |
975 | UNSPEC_PTRUE)])] | |
976 | UNSPEC_PTEST)) | |
977 | (set (match_operand:VNx16BI 0 "register_operand" "=Upa") | |
978 | (match_dup 1))] | |
979 | "TARGET_SVE" | |
980 | { | |
981 | return aarch64_output_sve_ptrues (operands[1]); | |
982 | } | |
983 | "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))" | |
984 | { | |
985 | operands[2] = operands[3] = CONSTM1_RTX (VNx16BImode); | |
986 | } | |
987 | ) | |
988 | ||
989 | ;; Match PTRUES Pn.[HSD] when both the predicate and flags are useful. | |
990 | (define_insn_and_rewrite "*aarch64_sve_ptrue<mode>_cc" | |
991 | [(set (reg:CC_NZC CC_REGNUM) | |
992 | (unspec:CC_NZC | |
993 | [(match_operand 2) | |
994 | (match_operand 3) | |
995 | (const_int SVE_KNOWN_PTRUE) | |
996 | (subreg:PRED_HSD | |
997 | (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate" | |
998 | [(unspec:VNx16BI | |
999 | [(match_operand:SI 4 "const_int_operand") | |
1000 | (match_operand:PRED_HSD 5 "aarch64_simd_imm_zero")] | |
1001 | UNSPEC_PTRUE)]) 0)] | |
1002 | UNSPEC_PTEST)) | |
1003 | (set (match_operand:VNx16BI 0 "register_operand" "=Upa") | |
1004 | (match_dup 1))] | |
1005 | "TARGET_SVE" | |
1006 | { | |
1007 | return aarch64_output_sve_ptrues (operands[1]); | |
1008 | } | |
1009 | "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))" | |
1010 | { | |
1011 | operands[2] = CONSTM1_RTX (VNx16BImode); | |
1012 | operands[3] = CONSTM1_RTX (<MODE>mode); | |
1013 | } | |
1014 | ) | |
1015 | ||
1016 | ;; Match PTRUES Pn.B when only the flags result is useful (which is | |
1017 | ;; a way of testing VL). | |
1018 | (define_insn_and_rewrite "*aarch64_sve_ptruevnx16bi_ptest" | |
1019 | [(set (reg:CC_NZC CC_REGNUM) | |
1020 | (unspec:CC_NZC | |
1021 | [(match_operand 2) | |
1022 | (match_operand 3) | |
1023 | (const_int SVE_KNOWN_PTRUE) | |
1024 | (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate" | |
1025 | [(unspec:VNx16BI | |
1026 | [(match_operand:SI 4 "const_int_operand") | |
1027 | (match_operand:VNx16BI 5 "aarch64_simd_imm_zero")] | |
1028 | UNSPEC_PTRUE)])] | |
1029 | UNSPEC_PTEST)) | |
1030 | (clobber (match_scratch:VNx16BI 0 "=Upa"))] | |
1031 | "TARGET_SVE" | |
1032 | { | |
1033 | return aarch64_output_sve_ptrues (operands[1]); | |
1034 | } | |
1035 | "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))" | |
1036 | { | |
1037 | operands[2] = operands[3] = CONSTM1_RTX (VNx16BImode); | |
1038 | } | |
1039 | ) | |
1040 | ||
1041 | ;; Match PTRUES Pn.[HSD] when only the flags result is useful (which is | |
1042 | ;; a way of testing VL). | |
1043 | (define_insn_and_rewrite "*aarch64_sve_ptrue<mode>_ptest" | |
1044 | [(set (reg:CC_NZC CC_REGNUM) | |
1045 | (unspec:CC_NZC | |
1046 | [(match_operand 2) | |
1047 | (match_operand 3) | |
1048 | (const_int SVE_KNOWN_PTRUE) | |
1049 | (subreg:PRED_HSD | |
1050 | (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate" | |
1051 | [(unspec:VNx16BI | |
1052 | [(match_operand:SI 4 "const_int_operand") | |
1053 | (match_operand:PRED_HSD 5 "aarch64_simd_imm_zero")] | |
1054 | UNSPEC_PTRUE)]) 0)] | |
1055 | UNSPEC_PTEST)) | |
1056 | (clobber (match_scratch:VNx16BI 0 "=Upa"))] | |
1057 | "TARGET_SVE" | |
1058 | { | |
1059 | return aarch64_output_sve_ptrues (operands[1]); | |
1060 | } | |
1061 | "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))" | |
1062 | { | |
1063 | operands[2] = CONSTM1_RTX (VNx16BImode); | |
1064 | operands[3] = CONSTM1_RTX (<MODE>mode); | |
1065 | } | |
1066 | ) | |
1067 | ||
1068 | ;; ------------------------------------------------------------------------- | |
1069 | ;; ---- Moves relating to the FFR | |
1070 | ;; ------------------------------------------------------------------------- | |
1071 | ;; RDFFR | |
1072 | ;; RDFFRS | |
1073 | ;; SETFFR | |
1074 | ;; WRFFR | |
1075 | ;; ------------------------------------------------------------------------- | |
1076 | ||
1077 | ;; [W1 in the block comment above about FFR handling] | |
1078 | ;; | |
1079 | ;; Write to the FFR and start a new FFRT scheduling region. | |
1080 | (define_insn "aarch64_wrffr" | |
1081 | [(set (reg:VNx16BI FFR_REGNUM) | |
1082 | (match_operand:VNx16BI 0 "aarch64_simd_reg_or_minus_one" "Dm, Upa")) | |
1083 | (set (reg:VNx16BI FFRT_REGNUM) | |
4ec943d6 | 1084 | (unspec:VNx16BI [(match_dup 0)] UNSPEC_WRFFR))] |
624d0f07 RS |
1085 | "TARGET_SVE" |
1086 | "@ | |
1087 | setffr | |
1088 | wrffr\t%0.b" | |
1089 | ) | |
1090 | ||
1091 | ;; [L2 in the block comment above about FFR handling] | |
1092 | ;; | |
1093 | ;; Introduce a read from and write to the FFR in the current FFRT region, | |
1094 | ;; so that the FFR value is live on entry to the region and so that the FFR | |
1095 | ;; value visibly changes within the region. This is used (possibly multiple | |
1096 | ;; times) in an FFRT region that includes LDFF1 or LDNF1 instructions. | |
1097 | (define_insn "aarch64_update_ffr_for_load" | |
1098 | [(set (reg:VNx16BI FFR_REGNUM) | |
1099 | (unspec:VNx16BI [(reg:VNx16BI FFRT_REGNUM) | |
1100 | (reg:VNx16BI FFR_REGNUM)] UNSPEC_UPDATE_FFR))] | |
1101 | "TARGET_SVE" | |
1102 | "" | |
1103 | [(set_attr "type" "no_insn")] | |
1104 | ) | |
1105 | ||
1106 | ;; [R1 in the block comment above about FFR handling] | |
1107 | ;; | |
1108 | ;; Notionally copy the FFR to the FFRT, so that the current FFR value | |
1109 | ;; can be read from there by the RDFFR instructions below. This acts | |
1110 | ;; as a scheduling barrier for earlier LDFF1 and LDNF1 instructions and | |
1111 | ;; creates a natural dependency with earlier writes. | |
1112 | (define_insn "aarch64_copy_ffr_to_ffrt" | |
1113 | [(set (reg:VNx16BI FFRT_REGNUM) | |
1114 | (reg:VNx16BI FFR_REGNUM))] | |
1115 | "TARGET_SVE" | |
1116 | "" | |
1117 | [(set_attr "type" "no_insn")] | |
1118 | ) | |
1119 | ||
1120 | ;; [R2 in the block comment above about FFR handling] | |
1121 | ;; | |
1122 | ;; Read the FFR via the FFRT. | |
1123 | (define_insn "aarch64_rdffr" | |
1124 | [(set (match_operand:VNx16BI 0 "register_operand" "=Upa") | |
1125 | (reg:VNx16BI FFRT_REGNUM))] | |
1126 | "TARGET_SVE" | |
1127 | "rdffr\t%0.b" | |
1128 | ) | |
1129 | ||
1130 | ;; Likewise with zero predication. | |
1131 | (define_insn "aarch64_rdffr_z" | |
1132 | [(set (match_operand:VNx16BI 0 "register_operand" "=Upa") | |
1133 | (and:VNx16BI | |
1134 | (reg:VNx16BI FFRT_REGNUM) | |
1135 | (match_operand:VNx16BI 1 "register_operand" "Upa")))] | |
1136 | "TARGET_SVE" | |
1137 | "rdffr\t%0.b, %1/z" | |
1138 | ) | |
1139 | ||
1140 | ;; Read the FFR to test for a fault, without using the predicate result. | |
1141 | (define_insn "*aarch64_rdffr_z_ptest" | |
1142 | [(set (reg:CC_NZC CC_REGNUM) | |
1143 | (unspec:CC_NZC | |
1144 | [(match_operand:VNx16BI 1 "register_operand" "Upa") | |
1145 | (match_dup 1) | |
1146 | (match_operand:SI 2 "aarch64_sve_ptrue_flag") | |
1147 | (and:VNx16BI | |
1148 | (reg:VNx16BI FFRT_REGNUM) | |
1149 | (match_dup 1))] | |
1150 | UNSPEC_PTEST)) | |
1151 | (clobber (match_scratch:VNx16BI 0 "=Upa"))] | |
1152 | "TARGET_SVE" | |
1153 | "rdffrs\t%0.b, %1/z" | |
1154 | ) | |
1155 | ||
1156 | ;; Same for unpredicated RDFFR when tested with a known PTRUE. | |
1157 | (define_insn "*aarch64_rdffr_ptest" | |
1158 | [(set (reg:CC_NZC CC_REGNUM) | |
1159 | (unspec:CC_NZC | |
1160 | [(match_operand:VNx16BI 1 "register_operand" "Upa") | |
1161 | (match_dup 1) | |
1162 | (const_int SVE_KNOWN_PTRUE) | |
1163 | (reg:VNx16BI FFRT_REGNUM)] | |
1164 | UNSPEC_PTEST)) | |
1165 | (clobber (match_scratch:VNx16BI 0 "=Upa"))] | |
1166 | "TARGET_SVE" | |
1167 | "rdffrs\t%0.b, %1/z" | |
1168 | ) | |
1169 | ||
1170 | ;; Read the FFR with zero predication and test the result. | |
1171 | (define_insn "*aarch64_rdffr_z_cc" | |
1172 | [(set (reg:CC_NZC CC_REGNUM) | |
1173 | (unspec:CC_NZC | |
1174 | [(match_operand:VNx16BI 1 "register_operand" "Upa") | |
1175 | (match_dup 1) | |
1176 | (match_operand:SI 2 "aarch64_sve_ptrue_flag") | |
1177 | (and:VNx16BI | |
1178 | (reg:VNx16BI FFRT_REGNUM) | |
1179 | (match_dup 1))] | |
1180 | UNSPEC_PTEST)) | |
1181 | (set (match_operand:VNx16BI 0 "register_operand" "=Upa") | |
1182 | (and:VNx16BI | |
1183 | (reg:VNx16BI FFRT_REGNUM) | |
1184 | (match_dup 1)))] | |
1185 | "TARGET_SVE" | |
1186 | "rdffrs\t%0.b, %1/z" | |
1187 | ) | |
1188 | ||
1189 | ;; Same for unpredicated RDFFR when tested with a known PTRUE. | |
1190 | (define_insn "*aarch64_rdffr_cc" | |
1191 | [(set (reg:CC_NZC CC_REGNUM) | |
1192 | (unspec:CC_NZC | |
1193 | [(match_operand:VNx16BI 1 "register_operand" "Upa") | |
1194 | (match_dup 1) | |
1195 | (const_int SVE_KNOWN_PTRUE) | |
1196 | (reg:VNx16BI FFRT_REGNUM)] | |
1197 | UNSPEC_PTEST)) | |
1198 | (set (match_operand:VNx16BI 0 "register_operand" "=Upa") | |
1199 | (reg:VNx16BI FFRT_REGNUM))] | |
1200 | "TARGET_SVE" | |
1201 | "rdffrs\t%0.b, %1/z" | |
1202 | ) | |
1203 | ||
1204 | ;; [R3 in the block comment above about FFR handling] | |
1205 | ;; | |
1206 | ;; Arbitrarily update the FFRT after a read from the FFR. This acts as | |
1207 | ;; a scheduling barrier for later LDFF1 and LDNF1 instructions. | |
1208 | (define_insn "aarch64_update_ffrt" | |
1209 | [(set (reg:VNx16BI FFRT_REGNUM) | |
1210 | (unspec:VNx16BI [(reg:VNx16BI FFRT_REGNUM)] UNSPEC_UPDATE_FFRT))] | |
1211 | "TARGET_SVE" | |
1212 | "" | |
1213 | [(set_attr "type" "no_insn")] | |
1214 | ) | |
1215 | ||
915d28fe RS |
1216 | ;; ========================================================================= |
1217 | ;; == Loads | |
1218 | ;; ========================================================================= | |
1219 | ||
1220 | ;; ------------------------------------------------------------------------- | |
1221 | ;; ---- Normal contiguous loads | |
1222 | ;; ------------------------------------------------------------------------- | |
1223 | ;; Includes contiguous forms of: | |
1224 | ;; - LD1B | |
1225 | ;; - LD1D | |
1226 | ;; - LD1H | |
1227 | ;; - LD1W | |
1228 | ;; - LD2B | |
1229 | ;; - LD2D | |
1230 | ;; - LD2H | |
1231 | ;; - LD2W | |
1232 | ;; - LD3B | |
1233 | ;; - LD3D | |
1234 | ;; - LD3H | |
1235 | ;; - LD3W | |
1236 | ;; - LD4B | |
1237 | ;; - LD4D | |
1238 | ;; - LD4H | |
1239 | ;; - LD4W | |
1240 | ;; ------------------------------------------------------------------------- | |
1241 | ||
1242 | ;; Predicated LD1. | |
1243 | (define_insn "maskload<mode><vpred>" | |
cc68f7c2 RS |
1244 | [(set (match_operand:SVE_ALL 0 "register_operand" "=w") |
1245 | (unspec:SVE_ALL | |
915d28fe | 1246 | [(match_operand:<VPRED> 2 "register_operand" "Upl") |
cc68f7c2 | 1247 | (match_operand:SVE_ALL 1 "memory_operand" "m")] |
915d28fe | 1248 | UNSPEC_LD1_SVE))] |
43cacb12 | 1249 | "TARGET_SVE" |
cc68f7c2 | 1250 | "ld1<Vesize>\t%0.<Vctype>, %2/z, %1" |
43cacb12 RS |
1251 | ) |
1252 | ||
915d28fe RS |
1253 | ;; Unpredicated LD[234]. |
1254 | (define_expand "vec_load_lanes<mode><vsingle>" | |
1255 | [(set (match_operand:SVE_STRUCT 0 "register_operand") | |
1256 | (unspec:SVE_STRUCT | |
1257 | [(match_dup 2) | |
1258 | (match_operand:SVE_STRUCT 1 "memory_operand")] | |
1259 | UNSPEC_LDN))] | |
43cacb12 RS |
1260 | "TARGET_SVE" |
1261 | { | |
915d28fe | 1262 | operands[2] = aarch64_ptrue_reg (<VPRED>mode); |
43cacb12 RS |
1263 | } |
1264 | ) | |
1265 | ||
915d28fe RS |
1266 | ;; Predicated LD[234]. |
1267 | (define_insn "vec_mask_load_lanes<mode><vsingle>" | |
1268 | [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w") | |
1269 | (unspec:SVE_STRUCT | |
1270 | [(match_operand:<VPRED> 2 "register_operand" "Upl") | |
1271 | (match_operand:SVE_STRUCT 1 "memory_operand" "m")] | |
1272 | UNSPEC_LDN))] | |
8711e791 | 1273 | "TARGET_SVE" |
915d28fe | 1274 | "ld<vector_count><Vesize>\t%0, %2/z, %1" |
8711e791 RS |
1275 | ) |
1276 | ||
624d0f07 RS |
1277 | ;; ------------------------------------------------------------------------- |
1278 | ;; ---- Extending contiguous loads | |
1279 | ;; ------------------------------------------------------------------------- | |
1280 | ;; Includes contiguous forms of: | |
1281 | ;; LD1B | |
1282 | ;; LD1H | |
1283 | ;; LD1SB | |
1284 | ;; LD1SH | |
1285 | ;; LD1SW | |
1286 | ;; LD1W | |
1287 | ;; ------------------------------------------------------------------------- | |
1288 | ||
1289 | ;; Predicated load and extend, with 8 elements per 128-bit block. | |
7bb4b7a5 | 1290 | (define_insn_and_rewrite "@aarch64_load<SVE_PRED_LOAD:pred_load>_<ANY_EXTEND:optab><SVE_HSDI:mode><SVE_PARTIAL_I:mode>" |
217ccab8 RS |
1291 | [(set (match_operand:SVE_HSDI 0 "register_operand" "=w") |
1292 | (unspec:SVE_HSDI | |
1293 | [(match_operand:<SVE_HSDI:VPRED> 3 "general_operand" "UplDnm") | |
1294 | (ANY_EXTEND:SVE_HSDI | |
1295 | (unspec:SVE_PARTIAL_I | |
1296 | [(match_operand:<SVE_PARTIAL_I:VPRED> 2 "register_operand" "Upl") | |
1297 | (match_operand:SVE_PARTIAL_I 1 "memory_operand" "m")] | |
7bb4b7a5 | 1298 | SVE_PRED_LOAD))] |
217ccab8 RS |
1299 | UNSPEC_PRED_X))] |
1300 | "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0" | |
1301 | "ld1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vctype>, %2/z, %1" | |
1302 | "&& !CONSTANT_P (operands[3])" | |
1303 | { | |
1304 | operands[3] = CONSTM1_RTX (<SVE_HSDI:VPRED>mode); | |
1305 | } | |
624d0f07 RS |
1306 | ) |
1307 | ||
1308 | ;; ------------------------------------------------------------------------- | |
1309 | ;; ---- First-faulting contiguous loads | |
1310 | ;; ------------------------------------------------------------------------- | |
1311 | ;; Includes contiguous forms of: | |
1312 | ;; - LDFF1B | |
1313 | ;; - LDFF1D | |
1314 | ;; - LDFF1H | |
1315 | ;; - LDFF1W | |
1316 | ;; - LDNF1B | |
1317 | ;; - LDNF1D | |
1318 | ;; - LDNF1H | |
1319 | ;; - LDNF1W | |
1320 | ;; ------------------------------------------------------------------------- | |
1321 | ||
1322 | ;; Contiguous non-extending first-faulting or non-faulting loads. | |
1323 | (define_insn "@aarch64_ld<fn>f1<mode>" | |
f75cdd2c RS |
1324 | [(set (match_operand:SVE_FULL 0 "register_operand" "=w") |
1325 | (unspec:SVE_FULL | |
624d0f07 | 1326 | [(match_operand:<VPRED> 2 "register_operand" "Upl") |
f75cdd2c | 1327 | (match_operand:SVE_FULL 1 "aarch64_sve_ld<fn>f1_operand" "Ut<fn>") |
624d0f07 RS |
1328 | (reg:VNx16BI FFRT_REGNUM)] |
1329 | SVE_LDFF1_LDNF1))] | |
1330 | "TARGET_SVE" | |
1331 | "ld<fn>f1<Vesize>\t%0.<Vetype>, %2/z, %1" | |
1332 | ) | |
1333 | ||
1334 | ;; ------------------------------------------------------------------------- | |
1335 | ;; ---- First-faulting extending contiguous loads | |
1336 | ;; ------------------------------------------------------------------------- | |
1337 | ;; Includes contiguous forms of: | |
1338 | ;; - LDFF1B | |
1339 | ;; - LDFF1H | |
1340 | ;; - LDFF1SB | |
1341 | ;; - LDFF1SH | |
1342 | ;; - LDFF1SW | |
1343 | ;; - LDFF1W | |
1344 | ;; - LDNF1B | |
1345 | ;; - LDNF1H | |
1346 | ;; - LDNF1SB | |
1347 | ;; - LDNF1SH | |
1348 | ;; - LDNF1SW | |
1349 | ;; - LDNF1W | |
1350 | ;; ------------------------------------------------------------------------- | |
1351 | ||
217ccab8 RS |
;; Predicated first-faulting or non-faulting load and extend.
;; Operand 2 is the real governing predicate for the load; operand 3 is a
;; dummy predicate for the UNSPEC_PRED_X wrapper, which the rewrite step
;; below canonicalizes to an all-true constant once the insn is matched.
(define_insn_and_rewrite "@aarch64_ld<fn>f1_<ANY_EXTEND:optab><SVE_HSDI:mode><SVE_PARTIAL_I:mode>"
  [(set (match_operand:SVE_HSDI 0 "register_operand" "=w")
	(unspec:SVE_HSDI
	  [(match_operand:<SVE_HSDI:VPRED> 3 "general_operand" "UplDnm")
	   (ANY_EXTEND:SVE_HSDI
	     (unspec:SVE_PARTIAL_I
	       [(match_operand:<SVE_PARTIAL_I:VPRED> 2 "register_operand" "Upl")
		(match_operand:SVE_PARTIAL_I 1 "aarch64_sve_ld<fn>f1_operand" "Ut<fn>")
		(reg:VNx16BI FFRT_REGNUM)]
	       SVE_LDFF1_LDNF1))]
	  UNSPEC_PRED_X))]
  "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
  "ld<fn>f1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vctype>, %2/z, %1"
  "&& !CONSTANT_P (operands[3])"
  {
    operands[3] = CONSTM1_RTX (<SVE_HSDI:VPRED>mode);
  }
)
1371 | ||
1372 | ;; ------------------------------------------------------------------------- | |
1373 | ;; ---- Non-temporal contiguous loads | |
1374 | ;; ------------------------------------------------------------------------- | |
1375 | ;; Includes: | |
1376 | ;; - LDNT1B | |
1377 | ;; - LDNT1D | |
1378 | ;; - LDNT1H | |
1379 | ;; - LDNT1W | |
1380 | ;; ------------------------------------------------------------------------- | |
1381 | ||
;; Predicated contiguous non-temporal load.
;; Operand 2 is the governing predicate; inactive lanes of the result are
;; zeroed (the /z qualifier in the assembly template).
(define_insn "@aarch64_ldnt1<mode>"
  [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
	(unspec:SVE_FULL
	  [(match_operand:<VPRED> 2 "register_operand" "Upl")
	   (match_operand:SVE_FULL 1 "memory_operand" "m")]
	  UNSPEC_LDNT1_SVE))]
  "TARGET_SVE"
  "ldnt1<Vesize>\t%0.<Vetype>, %2/z, %1"
)
1392 | ||
915d28fe RS |
1393 | ;; ------------------------------------------------------------------------- |
1394 | ;; ---- Normal gather loads | |
1395 | ;; ------------------------------------------------------------------------- | |
1396 | ;; Includes gather forms of: | |
1397 | ;; - LD1D | |
1398 | ;; - LD1W | |
1399 | ;; ------------------------------------------------------------------------- | |
1400 | ||
;; Unpredicated gather loads.
;; The expander supplies operand 5 (the governing predicate) itself,
;; as an all-true PTRUE for the container predicate mode.
(define_expand "gather_load<mode><v_int_container>"
  [(set (match_operand:SVE_24 0 "register_operand")
	(unspec:SVE_24
	  [(match_dup 5)
	   (match_operand:DI 1 "aarch64_sve_gather_offset_<Vesize>")
	   (match_operand:<V_INT_CONTAINER> 2 "register_operand")
	   (match_operand:DI 3 "const_int_operand")
	   (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>")
	   (mem:BLK (scratch))]
	  UNSPEC_LD1_GATHER))]
  "TARGET_SVE"
  {
    operands[5] = aarch64_ptrue_reg (<VPRED>mode);
  }
)
1417 | ||
915d28fe RS |
;; Predicated gather loads for 32-bit elements.  Operand 3 is true for
;; unsigned extension and false for signed extension.
;; The alternatives cover vector-base addressing (with an optional
;; immediate offset) and scalar-base plus vector-offset addressing
;; (sxtw/uxtw, with optional scaling by the element size).
(define_insn "mask_gather_load<mode><v_int_container>"
  [(set (match_operand:SVE_4 0 "register_operand" "=w, w, w, w, w, w")
	(unspec:SVE_4
	  [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
	   (match_operand:DI 1 "aarch64_sve_gather_offset_<Vesize>" "Z, vgw, rk, rk, rk, rk")
	   (match_operand:VNx4SI 2 "register_operand" "w, w, w, w, w, w")
	   (match_operand:DI 3 "const_int_operand" "Ui1, Ui1, Z, Ui1, Z, Ui1")
	   (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
	   (mem:BLK (scratch))]
	  UNSPEC_LD1_GATHER))]
  "TARGET_SVE"
  "@
   ld1<Vesize>\t%0.s, %5/z, [%2.s]
   ld1<Vesize>\t%0.s, %5/z, [%2.s, #%1]
   ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw]
   ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw]
   ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
   ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4]"
)
1439 | ||
;; Predicated gather loads for 64-bit elements.  The value of operand 3
;; doesn't matter in this case (the 64-bit offsets need no extension, so
;; operand 3 carries no constraints).
(define_insn "mask_gather_load<mode><v_int_container>"
  [(set (match_operand:SVE_2 0 "register_operand" "=w, w, w, w")
	(unspec:SVE_2
	  [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
	   (match_operand:DI 1 "aarch64_sve_gather_offset_<Vesize>" "Z, vgd, rk, rk")
	   (match_operand:VNx2DI 2 "register_operand" "w, w, w, w")
	   (match_operand:DI 3 "const_int_operand")
	   (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, Ui1, Ui1, i")
	   (mem:BLK (scratch))]
	  UNSPEC_LD1_GATHER))]
  "TARGET_SVE"
  "@
   ld1<Vesize>\t%0.d, %5/z, [%2.d]
   ld1<Vesize>\t%0.d, %5/z, [%2.d, #%1]
   ld1<Vesize>\t%0.d, %5/z, [%1, %2.d]
   ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4]"
)
1459 | ||
f8186eea RS |
;; Likewise, but with the offset being extended from 32 bits.
;; Operand 6 is a dummy predicate for the UNSPEC_PRED_X extension of the
;; offsets; the rewrite step canonicalizes it to an all-true constant
;; once the insn has been matched.
(define_insn_and_rewrite "*mask_gather_load<mode><v_int_container>_<su>xtw_unpacked"
  [(set (match_operand:SVE_2 0 "register_operand" "=w, w")
	(unspec:SVE_2
	  [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
	   (match_operand:DI 1 "register_operand" "rk, rk")
	   (unspec:VNx2DI
	     [(match_operand 6)
	      (ANY_EXTEND:VNx2DI
		(match_operand:VNx2SI 2 "register_operand" "w, w"))]
	     UNSPEC_PRED_X)
	   (match_operand:DI 3 "const_int_operand")
	   (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
	   (mem:BLK (scratch))]
	  UNSPEC_LD1_GATHER))]
  "TARGET_SVE"
  "@
   ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, <su>xtw]
   ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, <su>xtw %p4]"
  "&& !CONSTANT_P (operands[6])"
  {
    operands[6] = CONSTM1_RTX (VNx2BImode);
  }
)
1484 | ||
;; Likewise, but with the offset being truncated to 32 bits and then
;; sign-extended.
;; Operand 6 is a dummy predicate for the UNSPEC_PRED_X wrapper around the
;; truncate/sign_extend pair; the rewrite step canonicalizes it to an
;; all-true constant once the insn has been matched.
(define_insn_and_rewrite "*mask_gather_load<mode><v_int_container>_sxtw"
  [(set (match_operand:SVE_2 0 "register_operand" "=w, w")
	(unspec:SVE_2
	  [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
	   (match_operand:DI 1 "register_operand" "rk, rk")
	   (unspec:VNx2DI
	     [(match_operand 6)
	      (sign_extend:VNx2DI
		(truncate:VNx2SI
		  (match_operand:VNx2DI 2 "register_operand" "w, w")))]
	     UNSPEC_PRED_X)
	   (match_operand:DI 3 "const_int_operand")
	   (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
	   (mem:BLK (scratch))]
	  UNSPEC_LD1_GATHER))]
  "TARGET_SVE"
  "@
   ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw]
   ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw %p4]"
  "&& !CONSTANT_P (operands[6])"
  {
    operands[6] = CONSTM1_RTX (VNx2BImode);
  }
)
1511 | ||
f8186eea RS |
;; Likewise, but with the offset being truncated to 32 bits and then
;; zero-extended.  The truncation is represented as an AND with a
;; constant mask (operand 6), so no rewrite step is needed.
(define_insn "*mask_gather_load<mode><v_int_container>_uxtw"
  [(set (match_operand:SVE_2 0 "register_operand" "=w, w")
	(unspec:SVE_2
	  [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
	   (match_operand:DI 1 "register_operand" "rk, rk")
	   (and:VNx2DI
	     (match_operand:VNx2DI 2 "register_operand" "w, w")
	     (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
	   (match_operand:DI 3 "const_int_operand")
	   (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
	   (mem:BLK (scratch))]
	  UNSPEC_LD1_GATHER))]
  "TARGET_SVE"
  "@
   ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw]
   ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw %p4]"
)
915d28fe RS |
1531 | |
1532 | ;; ------------------------------------------------------------------------- | |
624d0f07 | 1533 | ;; ---- Extending gather loads |
915d28fe | 1534 | ;; ------------------------------------------------------------------------- |
624d0f07 RS |
1535 | ;; Includes gather forms of: |
1536 | ;; - LD1B | |
1537 | ;; - LD1H | |
1538 | ;; - LD1SB | |
1539 | ;; - LD1SH | |
1540 | ;; - LD1SW | |
1541 | ;; - LD1W | |
915d28fe RS |
1542 | ;; ------------------------------------------------------------------------- |
1543 | ||
624d0f07 RS |
;; Predicated extending gather loads for 32-bit elements.  Operand 3 is
;; true for unsigned extension and false for signed extension.
;; Operand 6 is a dummy predicate for the UNSPEC_PRED_X wrapper; the
;; rewrite step canonicalizes it to an all-true constant once matched.
(define_insn_and_rewrite "@aarch64_gather_load_<ANY_EXTEND:optab><SVE_4HSI:mode><SVE_4BHI:mode>"
  [(set (match_operand:SVE_4HSI 0 "register_operand" "=w, w, w, w, w, w")
	(unspec:SVE_4HSI
	  [(match_operand:VNx4BI 6 "general_operand" "UplDnm, UplDnm, UplDnm, UplDnm, UplDnm, UplDnm")
	   (ANY_EXTEND:SVE_4HSI
	     (unspec:SVE_4BHI
	       [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
		(match_operand:DI 1 "aarch64_sve_gather_offset_<SVE_4BHI:Vesize>" "Z, vg<SVE_4BHI:Vesize>, rk, rk, rk, rk")
		(match_operand:VNx4SI 2 "register_operand" "w, w, w, w, w, w")
		(match_operand:DI 3 "const_int_operand" "Ui1, Ui1, Z, Ui1, Z, Ui1")
		(match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_4BHI:Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
		(mem:BLK (scratch))]
	       UNSPEC_LD1_GATHER))]
	  UNSPEC_PRED_X))]
  "TARGET_SVE && (~<SVE_4HSI:narrower_mask> & <SVE_4BHI:self_mask>) == 0"
  "@
   ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%2.s]
   ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%2.s, #%1]
   ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw]
   ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw]
   ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
   ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4]"
  "&& !CONSTANT_P (operands[6])"
  {
    operands[6] = CONSTM1_RTX (VNx4BImode);
  }
)
1573 | ||
;; Predicated extending gather loads for 64-bit elements.  The value of
;; operand 3 doesn't matter in this case.
;; Operand 6 is a dummy predicate for the UNSPEC_PRED_X wrapper; the
;; rewrite step canonicalizes it to an all-true constant once matched.
(define_insn_and_rewrite "@aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>"
  [(set (match_operand:SVE_2HSDI 0 "register_operand" "=w, w, w, w")
	(unspec:SVE_2HSDI
	  [(match_operand:VNx2BI 6 "general_operand" "UplDnm, UplDnm, UplDnm, UplDnm")
	   (ANY_EXTEND:SVE_2HSDI
	     (unspec:SVE_2BHSI
	       [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
		(match_operand:DI 1 "aarch64_sve_gather_offset_<SVE_2BHSI:Vesize>" "Z, vg<SVE_2BHSI:Vesize>, rk, rk")
		(match_operand:VNx2DI 2 "register_operand" "w, w, w, w")
		(match_operand:DI 3 "const_int_operand")
		(match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>" "Ui1, Ui1, Ui1, i")
		(mem:BLK (scratch))]
	       UNSPEC_LD1_GATHER))]
	  UNSPEC_PRED_X))]
  "TARGET_SVE && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0"
  "@
   ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%2.d]
   ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%2.d, #%1]
   ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d]
   ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4]"
  "&& !CONSTANT_P (operands[6])"
  {
    operands[6] = CONSTM1_RTX (VNx2BImode);
  }
)
1601 | ||
87a80d27 RS |
;; Likewise, but with the offset being extended from 32 bits.
;; Operands 6 and 7 are dummy predicates for the two UNSPEC_PRED_X
;; wrappers (result extension and offset extension respectively); the
;; rewrite step canonicalizes both to all-true constants once matched.
(define_insn_and_rewrite "*aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>_<ANY_EXTEND2:su>xtw_unpacked"
  [(set (match_operand:SVE_2HSDI 0 "register_operand" "=w, w")
	(unspec:SVE_2HSDI
	  [(match_operand 6)
	   (ANY_EXTEND:SVE_2HSDI
	     (unspec:SVE_2BHSI
	       [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
		(match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk")
		(unspec:VNx2DI
		  [(match_operand 7)
		   (ANY_EXTEND2:VNx2DI
		     (match_operand:VNx2SI 2 "register_operand" "w, w"))]
		  UNSPEC_PRED_X)
		(match_operand:DI 3 "const_int_operand")
		(match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>" "Ui1, i")
		(mem:BLK (scratch))]
	       UNSPEC_LD1_GATHER))]
	  UNSPEC_PRED_X))]
  "TARGET_SVE && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0"
  "@
   ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, <ANY_EXTEND2:su>xtw]
   ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, <ANY_EXTEND2:su>xtw %p4]"
  "&& (!CONSTANT_P (operands[6]) || !CONSTANT_P (operands[7]))"
  {
    operands[6] = CONSTM1_RTX (VNx2BImode);
    operands[7] = CONSTM1_RTX (VNx2BImode);
  }
)
1631 | ||
87a80d27 RS |
;; Likewise, but with the offset being truncated to 32 bits and then
;; sign-extended.
;; Operands 6 and 7 are dummy predicates for the two UNSPEC_PRED_X
;; wrappers; the rewrite step canonicalizes both to all-true constants
;; once the insn has been matched.
(define_insn_and_rewrite "*aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>_sxtw"
  [(set (match_operand:SVE_2HSDI 0 "register_operand" "=w, w")
	(unspec:SVE_2HSDI
	  [(match_operand 6)
	   (ANY_EXTEND:SVE_2HSDI
	     (unspec:SVE_2BHSI
	       [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
		(match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk")
		(unspec:VNx2DI
		  [(match_operand 7)
		   (sign_extend:VNx2DI
		     (truncate:VNx2SI
		       (match_operand:VNx2DI 2 "register_operand" "w, w")))]
		  UNSPEC_PRED_X)
		(match_operand:DI 3 "const_int_operand")
		(match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>" "Ui1, i")
		(mem:BLK (scratch))]
	       UNSPEC_LD1_GATHER))]
	  UNSPEC_PRED_X))]
  "TARGET_SVE && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0"
  "@
   ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw]
   ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw %p4]"
  "&& (!CONSTANT_P (operands[6]) || !CONSTANT_P (operands[7]))"
  {
    operands[6] = CONSTM1_RTX (VNx2BImode);
    operands[7] = CONSTM1_RTX (VNx2BImode);
  }
)
1663 | ||
;; Likewise, but with the offset being truncated to 32 bits and then
;; zero-extended.  The truncation is represented as an AND with a
;; constant mask (operand 6); operand 7 is a dummy predicate for the
;; UNSPEC_PRED_X wrapper, canonicalized to all-true by the rewrite step.
(define_insn_and_rewrite "*aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>_uxtw"
  [(set (match_operand:SVE_2HSDI 0 "register_operand" "=w, w")
	(unspec:SVE_2HSDI
	  [(match_operand 7)
	   (ANY_EXTEND:SVE_2HSDI
	     (unspec:SVE_2BHSI
	       [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
		(match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk")
		(and:VNx2DI
		  (match_operand:VNx2DI 2 "register_operand" "w, w")
		  (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
		(match_operand:DI 3 "const_int_operand")
		(match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>" "Ui1, i")
		(mem:BLK (scratch))]
	       UNSPEC_LD1_GATHER))]
	  UNSPEC_PRED_X))]
  "TARGET_SVE && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0"
  "@
   ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw]
   ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw %p4]"
  "&& !CONSTANT_P (operands[7])"
  {
    operands[7] = CONSTM1_RTX (VNx2BImode);
  }
)
1691 | ||
1692 | ;; ------------------------------------------------------------------------- | |
624d0f07 | 1693 | ;; ---- First-faulting gather loads |
915d28fe | 1694 | ;; ------------------------------------------------------------------------- |
624d0f07 RS |
1695 | ;; Includes gather forms of: |
1696 | ;; - LDFF1D | |
1697 | ;; - LDFF1W | |
915d28fe RS |
1698 | ;; ------------------------------------------------------------------------- |
1699 | ||
624d0f07 RS |
;; Predicated first-faulting gather loads for 32-bit elements.  Operand
;; 3 is true for unsigned extension and false for signed extension.
;; The FFRT register is an input so that faulting behaviour is tracked
;; through the first-fault register.
(define_insn "@aarch64_ldff1_gather<mode>"
  [(set (match_operand:SVE_FULL_S 0 "register_operand" "=w, w, w, w, w, w")
	(unspec:SVE_FULL_S
	  [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
	   (match_operand:DI 1 "aarch64_sve_gather_offset_w" "Z, vgw, rk, rk, rk, rk")
	   (match_operand:VNx4SI 2 "register_operand" "w, w, w, w, w, w")
	   (match_operand:DI 3 "const_int_operand" "i, i, Z, Ui1, Z, Ui1")
	   (match_operand:DI 4 "aarch64_gather_scale_operand_w" "Ui1, Ui1, Ui1, Ui1, i, i")
	   (mem:BLK (scratch))
	   (reg:VNx16BI FFRT_REGNUM)]
	  UNSPEC_LDFF1_GATHER))]
  "TARGET_SVE"
  "@
   ldff1w\t%0.s, %5/z, [%2.s]
   ldff1w\t%0.s, %5/z, [%2.s, #%1]
   ldff1w\t%0.s, %5/z, [%1, %2.s, sxtw]
   ldff1w\t%0.s, %5/z, [%1, %2.s, uxtw]
   ldff1w\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
   ldff1w\t%0.s, %5/z, [%1, %2.s, uxtw %p4]"
)
1722 | ||
;; Predicated first-faulting gather loads for 64-bit elements.  The value
;; of operand 3 doesn't matter in this case.
(define_insn "@aarch64_ldff1_gather<mode>"
  [(set (match_operand:SVE_FULL_D 0 "register_operand" "=w, w, w, w")
	(unspec:SVE_FULL_D
	  [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
	   (match_operand:DI 1 "aarch64_sve_gather_offset_d" "Z, vgd, rk, rk")
	   (match_operand:VNx2DI 2 "register_operand" "w, w, w, w")
	   (match_operand:DI 3 "const_int_operand")
	   (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, Ui1, Ui1, i")
	   (mem:BLK (scratch))
	   (reg:VNx16BI FFRT_REGNUM)]
	  UNSPEC_LDFF1_GATHER))]
  "TARGET_SVE"
  "@
   ldff1d\t%0.d, %5/z, [%2.d]
   ldff1d\t%0.d, %5/z, [%2.d, #%1]
   ldff1d\t%0.d, %5/z, [%1, %2.d]
   ldff1d\t%0.d, %5/z, [%1, %2.d, lsl %p4]"
)
1743 | ||
;; Likewise, but with the offset being sign-extended from 32 bits.
;; Operand 6 is a dummy predicate for the UNSPEC_PRED_X wrapper around
;; the truncate/sign_extend pair; the rewrite step canonicalizes it to
;; an all-true constant once the insn has been matched.
(define_insn_and_rewrite "*aarch64_ldff1_gather<mode>_sxtw"
  [(set (match_operand:SVE_FULL_D 0 "register_operand" "=w, w")
	(unspec:SVE_FULL_D
	  [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
	   (match_operand:DI 1 "register_operand" "rk, rk")
	   (unspec:VNx2DI
	     [(match_operand 6)
	      (sign_extend:VNx2DI
		(truncate:VNx2SI
		  (match_operand:VNx2DI 2 "register_operand" "w, w")))]
	     UNSPEC_PRED_X)
	   (match_operand:DI 3 "const_int_operand")
	   (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, i")
	   (mem:BLK (scratch))
	   (reg:VNx16BI FFRT_REGNUM)]
	  UNSPEC_LDFF1_GATHER))]
  "TARGET_SVE"
  "@
   ldff1d\t%0.d, %5/z, [%1, %2.d, sxtw]
   ldff1d\t%0.d, %5/z, [%1, %2.d, sxtw %p4]"
  "&& !CONSTANT_P (operands[6])"
  {
    operands[6] = CONSTM1_RTX (VNx2BImode);
  }
)
1770 | ||
;; Likewise, but with the offset being zero-extended from 32 bits.
;; The zero-extension is represented as an AND with a constant mask
;; (operand 6), so no rewrite step is needed.
(define_insn "*aarch64_ldff1_gather<mode>_uxtw"
  [(set (match_operand:SVE_FULL_D 0 "register_operand" "=w, w")
	(unspec:SVE_FULL_D
	  [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
	   (match_operand:DI 1 "register_operand" "rk, rk")
	   (and:VNx2DI
	     (match_operand:VNx2DI 2 "register_operand" "w, w")
	     (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
	   (match_operand:DI 3 "const_int_operand")
	   (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, i")
	   (mem:BLK (scratch))
	   (reg:VNx16BI FFRT_REGNUM)]
	  UNSPEC_LDFF1_GATHER))]
  "TARGET_SVE"
  "@
   ldff1d\t%0.d, %5/z, [%1, %2.d, uxtw]
   ldff1d\t%0.d, %5/z, [%1, %2.d, uxtw %p4]"
)
1790 | ||
1791 | ;; ------------------------------------------------------------------------- | |
1792 | ;; ---- First-faulting extending gather loads | |
1793 | ;; ------------------------------------------------------------------------- | |
1794 | ;; Includes gather forms of: | |
1795 | ;; - LDFF1B | |
1796 | ;; - LDFF1H | |
1797 | ;; - LDFF1SB | |
1798 | ;; - LDFF1SH | |
1799 | ;; - LDFF1SW | |
1800 | ;; - LDFF1W | |
1801 | ;; ------------------------------------------------------------------------- | |
1802 | ||
;; Predicated extending first-faulting gather loads for 32-bit elements.
;; Operand 3 is true for unsigned extension and false for signed extension.
;; Operand 6 is a dummy predicate for the UNSPEC_PRED_X wrapper; the
;; rewrite step canonicalizes it to an all-true constant once matched.
(define_insn_and_rewrite "@aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx4_WIDE:mode><VNx4_NARROW:mode>"
  [(set (match_operand:VNx4_WIDE 0 "register_operand" "=w, w, w, w, w, w")
	(unspec:VNx4_WIDE
	  [(match_operand:VNx4BI 6 "general_operand" "UplDnm, UplDnm, UplDnm, UplDnm, UplDnm, UplDnm")
	   (ANY_EXTEND:VNx4_WIDE
	     (unspec:VNx4_NARROW
	       [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
		(match_operand:DI 1 "aarch64_sve_gather_offset_<VNx4_NARROW:Vesize>" "Z, vg<VNx4_NARROW:Vesize>, rk, rk, rk, rk")
		(match_operand:VNx4_WIDE 2 "register_operand" "w, w, w, w, w, w")
		(match_operand:DI 3 "const_int_operand" "i, i, Z, Ui1, Z, Ui1")
		(match_operand:DI 4 "aarch64_gather_scale_operand_<VNx4_NARROW:Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
		(mem:BLK (scratch))
		(reg:VNx16BI FFRT_REGNUM)]
	       UNSPEC_LDFF1_GATHER))]
	  UNSPEC_PRED_X))]
  "TARGET_SVE"
  "@
   ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%2.s]
   ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%2.s, #%1]
   ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw]
   ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw]
   ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
   ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4]"
  "&& !CONSTANT_P (operands[6])"
  {
    operands[6] = CONSTM1_RTX (VNx4BImode);
  }
)
1833 | ||
;; Predicated extending first-faulting gather loads for 64-bit elements.
;; The value of operand 3 doesn't matter in this case.
;; Operand 6 is a dummy predicate for the UNSPEC_PRED_X wrapper; the
;; rewrite step canonicalizes it to an all-true constant once matched.
(define_insn_and_rewrite "@aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>"
  [(set (match_operand:VNx2_WIDE 0 "register_operand" "=w, w, w, w")
	(unspec:VNx2_WIDE
	  [(match_operand:VNx2BI 6 "general_operand" "UplDnm, UplDnm, UplDnm, UplDnm")
	   (ANY_EXTEND:VNx2_WIDE
	     (unspec:VNx2_NARROW
	       [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
		(match_operand:DI 1 "aarch64_sve_gather_offset_<VNx2_NARROW:Vesize>" "Z, vg<VNx2_NARROW:Vesize>, rk, rk")
		(match_operand:VNx2_WIDE 2 "register_operand" "w, w, w, w")
		(match_operand:DI 3 "const_int_operand")
		(match_operand:DI 4 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, Ui1, Ui1, i")
		(mem:BLK (scratch))
		(reg:VNx16BI FFRT_REGNUM)]
	       UNSPEC_LDFF1_GATHER))]
	  UNSPEC_PRED_X))]
  "TARGET_SVE"
  "@
   ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%2.d]
   ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%2.d, #%1]
   ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d]
   ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4]"
  "&& !CONSTANT_P (operands[6])"
  {
    operands[6] = CONSTM1_RTX (VNx2BImode);
  }
)
1862 | ||
;; Likewise, but with the offset being sign-extended from 32 bits.
;; Operands 6 and 7 are dummy predicates for the two UNSPEC_PRED_X
;; wrappers; the rewrite step canonicalizes both to all-true constants
;; once the insn has been matched.
(define_insn_and_rewrite "*aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>_sxtw"
  [(set (match_operand:VNx2_WIDE 0 "register_operand" "=w, w")
	(unspec:VNx2_WIDE
	  [(match_operand 6)
	   (ANY_EXTEND:VNx2_WIDE
	     (unspec:VNx2_NARROW
	       [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
		(match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk")
		(unspec:VNx2DI
		  [(match_operand 7)
		   (sign_extend:VNx2DI
		     (truncate:VNx2SI
		       (match_operand:VNx2DI 2 "register_operand" "w, w")))]
		  UNSPEC_PRED_X)
		(match_operand:DI 3 "const_int_operand")
		(match_operand:DI 4 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, i")
		(mem:BLK (scratch))
		(reg:VNx16BI FFRT_REGNUM)]
	       UNSPEC_LDFF1_GATHER))]
	  UNSPEC_PRED_X))]
  "TARGET_SVE"
  "@
   ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw]
   ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw %p4]"
  "&& (!CONSTANT_P (operands[6]) || !CONSTANT_P (operands[7]))"
  {
    operands[6] = CONSTM1_RTX (VNx2BImode);
    operands[7] = CONSTM1_RTX (VNx2BImode);
  }
)
1894 | ||
;; Likewise, but with the offset being zero-extended from 32 bits.
;; The zero-extension is represented as an AND with a constant mask
;; (operand 6); operand 7 is a dummy predicate for the UNSPEC_PRED_X
;; wrapper, canonicalized to all-true by the rewrite step.
(define_insn_and_rewrite "*aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>_uxtw"
  [(set (match_operand:VNx2_WIDE 0 "register_operand" "=w, w")
	(unspec:VNx2_WIDE
	  [(match_operand 7)
	   (ANY_EXTEND:VNx2_WIDE
	     (unspec:VNx2_NARROW
	       [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
		(match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk")
		(and:VNx2DI
		  (match_operand:VNx2DI 2 "register_operand" "w, w")
		  (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
		(match_operand:DI 3 "const_int_operand")
		(match_operand:DI 4 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, i")
		(mem:BLK (scratch))
		(reg:VNx16BI FFRT_REGNUM)]
	       UNSPEC_LDFF1_GATHER))]
	  UNSPEC_PRED_X))]
  "TARGET_SVE"
  "@
   ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw]
   ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw %p4]"
  "&& !CONSTANT_P (operands[7])"
  {
    operands[7] = CONSTM1_RTX (VNx2BImode);
  }
)
1922 | ||
1923 | ;; ========================================================================= | |
1924 | ;; == Prefetches | |
1925 | ;; ========================================================================= | |
1926 | ||
1927 | ;; ------------------------------------------------------------------------- | |
1928 | ;; ---- Contiguous prefetches | |
1929 | ;; ------------------------------------------------------------------------- | |
1930 | ;; Includes contiguous forms of: | |
1931 | ;; - PRFB | |
1932 | ;; - PRFD | |
1933 | ;; - PRFH | |
1934 | ;; - PRFW | |
1935 | ;; ------------------------------------------------------------------------- | |
1936 | ||
;; Contiguous predicated prefetches.  Operand 2 gives the real prefetch
;; operation (as an svprfop), with operands 3 and 4 providing distilled
;; information.
;; Operand 1 is matched as a bare address and wrapped in a MEM here so
;; that %1 prints as a memory reference.
(define_insn "@aarch64_sve_prefetch<mode>"
  [(prefetch (unspec:DI
	       [(match_operand:<VPRED> 0 "register_operand" "Upl")
		(match_operand:SVE_FULL_I 1 "aarch64_sve_prefetch_operand" "UP<Vesize>")
		(match_operand:DI 2 "const_int_operand")]
	       UNSPEC_SVE_PREFETCH)
	     (match_operand:DI 3 "const_int_operand")
	     (match_operand:DI 4 "const_int_operand"))]
  "TARGET_SVE"
  {
    operands[1] = gen_rtx_MEM (<MODE>mode, operands[1]);
    return aarch64_output_sve_prefetch ("prf<Vesize>", operands[2], "%0, %1");
  }
)
1954 | ||
1955 | ;; ------------------------------------------------------------------------- | |
1956 | ;; ---- Gather prefetches | |
1957 | ;; ------------------------------------------------------------------------- | |
1958 | ;; Includes gather forms of: | |
1959 | ;; - PRFB | |
1960 | ;; - PRFD | |
1961 | ;; - PRFH | |
1962 | ;; - PRFW | |
1963 | ;; ------------------------------------------------------------------------- | |
1964 | ||
;; Predicated gather prefetches for 32-bit bases and offsets.  The operands
;; are:
;; 0: the governing predicate
;; 1: the scalar component of the address
;; 2: the vector component of the address
;; 3: 1 for zero extension, 0 for sign extension
;; 4: the scale multiplier
;; 5: a vector zero that identifies the mode of data being accessed
;; 6: the prefetch operator (an svprfop)
;; 7: the normal RTL prefetch rw flag
;; 8: the normal RTL prefetch locality value
(define_insn "@aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx4SI_ONLY:mode>"
  [(prefetch (unspec:DI
	       [(match_operand:VNx4BI 0 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
		(match_operand:DI 1 "aarch64_sve_gather_offset_<SVE_FULL_I:Vesize>" "Z, vg<SVE_FULL_I:Vesize>, rk, rk, rk, rk")
		(match_operand:VNx4SI_ONLY 2 "register_operand" "w, w, w, w, w, w")
		(match_operand:DI 3 "const_int_operand" "i, i, Z, Ui1, Z, Ui1")
		(match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_FULL_I:Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
		(match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero")
		(match_operand:DI 6 "const_int_operand")]
	       UNSPEC_SVE_PREFETCH_GATHER)
	     (match_operand:DI 7 "const_int_operand")
	     (match_operand:DI 8 "const_int_operand"))]
  "TARGET_SVE"
  {
    /* Each row gives the mnemonic and address template for the
       corresponding constraint alternative.  */
    static const char *const insns[][2] = {
      "prf<SVE_FULL_I:Vesize>", "%0, [%2.s]",
      "prf<SVE_FULL_I:Vesize>", "%0, [%2.s, #%1]",
      "prfb", "%0, [%1, %2.s, sxtw]",
      "prfb", "%0, [%1, %2.s, uxtw]",
      "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.s, sxtw %p4]",
      "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.s, uxtw %p4]"
    };
    const char *const *parts = insns[which_alternative];
    return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]);
  }
)
2002 | ||
;; Predicated gather prefetches for 64-bit elements.  The value of operand 3
;; doesn't matter in this case.
(define_insn "@aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx2DI_ONLY:mode>"
  [(prefetch (unspec:DI
	       [(match_operand:VNx2BI 0 "register_operand" "Upl, Upl, Upl, Upl")
		(match_operand:DI 1 "aarch64_sve_gather_offset_<SVE_FULL_I:Vesize>" "Z, vg<SVE_FULL_I:Vesize>, rk, rk")
		(match_operand:VNx2DI_ONLY 2 "register_operand" "w, w, w, w")
		(match_operand:DI 3 "const_int_operand")
		(match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_FULL_I:Vesize>" "Ui1, Ui1, Ui1, i")
		(match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero")
		(match_operand:DI 6 "const_int_operand")]
	       UNSPEC_SVE_PREFETCH_GATHER)
	     (match_operand:DI 7 "const_int_operand")
	     (match_operand:DI 8 "const_int_operand"))]
  "TARGET_SVE"
  {
    /* Each row gives the mnemonic and address template for the
       corresponding constraint alternative.  */
    static const char *const insns[][2] = {
      "prf<SVE_FULL_I:Vesize>", "%0, [%2.d]",
      "prf<SVE_FULL_I:Vesize>", "%0, [%2.d, #%1]",
      "prfb", "%0, [%1, %2.d]",
      "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.d, lsl %p4]"
    };
    const char *const *parts = insns[which_alternative];
    return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]);
  }
)
2029 | ||
;; Likewise, but with the offset being sign-extended from 32 bits.
;; The extension is wrapped in UNSPEC_PRED_X with governing predicate
;; operand 9.
(define_insn_and_rewrite "*aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx2DI_ONLY:mode>_sxtw"
  [(prefetch (unspec:DI
	       [(match_operand:VNx2BI 0 "register_operand" "Upl, Upl")
		(match_operand:DI 1 "register_operand" "rk, rk")
		(unspec:VNx2DI_ONLY
		  [(match_operand 9)
		   (sign_extend:VNx2DI
		     (truncate:VNx2SI
		       (match_operand:VNx2DI 2 "register_operand" "w, w")))]
		  UNSPEC_PRED_X)
		(match_operand:DI 3 "const_int_operand")
		(match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_FULL_I:Vesize>" "Ui1, i")
		(match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero")
		(match_operand:DI 6 "const_int_operand")]
	       UNSPEC_SVE_PREFETCH_GATHER)
	     (match_operand:DI 7 "const_int_operand")
	     (match_operand:DI 8 "const_int_operand"))]
  "TARGET_SVE"
  {
    static const char *const insns[][2] = {
      "prfb", "%0, [%1, %2.d, sxtw]",
      "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.d, sxtw %p4]"
    };
    const char *const *parts = insns[which_alternative];
    return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]);
  }
  ;; Make the extension's governing predicate (operand 9) reuse the
  ;; prefetch predicate (operand 0).
  "&& !rtx_equal_p (operands[0], operands[9])"
  {
    operands[9] = copy_rtx (operands[0]);
  }
)
2062 | ||
;; Likewise, but with the offset being zero-extended from 32 bits.
;; The zero extension is represented as an AND with a mask immediate
;; (aarch64_sve_uxtw_immediate) rather than an explicit extend.
(define_insn "*aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx2DI_ONLY:mode>_uxtw"
  [(prefetch (unspec:DI
	       [(match_operand:VNx2BI 0 "register_operand" "Upl, Upl")
		(match_operand:DI 1 "register_operand" "rk, rk")
		(and:VNx2DI_ONLY
		  (match_operand:VNx2DI 2 "register_operand" "w, w")
		  (match_operand:VNx2DI 9 "aarch64_sve_uxtw_immediate"))
		(match_operand:DI 3 "const_int_operand")
		(match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_FULL_I:Vesize>" "Ui1, i")
		(match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero")
		(match_operand:DI 6 "const_int_operand")]
	       UNSPEC_SVE_PREFETCH_GATHER)
	     (match_operand:DI 7 "const_int_operand")
	     (match_operand:DI 8 "const_int_operand"))]
  "TARGET_SVE"
  {
    static const char *const insns[][2] = {
      "prfb", "%0, [%1, %2.d, uxtw]",
      "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.d, uxtw %p4]"
    };
    const char *const *parts = insns[which_alternative];
    return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]);
  }
)
2088 | ||
2089 | ;; ========================================================================= | |
2090 | ;; == Stores | |
2091 | ;; ========================================================================= | |
2092 | ||
2093 | ;; ------------------------------------------------------------------------- | |
2094 | ;; ---- Normal contiguous stores | |
2095 | ;; ------------------------------------------------------------------------- | |
2096 | ;; Includes contiguous forms of: | |
2097 | ;; - ST1B | |
2098 | ;; - ST1D | |
2099 | ;; - ST1H | |
2100 | ;; - ST1W | |
2101 | ;; - ST2B | |
2102 | ;; - ST2D | |
2103 | ;; - ST2H | |
2104 | ;; - ST2W | |
2105 | ;; - ST3B | |
2106 | ;; - ST3D | |
2107 | ;; - ST3H | |
2108 | ;; - ST3W | |
2109 | ;; - ST4B | |
2110 | ;; - ST4D | |
2111 | ;; - ST4H | |
2112 | ;; - ST4W | |
2113 | ;; ------------------------------------------------------------------------- | |
2114 | ||
;; Predicated ST1.  Store vector operand 1 to memory operand 0 under the
;; control of predicate operand 2.  The (match_dup 0) input records a
;; dependence on the previous memory contents, since only active lanes
;; are written.
(define_insn "maskstore<mode><vpred>"
  [(set (match_operand:SVE_ALL 0 "memory_operand" "+m")
	(unspec:SVE_ALL
	  [(match_operand:<VPRED> 2 "register_operand" "Upl")
	   (match_operand:SVE_ALL 1 "register_operand" "w")
	   (match_dup 0)]
	  UNSPEC_ST1_SVE))]
  "TARGET_SVE"
  "st1<Vesize>\t%1.<Vctype>, %2, %0"
)
2126 | ||
;; Unpredicated ST[234].  This is always a full update, so the dependence
;; on the old value of the memory location (via (match_dup 0)) is redundant.
;; There doesn't seem to be any obvious benefit to treating the all-true
;; case differently though.  In particular, it's very unlikely that we'll
;; only find out during RTL that a store_lanes is dead.
(define_expand "vec_store_lanes<mode><vsingle>"
  [(set (match_operand:SVE_STRUCT 0 "memory_operand")
	(unspec:SVE_STRUCT
	  [(match_dup 2)
	   (match_operand:SVE_STRUCT 1 "register_operand")
	   (match_dup 0)]
	  UNSPEC_STN))]
  "TARGET_SVE"
  {
    ;; Supply an all-true predicate for the UNSPEC_STN pattern.
    operands[2] = aarch64_ptrue_reg (<VPRED>mode);
  }
)
2144 | ||
;; Predicated ST[234].  Operand 2 is the governing predicate; inactive
;; lanes depend on the old memory value via (match_dup 0).
(define_insn "vec_mask_store_lanes<mode><vsingle>"
  [(set (match_operand:SVE_STRUCT 0 "memory_operand" "+m")
	(unspec:SVE_STRUCT
	  [(match_operand:<VPRED> 2 "register_operand" "Upl")
	   (match_operand:SVE_STRUCT 1 "register_operand" "w")
	   (match_dup 0)]
	  UNSPEC_STN))]
  "TARGET_SVE"
  "st<vector_count><Vesize>\t%1, %2, %0"
)
2156 | ||
2157 | ;; ------------------------------------------------------------------------- | |
2158 | ;; ---- Truncating contiguous stores | |
2159 | ;; ------------------------------------------------------------------------- | |
2160 | ;; Includes: | |
2161 | ;; - ST1B | |
2162 | ;; - ST1H | |
2163 | ;; - ST1W | |
2164 | ;; ------------------------------------------------------------------------- | |
2165 | ||
;; Predicated truncate and store, with 8 elements per 128-bit block.
;; Narrows each wide element to the narrow element size before storing.
(define_insn "@aarch64_store_trunc<VNx8_NARROW:mode><VNx8_WIDE:mode>"
  [(set (match_operand:VNx8_NARROW 0 "memory_operand" "+m")
	(unspec:VNx8_NARROW
	  [(match_operand:VNx8BI 2 "register_operand" "Upl")
	   (truncate:VNx8_NARROW
	     (match_operand:VNx8_WIDE 1 "register_operand" "w"))
	   (match_dup 0)]
	  UNSPEC_ST1_SVE))]
  "TARGET_SVE"
  "st1<VNx8_NARROW:Vesize>\t%1.<VNx8_WIDE:Vetype>, %2, %0"
)
2178 | ||
;; Predicated truncate and store, with 4 elements per 128-bit block.
(define_insn "@aarch64_store_trunc<VNx4_NARROW:mode><VNx4_WIDE:mode>"
  [(set (match_operand:VNx4_NARROW 0 "memory_operand" "+m")
	(unspec:VNx4_NARROW
	  [(match_operand:VNx4BI 2 "register_operand" "Upl")
	   (truncate:VNx4_NARROW
	     (match_operand:VNx4_WIDE 1 "register_operand" "w"))
	   (match_dup 0)]
	  UNSPEC_ST1_SVE))]
  "TARGET_SVE"
  "st1<VNx4_NARROW:Vesize>\t%1.<VNx4_WIDE:Vetype>, %2, %0"
)
2191 | ||
;; Predicated truncate and store, with 2 elements per 128-bit block.
(define_insn "@aarch64_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>"
  [(set (match_operand:VNx2_NARROW 0 "memory_operand" "+m")
	(unspec:VNx2_NARROW
	  [(match_operand:VNx2BI 2 "register_operand" "Upl")
	   (truncate:VNx2_NARROW
	     (match_operand:VNx2_WIDE 1 "register_operand" "w"))
	   (match_dup 0)]
	  UNSPEC_ST1_SVE))]
  "TARGET_SVE"
  "st1<VNx2_NARROW:Vesize>\t%1.<VNx2_WIDE:Vetype>, %2, %0"
)
2204 | ||
2205 | ;; ------------------------------------------------------------------------- | |
2206 | ;; ---- Non-temporal contiguous stores | |
2207 | ;; ------------------------------------------------------------------------- | |
2208 | ;; Includes: | |
2209 | ;; - STNT1B | |
2210 | ;; - STNT1D | |
2211 | ;; - STNT1H | |
2212 | ;; - STNT1W | |
2213 | ;; ------------------------------------------------------------------------- | |
2214 | ||
;; Predicated non-temporal store (STNT1B/STNT1H/STNT1W/STNT1D).
(define_insn "@aarch64_stnt1<mode>"
  [(set (match_operand:SVE_FULL 0 "memory_operand" "+m")
	(unspec:SVE_FULL
	  [(match_operand:<VPRED> 2 "register_operand" "Upl")
	   (match_operand:SVE_FULL 1 "register_operand" "w")
	   (match_dup 0)]
	  UNSPEC_STNT1_SVE))]
  "TARGET_SVE"
  "stnt1<Vesize>\t%1.<Vetype>, %2, %0"
)
2225 | ||
2226 | ;; ------------------------------------------------------------------------- | |
2227 | ;; ---- Normal scatter stores | |
2228 | ;; ------------------------------------------------------------------------- | |
2229 | ;; Includes scatter forms of: | |
2230 | ;; - ST1D | |
2231 | ;; - ST1W | |
2232 | ;; ------------------------------------------------------------------------- | |
2233 | ||
;; Unpredicated scatter stores.  Expands to the predicated form with an
;; all-true predicate in operand 5.
(define_expand "scatter_store<mode><v_int_container>"
  [(set (mem:BLK (scratch))
	(unspec:BLK
	  [(match_dup 5)
	   (match_operand:DI 0 "aarch64_sve_gather_offset_<Vesize>")
	   (match_operand:<V_INT_CONTAINER> 1 "register_operand")
	   (match_operand:DI 2 "const_int_operand")
	   (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>")
	   (match_operand:SVE_24 4 "register_operand")]
	  UNSPEC_ST1_SCATTER))]
  "TARGET_SVE"
  {
    operands[5] = aarch64_ptrue_reg (<VPRED>mode);
  }
)
2250 | ||
915d28fe RS |
;; Predicated scatter stores for 32-bit elements.  Operand 2 is true for
;; unsigned extension and false for signed extension.  Operand 0 is the
;; scalar base (or an immediate offset), operand 1 the vector of offsets,
;; operand 3 the scale and operand 4 the data to store.
(define_insn "mask_scatter_store<mode><v_int_container>"
  [(set (mem:BLK (scratch))
	(unspec:BLK
	  [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
	   (match_operand:DI 0 "aarch64_sve_gather_offset_<Vesize>" "Z, vgw, rk, rk, rk, rk")
	   (match_operand:VNx4SI 1 "register_operand" "w, w, w, w, w, w")
	   (match_operand:DI 2 "const_int_operand" "Ui1, Ui1, Z, Ui1, Z, Ui1")
	   (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
	   (match_operand:SVE_4 4 "register_operand" "w, w, w, w, w, w")]
	  UNSPEC_ST1_SCATTER))]
  "TARGET_SVE"
  "@
   st1<Vesize>\t%4.s, %5, [%1.s]
   st1<Vesize>\t%4.s, %5, [%1.s, #%0]
   st1<Vesize>\t%4.s, %5, [%0, %1.s, sxtw]
   st1<Vesize>\t%4.s, %5, [%0, %1.s, uxtw]
   st1<Vesize>\t%4.s, %5, [%0, %1.s, sxtw %p3]
   st1<Vesize>\t%4.s, %5, [%0, %1.s, uxtw %p3]"
)
2272 | ||
;; Predicated scatter stores for 64-bit elements.  The value of operand 2
;; doesn't matter in this case (the offsets are already 64 bits wide).
(define_insn "mask_scatter_store<mode><v_int_container>"
  [(set (mem:BLK (scratch))
	(unspec:BLK
	  [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
	   (match_operand:DI 0 "aarch64_sve_gather_offset_<Vesize>" "Z, vgd, rk, rk")
	   (match_operand:VNx2DI 1 "register_operand" "w, w, w, w")
	   (match_operand:DI 2 "const_int_operand")
	   (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, Ui1, Ui1, i")
	   (match_operand:SVE_2 4 "register_operand" "w, w, w, w")]
	  UNSPEC_ST1_SCATTER))]
  "TARGET_SVE"
  "@
   st1<Vesize>\t%4.d, %5, [%1.d]
   st1<Vesize>\t%4.d, %5, [%1.d, #%0]
   st1<Vesize>\t%4.d, %5, [%0, %1.d]
   st1<Vesize>\t%4.d, %5, [%0, %1.d, lsl %p3]"
)
2292 | ||
37a3662f RS |
;; Likewise, but with the offset being extended from 32 bits.
;; ANY_EXTEND iterates over sign and zero extension (<su> selects the
;; matching sxtw/uxtw addressing form).
(define_insn_and_rewrite "*mask_scatter_store<mode><v_int_container>_<su>xtw_unpacked"
  [(set (mem:BLK (scratch))
	(unspec:BLK
	  [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
	   (match_operand:DI 0 "register_operand" "rk, rk")
	   (unspec:VNx2DI
	     [(match_operand 6)
	      (ANY_EXTEND:VNx2DI
		(match_operand:VNx2SI 1 "register_operand" "w, w"))]
	     UNSPEC_PRED_X)
	   (match_operand:DI 2 "const_int_operand")
	   (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
	   (match_operand:SVE_2 4 "register_operand" "w, w")]
	  UNSPEC_ST1_SCATTER))]
  "TARGET_SVE"
  "@
   st1<Vesize>\t%4.d, %5, [%0, %1.d, <su>xtw]
   st1<Vesize>\t%4.d, %5, [%0, %1.d, <su>xtw %p3]"
  ;; Canonicalize the extension's governing predicate to all-true.
  "&& !CONSTANT_P (operands[6])"
  {
    operands[6] = CONSTM1_RTX (<VPRED>mode);
  }
)
2317 | ||
;; Likewise, but with the offset being truncated to 32 bits and then
;; sign-extended.
(define_insn_and_rewrite "*mask_scatter_store<mode><v_int_container>_sxtw"
  [(set (mem:BLK (scratch))
	(unspec:BLK
	  [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
	   (match_operand:DI 0 "register_operand" "rk, rk")
	   (unspec:VNx2DI
	     [(match_operand 6)
	      (sign_extend:VNx2DI
		(truncate:VNx2SI
		  (match_operand:VNx2DI 1 "register_operand" "w, w")))]
	     UNSPEC_PRED_X)
	   (match_operand:DI 2 "const_int_operand")
	   (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
	   (match_operand:SVE_2 4 "register_operand" "w, w")]
	  UNSPEC_ST1_SCATTER))]
  "TARGET_SVE"
  "@
   st1<Vesize>\t%4.d, %5, [%0, %1.d, sxtw]
   st1<Vesize>\t%4.d, %5, [%0, %1.d, sxtw %p3]"
  ;; Canonicalize the extension's governing predicate to all-true.
  "&& !CONSTANT_P (operands[6])"
  {
    operands[6] = CONSTM1_RTX (<VPRED>mode);
  }
)
2344 | ||
37a3662f RS |
;; Likewise, but with the offset being truncated to 32 bits and then
;; zero-extended.  The zero extension is represented as an AND with a
;; mask immediate (aarch64_sve_uxtw_immediate).
(define_insn "*mask_scatter_store<mode><v_int_container>_uxtw"
  [(set (mem:BLK (scratch))
	(unspec:BLK
	  [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
	   (match_operand:DI 0 "aarch64_reg_or_zero" "rk, rk")
	   (and:VNx2DI
	     (match_operand:VNx2DI 1 "register_operand" "w, w")
	     (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
	   (match_operand:DI 2 "const_int_operand")
	   (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
	   (match_operand:SVE_2 4 "register_operand" "w, w")]
	  UNSPEC_ST1_SCATTER))]
  "TARGET_SVE"
  "@
   st1<Vesize>\t%4.d, %5, [%0, %1.d, uxtw]
   st1<Vesize>\t%4.d, %5, [%0, %1.d, uxtw %p3]"
)
2364 | ||
2365 | ;; ------------------------------------------------------------------------- | |
2366 | ;; ---- Truncating scatter stores | |
2367 | ;; ------------------------------------------------------------------------- | |
2368 | ;; Includes scatter forms of: | |
2369 | ;; - ST1B | |
2370 | ;; - ST1H | |
2371 | ;; - ST1W | |
2372 | ;; ------------------------------------------------------------------------- | |
2373 | ||
;; Predicated truncating scatter stores for 32-bit elements.  Operand 2 is
;; true for unsigned extension and false for signed extension.  The data
;; in operand 4 is narrowed to the element size before being stored.
(define_insn "@aarch64_scatter_store_trunc<VNx4_NARROW:mode><VNx4_WIDE:mode>"
  [(set (mem:BLK (scratch))
	(unspec:BLK
	  [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
	   (match_operand:DI 0 "aarch64_sve_gather_offset_<VNx4_NARROW:Vesize>" "Z, vg<VNx4_NARROW:Vesize>, rk, rk, rk, rk")
	   (match_operand:VNx4SI 1 "register_operand" "w, w, w, w, w, w")
	   (match_operand:DI 2 "const_int_operand" "Ui1, Ui1, Z, Ui1, Z, Ui1")
	   (match_operand:DI 3 "aarch64_gather_scale_operand_<VNx4_NARROW:Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
	   (truncate:VNx4_NARROW
	     (match_operand:VNx4_WIDE 4 "register_operand" "w, w, w, w, w, w"))]
	  UNSPEC_ST1_SCATTER))]
  "TARGET_SVE"
  "@
   st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%1.s]
   st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%1.s, #%0]
   st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%0, %1.s, sxtw]
   st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%0, %1.s, uxtw]
   st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%0, %1.s, sxtw %p3]
   st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%0, %1.s, uxtw %p3]"
)
2396 | ||
;; Predicated truncating scatter stores for 64-bit elements.  The value of
;; operand 2 doesn't matter in this case.
(define_insn "@aarch64_scatter_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>"
  [(set (mem:BLK (scratch))
	(unspec:BLK
	  [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
	   (match_operand:DI 0 "aarch64_sve_gather_offset_<VNx2_NARROW:Vesize>" "Z, vg<VNx2_NARROW:Vesize>, rk, rk")
	   (match_operand:VNx2DI 1 "register_operand" "w, w, w, w")
	   (match_operand:DI 2 "const_int_operand")
	   (match_operand:DI 3 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, Ui1, Ui1, i")
	   (truncate:VNx2_NARROW
	     (match_operand:VNx2_WIDE 4 "register_operand" "w, w, w, w"))]
	  UNSPEC_ST1_SCATTER))]
  "TARGET_SVE"
  "@
   st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%1.d]
   st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%1.d, #%0]
   st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d]
   st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, lsl %p3]"
)
2417 | ||
;; Likewise, but with the offset being sign-extended from 32 bits.
(define_insn_and_rewrite "*aarch64_scatter_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>_sxtw"
  [(set (mem:BLK (scratch))
	(unspec:BLK
	  [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
	   (match_operand:DI 0 "register_operand" "rk, rk")
	   (unspec:VNx2DI
	     [(match_operand 6)
	      (sign_extend:VNx2DI
		(truncate:VNx2SI
		  (match_operand:VNx2DI 1 "register_operand" "w, w")))]
	     UNSPEC_PRED_X)
	   (match_operand:DI 2 "const_int_operand")
	   (match_operand:DI 3 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, i")
	   (truncate:VNx2_NARROW
	     (match_operand:VNx2_WIDE 4 "register_operand" "w, w"))]
	  UNSPEC_ST1_SCATTER))]
  "TARGET_SVE"
  "@
   st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, sxtw]
   st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, sxtw %p3]"
  ;; Make the extension's governing predicate (operand 6) reuse the
  ;; store predicate (operand 5).
  "&& !rtx_equal_p (operands[5], operands[6])"
  {
    operands[6] = copy_rtx (operands[5]);
  }
)
2444 | ||
;; Likewise, but with the offset being zero-extended from 32 bits.
(define_insn "*aarch64_scatter_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>_uxtw"
  [(set (mem:BLK (scratch))
	(unspec:BLK
	  [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
	   (match_operand:DI 0 "aarch64_reg_or_zero" "rk, rk")
	   (and:VNx2DI
	     (match_operand:VNx2DI 1 "register_operand" "w, w")
	     (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
	   (match_operand:DI 2 "const_int_operand")
	   (match_operand:DI 3 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, i")
	   (truncate:VNx2_NARROW
	     (match_operand:VNx2_WIDE 4 "register_operand" "w, w"))]
	  UNSPEC_ST1_SCATTER))]
  "TARGET_SVE"
  "@
   st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, uxtw]
   st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, uxtw %p3]"
)
2464 | ||
915d28fe RS |
2465 | ;; ========================================================================= |
2466 | ;; == Vector creation | |
2467 | ;; ========================================================================= | |
2468 | ||
2469 | ;; ------------------------------------------------------------------------- | |
2470 | ;; ---- [INT,FP] Duplicate element | |
2471 | ;; ------------------------------------------------------------------------- | |
2472 | ;; Includes: | |
624d0f07 | 2473 | ;; - DUP |
915d28fe RS |
2474 | ;; - MOV |
2475 | ;; - LD1RB | |
2476 | ;; - LD1RD | |
2477 | ;; - LD1RH | |
2478 | ;; - LD1RW | |
36696774 RS |
2479 | ;; - LD1ROB (F64MM) |
2480 | ;; - LD1ROD (F64MM) | |
2481 | ;; - LD1ROH (F64MM) | |
2482 | ;; - LD1ROW (F64MM) | |
915d28fe RS |
2483 | ;; - LD1RQB |
2484 | ;; - LD1RQD | |
2485 | ;; - LD1RQH | |
2486 | ;; - LD1RQW | |
2487 | ;; ------------------------------------------------------------------------- | |
2488 | ||
43cacb12 RS |
;; Broadcast a scalar to every element of a vector.  Memory inputs are
;; expanded directly to LD1R with an all-true predicate; register inputs
;; fall through to the define_insn_and_split below, which is why the
;; pattern carries a predicate scratch.
(define_expand "vec_duplicate<mode>"
  [(parallel
     [(set (match_operand:SVE_ALL 0 "register_operand")
	   (vec_duplicate:SVE_ALL
	     (match_operand:<VEL> 1 "aarch64_sve_dup_operand")))
      (clobber (scratch:VNx16BI))])]
  "TARGET_SVE"
  {
    if (MEM_P (operands[1]))
      {
	rtx ptrue = aarch64_ptrue_reg (<VPRED>mode);
	emit_insn (gen_sve_ld1r<mode> (operands[0], ptrue, operands[1],
				       CONST0_RTX (<MODE>mode)));
	DONE;
      }
  }
)
2506 | ||
;; Accept memory operands for the benefit of combine, and also in case
;; the scalar input gets spilled to memory during RA.  We want to split
;; the load at the first opportunity in order to allow the PTRUE to be
;; optimized with surrounding code.
(define_insn_and_split "*vec_duplicate<mode>_reg"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w, w")
	(vec_duplicate:SVE_ALL
	  (match_operand:<VEL> 1 "aarch64_sve_dup_operand" "r, w, Uty")))
   (clobber (match_scratch:VNx16BI 2 "=X, X, Upl"))]
  "TARGET_SVE"
  "@
   mov\t%0.<Vetype>, %<vwcore>1
   mov\t%0.<Vetype>, %<Vetype>1
   #"
  "&& MEM_P (operands[1])"
  [(const_int 0)]
  {
    ;; The scratch may not have been allocated yet (e.g. when splitting
    ;; before RA), in which case create a fresh predicate register.
    if (GET_CODE (operands[2]) == SCRATCH)
      operands[2] = gen_reg_rtx (VNx16BImode);
    emit_move_insn (operands[2], CONSTM1_RTX (VNx16BImode));
    rtx gp = gen_lowpart (<VPRED>mode, operands[2]);
    emit_insn (gen_sve_ld1r<mode> (operands[0], gp, operands[1],
				   CONST0_RTX (<MODE>mode)));
    DONE;
  }
  [(set_attr "length" "4,4,8")]
)
2534 | ||
4aeb1ba7 RS |
;; Duplicate an Advanced SIMD vector to fill an SVE vector (LE version).
(define_insn "@aarch64_vec_duplicate_vq<mode>_le"
  [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
	(vec_duplicate:SVE_FULL
	  (match_operand:<V128> 1 "register_operand" "w")))]
  "TARGET_SVE && !BYTES_BIG_ENDIAN"
  {
    ;; Reinterpret the 128-bit input as an SVE register so that %1
    ;; prints as a z register.
    operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1]));
    return "dup\t%0.q, %1.q[0]";
  }
)
2546 | ||
;; Duplicate an Advanced SIMD vector to fill an SVE vector (BE version).
;; The SVE register layout puts memory lane N into (architectural)
;; register lane N, whereas the Advanced SIMD layout puts the memory
;; lsb into the register lsb.  We therefore have to describe this in rtl
;; terms as a reverse of the V128 vector followed by a duplicate.
(define_insn "@aarch64_vec_duplicate_vq<mode>_be"
  [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
	(vec_duplicate:SVE_FULL
	  (vec_select:<V128>
	    (match_operand:<V128> 1 "register_operand" "w")
	    (match_operand 2 "descending_int_parallel"))))]
  "TARGET_SVE
   && BYTES_BIG_ENDIAN
   && known_eq (INTVAL (XVECEXP (operands[2], 0, 0)),
		GET_MODE_NUNITS (<V128>mode) - 1)"
  {
    ;; Reinterpret the 128-bit input as an SVE register so that %1
    ;; prints as a z register.
    operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1]));
    return "dup\t%0.q, %1.q[0]";
  }
)
2567 | ||
;; This is used for vec_duplicate<mode>s from memory, but can also
;; be used by combine to optimize selects of a vec_duplicate<mode>
;; with zero.  Operand 3 is the value taken by inactive lanes (zero).
(define_insn "sve_ld1r<mode>"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
	(unspec:SVE_ALL
	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
	   (vec_duplicate:SVE_ALL
	     (match_operand:<VEL> 2 "aarch64_sve_ld1r_operand" "Uty"))
	   (match_operand:SVE_ALL 3 "aarch64_simd_imm_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "ld1r<Vesize>\t%0.<Vetype>, %1/z, %2"
)
2582 | ||
4aeb1ba7 RS |
;; Load 128 bits from memory under predicate control and duplicate to
;; fill a vector.
(define_insn "@aarch64_sve_ld1rq<mode>"
  [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
	(unspec:SVE_FULL
	  [(match_operand:<VPRED> 2 "register_operand" "Upl")
	   (match_operand:<V128> 1 "aarch64_sve_ld1rq_operand" "UtQ")]
	  UNSPEC_LD1RQ))]
  "TARGET_SVE"
  {
    ;; Rewrap the memory reference in element mode so that the assembler
    ;; operand prints without a vector-width qualifier.
    operands[1] = gen_rtx_MEM (<VEL>mode, XEXP (operands[1], 0));
    return "ld1rq<Vesize>\t%0.<Vetype>, %2/z, %1";
  }
)
2597 | ||
9ceec73f MM |
;; Load 256 bits from memory under predicate control and duplicate to
;; fill a vector (LD1RO, F64MM extension).
(define_insn "@aarch64_sve_ld1ro<mode>"
  [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
	(unspec:SVE_FULL
	  [(match_operand:<VPRED> 2 "register_operand" "Upl")
	   (match_operand:OI 1 "aarch64_sve_ld1ro_operand_<Vesize>"
			     "UO<Vesize>")]
	  UNSPEC_LD1RO))]
  "TARGET_SVE_F64MM"
  {
    ;; Rewrap the memory reference in element mode so that the assembler
    ;; operand prints without a vector-width qualifier.
    operands[1] = gen_rtx_MEM (<VEL>mode, XEXP (operands[1], 0));
    return "ld1ro<Vesize>\t%0.<Vetype>, %2/z, %1";
  }
)
2611 | ||
915d28fe RS |
2612 | ;; ------------------------------------------------------------------------- |
2613 | ;; ---- [INT,FP] Initialize from individual elements | |
2614 | ;; ------------------------------------------------------------------------- | |
2615 | ;; Includes: | |
2616 | ;; - INSR | |
2617 | ;; ------------------------------------------------------------------------- | |
2618 | ||
;; Build a vector from individual elements; all the work is done by
;; aarch64_sve_expand_vector_init.
(define_expand "vec_init<mode><Vel>"
  [(match_operand:SVE_FULL 0 "register_operand")
   (match_operand 1 "")]
  "TARGET_SVE"
  {
    aarch64_sve_expand_vector_init (operands[0], operands[1]);
    DONE;
  }
)
2628 | ||
915d28fe RS |
;; Shift an SVE vector left and insert a scalar into element 0.
;; The last two alternatives need a MOVPRFX because INSR is destructive
;; and the input is not tied to the output.
(define_insn "vec_shl_insert_<mode>"
  [(set (match_operand:SVE_FULL 0 "register_operand" "=?w, w, ??&w, ?&w")
	(unspec:SVE_FULL
	  [(match_operand:SVE_FULL 1 "register_operand" "0, 0, w, w")
	   (match_operand:<VEL> 2 "aarch64_reg_or_zero" "rZ, w, rZ, w")]
	  UNSPEC_INSR))]
  "TARGET_SVE"
  "@
   insr\t%0.<Vetype>, %<vwcore>2
   insr\t%0.<Vetype>, %<Vetype>2
   movprfx\t%0, %1\;insr\t%0.<Vetype>, %<vwcore>2
   movprfx\t%0, %1\;insr\t%0.<Vetype>, %<Vetype>2"
  [(set_attr "movprfx" "*,*,yes,yes")]
)
2644 | ||
2645 | ;; ------------------------------------------------------------------------- | |
2646 | ;; ---- [INT] Linear series | |
2647 | ;; ------------------------------------------------------------------------- | |
2648 | ;; Includes: | |
2649 | ;; - INDEX | |
2650 | ;; ------------------------------------------------------------------------- | |
2651 | ||
;; Linear series {base, base + step, base + 2 * step, ...} via INDEX.
;; Either the base or the step (but not both) may be an immediate.
(define_insn "vec_series<mode>"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w")
	(vec_series:SVE_I
	  (match_operand:<VEL> 1 "aarch64_sve_index_operand" "Usi, r, r")
	  (match_operand:<VEL> 2 "aarch64_sve_index_operand" "r, Usi, r")))]
  "TARGET_SVE"
  "@
   index\t%0.<Vctype>, #%1, %<vccore>2
   index\t%0.<Vctype>, %<vccore>1, #%2
   index\t%0.<Vctype>, %<vccore>1, %<vccore>2"
)
2663 | ||
;; Optimize {x, x, x, x, ...} + {0, n, 2*n, 3*n, ...} if n is in range
;; of an INDEX instruction.
(define_insn "*vec_series<mode>_plus"
  [(set (match_operand:SVE_I 0 "register_operand" "=w")
	(plus:SVE_I
	  (vec_duplicate:SVE_I
	    (match_operand:<VEL> 1 "register_operand" "r"))
	  (match_operand:SVE_I 2 "immediate_operand")))]
  "TARGET_SVE && aarch64_check_zero_based_sve_index_immediate (operands[2])"
  {
    ;; Reuse the checking routine to extract the step as a scalar
    ;; immediate for the INDEX output template.
    operands[2] = aarch64_check_zero_based_sve_index_immediate (operands[2]);
    return "index\t%0.<Vctype>, %<vccore>1, #%2";
  }
)
2678 | ||
915d28fe RS |
2679 | ;; ------------------------------------------------------------------------- |
2680 | ;; ---- [PRED] Duplicate element | |
2681 | ;; ------------------------------------------------------------------------- | |
2682 | ;; The patterns in this section are synthetic. | |
2683 | ;; ------------------------------------------------------------------------- | |
2684 | ||
;; Implement a predicate broadcast by shifting the low bit of the scalar
;; input into the top bit and using a WHILELO.  An alternative would be to
;; duplicate the input and do a compare with zero.
(define_expand "vec_duplicate<mode>"
  [(set (match_operand:PRED_ALL 0 "register_operand")
	(vec_duplicate:PRED_ALL (match_operand:QI 1 "register_operand")))]
  "TARGET_SVE"
  {
    ;; tmp = op1 << 63: zero if the low bit of op1 is clear, otherwise the
    ;; most negative DImode value, making WHILELO produce all-false or
    ;; all-true respectively.
    rtx tmp = gen_reg_rtx (DImode);
    rtx op1 = gen_lowpart (DImode, operands[1]);
    emit_insn (gen_ashldi3 (tmp, op1, gen_int_mode (63, DImode)));
    emit_insn (gen_while_ultdi<mode> (operands[0], const0_rtx, tmp));
    DONE;
  }
)
2700 | ||
915d28fe RS |
2701 | ;; ========================================================================= |
2702 | ;; == Vector decomposition | |
2703 | ;; ========================================================================= | |
9f4cbab8 | 2704 | |
915d28fe RS |
2705 | ;; ------------------------------------------------------------------------- |
2706 | ;; ---- [INT,FP] Extract index | |
2707 | ;; ------------------------------------------------------------------------- | |
2708 | ;; Includes: | |
2709 | ;; - DUP (Advanced SIMD) | |
2710 | ;; - DUP (SVE) | |
2711 | ;; - EXT (SVE) | |
2712 | ;; - ST1 (Advanced SIMD) | |
2713 | ;; - UMOV (Advanced SIMD) | |
2714 | ;; ------------------------------------------------------------------------- | |
2715 | ||
;; Extract element number operand 2 of vector operand 1 into scalar
;; operand 0.  Variable and last-element indices are expanded here;
;; other constant indices fall through to the define_insns below.
(define_expand "vec_extract<mode><Vel>"
  [(set (match_operand:<VEL> 0 "register_operand")
	(vec_select:<VEL>
	  (match_operand:SVE_FULL 1 "register_operand")
	  (parallel [(match_operand:SI 2 "nonmemory_operand")])))]
  "TARGET_SVE"
  {
    poly_int64 val;
    if (poly_int_rtx_p (operands[2], &val)
	&& known_eq (val, GET_MODE_NUNITS (<MODE>mode) - 1))
      {
	/* The last element can be extracted with a LASTB and a false
	   predicate.  */
	rtx sel = aarch64_pfalse_reg (<VPRED>mode);
	emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1]));
	DONE;
      }
    if (!CONST_INT_P (operands[2]))
      {
	/* Create an index with operand[2] as the base and -1 as the step.
	   It will then be zero for the element we care about.  */
	rtx index = gen_lowpart (<VEL_INT>mode, operands[2]);
	index = force_reg (<VEL_INT>mode, index);
	rtx series = gen_reg_rtx (<V_INT_EQUIV>mode);
	emit_insn (gen_vec_series<v_int_equiv> (series, index, constm1_rtx));

	/* Get a predicate that is true for only that element.  */
	rtx zero = CONST0_RTX (<V_INT_EQUIV>mode);
	rtx cmp = gen_rtx_EQ (<V_INT_EQUIV>mode, series, zero);
	rtx sel = gen_reg_rtx (<VPRED>mode);
	emit_insn (gen_vec_cmp<v_int_equiv><vpred> (sel, cmp, series, zero));

	/* Select the element using LASTB.  */
	emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1]));
	DONE;
      }
  }
)
2754 | ||
915d28fe RS |
;; Extract element zero.  This is a special case because we want to force
;; the registers to be the same for the second alternative, and then
;; split the instruction into nothing after RA.
(define_insn_and_split "*vec_extract<mode><Vel>_0"
  [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
	(vec_select:<VEL>
	  (match_operand:SVE_FULL 1 "register_operand" "w, 0, w")
	  (parallel [(const_int 0)])))]
  "TARGET_SVE"
  {
    ;; Element 0 overlaps the Advanced SIMD view of the register, so the
    ;; operation can use the 128-bit forms of UMOV/ST1.
    operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1]));
    switch (which_alternative)
      {
	case 0:
	  return "umov\\t%<vwcore>0, %1.<Vetype>[0]";
	case 1:
	  return "#";
	case 2:
	  return "st1\\t{%1.<Vetype>}[0], %0";
	default:
	  gcc_unreachable ();
      }
  }
  "&& reload_completed
   && REG_P (operands[0])
   && REGNO (operands[0]) == REGNO (operands[1])"
  [(const_int 0)]
  {
    ;; Source and destination are the same register: the extract is a no-op.
    emit_note (NOTE_INSN_DELETED);
    DONE;
  }
  [(set_attr "type" "neon_to_gp_q, untyped, neon_store1_one_lane_q")]
)
2788 | ||
915d28fe RS |
;; Extract an element from the Advanced SIMD portion of the register.
;; We don't just reuse the aarch64-simd.md pattern because we don't
;; want any change in lane number on big-endian targets.
(define_insn "*vec_extract<mode><Vel>_v128"
  [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
	(vec_select:<VEL>
	  (match_operand:SVE_FULL 1 "register_operand" "w, w, w")
	  (parallel [(match_operand:SI 2 "const_int_operand")])))]
  "TARGET_SVE
   && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 1, 15)"
  {
    ;; The byte index is within the low 128 bits, so reinterpret the
    ;; source as its Advanced SIMD view.
    operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1]));
    switch (which_alternative)
      {
	case 0:
	  return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
	case 1:
	  return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
	case 2:
	  return "st1\\t{%1.<Vetype>}[%2], %0";
	default:
	  gcc_unreachable ();
      }
  }
  [(set_attr "type" "neon_to_gp_q, neon_dup_q, neon_store1_one_lane_q")]
)
2815 | ||
915d28fe RS |
;; Extract an element in the range of DUP.  This pattern allows the
;; source and destination to be different.
(define_insn "*vec_extract<mode><Vel>_dup"
  [(set (match_operand:<VEL> 0 "register_operand" "=w")
	(vec_select:<VEL>
	  (match_operand:SVE_FULL 1 "register_operand" "w")
	  (parallel [(match_operand:SI 2 "const_int_operand")])))]
  "TARGET_SVE
   && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 16, 63)"
  {
    ;; Reinterpret the scalar destination as a vector so that SVE DUP
    ;; can write the selected element to its lane 0.
    operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0]));
    return "dup\t%0.<Vetype>, %1.<Vetype>[%2]";
  }
)
2830 | ||
915d28fe RS |
;; Extract an element outside the range of DUP.  This pattern requires the
;; source and destination to be the same.
(define_insn "*vec_extract<mode><Vel>_ext"
  [(set (match_operand:<VEL> 0 "register_operand" "=w, ?&w")
	(vec_select:<VEL>
	  (match_operand:SVE_FULL 1 "register_operand" "0, w")
	  (parallel [(match_operand:SI 2 "const_int_operand")])))]
  "TARGET_SVE && INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode) >= 64"
  {
    ;; EXT rotates the selected element down to lane 0; operand 2 becomes
    ;; the rotate amount in bytes.
    operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0]));
    operands[2] = GEN_INT (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode));
    return (which_alternative == 0
	    ? "ext\t%0.b, %0.b, %0.b, #%2"
	    : "movprfx\t%0, %1\;ext\t%0.b, %0.b, %1.b, #%2");
  }
  [(set_attr "movprfx" "*,yes")]
)
2848 | ||
915d28fe RS |
2849 | ;; ------------------------------------------------------------------------- |
2850 | ;; ---- [INT,FP] Extract active element | |
2851 | ;; ------------------------------------------------------------------------- | |
2852 | ;; Includes: | |
624d0f07 | 2853 | ;; - LASTA |
915d28fe RS |
2854 | ;; - LASTB |
2855 | ;; ------------------------------------------------------------------------- | |
2856 | ||
;; Extract the last active element of operand 1 into operand 0.
;; If no elements are active, extract the last inactive element instead.
;; The two alternatives return the result in a general register or an
;; FP/SIMD register respectively.
(define_insn "@extract_<last_op>_<mode>"
  [(set (match_operand:<VEL> 0 "register_operand" "=?r, w")
	(unspec:<VEL>
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (match_operand:SVE_FULL 2 "register_operand" "w, w")]
	  LAST))]
  "TARGET_SVE"
  "@
   last<ab>\t%<vwcore>0, %1, %2.<Vetype>
   last<ab>\t%<Vetype>0, %1, %2.<Vetype>"
)
2870 | ||
915d28fe RS |
2871 | ;; ------------------------------------------------------------------------- |
2872 | ;; ---- [PRED] Extract index | |
2873 | ;; ------------------------------------------------------------------------- | |
2874 | ;; The patterns in this section are synthetic. | |
2875 | ;; ------------------------------------------------------------------------- | |
2876 | ||
;; Handle extractions from a predicate by converting to an integer vector
;; and extracting from there.
(define_expand "vec_extract<vpred><Vel>"
  [(match_operand:<VEL> 0 "register_operand")
   (match_operand:<VPRED> 1 "register_operand")
   (match_operand:SI 2 "nonmemory_operand")
   ;; Dummy operand to which we can attach the iterator.
   (reg:SVE_FULL_I V0_REGNUM)]
  "TARGET_SVE"
  {
    ;; tmp = (operand 1 ? 1 : 0) per lane, then extract lane operand 2.
    rtx tmp = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_vcond_mask_<mode><vpred> (tmp, operands[1],
					     CONST1_RTX (<MODE>mode),
					     CONST0_RTX (<MODE>mode)));
    emit_insn (gen_vec_extract<mode><Vel> (operands[0], tmp, operands[2]));
    DONE;
  }
)
2895 | ||
915d28fe RS |
2896 | ;; ========================================================================= |
2897 | ;; == Unary arithmetic | |
2898 | ;; ========================================================================= | |
2899 | ||
2900 | ;; ------------------------------------------------------------------------- | |
2901 | ;; ---- [INT] General unary arithmetic corresponding to rtx codes | |
2902 | ;; ------------------------------------------------------------------------- | |
2903 | ;; Includes: | |
2904 | ;; - ABS | |
bca5a997 RS |
2905 | ;; - CLS (= clrsb) |
2906 | ;; - CLZ | |
915d28fe RS |
2907 | ;; - CNT (= popcount) |
2908 | ;; - NEG | |
2909 | ;; - NOT | |
2910 | ;; ------------------------------------------------------------------------- | |
2911 | ||
;; Unpredicated integer unary arithmetic, implemented by wrapping the
;; operation in an all-true predicate (UNSPEC_PRED_X).
(define_expand "<optab><mode>2"
  [(set (match_operand:SVE_I 0 "register_operand")
	(unspec:SVE_I
	  [(match_dup 2)
	   (SVE_INT_UNARY:SVE_I
	     (match_operand:SVE_I 1 "register_operand"))]
	  UNSPEC_PRED_X))]
  "TARGET_SVE"
  {
    operands[2] = aarch64_ptrue_reg (<VPRED>mode);
  }
)
2925 | ||
;; Integer unary arithmetic predicated with a PTRUE.  The second
;; alternative uses MOVPRFX when the input is not tied to the output.
(define_insn "@aarch64_pred_<optab><mode>"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
	(unspec:SVE_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (SVE_INT_UNARY:SVE_I
	     (match_operand:SVE_I 2 "register_operand" "0, w"))]
	  UNSPEC_PRED_X))]
  "TARGET_SVE"
  "@
   <sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)
2940 | ||
624d0f07 RS |
;; Predicated integer unary arithmetic with merging: inactive lanes take
;; their value from operand 3.  Matched by the *cond_..._2/_any insns.
(define_expand "@cond_<optab><mode>"
  [(set (match_operand:SVE_I 0 "register_operand")
	(unspec:SVE_I
	  [(match_operand:<VPRED> 1 "register_operand")
	   (SVE_INT_UNARY:SVE_I
	     (match_operand:SVE_I 2 "register_operand"))
	   (match_operand:SVE_I 3 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE"
)
2952 | ||
;; Predicated integer unary arithmetic, merging with the first input.
(define_insn "*cond_<optab><mode>_2"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
	(unspec:SVE_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (SVE_INT_UNARY:SVE_I
	     (match_operand:SVE_I 2 "register_operand" "0, w"))
	   (match_dup 2)]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>
   movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)
2968 | ||
;; Predicated integer unary arithmetic, merging with an independent value.
;;
;; The earlyclobber isn't needed for the first alternative, but omitting
;; it would only help the case in which operands 2 and 3 are the same,
;; which is handled above rather than here.  Marking all the alternatives
;; as earlyclobber helps to make the instruction more regular to the
;; register allocator.
(define_insn "*cond_<optab><mode>_any"
  [(set (match_operand:SVE_I 0 "register_operand" "=&w, ?&w, ?&w")
	(unspec:SVE_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
	   (SVE_INT_UNARY:SVE_I
	     (match_operand:SVE_I 2 "register_operand" "w, w, w"))
	   (match_operand:SVE_I 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE && !rtx_equal_p (operands[2], operands[3])"
  "@
   <sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0, %3\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
  [(set_attr "movprfx" "*,yes,yes")]
)
2991 | ||
d7a09c44 RS |
2992 | ;; ------------------------------------------------------------------------- |
2993 | ;; ---- [INT] General unary arithmetic corresponding to unspecs | |
2994 | ;; ------------------------------------------------------------------------- | |
2995 | ;; Includes | |
624d0f07 | 2996 | ;; - RBIT |
d7a09c44 RS |
2997 | ;; - REVB |
2998 | ;; - REVH | |
2999 | ;; - REVW | |
3000 | ;; ------------------------------------------------------------------------- | |
3001 | ||
;; Predicated integer unary operations (RBIT/REVB/REVH/REVW, expressed
;; as unspecs rather than rtx codes).
(define_insn "@aarch64_pred_<optab><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (unspec:SVE_FULL_I
	     [(match_operand:SVE_FULL_I 2 "register_operand" "0, w")]
	     SVE_INT_UNARY)]
	  UNSPEC_PRED_X))]
  "TARGET_SVE && <elem_bits> >= <min_elem_bits>"
  "@
   <sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)
3017 | ||
6c3ce63b RS |
;; Another way of expressing the REVB, REVH and REVW patterns, with this
;; form being easier for permutes.  The predicate mode determines the number
;; of lanes and the data mode decides the granularity of the reversal within
;; each lane.
(define_insn "@aarch64_sve_revbhw_<SVE_ALL:mode><PRED_HSD:mode>"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w, ?&w")
	(unspec:SVE_ALL
	  [(match_operand:PRED_HSD 1 "register_operand" "Upl, Upl")
	   (unspec:SVE_ALL
	     [(match_operand:SVE_ALL 2 "register_operand" "0, w")]
	     UNSPEC_REVBHW)]
	  UNSPEC_PRED_X))]
  "TARGET_SVE && <PRED_HSD:elem_bits> > <SVE_ALL:container_bits>"
  "@
   rev<SVE_ALL:Vcwtype>\t%0.<PRED_HSD:Vetype>, %1/m, %2.<PRED_HSD:Vetype>
   movprfx\t%0, %2\;rev<SVE_ALL:Vcwtype>\t%0.<PRED_HSD:Vetype>, %1/m, %2.<PRED_HSD:Vetype>"
  [(set_attr "movprfx" "*,yes")]
)
3036 | ||
624d0f07 RS |
;; Predicated integer unary operations with merging: inactive lanes take
;; their value from operand 3 (the first input, zero, or another register).
(define_insn "@cond_<optab><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w, ?&w")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
	   (unspec:SVE_FULL_I
	     [(match_operand:SVE_FULL_I 2 "register_operand" "w, w, w")]
	     SVE_INT_UNARY)
	   (match_operand:SVE_FULL_I 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE && <elem_bits> >= <min_elem_bits>"
  "@
   <sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0, %3\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
  [(set_attr "movprfx" "*,yes,yes")]
)
3054 | ||
3055 | ;; ------------------------------------------------------------------------- | |
e58703e2 | 3056 | ;; ---- [INT] Sign and zero extension |
624d0f07 RS |
3057 | ;; ------------------------------------------------------------------------- |
3058 | ;; Includes: | |
3059 | ;; - SXTB | |
3060 | ;; - SXTH | |
3061 | ;; - SXTW | |
e58703e2 RS |
3062 | ;; - UXTB |
3063 | ;; - UXTH | |
3064 | ;; - UXTW | |
624d0f07 RS |
3065 | ;; ------------------------------------------------------------------------- |
3066 | ||
e58703e2 RS |
;; Unpredicated sign and zero extension from a narrower mode.
(define_expand "<optab><SVE_PARTIAL_I:mode><SVE_HSDI:mode>2"
  [(set (match_operand:SVE_HSDI 0 "register_operand")
	(unspec:SVE_HSDI
	  [(match_dup 2)
	   (ANY_EXTEND:SVE_HSDI
	     (match_operand:SVE_PARTIAL_I 1 "register_operand"))]
	  UNSPEC_PRED_X))]
  "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
  {
    operands[2] = aarch64_ptrue_reg (<SVE_HSDI:VPRED>mode);
  }
)
3080 | ||
;; Predicated sign and zero extension from a narrower mode (SXTB/SXTH/
;; SXTW or UXTB/UXTH/UXTW, chosen by the <su> iterator attribute).
(define_insn "*<optab><SVE_PARTIAL_I:mode><SVE_HSDI:mode>2"
  [(set (match_operand:SVE_HSDI 0 "register_operand" "=w, ?&w")
	(unspec:SVE_HSDI
	  [(match_operand:<SVE_HSDI:VPRED> 1 "register_operand" "Upl, Upl")
	   (ANY_EXTEND:SVE_HSDI
	     (match_operand:SVE_PARTIAL_I 2 "register_operand" "0, w"))]
	  UNSPEC_PRED_X))]
  "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
  "@
   <su>xt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vetype>, %1/m, %2.<SVE_HSDI:Vetype>
   movprfx\t%0, %2\;<su>xt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vetype>, %1/m, %2.<SVE_HSDI:Vetype>"
  [(set_attr "movprfx" "*,yes")]
)
3095 | ||
;; Predicated truncate-and-sign-extend operations.
(define_insn "@aarch64_pred_sxt<SVE_FULL_HSDI:mode><SVE_PARTIAL_I:mode>"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_HSDI
	  [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl")
	   (sign_extend:SVE_FULL_HSDI
	     (truncate:SVE_PARTIAL_I
	       (match_operand:SVE_FULL_HSDI 2 "register_operand" "0, w")))]
	  UNSPEC_PRED_X))]
  "TARGET_SVE
   && (~<SVE_FULL_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
  "@
   sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
   movprfx\t%0, %2\;sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>"
  [(set_attr "movprfx" "*,yes")]
)
3112 | ||
e58703e2 | 3113 | ;; Predicated truncate-and-sign-extend operations with merging. |
6544cb52 | 3114 | (define_insn "@aarch64_cond_sxt<SVE_FULL_HSDI:mode><SVE_PARTIAL_I:mode>" |
f75cdd2c RS |
3115 | [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w, ?&w") |
3116 | (unspec:SVE_FULL_HSDI | |
cc68f7c2 | 3117 | [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl, Upl") |
f75cdd2c | 3118 | (sign_extend:SVE_FULL_HSDI |
6544cb52 | 3119 | (truncate:SVE_PARTIAL_I |
f75cdd2c RS |
3120 | (match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w, w"))) |
3121 | (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] | |
624d0f07 | 3122 | UNSPEC_SEL))] |
e58703e2 RS |
3123 | "TARGET_SVE |
3124 | && (~<SVE_FULL_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0" | |
624d0f07 | 3125 | "@ |
6544cb52 RS |
3126 | sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype> |
3127 | movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype> | |
3128 | movprfx\t%0, %3\;sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>" | |
624d0f07 RS |
3129 | [(set_attr "movprfx" "*,yes,yes")] |
3130 | ) | |
3131 | ||
e58703e2 | 3132 | ;; Predicated truncate-and-zero-extend operations, merging with the |
d113ece6 | 3133 | ;; first input. |
e58703e2 RS |
3134 | ;; |
3135 | ;; The canonical form of this operation is an AND of a constant rather | |
3136 | ;; than (zero_extend (truncate ...)). | |
d113ece6 | 3137 | (define_insn "*cond_uxt<mode>_2" |
f3c5d1fa RS |
3138 | [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") |
3139 | (unspec:SVE_I | |
d113ece6 | 3140 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") |
f3c5d1fa RS |
3141 | (and:SVE_I |
3142 | (match_operand:SVE_I 2 "register_operand" "0, w") | |
3143 | (match_operand:SVE_I 3 "aarch64_sve_uxt_immediate")) | |
d113ece6 RS |
3144 | (match_dup 2)] |
3145 | UNSPEC_SEL))] | |
3146 | "TARGET_SVE" | |
3147 | "@ | |
3148 | uxt%e3\t%0.<Vetype>, %1/m, %0.<Vetype> | |
3149 | movprfx\t%0, %2\;uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype>" | |
3150 | [(set_attr "movprfx" "*,yes")] | |
3151 | ) | |
3152 | ||
e58703e2 | 3153 | ;; Predicated truncate-and-zero-extend operations, merging with an |
d113ece6 RS |
3154 | ;; independent value. |
3155 | ;; | |
3156 | ;; The earlyclobber isn't needed for the first alternative, but omitting | |
3157 | ;; it would only help the case in which operands 2 and 4 are the same, | |
3158 | ;; which is handled above rather than here. Marking all the alternatives | |
3159 | ;; as early-clobber helps to make the instruction more regular to the | |
3160 | ;; register allocator. | |
3161 | (define_insn "*cond_uxt<mode>_any" | |
f3c5d1fa RS |
3162 | [(set (match_operand:SVE_I 0 "register_operand" "=&w, ?&w, ?&w") |
3163 | (unspec:SVE_I | |
d113ece6 | 3164 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") |
f3c5d1fa RS |
3165 | (and:SVE_I |
3166 | (match_operand:SVE_I 2 "register_operand" "w, w, w") | |
3167 | (match_operand:SVE_I 3 "aarch64_sve_uxt_immediate")) | |
3168 | (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero" "0, Dz, w")] | |
d113ece6 RS |
3169 | UNSPEC_SEL))] |
3170 | "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" | |
3171 | "@ | |
3172 | uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype> | |
3173 | movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype> | |
3174 | movprfx\t%0, %4\;uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype>" | |
3175 | [(set_attr "movprfx" "*,yes,yes")] | |
3176 | ) | |
3177 | ||
2d56600c RS |
3178 | ;; ------------------------------------------------------------------------- |
3179 | ;; ---- [INT] Truncation | |
3180 | ;; ------------------------------------------------------------------------- | |
3181 | ;; The patterns in this section are synthetic. | |
3182 | ;; ------------------------------------------------------------------------- | |
3183 | ||
;; Truncate to a partial SVE vector from either a full vector or a
;; wider partial vector.  This is a no-op, because we can just ignore
;; the unused upper bits of the source.
(define_insn_and_split "trunc<SVE_HSDI:mode><SVE_PARTIAL_I:mode>2"
  [(set (match_operand:SVE_PARTIAL_I 0 "register_operand" "=w")
	(truncate:SVE_PARTIAL_I
	  (match_operand:SVE_HSDI 1 "register_operand" "w")))]
  "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 1))]
  {
    ;; Reinterpret the source register in the narrower mode so that the
    ;; split becomes a plain (possibly eliminable) register move.
    operands[1] = aarch64_replace_reg_mode (operands[1],
					    <SVE_PARTIAL_I:MODE>mode);
  }
)
3200 | ||
e0a0be93 RS |
3201 | ;; ------------------------------------------------------------------------- |
3202 | ;; ---- [INT] Logical inverse | |
3203 | ;; ------------------------------------------------------------------------- | |
624d0f07 RS |
3204 | ;; Includes: |
3205 | ;; - CNOT | |
3206 | ;; ------------------------------------------------------------------------- | |
e0a0be93 RS |
3207 | |
;; Predicated logical inverse: each governed lane of operand 0 becomes
;; 1 if the corresponding lane of operand 3 is zero and 0 otherwise.
;; Operand 2 describes what is known about predicate operand 1.
(define_expand "@aarch64_pred_cnot<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
	(unspec:SVE_FULL_I
	  [(unspec:<VPRED>
	     [(match_operand:<VPRED> 1 "register_operand")
	      (match_operand:SI 2 "aarch64_sve_ptrue_flag")
	      (eq:<VPRED>
		(match_operand:SVE_FULL_I 3 "register_operand")
		(match_dup 4))]
	     UNSPEC_PRED_Z)
	   (match_dup 5)
	   (match_dup 4)]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  {
    operands[4] = CONST0_RTX (<MODE>mode);
    operands[5] = CONST1_RTX (<MODE>mode);
  }
)
3228 | ||
e0a0be93 | 3229 | (define_insn "*cnot<mode>" |
5fe3e6bf RS |
3230 | [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") |
3231 | (unspec:SVE_I | |
e0a0be93 | 3232 | [(unspec:<VPRED> |
a4d9837e | 3233 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") |
e0a0be93 RS |
3234 | (match_operand:SI 5 "aarch64_sve_ptrue_flag") |
3235 | (eq:<VPRED> | |
5fe3e6bf RS |
3236 | (match_operand:SVE_I 2 "register_operand" "0, w") |
3237 | (match_operand:SVE_I 3 "aarch64_simd_imm_zero"))] | |
e0a0be93 | 3238 | UNSPEC_PRED_Z) |
5fe3e6bf | 3239 | (match_operand:SVE_I 4 "aarch64_simd_imm_one") |
e0a0be93 RS |
3240 | (match_dup 3)] |
3241 | UNSPEC_SEL))] | |
3242 | "TARGET_SVE" | |
a4d9837e RS |
3243 | "@ |
3244 | cnot\t%0.<Vetype>, %1/m, %2.<Vetype> | |
3245 | movprfx\t%0, %2\;cnot\t%0.<Vetype>, %1/m, %2.<Vetype>" | |
3246 | [(set_attr "movprfx" "*,yes")] | |
e0a0be93 RS |
3247 | ) |
3248 | ||
624d0f07 RS |
;; Predicated logical inverse with merging: inactive lanes take their
;; value from operand 3.
(define_expand "@cond_cnot<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand")
	   ;; Logical inverse of operand 2, computed under a known ptrue.
	   (unspec:SVE_FULL_I
	     [(unspec:<VPRED>
		[(match_dup 4)
		 (const_int SVE_KNOWN_PTRUE)
		 (eq:<VPRED>
		   (match_operand:SVE_FULL_I 2 "register_operand")
		   (match_dup 5))]
		UNSPEC_PRED_Z)
	      (match_dup 6)
	      (match_dup 5)]
	     UNSPEC_SEL)
	   (match_operand:SVE_FULL_I 3 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
    operands[5] = CONST0_RTX (<MODE>mode);
    operands[6] = CONST1_RTX (<MODE>mode);
  }
)
3274 | ||
e0a0be93 RS |
3275 | ;; Predicated logical inverse, merging with the first input. |
3276 | (define_insn_and_rewrite "*cond_cnot<mode>_2" | |
5fe3e6bf RS |
3277 | [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") |
3278 | (unspec:SVE_I | |
e0a0be93 RS |
3279 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") |
3280 | ;; Logical inverse of operand 2 (as above). | |
5fe3e6bf | 3281 | (unspec:SVE_I |
e0a0be93 RS |
3282 | [(unspec:<VPRED> |
3283 | [(match_operand 5) | |
3284 | (const_int SVE_KNOWN_PTRUE) | |
3285 | (eq:<VPRED> | |
5fe3e6bf RS |
3286 | (match_operand:SVE_I 2 "register_operand" "0, w") |
3287 | (match_operand:SVE_I 3 "aarch64_simd_imm_zero"))] | |
e0a0be93 | 3288 | UNSPEC_PRED_Z) |
5fe3e6bf | 3289 | (match_operand:SVE_I 4 "aarch64_simd_imm_one") |
e0a0be93 RS |
3290 | (match_dup 3)] |
3291 | UNSPEC_SEL) | |
3292 | (match_dup 2)] | |
3293 | UNSPEC_SEL))] | |
3294 | "TARGET_SVE" | |
3295 | "@ | |
3296 | cnot\t%0.<Vetype>, %1/m, %0.<Vetype> | |
3297 | movprfx\t%0, %2\;cnot\t%0.<Vetype>, %1/m, %2.<Vetype>" | |
3298 | "&& !CONSTANT_P (operands[5])" | |
3299 | { | |
3300 | operands[5] = CONSTM1_RTX (<VPRED>mode); | |
3301 | } | |
3302 | [(set_attr "movprfx" "*,yes")] | |
3303 | ) | |
3304 | ||
;; Predicated logical inverse, merging with an independent value.
;;
;; The earlyclobber isn't needed for the first alternative, but omitting
;; it would only help the case in which operands 2 and 6 are the same,
;; which is handled above rather than here.  Marking all the alternatives
;; as earlyclobber helps to make the instruction more regular to the
;; register allocator.
(define_insn_and_rewrite "*cond_cnot<mode>_any"
  [(set (match_operand:SVE_I 0 "register_operand" "=&w, ?&w, ?&w")
	(unspec:SVE_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
	   ;; Logical inverse of operand 2 (as above).
	   (unspec:SVE_I
	     [(unspec:<VPRED>
		[(match_operand 5)
		 (const_int SVE_KNOWN_PTRUE)
		 (eq:<VPRED>
		   (match_operand:SVE_I 2 "register_operand" "w, w, w")
		   (match_operand:SVE_I 3 "aarch64_simd_imm_zero"))]
		UNSPEC_PRED_Z)
	      (match_operand:SVE_I 4 "aarch64_simd_imm_one")
	      (match_dup 3)]
	     UNSPEC_SEL)
	   (match_operand:SVE_I 6 "aarch64_simd_reg_or_zero" "0, Dz, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE && !rtx_equal_p (operands[2], operands[6])"
  "@
   cnot\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;cnot\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0, %6\;cnot\t%0.<Vetype>, %1/m, %2.<Vetype>"
  "&& !CONSTANT_P (operands[5])"
  {
    ;; Canonicalize the inner governing predicate to a constant all-true.
    operands[5] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes,yes")]
)
3341 | ||
;; -------------------------------------------------------------------------
;; ---- [FP<-INT] General unary arithmetic that maps to unspecs
;; -------------------------------------------------------------------------
;; Includes:
;; - FEXPA
;; -------------------------------------------------------------------------

;; Unpredicated unary operations that take an integer and return a float.
(define_insn "@aarch64_sve_<optab><mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
	(unspec:SVE_FULL_F
	  [(match_operand:<V_INT_EQUIV> 1 "register_operand" "w")]
	  SVE_FP_UNARY_INT))]
  "TARGET_SVE"
  "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>"
)
3358 | ||
;; -------------------------------------------------------------------------
;; ---- [FP] General unary arithmetic corresponding to unspecs
;; -------------------------------------------------------------------------
;; Includes:
;; - FABS
;; - FNEG
;; - FRECPE
;; - FRECPX
;; - FRINTA
;; - FRINTI
;; - FRINTM
;; - FRINTN
;; - FRINTP
;; - FRINTX
;; - FRINTZ
;; - FRSQRTE
;; - FSQRT
;; -------------------------------------------------------------------------

;; Unpredicated floating-point unary operations.
(define_insn "@aarch64_sve_<optab><mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
	(unspec:SVE_FULL_F
	  [(match_operand:SVE_FULL_F 1 "register_operand" "w")]
	  SVE_FP_UNARY))]
  "TARGET_SVE"
  "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>"
)
3387 | ||
;; Unpredicated floating-point unary operations, as an optab expander;
;; the operation itself is predicated on a ptrue with a relaxed GP.
(define_expand "<optab><mode>2"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
	(unspec:SVE_FULL_F
	  [(match_dup 2)
	   (const_int SVE_RELAXED_GP)
	   (match_operand:SVE_FULL_F 1 "register_operand")]
	  SVE_COND_FP_UNARY_OPTAB))]
  "TARGET_SVE"
  {
    operands[2] = aarch64_ptrue_reg (<VPRED>mode);
  }
)
3401 | ||
;; Predicated floating-point unary operations.
(define_insn "@aarch64_pred_<optab><mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (match_operand:SI 3 "aarch64_sve_gp_strictness")
	   (match_operand:SVE_FULL_F 2 "register_operand" "0, w")]
	  SVE_COND_FP_UNARY))]
  "TARGET_SVE"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)
3416 | ||
;; Predicated floating-point unary arithmetic with merging.
(define_expand "@cond_<optab><mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand")
	   (unspec:SVE_FULL_F
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand")]
	     SVE_COND_FP_UNARY)
	   (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE"
)
3431 | ||
;; Predicated floating-point unary arithmetic, merging with the first input.
(define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_operand 3)
	      (const_int SVE_RELAXED_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand" "0, w")]
	     SVE_COND_FP_UNARY)
	   (match_dup 2)]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>
   movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
  "&& !rtx_equal_p (operands[1], operands[3])"
  {
    operands[3] = copy_rtx (operands[1]);
  }
  [(set_attr "movprfx" "*,yes")]
)
3454 | ||
;; Same, but with a strict governing predicate: the inner operation's
;; predicate must match the selector's, so no rewrite step is needed.
(define_insn "*cond_<optab><mode>_2_strict"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand" "0, w")]
	     SVE_COND_FP_UNARY)
	   (match_dup 2)]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>
   movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)
3472 | ||
;; Predicated floating-point unary arithmetic, merging with an independent
;; value.
;;
;; The earlyclobber isn't needed for the first alternative, but omitting
;; it would only help the case in which operands 2 and 3 are the same,
;; which is handled above rather than here.  Marking all the alternatives
;; as earlyclobber helps to make the instruction more regular to the
;; register allocator.
(define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, ?&w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_operand 4)
	      (const_int SVE_RELAXED_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")]
	     SVE_COND_FP_UNARY)
	   (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE && !rtx_equal_p (operands[2], operands[3])"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
  "&& !rtx_equal_p (operands[1], operands[4])"
  {
    operands[4] = copy_rtx (operands[1]);
  }
  [(set_attr "movprfx" "*,yes,yes")]
)
3503 | ||
;; Same, but with a strict governing predicate (no rewrite step needed).
(define_insn "*cond_<optab><mode>_any_strict"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, ?&w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")]
	     SVE_COND_FP_UNARY)
	   (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE && !rtx_equal_p (operands[2], operands[3])"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
  [(set_attr "movprfx" "*,yes,yes")]
)
3522 | ||
;; -------------------------------------------------------------------------
;; ---- [FP] Square root
;; -------------------------------------------------------------------------

;; Either emit an approximate-sqrt expansion or fall through to the
;; predicated FSQRT pattern with a relaxed ptrue GP.
(define_expand "sqrt<mode>2"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
	(unspec:SVE_FULL_F
	  [(match_dup 2)
	   (const_int SVE_RELAXED_GP)
	   (match_operand:SVE_FULL_F 1 "register_operand")]
	  UNSPEC_COND_FSQRT))]
  "TARGET_SVE"
  {
    if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
      DONE;
    operands[2] = aarch64_ptrue_reg (<VPRED>mode);
  })
3540 | ||
;; -------------------------------------------------------------------------
;; ---- [FP] Reciprocal square root
;; -------------------------------------------------------------------------

;; Always expands via the approximate-sqrt helper (recip = true).
(define_expand "rsqrt<mode>2"
  [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
	(unspec:SVE_FULL_SDF
	  [(match_operand:SVE_FULL_SDF 1 "register_operand")]
	  UNSPEC_RSQRT))]
  "TARGET_SVE"
  {
    aarch64_emit_approx_sqrt (operands[0], operands[1], true);
    DONE;
  })
3555 | ||
;; Reciprocal square root estimate (UNSPEC_RSQRTE).
(define_expand "@aarch64_rsqrte<mode>"
  [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
	(unspec:SVE_FULL_SDF
	  [(match_operand:SVE_FULL_SDF 1 "register_operand")]
	  UNSPEC_RSQRTE))]
  "TARGET_SVE"
)
3563 | ||
;; Reciprocal square root step (UNSPEC_RSQRTS), taking two inputs.
(define_expand "@aarch64_rsqrts<mode>"
  [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
	(unspec:SVE_FULL_SDF
	  [(match_operand:SVE_FULL_SDF 1 "register_operand")
	   (match_operand:SVE_FULL_SDF 2 "register_operand")]
	  UNSPEC_RSQRTS))]
  "TARGET_SVE"
)
3572 | ||
;; -------------------------------------------------------------------------
;; ---- [PRED] Inverse
;; -------------------------------------------------------------------------
;; Includes:
;; - NOT
;; -------------------------------------------------------------------------

;; Unpredicated predicate inverse.
(define_expand "one_cmpl<mode>2"
  [(set (match_operand:PRED_ALL 0 "register_operand")
	(and:PRED_ALL
	  (not:PRED_ALL (match_operand:PRED_ALL 1 "register_operand"))
	  (match_dup 2)))]
  "TARGET_SVE"
  {
    operands[2] = aarch64_ptrue_reg (<MODE>mode);
  }
)
3591 | ||
;; Predicated predicate inverse.
(define_insn "*one_cmpl<mode>3"
  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
	(and:PRED_ALL
	  (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
	  (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
  "TARGET_SVE"
  "not\t%0.b, %1/z, %2.b"
)
3601 | ||
;; =========================================================================
;; == Binary arithmetic
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- [INT] General binary arithmetic corresponding to rtx codes
;; -------------------------------------------------------------------------
;; Includes:
;; - ADD (merging form only)
;; - AND (merging form only)
;; - ASR (merging form only)
;; - EOR (merging form only)
;; - LSL (merging form only)
;; - LSR (merging form only)
;; - MUL
;; - ORR (merging form only)
;; - SMAX
;; - SMIN
;; - SQADD (SVE2 merging form only)
;; - SQSUB (SVE2 merging form only)
;; - SUB (merging form only)
;; - UMAX
;; - UMIN
;; - UQADD (SVE2 merging form only)
;; - UQSUB (SVE2 merging form only)
;; -------------------------------------------------------------------------

;; Unpredicated integer binary operations that have an immediate form.
(define_expand "<optab><mode>3"
  [(set (match_operand:SVE_I 0 "register_operand")
	(unspec:SVE_I
	  [(match_dup 3)
	   (SVE_INT_BINARY_IMM:SVE_I
	     (match_operand:SVE_I 1 "register_operand")
	     (match_operand:SVE_I 2 "aarch64_sve_<sve_imm_con>_operand"))]
	  UNSPEC_PRED_X))]
  "TARGET_SVE"
  {
    operands[3] = aarch64_ptrue_reg (<VPRED>mode);
  }
)
3643 | ||
;; Integer binary operations that have an immediate form, predicated
;; with a PTRUE.  We don't actually need the predicate for the first
;; and third alternatives, but using Upa or X isn't likely to gain much
;; and would make the instruction seem less uniform to the register
;; allocator.
(define_insn_and_split "@aarch64_pred_<optab><mode>"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w, ?&w")
	(unspec:SVE_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
	   (SVE_INT_BINARY_IMM:SVE_I
	     (match_operand:SVE_I 2 "register_operand" "%0, 0, w, w")
	     (match_operand:SVE_I 3 "aarch64_sve_<sve_imm_con>_operand" "<sve_imm_con>, w, <sve_imm_con>, w"))]
	  UNSPEC_PRED_X))]
  "TARGET_SVE"
  "@
   #
   <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   #
   movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  ; Split the unpredicated form after reload, so that we don't have
  ; the unnecessary PTRUE.
  "&& reload_completed
   && !register_operand (operands[3], <MODE>mode)"
  [(set (match_dup 0)
	(SVE_INT_BINARY_IMM:SVE_I (match_dup 2) (match_dup 3)))]
  ""
  [(set_attr "movprfx" "*,*,yes,yes")]
)
3672 | ||
;; Unpredicated binary operations with a constant (post-RA only).
;; These are generated by splitting a predicated instruction whose
;; predicate is unused.
(define_insn "*post_ra_<optab><mode>3"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
	(SVE_INT_BINARY_IMM:SVE_I
	  (match_operand:SVE_I 1 "register_operand" "0, w")
	  (match_operand:SVE_I 2 "aarch64_sve_<sve_imm_con>_immediate")))]
  "TARGET_SVE && reload_completed"
  "@
   <sve_int_op>\t%0.<Vetype>, %0.<Vetype>, #%<sve_imm_prefix>2
   movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %0.<Vetype>, #%<sve_imm_prefix>2"
  [(set_attr "movprfx" "*,yes")]
)
3687 | ||
;; Predicated integer operations with merging.
(define_expand "@cond_<optab><mode>"
  [(set (match_operand:SVE_I 0 "register_operand")
	(unspec:SVE_I
	  [(match_operand:<VPRED> 1 "register_operand")
	   (SVE_INT_BINARY:SVE_I
	     (match_operand:SVE_I 2 "register_operand")
	     (match_operand:SVE_I 3 "<sve_pred_int_rhs2_operand>"))
	   (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE"
)
3700 | ||
;; Predicated integer operations, merging with the first input.
(define_insn "*cond_<optab><mode>_2"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
	(unspec:SVE_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (SVE_INT_BINARY:SVE_I
	     (match_operand:SVE_I 2 "register_operand" "0, w")
	     (match_operand:SVE_I 3 "register_operand" "w, w"))
	   (match_dup 2)]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)
3717 | ||
;; Predicated integer operations, merging with the second input.
(define_insn "*cond_<optab><mode>_3"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
	(unspec:SVE_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (SVE_INT_BINARY:SVE_I
	     (match_operand:SVE_I 2 "register_operand" "w, w")
	     (match_operand:SVE_I 3 "register_operand" "0, w"))
	   (match_dup 3)]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
   movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)
3734 | ||
;; Predicated integer operations, merging with an independent value.
(define_insn_and_rewrite "*cond_<optab><mode>_any"
  [(set (match_operand:SVE_I 0 "register_operand" "=&w, &w, &w, &w, ?&w")
	(unspec:SVE_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
	   (SVE_INT_BINARY:SVE_I
	     (match_operand:SVE_I 2 "register_operand" "0, w, w, w, w")
	     (match_operand:SVE_I 3 "register_operand" "w, 0, w, w, w"))
	   (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE
   && !rtx_equal_p (operands[2], operands[4])
   && !rtx_equal_p (operands[3], operands[4])"
  "@
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   #"
  "&& reload_completed
   && register_operand (operands[4], <MODE>mode)
   && !rtx_equal_p (operands[0], operands[4])"
  {
    emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
					     operands[4], operands[1]));
    operands[4] = operands[2] = operands[0];
  }
  [(set_attr "movprfx" "yes")]
)
3764 | ||
;; -------------------------------------------------------------------------
;; ---- [INT] Addition
;; -------------------------------------------------------------------------
;; Includes:
;; - ADD
;; - DECB
;; - DECD
;; - DECH
;; - DECW
;; - INCB
;; - INCD
;; - INCH
;; - INCW
;; - SUB
;; -------------------------------------------------------------------------

(define_insn "add<mode>3"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w, ?w, ?w, w")
	(plus:SVE_I
	  (match_operand:SVE_I 1 "register_operand" "%0, 0, 0, w, w, w")
	  (match_operand:SVE_I 2 "aarch64_sve_add_operand" "vsa, vsn, vsi, vsa, vsn, w")))]
  "TARGET_SVE"
  "@
   add\t%0.<Vetype>, %0.<Vetype>, #%D2
   sub\t%0.<Vetype>, %0.<Vetype>, #%N2
   * return aarch64_output_sve_vector_inc_dec (\"%0.<Vetype>\", operands[2]);
   movprfx\t%0, %1\;add\t%0.<Vetype>, %0.<Vetype>, #%D2
   movprfx\t%0, %1\;sub\t%0.<Vetype>, %0.<Vetype>, #%N2
   add\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
  [(set_attr "movprfx" "*,*,*,yes,yes,*")]
)
3796 | ||
;; Merging forms are handled through SVE_INT_BINARY.

;; -------------------------------------------------------------------------
;; ---- [INT] Subtraction
;; -------------------------------------------------------------------------
;; Includes:
;; - SUB
;; - SUBR
;; -------------------------------------------------------------------------

(define_insn "sub<mode>3"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
	(minus:SVE_I
	  (match_operand:SVE_I 1 "aarch64_sve_arith_operand" "w, vsa, vsa")
	  (match_operand:SVE_I 2 "register_operand" "w, 0, w")))]
  "TARGET_SVE"
  "@
   sub\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>
   subr\t%0.<Vetype>, %0.<Vetype>, #%D1
   movprfx\t%0, %2\;subr\t%0.<Vetype>, %0.<Vetype>, #%D1"
  [(set_attr "movprfx" "*,*,yes")]
)
3819 | ||
;; Merging forms are handled through SVE_INT_BINARY.

;; -------------------------------------------------------------------------
;; ---- [INT] Take address
;; -------------------------------------------------------------------------
;; Includes:
;; - ADR
;; -------------------------------------------------------------------------

;; An unshifted and unscaled ADR.  This is functionally equivalent to an ADD,
;; but the svadrb intrinsics should preserve the user's choice.
(define_insn "@aarch64_adr<mode>"
  [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w")
	(unspec:SVE_FULL_SDI
	  [(match_operand:SVE_FULL_SDI 1 "register_operand" "w")
	   (match_operand:SVE_FULL_SDI 2 "register_operand" "w")]
	  UNSPEC_ADR))]
  "TARGET_SVE"
  "adr\t%0.<Vetype>, [%1.<Vetype>, %2.<Vetype>]"
)
3840 | ||
;; Same, but with the offset being sign-extended from the low 32 bits.
(define_insn_and_rewrite "*aarch64_adr_sxtw"
  [(set (match_operand:VNx2DI 0 "register_operand" "=w")
	(unspec:VNx2DI
	  [(match_operand:VNx2DI 1 "register_operand" "w")
	   (unspec:VNx2DI
	     [(match_operand 3)
	      (sign_extend:VNx2DI
		(truncate:VNx2SI
		  (match_operand:VNx2DI 2 "register_operand" "w")))]
	     UNSPEC_PRED_X)]
	  UNSPEC_ADR))]
  "TARGET_SVE"
  "adr\t%0.d, [%1.d, %2.d, sxtw]"
  "&& !CONSTANT_P (operands[3])"
  {
    operands[3] = CONSTM1_RTX (VNx2BImode);
  }
)
3860 | ||
;; Same, but with the offset being zero-extended from the low 32 bits.
(define_insn "*aarch64_adr_uxtw_unspec"
  [(set (match_operand:VNx2DI 0 "register_operand" "=w")
	(unspec:VNx2DI
	  [(match_operand:VNx2DI 1 "register_operand" "w")
	   (and:VNx2DI
	     (match_operand:VNx2DI 2 "register_operand" "w")
	     (match_operand:VNx2DI 3 "aarch64_sve_uxtw_immediate"))]
	  UNSPEC_ADR))]
  "TARGET_SVE"
  "adr\t%0.d, [%1.d, %2.d, uxtw]"
)
3873 | ||
;; Same, matching as a PLUS rather than unspec.
(define_insn "*aarch64_adr_uxtw_and"
  [(set (match_operand:VNx2DI 0 "register_operand" "=w")
	(plus:VNx2DI
	  (and:VNx2DI
	    (match_operand:VNx2DI 2 "register_operand" "w")
	    (match_operand:VNx2DI 3 "aarch64_sve_uxtw_immediate"))
	  (match_operand:VNx2DI 1 "register_operand" "w")))]
  "TARGET_SVE"
  "adr\t%0.d, [%1.d, %2.d, uxtw]"
)
3885 | ||
;; ADR with a nonzero shift.
(define_expand "@aarch64_adr<mode>_shift"
  [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
	(plus:SVE_FULL_SDI
	  (unspec:SVE_FULL_SDI
	    [(match_dup 4)
	     (ashift:SVE_FULL_SDI
	       (match_operand:SVE_FULL_SDI 2 "register_operand")
	       (match_operand:SVE_FULL_SDI 3 "const_1_to_3_operand"))]
	    UNSPEC_PRED_X)
	  (match_operand:SVE_FULL_SDI 1 "register_operand")))]
  "TARGET_SVE"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
)
3902 | ||
;; ADR with a nonzero shift (matcher for the expander above).
(define_insn_and_rewrite "*aarch64_adr<mode>_shift"
  [(set (match_operand:SVE_24I 0 "register_operand" "=w")
	(plus:SVE_24I
	  (unspec:SVE_24I
	    [(match_operand 4)
	     (ashift:SVE_24I
	       (match_operand:SVE_24I 2 "register_operand" "w")
	       (match_operand:SVE_24I 3 "const_1_to_3_operand"))]
	    UNSPEC_PRED_X)
	  (match_operand:SVE_24I 1 "register_operand" "w")))]
  "TARGET_SVE"
  "adr\t%0.<Vctype>, [%1.<Vctype>, %2.<Vctype>, lsl %3]"
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
)
3920 | ||
;; Same, but with the index being sign-extended from the low 32 bits.
(define_insn_and_rewrite "*aarch64_adr_shift_sxtw"
  [(set (match_operand:VNx2DI 0 "register_operand" "=w")
	(plus:VNx2DI
	  (unspec:VNx2DI
	    [(match_operand 4)
	     (ashift:VNx2DI
	       (unspec:VNx2DI
		 [(match_operand 5)
		  (sign_extend:VNx2DI
		    (truncate:VNx2SI
		      (match_operand:VNx2DI 2 "register_operand" "w")))]
		 UNSPEC_PRED_X)
	       (match_operand:VNx2DI 3 "const_1_to_3_operand"))]
	    UNSPEC_PRED_X)
	  (match_operand:VNx2DI 1 "register_operand" "w")))]
  "TARGET_SVE"
  "adr\t%0.d, [%1.d, %2.d, sxtw %3]"
  "&& (!CONSTANT_P (operands[4]) || !CONSTANT_P (operands[5]))"
  {
    operands[5] = operands[4] = CONSTM1_RTX (VNx2BImode);
  }
)
3944 | ||
;; Same, but with the index being zero-extended from the low 32 bits.
(define_insn_and_rewrite "*aarch64_adr_shift_uxtw"
  [(set (match_operand:VNx2DI 0 "register_operand" "=w")
	(plus:VNx2DI
	  (unspec:VNx2DI
	    [(match_operand 5)
	     (ashift:VNx2DI
	       (and:VNx2DI
		 (match_operand:VNx2DI 2 "register_operand" "w")
		 (match_operand:VNx2DI 4 "aarch64_sve_uxtw_immediate"))
	       (match_operand:VNx2DI 3 "const_1_to_3_operand"))]
	    UNSPEC_PRED_X)
	  (match_operand:VNx2DI 1 "register_operand" "w")))]
  "TARGET_SVE"
  "adr\t%0.d, [%1.d, %2.d, uxtw %3]"
  "&& !CONSTANT_P (operands[5])"
  {
    operands[5] = CONSTM1_RTX (VNx2BImode);
  }
)
3965 | ||
;; -------------------------------------------------------------------------
;; ---- [INT] Absolute difference
;; -------------------------------------------------------------------------
;; Includes:
;; - SABD
;; - UABD
;; -------------------------------------------------------------------------

;; Unpredicated integer absolute difference.
(define_expand "<su>abd<mode>_3"
  [(use (match_operand:SVE_I 0 "register_operand"))
   (USMAX:SVE_I
     (match_operand:SVE_I 1 "register_operand")
     (match_operand:SVE_I 2 "register_operand"))]
  "TARGET_SVE"
  {
    rtx pred = aarch64_ptrue_reg (<VPRED>mode);
    emit_insn (gen_aarch64_pred_<su>abd<mode> (operands[0], pred, operands[1],
					       operands[2]));
    DONE;
  }
)
3988 | ||
;; Predicated integer absolute difference.
(define_insn "@aarch64_pred_<su>abd<mode>"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
	(minus:SVE_I
	  (unspec:SVE_I
	    [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	     (USMAX:SVE_I
	       (match_operand:SVE_I 2 "register_operand" "%0, w")
	       (match_operand:SVE_I 3 "register_operand" "w, w"))]
	    UNSPEC_PRED_X)
	  (unspec:SVE_I
	    [(match_dup 1)
	     (<max_opp>:SVE_I
	       (match_dup 2)
	       (match_dup 3))]
	    UNSPEC_PRED_X)))]
  "TARGET_SVE"
  "@
   <su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)
4011 | ||
;; Predicated integer absolute difference with merging.
(define_expand "@aarch64_cond_<su>abd<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand")
	   (minus:SVE_FULL_I
	     (unspec:SVE_FULL_I
	       [(match_dup 1)
		(USMAX:SVE_FULL_I
		  (match_operand:SVE_FULL_I 2 "register_operand")
		  (match_operand:SVE_FULL_I 3 "register_operand"))]
	       UNSPEC_PRED_X)
	     (unspec:SVE_FULL_I
	       [(match_dup 1)
		(<max_opp>:SVE_FULL_I
		  (match_dup 2)
		  (match_dup 3))]
	       UNSPEC_PRED_X))
	   (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  {
    ;; ABD is commutative in operands 2 and 3, so canonicalize the case
    ;; in which operand 3 matches the merge value.
    if (rtx_equal_p (operands[3], operands[4]))
      std::swap (operands[2], operands[3]);
  })
4036 | ||
;; Predicated integer absolute difference, merging with the first input.
(define_insn_and_rewrite "*aarch64_cond_<su>abd<mode>_2"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
	(unspec:SVE_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (minus:SVE_I
	     (unspec:SVE_I
	       [(match_operand 4)
		(USMAX:SVE_I
		  (match_operand:SVE_I 2 "register_operand" "0, w")
		  (match_operand:SVE_I 3 "register_operand" "w, w"))]
	       UNSPEC_PRED_X)
	     (unspec:SVE_I
	       [(match_operand 5)
		(<max_opp>:SVE_I
		  (match_dup 2)
		  (match_dup 3))]
	       UNSPEC_PRED_X))
	   (match_dup 2)]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   <su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  "&& (!CONSTANT_P (operands[4]) || !CONSTANT_P (operands[5]))"
  {
    operands[4] = operands[5] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)
4067 | ||
907ea379 RS |
4068 | ;; Predicated integer absolute difference, merging with the second input. |
4069 | (define_insn_and_rewrite "*aarch64_cond_<su>abd<mode>_3" | |
4070 | [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") | |
4071 | (unspec:SVE_I | |
4072 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
4073 | (minus:SVE_I | |
4074 | (unspec:SVE_I | |
4075 | [(match_operand 4) | |
4076 | (USMAX:SVE_I | |
4077 | (match_operand:SVE_I 2 "register_operand" "w, w") | |
4078 | (match_operand:SVE_I 3 "register_operand" "0, w"))] | |
4079 | UNSPEC_PRED_X) | |
4080 | (unspec:SVE_I | |
4081 | [(match_operand 5) | |
4082 | (<max_opp>:SVE_I | |
4083 | (match_dup 2) | |
4084 | (match_dup 3))] | |
4085 | UNSPEC_PRED_X)) | |
4086 | (match_dup 3)] | |
4087 | UNSPEC_SEL))] | |
4088 | "TARGET_SVE" | |
4089 | "@ | |
4090 | <su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> | |
4091 | movprfx\t%0, %3\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>" | |
4092 | "&& (!CONSTANT_P (operands[4]) || !CONSTANT_P (operands[5]))" | |
4093 | { | |
4094 | operands[4] = operands[5] = CONSTM1_RTX (<VPRED>mode); | |
4095 | } | |
4096 | [(set_attr "movprfx" "*,yes")] | |
4097 | ) | |
4098 | ||
9730c5cc RS |
4099 | ;; Predicated integer absolute difference, merging with an independent value. |
4100 | (define_insn_and_rewrite "*aarch64_cond_<su>abd<mode>_any" | |
907ea379 RS |
4101 | [(set (match_operand:SVE_I 0 "register_operand" "=&w, &w, &w, &w, ?&w") |
4102 | (unspec:SVE_I | |
9730c5cc | 4103 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl") |
907ea379 RS |
4104 | (minus:SVE_I |
4105 | (unspec:SVE_I | |
9730c5cc | 4106 | [(match_operand 5) |
907ea379 RS |
4107 | (USMAX:SVE_I |
4108 | (match_operand:SVE_I 2 "register_operand" "0, w, w, w, w") | |
4109 | (match_operand:SVE_I 3 "register_operand" "w, 0, w, w, w"))] | |
9730c5cc | 4110 | UNSPEC_PRED_X) |
907ea379 | 4111 | (unspec:SVE_I |
9730c5cc | 4112 | [(match_operand 6) |
907ea379 | 4113 | (<max_opp>:SVE_I |
9730c5cc RS |
4114 | (match_dup 2) |
4115 | (match_dup 3))] | |
4116 | UNSPEC_PRED_X)) | |
907ea379 | 4117 | (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")] |
9730c5cc RS |
4118 | UNSPEC_SEL))] |
4119 | "TARGET_SVE | |
4120 | && !rtx_equal_p (operands[2], operands[4]) | |
4121 | && !rtx_equal_p (operands[3], operands[4])" | |
4122 | "@ | |
4123 | movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
4124 | movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> | |
4125 | movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
4126 | movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
4127 | #" | |
4128 | "&& 1" | |
4129 | { | |
4130 | if (!CONSTANT_P (operands[5]) || !CONSTANT_P (operands[6])) | |
4131 | operands[5] = operands[6] = CONSTM1_RTX (<VPRED>mode); | |
4132 | else if (reload_completed | |
4133 | && register_operand (operands[4], <MODE>mode) | |
4134 | && !rtx_equal_p (operands[0], operands[4])) | |
4135 | { | |
4136 | emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2], | |
4137 | operands[4], operands[1])); | |
4138 | operands[4] = operands[2] = operands[0]; | |
4139 | } | |
4140 | else | |
4141 | FAIL; | |
4142 | } | |
4143 | [(set_attr "movprfx" "yes")] | |
4144 | ) | |
4145 | ||
624d0f07 RS |
4146 | ;; ------------------------------------------------------------------------- |
4147 | ;; ---- [INT] Saturating addition and subtraction | |
4148 | ;; ------------------------------------------------------------------------- | |
4149 | ;; - SQADD | |
4150 | ;; - SQSUB | |
4151 | ;; - UQADD | |
4152 | ;; - UQSUB | |
4153 | ;; ------------------------------------------------------------------------- | |
4154 | ||
4155 | ;; Unpredicated saturating signed addition and subtraction. | |
694e6b19 | 4156 | (define_insn "@aarch64_sve_<optab><mode>" |
f75cdd2c RS |
4157 | [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w, ?&w, w") |
4158 | (SBINQOPS:SVE_FULL_I | |
4159 | (match_operand:SVE_FULL_I 1 "register_operand" "0, 0, w, w, w") | |
4160 | (match_operand:SVE_FULL_I 2 "aarch64_sve_sqadd_operand" "vsQ, vsS, vsQ, vsS, w")))] | |
624d0f07 RS |
4161 | "TARGET_SVE" |
4162 | "@ | |
4163 | <binqops_op>\t%0.<Vetype>, %0.<Vetype>, #%D2 | |
4164 | <binqops_op_rev>\t%0.<Vetype>, %0.<Vetype>, #%N2 | |
4165 | movprfx\t%0, %1\;<binqops_op>\t%0.<Vetype>, %0.<Vetype>, #%D2 | |
4166 | movprfx\t%0, %1\;<binqops_op_rev>\t%0.<Vetype>, %0.<Vetype>, #%N2 | |
4167 | <binqops_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>" | |
4168 | [(set_attr "movprfx" "*,*,yes,yes,*")] | |
4169 | ) | |
4170 | ||
4171 | ;; Unpredicated saturating unsigned addition and subtraction. | |
694e6b19 | 4172 | (define_insn "@aarch64_sve_<optab><mode>" |
f75cdd2c RS |
4173 | [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w, w") |
4174 | (UBINQOPS:SVE_FULL_I | |
4175 | (match_operand:SVE_FULL_I 1 "register_operand" "0, w, w") | |
4176 | (match_operand:SVE_FULL_I 2 "aarch64_sve_arith_operand" "vsa, vsa, w")))] | |
624d0f07 RS |
4177 | "TARGET_SVE" |
4178 | "@ | |
4179 | <binqops_op>\t%0.<Vetype>, %0.<Vetype>, #%D2 | |
4180 | movprfx\t%0, %1\;<binqops_op>\t%0.<Vetype>, %0.<Vetype>, #%D2 | |
4181 | <binqops_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>" | |
4182 | [(set_attr "movprfx" "*,yes,*")] | |
4183 | ) | |
4184 | ||
915d28fe RS |
4185 | ;; ------------------------------------------------------------------------- |
4186 | ;; ---- [INT] Highpart multiplication | |
4187 | ;; ------------------------------------------------------------------------- | |
4188 | ;; Includes: | |
4189 | ;; - SMULH | |
4190 | ;; - UMULH | |
4191 | ;; ------------------------------------------------------------------------- | |
43cacb12 | 4192 | |
11e9443f RS |
4193 | ;; Unpredicated highpart multiplication. |
4194 | (define_expand "<su>mul<mode>3_highpart" | |
7446de5a RS |
4195 | [(set (match_operand:SVE_I 0 "register_operand") |
4196 | (unspec:SVE_I | |
11e9443f | 4197 | [(match_dup 3) |
7446de5a RS |
4198 | (unspec:SVE_I |
4199 | [(match_operand:SVE_I 1 "register_operand") | |
4200 | (match_operand:SVE_I 2 "register_operand")] | |
f75cdd2c | 4201 | MUL_HIGHPART)] |
06308276 | 4202 | UNSPEC_PRED_X))] |
11e9443f RS |
4203 | "TARGET_SVE" |
4204 | { | |
16de3637 | 4205 | operands[3] = aarch64_ptrue_reg (<VPRED>mode); |
11e9443f RS |
4206 | } |
4207 | ) | |
4208 | ||
4209 | ;; Predicated highpart multiplication. | |
624d0f07 | 4210 | (define_insn "@aarch64_pred_<optab><mode>" |
7446de5a RS |
4211 | [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") |
4212 | (unspec:SVE_I | |
a08acce8 | 4213 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") |
7446de5a RS |
4214 | (unspec:SVE_I |
4215 | [(match_operand:SVE_I 2 "register_operand" "%0, w") | |
4216 | (match_operand:SVE_I 3 "register_operand" "w, w")] | |
f75cdd2c | 4217 | MUL_HIGHPART)] |
06308276 | 4218 | UNSPEC_PRED_X))] |
11e9443f | 4219 | "TARGET_SVE" |
a08acce8 RH |
4220 | "@ |
4221 | <su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
4222 | movprfx\t%0, %2\;<su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
4223 | [(set_attr "movprfx" "*,yes")] | |
11e9443f RS |
4224 | ) |
4225 | ||
624d0f07 RS |
4226 | ;; Predicated highpart multiplications with merging. |
4227 | (define_expand "@cond_<optab><mode>" | |
f75cdd2c RS |
4228 | [(set (match_operand:SVE_FULL_I 0 "register_operand") |
4229 | (unspec:SVE_FULL_I | |
624d0f07 | 4230 | [(match_operand:<VPRED> 1 "register_operand") |
f75cdd2c RS |
4231 | (unspec:SVE_FULL_I |
4232 | [(match_operand:SVE_FULL_I 2 "register_operand") | |
4233 | (match_operand:SVE_FULL_I 3 "register_operand")] | |
624d0f07 | 4234 | MUL_HIGHPART) |
f75cdd2c | 4235 | (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")] |
624d0f07 RS |
4236 | UNSPEC_SEL))] |
4237 | "TARGET_SVE" | |
4238 | { | |
4239 | /* Only target code is aware of these operations, so we don't need | |
4240 | to handle the fully-general case. */ | |
4241 | gcc_assert (rtx_equal_p (operands[2], operands[4]) | |
4242 | || CONSTANT_P (operands[4])); | |
4243 | }) | |
4244 | ||
4245 | ;; Predicated highpart multiplications, merging with the first input. | |
4246 | (define_insn "*cond_<optab><mode>_2" | |
f75cdd2c RS |
4247 | [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") |
4248 | (unspec:SVE_FULL_I | |
624d0f07 | 4249 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") |
f75cdd2c RS |
4250 | (unspec:SVE_FULL_I |
4251 | [(match_operand:SVE_FULL_I 2 "register_operand" "0, w") | |
4252 | (match_operand:SVE_FULL_I 3 "register_operand" "w, w")] | |
624d0f07 RS |
4253 | MUL_HIGHPART) |
4254 | (match_dup 2)] | |
4255 | UNSPEC_SEL))] | |
4256 | "TARGET_SVE" | |
4257 | "@ | |
4258 | <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
4259 | movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
4260 | [(set_attr "movprfx" "*,yes")]) | |
4261 | ||
4262 | ;; Predicated highpart multiplications, merging with zero. | |
4263 | (define_insn "*cond_<optab><mode>_z" | |
f75cdd2c RS |
4264 | [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w") |
4265 | (unspec:SVE_FULL_I | |
624d0f07 | 4266 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") |
f75cdd2c RS |
4267 | (unspec:SVE_FULL_I |
4268 | [(match_operand:SVE_FULL_I 2 "register_operand" "%0, w") | |
4269 | (match_operand:SVE_FULL_I 3 "register_operand" "w, w")] | |
624d0f07 | 4270 | MUL_HIGHPART) |
f75cdd2c | 4271 | (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_zero")] |
624d0f07 RS |
4272 | UNSPEC_SEL))] |
4273 | "TARGET_SVE" | |
4274 | "@ | |
4275 | movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
4276 | movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
4277 | [(set_attr "movprfx" "yes")]) | |
4278 | ||
915d28fe RS |
4279 | ;; ------------------------------------------------------------------------- |
4280 | ;; ---- [INT] Division | |
4281 | ;; ------------------------------------------------------------------------- | |
4282 | ;; Includes: | |
4283 | ;; - SDIV | |
4284 | ;; - SDIVR | |
4285 | ;; - UDIV | |
4286 | ;; - UDIVR | |
4287 | ;; ------------------------------------------------------------------------- | |
4288 | ||
4289 | ;; Unpredicated integer division. | |
c38f7319 | 4290 | (define_expand "<optab><mode>3" |
f75cdd2c RS |
4291 | [(set (match_operand:SVE_FULL_SDI 0 "register_operand") |
4292 | (unspec:SVE_FULL_SDI | |
c38f7319 | 4293 | [(match_dup 3) |
f75cdd2c RS |
4294 | (SVE_INT_BINARY_SD:SVE_FULL_SDI |
4295 | (match_operand:SVE_FULL_SDI 1 "register_operand") | |
4296 | (match_operand:SVE_FULL_SDI 2 "register_operand"))] | |
06308276 | 4297 | UNSPEC_PRED_X))] |
c38f7319 RS |
4298 | "TARGET_SVE" |
4299 | { | |
16de3637 | 4300 | operands[3] = aarch64_ptrue_reg (<VPRED>mode); |
c38f7319 RS |
4301 | } |
4302 | ) | |
4303 | ||
915d28fe | 4304 | ;; Integer division predicated with a PTRUE. |
624d0f07 | 4305 | (define_insn "@aarch64_pred_<optab><mode>" |
f75cdd2c RS |
4306 | [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, w, ?&w") |
4307 | (unspec:SVE_FULL_SDI | |
a08acce8 | 4308 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") |
f75cdd2c RS |
4309 | (SVE_INT_BINARY_SD:SVE_FULL_SDI |
4310 | (match_operand:SVE_FULL_SDI 2 "register_operand" "0, w, w") | |
4311 | (match_operand:SVE_FULL_SDI 3 "register_operand" "w, 0, w"))] | |
06308276 | 4312 | UNSPEC_PRED_X))] |
c38f7319 RS |
4313 | "TARGET_SVE" |
4314 | "@ | |
4315 | <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
a08acce8 RH |
4316 | <sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> |
4317 | movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
4318 | [(set_attr "movprfx" "*,*,yes")] | |
c38f7319 RS |
4319 | ) |
4320 | ||
915d28fe | 4321 | ;; Predicated integer division with merging. |
624d0f07 | 4322 | (define_expand "@cond_<optab><mode>" |
f75cdd2c RS |
4323 | [(set (match_operand:SVE_FULL_SDI 0 "register_operand") |
4324 | (unspec:SVE_FULL_SDI | |
915d28fe | 4325 | [(match_operand:<VPRED> 1 "register_operand") |
f75cdd2c RS |
4326 | (SVE_INT_BINARY_SD:SVE_FULL_SDI |
4327 | (match_operand:SVE_FULL_SDI 2 "register_operand") | |
4328 | (match_operand:SVE_FULL_SDI 3 "register_operand")) | |
4329 | (match_operand:SVE_FULL_SDI 4 "aarch64_simd_reg_or_zero")] | |
915d28fe | 4330 | UNSPEC_SEL))] |
43cacb12 | 4331 | "TARGET_SVE" |
43cacb12 RS |
4332 | ) |
4333 | ||
915d28fe RS |
4334 | ;; Predicated integer division, merging with the first input. |
4335 | (define_insn "*cond_<optab><mode>_2" | |
f75cdd2c RS |
4336 | [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w") |
4337 | (unspec:SVE_FULL_SDI | |
915d28fe | 4338 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") |
f75cdd2c RS |
4339 | (SVE_INT_BINARY_SD:SVE_FULL_SDI |
4340 | (match_operand:SVE_FULL_SDI 2 "register_operand" "0, w") | |
4341 | (match_operand:SVE_FULL_SDI 3 "register_operand" "w, w")) | |
915d28fe RS |
4342 | (match_dup 2)] |
4343 | UNSPEC_SEL))] | |
43cacb12 | 4344 | "TARGET_SVE" |
915d28fe RS |
4345 | "@ |
4346 | <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
4347 | movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
4348 | [(set_attr "movprfx" "*,yes")] | |
4349 | ) | |
4350 | ||
4351 | ;; Predicated integer division, merging with the second input. | |
4352 | (define_insn "*cond_<optab><mode>_3" | |
f75cdd2c RS |
4353 | [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w") |
4354 | (unspec:SVE_FULL_SDI | |
915d28fe | 4355 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") |
f75cdd2c RS |
4356 | (SVE_INT_BINARY_SD:SVE_FULL_SDI |
4357 | (match_operand:SVE_FULL_SDI 2 "register_operand" "w, w") | |
4358 | (match_operand:SVE_FULL_SDI 3 "register_operand" "0, w")) | |
915d28fe RS |
4359 | (match_dup 3)] |
4360 | UNSPEC_SEL))] | |
4361 | "TARGET_SVE" | |
4362 | "@ | |
4363 | <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> | |
4364 | movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>" | |
4365 | [(set_attr "movprfx" "*,yes")] | |
4366 | ) | |
4367 | ||
4368 | ;; Predicated integer division, merging with an independent value. | |
4369 | (define_insn_and_rewrite "*cond_<optab><mode>_any" | |
f75cdd2c RS |
4370 | [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=&w, &w, &w, &w, ?&w") |
4371 | (unspec:SVE_FULL_SDI | |
915d28fe | 4372 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl") |
f75cdd2c RS |
4373 | (SVE_INT_BINARY_SD:SVE_FULL_SDI |
4374 | (match_operand:SVE_FULL_SDI 2 "register_operand" "0, w, w, w, w") | |
4375 | (match_operand:SVE_FULL_SDI 3 "register_operand" "w, 0, w, w, w")) | |
4376 | (match_operand:SVE_FULL_SDI 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")] | |
915d28fe RS |
4377 | UNSPEC_SEL))] |
4378 | "TARGET_SVE | |
4379 | && !rtx_equal_p (operands[2], operands[4]) | |
4380 | && !rtx_equal_p (operands[3], operands[4])" | |
4381 | "@ | |
4382 | movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
4383 | movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> | |
4384 | movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
4385 | movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
4386 | #" | |
4387 | "&& reload_completed | |
4388 | && register_operand (operands[4], <MODE>mode) | |
4389 | && !rtx_equal_p (operands[0], operands[4])" | |
4390 | { | |
4391 | emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2], | |
4392 | operands[4], operands[1])); | |
4393 | operands[4] = operands[2] = operands[0]; | |
4394 | } | |
4395 | [(set_attr "movprfx" "yes")] | |
43cacb12 RS |
4396 | ) |
4397 | ||
915d28fe RS |
4398 | ;; ------------------------------------------------------------------------- |
4399 | ;; ---- [INT] Binary logical operations | |
4400 | ;; ------------------------------------------------------------------------- | |
4401 | ;; Includes: | |
4402 | ;; - AND | |
4403 | ;; - EOR | |
4404 | ;; - ORR | |
4405 | ;; ------------------------------------------------------------------------- | |
4406 | ||
4407 | ;; Unpredicated integer binary logical operations. | |
43cacb12 | 4408 | (define_insn "<optab><mode>3" |
6802b5ba JR |
4409 | [(set (match_operand:SVE_I 0 "register_operand" "=w, ?w, w") |
4410 | (LOGICAL:SVE_I | |
4411 | (match_operand:SVE_I 1 "register_operand" "%0, w, w") | |
4412 | (match_operand:SVE_I 2 "aarch64_sve_logical_operand" "vsl, vsl, w")))] | |
43cacb12 RS |
4413 | "TARGET_SVE" |
4414 | "@ | |
4415 | <logical>\t%0.<Vetype>, %0.<Vetype>, #%C2 | |
5e176a61 | 4416 | movprfx\t%0, %1\;<logical>\t%0.<Vetype>, %0.<Vetype>, #%C2 |
43cacb12 | 4417 | <logical>\t%0.d, %1.d, %2.d" |
5e176a61 | 4418 | [(set_attr "movprfx" "*,yes,*")] |
43cacb12 RS |
4419 | ) |
4420 | ||
915d28fe RS |
4421 | ;; Merging forms are handled through SVE_INT_BINARY. |
4422 | ||
4423 | ;; ------------------------------------------------------------------------- | |
4424 | ;; ---- [INT] Binary logical operations (inverted second input) | |
4425 | ;; ------------------------------------------------------------------------- | |
4426 | ;; Includes: | |
4427 | ;; - BIC | |
4428 | ;; ------------------------------------------------------------------------- | |
43cacb12 | 4429 | |
624d0f07 RS |
4430 | ;; Unpredicated BIC. |
4431 | (define_expand "@aarch64_bic<mode>" | |
bb3ab62a JR |
4432 | [(set (match_operand:SVE_I 0 "register_operand") |
4433 | (and:SVE_I | |
4434 | (unspec:SVE_I | |
624d0f07 | 4435 | [(match_dup 3) |
bb3ab62a | 4436 | (not:SVE_I (match_operand:SVE_I 2 "register_operand"))] |
624d0f07 | 4437 | UNSPEC_PRED_X) |
bb3ab62a | 4438 | (match_operand:SVE_I 1 "register_operand")))] |
624d0f07 RS |
4439 | "TARGET_SVE" |
4440 | { | |
4441 | operands[3] = CONSTM1_RTX (<VPRED>mode); | |
4442 | } | |
4443 | ) | |
4444 | ||
4445 | ;; Predicated BIC. | |
35d6c591 | 4446 | (define_insn_and_rewrite "*bic<mode>3" |
bb3ab62a JR |
4447 | [(set (match_operand:SVE_I 0 "register_operand" "=w") |
4448 | (and:SVE_I | |
4449 | (unspec:SVE_I | |
35d6c591 | 4450 | [(match_operand 3) |
bb3ab62a JR |
4451 | (not:SVE_I |
4452 | (match_operand:SVE_I 2 "register_operand" "w"))] | |
06308276 | 4453 | UNSPEC_PRED_X) |
bb3ab62a | 4454 | (match_operand:SVE_I 1 "register_operand" "w")))] |
35d6c591 RS |
4455 | "TARGET_SVE" |
4456 | "bic\t%0.d, %1.d, %2.d" | |
4457 | "&& !CONSTANT_P (operands[3])" | |
4458 | { | |
4459 | operands[3] = CONSTM1_RTX (<VPRED>mode); | |
4460 | } | |
43cacb12 RS |
4461 | ) |
4462 | ||
624d0f07 RS |
4463 | ;; Predicated BIC with merging. |
4464 | (define_expand "@cond_bic<mode>" | |
f75cdd2c RS |
4465 | [(set (match_operand:SVE_FULL_I 0 "register_operand") |
4466 | (unspec:SVE_FULL_I | |
624d0f07 | 4467 | [(match_operand:<VPRED> 1 "register_operand") |
f75cdd2c RS |
4468 | (and:SVE_FULL_I |
4469 | (not:SVE_FULL_I (match_operand:SVE_FULL_I 3 "register_operand")) | |
4470 | (match_operand:SVE_FULL_I 2 "register_operand")) | |
4471 | (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")] | |
624d0f07 RS |
4472 | UNSPEC_SEL))] |
4473 | "TARGET_SVE" | |
4474 | ) | |
4475 | ||
1b187f36 RS |
4476 | ;; Predicated integer BIC, merging with the first input. |
4477 | (define_insn "*cond_bic<mode>_2" | |
37426e0f RS |
4478 | [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") |
4479 | (unspec:SVE_I | |
1b187f36 | 4480 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") |
37426e0f RS |
4481 | (and:SVE_I |
4482 | (not:SVE_I | |
4483 | (match_operand:SVE_I 3 "register_operand" "w, w")) | |
4484 | (match_operand:SVE_I 2 "register_operand" "0, w")) | |
1b187f36 RS |
4485 | (match_dup 2)] |
4486 | UNSPEC_SEL))] | |
4487 | "TARGET_SVE" | |
4488 | "@ | |
4489 | bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
4490 | movprfx\t%0, %2\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
4491 | [(set_attr "movprfx" "*,yes")] | |
4492 | ) | |
4493 | ||
4494 | ;; Predicated integer BIC, merging with an independent value. | |
4495 | (define_insn_and_rewrite "*cond_bic<mode>_any" | |
37426e0f RS |
4496 | [(set (match_operand:SVE_I 0 "register_operand" "=&w, &w, &w, ?&w") |
4497 | (unspec:SVE_I | |
1b187f36 | 4498 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl") |
37426e0f RS |
4499 | (and:SVE_I |
4500 | (not:SVE_I | |
4501 | (match_operand:SVE_I 3 "register_operand" "w, w, w, w")) | |
4502 | (match_operand:SVE_I 2 "register_operand" "0, w, w, w")) | |
4503 | (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")] | |
1b187f36 RS |
4504 | UNSPEC_SEL))] |
4505 | "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" | |
4506 | "@ | |
4507 | movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
4508 | movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
4509 | movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
4510 | #" | |
4511 | "&& reload_completed | |
4512 | && register_operand (operands[4], <MODE>mode) | |
4513 | && !rtx_equal_p (operands[0], operands[4])" | |
4514 | { | |
4515 | emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2], | |
4516 | operands[4], operands[1])); | |
4517 | operands[4] = operands[2] = operands[0]; | |
4518 | } | |
4519 | [(set_attr "movprfx" "yes")] | |
4520 | ) | |
4521 | ||
915d28fe | 4522 | ;; ------------------------------------------------------------------------- |
624d0f07 | 4523 | ;; ---- [INT] Shifts (rounding towards -Inf) |
915d28fe RS |
4524 | ;; ------------------------------------------------------------------------- |
4525 | ;; Includes: | |
4526 | ;; - ASR | |
624d0f07 | 4527 | ;; - ASRR |
915d28fe | 4528 | ;; - LSL |
624d0f07 | 4529 | ;; - LSLR |
915d28fe | 4530 | ;; - LSR |
624d0f07 | 4531 | ;; - LSRR |
915d28fe | 4532 | ;; ------------------------------------------------------------------------- |
43cacb12 | 4533 | |
915d28fe RS |
4534 | ;; Unpredicated shift by a scalar, which expands into one of the vector |
4535 | ;; shifts below. | |
4536 | (define_expand "<ASHIFT:optab><mode>3" | |
b81fbfe1 RS |
4537 | [(set (match_operand:SVE_I 0 "register_operand") |
4538 | (ASHIFT:SVE_I | |
4539 | (match_operand:SVE_I 1 "register_operand") | |
f75cdd2c | 4540 | (match_operand:<VEL> 2 "general_operand")))] |
43cacb12 RS |
4541 | "TARGET_SVE" |
4542 | { | |
915d28fe RS |
4543 | rtx amount; |
4544 | if (CONST_INT_P (operands[2])) | |
4545 | { | |
4546 | amount = gen_const_vec_duplicate (<MODE>mode, operands[2]); | |
4547 | if (!aarch64_sve_<lr>shift_operand (operands[2], <MODE>mode)) | |
4548 | amount = force_reg (<MODE>mode, amount); | |
4549 | } | |
4550 | else | |
4551 | { | |
6070e39c AV |
4552 | amount = convert_to_mode (<VEL>mode, operands[2], 0); |
4553 | amount = expand_vector_broadcast (<MODE>mode, amount); | |
915d28fe RS |
4554 | } |
4555 | emit_insn (gen_v<optab><mode>3 (operands[0], operands[1], amount)); | |
4556 | DONE; | |
43cacb12 RS |
4557 | } |
4558 | ) | |
4559 | ||
915d28fe | 4560 | ;; Unpredicated shift by a vector. |
43cacb12 | 4561 | (define_expand "v<optab><mode>3" |
b81fbfe1 RS |
4562 | [(set (match_operand:SVE_I 0 "register_operand") |
4563 | (unspec:SVE_I | |
43cacb12 | 4564 | [(match_dup 3) |
b81fbfe1 RS |
4565 | (ASHIFT:SVE_I |
4566 | (match_operand:SVE_I 1 "register_operand") | |
4567 | (match_operand:SVE_I 2 "aarch64_sve_<lr>shift_operand"))] | |
06308276 | 4568 | UNSPEC_PRED_X))] |
43cacb12 RS |
4569 | "TARGET_SVE" |
4570 | { | |
16de3637 | 4571 | operands[3] = aarch64_ptrue_reg (<VPRED>mode); |
43cacb12 RS |
4572 | } |
4573 | ) | |
4574 | ||
915d28fe RS |
4575 | ;; Shift by a vector, predicated with a PTRUE. We don't actually need |
4576 | ;; the predicate for the first alternative, but using Upa or X isn't | |
4577 | ;; likely to gain much and would make the instruction seem less uniform | |
4578 | ;; to the register allocator. | |
624d0f07 | 4579 | (define_insn_and_split "@aarch64_pred_<optab><mode>" |
b81fbfe1 RS |
4580 | [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w, ?&w") |
4581 | (unspec:SVE_I | |
7d1f2401 | 4582 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl") |
b81fbfe1 RS |
4583 | (ASHIFT:SVE_I |
4584 | (match_operand:SVE_I 2 "register_operand" "w, 0, w, w") | |
4585 | (match_operand:SVE_I 3 "aarch64_sve_<lr>shift_operand" "D<lr>, w, 0, w"))] | |
06308276 | 4586 | UNSPEC_PRED_X))] |
43cacb12 RS |
4587 | "TARGET_SVE" |
4588 | "@ | |
26004f51 | 4589 | # |
a08acce8 | 4590 | <shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> |
7d1f2401 | 4591 | <shift>r\t%0.<Vetype>, %1/m, %3.<Vetype>, %2.<Vetype> |
a08acce8 | 4592 | movprfx\t%0, %2\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" |
26004f51 RS |
4593 | "&& reload_completed |
4594 | && !register_operand (operands[3], <MODE>mode)" | |
b81fbfe1 | 4595 | [(set (match_dup 0) (ASHIFT:SVE_I (match_dup 2) (match_dup 3)))] |
26004f51 | 4596 | "" |
7d1f2401 | 4597 | [(set_attr "movprfx" "*,*,*,yes")] |
43cacb12 RS |
4598 | ) |
4599 | ||
26004f51 RS |
4600 | ;; Unpredicated shift operations by a constant (post-RA only). |
4601 | ;; These are generated by splitting a predicated instruction whose | |
4602 | ;; predicate is unused. | |
4603 | (define_insn "*post_ra_v<optab><mode>3" | |
b81fbfe1 RS |
4604 | [(set (match_operand:SVE_I 0 "register_operand" "=w") |
4605 | (ASHIFT:SVE_I | |
4606 | (match_operand:SVE_I 1 "register_operand" "w") | |
4607 | (match_operand:SVE_I 2 "aarch64_simd_<lr>shift_imm")))] | |
26004f51 RS |
4608 | "TARGET_SVE && reload_completed" |
4609 | "<shift>\t%0.<Vetype>, %1.<Vetype>, #%2" | |
4610 | ) | |
4611 | ||
20103c0e RS |
4612 | ;; Predicated integer shift, merging with the first input. |
4613 | (define_insn "*cond_<optab><mode>_2_const" | |
ab76e3db RS |
4614 | [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") |
4615 | (unspec:SVE_I | |
20103c0e | 4616 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") |
ab76e3db RS |
4617 | (ASHIFT:SVE_I |
4618 | (match_operand:SVE_I 2 "register_operand" "0, w") | |
4619 | (match_operand:SVE_I 3 "aarch64_simd_<lr>shift_imm")) | |
20103c0e RS |
4620 | (match_dup 2)] |
4621 | UNSPEC_SEL))] | |
4622 | "TARGET_SVE" | |
4623 | "@ | |
4624 | <shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 | |
4625 | movprfx\t%0, %2\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3" | |
4626 | [(set_attr "movprfx" "*,yes")] | |
4627 | ) | |
4628 | ||
4629 | ;; Predicated integer shift, merging with an independent value. | |
4630 | (define_insn_and_rewrite "*cond_<optab><mode>_any_const" | |
ab76e3db RS |
4631 | [(set (match_operand:SVE_I 0 "register_operand" "=w, &w, ?&w") |
4632 | (unspec:SVE_I | |
20103c0e | 4633 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") |
ab76e3db RS |
4634 | (ASHIFT:SVE_I |
4635 | (match_operand:SVE_I 2 "register_operand" "w, w, w") | |
4636 | (match_operand:SVE_I 3 "aarch64_simd_<lr>shift_imm")) | |
4637 | (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")] | |
20103c0e RS |
4638 | UNSPEC_SEL))] |
4639 | "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" | |
4640 | "@ | |
4641 | movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 | |
4642 | movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 | |
4643 | #" | |
4644 | "&& reload_completed | |
4645 | && register_operand (operands[4], <MODE>mode) | |
4646 | && !rtx_equal_p (operands[0], operands[4])" | |
4647 | { | |
4648 | emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2], | |
4649 | operands[4], operands[1])); | |
4650 | operands[4] = operands[2] = operands[0]; | |
4651 | } | |
4652 | [(set_attr "movprfx" "yes")] | |
4653 | ) | |
4654 | ||
624d0f07 RS |
4655 | ;; Unpredicated shifts of narrow elements by 64-bit amounts. |
4656 | (define_insn "@aarch64_sve_<sve_int_op><mode>" | |
f75cdd2c RS |
4657 | [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=w") |
4658 | (unspec:SVE_FULL_BHSI | |
4659 | [(match_operand:SVE_FULL_BHSI 1 "register_operand" "w") | |
624d0f07 RS |
4660 | (match_operand:VNx2DI 2 "register_operand" "w")] |
4661 | SVE_SHIFT_WIDE))] | |
4662 | "TARGET_SVE" | |
4663 | "<sve_int_op>\t%0.<Vetype>, %1.<Vetype>, %2.d" | |
4664 | ) | |
c0c2f013 | 4665 | |
624d0f07 RS |
4666 | ;; Merging predicated shifts of narrow elements by 64-bit amounts. |
4667 | (define_expand "@cond_<sve_int_op><mode>" | |
f75cdd2c RS |
4668 | [(set (match_operand:SVE_FULL_BHSI 0 "register_operand") |
4669 | (unspec:SVE_FULL_BHSI | |
624d0f07 | 4670 | [(match_operand:<VPRED> 1 "register_operand") |
f75cdd2c RS |
4671 | (unspec:SVE_FULL_BHSI |
4672 | [(match_operand:SVE_FULL_BHSI 2 "register_operand") | |
624d0f07 RS |
4673 | (match_operand:VNx2DI 3 "register_operand")] |
4674 | SVE_SHIFT_WIDE) | |
f75cdd2c | 4675 | (match_operand:SVE_FULL_BHSI 4 "aarch64_simd_reg_or_zero")] |
624d0f07 | 4676 | UNSPEC_SEL))] |
c0c2f013 | 4677 | "TARGET_SVE" |
c0c2f013 YW |
4678 | ) |
4679 | ||
624d0f07 RS |
4680 | ;; Predicated shifts of narrow elements by 64-bit amounts, merging with |
4681 | ;; the first input. | |
4682 | (define_insn "*cond_<sve_int_op><mode>_m" | |
f75cdd2c RS |
4683 | [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=w, ?&w") |
4684 | (unspec:SVE_FULL_BHSI | |
624d0f07 | 4685 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") |
f75cdd2c RS |
4686 | (unspec:SVE_FULL_BHSI |
4687 | [(match_operand:SVE_FULL_BHSI 2 "register_operand" "0, w") | |
624d0f07 RS |
4688 | (match_operand:VNx2DI 3 "register_operand" "w, w")] |
4689 | SVE_SHIFT_WIDE) | |
4690 | (match_dup 2)] | |
4691 | UNSPEC_SEL))] | |
4692 | "TARGET_SVE" | |
4693 | "@ | |
4694 | <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.d | |
4695 | movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.d" | |
4696 | [(set_attr "movprfx" "*, yes")]) | |
4697 | ||
4698 | ;; Predicated shifts of narrow elements by 64-bit amounts, merging with zero. | |
4699 | (define_insn "*cond_<sve_int_op><mode>_z" | |
f75cdd2c RS |
4700 | [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=&w, &w") |
4701 | (unspec:SVE_FULL_BHSI | |
624d0f07 | 4702 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") |
f75cdd2c RS |
4703 | (unspec:SVE_FULL_BHSI |
4704 | [(match_operand:SVE_FULL_BHSI 2 "register_operand" "0, w") | |
624d0f07 RS |
4705 | (match_operand:VNx2DI 3 "register_operand" "w, w")] |
4706 | SVE_SHIFT_WIDE) | |
f75cdd2c | 4707 | (match_operand:SVE_FULL_BHSI 4 "aarch64_simd_imm_zero")] |
624d0f07 RS |
4708 | UNSPEC_SEL))] |
4709 | "TARGET_SVE" | |
4710 | "@ | |
4711 | movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.d | |
4712 | movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.d" | |
4713 | [(set_attr "movprfx" "yes")]) | |
4714 | ||
4715 | ;; ------------------------------------------------------------------------- | |
4716 | ;; ---- [INT] Shifts (rounding towards 0) | |
4717 | ;; ------------------------------------------------------------------------- | |
4718 | ;; Includes: | |
4719 | ;; - ASRD | |
0a09a948 RS |
4720 | ;; - SQSHLU (SVE2) |
4721 | ;; - SRSHR (SVE2) | |
4722 | ;; - URSHR (SVE2) | |
624d0f07 RS |
4723 | ;; ------------------------------------------------------------------------- |
4724 | ||
a958b2fc | 4725 | ;; Unpredicated ASRD. |
624d0f07 | 4726 | (define_expand "sdiv_pow2<mode>3" |
a958b2fc RS |
4727 | [(set (match_operand:SVE_I 0 "register_operand") |
4728 | (unspec:SVE_I | |
624d0f07 | 4729 | [(match_dup 3) |
a958b2fc RS |
4730 | (unspec:SVE_I |
4731 | [(match_operand:SVE_I 1 "register_operand") | |
624d0f07 | 4732 | (match_operand 2 "aarch64_simd_rshift_imm")] |
a958b2fc RS |
4733 | UNSPEC_ASRD)] |
4734 | UNSPEC_PRED_X))] | |
624d0f07 RS |
4735 | "TARGET_SVE" |
4736 | { | |
4737 | operands[3] = aarch64_ptrue_reg (<VPRED>mode); | |
4738 | } | |
4739 | ) | |
4740 | ||
a958b2fc RS |
4741 | ;; Predicated ASRD. |
4742 | (define_insn "*sdiv_pow2<mode>3" | |
4743 | [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") | |
4744 | (unspec:SVE_I | |
4745 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
4746 | (unspec:SVE_I | |
4747 | [(match_operand:SVE_I 2 "register_operand" "0, w") | |
4748 | (match_operand:SVE_I 3 "aarch64_simd_rshift_imm")] | |
4749 | UNSPEC_ASRD)] | |
4750 | UNSPEC_PRED_X))] | |
4751 | "TARGET_SVE" | |
4752 | "@ | |
4753 | asrd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 | |
4754 | movprfx\t%0, %2\;asrd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3" | |
4755 | [(set_attr "movprfx" "*,yes")]) | |
4756 | ||
4757 | ;; Predicated shift with merging. | |
0a09a948 | 4758 | (define_expand "@cond_<sve_int_op><mode>" |
a958b2fc RS |
4759 | [(set (match_operand:SVE_I 0 "register_operand") |
4760 | (unspec:SVE_I | |
624d0f07 | 4761 | [(match_operand:<VPRED> 1 "register_operand") |
a958b2fc RS |
4762 | (unspec:SVE_I |
4763 | [(match_dup 5) | |
4764 | (unspec:SVE_I | |
4765 | [(match_operand:SVE_I 2 "register_operand") | |
4766 | (match_operand:SVE_I 3 "aarch64_simd_<lr>shift_imm")] | |
4767 | SVE_INT_SHIFT_IMM)] | |
4768 | UNSPEC_PRED_X) | |
4769 | (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero")] | |
624d0f07 RS |
4770 | UNSPEC_SEL))] |
4771 | "TARGET_SVE" | |
a958b2fc RS |
4772 | { |
4773 | operands[5] = aarch64_ptrue_reg (<VPRED>mode); | |
4774 | } | |
624d0f07 RS |
4775 | ) |
4776 | ||
a958b2fc RS |
4777 | ;; Predicated shift, merging with the first input. |
4778 | (define_insn_and_rewrite "*cond_<sve_int_op><mode>_2" | |
4779 | [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") | |
4780 | (unspec:SVE_I | |
c0c2f013 | 4781 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") |
a958b2fc RS |
4782 | (unspec:SVE_I |
4783 | [(match_operand 4) | |
4784 | (unspec:SVE_I | |
4785 | [(match_operand:SVE_I 2 "register_operand" "0, w") | |
4786 | (match_operand:SVE_I 3 "aarch64_simd_<lr>shift_imm")] | |
4787 | SVE_INT_SHIFT_IMM)] | |
4788 | UNSPEC_PRED_X) | |
624d0f07 RS |
4789 | (match_dup 2)] |
4790 | UNSPEC_SEL))] | |
4791 | "TARGET_SVE" | |
4792 | "@ | |
0a09a948 RS |
4793 | <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 |
4794 | movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3" | |
a958b2fc RS |
4795 | "&& !CONSTANT_P (operands[4])" |
4796 | { | |
4797 | operands[4] = CONSTM1_RTX (<VPRED>mode); | |
4798 | } | |
624d0f07 RS |
4799 | [(set_attr "movprfx" "*,yes")]) |
4800 | ||
a958b2fc RS |
4801 | ;; Predicated shift, merging with an independent value. |
4802 | (define_insn_and_rewrite "*cond_<sve_int_op><mode>_any" | |
4803 | [(set (match_operand:SVE_I 0 "register_operand" "=w, &w, ?&w") | |
4804 | (unspec:SVE_I | |
4805 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") | |
4806 | (unspec:SVE_I | |
4807 | [(match_operand 5) | |
4808 | (unspec:SVE_I | |
4809 | [(match_operand:SVE_I 2 "register_operand" "w, w, w") | |
4810 | (match_operand:SVE_I 3 "aarch64_simd_<lr>shift_imm")] | |
4811 | SVE_INT_SHIFT_IMM)] | |
4812 | UNSPEC_PRED_X) | |
4813 | (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")] | |
4814 | UNSPEC_SEL))] | |
4815 | "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" | |
4816 | "@ | |
4817 | movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 | |
4818 | movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 | |
4819 | #" | |
4820 | "&& reload_completed | |
4821 | && register_operand (operands[4], <MODE>mode) | |
4822 | && !rtx_equal_p (operands[0], operands[4])" | |
4823 | { | |
4824 | emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2], | |
4825 | operands[4], operands[1])); | |
4826 | operands[4] = operands[2] = operands[0]; | |
4827 | } | |
4828 | [(set_attr "movprfx" "yes")] | |
4829 | ) | |
624d0f07 RS |
4830 | |
4831 | ;; ------------------------------------------------------------------------- | |
4832 | ;; ---- [FP<-INT] General binary arithmetic corresponding to unspecs | |
4833 | ;; ------------------------------------------------------------------------- | |
4834 | ;; Includes: | |
4835 | ;; - FSCALE | |
4836 | ;; - FTSMUL | |
4837 | ;; - FTSSEL | |
4838 | ;; ------------------------------------------------------------------------- | |
4839 | ||
4840 | ;; Unpredicated floating-point binary operations that take an integer as | |
4841 | ;; their second operand. | |
4842 | (define_insn "@aarch64_sve_<optab><mode>" | |
f75cdd2c RS |
4843 | [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w") |
4844 | (unspec:SVE_FULL_F | |
4845 | [(match_operand:SVE_FULL_F 1 "register_operand" "w") | |
4846 | (match_operand:<V_INT_EQUIV> 2 "register_operand" "w")] | |
4847 | SVE_FP_BINARY_INT))] | |
624d0f07 RS |
4848 | "TARGET_SVE" |
4849 | "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>" | |
4850 | ) | |
4851 | ||
4852 | ;; Predicated floating-point binary operations that take an integer | |
4853 | ;; as their second operand. | |
4854 | (define_insn "@aarch64_pred_<optab><mode>" | |
f75cdd2c RS |
4855 | [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") |
4856 | (unspec:SVE_FULL_F | |
624d0f07 RS |
4857 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") |
4858 | (match_operand:SI 4 "aarch64_sve_gp_strictness") | |
f75cdd2c | 4859 | (match_operand:SVE_FULL_F 2 "register_operand" "0, w") |
624d0f07 RS |
4860 | (match_operand:<V_INT_EQUIV> 3 "register_operand" "w, w")] |
4861 | SVE_COND_FP_BINARY_INT))] | |
4862 | "TARGET_SVE" | |
4863 | "@ | |
4864 | <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
4865 | movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
4866 | [(set_attr "movprfx" "*,yes")] | |
4867 | ) | |
4868 | ||
4869 | ;; Predicated floating-point binary operations with merging, taking an | |
4870 | ;; integer as their second operand. | |
4871 | (define_expand "@cond_<optab><mode>" | |
f75cdd2c RS |
4872 | [(set (match_operand:SVE_FULL_F 0 "register_operand") |
4873 | (unspec:SVE_FULL_F | |
624d0f07 | 4874 | [(match_operand:<VPRED> 1 "register_operand") |
f75cdd2c | 4875 | (unspec:SVE_FULL_F |
624d0f07 RS |
4876 | [(match_dup 1) |
4877 | (const_int SVE_STRICT_GP) | |
f75cdd2c | 4878 | (match_operand:SVE_FULL_F 2 "register_operand") |
624d0f07 RS |
4879 | (match_operand:<V_INT_EQUIV> 3 "register_operand")] |
4880 | SVE_COND_FP_BINARY_INT) | |
f75cdd2c | 4881 | (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")] |
624d0f07 | 4882 | UNSPEC_SEL))] |
c0c2f013 | 4883 | "TARGET_SVE" |
624d0f07 RS |
4884 | ) |
4885 | ||
4886 | ;; Predicated floating-point binary operations that take an integer as their | |
4887 | ;; second operand, with inactive lanes coming from the first operand. | |
0eb5e901 | 4888 | (define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed" |
f75cdd2c RS |
4889 | [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") |
4890 | (unspec:SVE_FULL_F | |
624d0f07 | 4891 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") |
f75cdd2c | 4892 | (unspec:SVE_FULL_F |
624d0f07 | 4893 | [(match_operand 4) |
0eb5e901 | 4894 | (const_int SVE_RELAXED_GP) |
f75cdd2c | 4895 | (match_operand:SVE_FULL_F 2 "register_operand" "0, w") |
624d0f07 RS |
4896 | (match_operand:<V_INT_EQUIV> 3 "register_operand" "w, w")] |
4897 | SVE_COND_FP_BINARY_INT) | |
4898 | (match_dup 2)] | |
4899 | UNSPEC_SEL))] | |
0eb5e901 | 4900 | "TARGET_SVE" |
c0c2f013 | 4901 | "@ |
624d0f07 RS |
4902 | <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> |
4903 | movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
4904 | "&& !rtx_equal_p (operands[1], operands[4])" | |
4905 | { | |
4906 | operands[4] = copy_rtx (operands[1]); | |
4907 | } | |
c0c2f013 YW |
4908 | [(set_attr "movprfx" "*,yes")] |
4909 | ) | |
4910 | ||
0eb5e901 RS |
4911 | (define_insn "*cond_<optab><mode>_2_strict" |
4912 | [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") | |
4913 | (unspec:SVE_FULL_F | |
4914 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
4915 | (unspec:SVE_FULL_F | |
4916 | [(match_dup 1) | |
4917 | (const_int SVE_STRICT_GP) | |
4918 | (match_operand:SVE_FULL_F 2 "register_operand" "0, w") | |
4919 | (match_operand:<V_INT_EQUIV> 3 "register_operand" "w, w")] | |
4920 | SVE_COND_FP_BINARY_INT) | |
4921 | (match_dup 2)] | |
4922 | UNSPEC_SEL))] | |
4923 | "TARGET_SVE" | |
4924 | "@ | |
4925 | <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
4926 | movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
4927 | [(set_attr "movprfx" "*,yes")] | |
4928 | ) | |
4929 | ||
624d0f07 RS |
4930 | ;; Predicated floating-point binary operations that take an integer as |
4931 | ;; their second operand, with the values of inactive lanes being distinct | |
4932 | ;; from the other inputs. | |
0eb5e901 | 4933 | (define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed" |
f75cdd2c RS |
4934 | [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w") |
4935 | (unspec:SVE_FULL_F | |
624d0f07 | 4936 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl") |
f75cdd2c | 4937 | (unspec:SVE_FULL_F |
624d0f07 | 4938 | [(match_operand 5) |
0eb5e901 | 4939 | (const_int SVE_RELAXED_GP) |
f75cdd2c | 4940 | (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w") |
624d0f07 RS |
4941 | (match_operand:<V_INT_EQUIV> 3 "register_operand" "w, w, w, w")] |
4942 | SVE_COND_FP_BINARY_INT) | |
f75cdd2c | 4943 | (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")] |
624d0f07 | 4944 | UNSPEC_SEL))] |
0eb5e901 | 4945 | "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" |
624d0f07 RS |
4946 | "@ |
4947 | movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
4948 | movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
4949 | movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
4950 | #" | |
4951 | "&& 1" | |
4952 | { | |
4953 | if (reload_completed | |
4954 | && register_operand (operands[4], <MODE>mode) | |
4955 | && !rtx_equal_p (operands[0], operands[4])) | |
4956 | { | |
4957 | emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2], | |
4958 | operands[4], operands[1])); | |
4959 | operands[4] = operands[2] = operands[0]; | |
4960 | } | |
4961 | else if (!rtx_equal_p (operands[1], operands[5])) | |
4962 | operands[5] = copy_rtx (operands[1]); | |
4963 | else | |
4964 | FAIL; | |
4965 | } | |
4966 | [(set_attr "movprfx" "yes")] | |
4967 | ) | |
4968 | ||
0eb5e901 RS |
4969 | (define_insn_and_rewrite "*cond_<optab><mode>_any_strict" |
4970 | [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w") | |
4971 | (unspec:SVE_FULL_F | |
4972 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl") | |
4973 | (unspec:SVE_FULL_F | |
4974 | [(match_dup 1) | |
4975 | (const_int SVE_STRICT_GP) | |
4976 | (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w") | |
4977 | (match_operand:<V_INT_EQUIV> 3 "register_operand" "w, w, w, w")] | |
4978 | SVE_COND_FP_BINARY_INT) | |
4979 | (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")] | |
4980 | UNSPEC_SEL))] | |
4981 | "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" | |
4982 | "@ | |
4983 | movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
4984 | movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
4985 | movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
4986 | #" | |
4987 | "&& reload_completed | |
4988 | && register_operand (operands[4], <MODE>mode) | |
4989 | && !rtx_equal_p (operands[0], operands[4])" | |
4990 | { | |
4991 | emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2], | |
4992 | operands[4], operands[1])); | |
4993 | operands[4] = operands[2] = operands[0]; | |
4994 | } | |
4995 | [(set_attr "movprfx" "yes")] | |
4996 | ) | |
4997 | ||
915d28fe RS |
4998 | ;; ------------------------------------------------------------------------- |
4999 | ;; ---- [FP] General binary arithmetic corresponding to rtx codes | |
5000 | ;; ------------------------------------------------------------------------- | |
5001 | ;; Includes post-RA forms of: | |
5002 | ;; - FADD | |
5003 | ;; - FMUL | |
5004 | ;; - FSUB | |
5005 | ;; ------------------------------------------------------------------------- | |
43cacb12 | 5006 | |
915d28fe RS |
5007 | ;; Unpredicated floating-point binary operations (post-RA only). |
5008 | ;; These are generated by splitting a predicated instruction whose | |
5009 | ;; predicate is unused. | |
5010 | (define_insn "*post_ra_<sve_fp_op><mode>3" | |
f75cdd2c RS |
5011 | [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w") |
5012 | (SVE_UNPRED_FP_BINARY:SVE_FULL_F | |
5013 | (match_operand:SVE_FULL_F 1 "register_operand" "w") | |
5014 | (match_operand:SVE_FULL_F 2 "register_operand" "w")))] | |
915d28fe RS |
5015 | "TARGET_SVE && reload_completed" |
5016 | "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>") | |
5017 | ||
5018 | ;; ------------------------------------------------------------------------- | |
5019 | ;; ---- [FP] General binary arithmetic corresponding to unspecs | |
5020 | ;; ------------------------------------------------------------------------- | |
5021 | ;; Includes merging forms of: | |
a19ba9e1 | 5022 | ;; - FADD (constant forms handled in the "Addition" section) |
915d28fe RS |
5023 | ;; - FDIV |
5024 | ;; - FDIVR | |
624d0f07 | 5025 | ;; - FMAX |
a19ba9e1 | 5026 | ;; - FMAXNM (including #0.0 and #1.0) |
624d0f07 | 5027 | ;; - FMIN |
a19ba9e1 RS |
5028 | ;; - FMINNM (including #0.0 and #1.0) |
5029 | ;; - FMUL (including #0.5 and #2.0) | |
624d0f07 RS |
5030 | ;; - FMULX |
5031 | ;; - FRECPS | |
5032 | ;; - FRSQRTS | |
a19ba9e1 RS |
5033 | ;; - FSUB (constant forms handled in the "Addition" section) |
5034 | ;; - FSUBR (constant forms handled in the "Subtraction" section) | |
915d28fe RS |
5035 | ;; ------------------------------------------------------------------------- |
5036 | ||
0254ed79 | 5037 | ;; Unpredicated floating-point binary operations. |
624d0f07 | 5038 | (define_insn "@aarch64_sve_<optab><mode>" |
f75cdd2c RS |
5039 | [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w") |
5040 | (unspec:SVE_FULL_F | |
5041 | [(match_operand:SVE_FULL_F 1 "register_operand" "w") | |
5042 | (match_operand:SVE_FULL_F 2 "register_operand" "w")] | |
5043 | SVE_FP_BINARY))] | |
624d0f07 RS |
5044 | "TARGET_SVE" |
5045 | "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>" | |
5046 | ) | |
5047 | ||
5048 | ;; Unpredicated floating-point binary operations that need to be predicated | |
5049 | ;; for SVE. | |
0254ed79 | 5050 | (define_expand "<optab><mode>3" |
f75cdd2c RS |
5051 | [(set (match_operand:SVE_FULL_F 0 "register_operand") |
5052 | (unspec:SVE_FULL_F | |
0254ed79 RS |
5053 | [(match_dup 3) |
5054 | (const_int SVE_RELAXED_GP) | |
f75cdd2c RS |
5055 | (match_operand:SVE_FULL_F 1 "<sve_pred_fp_rhs1_operand>") |
5056 | (match_operand:SVE_FULL_F 2 "<sve_pred_fp_rhs2_operand>")] | |
04f307cb | 5057 | SVE_COND_FP_BINARY_OPTAB))] |
0254ed79 RS |
5058 | "TARGET_SVE" |
5059 | { | |
5060 | operands[3] = aarch64_ptrue_reg (<VPRED>mode); | |
5061 | } | |
5062 | ) | |
5063 | ||
5064 | ;; Predicated floating-point binary operations that have no immediate forms. | |
624d0f07 | 5065 | (define_insn "@aarch64_pred_<optab><mode>" |
f75cdd2c RS |
5066 | [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?&w") |
5067 | (unspec:SVE_FULL_F | |
0254ed79 RS |
5068 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") |
5069 | (match_operand:SI 4 "aarch64_sve_gp_strictness") | |
f75cdd2c RS |
5070 | (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w") |
5071 | (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w")] | |
0254ed79 RS |
5072 | SVE_COND_FP_BINARY_REG))] |
5073 | "TARGET_SVE" | |
5074 | "@ | |
5075 | <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
5076 | <sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> | |
5077 | movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
5078 | [(set_attr "movprfx" "*,*,yes")] | |
5079 | ) | |
5080 | ||
915d28fe | 5081 | ;; Predicated floating-point operations with merging. |
624d0f07 | 5082 | (define_expand "@cond_<optab><mode>" |
f75cdd2c RS |
5083 | [(set (match_operand:SVE_FULL_F 0 "register_operand") |
5084 | (unspec:SVE_FULL_F | |
915d28fe | 5085 | [(match_operand:<VPRED> 1 "register_operand") |
f75cdd2c | 5086 | (unspec:SVE_FULL_F |
6fe679cc | 5087 | [(match_dup 1) |
c9c5a809 | 5088 | (const_int SVE_STRICT_GP) |
f75cdd2c RS |
5089 | (match_operand:SVE_FULL_F 2 "<sve_pred_fp_rhs1_operand>") |
5090 | (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_operand>")] | |
915d28fe | 5091 | SVE_COND_FP_BINARY) |
f75cdd2c | 5092 | (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")] |
915d28fe | 5093 | UNSPEC_SEL))] |
43cacb12 | 5094 | "TARGET_SVE" |
43cacb12 RS |
5095 | ) |
5096 | ||
915d28fe | 5097 | ;; Predicated floating-point operations, merging with the first input. |
0eb5e901 | 5098 | (define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed" |
f75cdd2c RS |
5099 | [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") |
5100 | (unspec:SVE_FULL_F | |
57d6f4d0 | 5101 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") |
f75cdd2c | 5102 | (unspec:SVE_FULL_F |
c9c5a809 | 5103 | [(match_operand 4) |
0eb5e901 | 5104 | (const_int SVE_RELAXED_GP) |
f75cdd2c RS |
5105 | (match_operand:SVE_FULL_F 2 "register_operand" "0, w") |
5106 | (match_operand:SVE_FULL_F 3 "register_operand" "w, w")] | |
915d28fe RS |
5107 | SVE_COND_FP_BINARY) |
5108 | (match_dup 2)] | |
5109 | UNSPEC_SEL))] | |
0eb5e901 | 5110 | "TARGET_SVE" |
43cacb12 | 5111 | "@ |
915d28fe RS |
5112 | <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> |
5113 | movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
c9c5a809 RS |
5114 | "&& !rtx_equal_p (operands[1], operands[4])" |
5115 | { | |
5116 | operands[4] = copy_rtx (operands[1]); | |
5117 | } | |
915d28fe | 5118 | [(set_attr "movprfx" "*,yes")] |
43cacb12 RS |
5119 | ) |
5120 | ||
0eb5e901 RS |
5121 | (define_insn "*cond_<optab><mode>_2_strict" |
5122 | [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") | |
5123 | (unspec:SVE_FULL_F | |
5124 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
5125 | (unspec:SVE_FULL_F | |
5126 | [(match_dup 1) | |
5127 | (const_int SVE_STRICT_GP) | |
5128 | (match_operand:SVE_FULL_F 2 "register_operand" "0, w") | |
5129 | (match_operand:SVE_FULL_F 3 "register_operand" "w, w")] | |
5130 | SVE_COND_FP_BINARY) | |
5131 | (match_dup 2)] | |
5132 | UNSPEC_SEL))] | |
5133 | "TARGET_SVE" | |
5134 | "@ | |
5135 | <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
5136 | movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
5137 | [(set_attr "movprfx" "*,yes")] | |
5138 | ) | |
5139 | ||
a19ba9e1 | 5140 | ;; Same for operations that take a 1-bit constant. |
0eb5e901 | 5141 | (define_insn_and_rewrite "*cond_<optab><mode>_2_const_relaxed" |
f75cdd2c RS |
5142 | [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?w") |
5143 | (unspec:SVE_FULL_F | |
a19ba9e1 | 5144 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") |
f75cdd2c | 5145 | (unspec:SVE_FULL_F |
a19ba9e1 | 5146 | [(match_operand 4) |
0eb5e901 | 5147 | (const_int SVE_RELAXED_GP) |
f75cdd2c RS |
5148 | (match_operand:SVE_FULL_F 2 "register_operand" "0, w") |
5149 | (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")] | |
a19ba9e1 RS |
5150 | SVE_COND_FP_BINARY_I1) |
5151 | (match_dup 2)] | |
5152 | UNSPEC_SEL))] | |
0eb5e901 | 5153 | "TARGET_SVE" |
a19ba9e1 RS |
5154 | "@ |
5155 | <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 | |
5156 | movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3" | |
5157 | "&& !rtx_equal_p (operands[1], operands[4])" | |
5158 | { | |
5159 | operands[4] = copy_rtx (operands[1]); | |
5160 | } | |
5161 | [(set_attr "movprfx" "*,yes")] | |
5162 | ) | |
5163 | ||
0eb5e901 RS |
5164 | (define_insn "*cond_<optab><mode>_2_const_strict" |
5165 | [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?w") | |
5166 | (unspec:SVE_FULL_F | |
5167 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
5168 | (unspec:SVE_FULL_F | |
5169 | [(match_dup 1) | |
5170 | (const_int SVE_STRICT_GP) | |
5171 | (match_operand:SVE_FULL_F 2 "register_operand" "0, w") | |
5172 | (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")] | |
5173 | SVE_COND_FP_BINARY_I1) | |
5174 | (match_dup 2)] | |
5175 | UNSPEC_SEL))] | |
5176 | "TARGET_SVE" | |
5177 | "@ | |
5178 | <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 | |
5179 | movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3" | |
5180 | [(set_attr "movprfx" "*,yes")] | |
5181 | ) | |
5182 | ||
915d28fe | 5183 | ;; Predicated floating-point operations, merging with the second input. |
0eb5e901 | 5184 | (define_insn_and_rewrite "*cond_<optab><mode>_3_relaxed" |
f75cdd2c RS |
5185 | [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") |
5186 | (unspec:SVE_FULL_F | |
57d6f4d0 | 5187 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") |
f75cdd2c | 5188 | (unspec:SVE_FULL_F |
c9c5a809 | 5189 | [(match_operand 4) |
0eb5e901 | 5190 | (const_int SVE_RELAXED_GP) |
f75cdd2c RS |
5191 | (match_operand:SVE_FULL_F 2 "register_operand" "w, w") |
5192 | (match_operand:SVE_FULL_F 3 "register_operand" "0, w")] | |
915d28fe RS |
5193 | SVE_COND_FP_BINARY) |
5194 | (match_dup 3)] | |
5195 | UNSPEC_SEL))] | |
0eb5e901 | 5196 | "TARGET_SVE" |
43cacb12 | 5197 | "@ |
915d28fe RS |
5198 | <sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> |
5199 | movprfx\t%0, %3\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>" | |
c9c5a809 RS |
5200 | "&& !rtx_equal_p (operands[1], operands[4])" |
5201 | { | |
5202 | operands[4] = copy_rtx (operands[1]); | |
5203 | } | |
915d28fe | 5204 | [(set_attr "movprfx" "*,yes")] |
cee99fa0 RS |
5205 | ) |
5206 | ||
0eb5e901 RS |
5207 | (define_insn "*cond_<optab><mode>_3_strict" |
5208 | [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") | |
5209 | (unspec:SVE_FULL_F | |
5210 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
5211 | (unspec:SVE_FULL_F | |
5212 | [(match_dup 1) | |
5213 | (const_int SVE_STRICT_GP) | |
5214 | (match_operand:SVE_FULL_F 2 "register_operand" "w, w") | |
5215 | (match_operand:SVE_FULL_F 3 "register_operand" "0, w")] | |
5216 | SVE_COND_FP_BINARY) | |
5217 | (match_dup 3)] | |
5218 | UNSPEC_SEL))] | |
5219 | "TARGET_SVE" | |
5220 | "@ | |
5221 | <sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> | |
5222 | movprfx\t%0, %3\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>" | |
5223 | [(set_attr "movprfx" "*,yes")] | |
5224 | ) | |
5225 | ||
915d28fe | 5226 | ;; Predicated floating-point operations, merging with an independent value. |
0eb5e901 | 5227 | (define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed" |
f75cdd2c RS |
5228 | [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, ?&w") |
5229 | (unspec:SVE_FULL_F | |
915d28fe | 5230 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl") |
f75cdd2c | 5231 | (unspec:SVE_FULL_F |
c9c5a809 | 5232 | [(match_operand 5) |
0eb5e901 | 5233 | (const_int SVE_RELAXED_GP) |
f75cdd2c RS |
5234 | (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w, w") |
5235 | (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w, w, w")] | |
915d28fe | 5236 | SVE_COND_FP_BINARY) |
f75cdd2c | 5237 | (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")] |
915d28fe RS |
5238 | UNSPEC_SEL))] |
5239 | "TARGET_SVE | |
5240 | && !rtx_equal_p (operands[2], operands[4]) | |
0eb5e901 | 5241 | && !rtx_equal_p (operands[3], operands[4])" |
cee99fa0 | 5242 | "@ |
915d28fe RS |
5243 | movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> |
5244 | movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> | |
5245 | movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
5246 | movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
5247 | #" | |
c9c5a809 | 5248 | "&& 1" |
915d28fe | 5249 | { |
c9c5a809 RS |
5250 | if (reload_completed |
5251 | && register_operand (operands[4], <MODE>mode) | |
5252 | && !rtx_equal_p (operands[0], operands[4])) | |
5253 | { | |
5254 | emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2], | |
5255 | operands[4], operands[1])); | |
5256 | operands[4] = operands[2] = operands[0]; | |
5257 | } | |
5258 | else if (!rtx_equal_p (operands[1], operands[5])) | |
5259 | operands[5] = copy_rtx (operands[1]); | |
5260 | else | |
5261 | FAIL; | |
915d28fe RS |
5262 | } |
5263 | [(set_attr "movprfx" "yes")] | |
cee99fa0 RS |
5264 | ) |
5265 | ||
0eb5e901 RS |
5266 | (define_insn_and_rewrite "*cond_<optab><mode>_any_strict" |
5267 | [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, ?&w") | |
5268 | (unspec:SVE_FULL_F | |
5269 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl") | |
5270 | (unspec:SVE_FULL_F | |
5271 | [(match_dup 1) | |
5272 | (const_int SVE_STRICT_GP) | |
5273 | (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w, w") | |
5274 | (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w, w, w")] | |
5275 | SVE_COND_FP_BINARY) | |
5276 | (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")] | |
5277 | UNSPEC_SEL))] | |
5278 | "TARGET_SVE | |
5279 | && !rtx_equal_p (operands[2], operands[4]) | |
5280 | && !rtx_equal_p (operands[3], operands[4])" | |
5281 | "@ | |
5282 | movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
5283 | movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> | |
5284 | movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
5285 | movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
5286 | #" | |
5287 | "&& reload_completed | |
5288 | && register_operand (operands[4], <MODE>mode) | |
5289 | && !rtx_equal_p (operands[0], operands[4])" | |
5290 | { | |
5291 | emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2], | |
5292 | operands[4], operands[1])); | |
5293 | operands[4] = operands[2] = operands[0]; | |
5294 | } | |
5295 | [(set_attr "movprfx" "yes")] | |
5296 | ) | |
5297 | ||
a19ba9e1 | 5298 | ;; Same for operations that take a 1-bit constant. |
0eb5e901 | 5299 | (define_insn_and_rewrite "*cond_<optab><mode>_any_const_relaxed" |
f75cdd2c RS |
5300 | [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w") |
5301 | (unspec:SVE_FULL_F | |
a19ba9e1 | 5302 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") |
f75cdd2c | 5303 | (unspec:SVE_FULL_F |
a19ba9e1 | 5304 | [(match_operand 5) |
0eb5e901 | 5305 | (const_int SVE_RELAXED_GP) |
f75cdd2c RS |
5306 | (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w") |
5307 | (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")] | |
a19ba9e1 | 5308 | SVE_COND_FP_BINARY_I1) |
f75cdd2c | 5309 | (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")] |
a19ba9e1 | 5310 | UNSPEC_SEL))] |
0eb5e901 | 5311 | "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" |
a19ba9e1 RS |
5312 | "@ |
5313 | movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 | |
5314 | movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 | |
5315 | #" | |
5316 | "&& 1" | |
5317 | { | |
5318 | if (reload_completed | |
5319 | && register_operand (operands[4], <MODE>mode) | |
5320 | && !rtx_equal_p (operands[0], operands[4])) | |
5321 | { | |
5322 | emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2], | |
5323 | operands[4], operands[1])); | |
5324 | operands[4] = operands[2] = operands[0]; | |
5325 | } | |
5326 | else if (!rtx_equal_p (operands[1], operands[5])) | |
5327 | operands[5] = copy_rtx (operands[1]); | |
5328 | else | |
5329 | FAIL; | |
5330 | } | |
5331 | [(set_attr "movprfx" "yes")] | |
5332 | ) | |
5333 | ||
0eb5e901 RS |
5334 | (define_insn_and_rewrite "*cond_<optab><mode>_any_const_strict" |
5335 | [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w") | |
5336 | (unspec:SVE_FULL_F | |
5337 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") | |
5338 | (unspec:SVE_FULL_F | |
5339 | [(match_dup 1) | |
5340 | (const_int SVE_STRICT_GP) | |
5341 | (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w") | |
5342 | (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")] | |
5343 | SVE_COND_FP_BINARY_I1) | |
5344 | (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")] | |
5345 | UNSPEC_SEL))] | |
5346 | "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" | |
5347 | "@ | |
5348 | movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 | |
5349 | movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 | |
5350 | #" | |
5351 | "&& reload_completed | |
5352 | && register_operand (operands[4], <MODE>mode) | |
5353 | && !rtx_equal_p (operands[0], operands[4])" | |
5354 | { | |
5355 | emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2], | |
5356 | operands[4], operands[1])); | |
5357 | operands[4] = operands[2] = operands[0]; | |
5358 | } | |
5359 | [(set_attr "movprfx" "yes")] | |
5360 | ) | |
5361 | ||
915d28fe RS |
5362 | ;; ------------------------------------------------------------------------- |
5363 | ;; ---- [FP] Addition | |
5364 | ;; ------------------------------------------------------------------------- | |
5365 | ;; Includes: | |
5366 | ;; - FADD | |
5367 | ;; - FSUB | |
5368 | ;; ------------------------------------------------------------------------- | |
43cacb12 | 5369 | |
c9c5a809 | 5370 | ;; Predicated floating-point addition. |
624d0f07 | 5371 | (define_insn_and_split "@aarch64_pred_<optab><mode>" |
f75cdd2c RS |
5372 | [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, w, ?&w, ?&w, ?&w") |
5373 | (unspec:SVE_FULL_F | |
624d0f07 RS |
5374 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl, Upl") |
5375 | (match_operand:SI 4 "aarch64_sve_gp_strictness" "i, i, Z, Ui1, i, i, Ui1") | |
f75cdd2c RS |
5376 | (match_operand:SVE_FULL_F 2 "register_operand" "%0, 0, w, 0, w, w, w") |
5377 | (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_operand" "vsA, vsN, w, w, vsA, vsN, w")] | |
624d0f07 | 5378 | SVE_COND_FP_ADD))] |
cee99fa0 | 5379 | "TARGET_SVE" |
915d28fe RS |
5380 | "@ |
5381 | fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 | |
5382 | fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3 | |
5e176a61 | 5383 | # |
624d0f07 | 5384 | fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> |
5e176a61 | 5385 | movprfx\t%0, %2\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 |
624d0f07 RS |
5386 | movprfx\t%0, %2\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3 |
5387 | movprfx\t%0, %2\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
915d28fe RS |
5388 | ; Split the unpredicated form after reload, so that we don't have |
5389 | ; the unnecessary PTRUE. | |
5390 | "&& reload_completed | |
624d0f07 RS |
5391 | && register_operand (operands[3], <MODE>mode) |
5392 | && INTVAL (operands[4]) == SVE_RELAXED_GP" | |
f75cdd2c | 5393 | [(set (match_dup 0) (plus:SVE_FULL_F (match_dup 2) (match_dup 3)))] |
5e176a61 | 5394 | "" |
624d0f07 | 5395 | [(set_attr "movprfx" "*,*,*,*,yes,yes,yes")] |
cee99fa0 RS |
5396 | ) |
5397 | ||
a19ba9e1 RS |
5398 | ;; Predicated floating-point addition of a constant, merging with the |
5399 | ;; first input. | |
0eb5e901 | 5400 | (define_insn_and_rewrite "*cond_add<mode>_2_const_relaxed" |
f75cdd2c RS |
5401 | [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w, ?w") |
5402 | (unspec:SVE_FULL_F | |
a19ba9e1 | 5403 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl") |
f75cdd2c | 5404 | (unspec:SVE_FULL_F |
a19ba9e1 | 5405 | [(match_operand 4) |
0eb5e901 | 5406 | (const_int SVE_RELAXED_GP) |
f75cdd2c RS |
5407 | (match_operand:SVE_FULL_F 2 "register_operand" "0, 0, w, w") |
5408 | (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate" "vsA, vsN, vsA, vsN")] | |
a19ba9e1 RS |
5409 | UNSPEC_COND_FADD) |
5410 | (match_dup 2)] | |
5411 | UNSPEC_SEL))] | |
0eb5e901 | 5412 | "TARGET_SVE" |
a19ba9e1 RS |
5413 | "@ |
5414 | fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 | |
5415 | fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3 | |
5416 | movprfx\t%0, %2\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 | |
5417 | movprfx\t%0, %2\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3" | |
5418 | "&& !rtx_equal_p (operands[1], operands[4])" | |
5419 | { | |
5420 | operands[4] = copy_rtx (operands[1]); | |
5421 | } | |
5422 | [(set_attr "movprfx" "*,*,yes,yes")] | |
5423 | ) | |
5424 | ||
0eb5e901 RS |
5425 | (define_insn "*cond_add<mode>_2_const_strict" |
5426 | [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w, ?w") | |
f75cdd2c | 5427 | (unspec:SVE_FULL_F |
0eb5e901 | 5428 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl") |
f75cdd2c | 5429 | (unspec:SVE_FULL_F |
0eb5e901 RS |
5430 | [(match_dup 1) |
5431 | (const_int SVE_STRICT_GP) | |
5432 | (match_operand:SVE_FULL_F 2 "register_operand" "0, 0, w, w") | |
5433 | (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate" "vsA, vsN, vsA, vsN")] | |
5434 | UNSPEC_COND_FADD) | |
5435 | (match_dup 2)] | |
5436 | UNSPEC_SEL))] | |
5437 | "TARGET_SVE" | |
5438 | "@ | |
5439 | fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 | |
5440 | fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3 | |
5441 | movprfx\t%0, %2\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 | |
5442 | movprfx\t%0, %2\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3" | |
5443 | [(set_attr "movprfx" "*,*,yes,yes")] | |
5444 | ) | |
5445 | ||
5446 | ;; Predicated floating-point addition of a constant, merging with an | |
5447 | ;; independent value. | |
5448 | (define_insn_and_rewrite "*cond_add<mode>_any_const_relaxed" | |
5449 | [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, w, ?w, ?w") | |
5450 | (unspec:SVE_FULL_F | |
5451 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") | |
5452 | (unspec:SVE_FULL_F | |
5453 | [(match_operand 5) | |
5454 | (const_int SVE_RELAXED_GP) | |
5455 | (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w, w, w, w") | |
f75cdd2c | 5456 | (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate" "vsA, vsN, vsA, vsN, vsA, vsN")] |
a19ba9e1 | 5457 | UNSPEC_COND_FADD) |
f75cdd2c | 5458 | (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, 0, w, w")] |
a19ba9e1 | 5459 | UNSPEC_SEL))] |
0eb5e901 | 5460 | "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" |
a19ba9e1 RS |
5461 | "@ |
5462 | movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 | |
5463 | movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3 | |
5464 | movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 | |
5465 | movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3 | |
5466 | # | |
5467 | #" | |
5468 | "&& 1" | |
5469 | { | |
5470 | if (reload_completed | |
5471 | && register_operand (operands[4], <MODE>mode) | |
5472 | && !rtx_equal_p (operands[0], operands[4])) | |
5473 | { | |
5474 | emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2], | |
5475 | operands[4], operands[1])); | |
5476 | operands[4] = operands[2] = operands[0]; | |
5477 | } | |
5478 | else if (!rtx_equal_p (operands[1], operands[5])) | |
5479 | operands[5] = copy_rtx (operands[1]); | |
5480 | else | |
5481 | FAIL; | |
5482 | } | |
5483 | [(set_attr "movprfx" "yes")] | |
5484 | ) | |
5485 | ||
0eb5e901 RS |
5486 | (define_insn_and_rewrite "*cond_add<mode>_any_const_strict" |
5487 | [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, w, ?w, ?w") | |
5488 | (unspec:SVE_FULL_F | |
5489 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") | |
5490 | (unspec:SVE_FULL_F | |
5491 | [(match_dup 1) | |
5492 | (const_int SVE_STRICT_GP) | |
5493 | (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w, w, w, w") | |
5494 | (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate" "vsA, vsN, vsA, vsN, vsA, vsN")] | |
5495 | UNSPEC_COND_FADD) | |
5496 | (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, 0, w, w")] | |
5497 | UNSPEC_SEL))] | |
5498 | "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" | |
5499 | "@ | |
5500 | movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 | |
5501 | movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3 | |
5502 | movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 | |
5503 | movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3 | |
5504 | # | |
5505 | #" | |
5506 | "&& reload_completed | |
5507 | && register_operand (operands[4], <MODE>mode) | |
5508 | && !rtx_equal_p (operands[0], operands[4])" | |
5509 | { | |
5510 | emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2], | |
5511 | operands[4], operands[1])); | |
5512 | operands[4] = operands[2] = operands[0]; | |
5513 | } | |
5514 | [(set_attr "movprfx" "yes")] | |
5515 | ) | |
5516 | ||
a19ba9e1 | 5517 | ;; Register merging forms are handled through SVE_COND_FP_BINARY. |
cee99fa0 | 5518 | |
624d0f07 RS |
5519 | ;; ------------------------------------------------------------------------- |
5520 | ;; ---- [FP] Complex addition | |
5521 | ;; ------------------------------------------------------------------------- | |
5522 | ;; Includes: | |
5523 | ;; - FCADD | |
5524 | ;; ------------------------------------------------------------------------- | |
5525 | ||
5526 | ;; Predicated FCADD. | |
5527 | (define_insn "@aarch64_pred_<optab><mode>" | |
f75cdd2c RS |
5528 | [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") |
5529 | (unspec:SVE_FULL_F | |
624d0f07 RS |
5530 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") |
5531 | (match_operand:SI 4 "aarch64_sve_gp_strictness") | |
f75cdd2c RS |
5532 | (match_operand:SVE_FULL_F 2 "register_operand" "0, w") |
5533 | (match_operand:SVE_FULL_F 3 "register_operand" "w, w")] | |
624d0f07 RS |
5534 | SVE_COND_FCADD))] |
5535 | "TARGET_SVE" | |
5536 | "@ | |
5537 | fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot> | |
5538 | movprfx\t%0, %2\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>" | |
5539 | [(set_attr "movprfx" "*,yes")] | |
5540 | ) | |
5541 | ||
5542 | ;; Predicated FCADD with merging. | |
5543 | (define_expand "@cond_<optab><mode>" | |
f75cdd2c RS |
5544 | [(set (match_operand:SVE_FULL_F 0 "register_operand") |
5545 | (unspec:SVE_FULL_F | |
624d0f07 | 5546 | [(match_operand:<VPRED> 1 "register_operand") |
f75cdd2c | 5547 | (unspec:SVE_FULL_F |
624d0f07 RS |
5548 | [(match_dup 1) |
5549 | (const_int SVE_STRICT_GP) | |
f75cdd2c RS |
5550 | (match_operand:SVE_FULL_F 2 "register_operand") |
5551 | (match_operand:SVE_FULL_F 3 "register_operand")] | |
624d0f07 | 5552 | SVE_COND_FCADD) |
f75cdd2c | 5553 | (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")] |
624d0f07 RS |
5554 | UNSPEC_SEL))] |
5555 | "TARGET_SVE" | |
5556 | ) | |
5557 | ||
84747acf TC |
5558 | ;; Predicated FCADD using ptrue for unpredicated optab for auto-vectorizer |
5559 | (define_expand "@cadd<rot><mode>3" | |
5560 | [(set (match_operand:SVE_FULL_F 0 "register_operand") | |
5561 | (unspec:SVE_FULL_F | |
5562 | [(match_dup 3) | |
5563 | (const_int SVE_RELAXED_GP) | |
5564 | (match_operand:SVE_FULL_F 1 "register_operand") | |
5565 | (match_operand:SVE_FULL_F 2 "register_operand")] | |
5566 | SVE_COND_FCADD))] | |
5567 | "TARGET_SVE" | |
5568 | { | |
5569 | operands[3] = aarch64_ptrue_reg (<VPRED>mode); | |
5570 | }) | |
5571 | ||
624d0f07 | 5572 | ;; Predicated FCADD, merging with the first input. |
0eb5e901 | 5573 | (define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed" |
f75cdd2c RS |
5574 | [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") |
5575 | (unspec:SVE_FULL_F | |
624d0f07 | 5576 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") |
f75cdd2c | 5577 | (unspec:SVE_FULL_F |
624d0f07 | 5578 | [(match_operand 4) |
0eb5e901 | 5579 | (const_int SVE_RELAXED_GP) |
f75cdd2c RS |
5580 | (match_operand:SVE_FULL_F 2 "register_operand" "0, w") |
5581 | (match_operand:SVE_FULL_F 3 "register_operand" "w, w")] | |
624d0f07 RS |
5582 | SVE_COND_FCADD) |
5583 | (match_dup 2)] | |
5584 | UNSPEC_SEL))] | |
0eb5e901 | 5585 | "TARGET_SVE" |
624d0f07 RS |
5586 | "@ |
5587 | fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot> | |
5588 | movprfx\t%0, %2\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>" | |
5589 | "&& !rtx_equal_p (operands[1], operands[4])" | |
5590 | { | |
5591 | operands[4] = copy_rtx (operands[1]); | |
5592 | } | |
5593 | [(set_attr "movprfx" "*,yes")] | |
5594 | ) | |
5595 | ||
0eb5e901 RS |
5596 | (define_insn "*cond_<optab><mode>_2_strict" |
5597 | [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") | |
5598 | (unspec:SVE_FULL_F | |
5599 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
5600 | (unspec:SVE_FULL_F | |
5601 | [(match_dup 1) | |
5602 | (const_int SVE_STRICT_GP) | |
5603 | (match_operand:SVE_FULL_F 2 "register_operand" "0, w") | |
5604 | (match_operand:SVE_FULL_F 3 "register_operand" "w, w")] | |
5605 | SVE_COND_FCADD) | |
5606 | (match_dup 2)] | |
5607 | UNSPEC_SEL))] | |
5608 | "TARGET_SVE" | |
5609 | "@ | |
5610 | fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot> | |
5611 | movprfx\t%0, %2\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>" | |
5612 | [(set_attr "movprfx" "*,yes")] | |
5613 | ) | |
5614 | ||
624d0f07 | 5615 | ;; Predicated FCADD, merging with an independent value. |
0eb5e901 | 5616 | (define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed" |
f75cdd2c RS |
5617 | [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w") |
5618 | (unspec:SVE_FULL_F | |
624d0f07 | 5619 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl") |
f75cdd2c | 5620 | (unspec:SVE_FULL_F |
624d0f07 | 5621 | [(match_operand 5) |
0eb5e901 | 5622 | (const_int SVE_RELAXED_GP) |
f75cdd2c RS |
5623 | (match_operand:SVE_FULL_F 2 "register_operand" "w, 0, w, w") |
5624 | (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, w")] | |
624d0f07 | 5625 | SVE_COND_FCADD) |
f75cdd2c | 5626 | (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")] |
624d0f07 | 5627 | UNSPEC_SEL))] |
0eb5e901 | 5628 | "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" |
624d0f07 RS |
5629 | "@ |
5630 | movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot> | |
5631 | movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot> | |
5632 | movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot> | |
5633 | #" | |
5634 | "&& 1" | |
5635 | { | |
5636 | if (reload_completed | |
5637 | && register_operand (operands[4], <MODE>mode) | |
5638 | && !rtx_equal_p (operands[0], operands[4])) | |
5639 | { | |
5640 | emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2], | |
5641 | operands[4], operands[1])); | |
5642 | operands[4] = operands[2] = operands[0]; | |
5643 | } | |
5644 | else if (!rtx_equal_p (operands[1], operands[5])) | |
5645 | operands[5] = copy_rtx (operands[1]); | |
5646 | else | |
5647 | FAIL; | |
5648 | } | |
5649 | [(set_attr "movprfx" "yes")] | |
5650 | ) | |
5651 | ||
0eb5e901 RS |
5652 | (define_insn_and_rewrite "*cond_<optab><mode>_any_strict" |
5653 | [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w") | |
5654 | (unspec:SVE_FULL_F | |
5655 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl") | |
5656 | (unspec:SVE_FULL_F | |
5657 | [(match_dup 1) | |
5658 | (const_int SVE_STRICT_GP) | |
5659 | (match_operand:SVE_FULL_F 2 "register_operand" "w, 0, w, w") | |
5660 | (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, w")] | |
5661 | SVE_COND_FCADD) | |
5662 | (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")] | |
5663 | UNSPEC_SEL))] | |
5664 | "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" | |
5665 | "@ | |
5666 | movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot> | |
5667 | movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot> | |
5668 | movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot> | |
5669 | #" | |
5670 | "&& reload_completed | |
5671 | && register_operand (operands[4], <MODE>mode) | |
5672 | && !rtx_equal_p (operands[0], operands[4])" | |
5673 | { | |
5674 | emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2], | |
5675 | operands[4], operands[1])); | |
5676 | operands[4] = operands[2] = operands[0]; | |
5677 | } | |
5678 | [(set_attr "movprfx" "yes")] | |
5679 | ) | |
5680 | ||
915d28fe RS |
5681 | ;; ------------------------------------------------------------------------- |
5682 | ;; ---- [FP] Subtraction | |
5683 | ;; ------------------------------------------------------------------------- | |
5684 | ;; Includes: | |
915d28fe RS |
5685 | ;; - FSUB |
5686 | ;; - FSUBR | |
5687 | ;; ------------------------------------------------------------------------- | |
cee99fa0 | 5688 | |
c9c5a809 | 5689 | ;; Predicated floating-point subtraction. |
624d0f07 | 5690 | (define_insn_and_split "@aarch64_pred_<optab><mode>" |
f75cdd2c RS |
5691 | [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, w, ?&w, ?&w") |
5692 | (unspec:SVE_FULL_F | |
624d0f07 RS |
5693 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") |
5694 | (match_operand:SI 4 "aarch64_sve_gp_strictness" "i, Z, Ui1, Ui1, i, Ui1") | |
f75cdd2c RS |
5695 | (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_operand" "vsA, w, 0, w, vsA, w") |
5696 | (match_operand:SVE_FULL_F 3 "register_operand" "0, w, w, 0, w, w")] | |
624d0f07 | 5697 | SVE_COND_FP_SUB))] |
2ae21bd1 | 5698 | "TARGET_SVE" |
f22d7973 | 5699 | "@ |
915d28fe | 5700 | fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2 |
2ae21bd1 | 5701 | # |
624d0f07 RS |
5702 | fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> |
5703 | fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> | |
5704 | movprfx\t%0, %3\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2 | |
5705 | movprfx\t%0, %2\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
915d28fe RS |
5706 | ; Split the unpredicated form after reload, so that we don't have |
5707 | ; the unnecessary PTRUE. | |
5708 | "&& reload_completed | |
624d0f07 RS |
5709 | && register_operand (operands[2], <MODE>mode) |
5710 | && INTVAL (operands[4]) == SVE_RELAXED_GP" | |
f75cdd2c | 5711 | [(set (match_dup 0) (minus:SVE_FULL_F (match_dup 2) (match_dup 3)))] |
2ae21bd1 | 5712 | "" |
624d0f07 | 5713 | [(set_attr "movprfx" "*,*,*,*,yes,yes")] |
f22d7973 RS |
5714 | ) |
5715 | ||
a19ba9e1 RS |
5716 | ;; Predicated floating-point subtraction from a constant, merging with the |
5717 | ;; second input. | |
0eb5e901 | 5718 | (define_insn_and_rewrite "*cond_sub<mode>_3_const_relaxed" |
f75cdd2c RS |
5719 | [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?w") |
5720 | (unspec:SVE_FULL_F | |
a19ba9e1 | 5721 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") |
f75cdd2c | 5722 | (unspec:SVE_FULL_F |
a19ba9e1 | 5723 | [(match_operand 4) |
0eb5e901 | 5724 | (const_int SVE_RELAXED_GP) |
f75cdd2c RS |
5725 | (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate") |
5726 | (match_operand:SVE_FULL_F 3 "register_operand" "0, w")] | |
a19ba9e1 RS |
5727 | UNSPEC_COND_FSUB) |
5728 | (match_dup 3)] | |
5729 | UNSPEC_SEL))] | |
0eb5e901 | 5730 | "TARGET_SVE" |
a19ba9e1 RS |
5731 | "@ |
5732 | fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2 | |
5733 | movprfx\t%0, %3\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2" | |
5734 | "&& !rtx_equal_p (operands[1], operands[4])" | |
5735 | { | |
5736 | operands[4] = copy_rtx (operands[1]); | |
5737 | } | |
5738 | [(set_attr "movprfx" "*,yes")] | |
5739 | ) | |
5740 | ||
0eb5e901 RS |
5741 | (define_insn "*cond_sub<mode>_3_const_strict" |
5742 | [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?w") | |
5743 | (unspec:SVE_FULL_F | |
5744 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
5745 | (unspec:SVE_FULL_F | |
5746 | [(match_dup 1) | |
5747 | (const_int SVE_STRICT_GP) | |
5748 | (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate") | |
5749 | (match_operand:SVE_FULL_F 3 "register_operand" "0, w")] | |
5750 | UNSPEC_COND_FSUB) | |
5751 | (match_dup 3)] | |
5752 | UNSPEC_SEL))] | |
5753 | "TARGET_SVE" | |
5754 | "@ | |
5755 | fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2 | |
5756 | movprfx\t%0, %3\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2" | |
5757 | [(set_attr "movprfx" "*,yes")] | |
5758 | ) | |
5759 | ||
a19ba9e1 RS |
5760 | ;; Predicated floating-point subtraction from a constant, merging with an |
5761 | ;; independent value. | |
0eb5e901 | 5762 | (define_insn_and_rewrite "*cond_sub<mode>_const_relaxed" |
f75cdd2c RS |
5763 | [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w") |
5764 | (unspec:SVE_FULL_F | |
a19ba9e1 | 5765 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") |
f75cdd2c | 5766 | (unspec:SVE_FULL_F |
a19ba9e1 | 5767 | [(match_operand 5) |
b648814c | 5768 | (const_int SVE_RELAXED_GP) |
f75cdd2c RS |
5769 | (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate") |
5770 | (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w")] | |
a19ba9e1 | 5771 | UNSPEC_COND_FSUB) |
f75cdd2c | 5772 | (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")] |
a19ba9e1 | 5773 | UNSPEC_SEL))] |
b648814c | 5774 | "TARGET_SVE && !rtx_equal_p (operands[3], operands[4])" |
a19ba9e1 RS |
5775 | "@ |
5776 | movprfx\t%0.<Vetype>, %1/z, %3.<Vetype>\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2 | |
5777 | movprfx\t%0.<Vetype>, %1/m, %3.<Vetype>\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2 | |
5778 | #" | |
5779 | "&& 1" | |
5780 | { | |
5781 | if (reload_completed | |
5782 | && register_operand (operands[4], <MODE>mode) | |
5783 | && !rtx_equal_p (operands[0], operands[4])) | |
5784 | { | |
5785 | emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[3], | |
5786 | operands[4], operands[1])); | |
5787 | operands[4] = operands[3] = operands[0]; | |
5788 | } | |
5789 | else if (!rtx_equal_p (operands[1], operands[5])) | |
5790 | operands[5] = copy_rtx (operands[1]); | |
5791 | else | |
5792 | FAIL; | |
5793 | } | |
5794 | [(set_attr "movprfx" "yes")] | |
5795 | ) | |
5796 | ||
0eb5e901 | 5797 | (define_insn_and_rewrite "*cond_sub<mode>_const_strict" |
b648814c PW |
5798 | [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w") |
5799 | (unspec:SVE_FULL_F | |
5800 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") | |
5801 | (unspec:SVE_FULL_F | |
5802 | [(match_dup 1) | |
5803 | (const_int SVE_STRICT_GP) | |
5804 | (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate") | |
5805 | (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w")] | |
5806 | UNSPEC_COND_FSUB) | |
5807 | (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")] | |
5808 | UNSPEC_SEL))] | |
5809 | "TARGET_SVE && !rtx_equal_p (operands[3], operands[4])" | |
5810 | "@ | |
5811 | movprfx\t%0.<Vetype>, %1/z, %3.<Vetype>\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2 | |
5812 | movprfx\t%0.<Vetype>, %1/m, %3.<Vetype>\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2 | |
5813 | #" | |
5814 | "&& reload_completed | |
5815 | && register_operand (operands[4], <MODE>mode) | |
5816 | && !rtx_equal_p (operands[0], operands[4])" | |
5817 | { | |
5818 | emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[3], | |
5819 | operands[4], operands[1])); | |
5820 | operands[4] = operands[3] = operands[0]; | |
5821 | } | |
5822 | [(set_attr "movprfx" "yes")] | |
5823 | ) | |
a19ba9e1 | 5824 | ;; Register merging forms are handled through SVE_COND_FP_BINARY. |
43cacb12 | 5825 | |
915d28fe RS |
5826 | ;; ------------------------------------------------------------------------- |
5827 | ;; ---- [FP] Absolute difference | |
5828 | ;; ------------------------------------------------------------------------- | |
5829 | ;; Includes: | |
5830 | ;; - FABD | |
5831 | ;; ------------------------------------------------------------------------- | |
5832 | ||
5833 | ;; Predicated floating-point absolute difference. | |
624d0f07 | 5834 | (define_expand "@aarch64_pred_abd<mode>" |
f75cdd2c RS |
5835 | [(set (match_operand:SVE_FULL_F 0 "register_operand") |
5836 | (unspec:SVE_FULL_F | |
624d0f07 RS |
5837 | [(match_operand:<VPRED> 1 "register_operand") |
5838 | (match_operand:SI 4 "aarch64_sve_gp_strictness") | |
f75cdd2c | 5839 | (unspec:SVE_FULL_F |
624d0f07 RS |
5840 | [(match_dup 1) |
5841 | (match_dup 4) | |
f75cdd2c RS |
5842 | (match_operand:SVE_FULL_F 2 "register_operand") |
5843 | (match_operand:SVE_FULL_F 3 "register_operand")] | |
624d0f07 RS |
5844 | UNSPEC_COND_FSUB)] |
5845 | UNSPEC_COND_FABS))] | |
5846 | "TARGET_SVE" | |
5847 | ) | |
5848 | ||
5849 | ;; Predicated floating-point absolute difference. | |
0eb5e901 | 5850 | (define_insn_and_rewrite "*aarch64_pred_abd<mode>_relaxed" |
f75cdd2c RS |
5851 | [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") |
5852 | (unspec:SVE_FULL_F | |
5e176a61 | 5853 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") |
c9c5a809 | 5854 | (match_operand:SI 4 "aarch64_sve_gp_strictness") |
f75cdd2c | 5855 | (unspec:SVE_FULL_F |
c9c5a809 | 5856 | [(match_operand 5) |
0eb5e901 | 5857 | (const_int SVE_RELAXED_GP) |
f75cdd2c RS |
5858 | (match_operand:SVE_FULL_F 2 "register_operand" "%0, w") |
5859 | (match_operand:SVE_FULL_F 3 "register_operand" "w, w")] | |
6fe679cc RS |
5860 | UNSPEC_COND_FSUB)] |
5861 | UNSPEC_COND_FABS))] | |
0eb5e901 | 5862 | "TARGET_SVE" |
5e176a61 RS |
5863 | "@ |
5864 | fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
5865 | movprfx\t%0, %2\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
c9c5a809 RS |
5866 | "&& !rtx_equal_p (operands[1], operands[5])" |
5867 | { | |
5868 | operands[5] = copy_rtx (operands[1]); | |
5869 | } | |
5e176a61 | 5870 | [(set_attr "movprfx" "*,yes")] |
43cacb12 RS |
5871 | ) |
5872 | ||
0eb5e901 RS |
5873 | (define_insn "*aarch64_pred_abd<mode>_strict" |
5874 | [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") | |
5875 | (unspec:SVE_FULL_F | |
5876 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
5877 | (match_operand:SI 4 "aarch64_sve_gp_strictness") | |
5878 | (unspec:SVE_FULL_F | |
5879 | [(match_dup 1) | |
5880 | (const_int SVE_STRICT_GP) | |
5881 | (match_operand:SVE_FULL_F 2 "register_operand" "%0, w") | |
5882 | (match_operand:SVE_FULL_F 3 "register_operand" "w, w")] | |
5883 | UNSPEC_COND_FSUB)] | |
5884 | UNSPEC_COND_FABS))] | |
5885 | "TARGET_SVE" | |
5886 | "@ | |
5887 | fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
5888 | movprfx\t%0, %2\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
5889 | [(set_attr "movprfx" "*,yes")] | |
5890 | ) | |
5891 | ||
624d0f07 | 5892 | (define_expand "@aarch64_cond_abd<mode>" |
f75cdd2c RS |
5893 | [(set (match_operand:SVE_FULL_F 0 "register_operand") |
5894 | (unspec:SVE_FULL_F | |
624d0f07 | 5895 | [(match_operand:<VPRED> 1 "register_operand") |
f75cdd2c | 5896 | (unspec:SVE_FULL_F |
624d0f07 RS |
5897 | [(match_dup 1) |
5898 | (const_int SVE_STRICT_GP) | |
f75cdd2c | 5899 | (unspec:SVE_FULL_F |
624d0f07 RS |
5900 | [(match_dup 1) |
5901 | (const_int SVE_STRICT_GP) | |
f75cdd2c RS |
5902 | (match_operand:SVE_FULL_F 2 "register_operand") |
5903 | (match_operand:SVE_FULL_F 3 "register_operand")] | |
624d0f07 RS |
5904 | UNSPEC_COND_FSUB)] |
5905 | UNSPEC_COND_FABS) | |
f75cdd2c | 5906 | (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")] |
624d0f07 RS |
5907 | UNSPEC_SEL))] |
5908 | "TARGET_SVE" | |
5909 | { | |
5910 | if (rtx_equal_p (operands[3], operands[4])) | |
5911 | std::swap (operands[2], operands[3]); | |
5912 | }) | |
5913 | ||
5914 | ;; Predicated floating-point absolute difference, merging with the first | |
5915 | ;; input. | |
0eb5e901 | 5916 | (define_insn_and_rewrite "*aarch64_cond_abd<mode>_2_relaxed" |
f75cdd2c RS |
5917 | [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") |
5918 | (unspec:SVE_FULL_F | |
624d0f07 | 5919 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") |
f75cdd2c | 5920 | (unspec:SVE_FULL_F |
bf30864e | 5921 | [(match_operand 4) |
0eb5e901 | 5922 | (const_int SVE_RELAXED_GP) |
f75cdd2c | 5923 | (unspec:SVE_FULL_F |
0eb5e901 RS |
5924 | [(match_operand 5) |
5925 | (const_int SVE_RELAXED_GP) | |
f75cdd2c RS |
5926 | (match_operand:SVE_FULL_F 2 "register_operand" "0, w") |
5927 | (match_operand:SVE_FULL_F 3 "register_operand" "w, w")] | |
bf30864e RS |
5928 | UNSPEC_COND_FSUB)] |
5929 | UNSPEC_COND_FABS) | |
5930 | (match_dup 2)] | |
5931 | UNSPEC_SEL))] | |
0eb5e901 | 5932 | "TARGET_SVE" |
bf30864e RS |
5933 | "@ |
5934 | fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
5935 | movprfx\t%0, %2\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
5936 | "&& (!rtx_equal_p (operands[1], operands[4]) | |
0eb5e901 | 5937 | || !rtx_equal_p (operands[1], operands[5]))" |
bf30864e RS |
5938 | { |
5939 | operands[4] = copy_rtx (operands[1]); | |
0eb5e901 | 5940 | operands[5] = copy_rtx (operands[1]); |
bf30864e RS |
5941 | } |
5942 | [(set_attr "movprfx" "*,yes")] | |
5943 | ) | |
5944 | ||
0eb5e901 RS |
5945 | (define_insn "*aarch64_cond_abd<mode>_2_strict" |
5946 | [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") | |
5947 | (unspec:SVE_FULL_F | |
5948 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
5949 | (unspec:SVE_FULL_F | |
5950 | [(match_dup 1) | |
5951 | (match_operand:SI 4 "aarch64_sve_gp_strictness") | |
5952 | (unspec:SVE_FULL_F | |
5953 | [(match_dup 1) | |
5954 | (match_operand:SI 5 "aarch64_sve_gp_strictness") | |
5955 | (match_operand:SVE_FULL_F 2 "register_operand" "0, w") | |
5956 | (match_operand:SVE_FULL_F 3 "register_operand" "w, w")] | |
5957 | UNSPEC_COND_FSUB)] | |
5958 | UNSPEC_COND_FABS) | |
5959 | (match_dup 2)] | |
5960 | UNSPEC_SEL))] | |
5961 | "TARGET_SVE" | |
5962 | "@ | |
5963 | fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
5964 | movprfx\t%0, %2\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
5965 | [(set_attr "movprfx" "*,yes")] | |
5966 | ) | |
5967 | ||
bf30864e RS |
5968 | ;; Predicated floating-point absolute difference, merging with the second |
5969 | ;; input. | |
0eb5e901 | 5970 | (define_insn_and_rewrite "*aarch64_cond_abd<mode>_3_relaxed" |
f75cdd2c RS |
5971 | [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") |
5972 | (unspec:SVE_FULL_F | |
bf30864e | 5973 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") |
f75cdd2c | 5974 | (unspec:SVE_FULL_F |
bf30864e | 5975 | [(match_operand 4) |
0eb5e901 | 5976 | (const_int SVE_RELAXED_GP) |
f75cdd2c | 5977 | (unspec:SVE_FULL_F |
0eb5e901 RS |
5978 | [(match_operand 5) |
5979 | (const_int SVE_RELAXED_GP) | |
f75cdd2c RS |
5980 | (match_operand:SVE_FULL_F 2 "register_operand" "w, w") |
5981 | (match_operand:SVE_FULL_F 3 "register_operand" "0, w")] | |
bf30864e RS |
5982 | UNSPEC_COND_FSUB)] |
5983 | UNSPEC_COND_FABS) | |
5984 | (match_dup 3)] | |
5985 | UNSPEC_SEL))] | |
0eb5e901 | 5986 | "TARGET_SVE" |
bf30864e RS |
5987 | "@ |
5988 | fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> | |
5989 | movprfx\t%0, %3\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>" | |
5990 | "&& (!rtx_equal_p (operands[1], operands[4]) | |
0eb5e901 | 5991 | || !rtx_equal_p (operands[1], operands[5]))" |
bf30864e RS |
5992 | { |
5993 | operands[4] = copy_rtx (operands[1]); | |
0eb5e901 | 5994 | operands[5] = copy_rtx (operands[1]); |
bf30864e RS |
5995 | } |
5996 | [(set_attr "movprfx" "*,yes")] | |
5997 | ) | |
5998 | ||
0eb5e901 RS |
5999 | (define_insn "*aarch64_cond_abd<mode>_3_strict" |
6000 | [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") | |
6001 | (unspec:SVE_FULL_F | |
6002 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
6003 | (unspec:SVE_FULL_F | |
6004 | [(match_dup 1) | |
6005 | (match_operand:SI 4 "aarch64_sve_gp_strictness") | |
6006 | (unspec:SVE_FULL_F | |
6007 | [(match_dup 1) | |
6008 | (match_operand:SI 5 "aarch64_sve_gp_strictness") | |
6009 | (match_operand:SVE_FULL_F 2 "register_operand" "w, w") | |
6010 | (match_operand:SVE_FULL_F 3 "register_operand" "0, w")] | |
6011 | UNSPEC_COND_FSUB)] | |
6012 | UNSPEC_COND_FABS) | |
6013 | (match_dup 3)] | |
6014 | UNSPEC_SEL))] | |
6015 | "TARGET_SVE" | |
6016 | "@ | |
6017 | fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> | |
6018 | movprfx\t%0, %3\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>" | |
6019 | [(set_attr "movprfx" "*,yes")] | |
6020 | ) | |
6021 | ||
bf30864e RS |
6022 | ;; Predicated floating-point absolute difference, merging with an |
6023 | ;; independent value. | |
0eb5e901 | 6024 | (define_insn_and_rewrite "*aarch64_cond_abd<mode>_any_relaxed" |
f75cdd2c RS |
6025 | [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, ?&w") |
6026 | (unspec:SVE_FULL_F | |
bf30864e | 6027 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl") |
f75cdd2c | 6028 | (unspec:SVE_FULL_F |
bf30864e | 6029 | [(match_operand 5) |
0eb5e901 | 6030 | (const_int SVE_RELAXED_GP) |
f75cdd2c | 6031 | (unspec:SVE_FULL_F |
0eb5e901 RS |
6032 | [(match_operand 6) |
6033 | (const_int SVE_RELAXED_GP) | |
f75cdd2c RS |
6034 | (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w, w") |
6035 | (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w, w, w")] | |
bf30864e RS |
6036 | UNSPEC_COND_FSUB)] |
6037 | UNSPEC_COND_FABS) | |
f75cdd2c | 6038 | (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")] |
bf30864e RS |
6039 | UNSPEC_SEL))] |
6040 | "TARGET_SVE | |
6041 | && !rtx_equal_p (operands[2], operands[4]) | |
0eb5e901 | 6042 | && !rtx_equal_p (operands[3], operands[4])" |
bf30864e RS |
6043 | "@ |
6044 | movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
6045 | movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> | |
6046 | movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
6047 | movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
6048 | #" | |
6049 | "&& 1" | |
6050 | { | |
6051 | if (reload_completed | |
0eb5e901 RS |
6052 | && register_operand (operands[4], <MODE>mode) |
6053 | && !rtx_equal_p (operands[0], operands[4])) | |
bf30864e RS |
6054 | { |
6055 | emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[3], | |
6056 | operands[4], operands[1])); | |
6057 | operands[4] = operands[3] = operands[0]; | |
6058 | } | |
6059 | else if (!rtx_equal_p (operands[1], operands[5]) | |
0eb5e901 | 6060 | || !rtx_equal_p (operands[1], operands[6])) |
bf30864e RS |
6061 | { |
6062 | operands[5] = copy_rtx (operands[1]); | |
0eb5e901 | 6063 | operands[6] = copy_rtx (operands[1]); |
bf30864e RS |
6064 | } |
6065 | else | |
6066 | FAIL; | |
6067 | } | |
6068 | [(set_attr "movprfx" "yes")] | |
6069 | ) | |
6070 | ||
0eb5e901 RS |
6071 | (define_insn_and_rewrite "*aarch64_cond_abd<mode>_any_strict" |
6072 | [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, ?&w") | |
6073 | (unspec:SVE_FULL_F | |
6074 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl") | |
6075 | (unspec:SVE_FULL_F | |
6076 | [(match_dup 1) | |
6077 | (match_operand:SI 5 "aarch64_sve_gp_strictness") | |
6078 | (unspec:SVE_FULL_F | |
6079 | [(match_dup 1) | |
6080 | (match_operand:SI 6 "aarch64_sve_gp_strictness") | |
6081 | (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w, w") | |
6082 | (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w, w, w")] | |
6083 | UNSPEC_COND_FSUB)] | |
6084 | UNSPEC_COND_FABS) | |
6085 | (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")] | |
6086 | UNSPEC_SEL))] | |
6087 | "TARGET_SVE | |
6088 | && !rtx_equal_p (operands[2], operands[4]) | |
6089 | && !rtx_equal_p (operands[3], operands[4])" | |
6090 | "@ | |
6091 | movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
6092 | movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> | |
6093 | movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
6094 | movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
6095 | #" | |
6096 | "&& reload_completed | |
6097 | && register_operand (operands[4], <MODE>mode) | |
6098 | && !rtx_equal_p (operands[0], operands[4])" | |
6099 | { | |
6100 | emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[3], | |
6101 | operands[4], operands[1])); | |
6102 | operands[4] = operands[3] = operands[0]; | |
6103 | } | |
6104 | [(set_attr "movprfx" "yes")] | |
6105 | ) | |
6106 | ||
915d28fe RS |
6107 | ;; ------------------------------------------------------------------------- |
6108 | ;; ---- [FP] Multiplication | |
6109 | ;; ------------------------------------------------------------------------- | |
6110 | ;; Includes: | |
6111 | ;; - FMUL | |
6112 | ;; ------------------------------------------------------------------------- | |
6113 | ||
c9c5a809 | 6114 | ;; Predicated floating-point multiplication. |
624d0f07 | 6115 | (define_insn_and_split "@aarch64_pred_<optab><mode>" |
f75cdd2c RS |
6116 | [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, ?&w, ?&w") |
6117 | (unspec:SVE_FULL_F | |
624d0f07 RS |
6118 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl") |
6119 | (match_operand:SI 4 "aarch64_sve_gp_strictness" "i, Z, Ui1, i, Ui1") | |
f75cdd2c RS |
6120 | (match_operand:SVE_FULL_F 2 "register_operand" "%0, w, 0, w, w") |
6121 | (match_operand:SVE_FULL_F 3 "aarch64_sve_float_mul_operand" "vsM, w, w, vsM, w")] | |
624d0f07 | 6122 | SVE_COND_FP_MUL))] |
43cacb12 | 6123 | "TARGET_SVE" |
915d28fe RS |
6124 | "@ |
6125 | fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 | |
5e176a61 | 6126 | # |
624d0f07 RS |
6127 | fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> |
6128 | movprfx\t%0, %2\;fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 | |
6129 | movprfx\t%0, %2\;fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
915d28fe RS |
6130 | ; Split the unpredicated form after reload, so that we don't have |
6131 | ; the unnecessary PTRUE. | |
6132 | "&& reload_completed | |
624d0f07 RS |
6133 | && register_operand (operands[3], <MODE>mode) |
6134 | && INTVAL (operands[4]) == SVE_RELAXED_GP" | |
f75cdd2c | 6135 | [(set (match_dup 0) (mult:SVE_FULL_F (match_dup 2) (match_dup 3)))] |
5e176a61 | 6136 | "" |
624d0f07 | 6137 | [(set_attr "movprfx" "*,*,*,yes,yes")] |
43cacb12 RS |
6138 | ) |
6139 | ||
a19ba9e1 RS |
6140 | ;; Merging forms are handled through SVE_COND_FP_BINARY and |
6141 | ;; SVE_COND_FP_BINARY_I1. | |
915d28fe | 6142 | |
624d0f07 RS |
6143 | ;; Unpredicated multiplication by selected lanes. |
6144 | (define_insn "@aarch64_mul_lane_<mode>" | |
f75cdd2c RS |
6145 | [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w") |
6146 | (mult:SVE_FULL_F | |
6147 | (unspec:SVE_FULL_F | |
6148 | [(match_operand:SVE_FULL_F 2 "register_operand" "<sve_lane_con>") | |
624d0f07 RS |
6149 | (match_operand:SI 3 "const_int_operand")] |
6150 | UNSPEC_SVE_LANE_SELECT) | |
f75cdd2c | 6151 | (match_operand:SVE_FULL_F 1 "register_operand" "w")))] |
624d0f07 RS |
6152 | "TARGET_SVE" |
6153 | "fmul\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]" | |
6154 | ) | |
6155 | ||
04f307cb RS |
6156 | ;; ------------------------------------------------------------------------- |
6157 | ;; ---- [FP] Division | |
6158 | ;; ------------------------------------------------------------------------- | |
6159 | ;; The patterns in this section are synthetic. | |
6160 | ;; ------------------------------------------------------------------------- | |
6161 | ||
6162 | (define_expand "div<mode>3" | |
6163 | [(set (match_operand:SVE_FULL_F 0 "register_operand") | |
6164 | (unspec:SVE_FULL_F | |
6165 | [(match_dup 3) | |
6166 | (const_int SVE_RELAXED_GP) | |
6167 | (match_operand:SVE_FULL_F 1 "nonmemory_operand") | |
6168 | (match_operand:SVE_FULL_F 2 "register_operand")] | |
6169 | UNSPEC_COND_FDIV))] | |
6170 | "TARGET_SVE" | |
6171 | { | |
6172 | if (aarch64_emit_approx_div (operands[0], operands[1], operands[2])) | |
6173 | DONE; | |
6174 | ||
6175 | operands[1] = force_reg (<MODE>mode, operands[1]); | |
6176 | operands[3] = aarch64_ptrue_reg (<VPRED>mode); | |
6177 | } | |
6178 | ) | |
6179 | ||
6180 | (define_expand "@aarch64_frecpe<mode>" | |
6181 | [(set (match_operand:SVE_FULL_F 0 "register_operand") | |
6182 | (unspec:SVE_FULL_F | |
6183 | [(match_operand:SVE_FULL_F 1 "register_operand")] | |
6184 | UNSPEC_FRECPE))] | |
6185 | "TARGET_SVE" | |
6186 | ) | |
6187 | ||
6188 | (define_expand "@aarch64_frecps<mode>" | |
6189 | [(set (match_operand:SVE_FULL_F 0 "register_operand") | |
6190 | (unspec:SVE_FULL_F | |
6191 | [(match_operand:SVE_FULL_F 1 "register_operand") | |
6192 | (match_operand:SVE_FULL_F 2 "register_operand")] | |
6193 | UNSPEC_FRECPS))] | |
6194 | "TARGET_SVE" | |
6195 | ) | |
6196 | ||
915d28fe RS |
6197 | ;; ------------------------------------------------------------------------- |
6198 | ;; ---- [FP] Binary logical operations | |
6199 | ;; ------------------------------------------------------------------------- | |
6200 | ;; Includes | |
6201 | ;; - AND | |
6202 | ;; - EOR | |
6203 | ;; - ORR | |
6204 | ;; ------------------------------------------------------------------------- | |
6205 | ||
6206 | ;; Binary logical operations on floating-point modes. We avoid subregs | |
6207 | ;; by providing this, but we need to use UNSPECs since rtx logical ops | |
6208 | ;; aren't defined for floating-point modes. | |
6209 | (define_insn "*<optab><mode>3" | |
f75cdd2c RS |
6210 | [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w") |
6211 | (unspec:SVE_FULL_F | |
6212 | [(match_operand:SVE_FULL_F 1 "register_operand" "w") | |
6213 | (match_operand:SVE_FULL_F 2 "register_operand" "w")] | |
6214 | LOGICALF))] | |
43cacb12 | 6215 | "TARGET_SVE" |
915d28fe | 6216 | "<logicalf_op>\t%0.d, %1.d, %2.d" |
43cacb12 RS |
6217 | ) |
6218 | ||
915d28fe RS |
6219 | ;; ------------------------------------------------------------------------- |
6220 | ;; ---- [FP] Sign copying | |
6221 | ;; ------------------------------------------------------------------------- | |
6222 | ;; The patterns in this section are synthetic. | |
6223 | ;; ------------------------------------------------------------------------- | |
6224 | ||
6225 | (define_expand "copysign<mode>3" | |
f75cdd2c RS |
6226 | [(match_operand:SVE_FULL_F 0 "register_operand") |
6227 | (match_operand:SVE_FULL_F 1 "register_operand") | |
6228 | (match_operand:SVE_FULL_F 2 "register_operand")] | |
43cacb12 RS |
6229 | "TARGET_SVE" |
6230 | { | |
915d28fe RS |
6231 | rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode); |
6232 | rtx mant = gen_reg_rtx (<V_INT_EQUIV>mode); | |
6233 | rtx int_res = gen_reg_rtx (<V_INT_EQUIV>mode); | |
6234 | int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1; | |
43cacb12 | 6235 | |
915d28fe RS |
6236 | rtx arg1 = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode); |
6237 | rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode); | |
6238 | ||
6239 | emit_insn (gen_and<v_int_equiv>3 | |
6240 | (sign, arg2, | |
6241 | aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode, | |
6242 | HOST_WIDE_INT_M1U | |
6243 | << bits))); | |
6244 | emit_insn (gen_and<v_int_equiv>3 | |
6245 | (mant, arg1, | |
6246 | aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode, | |
6247 | ~(HOST_WIDE_INT_M1U | |
6248 | << bits)))); | |
6249 | emit_insn (gen_ior<v_int_equiv>3 (int_res, sign, mant)); | |
6250 | emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res)); | |
6251 | DONE; | |
43cacb12 RS |
6252 | } |
6253 | ) | |
6254 | ||
915d28fe | 6255 | (define_expand "xorsign<mode>3" |
f75cdd2c RS |
6256 | [(match_operand:SVE_FULL_F 0 "register_operand") |
6257 | (match_operand:SVE_FULL_F 1 "register_operand") | |
6258 | (match_operand:SVE_FULL_F 2 "register_operand")] | |
43cacb12 RS |
6259 | "TARGET_SVE" |
6260 | { | |
915d28fe RS |
6261 | rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode); |
6262 | rtx int_res = gen_reg_rtx (<V_INT_EQUIV>mode); | |
6263 | int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1; | |
6264 | ||
6265 | rtx arg1 = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode); | |
6266 | rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode); | |
6267 | ||
6268 | emit_insn (gen_and<v_int_equiv>3 | |
6269 | (sign, arg2, | |
6270 | aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode, | |
6271 | HOST_WIDE_INT_M1U | |
6272 | << bits))); | |
6273 | emit_insn (gen_xor<v_int_equiv>3 (int_res, arg1, sign)); | |
6274 | emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res)); | |
6275 | DONE; | |
43cacb12 RS |
6276 | } |
6277 | ) | |
6278 | ||
915d28fe RS |
6279 | ;; ------------------------------------------------------------------------- |
6280 | ;; ---- [FP] Maximum and minimum | |
6281 | ;; ------------------------------------------------------------------------- | |
6282 | ;; Includes: | |
624d0f07 | 6283 | ;; - FMAX |
915d28fe | 6284 | ;; - FMAXNM |
624d0f07 | 6285 | ;; - FMIN |
915d28fe RS |
6286 | ;; - FMINNM |
6287 | ;; ------------------------------------------------------------------------- | |
43cacb12 | 6288 | |
0254ed79 | 6289 | ;; Unpredicated fmax/fmin (the libm functions). The optabs for the |
70613000 | 6290 | ;; smax/smin rtx codes are handled in the generic section above. |
6d331688 | 6291 | (define_expand "<fmaxmin><mode>3" |
f75cdd2c RS |
6292 | [(set (match_operand:SVE_FULL_F 0 "register_operand") |
6293 | (unspec:SVE_FULL_F | |
43cacb12 | 6294 | [(match_dup 3) |
c9c5a809 | 6295 | (const_int SVE_RELAXED_GP) |
f75cdd2c RS |
6296 | (match_operand:SVE_FULL_F 1 "register_operand") |
6297 | (match_operand:SVE_FULL_F 2 "aarch64_sve_float_maxmin_operand")] | |
214c42fa | 6298 | SVE_COND_FP_MAXMIN_PUBLIC))] |
43cacb12 RS |
6299 | "TARGET_SVE" |
6300 | { | |
16de3637 | 6301 | operands[3] = aarch64_ptrue_reg (<VPRED>mode); |
43cacb12 RS |
6302 | } |
6303 | ) | |
6304 | ||
70613000 RS |
6305 | ;; Predicated fmax/fmin (the libm functions). The optabs for the |
6306 | ;; smax/smin rtx codes are handled in the generic section above. | |
6307 | (define_expand "cond_<fmaxmin><mode>" | |
6308 | [(set (match_operand:SVE_FULL_F 0 "register_operand") | |
6309 | (unspec:SVE_FULL_F | |
6310 | [(match_operand:<VPRED> 1 "register_operand") | |
6311 | (unspec:SVE_FULL_F | |
6312 | [(match_dup 1) | |
6313 | (const_int SVE_RELAXED_GP) | |
6314 | (match_operand:SVE_FULL_F 2 "register_operand") | |
6315 | (match_operand:SVE_FULL_F 3 "aarch64_sve_float_maxmin_operand")] | |
6316 | SVE_COND_FP_MAXMIN_PUBLIC) | |
6317 | (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")] | |
6318 | UNSPEC_SEL))] | |
6319 | "TARGET_SVE" | |
6320 | ) | |
6321 | ||
214c42fa | 6322 | ;; Predicated floating-point maximum/minimum. |
624d0f07 | 6323 | (define_insn "@aarch64_pred_<optab><mode>" |
f75cdd2c RS |
6324 | [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?&w, ?&w") |
6325 | (unspec:SVE_FULL_F | |
75079ddf | 6326 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl") |
c9c5a809 | 6327 | (match_operand:SI 4 "aarch64_sve_gp_strictness") |
f75cdd2c RS |
6328 | (match_operand:SVE_FULL_F 2 "register_operand" "%0, 0, w, w") |
6329 | (match_operand:SVE_FULL_F 3 "aarch64_sve_float_maxmin_operand" "vsB, w, vsB, w")] | |
624d0f07 | 6330 | SVE_COND_FP_MAXMIN))] |
43cacb12 | 6331 | "TARGET_SVE" |
a08acce8 | 6332 | "@ |
75079ddf | 6333 | <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 |
214c42fa | 6334 | <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> |
75079ddf | 6335 | movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 |
214c42fa | 6336 | movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" |
75079ddf | 6337 | [(set_attr "movprfx" "*,*,yes,yes")] |
43cacb12 RS |
6338 | ) |
6339 | ||
a19ba9e1 RS |
6340 | ;; Merging forms are handled through SVE_COND_FP_BINARY and |
6341 | ;; SVE_COND_FP_BINARY_I1. | |
915d28fe RS |
6342 | |
6343 | ;; ------------------------------------------------------------------------- | |
6344 | ;; ---- [PRED] Binary logical operations | |
6345 | ;; ------------------------------------------------------------------------- | |
6346 | ;; Includes: | |
6347 | ;; - AND | |
6348 | ;; - ANDS | |
6349 | ;; - EOR | |
6350 | ;; - EORS | |
6351 | ;; - ORR | |
6352 | ;; - ORRS | |
6353 | ;; ------------------------------------------------------------------------- | |
6354 | ||
6355 | ;; Predicate AND. We can reuse one of the inputs as the GP. | |
2d2388f8 RS |
6356 | ;; Doubling the second operand is the preferred implementation |
6357 | ;; of the MOV alias, so we use that instead of %1/z, %1, %2. | |
915d28fe RS |
6358 | (define_insn "and<mode>3" |
6359 | [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") | |
6360 | (and:PRED_ALL (match_operand:PRED_ALL 1 "register_operand" "Upa") | |
6361 | (match_operand:PRED_ALL 2 "register_operand" "Upa")))] | |
9d4ac06e | 6362 | "TARGET_SVE" |
2d2388f8 | 6363 | "and\t%0.b, %1/z, %2.b, %2.b" |
a08acce8 | 6364 | ) |
9d4ac06e | 6365 | |
915d28fe RS |
6366 | ;; Unpredicated predicate EOR and ORR. |
6367 | (define_expand "<optab><mode>3" | |
6368 | [(set (match_operand:PRED_ALL 0 "register_operand") | |
6369 | (and:PRED_ALL | |
6370 | (LOGICAL_OR:PRED_ALL | |
6371 | (match_operand:PRED_ALL 1 "register_operand") | |
6372 | (match_operand:PRED_ALL 2 "register_operand")) | |
6373 | (match_dup 3)))] | |
6c4fd4a9 | 6374 | "TARGET_SVE" |
915d28fe RS |
6375 | { |
6376 | operands[3] = aarch64_ptrue_reg (<MODE>mode); | |
6377 | } | |
a08acce8 | 6378 | ) |
6c4fd4a9 | 6379 | |
915d28fe | 6380 | ;; Predicated predicate AND, EOR and ORR. |
34467289 | 6381 | (define_insn "@aarch64_pred_<optab><mode>_z" |
915d28fe RS |
6382 | [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") |
6383 | (and:PRED_ALL | |
6384 | (LOGICAL:PRED_ALL | |
6385 | (match_operand:PRED_ALL 2 "register_operand" "Upa") | |
6386 | (match_operand:PRED_ALL 3 "register_operand" "Upa")) | |
6387 | (match_operand:PRED_ALL 1 "register_operand" "Upa")))] | |
6388 | "TARGET_SVE" | |
6389 | "<logical>\t%0.b, %1/z, %2.b, %3.b" | |
6390 | ) | |
6391 | ||
6392 | ;; Perform a logical operation on operands 2 and 3, using operand 1 as | |
34467289 RS |
6393 | ;; the GP. Store the result in operand 0 and set the flags in the same |
6394 | ;; way as for PTEST. | |
915d28fe RS |
6395 | (define_insn "*<optab><mode>3_cc" |
6396 | [(set (reg:CC_NZC CC_REGNUM) | |
6397 | (unspec:CC_NZC | |
34467289 RS |
6398 | [(match_operand:VNx16BI 1 "register_operand" "Upa") |
6399 | (match_operand 4) | |
6400 | (match_operand:SI 5 "aarch64_sve_ptrue_flag") | |
915d28fe RS |
6401 | (and:PRED_ALL |
6402 | (LOGICAL:PRED_ALL | |
6403 | (match_operand:PRED_ALL 2 "register_operand" "Upa") | |
6404 | (match_operand:PRED_ALL 3 "register_operand" "Upa")) | |
34467289 RS |
6405 | (match_dup 4))] |
6406 | UNSPEC_PTEST)) | |
915d28fe RS |
6407 | (set (match_operand:PRED_ALL 0 "register_operand" "=Upa") |
6408 | (and:PRED_ALL (LOGICAL:PRED_ALL (match_dup 2) (match_dup 3)) | |
34467289 | 6409 | (match_dup 4)))] |
915d28fe RS |
6410 | "TARGET_SVE" |
6411 | "<logical>s\t%0.b, %1/z, %2.b, %3.b" | |
6412 | ) | |
6413 | ||
624d0f07 RS |
6414 | ;; Same with just the flags result. |
6415 | (define_insn "*<optab><mode>3_ptest" | |
6416 | [(set (reg:CC_NZC CC_REGNUM) | |
6417 | (unspec:CC_NZC | |
6418 | [(match_operand:VNx16BI 1 "register_operand" "Upa") | |
6419 | (match_operand 4) | |
6420 | (match_operand:SI 5 "aarch64_sve_ptrue_flag") | |
6421 | (and:PRED_ALL | |
6422 | (LOGICAL:PRED_ALL | |
6423 | (match_operand:PRED_ALL 2 "register_operand" "Upa") | |
6424 | (match_operand:PRED_ALL 3 "register_operand" "Upa")) | |
6425 | (match_dup 4))] | |
6426 | UNSPEC_PTEST)) | |
6427 | (clobber (match_scratch:VNx16BI 0 "=Upa"))] | |
6428 | "TARGET_SVE" | |
6429 | "<logical>s\t%0.b, %1/z, %2.b, %3.b" | |
6430 | ) | |
6431 | ||
915d28fe RS |
6432 | ;; ------------------------------------------------------------------------- |
6433 | ;; ---- [PRED] Binary logical operations (inverted second input) | |
6434 | ;; ------------------------------------------------------------------------- | |
6435 | ;; Includes: | |
6436 | ;; - BIC | |
6437 | ;; - ORN | |
6438 | ;; ------------------------------------------------------------------------- | |
6439 | ||
6440 | ;; Predicated predicate BIC and ORN. | |
624d0f07 | 6441 | (define_insn "aarch64_pred_<nlogical><mode>_z" |
915d28fe RS |
6442 | [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") |
6443 | (and:PRED_ALL | |
6444 | (NLOGICAL:PRED_ALL | |
35d6c591 RS |
6445 | (not:PRED_ALL (match_operand:PRED_ALL 3 "register_operand" "Upa")) |
6446 | (match_operand:PRED_ALL 2 "register_operand" "Upa")) | |
915d28fe RS |
6447 | (match_operand:PRED_ALL 1 "register_operand" "Upa")))] |
6448 | "TARGET_SVE" | |
35d6c591 | 6449 | "<nlogical>\t%0.b, %1/z, %2.b, %3.b" |
915d28fe RS |
6450 | ) |
6451 | ||
624d0f07 RS |
6452 | ;; Same, but set the flags as a side-effect. |
6453 | (define_insn "*<nlogical><mode>3_cc" | |
6454 | [(set (reg:CC_NZC CC_REGNUM) | |
6455 | (unspec:CC_NZC | |
6456 | [(match_operand:VNx16BI 1 "register_operand" "Upa") | |
6457 | (match_operand 4) | |
6458 | (match_operand:SI 5 "aarch64_sve_ptrue_flag") | |
6459 | (and:PRED_ALL | |
6460 | (NLOGICAL:PRED_ALL | |
6461 | (not:PRED_ALL | |
6462 | (match_operand:PRED_ALL 3 "register_operand" "Upa")) | |
6463 | (match_operand:PRED_ALL 2 "register_operand" "Upa")) | |
6464 | (match_dup 4))] | |
6465 | UNSPEC_PTEST)) | |
6466 | (set (match_operand:PRED_ALL 0 "register_operand" "=Upa") | |
6467 | (and:PRED_ALL (NLOGICAL:PRED_ALL | |
6468 | (not:PRED_ALL (match_dup 3)) | |
6469 | (match_dup 2)) | |
6470 | (match_dup 4)))] | |
6471 | "TARGET_SVE" | |
6472 | "<nlogical>s\t%0.b, %1/z, %2.b, %3.b" | |
6473 | ) | |
6474 | ||
6475 | ;; Same with just the flags result. | |
6476 | (define_insn "*<nlogical><mode>3_ptest" | |
6477 | [(set (reg:CC_NZC CC_REGNUM) | |
6478 | (unspec:CC_NZC | |
6479 | [(match_operand:VNx16BI 1 "register_operand" "Upa") | |
6480 | (match_operand 4) | |
6481 | (match_operand:SI 5 "aarch64_sve_ptrue_flag") | |
6482 | (and:PRED_ALL | |
6483 | (NLOGICAL:PRED_ALL | |
6484 | (not:PRED_ALL | |
6485 | (match_operand:PRED_ALL 3 "register_operand" "Upa")) | |
6486 | (match_operand:PRED_ALL 2 "register_operand" "Upa")) | |
6487 | (match_dup 4))] | |
6488 | UNSPEC_PTEST)) | |
6489 | (clobber (match_scratch:VNx16BI 0 "=Upa"))] | |
6490 | "TARGET_SVE" | |
6491 | "<nlogical>s\t%0.b, %1/z, %2.b, %3.b" | |
6492 | ) | |
6493 | ||
915d28fe RS |
6494 | ;; ------------------------------------------------------------------------- |
6495 | ;; ---- [PRED] Binary logical operations (inverted result) | |
6496 | ;; ------------------------------------------------------------------------- | |
6497 | ;; Includes: | |
6498 | ;; - NAND | |
6499 | ;; - NOR | |
6500 | ;; ------------------------------------------------------------------------- | |
6501 | ||
6502 | ;; Predicated predicate NAND and NOR. | |
624d0f07 | 6503 | (define_insn "aarch64_pred_<logical_nn><mode>_z" |
915d28fe RS |
6504 | [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") |
6505 | (and:PRED_ALL | |
6506 | (NLOGICAL:PRED_ALL | |
6507 | (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa")) | |
6508 | (not:PRED_ALL (match_operand:PRED_ALL 3 "register_operand" "Upa"))) | |
6509 | (match_operand:PRED_ALL 1 "register_operand" "Upa")))] | |
6510 | "TARGET_SVE" | |
6511 | "<logical_nn>\t%0.b, %1/z, %2.b, %3.b" | |
6512 | ) | |
6513 | ||
624d0f07 RS |
6514 | ;; Same, but set the flags as a side-effect. |
6515 | (define_insn "*<logical_nn><mode>3_cc" | |
6516 | [(set (reg:CC_NZC CC_REGNUM) | |
6517 | (unspec:CC_NZC | |
6518 | [(match_operand:VNx16BI 1 "register_operand" "Upa") | |
6519 | (match_operand 4) | |
6520 | (match_operand:SI 5 "aarch64_sve_ptrue_flag") | |
6521 | (and:PRED_ALL | |
6522 | (NLOGICAL:PRED_ALL | |
6523 | (not:PRED_ALL | |
6524 | (match_operand:PRED_ALL 2 "register_operand" "Upa")) | |
6525 | (not:PRED_ALL | |
6526 | (match_operand:PRED_ALL 3 "register_operand" "Upa"))) | |
6527 | (match_dup 4))] | |
6528 | UNSPEC_PTEST)) | |
6529 | (set (match_operand:PRED_ALL 0 "register_operand" "=Upa") | |
6530 | (and:PRED_ALL (NLOGICAL:PRED_ALL | |
6531 | (not:PRED_ALL (match_dup 2)) | |
6532 | (not:PRED_ALL (match_dup 3))) | |
6533 | (match_dup 4)))] | |
6534 | "TARGET_SVE" | |
6535 | "<logical_nn>s\t%0.b, %1/z, %2.b, %3.b" | |
6536 | ) | |
6537 | ||
6538 | ;; Same with just the flags result. | |
6539 | (define_insn "*<logical_nn><mode>3_ptest" | |
6540 | [(set (reg:CC_NZC CC_REGNUM) | |
6541 | (unspec:CC_NZC | |
6542 | [(match_operand:VNx16BI 1 "register_operand" "Upa") | |
6543 | (match_operand 4) | |
6544 | (match_operand:SI 5 "aarch64_sve_ptrue_flag") | |
6545 | (and:PRED_ALL | |
6546 | (NLOGICAL:PRED_ALL | |
6547 | (not:PRED_ALL | |
6548 | (match_operand:PRED_ALL 2 "register_operand" "Upa")) | |
6549 | (not:PRED_ALL | |
6550 | (match_operand:PRED_ALL 3 "register_operand" "Upa"))) | |
6551 | (match_dup 4))] | |
6552 | UNSPEC_PTEST)) | |
6553 | (clobber (match_scratch:VNx16BI 0 "=Upa"))] | |
6554 | "TARGET_SVE" | |
6555 | "<logical_nn>s\t%0.b, %1/z, %2.b, %3.b" | |
6556 | ) | |
6557 | ||
915d28fe RS |
6558 | ;; ========================================================================= |
6559 | ;; == Ternary arithmetic | |
6560 | ;; ========================================================================= | |
6561 | ||
6562 | ;; ------------------------------------------------------------------------- | |
6563 | ;; ---- [INT] MLA and MAD | |
6564 | ;; ------------------------------------------------------------------------- | |
6565 | ;; Includes: | |
6566 | ;; - MAD | |
6567 | ;; - MLA | |
6568 | ;; ------------------------------------------------------------------------- | |
6569 | ||
b6c3aea1 RS |
6570 | ;; Unpredicated integer addition of product. |
6571 | (define_expand "fma<mode>4" | |
cf7a3353 RS |
6572 | [(set (match_operand:SVE_I 0 "register_operand") |
6573 | (plus:SVE_I | |
6574 | (unspec:SVE_I | |
b6c3aea1 | 6575 | [(match_dup 4) |
cf7a3353 RS |
6576 | (mult:SVE_I |
6577 | (match_operand:SVE_I 1 "register_operand") | |
6578 | (match_operand:SVE_I 2 "nonmemory_operand"))] | |
b6c3aea1 | 6579 | UNSPEC_PRED_X) |
cf7a3353 | 6580 | (match_operand:SVE_I 3 "register_operand")))] |
b6c3aea1 RS |
6581 | "TARGET_SVE" |
6582 | { | |
6583 | if (aarch64_prepare_sve_int_fma (operands, PLUS)) | |
6584 | DONE; | |
6585 | operands[4] = aarch64_ptrue_reg (<VPRED>mode); | |
6586 | } | |
6587 | ) | |
6588 | ||
915d28fe | 6589 | ;; Predicated integer addition of product. |
624d0f07 | 6590 | (define_insn "@aarch64_pred_fma<mode>" |
cf7a3353 RS |
6591 | [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w") |
6592 | (plus:SVE_I | |
6593 | (unspec:SVE_I | |
915d28fe | 6594 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") |
cf7a3353 RS |
6595 | (mult:SVE_I |
6596 | (match_operand:SVE_I 2 "register_operand" "%0, w, w") | |
6597 | (match_operand:SVE_I 3 "register_operand" "w, w, w"))] | |
06308276 | 6598 | UNSPEC_PRED_X) |
cf7a3353 | 6599 | (match_operand:SVE_I 4 "register_operand" "w, 0, w")))] |
a08acce8 RH |
6600 | "TARGET_SVE" |
6601 | "@ | |
915d28fe RS |
6602 | mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype> |
6603 | mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> | |
6604 | movprfx\t%0, %4\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>" | |
6605 | [(set_attr "movprfx" "*,*,yes")] | |
a08acce8 RH |
6606 | ) |
6607 | ||
b6c3aea1 RS |
6608 | ;; Predicated integer addition of product with merging. |
6609 | (define_expand "cond_fma<mode>" | |
cf7a3353 RS |
6610 | [(set (match_operand:SVE_I 0 "register_operand") |
6611 | (unspec:SVE_I | |
b6c3aea1 | 6612 | [(match_operand:<VPRED> 1 "register_operand") |
cf7a3353 RS |
6613 | (plus:SVE_I |
6614 | (mult:SVE_I | |
6615 | (match_operand:SVE_I 2 "register_operand") | |
6616 | (match_operand:SVE_I 3 "general_operand")) | |
6617 | (match_operand:SVE_I 4 "register_operand")) | |
6618 | (match_operand:SVE_I 5 "aarch64_simd_reg_or_zero")] | |
b6c3aea1 RS |
6619 | UNSPEC_SEL))] |
6620 | "TARGET_SVE" | |
6621 | { | |
6622 | if (aarch64_prepare_sve_cond_int_fma (operands, PLUS)) | |
6623 | DONE; | |
6624 | /* Swap the multiplication operands if the fallback value is the | |
6625 | second of the two. */ | |
6626 | if (rtx_equal_p (operands[3], operands[5])) | |
6627 | std::swap (operands[2], operands[3]); | |
6628 | } | |
6629 | ) | |
6630 | ||
6631 | ;; Predicated integer addition of product, merging with the first input. | |
6632 | (define_insn "*cond_fma<mode>_2" | |
cf7a3353 RS |
6633 | [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") |
6634 | (unspec:SVE_I | |
b6c3aea1 | 6635 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") |
cf7a3353 RS |
6636 | (plus:SVE_I |
6637 | (mult:SVE_I | |
6638 | (match_operand:SVE_I 2 "register_operand" "0, w") | |
6639 | (match_operand:SVE_I 3 "register_operand" "w, w")) | |
6640 | (match_operand:SVE_I 4 "register_operand" "w, w")) | |
b6c3aea1 RS |
6641 | (match_dup 2)] |
6642 | UNSPEC_SEL))] | |
6643 | "TARGET_SVE" | |
6644 | "@ | |
6645 | mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype> | |
6646 | movprfx\t%0, %2\;mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>" | |
6647 | [(set_attr "movprfx" "*,yes")] | |
6648 | ) | |
6649 | ||
6650 | ;; Predicated integer addition of product, merging with the third input. | |
6651 | (define_insn "*cond_fma<mode>_4" | |
cf7a3353 RS |
6652 | [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") |
6653 | (unspec:SVE_I | |
b6c3aea1 | 6654 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") |
cf7a3353 RS |
6655 | (plus:SVE_I |
6656 | (mult:SVE_I | |
6657 | (match_operand:SVE_I 2 "register_operand" "w, w") | |
6658 | (match_operand:SVE_I 3 "register_operand" "w, w")) | |
6659 | (match_operand:SVE_I 4 "register_operand" "0, w")) | |
b6c3aea1 RS |
6660 | (match_dup 4)] |
6661 | UNSPEC_SEL))] | |
6662 | "TARGET_SVE" | |
6663 | "@ | |
6664 | mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> | |
6665 | movprfx\t%0, %4\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>" | |
6666 | [(set_attr "movprfx" "*,yes")] | |
6667 | ) | |
6668 | ||
6669 | ;; Predicated integer addition of product, merging with an independent value. | |
6670 | (define_insn_and_rewrite "*cond_fma<mode>_any" | |
cf7a3353 RS |
6671 | [(set (match_operand:SVE_I 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w") |
6672 | (unspec:SVE_I | |
b6c3aea1 | 6673 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") |
cf7a3353 RS |
6674 | (plus:SVE_I |
6675 | (mult:SVE_I | |
6676 | (match_operand:SVE_I 2 "register_operand" "w, w, 0, w, w, w") | |
6677 | (match_operand:SVE_I 3 "register_operand" "w, w, w, 0, w, w")) | |
6678 | (match_operand:SVE_I 4 "register_operand" "w, 0, w, w, w, w")) | |
6679 | (match_operand:SVE_I 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")] | |
b6c3aea1 RS |
6680 | UNSPEC_SEL))] |
6681 | "TARGET_SVE | |
6682 | && !rtx_equal_p (operands[2], operands[5]) | |
6683 | && !rtx_equal_p (operands[3], operands[5]) | |
6684 | && !rtx_equal_p (operands[4], operands[5])" | |
6685 | "@ | |
6686 | movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> | |
6687 | movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> | |
6688 | movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype> | |
6689 | movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mad\t%0.<Vetype>, %1/m, %2.<Vetype>, %4.<Vetype> | |
6690 | movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> | |
6691 | #" | |
6692 | "&& reload_completed | |
6693 | && register_operand (operands[5], <MODE>mode) | |
6694 | && !rtx_equal_p (operands[0], operands[5])" | |
6695 | { | |
6696 | emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4], | |
6697 | operands[5], operands[1])); | |
6698 | operands[5] = operands[4] = operands[0]; | |
6699 | } | |
6700 | [(set_attr "movprfx" "yes")] | |
6701 | ) | |
6702 | ||
915d28fe RS |
6703 | ;; ------------------------------------------------------------------------- |
6704 | ;; ---- [INT] MLS and MSB | |
6705 | ;; ------------------------------------------------------------------------- | |
6706 | ;; Includes: | |
6707 | ;; - MLS | |
6708 | ;; - MSB | |
6709 | ;; ------------------------------------------------------------------------- | |
6710 | ||
b6c3aea1 RS |
6711 | ;; Unpredicated integer subtraction of product. |
6712 | (define_expand "fnma<mode>4" | |
264a1269 RS |
6713 | [(set (match_operand:SVE_I 0 "register_operand") |
6714 | (minus:SVE_I | |
6715 | (match_operand:SVE_I 3 "register_operand") | |
6716 | (unspec:SVE_I | |
b6c3aea1 | 6717 | [(match_dup 4) |
264a1269 RS |
6718 | (mult:SVE_I |
6719 | (match_operand:SVE_I 1 "register_operand") | |
6720 | (match_operand:SVE_I 2 "general_operand"))] | |
b6c3aea1 RS |
6721 | UNSPEC_PRED_X)))] |
6722 | "TARGET_SVE" | |
6723 | { | |
6724 | if (aarch64_prepare_sve_int_fma (operands, MINUS)) | |
6725 | DONE; | |
6726 | operands[4] = aarch64_ptrue_reg (<VPRED>mode); | |
6727 | } | |
6728 | ) | |
6729 | ||
915d28fe | 6730 | ;; Predicated integer subtraction of product. |
624d0f07 | 6731 | (define_insn "@aarch64_pred_fnma<mode>" |
264a1269 RS |
6732 | [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w") |
6733 | (minus:SVE_I | |
6734 | (match_operand:SVE_I 4 "register_operand" "w, 0, w") | |
6735 | (unspec:SVE_I | |
915d28fe | 6736 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") |
264a1269 RS |
6737 | (mult:SVE_I |
6738 | (match_operand:SVE_I 2 "register_operand" "%0, w, w") | |
6739 | (match_operand:SVE_I 3 "register_operand" "w, w, w"))] | |
06308276 | 6740 | UNSPEC_PRED_X)))] |
915d28fe RS |
6741 | "TARGET_SVE" |
6742 | "@ | |
6743 | msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype> | |
6744 | mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> | |
6745 | movprfx\t%0, %4\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>" | |
6746 | [(set_attr "movprfx" "*,*,yes")] | |
6747 | ) | |
6748 | ||
b6c3aea1 RS |
6749 | ;; Predicated integer subtraction of product with merging. |
6750 | (define_expand "cond_fnma<mode>" | |
264a1269 RS |
6751 | [(set (match_operand:SVE_I 0 "register_operand") |
6752 | (unspec:SVE_I | |
b6c3aea1 | 6753 | [(match_operand:<VPRED> 1 "register_operand") |
264a1269 RS |
6754 | (minus:SVE_I |
6755 | (match_operand:SVE_I 4 "register_operand") | |
6756 | (mult:SVE_I | |
6757 | (match_operand:SVE_I 2 "register_operand") | |
6758 | (match_operand:SVE_I 3 "general_operand"))) | |
6759 | (match_operand:SVE_I 5 "aarch64_simd_reg_or_zero")] | |
b6c3aea1 RS |
6760 | UNSPEC_SEL))] |
6761 | "TARGET_SVE" | |
6762 | { | |
6763 | if (aarch64_prepare_sve_cond_int_fma (operands, MINUS)) | |
6764 | DONE; | |
6765 | /* Swap the multiplication operands if the fallback value is the | |
6766 | second of the two. */ | |
6767 | if (rtx_equal_p (operands[3], operands[5])) | |
6768 | std::swap (operands[2], operands[3]); | |
6769 | } | |
6770 | ) | |
6771 | ||
6772 | ;; Predicated integer subtraction of product, merging with the first input. | |
6773 | (define_insn "*cond_fnma<mode>_2" | |
264a1269 RS |
6774 | [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") |
6775 | (unspec:SVE_I | |
b6c3aea1 | 6776 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") |
264a1269 RS |
6777 | (minus:SVE_I |
6778 | (match_operand:SVE_I 4 "register_operand" "w, w") | |
6779 | (mult:SVE_I | |
6780 | (match_operand:SVE_I 2 "register_operand" "0, w") | |
6781 | (match_operand:SVE_I 3 "register_operand" "w, w"))) | |
b6c3aea1 RS |
6782 | (match_dup 2)] |
6783 | UNSPEC_SEL))] | |
6784 | "TARGET_SVE" | |
6785 | "@ | |
6786 | msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype> | |
6787 | movprfx\t%0, %2\;msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>" | |
6788 | [(set_attr "movprfx" "*,yes")] | |
6789 | ) | |
6790 | ||
6791 | ;; Predicated integer subtraction of product, merging with the third input. | |
6792 | (define_insn "*cond_fnma<mode>_4" | |
264a1269 RS |
6793 | [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") |
6794 | (unspec:SVE_I | |
b6c3aea1 | 6795 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") |
264a1269 RS |
6796 | (minus:SVE_I |
6797 | (match_operand:SVE_I 4 "register_operand" "0, w") | |
6798 | (mult:SVE_I | |
6799 | (match_operand:SVE_I 2 "register_operand" "w, w") | |
6800 | (match_operand:SVE_I 3 "register_operand" "w, w"))) | |
b6c3aea1 RS |
6801 | (match_dup 4)] |
6802 | UNSPEC_SEL))] | |
6803 | "TARGET_SVE" | |
6804 | "@ | |
6805 | mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> | |
6806 | movprfx\t%0, %4\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>" | |
6807 | [(set_attr "movprfx" "*,yes")] | |
6808 | ) | |
6809 | ||
6810 | ;; Predicated integer subtraction of product, merging with an | |
6811 | ;; independent value. | |
6812 | (define_insn_and_rewrite "*cond_fnma<mode>_any" | |
264a1269 RS |
6813 | [(set (match_operand:SVE_I 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w") |
6814 | (unspec:SVE_I | |
b6c3aea1 | 6815 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") |
264a1269 RS |
6816 | (minus:SVE_I |
6817 | (match_operand:SVE_I 4 "register_operand" "w, 0, w, w, w, w") | |
6818 | (mult:SVE_I | |
6819 | (match_operand:SVE_I 2 "register_operand" "w, w, 0, w, w, w") | |
6820 | (match_operand:SVE_I 3 "register_operand" "w, w, w, 0, w, w"))) | |
6821 | (match_operand:SVE_I 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")] | |
b6c3aea1 RS |
6822 | UNSPEC_SEL))] |
6823 | "TARGET_SVE | |
6824 | && !rtx_equal_p (operands[2], operands[5]) | |
6825 | && !rtx_equal_p (operands[3], operands[5]) | |
6826 | && !rtx_equal_p (operands[4], operands[5])" | |
6827 | "@ | |
6828 | movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> | |
6829 | movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> | |
6830 | movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype> | |
6831 | movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;msb\t%0.<Vetype>, %1/m, %2.<Vetype>, %4.<Vetype> | |
6832 | movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> | |
6833 | #" | |
6834 | "&& reload_completed | |
6835 | && register_operand (operands[5], <MODE>mode) | |
6836 | && !rtx_equal_p (operands[0], operands[5])" | |
6837 | { | |
6838 | emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4], | |
6839 | operands[5], operands[1])); | |
6840 | operands[5] = operands[4] = operands[0]; | |
6841 | } | |
6842 | [(set_attr "movprfx" "yes")] | |
6843 | ) | |
6844 | ||
915d28fe RS |
6845 | ;; ------------------------------------------------------------------------- |
6846 | ;; ---- [INT] Dot product | |
6847 | ;; ------------------------------------------------------------------------- | |
6848 | ;; Includes: | |
6849 | ;; - SDOT | |
36696774 | 6850 | ;; - SUDOT (I8MM) |
915d28fe | 6851 | ;; - UDOT |
36696774 | 6852 | ;; - USDOT (I8MM) |
915d28fe RS |
6853 | ;; ------------------------------------------------------------------------- |
6854 | ||
6855 | ;; Four-element integer dot-product with accumulation. | |
6856 | (define_insn "<sur>dot_prod<vsi2qi>" | |
f75cdd2c RS |
6857 | [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w") |
6858 | (plus:SVE_FULL_SDI | |
6859 | (unspec:SVE_FULL_SDI | |
915d28fe RS |
6860 | [(match_operand:<VSI2QI> 1 "register_operand" "w, w") |
6861 | (match_operand:<VSI2QI> 2 "register_operand" "w, w")] | |
6862 | DOTPROD) | |
f75cdd2c | 6863 | (match_operand:SVE_FULL_SDI 3 "register_operand" "0, w")))] |
a08acce8 RH |
6864 | "TARGET_SVE" |
6865 | "@ | |
915d28fe RS |
6866 | <sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth> |
6867 | movprfx\t%0, %3\;<sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>" | |
a08acce8 RH |
6868 | [(set_attr "movprfx" "*,yes")] |
6869 | ) | |
6870 | ||
624d0f07 RS |
6871 | ;; Four-element integer dot-product by selected lanes with accumulation. |
6872 | (define_insn "@aarch64_<sur>dot_prod_lane<vsi2qi>" | |
f75cdd2c RS |
6873 | [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w") |
6874 | (plus:SVE_FULL_SDI | |
6875 | (unspec:SVE_FULL_SDI | |
624d0f07 RS |
6876 | [(match_operand:<VSI2QI> 1 "register_operand" "w, w") |
6877 | (unspec:<VSI2QI> | |
6878 | [(match_operand:<VSI2QI> 2 "register_operand" "<sve_lane_con>, <sve_lane_con>") | |
6879 | (match_operand:SI 3 "const_int_operand")] | |
6880 | UNSPEC_SVE_LANE_SELECT)] | |
6881 | DOTPROD) | |
f75cdd2c | 6882 | (match_operand:SVE_FULL_SDI 4 "register_operand" "0, w")))] |
624d0f07 RS |
6883 | "TARGET_SVE" |
6884 | "@ | |
6885 | <sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>[%3] | |
6886 | movprfx\t%0, %4\;<sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>[%3]" | |
6887 | [(set_attr "movprfx" "*,yes")] | |
6888 | ) | |
6889 | ||
752045ed | 6890 | (define_insn "@<sur>dot_prod<vsi2qi>" |
36696774 RS |
6891 | [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w") |
6892 | (plus:VNx4SI_ONLY | |
6893 | (unspec:VNx4SI_ONLY | |
6894 | [(match_operand:<VSI2QI> 1 "register_operand" "w, w") | |
6895 | (match_operand:<VSI2QI> 2 "register_operand" "w, w")] | |
6896 | DOTPROD_US_ONLY) | |
6897 | (match_operand:VNx4SI_ONLY 3 "register_operand" "0, w")))] | |
6898 | "TARGET_SVE_I8MM" | |
6899 | "@ | |
6900 | <sur>dot\\t%0.s, %1.b, %2.b | |
6901 | movprfx\t%0, %3\;<sur>dot\\t%0.s, %1.b, %2.b" | |
6902 | [(set_attr "movprfx" "*,yes")] | |
6903 | ) | |
6904 | ||
6905 | (define_insn "@aarch64_<sur>dot_prod_lane<vsi2qi>" | |
6906 | [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w") | |
6907 | (plus:VNx4SI_ONLY | |
6908 | (unspec:VNx4SI_ONLY | |
6909 | [(match_operand:<VSI2QI> 1 "register_operand" "w, w") | |
6910 | (unspec:<VSI2QI> | |
6911 | [(match_operand:<VSI2QI> 2 "register_operand" "y, y") | |
6912 | (match_operand:SI 3 "const_int_operand")] | |
6913 | UNSPEC_SVE_LANE_SELECT)] | |
6914 | DOTPROD_I8MM) | |
6915 | (match_operand:VNx4SI_ONLY 4 "register_operand" "0, w")))] | |
6916 | "TARGET_SVE_I8MM" | |
6917 | "@ | |
6918 | <sur>dot\\t%0.s, %1.b, %2.b[%3] | |
6919 | movprfx\t%0, %4\;<sur>dot\\t%0.s, %1.b, %2.b[%3]" | |
6920 | [(set_attr "movprfx" "*,yes")] | |
6921 | ) | |
6922 | ||
915d28fe RS |
6923 | ;; ------------------------------------------------------------------------- |
6924 | ;; ---- [INT] Sum of absolute differences | |
6925 | ;; ------------------------------------------------------------------------- | |
6926 | ;; The patterns in this section are synthetic. | |
6927 | ;; ------------------------------------------------------------------------- | |
6928 | ||
6929 | ;; Emit a sequence to produce a sum-of-absolute-differences of the inputs in | |
6930 | ;; operands 1 and 2. The sequence also has to perform a widening reduction of | |
6931 | ;; the difference into a vector and accumulate that into operand 3 before | |
6932 | ;; copying that into the result operand 0. | |
6933 | ;; Perform that with a sequence of: | |
6934 | ;; MOV ones.b, #1 | |
6935 | ;; [SU]ABD diff.b, p0/m, op1.b, op2.b | |
6936 | ;; MOVPRFX op0, op3 // If necessary | |
6937 | ;; UDOT op0.s, diff.b, ones.b | |
6938 | (define_expand "<sur>sad<vsi2qi>" | |
f75cdd2c | 6939 | [(use (match_operand:SVE_FULL_SDI 0 "register_operand")) |
915d28fe RS |
6940 | (unspec:<VSI2QI> [(use (match_operand:<VSI2QI> 1 "register_operand")) |
6941 | (use (match_operand:<VSI2QI> 2 "register_operand"))] ABAL) | |
f75cdd2c | 6942 | (use (match_operand:SVE_FULL_SDI 3 "register_operand"))] |
915d28fe RS |
6943 | "TARGET_SVE" |
6944 | { | |
6945 | rtx ones = force_reg (<VSI2QI>mode, CONST1_RTX (<VSI2QI>mode)); | |
6946 | rtx diff = gen_reg_rtx (<VSI2QI>mode); | |
6947 | emit_insn (gen_<sur>abd<vsi2qi>_3 (diff, operands[1], operands[2])); | |
6948 | emit_insn (gen_udot_prod<vsi2qi> (operands[0], diff, ones, operands[3])); | |
6949 | DONE; | |
6950 | } | |
6951 | ) | |
6952 | ||
36696774 RS |
6953 | ;; ------------------------------------------------------------------------- |
6954 | ;; ---- [INT] Matrix multiply-accumulate | |
6955 | ;; ------------------------------------------------------------------------- | |
6956 | ;; Includes: | |
6957 | ;; - SMMLA (I8MM) | |
6958 | ;; - UMMLA (I8MM) | |
6959 | ;; - USMMLA (I8MM) | |
6960 | ;; ------------------------------------------------------------------------- | |
6961 | ||
6962 | (define_insn "@aarch64_sve_add_<optab><vsi2qi>" | |
6963 | [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w") | |
6964 | (plus:VNx4SI_ONLY | |
6965 | (unspec:VNx4SI_ONLY | |
6966 | [(match_operand:<VSI2QI> 2 "register_operand" "w, w") | |
6967 | (match_operand:<VSI2QI> 3 "register_operand" "w, w")] | |
6968 | MATMUL) | |
6969 | (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")))] | |
6970 | "TARGET_SVE_I8MM" | |
6971 | "@ | |
6972 | <sur>mmla\\t%0.s, %2.b, %3.b | |
6973 | movprfx\t%0, %1\;<sur>mmla\\t%0.s, %2.b, %3.b" | |
6974 | [(set_attr "movprfx" "*,yes")] | |
6975 | ) | |
6976 | ||
915d28fe RS |
6977 | ;; ------------------------------------------------------------------------- |
6978 | ;; ---- [FP] General ternary arithmetic corresponding to unspecs | |
6979 | ;; ------------------------------------------------------------------------- | |
6980 | ;; Includes merging patterns for: | |
6981 | ;; - FMAD | |
6982 | ;; - FMLA | |
6983 | ;; - FMLS | |
6984 | ;; - FMSB | |
6985 | ;; - FNMAD | |
6986 | ;; - FNMLA | |
6987 | ;; - FNMLS | |
6988 | ;; - FNMSB | |
6989 | ;; ------------------------------------------------------------------------- | |
6990 | ||
0d80d083 RS |
6991 | ;; Unpredicated floating-point ternary operations. |
6992 | (define_expand "<optab><mode>4" | |
f75cdd2c RS |
6993 | [(set (match_operand:SVE_FULL_F 0 "register_operand") |
6994 | (unspec:SVE_FULL_F | |
0d80d083 | 6995 | [(match_dup 4) |
c9c5a809 | 6996 | (const_int SVE_RELAXED_GP) |
f75cdd2c RS |
6997 | (match_operand:SVE_FULL_F 1 "register_operand") |
6998 | (match_operand:SVE_FULL_F 2 "register_operand") | |
6999 | (match_operand:SVE_FULL_F 3 "register_operand")] | |
0d80d083 RS |
7000 | SVE_COND_FP_TERNARY))] |
7001 | "TARGET_SVE" | |
7002 | { | |
7003 | operands[4] = aarch64_ptrue_reg (<VPRED>mode); | |
7004 | } | |
7005 | ) | |
7006 | ||
7007 | ;; Predicated floating-point ternary operations. | |
624d0f07 | 7008 | (define_insn "@aarch64_pred_<optab><mode>" |
f75cdd2c RS |
7009 | [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?&w") |
7010 | (unspec:SVE_FULL_F | |
0d80d083 | 7011 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") |
c9c5a809 | 7012 | (match_operand:SI 5 "aarch64_sve_gp_strictness") |
f75cdd2c RS |
7013 | (match_operand:SVE_FULL_F 2 "register_operand" "%w, 0, w") |
7014 | (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w") | |
7015 | (match_operand:SVE_FULL_F 4 "register_operand" "0, w, w")] | |
0d80d083 RS |
7016 | SVE_COND_FP_TERNARY))] |
7017 | "TARGET_SVE" | |
7018 | "@ | |
7019 | <sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> | |
7020 | <sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype> | |
7021 | movprfx\t%0, %4\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>" | |
7022 | [(set_attr "movprfx" "*,*,yes")] | |
7023 | ) | |
7024 | ||
915d28fe | 7025 | ;; Predicated floating-point ternary operations with merging. |
624d0f07 | 7026 | (define_expand "@cond_<optab><mode>" |
f75cdd2c RS |
7027 | [(set (match_operand:SVE_FULL_F 0 "register_operand") |
7028 | (unspec:SVE_FULL_F | |
915d28fe | 7029 | [(match_operand:<VPRED> 1 "register_operand") |
f75cdd2c | 7030 | (unspec:SVE_FULL_F |
0d80d083 | 7031 | [(match_dup 1) |
c9c5a809 | 7032 | (const_int SVE_STRICT_GP) |
f75cdd2c RS |
7033 | (match_operand:SVE_FULL_F 2 "register_operand") |
7034 | (match_operand:SVE_FULL_F 3 "register_operand") | |
7035 | (match_operand:SVE_FULL_F 4 "register_operand")] | |
915d28fe | 7036 | SVE_COND_FP_TERNARY) |
f75cdd2c | 7037 | (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero")] |
915d28fe RS |
7038 | UNSPEC_SEL))] |
7039 | "TARGET_SVE" | |
7040 | { | |
7041 | /* Swap the multiplication operands if the fallback value is the | |
7042 | second of the two. */ | |
7043 | if (rtx_equal_p (operands[3], operands[5])) | |
7044 | std::swap (operands[2], operands[3]); | |
7045 | }) | |
7046 | ||
7047 | ;; Predicated floating-point ternary operations, merging with the | |
7048 | ;; first input. | |
0eb5e901 | 7049 | (define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed" |
f75cdd2c RS |
7050 | [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") |
7051 | (unspec:SVE_FULL_F | |
a08acce8 | 7052 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") |
f75cdd2c | 7053 | (unspec:SVE_FULL_F |
c9c5a809 | 7054 | [(match_operand 5) |
0eb5e901 | 7055 | (const_int SVE_RELAXED_GP) |
f75cdd2c RS |
7056 | (match_operand:SVE_FULL_F 2 "register_operand" "0, w") |
7057 | (match_operand:SVE_FULL_F 3 "register_operand" "w, w") | |
7058 | (match_operand:SVE_FULL_F 4 "register_operand" "w, w")] | |
915d28fe RS |
7059 | SVE_COND_FP_TERNARY) |
7060 | (match_dup 2)] | |
a08acce8 | 7061 | UNSPEC_SEL))] |
0eb5e901 | 7062 | "TARGET_SVE" |
a08acce8 | 7063 | "@ |
915d28fe RS |
7064 | <sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype> |
7065 | movprfx\t%0, %2\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>" | |
c9c5a809 RS |
7066 | "&& !rtx_equal_p (operands[1], operands[5])" |
7067 | { | |
7068 | operands[5] = copy_rtx (operands[1]); | |
7069 | } | |
a08acce8 RH |
7070 | [(set_attr "movprfx" "*,yes")] |
7071 | ) | |
7072 | ||
0eb5e901 RS |
7073 | (define_insn "*cond_<optab><mode>_2_strict" |
7074 | [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") | |
7075 | (unspec:SVE_FULL_F | |
7076 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
7077 | (unspec:SVE_FULL_F | |
7078 | [(match_dup 1) | |
7079 | (const_int SVE_STRICT_GP) | |
7080 | (match_operand:SVE_FULL_F 2 "register_operand" "0, w") | |
7081 | (match_operand:SVE_FULL_F 3 "register_operand" "w, w") | |
7082 | (match_operand:SVE_FULL_F 4 "register_operand" "w, w")] | |
7083 | SVE_COND_FP_TERNARY) | |
7084 | (match_dup 2)] | |
7085 | UNSPEC_SEL))] | |
7086 | "TARGET_SVE" | |
7087 | "@ | |
7088 | <sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype> | |
7089 | movprfx\t%0, %2\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>" | |
7090 | [(set_attr "movprfx" "*,yes")] | |
7091 | ) | |
7092 | ||
915d28fe RS |
7093 | ;; Predicated floating-point ternary operations, merging with the |
7094 | ;; third input. | |
0eb5e901 | 7095 | (define_insn_and_rewrite "*cond_<optab><mode>_4_relaxed" |
f75cdd2c RS |
7096 | [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") |
7097 | (unspec:SVE_FULL_F | |
a08acce8 | 7098 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") |
f75cdd2c | 7099 | (unspec:SVE_FULL_F |
c9c5a809 | 7100 | [(match_operand 5) |
0eb5e901 | 7101 | (const_int SVE_RELAXED_GP) |
f75cdd2c RS |
7102 | (match_operand:SVE_FULL_F 2 "register_operand" "w, w") |
7103 | (match_operand:SVE_FULL_F 3 "register_operand" "w, w") | |
7104 | (match_operand:SVE_FULL_F 4 "register_operand" "0, w")] | |
915d28fe RS |
7105 | SVE_COND_FP_TERNARY) |
7106 | (match_dup 4)] | |
a08acce8 | 7107 | UNSPEC_SEL))] |
0eb5e901 | 7108 | "TARGET_SVE" |
a08acce8 | 7109 | "@ |
915d28fe RS |
7110 | <sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> |
7111 | movprfx\t%0, %4\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>" | |
c9c5a809 RS |
7112 | "&& !rtx_equal_p (operands[1], operands[5])" |
7113 | { | |
7114 | operands[5] = copy_rtx (operands[1]); | |
7115 | } | |
a08acce8 RH |
7116 | [(set_attr "movprfx" "*,yes")] |
7117 | ) | |
7118 | ||
0eb5e901 RS |
7119 | (define_insn "*cond_<optab><mode>_4_strict" |
7120 | [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") | |
7121 | (unspec:SVE_FULL_F | |
7122 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
7123 | (unspec:SVE_FULL_F | |
7124 | [(match_dup 1) | |
7125 | (const_int SVE_STRICT_GP) | |
7126 | (match_operand:SVE_FULL_F 2 "register_operand" "w, w") | |
7127 | (match_operand:SVE_FULL_F 3 "register_operand" "w, w") | |
7128 | (match_operand:SVE_FULL_F 4 "register_operand" "0, w")] | |
7129 | SVE_COND_FP_TERNARY) | |
7130 | (match_dup 4)] | |
7131 | UNSPEC_SEL))] | |
7132 | "TARGET_SVE" | |
7133 | "@ | |
7134 | <sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> | |
7135 | movprfx\t%0, %4\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>" | |
7136 | [(set_attr "movprfx" "*,yes")] | |
7137 | ) | |
7138 | ||
915d28fe RS |
7139 | ;; Predicated floating-point ternary operations, merging with an |
7140 | ;; independent value. | |
0eb5e901 | 7141 | (define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed" |
f75cdd2c RS |
7142 | [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w") |
7143 | (unspec:SVE_FULL_F | |
432b29c1 | 7144 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") |
f75cdd2c | 7145 | (unspec:SVE_FULL_F |
c9c5a809 | 7146 | [(match_operand 6) |
0eb5e901 | 7147 | (const_int SVE_RELAXED_GP) |
f75cdd2c RS |
7148 | (match_operand:SVE_FULL_F 2 "register_operand" "w, w, 0, w, w, w") |
7149 | (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, 0, w, w") | |
7150 | (match_operand:SVE_FULL_F 4 "register_operand" "w, 0, w, w, w, w")] | |
915d28fe | 7151 | SVE_COND_FP_TERNARY) |
f75cdd2c | 7152 | (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")] |
0d2b3bca | 7153 | UNSPEC_SEL))] |
f4fde1b3 | 7154 | "TARGET_SVE |
915d28fe RS |
7155 | && !rtx_equal_p (operands[2], operands[5]) |
7156 | && !rtx_equal_p (operands[3], operands[5]) | |
0eb5e901 | 7157 | && !rtx_equal_p (operands[4], operands[5])" |
32cf949c | 7158 | "@ |
915d28fe | 7159 | movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> |
432b29c1 RS |
7160 | movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> |
7161 | movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype> | |
7162 | movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %4.<Vetype> | |
915d28fe | 7163 | movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> |
32cf949c | 7164 | #" |
c9c5a809 | 7165 | "&& 1" |
f4fde1b3 | 7166 | { |
c9c5a809 RS |
7167 | if (reload_completed |
7168 | && register_operand (operands[5], <MODE>mode) | |
7169 | && !rtx_equal_p (operands[0], operands[5])) | |
7170 | { | |
7171 | emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4], | |
7172 | operands[5], operands[1])); | |
7173 | operands[5] = operands[4] = operands[0]; | |
7174 | } | |
7175 | else if (!rtx_equal_p (operands[1], operands[6])) | |
7176 | operands[6] = copy_rtx (operands[1]); | |
7177 | else | |
7178 | FAIL; | |
f4fde1b3 | 7179 | } |
32cf949c | 7180 | [(set_attr "movprfx" "yes")] |
0d2b3bca RS |
7181 | ) |
7182 | ||
0eb5e901 RS |
7183 | (define_insn_and_rewrite "*cond_<optab><mode>_any_strict" |
7184 | [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w") | |
7185 | (unspec:SVE_FULL_F | |
7186 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") | |
7187 | (unspec:SVE_FULL_F | |
7188 | [(match_dup 1) | |
7189 | (const_int SVE_STRICT_GP) | |
7190 | (match_operand:SVE_FULL_F 2 "register_operand" "w, w, 0, w, w, w") | |
7191 | (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, 0, w, w") | |
7192 | (match_operand:SVE_FULL_F 4 "register_operand" "w, 0, w, w, w, w")] | |
7193 | SVE_COND_FP_TERNARY) | |
7194 | (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")] | |
7195 | UNSPEC_SEL))] | |
7196 | "TARGET_SVE | |
7197 | && !rtx_equal_p (operands[2], operands[5]) | |
7198 | && !rtx_equal_p (operands[3], operands[5]) | |
7199 | && !rtx_equal_p (operands[4], operands[5])" | |
7200 | "@ | |
7201 | movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> | |
7202 | movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> | |
7203 | movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype> | |
7204 | movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %4.<Vetype> | |
7205 | movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> | |
7206 | #" | |
7207 | "&& reload_completed | |
7208 | && register_operand (operands[5], <MODE>mode) | |
7209 | && !rtx_equal_p (operands[0], operands[5])" | |
7210 | { | |
7211 | emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4], | |
7212 | operands[5], operands[1])); | |
7213 | operands[5] = operands[4] = operands[0]; | |
7214 | } | |
7215 | [(set_attr "movprfx" "yes")] | |
7216 | ) | |
7217 | ||
624d0f07 RS |
7218 | ;; Unpredicated FMLA and FMLS by selected lanes. It doesn't seem worth using |
7219 | ;; (fma ...) since target-independent code won't understand the indexing. | |
7220 | (define_insn "@aarch64_<optab>_lane_<mode>" | |
f75cdd2c RS |
7221 | [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") |
7222 | (unspec:SVE_FULL_F | |
7223 | [(match_operand:SVE_FULL_F 1 "register_operand" "w, w") | |
7224 | (unspec:SVE_FULL_F | |
7225 | [(match_operand:SVE_FULL_F 2 "register_operand" "<sve_lane_con>, <sve_lane_con>") | |
624d0f07 RS |
7226 | (match_operand:SI 3 "const_int_operand")] |
7227 | UNSPEC_SVE_LANE_SELECT) | |
f75cdd2c | 7228 | (match_operand:SVE_FULL_F 4 "register_operand" "0, w")] |
624d0f07 RS |
7229 | SVE_FP_TERNARY_LANE))] |
7230 | "TARGET_SVE" | |
7231 | "@ | |
7232 | <sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3] | |
7233 | movprfx\t%0, %4\;<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]" | |
7234 | [(set_attr "movprfx" "*,yes")] | |
7235 | ) | |
7236 | ||
7237 | ;; ------------------------------------------------------------------------- | |
7238 | ;; ---- [FP] Complex multiply-add | |
7239 | ;; ------------------------------------------------------------------------- | |
7240 | ;; Includes merging patterns for: | |
7241 | ;; - FCMLA | |
7242 | ;; ------------------------------------------------------------------------- | |
7243 | ||
7244 | ;; Predicated FCMLA. | |
7245 | (define_insn "@aarch64_pred_<optab><mode>" | |
f75cdd2c RS |
7246 | [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") |
7247 | (unspec:SVE_FULL_F | |
624d0f07 RS |
7248 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") |
7249 | (match_operand:SI 5 "aarch64_sve_gp_strictness") | |
f75cdd2c RS |
7250 | (match_operand:SVE_FULL_F 2 "register_operand" "w, w") |
7251 | (match_operand:SVE_FULL_F 3 "register_operand" "w, w") | |
7252 | (match_operand:SVE_FULL_F 4 "register_operand" "0, w")] | |
624d0f07 RS |
7253 | SVE_COND_FCMLA))] |
7254 | "TARGET_SVE" | |
7255 | "@ | |
7256 | fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot> | |
7257 | movprfx\t%0, %4\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>" | |
7258 | [(set_attr "movprfx" "*,yes")] | |
7259 | ) | |
7260 | ||
ad260343 TC |
7261 | ;; unpredicated optab pattern for auto-vectorizer |
7262 | ;; The complex mla/mls operations always need to expand to two instructions. | |
7263 | ;; The first operation does half the computation and the second does the | |
7264 | ;; remainder. Because of this, expand early. | |
7265 | (define_expand "cml<fcmac1><conj_op><mode>4" | |
7266 | [(set (match_operand:SVE_FULL_F 0 "register_operand") | |
7267 | (unspec:SVE_FULL_F | |
7268 | [(match_dup 4) | |
7269 | (match_dup 5) | |
7270 | (match_operand:SVE_FULL_F 1 "register_operand") | |
7271 | (match_operand:SVE_FULL_F 2 "register_operand") | |
7272 | (match_operand:SVE_FULL_F 3 "register_operand")] | |
7273 | FCMLA_OP))] | |
7274 | "TARGET_SVE" | |
7275 | { | |
7276 | operands[4] = aarch64_ptrue_reg (<VPRED>mode); | |
7277 | operands[5] = gen_int_mode (SVE_RELAXED_GP, SImode); | |
7278 | rtx tmp = gen_reg_rtx (<MODE>mode); | |
7279 | emit_insn | |
7280 | (gen_aarch64_pred_fcmla<sve_rot1><mode> (tmp, operands[4], | |
7281 | operands[3], operands[2], | |
7282 | operands[1], operands[5])); | |
7283 | emit_insn | |
7284 | (gen_aarch64_pred_fcmla<sve_rot2><mode> (operands[0], operands[4], | |
7285 | operands[3], operands[2], | |
7286 | tmp, operands[5])); | |
7287 | DONE; | |
7288 | }) | |
7289 | ||
7290 | ;; unpredicated optab pattern for auto-vectorizer | |
7291 | ;; The complex mul operations always need to expand to two instructions. | |
7292 | ;; The first operation does half the computation and the second does the | |
7293 | ;; remainder. Because of this, expand early. | |
7294 | (define_expand "cmul<conj_op><mode>3" | |
7295 | [(set (match_operand:SVE_FULL_F 0 "register_operand") | |
7296 | (unspec:SVE_FULL_F | |
7297 | [(match_operand:SVE_FULL_F 1 "register_operand") | |
7298 | (match_operand:SVE_FULL_F 2 "register_operand")] | |
7299 | FCMUL_OP))] | |
7300 | "TARGET_SVE" | |
7301 | { | |
7302 | rtx pred_reg = aarch64_ptrue_reg (<VPRED>mode); | |
7303 | rtx gp_mode = gen_int_mode (SVE_RELAXED_GP, SImode); | |
7304 | rtx accum = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode)); | |
7305 | rtx tmp = gen_reg_rtx (<MODE>mode); | |
7306 | emit_insn | |
7307 | (gen_aarch64_pred_fcmla<sve_rot1><mode> (tmp, pred_reg, | |
7308 | operands[2], operands[1], | |
7309 | accum, gp_mode)); | |
7310 | emit_insn | |
7311 | (gen_aarch64_pred_fcmla<sve_rot2><mode> (operands[0], pred_reg, | |
7312 | operands[2], operands[1], | |
7313 | tmp, gp_mode)); | |
7314 | DONE; | |
7315 | }) | |
7316 | ||
624d0f07 RS |
7317 | ;; Predicated FCMLA with merging. |
7318 | (define_expand "@cond_<optab><mode>" | |
f75cdd2c RS |
7319 | [(set (match_operand:SVE_FULL_F 0 "register_operand") |
7320 | (unspec:SVE_FULL_F | |
624d0f07 | 7321 | [(match_operand:<VPRED> 1 "register_operand") |
f75cdd2c | 7322 | (unspec:SVE_FULL_F |
624d0f07 RS |
7323 | [(match_dup 1) |
7324 | (const_int SVE_STRICT_GP) | |
f75cdd2c RS |
7325 | (match_operand:SVE_FULL_F 2 "register_operand") |
7326 | (match_operand:SVE_FULL_F 3 "register_operand") | |
7327 | (match_operand:SVE_FULL_F 4 "register_operand")] | |
624d0f07 | 7328 | SVE_COND_FCMLA) |
f75cdd2c | 7329 | (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero")] |
624d0f07 RS |
7330 | UNSPEC_SEL))] |
7331 | "TARGET_SVE" | |
7332 | ) | |
7333 | ||
7334 | ;; Predicated FCMLA, merging with the third input. | |
0eb5e901 | 7335 | (define_insn_and_rewrite "*cond_<optab><mode>_4_relaxed" |
f75cdd2c RS |
7336 | [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") |
7337 | (unspec:SVE_FULL_F | |
624d0f07 | 7338 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") |
f75cdd2c | 7339 | (unspec:SVE_FULL_F |
624d0f07 | 7340 | [(match_operand 5) |
0eb5e901 | 7341 | (const_int SVE_RELAXED_GP) |
f75cdd2c RS |
7342 | (match_operand:SVE_FULL_F 2 "register_operand" "w, w") |
7343 | (match_operand:SVE_FULL_F 3 "register_operand" "w, w") | |
7344 | (match_operand:SVE_FULL_F 4 "register_operand" "0, w")] | |
624d0f07 RS |
7345 | SVE_COND_FCMLA) |
7346 | (match_dup 4)] | |
7347 | UNSPEC_SEL))] | |
0eb5e901 | 7348 | "TARGET_SVE" |
624d0f07 RS |
7349 | "@ |
7350 | fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot> | |
7351 | movprfx\t%0, %4\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>" | |
7352 | "&& !rtx_equal_p (operands[1], operands[5])" | |
7353 | { | |
7354 | operands[5] = copy_rtx (operands[1]); | |
7355 | } | |
7356 | [(set_attr "movprfx" "*,yes")] | |
7357 | ) | |
7358 | ||
0eb5e901 RS |
7359 | (define_insn "*cond_<optab><mode>_4_strict" |
7360 | [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") | |
7361 | (unspec:SVE_FULL_F | |
7362 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
7363 | (unspec:SVE_FULL_F | |
7364 | [(match_dup 1) | |
7365 | (const_int SVE_STRICT_GP) | |
7366 | (match_operand:SVE_FULL_F 2 "register_operand" "w, w") | |
7367 | (match_operand:SVE_FULL_F 3 "register_operand" "w, w") | |
7368 | (match_operand:SVE_FULL_F 4 "register_operand" "0, w")] | |
7369 | SVE_COND_FCMLA) | |
7370 | (match_dup 4)] | |
7371 | UNSPEC_SEL))] | |
7372 | "TARGET_SVE" | |
7373 | "@ | |
7374 | fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot> | |
7375 | movprfx\t%0, %4\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>" | |
7376 | [(set_attr "movprfx" "*,yes")] | |
7377 | ) | |
7378 | ||
624d0f07 | 7379 | ;; Predicated FCMLA, merging with an independent value. |
0eb5e901 | 7380 | (define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed" |
f75cdd2c RS |
7381 | [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w") |
7382 | (unspec:SVE_FULL_F | |
624d0f07 | 7383 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl") |
f75cdd2c | 7384 | (unspec:SVE_FULL_F |
624d0f07 | 7385 | [(match_operand 6) |
0eb5e901 | 7386 | (const_int SVE_RELAXED_GP) |
f75cdd2c RS |
7387 | (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w, w") |
7388 | (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, w") | |
7389 | (match_operand:SVE_FULL_F 4 "register_operand" "w, 0, w, w")] | |
624d0f07 | 7390 | SVE_COND_FCMLA) |
f75cdd2c | 7391 | (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")] |
624d0f07 | 7392 | UNSPEC_SEL))] |
0eb5e901 | 7393 | "TARGET_SVE && !rtx_equal_p (operands[4], operands[5])" |
624d0f07 RS |
7394 | "@ |
7395 | movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot> | |
7396 | movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot> | |
7397 | movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot> | |
7398 | #" | |
7399 | "&& 1" | |
7400 | { | |
7401 | if (reload_completed | |
7402 | && register_operand (operands[5], <MODE>mode) | |
7403 | && !rtx_equal_p (operands[0], operands[5])) | |
7404 | { | |
7405 | emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4], | |
7406 | operands[5], operands[1])); | |
7407 | operands[5] = operands[4] = operands[0]; | |
7408 | } | |
7409 | else if (!rtx_equal_p (operands[1], operands[6])) | |
7410 | operands[6] = copy_rtx (operands[1]); | |
7411 | else | |
7412 | FAIL; | |
7413 | } | |
7414 | [(set_attr "movprfx" "yes")] | |
7415 | ) | |
7416 | ||
0eb5e901 RS |
7417 | (define_insn_and_rewrite "*cond_<optab><mode>_any_strict" |
7418 | [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w") | |
7419 | (unspec:SVE_FULL_F | |
7420 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl") | |
7421 | (unspec:SVE_FULL_F | |
7422 | [(match_dup 1) | |
7423 | (const_int SVE_STRICT_GP) | |
7424 | (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w, w") | |
7425 | (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, w") | |
7426 | (match_operand:SVE_FULL_F 4 "register_operand" "w, 0, w, w")] | |
7427 | SVE_COND_FCMLA) | |
7428 | (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")] | |
7429 | UNSPEC_SEL))] | |
7430 | "TARGET_SVE && !rtx_equal_p (operands[4], operands[5])" | |
7431 | "@ | |
7432 | movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot> | |
7433 | movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot> | |
7434 | movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot> | |
7435 | #" | |
7436 | "&& reload_completed | |
7437 | && register_operand (operands[5], <MODE>mode) | |
7438 | && !rtx_equal_p (operands[0], operands[5])" | |
7439 | { | |
7440 | emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4], | |
7441 | operands[5], operands[1])); | |
7442 | operands[5] = operands[4] = operands[0]; | |
7443 | } | |
7444 | [(set_attr "movprfx" "yes")] | |
7445 | ) | |
7446 | ||
624d0f07 RS |
7447 | ;; Unpredicated FCMLA with indexing. |
7448 | (define_insn "@aarch64_<optab>_lane_<mode>" | |
f75cdd2c RS |
7449 | [(set (match_operand:SVE_FULL_HSF 0 "register_operand" "=w, ?&w") |
7450 | (unspec:SVE_FULL_HSF | |
7451 | [(match_operand:SVE_FULL_HSF 1 "register_operand" "w, w") | |
7452 | (unspec:SVE_FULL_HSF | |
7453 | [(match_operand:SVE_FULL_HSF 2 "register_operand" "<sve_lane_pair_con>, <sve_lane_pair_con>") | |
624d0f07 RS |
7454 | (match_operand:SI 3 "const_int_operand")] |
7455 | UNSPEC_SVE_LANE_SELECT) | |
f75cdd2c | 7456 | (match_operand:SVE_FULL_HSF 4 "register_operand" "0, w")] |
624d0f07 RS |
7457 | FCMLA))] |
7458 | "TARGET_SVE" | |
7459 | "@ | |
7460 | fcmla\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3], #<rot> | |
7461 | movprfx\t%0, %4\;fcmla\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3], #<rot>" | |
7462 | [(set_attr "movprfx" "*,yes")] | |
7463 | ) | |
7464 | ||
7465 | ;; ------------------------------------------------------------------------- | |
7466 | ;; ---- [FP] Trigonometric multiply-add | |
7467 | ;; ------------------------------------------------------------------------- | |
7468 | ;; Includes: | |
7469 | ;; - FTMAD | |
7470 | ;; ------------------------------------------------------------------------- | |
7471 | ||
7472 | (define_insn "@aarch64_sve_tmad<mode>" | |
f75cdd2c RS |
7473 | [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") |
7474 | (unspec:SVE_FULL_F | |
7475 | [(match_operand:SVE_FULL_F 1 "register_operand" "0, w") | |
7476 | (match_operand:SVE_FULL_F 2 "register_operand" "w, w") | |
7477 | (match_operand:DI 3 "const_int_operand")] | |
7478 | UNSPEC_FTMAD))] | |
624d0f07 RS |
7479 | "TARGET_SVE" |
7480 | "@ | |
7481 | ftmad\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #%3 | |
7482 | movprfx\t%0, %1\;ftmad\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #%3" | |
7483 | [(set_attr "movprfx" "*,yes")] | |
7484 | ) | |
7485 | ||
896dff99 RS |
7486 | ;; ------------------------------------------------------------------------- |
7487 | ;; ---- [FP] Bfloat16 long ternary arithmetic (SF,BF,BF) | |
7488 | ;; ------------------------------------------------------------------------- | |
7489 | ;; Includes: | |
7490 | ;; - BFDOT (BF16) | |
7491 | ;; - BFMLALB (BF16) | |
7492 | ;; - BFMLALT (BF16) | |
7493 | ;; - BFMMLA (BF16) | |
7494 | ;; ------------------------------------------------------------------------- | |
7495 | ||
7496 | (define_insn "@aarch64_sve_<sve_fp_op>vnx4sf" | |
7497 | [(set (match_operand:VNx4SF 0 "register_operand" "=w, ?&w") | |
7498 | (unspec:VNx4SF | |
7499 | [(match_operand:VNx4SF 1 "register_operand" "0, w") | |
7500 | (match_operand:VNx8BF 2 "register_operand" "w, w") | |
7501 | (match_operand:VNx8BF 3 "register_operand" "w, w")] | |
7502 | SVE_BFLOAT_TERNARY_LONG))] | |
7503 | "TARGET_SVE_BF16" | |
7504 | "@ | |
7505 | <sve_fp_op>\t%0.s, %2.h, %3.h | |
7506 | movprfx\t%0, %1\;<sve_fp_op>\t%0.s, %2.h, %3.h" | |
7507 | [(set_attr "movprfx" "*,yes")] | |
7508 | ) | |
7509 | ||
7510 | ;; The immediate range is enforced before generating the instruction. | |
7511 | (define_insn "@aarch64_sve_<sve_fp_op>_lanevnx4sf" | |
7512 | [(set (match_operand:VNx4SF 0 "register_operand" "=w, ?&w") | |
7513 | (unspec:VNx4SF | |
7514 | [(match_operand:VNx4SF 1 "register_operand" "0, w") | |
7515 | (match_operand:VNx8BF 2 "register_operand" "w, w") | |
7516 | (match_operand:VNx8BF 3 "register_operand" "y, y") | |
7517 | (match_operand:SI 4 "const_int_operand")] | |
7518 | SVE_BFLOAT_TERNARY_LONG_LANE))] | |
7519 | "TARGET_SVE_BF16" | |
7520 | "@ | |
7521 | <sve_fp_op>\t%0.s, %2.h, %3.h[%4] | |
7522 | movprfx\t%0, %1\;<sve_fp_op>\t%0.s, %2.h, %3.h[%4]" | |
7523 | [(set_attr "movprfx" "*,yes")] | |
7524 | ) | |
7525 | ||
36696774 RS |
7526 | ;; ------------------------------------------------------------------------- |
7527 | ;; ---- [FP] Matrix multiply-accumulate | |
7528 | ;; ------------------------------------------------------------------------- | |
7529 | ;; Includes: | |
7530 | ;; - FMMLA (F32MM,F64MM) | |
7531 | ;; ------------------------------------------------------------------------- | |
7532 | ||
7533 | ;; The mode iterator enforces the target requirements. | |
7534 | (define_insn "@aarch64_sve_<sve_fp_op><mode>" | |
7535 | [(set (match_operand:SVE_MATMULF 0 "register_operand" "=w, ?&w") | |
7536 | (unspec:SVE_MATMULF | |
7537 | [(match_operand:SVE_MATMULF 2 "register_operand" "w, w") | |
7538 | (match_operand:SVE_MATMULF 3 "register_operand" "w, w") | |
7539 | (match_operand:SVE_MATMULF 1 "register_operand" "0, w")] | |
7540 | FMMLA))] | |
7541 | "TARGET_SVE" | |
7542 | "@ | |
7543 | <sve_fp_op>\\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype> | |
7544 | movprfx\t%0, %1\;<sve_fp_op>\\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>" | |
7545 | [(set_attr "movprfx" "*,yes")] | |
7546 | ) | |
7547 | ||
915d28fe RS |
7548 | ;; ========================================================================= |
7549 | ;; == Comparisons and selects | |
7550 | ;; ========================================================================= | |
7551 | ||
7552 | ;; ------------------------------------------------------------------------- | |
7553 | ;; ---- [INT,FP] Select based on predicates | |
7554 | ;; ------------------------------------------------------------------------- | |
7555 | ;; Includes merging patterns for: | |
d29f7dd5 | 7556 | ;; - FMOV |
915d28fe RS |
7557 | ;; - MOV |
7558 | ;; - SEL | |
7559 | ;; ------------------------------------------------------------------------- | |
7560 | ||
7561 | ;; vcond_mask operand order: true, false, mask | |
7562 | ;; UNSPEC_SEL operand order: mask, true, false (as for VEC_COND_EXPR) | |
7563 | ;; SEL operand order: mask, true, false | |
b1c9ec72 | 7564 | (define_expand "@vcond_mask_<mode><vpred>" |
46c705e7 RS |
7565 | [(set (match_operand:SVE_ALL 0 "register_operand") |
7566 | (unspec:SVE_ALL | |
d29f7dd5 | 7567 | [(match_operand:<VPRED> 3 "register_operand") |
46c705e7 RS |
7568 | (match_operand:SVE_ALL 1 "aarch64_sve_reg_or_dup_imm") |
7569 | (match_operand:SVE_ALL 2 "aarch64_simd_reg_or_zero")] | |
915d28fe RS |
7570 | UNSPEC_SEL))] |
7571 | "TARGET_SVE" | |
d29f7dd5 RS |
7572 | { |
7573 | if (register_operand (operands[1], <MODE>mode)) | |
7574 | operands[2] = force_reg (<MODE>mode, operands[2]); | |
7575 | } | |
915d28fe RS |
7576 | ) |
7577 | ||
d29f7dd5 RS |
7578 | ;; Selects between: |
7579 | ;; - two registers | |
7580 | ;; - a duplicated immediate and a register | |
7581 | ;; - a duplicated immediate and zero | |
46c705e7 RS |
7582 | ;; |
7583 | ;; For unpacked vectors, it doesn't really matter whether SEL uses the | |
7584 | ;; the container size or the element size. If SEL used the container size, | |
7585 | ;; it would ignore undefined bits of the predicate but would copy the | |
7586 | ;; upper (undefined) bits of each container along with the defined bits. | |
7587 | ;; If SEL used the element size, it would use undefined bits of the predicate | |
7588 | ;; to select between undefined elements in each input vector. Thus the only | |
7589 | ;; difference is whether the undefined bits in a container always come from | |
7590 | ;; the same input as the defined bits, or whether the choice can vary | |
7591 | ;; independently of the defined bits. | |
7592 | ;; | |
7593 | ;; For the other instructions, using the element size is more natural, | |
7594 | ;; so we do that for SEL as well. | |
d29f7dd5 | 7595 | (define_insn "*vcond_mask_<mode><vpred>" |
46c705e7 RS |
7596 | [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w, w, w, ?w, ?&w, ?&w") |
7597 | (unspec:SVE_ALL | |
d29f7dd5 | 7598 | [(match_operand:<VPRED> 3 "register_operand" "Upa, Upa, Upa, Upa, Upl, Upl, Upl") |
46c705e7 RS |
7599 | (match_operand:SVE_ALL 1 "aarch64_sve_reg_or_dup_imm" "w, vss, vss, Ufc, Ufc, vss, Ufc") |
7600 | (match_operand:SVE_ALL 2 "aarch64_simd_reg_or_zero" "w, 0, Dz, 0, Dz, w, w")] | |
915d28fe | 7601 | UNSPEC_SEL))] |
d29f7dd5 RS |
7602 | "TARGET_SVE |
7603 | && (!register_operand (operands[1], <MODE>mode) | |
7604 | || register_operand (operands[2], <MODE>mode))" | |
7605 | "@ | |
7606 | sel\t%0.<Vetype>, %3, %1.<Vetype>, %2.<Vetype> | |
7607 | mov\t%0.<Vetype>, %3/m, #%I1 | |
7608 | mov\t%0.<Vetype>, %3/z, #%I1 | |
7609 | fmov\t%0.<Vetype>, %3/m, #%1 | |
7610 | movprfx\t%0.<Vetype>, %3/z, %0.<Vetype>\;fmov\t%0.<Vetype>, %3/m, #%1 | |
7611 | movprfx\t%0, %2\;mov\t%0.<Vetype>, %3/m, #%I1 | |
7612 | movprfx\t%0, %2\;fmov\t%0.<Vetype>, %3/m, #%1" | |
7613 | [(set_attr "movprfx" "*,*,*,*,yes,yes,yes")] | |
43cacb12 RS |
7614 | ) |
7615 | ||
88a37c4d RS |
7616 | ;; Optimize selects between a duplicated scalar variable and another vector, |
7617 | ;; the latter of which can be a zero constant or a variable. Treat duplicates | |
7618 | ;; of GPRs as being more expensive than duplicates of FPRs, since they | |
7619 | ;; involve a cross-file move. | |
624d0f07 | 7620 | (define_insn "@aarch64_sel_dup<mode>" |
46c705e7 RS |
7621 | [(set (match_operand:SVE_ALL 0 "register_operand" "=?w, w, ??w, ?&w, ??&w, ?&w") |
7622 | (unspec:SVE_ALL | |
3c2707f3 | 7623 | [(match_operand:<VPRED> 3 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") |
46c705e7 | 7624 | (vec_duplicate:SVE_ALL |
88a37c4d | 7625 | (match_operand:<VEL> 1 "register_operand" "r, w, r, w, r, w")) |
46c705e7 | 7626 | (match_operand:SVE_ALL 2 "aarch64_simd_reg_or_zero" "0, 0, Dz, Dz, w, w")] |
88a37c4d RS |
7627 | UNSPEC_SEL))] |
7628 | "TARGET_SVE" | |
7629 | "@ | |
7630 | mov\t%0.<Vetype>, %3/m, %<vwcore>1 | |
7631 | mov\t%0.<Vetype>, %3/m, %<Vetype>1 | |
7632 | movprfx\t%0.<Vetype>, %3/z, %0.<Vetype>\;mov\t%0.<Vetype>, %3/m, %<vwcore>1 | |
7633 | movprfx\t%0.<Vetype>, %3/z, %0.<Vetype>\;mov\t%0.<Vetype>, %3/m, %<Vetype>1 | |
7634 | movprfx\t%0, %2\;mov\t%0.<Vetype>, %3/m, %<vwcore>1 | |
7635 | movprfx\t%0, %2\;mov\t%0.<Vetype>, %3/m, %<Vetype>1" | |
7636 | [(set_attr "movprfx" "*,*,yes,yes,yes,yes")] | |
7637 | ) | |
7638 | ||
915d28fe RS |
7639 | ;; ------------------------------------------------------------------------- |
7640 | ;; ---- [INT,FP] Compare and select | |
7641 | ;; ------------------------------------------------------------------------- | |
7642 | ;; The patterns in this section are synthetic. | |
7643 | ;; ------------------------------------------------------------------------- | |
43cacb12 | 7644 | |
915d28fe RS |
7645 | ;; Integer (signed) vcond. Don't enforce an immediate range here, since it |
7646 | ;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead. | |
46c705e7 RS |
7647 | (define_expand "vcond<SVE_ALL:mode><SVE_I:mode>" |
7648 | [(set (match_operand:SVE_ALL 0 "register_operand") | |
7649 | (if_then_else:SVE_ALL | |
915d28fe | 7650 | (match_operator 3 "comparison_operator" |
46c705e7 RS |
7651 | [(match_operand:SVE_I 4 "register_operand") |
7652 | (match_operand:SVE_I 5 "nonmemory_operand")]) | |
7653 | (match_operand:SVE_ALL 1 "nonmemory_operand") | |
7654 | (match_operand:SVE_ALL 2 "nonmemory_operand")))] | |
7655 | "TARGET_SVE && <SVE_ALL:container_bits> == <SVE_I:container_bits>" | |
898f07b0 | 7656 | { |
46c705e7 | 7657 | aarch64_expand_sve_vcond (<SVE_ALL:MODE>mode, <SVE_I:MODE>mode, operands); |
915d28fe | 7658 | DONE; |
898f07b0 RS |
7659 | } |
7660 | ) | |
7661 | ||
915d28fe RS |
7662 | ;; Integer vcondu. Don't enforce an immediate range here, since it |
7663 | ;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead. | |
46c705e7 RS |
7664 | (define_expand "vcondu<SVE_ALL:mode><SVE_I:mode>" |
7665 | [(set (match_operand:SVE_ALL 0 "register_operand") | |
7666 | (if_then_else:SVE_ALL | |
915d28fe | 7667 | (match_operator 3 "comparison_operator" |
46c705e7 RS |
7668 | [(match_operand:SVE_I 4 "register_operand") |
7669 | (match_operand:SVE_I 5 "nonmemory_operand")]) | |
7670 | (match_operand:SVE_ALL 1 "nonmemory_operand") | |
7671 | (match_operand:SVE_ALL 2 "nonmemory_operand")))] | |
7672 | "TARGET_SVE && <SVE_ALL:container_bits> == <SVE_I:container_bits>" | |
915d28fe | 7673 | { |
46c705e7 | 7674 | aarch64_expand_sve_vcond (<SVE_ALL:MODE>mode, <SVE_I:MODE>mode, operands); |
915d28fe RS |
7675 | DONE; |
7676 | } | |
898f07b0 RS |
7677 | ) |
7678 | ||
915d28fe RS |
7679 | ;; Floating-point vcond. All comparisons except FCMUO allow a zero operand; |
7680 | ;; aarch64_expand_sve_vcond handles the case of an FCMUO with zero. | |
7681 | (define_expand "vcond<mode><v_fp_equiv>" | |
f75cdd2c RS |
7682 | [(set (match_operand:SVE_FULL_HSD 0 "register_operand") |
7683 | (if_then_else:SVE_FULL_HSD | |
915d28fe RS |
7684 | (match_operator 3 "comparison_operator" |
7685 | [(match_operand:<V_FP_EQUIV> 4 "register_operand") | |
7686 | (match_operand:<V_FP_EQUIV> 5 "aarch64_simd_reg_or_zero")]) | |
f75cdd2c RS |
7687 | (match_operand:SVE_FULL_HSD 1 "nonmemory_operand") |
7688 | (match_operand:SVE_FULL_HSD 2 "nonmemory_operand")))] | |
b781a135 RS |
7689 | "TARGET_SVE" |
7690 | { | |
915d28fe RS |
7691 | aarch64_expand_sve_vcond (<MODE>mode, <V_FP_EQUIV>mode, operands); |
7692 | DONE; | |
b781a135 RS |
7693 | } |
7694 | ) | |
7695 | ||
915d28fe RS |
7696 | ;; ------------------------------------------------------------------------- |
7697 | ;; ---- [INT] Comparisons | |
7698 | ;; ------------------------------------------------------------------------- | |
624d0f07 | 7699 | ;; Includes: |
915d28fe RS |
7700 | ;; - CMPEQ |
7701 | ;; - CMPGE | |
7702 | ;; - CMPGT | |
7703 | ;; - CMPHI | |
7704 | ;; - CMPHS | |
7705 | ;; - CMPLE | |
7706 | ;; - CMPLO | |
7707 | ;; - CMPLS | |
7708 | ;; - CMPLT | |
7709 | ;; - CMPNE | |
7710 | ;; ------------------------------------------------------------------------- | |
b781a135 | 7711 | |
915d28fe RS |
7712 | ;; Signed integer comparisons. Don't enforce an immediate range here, since |
7713 | ;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int | |
7714 | ;; instead. | |
7715 | (define_expand "vec_cmp<mode><vpred>" | |
7716 | [(parallel | |
7717 | [(set (match_operand:<VPRED> 0 "register_operand") | |
7718 | (match_operator:<VPRED> 1 "comparison_operator" | |
46c705e7 RS |
7719 | [(match_operand:SVE_I 2 "register_operand") |
7720 | (match_operand:SVE_I 3 "nonmemory_operand")])) | |
915d28fe | 7721 | (clobber (reg:CC_NZC CC_REGNUM))])] |
b781a135 | 7722 | "TARGET_SVE" |
915d28fe RS |
7723 | { |
7724 | aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]), | |
7725 | operands[2], operands[3]); | |
7726 | DONE; | |
7727 | } | |
b781a135 RS |
7728 | ) |
7729 | ||
915d28fe RS |
7730 | ;; Unsigned integer comparisons. Don't enforce an immediate range here, since |
7731 | ;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int | |
7732 | ;; instead. | |
7733 | (define_expand "vec_cmpu<mode><vpred>" | |
7734 | [(parallel | |
7735 | [(set (match_operand:<VPRED> 0 "register_operand") | |
7736 | (match_operator:<VPRED> 1 "comparison_operator" | |
46c705e7 RS |
7737 | [(match_operand:SVE_I 2 "register_operand") |
7738 | (match_operand:SVE_I 3 "nonmemory_operand")])) | |
915d28fe | 7739 | (clobber (reg:CC_NZC CC_REGNUM))])] |
43cacb12 RS |
7740 | "TARGET_SVE" |
7741 | { | |
915d28fe RS |
7742 | aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]), |
7743 | operands[2], operands[3]); | |
7744 | DONE; | |
43cacb12 RS |
7745 | } |
7746 | ) | |
7747 | ||
00fa90d9 | 7748 | ;; Predicated integer comparisons. |
46c705e7 RS |
7749 | ;; |
7750 | ;; For unpacked vectors, only the lowpart element in each input container | |
7751 | ;; has a defined value, and only the predicate bits associated with | |
7752 | ;; those elements are defined. For example, when comparing two VNx2SIs: | |
7753 | ;; | |
7754 | ;; - The VNx2SIs can be seem as VNx2DIs in which the low halves of each | |
7755 | ;; DI container store an SI element. The upper bits of each DI container | |
7756 | ;; are undefined. | |
7757 | ;; | |
7758 | ;; - Alternatively, the VNx2SIs can be seen as VNx4SIs in which the | |
7759 | ;; even elements are defined and the odd elements are undefined. | |
7760 | ;; | |
7761 | ;; - The associated predicate mode is VNx2BI. This means that only the | |
7762 | ;; low bit in each predicate byte is defined (on input and on output). | |
7763 | ;; | |
7764 | ;; - We use a .s comparison to compare VNx2SIs, under the control of a | |
7765 | ;; VNx2BI governing predicate, to produce a VNx2BI result. If we view | |
7766 | ;; the .s operation as operating on VNx4SIs then for odd lanes: | |
7767 | ;; | |
7768 | ;; - the input governing predicate bit is undefined | |
7769 | ;; - the SI elements being compared are undefined | |
7770 | ;; - the predicate result bit is therefore undefined, but | |
7771 | ;; - the predicate result bit is in the undefined part of a VNx2BI, | |
7772 | ;; so its value doesn't matter anyway. | |
00fa90d9 | 7773 | (define_insn "@aarch64_pred_cmp<cmp_op><mode>" |
915d28fe RS |
7774 | [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa") |
7775 | (unspec:<VPRED> | |
7776 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
00fa90d9 | 7777 | (match_operand:SI 2 "aarch64_sve_ptrue_flag") |
915d28fe | 7778 | (SVE_INT_CMP:<VPRED> |
46c705e7 RS |
7779 | (match_operand:SVE_I 3 "register_operand" "w, w") |
7780 | (match_operand:SVE_I 4 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))] | |
00fa90d9 | 7781 | UNSPEC_PRED_Z)) |
915d28fe | 7782 | (clobber (reg:CC_NZC CC_REGNUM))] |
43cacb12 RS |
7783 | "TARGET_SVE" |
7784 | "@ | |
00fa90d9 RS |
7785 | cmp<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, #%4 |
7786 | cmp<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>" | |
43cacb12 RS |
7787 | ) |
7788 | ||
00fa90d9 RS |
7789 | ;; Predicated integer comparisons in which both the flag and predicate |
7790 | ;; results are interesting. | |
7791 | (define_insn_and_rewrite "*cmp<cmp_op><mode>_cc" | |
915d28fe RS |
7792 | [(set (reg:CC_NZC CC_REGNUM) |
7793 | (unspec:CC_NZC | |
34467289 RS |
7794 | [(match_operand:VNx16BI 1 "register_operand" "Upl, Upl") |
7795 | (match_operand 4) | |
7796 | (match_operand:SI 5 "aarch64_sve_ptrue_flag") | |
915d28fe | 7797 | (unspec:<VPRED> |
00fa90d9 RS |
7798 | [(match_operand 6) |
7799 | (match_operand:SI 7 "aarch64_sve_ptrue_flag") | |
915d28fe | 7800 | (SVE_INT_CMP:<VPRED> |
46c705e7 RS |
7801 | (match_operand:SVE_I 2 "register_operand" "w, w") |
7802 | (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))] | |
00fa90d9 | 7803 | UNSPEC_PRED_Z)] |
34467289 | 7804 | UNSPEC_PTEST)) |
915d28fe RS |
7805 | (set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa") |
7806 | (unspec:<VPRED> | |
00fa90d9 RS |
7807 | [(match_dup 6) |
7808 | (match_dup 7) | |
915d28fe RS |
7809 | (SVE_INT_CMP:<VPRED> |
7810 | (match_dup 2) | |
7811 | (match_dup 3))] | |
00fa90d9 RS |
7812 | UNSPEC_PRED_Z))] |
7813 | "TARGET_SVE | |
7814 | && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])" | |
915d28fe RS |
7815 | "@ |
7816 | cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3 | |
7817 | cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>" | |
00fa90d9 RS |
7818 | "&& !rtx_equal_p (operands[4], operands[6])" |
7819 | { | |
7820 | operands[6] = copy_rtx (operands[4]); | |
7821 | operands[7] = operands[5]; | |
7822 | } | |
43cacb12 RS |
7823 | ) |
7824 | ||
00fa90d9 RS |
7825 | ;; Predicated integer comparisons in which only the flags result is |
7826 | ;; interesting. | |
7827 | (define_insn_and_rewrite "*cmp<cmp_op><mode>_ptest" | |
915d28fe RS |
7828 | [(set (reg:CC_NZC CC_REGNUM) |
7829 | (unspec:CC_NZC | |
34467289 RS |
7830 | [(match_operand:VNx16BI 1 "register_operand" "Upl, Upl") |
7831 | (match_operand 4) | |
7832 | (match_operand:SI 5 "aarch64_sve_ptrue_flag") | |
915d28fe | 7833 | (unspec:<VPRED> |
00fa90d9 RS |
7834 | [(match_operand 6) |
7835 | (match_operand:SI 7 "aarch64_sve_ptrue_flag") | |
915d28fe | 7836 | (SVE_INT_CMP:<VPRED> |
46c705e7 RS |
7837 | (match_operand:SVE_I 2 "register_operand" "w, w") |
7838 | (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))] | |
00fa90d9 | 7839 | UNSPEC_PRED_Z)] |
34467289 | 7840 | UNSPEC_PTEST)) |
915d28fe | 7841 | (clobber (match_scratch:<VPRED> 0 "=Upa, Upa"))] |
00fa90d9 RS |
7842 | "TARGET_SVE |
7843 | && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])" | |
43cacb12 | 7844 | "@ |
915d28fe RS |
7845 | cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3 |
7846 | cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>" | |
00fa90d9 RS |
7847 | "&& !rtx_equal_p (operands[4], operands[6])" |
7848 | { | |
7849 | operands[6] = copy_rtx (operands[4]); | |
7850 | operands[7] = operands[5]; | |
7851 | } | |
43cacb12 RS |
7852 | ) |
7853 | ||
915d28fe RS |
7854 | ;; Predicated integer comparisons, formed by combining a PTRUE-predicated |
7855 | ;; comparison with an AND. Split the instruction into its preferred form | |
00fa90d9 RS |
7856 | ;; at the earliest opportunity, in order to get rid of the redundant |
7857 | ;; operand 4. | |
7858 | (define_insn_and_split "*cmp<cmp_op><mode>_and" | |
915d28fe | 7859 | [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa") |
00fa90d9 RS |
7860 | (and:<VPRED> |
7861 | (unspec:<VPRED> | |
7862 | [(match_operand 4) | |
7863 | (const_int SVE_KNOWN_PTRUE) | |
7864 | (SVE_INT_CMP:<VPRED> | |
46c705e7 RS |
7865 | (match_operand:SVE_I 2 "register_operand" "w, w") |
7866 | (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))] | |
00fa90d9 RS |
7867 | UNSPEC_PRED_Z) |
7868 | (match_operand:<VPRED> 1 "register_operand" "Upl, Upl"))) | |
915d28fe RS |
7869 | (clobber (reg:CC_NZC CC_REGNUM))] |
7870 | "TARGET_SVE" | |
7871 | "#" | |
7872 | "&& 1" | |
7873 | [(parallel | |
7874 | [(set (match_dup 0) | |
00fa90d9 RS |
7875 | (unspec:<VPRED> |
7876 | [(match_dup 1) | |
7877 | (const_int SVE_MAYBE_NOT_PTRUE) | |
7878 | (SVE_INT_CMP:<VPRED> | |
7879 | (match_dup 2) | |
7880 | (match_dup 3))] | |
7881 | UNSPEC_PRED_Z)) | |
915d28fe | 7882 | (clobber (reg:CC_NZC CC_REGNUM))])] |
43cacb12 RS |
7883 | ) |
7884 | ||
624d0f07 RS |
7885 | ;; Predicated integer wide comparisons. |
7886 | (define_insn "@aarch64_pred_cmp<cmp_op><mode>_wide" | |
7887 | [(set (match_operand:<VPRED> 0 "register_operand" "=Upa") | |
7888 | (unspec:<VPRED> | |
7889 | [(match_operand:VNx16BI 1 "register_operand" "Upl") | |
7890 | (match_operand:SI 2 "aarch64_sve_ptrue_flag") | |
7891 | (unspec:<VPRED> | |
f75cdd2c | 7892 | [(match_operand:SVE_FULL_BHSI 3 "register_operand" "w") |
624d0f07 RS |
7893 | (match_operand:VNx2DI 4 "register_operand" "w")] |
7894 | SVE_COND_INT_CMP_WIDE)] | |
7895 | UNSPEC_PRED_Z)) | |
915d28fe | 7896 | (clobber (reg:CC_NZC CC_REGNUM))] |
43cacb12 | 7897 | "TARGET_SVE" |
624d0f07 | 7898 | "cmp<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.d" |
43cacb12 RS |
7899 | ) |
7900 | ||
624d0f07 RS |
7901 | ;; Predicated integer wide comparisons in which both the flag and |
7902 | ;; predicate results are interesting. | |
7903 | (define_insn "*aarch64_pred_cmp<cmp_op><mode>_wide_cc" | |
915d28fe RS |
7904 | [(set (reg:CC_NZC CC_REGNUM) |
7905 | (unspec:CC_NZC | |
624d0f07 | 7906 | [(match_operand:VNx16BI 1 "register_operand" "Upl") |
34467289 | 7907 | (match_operand 4) |
624d0f07 RS |
7908 | (match_operand:SI 5 "aarch64_sve_ptrue_flag") |
7909 | (unspec:<VPRED> | |
7910 | [(match_operand:VNx16BI 6 "register_operand" "Upl") | |
7911 | (match_operand:SI 7 "aarch64_sve_ptrue_flag") | |
7912 | (unspec:<VPRED> | |
f75cdd2c | 7913 | [(match_operand:SVE_FULL_BHSI 2 "register_operand" "w") |
624d0f07 RS |
7914 | (match_operand:VNx2DI 3 "register_operand" "w")] |
7915 | SVE_COND_INT_CMP_WIDE)] | |
7916 | UNSPEC_PRED_Z)] | |
34467289 | 7917 | UNSPEC_PTEST)) |
624d0f07 RS |
7918 | (set (match_operand:<VPRED> 0 "register_operand" "=Upa") |
7919 | (unspec:<VPRED> | |
7920 | [(match_dup 6) | |
7921 | (match_dup 7) | |
7922 | (unspec:<VPRED> | |
7923 | [(match_dup 2) | |
7924 | (match_dup 3)] | |
7925 | SVE_COND_INT_CMP_WIDE)] | |
7926 | UNSPEC_PRED_Z))] | |
7927 | "TARGET_SVE | |
7928 | && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])" | |
7929 | "cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.d" | |
7930 | ) | |
7931 | ||
7932 | ;; Predicated integer wide comparisons in which only the flags result | |
7933 | ;; is interesting. | |
7934 | (define_insn "*aarch64_pred_cmp<cmp_op><mode>_wide_ptest" | |
7935 | [(set (reg:CC_NZC CC_REGNUM) | |
7936 | (unspec:CC_NZC | |
7937 | [(match_operand:VNx16BI 1 "register_operand" "Upl") | |
7938 | (match_operand 4) | |
7939 | (match_operand:SI 5 "aarch64_sve_ptrue_flag") | |
7940 | (unspec:<VPRED> | |
7941 | [(match_operand:VNx16BI 6 "register_operand" "Upl") | |
7942 | (match_operand:SI 7 "aarch64_sve_ptrue_flag") | |
7943 | (unspec:<VPRED> | |
f75cdd2c | 7944 | [(match_operand:SVE_FULL_BHSI 2 "register_operand" "w") |
624d0f07 RS |
7945 | (match_operand:VNx2DI 3 "register_operand" "w")] |
7946 | SVE_COND_INT_CMP_WIDE)] | |
7947 | UNSPEC_PRED_Z)] | |
7948 | UNSPEC_PTEST)) | |
7949 | (clobber (match_scratch:<VPRED> 0 "=Upa"))] | |
7950 | "TARGET_SVE | |
7951 | && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])" | |
7952 | "cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.d" | |
7953 | ) | |
7954 | ||
7955 | ;; ------------------------------------------------------------------------- | |
7956 | ;; ---- [INT] While tests | |
7957 | ;; ------------------------------------------------------------------------- | |
7958 | ;; Includes: | |
0a09a948 RS |
7959 | ;; - WHILEGE (SVE2) |
7960 | ;; - WHILEGT (SVE2) | |
7961 | ;; - WHILEHI (SVE2) | |
7962 | ;; - WHILEHS (SVE2) | |
624d0f07 RS |
7963 | ;; - WHILELE |
7964 | ;; - WHILELO | |
7965 | ;; - WHILELS | |
7966 | ;; - WHILELT | |
bad5e58a RS |
7967 | ;; - WHILERW (SVE2) |
7968 | ;; - WHILEWR (SVE2) | |
624d0f07 RS |
7969 | ;; ------------------------------------------------------------------------- |
7970 | ||
7971 | ;; Set element I of the result if (cmp (plus operand1 J) operand2) is | |
7972 | ;; true for all J in [0, I]. | |
7973 | (define_insn "@while_<while_optab_cmp><GPI:mode><PRED_ALL:mode>" | |
7974 | [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") | |
7975 | (unspec:PRED_ALL [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ") | |
7976 | (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")] | |
7977 | SVE_WHILE)) | |
7978 | (clobber (reg:CC_NZC CC_REGNUM))] | |
7979 | "TARGET_SVE" | |
7980 | "while<cmp_op>\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2" | |
7981 | ) | |
7982 | ||
7983 | ;; The WHILE instructions set the flags in the same way as a PTEST with | |
7984 | ;; a PTRUE GP. Handle the case in which both results are useful. The GP | |
7985 | ;; operands to the PTEST aren't needed, so we allow them to be anything. | |
7986 | (define_insn_and_rewrite "*while_<while_optab_cmp><GPI:mode><PRED_ALL:mode>_cc" | |
7987 | [(set (reg:CC_NZC CC_REGNUM) | |
7988 | (unspec:CC_NZC | |
7989 | [(match_operand 3) | |
7990 | (match_operand 4) | |
7991 | (const_int SVE_KNOWN_PTRUE) | |
7992 | (unspec:PRED_ALL | |
7993 | [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ") | |
7994 | (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")] | |
7995 | SVE_WHILE)] | |
7996 | UNSPEC_PTEST)) | |
7997 | (set (match_operand:PRED_ALL 0 "register_operand" "=Upa") | |
7998 | (unspec:PRED_ALL [(match_dup 1) | |
7999 | (match_dup 2)] | |
8000 | SVE_WHILE))] | |
8001 | "TARGET_SVE" | |
8002 | "while<cmp_op>\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2" | |
8003 | ;; Force the compiler to drop the unused predicate operand, so that we | |
8004 | ;; don't have an unnecessary PTRUE. | |
8005 | "&& (!CONSTANT_P (operands[3]) || !CONSTANT_P (operands[4]))" | |
8006 | { | |
8007 | operands[3] = CONSTM1_RTX (VNx16BImode); | |
8008 | operands[4] = CONSTM1_RTX (<PRED_ALL:MODE>mode); | |
8009 | } | |
8010 | ) | |
8011 | ||
8012 | ;; Same, but handle the case in which only the flags result is useful. | |
bad5e58a | 8013 | (define_insn_and_rewrite "@while_<while_optab_cmp><GPI:mode><PRED_ALL:mode>_ptest" |
624d0f07 RS |
8014 | [(set (reg:CC_NZC CC_REGNUM) |
8015 | (unspec:CC_NZC | |
8016 | [(match_operand 3) | |
8017 | (match_operand 4) | |
8018 | (const_int SVE_KNOWN_PTRUE) | |
8019 | (unspec:PRED_ALL | |
8020 | [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ") | |
8021 | (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")] | |
8022 | SVE_WHILE)] | |
8023 | UNSPEC_PTEST)) | |
8024 | (clobber (match_scratch:PRED_ALL 0 "=Upa"))] | |
8025 | "TARGET_SVE" | |
8026 | "while<cmp_op>\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2" | |
8027 | ;; Force the compiler to drop the unused predicate operand, so that we | |
8028 | ;; don't have an unnecessary PTRUE. | |
8029 | "&& (!CONSTANT_P (operands[3]) || !CONSTANT_P (operands[4]))" | |
8030 | { | |
8031 | operands[3] = CONSTM1_RTX (VNx16BImode); | |
8032 | operands[4] = CONSTM1_RTX (<PRED_ALL:MODE>mode); | |
8033 | } | |
8034 | ) | |
8035 | ||
915d28fe | 8036 | ;; ------------------------------------------------------------------------- |
42b4e87d | 8037 | ;; ---- [FP] Direct comparisons |
915d28fe RS |
8038 | ;; ------------------------------------------------------------------------- |
8039 | ;; Includes: | |
8040 | ;; - FCMEQ | |
8041 | ;; - FCMGE | |
8042 | ;; - FCMGT | |
8043 | ;; - FCMLE | |
8044 | ;; - FCMLT | |
8045 | ;; - FCMNE | |
8046 | ;; - FCMUO | |
8047 | ;; ------------------------------------------------------------------------- | |
8048 | ||
8049 | ;; Floating-point comparisons. All comparisons except FCMUO allow a zero | |
8050 | ;; operand; aarch64_expand_sve_vec_cmp_float handles the case of an FCMUO | |
8051 | ;; with zero. | |
;; Expander for unpredicated FP vector comparisons; the actual code is
;; generated by aarch64_expand_sve_vec_cmp_float (last argument false =
;; the result predicate is not known to be used under a PTRUE only).
8052 | (define_expand "vec_cmp<mode><vpred>" | |
8053 | [(set (match_operand:<VPRED> 0 "register_operand") | |
8054 | (match_operator:<VPRED> 1 "comparison_operator" | |
f75cdd2c RS |
8055 | [(match_operand:SVE_FULL_F 2 "register_operand") |
8056 | (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")]))] | |
43cacb12 RS |
8057 | "TARGET_SVE" |
8058 | { | |
915d28fe RS |
8059 | aarch64_expand_sve_vec_cmp_float (operands[0], GET_CODE (operands[1]), |
8060 | operands[2], operands[3], false); | |
8061 | DONE; | |
43cacb12 RS |
8062 | } |
8063 | ) | |
8064 | ||
4a942af6 | 8065 | ;; Predicated floating-point comparisons. | |
;; The first alternative ("Dz" for operand 4) matches a zero vector and
;; emits the immediate #0.0 form of FCM<cc>.
624d0f07 | 8066 | (define_insn "@aarch64_pred_fcm<cmp_op><mode>" | |
915d28fe RS |
8067 | [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa") |
8068 | (unspec:<VPRED> | |
8069 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
624d0f07 | 8070 | (match_operand:SI 2 "aarch64_sve_ptrue_flag") | |
f75cdd2c RS |
8071 | (match_operand:SVE_FULL_F 3 "register_operand" "w, w") |
8072 | (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, w")] | |
4a942af6 | 8073 | SVE_COND_FP_CMP_I0))] | |
43cacb12 RS |
8074 | "TARGET_SVE" |
8075 | "@ | |
624d0f07 RS |
8076 | fcm<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, #0.0 |
8077 | fcm<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>" | |
43cacb12 RS |
8078 | ) |
8079 | ||
915d28fe | 8080 | ;; Same for unordered comparisons. | |
;; FCMUO has no immediate-zero form, so both inputs must be registers here.
624d0f07 | 8081 | (define_insn "@aarch64_pred_fcmuo<mode>" | |
915d28fe RS |
8082 | [(set (match_operand:<VPRED> 0 "register_operand" "=Upa") |
8083 | (unspec:<VPRED> | |
8084 | [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
624d0f07 | 8085 | (match_operand:SI 2 "aarch64_sve_ptrue_flag") | |
f75cdd2c RS |
8086 | (match_operand:SVE_FULL_F 3 "register_operand" "w") |
8087 | (match_operand:SVE_FULL_F 4 "register_operand" "w")] | |
4a942af6 | 8088 | UNSPEC_COND_FCMUO))] | |
43cacb12 | 8089 | "TARGET_SVE" | |
624d0f07 | 8090 | "fcmuo\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>" | |
43cacb12 RS |
8091 | ) |
8092 | ||
915d28fe RS |
8093 | ;; Floating-point comparisons predicated on a PTRUE, with the results ANDed |
8094 | ;; with another predicate P. This does not have the same trapping behavior | |
8095 | ;; as predicating the comparison itself on P, but it's a legitimate fold, | |
8096 | ;; since we can drop any potentially-trapping operations whose results | |
8097 | ;; are not needed. | |
8098 | ;; | |
8099 | ;; Split the instruction into its preferred form (below) at the earliest | |
8100 | ;; opportunity, in order to get rid of the redundant operand 1. | |
;; The split re-predicates the comparison on operand 4 and marks the GP as
;; SVE_MAYBE_NOT_PTRUE, which makes the separate AND unnecessary.
8101 | (define_insn_and_split "*fcm<cmp_op><mode>_and_combine" | |
8102 | [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa") | |
8103 | (and:<VPRED> | |
8104 | (unspec:<VPRED> | |
8105 | [(match_operand:<VPRED> 1) | |
4a942af6 | 8106 | (const_int SVE_KNOWN_PTRUE) | |
f75cdd2c RS |
8107 | (match_operand:SVE_FULL_F 2 "register_operand" "w, w") |
8108 | (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "Dz, w")] | |
4a942af6 | 8109 | SVE_COND_FP_CMP_I0) | |
915d28fe | 8110 | (match_operand:<VPRED> 4 "register_operand" "Upl, Upl")))] | |
43cacb12 | 8111 | "TARGET_SVE" | |
915d28fe RS |
8112 | "#" |
8113 | "&& 1" | |
8114 | [(set (match_dup 0) | |
4a942af6 RS |
8115 | (unspec:<VPRED> |
8116 | [(match_dup 4) | |
8117 | (const_int SVE_MAYBE_NOT_PTRUE) | |
8118 | (match_dup 2) | |
8119 | (match_dup 3)] | |
8120 | SVE_COND_FP_CMP_I0))] | |
43cacb12 RS |
8121 | ) |
8122 | ||
915d28fe RS |
8123 | ;; Same for unordered comparisons. |
8124 | (define_insn_and_split "*fcmuo<mode>_and_combine" | |
8125 | [(set (match_operand:<VPRED> 0 "register_operand" "=Upa") | |
8126 | (and:<VPRED> | |
8127 | (unspec:<VPRED> | |
8128 | [(match_operand:<VPRED> 1) | |
4a942af6 | 8129 | (const_int SVE_KNOWN_PTRUE) | |
f75cdd2c RS |
8130 | (match_operand:SVE_FULL_F 2 "register_operand" "w") |
8131 | (match_operand:SVE_FULL_F 3 "register_operand" "w")] | |
4a942af6 | 8132 | UNSPEC_COND_FCMUO) | |
915d28fe | 8133 | (match_operand:<VPRED> 4 "register_operand" "Upl")))] | |
43cacb12 | 8134 | "TARGET_SVE" | |
915d28fe RS |
8135 | "#" |
8136 | "&& 1" | |
8137 | [(set (match_dup 0) | |
915d28fe | 8138 | (unspec:<VPRED> |
4a942af6 RS |
8139 | [(match_dup 4) |
8140 | (const_int SVE_MAYBE_NOT_PTRUE) | |
8141 | (match_dup 2) | |
8142 | (match_dup 3)] | |
8143 | UNSPEC_COND_FCMUO))] | |
43cacb12 RS |
8144 | )
8145 | ||
e36206c9 TC |
8146 | ;; Similar to *fcm<cmp_op><mode>_and_combine, but for BIC rather than AND. |
8147 | ;; In this case, we still need a separate NOT/BIC operation, but predicating | |
8148 | ;; the comparison on the BIC operand removes the need for a PTRUE. | |
;; Operand 5 is a scratch predicate; before RA the split allocates a fresh
;; pseudo for it (see can_create_pseudo_p below).
8149 | (define_insn_and_split "*fcm<cmp_op><mode>_bic_combine" | |
8150 | [(set (match_operand:<VPRED> 0 "register_operand" "=Upa") | |
8151 | (and:<VPRED> | |
8152 | (and:<VPRED> | |
8153 | (not:<VPRED> | |
8154 | (unspec:<VPRED> | |
8155 | [(match_operand:<VPRED> 1) | |
8156 | (const_int SVE_KNOWN_PTRUE) | |
8157 | (match_operand:SVE_FULL_F 2 "register_operand" "w") | |
8158 | (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "wDz")] | |
8159 | SVE_COND_FP_CMP_I0)) | |
8160 | (match_operand:<VPRED> 4 "register_operand" "Upa")) | |
8161 | (match_dup:<VPRED> 1))) | |
8162 | (clobber (match_scratch:<VPRED> 5 "=&Upl"))] | |
8163 | "TARGET_SVE" | |
8164 | "#" | |
8165 | "&& 1" | |
8166 | [(set (match_dup 5) | |
8167 | (unspec:<VPRED> | |
8168 | [(match_dup 4) | |
8169 | (const_int SVE_MAYBE_NOT_PTRUE) | |
8170 | (match_dup 2) | |
8171 | (match_dup 3)] | |
8172 | SVE_COND_FP_CMP_I0)) | |
8173 | (set (match_dup 0) | |
8174 | (and:<VPRED> | |
8175 | (not:<VPRED> | |
8176 | (match_dup 5)) | |
8177 | (match_dup 4)))] | |
8178 | { | |
8179 | if (can_create_pseudo_p ()) | |
8180 | operands[5] = gen_reg_rtx (<VPRED>mode); | |
8181 | } | |
8182 | ) | |
8183 | ||
8184 | ;; Make sure that we expand to a nor when the operand 4 of | |
8185 | ;; *fcm<cmp_op><mode>_bic_combine is a not. | |
;; Here the comparison stays predicated on operand 1 (still a known PTRUE)
;; and the split emits an explicit NOR-shaped AND of the two complements.
8186 | (define_insn_and_split "*fcm<cmp_op><mode>_nor_combine" | |
8187 | [(set (match_operand:<VPRED> 0 "register_operand" "=Upa") | |
8188 | (and:<VPRED> | |
8189 | (and:<VPRED> | |
8190 | (not:<VPRED> | |
8191 | (unspec:<VPRED> | |
8192 | [(match_operand:<VPRED> 1) | |
8193 | (const_int SVE_KNOWN_PTRUE) | |
8194 | (match_operand:SVE_FULL_F 2 "register_operand" "w") | |
8195 | (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "wDz")] | |
8196 | SVE_COND_FP_CMP_I0)) | |
8197 | (not:<VPRED> | |
8198 | (match_operand:<VPRED> 4 "register_operand" "Upa"))) | |
8199 | (match_dup:<VPRED> 1))) | |
8200 | (clobber (match_scratch:<VPRED> 5 "=&Upl"))] | |
8201 | "TARGET_SVE" | |
8202 | "#" | |
8203 | "&& 1" | |
8204 | [(set (match_dup 5) | |
8205 | (unspec:<VPRED> | |
8206 | [(match_dup 1) | |
8207 | (const_int SVE_KNOWN_PTRUE) | |
8208 | (match_dup 2) | |
8209 | (match_dup 3)] | |
8210 | SVE_COND_FP_CMP_I0)) | |
8211 | (set (match_dup 0) | |
8212 | (and:<VPRED> | |
8213 | (and:<VPRED> | |
8214 | (not:<VPRED> | |
8215 | (match_dup 5)) | |
8216 | (not:<VPRED> | |
8217 | (match_dup 4))) | |
8218 | (match_dup 1)))] | |
8219 | { | |
8220 | if (can_create_pseudo_p ()) | |
8221 | operands[5] = gen_reg_rtx (<VPRED>mode); | |
8222 | } | |
8223 | ) | |
8224 | ||
;; Same as *fcm<cmp_op><mode>_bic_combine, but for unordered comparisons
;; (UNSPEC_COND_FCMUO instead of SVE_COND_FP_CMP_I0).
8225 | (define_insn_and_split "*fcmuo<mode>_bic_combine" | |
8226 | [(set (match_operand:<VPRED> 0 "register_operand" "=Upa") | |
8227 | (and:<VPRED> | |
8228 | (and:<VPRED> | |
8229 | (not:<VPRED> | |
8230 | (unspec:<VPRED> | |
8231 | [(match_operand:<VPRED> 1) | |
8232 | (const_int SVE_KNOWN_PTRUE) | |
8233 | (match_operand:SVE_FULL_F 2 "register_operand" "w") | |
8234 | (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "wDz")] | |
8235 | UNSPEC_COND_FCMUO)) | |
8236 | (match_operand:<VPRED> 4 "register_operand" "Upa")) | |
8237 | (match_dup:<VPRED> 1))) | |
8238 | (clobber (match_scratch:<VPRED> 5 "=&Upl"))] | |
8239 | "TARGET_SVE" | |
8240 | "#" | |
8241 | "&& 1" | |
8242 | [(set (match_dup 5) | |
8243 | (unspec:<VPRED> | |
8244 | [(match_dup 4) | |
8245 | (const_int SVE_MAYBE_NOT_PTRUE) | |
8246 | (match_dup 2) | |
8247 | (match_dup 3)] | |
8248 | UNSPEC_COND_FCMUO)) | |
8249 | (set (match_dup 0) | |
8250 | (and:<VPRED> | |
8251 | (not:<VPRED> | |
8252 | (match_dup 5)) | |
8253 | (match_dup 4)))] | |
8254 | { | |
8255 | if (can_create_pseudo_p ()) | |
8256 | operands[5] = gen_reg_rtx (<VPRED>mode); | |
8257 | } | |
8258 | ) | |
8259 | ||
8260 | ;; Same for unordered comparisons. | |
8261 | (define_insn_and_split "*fcmuo<mode>_nor_combine" | |
8262 | [(set (match_operand:<VPRED> 0 "register_operand" "=Upa") | |
8263 | (and:<VPRED> | |
8264 | (and:<VPRED> | |
8265 | (not:<VPRED> | |
8266 | (unspec:<VPRED> | |
8267 | [(match_operand:<VPRED> 1) | |
8268 | (const_int SVE_KNOWN_PTRUE) | |
8269 | (match_operand:SVE_FULL_F 2 "register_operand" "w") | |
8270 | (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "wDz")] | |
8271 | UNSPEC_COND_FCMUO)) | |
8272 | (not:<VPRED> | |
8273 | (match_operand:<VPRED> 4 "register_operand" "Upa"))) | |
8274 | (match_dup:<VPRED> 1))) | |
8275 | (clobber (match_scratch:<VPRED> 5 "=&Upl"))] | |
8276 | "TARGET_SVE" | |
8277 | "#" | |
8278 | "&& 1" | |
8279 | [(set (match_dup 5) | |
8280 | (unspec:<VPRED> | |
8281 | [(match_dup 1) | |
8282 | (const_int SVE_KNOWN_PTRUE) | |
8283 | (match_dup 2) | |
8284 | (match_dup 3)] | |
8285 | UNSPEC_COND_FCMUO)) | |
8286 | (set (match_dup 0) | |
8287 | (and:<VPRED> | |
8288 | (and:<VPRED> | |
8289 | (not:<VPRED> | |
8290 | (match_dup 5)) | |
8291 | (not:<VPRED> | |
8292 | (match_dup 4))) | |
8293 | (match_dup 1)))] | |
8294 | { | |
8295 | if (can_create_pseudo_p ()) | |
8296 | operands[5] = gen_reg_rtx (<VPRED>mode); | |
8297 | } | |
8298 | ) | |
8299 | ||
42b4e87d RS |
8300 | ;; ------------------------------------------------------------------------- |
8301 | ;; ---- [FP] Absolute comparisons | |
8302 | ;; ------------------------------------------------------------------------- | |
8303 | ;; Includes: | |
8304 | ;; - FACGE | |
8305 | ;; - FACGT | |
8306 | ;; - FACLE | |
8307 | ;; - FACLT | |
8308 | ;; ------------------------------------------------------------------------- | |
8309 | ||
8310 | ;; Predicated floating-point absolute comparisons. | |
;; FAC<cc> compares the absolute values of operands 3 and 4; the FABS of
;; each input is expressed as a predicated UNSPEC_COND_FABS sharing the
;; same GP (operand 1) and ptrue flag (operand 2).
624d0f07 RS |
8311 | (define_expand "@aarch64_pred_fac<cmp_op><mode>" |
8312 | [(set (match_operand:<VPRED> 0 "register_operand") | |
8313 | (unspec:<VPRED> | |
8314 | [(match_operand:<VPRED> 1 "register_operand") | |
8315 | (match_operand:SI 2 "aarch64_sve_ptrue_flag") | |
f75cdd2c | 8316 | (unspec:SVE_FULL_F | |
624d0f07 RS |
8317 | [(match_dup 1) |
8318 | (match_dup 2) | |
f75cdd2c | 8319 | (match_operand:SVE_FULL_F 3 "register_operand")] | |
624d0f07 | 8320 | UNSPEC_COND_FABS) | |
f75cdd2c | 8321 | (unspec:SVE_FULL_F | |
624d0f07 RS |
8322 | [(match_dup 1) |
8323 | (match_dup 2) | |
f75cdd2c | 8324 | (match_operand:SVE_FULL_F 4 "register_operand")] | |
624d0f07 RS |
8325 | UNSPEC_COND_FABS)] |
8326 | SVE_COND_FP_ABS_CMP))] | |
8327 | "TARGET_SVE" | |
8328 | ) | |
8329 | ||
;; "Relaxed" variant: the FABS GPs (operands 5 and 6) are matched loosely
;; and rewritten to match operand 1, dropping the redundant operands.
0eb5e901 | 8330 | (define_insn_and_rewrite "*aarch64_pred_fac<cmp_op><mode>_relaxed" | |
42b4e87d RS |
8331 | [(set (match_operand:<VPRED> 0 "register_operand" "=Upa") |
8332 | (unspec:<VPRED> | |
8333 | [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
8334 | (match_operand:SI 4 "aarch64_sve_ptrue_flag") | |
f75cdd2c | 8335 | (unspec:SVE_FULL_F | |
42b4e87d | 8336 | [(match_operand 5) | |
0eb5e901 | 8337 | (const_int SVE_RELAXED_GP) | |
f75cdd2c | 8338 | (match_operand:SVE_FULL_F 2 "register_operand" "w")] | |
42b4e87d | 8339 | UNSPEC_COND_FABS) | |
f75cdd2c | 8340 | (unspec:SVE_FULL_F | |
0eb5e901 RS |
8341 | [(match_operand 6) |
8342 | (const_int SVE_RELAXED_GP) | |
f75cdd2c | 8343 | (match_operand:SVE_FULL_F 3 "register_operand" "w")] | |
42b4e87d RS |
8344 | UNSPEC_COND_FABS)] |
8345 | SVE_COND_FP_ABS_CMP))] | |
0eb5e901 | 8346 | "TARGET_SVE" | |
42b4e87d RS |
8347 | "fac<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>" |
8348 | "&& (!rtx_equal_p (operands[1], operands[5]) | |
0eb5e901 | 8349 | || !rtx_equal_p (operands[1], operands[6]))" | |
42b4e87d RS |
8350 | { |
8351 | operands[5] = copy_rtx (operands[1]); | |
0eb5e901 | 8352 | operands[6] = copy_rtx (operands[1]); | |
42b4e87d RS |
8353 | } |
8354 | ) | |
8355 | ||
;; "Strict" variant: the FABS operations must already be predicated on
;; operand 1 (match_dup 1), with an explicit strictness flag on each.
0eb5e901 RS |
8356 | (define_insn "*aarch64_pred_fac<cmp_op><mode>_strict" |
8357 | [(set (match_operand:<VPRED> 0 "register_operand" "=Upa") | |
8358 | (unspec:<VPRED> | |
8359 | [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
8360 | (match_operand:SI 4 "aarch64_sve_ptrue_flag") | |
8361 | (unspec:SVE_FULL_F | |
8362 | [(match_dup 1) | |
8363 | (match_operand:SI 5 "aarch64_sve_gp_strictness") | |
8364 | (match_operand:SVE_FULL_F 2 "register_operand" "w")] | |
8365 | UNSPEC_COND_FABS) | |
8366 | (unspec:SVE_FULL_F | |
8367 | [(match_dup 1) | |
8368 | (match_operand:SI 6 "aarch64_sve_gp_strictness") | |
8369 | (match_operand:SVE_FULL_F 3 "register_operand" "w")] | |
8370 | UNSPEC_COND_FABS)] | |
8371 | SVE_COND_FP_ABS_CMP))] | |
8372 | "TARGET_SVE" | |
8373 | "fac<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>" | |
8374 | ) | |
8375 | ||
624d0f07 RS |
8376 | ;; ------------------------------------------------------------------------- |
8377 | ;; ---- [PRED] Select | |
8378 | ;; ------------------------------------------------------------------------- | |
8379 | ;; Includes: | |
8380 | ;; - SEL | |
8381 | ;; ------------------------------------------------------------------------- | |
8382 | ||
;; Predicate select: operand 0 = (operand 3 ? operand 1 : operand 2),
;; written in RTL as (3 & 1) | (~3 & 2) and implemented by SEL.
8383 | (define_insn "@vcond_mask_<mode><mode>" | |
8384 | [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") | |
8385 | (ior:PRED_ALL | |
8386 | (and:PRED_ALL | |
8387 | (match_operand:PRED_ALL 3 "register_operand" "Upa") | |
8388 | (match_operand:PRED_ALL 1 "register_operand" "Upa")) | |
8389 | (and:PRED_ALL | |
8390 | (not (match_dup 3)) | |
8391 | (match_operand:PRED_ALL 2 "register_operand" "Upa"))))] | |
8392 | "TARGET_SVE" | |
8393 | "sel\t%0.b, %3, %1.b, %2.b" | |
8394 | ) | |
8395 | ||
915d28fe RS |
8396 | ;; ------------------------------------------------------------------------- |
8397 | ;; ---- [PRED] Test bits | |
8398 | ;; ------------------------------------------------------------------------- | |
8399 | ;; Includes: | |
8400 | ;; - PTEST | |
8401 | ;; ------------------------------------------------------------------------- | |
8402 | ||
8403 | ;; Branch based on predicate equality or inequality. | |
;; Strategy: when comparing against a nonzero predicate, XOR the two
;; predicates first (under a PTRUE of the right element width), then PTEST
;; the result; equality holds iff the XOR is all-false.
8404 | (define_expand "cbranch<mode>4" | |
8405 | [(set (pc) | |
8406 | (if_then_else | |
8407 | (match_operator 0 "aarch64_equality_operator" | |
8408 | [(match_operand:PRED_ALL 1 "register_operand") | |
8409 | (match_operand:PRED_ALL 2 "aarch64_simd_reg_or_zero")]) | |
8410 | (label_ref (match_operand 3 "")) | |
8411 | (pc)))] | |
8412 | "" | |
43cacb12 | 8413 | { | |
34467289 RS |
8414 | rtx ptrue = force_reg (VNx16BImode, aarch64_ptrue_all (<data_bytes>)); |
8415 | rtx cast_ptrue = gen_lowpart (<MODE>mode, ptrue); | |
8416 | rtx ptrue_flag = gen_int_mode (SVE_KNOWN_PTRUE, SImode); | |
915d28fe RS |
8417 | rtx pred; |
8418 | if (operands[2] == CONST0_RTX (<MODE>mode)) | |
8419 | pred = operands[1]; | |
8420 | else | |
8421 | { | |
8422 | pred = gen_reg_rtx (<MODE>mode); | |
34467289 RS |
8423 | emit_insn (gen_aarch64_pred_xor<mode>_z (pred, cast_ptrue, operands[1], |
8424 | operands[2])); | |
915d28fe | 8425 | } | |
34467289 | 8426 | emit_insn (gen_aarch64_ptest<mode> (ptrue, cast_ptrue, ptrue_flag, pred)); | |
915d28fe RS |
8427 | operands[1] = gen_rtx_REG (CC_NZCmode, CC_REGNUM); |
8428 | operands[2] = const0_rtx; | |
43cacb12 RS |
8429 | } |
8430 | ) | |
8431 | ||
34467289 RS |
8432 | ;; See "Description of UNSPEC_PTEST" above for details. |
;; Operands 0-2 describe the GP used for the test (a VNx16BI ptrue plus its
;; flag); operand 3 is the predicate whose active bits are being tested.
8433 | (define_insn "aarch64_ptest<mode>" | |
915d28fe | 8434 | [(set (reg:CC_NZC CC_REGNUM) | |
34467289 RS |
8435 | (unspec:CC_NZC [(match_operand:VNx16BI 0 "register_operand" "Upa") |
8436 | (match_operand 1) | |
8437 | (match_operand:SI 2 "aarch64_sve_ptrue_flag") | |
8438 | (match_operand:PRED_ALL 3 "register_operand" "Upa")] | |
8439 | UNSPEC_PTEST))] | |
43cacb12 | 8440 | "TARGET_SVE" | |
34467289 | 8441 | "ptest\t%0, %3.b" | |
43cacb12 RS |
8442 | ) |
8443 | ||
915d28fe RS |
8444 | ;; ========================================================================= |
8445 | ;; == Reductions | |
8446 | ;; ========================================================================= | |
8447 | ||
8448 | ;; ------------------------------------------------------------------------- | |
8449 | ;; ---- [INT,FP] Conditional reductions | |
8450 | ;; ------------------------------------------------------------------------- | |
8451 | ;; Includes: | |
624d0f07 | 8452 | ;; - CLASTA |
915d28fe RS |
8453 | ;; - CLASTB |
8454 | ;; ------------------------------------------------------------------------- | |
8455 | ||
8456 | ;; Set operand 0 to the last active element in operand 3, or to tied | |
8457 | ;; operand 1 if no elements are active. | |
;; Two alternatives: a general (core) register destination and a FP/SIMD
;; register destination; operand 1 is tied to operand 0 in both.
624d0f07 | 8458 | (define_insn "@fold_extract_<last_op>_<mode>" | |
801790b3 | 8459 | [(set (match_operand:<VEL> 0 "register_operand" "=?r, w") | |
915d28fe RS |
8460 | (unspec:<VEL> |
8461 | [(match_operand:<VEL> 1 "register_operand" "0, 0") | |
8462 | (match_operand:<VPRED> 2 "register_operand" "Upl, Upl") | |
f75cdd2c | 8463 | (match_operand:SVE_FULL 3 "register_operand" "w, w")] | |
624d0f07 RS |
8464 | CLAST))] |
8465 | "TARGET_SVE" | |
8466 | "@ | |
8467 | clast<ab>\t%<vwcore>0, %2, %<vwcore>0, %3.<Vetype> | |
8468 | clast<ab>\t%<Vetype>0, %2, %<Vetype>0, %3.<Vetype>" | |
8469 | ) | |
8470 | ||
;; Vector variant of the above (CLASTA/CLASTB with a vector destination);
;; the second alternative uses MOVPRFX to untie operands 0 and 1.
8471 | (define_insn "@aarch64_fold_extract_vector_<last_op>_<mode>" | |
f75cdd2c RS |
8472 | [(set (match_operand:SVE_FULL 0 "register_operand" "=w, ?&w") |
8473 | (unspec:SVE_FULL | |
8474 | [(match_operand:SVE_FULL 1 "register_operand" "0, w") | |
624d0f07 | 8475 | (match_operand:<VPRED> 2 "register_operand" "Upl, Upl") | |
f75cdd2c | 8476 | (match_operand:SVE_FULL 3 "register_operand" "w, w")] | |
624d0f07 | 8477 | CLAST))] | |
3db85990 | 8478 | "TARGET_SVE" | |
915d28fe | 8479 | "@ | |
624d0f07 RS |
8480 | clast<ab>\t%0.<Vetype>, %2, %0.<Vetype>, %3.<Vetype> |
8481 | movprfx\t%0, %1\;clast<ab>\t%0.<Vetype>, %2, %0.<Vetype>, %3.<Vetype>" | |
3db85990 ST |
8482 | ) |
8483 | ||
915d28fe RS |
8484 | ;; ------------------------------------------------------------------------- |
8485 | ;; ---- [INT] Tree reductions | |
8486 | ;; ------------------------------------------------------------------------- | |
8487 | ;; Includes: | |
8488 | ;; - ANDV | |
8489 | ;; - EORV | |
8490 | ;; - ORV | |
624d0f07 | 8491 | ;; - SADDV |
915d28fe RS |
8492 | ;; - SMAXV |
8493 | ;; - SMINV | |
8494 | ;; - UADDV | |
8495 | ;; - UMAXV | |
8496 | ;; - UMINV | |
8497 | ;; ------------------------------------------------------------------------- | |
8498 | ||
8499 | ;; Unpredicated integer add reduction. | |
;; UADDV always produces a DImode result; for narrower element types the
;; final value is moved into the requested mode via a lowpart.
8500 | (define_expand "reduc_plus_scal_<mode>" | |
624d0f07 | 8501 | [(match_operand:<VEL> 0 "register_operand") | |
f75cdd2c | 8502 | (match_operand:SVE_FULL_I 1 "register_operand")] | |
43cacb12 RS |
8503 | "TARGET_SVE" |
8504 | { | |
624d0f07 RS |
8505 | rtx pred = aarch64_ptrue_reg (<VPRED>mode); |
8506 | rtx tmp = <VEL>mode == DImode ? operands[0] : gen_reg_rtx (DImode); | |
8507 | emit_insn (gen_aarch64_pred_reduc_uadd_<mode> (tmp, pred, operands[1])); | |
8508 | if (tmp != operands[0]) | |
8509 | emit_move_insn (operands[0], gen_lowpart (<VEL>mode, tmp)); | |
8510 | DONE; | |
43cacb12 RS |
8511 | } |
8512 | ) | |
8513 | ||
915d28fe | 8514 | ;; Predicated integer add reduction. The result is always 64-bits. | |
;; The <max_elem_bits> >= <elem_bits> test restricts which element widths
;; each SADDV/UADDV variant accepts.
624d0f07 RS |
8515 | (define_insn "@aarch64_pred_reduc_<optab>_<mode>" |
8516 | [(set (match_operand:DI 0 "register_operand" "=w") | |
8517 | (unspec:DI [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
f75cdd2c | 8518 | (match_operand:SVE_FULL_I 2 "register_operand" "w")] | |
624d0f07 RS |
8519 | SVE_INT_ADDV))] |
8520 | "TARGET_SVE && <max_elem_bits> >= <elem_bits>" | |
8521 | "<su>addv\t%d0, %1, %2.<Vetype>" | |
43cacb12 RS |
8522 | ) |
8523 | ||
b0760a40 | 8524 | ;; Unpredicated integer reductions. | |
;; Expander that simply supplies an all-true GP (operand 2) for the
;; predicated pattern below.
915d28fe RS |
8525 | (define_expand "reduc_<optab>_scal_<mode>" |
8526 | [(set (match_operand:<VEL> 0 "register_operand") | |
8527 | (unspec:<VEL> [(match_dup 2) | |
f75cdd2c | 8528 | (match_operand:SVE_FULL_I 1 "register_operand")] | |
b0760a40 | 8529 | SVE_INT_REDUCTION))] | |
43cacb12 | 8530 | "TARGET_SVE" | |
915d28fe RS |
8531 | { |
8532 | operands[2] = aarch64_ptrue_reg (<VPRED>mode); | |
8533 | } | |
43cacb12 RS |
8534 | ) |
8535 | ||
b0760a40 | 8536 | ;; Predicated integer reductions. | |
624d0f07 | 8537 | (define_insn "@aarch64_pred_reduc_<optab>_<mode>" | |
915d28fe RS |
8538 | [(set (match_operand:<VEL> 0 "register_operand" "=w") |
8539 | (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
f75cdd2c | 8540 | (match_operand:SVE_FULL_I 2 "register_operand" "w")] | |
b0760a40 | 8541 | SVE_INT_REDUCTION))] | |
43cacb12 | 8542 | "TARGET_SVE" | |
b0760a40 | 8543 | "<sve_int_op>\t%<Vetype>0, %1, %2.<Vetype>" | |
43cacb12 RS |
8544 | ) |
8545 | ||
915d28fe RS |
8546 | ;; ------------------------------------------------------------------------- |
8547 | ;; ---- [FP] Tree reductions | |
8548 | ;; ------------------------------------------------------------------------- | |
8549 | ;; Includes: | |
8550 | ;; - FADDV | |
8551 | ;; - FMAXNMV | |
8552 | ;; - FMAXV | |
8553 | ;; - FMINNMV | |
8554 | ;; - FMINV | |
8555 | ;; ------------------------------------------------------------------------- | |
8556 | ||
b0760a40 RS |
8557 | ;; Unpredicated floating-point tree reductions. |
;; As for the integer reductions: supply an all-true GP (operand 2) and
;; defer to the predicated pattern below.
8558 | (define_expand "reduc_<optab>_scal_<mode>" | |
915d28fe RS |
8559 | [(set (match_operand:<VEL> 0 "register_operand") |
8560 | (unspec:<VEL> [(match_dup 2) | |
f75cdd2c | 8561 | (match_operand:SVE_FULL_F 1 "register_operand")] | |
b0760a40 | 8562 | SVE_FP_REDUCTION))] | |
43cacb12 | 8563 | "TARGET_SVE" | |
915d28fe RS |
8564 | { |
8565 | operands[2] = aarch64_ptrue_reg (<VPRED>mode); | |
8566 | } | |
43cacb12 RS |
8567 | ) |
8568 | ||
;; Forward reduc_fmax/fmin requests to the SVE_FP_REDUCTION pattern for
;; the corresponding operation.
e32b9eb3 RS |
8569 | (define_expand "reduc_<fmaxmin>_scal_<mode>" |
8570 | [(match_operand:<VEL> 0 "register_operand") | |
8571 | (unspec:<VEL> [(match_operand:SVE_FULL_F 1 "register_operand")] | |
8572 | FMAXMINNMV)] | |
8573 | "TARGET_SVE" | |
8574 | { | |
8575 | emit_insn (gen_reduc_<optab>_scal_<mode> (operands[0], operands[1])); | |
8576 | DONE; | |
8577 | } | |
8578 | ) | |
8579 | ||
b0760a40 | 8580 | ;; Predicated floating-point tree reductions. | |
624d0f07 | 8581 | (define_insn "@aarch64_pred_reduc_<optab>_<mode>" | |
915d28fe RS |
8582 | [(set (match_operand:<VEL> 0 "register_operand" "=w") |
8583 | (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
f75cdd2c | 8584 | (match_operand:SVE_FULL_F 2 "register_operand" "w")] | |
b0760a40 | 8585 | SVE_FP_REDUCTION))] | |
43cacb12 | 8586 | "TARGET_SVE" | |
b0760a40 | 8587 | "<sve_fp_op>\t%<Vetype>0, %1, %2.<Vetype>" | |
43cacb12 RS |
8588 | ) |
8589 | ||
915d28fe RS |
8590 | ;; ------------------------------------------------------------------------- |
8591 | ;; ---- [FP] Left-to-right reductions | |
8592 | ;; ------------------------------------------------------------------------- | |
8593 | ;; Includes: | |
8594 | ;; - FADDA | |
8595 | ;; ------------------------------------------------------------------------- | |
8596 | ||
8597 | ;; Unpredicated in-order FP reductions. | |
8598 | (define_expand "fold_left_plus_<mode>" | |
8599 | [(set (match_operand:<VEL> 0 "register_operand") | |
8600 | (unspec:<VEL> [(match_dup 3) | |
8601 | (match_operand:<VEL> 1 "register_operand") | |
f75cdd2c | 8602 | (match_operand:SVE_FULL_F 2 "register_operand")] | |
915d28fe | 8603 | UNSPEC_FADDA))] | |
43cacb12 | 8604 | "TARGET_SVE" | |
915d28fe RS |
8605 | { |
8606 | operands[3] = aarch64_ptrue_reg (<VPRED>mode); | |
8607 | } | |
43cacb12 RS |
8608 | ) |
8609 | ||
915d28fe | 8610 | ;; Predicated in-order FP reductions. | |
;; FADDA accumulates strictly in element order; the scalar accumulator
;; (operand 1) is tied to the destination ("0" constraint).
8611 | (define_insn "mask_fold_left_plus_<mode>" | |
8612 | [(set (match_operand:<VEL> 0 "register_operand" "=w") | |
8613 | (unspec:<VEL> [(match_operand:<VPRED> 3 "register_operand" "Upl") | |
8614 | (match_operand:<VEL> 1 "register_operand" "0") | |
f75cdd2c | 8615 | (match_operand:SVE_FULL_F 2 "register_operand" "w")] | |
915d28fe | 8616 | UNSPEC_FADDA))] | |
43cacb12 | 8617 | "TARGET_SVE" | |
915d28fe | 8618 | "fadda\t%<Vetype>0, %3, %<Vetype>0, %2.<Vetype>" | |
43cacb12 RS |
8619 | ) |
8620 | ||
915d28fe RS |
8621 | ;; ========================================================================= |
8622 | ;; == Permutes | |
8623 | ;; ========================================================================= | |
8624 | ||
8625 | ;; ------------------------------------------------------------------------- | |
8626 | ;; ---- [INT,FP] General permutes | |
8627 | ;; ------------------------------------------------------------------------- | |
8628 | ;; Includes: | |
8629 | ;; - TBL | |
8630 | ;; ------------------------------------------------------------------------- | |
8631 | ||
;; General permute, restricted to constant-length vectors; the expansion
;; is done entirely by aarch64_expand_sve_vec_perm.
8632 | (define_expand "vec_perm<mode>" | |
f75cdd2c RS |
8633 | [(match_operand:SVE_FULL 0 "register_operand") |
8634 | (match_operand:SVE_FULL 1 "register_operand") | |
8635 | (match_operand:SVE_FULL 2 "register_operand") | |
915d28fe RS |
8636 | (match_operand:<V_INT_EQUIV> 3 "aarch64_sve_vec_perm_operand")] |
8637 | "TARGET_SVE && GET_MODE_NUNITS (<MODE>mode).is_constant ()" | |
9bfb28ed | 8638 | { | |
915d28fe RS |
8639 | aarch64_expand_sve_vec_perm (operands[0], operands[1], |
8640 | operands[2], operands[3]); | |
9bfb28ed RS |
8641 | DONE; |
8642 | } | |
8643 | ) | |
8644 | ||
;; TBL: the selector (operand 2) uses the integer-equivalent vector mode
;; of the data vector (operand 1).
624d0f07 | 8645 | (define_insn "@aarch64_sve_tbl<mode>" | |
f75cdd2c RS |
8646 | [(set (match_operand:SVE_FULL 0 "register_operand" "=w") |
8647 | (unspec:SVE_FULL | |
8648 | [(match_operand:SVE_FULL 1 "register_operand" "w") | |
915d28fe RS |
8649 | (match_operand:<V_INT_EQUIV> 2 "register_operand" "w")] |
8650 | UNSPEC_TBL))] | |
43cacb12 | 8651 | "TARGET_SVE" | |
915d28fe | 8652 | "tbl\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>" | |
43cacb12 RS |
8653 | )
8654 | ||
915d28fe RS |
8655 | ;; ------------------------------------------------------------------------- |
8656 | ;; ---- [INT,FP] Special-purpose unary permutes | |
8657 | ;; ------------------------------------------------------------------------- | |
8658 | ;; Includes: | |
624d0f07 | 8659 | ;; - COMPACT |
915d28fe RS |
8660 | ;; - DUP |
8661 | ;; - REV | |
915d28fe RS |
8662 | ;; ------------------------------------------------------------------------- |
8663 | ||
624d0f07 RS |
8664 | ;; Compact active elements and pad with zeros. | |
8665 | (define_insn "@aarch64_sve_compact<mode>" | |
f75cdd2c RS |
8666 | [(set (match_operand:SVE_FULL_SD 0 "register_operand" "=w") |
8667 | (unspec:SVE_FULL_SD | |
8668 | [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
8669 | (match_operand:SVE_FULL_SD 2 "register_operand" "w")] | |
8670 | UNSPEC_SVE_COMPACT))] | |
624d0f07 RS |
8671 | "TARGET_SVE" |
8672 | "compact\t%0.<Vetype>, %1, %2.<Vetype>" | |
8673 | ) | |
8674 | ||
915d28fe | 8675 | ;; Duplicate one element of a vector. | |
;; The insn condition limits the lane's byte offset to the 0-63 range that
;; DUP's immediate index accepts.
624d0f07 | 8676 | (define_insn "@aarch64_sve_dup_lane<mode>" | |
6c3ce63b RS |
8677 | [(set (match_operand:SVE_ALL 0 "register_operand" "=w") |
8678 | (vec_duplicate:SVE_ALL | |
915d28fe | 8679 | (vec_select:<VEL> | |
6c3ce63b | 8680 | (match_operand:SVE_ALL 1 "register_operand" "w") | |
915d28fe RS |
8681 | (parallel [(match_operand:SI 2 "const_int_operand")]))))] |
8682 | "TARGET_SVE | |
6c3ce63b RS |
8683 | && IN_RANGE (INTVAL (operands[2]) * <container_bits> / 8, 0, 63)" |
8684 | "dup\t%0.<Vctype>, %1.<Vctype>[%2]" | |
915d28fe RS |
8685 | ) |
8686 | ||
624d0f07 RS |
8687 | ;; Use DUP.Q to duplicate a 128-bit segment of a register. |
8688 | ;; | |
8689 | ;; The vec_select:<V128> sets memory lane number N of the V128 to lane | |
8690 | ;; number op2 + N of op1. (We don't need to distinguish between memory | |
8691 | ;; and architectural register lane numbering for op1 or op0, since the | |
8692 | ;; two numbering schemes are the same for SVE.) | |
8693 | ;; | |
f75cdd2c | 8694 | ;; The vec_duplicate:SVE_FULL then copies memory lane number N of the | |
624d0f07 RS |
8695 | ;; V128 (and thus lane number op2 + N of op1) to lane numbers N + I * STEP |
8696 | ;; of op0. We therefore get the correct result for both endiannesses. | |
8697 | ;; | |
8698 | ;; The wrinkle is that for big-endian V128 registers, memory lane numbering | |
8699 | ;; is in the opposite order to architectural register lane numbering. | |
8700 | ;; Thus if we were to do this operation via a V128 temporary register, | |
8701 | ;; the vec_select and vec_duplicate would both involve a reverse operation | |
8702 | ;; for big-endian targets. In this fused pattern the two reverses cancel | |
8703 | ;; each other out. | |
8704 | (define_insn "@aarch64_sve_dupq_lane<mode>" | |
f75cdd2c RS |
8705 | [(set (match_operand:SVE_FULL 0 "register_operand" "=w") |
8706 | (vec_duplicate:SVE_FULL | |
624d0f07 | 8707 | (vec_select:<V128> | |
f75cdd2c | 8708 | (match_operand:SVE_FULL 1 "register_operand" "w") | |
624d0f07 RS |
8709 | (match_operand 2 "ascending_int_parallel"))))] |
8710 | "TARGET_SVE | |
8711 | && (INTVAL (XVECEXP (operands[2], 0, 0)) | |
8712 | * GET_MODE_SIZE (<VEL>mode)) % 16 == 0 | |
8713 | && IN_RANGE (INTVAL (XVECEXP (operands[2], 0, 0)) | |
8714 | * GET_MODE_SIZE (<VEL>mode), 0, 63)" | |
8715 | { | |
8716 | unsigned int byte = (INTVAL (XVECEXP (operands[2], 0, 0)) | |
8717 | * GET_MODE_SIZE (<VEL>mode)); | |
8718 | operands[2] = gen_int_mode (byte / 16, DImode); | |
8719 | return "dup\t%0.q, %1.q[%2]"; | |
8720 | } | |
8721 | ) | |
8722 | ||
915d28fe RS |
8723 | ;; Reverse the order of elements within a full vector. |
8724 | (define_insn "@aarch64_sve_rev<mode>" | |
6c3ce63b RS |
8725 | [(set (match_operand:SVE_ALL 0 "register_operand" "=w") |
8726 | (unspec:SVE_ALL | |
8727 | [(match_operand:SVE_ALL 1 "register_operand" "w")] | |
f75cdd2c | 8728 | UNSPEC_REV))] | |
9bfb28ed | 8729 | "TARGET_SVE" | |
6c3ce63b | 8730 | "rev\t%0.<Vctype>, %1.<Vctype>")
915d28fe | 8731 | |
915d28fe RS |
8732 | ;; ------------------------------------------------------------------------- |
8733 | ;; ---- [INT,FP] Special-purpose binary permutes | |
8734 | ;; ------------------------------------------------------------------------- | |
8735 | ;; Includes: | |
6c3ce63b | 8736 | ;; - EXT |
624d0f07 | 8737 | ;; - SPLICE |
915d28fe RS |
8738 | ;; - TRN1 |
8739 | ;; - TRN2 | |
8740 | ;; - UZP1 | |
8741 | ;; - UZP2 | |
8742 | ;; - ZIP1 | |
8743 | ;; - ZIP2 | |
8744 | ;; ------------------------------------------------------------------------- | |
8745 | ||
624d0f07 RS |
8746 | ;; Like EXT, but start at the first active element. |
;; SPLICE ties operand 2 to the destination in the first alternative; the
;; second uses MOVPRFX, as recorded in the "movprfx" attribute.
8747 | (define_insn "@aarch64_sve_splice<mode>" | |
f75cdd2c RS |
8748 | [(set (match_operand:SVE_FULL 0 "register_operand" "=w, ?&w") |
8749 | (unspec:SVE_FULL | |
8750 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
8751 | (match_operand:SVE_FULL 2 "register_operand" "0, w") | |
8752 | (match_operand:SVE_FULL 3 "register_operand" "w, w")] | |
8753 | UNSPEC_SVE_SPLICE))] | |
624d0f07 RS |
8754 | "TARGET_SVE" |
8755 | "@ | |
8756 | splice\t%0.<Vetype>, %1, %0.<Vetype>, %3.<Vetype> | |
8757 | movprfx\t%0, %2\;splice\t%0.<Vetype>, %1, %0.<Vetype>, %3.<Vetype>" | |
8758 | [(set_attr "movprfx" "*, yes")] | |
8759 | ) | |
8760 | ||
915d28fe RS |
8761 | ;; Permutes that take half the elements from one vector and half the |
8762 | ;; elements from the other. | |
624d0f07 | 8763 | (define_insn "@aarch64_sve_<perm_insn><mode>" | |
6c3ce63b RS |
8764 | [(set (match_operand:SVE_ALL 0 "register_operand" "=w") |
8765 | (unspec:SVE_ALL | |
8766 | [(match_operand:SVE_ALL 1 "register_operand" "w") | |
8767 | (match_operand:SVE_ALL 2 "register_operand" "w")] | |
f75cdd2c | 8768 | PERMUTE))] | |
9bfb28ed | 8769 | "TARGET_SVE" | |
6c3ce63b | 8770 | "<perm_insn>\t%0.<Vctype>, %1.<Vctype>, %2.<Vctype>" | |
915d28fe RS |
8771 | ) |
8772 | ||
36696774 RS |
8773 | ;; Apply PERMUTE to 128-bit sequences. The behavior of these patterns |
8774 | ;; doesn't depend on the mode. | |
;; These .q forms require the F64MM extension (TARGET_SVE_F64MM).
8775 | (define_insn "@aarch64_sve_<optab><mode>" | |
8776 | [(set (match_operand:SVE_FULL 0 "register_operand" "=w") | |
8777 | (unspec:SVE_FULL | |
8778 | [(match_operand:SVE_FULL 1 "register_operand" "w") | |
8779 | (match_operand:SVE_FULL 2 "register_operand" "w")] | |
8780 | PERMUTEQ))] | |
8781 | "TARGET_SVE_F64MM" | |
8782 | "<perm_insn>\t%0.q, %1.q, %2.q" | |
8783 | ) | |
8784 | ||
915d28fe RS |
8785 | ;; Concatenate two vectors and extract a subvector. Note that the |
8786 | ;; immediate (third) operand is the lane index not the byte index. | |
;; The C fragment converts the lane index to EXT's byte offset, which must
;; fit in 0-255 (checked in the insn condition).
624d0f07 | 8787 | (define_insn "@aarch64_sve_ext<mode>" | |
6c3ce63b RS |
8788 | [(set (match_operand:SVE_ALL 0 "register_operand" "=w, ?&w") |
8789 | (unspec:SVE_ALL | |
8790 | [(match_operand:SVE_ALL 1 "register_operand" "0, w") | |
8791 | (match_operand:SVE_ALL 2 "register_operand" "w, w") | |
f75cdd2c RS |
8792 | (match_operand:SI 3 "const_int_operand")] |
8793 | UNSPEC_EXT))] | |
915d28fe | 8794 | "TARGET_SVE | |
6c3ce63b | 8795 | && IN_RANGE (INTVAL (operands[3]) * <container_bits> / 8, 0, 255)" | |
9bfb28ed | 8796 | { | |
6c3ce63b | 8797 | operands[3] = GEN_INT (INTVAL (operands[3]) * <container_bits> / 8); | |
06b3ba23 RS |
8798 | return (which_alternative == 0 |
8799 | ? "ext\\t%0.b, %0.b, %2.b, #%3" | |
8800 | : "movprfx\t%0, %1\;ext\\t%0.b, %0.b, %2.b, #%3"); | |
43cacb12 | 8801 | } | |
06b3ba23 | 8802 | [(set_attr "movprfx" "*,yes")] | |
43cacb12 RS |
8803 | )
8804 | ||
28350fd1 RS |
8805 | ;; ------------------------------------------------------------------------- |
8806 | ;; ---- [PRED] Special-purpose unary permutes | |
8807 | ;; ------------------------------------------------------------------------- | |
8808 | ;; Includes: | |
8809 | ;; - REV | |
8810 | ;; ------------------------------------------------------------------------- | |
8811 | ||
8812 | (define_insn "@aarch64_sve_rev<mode>" | |
8813 | [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") | |
8814 | (unspec:PRED_ALL [(match_operand:PRED_ALL 1 "register_operand" "Upa")] | |
8815 | UNSPEC_REV))] | |
8816 | "TARGET_SVE" | |
8817 | "rev\t%0.<Vetype>, %1.<Vetype>") | |
8818 | ||
915d28fe RS |
8819 | ;; ------------------------------------------------------------------------- |
8820 | ;; ---- [PRED] Special-purpose binary permutes | |
8821 | ;; ------------------------------------------------------------------------- | |
8822 | ;; Includes: | |
8823 | ;; - TRN1 | |
8824 | ;; - TRN2 | |
8825 | ;; - UZP1 | |
8826 | ;; - UZP2 | |
8827 | ;; - ZIP1 | |
8828 | ;; - ZIP2 | |
8829 | ;; ------------------------------------------------------------------------- | |
8830 | ||
8831 | ;; Permutes that take half the elements from one vector and half the | |
8832 | ;; elements from the other. | |
2803bc3b | 8833 | (define_insn "@aarch64_sve_<perm_insn><mode>" |
915d28fe RS |
8834 | [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") |
8835 | (unspec:PRED_ALL [(match_operand:PRED_ALL 1 "register_operand" "Upa") | |
8836 | (match_operand:PRED_ALL 2 "register_operand" "Upa")] | |
8837 | PERMUTE))] | |
43cacb12 | 8838 | "TARGET_SVE" |
3e2751ce | 8839 | "<perm_insn>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>" |
43cacb12 RS |
8840 | ) |
8841 | ||
8535755a TC |
8842 | ;; Special purpose permute used by the predicate generation instructions. |
8843 | ;; Unlike the normal permute patterns, these instructions operate on VNx16BI | |
8844 | ;; regardless of the element size, so that all input and output bits are | |
8845 | ;; well-defined. Operand 3 then indicates the size of the permute. | |
8846 | (define_insn "@aarch64_sve_trn1_conv<mode>" | |
8847 | [(set (match_operand:VNx16BI 0 "register_operand" "=Upa") | |
8848 | (unspec:VNx16BI [(match_operand:VNx16BI 1 "register_operand" "Upa") | |
8849 | (match_operand:VNx16BI 2 "register_operand" "Upa") | |
8850 | (match_operand:PRED_ALL 3 "aarch64_simd_imm_zero")] | |
8851 | UNSPEC_TRN1_CONV))] | |
8852 | "TARGET_SVE" | |
8853 | "trn1\t%0.<PRED_ALL:Vetype>, %1.<PRED_ALL:Vetype>, %2.<PRED_ALL:Vetype>" | |
8854 | ) | |
8855 | ||
915d28fe RS |
8856 | ;; ========================================================================= |
8857 | ;; == Conversions | |
8858 | ;; ========================================================================= | |
8859 | ||
8860 | ;; ------------------------------------------------------------------------- | |
8861 | ;; ---- [INT<-INT] Packs | |
8862 | ;; ------------------------------------------------------------------------- | |
8863 | ;; Includes: | |
8864 | ;; - UZP1 | |
8865 | ;; ------------------------------------------------------------------------- | |
8866 | ||
43cacb12 RS |
8867 | ;; Integer pack. Use UZP1 on the narrower type, which discards |
8868 | ;; the high part of each wide element. | |
8869 | (define_insn "vec_pack_trunc_<Vwide>" | |
f75cdd2c RS |
8870 | [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=w") |
8871 | (unspec:SVE_FULL_BHSI | |
43cacb12 RS |
8872 | [(match_operand:<VWIDE> 1 "register_operand" "w") |
8873 | (match_operand:<VWIDE> 2 "register_operand" "w")] | |
8874 | UNSPEC_PACK))] | |
8875 | "TARGET_SVE" | |
8876 | "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>" | |
8877 | ) | |
8878 | ||
915d28fe RS |
8879 | ;; ------------------------------------------------------------------------- |
8880 | ;; ---- [INT<-INT] Unpacks | |
8881 | ;; ------------------------------------------------------------------------- | |
8882 | ;; Includes: | |
8883 | ;; - SUNPKHI | |
8884 | ;; - SUNPKLO | |
8885 | ;; - UUNPKHI | |
8886 | ;; - UUNPKLO | |
8887 | ;; ------------------------------------------------------------------------- | |
8888 | ||
8889 | ;; Unpack the low or high half of a vector, where "high" refers to | |
8890 | ;; the low-numbered lanes for big-endian and the high-numbered lanes | |
8891 | ;; for little-endian. | |
f75cdd2c | 8892 | (define_expand "vec_unpack<su>_<perm_hilo>_<SVE_FULL_BHSI:mode>" |
915d28fe | 8893 | [(match_operand:<VWIDE> 0 "register_operand") |
f75cdd2c RS |
8894 | (unspec:<VWIDE> |
8895 | [(match_operand:SVE_FULL_BHSI 1 "register_operand")] UNPACK)] | |
43cacb12 RS |
8896 | "TARGET_SVE" |
8897 | { | |
915d28fe | 8898 | emit_insn ((<hi_lanes_optab> |
f75cdd2c RS |
8899 | ? gen_aarch64_sve_<su>unpkhi_<SVE_FULL_BHSI:mode> |
8900 | : gen_aarch64_sve_<su>unpklo_<SVE_FULL_BHSI:mode>) | |
915d28fe RS |
8901 | (operands[0], operands[1])); |
8902 | DONE; | |
8903 | } | |
8904 | ) | |
8905 | ||
f75cdd2c | 8906 | (define_insn "@aarch64_sve_<su>unpk<perm_hilo>_<SVE_FULL_BHSI:mode>" |
915d28fe | 8907 | [(set (match_operand:<VWIDE> 0 "register_operand" "=w") |
f75cdd2c RS |
8908 | (unspec:<VWIDE> |
8909 | [(match_operand:SVE_FULL_BHSI 1 "register_operand" "w")] | |
8910 | UNPACK))] | |
915d28fe RS |
8911 | "TARGET_SVE" |
8912 | "<su>unpk<perm_hilo>\t%0.<Vewtype>, %1.<Vetype>" | |
8913 | ) | |
8914 | ||
8915 | ;; ------------------------------------------------------------------------- | |
8916 | ;; ---- [INT<-FP] Conversions | |
8917 | ;; ------------------------------------------------------------------------- | |
8918 | ;; Includes: | |
8919 | ;; - FCVTZS | |
8920 | ;; - FCVTZU | |
8921 | ;; ------------------------------------------------------------------------- | |
8922 | ||
8923 | ;; Unpredicated conversion of floats to integers of the same size (HF to HI, | |
8924 | ;; SF to SI or DF to DI). | |
99361551 | 8925 | (define_expand "<optab><mode><v_int_equiv>2" |
915d28fe RS |
8926 | [(set (match_operand:<V_INT_EQUIV> 0 "register_operand") |
8927 | (unspec:<V_INT_EQUIV> | |
8928 | [(match_dup 2) | |
99361551 | 8929 | (const_int SVE_RELAXED_GP) |
f75cdd2c | 8930 | (match_operand:SVE_FULL_F 1 "register_operand")] |
99361551 | 8931 | SVE_COND_FCVTI))] |
915d28fe RS |
8932 | "TARGET_SVE" |
8933 | { | |
8934 | operands[2] = aarch64_ptrue_reg (<VPRED>mode); | |
43cacb12 RS |
8935 | } |
8936 | ) | |
8937 | ||
95eb5537 | 8938 | ;; Predicated float-to-integer conversion, either to the same width or wider. |
f75cdd2c | 8939 | (define_insn "@aarch64_sve_<optab>_nontrunc<SVE_FULL_F:mode><SVE_FULL_HSDI:mode>" |
a4d9837e | 8940 | [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w") |
f75cdd2c | 8941 | (unspec:SVE_FULL_HSDI |
a4d9837e | 8942 | [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl") |
99361551 | 8943 | (match_operand:SI 3 "aarch64_sve_gp_strictness") |
a4d9837e | 8944 | (match_operand:SVE_FULL_F 2 "register_operand" "0, w")] |
99361551 | 8945 | SVE_COND_FCVTI))] |
f75cdd2c | 8946 | "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>" |
a4d9837e RS |
8947 | "@ |
8948 | fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype> | |
8949 | movprfx\t%0, %2\;fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>" | |
8950 | [(set_attr "movprfx" "*,yes")] | |
915d28fe RS |
8951 | ) |
8952 | ||
95eb5537 | 8953 | ;; Predicated narrowing float-to-integer conversion. |
624d0f07 | 8954 | (define_insn "@aarch64_sve_<optab>_trunc<VNx2DF_ONLY:mode><VNx4SI_ONLY:mode>" |
a4d9837e | 8955 | [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w") |
95eb5537 | 8956 | (unspec:VNx4SI_ONLY |
a4d9837e | 8957 | [(match_operand:VNx2BI 1 "register_operand" "Upl, Upl") |
99361551 | 8958 | (match_operand:SI 3 "aarch64_sve_gp_strictness") |
a4d9837e | 8959 | (match_operand:VNx2DF_ONLY 2 "register_operand" "0, w")] |
99361551 | 8960 | SVE_COND_FCVTI))] |
915d28fe | 8961 | "TARGET_SVE" |
a4d9837e RS |
8962 | "@ |
8963 | fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype> | |
8964 | movprfx\t%0, %2\;fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>" | |
8965 | [(set_attr "movprfx" "*,yes")] | |
915d28fe RS |
8966 | ) |
8967 | ||
c5e16983 RS |
8968 | ;; Predicated float-to-integer conversion with merging, either to the same |
8969 | ;; width or wider. | |
f75cdd2c RS |
8970 | (define_expand "@cond_<optab>_nontrunc<SVE_FULL_F:mode><SVE_FULL_HSDI:mode>" |
8971 | [(set (match_operand:SVE_FULL_HSDI 0 "register_operand") | |
8972 | (unspec:SVE_FULL_HSDI | |
8973 | [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand") | |
8974 | (unspec:SVE_FULL_HSDI | |
624d0f07 RS |
8975 | [(match_dup 1) |
8976 | (const_int SVE_STRICT_GP) | |
f75cdd2c | 8977 | (match_operand:SVE_FULL_F 2 "register_operand")] |
624d0f07 | 8978 | SVE_COND_FCVTI) |
f75cdd2c | 8979 | (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero")] |
624d0f07 | 8980 | UNSPEC_SEL))] |
f75cdd2c | 8981 | "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>" |
624d0f07 RS |
8982 | ) |
8983 | ||
c5e16983 RS |
8984 | ;; The first alternative doesn't need the earlyclobber, but the only case |
8985 | ;; it would help is the uninteresting one in which operands 2 and 3 are | |
8986 | ;; the same register (despite having different modes). Making all the | |
8987 | ;; alternatives earlyclobber makes things more consistent for the | |
8988 | ;; register allocator. | |
0eb5e901 | 8989 | (define_insn_and_rewrite "*cond_<optab>_nontrunc<SVE_FULL_F:mode><SVE_FULL_HSDI:mode>_relaxed" |
f75cdd2c RS |
8990 | [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=&w, &w, ?&w") |
8991 | (unspec:SVE_FULL_HSDI | |
8992 | [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl, Upl") | |
8993 | (unspec:SVE_FULL_HSDI | |
c5e16983 | 8994 | [(match_operand 4) |
0eb5e901 | 8995 | (const_int SVE_RELAXED_GP) |
f75cdd2c | 8996 | (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")] |
c5e16983 | 8997 | SVE_COND_FCVTI) |
f75cdd2c | 8998 | (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] |
c5e16983 | 8999 | UNSPEC_SEL))] |
0eb5e901 | 9000 | "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>" |
c5e16983 | 9001 | "@ |
f75cdd2c RS |
9002 | fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype> |
9003 | movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype> | |
9004 | movprfx\t%0, %3\;fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>" | |
c5e16983 RS |
9005 | "&& !rtx_equal_p (operands[1], operands[4])" |
9006 | { | |
9007 | operands[4] = copy_rtx (operands[1]); | |
9008 | } | |
9009 | [(set_attr "movprfx" "*,yes,yes")] | |
9010 | ) | |
9011 | ||
0eb5e901 RS |
9012 | (define_insn "*cond_<optab>_nontrunc<SVE_FULL_F:mode><SVE_FULL_HSDI:mode>_strict" |
9013 | [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=&w, &w, ?&w") | |
9014 | (unspec:SVE_FULL_HSDI | |
9015 | [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl, Upl") | |
9016 | (unspec:SVE_FULL_HSDI | |
9017 | [(match_dup 1) | |
9018 | (const_int SVE_STRICT_GP) | |
9019 | (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")] | |
9020 | SVE_COND_FCVTI) | |
9021 | (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] | |
9022 | UNSPEC_SEL))] | |
9023 | "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>" | |
9024 | "@ | |
9025 | fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype> | |
9026 | movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype> | |
9027 | movprfx\t%0, %3\;fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>" | |
9028 | [(set_attr "movprfx" "*,yes,yes")] | |
9029 | ) | |
9030 | ||
624d0f07 RS |
9031 | ;; Predicated narrowing float-to-integer conversion with merging. |
9032 | (define_expand "@cond_<optab>_trunc<VNx2DF_ONLY:mode><VNx4SI_ONLY:mode>" | |
9033 | [(set (match_operand:VNx4SI_ONLY 0 "register_operand") | |
9034 | (unspec:VNx4SI_ONLY | |
9035 | [(match_operand:VNx2BI 1 "register_operand") | |
9036 | (unspec:VNx4SI_ONLY | |
9037 | [(match_dup 1) | |
9038 | (const_int SVE_STRICT_GP) | |
9039 | (match_operand:VNx2DF_ONLY 2 "register_operand")] | |
9040 | SVE_COND_FCVTI) | |
9041 | (match_operand:VNx4SI_ONLY 3 "aarch64_simd_reg_or_zero")] | |
9042 | UNSPEC_SEL))] | |
9043 | "TARGET_SVE" | |
9044 | ) | |
9045 | ||
9046 | (define_insn "*cond_<optab>_trunc<VNx2DF_ONLY:mode><VNx4SI_ONLY:mode>" | |
9047 | [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=&w, &w, ?&w") | |
9048 | (unspec:VNx4SI_ONLY | |
9049 | [(match_operand:VNx2BI 1 "register_operand" "Upl, Upl, Upl") | |
9050 | (unspec:VNx4SI_ONLY | |
9051 | [(match_dup 1) | |
9052 | (match_operand:SI 4 "aarch64_sve_gp_strictness") | |
9053 | (match_operand:VNx2DF_ONLY 2 "register_operand" "w, w, w")] | |
9054 | SVE_COND_FCVTI) | |
9055 | (match_operand:VNx4SI_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] | |
9056 | UNSPEC_SEL))] | |
9057 | "TARGET_SVE" | |
9058 | "@ | |
9059 | fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype> | |
9060 | movprfx\t%0.<VNx2DF_ONLY:Vetype>, %1/z, %2.<VNx2DF_ONLY:Vetype>\;fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype> | |
9061 | movprfx\t%0, %3\;fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>" | |
9062 | [(set_attr "movprfx" "*,yes,yes")] | |
9063 | ) | |
9064 | ||
915d28fe RS |
9065 | ;; ------------------------------------------------------------------------- |
9066 | ;; ---- [INT<-FP] Packs | |
9067 | ;; ------------------------------------------------------------------------- | |
9068 | ;; The patterns in this section are synthetic. | |
9069 | ;; ------------------------------------------------------------------------- | |
9070 | ||
43cacb12 RS |
9071 | ;; Convert two vectors of DF to SI and pack the results into a single vector. |
9072 | (define_expand "vec_pack_<su>fix_trunc_vnx2df" | |
9073 | [(set (match_dup 4) | |
9074 | (unspec:VNx4SI | |
9075 | [(match_dup 3) | |
99361551 RS |
9076 | (const_int SVE_RELAXED_GP) |
9077 | (match_operand:VNx2DF 1 "register_operand")] | |
9078 | SVE_COND_FCVTI)) | |
43cacb12 RS |
9079 | (set (match_dup 5) |
9080 | (unspec:VNx4SI | |
9081 | [(match_dup 3) | |
99361551 RS |
9082 | (const_int SVE_RELAXED_GP) |
9083 | (match_operand:VNx2DF 2 "register_operand")] | |
9084 | SVE_COND_FCVTI)) | |
43cacb12 RS |
9085 | (set (match_operand:VNx4SI 0 "register_operand") |
9086 | (unspec:VNx4SI [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))] | |
9087 | "TARGET_SVE" | |
9088 | { | |
16de3637 | 9089 | operands[3] = aarch64_ptrue_reg (VNx2BImode); |
43cacb12 RS |
9090 | operands[4] = gen_reg_rtx (VNx4SImode); |
9091 | operands[5] = gen_reg_rtx (VNx4SImode); | |
9092 | } | |
9093 | ) | |
f1739b48 | 9094 | |
915d28fe RS |
9095 | ;; ------------------------------------------------------------------------- |
9096 | ;; ---- [INT<-FP] Unpacks | |
9097 | ;; ------------------------------------------------------------------------- | |
9098 | ;; No patterns here yet! | |
9099 | ;; ------------------------------------------------------------------------- | |
9d4ac06e | 9100 | |
915d28fe RS |
9101 | ;; ------------------------------------------------------------------------- |
9102 | ;; ---- [FP<-INT] Conversions | |
9103 | ;; ------------------------------------------------------------------------- | |
9104 | ;; Includes: | |
9105 | ;; - SCVTF | |
9106 | ;; - UCVTF | |
9107 | ;; ------------------------------------------------------------------------- | |
a08acce8 | 9108 | |
915d28fe RS |
9109 | ;; Unpredicated conversion of integers to floats of the same size |
9110 | ;; (HI to HF, SI to SF or DI to DF). | |
9111 | (define_expand "<optab><v_int_equiv><mode>2" | |
f75cdd2c RS |
9112 | [(set (match_operand:SVE_FULL_F 0 "register_operand") |
9113 | (unspec:SVE_FULL_F | |
915d28fe | 9114 | [(match_dup 2) |
99361551 RS |
9115 | (const_int SVE_RELAXED_GP) |
9116 | (match_operand:<V_INT_EQUIV> 1 "register_operand")] | |
9117 | SVE_COND_ICVTF))] | |
a08acce8 | 9118 | "TARGET_SVE" |
f4fde1b3 | 9119 | { |
915d28fe | 9120 | operands[2] = aarch64_ptrue_reg (<VPRED>mode); |
f4fde1b3 | 9121 | } |
b41d1f6e RS |
9122 | ) |
9123 | ||
95eb5537 RS |
9124 | ;; Predicated integer-to-float conversion, either to the same width or |
9125 | ;; narrower. | |
f75cdd2c | 9126 | (define_insn "@aarch64_sve_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>" |
a4d9837e | 9127 | [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") |
f75cdd2c | 9128 | (unspec:SVE_FULL_F |
a4d9837e | 9129 | [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl") |
99361551 | 9130 | (match_operand:SI 3 "aarch64_sve_gp_strictness") |
a4d9837e | 9131 | (match_operand:SVE_FULL_HSDI 2 "register_operand" "0, w")] |
99361551 | 9132 | SVE_COND_ICVTF))] |
f75cdd2c | 9133 | "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>" |
a4d9837e RS |
9134 | "@ |
9135 | <su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype> | |
9136 | movprfx\t%0, %2\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>" | |
9137 | [(set_attr "movprfx" "*,yes")] | |
f1739b48 | 9138 | ) |
6c9c7b73 | 9139 | |
95eb5537 | 9140 | ;; Predicated widening integer-to-float conversion. |
624d0f07 | 9141 | (define_insn "@aarch64_sve_<optab>_extend<VNx4SI_ONLY:mode><VNx2DF_ONLY:mode>" |
a4d9837e | 9142 | [(set (match_operand:VNx2DF_ONLY 0 "register_operand" "=w, ?&w") |
95eb5537 | 9143 | (unspec:VNx2DF_ONLY |
a4d9837e | 9144 | [(match_operand:VNx2BI 1 "register_operand" "Upl, Upl") |
99361551 | 9145 | (match_operand:SI 3 "aarch64_sve_gp_strictness") |
a4d9837e | 9146 | (match_operand:VNx4SI_ONLY 2 "register_operand" "0, w")] |
99361551 | 9147 | SVE_COND_ICVTF))] |
6c9c7b73 | 9148 | "TARGET_SVE" |
a4d9837e RS |
9149 | "@ |
9150 | <su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype> | |
9151 | movprfx\t%0, %2\;<su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>" | |
9152 | [(set_attr "movprfx" "*,yes")] | |
915d28fe | 9153 | ) |
6c9c7b73 | 9154 | |
c5e16983 RS |
9155 | ;; Predicated integer-to-float conversion with merging, either to the same |
9156 | ;; width or narrower. | |
f75cdd2c RS |
9157 | (define_expand "@cond_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>" |
9158 | [(set (match_operand:SVE_FULL_F 0 "register_operand") | |
9159 | (unspec:SVE_FULL_F | |
9160 | [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand") | |
9161 | (unspec:SVE_FULL_F | |
624d0f07 RS |
9162 | [(match_dup 1) |
9163 | (const_int SVE_STRICT_GP) | |
f75cdd2c | 9164 | (match_operand:SVE_FULL_HSDI 2 "register_operand")] |
624d0f07 | 9165 | SVE_COND_ICVTF) |
f75cdd2c | 9166 | (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")] |
624d0f07 | 9167 | UNSPEC_SEL))] |
f75cdd2c | 9168 | "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>" |
624d0f07 RS |
9169 | ) |
9170 | ||
c5e16983 RS |
9171 | ;; The first alternative doesn't need the earlyclobber, but the only case |
9172 | ;; it would help is the uninteresting one in which operands 2 and 3 are | |
9173 | ;; the same register (despite having different modes). Making all the | |
9174 | ;; alternatives earlyclobber makes things more consistent for the | |
9175 | ;; register allocator. | |
0eb5e901 | 9176 | (define_insn_and_rewrite "*cond_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>_relaxed" |
f75cdd2c RS |
9177 | [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, ?&w") |
9178 | (unspec:SVE_FULL_F | |
9179 | [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl, Upl") | |
9180 | (unspec:SVE_FULL_F | |
c5e16983 | 9181 | [(match_operand 4) |
0eb5e901 | 9182 | (const_int SVE_RELAXED_GP) |
f75cdd2c | 9183 | (match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w, w")] |
c5e16983 | 9184 | SVE_COND_ICVTF) |
f75cdd2c | 9185 | (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] |
c5e16983 | 9186 | UNSPEC_SEL))] |
0eb5e901 | 9187 | "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>" |
c5e16983 | 9188 | "@ |
f75cdd2c RS |
9189 | <su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype> |
9190 | movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype> | |
9191 | movprfx\t%0, %3\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>" | |
c5e16983 RS |
9192 | "&& !rtx_equal_p (operands[1], operands[4])" |
9193 | { | |
9194 | operands[4] = copy_rtx (operands[1]); | |
9195 | } | |
9196 | [(set_attr "movprfx" "*,yes,yes")] | |
0eb5e901 RS |
9197 | ) |
9198 | ||
9199 | (define_insn "*cond_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>_strict" | |
9200 | [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, ?&w") | |
9201 | (unspec:SVE_FULL_F | |
9202 | [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl, Upl") | |
9203 | (unspec:SVE_FULL_F | |
9204 | [(match_dup 1) | |
9205 | (const_int SVE_STRICT_GP) | |
9206 | (match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w, w")] | |
9207 | SVE_COND_ICVTF) | |
9208 | (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] | |
9209 | UNSPEC_SEL))] | |
9210 | "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>" | |
9211 | "@ | |
9212 | <su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype> | |
9213 | movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype> | |
9214 | movprfx\t%0, %3\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>" | |
9215 | [(set_attr "movprfx" "*,yes,yes")] | |
c5e16983 RS |
9216 | ) |
9217 | ||
624d0f07 RS |
9218 | ;; Predicated widening integer-to-float conversion with merging. |
9219 | (define_expand "@cond_<optab>_extend<VNx4SI_ONLY:mode><VNx2DF_ONLY:mode>" | |
9220 | [(set (match_operand:VNx2DF_ONLY 0 "register_operand") | |
9221 | (unspec:VNx2DF_ONLY | |
9222 | [(match_operand:VNx2BI 1 "register_operand") | |
9223 | (unspec:VNx2DF_ONLY | |
9224 | [(match_dup 1) | |
9225 | (const_int SVE_STRICT_GP) | |
9226 | (match_operand:VNx4SI_ONLY 2 "register_operand")] | |
9227 | SVE_COND_ICVTF) | |
9228 | (match_operand:VNx2DF_ONLY 3 "aarch64_simd_reg_or_zero")] | |
9229 | UNSPEC_SEL))] | |
9230 | "TARGET_SVE" | |
9231 | ) | |
9232 | ||
9233 | (define_insn "*cond_<optab>_extend<VNx4SI_ONLY:mode><VNx2DF_ONLY:mode>" | |
9234 | [(set (match_operand:VNx2DF_ONLY 0 "register_operand" "=w, ?&w, ?&w") | |
9235 | (unspec:VNx2DF_ONLY | |
9236 | [(match_operand:VNx2BI 1 "register_operand" "Upl, Upl, Upl") | |
9237 | (unspec:VNx2DF_ONLY | |
9238 | [(match_dup 1) | |
9239 | (match_operand:SI 4 "aarch64_sve_gp_strictness") | |
9240 | (match_operand:VNx4SI_ONLY 2 "register_operand" "w, w, w")] | |
9241 | SVE_COND_ICVTF) | |
9242 | (match_operand:VNx2DF_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] | |
9243 | UNSPEC_SEL))] | |
9244 | "TARGET_SVE" | |
9245 | "@ | |
9246 | <su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype> | |
9247 | movprfx\t%0.<VNx2DF_ONLY:Vetype>, %1/z, %2.<VNx2DF_ONLY:Vetype>\;<su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype> | |
9248 | movprfx\t%0, %3\;<su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>" | |
9249 | [(set_attr "movprfx" "*,yes,yes")] | |
9250 | ) | |
9251 | ||
915d28fe RS |
9252 | ;; ------------------------------------------------------------------------- |
9253 | ;; ---- [FP<-INT] Packs | |
9254 | ;; ------------------------------------------------------------------------- | |
9255 | ;; No patterns here yet! | |
9256 | ;; ------------------------------------------------------------------------- | |
6c9c7b73 | 9257 | |
915d28fe RS |
9258 | ;; ------------------------------------------------------------------------- |
9259 | ;; ---- [FP<-INT] Unpacks | |
9260 | ;; ------------------------------------------------------------------------- | |
9261 | ;; The patterns in this section are synthetic. | |
9262 | ;; ------------------------------------------------------------------------- | |
9263 | ||
9264 | ;; Unpack one half of a VNx4SI to VNx2DF. First unpack from VNx4SI | |
9265 | ;; to VNx2DI, reinterpret the VNx2DI as a VNx4SI, then convert the | |
9266 | ;; unpacked VNx4SI to VNx2DF. | |
9267 | (define_expand "vec_unpack<su_optab>_float_<perm_hilo>_vnx4si" | |
9268 | [(match_operand:VNx2DF 0 "register_operand") | |
9269 | (FLOATUORS:VNx2DF | |
9270 | (unspec:VNx2DI [(match_operand:VNx4SI 1 "register_operand")] | |
9271 | UNPACK_UNSIGNED))] | |
9272 | "TARGET_SVE" | |
9273 | { | |
9274 | /* Use ZIP to do the unpack, since we don't care about the upper halves | |
9275 | and since it has the nice property of not needing any subregs. | |
9276 | If using UUNPK* turns out to be preferable, we could model it as | |
9277 | a ZIP whose first operand is zero. */ | |
9278 | rtx temp = gen_reg_rtx (VNx4SImode); | |
9279 | emit_insn ((<hi_lanes_optab> | |
9280 | ? gen_aarch64_sve_zip2vnx4si | |
9281 | : gen_aarch64_sve_zip1vnx4si) | |
9282 | (temp, operands[1], operands[1])); | |
9283 | rtx ptrue = aarch64_ptrue_reg (VNx2BImode); | |
99361551 | 9284 | rtx strictness = gen_int_mode (SVE_RELAXED_GP, SImode); |
95eb5537 | 9285 | emit_insn (gen_aarch64_sve_<FLOATUORS:optab>_extendvnx4sivnx2df |
99361551 | 9286 | (operands[0], ptrue, temp, strictness)); |
6c9c7b73 AM |
9287 | DONE; |
9288 | } | |
9289 | ) | |
9290 | ||
915d28fe RS |
9291 | ;; ------------------------------------------------------------------------- |
9292 | ;; ---- [FP<-FP] Packs | |
9293 | ;; ------------------------------------------------------------------------- | |
9294 | ;; Includes: | |
9295 | ;; - FCVT | |
9296 | ;; ------------------------------------------------------------------------- | |
9297 | ||
9298 | ;; Convert two vectors of DF to SF, or two vectors of SF to HF, and pack | |
9299 | ;; the results into a single vector. | |
9300 | (define_expand "vec_pack_trunc_<Vwide>" | |
9301 | [(set (match_dup 4) | |
f75cdd2c | 9302 | (unspec:SVE_FULL_HSF |
915d28fe | 9303 | [(match_dup 3) |
99361551 RS |
9304 | (const_int SVE_RELAXED_GP) |
9305 | (match_operand:<VWIDE> 1 "register_operand")] | |
9306 | UNSPEC_COND_FCVT)) | |
915d28fe | 9307 | (set (match_dup 5) |
f75cdd2c | 9308 | (unspec:SVE_FULL_HSF |
915d28fe | 9309 | [(match_dup 3) |
99361551 RS |
9310 | (const_int SVE_RELAXED_GP) |
9311 | (match_operand:<VWIDE> 2 "register_operand")] | |
9312 | UNSPEC_COND_FCVT)) | |
f75cdd2c RS |
9313 | (set (match_operand:SVE_FULL_HSF 0 "register_operand") |
9314 | (unspec:SVE_FULL_HSF [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))] | |
6c9c7b73 AM |
9315 | "TARGET_SVE" |
9316 | { | |
915d28fe RS |
9317 | operands[3] = aarch64_ptrue_reg (<VWIDE_PRED>mode); |
9318 | operands[4] = gen_reg_rtx (<MODE>mode); | |
9319 | operands[5] = gen_reg_rtx (<MODE>mode); | |
6c9c7b73 AM |
9320 | } |
9321 | ) | |
9feeafd7 | 9322 | |
95eb5537 | 9323 | ;; Predicated float-to-float truncation. |
f75cdd2c | 9324 | (define_insn "@aarch64_sve_<optab>_trunc<SVE_FULL_SDF:mode><SVE_FULL_HSF:mode>" |
a4d9837e | 9325 | [(set (match_operand:SVE_FULL_HSF 0 "register_operand" "=w, ?&w") |
f75cdd2c | 9326 | (unspec:SVE_FULL_HSF |
a4d9837e | 9327 | [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand" "Upl, Upl") |
99361551 | 9328 | (match_operand:SI 3 "aarch64_sve_gp_strictness") |
a4d9837e | 9329 | (match_operand:SVE_FULL_SDF 2 "register_operand" "0, w")] |
95eb5537 | 9330 | SVE_COND_FCVT))] |
f75cdd2c | 9331 | "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>" |
a4d9837e RS |
9332 | "@ |
9333 | fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype> | |
9334 | movprfx\t%0, %2\;fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype>" | |
9335 | [(set_attr "movprfx" "*,yes")] | |
9feeafd7 | 9336 | ) |
a9fad8fe | 9337 | |
624d0f07 | 9338 | ;; Predicated float-to-float truncation with merging. |
f75cdd2c RS |
9339 | (define_expand "@cond_<optab>_trunc<SVE_FULL_SDF:mode><SVE_FULL_HSF:mode>" |
9340 | [(set (match_operand:SVE_FULL_HSF 0 "register_operand") | |
9341 | (unspec:SVE_FULL_HSF | |
9342 | [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand") | |
9343 | (unspec:SVE_FULL_HSF | |
624d0f07 RS |
9344 | [(match_dup 1) |
9345 | (const_int SVE_STRICT_GP) | |
f75cdd2c | 9346 | (match_operand:SVE_FULL_SDF 2 "register_operand")] |
624d0f07 | 9347 | SVE_COND_FCVT) |
f75cdd2c | 9348 | (match_operand:SVE_FULL_HSF 3 "aarch64_simd_reg_or_zero")] |
624d0f07 | 9349 | UNSPEC_SEL))] |
f75cdd2c | 9350 | "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>" |
624d0f07 RS |
9351 | ) |
9352 | ||
f75cdd2c RS |
9353 | (define_insn "*cond_<optab>_trunc<SVE_FULL_SDF:mode><SVE_FULL_HSF:mode>" |
9354 | [(set (match_operand:SVE_FULL_HSF 0 "register_operand" "=w, ?&w, ?&w") | |
9355 | (unspec:SVE_FULL_HSF | |
9356 | [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand" "Upl, Upl, Upl") | |
9357 | (unspec:SVE_FULL_HSF | |
624d0f07 RS |
9358 | [(match_dup 1) |
9359 | (match_operand:SI 4 "aarch64_sve_gp_strictness") | |
f75cdd2c | 9360 | (match_operand:SVE_FULL_SDF 2 "register_operand" "w, w, w")] |
624d0f07 | 9361 | SVE_COND_FCVT) |
f75cdd2c | 9362 | (match_operand:SVE_FULL_HSF 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] |
624d0f07 | 9363 | UNSPEC_SEL))] |
f75cdd2c | 9364 | "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>" |
624d0f07 | 9365 | "@ |
f75cdd2c RS |
9366 | fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype> |
9367 | movprfx\t%0.<SVE_FULL_SDF:Vetype>, %1/z, %2.<SVE_FULL_SDF:Vetype>\;fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype> | |
9368 | movprfx\t%0, %3\;fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype>" | |
624d0f07 RS |
9369 | [(set_attr "movprfx" "*,yes,yes")] |
9370 | ) | |
9371 | ||
896dff99 RS |
;; -------------------------------------------------------------------------
;; ---- [FP<-FP] Packs (bfloat16)
;; -------------------------------------------------------------------------
;; Includes:
;; - BFCVT (BF16)
;; - BFCVTNT (BF16)
;; -------------------------------------------------------------------------

;; Predicated conversion of single precision to bfloat16 (BFCVT).
(define_insn "@aarch64_sve_<optab>_trunc<VNx4SF_ONLY:mode><VNx8BF_ONLY:mode>"
  [(set (match_operand:VNx8BF_ONLY 0 "register_operand" "=w, ?&w")
	(unspec:VNx8BF_ONLY
	  [(match_operand:VNx4BI 1 "register_operand" "Upl, Upl")
	   (match_operand:SI 3 "aarch64_sve_gp_strictness")
	   (match_operand:VNx4SF_ONLY 2 "register_operand" "0, w")]
	  SVE_COND_FCVT))]
  "TARGET_SVE_BF16"
  "@
   bfcvt\t%0.h, %1/m, %2.s
   movprfx\t%0, %2\;bfcvt\t%0.h, %1/m, %2.s"
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated BFCVT with merging: inactive lanes come from operand 3.
(define_expand "@cond_<optab>_trunc<VNx4SF_ONLY:mode><VNx8BF_ONLY:mode>"
  [(set (match_operand:VNx8BF_ONLY 0 "register_operand")
	(unspec:VNx8BF_ONLY
	  [(match_operand:VNx4BI 1 "register_operand")
	   (unspec:VNx8BF_ONLY
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:VNx4SF_ONLY 2 "register_operand")]
	     SVE_COND_FCVT)
	   (match_operand:VNx8BF_ONLY 3 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE_BF16"
)

(define_insn "*cond_<optab>_trunc<VNx4SF_ONLY:mode><VNx8BF_ONLY:mode>"
  [(set (match_operand:VNx8BF_ONLY 0 "register_operand" "=w, ?&w, ?&w")
	(unspec:VNx8BF_ONLY
	  [(match_operand:VNx4BI 1 "register_operand" "Upl, Upl, Upl")
	   (unspec:VNx8BF_ONLY
	     [(match_dup 1)
	      (match_operand:SI 4 "aarch64_sve_gp_strictness")
	      (match_operand:VNx4SF_ONLY 2 "register_operand" "w, w, w")]
	     SVE_COND_FCVT)
	   (match_operand:VNx8BF_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE_BF16"
  "@
   bfcvt\t%0.h, %1/m, %2.s
   movprfx\t%0.s, %1/z, %2.s\;bfcvt\t%0.h, %1/m, %2.s
   movprfx\t%0, %3\;bfcvt\t%0.h, %1/m, %2.s"
  [(set_attr "movprfx" "*,yes,yes")]
)

;; Predicated BFCVTNT.  This doesn't give a natural aarch64_pred_*/cond_*
;; pair because the even elements always have to be supplied for active
;; elements, even if the inactive elements don't matter.
;;
;; This instruction does not take MOVPRFX.
(define_insn "@aarch64_sve_cvtnt<mode>"
  [(set (match_operand:VNx8BF_ONLY 0 "register_operand" "=w")
	(unspec:VNx8BF_ONLY
	  [(match_operand:VNx4BI 2 "register_operand" "Upl")
	   (const_int SVE_STRICT_GP)
	   (match_operand:VNx8BF_ONLY 1 "register_operand" "0")
	   (match_operand:VNx4SF 3 "register_operand" "w")]
	  UNSPEC_COND_FCVTNT))]
  "TARGET_SVE_BF16"
  "bfcvtnt\t%0.h, %2/m, %3.s"
)
9445 | ||
915d28fe RS |
;; -------------------------------------------------------------------------
;; ---- [FP<-FP] Unpacks
;; -------------------------------------------------------------------------
;; Includes:
;; - FCVT
;; -------------------------------------------------------------------------

;; Unpack one half of a VNx4SF to VNx2DF, or one half of a VNx8HF to VNx4SF.
;; First unpack the source without conversion, then float-convert the
;; unpacked source.
(define_expand "vec_unpacks_<perm_hilo>_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (unspec:SVE_FULL_HSF
     [(match_operand:SVE_FULL_HSF 1 "register_operand")]
     UNPACK_UNSIGNED)]
  "TARGET_SVE"
  {
    /* Use ZIP to do the unpack, since we don't care about the upper halves
       and since it has the nice property of not needing any subregs.
       If using UUNPK* turns out to be preferable, we could model it as
       a ZIP whose first operand is zero.  */
    rtx temp = gen_reg_rtx (<MODE>mode);
    emit_insn ((<hi_lanes_optab>
		? gen_aarch64_sve_zip2<mode>
		: gen_aarch64_sve_zip1<mode>)
	       (temp, operands[1], operands[1]));
    rtx ptrue = aarch64_ptrue_reg (<VWIDE_PRED>mode);
    rtx strictness = gen_int_mode (SVE_RELAXED_GP, SImode);
    emit_insn (gen_aarch64_sve_fcvt_nontrunc<mode><Vwide>
	       (operands[0], ptrue, temp, strictness));
    DONE;
  }
)

;; Predicated float-to-float extension (narrower HSF element to wider
;; SDF element).
(define_insn "@aarch64_sve_<optab>_nontrunc<SVE_FULL_HSF:mode><SVE_FULL_SDF:mode>"
  [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_SDF
	  [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand" "Upl, Upl")
	   (match_operand:SI 3 "aarch64_sve_gp_strictness")
	   (match_operand:SVE_FULL_HSF 2 "register_operand" "0, w")]
	  SVE_COND_FCVT))]
  "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
  "@
   fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>
   movprfx\t%0, %2\;fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated float-to-float extension with merging.
(define_expand "@cond_<optab>_nontrunc<SVE_FULL_HSF:mode><SVE_FULL_SDF:mode>"
  [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
	(unspec:SVE_FULL_SDF
	  [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand")
	   (unspec:SVE_FULL_SDF
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:SVE_FULL_HSF 2 "register_operand")]
	     SVE_COND_FCVT)
	   (match_operand:SVE_FULL_SDF 3 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
)

(define_insn "*cond_<optab>_nontrunc<SVE_FULL_HSF:mode><SVE_FULL_SDF:mode>"
  [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w, ?&w, ?&w")
	(unspec:SVE_FULL_SDF
	  [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand" "Upl, Upl, Upl")
	   (unspec:SVE_FULL_SDF
	     [(match_dup 1)
	      (match_operand:SI 4 "aarch64_sve_gp_strictness")
	      (match_operand:SVE_FULL_HSF 2 "register_operand" "w, w, w")]
	     SVE_COND_FCVT)
	   (match_operand:SVE_FULL_SDF 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
  "@
   fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>
   movprfx\t%0.<SVE_FULL_SDF:Vetype>, %1/z, %2.<SVE_FULL_SDF:Vetype>\;fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>
   movprfx\t%0, %3\;fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>"
  [(set_attr "movprfx" "*,yes,yes")]
)
9528 | ||
915d28fe RS |
;; -------------------------------------------------------------------------
;; ---- [PRED<-PRED] Packs
;; -------------------------------------------------------------------------
;; Includes:
;; - UZP1
;; -------------------------------------------------------------------------

;; Predicate pack.  Use UZP1 on the narrower type, which discards
;; the high part of each wide element.
(define_insn "vec_pack_trunc_<Vwide>"
  [(set (match_operand:PRED_BHS 0 "register_operand" "=Upa")
	(unspec:PRED_BHS
	  [(match_operand:<VWIDE> 1 "register_operand" "Upa")
	   (match_operand:<VWIDE> 2 "register_operand" "Upa")]
	  UNSPEC_PACK))]
  "TARGET_SVE"
  "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)
3a0afad0 | 9547 | |
915d28fe RS |
;; -------------------------------------------------------------------------
;; ---- [PRED<-PRED] Unpacks
;; -------------------------------------------------------------------------
;; Includes:
;; - PUNPKHI
;; - PUNPKLO
;; -------------------------------------------------------------------------

;; Unpack the low or high half of a predicate, where "high" refers to
;; the low-numbered lanes for big-endian and the high-numbered lanes
;; for little-endian.
(define_expand "vec_unpack<su>_<perm_hilo>_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand")]
		   UNPACK)]
  "TARGET_SVE"
  {
    emit_insn ((<hi_lanes_optab>
		? gen_aarch64_sve_punpkhi_<PRED_BHS:mode>
		: gen_aarch64_sve_punpklo_<PRED_BHS:mode>)
	       (operands[0], operands[1]));
    DONE;
  }
)

;; PUNPKLO/PUNPKHI, which always operate on the .b form of the inputs.
(define_insn "@aarch64_sve_punpk<perm_hilo>_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=Upa")
	(unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand" "Upa")]
			UNPACK_UNSIGNED))]
  "TARGET_SVE"
  "punpk<perm_hilo>\t%0.h, %1.b"
)
624d0f07 RS |
9580 | |
;; =========================================================================
;; == Vector partitioning
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- [PRED] Unary partitioning
;; -------------------------------------------------------------------------
;; Includes:
;; - BRKA
;; - BRKAS
;; - BRKB
;; - BRKBS
;; -------------------------------------------------------------------------

;; Note that unlike most other instructions that have both merging and
;; zeroing forms, these instructions don't operate elementwise and so
;; don't fit the IFN_COND model.
(define_insn "@aarch64_brk<brk_op>"
  [(set (match_operand:VNx16BI 0 "register_operand" "=Upa, Upa")
	(unspec:VNx16BI
	  [(match_operand:VNx16BI 1 "register_operand" "Upa, Upa")
	   (match_operand:VNx16BI 2 "register_operand" "Upa, Upa")
	   (match_operand:VNx16BI 3 "aarch64_simd_reg_or_zero" "Dz, 0")]
	  SVE_BRK_UNARY))]
  "TARGET_SVE"
  "@
   brk<brk_op>\t%0.b, %1/z, %2.b
   brk<brk_op>\t%0.b, %1/m, %2.b"
)

;; Same, but also producing a flags result.
(define_insn "*aarch64_brk<brk_op>_cc"
  [(set (reg:CC_NZC CC_REGNUM)
	(unspec:CC_NZC
	  [(match_operand:VNx16BI 1 "register_operand" "Upa, Upa")
	   (match_dup 1)
	   (match_operand:SI 4 "aarch64_sve_ptrue_flag")
	   (unspec:VNx16BI
	     [(match_dup 1)
	      (match_operand:VNx16BI 2 "register_operand" "Upa, Upa")
	      (match_operand:VNx16BI 3 "aarch64_simd_reg_or_zero" "Dz, 0")]
	     SVE_BRK_UNARY)]
	  UNSPEC_PTEST))
   (set (match_operand:VNx16BI 0 "register_operand" "=Upa, Upa")
	(unspec:VNx16BI
	  [(match_dup 1)
	   (match_dup 2)
	   (match_dup 3)]
	  SVE_BRK_UNARY))]
  "TARGET_SVE"
  "@
   brk<brk_op>s\t%0.b, %1/z, %2.b
   brk<brk_op>s\t%0.b, %1/m, %2.b"
)

;; Same, but with only the flags result being interesting.
(define_insn "*aarch64_brk<brk_op>_ptest"
  [(set (reg:CC_NZC CC_REGNUM)
	(unspec:CC_NZC
	  [(match_operand:VNx16BI 1 "register_operand" "Upa, Upa")
	   (match_dup 1)
	   (match_operand:SI 4 "aarch64_sve_ptrue_flag")
	   (unspec:VNx16BI
	     [(match_dup 1)
	      (match_operand:VNx16BI 2 "register_operand" "Upa, Upa")
	      (match_operand:VNx16BI 3 "aarch64_simd_reg_or_zero" "Dz, 0")]
	     SVE_BRK_UNARY)]
	  UNSPEC_PTEST))
   (clobber (match_scratch:VNx16BI 0 "=Upa, Upa"))]
  "TARGET_SVE"
  "@
   brk<brk_op>s\t%0.b, %1/z, %2.b
   brk<brk_op>s\t%0.b, %1/m, %2.b"
)
9655 | ||
;; -------------------------------------------------------------------------
;; ---- [PRED] Binary partitioning
;; -------------------------------------------------------------------------
;; Includes:
;; - BRKN
;; - BRKNS
;; - BRKPA
;; - BRKPAS
;; - BRKPB
;; - BRKPBS
;; -------------------------------------------------------------------------

;; Binary BRKs (BRKN, BRKPA, BRKPB).
(define_insn "@aarch64_brk<brk_op>"
  [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
	(unspec:VNx16BI
	  [(match_operand:VNx16BI 1 "register_operand" "Upa")
	   (match_operand:VNx16BI 2 "register_operand" "Upa")
	   (match_operand:VNx16BI 3 "register_operand" "<brk_reg_con>")]
	  SVE_BRK_BINARY))]
  "TARGET_SVE"
  "brk<brk_op>\t%0.b, %1/z, %2.b, %<brk_reg_opno>.b"
)

;; Same, but also producing a flags result.
(define_insn "*aarch64_brk<brk_op>_cc"
  [(set (reg:CC_NZC CC_REGNUM)
	(unspec:CC_NZC
	  [(match_operand:VNx16BI 1 "register_operand" "Upa")
	   (match_dup 1)
	   (match_operand:SI 4 "aarch64_sve_ptrue_flag")
	   (unspec:VNx16BI
	     [(match_dup 1)
	      (match_operand:VNx16BI 2 "register_operand" "Upa")
	      (match_operand:VNx16BI 3 "register_operand" "<brk_reg_con>")]
	     SVE_BRK_BINARY)]
	  UNSPEC_PTEST))
   (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
	(unspec:VNx16BI
	  [(match_dup 1)
	   (match_dup 2)
	   (match_dup 3)]
	  SVE_BRK_BINARY))]
  "TARGET_SVE"
  "brk<brk_op>s\t%0.b, %1/z, %2.b, %<brk_reg_opno>.b"
)

;; Same, but with only the flags result being interesting.
(define_insn "*aarch64_brk<brk_op>_ptest"
  [(set (reg:CC_NZC CC_REGNUM)
	(unspec:CC_NZC
	  [(match_operand:VNx16BI 1 "register_operand" "Upa")
	   (match_dup 1)
	   (match_operand:SI 4 "aarch64_sve_ptrue_flag")
	   (unspec:VNx16BI
	     [(match_dup 1)
	      (match_operand:VNx16BI 2 "register_operand" "Upa")
	      (match_operand:VNx16BI 3 "register_operand" "<brk_reg_con>")]
	     SVE_BRK_BINARY)]
	  UNSPEC_PTEST))
   (clobber (match_scratch:VNx16BI 0 "=Upa"))]
  "TARGET_SVE"
  "brk<brk_op>s\t%0.b, %1/z, %2.b, %<brk_reg_opno>.b"
)
9720 | ||
;; -------------------------------------------------------------------------
;; ---- [PRED] Scalarization
;; -------------------------------------------------------------------------
;; Includes:
;; - PFIRST
;; - PNEXT
;; -------------------------------------------------------------------------

;; Predicate iteration (PFIRST/PNEXT).  Operand 3 is both an input and,
;; via the "0" tie, the register that receives the result.
(define_insn "@aarch64_sve_<sve_pred_op><mode>"
  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
	(unspec:PRED_ALL
	  [(match_operand:PRED_ALL 1 "register_operand" "Upa")
	   (match_operand:SI 2 "aarch64_sve_ptrue_flag")
	   (match_operand:PRED_ALL 3 "register_operand" "0")]
	  SVE_PITER))
   (clobber (reg:CC_NZC CC_REGNUM))]
  "TARGET_SVE && <max_elem_bits> >= <elem_bits>"
  "<sve_pred_op>\t%0.<Vetype>, %1, %0.<Vetype>"
)

;; Same, but also producing a flags result.
(define_insn_and_rewrite "*aarch64_sve_<sve_pred_op><mode>_cc"
  [(set (reg:CC_NZC CC_REGNUM)
	(unspec:CC_NZC
	  [(match_operand:VNx16BI 1 "register_operand" "Upa")
	   (match_operand 2)
	   (match_operand:SI 3 "aarch64_sve_ptrue_flag")
	   (unspec:PRED_ALL
	     [(match_operand 4)
	      (match_operand:SI 5 "aarch64_sve_ptrue_flag")
	      (match_operand:PRED_ALL 6 "register_operand" "0")]
	     SVE_PITER)]
	  UNSPEC_PTEST))
   (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
	(unspec:PRED_ALL
	  [(match_dup 4)
	   (match_dup 5)
	   (match_dup 6)]
	  SVE_PITER))]
  "TARGET_SVE
   && <max_elem_bits> >= <elem_bits>
   && aarch64_sve_same_pred_for_ptest_p (&operands[2], &operands[4])"
  "<sve_pred_op>\t%0.<Vetype>, %1, %0.<Vetype>"
  "&& !rtx_equal_p (operands[2], operands[4])"
  {
    operands[4] = operands[2];
    operands[5] = operands[3];
  }
)

;; Same, but with only the flags result being interesting.
(define_insn_and_rewrite "*aarch64_sve_<sve_pred_op><mode>_ptest"
  [(set (reg:CC_NZC CC_REGNUM)
	(unspec:CC_NZC
	  [(match_operand:VNx16BI 1 "register_operand" "Upa")
	   (match_operand 2)
	   (match_operand:SI 3 "aarch64_sve_ptrue_flag")
	   (unspec:PRED_ALL
	     [(match_operand 4)
	      (match_operand:SI 5 "aarch64_sve_ptrue_flag")
	      (match_operand:PRED_ALL 6 "register_operand" "0")]
	     SVE_PITER)]
	  UNSPEC_PTEST))
   (clobber (match_scratch:PRED_ALL 0 "=Upa"))]
  "TARGET_SVE
   && <max_elem_bits> >= <elem_bits>
   && aarch64_sve_same_pred_for_ptest_p (&operands[2], &operands[4])"
  "<sve_pred_op>\t%0.<Vetype>, %1, %0.<Vetype>"
  "&& !rtx_equal_p (operands[2], operands[4])"
  {
    operands[4] = operands[2];
    operands[5] = operands[3];
  }
)
9795 | ||
;; =========================================================================
;; == Counting elements
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- [INT] Count elements in a pattern (scalar)
;; -------------------------------------------------------------------------
;; Includes:
;; - CNTB
;; - CNTD
;; - CNTH
;; - CNTW
;; -------------------------------------------------------------------------

;; Count the number of elements in an svpattern.  Operand 1 is the pattern,
;; operand 2 is the number of elements that fit in a 128-bit block, and
;; operand 3 is a multiplier in the range [1, 16].
;;
;; Note that this pattern isn't used for SV_ALL (but would work for that too).
(define_insn "aarch64_sve_cnt_pat"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(zero_extend:DI
	  (unspec:SI [(match_operand:DI 1 "const_int_operand")
		      (match_operand:DI 2 "const_int_operand")
		      (match_operand:DI 3 "const_int_operand")]
		     UNSPEC_SVE_CNT_PAT)))]
  "TARGET_SVE"
  {
    return aarch64_output_sve_cnt_pat_immediate ("cnt", "%x0", operands + 1);
  }
)
9827 | ||
;; -------------------------------------------------------------------------
;; ---- [INT] Increment by the number of elements in a pattern (scalar)
;; -------------------------------------------------------------------------
;; Includes:
;; - INC
;; - SQINC
;; - UQINC
;; -------------------------------------------------------------------------

;; Increment a DImode register by the number of elements in an svpattern.
;; See aarch64_sve_cnt_pat for the counting behavior.
(define_insn "@aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(ANY_PLUS:DI (zero_extend:DI
		       (unspec:SI [(match_operand:DI 2 "const_int_operand")
				   (match_operand:DI 3 "const_int_operand")
				   (match_operand:DI 4 "const_int_operand")]
				  UNSPEC_SVE_CNT_PAT))
		     (match_operand:DI_ONLY 1 "register_operand" "0")))]
  "TARGET_SVE"
  {
    return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%x0",
						 operands + 2);
  }
)

;; Increment an SImode register by the number of elements in an svpattern
;; using modular arithmetic.  See aarch64_sve_cnt_pat for the counting
;; behavior.
(define_insn "*aarch64_sve_incsi_pat"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(plus:SI (unspec:SI [(match_operand:DI 2 "const_int_operand")
			     (match_operand:DI 3 "const_int_operand")
			     (match_operand:DI 4 "const_int_operand")]
			    UNSPEC_SVE_CNT_PAT)
		 (match_operand:SI 1 "register_operand" "0")))]
  "TARGET_SVE"
  {
    return aarch64_output_sve_cnt_pat_immediate ("inc", "%x0", operands + 2);
  }
)

;; Increment an SImode register by the number of elements in an svpattern
;; using saturating arithmetic, extending the result to 64 bits.
;;
;; See aarch64_sve_cnt_pat for the counting behavior.
(define_insn "@aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(<paired_extend>:DI
	  (SAT_PLUS:SI
	    (unspec:SI [(match_operand:DI 2 "const_int_operand")
			(match_operand:DI 3 "const_int_operand")
			(match_operand:DI 4 "const_int_operand")]
		       UNSPEC_SVE_CNT_PAT)
	    (match_operand:SI_ONLY 1 "register_operand" "0"))))]
  "TARGET_SVE"
  {
    /* Signed saturation writes the sign-extended result to the full Xd,
       so it needs both register views in the printed operand list.  */
    const char *registers = (<CODE> == SS_PLUS ? "%x0, %w0" : "%w0");
    return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", registers,
						 operands + 2);
  }
)
9890 | ||
;; -------------------------------------------------------------------------
;; ---- [INT] Increment by the number of elements in a pattern (vector)
;; -------------------------------------------------------------------------
;; Includes:
;; - INC
;; - SQINC
;; - UQINC
;; -------------------------------------------------------------------------

;; Increment a vector of DIs by the number of elements in an svpattern.
;; See aarch64_sve_cnt_pat for the counting behavior.
(define_insn "@aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:VNx2DI 0 "register_operand" "=w, ?&w")
	(ANY_PLUS:VNx2DI
	  (vec_duplicate:VNx2DI
	    (zero_extend:DI
	      (unspec:SI [(match_operand:DI 2 "const_int_operand")
			  (match_operand:DI 3 "const_int_operand")
			  (match_operand:DI 4 "const_int_operand")]
			 UNSPEC_SVE_CNT_PAT)))
	  (match_operand:VNx2DI_ONLY 1 "register_operand" "0, w")))]
  "TARGET_SVE"
  {
    if (which_alternative == 1)
      output_asm_insn ("movprfx\t%0, %1", operands);
    return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
						 operands + 2);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; Increment a vector of SIs by the number of elements in an svpattern.
;; See aarch64_sve_cnt_pat for the counting behavior.
(define_insn "@aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:VNx4SI 0 "register_operand" "=w, ?&w")
	(ANY_PLUS:VNx4SI
	  (vec_duplicate:VNx4SI
	    (unspec:SI [(match_operand:DI 2 "const_int_operand")
			(match_operand:DI 3 "const_int_operand")
			(match_operand:DI 4 "const_int_operand")]
		       UNSPEC_SVE_CNT_PAT))
	  (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")))]
  "TARGET_SVE"
  {
    if (which_alternative == 1)
      output_asm_insn ("movprfx\t%0, %1", operands);
    return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
						 operands + 2);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; Increment a vector of HIs by the number of elements in an svpattern.
;; See aarch64_sve_cnt_pat for the counting behavior.
(define_expand "@aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:VNx8HI 0 "register_operand")
	(ANY_PLUS:VNx8HI
	  (vec_duplicate:VNx8HI
	    (truncate:HI
	      (unspec:SI [(match_operand:DI 2 "const_int_operand")
			  (match_operand:DI 3 "const_int_operand")
			  (match_operand:DI 4 "const_int_operand")]
			 UNSPEC_SVE_CNT_PAT)))
	  (match_operand:VNx8HI_ONLY 1 "register_operand")))]
  "TARGET_SVE"
)

(define_insn "*aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:VNx8HI 0 "register_operand" "=w, ?&w")
	(ANY_PLUS:VNx8HI
	  (vec_duplicate:VNx8HI
	    (match_operator:HI 5 "subreg_lowpart_operator"
	      [(unspec:SI [(match_operand:DI 2 "const_int_operand")
			   (match_operand:DI 3 "const_int_operand")
			   (match_operand:DI 4 "const_int_operand")]
			  UNSPEC_SVE_CNT_PAT)]))
	  (match_operand:VNx8HI_ONLY 1 "register_operand" "0, w")))]
  "TARGET_SVE"
  {
    if (which_alternative == 1)
      output_asm_insn ("movprfx\t%0, %1", operands);
    return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
						 operands + 2);
  }
  [(set_attr "movprfx" "*,yes")]
)
9977 | ||
;; -------------------------------------------------------------------------
;; ---- [INT] Decrement by the number of elements in a pattern (scalar)
;; -------------------------------------------------------------------------
;; Includes:
;; - DEC
;; - SQDEC
;; - UQDEC
;; -------------------------------------------------------------------------

;; Decrement a DImode register by the number of elements in an svpattern.
;; See aarch64_sve_cnt_pat for the counting behavior.
(define_insn "@aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(ANY_MINUS:DI (match_operand:DI_ONLY 1 "register_operand" "0")
		      (zero_extend:DI
			(unspec:SI [(match_operand:DI 2 "const_int_operand")
				    (match_operand:DI 3 "const_int_operand")
				    (match_operand:DI 4 "const_int_operand")]
				   UNSPEC_SVE_CNT_PAT))))]
  "TARGET_SVE"
  {
    return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%x0",
						 operands + 2);
  }
)

;; Decrement an SImode register by the number of elements in an svpattern
;; using modular arithmetic.  See aarch64_sve_cnt_pat for the counting
;; behavior.
(define_insn "*aarch64_sve_decsi_pat"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(minus:SI (match_operand:SI 1 "register_operand" "0")
		  (unspec:SI [(match_operand:DI 2 "const_int_operand")
			      (match_operand:DI 3 "const_int_operand")
			      (match_operand:DI 4 "const_int_operand")]
			     UNSPEC_SVE_CNT_PAT)))]
  "TARGET_SVE"
  {
    return aarch64_output_sve_cnt_pat_immediate ("dec", "%x0", operands + 2);
  }
)

;; Decrement an SImode register by the number of elements in an svpattern
;; using saturating arithmetic, extending the result to 64 bits.
;;
;; See aarch64_sve_cnt_pat for the counting behavior.
(define_insn "@aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(<paired_extend>:DI
	  (SAT_MINUS:SI
	    (match_operand:SI_ONLY 1 "register_operand" "0")
	    (unspec:SI [(match_operand:DI 2 "const_int_operand")
			(match_operand:DI 3 "const_int_operand")
			(match_operand:DI 4 "const_int_operand")]
		       UNSPEC_SVE_CNT_PAT))))]
  "TARGET_SVE"
  {
    /* Signed saturation writes the sign-extended result to the full Xd,
       so it needs both register views in the printed operand list.  */
    const char *registers = (<CODE> == SS_MINUS ? "%x0, %w0" : "%w0");
    return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", registers,
						 operands + 2);
  }
)
10040 | ||
10041 | ;; ------------------------------------------------------------------------- | |
10042 | ;; ---- [INT] Decrement by the number of elements in a pattern (vector) | |
10043 | ;; ------------------------------------------------------------------------- | |
10044 | ;; Includes: | |
10045 | ;; - DEC | |
10046 | ;; - SQDEC | |
10047 | ;; - UQDEC | |
10048 | ;; ------------------------------------------------------------------------- | |
10049 | ||
10050 | ;; Decrement a vector of DIs by the number of elements in an svpattern. | |
10051 | ;; See aarch64_sve_cnt_pat for the counting behavior. | |
10052 | (define_insn "@aarch64_sve_<inc_dec><mode>_pat" | |
10053 | [(set (match_operand:VNx2DI 0 "register_operand" "=w, ?&w") | |
10054 | (ANY_MINUS:VNx2DI | |
10055 | (match_operand:VNx2DI_ONLY 1 "register_operand" "0, w") | |
10056 | (vec_duplicate:VNx2DI | |
10057 | (zero_extend:DI | |
10058 | (unspec:SI [(match_operand:DI 2 "const_int_operand") | |
10059 | (match_operand:DI 3 "const_int_operand") | |
10060 | (match_operand:DI 4 "const_int_operand")] | |
10061 | UNSPEC_SVE_CNT_PAT)))))] | |
10062 | "TARGET_SVE" | |
10063 | { | |
10064 | if (which_alternative == 1) | |
10065 | output_asm_insn ("movprfx\t%0, %1", operands); | |
10066 | return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>", | |
10067 | operands + 2); | |
10068 | } | |
10069 | [(set_attr "movprfx" "*,yes")] | |
10070 | ) | |
10071 | ||
;; Decrement a vector of SIs by the number of elements in an svpattern.
;; See aarch64_sve_cnt_pat for the counting behavior.
;;
;; Same shape as the VNx2DI pattern above, but the SI count needs no
;; extension before the broadcast.  Alternative 1 uses MOVPRFX so the
;; destination need not be tied to operand 1.
(define_insn "@aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:VNx4SI 0 "register_operand" "=w, ?&w")
	(ANY_MINUS:VNx4SI
	  (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")
	  (vec_duplicate:VNx4SI
	    (unspec:SI [(match_operand:DI 2 "const_int_operand")
			(match_operand:DI 3 "const_int_operand")
			(match_operand:DI 4 "const_int_operand")]
		       UNSPEC_SVE_CNT_PAT))))]
  "TARGET_SVE"
  {
    if (which_alternative == 1)
      output_asm_insn ("movprfx\t%0, %1", operands);
    return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
						 operands + 2);
  }
  [(set_attr "movprfx" "*,yes")]
)
10092 | ||
;; Decrement a vector of HIs by the number of elements in an svpattern.
;; See aarch64_sve_cnt_pat for the counting behavior.
;;
;; The count is computed in SImode and truncated to HImode before the
;; broadcast.  The expander emits the truncation explicitly; by the time
;; the insn is matched, the truncation appears as a lowpart subreg, which
;; the "*" pattern below accepts via subreg_lowpart_operator.
(define_expand "@aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:VNx8HI 0 "register_operand")
	(ANY_MINUS:VNx8HI
	  (match_operand:VNx8HI_ONLY 1 "register_operand")
	  (vec_duplicate:VNx8HI
	    (truncate:HI
	      (unspec:SI [(match_operand:DI 2 "const_int_operand")
			  (match_operand:DI 3 "const_int_operand")
			  (match_operand:DI 4 "const_int_operand")]
			 UNSPEC_SVE_CNT_PAT)))))]
  "TARGET_SVE"
)

;; Match the lowpart-subreg form of the truncation emitted by the
;; expander above.  Alternative 1 uses MOVPRFX so the destination need
;; not be tied to operand 1.
(define_insn "*aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:VNx8HI 0 "register_operand" "=w, ?&w")
	(ANY_MINUS:VNx8HI
	  (match_operand:VNx8HI_ONLY 1 "register_operand" "0, w")
	  (vec_duplicate:VNx8HI
	    (match_operator:HI 5 "subreg_lowpart_operator"
	      [(unspec:SI [(match_operand:DI 2 "const_int_operand")
			   (match_operand:DI 3 "const_int_operand")
			   (match_operand:DI 4 "const_int_operand")]
			  UNSPEC_SVE_CNT_PAT)]))))]
  "TARGET_SVE"
  {
    if (which_alternative == 1)
      output_asm_insn ("movprfx\t%0, %1", operands);
    return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
						 operands + 2);
  }
  [(set_attr "movprfx" "*,yes")]
)
10127 | ||
10128 | ;; ------------------------------------------------------------------------- | |
10129 | ;; ---- [INT] Count elements in a predicate (scalar) | |
10130 | ;; ------------------------------------------------------------------------- | |
10131 | ;; Includes: | |
10132 | ;; - CNTP | |
10133 | ;; ------------------------------------------------------------------------- | |
10134 | ||
;; Count the number of set bits in predicate operand 3, governed by
;; predicate operand 1.  Operand 2 is the ptrue flag: it is true if
;; operand 1 is known to be all-true.  (The original comment said
;; "operand 3" here, but operand 3 is the counted predicate register;
;; the flag predicate aarch64_sve_ptrue_flag is operand 2.)
;; The SImode count is zero-extended into the full DImode result.
(define_insn "@aarch64_pred_cntp<mode>"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(zero_extend:DI
	  (unspec:SI [(match_operand:PRED_ALL 1 "register_operand" "Upl")
		      (match_operand:SI 2 "aarch64_sve_ptrue_flag")
		      (match_operand:PRED_ALL 3 "register_operand" "Upa")]
		     UNSPEC_CNTP)))]
  "TARGET_SVE"
  "cntp\t%x0, %1, %3.<Vetype>")
10146 | ||
10147 | ;; ------------------------------------------------------------------------- | |
10148 | ;; ---- [INT] Increment by the number of elements in a predicate (scalar) | |
10149 | ;; ------------------------------------------------------------------------- | |
10150 | ;; Includes: | |
10151 | ;; - INCP | |
10152 | ;; - SQINCP | |
10153 | ;; - UQINCP | |
10154 | ;; ------------------------------------------------------------------------- | |
10155 | ||
;; Increment a DImode register by the number of set bits in a predicate.
;; See aarch64_sve_cntp for a description of the operands.
;;
;; The expander supplies an all-true governing predicate (operand 3) and
;; marks it SVE_KNOWN_PTRUE, so the count covers every element of
;; operand 2.
(define_expand "@aarch64_sve_<inc_dec><DI_ONLY:mode><PRED_ALL:mode>_cntp"
  [(set (match_operand:DI 0 "register_operand")
	(ANY_PLUS:DI
	  (zero_extend:DI
	    (unspec:SI [(match_dup 3)
			(const_int SVE_KNOWN_PTRUE)
			(match_operand:PRED_ALL 2 "register_operand")]
		       UNSPEC_CNTP))
	  (match_operand:DI_ONLY 1 "register_operand")))]
  "TARGET_SVE"
  {
    operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
  }
)

;; Matching insn.  Operand 3 (the governing predicate) is matched loosely
;; so that combine-generated variants are accepted; the rewrite step
;; canonicalizes any non-constant operand 3 back to the all-true constant,
;; which is valid because the SVE_KNOWN_PTRUE flag asserts it is all-true.
(define_insn_and_rewrite "*aarch64_sve_<inc_dec><DI_ONLY:mode><PRED_ALL:mode>_cntp"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(ANY_PLUS:DI
	  (zero_extend:DI
	    (unspec:SI [(match_operand 3)
			(const_int SVE_KNOWN_PTRUE)
			(match_operand:PRED_ALL 2 "register_operand" "Upa")]
		       UNSPEC_CNTP))
	  (match_operand:DI_ONLY 1 "register_operand" "0")))]
  "TARGET_SVE"
  "<inc_dec>p\t%x0, %2.<PRED_ALL:Vetype>"
  "&& !CONSTANT_P (operands[3])"
  {
    operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
  }
)
10189 | ||
;; Increment an SImode register by the number of set bits in a predicate
;; using modular arithmetic.  See aarch64_sve_cntp for a description of
;; the operands.
;;
;; The template prints the X-register form of INCP; for an SImode result
;; only the low 32 bits of operand 0 are significant.  The rewrite step
;; canonicalizes a non-constant governing predicate (operand 3) back to
;; all-true, as licensed by the SVE_KNOWN_PTRUE flag.
(define_insn_and_rewrite "*aarch64_incsi<mode>_cntp"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(plus:SI
	  (unspec:SI [(match_operand 3)
		      (const_int SVE_KNOWN_PTRUE)
		      (match_operand:PRED_ALL 2 "register_operand" "Upa")]
		     UNSPEC_CNTP)
	  (match_operand:SI 1 "register_operand" "0")))]
  "TARGET_SVE"
  "incp\t%x0, %2.<Vetype>"
  "&& !CONSTANT_P (operands[3])"
  {
    operands[3] = CONSTM1_RTX (<MODE>mode);
  }
)
10208 | ||
;; Increment an SImode register by the number of set bits in a predicate
;; using saturating arithmetic, extending the result to 64 bits.
;;
;; See aarch64_sve_cntp for a description of the operands.
;;
;; <paired_extend> selects sign- or zero-extension to match the signed
;; (SS_PLUS) or unsigned (US_PLUS) saturation.  The expander supplies an
;; all-true governing predicate, flagged SVE_KNOWN_PTRUE.
(define_expand "@aarch64_sve_<inc_dec><SI_ONLY:mode><PRED_ALL:mode>_cntp"
  [(set (match_operand:DI 0 "register_operand")
	(<paired_extend>:DI
	  (SAT_PLUS:SI
	    (unspec:SI [(match_dup 3)
			(const_int SVE_KNOWN_PTRUE)
			(match_operand:PRED_ALL 2 "register_operand")]
		       UNSPEC_CNTP)
	    (match_operand:SI_ONLY 1 "register_operand"))))]
  "TARGET_SVE"
  {
    operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
  }
)

;; Matching insn.  The signed form (SQINCP) uses the "Xd, Pg.T, Wd"
;; syntax, whereas the unsigned form (UQINCP) takes a W register
;; destination, hence the two output templates.  The rewrite step
;; canonicalizes a non-constant governing predicate back to all-true.
(define_insn_and_rewrite "*aarch64_sve_<inc_dec><SI_ONLY:mode><PRED_ALL:mode>_cntp"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(<paired_extend>:DI
	  (SAT_PLUS:SI
	    (unspec:SI [(match_operand 3)
			(const_int SVE_KNOWN_PTRUE)
			(match_operand:PRED_ALL 2 "register_operand" "Upa")]
		       UNSPEC_CNTP)
	    (match_operand:SI_ONLY 1 "register_operand" "0"))))]
  "TARGET_SVE"
  {
    if (<CODE> == SS_PLUS)
      return "<inc_dec>p\t%x0, %2.<PRED_ALL:Vetype>, %w0";
    else
      return "<inc_dec>p\t%w0, %2.<PRED_ALL:Vetype>";
  }
  "&& !CONSTANT_P (operands[3])"
  {
    operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
  }
)
10249 | ||
10250 | ;; ------------------------------------------------------------------------- | |
10251 | ;; ---- [INT] Increment by the number of elements in a predicate (vector) | |
10252 | ;; ------------------------------------------------------------------------- | |
10253 | ;; Includes: | |
10254 | ;; - INCP | |
10255 | ;; - SQINCP | |
10256 | ;; - UQINCP | |
10257 | ;; ------------------------------------------------------------------------- | |
10258 | ||
;; Increment a vector of DIs by the number of set bits in a predicate.
;; See aarch64_sve_cntp for a description of the operands.
;;
;; The SImode count is zero-extended and broadcast before the addition.
;; The expander supplies an all-true governing predicate (operand 3),
;; flagged SVE_KNOWN_PTRUE.
(define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx2DI 0 "register_operand")
	(ANY_PLUS:VNx2DI
	  (vec_duplicate:VNx2DI
	    (zero_extend:DI
	      (unspec:SI
		[(match_dup 3)
		 (const_int SVE_KNOWN_PTRUE)
		 (match_operand:<VPRED> 2 "register_operand")]
		UNSPEC_CNTP)))
	  (match_operand:VNx2DI_ONLY 1 "register_operand")))]
  "TARGET_SVE"
  {
    operands[3] = CONSTM1_RTX (<VPRED>mode);
  }
)

;; Matching insn.  Alternative 1 uses MOVPRFX so the destination need not
;; be tied to operand 1.  The rewrite step canonicalizes a non-constant
;; governing predicate back to all-true.
(define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx2DI 0 "register_operand" "=w, ?&w")
	(ANY_PLUS:VNx2DI
	  (vec_duplicate:VNx2DI
	    (zero_extend:DI
	      (unspec:SI
		[(match_operand 3)
		 (const_int SVE_KNOWN_PTRUE)
		 (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")]
		UNSPEC_CNTP)))
	  (match_operand:VNx2DI_ONLY 1 "register_operand" "0, w")))]
  "TARGET_SVE"
  "@
   <inc_dec>p\t%0.d, %2
   movprfx\t%0, %1\;<inc_dec>p\t%0.d, %2"
  "&& !CONSTANT_P (operands[3])"
  {
    operands[3] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)
10299 | ||
;; Increment a vector of SIs by the number of set bits in a predicate.
;; See aarch64_sve_cntp for a description of the operands.
;;
;; Same shape as the VNx2DI pattern above, but the SI count is broadcast
;; directly with no extension.
(define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx4SI 0 "register_operand")
	(ANY_PLUS:VNx4SI
	  (vec_duplicate:VNx4SI
	    (unspec:SI
	      [(match_dup 3)
	       (const_int SVE_KNOWN_PTRUE)
	       (match_operand:<VPRED> 2 "register_operand")]
	      UNSPEC_CNTP))
	  (match_operand:VNx4SI_ONLY 1 "register_operand")))]
  "TARGET_SVE"
  {
    operands[3] = CONSTM1_RTX (<VPRED>mode);
  }
)

;; Matching insn.  Alternative 1 uses MOVPRFX so the destination need not
;; be tied to operand 1.  The rewrite step canonicalizes a non-constant
;; governing predicate back to all-true.
(define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx4SI 0 "register_operand" "=w, ?&w")
	(ANY_PLUS:VNx4SI
	  (vec_duplicate:VNx4SI
	    (unspec:SI
	      [(match_operand 3)
	       (const_int SVE_KNOWN_PTRUE)
	       (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")]
	      UNSPEC_CNTP))
	  (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")))]
  "TARGET_SVE"
  "@
   <inc_dec>p\t%0.s, %2
   movprfx\t%0, %1\;<inc_dec>p\t%0.s, %2"
  "&& !CONSTANT_P (operands[3])"
  {
    operands[3] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)
10338 | ||
;; Increment a vector of HIs by the number of set bits in a predicate.
;; See aarch64_sve_cntp for a description of the operands.
;;
;; The SImode count is truncated to HImode before the broadcast.  The
;; expander writes the truncation explicitly; in the matched insn it
;; appears as a lowpart subreg (subreg_lowpart_operator), so the operand
;; numbering shifts: the governing predicate becomes operand 4.
(define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx8HI 0 "register_operand")
	(ANY_PLUS:VNx8HI
	  (vec_duplicate:VNx8HI
	    (truncate:HI
	      (unspec:SI
		[(match_dup 3)
		 (const_int SVE_KNOWN_PTRUE)
		 (match_operand:<VPRED> 2 "register_operand")]
		UNSPEC_CNTP)))
	  (match_operand:VNx8HI_ONLY 1 "register_operand")))]
  "TARGET_SVE"
  {
    operands[3] = CONSTM1_RTX (<VPRED>mode);
  }
)

;; Matching insn.  Alternative 1 uses MOVPRFX so the destination need not
;; be tied to operand 1.  The rewrite step canonicalizes a non-constant
;; governing predicate (operand 4 here) back to all-true.
(define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx8HI 0 "register_operand" "=w, ?&w")
	(ANY_PLUS:VNx8HI
	  (vec_duplicate:VNx8HI
	    (match_operator:HI 3 "subreg_lowpart_operator"
	      [(unspec:SI
		 [(match_operand 4)
		  (const_int SVE_KNOWN_PTRUE)
		  (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")]
		 UNSPEC_CNTP)]))
	  (match_operand:VNx8HI_ONLY 1 "register_operand" "0, w")))]
  "TARGET_SVE"
  "@
   <inc_dec>p\t%0.h, %2
   movprfx\t%0, %1\;<inc_dec>p\t%0.h, %2"
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)
10379 | ||
10380 | ;; ------------------------------------------------------------------------- | |
10381 | ;; ---- [INT] Decrement by the number of elements in a predicate (scalar) | |
10382 | ;; ------------------------------------------------------------------------- | |
10383 | ;; Includes: | |
10384 | ;; - DECP | |
10385 | ;; - SQDECP | |
10386 | ;; - UQDECP | |
10387 | ;; ------------------------------------------------------------------------- | |
10388 | ||
;; Decrement a DImode register by the number of set bits in a predicate.
;; See aarch64_sve_cntp for a description of the operands.
;;
;; Mirror image of the DImode increment pattern: operand 1 is the
;; minuend, and the zero-extended count is subtracted from it.  The
;; expander supplies an all-true governing predicate (operand 3),
;; flagged SVE_KNOWN_PTRUE.
(define_expand "@aarch64_sve_<inc_dec><DI_ONLY:mode><PRED_ALL:mode>_cntp"
  [(set (match_operand:DI 0 "register_operand")
	(ANY_MINUS:DI
	  (match_operand:DI_ONLY 1 "register_operand")
	  (zero_extend:DI
	    (unspec:SI [(match_dup 3)
			(const_int SVE_KNOWN_PTRUE)
			(match_operand:PRED_ALL 2 "register_operand")]
		       UNSPEC_CNTP))))]
  "TARGET_SVE"
  {
    operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
  }
)

;; Matching insn.  The rewrite step canonicalizes a non-constant
;; governing predicate back to all-true, as licensed by the
;; SVE_KNOWN_PTRUE flag.
(define_insn_and_rewrite "*aarch64_sve_<inc_dec><DI_ONLY:mode><PRED_ALL:mode>_cntp"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(ANY_MINUS:DI
	  (match_operand:DI_ONLY 1 "register_operand" "0")
	  (zero_extend:DI
	    (unspec:SI [(match_operand 3)
			(const_int SVE_KNOWN_PTRUE)
			(match_operand:PRED_ALL 2 "register_operand" "Upa")]
		       UNSPEC_CNTP))))]
  "TARGET_SVE"
  "<inc_dec>p\t%x0, %2.<PRED_ALL:Vetype>"
  "&& !CONSTANT_P (operands[3])"
  {
    operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
  }
)
10422 | ||
;; Decrement an SImode register by the number of set bits in a predicate
;; using modular arithmetic.  See aarch64_sve_cntp for a description of the
;; operands.
;;
;; The template prints the X-register form of DECP; for an SImode result
;; only the low 32 bits of operand 0 are significant.  The rewrite step
;; canonicalizes a non-constant governing predicate (operand 3) back to
;; all-true, as licensed by the SVE_KNOWN_PTRUE flag.
(define_insn_and_rewrite "*aarch64_decsi<mode>_cntp"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(minus:SI
	  (match_operand:SI 1 "register_operand" "0")
	  (unspec:SI [(match_operand 3)
		      (const_int SVE_KNOWN_PTRUE)
		      (match_operand:PRED_ALL 2 "register_operand" "Upa")]
		     UNSPEC_CNTP)))]
  "TARGET_SVE"
  "decp\t%x0, %2.<Vetype>"
  "&& !CONSTANT_P (operands[3])"
  {
    operands[3] = CONSTM1_RTX (<MODE>mode);
  }
)
10441 | ||
;; Decrement an SImode register by the number of set bits in a predicate
;; using saturating arithmetic, extending the result to 64 bits.
;;
;; See aarch64_sve_cntp for a description of the operands.
;;
;; <paired_extend> selects sign- or zero-extension to match the signed
;; (SS_MINUS) or unsigned (US_MINUS) saturation.  The expander supplies
;; an all-true governing predicate, flagged SVE_KNOWN_PTRUE.
(define_expand "@aarch64_sve_<inc_dec><SI_ONLY:mode><PRED_ALL:mode>_cntp"
  [(set (match_operand:DI 0 "register_operand")
	(<paired_extend>:DI
	  (SAT_MINUS:SI
	    (match_operand:SI_ONLY 1 "register_operand")
	    (unspec:SI [(match_dup 3)
			(const_int SVE_KNOWN_PTRUE)
			(match_operand:PRED_ALL 2 "register_operand")]
		       UNSPEC_CNTP))))]
  "TARGET_SVE"
  {
    operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
  }
)

;; Matching insn.  The signed form (SQDECP) uses the "Xd, Pg.T, Wd"
;; syntax, whereas the unsigned form (UQDECP) takes a W register
;; destination, hence the two output templates.  The rewrite step
;; canonicalizes a non-constant governing predicate back to all-true.
(define_insn_and_rewrite "*aarch64_sve_<inc_dec><SI_ONLY:mode><PRED_ALL:mode>_cntp"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(<paired_extend>:DI
	  (SAT_MINUS:SI
	    (match_operand:SI_ONLY 1 "register_operand" "0")
	    (unspec:SI [(match_operand 3)
			(const_int SVE_KNOWN_PTRUE)
			(match_operand:PRED_ALL 2 "register_operand" "Upa")]
		       UNSPEC_CNTP))))]
  "TARGET_SVE"
  {
    if (<CODE> == SS_MINUS)
      return "<inc_dec>p\t%x0, %2.<PRED_ALL:Vetype>, %w0";
    else
      return "<inc_dec>p\t%w0, %2.<PRED_ALL:Vetype>";
  }
  "&& !CONSTANT_P (operands[3])"
  {
    operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
  }
)
10482 | ||
10483 | ;; ------------------------------------------------------------------------- | |
10484 | ;; ---- [INT] Decrement by the number of elements in a predicate (vector) | |
10485 | ;; ------------------------------------------------------------------------- | |
10486 | ;; Includes: | |
10487 | ;; - DECP | |
10488 | ;; - SQDECP | |
10489 | ;; - UQDECP | |
10490 | ;; ------------------------------------------------------------------------- | |
10491 | ||
;; Decrement a vector of DIs by the number of set bits in a predicate.
;; See aarch64_sve_cntp for a description of the operands.
;;
;; Mirror image of the VNx2DI increment pattern: the zero-extended,
;; broadcast count is subtracted from operand 1.  The expander supplies
;; an all-true governing predicate, flagged SVE_KNOWN_PTRUE.
(define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx2DI 0 "register_operand")
	(ANY_MINUS:VNx2DI
	  (match_operand:VNx2DI_ONLY 1 "register_operand")
	  (vec_duplicate:VNx2DI
	    (zero_extend:DI
	      (unspec:SI
		[(match_dup 3)
		 (const_int SVE_KNOWN_PTRUE)
		 (match_operand:<VPRED> 2 "register_operand")]
		UNSPEC_CNTP)))))]
  "TARGET_SVE"
  {
    operands[3] = CONSTM1_RTX (<VPRED>mode);
  }
)

;; Matching insn.  Alternative 1 uses MOVPRFX so the destination need not
;; be tied to operand 1.  The rewrite step canonicalizes a non-constant
;; governing predicate back to all-true.
(define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx2DI 0 "register_operand" "=w, ?&w")
	(ANY_MINUS:VNx2DI
	  (match_operand:VNx2DI_ONLY 1 "register_operand" "0, w")
	  (vec_duplicate:VNx2DI
	    (zero_extend:DI
	      (unspec:SI
		[(match_operand 3)
		 (const_int SVE_KNOWN_PTRUE)
		 (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")]
		UNSPEC_CNTP)))))]
  "TARGET_SVE"
  "@
   <inc_dec>p\t%0.d, %2
   movprfx\t%0, %1\;<inc_dec>p\t%0.d, %2"
  "&& !CONSTANT_P (operands[3])"
  {
    operands[3] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)
10532 | ||
;; Decrement a vector of SIs by the number of set bits in a predicate.
;; See aarch64_sve_cntp for a description of the operands.
;;
;; Same shape as the VNx2DI pattern above, but the SI count is broadcast
;; directly with no extension.
(define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx4SI 0 "register_operand")
	(ANY_MINUS:VNx4SI
	  (match_operand:VNx4SI_ONLY 1 "register_operand")
	  (vec_duplicate:VNx4SI
	    (unspec:SI
	      [(match_dup 3)
	       (const_int SVE_KNOWN_PTRUE)
	       (match_operand:<VPRED> 2 "register_operand")]
	      UNSPEC_CNTP))))]
  "TARGET_SVE"
  {
    operands[3] = CONSTM1_RTX (<VPRED>mode);
  }
)

;; Matching insn.  Alternative 1 uses MOVPRFX so the destination need not
;; be tied to operand 1.  The rewrite step canonicalizes a non-constant
;; governing predicate back to all-true.
(define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx4SI 0 "register_operand" "=w, ?&w")
	(ANY_MINUS:VNx4SI
	  (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")
	  (vec_duplicate:VNx4SI
	    (unspec:SI
	      [(match_operand 3)
	       (const_int SVE_KNOWN_PTRUE)
	       (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")]
	      UNSPEC_CNTP))))]
  "TARGET_SVE"
  "@
   <inc_dec>p\t%0.s, %2
   movprfx\t%0, %1\;<inc_dec>p\t%0.s, %2"
  "&& !CONSTANT_P (operands[3])"
  {
    operands[3] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)
10571 | ||
;; Decrement a vector of HIs by the number of set bits in a predicate.
;; See aarch64_sve_cntp for a description of the operands.
;;
;; The SImode count is truncated to HImode before the broadcast.  The
;; expander writes the truncation explicitly; in the matched insn it
;; appears as a lowpart subreg (subreg_lowpart_operator), so the operand
;; numbering shifts: the governing predicate becomes operand 4.
(define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx8HI 0 "register_operand")
	(ANY_MINUS:VNx8HI
	  (match_operand:VNx8HI_ONLY 1 "register_operand")
	  (vec_duplicate:VNx8HI
	    (truncate:HI
	      (unspec:SI
		[(match_dup 3)
		 (const_int SVE_KNOWN_PTRUE)
		 (match_operand:<VPRED> 2 "register_operand")]
		UNSPEC_CNTP)))))]
  "TARGET_SVE"
  {
    operands[3] = CONSTM1_RTX (<VPRED>mode);
  }
)

;; Matching insn.  Alternative 1 uses MOVPRFX so the destination need not
;; be tied to operand 1.  The rewrite step canonicalizes a non-constant
;; governing predicate (operand 4 here) back to all-true.
(define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx8HI 0 "register_operand" "=w, ?&w")
	(ANY_MINUS:VNx8HI
	  (match_operand:VNx8HI_ONLY 1 "register_operand" "0, w")
	  (vec_duplicate:VNx8HI
	    (match_operator:HI 3 "subreg_lowpart_operator"
	      [(unspec:SI
		 [(match_operand 4)
		  (const_int SVE_KNOWN_PTRUE)
		  (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")]
		 UNSPEC_CNTP)]))))]
  "TARGET_SVE"
  "@
   <inc_dec>p\t%0.h, %2
   movprfx\t%0, %1\;<inc_dec>p\t%0.h, %2"
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)