]>
Commit | Line | Data |
---|---|---|
8fa7f434 | 1 | ;; Machine description for AArch64 SVE. |
2 | ;; Copyright (C) 2009-2016 Free Software Foundation, Inc. | |
3 | ;; Contributed by ARM Ltd. | |
4 | ;; | |
5 | ;; This file is part of GCC. | |
6 | ;; | |
7 | ;; GCC is free software; you can redistribute it and/or modify it | |
8 | ;; under the terms of the GNU General Public License as published by | |
9 | ;; the Free Software Foundation; either version 3, or (at your option) | |
10 | ;; any later version. | |
11 | ;; | |
12 | ;; GCC is distributed in the hope that it will be useful, but | |
13 | ;; WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 | ;; General Public License for more details. | |
16 | ;; | |
17 | ;; You should have received a copy of the GNU General Public License | |
18 | ;; along with GCC; see the file COPYING3. If not see | |
19 | ;; <http://www.gnu.org/licenses/>. | |
20 | ||
21 | ;; Note on the handling of big-endian SVE | |
22 | ;; -------------------------------------- | |
23 | ;; | |
24 | ;; On big-endian systems, Advanced SIMD mov<mode> patterns act in the | |
25 | ;; same way as movdi or movti would: the first byte of memory goes | |
26 | ;; into the most significant byte of the register and the last byte | |
27 | ;; of memory goes into the least significant byte of the register. | |
28 | ;; This is the most natural ordering for Advanced SIMD and matches | |
29 | ;; the ABI layout for 64-bit and 128-bit vector types. | |
30 | ;; | |
31 | ;; As a result, the order of bytes within the register is what GCC | |
32 | ;; expects for a big-endian target, and subreg offsets therefore work | |
33 | ;; as expected, with the first element in memory having subreg offset 0 | |
34 | ;; and the last element in memory having the subreg offset associated | |
35 | ;; with a big-endian lowpart. However, this ordering also means that | |
36 | ;; GCC's lane numbering does not match the architecture's numbering: | |
37 | ;; GCC always treats the element at the lowest address in memory | |
38 | ;; (subreg offset 0) as element 0, while the architecture treats | |
39 | ;; the least significant end of the register as element 0. | |
40 | ;; | |
41 | ;; The situation for SVE is different. We want the layout of the | |
42 | ;; SVE register to be same for mov<mode> as it is for maskload<mode>: | |
43 | ;; logically, a mov<mode> load must be indistinguishable from a | |
44 | ;; maskload<mode> whose mask is all true. We therefore need the | |
45 | ;; register layout to match LD1 rather than LDR. The ABI layout of | |
46 | ;; SVE types also matches LD1 byte ordering rather than LDR byte ordering. | |
47 | ;; | |
48 | ;; As a result, the architecture lane numbering matches GCC's lane | |
49 | ;; numbering, with element 0 always being the first in memory. | |
50 | ;; However: | |
51 | ;; | |
52 | ;; - Applying a subreg offset to a register does not give the element | |
53 | ;; that GCC expects: the first element in memory has the subreg offset | |
54 | ;; associated with a big-endian lowpart while the last element in memory | |
55 | ;; has subreg offset 0. We handle this via TARGET_CAN_CHANGE_MODE_CLASS. | |
56 | ;; | |
57 | ;; - We cannot use LDR and STR for spill slots that might be accessed | |
58 | ;; via subregs, since although the elements have the order GCC expects, | |
59 | ;; the order of the bytes within the elements is different. We instead | |
60 | ;; access spill slots via LD1 and ST1, using secondary reloads to | |
61 | ;; reserve a predicate register. | |
62 | ||
63 | ||
64 | ;; SVE data moves. | |
65 | (define_expand "mov<mode>" | |
66 | [(set (match_operand:SVE_ALL 0 "nonimmediate_operand") | |
67 | (match_operand:SVE_ALL 1 "general_operand"))] | |
68 | "TARGET_SVE" | |
69 | { | |
70 | /* Use the predicated load and store patterns where possible. | |
71 | This is required for big-endian targets (see the comment at the | |
72 | head of the file) and increases the addressing choices for | |
73 | little-endian. */ | |
74 | if ((MEM_P (operands[0]) || MEM_P (operands[1])) | |
75 | && can_create_pseudo_p ()) | |
76 | { | |
77 | aarch64_expand_sve_mem_move (operands[0], operands[1], <VPRED>mode); | |
78 | DONE; | |
79 | } | |
80 | ||
81 | if (CONSTANT_P (operands[1])) | |
82 | { | |
83 | aarch64_expand_mov_immediate (operands[0], operands[1], | |
84 | gen_vec_duplicate<mode>); | |
85 | DONE; | |
86 | } | |
87 | } | |
88 | ) | |
89 | ||
90 | ;; Unpredicated moves (little-endian). Only allow memory operations | |
91 | ;; during and after RA; before RA we want the predicated load and | |
92 | ;; store patterns to be used instead. | |
93 | (define_insn "*aarch64_sve_mov<mode>_le" | |
94 | [(set (match_operand:SVE_ALL 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w") | |
95 | (match_operand:SVE_ALL 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))] | |
96 | "TARGET_SVE | |
97 | && !BYTES_BIG_ENDIAN | |
98 | && ((lra_in_progress || reload_completed) | |
99 | || (register_operand (operands[0], <MODE>mode) | |
100 | && nonmemory_operand (operands[1], <MODE>mode)))" | |
101 | "@ | |
102 | ldr\t%0, %1 | |
103 | str\t%1, %0 | |
104 | mov\t%0.d, %1.d | |
105 | * return aarch64_output_sve_mov_immediate (operands[1]);" | |
106 | ) | |
107 | ||
108 | ;; Unpredicated moves (big-endian). Memory accesses require secondary | |
109 | ;; reloads. | |
110 | (define_insn "*aarch64_sve_mov<mode>_be" | |
111 | [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w") | |
112 | (match_operand:SVE_ALL 1 "aarch64_nonmemory_operand" "w, Dn"))] | |
113 | "TARGET_SVE && BYTES_BIG_ENDIAN" | |
114 | "@ | |
115 | mov\t%0.d, %1.d | |
116 | * return aarch64_output_sve_mov_immediate (operands[1]);" | |
117 | ) | |
118 | ||
119 | ;; Handle big-endian memory reloads. We use byte PTRUE for all modes | |
120 | ;; to try to encourage reuse. | |
121 | (define_expand "aarch64_sve_reload_be" | |
122 | [(parallel | |
123 | [(set (match_operand 0) | |
124 | (match_operand 1)) | |
125 | (clobber (match_operand:VNx16BI 2 "register_operand" "=Upl"))])] | |
126 | "TARGET_SVE && BYTES_BIG_ENDIAN" | |
127 | { | |
128 | /* Create a PTRUE. */ | |
129 | emit_move_insn (operands[2], CONSTM1_RTX (VNx16BImode)); | |
130 | ||
131 | /* Refer to the PTRUE in the appropriate mode for this move. */ | |
132 | machine_mode mode = GET_MODE (operands[0]); | |
133 | machine_mode pred_mode | |
134 | = aarch64_sve_pred_mode (GET_MODE_UNIT_SIZE (mode)).require (); | |
135 | rtx pred = gen_lowpart (pred_mode, operands[2]); | |
136 | ||
137 | /* Emit a predicated load or store. */ | |
138 | aarch64_emit_sve_pred_move (operands[0], pred, operands[1]); | |
139 | DONE; | |
140 | } | |
141 | ) | |
142 | ||
143 | ;; A predicated load or store for which the predicate is known to be | |
144 | ;; all-true. Note that this pattern is generated directly by | |
145 | ;; aarch64_emit_sve_pred_move, so changes to this pattern will | |
146 | ;; need changes there as well. | |
147 | (define_insn "*pred_mov<mode>" | |
148 | [(set (match_operand:SVE_ALL 0 "nonimmediate_operand" "=w, m") | |
149 | (unspec:SVE_ALL | |
150 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
151 | (match_operand:SVE_ALL 2 "nonimmediate_operand" "m, w")] | |
152 | UNSPEC_MERGE_PTRUE))] | |
153 | "TARGET_SVE | |
154 | && (register_operand (operands[0], <MODE>mode) | |
155 | || register_operand (operands[2], <MODE>mode))" | |
156 | "@ | |
157 | ld1<Vesize>\t%0.<Vetype>, %1/z, %2 | |
158 | st1<Vesize>\t%2.<Vetype>, %1, %0" | |
159 | ) | |
160 | ||
161 | (define_expand "movmisalign<mode>" | |
162 | [(set (match_operand:SVE_ALL 0 "nonimmediate_operand") | |
163 | (match_operand:SVE_ALL 1 "general_operand"))] | |
164 | "TARGET_SVE" | |
165 | { | |
166 | /* Equivalent to a normal move for our purposes. */ | |
167 | emit_move_insn (operands[0], operands[1]); | |
168 | DONE; | |
169 | } | |
170 | ) | |
171 | ||
172 | (define_insn "maskload<mode><vpred>" | |
173 | [(set (match_operand:SVE_ALL 0 "register_operand" "=w") | |
174 | (unspec:SVE_ALL | |
175 | [(match_operand:<VPRED> 2 "register_operand" "Upl") | |
176 | (match_operand:SVE_ALL 1 "memory_operand" "m")] | |
177 | UNSPEC_LD1_SVE))] | |
178 | "TARGET_SVE" | |
179 | "ld1<Vesize>\t%0.<Vetype>, %2/z, %1" | |
180 | ) | |
181 | ||
182 | (define_insn "maskstore<mode><vpred>" | |
183 | [(set (match_operand:SVE_ALL 0 "memory_operand" "+m") | |
184 | (unspec:SVE_ALL [(match_operand:<VPRED> 2 "register_operand" "Upl") | |
185 | (match_operand:SVE_ALL 1 "register_operand" "w") | |
186 | (match_dup 0)] | |
187 | UNSPEC_ST1_SVE))] | |
188 | "TARGET_SVE" | |
189 | "st1<Vesize>\t%1.<Vetype>, %2, %0" | |
190 | ) | |
191 | ||
192 | (define_expand "mov<mode>" | |
193 | [(set (match_operand:PRED_ALL 0 "nonimmediate_operand") | |
194 | (match_operand:PRED_ALL 1 "general_operand"))] | |
195 | "TARGET_SVE" | |
196 | { | |
197 | if (GET_CODE (operands[0]) == MEM) | |
198 | operands[1] = force_reg (<MODE>mode, operands[1]); | |
199 | } | |
200 | ) | |
201 | ||
202 | (define_insn "*aarch64_sve_mov<mode>" | |
203 | [(set (match_operand:PRED_ALL 0 "nonimmediate_operand" "=Upa, m, Upa, Upa, Upa") | |
204 | (match_operand:PRED_ALL 1 "general_operand" "Upa, Upa, m, Dz, Dm"))] | |
205 | "TARGET_SVE | |
206 | && (register_operand (operands[0], <MODE>mode) | |
207 | || register_operand (operands[1], <MODE>mode))" | |
208 | "@ | |
209 | mov\t%0.b, %1.b | |
210 | str\t%1, %0 | |
211 | ldr\t%0, %1 | |
212 | pfalse\t%0.b | |
213 | * return aarch64_output_ptrue (<MODE>mode, '<Vetype>');" | |
214 | ) | |
215 | ||
216 | ;; Handle extractions from a predicate by converting to an integer vector | |
217 | ;; and extracting from there. | |
218 | (define_expand "vec_extract<vpred><Vel>" | |
219 | [(match_operand:<VEL> 0 "register_operand") | |
220 | (match_operand:<VPRED> 1 "register_operand") | |
221 | (match_operand:SI 2 "nonmemory_operand") | |
222 | ;; Dummy operand to which we can attach the iterator. | |
223 | (reg:SVE_I V0_REGNUM)] | |
224 | "TARGET_SVE" | |
225 | { | |
226 | rtx tmp = gen_reg_rtx (<MODE>mode); | |
227 | emit_insn (gen_aarch64_sve_dup<mode>_const (tmp, operands[1], | |
228 | CONST1_RTX (<MODE>mode), | |
229 | CONST0_RTX (<MODE>mode))); | |
230 | emit_insn (gen_vec_extract<mode><Vel> (operands[0], tmp, operands[2])); | |
231 | DONE; | |
232 | } | |
233 | ) | |
234 | ||
235 | (define_expand "vec_extract<mode><Vel>" | |
236 | [(set (match_operand:<VEL> 0 "register_operand") | |
237 | (vec_select:<VEL> | |
238 | (match_operand:SVE_ALL 1 "register_operand") | |
239 | (parallel [(match_operand:SI 2 "nonmemory_operand")])))] | |
240 | "TARGET_SVE" | |
241 | { | |
242 | poly_int64 val; | |
243 | if (poly_int_rtx_p (operands[2], &val) | |
244 | && known_eq (val, GET_MODE_NUNITS (<MODE>mode) - 1)) | |
245 | { | |
246 | /* The last element can be extracted with a LASTB and a false | |
247 | predicate. */ | |
248 | rtx sel = force_reg (<VPRED>mode, CONST0_RTX (<VPRED>mode)); | |
249 | emit_insn (gen_aarch64_sve_lastb<mode> (operands[0], sel, | |
250 | operands[1])); | |
251 | DONE; | |
252 | } | |
253 | if (!CONST_INT_P (operands[2])) | |
254 | { | |
255 | /* Create an index with operand[2] as the base and -1 as the step. | |
256 | It will then be zero for the element we care about. */ | |
257 | rtx index = gen_lowpart (<VEL_INT>mode, operands[2]); | |
258 | index = force_reg (<VEL_INT>mode, index); | |
259 | rtx series = gen_reg_rtx (<V_INT_EQUIV>mode); | |
260 | emit_insn (gen_vec_series<v_int_equiv> (series, index, constm1_rtx)); | |
261 | ||
262 | /* Get a predicate that is true for only that element. */ | |
263 | rtx zero = CONST0_RTX (<V_INT_EQUIV>mode); | |
264 | rtx cmp = gen_rtx_EQ (<V_INT_EQUIV>mode, series, zero); | |
265 | rtx sel = gen_reg_rtx (<VPRED>mode); | |
266 | emit_insn (gen_vec_cmp<v_int_equiv><vpred> (sel, cmp, series, zero)); | |
267 | ||
268 | /* Select the element using LASTB. */ | |
269 | emit_insn (gen_aarch64_sve_lastb<mode> (operands[0], sel, | |
270 | operands[1])); | |
271 | DONE; | |
272 | } | |
273 | } | |
274 | ) | |
275 | ||
276 | ;; Extract an element from the Advanced SIMD portion of the register. | |
277 | ;; We don't just reuse the aarch64-simd.md pattern because we don't | |
278 | ;; want any change in lane number on big-endian targets. | |
279 | (define_insn "*vec_extract<mode><Vel>_v128" | |
280 | [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv") | |
281 | (vec_select:<VEL> | |
282 | (match_operand:SVE_ALL 1 "register_operand" "w, w, w") | |
283 | (parallel [(match_operand:SI 2 "const_int_operand")])))] | |
284 | "TARGET_SVE | |
285 | && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 0, 15)" | |
286 | { | |
287 | operands[1] = gen_lowpart (<V128>mode, operands[1]); | |
288 | switch (which_alternative) | |
289 | { | |
290 | case 0: | |
291 | return "umov\\t%<vwcore>0, %1.<Vetype>[%2]"; | |
292 | case 1: | |
293 | return "dup\\t%<Vetype>0, %1.<Vetype>[%2]"; | |
294 | case 2: | |
295 | return "st1\\t{%1.<Vetype>}[%2], %0"; | |
296 | default: | |
297 | gcc_unreachable (); | |
298 | } | |
299 | } | |
300 | [(set_attr "type" "neon_to_gp_q, neon_dup_q, neon_store1_one_lane_q")] | |
301 | ) | |
302 | ||
303 | ;; Extract an element in the range of DUP. This pattern allows the | |
304 | ;; source and destination to be different. | |
305 | (define_insn "*vec_extract<mode><Vel>_dup" | |
306 | [(set (match_operand:<VEL> 0 "register_operand" "=w") | |
307 | (vec_select:<VEL> | |
308 | (match_operand:SVE_ALL 1 "register_operand" "w") | |
309 | (parallel [(match_operand:SI 2 "const_int_operand")])))] | |
310 | "TARGET_SVE | |
311 | && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 16, 63)" | |
312 | { | |
313 | operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0])); | |
314 | return "dup\t%0.<Vetype>, %1.<Vetype>[%2]"; | |
315 | } | |
316 | ) | |
317 | ||
318 | ;; Extract an element outside the range of DUP. This pattern requires the | |
319 | ;; source and destination to be the same. | |
320 | (define_insn "*vec_extract<mode><Vel>_ext" | |
321 | [(set (match_operand:<VEL> 0 "register_operand" "=w") | |
322 | (vec_select:<VEL> | |
323 | (match_operand:SVE_ALL 1 "register_operand" "0") | |
324 | (parallel [(match_operand:SI 2 "const_int_operand")])))] | |
325 | "TARGET_SVE && INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode) >= 64" | |
326 | { | |
327 | operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0])); | |
328 | operands[2] = GEN_INT (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode)); | |
329 | return "ext\t%0.b, %0.b, %0.b, #%2"; | |
330 | } | |
331 | ) | |
332 | ||
333 | ;; Extract the last active element of operand 1 into operand 0. | |
334 | ;; If no elements are active, extract the last inactive element instead. | |
335 | (define_insn "aarch64_sve_lastb<mode>" | |
336 | [(set (match_operand:<VEL> 0 "register_operand" "=r, w") | |
337 | (unspec:<VEL> | |
338 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
339 | (match_operand:SVE_ALL 2 "register_operand" "w, w")] | |
340 | UNSPEC_LASTB))] | |
341 | "TARGET_SVE" | |
342 | "@ | |
343 | lastb\t%<vwcore>0, %1, %2.<Vetype> | |
344 | lastb\t%<Vetype>0, %1, %2.<Vetype>" | |
345 | ) | |
346 | ||
347 | (define_expand "vec_duplicate<mode>" | |
348 | [(parallel | |
349 | [(set (match_operand:SVE_ALL 0 "register_operand") | |
350 | (vec_duplicate:SVE_ALL | |
351 | (match_operand:<VEL> 1 "aarch64_sve_dup_operand"))) | |
352 | (clobber (scratch:<VPRED>))])] | |
353 | "TARGET_SVE" | |
354 | { | |
355 | if (MEM_P (operands[1])) | |
356 | { | |
357 | rtx ptrue = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
358 | emit_insn (gen_sve_ld1r<mode> (operands[0], ptrue, operands[1], | |
359 | CONST0_RTX (<MODE>mode))); | |
360 | DONE; | |
361 | } | |
362 | } | |
363 | ) | |
364 | ||
365 | ;; Accept memory operands for the benefit of combine, and also in case | |
366 | ;; the scalar input gets spilled to memory during RA. We want to split | |
367 | ;; the load at the first opportunity in order to allow the PTRUE to be | |
368 | ;; optimized with surrounding code. | |
369 | (define_insn_and_split "*vec_duplicate<mode>_reg" | |
370 | [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w, w") | |
371 | (vec_duplicate:SVE_ALL | |
372 | (match_operand:<VEL> 1 "aarch64_sve_dup_operand" "r, w, Uty"))) | |
373 | (clobber (match_scratch:<VPRED> 2 "=X, X, Upl"))] | |
374 | "TARGET_SVE" | |
375 | "@ | |
376 | mov\t%0.<Vetype>, %<vwcore>1 | |
377 | mov\t%0.<Vetype>, %<Vetype>1 | |
378 | #" | |
379 | "&& MEM_P (operands[1])" | |
380 | [(const_int 0)] | |
381 | { | |
382 | if (GET_CODE (operands[2]) == SCRATCH) | |
383 | operands[2] = gen_reg_rtx (<VPRED>mode); | |
384 | emit_move_insn (operands[2], CONSTM1_RTX (<VPRED>mode)); | |
385 | emit_insn (gen_sve_ld1r<mode> (operands[0], operands[2], operands[1], | |
386 | CONST0_RTX (<MODE>mode))); | |
387 | DONE; | |
388 | } | |
389 | [(set_attr "length" "4,4,8")] | |
390 | ) | |
391 | ||
392 | ;; This is used for vec_duplicate<mode>s from memory, but can also | |
393 | ;; be used by combine to optimize selects of a vec_duplicate<mode> | |
394 | ;; with zero. | |
395 | (define_insn "sve_ld1r<mode>" | |
396 | [(set (match_operand:SVE_ALL 0 "register_operand" "=w") | |
397 | (unspec:SVE_ALL | |
398 | [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
399 | (vec_duplicate:SVE_ALL | |
400 | (match_operand:<VEL> 2 "aarch64_sve_ld1r_operand" "Uty")) | |
401 | (match_operand:SVE_ALL 3 "aarch64_simd_imm_zero")] | |
402 | UNSPEC_SEL))] | |
403 | "TARGET_SVE" | |
404 | "ld1r<Vesize>\t%0.<Vetype>, %1/z, %2" | |
405 | ) | |
406 | ||
407 | ;; Load 128 bits from memory and duplicate to fill a vector. Since there | |
408 | ;; are so few operations on 128-bit "elements", we don't define a VNx1TI | |
409 | ;; and simply use vectors of bytes instead. | |
410 | (define_insn "sve_ld1rq" | |
411 | [(set (match_operand:VNx16QI 0 "register_operand" "=w") | |
412 | (unspec:VNx16QI | |
413 | [(match_operand:VNx16BI 1 "register_operand" "Upl") | |
414 | (match_operand:TI 2 "aarch64_sve_ld1r_operand" "Uty")] | |
415 | UNSPEC_LD1RQ))] | |
416 | "TARGET_SVE" | |
417 | "ld1rqb\t%0.b, %1/z, %2" | |
418 | ) | |
419 | ||
420 | ;; Implement a predicate broadcast by shifting the low bit of the scalar | |
421 | ;; input into the top bit and using a WHILELO. An alternative would be to | |
422 | ;; duplicate the input and do a compare with zero. | |
423 | (define_expand "vec_duplicate<mode>" | |
424 | [(set (match_operand:PRED_ALL 0 "register_operand") | |
425 | (vec_duplicate:PRED_ALL (match_operand 1 "register_operand")))] | |
426 | "TARGET_SVE" | |
427 | { | |
428 | rtx tmp = gen_reg_rtx (DImode); | |
429 | rtx op1 = gen_lowpart (DImode, operands[1]); | |
430 | emit_insn (gen_ashldi3 (tmp, op1, gen_int_mode (63, DImode))); | |
431 | emit_insn (gen_while_ultdi<mode> (operands[0], const0_rtx, tmp)); | |
432 | DONE; | |
433 | } | |
434 | ) | |
435 | ||
436 | (define_insn "vec_series<mode>" | |
437 | [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w") | |
438 | (vec_series:SVE_I | |
439 | (match_operand:<VEL> 1 "aarch64_sve_index_operand" "Usi, r, r") | |
440 | (match_operand:<VEL> 2 "aarch64_sve_index_operand" "r, Usi, r")))] | |
441 | "TARGET_SVE" | |
442 | "@ | |
443 | index\t%0.<Vetype>, #%1, %<vw>2 | |
444 | index\t%0.<Vetype>, %<vw>1, #%2 | |
445 | index\t%0.<Vetype>, %<vw>1, %<vw>2" | |
446 | ) | |
447 | ||
448 | ;; Optimize {x, x, x, x, ...} + {0, n, 2*n, 3*n, ...} if n is in range | |
449 | ;; of an INDEX instruction. | |
450 | (define_insn "*vec_series<mode>_plus" | |
451 | [(set (match_operand:SVE_I 0 "register_operand" "=w") | |
452 | (plus:SVE_I | |
453 | (vec_duplicate:SVE_I | |
454 | (match_operand:<VEL> 1 "register_operand" "r")) | |
455 | (match_operand:SVE_I 2 "immediate_operand")))] | |
456 | "TARGET_SVE && aarch64_check_zero_based_sve_index_immediate (operands[2])" | |
457 | { | |
458 | operands[2] = aarch64_check_zero_based_sve_index_immediate (operands[2]); | |
459 | return "index\t%0.<Vetype>, %<vw>1, #%2"; | |
460 | } | |
461 | ) | |
462 | ||
463 | (define_expand "vec_perm<mode>" | |
464 | [(match_operand:SVE_ALL 0 "register_operand") | |
465 | (match_operand:SVE_ALL 1 "register_operand") | |
466 | (match_operand:SVE_ALL 2 "register_operand") | |
467 | (match_operand:<V_INT_EQUIV> 3 "aarch64_sve_vec_perm_operand")] | |
468 | "TARGET_SVE && GET_MODE_NUNITS (<MODE>mode).is_constant ()" | |
469 | { | |
470 | aarch64_expand_sve_vec_perm (operands[0], operands[1], | |
471 | operands[2], operands[3]); | |
472 | DONE; | |
473 | } | |
474 | ) | |
475 | ||
476 | (define_insn "*aarch64_sve_tbl<mode>" | |
477 | [(set (match_operand:SVE_ALL 0 "register_operand" "=w") | |
478 | (unspec:SVE_ALL | |
479 | [(match_operand:SVE_ALL 1 "register_operand" "w") | |
480 | (match_operand:<V_INT_EQUIV> 2 "register_operand" "w")] | |
481 | UNSPEC_TBL))] | |
482 | "TARGET_SVE" | |
483 | "tbl\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>" | |
484 | ) | |
485 | ||
486 | (define_insn "*aarch64_sve_<perm_insn><perm_hilo><mode>" | |
487 | [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") | |
488 | (unspec:PRED_ALL [(match_operand:PRED_ALL 1 "register_operand" "Upa") | |
489 | (match_operand:PRED_ALL 2 "register_operand" "Upa")] | |
490 | PERMUTE))] | |
491 | "TARGET_SVE" | |
492 | "<perm_insn><perm_hilo>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>" | |
493 | ) | |
494 | ||
495 | (define_insn "*aarch64_sve_<perm_insn><perm_hilo><mode>" | |
496 | [(set (match_operand:SVE_ALL 0 "register_operand" "=w") | |
497 | (unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "w") | |
498 | (match_operand:SVE_ALL 2 "register_operand" "w")] | |
499 | PERMUTE))] | |
500 | "TARGET_SVE" | |
501 | "<perm_insn><perm_hilo>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>" | |
502 | ) | |
503 | ||
504 | (define_insn "*aarch64_sve_rev64<mode>" | |
505 | [(set (match_operand:SVE_BHS 0 "register_operand" "=w") | |
506 | (unspec:SVE_BHS | |
507 | [(match_operand:VNx2BI 1 "register_operand" "Upl") | |
508 | (unspec:SVE_BHS [(match_operand:SVE_BHS 2 "register_operand" "w")] | |
509 | UNSPEC_REV64)] | |
510 | UNSPEC_MERGE_PTRUE))] | |
511 | "TARGET_SVE" | |
512 | "rev<Vesize>\t%0.d, %1/m, %2.d" | |
513 | ) | |
514 | ||
515 | (define_insn "*aarch64_sve_rev32<mode>" | |
516 | [(set (match_operand:SVE_BH 0 "register_operand" "=w") | |
517 | (unspec:SVE_BH | |
518 | [(match_operand:VNx4BI 1 "register_operand" "Upl") | |
519 | (unspec:SVE_BH [(match_operand:SVE_BH 2 "register_operand" "w")] | |
520 | UNSPEC_REV32)] | |
521 | UNSPEC_MERGE_PTRUE))] | |
522 | "TARGET_SVE" | |
523 | "rev<Vesize>\t%0.s, %1/m, %2.s" | |
524 | ) | |
525 | ||
526 | (define_insn "*aarch64_sve_rev16vnx16qi" | |
527 | [(set (match_operand:VNx16QI 0 "register_operand" "=w") | |
528 | (unspec:VNx16QI | |
529 | [(match_operand:VNx8BI 1 "register_operand" "Upl") | |
530 | (unspec:VNx16QI [(match_operand:VNx16QI 2 "register_operand" "w")] | |
531 | UNSPEC_REV16)] | |
532 | UNSPEC_MERGE_PTRUE))] | |
533 | "TARGET_SVE" | |
534 | "revb\t%0.h, %1/m, %2.h" | |
535 | ) | |
536 | ||
537 | (define_insn "*aarch64_sve_rev<mode>" | |
538 | [(set (match_operand:SVE_ALL 0 "register_operand" "=w") | |
539 | (unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "w")] | |
540 | UNSPEC_REV))] | |
541 | "TARGET_SVE" | |
542 | "rev\t%0.<Vetype>, %1.<Vetype>") | |
543 | ||
544 | (define_insn "*aarch64_sve_dup_lane<mode>" | |
545 | [(set (match_operand:SVE_ALL 0 "register_operand" "=w") | |
546 | (vec_duplicate:SVE_ALL | |
547 | (vec_select:<VEL> | |
548 | (match_operand:SVE_ALL 1 "register_operand" "w") | |
549 | (parallel [(match_operand:SI 2 "const_int_operand")]))))] | |
550 | "TARGET_SVE | |
551 | && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 0, 63)" | |
552 | "dup\t%0.<Vetype>, %1.<Vetype>[%2]" | |
553 | ) | |
554 | ||
555 | ;; Note that the immediate (third) operand is the lane index not | |
556 | ;; the byte index. | |
557 | (define_insn "*aarch64_sve_ext<mode>" | |
558 | [(set (match_operand:SVE_ALL 0 "register_operand" "=w") | |
559 | (unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "0") | |
560 | (match_operand:SVE_ALL 2 "register_operand" "w") | |
561 | (match_operand:SI 3 "const_int_operand")] | |
562 | UNSPEC_EXT))] | |
563 | "TARGET_SVE | |
564 | && IN_RANGE (INTVAL (operands[3]) * GET_MODE_SIZE (<VEL>mode), 0, 255)" | |
565 | { | |
566 | operands[3] = GEN_INT (INTVAL (operands[3]) * GET_MODE_SIZE (<VEL>mode)); | |
567 | return "ext\\t%0.b, %0.b, %2.b, #%3"; | |
568 | } | |
569 | ) | |
570 | ||
571 | (define_insn "add<mode>3" | |
572 | [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w, w") | |
573 | (plus:SVE_I | |
574 | (match_operand:SVE_I 1 "register_operand" "%0, 0, 0, w") | |
575 | (match_operand:SVE_I 2 "aarch64_sve_add_operand" "vsa, vsn, vsi, w")))] | |
576 | "TARGET_SVE" | |
577 | "@ | |
578 | add\t%0.<Vetype>, %0.<Vetype>, #%D2 | |
579 | sub\t%0.<Vetype>, %0.<Vetype>, #%N2 | |
580 | * return aarch64_output_sve_inc_dec_immediate (\"%0.<Vetype>\", operands[2]); | |
581 | add\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>" | |
582 | ) | |
583 | ||
584 | (define_insn "sub<mode>3" | |
585 | [(set (match_operand:SVE_I 0 "register_operand" "=w, w") | |
586 | (minus:SVE_I | |
587 | (match_operand:SVE_I 1 "aarch64_sve_arith_operand" "w, vsa") | |
588 | (match_operand:SVE_I 2 "register_operand" "w, 0")))] | |
589 | "TARGET_SVE" | |
590 | "@ | |
591 | sub\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype> | |
592 | subr\t%0.<Vetype>, %0.<Vetype>, #%D1" | |
593 | ) | |
594 | ||
595 | ;; Unpredicated multiplication. | |
596 | (define_expand "mul<mode>3" | |
597 | [(set (match_operand:SVE_I 0 "register_operand") | |
598 | (unspec:SVE_I | |
599 | [(match_dup 3) | |
600 | (mult:SVE_I | |
601 | (match_operand:SVE_I 1 "register_operand") | |
602 | (match_operand:SVE_I 2 "aarch64_sve_mul_operand"))] | |
603 | UNSPEC_MERGE_PTRUE))] | |
604 | "TARGET_SVE" | |
605 | { | |
606 | operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
607 | } | |
608 | ) | |
609 | ||
610 | ;; Multiplication predicated with a PTRUE. We don't actually need the | |
611 | ;; predicate for the first alternative, but using Upa or X isn't likely | |
612 | ;; to gain much and would make the instruction seem less uniform to the | |
613 | ;; register allocator. | |
614 | (define_insn "*mul<mode>3" | |
615 | [(set (match_operand:SVE_I 0 "register_operand" "=w, w") | |
616 | (unspec:SVE_I | |
617 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
618 | (mult:SVE_I | |
619 | (match_operand:SVE_I 2 "register_operand" "%0, 0") | |
620 | (match_operand:SVE_I 3 "aarch64_sve_mul_operand" "vsm, w"))] | |
621 | UNSPEC_MERGE_PTRUE))] | |
622 | "TARGET_SVE" | |
623 | "@ | |
624 | mul\t%0.<Vetype>, %0.<Vetype>, #%3 | |
625 | mul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
626 | ) | |
627 | ||
628 | (define_insn "*madd<mode>" | |
629 | [(set (match_operand:SVE_I 0 "register_operand" "=w, w") | |
630 | (plus:SVE_I | |
631 | (unspec:SVE_I | |
632 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
633 | (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w") | |
634 | (match_operand:SVE_I 3 "register_operand" "w, w"))] | |
635 | UNSPEC_MERGE_PTRUE) | |
636 | (match_operand:SVE_I 4 "register_operand" "w, 0")))] | |
637 | "TARGET_SVE" | |
638 | "@ | |
639 | mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype> | |
640 | mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>" | |
641 | ) | |
642 | ||
643 | (define_insn "*msub<mode>3" | |
644 | [(set (match_operand:SVE_I 0 "register_operand" "=w, w") | |
645 | (minus:SVE_I | |
646 | (match_operand:SVE_I 4 "register_operand" "w, 0") | |
647 | (unspec:SVE_I | |
648 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
649 | (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w") | |
650 | (match_operand:SVE_I 3 "register_operand" "w, w"))] | |
651 | UNSPEC_MERGE_PTRUE)))] | |
652 | "TARGET_SVE" | |
653 | "@ | |
654 | msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype> | |
655 | mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>" | |
656 | ) | |
657 | ||
658 | ;; Unpredicated NEG, NOT and POPCOUNT. | |
659 | (define_expand "<optab><mode>2" | |
660 | [(set (match_operand:SVE_I 0 "register_operand") | |
661 | (unspec:SVE_I | |
662 | [(match_dup 2) | |
663 | (SVE_INT_UNARY:SVE_I (match_operand:SVE_I 1 "register_operand"))] | |
664 | UNSPEC_MERGE_PTRUE))] | |
665 | "TARGET_SVE" | |
666 | { | |
667 | operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
668 | } | |
669 | ) | |
670 | ||
671 | ;; NEG, NOT and POPCOUNT predicated with a PTRUE. | |
672 | (define_insn "*<optab><mode>2" | |
673 | [(set (match_operand:SVE_I 0 "register_operand" "=w") | |
674 | (unspec:SVE_I | |
675 | [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
676 | (SVE_INT_UNARY:SVE_I | |
677 | (match_operand:SVE_I 2 "register_operand" "w"))] | |
678 | UNSPEC_MERGE_PTRUE))] | |
679 | "TARGET_SVE" | |
680 | "<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>" | |
681 | ) | |
682 | ||
683 | ;; Vector AND, ORR and XOR. | |
684 | (define_insn "<optab><mode>3" | |
685 | [(set (match_operand:SVE_I 0 "register_operand" "=w, w") | |
686 | (LOGICAL:SVE_I | |
687 | (match_operand:SVE_I 1 "register_operand" "%0, w") | |
688 | (match_operand:SVE_I 2 "aarch64_sve_logical_operand" "vsl, w")))] | |
689 | "TARGET_SVE" | |
690 | "@ | |
691 | <logical>\t%0.<Vetype>, %0.<Vetype>, #%C2 | |
692 | <logical>\t%0.d, %1.d, %2.d" | |
693 | ) | |
694 | ||
695 | ;; Vector AND, ORR and XOR on floating-point modes. We avoid subregs | |
696 | ;; by providing this, but we need to use UNSPECs since rtx logical ops | |
697 | ;; aren't defined for floating-point modes. | |
698 | (define_insn "*<optab><mode>3" | |
699 | [(set (match_operand:SVE_F 0 "register_operand" "=w") | |
700 | (unspec:SVE_F [(match_operand:SVE_F 1 "register_operand" "w") | |
701 | (match_operand:SVE_F 2 "register_operand" "w")] | |
702 | LOGICALF))] | |
703 | "TARGET_SVE" | |
704 | "<logicalf_op>\t%0.d, %1.d, %2.d" | |
705 | ) | |
706 | ||
707 | ;; REG_EQUAL notes on "not<mode>3" should ensure that we can generate | |
708 | ;; this pattern even though the NOT instruction itself is predicated. | |
709 | (define_insn "bic<mode>3" | |
710 | [(set (match_operand:SVE_I 0 "register_operand" "=w") | |
711 | (and:SVE_I | |
712 | (not:SVE_I (match_operand:SVE_I 1 "register_operand" "w")) | |
713 | (match_operand:SVE_I 2 "register_operand" "w")))] | |
714 | "TARGET_SVE" | |
715 | "bic\t%0.d, %2.d, %1.d" | |
716 | ) | |
717 | ||
718 | ;; Predicate AND. We can reuse one of the inputs as the GP. | |
719 | (define_insn "and<mode>3" | |
720 | [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") | |
721 | (and:PRED_ALL (match_operand:PRED_ALL 1 "register_operand" "Upa") | |
722 | (match_operand:PRED_ALL 2 "register_operand" "Upa")))] | |
723 | "TARGET_SVE" | |
724 | "and\t%0.b, %1/z, %1.b, %2.b" | |
725 | ) | |
726 | ||
727 | ;; Unpredicated predicate ORR and XOR. | |
728 | (define_expand "<optab><mode>3" | |
729 | [(set (match_operand:PRED_ALL 0 "register_operand") | |
730 | (and:PRED_ALL | |
731 | (LOGICAL_OR:PRED_ALL | |
732 | (match_operand:PRED_ALL 1 "register_operand") | |
733 | (match_operand:PRED_ALL 2 "register_operand")) | |
734 | (match_dup 3)))] | |
735 | "TARGET_SVE" | |
736 | { | |
737 | operands[3] = force_reg (<MODE>mode, CONSTM1_RTX (<MODE>mode)); | |
738 | } | |
739 | ) | |
740 | ||
741 | ;; Predicated predicate ORR and XOR. | |
742 | (define_insn "pred_<optab><mode>3" | |
743 | [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") | |
744 | (and:PRED_ALL | |
745 | (LOGICAL:PRED_ALL | |
746 | (match_operand:PRED_ALL 2 "register_operand" "Upa") | |
747 | (match_operand:PRED_ALL 3 "register_operand" "Upa")) | |
748 | (match_operand:PRED_ALL 1 "register_operand" "Upa")))] | |
749 | "TARGET_SVE" | |
750 | "<logical>\t%0.b, %1/z, %2.b, %3.b" | |
751 | ) | |
752 | ||
753 | ;; Perform a logical operation on operands 2 and 3, using operand 1 as | |
754 | ;; the GP (which is known to be a PTRUE). Store the result in operand 0 | |
755 | ;; and set the flags in the same way as for PTEST. The (and ...) in the | |
756 | ;; UNSPEC_PTEST_PTRUE is logically redundant, but means that the tested | |
757 | ;; value is structurally equivalent to the rhs of the second set. | |
758 | (define_insn "*<optab><mode>3_cc" | |
759 | [(set (reg:CC CC_REGNUM) | |
760 | (compare:CC | |
761 | (unspec:SI [(match_operand:PRED_ALL 1 "register_operand" "Upa") | |
762 | (and:PRED_ALL | |
763 | (LOGICAL:PRED_ALL | |
764 | (match_operand:PRED_ALL 2 "register_operand" "Upa") | |
765 | (match_operand:PRED_ALL 3 "register_operand" "Upa")) | |
766 | (match_dup 1))] | |
767 | UNSPEC_PTEST_PTRUE) | |
768 | (const_int 0))) | |
769 | (set (match_operand:PRED_ALL 0 "register_operand" "=Upa") | |
770 | (and:PRED_ALL (LOGICAL:PRED_ALL (match_dup 2) (match_dup 3)) | |
771 | (match_dup 1)))] | |
772 | "TARGET_SVE" | |
773 | "<logical>s\t%0.b, %1/z, %2.b, %3.b" | |
774 | ) | |
775 | ||
776 | ;; Unpredicated predicate inverse. | |
;; Operand 2 is forced to an all-ones predicate, making the (and ...) a
;; no-op; the shape matches the predicated insn below.
777 | (define_expand "one_cmpl<mode>2" | |
778 | [(set (match_operand:PRED_ALL 0 "register_operand") | |
779 | (and:PRED_ALL | |
780 | (not:PRED_ALL (match_operand:PRED_ALL 1 "register_operand")) | |
781 | (match_dup 2)))] | |
782 | "TARGET_SVE" | |
783 | { | |
784 | operands[2] = force_reg (<MODE>mode, CONSTM1_RTX (<MODE>mode)); | |
785 | } | |
786 | ) | |
787 | ||
788 | ;; Predicated predicate inverse. | |
789 | (define_insn "*one_cmpl<mode>3" | |
790 | [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") | |
791 | (and:PRED_ALL | |
792 | (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa")) | |
793 | (match_operand:PRED_ALL 1 "register_operand" "Upa")))] | |
794 | "TARGET_SVE" | |
795 | "not\t%0.b, %1/z, %2.b" | |
796 | ) | |
797 | ||
798 | ;; Predicated predicate BIC and ORN. | |
;; The output template lists %3 before %2: BIC/ORN take the non-inverted
;; operand first, whereas the canonical rtl puts the (not ...) first.
799 | (define_insn "*<nlogical><mode>3" | |
800 | [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") | |
801 | (and:PRED_ALL | |
802 | (NLOGICAL:PRED_ALL | |
803 | (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa")) | |
804 | (match_operand:PRED_ALL 3 "register_operand" "Upa")) | |
805 | (match_operand:PRED_ALL 1 "register_operand" "Upa")))] | |
806 | "TARGET_SVE" | |
807 | "<nlogical>\t%0.b, %1/z, %3.b, %2.b" | |
808 | ) | |
809 | ||
810 | ;; Predicated predicate NAND and NOR. | |
;; Both inputs are inverted here, so the operand order does not need to
;; be swapped in the output template.
811 | (define_insn "*<logical_nn><mode>3" | |
812 | [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") | |
813 | (and:PRED_ALL | |
814 | (NLOGICAL:PRED_ALL | |
815 | (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa")) | |
816 | (not:PRED_ALL (match_operand:PRED_ALL 3 "register_operand" "Upa"))) | |
817 | (match_operand:PRED_ALL 1 "register_operand" "Upa")))] | |
818 | "TARGET_SVE" | |
819 | "<logical_nn>\t%0.b, %1/z, %2.b, %3.b" | |
820 | ) | |
821 | ||
822 | ;; Unpredicated LSL, LSR and ASR by a vector. | |
;; Operand 3 becomes an all-ones governing predicate for
;; UNSPEC_MERGE_PTRUE.
823 | (define_expand "v<optab><mode>3" | |
824 | [(set (match_operand:SVE_I 0 "register_operand") | |
825 | (unspec:SVE_I | |
826 | [(match_dup 3) | |
827 | (ASHIFT:SVE_I | |
828 | (match_operand:SVE_I 1 "register_operand") | |
829 | (match_operand:SVE_I 2 "aarch64_sve_<lr>shift_operand"))] | |
830 | UNSPEC_MERGE_PTRUE))] | |
831 | "TARGET_SVE" | |
832 | { | |
833 | operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
834 | } | |
835 | ) | |
836 | ||
837 | ;; LSL, LSR and ASR by a vector, predicated with a PTRUE. We don't | |
838 | ;; actually need the predicate for the first alternative, but using Upa | |
839 | ;; or X isn't likely to gain much and would make the instruction seem | |
840 | ;; less uniform to the register allocator. | |
;; First alternative: immediate shift (unpredicated form); second:
;; destructive register shift, with the input tied to the destination.
841 | (define_insn "*v<optab><mode>3" | |
842 | [(set (match_operand:SVE_I 0 "register_operand" "=w, w") | |
843 | (unspec:SVE_I | |
844 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
845 | (ASHIFT:SVE_I | |
846 | (match_operand:SVE_I 2 "register_operand" "w, 0") | |
847 | (match_operand:SVE_I 3 "aarch64_sve_<lr>shift_operand" "D<lr>, w"))] | |
848 | UNSPEC_MERGE_PTRUE))] | |
849 | "TARGET_SVE" | |
850 | "@ | |
851 | <shift>\t%0.<Vetype>, %2.<Vetype>, #%3 | |
852 | <shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
853 | ) | |
854 | ||
855 | ;; LSL, LSR and ASR by a scalar, which expands into one of the vector | |
856 | ;; shifts above. | |
;; Constant amounts are duplicated into a vector constant (kept as an
;; immediate where the shift accepts one, otherwise forced into a
;; register); variable amounts are converted to the element mode and
;; broadcast with vec_duplicate.
857 | (define_expand "<ASHIFT:optab><mode>3" | |
858 | [(set (match_operand:SVE_I 0 "register_operand") | |
859 | (ASHIFT:SVE_I (match_operand:SVE_I 1 "register_operand") | |
860 | (match_operand:<VEL> 2 "general_operand")))] | |
861 | "TARGET_SVE" | |
862 | { | |
863 | rtx amount; | |
864 | if (CONST_INT_P (operands[2])) | |
865 | { | |
866 | amount = gen_const_vec_duplicate (<MODE>mode, operands[2]); | |
867 | if (!aarch64_sve_<lr>shift_operand (operands[2], <MODE>mode)) | |
868 | amount = force_reg (<MODE>mode, amount); | |
869 | } | |
870 | else | |
871 | { | |
872 | amount = gen_reg_rtx (<MODE>mode); | |
873 | emit_insn (gen_vec_duplicate<mode> (amount, | |
874 | convert_to_mode (<VEL>mode, | |
875 | operands[2], 0))); | |
876 | } | |
877 | emit_insn (gen_v<optab><mode>3 (operands[0], operands[1], amount)); | |
878 | DONE; | |
879 | } | |
880 | ) | |
881 | ||
882 | ;; Test all bits of operand 1. Operand 0 is a GP that is known to hold PTRUE. | |
883 | ;; | |
884 | ;; Using UNSPEC_PTEST_PTRUE allows combine patterns to assume that the GP | |
885 | ;; is a PTRUE even if the optimizers haven't yet been able to propagate | |
886 | ;; the constant. We would use a separate unspec code for PTESTs involving | |
887 | ;; GPs that might not be PTRUEs. | |
;; Only the condition-code result is produced; no predicate is written.
888 | (define_insn "ptest_ptrue<mode>" | |
889 | [(set (reg:CC CC_REGNUM) | |
890 | (compare:CC | |
891 | (unspec:SI [(match_operand:PRED_ALL 0 "register_operand" "Upa") | |
892 | (match_operand:PRED_ALL 1 "register_operand" "Upa")] | |
893 | UNSPEC_PTEST_PTRUE) | |
894 | (const_int 0)))] | |
895 | "TARGET_SVE" | |
896 | "ptest\t%0, %1.b" | |
897 | ) | |
898 | ||
899 | ;; Set element I of the result if operand1 + J < operand2 for all J in [0, I], | |
900 | ;; with the comparison being unsigned. | |
;; WHILELO also sets the flags, hence the CC clobber.
901 | (define_insn "while_ult<GPI:mode><PRED_ALL:mode>" | |
902 | [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") | |
903 | (unspec:PRED_ALL [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ") | |
904 | (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")] | |
905 | UNSPEC_WHILE_LO)) | |
906 | (clobber (reg:CC CC_REGNUM))] | |
907 | "TARGET_SVE" | |
908 | "whilelo\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2" | |
909 | ) | |
910 | ||
911 | ;; WHILELO sets the flags in the same way as a PTEST with a PTRUE GP. | |
912 | ;; Handle the case in which both results are useful. The GP operand | |
913 | ;; to the PTEST isn't needed, so we allow it to be anything. | |
;; Note that (match_operand:PRED_ALL 1) deliberately has no predicate or
;; constraint. The split re-emits this same insn with a constant
;; all-ones operand 1; the "!CONSTANT_P" condition stops it from
;; splitting again after that.
914 | (define_insn_and_split "while_ult<GPI:mode><PRED_ALL:mode>_cc" | |
915 | [(set (reg:CC CC_REGNUM) | |
916 | (compare:CC | |
917 | (unspec:SI [(match_operand:PRED_ALL 1) | |
918 | (unspec:PRED_ALL | |
919 | [(match_operand:GPI 2 "aarch64_reg_or_zero" "rZ") | |
920 | (match_operand:GPI 3 "aarch64_reg_or_zero" "rZ")] | |
921 | UNSPEC_WHILE_LO)] | |
922 | UNSPEC_PTEST_PTRUE) | |
923 | (const_int 0))) | |
924 | (set (match_operand:PRED_ALL 0 "register_operand" "=Upa") | |
925 | (unspec:PRED_ALL [(match_dup 2) | |
926 | (match_dup 3)] | |
927 | UNSPEC_WHILE_LO))] | |
928 | "TARGET_SVE" | |
929 | "whilelo\t%0.<PRED_ALL:Vetype>, %<w>2, %<w>3" | |
930 | ;; Force the compiler to drop the unused predicate operand, so that we | |
931 | ;; don't have an unnecessary PTRUE. | |
932 | "&& !CONSTANT_P (operands[1])" | |
933 | [(const_int 0)] | |
934 | { | |
935 | emit_insn (gen_while_ult<GPI:mode><PRED_ALL:mode>_cc | |
936 | (operands[0], CONSTM1_RTX (<MODE>mode), | |
937 | operands[2], operands[3])); | |
938 | DONE; | |
939 | } | |
940 | ) | |
941 | ||
942 | ;; Predicated integer comparison. | |
;; CMP<cc> also sets the flags, which this pattern does not use, hence
;; the CC clobber.
943 | (define_insn "*vec_cmp<cmp_op>_<mode>" | |
944 | [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa") | |
945 | (unspec:<VPRED> | |
946 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
947 | (match_operand:SVE_I 2 "register_operand" "w, w") | |
948 | (match_operand:SVE_I 3 "aarch64_sve_cmp_<imm_con>_operand" "<imm_con>, w")] | |
949 | SVE_COND_INT_CMP)) | |
950 | (clobber (reg:CC CC_REGNUM))] | |
951 | "TARGET_SVE" | |
952 | "@ | |
953 | cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3 | |
954 | cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>" | |
955 | ) | |
956 | ||
957 | ;; Predicated integer comparison in which only the flags result is interesting. | |
;; Operand 0 is only a scratch for the (unused) predicate result.
958 | (define_insn "*vec_cmp<cmp_op>_<mode>_ptest" | |
959 | [(set (reg:CC CC_REGNUM) | |
960 | (compare:CC | |
961 | (unspec:SI | |
962 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
963 | (unspec:<VPRED> | |
964 | [(match_dup 1) | |
965 | (match_operand:SVE_I 2 "register_operand" "w, w") | |
966 | (match_operand:SVE_I 3 "aarch64_sve_cmp_<imm_con>_operand" "<imm_con>, w")] | |
967 | SVE_COND_INT_CMP)] | |
968 | UNSPEC_PTEST_PTRUE) | |
969 | (const_int 0))) | |
970 | (clobber (match_scratch:<VPRED> 0 "=Upa, Upa"))] | |
971 | "TARGET_SVE" | |
972 | "@ | |
973 | cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3 | |
974 | cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>" | |
975 | ) | |
976 | ||
977 | ;; Predicated comparison in which both the flag and predicate results | |
978 | ;; are interesting. | |
;; A single CMP<cc> produces both the predicate in operand 0 and the
;; flags.
979 | (define_insn "*vec_cmp<cmp_op>_<mode>_cc" | |
980 | [(set (reg:CC CC_REGNUM) | |
981 | (compare:CC | |
982 | (unspec:SI | |
983 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
984 | (unspec:<VPRED> | |
985 | [(match_dup 1) | |
986 | (match_operand:SVE_I 2 "register_operand" "w, w") | |
987 | (match_operand:SVE_I 3 "aarch64_sve_cmp_<imm_con>_operand" "<imm_con>, w")] | |
988 | SVE_COND_INT_CMP)] | |
989 | UNSPEC_PTEST_PTRUE) | |
990 | (const_int 0))) | |
991 | (set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa") | |
992 | (unspec:<VPRED> | |
993 | [(match_dup 1) | |
994 | (match_dup 2) | |
995 | (match_dup 3)] | |
996 | SVE_COND_INT_CMP))] | |
997 | "TARGET_SVE" | |
998 | "@ | |
999 | cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3 | |
1000 | cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>" | |
1001 | ) | |
1002 | ||
1003 | ;; Predicated floating-point comparison (excluding FCMUO, which doesn't | |
1004 | ;; allow #0.0 as an operand). | |
1005 | (define_insn "*vec_fcm<cmp_op><mode>" | |
1006 | [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa") | |
1007 | (unspec:<VPRED> | |
1008 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
1009 | (match_operand:SVE_F 2 "register_operand" "w, w") | |
1010 | (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w")] | |
1011 | SVE_COND_FP_CMP))] | |
1012 | "TARGET_SVE" | |
1013 | "@ | |
1014 | fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #0.0 | |
1015 | fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>" | |
1016 | ) | |
1017 | ||
1018 | ;; Predicated FCMUO. | |
;; Register-only: FCMUO has no immediate #0.0 form.
1019 | (define_insn "*vec_fcmuo<mode>" | |
1020 | [(set (match_operand:<VPRED> 0 "register_operand" "=Upa") | |
1021 | (unspec:<VPRED> | |
1022 | [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
1023 | (match_operand:SVE_F 2 "register_operand" "w") | |
1024 | (match_operand:SVE_F 3 "register_operand" "w")] | |
1025 | UNSPEC_COND_UO))] | |
1026 | "TARGET_SVE" | |
1027 | "fcmuo\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>" | |
1028 | ) | |
1029 | ||
1030 | ;; vcond_mask operand order: true, false, mask | |
1031 | ;; UNSPEC_SEL operand order: mask, true, false (as for VEC_COND_EXPR) | |
1032 | ;; SEL operand order: mask, true, false | |
;; SEL selects elements of %1 where the mask bit is set and elements of
;; %2 elsewhere.
1033 | (define_insn "vcond_mask_<mode><vpred>" | |
1034 | [(set (match_operand:SVE_ALL 0 "register_operand" "=w") | |
1035 | (unspec:SVE_ALL | |
1036 | [(match_operand:<VPRED> 3 "register_operand" "Upa") | |
1037 | (match_operand:SVE_ALL 1 "register_operand" "w") | |
1038 | (match_operand:SVE_ALL 2 "register_operand" "w")] | |
1039 | UNSPEC_SEL))] | |
1040 | "TARGET_SVE" | |
1041 | "sel\t%0.<Vetype>, %3, %1.<Vetype>, %2.<Vetype>" | |
1042 | ) | |
1043 | ||
1044 | ;; Selects between a duplicated immediate and zero. | |
;; The /z zeroing form of MOV supplies the zero (operand 3) for
;; inactive elements.
1045 | (define_insn "aarch64_sve_dup<mode>_const" | |
1046 | [(set (match_operand:SVE_I 0 "register_operand" "=w") | |
1047 | (unspec:SVE_I | |
1048 | [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
1049 | (match_operand:SVE_I 2 "aarch64_sve_dup_immediate") | |
1050 | (match_operand:SVE_I 3 "aarch64_simd_imm_zero")] | |
1051 | UNSPEC_SEL))] | |
1052 | "TARGET_SVE" | |
1053 | "mov\t%0.<Vetype>, %1/z, #%2" | |
1054 | ) | |
1055 | ||
1056 | ;; Integer (signed) vcond. Don't enforce an immediate range here, since it | |
1057 | ;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead. | |
1058 | (define_expand "vcond<mode><v_int_equiv>" | |
1059 | [(set (match_operand:SVE_ALL 0 "register_operand") | |
1060 | (if_then_else:SVE_ALL | |
1061 | (match_operator 3 "comparison_operator" | |
1062 | [(match_operand:<V_INT_EQUIV> 4 "register_operand") | |
1063 | (match_operand:<V_INT_EQUIV> 5 "nonmemory_operand")]) | |
1064 | (match_operand:SVE_ALL 1 "register_operand") | |
1065 | (match_operand:SVE_ALL 2 "register_operand")))] | |
1066 | "TARGET_SVE" | |
1067 | { | |
1068 | aarch64_expand_sve_vcond (<MODE>mode, <V_INT_EQUIV>mode, operands); | |
1069 | DONE; | |
1070 | } | |
1071 | ) | |
1072 | ||
1073 | ;; Integer vcondu. Don't enforce an immediate range here, since it | |
1074 | ;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead. | |
;; Same expansion as vcond above: the signedness is carried by the rtx
;; comparison code in operand 3.
1075 | (define_expand "vcondu<mode><v_int_equiv>" | |
1076 | [(set (match_operand:SVE_ALL 0 "register_operand") | |
1077 | (if_then_else:SVE_ALL | |
1078 | (match_operator 3 "comparison_operator" | |
1079 | [(match_operand:<V_INT_EQUIV> 4 "register_operand") | |
1080 | (match_operand:<V_INT_EQUIV> 5 "nonmemory_operand")]) | |
1081 | (match_operand:SVE_ALL 1 "register_operand") | |
1082 | (match_operand:SVE_ALL 2 "register_operand")))] | |
1083 | "TARGET_SVE" | |
1084 | { | |
1085 | aarch64_expand_sve_vcond (<MODE>mode, <V_INT_EQUIV>mode, operands); | |
1086 | DONE; | |
1087 | } | |
1088 | ) | |
1089 | ||
1090 | ;; Floating-point vcond. All comparisons except FCMUO allow a zero | |
1091 | ;; operand; aarch64_expand_sve_vcond handles the case of an FCMUO | |
1092 | ;; with zero. | |
;; NOTE(review): the iterator here is SVE_SD rather than SVE_ALL --
;; presumably only 32-bit/64-bit elements have an FP equivalent of the
;; same width; confirm against the iterator definitions.
1093 | (define_expand "vcond<mode><v_fp_equiv>" | |
1094 | [(set (match_operand:SVE_SD 0 "register_operand") | |
1095 | (if_then_else:SVE_SD | |
1096 | (match_operator 3 "comparison_operator" | |
1097 | [(match_operand:<V_FP_EQUIV> 4 "register_operand") | |
1098 | (match_operand:<V_FP_EQUIV> 5 "aarch64_simd_reg_or_zero")]) | |
1099 | (match_operand:SVE_SD 1 "register_operand") | |
1100 | (match_operand:SVE_SD 2 "register_operand")))] | |
1101 | "TARGET_SVE" | |
1102 | { | |
1103 | aarch64_expand_sve_vcond (<MODE>mode, <V_FP_EQUIV>mode, operands); | |
1104 | DONE; | |
1105 | } | |
1106 | ) | |
1107 | ||
1108 | ;; Signed integer comparisons. Don't enforce an immediate range here, since | |
1109 | ;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int | |
1110 | ;; instead. | |
;; The CC clobber accounts for the flag-setting comparison instructions
;; the helper may emit.
1111 | (define_expand "vec_cmp<mode><vpred>" | |
1112 | [(parallel | |
1113 | [(set (match_operand:<VPRED> 0 "register_operand") | |
1114 | (match_operator:<VPRED> 1 "comparison_operator" | |
1115 | [(match_operand:SVE_I 2 "register_operand") | |
1116 | (match_operand:SVE_I 3 "nonmemory_operand")])) | |
1117 | (clobber (reg:CC CC_REGNUM))])] | |
1118 | "TARGET_SVE" | |
1119 | { | |
1120 | aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]), | |
1121 | operands[2], operands[3]); | |
1122 | DONE; | |
1123 | } | |
1124 | ) | |
1125 | ||
1126 | ;; Unsigned integer comparisons. Don't enforce an immediate range here, since | |
1127 | ;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int | |
1128 | ;; instead. | |
;; Identical expansion to vec_cmp above; signedness comes from the rtx
;; comparison code passed to the helper.
1129 | (define_expand "vec_cmpu<mode><vpred>" | |
1130 | [(parallel | |
1131 | [(set (match_operand:<VPRED> 0 "register_operand") | |
1132 | (match_operator:<VPRED> 1 "comparison_operator" | |
1133 | [(match_operand:SVE_I 2 "register_operand") | |
1134 | (match_operand:SVE_I 3 "nonmemory_operand")])) | |
1135 | (clobber (reg:CC CC_REGNUM))])] | |
1136 | "TARGET_SVE" | |
1137 | { | |
1138 | aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]), | |
1139 | operands[2], operands[3]); | |
1140 | DONE; | |
1141 | } | |
1142 | ) | |
1143 | ||
1144 | ;; Floating-point comparisons. All comparisons except FCMUO allow a zero | |
1145 | ;; operand; aarch64_expand_sve_vec_cmp_float handles the case of an FCMUO | |
1146 | ;; with zero. | |
;; NOTE(review): the meaning of the trailing "false" argument isn't
;; visible here -- presumably a can-invert/negate flag; confirm against
;; the declaration of aarch64_expand_sve_vec_cmp_float.
1147 | (define_expand "vec_cmp<mode><vpred>" | |
1148 | [(set (match_operand:<VPRED> 0 "register_operand") | |
1149 | (match_operator:<VPRED> 1 "comparison_operator" | |
1150 | [(match_operand:SVE_F 2 "register_operand") | |
1151 | (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero")]))] | |
1152 | "TARGET_SVE" | |
1153 | { | |
1154 | aarch64_expand_sve_vec_cmp_float (operands[0], GET_CODE (operands[1]), | |
1155 | operands[2], operands[3], false); | |
1156 | DONE; | |
1157 | } | |
1158 | ) | |
1159 | ||
1160 | ;; Branch based on predicate equality or inequality. | |
;; p1 == p2 is rewritten as PTEST (p1 XOR p2): the XOR is all-false iff
;; the predicates are equal (comparison with zero needs no XOR at all).
;; operands[1]/operands[2] are then replaced by the CC register and
;; const0_rtx so that the pattern's if_then_else tests the flags set by
;; ptest_ptrue.
1161 | (define_expand "cbranch<mode>4" | |
1162 | [(set (pc) | |
1163 | (if_then_else | |
1164 | (match_operator 0 "aarch64_equality_operator" | |
1165 | [(match_operand:PRED_ALL 1 "register_operand") | |
1166 | (match_operand:PRED_ALL 2 "aarch64_simd_reg_or_zero")]) | |
1167 | (label_ref (match_operand 3 "")) | |
1168 | (pc)))] | |
1169 | "" | |
1170 | { | |
1171 | rtx ptrue = force_reg (<MODE>mode, CONSTM1_RTX (<MODE>mode)); | |
1172 | rtx pred; | |
1173 | if (operands[2] == CONST0_RTX (<MODE>mode)) | |
1174 | pred = operands[1]; | |
1175 | else | |
1176 | { | |
1177 | pred = gen_reg_rtx (<MODE>mode); | |
1178 | emit_insn (gen_pred_xor<mode>3 (pred, ptrue, operands[1], | |
1179 | operands[2])); | |
1180 | } | |
1181 | emit_insn (gen_ptest_ptrue<mode> (ptrue, pred)); | |
1182 | operands[1] = gen_rtx_REG (CCmode, CC_REGNUM); | |
1183 | operands[2] = const0_rtx; | |
1184 | } | |
1185 | ) | |
1186 | ||
1187 | ;; Unpredicated integer MIN/MAX. | |
1188 | (define_expand "<su><maxmin><mode>3" | |
1189 | [(set (match_operand:SVE_I 0 "register_operand") | |
1190 | (unspec:SVE_I | |
1191 | [(match_dup 3) | |
1192 | (MAXMIN:SVE_I (match_operand:SVE_I 1 "register_operand") | |
1193 | (match_operand:SVE_I 2 "register_operand"))] | |
1194 | UNSPEC_MERGE_PTRUE))] | |
1195 | "TARGET_SVE" | |
1196 | { | |
1197 | operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
1198 | } | |
1199 | ) | |
1200 | ||
1201 | ;; Integer MIN/MAX predicated with a PTRUE. | |
;; The "%0" constraint ties the destructive first source to the
;; destination; the "%" marks operands 2 and 3 as commutative.
1202 | (define_insn "*<su><maxmin><mode>3" | |
1203 | [(set (match_operand:SVE_I 0 "register_operand" "=w") | |
1204 | (unspec:SVE_I | |
1205 | [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
1206 | (MAXMIN:SVE_I (match_operand:SVE_I 2 "register_operand" "%0") | |
1207 | (match_operand:SVE_I 3 "register_operand" "w"))] | |
1208 | UNSPEC_MERGE_PTRUE))] | |
1209 | "TARGET_SVE" | |
1210 | "<su><maxmin>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
1211 | ) | |
1212 | ||
1213 | ;; Unpredicated floating-point MIN/MAX. | |
1214 | (define_expand "<su><maxmin><mode>3" | |
1215 | [(set (match_operand:SVE_F 0 "register_operand") | |
1216 | (unspec:SVE_F | |
1217 | [(match_dup 3) | |
1218 | (FMAXMIN:SVE_F (match_operand:SVE_F 1 "register_operand") | |
1219 | (match_operand:SVE_F 2 "register_operand"))] | |
1220 | UNSPEC_MERGE_PTRUE))] | |
1221 | "TARGET_SVE" | |
1222 | { | |
1223 | operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
1224 | } | |
1225 | ) | |
1226 | ||
1227 | ;; Floating-point MIN/MAX predicated with a PTRUE. | |
;; The template's "f<maxmin>nm" emits the FMAXNM/FMINNM ("number")
;; forms of the instructions.
1228 | (define_insn "*<su><maxmin><mode>3" | |
1229 | [(set (match_operand:SVE_F 0 "register_operand" "=w") | |
1230 | (unspec:SVE_F | |
1231 | [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
1232 | (FMAXMIN:SVE_F (match_operand:SVE_F 2 "register_operand" "%0") | |
1233 | (match_operand:SVE_F 3 "register_operand" "w"))] | |
1234 | UNSPEC_MERGE_PTRUE))] | |
1235 | "TARGET_SVE" | |
1236 | "f<maxmin>nm\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
1237 | ) | |
1238 | ||
1239 | ;; Unpredicated fmin/fmax. | |
;; These use an unspec (FMAXMIN_UNS) rather than smin/smax rtl codes.
1240 | (define_expand "<maxmin_uns><mode>3" | |
1241 | [(set (match_operand:SVE_F 0 "register_operand") | |
1242 | (unspec:SVE_F | |
1243 | [(match_dup 3) | |
1244 | (unspec:SVE_F [(match_operand:SVE_F 1 "register_operand") | |
1245 | (match_operand:SVE_F 2 "register_operand")] | |
1246 | FMAXMIN_UNS)] | |
1247 | UNSPEC_MERGE_PTRUE))] | |
1248 | "TARGET_SVE" | |
1249 | { | |
1250 | operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
1251 | } | |
1252 | ) | |
1253 | ||
1254 | ;; fmin/fmax predicated with a PTRUE. | |
1255 | (define_insn "*<maxmin_uns><mode>3" | |
1256 | [(set (match_operand:SVE_F 0 "register_operand" "=w") | |
1257 | (unspec:SVE_F | |
1258 | [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
1259 | (unspec:SVE_F [(match_operand:SVE_F 2 "register_operand" "%0") | |
1260 | (match_operand:SVE_F 3 "register_operand" "w")] | |
1261 | FMAXMIN_UNS)] | |
1262 | UNSPEC_MERGE_PTRUE))] | |
1263 | "TARGET_SVE" | |
1264 | "<maxmin_uns_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
1265 | ) | |
1266 | ||
1267 | ;; Unpredicated integer add reduction. | |
1268 | (define_expand "reduc_plus_scal_<mode>" | |
1269 | [(set (match_operand:<VEL> 0 "register_operand") | |
1270 | (unspec:<VEL> [(match_dup 2) | |
1271 | (match_operand:SVE_I 1 "register_operand")] | |
1272 | UNSPEC_ADDV))] | |
1273 | "TARGET_SVE" | |
1274 | { | |
1275 | operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
1276 | } | |
1277 | ) | |
1278 | ||
1279 | ;; Predicated integer add reduction. The result is always 64-bits. | |
;; "%d0" prints the scalar D (64-bit) form of the destination, matching
;; the comment above.
1280 | (define_insn "*reduc_plus_scal_<mode>" | |
1281 | [(set (match_operand:<VEL> 0 "register_operand" "=w") | |
1282 | (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
1283 | (match_operand:SVE_I 2 "register_operand" "w")] | |
1284 | UNSPEC_ADDV))] | |
1285 | "TARGET_SVE" | |
1286 | "uaddv\t%d0, %1, %2.<Vetype>" | |
1287 | ) | |
1288 | ||
1289 | ;; Unpredicated floating-point add reduction. | |
1290 | (define_expand "reduc_plus_scal_<mode>" | |
1291 | [(set (match_operand:<VEL> 0 "register_operand") | |
1292 | (unspec:<VEL> [(match_dup 2) | |
1293 | (match_operand:SVE_F 1 "register_operand")] | |
1294 | UNSPEC_FADDV))] | |
1295 | "TARGET_SVE" | |
1296 | { | |
1297 | operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
1298 | } | |
1299 | ) | |
1300 | ||
1301 | ;; Predicated floating-point add reduction. | |
;; Unlike the integer UADDV above, the result here is a scalar of the
;; element type (%<Vetype>0).
1302 | (define_insn "*reduc_plus_scal_<mode>" | |
1303 | [(set (match_operand:<VEL> 0 "register_operand" "=w") | |
1304 | (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
1305 | (match_operand:SVE_F 2 "register_operand" "w")] | |
1306 | UNSPEC_FADDV))] | |
1307 | "TARGET_SVE" | |
1308 | "faddv\t%<Vetype>0, %1, %2.<Vetype>" | |
1309 | ) | |
1310 | ||
1311 | ;; Unpredicated integer MIN/MAX reduction. | |
;; Operand 2 is forced to an all-ones predicate so that every element
;; participates in the reduction.
1312 | (define_expand "reduc_<maxmin_uns>_scal_<mode>" | |
1313 | [(set (match_operand:<VEL> 0 "register_operand") | |
1314 | (unspec:<VEL> [(match_dup 2) | |
1315 | (match_operand:SVE_I 1 "register_operand")] | |
1316 | MAXMINV))] | |
1317 | "TARGET_SVE" | |
1318 | { | |
1319 | operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
1320 | } | |
1321 | ) | |
1322 | ||
1323 | ;; Predicated integer MIN/MAX reduction. | |
1324 | (define_insn "*reduc_<maxmin_uns>_scal_<mode>" | |
1325 | [(set (match_operand:<VEL> 0 "register_operand" "=w") | |
1326 | (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
1327 | (match_operand:SVE_I 2 "register_operand" "w")] | |
1328 | MAXMINV))] | |
1329 | "TARGET_SVE" | |
1330 | "<maxmin_uns_op>v\t%<Vetype>0, %1, %2.<Vetype>" | |
1331 | ) | |
1332 | ||
1333 | ;; Unpredicated floating-point MIN/MAX reduction. | |
;; Operand 2 is forced to an all-ones predicate so that every element
;; participates in the reduction.
1334 | (define_expand "reduc_<maxmin_uns>_scal_<mode>" | |
1335 | [(set (match_operand:<VEL> 0 "register_operand") | |
1336 | (unspec:<VEL> [(match_dup 2) | |
1337 | (match_operand:SVE_F 1 "register_operand")] | |
1338 | FMAXMINV))] | |
1339 | "TARGET_SVE" | |
1340 | { | |
1341 | operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
1342 | } | |
1343 | ) | |
1344 | ||
1345 | ;; Predicated floating-point MIN/MAX reduction. | |
1346 | (define_insn "*reduc_<maxmin_uns>_scal_<mode>" | |
1347 | [(set (match_operand:<VEL> 0 "register_operand" "=w") | |
1348 | (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
1349 | (match_operand:SVE_F 2 "register_operand" "w")] | |
1350 | FMAXMINV))] | |
1351 | "TARGET_SVE" | |
1352 | "<maxmin_uns_op>v\t%<Vetype>0, %1, %2.<Vetype>" | |
1353 | ) | |
1354 | ||
1355 | ;; Unpredicated floating-point addition. | |
1356 | (define_expand "add<mode>3" | |
1357 | [(set (match_operand:SVE_F 0 "register_operand") | |
1358 | (unspec:SVE_F | |
1359 | [(match_dup 3) | |
1360 | (plus:SVE_F | |
1361 | (match_operand:SVE_F 1 "register_operand") | |
1362 | (match_operand:SVE_F 2 "aarch64_sve_float_arith_with_sub_operand"))] | |
1363 | UNSPEC_MERGE_PTRUE))] | |
1364 | "TARGET_SVE" | |
1365 | { | |
1366 | operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
1367 | } | |
1368 | ) | |
1369 | ||
1370 | ;; Floating-point addition predicated with a PTRUE. | |
;; Alternatives: add of an immediate (vsA), subtract of the negated
;; immediate (vsN, printed with %N3), and the unpredicated
;; register-register form.
1371 | (define_insn "*add<mode>3" | |
1372 | [(set (match_operand:SVE_F 0 "register_operand" "=w, w, w") | |
1373 | (unspec:SVE_F | |
1374 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") | |
1375 | (plus:SVE_F | |
1376 | (match_operand:SVE_F 2 "register_operand" "%0, 0, w") | |
1377 | (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_operand" "vsA, vsN, w"))] | |
1378 | UNSPEC_MERGE_PTRUE))] | |
1379 | "TARGET_SVE" | |
1380 | "@ | |
1381 | fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 | |
1382 | fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3 | |
1383 | fadd\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>" | |
1384 | ) | |
1385 | ||
1386 | ;; Unpredicated floating-point subtraction. | |
1387 | (define_expand "sub<mode>3" | |
1388 | [(set (match_operand:SVE_F 0 "register_operand") | |
1389 | (unspec:SVE_F | |
1390 | [(match_dup 3) | |
1391 | (minus:SVE_F | |
1392 | (match_operand:SVE_F 1 "aarch64_sve_float_arith_operand") | |
1393 | (match_operand:SVE_F 2 "register_operand"))] | |
1394 | UNSPEC_MERGE_PTRUE))] | |
1395 | "TARGET_SVE" | |
1396 | { | |
1397 | operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
1398 | } | |
1399 | ) | |
1400 | ||
1401 | ;; Floating-point subtraction predicated with a PTRUE. | |
;; Alternatives: register - immediate (fsub), register + negated
;; immediate (fadd with %N3), immediate - register (fsubr), and the
;; register-register form. The insn condition rejects the
;; immediate/immediate combination.
1402 | (define_insn "*sub<mode>3" | |
1403 | [(set (match_operand:SVE_F 0 "register_operand" "=w, w, w, w") | |
1404 | (unspec:SVE_F | |
1405 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl") | |
1406 | (minus:SVE_F | |
1407 | (match_operand:SVE_F 2 "aarch64_sve_float_arith_operand" "0, 0, vsA, w") | |
1408 | (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_operand" "vsA, vsN, 0, w"))] | |
1409 | UNSPEC_MERGE_PTRUE))] | |
1410 | "TARGET_SVE | |
1411 | && (register_operand (operands[2], <MODE>mode) | |
1412 | || register_operand (operands[3], <MODE>mode))" | |
1413 | "@ | |
1414 | fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 | |
1415 | fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3 | |
1416 | fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2 | |
1417 | fsub\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>" | |
1418 | ) | |
1419 | ||
1420 | ;; Unpredicated floating-point multiplication. | |
1421 | (define_expand "mul<mode>3" | |
1422 | [(set (match_operand:SVE_F 0 "register_operand") | |
1423 | (unspec:SVE_F | |
1424 | [(match_dup 3) | |
1425 | (mult:SVE_F | |
1426 | (match_operand:SVE_F 1 "register_operand") | |
1427 | (match_operand:SVE_F 2 "aarch64_sve_float_mul_operand"))] | |
1428 | UNSPEC_MERGE_PTRUE))] | |
1429 | "TARGET_SVE" | |
1430 | { | |
1431 | operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
1432 | } | |
1433 | ) | |
1434 | ||
1435 | ;; Floating-point multiplication predicated with a PTRUE. | |
;; Alternatives: multiply by an immediate (vsM) and the unpredicated
;; register-register form.
1436 | (define_insn "*mul<mode>3" | |
1437 | [(set (match_operand:SVE_F 0 "register_operand" "=w, w") | |
1438 | (unspec:SVE_F | |
1439 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
1440 | (mult:SVE_F | |
1441 | (match_operand:SVE_F 2 "register_operand" "%0, w") | |
1442 | (match_operand:SVE_F 3 "aarch64_sve_float_mul_operand" "vsM, w"))] | |
1443 | UNSPEC_MERGE_PTRUE))] | |
1444 | "TARGET_SVE" | |
1445 | "@ | |
1446 | fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 | |
1447 | fmul\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>" | |
1448 | ) | |
1449 | ||
1450 | ;; Unpredicated fma (%0 = (%1 * %2) + %3). | |
1451 | (define_expand "fma<mode>4" | |
1452 | [(set (match_operand:SVE_F 0 "register_operand") | |
1453 | (unspec:SVE_F | |
1454 | [(match_dup 4) | |
1455 | (fma:SVE_F (match_operand:SVE_F 1 "register_operand") | |
1456 | (match_operand:SVE_F 2 "register_operand") | |
1457 | (match_operand:SVE_F 3 "register_operand"))] | |
1458 | UNSPEC_MERGE_PTRUE))] | |
1459 | "TARGET_SVE" | |
1460 | { | |
1461 | operands[4] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
1462 | } | |
1463 | ) | |
1464 | ||
1465 | ;; fma predicated with a PTRUE. | |
;; Note the insn's operand numbering: operand 2 is the addend and
;; operands 3 and 4 are the multiplicands. FMAD overwrites a
;; multiplicand (%0 tied to operand 3); FMLA overwrites the addend
;; (%0 tied to operand 2).
1466 | (define_insn "*fma<mode>4" | |
1467 | [(set (match_operand:SVE_F 0 "register_operand" "=w, w") | |
1468 | (unspec:SVE_F | |
1469 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
1470 | (fma:SVE_F (match_operand:SVE_F 3 "register_operand" "%0, w") | |
1471 | (match_operand:SVE_F 4 "register_operand" "w, w") | |
1472 | (match_operand:SVE_F 2 "register_operand" "w, 0"))] | |
1473 | UNSPEC_MERGE_PTRUE))] | |
1474 | "TARGET_SVE" | |
1475 | "@ | |
1476 | fmad\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype> | |
1477 | fmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>" | |
1478 | ) | |
1479 | ||
1480 | ;; Unpredicated fnma (%0 = (-%1 * %2) + %3). | |
1481 | (define_expand "fnma<mode>4" | |
1482 | [(set (match_operand:SVE_F 0 "register_operand") | |
1483 | (unspec:SVE_F | |
1484 | [(match_dup 4) | |
1485 | (fma:SVE_F (neg:SVE_F | |
1486 | (match_operand:SVE_F 1 "register_operand")) | |
1487 | (match_operand:SVE_F 2 "register_operand") | |
1488 | (match_operand:SVE_F 3 "register_operand"))] | |
1489 | UNSPEC_MERGE_PTRUE))] | |
1490 | "TARGET_SVE" | |
1491 | { | |
1492 | operands[4] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
1493 | } | |
1494 | ) | |
1495 | ||
1496 | ;; fnma predicated with a PTRUE. | |
;; Same operand numbering as *fma<mode>4: operand 2 is the addend,
;; operands 3 and 4 the multiplicands. FMSB destroys a multiplicand,
;; FMLS destroys the addend.
1497 | (define_insn "*fnma<mode>4" | |
1498 | [(set (match_operand:SVE_F 0 "register_operand" "=w, w") | |
1499 | (unspec:SVE_F | |
1500 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
1501 | (fma:SVE_F (neg:SVE_F | |
1502 | (match_operand:SVE_F 3 "register_operand" "%0, w")) | |
1503 | (match_operand:SVE_F 4 "register_operand" "w, w") | |
1504 | (match_operand:SVE_F 2 "register_operand" "w, 0"))] | |
1505 | UNSPEC_MERGE_PTRUE))] | |
1506 | "TARGET_SVE" | |
1507 | "@ | |
1508 | fmsb\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype> | |
1509 | fmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>" | |
1510 | ) | |
1511 | ||
1512 | ;; Unpredicated fms (%0 = (%1 * %2) - %3). | |
1513 | (define_expand "fms<mode>4" | |
1514 | [(set (match_operand:SVE_F 0 "register_operand") | |
1515 | (unspec:SVE_F | |
1516 | [(match_dup 4) | |
1517 | (fma:SVE_F (match_operand:SVE_F 1 "register_operand") | |
1518 | (match_operand:SVE_F 2 "register_operand") | |
1519 | (neg:SVE_F | |
1520 | (match_operand:SVE_F 3 "register_operand")))] | |
1521 | UNSPEC_MERGE_PTRUE))] | |
1522 | "TARGET_SVE" | |
1523 | { | |
1524 | operands[4] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
1525 | } | |
1526 | ) | |
1527 | ||
1528 | ;; fms predicated with a PTRUE. | |
;; Same operand numbering as *fma<mode>4: operand 2 is the (negated)
;; addend, operands 3 and 4 the multiplicands. FNMSB destroys a
;; multiplicand, FNMLS destroys the addend.
1529 | (define_insn "*fms<mode>4" | |
1530 | [(set (match_operand:SVE_F 0 "register_operand" "=w, w") | |
1531 | (unspec:SVE_F | |
1532 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
1533 | (fma:SVE_F (match_operand:SVE_F 3 "register_operand" "%0, w") | |
1534 | (match_operand:SVE_F 4 "register_operand" "w, w") | |
1535 | (neg:SVE_F | |
1536 | (match_operand:SVE_F 2 "register_operand" "w, 0")))] | |
1537 | UNSPEC_MERGE_PTRUE))] | |
1538 | "TARGET_SVE" | |
1539 | "@ | |
1540 | fnmsb\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype> | |
1541 | fnmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>" | |
1542 | ) | |
1543 | ||
;; Unpredicated fnms (%0 = (-%1 * %2) - %3).
;; Expands to the predicated form below by supplying an all-true
;; governing predicate in operand 4.
(define_expand "fnms<mode>4"
  [(set (match_operand:SVE_F 0 "register_operand")
        (unspec:SVE_F
          [(match_dup 4)
           (fma:SVE_F (neg:SVE_F
                        (match_operand:SVE_F 1 "register_operand"))
                      (match_operand:SVE_F 2 "register_operand")
                      (neg:SVE_F
                        (match_operand:SVE_F 3 "register_operand")))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    ;; Operand 4 is the implicit all-true (PTRUE) governing predicate.
    operands[4] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)
1560 | ||
;; fnms predicated with a PTRUE.
;; Operand 3 is a multiplicand (commutative with operand 4), operand 2 is
;; the negated addend.  Alternative 0 ties a multiplicand to the
;; destination (FNMAD); alternative 1 ties the addend (FNMLA).
(define_insn "*fnms<mode>4"
  [(set (match_operand:SVE_F 0 "register_operand" "=w, w")
        (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (fma:SVE_F (neg:SVE_F
                        (match_operand:SVE_F 3 "register_operand" "%0, w"))
                      (match_operand:SVE_F 4 "register_operand" "w, w")
                      (neg:SVE_F
                        (match_operand:SVE_F 2 "register_operand" "w, 0")))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "@
   fnmad\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
   fnmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
)
1577 | ||
;; Unpredicated floating-point division.
;; Expands to the predicated form below by supplying an all-true
;; governing predicate in operand 3.
(define_expand "div<mode>3"
  [(set (match_operand:SVE_F 0 "register_operand")
        (unspec:SVE_F
          [(match_dup 3)
           (div:SVE_F (match_operand:SVE_F 1 "register_operand")
                      (match_operand:SVE_F 2 "register_operand"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    ;; Operand 3 is the implicit all-true (PTRUE) governing predicate.
    operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)
1591 | ||
;; Floating-point division predicated with a PTRUE.
;; Both FDIV and FDIVR are destructive: alternative 0 ties the dividend
;; to the destination (FDIV), alternative 1 ties the divisor and uses the
;; reversed-operand FDIVR instead.
(define_insn "*div<mode>3"
  [(set (match_operand:SVE_F 0 "register_operand" "=w, w")
        (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (div:SVE_F (match_operand:SVE_F 2 "register_operand" "0, w")
                      (match_operand:SVE_F 3 "register_operand" "w, 0"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "@
   fdiv\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   fdivr\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
)
1605 | ||
;; Unpredicated FNEG, FABS and FSQRT.
;; Expands to the predicated form below by supplying an all-true
;; governing predicate in operand 2.
(define_expand "<optab><mode>2"
  [(set (match_operand:SVE_F 0 "register_operand")
        (unspec:SVE_F
          [(match_dup 2)
           (SVE_FP_UNARY:SVE_F (match_operand:SVE_F 1 "register_operand"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    ;; Operand 2 is the implicit all-true (PTRUE) governing predicate.
    operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)
1618 | ||
;; FNEG, FABS and FSQRT predicated with a PTRUE.
;; <sve_fp_op> supplies the mnemonic for each SVE_FP_UNARY code.
(define_insn "*<optab><mode>2"
  [(set (match_operand:SVE_F 0 "register_operand" "=w")
        (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl")
           (SVE_FP_UNARY:SVE_F (match_operand:SVE_F 2 "register_operand" "w"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
)
1629 | ||
;; Unpredicated FRINTy (floating-point round to integral, for each
;; rounding mode in the FRINT iterator).  Expands to the predicated form
;; below by supplying an all-true governing predicate in operand 2.
(define_expand "<frint_pattern><mode>2"
  [(set (match_operand:SVE_F 0 "register_operand")
        (unspec:SVE_F
          [(match_dup 2)
           (unspec:SVE_F [(match_operand:SVE_F 1 "register_operand")]
                         FRINT)]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    ;; Operand 2 is the implicit all-true (PTRUE) governing predicate.
    operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)
1643 | ||
;; FRINTy predicated with a PTRUE.
;; <frint_suffix> selects the rounding-mode letter of the FRINT mnemonic.
(define_insn "*<frint_pattern><mode>2"
  [(set (match_operand:SVE_F 0 "register_operand" "=w")
        (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl")
           (unspec:SVE_F [(match_operand:SVE_F 2 "register_operand" "w")]
                         FRINT)]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "frint<frint_suffix>\t%0.<Vetype>, %1/m, %2.<Vetype>"
)
1655 | ||
;; Unpredicated conversion of floats to integers of the same size (HF to HI,
;; SF to SI or DF to DI).  FIXUORS covers both signed (fix) and unsigned
;; (fixuns) truncating conversions.  Expands to one of the predicated
;; patterns below by supplying an all-true governing predicate.
(define_expand "<fix_trunc_optab><mode><v_int_equiv>2"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
        (unspec:<V_INT_EQUIV>
          [(match_dup 2)
           (FIXUORS:<V_INT_EQUIV>
             (match_operand:SVE_F 1 "register_operand"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    ;; Operand 2 is the implicit all-true (PTRUE) governing predicate.
    operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)
1670 | ||
;; Conversion of HF to DI, SI or HI, predicated with a PTRUE.
;; The source is always VNx8HF; the FCVTZS/FCVTZU destination size is
;; given by the SVE_HSDI destination mode.
;;
;; Note: this pattern was previously named "*<fix_trunc_optab>v16hsf<mode>2"
;; and its comment said "SF"; both were wrong — the source mode is VNx8HF,
;; and sibling patterns use the vnx* mode-name convention (vnx4sf, vnx2df).
;; Renaming is safe because "*"-prefixed insn names generate no gen_*
;; functions and so have no callers.
(define_insn "*<fix_trunc_optab>vnx8hf<mode>2"
  [(set (match_operand:SVE_HSDI 0 "register_operand" "=w")
        (unspec:SVE_HSDI
          [(match_operand:<VPRED> 1 "register_operand" "Upl")
           (FIXUORS:SVE_HSDI
             (match_operand:VNx8HF 2 "register_operand" "w"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "fcvtz<su>\t%0.<Vetype>, %1/m, %2.h"
)
1682 | ||
;; Conversion of SF to DI or SI, predicated with a PTRUE.
;; The source is always VNx4SF; the FCVTZS/FCVTZU destination size is
;; given by the SVE_SDI destination mode.
(define_insn "*<fix_trunc_optab>vnx4sf<mode>2"
  [(set (match_operand:SVE_SDI 0 "register_operand" "=w")
        (unspec:SVE_SDI
          [(match_operand:<VPRED> 1 "register_operand" "Upl")
           (FIXUORS:SVE_SDI
             (match_operand:VNx4SF 2 "register_operand" "w"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "fcvtz<su>\t%0.<Vetype>, %1/m, %2.s"
)
1694 | ||
;; Conversion of DF to DI or SI, predicated with a PTRUE.
;; The governing predicate is VNx2BI (one element per doubleword),
;; matching the VNx2DF source rather than the destination mode.
(define_insn "*<fix_trunc_optab>vnx2df<mode>2"
  [(set (match_operand:SVE_SDI 0 "register_operand" "=w")
        (unspec:SVE_SDI
          [(match_operand:VNx2BI 1 "register_operand" "Upl")
           (FIXUORS:SVE_SDI
             (match_operand:VNx2DF 2 "register_operand" "w"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "fcvtz<su>\t%0.<Vetype>, %1/m, %2.d"
)
1706 | ||
;; Unpredicated conversion of integers to floats of the same size
;; (HI to HF, SI to SF or DI to DF).  FLOATUORS covers both signed
;; (float) and unsigned (floatuns) conversions.  Expands to one of the
;; predicated patterns below by supplying an all-true governing predicate.
(define_expand "<optab><v_int_equiv><mode>2"
  [(set (match_operand:SVE_F 0 "register_operand")
        (unspec:SVE_F
          [(match_dup 2)
           (FLOATUORS:SVE_F
             (match_operand:<V_INT_EQUIV> 1 "register_operand"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    ;; Operand 2 is the implicit all-true (PTRUE) governing predicate.
    operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)
1721 | ||
;; Conversion of DI, SI or HI to the same number of HFs, predicated
;; with a PTRUE.  The SCVTF/UCVTF source size is given by the SVE_HSDI
;; source mode; the destination is always VNx8HF.
(define_insn "*<optab><mode>vnx8hf2"
  [(set (match_operand:VNx8HF 0 "register_operand" "=w")
        (unspec:VNx8HF
          [(match_operand:<VPRED> 1 "register_operand" "Upl")
           (FLOATUORS:VNx8HF
             (match_operand:SVE_HSDI 2 "register_operand" "w"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "<su_optab>cvtf\t%0.h, %1/m, %2.<Vetype>"
)
1734 | ||
;; Conversion of DI or SI to the same number of SFs, predicated with a PTRUE.
;; The SCVTF/UCVTF source size is given by the SVE_SDI source mode.
(define_insn "*<optab><mode>vnx4sf2"
  [(set (match_operand:VNx4SF 0 "register_operand" "=w")
        (unspec:VNx4SF
          [(match_operand:<VPRED> 1 "register_operand" "Upl")
           (FLOATUORS:VNx4SF
             (match_operand:SVE_SDI 2 "register_operand" "w"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "<su_optab>cvtf\t%0.s, %1/m, %2.<Vetype>"
)
1746 | ||
;; Conversion of DI or SI to DF, predicated with a PTRUE.
;; The governing predicate is VNx2BI (one element per doubleword),
;; matching the VNx2DF destination.
(define_insn "*<optab><mode>vnx2df2"
  [(set (match_operand:VNx2DF 0 "register_operand" "=w")
        (unspec:VNx2DF
          [(match_operand:VNx2BI 1 "register_operand" "Upl")
           (FLOATUORS:VNx2DF
             (match_operand:SVE_SDI 2 "register_operand" "w"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "<su_optab>cvtf\t%0.d, %1/m, %2.<Vetype>"
)
1758 | ||
;; Conversion of DFs to the same number of SFs, or SFs to the same number
;; of HFs (narrowing FCVT).  The narrowing is represented by
;; UNSPEC_FLOAT_CONVERT rather than float_truncate; the governing
;; predicate has the wider mode's element layout.
(define_insn "*trunc<Vwide><mode>2"
  [(set (match_operand:SVE_HSF 0 "register_operand" "=w")
        (unspec:SVE_HSF
          [(match_operand:<VWIDE_PRED> 1 "register_operand" "Upl")
           (unspec:SVE_HSF
             [(match_operand:<VWIDE> 2 "register_operand" "w")]
             UNSPEC_FLOAT_CONVERT)]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "fcvt\t%0.<Vetype>, %1/m, %2.<Vewtype>"
)
1772 | ||
;; Conversion of SFs to the same number of DFs, or HFs to the same number
;; of SFs (widening FCVT).  The widening is represented by
;; UNSPEC_FLOAT_CONVERT rather than float_extend; the governing
;; predicate has the wider mode's element layout.
(define_insn "*extend<mode><Vwide>2"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (unspec:<VWIDE>
          [(match_operand:<VWIDE_PRED> 1 "register_operand" "Upl")
           (unspec:<VWIDE>
             [(match_operand:SVE_HSF 2 "register_operand" "w")]
             UNSPEC_FLOAT_CONVERT)]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "fcvt\t%0.<Vewtype>, %1/m, %2.<Vetype>"
)
1786 | ||
;; PUNPKHI and PUNPKLO (predicate unpack).
;; The template hard-codes the .h/.b size suffixes for every PRED_BHS
;; mode; presumably this is because SVE predicate registers allocate one
;; bit per byte regardless of element size, so the byte-level form covers
;; all predicate modes — confirm against the PUNPK* specification.
(define_insn "vec_unpack<su>_<perm_hilo>_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=Upa")
        (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand" "Upa")]
                        UNPACK))]
  "TARGET_SVE"
  "punpk<perm_hilo>\t%0.h, %1.b"
)
1795 | ||
;; SUNPKHI, UUNPKHI, SUNPKLO and UUNPKLO (integer unpack).
;; <su> selects signed or unsigned extension; <perm_hilo> selects the
;; high or low half of the source vector.
(define_insn "vec_unpack<su>_<perm_hilo>_<SVE_BHSI:mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (unspec:<VWIDE> [(match_operand:SVE_BHSI 1 "register_operand" "w")]
                        UNPACK))]
  "TARGET_SVE"
  "<su>unpk<perm_hilo>\t%0.<Vewtype>, %1.<Vetype>"
)
1804 | ||
;; Used by the vec_unpacks_<perm_hilo>_<mode> expander to unpack the bit
;; representation of a VNx4SF or VNx8HF without conversion.  The choice
;; between signed and unsigned isn't significant.  Note that both the
;; source and the destination have the narrower (SVE_HSF) mode: the
;; unpack is purely a bit-level rearrangement here.
(define_insn "*vec_unpacku_<perm_hilo>_<mode>_no_convert"
  [(set (match_operand:SVE_HSF 0 "register_operand" "=w")
        (unspec:SVE_HSF [(match_operand:SVE_HSF 1 "register_operand" "w")]
                        UNPACK_UNSIGNED))]
  "TARGET_SVE"
  "uunpk<perm_hilo>\t%0.<Vewtype>, %1.<Vetype>"
)
1815 | ||
;; Unpack one half of a VNx4SF to VNx2DF, or one half of a VNx8HF to VNx4SF.
;; First unpack the source without conversion (see the *_no_convert insn
;; above), then float-convert the unpacked source under an all-true
;; predicate of the wider mode.
(define_expand "vec_unpacks_<perm_hilo>_<mode>"
  [(set (match_dup 2)
        (unspec:SVE_HSF [(match_operand:SVE_HSF 1 "register_operand")]
                        UNPACK_UNSIGNED))
   (set (match_operand:<VWIDE> 0 "register_operand")
        (unspec:<VWIDE> [(match_dup 3)
                         (unspec:<VWIDE> [(match_dup 2)] UNSPEC_FLOAT_CONVERT)]
                        UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    ;; Operand 2 holds the intermediate (unconverted) unpack result;
    ;; operand 3 is the all-true governing predicate for the conversion.
    operands[2] = gen_reg_rtx (<MODE>mode);
    operands[3] = force_reg (<VWIDE_PRED>mode, CONSTM1_RTX (<VWIDE_PRED>mode));
  }
)
1833 | ||
;; Unpack one half of a VNx4SI to VNx2DF.  First unpack from VNx4SI
;; to VNx2DI, reinterpret the VNx2DI as a VNx4SI, then convert the
;; unpacked VNx4SI to VNx2DF.
(define_expand "vec_unpack<su_optab>_float_<perm_hilo>_vnx4si"
  [(set (match_dup 2)
        (unspec:VNx2DI [(match_operand:VNx4SI 1 "register_operand")]
                       UNPACK_UNSIGNED))
   (set (match_operand:VNx2DF 0 "register_operand")
        (unspec:VNx2DF [(match_dup 3)
                        (FLOATUORS:VNx2DF (match_dup 4))]
                       UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    ;; Operand 2 holds the unpacked VNx2DI; operand 4 views the same
    ;; register as VNx4SI via a zero-offset subreg so the widening
    ;; conversion pattern can consume it.  Operand 3 is the all-true
    ;; governing predicate for the conversion.
    operands[2] = gen_reg_rtx (VNx2DImode);
    operands[3] = force_reg (VNx2BImode, CONSTM1_RTX (VNx2BImode));
    operands[4] = gen_rtx_SUBREG (VNx4SImode, operands[2], 0);
  }
)
1852 | ||
;; Predicate pack.  Use UZP1 on the narrower type, which discards
;; the high part of each wide element.  Operands 1 and 2 supply the
;; low and high input halves respectively.
(define_insn "vec_pack_trunc_<Vwide>"
  [(set (match_operand:PRED_BHS 0 "register_operand" "=Upa")
        (unspec:PRED_BHS
          [(match_operand:<VWIDE> 1 "register_operand" "Upa")
           (match_operand:<VWIDE> 2 "register_operand" "Upa")]
          UNSPEC_PACK))]
  "TARGET_SVE"
  "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)
1864 | ||
;; Integer pack.  Use UZP1 on the narrower type, which discards
;; the high part of each wide element.  This shares its name with the
;; predicate pattern above; the patterns are distinguished by mode.
(define_insn "vec_pack_trunc_<Vwide>"
  [(set (match_operand:SVE_BHSI 0 "register_operand" "=w")
        (unspec:SVE_BHSI
          [(match_operand:<VWIDE> 1 "register_operand" "w")
           (match_operand:<VWIDE> 2 "register_operand" "w")]
          UNSPEC_PACK))]
  "TARGET_SVE"
  "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)
1876 | ||
;; Convert two vectors of DF to SF, or two vectors of SF to HF, and pack
;; the results into a single vector.  Each input is narrowed with FCVT
;; under a shared all-true predicate (operand 3), then the two narrowed
;; halves are interleaved into operand 0 with UZP1.
(define_expand "vec_pack_trunc_<Vwide>"
  [(set (match_dup 4)
        (unspec:SVE_HSF
          [(match_dup 3)
           (unspec:SVE_HSF [(match_operand:<VWIDE> 1 "register_operand")]
                           UNSPEC_FLOAT_CONVERT)]
          UNSPEC_MERGE_PTRUE))
   (set (match_dup 5)
        (unspec:SVE_HSF
          [(match_dup 3)
           (unspec:SVE_HSF [(match_operand:<VWIDE> 2 "register_operand")]
                           UNSPEC_FLOAT_CONVERT)]
          UNSPEC_MERGE_PTRUE))
   (set (match_operand:SVE_HSF 0 "register_operand")
        (unspec:SVE_HSF [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))]
  "TARGET_SVE"
  {
    ;; Operand 3 is the shared all-true governing predicate; operands 4
    ;; and 5 hold the two narrowed intermediate results.
    operands[3] = force_reg (<VWIDE_PRED>mode, CONSTM1_RTX (<VWIDE_PRED>mode));
    operands[4] = gen_reg_rtx (<MODE>mode);
    operands[5] = gen_reg_rtx (<MODE>mode);
  }
)
1901 | ||
;; Convert two vectors of DF to SI and pack the results into a single vector.
;; Each input is truncated with FCVTZS/FCVTZU under a shared all-true
;; predicate (operand 3), then the two converted halves are interleaved
;; into operand 0 with UZP1.
(define_expand "vec_pack_<su>fix_trunc_vnx2df"
  [(set (match_dup 4)
        (unspec:VNx4SI
          [(match_dup 3)
           (FIXUORS:VNx4SI (match_operand:VNx2DF 1 "register_operand"))]
          UNSPEC_MERGE_PTRUE))
   (set (match_dup 5)
        (unspec:VNx4SI
          [(match_dup 3)
           (FIXUORS:VNx4SI (match_operand:VNx2DF 2 "register_operand"))]
          UNSPEC_MERGE_PTRUE))
   (set (match_operand:VNx4SI 0 "register_operand")
        (unspec:VNx4SI [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))]
  "TARGET_SVE"
  {
    ;; Operand 3 is the shared all-true governing predicate; operands 4
    ;; and 5 hold the two converted intermediate results.
    operands[3] = force_reg (VNx2BImode, CONSTM1_RTX (VNx2BImode));
    operands[4] = gen_reg_rtx (VNx4SImode);
    operands[5] = gen_reg_rtx (VNx4SImode);
  }
)