]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/config/aarch64/aarch64-sme.md
Update copyright years.
[thirdparty/gcc.git] / gcc / config / aarch64 / aarch64-sme.md
CommitLineData
dd8090f4 1;; Machine description for AArch64 SME.
a945c346 2;; Copyright (C) 2023-2024 Free Software Foundation, Inc.
dd8090f4
RS
3;;
4;; This file is part of GCC.
5;;
6;; GCC is free software; you can redistribute it and/or modify it
7;; under the terms of the GNU General Public License as published by
8;; the Free Software Foundation; either version 3, or (at your option)
9;; any later version.
10;;
11;; GCC is distributed in the hope that it will be useful, but
12;; WITHOUT ANY WARRANTY; without even the implied warranty of
13;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14;; General Public License for more details.
15;;
16;; You should have received a copy of the GNU General Public License
17;; along with GCC; see the file COPYING3. If not see
18;; <http://www.gnu.org/licenses/>.
19
20;; The file is organised into the following sections (search for the full
21;; line):
22;;
23;; == State management
24;; ---- Test current state
25;; ---- PSTATE.SM management
3af9ceb6 26;; ---- PSTATE.ZA management
4f6ab953
RS
27;;
28;; == Loads, stores and moves
29;; ---- Single-vector loads
8d29b7ac 30;; ---- Table loads
4f6ab953 31;; ---- Single-vector stores
8d29b7ac 32;; ---- Table stores
4f6ab953 33;; ---- Single-vector moves
c1c267df 34;; ---- Multi-vector moves
4f6ab953
RS
35;; ---- Zeroing
36;;
37;; == Binary arithmetic
38;; ---- Binary arithmetic on ZA tile
c1c267df
RS
39;; ---- Binary arithmetic on ZA slice
40;; ---- Binary arithmetic, writing to ZA slice
4f6ab953
RS
41;;
42;; == Ternary arithmetic
c1c267df
RS
43;; ---- [INT] Dot product
44;; ---- [INT] Ternary widening arithmetic on ZA slice
4f6ab953 45;; ---- [INT] Sum of outer products
c1c267df
RS
46;; ---- [FP] Dot product
47;; ---- [FP] Ternary arithmetic on ZA slice
48;; ---- [FP] Ternary widening arithmetic on ZA slice
4f6ab953 49;; ---- [FP] Sum of outer products
c1c267df
RS
50;;
51;; == Table lookup
52;; ---- Table lookup
dd8090f4
RS
53
54;; =========================================================================
55;; == State management
56;; =========================================================================
57;;
58;; Many of the instructions in this section are only valid when SME is
59;; present. However, they don't have a TARGET_SME condition since
60;; (a) they are only emitted under direct control of aarch64 code and
61;; (b) they are sometimes used conditionally, particularly in streaming-
62;; compatible code.
63;;
64;; =========================================================================
65
66;; -------------------------------------------------------------------------
67;; ---- Test current state
68;; -------------------------------------------------------------------------
69
;; Unspec codes used by the VG-marker and state-query patterns that follow
;; in this subsection.
70(define_c_enum "unspec" [
71 UNSPEC_OLD_VG_SAVED
72 UNSPEC_UPDATE_VG
73 UNSPEC_GET_SME_STATE
74 UNSPEC_READ_SVCR
75])
76
77;; A marker instruction to say that the old value of the DWARF VG register
78;; has been saved to the stack, for CFI purposes. Operand 0 is the old
79;; value of the register and operand 1 is the save slot.
;;
;; Both the insn condition and the output template are empty strings, so the
;; pattern has no target condition and prints no assembly text. It is
;; modelled as a definition of VG_REGNUM.
80(define_insn "aarch64_old_vg_saved"
81 [(set (reg:DI VG_REGNUM)
82 (unspec:DI [(match_operand 0)
83 (match_operand 1)] UNSPEC_OLD_VG_SAVED))]
84 ""
85 ""
86 [(set_attr "type" "no_insn")]
87)
88
89;; A marker to indicate places where a call temporarily changes VG.
;; It is modelled as VG_REGNUM being redefined from its own previous value,
;; and it prints no assembly text (the output template is empty).
90(define_insn "aarch64_update_vg"
91 [(set (reg:DI VG_REGNUM)
92 (unspec:DI [(reg:DI VG_REGNUM)] UNSPEC_UPDATE_VG))]
93 ""
94 ""
95 [(set_attr "type" "no_insn")]
96)
97
;; Call the __arm_sme_state support routine. The result is modelled as a
;; TImode value that starts at R0_REGNUM. R16, R17, R18, R30 and the
;; condition flags are listed as clobbered by the call.
98(define_insn "aarch64_get_sme_state"
99 [(set (reg:TI R0_REGNUM)
100 (unspec_volatile:TI [(const_int 0)] UNSPEC_GET_SME_STATE))
101 (clobber (reg:DI R16_REGNUM))
102 (clobber (reg:DI R17_REGNUM))
103 (clobber (reg:DI R18_REGNUM))
104 (clobber (reg:DI R30_REGNUM))
105 (clobber (reg:CC CC_REGNUM))]
106 ""
107 "bl\t__arm_sme_state"
108)
109
;; Read the SVCR system register into general register operand 0 using MRS.
;; The source is an unspec_volatile, so the read is treated as volatile.
110(define_insn "aarch64_read_svcr"
111 [(set (match_operand:DI 0 "register_operand" "=r")
112 (unspec_volatile:DI [(const_int 0)] UNSPEC_READ_SVCR))]
113 ""
114 "mrs\t%0, svcr"
115)
116
117;; -------------------------------------------------------------------------
118;; ---- PSTATE.SM management
119;; -------------------------------------------------------------------------
120;; Includes:
121;; - SMSTART SM
122;; - SMSTOP SM
123;; -------------------------------------------------------------------------
124
;; Unspec codes for the SMSTART SM and SMSTOP SM patterns below.
125(define_c_enum "unspec" [
126 UNSPEC_SMSTART_SM
127 UNSPEC_SMSTOP_SM
128])
129
130;; Turn on streaming mode. This clobbers all SVE state.
131;;
132;; Depend on VG_REGNUM to ensure that the VG save slot has already been
133;; initialized.
;;
;; The vector registers are clobbered through V4x16QI references at every
;; fourth register from V0 to V28, and each of P0-P15 is clobbered
;; individually in VNx16BI mode.
134(define_insn "aarch64_smstart_sm"
135 [(unspec_volatile [(const_int 0)] UNSPEC_SMSTART_SM)
136 (use (reg:DI VG_REGNUM))
137 (clobber (reg:V4x16QI V0_REGNUM))
138 (clobber (reg:V4x16QI V4_REGNUM))
139 (clobber (reg:V4x16QI V8_REGNUM))
140 (clobber (reg:V4x16QI V12_REGNUM))
141 (clobber (reg:V4x16QI V16_REGNUM))
142 (clobber (reg:V4x16QI V20_REGNUM))
143 (clobber (reg:V4x16QI V24_REGNUM))
144 (clobber (reg:V4x16QI V28_REGNUM))
145 (clobber (reg:VNx16BI P0_REGNUM))
146 (clobber (reg:VNx16BI P1_REGNUM))
147 (clobber (reg:VNx16BI P2_REGNUM))
148 (clobber (reg:VNx16BI P3_REGNUM))
149 (clobber (reg:VNx16BI P4_REGNUM))
150 (clobber (reg:VNx16BI P5_REGNUM))
151 (clobber (reg:VNx16BI P6_REGNUM))
152 (clobber (reg:VNx16BI P7_REGNUM))
153 (clobber (reg:VNx16BI P8_REGNUM))
154 (clobber (reg:VNx16BI P9_REGNUM))
155 (clobber (reg:VNx16BI P10_REGNUM))
156 (clobber (reg:VNx16BI P11_REGNUM))
157 (clobber (reg:VNx16BI P12_REGNUM))
158 (clobber (reg:VNx16BI P13_REGNUM))
159 (clobber (reg:VNx16BI P14_REGNUM))
160 (clobber (reg:VNx16BI P15_REGNUM))]
161 ""
162 "smstart\tsm"
163)
164
165;; Turn off streaming mode. This clobbers all SVE state.
166;;
167;; Depend on VG_REGNUM to ensure that the VG save slot has already been
168;; initialized.
;;
;; The clobber list is the same as for aarch64_smstart_sm: V4x16QI
;; references at every fourth register from V0 to V28, plus P0-P15.
169(define_insn "aarch64_smstop_sm"
170 [(unspec_volatile [(const_int 0)] UNSPEC_SMSTOP_SM)
171 (use (reg:DI VG_REGNUM))
172 (clobber (reg:V4x16QI V0_REGNUM))
173 (clobber (reg:V4x16QI V4_REGNUM))
174 (clobber (reg:V4x16QI V8_REGNUM))
175 (clobber (reg:V4x16QI V12_REGNUM))
176 (clobber (reg:V4x16QI V16_REGNUM))
177 (clobber (reg:V4x16QI V20_REGNUM))
178 (clobber (reg:V4x16QI V24_REGNUM))
179 (clobber (reg:V4x16QI V28_REGNUM))
180 (clobber (reg:VNx16BI P0_REGNUM))
181 (clobber (reg:VNx16BI P1_REGNUM))
182 (clobber (reg:VNx16BI P2_REGNUM))
183 (clobber (reg:VNx16BI P3_REGNUM))
184 (clobber (reg:VNx16BI P4_REGNUM))
185 (clobber (reg:VNx16BI P5_REGNUM))
186 (clobber (reg:VNx16BI P6_REGNUM))
187 (clobber (reg:VNx16BI P7_REGNUM))
188 (clobber (reg:VNx16BI P8_REGNUM))
189 (clobber (reg:VNx16BI P9_REGNUM))
190 (clobber (reg:VNx16BI P10_REGNUM))
191 (clobber (reg:VNx16BI P11_REGNUM))
192 (clobber (reg:VNx16BI P12_REGNUM))
193 (clobber (reg:VNx16BI P13_REGNUM))
194 (clobber (reg:VNx16BI P14_REGNUM))
195 (clobber (reg:VNx16BI P15_REGNUM))]
196 ""
197 "smstop\tsm"
198)
3af9ceb6
RS
199
200;; -------------------------------------------------------------------------
201;; ---- PSTATE.ZA management
202;; -------------------------------------------------------------------------
203;; Includes:
204;; - SMSTART ZA
205;; - SMSTOP ZA
206;; plus calls to support routines.
207;; -------------------------------------------------------------------------
208
;; Unspec codes for the PSTATE.ZA, TPIDR2 and lazy-save patterns below.
209(define_c_enum "unspec" [
210 UNSPEC_SMSTOP_ZA
211 UNSPEC_INITIAL_ZERO_ZA
212 UNSPEC_TPIDR2_SAVE
213 UNSPEC_TPIDR2_RESTORE
214 UNSPEC_READ_TPIDR2
215 UNSPEC_WRITE_TPIDR2
216 UNSPEC_SETUP_LOCAL_TPIDR2
217 UNSPEC_RESTORE_ZA
218 UNSPEC_START_PRIVATE_ZA_CALL
219 UNSPEC_END_PRIVATE_ZA_CALL
220 UNSPEC_COMMIT_LAZY_SAVE
221])
222
;; Volatile unspec codes used by the aarch64_asm_update_za and
;; aarch64_asm_update_zt0 marker patterns below.
223(define_c_enum "unspecv" [
224 UNSPECV_ASM_UPDATE_ZA
8d29b7ac 225 UNSPECV_ASM_UPDATE_ZT0
3af9ceb6
RS
226])
227
228;; Use the ABI-defined routine to commit an uncommitted lazy save.
229;; This relies on the current PSTATE.ZA, so depends on SME_STATE_REGNUM.
230;; The fake TPIDR2_SETUP_REGNUM register initially holds the incoming
231;; value of the architected TPIDR2_EL0.
;;
;; The call is modelled as defining ZA_FREE_REGNUM. R14-R18, R30 and the
;; condition flags are listed as clobbered by the call.
232(define_insn "aarch64_tpidr2_save"
233 [(set (reg:DI ZA_FREE_REGNUM)
234 (unspec:DI [(reg:DI SME_STATE_REGNUM)
235 (reg:DI TPIDR2_SETUP_REGNUM)] UNSPEC_TPIDR2_SAVE))
236 (clobber (reg:DI R14_REGNUM))
237 (clobber (reg:DI R15_REGNUM))
238 (clobber (reg:DI R16_REGNUM))
239 (clobber (reg:DI R17_REGNUM))
240 (clobber (reg:DI R18_REGNUM))
241 (clobber (reg:DI R30_REGNUM))
242 (clobber (reg:CC CC_REGNUM))]
243 ""
244 "bl\t__arm_tpidr2_save"
245)
246
247;; Set PSTATE.ZA to 1. If ZA was previously dormant or active,
248;; it remains in the same state afterwards, with the same contents.
249;; Otherwise, it goes from off to on with zeroed contents.
250;;
251;; Later writes of TPIDR2_EL0 to a nonzero value must not be moved
252;; up past this instruction, since that could create an invalid
253;; combination of having an active lazy save while ZA is off.
254;; Create an anti-dependence by reading the current contents
255;; of TPIDR2_SETUP_REGNUM.
256;;
257;; Making this depend on ZA_FREE_REGNUM ensures that contents belonging
258;; to the caller have already been saved. That isn't necessary for this
259;; instruction itself, since PSTATE.ZA is already 1 if it contains data.
260;; But doing this here means that other uses of ZA can just depend on
261;; SME_STATE_REGNUM, rather than both SME_STATE_REGNUM and ZA_FREE_REGNUM.
;;
;; In RTL terms the pattern sets SME_STATE_REGNUM to 1; the two "use"
;; expressions provide the dependencies described above.
262(define_insn "aarch64_smstart_za"
263 [(set (reg:DI SME_STATE_REGNUM)
264 (const_int 1))
265 (use (reg:DI TPIDR2_SETUP_REGNUM))
266 (use (reg:DI ZA_FREE_REGNUM))]
267 ""
268 "smstart\tza"
269)
270
271;; Disable ZA and discard its current contents.
272;;
273;; The ABI says that the ZA save buffer must be null whenever PSTATE.ZA
274;; is zero, so earlier writes to TPIDR2_EL0 must not be moved down past
275;; this instruction. Depend on TPIDR2_SETUP_REGNUM to ensure this.
276;;
277;; We can only turn ZA off once we know that it is free (i.e. doesn't
278;; contain data belonging to the caller). Depend on ZA_FREE_REGNUM
279;; to ensure this.
280;;
281;; We only turn ZA off when the current function's ZA state is dead,
282;; or perhaps if we're sure that the contents are saved. Either way,
283;; we know whether ZA is saved or not.
;;
;; In RTL terms the pattern sets SME_STATE_REGNUM to 0 and redefines
;; ZA_SAVED_REGNUM from an unspec that reads TPIDR2_SETUP_REGNUM and
;; ZA_FREE_REGNUM.
284(define_insn "aarch64_smstop_za"
285 [(set (reg:DI SME_STATE_REGNUM)
286 (const_int 0))
287 (set (reg:DI ZA_SAVED_REGNUM)
288 (unspec:DI [(reg:DI TPIDR2_SETUP_REGNUM)
289 (reg:DI ZA_FREE_REGNUM)] UNSPEC_SMSTOP_ZA))]
290 ""
291 "smstop\tza"
292)
293
294;; Zero ZA after committing a lazy save. The sequencing is enforced
295;; by reading ZA_FREE_REGNUM.
;; The pattern also reads SME_STATE_REGNUM and emits "zero { za }".
;; The same set expression appears as the second set of
;; aarch64_commit_lazy_save.
296(define_insn "aarch64_initial_zero_za"
297 [(set (reg:DI ZA_REGNUM)
298 (unspec:DI [(reg:DI SME_STATE_REGNUM)
299 (reg:DI ZA_FREE_REGNUM)] UNSPEC_INITIAL_ZERO_ZA))]
300 ""
301 "zero\t{ za }"
302)
303
304;; Initialize the abstract TPIDR2_BLOCK_REGNUM from the contents of
305;; the current function's TPIDR2 block. Other instructions can then
306;; depend on TPIDR2_BLOCK_REGNUM rather than on the memory block.
;;
;; Operand 0 is the block, accessed as a V16QI memory operand. The output
;; template is empty, so no assembly text is printed.
307(define_insn "aarch64_setup_local_tpidr2"
308 [(set (reg:DI TPIDR2_BLOCK_REGNUM)
309 (unspec:DI [(match_operand:V16QI 0 "memory_operand" "m")]
310 UNSPEC_SETUP_LOCAL_TPIDR2))]
311 ""
312 ""
313 [(set_attr "type" "no_insn")]
314)
315
316;; Clear TPIDR2_EL0, cancelling any uncommitted lazy save.
;; Modelled as setting the fake TPIDR2_SETUP_REGNUM register to zero;
;; the emitted instruction writes XZR to TPIDR2_EL0.
317(define_insn "aarch64_clear_tpidr2"
318 [(set (reg:DI TPIDR2_SETUP_REGNUM)
319 (const_int 0))]
320 ""
321 "msr\ttpidr2_el0, xzr"
322)
323
324;; Point TPIDR2_EL0 to the current function's TPIDR2 block, whose address
325;; is given by operand 0. TPIDR2_BLOCK_REGNUM represents the contents of the
326;; pointed-to block.
;; The write is modelled as a definition of TPIDR2_SETUP_REGNUM that reads
;; both the address register and TPIDR2_BLOCK_REGNUM.
327(define_insn "aarch64_write_tpidr2"
328 [(set (reg:DI TPIDR2_SETUP_REGNUM)
329 (unspec:DI [(match_operand 0 "pmode_register_operand" "r")
330 (reg:DI TPIDR2_BLOCK_REGNUM)] UNSPEC_WRITE_TPIDR2))]
331 ""
332 "msr\ttpidr2_el0, %0"
333)
334
335;; Check whether ZA has been saved. The system depends on the value that
336;; we wrote to TPIDR2_EL0 previously, so it depends on TPIDR2_SETUP_REGNUM.
;; The pattern also reads ZA_SAVED_REGNUM. The value of TPIDR2_EL0 is
;; returned in general register operand 0.
337(define_insn "aarch64_read_tpidr2"
338 [(set (match_operand:DI 0 "register_operand" "=r")
339 (unspec:DI [(reg:DI TPIDR2_SETUP_REGNUM)
340 (reg:DI ZA_SAVED_REGNUM)] UNSPEC_READ_TPIDR2))]
341 ""
342 "mrs\t%0, tpidr2_el0"
343)
344
345;; Use the ABI-defined routine to restore lazy-saved ZA contents
346;; from the TPIDR2 block pointed to by X0. ZA must already be active.
;;
;; The call is modelled as redefining both ZA_SAVED_REGNUM (from R0) and
;; SME_STATE_REGNUM (from its own previous value). R14-R18, R30 and the
;; condition flags are listed as clobbered by the call.
347(define_insn "aarch64_tpidr2_restore"
348 [(set (reg:DI ZA_SAVED_REGNUM)
349 (unspec:DI [(reg:DI R0_REGNUM)] UNSPEC_TPIDR2_RESTORE))
350 (set (reg:DI SME_STATE_REGNUM)
351 (unspec:DI [(reg:DI SME_STATE_REGNUM)] UNSPEC_TPIDR2_RESTORE))
352 (clobber (reg:DI R14_REGNUM))
353 (clobber (reg:DI R15_REGNUM))
354 (clobber (reg:DI R16_REGNUM))
355 (clobber (reg:DI R17_REGNUM))
356 (clobber (reg:DI R18_REGNUM))
357 (clobber (reg:DI R30_REGNUM))
358 (clobber (reg:CC CC_REGNUM))]
359 ""
360 "bl\t__arm_tpidr2_restore"
361)
362
363;; Check whether a lazy save set up by aarch64_save_za was committed
364;; and restore the saved contents if so.
365;;
366;; Operand 0 is the address of the current function's TPIDR2 block.
;;
;; The insn is kept as a single unit (output template "#") and is only
;; split once epilogue_completed is set. The split emits a read of
;; TPIDR2_EL0, a conditional branch around the restore sequence, and then
;; the restore sequence itself.
367(define_insn_and_split "aarch64_restore_za"
368 [(set (reg:DI ZA_SAVED_REGNUM)
369 (unspec:DI [(match_operand 0 "pmode_register_operand" "r")
370 (reg:DI SME_STATE_REGNUM)
371 (reg:DI TPIDR2_SETUP_REGNUM)
372 (reg:DI ZA_SAVED_REGNUM)] UNSPEC_RESTORE_ZA))
373 (clobber (reg:DI R0_REGNUM))
374 (clobber (reg:DI R14_REGNUM))
375 (clobber (reg:DI R15_REGNUM))
376 (clobber (reg:DI R16_REGNUM))
377 (clobber (reg:DI R17_REGNUM))
378 (clobber (reg:DI R18_REGNUM))
379 (clobber (reg:DI R30_REGNUM))
380 (clobber (reg:CC CC_REGNUM))]
381 ""
382 "#"
383 "&& epilogue_completed"
384 [(const_int 0)]
385 {
/* LABEL marks the end of the sequence, where control resumes if the
   restore is skipped. */
386 auto label = gen_label_rtx ();
/* R16 is in this pattern's clobber list, so it is free to use as the
   temporary that receives TPIDR2_EL0. */
387 auto tpidr2 = gen_rtx_REG (DImode, R16_REGNUM);
388 emit_insn (gen_aarch64_read_tpidr2 (tpidr2));
/* NOTE(review): presumably this branches to LABEL when TPIDR2_EL0 is
   nonzero, i.e. when the lazy save was not committed and nothing needs
   restoring -- confirm against the aarch64_cbnedi1 pattern. The branch
   is emitted as a likely-taken jump. */
389 auto jump = emit_likely_jump_insn (gen_aarch64_cbnedi1 (tpidr2, label));
390 JUMP_LABEL (jump) = label;
391
/* Fall-through path: emit the restore sequence for the TPIDR2 block
   whose address is operand 0. */
392 aarch64_restore_za (operands[0]);
393 emit_label (label);
394 DONE;
395 }
396)
397
398;; This instruction is emitted after asms that alter ZA, in order to model
399;; the effect on dataflow. The asm itself can't have ZA as an input or
400;; an output, since there is no associated data type. Instead it retains
401;; the original "za" clobber, which on its own would indicate that ZA
402;; is dead.
403;;
404;; The operand is a unique identifier.
;;
;; The pattern redefines ZA_REGNUM from its previous value, from
;; SME_STATE_REGNUM and from the identifier, using an unspec_volatile.
;; No assembly text is printed (the output template is empty).
405(define_insn "aarch64_asm_update_za"
406 [(set (reg:VNx16QI ZA_REGNUM)
407 (unspec_volatile:VNx16QI
408 [(reg:VNx16QI ZA_REGNUM)
409 (reg:DI SME_STATE_REGNUM)
410 (match_operand 0 "const_int_operand")]
411 UNSPECV_ASM_UPDATE_ZA))]
412 ""
413 ""
414 [(set_attr "type" "no_insn")]
415)
416
8d29b7ac
RS
417;; A similar pattern for ZT0.
;; It redefines ZT0_REGNUM (in V8DI mode) from its previous value, from
;; SME_STATE_REGNUM and from the constant-integer operand 0, and prints no
;; assembly text.
418(define_insn "aarch64_asm_update_zt0"
419 [(set (reg:V8DI ZT0_REGNUM)
420 (unspec_volatile:V8DI
421 [(reg:V8DI ZT0_REGNUM)
422 (reg:DI SME_STATE_REGNUM)
423 (match_operand 0 "const_int_operand")]
424 UNSPECV_ASM_UPDATE_ZT0))]
425 ""
426 ""
427 [(set_attr "type" "no_insn")]
428)
429
3af9ceb6
RS
430;; This pseudo-instruction is emitted as part of a call to a private-ZA
431;; function from a function with ZA state. It marks a natural place to set
432;; up a lazy save, if that turns out to be necessary. The save itself
433;; is managed by the mode-switching pass.
;;
;; The marker is modelled as LOWERING_REGNUM being redefined from its own
;; previous value. It prints no assembly text.
434(define_insn "aarch64_start_private_za_call"
435 [(set (reg:DI LOWERING_REGNUM)
436 (unspec:DI [(reg:DI LOWERING_REGNUM)] UNSPEC_START_PRIVATE_ZA_CALL))]
437 ""
438 ""
439 [(set_attr "type" "no_insn")]
440)
441
442;; This pseudo-instruction is emitted as part of a call to a private-ZA
443;; function from a function with ZA state. It marks a natural place to restore
444;; the current function's ZA contents from the lazy save buffer, if that
445;; turns out to be necessary. The save itself is managed by the
446;; mode-switching pass.
;;
;; NOTE(review): "The save itself" above is worded identically to the
;; comment on aarch64_start_private_za_call; presumably "The restore itself"
;; is meant here -- confirm.
;;
;; The marker is modelled as LOWERING_REGNUM being redefined from its own
;; previous value. It prints no assembly text.
447(define_insn "aarch64_end_private_za_call"
448 [(set (reg:DI LOWERING_REGNUM)
449 (unspec:DI [(reg:DI LOWERING_REGNUM)] UNSPEC_END_PRIVATE_ZA_CALL))]
450 ""
451 ""
452 [(set_attr "type" "no_insn")]
453)
454
455;; This pseudo-instruction is emitted before a private-ZA function uses
456;; PSTATE.ZA state for the first time. The instruction checks whether
457;; ZA currently contains data belonging to a caller and commits the
458;; lazy save if so.
459;;
460;; Operand 0 is the incoming value of TPIDR2_EL0. Operand 1 is nonzero
461;; if ZA is live, and should therefore be zeroed after committing a save.
462;;
463;; The instruction is generated by the mode-switching pass. It is a
464;; define_insn_and_split rather than a define_expand because of the
465;; internal control flow.
;;
;; The split condition is simply "true", so the insn can be split at any
;; splitting opportunity. The second set in the pattern is the same
;; expression as the one in aarch64_initial_zero_za, and the clobber list
;; matches the one in aarch64_tpidr2_save.
466(define_insn_and_split "aarch64_commit_lazy_save"
467 [(set (reg:DI ZA_FREE_REGNUM)
468 (unspec:DI [(match_operand 0 "pmode_register_operand" "r")
469 (match_operand 1 "const_int_operand")
470 (reg:DI SME_STATE_REGNUM)
471 (reg:DI TPIDR2_SETUP_REGNUM)
472 (reg:VNx16QI ZA_REGNUM)] UNSPEC_COMMIT_LAZY_SAVE))
473 (set (reg:DI ZA_REGNUM)
474 (unspec:DI [(reg:DI SME_STATE_REGNUM)
475 (reg:DI ZA_FREE_REGNUM)] UNSPEC_INITIAL_ZERO_ZA))
476 (clobber (reg:DI R14_REGNUM))
477 (clobber (reg:DI R15_REGNUM))
478 (clobber (reg:DI R16_REGNUM))
479 (clobber (reg:DI R17_REGNUM))
480 (clobber (reg:DI R18_REGNUM))
481 (clobber (reg:DI R30_REGNUM))
482 (clobber (reg:CC CC_REGNUM))]
483 ""
484 "#"
485 "true"
486 [(const_int 0)]
487 {
/* LABEL marks the end of the sequence, where control resumes if there
   is no save to commit. */
488 auto label = gen_label_rtx ();
/* NOTE(review): presumably this branches to LABEL when the incoming
   TPIDR2_EL0 value (operand 0) is zero, i.e. when there is no pending
   lazy save -- confirm against the aarch64_cbeqdi1 pattern. */
489 auto jump = emit_jump_insn (gen_aarch64_cbeqdi1 (operands[0], label));
490 JUMP_LABEL (jump) = label;
/* Fall-through path: commit the save through the support routine, then
   clear TPIDR2_EL0. */
491 emit_insn (gen_aarch64_tpidr2_save ());
492 emit_insn (gen_aarch64_clear_tpidr2 ());
/* Only zero ZA when the caller of this pattern said that ZA is live. */
493 if (INTVAL (operands[1]) != 0)
494 emit_insn (gen_aarch64_initial_zero_za ());
495 emit_label (label);
496 DONE;
497 }
498)
4f6ab953
RS
499
500;; =========================================================================
501;; == Loads, stores and moves
502;; =========================================================================
503
504;; -------------------------------------------------------------------------
505;; ---- Single-vector loads
506;; -------------------------------------------------------------------------
507;; Includes:
508;; - LD1
509;; - LDR
510;; -------------------------------------------------------------------------
511
;; Unspec code shared by the aarch64_sme_ldr0 and aarch64_sme_ldrn patterns.
512(define_c_enum "unspec" [
513 UNSPEC_SME_LDR
514])
515
;; Predicated LD1 into a ZA slice with a slice offset of 0.
;; Going by the output template: operand 0 is the constant printed directly
;; after "za", operand 1 is the 32-bit slice index register, operand 2 is
;; the governing predicate (printed with "/z") and operand 3 is the memory
;; source. ZA is both read and written, and SME_STATE_REGNUM is read.
516(define_insn "@aarch64_sme_<optab><mode>"
517 [(set (reg:SME_ZA_I ZA_REGNUM)
518 (unspec:SME_ZA_I
519 [(reg:SME_ZA_I ZA_REGNUM)
520 (reg:DI SME_STATE_REGNUM)
521 (match_operand:DI 0 "const_int_operand")
522 (match_operand:SI 1 "register_operand" "Ucj")
523 (match_operand:<VPRED> 2 "register_operand" "Upl")
524 (match_operand:SME_ZA_I 3 "aarch64_sve_ldff1_operand" "Utf")]
525 SME_LD1))]
526 "TARGET_STREAMING_SME"
527 "ld1<Vesize>\t{ za%0<hv>.<Vetype>[%w1, 0] }, %2/z, %3"
528)
529
;; As above, but the slice index is register operand 1 plus the constant
;; operand 2, which is printed as the slice offset. The insn condition
;; requires the offset to be less than 128 / <elem_bits> when treated as
;; unsigned. The predicate and memory source become operands 3 and 4.
530(define_insn "@aarch64_sme_<optab><mode>_plus"
531 [(set (reg:SME_ZA_I ZA_REGNUM)
532 (unspec:SME_ZA_I
533 [(reg:SME_ZA_I ZA_REGNUM)
534 (reg:DI SME_STATE_REGNUM)
535 (match_operand:DI 0 "const_int_operand")
536 (plus:SI (match_operand:SI 1 "register_operand" "Ucj")
537 (match_operand:SI 2 "const_int_operand"))
538 (match_operand:<VPRED> 3 "register_operand" "Upl")
539 (match_operand:SME_ZA_I 4 "aarch64_sve_ldff1_operand" "Utf")]
540 SME_LD1))]
541 "TARGET_STREAMING_SME
542 && UINTVAL (operands[2]) < 128 / <elem_bits>"
543 "ld1<Vesize>\t{ za%0<hv>.<Vetype>[%w1, %2] }, %3/z, %4"
544)
545
;; LDR of a ZA vector with zero offsets: the vector is selected by register
;; operand 0 and loaded from the address in register operand 1.
;; The condition is TARGET_SME rather than TARGET_STREAMING_SME.
546(define_insn "aarch64_sme_ldr0"
547 [(set (reg:VNx16QI ZA_REGNUM)
548 (unspec:VNx16QI
549 [(reg:VNx16QI ZA_REGNUM)
550 (reg:DI SME_STATE_REGNUM)
551 (match_operand:SI 0 "register_operand" "Ucj")
552 (mem:VNx16QI (match_operand 1 "pmode_register_operand" "rk"))]
553 UNSPEC_SME_LDR))]
554 "TARGET_SME"
555 "ldr\tza[%w0, 0], [%1, #0, mul vl]"
556)
557
;; LDR of a ZA vector with a nonzero offset. Constant operand 1 is printed
;; both as the ZA vector offset and as the "mul vl" memory offset; operand 3
;; is the address offset in the RTL but is not printed.
;; NOTE(review): presumably aarch64_sme_ldr_vnum_offset_p checks that
;; operand 3 is the address offset that corresponds to operand 1 vectors --
;; confirm against its definition.
558(define_insn "@aarch64_sme_ldrn<mode>"
559 [(set (reg:VNx16QI ZA_REGNUM)
560 (unspec:VNx16QI
561 [(reg:VNx16QI ZA_REGNUM)
562 (reg:DI SME_STATE_REGNUM)
563 (plus:SI (match_operand:SI 0 "register_operand" "Ucj")
564 (match_operand:SI 1 "const_int_operand"))
565 (mem:VNx16QI
566 (plus:P (match_operand:P 2 "register_operand" "rk")
567 (match_operand:P 3 "aarch64_mov_operand")))]
568 UNSPEC_SME_LDR))]
569 "TARGET_SME
570 && aarch64_sme_ldr_vnum_offset_p (operands[1], operands[3])"
571 "ldr\tza[%w0, %1], [%2, #%1, mul vl]"
572)
573
8d29b7ac
RS
574;; -------------------------------------------------------------------------
575;; ---- Table loads
576;; -------------------------------------------------------------------------
577;; Includes:
578;; - LDR
579;; -------------------------------------------------------------------------
580
;; Unspec code used by aarch64_restore_zt0 below.
581(define_c_enum "unspec" [
582 UNSPEC_RESTORE_ZT0
583])
584
;; Load ZT0 from memory operand 0, modelled as a plain V8DI move into
;; ZT0_REGNUM. The "use" makes the insn depend on SME_STATE_REGNUM.
585(define_insn "aarch64_sme_ldr_zt0"
586 [(set (reg:V8DI ZT0_REGNUM)
587 (match_operand:V8DI 0 "aarch64_sync_memory_operand" "Q"))
588 (use (reg:DI SME_STATE_REGNUM))]
589 "TARGET_SME2"
590 "ldr\tzt0, %0"
591)
592
593;; This version is used after calls to private-ZA functions. Since ZT0_REGNUM
594;; represents the current function's state, it isn't clobbered by private-ZA
595;; functions, so we need to make it depend on the ZA reinitialization code.
;; The dependency is expressed by reading SME_STATE_REGNUM inside the unspec.
;; The emitted instruction is the same LDR as in aarch64_sme_ldr_zt0.
596(define_insn "aarch64_restore_zt0"
597 [(set (reg:V8DI ZT0_REGNUM)
598 (unspec:V8DI
599 [(reg:DI SME_STATE_REGNUM)
600 (match_operand:V8DI 0 "aarch64_sync_memory_operand" "Q")]
601 UNSPEC_RESTORE_ZT0))]
602 "TARGET_SME2"
603 "ldr\tzt0, %0"
604)
605
4f6ab953
RS
606;; -------------------------------------------------------------------------
607;; ---- Single-vector stores
608;; -------------------------------------------------------------------------
609;; Includes:
610;; - ST1
611;; - STR
612;; -------------------------------------------------------------------------
613
;; Unspec code shared by the aarch64_sme_str0 and aarch64_sme_strn patterns.
614(define_c_enum "unspec" [
615 UNSPEC_SME_STR
616])
617
;; Predicated ST1 from a ZA slice with a slice offset of 0.
;; Going by the output template: operand 0 is the memory destination,
;; operand 1 is the constant printed directly after "za", operand 2 is the
;; 32-bit slice index register and operand 3 is the governing predicate.
;; The destination is an in/out operand ("+") and is also read through
;; match_dup, presumably because a predicated store leaves part of the
;; destination unchanged.
618(define_insn "@aarch64_sme_<optab><mode>"
619 [(set (match_operand:SME_ZA_I 0 "aarch64_sve_ldff1_operand" "+Utf")
620 (unspec:SME_ZA_I
621 [(reg:SME_ZA_I ZA_REGNUM)
622 (reg:DI SME_STATE_REGNUM)
623 (match_dup 0)
624 (match_operand:DI 1 "const_int_operand")
625 (match_operand:SI 2 "register_operand" "Ucj")
626 (match_operand:<VPRED> 3 "register_operand" "Upl")]
627 SME_ST1))]
628 "TARGET_STREAMING_SME"
629 "st1<Vesize>\t{ za%1<hv>.<Vetype>[%w2, 0] }, %3, %0"
630)
631
;; As above, but the slice index is register operand 2 plus the constant
;; operand 3, which is printed as the slice offset. The insn condition
;; requires the offset to be less than 128 / <elem_bits> when treated as
;; unsigned. The governing predicate becomes operand 4.
632(define_insn "@aarch64_sme_<optab><mode>_plus"
633 [(set (match_operand:SME_ZA_I 0 "aarch64_sve_ldff1_operand" "+Utf")
634 (unspec:SME_ZA_I
635 [(reg:SME_ZA_I ZA_REGNUM)
636 (reg:DI SME_STATE_REGNUM)
637 (match_dup 0)
638 (match_operand:DI 1 "const_int_operand")
639 (plus:SI (match_operand:SI 2 "register_operand" "Ucj")
640 (match_operand:SI 3 "const_int_operand"))
641 (match_operand:<VPRED> 4 "register_operand" "Upl")]
642 SME_ST1))]
643 "TARGET_STREAMING_SME
644 && UINTVAL (operands[3]) < 128 / <elem_bits>"
645 "st1<Vesize>\t{ za%1<hv>.<Vetype>[%w2, %3] }, %4, %0"
646)
647
;; STR of a ZA vector with zero offsets: the vector is selected by register
;; operand 0 and stored to the address in register operand 1.
;; The previous memory contents are also listed as an input to the unspec.
;; The condition is TARGET_SME rather than TARGET_STREAMING_SME.
648(define_insn "aarch64_sme_str0"
649 [(set (mem:VNx16QI (match_operand 1 "pmode_register_operand" "rk"))
650 (unspec:VNx16QI
651 [(reg:VNx16QI ZA_REGNUM)
652 (reg:DI SME_STATE_REGNUM)
653 (mem:VNx16QI (match_dup 1))
654 (match_operand:SI 0 "register_operand" "Ucj")]
655 UNSPEC_SME_STR))]
656 "TARGET_SME"
657 "str\tza[%w0, 0], [%1, #0, mul vl]"
658)
659
;; STR of a ZA vector with a nonzero offset. Constant operand 1 is printed
;; both as the ZA vector offset and as the "mul vl" memory offset; operand 3
;; is the address offset in the RTL but is not printed. The operand
;; numbering and the condition mirror aarch64_sme_ldrn.
;; NOTE(review): presumably aarch64_sme_ldr_vnum_offset_p checks that
;; operand 3 is the address offset that corresponds to operand 1 vectors --
;; confirm against its definition.
660(define_insn "@aarch64_sme_strn<mode>"
661 [(set (mem:VNx16QI
662 (plus:P (match_operand:P 2 "register_operand" "rk")
663 (match_operand:P 3 "aarch64_mov_operand")))
664 (unspec:VNx16QI
665 [(reg:VNx16QI ZA_REGNUM)
666 (reg:DI SME_STATE_REGNUM)
667 (mem:VNx16QI (plus:P (match_dup 2) (match_dup 3)))
668 (plus:SI (match_operand:SI 0 "register_operand" "Ucj")
669 (match_operand:SI 1 "const_int_operand"))]
670 UNSPEC_SME_STR))]
671 "TARGET_SME
672 && aarch64_sme_ldr_vnum_offset_p (operands[1], operands[3])"
673 "str\tza[%w0, %1], [%2, #%1, mul vl]"
674)
675
8d29b7ac
RS
676;; -------------------------------------------------------------------------
677;; ---- Table stores
678;; -------------------------------------------------------------------------
679;; Includes:
680;; - STR
681;; -------------------------------------------------------------------------
682
;; Store ZT0 to memory operand 0, modelled as a plain V8DI move from
;; ZT0_REGNUM. The "use" makes the insn depend on SME_STATE_REGNUM.
683(define_insn "aarch64_sme_str_zt0"
684 [(set (match_operand:V8DI 0 "aarch64_sync_memory_operand" "=Q")
685 (reg:V8DI ZT0_REGNUM))
686 (use (reg:DI SME_STATE_REGNUM))]
687 "TARGET_SME2"
688 "str\tzt0, %0"
689)
690
4f6ab953
RS
691;; -------------------------------------------------------------------------
692;; ---- Single-vector moves
693;; -------------------------------------------------------------------------
694;; Includes:
695;; - MOVA
696;; -------------------------------------------------------------------------
697
;; Predicated MOVA from a ZA slice to vector register operand 0, with a
;; slice offset of 0. Operand 1 is tied to the destination (constraint "0")
;; and the predicate operand 2 is printed with "/m" (merging).
;; Operand 3 is the constant printed directly after "za" and operand 4 is
;; the 32-bit slice index register.
698(define_insn "@aarch64_sme_<optab><v_int_container><mode>"
699 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
700 (unspec:SVE_FULL
701 [(reg:<V_INT_CONTAINER> ZA_REGNUM)
702 (reg:DI SME_STATE_REGNUM)
703 (match_operand:SVE_FULL 1 "register_operand" "0")
704 (match_operand:<VPRED> 2 "register_operand" "Upl")
705 (match_operand:DI 3 "const_int_operand")
706 (match_operand:SI 4 "register_operand" "Ucj")]
707 SME_READ))]
708 "TARGET_STREAMING_SME"
709 "mova\t%0.<Vetype>, %2/m, za%3<hv>.<Vetype>[%w4, 0]"
710)
711
;; As above, but the slice index is register operand 4 plus the constant
;; operand 5, which is printed as the slice offset. The insn condition
;; requires the offset to be less than 128 / <elem_bits> when treated as
;; unsigned. The name starts with "*", so no named generator is requested
;; for this pattern.
712(define_insn "*aarch64_sme_<optab><v_int_container><mode>_plus"
713 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
714 (unspec:SVE_FULL
715 [(reg:<V_INT_CONTAINER> ZA_REGNUM)
716 (reg:DI SME_STATE_REGNUM)
717 (match_operand:SVE_FULL 1 "register_operand" "0")
718 (match_operand:<VPRED> 2 "register_operand" "Upl")
719 (match_operand:DI 3 "const_int_operand")
720 (plus:SI (match_operand:SI 4 "register_operand" "Ucj")
721 (match_operand:SI 5 "const_int_operand"))]
722 SME_READ))]
723 "TARGET_STREAMING_SME
724 && UINTVAL (operands[5]) < 128 / <elem_bits>"
725 "mova\t%0.<Vetype>, %2/m, za%3<hv>.<Vetype>[%w4, %5]"
726)
727
;; The ".q" form of the single-vector read: ZA is accessed in the
;; VNx1TI_ONLY mode, the governing predicate is always VNx2BI, and the
;; destination may be any SVE_FULL mode. The slice offset is fixed at 0;
;; no "_plus" variant of this pattern appears in this section.
728(define_insn "@aarch64_sme_<optab><VNx1TI_ONLY:mode><SVE_FULL:mode>"
729 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
730 (unspec:SVE_FULL
731 [(reg:VNx1TI_ONLY ZA_REGNUM)
732 (reg:DI SME_STATE_REGNUM)
733 (match_operand:SVE_FULL 1 "register_operand" "0")
734 (match_operand:VNx2BI 2 "register_operand" "Upl")
735 (match_operand:DI 3 "const_int_operand")
736 (match_operand:SI 4 "register_operand" "Ucj")]
737 SME_READ))]
738 "TARGET_STREAMING_SME"
739 "mova\t%0.q, %2/m, za%3<hv>.q[%w4, 0]"
740)
741
;; Predicated MOVA from vector register operand 3 to a ZA slice, with a
;; slice offset of 0. Operand 0 is the constant printed directly after
;; "za", operand 1 is the 32-bit slice index register and operand 2 is the
;; governing predicate, printed with "/m" (merging).
;; NOTE(review): the ZA input inside the unspec uses mode SVE_FULL, whereas
;; the destination and the unspec use <V_INT_CONTAINER>; the ".q" write
;; pattern in this section uses one mode throughout -- confirm this is
;; intentional.
742(define_insn "@aarch64_sme_<optab><v_int_container><mode>"
743 [(set (reg:<V_INT_CONTAINER> ZA_REGNUM)
744 (unspec:<V_INT_CONTAINER>
745 [(reg:SVE_FULL ZA_REGNUM)
746 (reg:DI SME_STATE_REGNUM)
747 (match_operand:DI 0 "const_int_operand")
748 (match_operand:SI 1 "register_operand" "Ucj")
749 (match_operand:<VPRED> 2 "register_operand" "Upl")
750 (match_operand:SVE_FULL 3 "register_operand" "w")]
751 SME_WRITE))]
752 "TARGET_STREAMING_SME"
753 "mova\tza%0<hv>.<Vetype>[%w1, 0], %2/m, %3.<Vetype>"
754)
755
;; As above, but the slice index is register operand 1 plus the constant
;; operand 2, which is printed as the slice offset. The insn condition
;; requires the offset to be less than 128 / <elem_bits> when treated as
;; unsigned. The predicate and source vector become operands 3 and 4.
756(define_insn "*aarch64_sme_<optab><v_int_container><mode>_plus"
757 [(set (reg:<V_INT_CONTAINER> ZA_REGNUM)
758 (unspec:<V_INT_CONTAINER>
759 [(reg:SVE_FULL ZA_REGNUM)
760 (reg:DI SME_STATE_REGNUM)
761 (match_operand:DI 0 "const_int_operand")
762 (plus:SI (match_operand:SI 1 "register_operand" "Ucj")
763 (match_operand:SI 2 "const_int_operand"))
764 (match_operand:<VPRED> 3 "register_operand" "Upl")
765 (match_operand:SVE_FULL 4 "register_operand" "w")]
766 SME_WRITE))]
767 "TARGET_STREAMING_SME
768 && UINTVAL (operands[2]) < 128 / <elem_bits>"
769 "mova\tza%0<hv>.<Vetype>[%w1, %2], %3/m, %4.<Vetype>"
770)
771
;; The ".q" form of the single-vector write: ZA is accessed in the
;; VNx1TI_ONLY mode, the governing predicate is always VNx2BI, and the
;; source may be any SVE_FULL mode. The slice offset is fixed at 0.
772(define_insn "@aarch64_sme_<optab><VNx1TI_ONLY:mode><SVE_FULL:mode>"
773 [(set (reg:VNx1TI_ONLY ZA_REGNUM)
774 (unspec:VNx1TI_ONLY
775 [(reg:VNx1TI_ONLY ZA_REGNUM)
776 (reg:DI SME_STATE_REGNUM)
777 (match_operand:DI 0 "const_int_operand")
778 (match_operand:SI 1 "register_operand" "Ucj")
779 (match_operand:VNx2BI 2 "register_operand" "Upl")
780 (match_operand:SVE_FULL 3 "register_operand" "w")]
781 SME_WRITE))]
782 "TARGET_STREAMING_SME"
783 "mova\tza%0<hv>.q[%w1, 0], %2/m, %3.q"
784)
785
c1c267df
RS
786;; -------------------------------------------------------------------------
787;; ---- Multi-vector moves
788;; -------------------------------------------------------------------------
789;; Includes:
790;; - MOVA
791;; -------------------------------------------------------------------------
792
;; Unpredicated multi-vector MOVA from ZA slices to the register tuple in
;; operand 0. Operand 1 is the constant printed directly after "za" and
;; operand 2 is the 32-bit slice index register. The slice range printed is
;; "0:<vector_count> - 1"; the upper bound is materialised in operands[3]
;; at output time purely for printing.
793(define_insn "@aarch64_sme_<optab><mode><mode>"
794 [(set (match_operand:SVE_FULLx24 0 "aligned_register_operand" "=Uw<vector_count>")
795 (unspec:SVE_FULLx24
796 [(reg:SVE_FULLx24 ZA_REGNUM)
797 (reg:DI SME_STATE_REGNUM)
798 (match_operand:DI 1 "const_int_operand")
799 (match_operand:SI 2 "register_operand" "Ucj")]
800 SME_READ))]
801 "TARGET_STREAMING_SME2"
802 {
803 operands[3] = GEN_INT (<vector_count> - 1);
804 return "mova\t%0, za%1<hv>.<Vetype>[%w2, 0:%3]";
805 }
806)
807
;; As above, but the slice index is register operand 2 plus the constant
;; operand 3. The insn condition requires the offset to be a multiple of
;; <vector_count> and less than 128 / <elem_bits> (both checked on the
;; unsigned value). The printed range runs from the offset to
;; offset + <vector_count> - 1; the upper bound is materialised in
;; operands[4] at output time.
808(define_insn "*aarch64_sme_<optab><mode><mode>_plus"
809 [(set (match_operand:SVE_FULLx24 0 "aligned_register_operand" "=Uw<vector_count>")
810 (unspec:SVE_FULLx24
811 [(reg:SVE_FULLx24 ZA_REGNUM)
812 (reg:DI SME_STATE_REGNUM)
813 (match_operand:DI 1 "const_int_operand")
814 (plus:SI
815 (match_operand:SI 2 "register_operand" "Ucj")
816 (match_operand:SI 3 "const_int_operand"))]
817 SME_READ))]
818 "TARGET_STREAMING_SME2
819 && UINTVAL (operands[3]) % <vector_count> == 0
820 && UINTVAL (operands[3]) < 128 / <elem_bits>"
821 {
822 operands[4] = GEN_INT (INTVAL (operands[3]) + <vector_count> - 1);
823 return "mova\t%0, za%1<hv>.<Vetype>[%w2, %3:%4]";
824 }
825)
826
;; Multi-vector MOVA from a ZA vector group ("za.d[..., vgx<vector_count>]")
;; to the register tuple in operand 0. Operand 1 is the 32-bit index
;; register (constraint "Uci", unlike the "Ucj" used by the slice forms
;; above) and the offset is fixed at 0.
827(define_insn "@aarch64_sme_read<mode>"
828 [(set (match_operand:SVE_DIx24 0 "aligned_register_operand" "=Uw<vector_count>")
829 (unspec:SVE_DIx24
830 [(reg:SVE_DIx24 ZA_REGNUM)
831 (reg:DI SME_STATE_REGNUM)
832 (match_operand:SI 1 "register_operand" "Uci")]
833 UNSPEC_SME_READ))]
834 "TARGET_STREAMING_SME2"
835 "mova\t%0, za.d[%w1, 0, vgx<vector_count>]"
836)
837
;; As above, but the index is register operand 1 plus constant operand 2.
;; The offset range is enforced by the const_0_to_7_operand predicate rather
;; than by the insn condition.
838(define_insn "*aarch64_sme_read<mode>_plus"
839 [(set (match_operand:SVE_DIx24 0 "aligned_register_operand" "=Uw<vector_count>")
840 (unspec:SVE_DIx24
841 [(reg:SVE_DIx24 ZA_REGNUM)
842 (reg:DI SME_STATE_REGNUM)
843 (plus:SI (match_operand:SI 1 "register_operand" "Uci")
844 (match_operand:SI 2 "const_0_to_7_operand"))]
845 UNSPEC_SME_READ))]
846 "TARGET_STREAMING_SME2"
847 "mova\t%0, za.d[%w1, %2, vgx<vector_count>]"
848)
849
;; Unpredicated multi-vector MOVA from the register tuple in operand 2 to
;; ZA slices. Operand 0 is the constant printed directly after "za" and
;; operand 1 is the 32-bit slice index register. The slice range printed is
;; "0:<vector_count> - 1"; the upper bound is materialised in operands[3]
;; at output time purely for printing.
850(define_insn "@aarch64_sme_<optab><mode><mode>"
851 [(set (reg:SVE_FULLx24 ZA_REGNUM)
852 (unspec:SVE_FULLx24
853 [(reg:SVE_FULLx24 ZA_REGNUM)
854 (reg:DI SME_STATE_REGNUM)
855 (match_operand:DI 0 "const_int_operand")
856 (match_operand:SI 1 "register_operand" "Ucj")
857 (match_operand:SVE_FULLx24 2 "aligned_register_operand" "Uw<vector_count>")]
858 SME_WRITE))]
859 "TARGET_STREAMING_SME2"
860 {
861 operands[3] = GEN_INT (<vector_count> - 1);
862 return "mova\tza%0<hv>.<Vetype>[%w1, 0:%3], %2";
863 }
864)
865
;; As above, but the slice index is register operand 1 plus the constant
;; operand 2. The insn condition requires the offset to be a multiple of
;; <vector_count> and less than 128 / <elem_bits> (both checked on the
;; unsigned value). The printed range runs from the offset to
;; offset + <vector_count> - 1; the upper bound is materialised in
;; operands[4] at output time. The source tuple is operand 3.
866(define_insn "*aarch64_sme_<optab><mode><mode>_plus"
867 [(set (reg:SVE_FULLx24 ZA_REGNUM)
868 (unspec:SVE_FULLx24
869 [(reg:SVE_FULLx24 ZA_REGNUM)
870 (reg:DI SME_STATE_REGNUM)
871 (match_operand:DI 0 "const_int_operand")
872 (plus:SI
873 (match_operand:SI 1 "register_operand" "Ucj")
874 (match_operand:SI 2 "const_int_operand"))
875 (match_operand:SVE_FULLx24 3 "aligned_register_operand" "Uw<vector_count>")]
876 SME_WRITE))]
877 "TARGET_STREAMING_SME2
878 && UINTVAL (operands[2]) % <vector_count> == 0
879 && UINTVAL (operands[2]) < 128 / <elem_bits>"
880 {
881 operands[4] = GEN_INT (INTVAL (operands[2]) + <vector_count> - 1);
882 return "mova\tza%0<hv>.<Vetype>[%w1, %2:%4], %3";
883 }
884)
885
;; Multi-vector MOVA from the register tuple in operand 1 to a ZA vector
;; group ("za.d[..., vgx<vector_count>]"). Operand 0 is the 32-bit index
;; register and the offset is fixed at 0.
;;
;; The unspec is tagged UNSPEC_SME_WRITE so that the RTL for a write is not
;; labelled with the code used by the aarch64_sme_read<mode> patterns.
886(define_insn "@aarch64_sme_write<mode>"
887 [(set (reg:SVE_DIx24 ZA_REGNUM)
888 (unspec:SVE_DIx24
889 [(reg:SVE_DIx24 ZA_REGNUM)
890 (reg:DI SME_STATE_REGNUM)
891 (match_operand:SI 0 "register_operand" "Uci")
892 (match_operand:SVE_DIx24 1 "aligned_register_operand" "Uw<vector_count>")]
893 UNSPEC_SME_WRITE))]
894 "TARGET_STREAMING_SME2"
895 "mova\tza.d[%w0, 0, vgx<vector_count>], %1"
896)
897
;; As for aarch64_sme_write<mode>, but the index is register operand 0 plus
;; constant operand 1. The offset range is enforced by the
;; const_0_to_7_operand predicate. The source tuple is operand 2.
;;
;; The unspec is tagged UNSPEC_SME_WRITE so that the RTL for a write is not
;; labelled with the code used by the aarch64_sme_read<mode> patterns.
898(define_insn "*aarch64_sme_write<mode>_plus"
899 [(set (reg:SVE_DIx24 ZA_REGNUM)
900 (unspec:SVE_DIx24
901 [(reg:SVE_DIx24 ZA_REGNUM)
902 (reg:DI SME_STATE_REGNUM)
903 (plus:SI (match_operand:SI 0 "register_operand" "Uci")
904 (match_operand:SI 1 "const_0_to_7_operand"))
905 (match_operand:SVE_DIx24 2 "aligned_register_operand" "Uw<vector_count>")]
906 UNSPEC_SME_WRITE))]
907 "TARGET_STREAMING_SME2"
908 "mova\tza.d[%w0, %1, vgx<vector_count>], %2"
909)
910
4f6ab953
RS
911;; -------------------------------------------------------------------------
912;; ---- Zeroing
913;; -------------------------------------------------------------------------
914;; Includes:
915;; - ZERO
916;; -------------------------------------------------------------------------
917
;; Add UNSPEC_SME_ZERO to the "unspec" enum; it tags the ZA-zeroing
;; pattern aarch64_sme_zero_za.
918(define_c_enum "unspec" [UNSPEC_SME_ZERO])
919
;; Zero ZA contents as selected by the constant operand 0.  The assembly
;; text is produced by aarch64_output_sme_zero_za, which receives operand 0.
;; NOTE(review): operand 0 is presumably a mask of the tiles to zero;
;; confirm against aarch64_output_sme_zero_za.
920(define_insn "aarch64_sme_zero_za"
921 [(set (reg:VNx16QI ZA_REGNUM)
922 (unspec:VNx16QI [(reg:VNx16QI ZA_REGNUM)
923 (reg:DI SME_STATE_REGNUM)
924 (match_operand:DI 0 "const_int_operand")]
925 UNSPEC_SME_ZERO))]
926 "TARGET_SME"
927 {
928 return aarch64_output_sme_zero_za (operands[0]);
929 }
930)
931
c1c267df
RS
;; Set ZT0 (modelled as a V8DI register) to zero with "zero { zt0 }".
;; The pattern also records a use of SME_STATE_REGNUM and is enabled
;; for TARGET_SME2.
932(define_insn "aarch64_sme_zero_zt0"
933 [(set (reg:V8DI ZT0_REGNUM)
934 (const_int 0))
935 (use (reg:DI SME_STATE_REGNUM))]
936 "TARGET_SME2"
937 "zero\t{ zt0 }"
938)
939
4f6ab953
RS
940;; =========================================================================
941;; == Binary arithmetic
942;; =========================================================================
943
944;; -------------------------------------------------------------------------
945;; ---- Binary arithmetic on ZA tile
946;; -------------------------------------------------------------------------
947;; Includes:
948;; - ADDHA
949;; - ADDVA
950;; -------------------------------------------------------------------------
951
;; Binary operation on a ZA tile, for the SME_BINARY_SDI unspecs.
;; Operand 0 is the constant printed as the tile number in "za%0",
;; operands 1 and 2 are <VPRED> predicate registers printed with "/m",
;; and operand 3 is the vector input.
952(define_insn "@aarch64_sme_<optab><mode>"
953 [(set (reg:SME_ZA_SDI ZA_REGNUM)
954 (unspec:SME_ZA_SDI
955 [(reg:SME_ZA_SDI ZA_REGNUM)
956 (reg:DI SME_STATE_REGNUM)
957 (match_operand:DI 0 "const_int_operand")
958 (match_operand:<VPRED> 1 "register_operand" "Upl")
959 (match_operand:<VPRED> 2 "register_operand" "Upl")
960 (match_operand:SME_ZA_SDI 3 "register_operand" "w")]
961 SME_BINARY_SDI))]
962 "TARGET_STREAMING_SME"
963 "<optab>\tza%0.<Vetype>, %1/m, %2/m, %3.<Vetype>"
964)
965
c1c267df
RS
966;; -------------------------------------------------------------------------
967;; ---- Binary arithmetic on ZA slice
968;; -------------------------------------------------------------------------
969;; Includes:
970;; - ADD
971;; -------------------------------------------------------------------------
972
;; Binary operation between ZA slices and the multi-register operand 1,
;; for the SME_BINARY_SLICE_SDI unspecs (integer modes SME_ZA_SDIx24).
;; Operand 0 is the SI slice-index register; the immediate offset is 0.
973(define_insn "@aarch64_sme_<optab><mode>"
974 [(set (reg:SME_ZA_SDIx24 ZA_REGNUM)
975 (unspec:SME_ZA_SDIx24
976 [(reg:SME_ZA_SDIx24 ZA_REGNUM)
977 (reg:DI SME_STATE_REGNUM)
978 (match_operand:SI 0 "register_operand" "Uci")
979 (match_operand:SME_ZA_SDIx24 1 "aligned_register_operand" "Uw<vector_count>")]
980 SME_BINARY_SLICE_SDI))]
981 "TARGET_STREAMING_SME2"
982 "<optab>\tza.<Vetype>[%w0, 0, vgx<vector_count>], %1"
983)
984
;; As above, but the slice index is operand 0 plus the constant operand 1
;; (predicate const_0_to_7_operand), which is printed as the immediate
;; offset.  Operand 2 is the multi-register input.
985(define_insn "*aarch64_sme_<optab><mode>_plus"
986 [(set (reg:SME_ZA_SDIx24 ZA_REGNUM)
987 (unspec:SME_ZA_SDIx24
988 [(reg:SME_ZA_SDIx24 ZA_REGNUM)
989 (reg:DI SME_STATE_REGNUM)
990 (plus:SI (match_operand:SI 0 "register_operand" "Uci")
991 (match_operand:SI 1 "const_0_to_7_operand"))
992 (match_operand:SME_ZA_SDIx24 2 "aligned_register_operand" "Uw<vector_count>")]
993 SME_BINARY_SLICE_SDI))]
994 "TARGET_STREAMING_SME2"
995 "<optab>\tza.<Vetype>[%w0, %1, vgx<vector_count>], %2"
996)
997
;; Binary operation between ZA slices and the multi-register operand 1,
;; for the SME_BINARY_SLICE_SDF unspecs (modes SME_ZA_SDFx24).
;; Operand 0 is the SI slice-index register; the immediate offset is 0.
998(define_insn "@aarch64_sme_<optab><mode>"
999 [(set (reg:SME_ZA_SDFx24 ZA_REGNUM)
1000 (unspec:SME_ZA_SDFx24
1001 [(reg:SME_ZA_SDFx24 ZA_REGNUM)
1002 (reg:DI SME_STATE_REGNUM)
1003 (match_operand:SI 0 "register_operand" "Uci")
1004 (match_operand:SME_ZA_SDFx24 1 "aligned_register_operand" "Uw<vector_count>")]
1005 SME_BINARY_SLICE_SDF))]
1006 "TARGET_STREAMING_SME2"
1007 "<optab>\tza.<Vetype>[%w0, 0, vgx<vector_count>], %1"
1008)
1009
;; As above, but the slice index is operand 0 plus the constant operand 1
;; (predicate const_0_to_7_operand), which is printed as the immediate
;; offset.  Operand 2 is the multi-register input.
1010(define_insn "*aarch64_sme_<optab><mode>_plus"
1011 [(set (reg:SME_ZA_SDFx24 ZA_REGNUM)
1012 (unspec:SME_ZA_SDFx24
1013 [(reg:SME_ZA_SDFx24 ZA_REGNUM)
1014 (reg:DI SME_STATE_REGNUM)
1015 (plus:SI (match_operand:SI 0 "register_operand" "Uci")
1016 (match_operand:SI 1 "const_0_to_7_operand"))
1017 (match_operand:SME_ZA_SDFx24 2 "aligned_register_operand" "Uw<vector_count>")]
1018 SME_BINARY_SLICE_SDF))]
1019 "TARGET_STREAMING_SME2"
1020 "<optab>\tza.<Vetype>[%w0, %1, vgx<vector_count>], %2"
1021)
1022
1023;; -------------------------------------------------------------------------
1024;; ---- Binary arithmetic, writing to ZA slice
1025;; -------------------------------------------------------------------------
1026;; Includes:
1027;; - ADD
1028;; - SUB
1029;; -------------------------------------------------------------------------
1030
;; Binary operation on two multi-register inputs (operands 1 and 2) whose
;; result goes to ZA slices, for the SME_BINARY_WRITE_SLICE_SDI unspecs.
;; The mnemonic comes from <sme_int_op>.  Operand 0 is the SI slice-index
;; register; the immediate offset is 0.
1031(define_insn "@aarch64_sme_<optab><mode>"
1032 [(set (reg:SME_ZA_SDIx24 ZA_REGNUM)
1033 (unspec:SME_ZA_SDIx24
1034 [(reg:SME_ZA_SDIx24 ZA_REGNUM)
1035 (reg:DI SME_STATE_REGNUM)
1036 (match_operand:SI 0 "register_operand" "Uci")
1037 (match_operand:SME_ZA_SDIx24 1 "aligned_register_operand" "Uw<vector_count>")
1038 (match_operand:SME_ZA_SDIx24 2 "aligned_register_operand" "Uw<vector_count>")]
1039 SME_BINARY_WRITE_SLICE_SDI))]
1040 "TARGET_STREAMING_SME2"
1041 "<sme_int_op>\tza.<Vetype>[%w0, 0, vgx<vector_count>], %1, %2"
1042)
1043
;; As above, but the slice index is operand 0 plus the constant operand 1
;; (predicate const_0_to_7_operand).  Operands 2 and 3 are the two
;; multi-register inputs.
1044(define_insn "*aarch64_sme_<optab><mode>_plus"
1045 [(set (reg:SME_ZA_SDIx24 ZA_REGNUM)
1046 (unspec:SME_ZA_SDIx24
1047 [(reg:SME_ZA_SDIx24 ZA_REGNUM)
1048 (reg:DI SME_STATE_REGNUM)
1049 (plus:SI (match_operand:SI 0 "register_operand" "Uci")
1050 (match_operand:SI 1 "const_0_to_7_operand"))
1051 (match_operand:SME_ZA_SDIx24 2 "aligned_register_operand" "Uw<vector_count>")
1052 (match_operand:SME_ZA_SDIx24 3 "aligned_register_operand" "Uw<vector_count>")]
1053 SME_BINARY_WRITE_SLICE_SDI))]
1054 "TARGET_STREAMING_SME2"
1055 "<sme_int_op>\tza.<Vetype>[%w0, %1, vgx<vector_count>], %2, %3"
1056)
1057
;; "Single" form of the write-to-slice operation: the second input is one
;; <VSINGLE> register (operand 2) represented as a vec_duplicate across the
;; multi-register mode.  Operand 1 uses plain register_operand with
;; constraint "w" here, rather than aligned_register_operand.
1058(define_insn "@aarch64_sme_single_<optab><mode>"
1059 [(set (reg:SME_ZA_SDIx24 ZA_REGNUM)
1060 (unspec:SME_ZA_SDIx24
1061 [(reg:SME_ZA_SDIx24 ZA_REGNUM)
1062 (reg:DI SME_STATE_REGNUM)
1063 (match_operand:SI 0 "register_operand" "Uci")
1064 (match_operand:SME_ZA_SDIx24 1 "register_operand" "w")
1065 (vec_duplicate:SME_ZA_SDIx24
1066 (match_operand:<VSINGLE> 2 "register_operand" "x"))]
1067 SME_BINARY_WRITE_SLICE_SDI))]
1068 "TARGET_STREAMING_SME2"
1069 "<sme_int_op>\tza.<Vetype>[%w0, 0, vgx<vector_count>], %1, %2.<Vetype>"
1070)
1071
;; Offset variant of the "single" write-to-slice pattern: the slice index
;; is operand 0 plus the constant operand 1 (const_0_to_7_operand).
;; Operand 2 is the multi-register input and operand 3 the duplicated
;; single register.
1072(define_insn "*aarch64_sme_single_<optab><mode>_plus"
1073 [(set (reg:SME_ZA_SDIx24 ZA_REGNUM)
1074 (unspec:SME_ZA_SDIx24
1075 [(reg:SME_ZA_SDIx24 ZA_REGNUM)
1076 (reg:DI SME_STATE_REGNUM)
1077 (plus:SI (match_operand:SI 0 "register_operand" "Uci")
1078 (match_operand:SI 1 "const_0_to_7_operand"))
1079 (match_operand:SME_ZA_SDIx24 2 "register_operand" "w")
1080 (vec_duplicate:SME_ZA_SDIx24
1081 (match_operand:<VSINGLE> 3 "register_operand" "x"))]
1082 SME_BINARY_WRITE_SLICE_SDI))]
1083 "TARGET_STREAMING_SME2"
1084 "<sme_int_op>\tza.<Vetype>[%w0, %1, vgx<vector_count>], %2, %3.<Vetype>"
1085)
1086
4f6ab953
RS
1087;; =========================================================================
1088;; == Ternary arithmetic
1089;; =========================================================================
1090
1091;; -------------------------------------------------------------------------
c1c267df
RS
1092;; ---- [INT] Dot product
1093;; -------------------------------------------------------------------------
1094;; Includes:
1095;; - SDOT
1096;; - SUDOT
1097;; - UDOT
1098;; - USDOT
1099;; -------------------------------------------------------------------------
1100
;; Integer dot product of two multi-register inputs (operands 1 and 2)
;; accumulated into ZA slices, for the SME_INT_DOTPROD unspecs.
;; The condition requires the ZA elements to be 32 bits unless the inputs
;; are 16 bits, and requires the inputs to be 8 bits unless
;; <has_16bit_form> holds.  Operand 0 is the SI slice-index register.
1101(define_insn "@aarch64_sme_<optab><SME_ZA_SDI:mode><SME_ZA_BHIx24:mode>"
1102 [(set (reg:SME_ZA_SDI ZA_REGNUM)
1103 (unspec:SME_ZA_SDI
1104 [(reg:SME_ZA_SDI ZA_REGNUM)
1105 (reg:DI SME_STATE_REGNUM)
1106 (match_operand:SI 0 "register_operand" "Uci")
1107 (match_operand:SME_ZA_BHIx24 1 "aligned_register_operand" "Uw<vector_count>")
1108 (match_operand:SME_ZA_BHIx24 2 "aligned_register_operand" "Uw<vector_count>")]
1109 SME_INT_DOTPROD))]
1110 "TARGET_STREAMING_SME2
1111 && (<SME_ZA_SDI:elem_bits> == 32 || <SME_ZA_BHIx24:elem_bits> == 16)
1112 && (<SME_ZA_BHIx24:elem_bits> == 8 || <has_16bit_form>)"
1113 "<optab>\tza.<SME_ZA_SDI:Vetype>[%w0, 0, vgx<vector_count>], %1, %2"
1114)
1115
;; Offset variant of the integer dot product: the slice index is operand 0
;; plus the constant operand 1 (const_0_to_7_operand).  Operands 2 and 3
;; are the multi-register inputs.  The element-size condition matches the
;; non-offset pattern.
1116(define_insn "*aarch64_sme_<optab><SME_ZA_SDI:mode><SME_ZA_BHIx24:mode>_plus"
1117 [(set (reg:SME_ZA_SDI ZA_REGNUM)
1118 (unspec:SME_ZA_SDI
1119 [(reg:SME_ZA_SDI ZA_REGNUM)
1120 (reg:DI SME_STATE_REGNUM)
1121 (plus:SI (match_operand:SI 0 "register_operand" "Uci")
1122 (match_operand:SI 1 "const_0_to_7_operand"))
1123 (match_operand:SME_ZA_BHIx24 2 "aligned_register_operand" "Uw<vector_count>")
1124 (match_operand:SME_ZA_BHIx24 3 "aligned_register_operand" "Uw<vector_count>")]
1125 SME_INT_DOTPROD))]
1126 "TARGET_STREAMING_SME2
1127 && (<SME_ZA_SDI:elem_bits> == 32 || <SME_ZA_BHIx24:elem_bits> == 16)
1128 && (<SME_ZA_BHIx24:elem_bits> == 8 || <has_16bit_form>)"
1129 "<optab>\tza.<SME_ZA_SDI:Vetype>[%w0, %1, vgx<vector_count>], %2, %3"
1130)
1131
;; "Single" form of the integer dot product: operand 1 is the
;; multi-register input and operand 2 a single <VSINGLE> register that is
;; represented as a vec_duplicate.  Operand 0 is the SI slice-index
;; register; the immediate offset is 0.
1132(define_insn "@aarch64_sme_single_<optab><SME_ZA_SDI:mode><SME_ZA_BHIx24:mode>"
1133 [(set (reg:SME_ZA_SDI ZA_REGNUM)
1134 (unspec:SME_ZA_SDI
1135 [(reg:SME_ZA_SDI ZA_REGNUM)
1136 (reg:DI SME_STATE_REGNUM)
1137 (match_operand:SI 0 "register_operand" "Uci")
1138 (match_operand:SME_ZA_BHIx24 1 "register_operand" "w")
1139 (vec_duplicate:SME_ZA_BHIx24
1140 (match_operand:<VSINGLE> 2 "register_operand" "x"))]
1141 SME_INT_DOTPROD))]
1142 "TARGET_STREAMING_SME2
1143 && (<SME_ZA_SDI:elem_bits> == 32 || <SME_ZA_BHIx24:elem_bits> == 16)
1144 && (<SME_ZA_BHIx24:elem_bits> == 8 || <has_16bit_form>)"
1145 "<optab>\tza.<SME_ZA_SDI:Vetype>[%w0, 0, vgx<vector_count>], %1, %2.<SME_ZA_BHIx24:Vetype>"
1146)
1147
;; Offset variant of the "single" integer dot product: the slice index is
;; operand 0 plus the constant operand 1 (const_0_to_7_operand).
;; Operand 2 is the multi-register input and operand 3 the duplicated
;; single register.
1148(define_insn "*aarch64_sme_single_<optab><SME_ZA_SDI:mode><SME_ZA_BHIx24:mode>_plus"
1149 [(set (reg:SME_ZA_SDI ZA_REGNUM)
1150 (unspec:SME_ZA_SDI
1151 [(reg:SME_ZA_SDI ZA_REGNUM)
1152 (reg:DI SME_STATE_REGNUM)
1153 (plus:SI (match_operand:SI 0 "register_operand" "Uci")
1154 (match_operand:SI 1 "const_0_to_7_operand"))
1155 (match_operand:SME_ZA_BHIx24 2 "register_operand" "w")
1156 (vec_duplicate:SME_ZA_BHIx24
1157 (match_operand:<VSINGLE> 3 "register_operand" "x"))]
1158 SME_INT_DOTPROD))]
1159 "TARGET_STREAMING_SME2
1160 && (<SME_ZA_SDI:elem_bits> == 32 || <SME_ZA_BHIx24:elem_bits> == 16)
1161 && (<SME_ZA_BHIx24:elem_bits> == 8 || <has_16bit_form>)"
1162 "<optab>\tza.<SME_ZA_SDI:Vetype>[%w0, %1, vgx<vector_count>], %2, %3.<SME_ZA_BHIx24:Vetype>"
1163)
1164
1165;; SUDOT is USDOT with the operands swapped.
;; Accordingly the RTL uses UNSPEC_SME_USDOT with the duplicated single
;; register (operand 2) placed before the multi-register input (operand 1),
;; while the assembly still prints operand 1 first and operand 2 second.
1166(define_insn "@aarch64_sme_single_sudot<VNx4SI_ONLY:mode><SME_ZA_BIx24:mode>"
1167 [(set (reg:VNx4SI_ONLY ZA_REGNUM)
1168 (unspec:VNx4SI_ONLY
1169 [(reg:VNx4SI_ONLY ZA_REGNUM)
1170 (reg:DI SME_STATE_REGNUM)
1171 (match_operand:SI 0 "register_operand" "Uci")
1172 (vec_duplicate:SME_ZA_BIx24
1173 (match_operand:<VSINGLE> 2 "register_operand" "x"))
1174 (match_operand:SME_ZA_BIx24 1 "register_operand" "w")]
1175 UNSPEC_SME_USDOT))]
1176 "TARGET_STREAMING_SME2"
1177 "sudot\tza.s[%w0, 0, vgx<vector_count>], %1, %2.b"
1178)
1179
;; Offset variant of the single-register SUDOT pattern: the slice index is
;; operand 0 plus the constant operand 1 (const_0_to_7_operand).
;; As in the non-offset form, the RTL is UNSPEC_SME_USDOT with the
;; duplicated single register (operand 3) listed before the multi-register
;; input (operand 2).
1180(define_insn "*aarch64_sme_single_sudot<VNx4SI_ONLY:mode><SME_ZA_BIx24:mode>_plus"
1181 [(set (reg:VNx4SI_ONLY ZA_REGNUM)
1182 (unspec:VNx4SI_ONLY
1183 [(reg:VNx4SI_ONLY ZA_REGNUM)
1184 (reg:DI SME_STATE_REGNUM)
1185 (plus:SI (match_operand:SI 0 "register_operand" "Uci")
1186 (match_operand:SI 1 "const_0_to_7_operand"))
1187 (vec_duplicate:SME_ZA_BIx24
1188 (match_operand:<VSINGLE> 3 "register_operand" "x"))
1189 (match_operand:SME_ZA_BIx24 2 "register_operand" "w")]
1190 UNSPEC_SME_USDOT))]
1191 "TARGET_STREAMING_SME2"
1192 "sudot\tza.s[%w0, %1, vgx<vector_count>], %2, %3.b"
1193)
1194
;; Indexed (lane) form of the integer dot product, for the
;; SME_INT_DOTPROD_LANE unspecs.  Operand 1 is the multi-register input;
;; operand 2 is a single register and operand 3 the constant lane index,
;; combined by UNSPEC_SVE_LANE_SELECT and printed as "%2.<T>[%3]".
;; Operand 0 is the SI slice-index register; the immediate offset is 0.
1195(define_insn "@aarch64_sme_lane_<optab><SME_ZA_SDI:mode><SME_ZA_BHIx24:mode>"
1196 [(set (reg:SME_ZA_SDI ZA_REGNUM)
1197 (unspec:SME_ZA_SDI
1198 [(reg:SME_ZA_SDI ZA_REGNUM)
1199 (reg:DI SME_STATE_REGNUM)
1200 (match_operand:SI 0 "register_operand" "Uci")
1201 (match_operand:SME_ZA_BHIx24 1 "aligned_register_operand" "Uw<vector_count>")
1202 (unspec:SME_ZA_BHIx24
1203 [(match_operand:<VSINGLE> 2 "register_operand" "x")
1204 (match_operand:SI 3 "const_int_operand")]
1205 UNSPEC_SVE_LANE_SELECT)]
1206 SME_INT_DOTPROD_LANE))]
1207 "TARGET_STREAMING_SME2
1208 && (<SME_ZA_SDI:elem_bits> == 32 || <SME_ZA_BHIx24:elem_bits> == 16)
1209 && (<SME_ZA_BHIx24:elem_bits> == 8 || <has_16bit_form>)"
1210 "<optab>\tza.<SME_ZA_SDI:Vetype>[%w0, 0, vgx<vector_count>], %1, %2.<SME_ZA_BHIx24:Vetype>[%3]"
1211)
1212
;; Offset variant of the indexed integer dot product: the slice index is
;; operand 0 plus the constant operand 1 (const_0_to_7_operand).
;; Operand 2 is the multi-register input, operand 3 the single register
;; and operand 4 the constant lane index.
1213(define_insn "*aarch64_sme_lane_<optab><SME_ZA_SDI:mode><SME_ZA_BHIx24:mode>_plus"
1214 [(set (reg:SME_ZA_SDI ZA_REGNUM)
1215 (unspec:SME_ZA_SDI
1216 [(reg:SME_ZA_SDI ZA_REGNUM)
1217 (reg:DI SME_STATE_REGNUM)
1218 (plus:SI (match_operand:SI 0 "register_operand" "Uci")
1219 (match_operand:SI 1 "const_0_to_7_operand"))
1220 (match_operand:SME_ZA_BHIx24 2 "aligned_register_operand" "Uw<vector_count>")
1221 (unspec:SME_ZA_BHIx24
1222 [(match_operand:<VSINGLE> 3 "register_operand" "x")
1223 (match_operand:SI 4 "const_int_operand")]
1224 UNSPEC_SVE_LANE_SELECT)]
1225 SME_INT_DOTPROD_LANE))]
1226 "TARGET_STREAMING_SME2
1227 && (<SME_ZA_SDI:elem_bits> == 32 || <SME_ZA_BHIx24:elem_bits> == 16)
1228 && (<SME_ZA_BHIx24:elem_bits> == 8 || <has_16bit_form>)"
1229 "<optab>\tza.<SME_ZA_SDI:Vetype>[%w0, %1, vgx<vector_count>], %2, %3.<SME_ZA_BHIx24:Vetype>[%4]"
1230)
1231
1232;; -------------------------------------------------------------------------
1233;; ---- [INT] Ternary widening arithmetic on ZA slice
4f6ab953
RS
1234;; -------------------------------------------------------------------------
1235;; Includes:
c1c267df
RS
1236;; - SMLA
1237;; - SMLS
1238;; - UMLA
1239;; - UMLS
1240;; -------------------------------------------------------------------------
1241
;; Widening multiply-accumulate of two single vectors (operands 1 and 2,
;; modes SVE_FULL_BHI) into 32-bit ZA slices, for the SME_INT_TERNARY_SLICE
;; unspecs.  The mnemonic is <optab> followed by <za32_long>, and the slice
;; range is printed as "0:<za32_last_offset>".  Operand 0 is the SI
;; slice-index register.
1242(define_insn "@aarch64_sme_<optab><VNx4SI_ONLY:mode><SVE_FULL_BHI:mode>"
1243 [(set (reg:VNx4SI_ONLY ZA_REGNUM)
1244 (unspec:VNx4SI_ONLY
1245 [(reg:VNx4SI_ONLY ZA_REGNUM)
1246 (reg:DI SME_STATE_REGNUM)
1247 (match_operand:SI 0 "register_operand" "Uci")
1248 (match_operand:SVE_FULL_BHI 1 "register_operand" "w")
1249 (match_operand:SVE_FULL_BHI 2 "register_operand" "x")]
1250 SME_INT_TERNARY_SLICE))]
1251 "TARGET_STREAMING_SME2"
1252 "<optab><za32_long>\tza.s[%w0, 0:<za32_last_offset>], %1.<SVE_FULL_BHI:Vetype>, %2.<SVE_FULL_BHI:Vetype>"
1253)
1254
;; Offset variant of the single-vector widening pattern: the slice index is
;; operand 0 plus the constant operand 1, whose predicate is
;; const_<za32_offset_range>_operand.  operands[4] is set to
;; operand 1 + <za32_last_offset>, the end of the printed "%1:%4" range.
1255(define_insn "*aarch64_sme_<optab><VNx4SI_ONLY:mode><SVE_FULL_BHI:mode>_plus"
1256 [(set (reg:VNx4SI_ONLY ZA_REGNUM)
1257 (unspec:VNx4SI_ONLY
1258 [(reg:VNx4SI_ONLY ZA_REGNUM)
1259 (reg:DI SME_STATE_REGNUM)
1260 (plus:SI (match_operand:SI 0 "register_operand" "Uci")
1261 (match_operand:SI 1 "const_<za32_offset_range>_operand"))
1262 (match_operand:SVE_FULL_BHI 2 "register_operand" "w")
1263 (match_operand:SVE_FULL_BHI 3 "register_operand" "x")]
1264 SME_INT_TERNARY_SLICE))]
1265 "TARGET_STREAMING_SME2"
1266 {
1267 operands[4] = GEN_INT (INTVAL (operands[1]) + <za32_last_offset>);
1268 return "<optab><za32_long>\tza.s[%w0, %1:%4], %2.<SVE_FULL_BHI:Vetype>, %3.<SVE_FULL_BHI:Vetype>";
1269 }
1270)
1271
;; Widening multiply-accumulate of two multi-register inputs (operands 1
;; and 2, modes SME_ZA_BHIx24) into 32-bit ZA slices.  The slice range is
;; printed as "0:<za32_last_offset>" followed by "vgx<vector_count>".
;; Operand 0 is the SI slice-index register.
1272(define_insn "@aarch64_sme_<optab><VNx4SI_ONLY:mode><SME_ZA_BHIx24:mode>"
1273 [(set (reg:VNx4SI_ONLY ZA_REGNUM)
1274 (unspec:VNx4SI_ONLY
1275 [(reg:VNx4SI_ONLY ZA_REGNUM)
1276 (reg:DI SME_STATE_REGNUM)
1277 (match_operand:SI 0 "register_operand" "Uci")
1278 (match_operand:SME_ZA_BHIx24 1 "aligned_register_operand" "Uw<vector_count>")
1279 (match_operand:SME_ZA_BHIx24 2 "aligned_register_operand" "Uw<vector_count>")]
1280 SME_INT_TERNARY_SLICE))]
1281 "TARGET_STREAMING_SME2"
1282 "<optab><za32_long>\tza.s[%w0, 0:<za32_last_offset>, vgx<vector_count>], %1, %2"
1283)
1284
;; Offset variant of the multi-register widening pattern: the slice index
;; is operand 0 plus the constant operand 1
;; (const_<za32_offset_range>_operand).  operands[4] is set to
;; operand 1 + <za32_last_offset>, the end of the printed "%1:%4" range.
1285(define_insn "*aarch64_sme_<optab><VNx4SI_ONLY:mode><SME_ZA_BHIx24:mode>_plus"
1286 [(set (reg:VNx4SI_ONLY ZA_REGNUM)
1287 (unspec:VNx4SI_ONLY
1288 [(reg:VNx4SI_ONLY ZA_REGNUM)
1289 (reg:DI SME_STATE_REGNUM)
1290 (plus:SI (match_operand:SI 0 "register_operand" "Uci")
1291 (match_operand:SI 1 "const_<za32_offset_range>_operand"))
1292 (match_operand:SME_ZA_BHIx24 2 "aligned_register_operand" "Uw<vector_count>")
1293 (match_operand:SME_ZA_BHIx24 3 "aligned_register_operand" "Uw<vector_count>")]
1294 SME_INT_TERNARY_SLICE))]
1295 "TARGET_STREAMING_SME2"
1296 {
1297 operands[4] = GEN_INT (INTVAL (operands[1]) + <za32_last_offset>);
1298 return "<optab><za32_long>\tza.s[%w0, %1:%4, vgx<vector_count>], %2, %3";
1299 }
1300)
1301
;; "Single" form of the widening pattern for 32-bit ZA: operand 1 is the
;; multi-register input and operand 2 a single <SME_ZA_BHIx24:VSINGLE>
;; register represented as a vec_duplicate.  Operand 0 is the SI
;; slice-index register.
1302(define_insn "@aarch64_sme_single_<optab><VNx4SI_ONLY:mode><SME_ZA_BHIx24:mode>"
1303 [(set (reg:VNx4SI_ONLY ZA_REGNUM)
1304 (unspec:VNx4SI_ONLY
1305 [(reg:VNx4SI_ONLY ZA_REGNUM)
1306 (reg:DI SME_STATE_REGNUM)
1307 (match_operand:SI 0 "register_operand" "Uci")
1308 (match_operand:SME_ZA_BHIx24 1 "register_operand" "w")
1309 (vec_duplicate:SME_ZA_BHIx24
1310 (match_operand:<SME_ZA_BHIx24:VSINGLE> 2 "register_operand" "x"))]
1311 SME_INT_TERNARY_SLICE))]
1312 "TARGET_STREAMING_SME2"
1313 "<optab><za32_long>\tza.s[%w0, 0:<za32_last_offset>, vgx<vector_count>], %1, %2.<SME_ZA_BHIx24:Vetype>"
1314)
1315
;; Offset variant of the "single" widening pattern for 32-bit ZA: the slice
;; index is operand 0 plus the constant operand 1
;; (const_<za32_offset_range>_operand).  operands[4] is set to
;; operand 1 + <za32_last_offset>, the end of the printed "%1:%4" range.
1316(define_insn "*aarch64_sme_single_<optab><VNx4SI_ONLY:mode><SME_ZA_BHIx24:mode>_plus"
1317 [(set (reg:VNx4SI_ONLY ZA_REGNUM)
1318 (unspec:VNx4SI_ONLY
1319 [(reg:VNx4SI_ONLY ZA_REGNUM)
1320 (reg:DI SME_STATE_REGNUM)
1321 (plus:SI (match_operand:SI 0 "register_operand" "Uci")
1322 (match_operand:SI 1 "const_<za32_offset_range>_operand"))
1323 (match_operand:SME_ZA_BHIx24 2 "register_operand" "w")
1324 (vec_duplicate:SME_ZA_BHIx24
1325 (match_operand:<SME_ZA_BHIx24:VSINGLE> 3 "register_operand" "x"))]
1326 SME_INT_TERNARY_SLICE))]
1327 "TARGET_STREAMING_SME2"
1328 {
1329 operands[4] = GEN_INT (INTVAL (operands[1]) + <za32_last_offset>);
1330 return "<optab><za32_long>\tza.s[%w0, %1:%4, vgx<vector_count>], %2, %3.<SME_ZA_BHIx24:Vetype>";
1331 }
1332)
1333
;; Indexed (lane) form of the widening pattern for 32-bit ZA, over the
;; SME_ZA_BHIx124 modes.  Operand 1's predicate and constraint come from
;; <aligned_operand> and <aligned_fpr>; operand 2 is a single register and
;; operand 3 the constant lane index (UNSPEC_SVE_LANE_SELECT).
;; <vg_modifier> and <z_suffix> supply the mode-dependent parts of the
;; assembly syntax.
1334(define_insn "@aarch64_sme_lane_<optab><VNx4SI_ONLY:mode><SME_ZA_BHIx124:mode>"
1335 [(set (reg:VNx4SI_ONLY ZA_REGNUM)
1336 (unspec:VNx4SI_ONLY
1337 [(reg:VNx4SI_ONLY ZA_REGNUM)
1338 (reg:DI SME_STATE_REGNUM)
1339 (match_operand:SI 0 "register_operand" "Uci")
1340 (match_operand:SME_ZA_BHIx124 1 "<aligned_operand>" "<aligned_fpr>")
1341 (unspec:SME_ZA_BHIx124
1342 [(match_operand:<VSINGLE> 2 "register_operand" "x")
1343 (match_operand:SI 3 "const_int_operand")]
1344 UNSPEC_SVE_LANE_SELECT)]
1345 SME_INT_TERNARY_SLICE))]
1346 "TARGET_STREAMING_SME2"
1347 "<optab><za32_long>\tza.s[%w0, 0:<za32_last_offset><vg_modifier>], %1<z_suffix>, %2.<SME_ZA_BHIx124:Vetype>[%3]"
1348)
1349
;; Offset variant of the indexed widening pattern for 32-bit ZA: the slice
;; index is operand 0 plus the constant operand 1
;; (const_<za32_offset_range>_operand).  Operand 2 is the vector input,
;; operand 3 the single register and operand 4 the constant lane index.
;; operands[5] is set to operand 1 + <za32_last_offset>, the end of the
;; printed "%1:%5" range.
;; The name carries the "_plus" suffix used by the other offset variants in
;; this file; names starting with "*" only identify the insn in RTL dumps.
1350(define_insn "*aarch64_sme_lane_<optab><VNx4SI_ONLY:mode><SME_ZA_BHIx124:mode>_plus"
1351 [(set (reg:VNx4SI_ONLY ZA_REGNUM)
1352 (unspec:VNx4SI_ONLY
1353 [(reg:VNx4SI_ONLY ZA_REGNUM)
1354 (reg:DI SME_STATE_REGNUM)
1355 (plus:SI (match_operand:SI 0 "register_operand" "Uci")
1356 (match_operand:SI 1 "const_<za32_offset_range>_operand"))
1357 (match_operand:SME_ZA_BHIx124 2 "<aligned_operand>" "<aligned_fpr>")
1358 (unspec:SME_ZA_BHIx124
1359 [(match_operand:<VSINGLE> 3 "register_operand" "x")
1360 (match_operand:SI 4 "const_int_operand")]
1361 UNSPEC_SVE_LANE_SELECT)]
1362 SME_INT_TERNARY_SLICE))]
1363 "TARGET_STREAMING_SME2"
1364 {
1365 operands[5] = GEN_INT (INTVAL (operands[1]) + <za32_last_offset>);
1366 return "<optab><za32_long>\tza.s[%w0, %1:%5<vg_modifier>], %2<z_suffix>, %3.<SME_ZA_BHIx124:Vetype>[%4]";
1367 }
1368)
1369
;; Widening multiply-accumulate of two single 16-bit-element vectors
;; (operands 1 and 2) into 64-bit ZA slices, printed as "<optab>ll" with
;; the fixed slice range "0:3".  Requires TARGET_SME_I16I64 in addition to
;; SME2 and streaming mode.  Operand 0 is the SI slice-index register.
1370(define_insn "@aarch64_sme_<optab><VNx2DI_ONLY:mode><VNx8HI_ONLY:mode>"
1371 [(set (reg:VNx2DI_ONLY ZA_REGNUM)
1372 (unspec:VNx2DI_ONLY
1373 [(reg:VNx2DI_ONLY ZA_REGNUM)
1374 (reg:DI SME_STATE_REGNUM)
1375 (match_operand:SI 0 "register_operand" "Uci")
1376 (match_operand:VNx8HI_ONLY 1 "register_operand" "w")
1377 (match_operand:VNx8HI_ONLY 2 "register_operand" "x")]
1378 SME_INT_TERNARY_SLICE))]
1379 "TARGET_SME2 && TARGET_SME_I16I64 && TARGET_STREAMING_SME"
1380 "<optab>ll\tza.d[%w0, 0:3], %1.h, %2.h"
1381)
1382
;; Offset variant of the single-vector 64-bit widening pattern: the slice
;; index is operand 0 plus the constant operand 1
;; (const_<za64_offset_range>_operand).  operands[4] is set to
;; operand 1 + 3, the end of the printed "%1:%4" range.
1383(define_insn "*aarch64_sme_<optab><VNx2DI_ONLY:mode><VNx8HI_ONLY:mode>_plus"
1384 [(set (reg:VNx2DI_ONLY ZA_REGNUM)
1385 (unspec:VNx2DI_ONLY
1386 [(reg:VNx2DI_ONLY ZA_REGNUM)
1387 (reg:DI SME_STATE_REGNUM)
1388 (plus:SI (match_operand:SI 0 "register_operand" "Uci")
1389 (match_operand:SI 1 "const_<za64_offset_range>_operand"))
1390 (match_operand:VNx8HI_ONLY 2 "register_operand" "w")
1391 (match_operand:VNx8HI_ONLY 3 "register_operand" "x")]
1392 SME_INT_TERNARY_SLICE))]
1393 "TARGET_SME2 && TARGET_SME_I16I64 && TARGET_STREAMING_SME"
1394 {
1395 operands[4] = GEN_INT (INTVAL (operands[1]) + 3);
1396 return "<optab>ll\tza.d[%w0, %1:%4], %2.h, %3.h";
1397 }
1398)
1399
;; Widening multiply-accumulate of two multi-register inputs (operands 1
;; and 2, modes SME_ZA_HIx24) into 64-bit ZA slices, printed as
;; "<optab>ll" with slice range "0:3" and "vgx<vector_count>".
;; Requires TARGET_SME_I16I64.  Operand 0 is the SI slice-index register.
1400(define_insn "@aarch64_sme_<optab><VNx2DI_ONLY:mode><SME_ZA_HIx24:mode>"
1401 [(set (reg:VNx2DI_ONLY ZA_REGNUM)
1402 (unspec:VNx2DI_ONLY
1403 [(reg:VNx2DI_ONLY ZA_REGNUM)
1404 (reg:DI SME_STATE_REGNUM)
1405 (match_operand:SI 0 "register_operand" "Uci")
1406 (match_operand:SME_ZA_HIx24 1 "aligned_register_operand" "Uw<vector_count>")
1407 (match_operand:SME_ZA_HIx24 2 "aligned_register_operand" "Uw<vector_count>")]
1408 SME_INT_TERNARY_SLICE))]
1409 "TARGET_SME2 && TARGET_SME_I16I64 && TARGET_STREAMING_SME"
1410 "<optab>ll\tza.d[%w0, 0:3, vgx<vector_count>], %1, %2"
1411)
1412
;; Offset variant of the multi-register 64-bit widening pattern: the slice
;; index is operand 0 plus the constant operand 1
;; (const_<za64_offset_range>_operand).  operands[4] is set to
;; operand 1 + 3, the end of the printed "%1:%4" range.
1413(define_insn "*aarch64_sme_<optab><VNx2DI_ONLY:mode><SME_ZA_HIx24:mode>_plus"
1414 [(set (reg:VNx2DI_ONLY ZA_REGNUM)
1415 (unspec:VNx2DI_ONLY
1416 [(reg:VNx2DI_ONLY ZA_REGNUM)
1417 (reg:DI SME_STATE_REGNUM)
1418 (plus:SI (match_operand:SI 0 "register_operand" "Uci")
1419 (match_operand:SI 1 "const_<za64_offset_range>_operand"))
1420 (match_operand:SME_ZA_HIx24 2 "aligned_register_operand" "Uw<vector_count>")
1421 (match_operand:SME_ZA_HIx24 3 "aligned_register_operand" "Uw<vector_count>")]
1422 SME_INT_TERNARY_SLICE))]
1423 "TARGET_SME2 && TARGET_SME_I16I64 && TARGET_STREAMING_SME"
1424 {
1425 operands[4] = GEN_INT (INTVAL (operands[1]) + 3);
1426 return "<optab>ll\tza.d[%w0, %1:%4, vgx<vector_count>], %2, %3";
1427 }
1428)
1429
;; "Single" form of the 64-bit widening pattern: operand 1 is the
;; multi-register input and operand 2 a single <SME_ZA_HIx24:VSINGLE>
;; register represented as a vec_duplicate and printed with ".h".
;; Requires TARGET_SME_I16I64.  Operand 0 is the SI slice-index register.
1430(define_insn "@aarch64_sme_single_<optab><VNx2DI_ONLY:mode><SME_ZA_HIx24:mode>"
1431 [(set (reg:VNx2DI_ONLY ZA_REGNUM)
1432 (unspec:VNx2DI_ONLY
1433 [(reg:VNx2DI_ONLY ZA_REGNUM)
1434 (reg:DI SME_STATE_REGNUM)
1435 (match_operand:SI 0 "register_operand" "Uci")
1436 (match_operand:SME_ZA_HIx24 1 "register_operand" "w")
1437 (vec_duplicate:SME_ZA_HIx24
1438 (match_operand:<SME_ZA_HIx24:VSINGLE> 2 "register_operand" "x"))]
1439 SME_INT_TERNARY_SLICE))]
1440 "TARGET_SME2 && TARGET_SME_I16I64 && TARGET_STREAMING_SME"
1441 "<optab>ll\tza.d[%w0, 0:3, vgx<vector_count>], %1, %2.h"
1442)
1443
;; Offset variant of the "single" 64-bit widening pattern: the slice index
;; is operand 0 plus the constant operand 1
;; (const_<za64_offset_range>_operand).  operands[4] is set to
;; operand 1 + 3, the end of the printed "%1:%4" range.
1444(define_insn "*aarch64_sme_single_<optab><VNx2DI_ONLY:mode><SME_ZA_HIx24:mode>_plus"
1445 [(set (reg:VNx2DI_ONLY ZA_REGNUM)
1446 (unspec:VNx2DI_ONLY
1447 [(reg:VNx2DI_ONLY ZA_REGNUM)
1448 (reg:DI SME_STATE_REGNUM)
1449 (plus:SI (match_operand:SI 0 "register_operand" "Uci")
1450 (match_operand:SI 1 "const_<za64_offset_range>_operand"))
1451 (match_operand:SME_ZA_HIx24 2 "register_operand" "w")
1452 (vec_duplicate:SME_ZA_HIx24
1453 (match_operand:<SME_ZA_HIx24:VSINGLE> 3 "register_operand" "x"))]
1454 SME_INT_TERNARY_SLICE))]
1455 "TARGET_SME2 && TARGET_SME_I16I64 && TARGET_STREAMING_SME"
1456 {
1457 operands[4] = GEN_INT (INTVAL (operands[1]) + 3);
1458 return "<optab>ll\tza.d[%w0, %1:%4, vgx<vector_count>], %2, %3.h";
1459 }
1460)
1461
;; Indexed (lane) form of the 64-bit widening pattern, over the
;; SME_ZA_HIx124 modes.  Operand 1's predicate and constraint come from
;; <aligned_operand> and <aligned_fpr>; operand 2 is a single register and
;; operand 3 the constant lane index (UNSPEC_SVE_LANE_SELECT).
;; Requires TARGET_SME_I16I64.
1462(define_insn "@aarch64_sme_lane_<optab><VNx2DI_ONLY:mode><SME_ZA_HIx124:mode>"
1463 [(set (reg:VNx2DI_ONLY ZA_REGNUM)
1464 (unspec:VNx2DI_ONLY
1465 [(reg:VNx2DI_ONLY ZA_REGNUM)
1466 (reg:DI SME_STATE_REGNUM)
1467 (match_operand:SI 0 "register_operand" "Uci")
1468 (match_operand:SME_ZA_HIx124 1 "<aligned_operand>" "<aligned_fpr>")
1469 (unspec:SME_ZA_HIx124
1470 [(match_operand:<VSINGLE> 2 "register_operand" "x")
1471 (match_operand:SI 3 "const_int_operand")]
1472 UNSPEC_SVE_LANE_SELECT)]
1473 SME_INT_TERNARY_SLICE))]
1474 "TARGET_SME2 && TARGET_SME_I16I64 && TARGET_STREAMING_SME"
1475 "<optab>ll\tza.d[%w0, 0:3<vg_modifier>], %1<z_suffix>, %2.h[%3]"
1476)
1477
;; Offset variant of the indexed 64-bit widening pattern: the slice index
;; is operand 0 plus the constant operand 1
;; (const_<za64_offset_range>_operand).  Operand 2 is the vector input,
;; operand 3 the single register and operand 4 the constant lane index.
;; operands[5] is set to operand 1 + 3, the end of the printed "%1:%5"
;; range.
;; The name carries the "_plus" suffix used by the other offset variants in
;; this file; names starting with "*" only identify the insn in RTL dumps.
1478(define_insn "*aarch64_sme_lane_<optab><VNx2DI_ONLY:mode><SME_ZA_HIx124:mode>_plus"
1479 [(set (reg:VNx2DI_ONLY ZA_REGNUM)
1480 (unspec:VNx2DI_ONLY
1481 [(reg:VNx2DI_ONLY ZA_REGNUM)
1482 (reg:DI SME_STATE_REGNUM)
1483 (plus:SI (match_operand:SI 0 "register_operand" "Uci")
1484 (match_operand:SI 1 "const_<za64_offset_range>_operand"))
1485 (match_operand:SME_ZA_HIx124 2 "<aligned_operand>" "<aligned_fpr>")
1486 (unspec:SME_ZA_HIx124
1487 [(match_operand:<VSINGLE> 3 "register_operand" "x")
1488 (match_operand:SI 4 "const_int_operand")]
1489 UNSPEC_SVE_LANE_SELECT)]
1490 SME_INT_TERNARY_SLICE))]
1491 "TARGET_SME2 && TARGET_SME_I16I64 && TARGET_STREAMING_SME"
1492 {
1493 operands[5] = GEN_INT (INTVAL (operands[1]) + 3);
1494 return "<optab>ll\tza.d[%w0, %1:%5<vg_modifier>], %2<z_suffix>, %3.h[%4]";
1495 }
1496)
1497
1498;; -------------------------------------------------------------------------
1499;; ---- [INT] Sum of outer products
1500;; -------------------------------------------------------------------------
;; Includes:
1501;; - BMOPA
1502;; - BMOPS
4f6ab953
RS
1503;; - SMOPA
1504;; - SMOPS
1505;; - SUMOPA
1506;; - SUMOPS
1507;; - UMOPA
1508;; - UMOPS
1509;; - USMOPA
1510;; - USMOPS
1511;; -------------------------------------------------------------------------
1512
;; Integer sum of outer products of two byte-element vectors (operands 3
;; and 4) into a 32-bit ZA tile, for the SME_INT_MOP unspecs.
;; Operand 0 is the constant printed as the tile number in "za%0";
;; operands 1 and 2 are predicate registers printed with "/m".
1513(define_insn "@aarch64_sme_<optab><VNx4SI_ONLY:mode><VNx16QI_ONLY:mode>"
1514 [(set (reg:VNx4SI_ONLY ZA_REGNUM)
1515 (unspec:VNx4SI_ONLY
1516 [(reg:VNx4SI_ONLY ZA_REGNUM)
1517 (reg:DI SME_STATE_REGNUM)
1518 (match_operand:DI 0 "const_int_operand")
1519 (match_operand:<VNx4SI_ONLY:VPRED> 1 "register_operand" "Upl")
1520 (match_operand:<VNx4SI_ONLY:VPRED> 2 "register_operand" "Upl")
1521 (match_operand:VNx16QI_ONLY 3 "register_operand" "w")
1522 (match_operand:VNx16QI_ONLY 4 "register_operand" "w")]
1523 SME_INT_MOP))]
1524 "TARGET_STREAMING_SME"
1525 "<optab>\tza%0.s, %1/m, %2/m, %3.b, %4.b"
1526)
1527
;; Integer sum of outer products of two halfword-element vectors (operands
;; 3 and 4) into a 64-bit ZA tile, for the SME_INT_MOP unspecs.
;; Requires TARGET_SME_I16I64.  Operand 0 is the constant tile number;
;; operands 1 and 2 are predicate registers printed with "/m".
1528(define_insn "@aarch64_sme_<optab><VNx2DI_ONLY:mode><VNx8HI_ONLY:mode>"
1529 [(set (reg:VNx2DI_ONLY ZA_REGNUM)
1530 (unspec:VNx2DI_ONLY
1531 [(reg:VNx2DI_ONLY ZA_REGNUM)
1532 (reg:DI SME_STATE_REGNUM)
1533 (match_operand:DI 0 "const_int_operand")
1534 (match_operand:<VNx2DI_ONLY:VPRED> 1 "register_operand" "Upl")
1535 (match_operand:<VNx2DI_ONLY:VPRED> 2 "register_operand" "Upl")
1536 (match_operand:VNx8HI_ONLY 3 "register_operand" "w")
1537 (match_operand:VNx8HI_ONLY 4 "register_operand" "w")]
1538 SME_INT_MOP))]
1539 "TARGET_STREAMING_SME && TARGET_SME_I16I64"
1540 "<optab>\tza%0.d, %1/m, %2/m, %3.h, %4.h"
1541)
1542
c1c267df
RS
;; Integer sum of outer products of two halfword-element vectors (operands
;; 3 and 4) into a 32-bit ZA tile, for the SME2_INT_MOP unspecs.
;; Operand 0 is the constant tile number; operands 1 and 2 are predicate
;; registers printed with "/m".
1543(define_insn "@aarch64_sme_<optab><VNx4SI_ONLY:mode><VNx8HI_ONLY:mode>"
1544 [(set (reg:VNx4SI_ONLY ZA_REGNUM)
1545 (unspec:VNx4SI_ONLY
1546 [(reg:VNx4SI_ONLY ZA_REGNUM)
1547 (reg:DI SME_STATE_REGNUM)
1548 (match_operand:DI 0 "const_int_operand")
1549 (match_operand:<VNx4SI_ONLY:VPRED> 1 "register_operand" "Upl")
1550 (match_operand:<VNx4SI_ONLY:VPRED> 2 "register_operand" "Upl")
1551 (match_operand:VNx8HI_ONLY 3 "register_operand" "w")
1552 (match_operand:VNx8HI_ONLY 4 "register_operand" "w")]
1553 SME2_INT_MOP))]
1554 "TARGET_STREAMING_SME2"
1555 "<optab>\tza%0.s, %1/m, %2/m, %3.h, %4.h"
1556)
1557
;; Outer-product pattern for the SME2_BMOP unspecs, taking two word-element
;; vectors (operands 3 and 4) and a 32-bit ZA tile.  Operand 0 is the
;; constant tile number; operands 1 and 2 are predicate registers printed
;; with "/m".
1558(define_insn "@aarch64_sme_<optab><VNx4SI_ONLY:mode><VNx4SI_ONLY:mode>"
1559 [(set (reg:VNx4SI_ONLY ZA_REGNUM)
1560 (unspec:VNx4SI_ONLY
1561 [(reg:VNx4SI_ONLY ZA_REGNUM)
1562 (reg:DI SME_STATE_REGNUM)
1563 (match_operand:DI 0 "const_int_operand")
1564 (match_operand:<VNx4SI_ONLY:VPRED> 1 "register_operand" "Upl")
1565 (match_operand:<VNx4SI_ONLY:VPRED> 2 "register_operand" "Upl")
1566 (match_operand:VNx4SI_ONLY 3 "register_operand" "w")
1567 (match_operand:VNx4SI_ONLY 4 "register_operand" "w")]
1568 SME2_BMOP))]
1569 "TARGET_STREAMING_SME2"
1570 "<optab>\tza%0.s, %1/m, %2/m, %3.s, %4.s"
1571)
1572
1573;; -------------------------------------------------------------------------
1574;; ---- [FP] Dot product
1575;; -------------------------------------------------------------------------
1576;; Includes:
1577;; - BFDOT
1578;; - FDOT
1579;; -------------------------------------------------------------------------
1580
;; Floating-point dot product of two multi-register inputs (operands 1 and
;; 2, modes SME_ZA_HFx24) accumulated into 32-bit ZA slices, for the
;; SME_FP_DOTPROD unspecs.  The mnemonic is <b> followed by <optab>.
;; Operand 0 is the SI slice-index register; the immediate offset is 0.
1581(define_insn "@aarch64_sme_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx24:mode>"
1582 [(set (reg:VNx4SI_ONLY ZA_REGNUM)
1583 (unspec:VNx4SI_ONLY
1584 [(reg:VNx4SI_ONLY ZA_REGNUM)
1585 (reg:DI SME_STATE_REGNUM)
1586 (match_operand:SI 0 "register_operand" "Uci")
1587 (match_operand:SME_ZA_HFx24 1 "aligned_register_operand" "Uw<vector_count>")
1588 (match_operand:SME_ZA_HFx24 2 "aligned_register_operand" "Uw<vector_count>")]
1589 SME_FP_DOTPROD))]
1590 "TARGET_STREAMING_SME2"
1591 "<b><optab>\tza.s[%w0, 0, vgx<vector_count>], %1, %2"
1592)
1593
;; Offset variant of the floating-point dot product: the slice index is
;; operand 0 plus the constant operand 1 (const_0_to_7_operand).
;; Operands 2 and 3 are the multi-register inputs.
1594(define_insn "*aarch64_sme_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx24:mode>_plus"
1595 [(set (reg:VNx4SI_ONLY ZA_REGNUM)
1596 (unspec:VNx4SI_ONLY
1597 [(reg:VNx4SI_ONLY ZA_REGNUM)
1598 (reg:DI SME_STATE_REGNUM)
1599 (plus:SI (match_operand:SI 0 "register_operand" "Uci")
1600 (match_operand:SI 1 "const_0_to_7_operand"))
1601 (match_operand:SME_ZA_HFx24 2 "aligned_register_operand" "Uw<vector_count>")
1602 (match_operand:SME_ZA_HFx24 3 "aligned_register_operand" "Uw<vector_count>")]
1603 SME_FP_DOTPROD))]
1604 "TARGET_STREAMING_SME2"
1605 "<b><optab>\tza.s[%w0, %1, vgx<vector_count>], %2, %3"
1606)
1607
;; "Single" form of the floating-point dot product: operand 1 is the
;; multi-register input and operand 2 a single <VSINGLE> register
;; represented as a vec_duplicate and printed with ".h".
;; Operand 0 is the SI slice-index register; the immediate offset is 0.
1608(define_insn "@aarch64_sme_single_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx24:mode>"
1609 [(set (reg:VNx4SI_ONLY ZA_REGNUM)
1610 (unspec:VNx4SI_ONLY
1611 [(reg:VNx4SI_ONLY ZA_REGNUM)
1612 (reg:DI SME_STATE_REGNUM)
1613 (match_operand:SI 0 "register_operand" "Uci")
1614 (match_operand:SME_ZA_HFx24 1 "register_operand" "w")
1615 (vec_duplicate:SME_ZA_HFx24
1616 (match_operand:<VSINGLE> 2 "register_operand" "x"))]
1617 SME_FP_DOTPROD))]
1618 "TARGET_STREAMING_SME2"
1619 "<b><optab>\tza.s[%w0, 0, vgx<vector_count>], %1, %2.h"
1620)
1621
;; Offset variant of the "single" floating-point dot product: the slice
;; index is operand 0 plus the constant operand 1 (const_0_to_7_operand).
;; Operand 2 is the multi-register input and operand 3 the duplicated
;; single register.
1622(define_insn "*aarch64_sme_single_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx24:mode>_plus"
1623 [(set (reg:VNx4SI_ONLY ZA_REGNUM)
1624 (unspec:VNx4SI_ONLY
1625 [(reg:VNx4SI_ONLY ZA_REGNUM)
1626 (reg:DI SME_STATE_REGNUM)
1627 (plus:SI (match_operand:SI 0 "register_operand" "Uci")
1628 (match_operand:SI 1 "const_0_to_7_operand"))
1629 (match_operand:SME_ZA_HFx24 2 "register_operand" "w")
1630 (vec_duplicate:SME_ZA_HFx24
1631 (match_operand:<VSINGLE> 3 "register_operand" "x"))]
1632 SME_FP_DOTPROD))]
1633 "TARGET_STREAMING_SME2"
1634 "<b><optab>\tza.s[%w0, %1, vgx<vector_count>], %2, %3.h"
1635)
1636
;; Indexed (lane) form of the floating-point dot product, for the
;; SME_FP_DOTPROD_LANE unspecs.  Operand 1 is the multi-register input;
;; operand 2 is a single register and operand 3 the constant lane index,
;; combined by UNSPEC_SVE_LANE_SELECT and printed as "%2.h[%3]".
1637(define_insn "@aarch64_sme_lane_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx24:mode>"
1638 [(set (reg:VNx4SI_ONLY ZA_REGNUM)
1639 (unspec:VNx4SI_ONLY
1640 [(reg:VNx4SI_ONLY ZA_REGNUM)
1641 (reg:DI SME_STATE_REGNUM)
1642 (match_operand:SI 0 "register_operand" "Uci")
1643 (match_operand:SME_ZA_HFx24 1 "aligned_register_operand" "Uw<vector_count>")
1644 (unspec:SME_ZA_HFx24
1645 [(match_operand:<VSINGLE> 2 "register_operand" "x")
1646 (match_operand:SI 3 "const_int_operand")]
1647 UNSPEC_SVE_LANE_SELECT)]
1648 SME_FP_DOTPROD_LANE))]
1649 "TARGET_STREAMING_SME2"
1650 "<b><optab>\tza.s[%w0, 0, vgx<vector_count>], %1, %2.h[%3]"
1651)
1652
;; Offset variant of the indexed floating-point dot product: the slice
;; index is operand 0 plus the constant operand 1 (const_0_to_7_operand).
;; Operand 2 is the multi-register input, operand 3 the single register
;; and operand 4 the constant lane index.
1653(define_insn "*aarch64_sme_lane_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx24:mode>_plus"
1654 [(set (reg:VNx4SI_ONLY ZA_REGNUM)
1655 (unspec:VNx4SI_ONLY
1656 [(reg:VNx4SI_ONLY ZA_REGNUM)
1657 (reg:DI SME_STATE_REGNUM)
1658 (plus:SI (match_operand:SI 0 "register_operand" "Uci")
1659 (match_operand:SI 1 "const_0_to_7_operand"))
1660 (match_operand:SME_ZA_HFx24 2 "aligned_register_operand" "Uw<vector_count>")
1661 (unspec:SME_ZA_HFx24
1662 [(match_operand:<VSINGLE> 3 "register_operand" "x")
1663 (match_operand:SI 4 "const_int_operand")]
1664 UNSPEC_SVE_LANE_SELECT)]
1665 SME_FP_DOTPROD_LANE))]
1666 "TARGET_STREAMING_SME2"
1667 "<b><optab>\tza.s[%w0, %1, vgx<vector_count>], %2, %3.h[%4]"
1668)
1669
1670;; -------------------------------------------------------------------------
1671;; ---- [FP] Ternary arithmetic on ZA slice
1672;; -------------------------------------------------------------------------
1673;; Includes:
1674;; - FMLA
1675;; - FMLS
1676;; -------------------------------------------------------------------------
1677
;; Floating-point multiply-accumulate of two multi-register inputs
;; (operands 1 and 2) into ZA slices, for the SME_FP_TERNARY_SLICE unspecs.
;; Two independent mode iterators are used, so the condition only enables
;; the combinations in which the ZA element size equals the input element
;; size.  Operand 0 is the SI slice-index register; the offset is 0.
1678(define_insn "@aarch64_sme_<optab><SME_ZA_SDF_I:mode><SME_ZA_SDFx24:mode>"
1679 [(set (reg:SME_ZA_SDF_I ZA_REGNUM)
1680 (unspec:SME_ZA_SDF_I
1681 [(reg:SME_ZA_SDF_I ZA_REGNUM)
1682 (reg:DI SME_STATE_REGNUM)
1683 (match_operand:SI 0 "register_operand" "Uci")
1684 (match_operand:SME_ZA_SDFx24 1 "aligned_register_operand" "Uw<vector_count>")
1685 (match_operand:SME_ZA_SDFx24 2 "aligned_register_operand" "Uw<vector_count>")]
1686 SME_FP_TERNARY_SLICE))]
1687 "TARGET_SME2
1688 && TARGET_STREAMING_SME
1689 && <SME_ZA_SDF_I:elem_bits> == <SME_ZA_SDFx24:elem_bits>"
1690 "<optab>\tza.<SME_ZA_SDF_I:Vetype>[%w0, 0, vgx<vector_count>], %1, %2"
1691)
1692
;; Offset variant of the multi-vector by multi-vector
;; SME_FP_TERNARY_SLICE pattern: the slice index is
;; (plus operand 0, operand 1), where operand 1 must satisfy
;; const_0_to_7_operand and is printed as the slice offset.  Operands 2
;; and 3 are the two multi-vector inputs.
1693(define_insn "*aarch64_sme_<optab><SME_ZA_SDF_I:mode><SME_ZA_SDFx24:mode>_plus"
1694 [(set (reg:SME_ZA_SDF_I ZA_REGNUM)
1695 (unspec:SME_ZA_SDF_I
1696 [(reg:SME_ZA_SDF_I ZA_REGNUM)
1697 (reg:DI SME_STATE_REGNUM)
1698 (plus:SI (match_operand:SI 0 "register_operand" "Uci")
1699 (match_operand:SI 1 "const_0_to_7_operand"))
1700 (match_operand:SME_ZA_SDFx24 2 "aligned_register_operand" "Uw<vector_count>")
1701 (match_operand:SME_ZA_SDFx24 3 "aligned_register_operand" "Uw<vector_count>")]
1702 SME_FP_TERNARY_SLICE))]
1703 "TARGET_SME2
1704 && TARGET_STREAMING_SME
1705 && <SME_ZA_SDF_I:elem_bits> == <SME_ZA_SDFx24:elem_bits>"
1706 "<optab>\tza.<SME_ZA_SDF_I:Vetype>[%w0, %1, vgx<vector_count>], %2, %3"
1707)
1708
;; Multi-vector by single-vector form of the SME_FP_TERNARY_SLICE
;; operations.  Operand 1 is the multi-vector input (plain
;; register_operand, unlike the aligned_register_operand used by the
;; multi-by-multi form).  Operand 2 is a single vector that the RTL
;; models as a vec_duplicate to the multi-vector mode.  The template
;; prints a fixed slice offset of 0.
1709(define_insn "@aarch64_sme_single_<optab><SME_ZA_SDF_I:mode><SME_ZA_SDFx24:mode>"
1710 [(set (reg:SME_ZA_SDF_I ZA_REGNUM)
1711 (unspec:SME_ZA_SDF_I
1712 [(reg:SME_ZA_SDF_I ZA_REGNUM)
1713 (reg:DI SME_STATE_REGNUM)
1714 (match_operand:SI 0 "register_operand" "Uci")
1715 (match_operand:SME_ZA_SDFx24 1 "register_operand" "w")
1716 (vec_duplicate:SME_ZA_SDFx24
1717 (match_operand:<VSINGLE> 2 "register_operand" "x"))]
1718 SME_FP_TERNARY_SLICE))]
1719 "TARGET_SME2
1720 && TARGET_STREAMING_SME
1721 && <SME_ZA_SDF_I:elem_bits> == <SME_ZA_SDFx24:elem_bits>"
1722 "<optab>\tza.<SME_ZA_SDF_I:Vetype>[%w0, 0, vgx<vector_count>], %1, %2.<SME_ZA_SDFx24:Vetype>"
1723)
1724
;; Offset variant of the multi-vector by single-vector
;; SME_FP_TERNARY_SLICE pattern: the slice index is
;; (plus operand 0, operand 1), where operand 1 must satisfy
;; const_0_to_7_operand and is printed as the slice offset.  Operand 2 is
;; the multi-vector input and operand 3 the single vector that is
;; duplicated to the multi-vector mode.
1725(define_insn "*aarch64_sme_single_<optab><SME_ZA_SDF_I:mode><SME_ZA_SDFx24:mode>_plus"
1726 [(set (reg:SME_ZA_SDF_I ZA_REGNUM)
1727 (unspec:SME_ZA_SDF_I
1728 [(reg:SME_ZA_SDF_I ZA_REGNUM)
1729 (reg:DI SME_STATE_REGNUM)
1730 (plus:SI (match_operand:SI 0 "register_operand" "Uci")
1731 (match_operand:SI 1 "const_0_to_7_operand"))
1732 (match_operand:SME_ZA_SDFx24 2 "register_operand" "w")
1733 (vec_duplicate:SME_ZA_SDFx24
1734 (match_operand:<VSINGLE> 3 "register_operand" "x"))]
1735 SME_FP_TERNARY_SLICE))]
1736 "TARGET_SME2
1737 && TARGET_STREAMING_SME
1738 && <SME_ZA_SDF_I:elem_bits> == <SME_ZA_SDFx24:elem_bits>"
1739 "<optab>\tza.<SME_ZA_SDF_I:Vetype>[%w0, %1, vgx<vector_count>], %2, %3.<SME_ZA_SDFx24:Vetype>"
1740)
1741
;; Indexed (lane) form of the SME_FP_TERNARY_SLICE operations.
;; Operand 1 is the multi-vector input.  Operand 2 is the single vector
;; whose lane, given by the constant operand 3, is selected through
;; UNSPEC_SVE_LANE_SELECT and printed as %2.<Vetype>[%3].  The template
;; prints a fixed slice offset of 0.
1742(define_insn "@aarch64_sme_lane_<optab><SME_ZA_SDF_I:mode><SME_ZA_SDFx24:mode>"
1743 [(set (reg:SME_ZA_SDF_I ZA_REGNUM)
1744 (unspec:SME_ZA_SDF_I
1745 [(reg:SME_ZA_SDF_I ZA_REGNUM)
1746 (reg:DI SME_STATE_REGNUM)
1747 (match_operand:SI 0 "register_operand" "Uci")
1748 (match_operand:SME_ZA_SDFx24 1 "aligned_register_operand" "Uw<vector_count>")
1749 (unspec:SME_ZA_SDFx24
1750 [(match_operand:<VSINGLE> 2 "register_operand" "x")
1751 (match_operand:SI 3 "const_int_operand")]
1752 UNSPEC_SVE_LANE_SELECT)]
1753 SME_FP_TERNARY_SLICE))]
1754 "TARGET_SME2
1755 && TARGET_STREAMING_SME
1756 && <SME_ZA_SDF_I:elem_bits> == <SME_ZA_SDFx24:elem_bits>"
1757 "<optab>\tza.<SME_ZA_SDF_I:Vetype>[%w0, 0, vgx<vector_count>], %1, %2.<SME_ZA_SDFx24:Vetype>[%3]"
1758)
1759
;; Offset variant of the indexed SME_FP_TERNARY_SLICE pattern: the slice
;; index is (plus operand 0, operand 1), where operand 1 must satisfy
;; const_0_to_7_operand and is printed as the slice offset.  Operand 2 is
;; the multi-vector input; operand 3 is the single vector whose lane
;; (constant operand 4) is selected through UNSPEC_SVE_LANE_SELECT.
;; The name carries the "_plus" suffix, like the other offset variants
;; in this file, so that it differs from the zero-offset pattern's name.
1760(define_insn "*aarch64_sme_lane_<optab><SME_ZA_SDF_I:mode><SME_ZA_SDFx24:mode>_plus"
1761 [(set (reg:SME_ZA_SDF_I ZA_REGNUM)
1762 (unspec:SME_ZA_SDF_I
1763 [(reg:SME_ZA_SDF_I ZA_REGNUM)
1764 (reg:DI SME_STATE_REGNUM)
1765 (plus:SI (match_operand:SI 0 "register_operand" "Uci")
1766 (match_operand:SI 1 "const_0_to_7_operand"))
1767 (match_operand:SME_ZA_SDFx24 2 "aligned_register_operand" "Uw<vector_count>")
1768 (unspec:SME_ZA_SDFx24
1769 [(match_operand:<VSINGLE> 3 "register_operand" "x")
1770 (match_operand:SI 4 "const_int_operand")]
1771 UNSPEC_SVE_LANE_SELECT)]
1772 SME_FP_TERNARY_SLICE))]
1773 "TARGET_SME2
1774 && TARGET_STREAMING_SME
1775 && <SME_ZA_SDF_I:elem_bits> == <SME_ZA_SDFx24:elem_bits>"
1776 "<optab>\tza.<SME_ZA_SDF_I:Vetype>[%w0, %1, vgx<vector_count>], %2, %3.<SME_ZA_SDFx24:Vetype>[%4]"
1777)
1778
1779;; -------------------------------------------------------------------------
1780;; ---- [FP] Ternary widening arithmetic on ZA slice
1781;; -------------------------------------------------------------------------
1782;; Includes:
1783;; - BFMLAL
1784;; - BFMLSL
1785;; - FMLAL
1786;; - FMLSL
1787;; -------------------------------------------------------------------------
1788
;; Single-vector by single-vector widening form of the
;; SME_FP_TERNARY_SLICE operations.  The mnemonic is <b><optab> with an
;; "l" suffix, the inputs (operands 1 and 2) are printed with .h elements
;; and ZA with .s elements.  The template addresses the fixed slice
;; range 0:1 relative to the index register operand 0.
1789(define_insn "@aarch64_sme_<optab><VNx4SI_ONLY:mode><SVE_FULL_HF:mode>"
1790 [(set (reg:VNx4SI_ONLY ZA_REGNUM)
1791 (unspec:VNx4SI_ONLY
1792 [(reg:VNx4SI_ONLY ZA_REGNUM)
1793 (reg:DI SME_STATE_REGNUM)
1794 (match_operand:SI 0 "register_operand" "Uci")
1795 (match_operand:SVE_FULL_HF 1 "register_operand" "w")
1796 (match_operand:SVE_FULL_HF 2 "register_operand" "x")]
1797 SME_FP_TERNARY_SLICE))]
1798 "TARGET_STREAMING_SME2"
1799 "<b><optab>l\tza.s[%w0, 0:1], %1.h, %2.h"
1800)
1801
;; Offset variant of the single-vector widening pattern: the slice index
;; is (plus operand 0, operand 1), with operand 1 restricted by the
;; const_<za32_offset_range>_operand predicate.  The output code sets
;; operands[4] to operand 1 + 1 so that the template can print the slice
;; range as %1:%4.
1802(define_insn "*aarch64_sme_<optab><VNx4SI_ONLY:mode><SVE_FULL_HF:mode>_plus"
1803 [(set (reg:VNx4SI_ONLY ZA_REGNUM)
1804 (unspec:VNx4SI_ONLY
1805 [(reg:VNx4SI_ONLY ZA_REGNUM)
1806 (reg:DI SME_STATE_REGNUM)
1807 (plus:SI (match_operand:SI 0 "register_operand" "Uci")
1808 (match_operand:SI 1 "const_<za32_offset_range>_operand"))
1809 (match_operand:SVE_FULL_HF 2 "register_operand" "w")
1810 (match_operand:SVE_FULL_HF 3 "register_operand" "x")]
1811 SME_FP_TERNARY_SLICE))]
1812 "TARGET_STREAMING_SME2"
1813 {
1814 operands[4] = GEN_INT (INTVAL (operands[1]) + 1);
1815 return "<b><optab>l\tza.s[%w0, %1:%4], %2.h, %3.h";
1816 }
1817)
1818
;; Multi-vector by multi-vector widening form of the
;; SME_FP_TERNARY_SLICE operations.  Operands 1 and 2 are the two
;; multi-vector inputs; the template addresses the fixed slice range 0:1
;; with a vgx<vector_count> group specifier.
1819(define_insn "@aarch64_sme_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx24:mode>"
1820 [(set (reg:VNx4SI_ONLY ZA_REGNUM)
1821 (unspec:VNx4SI_ONLY
1822 [(reg:VNx4SI_ONLY ZA_REGNUM)
1823 (reg:DI SME_STATE_REGNUM)
1824 (match_operand:SI 0 "register_operand" "Uci")
1825 (match_operand:SME_ZA_HFx24 1 "aligned_register_operand" "Uw<vector_count>")
1826 (match_operand:SME_ZA_HFx24 2 "aligned_register_operand" "Uw<vector_count>")]
1827 SME_FP_TERNARY_SLICE))]
1828 "TARGET_STREAMING_SME2"
1829 "<b><optab>l\tza.s[%w0, 0:1, vgx<vector_count>], %1, %2"
1830)
1831
;; Offset variant of the multi-vector by multi-vector widening pattern:
;; the slice index is (plus operand 0, operand 1), with operand 1
;; restricted by the const_<za32_offset_range>_operand predicate.  The
;; output code sets operands[4] to operand 1 + 1 so that the template can
;; print the slice range as %1:%4.
1832(define_insn "*aarch64_sme_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx24:mode>_plus"
1833 [(set (reg:VNx4SI_ONLY ZA_REGNUM)
1834 (unspec:VNx4SI_ONLY
1835 [(reg:VNx4SI_ONLY ZA_REGNUM)
1836 (reg:DI SME_STATE_REGNUM)
1837 (plus:SI (match_operand:SI 0 "register_operand" "Uci")
1838 (match_operand:SI 1 "const_<za32_offset_range>_operand"))
1839 (match_operand:SME_ZA_HFx24 2 "aligned_register_operand" "Uw<vector_count>")
1840 (match_operand:SME_ZA_HFx24 3 "aligned_register_operand" "Uw<vector_count>")]
1841 SME_FP_TERNARY_SLICE))]
1842 "TARGET_STREAMING_SME2"
1843 {
1844 operands[4] = GEN_INT (INTVAL (operands[1]) + 1);
1845 return "<b><optab>l\tza.s[%w0, %1:%4, vgx<vector_count>], %2, %3";
1846 }
1847)
1848
;; Multi-vector by single-vector widening form of the
;; SME_FP_TERNARY_SLICE operations.  Operand 1 is the multi-vector input
;; and operand 2 a single vector that the RTL models as a vec_duplicate
;; to the multi-vector mode.  The template addresses the fixed slice
;; range 0:1.
1849(define_insn "@aarch64_sme_single_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx24:mode>"
1850 [(set (reg:VNx4SI_ONLY ZA_REGNUM)
1851 (unspec:VNx4SI_ONLY
1852 [(reg:VNx4SI_ONLY ZA_REGNUM)
1853 (reg:DI SME_STATE_REGNUM)
1854 (match_operand:SI 0 "register_operand" "Uci")
1855 (match_operand:SME_ZA_HFx24 1 "register_operand" "w")
1856 (vec_duplicate:SME_ZA_HFx24
1857 (match_operand:<SME_ZA_HFx24:VSINGLE> 2 "register_operand" "x"))]
1858 SME_FP_TERNARY_SLICE))]
1859 "TARGET_STREAMING_SME2"
1860 "<b><optab>l\tza.s[%w0, 0:1, vgx<vector_count>], %1, %2.h"
1861)
1862
;; Offset variant of the multi-vector by single-vector widening pattern:
;; the slice index is (plus operand 0, operand 1), with operand 1
;; restricted by the const_<za32_offset_range>_operand predicate.  The
;; output code sets operands[4] to operand 1 + 1 so that the template can
;; print the slice range as %1:%4.
1863(define_insn "*aarch64_sme_single_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx24:mode>_plus"
1864 [(set (reg:VNx4SI_ONLY ZA_REGNUM)
1865 (unspec:VNx4SI_ONLY
1866 [(reg:VNx4SI_ONLY ZA_REGNUM)
1867 (reg:DI SME_STATE_REGNUM)
1868 (plus:SI (match_operand:SI 0 "register_operand" "Uci")
1869 (match_operand:SI 1 "const_<za32_offset_range>_operand"))
1870 (match_operand:SME_ZA_HFx24 2 "register_operand" "w")
1871 (vec_duplicate:SME_ZA_HFx24
1872 (match_operand:<SME_ZA_HFx24:VSINGLE> 3 "register_operand" "x"))]
1873 SME_FP_TERNARY_SLICE))]
1874 "TARGET_STREAMING_SME2"
1875 {
1876 operands[4] = GEN_INT (INTVAL (operands[1]) + 1);
1877 return "<b><optab>l\tza.s[%w0, %1:%4, vgx<vector_count>], %2, %3.h";
1878 }
1879)
1880
;; Indexed (lane) widening form of the SME_FP_TERNARY_SLICE operations,
;; iterated over SME_ZA_HFx124.  The predicate and constraint of
;; operand 1 and the <vg_modifier> and <z_suffix> parts of the template
;; come from mode attributes.  Operand 2 is the single vector whose lane
;; (constant operand 3) is selected through UNSPEC_SVE_LANE_SELECT.  The
;; template addresses the fixed slice range 0:1.
1881(define_insn "@aarch64_sme_lane_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx124:mode>"
1882 [(set (reg:VNx4SI_ONLY ZA_REGNUM)
1883 (unspec:VNx4SI_ONLY
1884 [(reg:VNx4SI_ONLY ZA_REGNUM)
1885 (reg:DI SME_STATE_REGNUM)
1886 (match_operand:SI 0 "register_operand" "Uci")
1887 (match_operand:SME_ZA_HFx124 1 "<aligned_operand>" "<aligned_fpr>")
1888 (unspec:SME_ZA_HFx124
1889 [(match_operand:<VSINGLE> 2 "register_operand" "x")
1890 (match_operand:SI 3 "const_int_operand")]
1891 UNSPEC_SVE_LANE_SELECT)]
1892 SME_FP_TERNARY_SLICE))]
1893 "TARGET_STREAMING_SME2"
1894 "<b><optab>l\tza.s[%w0, 0:1<vg_modifier>], %1<z_suffix>, %2.h[%3]"
1895)
1896
;; Offset variant of the indexed widening SME_FP_TERNARY_SLICE pattern:
;; the slice index is (plus operand 0, operand 1), with operand 1
;; restricted by the const_<za32_offset_range>_operand predicate.  The
;; output code sets operands[5] to operand 1 + 1 so that the template can
;; print the slice range as %1:%5.  Operand 3 is the single vector whose
;; lane (constant operand 4) is selected through UNSPEC_SVE_LANE_SELECT.
;; The name carries the "_plus" suffix, like the other offset variants
;; in this file, so that it differs from the zero-offset pattern's name.
1897(define_insn "*aarch64_sme_lane_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx124:mode>_plus"
1898 [(set (reg:VNx4SI_ONLY ZA_REGNUM)
1899 (unspec:VNx4SI_ONLY
1900 [(reg:VNx4SI_ONLY ZA_REGNUM)
1901 (reg:DI SME_STATE_REGNUM)
1902 (plus:SI (match_operand:SI 0 "register_operand" "Uci")
1903 (match_operand:SI 1 "const_<za32_offset_range>_operand"))
1904 (match_operand:SME_ZA_HFx124 2 "<aligned_operand>" "<aligned_fpr>")
1905 (unspec:SME_ZA_HFx124
1906 [(match_operand:<VSINGLE> 3 "register_operand" "x")
1907 (match_operand:SI 4 "const_int_operand")]
1908 UNSPEC_SVE_LANE_SELECT)]
1909 SME_FP_TERNARY_SLICE))]
1910 "TARGET_STREAMING_SME2"
1911 {
1912 operands[5] = GEN_INT (INTVAL (operands[1]) + 1);
1913 return "<b><optab>l\tza.s[%w0, %1:%5<vg_modifier>], %2<z_suffix>, %3.h[%4]";
1914 }
1915)
1916
4f6ab953
RS
1917;; -------------------------------------------------------------------------
1918;; ---- [FP] Sum of outer products
1919;; -------------------------------------------------------------------------
1920;; Includes:
1921;; - BFMOPA
1922;; - BFMOPS
1923;; - FMOPA
1924;; - FMOPS
1925;; -------------------------------------------------------------------------
1926
;; Floating-point sum of outer products (SME_FP_MOP).  Operand 0 is a
;; constant that the template prints as the ZA tile number (za%0).
;; Operands 1 and 2 are predicates, printed with /m, and operands 3 and 4
;; are the two vector inputs.  The insn condition pairs a 32-bit ZA
;; element size with input elements of at most 32 bits, and any other ZA
;; element size with input elements wider than 32 bits.
1927(define_insn "@aarch64_sme_<optab><SME_ZA_SDF_I:mode><SME_MOP_HSDF:mode>"
1928 [(set (reg:SME_ZA_SDF_I ZA_REGNUM)
1929 (unspec:SME_ZA_SDF_I
1930 [(reg:SME_ZA_SDF_I ZA_REGNUM)
1931 (reg:DI SME_STATE_REGNUM)
1932 (match_operand:DI 0 "const_int_operand")
1933 (match_operand:<SME_ZA_SDF_I:VPRED> 1 "register_operand" "Upl")
1934 (match_operand:<SME_ZA_SDF_I:VPRED> 2 "register_operand" "Upl")
1935 (match_operand:SME_MOP_HSDF 3 "register_operand" "w")
1936 (match_operand:SME_MOP_HSDF 4 "register_operand" "w")]
1937 SME_FP_MOP))]
1938 "TARGET_STREAMING_SME
1939 && (<SME_ZA_SDF_I:elem_bits> == 32) == (<SME_MOP_HSDF:elem_bits> <= 32)"
1940 "<b><optab>\tza%0.<SME_ZA_SDF_I:Vetype>, %1/m, %2/m, %3.<SME_MOP_HSDF:Vetype>, %4.<SME_MOP_HSDF:Vetype>"
1941)
c1c267df
RS
1942
1943;; =========================================================================
1944;; == Table lookup
1945;; =========================================================================
1946
1947;; -------------------------------------------------------------------------
1948;; ---- Table lookup
1949;; -------------------------------------------------------------------------
1950;; Includes:
1951;; - LUTI2
1952;; - LUTI4
1953;; -------------------------------------------------------------------------
1954
;; Unspec code used by the LUTI table-lookup patterns that follow.
1955(define_c_enum "unspec" [
1956 UNSPEC_SME_LUTI
1957])
1958
;; Table lookup producing a single vector (SVE_FULL_BHS).  The unspec
;; reads ZT0 and SME_STATE_REGNUM.  Operand 1 is a VNx16QI register and
;; operand 2 a constant, printed together as %1[%2].  LUTI_BITS is
;; recorded in the unspec as a const_int and also selects the mnemonic
;; (luti<LUTI_BITS>).
1959(define_insn "@aarch64_sme_lut<LUTI_BITS><mode>"
1960 [(set (match_operand:SVE_FULL_BHS 0 "register_operand" "=w")
1961 (unspec:SVE_FULL_BHS
1962 [(reg:V8DI ZT0_REGNUM)
1963 (reg:DI SME_STATE_REGNUM)
1964 (match_operand:VNx16QI 1 "register_operand" "w")
1965 (match_operand:DI 2 "const_int_operand")
1966 (const_int LUTI_BITS)]
1967 UNSPEC_SME_LUTI))]
1968 "TARGET_STREAMING_SME2"
1969 "luti<LUTI_BITS>\t%0.<Vetype>, zt0, %1[%2]"
1970)
1971
;; Table lookup producing a multi-vector result (SVE_BHSx24) in a single
;; aligned register operand.  It shares its parameterized name with the
;; single-vector pattern but iterates over different modes.  The insn
;; condition rejects the combination LUTI_BITS == 4, vector_count == 4
;; and elem_bits == 8.  The "stride_type" attribute is set to
;; "luti_consecutive".
1972(define_insn "@aarch64_sme_lut<LUTI_BITS><mode>"
1973 [(set (match_operand:SVE_BHSx24 0 "aligned_register_operand" "=Uw<vector_count>")
1974 (unspec:SVE_BHSx24
1975 [(reg:V8DI ZT0_REGNUM)
1976 (reg:DI SME_STATE_REGNUM)
1977 (match_operand:VNx16QI 1 "register_operand" "w")
1978 (match_operand:DI 2 "const_int_operand")
1979 (const_int LUTI_BITS)]
1980 UNSPEC_SME_LUTI))]
1981 "TARGET_STREAMING_SME2
1982 && !(<LUTI_BITS> == 4 && <vector_count> == 4 && <elem_bits> == 8)"
1983 "luti<LUTI_BITS>\t%0, zt0, %1[%2]"
9f0f7d80
RS
1984 [(set_attr "stride_type" "luti_consecutive")]
1985)
1986
;; Two-result table lookup in which each result is a separate
;; single-vector destination (operands 0 and 1).  The two sets use the
;; same inputs (operands 2 and 3, repeated with match_dup) and differ
;; only in the trailing (const_int 0) / (const_int 1) of the unspec.
;; The insn condition additionally requires
;; aarch64_strided_registers_p (operands, 2, 8) to hold.
;; NOTE(review): that helper is defined outside this file; presumably it
;; checks that the two destinations are 8 registers apart -- confirm.
1987(define_insn "@aarch64_sme_lut<LUTI_BITS><mode>_strided2"
1988 [(set (match_operand:SVE_FULL_BHS 0 "aarch64_simd_register" "=Uwd")
1989 (unspec:SVE_FULL_BHS
1990 [(reg:V8DI ZT0_REGNUM)
1991 (reg:DI SME_STATE_REGNUM)
1992 (match_operand:VNx16QI 2 "register_operand" "w")
1993 (match_operand:DI 3 "const_int_operand")
1994 (const_int LUTI_BITS)
1995 (const_int 0)]
1996 UNSPEC_SME_LUTI))
1997 (set (match_operand:SVE_FULL_BHS 1 "aarch64_simd_register" "=w")
1998 (unspec:SVE_FULL_BHS
1999 [(reg:V8DI ZT0_REGNUM)
2000 (reg:DI SME_STATE_REGNUM)
2001 (match_dup 2)
2002 (match_dup 3)
2003 (const_int LUTI_BITS)
2004 (const_int 1)]
2005 UNSPEC_SME_LUTI))]
2006 "TARGET_STREAMING_SME2
2007 && aarch64_strided_registers_p (operands, 2, 8)"
2008 "luti<LUTI_BITS>\t{%0.<Vetype>, %1.<Vetype>}, zt0, %2[%3]"
2009 [(set_attr "stride_type" "luti_strided")]
2010)
2011
;; Four-result table lookup in which each result is a separate
;; single-vector destination (operands 0 to 3).  The four sets use the
;; same inputs (operands 4 and 5, repeated with match_dup) and differ
;; only in the trailing (const_int 0) ... (const_int 3) of the unspec.
;; The insn condition rejects LUTI_BITS == 4 with elem_bits == 8 and
;; requires aarch64_strided_registers_p (operands, 4, 4) to hold.
;; NOTE(review): that helper is defined outside this file; presumably it
;; checks that the four destinations are 4 registers apart -- confirm.
2012(define_insn "@aarch64_sme_lut<LUTI_BITS><mode>_strided4"
2013 [(set (match_operand:SVE_FULL_BHS 0 "aarch64_simd_register" "=Uwt")
2014 (unspec:SVE_FULL_BHS
2015 [(reg:V8DI ZT0_REGNUM)
2016 (reg:DI SME_STATE_REGNUM)
2017 (match_operand:VNx16QI 4 "register_operand" "w")
2018 (match_operand:DI 5 "const_int_operand")
2019 (const_int LUTI_BITS)
2020 (const_int 0)]
2021 UNSPEC_SME_LUTI))
2022 (set (match_operand:SVE_FULL_BHS 1 "aarch64_simd_register" "=w")
2023 (unspec:SVE_FULL_BHS
2024 [(reg:V8DI ZT0_REGNUM)
2025 (reg:DI SME_STATE_REGNUM)
2026 (match_dup 4)
2027 (match_dup 5)
2028 (const_int LUTI_BITS)
2029 (const_int 1)]
2030 UNSPEC_SME_LUTI))
2031 (set (match_operand:SVE_FULL_BHS 2 "aarch64_simd_register" "=w")
2032 (unspec:SVE_FULL_BHS
2033 [(reg:V8DI ZT0_REGNUM)
2034 (reg:DI SME_STATE_REGNUM)
2035 (match_dup 4)
2036 (match_dup 5)
2037 (const_int LUTI_BITS)
2038 (const_int 2)]
2039 UNSPEC_SME_LUTI))
2040 (set (match_operand:SVE_FULL_BHS 3 "aarch64_simd_register" "=w")
2041 (unspec:SVE_FULL_BHS
2042 [(reg:V8DI ZT0_REGNUM)
2043 (reg:DI SME_STATE_REGNUM)
2044 (match_dup 4)
2045 (match_dup 5)
2046 (const_int LUTI_BITS)
2047 (const_int 3)]
2048 UNSPEC_SME_LUTI))]
2049 "TARGET_STREAMING_SME2
2050 && !(<LUTI_BITS> == 4 && <elem_bits> == 8)
2051 && aarch64_strided_registers_p (operands, 4, 4)"
2052 "luti<LUTI_BITS>\t{%0.<Vetype>, %1.<Vetype>, %2.<Vetype>, %3.<Vetype>}, zt0, %4[%5]"
2053 [(set_attr "stride_type" "luti_strided")]
c1c267df 2054)