]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/config/aarch64/aarch64-sme.md
aarch64: Add ZT0
[thirdparty/gcc.git] / gcc / config / aarch64 / aarch64-sme.md
CommitLineData
dd8090f4
RS
1;; Machine description for AArch64 SME.
2;; Copyright (C) 2023 Free Software Foundation, Inc.
3;;
4;; This file is part of GCC.
5;;
6;; GCC is free software; you can redistribute it and/or modify it
7;; under the terms of the GNU General Public License as published by
8;; the Free Software Foundation; either version 3, or (at your option)
9;; any later version.
10;;
11;; GCC is distributed in the hope that it will be useful, but
12;; WITHOUT ANY WARRANTY; without even the implied warranty of
13;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14;; General Public License for more details.
15;;
16;; You should have received a copy of the GNU General Public License
17;; along with GCC; see the file COPYING3. If not see
18;; <http://www.gnu.org/licenses/>.
19
20;; The file is organised into the following sections (search for the full
21;; line):
22;;
23;; == State management
24;; ---- Test current state
25;; ---- PSTATE.SM management
3af9ceb6 26;; ---- PSTATE.ZA management
4f6ab953
RS
27;;
28;; == Loads, stores and moves
29;; ---- Single-vector loads
8d29b7ac 30;; ---- Table loads
4f6ab953 31;; ---- Single-vector stores
8d29b7ac 32;; ---- Table stores
4f6ab953
RS
33;; ---- Single-vector moves
34;; ---- Zeroing
35;;
36;; == Binary arithmetic
37;; ---- Binary arithmetic on ZA tile
38;;
39;; == Ternary arithmetic
40;; ---- [INT] Sum of outer products
41;; ---- [FP] Sum of outer products
dd8090f4
RS
42
43;; =========================================================================
44;; == State management
45;; =========================================================================
46;;
47;; Many of the instructions in this section are only valid when SME is
48;; present. However, they don't have a TARGET_SME condition since
49;; (a) they are only emitted under direct control of aarch64 code and
50;; (b) they are sometimes used conditionally, particularly in streaming-
51;; compatible code.
52;;
53;; =========================================================================
54
55;; -------------------------------------------------------------------------
56;; ---- Test current state
57;; -------------------------------------------------------------------------
58
;; Unspec codes used by the VG-tracking and state-reading patterns in the
;; "Test current state" section.
59(define_c_enum "unspec" [
60 UNSPEC_OLD_VG_SAVED
61 UNSPEC_UPDATE_VG
62 UNSPEC_GET_SME_STATE
63 UNSPEC_READ_SVCR
64])
65
66;; A marker instruction to say that the old value of the DWARF VG register
67;; has been saved to the stack, for CFI purposes. Operand 0 is the old
68;; value of the register and operand 1 is the save slot.
;; The pattern is modelled as a set of VG_REGNUM from an unspec of the two
;; operands; neither operand has a mode, predicate or constraint.  Both the
;; condition and the output template are empty, so no assembly is printed.
69(define_insn "aarch64_old_vg_saved"
70 [(set (reg:DI VG_REGNUM)
71 (unspec:DI [(match_operand 0)
72 (match_operand 1)] UNSPEC_OLD_VG_SAVED))]
73 ""
74 ""
75 [(set_attr "type" "no_insn")]
76)
77
78;; A marker to indicate places where a call temporarily changes VG.
;; VG_REGNUM is both the input and the output of the unspec.  The output
;; template is empty, so no assembly is printed for this marker.
79(define_insn "aarch64_update_vg"
80 [(set (reg:DI VG_REGNUM)
81 (unspec:DI [(reg:DI VG_REGNUM)] UNSPEC_UPDATE_VG))]
82 ""
83 ""
84 [(set_attr "type" "no_insn")]
85)
86
;; Call the __arm_sme_state support routine with a plain BL.  The result is
;; modelled as a TImode value starting at R0_REGNUM, produced by an
;; unspec_volatile with no inputs.  The call is described as clobbering
;; R16, R17, R18, R30 and the condition flags.
87(define_insn "aarch64_get_sme_state"
88 [(set (reg:TI R0_REGNUM)
89 (unspec_volatile:TI [(const_int 0)] UNSPEC_GET_SME_STATE))
90 (clobber (reg:DI R16_REGNUM))
91 (clobber (reg:DI R17_REGNUM))
92 (clobber (reg:DI R18_REGNUM))
93 (clobber (reg:DI R30_REGNUM))
94 (clobber (reg:CC CC_REGNUM))]
95 ""
96 "bl\t__arm_sme_state"
97)
98
;; Read the SVCR system register into DImode general register operand 0
;; using MRS.  The source is an unspec_volatile with no inputs.
99(define_insn "aarch64_read_svcr"
100 [(set (match_operand:DI 0 "register_operand" "=r")
101 (unspec_volatile:DI [(const_int 0)] UNSPEC_READ_SVCR))]
102 ""
103 "mrs\t%0, svcr"
104)
105
106;; -------------------------------------------------------------------------
107;; ---- PSTATE.SM management
108;; -------------------------------------------------------------------------
109;; Includes:
110;; - SMSTART SM
111;; - SMSTOP SM
112;; -------------------------------------------------------------------------
113
;; Unspec codes for the two patterns that switch PSTATE.SM on and off.
114(define_c_enum "unspec" [
115 UNSPEC_SMSTART_SM
116 UNSPEC_SMSTOP_SM
117])
118
119;; Turn on streaming mode. This clobbers all SVE state.
120;;
121;; Depend on VG_REGNUM to ensure that the VG save slot has already been
122;; initialized.
123(define_insn "aarch64_smstart_sm"
124 [(unspec_volatile [(const_int 0)] UNSPEC_SMSTART_SM)
125 (use (reg:DI VG_REGNUM))
;; Vector register clobbers: eight V4x16QI groups whose first registers are
;; V0, V4, ..., V28.
126 (clobber (reg:V4x16QI V0_REGNUM))
127 (clobber (reg:V4x16QI V4_REGNUM))
128 (clobber (reg:V4x16QI V8_REGNUM))
129 (clobber (reg:V4x16QI V12_REGNUM))
130 (clobber (reg:V4x16QI V16_REGNUM))
131 (clobber (reg:V4x16QI V20_REGNUM))
132 (clobber (reg:V4x16QI V24_REGNUM))
133 (clobber (reg:V4x16QI V28_REGNUM))
;; Predicate register clobbers: P0 to P15, one clobber each.
134 (clobber (reg:VNx16BI P0_REGNUM))
135 (clobber (reg:VNx16BI P1_REGNUM))
136 (clobber (reg:VNx16BI P2_REGNUM))
137 (clobber (reg:VNx16BI P3_REGNUM))
138 (clobber (reg:VNx16BI P4_REGNUM))
139 (clobber (reg:VNx16BI P5_REGNUM))
140 (clobber (reg:VNx16BI P6_REGNUM))
141 (clobber (reg:VNx16BI P7_REGNUM))
142 (clobber (reg:VNx16BI P8_REGNUM))
143 (clobber (reg:VNx16BI P9_REGNUM))
144 (clobber (reg:VNx16BI P10_REGNUM))
145 (clobber (reg:VNx16BI P11_REGNUM))
146 (clobber (reg:VNx16BI P12_REGNUM))
147 (clobber (reg:VNx16BI P13_REGNUM))
148 (clobber (reg:VNx16BI P14_REGNUM))
149 (clobber (reg:VNx16BI P15_REGNUM))]
;; No target condition; the instruction printed is "smstart sm".
150 ""
151 "smstart\tsm"
152)
153
154;; Turn off streaming mode. This clobbers all SVE state.
155;;
156;; Depend on VG_REGNUM to ensure that the VG save slot has already been
157;; initialized.
158(define_insn "aarch64_smstop_sm"
159 [(unspec_volatile [(const_int 0)] UNSPEC_SMSTOP_SM)
160 (use (reg:DI VG_REGNUM))
;; Vector register clobbers: eight V4x16QI groups whose first registers are
;; V0, V4, ..., V28.
161 (clobber (reg:V4x16QI V0_REGNUM))
162 (clobber (reg:V4x16QI V4_REGNUM))
163 (clobber (reg:V4x16QI V8_REGNUM))
164 (clobber (reg:V4x16QI V12_REGNUM))
165 (clobber (reg:V4x16QI V16_REGNUM))
166 (clobber (reg:V4x16QI V20_REGNUM))
167 (clobber (reg:V4x16QI V24_REGNUM))
168 (clobber (reg:V4x16QI V28_REGNUM))
;; Predicate register clobbers: P0 to P15, one clobber each.
169 (clobber (reg:VNx16BI P0_REGNUM))
170 (clobber (reg:VNx16BI P1_REGNUM))
171 (clobber (reg:VNx16BI P2_REGNUM))
172 (clobber (reg:VNx16BI P3_REGNUM))
173 (clobber (reg:VNx16BI P4_REGNUM))
174 (clobber (reg:VNx16BI P5_REGNUM))
175 (clobber (reg:VNx16BI P6_REGNUM))
176 (clobber (reg:VNx16BI P7_REGNUM))
177 (clobber (reg:VNx16BI P8_REGNUM))
178 (clobber (reg:VNx16BI P9_REGNUM))
179 (clobber (reg:VNx16BI P10_REGNUM))
180 (clobber (reg:VNx16BI P11_REGNUM))
181 (clobber (reg:VNx16BI P12_REGNUM))
182 (clobber (reg:VNx16BI P13_REGNUM))
183 (clobber (reg:VNx16BI P14_REGNUM))
184 (clobber (reg:VNx16BI P15_REGNUM))]
;; No target condition; the instruction printed is "smstop sm".
185 ""
186 "smstop\tsm"
187)
3af9ceb6
RS
188
189;; -------------------------------------------------------------------------
190;; ---- PSTATE.ZA management
191;; -------------------------------------------------------------------------
192;; Includes:
193;; - SMSTART ZA
194;; - SMSTOP ZA
195;; plus calls to support routines.
196;; -------------------------------------------------------------------------
197
;; Unspec codes for the PSTATE.ZA management patterns: SMSTOP ZA, ZA
;; zeroing, TPIDR2 accesses, lazy-save commit/restore and the private-ZA
;; call markers.
198(define_c_enum "unspec" [
199 UNSPEC_SMSTOP_ZA
200 UNSPEC_INITIAL_ZERO_ZA
201 UNSPEC_TPIDR2_SAVE
202 UNSPEC_TPIDR2_RESTORE
203 UNSPEC_READ_TPIDR2
204 UNSPEC_WRITE_TPIDR2
205 UNSPEC_SETUP_LOCAL_TPIDR2
206 UNSPEC_RESTORE_ZA
207 UNSPEC_START_PRIVATE_ZA_CALL
208 UNSPEC_END_PRIVATE_ZA_CALL
209 UNSPEC_COMMIT_LAZY_SAVE
210])
211
;; Volatile unspec codes for the patterns that model the effect of inline
;; asms on ZA and ZT0.
212(define_c_enum "unspecv" [
213 UNSPECV_ASM_UPDATE_ZA
8d29b7ac 214 UNSPECV_ASM_UPDATE_ZT0
3af9ceb6
RS
215])
216
217;; Use the ABI-defined routine to commit an uncommitted lazy save.
218;; This relies on the current PSTATE.ZA, so depends on SME_STATE_REGNUM.
219;; The fake TPIDR2_SETUP_REGNUM register initially holds the incoming
220;; value of the architected TPIDR2_EL0.
;; The call is printed as a plain BL to __arm_tpidr2_save.  Its result is
;; modelled as a new value of ZA_FREE_REGNUM, and the call is described as
;; clobbering R14-R18, R30 and the condition flags.
221(define_insn "aarch64_tpidr2_save"
222 [(set (reg:DI ZA_FREE_REGNUM)
223 (unspec:DI [(reg:DI SME_STATE_REGNUM)
224 (reg:DI TPIDR2_SETUP_REGNUM)] UNSPEC_TPIDR2_SAVE))
225 (clobber (reg:DI R14_REGNUM))
226 (clobber (reg:DI R15_REGNUM))
227 (clobber (reg:DI R16_REGNUM))
228 (clobber (reg:DI R17_REGNUM))
229 (clobber (reg:DI R18_REGNUM))
230 (clobber (reg:DI R30_REGNUM))
231 (clobber (reg:CC CC_REGNUM))]
232 ""
233 "bl\t__arm_tpidr2_save"
234)
235
236;; Set PSTATE.ZA to 1. If ZA was previously dormant or active,
237;; it remains in the same state afterwards, with the same contents.
238;; Otherwise, it goes from off to on with zeroed contents.
239;;
240;; Later writes of TPIDR2_EL0 to a nonzero value must not be moved
241;; up past this instruction, since that could create an invalid
242;; combination of having an active lazy save while ZA is off.
243;; Create an anti-dependence by reading the current contents
244;; of TPIDR2_SETUP_REGNUM.
245;;
246;; Making this depend on ZA_FREE_REGNUM ensures that contents belonging
247;; to the caller have already been saved. That isn't necessary for this
248;; instruction itself, since PSTATE.ZA is already 1 if it contains data.
249;; But doing this here means that other uses of ZA can just depend on
250;; SME_STATE_REGNUM, rather than both SME_STATE_REGNUM and ZA_FREE_REGNUM.
;; In RTL terms: SME_STATE_REGNUM is set to the constant 1, and
;; TPIDR2_SETUP_REGNUM and ZA_FREE_REGNUM appear only as "use"s, i.e. as
;; inputs that create ordering dependencies.  Prints "smstart za".
251(define_insn "aarch64_smstart_za"
252 [(set (reg:DI SME_STATE_REGNUM)
253 (const_int 1))
254 (use (reg:DI TPIDR2_SETUP_REGNUM))
255 (use (reg:DI ZA_FREE_REGNUM))]
256 ""
257 "smstart\tza"
258)
259
260;; Disable ZA and discard its current contents.
261;;
262;; The ABI says that the ZA save buffer must be null whenever PSTATE.ZA
263;; is zero, so earlier writes to TPIDR2_EL0 must not be moved down past
264;; this instruction. Depend on TPIDR2_SETUP_REGNUM to ensure this.
265;;
266;; We can only turn ZA off once we know that it is free (i.e. doesn't
267;; contain data belonging to the caller). Depend on ZA_FREE_REGNUM
268;; to ensure this.
269;;
270;; We only turn ZA off when the current function's ZA state is dead,
271;; or perhaps if we're sure that the contents are saved. Either way,
272;; we know whether ZA is saved or not.
;; In RTL terms: SME_STATE_REGNUM is set to the constant 0 and
;; ZA_SAVED_REGNUM is redefined from an unspec that reads
;; TPIDR2_SETUP_REGNUM and ZA_FREE_REGNUM.  Prints "smstop za".
273(define_insn "aarch64_smstop_za"
274 [(set (reg:DI SME_STATE_REGNUM)
275 (const_int 0))
276 (set (reg:DI ZA_SAVED_REGNUM)
277 (unspec:DI [(reg:DI TPIDR2_SETUP_REGNUM)
278 (reg:DI ZA_FREE_REGNUM)] UNSPEC_SMSTOP_ZA))]
279 ""
280 "smstop\tza"
281)
282
283;; Zero ZA after committing a lazy save. The sequencing is enforced
284;; by reading ZA_FREE_REGNUM.
;; ZA_REGNUM is written (in DImode here) from an unspec whose inputs are
;; SME_STATE_REGNUM and ZA_FREE_REGNUM.  Prints "zero { za }".
285(define_insn "aarch64_initial_zero_za"
286 [(set (reg:DI ZA_REGNUM)
287 (unspec:DI [(reg:DI SME_STATE_REGNUM)
288 (reg:DI ZA_FREE_REGNUM)] UNSPEC_INITIAL_ZERO_ZA))]
289 ""
290 "zero\t{ za }"
291)
292
293;; Initialize the abstract TPIDR2_BLOCK_REGNUM from the contents of
294;; the current function's TPIDR2 block. Other instructions can then
295;; depend on TPIDR2_BLOCK_REGNUM rather than on the memory block.
;; Operand 0 is the block itself, given as a V16QI memory operand.
;; The output template is empty, so no assembly is printed.
296(define_insn "aarch64_setup_local_tpidr2"
297 [(set (reg:DI TPIDR2_BLOCK_REGNUM)
298 (unspec:DI [(match_operand:V16QI 0 "memory_operand" "m")]
299 UNSPEC_SETUP_LOCAL_TPIDR2))]
300 ""
301 ""
302 [(set_attr "type" "no_insn")]
303)
304
305;; Clear TPIDR2_EL0, cancelling any uncommitted lazy save.
;; Modelled as setting TPIDR2_SETUP_REGNUM to zero; the instruction printed
;; writes XZR to TPIDR2_EL0 with MSR.
306(define_insn "aarch64_clear_tpidr2"
307 [(set (reg:DI TPIDR2_SETUP_REGNUM)
308 (const_int 0))]
309 ""
310 "msr\ttpidr2_el0, xzr"
311)
312
313;; Point TPIDR2_EL0 to the current function's TPIDR2 block, whose address
314;; is given by operand 0. TPIDR2_BLOCK_REGNUM represents the contents of the
315;; pointed-to block.
;; Operand 0 must satisfy pmode_register_operand and is allocated to a
;; general register ("r").  TPIDR2_SETUP_REGNUM is redefined from operand 0
;; and TPIDR2_BLOCK_REGNUM; the instruction printed is an MSR to TPIDR2_EL0.
316(define_insn "aarch64_write_tpidr2"
317 [(set (reg:DI TPIDR2_SETUP_REGNUM)
318 (unspec:DI [(match_operand 0 "pmode_register_operand" "r")
319 (reg:DI TPIDR2_BLOCK_REGNUM)] UNSPEC_WRITE_TPIDR2))]
320 ""
321 "msr\ttpidr2_el0, %0"
322)
323
324;; Check whether ZA has been saved. The system depends on the value that
325;; we wrote to TPIDR2_EL0 previously, so it depends on TPIDR2_SETUP_REGNUM.
;; Operand 0 is the DImode general register that receives the value.
;; The unspec also reads ZA_SAVED_REGNUM.  Prints an MRS from TPIDR2_EL0.
326(define_insn "aarch64_read_tpidr2"
327 [(set (match_operand:DI 0 "register_operand" "=r")
328 (unspec:DI [(reg:DI TPIDR2_SETUP_REGNUM)
329 (reg:DI ZA_SAVED_REGNUM)] UNSPEC_READ_TPIDR2))]
330 ""
331 "mrs\t%0, tpidr2_el0"
332)
333
334;; Use the ABI-defined routine to restore lazy-saved ZA contents
335;; from the TPIDR2 block pointed to by X0. ZA must already be active.
;; The call is printed as a plain BL to __arm_tpidr2_restore.  It redefines
;; ZA_SAVED_REGNUM (from R0_REGNUM) and SME_STATE_REGNUM (from its own old
;; value), and is described as clobbering R14-R18, R30 and the condition
;; flags.
336(define_insn "aarch64_tpidr2_restore"
337 [(set (reg:DI ZA_SAVED_REGNUM)
338 (unspec:DI [(reg:DI R0_REGNUM)] UNSPEC_TPIDR2_RESTORE))
339 (set (reg:DI SME_STATE_REGNUM)
340 (unspec:DI [(reg:DI SME_STATE_REGNUM)] UNSPEC_TPIDR2_RESTORE))
341 (clobber (reg:DI R14_REGNUM))
342 (clobber (reg:DI R15_REGNUM))
343 (clobber (reg:DI R16_REGNUM))
344 (clobber (reg:DI R17_REGNUM))
345 (clobber (reg:DI R18_REGNUM))
346 (clobber (reg:DI R30_REGNUM))
347 (clobber (reg:CC CC_REGNUM))]
348 ""
349 "bl\t__arm_tpidr2_restore"
350)
351
352;; Check whether a lazy save set up by aarch64_save_za was committed
353;; and restore the saved contents if so.
354;;
355;; Operand 0 is the address of the current function's TPIDR2 block.
356(define_insn_and_split "aarch64_restore_za"
;; Before splitting, the insn redefines ZA_SAVED_REGNUM from operand 0 and
;; the abstract state registers, and clobbers R0, R14-R18, R30 and CC.
357 [(set (reg:DI ZA_SAVED_REGNUM)
358 (unspec:DI [(match_operand 0 "pmode_register_operand" "r")
359 (reg:DI SME_STATE_REGNUM)
360 (reg:DI TPIDR2_SETUP_REGNUM)
361 (reg:DI ZA_SAVED_REGNUM)] UNSPEC_RESTORE_ZA))
362 (clobber (reg:DI R0_REGNUM))
363 (clobber (reg:DI R14_REGNUM))
364 (clobber (reg:DI R15_REGNUM))
365 (clobber (reg:DI R16_REGNUM))
366 (clobber (reg:DI R17_REGNUM))
367 (clobber (reg:DI R18_REGNUM))
368 (clobber (reg:DI R30_REGNUM))
369 (clobber (reg:CC CC_REGNUM))]
;; The "#" template means the insn is always split; the split is only
;; enabled once epilogue_completed is set.
370 ""
371 "#"
372 "&& epilogue_completed"
373 [(const_int 0)]
;; The split reads TPIDR2_EL0 into R16 and emits a likely-taken conditional
;; branch on that value (aarch64_cbnedi1; presumably "branch if nonzero" --
;; confirm against its definition) to a label placed after the code emitted
;; by aarch64_restore_za (operands[0]).  That code therefore only runs when
;; the branch is not taken.
374 {
375 auto label = gen_label_rtx ();
376 auto tpidr2 = gen_rtx_REG (DImode, R16_REGNUM);
377 emit_insn (gen_aarch64_read_tpidr2 (tpidr2));
378 auto jump = emit_likely_jump_insn (gen_aarch64_cbnedi1 (tpidr2, label));
379 JUMP_LABEL (jump) = label;
380
381 aarch64_restore_za (operands[0]);
382 emit_label (label);
383 DONE;
384 }
385)
386
387;; This instruction is emitted after asms that alter ZA, in order to model
388;; the effect on dataflow. The asm itself can't have ZA as an input or
389;; an output, since there is no associated data type. Instead it retains
390;; the original "za" clobber, which on its own would indicate that ZA
391;; is dead.
392;;
393;; The operand is a unique identifier.
;; ZA_REGNUM (as VNx16QI) is both an input and the output of the
;; unspec_volatile; SME_STATE_REGNUM is a further input.  Operand 0 must be
;; a const_int.  The output template is empty, so no assembly is printed.
394(define_insn "aarch64_asm_update_za"
395 [(set (reg:VNx16QI ZA_REGNUM)
396 (unspec_volatile:VNx16QI
397 [(reg:VNx16QI ZA_REGNUM)
398 (reg:DI SME_STATE_REGNUM)
399 (match_operand 0 "const_int_operand")]
400 UNSPECV_ASM_UPDATE_ZA))]
401 ""
402 ""
403 [(set_attr "type" "no_insn")]
404)
405
8d29b7ac
RS
406;; A similar pattern for ZT0.
;; ZT0_REGNUM (as V8DI) is both an input and the output of the
;; unspec_volatile; SME_STATE_REGNUM is a further input.  Operand 0 must be
;; a const_int.  The output template is empty, so no assembly is printed.
407(define_insn "aarch64_asm_update_zt0"
408 [(set (reg:V8DI ZT0_REGNUM)
409 (unspec_volatile:V8DI
410 [(reg:V8DI ZT0_REGNUM)
411 (reg:DI SME_STATE_REGNUM)
412 (match_operand 0 "const_int_operand")]
413 UNSPECV_ASM_UPDATE_ZT0))]
414 ""
415 ""
416 [(set_attr "type" "no_insn")]
417)
418
3af9ceb6
RS
419;; This pseudo-instruction is emitted as part of a call to a private-ZA
420;; function from a function with ZA state. It marks a natural place to set
421;; up a lazy save, if that turns out to be necessary. The save itself
422;; is managed by the mode-switching pass.
;; The marker reads and rewrites LOWERING_REGNUM and has no operands.
;; The output template is empty, so no assembly is printed.
423(define_insn "aarch64_start_private_za_call"
424 [(set (reg:DI LOWERING_REGNUM)
425 (unspec:DI [(reg:DI LOWERING_REGNUM)] UNSPEC_START_PRIVATE_ZA_CALL))]
426 ""
427 ""
428 [(set_attr "type" "no_insn")]
429)
430
431;; This pseudo-instruction is emitted as part of a call to a private-ZA
432;; function from a function with ZA state. It marks a natural place to restore
433;; the current function's ZA contents from the lazy save buffer, if that
434;; turns out to be necessary. The restore itself is managed by the
435;; mode-switching pass.
;; The marker reads and rewrites LOWERING_REGNUM and has no operands.
;; The output template is empty, so no assembly is printed.
436(define_insn "aarch64_end_private_za_call"
437 [(set (reg:DI LOWERING_REGNUM)
438 (unspec:DI [(reg:DI LOWERING_REGNUM)] UNSPEC_END_PRIVATE_ZA_CALL))]
439 ""
440 ""
441 [(set_attr "type" "no_insn")]
442)
443
444;; This pseudo-instruction is emitted before a private-ZA function uses
445;; PSTATE.ZA state for the first time. The instruction checks whether
446;; ZA currently contains data belonging to a caller and commits the
447;; lazy save if so.
448;;
449;; Operand 0 is the incoming value of TPIDR2_EL0. Operand 1 is nonzero
450;; if ZA is live, and should therefore be zeroed after committing a save.
451;;
452;; The instruction is generated by the mode-switching pass. It is a
453;; define_insn_and_split rather than a define_expand because of the
454;; internal control flow.
455(define_insn_and_split "aarch64_commit_lazy_save"
;; Before splitting, the insn redefines both ZA_FREE_REGNUM and ZA_REGNUM
;; and carries the same clobbers as the __arm_tpidr2_save call pattern
;; (R14-R18, R30 and CC).
456 [(set (reg:DI ZA_FREE_REGNUM)
457 (unspec:DI [(match_operand 0 "pmode_register_operand" "r")
458 (match_operand 1 "const_int_operand")
459 (reg:DI SME_STATE_REGNUM)
460 (reg:DI TPIDR2_SETUP_REGNUM)
461 (reg:VNx16QI ZA_REGNUM)] UNSPEC_COMMIT_LAZY_SAVE))
462 (set (reg:DI ZA_REGNUM)
463 (unspec:DI [(reg:DI SME_STATE_REGNUM)
464 (reg:DI ZA_FREE_REGNUM)] UNSPEC_INITIAL_ZERO_ZA))
465 (clobber (reg:DI R14_REGNUM))
466 (clobber (reg:DI R15_REGNUM))
467 (clobber (reg:DI R16_REGNUM))
468 (clobber (reg:DI R17_REGNUM))
469 (clobber (reg:DI R18_REGNUM))
470 (clobber (reg:DI R30_REGNUM))
471 (clobber (reg:CC CC_REGNUM))]
;; The "#" template with a split condition of "true" means the insn is
;; always split and is never printed directly.
472 ""
473 "#"
474 "true"
475 [(const_int 0)]
;; The split emits a conditional branch on operand 0 (aarch64_cbeqdi1;
;; presumably "branch if zero" -- confirm against its definition) to a label
;; at the end of the sequence.  The fall-through path calls
;; __arm_tpidr2_save, clears TPIDR2_EL0 and, only when operand 1 is nonzero,
;; zeroes ZA.
476 {
477 auto label = gen_label_rtx ();
478 auto jump = emit_jump_insn (gen_aarch64_cbeqdi1 (operands[0], label));
479 JUMP_LABEL (jump) = label;
480 emit_insn (gen_aarch64_tpidr2_save ());
481 emit_insn (gen_aarch64_clear_tpidr2 ());
482 if (INTVAL (operands[1]) != 0)
483 emit_insn (gen_aarch64_initial_zero_za ());
484 emit_label (label);
485 DONE;
486 }
487)
4f6ab953
RS
488
489;; =========================================================================
490;; == Loads, stores and moves
491;; =========================================================================
492
493;; -------------------------------------------------------------------------
494;; ---- Single-vector loads
495;; -------------------------------------------------------------------------
496;; Includes:
497;; - LD1
498;; - LDR
499;; -------------------------------------------------------------------------
500
;; Unspec code shared by the two LDR-into-ZA patterns below.
501(define_c_enum "unspec" [
502 UNSPEC_SME_LDR
503])
504
;; Predicated LD1 into ZA with a zero slice offset.
;;   Operand 0: constant printed directly after "za" in the output.
;;   Operand 1: SImode register (constraint "Ucj") printed as the first
;;              element of the bracketed index.
;;   Operand 2: predicate register, printed with the "/z" qualifier.
;;   Operand 3: memory source (aarch64_sve_ldff1_operand).
;; Only enabled under TARGET_STREAMING_SME.
505(define_insn "@aarch64_sme_<optab><mode>"
506 [(set (reg:SME_ZA_I ZA_REGNUM)
507 (unspec:SME_ZA_I
508 [(reg:SME_ZA_I ZA_REGNUM)
509 (reg:DI SME_STATE_REGNUM)
510 (match_operand:DI 0 "const_int_operand")
511 (match_operand:SI 1 "register_operand" "Ucj")
512 (match_operand:<VPRED> 2 "register_operand" "Upl")
513 (match_operand:SME_ZA_I 3 "aarch64_sve_ldff1_operand" "Utf")]
514 SME_LD1))]
515 "TARGET_STREAMING_SME"
516 "ld1<Vesize>\t{ za%0<hv>.<Vetype>[%w1, 0] }, %2/z, %3"
517)
518
;; Variant of the LD1 pattern in which the slice index is a register plus a
;; constant.  Operand 2 is that constant and is printed as the second
;; element of the bracketed index; the insn condition requires it to be
;; less than 128 / <elem_bits> when treated as unsigned.  The predicate and
;; memory operands are therefore numbered 3 and 4 here.
519(define_insn "@aarch64_sme_<optab><mode>_plus"
520 [(set (reg:SME_ZA_I ZA_REGNUM)
521 (unspec:SME_ZA_I
522 [(reg:SME_ZA_I ZA_REGNUM)
523 (reg:DI SME_STATE_REGNUM)
524 (match_operand:DI 0 "const_int_operand")
525 (plus:SI (match_operand:SI 1 "register_operand" "Ucj")
526 (match_operand:SI 2 "const_int_operand"))
527 (match_operand:<VPRED> 3 "register_operand" "Upl")
528 (match_operand:SME_ZA_I 4 "aarch64_sve_ldff1_operand" "Utf")]
529 SME_LD1))]
530 "TARGET_STREAMING_SME
531 && UINTVAL (operands[2]) < 128 / <elem_bits>"
532 "ld1<Vesize>\t{ za%0<hv>.<Vetype>[%w1, %2] }, %3/z, %4"
533)
534
;; LDR into ZA with zero offsets.  Operand 0 is the SImode index register
;; (constraint "Ucj") and operand 1 is the base address register of the
;; VNx16QI memory source.  Enabled under TARGET_SME (not the streaming-only
;; condition used by the LD1 patterns).
535(define_insn "aarch64_sme_ldr0"
536 [(set (reg:VNx16QI ZA_REGNUM)
537 (unspec:VNx16QI
538 [(reg:VNx16QI ZA_REGNUM)
539 (reg:DI SME_STATE_REGNUM)
540 (match_operand:SI 0 "register_operand" "Ucj")
541 (mem:VNx16QI (match_operand 1 "pmode_register_operand" "rk"))]
542 UNSPEC_SME_LDR))]
543 "TARGET_SME"
544 "ldr\tza[%w0, 0], [%1, #0, mul vl]"
545)
546
;; LDR into ZA with a nonzero offset.  Operand 1 is the constant added to
;; the index register (operand 0); operand 3 is the address offset added to
;; the base register (operand 2).  The insn condition calls
;; aarch64_sme_ldr_vnum_offset_p on operands 1 and 3, and the output prints
;; operand 1 for both the ZA offset and the "mul vl" memory offset.
547(define_insn "@aarch64_sme_ldrn<mode>"
548 [(set (reg:VNx16QI ZA_REGNUM)
549 (unspec:VNx16QI
550 [(reg:VNx16QI ZA_REGNUM)
551 (reg:DI SME_STATE_REGNUM)
552 (plus:SI (match_operand:SI 0 "register_operand" "Ucj")
553 (match_operand:SI 1 "const_int_operand"))
554 (mem:VNx16QI
555 (plus:P (match_operand:P 2 "register_operand" "rk")
556 (match_operand:P 3 "aarch64_mov_operand")))]
557 UNSPEC_SME_LDR))]
558 "TARGET_SME
559 && aarch64_sme_ldr_vnum_offset_p (operands[1], operands[3])"
560 "ldr\tza[%w0, %1], [%2, #%1, mul vl]"
561)
562
8d29b7ac
RS
563;; -------------------------------------------------------------------------
564;; ---- Table loads
565;; -------------------------------------------------------------------------
566;; Includes:
567;; - LDR
568;; -------------------------------------------------------------------------
569
;; Unspec code used by the aarch64_restore_zt0 pattern.
570(define_c_enum "unspec" [
571 UNSPEC_RESTORE_ZT0
572])
573
;; Load ZT0 from memory operand 0 (V8DI, constraint "Q").  Expressed as a
;; plain set of ZT0_REGNUM plus a "use" of SME_STATE_REGNUM.  Requires
;; TARGET_SME2.
574(define_insn "aarch64_sme_ldr_zt0"
575 [(set (reg:V8DI ZT0_REGNUM)
576 (match_operand:V8DI 0 "aarch64_sync_memory_operand" "Q"))
577 (use (reg:DI SME_STATE_REGNUM))]
578 "TARGET_SME2"
579 "ldr\tzt0, %0"
580)
581
582;; This version is used after calls to private-ZA functions. Since ZT0_REGNUM
583;; represents the current function's state, it isn't clobbered by private-ZA
584;; functions, so we need to make it depend on the ZA reinitialization code.
;; Prints the same "ldr zt0" instruction as aarch64_sme_ldr_zt0, but the
;; load is wrapped in an unspec that also takes SME_STATE_REGNUM as an
;; input rather than being a plain set.
585(define_insn "aarch64_restore_zt0"
586 [(set (reg:V8DI ZT0_REGNUM)
587 (unspec:V8DI
588 [(reg:DI SME_STATE_REGNUM)
589 (match_operand:V8DI 0 "aarch64_sync_memory_operand" "Q")]
590 UNSPEC_RESTORE_ZT0))]
591 "TARGET_SME2"
592 "ldr\tzt0, %0"
593)
594
4f6ab953
RS
595;; -------------------------------------------------------------------------
596;; ---- Single-vector stores
597;; -------------------------------------------------------------------------
598;; Includes:
599;; - ST1
600;; - STR
601;; -------------------------------------------------------------------------
602
;; Unspec code shared by the two STR-from-ZA patterns below.
603(define_c_enum "unspec" [
604 UNSPEC_SME_STR
605])
606
;; Predicated ST1 from ZA with a zero slice offset.
;;   Operand 0: memory destination; it is read-write ("+Utf") and is also
;;              passed to the unspec through match_dup.
;;   Operand 1: constant printed directly after "za" in the output.
;;   Operand 2: SImode register (constraint "Ucj") printed as the first
;;              element of the bracketed index.
;;   Operand 3: predicate register.
;; Only enabled under TARGET_STREAMING_SME.
607(define_insn "@aarch64_sme_<optab><mode>"
608 [(set (match_operand:SME_ZA_I 0 "aarch64_sve_ldff1_operand" "+Utf")
609 (unspec:SME_ZA_I
610 [(reg:SME_ZA_I ZA_REGNUM)
611 (reg:DI SME_STATE_REGNUM)
612 (match_dup 0)
613 (match_operand:DI 1 "const_int_operand")
614 (match_operand:SI 2 "register_operand" "Ucj")
615 (match_operand:<VPRED> 3 "register_operand" "Upl")]
616 SME_ST1))]
617 "TARGET_STREAMING_SME"
618 "st1<Vesize>\t{ za%1<hv>.<Vetype>[%w2, 0] }, %3, %0"
619)
620
;; Variant of the ST1 pattern in which the slice index is a register plus a
;; constant.  Operand 3 is that constant and is printed as the second
;; element of the bracketed index; the insn condition requires it to be
;; less than 128 / <elem_bits> when treated as unsigned.  The predicate is
;; therefore operand 4 here.
621(define_insn "@aarch64_sme_<optab><mode>_plus"
622 [(set (match_operand:SME_ZA_I 0 "aarch64_sve_ldff1_operand" "+Utf")
623 (unspec:SME_ZA_I
624 [(reg:SME_ZA_I ZA_REGNUM)
625 (reg:DI SME_STATE_REGNUM)
626 (match_dup 0)
627 (match_operand:DI 1 "const_int_operand")
628 (plus:SI (match_operand:SI 2 "register_operand" "Ucj")
629 (match_operand:SI 3 "const_int_operand"))
630 (match_operand:<VPRED> 4 "register_operand" "Upl")]
631 SME_ST1))]
632 "TARGET_STREAMING_SME
633 && UINTVAL (operands[3]) < 128 / <elem_bits>"
634 "st1<Vesize>\t{ za%1<hv>.<Vetype>[%w2, %3] }, %4, %0"
635)
636
;; STR from ZA with zero offsets.  Operand 0 is the SImode index register
;; (constraint "Ucj") and operand 1 is the base address register of the
;; VNx16QI memory destination.  The old memory contents are also an input
;; to the unspec (via match_dup 1).  Enabled under TARGET_SME.
637(define_insn "aarch64_sme_str0"
638 [(set (mem:VNx16QI (match_operand 1 "pmode_register_operand" "rk"))
639 (unspec:VNx16QI
640 [(reg:VNx16QI ZA_REGNUM)
641 (reg:DI SME_STATE_REGNUM)
642 (mem:VNx16QI (match_dup 1))
643 (match_operand:SI 0 "register_operand" "Ucj")]
644 UNSPEC_SME_STR))]
645 "TARGET_SME"
646 "str\tza[%w0, 0], [%1, #0, mul vl]"
647)
648
;; STR from ZA with a nonzero offset.  Operand numbering follows the LDR
;; form: operand 0 is the index register, operand 1 the constant added to
;; it, operand 2 the base register and operand 3 the address offset.  The
;; insn condition reuses aarch64_sme_ldr_vnum_offset_p on operands 1 and 3,
;; and the output prints operand 1 for both the ZA offset and the "mul vl"
;; memory offset.
649(define_insn "@aarch64_sme_strn<mode>"
650 [(set (mem:VNx16QI
651 (plus:P (match_operand:P 2 "register_operand" "rk")
652 (match_operand:P 3 "aarch64_mov_operand")))
653 (unspec:VNx16QI
654 [(reg:VNx16QI ZA_REGNUM)
655 (reg:DI SME_STATE_REGNUM)
656 (mem:VNx16QI (plus:P (match_dup 2) (match_dup 3)))
657 (plus:SI (match_operand:SI 0 "register_operand" "Ucj")
658 (match_operand:SI 1 "const_int_operand"))]
659 UNSPEC_SME_STR))]
660 "TARGET_SME
661 && aarch64_sme_ldr_vnum_offset_p (operands[1], operands[3])"
662 "str\tza[%w0, %1], [%2, #%1, mul vl]"
663)
664
8d29b7ac
RS
665;; -------------------------------------------------------------------------
666;; ---- Table stores
667;; -------------------------------------------------------------------------
668;; Includes:
669;; - STR
670;; -------------------------------------------------------------------------
671
;; Store ZT0 to memory operand 0 (V8DI, constraint "=Q").  Expressed as a
;; plain set from ZT0_REGNUM plus a "use" of SME_STATE_REGNUM.  Requires
;; TARGET_SME2.
672(define_insn "aarch64_sme_str_zt0"
673 [(set (match_operand:V8DI 0 "aarch64_sync_memory_operand" "=Q")
674 (reg:V8DI ZT0_REGNUM))
675 (use (reg:DI SME_STATE_REGNUM))]
676 "TARGET_SME2"
677 "str\tzt0, %0"
678)
679
4f6ab953
RS
680;; -------------------------------------------------------------------------
681;; ---- Single-vector moves
682;; -------------------------------------------------------------------------
683;; Includes:
684;; - MOVA
685;; -------------------------------------------------------------------------
686
;; Merging MOVA from ZA into an SVE register.
;;   Operand 0: destination vector register.
;;   Operand 1: merge input, tied to operand 0 by the "0" constraint.
;;   Operand 2: predicate register, printed with the "/m" qualifier.
;;   Operand 3: constant printed directly after "za" in the output.
;;   Operand 4: SImode register (constraint "Ucj") printed as the first
;;              element of the bracketed index.
687(define_insn "@aarch64_sme_<optab><v_int_container><mode>"
688 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
689 (unspec:SVE_FULL
690 [(reg:<V_INT_CONTAINER> ZA_REGNUM)
691 (reg:DI SME_STATE_REGNUM)
692 (match_operand:SVE_FULL 1 "register_operand" "0")
693 (match_operand:<VPRED> 2 "register_operand" "Upl")
694 (match_operand:DI 3 "const_int_operand")
695 (match_operand:SI 4 "register_operand" "Ucj")]
696 SME_READ))]
697 "TARGET_STREAMING_SME"
698 "mova\t%0.<Vetype>, %2/m, za%3<hv>.<Vetype>[%w4, 0]"
699)
700
;; Variant of the MOVA read pattern in which the slice index is a register
;; plus a constant.  Operand 5 is that constant; the insn condition requires
;; it to be less than 128 / <elem_bits> when treated as unsigned.  The name
;; starts with "*", so no generator function is created for this pattern.
701(define_insn "*aarch64_sme_<optab><v_int_container><mode>_plus"
702 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
703 (unspec:SVE_FULL
704 [(reg:<V_INT_CONTAINER> ZA_REGNUM)
705 (reg:DI SME_STATE_REGNUM)
706 (match_operand:SVE_FULL 1 "register_operand" "0")
707 (match_operand:<VPRED> 2 "register_operand" "Upl")
708 (match_operand:DI 3 "const_int_operand")
709 (plus:SI (match_operand:SI 4 "register_operand" "Ucj")
710 (match_operand:SI 5 "const_int_operand"))]
711 SME_READ))]
712 "TARGET_STREAMING_SME
713 && UINTVAL (operands[5]) < 128 / <elem_bits>"
714 "mova\t%0.<Vetype>, %2/m, za%3<hv>.<Vetype>[%w4, %5]"
715)
716
;; MOVA read using ".q" elements.  ZA is accessed in the VNx1TI_ONLY mode
;; and the predicate (operand 2) is always VNx2BI, whatever SVE_FULL mode
;; the data operands have.  Only the zero-offset form is provided here.
717(define_insn "@aarch64_sme_<optab><VNx1TI_ONLY:mode><SVE_FULL:mode>"
718 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
719 (unspec:SVE_FULL
720 [(reg:VNx1TI_ONLY ZA_REGNUM)
721 (reg:DI SME_STATE_REGNUM)
722 (match_operand:SVE_FULL 1 "register_operand" "0")
723 (match_operand:VNx2BI 2 "register_operand" "Upl")
724 (match_operand:DI 3 "const_int_operand")
725 (match_operand:SI 4 "register_operand" "Ucj")]
726 SME_READ))]
727 "TARGET_STREAMING_SME"
728 "mova\t%0.q, %2/m, za%3<hv>.q[%w4, 0]"
729)
730
;; Merging MOVA from an SVE register into ZA.
;;   Operand 0: constant printed directly after "za" in the output.
;;   Operand 1: SImode register (constraint "Ucj") printed as the first
;;              element of the bracketed index.
;;   Operand 2: predicate register, printed with the "/m" qualifier.
;;   Operand 3: source vector register.
;; The old contents of ZA are an input to the unspec.
731(define_insn "@aarch64_sme_<optab><v_int_container><mode>"
732 [(set (reg:<V_INT_CONTAINER> ZA_REGNUM)
733 (unspec:<V_INT_CONTAINER>
734 [(reg:SVE_FULL ZA_REGNUM)
735 (reg:DI SME_STATE_REGNUM)
736 (match_operand:DI 0 "const_int_operand")
737 (match_operand:SI 1 "register_operand" "Ucj")
738 (match_operand:<VPRED> 2 "register_operand" "Upl")
739 (match_operand:SVE_FULL 3 "register_operand" "w")]
740 SME_WRITE))]
741 "TARGET_STREAMING_SME"
742 "mova\tza%0<hv>.<Vetype>[%w1, 0], %2/m, %3.<Vetype>"
743)
744
;; Variant of the MOVA write pattern in which the slice index is a register
;; plus a constant.  Operand 2 is that constant; the insn condition requires
;; it to be less than 128 / <elem_bits> when treated as unsigned.  The name
;; starts with "*", so no generator function is created for this pattern.
745(define_insn "*aarch64_sme_<optab><v_int_container><mode>_plus"
746 [(set (reg:<V_INT_CONTAINER> ZA_REGNUM)
747 (unspec:<V_INT_CONTAINER>
748 [(reg:SVE_FULL ZA_REGNUM)
749 (reg:DI SME_STATE_REGNUM)
750 (match_operand:DI 0 "const_int_operand")
751 (plus:SI (match_operand:SI 1 "register_operand" "Ucj")
752 (match_operand:SI 2 "const_int_operand"))
753 (match_operand:<VPRED> 3 "register_operand" "Upl")
754 (match_operand:SVE_FULL 4 "register_operand" "w")]
755 SME_WRITE))]
756 "TARGET_STREAMING_SME
757 && UINTVAL (operands[2]) < 128 / <elem_bits>"
758 "mova\tza%0<hv>.<Vetype>[%w1, %2], %3/m, %4.<Vetype>"
759)
760
;; MOVA write using ".q" elements.  ZA is accessed in the VNx1TI_ONLY mode
;; and the predicate (operand 2) is always VNx2BI, whatever SVE_FULL mode
;; the source (operand 3) has.  Only the zero-offset form is provided here.
761(define_insn "@aarch64_sme_<optab><VNx1TI_ONLY:mode><SVE_FULL:mode>"
762 [(set (reg:VNx1TI_ONLY ZA_REGNUM)
763 (unspec:VNx1TI_ONLY
764 [(reg:VNx1TI_ONLY ZA_REGNUM)
765 (reg:DI SME_STATE_REGNUM)
766 (match_operand:DI 0 "const_int_operand")
767 (match_operand:SI 1 "register_operand" "Ucj")
768 (match_operand:VNx2BI 2 "register_operand" "Upl")
769 (match_operand:SVE_FULL 3 "register_operand" "w")]
770 SME_WRITE))]
771 "TARGET_STREAMING_SME"
772 "mova\tza%0<hv>.q[%w1, 0], %2/m, %3.q"
773)
774
775;; -------------------------------------------------------------------------
776;; ---- Zeroing
777;; -------------------------------------------------------------------------
778;; Includes:
779;; - ZERO
780;; -------------------------------------------------------------------------
781
;; Unspec code used by the aarch64_sme_zero_za pattern.
782(define_c_enum "unspec" [UNSPEC_SME_ZERO])
783
;; ZERO on ZA.  Operand 0 is a constant that is passed to
;; aarch64_output_sme_zero_za, which produces the assembly text (presumably
;; operand 0 selects which parts of ZA to zero -- confirm against that
;; function).  The old ZA contents are an input to the unspec.
784(define_insn "aarch64_sme_zero_za"
785 [(set (reg:VNx16QI ZA_REGNUM)
786 (unspec:VNx16QI [(reg:VNx16QI ZA_REGNUM)
787 (reg:DI SME_STATE_REGNUM)
788 (match_operand:DI 0 "const_int_operand")]
789 UNSPEC_SME_ZERO))]
790 "TARGET_SME"
791 {
792 return aarch64_output_sme_zero_za (operands[0]);
793 }
794)
795
796;; =========================================================================
797;; == Binary arithmetic
798;; =========================================================================
799
800;; -------------------------------------------------------------------------
801;; ---- Binary arithmetic on ZA tile
802;; -------------------------------------------------------------------------
803;; Includes:
804;; - ADDHA
805;; - ADDVA
806;; -------------------------------------------------------------------------
807
;; Binary operation that accumulates into ZA.
;;   Operand 0: constant printed directly after "za" in the output.
;;   Operands 1 and 2: predicate registers, both printed with "/m".
;;   Operand 3: source vector register.
;; The mnemonic comes from <optab>; the old ZA contents are an input.
808(define_insn "@aarch64_sme_<optab><mode>"
809 [(set (reg:SME_ZA_SDI ZA_REGNUM)
810 (unspec:SME_ZA_SDI
811 [(reg:SME_ZA_SDI ZA_REGNUM)
812 (reg:DI SME_STATE_REGNUM)
813 (match_operand:DI 0 "const_int_operand")
814 (match_operand:<VPRED> 1 "register_operand" "Upl")
815 (match_operand:<VPRED> 2 "register_operand" "Upl")
816 (match_operand:SME_ZA_SDI 3 "register_operand" "w")]
817 SME_BINARY_SDI))]
818 "TARGET_STREAMING_SME"
819 "<optab>\tza%0.<Vetype>, %1/m, %2/m, %3.<Vetype>"
820)
821
822;; =========================================================================
823;; == Ternary arithmetic
824;; =========================================================================
825
826;; -------------------------------------------------------------------------
827;; ---- [INT] Sum of outer products
828;; -------------------------------------------------------------------------
829;; Includes:
830;; - SMOPA
831;; - SMOPS
832;; - SUMOPA
833;; - SUMOPS
834;; - UMOPA
835;; - UMOPS
836;; - USMOPA
837;; - USMOPS
838;; -------------------------------------------------------------------------
839
;; Integer outer product accumulating into ".s" ZA elements from ".b"
;; sources.
;;   Operand 0: constant printed directly after "za" in the output.
;;   Operands 1 and 2: predicate registers, both printed with "/m".
;;   Operands 3 and 4: the two source vector registers.
840(define_insn "@aarch64_sme_<optab><VNx4SI_ONLY:mode><VNx16QI_ONLY:mode>"
841 [(set (reg:VNx4SI_ONLY ZA_REGNUM)
842 (unspec:VNx4SI_ONLY
843 [(reg:VNx4SI_ONLY ZA_REGNUM)
844 (reg:DI SME_STATE_REGNUM)
845 (match_operand:DI 0 "const_int_operand")
846 (match_operand:<VNx4SI_ONLY:VPRED> 1 "register_operand" "Upl")
847 (match_operand:<VNx4SI_ONLY:VPRED> 2 "register_operand" "Upl")
848 (match_operand:VNx16QI_ONLY 3 "register_operand" "w")
849 (match_operand:VNx16QI_ONLY 4 "register_operand" "w")]
850 SME_INT_MOP))]
851 "TARGET_STREAMING_SME"
852 "<optab>\tza%0.s, %1/m, %2/m, %3.b, %4.b"
853)
854
;; Integer outer product accumulating into ".d" ZA elements from ".h"
;; sources.  Operand roles match the ".s" form (constant after "za", two
;; "/m" predicates, two source vectors).  In addition to
;; TARGET_STREAMING_SME this form requires TARGET_SME_I16I64.
855(define_insn "@aarch64_sme_<optab><VNx2DI_ONLY:mode><VNx8HI_ONLY:mode>"
856 [(set (reg:VNx2DI_ONLY ZA_REGNUM)
857 (unspec:VNx2DI_ONLY
858 [(reg:VNx2DI_ONLY ZA_REGNUM)
859 (reg:DI SME_STATE_REGNUM)
860 (match_operand:DI 0 "const_int_operand")
861 (match_operand:<VNx2DI_ONLY:VPRED> 1 "register_operand" "Upl")
862 (match_operand:<VNx2DI_ONLY:VPRED> 2 "register_operand" "Upl")
863 (match_operand:VNx8HI_ONLY 3 "register_operand" "w")
864 (match_operand:VNx8HI_ONLY 4 "register_operand" "w")]
865 SME_INT_MOP))]
866 "TARGET_STREAMING_SME && TARGET_SME_I16I64"
867 "<optab>\tza%0.d, %1/m, %2/m, %3.h, %4.h"
868)
869
870;; -------------------------------------------------------------------------
871;; ---- [FP] Sum of outer products
872;; -------------------------------------------------------------------------
873;; Includes:
874;; - BFMOPA
875;; - BFMOPS
876;; - FMOPA
877;; - FMOPS
878;; -------------------------------------------------------------------------
879
;; Floating-point outer product accumulating into ZA.
;;   Operand 0: constant printed directly after "za" in the output.
;;   Operands 1 and 2: predicate registers, both printed with "/m".
;;   Operands 3 and 4: the two source vector registers.
;; The pattern iterates over two independent mode iterators, so the insn
;; condition filters the combinations: 32-bit ZA elements are paired with
;; source elements of at most 32 bits, and any other ZA element size is
;; paired with source elements wider than 32 bits.
880(define_insn "@aarch64_sme_<optab><SME_ZA_SDF_I:mode><SME_MOP_HSDF:mode>"
881 [(set (reg:SME_ZA_SDF_I ZA_REGNUM)
882 (unspec:SME_ZA_SDF_I
883 [(reg:SME_ZA_SDF_I ZA_REGNUM)
884 (reg:DI SME_STATE_REGNUM)
885 (match_operand:DI 0 "const_int_operand")
886 (match_operand:<SME_ZA_SDF_I:VPRED> 1 "register_operand" "Upl")
887 (match_operand:<SME_ZA_SDF_I:VPRED> 2 "register_operand" "Upl")
888 (match_operand:SME_MOP_HSDF 3 "register_operand" "w")
889 (match_operand:SME_MOP_HSDF 4 "register_operand" "w")]
890 SME_FP_MOP))]
891 "TARGET_STREAMING_SME
892 && (<SME_ZA_SDF_I:elem_bits> == 32) == (<SME_MOP_HSDF:elem_bits> <= 32)"
893 "<b><optab>\tza%0.<SME_ZA_SDF_I:Vetype>, %1/m, %2/m, %3.<SME_MOP_HSDF:Vetype>, %4.<SME_MOP_HSDF:Vetype>"
894)