]>
Commit | Line | Data |
---|---|---|
dd8090f4 | 1 | ;; Machine description for AArch64 SME. |
a945c346 | 2 | ;; Copyright (C) 2023-2024 Free Software Foundation, Inc. |
dd8090f4 RS |
3 | ;; |
4 | ;; This file is part of GCC. | |
5 | ;; | |
6 | ;; GCC is free software; you can redistribute it and/or modify it | |
7 | ;; under the terms of the GNU General Public License as published by | |
8 | ;; the Free Software Foundation; either version 3, or (at your option) | |
9 | ;; any later version. | |
10 | ;; | |
11 | ;; GCC is distributed in the hope that it will be useful, but | |
12 | ;; WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | ;; General Public License for more details. | |
15 | ;; | |
16 | ;; You should have received a copy of the GNU General Public License | |
17 | ;; along with GCC; see the file COPYING3. If not see | |
18 | ;; <http://www.gnu.org/licenses/>. | |
19 | ||
20 | ;; The file is organised into the following sections (search for the full | |
21 | ;; line): | |
22 | ;; | |
23 | ;; == State management | |
24 | ;; ---- Test current state | |
25 | ;; ---- PSTATE.SM management | |
3af9ceb6 | 26 | ;; ---- PSTATE.ZA management |
4f6ab953 RS |
27 | ;; |
28 | ;; == Loads, stores and moves | |
29 | ;; ---- Single-vector loads | |
8d29b7ac | 30 | ;; ---- Table loads |
4f6ab953 | 31 | ;; ---- Single-vector stores |
8d29b7ac | 32 | ;; ---- Table stores |
4f6ab953 | 33 | ;; ---- Single-vector moves |
c1c267df | 34 | ;; ---- Multi-vector moves |
4f6ab953 RS |
35 | ;; ---- Zeroing |
36 | ;; | |
37 | ;; == Binary arithmetic | |
38 | ;; ---- Binary arithmetic on ZA tile | |
c1c267df RS |
39 | ;; ---- Binary arithmetic on ZA slice |
40 | ;; ---- Binary arithmetic, writing to ZA slice | |
4f6ab953 RS |
41 | ;; |
42 | ;; == Ternary arithmetic | |
c1c267df RS |
43 | ;; ---- [INT] Dot product |
44 | ;; ---- [INT] Ternary widening arithmetic on ZA slice | |
4f6ab953 | 45 | ;; ---- [INT] Sum of outer products |
c1c267df RS |
46 | ;; ---- [FP] Dot product |
47 | ;; ---- [FP] Ternary arithmetic on ZA slice | |
48 | ;; ---- [FP] Ternary widening arithmetic on ZA slice | |
4f6ab953 | 49 | ;; ---- [FP] Sum of outer products |
c1c267df RS |
50 | ;; |
51 | ;; == Table lookup | |
52 | ;; ---- Table lookup | |
dd8090f4 RS |
53 | |
54 | ;; ========================================================================= | |
55 | ;; == State management | |
56 | ;; ========================================================================= | |
57 | ;; | |
58 | ;; Many of the instructions in this section are only valid when SME is | |
59 | ;; present. However, they don't have a TARGET_SME condition since | |
60 | ;; (a) they are only emitted under direct control of aarch64 code and | |
61 | ;; (b) they are sometimes used conditionally, particularly in streaming- | |
62 | ;; compatible code. | |
63 | ;; | |
64 | ;; ========================================================================= | |
65 | ||
66 | ;; ------------------------------------------------------------------------- | |
67 | ;; ---- Test current state | |
68 | ;; ------------------------------------------------------------------------- | |
69 | ||
;; Unspec codes used by the four state-testing patterns in this section
;; (aarch64_old_vg_saved, aarch64_update_vg, aarch64_get_sme_state and
;; aarch64_read_svcr).
70 | (define_c_enum "unspec" [ | |
71 | UNSPEC_OLD_VG_SAVED | |
72 | UNSPEC_UPDATE_VG | |
73 | UNSPEC_GET_SME_STATE | |
74 | UNSPEC_READ_SVCR | |
75 | ]) | |
76 | ||
77 | ;; A marker instruction to say that the old value of the DWARF VG register | |
78 | ;; has been saved to the stack, for CFI purposes. Operand 0 is the old | |
79 | ;; value of the register and operand 1 is the save slot. | |
;; The pattern is modelled as a write to VG_REGNUM computed from the two
;; (mode-less, predicate-less) operands.  The output template is empty and
;; the type is "no_insn", so no assembly text is produced for it.
80 | (define_insn "aarch64_old_vg_saved" | |
81 | [(set (reg:DI VG_REGNUM) | |
82 | (unspec:DI [(match_operand 0) | |
83 | (match_operand 1)] UNSPEC_OLD_VG_SAVED))] | |
84 | "" | |
85 | "" | |
86 | [(set_attr "type" "no_insn")] | |
87 | ) | |
88 | ||
89 | ;; A marker to indicate places where a call temporarily changes VG. | |
;; Modelled as VG_REGNUM being redefined from its own previous value.
;; The output template is empty (type "no_insn"), so nothing is printed.
90 | (define_insn "aarch64_update_vg" | |
91 | [(set (reg:DI VG_REGNUM) | |
92 | (unspec:DI [(reg:DI VG_REGNUM)] UNSPEC_UPDATE_VG))] | |
93 | "" | |
94 | "" | |
95 | [(set_attr "type" "no_insn")] | |
96 | ) | |
97 | ||
;; Call the __arm_sme_state support routine with a BL.  The result is
;; modelled as a TImode value in the register(s) starting at R0_REGNUM.
;; Besides the result, the pattern clobbers only R16, R17, R18, R30 and
;; the condition-code register, so it is described as an ordinary insn
;; with explicit clobbers rather than as a normal call.
98 | (define_insn "aarch64_get_sme_state" | |
99 | [(set (reg:TI R0_REGNUM) | |
100 | (unspec_volatile:TI [(const_int 0)] UNSPEC_GET_SME_STATE)) | |
101 | (clobber (reg:DI R16_REGNUM)) | |
102 | (clobber (reg:DI R17_REGNUM)) | |
103 | (clobber (reg:DI R18_REGNUM)) | |
104 | (clobber (reg:DI R30_REGNUM)) | |
105 | (clobber (reg:CC CC_REGNUM))] | |
106 | "" | |
107 | "bl\t__arm_sme_state" | |
108 | ) | |
109 | ||
;; Read the SVCR system register into DImode general register operand 0
;; using MRS.  The source is an unspec_volatile with no inputs.
110 | (define_insn "aarch64_read_svcr" | |
111 | [(set (match_operand:DI 0 "register_operand" "=r") | |
112 | (unspec_volatile:DI [(const_int 0)] UNSPEC_READ_SVCR))] | |
113 | "" | |
114 | "mrs\t%0, svcr" | |
115 | ) | |
116 | ||
117 | ;; ------------------------------------------------------------------------- | |
118 | ;; ---- PSTATE.SM management | |
119 | ;; ------------------------------------------------------------------------- | |
120 | ;; Includes: | |
121 | ;; - SMSTART SM | |
122 | ;; - SMSTOP SM | |
123 | ;; ------------------------------------------------------------------------- | |
124 | ||
;; Unspec codes for the aarch64_smstart_sm and aarch64_smstop_sm patterns.
125 | (define_c_enum "unspec" [ | |
126 | UNSPEC_SMSTART_SM | |
127 | UNSPEC_SMSTOP_SM | |
128 | ]) | |
129 | ||
130 | ;; Turn on streaming mode. This clobbers all SVE state. | |
131 | ;; | |
132 | ;; Depend on VG_REGNUM to ensure that the VG save slot has already been | |
133 | ;; initialized. | |
;; Pattern layout: an unspec_volatile marker, a (use) of VG_REGNUM, then
;; clobbers of V4x16QI groups starting at V0, V4, ..., V28 and of each
;; predicate register P0-P15 in VNx16BI mode.
;; NOTE(review): the eight V4x16QI clobbers presumably cover V0-V31
;; between them (four registers per group) -- confirm against the mode
;; definition.
134 | (define_insn "aarch64_smstart_sm" | |
135 | [(unspec_volatile [(const_int 0)] UNSPEC_SMSTART_SM) | |
136 | (use (reg:DI VG_REGNUM)) | |
137 | (clobber (reg:V4x16QI V0_REGNUM)) | |
138 | (clobber (reg:V4x16QI V4_REGNUM)) | |
139 | (clobber (reg:V4x16QI V8_REGNUM)) | |
140 | (clobber (reg:V4x16QI V12_REGNUM)) | |
141 | (clobber (reg:V4x16QI V16_REGNUM)) | |
142 | (clobber (reg:V4x16QI V20_REGNUM)) | |
143 | (clobber (reg:V4x16QI V24_REGNUM)) | |
144 | (clobber (reg:V4x16QI V28_REGNUM)) | |
145 | (clobber (reg:VNx16BI P0_REGNUM)) | |
146 | (clobber (reg:VNx16BI P1_REGNUM)) | |
147 | (clobber (reg:VNx16BI P2_REGNUM)) | |
148 | (clobber (reg:VNx16BI P3_REGNUM)) | |
149 | (clobber (reg:VNx16BI P4_REGNUM)) | |
150 | (clobber (reg:VNx16BI P5_REGNUM)) | |
151 | (clobber (reg:VNx16BI P6_REGNUM)) | |
152 | (clobber (reg:VNx16BI P7_REGNUM)) | |
153 | (clobber (reg:VNx16BI P8_REGNUM)) | |
154 | (clobber (reg:VNx16BI P9_REGNUM)) | |
155 | (clobber (reg:VNx16BI P10_REGNUM)) | |
156 | (clobber (reg:VNx16BI P11_REGNUM)) | |
157 | (clobber (reg:VNx16BI P12_REGNUM)) | |
158 | (clobber (reg:VNx16BI P13_REGNUM)) | |
159 | (clobber (reg:VNx16BI P14_REGNUM)) | |
160 | (clobber (reg:VNx16BI P15_REGNUM))] | |
161 | "" | |
162 | "smstart\tsm" | |
163 | ) | |
164 | ||
165 | ;; Turn off streaming mode. This clobbers all SVE state. | |
166 | ;; | |
167 | ;; Depend on VG_REGNUM to ensure that the VG save slot has already been | |
168 | ;; initialized. | |
;; Pattern layout mirrors aarch64_smstart_sm: an unspec_volatile marker
;; (UNSPEC_SMSTOP_SM), a (use) of VG_REGNUM, then clobbers of V4x16QI
;; groups starting at V0, V4, ..., V28 and of each predicate register
;; P0-P15 in VNx16BI mode.  Only the unspec code and the printed
;; mnemonic differ between the two patterns.
169 | (define_insn "aarch64_smstop_sm" | |
170 | [(unspec_volatile [(const_int 0)] UNSPEC_SMSTOP_SM) | |
171 | (use (reg:DI VG_REGNUM)) | |
172 | (clobber (reg:V4x16QI V0_REGNUM)) | |
173 | (clobber (reg:V4x16QI V4_REGNUM)) | |
174 | (clobber (reg:V4x16QI V8_REGNUM)) | |
175 | (clobber (reg:V4x16QI V12_REGNUM)) | |
176 | (clobber (reg:V4x16QI V16_REGNUM)) | |
177 | (clobber (reg:V4x16QI V20_REGNUM)) | |
178 | (clobber (reg:V4x16QI V24_REGNUM)) | |
179 | (clobber (reg:V4x16QI V28_REGNUM)) | |
180 | (clobber (reg:VNx16BI P0_REGNUM)) | |
181 | (clobber (reg:VNx16BI P1_REGNUM)) | |
182 | (clobber (reg:VNx16BI P2_REGNUM)) | |
183 | (clobber (reg:VNx16BI P3_REGNUM)) | |
184 | (clobber (reg:VNx16BI P4_REGNUM)) | |
185 | (clobber (reg:VNx16BI P5_REGNUM)) | |
186 | (clobber (reg:VNx16BI P6_REGNUM)) | |
187 | (clobber (reg:VNx16BI P7_REGNUM)) | |
188 | (clobber (reg:VNx16BI P8_REGNUM)) | |
189 | (clobber (reg:VNx16BI P9_REGNUM)) | |
190 | (clobber (reg:VNx16BI P10_REGNUM)) | |
191 | (clobber (reg:VNx16BI P11_REGNUM)) | |
192 | (clobber (reg:VNx16BI P12_REGNUM)) | |
193 | (clobber (reg:VNx16BI P13_REGNUM)) | |
194 | (clobber (reg:VNx16BI P14_REGNUM)) | |
195 | (clobber (reg:VNx16BI P15_REGNUM))] | |
196 | "" | |
197 | "smstop\tsm" | |
198 | ) | |
3af9ceb6 RS |
199 | |
200 | ;; ------------------------------------------------------------------------- | |
201 | ;; ---- PSTATE.ZA management | |
202 | ;; ------------------------------------------------------------------------- | |
203 | ;; Includes: | |
204 | ;; - SMSTART ZA | |
205 | ;; - SMSTOP ZA | |
206 | ;; plus calls to support routines. | |
207 | ;; ------------------------------------------------------------------------- | |
208 | ||
;; Unspec codes for the PSTATE.ZA management patterns in this section.
;; Each code is used by the pattern of the corresponding name below;
;; UNSPEC_INITIAL_ZERO_ZA is additionally used inside
;; aarch64_commit_lazy_save.
209 | (define_c_enum "unspec" [ | |
210 | UNSPEC_SMSTOP_ZA | |
211 | UNSPEC_INITIAL_ZERO_ZA | |
212 | UNSPEC_TPIDR2_SAVE | |
213 | UNSPEC_TPIDR2_RESTORE | |
214 | UNSPEC_READ_TPIDR2 | |
215 | UNSPEC_WRITE_TPIDR2 | |
216 | UNSPEC_SETUP_LOCAL_TPIDR2 | |
217 | UNSPEC_RESTORE_ZA | |
218 | UNSPEC_START_PRIVATE_ZA_CALL | |
219 | UNSPEC_END_PRIVATE_ZA_CALL | |
220 | UNSPEC_COMMIT_LAZY_SAVE | |
221 | ]) | |
222 | ||
;; Volatile unspec codes for the aarch64_asm_update_za and
;; aarch64_asm_update_zt0 marker patterns.
223 | (define_c_enum "unspecv" [ | |
224 | UNSPECV_ASM_UPDATE_ZA | |
8d29b7ac | 225 | UNSPECV_ASM_UPDATE_ZT0 |
3af9ceb6 RS |
226 | ]) |
227 | ||
228 | ;; Use the ABI-defined routine to commit an uncommitted lazy save. | |
229 | ;; This relies on the current PSTATE.ZA, so depends on SME_STATE_REGNUM. | |
230 | ;; The fake TPIDR2_SETUP_REGNUM register initially holds the incoming | |
231 | ;; value of the architected TPIDR2_EL0. | |
;; Printed as "bl __arm_tpidr2_save".  The result is modelled as a write
;; to ZA_FREE_REGNUM computed from SME_STATE_REGNUM and
;; TPIDR2_SETUP_REGNUM.  The pattern clobbers R14-R18, R30 and the
;; condition-code register.
232 | (define_insn "aarch64_tpidr2_save" | |
233 | [(set (reg:DI ZA_FREE_REGNUM) | |
234 | (unspec:DI [(reg:DI SME_STATE_REGNUM) | |
235 | (reg:DI TPIDR2_SETUP_REGNUM)] UNSPEC_TPIDR2_SAVE)) | |
236 | (clobber (reg:DI R14_REGNUM)) | |
237 | (clobber (reg:DI R15_REGNUM)) | |
238 | (clobber (reg:DI R16_REGNUM)) | |
239 | (clobber (reg:DI R17_REGNUM)) | |
240 | (clobber (reg:DI R18_REGNUM)) | |
241 | (clobber (reg:DI R30_REGNUM)) | |
242 | (clobber (reg:CC CC_REGNUM))] | |
243 | "" | |
244 | "bl\t__arm_tpidr2_save" | |
245 | ) | |
246 | ||
247 | ;; Set PSTATE.ZA to 1. If ZA was previously dormant or active, | |
248 | ;; it remains in the same state afterwards, with the same contents. | |
249 | ;; Otherwise, it goes from off to on with zeroed contents. | |
250 | ;; | |
251 | ;; Later writes of TPIDR2_EL0 to a nonzero value must not be moved | |
252 | ;; up past this instruction, since that could create an invalid | |
253 | ;; combination of having an active lazy save while ZA is off. | |
254 | ;; Create an anti-dependence by reading the current contents | |
255 | ;; of TPIDR2_SETUP_REGNUM. | |
256 | ;; | |
257 | ;; Making this depend on ZA_FREE_REGNUM ensures that contents belonging | |
258 | ;; to the caller have already been saved. That isn't necessary for this | |
259 | ;; instruction itself, since PSTATE.ZA is already 1 if it contains data. | |
260 | ;; But doing this here means that other uses of ZA can just depend on | |
261 | ;; SME_STATE_REGNUM, rather than both SME_STATE_REGNUM and ZA_FREE_REGNUM. | |
;; Pattern summary: SME_STATE_REGNUM is set to the constant 1, and the two
;; (use)s of TPIDR2_SETUP_REGNUM and ZA_FREE_REGNUM supply the ordering
;; dependencies described above.
262 | (define_insn "aarch64_smstart_za" | |
263 | [(set (reg:DI SME_STATE_REGNUM) | |
264 | (const_int 1)) | |
265 | (use (reg:DI TPIDR2_SETUP_REGNUM)) | |
266 | (use (reg:DI ZA_FREE_REGNUM))] | |
267 | "" | |
268 | "smstart\tza" | |
269 | ) | |
270 | ||
271 | ;; Disable ZA and discard its current contents. | |
272 | ;; | |
273 | ;; The ABI says that the ZA save buffer must be null whenever PSTATE.ZA | |
274 | ;; is zero, so earlier writes to TPIDR2_EL0 must not be moved down past | |
275 | ;; this instruction. Depend on TPIDR2_SETUP_REGNUM to ensure this. | |
276 | ;; | |
277 | ;; We can only turn ZA off once we know that it is free (i.e. doesn't | |
278 | ;; contain data belonging to the caller). Depend on ZA_FREE_REGNUM | |
279 | ;; to ensure this. | |
280 | ;; | |
281 | ;; We only turn ZA off when the current function's ZA state is dead, | |
282 | ;; or perhaps if we're sure that the contents are saved. Either way, | |
283 | ;; we know whether ZA is saved or not. | |
;; Pattern summary: SME_STATE_REGNUM is set to the constant 0, and
;; ZA_SAVED_REGNUM is defined from an unspec that reads
;; TPIDR2_SETUP_REGNUM and ZA_FREE_REGNUM, which provides the two
;; dependencies described above.
284 | (define_insn "aarch64_smstop_za" | |
285 | [(set (reg:DI SME_STATE_REGNUM) | |
286 | (const_int 0)) | |
287 | (set (reg:DI ZA_SAVED_REGNUM) | |
288 | (unspec:DI [(reg:DI TPIDR2_SETUP_REGNUM) | |
289 | (reg:DI ZA_FREE_REGNUM)] UNSPEC_SMSTOP_ZA))] | |
290 | "" | |
291 | "smstop\tza" | |
292 | ) | |
293 | ||
294 | ;; Zero ZA after committing a lazy save. The sequencing is enforced | |
295 | ;; by reading ZA_FREE_REGNUM. | |
;; Printed as "zero { za }".  ZA_REGNUM is defined from an unspec that
;; reads SME_STATE_REGNUM as well as ZA_FREE_REGNUM.  The same set appears
;; verbatim as the second element of aarch64_commit_lazy_save.
296 | (define_insn "aarch64_initial_zero_za" | |
297 | [(set (reg:DI ZA_REGNUM) | |
298 | (unspec:DI [(reg:DI SME_STATE_REGNUM) | |
299 | (reg:DI ZA_FREE_REGNUM)] UNSPEC_INITIAL_ZERO_ZA))] | |
300 | "" | |
301 | "zero\t{ za }" | |
302 | ) | |
303 | ||
304 | ;; Initialize the abstract TPIDR2_BLOCK_REGNUM from the contents of | |
305 | ;; the current function's TPIDR2 block. Other instructions can then | |
306 | ;; depend on TPIDR2_BLOCK_REGNUM rather than on the memory block. | |
;; Operand 0 is the V16QI memory reference for the block.  The output
;; template is empty (type "no_insn"), so this only affects dataflow.
307 | (define_insn "aarch64_setup_local_tpidr2" | |
308 | [(set (reg:DI TPIDR2_BLOCK_REGNUM) | |
309 | (unspec:DI [(match_operand:V16QI 0 "memory_operand" "m")] | |
310 | UNSPEC_SETUP_LOCAL_TPIDR2))] | |
311 | "" | |
312 | "" | |
313 | [(set_attr "type" "no_insn")] | |
314 | ) | |
315 | ||
316 | ;; Clear TPIDR2_EL0, cancelling any uncommitted lazy save. | |
;; Printed as an MSR of XZR to TPIDR2_EL0; in RTL this is a plain set of
;; the fake TPIDR2_SETUP_REGNUM register to zero.
317 | (define_insn "aarch64_clear_tpidr2" | |
318 | [(set (reg:DI TPIDR2_SETUP_REGNUM) | |
319 | (const_int 0))] | |
320 | "" | |
321 | "msr\ttpidr2_el0, xzr" | |
322 | ) | |
323 | ||
324 | ;; Point TPIDR2_EL0 to the current function's TPIDR2 block, whose address | |
325 | ;; is given by operand 0. TPIDR2_BLOCK_REGNUM represents the contents of the | |
326 | ;; pointed-to block. | |
;; Printed as an MSR of operand 0 to TPIDR2_EL0.  The unspec also reads
;; TPIDR2_BLOCK_REGNUM, so this insn depends on whichever insn last set
;; that register.
327 | (define_insn "aarch64_write_tpidr2" | |
328 | [(set (reg:DI TPIDR2_SETUP_REGNUM) | |
329 | (unspec:DI [(match_operand 0 "pmode_register_operand" "r") | |
330 | (reg:DI TPIDR2_BLOCK_REGNUM)] UNSPEC_WRITE_TPIDR2))] | |
331 | "" | |
332 | "msr\ttpidr2_el0, %0" | |
333 | ) | |
334 | ||
335 | ;; Check whether ZA has been saved. The system depends on the value that | |
336 | ;; we wrote to TPIDR2_EL0 previously, so it depends on TPIDR2_SETUP_REGNUM. | |
;; Printed as an MRS of TPIDR2_EL0 into DImode register operand 0.
;; The unspec reads ZA_SAVED_REGNUM in addition to TPIDR2_SETUP_REGNUM.
337 | (define_insn "aarch64_read_tpidr2" | |
338 | [(set (match_operand:DI 0 "register_operand" "=r") | |
339 | (unspec:DI [(reg:DI TPIDR2_SETUP_REGNUM) | |
340 | (reg:DI ZA_SAVED_REGNUM)] UNSPEC_READ_TPIDR2))] | |
341 | "" | |
342 | "mrs\t%0, tpidr2_el0" | |
343 | ) | |
344 | ||
345 | ;; Use the ABI-defined routine to restore lazy-saved ZA contents | |
346 | ;; from the TPIDR2 block pointed to by X0. ZA must already be active. | |
;; Printed as "bl __arm_tpidr2_restore".  ZA_SAVED_REGNUM is defined from
;; R0_REGNUM, and SME_STATE_REGNUM is redefined from its previous value.
;; The pattern clobbers R14-R18, R30 and the condition-code register.
347 | (define_insn "aarch64_tpidr2_restore" | |
348 | [(set (reg:DI ZA_SAVED_REGNUM) | |
349 | (unspec:DI [(reg:DI R0_REGNUM)] UNSPEC_TPIDR2_RESTORE)) | |
350 | (set (reg:DI SME_STATE_REGNUM) | |
351 | (unspec:DI [(reg:DI SME_STATE_REGNUM)] UNSPEC_TPIDR2_RESTORE)) | |
352 | (clobber (reg:DI R14_REGNUM)) | |
353 | (clobber (reg:DI R15_REGNUM)) | |
354 | (clobber (reg:DI R16_REGNUM)) | |
355 | (clobber (reg:DI R17_REGNUM)) | |
356 | (clobber (reg:DI R18_REGNUM)) | |
357 | (clobber (reg:DI R30_REGNUM)) | |
358 | (clobber (reg:CC CC_REGNUM))] | |
359 | "" | |
360 | "bl\t__arm_tpidr2_restore" | |
361 | ) | |
362 | ||
363 | ;; Check whether a lazy save set up by aarch64_save_za was committed | |
364 | ;; and restore the saved contents if so. | |
365 | ;; | |
366 | ;; Operand 0 is the address of the current function's TPIDR2 block. | |
;; The insn is printed as "#" and is only split once epilogue_completed
;; is set.  The split sequence is:
;;   1. read TPIDR2_EL0 into R16 (hard-coded; R16 is in the clobber list);
;;   2. emit a compare-and-branch on that value to a new label, using
;;      gen_aarch64_cbnedi1 and emit_likely_jump_insn;
;;   3. call the C++ helper aarch64_restore_za on operand 0;
;;   4. emit the label.
;; NOTE(review): the branch presumably skips the restore when TPIDR2_EL0
;; is still nonzero (i.e. the lazy save was not committed) -- confirm
;; against the definition of aarch64_cbnedi1.
367 | (define_insn_and_split "aarch64_restore_za" | |
368 | [(set (reg:DI ZA_SAVED_REGNUM) | |
369 | (unspec:DI [(match_operand 0 "pmode_register_operand" "r") | |
370 | (reg:DI SME_STATE_REGNUM) | |
371 | (reg:DI TPIDR2_SETUP_REGNUM) | |
372 | (reg:DI ZA_SAVED_REGNUM)] UNSPEC_RESTORE_ZA)) | |
373 | (clobber (reg:DI R0_REGNUM)) | |
374 | (clobber (reg:DI R14_REGNUM)) | |
375 | (clobber (reg:DI R15_REGNUM)) | |
376 | (clobber (reg:DI R16_REGNUM)) | |
377 | (clobber (reg:DI R17_REGNUM)) | |
378 | (clobber (reg:DI R18_REGNUM)) | |
379 | (clobber (reg:DI R30_REGNUM)) | |
380 | (clobber (reg:CC CC_REGNUM))] | |
381 | "" | |
382 | "#" | |
383 | "&& epilogue_completed" | |
384 | [(const_int 0)] | |
385 | { | |
386 | auto label = gen_label_rtx (); | |
387 | auto tpidr2 = gen_rtx_REG (DImode, R16_REGNUM); | |
388 | emit_insn (gen_aarch64_read_tpidr2 (tpidr2)); | |
389 | auto jump = emit_likely_jump_insn (gen_aarch64_cbnedi1 (tpidr2, label)); | |
390 | JUMP_LABEL (jump) = label; | |
391 | ||
392 | aarch64_restore_za (operands[0]); | |
393 | emit_label (label); | |
394 | DONE; | |
395 | } | |
396 | ) | |
397 | ||
398 | ;; This instruction is emitted after asms that alter ZA, in order to model | |
399 | ;; the effect on dataflow. The asm itself can't have ZA as an input or | |
400 | ;; an output, since there is no associated data type. Instead it retains | |
401 | ;; the original "za" clobber, which on its own would indicate that ZA | |
402 | ;; is dead. | |
403 | ;; | |
404 | ;; The operand is a unique identifier. | |
;; ZA_REGNUM (VNx16QI) is redefined from an unspec_volatile that reads its
;; previous value, SME_STATE_REGNUM and const_int operand 0.  The output
;; template is empty (type "no_insn").
405 | (define_insn "aarch64_asm_update_za" | |
406 | [(set (reg:VNx16QI ZA_REGNUM) | |
407 | (unspec_volatile:VNx16QI | |
408 | [(reg:VNx16QI ZA_REGNUM) | |
409 | (reg:DI SME_STATE_REGNUM) | |
410 | (match_operand 0 "const_int_operand")] | |
411 | UNSPECV_ASM_UPDATE_ZA))] | |
412 | "" | |
413 | "" | |
414 | [(set_attr "type" "no_insn")] | |
415 | ) | |
416 | ||
8d29b7ac RS |
417 | ;; A similar pattern for ZT0. |
;; ZT0_REGNUM (V8DI) is redefined from an unspec_volatile that reads its
;; previous value, SME_STATE_REGNUM and const_int operand 0.  The output
;; template is empty (type "no_insn").
418 | (define_insn "aarch64_asm_update_zt0" | |
419 | [(set (reg:V8DI ZT0_REGNUM) | |
420 | (unspec_volatile:V8DI | |
421 | [(reg:V8DI ZT0_REGNUM) | |
422 | (reg:DI SME_STATE_REGNUM) | |
423 | (match_operand 0 "const_int_operand")] | |
424 | UNSPECV_ASM_UPDATE_ZT0))] | |
425 | "" | |
426 | "" | |
427 | [(set_attr "type" "no_insn")] | |
428 | ) | |
429 | ||
3af9ceb6 RS |
430 | ;; This pseudo-instruction is emitted as part of a call to a private-ZA |
431 | ;; function from a function with ZA state. It marks a natural place to set | |
432 | ;; up a lazy save, if that turns out to be necessary. The save itself | |
433 | ;; is managed by the mode-switching pass. | |
;; Modelled as a read-modify-write of LOWERING_REGNUM.  The output
;; template is empty (type "no_insn"), so no assembly is printed.
434 | (define_insn "aarch64_start_private_za_call" | |
435 | [(set (reg:DI LOWERING_REGNUM) | |
436 | (unspec:DI [(reg:DI LOWERING_REGNUM)] UNSPEC_START_PRIVATE_ZA_CALL))] | |
437 | "" | |
438 | "" | |
439 | [(set_attr "type" "no_insn")] | |
440 | ) | |
441 | ||
442 | ;; This pseudo-instruction is emitted as part of a call to a private-ZA | |
443 | ;; function from a function with ZA state. It marks a natural place to restore | |
444 | ;; the current function's ZA contents from the lazy save buffer, if that | |
445 | ;; turns out to be necessary. The restore itself is managed by the | |
446 | ;; mode-switching pass. | |
;; Modelled as a read-modify-write of LOWERING_REGNUM, like
;; aarch64_start_private_za_call but with its own unspec code.  The
;; output template is empty (type "no_insn").
447 | (define_insn "aarch64_end_private_za_call" | |
448 | [(set (reg:DI LOWERING_REGNUM) | |
449 | (unspec:DI [(reg:DI LOWERING_REGNUM)] UNSPEC_END_PRIVATE_ZA_CALL))] | |
450 | "" | |
451 | "" | |
452 | [(set_attr "type" "no_insn")] | |
453 | ) | |
454 | ||
455 | ;; This pseudo-instruction is emitted before a private-ZA function uses | |
456 | ;; PSTATE.ZA state for the first time. The instruction checks whether | |
457 | ;; ZA currently contains data belonging to a caller and commits the | |
458 | ;; lazy save if so. | |
459 | ;; | |
460 | ;; Operand 0 is the incoming value of TPIDR2_EL0. Operand 1 is nonzero | |
461 | ;; if ZA is live, and should therefore be zeroed after committing a save. | |
462 | ;; | |
463 | ;; The instruction is generated by the mode-switching pass. It is a | |
464 | ;; define_insn_and_split rather than a define_expand because of the | |
465 | ;; internal control flow. | |
;; The insn is printed as "#" and its split condition is simply "true".
;; The split sequence is:
;;   1. a compare-and-branch on operand 0 to a new label
;;      (gen_aarch64_cbeqdi1);
;;   2. aarch64_tpidr2_save (the BL to __arm_tpidr2_save);
;;   3. aarch64_clear_tpidr2;
;;   4. aarch64_initial_zero_za, only when operand 1 is nonzero;
;;   5. the label.
;; The clobber list matches that of aarch64_tpidr2_save, and the second
;; set matches the pattern of aarch64_initial_zero_za.
;; NOTE(review): the branch presumably skips steps 2-4 when the incoming
;; TPIDR2_EL0 value is zero -- confirm against aarch64_cbeqdi1.
466 | (define_insn_and_split "aarch64_commit_lazy_save" | |
467 | [(set (reg:DI ZA_FREE_REGNUM) | |
468 | (unspec:DI [(match_operand 0 "pmode_register_operand" "r") | |
469 | (match_operand 1 "const_int_operand") | |
470 | (reg:DI SME_STATE_REGNUM) | |
471 | (reg:DI TPIDR2_SETUP_REGNUM) | |
472 | (reg:VNx16QI ZA_REGNUM)] UNSPEC_COMMIT_LAZY_SAVE)) | |
473 | (set (reg:DI ZA_REGNUM) | |
474 | (unspec:DI [(reg:DI SME_STATE_REGNUM) | |
475 | (reg:DI ZA_FREE_REGNUM)] UNSPEC_INITIAL_ZERO_ZA)) | |
476 | (clobber (reg:DI R14_REGNUM)) | |
477 | (clobber (reg:DI R15_REGNUM)) | |
478 | (clobber (reg:DI R16_REGNUM)) | |
479 | (clobber (reg:DI R17_REGNUM)) | |
480 | (clobber (reg:DI R18_REGNUM)) | |
481 | (clobber (reg:DI R30_REGNUM)) | |
482 | (clobber (reg:CC CC_REGNUM))] | |
483 | "" | |
484 | "#" | |
485 | "true" | |
486 | [(const_int 0)] | |
487 | { | |
488 | auto label = gen_label_rtx (); | |
489 | auto jump = emit_jump_insn (gen_aarch64_cbeqdi1 (operands[0], label)); | |
490 | JUMP_LABEL (jump) = label; | |
491 | emit_insn (gen_aarch64_tpidr2_save ()); | |
492 | emit_insn (gen_aarch64_clear_tpidr2 ()); | |
493 | if (INTVAL (operands[1]) != 0) | |
494 | emit_insn (gen_aarch64_initial_zero_za ()); | |
495 | emit_label (label); | |
496 | DONE; | |
497 | } | |
498 | ) | |
4f6ab953 RS |
499 | |
500 | ;; ========================================================================= | |
501 | ;; == Loads, stores and moves | |
502 | ;; ========================================================================= | |
503 | ||
504 | ;; ------------------------------------------------------------------------- | |
505 | ;; ---- Single-vector loads | |
506 | ;; ------------------------------------------------------------------------- | |
507 | ;; Includes: | |
508 | ;; - LD1 | |
509 | ;; - LDR | |
510 | ;; ------------------------------------------------------------------------- | |
511 | ||
;; Unspec code shared by the aarch64_sme_ldr0 and @aarch64_sme_ldrn<mode>
;; patterns.
512 | (define_c_enum "unspec" [ | |
513 | UNSPEC_SME_LDR | |
514 | ]) | |
515 | ||
;; LD1 of a single ZA slice, with the slice offset fixed at 0.
;; Operand 0: constant printed as the tile number (za%0).
;; Operand 1: SImode slice-index register (constraint Ucj), printed as %w1.
;; Operand 2: governing predicate, printed with /z.
;; Operand 3: memory source.
;; ZA_REGNUM is both read and written, and SME_STATE_REGNUM is read.
516 | (define_insn "@aarch64_sme_<optab><mode>" | |
517 | [(set (reg:SME_ZA_I ZA_REGNUM) | |
518 | (unspec:SME_ZA_I | |
519 | [(reg:SME_ZA_I ZA_REGNUM) | |
520 | (reg:DI SME_STATE_REGNUM) | |
521 | (match_operand:DI 0 "const_int_operand") | |
522 | (match_operand:SI 1 "register_operand" "Ucj") | |
523 | (match_operand:<VPRED> 2 "register_operand" "Upl") | |
524 | (match_operand:SME_ZA_I 3 "aarch64_sve_ldff1_operand" "Utf")] | |
525 | SME_LD1))] | |
526 | "TARGET_STREAMING_SME" | |
527 | "ld1<Vesize>\t{ za%0<hv>.<Vetype>[%w1, 0] }, %2/z, %3" | |
528 | ) | |
529 | ||
;; LD1 of a single ZA slice whose index is register operand 1 plus the
;; constant operand 2.  The insn condition only accepts offsets for which
;; UINTVAL (operands[2]) < 128 / <elem_bits>.  Operand 0 is the tile
;; number, operand 3 the governing predicate (/z) and operand 4 the
;; memory source.
530 | (define_insn "@aarch64_sme_<optab><mode>_plus" | |
531 | [(set (reg:SME_ZA_I ZA_REGNUM) | |
532 | (unspec:SME_ZA_I | |
533 | [(reg:SME_ZA_I ZA_REGNUM) | |
534 | (reg:DI SME_STATE_REGNUM) | |
535 | (match_operand:DI 0 "const_int_operand") | |
536 | (plus:SI (match_operand:SI 1 "register_operand" "Ucj") | |
537 | (match_operand:SI 2 "const_int_operand")) | |
538 | (match_operand:<VPRED> 3 "register_operand" "Upl") | |
539 | (match_operand:SME_ZA_I 4 "aarch64_sve_ldff1_operand" "Utf")] | |
540 | SME_LD1))] | |
541 | "TARGET_STREAMING_SME | |
542 | && UINTVAL (operands[2]) < 128 / <elem_bits>" | |
543 | "ld1<Vesize>\t{ za%0<hv>.<Vetype>[%w1, %2] }, %3/z, %4" | |
544 | ) | |
545 | ||
;; LDR into the ZA vector selected by SImode register operand 0, with both
;; the slice offset and the "mul vl" address offset fixed at 0.
;; Operand 1 is the base address register.  Unlike the LD1 patterns, the
;; condition is TARGET_SME rather than TARGET_STREAMING_SME.
546 | (define_insn "aarch64_sme_ldr0" | |
547 | [(set (reg:VNx16QI ZA_REGNUM) | |
548 | (unspec:VNx16QI | |
549 | [(reg:VNx16QI ZA_REGNUM) | |
550 | (reg:DI SME_STATE_REGNUM) | |
551 | (match_operand:SI 0 "register_operand" "Ucj") | |
552 | (mem:VNx16QI (match_operand 1 "pmode_register_operand" "rk"))] | |
553 | UNSPEC_SME_LDR))] | |
554 | "TARGET_SME" | |
555 | "ldr\tza[%w0, 0], [%1, #0, mul vl]" | |
556 | ) | |
557 | ||
;; LDR into the ZA vector at index (operand 0 + constant operand 1), from
;; the address (operand 2 + operand 3).  The output template prints
;; operand 1 for both the slice offset and the "mul vl" address offset;
;; operand 3 is never printed and is instead validated against operand 1
;; by aarch64_sme_ldr_vnum_offset_p in the insn condition.
558 | (define_insn "@aarch64_sme_ldrn<mode>" | |
559 | [(set (reg:VNx16QI ZA_REGNUM) | |
560 | (unspec:VNx16QI | |
561 | [(reg:VNx16QI ZA_REGNUM) | |
562 | (reg:DI SME_STATE_REGNUM) | |
563 | (plus:SI (match_operand:SI 0 "register_operand" "Ucj") | |
564 | (match_operand:SI 1 "const_int_operand")) | |
565 | (mem:VNx16QI | |
566 | (plus:P (match_operand:P 2 "register_operand" "rk") | |
567 | (match_operand:P 3 "aarch64_mov_operand")))] | |
568 | UNSPEC_SME_LDR))] | |
569 | "TARGET_SME | |
570 | && aarch64_sme_ldr_vnum_offset_p (operands[1], operands[3])" | |
571 | "ldr\tza[%w0, %1], [%2, #%1, mul vl]" | |
572 | ) | |
573 | ||
8d29b7ac RS |
574 | ;; ------------------------------------------------------------------------- |
575 | ;; ---- Table loads | |
576 | ;; ------------------------------------------------------------------------- | |
577 | ;; Includes: | |
578 | ;; - LDR | |
579 | ;; ------------------------------------------------------------------------- | |
580 | ||
;; Unspec code used by aarch64_restore_zt0.
581 | (define_c_enum "unspec" [ | |
582 | UNSPEC_RESTORE_ZT0 | |
583 | ]) | |
584 | ||
;; Load ZT0 from V8DI memory operand 0 with "ldr zt0".  This is a plain
;; set of ZT0_REGNUM plus a (use) of SME_STATE_REGNUM, and it requires
;; TARGET_SME2.
585 | (define_insn "aarch64_sme_ldr_zt0" | |
586 | [(set (reg:V8DI ZT0_REGNUM) | |
587 | (match_operand:V8DI 0 "aarch64_sync_memory_operand" "Q")) | |
588 | (use (reg:DI SME_STATE_REGNUM))] | |
589 | "TARGET_SME2" | |
590 | "ldr\tzt0, %0" | |
591 | ) | |
592 | ||
593 | ;; This version is used after calls to private-ZA functions. Since ZT0_REGNUM | |
594 | ;; represents the current function's state, it isn't clobbered by private-ZA | |
595 | ;; functions, so we need to make it depend on the ZA reinitialization code. | |
;; Prints the same "ldr zt0" instruction as aarch64_sme_ldr_zt0.  Here the
;; source is an UNSPEC_RESTORE_ZT0 that reads SME_STATE_REGNUM together
;; with memory operand 0, instead of a plain memory source with a
;; separate (use).
596 | (define_insn "aarch64_restore_zt0" | |
597 | [(set (reg:V8DI ZT0_REGNUM) | |
598 | (unspec:V8DI | |
599 | [(reg:DI SME_STATE_REGNUM) | |
600 | (match_operand:V8DI 0 "aarch64_sync_memory_operand" "Q")] | |
601 | UNSPEC_RESTORE_ZT0))] | |
602 | "TARGET_SME2" | |
603 | "ldr\tzt0, %0" | |
604 | ) | |
605 | ||
4f6ab953 RS |
606 | ;; ------------------------------------------------------------------------- |
607 | ;; ---- Single-vector stores | |
608 | ;; ------------------------------------------------------------------------- | |
609 | ;; Includes: | |
610 | ;; - ST1 | |
611 | ;; - STR | |
612 | ;; ------------------------------------------------------------------------- | |
613 | ||
;; Unspec code shared by the aarch64_sme_str0 and @aarch64_sme_strn<mode>
;; patterns.
614 | (define_c_enum "unspec" [ | |
615 | UNSPEC_SME_STR | |
616 | ]) | |
617 | ||
;; ST1 of a single ZA slice, with the slice offset fixed at 0.
;; Operand 0: memory destination.  It is marked "+" and repeated via
;;            (match_dup 0) inside the unspec, so the old memory contents
;;            are an input as well as an output.
;; Operand 1: constant printed as the tile number (za%1).
;; Operand 2: SImode slice-index register (constraint Ucj).
;; Operand 3: governing predicate.
618 | (define_insn "@aarch64_sme_<optab><mode>" | |
619 | [(set (match_operand:SME_ZA_I 0 "aarch64_sve_ldff1_operand" "+Utf") | |
620 | (unspec:SME_ZA_I | |
621 | [(reg:SME_ZA_I ZA_REGNUM) | |
622 | (reg:DI SME_STATE_REGNUM) | |
623 | (match_dup 0) | |
624 | (match_operand:DI 1 "const_int_operand") | |
625 | (match_operand:SI 2 "register_operand" "Ucj") | |
626 | (match_operand:<VPRED> 3 "register_operand" "Upl")] | |
627 | SME_ST1))] | |
628 | "TARGET_STREAMING_SME" | |
629 | "st1<Vesize>\t{ za%1<hv>.<Vetype>[%w2, 0] }, %3, %0" | |
630 | ) | |
631 | ||
;; ST1 of a single ZA slice whose index is register operand 2 plus the
;; constant operand 3.  The insn condition only accepts offsets for which
;; UINTVAL (operands[3]) < 128 / <elem_bits>.  Operand 0 is the
;; read-write memory destination, operand 1 the tile number and operand 4
;; the governing predicate.
632 | (define_insn "@aarch64_sme_<optab><mode>_plus" | |
633 | [(set (match_operand:SME_ZA_I 0 "aarch64_sve_ldff1_operand" "+Utf") | |
634 | (unspec:SME_ZA_I | |
635 | [(reg:SME_ZA_I ZA_REGNUM) | |
636 | (reg:DI SME_STATE_REGNUM) | |
637 | (match_dup 0) | |
638 | (match_operand:DI 1 "const_int_operand") | |
639 | (plus:SI (match_operand:SI 2 "register_operand" "Ucj") | |
640 | (match_operand:SI 3 "const_int_operand")) | |
641 | (match_operand:<VPRED> 4 "register_operand" "Upl")] | |
642 | SME_ST1))] | |
643 | "TARGET_STREAMING_SME | |
644 | && UINTVAL (operands[3]) < 128 / <elem_bits>" | |
645 | "st1<Vesize>\t{ za%1<hv>.<Vetype>[%w2, %3] }, %4, %0" | |
646 | ) | |
647 | ||
;; STR of the ZA vector selected by SImode register operand 0, with both
;; the slice offset and the "mul vl" address offset fixed at 0.
;; Operand 1 is the base address register.  The destination memory is
;; also listed as an unspec input via (match_dup 1).  The condition is
;; TARGET_SME rather than TARGET_STREAMING_SME.
648 | (define_insn "aarch64_sme_str0" | |
649 | [(set (mem:VNx16QI (match_operand 1 "pmode_register_operand" "rk")) | |
650 | (unspec:VNx16QI | |
651 | [(reg:VNx16QI ZA_REGNUM) | |
652 | (reg:DI SME_STATE_REGNUM) | |
653 | (mem:VNx16QI (match_dup 1)) | |
654 | (match_operand:SI 0 "register_operand" "Ucj")] | |
655 | UNSPEC_SME_STR))] | |
656 | "TARGET_SME" | |
657 | "str\tza[%w0, 0], [%1, #0, mul vl]" | |
658 | ) | |
659 | ||
;; STR of the ZA vector at index (operand 0 + constant operand 1) to the
;; address (operand 2 + operand 3).  As in @aarch64_sme_ldrn<mode>, the
;; template prints operand 1 for both the slice offset and the "mul vl"
;; address offset, and operand 3 is only validated against operand 1 by
;; aarch64_sme_ldr_vnum_offset_p in the insn condition.
660 | (define_insn "@aarch64_sme_strn<mode>" | |
661 | [(set (mem:VNx16QI | |
662 | (plus:P (match_operand:P 2 "register_operand" "rk") | |
663 | (match_operand:P 3 "aarch64_mov_operand"))) | |
664 | (unspec:VNx16QI | |
665 | [(reg:VNx16QI ZA_REGNUM) | |
666 | (reg:DI SME_STATE_REGNUM) | |
667 | (mem:VNx16QI (plus:P (match_dup 2) (match_dup 3))) | |
668 | (plus:SI (match_operand:SI 0 "register_operand" "Ucj") | |
669 | (match_operand:SI 1 "const_int_operand"))] | |
670 | UNSPEC_SME_STR))] | |
671 | "TARGET_SME | |
672 | && aarch64_sme_ldr_vnum_offset_p (operands[1], operands[3])" | |
673 | "str\tza[%w0, %1], [%2, #%1, mul vl]" | |
674 | ) | |
675 | ||
8d29b7ac RS |
676 | ;; ------------------------------------------------------------------------- |
677 | ;; ---- Table stores | |
678 | ;; ------------------------------------------------------------------------- | |
679 | ;; Includes: | |
680 | ;; - STR | |
681 | ;; ------------------------------------------------------------------------- | |
682 | ||
;; Store ZT0 to V8DI memory operand 0 with "str zt0".  This is a plain set
;; from ZT0_REGNUM plus a (use) of SME_STATE_REGNUM, and it requires
;; TARGET_SME2.
683 | (define_insn "aarch64_sme_str_zt0" | |
684 | [(set (match_operand:V8DI 0 "aarch64_sync_memory_operand" "=Q") | |
685 | (reg:V8DI ZT0_REGNUM)) | |
686 | (use (reg:DI SME_STATE_REGNUM))] | |
687 | "TARGET_SME2" | |
688 | "str\tzt0, %0" | |
689 | ) | |
690 | ||
4f6ab953 RS |
691 | ;; ------------------------------------------------------------------------- |
692 | ;; ---- Single-vector moves | |
693 | ;; ------------------------------------------------------------------------- | |
694 | ;; Includes: | |
695 | ;; - MOVA | |
696 | ;; ------------------------------------------------------------------------- | |
697 | ||
;; MOVA from a ZA slice to vector register operand 0, with merging
;; predication (%2/m) and the slice offset fixed at 0.
;; Operand 1: value for inactive lanes; it is tied to operand 0 by the
;;            "0" constraint.
;; Operand 2: governing predicate.
;; Operand 3: constant printed as the tile number (za%3).
;; Operand 4: SImode slice-index register (constraint Ucj).
698 | (define_insn "@aarch64_sme_<optab><v_int_container><mode>" | |
699 | [(set (match_operand:SVE_FULL 0 "register_operand" "=w") | |
700 | (unspec:SVE_FULL | |
701 | [(reg:<V_INT_CONTAINER> ZA_REGNUM) | |
702 | (reg:DI SME_STATE_REGNUM) | |
703 | (match_operand:SVE_FULL 1 "register_operand" "0") | |
704 | (match_operand:<VPRED> 2 "register_operand" "Upl") | |
705 | (match_operand:DI 3 "const_int_operand") | |
706 | (match_operand:SI 4 "register_operand" "Ucj")] | |
707 | SME_READ))] | |
708 | "TARGET_STREAMING_SME" | |
709 | "mova\t%0.<Vetype>, %2/m, za%3<hv>.<Vetype>[%w4, 0]" | |
710 | ) | |
711 | ||
;; As the ZA-to-vector MOVA above, but the slice index is register
;; operand 4 plus the constant operand 5.  The insn condition requires
;; UINTVAL (operands[5]) < 128 / <elem_bits>.  The "*" prefix on the name
;; means no generator function is created for this pattern.
712 | (define_insn "*aarch64_sme_<optab><v_int_container><mode>_plus" | |
713 | [(set (match_operand:SVE_FULL 0 "register_operand" "=w") | |
714 | (unspec:SVE_FULL | |
715 | [(reg:<V_INT_CONTAINER> ZA_REGNUM) | |
716 | (reg:DI SME_STATE_REGNUM) | |
717 | (match_operand:SVE_FULL 1 "register_operand" "0") | |
718 | (match_operand:<VPRED> 2 "register_operand" "Upl") | |
719 | (match_operand:DI 3 "const_int_operand") | |
720 | (plus:SI (match_operand:SI 4 "register_operand" "Ucj") | |
721 | (match_operand:SI 5 "const_int_operand"))] | |
722 | SME_READ))] | |
723 | "TARGET_STREAMING_SME | |
724 | && UINTVAL (operands[5]) < 128 / <elem_bits>" | |
725 | "mova\t%0.<Vetype>, %2/m, za%3<hv>.<Vetype>[%w4, %5]" | |
726 | ) | |
727 | ||
;; Quadword (.q) form of the ZA-to-vector MOVA.  ZA is read in the
;; VNx1TI_ONLY mode, the governing predicate is always VNx2BI, and the
;; slice offset is fixed at 0.  This block has no _plus counterpart.
728 | (define_insn "@aarch64_sme_<optab><VNx1TI_ONLY:mode><SVE_FULL:mode>" | |
729 | [(set (match_operand:SVE_FULL 0 "register_operand" "=w") | |
730 | (unspec:SVE_FULL | |
731 | [(reg:VNx1TI_ONLY ZA_REGNUM) | |
732 | (reg:DI SME_STATE_REGNUM) | |
733 | (match_operand:SVE_FULL 1 "register_operand" "0") | |
734 | (match_operand:VNx2BI 2 "register_operand" "Upl") | |
735 | (match_operand:DI 3 "const_int_operand") | |
736 | (match_operand:SI 4 "register_operand" "Ucj")] | |
737 | SME_READ))] | |
738 | "TARGET_STREAMING_SME" | |
739 | "mova\t%0.q, %2/m, za%3<hv>.q[%w4, 0]" | |
740 | ) | |
741 | ||
;; MOVA from vector register operand 3 to a ZA slice, with merging
;; predication (%2/m) and the slice offset fixed at 0.
;; Operand 0: constant printed as the tile number (za%0).
;; Operand 1: SImode slice-index register (constraint Ucj).
;; Operand 2: governing predicate.
;; Operand 3: source vector.
;; ZA_REGNUM is both read and written, since the write is a merging one.
742 | (define_insn "@aarch64_sme_<optab><v_int_container><mode>" | |
743 | [(set (reg:<V_INT_CONTAINER> ZA_REGNUM) | |
744 | (unspec:<V_INT_CONTAINER> | |
745 | [(reg:SVE_FULL ZA_REGNUM) | |
746 | (reg:DI SME_STATE_REGNUM) | |
747 | (match_operand:DI 0 "const_int_operand") | |
748 | (match_operand:SI 1 "register_operand" "Ucj") | |
749 | (match_operand:<VPRED> 2 "register_operand" "Upl") | |
750 | (match_operand:SVE_FULL 3 "register_operand" "w")] | |
751 | SME_WRITE))] | |
752 | "TARGET_STREAMING_SME" | |
753 | "mova\tza%0<hv>.<Vetype>[%w1, 0], %2/m, %3.<Vetype>" | |
754 | ) | |
755 | ||
;; As the vector-to-ZA MOVA above, but the slice index is register
;; operand 1 plus the constant operand 2.  The insn condition requires
;; UINTVAL (operands[2]) < 128 / <elem_bits>.  Operand 3 is the governing
;; predicate and operand 4 the source vector.
756 | (define_insn "*aarch64_sme_<optab><v_int_container><mode>_plus" | |
757 | [(set (reg:<V_INT_CONTAINER> ZA_REGNUM) | |
758 | (unspec:<V_INT_CONTAINER> | |
759 | [(reg:SVE_FULL ZA_REGNUM) | |
760 | (reg:DI SME_STATE_REGNUM) | |
761 | (match_operand:DI 0 "const_int_operand") | |
762 | (plus:SI (match_operand:SI 1 "register_operand" "Ucj") | |
763 | (match_operand:SI 2 "const_int_operand")) | |
764 | (match_operand:<VPRED> 3 "register_operand" "Upl") | |
765 | (match_operand:SVE_FULL 4 "register_operand" "w")] | |
766 | SME_WRITE))] | |
767 | "TARGET_STREAMING_SME | |
768 | && UINTVAL (operands[2]) < 128 / <elem_bits>" | |
769 | "mova\tza%0<hv>.<Vetype>[%w1, %2], %3/m, %4.<Vetype>" | |
770 | ) | |
771 | ||
;; Predicated write of an SVE_FULL vector to one ZA tile slice using the
;; .q (VNx1TI) element size.
;;   operand 0: constant tile number, printed after "za"
;;   operand 1: SI register selecting the slice, printed as %w1
;;   operand 2: VNx2BI predicate, printed as %2/m
;;   operand 3: source vector, printed as %3.q
;; The slice offset is always printed as 0.
772 | (define_insn "@aarch64_sme_<optab><VNx1TI_ONLY:mode><SVE_FULL:mode>" | |
773 | [(set (reg:VNx1TI_ONLY ZA_REGNUM) | |
774 | (unspec:VNx1TI_ONLY | |
775 | [(reg:VNx1TI_ONLY ZA_REGNUM) | |
776 | (reg:DI SME_STATE_REGNUM) | |
777 | (match_operand:DI 0 "const_int_operand") | |
778 | (match_operand:SI 1 "register_operand" "Ucj") | |
779 | (match_operand:VNx2BI 2 "register_operand" "Upl") | |
780 | (match_operand:SVE_FULL 3 "register_operand" "w")] | |
781 | SME_WRITE))] | |
782 | "TARGET_STREAMING_SME" | |
783 | "mova\tza%0<hv>.q[%w1, 0], %2/m, %3.q" | |
784 | ) | |
785 | ||
c1c267df RS |
786 | ;; ------------------------------------------------------------------------- |
787 | ;; ---- Multi-vector moves | |
788 | ;; ------------------------------------------------------------------------- | |
789 | ;; Includes: | |
790 | ;; - MOVA | |
791 | ;; ------------------------------------------------------------------------- | |
792 | ||
;; Unpredicated read of a range of ZA tile slices into an aligned
;; SVE_FULLx24 register tuple.
;;   operand 0: destination tuple
;;   operand 1: constant tile number, printed after "za"
;;   operand 2: SI register selecting the first slice, printed as %w2
;; The output code synthesises operands[3] = <vector_count> - 1 so that the
;; template can print the slice range as "0:%3".
793 | (define_insn "@aarch64_sme_<optab><mode><mode>" | |
794 | [(set (match_operand:SVE_FULLx24 0 "aligned_register_operand" "=Uw<vector_count>") | |
795 | (unspec:SVE_FULLx24 | |
796 | [(reg:SVE_FULLx24 ZA_REGNUM) | |
797 | (reg:DI SME_STATE_REGNUM) | |
798 | (match_operand:DI 1 "const_int_operand") | |
799 | (match_operand:SI 2 "register_operand" "Ucj")] | |
800 | SME_READ))] | |
801 | "TARGET_STREAMING_SME2" | |
802 | { | |
803 | operands[3] = GEN_INT (<vector_count> - 1); | |
804 | return "mova\t%0, za%1<hv>.<Vetype>[%w2, 0:%3]"; | |
805 | } | |
806 | ) | |
807 | ||
;; Form of the multi-vector ZA tile-slice read in which the first slice is
;; given as (plus reg const): operand 2 is the SI register and operand 3
;; the constant offset.  The insn condition requires the offset (read as
;; unsigned via UINTVAL) to be a multiple of <vector_count> and below
;; 128 / <elem_bits>.  The output code synthesises
;; operands[4] = offset + <vector_count> - 1 so that the template can print
;; the slice range as "%3:%4".
808 | (define_insn "*aarch64_sme_<optab><mode><mode>_plus" | |
809 | [(set (match_operand:SVE_FULLx24 0 "aligned_register_operand" "=Uw<vector_count>") | |
810 | (unspec:SVE_FULLx24 | |
811 | [(reg:SVE_FULLx24 ZA_REGNUM) | |
812 | (reg:DI SME_STATE_REGNUM) | |
813 | (match_operand:DI 1 "const_int_operand") | |
814 | (plus:SI | |
815 | (match_operand:SI 2 "register_operand" "Ucj") | |
816 | (match_operand:SI 3 "const_int_operand"))] | |
817 | SME_READ))] | |
818 | "TARGET_STREAMING_SME2 | |
819 | && UINTVAL (operands[3]) % <vector_count> == 0 | |
820 | && UINTVAL (operands[3]) < 128 / <elem_bits>" | |
821 | { | |
822 | operands[4] = GEN_INT (INTVAL (operands[3]) + <vector_count> - 1); | |
823 | return "mova\t%0, za%1<hv>.<Vetype>[%w2, %3:%4]"; | |
824 | } | |
825 | ) | |
826 | ||
;; Read from ZA, addressed as "za.d[%w1, 0, vgx<vector_count>]", into an
;; aligned SVE_DIx24 register tuple (operand 0).  Operand 1 is the SI
;; register used as the index; the offset is always printed as 0.
827 | (define_insn "@aarch64_sme_read<mode>" | |
828 | [(set (match_operand:SVE_DIx24 0 "aligned_register_operand" "=Uw<vector_count>") | |
829 | (unspec:SVE_DIx24 | |
830 | [(reg:SVE_DIx24 ZA_REGNUM) | |
831 | (reg:DI SME_STATE_REGNUM) | |
832 | (match_operand:SI 1 "register_operand" "Uci")] | |
833 | UNSPEC_SME_READ))] | |
834 | "TARGET_STREAMING_SME2" | |
835 | "mova\t%0, za.d[%w1, 0, vgx<vector_count>]" | |
836 | ) | |
837 | ||
;; Form of aarch64_sme_read<mode> in which the index is (plus reg const):
;; operand 1 is the SI register and operand 2 a constant accepted by
;; const_0_to_7_operand, printed as the offset in
;; "za.d[%w1, %2, vgx<vector_count>]".
838 | (define_insn "*aarch64_sme_read<mode>_plus" | |
839 | [(set (match_operand:SVE_DIx24 0 "aligned_register_operand" "=Uw<vector_count>") | |
840 | (unspec:SVE_DIx24 | |
841 | [(reg:SVE_DIx24 ZA_REGNUM) | |
842 | (reg:DI SME_STATE_REGNUM) | |
843 | (plus:SI (match_operand:SI 1 "register_operand" "Uci") | |
844 | (match_operand:SI 2 "const_0_to_7_operand"))] | |
845 | UNSPEC_SME_READ))] | |
846 | "TARGET_STREAMING_SME2" | |
847 | "mova\t%0, za.d[%w1, %2, vgx<vector_count>]" | |
848 | ) | |
849 | ||
;; Unpredicated write of an aligned SVE_FULLx24 register tuple to a range
;; of ZA tile slices.
;;   operand 0: constant tile number, printed after "za"
;;   operand 1: SI register selecting the first slice, printed as %w1
;;   operand 2: source tuple
;; The output code synthesises operands[3] = <vector_count> - 1 so that the
;; template can print the slice range as "0:%3".
850 | (define_insn "@aarch64_sme_<optab><mode><mode>" | |
851 | [(set (reg:SVE_FULLx24 ZA_REGNUM) | |
852 | (unspec:SVE_FULLx24 | |
853 | [(reg:SVE_FULLx24 ZA_REGNUM) | |
854 | (reg:DI SME_STATE_REGNUM) | |
855 | (match_operand:DI 0 "const_int_operand") | |
856 | (match_operand:SI 1 "register_operand" "Ucj") | |
857 | (match_operand:SVE_FULLx24 2 "aligned_register_operand" "Uw<vector_count>")] | |
858 | SME_WRITE))] | |
859 | "TARGET_STREAMING_SME2" | |
860 | { | |
861 | operands[3] = GEN_INT (<vector_count> - 1); | |
862 | return "mova\tza%0<hv>.<Vetype>[%w1, 0:%3], %2"; | |
863 | } | |
864 | ) | |
865 | ||
;; Form of the multi-vector ZA tile-slice write in which the first slice is
;; given as (plus reg const): operand 1 is the SI register and operand 2
;; the constant offset.  The insn condition requires the offset (read as
;; unsigned via UINTVAL) to be a multiple of <vector_count> and below
;; 128 / <elem_bits>.  The output code synthesises
;; operands[4] = offset + <vector_count> - 1 so that the template can print
;; the slice range as "%2:%4".  Operand 3 is the source tuple.
866 | (define_insn "*aarch64_sme_<optab><mode><mode>_plus" | |
867 | [(set (reg:SVE_FULLx24 ZA_REGNUM) | |
868 | (unspec:SVE_FULLx24 | |
869 | [(reg:SVE_FULLx24 ZA_REGNUM) | |
870 | (reg:DI SME_STATE_REGNUM) | |
871 | (match_operand:DI 0 "const_int_operand") | |
872 | (plus:SI | |
873 | (match_operand:SI 1 "register_operand" "Ucj") | |
874 | (match_operand:SI 2 "const_int_operand")) | |
875 | (match_operand:SVE_FULLx24 3 "aligned_register_operand" "Uw<vector_count>")] | |
876 | SME_WRITE))] | |
877 | "TARGET_STREAMING_SME2 | |
878 | && UINTVAL (operands[2]) % <vector_count> == 0 | |
879 | && UINTVAL (operands[2]) < 128 / <elem_bits>" | |
880 | { | |
881 | operands[4] = GEN_INT (INTVAL (operands[2]) + <vector_count> - 1); | |
882 | return "mova\tza%0<hv>.<Vetype>[%w1, %2:%4], %3"; | |
883 | } | |
884 | ) | |
885 | ||
;; Write an aligned SVE_DIx24 register tuple (operand 1) to ZA, addressed
;; as "za.d[%w0, 0, vgx<vector_count>]".  Operand 0 is the SI register used
;; as the index; the offset is always printed as 0.
;;
;; The unspec is UNSPEC_SME_WRITE rather than UNSPEC_SME_READ so that a
;; store to ZA is not labelled with the code used by the load pattern
;; aarch64_sme_read<mode>.  The base and _plus forms must use the same code.
;; NOTE(review): relies on UNSPEC_SME_WRITE being declared alongside the
;; other SME unspecs -- confirm before committing.
886 | (define_insn "@aarch64_sme_write<mode>" | |
887 | [(set (reg:SVE_DIx24 ZA_REGNUM) | |
888 | (unspec:SVE_DIx24 | |
889 | [(reg:SVE_DIx24 ZA_REGNUM) | |
890 | (reg:DI SME_STATE_REGNUM) | |
891 | (match_operand:SI 0 "register_operand" "Uci") | |
892 | (match_operand:SVE_DIx24 1 "aligned_register_operand" "Uw<vector_count>")] | |
893 | UNSPEC_SME_WRITE))] | |
894 | "TARGET_STREAMING_SME2" | |
895 | "mova\tza.d[%w0, 0, vgx<vector_count>], %1" | |
896 | ) | |
897 | |
;; Form of aarch64_sme_write<mode> in which the index is (plus reg const):
;; operand 0 is the SI register and operand 1 a constant accepted by
;; const_0_to_7_operand, printed as the offset.  Operand 2 is the source
;; tuple.
898 | (define_insn "*aarch64_sme_write<mode>_plus" | |
899 | [(set (reg:SVE_DIx24 ZA_REGNUM) | |
900 | (unspec:SVE_DIx24 | |
901 | [(reg:SVE_DIx24 ZA_REGNUM) | |
902 | (reg:DI SME_STATE_REGNUM) | |
903 | (plus:SI (match_operand:SI 0 "register_operand" "Uci") | |
904 | (match_operand:SI 1 "const_0_to_7_operand")) | |
905 | (match_operand:SVE_DIx24 2 "aligned_register_operand" "Uw<vector_count>")] | |
906 | UNSPEC_SME_WRITE))] | |
907 | "TARGET_STREAMING_SME2" | |
908 | "mova\tza.d[%w0, %1, vgx<vector_count>], %2" | |
909 | ) | |
910 | ||
4f6ab953 RS |
911 | ;; ------------------------------------------------------------------------- |
912 | ;; ---- Zeroing | |
913 | ;; ------------------------------------------------------------------------- | |
914 | ;; Includes: | |
915 | ;; - ZERO | |
916 | ;; ------------------------------------------------------------------------- | |
917 | ||
;; Add UNSPEC_SME_ZERO to the "unspec" enumeration; it is the unspec code
;; used by aarch64_sme_zero_za.
918 | (define_c_enum "unspec" [UNSPEC_SME_ZERO]) | |
919 | ||
;; Zero (part of) ZA.  ZA_REGNUM is both set and read in VNx16QI mode.
;; Operand 0 is a DI constant that is handed to aarch64_output_sme_zero_za,
;; which produces the assembly text.
;; NOTE(review): operand 0 presumably selects which tiles are zeroed --
;; confirm against aarch64_output_sme_zero_za.
;; Unlike the streaming-only patterns, the condition here is plain
;; TARGET_SME.
920 | (define_insn "aarch64_sme_zero_za" | |
921 | [(set (reg:VNx16QI ZA_REGNUM) | |
922 | (unspec:VNx16QI [(reg:VNx16QI ZA_REGNUM) | |
923 | (reg:DI SME_STATE_REGNUM) | |
924 | (match_operand:DI 0 "const_int_operand")] | |
925 | UNSPEC_SME_ZERO))] | |
926 | "TARGET_SME" | |
927 | { | |
928 | return aarch64_output_sme_zero_za (operands[0]); | |
929 | } | |
930 | ) | |
931 | ||
c1c267df RS |
;; Zero ZT0: sets the V8DI register ZT0_REGNUM to (const_int 0) and records
;; a use of SME_STATE_REGNUM.  Emits "zero { zt0 }" and requires TARGET_SME2.
932 | (define_insn "aarch64_sme_zero_zt0" |
933 | [(set (reg:V8DI ZT0_REGNUM) | |
934 | (const_int 0)) | |
935 | (use (reg:DI SME_STATE_REGNUM))] | |
936 | "TARGET_SME2" | |
937 | "zero\t{ zt0 }" | |
938 | ) | |
939 | ||
4f6ab953 RS |
940 | ;; ========================================================================= |
941 | ;; == Binary arithmetic | |
942 | ;; ========================================================================= | |
943 | ||
944 | ;; ------------------------------------------------------------------------- | |
945 | ;; ---- Binary arithmetic on ZA tile | |
946 | ;; ------------------------------------------------------------------------- | |
947 | ;; Includes: | |
948 | ;; - ADDHA | |
949 | ;; - ADDVA | |
950 | ;; ------------------------------------------------------------------------- | |
951 | ||
;; Binary arithmetic on a whole ZA tile, iterating over SME_BINARY_SDI
;; (the section header lists ADDHA and ADDVA).
;;   operand 0: constant tile number, printed after "za"
;;   operands 1, 2: <VPRED> predicates, printed as %1/m and %2/m
;;   operand 3: SME_ZA_SDI source vector
952 | (define_insn "@aarch64_sme_<optab><mode>" | |
953 | [(set (reg:SME_ZA_SDI ZA_REGNUM) | |
954 | (unspec:SME_ZA_SDI | |
955 | [(reg:SME_ZA_SDI ZA_REGNUM) | |
956 | (reg:DI SME_STATE_REGNUM) | |
957 | (match_operand:DI 0 "const_int_operand") | |
958 | (match_operand:<VPRED> 1 "register_operand" "Upl") | |
959 | (match_operand:<VPRED> 2 "register_operand" "Upl") | |
960 | (match_operand:SME_ZA_SDI 3 "register_operand" "w")] | |
961 | SME_BINARY_SDI))] | |
962 | "TARGET_STREAMING_SME" | |
963 | "<optab>\tza%0.<Vetype>, %1/m, %2/m, %3.<Vetype>" | |
964 | ) | |
965 | ||
c1c267df RS |
966 | ;; ------------------------------------------------------------------------- |
967 | ;; ---- Binary arithmetic on ZA slice | |
968 | ;; ------------------------------------------------------------------------- | |
969 | ;; Includes: | |
970 | ;; - ADD | |
971 | ;; ------------------------------------------------------------------------- | |
972 | ||
;; Integer binary arithmetic on ZA slices (SME_BINARY_SLICE_SDI), with ZA
;; as both input and output.  Operand 0 is the SI index register and
;; operand 1 an aligned SME_ZA_SDIx24 register tuple.  The offset is always
;; printed as 0.
973 | (define_insn "@aarch64_sme_<optab><mode>" | |
974 | [(set (reg:SME_ZA_SDIx24 ZA_REGNUM) | |
975 | (unspec:SME_ZA_SDIx24 | |
976 | [(reg:SME_ZA_SDIx24 ZA_REGNUM) | |
977 | (reg:DI SME_STATE_REGNUM) | |
978 | (match_operand:SI 0 "register_operand" "Uci") | |
979 | (match_operand:SME_ZA_SDIx24 1 "aligned_register_operand" "Uw<vector_count>")] | |
980 | SME_BINARY_SLICE_SDI))] | |
981 | "TARGET_STREAMING_SME2" | |
982 | "<optab>\tza.<Vetype>[%w0, 0, vgx<vector_count>], %1" | |
983 | ) | |
984 | ||
;; Form of the SME_BINARY_SLICE_SDI pattern in which the index is
;; (plus reg const): operand 0 is the SI register and operand 1 a constant
;; accepted by const_0_to_7_operand, printed as the offset.  Operand 2 is
;; the aligned register tuple.
985 | (define_insn "*aarch64_sme_<optab><mode>_plus" | |
986 | [(set (reg:SME_ZA_SDIx24 ZA_REGNUM) | |
987 | (unspec:SME_ZA_SDIx24 | |
988 | [(reg:SME_ZA_SDIx24 ZA_REGNUM) | |
989 | (reg:DI SME_STATE_REGNUM) | |
990 | (plus:SI (match_operand:SI 0 "register_operand" "Uci") | |
991 | (match_operand:SI 1 "const_0_to_7_operand")) | |
992 | (match_operand:SME_ZA_SDIx24 2 "aligned_register_operand" "Uw<vector_count>")] | |
993 | SME_BINARY_SLICE_SDI))] | |
994 | "TARGET_STREAMING_SME2" | |
995 | "<optab>\tza.<Vetype>[%w0, %1, vgx<vector_count>], %2" | |
996 | ) | |
997 | ||
;; Counterpart of the SME_BINARY_SLICE_SDI pattern for the SME_ZA_SDFx24
;; modes, iterating over SME_BINARY_SLICE_SDF.  Operand 0 is the SI index
;; register and operand 1 an aligned register tuple.  The offset is always
;; printed as 0.
998 | (define_insn "@aarch64_sme_<optab><mode>" | |
999 | [(set (reg:SME_ZA_SDFx24 ZA_REGNUM) | |
1000 | (unspec:SME_ZA_SDFx24 | |
1001 | [(reg:SME_ZA_SDFx24 ZA_REGNUM) | |
1002 | (reg:DI SME_STATE_REGNUM) | |
1003 | (match_operand:SI 0 "register_operand" "Uci") | |
1004 | (match_operand:SME_ZA_SDFx24 1 "aligned_register_operand" "Uw<vector_count>")] | |
1005 | SME_BINARY_SLICE_SDF))] | |
1006 | "TARGET_STREAMING_SME2" | |
1007 | "<optab>\tza.<Vetype>[%w0, 0, vgx<vector_count>], %1" | |
1008 | ) | |
1009 | ||
;; Form of the SME_BINARY_SLICE_SDF pattern in which the index is
;; (plus reg const): operand 0 is the SI register and operand 1 a constant
;; accepted by const_0_to_7_operand, printed as the offset.  Operand 2 is
;; the aligned register tuple.
1010 | (define_insn "*aarch64_sme_<optab><mode>_plus" | |
1011 | [(set (reg:SME_ZA_SDFx24 ZA_REGNUM) | |
1012 | (unspec:SME_ZA_SDFx24 | |
1013 | [(reg:SME_ZA_SDFx24 ZA_REGNUM) | |
1014 | (reg:DI SME_STATE_REGNUM) | |
1015 | (plus:SI (match_operand:SI 0 "register_operand" "Uci") | |
1016 | (match_operand:SI 1 "const_0_to_7_operand")) | |
1017 | (match_operand:SME_ZA_SDFx24 2 "aligned_register_operand" "Uw<vector_count>")] | |
1018 | SME_BINARY_SLICE_SDF))] | |
1019 | "TARGET_STREAMING_SME2" | |
1020 | "<optab>\tza.<Vetype>[%w0, %1, vgx<vector_count>], %2" | |
1021 | ) | |
1022 | ||
1023 | ;; ------------------------------------------------------------------------- | |
1024 | ;; ---- Binary arithmetic, writing to ZA slice | |
1025 | ;; ------------------------------------------------------------------------- | |
1026 | ;; Includes: | |
1027 | ;; - ADD | |
1028 | ;; - SUB | |
1029 | ;; ------------------------------------------------------------------------- | |
1030 | ||
;; Binary arithmetic on two aligned SME_ZA_SDIx24 register tuples
;; (operands 1 and 2) with the result going to ZA slices; iterates over
;; SME_BINARY_WRITE_SLICE_SDI and prints the mnemonic from <sme_int_op>.
;; Operand 0 is the SI index register; the offset is always printed as 0.
1031 | (define_insn "@aarch64_sme_<optab><mode>" | |
1032 | [(set (reg:SME_ZA_SDIx24 ZA_REGNUM) | |
1033 | (unspec:SME_ZA_SDIx24 | |
1034 | [(reg:SME_ZA_SDIx24 ZA_REGNUM) | |
1035 | (reg:DI SME_STATE_REGNUM) | |
1036 | (match_operand:SI 0 "register_operand" "Uci") | |
1037 | (match_operand:SME_ZA_SDIx24 1 "aligned_register_operand" "Uw<vector_count>") | |
1038 | (match_operand:SME_ZA_SDIx24 2 "aligned_register_operand" "Uw<vector_count>")] | |
1039 | SME_BINARY_WRITE_SLICE_SDI))] | |
1040 | "TARGET_STREAMING_SME2" | |
1041 | "<sme_int_op>\tza.<Vetype>[%w0, 0, vgx<vector_count>], %1, %2" | |
1042 | ) | |
1043 | ||
;; Form of the SME_BINARY_WRITE_SLICE_SDI tuple pattern in which the index
;; is (plus reg const): operand 0 is the SI register and operand 1 a
;; constant accepted by const_0_to_7_operand, printed as the offset.
;; Operands 2 and 3 are the aligned register tuples.
1044 | (define_insn "*aarch64_sme_<optab><mode>_plus" | |
1045 | [(set (reg:SME_ZA_SDIx24 ZA_REGNUM) | |
1046 | (unspec:SME_ZA_SDIx24 | |
1047 | [(reg:SME_ZA_SDIx24 ZA_REGNUM) | |
1048 | (reg:DI SME_STATE_REGNUM) | |
1049 | (plus:SI (match_operand:SI 0 "register_operand" "Uci") | |
1050 | (match_operand:SI 1 "const_0_to_7_operand")) | |
1051 | (match_operand:SME_ZA_SDIx24 2 "aligned_register_operand" "Uw<vector_count>") | |
1052 | (match_operand:SME_ZA_SDIx24 3 "aligned_register_operand" "Uw<vector_count>")] | |
1053 | SME_BINARY_WRITE_SLICE_SDI))] | |
1054 | "TARGET_STREAMING_SME2" | |
1055 | "<sme_int_op>\tza.<Vetype>[%w0, %1, vgx<vector_count>], %2, %3" | |
1056 | ) | |
1057 | ||
;; "Single" form of the SME_BINARY_WRITE_SLICE_SDI operation: the second
;; input is one <VSINGLE> vector (operand 2, constraint "x") wrapped in
;; vec_duplicate to the tuple mode, and is printed as %2.<Vetype>.
;; Operand 1 is a register tuple that, unlike the tuple-by-tuple form, uses
;; plain register_operand/"w".  Operand 0 is the SI index register; the
;; offset is always printed as 0.
1058 | (define_insn "@aarch64_sme_single_<optab><mode>" | |
1059 | [(set (reg:SME_ZA_SDIx24 ZA_REGNUM) | |
1060 | (unspec:SME_ZA_SDIx24 | |
1061 | [(reg:SME_ZA_SDIx24 ZA_REGNUM) | |
1062 | (reg:DI SME_STATE_REGNUM) | |
1063 | (match_operand:SI 0 "register_operand" "Uci") | |
1064 | (match_operand:SME_ZA_SDIx24 1 "register_operand" "w") | |
1065 | (vec_duplicate:SME_ZA_SDIx24 | |
1066 | (match_operand:<VSINGLE> 2 "register_operand" "x"))] | |
1067 | SME_BINARY_WRITE_SLICE_SDI))] | |
1068 | "TARGET_STREAMING_SME2" | |
1069 | "<sme_int_op>\tza.<Vetype>[%w0, 0, vgx<vector_count>], %1, %2.<Vetype>" | |
1070 | ) | |
1071 | ||
;; Form of aarch64_sme_single_<optab><mode> in which the index is
;; (plus reg const): operand 0 is the SI register and operand 1 a constant
;; accepted by const_0_to_7_operand, printed as the offset.  Operand 2 is
;; the register tuple and operand 3 the duplicated <VSINGLE> vector.
1072 | (define_insn "*aarch64_sme_single_<optab><mode>_plus" | |
1073 | [(set (reg:SME_ZA_SDIx24 ZA_REGNUM) | |
1074 | (unspec:SME_ZA_SDIx24 | |
1075 | [(reg:SME_ZA_SDIx24 ZA_REGNUM) | |
1076 | (reg:DI SME_STATE_REGNUM) | |
1077 | (plus:SI (match_operand:SI 0 "register_operand" "Uci") | |
1078 | (match_operand:SI 1 "const_0_to_7_operand")) | |
1079 | (match_operand:SME_ZA_SDIx24 2 "register_operand" "w") | |
1080 | (vec_duplicate:SME_ZA_SDIx24 | |
1081 | (match_operand:<VSINGLE> 3 "register_operand" "x"))] | |
1082 | SME_BINARY_WRITE_SLICE_SDI))] | |
1083 | "TARGET_STREAMING_SME2" | |
1084 | "<sme_int_op>\tza.<Vetype>[%w0, %1, vgx<vector_count>], %2, %3.<Vetype>" | |
1085 | ) | |
1086 | ||
4f6ab953 RS |
1087 | ;; ========================================================================= |
1088 | ;; == Ternary arithmetic | |
1089 | ;; ========================================================================= | |
1090 | ||
1091 | ;; ------------------------------------------------------------------------- | |
c1c267df RS |
1092 | ;; ---- [INT] Dot product |
1093 | ;; ------------------------------------------------------------------------- | |
1094 | ;; Includes: | |
1095 | ;; - SDOT | |
1096 | ;; - SUDOT | |
1097 | ;; - UDOT | |
1098 | ;; - USDOT | |
1099 | ;; ------------------------------------------------------------------------- | |
1100 | ||
;; Integer dot product of two aligned SME_ZA_BHIx24 register tuples
;; (operands 1 and 2), accumulating into ZA in an SME_ZA_SDI mode; iterates
;; over SME_INT_DOTPROD.  Operand 0 is the SI index register; the offset is
;; always printed as 0.
;; The insn condition filters the mode cross product: a ZA element size
;; other than 32 bits is only accepted with 16-bit inputs, and inputs other
;; than 8 bits are only accepted when <has_16bit_form> is true.
1101 | (define_insn "@aarch64_sme_<optab><SME_ZA_SDI:mode><SME_ZA_BHIx24:mode>" | |
1102 | [(set (reg:SME_ZA_SDI ZA_REGNUM) | |
1103 | (unspec:SME_ZA_SDI | |
1104 | [(reg:SME_ZA_SDI ZA_REGNUM) | |
1105 | (reg:DI SME_STATE_REGNUM) | |
1106 | (match_operand:SI 0 "register_operand" "Uci") | |
1107 | (match_operand:SME_ZA_BHIx24 1 "aligned_register_operand" "Uw<vector_count>") | |
1108 | (match_operand:SME_ZA_BHIx24 2 "aligned_register_operand" "Uw<vector_count>")] | |
1109 | SME_INT_DOTPROD))] | |
1110 | "TARGET_STREAMING_SME2 | |
1111 | && (<SME_ZA_SDI:elem_bits> == 32 || <SME_ZA_BHIx24:elem_bits> == 16) | |
1112 | && (<SME_ZA_BHIx24:elem_bits> == 8 || <has_16bit_form>)" | |
1113 | "<optab>\tza.<SME_ZA_SDI:Vetype>[%w0, 0, vgx<vector_count>], %1, %2" | |
1114 | ) | |
1115 | ||
;; Form of the tuple-by-tuple integer dot product in which the index is
;; (plus reg const): operand 0 is the SI register and operand 1 a constant
;; accepted by const_0_to_7_operand, printed as the offset.  Operands 2 and
;; 3 are the aligned input tuples.  The mode filter in the insn condition
;; is the same as in the form without the offset.
1116 | (define_insn "*aarch64_sme_<optab><SME_ZA_SDI:mode><SME_ZA_BHIx24:mode>_plus" | |
1117 | [(set (reg:SME_ZA_SDI ZA_REGNUM) | |
1118 | (unspec:SME_ZA_SDI | |
1119 | [(reg:SME_ZA_SDI ZA_REGNUM) | |
1120 | (reg:DI SME_STATE_REGNUM) | |
1121 | (plus:SI (match_operand:SI 0 "register_operand" "Uci") | |
1122 | (match_operand:SI 1 "const_0_to_7_operand")) | |
1123 | (match_operand:SME_ZA_BHIx24 2 "aligned_register_operand" "Uw<vector_count>") | |
1124 | (match_operand:SME_ZA_BHIx24 3 "aligned_register_operand" "Uw<vector_count>")] | |
1125 | SME_INT_DOTPROD))] | |
1126 | "TARGET_STREAMING_SME2 | |
1127 | && (<SME_ZA_SDI:elem_bits> == 32 || <SME_ZA_BHIx24:elem_bits> == 16) | |
1128 | && (<SME_ZA_BHIx24:elem_bits> == 8 || <has_16bit_form>)" | |
1129 | "<optab>\tza.<SME_ZA_SDI:Vetype>[%w0, %1, vgx<vector_count>], %2, %3" | |
1130 | ) | |
1131 | ||
;; "Single" form of the integer dot product: operand 1 is a register tuple
;; (plain register_operand/"w") and the second input is one <VSINGLE>
;; vector (operand 2, constraint "x") wrapped in vec_duplicate, printed as
;; %2.<SME_ZA_BHIx24:Vetype>.  Operand 0 is the SI index register; the
;; offset is always printed as 0.  The insn condition applies the same
;; ZA-size/input-size filter as the tuple-by-tuple dot product.
1132 | (define_insn "@aarch64_sme_single_<optab><SME_ZA_SDI:mode><SME_ZA_BHIx24:mode>" | |
1133 | [(set (reg:SME_ZA_SDI ZA_REGNUM) | |
1134 | (unspec:SME_ZA_SDI | |
1135 | [(reg:SME_ZA_SDI ZA_REGNUM) | |
1136 | (reg:DI SME_STATE_REGNUM) | |
1137 | (match_operand:SI 0 "register_operand" "Uci") | |
1138 | (match_operand:SME_ZA_BHIx24 1 "register_operand" "w") | |
1139 | (vec_duplicate:SME_ZA_BHIx24 | |
1140 | (match_operand:<VSINGLE> 2 "register_operand" "x"))] | |
1141 | SME_INT_DOTPROD))] | |
1142 | "TARGET_STREAMING_SME2 | |
1143 | && (<SME_ZA_SDI:elem_bits> == 32 || <SME_ZA_BHIx24:elem_bits> == 16) | |
1144 | && (<SME_ZA_BHIx24:elem_bits> == 8 || <has_16bit_form>)" | |
1145 | "<optab>\tza.<SME_ZA_SDI:Vetype>[%w0, 0, vgx<vector_count>], %1, %2.<SME_ZA_BHIx24:Vetype>" | |
1146 | ) | |
1147 | ||
;; Form of the "single" integer dot product in which the index is
;; (plus reg const): operand 0 is the SI register and operand 1 a constant
;; accepted by const_0_to_7_operand, printed as the offset.  Operand 2 is
;; the register tuple and operand 3 the duplicated <VSINGLE> vector.
1148 | (define_insn "*aarch64_sme_single_<optab><SME_ZA_SDI:mode><SME_ZA_BHIx24:mode>_plus" | |
1149 | [(set (reg:SME_ZA_SDI ZA_REGNUM) | |
1150 | (unspec:SME_ZA_SDI | |
1151 | [(reg:SME_ZA_SDI ZA_REGNUM) | |
1152 | (reg:DI SME_STATE_REGNUM) | |
1153 | (plus:SI (match_operand:SI 0 "register_operand" "Uci") | |
1154 | (match_operand:SI 1 "const_0_to_7_operand")) | |
1155 | (match_operand:SME_ZA_BHIx24 2 "register_operand" "w") | |
1156 | (vec_duplicate:SME_ZA_BHIx24 | |
1157 | (match_operand:<VSINGLE> 3 "register_operand" "x"))] | |
1158 | SME_INT_DOTPROD))] | |
1159 | "TARGET_STREAMING_SME2 | |
1160 | && (<SME_ZA_SDI:elem_bits> == 32 || <SME_ZA_BHIx24:elem_bits> == 16) | |
1161 | && (<SME_ZA_BHIx24:elem_bits> == 8 || <has_16bit_form>)" | |
1162 | "<optab>\tza.<SME_ZA_SDI:Vetype>[%w0, %1, vgx<vector_count>], %2, %3.<SME_ZA_BHIx24:Vetype>" | |
1163 | ) | |
1164 | ||
1165 | ;; SUDOT is USDOT with the operands swapped. | |
;; The RTL uses UNSPEC_SME_USDOT with the inputs in the opposite order from
;; the assembly: the duplicated <VSINGLE> vector (operand 2) comes first in
;; the unspec and the register tuple (operand 1) second, while the template
;; prints "sudot ..., %1, %2.b".  Operand 0 is the SI index register; the
;; offset is always printed as 0.
1166 | (define_insn "@aarch64_sme_single_sudot<VNx4SI_ONLY:mode><SME_ZA_BIx24:mode>" | |
1167 | [(set (reg:VNx4SI_ONLY ZA_REGNUM) | |
1168 | (unspec:VNx4SI_ONLY | |
1169 | [(reg:VNx4SI_ONLY ZA_REGNUM) | |
1170 | (reg:DI SME_STATE_REGNUM) | |
1171 | (match_operand:SI 0 "register_operand" "Uci") | |
1172 | (vec_duplicate:SME_ZA_BIx24 | |
1173 | (match_operand:<VSINGLE> 2 "register_operand" "x")) | |
1174 | (match_operand:SME_ZA_BIx24 1 "register_operand" "w")] | |
1175 | UNSPEC_SME_USDOT))] | |
1176 | "TARGET_STREAMING_SME2" | |
1177 | "sudot\tza.s[%w0, 0, vgx<vector_count>], %1, %2.b" | |
1178 | ) | |
1179 | ||
;; Form of the single-vector SUDOT pattern in which the index is
;; (plus reg const): operand 0 is the SI register and operand 1 a constant
;; accepted by const_0_to_7_operand, printed as the offset.  As in the form
;; without the offset, the duplicated <VSINGLE> vector (operand 3) precedes
;; the register tuple (operand 2) inside the UNSPEC_SME_USDOT, but the
;; template prints them as "%2, %3.b".
1180 | (define_insn "*aarch64_sme_single_sudot<VNx4SI_ONLY:mode><SME_ZA_BIx24:mode>_plus" | |
1181 | [(set (reg:VNx4SI_ONLY ZA_REGNUM) | |
1182 | (unspec:VNx4SI_ONLY | |
1183 | [(reg:VNx4SI_ONLY ZA_REGNUM) | |
1184 | (reg:DI SME_STATE_REGNUM) | |
1185 | (plus:SI (match_operand:SI 0 "register_operand" "Uci") | |
1186 | (match_operand:SI 1 "const_0_to_7_operand")) | |
1187 | (vec_duplicate:SME_ZA_BIx24 | |
1188 | (match_operand:<VSINGLE> 3 "register_operand" "x")) | |
1189 | (match_operand:SME_ZA_BIx24 2 "register_operand" "w")] | |
1190 | UNSPEC_SME_USDOT))] | |
1191 | "TARGET_STREAMING_SME2" | |
1192 | "sudot\tza.s[%w0, %1, vgx<vector_count>], %2, %3.b" | |
1193 | ) | |
1194 | ||
;; Indexed ("lane") integer dot product, iterating over
;; SME_INT_DOTPROD_LANE.  Operand 1 is an aligned SME_ZA_BHIx24 register
;; tuple.  The second input is an UNSPEC_SVE_LANE_SELECT of a <VSINGLE>
;; vector (operand 2, constraint "x") and a constant lane number
;; (operand 3), printed as %2.<SME_ZA_BHIx24:Vetype>[%3].  Operand 0 is the
;; SI index register; the offset is always printed as 0.  The insn
;; condition applies the same ZA-size/input-size filter as the non-lane
;; dot products.
1195 | (define_insn "@aarch64_sme_lane_<optab><SME_ZA_SDI:mode><SME_ZA_BHIx24:mode>" | |
1196 | [(set (reg:SME_ZA_SDI ZA_REGNUM) | |
1197 | (unspec:SME_ZA_SDI | |
1198 | [(reg:SME_ZA_SDI ZA_REGNUM) | |
1199 | (reg:DI SME_STATE_REGNUM) | |
1200 | (match_operand:SI 0 "register_operand" "Uci") | |
1201 | (match_operand:SME_ZA_BHIx24 1 "aligned_register_operand" "Uw<vector_count>") | |
1202 | (unspec:SME_ZA_BHIx24 | |
1203 | [(match_operand:<VSINGLE> 2 "register_operand" "x") | |
1204 | (match_operand:SI 3 "const_int_operand")] | |
1205 | UNSPEC_SVE_LANE_SELECT)] | |
1206 | SME_INT_DOTPROD_LANE))] | |
1207 | "TARGET_STREAMING_SME2 | |
1208 | && (<SME_ZA_SDI:elem_bits> == 32 || <SME_ZA_BHIx24:elem_bits> == 16) | |
1209 | && (<SME_ZA_BHIx24:elem_bits> == 8 || <has_16bit_form>)" | |
1210 | "<optab>\tza.<SME_ZA_SDI:Vetype>[%w0, 0, vgx<vector_count>], %1, %2.<SME_ZA_BHIx24:Vetype>[%3]" | |
1211 | ) | |
1212 | ||
;; Form of the lane integer dot product in which the index is
;; (plus reg const): operand 0 is the SI register and operand 1 a constant
;; accepted by const_0_to_7_operand, printed as the offset.  Operand 2 is
;; the aligned input tuple, operand 3 the <VSINGLE> vector and operand 4
;; the constant lane number.
1213 | (define_insn "*aarch64_sme_lane_<optab><SME_ZA_SDI:mode><SME_ZA_BHIx24:mode>_plus" | |
1214 | [(set (reg:SME_ZA_SDI ZA_REGNUM) | |
1215 | (unspec:SME_ZA_SDI | |
1216 | [(reg:SME_ZA_SDI ZA_REGNUM) | |
1217 | (reg:DI SME_STATE_REGNUM) | |
1218 | (plus:SI (match_operand:SI 0 "register_operand" "Uci") | |
1219 | (match_operand:SI 1 "const_0_to_7_operand")) | |
1220 | (match_operand:SME_ZA_BHIx24 2 "aligned_register_operand" "Uw<vector_count>") | |
1221 | (unspec:SME_ZA_BHIx24 | |
1222 | [(match_operand:<VSINGLE> 3 "register_operand" "x") | |
1223 | (match_operand:SI 4 "const_int_operand")] | |
1224 | UNSPEC_SVE_LANE_SELECT)] | |
1225 | SME_INT_DOTPROD_LANE))] | |
1226 | "TARGET_STREAMING_SME2 | |
1227 | && (<SME_ZA_SDI:elem_bits> == 32 || <SME_ZA_BHIx24:elem_bits> == 16) | |
1228 | && (<SME_ZA_BHIx24:elem_bits> == 8 || <has_16bit_form>)" | |
1229 | "<optab>\tza.<SME_ZA_SDI:Vetype>[%w0, %1, vgx<vector_count>], %2, %3.<SME_ZA_BHIx24:Vetype>[%4]" | |
1230 | ) | |
1231 | ||
1232 | ;; ------------------------------------------------------------------------- | |
1233 | ;; ---- [INT] Ternary widening arithmetic on ZA slice | |
4f6ab953 RS |
1234 | ;; ------------------------------------------------------------------------- |
1235 | ;; Includes: | |
c1c267df RS |
1236 | ;; - SMLA |
1237 | ;; - SMLS | |
1238 | ;; - UMLA | |
1239 | ;; - UMLS | |
1240 | ;; ------------------------------------------------------------------------- | |
1241 | ||
;; Widening ternary arithmetic (SME_INT_TERNARY_SLICE) on two single
;; SVE_FULL_BHI vectors, accumulating into ZA as 32-bit (.s) elements.
;; Operand 0 is the SI index register, operand 1 a vector register ("w")
;; and operand 2 a vector register with constraint "x".  The mnemonic is
;; <optab><za32_long> and the slice range is printed as
;; "0:<za32_last_offset>".
1242 | (define_insn "@aarch64_sme_<optab><VNx4SI_ONLY:mode><SVE_FULL_BHI:mode>" | |
1243 | [(set (reg:VNx4SI_ONLY ZA_REGNUM) | |
1244 | (unspec:VNx4SI_ONLY | |
1245 | [(reg:VNx4SI_ONLY ZA_REGNUM) | |
1246 | (reg:DI SME_STATE_REGNUM) | |
1247 | (match_operand:SI 0 "register_operand" "Uci") | |
1248 | (match_operand:SVE_FULL_BHI 1 "register_operand" "w") | |
1249 | (match_operand:SVE_FULL_BHI 2 "register_operand" "x")] | |
1250 | SME_INT_TERNARY_SLICE))] | |
1251 | "TARGET_STREAMING_SME2" | |
1252 | "<optab><za32_long>\tza.s[%w0, 0:<za32_last_offset>], %1.<SVE_FULL_BHI:Vetype>, %2.<SVE_FULL_BHI:Vetype>" | |
1253 | ) | |
1254 | ||
;; Form of the single-vector ZA32 widening pattern in which the index is
;; (plus reg const): operand 0 is the SI register and operand 1 a constant
;; accepted by the const_<za32_offset_range>_operand predicate.  The output
;; code synthesises operands[4] = offset + <za32_last_offset> so that the
;; template can print the slice range as "%1:%4".  Operands 2 and 3 are the
;; input vectors.
1255 | (define_insn "*aarch64_sme_<optab><VNx4SI_ONLY:mode><SVE_FULL_BHI:mode>_plus" | |
1256 | [(set (reg:VNx4SI_ONLY ZA_REGNUM) | |
1257 | (unspec:VNx4SI_ONLY | |
1258 | [(reg:VNx4SI_ONLY ZA_REGNUM) | |
1259 | (reg:DI SME_STATE_REGNUM) | |
1260 | (plus:SI (match_operand:SI 0 "register_operand" "Uci") | |
1261 | (match_operand:SI 1 "const_<za32_offset_range>_operand")) | |
1262 | (match_operand:SVE_FULL_BHI 2 "register_operand" "w") | |
1263 | (match_operand:SVE_FULL_BHI 3 "register_operand" "x")] | |
1264 | SME_INT_TERNARY_SLICE))] | |
1265 | "TARGET_STREAMING_SME2" | |
1266 | { | |
1267 | operands[4] = GEN_INT (INTVAL (operands[1]) + <za32_last_offset>); | |
1268 | return "<optab><za32_long>\tza.s[%w0, %1:%4], %2.<SVE_FULL_BHI:Vetype>, %3.<SVE_FULL_BHI:Vetype>"; | |
1269 | } | |
1270 | ) | |
1271 | ||
;; Widening ternary arithmetic (SME_INT_TERNARY_SLICE) on two aligned
;; SME_ZA_BHIx24 register tuples (operands 1 and 2), accumulating into ZA
;; as 32-bit (.s) elements.  Operand 0 is the SI index register.  The slice
;; range is printed as "0:<za32_last_offset>" followed by the
;; vgx<vector_count> modifier.
1272 | (define_insn "@aarch64_sme_<optab><VNx4SI_ONLY:mode><SME_ZA_BHIx24:mode>" | |
1273 | [(set (reg:VNx4SI_ONLY ZA_REGNUM) | |
1274 | (unspec:VNx4SI_ONLY | |
1275 | [(reg:VNx4SI_ONLY ZA_REGNUM) | |
1276 | (reg:DI SME_STATE_REGNUM) | |
1277 | (match_operand:SI 0 "register_operand" "Uci") | |
1278 | (match_operand:SME_ZA_BHIx24 1 "aligned_register_operand" "Uw<vector_count>") | |
1279 | (match_operand:SME_ZA_BHIx24 2 "aligned_register_operand" "Uw<vector_count>")] | |
1280 | SME_INT_TERNARY_SLICE))] | |
1281 | "TARGET_STREAMING_SME2" | |
1282 | "<optab><za32_long>\tza.s[%w0, 0:<za32_last_offset>, vgx<vector_count>], %1, %2" | |
1283 | ) | |
1284 | ||
;; Form of the tuple-by-tuple ZA32 widening pattern in which the index is
;; (plus reg const): operand 0 is the SI register and operand 1 a constant
;; accepted by the const_<za32_offset_range>_operand predicate.  The output
;; code synthesises operands[4] = offset + <za32_last_offset> so that the
;; template can print the slice range as "%1:%4".  Operands 2 and 3 are the
;; aligned input tuples.
1285 | (define_insn "*aarch64_sme_<optab><VNx4SI_ONLY:mode><SME_ZA_BHIx24:mode>_plus" | |
1286 | [(set (reg:VNx4SI_ONLY ZA_REGNUM) | |
1287 | (unspec:VNx4SI_ONLY | |
1288 | [(reg:VNx4SI_ONLY ZA_REGNUM) | |
1289 | (reg:DI SME_STATE_REGNUM) | |
1290 | (plus:SI (match_operand:SI 0 "register_operand" "Uci") | |
1291 | (match_operand:SI 1 "const_<za32_offset_range>_operand")) | |
1292 | (match_operand:SME_ZA_BHIx24 2 "aligned_register_operand" "Uw<vector_count>") | |
1293 | (match_operand:SME_ZA_BHIx24 3 "aligned_register_operand" "Uw<vector_count>")] | |
1294 | SME_INT_TERNARY_SLICE))] | |
1295 | "TARGET_STREAMING_SME2" | |
1296 | { | |
1297 | operands[4] = GEN_INT (INTVAL (operands[1]) + <za32_last_offset>); | |
1298 | return "<optab><za32_long>\tza.s[%w0, %1:%4, vgx<vector_count>], %2, %3"; | |
1299 | } | |
1300 | ) | |
1301 | ||
;; "Single" form of the ZA32 widening ternary operation: operand 1 is a
;; register tuple (plain register_operand/"w") and the second input is one
;; <SME_ZA_BHIx24:VSINGLE> vector (operand 2, constraint "x") wrapped in
;; vec_duplicate, printed as %2.<SME_ZA_BHIx24:Vetype>.  Operand 0 is the
;; SI index register; the slice range is printed as "0:<za32_last_offset>".
1302 | (define_insn "@aarch64_sme_single_<optab><VNx4SI_ONLY:mode><SME_ZA_BHIx24:mode>" | |
1303 | [(set (reg:VNx4SI_ONLY ZA_REGNUM) | |
1304 | (unspec:VNx4SI_ONLY | |
1305 | [(reg:VNx4SI_ONLY ZA_REGNUM) | |
1306 | (reg:DI SME_STATE_REGNUM) | |
1307 | (match_operand:SI 0 "register_operand" "Uci") | |
1308 | (match_operand:SME_ZA_BHIx24 1 "register_operand" "w") | |
1309 | (vec_duplicate:SME_ZA_BHIx24 | |
1310 | (match_operand:<SME_ZA_BHIx24:VSINGLE> 2 "register_operand" "x"))] | |
1311 | SME_INT_TERNARY_SLICE))] | |
1312 | "TARGET_STREAMING_SME2" | |
1313 | "<optab><za32_long>\tza.s[%w0, 0:<za32_last_offset>, vgx<vector_count>], %1, %2.<SME_ZA_BHIx24:Vetype>" | |
1314 | ) | |
1315 | ||
;; Form of the "single" ZA32 widening pattern in which the index is
;; (plus reg const): operand 0 is the SI register and operand 1 a constant
;; accepted by the const_<za32_offset_range>_operand predicate.  The output
;; code synthesises operands[4] = offset + <za32_last_offset> so that the
;; template can print the slice range as "%1:%4".  Operand 2 is the
;; register tuple and operand 3 the duplicated single vector.
1316 | (define_insn "*aarch64_sme_single_<optab><VNx4SI_ONLY:mode><SME_ZA_BHIx24:mode>_plus" | |
1317 | [(set (reg:VNx4SI_ONLY ZA_REGNUM) | |
1318 | (unspec:VNx4SI_ONLY | |
1319 | [(reg:VNx4SI_ONLY ZA_REGNUM) | |
1320 | (reg:DI SME_STATE_REGNUM) | |
1321 | (plus:SI (match_operand:SI 0 "register_operand" "Uci") | |
1322 | (match_operand:SI 1 "const_<za32_offset_range>_operand")) | |
1323 | (match_operand:SME_ZA_BHIx24 2 "register_operand" "w") | |
1324 | (vec_duplicate:SME_ZA_BHIx24 | |
1325 | (match_operand:<SME_ZA_BHIx24:VSINGLE> 3 "register_operand" "x"))] | |
1326 | SME_INT_TERNARY_SLICE))] | |
1327 | "TARGET_STREAMING_SME2" | |
1328 | { | |
1329 | operands[4] = GEN_INT (INTVAL (operands[1]) + <za32_last_offset>); | |
1330 | return "<optab><za32_long>\tza.s[%w0, %1:%4, vgx<vector_count>], %2, %3.<SME_ZA_BHIx24:Vetype>"; | |
1331 | } | |
1332 | ) | |
1333 | ||
;; Indexed ("lane") form of the ZA32 widening ternary operation over the
;; SME_ZA_BHIx124 modes.  Operand 1's predicate and constraint come from
;; the <aligned_operand> and <aligned_fpr> mode attributes.  The second
;; input is an UNSPEC_SVE_LANE_SELECT of a <VSINGLE> vector (operand 2,
;; constraint "x") and a constant lane number (operand 3).  Operand 0 is
;; the SI index register.  The template prints the slice range as
;; "0:<za32_last_offset>" and takes the vgx modifier and the suffix on %1
;; from <vg_modifier> and <z_suffix>.
1334 | (define_insn "@aarch64_sme_lane_<optab><VNx4SI_ONLY:mode><SME_ZA_BHIx124:mode>" | |
1335 | [(set (reg:VNx4SI_ONLY ZA_REGNUM) | |
1336 | (unspec:VNx4SI_ONLY | |
1337 | [(reg:VNx4SI_ONLY ZA_REGNUM) | |
1338 | (reg:DI SME_STATE_REGNUM) | |
1339 | (match_operand:SI 0 "register_operand" "Uci") | |
1340 | (match_operand:SME_ZA_BHIx124 1 "<aligned_operand>" "<aligned_fpr>") | |
1341 | (unspec:SME_ZA_BHIx124 | |
1342 | [(match_operand:<VSINGLE> 2 "register_operand" "x") | |
1343 | (match_operand:SI 3 "const_int_operand")] | |
1344 | UNSPEC_SVE_LANE_SELECT)] | |
1345 | SME_INT_TERNARY_SLICE))] | |
1346 | "TARGET_STREAMING_SME2" | |
1347 | "<optab><za32_long>\tza.s[%w0, 0:<za32_last_offset><vg_modifier>], %1<z_suffix>, %2.<SME_ZA_BHIx124:Vetype>[%3]" | |
1348 | ) | |
1349 | ||
;; Form of the lane ZA32 widening pattern in which the index is
;; (plus reg const): operand 0 is the SI register and operand 1 a constant
;; accepted by the const_<za32_offset_range>_operand predicate.  The output
;; code synthesises operands[5] = offset + <za32_last_offset> so that the
;; template can print the slice range as "%1:%5".  Operand 2 is the vector
;; or tuple input, operand 3 the <VSINGLE> vector and operand 4 the
;; constant lane number.
;; Note that, unlike the other offset forms in this file, the pattern name
;; carries no "_plus" suffix; the leading "*" means that no generator
;; function is created for it.
1350 | (define_insn "*aarch64_sme_lane_<optab><VNx4SI_ONLY:mode><SME_ZA_BHIx124:mode>" | |
1351 | [(set (reg:VNx4SI_ONLY ZA_REGNUM) | |
1352 | (unspec:VNx4SI_ONLY | |
1353 | [(reg:VNx4SI_ONLY ZA_REGNUM) | |
1354 | (reg:DI SME_STATE_REGNUM) | |
1355 | (plus:SI (match_operand:SI 0 "register_operand" "Uci") | |
1356 | (match_operand:SI 1 "const_<za32_offset_range>_operand")) | |
1357 | (match_operand:SME_ZA_BHIx124 2 "<aligned_operand>" "<aligned_fpr>") | |
1358 | (unspec:SME_ZA_BHIx124 | |
1359 | [(match_operand:<VSINGLE> 3 "register_operand" "x") | |
1360 | (match_operand:SI 4 "const_int_operand")] | |
1361 | UNSPEC_SVE_LANE_SELECT)] | |
1362 | SME_INT_TERNARY_SLICE))] | |
1363 | "TARGET_STREAMING_SME2" | |
1364 | { | |
1365 | operands[5] = GEN_INT (INTVAL (operands[1]) + <za32_last_offset>); | |
1366 | return "<optab><za32_long>\tza.s[%w0, %1:%5<vg_modifier>], %2<z_suffix>, %3.<SME_ZA_BHIx124:Vetype>[%4]"; | |
1367 | } | |
1368 | ) | |
1369 | ||
;; Widening ternary arithmetic (SME_INT_TERNARY_SLICE) on two single
;; VNx8HI vectors, accumulating into ZA as 64-bit (.d) elements.  The
;; mnemonic is <optab>ll and the slice range is the fixed "0:3".
;; Operand 0 is the SI index register, operand 1 a vector register ("w")
;; and operand 2 a vector register with constraint "x".  In addition to
;; SME2 and streaming mode the condition requires TARGET_SME_I16I64.
1370 | (define_insn "@aarch64_sme_<optab><VNx2DI_ONLY:mode><VNx8HI_ONLY:mode>" | |
1371 | [(set (reg:VNx2DI_ONLY ZA_REGNUM) | |
1372 | (unspec:VNx2DI_ONLY | |
1373 | [(reg:VNx2DI_ONLY ZA_REGNUM) | |
1374 | (reg:DI SME_STATE_REGNUM) | |
1375 | (match_operand:SI 0 "register_operand" "Uci") | |
1376 | (match_operand:VNx8HI_ONLY 1 "register_operand" "w") | |
1377 | (match_operand:VNx8HI_ONLY 2 "register_operand" "x")] | |
1378 | SME_INT_TERNARY_SLICE))] | |
1379 | "TARGET_SME2 && TARGET_SME_I16I64 && TARGET_STREAMING_SME" | |
1380 | "<optab>ll\tza.d[%w0, 0:3], %1.h, %2.h" | |
1381 | ) | |
1382 | ||
;; Form of the single-vector ZA64 widening pattern in which the index is
;; (plus reg const): operand 0 is the SI register and operand 1 a constant
;; accepted by the const_<za64_offset_range>_operand predicate.  The output
;; code synthesises operands[4] = offset + 3 so that the template can print
;; the slice range as "%1:%4".  Operands 2 and 3 are the input vectors.
1383 | (define_insn "*aarch64_sme_<optab><VNx2DI_ONLY:mode><VNx8HI_ONLY:mode>_plus" | |
1384 | [(set (reg:VNx2DI_ONLY ZA_REGNUM) | |
1385 | (unspec:VNx2DI_ONLY | |
1386 | [(reg:VNx2DI_ONLY ZA_REGNUM) | |
1387 | (reg:DI SME_STATE_REGNUM) | |
1388 | (plus:SI (match_operand:SI 0 "register_operand" "Uci") | |
1389 | (match_operand:SI 1 "const_<za64_offset_range>_operand")) | |
1390 | (match_operand:VNx8HI_ONLY 2 "register_operand" "w") | |
1391 | (match_operand:VNx8HI_ONLY 3 "register_operand" "x")] | |
1392 | SME_INT_TERNARY_SLICE))] | |
1393 | "TARGET_SME2 && TARGET_SME_I16I64 && TARGET_STREAMING_SME" | |
1394 | { | |
1395 | operands[4] = GEN_INT (INTVAL (operands[1]) + 3); | |
1396 | return "<optab>ll\tza.d[%w0, %1:%4], %2.h, %3.h"; | |
1397 | } | |
1398 | ) | |
1399 | ||
;; Widening ternary arithmetic (SME_INT_TERNARY_SLICE) on two aligned
;; SME_ZA_HIx24 register tuples (operands 1 and 2), accumulating into ZA as
;; 64-bit (.d) elements.  Operand 0 is the SI index register.  The slice
;; range is the fixed "0:3", followed by the vgx<vector_count> modifier.
;; The condition requires TARGET_SME_I16I64 in addition to SME2 and
;; streaming mode.
1400 | (define_insn "@aarch64_sme_<optab><VNx2DI_ONLY:mode><SME_ZA_HIx24:mode>" | |
1401 | [(set (reg:VNx2DI_ONLY ZA_REGNUM) | |
1402 | (unspec:VNx2DI_ONLY | |
1403 | [(reg:VNx2DI_ONLY ZA_REGNUM) | |
1404 | (reg:DI SME_STATE_REGNUM) | |
1405 | (match_operand:SI 0 "register_operand" "Uci") | |
1406 | (match_operand:SME_ZA_HIx24 1 "aligned_register_operand" "Uw<vector_count>") | |
1407 | (match_operand:SME_ZA_HIx24 2 "aligned_register_operand" "Uw<vector_count>")] | |
1408 | SME_INT_TERNARY_SLICE))] | |
1409 | "TARGET_SME2 && TARGET_SME_I16I64 && TARGET_STREAMING_SME" | |
1410 | "<optab>ll\tza.d[%w0, 0:3, vgx<vector_count>], %1, %2" | |
1411 | ) | |
1412 | ||
;; Form of the tuple-by-tuple ZA64 widening pattern in which the index is
;; (plus reg const): operand 0 is the SI register and operand 1 a constant
;; accepted by the const_<za64_offset_range>_operand predicate.  The output
;; code synthesises operands[4] = offset + 3 so that the template can print
;; the slice range as "%1:%4".  Operands 2 and 3 are the aligned input
;; tuples.
1413 | (define_insn "*aarch64_sme_<optab><VNx2DI_ONLY:mode><SME_ZA_HIx24:mode>_plus" | |
1414 | [(set (reg:VNx2DI_ONLY ZA_REGNUM) | |
1415 | (unspec:VNx2DI_ONLY | |
1416 | [(reg:VNx2DI_ONLY ZA_REGNUM) | |
1417 | (reg:DI SME_STATE_REGNUM) | |
1418 | (plus:SI (match_operand:SI 0 "register_operand" "Uci") | |
1419 | (match_operand:SI 1 "const_<za64_offset_range>_operand")) | |
1420 | (match_operand:SME_ZA_HIx24 2 "aligned_register_operand" "Uw<vector_count>") | |
1421 | (match_operand:SME_ZA_HIx24 3 "aligned_register_operand" "Uw<vector_count>")] | |
1422 | SME_INT_TERNARY_SLICE))] | |
1423 | "TARGET_SME2 && TARGET_SME_I16I64 && TARGET_STREAMING_SME" | |
1424 | { | |
1425 | operands[4] = GEN_INT (INTVAL (operands[1]) + 3); | |
1426 | return "<optab>ll\tza.d[%w0, %1:%4, vgx<vector_count>], %2, %3"; | |
1427 | } | |
1428 | ) | |
1429 | ||
;; "Single" form of the ZA64 widening ternary operation: operand 1 is a
;; register tuple (plain register_operand/"w") and the second input is one
;; <SME_ZA_HIx24:VSINGLE> vector (operand 2, constraint "x") wrapped in
;; vec_duplicate, printed as %2.h.  Operand 0 is the SI index register; the
;; slice range is the fixed "0:3".  The condition requires
;; TARGET_SME_I16I64 in addition to SME2 and streaming mode.
1430 | (define_insn "@aarch64_sme_single_<optab><VNx2DI_ONLY:mode><SME_ZA_HIx24:mode>" | |
1431 | [(set (reg:VNx2DI_ONLY ZA_REGNUM) | |
1432 | (unspec:VNx2DI_ONLY | |
1433 | [(reg:VNx2DI_ONLY ZA_REGNUM) | |
1434 | (reg:DI SME_STATE_REGNUM) | |
1435 | (match_operand:SI 0 "register_operand" "Uci") | |
1436 | (match_operand:SME_ZA_HIx24 1 "register_operand" "w") | |
1437 | (vec_duplicate:SME_ZA_HIx24 | |
1438 | (match_operand:<SME_ZA_HIx24:VSINGLE> 2 "register_operand" "x"))] | |
1439 | SME_INT_TERNARY_SLICE))] | |
1440 | "TARGET_SME2 && TARGET_SME_I16I64 && TARGET_STREAMING_SME" | |
1441 | "<optab>ll\tza.d[%w0, 0:3, vgx<vector_count>], %1, %2.h" | |
1442 | ) | |
1443 | ||
;; Form of the "single" ZA64 widening pattern in which the index is
;; (plus reg const): operand 0 is the SI register and operand 1 a constant
;; accepted by the const_<za64_offset_range>_operand predicate.  The output
;; code synthesises operands[4] = offset + 3 so that the template can print
;; the slice range as "%1:%4".  Operand 2 is the register tuple and
;; operand 3 the duplicated single vector.
1444 | (define_insn "*aarch64_sme_single_<optab><VNx2DI_ONLY:mode><SME_ZA_HIx24:mode>_plus" | |
1445 | [(set (reg:VNx2DI_ONLY ZA_REGNUM) | |
1446 | (unspec:VNx2DI_ONLY | |
1447 | [(reg:VNx2DI_ONLY ZA_REGNUM) | |
1448 | (reg:DI SME_STATE_REGNUM) | |
1449 | (plus:SI (match_operand:SI 0 "register_operand" "Uci") | |
1450 | (match_operand:SI 1 "const_<za64_offset_range>_operand")) | |
1451 | (match_operand:SME_ZA_HIx24 2 "register_operand" "w") | |
1452 | (vec_duplicate:SME_ZA_HIx24 | |
1453 | (match_operand:<SME_ZA_HIx24:VSINGLE> 3 "register_operand" "x"))] | |
1454 | SME_INT_TERNARY_SLICE))] | |
1455 | "TARGET_SME2 && TARGET_SME_I16I64 && TARGET_STREAMING_SME" | |
1456 | { | |
1457 | operands[4] = GEN_INT (INTVAL (operands[1]) + 3); | |
1458 | return "<optab>ll\tza.d[%w0, %1:%4, vgx<vector_count>], %2, %3.h"; | |
1459 | } | |
1460 | ) | |
1461 | ||
;; Indexed ("lane") form: the second data input is operand 2 wrapped in
;; UNSPEC_SVE_LANE_SELECT together with the constant lane number in
;; operand 3, printed as "%2.h[%3]".  The mode iterator is SME_ZA_HIx124,
;; so the predicate and constraint of operand 1, the vg modifier and the
;; register suffix are all taken from mode attributes.  The slice range is
;; the fixed "0:3".
1462 | (define_insn "@aarch64_sme_lane_<optab><VNx2DI_ONLY:mode><SME_ZA_HIx124:mode>" | |
1463 | [(set (reg:VNx2DI_ONLY ZA_REGNUM) | |
1464 | (unspec:VNx2DI_ONLY | |
1465 | [(reg:VNx2DI_ONLY ZA_REGNUM) | |
1466 | (reg:DI SME_STATE_REGNUM) | |
1467 | (match_operand:SI 0 "register_operand" "Uci") | |
1468 | (match_operand:SME_ZA_HIx124 1 "<aligned_operand>" "<aligned_fpr>") | |
1469 | (unspec:SME_ZA_HIx124 | |
1470 | [(match_operand:<VSINGLE> 2 "register_operand" "x") | |
1471 | (match_operand:SI 3 "const_int_operand")] | |
1472 | UNSPEC_SVE_LANE_SELECT)] | |
1473 | SME_INT_TERNARY_SLICE))] | |
1474 | "TARGET_SME2 && TARGET_SME_I16I64 && TARGET_STREAMING_SME" | |
1475 | "<optab>ll\tza.d[%w0, 0:3<vg_modifier>], %1<z_suffix>, %2.h[%3]" | |
1476 | ) | |
1477 | ||
;; Offset variant of the za.d lane form: the slice index is
;; (plus operand 0, constant operand 1), and the lane number moves to
;; operand 4.  The output code stores operand 1 + 3 in operands[5] so that
;; the slice range prints as "%1:%5".
1478 | (define_insn "*aarch64_sme_lane_<optab><VNx2DI_ONLY:mode><SME_ZA_HIx124:mode>" | |
1479 | [(set (reg:VNx2DI_ONLY ZA_REGNUM) | |
1480 | (unspec:VNx2DI_ONLY | |
1481 | [(reg:VNx2DI_ONLY ZA_REGNUM) | |
1482 | (reg:DI SME_STATE_REGNUM) | |
1483 | (plus:SI (match_operand:SI 0 "register_operand" "Uci") | |
1484 | (match_operand:SI 1 "const_<za64_offset_range>_operand")) | |
1485 | (match_operand:SME_ZA_HIx124 2 "<aligned_operand>" "<aligned_fpr>") | |
1486 | (unspec:SME_ZA_HIx124 | |
1487 | [(match_operand:<VSINGLE> 3 "register_operand" "x") | |
1488 | (match_operand:SI 4 "const_int_operand")] | |
1489 | UNSPEC_SVE_LANE_SELECT)] | |
1490 | SME_INT_TERNARY_SLICE))] | |
1491 | "TARGET_SME2 && TARGET_SME_I16I64 && TARGET_STREAMING_SME" | |
1492 | { | |
1493 | operands[5] = GEN_INT (INTVAL (operands[1]) + 3); | |
1494 | return "<optab>ll\tza.d[%w0, %1:%5<vg_modifier>], %2<z_suffix>, %3.h[%4]"; | |
1495 | } | |
1496 | ) | |
1497 | ||
1498 | ;; ------------------------------------------------------------------------- | |
1499 | ;; ---- [INT] Sum of outer products | |
1500 | ;; ------------------------------------------------------------------------- | |
;; Includes:
1501 | ;; - BMOPA | |
1502 | ;; - BMOPS | |
4f6ab953 RS |
1503 | ;; - SMOPA |
1504 | ;; - SMOPS | |
1505 | ;; - SUMOPA | |
1506 | ;; - SUMOPS | |
1507 | ;; - UMOPA | |
1508 | ;; - UMOPS | |
1509 | ;; - USMOPA | |
1510 | ;; - USMOPS | |
1511 | ;; ------------------------------------------------------------------------- | |
1512 | ||
;; Outer product of two .b vectors (operands 3 and 4) into a .s ZA tile.
;; Operand 0 is a constant printed directly after "za" as the tile number.
;; Operands 1 and 2 are predicates in the VPRED mode of the ZA mode and are
;; printed with "/m".  Only TARGET_STREAMING_SME is required.
1513 | (define_insn "@aarch64_sme_<optab><VNx4SI_ONLY:mode><VNx16QI_ONLY:mode>" | |
1514 | [(set (reg:VNx4SI_ONLY ZA_REGNUM) | |
1515 | (unspec:VNx4SI_ONLY | |
1516 | [(reg:VNx4SI_ONLY ZA_REGNUM) | |
1517 | (reg:DI SME_STATE_REGNUM) | |
1518 | (match_operand:DI 0 "const_int_operand") | |
1519 | (match_operand:<VNx4SI_ONLY:VPRED> 1 "register_operand" "Upl") | |
1520 | (match_operand:<VNx4SI_ONLY:VPRED> 2 "register_operand" "Upl") | |
1521 | (match_operand:VNx16QI_ONLY 3 "register_operand" "w") | |
1522 | (match_operand:VNx16QI_ONLY 4 "register_operand" "w")] | |
1523 | SME_INT_MOP))] | |
1524 | "TARGET_STREAMING_SME" | |
1525 | "<optab>\tza%0.s, %1/m, %2/m, %3.b, %4.b" | |
1526 | ) | |
1527 | ||
;; Outer product of two .h vectors (operands 3 and 4) into a .d ZA tile.
;; Operand 0 is the constant tile number; operands 1 and 2 are the "/m"
;; predicates.  Unlike the .b -> .s form, this additionally requires
;; TARGET_SME_I16I64.
1528 | (define_insn "@aarch64_sme_<optab><VNx2DI_ONLY:mode><VNx8HI_ONLY:mode>" | |
1529 | [(set (reg:VNx2DI_ONLY ZA_REGNUM) | |
1530 | (unspec:VNx2DI_ONLY | |
1531 | [(reg:VNx2DI_ONLY ZA_REGNUM) | |
1532 | (reg:DI SME_STATE_REGNUM) | |
1533 | (match_operand:DI 0 "const_int_operand") | |
1534 | (match_operand:<VNx2DI_ONLY:VPRED> 1 "register_operand" "Upl") | |
1535 | (match_operand:<VNx2DI_ONLY:VPRED> 2 "register_operand" "Upl") | |
1536 | (match_operand:VNx8HI_ONLY 3 "register_operand" "w") | |
1537 | (match_operand:VNx8HI_ONLY 4 "register_operand" "w")] | |
1538 | SME_INT_MOP))] | |
1539 | "TARGET_STREAMING_SME && TARGET_SME_I16I64" | |
1540 | "<optab>\tza%0.d, %1/m, %2/m, %3.h, %4.h" | |
1541 | ) | |
1542 | ||
c1c267df RS |
;; Outer product of two .h vectors (operands 3 and 4) into a .s ZA tile,
;; for the operations in the SME2_INT_MOP iterator.  Operand 0 is the
;; constant tile number; operands 1 and 2 are the "/m" predicates.
;; Requires TARGET_STREAMING_SME2.
1543 | (define_insn "@aarch64_sme_<optab><VNx4SI_ONLY:mode><VNx8HI_ONLY:mode>" | |
1544 | [(set (reg:VNx4SI_ONLY ZA_REGNUM) | |
1545 | (unspec:VNx4SI_ONLY | |
1546 | [(reg:VNx4SI_ONLY ZA_REGNUM) | |
1547 | (reg:DI SME_STATE_REGNUM) | |
1548 | (match_operand:DI 0 "const_int_operand") | |
1549 | (match_operand:<VNx4SI_ONLY:VPRED> 1 "register_operand" "Upl") | |
1550 | (match_operand:<VNx4SI_ONLY:VPRED> 2 "register_operand" "Upl") | |
1551 | (match_operand:VNx8HI_ONLY 3 "register_operand" "w") | |
1552 | (match_operand:VNx8HI_ONLY 4 "register_operand" "w")] | |
1553 | SME2_INT_MOP))] | |
1554 | "TARGET_STREAMING_SME2" | |
1555 | "<optab>\tza%0.s, %1/m, %2/m, %3.h, %4.h" | |
1556 | ) | |
1557 | ||
;; Outer product of two .s vectors (operands 3 and 4) into a .s ZA tile,
;; for the operations in the SME2_BMOP iterator.  Operand 0 is the
;; constant tile number; operands 1 and 2 are the "/m" predicates.
;; Requires TARGET_STREAMING_SME2.
1558 | (define_insn "@aarch64_sme_<optab><VNx4SI_ONLY:mode><VNx4SI_ONLY:mode>" | |
1559 | [(set (reg:VNx4SI_ONLY ZA_REGNUM) | |
1560 | (unspec:VNx4SI_ONLY | |
1561 | [(reg:VNx4SI_ONLY ZA_REGNUM) | |
1562 | (reg:DI SME_STATE_REGNUM) | |
1563 | (match_operand:DI 0 "const_int_operand") | |
1564 | (match_operand:<VNx4SI_ONLY:VPRED> 1 "register_operand" "Upl") | |
1565 | (match_operand:<VNx4SI_ONLY:VPRED> 2 "register_operand" "Upl") | |
1566 | (match_operand:VNx4SI_ONLY 3 "register_operand" "w") | |
1567 | (match_operand:VNx4SI_ONLY 4 "register_operand" "w")] | |
1568 | SME2_BMOP))] | |
1569 | "TARGET_STREAMING_SME2" | |
1570 | "<optab>\tza%0.s, %1/m, %2/m, %3.s, %4.s" | |
1571 | ) | |
1572 | ||
1573 | ;; ------------------------------------------------------------------------- | |
1574 | ;; ---- [FP] Dot product | |
1575 | ;; ------------------------------------------------------------------------- | |
1576 | ;; Includes: | |
1577 | ;; - BFDOT | |
1578 | ;; - FDOT | |
1579 | ;; ------------------------------------------------------------------------- | |
1580 | ||
;; Multi-vector by multi-vector dot product into za.s: operands 1 and 2
;; are both aligned register groups of the same SME_ZA_HFx24 mode.
;; Operand 0 supplies the slice index and the slice offset is the fixed 0.
;; NOTE(review): the <b> attribute presumably expands to the "b" prefix
;; for bfloat16 modes (BFDOT) and to nothing otherwise; confirm against
;; the iterator definitions.
1581 | (define_insn "@aarch64_sme_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx24:mode>" | |
1582 | [(set (reg:VNx4SI_ONLY ZA_REGNUM) | |
1583 | (unspec:VNx4SI_ONLY | |
1584 | [(reg:VNx4SI_ONLY ZA_REGNUM) | |
1585 | (reg:DI SME_STATE_REGNUM) | |
1586 | (match_operand:SI 0 "register_operand" "Uci") | |
1587 | (match_operand:SME_ZA_HFx24 1 "aligned_register_operand" "Uw<vector_count>") | |
1588 | (match_operand:SME_ZA_HFx24 2 "aligned_register_operand" "Uw<vector_count>")] | |
1589 | SME_FP_DOTPROD))] | |
1590 | "TARGET_STREAMING_SME2" | |
1591 | "<b><optab>\tza.s[%w0, 0, vgx<vector_count>], %1, %2" | |
1592 | ) | |
1593 | ||
;; Offset variant of the multi-vector FP dot product: the slice index is
;; (plus operand 0, constant operand 1), where operand 1 must satisfy
;; const_0_to_7_operand and is printed directly as the slice offset.
1594 | (define_insn "*aarch64_sme_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx24:mode>_plus" | |
1595 | [(set (reg:VNx4SI_ONLY ZA_REGNUM) | |
1596 | (unspec:VNx4SI_ONLY | |
1597 | [(reg:VNx4SI_ONLY ZA_REGNUM) | |
1598 | (reg:DI SME_STATE_REGNUM) | |
1599 | (plus:SI (match_operand:SI 0 "register_operand" "Uci") | |
1600 | (match_operand:SI 1 "const_0_to_7_operand")) | |
1601 | (match_operand:SME_ZA_HFx24 2 "aligned_register_operand" "Uw<vector_count>") | |
1602 | (match_operand:SME_ZA_HFx24 3 "aligned_register_operand" "Uw<vector_count>")] | |
1603 | SME_FP_DOTPROD))] | |
1604 | "TARGET_STREAMING_SME2" | |
1605 | "<b><optab>\tza.s[%w0, %1, vgx<vector_count>], %2, %3" | |
1606 | ) | |
1607 | ||
;; "Single" FP dot product into za.s: operand 1 is a multi-vector group and
;; operand 2 is one vector that the RTL duplicates (vec_duplicate) across
;; the group's mode; it is printed as "%2.h".  Operand 1 uses the plain
;; register_operand predicate rather than the aligned one.  The slice
;; offset is the fixed 0.
1608 | (define_insn "@aarch64_sme_single_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx24:mode>" | |
1609 | [(set (reg:VNx4SI_ONLY ZA_REGNUM) | |
1610 | (unspec:VNx4SI_ONLY | |
1611 | [(reg:VNx4SI_ONLY ZA_REGNUM) | |
1612 | (reg:DI SME_STATE_REGNUM) | |
1613 | (match_operand:SI 0 "register_operand" "Uci") | |
1614 | (match_operand:SME_ZA_HFx24 1 "register_operand" "w") | |
1615 | (vec_duplicate:SME_ZA_HFx24 | |
1616 | (match_operand:<VSINGLE> 2 "register_operand" "x"))] | |
1617 | SME_FP_DOTPROD))] | |
1618 | "TARGET_STREAMING_SME2" | |
1619 | "<b><optab>\tza.s[%w0, 0, vgx<vector_count>], %1, %2.h" | |
1620 | ) | |
1621 | ||
;; Offset variant of the "single" FP dot product: the slice index is
;; (plus operand 0, constant operand 1), where operand 1 must satisfy
;; const_0_to_7_operand.  The duplicated single vector is operand 3.
1622 | (define_insn "*aarch64_sme_single_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx24:mode>_plus" | |
1623 | [(set (reg:VNx4SI_ONLY ZA_REGNUM) | |
1624 | (unspec:VNx4SI_ONLY | |
1625 | [(reg:VNx4SI_ONLY ZA_REGNUM) | |
1626 | (reg:DI SME_STATE_REGNUM) | |
1627 | (plus:SI (match_operand:SI 0 "register_operand" "Uci") | |
1628 | (match_operand:SI 1 "const_0_to_7_operand")) | |
1629 | (match_operand:SME_ZA_HFx24 2 "register_operand" "w") | |
1630 | (vec_duplicate:SME_ZA_HFx24 | |
1631 | (match_operand:<VSINGLE> 3 "register_operand" "x"))] | |
1632 | SME_FP_DOTPROD))] | |
1633 | "TARGET_STREAMING_SME2" | |
1634 | "<b><optab>\tza.s[%w0, %1, vgx<vector_count>], %2, %3.h" | |
1635 | ) | |
1636 | ||
;; Indexed ("lane") FP dot product into za.s: the second data input is
;; operand 2 wrapped in UNSPEC_SVE_LANE_SELECT together with the constant
;; lane number in operand 3, printed as "%2.h[%3]".  This uses the separate
;; SME_FP_DOTPROD_LANE iterator rather than SME_FP_DOTPROD.  The slice
;; offset is the fixed 0.
1637 | (define_insn "@aarch64_sme_lane_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx24:mode>" | |
1638 | [(set (reg:VNx4SI_ONLY ZA_REGNUM) | |
1639 | (unspec:VNx4SI_ONLY | |
1640 | [(reg:VNx4SI_ONLY ZA_REGNUM) | |
1641 | (reg:DI SME_STATE_REGNUM) | |
1642 | (match_operand:SI 0 "register_operand" "Uci") | |
1643 | (match_operand:SME_ZA_HFx24 1 "aligned_register_operand" "Uw<vector_count>") | |
1644 | (unspec:SME_ZA_HFx24 | |
1645 | [(match_operand:<VSINGLE> 2 "register_operand" "x") | |
1646 | (match_operand:SI 3 "const_int_operand")] | |
1647 | UNSPEC_SVE_LANE_SELECT)] | |
1648 | SME_FP_DOTPROD_LANE))] | |
1649 | "TARGET_STREAMING_SME2" | |
1650 | "<b><optab>\tza.s[%w0, 0, vgx<vector_count>], %1, %2.h[%3]" | |
1651 | ) | |
1652 | ||
;; Offset variant of the lane FP dot product: the slice index is
;; (plus operand 0, constant operand 1), where operand 1 must satisfy
;; const_0_to_7_operand.  The indexed vector is operand 3 and its lane
;; number is operand 4.
1653 | (define_insn "*aarch64_sme_lane_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx24:mode>_plus" | |
1654 | [(set (reg:VNx4SI_ONLY ZA_REGNUM) | |
1655 | (unspec:VNx4SI_ONLY | |
1656 | [(reg:VNx4SI_ONLY ZA_REGNUM) | |
1657 | (reg:DI SME_STATE_REGNUM) | |
1658 | (plus:SI (match_operand:SI 0 "register_operand" "Uci") | |
1659 | (match_operand:SI 1 "const_0_to_7_operand")) | |
1660 | (match_operand:SME_ZA_HFx24 2 "aligned_register_operand" "Uw<vector_count>") | |
1661 | (unspec:SME_ZA_HFx24 | |
1662 | [(match_operand:<VSINGLE> 3 "register_operand" "x") | |
1663 | (match_operand:SI 4 "const_int_operand")] | |
1664 | UNSPEC_SVE_LANE_SELECT)] | |
1665 | SME_FP_DOTPROD_LANE))] | |
1666 | "TARGET_STREAMING_SME2" | |
1667 | "<b><optab>\tza.s[%w0, %1, vgx<vector_count>], %2, %3.h[%4]" | |
1668 | ) | |
1669 | ||
1670 | ;; ------------------------------------------------------------------------- | |
1671 | ;; ---- [FP] Ternary arithmetic on ZA slice | |
1672 | ;; ------------------------------------------------------------------------- | |
1673 | ;; Includes: | |
1674 | ;; - FMLA | |
1675 | ;; - FMLS | |
1676 | ;; ------------------------------------------------------------------------- | |
1677 | ||
;; Multi-vector by multi-vector FP ternary operation on a ZA slice group.
;; The pattern iterates over both SME_ZA_SDF_I (ZA mode) and SME_ZA_SDFx24
;; (input mode); the condition keeps only the combinations whose element
;; sizes match.  The ZA element suffix is taken from the ZA mode, and the
;; slice offset is the fixed 0.
1678 | (define_insn "@aarch64_sme_<optab><SME_ZA_SDF_I:mode><SME_ZA_SDFx24:mode>" | |
1679 | [(set (reg:SME_ZA_SDF_I ZA_REGNUM) | |
1680 | (unspec:SME_ZA_SDF_I | |
1681 | [(reg:SME_ZA_SDF_I ZA_REGNUM) | |
1682 | (reg:DI SME_STATE_REGNUM) | |
1683 | (match_operand:SI 0 "register_operand" "Uci") | |
1684 | (match_operand:SME_ZA_SDFx24 1 "aligned_register_operand" "Uw<vector_count>") | |
1685 | (match_operand:SME_ZA_SDFx24 2 "aligned_register_operand" "Uw<vector_count>")] | |
1686 | SME_FP_TERNARY_SLICE))] | |
1687 | "TARGET_SME2 | |
1688 | && TARGET_STREAMING_SME | |
1689 | && <SME_ZA_SDF_I:elem_bits> == <SME_ZA_SDFx24:elem_bits>" | |
1690 | "<optab>\tza.<SME_ZA_SDF_I:Vetype>[%w0, 0, vgx<vector_count>], %1, %2" | |
1691 | ) | |
1692 | ||
;; Offset variant of the multi-vector FP ternary operation: the slice index
;; is (plus operand 0, constant operand 1), where operand 1 must satisfy
;; const_0_to_7_operand.  As in the non-offset form, the condition keeps
;; only the ZA/input mode pairs with equal element sizes.
1693 | (define_insn "*aarch64_sme_<optab><SME_ZA_SDF_I:mode><SME_ZA_SDFx24:mode>_plus" | |
1694 | [(set (reg:SME_ZA_SDF_I ZA_REGNUM) | |
1695 | (unspec:SME_ZA_SDF_I | |
1696 | [(reg:SME_ZA_SDF_I ZA_REGNUM) | |
1697 | (reg:DI SME_STATE_REGNUM) | |
1698 | (plus:SI (match_operand:SI 0 "register_operand" "Uci") | |
1699 | (match_operand:SI 1 "const_0_to_7_operand")) | |
1700 | (match_operand:SME_ZA_SDFx24 2 "aligned_register_operand" "Uw<vector_count>") | |
1701 | (match_operand:SME_ZA_SDFx24 3 "aligned_register_operand" "Uw<vector_count>")] | |
1702 | SME_FP_TERNARY_SLICE))] | |
1703 | "TARGET_SME2 | |
1704 | && TARGET_STREAMING_SME | |
1705 | && <SME_ZA_SDF_I:elem_bits> == <SME_ZA_SDFx24:elem_bits>" | |
1706 | "<optab>\tza.<SME_ZA_SDF_I:Vetype>[%w0, %1, vgx<vector_count>], %2, %3" | |
1707 | ) | |
1708 | ||
;; "Single" FP ternary operation: operand 1 is a multi-vector group and
;; operand 2 is one vector that the RTL duplicates (vec_duplicate) across
;; the group's mode.  The single vector is printed with the element suffix
;; of the input mode.  The condition keeps only the ZA/input mode pairs
;; with equal element sizes; the slice offset is the fixed 0.
1709 | (define_insn "@aarch64_sme_single_<optab><SME_ZA_SDF_I:mode><SME_ZA_SDFx24:mode>" | |
1710 | [(set (reg:SME_ZA_SDF_I ZA_REGNUM) | |
1711 | (unspec:SME_ZA_SDF_I | |
1712 | [(reg:SME_ZA_SDF_I ZA_REGNUM) | |
1713 | (reg:DI SME_STATE_REGNUM) | |
1714 | (match_operand:SI 0 "register_operand" "Uci") | |
1715 | (match_operand:SME_ZA_SDFx24 1 "register_operand" "w") | |
1716 | (vec_duplicate:SME_ZA_SDFx24 | |
1717 | (match_operand:<VSINGLE> 2 "register_operand" "x"))] | |
1718 | SME_FP_TERNARY_SLICE))] | |
1719 | "TARGET_SME2 | |
1720 | && TARGET_STREAMING_SME | |
1721 | && <SME_ZA_SDF_I:elem_bits> == <SME_ZA_SDFx24:elem_bits>" | |
1722 | "<optab>\tza.<SME_ZA_SDF_I:Vetype>[%w0, 0, vgx<vector_count>], %1, %2.<SME_ZA_SDFx24:Vetype>" | |
1723 | ) | |
1724 | ||
;; Offset variant of the "single" FP ternary operation: the slice index is
;; (plus operand 0, constant operand 1), where operand 1 must satisfy
;; const_0_to_7_operand.  The duplicated single vector is operand 3.
1725 | (define_insn "*aarch64_sme_single_<optab><SME_ZA_SDF_I:mode><SME_ZA_SDFx24:mode>_plus" | |
1726 | [(set (reg:SME_ZA_SDF_I ZA_REGNUM) | |
1727 | (unspec:SME_ZA_SDF_I | |
1728 | [(reg:SME_ZA_SDF_I ZA_REGNUM) | |
1729 | (reg:DI SME_STATE_REGNUM) | |
1730 | (plus:SI (match_operand:SI 0 "register_operand" "Uci") | |
1731 | (match_operand:SI 1 "const_0_to_7_operand")) | |
1732 | (match_operand:SME_ZA_SDFx24 2 "register_operand" "w") | |
1733 | (vec_duplicate:SME_ZA_SDFx24 | |
1734 | (match_operand:<VSINGLE> 3 "register_operand" "x"))] | |
1735 | SME_FP_TERNARY_SLICE))] | |
1736 | "TARGET_SME2 | |
1737 | && TARGET_STREAMING_SME | |
1738 | && <SME_ZA_SDF_I:elem_bits> == <SME_ZA_SDFx24:elem_bits>" | |
1739 | "<optab>\tza.<SME_ZA_SDF_I:Vetype>[%w0, %1, vgx<vector_count>], %2, %3.<SME_ZA_SDFx24:Vetype>" | |
1740 | ) | |
1741 | ||
;; Indexed ("lane") FP ternary operation: the second data input is
;; operand 2 wrapped in UNSPEC_SVE_LANE_SELECT together with the constant
;; lane number in operand 3, and is printed with the input mode's element
;; suffix followed by "[%3]".  The condition keeps only the ZA/input mode
;; pairs with equal element sizes; the slice offset is the fixed 0.
1742 | (define_insn "@aarch64_sme_lane_<optab><SME_ZA_SDF_I:mode><SME_ZA_SDFx24:mode>" | |
1743 | [(set (reg:SME_ZA_SDF_I ZA_REGNUM) | |
1744 | (unspec:SME_ZA_SDF_I | |
1745 | [(reg:SME_ZA_SDF_I ZA_REGNUM) | |
1746 | (reg:DI SME_STATE_REGNUM) | |
1747 | (match_operand:SI 0 "register_operand" "Uci") | |
1748 | (match_operand:SME_ZA_SDFx24 1 "aligned_register_operand" "Uw<vector_count>") | |
1749 | (unspec:SME_ZA_SDFx24 | |
1750 | [(match_operand:<VSINGLE> 2 "register_operand" "x") | |
1751 | (match_operand:SI 3 "const_int_operand")] | |
1752 | UNSPEC_SVE_LANE_SELECT)] | |
1753 | SME_FP_TERNARY_SLICE))] | |
1754 | "TARGET_SME2 | |
1755 | && TARGET_STREAMING_SME | |
1756 | && <SME_ZA_SDF_I:elem_bits> == <SME_ZA_SDFx24:elem_bits>" | |
1757 | "<optab>\tza.<SME_ZA_SDF_I:Vetype>[%w0, 0, vgx<vector_count>], %1, %2.<SME_ZA_SDFx24:Vetype>[%3]" | |
1758 | ) | |
1759 | ||
;; Offset variant of the lane FP ternary operation: the slice index is
;; (plus operand 0, constant operand 1), where operand 1 must satisfy
;; const_0_to_7_operand.  The indexed vector is operand 3 and its lane
;; number is operand 4.
1760 | (define_insn "*aarch64_sme_lane_<optab><SME_ZA_SDF_I:mode><SME_ZA_SDFx24:mode>" | |
1761 | [(set (reg:SME_ZA_SDF_I ZA_REGNUM) | |
1762 | (unspec:SME_ZA_SDF_I | |
1763 | [(reg:SME_ZA_SDF_I ZA_REGNUM) | |
1764 | (reg:DI SME_STATE_REGNUM) | |
1765 | (plus:SI (match_operand:SI 0 "register_operand" "Uci") | |
1766 | (match_operand:SI 1 "const_0_to_7_operand")) | |
1767 | (match_operand:SME_ZA_SDFx24 2 "aligned_register_operand" "Uw<vector_count>") | |
1768 | (unspec:SME_ZA_SDFx24 | |
1769 | [(match_operand:<VSINGLE> 3 "register_operand" "x") | |
1770 | (match_operand:SI 4 "const_int_operand")] | |
1771 | UNSPEC_SVE_LANE_SELECT)] | |
1772 | SME_FP_TERNARY_SLICE))] | |
1773 | "TARGET_SME2 | |
1774 | && TARGET_STREAMING_SME | |
1775 | && <SME_ZA_SDF_I:elem_bits> == <SME_ZA_SDFx24:elem_bits>" | |
1776 | "<optab>\tza.<SME_ZA_SDF_I:Vetype>[%w0, %1, vgx<vector_count>], %2, %3.<SME_ZA_SDFx24:Vetype>[%4]" | |
1777 | ) | |
1778 | ||
1779 | ;; ------------------------------------------------------------------------- | |
1780 | ;; ---- [FP] Ternary widening arithmetic on ZA slice | |
1781 | ;; ------------------------------------------------------------------------- | |
1782 | ;; Includes: | |
1783 | ;; - BFMLAL | |
1784 | ;; - BFMLSL | |
1785 | ;; - FMLAL | |
1786 | ;; - FMLSL | |
1787 | ;; ------------------------------------------------------------------------- | |
1788 | ||
;; Single-vector by single-vector form: operands 1 and 2 are individual
;; SVE_FULL_HF vectors printed as ".h", accumulating into za.s.  The
;; mnemonic is "<optab>l" and the slice range is the fixed "0:1"; there is
;; no vgx modifier in this form.
1789 | (define_insn "@aarch64_sme_<optab><VNx4SI_ONLY:mode><SVE_FULL_HF:mode>" | |
1790 | [(set (reg:VNx4SI_ONLY ZA_REGNUM) | |
1791 | (unspec:VNx4SI_ONLY | |
1792 | [(reg:VNx4SI_ONLY ZA_REGNUM) | |
1793 | (reg:DI SME_STATE_REGNUM) | |
1794 | (match_operand:SI 0 "register_operand" "Uci") | |
1795 | (match_operand:SVE_FULL_HF 1 "register_operand" "w") | |
1796 | (match_operand:SVE_FULL_HF 2 "register_operand" "x")] | |
1797 | SME_FP_TERNARY_SLICE))] | |
1798 | "TARGET_STREAMING_SME2" | |
1799 | "<b><optab>l\tza.s[%w0, 0:1], %1.h, %2.h" | |
1800 | ) | |
1801 | ||
;; Offset variant of the single-vector "<optab>l" form: the slice index is
;; (plus operand 0, constant operand 1).  The output code stores
;; operand 1 + 1 in operands[4] so that the slice range prints as "%1:%4",
;; i.e. two consecutive slices starting at the offset.
1802 | (define_insn "*aarch64_sme_<optab><VNx4SI_ONLY:mode><SVE_FULL_HF:mode>_plus" | |
1803 | [(set (reg:VNx4SI_ONLY ZA_REGNUM) | |
1804 | (unspec:VNx4SI_ONLY | |
1805 | [(reg:VNx4SI_ONLY ZA_REGNUM) | |
1806 | (reg:DI SME_STATE_REGNUM) | |
1807 | (plus:SI (match_operand:SI 0 "register_operand" "Uci") | |
1808 | (match_operand:SI 1 "const_<za32_offset_range>_operand")) | |
1809 | (match_operand:SVE_FULL_HF 2 "register_operand" "w") | |
1810 | (match_operand:SVE_FULL_HF 3 "register_operand" "x")] | |
1811 | SME_FP_TERNARY_SLICE))] | |
1812 | "TARGET_STREAMING_SME2" | |
1813 | { | |
1814 | operands[4] = GEN_INT (INTVAL (operands[1]) + 1); | |
1815 | return "<b><optab>l\tza.s[%w0, %1:%4], %2.h, %3.h"; | |
1816 | } | |
1817 | ) | |
1818 | ||
;; Multi-vector by multi-vector "<optab>l" form: operands 1 and 2 are both
;; aligned register groups of the same SME_ZA_HFx24 mode, accumulating
;; into za.s.  The slice range is the fixed "0:1".
1819 | (define_insn "@aarch64_sme_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx24:mode>" | |
1820 | [(set (reg:VNx4SI_ONLY ZA_REGNUM) | |
1821 | (unspec:VNx4SI_ONLY | |
1822 | [(reg:VNx4SI_ONLY ZA_REGNUM) | |
1823 | (reg:DI SME_STATE_REGNUM) | |
1824 | (match_operand:SI 0 "register_operand" "Uci") | |
1825 | (match_operand:SME_ZA_HFx24 1 "aligned_register_operand" "Uw<vector_count>") | |
1826 | (match_operand:SME_ZA_HFx24 2 "aligned_register_operand" "Uw<vector_count>")] | |
1827 | SME_FP_TERNARY_SLICE))] | |
1828 | "TARGET_STREAMING_SME2" | |
1829 | "<b><optab>l\tza.s[%w0, 0:1, vgx<vector_count>], %1, %2" | |
1830 | ) | |
1831 | ||
;; Offset variant of the multi-vector "<optab>l" form: the slice index is
;; (plus operand 0, constant operand 1).  The output code stores
;; operand 1 + 1 in operands[4] so that the slice range prints as "%1:%4".
1832 | (define_insn "*aarch64_sme_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx24:mode>_plus" | |
1833 | [(set (reg:VNx4SI_ONLY ZA_REGNUM) | |
1834 | (unspec:VNx4SI_ONLY | |
1835 | [(reg:VNx4SI_ONLY ZA_REGNUM) | |
1836 | (reg:DI SME_STATE_REGNUM) | |
1837 | (plus:SI (match_operand:SI 0 "register_operand" "Uci") | |
1838 | (match_operand:SI 1 "const_<za32_offset_range>_operand")) | |
1839 | (match_operand:SME_ZA_HFx24 2 "aligned_register_operand" "Uw<vector_count>") | |
1840 | (match_operand:SME_ZA_HFx24 3 "aligned_register_operand" "Uw<vector_count>")] | |
1841 | SME_FP_TERNARY_SLICE))] | |
1842 | "TARGET_STREAMING_SME2" | |
1843 | { | |
1844 | operands[4] = GEN_INT (INTVAL (operands[1]) + 1); | |
1845 | return "<b><optab>l\tza.s[%w0, %1:%4, vgx<vector_count>], %2, %3"; | |
1846 | } | |
1847 | ) | |
1848 | ||
;; "Single" "<optab>l" form: operand 1 is a multi-vector group and
;; operand 2 is one vector that the RTL duplicates (vec_duplicate) across
;; the group's mode; it is printed as "%2.h".  The slice range is the
;; fixed "0:1".
1849 | (define_insn "@aarch64_sme_single_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx24:mode>" | |
1850 | [(set (reg:VNx4SI_ONLY ZA_REGNUM) | |
1851 | (unspec:VNx4SI_ONLY | |
1852 | [(reg:VNx4SI_ONLY ZA_REGNUM) | |
1853 | (reg:DI SME_STATE_REGNUM) | |
1854 | (match_operand:SI 0 "register_operand" "Uci") | |
1855 | (match_operand:SME_ZA_HFx24 1 "register_operand" "w") | |
1856 | (vec_duplicate:SME_ZA_HFx24 | |
1857 | (match_operand:<SME_ZA_HFx24:VSINGLE> 2 "register_operand" "x"))] | |
1858 | SME_FP_TERNARY_SLICE))] | |
1859 | "TARGET_STREAMING_SME2" | |
1860 | "<b><optab>l\tza.s[%w0, 0:1, vgx<vector_count>], %1, %2.h" | |
1861 | ) | |
1862 | ||
;; Offset variant of the "single" "<optab>l" form: the slice index is
;; (plus operand 0, constant operand 1) and the duplicated single vector is
;; operand 3.  The output code stores operand 1 + 1 in operands[4] so that
;; the slice range prints as "%1:%4".
1863 | (define_insn "*aarch64_sme_single_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx24:mode>_plus" | |
1864 | [(set (reg:VNx4SI_ONLY ZA_REGNUM) | |
1865 | (unspec:VNx4SI_ONLY | |
1866 | [(reg:VNx4SI_ONLY ZA_REGNUM) | |
1867 | (reg:DI SME_STATE_REGNUM) | |
1868 | (plus:SI (match_operand:SI 0 "register_operand" "Uci") | |
1869 | (match_operand:SI 1 "const_<za32_offset_range>_operand")) | |
1870 | (match_operand:SME_ZA_HFx24 2 "register_operand" "w") | |
1871 | (vec_duplicate:SME_ZA_HFx24 | |
1872 | (match_operand:<SME_ZA_HFx24:VSINGLE> 3 "register_operand" "x"))] | |
1873 | SME_FP_TERNARY_SLICE))] | |
1874 | "TARGET_STREAMING_SME2" | |
1875 | { | |
1876 | operands[4] = GEN_INT (INTVAL (operands[1]) + 1); | |
1877 | return "<b><optab>l\tza.s[%w0, %1:%4, vgx<vector_count>], %2, %3.h"; | |
1878 | } | |
1879 | ) | |
1880 | ||
;; Indexed ("lane") "<optab>l" form: the second data input is operand 2
;; wrapped in UNSPEC_SVE_LANE_SELECT together with the constant lane number
;; in operand 3, printed as "%2.h[%3]".  The mode iterator is
;; SME_ZA_HFx124, so the predicate and constraint of operand 1, the vg
;; modifier and the register suffix are all taken from mode attributes.
;; The slice range is the fixed "0:1".
1881 | (define_insn "@aarch64_sme_lane_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx124:mode>" | |
1882 | [(set (reg:VNx4SI_ONLY ZA_REGNUM) | |
1883 | (unspec:VNx4SI_ONLY | |
1884 | [(reg:VNx4SI_ONLY ZA_REGNUM) | |
1885 | (reg:DI SME_STATE_REGNUM) | |
1886 | (match_operand:SI 0 "register_operand" "Uci") | |
1887 | (match_operand:SME_ZA_HFx124 1 "<aligned_operand>" "<aligned_fpr>") | |
1888 | (unspec:SME_ZA_HFx124 | |
1889 | [(match_operand:<VSINGLE> 2 "register_operand" "x") | |
1890 | (match_operand:SI 3 "const_int_operand")] | |
1891 | UNSPEC_SVE_LANE_SELECT)] | |
1892 | SME_FP_TERNARY_SLICE))] | |
1893 | "TARGET_STREAMING_SME2" | |
1894 | "<b><optab>l\tza.s[%w0, 0:1<vg_modifier>], %1<z_suffix>, %2.h[%3]" | |
1895 | ) | |
1896 | ||
;; Offset variant of the lane "<optab>l" form: the slice index is
;; (plus operand 0, constant operand 1), and the lane number moves to
;; operand 4.  The output code stores operand 1 + 1 in operands[5] so that
;; the slice range prints as "%1:%5".
1897 | (define_insn "*aarch64_sme_lane_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx124:mode>" | |
1898 | [(set (reg:VNx4SI_ONLY ZA_REGNUM) | |
1899 | (unspec:VNx4SI_ONLY | |
1900 | [(reg:VNx4SI_ONLY ZA_REGNUM) | |
1901 | (reg:DI SME_STATE_REGNUM) | |
1902 | (plus:SI (match_operand:SI 0 "register_operand" "Uci") | |
1903 | (match_operand:SI 1 "const_<za32_offset_range>_operand")) | |
1904 | (match_operand:SME_ZA_HFx124 2 "<aligned_operand>" "<aligned_fpr>") | |
1905 | (unspec:SME_ZA_HFx124 | |
1906 | [(match_operand:<VSINGLE> 3 "register_operand" "x") | |
1907 | (match_operand:SI 4 "const_int_operand")] | |
1908 | UNSPEC_SVE_LANE_SELECT)] | |
1909 | SME_FP_TERNARY_SLICE))] | |
1910 | "TARGET_STREAMING_SME2" | |
1911 | { | |
1912 | operands[5] = GEN_INT (INTVAL (operands[1]) + 1); | |
1913 | return "<b><optab>l\tza.s[%w0, %1:%5<vg_modifier>], %2<z_suffix>, %3.h[%4]"; | |
1914 | } | |
1915 | ) | |
1916 | ||
4f6ab953 RS |
1917 | ;; ------------------------------------------------------------------------- |
1918 | ;; ---- [FP] Sum of outer products | |
1919 | ;; ------------------------------------------------------------------------- | |
1920 | ;; Includes: | |
1921 | ;; - BFMOPA | |
1922 | ;; - BFMOPS | |
1923 | ;; - FMOPA | |
1924 | ;; - FMOPS | |
1925 | ;; ------------------------------------------------------------------------- | |
1926 | ||
;; FP outer product of operands 3 and 4 into a ZA tile.  Operand 0 is the
;; constant tile number printed after "za"; operands 1 and 2 are predicates
;; in the VPRED mode of the ZA mode, printed with "/m".
;; The condition pairs the iterators: a ZA mode with 32-bit elements is
;; accepted only with input elements of at most 32 bits, and any other ZA
;; mode only with input elements wider than 32 bits.
1927 | (define_insn "@aarch64_sme_<optab><SME_ZA_SDF_I:mode><SME_MOP_HSDF:mode>" | |
1928 | [(set (reg:SME_ZA_SDF_I ZA_REGNUM) | |
1929 | (unspec:SME_ZA_SDF_I | |
1930 | [(reg:SME_ZA_SDF_I ZA_REGNUM) | |
1931 | (reg:DI SME_STATE_REGNUM) | |
1932 | (match_operand:DI 0 "const_int_operand") | |
1933 | (match_operand:<SME_ZA_SDF_I:VPRED> 1 "register_operand" "Upl") | |
1934 | (match_operand:<SME_ZA_SDF_I:VPRED> 2 "register_operand" "Upl") | |
1935 | (match_operand:SME_MOP_HSDF 3 "register_operand" "w") | |
1936 | (match_operand:SME_MOP_HSDF 4 "register_operand" "w")] | |
1937 | SME_FP_MOP))] | |
1938 | "TARGET_STREAMING_SME | |
1939 | && (<SME_ZA_SDF_I:elem_bits> == 32) == (<SME_MOP_HSDF:elem_bits> <= 32)" | |
1940 | "<b><optab>\tza%0.<SME_ZA_SDF_I:Vetype>, %1/m, %2/m, %3.<SME_MOP_HSDF:Vetype>, %4.<SME_MOP_HSDF:Vetype>" | |
1941 | ) | |
c1c267df RS |
1942 | |
1943 | ;; ========================================================================= | |
1944 | ;; == Table lookup | |
1945 | ;; ========================================================================= | |
1946 | ||
1947 | ;; ------------------------------------------------------------------------- | |
1948 | ;; ---- Table lookup | |
1949 | ;; ------------------------------------------------------------------------- | |
1950 | ;; Includes: | |
1951 | ;; - LUTI2 | |
1952 | ;; - LUTI4 | |
1953 | ;; ------------------------------------------------------------------------- | |
1954 | ||
;; Adds UNSPEC_SME_LUTI to the "unspec" enumeration; the LUTI patterns in
;; this section use it as their unspec code.
1955 | (define_c_enum "unspec" [ | |
1956 | UNSPEC_SME_LUTI | |
1957 | ]) | |
1958 | ||
;; Table lookup producing a single vector (operand 0).  The inputs are the
;; ZT0 register, SME_STATE_REGNUM, the VNx16QI vector in operand 1 and the
;; constant in operand 2, printed together as "%1[%2]".  LUTI_BITS appears
;; both in the RTL (as a const_int) and in the mnemonic.
1959 | (define_insn "@aarch64_sme_lut<LUTI_BITS><mode>" | |
1960 | [(set (match_operand:SVE_FULL_BHS 0 "register_operand" "=w") | |
1961 | (unspec:SVE_FULL_BHS | |
1962 | [(reg:V8DI ZT0_REGNUM) | |
1963 | (reg:DI SME_STATE_REGNUM) | |
1964 | (match_operand:VNx16QI 1 "register_operand" "w") | |
1965 | (match_operand:DI 2 "const_int_operand") | |
1966 | (const_int LUTI_BITS)] | |
1967 | UNSPEC_SME_LUTI))] | |
1968 | "TARGET_STREAMING_SME2" | |
1969 | "luti<LUTI_BITS>\t%0.<Vetype>, zt0, %1[%2]" | |
1970 | ) | |
1971 | ||
;; Table lookup producing a multi-vector group (operand 0, an aligned
;; register group of mode SVE_BHSx24) from ZT0, operand 1 and the constant
;; in operand 2.  The condition excludes the combination LUTI_BITS == 4
;; with four vectors of 8-bit elements.  The insn is tagged with
;; stride_type "luti_consecutive".
1972 | (define_insn "@aarch64_sme_lut<LUTI_BITS><mode>" | |
1973 | [(set (match_operand:SVE_BHSx24 0 "aligned_register_operand" "=Uw<vector_count>") | |
1974 | (unspec:SVE_BHSx24 | |
1975 | [(reg:V8DI ZT0_REGNUM) | |
1976 | (reg:DI SME_STATE_REGNUM) | |
1977 | (match_operand:VNx16QI 1 "register_operand" "w") | |
1978 | (match_operand:DI 2 "const_int_operand") | |
1979 | (const_int LUTI_BITS)] | |
1980 | UNSPEC_SME_LUTI))] | |
1981 | "TARGET_STREAMING_SME2 | |
1982 | && !(<LUTI_BITS> == 4 && <vector_count> == 4 && <elem_bits> == 8)" | |
1983 | "luti<LUTI_BITS>\t%0, zt0, %1[%2]" | |
9f0f7d80 RS |
1984 | [(set_attr "stride_type" "luti_consecutive")] |
1985 | ) | |
1986 | ||
;; Two-result table lookup expressed as two separate single-vector sets
;; (operands 0 and 1).  Both sets read ZT0, operand 2 and the constant in
;; operand 3; they differ only in the trailing const_int (0 or 1) that
;; identifies which result of the pair is being set.  The pattern is valid
;; only when aarch64_strided_registers_p (operands, 2, 8) holds, and the
;; destinations are printed as a brace-enclosed register list.
;; NOTE(review): the helper presumably checks that the two destination
;; registers are 8 apart; confirm against its definition.
1987 | (define_insn "@aarch64_sme_lut<LUTI_BITS><mode>_strided2" | |
1988 | [(set (match_operand:SVE_FULL_BHS 0 "aarch64_simd_register" "=Uwd") | |
1989 | (unspec:SVE_FULL_BHS | |
1990 | [(reg:V8DI ZT0_REGNUM) | |
1991 | (reg:DI SME_STATE_REGNUM) | |
1992 | (match_operand:VNx16QI 2 "register_operand" "w") | |
1993 | (match_operand:DI 3 "const_int_operand") | |
1994 | (const_int LUTI_BITS) | |
1995 | (const_int 0)] | |
1996 | UNSPEC_SME_LUTI)) | |
1997 | (set (match_operand:SVE_FULL_BHS 1 "aarch64_simd_register" "=w") | |
1998 | (unspec:SVE_FULL_BHS | |
1999 | [(reg:V8DI ZT0_REGNUM) | |
2000 | (reg:DI SME_STATE_REGNUM) | |
2001 | (match_dup 2) | |
2002 | (match_dup 3) | |
2003 | (const_int LUTI_BITS) | |
2004 | (const_int 1)] | |
2005 | UNSPEC_SME_LUTI))] | |
2006 | "TARGET_STREAMING_SME2 | |
2007 | && aarch64_strided_registers_p (operands, 2, 8)" | |
2008 | "luti<LUTI_BITS>\t{%0.<Vetype>, %1.<Vetype>}, zt0, %2[%3]" | |
2009 | [(set_attr "stride_type" "luti_strided")] | |
2010 | ) | |
2011 | ||
;; Four-result table lookup expressed as four separate single-vector sets
;; (operands 0 to 3).  All four sets read ZT0, operand 4 and the constant
;; in operand 5; they differ only in the trailing const_int (0 to 3) that
;; identifies which result is being set.  The pattern is valid only when
;; aarch64_strided_registers_p (operands, 4, 4) holds, and the condition
;; also excludes LUTI_BITS == 4 with 8-bit elements.
;; NOTE(review): the helper presumably checks that the four destination
;; registers are 4 apart; confirm against its definition.
2012 | (define_insn "@aarch64_sme_lut<LUTI_BITS><mode>_strided4" | |
2013 | [(set (match_operand:SVE_FULL_BHS 0 "aarch64_simd_register" "=Uwt") | |
2014 | (unspec:SVE_FULL_BHS | |
2015 | [(reg:V8DI ZT0_REGNUM) | |
2016 | (reg:DI SME_STATE_REGNUM) | |
2017 | (match_operand:VNx16QI 4 "register_operand" "w") | |
2018 | (match_operand:DI 5 "const_int_operand") | |
2019 | (const_int LUTI_BITS) | |
2020 | (const_int 0)] | |
2021 | UNSPEC_SME_LUTI)) | |
2022 | (set (match_operand:SVE_FULL_BHS 1 "aarch64_simd_register" "=w") | |
2023 | (unspec:SVE_FULL_BHS | |
2024 | [(reg:V8DI ZT0_REGNUM) | |
2025 | (reg:DI SME_STATE_REGNUM) | |
2026 | (match_dup 4) | |
2027 | (match_dup 5) | |
2028 | (const_int LUTI_BITS) | |
2029 | (const_int 1)] | |
2030 | UNSPEC_SME_LUTI)) | |
2031 | (set (match_operand:SVE_FULL_BHS 2 "aarch64_simd_register" "=w") | |
2032 | (unspec:SVE_FULL_BHS | |
2033 | [(reg:V8DI ZT0_REGNUM) | |
2034 | (reg:DI SME_STATE_REGNUM) | |
2035 | (match_dup 4) | |
2036 | (match_dup 5) | |
2037 | (const_int LUTI_BITS) | |
2038 | (const_int 2)] | |
2039 | UNSPEC_SME_LUTI)) | |
2040 | (set (match_operand:SVE_FULL_BHS 3 "aarch64_simd_register" "=w") | |
2041 | (unspec:SVE_FULL_BHS | |
2042 | [(reg:V8DI ZT0_REGNUM) | |
2043 | (reg:DI SME_STATE_REGNUM) | |
2044 | (match_dup 4) | |
2045 | (match_dup 5) | |
2046 | (const_int LUTI_BITS) | |
2047 | (const_int 3)] | |
2048 | UNSPEC_SME_LUTI))] | |
2049 | "TARGET_STREAMING_SME2 | |
2050 | && !(<LUTI_BITS> == 4 && <elem_bits> == 8) | |
2051 | && aarch64_strided_registers_p (operands, 4, 4)" | |
2052 | "luti<LUTI_BITS>\t{%0.<Vetype>, %1.<Vetype>, %2.<Vetype>, %3.<Vetype>}, zt0, %4[%5]" | |
2053 | [(set_attr "stride_type" "luti_strided")] | |
c1c267df | 2054 | ) |