]>
Commit | Line | Data |
---|---|---|
88f77cba | 1 | ;; ARM NEON coprocessor Machine Description |
8d9254fc | 2 | ;; Copyright (C) 2006-2020 Free Software Foundation, Inc. |
88f77cba JB |
3 | ;; Written by CodeSourcery. |
4 | ;; | |
5 | ;; This file is part of GCC. | |
6 | ;; | |
7 | ;; GCC is free software; you can redistribute it and/or modify it | |
8 | ;; under the terms of the GNU General Public License as published by | |
2f83c7d6 | 9 | ;; the Free Software Foundation; either version 3, or (at your option) |
88f77cba JB |
10 | ;; any later version. |
11 | ;; | |
12 | ;; GCC is distributed in the hope that it will be useful, but | |
13 | ;; WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 | ;; General Public License for more details. | |
16 | ;; | |
17 | ;; You should have received a copy of the GNU General Public License | |
2f83c7d6 NC |
18 | ;; along with GCC; see the file COPYING3. If not see |
19 | ;; <http://www.gnu.org/licenses/>. | |
88f77cba | 20 | |
88f77cba | 21 | |
c956e102 | 22 | ;; Attribute used to permit string comparisons against <VQH_mnem> in |
003bb7f3 | 23 | ;; type attribute definitions. |
| | ;; Declares the insn attribute "vqh_mnem" with the values vadd, vmin and |
| | ;; vmax; the default is the constant string "vadd". |
c956e102 MS |
24 | (define_attr "vqh_mnem" "vadd,vmin,vmax" (const_string "vadd")) |
25 | ||
| | ;; Stores the V8QI register operand 1 to memory operand 0 (constraint Un); |
| | ;; the source is wrapped in UNSPEC_UNALIGNED_STORE.  Enabled for |
| | ;; TARGET_NEON.  The assembly text is produced by output_move_neon. |
3eefaaa9 BE |
26 | (define_insn "unaligned_storev8qi" |
27 | [(set (match_operand:V8QI 0 "memory_operand" "=Un") | |
28 | (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "w")] | |
29 | UNSPEC_UNALIGNED_STORE))] | |
30 | "TARGET_NEON" | |
31 | "* | |
32 | return output_move_neon (operands); | |
33 | " | |
34 | [(set_attr "type" "neon_store1_1reg")]) | |
35 | ||
| | ;; Move pattern for the VDXMOV modes.  Requires TARGET_NEON and at least |
| | ;; one register operand.  Output by alternative (numbered from 0): |
| | ;;   0      register-to-register vmov; |
| | ;;   1, 4   delegated to output_move_neon; |
| | ;;   2, 3   immediates (Dm/Dn): vmov.f32 when the width reported by |
| | ;;          simd_immediate_valid_for_move is 0, otherwise vmov.i<width>; |
| | ;;   5, 6   vmov using %Q/%R for the operand that is not in a "w" register; |
| | ;;   9      returns "#"; |
| | ;;   7, 8   handled by the default case, output_move_double. |
88f77cba | 36 | (define_insn "*neon_mov<mode>" |
2e87b2f4 | 37 | [(set (match_operand:VDXMOV 0 "nonimmediate_operand"
e009dfb3 | 38 | "=w,Un,w, w, w, ?r,?w,?r, ?Us,*r") |
2e87b2f4 | 39 | (match_operand:VDXMOV 1 "general_operand"
e009dfb3 | 40 | " w,w, Dm,Dn,Uni, w, r, Usi,r,*r"))] |
40f73786 DJ |
41 | "TARGET_NEON |
42 | && (register_operand (operands[0], <MODE>mode) | |
43 | || register_operand (operands[1], <MODE>mode))" | |
88f77cba | 44 | { |
e009dfb3 | 45 | if (which_alternative == 2 || which_alternative == 3) |
88f77cba JB |
46 | { |
47 | int width, is_valid; | |
| | /* Static because the formatted template is returned to the caller. */ |
48 | static char templ[40]; | |
49 | ||
63c8f7d6 | 50 | is_valid = simd_immediate_valid_for_move (operands[1], <MODE>mode,
88f77cba JB |
51 | &operands[1], &width); |
52 | ||
53 | gcc_assert (is_valid != 0); | |
54 | ||
55 | if (width == 0) | |
56 | return "vmov.f32\t%P0, %1 @ <mode>"; | |
57 | else | |
00a3a76a | 58 | sprintf (templ, "vmov.i%d\t%%P0, %%x1 @ <mode>", width); |
88f77cba JB |
59 | |
60 | return templ; | |
61 | } | |
62 | ||
88f77cba JB |
63 | switch (which_alternative) |
64 | { | |
65 | case 0: return "vmov\t%P0, %P1 @ <mode>"; | |
e009dfb3 MM |
66 | case 1: case 4: return output_move_neon (operands); |
| | /* Alternatives 2 and 3 have already returned above. */ |
67 | case 2: case 3: gcc_unreachable (); | |
68 | case 5: return "vmov\t%Q0, %R0, %P1 @ <mode>"; | |
69 | case 6: return "vmov\t%P0, %Q1, %R1 @ <mode>"; | |
70 | case 9: return "#"; | |
3598da80 | 71 | default: return output_move_double (operands, true, NULL); |
88f77cba JB |
72 | } |
73 | } | |
f7379e5e | 74 | [(set_attr "type" "neon_move<q>,neon_store1_1reg,neon_move<q>,\
e009dfb3 MM |
75 | neon_move<q>,neon_load1_1reg, neon_to_gp<q>,\
76 | neon_from_gp<q>,neon_load1_2reg, neon_store1_2reg,\ | |
77 | multiple") | |
78 | (set_attr "length" "4,4,4,4,4,4,4,8,8,8") | |
79 | (set_attr "arm_pool_range" "*,*,*,*,1020,*,*,1020,*,*") | |
80 | (set_attr "thumb2_pool_range" "*,*,*,*,1018,*,*,1018,*,*") | |
81 | (set_attr "neg_pool_range" "*,*,*,*,1004,*,*,1004,*,*")]) | |
88f77cba JB |
82 | |
| | ;; Move pattern for the VQXMOV modes.  Requires TARGET_NEON and at least |
| | ;; one register operand.  Output by alternative (numbered from 0): |
| | ;;   0      register-to-register vmov using %q operands; |
| | ;;   1, 4   delegated to output_move_neon; |
| | ;;   2, 3   immediates (Dm/DN): vmov.f32 when the width reported by |
| | ;;          simd_immediate_valid_for_move is 0, otherwise vmov.i<width>; |
| | ;;   5, 6   two vmov instructions, one per %e/%f part of the "w" operand; |
| | ;;   7-9    handled by the default case, output_move_quad. |
83 | (define_insn "*neon_mov<mode>" | |
84 | [(set (match_operand:VQXMOV 0 "nonimmediate_operand" | |
e009dfb3 | 85 | "=w,Un,w, w, w, ?r,?w,?r,?r, ?Us") |
88f77cba | 86 | (match_operand:VQXMOV 1 "general_operand" |
e009dfb3 | 87 | " w,w, Dm,DN,Uni, w, r, r, Usi, r"))] |
40f73786 DJ |
88 | "TARGET_NEON |
89 | && (register_operand (operands[0], <MODE>mode) | |
90 | || register_operand (operands[1], <MODE>mode))" | |
88f77cba | 91 | { |
e009dfb3 | 92 | if (which_alternative == 2 || which_alternative == 3) |
88f77cba JB |
93 | { |
94 | int width, is_valid; | |
| | /* Static because the formatted template is returned to the caller. */ |
95 | static char templ[40]; | |
96 | ||
63c8f7d6 | 97 | is_valid = simd_immediate_valid_for_move (operands[1], <MODE>mode,
88f77cba JB |
98 | &operands[1], &width); |
99 | ||
100 | gcc_assert (is_valid != 0); | |
101 | ||
102 | if (width == 0) | |
103 | return "vmov.f32\t%q0, %1 @ <mode>"; | |
104 | else | |
105 | sprintf (templ, "vmov.i%d\t%%q0, %%1 @ <mode>", width); | |
106 | ||
107 | return templ; | |
108 | } | |
109 | ||
110 | switch (which_alternative) | |
111 | { | |
112 | case 0: return "vmov\t%q0, %q1 @ <mode>"; | |
e009dfb3 MM |
113 | case 1: case 4: return output_move_neon (operands); |
| | /* Alternatives 2 and 3 have already returned above. */ |
114 | case 2: case 3: gcc_unreachable (); | |
115 | case 5: return "vmov\t%Q0, %R0, %e1 @ <mode>\;vmov\t%J0, %K0, %f1"; | |
116 | case 6: return "vmov\t%e0, %Q1, %R1 @ <mode>\;vmov\t%f0, %J1, %K1"; | |
88f77cba JB |
117 | default: return output_move_quad (operands); |
118 | } | |
119 | } | |
f7379e5e | 120 | [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_move_q,\
e009dfb3 MM |
121 | neon_move_q,neon_load2_2reg_q,neon_to_gp_q,\
122 | neon_from_gp_q,mov_reg,neon_load1_4reg,neon_store1_4reg") | |
123 | (set_attr "length" "4,8,4,4,8,8,8,16,8,16") | |
124 | (set_attr "arm_pool_range" "*,*,*,*,1020,*,*,*,1020,*") | |
125 | (set_attr "thumb2_pool_range" "*,*,*,*,1018,*,*,*,1018,*") | |
126 | (set_attr "neg_pool_range" "*,*,*,*,996,*,*,*,996,*")]) | |
88f77cba | 127 | |
2a9234e8 TC |
128 | /* We define these mov expanders to match the standard mov$a optab to prevent |
129 | the mid-end from trying to do a subreg for these modes which is the most | |
130 | inefficient way to expand the move. Also big-endian subregs aren't | |
131 | allowed for a subset of modes, see TARGET_CAN_CHANGE_MODE_CLASS. | |
132 | Without these RTL generation patterns the mid-end would attempt to take a | |
133 | sub-reg and may ICE if it can't. */ | |
134 | ||
| | ;; Expander for TImode moves, enabled for TARGET_NEON.  Both operands are |
| | ;; asserted (gcc_checking_assert) to satisfy aligned_operand.  While new |
| | ;; pseudos may still be created, the source is forced into a register |
| | ;; whenever the destination is not a REG. |
88f77cba | 135 | (define_expand "movti" |
cd65e265 DZ |
136 | [(set (match_operand:TI 0 "nonimmediate_operand") |
137 | (match_operand:TI 1 "general_operand"))] | |
88f77cba JB |
138 | "TARGET_NEON" |
139 | { | |
70cdb21e BE |
140 | gcc_checking_assert (aligned_operand (operands[0], TImode)); |
141 | gcc_checking_assert (aligned_operand (operands[1], TImode)); | |
40f73786 DJ |
142 | if (can_create_pseudo_p ()) |
143 | { | |
d435a4be | 144 | if (!REG_P (operands[0])) |
40f73786 DJ |
145 | operands[1] = force_reg (TImode, operands[1]); |
146 | } | |
88f77cba JB |
147 | }) |
148 | ||
| | ;; Expander for moves of the VSTRUCT modes, enabled for TARGET_NEON or |
| | ;; TARGET_HAVE_MVE.  Both operands are asserted (gcc_checking_assert) to |
| | ;; satisfy aligned_operand.  While new pseudos may still be created, the |
| | ;; source is forced into a register whenever the destination is not a REG. |
149 | (define_expand "mov<mode>" | |
cd65e265 DZ |
150 | [(set (match_operand:VSTRUCT 0 "nonimmediate_operand") |
151 | (match_operand:VSTRUCT 1 "general_operand"))] | |
14782c81 | 152 | "TARGET_NEON || TARGET_HAVE_MVE" |
88f77cba | 153 | { |
70cdb21e BE |
154 | gcc_checking_assert (aligned_operand (operands[0], <MODE>mode)); |
155 | gcc_checking_assert (aligned_operand (operands[1], <MODE>mode)); | |
40f73786 DJ |
156 | if (can_create_pseudo_p ()) |
157 | { | |
d435a4be | 158 | if (!REG_P (operands[0])) |
40f73786 DJ |
159 | operands[1] = force_reg (<MODE>mode, operands[1]); |
160 | } | |
88f77cba JB |
161 | }) |
162 | ||
63c8f7d6 SP |
163 | ;; The mov<mode> patterns for the modes v8hf, v4hf, v4bf and v8bf are split into |
164 | ;; two groups. The pattern movv8hf is common to MVE and NEON, so it has been moved | |
165 | ;; into the vec-common.md file. The remaining mov expand patterns for half-float and | |
166 | ;; bfloat modes are implemented below. | |
| | ;; Expander for moves of the VHFBF_split modes, enabled for TARGET_NEON. |
| | ;; Both operands use the s_register_operand predicate and are asserted |
| | ;; (gcc_checking_assert) to satisfy aligned_operand.  While new pseudos may |
| | ;; still be created, the source is forced into a register whenever the |
| | ;; destination is not a REG. |
2a9234e8 | 167 | (define_expand "mov<mode>" |
63c8f7d6 SP |
168 | [(set (match_operand:VHFBF_split 0 "s_register_operand") |
169 | (match_operand:VHFBF_split 1 "s_register_operand"))] | |
2a9234e8 | 170 | "TARGET_NEON" |
92422235 | 171 | { |
70cdb21e BE |
172 | gcc_checking_assert (aligned_operand (operands[0], <MODE>mode)); |
173 | gcc_checking_assert (aligned_operand (operands[1], <MODE>mode)); | |
92422235 CL |
174 | if (can_create_pseudo_p ()) |
175 | { | |
176 | if (!REG_P (operands[0])) | |
2a9234e8 | 177 | operands[1] = force_reg (<MODE>mode, operands[1]); |
92422235 CL |
178 | } |
179 | }) | |
180 | ||
| | ;; Move insn for the VSTRUCT modes.  Enabled for TARGET_NEON or |
| | ;; TARGET_HAVE_MVE when at least one operand is a register. |
| | ;; Alternative 0 (register to register) returns "#"; alternatives 1 (store |
| | ;; to Ut memory) and 2 (load from Ut memory) use output_move_neon.  The |
| | ;; "length" attribute is computed by arm_attr_length_move_neon (insn). |
88f77cba JB |
181 | (define_insn "*neon_mov<mode>" |
182 | [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "=w,Ut,w") | |
183 | (match_operand:VSTRUCT 1 "general_operand" " w,w, Ut"))] | |
14782c81 | 184 | "(TARGET_NEON || TARGET_HAVE_MVE) |
40f73786 DJ |
185 | && (register_operand (operands[0], <MODE>mode) |
186 | || register_operand (operands[1], <MODE>mode))" | |
88f77cba JB |
187 | { |
188 | switch (which_alternative) | |
189 | { | |
190 | case 0: return "#"; | |
191 | case 1: case 2: return output_move_neon (operands); | |
192 | default: gcc_unreachable (); | |
193 | } | |
194 | } | |
f7379e5e | 195 | [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_load2_2reg_q") |
7c4f0041 | 196 | (set (attr "length") (symbol_ref "arm_attr_length_move_neon (insn)"))]) |
88f77cba JB |
197 | |
| | ;; After reload (TARGET_NEON only), splits an EI register-to-register move |
| | ;; into two sets: a TImode part at the base register number and a DImode |
| | ;; part at base + 4.  neon_disambiguate_copy fills operands[0..3] with the |
| | ;; part moves (presumably ordered so overlapping copies are safe -- confirm). |
198 | (define_split | |
199 | [(set (match_operand:EI 0 "s_register_operand" "") | |
200 | (match_operand:EI 1 "s_register_operand" ""))] | |
201 | "TARGET_NEON && reload_completed" | |
202 | [(set (match_dup 0) (match_dup 1)) | |
203 | (set (match_dup 2) (match_dup 3))] | |
204 | { | |
205 | int rdest = REGNO (operands[0]); | |
206 | int rsrc = REGNO (operands[1]); | |
207 | rtx dest[2], src[2]; | |
208 | ||
209 | dest[0] = gen_rtx_REG (TImode, rdest); | |
210 | src[0] = gen_rtx_REG (TImode, rsrc); | |
211 | dest[1] = gen_rtx_REG (DImode, rdest + 4); | |
212 | src[1] = gen_rtx_REG (DImode, rsrc + 4); | |
213 | ||
214 | neon_disambiguate_copy (operands, dest, src, 2); | |
215 | }) | |
216 | ||
| | ;; After reload (TARGET_NEON or TARGET_HAVE_MVE), splits an OI |
| | ;; register-to-register move into two TImode sets, at the base register |
| | ;; number and at base + 4.  neon_disambiguate_copy fills operands[0..3] with |
| | ;; the part moves (presumably ordered so overlapping copies are safe -- |
| | ;; confirm). |
217 | (define_split | |
218 | [(set (match_operand:OI 0 "s_register_operand" "") | |
219 | (match_operand:OI 1 "s_register_operand" ""))] | |
14782c81 | 220 | "(TARGET_NEON || TARGET_HAVE_MVE) && reload_completed" |
88f77cba JB |
221 | [(set (match_dup 0) (match_dup 1)) |
222 | (set (match_dup 2) (match_dup 3))] | |
223 | { | |
224 | int rdest = REGNO (operands[0]); | |
225 | int rsrc = REGNO (operands[1]); | |
226 | rtx dest[2], src[2]; | |
227 | ||
228 | dest[0] = gen_rtx_REG (TImode, rdest); | |
229 | src[0] = gen_rtx_REG (TImode, rsrc); | |
230 | dest[1] = gen_rtx_REG (TImode, rdest + 4); | |
231 | src[1] = gen_rtx_REG (TImode, rsrc + 4); | |
232 | ||
233 | neon_disambiguate_copy (operands, dest, src, 2); | |
234 | }) | |
235 | ||
| | ;; After reload (TARGET_NEON only), splits a CI register-to-register move |
| | ;; into three TImode sets, at the base register number, base + 4 and |
| | ;; base + 8.  neon_disambiguate_copy fills operands[0..5] with the part |
| | ;; moves (presumably ordered so overlapping copies are safe -- confirm). |
236 | (define_split | |
237 | [(set (match_operand:CI 0 "s_register_operand" "") | |
238 | (match_operand:CI 1 "s_register_operand" ""))] | |
239 | "TARGET_NEON && reload_completed" | |
240 | [(set (match_dup 0) (match_dup 1)) | |
241 | (set (match_dup 2) (match_dup 3)) | |
242 | (set (match_dup 4) (match_dup 5))] | |
243 | { | |
244 | int rdest = REGNO (operands[0]); | |
245 | int rsrc = REGNO (operands[1]); | |
246 | rtx dest[3], src[3]; | |
247 | ||
248 | dest[0] = gen_rtx_REG (TImode, rdest); | |
249 | src[0] = gen_rtx_REG (TImode, rsrc); | |
250 | dest[1] = gen_rtx_REG (TImode, rdest + 4); | |
251 | src[1] = gen_rtx_REG (TImode, rsrc + 4); | |
252 | dest[2] = gen_rtx_REG (TImode, rdest + 8); | |
253 | src[2] = gen_rtx_REG (TImode, rsrc + 8); | |
254 | ||
255 | neon_disambiguate_copy (operands, dest, src, 3); | |
256 | }) | |
257 | ||
| | ;; After reload (TARGET_NEON or TARGET_HAVE_MVE), splits an XI |
| | ;; register-to-register move into four TImode sets, at the base register |
| | ;; number, base + 4, base + 8 and base + 12.  neon_disambiguate_copy fills |
| | ;; operands[0..7] with the part moves (presumably ordered so overlapping |
| | ;; copies are safe -- confirm). |
258 | (define_split | |
259 | [(set (match_operand:XI 0 "s_register_operand" "") | |
260 | (match_operand:XI 1 "s_register_operand" ""))] | |
14782c81 | 261 | "(TARGET_NEON || TARGET_HAVE_MVE) && reload_completed" |
88f77cba JB |
262 | [(set (match_dup 0) (match_dup 1)) |
263 | (set (match_dup 2) (match_dup 3)) | |
264 | (set (match_dup 4) (match_dup 5)) | |
265 | (set (match_dup 6) (match_dup 7))] | |
266 | { | |
267 | int rdest = REGNO (operands[0]); | |
268 | int rsrc = REGNO (operands[1]); | |
269 | rtx dest[4], src[4]; | |
270 | ||
271 | dest[0] = gen_rtx_REG (TImode, rdest); | |
272 | src[0] = gen_rtx_REG (TImode, rsrc); | |
273 | dest[1] = gen_rtx_REG (TImode, rdest + 4); | |
274 | src[1] = gen_rtx_REG (TImode, rsrc + 4); | |
275 | dest[2] = gen_rtx_REG (TImode, rdest + 8); | |
276 | src[2] = gen_rtx_REG (TImode, rsrc + 8); | |
277 | dest[3] = gen_rtx_REG (TImode, rdest + 12); | |
278 | src[3] = gen_rtx_REG (TImode, rsrc + 12); | |
279 | ||
280 | neon_disambiguate_copy (operands, dest, src, 4); | |
281 | }) | |
282 | ||
| | ;; Expander for misaligned moves of the VDQX modes, with the source wrapped |
| | ;; in UNSPEC_MISALIGNED_ACCESS.  Enabled for TARGET_NEON when not |
| | ;; BYTES_BIG_ENDIAN and unaligned_access is set.  Preparation: force the |
| | ;; source into a register if neither operand is one, then pick the |
| | ;; non-register operand and, if neon_vector_mem_operand rejects it, force |
| | ;; its address into a Pmode register. |
c452684d | 283 | (define_expand "movmisalign<mode>" |
33255ae3 JB |
284 | [(set (match_operand:VDQX 0 "neon_perm_struct_or_reg_operand") |
285 | (unspec:VDQX [(match_operand:VDQX 1 "neon_perm_struct_or_reg_operand")] | |
c452684d | 286 | UNSPEC_MISALIGNED_ACCESS))] |
869b9125 | 287 | "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access" |
c452684d | 288 | { |
3416dd87 | 289 | rtx adjust_mem; |
c452684d JB |
290 | /* This pattern is not permitted to fail during expansion: if both arguments |
291 | are non-registers (e.g. memory := constant, which can be created by the | |
292 | auto-vectorizer), force operand 1 into a register. */ | |
293 | if (!s_register_operand (operands[0], <MODE>mode) | |
294 | && !s_register_operand (operands[1], <MODE>mode)) | |
295 | operands[1] = force_reg (<MODE>mode, operands[1]); | |
3416dd87 RR |
296 |
| | /* Whichever operand is not the destination register is treated as the memory operand. */ |
297 | if (s_register_operand (operands[0], <MODE>mode)) | |
298 | adjust_mem = operands[1]; | |
299 | else | |
300 | adjust_mem = operands[0]; | |
301 | ||
302 | /* Legitimize address. */ | |
303 | if (!neon_vector_mem_operand (adjust_mem, 2, true)) | |
304 | XEXP (adjust_mem, 0) = force_reg (Pmode, XEXP (adjust_mem, 0)); | |
305 | ||
c452684d JB |
306 | }) |
307 | ||
| | ;; Misaligned store for the VDX modes: writes register operand 1 to the Um |
| | ;; memory operand 0 with vst1.<V_sz_elem>, printing the register with %P. |
| | ;; Same enabling condition as the movmisalign<mode> expander. |
308 | (define_insn "*movmisalign<mode>_neon_store" | |
33255ae3 | 309 | [(set (match_operand:VDX 0 "neon_permissive_struct_operand" "=Um") |
c452684d JB |
310 | (unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")] |
311 | UNSPEC_MISALIGNED_ACCESS))] | |
869b9125 | 312 | "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access" |
c452684d | 313 | "vst1.<V_sz_elem>\t{%P1}, %A0" |
f7379e5e | 314 | [(set_attr "type" "neon_store1_1reg<q>")]) |
c452684d JB |
315 | |
| | ;; Misaligned load for the VDX modes: reads the Um memory operand 1 into |
| | ;; register operand 0 with vld1.<V_sz_elem>, printing the register with %P. |
| | ;; Same enabling condition as the movmisalign<mode> expander. |
316 | (define_insn "*movmisalign<mode>_neon_load" | |
33255ae3 JB |
317 | [(set (match_operand:VDX 0 "s_register_operand" "=w") |
318 | (unspec:VDX [(match_operand:VDX 1 "neon_permissive_struct_operand" | |
319 | " Um")] | |
c452684d | 320 | UNSPEC_MISALIGNED_ACCESS))] |
869b9125 | 321 | "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access" |
c452684d | 322 | "vld1.<V_sz_elem>\t{%P0}, %A1" |
f7379e5e | 323 | [(set_attr "type" "neon_load1_1reg<q>")]) |
c452684d JB |
324 | |
| | ;; Misaligned store for the VQX modes: writes register operand 1 to the Um |
| | ;; memory operand 0 with vst1.<V_sz_elem>, printing the register with %q. |
| | ;; Same enabling condition as the movmisalign<mode> expander. |
325 | (define_insn "*movmisalign<mode>_neon_store" | |
33255ae3 | 326 | [(set (match_operand:VQX 0 "neon_permissive_struct_operand" "=Um") |
c452684d JB |
327 | (unspec:VQX [(match_operand:VQX 1 "s_register_operand" " w")] |
328 | UNSPEC_MISALIGNED_ACCESS))] | |
869b9125 | 329 | "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access" |
c452684d | 330 | "vst1.<V_sz_elem>\t{%q1}, %A0" |
f7379e5e | 331 | [(set_attr "type" "neon_store1_1reg<q>")]) |
c452684d JB |
332 | |
| | ;; Misaligned load for the VQX modes: reads the Um memory operand 1 into |
| | ;; register operand 0 with vld1.<V_sz_elem>, printing the register with %q. |
| | ;; Same enabling condition as the movmisalign<mode> expander. |
333 | (define_insn "*movmisalign<mode>_neon_load" | |
33255ae3 JB |
334 | [(set (match_operand:VQX 0 "s_register_operand" "=w") |
335 | (unspec:VQX [(match_operand:VQX 1 "neon_permissive_struct_operand" | |
336 | " Um")] | |
c452684d | 337 | UNSPEC_MISALIGNED_ACCESS))] |
869b9125 | 338 | "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access" |
c452684d | 339 | "vld1.<V_sz_elem>\t{%q0}, %A1" |
b5a26023 | 340 | [(set_attr "type" "neon_load1_1reg<q>")]) |
c452684d | 341 | |
| | ;; Lane insert for the VD_LANE modes.  Scalar operand 1 is duplicated and |
| | ;; merged into operand 3 (tied to operand 0) under the vec_merge mask in |
| | ;; operand 2.  The lane index is the position of the lowest set bit of that |
| | ;; mask (ffs - 1), mirrored when BYTES_BIG_ENDIAN.  Alternative 0 loads the |
| | ;; lane from memory with vld1; alternative 1 moves it from a core register |
| | ;; with vmov. |
8ba8ebff | 342 | (define_insn "@vec_set<mode>_internal" |
92422235 CL |
343 | [(set (match_operand:VD_LANE 0 "s_register_operand" "=w,w") |
344 | (vec_merge:VD_LANE | |
345 | (vec_duplicate:VD_LANE | |
058e2674 | 346 | (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r")) |
92422235 | 347 | (match_operand:VD_LANE 3 "s_register_operand" "0,0") |
058e2674 | 348 | (match_operand:SI 2 "immediate_operand" "i,i")))] |
88f77cba | 349 | "TARGET_NEON" |
80b8eb11 | 350 | { |
d19eb620 | 351 | int elt = ffs ((int) INTVAL (operands[2])) - 1; |
874d42b9 JM |
352 | if (BYTES_BIG_ENDIAN) |
353 | elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt; | |
| | /* Replace the mask with the lane number printed by %c2. */ |
354 | operands[2] = GEN_INT (elt); | |
058e2674 UW |
355 |
356 | if (which_alternative == 0) | |
357 | return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1"; | |
358 | else | |
359 | return "vmov.<V_sz_elem>\t%P0[%c2], %1"; | |
80b8eb11 | 360 | } |
f7379e5e | 361 | [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")]) |
88f77cba | 362 | |
| | ;; Lane insert for the VQ2 modes.  The element index is the position of the |
| | ;; lowest set bit of the vec_merge mask in operand 2 (ffs - 1).  It is split |
| | ;; into a lane within a half vector (elem % half_elts, mirrored when |
| | ;; BYTES_BIG_ENDIAN) and a register offset hi = (elem / half_elts) * 2. |
| | ;; Operand 0 is rewritten to the <V_HALF>mode register at REGNO + hi, then |
| | ;; the lane is written with vld1 (alternative 0) or vmov (alternative 1). |
8ba8ebff | 363 | (define_insn "@vec_set<mode>_internal" |
4b644867 AL |
364 | [(set (match_operand:VQ2 0 "s_register_operand" "=w,w") |
365 | (vec_merge:VQ2 | |
366 | (vec_duplicate:VQ2 | |
058e2674 | 367 | (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r")) |
4b644867 | 368 | (match_operand:VQ2 3 "s_register_operand" "0,0") |
058e2674 | 369 | (match_operand:SI 2 "immediate_operand" "i,i")))] |
88f77cba JB |
370 | "TARGET_NEON" |
371 | { | |
466e4b7a | 372 | HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1; |
88f77cba | 373 | int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2; |
80b8eb11 JB |
374 | int elt = elem % half_elts; |
375 | int hi = (elem / half_elts) * 2; | |
88f77cba JB |
376 | int regno = REGNO (operands[0]); |
377 | ||
874d42b9 JM |
378 | if (BYTES_BIG_ENDIAN) |
379 | elt = half_elts - 1 - elt; | |
380 | ||
88f77cba JB |
381 | operands[0] = gen_rtx_REG (<V_HALF>mode, regno + hi); |
382 | operands[2] = GEN_INT (elt); | |
383 | ||
058e2674 UW |
384 | if (which_alternative == 0) |
385 | return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1"; | |
386 | else | |
387 | return "vmov.<V_sz_elem>\t%P0[%c2], %1"; | |
88f77cba | 388 | } |
f7379e5e | 389 | [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")] |
c956e102 | 390 | ) |
88f77cba | 391 | |
| | ;; Lane insert for the V2DI_ONLY modes.  The element index is the position |
| | ;; of the lowest set bit of the vec_merge mask in operand 2 (ffs - 1). |
| | ;; Operand 0 is rewritten to the DImode register at REGNO + 2 * elem, which |
| | ;; is then loaded whole with vld1.64 (alternative 0) or written from the |
| | ;; %Q/%R parts of operand 1 with vmov (alternative 1). |
8ba8ebff RS |
392 | (define_insn "@vec_set<mode>_internal" |
393 | [(set (match_operand:V2DI_ONLY 0 "s_register_operand" "=w,w") | |
394 | (vec_merge:V2DI_ONLY | |
395 | (vec_duplicate:V2DI_ONLY | |
058e2674 | 396 | (match_operand:DI 1 "nonimmediate_operand" "Um,r")) |
8ba8ebff | 397 | (match_operand:V2DI_ONLY 3 "s_register_operand" "0,0") |
058e2674 | 398 | (match_operand:SI 2 "immediate_operand" "i,i")))] |
88f77cba JB |
399 | "TARGET_NEON" |
400 | { | |
466e4b7a | 401 | HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1; |
80b8eb11 | 402 | int regno = REGNO (operands[0]) + 2 * elem; |
88f77cba JB |
403 | |
404 | operands[0] = gen_rtx_REG (DImode, regno); | |
405 | ||
058e2674 UW |
406 | if (which_alternative == 0) |
407 | return "vld1.64\t%P0, %A1"; | |
408 | else | |
409 | return "vmov\t%P0, %Q1, %R1"; | |
88f77cba | 410 | } |
f7379e5e | 411 | [(set_attr "type" "neon_load1_all_lanes_q,neon_from_gp_q")] |
c956e102 | 412 | ) |
88f77cba | 413 | |
| | ;; Expander for setting one element of a VDQ vector (TARGET_NEON). |
| | ;; Operand 2 is the element index; it is converted to the single-bit mask |
| | ;; 1 << index and passed to vec_set<mode>_internal, with operand 0 supplied |
| | ;; both as the destination and as the vector being merged into. |
80b8eb11 | 414 | (define_expand "vec_set<mode>" |
cd65e265 DZ |
415 | [(match_operand:VDQ 0 "s_register_operand") |
416 | (match_operand:<V_elem> 1 "s_register_operand") | |
417 | (match_operand:SI 2 "immediate_operand")] | |
80b8eb11 JB |
418 | "TARGET_NEON" |
419 | { | |
d4b5c77d | 420 | HOST_WIDE_INT elem = HOST_WIDE_INT_1 << INTVAL (operands[2]); |
80b8eb11 JB |
421 | emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1], |
422 | GEN_INT (elem), operands[0])); | |
423 | DONE; | |
424 | }) | |
425 | ||
| | ;; Lane extract for the VD_LANE modes (TARGET_NEON).  Operand 2 is the |
| | ;; element index, mirrored when BYTES_BIG_ENDIAN.  Alternative 0 stores the |
| | ;; lane to Um memory with vst1; alternative 1 moves it to a core register |
| | ;; with vmov.<V_uf_sclr>. |
ff03930a | 426 | (define_insn "vec_extract<mode><V_elem_l>" |
058e2674 | 427 | [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r") |
88f77cba | 428 | (vec_select:<V_elem> |
92422235 | 429 | (match_operand:VD_LANE 1 "s_register_operand" "w,w") |
058e2674 | 430 | (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))] |
88f77cba | 431 | "TARGET_NEON" |
874d42b9 JM |
432 | { |
433 | if (BYTES_BIG_ENDIAN) | |
434 | { | |
435 | int elt = INTVAL (operands[2]); | |
436 | elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt; | |
437 | operands[2] = GEN_INT (elt); | |
438 | } | |
058e2674 UW |
439 |
440 | if (which_alternative == 0) | |
441 | return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0"; | |
442 | else | |
443 | return "vmov.<V_uf_sclr>\t%0, %P1[%c2]"; | |
874d42b9 | 444 | } |
f7379e5e | 445 | [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")] |
c956e102 | 446 | ) |
88f77cba | 447 | |
| | ;; Lane extract for the VQ2 modes (TARGET_NEON).  The element index in |
| | ;; operand 2 is split into a lane within a half vector (index % half_elts, |
| | ;; mirrored when BYTES_BIG_ENDIAN) and a register offset |
| | ;; hi = (index / half_elts) * 2.  Operand 1 is rewritten to the |
| | ;; <V_HALF>mode register at REGNO + hi, then the lane is stored with vst1 |
| | ;; (alternative 0) or moved to a core register with vmov (alternative 1). |
ff03930a | 448 | (define_insn "vec_extract<mode><V_elem_l>" |
058e2674 | 449 | [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r") |
88f77cba | 450 | (vec_select:<V_elem> |
4b644867 | 451 | (match_operand:VQ2 1 "s_register_operand" "w,w") |
058e2674 | 452 | (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))] |
88f77cba JB |
453 | "TARGET_NEON" |
454 | { | |
455 | int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2; | |
456 | int elt = INTVAL (operands[2]) % half_elts; | |
457 | int hi = (INTVAL (operands[2]) / half_elts) * 2; | |
458 | int regno = REGNO (operands[1]); | |
459 | ||
874d42b9 JM |
460 | if (BYTES_BIG_ENDIAN) |
461 | elt = half_elts - 1 - elt; | |
462 | ||
88f77cba JB |
463 | operands[1] = gen_rtx_REG (<V_HALF>mode, regno + hi); |
464 | operands[2] = GEN_INT (elt); | |
465 | ||
058e2674 UW |
466 | if (which_alternative == 0) |
467 | return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0"; | |
468 | else | |
469 | return "vmov.<V_uf_sclr>\t%0, %P1[%c2]"; | |
88f77cba | 470 | } |
f7379e5e | 471 | [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")] |
c956e102 | 472 | ) |
88f77cba | 473 | |
| | ;; Element extract from a V2DI register (TARGET_NEON).  Operand 1 is |
| | ;; rewritten to the DImode register at REGNO + 2 * index, which is then |
| | ;; stored whole with vst1.64 (alternative 0) or moved to the %Q/%R parts of |
| | ;; operand 0 with vmov (alternative 1). |
ff03930a | 474 | (define_insn "vec_extractv2didi" |
058e2674 | 475 | [(set (match_operand:DI 0 "nonimmediate_operand" "=Um,r") |
88f77cba | 476 | (vec_select:DI |
058e2674 UW |
477 | (match_operand:V2DI 1 "s_register_operand" "w,w") |
478 | (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))] | |
88f77cba JB |
479 | "TARGET_NEON" |
480 | { | |
8c98c2a6 | 481 | int regno = REGNO (operands[1]) + 2 * INTVAL (operands[2]); |
88f77cba JB |
482 | |
483 | operands[1] = gen_rtx_REG (DImode, regno); | |
484 | ||
058e2674 UW |
485 | if (which_alternative == 0) |
486 | return "vst1.64\t{%P1}, %A0 @ v2di"; | |
487 | else | |
488 | return "vmov\t%Q0, %R0, %P1 @ v2di"; | |
88f77cba | 489 | } |
f7379e5e | 490 | [(set_attr "type" "neon_store1_one_lane_q,neon_to_gp_q")] |
c956e102 | 491 | ) |
88f77cba | 492 | |
| | ;; Expander for initialising a VDQ vector (operand 0) from operand 1. |
| | ;; Enabled for TARGET_NEON or TARGET_HAVE_MVE; all work is delegated to |
| | ;; neon_expand_vector_init. |
ff03930a | 493 | (define_expand "vec_init<mode><V_elem_l>" |
cd65e265 | 494 | [(match_operand:VDQ 0 "s_register_operand") |
88f77cba | 495 | (match_operand 1 "" "")] |
63c8f7d6 | 496 | "TARGET_NEON || TARGET_HAVE_MVE" |
88f77cba JB |
497 | { |
498 | neon_expand_vector_init (operands[0], operands[1]); | |
499 | DONE; | |
500 | }) | |
501 | ||
502 | ;; Doubleword and quadword arithmetic. | |
503 | ||
bab53516 SL |
504 | ;; NOTE: some other instructions also support 64-bit integer |
505 | ;; element size, which we could potentially use for "long long" operations. | |
88f77cba JB |
506 | |
| | ;; Vector add (vadd) for the VDQ modes.  Float modes match only when |
| | ;; flag_unsafe_math_optimizations is set.  The "type" attribute is |
| | ;; neon_fp_addsub_s<q> for float modes and neon_add<q> otherwise. |
507 | (define_insn "*add<mode>3_neon" | |
508 | [(set (match_operand:VDQ 0 "s_register_operand" "=w") | |
509 | (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w") | |
510 | (match_operand:VDQ 2 "s_register_operand" "w")))] | |
400cfcf5 | 511 | "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" |
c956e102 | 512 | "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
003bb7f3 | 513 | [(set (attr "type") |
b75b1be2 | 514 | (if_then_else (match_test "<Is_float_mode>") |
f7379e5e JG |
515 | (const_string "neon_fp_addsub_s<q>") |
516 | (const_string "neon_add<q>")))] | |
c956e102 | 517 | ) |
88f77cba | 518 | |
6da37857 MW |
519 | ;; As with SFmode, full support for HFmode vector arithmetic is only available |
520 | ;; when flag-unsafe-math-optimizations is enabled. | |
521 | ||
522 | (define_insn "add<mode>3" | |
523 | [(set | |
524 | (match_operand:VH 0 "s_register_operand" "=w") | |
525 | (plus:VH | |
526 | (match_operand:VH 1 "s_register_operand" "w") | |
527 | (match_operand:VH 2 "s_register_operand" "w")))] | |
528 | "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations" | |
529 | "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" | |
530 | [(set (attr "type") | |
531 | (if_then_else (match_test "<Is_float_mode>") | |
532 | (const_string "neon_fp_addsub_s<q>") | |
533 | (const_string "neon_add<q>")))] | |
534 | ) | |
535 | ||
| | ;; Same RTL and output as add<mode>3 for the VH modes, but conditioned only |
| | ;; on TARGET_NEON_FP16INST (no flag_unsafe_math_optimizations requirement). |
55a9b91b MW |
536 | (define_insn "add<mode>3_fp16" |
537 | [(set | |
538 | (match_operand:VH 0 "s_register_operand" "=w") | |
539 | (plus:VH | |
540 | (match_operand:VH 1 "s_register_operand" "w") | |
541 | (match_operand:VH 2 "s_register_operand" "w")))] | |
542 | "TARGET_NEON_FP16INST" | |
543 | "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" | |
544 | [(set (attr "type") | |
545 | (if_then_else (match_test "<Is_float_mode>") | |
546 | (const_string "neon_fp_addsub_s<q>") | |
547 | (const_string "neon_add<q>")))] | |
548 | ) | |
549 | ||
| | ;; Vector subtract (vsub) for the VDQ modes.  Float modes match only when |
| | ;; flag_unsafe_math_optimizations is set.  The "type" attribute is |
| | ;; neon_fp_addsub_s<q> for float modes and neon_sub<q> otherwise. |
88f77cba JB |
550 | (define_insn "*sub<mode>3_neon" |
551 | [(set (match_operand:VDQ 0 "s_register_operand" "=w") | |
552 | (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w") | |
553 | (match_operand:VDQ 2 "s_register_operand" "w")))] | |
400cfcf5 | 554 | "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" |
c956e102 | 555 | "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
003bb7f3 | 556 | [(set (attr "type") |
b75b1be2 | 557 | (if_then_else (match_test "<Is_float_mode>") |
f7379e5e JG |
558 | (const_string "neon_fp_addsub_s<q>") |
559 | (const_string "neon_sub<q>")))] | |
c956e102 | 560 | ) |
88f77cba | 561 | |
| | ;; Named subtract pattern for the VH modes.  Requires TARGET_NEON_FP16INST |
| | ;; and flag_unsafe_math_optimizations; emits vsub.  The "type" attribute |
| | ;; uses the floating-point add/sub class, matching the VH add patterns and |
| | ;; the float branch of *sub<mode>3_neon. |
6da37857 MW |
562 | (define_insn "sub<mode>3" |
563 | [(set | |
564 | (match_operand:VH 0 "s_register_operand" "=w") | |
565 | (minus:VH | |
566 | (match_operand:VH 1 "s_register_operand" "w") | |
567 | (match_operand:VH 2 "s_register_operand" "w")))] | |
568 | "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations" | |
569 | "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" | |
570 | [(set_attr "type" "neon_fp_addsub_s<q>")] | |
571 | ) | |
572 | ||
| | ;; Same RTL and output as sub<mode>3 for the VH modes, but conditioned only |
| | ;; on TARGET_NEON_FP16INST.  The "type" attribute uses the floating-point |
| | ;; add/sub class, matching the VH add patterns. |
55a9b91b MW |
573 | (define_insn "sub<mode>3_fp16" |
574 | [(set | |
575 | (match_operand:VH 0 "s_register_operand" "=w") | |
576 | (minus:VH | |
577 | (match_operand:VH 1 "s_register_operand" "w") | |
578 | (match_operand:VH 2 "s_register_operand" "w")))] | |
579 | "TARGET_NEON_FP16INST" | |
580 | "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" | |
581 | [(set_attr "type" "neon_fp_addsub_s<q>")] | |
582 | ) | |
583 | ||
| | ;; Vector multiply (vmul) for the VDQW modes.  Float modes match only when |
| | ;; flag_unsafe_math_optimizations is set.  The "type" attribute is |
| | ;; neon_fp_mul_s<q> for float modes and neon_mul_<V_elem_ch><q> otherwise. |
88f77cba | 584 | (define_insn "*mul<mode>3_neon" |
f7379e5e JG |
585 | [(set (match_operand:VDQW 0 "s_register_operand" "=w") |
586 | (mult:VDQW (match_operand:VDQW 1 "s_register_operand" "w") | |
587 | (match_operand:VDQW 2 "s_register_operand" "w")))] | |
400cfcf5 | 588 | "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" |
c956e102 | 589 | "vmul.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
003bb7f3 | 590 | [(set (attr "type") |
b75b1be2 | 591 | (if_then_else (match_test "<Is_float_mode>") |
f7379e5e JG |
592 | (const_string "neon_fp_mul_s<q>") |
593 | (const_string "neon_mul_<V_elem_ch><q>")))] | |
c956e102 | 594 | ) |
88f77cba | 595 | |
536ecfc4 PK |
596 | /* Perform division using multiply-by-reciprocal. |
597 | The reciprocal is calculated using the Newton-Raphson method. | |
598 | Enabled with -funsafe-math-optimizations -freciprocal-math | |
599 | and disabled for -Os since it increases code size. */ | |
600 | ||
| | ;; Expander for vector division in the VCVTF modes.  The condition checked |
| | ;; here is TARGET_NEON && !optimize_size && flag_reciprocal_math.  Emits a |
| | ;; reciprocal estimate of operand 2 (vrecpe), refines it with two |
| | ;; vrecps + multiply steps, then multiplies operand 1 by the result. |
601 | (define_expand "div<mode>3" | |
cd65e265 DZ |
602 | [(set (match_operand:VCVTF 0 "s_register_operand") |
603 | (div:VCVTF (match_operand:VCVTF 1 "s_register_operand") | |
604 | (match_operand:VCVTF 2 "s_register_operand")))] | |
536ecfc4 PK |
605 | "TARGET_NEON && !optimize_size |
606 | && flag_reciprocal_math" | |
607 | { | |
608 | rtx rec = gen_reg_rtx (<MODE>mode); | |
609 | rtx vrecps_temp = gen_reg_rtx (<MODE>mode); | |
610 | ||
611 | /* Reciprocal estimate. */ | |
612 | emit_insn (gen_neon_vrecpe<mode> (rec, operands[2])); | |
613 | ||
614 | /* Perform 2 iterations of the Newton-Raphson method. */ | |
615 | for (int i = 0; i < 2; i++) | |
616 | { | |
617 | emit_insn (gen_neon_vrecps<mode> (vrecps_temp, rec, operands[2])); | |
618 | emit_insn (gen_mul<mode>3 (rec, rec, vrecps_temp)); | |
619 | } | |
620 | ||
621 | /* We now have reciprocal in rec, perform operands[0] = operands[1] * rec. */ | |
622 | emit_insn (gen_mul<mode>3 (operands[0], operands[1], rec)); | |
623 | DONE; | |
624 | } | |
625 | ) | |
626 | ||
627 | ||
| | ;; Multiply-accumulate (vmla) for the VDQW modes: operand 0 = operand 2 * |
| | ;; operand 3 + operand 1.  Operand 1 is tied to operand 0 (constraint "0"), |
| | ;; so the template names only operands 0, 2 and 3.  Float modes match only |
| | ;; when flag_unsafe_math_optimizations is set. |
bab53516 | 628 | (define_insn "mul<mode>3add<mode>_neon" |
f7379e5e JG |
629 | [(set (match_operand:VDQW 0 "s_register_operand" "=w") |
630 | (plus:VDQW (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w") | |
631 | (match_operand:VDQW 3 "s_register_operand" "w")) | |
632 | (match_operand:VDQW 1 "s_register_operand" "0")))] | |
400cfcf5 | 633 | "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" |
1ea9fe56 | 634 | "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3" |
003bb7f3 | 635 | [(set (attr "type") |
b75b1be2 | 636 | (if_then_else (match_test "<Is_float_mode>") |
f7379e5e JG |
637 | (const_string "neon_fp_mla_s<q>") |
638 | (const_string "neon_mla_<V_elem_ch><q>")))] | |
1ea9fe56 MM |
639 | ) |
640 | ||
| | ;; Multiply-accumulate (vmla.f16) for the VH modes: operand 0 = operand 2 * |
| | ;; operand 3 + operand 1, with operand 1 tied to operand 0.  Requires |
| | ;; TARGET_NEON_FP16INST; float modes additionally need |
| | ;; flag_unsafe_math_optimizations. |
55a9b91b MW |
641 | (define_insn "mul<mode>3add<mode>_neon" |
642 | [(set (match_operand:VH 0 "s_register_operand" "=w") | |
643 | (plus:VH (mult:VH (match_operand:VH 2 "s_register_operand" "w") | |
644 | (match_operand:VH 3 "s_register_operand" "w")) | |
645 | (match_operand:VH 1 "s_register_operand" "0")))] | |
646 | "TARGET_NEON_FP16INST && (!<Is_float_mode> || flag_unsafe_math_optimizations)" | |
647 | "vmla.f16\t%<V_reg>0, %<V_reg>2, %<V_reg>3" | |
648 | [(set_attr "type" "neon_fp_mla_s<q>")] | |
649 | ) | |
650 | ||
| | ;; Multiply-subtract (vmls) for the VDQW modes: operand 0 = operand 1 - |
| | ;; operand 2 * operand 3.  Operand 1 is tied to operand 0 (constraint "0"), |
| | ;; so the template names only operands 0, 2 and 3.  Float modes match only |
| | ;; when flag_unsafe_math_optimizations is set. |
bab53516 | 651 | (define_insn "mul<mode>3neg<mode>add<mode>_neon" |
f7379e5e JG |
652 | [(set (match_operand:VDQW 0 "s_register_operand" "=w") |
653 | (minus:VDQW (match_operand:VDQW 1 "s_register_operand" "0") | |
654 | (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w") | |
655 | (match_operand:VDQW 3 "s_register_operand" "w"))))] | |
400cfcf5 | 656 | "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" |
1ea9fe56 | 657 | "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3" |
003bb7f3 | 658 | [(set (attr "type") |
b75b1be2 | 659 | (if_then_else (match_test "<Is_float_mode>") |
f7379e5e JG |
660 | (const_string "neon_fp_mla_s<q>") |
661 | (const_string "neon_mla_<V_elem_ch><q>")))] | |
1ea9fe56 MM |
662 | ) |
663 | ||
8b2ab9cb | 664 | ;; Fused multiply-accumulate |
c4216388 MGD |
665 | ;; We define each insn twice here: |
666 | ;; 1: with flag_unsafe_math_optimizations for the widening multiply phase | |
667 | ;; to be able to use when converting to FMA. | |
668 | ;; 2: without flag_unsafe_math_optimizations for the intrinsics to use. | |
| | ;; Fused multiply-add (vfma) for the VCVTF modes.  Requires TARGET_NEON, |
| | ;; TARGET_FMA and flag_unsafe_math_optimizations.  The addend (operand 3) is |
| | ;; tied to operand 0, so the template names only operands 0, 1 and 2. |
8b2ab9cb RR |
669 | (define_insn "fma<VCVTF:mode>4" |
670 | [(set (match_operand:VCVTF 0 "register_operand" "=w") | |
671 | (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w") | |
672 | (match_operand:VCVTF 2 "register_operand" "w") | |
673 | (match_operand:VCVTF 3 "register_operand" "0")))] | |
674 | "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations" | |
e60226ff | 675 | "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
f7379e5e | 676 | [(set_attr "type" "neon_fp_mla_s<q>")] |
8b2ab9cb RR |
677 | ) |
678 | ||
| | ;; Same RTL and output as fma<VCVTF:mode>4, but conditioned only on |
| | ;; TARGET_NEON && TARGET_FMA (no flag_unsafe_math_optimizations |
| | ;; requirement). |
c4216388 MGD |
679 | (define_insn "fma<VCVTF:mode>4_intrinsic" |
680 | [(set (match_operand:VCVTF 0 "register_operand" "=w") | |
681 | (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w") | |
682 | (match_operand:VCVTF 2 "register_operand" "w") | |
683 | (match_operand:VCVTF 3 "register_operand" "0")))] | |
684 | "TARGET_NEON && TARGET_FMA" | |
e60226ff | 685 | "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
f7379e5e | 686 | [(set_attr "type" "neon_fp_mla_s<q>")] |
c4216388 MGD |
687 | ) |
688 | ||
| | ;; Fused multiply-add (vfma) for the VH modes.  Requires |
| | ;; TARGET_NEON_FP16INST and flag_unsafe_math_optimizations.  The addend |
| | ;; (operand 3) is tied to operand 0. |
6da37857 MW |
689 | (define_insn "fma<VH:mode>4" |
690 | [(set (match_operand:VH 0 "register_operand" "=w") | |
691 | (fma:VH | |
692 | (match_operand:VH 1 "register_operand" "w") | |
693 | (match_operand:VH 2 "register_operand" "w") | |
694 | (match_operand:VH 3 "register_operand" "0")))] | |
695 | "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations" | |
696 | "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2" | |
697 | [(set_attr "type" "neon_fp_mla_s<q>")] | |
698 | ) | |
699 | ||
;; VH-mode fused multiply-add without the flag_unsafe_math_optimizations
;; test; only TARGET_NEON_FP16INST is required.  RTL and output template are
;; otherwise the same as fma<VH:mode>4.
55a9b91b MW |
700 | (define_insn "fma<VH:mode>4_intrinsic" |
701 | [(set (match_operand:VH 0 "register_operand" "=w") | |
702 | (fma:VH | |
703 | (match_operand:VH 1 "register_operand" "w") | |
704 | (match_operand:VH 2 "register_operand" "w") | |
705 | (match_operand:VH 3 "register_operand" "0")))] | |
706 | "TARGET_NEON_FP16INST" | |
707 | "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2" | |
708 | [(set_attr "type" "neon_fp_mla_s<q>")] | |
709 | ) | |
710 | ||
;; Fused multiply-subtract for the VCVTF modes: an fma whose first multiplicand
;; (operand 1) is negated, emitted as vfms with the accumulator (operand 3)
;; tied to the destination.  Requires flag_unsafe_math_optimizations; the
;; leading '*' in the name means no gen_* function is produced for it.
8b2ab9cb RR |
711 | (define_insn "*fmsub<VCVTF:mode>4" |
712 | [(set (match_operand:VCVTF 0 "register_operand" "=w") | |
713 | (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w")) | |
714 | (match_operand:VCVTF 2 "register_operand" "w") | |
715 | (match_operand:VCVTF 3 "register_operand" "0")))] | |
716 | "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations" | |
e60226ff | 717 | "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
f7379e5e | 718 | [(set_attr "type" "neon_fp_mla_s<q>")] |
8b2ab9cb RR |
719 | ) |
720 | ||
;; Named fused multiply-subtract for the VCVTF modes: same negated-operand-1
;; fma RTL and vfms output as *fmsub<VCVTF:mode>4, but without the
;; flag_unsafe_math_optimizations requirement.
c4216388 | 721 | (define_insn "fmsub<VCVTF:mode>4_intrinsic" |
55a9b91b MW |
722 | [(set (match_operand:VCVTF 0 "register_operand" "=w") |
723 | (fma:VCVTF | |
724 | (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w")) | |
725 | (match_operand:VCVTF 2 "register_operand" "w") | |
726 | (match_operand:VCVTF 3 "register_operand" "0")))] | |
727 | "TARGET_NEON && TARGET_FMA" | |
e60226ff | 728 | "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
55a9b91b MW |
729 | [(set_attr "type" "neon_fp_mla_s<q>")] |
730 | ) | |
731 | ||
;; Fused multiply-subtract for the VH modes (negated operand 1, accumulator
;; operand 3 tied to the destination), emitted as vfms.  Enabled by
;; TARGET_NEON_FP16INST alone.
732 | (define_insn "fmsub<VH:mode>4_intrinsic" | |
733 | [(set (match_operand:VH 0 "register_operand" "=w") | |
734 | (fma:VH | |
735 | (neg:VH (match_operand:VH 1 "register_operand" "w")) | |
736 | (match_operand:VH 2 "register_operand" "w") | |
737 | (match_operand:VH 3 "register_operand" "0")))] | |
738 | "TARGET_NEON_FP16INST" | |
739 | "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2" | |
740 | [(set_attr "type" "neon_fp_mla_s<q>")] | |
c4216388 MGD |
741 | ) |
742 | ||
;; One pattern per NEON_VRINT unspec and VCVTF mode: emits
;; vrint<nvrint_variant>.f32 from operand 1 into operand 0.
;; Requires TARGET_NEON && TARGET_VFP5.
79739965 KT |
743 | (define_insn "neon_vrint<NEON_VRINT:nvrint_variant><VCVTF:mode>" |
744 | [(set (match_operand:VCVTF 0 "s_register_operand" "=w") | |
745 | (unspec:VCVTF [(match_operand:VCVTF 1 | |
746 | "s_register_operand" "w")] | |
747 | NEON_VRINT))] | |
c8d61ab8 | 748 | "TARGET_NEON && TARGET_VFP5" |
e60226ff | 749 | "vrint<nvrint_variant>.f32\\t%<V_reg>0, %<V_reg>1" |
f7379e5e | 750 | [(set_attr "type" "neon_fp_round_<V_elem_ch><q>")] |
79739965 KT |
751 | ) |
752 | ||
;; Float-to-integer conversion: a FIXUORS (signed or unsigned fix) applied to
;; a NEON_VCVT unspec of operand 1, producing a <V_cmp_result> vector.
;; Emits vcvt<nvrint_variant>.<su>32.f32 and is marked non-predicable.
;; Requires TARGET_NEON && TARGET_VFP5.
e9e67af1 KT |
753 | (define_insn "neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode><v_cmp_result>" |
754 | [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w") | |
755 | (FIXUORS:<V_cmp_result> (unspec:VCVTF | |
756 | [(match_operand:VCVTF 1 "register_operand" "w")] | |
757 | NEON_VCVT)))] | |
c8d61ab8 | 758 | "TARGET_NEON && TARGET_VFP5" |
e9e67af1 KT |
759 | "vcvt<nvrint_variant>.<su>32.f32\\t%<V_reg>0, %<V_reg>1" |
760 | [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>") | |
761 | (set_attr "predicable" "no")] | |
762 | ) | |
763 | ||
;; Bitwise OR for the VDQ modes.
;; Alternative 0: register-register vorr.
;; Alternative 1: operand 2 is a "Dl" immediate and operand 1 is tied to the
;; destination; the text comes from neon_output_logic_immediate ("vorr", ...).
88f77cba JB |
764 | (define_insn "ior<mode>3" |
765 | [(set (match_operand:VDQ 0 "s_register_operand" "=w,w") | |
766 | (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0") | |
767 | (match_operand:VDQ 2 "neon_logic_op2" "w,Dl")))] | |
768 | "TARGET_NEON" | |
769 | { | |
770 | switch (which_alternative) | |
771 | { | |
772 | case 0: return "vorr\t%<V_reg>0, %<V_reg>1, %<V_reg>2"; | |
773 | case 1: return neon_output_logic_immediate ("vorr", &operands[2], | |
774 | <MODE>mode, 0, VALID_NEON_QREG_MODE (<MODE>mode)); | |
775 | default: gcc_unreachable (); | |
776 | } | |
c956e102 | 777 | } |
f7379e5e | 778 | [(set_attr "type" "neon_logic<q>")] |
c956e102 | 779 | ) |
88f77cba | 780 | |
88f77cba JB |
781 | ;; The concrete forms of the Neon immediate-logic instructions are vbic and |
782 | ;; vorr. We support the pseudo-instruction vand instead, because that | |
783 | ;; corresponds to the canonical form the middle-end expects to use for | |
784 | ;; immediate bitwise-ANDs. | |
785 | ||
;; Bitwise AND for the VDQ modes.
;; Alternative 0: register-register vand.
;; Alternative 1: operand 2 is a "DL" immediate and operand 1 is tied to the
;; destination; the text comes from neon_output_logic_immediate ("vand", ...).
;; NOTE(review): the fourth argument is 1 here versus 0 in ior<mode>3 --
;; presumably it selects the inverted-immediate form; confirm against
;; neon_output_logic_immediate.
786 | (define_insn "and<mode>3" | |
787 | [(set (match_operand:VDQ 0 "s_register_operand" "=w,w") | |
788 | (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0") | |
789 | (match_operand:VDQ 2 "neon_inv_logic_op2" "w,DL")))] | |
790 | "TARGET_NEON" | |
791 | { | |
792 | switch (which_alternative) | |
793 | { | |
794 | case 0: return "vand\t%<V_reg>0, %<V_reg>1, %<V_reg>2"; | |
795 | case 1: return neon_output_logic_immediate ("vand", &operands[2], | |
796 | <MODE>mode, 1, VALID_NEON_QREG_MODE (<MODE>mode)); | |
797 | default: gcc_unreachable (); | |
798 | } | |
c956e102 | 799 | } |
f7379e5e | 800 | [(set_attr "type" "neon_logic<q>")] |
c956e102 | 801 | ) |
88f77cba | 802 | |
;; OR-NOT for the VDQ modes: operand 0 = operand 1 | ~operand 2.
;; Note the RTL lists the complemented operand (2) first, while the vorn
;; template prints the operands in 0, 1, 2 order.
88f77cba JB |
803 | (define_insn "orn<mode>3_neon" |
804 | [(set (match_operand:VDQ 0 "s_register_operand" "=w") | |
50fed7bf RR |
805 | (ior:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w")) |
806 | (match_operand:VDQ 1 "s_register_operand" "w")))] | |
88f77cba | 807 | "TARGET_NEON" |
c956e102 | 808 | "vorn\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
f7379e5e | 809 | [(set_attr "type" "neon_logic<q>")] |
c956e102 | 810 | ) |
88f77cba | 811 | |
;; Bit-clear for the VDQ modes: operand 0 = operand 1 & ~operand 2.
;; The RTL lists the complemented operand (2) first, while the vbic template
;; prints the operands in 0, 1, 2 order.
88f77cba JB |
812 | (define_insn "bic<mode>3_neon" |
813 | [(set (match_operand:VDQ 0 "s_register_operand" "=w") | |
50fed7bf RR |
814 | (and:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w")) |
815 | (match_operand:VDQ 1 "s_register_operand" "w")))] | |
88f77cba | 816 | "TARGET_NEON" |
c956e102 | 817 | "vbic\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
f7379e5e | 818 | [(set_attr "type" "neon_logic<q>")] |
c956e102 | 819 | ) |
88f77cba | 820 | |
;; Bitwise XOR for the VDQ modes, register operands only; emitted as veor.
88f77cba JB |
821 | (define_insn "xor<mode>3" |
822 | [(set (match_operand:VDQ 0 "s_register_operand" "=w") | |
823 | (xor:VDQ (match_operand:VDQ 1 "s_register_operand" "w") | |
824 | (match_operand:VDQ 2 "s_register_operand" "w")))] | |
825 | "TARGET_NEON" | |
c956e102 | 826 | "veor\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
f7379e5e | 827 | [(set_attr "type" "neon_logic<q>")] |
c956e102 | 828 | ) |
88f77cba | 829 | |
;; Bitwise complement for the VDQ modes; emitted as vmvn.  Its scheduling
;; type is neon_move<q> rather than neon_logic<q>.
88f77cba JB |
830 | (define_insn "one_cmpl<mode>2" |
831 | [(set (match_operand:VDQ 0 "s_register_operand" "=w") | |
832 | (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))] | |
833 | "TARGET_NEON" | |
c956e102 | 834 | "vmvn\t%<V_reg>0, %<V_reg>1" |
f7379e5e | 835 | [(set_attr "type" "neon_move<q>")] |
c956e102 | 836 | ) |
88f77cba JB |
837 | |
;; Element-wise absolute value for the VDQW modes; emitted as
;; vabs.<V_s_elem>.  The "type" attribute is neon_fp_abs_s<q> when
;; <Is_float_mode> is true and neon_abs<q> otherwise.
838 | (define_insn "abs<mode>2" | |
839 | [(set (match_operand:VDQW 0 "s_register_operand" "=w") | |
840 | (abs:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))] | |
841 | "TARGET_NEON" | |
c956e102 | 842 | "vabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1" |
003bb7f3 | 843 | [(set (attr "type") |
b75b1be2 | 844 | (if_then_else (match_test "<Is_float_mode>") |
f7379e5e JG |
845 | (const_string "neon_fp_abs_s<q>") |
846 | (const_string "neon_abs<q>")))] | |
c956e102 | 847 | ) |
88f77cba JB |
848 | |
;; Element-wise negation for the VDQW modes; emitted as vneg.<V_s_elem>.
;; The "type" attribute is neon_fp_neg_s<q> when <Is_float_mode> is true and
;; neon_neg<q> otherwise.
849 | (define_insn "neg<mode>2" | |
850 | [(set (match_operand:VDQW 0 "s_register_operand" "=w") | |
851 | (neg:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))] | |
852 | "TARGET_NEON" | |
c956e102 | 853 | "vneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1" |
003bb7f3 | 854 | [(set (attr "type") |
b75b1be2 | 855 | (if_then_else (match_test "<Is_float_mode>") |
f7379e5e JG |
856 | (const_string "neon_fp_neg_s<q>") |
857 | (const_string "neon_neg<q>")))] | |
c956e102 | 858 | ) |
88f77cba | 859 | |
;; abs and neg for the VH modes, generated from the ABSNEG code iterator;
;; the mnemonic is "v" followed by <absneg_str>.  Requires
;; TARGET_NEON_FP16INST.  Both codes get the scheduling type neon_abs<q>.
55a9b91b MW |
860 | (define_insn "<absneg_str><mode>2" |
861 | [(set (match_operand:VH 0 "s_register_operand" "=w") | |
862 | (ABSNEG:VH (match_operand:VH 1 "s_register_operand" "w")))] | |
863 | "TARGET_NEON_FP16INST" | |
864 | "v<absneg_str>.<V_s_elem>\t%<V_reg>0, %<V_reg>1" | |
865 | [(set_attr "type" "neon_abs<q>")] | |
866 | ) | |
867 | ||
;; Expander for the VH modes that simply forwards to the
;; <absneg_str><mode>2 insn with the same two operands.
868 | (define_expand "neon_v<absneg_str><mode>" | |
869 | [(set | |
870 | (match_operand:VH 0 "s_register_operand") | |
871 | (ABSNEG:VH (match_operand:VH 1 "s_register_operand")))] | |
872 | "TARGET_NEON_FP16INST" | |
873 | { | |
874 | emit_insn (gen_<absneg_str><mode>2 (operands[0], operands[1])); | |
875 | DONE; | |
876 | }) | |
877 | ||
;; Rounding operations for the VH modes, one per FP16_RND unspec; the
;; mnemonic comes from <fp16_rnd_insn>.  Requires TARGET_NEON_FP16INST.
878 | (define_insn "neon_v<fp16_rnd_str><mode>" | |
879 | [(set (match_operand:VH 0 "s_register_operand" "=w") | |
880 | (unspec:VH | |
881 | [(match_operand:VH 1 "s_register_operand" "w")] | |
882 | FP16_RND))] | |
883 | "TARGET_NEON_FP16INST" | |
884 | "<fp16_rnd_insn>.<V_s_elem>\t%<V_reg>0, %<V_reg>1" | |
885 | [(set_attr "type" "neon_fp_round_s<q>")] | |
886 | ) | |
887 | ||
;; UNSPEC_VRSQRTE for the VH modes; emitted as vrsqrte.f16.
;; Requires TARGET_NEON_FP16INST.
888 | (define_insn "neon_vrsqrte<mode>" | |
889 | [(set (match_operand:VH 0 "s_register_operand" "=w") | |
890 | (unspec:VH | |
891 | [(match_operand:VH 1 "s_register_operand" "w")] | |
892 | UNSPEC_VRSQRTE))] | |
893 | "TARGET_NEON_FP16INST" | |
894 | "vrsqrte.f16\t%<V_reg>0, %<V_reg>1" | |
895 | [(set_attr "type" "neon_fp_rsqrte_s<q>")] | |
896 | ) | |
897 | ||
;; Unsigned element-wise minimum for the VDQIW modes; emitted as
;; vmin.<V_u_elem>.
88f77cba JB |
898 | (define_insn "*umin<mode>3_neon" |
899 | [(set (match_operand:VDQIW 0 "s_register_operand" "=w") | |
900 | (umin:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w") | |
901 | (match_operand:VDQIW 2 "s_register_operand" "w")))] | |
902 | "TARGET_NEON" | |
c956e102 | 903 | "vmin.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
f7379e5e | 904 | [(set_attr "type" "neon_minmax<q>")] |
c956e102 | 905 | ) |
88f77cba JB |
906 | |
;; Unsigned element-wise maximum for the VDQIW modes; emitted as
;; vmax.<V_u_elem>.
907 | (define_insn "*umax<mode>3_neon" | |
908 | [(set (match_operand:VDQIW 0 "s_register_operand" "=w") | |
909 | (umax:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w") | |
910 | (match_operand:VDQIW 2 "s_register_operand" "w")))] | |
911 | "TARGET_NEON" | |
c956e102 | 912 | "vmax.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
f7379e5e | 913 | [(set_attr "type" "neon_minmax<q>")] |
c956e102 | 914 | ) |
88f77cba JB |
915 | |
;; Signed element-wise minimum for the VDQW modes; emitted as
;; vmin.<V_s_elem>.  The "type" attribute is neon_fp_minmax_s<q> when
;; <Is_float_mode> is true and neon_minmax<q> otherwise.
916 | (define_insn "*smin<mode>3_neon" | |
917 | [(set (match_operand:VDQW 0 "s_register_operand" "=w") | |
918 | (smin:VDQW (match_operand:VDQW 1 "s_register_operand" "w") | |
919 | (match_operand:VDQW 2 "s_register_operand" "w")))] | |
920 | "TARGET_NEON" | |
c956e102 | 921 | "vmin.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
003bb7f3 | 922 | [(set (attr "type") |
b75b1be2 | 923 | (if_then_else (match_test "<Is_float_mode>") |
f7379e5e JG |
924 | (const_string "neon_fp_minmax_s<q>") |
925 | (const_string "neon_minmax<q>")))] | |
c956e102 | 926 | ) |
88f77cba JB |
927 | |
;; Signed element-wise maximum for the VDQW modes; emitted as
;; vmax.<V_s_elem>.  The "type" attribute is neon_fp_minmax_s<q> when
;; <Is_float_mode> is true and neon_minmax<q> otherwise.
928 | (define_insn "*smax<mode>3_neon" | |
929 | [(set (match_operand:VDQW 0 "s_register_operand" "=w") | |
930 | (smax:VDQW (match_operand:VDQW 1 "s_register_operand" "w") | |
931 | (match_operand:VDQW 2 "s_register_operand" "w")))] | |
932 | "TARGET_NEON" | |
c956e102 | 933 | "vmax.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
003bb7f3 | 934 | [(set (attr "type") |
b75b1be2 | 935 | (if_then_else (match_test "<Is_float_mode>") |
f7379e5e JG |
936 | (const_string "neon_fp_minmax_s<q>") |
937 | (const_string "neon_minmax<q>")))] | |
c956e102 | 938 | ) |
88f77cba JB |
939 | |
940 | ; TODO: V2DI shifts are currently disabled because there are bugs in the |
941 | ; generic vectorizer code. It ends up creating a V2DI constructor with | |
942 | ; SImode elements. | |
943 | ||
;; Vector shift left for the VDQIW modes.
;; Alternative 0: per-element shift counts in a register, emitted as vshl.
;; Alternative 1: "Dm" immediate count, text produced by
;; neon_output_shift_immediate ("vshl", 'i', ...).
;; The "type" attribute lists one value per alternative.
d44463a9 | 944 | (define_insn "vashl<mode>3" |
31a0c825 DP |
945 | [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w") |
946 | (ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w,w") | |
e009dfb3 | 947 | (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Dm")))] |
31a0c825 DP |
948 | "TARGET_NEON" |
949 | { | |
950 | switch (which_alternative) | |
951 | { | |
952 | case 0: return "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"; | |
953 | case 1: return neon_output_shift_immediate ("vshl", 'i', &operands[2], | |
954 | <MODE>mode, | |
955 | VALID_NEON_QREG_MODE (<MODE>mode), | |
956 | true); | |
957 | default: gcc_unreachable (); | |
958 | } | |
959 | } | |
f7379e5e | 960 | [(set_attr "type" "neon_shift_reg<q>, neon_shift_imm<q>")] |
31a0c825 DP |
961 | ) |
962 | ||
;; Arithmetic shift right by an immediate for the VDQIW modes.  The text is
;; produced by neon_output_shift_immediate with mnemonic "vshr" and sign
;; character 's'.
963 | (define_insn "vashr<mode>3_imm" | |
88f77cba | 964 | [(set (match_operand:VDQIW 0 "s_register_operand" "=w") |
31a0c825 | 965 | (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w") |
e009dfb3 | 966 | (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dm")))] |
88f77cba | 967 | "TARGET_NEON" |
31a0c825 DP |
968 | { |
969 | return neon_output_shift_immediate ("vshr", 's', &operands[2], | |
970 | <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode), | |
971 | false); | |
972 | } | |
f7379e5e | 973 | [(set_attr "type" "neon_shift_imm<q>")] |
c956e102 | 974 | ) |
88f77cba | 975 | |
;; Logical shift right by an immediate for the VDQIW modes.  The text is
;; produced by neon_output_shift_immediate with mnemonic "vshr" and sign
;; character 'u'.
31a0c825 DP |
976 | (define_insn "vlshr<mode>3_imm" |
977 | [(set (match_operand:VDQIW 0 "s_register_operand" "=w") | |
978 | (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w") | |
e009dfb3 | 979 | (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dm")))] |
31a0c825 DP |
980 | "TARGET_NEON" |
981 | { | |
982 | return neon_output_shift_immediate ("vshr", 'u', &operands[2], | |
983 | <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode), | |
984 | false); | |
985 | } | |
f7379e5e | 986 | [(set_attr "type" "neon_shift_imm<q>")] |
31a0c825 DP |
987 | ) |
988 | ||
88f77cba JB |
989 | ; Used for implementing arithmetic shift-right, which is a left-shift by a negative |
990 | ; amount, with signed operands. This is essentially the same as vashl<mode>3 | |
991 | ; above, but using an unspec in case GCC tries anything tricky with negative | |
992 | ; shift amounts. | |
993 | ||
;; Register-count vshl.<V_s_elem> for the VDQI modes, expressed as
;; UNSPEC_ASHIFT_SIGNED rather than as an ashift rtx.  The vashr<mode>3
;; expander calls it with a negated shift count.
994 | (define_insn "ashl<mode>3_signed" | |
995 | [(set (match_operand:VDQI 0 "s_register_operand" "=w") | |
996 | (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w") | |
997 | (match_operand:VDQI 2 "s_register_operand" "w")] | |
998 | UNSPEC_ASHIFT_SIGNED))] | |
999 | "TARGET_NEON" | |
c956e102 | 1000 | "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
f7379e5e | 1001 | [(set_attr "type" "neon_shift_reg<q>")] |
c956e102 | 1002 | ) |
88f77cba JB |
1003 | |
1004 | ; Used for implementing logical shift-right, which is a left-shift by a negative | |
1005 | ; amount, with unsigned operands. | |
1006 | ||
;; Register-count vshl.<V_u_elem> for the VDQI modes, expressed as
;; UNSPEC_ASHIFT_UNSIGNED rather than as an ashift rtx.  The vlshr<mode>3
;; expander calls it with a negated shift count.
1007 | (define_insn "ashl<mode>3_unsigned" | |
1008 | [(set (match_operand:VDQI 0 "s_register_operand" "=w") | |
1009 | (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w") | |
1010 | (match_operand:VDQI 2 "s_register_operand" "w")] | |
1011 | UNSPEC_ASHIFT_UNSIGNED))] | |
1012 | "TARGET_NEON" | |
c956e102 | 1013 | "vshl.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
f7379e5e | 1014 | [(set_attr "type" "neon_shift_reg<q>")] |
c956e102 | 1015 | ) |
88f77cba | 1016 | |
;; Arithmetic shift right expander for the VDQIW modes.
;; If operand 2 is a register, the count is negated into a fresh pseudo and
;; the shift is done as a signed left shift (ashl<mode>3_signed).  Otherwise
;; the immediate form vashr<mode>3_imm is emitted.
d44463a9 | 1017 | (define_expand "vashr<mode>3" |
cd65e265 DZ |
1018 | [(set (match_operand:VDQIW 0 "s_register_operand") |
1019 | (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand") | |
1020 | (match_operand:VDQIW 2 "imm_rshift_or_reg_neon")))] | |
88f77cba JB |
1021 | "TARGET_NEON" |
1022 | { | |
56b15099 | 1023 | if (s_register_operand (operands[2], <MODE>mode)) |
31a0c825 | 1024 | { |
56b15099 | 1025 | rtx neg = gen_reg_rtx (<MODE>mode); |
31a0c825 DP |
1026 | emit_insn (gen_neg<mode>2 (neg, operands[2])); |
1027 | emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1], neg)); | |
1028 | } | |
1029 | else | |
1030 | emit_insn (gen_vashr<mode>3_imm (operands[0], operands[1], operands[2])); | |
88f77cba JB |
1031 | DONE; |
1032 | }) | |
1033 | ||
;; Logical shift right expander for the VDQIW modes.
;; If operand 2 is a register, the count is negated into a fresh pseudo and
;; the shift is done as an unsigned left shift (ashl<mode>3_unsigned).
;; Otherwise the immediate form vlshr<mode>3_imm is emitted.
d44463a9 | 1034 | (define_expand "vlshr<mode>3" |
cd65e265 DZ |
1035 | [(set (match_operand:VDQIW 0 "s_register_operand") |
1036 | (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand") | |
1037 | (match_operand:VDQIW 2 "imm_rshift_or_reg_neon")))] | |
88f77cba JB |
1038 | "TARGET_NEON" |
1039 | { | |
56b15099 | 1040 | if (s_register_operand (operands[2], <MODE>mode)) |
31a0c825 | 1041 | { |
56b15099 | 1042 | rtx neg = gen_reg_rtx (<MODE>mode); |
31a0c825 DP |
1043 | emit_insn (gen_neg<mode>2 (neg, operands[2])); |
1044 | emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1], neg)); | |
1045 | } | |
1046 | else | |
1047 | emit_insn (gen_vlshr<mode>3_imm (operands[0], operands[1], operands[2])); | |
88f77cba JB |
1048 | DONE; |
1049 | }) | |
1050 | ||
3f2dc806 AS |
1051 | ;; 64-bit shifts |
1052 | ||
1053 | ;; This pattern loads a 32-bit shift count into a 64-bit NEON register, | |
1054 | ;; leaving the upper half uninitialized. This is OK since the shift | |
1055 | ;; instruction only looks at the low 8 bits anyway. To avoid confusing | |
1056 | ;; data flow analysis however, we pretend the full register is set | |
1057 | ;; using an unspec. | |
;; Alternative 0: the SImode source is in memory ("Um") and is loaded with
;; vld1.32 into lane 0 of the destination.
;; Alternative 1: the source is a core register ("r") and is moved with
;; vmov.32 into lane 0.
1058 | (define_insn "neon_load_count" | |
1059 | [(set (match_operand:DI 0 "s_register_operand" "=w,w") | |
1060 | (unspec:DI [(match_operand:SI 1 "nonimmediate_operand" "Um,r")] | |
1061 | UNSPEC_LOAD_COUNT))] | |
1062 | "TARGET_NEON" | |
1063 | "@ | |
1064 | vld1.32\t{%P0[0]}, %A1 | |
1065 | vmov.32\t%P0[0], %1" | |
f7379e5e | 1066 | [(set_attr "type" "neon_load1_1reg,neon_from_gp")] |
3f2dc806 AS |
1067 | ) |
1068 | ||
88f77cba JB |
1069 | ;; Widening operations |
1070 | ||
;; Signed widening sum for the VQI modes: operand 0 = operand 2 +
;; sign_extend (operand 1), with a <V_double_width> accumulator.
;; The expander copies operand 2 into operand 0 when they differ, then
;; accumulates the two halves of operand 1 into operand 0 with the
;; vec_sel_widen_ssum_lo/hi patterns, using the half-selecting parallels
;; built by arm_simd_vect_par_cnst_half.
93c590ee | 1071 | (define_expand "widen_ssum<mode>3" |
cd65e265 | 1072 | [(set (match_operand:<V_double_width> 0 "s_register_operand") |
93c590ee MC |
1073 | (plus:<V_double_width> |
1074 | (sign_extend:<V_double_width> | |
cd65e265 DZ |
1075 | (match_operand:VQI 1 "s_register_operand")) |
1076 | (match_operand:<V_double_width> 2 "s_register_operand")))] | |
93c590ee MC |
1077 | "TARGET_NEON" |
1078 | { | |
1079 | machine_mode mode = GET_MODE (operands[1]); | |
1080 | rtx p1, p2; | |
1081 | ||
1082 | p1 = arm_simd_vect_par_cnst_half (mode, false); | |
1083 | p2 = arm_simd_vect_par_cnst_half (mode, true); | |
1084 | ||
1085 | if (operands[0] != operands[2]) | |
1086 | emit_move_insn (operands[0], operands[2]); | |
1087 | ||
1088 | emit_insn (gen_vec_sel_widen_ssum_lo<mode><V_half>3 (operands[0], | |
1089 | operands[1], | |
1090 | p1, | |
1091 | operands[0])); | |
1092 | emit_insn (gen_vec_sel_widen_ssum_hi<mode><V_half>3 (operands[0], | |
1093 | operands[1], | |
1094 | p2, | |
1095 | operands[0])); | |
1096 | DONE; | |
1097 | } | |
1098 | ) | |
1099 | ||
;; Adds the sign-extended low half of operand 1 (selected by the
;; vect_par_constant_low parallel in operand 2) to the accumulator operand 3,
;; which is tied to the destination.  Emitted as vaddw.<V_s_elem>; the source
;; D register is printed with %e1 on little-endian and %f1 on big-endian.
b8c36603 KT |
1100 | (define_insn "vec_sel_widen_ssum_lo<mode><V_half>3" |
1101 | [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w") | |
1102 | (plus:<V_double_width> | |
1103 | (sign_extend:<V_double_width> | |
1104 | (vec_select:<V_HALF> | |
93c590ee MC |
1105 | (match_operand:VQI 1 "s_register_operand" "%w") |
1106 | (match_operand:VQI 2 "vect_par_constant_low" ""))) | |
b8c36603 | 1107 | (match_operand:<V_double_width> 3 "s_register_operand" "0")))] |
93c590ee MC |
1108 | "TARGET_NEON" |
1109 | { | |
1110 | return BYTES_BIG_ENDIAN ? "vaddw.<V_s_elem>\t%q0, %q3, %f1" : | |
1111 | "vaddw.<V_s_elem>\t%q0, %q3, %e1"; | |
1112 | } | |
1113 | [(set_attr "type" "neon_add_widen")]) | |
1114 | ||
;; Adds the sign-extended high half of operand 1 (selected by the
;; vect_par_constant_high parallel in operand 2) to the accumulator operand 3,
;; which is tied to the destination.  Emitted as vaddw.<V_s_elem>; the source
;; D register is printed with %f1 on little-endian and %e1 on big-endian.
b8c36603 KT |
1115 | (define_insn "vec_sel_widen_ssum_hi<mode><V_half>3" |
1116 | [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w") | |
1117 | (plus:<V_double_width> | |
1118 | (sign_extend:<V_double_width> | |
1119 | (vec_select:<V_HALF> | |
1120 | (match_operand:VQI 1 "s_register_operand" "%w") | |
93c590ee | 1121 | (match_operand:VQI 2 "vect_par_constant_high" ""))) |
b8c36603 | 1122 | (match_operand:<V_double_width> 3 "s_register_operand" "0")))] |
93c590ee MC |
1123 | "TARGET_NEON" |
1124 | { | |
1125 | return BYTES_BIG_ENDIAN ? "vaddw.<V_s_elem>\t%q0, %q3, %e1" : | |
1126 | "vaddw.<V_s_elem>\t%q0, %q3, %f1"; | |
1127 | } | |
1128 | [(set_attr "type" "neon_add_widen")]) | |
1129 | ||
;; Signed widening sum for the VW modes: operand 0 = sign_extend (operand 1)
;; + operand 2, in the <V_widen> mode.  Emitted as a single vaddw.<V_s_elem>
;; with the accumulator (operand 2) as the first source.
88f77cba JB |
1130 | (define_insn "widen_ssum<mode>3" |
1131 | [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") | |
93c590ee MC |
1132 | (plus:<V_widen> |
1133 | (sign_extend:<V_widen> | |
1134 | (match_operand:VW 1 "s_register_operand" "%w")) | |
1135 | (match_operand:<V_widen> 2 "s_register_operand" "w")))] | |
88f77cba | 1136 | "TARGET_NEON" |
c956e102 | 1137 | "vaddw.<V_s_elem>\t%q0, %q2, %P1" |
f7379e5e | 1138 | [(set_attr "type" "neon_add_widen")] |
c956e102 | 1139 | ) |
88f77cba | 1140 | |
;; Unsigned widening sum for the VQI modes: operand 0 = operand 2 +
;; zero_extend (operand 1), with a <V_double_width> accumulator.
;; The expander copies operand 2 into operand 0 when they differ, then
;; accumulates the two halves of operand 1 into operand 0 with the
;; vec_sel_widen_usum_lo/hi patterns, using the half-selecting parallels
;; built by arm_simd_vect_par_cnst_half.
93c590ee | 1141 | (define_expand "widen_usum<mode>3" |
cd65e265 | 1142 | [(set (match_operand:<V_double_width> 0 "s_register_operand") |
93c590ee MC |
1143 | (plus:<V_double_width> |
1144 | (zero_extend:<V_double_width> | |
cd65e265 DZ |
1145 | (match_operand:VQI 1 "s_register_operand")) |
1146 | (match_operand:<V_double_width> 2 "s_register_operand")))] | |
93c590ee MC |
1147 | "TARGET_NEON" |
1148 | { | |
1149 | machine_mode mode = GET_MODE (operands[1]); | |
1150 | rtx p1, p2; | |
1151 | ||
1152 | p1 = arm_simd_vect_par_cnst_half (mode, false); | |
1153 | p2 = arm_simd_vect_par_cnst_half (mode, true); | |
1154 | ||
1155 | if (operands[0] != operands[2]) | |
1156 | emit_move_insn (operands[0], operands[2]); | |
1157 | ||
1158 | emit_insn (gen_vec_sel_widen_usum_lo<mode><V_half>3 (operands[0], | |
1159 | operands[1], | |
1160 | p1, | |
1161 | operands[0])); | |
1162 | emit_insn (gen_vec_sel_widen_usum_hi<mode><V_half>3 (operands[0], | |
1163 | operands[1], | |
1164 | p2, | |
1165 | operands[0])); | |
1166 | DONE; | |
1167 | } | |
1168 | ) | |
1169 | ||
;; Adds the zero-extended low half of operand 1 (selected by the
;; vect_par_constant_low parallel in operand 2) to the accumulator operand 3,
;; which is tied to the destination.  Emitted as vaddw.<V_u_elem>; the source
;; D register is printed with %e1 on little-endian and %f1 on big-endian.
b8c36603 KT |
1170 | (define_insn "vec_sel_widen_usum_lo<mode><V_half>3" |
1171 | [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w") | |
1172 | (plus:<V_double_width> | |
1173 | (zero_extend:<V_double_width> | |
1174 | (vec_select:<V_HALF> | |
93c590ee MC |
1175 | (match_operand:VQI 1 "s_register_operand" "%w") |
1176 | (match_operand:VQI 2 "vect_par_constant_low" ""))) | |
b8c36603 | 1177 | (match_operand:<V_double_width> 3 "s_register_operand" "0")))] |
93c590ee MC |
1178 | "TARGET_NEON" |
1179 | { | |
1180 | return BYTES_BIG_ENDIAN ? "vaddw.<V_u_elem>\t%q0, %q3, %f1" : | |
1181 | "vaddw.<V_u_elem>\t%q0, %q3, %e1"; | |
1182 | } | |
1183 | [(set_attr "type" "neon_add_widen")]) | |
1184 | ||
;; Adds the zero-extended high half of operand 1 (selected by the
;; vect_par_constant_high parallel in operand 2) to the accumulator operand 3,
;; which is tied to the destination.  Emitted as vaddw.<V_u_elem>; the source
;; D register is printed with %f1 on little-endian and %e1 on big-endian.
b8c36603 KT |
1185 | (define_insn "vec_sel_widen_usum_hi<mode><V_half>3" |
1186 | [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w") | |
1187 | (plus:<V_double_width> | |
1188 | (zero_extend:<V_double_width> | |
1189 | (vec_select:<V_HALF> | |
1190 | (match_operand:VQI 1 "s_register_operand" "%w") | |
93c590ee | 1191 | (match_operand:VQI 2 "vect_par_constant_high" ""))) |
b8c36603 | 1192 | (match_operand:<V_double_width> 3 "s_register_operand" "0")))] |
93c590ee MC |
1193 | "TARGET_NEON" |
1194 | { | |
1195 | return BYTES_BIG_ENDIAN ? "vaddw.<V_u_elem>\t%q0, %q3, %e1" : | |
1196 | "vaddw.<V_u_elem>\t%q0, %q3, %f1"; | |
1197 | } | |
1198 | [(set_attr "type" "neon_add_widen")]) | |
1199 | ||
;; Unsigned widening sum for the VW modes: operand 0 = zero_extend
;; (operand 1) + operand 2, in the <V_widen> mode.  Emitted as a single
;; vaddw.<V_u_elem> with the accumulator (operand 2) as the first source.
88f77cba JB |
1200 | (define_insn "widen_usum<mode>3" |
1201 | [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") | |
1202 | (plus:<V_widen> (zero_extend:<V_widen> | |
1203 | (match_operand:VW 1 "s_register_operand" "%w")) | |
1204 | (match_operand:<V_widen> 2 "s_register_operand" "w")))] | |
1205 | "TARGET_NEON" | |
c956e102 | 1206 | "vaddw.<V_u_elem>\t%q0, %q2, %P1" |
f7379e5e | 1207 | [(set_attr "type" "neon_add_widen")] |
c956e102 | 1208 | ) |
88f77cba | 1209 | |
88f77cba JB |
1210 | ;; Helpers for quad-word reduction operations |
1211 | ||
1212 | ; Add (or smin, smax...) the low N/2 elements of the N-element vector | |
1213 | ; operand[1] to the high N/2 elements of same. Put the result in operand[0], an | |
1214 | ; N/2-element vector. | |
1215 | ||
;; Applies each VQH_OPS operation element-wise between elements 0-1 and
;; elements 2-3 of the V4SI operand 1, giving a V2SI result.  The template
;; prints the two source halves as %e1 and %f1.  The vqh_mnem attribute is
;; set to the mnemonic in use.
1216 | (define_insn "quad_halves_<code>v4si" | |
1217 | [(set (match_operand:V2SI 0 "s_register_operand" "=w") | |
728dc153 | 1218 | (VQH_OPS:V2SI |
88f77cba JB |
1219 | (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w") |
1220 | (parallel [(const_int 0) (const_int 1)])) | |
1221 | (vec_select:V2SI (match_dup 1) | |
1222 | (parallel [(const_int 2) (const_int 3)]))))] | |
1223 | "TARGET_NEON" | |
c956e102 MS |
1224 | "<VQH_mnem>.<VQH_sign>32\t%P0, %e1, %f1" |
1225 | [(set_attr "vqh_mnem" "<VQH_mnem>") | |
f7379e5e | 1226 | (set_attr "type" "neon_reduc_<VQH_type>_q")] |
c956e102 | 1227 | ) |
88f77cba JB |
1228 | |
;; Floating-point counterpart of quad_halves_<code>v4si: applies each
;; VQHS_OPS operation between elements 0-1 and elements 2-3 of the V4SF
;; operand 1, giving a V2SF result.  Only enabled with
;; flag_unsafe_math_optimizations.
1229 | (define_insn "quad_halves_<code>v4sf" | |
1230 | [(set (match_operand:V2SF 0 "s_register_operand" "=w") | |
728dc153 | 1231 | (VQHS_OPS:V2SF |
88f77cba JB |
1232 | (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w") |
1233 | (parallel [(const_int 0) (const_int 1)])) | |
1234 | (vec_select:V2SF (match_dup 1) | |
1235 | (parallel [(const_int 2) (const_int 3)]))))] | |
400cfcf5 | 1236 | "TARGET_NEON && flag_unsafe_math_optimizations" |
c956e102 MS |
1237 | "<VQH_mnem>.f32\t%P0, %e1, %f1" |
1238 | [(set_attr "vqh_mnem" "<VQH_mnem>") | |
f7379e5e | 1239 | (set_attr "type" "neon_fp_reduc_<VQH_type>_s_q")] |
c956e102 | 1240 | ) |
88f77cba JB |
1241 | |
;; Applies each VQH_OPS operation element-wise between elements 0-3 and
;; elements 4-7 of the V8HI operand 1, giving a V4HI result.  The template
;; prints the two source halves as %e1 and %f1.
;; Operand 0 is only written by this pattern, so it uses the write-only
;; "=w" constraint, matching the v4si and v4sf variants.
1242 | (define_insn "quad_halves_<code>v8hi" | |
1243 | [(set (match_operand:V4HI 0 "s_register_operand" "=w") | |
728dc153 | 1244 | (VQH_OPS:V4HI |
88f77cba JB |
1245 | (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w") |
1246 | (parallel [(const_int 0) (const_int 1) | |
1247 | (const_int 2) (const_int 3)])) | |
1248 | (vec_select:V4HI (match_dup 1) | |
1249 | (parallel [(const_int 4) (const_int 5) | |
1250 | (const_int 6) (const_int 7)]))))] | |
1251 | "TARGET_NEON" | |
c956e102 MS |
1252 | "<VQH_mnem>.<VQH_sign>16\t%P0, %e1, %f1" |
1253 | [(set_attr "vqh_mnem" "<VQH_mnem>") | |
f7379e5e | 1254 | (set_attr "type" "neon_reduc_<VQH_type>_q")] |
c956e102 | 1255 | ) |
88f77cba JB |
1256 | |
;; Applies each VQH_OPS operation element-wise between elements 0-7 and
;; elements 8-15 of the V16QI operand 1, giving a V8QI result.  The template
;; prints the two source halves as %e1 and %f1.
;; Operand 0 is only written by this pattern, so it uses the write-only
;; "=w" constraint, matching the v4si and v4sf variants.
1257 | (define_insn "quad_halves_<code>v16qi" | |
1258 | [(set (match_operand:V8QI 0 "s_register_operand" "=w") | |
728dc153 | 1259 | (VQH_OPS:V8QI |
88f77cba JB |
1260 | (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w") |
1261 | (parallel [(const_int 0) (const_int 1) | |
1262 | (const_int 2) (const_int 3) | |
1263 | (const_int 4) (const_int 5) | |
1264 | (const_int 6) (const_int 7)])) | |
1265 | (vec_select:V8QI (match_dup 1) | |
1266 | (parallel [(const_int 8) (const_int 9) | |
1267 | (const_int 10) (const_int 11) | |
1268 | (const_int 12) (const_int 13) | |
1269 | (const_int 14) (const_int 15)]))))] | |
1270 | "TARGET_NEON" | |
c956e102 MS |
1271 | "<VQH_mnem>.<VQH_sign>8\t%P0, %e1, %f1" |
1272 | [(set_attr "vqh_mnem" "<VQH_mnem>") | |
f7379e5e | 1273 | (set_attr "type" "neon_reduc_<VQH_type>_q")] |
c956e102 | 1274 | ) |
88f77cba | 1275 | |
;; Moves the <V_HALF> operand 1 into the part of the ANY128 operand 0 that
;; starts at byte offset GET_MODE_SIZE (<V_HALF>mode), via a subreg built
;; with simplify_gen_subreg.  The rest of operand 0 is not written.
0f38f229 | 1276 | (define_expand "move_hi_quad_<mode>" |
cd65e265 DZ |
1277 | [(match_operand:ANY128 0 "s_register_operand") |
1278 | (match_operand:<V_HALF> 1 "s_register_operand")] | |
0f38f229 | 1279 | "TARGET_NEON" |
88f77cba | 1280 | { |
d92aed06 RS |
1281 | emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0], <MODE>mode, |
1282 | GET_MODE_SIZE (<V_HALF>mode)), | |
1283 | operands[1]); | |
0f38f229 TB |
1284 | DONE; |
1285 | }) | |
1286 | ||
;; Moves the <V_HALF> operand 1 into the part of the ANY128 operand 0 that
;; starts at byte offset 0, via a subreg built with simplify_gen_subreg.
;; The rest of operand 0 is not written.
1287 | (define_expand "move_lo_quad_<mode>" | |
cd65e265 DZ |
1288 | [(match_operand:ANY128 0 "s_register_operand") |
1289 | (match_operand:<V_HALF> 1 "s_register_operand")] | |
0f38f229 | 1290 | "TARGET_NEON" |
88f77cba | 1291 | { |
d92aed06 RS |
1292 | emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0], |
1293 | <MODE>mode, 0), | |
1294 | operands[1]); | |
0f38f229 TB |
1295 | DONE; |
1296 | }) | |
88f77cba JB |
1297 | |
1298 | ;; Reduction operations | |
1299 | ||
;; Sum reduction of a VD-mode vector to a scalar.  neon_pairwise_reduce is
;; driven with the vpadd generator into a temporary vector, then element 0 is
;; extracted into operand 0.  Floating-point modes additionally require
;; flag_unsafe_math_optimizations.
89edc986 | 1300 | (define_expand "reduc_plus_scal_<mode>" |
cd65e265 DZ |
1301 | [(match_operand:<V_elem> 0 "nonimmediate_operand") |
1302 | (match_operand:VD 1 "s_register_operand")] | |
400cfcf5 | 1303 | "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" |
88f77cba | 1304 | { |
89edc986 AL |
1305 | rtx vec = gen_reg_rtx (<MODE>mode); |
1306 | neon_pairwise_reduce (vec, operands[1], <MODE>mode, | |
88f77cba | 1307 | &gen_neon_vpadd_internal<mode>); |
89edc986 | 1308 | /* The same result is actually computed into every element. */ |
ff03930a | 1309 | emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx)); |
88f77cba JB |
1310 | DONE; |
1311 | }) | |
1312 | ||
;; Sum reduction of a VQ-mode vector to a scalar, little-endian only.
;; The two halves are first added with quad_halves_plus<mode> into a
;; <V_HALF> temporary, which is then reduced by the half-width
;; reduc_plus_scal expander.  Floating-point modes additionally require
;; flag_unsafe_math_optimizations.
89edc986 | 1313 | (define_expand "reduc_plus_scal_<mode>" |
cd65e265 DZ |
1314 | [(match_operand:<V_elem> 0 "nonimmediate_operand") |
1315 | (match_operand:VQ 1 "s_register_operand")] | |
0094f21b JB |
1316 | "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations) |
1317 | && !BYTES_BIG_ENDIAN" | |
88f77cba JB |
1318 | { |
1319 | rtx step1 = gen_reg_rtx (<V_HALF>mode); | |
88f77cba JB |
1320 | |
1321 | emit_insn (gen_quad_halves_plus<mode> (step1, operands[1])); | |
89edc986 | 1322 | emit_insn (gen_reduc_plus_scal_<V_half> (operands[0], step1)); |
88f77cba JB |
1323 | |
1324 | DONE; | |
1325 | }) | |
1326 | ||
;; Sum reduction of a V2DI vector to a DImode scalar, little-endian only.
;; arm_reduc_plus_internal_v2di computes the sum into a temporary vector and
;; element 0 of that vector is extracted into operand 0.
89edc986 | 1327 | (define_expand "reduc_plus_scal_v2di" |
cd65e265 DZ |
1328 | [(match_operand:DI 0 "nonimmediate_operand") |
1329 | (match_operand:V2DI 1 "s_register_operand")] | |
89edc986 AL |
1330 | "TARGET_NEON && !BYTES_BIG_ENDIAN" |
1331 | { | |
1332 | rtx vec = gen_reg_rtx (V2DImode); | |
1333 | ||
1334 | emit_insn (gen_arm_reduc_plus_internal_v2di (vec, operands[1])); | |
ff03930a | 1335 | emit_insn (gen_vec_extractv2didi (operands[0], vec, const0_rtx)); |
89edc986 AL |
1336 | |
1337 | DONE; | |
1338 | }) | |
1339 | ||
;; Helper for reduc_plus_scal_v2di: UNSPEC_VPADD on a V2DI operand, emitted
;; as vadd.i64 of the two halves of operand 1 (%e1, %f1) into the %e0 half of
;; operand 0.  Little-endian only.
1340 | (define_insn "arm_reduc_plus_internal_v2di" | |
88f77cba JB |
1341 | [(set (match_operand:V2DI 0 "s_register_operand" "=w") |
1342 | (unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w")] | |
1343 | UNSPEC_VPADD))] | |
0094f21b | 1344 | "TARGET_NEON && !BYTES_BIG_ENDIAN" |
c956e102 | 1345 | "vadd.i64\t%e0, %e1, %f1" |
f7379e5e | 1346 | [(set_attr "type" "neon_add_q")] |
c956e102 | 1347 | ) |
88f77cba | 1348 | |
;; Signed-minimum reduction of a VD-mode vector to a scalar.
;; neon_pairwise_reduce is driven with the vpsmin generator into a temporary
;; vector, then element 0 is extracted into operand 0.  Floating-point modes
;; additionally require flag_unsafe_math_optimizations.
f5dcbee1 | 1349 | (define_expand "reduc_smin_scal_<mode>" |
cd65e265 DZ |
1350 | [(match_operand:<V_elem> 0 "nonimmediate_operand") |
1351 | (match_operand:VD 1 "s_register_operand")] | |
400cfcf5 | 1352 | "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" |
88f77cba | 1353 | { |
f5dcbee1 AL |
1354 | rtx vec = gen_reg_rtx (<MODE>mode); |
1355 | ||
1356 | neon_pairwise_reduce (vec, operands[1], <MODE>mode, | |
88f77cba | 1357 | &gen_neon_vpsmin<mode>); |
f5dcbee1 | 1358 | /* The result is computed into every element of the vector. */ |
ff03930a | 1359 | emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx)); |
88f77cba JB |
1360 | DONE; |
1361 | }) | |
1362 | ||
;; Signed-minimum reduction of a VQ-mode vector to a scalar, little-endian
;; only.  The two halves are first combined with quad_halves_smin<mode> into
;; a <V_HALF> temporary, which is then reduced by the half-width
;; reduc_smin_scal expander.  Floating-point modes additionally require
;; flag_unsafe_math_optimizations.
f5dcbee1 | 1363 | (define_expand "reduc_smin_scal_<mode>" |
cd65e265 DZ |
1364 | [(match_operand:<V_elem> 0 "nonimmediate_operand") |
1365 | (match_operand:VQ 1 "s_register_operand")] | |
0094f21b JB |
1366 | "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations) |
1367 | && !BYTES_BIG_ENDIAN" | |
88f77cba JB |
1368 | { |
1369 | rtx step1 = gen_reg_rtx (<V_HALF>mode); | |
88f77cba JB |
1370 | |
1371 | emit_insn (gen_quad_halves_smin<mode> (step1, operands[1])); | |
f5dcbee1 | 1372 | emit_insn (gen_reduc_smin_scal_<V_half> (operands[0], step1)); |
88f77cba JB |
1373 | |
1374 | DONE; | |
1375 | }) | |
1376 | ||
;; Signed-maximum reduction of a VD-mode vector to a scalar.
;; neon_pairwise_reduce is driven with the vpsmax generator into a temporary
;; vector, then element 0 is extracted into operand 0.  Floating-point modes
;; additionally require flag_unsafe_math_optimizations.
f5dcbee1 | 1377 | (define_expand "reduc_smax_scal_<mode>" |
cd65e265 DZ |
1378 | [(match_operand:<V_elem> 0 "nonimmediate_operand") |
1379 | (match_operand:VD 1 "s_register_operand")] | |
400cfcf5 | 1380 | "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" |
88f77cba | 1381 | { |
f5dcbee1 AL |
1382 | rtx vec = gen_reg_rtx (<MODE>mode); |
1383 | neon_pairwise_reduce (vec, operands[1], <MODE>mode, | |
88f77cba | 1384 | &gen_neon_vpsmax<mode>); |
f5dcbee1 | 1385 | /* The result is computed into every element of the vector. */ |
ff03930a | 1386 | emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx)); |
88f77cba JB |
1387 | DONE; |
1388 | }) | |
1389 | ||
;; Signed-maximum reduction of a VQ-mode vector to a scalar, little-endian
;; only.  The two halves are first combined with quad_halves_smax<mode> into
;; a <V_HALF> temporary, which is then reduced by the half-width
;; reduc_smax_scal expander.  Floating-point modes additionally require
;; flag_unsafe_math_optimizations.
f5dcbee1 | 1390 | (define_expand "reduc_smax_scal_<mode>" |
cd65e265 DZ |
1391 | [(match_operand:<V_elem> 0 "nonimmediate_operand") |
1392 | (match_operand:VQ 1 "s_register_operand")] | |
0094f21b JB |
1393 | "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations) |
1394 | && !BYTES_BIG_ENDIAN" | |
88f77cba JB |
1395 | { |
1396 | rtx step1 = gen_reg_rtx (<V_HALF>mode); | |
88f77cba JB |
1397 | |
1398 | emit_insn (gen_quad_halves_smax<mode> (step1, operands[1])); | |
f5dcbee1 | 1399 | emit_insn (gen_reduc_smax_scal_<V_half> (operands[0], step1)); |
88f77cba JB |
1400 | |
1401 | DONE; | |
1402 | }) | |
1403 | ||
;; Unsigned minimum reduction of a double integer (VDI) vector into a
;; scalar.  neon_pairwise_reduce is driven with the neon_vpumin
;; generator, and element 0 of the resulting vector is extracted into
;; operand 0.
f5dcbee1 | 1404 | (define_expand "reduc_umin_scal_<mode>" |
cd65e265 DZ |
1405 | [(match_operand:<V_elem> 0 "nonimmediate_operand") |
1406 | (match_operand:VDI 1 "s_register_operand")] |
88f77cba JB |
1407 | "TARGET_NEON" |
1408 | { |
f5dcbee1 AL |
1409 | rtx vec = gen_reg_rtx (<MODE>mode); |
1410 | neon_pairwise_reduce (vec, operands[1], <MODE>mode, |
88f77cba | 1411 | &gen_neon_vpumin<mode>); |
f5dcbee1 | 1412 | /* The result is computed into every element of the vector. */ |
ff03930a | 1413 | emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx)); |
88f77cba JB |
1414 | DONE; |
1415 | }) |
1416 | ||
;; Unsigned minimum reduction of a quad integer (VQI) vector into a
;; scalar.  The two halves are combined with quad_halves_umin into a
;; <V_HALF> temporary, which is then handed to the half-width
;; reduc_umin_scal_<V_half> expander.  Disabled when BYTES_BIG_ENDIAN.
f5dcbee1 | 1417 | (define_expand "reduc_umin_scal_<mode>" |
cd65e265 DZ |
1418 | [(match_operand:<V_elem> 0 "nonimmediate_operand") |
1419 | (match_operand:VQI 1 "s_register_operand")] |
0094f21b | 1420 | "TARGET_NEON && !BYTES_BIG_ENDIAN" |
88f77cba JB |
1421 | { |
1422 | rtx step1 = gen_reg_rtx (<V_HALF>mode); |
88f77cba JB |
1423 |
1424 | emit_insn (gen_quad_halves_umin<mode> (step1, operands[1])); |
f5dcbee1 | 1425 | emit_insn (gen_reduc_umin_scal_<V_half> (operands[0], step1)); |
88f77cba JB |
1426 |
1427 | DONE; |
1428 | }) |
1429 | ||
;; Unsigned maximum reduction of a double integer (VDI) vector into a
;; scalar.  neon_pairwise_reduce is driven with the neon_vpumax
;; generator, and element 0 of the resulting vector is extracted into
;; operand 0.
f5dcbee1 | 1430 | (define_expand "reduc_umax_scal_<mode>" |
cd65e265 DZ |
1431 | [(match_operand:<V_elem> 0 "nonimmediate_operand") |
1432 | (match_operand:VDI 1 "s_register_operand")] |
88f77cba JB |
1433 | "TARGET_NEON" |
1434 | { |
f5dcbee1 AL |
1435 | rtx vec = gen_reg_rtx (<MODE>mode); |
1436 | neon_pairwise_reduce (vec, operands[1], <MODE>mode, |
88f77cba | 1437 | &gen_neon_vpumax<mode>); |
f5dcbee1 | 1438 | /* The result is computed into every element of the vector. */ |
ff03930a | 1439 | emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx)); |
88f77cba JB |
1440 | DONE; |
1441 | }) |
1442 | ||
;; Unsigned maximum reduction of a quad integer (VQI) vector into a
;; scalar.  The two halves are combined with quad_halves_umax into a
;; <V_HALF> temporary, which is then handed to the half-width
;; reduc_umax_scal_<V_half> expander.  Disabled when BYTES_BIG_ENDIAN.
f5dcbee1 | 1443 | (define_expand "reduc_umax_scal_<mode>" |
cd65e265 DZ |
1444 | [(match_operand:<V_elem> 0 "nonimmediate_operand") |
1445 | (match_operand:VQI 1 "s_register_operand")] |
0094f21b | 1446 | "TARGET_NEON && !BYTES_BIG_ENDIAN" |
88f77cba JB |
1447 | { |
1448 | rtx step1 = gen_reg_rtx (<V_HALF>mode); |
88f77cba JB |
1449 |
1450 | emit_insn (gen_quad_halves_umax<mode> (step1, operands[1])); |
f5dcbee1 | 1451 | emit_insn (gen_reduc_umax_scal_<V_half> (operands[0], step1)); |
88f77cba JB |
1452 |
1453 | DONE; |
1454 | }) |
1455 | ||
;; Pairwise add on VD modes, modelled as UNSPEC_VPADD of two register
;; operands and emitted as "vpadd.<V_if_elem>".  The scheduling type is
;; neon_fp_reduc_add_s<q> for float modes and neon_reduc_add<q> otherwise.
1456 | (define_insn "neon_vpadd_internal<mode>" |
1457 | [(set (match_operand:VD 0 "s_register_operand" "=w") |
1458 | (unspec:VD [(match_operand:VD 1 "s_register_operand" "w") |
1459 | (match_operand:VD 2 "s_register_operand" "w")] |
1460 | UNSPEC_VPADD))] |
1461 | "TARGET_NEON" |
c956e102 MS |
1462 | "vpadd.<V_if_elem>\t%P0, %P1, %P2" |
1463 | ;; Assume this schedules like vadd. |
003bb7f3 | 1464 | [(set (attr "type") |
b75b1be2 | 1465 | (if_then_else (match_test "<Is_float_mode>") |
f7379e5e JG |
1466 | (const_string "neon_fp_reduc_add_s<q>") |
1467 | (const_string "neon_reduc_add<q>")))] |
c956e102 | 1468 | ) |
88f77cba | 1469 | |
;; Pairwise add on V4HF, modelled as UNSPEC_VPADD and emitted as
;; "vpadd.f16".  Only available with TARGET_NEON_FP16INST.
;; NOTE(review): the type is the integer "neon_reduc_add", whereas
;; neon_vpadd_internal<mode> uses "neon_fp_reduc_add_s<q>" for float
;; modes -- confirm this is intended.
55a9b91b MW |
1470 | (define_insn "neon_vpaddv4hf" |
1471 | [(set |
1472 | (match_operand:V4HF 0 "s_register_operand" "=w") |
1473 | (unspec:V4HF [(match_operand:V4HF 1 "s_register_operand" "w") |
1474 | (match_operand:V4HF 2 "s_register_operand" "w")] |
1475 | UNSPEC_VPADD))] |
1476 | "TARGET_NEON_FP16INST" |
1477 | "vpadd.f16\t%P0, %P1, %P2" |
1478 | [(set_attr "type" "neon_reduc_add")] |
1479 | ) |
1480 | ||
;; Signed/FP pairwise minimum on VD modes, modelled as UNSPEC_VPSMIN and
;; emitted as "vpmin.<V_s_elem>".  The scheduling type is
;; neon_fp_reduc_minmax_s<q> for float modes and neon_reduc_minmax<q>
;; otherwise.
88f77cba JB |
1481 | (define_insn "neon_vpsmin<mode>" |
1482 | [(set (match_operand:VD 0 "s_register_operand" "=w") |
1483 | (unspec:VD [(match_operand:VD 1 "s_register_operand" "w") |
1484 | (match_operand:VD 2 "s_register_operand" "w")] |
1485 | UNSPEC_VPSMIN))] |
1486 | "TARGET_NEON" |
c956e102 | 1487 | "vpmin.<V_s_elem>\t%P0, %P1, %P2" |
003bb7f3 | 1488 | [(set (attr "type") |
b75b1be2 | 1489 | (if_then_else (match_test "<Is_float_mode>") |
f7379e5e JG |
1490 | (const_string "neon_fp_reduc_minmax_s<q>") |
1491 | (const_string "neon_reduc_minmax<q>")))] |
c956e102 | 1492 | ) |
88f77cba JB |
1493 | |
;; Signed/FP pairwise maximum on VD modes, modelled as UNSPEC_VPSMAX and
;; emitted as "vpmax.<V_s_elem>".  The scheduling type is
;; neon_fp_reduc_minmax_s<q> for float modes and neon_reduc_minmax<q>
;; otherwise.
1494 | (define_insn "neon_vpsmax<mode>" |
1495 | [(set (match_operand:VD 0 "s_register_operand" "=w") |
1496 | (unspec:VD [(match_operand:VD 1 "s_register_operand" "w") |
1497 | (match_operand:VD 2 "s_register_operand" "w")] |
1498 | UNSPEC_VPSMAX))] |
1499 | "TARGET_NEON" |
c956e102 | 1500 | "vpmax.<V_s_elem>\t%P0, %P1, %P2" |
003bb7f3 | 1501 | [(set (attr "type") |
b75b1be2 | 1502 | (if_then_else (match_test "<Is_float_mode>") |
f7379e5e JG |
1503 | (const_string "neon_fp_reduc_minmax_s<q>") |
1504 | (const_string "neon_reduc_minmax<q>")))] |
c956e102 | 1505 | ) |
88f77cba JB |
1506 | |
;; Unsigned pairwise minimum on VDI modes, modelled as UNSPEC_VPUMIN and
;; emitted as "vpmin.<V_u_elem>".
1507 | (define_insn "neon_vpumin<mode>" |
1508 | [(set (match_operand:VDI 0 "s_register_operand" "=w") |
1509 | (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w") |
1510 | (match_operand:VDI 2 "s_register_operand" "w")] |
1511 | UNSPEC_VPUMIN))] |
1512 | "TARGET_NEON" |
c956e102 | 1513 | "vpmin.<V_u_elem>\t%P0, %P1, %P2" |
f7379e5e | 1514 | [(set_attr "type" "neon_reduc_minmax<q>")] |
c956e102 | 1515 | ) |
88f77cba JB |
1516 | |
;; Unsigned pairwise maximum on VDI modes, modelled as UNSPEC_VPUMAX and
;; emitted as "vpmax.<V_u_elem>".
1517 | (define_insn "neon_vpumax<mode>" |
1518 | [(set (match_operand:VDI 0 "s_register_operand" "=w") |
1519 | (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w") |
1520 | (match_operand:VDI 2 "s_register_operand" "w")] |
1521 | UNSPEC_VPUMAX))] |
1522 | "TARGET_NEON" |
c956e102 | 1523 | "vpmax.<V_u_elem>\t%P0, %P1, %P2" |
f7379e5e | 1524 | [(set_attr "type" "neon_reduc_minmax<q>")] |
c956e102 | 1525 | ) |
88f77cba JB |
1526 | |
1527 | ;; Saturating arithmetic | |
1528 | ||
1529 | ; NOTE: Neon supports many more saturating variants of instructions than the | |
1530 | ; following, but these are all GCC currently understands. | |
1531 | ; FIXME: Actually, GCC doesn't know how to create saturating add/sub by itself | |
1532 | ; yet either, although these patterns may be used by intrinsics when they're | |
1533 | ; added. | |
1534 | ||
;; Signed saturating add (ss_plus) on VD modes, emitted as
;; "vqadd.<V_s_elem>".
1535 | (define_insn "*ss_add<mode>_neon" |
1536 | [(set (match_operand:VD 0 "s_register_operand" "=w") |
1537 | (ss_plus:VD (match_operand:VD 1 "s_register_operand" "w") |
1538 | (match_operand:VD 2 "s_register_operand" "w")))] |
1539 | "TARGET_NEON" |
c956e102 | 1540 | "vqadd.<V_s_elem>\t%P0, %P1, %P2" |
f7379e5e | 1541 | [(set_attr "type" "neon_qadd<q>")] |
c956e102 | 1542 | ) |
88f77cba JB |
1543 | |
;; Unsigned saturating add (us_plus) on VD modes, emitted as
;; "vqadd.<V_u_elem>".
1544 | (define_insn "*us_add<mode>_neon" |
1545 | [(set (match_operand:VD 0 "s_register_operand" "=w") |
1546 | (us_plus:VD (match_operand:VD 1 "s_register_operand" "w") |
1547 | (match_operand:VD 2 "s_register_operand" "w")))] |
1548 | "TARGET_NEON" |
c956e102 | 1549 | "vqadd.<V_u_elem>\t%P0, %P1, %P2" |
f7379e5e | 1550 | [(set_attr "type" "neon_qadd<q>")] |
c956e102 | 1551 | ) |
88f77cba JB |
1552 | |
;; Signed saturating subtract (ss_minus) on VD modes, emitted as
;; "vqsub.<V_s_elem>".
1553 | (define_insn "*ss_sub<mode>_neon" |
1554 | [(set (match_operand:VD 0 "s_register_operand" "=w") |
1555 | (ss_minus:VD (match_operand:VD 1 "s_register_operand" "w") |
1556 | (match_operand:VD 2 "s_register_operand" "w")))] |
1557 | "TARGET_NEON" |
c956e102 | 1558 | "vqsub.<V_s_elem>\t%P0, %P1, %P2" |
f7379e5e | 1559 | [(set_attr "type" "neon_qsub<q>")] |
c956e102 | 1560 | ) |
88f77cba JB |
1561 | |
;; Unsigned saturating subtract (us_minus) on VD modes, emitted as
;; "vqsub.<V_u_elem>".
1562 | (define_insn "*us_sub<mode>_neon" |
1563 | [(set (match_operand:VD 0 "s_register_operand" "=w") |
1564 | (us_minus:VD (match_operand:VD 1 "s_register_operand" "w") |
1565 | (match_operand:VD 2 "s_register_operand" "w")))] |
1566 | "TARGET_NEON" |
c956e102 | 1567 | "vqsub.<V_u_elem>\t%P0, %P1, %P2" |
f7379e5e | 1568 | [(set_attr "type" "neon_qsub<q>")] |
c956e102 | 1569 | ) |
88f77cba | 1570 | |
5bfc5baa JB |
1571 | ;; Conditional instructions. These are comparisons with conditional moves for |
1572 | ;; vectors. They perform the assignment: | |
1573 | ;; | |
1574 | ;; Vop0 = (Vop4 <op3> Vop5) ? Vop1 : Vop2; | |
1575 | ;; | |
1576 | ;; where op3 is <, <=, ==, !=, >= or >. Operations are performed | |
1577 | ;; element-wise. | |
1578 | ||
;; Signed/FP vector conditional select.  The expander works in three
;; switches over the comparison code of operand 3:
;;   1. decide whether a compare-against-zero form can be used (GE, GT,
;;      LE, LT, EQ with operand 5 equal to CONST0_RTX), otherwise force
;;      operand 5 into a register;
;;   2. pick base_comparison / complimentary_comparison generators and
;;      set `inverse' for the LT/LE family;
;;   3. emit the compare(s) into `mask', setting swap_bsl_operands when
;;      the emitted mask is the negation of the wanted condition.
;; Finally neon_vbsl selects between operands 1 and 2 under `mask',
;; with the two swapped when swap_bsl_operands is set.
e9e1d143 | 1579 | (define_expand "vcond<mode><mode>" |
cd65e265 | 1580 | [(set (match_operand:VDQW 0 "s_register_operand") |
5bfc5baa | 1581 | (if_then_else:VDQW |
f35c297f | 1582 | (match_operator 3 "comparison_operator" |
cd65e265 DZ |
1583 | [(match_operand:VDQW 4 "s_register_operand") |
1584 | (match_operand:VDQW 5 "nonmemory_operand")]) |
1585 | (match_operand:VDQW 1 "s_register_operand") |
1586 | (match_operand:VDQW 2 "s_register_operand")))] |
5bfc5baa JB |
1587 | "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" |
1588 | { |
f35c297f | 1589 | int inverse = 0; |
ff522f7f | 1590 | int use_zero_form = 0; |
f35c297f KT |
1591 | int swap_bsl_operands = 0; |
1592 | rtx mask = gen_reg_rtx (<V_cmp_result>mode); |
1593 | rtx tmp = gen_reg_rtx (<V_cmp_result>mode); |
1594 | ||
94f0f2cc JG |
1595 | rtx (*base_comparison) (rtx, rtx, rtx); |
1596 | rtx (*complimentary_comparison) (rtx, rtx, rtx); |
f35c297f | 1597 |
5bfc5baa JB |
1598 | switch (GET_CODE (operands[3])) |
1599 | { |
1600 | case GE: |
ff522f7f | 1601 | case GT: |
f35c297f | 1602 | case LE: |
ff522f7f | 1603 | case LT: |
f35c297f | 1604 | case EQ: |
ff522f7f ZC |
1605 | if (operands[5] == CONST0_RTX (<MODE>mode)) |
1606 | { |
1607 | use_zero_form = 1; |
1608 | break; |
1609 | } |
1610 | /* Fall through. */ |
f35c297f KT |
1611 | default: |
1612 | if (!REG_P (operands[5])) |
1613 | operands[5] = force_reg (<MODE>mode, operands[5]); |
1614 | } |
1615 | ||
1616 | switch (GET_CODE (operands[3])) |
1617 | { |
1618 | case LT: |
1619 | case UNLT: |
1620 | inverse = 1; |
1621 | /* Fall through. */ |
1622 | case GE: |
1623 | case UNGE: |
1624 | case ORDERED: |
1625 | case UNORDERED: |
1626 | base_comparison = gen_neon_vcge<mode>; |
1627 | complimentary_comparison = gen_neon_vcgt<mode>; |
1628 | break; |
1629 | case LE: |
1630 | case UNLE: |
1631 | inverse = 1; |
1632 | /* Fall through. */ |
5bfc5baa | 1633 | case GT: |
f35c297f KT |
1634 | case UNGT: |
1635 | base_comparison = gen_neon_vcgt<mode>; |
1636 | complimentary_comparison = gen_neon_vcge<mode>; |
5bfc5baa | 1637 | break; |
5bfc5baa | 1638 | case EQ: |
f35c297f KT |
1639 | case NE: |
1640 | case UNEQ: |
1641 | base_comparison = gen_neon_vceq<mode>; |
1642 | complimentary_comparison = gen_neon_vceq<mode>; |
5bfc5baa | 1643 | break; |
f35c297f KT |
1644 | default: |
1645 | gcc_unreachable (); |
1646 | } |
1647 | ||
1648 | switch (GET_CODE (operands[3])) |
1649 | { |
1650 | case LT: |
5bfc5baa | 1651 | case LE: |
f35c297f KT |
1652 | case GT: |
1653 | case GE: |
1654 | case EQ: |
1655 | /* The easy case. Here we emit one of vcge, vcgt or vceq. |
1656 | As a LT b <=> b GT a && a LE b <=> b GE a. Our transformations are: |
1657 | a GE b -> a GE b |
1658 | a GT b -> a GT b |
1659 | a LE b -> b GE a |
1660 | a LT b -> b GT a |
ff522f7f ZC |
1661 | a EQ b -> a EQ b |
1662 | Note that there also exist direct comparison against 0 forms, |
1663 | so catch those as a special case. */ |
1664 | if (use_zero_form) |
1665 | { |
1666 | inverse = 0; |
1667 | switch (GET_CODE (operands[3])) |
1668 | { |
1669 | case LT: |
1670 | base_comparison = gen_neon_vclt<mode>; |
1671 | break; |
1672 | case LE: |
1673 | base_comparison = gen_neon_vcle<mode>; |
1674 | break; |
1675 | default: |
1676 | /* Do nothing, other zero form cases already have the correct |
1677 | base_comparison. */ |
1678 | break; |
1679 | } |
1680 | } |
f35c297f KT |
1681 |
1682 | if (!inverse) |
94f0f2cc | 1683 | emit_insn (base_comparison (mask, operands[4], operands[5])); |
5bfc5baa | 1684 | else |
94f0f2cc | 1685 | emit_insn (complimentary_comparison (mask, operands[5], operands[4])); |
5bfc5baa | 1686 | break; |
f35c297f KT |
1687 | case UNLT: |
1688 | case UNLE: |
1689 | case UNGT: |
1690 | case UNGE: |
1691 | case NE: |
1692 | /* Vector compare returns false for lanes which are unordered, so if we use |
1693 | the inverse of the comparison we actually want to emit, then |
1694 | swap the operands to BSL, we will end up with the correct result. |
1695 | Note that a NE NaN and NaN NE b are true for all a, b. |
1696 | ||
1697 | Our transformations are: |
1698 | a UNGE b -> !(b GT a) |
1699 | a UNGT b -> !(b GE a) |
1700 | a UNLE b -> !(a GT b) |
1701 | a UNLT b -> !(a GE b) |
1702 | a NE b -> !(a EQ b) */ |
1703 | ||
1704 | if (inverse) |
94f0f2cc | 1705 | emit_insn (base_comparison (mask, operands[4], operands[5])); |
5bfc5baa | 1706 | else |
94f0f2cc | 1707 | emit_insn (complimentary_comparison (mask, operands[5], operands[4])); |
f35c297f KT |
1708 |
1709 | swap_bsl_operands = 1; |
5bfc5baa | 1710 | break; |
f35c297f KT |
1711 | case UNEQ: |
1712 | /* We check (a > b || b > a). Combining these comparisons gives us |
1713 | true iff (a != b && a ORDERED b); swapping the operands to BSL |
1714 | will then give us (a == b || a UNORDERED b) as intended. */ |
1715 | ||
94f0f2cc JG |
1716 | emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5])); |
1717 | emit_insn (gen_neon_vcgt<mode> (tmp, operands[5], operands[4])); |
f35c297f KT |
1718 | emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp)); |
1719 | swap_bsl_operands = 1; |
1720 | break; |
1721 | case UNORDERED: |
1722 | /* Operands are ORDERED iff (a > b || b >= a). |
1723 | Swapping the operands to BSL will give the UNORDERED case. */ |
1724 | swap_bsl_operands = 1; |
1725 | /* Fall through. */ |
1726 | case ORDERED: |
94f0f2cc JG |
1727 | emit_insn (gen_neon_vcgt<mode> (tmp, operands[4], operands[5])); |
1728 | emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4])); |
f35c297f | 1729 | emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp)); |
5bfc5baa | 1730 | break; |
5bfc5baa JB |
1731 | default: |
1732 | gcc_unreachable (); |
1733 | } |
f35c297f KT |
1734 |
1735 | if (swap_bsl_operands) |
5bfc5baa JB |
1736 | emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2], |
1737 | operands[1])); |
1738 | else |
1739 | emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1], |
1740 | operands[2])); |
5bfc5baa JB |
1741 | DONE; |
1742 | }) |
1743 | ||
;; Unsigned integer vector conditional select.  A compare for the code
;; of operand 3 is emitted into `mask': GEU/GTU/EQ directly, LEU/LTU by
;; swapping operands 4 and 5 into vcgeu/vcgtu, and NE as vceq with the
;; two data operands of the final neon_vbsl swapped (`inverse').
;; NOTE(review): operand 5 is declared "s_register_operand", so the
;; immediate_zero path (operand 5 == CONST0_RTX) looks unreachable; it
;; also calls the vcle/vclt generators for the unsigned LEU/LTU codes,
;; whereas every other unsigned compare here uses a *u generator --
;; confirm before relying on that path.
e9e1d143 | 1744 | (define_expand "vcondu<mode><mode>" |
cd65e265 | 1745 | [(set (match_operand:VDQIW 0 "s_register_operand") |
5bfc5baa JB |
1746 | (if_then_else:VDQIW |
1747 | (match_operator 3 "arm_comparison_operator" |
cd65e265 DZ |
1748 | [(match_operand:VDQIW 4 "s_register_operand") |
1749 | (match_operand:VDQIW 5 "s_register_operand")]) |
1750 | (match_operand:VDQIW 1 "s_register_operand") |
1751 | (match_operand:VDQIW 2 "s_register_operand")))] |
5bfc5baa JB |
1752 | "TARGET_NEON" |
1753 | { |
1754 | rtx mask; |
1755 | int inverse = 0, immediate_zero = 0; |
1756 | ||
1757 | mask = gen_reg_rtx (<V_cmp_result>mode); |
1758 | ||
1759 | if (operands[5] == CONST0_RTX (<MODE>mode)) |
1760 | immediate_zero = 1; |
1761 | else if (!REG_P (operands[5])) |
1762 | operands[5] = force_reg (<MODE>mode, operands[5]); |
1763 | ||
1764 | switch (GET_CODE (operands[3])) |
1765 | { |
1766 | case GEU: |
94f0f2cc | 1767 | emit_insn (gen_neon_vcgeu<mode> (mask, operands[4], operands[5])); |
5bfc5baa JB |
1768 | break; |
1769 | ||
1770 | case GTU: |
94f0f2cc | 1771 | emit_insn (gen_neon_vcgtu<mode> (mask, operands[4], operands[5])); |
5bfc5baa JB |
1772 | break; |
1773 | ||
1774 | case EQ: |
94f0f2cc | 1775 | emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5])); |
5bfc5baa JB |
1776 | break; |
1777 | ||
1778 | case LEU: |
1779 | if (immediate_zero) |
94f0f2cc | 1780 | emit_insn (gen_neon_vcle<mode> (mask, operands[4], operands[5])); |
5bfc5baa | 1781 | else |
94f0f2cc | 1782 | emit_insn (gen_neon_vcgeu<mode> (mask, operands[5], operands[4])); |
5bfc5baa JB |
1783 | break; |
1784 | ||
1785 | case LTU: |
1786 | if (immediate_zero) |
94f0f2cc | 1787 | emit_insn (gen_neon_vclt<mode> (mask, operands[4], operands[5])); |
5bfc5baa | 1788 | else |
94f0f2cc | 1789 | emit_insn (gen_neon_vcgtu<mode> (mask, operands[5], operands[4])); |
5bfc5baa JB |
1790 | break; |
1791 | ||
1792 | case NE: |
94f0f2cc | 1793 | emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5])); |
5bfc5baa JB |
1794 | inverse = 1; |
1795 | break; |
1796 | ||
1797 | default: |
1798 | gcc_unreachable (); |
1799 | } |
1800 | ||
1801 | if (inverse) |
1802 | emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2], |
1803 | operands[1])); |
1804 | else |
1805 | emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1], |
1806 | operands[2])); |
1807 | ||
1808 | DONE; |
1809 | }) |
1810 | ||
88f77cba JB |
1811 | ;; Patterns for builtins. |
1812 | ||
1813 | ; good for plain vadd, vaddq. | |
1814 | ||
;; vadd builtin expander for VCVTF modes.  Emits the add<mode>3 pattern
;; when the mode is not a float mode or flag_unsafe_math_optimizations is
;; set; otherwise emits neon_vadd<mode>_unspec.
bab53516 | 1815 | (define_expand "neon_vadd<mode>" |
cd65e265 DZ |
1816 | [(match_operand:VCVTF 0 "s_register_operand") |
1817 | (match_operand:VCVTF 1 "s_register_operand") |
1818 | (match_operand:VCVTF 2 "s_register_operand")] |
bab53516 SL |
1819 | "TARGET_NEON" |
1820 | { |
1821 | if (!<Is_float_mode> || flag_unsafe_math_optimizations) |
1822 | emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2])); |
1823 | else |
1824 | emit_insn (gen_neon_vadd<mode>_unspec (operands[0], operands[1], |
1825 | operands[2])); |
1826 | DONE; |
1827 | }) |
1828 | ||
;; vadd builtin expander for the VH (half-float vector) modes: always
;; forwards to add<mode>3_fp16.  Requires TARGET_NEON_FP16INST.
55a9b91b MW |
1829 | (define_expand "neon_vadd<mode>" |
1830 | [(match_operand:VH 0 "s_register_operand") |
1831 | (match_operand:VH 1 "s_register_operand") |
1832 | (match_operand:VH 2 "s_register_operand")] |
1833 | "TARGET_NEON_FP16INST" |
1834 | { |
1835 | emit_insn (gen_add<mode>3_fp16 (operands[0], operands[1], operands[2])); |
1836 | DONE; |
1837 | }) |
1838 | ||
;; vsub builtin expander for the VH (half-float vector) modes: always
;; forwards to sub<mode>3_fp16.  Requires TARGET_NEON_FP16INST.
1839 | (define_expand "neon_vsub<mode>" |
1840 | [(match_operand:VH 0 "s_register_operand") |
1841 | (match_operand:VH 1 "s_register_operand") |
1842 | (match_operand:VH 2 "s_register_operand")] |
1843 | "TARGET_NEON_FP16INST" |
1844 | { |
1845 | emit_insn (gen_sub<mode>3_fp16 (operands[0], operands[1], operands[2])); |
1846 | DONE; |
1847 | }) |
1848 | ||
bab53516 SL |
1849 | ; Note that NEON operations don't support the full IEEE 754 standard: in |
1850 | ; particular, denormal values are flushed to zero. This means that GCC cannot | |
1851 | ; use those instructions for autovectorization, etc. unless | |
1852 | ; -funsafe-math-optimizations is in effect (in which case flush-to-zero | |
9c582551 | 1853 | ; behavior is permissible). Intrinsic operations (provided by the arm_neon.h |
bab53516 SL |
1854 | ; header) must work in either case: if -funsafe-math-optimizations is given, |
1855 | ; intrinsics expand to "canonical" RTL where possible, otherwise intrinsics | |
1856 | ; expand to unspecs (which may potentially limit the extent to which they might | |
1857 | ; be optimized by generic code). | |
1858 | ||
1859 | ; Used for intrinsics when flag_unsafe_math_optimizations is false. | |
1860 | ||
;; Vector add on VCVTF modes expressed as UNSPEC_VADD rather than a plus
;; rtx, emitted as "vadd.<V_if_elem>".  The scheduling type is
;; neon_fp_addsub_s<q> for float modes and neon_add<q> otherwise.
1861 | (define_insn "neon_vadd<mode>_unspec" |
0d0b79a6 RR |
1862 | [(set (match_operand:VCVTF 0 "s_register_operand" "=w") |
1863 | (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w") |
1864 | (match_operand:VCVTF 2 "s_register_operand" "w")] |
88f77cba JB |
1865 | UNSPEC_VADD))] |
1866 | "TARGET_NEON" |
c956e102 | 1867 | "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
003bb7f3 | 1868 | [(set (attr "type") |
b75b1be2 | 1869 | (if_then_else (match_test "<Is_float_mode>") |
f7379e5e JG |
1870 | (const_string "neon_fp_addsub_s<q>") |
1871 | (const_string "neon_add<q>")))] |
c956e102 | 1872 | ) |
88f77cba | 1873 | |
;; Long add: two VDI inputs produce a <V_widen> result.  Iterates over
;; the VADDL unspecs, with <sup> supplying the type prefix of the
;; "vaddl" mnemonic.
94f0f2cc | 1874 | (define_insn "neon_vaddl<sup><mode>" |
88f77cba JB |
1875 | [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") |
1876 | (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w") |
94f0f2cc JG |
1877 | (match_operand:VDI 2 "s_register_operand" "w")] |
1878 | VADDL))] |
88f77cba | 1879 | "TARGET_NEON" |
94f0f2cc | 1880 | "vaddl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2" |
f7379e5e | 1881 | [(set_attr "type" "neon_add_long")] |
c956e102 | 1882 | ) |
88f77cba | 1883 | |
;; Wide add: a <V_widen> input plus a VDI input produce a <V_widen>
;; result.  Iterates over the VADDW unspecs, with <sup> supplying the
;; type prefix of the "vaddw" mnemonic.
94f0f2cc | 1884 | (define_insn "neon_vaddw<sup><mode>" |
88f77cba JB |
1885 | [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") |
1886 | (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w") |
94f0f2cc JG |
1887 | (match_operand:VDI 2 "s_register_operand" "w")] |
1888 | VADDW))] |
88f77cba | 1889 | "TARGET_NEON" |
94f0f2cc | 1890 | "vaddw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2" |
f7379e5e | 1891 | [(set_attr "type" "neon_add_widen")] |
c956e102 | 1892 | ) |
88f77cba JB |
1893 | |
1894 | ; vhadd and vrhadd. | |
1895 | ||
;; Halving add (vhadd / vrhadd via <r>) on VDQIW modes, iterating over
;; the VHADD unspecs.
;; NOTE(review): the type is the fixed string "neon_add_halve_q" even
;; though the mode iterator is VDQIW; neighbouring patterns use a <q>
;; suffix instead (e.g. "neon_qadd<q>") -- confirm this is intended.
94f0f2cc | 1896 | (define_insn "neon_v<r>hadd<sup><mode>" |
88f77cba JB |
1897 | [(set (match_operand:VDQIW 0 "s_register_operand" "=w") |
1898 | (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w") |
94f0f2cc JG |
1899 | (match_operand:VDQIW 2 "s_register_operand" "w")] |
1900 | VHADD))] |
88f77cba | 1901 | "TARGET_NEON" |
94f0f2cc | 1902 | "v<r>hadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
f7379e5e | 1903 | [(set_attr "type" "neon_add_halve_q")] |
c956e102 | 1904 | ) |
88f77cba | 1905 | |
;; Saturating add builtin on VDQIX modes, iterating over the VQADD
;; unspecs and emitted as "vqadd.<sup><size>".
94f0f2cc | 1906 | (define_insn "neon_vqadd<sup><mode>" |
88f77cba JB |
1907 | [(set (match_operand:VDQIX 0 "s_register_operand" "=w") |
1908 | (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w") |
94f0f2cc JG |
1909 | (match_operand:VDQIX 2 "s_register_operand" "w")] |
1910 | VQADD))] |
88f77cba | 1911 | "TARGET_NEON" |
94f0f2cc | 1912 | "vqadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
f7379e5e | 1913 | [(set_attr "type" "neon_qadd<q>")] |
c956e102 | 1914 | ) |
88f77cba | 1915 | |
;; Narrowing add (vaddhn / vraddhn via <r>): two VN inputs produce a
;; <V_narrow> result.  Iterates over the VADDHN unspecs.
94f0f2cc | 1916 | (define_insn "neon_v<r>addhn<mode>" |
88f77cba JB |
1917 | [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w") |
1918 | (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w") |
94f0f2cc JG |
1919 | (match_operand:VN 2 "s_register_operand" "w")] |
1920 | VADDHN))] |
88f77cba | 1921 | "TARGET_NEON" |
94f0f2cc | 1922 | "v<r>addhn.<V_if_elem>\t%P0, %q1, %q2" |
f7379e5e | 1923 | [(set_attr "type" "neon_add_halve_narrow_q")] |
c956e102 | 1924 | ) |
88f77cba | 1925 | |
94f0f2cc JG |
1926 | ;; Polynomial and Float multiplication. |
;; Multiply on the VPF modes expressed as UNSPEC_VMUL, emitted as
;; "vmul.<pf><size>".  The scheduling type is neon_fp_mul_s<q> for float
;; modes and neon_mul_<V_elem_ch><q> otherwise.
1927 | (define_insn "neon_vmul<pf><mode>" |
1928 | [(set (match_operand:VPF 0 "s_register_operand" "=w") |
1929 | (unspec:VPF [(match_operand:VPF 1 "s_register_operand" "w") |
1930 | (match_operand:VPF 2 "s_register_operand" "w")] |
88f77cba JB |
1931 | UNSPEC_VMUL))] |
1932 | "TARGET_NEON" |
94f0f2cc | 1933 | "vmul.<pf>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
003bb7f3 | 1934 | [(set (attr "type") |
b75b1be2 | 1935 | (if_then_else (match_test "<Is_float_mode>") |
f7379e5e JG |
1936 | (const_string "neon_fp_mul_s<q>") |
1937 | (const_string "neon_mul_<V_elem_ch><q>")))] |
c956e102 | 1938 | ) |
88f77cba | 1939 | |
;; mul<mode>3 for the VH (half-float vector) modes, written as a plain
;; mult rtx and emitted as "vmul.f16".  Only enabled when both
;; TARGET_NEON_FP16INST and flag_unsafe_math_optimizations hold.
6da37857 MW |
1940 | (define_insn "mul<mode>3" |
1941 | [(set |
1942 | (match_operand:VH 0 "s_register_operand" "=w") |
1943 | (mult:VH |
1944 | (match_operand:VH 1 "s_register_operand" "w") |
1945 | (match_operand:VH 2 "s_register_operand" "w")))] |
1946 | "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations" |
1947 | "vmul.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
1948 | [(set_attr "type" "neon_mul_<VH_elem_ch><q>")] |
1949 | ) |
1950 | ||
;; Same RTL and output template as mul<mode>3 for the VH modes, but
;; conditioned on TARGET_NEON_FP16INST alone (no
;; flag_unsafe_math_optimizations requirement).
55a9b91b MW |
1951 | (define_insn "neon_vmulf<mode>" |
1952 | [(set |
1953 | (match_operand:VH 0 "s_register_operand" "=w") |
1954 | (mult:VH |
1955 | (match_operand:VH 1 "s_register_operand" "w") |
1956 | (match_operand:VH 2 "s_register_operand" "w")))] |
1957 | "TARGET_NEON_FP16INST" |
1958 | "vmul.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
1959 | [(set_attr "type" "neon_mul_<VH_elem_ch><q>")] |
1960 | ) |
1961 | ||
;; vmla builtin expander for VDQW modes.  Emits mul<mode>3add<mode>_neon
;; when the mode is not a float mode or flag_unsafe_math_optimizations is
;; set; otherwise emits neon_vmla<mode>_unspec.  All four operands are
;; passed through in order.
bab53516 | 1962 | (define_expand "neon_vmla<mode>" |
cd65e265 DZ |
1963 | [(match_operand:VDQW 0 "s_register_operand") |
1964 | (match_operand:VDQW 1 "s_register_operand") |
1965 | (match_operand:VDQW 2 "s_register_operand") |
1966 | (match_operand:VDQW 3 "s_register_operand")] |
bab53516 SL |
1967 | "TARGET_NEON" |
1968 | { |
1969 | if (!<Is_float_mode> || flag_unsafe_math_optimizations) |
1970 | emit_insn (gen_mul<mode>3add<mode>_neon (operands[0], operands[1], |
1971 | operands[2], operands[3])); |
1972 | else |
1973 | emit_insn (gen_neon_vmla<mode>_unspec (operands[0], operands[1], |
1974 | operands[2], operands[3])); |
1975 | DONE; |
1976 | }) |
1977 | ||
;; vfma builtin expander for VCVTF modes; requires TARGET_NEON and
;; TARGET_FMA.  Forwards to fma<mode>4_intrinsic with the operands
;; reordered: operands 2 and 3 come first and operand 1 is passed last.
c4216388 MGD |
1978 | (define_expand "neon_vfma<VCVTF:mode>" |
1979 | [(match_operand:VCVTF 0 "s_register_operand") |
1980 | (match_operand:VCVTF 1 "s_register_operand") |
1981 | (match_operand:VCVTF 2 "s_register_operand") |
94f0f2cc | 1982 | (match_operand:VCVTF 3 "s_register_operand")] |
c4216388 MGD |
1983 | "TARGET_NEON && TARGET_FMA" |
1984 | { |
1985 | emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3], |
1986 | operands[1])); |
1987 | DONE; |
1988 | }) |
1989 | ||
;; vfma builtin expander for the VH modes; requires TARGET_NEON_FP16INST.
;; Forwards to fma<mode>4_intrinsic with the operands reordered:
;; operands 2 and 3 come first and operand 1 is passed last.
55a9b91b MW |
1990 | (define_expand "neon_vfma<VH:mode>" |
1991 | [(match_operand:VH 0 "s_register_operand") |
1992 | (match_operand:VH 1 "s_register_operand") |
1993 | (match_operand:VH 2 "s_register_operand") |
1994 | (match_operand:VH 3 "s_register_operand")] |
1995 | "TARGET_NEON_FP16INST" |
1996 | { |
1997 | emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3], |
1998 | operands[1])); |
1999 | DONE; |
2000 | }) |
2001 | ||
;; vfms builtin expander for VCVTF modes; requires TARGET_NEON and
;; TARGET_FMA.  Forwards to fmsub<mode>4_intrinsic with the operands
;; reordered: operands 2 and 3 come first and operand 1 is passed last.
c4216388 MGD |
2002 | (define_expand "neon_vfms<VCVTF:mode>" |
2003 | [(match_operand:VCVTF 0 "s_register_operand") |
2004 | (match_operand:VCVTF 1 "s_register_operand") |
2005 | (match_operand:VCVTF 2 "s_register_operand") |
94f0f2cc | 2006 | (match_operand:VCVTF 3 "s_register_operand")] |
c4216388 MGD |
2007 | "TARGET_NEON && TARGET_FMA" |
2008 | { |
2009 | emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3], |
2010 | operands[1])); |
2011 | DONE; |
2012 | }) |
2013 | ||
;; vfms builtin expander for the VH modes; requires TARGET_NEON_FP16INST.
;; Forwards to fmsub<mode>4_intrinsic with the operands reordered:
;; operands 2 and 3 come first and operand 1 is passed last.
55a9b91b MW |
2014 | (define_expand "neon_vfms<VH:mode>" |
2015 | [(match_operand:VH 0 "s_register_operand") |
2016 | (match_operand:VH 1 "s_register_operand") |
2017 | (match_operand:VH 2 "s_register_operand") |
2018 | (match_operand:VH 3 "s_register_operand")] |
2019 | "TARGET_NEON_FP16INST" |
2020 | { |
2021 | emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3], |
2022 | operands[1])); |
2023 | DONE; |
2024 | }) |
2025 | ||
06e95715 KT |
2026 | ;; The expand RTL structure here is not important. |
2027 | ;; We use the gen_* functions anyway. | |
2028 | ;; We just need something to wrap the iterators around. | |
2029 | ||
;; Expander for the vfmal/vfmsl low/high builtins; requires
;; TARGET_FP16FML.  It builds one half-selector with
;; arm_simd_vect_par_cnst_half for <VFML>mode and passes it twice (as the
;; selector for both multiplicand operands) to the matching
;; vfm<vfml_op>l_<vfml_half><mode>_intrinsic pattern.
2030 | (define_expand "neon_vfm<vfml_op>l_<vfml_half><mode>" |
2031 | [(set (match_operand:VCVTF 0 "s_register_operand") |
2032 | (unspec:VCVTF |
2033 | [(match_operand:VCVTF 1 "s_register_operand") |
2034 | (PLUSMINUS:<VFML> |
2035 | (match_operand:<VFML> 2 "s_register_operand") |
2036 | (match_operand:<VFML> 3 "s_register_operand"))] VFMLHALVES))] |
2037 | "TARGET_FP16FML" |
2038 | { |
2039 | rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>); |
2040 | emit_insn (gen_vfm<vfml_op>l_<vfml_half><mode>_intrinsic (operands[0], |
2041 | operands[1], |
2042 | operands[2], |
2043 | operands[3], |
2044 | half, half)); |
2045 | DONE; |
2046 | }) |
2047 | ||
;; vfmal on the low halves: fma of the float-extended vec_select of
;; operands 2 and 3 (selectors must satisfy vect_par_constant_low) with
;; operand 1 as the addend.  Operand 1 is tied to the output by the "0"
;; constraint.  Emitted as "vfmal.f16" using the <V_lo> register forms.
2048 | (define_insn "vfmal_low<mode>_intrinsic" |
2049 | [(set (match_operand:VCVTF 0 "s_register_operand" "=w") |
2050 | (fma:VCVTF |
2051 | (float_extend:VCVTF |
2052 | (vec_select:<VFMLSEL> |
2053 | (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>") |
2054 | (match_operand:<VFML> 4 "vect_par_constant_low" ""))) |
2055 | (float_extend:VCVTF |
2056 | (vec_select:<VFMLSEL> |
2057 | (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>") |
2058 | (match_operand:<VFML> 5 "vect_par_constant_low" ""))) |
2059 | (match_operand:VCVTF 1 "s_register_operand" "0")))] |
2060 | "TARGET_FP16FML" |
2061 | "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3" |
2062 | [(set_attr "type" "neon_fp_mla_s<q>")] |
2063 | ) |
2064 | ||
;; vfmsl on the high halves: like the vfmal form but the selected half of
;; operand 2 is negated before the float_extend.  Selectors must satisfy
;; vect_par_constant_high; operand 1 is tied to the output by the "0"
;; constraint.  Emitted as "vfmsl.f16" using the <V_hi> register forms.
2065 | (define_insn "vfmsl_high<mode>_intrinsic" |
2066 | [(set (match_operand:VCVTF 0 "s_register_operand" "=w") |
2067 | (fma:VCVTF |
2068 | (float_extend:VCVTF |
2069 | (neg:<VFMLSEL> |
2070 | (vec_select:<VFMLSEL> |
2071 | (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>") |
2072 | (match_operand:<VFML> 4 "vect_par_constant_high" "")))) |
2073 | (float_extend:VCVTF |
2074 | (vec_select:<VFMLSEL> |
2075 | (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>") |
2076 | (match_operand:<VFML> 5 "vect_par_constant_high" ""))) |
2077 | (match_operand:VCVTF 1 "s_register_operand" "0")))] |
2078 | "TARGET_FP16FML" |
2079 | "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3" |
2080 | [(set_attr "type" "neon_fp_mla_s<q>")] |
2081 | ) |
2082 | ||
;; vfmal on the high halves: fma of the float-extended vec_select of
;; operands 2 and 3 (selectors must satisfy vect_par_constant_high) with
;; operand 1 as the addend.  Operand 1 is tied to the output by the "0"
;; constraint.  Emitted as "vfmal.f16" using the <V_hi> register forms.
2083 | (define_insn "vfmal_high<mode>_intrinsic" |
2084 | [(set (match_operand:VCVTF 0 "s_register_operand" "=w") |
2085 | (fma:VCVTF |
2086 | (float_extend:VCVTF |
2087 | (vec_select:<VFMLSEL> |
2088 | (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>") |
2089 | (match_operand:<VFML> 4 "vect_par_constant_high" ""))) |
2090 | (float_extend:VCVTF |
2091 | (vec_select:<VFMLSEL> |
2092 | (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>") |
2093 | (match_operand:<VFML> 5 "vect_par_constant_high" ""))) |
2094 | (match_operand:VCVTF 1 "s_register_operand" "0")))] |
2095 | "TARGET_FP16FML" |
2096 | "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3" |
2097 | [(set_attr "type" "neon_fp_mla_s<q>")] |
2098 | ) |
2099 | ||
;; vfmsl on the low halves: like the vfmal form but the selected half of
;; operand 2 is negated before the float_extend.  Selectors must satisfy
;; vect_par_constant_low; operand 1 is tied to the output by the "0"
;; constraint.  Emitted as "vfmsl.f16" using the <V_lo> register forms.
2100 | (define_insn "vfmsl_low<mode>_intrinsic" |
2101 | [(set (match_operand:VCVTF 0 "s_register_operand" "=w") |
2102 | (fma:VCVTF |
2103 | (float_extend:VCVTF |
2104 | (neg:<VFMLSEL> |
2105 | (vec_select:<VFMLSEL> |
2106 | (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>") |
2107 | (match_operand:<VFML> 4 "vect_par_constant_low" "")))) |
2108 | (float_extend:VCVTF |
2109 | (vec_select:<VFMLSEL> |
2110 | (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>") |
2111 | (match_operand:<VFML> 5 "vect_par_constant_low" ""))) |
2112 | (match_operand:VCVTF 1 "s_register_operand" "0")))] |
2113 | "TARGET_FP16FML" |
2114 | "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3" |
2115 | [(set_attr "type" "neon_fp_mla_s<q>")] |
2116 | ) |
2117 | ||
;; Expander for the by-lane vfmal/vfmsl builtins where operand 3 has the
;; same <VFML> mode as operand 2; requires TARGET_FP16FML.  The lane in
;; operand 4 is remapped with NEON_ENDIAN_LANE_N for <VFML>mode, a
;; half-selector is built with arm_simd_vect_par_cnst_half, and both are
;; passed to the matching vfm<vfml_op>l_lane_<vfml_half><mode>_intrinsic
;; pattern.
eccf4d70 KT |
2118 | (define_expand "neon_vfm<vfml_op>l_lane_<vfml_half><VCVTF:mode>" |
2119 | [(set:VCVTF (match_operand:VCVTF 0 "s_register_operand") |
2120 | (unspec:VCVTF |
2121 | [(match_operand:VCVTF 1 "s_register_operand") |
2122 | (PLUSMINUS:<VFML> |
2123 | (match_operand:<VFML> 2 "s_register_operand") |
2124 | (match_operand:<VFML> 3 "s_register_operand")) |
2125 | (match_operand:SI 4 "const_int_operand")] VFMLHALVES))] |
2126 | "TARGET_FP16FML" |
2127 | { |
2128 | rtx lane = GEN_INT (NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[4]))); |
2129 | rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>); |
2130 | emit_insn (gen_vfm<vfml_op>l_lane_<vfml_half><mode>_intrinsic |
2131 | (operands[0], operands[1], |
2132 | operands[2], operands[3], |
2133 | half, lane)); |
2134 | DONE; |
2135 | }) |
2136 | ||
;; By-lane vfmal on the low half of operand 2: the second multiplicand is
;; a single HF lane of operand 3 (lane index in operand 5) duplicated
;; across <VFMLSEL>.  The output code remaps the lane with
;; NEON_ENDIAN_LANE_N; when the lane lies beyond the last element of
;; <VFMLSEL>mode it prints the <V_hi> form of operand 3 with the lane
;; rebased by GET_MODE_NUNITS (<VFMLSEL>mode), otherwise the <V_lo> form
;; with the lane unchanged.  Note that operands[5] is overwritten before
;; the template is returned.
2137 | (define_insn "vfmal_lane_low<mode>_intrinsic" |
2138 | [(set (match_operand:VCVTF 0 "s_register_operand" "=w") |
2139 | (fma:VCVTF |
2140 | (float_extend:VCVTF |
2141 | (vec_select:<VFMLSEL> |
2142 | (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>") |
2143 | (match_operand:<VFML> 4 "vect_par_constant_low" ""))) |
2144 | (float_extend:VCVTF |
2145 | (vec_duplicate:<VFMLSEL> |
2146 | (vec_select:HF |
2147 | (match_operand:<VFML> 3 "s_register_operand" "x") |
2148 | (parallel [(match_operand:SI 5 "const_int_operand" "n")])))) |
2149 | (match_operand:VCVTF 1 "s_register_operand" "0")))] |
2150 | "TARGET_FP16FML" |
2151 | { |
2152 | int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5])); |
2153 | if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1) |
2154 | { |
2155 | operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode)); |
2156 | return "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_hi>3[%c5]"; |
2157 | } |
2158 | else |
2159 | { |
2160 | operands[5] = GEN_INT (lane); |
2161 | return "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3[%c5]"; |
2162 | } |
2163 | } |
2164 | [(set_attr "type" "neon_fp_mla_s<q>")] |
2165 | ) |
2166 | ||
;; Expander for the lane forms in which the lane vector (operand 3) has mode
;; <VFMLSEL2> instead of the <VFML> mode of operand 2.  The lane number in
;; operand 4 is remapped with NEON_ENDIAN_LANE_N for <VFMLSEL2>mode, while the
;; half-selecting parallel is still built for <VFML>mode.  Both are passed on
;; to the matching ..._<vfmlsel2><mode>_intrinsic insn.
2167 | (define_expand "neon_vfm<vfml_op>l_lane_<vfml_half><vfmlsel2><mode>" | |
2168 | [(set:VCVTF (match_operand:VCVTF 0 "s_register_operand") | |
2169 | (unspec:VCVTF | |
2170 | [(match_operand:VCVTF 1 "s_register_operand") | |
2171 | (PLUSMINUS:<VFML> | |
2172 | (match_operand:<VFML> 2 "s_register_operand") | |
2173 | (match_operand:<VFMLSEL2> 3 "s_register_operand")) | |
2174 | (match_operand:SI 4 "const_int_operand")] VFMLHALVES))] | |
2175 | "TARGET_FP16FML" | |
2176 | { | |
2177 | rtx lane | |
2178 | = GEN_INT (NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[4]))); | |
2179 | rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>); | |
2180 | emit_insn (gen_vfm<vfml_op>l_lane_<vfml_half><vfmlsel2><mode>_intrinsic | |
2181 | (operands[0], operands[1], operands[2], operands[3], | |
2182 | half, lane)); | |
2183 | DONE; | |
2184 | }) | |
2185 | ||
2186 | ;; Used to implement the intrinsics: | |
99cf78cf TC |
2187 | ;; float32x4_t vfmlalq_lane_low_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane) |
2188 | ;; float32x2_t vfmlal_laneq_low_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane) | |
eccf4d70 KT |
2189 | ;; Needs a bit of care to get the modes of the different sub-expressions right |
2190 | ;; due to 'a' and 'b' having different sizes, and to make sure we use the right | |
2191 | ;; S or D subregister to select the appropriate lane from. | |
2192 | ||
;; Operand 3 is in <VFMLSEL2> mode while operand 2 is in <VFML> mode.  The
;; output code divides the endian-adjusted lane by the element count of a
;; <VFMLSEL> vector: the quotient is added to REGNO (operands[3]) and the
;; remainder becomes the printed lane.  Operands 2 and 3 are then re-created
;; as <VFMLSEL>-mode REGs so that both print through <V_lane_reg>.
2193 | (define_insn "vfmal_lane_low<vfmlsel2><mode>_intrinsic" | |
2194 | [(set (match_operand:VCVTF 0 "s_register_operand" "=w") | |
2195 | (fma:VCVTF | |
2196 | (float_extend:VCVTF | |
2197 | (vec_select:<VFMLSEL> | |
2198 | (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>") | |
2199 | (match_operand:<VFML> 4 "vect_par_constant_low" ""))) | |
2200 | (float_extend:VCVTF | |
2201 | (vec_duplicate:<VFMLSEL> | |
2202 | (vec_select:HF | |
2203 | (match_operand:<VFMLSEL2> 3 "s_register_operand" "x") | |
2204 | (parallel [(match_operand:SI 5 "const_int_operand" "n")])))) | |
2205 | (match_operand:VCVTF 1 "s_register_operand" "0")))] | |
2206 | "TARGET_FP16FML" | |
2207 | { | |
2208 | int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5])); | |
2209 | int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode); | |
2210 | int new_lane = lane % elts_per_reg; | |
2211 | int regdiff = lane / elts_per_reg; | |
2212 | operands[5] = GEN_INT (new_lane); | |
2213 | /* We re-create operands[2] and operands[3] in the halved VFMLSEL modes | |
2214 | because we want the print_operand code to print the appropriate | |
2215 | S or D register prefix. */ | |
2216 | operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff); | |
2217 | operands[2] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[2])); | |
2218 | return "vfmal.f16\\t%<V_reg>0, %<V_lane_reg>2, %<V_lane_reg>3[%c5]"; | |
2219 | } | |
2220 | [(set_attr "type" "neon_fp_mla_s<q>")] | |
2221 | ) | |
2222 | ||
2223 | ;; Used to implement the intrinsics: | |
99cf78cf TC |
2224 | ;; float32x4_t vfmlalq_lane_high_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane) |
2225 | ;; float32x2_t vfmlal_laneq_high_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane) | |
eccf4d70 KT |
2226 | ;; Needs a bit of care to get the modes of the different sub-expressions right |
2227 | ;; due to 'a' and 'b' having different sizes, and to make sure we use the right | |
2228 | ;; S or D subregister to select the appropriate lane from. | |
2229 | ||
;; High-half form: operand 4 must satisfy vect_par_constant_high and operand 2
;; is printed with <V_hi>.  Only operand 3 is re-created, as a <VFMLSEL>-mode
;; REG whose number is offset by lane / elements-per-register; operand 5 is
;; replaced by the lane within that register (lane % elements-per-register).
2230 | (define_insn "vfmal_lane_high<vfmlsel2><mode>_intrinsic" | |
2231 | [(set (match_operand:VCVTF 0 "s_register_operand" "=w") | |
2232 | (fma:VCVTF | |
2233 | (float_extend:VCVTF | |
2234 | (vec_select:<VFMLSEL> | |
2235 | (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>") | |
2236 | (match_operand:<VFML> 4 "vect_par_constant_high" ""))) | |
2237 | (float_extend:VCVTF | |
2238 | (vec_duplicate:<VFMLSEL> | |
2239 | (vec_select:HF | |
2240 | (match_operand:<VFMLSEL2> 3 "s_register_operand" "x") | |
2241 | (parallel [(match_operand:SI 5 "const_int_operand" "n")])))) | |
2242 | (match_operand:VCVTF 1 "s_register_operand" "0")))] | |
2243 | "TARGET_FP16FML" | |
2244 | { | |
2245 | int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5])); | |
2246 | int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode); | |
2247 | int new_lane = lane % elts_per_reg; | |
2248 | int regdiff = lane / elts_per_reg; | |
2249 | operands[5] = GEN_INT (new_lane); | |
2250 | /* We re-create operands[3] in the halved VFMLSEL mode | |
2251 | because we've calculated the correct half-width subreg to extract | |
2252 | the lane from and we want to print *that* subreg instead. */ | |
2253 | operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff); | |
2254 | return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_lane_reg>3[%c5]"; | |
2255 | } | |
2256 | [(set_attr "type" "neon_fp_mla_s<q>")] | |
2257 | ) | |
2258 | ||
;; High-half lane form of vfmal.f16 with operands 2 and 3 both in <VFML> mode
;; (operand 4 must satisfy vect_par_constant_high).  Operand 2 is always
;; printed with <V_hi>.  Operand 3 is printed with <V_hi> and a rebased lane
;; when the endian-adjusted lane lies past the first <VFMLSEL>-sized half, and
;; with <V_lo> and the unchanged lane otherwise.
2259 | (define_insn "vfmal_lane_high<mode>_intrinsic" | |
2260 | [(set (match_operand:VCVTF 0 "s_register_operand" "=w") | |
2261 | (fma:VCVTF | |
2262 | (float_extend:VCVTF | |
2263 | (vec_select:<VFMLSEL> | |
2264 | (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>") | |
2265 | (match_operand:<VFML> 4 "vect_par_constant_high" ""))) | |
2266 | (float_extend:VCVTF | |
2267 | (vec_duplicate:<VFMLSEL> | |
2268 | (vec_select:HF | |
2269 | (match_operand:<VFML> 3 "s_register_operand" "x") | |
2270 | (parallel [(match_operand:SI 5 "const_int_operand" "n")])))) | |
2271 | (match_operand:VCVTF 1 "s_register_operand" "0")))] | |
2272 | "TARGET_FP16FML" | |
2273 | { | |
2274 | int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5])); | |
2275 | if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1) | |
2276 | { | |
2277 | operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode)); | |
2278 | return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3[%c5]"; | |
2279 | } | |
2280 | else | |
2281 | { | |
2282 | operands[5] = GEN_INT (lane); | |
2283 | return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_lo>3[%c5]"; | |
2284 | } | |
2285 | } | |
2286 | [(set_attr "type" "neon_fp_mla_s<q>")] | |
2287 | ) | |
2288 | ||
;; Low-half lane form of vfmsl.f16 with operands 2 and 3 both in <VFML> mode.
;; The RTL negates the selected low half of operand 2 before the float_extend
;; and fma.  The output code remaps the lane with NEON_ENDIAN_LANE_N; a lane
;; past the first <VFMLSEL>-sized half is rebased and operand 3 is printed with
;; <V_hi>, otherwise operand 3 is printed with <V_lo>.
2289 | (define_insn "vfmsl_lane_low<mode>_intrinsic" | |
2290 | [(set (match_operand:VCVTF 0 "s_register_operand" "=w") | |
2291 | (fma:VCVTF | |
2292 | (float_extend:VCVTF | |
2293 | (neg:<VFMLSEL> | |
2294 | (vec_select:<VFMLSEL> | |
2295 | (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>") | |
2296 | (match_operand:<VFML> 4 "vect_par_constant_low" "")))) | |
2297 | (float_extend:VCVTF | |
2298 | (vec_duplicate:<VFMLSEL> | |
2299 | (vec_select:HF | |
2300 | (match_operand:<VFML> 3 "s_register_operand" "x") | |
2301 | (parallel [(match_operand:SI 5 "const_int_operand" "n")])))) | |
2302 | (match_operand:VCVTF 1 "s_register_operand" "0")))] | |
2303 | "TARGET_FP16FML" | |
2304 | { | |
2305 | int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5])); | |
2306 | if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1) | |
2307 | { | |
2308 | operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode)); | |
2309 | return "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_hi>3[%c5]"; | |
2310 | } | |
2311 | else | |
2312 | { | |
2313 | operands[5] = GEN_INT (lane); | |
2314 | return "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3[%c5]"; | |
2315 | } | |
2316 | } | |
2317 | [(set_attr "type" "neon_fp_mla_s<q>")] | |
2318 | ) | |
2319 | ||
2320 | ;; Used to implement the intrinsics: | |
99cf78cf TC |
2321 | ;; float32x4_t vfmlslq_lane_low_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane) |
2322 | ;; float32x2_t vfmlsl_laneq_low_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane) | |
eccf4d70 KT |
2323 | ;; Needs a bit of care to get the modes of the different sub-expressions right |
2324 | ;; due to 'a' and 'b' having different sizes, and to make sure we use the right | |
2325 | ;; S or D subregister to select the appropriate lane from. | |
2326 | ||
;; vfmsl.f16 low-half form with operand 3 in <VFMLSEL2> mode and operand 2 in
;; <VFML> mode; the selected half of operand 2 is negated in the RTL.  The
;; output code splits the endian-adjusted lane into a register offset
;; (lane / elements per <VFMLSEL> vector) and a lane within that register
;; (lane % elements), then re-creates operands 2 and 3 as <VFMLSEL>-mode REGs
;; so that both print through <V_lane_reg>.
2327 | (define_insn "vfmsl_lane_low<vfmlsel2><mode>_intrinsic" | |
2328 | [(set (match_operand:VCVTF 0 "s_register_operand" "=w") | |
2329 | (fma:VCVTF | |
2330 | (float_extend:VCVTF | |
2331 | (neg:<VFMLSEL> | |
2332 | (vec_select:<VFMLSEL> | |
2333 | (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>") | |
2334 | (match_operand:<VFML> 4 "vect_par_constant_low" "")))) | |
2335 | (float_extend:VCVTF | |
2336 | (vec_duplicate:<VFMLSEL> | |
2337 | (vec_select:HF | |
2338 | (match_operand:<VFMLSEL2> 3 "s_register_operand" "x") | |
2339 | (parallel [(match_operand:SI 5 "const_int_operand" "n")])))) | |
2340 | (match_operand:VCVTF 1 "s_register_operand" "0")))] | |
2341 | "TARGET_FP16FML" | |
2342 | { | |
2343 | int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5])); | |
2344 | int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode); | |
2345 | int new_lane = lane % elts_per_reg; | |
2346 | int regdiff = lane / elts_per_reg; | |
2347 | operands[5] = GEN_INT (new_lane); | |
2348 | /* We re-create operands[2] and operands[3] in the halved VFMLSEL modes | |
2349 | because we want the print_operand code to print the appropriate | |
2350 | S or D register prefix. */ | |
2351 | operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff); | |
2352 | operands[2] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[2])); | |
2353 | return "vfmsl.f16\\t%<V_reg>0, %<V_lane_reg>2, %<V_lane_reg>3[%c5]"; | |
2354 | } | |
2355 | [(set_attr "type" "neon_fp_mla_s<q>")] | |
2356 | ) | |
2357 | ||
2358 | ;; Used to implement the intrinsics: | |
99cf78cf TC |
2359 | ;; float32x4_t vfmlslq_lane_high_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane) |
2360 | ;; float32x2_t vfmlsl_laneq_high_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane) | |
eccf4d70 KT |
2361 | ;; Needs a bit of care to get the modes of the different sub-expressions right |
2362 | ;; due to 'a' and 'b' having different sizes, and to make sure we use the right | |
2363 | ;; S or D subregister to select the appropriate lane from. | |
2364 | ||
;; vfmsl.f16 high-half form with operand 3 in <VFMLSEL2> mode: operand 4 must
;; satisfy vect_par_constant_high, the selected half of operand 2 is negated
;; in the RTL, and operand 2 is printed with <V_hi>.  Only operand 3 is
;; re-created, as a <VFMLSEL>-mode REG offset by lane / elements-per-register;
;; operand 5 becomes the lane within that register.
2365 | (define_insn "vfmsl_lane_high<vfmlsel2><mode>_intrinsic" | |
2366 | [(set (match_operand:VCVTF 0 "s_register_operand" "=w") | |
2367 | (fma:VCVTF | |
2368 | (float_extend:VCVTF | |
2369 | (neg:<VFMLSEL> | |
2370 | (vec_select:<VFMLSEL> | |
2371 | (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>") | |
2372 | (match_operand:<VFML> 4 "vect_par_constant_high" "")))) | |
2373 | (float_extend:VCVTF | |
2374 | (vec_duplicate:<VFMLSEL> | |
2375 | (vec_select:HF | |
2376 | (match_operand:<VFMLSEL2> 3 "s_register_operand" "x") | |
2377 | (parallel [(match_operand:SI 5 "const_int_operand" "n")])))) | |
2378 | (match_operand:VCVTF 1 "s_register_operand" "0")))] | |
2379 | "TARGET_FP16FML" | |
2380 | { | |
2381 | int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5])); | |
2382 | int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode); | |
2383 | int new_lane = lane % elts_per_reg; | |
2384 | int regdiff = lane / elts_per_reg; | |
2385 | operands[5] = GEN_INT (new_lane); | |
2386 | /* We re-create operands[3] in the halved VFMLSEL mode | |
2387 | because we've calculated the correct half-width subreg to extract | |
2388 | the lane from and we want to print *that* subreg instead. */ | |
2389 | operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff); | |
2390 | return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_lane_reg>3[%c5]"; | |
2391 | } | |
2392 | [(set_attr "type" "neon_fp_mla_s<q>")] | |
2393 | ) | |
2394 | ||
;; High-half lane form of vfmsl.f16 with operands 2 and 3 both in <VFML> mode.
;; The RTL negates the selected high half of operand 2.  Operand 2 is always
;; printed with <V_hi>; operand 3 is printed with <V_hi> and a rebased lane
;; when the endian-adjusted lane lies past the first <VFMLSEL>-sized half, and
;; with <V_lo> and the unchanged lane otherwise.
2395 | (define_insn "vfmsl_lane_high<mode>_intrinsic" | |
2396 | [(set (match_operand:VCVTF 0 "s_register_operand" "=w") | |
2397 | (fma:VCVTF | |
2398 | (float_extend:VCVTF | |
2399 | (neg:<VFMLSEL> | |
2400 | (vec_select:<VFMLSEL> | |
2401 | (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>") | |
2402 | (match_operand:<VFML> 4 "vect_par_constant_high" "")))) | |
2403 | (float_extend:VCVTF | |
2404 | (vec_duplicate:<VFMLSEL> | |
2405 | (vec_select:HF | |
2406 | (match_operand:<VFML> 3 "s_register_operand" "x") | |
2407 | (parallel [(match_operand:SI 5 "const_int_operand" "n")])))) | |
2408 | (match_operand:VCVTF 1 "s_register_operand" "0")))] | |
2409 | "TARGET_FP16FML" | |
2410 | { | |
2411 | int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5])); | |
2412 | if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1) | |
2413 | { | |
2414 | operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode)); | |
2415 | return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3[%c5]"; | |
2416 | } | |
2417 | else | |
2418 | { | |
2419 | operands[5] = GEN_INT (lane); | |
2420 | return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_lo>3[%c5]"; | |
2421 | } | |
2422 | } | |
2423 | [(set_attr "type" "neon_fp_mla_s<q>")] | |
2424 | ) | |
2425 | ||
bab53516 SL |
2426 | ; Used for intrinsics when flag_unsafe_math_optimizations is false. |
2427 | ||
;; vmla kept opaque as UNSPEC_VMLA over the VDQW modes.  Operand 1 is the
;; accumulator, tied to the output by the "0" constraint; operands 2 and 3 are
;; the two sources named in the assembly template.  The "type" attribute is
;; neon_fp_mla_s<q> when <Is_float_mode> holds and neon_mla_<V_elem_ch><q>
;; otherwise.
2428 | (define_insn "neon_vmla<mode>_unspec" | |
f7379e5e JG |
2429 | [(set (match_operand:VDQW 0 "s_register_operand" "=w") |
2430 | (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0") | |
2431 | (match_operand:VDQW 2 "s_register_operand" "w") | |
2432 | (match_operand:VDQW 3 "s_register_operand" "w")] | |
bab53516 | 2433 | UNSPEC_VMLA))] |
88f77cba | 2434 | "TARGET_NEON" |
c956e102 | 2435 | "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3" |
003bb7f3 | 2436 | [(set (attr "type") |
b75b1be2 | 2437 | (if_then_else (match_test "<Is_float_mode>") |
f7379e5e JG |
2438 | (const_string "neon_fp_mla_s<q>") |
2439 | (const_string "neon_mla_<V_elem_ch><q>")))] | |
c956e102 | 2440 | ) |
88f77cba | 2441 | |
;; vmlal for the VMLAL unspecs.  Operands 2 and 3 are VW-mode sources; the
;; accumulator (operand 1, tied to the output by "0") and the result use the
;; <V_widen> mode.
94f0f2cc | 2442 | (define_insn "neon_vmlal<sup><mode>" |
88f77cba JB |
2443 | [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") |
2444 | (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0") | |
2445 | (match_operand:VW 2 "s_register_operand" "w") | |
94f0f2cc JG |
2446 | (match_operand:VW 3 "s_register_operand" "w")] |
2447 | VMLAL))] | |
88f77cba | 2448 | "TARGET_NEON" |
94f0f2cc | 2449 | "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3" |
f7379e5e | 2450 | [(set_attr "type" "neon_mla_<V_elem_ch>_long")] |
c956e102 | 2451 | ) |
88f77cba | 2452 | |
;; Expander for vmls over the VDQW modes.  When the mode is not a float mode,
;; or flag_unsafe_math_optimizations is set, it emits the
;; mul<mode>3neg<mode>add<mode>_neon pattern; otherwise it emits
;; neon_vmls<mode>_unspec.  All four operands are passed through unchanged.
bab53516 | 2453 | (define_expand "neon_vmls<mode>" |
cd65e265 DZ |
2454 | [(match_operand:VDQW 0 "s_register_operand") |
2455 | (match_operand:VDQW 1 "s_register_operand") | |
2456 | (match_operand:VDQW 2 "s_register_operand") | |
2457 | (match_operand:VDQW 3 "s_register_operand")] | |
bab53516 SL |
2458 | "TARGET_NEON" |
2459 | { | |
2460 | if (!<Is_float_mode> || flag_unsafe_math_optimizations) | |
2461 | emit_insn (gen_mul<mode>3neg<mode>add<mode>_neon (operands[0], | |
2462 | operands[1], operands[2], operands[3])); | |
2463 | else | |
2464 | emit_insn (gen_neon_vmls<mode>_unspec (operands[0], operands[1], | |
2465 | operands[2], operands[3])); | |
2466 | DONE; | |
2467 | }) | |
2468 | ||
2469 | ; Used for intrinsics when flag_unsafe_math_optimizations is false. | |
2470 | ||
;; vmls kept opaque as UNSPEC_VMLS over the VDQW modes.  Operand 1 is the
;; accumulator, tied to the output by the "0" constraint; operands 2 and 3 are
;; the two sources named in the assembly template.  The "type" attribute is
;; neon_fp_mla_s<q> when <Is_float_mode> holds and neon_mla_<V_elem_ch><q>
;; otherwise.
2471 | (define_insn "neon_vmls<mode>_unspec" | |
f7379e5e JG |
2472 | [(set (match_operand:VDQW 0 "s_register_operand" "=w") |
2473 | (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0") | |
2474 | (match_operand:VDQW 2 "s_register_operand" "w") | |
2475 | (match_operand:VDQW 3 "s_register_operand" "w")] | |
bab53516 | 2476 | UNSPEC_VMLS))] |
88f77cba | 2477 | "TARGET_NEON" |
c956e102 | 2478 | "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3" |
003bb7f3 | 2479 | [(set (attr "type") |
b75b1be2 | 2480 | (if_then_else (match_test "<Is_float_mode>") |
f7379e5e JG |
2481 | (const_string "neon_fp_mla_s<q>") |
2482 | (const_string "neon_mla_<V_elem_ch><q>")))] | |
c956e102 | 2483 | ) |
88f77cba | 2484 | |
;; vmlsl for the VMLSL unspecs.  Operands 2 and 3 are VW-mode sources; the
;; accumulator (operand 1, tied to the output by "0") and the result use the
;; <V_widen> mode.
94f0f2cc | 2485 | (define_insn "neon_vmlsl<sup><mode>" |
88f77cba JB |
2486 | [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") |
2487 | (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0") | |
2488 | (match_operand:VW 2 "s_register_operand" "w") | |
94f0f2cc JG |
2489 | (match_operand:VW 3 "s_register_operand" "w")] |
2490 | VMLSL))] | |
88f77cba | 2491 | "TARGET_NEON" |
94f0f2cc | 2492 | "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3" |
f7379e5e | 2493 | [(set_attr "type" "neon_mla_<V_elem_ch>_long")] |
c956e102 | 2494 | ) |
88f77cba | 2495 | |
94f0f2cc JG |
2496 | ;; vqdmulh, vqrdmulh |
;; One pattern for every unspec in the VQDMULH iterator; <r> supplies the
;; optional "r" in the mnemonic.  Both sources and the result share the same
;; VMDQI mode.
2497 | (define_insn "neon_vq<r>dmulh<mode>" | |
88f77cba JB |
2498 | [(set (match_operand:VMDQI 0 "s_register_operand" "=w") |
2499 | (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "w") | |
94f0f2cc JG |
2500 | (match_operand:VMDQI 2 "s_register_operand" "w")] |
2501 | VQDMULH))] | |
88f77cba | 2502 | "TARGET_NEON" |
94f0f2cc | 2503 | "vq<r>dmulh.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
f7379e5e | 2504 | [(set_attr "type" "neon_sat_mul_<V_elem_ch><q>")] |
c956e102 | 2505 | ) |
88f77cba | 2506 | |
5f2ca3b2 MW |
2507 | ;; vqrdmlah, vqrdmlsh |
;; One pattern for every unspec in the VQRDMLH_AS iterator; the neon_rdma_as
;; attribute supplies the letter between "vqrdml" and "h" in the mnemonic.
;; Operand 1 is the accumulator, tied to the output by "0".  Enabled only under
;; TARGET_NEON_RDMA.
;; NOTE(review): the "type" attribute uses the *_long scheduling class even
;; though source and result modes are the same here -- presumably the closest
;; available class; confirm against types.md.
2508 | (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h<mode>" | |
2509 | [(set (match_operand:VMDQI 0 "s_register_operand" "=w") | |
2510 | (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "0") | |
2511 | (match_operand:VMDQI 2 "s_register_operand" "w") | |
2512 | (match_operand:VMDQI 3 "s_register_operand" "w")] | |
2513 | VQRDMLH_AS))] | |
2514 | "TARGET_NEON_RDMA" | |
2515 | "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3" | |
2516 | [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")] | |
2517 | ) | |
2518 | ||
;; vqdmlal as UNSPEC_VQDMLAL.  Operands 2 and 3 are VMDI-mode sources; the
;; accumulator (operand 1, tied to the output by "0") and the result use the
;; <V_widen> mode.
88f77cba JB |
2519 | (define_insn "neon_vqdmlal<mode>" |
2520 | [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") | |
2521 | (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0") | |
2522 | (match_operand:VMDI 2 "s_register_operand" "w") | |
94f0f2cc | 2523 | (match_operand:VMDI 3 "s_register_operand" "w")] |
88f77cba JB |
2524 | UNSPEC_VQDMLAL))] |
2525 | "TARGET_NEON" | |
c956e102 | 2526 | "vqdmlal.<V_s_elem>\t%q0, %P2, %P3" |
f7379e5e | 2527 | [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")] |
c956e102 | 2528 | ) |
88f77cba JB |
2529 | |
;; vqdmlsl as UNSPEC_VQDMLSL.  Operands 2 and 3 are VMDI-mode sources; the
;; accumulator (operand 1, tied to the output by "0") and the result use the
;; <V_widen> mode.
2530 | (define_insn "neon_vqdmlsl<mode>" | |
2531 | [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") | |
2532 | (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0") | |
2533 | (match_operand:VMDI 2 "s_register_operand" "w") | |
94f0f2cc | 2534 | (match_operand:VMDI 3 "s_register_operand" "w")] |
88f77cba JB |
2535 | UNSPEC_VQDMLSL))] |
2536 | "TARGET_NEON" | |
c956e102 | 2537 | "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3" |
f7379e5e | 2538 | [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")] |
c956e102 | 2539 | ) |
88f77cba | 2540 | |
;; vmull for the VMULL unspecs: two VW-mode sources produce a <V_widen>-mode
;; result.  There is no accumulator operand.
94f0f2cc | 2541 | (define_insn "neon_vmull<sup><mode>" |
88f77cba JB |
2542 | [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") |
2543 | (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w") | |
94f0f2cc JG |
2544 | (match_operand:VW 2 "s_register_operand" "w")] |
2545 | VMULL))] | |
88f77cba | 2546 | "TARGET_NEON" |
94f0f2cc | 2547 | "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2" |
f7379e5e | 2548 | [(set_attr "type" "neon_mul_<V_elem_ch>_long")] |
c956e102 | 2549 | ) |
88f77cba JB |
2550 | |
;; vqdmull as UNSPEC_VQDMULL: two VMDI-mode sources produce a <V_widen>-mode
;; result.  There is no accumulator operand.
2551 | (define_insn "neon_vqdmull<mode>" | |
2552 | [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") | |
2553 | (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w") | |
94f0f2cc | 2554 | (match_operand:VMDI 2 "s_register_operand" "w")] |
88f77cba JB |
2555 | UNSPEC_VQDMULL))] |
2556 | "TARGET_NEON" | |
c956e102 | 2557 | "vqdmull.<V_s_elem>\t%q0, %P1, %P2" |
f7379e5e | 2558 | [(set_attr "type" "neon_sat_mul_<V_elem_ch>_long")] |
c956e102 | 2559 | ) |
88f77cba | 2560 | |
;; Expander for vsub over the VCVTF modes.  When the mode is not a float mode,
;; or flag_unsafe_math_optimizations is set, it emits the generic sub<mode>3
;; pattern; otherwise it emits neon_vsub<mode>_unspec.
bab53516 | 2561 | (define_expand "neon_vsub<mode>" |
cd65e265 DZ |
2562 | [(match_operand:VCVTF 0 "s_register_operand") |
2563 | (match_operand:VCVTF 1 "s_register_operand") | |
2564 | (match_operand:VCVTF 2 "s_register_operand")] | |
bab53516 SL |
2565 | "TARGET_NEON" |
2566 | { | |
2567 | if (!<Is_float_mode> || flag_unsafe_math_optimizations) | |
2568 | emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2])); | |
2569 | else | |
2570 | emit_insn (gen_neon_vsub<mode>_unspec (operands[0], operands[1], | |
2571 | operands[2])); | |
2572 | DONE; | |
2573 | }) | |
2574 | ||
2575 | ; Used for intrinsics when flag_unsafe_math_optimizations is false. | |
2576 | ||
;; vsub kept opaque as UNSPEC_VSUB over the VCVTF modes.  The "type" attribute
;; is neon_fp_addsub_s<q> when <Is_float_mode> holds and neon_sub<q> otherwise.
2577 | (define_insn "neon_vsub<mode>_unspec" | |
0d0b79a6 RR |
2578 | [(set (match_operand:VCVTF 0 "s_register_operand" "=w") |
2579 | (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w") | |
2580 | (match_operand:VCVTF 2 "s_register_operand" "w")] | |
88f77cba JB |
2581 | UNSPEC_VSUB))] |
2582 | "TARGET_NEON" | |
c956e102 | 2583 | "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
003bb7f3 | 2584 | [(set (attr "type") |
b75b1be2 | 2585 | (if_then_else (match_test "<Is_float_mode>") |
f7379e5e JG |
2586 | (const_string "neon_fp_addsub_s<q>") |
2587 | (const_string "neon_sub<q>")))] | |
c956e102 | 2588 | ) |
88f77cba | 2589 | |
;; vsubl for the VSUBL unspecs: two VDI-mode sources produce a <V_widen>-mode
;; result.
94f0f2cc | 2590 | (define_insn "neon_vsubl<sup><mode>" |
88f77cba JB |
2591 | [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") |
2592 | (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w") | |
94f0f2cc JG |
2593 | (match_operand:VDI 2 "s_register_operand" "w")] |
2594 | VSUBL))] | |
88f77cba | 2595 | "TARGET_NEON" |
94f0f2cc | 2596 | "vsubl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2" |
f7379e5e | 2597 | [(set_attr "type" "neon_sub_long")] |
c956e102 | 2598 | ) |
88f77cba | 2599 | |
;; vsubw for the VSUBW unspecs: operand 1 is already in the <V_widen> mode of
;; the result, while operand 2 is a VDI-mode source.
94f0f2cc | 2600 | (define_insn "neon_vsubw<sup><mode>" |
88f77cba JB |
2601 | [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") |
2602 | (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w") | |
94f0f2cc JG |
2603 | (match_operand:VDI 2 "s_register_operand" "w")] |
2604 | VSUBW))] | |
88f77cba | 2605 | "TARGET_NEON" |
94f0f2cc | 2606 | "vsubw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2" |
f7379e5e | 2607 | [(set_attr "type" "neon_sub_widen")] |
c956e102 | 2608 | ) |
88f77cba | 2609 | |
;; vqsub for the VQSUB unspecs; sources and result share the same VDQIX mode.
94f0f2cc | 2610 | (define_insn "neon_vqsub<sup><mode>" |
88f77cba JB |
2611 | [(set (match_operand:VDQIX 0 "s_register_operand" "=w") |
2612 | (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w") | |
94f0f2cc JG |
2613 | (match_operand:VDQIX 2 "s_register_operand" "w")] |
2614 | VQSUB))] | |
88f77cba | 2615 | "TARGET_NEON" |
94f0f2cc | 2616 | "vqsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
f7379e5e | 2617 | [(set_attr "type" "neon_qsub<q>")] |
c956e102 | 2618 | ) |
88f77cba | 2619 | |
;; vhsub for the VHSUB unspecs; sources and result share the same VDQIW mode.
94f0f2cc | 2620 | (define_insn "neon_vhsub<sup><mode>" |
88f77cba JB |
2621 | [(set (match_operand:VDQIW 0 "s_register_operand" "=w") |
2622 | (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w") | |
94f0f2cc JG |
2623 | (match_operand:VDQIW 2 "s_register_operand" "w")] |
2624 | VHSUB))] | |
88f77cba | 2625 | "TARGET_NEON" |
94f0f2cc | 2626 | "vhsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
f7379e5e | 2627 | [(set_attr "type" "neon_sub_halve<q>")] |
c956e102 | 2628 | ) |
88f77cba | 2629 | |
;; One pattern for every unspec in the VSUBHN iterator; <r> supplies the
;; optional "r" in the mnemonic.  Two VN-mode sources produce a
;; <V_narrow>-mode result.
94f0f2cc | 2630 | (define_insn "neon_v<r>subhn<mode>" |
88f77cba JB |
2631 | [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w") |
2632 | (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w") | |
94f0f2cc JG |
2633 | (match_operand:VN 2 "s_register_operand" "w")] |
2634 | VSUBHN))] | |
88f77cba | 2635 | "TARGET_NEON" |
94f0f2cc | 2636 | "v<r>subhn.<V_if_elem>\t%P0, %q1, %q2" |
f7379e5e | 2637 | [(set_attr "type" "neon_sub_halve_narrow_q")] |
c956e102 | 2638 | ) |
88f77cba | 2639 | |
381811fa KT |
2640 | ;; These may expand to an UNSPEC pattern when a floating point mode is used |
2641 | ;; without unsafe math optimizations. | |
;; Comparison expander over the VDQW modes.  Float vector modes without
;; flag_unsafe_math_optimizations are dispatched by mode to the v2sf or v4sf
;; _insn_unspec pattern (any other mode on that path is gcc_unreachable);
;; every other case emits neon_vc<cmp_op><mode>_insn.  Operand 2 may be a
;; register or zero (reg_or_zero_operand).
2642 | (define_expand "neon_vc<cmp_op><mode>" | |
cd65e265 | 2643 | [(match_operand:<V_cmp_result> 0 "s_register_operand") |
381811fa | 2644 | (neg:<V_cmp_result> |
cd65e265 DZ |
2645 | (COMPARISONS:VDQW (match_operand:VDQW 1 "s_register_operand") |
2646 | (match_operand:VDQW 2 "reg_or_zero_operand")))] | |
88f77cba | 2647 | "TARGET_NEON" |
381811fa KT |
2648 | { |
2649 | /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations | |
2650 | are enabled. */ | |
2651 | if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT | |
2652 | && !flag_unsafe_math_optimizations) | |
2653 | { | |
2654 | /* We don't just emit a gen_neon_vc<cmp_op><mode>_insn_unspec because | |
2655 | we define gen_neon_vceq<mode>_insn_unspec only for float modes | |
2656 | whereas this expander iterates over the integer modes as well, | |
2657 | but we will never expand to UNSPECs for the integer comparisons. */ | |
2658 | switch (<MODE>mode) | |
2659 | { | |
4e10a5a7 | 2660 | case E_V2SFmode: |
381811fa KT |
2661 | emit_insn (gen_neon_vc<cmp_op>v2sf_insn_unspec (operands[0], |
2662 | operands[1], | |
2663 | operands[2])); | |
2664 | break; | |
4e10a5a7 | 2665 | case E_V4SFmode: |
381811fa KT |
2666 | emit_insn (gen_neon_vc<cmp_op>v4sf_insn_unspec (operands[0], |
2667 | operands[1], | |
2668 | operands[2])); | |
2669 | break; | |
2670 | default: | |
2671 | gcc_unreachable (); | |
2672 | } | |
2673 | } | |
2674 | else | |
2675 | emit_insn (gen_neon_vc<cmp_op><mode>_insn (operands[0], | |
2676 | operands[1], | |
2677 | operands[2])); | |
2678 | DONE; | |
2679 | } | |
c956e102 | 2680 | ) |
88f77cba | 2681 | |
;; Comparison expressed in RTL as (neg (COMPARISONS ...)) over the VDQW modes.
;; The insn condition rejects float vector modes unless
;; flag_unsafe_math_optimizations is set.  The assembly template is built at
;; output time: the element type letter is "f" for float vector modes and
;; <cmp_type> otherwise, and the second source is operand 2 for alternative 0
;; or the literal "#0" for alternative 1 (the Dz alternative).  The "type"
;; attribute distinguishes a zero operand 2 from the register case.
381811fa | 2682 | (define_insn "neon_vc<cmp_op><mode>_insn" |
5bfc5baa | 2683 | [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w") |
381811fa KT |
2684 | (neg:<V_cmp_result> |
2685 | (COMPARISONS:<V_cmp_result> | |
2686 | (match_operand:VDQW 1 "s_register_operand" "w,w") | |
2687 | (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz"))))] | |
2688 | "TARGET_NEON && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT | |
2689 | && !flag_unsafe_math_optimizations)" | |
2690 | { | |
2691 | char pattern[100]; | |
2692 | sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0," | |
2693 | " %%<V_reg>1, %s", | |
2694 | GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT | |
2695 | ? "f" : "<cmp_type>", | |
2696 | which_alternative == 0 | |
2697 | ? "%<V_reg>2" : "#0"); | |
2698 | output_asm_insn (pattern, operands); | |
2699 | return ""; | |
2700 | } | |
003bb7f3 | 2701 | [(set (attr "type") |
381811fa | 2702 | (if_then_else (match_operand 2 "zero_operand") |
f7379e5e | 2703 | (const_string "neon_compare_zero<q>") |
381811fa | 2704 | (const_string "neon_compare<q>")))] |
c956e102 | 2705 | ) |
88f77cba | 2706 | |
;; Float comparison kept opaque as a NEON_VCMP unspec over the VCVTF modes.
;; The assembly template is built at output time with a fixed ".f" element
;; type; the second source is operand 2 for alternative 0 or the literal "#0"
;; for alternative 1 (the Dz alternative).
381811fa | 2707 | (define_insn "neon_vc<cmp_op_unsp><mode>_insn_unspec" |
5bfc5baa JB |
2708 | [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w") |
2709 | (unspec:<V_cmp_result> | |
381811fa KT |
2710 | [(match_operand:VCVTF 1 "s_register_operand" "w,w") |
2711 | (match_operand:VCVTF 2 "reg_or_zero_operand" "w,Dz")] | |
2712 | NEON_VCMP))] | |
88f77cba | 2713 | "TARGET_NEON" |
381811fa KT |
2714 | { |
2715 | char pattern[100]; | |
2716 | sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0," | |
2717 | " %%<V_reg>1, %s", | |
2718 | which_alternative == 0 | |
2719 | ? "%<V_reg>2" : "#0"); | |
2720 | output_asm_insn (pattern, operands); | |
2721 | return ""; | |
2722 | } | |
2723 | [(set_attr "type" "neon_fp_compare_s<q>")] | |
c956e102 | 2724 | ) |
88f77cba | 2725 | |
;; Comparison expander for the VH modes, enabled under TARGET_NEON_FP16INST.
;; For float vector modes without flag_unsafe_math_optimizations it emits the
;; _fp16insn_unspec pattern; otherwise it emits the _fp16insn pattern.
55a9b91b MW |
2726 | (define_expand "neon_vc<cmp_op><mode>" |
2727 | [(match_operand:<V_cmp_result> 0 "s_register_operand") | |
2728 | (neg:<V_cmp_result> | |
2729 | (COMPARISONS:VH | |
2730 | (match_operand:VH 1 "s_register_operand") | |
2731 | (match_operand:VH 2 "reg_or_zero_operand")))] | |
2732 | "TARGET_NEON_FP16INST" | |
2733 | { | |
2734 | /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations | |
2735 | are enabled. */ | |
2736 | if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT | |
2737 | && !flag_unsafe_math_optimizations) | |
2738 | emit_insn | |
2739 | (gen_neon_vc<cmp_op><mode>_fp16insn_unspec | |
2740 | (operands[0], operands[1], operands[2])); | |
2741 | else | |
2742 | emit_insn | |
2743 | (gen_neon_vc<cmp_op><mode>_fp16insn | |
2744 | (operands[0], operands[1], operands[2])); | |
2745 | DONE; | |
2746 | }) | |
2747 | ||
;; VH-mode comparison expressed in RTL as (neg (COMPARISONS ...)).  The insn
;; condition requires TARGET_NEON_FP16INST and rejects float vector modes
;; unless flag_unsafe_math_optimizations is set.  The assembly template is
;; built at output time: the element type letter is "f" for float vector modes
;; and <cmp_type> otherwise, and the second source is operand 2 for
;; alternative 0 or the literal "#0" for alternative 1.
2748 | (define_insn "neon_vc<cmp_op><mode>_fp16insn" | |
2749 | [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w") | |
2750 | (neg:<V_cmp_result> | |
2751 | (COMPARISONS:<V_cmp_result> | |
2752 | (match_operand:VH 1 "s_register_operand" "w,w") | |
2753 | (match_operand:VH 2 "reg_or_zero_operand" "w,Dz"))))] | |
2754 | "TARGET_NEON_FP16INST | |
2755 | && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT | |
2756 | && !flag_unsafe_math_optimizations)" | |
2757 | { | |
2758 | char pattern[100]; | |
2759 | sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0," | |
2760 | " %%<V_reg>1, %s", | |
2761 | GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT | |
2762 | ? "f" : "<cmp_type>", | |
2763 | which_alternative == 0 | |
2764 | ? "%<V_reg>2" : "#0"); | |
2765 | output_asm_insn (pattern, operands); | |
2766 | return ""; | |
2767 | } | |
2768 | [(set (attr "type") | |
2769 | (if_then_else (match_operand 2 "zero_operand") | |
2770 | (const_string "neon_compare_zero<q>") | |
2771 | (const_string "neon_compare<q>")))]) | |
2772 | ||
;; VH-mode comparison kept opaque as a NEON_VCMP unspec, enabled under
;; TARGET_NEON_FP16INST.  The assembly template is built at output time with a
;; fixed ".f" element type; the second source is operand 2 for alternative 0
;; or the literal "#0" for alternative 1 (the Dz alternative).
2773 | (define_insn "neon_vc<cmp_op_unsp><mode>_fp16insn_unspec" | |
2774 | [(set | |
2775 | (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w") | |
2776 | (unspec:<V_cmp_result> | |
2777 | [(match_operand:VH 1 "s_register_operand" "w,w") | |
2778 | (match_operand:VH 2 "reg_or_zero_operand" "w,Dz")] | |
2779 | NEON_VCMP))] | |
2780 | "TARGET_NEON_FP16INST" | |
2781 | { | |
2782 | char pattern[100]; | |
2783 | sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0," | |
2784 | " %%<V_reg>1, %s", | |
2785 | which_alternative == 0 | |
2786 | ? "%<V_reg>2" : "#0"); | |
2787 | output_asm_insn (pattern, operands); | |
2788 | return ""; | |
2789 | } | |
2790 | [(set_attr "type" "neon_fp_compare_s<q>")]) | |
2791 | ||
;; Comparisons from the GTUGEU iterator on the VDQIW modes, expressed as
;; (neg (GTUGEU ...)).  The template always uses the ".u" element type and
;; both sources must be registers.
381811fa | 2792 | (define_insn "neon_vc<cmp_op>u<mode>" |
fd92bb80 | 2793 | [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w") |
381811fa KT |
2794 | (neg:<V_cmp_result> |
2795 | (GTUGEU:<V_cmp_result> | |
2796 | (match_operand:VDQIW 1 "s_register_operand" "w") | |
2797 | (match_operand:VDQIW 2 "s_register_operand" "w"))))] | |
fd92bb80 | 2798 | "TARGET_NEON" |
381811fa | 2799 | "vc<cmp_op>.u%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
f7379e5e | 2800 | [(set_attr "type" "neon_compare<q>")] |
fd92bb80 MGD |
2801 | ) |
2802 | ||
;; Expander for comparisons of absolute values (GTGE over abs of each
;; operand) on the VCVTF modes.  With flag_unsafe_math_optimizations it emits
;; the RTL-described neon_vca<cmp_op><mode>_insn; otherwise it emits the
;; _insn_unspec form.
381811fa KT |
2803 | (define_expand "neon_vca<cmp_op><mode>" |
2804 | [(set (match_operand:<V_cmp_result> 0 "s_register_operand") | |
2805 | (neg:<V_cmp_result> | |
2806 | (GTGE:<V_cmp_result> | |
2807 | (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand")) | |
2808 | (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand")))))] | |
5bfc5baa | 2809 | "TARGET_NEON" |
381811fa KT |
2810 | { |
2811 | if (flag_unsafe_math_optimizations) | |
2812 | emit_insn (gen_neon_vca<cmp_op><mode>_insn (operands[0], operands[1], | |
2813 | operands[2])); | |
2814 | else | |
2815 | emit_insn (gen_neon_vca<cmp_op><mode>_insn_unspec (operands[0], | |
2816 | operands[1], | |
2817 | operands[2])); | |
2818 | DONE; | |
2819 | } | |
5bfc5baa JB |
2820 | ) |
2821 | ||
;; Absolute-value comparison described directly in RTL as
;; (neg (GTGE (abs op1) (abs op2))), emitted as vac<cmp_op>.  Only available
;; when flag_unsafe_math_optimizations is set.
381811fa | 2822 | (define_insn "neon_vca<cmp_op><mode>_insn" |
88f77cba | 2823 | [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w") |
381811fa KT |
2824 | (neg:<V_cmp_result> |
2825 | (GTGE:<V_cmp_result> | |
2826 | (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand" "w")) | |
2827 | (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand" "w")))))] | |
2828 | "TARGET_NEON && flag_unsafe_math_optimizations" | |
2829 | "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" | |
f7379e5e | 2830 | [(set_attr "type" "neon_fp_compare_s<q>")] |
c956e102 | 2831 | ) |
88f77cba | 2832 | |
;; Unspec form (NEON_VACMP iterator) of the absolute compare on VCVTF modes.
;; Enabled under plain TARGET_NEON, i.e. it does not depend on
;; flag_unsafe_math_optimizations.
381811fa | 2833 | (define_insn "neon_vca<cmp_op_unsp><mode>_insn_unspec" |
88f77cba JB |
2834 | [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w") |
2835 | (unspec:<V_cmp_result> [(match_operand:VCVTF 1 "s_register_operand" "w") | |
94f0f2cc | 2836 | (match_operand:VCVTF 2 "s_register_operand" "w")] |
381811fa | 2837 | NEON_VACMP))] |
88f77cba | 2838 | "TARGET_NEON" |
381811fa | 2839 | "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
f7379e5e | 2840 | [(set_attr "type" "neon_fp_compare_s<q>")] |
c956e102 | 2841 | ) |
88f77cba | 2842 | |
;; Expander for the absolute-value compare (GLTE iterator) on VH modes,
;; enabled under TARGET_NEON_FP16INST.  With flag_unsafe_math_optimizations
;; it emits neon_vca<cmp_op><mode>_fp16insn, otherwise
;; neon_vca<cmp_op><mode>_fp16insn_unspec.
55a9b91b MW |
2843 | (define_expand "neon_vca<cmp_op><mode>" |
2844 | [(set | |
2845 | (match_operand:<V_cmp_result> 0 "s_register_operand") | |
2846 | (neg:<V_cmp_result> | |
2847 | (GLTE:<V_cmp_result> | |
2848 | (abs:VH (match_operand:VH 1 "s_register_operand")) | |
2849 | (abs:VH (match_operand:VH 2 "s_register_operand")))))] | |
2850 | "TARGET_NEON_FP16INST" | |
2851 | { | |
2852 | if (flag_unsafe_math_optimizations) | |
2853 | emit_insn (gen_neon_vca<cmp_op><mode>_fp16insn | |
2854 | (operands[0], operands[1], operands[2])); | |
2855 | else | |
2856 | emit_insn (gen_neon_vca<cmp_op><mode>_fp16insn_unspec | |
2857 | (operands[0], operands[1], operands[2])); | |
2858 | DONE; | |
2859 | }) | |
2860 | ||
;; RTL form of the absolute compare on VH modes: the negated GLTE comparison
;; of abs (operand 1) and abs (operand 2).  Requires both
;; TARGET_NEON_FP16INST and flag_unsafe_math_optimizations.
2861 | (define_insn "neon_vca<cmp_op><mode>_fp16insn" | |
2862 | [(set | |
2863 | (match_operand:<V_cmp_result> 0 "s_register_operand" "=w") | |
2864 | (neg:<V_cmp_result> | |
2865 | (GLTE:<V_cmp_result> | |
2866 | (abs:VH (match_operand:VH 1 "s_register_operand" "w")) | |
2867 | (abs:VH (match_operand:VH 2 "s_register_operand" "w")))))] | |
2868 | "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations" | |
2869 | "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" | |
2870 | [(set_attr "type" "neon_fp_compare_s<q>")] | |
2871 | ) | |
2872 | ||
;; Unspec form (NEON_VAGLTE iterator) of the absolute compare on VH modes.
;; This is the pattern the VH neon_vca<cmp_op><mode> expander emits when
;; flag_unsafe_math_optimizations is off.
;; The condition is TARGET_NEON_FP16INST so that it agrees with that expander
;; and with the neon_vca<cmp_op><mode>_fp16insn sibling: the pattern operates
;; on VH operands and must not be available on targets that only satisfy
;; TARGET_NEON.
2873 | (define_insn "neon_vca<cmp_op_unsp><mode>_fp16insn_unspec" | |
2874 | [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w") | |
2875 | (unspec:<V_cmp_result> | |
2876 | [(match_operand:VH 1 "s_register_operand" "w") | |
2877 | (match_operand:VH 2 "s_register_operand" "w")] | |
2878 | NEON_VAGLTE))] | |
2879 | "TARGET_NEON_FP16INST" | |
2880 | "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" | |
2881 | [(set_attr "type" "neon_fp_compare_s<q>")] | |
2882 | ) | |
2883 | ||
;; Expander for compare-against-zero (COMPARISONS iterator) on VH modes.
;; It forwards to gen_neon_vc<cmp_op><mode>, passing CONST0_RTX of the
;; vector mode as the second source operand.
2884 | (define_expand "neon_vc<cmp_op>z<mode>" | |
2885 | [(set | |
2886 | (match_operand:<V_cmp_result> 0 "s_register_operand") | |
2887 | (COMPARISONS:<V_cmp_result> | |
2888 | (match_operand:VH 1 "s_register_operand") | |
2889 | (const_int 0)))] | |
2890 | "TARGET_NEON_FP16INST" | |
2891 | { | |
2892 | emit_insn (gen_neon_vc<cmp_op><mode> (operands[0], operands[1], | |
2893 | CONST0_RTX (<MODE>mode))); | |
2894 | DONE; | |
2895 | }) | |
2896 | ||
;; vtst on VDQIW modes, modelled as UNSPEC_VTST of operands 1 and 2.
88f77cba JB |
2897 | (define_insn "neon_vtst<mode>" |
2898 | [(set (match_operand:VDQIW 0 "s_register_operand" "=w") | |
2899 | (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w") | |
94f0f2cc | 2900 | (match_operand:VDQIW 2 "s_register_operand" "w")] |
88f77cba JB |
2901 | UNSPEC_VTST))] |
2902 | "TARGET_NEON" | |
c956e102 | 2903 | "vtst.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
f7379e5e | 2904 | [(set_attr "type" "neon_tst<q>")] |
c956e102 | 2905 | ) |
88f77cba | 2906 | |
;; vabd on VDQIW modes, modelled with the VABD unspec iterator; <sup>
;; supplies both the pattern-name infix and the type prefix in the mnemonic.
94f0f2cc JG |
2907 | (define_insn "neon_vabd<sup><mode>" |
2908 | [(set (match_operand:VDQIW 0 "s_register_operand" "=w") | |
2909 | (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w") | |
2910 | (match_operand:VDQIW 2 "s_register_operand" "w")] | |
2911 | VABD))] | |
88f77cba | 2912 | "TARGET_NEON" |
94f0f2cc JG |
2913 | "vabd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
2914 | [(set_attr "type" "neon_abd<q>")] | |
c956e102 | 2915 | ) |
88f77cba | 2916 | |
;; vabd on VH modes, modelled as UNSPEC_VABD_F and enabled under
;; TARGET_NEON_FP16INST.
;; The "type" attribute is the floating-point one (neon_fp_abd_s<q>), the
;; same as neon_vabdf<mode>, which uses the same unspec on VCVTF modes.
55a9b91b MW |
2917 | (define_insn "neon_vabd<mode>" |
2918 | [(set (match_operand:VH 0 "s_register_operand" "=w") | |
2919 | (unspec:VH [(match_operand:VH 1 "s_register_operand" "w") | |
2920 | (match_operand:VH 2 "s_register_operand" "w")] | |
2921 | UNSPEC_VABD_F))] | |
2922 | "TARGET_NEON_FP16INST" | |
2923 | "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" | |
2924 | [(set_attr "type" "neon_fp_abd_s<q>")] | |
2925 | ) | |
2926 | ||
;; vabd on VCVTF modes, modelled as UNSPEC_VABD_F of operands 1 and 2.
94f0f2cc JG |
2927 | (define_insn "neon_vabdf<mode>" |
2928 | [(set (match_operand:VCVTF 0 "s_register_operand" "=w") | |
2929 | (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w") | |
2930 | (match_operand:VCVTF 2 "s_register_operand" "w")] | |
2931 | UNSPEC_VABD_F))] | |
2932 | "TARGET_NEON" | |
2933 | "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" | |
2934 | [(set_attr "type" "neon_fp_abd_s<q>")] | |
2935 | ) | |
2936 | ||
;; vabdl (VABDL unspec iterator): both sources are in a VW mode and the
;; result is in the corresponding <V_widen> mode.
2937 | (define_insn "neon_vabdl<sup><mode>" |
88f77cba JB |
2938 | [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") |
2939 | (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w") | |
94f0f2cc JG |
2940 | (match_operand:VW 2 "s_register_operand" "w")] |
2941 | VABDL))] | |
88f77cba | 2942 | "TARGET_NEON" |
94f0f2cc | 2943 | "vabdl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2" |
f7379e5e | 2944 | [(set_attr "type" "neon_abd_long")] |
c956e102 | 2945 | ) |
88f77cba | 2946 | |
;; vaba on VDQIW modes: the VABD unspec of operands 2 and 3 is added to
;; operand 1.  Operand 1 uses the matching constraint "0", so the
;; accumulator shares a register with the destination and is not printed.
94f0f2cc | 2947 | (define_insn "neon_vaba<sup><mode>" |
88f77cba | 2948 | [(set (match_operand:VDQIW 0 "s_register_operand" "=w") |
5b28d821 | 2949 | (plus:VDQIW (unspec:VDQIW [(match_operand:VDQIW 2 "s_register_operand" "w") |
94f0f2cc JG |
2950 | (match_operand:VDQIW 3 "s_register_operand" "w")] |
2951 | VABD) | |
5b28d821 | 2952 | (match_operand:VDQIW 1 "s_register_operand" "0")))] |
88f77cba | 2953 | "TARGET_NEON" |
94f0f2cc | 2954 | "vaba.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3" |
f7379e5e | 2955 | [(set_attr "type" "neon_arith_acc<q>")] |
c956e102 | 2956 | ) |
88f77cba | 2957 | |
;; vabal: the VABDL unspec of the VW operands 2 and 3 is added to the
;; <V_widen> accumulator in operand 1, which is tied to the destination by
;; the matching constraint "0".
94f0f2cc | 2958 | (define_insn "neon_vabal<sup><mode>" |
88f77cba | 2959 | [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") |
5b28d821 | 2960 | (plus:<V_widen> (unspec:<V_widen> [(match_operand:VW 2 "s_register_operand" "w") |
94f0f2cc JG |
2961 | (match_operand:VW 3 "s_register_operand" "w")] |
2962 | VABDL) | |
5b28d821 | 2963 | (match_operand:<V_widen> 1 "s_register_operand" "0")))] |
88f77cba | 2964 | "TARGET_NEON" |
94f0f2cc | 2965 | "vabal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3" |
f7379e5e | 2966 | [(set_attr "type" "neon_arith_acc<q>")] |
c956e102 | 2967 | ) |
88f77cba | 2968 | |
;; Expander taking two V16QI inputs (operands 1, 2) and a V4SI accumulator
;; (operand 3), writing a V4SI result (operand 0).  The emitted sequence is:
;;   1. vabdl of the low V8QI halves into a fresh V8HI temporary;
;;   2. vget_high of both inputs into V8QI temporaries;
;;   3. vabal of those high halves into the same V8HI temporary;
;;   4. vpadal of the V8HI temporary into operand 3 (operand 3 is both the
;;      accumulator input and the destination of this step);
;;   5. a move of operand 3 into operand 0.
84ae7213 PW |
2969 | (define_expand "<sup>sadv16qi" |
2970 | [(use (match_operand:V4SI 0 "register_operand")) | |
2971 | (unspec:V16QI [(use (match_operand:V16QI 1 "register_operand")) | |
2972 | (use (match_operand:V16QI 2 "register_operand"))] VABAL) | |
2973 | (use (match_operand:V4SI 3 "register_operand"))] | |
2974 | "TARGET_NEON" | |
2975 | { | |
2976 | rtx reduc = gen_reg_rtx (V8HImode); | |
2977 | rtx op1_highpart = gen_reg_rtx (V8QImode); | |
2978 | rtx op2_highpart = gen_reg_rtx (V8QImode); | |
2979 | ||
2980 | emit_insn (gen_neon_vabdl<sup>v8qi (reduc, | |
2981 | gen_lowpart (V8QImode, operands[1]), | |
2982 | gen_lowpart (V8QImode, operands[2]))); | |
2983 | ||
2984 | emit_insn (gen_neon_vget_highv16qi (op1_highpart, operands[1])); | |
2985 | emit_insn (gen_neon_vget_highv16qi (op2_highpart, operands[2])); | |
2986 | emit_insn (gen_neon_vabal<sup>v8qi (reduc, reduc, | |
2987 | op1_highpart, op2_highpart)); | |
2988 | emit_insn (gen_neon_vpadal<sup>v8hi (operands[3], operands[3], reduc)); | |
2989 | ||
2990 | emit_move_insn (operands[0], operands[3]); | |
2991 | DONE; | |
2992 | } | |
2993 | ) | |
2994 | ||
;; v<maxmin> on VDQIW modes, modelled with the VMAXMIN unspec iterator.
94f0f2cc JG |
2995 | (define_insn "neon_v<maxmin><sup><mode>" |
2996 | [(set (match_operand:VDQIW 0 "s_register_operand" "=w") | |
2997 | (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w") | |
2998 | (match_operand:VDQIW 2 "s_register_operand" "w")] | |
2999 | VMAXMIN))] | |
88f77cba | 3000 | "TARGET_NEON" |
94f0f2cc JG |
3001 | "v<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
3002 | [(set_attr "type" "neon_minmax<q>")] | |
c956e102 | 3003 | ) |
88f77cba | 3004 | |
;; v<maxmin> on VCVTF modes, modelled with the VMAXMINF unspec iterator.
94f0f2cc JG |
3005 | (define_insn "neon_v<maxmin>f<mode>" |
3006 | [(set (match_operand:VCVTF 0 "s_register_operand" "=w") | |
3007 | (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w") | |
3008 | (match_operand:VCVTF 2 "s_register_operand" "w")] | |
3009 | VMAXMINF))] | |
88f77cba | 3010 | "TARGET_NEON" |
94f0f2cc | 3011 | "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
0a18c19f DS |
3012 | [(set_attr "type" "neon_fp_minmax_s<q>")] |
3013 | ) | |
3014 | ||
;; VH-mode counterpart of the VCVTF neon_v<maxmin>f<mode> pattern: same
;; VMAXMINF unspec and template, but enabled under TARGET_NEON_FP16INST.
55a9b91b MW |
3015 | (define_insn "neon_v<maxmin>f<mode>" |
3016 | [(set (match_operand:VH 0 "s_register_operand" "=w") | |
3017 | (unspec:VH | |
3018 | [(match_operand:VH 1 "s_register_operand" "w") | |
3019 | (match_operand:VH 2 "s_register_operand" "w")] | |
3020 | VMAXMINF))] | |
3021 | "TARGET_NEON_FP16INST" | |
3022 | "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" | |
3023 | [(set_attr "type" "neon_fp_minmax_s<q>")] | |
3024 | ) | |
3025 | ||
;; vp<maxmin>.f16 for V4HF only (VPMAXMINF unspec iterator), enabled under
;; TARGET_NEON_FP16INST.  All three operands are printed with %P.
3026 | (define_insn "neon_vp<maxmin>fv4hf" | |
3027 | [(set (match_operand:V4HF 0 "s_register_operand" "=w") | |
3028 | (unspec:V4HF | |
3029 | [(match_operand:V4HF 1 "s_register_operand" "w") | |
3030 | (match_operand:V4HF 2 "s_register_operand" "w")] | |
3031 | VPMAXMINF))] | |
3032 | "TARGET_NEON_FP16INST" | |
3033 | "vp<maxmin>.f16\t%P0, %P1, %P2" | |
3034 | [(set_attr "type" "neon_reduc_minmax")] | |
3035 | ) | |
3036 | ||
;; <fmaxmin_op> on VH modes, modelled with the VMAXMINFNM unspec iterator
;; and enabled under TARGET_NEON_FP16INST.
3037 | (define_insn "neon_<fmaxmin_op><mode>" | |
3038 | [(set | |
3039 | (match_operand:VH 0 "s_register_operand" "=w") | |
3040 | (unspec:VH | |
3041 | [(match_operand:VH 1 "s_register_operand" "w") | |
3042 | (match_operand:VH 2 "s_register_operand" "w")] | |
3043 | VMAXMINFNM))] | |
3044 | "TARGET_NEON_FP16INST" | |
3045 | "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" | |
3046 | [(set_attr "type" "neon_fp_minmax_s<q>")] | |
3047 | ) | |
9fc158eb BB |
3048 | |
3049 | ;; v<maxmin>nm intrinsics. | |
;; <fmaxmin_op> on VCVTF modes (VMAXMINFNM unspec iterator); requires
;; TARGET_VFP5 in addition to TARGET_NEON.
3050 | (define_insn "neon_<fmaxmin_op><mode>" | |
3051 | [(set (match_operand:VCVTF 0 "s_register_operand" "=w") | |
3052 | (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w") | |
3053 | (match_operand:VCVTF 2 "s_register_operand" "w")] | |
3054 | VMAXMINFNM))] | |
c8d61ab8 | 3055 | "TARGET_NEON && TARGET_VFP5" |
9fc158eb BB |
3056 | "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
3057 | [(set_attr "type" "neon_fp_minmax_s<q>")] | |
3058 | ) | |
55a9b91b | 3059 | |
0a18c19f DS |
3060 | ;; Vector forms for the IEEE-754 fmax()/fmin() functions |
;; Same RTL, condition and output template as the VCVTF
;; neon_<fmaxmin_op><mode> pattern; only the pattern name differs
;; (<fmaxmin><mode>3).
3061 | (define_insn "<fmaxmin><mode>3" | |
3062 | [(set (match_operand:VCVTF 0 "s_register_operand" "=w") | |
3063 | (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w") | |
3064 | (match_operand:VCVTF 2 "s_register_operand" "w")] | |
3065 | VMAXMINFNM))] | |
c8d61ab8 | 3066 | "TARGET_NEON && TARGET_VFP5" |
0a18c19f | 3067 | "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
94f0f2cc | 3068 | [(set_attr "type" "neon_fp_minmax_s<q>")] |
c956e102 | 3069 | ) |
88f77cba JB |
3070 | |
;; Expander on VD modes that forwards its three operands unchanged to
;; gen_neon_vpadd_internal<mode> (defined outside this section).
3071 | (define_expand "neon_vpadd<mode>" |
cd65e265 DZ |
3072 | [(match_operand:VD 0 "s_register_operand") |
3073 | (match_operand:VD 1 "s_register_operand") | |
3074 | (match_operand:VD 2 "s_register_operand")] | |
88f77cba JB |
3075 | "TARGET_NEON" |
3076 | { | |
3077 | emit_insn (gen_neon_vpadd_internal<mode> (operands[0], operands[1], | |
3078 | operands[2])); | |
3079 | DONE; | |
3080 | }) | |
3081 | ||
;; vpaddl (VPADDL unspec iterator): one VDQIW source, result in the
;; corresponding <V_double_width> mode.
94f0f2cc | 3082 | (define_insn "neon_vpaddl<sup><mode>" |
88f77cba | 3083 | [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w") |
94f0f2cc JG |
3084 | (unspec:<V_double_width> [(match_operand:VDQIW 1 "s_register_operand" "w")] |
3085 | VPADDL))] | |
88f77cba | 3086 | "TARGET_NEON" |
94f0f2cc | 3087 | "vpaddl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1" |
f7379e5e | 3088 | [(set_attr "type" "neon_reduc_add_long")] |
c956e102 | 3089 | ) |
88f77cba | 3090 | |
;; vpadal (VPADAL unspec iterator): operand 1 is the <V_double_width>
;; accumulator, tied to the destination by the matching constraint "0";
;; operand 2 is the VDQIW source.  Only operands 0 and 2 are printed.
94f0f2cc | 3091 | (define_insn "neon_vpadal<sup><mode>" |
88f77cba JB |
3092 | [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w") |
3093 | (unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0") | |
94f0f2cc JG |
3094 | (match_operand:VDQIW 2 "s_register_operand" "w")] |
3095 | VPADAL))] | |
88f77cba | 3096 | "TARGET_NEON" |
94f0f2cc | 3097 | "vpadal.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2" |
f7379e5e | 3098 | [(set_attr "type" "neon_reduc_add_acc")] |
c956e102 | 3099 | ) |
88f77cba | 3100 | |
;; vp<maxmin> on VDI modes, modelled with the VPMAXMIN unspec iterator.
94f0f2cc JG |
3101 | (define_insn "neon_vp<maxmin><sup><mode>" |
3102 | [(set (match_operand:VDI 0 "s_register_operand" "=w") | |
3103 | (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w") | |
3104 | (match_operand:VDI 2 "s_register_operand" "w")] | |
3105 | VPMAXMIN))] | |
88f77cba | 3106 | "TARGET_NEON" |
94f0f2cc JG |
3107 | "vp<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
3108 | [(set_attr "type" "neon_reduc_minmax<q>")] | |
c956e102 | 3109 | ) |
88f77cba | 3110 | |
;; vp<maxmin> on VCVTF modes, modelled with the VPMAXMINF unspec iterator.
94f0f2cc JG |
3111 | (define_insn "neon_vp<maxmin>f<mode>" |
3112 | [(set (match_operand:VCVTF 0 "s_register_operand" "=w") | |
3113 | (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w") | |
3114 | (match_operand:VCVTF 2 "s_register_operand" "w")] | |
3115 | VPMAXMINF))] | |
88f77cba | 3116 | "TARGET_NEON" |
94f0f2cc JG |
3117 | "vp<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
3118 | [(set_attr "type" "neon_fp_reduc_minmax_s<q>")] | |
c956e102 | 3119 | ) |
88f77cba JB |
3120 | |
;; vrecps on VCVTF modes, modelled as UNSPEC_VRECPS of operands 1 and 2.
3121 | (define_insn "neon_vrecps<mode>" | |
3122 | [(set (match_operand:VCVTF 0 "s_register_operand" "=w") | |
3123 | (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w") | |
94f0f2cc | 3124 | (match_operand:VCVTF 2 "s_register_operand" "w")] |
88f77cba JB |
3125 | UNSPEC_VRECPS))] |
3126 | "TARGET_NEON" | |
c956e102 | 3127 | "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
f7379e5e | 3128 | [(set_attr "type" "neon_fp_recps_s<q>")] |
c956e102 | 3129 | ) |
88f77cba | 3130 | |
;; VH-mode counterpart of neon_vrecps<mode>: same UNSPEC_VRECPS and
;; template, enabled under TARGET_NEON_FP16INST.
55a9b91b MW |
3131 | (define_insn "neon_vrecps<mode>" |
3132 | [(set | |
3133 | (match_operand:VH 0 "s_register_operand" "=w") | |
3134 | (unspec:VH [(match_operand:VH 1 "s_register_operand" "w") | |
3135 | (match_operand:VH 2 "s_register_operand" "w")] | |
3136 | UNSPEC_VRECPS))] | |
3137 | "TARGET_NEON_FP16INST" | |
3138 | "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" | |
3139 | [(set_attr "type" "neon_fp_recps_s<q>")] | |
3140 | ) | |
3141 | ||
;; vrsqrts on VCVTF modes, modelled as UNSPEC_VRSQRTS of operands 1 and 2.
88f77cba JB |
3142 | (define_insn "neon_vrsqrts<mode>" |
3143 | [(set (match_operand:VCVTF 0 "s_register_operand" "=w") | |
3144 | (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w") | |
94f0f2cc | 3145 | (match_operand:VCVTF 2 "s_register_operand" "w")] |
88f77cba JB |
3146 | UNSPEC_VRSQRTS))] |
3147 | "TARGET_NEON" | |
c956e102 | 3148 | "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
f7379e5e | 3149 | [(set_attr "type" "neon_fp_rsqrts_s<q>")] |
c956e102 | 3150 | ) |
88f77cba | 3151 | |
;; VH-mode counterpart of neon_vrsqrts<mode>: same UNSPEC_VRSQRTS and
;; template, enabled under TARGET_NEON_FP16INST.
55a9b91b MW |
3152 | (define_insn "neon_vrsqrts<mode>" |
3153 | [(set | |
3154 | (match_operand:VH 0 "s_register_operand" "=w") | |
3155 | (unspec:VH [(match_operand:VH 1 "s_register_operand" "w") | |
3156 | (match_operand:VH 2 "s_register_operand" "w")] | |
3157 | UNSPEC_VRSQRTS))] | |
3158 | "TARGET_NEON_FP16INST" | |
3159 | "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" | |
3160 | [(set_attr "type" "neon_fp_rsqrts_s<q>")] | |
3161 | ) | |
3162 | ||
;; Expander on VDQW modes that simply forwards to gen_abs<mode>2.
bab53516 | 3163 | (define_expand "neon_vabs<mode>" |
cd65e265 DZ |
3164 | [(match_operand:VDQW 0 "s_register_operand") |
3165 | (match_operand:VDQW 1 "s_register_operand")] | |
88f77cba | 3166 | "TARGET_NEON" |
bab53516 SL |
3167 | { |
3168 | emit_insn (gen_abs<mode>2 (operands[0], operands[1])); | |
3169 | DONE; | |
3170 | }) | |
88f77cba JB |
3171 | |
;; vqabs on VDQIW modes, modelled as UNSPEC_VQABS of operand 1.
3172 | (define_insn "neon_vqabs<mode>" | |
3173 | [(set (match_operand:VDQIW 0 "s_register_operand" "=w") | |
94f0f2cc | 3174 | (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")] |
88f77cba JB |
3175 | UNSPEC_VQABS))] |
3176 | "TARGET_NEON" | |
c956e102 | 3177 | "vqabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1" |
f7379e5e | 3178 | [(set_attr "type" "neon_qabs<q>")] |
c956e102 | 3179 | ) |
88f77cba | 3180 | |
;; bswap on VDQHSD modes, emitted as vrev<element size>.8.
7a10ea9f KT |
3181 | (define_insn "neon_bswap<mode>" |
3182 | [(set (match_operand:VDQHSD 0 "register_operand" "=w") | |
3183 | (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))] | |
3184 | "TARGET_NEON" | |
3185 | "vrev<V_sz_elem>.8\\t%<V_reg>0, %<V_reg>1" | |
3186 | [(set_attr "type" "neon_rev<q>")] | |
3187 | ) | |
3188 | ||
;; Expander on VDQW modes that simply forwards to gen_neg<mode>2.
88f77cba | 3189 | (define_expand "neon_vneg<mode>" |
cd65e265 DZ |
3190 | [(match_operand:VDQW 0 "s_register_operand") |
3191 | (match_operand:VDQW 1 "s_register_operand")] | |
88f77cba JB |
3192 | "TARGET_NEON" |
3193 | { | |
3194 | emit_insn (gen_neg<mode>2 (operands[0], operands[1])); | |
3195 | DONE; | |
3196 | }) | |
3197 | ||
c2b7062d TC |
3198 | |
3199 | ;; The vcadd and vcmla patterns are explicitly made UNSPEC due to the | |
3200 | ;; fact that their usage needs to guarantee that the source vectors are | |
3201 | ;; contiguous. It would be wrong to describe the operation without being able | |
3202 | ;; to describe the permute that is also required, but even if that is done | |
3203 | ;; the permute would have been created as a LOAD_LANES which means the values | |
3204 | ;; in the registers are in the wrong order. | |
;; vcadd on VF modes, modelled with the VCADD unspec iterator and enabled
;; under TARGET_COMPLEX.  <rot> is printed as the trailing immediate.
3205 | (define_insn "neon_vcadd<rot><mode>" | |
3206 | [(set (match_operand:VF 0 "register_operand" "=w") | |
3207 | (unspec:VF [(match_operand:VF 1 "register_operand" "w") | |
3208 | (match_operand:VF 2 "register_operand" "w")] | |
3209 | VCADD))] | |
3210 | "TARGET_COMPLEX" | |
3211 | "vcadd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, #<rot>" | |
3212 | [(set_attr "type" "neon_fcadd")] | |
3213 | ) | |
3214 | ||
;; vcmla on VF modes: the VCMLA unspec of operands 2 and 3 is added to the
;; accumulator in operand 1, which is tied to the destination by the
;; matching constraint "0".  <rot> is printed as the trailing immediate.
3215 | (define_insn "neon_vcmla<rot><mode>" | |
3216 | [(set (match_operand:VF 0 "register_operand" "=w") | |
3217 | (plus:VF (match_operand:VF 1 "register_operand" "0") | |
3218 | (unspec:VF [(match_operand:VF 2 "register_operand" "w") | |
3219 | (match_operand:VF 3 "register_operand" "w")] | |
3220 | VCMLA)))] | |
3221 | "TARGET_COMPLEX" | |
3222 | "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3, #<rot>" | |
3223 | [(set_attr "type" "neon_fcmla")] | |
3224 | ) | |
3225 | ||
;; Lane form of vcmla where the lane source (operand 3) has the same VF mode
;; as the other vector operands; operand 4 is a constant lane index.
;; The operand array is replaced by the result of
;; neon_vcmla_lane_prepare_operands (defined outside this section) before
;; the "d%c3[%c4]" template is printed.
3226 | (define_insn "neon_vcmla_lane<rot><mode>" | |
3227 | [(set (match_operand:VF 0 "s_register_operand" "=w") | |
3228 | (plus:VF (match_operand:VF 1 "s_register_operand" "0") | |
3229 | (unspec:VF [(match_operand:VF 2 "s_register_operand" "w") | |
3230 | (match_operand:VF 3 "s_register_operand" "<VF_constraint>") | |
3231 | (match_operand:SI 4 "const_int_operand" "n")] | |
3232 | VCMLA)))] | |
3233 | "TARGET_COMPLEX" | |
3234 | { | |
ee8045e5 | 3235 | operands = neon_vcmla_lane_prepare_operands (operands); |
c2b7062d TC |
3236 | return "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, d%c3[%c4], #<rot>"; |
3237 | } | |
3238 | [(set_attr "type" "neon_fcmla")] | |
3239 | ) | |
3240 | ||
;; Lane form of vcmla on VDF modes where the lane source (operand 3) is in
;; the <V_DOUBLE> mode of the other operands; operand 4 is a constant lane
;; index.  Operands are rewritten by neon_vcmla_lane_prepare_operands
;; (defined outside this section) before the template is printed.
3241 | (define_insn "neon_vcmla_laneq<rot><mode>" | |
3242 | [(set (match_operand:VDF 0 "s_register_operand" "=w") | |
3243 | (plus:VDF (match_operand:VDF 1 "s_register_operand" "0") | |
3244 | (unspec:VDF [(match_operand:VDF 2 "s_register_operand" "w") | |
3245 | (match_operand:<V_DOUBLE> 3 "s_register_operand" "<VF_constraint>") | |
3246 | (match_operand:SI 4 "const_int_operand" "n")] | |
3247 | VCMLA)))] | |
3248 | "TARGET_COMPLEX" | |
3249 | { | |
ee8045e5 | 3250 | operands = neon_vcmla_lane_prepare_operands (operands); |
c2b7062d TC |
3251 | return "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, d%c3[%c4], #<rot>"; |
3252 | } | |
3253 | [(set_attr "type" "neon_fcmla")] | |
3254 | ) | |
3255 | ||
;; Lane form of vcmla on VQ_HSF modes where the lane source (operand 3) is
;; in the <V_HALF> mode of the other operands; operand 4 is a constant lane
;; index.  Operands are rewritten by neon_vcmla_lane_prepare_operands
;; (defined outside this section) before the template is printed.
3256 | (define_insn "neon_vcmlaq_lane<rot><mode>" | |
3257 | [(set (match_operand:VQ_HSF 0 "s_register_operand" "=w") | |
3258 | (plus:VQ_HSF (match_operand:VQ_HSF 1 "s_register_operand" "0") | |
3259 | (unspec:VQ_HSF [(match_operand:VQ_HSF 2 "s_register_operand" "w") | |
3260 | (match_operand:<V_HALF> 3 "s_register_operand" "<VF_constraint>") | |
3261 | (match_operand:SI 4 "const_int_operand" "n")] | |
3262 | VCMLA)))] | |
3263 | "TARGET_COMPLEX" | |
3264 | { | |
ee8045e5 | 3265 | operands = neon_vcmla_lane_prepare_operands (operands); |
c2b7062d TC |
3266 | return "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, d%c3[%c4], #<rot>"; |
3267 | } | |
3268 | [(set_attr "type" "neon_fcmla")] | |
3269 | ) | |
3270 | ||
3271 | ||
f8e109ba TC |
3272 | ;; These instructions map to the __builtins for the Dot Product operations. |
;; Operand 1 (VCVTI) is the accumulator, tied to the destination by the
;; matching constraint "0"; the DOTPROD unspec of the <VSI2QI> operands 2
;; and 3 is added to it.  Enabled under TARGET_DOTPROD.
3273 | (define_insn "neon_<sup>dot<vsi2qi>" | |
3274 | [(set (match_operand:VCVTI 0 "register_operand" "=w") | |
3275 | (plus:VCVTI (match_operand:VCVTI 1 "register_operand" "0") | |
3276 | (unspec:VCVTI [(match_operand:<VSI2QI> 2 | |
3277 | "register_operand" "w") | |
3278 | (match_operand:<VSI2QI> 3 | |
3279 | "register_operand" "w")] | |
3280 | DOTPROD)))] | |
3281 | "TARGET_DOTPROD" | |
3282 | "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %<V_reg>3" | |
51e6029f | 3283 | [(set_attr "type" "neon_dot<q>")] |
f8e109ba TC |
3284 | ) |
3285 | ||
f348846e SMW |
3286 | ;; These instructions map to the __builtins for the Dot Product operations. |
;; vusdot.s8: UNSPEC_DOT_US of the <VSI2QI> operands 2 and 3 added to the
;; VCVTI accumulator in operand 1 (tied to the destination by "0").
;; Enabled under TARGET_I8MM.
3287 | (define_insn "neon_usdot<vsi2qi>" | |
3288 | [(set (match_operand:VCVTI 0 "register_operand" "=w") | |
3289 | (plus:VCVTI | |
3290 | (unspec:VCVTI | |
3291 | [(match_operand:<VSI2QI> 2 "register_operand" "w") | |
3292 | (match_operand:<VSI2QI> 3 "register_operand" "w")] | |
3293 | UNSPEC_DOT_US) | |
3294 | (match_operand:VCVTI 1 "register_operand" "0")))] | |
3295 | "TARGET_I8MM" | |
3296 | "vusdot.s8\\t%<V_reg>0, %<V_reg>2, %<V_reg>3" | |
3297 | [(set_attr "type" "neon_dot<q>")] | |
3298 | ) | |
3299 | ||
f8e109ba TC |
3300 | ;; These instructions map to the __builtins for the Dot Product |
3301 | ;; indexed operations. | |
;; Indexed DOTPROD form: operand 3 is a V8QI register and operand 4 the
;; lane index.  Before printing, the lane index is remapped through
;; NEON_ENDIAN_LANE_N for V8QImode and stored back into operands[4].
3302 | (define_insn "neon_<sup>dot_lane<vsi2qi>" | |
3303 | [(set (match_operand:VCVTI 0 "register_operand" "=w") | |
3304 | (plus:VCVTI (match_operand:VCVTI 1 "register_operand" "0") | |
3305 | (unspec:VCVTI [(match_operand:<VSI2QI> 2 | |
3306 | "register_operand" "w") | |
3307 | (match_operand:V8QI 3 "register_operand" "t") | |
3308 | (match_operand:SI 4 "immediate_operand" "i")] | |
3309 | DOTPROD)))] | |
3310 | "TARGET_DOTPROD" | |
3311 | { | |
3312 | operands[4] | |
3313 | = GEN_INT (NEON_ENDIAN_LANE_N (V8QImode, INTVAL (operands[4]))); | |
3314 | return "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %P3[%c4]"; | |
3315 | } | |
51e6029f | 3316 | [(set_attr "type" "neon_dot<q>")] |
f8e109ba TC |
3317 | ) |
3318 | ||
f348846e SMW |
3319 | ;; These instructions map to the __builtins for the Dot Product |
3320 | ;; indexed operations in the v8.6 I8MM extension. | |
;; Indexed form using the DOTPROD_I8MM unspec iterator, enabled under
;; TARGET_I8MM.  Operand 3 is a V8QI register and operand 4 the lane index.
;; NOTE(review): the GEN_INT (INTVAL (...)) round-trip leaves the lane value
;; unchanged; unlike the DOTPROD variant, no NEON_ENDIAN_LANE_N remapping is
;; applied here -- confirm that this is intended.
3321 | (define_insn "neon_<sup>dot_lane<vsi2qi>" | |
3322 | [(set (match_operand:VCVTI 0 "register_operand" "=w") | |
3323 | (plus:VCVTI | |
3324 | (unspec:VCVTI | |
3325 | [(match_operand:<VSI2QI> 2 "register_operand" "w") | |
3326 | (match_operand:V8QI 3 "register_operand" "t") | |
3327 | (match_operand:SI 4 "immediate_operand" "i")] | |
3328 | DOTPROD_I8MM) | |
3329 | (match_operand:VCVTI 1 "register_operand" "0")))] | |
3330 | "TARGET_I8MM" | |
3331 | { | |
3332 | operands[4] = GEN_INT (INTVAL (operands[4])); | |
3333 | return "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %P3[%c4]"; | |
3334 | } | |
3335 | [(set_attr "type" "neon_dot<q>")] | |
3336 | ) | |
3337 | ||
f8e109ba TC |
3338 | ;; These expands map to the Dot Product optab the vectorizer checks for. |
3339 | ;; The auto-vectorizer expects a dot product builtin that also does an | |
3340 | ;; accumulation into the provided register. | |
3341 | ;; Given the following pattern | |
3342 | ;; | |
3343 | ;; for (i=0; i<len; i++) { | |
3344 | ;; c = a[i] * b[i]; | |
3345 | ;; r += c; | |
3346 | ;; } | |
3347 | ;; return r; | |
3348 | ;; | |
3349 | ;; This can be auto-vectorized to | |
3350 | ;; r = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3]; | |
3351 | ;; | |
3352 | ;; given enough iterations. However the vectorizer can keep unrolling the loop | |
3353 | ;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7]; | |
3354 | ;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11]; | |
3355 | ;; ... | |
3356 | ;; | |
3357 | ;; and so the vectorizer provides r, in which the result has to be accumulated. | |
;; Operands 1 and 2 are the <VSI2QI> inputs, operand 3 the VCVTI accumulator
;; and operand 0 the VCVTI result.  The expansion first emits
;; neon_<sup>dot<vsi2qi> with operand 3 as both destination and accumulator,
;; then sets operand 0 from operand 3.
3358 | (define_expand "<sup>dot_prod<vsi2qi>" | |
3359 | [(set (match_operand:VCVTI 0 "register_operand") | |
3360 | (plus:VCVTI (unspec:VCVTI [(match_operand:<VSI2QI> 1 | |
3361 | "register_operand") | |
3362 | (match_operand:<VSI2QI> 2 | |
3363 | "register_operand")] | |
3364 | DOTPROD) | |
3365 | (match_operand:VCVTI 3 "register_operand")))] | |
3366 | "TARGET_DOTPROD" | |
3367 | { | |
3368 | emit_insn ( | |
3369 | gen_neon_<sup>dot<vsi2qi> (operands[3], operands[3], operands[1], | |
3370 | operands[2])); | |
3371 | emit_insn (gen_rtx_SET (operands[0], operands[3])); | |
3372 | DONE; | |
3373 | }) | |
3374 | ||
;; Expander on VCVTF modes.  It builds a <V_cmp_result>-mode vector with
;; 0x80000000 (SImode) duplicated into every element, copies operand 2 into
;; operand 0, reinterprets the mask in the floating-point vector mode via a
;; subreg, and then emits neon_vbsl with that mask, operand 0 and operand 1.
;; NOTE(review): presumably vbsl takes the masked (sign) bit from operand 2
;; and the remaining bits from operand 1 -- confirm against neon_vbsl<mode>.
97f518b3 JW |
3375 | (define_expand "neon_copysignf<mode>" |
3376 | [(match_operand:VCVTF 0 "register_operand") | |
3377 | (match_operand:VCVTF 1 "register_operand") | |
3378 | (match_operand:VCVTF 2 "register_operand")] | |
3379 | "TARGET_NEON" | |
3380 | "{ | |
3381 | rtx v_bitmask_cast; | |
3382 | rtx v_bitmask = gen_reg_rtx (<VCVTF:V_cmp_result>mode); | |
4199c859 | 3383 | rtx c = gen_int_mode (0x80000000, SImode); |
97f518b3 JW |
3384 | |
3385 | emit_move_insn (v_bitmask, | |
59d06c05 | 3386 | gen_const_vec_duplicate (<VCVTF:V_cmp_result>mode, c)); |
97f518b3 JW |
3387 | emit_move_insn (operands[0], operands[2]); |
3388 | v_bitmask_cast = simplify_gen_subreg (<MODE>mode, v_bitmask, | |
3389 | <VCVTF:V_cmp_result>mode, 0); | |
3390 | emit_insn (gen_neon_vbsl<mode> (operands[0], v_bitmask_cast, operands[0], | |
3391 | operands[1])); | |
3392 | ||
3393 | DONE; | |
3394 | }" | |
3395 | ) | |
3396 | ||
;; vqneg on VDQIW modes, modelled as UNSPEC_VQNEG of operand 1.
88f77cba JB |
3397 | (define_insn "neon_vqneg<mode>" |
3398 | [(set (match_operand:VDQIW 0 "s_register_operand" "=w") | |
94f0f2cc | 3399 | (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")] |
88f77cba JB |
3400 | UNSPEC_VQNEG))] |
3401 | "TARGET_NEON" | |
c956e102 | 3402 | "vqneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1" |
f7379e5e | 3403 | [(set_attr "type" "neon_qneg<q>")] |
c956e102 | 3404 | ) |
88f77cba JB |
3405 | |
;; vcls on VDQIW modes, modelled as UNSPEC_VCLS of operand 1.
3406 | (define_insn "neon_vcls<mode>" | |
3407 | [(set (match_operand:VDQIW 0 "s_register_operand" "=w") | |
94f0f2cc | 3408 | (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")] |
88f77cba JB |
3409 | UNSPEC_VCLS))] |
3410 | "TARGET_NEON" | |
c956e102 | 3411 | "vcls.<V_s_elem>\t%<V_reg>0, %<V_reg>1" |
f7379e5e | 3412 | [(set_attr "type" "neon_cls<q>")] |
c956e102 | 3413 | ) |
88f77cba | 3414 | |
;; clz RTL code on VDQIW modes, emitted as vclz.  This is the pattern the
;; neon_vclz<mode> expander forwards to.
b3b7bbce | 3415 | (define_insn "clz<mode>2" |
88f77cba | 3416 | [(set (match_operand:VDQIW 0 "s_register_operand" "=w") |
b3b7bbce | 3417 | (clz:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")))] |
88f77cba | 3418 | "TARGET_NEON" |
c956e102 | 3419 | "vclz.<V_if_elem>\t%<V_reg>0, %<V_reg>1" |
f7379e5e | 3420 | [(set_attr "type" "neon_cnt<q>")] |
c956e102 | 3421 | ) |
88f77cba | 3422 | |
;; Expander on VDQIW modes that simply forwards to gen_clz<mode>2.
b3b7bbce | 3423 | (define_expand "neon_vclz<mode>" |
cd65e265 DZ |
3424 | [(match_operand:VDQIW 0 "s_register_operand") |
3425 | (match_operand:VDQIW 1 "s_register_operand")] | |
b3b7bbce SL |
3426 | "TARGET_NEON" |
3427 | { | |
3428 | emit_insn (gen_clz<mode>2 (operands[0], operands[1])); | |
3429 | DONE; | |
3430 | }) | |
3431 | ||
;; popcount RTL code on VE modes, emitted as vcnt.  This is the pattern the
;; neon_vcnt<mode> expander forwards to.
3432 | (define_insn "popcount<mode>2" |
88f77cba | 3433 | [(set (match_operand:VE 0 "s_register_operand" "=w") |
b3b7bbce | 3434 | (popcount:VE (match_operand:VE 1 "s_register_operand" "w")))] |
88f77cba | 3435 | "TARGET_NEON" |
c956e102 | 3436 | "vcnt.<V_sz_elem>\t%<V_reg>0, %<V_reg>1" |
f7379e5e | 3437 | [(set_attr "type" "neon_cnt<q>")] |
c956e102 | 3438 | ) |
88f77cba | 3439 | |
;; Expander on VE modes that simply forwards to gen_popcount<mode>2.
b3b7bbce | 3440 | (define_expand "neon_vcnt<mode>" |
cd65e265 DZ |
3441 | [(match_operand:VE 0 "s_register_operand") |
3442 | (match_operand:VE 1 "s_register_operand")] | |
b3b7bbce SL |
3443 | "TARGET_NEON" |
3444 | { | |
3445 | emit_insn (gen_popcount<mode>2 (operands[0], operands[1])); | |
3446 | DONE; | |
3447 | }) | |
3448 | ||
;; vrecpe.f16 on VH modes, modelled as UNSPEC_VRECPE of operand 1 and
;; enabled under TARGET_NEON_FP16INST.
55a9b91b MW |
3449 | (define_insn "neon_vrecpe<mode>" |
3450 | [(set (match_operand:VH 0 "s_register_operand" "=w") | |
3451 | (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")] | |
3452 | UNSPEC_VRECPE))] | |
3453 | "TARGET_NEON_FP16INST" | |
3454 | "vrecpe.f16\t%<V_reg>0, %<V_reg>1" | |
3455 | [(set_attr "type" "neon_fp_recpe_s<q>")] | |
3456 | ) | |
3457 | ||
;; vrecpe on V32 modes, modelled as UNSPEC_VRECPE of operand 1; the element
;; suffix comes from <V_u_elem>.
88f77cba JB |
3458 | (define_insn "neon_vrecpe<mode>" |
3459 | [(set (match_operand:V32 0 "s_register_operand" "=w") | |
94f0f2cc | 3460 | (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")] |
88f77cba JB |
3461 | UNSPEC_VRECPE))] |
3462 | "TARGET_NEON" | |
c956e102 | 3463 | "vrecpe.<V_u_elem>\t%<V_reg>0, %<V_reg>1" |
f7379e5e | 3464 | [(set_attr "type" "neon_fp_recpe_s<q>")] |
c956e102 | 3465 | ) |
88f77cba JB |
3466 | |
;; vrsqrte on V32 modes, modelled as UNSPEC_VRSQRTE of operand 1; the
;; element suffix comes from <V_u_elem>.
3467 | (define_insn "neon_vrsqrte<mode>" | |
3468 | [(set (match_operand:V32 0 "s_register_operand" "=w") | |
94f0f2cc | 3469 | (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")] |
88f77cba JB |
3470 | UNSPEC_VRSQRTE))] |
3471 | "TARGET_NEON" | |
c956e102 | 3472 | "vrsqrte.<V_u_elem>\t%<V_reg>0, %<V_reg>1" |
f7379e5e | 3473 | [(set_attr "type" "neon_fp_rsqrte_s<q>")] |
c956e102 | 3474 | ) |
88f77cba JB |
3475 | |
;; Expander on VDQIW modes that simply forwards to gen_one_cmpl<mode>2.
3476 | (define_expand "neon_vmvn<mode>" |
cd65e265 DZ |
3477 | [(match_operand:VDQIW 0 "s_register_operand") |
3478 | (match_operand:VDQIW 1 "s_register_operand")] | |
88f77cba JB |
3479 | "TARGET_NEON" |
3480 | { | |
3481 | emit_insn (gen_one_cmpl<mode>2 (operands[0], operands[1])); | |
3482 | DONE; | |
3483 | }) | |
3484 | ||
;; Sign-extending lane extract from a VD-mode register into an SImode core
;; register, emitted as vmov.s<size>.  When BYTES_BIG_ENDIAN is set the lane
;; index is mirrored (nunits - 1 - elt) and stored back into operands[2]
;; before printing.
89ffa8fc JB |
3485 | (define_insn "neon_vget_lane<mode>_sext_internal" |
3486 | [(set (match_operand:SI 0 "s_register_operand" "=r") | |
3487 | (sign_extend:SI | |
3488 | (vec_select:<V_elem> | |
3489 | (match_operand:VD 1 "s_register_operand" "w") | |
3490 | (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] | |
88f77cba | 3491 | "TARGET_NEON" |
874d42b9 JM |
3492 | { |
3493 | if (BYTES_BIG_ENDIAN) | |
3494 | { | |
3495 | int elt = INTVAL (operands[2]); | |
3496 | elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt; | |
3497 | operands[2] = GEN_INT (elt); | |
3498 | } | |
c3b1709a | 3499 | return "vmov.s<V_sz_elem>\t%0, %P1[%c2]"; |
874d42b9 | 3500 | } |
f7379e5e | 3501 | [(set_attr "type" "neon_to_gp")] |
c956e102 | 3502 | ) |
88f77cba | 3503 | |
;; Zero-extending lane extract from a VD-mode register into an SImode core
;; register, emitted as vmov.u<size>.  When BYTES_BIG_ENDIAN is set the lane
;; index is mirrored (nunits - 1 - elt) and stored back into operands[2]
;; before printing.
89ffa8fc JB |
3504 | (define_insn "neon_vget_lane<mode>_zext_internal" |
3505 | [(set (match_operand:SI 0 "s_register_operand" "=r") | |
3506 | (zero_extend:SI | |
3507 | (vec_select:<V_elem> | |
3508 | (match_operand:VD 1 "s_register_operand" "w") | |
3509 | (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] | |
3510 | "TARGET_NEON" | |
874d42b9 JM |
3511 | { |
3512 | if (BYTES_BIG_ENDIAN) | |
3513 | { | |
3514 | int elt = INTVAL (operands[2]); | |
3515 | elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt; | |
3516 | operands[2] = GEN_INT (elt); | |
3517 | } | |
c3b1709a | 3518 | return "vmov.u<V_sz_elem>\t%0, %P1[%c2]"; |
874d42b9 | 3519 | } |
f7379e5e | 3520 | [(set_attr "type" "neon_to_gp")] |
89ffa8fc | 3521 | ) |
88f77cba | 3522 | |
;; Sign-extending lane extract from a VQ2-mode register into an SImode core
;; register.  The source is addressed as a <V_HALF>-mode register:
;;   - register number: regno + 2 * (elt / halfelts);
;;   - lane within it:  elt % halfelts, mirrored to halfelts - 1 - lane
;;     when BYTES_BIG_ENDIAN is set.
;; The instruction is printed through output_asm_insn with a private
;; operand array, so the template returned to the caller is empty.
89ffa8fc JB |
3523 | (define_insn "neon_vget_lane<mode>_sext_internal" |
3524 | [(set (match_operand:SI 0 "s_register_operand" "=r") | |
3525 | (sign_extend:SI | |
3526 | (vec_select:<V_elem> | |
4b644867 | 3527 | (match_operand:VQ2 1 "s_register_operand" "w") |
89ffa8fc | 3528 | (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] |
88f77cba | 3529 | "TARGET_NEON" |
b617fc71 | 3530 | { |
89ffa8fc JB |
3531 | rtx ops[3]; |
3532 | int regno = REGNO (operands[1]); | |
3533 | unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2; | |
3534 | unsigned int elt = INTVAL (operands[2]); | |
874d42b9 JM |
3535 | unsigned int elt_adj = elt % halfelts; |
3536 | ||
3537 | if (BYTES_BIG_ENDIAN) | |
3538 | elt_adj = halfelts - 1 - elt_adj; | |
89ffa8fc JB |
3539 | |
3540 | ops[0] = operands[0]; | |
3541 | ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts)); | |
874d42b9 | 3542 | ops[2] = GEN_INT (elt_adj); |
c3b1709a | 3543 | output_asm_insn ("vmov.s<V_sz_elem>\t%0, %P1[%c2]", ops); |
89ffa8fc JB |
3544 | |
3545 | return ""; | |
b617fc71 | 3546 | } |
f7379e5e | 3547 | [(set_attr "type" "neon_to_gp_q")] |
c956e102 | 3548 | ) |
88f77cba | 3549 | |
;; Q-register (VQ2 modes) form of the zero-extending lane extract.  The
;; instruction is printed against one <V_HALF> half of the source: the
;; register number is advanced by 2 when the lane lies in the upper half,
;; and the lane index is reduced modulo the lanes per half (and mirrored
;; within that half on big-endian targets).
(define_insn "neon_vget_lane<mode>_zext_internal"
  [(set (match_operand:SI 0 "s_register_operand" "=r")
        (zero_extend:SI
          (vec_select:<V_elem>
            (match_operand:VQ2 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON"
{
  const int base_regno = REGNO (operands[1]);
  const unsigned int lanes_per_half = GET_MODE_NUNITS (<MODE>mode) / 2;
  const unsigned int lane = INTVAL (operands[2]);
  const unsigned int half_index = lane / lanes_per_half;
  unsigned int half_lane = lane % lanes_per_half;
  rtx xops[3];

  if (BYTES_BIG_ENDIAN)
    half_lane = lanes_per_half - 1 - half_lane;

  xops[0] = operands[0];
  xops[1] = gen_rtx_REG (<V_HALF>mode, base_regno + 2 * half_index);
  xops[2] = GEN_INT (half_lane);
  output_asm_insn ("vmov.u<V_sz_elem>\t%0, %P1[%c2]", xops);

  /* Everything has already been emitted above.  */
  return "";
}
  [(set_attr "type" "neon_to_gp_q")]
)
3576 | ||
;; Expander for the signed lane-extract builtin.  32-bit elements go
;; through the generic vec_extract pattern; narrower elements use the
;; sign-extending internal pattern.
(define_expand "neon_vget_lane<mode>"
  [(match_operand:<V_ext> 0 "s_register_operand")
   (match_operand:VDQW 1 "s_register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON"
{
  const unsigned int elt_bits = GET_MODE_UNIT_BITSIZE (<MODE>mode);

  if (BYTES_BIG_ENDIAN)
    {
      /* The intrinsics number lanes in vldm (memory) order, while
	 generic RTL numbers them in array order.  Translate the lane
	 by flipping its index within a 64-bit register.  */
      const unsigned int lanes_per_d = 64 / elt_bits;
      const unsigned int intrinsic_lane = INTVAL (operands[2]);
      operands[2] = GEN_INT (intrinsic_lane ^ (lanes_per_d - 1));
    }

  if (elt_bits != 32)
    emit_insn (gen_neon_vget_lane<mode>_sext_internal (operands[0],
						       operands[1],
						       operands[2]));
  else
    emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], operands[1],
						operands[2]));
  DONE;
})
3606 | ||
;; Expander for the unsigned lane-extract builtin.  32-bit elements go
;; through the generic vec_extract pattern; narrower elements use the
;; zero-extending internal pattern.
(define_expand "neon_vget_laneu<mode>"
  [(match_operand:<V_ext> 0 "s_register_operand")
   (match_operand:VDQIW 1 "s_register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON"
{
  const unsigned int elt_bits = GET_MODE_UNIT_BITSIZE (<MODE>mode);

  if (BYTES_BIG_ENDIAN)
    {
      /* The intrinsics number lanes in vldm (memory) order, while
	 generic RTL numbers them in array order.  Translate the lane
	 by flipping its index within a 64-bit register.  */
      const unsigned int lanes_per_d = 64 / elt_bits;
      const unsigned int intrinsic_lane = INTVAL (operands[2]);
      operands[2] = GEN_INT (intrinsic_lane ^ (lanes_per_d - 1));
    }

  if (elt_bits != 32)
    emit_insn (gen_neon_vget_lane<mode>_zext_internal (operands[0],
						       operands[1],
						       operands[2]));
  else
    emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], operands[1],
						operands[2]));
  DONE;
})
3636 | ||
;; Lane extract from a DImode "vector".  Operand 2 (the lane number) is
;; not examined: the whole DImode source is simply copied to the
;; destination.
(define_expand "neon_vget_lanedi"
  [(match_operand:DI 0 "s_register_operand")
   (match_operand:DI 1 "s_register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON"
{
  emit_move_insn (operands[0], operands[1]);
  DONE;
})
88f77cba | 3646 | |
;; Lane extract from V2DI: lane 0 is the DImode low part of the source,
;; lane 1 the high part.  Any other lane trips the assertion.
(define_expand "neon_vget_lanev2di"
  [(match_operand:DI 0 "s_register_operand")
   (match_operand:V2DI 1 "s_register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      /* The intrinsics number lanes in vldm (memory) order, while
	 generic RTL numbers them in array order.  With two lanes the
	 translation is just a flip of the low index bit.  */
      const unsigned int lanes = 2;
      unsigned int swapped = INTVAL (operands[2]);
      swapped ^= lanes - 1;
      operands[2] = GEN_INT (swapped);
    }

  const int lane = INTVAL (operands[2]);
  gcc_assert (lane == 0 || lane == 1);

  rtx half;
  if (lane == 0)
    half = gen_lowpart (DImode, operands[1]);
  else
    half = gen_highpart (DImode, operands[1]);

  emit_move_insn (operands[0], half);
  DONE;
})
b617fc71 | 3675 | |
;; Lane insert: operand 1 is the new element, operand 2 the vector being
;; updated and operand 3 the lane number.  The lane is handed to
;; vec_set<mode>_internal as a one-hot mask (1 << lane).
(define_expand "neon_vset_lane<mode>"
  [(match_operand:VDQ 0 "s_register_operand")
   (match_operand:<V_elem> 1 "s_register_operand")
   (match_operand:VDQ 2 "s_register_operand")
   (match_operand:SI 3 "immediate_operand")]
  "TARGET_NEON"
{
  unsigned int lane = INTVAL (operands[3]);

  if (BYTES_BIG_ENDIAN)
    {
      /* Flip the lane index within a 64-bit register.  */
      const unsigned int lanes_per_d
	= 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
      lane ^= lanes_per_d - 1;
    }

  rtx lane_mask = GEN_INT (1 << lane);
  emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
					 lane_mask, operands[2]));
  DONE;
})
88f77cba | 3696 | |
;; Lane insert into a DImode "vector".  Operands 2 (the old vector) and 3
;; (the lane number) are ignored: the new value in operand 1 is copied
;; straight to the destination.

(define_expand "neon_vset_lanedi"
  [(match_operand:DI 0 "s_register_operand")
   (match_operand:DI 1 "s_register_operand")
   (match_operand:DI 2 "s_register_operand")
   (match_operand:SI 3 "immediate_operand")]
  "TARGET_NEON"
{
  emit_move_insn (operands[0], operands[1]);
  DONE;
})
88f77cba JB |
3709 | |
;; Build a VD_RE-mode vector from a DImode value by moving the <MODE>mode
;; lowpart of operand 1 into the destination.
(define_expand "neon_vcreate<mode>"
  [(match_operand:VD_RE 0 "s_register_operand")
   (match_operand:DI 1 "general_operand")]
  "TARGET_NEON"
{
  emit_move_insn (operands[0], gen_lowpart (<MODE>mode, operands[1]));
  DONE;
})
3719 | ||
;; Broadcast a scalar held in a core register to every lane of a
;; VX-mode vector.
(define_insn "neon_vdup_n<mode>"
  [(set (match_operand:VX 0 "s_register_operand" "=w")
        (vec_duplicate:VX
          (match_operand:<V_elem> 1 "s_register_operand" "r")))]
  "TARGET_NEON"
  "vdup.<V_sz_elem>\t%<V_reg>0, %1"
  [(set_attr "type" "neon_from_gp<q>")]
)
88f77cba | 3727 | |
;; Broadcast an HFmode scalar from a core register to all four lanes of
;; a V4HF vector.
(define_insn "neon_vdup_nv4hf"
  [(set (match_operand:V4HF 0 "s_register_operand" "=w")
        (vec_duplicate:V4HF
          (match_operand:HF 1 "s_register_operand" "r")))]
  "TARGET_NEON"
  "vdup.16\t%P0, %1"
  [(set_attr "type" "neon_from_gp")]
)
3735 | ||
;; Broadcast an HFmode scalar from a core register to all eight lanes of
;; a V8HF vector.
(define_insn "neon_vdup_nv8hf"
  [(set (match_operand:V8HF 0 "s_register_operand" "=w")
        (vec_duplicate:V8HF
          (match_operand:HF 1 "s_register_operand" "r")))]
  "TARGET_NEON"
  "vdup.16\t%q0, %1"
  [(set_attr "type" "neon_from_gp_q")]
)
3743 | ||
;; Broadcast a BFmode scalar from a core register to all four lanes of
;; a V4BF vector.
(define_insn "neon_vdup_nv4bf"
  [(set (match_operand:V4BF 0 "s_register_operand" "=w")
        (vec_duplicate:V4BF
          (match_operand:BF 1 "s_register_operand" "r")))]
  "TARGET_NEON"
  "vdup.16\t%P0, %1"
  [(set_attr "type" "neon_from_gp")]
)
3751 | ||
;; Broadcast a BFmode scalar from a core register to all eight lanes of
;; a V8BF vector.
(define_insn "neon_vdup_nv8bf"
  [(set (match_operand:V8BF 0 "s_register_operand" "=w")
        (vec_duplicate:V8BF
          (match_operand:BF 1 "s_register_operand" "r")))]
  "TARGET_NEON"
  "vdup.16\t%q0, %1"
  [(set_attr "type" "neon_from_gp_q")]
)
3759 | ||
;; Broadcast a 32-bit scalar to every lane of a V32-mode vector.  The
;; first alternative takes the scalar from a core register ("r"), the
;; second from a register matching the "t" constraint (printed with %y).
(define_insn "neon_vdup_n<mode>"
  [(set (match_operand:V32 0 "s_register_operand" "=w,w")
        (vec_duplicate:V32
          (match_operand:<V_elem> 1 "s_register_operand" "r,t")))]
  "TARGET_NEON"
  "@
   vdup.<V_sz_elem>\t%<V_reg>0, %1
   vdup.<V_sz_elem>\t%<V_reg>0, %y1"
  [(set_attr "type" "neon_from_gp<q>,neon_dup<q>")]
)
3769 | ||
;; "Broadcast" into a DImode vector: with both operands in DImode this is
;; just a register-to-register move.
(define_expand "neon_vdup_ndi"
  [(match_operand:DI 0 "s_register_operand")
   (match_operand:DI 1 "s_register_operand")]
  "TARGET_NEON"
{
  emit_move_insn (operands[0], operands[1]);
  DONE;
}
)
88f77cba JB |
3779 | |
;; Broadcast a DImode scalar to both lanes of a V2DI vector using two
;; vmov instructions (hence length 8), one per half (%e0 / %f0) of the
;; destination.  The source is either a core-register pair (%Q1, %R1) or
;; a NEON register (%P1).
(define_insn "neon_vdup_nv2di"
  [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
        (vec_duplicate:V2DI
          (match_operand:DI 1 "s_register_operand" "r,w")))]
  "TARGET_NEON"
  "@
   vmov\t%e0, %Q1, %R1\;vmov\t%f0, %Q1, %R1
   vmov\t%e0, %P1\;vmov\t%f0, %P1"
  [(set_attr "length" "8")
   (set_attr "type" "multiple")]
)
88f77cba | 3790 | |
;; Broadcast lane operand 2 of the <V_double_vector_mode> source to every
;; lane of a VDQW destination.  On big-endian targets the lane number is
;; mirrored within the source vector before printing.
(define_insn "neon_vdup_lane<mode>_internal"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (vec_duplicate:VDQW
          (vec_select:<V_elem>
            (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      const int rtl_lane = INTVAL (operands[2]);
      const int hw_lane
	= GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - rtl_lane;
      operands[2] = GEN_INT (hw_lane);
    }

  /* Only the destination register size differs between the two forms.  */
  return <Is_d_reg>
	 ? "vdup.<V_sz_elem>\t%P0, %P1[%c2]"
	 : "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
}
  [(set_attr "type" "neon_dup<q>")]
)
88f77cba | 3812 | |
;; Half-precision / bfloat16 (VHFBF modes) form of the lane broadcast.
;; On big-endian targets the lane number is mirrored within the source
;; vector before printing.
(define_insn "neon_vdup_lane<mode>_internal"
  [(set (match_operand:VHFBF 0 "s_register_operand" "=w")
        (vec_duplicate:VHFBF
          (vec_select:<V_elem>
            (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON && (TARGET_FP16 || TARGET_BF16_SIMD)"
{
  if (BYTES_BIG_ENDIAN)
    {
      const int rtl_lane = INTVAL (operands[2]);
      const int hw_lane
	= GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - rtl_lane;
      operands[2] = GEN_INT (hw_lane);
    }

  /* Only the destination register size differs between the two forms.  */
  return <Is_d_reg>
	 ? "vdup.<V_sz_elem>\t%P0, %P1[%c2]"
	 : "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
}
  [(set_attr "type" "neon_dup<q>")]
)
3834 | ||
;; Expander for the lane-broadcast builtin (VDQW modes).  On big-endian
;; targets the lane index is first flipped within a 64-bit register, then
;; the internal pattern is emitted.
(define_expand "neon_vdup_lane<mode>"
  [(match_operand:VDQW 0 "s_register_operand")
   (match_operand:<V_double_vector_mode> 1 "s_register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      const unsigned int lanes_per_d
	= 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
      const unsigned int intrinsic_lane = INTVAL (operands[2]);
      operands[2] = GEN_INT (intrinsic_lane ^ (lanes_per_d - 1));
    }

  emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
						operands[2]));
  DONE;
})
88f77cba | 3853 | |
;; Expander for the lane-broadcast builtin (VHFBF modes).  On big-endian
;; targets the lane index is first flipped within a 64-bit register, then
;; the internal pattern is emitted.
(define_expand "neon_vdup_lane<mode>"
  [(match_operand:VHFBF 0 "s_register_operand")
   (match_operand:<V_double_vector_mode> 1 "s_register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON && (TARGET_FP16 || TARGET_BF16_SIMD)"
{
  if (BYTES_BIG_ENDIAN)
    {
      const unsigned int lanes_per_d
	= 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
      const unsigned int intrinsic_lane = INTVAL (operands[2]);
      operands[2] = GEN_INT (intrinsic_lane ^ (lanes_per_d - 1));
    }

  emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
						operands[2]));
  DONE;
})
3872 | ||
;; The lane index (operand 2) is ignored because zero is the only valid
;; value here; the expansion is a plain move.
(define_expand "neon_vdup_lanedi"
  [(match_operand:DI 0 "s_register_operand")
   (match_operand:DI 1 "s_register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON"
{
  emit_move_insn (operands[0], operands[1]);
  DONE;
})
3883 | ||
;; The lane index (operand 2) is ignored for V2DI as well: the DImode
;; source operand has only a single element, so this reduces to
;; neon_vdup_nv2di.
(define_expand "neon_vdup_lanev2di"
  [(match_operand:V2DI 0 "s_register_operand")
   (match_operand:DI 1 "s_register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON"
{
  emit_insn (gen_neon_vdup_nv2di (operands[0], operands[1]));
  DONE;
})
88f77cba | 3894 | |
;; Swap the contents of two vector registers.  The pattern is only
;; enabled once reload has completed, so that combine cannot do anything
;; silly with it; it is used by the post-reload expansion of
;; neon_vcombine.
(define_insn "*neon_vswp<mode>"
  [(set (match_operand:VDQX 0 "s_register_operand" "+w")
        (match_operand:VDQX 1 "s_register_operand" "+w"))
   (set (match_dup 1)
        (match_dup 0))]
  "TARGET_NEON && reload_completed"
  "vswp\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_permute<q>")]
)
3905 | ||
;; Concatenate two D-register vectors: operand 1 becomes the low part and
;; operand 2 the high part of the destination.  The insn is always split
;; (output template "#") after reload by neon_split_vcombine.
;; FIXME: A different implementation of this builtin could make it much
;; more likely that nothing needs to be output at all, by letting the
;; register allocator place the values in the right registers to begin
;; with.  The lack of subregs for vectors makes that tricky, though.

(define_insn_and_split "neon_vcombine<mode>"
  [(set (match_operand:<V_DOUBLE> 0 "s_register_operand" "=w")
        (vec_concat:<V_DOUBLE>
          (match_operand:VDX 1 "s_register_operand" "w")
          (match_operand:VDX 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  neon_split_vcombine (operands);
  DONE;
}
  [(set_attr "type" "multiple")]
)
88f77cba | 3928 | |
;; Extract the high <V_HALF> part of a Q-register vector as the subreg
;; that starts GET_MODE_SIZE (<V_HALF>mode) bytes into operand 1.
(define_expand "neon_vget_high<mode>"
  [(match_operand:<V_HALF> 0 "s_register_operand")
   (match_operand:VQXBF 1 "s_register_operand")]
  "TARGET_NEON"
{
  rtx high_half = simplify_gen_subreg (<V_HALF>mode, operands[1],
				       <MODE>mode,
				       GET_MODE_SIZE (<V_HALF>mode));
  emit_move_insn (operands[0], high_half);
  DONE;
})
a277dd9b | 3939 | |
;; Extract the low <V_HALF> part of a Q-register vector as the subreg at
;; byte offset 0 of operand 1.
;; NOTE(review): this pattern iterates over VQX while neon_vget_high
;; iterates over VQXBF -- confirm the difference is intended.
(define_expand "neon_vget_low<mode>"
  [(match_operand:<V_HALF> 0 "s_register_operand")
   (match_operand:VQX 1 "s_register_operand")]
  "TARGET_NEON"
{
  rtx low_half = simplify_gen_subreg (<V_HALF>mode, operands[1],
				      <MODE>mode, 0);
  emit_move_insn (operands[0], low_half);
  DONE;
})
88f77cba | 3950 | |
;; Signed integer vector -> float vector conversion.  Disabled under
;; -frounding-math.
(define_insn "float<mode><V_cvtto>2"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (float:<V_CVTTO>
          (match_operand:VCVTI 1 "s_register_operand" "w")))]
  "TARGET_NEON && !flag_rounding_math"
  "vcvt.f32.s32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
)
3958 | ||
;; Unsigned integer vector -> float vector conversion.  Disabled under
;; -frounding-math.
(define_insn "floatuns<mode><V_cvtto>2"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unsigned_float:<V_CVTTO>
          (match_operand:VCVTI 1 "s_register_operand" "w")))]
  "TARGET_NEON && !flag_rounding_math"
  "vcvt.f32.u32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
)
3966 | ||
;; Float vector -> signed integer vector conversion (RTL "fix").
(define_insn "fix_trunc<mode><V_cvtto>2"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (fix:<V_CVTTO>
          (match_operand:VCVTF 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vcvt.s32.f32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
)
3974 | ||
;; Float vector -> unsigned integer vector conversion (RTL "unsigned_fix").
(define_insn "fixuns_trunc<mode><V_cvtto>2"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unsigned_fix:<V_CVTTO>
          (match_operand:VCVTF 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vcvt.u32.f32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
)
3982 | ||
;; Builtin form of the f32 -> 32-bit integer conversion; signedness comes
;; from the VCVT_US iterator via <sup>.
(define_insn "neon_vcvt<sup><mode>"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<V_CVTTO>
          [(match_operand:VCVTF 1 "s_register_operand" "w")]
          VCVT_US))]
  "TARGET_NEON"
  "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
)
88f77cba | 3991 | |
;; Builtin form of the 32-bit integer -> f32 conversion; signedness comes
;; from the VCVT_US iterator via <sup>.
(define_insn "neon_vcvt<sup><mode>"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<V_CVTTO>
          [(match_operand:VCVTI 1 "s_register_operand" "w")]
          VCVT_US))]
  "TARGET_NEON"
  "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
)
88f77cba | 4000 | |
;; Widen a V4HF vector (D register) to V4SF (Q register).
(define_insn "neon_vcvtv4sfv4hf"
  [(set (match_operand:V4SF 0 "s_register_operand" "=w")
        (unspec:V4SF
          [(match_operand:V4HF 1 "s_register_operand" "w")]
          UNSPEC_VCVT))]
  "TARGET_NEON && TARGET_FP16"
  "vcvt.f32.f16\t%q0, %P1"
  [(set_attr "type" "neon_fp_cvt_widen_h")]
)
4009 | ||
;; Narrow a V4SF vector (Q register) to V4HF (D register).
(define_insn "neon_vcvtv4hfv4sf"
  [(set (match_operand:V4HF 0 "s_register_operand" "=w")
        (unspec:V4HF
          [(match_operand:V4SF 1 "s_register_operand" "w")]
          UNSPEC_VCVT))]
  "TARGET_NEON && TARGET_FP16"
  "vcvt.f16.f32\t%P0, %q1"
  [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
)
4018 | ||
;; 16-bit integer -> f16 vector conversion; signedness comes from the
;; VCVT_US iterator via <sup>.
(define_insn "neon_vcvt<sup><mode>"
  [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<VH_CVTTO>
          [(match_operand:VCVTHI 1 "s_register_operand" "w")]
          VCVT_US))]
  "TARGET_NEON_FP16INST"
  "vcvt.f16.<sup>%#16\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_int_to_fp_<VH_elem_ch><q>")]
)
4029 | ||
;; f16 -> 16-bit integer vector conversion; signedness comes from the
;; VCVT_US iterator via <sup>.
(define_insn "neon_vcvt<sup><mode>"
  [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<VH_CVTTO>
          [(match_operand:VH 1 "s_register_operand" "w")]
          VCVT_US))]
  "TARGET_NEON_FP16INST"
  "vcvt.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
)
4040 | ||
;; f32 -> 32-bit integer conversion taking an extra immediate, operand 2,
;; which is printed as the instruction's last operand.  The immediate is
;; bounds-checked with arm_const_bounds (operands[2], 1, 33) when the
;; insn is output.
(define_insn "neon_vcvt<sup>_n<mode>"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<V_CVTTO>
          [(match_operand:VCVTF 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VCVT_US_N))]
  "TARGET_NEON"
{
  /* Diagnose an out-of-range immediate before printing.  */
  arm_const_bounds (operands[2], 1, 33);
  return "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
)
88f77cba | 4053 | |
;; f16 -> 16-bit integer conversion taking an extra immediate, operand 2,
;; which is printed as the instruction's last operand.  The immediate is
;; bounds-checked with arm_const_bounds (operands[2], 0, 17) when the
;; insn is output.
;; NOTE(review): the lower bound is 0 here but 1 in the 32-bit
;; neon_vcvt<sup>_n patterns -- confirm that an immediate of 0 is meant
;; to be accepted.
(define_insn "neon_vcvt<sup>_n<mode>"
  [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<VH_CVTTO>
          [(match_operand:VH 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VCVT_US_N))]
  "TARGET_NEON_FP16INST"
{
  /* Diagnose an out-of-range immediate before printing.  */
  arm_const_bounds (operands[2], 0, 17);
  return "vcvt.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
)
4067 | ||
;; 32-bit integer -> f32 conversion taking an extra immediate, operand 2,
;; which is printed as the instruction's last operand.  The immediate is
;; bounds-checked with arm_const_bounds (operands[2], 1, 33) when the
;; insn is output.
(define_insn "neon_vcvt<sup>_n<mode>"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<V_CVTTO>
          [(match_operand:VCVTI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VCVT_US_N))]
  "TARGET_NEON"
{
  /* Diagnose an out-of-range immediate before printing.  */
  arm_const_bounds (operands[2], 1, 33);
  return "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
)
88f77cba | 4080 | |
;; 16-bit integer -> f16 conversion taking an extra immediate, operand 2,
;; which is printed as the instruction's last operand.  The immediate is
;; bounds-checked with arm_const_bounds (operands[2], 0, 17) when the
;; insn is output.
;; NOTE(review): the lower bound is 0 here but 1 in the 32-bit
;; neon_vcvt<sup>_n patterns -- confirm that an immediate of 0 is meant
;; to be accepted.
(define_insn "neon_vcvt<sup>_n<mode>"
  [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<VH_CVTTO>
          [(match_operand:VCVTHI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VCVT_US_N))]
  "TARGET_NEON_FP16INST"
{
  /* Diagnose an out-of-range immediate before printing.  */
  arm_const_bounds (operands[2], 0, 17);
  return "vcvt.f16.<sup>%#16\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_int_to_fp_<VH_elem_ch><q>")]
)
4094 | ||
;; f16 -> 16-bit integer conversions whose mnemonic suffix <vcvth_op> and
;; signedness <sup> are both selected by the VCVT_HF_US iterator.
(define_insn "neon_vcvt<vcvth_op><sup><mode>"
  [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<VH_CVTTO>
          [(match_operand:VH 1 "s_register_operand" "w")]
          VCVT_HF_US))]
  "TARGET_NEON_FP16INST"
  "vcvt<vcvth_op>.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
)
4105 | ||
;; Narrowing move: Q-register source (VN modes) to a <V_narrow>
;; D-register result.
(define_insn "neon_vmovn<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow>
          [(match_operand:VN 1 "s_register_operand" "w")]
          UNSPEC_VMOVN))]
  "TARGET_NEON"
  "vmovn.<V_if_elem>\t%P0, %q1"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)
88f77cba | 4114 | |
;; Saturating narrowing move (vqmovn); signedness comes from the VQMOVN
;; iterator via <sup>.
(define_insn "neon_vqmovn<sup><mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow>
          [(match_operand:VN 1 "s_register_operand" "w")]
          VQMOVN))]
  "TARGET_NEON"
  "vqmovn.<sup>%#<V_sz_elem>\t%P0, %q1"
  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)
88f77cba JB |
4123 | |
;; Saturating narrowing move emitted as vqmovun with a signed element
;; suffix (<V_s_elem>).
(define_insn "neon_vqmovun<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow>
          [(match_operand:VN 1 "s_register_operand" "w")]
          UNSPEC_VQMOVUN))]
  "TARGET_NEON"
  "vqmovun.<V_s_elem>\t%P0, %q1"
  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)
88f77cba | 4132 | |
;; Widening move: D-register source (VW modes) to a <V_widen> Q-register
;; result; signedness comes from the VMOVL iterator via <sup>.
(define_insn "neon_vmovl<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:VW 1 "s_register_operand" "w")]
          VMOVL))]
  "TARGET_NEON"
  "vmovl.<sup>%#<V_sz_elem>\t%q0, %P1"
  [(set_attr "type" "neon_shift_imm_long")]
)
88f77cba JB |
4141 | |
;; Multiply a D-register vector (operand 1) by lane operand 3 of the
;; D-register vector in operand 2.  The scheduling type depends on
;; whether the mode is a floating-point one.
(define_insn "neon_vmul_lane<mode>"
  [(set (match_operand:VMD 0 "s_register_operand" "=w")
        (unspec:VMD
          [(match_operand:VMD 1 "s_register_operand" "w")
           (match_operand:VMD 2 "s_register_operand"
                                "<scalar_mul_constraint>")
           (match_operand:SI 3 "immediate_operand" "i")]
          UNSPEC_VMUL_LANE))]
  "TARGET_NEON"
  "vmul.<V_if_elem>\t%P0, %P1, %P2[%c3]"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_mul_s_scalar<q>")
                      (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
)
88f77cba JB |
4158 | |
;; Multiply a Q-register vector (operand 1) by lane operand 3 of the
;; <V_HALF> D-register vector in operand 2.  The scheduling type depends
;; on whether the mode is a floating-point one.
(define_insn "neon_vmul_lane<mode>"
  [(set (match_operand:VMQ 0 "s_register_operand" "=w")
        (unspec:VMQ
          [(match_operand:VMQ 1 "s_register_operand" "w")
           (match_operand:<V_HALF> 2 "s_register_operand"
                                     "<scalar_mul_constraint>")
           (match_operand:SI 3 "immediate_operand" "i")]
          UNSPEC_VMUL_LANE))]
  "TARGET_NEON"
  "vmul.<V_if_elem>\t%q0, %q1, %P2[%c3]"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_mul_s_scalar<q>")
                      (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
)
88f77cba | 4175 | |
;; Half-precision multiply by lane: operand 1 (VH modes) times lane
;; operand 3 of the V4HF vector in operand 2.
(define_insn "neon_vmul_lane<mode>"
  [(set (match_operand:VH 0 "s_register_operand" "=w")
        (unspec:VH
          [(match_operand:VH 1 "s_register_operand" "w")
           (match_operand:V4HF 2 "s_register_operand"
                                 "<scalar_mul_constraint>")
           (match_operand:SI 3 "immediate_operand" "i")]
          UNSPEC_VMUL_LANE))]
  "TARGET_NEON_FP16INST"
  "vmul.f16\t%<V_reg>0, %<V_reg>1, %P2[%c3]"
  [(set_attr "type" "neon_fp_mul_s_scalar<q>")]
)
4187 | ||
;; Widening multiply by lane: D-register operands (VMDI modes) produce a
;; <V_widen> Q-register result; signedness comes from the VMULL_LANE
;; iterator via <sup>.
(define_insn "neon_vmull<sup>_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:VMDI 1 "s_register_operand" "w")
           (match_operand:VMDI 2 "s_register_operand"
                                 "<scalar_mul_constraint>")
           (match_operand:SI 3 "immediate_operand" "i")]
          VMULL_LANE))]
  "TARGET_NEON"
  "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2[%c3]"
  [(set_attr "type" "neon_mul_<V_elem_ch>_scalar_long")]
)
88f77cba JB |
4201 | |
;; vqdmull by lane: D-register operands (VMDI modes) produce a <V_widen>
;; Q-register result.
(define_insn "neon_vqdmull_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:VMDI 1 "s_register_operand" "w")
           (match_operand:VMDI 2 "s_register_operand"
                                 "<scalar_mul_constraint>")
           (match_operand:SI 3 "immediate_operand" "i")]
          UNSPEC_VQDMULL_LANE))]
  "TARGET_NEON"
  "vqdmull.<V_s_elem>\t%q0, %P1, %P2[%c3]"
  [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_long")]
)
88f77cba | 4215 | |
;; vqdmulh / vqrdmulh by lane, Q-register form (VMQI modes).  The scalar
;; is lane operand 3 of the <V_HALF> vector in operand 2; <r> comes from
;; the VQDMULH_LANE iterator.
(define_insn "neon_vq<r>dmulh_lane<mode>"
  [(set (match_operand:VMQI 0 "s_register_operand" "=w")
        (unspec:VMQI
          [(match_operand:VMQI 1 "s_register_operand" "w")
           (match_operand:<V_HALF> 2 "s_register_operand"
                                     "<scalar_mul_constraint>")
           (match_operand:SI 3 "immediate_operand" "i")]
          VQDMULH_LANE))]
  "TARGET_NEON"
  "vq<r>dmulh.<V_s_elem>\t%q0, %q1, %P2[%c3]"
  [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
)
88f77cba | 4229 | |
;; vqdmulh / vqrdmulh by lane, D-register form (VMDI modes); <r> comes
;; from the VQDMULH_LANE iterator.
;; NOTE(review): the type attribute carries the "_q" suffix even though
;; every operand is printed as a D register -- confirm whether the
;; non-"_q" scheduling type was intended.
(define_insn "neon_vq<r>dmulh_lane<mode>"
  [(set (match_operand:VMDI 0 "s_register_operand" "=w")
        (unspec:VMDI
          [(match_operand:VMDI 1 "s_register_operand" "w")
           (match_operand:VMDI 2 "s_register_operand"
                                 "<scalar_mul_constraint>")
           (match_operand:SI 3 "immediate_operand" "i")]
          VQDMULH_LANE))]
  "TARGET_NEON"
  "vq<r>dmulh.<V_s_elem>\t%P0, %P1, %P2[%c3]"
  [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
)
88f77cba | 4243 | |
;; vqrdmlah_lane, vqrdmlsh_lane -- Q-register form (VMQI modes).
;; Operand 1 is tied to the destination (constraint "0"); the scalar is
;; lane operand 4 of the <V_HALF> vector in operand 3.
(define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
  [(set (match_operand:VMQI 0 "s_register_operand" "=w")
        (unspec:VMQI
          [(match_operand:VMQI 1 "s_register_operand" "0")
           (match_operand:VMQI 2 "s_register_operand" "w")
           (match_operand:<V_HALF> 3 "s_register_operand"
                                     "<scalar_mul_constraint>")
           (match_operand:SI 4 "immediate_operand" "i")]
          VQRDMLH_AS))]
  "TARGET_NEON_RDMA"
  "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%q0, %q2, %P3[%c4]"
  [(set_attr "type" "neon_mla_<V_elem_ch>_scalar<q>")]
)
4260 | ||
4261 | (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>" | |
4262 | [(set (match_operand:VMDI 0 "s_register_operand" "=w") | |
4263 | (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "0") | |
4264 | (match_operand:VMDI 2 "s_register_operand" "w") | |
4265 | (match_operand:VMDI 3 "s_register_operand" | |
4266 | "<scalar_mul_constraint>") | |
4267 | (match_operand:SI 4 "immediate_operand" "i")] | |
4268 | VQRDMLH_AS))] | |
4269 | "TARGET_NEON_RDMA" | |
4270 | { | |
4271 | return | |
4272 | "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%P0, %P2, %P3[%c4]"; | |
4273 | } | |
4274 | [(set_attr "type" "neon_mla_<V_elem_ch>_scalar")] | |
4275 | ) | |
4276 | ||
88f77cba JB |
4277 | (define_insn "neon_vmla_lane<mode>" |
4278 | [(set (match_operand:VMD 0 "s_register_operand" "=w") | |
4279 | (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0") | |
4280 | (match_operand:VMD 2 "s_register_operand" "w") | |
4281 | (match_operand:VMD 3 "s_register_operand" | |
4282 | "<scalar_mul_constraint>") | |
94f0f2cc | 4283 | (match_operand:SI 4 "immediate_operand" "i")] |
88f77cba JB |
4284 | UNSPEC_VMLA_LANE))] |
4285 | "TARGET_NEON" | |
b617fc71 | 4286 | { |
b617fc71 JB |
4287 | return "vmla.<V_if_elem>\t%P0, %P2, %P3[%c4]"; |
4288 | } | |
003bb7f3 | 4289 | [(set (attr "type") |
b75b1be2 | 4290 | (if_then_else (match_test "<Is_float_mode>") |
f7379e5e JG |
4291 | (const_string "neon_fp_mla_s_scalar<q>") |
4292 | (const_string "neon_mla_<V_elem_ch>_scalar<q>")))] | |
c956e102 | 4293 | ) |
88f77cba JB |
4294 | |
4295 | (define_insn "neon_vmla_lane<mode>" | |
4296 | [(set (match_operand:VMQ 0 "s_register_operand" "=w") | |
4297 | (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0") | |
4298 | (match_operand:VMQ 2 "s_register_operand" "w") | |
4299 | (match_operand:<V_HALF> 3 "s_register_operand" | |
4300 | "<scalar_mul_constraint>") | |
94f0f2cc | 4301 | (match_operand:SI 4 "immediate_operand" "i")] |
88f77cba JB |
4302 | UNSPEC_VMLA_LANE))] |
4303 | "TARGET_NEON" | |
b617fc71 | 4304 | { |
b617fc71 JB |
4305 | return "vmla.<V_if_elem>\t%q0, %q2, %P3[%c4]"; |
4306 | } | |
003bb7f3 | 4307 | [(set (attr "type") |
b75b1be2 | 4308 | (if_then_else (match_test "<Is_float_mode>") |
f7379e5e JG |
4309 | (const_string "neon_fp_mla_s_scalar<q>") |
4310 | (const_string "neon_mla_<V_elem_ch>_scalar<q>")))] | |
c956e102 | 4311 | ) |
88f77cba | 4312 | |
94f0f2cc | 4313 | (define_insn "neon_vmlal<sup>_lane<mode>" |
88f77cba JB |
4314 | [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") |
4315 | (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0") | |
4316 | (match_operand:VMDI 2 "s_register_operand" "w") | |
4317 | (match_operand:VMDI 3 "s_register_operand" | |
4318 | "<scalar_mul_constraint>") | |
94f0f2cc JG |
4319 | (match_operand:SI 4 "immediate_operand" "i")] |
4320 | VMLAL_LANE))] | |
88f77cba | 4321 | "TARGET_NEON" |
b617fc71 | 4322 | { |
94f0f2cc | 4323 | return "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]"; |
b617fc71 | 4324 | } |
f7379e5e | 4325 | [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")] |
c956e102 | 4326 | ) |
88f77cba JB |
4327 | |
4328 | (define_insn "neon_vqdmlal_lane<mode>" | |
4329 | [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") | |
4330 | (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0") | |
4331 | (match_operand:VMDI 2 "s_register_operand" "w") | |
4332 | (match_operand:VMDI 3 "s_register_operand" | |
4333 | "<scalar_mul_constraint>") | |
94f0f2cc | 4334 | (match_operand:SI 4 "immediate_operand" "i")] |
88f77cba JB |
4335 | UNSPEC_VQDMLAL_LANE))] |
4336 | "TARGET_NEON" | |
b617fc71 | 4337 | { |
b617fc71 JB |
4338 | return "vqdmlal.<V_s_elem>\t%q0, %P2, %P3[%c4]"; |
4339 | } | |
f7379e5e | 4340 | [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")] |
c956e102 | 4341 | ) |
88f77cba JB |
4342 | |
4343 | (define_insn "neon_vmls_lane<mode>" | |
4344 | [(set (match_operand:VMD 0 "s_register_operand" "=w") | |
4345 | (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0") | |
4346 | (match_operand:VMD 2 "s_register_operand" "w") | |
4347 | (match_operand:VMD 3 "s_register_operand" | |
4348 | "<scalar_mul_constraint>") | |
94f0f2cc | 4349 | (match_operand:SI 4 "immediate_operand" "i")] |
88f77cba JB |
4350 | UNSPEC_VMLS_LANE))] |
4351 | "TARGET_NEON" | |
b617fc71 | 4352 | { |
b617fc71 JB |
4353 | return "vmls.<V_if_elem>\t%P0, %P2, %P3[%c4]"; |
4354 | } | |
003bb7f3 | 4355 | [(set (attr "type") |
b75b1be2 | 4356 | (if_then_else (match_test "<Is_float_mode>") |
f7379e5e JG |
4357 | (const_string "neon_fp_mla_s_scalar<q>") |
4358 | (const_string "neon_mla_<V_elem_ch>_scalar<q>")))] | |
c956e102 | 4359 | ) |
88f77cba JB |
4360 | |
4361 | (define_insn "neon_vmls_lane<mode>" | |
4362 | [(set (match_operand:VMQ 0 "s_register_operand" "=w") | |
4363 | (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0") | |
4364 | (match_operand:VMQ 2 "s_register_operand" "w") | |
4365 | (match_operand:<V_HALF> 3 "s_register_operand" | |
4366 | "<scalar_mul_constraint>") | |
94f0f2cc | 4367 | (match_operand:SI 4 "immediate_operand" "i")] |
88f77cba JB |
4368 | UNSPEC_VMLS_LANE))] |
4369 | "TARGET_NEON" | |
b617fc71 | 4370 | { |
b617fc71 JB |
4371 | return "vmls.<V_if_elem>\t%q0, %q2, %P3[%c4]"; |
4372 | } | |
003bb7f3 | 4373 | [(set (attr "type") |
b75b1be2 | 4374 | (if_then_else (match_test "<Is_float_mode>") |
f7379e5e JG |
4375 | (const_string "neon_fp_mla_s_scalar<q>") |
4376 | (const_string "neon_mla_<V_elem_ch>_scalar<q>")))] | |
c956e102 | 4377 | ) |
88f77cba | 4378 | |
94f0f2cc | 4379 | (define_insn "neon_vmlsl<sup>_lane<mode>" |
88f77cba JB |
4380 | [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") |
4381 | (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0") | |
4382 | (match_operand:VMDI 2 "s_register_operand" "w") | |
4383 | (match_operand:VMDI 3 "s_register_operand" | |
4384 | "<scalar_mul_constraint>") | |
94f0f2cc JG |
4385 | (match_operand:SI 4 "immediate_operand" "i")] |
4386 | VMLSL_LANE))] | |
88f77cba | 4387 | "TARGET_NEON" |
b617fc71 | 4388 | { |
94f0f2cc | 4389 | return "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]"; |
b617fc71 | 4390 | } |
f7379e5e | 4391 | [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")] |
c956e102 | 4392 | ) |
88f77cba JB |
4393 | |
4394 | (define_insn "neon_vqdmlsl_lane<mode>" | |
4395 | [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") | |
4396 | (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0") | |
4397 | (match_operand:VMDI 2 "s_register_operand" "w") | |
4398 | (match_operand:VMDI 3 "s_register_operand" | |
4399 | "<scalar_mul_constraint>") | |
94f0f2cc | 4400 | (match_operand:SI 4 "immediate_operand" "i")] |
88f77cba JB |
4401 | UNSPEC_VQDMLSL_LANE))] |
4402 | "TARGET_NEON" | |
b617fc71 | 4403 | { |
b617fc71 JB |
4404 | return "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3[%c4]"; |
4405 | } | |
f7379e5e | 4406 | [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")] |
c956e102 | 4407 | ) |
88f77cba JB |
4408 | |
4409 | ; FIXME: For the "_n" multiply/multiply-accumulate insns, we copy a value in a | |
4410 | ; core register into a temp register, then use a scalar taken from that. This | |
4411 | ; isn't an optimal solution if e.g. the scalar has just been read from memory | |
4412 | ; or extracted from another vector. The latter case it's currently better to | |
4413 | ; use the "_lane" variant, and the former case can probably be implemented | |
4414 | ; using vld1_lane, but that hasn't been done yet. | |
4415 | ||
4416 | (define_expand "neon_vmul_n<mode>" | |
cd65e265 DZ |
4417 | [(match_operand:VMD 0 "s_register_operand") |
4418 | (match_operand:VMD 1 "s_register_operand") | |
4419 | (match_operand:<V_elem> 2 "s_register_operand")] | |
88f77cba JB |
4420 | "TARGET_NEON" |
4421 | { | |
4422 | rtx tmp = gen_reg_rtx (<MODE>mode); | |
4423 | emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx)); | |
4424 | emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp, | |
94f0f2cc | 4425 | const0_rtx)); |
88f77cba JB |
4426 | DONE; |
4427 | }) | |
4428 | ||
4429 | (define_expand "neon_vmul_n<mode>" | |
cd65e265 DZ |
4430 | [(match_operand:VMQ 0 "s_register_operand") |
4431 | (match_operand:VMQ 1 "s_register_operand") | |
4432 | (match_operand:<V_elem> 2 "s_register_operand")] | |
88f77cba JB |
4433 | "TARGET_NEON" |
4434 | { | |
4435 | rtx tmp = gen_reg_rtx (<V_HALF>mode); | |
4436 | emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx)); | |
4437 | emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp, | |
94f0f2cc | 4438 | const0_rtx)); |
88f77cba JB |
4439 | DONE; |
4440 | }) | |
4441 | ||
55a9b91b MW |
4442 | (define_expand "neon_vmul_n<mode>" |
4443 | [(match_operand:VH 0 "s_register_operand") | |
4444 | (match_operand:VH 1 "s_register_operand") | |
4445 | (match_operand:<V_elem> 2 "s_register_operand")] | |
4446 | "TARGET_NEON_FP16INST" | |
4447 | { | |
4448 | rtx tmp = gen_reg_rtx (V4HFmode); | |
4449 | emit_insn (gen_neon_vset_lanev4hf (tmp, operands[2], tmp, const0_rtx)); | |
4450 | emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp, | |
4451 | const0_rtx)); | |
4452 | DONE; | |
4453 | }) | |
4454 | ||
94f0f2cc | 4455 | (define_expand "neon_vmulls_n<mode>" |
cd65e265 DZ |
4456 | [(match_operand:<V_widen> 0 "s_register_operand") |
4457 | (match_operand:VMDI 1 "s_register_operand") | |
4458 | (match_operand:<V_elem> 2 "s_register_operand")] | |
88f77cba JB |
4459 | "TARGET_NEON" |
4460 | { | |
4461 | rtx tmp = gen_reg_rtx (<MODE>mode); | |
4462 | emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx)); | |
94f0f2cc JG |
4463 | emit_insn (gen_neon_vmulls_lane<mode> (operands[0], operands[1], tmp, |
4464 | const0_rtx)); | |
4465 | DONE; | |
4466 | }) | |
4467 | ||
4468 | (define_expand "neon_vmullu_n<mode>" | |
cd65e265 DZ |
4469 | [(match_operand:<V_widen> 0 "s_register_operand") |
4470 | (match_operand:VMDI 1 "s_register_operand") | |
4471 | (match_operand:<V_elem> 2 "s_register_operand")] | |
94f0f2cc JG |
4472 | "TARGET_NEON" |
4473 | { | |
4474 | rtx tmp = gen_reg_rtx (<MODE>mode); | |
4475 | emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx)); | |
4476 | emit_insn (gen_neon_vmullu_lane<mode> (operands[0], operands[1], tmp, | |
4477 | const0_rtx)); | |
88f77cba JB |
4478 | DONE; |
4479 | }) | |
4480 | ||
4481 | (define_expand "neon_vqdmull_n<mode>" | |
cd65e265 DZ |
4482 | [(match_operand:<V_widen> 0 "s_register_operand") |
4483 | (match_operand:VMDI 1 "s_register_operand") | |
4484 | (match_operand:<V_elem> 2 "s_register_operand")] | |
88f77cba JB |
4485 | "TARGET_NEON" |
4486 | { | |
4487 | rtx tmp = gen_reg_rtx (<MODE>mode); | |
4488 | emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx)); | |
4489 | emit_insn (gen_neon_vqdmull_lane<mode> (operands[0], operands[1], tmp, | |
94f0f2cc | 4490 | const0_rtx)); |
88f77cba JB |
4491 | DONE; |
4492 | }) | |
4493 | ||
4494 | (define_expand "neon_vqdmulh_n<mode>" | |
cd65e265 DZ |
4495 | [(match_operand:VMDI 0 "s_register_operand") |
4496 | (match_operand:VMDI 1 "s_register_operand") | |
4497 | (match_operand:<V_elem> 2 "s_register_operand")] | |
88f77cba JB |
4498 | "TARGET_NEON" |
4499 | { | |
4500 | rtx tmp = gen_reg_rtx (<MODE>mode); | |
4501 | emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx)); | |
4502 | emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp, | |
94f0f2cc JG |
4503 | const0_rtx)); |
4504 | DONE; | |
4505 | }) | |
4506 | ||
4507 | (define_expand "neon_vqrdmulh_n<mode>" | |
cd65e265 DZ |
4508 | [(match_operand:VMDI 0 "s_register_operand") |
4509 | (match_operand:VMDI 1 "s_register_operand") | |
4510 | (match_operand:<V_elem> 2 "s_register_operand")] | |
94f0f2cc JG |
4511 | "TARGET_NEON" |
4512 | { | |
4513 | rtx tmp = gen_reg_rtx (<MODE>mode); | |
4514 | emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx)); | |
4515 | emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp, | |
4516 | const0_rtx)); | |
88f77cba JB |
4517 | DONE; |
4518 | }) | |
4519 | ||
4520 | (define_expand "neon_vqdmulh_n<mode>" | |
cd65e265 DZ |
4521 | [(match_operand:VMQI 0 "s_register_operand") |
4522 | (match_operand:VMQI 1 "s_register_operand") | |
4523 | (match_operand:<V_elem> 2 "s_register_operand")] | |
88f77cba JB |
4524 | "TARGET_NEON" |
4525 | { | |
4526 | rtx tmp = gen_reg_rtx (<V_HALF>mode); | |
4527 | emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx)); | |
4528 | emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp, | |
94f0f2cc JG |
4529 | const0_rtx)); |
4530 | DONE; | |
4531 | }) | |
4532 | ||
4533 | (define_expand "neon_vqrdmulh_n<mode>" | |
cd65e265 DZ |
4534 | [(match_operand:VMQI 0 "s_register_operand") |
4535 | (match_operand:VMQI 1 "s_register_operand") | |
4536 | (match_operand:<V_elem> 2 "s_register_operand")] | |
94f0f2cc JG |
4537 | "TARGET_NEON" |
4538 | { | |
4539 | rtx tmp = gen_reg_rtx (<V_HALF>mode); | |
4540 | emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx)); | |
4541 | emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp, | |
4542 | const0_rtx)); | |
88f77cba JB |
4543 | DONE; |
4544 | }) | |
4545 | ||
4546 | (define_expand "neon_vmla_n<mode>" | |
cd65e265 DZ |
4547 | [(match_operand:VMD 0 "s_register_operand") |
4548 | (match_operand:VMD 1 "s_register_operand") | |
4549 | (match_operand:VMD 2 "s_register_operand") | |
4550 | (match_operand:<V_elem> 3 "s_register_operand")] | |
88f77cba JB |
4551 | "TARGET_NEON" |
4552 | { | |
4553 | rtx tmp = gen_reg_rtx (<MODE>mode); | |
4554 | emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx)); | |
4555 | emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2], | |
94f0f2cc | 4556 | tmp, const0_rtx)); |
88f77cba JB |
4557 | DONE; |
4558 | }) | |
4559 | ||
4560 | (define_expand "neon_vmla_n<mode>" | |
cd65e265 DZ |
4561 | [(match_operand:VMQ 0 "s_register_operand") |
4562 | (match_operand:VMQ 1 "s_register_operand") | |
4563 | (match_operand:VMQ 2 "s_register_operand") | |
4564 | (match_operand:<V_elem> 3 "s_register_operand")] | |
88f77cba JB |
4565 | "TARGET_NEON" |
4566 | { | |
4567 | rtx tmp = gen_reg_rtx (<V_HALF>mode); | |
4568 | emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx)); | |
4569 | emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2], | |
94f0f2cc | 4570 | tmp, const0_rtx)); |
88f77cba JB |
4571 | DONE; |
4572 | }) | |
4573 | ||
94f0f2cc | 4574 | (define_expand "neon_vmlals_n<mode>" |
cd65e265 DZ |
4575 | [(match_operand:<V_widen> 0 "s_register_operand") |
4576 | (match_operand:<V_widen> 1 "s_register_operand") | |
4577 | (match_operand:VMDI 2 "s_register_operand") | |
4578 | (match_operand:<V_elem> 3 "s_register_operand")] | |
88f77cba JB |
4579 | "TARGET_NEON" |
4580 | { | |
4581 | rtx tmp = gen_reg_rtx (<MODE>mode); | |
4582 | emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx)); | |
94f0f2cc JG |
4583 | emit_insn (gen_neon_vmlals_lane<mode> (operands[0], operands[1], operands[2], |
4584 | tmp, const0_rtx)); | |
4585 | DONE; | |
4586 | }) | |
4587 | ||
4588 | (define_expand "neon_vmlalu_n<mode>" | |
cd65e265 DZ |
4589 | [(match_operand:<V_widen> 0 "s_register_operand") |
4590 | (match_operand:<V_widen> 1 "s_register_operand") | |
4591 | (match_operand:VMDI 2 "s_register_operand") | |
4592 | (match_operand:<V_elem> 3 "s_register_operand")] | |
94f0f2cc JG |
4593 | "TARGET_NEON" |
4594 | { | |
4595 | rtx tmp = gen_reg_rtx (<MODE>mode); | |
4596 | emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx)); | |
4597 | emit_insn (gen_neon_vmlalu_lane<mode> (operands[0], operands[1], operands[2], | |
4598 | tmp, const0_rtx)); | |
88f77cba JB |
4599 | DONE; |
4600 | }) | |
4601 | ||
4602 | (define_expand "neon_vqdmlal_n<mode>" | |
cd65e265 DZ |
4603 | [(match_operand:<V_widen> 0 "s_register_operand") |
4604 | (match_operand:<V_widen> 1 "s_register_operand") | |
4605 | (match_operand:VMDI 2 "s_register_operand") | |
4606 | (match_operand:<V_elem> 3 "s_register_operand")] | |
88f77cba JB |
4607 | "TARGET_NEON" |
4608 | { | |
4609 | rtx tmp = gen_reg_rtx (<MODE>mode); | |
4610 | emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx)); | |
4611 | emit_insn (gen_neon_vqdmlal_lane<mode> (operands[0], operands[1], operands[2], | |
94f0f2cc | 4612 | tmp, const0_rtx)); |
88f77cba JB |
4613 | DONE; |
4614 | }) | |
4615 | ||
4616 | (define_expand "neon_vmls_n<mode>" | |
cd65e265 DZ |
4617 | [(match_operand:VMD 0 "s_register_operand") |
4618 | (match_operand:VMD 1 "s_register_operand") | |
4619 | (match_operand:VMD 2 "s_register_operand") | |
4620 | (match_operand:<V_elem> 3 "s_register_operand")] | |
88f77cba JB |
4621 | "TARGET_NEON" |
4622 | { | |
4623 | rtx tmp = gen_reg_rtx (<MODE>mode); | |
4624 | emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx)); | |
4625 | emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2], | |
94f0f2cc | 4626 | tmp, const0_rtx)); |
88f77cba JB |
4627 | DONE; |
4628 | }) | |
4629 | ||
4630 | (define_expand "neon_vmls_n<mode>" | |
cd65e265 DZ |
4631 | [(match_operand:VMQ 0 "s_register_operand") |
4632 | (match_operand:VMQ 1 "s_register_operand") | |
4633 | (match_operand:VMQ 2 "s_register_operand") | |
4634 | (match_operand:<V_elem> 3 "s_register_operand")] | |
88f77cba JB |
4635 | "TARGET_NEON" |
4636 | { | |
4637 | rtx tmp = gen_reg_rtx (<V_HALF>mode); | |
4638 | emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx)); | |
4639 | emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2], | |
94f0f2cc JG |
4640 | tmp, const0_rtx)); |
4641 | DONE; | |
4642 | }) | |
4643 | ||
4644 | (define_expand "neon_vmlsls_n<mode>" | |
cd65e265 DZ |
4645 | [(match_operand:<V_widen> 0 "s_register_operand") |
4646 | (match_operand:<V_widen> 1 "s_register_operand") | |
4647 | (match_operand:VMDI 2 "s_register_operand") | |
4648 | (match_operand:<V_elem> 3 "s_register_operand")] | |
94f0f2cc JG |
4649 | "TARGET_NEON" |
4650 | { | |
4651 | rtx tmp = gen_reg_rtx (<MODE>mode); | |
4652 | emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx)); | |
4653 | emit_insn (gen_neon_vmlsls_lane<mode> (operands[0], operands[1], operands[2], | |
4654 | tmp, const0_rtx)); | |
88f77cba JB |
4655 | DONE; |
4656 | }) | |
4657 | ||
94f0f2cc | 4658 | (define_expand "neon_vmlslu_n<mode>" |
cd65e265 DZ |
4659 | [(match_operand:<V_widen> 0 "s_register_operand") |
4660 | (match_operand:<V_widen> 1 "s_register_operand") | |
4661 | (match_operand:VMDI 2 "s_register_operand") | |
4662 | (match_operand:<V_elem> 3 "s_register_operand")] | |
88f77cba JB |
4663 | "TARGET_NEON" |
4664 | { | |
4665 | rtx tmp = gen_reg_rtx (<MODE>mode); | |
4666 | emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx)); | |
94f0f2cc JG |
4667 | emit_insn (gen_neon_vmlslu_lane<mode> (operands[0], operands[1], operands[2], |
4668 | tmp, const0_rtx)); | |
88f77cba JB |
4669 | DONE; |
4670 | }) | |
4671 | ||
4672 | (define_expand "neon_vqdmlsl_n<mode>" | |
cd65e265 DZ |
4673 | [(match_operand:<V_widen> 0 "s_register_operand") |
4674 | (match_operand:<V_widen> 1 "s_register_operand") | |
4675 | (match_operand:VMDI 2 "s_register_operand") | |
4676 | (match_operand:<V_elem> 3 "s_register_operand")] | |
88f77cba JB |
4677 | "TARGET_NEON" |
4678 | { | |
4679 | rtx tmp = gen_reg_rtx (<MODE>mode); | |
4680 | emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx)); | |
4681 | emit_insn (gen_neon_vqdmlsl_lane<mode> (operands[0], operands[1], operands[2], | |
94f0f2cc | 4682 | tmp, const0_rtx)); |
88f77cba JB |
4683 | DONE; |
4684 | }) | |
4685 | ||
ff03f2d1 | 4686 | (define_insn "@neon_vext<mode>" |
88f77cba JB |
4687 | [(set (match_operand:VDQX 0 "s_register_operand" "=w") |
4688 | (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w") | |
4689 | (match_operand:VDQX 2 "s_register_operand" "w") | |
4690 | (match_operand:SI 3 "immediate_operand" "i")] | |
4691 | UNSPEC_VEXT))] | |
4692 | "TARGET_NEON" | |
b617fc71 | 4693 | { |
d57daa0c | 4694 | arm_const_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode)); |
b617fc71 JB |
4695 | return "vext.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, %3"; |
4696 | } | |
f7379e5e | 4697 | [(set_attr "type" "neon_ext<q>")] |
c956e102 | 4698 | ) |
88f77cba | 4699 | |
ff03f2d1 | 4700 | (define_insn "@neon_vrev64<mode>" |
88f77cba | 4701 | [(set (match_operand:VDQ 0 "s_register_operand" "=w") |
94f0f2cc | 4702 | (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")] |
88f77cba JB |
4703 | UNSPEC_VREV64))] |
4704 | "TARGET_NEON" | |
c956e102 | 4705 | "vrev64.<V_sz_elem>\t%<V_reg>0, %<V_reg>1" |
f7379e5e | 4706 | [(set_attr "type" "neon_rev<q>")] |
c956e102 | 4707 | ) |
88f77cba | 4708 | |
ff03f2d1 | 4709 | (define_insn "@neon_vrev32<mode>" |
88f77cba | 4710 | [(set (match_operand:VX 0 "s_register_operand" "=w") |
94f0f2cc | 4711 | (unspec:VX [(match_operand:VX 1 "s_register_operand" "w")] |
88f77cba JB |
4712 | UNSPEC_VREV32))] |
4713 | "TARGET_NEON" | |
c956e102 | 4714 | "vrev32.<V_sz_elem>\t%<V_reg>0, %<V_reg>1" |
f7379e5e | 4715 | [(set_attr "type" "neon_rev<q>")] |
c956e102 | 4716 | ) |
88f77cba | 4717 | |
ff03f2d1 | 4718 | (define_insn "@neon_vrev16<mode>" |
88f77cba | 4719 | [(set (match_operand:VE 0 "s_register_operand" "=w") |
94f0f2cc | 4720 | (unspec:VE [(match_operand:VE 1 "s_register_operand" "w")] |
88f77cba JB |
4721 | UNSPEC_VREV16))] |
4722 | "TARGET_NEON" | |
c956e102 | 4723 | "vrev16.<V_sz_elem>\t%<V_reg>0, %<V_reg>1" |
f7379e5e | 4724 | [(set_attr "type" "neon_rev<q>")] |
c956e102 | 4725 | ) |
88f77cba JB |
4726 | |
4727 | ; vbsl_* intrinsics may compile to any of vbsl/vbif/vbit depending on register | |
4728 | ; allocation. For an intrinsic of form: | |
4729 | ; rD = vbsl_* (rS, rN, rM) | |
4730 | ; We can use any of: | |
4731 | ; vbsl rS, rN, rM (if D = S) | |
4732 | ; vbit rD, rN, rS (if D = M, so 1-bits in rS choose bits from rN, else rM) | |
4733 | ; vbif rD, rM, rS (if D = N, so 0-bits in rS choose bits from rM, else rN) | |
4734 | ||
4735 | (define_insn "neon_vbsl<mode>_internal" | |
4736 | [(set (match_operand:VDQX 0 "s_register_operand" "=w,w,w") | |
4737 | (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" " 0,w,w") | |
4738 | (match_operand:VDQX 2 "s_register_operand" " w,w,0") | |
4739 | (match_operand:VDQX 3 "s_register_operand" " w,0,w")] | |
4740 | UNSPEC_VBSL))] | |
4741 | "TARGET_NEON" | |
4742 | "@ | |
4743 | vbsl\t%<V_reg>0, %<V_reg>2, %<V_reg>3 | |
4744 | vbit\t%<V_reg>0, %<V_reg>2, %<V_reg>1 | |
c956e102 | 4745 | vbif\t%<V_reg>0, %<V_reg>3, %<V_reg>1" |
f7379e5e | 4746 | [(set_attr "type" "neon_bsl<q>")] |
c956e102 | 4747 | ) |
88f77cba JB |
4748 | |
4749 | (define_expand "neon_vbsl<mode>" | |
cd65e265 DZ |
4750 | [(set (match_operand:VDQX 0 "s_register_operand") |
4751 | (unspec:VDQX [(match_operand:<V_cmp_result> 1 "s_register_operand") | |
4752 | (match_operand:VDQX 2 "s_register_operand") | |
4753 | (match_operand:VDQX 3 "s_register_operand")] | |
88f77cba JB |
4754 | UNSPEC_VBSL))] |
4755 | "TARGET_NEON" | |
4756 | { | |
4757 | /* We can't alias operands together if they have different modes. */ | |
4758 | operands[1] = gen_lowpart (<MODE>mode, operands[1]); | |
4759 | }) | |
4760 | ||
94f0f2cc JG |
4761 | ;; vshl, vrshl |
4762 | (define_insn "neon_v<shift_op><sup><mode>" | |
88f77cba JB |
4763 | [(set (match_operand:VDQIX 0 "s_register_operand" "=w") |
4764 | (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w") | |
94f0f2cc JG |
4765 | (match_operand:VDQIX 2 "s_register_operand" "w")] |
4766 | VSHL))] | |
88f77cba | 4767 | "TARGET_NEON" |
94f0f2cc | 4768 | "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
f7379e5e | 4769 | [(set_attr "type" "neon_shift_imm<q>")] |
c956e102 | 4770 | ) |
88f77cba | 4771 | |
94f0f2cc JG |
4772 | ;; vqshl, vqrshl |
4773 | (define_insn "neon_v<shift_op><sup><mode>" | |
88f77cba JB |
4774 | [(set (match_operand:VDQIX 0 "s_register_operand" "=w") |
4775 | (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w") | |
94f0f2cc JG |
4776 | (match_operand:VDQIX 2 "s_register_operand" "w")] |
4777 | VQSHL))] | |
88f77cba | 4778 | "TARGET_NEON" |
94f0f2cc | 4779 | "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
f7379e5e | 4780 | [(set_attr "type" "neon_sat_shift_imm<q>")] |
c956e102 | 4781 | ) |
88f77cba | 4782 | |
94f0f2cc JG |
4783 | ;; vshr_n, vrshr_n |
4784 | (define_insn "neon_v<shift_op><sup>_n<mode>" | |
88f77cba JB |
4785 | [(set (match_operand:VDQIX 0 "s_register_operand" "=w") |
4786 | (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w") | |
94f0f2cc JG |
4787 | (match_operand:SI 2 "immediate_operand" "i")] |
4788 | VSHR_N))] | |
88f77cba | 4789 | "TARGET_NEON" |
b617fc71 | 4790 | { |
d57daa0c | 4791 | arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) + 1); |
94f0f2cc | 4792 | return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2"; |
b617fc71 | 4793 | } |
f7379e5e | 4794 | [(set_attr "type" "neon_shift_imm<q>")] |
c956e102 | 4795 | ) |
88f77cba | 4796 | |
94f0f2cc JG |
4797 | ;; vshrn_n, vrshrn_n |
4798 | (define_insn "neon_v<shift_op>_n<mode>" | |
88f77cba JB |
4799 | [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w") |
4800 | (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w") | |
94f0f2cc JG |
4801 | (match_operand:SI 2 "immediate_operand" "i")] |
4802 | VSHRN_N))] | |
88f77cba | 4803 | "TARGET_NEON" |
b617fc71 | 4804 | { |
d57daa0c | 4805 | arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1); |
94f0f2cc | 4806 | return "v<shift_op>.<V_if_elem>\t%P0, %q1, %2"; |
b617fc71 | 4807 | } |
f7379e5e | 4808 | [(set_attr "type" "neon_shift_imm_narrow_q")] |
c956e102 | 4809 | ) |
88f77cba | 4810 | |
94f0f2cc JG |
4811 | ;; vqshrn_n, vqrshrn_n |
4812 | (define_insn "neon_v<shift_op><sup>_n<mode>" | |
88f77cba JB |
4813 | [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w") |
4814 | (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w") | |
94f0f2cc JG |
4815 | (match_operand:SI 2 "immediate_operand" "i")] |
4816 | VQSHRN_N))] | |
88f77cba | 4817 | "TARGET_NEON" |
b617fc71 | 4818 | { |
d57daa0c | 4819 | arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1); |
94f0f2cc | 4820 | return "v<shift_op>.<sup>%#<V_sz_elem>\t%P0, %q1, %2"; |
b617fc71 | 4821 | } |
f7379e5e | 4822 | [(set_attr "type" "neon_sat_shift_imm_narrow_q")] |
c956e102 | 4823 | ) |
88f77cba | 4824 | |
94f0f2cc JG |
4825 | ;; vqshrun_n, vqrshrun_n |
4826 | (define_insn "neon_v<shift_op>_n<mode>" | |
88f77cba JB |
4827 | [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w") |
4828 | (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w") | |
94f0f2cc JG |
4829 | (match_operand:SI 2 "immediate_operand" "i")] |
4830 | VQSHRUN_N))] | |
88f77cba | 4831 | "TARGET_NEON" |
b617fc71 | 4832 | { |
d57daa0c | 4833 | arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1); |
94f0f2cc | 4834 | return "v<shift_op>.<V_s_elem>\t%P0, %q1, %2"; |
b617fc71 | 4835 | } |
f7379e5e | 4836 | [(set_attr "type" "neon_sat_shift_imm_narrow_q")] |
c956e102 | 4837 | ) |
88f77cba JB |
4838 | |
4839 | (define_insn "neon_vshl_n<mode>" | |
4840 | [(set (match_operand:VDQIX 0 "s_register_operand" "=w") | |
4841 | (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w") | |
94f0f2cc | 4842 | (match_operand:SI 2 "immediate_operand" "i")] |
88f77cba JB |
4843 | UNSPEC_VSHL_N))] |
4844 | "TARGET_NEON" | |
b617fc71 | 4845 | { |
d57daa0c | 4846 | arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode)); |
b617fc71 JB |
4847 | return "vshl.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %2"; |
4848 | } | |
f7379e5e | 4849 | [(set_attr "type" "neon_shift_imm<q>")] |
c956e102 | 4850 | ) |
88f77cba | 4851 | |
94f0f2cc | 4852 | (define_insn "neon_vqshl_<sup>_n<mode>" |
88f77cba JB |
4853 | [(set (match_operand:VDQIX 0 "s_register_operand" "=w") |
4854 | (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w") | |
94f0f2cc JG |
4855 | (match_operand:SI 2 "immediate_operand" "i")] |
4856 | VQSHL_N))] | |
88f77cba | 4857 | "TARGET_NEON" |
b617fc71 | 4858 | { |
d57daa0c | 4859 | arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode)); |
94f0f2cc | 4860 | return "vqshl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2"; |
b617fc71 | 4861 | } |
f7379e5e | 4862 | [(set_attr "type" "neon_sat_shift_imm<q>")] |
c956e102 | 4863 | ) |
88f77cba JB |
4864 | |
4865 | (define_insn "neon_vqshlu_n<mode>" | |
4866 | [(set (match_operand:VDQIX 0 "s_register_operand" "=w") | |
4867 | (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w") | |
94f0f2cc | 4868 | (match_operand:SI 2 "immediate_operand" "i")] |
88f77cba JB |
4869 | UNSPEC_VQSHLU_N))] |
4870 | "TARGET_NEON" | |
b617fc71 | 4871 | { |
d57daa0c | 4872 | arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode)); |
94f0f2cc | 4873 | return "vqshlu.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %2"; |
b617fc71 | 4874 | } |
f7379e5e | 4875 | [(set_attr "type" "neon_sat_shift_imm<q>")] |
c956e102 | 4876 | ) |
88f77cba | 4877 | |
94f0f2cc | 4878 | (define_insn "neon_vshll<sup>_n<mode>" |
88f77cba JB |
4879 | [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") |
4880 | (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w") | |
94f0f2cc JG |
4881 | (match_operand:SI 2 "immediate_operand" "i")] |
4882 | VSHLL_N))] | |
88f77cba | 4883 | "TARGET_NEON" |
b617fc71 | 4884 | { |
8cb32ff2 | 4885 | /* The boundaries are: 0 < imm <= size. */ |
d57daa0c | 4886 | arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode) + 1); |
94f0f2cc | 4887 | return "vshll.<sup>%#<V_sz_elem>\t%q0, %P1, %2"; |
b617fc71 | 4888 | } |
f7379e5e | 4889 | [(set_attr "type" "neon_shift_imm_long")] |
c956e102 | 4890 | ) |
88f77cba | 4891 | |
94f0f2cc JG |
4892 | ;; vsra_n, vrsra_n |
4893 | (define_insn "neon_v<shift_op><sup>_n<mode>" | |
88f77cba JB |
4894 | [(set (match_operand:VDQIX 0 "s_register_operand" "=w") |
4895 | (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0") | |
4896 | (match_operand:VDQIX 2 "s_register_operand" "w") | |
94f0f2cc JG |
4897 | (match_operand:SI 3 "immediate_operand" "i")] |
4898 | VSRA_N))] | |
88f77cba | 4899 | "TARGET_NEON" |
b617fc71 | 4900 | { |
d57daa0c | 4901 | arm_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1); |
94f0f2cc | 4902 | return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3"; |
b617fc71 | 4903 | } |
f7379e5e | 4904 | [(set_attr "type" "neon_shift_acc<q>")] |
c956e102 | 4905 | ) |
88f77cba JB |
4906 | |
4907 | (define_insn "neon_vsri_n<mode>" | |
4908 | [(set (match_operand:VDQIX 0 "s_register_operand" "=w") | |
4909 | (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0") | |
4910 | (match_operand:VDQIX 2 "s_register_operand" "w") | |
4911 | (match_operand:SI 3 "immediate_operand" "i")] | |
4912 | UNSPEC_VSRI))] | |
4913 | "TARGET_NEON" | |
b617fc71 | 4914 | { |
d57daa0c | 4915 | arm_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1); |
b617fc71 JB |
4916 | return "vsri.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3"; |
4917 | } | |
f7379e5e | 4918 | [(set_attr "type" "neon_shift_reg<q>")] |
c956e102 | 4919 | ) |
88f77cba JB |
4920 | |
| ;; vsli_n: operand 1 is tied to the destination (constraint "0"). Unlike |
| ;; vsri_n, the immediate in operand 3 is passed to arm_const_bounds with the |
| ;; bounds (0, element bits). |
4921 | (define_insn "neon_vsli_n<mode>" | |
4922 | [(set (match_operand:VDQIX 0 "s_register_operand" "=w") | |
4923 | (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0") | |
4924 | (match_operand:VDQIX 2 "s_register_operand" "w") | |
4925 | (match_operand:SI 3 "immediate_operand" "i")] | |
4926 | UNSPEC_VSLI))] | |
4927 | "TARGET_NEON" | |
b617fc71 | 4928 | { |
d57daa0c | 4929 | arm_const_bounds (operands[3], 0, neon_element_bits (<MODE>mode)); |
b617fc71 JB |
4930 | return "vsli.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3"; |
4931 | } | |
f7379e5e | 4932 | [(set_attr "type" "neon_shift_reg<q>")] |
c956e102 | 4933 | ) |
88f77cba JB |
4934 | |
| ;; vtbl.8 with a single V8QI register (operand 1) printed inside the braces |
| ;; and operand 2 printed after them. |
4935 | (define_insn "neon_vtbl1v8qi" | |
4936 | [(set (match_operand:V8QI 0 "s_register_operand" "=w") | |
4937 | (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "w") | |
4938 | (match_operand:V8QI 2 "s_register_operand" "w")] | |
4939 | UNSPEC_VTBL))] | |
4940 | "TARGET_NEON" | |
c956e102 | 4941 | "vtbl.8\t%P0, {%P1}, %P2" |
f7379e5e | 4942 | [(set_attr "type" "neon_tbl1")] |
c956e102 | 4943 | ) |
88f77cba JB |
4944 | |
| ;; Operand 1 is a TImode register. The V8QI registers at its REGNO and |
| ;; REGNO + 2 are printed inside the braces of the vtbl.8 that is emitted with |
| ;; output_asm_insn, so the insn itself returns an empty template. |
4945 | (define_insn "neon_vtbl2v8qi" | |
4946 | [(set (match_operand:V8QI 0 "s_register_operand" "=w") | |
4947 | (unspec:V8QI [(match_operand:TI 1 "s_register_operand" "w") | |
4948 | (match_operand:V8QI 2 "s_register_operand" "w")] | |
4949 | UNSPEC_VTBL))] | |
4950 | "TARGET_NEON" | |
4951 | { | |
4952 | rtx ops[4]; | |
4953 | int tabbase = REGNO (operands[1]); | |
4954 | ||
4955 | ops[0] = operands[0]; | |
4956 | ops[1] = gen_rtx_REG (V8QImode, tabbase); | |
4957 | ops[2] = gen_rtx_REG (V8QImode, tabbase + 2); | |
4958 | ops[3] = operands[2]; | |
4959 | output_asm_insn ("vtbl.8\t%P0, {%P1, %P2}, %P3", ops); | |
4960 | ||
4961 | return ""; | |
c956e102 | 4962 | } |
f7379e5e | 4963 | [(set_attr "type" "neon_tbl2")] |
c956e102 | 4964 | ) |
88f77cba JB |
4965 | |
| ;; Operand 1 is an EImode register. The V8QI registers at its REGNO, |
| ;; REGNO + 2 and REGNO + 4 are printed inside the braces of the vtbl.8 that is |
| ;; emitted with output_asm_insn. |
4966 | (define_insn "neon_vtbl3v8qi" | |
4967 | [(set (match_operand:V8QI 0 "s_register_operand" "=w") | |
4968 | (unspec:V8QI [(match_operand:EI 1 "s_register_operand" "w") | |
4969 | (match_operand:V8QI 2 "s_register_operand" "w")] | |
4970 | UNSPEC_VTBL))] | |
4971 | "TARGET_NEON" | |
4972 | { | |
4973 | rtx ops[5]; | |
4974 | int tabbase = REGNO (operands[1]); | |
4975 | ||
4976 | ops[0] = operands[0]; | |
4977 | ops[1] = gen_rtx_REG (V8QImode, tabbase); | |
4978 | ops[2] = gen_rtx_REG (V8QImode, tabbase + 2); | |
4979 | ops[3] = gen_rtx_REG (V8QImode, tabbase + 4); | |
4980 | ops[4] = operands[2]; | |
4981 | output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3}, %P4", ops); | |
4982 | ||
4983 | return ""; | |
c956e102 | 4984 | } |
f7379e5e | 4985 | [(set_attr "type" "neon_tbl3")] |
c956e102 | 4986 | ) |
88f77cba JB |
4987 | |
| ;; Operand 1 is an OImode register. The V8QI registers at its REGNO, |
| ;; REGNO + 2, REGNO + 4 and REGNO + 6 are printed inside the braces of the |
| ;; vtbl.8 that is emitted with output_asm_insn. |
4988 | (define_insn "neon_vtbl4v8qi" | |
4989 | [(set (match_operand:V8QI 0 "s_register_operand" "=w") | |
4990 | (unspec:V8QI [(match_operand:OI 1 "s_register_operand" "w") | |
4991 | (match_operand:V8QI 2 "s_register_operand" "w")] | |
4992 | UNSPEC_VTBL))] | |
4993 | "TARGET_NEON" | |
4994 | { | |
4995 | rtx ops[6]; | |
4996 | int tabbase = REGNO (operands[1]); | |
4997 | ||
4998 | ops[0] = operands[0]; | |
4999 | ops[1] = gen_rtx_REG (V8QImode, tabbase); | |
5000 | ops[2] = gen_rtx_REG (V8QImode, tabbase + 2); | |
5001 | ops[3] = gen_rtx_REG (V8QImode, tabbase + 4); | |
5002 | ops[4] = gen_rtx_REG (V8QImode, tabbase + 6); | |
5003 | ops[5] = operands[2]; | |
5004 | output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops); | |
5005 | ||
5006 | return ""; | |
c956e102 | 5007 | } |
f7379e5e | 5008 | [(set_attr "type" "neon_tbl4")] |
c956e102 | 5009 | ) |
88f77cba | 5010 | |
b440f324 RH |
5011 | ;; These three are used by the vec_perm infrastructure for V16QImode. |
| ;; Output template is "#"; after reload the insn is split into two |
| ;; neon_vtbl2v8qi insns, one for the low and one for the high V8QI half of |
| ;; operands 0 and 2. Operand 1 is taken as a TImode lowpart and passed to |
| ;; both halves. |
5012 | (define_insn_and_split "neon_vtbl1v16qi" | |
5013 | [(set (match_operand:V16QI 0 "s_register_operand" "=&w") | |
5014 | (unspec:V16QI [(match_operand:V16QI 1 "s_register_operand" "w") | |
5015 | (match_operand:V16QI 2 "s_register_operand" "w")] | |
5016 | UNSPEC_VTBL))] | |
5017 | "TARGET_NEON" | |
5018 | "#" | |
5019 | "&& reload_completed" | |
5020 | [(const_int 0)] | |
5021 | { | |
5022 | rtx op0, op1, op2, part0, part2; | |
5023 | unsigned ofs; | |
5024 | ||
5025 | op0 = operands[0]; | |
5026 | op1 = gen_lowpart (TImode, operands[1]); | |
5027 | op2 = operands[2]; | |
5028 | ||
5029 | ofs = subreg_lowpart_offset (V8QImode, V16QImode); | |
5030 | part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs); | |
5031 | part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs); | |
5032 | emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2)); | |
5033 | ||
5034 | ofs = subreg_highpart_offset (V8QImode, V16QImode); | |
5035 | part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs); | |
5036 | part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs); | |
5037 | emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2)); | |
5038 | DONE; | |
f7379e5e JG |
5039 | } |
5040 | [(set_attr "type" "multiple")] | |
5041 | ) | |
b440f324 RH |
5042 | |
| ;; Output template is "#"; after reload the insn is split into two V8QI |
| ;; table lookups, one for the low and one for the high V8QI half of |
| ;; operands 0 and 2. Operand 1 is OImode and is passed through unchanged, |
| ;; so the V8QI pattern used must be the one whose table operand is OImode, |
| ;; i.e. neon_vtbl4v8qi; neon_vtbl2v8qi takes a TImode table operand. |
5043 | (define_insn_and_split "neon_vtbl2v16qi" | |
5044 | [(set (match_operand:V16QI 0 "s_register_operand" "=&w") | |
5045 | (unspec:V16QI [(match_operand:OI 1 "s_register_operand" "w") | |
5046 | (match_operand:V16QI 2 "s_register_operand" "w")] | |
5047 | UNSPEC_VTBL))] | |
5048 | "TARGET_NEON" | |
5049 | "#" | |
5050 | "&& reload_completed" | |
5051 | [(const_int 0)] | |
5052 | { | |
5053 | rtx op0, op1, op2, part0, part2; | |
5054 | unsigned ofs; | |
5055 | ||
5056 | op0 = operands[0]; | |
5057 | op1 = operands[1]; | |
5058 | op2 = operands[2]; | |
5059 | ||
5060 | ofs = subreg_lowpart_offset (V8QImode, V16QImode); | |
5061 | part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs); | |
5062 | part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs); | |
5063 | emit_insn (gen_neon_vtbl4v8qi (part0, op1, part2)); | |
5064 | ||
5065 | ofs = subreg_highpart_offset (V8QImode, V16QImode); | |
5066 | part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs); | |
5067 | part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs); | |
5068 | emit_insn (gen_neon_vtbl4v8qi (part0, op1, part2)); | |
5069 | DONE; | |
f7379e5e JG |
5070 | } |
5071 | [(set_attr "type" "multiple")] | |
5072 | ) | |
b440f324 RH |
5073 | |
5074 | ;; ??? Logically we should extend the regular neon_vcombine pattern to | |
5075 | ;; handle quad-word input modes, producing octa-word output modes. But | |
5076 | ;; that requires us to add support for octa-word vector modes in moves. | |
5077 | ;; That seems overkill for this one use in vec_perm. | |
| ;; Output template is "#"; after reload the split body hands the operands to |
| ;; neon_split_vcombine and finishes with DONE. |
5078 | (define_insn_and_split "neon_vcombinev16qi" | |
5079 | [(set (match_operand:OI 0 "s_register_operand" "=w") | |
5080 | (unspec:OI [(match_operand:V16QI 1 "s_register_operand" "w") | |
5081 | (match_operand:V16QI 2 "s_register_operand" "w")] | |
5082 | UNSPEC_VCONCAT))] | |
5083 | "TARGET_NEON" | |
5084 | "#" | |
5085 | "&& reload_completed" | |
5086 | [(const_int 0)] | |
5087 | { | |
5088 | neon_split_vcombine (operands); | |
5089 | DONE; | |
f7379e5e JG |
5090 | } |
5091 | [(set_attr "type" "multiple")] | |
5092 | ) | |
b440f324 | 5093 | |
| ;; vtbx.8 with a single V8QI register (operand 2) inside the braces. |
| ;; Operand 1 is tied to the destination (constraint "0") and does not appear |
| ;; in the template. |
88f77cba JB |
5094 | (define_insn "neon_vtbx1v8qi" |
5095 | [(set (match_operand:V8QI 0 "s_register_operand" "=w") | |
5096 | (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0") | |
5097 | (match_operand:V8QI 2 "s_register_operand" "w") | |
5098 | (match_operand:V8QI 3 "s_register_operand" "w")] | |
5099 | UNSPEC_VTBX))] | |
5100 | "TARGET_NEON" | |
c956e102 | 5101 | "vtbx.8\t%P0, {%P2}, %P3" |
f7379e5e | 5102 | [(set_attr "type" "neon_tbl1")] |
c956e102 | 5103 | ) |
88f77cba JB |
5104 | |
| ;; Operand 1 is tied to the destination. Operand 2 is a TImode register; the |
| ;; V8QI registers at its REGNO and REGNO + 2 are printed inside the braces of |
| ;; the vtbx.8 that is emitted with output_asm_insn. |
5105 | (define_insn "neon_vtbx2v8qi" | |
5106 | [(set (match_operand:V8QI 0 "s_register_operand" "=w") | |
5107 | (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0") | |
5108 | (match_operand:TI 2 "s_register_operand" "w") | |
5109 | (match_operand:V8QI 3 "s_register_operand" "w")] | |
5110 | UNSPEC_VTBX))] | |
5111 | "TARGET_NEON" | |
5112 | { | |
5113 | rtx ops[4]; | |
5114 | int tabbase = REGNO (operands[2]); | |
5115 | ||
5116 | ops[0] = operands[0]; | |
5117 | ops[1] = gen_rtx_REG (V8QImode, tabbase); | |
5118 | ops[2] = gen_rtx_REG (V8QImode, tabbase + 2); | |
5119 | ops[3] = operands[3]; | |
5120 | output_asm_insn ("vtbx.8\t%P0, {%P1, %P2}, %P3", ops); | |
5121 | ||
5122 | return ""; | |
c956e102 | 5123 | } |
f7379e5e | 5124 | [(set_attr "type" "neon_tbl2")] |
c956e102 | 5125 | ) |
88f77cba JB |
5126 | |
| ;; Operand 1 is tied to the destination. Operand 2 is an EImode register; |
| ;; the V8QI registers at its REGNO, REGNO + 2 and REGNO + 4 are printed |
| ;; inside the braces of the vtbx.8 that is emitted with output_asm_insn. |
5127 | (define_insn "neon_vtbx3v8qi" | |
5128 | [(set (match_operand:V8QI 0 "s_register_operand" "=w") | |
5129 | (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0") | |
5130 | (match_operand:EI 2 "s_register_operand" "w") | |
5131 | (match_operand:V8QI 3 "s_register_operand" "w")] | |
5132 | UNSPEC_VTBX))] | |
5133 | "TARGET_NEON" | |
5134 | { | |
5135 | rtx ops[5]; | |
5136 | int tabbase = REGNO (operands[2]); | |
5137 | ||
5138 | ops[0] = operands[0]; | |
5139 | ops[1] = gen_rtx_REG (V8QImode, tabbase); | |
5140 | ops[2] = gen_rtx_REG (V8QImode, tabbase + 2); | |
5141 | ops[3] = gen_rtx_REG (V8QImode, tabbase + 4); | |
5142 | ops[4] = operands[3]; | |
5143 | output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3}, %P4", ops); | |
5144 | ||
5145 | return ""; | |
c956e102 | 5146 | } |
f7379e5e | 5147 | [(set_attr "type" "neon_tbl3")] |
c956e102 | 5148 | ) |
88f77cba JB |
5149 | |
| ;; Operand 1 is tied to the destination. Operand 2 is an OImode register; |
| ;; the V8QI registers at its REGNO, REGNO + 2, REGNO + 4 and REGNO + 6 are |
| ;; printed inside the braces of the vtbx.8 emitted with output_asm_insn. |
5150 | (define_insn "neon_vtbx4v8qi" | |
5151 | [(set (match_operand:V8QI 0 "s_register_operand" "=w") | |
5152 | (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0") | |
5153 | (match_operand:OI 2 "s_register_operand" "w") | |
5154 | (match_operand:V8QI 3 "s_register_operand" "w")] | |
5155 | UNSPEC_VTBX))] | |
5156 | "TARGET_NEON" | |
5157 | { | |
5158 | rtx ops[6]; | |
5159 | int tabbase = REGNO (operands[2]); | |
5160 | ||
5161 | ops[0] = operands[0]; | |
5162 | ops[1] = gen_rtx_REG (V8QImode, tabbase); | |
5163 | ops[2] = gen_rtx_REG (V8QImode, tabbase + 2); | |
5164 | ops[3] = gen_rtx_REG (V8QImode, tabbase + 4); | |
5165 | ops[4] = gen_rtx_REG (V8QImode, tabbase + 6); | |
5166 | ops[5] = operands[3]; | |
5167 | output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops); | |
5168 | ||
5169 | return ""; | |
c956e102 | 5170 | } |
f7379e5e | 5171 | [(set_attr "type" "neon_tbl4")] |
c956e102 | 5172 | ) |
88f77cba | 5173 | |
| ;; Expander with an empty preparation statement: it only builds the parallel |
| ;; of the UNSPEC_VTRN1 set (operand 0) and the UNSPEC_VTRN2 set (operand 3) |
| ;; from inputs 1 and 2. |
ff03f2d1 | 5174 | (define_expand "@neon_vtrn<mode>_internal" |
28131dfe | 5175 | [(parallel |
b1a970a5 MW |
5176 | [(set (match_operand:VDQWH 0 "s_register_operand") |
5177 | (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand") | |
5178 | (match_operand:VDQWH 2 "s_register_operand")] | |
28131dfe | 5179 | UNSPEC_VTRN1)) |
b1a970a5 MW |
5180 | (set (match_operand:VDQWH 3 "s_register_operand") |
5181 | (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])] | |
28131dfe RE |
5182 | "TARGET_NEON" |
5183 | "" | |
5184 | ) | |
5185 | ||
5186 | ;; Note: Different operand numbering to handle tied registers correctly. | |
| ;; Both outputs are early-clobber ("=&w") and each input is tied to one of |
| ;; them: operand 1 to operand 0 and operand 3 to operand 2. The template |
| ;; therefore names only %0 and %2. |
5187 | (define_insn "*neon_vtrn<mode>_insn" | |
b1a970a5 MW |
5188 | [(set (match_operand:VDQWH 0 "s_register_operand" "=&w") |
5189 | (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0") | |
5190 | (match_operand:VDQWH 3 "s_register_operand" "2")] | |
5191 | UNSPEC_VTRN1)) | |
5192 | (set (match_operand:VDQWH 2 "s_register_operand" "=&w") | |
5193 | (unspec:VDQWH [(match_dup 1) (match_dup 3)] | |
5194 | UNSPEC_VTRN2))] | |
7e7cfcf6 | 5195 | "TARGET_NEON" |
28131dfe | 5196 | "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2" |
f7379e5e | 5197 | [(set_attr "type" "neon_permute<q>")] |
c956e102 | 5198 | ) |
88f77cba | 5199 | |
| ;; Expander with an empty preparation statement: it only builds the parallel |
| ;; of the UNSPEC_VZIP1 set (operand 0) and the UNSPEC_VZIP2 set (operand 3) |
| ;; from inputs 1 and 2. |
ff03f2d1 | 5200 | (define_expand "@neon_vzip<mode>_internal" |
28131dfe | 5201 | [(parallel |
b1a970a5 MW |
5202 | [(set (match_operand:VDQWH 0 "s_register_operand") |
5203 | (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand") | |
5204 | (match_operand:VDQWH 2 "s_register_operand")] | |
5205 | UNSPEC_VZIP1)) | |
5206 | (set (match_operand:VDQWH 3 "s_register_operand") | |
5207 | (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])] | |
28131dfe RE |
5208 | "TARGET_NEON" |
5209 | "" | |
5210 | ) | |
5211 | ||
5212 | ;; Note: Different operand numbering to handle tied registers correctly. | |
| ;; Both outputs are early-clobber ("=&w") and each input is tied to one of |
| ;; them: operand 1 to operand 0 and operand 3 to operand 2. The template |
| ;; therefore names only %0 and %2. |
5213 | (define_insn "*neon_vzip<mode>_insn" | |
b1a970a5 MW |
5214 | [(set (match_operand:VDQWH 0 "s_register_operand" "=&w") |
5215 | (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0") | |
5216 | (match_operand:VDQWH 3 "s_register_operand" "2")] | |
5217 | UNSPEC_VZIP1)) | |
5218 | (set (match_operand:VDQWH 2 "s_register_operand" "=&w") | |
5219 | (unspec:VDQWH [(match_dup 1) (match_dup 3)] | |
5220 | UNSPEC_VZIP2))] | |
7e7cfcf6 | 5221 | "TARGET_NEON" |
28131dfe | 5222 | "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2" |
f7379e5e | 5223 | [(set_attr "type" "neon_zip<q>")] |
c956e102 | 5224 | ) |
88f77cba | 5225 | |
| ;; Expander with an empty preparation statement: it only builds the parallel |
| ;; of the UNSPEC_VUZP1 set (operand 0) and the UNSPEC_VUZP2 set (operand 3) |
| ;; from inputs 1 and 2. |
ff03f2d1 | 5226 | (define_expand "@neon_vuzp<mode>_internal" |
28131dfe | 5227 | [(parallel |
b1a970a5 MW |
5228 | [(set (match_operand:VDQWH 0 "s_register_operand") |
5229 | (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand") | |
5230 | (match_operand:VDQWH 2 "s_register_operand")] | |
28131dfe | 5231 | UNSPEC_VUZP1)) |
cd65e265 | 5232 | (set (match_operand:VDQWH 3 "s_register_operand") |
b1a970a5 | 5233 | (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])] |
28131dfe RE |
5234 | "TARGET_NEON" |
5235 | "" | |
5236 | ) | |
5237 | ||
5238 | ;; Note: Different operand numbering to handle tied registers correctly. | |
| ;; Both outputs are early-clobber ("=&w") and each input is tied to one of |
| ;; them: operand 1 to operand 0 and operand 3 to operand 2. The template |
| ;; therefore names only %0 and %2. |
5239 | (define_insn "*neon_vuzp<mode>_insn" | |
b1a970a5 MW |
5240 | [(set (match_operand:VDQWH 0 "s_register_operand" "=&w") |
5241 | (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0") | |
5242 | (match_operand:VDQWH 3 "s_register_operand" "2")] | |
5243 | UNSPEC_VUZP1)) | |
5244 | (set (match_operand:VDQWH 2 "s_register_operand" "=&w") | |
5245 | (unspec:VDQWH [(match_dup 1) (match_dup 3)] | |
5246 | UNSPEC_VUZP2))] | |
88f77cba | 5247 | "TARGET_NEON" |
28131dfe | 5248 | "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2" |
f7379e5e | 5249 | [(set_attr "type" "neon_zip<q>")] |
c956e102 | 5250 | ) |
88f77cba | 5251 | |
| ;; Expander with no preparation code: it builds an UNSPEC_VLD1 set of |
| ;; register operand 0 from neon_struct_operand 1, both in the same VDQX mode. |
3188ed59 RS |
5252 | (define_expand "vec_load_lanes<mode><mode>" |
5253 | [(set (match_operand:VDQX 0 "s_register_operand") | |
5254 | (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand")] | |
5255 | UNSPEC_VLD1))] | |
5256 | "TARGET_NEON") | |
5257 | ||
| ;; Whole-vector vld1: operand 0 is printed with the %h modifier and the "Um" |
| ;; memory operand 1 with %A. |
88f77cba JB |
5258 | (define_insn "neon_vld1<mode>" |
5259 | [(set (match_operand:VDQX 0 "s_register_operand" "=w") | |
6308e208 | 5260 | (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand" "Um")] |
88f77cba JB |
5261 | UNSPEC_VLD1))] |
5262 | "TARGET_NEON" | |
6308e208 | 5263 | "vld1.<V_sz_elem>\t%h0, %A1" |
f7379e5e | 5264 | [(set_attr "type" "neon_load1_1reg<q>")] |
c956e102 | 5265 | ) |
88f77cba | 5266 | |
22f9db64 CB |
5267 | ;; The lane numbers in the RTL are in GCC lane order, having been flipped |
5268 | ;; in arm_expand_neon_args. The lane numbers are restored to architectural | |
5269 | ;; lane order here. | |
| ;; VDX form. Operand 2 is tied to the destination. The lane index is |
| ;; converted with NEON_ENDIAN_LANE_N and written back to operands[3]; modes |
| ;; with a single element (GET_MODE_NUNITS == 1) use the template without a |
| ;; lane suffix. |
88f77cba JB |
5270 | (define_insn "neon_vld1_lane<mode>" |
5271 | [(set (match_operand:VDX 0 "s_register_operand" "=w") | |
6308e208 | 5272 | (unspec:VDX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um") |
88f77cba JB |
5273 | (match_operand:VDX 2 "s_register_operand" "0") |
5274 | (match_operand:SI 3 "immediate_operand" "i")] | |
5275 | UNSPEC_VLD1_LANE))] | |
5276 | "TARGET_NEON" | |
5277 | { | |
22f9db64 | 5278 | HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3])); |
88f77cba | 5279 | HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode); |
22f9db64 | 5280 | operands[3] = GEN_INT (lane); |
88f77cba | 5281 | if (max == 1) |
6308e208 | 5282 | return "vld1.<V_sz_elem>\t%P0, %A1"; |
88f77cba | 5283 | else |
6308e208 | 5284 | return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1"; |
c956e102 | 5285 | } |
f7379e5e | 5286 | [(set_attr "type" "neon_load1_one_lane<q>")] |
c956e102 | 5287 | ) |
88f77cba | 5288 | |
22f9db64 CB |
5289 | ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed |
5290 | ;; here on big endian targets. | |
| ;; VQX form. After NEON_ENDIAN_LANE_N, a lane in the upper half |
| ;; (lane >= max / 2) is rebased by max / 2 and the register number is |
| ;; advanced by 2. Operand 0 is then replaced by a <V_HALF>mode register so |
| ;; the template can print it with %P. |
88f77cba JB |
5291 | (define_insn "neon_vld1_lane<mode>" |
5292 | [(set (match_operand:VQX 0 "s_register_operand" "=w") | |
6308e208 | 5293 | (unspec:VQX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um") |
88f77cba JB |
5294 | (match_operand:VQX 2 "s_register_operand" "0") |
5295 | (match_operand:SI 3 "immediate_operand" "i")] | |
5296 | UNSPEC_VLD1_LANE))] | |
5297 | "TARGET_NEON" | |
5298 | { | |
22f9db64 | 5299 | HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3])); |
88f77cba | 5300 | HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode); |
22f9db64 | 5301 | operands[3] = GEN_INT (lane); |
88f77cba | 5302 | int regno = REGNO (operands[0]); |
e68ffe57 | 5303 | if (lane >= max / 2) |
88f77cba JB |
5304 | { |
5305 | lane -= max / 2; | |
5306 | regno += 2; | |
5307 | operands[3] = GEN_INT (lane); | |
5308 | } | |
5309 | operands[0] = gen_rtx_REG (<V_HALF>mode, regno); | |
5310 | if (max == 2) | |
6308e208 | 5311 | return "vld1.<V_sz_elem>\t%P0, %A1"; |
88f77cba | 5312 | else |
6308e208 | 5313 | return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1"; |
c956e102 | 5314 | } |
f7379e5e | 5315 | [(set_attr "type" "neon_load1_one_lane<q>")] |
c956e102 | 5316 | ) |
88f77cba JB |
5317 | |
| ;; VD_LANE form: a vec_duplicate of the <V_elem> memory operand, emitted as a |
| ;; vld1 whose register is printed with the "[]" suffix. |
5318 | (define_insn "neon_vld1_dup<mode>" | |
92422235 CL |
5319 | [(set (match_operand:VD_LANE 0 "s_register_operand" "=w") |
5320 | (vec_duplicate:VD_LANE (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))] | |
88f77cba | 5321 | "TARGET_NEON" |
27d2e612 | 5322 | "vld1.<V_sz_elem>\t{%P0[]}, %A1" |
f7379e5e | 5323 | [(set_attr "type" "neon_load1_all_lanes<q>")] |
27d2e612 RE |
5324 | ) |
5325 | ||
5326 | ;; Special case for DImode. Treat it exactly like a simple load. | |
| ;; Expander with an empty preparation statement; the generated pattern is an |
| ;; UNSPEC_VLD1 set of DImode operand 0 from DImode operand 1. |
5327 | (define_expand "neon_vld1_dupdi" | |
cd65e265 DZ |
5328 | [(set (match_operand:DI 0 "s_register_operand") |
5329 | (unspec:DI [(match_operand:DI 1 "neon_struct_operand")] | |
27d2e612 RE |
5330 | UNSPEC_VLD1))] |
5331 | "TARGET_NEON" | |
5332 | "" | |
c956e102 | 5333 | ) |
88f77cba JB |
5334 | |
| ;; VQ2 form: a single vld1 whose register list names both %e0 and %f0, each |
| ;; with the "[]" suffix. |
5335 | (define_insn "neon_vld1_dup<mode>" | |
92422235 CL |
5336 | [(set (match_operand:VQ2 0 "s_register_operand" "=w") |
5337 | (vec_duplicate:VQ2 (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))] | |
88f77cba JB |
5338 | "TARGET_NEON" |
5339 | { | |
8490252a | 5340 | return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1"; |
c956e102 | 5341 | } |
f7379e5e | 5342 | [(set_attr "type" "neon_load1_all_lanes<q>")] |
8490252a CL |
5343 | ) |
5344 | ||
| ;; Output template is "#". After reload the split loads the DImode lowpart |
| ;; of operand 0 through neon_vld1_dupdi and then copies that lowpart into |
| ;; the DImode highpart with emit_move_insn; "length" is set to 8. |
5345 | (define_insn_and_split "neon_vld1_dupv2di" | |
5346 | [(set (match_operand:V2DI 0 "s_register_operand" "=w") | |
5347 | (vec_duplicate:V2DI (match_operand:DI 1 "neon_struct_operand" "Um")))] | |
5348 | "TARGET_NEON" | |
5349 | "#" | |
5350 | "&& reload_completed" | |
5351 | [(const_int 0)] | |
5352 | { | |
5353 | rtx tmprtx = gen_lowpart (DImode, operands[0]); | |
5354 | emit_insn (gen_neon_vld1_dupdi (tmprtx, operands[1])); | |
5355 | emit_move_insn (gen_highpart (DImode, operands[0]), tmprtx ); | |
5356 | DONE; | |
5357 | } | |
5358 | [(set_attr "length" "8") | |
f7379e5e | 5359 | (set_attr "type" "neon_load1_all_lanes_q")] |
c956e102 | 5360 | ) |
88f77cba | 5361 | |
| ;; Expander with no preparation code: it builds an UNSPEC_VST1 set of |
| ;; neon_struct_operand 0 from register operand 1, both in the same VDQX mode. |
3188ed59 RS |
5362 | (define_expand "vec_store_lanes<mode><mode>" |
5363 | [(set (match_operand:VDQX 0 "neon_struct_operand") | |
5364 | (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand")] | |
5365 | UNSPEC_VST1))] | |
5366 | "TARGET_NEON") | |
5367 | ||
| ;; Whole-vector vst1: register operand 1 is printed with the %h modifier and |
| ;; the "Um" memory operand 0 with %A. |
88f77cba | 5368 | (define_insn "neon_vst1<mode>" |
6308e208 | 5369 | [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um") |
88f77cba JB |
5370 | (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")] |
5371 | UNSPEC_VST1))] | |
5372 | "TARGET_NEON" | |
6308e208 | 5373 | "vst1.<V_sz_elem>\t%h1, %A0" |
f7379e5e | 5374 | [(set_attr "type" "neon_store1_1reg<q>")]) |
88f77cba | 5375 | |
22f9db64 CB |
5376 | ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed |
5377 | ;; here on big endian targets. | |
| ;; VDX form. The lane index is converted with NEON_ENDIAN_LANE_N and written |
| ;; back to operands[2]; modes with a single element (GET_MODE_NUNITS == 1) |
| ;; use the template without a lane suffix. |
88f77cba | 5378 | (define_insn "neon_vst1_lane<mode>" |
6308e208 | 5379 | [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um") |
058e2674 UW |
5380 | (unspec:<V_elem> |
5381 | [(match_operand:VDX 1 "s_register_operand" "w") | |
5382 | (match_operand:SI 2 "immediate_operand" "i")] | |
5383 | UNSPEC_VST1_LANE))] | |
88f77cba JB |
5384 | "TARGET_NEON" |
5385 | { | |
22f9db64 | 5386 | HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2])); |
88f77cba | 5387 | HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode); |
22f9db64 | 5388 | operands[2] = GEN_INT (lane); |
88f77cba | 5389 | if (max == 1) |
6308e208 | 5390 | return "vst1.<V_sz_elem>\t{%P1}, %A0"; |
88f77cba | 5391 | else |
6308e208 | 5392 | return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0"; |
c956e102 | 5393 | } |
f7379e5e JG |
5394 | [(set_attr "type" "neon_store1_one_lane<q>")] |
5395 | ) | |
88f77cba | 5396 | |
22f9db64 CB |
5397 | ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed |
5398 | ;; here on big endian targets. | |
| ;; VQX form. After NEON_ENDIAN_LANE_N, a lane in the upper half |
| ;; (lane >= max / 2) is rebased by max / 2 and the register number is |
| ;; advanced by 2. Operand 1 is then replaced by a <V_HALF>mode register so |
| ;; the template can print it with %P. |
88f77cba | 5399 | (define_insn "neon_vst1_lane<mode>" |
6308e208 | 5400 | [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um") |
058e2674 UW |
5401 | (unspec:<V_elem> |
5402 | [(match_operand:VQX 1 "s_register_operand" "w") | |
5403 | (match_operand:SI 2 "immediate_operand" "i")] | |
5404 | UNSPEC_VST1_LANE))] | |
88f77cba JB |
5405 | "TARGET_NEON" |
5406 | { | |
22f9db64 | 5407 | HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2])); |
88f77cba JB |
5408 | HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode); |
5409 | int regno = REGNO (operands[1]); | |
e68ffe57 | 5410 | if (lane >= max / 2) |
88f77cba JB |
5411 | { |
5412 | lane -= max / 2; | |
5413 | regno += 2; | |
88f77cba | 5414 | } |
22f9db64 | 5415 | operands[2] = GEN_INT (lane); |
88f77cba JB |
5416 | operands[1] = gen_rtx_REG (<V_HALF>mode, regno); |
5417 | if (max == 2) | |
6308e208 | 5418 | return "vst1.<V_sz_elem>\t{%P1}, %A0"; |
88f77cba | 5419 | else |
6308e208 | 5420 | return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0"; |
c956e102 | 5421 | } |
f7379e5e | 5422 | [(set_attr "type" "neon_store1_one_lane<q>")] |
c956e102 | 5423 | ) |
88f77cba | 5424 | |
| ;; Expander with no preparation code: an UNSPEC_VLD2 set of a TImode register |
| ;; from a TImode neon_struct_operand. The UNSPEC_VSTRUCTDUMMY unspec is the |
| ;; only place the VDX iterator (and hence <mode>) appears in the pattern. |
3188ed59 RS |
5425 | (define_expand "vec_load_lanesti<mode>" |
5426 | [(set (match_operand:TI 0 "s_register_operand") | |
5427 | (unspec:TI [(match_operand:TI 1 "neon_struct_operand") | |
5428 | (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] | |
5429 | UNSPEC_VLD2))] | |
5430 | "TARGET_NEON") | |
5431 | ||
| ;; TImode form. Returns a vld1.64 template when <V_sz_elem> is 64 and a vld2 |
| ;; template otherwise; the "type" attribute makes the same distinction on |
| ;; <V_sz_elem>. |
88f77cba JB |
5432 | (define_insn "neon_vld2<mode>" |
5433 | [(set (match_operand:TI 0 "s_register_operand" "=w") | |
6308e208 | 5434 | (unspec:TI [(match_operand:TI 1 "neon_struct_operand" "Um") |
eb637e76 | 5435 | (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
88f77cba JB |
5436 | UNSPEC_VLD2))] |
5437 | "TARGET_NEON" | |
5438 | { | |
5439 | if (<V_sz_elem> == 64) | |
6308e208 | 5440 | return "vld1.64\t%h0, %A1"; |
88f77cba | 5441 | else |
6308e208 | 5442 | return "vld2.<V_sz_elem>\t%h0, %A1"; |
c956e102 | 5443 | } |
003bb7f3 | 5444 | [(set (attr "type") |
c956e102 | 5445 | (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64")) |
f7379e5e JG |
5446 | (const_string "neon_load1_2reg<q>") |
5447 | (const_string "neon_load2_2reg<q>")))] | |
c956e102 | 5448 | ) |
88f77cba | 5449 | |
| ;; Expander with no preparation code: an UNSPEC_VLD2 set of an OImode |
| ;; register from an OImode neon_struct_operand. The UNSPEC_VSTRUCTDUMMY |
| ;; unspec is the only place the VQ2 iterator appears in the pattern. |
3188ed59 RS |
5450 | (define_expand "vec_load_lanesoi<mode>" |
5451 | [(set (match_operand:OI 0 "s_register_operand") | |
5452 | (unspec:OI [(match_operand:OI 1 "neon_struct_operand") | |
4b644867 | 5453 | (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
3188ed59 RS |
5454 | UNSPEC_VLD2))] |
5455 | "TARGET_NEON") | |
5456 | ||
| ;; OImode form (VQ2BF modes): always a vld2 of %h0 from the "Um" memory |
| ;; operand; there is no 64-bit special case here. |
88f77cba JB |
5457 | (define_insn "neon_vld2<mode>" |
5458 | [(set (match_operand:OI 0 "s_register_operand" "=w") | |
6308e208 | 5459 | (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um") |
eb637e76 | 5460 | (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
88f77cba JB |
5461 | UNSPEC_VLD2))] |
5462 | "TARGET_NEON" | |
6308e208 | 5463 | "vld2.<V_sz_elem>\t%h0, %A1" |
f7379e5e | 5464 | [(set_attr "type" "neon_load2_2reg_q")]) |
88f77cba | 5465 | |
22f9db64 CB |
5466 | ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed |
5467 | ;; here on big endian targets. | |
| ;; TImode form. Operand 2 is tied to the destination. The DImode registers |
| ;; at REGNO and REGNO + 2 of operand 0 are printed with the lane index |
| ;; returned by NEON_ENDIAN_LANE_N; the vld2 is emitted via output_asm_insn. |
88f77cba JB |
5468 | (define_insn "neon_vld2_lane<mode>" |
5469 | [(set (match_operand:TI 0 "s_register_operand" "=w") | |
6308e208 | 5470 | (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um") |
88f77cba JB |
5471 | (match_operand:TI 2 "s_register_operand" "0") |
5472 | (match_operand:SI 3 "immediate_operand" "i") | |
4b644867 | 5473 | (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
88f77cba JB |
5474 | UNSPEC_VLD2_LANE))] |
5475 | "TARGET_NEON" | |
5476 | { | |
22f9db64 | 5477 | HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3])); |
88f77cba JB |
5478 | int regno = REGNO (operands[0]); |
5479 | rtx ops[4]; | |
88f77cba JB |
5480 | ops[0] = gen_rtx_REG (DImode, regno); |
5481 | ops[1] = gen_rtx_REG (DImode, regno + 2); | |
5482 | ops[2] = operands[1]; | |
22f9db64 | 5483 | ops[3] = GEN_INT (lane); |
6308e208 | 5484 | output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops); |
88f77cba | 5485 | return ""; |
c956e102 | 5486 | } |
f7379e5e | 5487 | [(set_attr "type" "neon_load2_one_lane<q>")] |
c956e102 | 5488 | ) |
88f77cba | 5489 | |
22f9db64 CB |
5490 | ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed |
5491 | ;; here on big endian targets. | |
| ;; OImode form. A lane in the upper half (lane >= max / 2) is rebased by |
| ;; max / 2 and the base register number advanced by 2. The two DImode |
| ;; registers printed are at regno and regno + 4. |
88f77cba JB |
5492 | (define_insn "neon_vld2_lane<mode>" |
5493 | [(set (match_operand:OI 0 "s_register_operand" "=w") | |
6308e208 | 5494 | (unspec:OI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um") |
88f77cba JB |
5495 | (match_operand:OI 2 "s_register_operand" "0") |
5496 | (match_operand:SI 3 "immediate_operand" "i") | |
4b644867 | 5497 | (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
88f77cba JB |
5498 | UNSPEC_VLD2_LANE))] |
5499 | "TARGET_NEON" | |
5500 | { | |
22f9db64 | 5501 | HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3])); |
88f77cba JB |
5502 | HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode); |
5503 | int regno = REGNO (operands[0]); | |
5504 | rtx ops[4]; | |
e68ffe57 | 5505 | if (lane >= max / 2) |
88f77cba JB |
5506 | { |
5507 | lane -= max / 2; | |
5508 | regno += 2; | |
5509 | } | |
5510 | ops[0] = gen_rtx_REG (DImode, regno); | |
5511 | ops[1] = gen_rtx_REG (DImode, regno + 4); | |
5512 | ops[2] = operands[1]; | |
5513 | ops[3] = GEN_INT (lane); | |
6308e208 | 5514 | output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops); |
88f77cba | 5515 | return ""; |
c956e102 | 5516 | } |
f7379e5e | 5517 | [(set_attr "type" "neon_load2_one_lane<q>")] |
c956e102 | 5518 | ) |
88f77cba JB |
5519 | |
| ;; Modes with more than one element use a vld2 with the "[]" suffix on %e0 |
| ;; and %f0; otherwise a plain vld1 of %h0 is returned. The "type" attribute |
| ;; makes the matching choice on <V_mode_nunits>. |
5520 | (define_insn "neon_vld2_dup<mode>" | |
5521 | [(set (match_operand:TI 0 "s_register_operand" "=w") | |
6308e208 | 5522 | (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um") |
eb637e76 | 5523 | (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
88f77cba JB |
5524 | UNSPEC_VLD2_DUP))] |
5525 | "TARGET_NEON" | |
5526 | { | |
5527 | if (GET_MODE_NUNITS (<MODE>mode) > 1) | |
6308e208 | 5528 | return "vld2.<V_sz_elem>\t{%e0[], %f0[]}, %A1"; |
88f77cba | 5529 | else |
6308e208 | 5530 | return "vld1.<V_sz_elem>\t%h0, %A1"; |
c956e102 | 5531 | } |
003bb7f3 | 5532 | [(set (attr "type") |
c956e102 | 5533 | (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1")) |
f7379e5e JG |
5534 | (const_string "neon_load2_all_lanes<q>") |
5535 | (const_string "neon_load1_1reg<q>")))] | |
c956e102 | 5536 | ) |
88f77cba | 5537 | |
| ;; V8BF form, enabled by TARGET_BF16_SIMD. Prints the four V4BF registers at |
| ;; REGNO, REGNO + 2, REGNO + 4 and REGNO + 6 of operand 0 in one vld2.16. |
| ;; NOTE(review): unlike neon_vld2_dup<mode>, the register list here has no |
| ;; "[]" suffix although the unspec is UNSPEC_VLD2_DUP -- confirm intended. |
eb637e76 DB |
5538 | (define_insn "neon_vld2_dupv8bf" |
5539 | [(set (match_operand:OI 0 "s_register_operand" "=w") | |
5540 | (unspec:OI [(match_operand:V2BF 1 "neon_struct_operand" "Um") | |
5541 | (unspec:V8BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] | |
5542 | UNSPEC_VLD2_DUP))] | |
5543 | "TARGET_BF16_SIMD" | |
5544 | { | |
5545 | rtx ops[5]; | |
5546 | int tabbase = REGNO (operands[0]); | |
5547 | ||
5548 | ops[4] = operands[1]; | |
5549 | ops[0] = gen_rtx_REG (V4BFmode, tabbase); | |
5550 | ops[1] = gen_rtx_REG (V4BFmode, tabbase + 2); | |
5551 | ops[2] = gen_rtx_REG (V4BFmode, tabbase + 4); | |
5552 | ops[3] = gen_rtx_REG (V4BFmode, tabbase + 6); | |
5553 | output_asm_insn ("vld2.16\t{%P0, %P1, %P2, %P3}, %A4", ops); | |
5554 | return ""; | |
5555 | } | |
5556 | [(set_attr "type" "neon_load2_all_lanes_q")] | |
5557 | ) | |
5558 | ||
| ;; Expander with no preparation code: an UNSPEC_VST2 set of a TImode |
| ;; neon_struct_operand from a TImode register. The UNSPEC_VSTRUCTDUMMY |
| ;; unspec is the only place the VDX iterator appears in the pattern. |
3188ed59 RS |
5559 | (define_expand "vec_store_lanesti<mode>" |
5560 | [(set (match_operand:TI 0 "neon_struct_operand") | |
5561 | (unspec:TI [(match_operand:TI 1 "s_register_operand") | |
5562 | (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] | |
5563 | UNSPEC_VST2))] | |
5564 | "TARGET_NEON") | |
5565 | ||
| ;; TImode form. Returns a vst1.64 template when <V_sz_elem> is 64 and a vst2 |
| ;; template otherwise. |
| ;; NOTE(review): the non-64-bit arm of the "type" attribute is |
| ;; neon_store2_one_lane<q> although the template stores whole registers |
| ;; (%h1) -- confirm this scheduling type is intended. |
88f77cba | 5566 | (define_insn "neon_vst2<mode>" |
6308e208 | 5567 | [(set (match_operand:TI 0 "neon_struct_operand" "=Um") |
88f77cba | 5568 | (unspec:TI [(match_operand:TI 1 "s_register_operand" "w") |
ff229375 | 5569 | (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
88f77cba JB |
5570 | UNSPEC_VST2))] |
5571 | "TARGET_NEON" | |
5572 | { | |
5573 | if (<V_sz_elem> == 64) | |
6308e208 | 5574 | return "vst1.64\t%h1, %A0"; |
88f77cba | 5575 | else |
6308e208 | 5576 | return "vst2.<V_sz_elem>\t%h1, %A0"; |
c956e102 | 5577 | } |
003bb7f3 | 5578 | [(set (attr "type") |
c956e102 | 5579 | (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64")) |
f7379e5e JG |
5580 | (const_string "neon_store1_2reg<q>") |
5581 | (const_string "neon_store2_one_lane<q>")))] | |
c956e102 | 5582 | ) |
88f77cba | 5583 | |
| ;; Expander with no preparation code: an UNSPEC_VST2 set of an OImode |
| ;; neon_struct_operand from an OImode register. The UNSPEC_VSTRUCTDUMMY |
| ;; unspec is the only place the VQ2 iterator appears in the pattern. |
3188ed59 RS |
5584 | (define_expand "vec_store_lanesoi<mode>" |
5585 | [(set (match_operand:OI 0 "neon_struct_operand") | |
5586 | (unspec:OI [(match_operand:OI 1 "s_register_operand") | |
4b644867 | 5587 | (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
3188ed59 RS |
5588 | UNSPEC_VST2))] |
5589 | "TARGET_NEON") | |
5590 | ||
| ;; OImode form (VQ2BF modes): always a vst2 of %h1 to the "Um" memory |
| ;; operand; there is no 64-bit special case here. |
88f77cba | 5591 | (define_insn "neon_vst2<mode>" |
6308e208 | 5592 | [(set (match_operand:OI 0 "neon_struct_operand" "=Um") |
88f77cba | 5593 | (unspec:OI [(match_operand:OI 1 "s_register_operand" "w") |
ff229375 | 5594 | (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
88f77cba JB |
5595 | UNSPEC_VST2))] |
5596 | "TARGET_NEON" | |
6308e208 | 5597 | "vst2.<V_sz_elem>\t%h1, %A0" |
f7379e5e | 5598 | [(set_attr "type" "neon_store2_4reg<q>")] |
c956e102 | 5599 | ) |
88f77cba | 5600 | |
22f9db64 CB |
5601 | ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed |
5602 | ;; here on big endian targets. | |
| ;; TImode form. The DImode registers at REGNO and REGNO + 2 of operand 1 are |
| ;; printed with the lane index returned by NEON_ENDIAN_LANE_N; the vst2 is |
| ;; emitted via output_asm_insn. |
88f77cba | 5603 | (define_insn "neon_vst2_lane<mode>" |
6308e208 | 5604 | [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um") |
88f77cba JB |
5605 | (unspec:<V_two_elem> |
5606 | [(match_operand:TI 1 "s_register_operand" "w") | |
5607 | (match_operand:SI 2 "immediate_operand" "i") | |
4b644867 | 5608 | (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
88f77cba JB |
5609 | UNSPEC_VST2_LANE))] |
5610 | "TARGET_NEON" | |
5611 | { | |
22f9db64 | 5612 | HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2])); |
88f77cba JB |
5613 | int regno = REGNO (operands[1]); |
5614 | rtx ops[4]; | |
88f77cba JB |
5615 | ops[0] = operands[0]; |
5616 | ops[1] = gen_rtx_REG (DImode, regno); | |
5617 | ops[2] = gen_rtx_REG (DImode, regno + 2); | |
22f9db64 | 5618 | ops[3] = GEN_INT (lane); |
6308e208 | 5619 | output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops); |
88f77cba | 5620 | return ""; |
c956e102 | 5621 | } |
f7379e5e | 5622 | [(set_attr "type" "neon_store2_one_lane<q>")] |
c956e102 | 5623 | ) |
88f77cba | 5624 | |
22f9db64 CB |
5625 | ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed |
5626 | ;; here on big endian targets. | |
| ;; OImode form. A lane in the upper half (lane >= max / 2) is rebased by |
| ;; max / 2 and the base register number advanced by 2. The two DImode |
| ;; registers printed are at regno and regno + 4. |
88f77cba | 5627 | (define_insn "neon_vst2_lane<mode>" |
6308e208 | 5628 | [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um") |
88f77cba JB |
5629 | (unspec:<V_two_elem> |
5630 | [(match_operand:OI 1 "s_register_operand" "w") | |
5631 | (match_operand:SI 2 "immediate_operand" "i") | |
4b644867 | 5632 | (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
88f77cba JB |
5633 | UNSPEC_VST2_LANE))] |
5634 | "TARGET_NEON" | |
5635 | { | |
22f9db64 | 5636 | HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2])); |
88f77cba JB |
5637 | HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode); |
5638 | int regno = REGNO (operands[1]); | |
5639 | rtx ops[4]; | |
e68ffe57 | 5640 | if (lane >= max / 2) |
88f77cba JB |
5641 | { |
5642 | lane -= max / 2; | |
5643 | regno += 2; | |
5644 | } | |
5645 | ops[0] = operands[0]; | |
5646 | ops[1] = gen_rtx_REG (DImode, regno); | |
5647 | ops[2] = gen_rtx_REG (DImode, regno + 4); | |
5648 | ops[3] = GEN_INT (lane); | |
6308e208 | 5649 | output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops); |
88f77cba | 5650 | return ""; |
c956e102 | 5651 | } |
f7379e5e | 5652 | [(set_attr "type" "neon_store2_one_lane<q>")] |
c956e102 | 5653 | ) |
88f77cba | 5654 | |
3188ed59 RS |
;; Standard-named expander: load an EI-mode structure (VDX element modes)
;; through the UNSPEC_VLD3 pattern.  There is no preparation code.
(define_expand "vec_load_lanesei<mode>"
  [(set (match_operand:EI 0 "s_register_operand")
        (unspec:EI
          [(match_operand:EI 1 "neon_struct_operand")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD3))]
  "TARGET_NEON")
5661 | ||
88f77cba JB |
;; Load an EI-mode register group from memory (VDXBF element modes).
;; 64-bit elements are emitted as vld1.64, everything else as vld3.
(define_insn "neon_vld3<mode>"
  [(set (match_operand:EI 0 "s_register_operand" "=w")
        (unspec:EI
          [(match_operand:EI 1 "neon_struct_operand" "Um")
           (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD3))]
  "TARGET_NEON"
{
  return (<V_sz_elem> == 64
          ? "vld1.64\t%h0, %A1"
          : "vld3.<V_sz_elem>\t%h0, %A1");
}
  [(set (attr "type")
        (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                      (const_string "neon_load1_3reg<q>")
                      (const_string "neon_load3_3reg<q>")))]
)
88f77cba | 5679 | |
3188ed59 RS |
;; Standard-named expander for CI-mode structure loads (VQ2 element
;; modes); it simply forwards to the neon_vld3<mode> expander.
(define_expand "vec_load_lanesci<mode>"
  [(match_operand:CI 0 "s_register_operand")
   (match_operand:CI 1 "neon_struct_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_insn (gen_neon_vld3<mode> (dest, src));
  DONE;
})
5689 | ||
;; Expand a CI-mode vld3 (VQ2BF element modes) into two instructions,
;; neon_vld3qa followed by neon_vld3qb, each reading one EImode-sized
;; piece of the source memory.
(define_expand "neon_vld3<mode>"
  [(match_operand:CI 0 "s_register_operand")
   (match_operand:CI 1 "neon_struct_operand")
   (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  /* First EImode piece, at offset 0.  */
  rtx first_piece = adjust_address (operands[1], EImode, 0);
  emit_insn (gen_neon_vld3qa<mode> (operands[0], first_piece));

  /* Second EImode piece, immediately after the first.  The destination is
     also passed as the tied input operand of the qb pattern.  */
  rtx second_piece = adjust_address (first_piece, EImode,
                                     GET_MODE_SIZE (EImode));
  emit_insn (gen_neon_vld3qb<mode> (operands[0], second_piece, operands[0]));
  DONE;
})
5704 | ||
;; First half of a CI-mode vld3: loads an EI-mode memory operand into the
;; registers at REGNO, REGNO + 4 and REGNO + 8 of the destination group.
(define_insn "neon_vld3qa<mode>"
  [(set (match_operand:CI 0 "s_register_operand" "=w")
        (unspec:CI
          [(match_operand:EI 1 "neon_struct_operand" "Um")
           (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD3A))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[0]);
  rtx xops[4];

  for (int i = 0; i < 3; i++)
    xops[i] = gen_rtx_REG (DImode, base + 4 * i);
  xops[3] = operands[1];
  output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", xops);
  return "";
}
  [(set_attr "type" "neon_load3_3reg<q>")]
)
88f77cba JB |
5723 | |
;; Second half of a CI-mode vld3: loads an EI-mode memory operand into the
;; registers at REGNO + 2, REGNO + 6 and REGNO + 10.  Operand 2 is tied to
;; the output through the "0" constraint.
(define_insn "neon_vld3qb<mode>"
  [(set (match_operand:CI 0 "s_register_operand" "=w")
        (unspec:CI
          [(match_operand:EI 1 "neon_struct_operand" "Um")
           (match_operand:CI 2 "s_register_operand" "0")
           (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD3B))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[0]) + 2;
  rtx xops[4];

  for (int i = 0; i < 3; i++)
    xops[i] = gen_rtx_REG (DImode, base + 4 * i);
  xops[3] = operands[1];
  output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", xops);
  return "";
}
  [(set_attr "type" "neon_load3_3reg<q>")]
)
88f77cba | 5743 | |
22f9db64 CB |
5744 | ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed |
5745 | ;; here on big endian targets. | |
88f77cba JB |
;; Load one lane of a three-element structure into an EI-mode register
;; group (VD_LANE modes).  Operand 2 is the previous register contents,
;; tied to the output; operand 3 is the lane number.
(define_insn "neon_vld3_lane<mode>"
  [(set (match_operand:EI 0 "s_register_operand" "=w")
        (unspec:EI
          [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
           (match_operand:EI 2 "s_register_operand" "0")
           (match_operand:SI 3 "immediate_operand" "i")
           (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD3_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT idx
    = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  const int base = REGNO (operands[0]);
  rtx xops[5];

  /* Three registers, two register numbers apart.  */
  for (int i = 0; i < 3; i++)
    xops[i] = gen_rtx_REG (DImode, base + 2 * i);
  xops[3] = operands[1];
  xops[4] = GEN_INT (idx);
  output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
                   xops);
  return "";
}
  [(set_attr "type" "neon_load3_one_lane<q>")]
)
88f77cba | 5769 | |
22f9db64 CB |
5770 | ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed |
5771 | ;; here on big endian targets. | |
88f77cba JB |
;; Load one lane of a three-element structure into a CI-mode register
;; group (VQ_HS modes).  Operand 2 is the previous register contents,
;; tied to the output; operand 3 is the lane number.
(define_insn "neon_vld3_lane<mode>"
  [(set (match_operand:CI 0 "s_register_operand" "=w")
        (unspec:CI
          [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
           (match_operand:CI 2 "s_register_operand" "0")
           (match_operand:SI 3 "immediate_operand" "i")
           (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD3_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT idx = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  HOST_WIDE_INT half = nunits / 2;
  int base = REGNO (operands[0]);
  rtx xops[5];

  /* A lane in the upper half of the vector is rebased to the lower half
     and the register numbers are moved on by two.  */
  if (idx >= half)
    {
      idx -= half;
      base += 2;
    }

  for (int i = 0; i < 3; i++)
    xops[i] = gen_rtx_REG (DImode, base + 4 * i);
  xops[3] = operands[1];
  xops[4] = GEN_INT (idx);
  output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
                   xops);
  return "";
}
  [(set_attr "type" "neon_load3_one_lane<q>")]
)
88f77cba JB |
5801 | |
;; Load a three-element structure and replicate it to all lanes of an
;; EI-mode register group (VDXBF modes).  Single-element vector modes are
;; emitted as a plain vld1 instead.
(define_insn "neon_vld3_dup<mode>"
  [(set (match_operand:EI 0 "s_register_operand" "=w")
        (unspec:EI
          [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
           (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD3_DUP))]
  "TARGET_NEON"
{
  /* With only one element per vector there is nothing to duplicate.  */
  if (!(GET_MODE_NUNITS (<MODE>mode) > 1))
    return "vld1.<V_sz_elem>\t%h0, %A1";

  const int base = REGNO (operands[0]);
  rtx xops[4];

  for (int i = 0; i < 3; i++)
    xops[i] = gen_rtx_REG (DImode, base + 2 * i);
  xops[3] = operands[1];
  output_asm_insn ("vld3.<V_sz_elem>\t{%P0[], %P1[], %P2[]}, %3", xops);
  return "";
}
  [(set (attr "type")
        (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
                      (const_string "neon_load3_all_lanes<q>")
                      (const_string "neon_load1_1reg<q>")))])
88f77cba | 5827 | |
eb637e76 DB |
;; Load a three-element BF16 structure and replicate it to all lanes
;; (V8BF variant, CI-mode destination).  The operation is tagged with
;; UNSPEC_VLD3_DUP, the same unspec that neon_vld3_dup<mode> uses, so the
;; RTL identifies it as a three-element dup load rather than a
;; two-element one.
(define_insn "neon_vld3_dupv8bf"
  [(set (match_operand:CI 0 "s_register_operand" "=w")
        (unspec:CI [(match_operand:V2BF 1 "neon_struct_operand" "Um")
                    (unspec:V8BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3_DUP))]
  "TARGET_BF16_SIMD"
{
  rtx ops[4];
  int tabbase = REGNO (operands[0]);

  /* Operand 3 is the memory source; operands 0-2 are the three
     destination registers, two register numbers apart.  */
  ops[3] = operands[1];
  ops[0] = gen_rtx_REG (V4BFmode, tabbase);
  ops[1] = gen_rtx_REG (V4BFmode, tabbase + 2);
  ops[2] = gen_rtx_REG (V4BFmode, tabbase + 4);
  output_asm_insn ("vld3.16\t{%P0[], %P1[], %P2[]}, %A3", ops);
  return "";
}
  [(set_attr "type" "neon_load3_all_lanes_q")]
)
5847 | ||
3188ed59 RS |
;; Standard-named expander: store an EI-mode structure (VDX element
;; modes) through the UNSPEC_VST3 pattern.  There is no preparation code.
(define_expand "vec_store_lanesei<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand")
        (unspec:EI
          [(match_operand:EI 1 "s_register_operand")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST3))]
  "TARGET_NEON")
5854 | ||
;; Store an EI-mode register group to memory (VDXBF element modes).
;; 64-bit elements are emitted as vst1.64, everything else as vst3.
;; The vst3 form stores whole registers, so it is given the
;; neon_store3_3reg type, as neon_vst3qa/neon_vst3qb are, rather than a
;; single-lane store type.
(define_insn "neon_vst3<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
        (unspec:EI [(match_operand:EI 1 "s_register_operand" "w")
                    (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST3))]
  "TARGET_NEON"
{
  if (<V_sz_elem> == 64)
    return "vst1.64\t%h1, %A0";
  else
    return "vst3.<V_sz_elem>\t%h1, %A0";
}
  [(set (attr "type")
        (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                      (const_string "neon_store1_3reg<q>")
                      (const_string "neon_store3_3reg<q>")))])
88f77cba | 5871 | |
3188ed59 RS |
;; Standard-named expander for CI-mode structure stores (VQ2 element
;; modes); it simply forwards to the neon_vst3<mode> expander.
(define_expand "vec_store_lanesci<mode>"
  [(match_operand:CI 0 "neon_struct_operand")
   (match_operand:CI 1 "s_register_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_insn (gen_neon_vst3<mode> (dest, src));
  DONE;
})
5881 | ||
;; Expand a CI-mode vst3 (VQ2BF element modes) into two instructions,
;; neon_vst3qa followed by neon_vst3qb, each writing one EImode-sized
;; piece of the destination memory.
(define_expand "neon_vst3<mode>"
  [(match_operand:CI 0 "neon_struct_operand")
   (match_operand:CI 1 "s_register_operand")
   (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  /* First EImode piece, at offset 0.  */
  rtx first_piece = adjust_address (operands[0], EImode, 0);
  emit_insn (gen_neon_vst3qa<mode> (first_piece, operands[1]));

  /* Second EImode piece, immediately after the first.  */
  rtx second_piece = adjust_address (first_piece, EImode,
                                     GET_MODE_SIZE (EImode));
  emit_insn (gen_neon_vst3qb<mode> (second_piece, operands[1]));
  DONE;
})
5896 | ||
;; First half of a CI-mode vst3: stores the registers at REGNO, REGNO + 4
;; and REGNO + 8 of the source group to an EI-mode memory operand.
(define_insn "neon_vst3qa<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
        (unspec:EI
          [(match_operand:CI 1 "s_register_operand" "w")
           (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST3A))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[1]);
  rtx xops[4];

  xops[0] = operands[0];
  for (int i = 0; i < 3; i++)
    xops[i + 1] = gen_rtx_REG (DImode, base + 4 * i);
  output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", xops);
  return "";
}
  [(set_attr "type" "neon_store3_3reg<q>")]
)
88f77cba JB |
5915 | |
;; Second half of a CI-mode vst3: stores the registers at REGNO + 2,
;; REGNO + 6 and REGNO + 10 of the source group to an EI-mode memory
;; operand.
(define_insn "neon_vst3qb<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
        (unspec:EI
          [(match_operand:CI 1 "s_register_operand" "w")
           (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST3B))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[1]) + 2;
  rtx xops[4];

  xops[0] = operands[0];
  for (int i = 0; i < 3; i++)
    xops[i + 1] = gen_rtx_REG (DImode, base + 4 * i);
  output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", xops);
  return "";
}
  [(set_attr "type" "neon_store3_3reg<q>")]
)
88f77cba | 5934 | |
22f9db64 CB |
5935 | ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed |
5936 | ;; here on big endian targets. | |
;; Store one lane of a three-element structure from an EI-mode register
;; group (VD_LANE modes).  Operand 2 is the lane number.
(define_insn "neon_vst3_lane<mode>"
  [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_three_elem>
          [(match_operand:EI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST3_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT idx
    = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  const int base = REGNO (operands[1]);
  rtx xops[5];

  xops[0] = operands[0];
  /* Three registers, two register numbers apart.  */
  for (int i = 0; i < 3; i++)
    xops[i + 1] = gen_rtx_REG (DImode, base + 2 * i);
  xops[4] = GEN_INT (idx);
  output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
                   xops);
  return "";
}
  [(set_attr "type" "neon_store3_one_lane<q>")]
)
88f77cba | 5960 | |
22f9db64 CB |
5961 | ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed |
5962 | ;; here on big endian targets. | |
;; Store one lane of a three-element structure from a CI-mode register
;; group (VQ_HS modes).  Operand 2 is the lane number.
(define_insn "neon_vst3_lane<mode>"
  [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_three_elem>
          [(match_operand:CI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST3_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT idx = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  HOST_WIDE_INT half = nunits / 2;
  int base = REGNO (operands[1]);
  rtx xops[5];

  /* A lane in the upper half of the vector is rebased to the lower half
     and the register numbers are moved on by two.  */
  if (idx >= half)
    {
      idx -= half;
      base += 2;
    }

  xops[0] = operands[0];
  for (int i = 0; i < 3; i++)
    xops[i + 1] = gen_rtx_REG (DImode, base + 4 * i);
  xops[4] = GEN_INT (idx);
  output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
                   xops);
  return "";
}
  [(set_attr "type" "neon_store3_one_lane<q>")]
)
88f77cba | 5992 | |
3188ed59 RS |
;; Standard-named expander: load an OI-mode structure (VDX element modes)
;; through the UNSPEC_VLD4 pattern.  There is no preparation code.
(define_expand "vec_load_lanesoi<mode>"
  [(set (match_operand:OI 0 "s_register_operand")
        (unspec:OI
          [(match_operand:OI 1 "neon_struct_operand")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD4))]
  "TARGET_NEON")
5999 | ||
88f77cba JB |
;; Load an OI-mode register group from memory (VDXBF element modes).
;; 64-bit elements are emitted as vld1.64, everything else as vld4.
(define_insn "neon_vld4<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI
          [(match_operand:OI 1 "neon_struct_operand" "Um")
           (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD4))]
  "TARGET_NEON"
{
  return (<V_sz_elem> == 64
          ? "vld1.64\t%h0, %A1"
          : "vld4.<V_sz_elem>\t%h0, %A1");
}
  [(set (attr "type")
        (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                      (const_string "neon_load1_4reg<q>")
                      (const_string "neon_load4_4reg<q>")))]
)
88f77cba | 6017 | |
3188ed59 RS |
;; Standard-named expander for XI-mode structure loads (VQ2 element
;; modes); it simply forwards to the neon_vld4<mode> expander.
(define_expand "vec_load_lanesxi<mode>"
  [(match_operand:XI 0 "s_register_operand")
   (match_operand:XI 1 "neon_struct_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_insn (gen_neon_vld4<mode> (dest, src));
  DONE;
})
6027 | ||
;; Expand an XI-mode vld4 (VQ2BF element modes) into two instructions,
;; neon_vld4qa followed by neon_vld4qb, each reading one OImode-sized
;; piece of the source memory.
(define_expand "neon_vld4<mode>"
  [(match_operand:XI 0 "s_register_operand")
   (match_operand:XI 1 "neon_struct_operand")
   (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  /* First OImode piece, at offset 0.  */
  rtx first_piece = adjust_address (operands[1], OImode, 0);
  emit_insn (gen_neon_vld4qa<mode> (operands[0], first_piece));

  /* Second OImode piece, immediately after the first.  The destination is
     also passed as the tied input operand of the qb pattern.  */
  rtx second_piece = adjust_address (first_piece, OImode,
                                     GET_MODE_SIZE (OImode));
  emit_insn (gen_neon_vld4qb<mode> (operands[0], second_piece, operands[0]));
  DONE;
})
6042 | ||
;; First half of an XI-mode vld4: loads an OI-mode memory operand into
;; the registers at REGNO, REGNO + 4, REGNO + 8 and REGNO + 12.
(define_insn "neon_vld4qa<mode>"
  [(set (match_operand:XI 0 "s_register_operand" "=w")
        (unspec:XI
          [(match_operand:OI 1 "neon_struct_operand" "Um")
           (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD4A))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[0]);
  rtx xops[5];

  for (int i = 0; i < 4; i++)
    xops[i] = gen_rtx_REG (DImode, base + 4 * i);
  xops[4] = operands[1];
  output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", xops);
  return "";
}
  [(set_attr "type" "neon_load4_4reg<q>")]
)
88f77cba JB |
6062 | |
;; Second half of an XI-mode vld4: loads an OI-mode memory operand into
;; the registers at REGNO + 2, REGNO + 6, REGNO + 10 and REGNO + 14.
;; Operand 2 is tied to the output through the "0" constraint.
(define_insn "neon_vld4qb<mode>"
  [(set (match_operand:XI 0 "s_register_operand" "=w")
        (unspec:XI
          [(match_operand:OI 1 "neon_struct_operand" "Um")
           (match_operand:XI 2 "s_register_operand" "0")
           (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD4B))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[0]) + 2;
  rtx xops[5];

  for (int i = 0; i < 4; i++)
    xops[i] = gen_rtx_REG (DImode, base + 4 * i);
  xops[4] = operands[1];
  output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", xops);
  return "";
}
  [(set_attr "type" "neon_load4_4reg<q>")]
)
88f77cba | 6083 | |
22f9db64 CB |
6084 | ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed |
6085 | ;; here on big endian targets. | |
88f77cba JB |
;; Load one lane of a four-element structure into an OI-mode register
;; group (VD_LANE modes).  Operand 2 is the previous register contents,
;; tied to the output; operand 3 is the lane number.
(define_insn "neon_vld4_lane<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI
          [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
           (match_operand:OI 2 "s_register_operand" "0")
           (match_operand:SI 3 "immediate_operand" "i")
           (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD4_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT idx
    = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  const int base = REGNO (operands[0]);
  rtx xops[6];

  /* Four registers, two register numbers apart.  */
  for (int i = 0; i < 4; i++)
    xops[i] = gen_rtx_REG (DImode, base + 2 * i);
  xops[4] = operands[1];
  xops[5] = GEN_INT (idx);
  output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
                   xops);
  return "";
}
  [(set_attr "type" "neon_load4_one_lane<q>")]
)
88f77cba | 6110 | |
22f9db64 CB |
6111 | ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed |
6112 | ;; here on big endian targets. | |
88f77cba JB |
;; Load one lane of a four-element structure into an XI-mode register
;; group (VQ_HS modes).  Operand 2 is the previous register contents,
;; tied to the output; operand 3 is the lane number.
(define_insn "neon_vld4_lane<mode>"
  [(set (match_operand:XI 0 "s_register_operand" "=w")
        (unspec:XI
          [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
           (match_operand:XI 2 "s_register_operand" "0")
           (match_operand:SI 3 "immediate_operand" "i")
           (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD4_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT idx = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  HOST_WIDE_INT half = nunits / 2;
  int base = REGNO (operands[0]);
  rtx xops[6];

  /* A lane in the upper half of the vector is rebased to the lower half
     and the register numbers are moved on by two.  */
  if (idx >= half)
    {
      idx -= half;
      base += 2;
    }

  for (int i = 0; i < 4; i++)
    xops[i] = gen_rtx_REG (DImode, base + 4 * i);
  xops[4] = operands[1];
  xops[5] = GEN_INT (idx);
  output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
                   xops);
  return "";
}
  [(set_attr "type" "neon_load4_one_lane<q>")]
)
88f77cba JB |
6143 | |
;; Load a four-element structure and replicate it to all lanes of an
;; OI-mode register group (VDXBF modes).  Single-element vector modes are
;; emitted as a plain vld1 instead.
(define_insn "neon_vld4_dup<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI
          [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
           (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD4_DUP))]
  "TARGET_NEON"
{
  /* With only one element per vector there is nothing to duplicate.  */
  if (!(GET_MODE_NUNITS (<MODE>mode) > 1))
    return "vld1.<V_sz_elem>\t%h0, %A1";

  const int base = REGNO (operands[0]);
  rtx xops[5];

  for (int i = 0; i < 4; i++)
    xops[i] = gen_rtx_REG (DImode, base + 2 * i);
  xops[4] = operands[1];
  output_asm_insn ("vld4.<V_sz_elem>\t{%P0[], %P1[], %P2[], %P3[]}, %A4",
                   xops);
  return "";
}
  [(set (attr "type")
        (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
                      (const_string "neon_load4_all_lanes<q>")
                      (const_string "neon_load1_1reg<q>")))]
)
88f77cba | 6172 | |
eb637e76 DB |
;; Load a four-element BF16 structure and replicate it to all lanes
;; (V8BF variant, XI-mode destination).  The operation is tagged with
;; UNSPEC_VLD4_DUP, the same unspec that neon_vld4_dup<mode> uses, so the
;; RTL identifies it as a four-element dup load rather than a
;; two-element one.
(define_insn "neon_vld4_dupv8bf"
  [(set (match_operand:XI 0 "s_register_operand" "=w")
        (unspec:XI [(match_operand:V2BF 1 "neon_struct_operand" "Um")
                    (unspec:V8BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4_DUP))]
  "TARGET_BF16_SIMD"
{
  rtx ops[5];
  int tabbase = REGNO (operands[0]);

  /* Operand 4 is the memory source; operands 0-3 are the four
     destination registers, two register numbers apart.  */
  ops[4] = operands[1];
  ops[0] = gen_rtx_REG (V4BFmode, tabbase);
  ops[1] = gen_rtx_REG (V4BFmode, tabbase + 2);
  ops[2] = gen_rtx_REG (V4BFmode, tabbase + 4);
  ops[3] = gen_rtx_REG (V4BFmode, tabbase + 6);
  output_asm_insn ("vld4.16\t{%P0[], %P1[], %P2[], %P3[]}, %A4", ops);
  return "";
}
  [(set_attr "type" "neon_load4_all_lanes_q")]
)
6193 | ||
3188ed59 RS |
;; Standard-named expander: store an OI-mode structure (VDX element
;; modes) through the UNSPEC_VST4 pattern.  There is no preparation code.
(define_expand "vec_store_lanesoi<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand")
        (unspec:OI
          [(match_operand:OI 1 "s_register_operand")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST4))]
  "TARGET_NEON")
6200 | ||
;; Store an OI-mode register group to memory (VDXBF element modes).
;; 64-bit elements are emitted as vst1.64, everything else as vst4.
(define_insn "neon_vst4<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
        (unspec:OI
          [(match_operand:OI 1 "s_register_operand" "w")
           (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST4))]
  "TARGET_NEON"
{
  return (<V_sz_elem> == 64
          ? "vst1.64\t%h1, %A0"
          : "vst4.<V_sz_elem>\t%h1, %A0");
}
  [(set (attr "type")
        (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                      (const_string "neon_store1_4reg<q>")
                      (const_string "neon_store4_4reg<q>")))]
)
88f77cba | 6218 | |
3188ed59 RS |
;; Standard-named expander for XI-mode structure stores (VQ2 element
;; modes); it simply forwards to the neon_vst4<mode> expander.
(define_expand "vec_store_lanesxi<mode>"
  [(match_operand:XI 0 "neon_struct_operand")
   (match_operand:XI 1 "s_register_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_insn (gen_neon_vst4<mode> (dest, src));
  DONE;
})
6228 | ||
;; Expand an XI-mode vst4 (VQ2BF element modes) into two instructions,
;; neon_vst4qa followed by neon_vst4qb, each writing one OImode-sized
;; piece of the destination memory.
(define_expand "neon_vst4<mode>"
  [(match_operand:XI 0 "neon_struct_operand")
   (match_operand:XI 1 "s_register_operand")
   (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  /* First OImode piece, at offset 0.  */
  rtx first_piece = adjust_address (operands[0], OImode, 0);
  emit_insn (gen_neon_vst4qa<mode> (first_piece, operands[1]));

  /* Second OImode piece, immediately after the first.  */
  rtx second_piece = adjust_address (first_piece, OImode,
                                     GET_MODE_SIZE (OImode));
  emit_insn (gen_neon_vst4qb<mode> (second_piece, operands[1]));
  DONE;
})
6243 | ||
;; First half of an XI-mode vst4: stores the registers at REGNO,
;; REGNO + 4, REGNO + 8 and REGNO + 12 of the source group to an OI-mode
;; memory operand.
(define_insn "neon_vst4qa<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
        (unspec:OI
          [(match_operand:XI 1 "s_register_operand" "w")
           (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST4A))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[1]);
  rtx xops[5];

  xops[0] = operands[0];
  for (int i = 0; i < 4; i++)
    xops[i + 1] = gen_rtx_REG (DImode, base + 4 * i);
  output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", xops);
  return "";
}
  [(set_attr "type" "neon_store4_4reg<q>")]
)
88f77cba JB |
6263 | |
;; Second half of an XI-mode vst4: stores the registers at REGNO + 2,
;; REGNO + 6, REGNO + 10 and REGNO + 14 of the source group to an OI-mode
;; memory operand.
(define_insn "neon_vst4qb<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
        (unspec:OI
          [(match_operand:XI 1 "s_register_operand" "w")
           (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST4B))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[1]) + 2;
  rtx xops[5];

  xops[0] = operands[0];
  for (int i = 0; i < 4; i++)
    xops[i + 1] = gen_rtx_REG (DImode, base + 4 * i);
  output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", xops);
  return "";
}
  [(set_attr "type" "neon_store4_4reg<q>")]
)
88f77cba | 6283 | |
22f9db64 CB |
6284 | ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed |
6285 | ;; here on big endian targets. | |
;; Store one lane of a four-element structure from an OI-mode register
;; group (VD_LANE modes).  Operand 2 is the lane number.
(define_insn "neon_vst4_lane<mode>"
  [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_four_elem>
          [(match_operand:OI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST4_LANE))]
  "TARGET_NEON"
{
  const HOST_WIDE_INT idx
    = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  const int base = REGNO (operands[1]);
  rtx xops[6];

  xops[0] = operands[0];
  /* Four registers, two register numbers apart.  */
  for (int i = 0; i < 4; i++)
    xops[i + 1] = gen_rtx_REG (DImode, base + 2 * i);
  xops[5] = GEN_INT (idx);
  output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
                   xops);
  return "";
}
  [(set_attr "type" "neon_store4_one_lane<q>")]
)
88f77cba | 6310 | |
22f9db64 CB |
6311 | ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed |
6312 | ;; here on big endian targets. | |
;; Store one lane of a four-element structure from an XI-mode register
;; group (VQ_HS modes).  Operand 2 is the lane number.  This is a
;; single-lane store, so it carries the neon_store4_one_lane type, as the
;; VD_LANE variant of this pattern does.
(define_insn "neon_vst4_lane<mode>"
  [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_four_elem>
          [(match_operand:XI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST4_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
  int regno = REGNO (operands[1]);
  rtx ops[6];

  /* A lane in the upper half of the vector is rebased to the lower half
     and the register numbers are moved on by two.  */
  if (lane >= max / 2)
    {
      lane -= max / 2;
      regno += 2;
    }
  ops[0] = operands[0];
  ops[1] = gen_rtx_REG (DImode, regno);
  ops[2] = gen_rtx_REG (DImode, regno + 4);
  ops[3] = gen_rtx_REG (DImode, regno + 8);
  ops[4] = gen_rtx_REG (DImode, regno + 12);
  ops[5] = GEN_INT (lane);
  output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
                   ops);
  return "";
}
  [(set_attr "type" "neon_store4_one_lane<q>")]
)
88f77cba | 6343 | |
46b57af1 TB |
;; Extend (SE) the elements selected by a vect_par_constant_low
;; parallel from operand 1 into the wider <V_unpack> mode, using vmovl.
;; Only available when !BYTES_BIG_ENDIAN.
(define_insn "neon_vec_unpack<US>_lo_<mode>"
  [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
        (SE:<V_unpack>
          (vec_select:<V_HALF>
            (match_operand:VU 1 "register_operand" "w")
            (match_operand:VU 2 "vect_par_constant_low" ""))))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vmovl.<US><V_sz_elem> %q0, %e1"
  [(set_attr "type" "neon_shift_imm_long")]
)
6353 | ||
;; Extend (SE) the lanes of operand 1 selected by the high-half parallel in
;; operand 2 into the wider vector operand 0, using vmovl on the %f part of
;; the source.  Only enabled for little-endian.
(define_insn "neon_vec_unpack<US>_hi_<mode>"
  [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
        (SE:<V_unpack>
          (vec_select:<V_HALF>
            (match_operand:VU 1 "register_operand" "w")
            (match_operand:VU 2 "vect_par_constant_high" ""))))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vmovl.<US><V_sz_elem> %q0, %f1"
  [(set_attr "type" "neon_shift_imm_long")]
)
6363 | ||
;; Expander for VU modes: build the parallel naming the upper half of the
;; lane indices and hand it to neon_vec_unpack<US>_hi_<mode>.
;; Only enabled for little-endian.
(define_expand "vec_unpack<US>_hi_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  const int half_lanes = <V_mode_nunits> / 2;
  rtvec lane_sel = rtvec_alloc (half_lanes);

  /* Lane indices half_lanes .. 2 * half_lanes - 1.  */
  for (int k = 0; k < half_lanes; k++)
    RTVEC_ELT (lane_sel, k) = GEN_INT (half_lanes + k);

  rtx high_par = gen_rtx_PARALLEL (<MODE>mode, lane_sel);
  emit_insn (gen_neon_vec_unpack<US>_hi_<mode> (operands[0],
                                                 operands[1],
                                                 high_par));
  DONE;
}
)
6382 | ||
;; Expander for VU modes: build the parallel naming the lower half of the
;; lane indices and hand it to neon_vec_unpack<US>_lo_<mode>.
;; Only enabled for little-endian.
(define_expand "vec_unpack<US>_lo_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  const int half_lanes = <V_mode_nunits> / 2;
  rtvec lane_sel = rtvec_alloc (half_lanes);

  /* Lane indices 0 .. half_lanes - 1.  */
  for (int k = 0; k < half_lanes; k++)
    RTVEC_ELT (lane_sel, k) = GEN_INT (k);

  rtx low_par = gen_rtx_PARALLEL (<MODE>mode, lane_sel);
  emit_insn (gen_neon_vec_unpack<US>_lo_<mode> (operands[0],
                                                 operands[1],
                                                 low_par));
  DONE;
}
)
6400 | ||
;; Widening multiply of the low-half lanes (selected by the parallel in
;; operand 2) of operands 1 and 3, emitted as vmull on the %e parts.
;; Only enabled for little-endian.
(define_insn "neon_vec_<US>mult_lo_<mode>"
  [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
        (mult:<V_unpack>
          (SE:<V_unpack>
            (vec_select:<V_HALF>
              (match_operand:VU 1 "register_operand" "w")
              (match_operand:VU 2 "vect_par_constant_low" "")))
          (SE:<V_unpack>
            (vec_select:<V_HALF>
              (match_operand:VU 3 "register_operand" "w")
              (match_dup 2)))))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vmull.<US><V_sz_elem> %q0, %e1, %e3"
  [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
)
6413 | ||
;; Expander for VU modes: build the low-half lane parallel and emit
;; neon_vec_<US>mult_lo_<mode> on operands 1 and 2.
;; Only enabled for little-endian.
(define_expand "vec_widen_<US>mult_lo_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand"))
   (SE:<V_unpack> (match_operand:VU 2 "register_operand"))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  const int half_lanes = <V_mode_nunits> / 2;
  rtvec lane_sel = rtvec_alloc (half_lanes);

  /* Lane indices 0 .. half_lanes - 1.  */
  for (int k = 0; k < half_lanes; k++)
    RTVEC_ELT (lane_sel, k) = GEN_INT (k);

  rtx low_par = gen_rtx_PARALLEL (<MODE>mode, lane_sel);

  /* The insn takes the selector as its operand 2 and the second
     multiplicand as its operand 3.  */
  emit_insn (gen_neon_vec_<US>mult_lo_<mode> (operands[0],
                                               operands[1],
                                               low_par,
                                               operands[2]));
  DONE;
}
)
6434 | ||
;; Widening multiply of the high-half lanes (selected by the parallel in
;; operand 2) of operands 1 and 3, emitted as vmull on the %f parts.
;; Only enabled for little-endian.
(define_insn "neon_vec_<US>mult_hi_<mode>"
  [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
        (mult:<V_unpack>
          (SE:<V_unpack>
            (vec_select:<V_HALF>
              (match_operand:VU 1 "register_operand" "w")
              (match_operand:VU 2 "vect_par_constant_high" "")))
          (SE:<V_unpack>
            (vec_select:<V_HALF>
              (match_operand:VU 3 "register_operand" "w")
              (match_dup 2)))))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vmull.<US><V_sz_elem> %q0, %f1, %f3"
  [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
)
6447 | ||
;; Expander for VU modes: build the high-half lane parallel and emit
;; neon_vec_<US>mult_hi_<mode> on operands 1 and 2.
;; Only enabled for little-endian.
(define_expand "vec_widen_<US>mult_hi_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand"))
   (SE:<V_unpack> (match_operand:VU 2 "register_operand"))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  const int half_lanes = <V_mode_nunits> / 2;
  rtvec lane_sel = rtvec_alloc (half_lanes);

  /* Lane indices half_lanes .. 2 * half_lanes - 1.  */
  for (int k = 0; k < half_lanes; k++)
    RTVEC_ELT (lane_sel, k) = GEN_INT (half_lanes + k);

  rtx high_par = gen_rtx_PARALLEL (<MODE>mode, lane_sel);

  /* The insn takes the selector as its operand 2 and the second
     multiplicand as its operand 3.  */
  emit_insn (gen_neon_vec_<US>mult_hi_<mode> (operands[0],
                                               operands[1],
                                               high_par,
                                               operands[2]));
  DONE;
}
)
6469 | ||
36ba4aae IR |
;; Widening shift left: operand 1 (VW modes) shifted by the constant in
;; operand 2, then extended (SE) into the wider operand 0; emitted as vshll.
(define_insn "neon_vec_<US>shiftl_<mode>"
  [(set (match_operand:<V_widen> 0 "register_operand" "=w")
        (SE:<V_widen>
          (ashift:VW
            (match_operand:VW 1 "register_operand" "w")
            (match_operand:<V_innermode> 2
              "const_neon_scalar_shift_amount_operand" ""))))]
  "TARGET_NEON"
  "vshll.<US><V_sz_elem> %q0, %P1, %2"
  [(set_attr "type" "neon_shift_imm_long")]
)
6480 | ||
;; Expander for VU modes: apply neon_vec_<US>shiftl_<V_half> to the
;; <V_HALF>mode subreg of operand 1 at byte offset 0.
;; Only enabled for little-endian.
(define_expand "vec_widen_<US>shiftl_lo_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand"))
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  rtx low_half = simplify_gen_subreg (<V_HALF>mode, operands[1],
                                      <MODE>mode, 0);

  emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0], low_half,
                                                operands[2]));
  DONE;
}
)
6493 | ||
;; Expander for VU modes: apply neon_vec_<US>shiftl_<V_half> to the
;; <V_HALF>mode subreg of operand 1 that starts one half-vector
;; (GET_MODE_SIZE (<V_HALF>mode) bytes) into the register.
;; Only enabled for little-endian.
(define_expand "vec_widen_<US>shiftl_hi_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand"))
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  rtx high_half = simplify_gen_subreg (<V_HALF>mode, operands[1],
                                       <MODE>mode,
                                       GET_MODE_SIZE (<V_HALF>mode));

  emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0], high_half,
                                                operands[2]));
  DONE;
}
)
6507 | ||
46b57af1 TB |
;; Vectorize for non-neon-quad case: extend (SE) a whole VDI-mode vector
;; into its <V_widen> counterpart with vmovl.
;; NOTE(review): this is typed "neon_move", while the vmovl patterns
;; neon_vec_unpack<US>_{lo,hi}_<mode> use "neon_shift_imm_long" -- confirm
;; whether the difference is intentional.
(define_insn "neon_unpack<US>_<mode>"
  [(set (match_operand:<V_widen> 0 "register_operand" "=w")
        (SE:<V_widen>
          (match_operand:VDI 1 "register_operand" "w")))]
  "TARGET_NEON"
  "vmovl.<US><V_sz_elem> %q0, %P1"
  [(set_attr "type" "neon_move")]
)
6516 | ||
;; Expander for VDI modes: widen the whole input into a fresh <V_widen>
;; register, then copy its low half to operand 0.
(define_expand "vec_unpack<US>_lo_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))]
  "TARGET_NEON"
{
  rtx widened = gen_reg_rtx (<V_widen>mode);

  emit_insn (gen_neon_unpack<US>_<mode> (widened, operands[1]));
  emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], widened));
  DONE;
}
)
6529 | ||
;; Expander for VDI modes: widen the whole input into a fresh <V_widen>
;; register, then copy its high half to operand 0.
(define_expand "vec_unpack<US>_hi_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))]
  "TARGET_NEON"
{
  rtx widened = gen_reg_rtx (<V_widen>mode);

  emit_insn (gen_neon_unpack<US>_<mode> (widened, operands[1]));
  emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], widened));
  DONE;
}
)
6542 | ||
;; Widening multiply of two whole VDI-mode vectors into a <V_widen> result,
;; emitted as vmull.
(define_insn "neon_vec_<US>mult_<mode>"
  [(set (match_operand:<V_widen> 0 "register_operand" "=w")
        (mult:<V_widen>
          (SE:<V_widen>
            (match_operand:VDI 1 "register_operand" "w"))
          (SE:<V_widen>
            (match_operand:VDI 2 "register_operand" "w"))))]
  "TARGET_NEON"
  "vmull.<US><V_sz_elem> %q0, %P1, %P2"
  [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
)
6553 | ||
;; Expander for VDI modes: compute the full widening product into a fresh
;; <V_widen> register, then copy its high half to operand 0.
(define_expand "vec_widen_<US>mult_hi_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))
   (SE:<V_double_width> (match_operand:VDI 2 "register_operand"))]
  "TARGET_NEON"
{
  rtx product = gen_reg_rtx (<V_widen>mode);

  emit_insn (gen_neon_vec_<US>mult_<mode> (product, operands[1], operands[2]));
  emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], product));
  DONE;
}
)
6568 | ||
;; Expander for VDI modes: compute the full widening product into a fresh
;; <V_widen> register, then copy its low half to operand 0.
(define_expand "vec_widen_<US>mult_lo_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))
   (SE:<V_double_width> (match_operand:VDI 2 "register_operand"))]
  "TARGET_NEON"
{
  rtx product = gen_reg_rtx (<V_widen>mode);

  emit_insn (gen_neon_vec_<US>mult_<mode> (product, operands[1], operands[2]));
  emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], product));
  DONE;
}
)
0f38f229 | 6583 | |
;; Expander for VDI modes: perform the widening shift into a fresh
;; <V_widen> register, then copy its high half to operand 0.
(define_expand "vec_widen_<US>shiftl_hi_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON"
{
  rtx shifted = gen_reg_rtx (<V_widen>mode);

  emit_insn (gen_neon_vec_<US>shiftl_<mode> (shifted, operands[1], operands[2]));
  emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], shifted));
  DONE;
}
)
6597 | ||
;; Expander for VDI modes: perform the widening shift into a fresh
;; <V_widen> register, then copy its low half to operand 0.
(define_expand "vec_widen_<US>shiftl_lo_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON"
{
  rtx shifted = gen_reg_rtx (<V_widen>mode);

  emit_insn (gen_neon_vec_<US>shiftl_<mode> (shifted, operands[1], operands[2]));
  emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], shifted));
  DONE;
}
)
6611 | ||
0094f21b JB |
6612 | ; FIXME: These instruction patterns can't be used safely in big-endian mode |
6613 | ; because the ordering of vector elements in Q registers is different from what | |
6614 | ; the semantics of the instructions require. | |
6615 | ||
0f38f229 TB |
;; Truncate two VN-mode vectors and concatenate the narrowed results:
;; operand 1 goes to the %e part and operand 2 to the %f part of operand 0.
;; Two vmovn instructions are emitted (length 8); the output is
;; early-clobbered ("=&w").  Only enabled for little-endian.
(define_insn "vec_pack_trunc_<mode>"
  [(set (match_operand:<V_narrow_pack> 0 "register_operand" "=&w")
        (vec_concat:<V_narrow_pack>
          (truncate:<V_narrow>
            (match_operand:VN 1 "register_operand" "w"))
          (truncate:<V_narrow>
            (match_operand:VN 2 "register_operand" "w"))))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vmovn.i<V_sz_elem>\t%e0, %q1\;vmovn.i<V_sz_elem>\t%f0, %q2"
  [(set_attr "type" "multiple")
   (set_attr "length" "8")]
)
6628 | ||
;; For the non-quad case: truncate a single VN-mode vector into its
;; <V_narrow> counterpart with one vmovn.  Only enabled for little-endian.
(define_insn "neon_vec_pack_trunc_<mode>"
  [(set (match_operand:<V_narrow> 0 "register_operand" "=w")
        (truncate:<V_narrow>
          (match_operand:VN 1 "register_operand" "w")))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vmovn.i<V_sz_elem>\t%P0, %q1"
  [(set_attr "type" "neon_move_narrow_q")]
)
6637 | ||
;; Expander for VSHFT modes: place operand 1 in the low half and operand 2
;; in the high half of a fresh <V_DOUBLE> register, then narrow that
;; register into operand 0.  Only enabled for little-endian.
(define_expand "vec_pack_trunc_<mode>"
  [(match_operand:<V_narrow_pack> 0 "register_operand")
   (match_operand:VSHFT 1 "register_operand")
   (match_operand:VSHFT 2 "register_operand")]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  rtx joined = gen_reg_rtx (<V_DOUBLE>mode);

  emit_insn (gen_move_lo_quad_<V_double> (joined, operands[1]));
  emit_insn (gen_move_hi_quad_<V_double> (joined, operands[2]));
  emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], joined));
  DONE;
})
bd1aa4f4 SS |
6651 | |
;; Match abs (op1 - op2) on VF modes and emit a single vabd.
;; Only enabled under flag_unsafe_math_optimizations.
(define_insn "neon_vabd<mode>_2"
  [(set (match_operand:VF 0 "s_register_operand" "=w")
        (abs:VF
          (minus:VF
            (match_operand:VF 1 "s_register_operand" "w")
            (match_operand:VF 2 "s_register_operand" "w"))))]
  "TARGET_NEON && flag_unsafe_math_optimizations"
  "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_abd_s<q>")]
)
6660 | ||
;; Match abs of an UNSPEC_VSUB of op1 and op2 on VF modes and emit a
;; single vabd.  Only enabled under flag_unsafe_math_optimizations.
(define_insn "neon_vabd<mode>_3"
  [(set (match_operand:VF 0 "s_register_operand" "=w")
        (abs:VF
          (unspec:VF
            [(match_operand:VF 1 "s_register_operand" "w")
             (match_operand:VF 2 "s_register_operand" "w")]
            UNSPEC_VSUB)))]
  "TARGET_NEON && flag_unsafe_math_optimizations"
  "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_abd_s<q>")]
)
436016f4 DZ |
6670 | |
;; Matrix multiply-accumulate (MATMUL unspecs) of two V16QI inputs into a
;; V4SI accumulator.  Operand 1 is the accumulator and is tied to the
;; output ("0").
(define_insn "neon_<sup>mmlav16qi"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (plus:V4SI
          (unspec:V4SI
            [(match_operand:V16QI 2 "register_operand" "w")
             (match_operand:V16QI 3 "register_operand" "w")]
            MATMUL)
          (match_operand:V4SI 1 "register_operand" "0")))]
  "TARGET_I8MM"
  "v<sup>mmla.<mmla_sfx>\t%q0, %q2, %q3"
  [(set_attr "type" "neon_mla_s_q")]
)
eb7ba6c3 DZ |
6681 | |
;; bf16 dot product accumulated into a VCVTF-mode vector.  Operand 1 is the
;; accumulator, tied to the output; operands 2 and 3 are the <VSF2BF>
;; inputs.
(define_insn "neon_vbfdot<VCVTF:mode>"
  [(set (match_operand:VCVTF 0 "register_operand" "=w")
        (plus:VCVTF
          (match_operand:VCVTF 1 "register_operand" "0")
          (unspec:VCVTF
            [(match_operand:<VSF2BF> 2 "register_operand" "w")
             (match_operand:<VSF2BF> 3 "register_operand" "w")]
            UNSPEC_DOT_S)))]
  "TARGET_BF16_SIMD"
  "vdot.bf16\\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set_attr "type" "neon_dot<q>")]
)
6693 | ||
;; bf16 dot product by lane, with the lane taken from a V4BF register
;; (operand 3, "x" constraint) at the index given by operand 4.
;; Operand 1 is the accumulator, tied to the output.
(define_insn "neon_vbfdot_lanev4bf<VCVTF:mode>"
  [(set (match_operand:VCVTF 0 "register_operand" "=w")
        (plus:VCVTF
          (match_operand:VCVTF 1 "register_operand" "0")
          (unspec:VCVTF
            [(match_operand:<VSF2BF> 2 "register_operand" "w")
             (match_operand:V4BF 3 "register_operand" "x")
             (match_operand:SI 4 "immediate_operand" "i")]
            UNSPEC_DOT_S)))]
  "TARGET_BF16_SIMD"
  "vdot.bf16\\t%<V_reg>0, %<V_reg>2, %P3[%c4]"
  [(set_attr "type" "neon_dot<q>")]
)
6706 | ||
;; bf16 dot product by lane, with the lane taken from a V8BF register
;; (operand 3).  Lane indices below NUNITS / 4 address the %e part of
;; operand 3 directly; larger indices are rebased and address the %f part.
(define_insn "neon_vbfdot_lanev8bf<VCVTF:mode>"
  [(set (match_operand:VCVTF 0 "register_operand" "=w")
        (plus:VCVTF
          (match_operand:VCVTF 1 "register_operand" "0")
          (unspec:VCVTF
            [(match_operand:<VSF2BF> 2 "register_operand" "w")
             (match_operand:V8BF 3 "register_operand" "x")
             (match_operand:SI 4 "immediate_operand" "i")]
            UNSPEC_DOT_S)))]
  "TARGET_BF16_SIMD"
{
  const int lane = INTVAL (operands[4]);
  const int split = GET_MODE_NUNITS (GET_MODE (operands[3])) / 4;

  if (lane >= split)
    {
      /* Rebase the index so that it is relative to the %f part.  */
      operands[4] = GEN_INT (lane - split);
      return "vdot.bf16\\t%<V_reg>0, %<V_reg>2, %f3[%c4]";
    }

  return "vdot.bf16\\t%<V_reg>0, %<V_reg>2, %e3[%c4]";
}
  [(set_attr "type" "neon_dot<q>")]
)
8e6d0dba DZ |
6729 | |
;; Convert a V4SF vector to bf16 (UNSPEC_BFCVT) with vcvt.bf16.f32; the
;; result is written to the %<V_bf_low> part of the VBFCVT-mode output.
(define_insn "neon_vbfcvtv4sf<VBFCVT:mode>"
  [(set (match_operand:VBFCVT 0 "register_operand" "=w")
        (unspec:VBFCVT
          [(match_operand:V4SF 1 "register_operand" "w")]
          UNSPEC_BFCVT))]
  "TARGET_BF16_SIMD"
  "vcvt.bf16.f32\\t%<V_bf_low>0, %q1"
  [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
)
6738 | ||
;; Convert a V4SF vector (operand 2) to bf16 and write it to the %f part of
;; the V8BF output.  Operand 1 is tied to the output ("0"), so the rest of
;; the register comes from it.
(define_insn "neon_vbfcvtv4sf_highv8bf"
  [(set (match_operand:V8BF 0 "register_operand" "=w")
        (unspec:V8BF
          [(match_operand:V8BF 1 "register_operand" "0")
           (match_operand:V4SF 2 "register_operand" "w")]
          UNSPEC_BFCVT_HIGH))]
  "TARGET_BF16_SIMD"
  "vcvt.bf16.f32\\t%f0, %q2"
  [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
)
6748 | ||
;; Scalar SF to BF conversion (UNSPEC_BFCVT) with vcvtb.bf16.f32; both
;; operands use the "t" register constraint.
(define_insn "neon_vbfcvtsf"
  [(set (match_operand:BF 0 "register_operand" "=t")
        (unspec:BF
          [(match_operand:SF 1 "register_operand" "t")]
          UNSPEC_BFCVT))]
  "TARGET_BF16_FP"
  "vcvtb.bf16.f32\\t%0, %1"
  [(set_attr "type" "f_cvt")]
)
6757 | ||
;; Convert the bf16 elements in the %<V_bf_low> part of operand 1 to a V4SF
;; vector by widening each 16-bit element to 32 bits and shifting it left
;; by 16.
;; The VSHLL data type names the *source* element size, so the 16-bit form
;; is required here; a 32-bit data type would widen two 32-bit elements to
;; 64 bits instead.  The shift equals the source element size, hence the
;; ".i16" (maximum shift) spelling.
(define_insn "neon_vbfcvt<VBFCVT:mode>"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (unspec:V4SF
          [(match_operand:VBFCVT 1 "register_operand" "w")]
          UNSPEC_BFCVT))]
  "TARGET_BF16_SIMD"
  "vshll.i16\\t%q0, %<V_bf_low>1, #16"
  [(set_attr "type" "neon_shift_imm_q")]
)
6766 | ||
;; Convert the bf16 elements in the %f part of the V8BF operand 1 to a V4SF
;; vector by widening each 16-bit element to 32 bits and shifting it left
;; by 16.
;; The VSHLL data type names the *source* element size, so the 16-bit form
;; is required here; a 32-bit data type would widen two 32-bit elements to
;; 64 bits instead.  The shift equals the source element size, hence the
;; ".i16" (maximum shift) spelling.
(define_insn "neon_vbfcvt_highv8bf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (unspec:V4SF
          [(match_operand:V8BF 1 "register_operand" "w")]
          UNSPEC_BFCVT_HIGH))]
  "TARGET_BF16_SIMD"
  "vshll.i16\\t%q0, %f1, #16"
  [(set_attr "type" "neon_shift_imm_q")]
)
6775 | ||
;; Convert a BF scalar operand to SF via VSHL.
;; VSHL does not accept the 32-bit registers in which the BF and SF scalar
;; operands would be allocated, so both are routed through intermediate
;; V2SI vectors, which live in 64-bit registers.
(define_expand "neon_vbfcvtbf"
  [(match_operand:SF 0 "register_operand")
   (unspec:SF [(match_operand:BF 1 "register_operand")] UNSPEC_BFCVT)]
  "TARGET_BF16_FP"
{
  rtx shifted = gen_reg_rtx (V2SImode);
  rtx as_v2si = gen_reg_rtx (V2SImode);

  /* BF -> V2SI, shift left by 16, V2SI -> SF.  */
  emit_insn (gen_neon_vbfcvtbf_cvtmodev2si (as_v2si, operands[1]));
  emit_insn (gen_neon_vshl_nv2si (shifted, as_v2si,
                                  gen_int_mode (16, SImode)));
  emit_insn (gen_neon_vbfcvtbf_cvtmodesf (operands[0], shifted));
  DONE;
})
6792 | ||
;; Convert BF mode to V2SI and V2SI to SF.
;; This is done by allocating the 32-bit operand in the low half of a
;; 64-bit register indexed by a 32-bit sub-register number; the input is
;; tied to the output ("0") and no instruction text is emitted.
;; This will generate reloads, but the compiler can optimize the moves out.
;; The 'x' constraint keeps the 32-bit sub-registers in an indexable range,
;; which avoids extra moves.
(define_insn "neon_vbfcvtbf_cvtmode<mode>"
  [(set (match_operand:VBFCVTM 0 "register_operand" "=x")
        (unspec:VBFCVTM
          [(match_operand:<V_bf_cvt_m> 1 "register_operand" "0")]
          UNSPEC_BFCVT))]
  "TARGET_BF16_FP"
  ""
)
2d22ab64 KT |
6806 | |
;; bf16 matrix multiply-accumulate (UNSPEC_BFMMLA) of two V8BF inputs into
;; a V4SF accumulator.  Operand 1 is the accumulator, tied to the output.
(define_insn "neon_vmmlav8bf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (plus:V4SF
          (match_operand:V4SF 1 "register_operand" "0")
          (unspec:V4SF
            [(match_operand:V8BF 2 "register_operand" "w")
             (match_operand:V8BF 3 "register_operand" "w")]
            UNSPEC_BFMMLA)))]
  "TARGET_BF16_SIMD"
  "vmmla.bf16\\t%q0, %q2, %q3"
  [(set_attr "type" "neon_fp_mla_s_q")]
)
6817 | ||
;; bf16 fused multiply-accumulate (BF_MA unspecs, vfma<bt>) of two V8BF
;; inputs into a V4SF accumulator.  Operand 1 is the accumulator, tied to
;; the output.
(define_insn "neon_vfma<bt>v8bf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (plus:V4SF
          (match_operand:V4SF 1 "register_operand" "0")
          (unspec:V4SF
            [(match_operand:V8BF 2 "register_operand" "w")
             (match_operand:V8BF 3 "register_operand" "w")]
            BF_MA)))]
  "TARGET_BF16_SIMD"
  "vfma<bt>.bf16\\t%q0, %q2, %q3"
  [(set_attr "type" "neon_fp_mla_s_q")]
)
6828 | ||
;; bf16 fused multiply-accumulate by lane: operand 3 is a V4BF register
;; ("x" constraint) and operand 4 is the constant lane index into it.
;; Operand 1 is the accumulator, tied to the output.
(define_insn "neon_vfma<bt>_lanev8bf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (plus:V4SF
          (match_operand:V4SF 1 "register_operand" "0")
          (unspec:V4SF
            [(match_operand:V8BF 2 "register_operand" "w")
             (match_operand:V4BF 3 "register_operand" "x")
             (match_operand:SI 4 "const_int_operand" "n")]
            BF_MA)))]
  "TARGET_BF16_SIMD"
  "vfma<bt>.bf16\\t%q0, %q2, %P3[%c4]"
  [(set_attr "type" "neon_fp_mla_s_scalar_q")]
)
6840 | ||
;; bf16 fused multiply-accumulate by lane, with the lane taken from a V8BF
;; register (operand 3) at constant index operand 4 (0..7).
;; The work is done by neon_vfma<bt>_lanev8bf, whose lane operand is V4BF,
;; so operand 3 is always narrowed to V4BF first: lanes 0..3 use its low
;; part, lanes 4..7 use its high part with the index rebased by 4.
(define_expand "neon_vfma<bt>_laneqv8bf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (plus:V4SF
          (match_operand:V4SF 1 "register_operand" "0")
          (unspec:V4SF
            [(match_operand:V8BF 2 "register_operand" "w")
             (match_operand:V8BF 3 "register_operand" "x")
             (match_operand:SI 4 "const_int_operand" "n")]
            BF_MA)))]
  "TARGET_BF16_SIMD"
{
  int lane = INTVAL (operands[4]);
  gcc_assert (IN_RANGE (lane, 0, 7));

  rtx lane_src;
  if (lane < 4)
    {
      /* Pass the V4BF low part rather than the V8BF register itself, so
         that the operand has the mode the lane pattern expects.  */
      lane_src = gen_lowpart (V4BFmode, operands[3]);
    }
  else
    {
      lane_src = gen_reg_rtx (V4BFmode);
      emit_insn (gen_neon_vget_highv8bf (lane_src, operands[3]));
      operands[4] = GEN_INT (lane - 4);
    }

  emit_insn (gen_neon_vfma<bt>_lanev8bf (operands[0], operands[1],
                                         operands[2], lane_src,
                                         operands[4]));
  DONE;
}
  [(set_attr "type" "neon_fp_mla_s_scalar_q")]
)