]>
Commit | Line | Data |
---|---|---|
88f77cba | 1 | ;; ARM NEON coprocessor Machine Description |
8d9254fc | 2 | ;; Copyright (C) 2006-2020 Free Software Foundation, Inc. |
88f77cba JB |
3 | ;; Written by CodeSourcery. |
4 | ;; | |
5 | ;; This file is part of GCC. | |
6 | ;; | |
7 | ;; GCC is free software; you can redistribute it and/or modify it | |
8 | ;; under the terms of the GNU General Public License as published by | |
2f83c7d6 | 9 | ;; the Free Software Foundation; either version 3, or (at your option) |
88f77cba JB |
10 | ;; any later version. |
11 | ;; | |
12 | ;; GCC is distributed in the hope that it will be useful, but | |
13 | ;; WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 | ;; General Public License for more details. | |
16 | ;; | |
17 | ;; You should have received a copy of the GNU General Public License | |
2f83c7d6 NC |
18 | ;; along with GCC; see the file COPYING3. If not see |
19 | ;; <http://www.gnu.org/licenses/>. | |
88f77cba | 20 | |
88f77cba | 21 | |
c956e102 | 22 | ;; Attribute used to permit string comparisons against <VQH_mnem> in |
003bb7f3 | 23 | ;; type attribute definitions. |
c956e102 MS |
24 | (define_attr "vqh_mnem" "vadd,vmin,vmax" (const_string "vadd")) |
25 | ||
3eefaaa9 BE |
26 | (define_insn "unaligned_storev8qi" |
27 | [(set (match_operand:V8QI 0 "memory_operand" "=Un") | |
28 | (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "w")] | |
29 | UNSPEC_UNALIGNED_STORE))] | |
30 | "TARGET_NEON" | |
31 | "* | |
32 | return output_move_neon (operands); | |
33 | " | |
34 | [(set_attr "type" "neon_store1_1reg")]) | |
35 | ||
88f77cba | 36 | (define_insn "*neon_mov<mode>" |
2e87b2f4 | 37 | [(set (match_operand:VDXMOV 0 "nonimmediate_operand" |
e009dfb3 | 38 | "=w,Un,w, w, w, ?r,?w,?r, ?Us,*r") |
2e87b2f4 | 39 | (match_operand:VDXMOV 1 "general_operand" |
e009dfb3 | 40 | " w,w, Dm,Dn,Uni, w, r, Usi,r,*r"))] |
40f73786 DJ |
41 | "TARGET_NEON |
42 | && (register_operand (operands[0], <MODE>mode) | |
43 | || register_operand (operands[1], <MODE>mode))" | |
88f77cba | 44 | { |
e009dfb3 | 45 | if (which_alternative == 2 || which_alternative == 3) |
88f77cba JB |
46 | { |
47 | int width, is_valid; | |
48 | static char templ[40]; | |
49 | ||
63c8f7d6 | 50 | is_valid = simd_immediate_valid_for_move (operands[1], <MODE>mode, |
88f77cba JB |
51 | &operands[1], &width); |
52 | ||
53 | gcc_assert (is_valid != 0); | |
54 | ||
55 | if (width == 0) | |
56 | return "vmov.f32\t%P0, %1 @ <mode>"; | |
57 | else | |
00a3a76a | 58 | sprintf (templ, "vmov.i%d\t%%P0, %%x1 @ <mode>", width); |
88f77cba JB |
59 | |
60 | return templ; | |
61 | } | |
62 | ||
88f77cba JB |
63 | switch (which_alternative) |
64 | { | |
65 | case 0: return "vmov\t%P0, %P1 @ <mode>"; | |
e009dfb3 MM |
66 | case 1: case 4: return output_move_neon (operands); |
67 | case 2: case 3: gcc_unreachable (); | |
68 | case 5: return "vmov\t%Q0, %R0, %P1 @ <mode>"; | |
69 | case 6: return "vmov\t%P0, %Q1, %R1 @ <mode>"; | |
70 | case 9: return "#"; | |
3598da80 | 71 | default: return output_move_double (operands, true, NULL); |
88f77cba JB |
72 | } |
73 | } | |
f7379e5e | 74 | [(set_attr "type" "neon_move<q>,neon_store1_1reg,neon_move<q>,\ |
e009dfb3 MM |
75 | neon_move<q>,neon_load1_1reg, neon_to_gp<q>,\ |
76 | neon_from_gp<q>,neon_load1_2reg, neon_store1_2reg,\ | |
77 | multiple") | |
78 | (set_attr "length" "4,4,4,4,4,4,4,8,8,8") | |
79 | (set_attr "arm_pool_range" "*,*,*,*,1020,*,*,1020,*,*") | |
80 | (set_attr "thumb2_pool_range" "*,*,*,*,1018,*,*,1018,*,*") | |
81 | (set_attr "neg_pool_range" "*,*,*,*,1004,*,*,1004,*,*")]) | |
88f77cba JB |
82 | |
83 | (define_insn "*neon_mov<mode>" | |
84 | [(set (match_operand:VQXMOV 0 "nonimmediate_operand" | |
e009dfb3 | 85 | "=w,Un,w, w, w, ?r,?w,?r,?r, ?Us") |
88f77cba | 86 | (match_operand:VQXMOV 1 "general_operand" |
e009dfb3 | 87 | " w,w, Dm,DN,Uni, w, r, r, Usi, r"))] |
40f73786 DJ |
88 | "TARGET_NEON |
89 | && (register_operand (operands[0], <MODE>mode) | |
90 | || register_operand (operands[1], <MODE>mode))" | |
88f77cba | 91 | { |
e009dfb3 | 92 | if (which_alternative == 2 || which_alternative == 3) |
88f77cba JB |
93 | { |
94 | int width, is_valid; | |
95 | static char templ[40]; | |
96 | ||
63c8f7d6 | 97 | is_valid = simd_immediate_valid_for_move (operands[1], <MODE>mode, |
88f77cba JB |
98 | &operands[1], &width); |
99 | ||
100 | gcc_assert (is_valid != 0); | |
101 | ||
102 | if (width == 0) | |
103 | return "vmov.f32\t%q0, %1 @ <mode>"; | |
104 | else | |
105 | sprintf (templ, "vmov.i%d\t%%q0, %%1 @ <mode>", width); | |
106 | ||
107 | return templ; | |
108 | } | |
109 | ||
110 | switch (which_alternative) | |
111 | { | |
112 | case 0: return "vmov\t%q0, %q1 @ <mode>"; | |
e009dfb3 MM |
113 | case 1: case 4: return output_move_neon (operands); |
114 | case 2: case 3: gcc_unreachable (); | |
115 | case 5: return "vmov\t%Q0, %R0, %e1 @ <mode>\;vmov\t%J0, %K0, %f1"; | |
116 | case 6: return "vmov\t%e0, %Q1, %R1 @ <mode>\;vmov\t%f0, %J1, %K1"; | |
88f77cba JB |
117 | default: return output_move_quad (operands); |
118 | } | |
119 | } | |
f7379e5e | 120 | [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_move_q,\ |
e009dfb3 MM |
121 | neon_move_q,neon_load2_2reg_q,neon_to_gp_q,\ |
122 | neon_from_gp_q,mov_reg,neon_load1_4reg,neon_store1_4reg") | |
123 | (set_attr "length" "4,8,4,4,8,8,8,16,8,16") | |
124 | (set_attr "arm_pool_range" "*,*,*,*,1020,*,*,*,1020,*") | |
125 | (set_attr "thumb2_pool_range" "*,*,*,*,1018,*,*,*,1018,*") | |
126 | (set_attr "neg_pool_range" "*,*,*,*,996,*,*,*,996,*")]) | |
88f77cba | 127 | |
2a9234e8 TC |
128 | /* We define these mov expanders to match the standard mov$a optab to prevent
129 | the mid-end from trying to do a subreg for these modes, which is the most | |
130 | inefficient way to expand the move. Also, big-endian subregs aren't | |
131 | allowed for a subset of modes; see TARGET_CAN_CHANGE_MODE_CLASS. | |
132 | Without these RTL generation patterns the mid-end would attempt to take a | |
133 | subreg and may ICE if it can't. */ | |
134 | ||
88f77cba | 135 | (define_expand "movti" |
cd65e265 DZ |
136 | [(set (match_operand:TI 0 "nonimmediate_operand") |
137 | (match_operand:TI 1 "general_operand"))] | |
88f77cba JB |
138 | "TARGET_NEON" |
139 | { | |
70cdb21e BE |
140 | gcc_checking_assert (aligned_operand (operands[0], TImode)); |
141 | gcc_checking_assert (aligned_operand (operands[1], TImode)); | |
40f73786 DJ |
142 | if (can_create_pseudo_p ()) |
143 | { | |
d435a4be | 144 | if (!REG_P (operands[0])) |
40f73786 DJ |
145 | operands[1] = force_reg (TImode, operands[1]); |
146 | } | |
88f77cba JB |
147 | }) |
148 | ||
149 | (define_expand "mov<mode>" | |
cd65e265 DZ |
150 | [(set (match_operand:VSTRUCT 0 "nonimmediate_operand") |
151 | (match_operand:VSTRUCT 1 "general_operand"))] | |
14782c81 | 152 | "TARGET_NEON || TARGET_HAVE_MVE" |
88f77cba | 153 | { |
70cdb21e BE |
154 | gcc_checking_assert (aligned_operand (operands[0], <MODE>mode)); |
155 | gcc_checking_assert (aligned_operand (operands[1], <MODE>mode)); | |
40f73786 DJ |
156 | if (can_create_pseudo_p ()) |
157 | { | |
d435a4be | 158 | if (!REG_P (operands[0])) |
40f73786 DJ |
159 | operands[1] = force_reg (<MODE>mode, operands[1]); |
160 | } | |
88f77cba JB |
161 | }) |
162 | ||
63c8f7d6 SP |
163 | ;; The mov<mode> patterns for the modes v8hf, v4hf, v4bf and v8bf are split into
164 | ;; two groups. The movv8hf pattern is common to MVE and NEON, so it has been moved | |
165 | ;; into the vec-common.md file. The remaining mov expand patterns for half-float and | |
166 | ;; bfloat modes are implemented below. | |
2a9234e8 | 167 | (define_expand "mov<mode>" |
63c8f7d6 SP |
168 | [(set (match_operand:VHFBF_split 0 "s_register_operand") |
169 | (match_operand:VHFBF_split 1 "s_register_operand"))] | |
2a9234e8 | 170 | "TARGET_NEON" |
92422235 | 171 | { |
70cdb21e BE |
172 | gcc_checking_assert (aligned_operand (operands[0], <MODE>mode)); |
173 | gcc_checking_assert (aligned_operand (operands[1], <MODE>mode)); | |
92422235 CL |
174 | if (can_create_pseudo_p ()) |
175 | { | |
176 | if (!REG_P (operands[0])) | |
2a9234e8 | 177 | operands[1] = force_reg (<MODE>mode, operands[1]); |
92422235 CL |
178 | } |
179 | }) | |
180 | ||
88f77cba JB |
181 | (define_insn "*neon_mov<mode>" |
182 | [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "=w,Ut,w") | |
183 | (match_operand:VSTRUCT 1 "general_operand" " w,w, Ut"))] | |
14782c81 | 184 | "(TARGET_NEON || TARGET_HAVE_MVE) |
40f73786 DJ |
185 | && (register_operand (operands[0], <MODE>mode) |
186 | || register_operand (operands[1], <MODE>mode))" | |
88f77cba JB |
187 | { |
188 | switch (which_alternative) | |
189 | { | |
190 | case 0: return "#"; | |
191 | case 1: case 2: return output_move_neon (operands); | |
192 | default: gcc_unreachable (); | |
193 | } | |
194 | } | |
f7379e5e | 195 | [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_load2_2reg_q") |
7c4f0041 | 196 | (set (attr "length") (symbol_ref "arm_attr_length_move_neon (insn)"))]) |
88f77cba JB |
197 | |
198 | (define_split | |
199 | [(set (match_operand:EI 0 "s_register_operand" "") | |
200 | (match_operand:EI 1 "s_register_operand" ""))] | |
201 | "TARGET_NEON && reload_completed" | |
202 | [(set (match_dup 0) (match_dup 1)) | |
203 | (set (match_dup 2) (match_dup 3))] | |
204 | { | |
205 | int rdest = REGNO (operands[0]); | |
206 | int rsrc = REGNO (operands[1]); | |
207 | rtx dest[2], src[2]; | |
208 | ||
209 | dest[0] = gen_rtx_REG (TImode, rdest); | |
210 | src[0] = gen_rtx_REG (TImode, rsrc); | |
211 | dest[1] = gen_rtx_REG (DImode, rdest + 4); | |
212 | src[1] = gen_rtx_REG (DImode, rsrc + 4); | |
213 | ||
214 | neon_disambiguate_copy (operands, dest, src, 2); | |
215 | }) | |
216 | ||
217 | (define_split | |
218 | [(set (match_operand:OI 0 "s_register_operand" "") | |
219 | (match_operand:OI 1 "s_register_operand" ""))] | |
14782c81 | 220 | "(TARGET_NEON || TARGET_HAVE_MVE)&& reload_completed" |
88f77cba JB |
221 | [(set (match_dup 0) (match_dup 1)) |
222 | (set (match_dup 2) (match_dup 3))] | |
223 | { | |
224 | int rdest = REGNO (operands[0]); | |
225 | int rsrc = REGNO (operands[1]); | |
226 | rtx dest[2], src[2]; | |
227 | ||
228 | dest[0] = gen_rtx_REG (TImode, rdest); | |
229 | src[0] = gen_rtx_REG (TImode, rsrc); | |
230 | dest[1] = gen_rtx_REG (TImode, rdest + 4); | |
231 | src[1] = gen_rtx_REG (TImode, rsrc + 4); | |
232 | ||
233 | neon_disambiguate_copy (operands, dest, src, 2); | |
234 | }) | |
235 | ||
236 | (define_split | |
237 | [(set (match_operand:CI 0 "s_register_operand" "") | |
238 | (match_operand:CI 1 "s_register_operand" ""))] | |
239 | "TARGET_NEON && reload_completed" | |
240 | [(set (match_dup 0) (match_dup 1)) | |
241 | (set (match_dup 2) (match_dup 3)) | |
242 | (set (match_dup 4) (match_dup 5))] | |
243 | { | |
244 | int rdest = REGNO (operands[0]); | |
245 | int rsrc = REGNO (operands[1]); | |
246 | rtx dest[3], src[3]; | |
247 | ||
248 | dest[0] = gen_rtx_REG (TImode, rdest); | |
249 | src[0] = gen_rtx_REG (TImode, rsrc); | |
250 | dest[1] = gen_rtx_REG (TImode, rdest + 4); | |
251 | src[1] = gen_rtx_REG (TImode, rsrc + 4); | |
252 | dest[2] = gen_rtx_REG (TImode, rdest + 8); | |
253 | src[2] = gen_rtx_REG (TImode, rsrc + 8); | |
254 | ||
255 | neon_disambiguate_copy (operands, dest, src, 3); | |
256 | }) | |
257 | ||
258 | (define_split | |
259 | [(set (match_operand:XI 0 "s_register_operand" "") | |
260 | (match_operand:XI 1 "s_register_operand" ""))] | |
14782c81 | 261 | "(TARGET_NEON || TARGET_HAVE_MVE) && reload_completed" |
88f77cba JB |
262 | [(set (match_dup 0) (match_dup 1)) |
263 | (set (match_dup 2) (match_dup 3)) | |
264 | (set (match_dup 4) (match_dup 5)) | |
265 | (set (match_dup 6) (match_dup 7))] | |
266 | { | |
267 | int rdest = REGNO (operands[0]); | |
268 | int rsrc = REGNO (operands[1]); | |
269 | rtx dest[4], src[4]; | |
270 | ||
271 | dest[0] = gen_rtx_REG (TImode, rdest); | |
272 | src[0] = gen_rtx_REG (TImode, rsrc); | |
273 | dest[1] = gen_rtx_REG (TImode, rdest + 4); | |
274 | src[1] = gen_rtx_REG (TImode, rsrc + 4); | |
275 | dest[2] = gen_rtx_REG (TImode, rdest + 8); | |
276 | src[2] = gen_rtx_REG (TImode, rsrc + 8); | |
277 | dest[3] = gen_rtx_REG (TImode, rdest + 12); | |
278 | src[3] = gen_rtx_REG (TImode, rsrc + 12); | |
279 | ||
280 | neon_disambiguate_copy (operands, dest, src, 4); | |
281 | }) | |
282 | ||
c452684d | 283 | (define_expand "movmisalign<mode>" |
33255ae3 JB |
284 | [(set (match_operand:VDQX 0 "neon_perm_struct_or_reg_operand") |
285 | (unspec:VDQX [(match_operand:VDQX 1 "neon_perm_struct_or_reg_operand")] | |
c452684d | 286 | UNSPEC_MISALIGNED_ACCESS))] |
869b9125 | 287 | "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access" |
c452684d | 288 | { |
3416dd87 | 289 | rtx adjust_mem; /* The operand whose address is legitimized below. */ |
c452684d JB |
290 | /* This pattern is not permitted to fail during expansion: if both arguments |
291 | are non-registers (e.g. memory := constant, which can be created by the | |
292 | auto-vectorizer), force operand 1 into a register. */ | |
293 | if (!s_register_operand (operands[0], <MODE>mode) | |
294 | && !s_register_operand (operands[1], <MODE>mode)) | |
295 | operands[1] = force_reg (<MODE>mode, operands[1]); | |
3416dd87 RR |
296 |
297 | if (s_register_operand (operands[0], <MODE>mode)) | |
298 | adjust_mem = operands[1]; | |
299 | else | |
300 | adjust_mem = operands[0]; | |
301 | ||
302 | /* Legitimize the address: if adjust_mem is not accepted by neon_vector_mem_operand, force its address into a Pmode register. */ | |
303 | if (!neon_vector_mem_operand (adjust_mem, 2, true)) | |
304 | XEXP (adjust_mem, 0) = force_reg (Pmode, XEXP (adjust_mem, 0)); | |
305 | ||
c452684d JB |
306 | }) |
307 | ||
308 | (define_insn "*movmisalign<mode>_neon_store" | |
33255ae3 | 309 | [(set (match_operand:VDX 0 "neon_permissive_struct_operand" "=Um") |
c452684d JB |
310 | (unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")] |
311 | UNSPEC_MISALIGNED_ACCESS))] | |
869b9125 | 312 | "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access" |
c452684d | 313 | "vst1.<V_sz_elem>\t{%P1}, %A0" |
f7379e5e | 314 | [(set_attr "type" "neon_store1_1reg<q>")]) |
c452684d JB |
315 | |
316 | (define_insn "*movmisalign<mode>_neon_load" | |
33255ae3 JB |
317 | [(set (match_operand:VDX 0 "s_register_operand" "=w") |
318 | (unspec:VDX [(match_operand:VDX 1 "neon_permissive_struct_operand" | |
319 | " Um")] | |
c452684d | 320 | UNSPEC_MISALIGNED_ACCESS))] |
869b9125 | 321 | "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access" |
c452684d | 322 | "vld1.<V_sz_elem>\t{%P0}, %A1" |
f7379e5e | 323 | [(set_attr "type" "neon_load1_1reg<q>")]) |
c452684d JB |
324 | |
325 | (define_insn "*movmisalign<mode>_neon_store" | |
33255ae3 | 326 | [(set (match_operand:VQX 0 "neon_permissive_struct_operand" "=Um") |
c452684d JB |
327 | (unspec:VQX [(match_operand:VQX 1 "s_register_operand" " w")] |
328 | UNSPEC_MISALIGNED_ACCESS))] | |
869b9125 | 329 | "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access" |
c452684d | 330 | "vst1.<V_sz_elem>\t{%q1}, %A0" |
f7379e5e | 331 | [(set_attr "type" "neon_store1_1reg<q>")]) |
c452684d JB |
332 | |
333 | (define_insn "*movmisalign<mode>_neon_load" | |
33255ae3 JB |
334 | [(set (match_operand:VQX 0 "s_register_operand" "=w") |
335 | (unspec:VQX [(match_operand:VQX 1 "neon_permissive_struct_operand" | |
336 | " Um")] | |
c452684d | 337 | UNSPEC_MISALIGNED_ACCESS))] |
869b9125 | 338 | "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access" |
c452684d | 339 | "vld1.<V_sz_elem>\t{%q0}, %A1" |
b5a26023 | 340 | [(set_attr "type" "neon_load1_1reg<q>")]) |
c452684d | 341 | |
8ba8ebff | 342 | (define_insn "@vec_set<mode>_internal" |
92422235 CL |
343 | [(set (match_operand:VD_LANE 0 "s_register_operand" "=w,w") |
344 | (vec_merge:VD_LANE | |
345 | (vec_duplicate:VD_LANE | |
058e2674 | 346 | (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r")) |
92422235 | 347 | (match_operand:VD_LANE 3 "s_register_operand" "0,0") |
058e2674 | 348 | (match_operand:SI 2 "immediate_operand" "i,i")))] |
88f77cba | 349 | "TARGET_NEON" |
80b8eb11 | 350 | { |
d19eb620 | 351 | int elt = ffs ((int) INTVAL (operands[2])) - 1; /* Operand 2 is the vec_merge selector; the zero-based index of its lowest set bit is the lane to set. */ |
874d42b9 JM |
352 | if (BYTES_BIG_ENDIAN) /* Mirror the lane number on big-endian. */ |
353 | elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt; | |
354 | operands[2] = GEN_INT (elt); /* Operand 2 now holds the lane index printed by %c2. */ | |
058e2674 UW |
355 |
356 | if (which_alternative == 0) /* Alternative 0 (constraint Um): load the element into the lane with vld1. */ | |
357 | return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1"; | |
358 | else /* Alternative 1 (constraint r): move the element into the lane with vmov. */ | |
359 | return "vmov.<V_sz_elem>\t%P0[%c2], %1"; | |
80b8eb11 | 360 | } |
f7379e5e | 361 | [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")]) |
88f77cba | 362 | |
8ba8ebff | 363 | (define_insn "@vec_set<mode>_internal" |
4b644867 AL |
364 | [(set (match_operand:VQ2 0 "s_register_operand" "=w,w") |
365 | (vec_merge:VQ2 | |
366 | (vec_duplicate:VQ2 | |
058e2674 | 367 | (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r")) |
4b644867 | 368 | (match_operand:VQ2 3 "s_register_operand" "0,0") |
058e2674 | 369 | (match_operand:SI 2 "immediate_operand" "i,i")))] |
88f77cba JB |
370 | "TARGET_NEON" |
371 | { | |
466e4b7a | 372 | HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1; /* Operand 2 is the vec_merge selector; the zero-based index of its lowest set bit is the lane to set. */ |
88f77cba | 373 | int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2; /* Number of lanes in each half of the vector mode. */ |
80b8eb11 JB |
374 | int elt = elem % half_elts; /* Lane index within the selected half. */ |
375 | int hi = (elem / half_elts) * 2; /* Register-number offset of the half containing the lane (two register numbers per half). */ | |
88f77cba JB |
376 | int regno = REGNO (operands[0]); |
377 | ||
874d42b9 JM |
378 | if (BYTES_BIG_ENDIAN) /* Mirror the lane number within the half on big-endian. */ |
379 | elt = half_elts - 1 - elt; | |
380 | ||
88f77cba JB |
381 | operands[0] = gen_rtx_REG (<V_HALF>mode, regno + hi); /* Retarget operand 0 to the <V_HALF>mode register for that half. */ |
382 | operands[2] = GEN_INT (elt); | |
383 | ||
058e2674 UW |
384 | if (which_alternative == 0) /* Alternative 0 (constraint Um): load the element into the lane with vld1. */ |
385 | return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1"; | |
386 | else /* Alternative 1 (constraint r): move the element into the lane with vmov. */ | |
387 | return "vmov.<V_sz_elem>\t%P0[%c2], %1"; | |
88f77cba | 388 | } |
f7379e5e | 389 | [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")] |
c956e102 | 390 | ) |
88f77cba | 391 | |
8ba8ebff RS |
392 | (define_insn "@vec_set<mode>_internal" |
393 | [(set (match_operand:V2DI_ONLY 0 "s_register_operand" "=w,w") | |
394 | (vec_merge:V2DI_ONLY | |
395 | (vec_duplicate:V2DI_ONLY | |
058e2674 | 396 | (match_operand:DI 1 "nonimmediate_operand" "Um,r")) |
8ba8ebff | 397 | (match_operand:V2DI_ONLY 3 "s_register_operand" "0,0") |
058e2674 | 398 | (match_operand:SI 2 "immediate_operand" "i,i")))] |
88f77cba JB |
399 | "TARGET_NEON" |
400 | { | |
466e4b7a | 401 | HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1; |
80b8eb11 | 402 | int regno = REGNO (operands[0]) + 2 * elem; |
88f77cba JB |
403 | |
404 | operands[0] = gen_rtx_REG (DImode, regno); | |
405 | ||
058e2674 UW |
406 | if (which_alternative == 0) |
407 | return "vld1.64\t%P0, %A1"; | |
408 | else | |
409 | return "vmov\t%P0, %Q1, %R1"; | |
88f77cba | 410 | } |
f7379e5e | 411 | [(set_attr "type" "neon_load1_all_lanes_q,neon_from_gp_q")] |
c956e102 | 412 | ) |
88f77cba | 413 | |
ff03930a | 414 | (define_insn "vec_extract<mode><V_elem_l>" |
058e2674 | 415 | [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r") |
88f77cba | 416 | (vec_select:<V_elem> |
92422235 | 417 | (match_operand:VD_LANE 1 "s_register_operand" "w,w") |
058e2674 | 418 | (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))] |
88f77cba | 419 | "TARGET_NEON" |
874d42b9 JM |
420 | { |
421 | if (BYTES_BIG_ENDIAN) | |
422 | { | |
423 | int elt = INTVAL (operands[2]); | |
424 | elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt; | |
425 | operands[2] = GEN_INT (elt); | |
426 | } | |
058e2674 UW |
427 | |
428 | if (which_alternative == 0) | |
429 | return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0"; | |
430 | else | |
431 | return "vmov.<V_uf_sclr>\t%0, %P1[%c2]"; | |
874d42b9 | 432 | } |
f7379e5e | 433 | [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")] |
c956e102 | 434 | ) |
88f77cba | 435 | |
1a5c27b1 SP |
436 | ;; This pattern was renamed from "vec_extract<mode><V_elem_l>" to
437 | ;; "neon_vec_extract<mode><V_elem_l>"; it is called | |
438 | ;; by the define_expand in the vec-common.md file. | |
439 | (define_insn "neon_vec_extract<mode><V_elem_l>" | |
058e2674 | 440 | [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r") |
88f77cba | 441 | (vec_select:<V_elem> |
4b644867 | 442 | (match_operand:VQ2 1 "s_register_operand" "w,w") |
058e2674 | 443 | (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))] |
88f77cba JB |
444 | "TARGET_NEON" |
445 | { | |
446 | int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2; /* Number of lanes in each half of the vector mode. */ | |
447 | int elt = INTVAL (operands[2]) % half_elts; /* Lane index within the selected half. */ | |
448 | int hi = (INTVAL (operands[2]) / half_elts) * 2; /* Register-number offset of the half containing the lane (two register numbers per half). */ | |
449 | int regno = REGNO (operands[1]); | |
450 | ||
874d42b9 JM |
451 | if (BYTES_BIG_ENDIAN) /* Mirror the lane number within the half on big-endian. */ |
452 | elt = half_elts - 1 - elt; | |
453 | ||
88f77cba JB |
454 | operands[1] = gen_rtx_REG (<V_HALF>mode, regno + hi); /* Retarget operand 1 to the <V_HALF>mode register for that half. */ |
455 | operands[2] = GEN_INT (elt); | |
456 | ||
058e2674 UW |
457 | if (which_alternative == 0) /* Alternative 0 (constraint Um): store the lane with vst1. */ |
458 | return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0"; | |
459 | else /* Alternative 1 (constraint r): move the lane out with vmov. */ | |
460 | return "vmov.<V_uf_sclr>\t%0, %P1[%c2]"; | |
88f77cba | 461 | } |
f7379e5e | 462 | [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")] |
c956e102 | 463 | ) |
88f77cba | 464 | |
1a5c27b1 SP |
465 | ;; This pattern was renamed from "vec_extractv2didi" to "neon_vec_extractv2didi";
466 | ;; it is called by the define_expand in the vec-common.md file. | |
467 | (define_insn "neon_vec_extractv2didi" | |
058e2674 | 468 | [(set (match_operand:DI 0 "nonimmediate_operand" "=Um,r") |
88f77cba | 469 | (vec_select:DI |
058e2674 UW |
470 | (match_operand:V2DI 1 "s_register_operand" "w,w") |
471 | (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))] | |
88f77cba JB |
472 | "TARGET_NEON" |
473 | { | |
8c98c2a6 | 474 | int regno = REGNO (operands[1]) + 2 * INTVAL (operands[2]); |
88f77cba JB |
475 | |
476 | operands[1] = gen_rtx_REG (DImode, regno); | |
477 | ||
058e2674 UW |
478 | if (which_alternative == 0) |
479 | return "vst1.64\t{%P1}, %A0 @ v2di"; | |
480 | else | |
481 | return "vmov\t%Q0, %R0, %P1 @ v2di"; | |
88f77cba | 482 | } |
f7379e5e | 483 | [(set_attr "type" "neon_store1_one_lane_q,neon_to_gp_q")] |
c956e102 | 484 | ) |
88f77cba | 485 | |
ff03930a | 486 | (define_expand "vec_init<mode><V_elem_l>" |
cd65e265 | 487 | [(match_operand:VDQ 0 "s_register_operand") |
88f77cba | 488 | (match_operand 1 "" "")] |
63c8f7d6 | 489 | "TARGET_NEON || TARGET_HAVE_MVE" |
88f77cba JB |
490 | { |
491 | neon_expand_vector_init (operands[0], operands[1]); | |
492 | DONE; | |
493 | }) | |
494 | ||
495 | ;; Doubleword and quadword arithmetic. | |
496 | ||
bab53516 SL |
497 | ;; NOTE: some other instructions also support 64-bit integer |
498 | ;; element size, which we could potentially use for "long long" operations. | |
88f77cba JB |
499 | |
500 | (define_insn "*add<mode>3_neon" | |
501 | [(set (match_operand:VDQ 0 "s_register_operand" "=w") | |
502 | (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w") | |
503 | (match_operand:VDQ 2 "s_register_operand" "w")))] | |
95e10b8a | 504 | "ARM_HAVE_NEON_<MODE>_ARITH" |
c956e102 | 505 | "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
003bb7f3 | 506 | [(set (attr "type") |
b75b1be2 | 507 | (if_then_else (match_test "<Is_float_mode>") |
f7379e5e JG |
508 | (const_string "neon_fp_addsub_s<q>") |
509 | (const_string "neon_add<q>")))] | |
c956e102 | 510 | ) |
88f77cba JB |
511 | |
512 | (define_insn "*sub<mode>3_neon" | |
513 | [(set (match_operand:VDQ 0 "s_register_operand" "=w") | |
514 | (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w") | |
515 | (match_operand:VDQ 2 "s_register_operand" "w")))] | |
bb78e587 | 516 | "ARM_HAVE_NEON_<MODE>_ARITH" |
c956e102 | 517 | "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
003bb7f3 | 518 | [(set (attr "type") |
b75b1be2 | 519 | (if_then_else (match_test "<Is_float_mode>") |
f7379e5e JG |
520 | (const_string "neon_fp_addsub_s<q>") |
521 | (const_string "neon_sub<q>")))] | |
c956e102 | 522 | ) |
88f77cba JB |
523 | |
524 | (define_insn "*mul<mode>3_neon" | |
f7379e5e JG |
525 | [(set (match_operand:VDQW 0 "s_register_operand" "=w") |
526 | (mult:VDQW (match_operand:VDQW 1 "s_register_operand" "w") | |
527 | (match_operand:VDQW 2 "s_register_operand" "w")))] | |
bb78e587 | 528 | "ARM_HAVE_NEON_<MODE>_ARITH" |
c956e102 | 529 | "vmul.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
003bb7f3 | 530 | [(set (attr "type") |
b75b1be2 | 531 | (if_then_else (match_test "<Is_float_mode>") |
f7379e5e JG |
532 | (const_string "neon_fp_mul_s<q>") |
533 | (const_string "neon_mul_<V_elem_ch><q>")))] | |
c956e102 | 534 | ) |
88f77cba | 535 | |
536ecfc4 PK |
536 | /* Perform division using multiply-by-reciprocal.
537 | The reciprocal is calculated using the Newton-Raphson method. | |
538 | Enabled with -funsafe-math-optimizations -freciprocal-math | |
539 | and disabled for -Os since it increases code size. */ | |
540 | ||
541 | (define_expand "div<mode>3" | |
cd65e265 DZ |
542 | [(set (match_operand:VCVTF 0 "s_register_operand") |
543 | (div:VCVTF (match_operand:VCVTF 1 "s_register_operand") | |
544 | (match_operand:VCVTF 2 "s_register_operand")))] | |
536ecfc4 PK |
545 | "TARGET_NEON && !optimize_size |
546 | && flag_reciprocal_math" | |
547 | { | |
548 | rtx rec = gen_reg_rtx (<MODE>mode); | |
549 | rtx vrecps_temp = gen_reg_rtx (<MODE>mode); | |
550 | ||
551 | /* Initial reciprocal estimate of operands[2]. */ | |
552 | emit_insn (gen_neon_vrecpe<mode> (rec, operands[2])); | |
553 | ||
554 | /* Perform 2 iterations of the Newton-Raphson method; each iteration emits a vrecps step into vrecps_temp and multiplies it into rec. */ | |
555 | for (int i = 0; i < 2; i++) | |
556 | { | |
557 | emit_insn (gen_neon_vrecps<mode> (vrecps_temp, rec, operands[2])); | |
558 | emit_insn (gen_mul<mode>3 (rec, rec, vrecps_temp)); | |
559 | } | |
560 | ||
561 | /* rec now holds the refined reciprocal; perform operands[0] = operands[1] * rec. */ | |
562 | emit_insn (gen_mul<mode>3 (operands[0], operands[1], rec)); | |
563 | DONE; | |
564 | } | |
565 | ) | |
566 | ||
567 | ||
bab53516 | 568 | (define_insn "mul<mode>3add<mode>_neon" |
f7379e5e JG |
569 | [(set (match_operand:VDQW 0 "s_register_operand" "=w") |
570 | (plus:VDQW (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w") | |
571 | (match_operand:VDQW 3 "s_register_operand" "w")) | |
572 | (match_operand:VDQW 1 "s_register_operand" "0")))] | |
bb78e587 | 573 | "ARM_HAVE_NEON_<MODE>_ARITH" |
1ea9fe56 | 574 | "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3" |
003bb7f3 | 575 | [(set (attr "type") |
b75b1be2 | 576 | (if_then_else (match_test "<Is_float_mode>") |
f7379e5e JG |
577 | (const_string "neon_fp_mla_s<q>") |
578 | (const_string "neon_mla_<V_elem_ch><q>")))] | |
1ea9fe56 MM |
579 | ) |
580 | ||
55a9b91b MW |
581 | (define_insn "mul<mode>3add<mode>_neon" |
582 | [(set (match_operand:VH 0 "s_register_operand" "=w") | |
583 | (plus:VH (mult:VH (match_operand:VH 2 "s_register_operand" "w") | |
584 | (match_operand:VH 3 "s_register_operand" "w")) | |
585 | (match_operand:VH 1 "s_register_operand" "0")))] | |
bb78e587 | 586 | "ARM_HAVE_NEON_<MODE>_ARITH" |
55a9b91b MW |
587 | "vmla.f16\t%<V_reg>0, %<V_reg>2, %<V_reg>3" |
588 | [(set_attr "type" "neon_fp_mla_s<q>")] | |
589 | ) | |
590 | ||
bab53516 | 591 | (define_insn "mul<mode>3neg<mode>add<mode>_neon" |
f7379e5e JG |
592 | [(set (match_operand:VDQW 0 "s_register_operand" "=w") |
593 | (minus:VDQW (match_operand:VDQW 1 "s_register_operand" "0") | |
594 | (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w") | |
595 | (match_operand:VDQW 3 "s_register_operand" "w"))))] | |
bb78e587 | 596 | "ARM_HAVE_NEON_<MODE>_ARITH" |
1ea9fe56 | 597 | "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3" |
003bb7f3 | 598 | [(set (attr "type") |
b75b1be2 | 599 | (if_then_else (match_test "<Is_float_mode>") |
f7379e5e JG |
600 | (const_string "neon_fp_mla_s<q>") |
601 | (const_string "neon_mla_<V_elem_ch><q>")))] | |
1ea9fe56 MM |
602 | ) |
603 | ||
8b2ab9cb | 604 | ;; Fused multiply-accumulate |
c4216388 MGD |
605 | ;; We define each insn twice here: |
606 | ;; 1: with flag_unsafe_math_optimizations, so that the widening multiply phase | |
607 | ;; can use it when converting to FMA. | |
608 | ;; 2: without flag_unsafe_math_optimizations, for the intrinsics to use. | |
8b2ab9cb RR |
609 | (define_insn "fma<VCVTF:mode>4" |
610 | [(set (match_operand:VCVTF 0 "register_operand" "=w") | |
611 | (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w") | |
612 | (match_operand:VCVTF 2 "register_operand" "w") | |
613 | (match_operand:VCVTF 3 "register_operand" "0")))] | |
bb78e587 | 614 | "ARM_HAVE_NEON_<MODE>_ARITH && TARGET_FMA" |
e60226ff | 615 | "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
f7379e5e | 616 | [(set_attr "type" "neon_fp_mla_s<q>")] |
8b2ab9cb RR |
617 | ) |
618 | ||
c4216388 MGD |
619 | (define_insn "fma<VCVTF:mode>4_intrinsic" |
620 | [(set (match_operand:VCVTF 0 "register_operand" "=w") | |
621 | (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w") | |
622 | (match_operand:VCVTF 2 "register_operand" "w") | |
623 | (match_operand:VCVTF 3 "register_operand" "0")))] | |
624 | "TARGET_NEON && TARGET_FMA" | |
e60226ff | 625 | "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
f7379e5e | 626 | [(set_attr "type" "neon_fp_mla_s<q>")] |
c4216388 MGD |
627 | ) |
628 | ||
6da37857 MW |
629 | (define_insn "fma<VH:mode>4" |
630 | [(set (match_operand:VH 0 "register_operand" "=w") | |
631 | (fma:VH | |
632 | (match_operand:VH 1 "register_operand" "w") | |
633 | (match_operand:VH 2 "register_operand" "w") | |
634 | (match_operand:VH 3 "register_operand" "0")))] | |
bb78e587 | 635 | "ARM_HAVE_NEON_<MODE>_ARITH" |
55a9b91b MW |
636 | "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
637 | [(set_attr "type" "neon_fp_mla_s<q>")] | |
638 | ) | |
639 | ||
8b2ab9cb RR |
640 | (define_insn "*fmsub<VCVTF:mode>4" |
641 | [(set (match_operand:VCVTF 0 "register_operand" "=w") | |
642 | (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w")) | |
643 | (match_operand:VCVTF 2 "register_operand" "w") | |
644 | (match_operand:VCVTF 3 "register_operand" "0")))] | |
bb78e587 | 645 | "ARM_HAVE_NEON_<MODE>_ARITH && TARGET_FMA" |
e60226ff | 646 | "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
f7379e5e | 647 | [(set_attr "type" "neon_fp_mla_s<q>")] |
8b2ab9cb RR |
648 | ) |
649 | ||
c4216388 | 650 | (define_insn "fmsub<VCVTF:mode>4_intrinsic" |
55a9b91b MW |
651 | [(set (match_operand:VCVTF 0 "register_operand" "=w") |
652 | (fma:VCVTF | |
653 | (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w")) | |
654 | (match_operand:VCVTF 2 "register_operand" "w") | |
655 | (match_operand:VCVTF 3 "register_operand" "0")))] | |
656 | "TARGET_NEON && TARGET_FMA" | |
e60226ff | 657 | "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
55a9b91b MW |
658 | [(set_attr "type" "neon_fp_mla_s<q>")] |
659 | ) | |
660 | ||
661 | (define_insn "fmsub<VH:mode>4_intrinsic" | |
662 | [(set (match_operand:VH 0 "register_operand" "=w") | |
663 | (fma:VH | |
664 | (neg:VH (match_operand:VH 1 "register_operand" "w")) | |
665 | (match_operand:VH 2 "register_operand" "w") | |
666 | (match_operand:VH 3 "register_operand" "0")))] | |
667 | "TARGET_NEON_FP16INST" | |
668 | "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2" | |
669 | [(set_attr "type" "neon_fp_mla_s<q>")] | |
c4216388 MGD |
670 | ) |
671 | ||
79739965 KT |
672 | (define_insn "neon_vrint<NEON_VRINT:nvrint_variant><VCVTF:mode>" |
673 | [(set (match_operand:VCVTF 0 "s_register_operand" "=w") | |
674 | (unspec:VCVTF [(match_operand:VCVTF 1 | |
675 | "s_register_operand" "w")] | |
676 | NEON_VRINT))] | |
c8d61ab8 | 677 | "TARGET_NEON && TARGET_VFP5" |
e60226ff | 678 | "vrint<nvrint_variant>.f32\\t%<V_reg>0, %<V_reg>1" |
f7379e5e | 679 | [(set_attr "type" "neon_fp_round_<V_elem_ch><q>")] |
79739965 KT |
680 | ) |
681 | ||
e9e67af1 KT |
682 | (define_insn "neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode><v_cmp_result>" |
683 | [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w") | |
684 | (FIXUORS:<V_cmp_result> (unspec:VCVTF | |
685 | [(match_operand:VCVTF 1 "register_operand" "w")] | |
686 | NEON_VCVT)))] | |
c8d61ab8 | 687 | "TARGET_NEON && TARGET_VFP5" |
e9e67af1 KT |
688 | "vcvt<nvrint_variant>.<su>32.f32\\t%<V_reg>0, %<V_reg>1" |
689 | [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>") | |
690 | (set_attr "predicable" "no")] | |
691 | ) | |
692 | ||
75de6a28 | 693 | (define_insn "ior<mode>3_neon" |
88f77cba JB |
694 | [(set (match_operand:VDQ 0 "s_register_operand" "=w,w") |
695 | (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0") | |
696 | (match_operand:VDQ 2 "neon_logic_op2" "w,Dl")))] | |
697 | "TARGET_NEON" | |
698 | { | |
699 | switch (which_alternative) | |
700 | { | |
701 | case 0: return "vorr\t%<V_reg>0, %<V_reg>1, %<V_reg>2"; | |
702 | case 1: return neon_output_logic_immediate ("vorr", &operands[2], | |
703 | <MODE>mode, 0, VALID_NEON_QREG_MODE (<MODE>mode)); | |
704 | default: gcc_unreachable (); | |
705 | } | |
c956e102 | 706 | } |
f7379e5e | 707 | [(set_attr "type" "neon_logic<q>")] |
c956e102 | 708 | ) |
88f77cba | 709 | |
88f77cba JB |
710 | ;; The concrete forms of the Neon immediate-logic instructions are vbic and |
711 | ;; vorr. We support the pseudo-instruction vand instead, because that | |
712 | ;; corresponds to the canonical form the middle-end expects to use for | |
713 | ;; immediate bitwise-ANDs. | |
714 | ||
11a0beff | 715 | (define_insn "and<mode>3_neon" |
88f77cba JB |
716 | [(set (match_operand:VDQ 0 "s_register_operand" "=w,w") |
717 | (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0") | |
718 | (match_operand:VDQ 2 "neon_inv_logic_op2" "w,DL")))] | |
719 | "TARGET_NEON" | |
720 | { | |
721 | switch (which_alternative) | |
722 | { | |
723 | case 0: return "vand\t%<V_reg>0, %<V_reg>1, %<V_reg>2"; | |
724 | case 1: return neon_output_logic_immediate ("vand", &operands[2], | |
725 | <MODE>mode, 1, VALID_NEON_QREG_MODE (<MODE>mode)); | |
726 | default: gcc_unreachable (); | |
727 | } | |
c956e102 | 728 | } |
f7379e5e | 729 | [(set_attr "type" "neon_logic<q>")] |
c956e102 | 730 | ) |
88f77cba | 731 | |
88f77cba JB |
732 | (define_insn "orn<mode>3_neon" |
733 | [(set (match_operand:VDQ 0 "s_register_operand" "=w") | |
50fed7bf RR |
734 | (ior:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w")) |
735 | (match_operand:VDQ 1 "s_register_operand" "w")))] | |
88f77cba | 736 | "TARGET_NEON" |
c956e102 | 737 | "vorn\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
f7379e5e | 738 | [(set_attr "type" "neon_logic<q>")] |
c956e102 | 739 | ) |
88f77cba | 740 | |
88f77cba JB |
741 | (define_insn "bic<mode>3_neon" |
742 | [(set (match_operand:VDQ 0 "s_register_operand" "=w") | |
50fed7bf RR |
743 | (and:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w")) |
744 | (match_operand:VDQ 1 "s_register_operand" "w")))] | |
88f77cba | 745 | "TARGET_NEON" |
c956e102 | 746 | "vbic\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
f7379e5e | 747 | [(set_attr "type" "neon_logic<q>")] |
c956e102 | 748 | ) |
88f77cba | 749 | |
434fb3b6 | 750 | (define_insn "xor<mode>3_neon" |
88f77cba JB |
751 | [(set (match_operand:VDQ 0 "s_register_operand" "=w") |
752 | (xor:VDQ (match_operand:VDQ 1 "s_register_operand" "w") | |
753 | (match_operand:VDQ 2 "s_register_operand" "w")))] | |
754 | "TARGET_NEON" | |
c956e102 | 755 | "veor\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
f7379e5e | 756 | [(set_attr "type" "neon_logic<q>")] |
c956e102 | 757 | ) |
88f77cba | 758 | |
fd436034 | 759 | (define_insn "one_cmpl<mode>2_neon" |
88f77cba JB |
760 | [(set (match_operand:VDQ 0 "s_register_operand" "=w") |
761 | (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))] | |
762 | "TARGET_NEON" | |
c956e102 | 763 | "vmvn\t%<V_reg>0, %<V_reg>1" |
f7379e5e | 764 | [(set_attr "type" "neon_move<q>")] |
c956e102 | 765 | ) |
88f77cba JB |
766 | |
767 | (define_insn "abs<mode>2" | |
768 | [(set (match_operand:VDQW 0 "s_register_operand" "=w") | |
769 | (abs:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))] | |
770 | "TARGET_NEON" | |
c956e102 | 771 | "vabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1" |
003bb7f3 | 772 | [(set (attr "type") |
b75b1be2 | 773 | (if_then_else (match_test "<Is_float_mode>") |
f7379e5e JG |
774 | (const_string "neon_fp_abs_s<q>") |
775 | (const_string "neon_abs<q>")))] | |
c956e102 | 776 | ) |
88f77cba | 777 | |
4cbb7cab | 778 | (define_insn "neon_neg<mode>2" |
88f77cba JB |
779 | [(set (match_operand:VDQW 0 "s_register_operand" "=w") |
780 | (neg:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))] | |
781 | "TARGET_NEON" | |
c956e102 | 782 | "vneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1" |
003bb7f3 | 783 | [(set (attr "type") |
b75b1be2 | 784 | (if_then_else (match_test "<Is_float_mode>") |
f7379e5e JG |
785 | (const_string "neon_fp_neg_s<q>") |
786 | (const_string "neon_neg<q>")))] | |
c956e102 | 787 | ) |
88f77cba | 788 | |
4cbb7cab | 789 | (define_insn "neon_<absneg_str><mode>2" |
55a9b91b MW |
790 | [(set (match_operand:VH 0 "s_register_operand" "=w") |
791 | (ABSNEG:VH (match_operand:VH 1 "s_register_operand" "w")))] | |
792 | "TARGET_NEON_FP16INST" | |
793 | "v<absneg_str>.<V_s_elem>\t%<V_reg>0, %<V_reg>1" | |
794 | [(set_attr "type" "neon_abs<q>")] | |
795 | ) | |
796 | ||
797 | (define_expand "neon_v<absneg_str><mode>" | |
798 | [(set | |
799 | (match_operand:VH 0 "s_register_operand") | |
800 | (ABSNEG:VH (match_operand:VH 1 "s_register_operand")))] | |
801 | "TARGET_NEON_FP16INST" | |
802 | { | |
4cbb7cab | 803 | emit_insn (gen_neon_<absneg_str><mode>2 (operands[0], operands[1])); |
55a9b91b MW |
804 | DONE; |
805 | }) | |
806 | ||
807 | (define_insn "neon_v<fp16_rnd_str><mode>" | |
808 | [(set (match_operand:VH 0 "s_register_operand" "=w") | |
809 | (unspec:VH | |
810 | [(match_operand:VH 1 "s_register_operand" "w")] | |
811 | FP16_RND))] | |
812 | "TARGET_NEON_FP16INST" | |
813 | "<fp16_rnd_insn>.<V_s_elem>\t%<V_reg>0, %<V_reg>1" | |
814 | [(set_attr "type" "neon_fp_round_s<q>")] | |
815 | ) | |
816 | ||
817 | (define_insn "neon_vrsqrte<mode>" | |
818 | [(set (match_operand:VH 0 "s_register_operand" "=w") | |
819 | (unspec:VH | |
820 | [(match_operand:VH 1 "s_register_operand" "w")] | |
821 | UNSPEC_VRSQRTE))] | |
822 | "TARGET_NEON_FP16INST" | |
823 | "vrsqrte.f16\t%<V_reg>0, %<V_reg>1" | |
824 | [(set_attr "type" "neon_fp_rsqrte_s<q>")] | |
825 | ) | |
826 | ||
88f77cba JB |
827 | (define_insn "*umin<mode>3_neon" |
828 | [(set (match_operand:VDQIW 0 "s_register_operand" "=w") | |
829 | (umin:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w") | |
830 | (match_operand:VDQIW 2 "s_register_operand" "w")))] | |
831 | "TARGET_NEON" | |
c956e102 | 832 | "vmin.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
f7379e5e | 833 | [(set_attr "type" "neon_minmax<q>")] |
c956e102 | 834 | ) |
88f77cba JB |
835 | |
836 | (define_insn "*umax<mode>3_neon" | |
837 | [(set (match_operand:VDQIW 0 "s_register_operand" "=w") | |
838 | (umax:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w") | |
839 | (match_operand:VDQIW 2 "s_register_operand" "w")))] | |
840 | "TARGET_NEON" | |
c956e102 | 841 | "vmax.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
f7379e5e | 842 | [(set_attr "type" "neon_minmax<q>")] |
c956e102 | 843 | ) |
88f77cba JB |
844 | |
845 | (define_insn "*smin<mode>3_neon" | |
846 | [(set (match_operand:VDQW 0 "s_register_operand" "=w") | |
847 | (smin:VDQW (match_operand:VDQW 1 "s_register_operand" "w") | |
848 | (match_operand:VDQW 2 "s_register_operand" "w")))] | |
849 | "TARGET_NEON" | |
c956e102 | 850 | "vmin.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
003bb7f3 | 851 | [(set (attr "type") |
b75b1be2 | 852 | (if_then_else (match_test "<Is_float_mode>") |
f7379e5e JG |
853 | (const_string "neon_fp_minmax_s<q>") |
854 | (const_string "neon_minmax<q>")))] | |
c956e102 | 855 | ) |
88f77cba JB |
856 | |
857 | (define_insn "*smax<mode>3_neon" | |
858 | [(set (match_operand:VDQW 0 "s_register_operand" "=w") | |
859 | (smax:VDQW (match_operand:VDQW 1 "s_register_operand" "w") | |
860 | (match_operand:VDQW 2 "s_register_operand" "w")))] | |
861 | "TARGET_NEON" | |
c956e102 | 862 | "vmax.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
003bb7f3 | 863 | [(set (attr "type") |
b75b1be2 | 864 | (if_then_else (match_test "<Is_float_mode>") |
f7379e5e JG |
865 | (const_string "neon_fp_minmax_s<q>") |
866 | (const_string "neon_minmax<q>")))] | |
c956e102 | 867 | ) |
88f77cba JB |
868 | |
869 | ; TODO: V2DI shifts are currently disabled because there are bugs in the | |
870 | ; generic vectorizer code. It ends up creating a V2DI constructor with | |
871 | ; SImode elements. | |
872 | ||
d44463a9 | 873 | (define_insn "vashl<mode>3" |
31a0c825 DP |
874 | [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w") |
875 | (ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w,w") | |
e009dfb3 | 876 | (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Dm")))] |
31a0c825 DP |
877 | "TARGET_NEON" |
878 | { | |
879 | switch (which_alternative) | |
880 | { | |
881 | case 0: return "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"; | |
882 | case 1: return neon_output_shift_immediate ("vshl", 'i', &operands[2], | |
883 | <MODE>mode, | |
884 | VALID_NEON_QREG_MODE (<MODE>mode), | |
885 | true); | |
886 | default: gcc_unreachable (); | |
887 | } | |
888 | } | |
f7379e5e | 889 | [(set_attr "type" "neon_shift_reg<q>, neon_shift_imm<q>")] |
31a0c825 DP |
890 | ) |
891 | ||
892 | (define_insn "vashr<mode>3_imm" | |
88f77cba | 893 | [(set (match_operand:VDQIW 0 "s_register_operand" "=w") |
31a0c825 | 894 | (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w") |
e009dfb3 | 895 | (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dm")))] |
88f77cba | 896 | "TARGET_NEON" |
31a0c825 DP |
897 | { |
898 | return neon_output_shift_immediate ("vshr", 's', &operands[2], | |
899 | <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode), | |
900 | false); | |
901 | } | |
f7379e5e | 902 | [(set_attr "type" "neon_shift_imm<q>")] |
c956e102 | 903 | ) |
88f77cba | 904 | |
31a0c825 DP |
905 | (define_insn "vlshr<mode>3_imm" |
906 | [(set (match_operand:VDQIW 0 "s_register_operand" "=w") | |
907 | (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w") | |
e009dfb3 | 908 | (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dm")))] |
31a0c825 DP |
909 | "TARGET_NEON" |
910 | { | |
911 | return neon_output_shift_immediate ("vshr", 'u', &operands[2], | |
912 | <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode), | |
913 | false); | |
914 | } | |
f7379e5e | 915 | [(set_attr "type" "neon_shift_imm<q>")] |
31a0c825 DP |
916 | ) |
917 | ||
88f77cba JB |
918 | ; Used for implementing arithmetic shift-right, which is a left-shift by a negative |
919 | ; amount, with signed operands. This is essentially the same as vashl<mode>3 | |
920 | ; above, but using an unspec in case GCC tries anything tricky with negative | |
921 | ; shift amounts. | |
922 | ||
923 | (define_insn "ashl<mode>3_signed" | |
924 | [(set (match_operand:VDQI 0 "s_register_operand" "=w") | |
925 | (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w") | |
926 | (match_operand:VDQI 2 "s_register_operand" "w")] | |
927 | UNSPEC_ASHIFT_SIGNED))] | |
928 | "TARGET_NEON" | |
c956e102 | 929 | "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
f7379e5e | 930 | [(set_attr "type" "neon_shift_reg<q>")] |
c956e102 | 931 | ) |
88f77cba JB |
932 | |
933 | ; Used for implementing logical shift-right, which is a left-shift by a negative | |
934 | ; amount, with unsigned operands. | |
935 | ||
936 | (define_insn "ashl<mode>3_unsigned" | |
937 | [(set (match_operand:VDQI 0 "s_register_operand" "=w") | |
938 | (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w") | |
939 | (match_operand:VDQI 2 "s_register_operand" "w")] | |
940 | UNSPEC_ASHIFT_UNSIGNED))] | |
941 | "TARGET_NEON" | |
c956e102 | 942 | "vshl.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
f7379e5e | 943 | [(set_attr "type" "neon_shift_reg<q>")] |
c956e102 | 944 | ) |
88f77cba | 945 | |
d44463a9 | 946 | (define_expand "vashr<mode>3" |
cd65e265 DZ |
947 | [(set (match_operand:VDQIW 0 "s_register_operand") |
948 | (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand") | |
949 | (match_operand:VDQIW 2 "imm_rshift_or_reg_neon")))] | |
88f77cba JB |
950 | "TARGET_NEON" |
951 | { | |
56b15099 | 952 | if (s_register_operand (operands[2], <MODE>mode)) |
31a0c825 | 953 | { |
56b15099 | 954 | rtx neg = gen_reg_rtx (<MODE>mode); |
4cbb7cab | 955 | emit_insn (gen_neon_neg<mode>2 (neg, operands[2])); |
31a0c825 DP |
956 | emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1], neg)); |
957 | } | |
958 | else | |
959 | emit_insn (gen_vashr<mode>3_imm (operands[0], operands[1], operands[2])); | |
88f77cba JB |
960 | DONE; |
961 | }) | |
962 | ||
d44463a9 | 963 | (define_expand "vlshr<mode>3" |
cd65e265 DZ |
964 | [(set (match_operand:VDQIW 0 "s_register_operand") |
965 | (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand") | |
966 | (match_operand:VDQIW 2 "imm_rshift_or_reg_neon")))] | |
88f77cba JB |
967 | "TARGET_NEON" |
968 | { | |
56b15099 | 969 | if (s_register_operand (operands[2], <MODE>mode)) |
31a0c825 | 970 | { |
56b15099 | 971 | rtx neg = gen_reg_rtx (<MODE>mode); |
4cbb7cab | 972 | emit_insn (gen_neon_neg<mode>2 (neg, operands[2])); |
31a0c825 DP |
973 | emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1], neg)); |
974 | } | |
975 | else | |
976 | emit_insn (gen_vlshr<mode>3_imm (operands[0], operands[1], operands[2])); | |
88f77cba JB |
977 | DONE; |
978 | }) | |
979 | ||
3f2dc806 AS |
980 | ;; 64-bit shifts |
981 | ||
982 | ;; This pattern loads a 32-bit shift count into a 64-bit NEON register, | |
983 | ;; leaving the upper half uninitialized. This is OK since the shift | |
984 | ;; instruction only looks at the low 8 bits anyway. To avoid confusing | |
985 | ;; data flow analysis however, we pretend the full register is set | |
986 | ;; using an unspec. | |
987 | (define_insn "neon_load_count" | |
988 | [(set (match_operand:DI 0 "s_register_operand" "=w,w") | |
989 | (unspec:DI [(match_operand:SI 1 "nonimmediate_operand" "Um,r")] | |
990 | UNSPEC_LOAD_COUNT))] | |
991 | "TARGET_NEON" | |
992 | "@ | |
993 | vld1.32\t{%P0[0]}, %A1 | |
994 | vmov.32\t%P0[0], %1" | |
f7379e5e | 995 | [(set_attr "type" "neon_load1_1reg,neon_from_gp")] |
3f2dc806 AS |
996 | ) |
997 | ||
88f77cba JB |
998 | ;; Widening operations |
999 | ||
93c590ee | 1000 | (define_expand "widen_ssum<mode>3" |
cd65e265 | 1001 | [(set (match_operand:<V_double_width> 0 "s_register_operand") |
93c590ee MC |
1002 | (plus:<V_double_width> |
1003 | (sign_extend:<V_double_width> | |
cd65e265 DZ |
1004 | (match_operand:VQI 1 "s_register_operand")) |
1005 | (match_operand:<V_double_width> 2 "s_register_operand")))] | |
93c590ee MC |
1006 | "TARGET_NEON" |
1007 | { | |
1008 | machine_mode mode = GET_MODE (operands[1]); | |
1009 | rtx p1, p2; | |
1010 | ||
1011 | p1 = arm_simd_vect_par_cnst_half (mode, false); | |
1012 | p2 = arm_simd_vect_par_cnst_half (mode, true); | |
1013 | ||
1014 | if (operands[0] != operands[2]) | |
1015 | emit_move_insn (operands[0], operands[2]); | |
1016 | ||
1017 | emit_insn (gen_vec_sel_widen_ssum_lo<mode><V_half>3 (operands[0], | |
1018 | operands[1], | |
1019 | p1, | |
1020 | operands[0])); | |
1021 | emit_insn (gen_vec_sel_widen_ssum_hi<mode><V_half>3 (operands[0], | |
1022 | operands[1], | |
1023 | p2, | |
1024 | operands[0])); | |
1025 | DONE; | |
1026 | } | |
1027 | ) | |
1028 | ||
b8c36603 KT |
1029 | (define_insn "vec_sel_widen_ssum_lo<mode><V_half>3" |
1030 | [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w") | |
1031 | (plus:<V_double_width> | |
1032 | (sign_extend:<V_double_width> | |
1033 | (vec_select:<V_HALF> | |
93c590ee MC |
1034 | (match_operand:VQI 1 "s_register_operand" "%w") |
1035 | (match_operand:VQI 2 "vect_par_constant_low" ""))) | |
b8c36603 | 1036 | (match_operand:<V_double_width> 3 "s_register_operand" "0")))] |
93c590ee MC |
1037 | "TARGET_NEON" |
1038 | { | |
1039 | return BYTES_BIG_ENDIAN ? "vaddw.<V_s_elem>\t%q0, %q3, %f1" : | |
1040 | "vaddw.<V_s_elem>\t%q0, %q3, %e1"; | |
1041 | } | |
1042 | [(set_attr "type" "neon_add_widen")]) | |
1043 | ||
b8c36603 KT |
1044 | (define_insn "vec_sel_widen_ssum_hi<mode><V_half>3" |
1045 | [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w") | |
1046 | (plus:<V_double_width> | |
1047 | (sign_extend:<V_double_width> | |
1048 | (vec_select:<V_HALF> | |
1049 | (match_operand:VQI 1 "s_register_operand" "%w") | |
93c590ee | 1050 | (match_operand:VQI 2 "vect_par_constant_high" ""))) |
b8c36603 | 1051 | (match_operand:<V_double_width> 3 "s_register_operand" "0")))] |
93c590ee MC |
1052 | "TARGET_NEON" |
1053 | { | |
1054 | return BYTES_BIG_ENDIAN ? "vaddw.<V_s_elem>\t%q0, %q3, %e1" : | |
1055 | "vaddw.<V_s_elem>\t%q0, %q3, %f1"; | |
1056 | } | |
1057 | [(set_attr "type" "neon_add_widen")]) | |
1058 | ||
88f77cba JB |
1059 | (define_insn "widen_ssum<mode>3" |
1060 | [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") | |
93c590ee MC |
1061 | (plus:<V_widen> |
1062 | (sign_extend:<V_widen> | |
1063 | (match_operand:VW 1 "s_register_operand" "%w")) | |
1064 | (match_operand:<V_widen> 2 "s_register_operand" "w")))] | |
88f77cba | 1065 | "TARGET_NEON" |
c956e102 | 1066 | "vaddw.<V_s_elem>\t%q0, %q2, %P1" |
f7379e5e | 1067 | [(set_attr "type" "neon_add_widen")] |
c956e102 | 1068 | ) |
88f77cba | 1069 | |
93c590ee | 1070 | (define_expand "widen_usum<mode>3" |
cd65e265 | 1071 | [(set (match_operand:<V_double_width> 0 "s_register_operand") |
93c590ee MC |
1072 | (plus:<V_double_width> |
1073 | (zero_extend:<V_double_width> | |
cd65e265 DZ |
1074 | (match_operand:VQI 1 "s_register_operand")) |
1075 | (match_operand:<V_double_width> 2 "s_register_operand")))] | |
93c590ee MC |
1076 | "TARGET_NEON" |
1077 | { | |
1078 | machine_mode mode = GET_MODE (operands[1]); | |
1079 | rtx p1, p2; | |
1080 | ||
1081 | p1 = arm_simd_vect_par_cnst_half (mode, false); | |
1082 | p2 = arm_simd_vect_par_cnst_half (mode, true); | |
1083 | ||
1084 | if (operands[0] != operands[2]) | |
1085 | emit_move_insn (operands[0], operands[2]); | |
1086 | ||
1087 | emit_insn (gen_vec_sel_widen_usum_lo<mode><V_half>3 (operands[0], | |
1088 | operands[1], | |
1089 | p1, | |
1090 | operands[0])); | |
1091 | emit_insn (gen_vec_sel_widen_usum_hi<mode><V_half>3 (operands[0], | |
1092 | operands[1], | |
1093 | p2, | |
1094 | operands[0])); | |
1095 | DONE; | |
1096 | } | |
1097 | ) | |
1098 | ||
b8c36603 KT |
1099 | (define_insn "vec_sel_widen_usum_lo<mode><V_half>3" |
1100 | [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w") | |
1101 | (plus:<V_double_width> | |
1102 | (zero_extend:<V_double_width> | |
1103 | (vec_select:<V_HALF> | |
93c590ee MC |
1104 | (match_operand:VQI 1 "s_register_operand" "%w") |
1105 | (match_operand:VQI 2 "vect_par_constant_low" ""))) | |
b8c36603 | 1106 | (match_operand:<V_double_width> 3 "s_register_operand" "0")))] |
93c590ee MC |
1107 | "TARGET_NEON" |
1108 | { | |
1109 | return BYTES_BIG_ENDIAN ? "vaddw.<V_u_elem>\t%q0, %q3, %f1" : | |
1110 | "vaddw.<V_u_elem>\t%q0, %q3, %e1"; | |
1111 | } | |
1112 | [(set_attr "type" "neon_add_widen")]) | |
1113 | ||
b8c36603 KT |
1114 | (define_insn "vec_sel_widen_usum_hi<mode><V_half>3" |
1115 | [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w") | |
1116 | (plus:<V_double_width> | |
1117 | (zero_extend:<V_double_width> | |
1118 | (vec_select:<V_HALF> | |
1119 | (match_operand:VQI 1 "s_register_operand" "%w") | |
93c590ee | 1120 | (match_operand:VQI 2 "vect_par_constant_high" ""))) |
b8c36603 | 1121 | (match_operand:<V_double_width> 3 "s_register_operand" "0")))] |
93c590ee MC |
1122 | "TARGET_NEON" |
1123 | { | |
1124 | return BYTES_BIG_ENDIAN ? "vaddw.<V_u_elem>\t%q0, %q3, %e1" : | |
1125 | "vaddw.<V_u_elem>\t%q0, %q3, %f1"; | |
1126 | } | |
1127 | [(set_attr "type" "neon_add_widen")]) | |
1128 | ||
88f77cba JB |
1129 | (define_insn "widen_usum<mode>3" |
1130 | [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") | |
1131 | (plus:<V_widen> (zero_extend:<V_widen> | |
1132 | (match_operand:VW 1 "s_register_operand" "%w")) | |
1133 | (match_operand:<V_widen> 2 "s_register_operand" "w")))] | |
1134 | "TARGET_NEON" | |
c956e102 | 1135 | "vaddw.<V_u_elem>\t%q0, %q2, %P1" |
f7379e5e | 1136 | [(set_attr "type" "neon_add_widen")] |
c956e102 | 1137 | ) |
88f77cba | 1138 | |
88f77cba JB |
1139 | ;; Helpers for quad-word reduction operations |
1140 | ||
1141 | ; Add (or smin, smax...) the low N/2 elements of the N-element vector | |
1142 | ; operand[1] to the high N/2 elements of same. Put the result in operand[0], an | |
1143 | ; N/2-element vector. | |
1144 | ||
1145 | (define_insn "quad_halves_<code>v4si" | |
1146 | [(set (match_operand:V2SI 0 "s_register_operand" "=w") | |
728dc153 | 1147 | (VQH_OPS:V2SI |
88f77cba JB |
1148 | (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w") |
1149 | (parallel [(const_int 0) (const_int 1)])) | |
1150 | (vec_select:V2SI (match_dup 1) | |
1151 | (parallel [(const_int 2) (const_int 3)]))))] | |
1152 | "TARGET_NEON" | |
c956e102 MS |
1153 | "<VQH_mnem>.<VQH_sign>32\t%P0, %e1, %f1" |
1154 | [(set_attr "vqh_mnem" "<VQH_mnem>") | |
f7379e5e | 1155 | (set_attr "type" "neon_reduc_<VQH_type>_q")] |
c956e102 | 1156 | ) |
88f77cba JB |
1157 | |
1158 | (define_insn "quad_halves_<code>v4sf" | |
1159 | [(set (match_operand:V2SF 0 "s_register_operand" "=w") | |
728dc153 | 1160 | (VQHS_OPS:V2SF |
88f77cba JB |
1161 | (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w") |
1162 | (parallel [(const_int 0) (const_int 1)])) | |
1163 | (vec_select:V2SF (match_dup 1) | |
1164 | (parallel [(const_int 2) (const_int 3)]))))] | |
bb78e587 | 1165 | "ARM_HAVE_NEON_V4SF_ARITH" |
c956e102 MS |
1166 | "<VQH_mnem>.f32\t%P0, %e1, %f1" |
1167 | [(set_attr "vqh_mnem" "<VQH_mnem>") | |
f7379e5e | 1168 | (set_attr "type" "neon_fp_reduc_<VQH_type>_s_q")] |
c956e102 | 1169 | ) |
88f77cba JB |
1170 | |
1171 | (define_insn "quad_halves_<code>v8hi" | |
1172 | [(set (match_operand:V4HI 0 "s_register_operand" "=w") | |
728dc153 | 1173 | (VQH_OPS:V4HI |
88f77cba JB |
1174 | (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w") |
1175 | (parallel [(const_int 0) (const_int 1) | |
1176 | (const_int 2) (const_int 3)])) | |
1177 | (vec_select:V4HI (match_dup 1) | |
1178 | (parallel [(const_int 4) (const_int 5) | |
1179 | (const_int 6) (const_int 7)]))))] | |
1180 | "TARGET_NEON" | |
c956e102 MS |
1181 | "<VQH_mnem>.<VQH_sign>16\t%P0, %e1, %f1" |
1182 | [(set_attr "vqh_mnem" "<VQH_mnem>") | |
f7379e5e | 1183 | (set_attr "type" "neon_reduc_<VQH_type>_q")] |
c956e102 | 1184 | ) |
88f77cba JB |
1185 | |
1186 | (define_insn "quad_halves_<code>v16qi" | |
1187 | [(set (match_operand:V8QI 0 "s_register_operand" "=w") | |
728dc153 | 1188 | (VQH_OPS:V8QI |
88f77cba JB |
1189 | (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w") |
1190 | (parallel [(const_int 0) (const_int 1) | |
1191 | (const_int 2) (const_int 3) | |
1192 | (const_int 4) (const_int 5) | |
1193 | (const_int 6) (const_int 7)])) | |
1194 | (vec_select:V8QI (match_dup 1) | |
1195 | (parallel [(const_int 8) (const_int 9) | |
1196 | (const_int 10) (const_int 11) | |
1197 | (const_int 12) (const_int 13) | |
1198 | (const_int 14) (const_int 15)]))))] | |
1199 | "TARGET_NEON" | |
c956e102 MS |
1200 | "<VQH_mnem>.<VQH_sign>8\t%P0, %e1, %f1" |
1201 | [(set_attr "vqh_mnem" "<VQH_mnem>") | |
f7379e5e | 1202 | (set_attr "type" "neon_reduc_<VQH_type>_q")] |
c956e102 | 1203 | ) |
88f77cba | 1204 | |
0f38f229 | 1205 | (define_expand "move_hi_quad_<mode>" |
cd65e265 DZ |
1206 | [(match_operand:ANY128 0 "s_register_operand") |
1207 | (match_operand:<V_HALF> 1 "s_register_operand")] | |
0f38f229 | 1208 | "TARGET_NEON" |
88f77cba | 1209 | { |
d92aed06 RS |
1210 | emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0], <MODE>mode, |
1211 | GET_MODE_SIZE (<V_HALF>mode)), | |
1212 | operands[1]); | |
0f38f229 TB |
1213 | DONE; |
1214 | }) | |
1215 | ||
1216 | (define_expand "move_lo_quad_<mode>" | |
cd65e265 DZ |
1217 | [(match_operand:ANY128 0 "s_register_operand") |
1218 | (match_operand:<V_HALF> 1 "s_register_operand")] | |
0f38f229 | 1219 | "TARGET_NEON" |
88f77cba | 1220 | { |
d92aed06 RS |
1221 | emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0], |
1222 | <MODE>mode, 0), | |
1223 | operands[1]); | |
0f38f229 TB |
1224 | DONE; |
1225 | }) | |
88f77cba JB |
1226 | |
1227 | ;; Reduction operations | |
1228 | ||
89edc986 | 1229 | (define_expand "reduc_plus_scal_<mode>" |
cd65e265 DZ |
1230 | [(match_operand:<V_elem> 0 "nonimmediate_operand") |
1231 | (match_operand:VD 1 "s_register_operand")] | |
bb78e587 | 1232 | "ARM_HAVE_NEON_<MODE>_ARITH" |
88f77cba | 1233 | { |
89edc986 AL |
1234 | rtx vec = gen_reg_rtx (<MODE>mode); |
1235 | neon_pairwise_reduce (vec, operands[1], <MODE>mode, | |
88f77cba | 1236 | &gen_neon_vpadd_internal<mode>); |
89edc986 | 1237 | /* The same result is actually computed into every element. */ |
ff03930a | 1238 | emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx)); |
88f77cba JB |
1239 | DONE; |
1240 | }) | |
1241 | ||
89edc986 | 1242 | (define_expand "reduc_plus_scal_<mode>" |
cd65e265 DZ |
1243 | [(match_operand:<V_elem> 0 "nonimmediate_operand") |
1244 | (match_operand:VQ 1 "s_register_operand")] | |
bb78e587 | 1245 | "ARM_HAVE_NEON_<MODE>_ARITH && !BYTES_BIG_ENDIAN" |
88f77cba JB |
1246 | { |
1247 | rtx step1 = gen_reg_rtx (<V_HALF>mode); | |
88f77cba JB |
1248 | |
1249 | emit_insn (gen_quad_halves_plus<mode> (step1, operands[1])); | |
89edc986 | 1250 | emit_insn (gen_reduc_plus_scal_<V_half> (operands[0], step1)); |
88f77cba JB |
1251 | |
1252 | DONE; | |
1253 | }) | |
1254 | ||
89edc986 | 1255 | (define_expand "reduc_plus_scal_v2di" |
cd65e265 DZ |
1256 | [(match_operand:DI 0 "nonimmediate_operand") |
1257 | (match_operand:V2DI 1 "s_register_operand")] | |
89edc986 AL |
1258 | "TARGET_NEON && !BYTES_BIG_ENDIAN" |
1259 | { | |
1260 | rtx vec = gen_reg_rtx (V2DImode); | |
1261 | ||
1262 | emit_insn (gen_arm_reduc_plus_internal_v2di (vec, operands[1])); | |
ff03930a | 1263 | emit_insn (gen_vec_extractv2didi (operands[0], vec, const0_rtx)); |
89edc986 AL |
1264 | |
1265 | DONE; | |
1266 | }) | |
1267 | ||
1268 | (define_insn "arm_reduc_plus_internal_v2di" | |
88f77cba JB |
1269 | [(set (match_operand:V2DI 0 "s_register_operand" "=w") |
1270 | (unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w")] | |
1271 | UNSPEC_VPADD))] | |
0094f21b | 1272 | "TARGET_NEON && !BYTES_BIG_ENDIAN" |
c956e102 | 1273 | "vadd.i64\t%e0, %e1, %f1" |
f7379e5e | 1274 | [(set_attr "type" "neon_add_q")] |
c956e102 | 1275 | ) |
88f77cba | 1276 | |
f5dcbee1 | 1277 | (define_expand "reduc_smin_scal_<mode>" |
cd65e265 DZ |
1278 | [(match_operand:<V_elem> 0 "nonimmediate_operand") |
1279 | (match_operand:VD 1 "s_register_operand")] | |
bb78e587 | 1280 | "ARM_HAVE_NEON_<MODE>_ARITH" |
88f77cba | 1281 | { |
f5dcbee1 AL |
1282 | rtx vec = gen_reg_rtx (<MODE>mode); |
1283 | ||
1284 | neon_pairwise_reduce (vec, operands[1], <MODE>mode, | |
88f77cba | 1285 | &gen_neon_vpsmin<mode>); |
f5dcbee1 | 1286 | /* The result is computed into every element of the vector. */ |
ff03930a | 1287 | emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx)); |
88f77cba JB |
1288 | DONE; |
1289 | }) | |
1290 | ||
f5dcbee1 | 1291 | (define_expand "reduc_smin_scal_<mode>" |
cd65e265 DZ |
1292 | [(match_operand:<V_elem> 0 "nonimmediate_operand") |
1293 | (match_operand:VQ 1 "s_register_operand")] | |
bb78e587 | 1294 | "ARM_HAVE_NEON_<MODE>_ARITH && !BYTES_BIG_ENDIAN" |
88f77cba JB |
1295 | { |
1296 | rtx step1 = gen_reg_rtx (<V_HALF>mode); | |
88f77cba JB |
1297 | |
1298 | emit_insn (gen_quad_halves_smin<mode> (step1, operands[1])); | |
f5dcbee1 | 1299 | emit_insn (gen_reduc_smin_scal_<V_half> (operands[0], step1)); |
88f77cba JB |
1300 | |
1301 | DONE; | |
1302 | }) | |
1303 | ||
f5dcbee1 | 1304 | (define_expand "reduc_smax_scal_<mode>" |
cd65e265 DZ |
1305 | [(match_operand:<V_elem> 0 "nonimmediate_operand") |
1306 | (match_operand:VD 1 "s_register_operand")] | |
bb78e587 | 1307 | "ARM_HAVE_NEON_<MODE>_ARITH" |
88f77cba | 1308 | { |
f5dcbee1 AL |
1309 | rtx vec = gen_reg_rtx (<MODE>mode); |
1310 | neon_pairwise_reduce (vec, operands[1], <MODE>mode, | |
88f77cba | 1311 | &gen_neon_vpsmax<mode>); |
f5dcbee1 | 1312 | /* The result is computed into every element of the vector. */ |
ff03930a | 1313 | emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx)); |
88f77cba JB |
1314 | DONE; |
1315 | }) | |
1316 | ||
f5dcbee1 | 1317 | (define_expand "reduc_smax_scal_<mode>" |
cd65e265 DZ |
1318 | [(match_operand:<V_elem> 0 "nonimmediate_operand") |
1319 | (match_operand:VQ 1 "s_register_operand")] | |
bb78e587 | 1320 | "ARM_HAVE_NEON_<MODE>_ARITH && !BYTES_BIG_ENDIAN" |
88f77cba JB |
1321 | { |
1322 | rtx step1 = gen_reg_rtx (<V_HALF>mode); | |
88f77cba JB |
1323 | |
1324 | emit_insn (gen_quad_halves_smax<mode> (step1, operands[1])); | |
f5dcbee1 | 1325 | emit_insn (gen_reduc_smax_scal_<V_half> (operands[0], step1)); |
88f77cba JB |
1326 | |
1327 | DONE; | |
1328 | }) | |
1329 | ||
f5dcbee1 | 1330 | (define_expand "reduc_umin_scal_<mode>" |
cd65e265 DZ |
1331 | [(match_operand:<V_elem> 0 "nonimmediate_operand") |
1332 | (match_operand:VDI 1 "s_register_operand")] | |
88f77cba JB |
1333 | "TARGET_NEON" |
1334 | { | |
f5dcbee1 AL |
1335 | rtx vec = gen_reg_rtx (<MODE>mode); |
1336 | neon_pairwise_reduce (vec, operands[1], <MODE>mode, | |
88f77cba | 1337 | &gen_neon_vpumin<mode>); |
f5dcbee1 | 1338 | /* The result is computed into every element of the vector. */ |
ff03930a | 1339 | emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx)); |
88f77cba JB |
1340 | DONE; |
1341 | }) | |
1342 | ||
f5dcbee1 | 1343 | (define_expand "reduc_umin_scal_<mode>" |
cd65e265 DZ |
1344 | [(match_operand:<V_elem> 0 "nonimmediate_operand") |
1345 | (match_operand:VQI 1 "s_register_operand")] | |
0094f21b | 1346 | "TARGET_NEON && !BYTES_BIG_ENDIAN" |
88f77cba JB |
1347 | { |
1348 | rtx step1 = gen_reg_rtx (<V_HALF>mode); | |
88f77cba JB |
1349 | |
1350 | emit_insn (gen_quad_halves_umin<mode> (step1, operands[1])); | |
f5dcbee1 | 1351 | emit_insn (gen_reduc_umin_scal_<V_half> (operands[0], step1)); |
88f77cba JB |
1352 | |
1353 | DONE; | |
1354 | }) | |
1355 | ||
f5dcbee1 | 1356 | (define_expand "reduc_umax_scal_<mode>" |
cd65e265 DZ |
1357 | [(match_operand:<V_elem> 0 "nonimmediate_operand") |
1358 | (match_operand:VDI 1 "s_register_operand")] | |
88f77cba JB |
1359 | "TARGET_NEON" |
1360 | { | |
f5dcbee1 AL |
1361 | rtx vec = gen_reg_rtx (<MODE>mode); |
1362 | neon_pairwise_reduce (vec, operands[1], <MODE>mode, | |
88f77cba | 1363 | &gen_neon_vpumax<mode>); |
f5dcbee1 | 1364 | /* The result is computed into every element of the vector. */ |
ff03930a | 1365 | emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx)); |
88f77cba JB |
1366 | DONE; |
1367 | }) | |
1368 | ||
f5dcbee1 | 1369 | (define_expand "reduc_umax_scal_<mode>" |
cd65e265 DZ |
1370 | [(match_operand:<V_elem> 0 "nonimmediate_operand") |
1371 | (match_operand:VQI 1 "s_register_operand")] | |
0094f21b | 1372 | "TARGET_NEON && !BYTES_BIG_ENDIAN" |
88f77cba JB |
1373 | { |
1374 | rtx step1 = gen_reg_rtx (<V_HALF>mode); | |
88f77cba JB |
1375 | |
1376 | emit_insn (gen_quad_halves_umax<mode> (step1, operands[1])); | |
f5dcbee1 | 1377 | emit_insn (gen_reduc_umax_scal_<V_half> (operands[0], step1)); |
88f77cba JB |
1378 | |
1379 | DONE; | |
1380 | }) | |
1381 | ||
1382 | (define_insn "neon_vpadd_internal<mode>" | |
1383 | [(set (match_operand:VD 0 "s_register_operand" "=w") | |
1384 | (unspec:VD [(match_operand:VD 1 "s_register_operand" "w") | |
1385 | (match_operand:VD 2 "s_register_operand" "w")] | |
1386 | UNSPEC_VPADD))] | |
1387 | "TARGET_NEON" | |
c956e102 MS |
1388 | "vpadd.<V_if_elem>\t%P0, %P1, %P2" |
1389 | ;; Assume this schedules like vadd. | |
003bb7f3 | 1390 | [(set (attr "type") |
b75b1be2 | 1391 | (if_then_else (match_test "<Is_float_mode>") |
f7379e5e JG |
1392 | (const_string "neon_fp_reduc_add_s<q>") |
1393 | (const_string "neon_reduc_add<q>")))] | |
c956e102 | 1394 | ) |
88f77cba | 1395 | |
;; V4HF form of the UNSPEC_VPADD insn, output as "vpadd.f16".  Enabled by
;; TARGET_NEON_FP16INST; the "type" attribute is fixed to "neon_reduc_add".
55a9b91b MW |
1396 | (define_insn "neon_vpaddv4hf" |
1397 | [(set | |
1398 | (match_operand:V4HF 0 "s_register_operand" "=w") | |
1399 | (unspec:V4HF [(match_operand:V4HF 1 "s_register_operand" "w") | |
1400 | (match_operand:V4HF 2 "s_register_operand" "w")] | |
1401 | UNSPEC_VPADD))] | |
1402 | "TARGET_NEON_FP16INST" | |
1403 | "vpadd.f16\t%P0, %P1, %P2" | |
1404 | [(set_attr "type" "neon_reduc_add")] | |
1405 | ) | |
1406 | ||
;; Insn over the VD iterator: UNSPEC_VPSMIN of operands 1 and 2, output as
;; "vpmin.<V_s_elem>".  The "type" attribute is selected by <Is_float_mode>.
88f77cba JB |
1407 | (define_insn "neon_vpsmin<mode>" |
1408 | [(set (match_operand:VD 0 "s_register_operand" "=w") | |
1409 | (unspec:VD [(match_operand:VD 1 "s_register_operand" "w") | |
1410 | (match_operand:VD 2 "s_register_operand" "w")] | |
1411 | UNSPEC_VPSMIN))] | |
1412 | "TARGET_NEON" | |
c956e102 | 1413 | "vpmin.<V_s_elem>\t%P0, %P1, %P2" |
003bb7f3 | 1414 | [(set (attr "type") |
b75b1be2 | 1415 | (if_then_else (match_test "<Is_float_mode>") |
f7379e5e JG |
1416 | (const_string "neon_fp_reduc_minmax_s<q>") |
1417 | (const_string "neon_reduc_minmax<q>")))] | |
c956e102 | 1418 | ) |
88f77cba JB |
1419 | |
;; Insn over the VD iterator: UNSPEC_VPSMAX of operands 1 and 2, output as
;; "vpmax.<V_s_elem>".  The "type" attribute is selected by <Is_float_mode>.
1420 | (define_insn "neon_vpsmax<mode>" | |
1421 | [(set (match_operand:VD 0 "s_register_operand" "=w") | |
1422 | (unspec:VD [(match_operand:VD 1 "s_register_operand" "w") | |
1423 | (match_operand:VD 2 "s_register_operand" "w")] | |
1424 | UNSPEC_VPSMAX))] | |
1425 | "TARGET_NEON" | |
c956e102 | 1426 | "vpmax.<V_s_elem>\t%P0, %P1, %P2" |
003bb7f3 | 1427 | [(set (attr "type") |
b75b1be2 | 1428 | (if_then_else (match_test "<Is_float_mode>") |
f7379e5e JG |
1429 | (const_string "neon_fp_reduc_minmax_s<q>") |
1430 | (const_string "neon_reduc_minmax<q>")))] | |
c956e102 | 1431 | ) |
88f77cba JB |
1432 | |
;; Insn over the VDI iterator: UNSPEC_VPUMIN of operands 1 and 2, output as
;; "vpmin.<V_u_elem>".
1433 | (define_insn "neon_vpumin<mode>" | |
1434 | [(set (match_operand:VDI 0 "s_register_operand" "=w") | |
1435 | (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w") | |
1436 | (match_operand:VDI 2 "s_register_operand" "w")] | |
1437 | UNSPEC_VPUMIN))] | |
1438 | "TARGET_NEON" | |
c956e102 | 1439 | "vpmin.<V_u_elem>\t%P0, %P1, %P2" |
f7379e5e | 1440 | [(set_attr "type" "neon_reduc_minmax<q>")] |
c956e102 | 1441 | ) |
88f77cba JB |
1442 | |
;; Insn over the VDI iterator: UNSPEC_VPUMAX of operands 1 and 2, output as
;; "vpmax.<V_u_elem>".  Its generator is what the VDI reduc_umax_scal
;; expander passes to neon_pairwise_reduce.
1443 | (define_insn "neon_vpumax<mode>" | |
1444 | [(set (match_operand:VDI 0 "s_register_operand" "=w") | |
1445 | (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w") | |
1446 | (match_operand:VDI 2 "s_register_operand" "w")] | |
1447 | UNSPEC_VPUMAX))] | |
1448 | "TARGET_NEON" | |
c956e102 | 1449 | "vpmax.<V_u_elem>\t%P0, %P1, %P2" |
f7379e5e | 1450 | [(set_attr "type" "neon_reduc_minmax<q>")] |
c956e102 | 1451 | ) |
88f77cba JB |
1452 | |
1453 | ;; Saturating arithmetic | |
1454 | ||
1455 | ; NOTE: Neon supports many more saturating variants of instructions than the | |
1456 | ; following, but these are all GCC currently understands. | |
1457 | ; FIXME: Actually, GCC doesn't know how to create saturating add/sub by itself | |
1458 | ; yet either, although these patterns may be used by intrinsics when they're | |
1459 | ; added. | |
1460 | ||
;; Insn matching ss_plus on VD operands, output as "vqadd.<V_s_elem>".
1461 | (define_insn "*ss_add<mode>_neon" | |
1462 | [(set (match_operand:VD 0 "s_register_operand" "=w") | |
1463 | (ss_plus:VD (match_operand:VD 1 "s_register_operand" "w") | |
1464 | (match_operand:VD 2 "s_register_operand" "w")))] | |
1465 | "TARGET_NEON" | |
c956e102 | 1466 | "vqadd.<V_s_elem>\t%P0, %P1, %P2" |
f7379e5e | 1467 | [(set_attr "type" "neon_qadd<q>")] |
c956e102 | 1468 | ) |
88f77cba JB |
1469 | |
;; Insn matching us_plus on VD operands, output as "vqadd.<V_u_elem>".
1470 | (define_insn "*us_add<mode>_neon" | |
1471 | [(set (match_operand:VD 0 "s_register_operand" "=w") | |
1472 | (us_plus:VD (match_operand:VD 1 "s_register_operand" "w") | |
1473 | (match_operand:VD 2 "s_register_operand" "w")))] | |
1474 | "TARGET_NEON" | |
c956e102 | 1475 | "vqadd.<V_u_elem>\t%P0, %P1, %P2" |
f7379e5e | 1476 | [(set_attr "type" "neon_qadd<q>")] |
c956e102 | 1477 | ) |
88f77cba JB |
1478 | |
;; Insn matching ss_minus on VD operands, output as "vqsub.<V_s_elem>".
1479 | (define_insn "*ss_sub<mode>_neon" | |
1480 | [(set (match_operand:VD 0 "s_register_operand" "=w") | |
1481 | (ss_minus:VD (match_operand:VD 1 "s_register_operand" "w") | |
1482 | (match_operand:VD 2 "s_register_operand" "w")))] | |
1483 | "TARGET_NEON" | |
c956e102 | 1484 | "vqsub.<V_s_elem>\t%P0, %P1, %P2" |
f7379e5e | 1485 | [(set_attr "type" "neon_qsub<q>")] |
c956e102 | 1486 | ) |
88f77cba JB |
1487 | |
;; Insn matching us_minus on VD operands, output as "vqsub.<V_u_elem>".
1488 | (define_insn "*us_sub<mode>_neon" | |
1489 | [(set (match_operand:VD 0 "s_register_operand" "=w") | |
1490 | (us_minus:VD (match_operand:VD 1 "s_register_operand" "w") | |
1491 | (match_operand:VD 2 "s_register_operand" "w")))] | |
1492 | "TARGET_NEON" | |
c956e102 | 1493 | "vqsub.<V_u_elem>\t%P0, %P1, %P2" |
f7379e5e | 1494 | [(set_attr "type" "neon_qsub<q>")] |
c956e102 | 1495 | ) |
88f77cba | 1496 | |
;; Expander over the VDQW iterator with a <V_cmp_result> destination.
;; Passes operand 0, the rtx code of operand 1 and operands 2/3 to
;; arm_expand_vector_compare (final argument false).  For float modes it is
;; only enabled when flag_unsafe_math_optimizations is set.
c2978b34 RS |
1497 | (define_expand "vec_cmp<mode><v_cmp_result>" |
1498 | [(set (match_operand:<V_cmp_result> 0 "s_register_operand") | |
1499 | (match_operator:<V_cmp_result> 1 "comparison_operator" | |
1500 | [(match_operand:VDQW 2 "s_register_operand") | |
1501 | (match_operand:VDQW 3 "reg_or_zero_operand")]))] | |
1502 | "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" | |
1503 | { | |
1504 | arm_expand_vector_compare (operands[0], GET_CODE (operands[1]), | |
1505 | operands[2], operands[3], false); | |
1506 | DONE; | |
1507 | }) | |
1508 | ||
;; Expander over the VDQIW iterator (destination and compared operands share
;; the mode).  Makes the same arm_expand_vector_compare call as vec_cmp above
;; it in this file, with the final argument false.
1509 | (define_expand "vec_cmpu<mode><mode>" | |
1510 | [(set (match_operand:VDQIW 0 "s_register_operand") | |
1511 | (match_operator:VDQIW 1 "comparison_operator" | |
1512 | [(match_operand:VDQIW 2 "s_register_operand") | |
1513 | (match_operand:VDQIW 3 "reg_or_zero_operand")]))] | |
1514 | "TARGET_NEON" | |
1515 | { | |
1516 | arm_expand_vector_compare (operands[0], GET_CODE (operands[1]), | |
1517 | operands[2], operands[3], false); | |
1518 | DONE; | |
1519 | }) | |
1520 | ||
5bfc5baa JB |
1521 | ;; Conditional instructions. These are comparisons with conditional moves for |
1522 | ;; vectors. They perform the assignment: | |
1523 | ;; | |
1524 | ;; Vop0 = (Vop4 <op3> Vop5) ? Vop1 : Vop2; | |
1525 | ;; | |
1526 | ;; where op3 is <, <=, ==, !=, >= or >. Operations are performed | |
1527 | ;; element-wise. | |
1528 | ||
;; Expander over the VDQW iterator where the compared operands (4, 5) and the
;; selected values (1, 2) share the mode.  All work is delegated to
;; arm_expand_vcond with <V_cmp_result>mode.  For float modes it is only
;; enabled when flag_unsafe_math_optimizations is set.
e9e1d143 | 1529 | (define_expand "vcond<mode><mode>" |
cd65e265 | 1530 | [(set (match_operand:VDQW 0 "s_register_operand") |
5bfc5baa | 1531 | (if_then_else:VDQW |
f35c297f | 1532 | (match_operator 3 "comparison_operator" |
cd65e265 | 1533 | [(match_operand:VDQW 4 "s_register_operand") |
c2978b34 | 1534 | (match_operand:VDQW 5 "reg_or_zero_operand")]) |
cd65e265 DZ |
1535 | (match_operand:VDQW 1 "s_register_operand") |
1536 | (match_operand:VDQW 2 "s_register_operand")))] | |
5bfc5baa JB |
1537 | "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" |
1538 | { | |
c2978b34 RS |
1539 | arm_expand_vcond (operands, <V_cmp_result>mode); |
1540 | DONE; | |
1541 | }) | |
f35c297f | 1542 | |
;; Variant of vcond iterating over V32: operands 4 and 5 are compared in the
;; V32 mode while the destination and selected values use <V_CVTTO>.
;; Delegates to arm_expand_vcond with <V_cmp_result>mode.
c2978b34 RS |
1543 | (define_expand "vcond<V_cvtto><mode>" |
1544 | [(set (match_operand:<V_CVTTO> 0 "s_register_operand") | |
1545 | (if_then_else:<V_CVTTO> | |
1546 | (match_operator 3 "comparison_operator" | |
1547 | [(match_operand:V32 4 "s_register_operand") | |
1548 | (match_operand:V32 5 "reg_or_zero_operand")]) | |
1549 | (match_operand:<V_CVTTO> 1 "s_register_operand") | |
1550 | (match_operand:<V_CVTTO> 2 "s_register_operand")))] | |
1551 | "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" | |
1552 | { | |
1553 | arm_expand_vcond (operands, <V_cmp_result>mode); | |
5bfc5baa JB |
1554 | DONE; |
1555 | }) | |
1556 | ||
;; Expander over the VDQW iterator whose compared operands (4, 5) are in
;; <V_cmp_result> mode and whose operator uses the arm_comparison_operator
;; predicate.  Delegates to arm_expand_vcond; enabled by TARGET_NEON alone.
c2978b34 RS |
1557 | (define_expand "vcondu<mode><v_cmp_result>" |
1558 | [(set (match_operand:VDQW 0 "s_register_operand") | |
1559 | (if_then_else:VDQW | |
5bfc5baa | 1560 | (match_operator 3 "arm_comparison_operator" |
c2978b34 RS |
1561 | [(match_operand:<V_cmp_result> 4 "s_register_operand") |
1562 | (match_operand:<V_cmp_result> 5 "reg_or_zero_operand")]) | |
1563 | (match_operand:VDQW 1 "s_register_operand") | |
1564 | (match_operand:VDQW 2 "s_register_operand")))] | |
1565 | "TARGET_NEON" | |
1566 | { | |
1567 | arm_expand_vcond (operands, <V_cmp_result>mode); | |
1568 | DONE; | |
1569 | }) | |
5bfc5baa | 1570 | |
;; Expander over the VDQW iterator taking a <V_cmp_result> mask as operand 3.
;; Emits neon_vbsl<mode> with the mask as its first source, followed by
;; operands 1 and 2.
c2978b34 RS |
1571 | (define_expand "vcond_mask_<mode><v_cmp_result>" |
1572 | [(set (match_operand:VDQW 0 "s_register_operand") | |
1573 | (if_then_else:VDQW | |
1574 | (match_operand:<V_cmp_result> 3 "s_register_operand") | |
1575 | (match_operand:VDQW 1 "s_register_operand") | |
1576 | (match_operand:VDQW 2 "s_register_operand")))] | |
1577 | "TARGET_NEON" | |
1578 | { | |
1579 | emit_insn (gen_neon_vbsl<mode> (operands[0], operands[3], operands[1], | |
1580 | operands[2])); | |
5bfc5baa JB |
1581 | DONE; |
1582 | }) | |
1583 | ||
88f77cba JB |
1584 | ;; Patterns for builtins. |
1585 | ||
1586 | ; good for plain vadd, vaddq. | |
1587 | ||
;; Builtin expander over the VCVTF iterator.  When ARM_HAVE_NEON_<MODE>_ARITH
;; holds it emits add<mode>3; otherwise it emits neon_vadd<mode>_unspec.
bab53516 | 1588 | (define_expand "neon_vadd<mode>" |
cd65e265 DZ |
1589 | [(match_operand:VCVTF 0 "s_register_operand") |
1590 | (match_operand:VCVTF 1 "s_register_operand") | |
1591 | (match_operand:VCVTF 2 "s_register_operand")] | |
bab53516 SL |
1592 | "TARGET_NEON" |
1593 | { | |
bb78e587 | 1594 | if (ARM_HAVE_NEON_<MODE>_ARITH) |
bab53516 SL |
1595 | emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2])); |
1596 | else | |
1597 | emit_insn (gen_neon_vadd<mode>_unspec (operands[0], operands[1], | |
1598 | operands[2])); | |
1599 | DONE; | |
1600 | }) | |
1601 | ||
;; Builtin expander over the VH iterator, enabled by TARGET_NEON_FP16INST.
;; Unconditionally emits add<mode>3 on the three operands.
55a9b91b MW |
1602 | (define_expand "neon_vadd<mode>" |
1603 | [(match_operand:VH 0 "s_register_operand") | |
1604 | (match_operand:VH 1 "s_register_operand") | |
1605 | (match_operand:VH 2 "s_register_operand")] | |
1606 | "TARGET_NEON_FP16INST" | |
1607 | { | |
95e10b8a | 1608 | emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2])); |
55a9b91b MW |
1609 | DONE; |
1610 | }) | |
1611 | ||
;; Builtin expander over the VH iterator, enabled by TARGET_NEON_FP16INST.
;; Unconditionally emits sub<mode>3 on the three operands.
1612 | (define_expand "neon_vsub<mode>" | |
1613 | [(match_operand:VH 0 "s_register_operand") | |
1614 | (match_operand:VH 1 "s_register_operand") | |
1615 | (match_operand:VH 2 "s_register_operand")] | |
1616 | "TARGET_NEON_FP16INST" | |
1617 | { | |
98161c24 | 1618 | emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2])); |
55a9b91b MW |
1619 | DONE; |
1620 | }) | |
1621 | ||
bab53516 SL |
1622 | ; Note that NEON operations don't support the full IEEE 754 standard: in |
1623 | ; particular, denormal values are flushed to zero. This means that GCC cannot | |
1624 | ; use those instructions for autovectorization, etc. unless | |
1625 | ; -funsafe-math-optimizations is in effect (in which case flush-to-zero | |
9c582551 | 1626 | ; behavior is permissible). Intrinsic operations (provided by the arm_neon.h |
bab53516 SL |
1627 | ; header) must work in either case: if -funsafe-math-optimizations is given, |
1628 | ; intrinsics expand to "canonical" RTL where possible, otherwise intrinsics | |
1629 | ; expand to unspecs (which may potentially limit the extent to which they might | |
1630 | ; be optimized by generic code). | |
1631 | ||
1632 | ; Used for intrinsics when flag_unsafe_math_optimizations is false. | |
1633 | ||
;; Insn over the VCVTF iterator: UNSPEC_VADD of operands 1 and 2, output as
;; "vadd.<V_if_elem>".  This is the pattern the VCVTF neon_vadd<mode>
;; expander falls back to when ARM_HAVE_NEON_<MODE>_ARITH is false.
1634 | (define_insn "neon_vadd<mode>_unspec" | |
0d0b79a6 RR |
1635 | [(set (match_operand:VCVTF 0 "s_register_operand" "=w") |
1636 | (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w") | |
1637 | (match_operand:VCVTF 2 "s_register_operand" "w")] | |
88f77cba JB |
1638 | UNSPEC_VADD))] |
1639 | "TARGET_NEON" | |
c956e102 | 1640 | "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
003bb7f3 | 1641 | [(set (attr "type") |
b75b1be2 | 1642 | (if_then_else (match_test "<Is_float_mode>") |
f7379e5e JG |
1643 | (const_string "neon_fp_addsub_s<q>") |
1644 | (const_string "neon_add<q>")))] | |
c956e102 | 1645 | ) |
88f77cba | 1646 | |
;; Insn over the VDI mode iterator and the VADDL unspec iterator: both
;; sources are VDI, the destination is <V_widen>.  Output as
;; "vaddl.<sup>" followed by the element size.
94f0f2cc | 1647 | (define_insn "neon_vaddl<sup><mode>" |
88f77cba JB |
1648 | [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") |
1649 | (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w") | |
94f0f2cc JG |
1650 | (match_operand:VDI 2 "s_register_operand" "w")] |
1651 | VADDL))] | |
88f77cba | 1652 | "TARGET_NEON" |
94f0f2cc | 1653 | "vaddl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2" |
f7379e5e | 1654 | [(set_attr "type" "neon_add_long")] |
c956e102 | 1655 | ) |
88f77cba | 1656 | |
;; Insn over the VDI mode iterator and the VADDW unspec iterator: operand 1
;; and the destination are <V_widen>, operand 2 is VDI.  Output as
;; "vaddw.<sup>" followed by the element size.
94f0f2cc | 1657 | (define_insn "neon_vaddw<sup><mode>" |
88f77cba JB |
1658 | [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") |
1659 | (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w") | |
94f0f2cc JG |
1660 | (match_operand:VDI 2 "s_register_operand" "w")] |
1661 | VADDW))] | |
88f77cba | 1662 | "TARGET_NEON" |
94f0f2cc | 1663 | "vaddw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2" |
f7379e5e | 1664 | [(set_attr "type" "neon_add_widen")] |
c956e102 | 1665 | ) |
88f77cba JB |
1666 | |
1667 | ; vhadd and vrhadd. | |
1668 | ||
;; Insn over the VDQIW mode iterator and the VHADD unspec iterator; the <r>
;; and <sup> attributes select the mnemonic variant ("v<r>hadd.<sup>...").
94f0f2cc | 1669 | (define_insn "neon_v<r>hadd<sup><mode>" |
88f77cba JB |
1670 | [(set (match_operand:VDQIW 0 "s_register_operand" "=w") |
1671 | (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w") | |
94f0f2cc JG |
1672 | (match_operand:VDQIW 2 "s_register_operand" "w")] |
1673 | VHADD))] | |
88f77cba | 1674 | "TARGET_NEON" |
94f0f2cc | 1675 | "v<r>hadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
f7379e5e | 1676 | [(set_attr "type" "neon_add_halve_q")] |
c956e102 | 1677 | ) |
88f77cba | 1678 | |
;; Insn over the VDQIX mode iterator and the VQADD unspec iterator, output
;; as "vqadd.<sup>" followed by the element size.
94f0f2cc | 1679 | (define_insn "neon_vqadd<sup><mode>" |
88f77cba JB |
1680 | [(set (match_operand:VDQIX 0 "s_register_operand" "=w") |
1681 | (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w") | |
94f0f2cc JG |
1682 | (match_operand:VDQIX 2 "s_register_operand" "w")] |
1683 | VQADD))] | |
88f77cba | 1684 | "TARGET_NEON" |
94f0f2cc | 1685 | "vqadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
f7379e5e | 1686 | [(set_attr "type" "neon_qadd<q>")] |
c956e102 | 1687 | ) |
88f77cba | 1688 | |
;; Insn over the VN mode iterator and the VADDHN unspec iterator: both
;; sources are VN, the destination is <V_narrow>.  Output as
;; "v<r>addhn.<V_if_elem>".
94f0f2cc | 1689 | (define_insn "neon_v<r>addhn<mode>" |
88f77cba JB |
1690 | [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w") |
1691 | (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w") | |
94f0f2cc JG |
1692 | (match_operand:VN 2 "s_register_operand" "w")] |
1693 | VADDHN))] | |
88f77cba | 1694 | "TARGET_NEON" |
94f0f2cc | 1695 | "v<r>addhn.<V_if_elem>\t%P0, %q1, %q2" |
f7379e5e | 1696 | [(set_attr "type" "neon_add_halve_narrow_q")] |
c956e102 | 1697 | ) |
88f77cba | 1698 | |
;; Insn over the VPF iterator: UNSPEC_VMUL of operands 1 and 2, output as
;; "vmul.<pf>" followed by the element size.  The "type" attribute is
;; selected by <Is_float_mode>.
94f0f2cc JG |
1699 | ;; Polynomial and Float multiplication. |
1700 | (define_insn "neon_vmul<pf><mode>" | |
1701 | [(set (match_operand:VPF 0 "s_register_operand" "=w") | |
1702 | (unspec:VPF [(match_operand:VPF 1 "s_register_operand" "w") | |
1703 | (match_operand:VPF 2 "s_register_operand" "w")] | |
88f77cba JB |
1704 | UNSPEC_VMUL))] |
1705 | "TARGET_NEON" | |
94f0f2cc | 1706 | "vmul.<pf>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
003bb7f3 | 1707 | [(set (attr "type") |
b75b1be2 | 1708 | (if_then_else (match_test "<Is_float_mode>") |
f7379e5e JG |
1709 | (const_string "neon_fp_mul_s<q>") |
1710 | (const_string "neon_mul_<V_elem_ch><q>")))] | |
c956e102 | 1711 | ) |
88f77cba | 1712 | |
;; Insn over the VH iterator using a plain mult rtx (not an unspec), output
;; as "vmul.f16"; enabled by TARGET_NEON_FP16INST.
55a9b91b MW |
1713 | (define_insn "neon_vmulf<mode>" |
1714 | [(set | |
1715 | (match_operand:VH 0 "s_register_operand" "=w") | |
1716 | (mult:VH | |
1717 | (match_operand:VH 1 "s_register_operand" "w") | |
1718 | (match_operand:VH 2 "s_register_operand" "w")))] | |
1719 | "TARGET_NEON_FP16INST" | |
1720 | "vmul.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2" | |
1721 | [(set_attr "type" "neon_mul_<VH_elem_ch><q>")] | |
1722 | ) | |
1723 | ||
;; Builtin expander over the VDQW iterator with four operands.  When
;; ARM_HAVE_NEON_<MODE>_ARITH holds it emits mul<mode>3add<mode>_neon;
;; otherwise it emits neon_vmla<mode>_unspec.  Operand order is unchanged.
bab53516 | 1724 | (define_expand "neon_vmla<mode>" |
cd65e265 DZ |
1725 | [(match_operand:VDQW 0 "s_register_operand") |
1726 | (match_operand:VDQW 1 "s_register_operand") | |
1727 | (match_operand:VDQW 2 "s_register_operand") | |
1728 | (match_operand:VDQW 3 "s_register_operand")] | |
bab53516 SL |
1729 | "TARGET_NEON" |
1730 | { | |
bb78e587 | 1731 | if (ARM_HAVE_NEON_<MODE>_ARITH) |
bab53516 SL |
1732 | emit_insn (gen_mul<mode>3add<mode>_neon (operands[0], operands[1], |
1733 | operands[2], operands[3])); | |
1734 | else | |
1735 | emit_insn (gen_neon_vmla<mode>_unspec (operands[0], operands[1], | |
1736 | operands[2], operands[3])); | |
1737 | DONE; | |
1738 | }) | |
1739 | ||
;; Builtin expander over the VCVTF iterator (TARGET_NEON && TARGET_FMA).
;; Emits fma<mode>4_intrinsic with the sources reordered: operands 2 and 3
;; are passed first and operand 1 last.
c4216388 MGD |
1740 | (define_expand "neon_vfma<VCVTF:mode>" |
1741 | [(match_operand:VCVTF 0 "s_register_operand") | |
1742 | (match_operand:VCVTF 1 "s_register_operand") | |
1743 | (match_operand:VCVTF 2 "s_register_operand") | |
94f0f2cc | 1744 | (match_operand:VCVTF 3 "s_register_operand")] |
c4216388 MGD |
1745 | "TARGET_NEON && TARGET_FMA" |
1746 | { | |
1747 | emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3], | |
1748 | operands[1])); | |
1749 | DONE; | |
1750 | }) | |
1751 | ||
;; Builtin expander over the VH iterator (TARGET_NEON_FP16INST).  Emits
;; fma<mode>4 with the sources reordered: operands 2 and 3 are passed first
;; and operand 1 last.
55a9b91b MW |
1752 | (define_expand "neon_vfma<VH:mode>" |
1753 | [(match_operand:VH 0 "s_register_operand") | |
1754 | (match_operand:VH 1 "s_register_operand") | |
1755 | (match_operand:VH 2 "s_register_operand") | |
1756 | (match_operand:VH 3 "s_register_operand")] | |
1757 | "TARGET_NEON_FP16INST" | |
1758 | { | |
bb78e587 RS |
1759 | emit_insn (gen_fma<mode>4 (operands[0], operands[2], operands[3], |
1760 | operands[1])); | |
55a9b91b MW |
1761 | DONE; |
1762 | }) | |
1763 | ||
;; Builtin expander over the VCVTF iterator (TARGET_NEON && TARGET_FMA).
;; Emits fmsub<mode>4_intrinsic with the sources reordered: operands 2 and 3
;; are passed first and operand 1 last.
c4216388 MGD |
1764 | (define_expand "neon_vfms<VCVTF:mode>" |
1765 | [(match_operand:VCVTF 0 "s_register_operand") | |
1766 | (match_operand:VCVTF 1 "s_register_operand") | |
1767 | (match_operand:VCVTF 2 "s_register_operand") | |
94f0f2cc | 1768 | (match_operand:VCVTF 3 "s_register_operand")] |
c4216388 MGD |
1769 | "TARGET_NEON && TARGET_FMA" |
1770 | { | |
1771 | emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3], | |
1772 | operands[1])); | |
1773 | DONE; | |
1774 | }) | |
1775 | ||
;; Builtin expander over the VH iterator (TARGET_NEON_FP16INST).  Emits
;; fmsub<mode>4_intrinsic with the sources reordered: operands 2 and 3 are
;; passed first and operand 1 last.
55a9b91b MW |
1776 | (define_expand "neon_vfms<VH:mode>" |
1777 | [(match_operand:VH 0 "s_register_operand") | |
1778 | (match_operand:VH 1 "s_register_operand") | |
1779 | (match_operand:VH 2 "s_register_operand") | |
1780 | (match_operand:VH 3 "s_register_operand")] | |
1781 | "TARGET_NEON_FP16INST" | |
1782 | { | |
1783 | emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3], | |
1784 | operands[1])); | |
1785 | DONE; | |
1786 | }) | |
1787 | ||
06e95715 KT |
1788 | ;; The expand RTL structure here is not important. |
1789 | ;; We use the gen_* functions anyway. | |
1790 | ;; We just need something to wrap the iterators around. | |
1791 | ||
;; Expander instantiated over VCVTF, PLUSMINUS and VFMLHALVES
;; (TARGET_FP16FML).  Builds one half-selector rtx with
;; arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>) and passes
;; it as both trailing operands of the matching
;; vfm<vfml_op>l_<vfml_half><mode>_intrinsic insn.
1792 | (define_expand "neon_vfm<vfml_op>l_<vfml_half><mode>" | |
1793 | [(set (match_operand:VCVTF 0 "s_register_operand") | |
1794 | (unspec:VCVTF | |
1795 | [(match_operand:VCVTF 1 "s_register_operand") | |
1796 | (PLUSMINUS:<VFML> | |
1797 | (match_operand:<VFML> 2 "s_register_operand") | |
1798 | (match_operand:<VFML> 3 "s_register_operand"))] VFMLHALVES))] | |
1799 | "TARGET_FP16FML" | |
1800 | { | |
1801 | rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>); | |
1802 | emit_insn (gen_vfm<vfml_op>l_<vfml_half><mode>_intrinsic (operands[0], | |
1803 | operands[1], | |
1804 | operands[2], | |
1805 | operands[3], | |
1806 | half, half)); | |
1807 | DONE; | |
1808 | }) | |
1809 | ||
;; Insn over VCVTF: fma of the float-extended vec_selects of operands 2 and
;; 3 (selectors 4 and 5 must satisfy vect_par_constant_low) with operand 1 as
;; addend; operand 1 carries the matching constraint "0".  Output as
;; "vfmal.f16" using the %<V_lo> modifier for both sources.
1810 | (define_insn "vfmal_low<mode>_intrinsic" | |
1811 | [(set (match_operand:VCVTF 0 "s_register_operand" "=w") | |
1812 | (fma:VCVTF | |
1813 | (float_extend:VCVTF | |
1814 | (vec_select:<VFMLSEL> | |
1815 | (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>") | |
1816 | (match_operand:<VFML> 4 "vect_par_constant_low" ""))) | |
1817 | (float_extend:VCVTF | |
1818 | (vec_select:<VFMLSEL> | |
1819 | (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>") | |
1820 | (match_operand:<VFML> 5 "vect_par_constant_low" ""))) | |
1821 | (match_operand:VCVTF 1 "s_register_operand" "0")))] | |
1822 | "TARGET_FP16FML" | |
1823 | "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3" | |
1824 | [(set_attr "type" "neon_fp_mla_s<q>")] | |
1825 | ) | |
1826 | ||
;; Insn over VCVTF: like the vfmal "high" form but the vec_select of operand
;; 2 is wrapped in neg before the float_extend.  Selectors 4 and 5 must
;; satisfy vect_par_constant_high.  Output as "vfmsl.f16" using the %<V_hi>
;; modifier for both sources.
1827 | (define_insn "vfmsl_high<mode>_intrinsic" | |
1828 | [(set (match_operand:VCVTF 0 "s_register_operand" "=w") | |
1829 | (fma:VCVTF | |
1830 | (float_extend:VCVTF | |
1831 | (neg:<VFMLSEL> | |
1832 | (vec_select:<VFMLSEL> | |
1833 | (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>") | |
1834 | (match_operand:<VFML> 4 "vect_par_constant_high" "")))) | |
1835 | (float_extend:VCVTF | |
1836 | (vec_select:<VFMLSEL> | |
1837 | (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>") | |
1838 | (match_operand:<VFML> 5 "vect_par_constant_high" ""))) | |
1839 | (match_operand:VCVTF 1 "s_register_operand" "0")))] | |
1840 | "TARGET_FP16FML" | |
1841 | "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3" | |
1842 | [(set_attr "type" "neon_fp_mla_s<q>")] | |
1843 | ) | |
1844 | ||
;; Insn over VCVTF: fma of the float-extended vec_selects of operands 2 and
;; 3 (selectors 4 and 5 must satisfy vect_par_constant_high) with operand 1
;; as addend.  Output as "vfmal.f16" using the %<V_hi> modifier for both
;; sources.
1845 | (define_insn "vfmal_high<mode>_intrinsic" | |
1846 | [(set (match_operand:VCVTF 0 "s_register_operand" "=w") | |
1847 | (fma:VCVTF | |
1848 | (float_extend:VCVTF | |
1849 | (vec_select:<VFMLSEL> | |
1850 | (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>") | |
1851 | (match_operand:<VFML> 4 "vect_par_constant_high" ""))) | |
1852 | (float_extend:VCVTF | |
1853 | (vec_select:<VFMLSEL> | |
1854 | (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>") | |
1855 | (match_operand:<VFML> 5 "vect_par_constant_high" ""))) | |
1856 | (match_operand:VCVTF 1 "s_register_operand" "0")))] | |
1857 | "TARGET_FP16FML" | |
1858 | "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3" | |
1859 | [(set_attr "type" "neon_fp_mla_s<q>")] | |
1860 | ) | |
1861 | ||
;; Insn over VCVTF: like the vfmal "low" form but the vec_select of operand 2
;; is wrapped in neg before the float_extend.  Selectors 4 and 5 must satisfy
;; vect_par_constant_low.  Output as "vfmsl.f16" using the %<V_lo> modifier
;; for both sources.
1862 | (define_insn "vfmsl_low<mode>_intrinsic" | |
1863 | [(set (match_operand:VCVTF 0 "s_register_operand" "=w") | |
1864 | (fma:VCVTF | |
1865 | (float_extend:VCVTF | |
1866 | (neg:<VFMLSEL> | |
1867 | (vec_select:<VFMLSEL> | |
1868 | (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>") | |
1869 | (match_operand:<VFML> 4 "vect_par_constant_low" "")))) | |
1870 | (float_extend:VCVTF | |
1871 | (vec_select:<VFMLSEL> | |
1872 | (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>") | |
1873 | (match_operand:<VFML> 5 "vect_par_constant_low" ""))) | |
1874 | (match_operand:VCVTF 1 "s_register_operand" "0")))] | |
1875 | "TARGET_FP16FML" | |
1876 | "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3" | |
1877 | [(set_attr "type" "neon_fp_mla_s<q>")] | |
1878 | ) | |
1879 | ||
;; Lane expander where operands 2 and 3 share <VFML> mode (TARGET_FP16FML).
;; Converts the lane number in operand 4 with NEON_ENDIAN_LANE_N on
;; <VFML>mode, builds the half selector, and passes (half, lane) as the
;; trailing operands of the matching vfm<vfml_op>l_lane_<vfml_half><mode>
;; intrinsic insn.
eccf4d70 KT |
1880 | (define_expand "neon_vfm<vfml_op>l_lane_<vfml_half><VCVTF:mode>" |
1881 | [(set:VCVTF (match_operand:VCVTF 0 "s_register_operand") | |
1882 | (unspec:VCVTF | |
1883 | [(match_operand:VCVTF 1 "s_register_operand") | |
1884 | (PLUSMINUS:<VFML> | |
1885 | (match_operand:<VFML> 2 "s_register_operand") | |
1886 | (match_operand:<VFML> 3 "s_register_operand")) | |
1887 | (match_operand:SI 4 "const_int_operand")] VFMLHALVES))] | |
1888 | "TARGET_FP16FML" | |
1889 | { | |
1890 | rtx lane = GEN_INT (NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[4]))); | |
1891 | rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>); | |
1892 | emit_insn (gen_vfm<vfml_op>l_lane_<vfml_half><mode>_intrinsic | |
1893 | (operands[0], operands[1], | |
1894 | operands[2], operands[3], | |
1895 | half, lane)); | |
1896 | DONE; | |
1897 | }) | |
1898 | ||
;; Insn over VCVTF: fma of the float-extended low-half vec_select of operand
;; 2 with a float-extended vec_duplicate of one HF lane of operand 3, plus
;; operand 1.  The output code applies NEON_ENDIAN_LANE_N to operand 5; when
;; the lane index is at least GET_MODE_NUNITS (<VFMLSEL>mode) it subtracts
;; that count and prints operand 3 with %<V_hi>, otherwise with %<V_lo>.
1899 | (define_insn "vfmal_lane_low<mode>_intrinsic" | |
1900 | [(set (match_operand:VCVTF 0 "s_register_operand" "=w") | |
1901 | (fma:VCVTF | |
1902 | (float_extend:VCVTF | |
1903 | (vec_select:<VFMLSEL> | |
1904 | (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>") | |
1905 | (match_operand:<VFML> 4 "vect_par_constant_low" ""))) | |
1906 | (float_extend:VCVTF | |
1907 | (vec_duplicate:<VFMLSEL> | |
1908 | (vec_select:HF | |
1909 | (match_operand:<VFML> 3 "s_register_operand" "x") | |
1910 | (parallel [(match_operand:SI 5 "const_int_operand" "n")])))) | |
1911 | (match_operand:VCVTF 1 "s_register_operand" "0")))] | |
1912 | "TARGET_FP16FML" | |
1913 | { | |
1914 | int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5])); | |
1915 | if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1) | |
1916 | { | |
1917 | operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode)); | |
1918 | return "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_hi>3[%c5]"; | |
1919 | } | |
1920 | else | |
1921 | { | |
1922 | operands[5] = GEN_INT (lane); | |
1923 | return "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3[%c5]"; | |
1924 | } | |
1925 | } | |
1926 | [(set_attr "type" "neon_fp_mla_s<q>")] | |
1927 | ) | |
1928 | ||
;; Lane expander for the mixed-width case: operand 2 is <VFML> but operand 3
;; is <VFMLSEL2>.  The lane number in operand 4 is converted with
;; NEON_ENDIAN_LANE_N on <VFMLSEL2>mode, while the half selector is still
;; built for <VFML>mode; (half, lane) are passed as the trailing operands of
;; the matching <vfmlsel2> intrinsic insn.
1929 | (define_expand "neon_vfm<vfml_op>l_lane_<vfml_half><vfmlsel2><mode>" | |
1930 | [(set:VCVTF (match_operand:VCVTF 0 "s_register_operand") | |
1931 | (unspec:VCVTF | |
1932 | [(match_operand:VCVTF 1 "s_register_operand") | |
1933 | (PLUSMINUS:<VFML> | |
1934 | (match_operand:<VFML> 2 "s_register_operand") | |
1935 | (match_operand:<VFMLSEL2> 3 "s_register_operand")) | |
1936 | (match_operand:SI 4 "const_int_operand")] VFMLHALVES))] | |
1937 | "TARGET_FP16FML" | |
1938 | { | |
1939 | rtx lane | |
1940 | = GEN_INT (NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[4]))); | |
1941 | rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>); | |
1942 | emit_insn (gen_vfm<vfml_op>l_lane_<vfml_half><vfmlsel2><mode>_intrinsic | |
1943 | (operands[0], operands[1], operands[2], operands[3], | |
1944 | half, lane)); | |
1945 | DONE; | |
1946 | }) | |
1947 | ||
1948 | ;; Used to implement the intrinsics: | |
99cf78cf TC |
1949 | ;; float32x4_t vfmlalq_lane_low_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane) |
1950 | ;; float32x2_t vfmlal_laneq_low_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane) | |
eccf4d70 KT |
1951 | ;; Needs a bit of care to get the modes of the different sub-expressions right |
1952 | ;; due to 'a' and 'b' having different sizes and make sure we use the right | |
1953 | ;; S or D subregister to select the appropriate lane from. | |
1954 | ||
;; Output code summary: the endian-adjusted lane is split into an index
;; within one <VFMLSEL>mode register (lane % elts_per_reg, stored back in
;; operand 5) and a register offset (lane / elts_per_reg) that is added to
;; REGNO of operand 3.  Operands 2 and 3 are both replaced by <VFMLSEL>mode
;; REGs before printing with %<V_lane_reg>.
1955 | (define_insn "vfmal_lane_low<vfmlsel2><mode>_intrinsic" | |
1956 | [(set (match_operand:VCVTF 0 "s_register_operand" "=w") | |
1957 | (fma:VCVTF | |
1958 | (float_extend:VCVTF | |
1959 | (vec_select:<VFMLSEL> | |
1960 | (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>") | |
1961 | (match_operand:<VFML> 4 "vect_par_constant_low" ""))) | |
1962 | (float_extend:VCVTF | |
1963 | (vec_duplicate:<VFMLSEL> | |
1964 | (vec_select:HF | |
1965 | (match_operand:<VFMLSEL2> 3 "s_register_operand" "x") | |
1966 | (parallel [(match_operand:SI 5 "const_int_operand" "n")])))) | |
1967 | (match_operand:VCVTF 1 "s_register_operand" "0")))] | |
1968 | "TARGET_FP16FML" | |
1969 | { | |
1970 | int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5])); | |
1971 | int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode); | |
1972 | int new_lane = lane % elts_per_reg; | |
1973 | int regdiff = lane / elts_per_reg; | |
1974 | operands[5] = GEN_INT (new_lane); | |
1975 | /* We re-create operands[2] and operands[3] in the halved VFMLSEL modes | |
1976 | because we want the print_operand code to print the appropriate | |
1977 | S or D register prefix. */ | |
1978 | operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff); | |
1979 | operands[2] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[2])); | |
1980 | return "vfmal.f16\\t%<V_reg>0, %<V_lane_reg>2, %<V_lane_reg>3[%c5]"; | |
1981 | } | |
1982 | [(set_attr "type" "neon_fp_mla_s<q>")] | |
1983 | ) | |
1984 | ||
1985 | ;; Used to implement the intrinsics: | |
99cf78cf TC |
1986 | ;; float32x4_t vfmlalq_lane_high_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane) |
1987 | ;; float32x2_t vfmlal_laneq_high_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane) | |
eccf4d70 KT |
1988 | ;; Needs a bit of care to get the modes of the different sub-expressions right |
1989 | ;; due to 'a' and 'b' having different sizes and make sure we use the right | |
1990 | ;; S or D subregister to select the appropriate lane from. | |
1991 | ||
;; Output code summary: the endian-adjusted lane is split into an index
;; within one <VFMLSEL>mode register (stored back in operand 5) and a
;; register offset added to REGNO of operand 3.  Only operand 3 is replaced
;; by a <VFMLSEL>mode REG; operand 2 is left as matched and printed with
;; %<V_hi>.
1992 | (define_insn "vfmal_lane_high<vfmlsel2><mode>_intrinsic" | |
1993 | [(set (match_operand:VCVTF 0 "s_register_operand" "=w") | |
1994 | (fma:VCVTF | |
1995 | (float_extend:VCVTF | |
1996 | (vec_select:<VFMLSEL> | |
1997 | (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>") | |
1998 | (match_operand:<VFML> 4 "vect_par_constant_high" ""))) | |
1999 | (float_extend:VCVTF | |
2000 | (vec_duplicate:<VFMLSEL> | |
2001 | (vec_select:HF | |
2002 | (match_operand:<VFMLSEL2> 3 "s_register_operand" "x") | |
2003 | (parallel [(match_operand:SI 5 "const_int_operand" "n")])))) | |
2004 | (match_operand:VCVTF 1 "s_register_operand" "0")))] | |
2005 | "TARGET_FP16FML" | |
2006 | { | |
2007 | int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5])); | |
2008 | int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode); | |
2009 | int new_lane = lane % elts_per_reg; | |
2010 | int regdiff = lane / elts_per_reg; | |
2011 | operands[5] = GEN_INT (new_lane); | |
2012 | /* We re-create operands[3] in the halved VFMLSEL mode | |
2013 | because we've calculated the correct half-width subreg to extract | |
2014 | the lane from and we want to print *that* subreg instead. */ | |
2015 | operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff); | |
2016 | return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_lane_reg>3[%c5]"; | |
2017 | } | |
2018 | [(set_attr "type" "neon_fp_mla_s<q>")] | |
2019 | ) | |
2020 | ||
;; Insn over VCVTF: fma of the float-extended high-half vec_select of operand
;; 2 with a float-extended vec_duplicate of one HF lane of operand 3, plus
;; operand 1.  The output code applies NEON_ENDIAN_LANE_N to operand 5; when
;; the lane index is at least GET_MODE_NUNITS (<VFMLSEL>mode) it subtracts
;; that count and prints operand 3 with %<V_hi>, otherwise with %<V_lo>.
;; Operand 2 is always printed with %<V_hi>.
2021 | (define_insn "vfmal_lane_high<mode>_intrinsic" | |
2022 | [(set (match_operand:VCVTF 0 "s_register_operand" "=w") | |
2023 | (fma:VCVTF | |
2024 | (float_extend:VCVTF | |
2025 | (vec_select:<VFMLSEL> | |
2026 | (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>") | |
2027 | (match_operand:<VFML> 4 "vect_par_constant_high" ""))) | |
2028 | (float_extend:VCVTF | |
2029 | (vec_duplicate:<VFMLSEL> | |
2030 | (vec_select:HF | |
2031 | (match_operand:<VFML> 3 "s_register_operand" "x") | |
2032 | (parallel [(match_operand:SI 5 "const_int_operand" "n")])))) | |
2033 | (match_operand:VCVTF 1 "s_register_operand" "0")))] | |
2034 | "TARGET_FP16FML" | |
2035 | { | |
2036 | int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5])); | |
2037 | if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1) | |
2038 | { | |
2039 | operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode)); | |
2040 | return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3[%c5]"; | |
2041 | } | |
2042 | else | |
2043 | { | |
2044 | operands[5] = GEN_INT (lane); | |
2045 | return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_lo>3[%c5]"; | |
2046 | } | |
2047 | } | |
2048 | [(set_attr "type" "neon_fp_mla_s<q>")] | |
2049 | ) | |
2050 | ||
;; Insn over VCVTF: as the vfmal lane "low" form, but the low-half vec_select
;; of operand 2 is wrapped in neg.  The output code applies
;; NEON_ENDIAN_LANE_N to operand 5; when the lane index is at least
;; GET_MODE_NUNITS (<VFMLSEL>mode) it subtracts that count and prints operand
;; 3 with %<V_hi>, otherwise with %<V_lo>.  Output mnemonic is "vfmsl.f16".
2051 | (define_insn "vfmsl_lane_low<mode>_intrinsic" | |
2052 | [(set (match_operand:VCVTF 0 "s_register_operand" "=w") | |
2053 | (fma:VCVTF | |
2054 | (float_extend:VCVTF | |
2055 | (neg:<VFMLSEL> | |
2056 | (vec_select:<VFMLSEL> | |
2057 | (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>") | |
2058 | (match_operand:<VFML> 4 "vect_par_constant_low" "")))) | |
2059 | (float_extend:VCVTF | |
2060 | (vec_duplicate:<VFMLSEL> | |
2061 | (vec_select:HF | |
2062 | (match_operand:<VFML> 3 "s_register_operand" "x") | |
2063 | (parallel [(match_operand:SI 5 "const_int_operand" "n")])))) | |
2064 | (match_operand:VCVTF 1 "s_register_operand" "0")))] | |
2065 | "TARGET_FP16FML" | |
2066 | { | |
2067 | int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5])); | |
2068 | if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1) | |
2069 | { | |
2070 | operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode)); | |
2071 | return "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_hi>3[%c5]"; | |
2072 | } | |
2073 | else | |
2074 | { | |
2075 | operands[5] = GEN_INT (lane); | |
2076 | return "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3[%c5]"; | |
2077 | } | |
2078 | } | |
2079 | [(set_attr "type" "neon_fp_mla_s<q>")] | |
2080 | ) | |
2081 | ||
2082 | ;; Used to implement the intrinsics: | |
99cf78cf TC |
2083 | ;; float32x4_t vfmlslq_lane_low_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane) |
2084 | ;; float32x2_t vfmlsl_laneq_low_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane) | |
eccf4d70 KT |
2085 | ;; Needs a bit of care to get the modes of the different sub-expressions right |
2086 | ;; due to 'a' and 'b' having different sizes and make sure we use the right | |
2087 | ;; S or D subregister to select the appropriate lane from. | |
2088 | ||
;; Output code summary: the endian-adjusted lane is split into an index
;; within one <VFMLSEL>mode register (lane % elts_per_reg, stored back in
;; operand 5) and a register offset (lane / elts_per_reg) that is added to
;; REGNO of operand 3.  Operands 2 and 3 are both replaced by <VFMLSEL>mode
;; REGs before printing with %<V_lane_reg>.  The RTL negates the operand 2
;; selection and the mnemonic is "vfmsl.f16".
2089 | (define_insn "vfmsl_lane_low<vfmlsel2><mode>_intrinsic" | |
2090 | [(set (match_operand:VCVTF 0 "s_register_operand" "=w") | |
2091 | (fma:VCVTF | |
2092 | (float_extend:VCVTF | |
2093 | (neg:<VFMLSEL> | |
2094 | (vec_select:<VFMLSEL> | |
2095 | (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>") | |
2096 | (match_operand:<VFML> 4 "vect_par_constant_low" "")))) | |
2097 | (float_extend:VCVTF | |
2098 | (vec_duplicate:<VFMLSEL> | |
2099 | (vec_select:HF | |
2100 | (match_operand:<VFMLSEL2> 3 "s_register_operand" "x") | |
2101 | (parallel [(match_operand:SI 5 "const_int_operand" "n")])))) | |
2102 | (match_operand:VCVTF 1 "s_register_operand" "0")))] | |
2103 | "TARGET_FP16FML" | |
2104 | { | |
2105 | int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5])); | |
2106 | int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode); | |
2107 | int new_lane = lane % elts_per_reg; | |
2108 | int regdiff = lane / elts_per_reg; | |
2109 | operands[5] = GEN_INT (new_lane); | |
2110 | /* We re-create operands[2] and operands[3] in the halved VFMLSEL modes | |
2111 | because we want the print_operand code to print the appropriate | |
2112 | S or D register prefix. */ | |
2113 | operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff); | |
2114 | operands[2] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[2])); | |
2115 | return "vfmsl.f16\\t%<V_reg>0, %<V_lane_reg>2, %<V_lane_reg>3[%c5]"; | |
2116 | } | |
2117 | [(set_attr "type" "neon_fp_mla_s<q>")] | |
2118 | ) | |
2119 | ||
2120 | ;; Used to implement the intrinsics: | |
99cf78cf TC |
2121 | ;; float32x4_t vfmlslq_lane_high_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane) |
2122 | ;; float32x2_t vfmlsl_laneq_high_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane) | |
eccf4d70 KT |
2123 | ;; Needs a bit of care to get the modes of the different sub-expressions right |
2124 | ;; due to 'a' and 'b' having different sizes and make sure we use the right | |
2125 | ;; S or D subregister to select the appropriate lane from. | |
2126 | ||
;; Mixed-width by-lane vfmsl.f16 on the high half of operand 2 (operand 4 is
;; checked by vect_par_constant_high).  The lane source, operand 3, has mode
;; <VFMLSEL2>; the output code converts the endian-adjusted lane into a register
;; offset plus an in-register lane and rewrites only operand 3, while operand 2
;; is printed through %<V_hi>.
2127 | (define_insn "vfmsl_lane_high<vfmlsel2><mode>_intrinsic" | |
2128 | [(set (match_operand:VCVTF 0 "s_register_operand" "=w") | |
2129 | (fma:VCVTF | |
2130 | (float_extend:VCVTF | |
2131 | (neg:<VFMLSEL> | |
2132 | (vec_select:<VFMLSEL> | |
2133 | (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>") | |
2134 | (match_operand:<VFML> 4 "vect_par_constant_high" "")))) | |
2135 | (float_extend:VCVTF | |
2136 | (vec_duplicate:<VFMLSEL> | |
2137 | (vec_select:HF | |
2138 | (match_operand:<VFMLSEL2> 3 "s_register_operand" "x") | |
2139 | (parallel [(match_operand:SI 5 "const_int_operand" "n")])))) | |
2140 | (match_operand:VCVTF 1 "s_register_operand" "0")))] | |
2141 | "TARGET_FP16FML" | |
2142 | { | |
2143 | int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5])); | |
2144 | int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode); | |
2145 | int new_lane = lane % elts_per_reg; | |
2146 | int regdiff = lane / elts_per_reg; | |
2147 | operands[5] = GEN_INT (new_lane); | |
2148 | /* We re-create operands[3] in the halved VFMLSEL mode | |
2149 | because we've calculated the correct half-width subreg to extract | |
2150 | the lane from and we want to print *that* subreg instead. */ | |
2151 | operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff); | |
2152 | return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_lane_reg>3[%c5]"; | |
2153 | } | |
2154 | [(set_attr "type" "neon_fp_mla_s<q>")] | |
2155 | ) | |
2156 | ||
;; By-lane vfmsl.f16 on the high half of operand 2 (operand 4 is checked by
;; vect_par_constant_high), with the lane source (operand 3) in the same
;; <VFML> mode as operand 2.  After NEON_ENDIAN_LANE_N adjustment the output
;; code prints the high half of operand 3 with a rebased lane index when the
;; lane is beyond the first GET_MODE_NUNITS (<VFMLSEL>mode) elements, and the
;; low half otherwise.
2157 | (define_insn "vfmsl_lane_high<mode>_intrinsic" | |
2158 | [(set (match_operand:VCVTF 0 "s_register_operand" "=w") | |
2159 | (fma:VCVTF | |
2160 | (float_extend:VCVTF | |
2161 | (neg:<VFMLSEL> | |
2162 | (vec_select:<VFMLSEL> | |
2163 | (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>") | |
2164 | (match_operand:<VFML> 4 "vect_par_constant_high" "")))) | |
2165 | (float_extend:VCVTF | |
2166 | (vec_duplicate:<VFMLSEL> | |
2167 | (vec_select:HF | |
2168 | (match_operand:<VFML> 3 "s_register_operand" "x") | |
2169 | (parallel [(match_operand:SI 5 "const_int_operand" "n")])))) | |
2170 | (match_operand:VCVTF 1 "s_register_operand" "0")))] | |
2171 | "TARGET_FP16FML" | |
2172 | { | |
2173 | int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5])); | |
2174 | if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1) | |
2175 | { | |
2176 | operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode)); | |
2177 | return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3[%c5]"; | |
2178 | } | |
2179 | else | |
2180 | { | |
2181 | operands[5] = GEN_INT (lane); | |
2182 | return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_lo>3[%c5]"; | |
2183 | } | |
2184 | } | |
2185 | [(set_attr "type" "neon_fp_mla_s<q>")] | |
2186 | ) | |
2187 | ||
bab53516 SL |
2188 | ; Used for intrinsics when flag_unsafe_math_optimizations is false. |
2189 | ||
;; Opaque (UNSPEC_VMLA) form of vmla over the VDQW modes.  Operand 1 is tied
;; to the destination by constraint "0"; operands 2 and 3 are the two sources
;; printed in the template.  The "type" attribute is chosen by
;; <Is_float_mode>.
2190 | (define_insn "neon_vmla<mode>_unspec" | |
f7379e5e JG |
2191 | [(set (match_operand:VDQW 0 "s_register_operand" "=w") |
2192 | (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0") | |
2193 | (match_operand:VDQW 2 "s_register_operand" "w") | |
2194 | (match_operand:VDQW 3 "s_register_operand" "w")] | |
bab53516 | 2195 | UNSPEC_VMLA))] |
88f77cba | 2196 | "TARGET_NEON" |
c956e102 | 2197 | "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3" |
003bb7f3 | 2198 | [(set (attr "type") |
b75b1be2 | 2199 | (if_then_else (match_test "<Is_float_mode>") |
f7379e5e JG |
2200 | (const_string "neon_fp_mla_s<q>") |
2201 | (const_string "neon_mla_<V_elem_ch><q>")))] | |
c956e102 | 2202 | ) |
88f77cba | 2203 | |
;; vmlal (VMLAL unspec iterator): the destination and the tied accumulator
;; (operand 1) have mode <V_widen>, while sources 2 and 3 have the VW mode.
;; The template prints the destination with %q and the sources with %P.
94f0f2cc | 2204 | (define_insn "neon_vmlal<sup><mode>" |
88f77cba JB |
2205 | [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") |
2206 | (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0") | |
2207 | (match_operand:VW 2 "s_register_operand" "w") | |
94f0f2cc JG |
2208 | (match_operand:VW 3 "s_register_operand" "w")] |
2209 | VMLAL))] | |
88f77cba | 2210 | "TARGET_NEON" |
94f0f2cc | 2211 | "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3" |
f7379e5e | 2212 | [(set_attr "type" "neon_mla_<V_elem_ch>_long")] |
c956e102 | 2213 | ) |
88f77cba | 2214 | |
;; Expander for vmls over the VDQW modes.  When ARM_HAVE_NEON_<MODE>_ARITH
;; holds it emits the mul<mode>3neg<mode>add<mode>_neon pattern; otherwise it
;; emits neon_vmls<mode>_unspec.  Both receive operands 0-3 unchanged.
bab53516 | 2215 | (define_expand "neon_vmls<mode>" |
cd65e265 DZ |
2216 | [(match_operand:VDQW 0 "s_register_operand") |
2217 | (match_operand:VDQW 1 "s_register_operand") | |
2218 | (match_operand:VDQW 2 "s_register_operand") | |
2219 | (match_operand:VDQW 3 "s_register_operand")] | |
bab53516 SL |
2220 | "TARGET_NEON" |
2221 | { | |
bb78e587 | 2222 | if (ARM_HAVE_NEON_<MODE>_ARITH) |
bab53516 SL |
2223 | emit_insn (gen_mul<mode>3neg<mode>add<mode>_neon (operands[0], |
2224 | operands[1], operands[2], operands[3])); | |
2225 | else | |
2226 | emit_insn (gen_neon_vmls<mode>_unspec (operands[0], operands[1], | |
2227 | operands[2], operands[3])); | |
2228 | DONE; | |
2229 | }) | |
2230 | ||
2231 | ; Used for intrinsics when flag_unsafe_math_optimizations is false. | |
2232 | ||
;; Opaque (UNSPEC_VMLS) form of vmls over the VDQW modes.  Operand 1 is tied
;; to the destination by constraint "0"; operands 2 and 3 are the two sources
;; printed in the template.  The "type" attribute is chosen by
;; <Is_float_mode>.
2233 | (define_insn "neon_vmls<mode>_unspec" | |
f7379e5e JG |
2234 | [(set (match_operand:VDQW 0 "s_register_operand" "=w") |
2235 | (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0") | |
2236 | (match_operand:VDQW 2 "s_register_operand" "w") | |
2237 | (match_operand:VDQW 3 "s_register_operand" "w")] | |
bab53516 | 2238 | UNSPEC_VMLS))] |
88f77cba | 2239 | "TARGET_NEON" |
c956e102 | 2240 | "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3" |
003bb7f3 | 2241 | [(set (attr "type") |
b75b1be2 | 2242 | (if_then_else (match_test "<Is_float_mode>") |
f7379e5e JG |
2243 | (const_string "neon_fp_mla_s<q>") |
2244 | (const_string "neon_mla_<V_elem_ch><q>")))] | |
c956e102 | 2245 | ) |
88f77cba | 2246 | |
;; vmlsl (VMLSL unspec iterator): the destination and the tied accumulator
;; (operand 1) have mode <V_widen>, while sources 2 and 3 have the VW mode.
;; The template prints the destination with %q and the sources with %P.
94f0f2cc | 2247 | (define_insn "neon_vmlsl<sup><mode>" |
88f77cba JB |
2248 | [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") |
2249 | (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0") | |
2250 | (match_operand:VW 2 "s_register_operand" "w") | |
94f0f2cc JG |
2251 | (match_operand:VW 3 "s_register_operand" "w")] |
2252 | VMLSL))] | |
88f77cba | 2253 | "TARGET_NEON" |
94f0f2cc | 2254 | "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3" |
f7379e5e | 2255 | [(set_attr "type" "neon_mla_<V_elem_ch>_long")] |
c956e102 | 2256 | ) |
88f77cba | 2257 | |
94f0f2cc JG |
2258 | ;; vqdmulh, vqrdmulh |
;; Two-source unspec over the VMDQI modes; the VQDMULH iterator supplies the
;; optional <r> in both the pattern name and the mnemonic.
2259 | (define_insn "neon_vq<r>dmulh<mode>" | |
88f77cba JB |
2260 | [(set (match_operand:VMDQI 0 "s_register_operand" "=w") |
2261 | (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "w") | |
94f0f2cc JG |
2262 | (match_operand:VMDQI 2 "s_register_operand" "w")] |
2263 | VQDMULH))] | |
88f77cba | 2264 | "TARGET_NEON" |
94f0f2cc | 2265 | "vq<r>dmulh.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
f7379e5e | 2266 | [(set_attr "type" "neon_sat_mul_<V_elem_ch><q>")] |
c956e102 | 2267 | ) |
88f77cba | 2268 | |
5f2ca3b2 MW |
2269 | ;; vqrdmlah, vqrdmlsh |
;; Accumulating unspec over the VMDQI modes, available under TARGET_NEON_RDMA.
;; Operand 1 is tied to the destination; the VQRDMLH_AS iterator's
;; neon_rdma_as attribute fills in the letter that distinguishes the two
;; mnemonics.
2270 | (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h<mode>" | |
2271 | [(set (match_operand:VMDQI 0 "s_register_operand" "=w") | |
2272 | (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "0") | |
2273 | (match_operand:VMDQI 2 "s_register_operand" "w") | |
2274 | (match_operand:VMDQI 3 "s_register_operand" "w")] | |
2275 | VQRDMLH_AS))] | |
2276 | "TARGET_NEON_RDMA" | |
2277 | "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3" | |
2278 | [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")] | |
2279 | ) | |
2280 | ||
88f77cba JB |
;; vqdmlal as an UNSPEC_VQDMLAL: <V_widen> destination with tied accumulator
;; (operand 1) and two VMDI sources (operands 2 and 3).
2281 | (define_insn "neon_vqdmlal<mode>" |
2282 | [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") | |
2283 | (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0") | |
2284 | (match_operand:VMDI 2 "s_register_operand" "w") | |
94f0f2cc | 2285 | (match_operand:VMDI 3 "s_register_operand" "w")] |
88f77cba JB |
2286 | UNSPEC_VQDMLAL))] |
2287 | "TARGET_NEON" | |
c956e102 | 2288 | "vqdmlal.<V_s_elem>\t%q0, %P2, %P3" |
f7379e5e | 2289 | [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")] |
c956e102 | 2290 | ) |
88f77cba JB |
2291 | |
;; vqdmlsl as an UNSPEC_VQDMLSL: <V_widen> destination with tied accumulator
;; (operand 1) and two VMDI sources (operands 2 and 3).
2292 | (define_insn "neon_vqdmlsl<mode>" | |
2293 | [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") | |
2294 | (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0") | |
2295 | (match_operand:VMDI 2 "s_register_operand" "w") | |
94f0f2cc | 2296 | (match_operand:VMDI 3 "s_register_operand" "w")] |
88f77cba JB |
2297 | UNSPEC_VQDMLSL))] |
2298 | "TARGET_NEON" | |
c956e102 | 2299 | "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3" |
f7379e5e | 2300 | [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")] |
c956e102 | 2301 | ) |
88f77cba | 2302 | |
;; vmull (VMULL unspec iterator): two VW sources producing a <V_widen> result;
;; the destination is printed with %q and the sources with %P.
94f0f2cc | 2303 | (define_insn "neon_vmull<sup><mode>" |
88f77cba JB |
2304 | [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") |
2305 | (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w") | |
94f0f2cc JG |
2306 | (match_operand:VW 2 "s_register_operand" "w")] |
2307 | VMULL))] | |
88f77cba | 2308 | "TARGET_NEON" |
94f0f2cc | 2309 | "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2" |
f7379e5e | 2310 | [(set_attr "type" "neon_mul_<V_elem_ch>_long")] |
c956e102 | 2311 | ) |
88f77cba JB |
2312 | |
;; vqdmull as an UNSPEC_VQDMULL: two VMDI sources producing a <V_widen>
;; result.
2313 | (define_insn "neon_vqdmull<mode>" | |
2314 | [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") | |
2315 | (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w") | |
94f0f2cc | 2316 | (match_operand:VMDI 2 "s_register_operand" "w")] |
88f77cba JB |
2317 | UNSPEC_VQDMULL))] |
2318 | "TARGET_NEON" | |
c956e102 | 2319 | "vqdmull.<V_s_elem>\t%q0, %P1, %P2" |
f7379e5e | 2320 | [(set_attr "type" "neon_sat_mul_<V_elem_ch>_long")] |
c956e102 | 2321 | ) |
88f77cba | 2322 | |
;; Expander for vsub over the VCVTF modes.  When ARM_HAVE_NEON_<MODE>_ARITH
;; holds it emits the generic sub<mode>3 pattern; otherwise it emits
;; neon_vsub<mode>_unspec.
bab53516 | 2323 | (define_expand "neon_vsub<mode>" |
cd65e265 DZ |
2324 | [(match_operand:VCVTF 0 "s_register_operand") |
2325 | (match_operand:VCVTF 1 "s_register_operand") | |
2326 | (match_operand:VCVTF 2 "s_register_operand")] | |
bab53516 SL |
2327 | "TARGET_NEON" |
2328 | { | |
bb78e587 | 2329 | if (ARM_HAVE_NEON_<MODE>_ARITH) |
bab53516 SL |
2330 | emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2])); |
2331 | else | |
2332 | emit_insn (gen_neon_vsub<mode>_unspec (operands[0], operands[1], | |
2333 | operands[2])); | |
2334 | DONE; | |
2335 | }) | |
2336 | ||
2337 | ; Used for intrinsics when flag_unsafe_math_optimizations is false. | |
2338 | ||
;; Opaque (UNSPEC_VSUB) form of vsub over the VCVTF modes, with two register
;; sources.  The "type" attribute is chosen by <Is_float_mode>.
2339 | (define_insn "neon_vsub<mode>_unspec" | |
0d0b79a6 RR |
2340 | [(set (match_operand:VCVTF 0 "s_register_operand" "=w") |
2341 | (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w") | |
2342 | (match_operand:VCVTF 2 "s_register_operand" "w")] | |
88f77cba JB |
2343 | UNSPEC_VSUB))] |
2344 | "TARGET_NEON" | |
c956e102 | 2345 | "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
003bb7f3 | 2346 | [(set (attr "type") |
b75b1be2 | 2347 | (if_then_else (match_test "<Is_float_mode>") |
f7379e5e JG |
2348 | (const_string "neon_fp_addsub_s<q>") |
2349 | (const_string "neon_sub<q>")))] | |
c956e102 | 2350 | ) |
88f77cba | 2351 | |
;; vsubl (VSUBL unspec iterator): two VDI sources producing a <V_widen>
;; result; the destination is printed with %q and the sources with %P.
94f0f2cc | 2352 | (define_insn "neon_vsubl<sup><mode>" |
88f77cba JB |
2353 | [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") |
2354 | (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w") | |
94f0f2cc JG |
2355 | (match_operand:VDI 2 "s_register_operand" "w")] |
2356 | VSUBL))] | |
88f77cba | 2357 | "TARGET_NEON" |
94f0f2cc | 2358 | "vsubl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2" |
f7379e5e | 2359 | [(set_attr "type" "neon_sub_long")] |
c956e102 | 2360 | ) |
88f77cba | 2361 | |
;; vsubw (VSUBW unspec iterator): operand 1 already has the <V_widen> mode of
;; the result, operand 2 has the VDI mode; hence %q for operands 0 and 1 and
;; %P for operand 2.
94f0f2cc | 2362 | (define_insn "neon_vsubw<sup><mode>" |
88f77cba JB |
2363 | [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") |
2364 | (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w") | |
94f0f2cc JG |
2365 | (match_operand:VDI 2 "s_register_operand" "w")] |
2366 | VSUBW))] | |
88f77cba | 2367 | "TARGET_NEON" |
94f0f2cc | 2368 | "vsubw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2" |
f7379e5e | 2369 | [(set_attr "type" "neon_sub_widen")] |
c956e102 | 2370 | ) |
88f77cba | 2371 | |
;; vqsub (VQSUB unspec iterator) over the VDQIX modes, with two register
;; sources of the same mode as the result.
94f0f2cc | 2372 | (define_insn "neon_vqsub<sup><mode>" |
88f77cba JB |
2373 | [(set (match_operand:VDQIX 0 "s_register_operand" "=w") |
2374 | (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w") | |
94f0f2cc JG |
2375 | (match_operand:VDQIX 2 "s_register_operand" "w")] |
2376 | VQSUB))] | |
88f77cba | 2377 | "TARGET_NEON" |
94f0f2cc | 2378 | "vqsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
f7379e5e | 2379 | [(set_attr "type" "neon_qsub<q>")] |
c956e102 | 2380 | ) |
88f77cba | 2381 | |
;; vhsub (VHSUB unspec iterator) over the VDQIW modes, with two register
;; sources of the same mode as the result.
94f0f2cc | 2382 | (define_insn "neon_vhsub<sup><mode>" |
88f77cba JB |
2383 | [(set (match_operand:VDQIW 0 "s_register_operand" "=w") |
2384 | (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w") | |
94f0f2cc JG |
2385 | (match_operand:VDQIW 2 "s_register_operand" "w")] |
2386 | VHSUB))] | |
88f77cba | 2387 | "TARGET_NEON" |
94f0f2cc | 2388 | "vhsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
f7379e5e | 2389 | [(set_attr "type" "neon_sub_halve<q>")] |
c956e102 | 2390 | ) |
88f77cba | 2391 | |
;; v<r>subhn (VSUBHN unspec iterator): two VN sources producing a <V_narrow>
;; result; the destination is printed with %P and the sources with %q.
94f0f2cc | 2392 | (define_insn "neon_v<r>subhn<mode>" |
88f77cba JB |
2393 | [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w") |
2394 | (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w") | |
94f0f2cc JG |
2395 | (match_operand:VN 2 "s_register_operand" "w")] |
2396 | VSUBHN))] | |
88f77cba | 2397 | "TARGET_NEON" |
94f0f2cc | 2398 | "v<r>subhn.<V_if_elem>\t%P0, %q1, %q2" |
f7379e5e | 2399 | [(set_attr "type" "neon_sub_halve_narrow_q")] |
c956e102 | 2400 | ) |
88f77cba | 2401 | |
381811fa KT |
2402 | ;; These may expand to an UNSPEC pattern when a floating point mode is used |
2403 | ;; without unsafe math optimizations. | |
;; Expander for the vc<cmp_op> comparisons over the VDQW modes; operand 2 may
;; be a register or zero.  For float vector modes without
;; flag_unsafe_math_optimizations it dispatches on the mode to the V2SF or
;; V4SF *_insn_unspec pattern (any other mode is gcc_unreachable); in every
;; other case it emits neon_vc<cmp_op><mode>_insn.
c2978b34 | 2404 | (define_expand "@neon_vc<cmp_op><mode>" |
cd65e265 | 2405 | [(match_operand:<V_cmp_result> 0 "s_register_operand") |
381811fa | 2406 | (neg:<V_cmp_result> |
cd65e265 DZ |
2407 | (COMPARISONS:VDQW (match_operand:VDQW 1 "s_register_operand") |
2408 | (match_operand:VDQW 2 "reg_or_zero_operand")))] | |
88f77cba | 2409 | "TARGET_NEON" |
381811fa KT |
2410 | { |
2411 | /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations | |
2412 | are enabled. */ | |
2413 | if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT | |
2414 | && !flag_unsafe_math_optimizations) | |
2415 | { | |
2416 | /* We don't just emit a gen_neon_vc<cmp_op><mode>_insn_unspec because | |
2417 | we define gen_neon_vceq<mode>_insn_unspec only for float modes | |
2418 | whereas this expander iterates over the integer modes as well, | |
2419 | but we will never expand to UNSPECs for the integer comparisons. */ | |
2420 | switch (<MODE>mode) | |
2421 | { | |
4e10a5a7 | 2422 | case E_V2SFmode: |
381811fa KT |
2423 | emit_insn (gen_neon_vc<cmp_op>v2sf_insn_unspec (operands[0], |
2424 | operands[1], | |
2425 | operands[2])); | |
2426 | break; | |
4e10a5a7 | 2427 | case E_V4SFmode: |
381811fa KT |
2428 | emit_insn (gen_neon_vc<cmp_op>v4sf_insn_unspec (operands[0], |
2429 | operands[1], | |
2430 | operands[2])); | |
2431 | break; | |
2432 | default: | |
2433 | gcc_unreachable (); | |
2434 | } | |
2435 | } | |
2436 | else | |
2437 | emit_insn (gen_neon_vc<cmp_op><mode>_insn (operands[0], | |
2438 | operands[1], | |
2439 | operands[2])); | |
2440 | DONE; | |
2441 | } | |
c956e102 | 2442 | ) |
88f77cba | 2443 | |
;; RTL-comparison form of vc<cmp_op> over the VDQW modes.  The insn condition
;; rejects float vector modes unless flag_unsafe_math_optimizations is set.
;; The assembler template is built at output time: the element-type letter is
;; "f" for float vector modes and <cmp_type> otherwise, and the last operand
;; is register operand 2 for alternative 0 or the literal #0 for the Dz
;; alternative.
c2978b34 | 2444 | (define_insn "@neon_vc<cmp_op><mode>_insn" |
5bfc5baa | 2445 | [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w") |
381811fa KT |
2446 | (neg:<V_cmp_result> |
2447 | (COMPARISONS:<V_cmp_result> | |
2448 | (match_operand:VDQW 1 "s_register_operand" "w,w") | |
2449 | (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz"))))] | |
2450 | "TARGET_NEON && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT | |
2451 | && !flag_unsafe_math_optimizations)" | |
2452 | { | |
2453 | char pattern[100]; | |
2454 | sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0," | |
2455 | " %%<V_reg>1, %s", | |
2456 | GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT | |
2457 | ? "f" : "<cmp_type>", | |
2458 | which_alternative == 0 | |
2459 | ? "%<V_reg>2" : "#0"); | |
2460 | output_asm_insn (pattern, operands); | |
2461 | return ""; | |
2462 | } | |
003bb7f3 | 2463 | [(set (attr "type") |
381811fa | 2464 | (if_then_else (match_operand 2 "zero_operand") |
f7379e5e | 2465 | (const_string "neon_compare_zero<q>") |
381811fa | 2466 | (const_string "neon_compare<q>")))] |
c956e102 | 2467 | ) |
88f77cba | 2468 | |
;; Unspec (NEON_VCMP iterator) form of the comparisons over the VCVTF modes.
;; The template always uses an "f" element type; the last operand is register
;; operand 2 for alternative 0 or the literal #0 for the Dz alternative.
381811fa | 2469 | (define_insn "neon_vc<cmp_op_unsp><mode>_insn_unspec" |
5bfc5baa JB |
2470 | [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w") |
2471 | (unspec:<V_cmp_result> | |
381811fa KT |
2472 | [(match_operand:VCVTF 1 "s_register_operand" "w,w") |
2473 | (match_operand:VCVTF 2 "reg_or_zero_operand" "w,Dz")] | |
2474 | NEON_VCMP))] | |
88f77cba | 2475 | "TARGET_NEON" |
381811fa KT |
2476 | { |
2477 | char pattern[100]; | |
2478 | sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0," | |
2479 | " %%<V_reg>1, %s", | |
2480 | which_alternative == 0 | |
2481 | ? "%<V_reg>2" : "#0"); | |
2482 | output_asm_insn (pattern, operands); | |
2483 | return ""; | |
2484 | } | |
2485 | [(set_attr "type" "neon_fp_compare_s<q>")] | |
c956e102 | 2486 | ) |
88f77cba | 2487 | |
;; Expander for the vc<cmp_op> comparisons over the VH modes, available under
;; TARGET_NEON_FP16INST.  It emits the *_fp16insn_unspec pattern for float
;; vector modes without flag_unsafe_math_optimizations, and the *_fp16insn
;; pattern otherwise.
c2978b34 | 2488 | (define_expand "@neon_vc<cmp_op><mode>" |
55a9b91b MW |
2489 | [(match_operand:<V_cmp_result> 0 "s_register_operand") |
2490 | (neg:<V_cmp_result> | |
2491 | (COMPARISONS:VH | |
2492 | (match_operand:VH 1 "s_register_operand") | |
2493 | (match_operand:VH 2 "reg_or_zero_operand")))] | |
2494 | "TARGET_NEON_FP16INST" | |
2495 | { | |
2496 | /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations | |
2497 | are enabled. */ | |
2498 | if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT | |
2499 | && !flag_unsafe_math_optimizations) | |
2500 | emit_insn | |
2501 | (gen_neon_vc<cmp_op><mode>_fp16insn_unspec | |
2502 | (operands[0], operands[1], operands[2])); | |
2503 | else | |
2504 | emit_insn | |
2505 | (gen_neon_vc<cmp_op><mode>_fp16insn | |
2506 | (operands[0], operands[1], operands[2])); | |
2507 | DONE; | |
2508 | }) | |
2509 | ||
;; RTL-comparison form of vc<cmp_op> over the VH modes.  The insn condition
;; requires TARGET_NEON_FP16INST and rejects float vector modes unless
;; flag_unsafe_math_optimizations is set.  The template is built at output
;; time; the last operand is register operand 2 for alternative 0 or the
;; literal #0 for the Dz alternative.
2510 | (define_insn "neon_vc<cmp_op><mode>_fp16insn" | |
2511 | [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w") | |
2512 | (neg:<V_cmp_result> | |
2513 | (COMPARISONS:<V_cmp_result> | |
2514 | (match_operand:VH 1 "s_register_operand" "w,w") | |
2515 | (match_operand:VH 2 "reg_or_zero_operand" "w,Dz"))))] | |
2516 | "TARGET_NEON_FP16INST | |
2517 | && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT | |
2518 | && !flag_unsafe_math_optimizations)" | |
2519 | { | |
2520 | char pattern[100]; | |
2521 | sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0," | |
2522 | " %%<V_reg>1, %s", | |
2523 | GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT | |
2524 | ? "f" : "<cmp_type>", | |
2525 | which_alternative == 0 | |
2526 | ? "%<V_reg>2" : "#0"); | |
2527 | output_asm_insn (pattern, operands); | |
2528 | return ""; | |
2529 | } | |
2530 | [(set (attr "type") | |
2531 | (if_then_else (match_operand 2 "zero_operand") | |
2532 | (const_string "neon_compare_zero<q>") | |
2533 | (const_string "neon_compare<q>")))]) | |
2534 | ||
;; Unspec (NEON_VCMP iterator) form of the comparisons over the VH modes,
;; available under TARGET_NEON_FP16INST.  The template always uses an "f"
;; element type; the last operand is register operand 2 for alternative 0 or
;; the literal #0 for the Dz alternative.
2535 | (define_insn "neon_vc<cmp_op_unsp><mode>_fp16insn_unspec" | |
2536 | [(set | |
2537 | (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w") | |
2538 | (unspec:<V_cmp_result> | |
2539 | [(match_operand:VH 1 "s_register_operand" "w,w") | |
2540 | (match_operand:VH 2 "reg_or_zero_operand" "w,Dz")] | |
2541 | NEON_VCMP))] | |
2542 | "TARGET_NEON_FP16INST" | |
2543 | { | |
2544 | char pattern[100]; | |
2545 | sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0," | |
2546 | " %%<V_reg>1, %s", | |
2547 | which_alternative == 0 | |
2548 | ? "%<V_reg>2" : "#0"); | |
2549 | output_asm_insn (pattern, operands); | |
2550 | return ""; | |
2551 | } | |
2552 | [(set_attr "type" "neon_fp_compare_s<q>")]) | |
2553 | ||
;; Comparisons from the GTUGEU code iterator over the integer VDQIW modes,
;; with both sources in registers.  The template always prints a ".u" element
;; type.
c2978b34 | 2554 | (define_insn "@neon_vc<code><mode>" |
fd92bb80 | 2555 | [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w") |
381811fa KT |
2556 | (neg:<V_cmp_result> |
2557 | (GTUGEU:<V_cmp_result> | |
2558 | (match_operand:VDQIW 1 "s_register_operand" "w") | |
2559 | (match_operand:VDQIW 2 "s_register_operand" "w"))))] | |
fd92bb80 | 2560 | "TARGET_NEON" |
381811fa | 2561 | "vc<cmp_op>.u%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
f7379e5e | 2562 | [(set_attr "type" "neon_compare<q>")] |
fd92bb80 MGD |
2563 | ) |
2564 | ||
381811fa KT |
;; Expander for comparisons of absolute values (GTGE iterator) over the VCVTF
;; modes.  With flag_unsafe_math_optimizations it emits the RTL-comparison
;; neon_vca<cmp_op><mode>_insn; otherwise it emits the *_insn_unspec form.
2565 | (define_expand "neon_vca<cmp_op><mode>" |
2566 | [(set (match_operand:<V_cmp_result> 0 "s_register_operand") | |
2567 | (neg:<V_cmp_result> | |
2568 | (GTGE:<V_cmp_result> | |
2569 | (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand")) | |
2570 | (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand")))))] | |
5bfc5baa | 2571 | "TARGET_NEON" |
381811fa KT |
2572 | { |
2573 | if (flag_unsafe_math_optimizations) | |
2574 | emit_insn (gen_neon_vca<cmp_op><mode>_insn (operands[0], operands[1], | |
2575 | operands[2])); | |
2576 | else | |
2577 | emit_insn (gen_neon_vca<cmp_op><mode>_insn_unspec (operands[0], | |
2578 | operands[1], | |
2579 | operands[2])); | |
2580 | DONE; | |
2581 | } | |
5bfc5baa JB |
2582 | ) |
2583 | ||
;; RTL-comparison form of vac<cmp_op> over the VCVTF modes: a negated GTGE
;; comparison of the abs of operands 1 and 2.  Only enabled when
;; flag_unsafe_math_optimizations is set.
381811fa | 2584 | (define_insn "neon_vca<cmp_op><mode>_insn" |
88f77cba | 2585 | [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w") |
381811fa KT |
2586 | (neg:<V_cmp_result> |
2587 | (GTGE:<V_cmp_result> | |
2588 | (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand" "w")) | |
2589 | (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand" "w")))))] | |
2590 | "TARGET_NEON && flag_unsafe_math_optimizations" | |
2591 | "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" | |
f7379e5e | 2592 | [(set_attr "type" "neon_fp_compare_s<q>")] |
c956e102 | 2593 | ) |
88f77cba | 2594 | |
;; Unspec (NEON_VACMP iterator) form of vac<cmp_op_unsp> over the VCVTF modes,
;; with two register sources.
381811fa | 2595 | (define_insn "neon_vca<cmp_op_unsp><mode>_insn_unspec" |
88f77cba JB |
2596 | [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w") |
2597 | (unspec:<V_cmp_result> [(match_operand:VCVTF 1 "s_register_operand" "w") | |
94f0f2cc | 2598 | (match_operand:VCVTF 2 "s_register_operand" "w")] |
381811fa | 2599 | NEON_VACMP))] |
88f77cba | 2600 | "TARGET_NEON" |
381811fa | 2601 | "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
f7379e5e | 2602 | [(set_attr "type" "neon_fp_compare_s<q>")] |
c956e102 | 2603 | ) |
88f77cba | 2604 | |
55a9b91b MW |
;; Expander for comparisons of absolute values (GLTE iterator) over the VH
;; modes, available under TARGET_NEON_FP16INST.  With
;; flag_unsafe_math_optimizations it emits the *_fp16insn pattern; otherwise
;; it emits the *_fp16insn_unspec pattern.
2605 | (define_expand "neon_vca<cmp_op><mode>" |
2606 | [(set | |
2607 | (match_operand:<V_cmp_result> 0 "s_register_operand") | |
2608 | (neg:<V_cmp_result> | |
2609 | (GLTE:<V_cmp_result> | |
2610 | (abs:VH (match_operand:VH 1 "s_register_operand")) | |
2611 | (abs:VH (match_operand:VH 2 "s_register_operand")))))] | |
2612 | "TARGET_NEON_FP16INST" | |
2613 | { | |
2614 | if (flag_unsafe_math_optimizations) | |
2615 | emit_insn (gen_neon_vca<cmp_op><mode>_fp16insn | |
2616 | (operands[0], operands[1], operands[2])); | |
2617 | else | |
2618 | emit_insn (gen_neon_vca<cmp_op><mode>_fp16insn_unspec | |
2619 | (operands[0], operands[1], operands[2])); | |
2620 | DONE; | |
2621 | }) | |
2622 | ||
;; RTL-comparison form of vac<cmp_op> over the VH modes: a negated GLTE
;; comparison of the abs of operands 1 and 2.  Requires TARGET_NEON_FP16INST
;; and flag_unsafe_math_optimizations.
2623 | (define_insn "neon_vca<cmp_op><mode>_fp16insn" | |
2624 | [(set | |
2625 | (match_operand:<V_cmp_result> 0 "s_register_operand" "=w") | |
2626 | (neg:<V_cmp_result> | |
2627 | (GLTE:<V_cmp_result> | |
2628 | (abs:VH (match_operand:VH 1 "s_register_operand" "w")) | |
2629 | (abs:VH (match_operand:VH 2 "s_register_operand" "w")))))] | |
2630 | "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations" | |
2631 | "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" | |
2632 | [(set_attr "type" "neon_fp_compare_s<q>")] | |
2633 | ) | |
2634 | ||
;; Unspec (NEON_VAGLTE iterator) form of vac<cmp_op_unsp> over the VH modes,
;; with two register sources.  The insn is gated on TARGET_NEON_FP16INST, the
;; same condition used by the VH expander that generates it and by the other
;; VH comparison patterns, because its operands are half-float vectors.
2635 | (define_insn "neon_vca<cmp_op_unsp><mode>_fp16insn_unspec" | |
2636 | [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w") | |
2637 | (unspec:<V_cmp_result> | |
2638 | [(match_operand:VH 1 "s_register_operand" "w") | |
2639 | (match_operand:VH 2 "s_register_operand" "w")] | |
2640 | NEON_VAGLTE))] | |
2641 | "TARGET_NEON_FP16INST" | |
2642 | "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" | |
2643 | [(set_attr "type" "neon_fp_compare_s<q>")] | |
2644 | ) | |
2645 | ||
;; Compare-against-zero expander over the VH modes: forwards to
;; gen_neon_vc<cmp_op><mode> with CONST0_RTX (<MODE>mode) as the second
;; comparison operand.
2646 | (define_expand "neon_vc<cmp_op>z<mode>" | |
2647 | [(set | |
2648 | (match_operand:<V_cmp_result> 0 "s_register_operand") | |
2649 | (COMPARISONS:<V_cmp_result> | |
2650 | (match_operand:VH 1 "s_register_operand") | |
2651 | (const_int 0)))] | |
2652 | "TARGET_NEON_FP16INST" | |
2653 | { | |
2654 | emit_insn (gen_neon_vc<cmp_op><mode> (operands[0], operands[1], | |
2655 | CONST0_RTX (<MODE>mode))); | |
2656 | DONE; | |
2657 | }) | |
2658 | ||
88f77cba JB |
;; vtst as an UNSPEC_VTST over the integer VDQIW modes, with two register
;; sources of the same mode as the result.
2659 | (define_insn "neon_vtst<mode>" |
2660 | [(set (match_operand:VDQIW 0 "s_register_operand" "=w") | |
2661 | (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w") | |
94f0f2cc | 2662 | (match_operand:VDQIW 2 "s_register_operand" "w")] |
88f77cba JB |
2663 | UNSPEC_VTST))] |
2664 | "TARGET_NEON" | |
c956e102 | 2665 | "vtst.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
f7379e5e | 2666 | [(set_attr "type" "neon_tst<q>")] |
c956e102 | 2667 | ) |
88f77cba | 2668 | |
94f0f2cc JG |
;; vabd (VABD unspec iterator) over the integer VDQIW modes, with two register
;; sources of the same mode as the result.
2669 | (define_insn "neon_vabd<sup><mode>" |
2670 | [(set (match_operand:VDQIW 0 "s_register_operand" "=w") | |
2671 | (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w") | |
2672 | (match_operand:VDQIW 2 "s_register_operand" "w")] | |
2673 | VABD))] | |
88f77cba | 2674 | "TARGET_NEON" |
94f0f2cc JG |
2675 | "vabd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
2676 | [(set_attr "type" "neon_abd<q>")] | |
c956e102 | 2677 | ) |
88f77cba | 2678 | |
55a9b91b MW |
;; vabd as an UNSPEC_VABD_F over the VH modes, available under
;; TARGET_NEON_FP16INST.
;; NOTE(review): the "type" here is neon_abd<q>, whereas the VCVTF pattern
;; neon_vabdf<mode> uses neon_fp_abd_s<q> for the same unspec -- confirm
;; which scheduling type is intended for the half-float modes.
2679 | (define_insn "neon_vabd<mode>" |
2680 | [(set (match_operand:VH 0 "s_register_operand" "=w") | |
2681 | (unspec:VH [(match_operand:VH 1 "s_register_operand" "w") | |
2682 | (match_operand:VH 2 "s_register_operand" "w")] | |
2683 | UNSPEC_VABD_F))] | |
2684 | "TARGET_NEON_FP16INST" | |
2685 | "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" | |
2686 | [(set_attr "type" "neon_abd<q>")] | |
2687 | ) | |
2688 | ||
94f0f2cc JG |
;; vabd as an UNSPEC_VABD_F over the VCVTF modes, with two register sources.
2689 | (define_insn "neon_vabdf<mode>" |
2690 | [(set (match_operand:VCVTF 0 "s_register_operand" "=w") | |
2691 | (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w") | |
2692 | (match_operand:VCVTF 2 "s_register_operand" "w")] | |
2693 | UNSPEC_VABD_F))] | |
2694 | "TARGET_NEON" | |
2695 | "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" | |
2696 | [(set_attr "type" "neon_fp_abd_s<q>")] | |
2697 | ) | |
2698 | ||
;; vabdl (VABDL unspec iterator): two VW sources producing a <V_widen> result;
;; the destination is printed with %q and the sources with %P.
2699 | (define_insn "neon_vabdl<sup><mode>" | |
88f77cba JB |
2700 | [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") |
2701 | (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w") | |
94f0f2cc JG |
2702 | (match_operand:VW 2 "s_register_operand" "w")] |
2703 | VABDL))] | |
88f77cba | 2704 | "TARGET_NEON" |
94f0f2cc | 2705 | "vabdl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2" |
f7379e5e | 2706 | [(set_attr "type" "neon_abd_long")] |
c956e102 | 2707 | ) |
88f77cba | 2708 | |
;; vaba: expressed as a plus of the VABD unspec of operands 2 and 3 and the
;; accumulator operand 1, which is tied to the destination by constraint "0".
94f0f2cc | 2709 | (define_insn "neon_vaba<sup><mode>" |
88f77cba | 2710 | [(set (match_operand:VDQIW 0 "s_register_operand" "=w") |
5b28d821 | 2711 | (plus:VDQIW (unspec:VDQIW [(match_operand:VDQIW 2 "s_register_operand" "w") |
94f0f2cc JG |
2712 | (match_operand:VDQIW 3 "s_register_operand" "w")] |
2713 | VABD) | |
5b28d821 | 2714 | (match_operand:VDQIW 1 "s_register_operand" "0")))] |
88f77cba | 2715 | "TARGET_NEON" |
94f0f2cc | 2716 | "vaba.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3" |
f7379e5e | 2717 | [(set_attr "type" "neon_arith_acc<q>")] |
c956e102 | 2718 | ) |
88f77cba | 2719 | |
;; vabal: expressed as a plus of the VABDL unspec of the VW operands 2 and 3
;; and the <V_widen> accumulator operand 1, which is tied to the destination
;; by constraint "0".
94f0f2cc | 2720 | (define_insn "neon_vabal<sup><mode>" |
88f77cba | 2721 | [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") |
5b28d821 | 2722 | (plus:<V_widen> (unspec:<V_widen> [(match_operand:VW 2 "s_register_operand" "w") |
94f0f2cc JG |
2723 | (match_operand:VW 3 "s_register_operand" "w")] |
2724 | VABDL) | |
5b28d821 | 2725 | (match_operand:<V_widen> 1 "s_register_operand" "0")))] |
88f77cba | 2726 | "TARGET_NEON" |
94f0f2cc | 2727 | "vabal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3" |
f7379e5e | 2728 | [(set_attr "type" "neon_arith_acc<q>")] |
c956e102 | 2729 | ) |
88f77cba | 2730 | |
84ae7213 PW |
;; Expander taking two V16QI inputs (operands 1 and 2), a V4SI accumulator
;; (operand 3) and a V4SI destination (operand 0).  The emitted sequence is:
;;   1. vabdl on the low V8QI halves of operands 1 and 2 into a V8HI temporary;
;;   2. vget_high of each V16QI operand into fresh V8QI registers;
;;   3. vabal of those high halves into the same V8HI temporary;
;;   4. vpadal of the temporary into operand 3;
;;   5. a move of operand 3 into operand 0.
2731 | (define_expand "<sup>sadv16qi" |
2732 | [(use (match_operand:V4SI 0 "register_operand")) | |
2733 | (unspec:V16QI [(use (match_operand:V16QI 1 "register_operand")) | |
2734 | (use (match_operand:V16QI 2 "register_operand"))] VABAL) | |
2735 | (use (match_operand:V4SI 3 "register_operand"))] | |
2736 | "TARGET_NEON" | |
2737 | { | |
2738 | rtx reduc = gen_reg_rtx (V8HImode); | |
2739 | rtx op1_highpart = gen_reg_rtx (V8QImode); | |
2740 | rtx op2_highpart = gen_reg_rtx (V8QImode); | |
2741 | ||
2742 | emit_insn (gen_neon_vabdl<sup>v8qi (reduc, | |
2743 | gen_lowpart (V8QImode, operands[1]), | |
2744 | gen_lowpart (V8QImode, operands[2]))); | |
2745 | ||
2746 | emit_insn (gen_neon_vget_highv16qi (op1_highpart, operands[1])); | |
2747 | emit_insn (gen_neon_vget_highv16qi (op2_highpart, operands[2])); | |
2748 | emit_insn (gen_neon_vabal<sup>v8qi (reduc, reduc, | |
2749 | op1_highpart, op2_highpart)); | |
2750 | emit_insn (gen_neon_vpadal<sup>v8hi (operands[3], operands[3], reduc)); | |
2751 | ||
2752 | emit_move_insn (operands[0], operands[3]); | |
2753 | DONE; | |
2754 | } | |
2755 | ) | |
2756 | ||
94f0f2cc JG |
;; v<maxmin> (VMAXMIN unspec iterator) over the integer VDQIW modes, with two
;; register sources of the same mode as the result.
2757 | (define_insn "neon_v<maxmin><sup><mode>" |
2758 | [(set (match_operand:VDQIW 0 "s_register_operand" "=w") | |
2759 | (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w") | |
2760 | (match_operand:VDQIW 2 "s_register_operand" "w")] | |
2761 | VMAXMIN))] | |
88f77cba | 2762 | "TARGET_NEON" |
94f0f2cc JG |
2763 | "v<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
2764 | [(set_attr "type" "neon_minmax<q>")] | |
c956e102 | 2765 | ) |
88f77cba | 2766 | |
94f0f2cc JG |
;; v<maxmin> (VMAXMINF unspec iterator) over the VCVTF modes, with two
;; register sources of the same mode as the result.
2767 | (define_insn "neon_v<maxmin>f<mode>" |
2768 | [(set (match_operand:VCVTF 0 "s_register_operand" "=w") | |
2769 | (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w") | |
2770 | (match_operand:VCVTF 2 "s_register_operand" "w")] | |
2771 | VMAXMINF))] | |
88f77cba | 2772 | "TARGET_NEON" |
94f0f2cc | 2773 | "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
0a18c19f DS |
2774 | [(set_attr "type" "neon_fp_minmax_s<q>")] |
2775 | ) | |
2776 | ||
55a9b91b MW |
2777 | (define_insn "neon_v<maxmin>f<mode>" |
2778 | [(set (match_operand:VH 0 "s_register_operand" "=w") | |
2779 | (unspec:VH | |
2780 | [(match_operand:VH 1 "s_register_operand" "w") | |
2781 | (match_operand:VH 2 "s_register_operand" "w")] | |
2782 | VMAXMINF))] | |
2783 | "TARGET_NEON_FP16INST" | |
2784 | "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" | |
2785 | [(set_attr "type" "neon_fp_minmax_s<q>")] | |
2786 | ) | |
2787 | ||
2788 | (define_insn "neon_vp<maxmin>fv4hf" | |
2789 | [(set (match_operand:V4HF 0 "s_register_operand" "=w") | |
2790 | (unspec:V4HF | |
2791 | [(match_operand:V4HF 1 "s_register_operand" "w") | |
2792 | (match_operand:V4HF 2 "s_register_operand" "w")] | |
2793 | VPMAXMINF))] | |
2794 | "TARGET_NEON_FP16INST" | |
2795 | "vp<maxmin>.f16\t%P0, %P1, %P2" | |
2796 | [(set_attr "type" "neon_reduc_minmax")] | |
2797 | ) | |
2798 | ||
2799 | (define_insn "neon_<fmaxmin_op><mode>" | |
2800 | [(set | |
2801 | (match_operand:VH 0 "s_register_operand" "=w") | |
2802 | (unspec:VH | |
2803 | [(match_operand:VH 1 "s_register_operand" "w") | |
2804 | (match_operand:VH 2 "s_register_operand" "w")] | |
2805 | VMAXMINFNM))] | |
2806 | "TARGET_NEON_FP16INST" | |
2807 | "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" | |
2808 | [(set_attr "type" "neon_fp_minmax_s<q>")] | |
2809 | ) | |
9fc158eb BB |
2810 | |
2811 | ;; v<maxmin>nm intrinsics. | |
2812 | (define_insn "neon_<fmaxmin_op><mode>" | |
2813 | [(set (match_operand:VCVTF 0 "s_register_operand" "=w") | |
2814 | (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w") | |
2815 | (match_operand:VCVTF 2 "s_register_operand" "w")] | |
2816 | VMAXMINFNM))] | |
c8d61ab8 | 2817 | "TARGET_NEON && TARGET_VFP5" |
9fc158eb BB |
2818 | "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
2819 | [(set_attr "type" "neon_fp_minmax_s<q>")] | |
2820 | ) | |
55a9b91b | 2821 | |
0a18c19f DS |
2822 | ;; Vector forms for the IEEE-754 fmax()/fmin() functions |
2823 | (define_insn "<fmaxmin><mode>3" | |
2824 | [(set (match_operand:VCVTF 0 "s_register_operand" "=w") | |
2825 | (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w") | |
2826 | (match_operand:VCVTF 2 "s_register_operand" "w")] | |
2827 | VMAXMINFNM))] | |
c8d61ab8 | 2828 | "TARGET_NEON && TARGET_VFP5" |
0a18c19f | 2829 | "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
94f0f2cc | 2830 | [(set_attr "type" "neon_fp_minmax_s<q>")] |
c956e102 | 2831 | ) |
88f77cba JB |
2832 | |
2833 | (define_expand "neon_vpadd<mode>" | |
cd65e265 DZ |
2834 | [(match_operand:VD 0 "s_register_operand") |
2835 | (match_operand:VD 1 "s_register_operand") | |
2836 | (match_operand:VD 2 "s_register_operand")] | |
88f77cba JB |
2837 | "TARGET_NEON" |
2838 | { | |
2839 | emit_insn (gen_neon_vpadd_internal<mode> (operands[0], operands[1], | |
2840 | operands[2])); | |
2841 | DONE; | |
2842 | }) | |
2843 | ||
94f0f2cc | 2844 | (define_insn "neon_vpaddl<sup><mode>" |
88f77cba | 2845 | [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w") |
94f0f2cc JG |
2846 | (unspec:<V_double_width> [(match_operand:VDQIW 1 "s_register_operand" "w")] |
2847 | VPADDL))] | |
88f77cba | 2848 | "TARGET_NEON" |
94f0f2cc | 2849 | "vpaddl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1" |
f7379e5e | 2850 | [(set_attr "type" "neon_reduc_add_long")] |
c956e102 | 2851 | ) |
88f77cba | 2852 | |
94f0f2cc | 2853 | (define_insn "neon_vpadal<sup><mode>" |
88f77cba JB |
2854 | [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w") |
2855 | (unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0") | |
94f0f2cc JG |
2856 | (match_operand:VDQIW 2 "s_register_operand" "w")] |
2857 | VPADAL))] | |
88f77cba | 2858 | "TARGET_NEON" |
94f0f2cc | 2859 | "vpadal.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2" |
f7379e5e | 2860 | [(set_attr "type" "neon_reduc_add_acc")] |
c956e102 | 2861 | ) |
88f77cba | 2862 | |
94f0f2cc JG |
2863 | (define_insn "neon_vp<maxmin><sup><mode>" |
2864 | [(set (match_operand:VDI 0 "s_register_operand" "=w") | |
2865 | (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w") | |
2866 | (match_operand:VDI 2 "s_register_operand" "w")] | |
2867 | VPMAXMIN))] | |
88f77cba | 2868 | "TARGET_NEON" |
94f0f2cc JG |
2869 | "vp<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
2870 | [(set_attr "type" "neon_reduc_minmax<q>")] | |
c956e102 | 2871 | ) |
88f77cba | 2872 | |
94f0f2cc JG |
2873 | (define_insn "neon_vp<maxmin>f<mode>" |
2874 | [(set (match_operand:VCVTF 0 "s_register_operand" "=w") | |
2875 | (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w") | |
2876 | (match_operand:VCVTF 2 "s_register_operand" "w")] | |
2877 | VPMAXMINF))] | |
88f77cba | 2878 | "TARGET_NEON" |
94f0f2cc JG |
2879 | "vp<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
2880 | [(set_attr "type" "neon_fp_reduc_minmax_s<q>")] | |
c956e102 | 2881 | ) |
88f77cba JB |
2882 | |
2883 | (define_insn "neon_vrecps<mode>" | |
2884 | [(set (match_operand:VCVTF 0 "s_register_operand" "=w") | |
2885 | (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w") | |
94f0f2cc | 2886 | (match_operand:VCVTF 2 "s_register_operand" "w")] |
88f77cba JB |
2887 | UNSPEC_VRECPS))] |
2888 | "TARGET_NEON" | |
c956e102 | 2889 | "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
f7379e5e | 2890 | [(set_attr "type" "neon_fp_recps_s<q>")] |
c956e102 | 2891 | ) |
88f77cba | 2892 | |
55a9b91b MW |
2893 | (define_insn "neon_vrecps<mode>" |
2894 | [(set | |
2895 | (match_operand:VH 0 "s_register_operand" "=w") | |
2896 | (unspec:VH [(match_operand:VH 1 "s_register_operand" "w") | |
2897 | (match_operand:VH 2 "s_register_operand" "w")] | |
2898 | UNSPEC_VRECPS))] | |
2899 | "TARGET_NEON_FP16INST" | |
2900 | "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" | |
2901 | [(set_attr "type" "neon_fp_recps_s<q>")] | |
2902 | ) | |
2903 | ||
88f77cba JB |
2904 | (define_insn "neon_vrsqrts<mode>" |
2905 | [(set (match_operand:VCVTF 0 "s_register_operand" "=w") | |
2906 | (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w") | |
94f0f2cc | 2907 | (match_operand:VCVTF 2 "s_register_operand" "w")] |
88f77cba JB |
2908 | UNSPEC_VRSQRTS))] |
2909 | "TARGET_NEON" | |
c956e102 | 2910 | "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
f7379e5e | 2911 | [(set_attr "type" "neon_fp_rsqrts_s<q>")] |
c956e102 | 2912 | ) |
88f77cba | 2913 | |
55a9b91b MW |
2914 | (define_insn "neon_vrsqrts<mode>" |
2915 | [(set | |
2916 | (match_operand:VH 0 "s_register_operand" "=w") | |
2917 | (unspec:VH [(match_operand:VH 1 "s_register_operand" "w") | |
2918 | (match_operand:VH 2 "s_register_operand" "w")] | |
2919 | UNSPEC_VRSQRTS))] | |
2920 | "TARGET_NEON_FP16INST" | |
2921 | "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" | |
2922 | [(set_attr "type" "neon_fp_rsqrts_s<q>")] | |
2923 | ) | |
2924 | ||
bab53516 | 2925 | (define_expand "neon_vabs<mode>" |
cd65e265 DZ |
2926 | [(match_operand:VDQW 0 "s_register_operand") |
2927 | (match_operand:VDQW 1 "s_register_operand")] | |
88f77cba | 2928 | "TARGET_NEON" |
bab53516 SL |
2929 | { |
2930 | emit_insn (gen_abs<mode>2 (operands[0], operands[1])); | |
2931 | DONE; | |
2932 | }) | |
88f77cba JB |
2933 | |
2934 | (define_insn "neon_vqabs<mode>" | |
2935 | [(set (match_operand:VDQIW 0 "s_register_operand" "=w") | |
94f0f2cc | 2936 | (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")] |
88f77cba JB |
2937 | UNSPEC_VQABS))] |
2938 | "TARGET_NEON" | |
c956e102 | 2939 | "vqabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1" |
f7379e5e | 2940 | [(set_attr "type" "neon_qabs<q>")] |
c956e102 | 2941 | ) |
88f77cba | 2942 | |
7a10ea9f KT |
2943 | (define_insn "neon_bswap<mode>" |
2944 | [(set (match_operand:VDQHSD 0 "register_operand" "=w") | |
2945 | (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))] | |
2946 | "TARGET_NEON" | |
2947 | "vrev<V_sz_elem>.8\\t%<V_reg>0, %<V_reg>1" | |
2948 | [(set_attr "type" "neon_rev<q>")] | |
2949 | ) | |
2950 | ||
88f77cba | 2951 | (define_expand "neon_vneg<mode>" |
cd65e265 DZ |
2952 | [(match_operand:VDQW 0 "s_register_operand") |
2953 | (match_operand:VDQW 1 "s_register_operand")] | |
88f77cba JB |
2954 | "TARGET_NEON" |
2955 | { | |
4cbb7cab | 2956 | emit_insn (gen_neon_neg<mode>2 (operands[0], operands[1])); |
88f77cba JB |
2957 | DONE; |
2958 | }) | |
2959 | ||
c2b7062d TC |
2960 | |
2961 | ;; The vcadd and vcmla patterns are explicitly made UNSPECs because their |
2962 | ;; usage needs to guarantee that the source vectors are contiguous. It |
2963 | ;; would be wrong to describe the operation without also being able to |
2964 | ;; describe the permute that is required, but even if that were done, the |
2965 | ;; permute would have been created as a LOAD_LANES, which means the values |
2966 | ;; in the registers would be in the wrong order. |
2967 | (define_insn "neon_vcadd<rot><mode>" | |
2968 | [(set (match_operand:VF 0 "register_operand" "=w") | |
2969 | (unspec:VF [(match_operand:VF 1 "register_operand" "w") | |
2970 | (match_operand:VF 2 "register_operand" "w")] | |
2971 | VCADD))] | |
2972 | "TARGET_COMPLEX" | |
2973 | "vcadd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, #<rot>" | |
2974 | [(set_attr "type" "neon_fcadd")] | |
2975 | ) | |
2976 | ||
2977 | (define_insn "neon_vcmla<rot><mode>" | |
2978 | [(set (match_operand:VF 0 "register_operand" "=w") | |
2979 | (plus:VF (match_operand:VF 1 "register_operand" "0") | |
2980 | (unspec:VF [(match_operand:VF 2 "register_operand" "w") | |
2981 | (match_operand:VF 3 "register_operand" "w")] | |
2982 | VCMLA)))] | |
2983 | "TARGET_COMPLEX" | |
2984 | "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3, #<rot>" | |
2985 | [(set_attr "type" "neon_fcmla")] | |
2986 | ) | |
2987 | ||
2988 | (define_insn "neon_vcmla_lane<rot><mode>" | |
2989 | [(set (match_operand:VF 0 "s_register_operand" "=w") | |
2990 | (plus:VF (match_operand:VF 1 "s_register_operand" "0") | |
2991 | (unspec:VF [(match_operand:VF 2 "s_register_operand" "w") | |
2992 | (match_operand:VF 3 "s_register_operand" "<VF_constraint>") | |
2993 | (match_operand:SI 4 "const_int_operand" "n")] | |
2994 | VCMLA)))] | |
2995 | "TARGET_COMPLEX" | |
2996 | { | |
ee8045e5 | 2997 | operands = neon_vcmla_lane_prepare_operands (operands); |
c2b7062d TC |
2998 | return "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, d%c3[%c4], #<rot>"; |
2999 | } | |
3000 | [(set_attr "type" "neon_fcmla")] | |
3001 | ) | |
3002 | ||
3003 | (define_insn "neon_vcmla_laneq<rot><mode>" | |
3004 | [(set (match_operand:VDF 0 "s_register_operand" "=w") | |
3005 | (plus:VDF (match_operand:VDF 1 "s_register_operand" "0") | |
3006 | (unspec:VDF [(match_operand:VDF 2 "s_register_operand" "w") | |
3007 | (match_operand:<V_DOUBLE> 3 "s_register_operand" "<VF_constraint>") | |
3008 | (match_operand:SI 4 "const_int_operand" "n")] | |
3009 | VCMLA)))] | |
3010 | "TARGET_COMPLEX" | |
3011 | { | |
ee8045e5 | 3012 | operands = neon_vcmla_lane_prepare_operands (operands); |
c2b7062d TC |
3013 | return "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, d%c3[%c4], #<rot>"; |
3014 | } | |
3015 | [(set_attr "type" "neon_fcmla")] | |
3016 | ) | |
3017 | ||
3018 | (define_insn "neon_vcmlaq_lane<rot><mode>" | |
3019 | [(set (match_operand:VQ_HSF 0 "s_register_operand" "=w") | |
3020 | (plus:VQ_HSF (match_operand:VQ_HSF 1 "s_register_operand" "0") | |
3021 | (unspec:VQ_HSF [(match_operand:VQ_HSF 2 "s_register_operand" "w") | |
3022 | (match_operand:<V_HALF> 3 "s_register_operand" "<VF_constraint>") | |
3023 | (match_operand:SI 4 "const_int_operand" "n")] | |
3024 | VCMLA)))] | |
3025 | "TARGET_COMPLEX" | |
3026 | { | |
ee8045e5 | 3027 | operands = neon_vcmla_lane_prepare_operands (operands); |
c2b7062d TC |
3028 | return "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, d%c3[%c4], #<rot>"; |
3029 | } | |
3030 | [(set_attr "type" "neon_fcmla")] | |
3031 | ) | |
3032 | ||
3033 | ||
f8e109ba TC |
3034 | ;; These instructions map to the __builtins for the Dot Product operations. |
3035 | (define_insn "neon_<sup>dot<vsi2qi>" | |
3036 | [(set (match_operand:VCVTI 0 "register_operand" "=w") | |
3037 | (plus:VCVTI (match_operand:VCVTI 1 "register_operand" "0") | |
3038 | (unspec:VCVTI [(match_operand:<VSI2QI> 2 | |
3039 | "register_operand" "w") | |
3040 | (match_operand:<VSI2QI> 3 | |
3041 | "register_operand" "w")] | |
3042 | DOTPROD)))] | |
3043 | "TARGET_DOTPROD" | |
3044 | "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %<V_reg>3" | |
51e6029f | 3045 | [(set_attr "type" "neon_dot<q>")] |
f8e109ba TC |
3046 | ) |
3047 | ||
f348846e SMW |
3048 | ;; This instruction maps to the __builtins for the vusdot Dot Product operation (requires TARGET_I8MM). |
3049 | (define_insn "neon_usdot<vsi2qi>" | |
3050 | [(set (match_operand:VCVTI 0 "register_operand" "=w") | |
3051 | (plus:VCVTI | |
3052 | (unspec:VCVTI | |
3053 | [(match_operand:<VSI2QI> 2 "register_operand" "w") | |
3054 | (match_operand:<VSI2QI> 3 "register_operand" "w")] | |
3055 | UNSPEC_DOT_US) | |
3056 | (match_operand:VCVTI 1 "register_operand" "0")))] | |
3057 | "TARGET_I8MM" | |
3058 | "vusdot.s8\\t%<V_reg>0, %<V_reg>2, %<V_reg>3" | |
3059 | [(set_attr "type" "neon_dot<q>")] | |
3060 | ) | |
3061 | ||
f8e109ba TC |
3062 | ;; These instructions map to the __builtins for the Dot Product |
3063 | ;; indexed operations. | |
3064 | (define_insn "neon_<sup>dot_lane<vsi2qi>" | |
3065 | [(set (match_operand:VCVTI 0 "register_operand" "=w") | |
3066 | (plus:VCVTI (match_operand:VCVTI 1 "register_operand" "0") | |
3067 | (unspec:VCVTI [(match_operand:<VSI2QI> 2 | |
3068 | "register_operand" "w") | |
3069 | (match_operand:V8QI 3 "register_operand" "t") | |
3070 | (match_operand:SI 4 "immediate_operand" "i")] | |
3071 | DOTPROD)))] | |
3072 | "TARGET_DOTPROD" | |
3073 | { | |
3074 | operands[4] | |
3075 | = GEN_INT (NEON_ENDIAN_LANE_N (V8QImode, INTVAL (operands[4]))); | |
3076 | return "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %P3[%c4]"; | |
3077 | } | |
51e6029f | 3078 | [(set_attr "type" "neon_dot<q>")] |
f8e109ba TC |
3079 | ) |
3080 | ||
f348846e SMW |
3081 | ;; These instructions map to the __builtins for the Dot Product |
3082 | ;; indexed operations in the v8.6 I8MM extension. | |
3083 | (define_insn "neon_<sup>dot_lane<vsi2qi>" | |
3084 | [(set (match_operand:VCVTI 0 "register_operand" "=w") | |
3085 | (plus:VCVTI | |
3086 | (unspec:VCVTI | |
3087 | [(match_operand:<VSI2QI> 2 "register_operand" "w") | |
3088 | (match_operand:V8QI 3 "register_operand" "t") | |
3089 | (match_operand:SI 4 "immediate_operand" "i")] | |
3090 | DOTPROD_I8MM) | |
3091 | (match_operand:VCVTI 1 "register_operand" "0")))] | |
3092 | "TARGET_I8MM" | |
3093 | { | |
3094 | operands[4] = GEN_INT (INTVAL (operands[4])); | |
3095 | return "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %P3[%c4]"; | |
3096 | } | |
3097 | [(set_attr "type" "neon_dot<q>")] | |
3098 | ) | |
3099 | ||
f8e109ba TC |
3100 | ;; These expands map to the Dot Product optab the vectorizer checks for. |
3101 | ;; The auto-vectorizer expects a dot product builtin that also does an | |
3102 | ;; accumulation into the provided register. | |
3103 | ;; Given the following pattern | |
3104 | ;; | |
3105 | ;; for (i=0; i<len; i++) { | |
3106 | ;; c = a[i] * b[i]; | |
3107 | ;; r += c; | |
3108 | ;; } | |
3109 | ;; return r; |
3110 | ;; | |
3111 | ;; This can be auto-vectorized to | |
3112 | ;; r = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3]; | |
3113 | ;; | |
3114 | ;; given enough iterations. However the vectorizer can keep unrolling the loop | |
3115 | ;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7]; | |
3116 | ;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11]; | |
3117 | ;; ... | |
3118 | ;; | |
3119 | ;; and so the vectorizer provides r, in which the result has to be accumulated. | |
3120 | (define_expand "<sup>dot_prod<vsi2qi>" | |
3121 | [(set (match_operand:VCVTI 0 "register_operand") | |
3122 | (plus:VCVTI (unspec:VCVTI [(match_operand:<VSI2QI> 1 | |
3123 | "register_operand") | |
3124 | (match_operand:<VSI2QI> 2 | |
3125 | "register_operand")] | |
3126 | DOTPROD) | |
3127 | (match_operand:VCVTI 3 "register_operand")))] | |
3128 | "TARGET_DOTPROD" | |
3129 | { | |
3130 | emit_insn ( | |
3131 | gen_neon_<sup>dot<vsi2qi> (operands[3], operands[3], operands[1], | |
3132 | operands[2])); | |
3133 | emit_insn (gen_rtx_SET (operands[0], operands[3])); | |
3134 | DONE; | |
3135 | }) | |
3136 | ||
97f518b3 JW |
3137 | (define_expand "neon_copysignf<mode>" |
3138 | [(match_operand:VCVTF 0 "register_operand") | |
3139 | (match_operand:VCVTF 1 "register_operand") | |
3140 | (match_operand:VCVTF 2 "register_operand")] | |
3141 | "TARGET_NEON" | |
3142 | "{ | |
3143 | rtx v_bitmask_cast; | |
3144 | rtx v_bitmask = gen_reg_rtx (<VCVTF:V_cmp_result>mode); | |
4199c859 | 3145 | rtx c = gen_int_mode (0x80000000, SImode); |
97f518b3 JW |
3146 | |
3147 | emit_move_insn (v_bitmask, | |
59d06c05 | 3148 | gen_const_vec_duplicate (<VCVTF:V_cmp_result>mode, c)); |
97f518b3 JW |
3149 | emit_move_insn (operands[0], operands[2]); |
3150 | v_bitmask_cast = simplify_gen_subreg (<MODE>mode, v_bitmask, | |
3151 | <VCVTF:V_cmp_result>mode, 0); | |
3152 | emit_insn (gen_neon_vbsl<mode> (operands[0], v_bitmask_cast, operands[0], | |
3153 | operands[1])); | |
3154 | ||
3155 | DONE; | |
3156 | }" | |
3157 | ) | |
3158 | ||
88f77cba JB |
3159 | (define_insn "neon_vqneg<mode>" |
3160 | [(set (match_operand:VDQIW 0 "s_register_operand" "=w") | |
94f0f2cc | 3161 | (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")] |
88f77cba JB |
3162 | UNSPEC_VQNEG))] |
3163 | "TARGET_NEON" | |
c956e102 | 3164 | "vqneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1" |
f7379e5e | 3165 | [(set_attr "type" "neon_qneg<q>")] |
c956e102 | 3166 | ) |
88f77cba JB |
3167 | |
3168 | (define_insn "neon_vcls<mode>" | |
3169 | [(set (match_operand:VDQIW 0 "s_register_operand" "=w") | |
94f0f2cc | 3170 | (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")] |
88f77cba JB |
3171 | UNSPEC_VCLS))] |
3172 | "TARGET_NEON" | |
c956e102 | 3173 | "vcls.<V_s_elem>\t%<V_reg>0, %<V_reg>1" |
f7379e5e | 3174 | [(set_attr "type" "neon_cls<q>")] |
c956e102 | 3175 | ) |
88f77cba | 3176 | |
b3b7bbce | 3177 | (define_insn "clz<mode>2" |
88f77cba | 3178 | [(set (match_operand:VDQIW 0 "s_register_operand" "=w") |
b3b7bbce | 3179 | (clz:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")))] |
88f77cba | 3180 | "TARGET_NEON" |
c956e102 | 3181 | "vclz.<V_if_elem>\t%<V_reg>0, %<V_reg>1" |
f7379e5e | 3182 | [(set_attr "type" "neon_cnt<q>")] |
c956e102 | 3183 | ) |
88f77cba | 3184 | |
b3b7bbce | 3185 | (define_expand "neon_vclz<mode>" |
cd65e265 DZ |
3186 | [(match_operand:VDQIW 0 "s_register_operand") |
3187 | (match_operand:VDQIW 1 "s_register_operand")] | |
b3b7bbce SL |
3188 | "TARGET_NEON" |
3189 | { | |
3190 | emit_insn (gen_clz<mode>2 (operands[0], operands[1])); | |
3191 | DONE; | |
3192 | }) | |
3193 | ||
3194 | (define_insn "popcount<mode>2" | |
88f77cba | 3195 | [(set (match_operand:VE 0 "s_register_operand" "=w") |
b3b7bbce | 3196 | (popcount:VE (match_operand:VE 1 "s_register_operand" "w")))] |
88f77cba | 3197 | "TARGET_NEON" |
c956e102 | 3198 | "vcnt.<V_sz_elem>\t%<V_reg>0, %<V_reg>1" |
f7379e5e | 3199 | [(set_attr "type" "neon_cnt<q>")] |
c956e102 | 3200 | ) |
88f77cba | 3201 | |
b3b7bbce | 3202 | (define_expand "neon_vcnt<mode>" |
cd65e265 DZ |
3203 | [(match_operand:VE 0 "s_register_operand") |
3204 | (match_operand:VE 1 "s_register_operand")] | |
b3b7bbce SL |
3205 | "TARGET_NEON" |
3206 | { | |
3207 | emit_insn (gen_popcount<mode>2 (operands[0], operands[1])); | |
3208 | DONE; | |
3209 | }) | |
3210 | ||
55a9b91b MW |
3211 | (define_insn "neon_vrecpe<mode>" |
3212 | [(set (match_operand:VH 0 "s_register_operand" "=w") | |
3213 | (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")] | |
3214 | UNSPEC_VRECPE))] | |
3215 | "TARGET_NEON_FP16INST" | |
3216 | "vrecpe.f16\t%<V_reg>0, %<V_reg>1" | |
3217 | [(set_attr "type" "neon_fp_recpe_s<q>")] | |
3218 | ) | |
3219 | ||
88f77cba JB |
3220 | (define_insn "neon_vrecpe<mode>" |
3221 | [(set (match_operand:V32 0 "s_register_operand" "=w") | |
94f0f2cc | 3222 | (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")] |
88f77cba JB |
3223 | UNSPEC_VRECPE))] |
3224 | "TARGET_NEON" | |
c956e102 | 3225 | "vrecpe.<V_u_elem>\t%<V_reg>0, %<V_reg>1" |
f7379e5e | 3226 | [(set_attr "type" "neon_fp_recpe_s<q>")] |
c956e102 | 3227 | ) |
88f77cba JB |
3228 | |
3229 | (define_insn "neon_vrsqrte<mode>" | |
3230 | [(set (match_operand:V32 0 "s_register_operand" "=w") | |
94f0f2cc | 3231 | (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")] |
88f77cba JB |
3232 | UNSPEC_VRSQRTE))] |
3233 | "TARGET_NEON" | |
c956e102 | 3234 | "vrsqrte.<V_u_elem>\t%<V_reg>0, %<V_reg>1" |
f7379e5e | 3235 | [(set_attr "type" "neon_fp_rsqrte_s<q>")] |
c956e102 | 3236 | ) |
88f77cba JB |
3237 | |
3238 | (define_expand "neon_vmvn<mode>" | |
cd65e265 DZ |
3239 | [(match_operand:VDQIW 0 "s_register_operand") |
3240 | (match_operand:VDQIW 1 "s_register_operand")] | |
88f77cba JB |
3241 | "TARGET_NEON" |
3242 | { | |
fd436034 | 3243 | emit_insn (gen_one_cmpl<mode>2_neon (operands[0], operands[1])); |
88f77cba JB |
3244 | DONE; |
3245 | }) | |
3246 | ||
89ffa8fc JB |
3247 | (define_insn "neon_vget_lane<mode>_sext_internal" |
3248 | [(set (match_operand:SI 0 "s_register_operand" "=r") | |
3249 | (sign_extend:SI | |
3250 | (vec_select:<V_elem> | |
3251 | (match_operand:VD 1 "s_register_operand" "w") | |
3252 | (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] | |
88f77cba | 3253 | "TARGET_NEON" |
874d42b9 JM |
3254 | { |
3255 | if (BYTES_BIG_ENDIAN) | |
3256 | { | |
3257 | int elt = INTVAL (operands[2]); | |
3258 | elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt; | |
3259 | operands[2] = GEN_INT (elt); | |
3260 | } | |
c3b1709a | 3261 | return "vmov.s<V_sz_elem>\t%0, %P1[%c2]"; |
874d42b9 | 3262 | } |
f7379e5e | 3263 | [(set_attr "type" "neon_to_gp")] |
c956e102 | 3264 | ) |
88f77cba | 3265 | |
89ffa8fc JB |
3266 | (define_insn "neon_vget_lane<mode>_zext_internal" |
3267 | [(set (match_operand:SI 0 "s_register_operand" "=r") | |
3268 | (zero_extend:SI | |
3269 | (vec_select:<V_elem> | |
3270 | (match_operand:VD 1 "s_register_operand" "w") | |
3271 | (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] | |
3272 | "TARGET_NEON" | |
874d42b9 JM |
3273 | { |
3274 | if (BYTES_BIG_ENDIAN) | |
3275 | { | |
3276 | int elt = INTVAL (operands[2]); | |
3277 | elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt; | |
3278 | operands[2] = GEN_INT (elt); | |
3279 | } | |
c3b1709a | 3280 | return "vmov.u<V_sz_elem>\t%0, %P1[%c2]"; |
874d42b9 | 3281 | } |
f7379e5e | 3282 | [(set_attr "type" "neon_to_gp")] |
89ffa8fc | 3283 | ) |
88f77cba | 3284 | |
89ffa8fc JB |
3285 | (define_insn "neon_vget_lane<mode>_sext_internal" |
3286 | [(set (match_operand:SI 0 "s_register_operand" "=r") | |
3287 | (sign_extend:SI | |
3288 | (vec_select:<V_elem> | |
4b644867 | 3289 | (match_operand:VQ2 1 "s_register_operand" "w") |
89ffa8fc | 3290 | (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] |
88f77cba | 3291 | "TARGET_NEON" |
b617fc71 | 3292 | { |
89ffa8fc JB |
3293 | rtx ops[3]; |
3294 | int regno = REGNO (operands[1]); | |
3295 | unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2; | |
3296 | unsigned int elt = INTVAL (operands[2]); | |
874d42b9 JM |
3297 | unsigned int elt_adj = elt % halfelts; |
3298 | ||
3299 | if (BYTES_BIG_ENDIAN) | |
3300 | elt_adj = halfelts - 1 - elt_adj; | |
89ffa8fc JB |
3301 | |
3302 | ops[0] = operands[0]; | |
3303 | ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts)); | |
874d42b9 | 3304 | ops[2] = GEN_INT (elt_adj); |
c3b1709a | 3305 | output_asm_insn ("vmov.s<V_sz_elem>\t%0, %P1[%c2]", ops); |
89ffa8fc JB |
3306 | |
3307 | return ""; | |
b617fc71 | 3308 | } |
f7379e5e | 3309 | [(set_attr "type" "neon_to_gp_q")] |
c956e102 | 3310 | ) |
88f77cba | 3311 | |
89ffa8fc JB |
3312 | (define_insn "neon_vget_lane<mode>_zext_internal" |
3313 | [(set (match_operand:SI 0 "s_register_operand" "=r") | |
3314 | (zero_extend:SI | |
3315 | (vec_select:<V_elem> | |
4b644867 | 3316 | (match_operand:VQ2 1 "s_register_operand" "w") |
89ffa8fc | 3317 | (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] |
88f77cba JB |
3318 | "TARGET_NEON" |
3319 | { | |
89ffa8fc | 3320 | rtx ops[3]; |
88f77cba JB |
3321 | int regno = REGNO (operands[1]); |
3322 | unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2; | |
3323 | unsigned int elt = INTVAL (operands[2]); | |
874d42b9 JM |
3324 | unsigned int elt_adj = elt % halfelts; |
3325 | ||
3326 | if (BYTES_BIG_ENDIAN) | |
3327 | elt_adj = halfelts - 1 - elt_adj; | |
88f77cba JB |
3328 | |
3329 | ops[0] = operands[0]; | |
3330 | ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts)); | |
874d42b9 | 3331 | ops[2] = GEN_INT (elt_adj); |
c3b1709a | 3332 | output_asm_insn ("vmov.u<V_sz_elem>\t%0, %P1[%c2]", ops); |
88f77cba JB |
3333 | |
3334 | return ""; | |
3335 | } | |
f7379e5e | 3336 | [(set_attr "type" "neon_to_gp_q")] |
89ffa8fc JB |
3337 | ) |
3338 | ||
3339 | (define_expand "neon_vget_lane<mode>" | |
cd65e265 DZ |
3340 | [(match_operand:<V_ext> 0 "s_register_operand") |
3341 | (match_operand:VDQW 1 "s_register_operand") | |
3342 | (match_operand:SI 2 "immediate_operand")] | |
89ffa8fc JB |
3343 | "TARGET_NEON" |
3344 | { | |
874d42b9 JM |
3345 | if (BYTES_BIG_ENDIAN) |
3346 | { | |
3347 | /* The intrinsics are defined in terms of a model where the | |
3348 | element ordering in memory is vldm order, whereas the generic | |
3349 | RTL is defined in terms of a model where the element ordering | |
3350 | in memory is array order. Convert the lane number to conform | |
3351 | to this model. */ | |
3352 | unsigned int elt = INTVAL (operands[2]); | |
3353 | unsigned int reg_nelts | |
6c825cd4 | 3354 | = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode); |
874d42b9 JM |
3355 | elt ^= reg_nelts - 1; |
3356 | operands[2] = GEN_INT (elt); | |
3357 | } | |
3358 | ||
6c825cd4 | 3359 | if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32) |
ff03930a JJ |
3360 | emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], operands[1], |
3361 | operands[2])); | |
89ffa8fc | 3362 | else |
94f0f2cc JG |
3363 | emit_insn (gen_neon_vget_lane<mode>_sext_internal (operands[0], |
3364 | operands[1], | |
3365 | operands[2])); | |
3366 | DONE; | |
3367 | }) | |
3368 | ||
3369 | (define_expand "neon_vget_laneu<mode>" | |
cd65e265 DZ |
3370 | [(match_operand:<V_ext> 0 "s_register_operand") |
3371 | (match_operand:VDQIW 1 "s_register_operand") | |
3372 | (match_operand:SI 2 "immediate_operand")] | |
94f0f2cc JG |
3373 | "TARGET_NEON" |
3374 | { | |
94f0f2cc | 3375 | if (BYTES_BIG_ENDIAN) |
89ffa8fc | 3376 | { |
94f0f2cc JG |
3377 | /* The intrinsics are defined in terms of a model where the |
3378 | element ordering in memory is vldm order, whereas the generic | |
3379 | RTL is defined in terms of a model where the element ordering | |
3380 | in memory is array order. Convert the lane number to conform | |
3381 | to this model. */ | |
3382 | unsigned int elt = INTVAL (operands[2]); | |
3383 | unsigned int reg_nelts | |
6c825cd4 | 3384 | = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode); |
94f0f2cc JG |
3385 | elt ^= reg_nelts - 1; |
3386 | operands[2] = GEN_INT (elt); | |
89ffa8fc | 3387 | } |
94f0f2cc | 3388 | |
6c825cd4 | 3389 | if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32) |
ff03930a JJ |
3390 | emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], operands[1], |
3391 | operands[2])); | |
94f0f2cc JG |
3392 | else |
3393 | emit_insn (gen_neon_vget_lane<mode>_zext_internal (operands[0], | |
3394 | operands[1], | |
3395 | operands[2])); | |
89ffa8fc JB |
3396 | DONE; |
3397 | }) | |
3398 | ||
a277dd9b | 3399 | (define_expand "neon_vget_lanedi" |
cd65e265 DZ |
3400 | [(match_operand:DI 0 "s_register_operand") |
3401 | (match_operand:DI 1 "s_register_operand") | |
3402 | (match_operand:SI 2 "immediate_operand")] | |
89ffa8fc JB |
3403 | "TARGET_NEON" |
3404 | { | |
a277dd9b SL |
3405 | emit_move_insn (operands[0], operands[1]); |
3406 | DONE; | |
3407 | }) | |
88f77cba | 3408 | |
a277dd9b | 3409 | (define_expand "neon_vget_lanev2di" |
cd65e265 DZ |
3410 | [(match_operand:DI 0 "s_register_operand") |
3411 | (match_operand:V2DI 1 "s_register_operand") | |
3412 | (match_operand:SI 2 "immediate_operand")] | |
88f77cba JB |
3413 | "TARGET_NEON" |
3414 | { | |
69b23ad6 CL |
3415 | int lane; |
3416 | ||
3417 | if (BYTES_BIG_ENDIAN) | |
3418 | { | |
3419 | /* The intrinsics are defined in terms of a model where the | |
3420 | element ordering in memory is vldm order, whereas the generic | |
3421 | RTL is defined in terms of a model where the element ordering | |
3422 | in memory is array order. Convert the lane number to conform | |
3423 | to this model. */ | |
3424 | unsigned int elt = INTVAL (operands[2]); | |
3425 | unsigned int reg_nelts = 2; | |
3426 | elt ^= reg_nelts - 1; | |
3427 | operands[2] = GEN_INT (elt); | |
3428 | } | |
3429 | ||
3430 | lane = INTVAL (operands[2]); | |
eaa80f64 AL |
3431 | gcc_assert ((lane ==0) || (lane == 1)); |
3432 | emit_move_insn (operands[0], lane == 0 | |
3433 | ? gen_lowpart (DImode, operands[1]) | |
3434 | : gen_highpart (DImode, operands[1])); | |
a277dd9b SL |
3435 | DONE; |
3436 | }) | |
b617fc71 | 3437 | |
a277dd9b | 3438 | (define_expand "neon_vset_lane<mode>" |
cd65e265 DZ |
3439 | [(match_operand:VDQ 0 "s_register_operand") |
3440 | (match_operand:<V_elem> 1 "s_register_operand") | |
3441 | (match_operand:VDQ 2 "s_register_operand") | |
3442 | (match_operand:SI 3 "immediate_operand")] | |
88f77cba JB |
3443 | "TARGET_NEON" |
3444 | { | |
88f77cba JB |
3445 | unsigned int elt = INTVAL (operands[3]); |
3446 | ||
a277dd9b SL |
3447 | if (BYTES_BIG_ENDIAN) |
3448 | { | |
3449 | unsigned int reg_nelts | |
6c825cd4 | 3450 | = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode); |
a277dd9b SL |
3451 | elt ^= reg_nelts - 1; |
3452 | } | |
b617fc71 | 3453 | |
a277dd9b SL |
3454 | emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1], |
3455 | GEN_INT (1 << elt), operands[2])); | |
3456 | DONE; | |
3457 | }) | |
88f77cba | 3458 | |
a277dd9b | 3459 | ; As in neon_vget_lanedi, a DImode value has only one lane, so operands 2 & 3 (the old vector and the lane number) are ignored. |
88f77cba | 3460 | |
a277dd9b | 3461 | (define_expand "neon_vset_lanedi" |
cd65e265 DZ |
3462 | [(match_operand:DI 0 "s_register_operand") |
3463 | (match_operand:DI 1 "s_register_operand") | |
3464 | (match_operand:DI 2 "s_register_operand") | |
3465 | (match_operand:SI 3 "immediate_operand")] | |
88f77cba JB |
3466 | "TARGET_NEON" |
3467 | { | |
a277dd9b SL |
3468 | emit_move_insn (operands[0], operands[1]); |
3469 | DONE; | |
3470 | }) | |
88f77cba JB |
3471 | |
3472 | (define_expand "neon_vcreate<mode>" | |
cd65e265 DZ |
3473 | [(match_operand:VD_RE 0 "s_register_operand") |
3474 | (match_operand:DI 1 "general_operand")] | |
88f77cba JB |
3475 | "TARGET_NEON" |
3476 | { | |
3477 | rtx src = gen_lowpart (<MODE>mode, operands[1]); | |
3478 | emit_move_insn (operands[0], src); | |
3479 | DONE; | |
3480 | }) | |
3481 | ||
3482 | (define_insn "neon_vdup_n<mode>" | |
814a4c3b | 3483 | [(set (match_operand:VX 0 "s_register_operand" "=w") |
a277dd9b | 3484 | (vec_duplicate:VX (match_operand:<V_elem> 1 "s_register_operand" "r")))] |
88f77cba | 3485 | "TARGET_NEON" |
c3b1709a | 3486 | "vdup.<V_sz_elem>\t%<V_reg>0, %1" |
f7379e5e | 3487 | [(set_attr "type" "neon_from_gp<q>")] |
c956e102 | 3488 | ) |
88f77cba | 3489 | |
92422235 CL |
3490 | (define_insn "neon_vdup_nv4hf" |
3491 | [(set (match_operand:V4HF 0 "s_register_operand" "=w") | |
3492 | (vec_duplicate:V4HF (match_operand:HF 1 "s_register_operand" "r")))] | |
3493 | "TARGET_NEON" | |
3494 | "vdup.16\t%P0, %1" | |
3495 | [(set_attr "type" "neon_from_gp")] | |
3496 | ) | |
3497 | ||
;; vec_duplicate of an HFmode scalar (constraint "r") into a V8HF
;; destination; emitted as vdup.16 with the destination printed via %q0.
3498 | (define_insn "neon_vdup_nv8hf" | |
3499 | [(set (match_operand:V8HF 0 "s_register_operand" "=w") | |
3500 | (vec_duplicate:V8HF (match_operand:HF 1 "s_register_operand" "r")))] | |
3501 | "TARGET_NEON" | |
3502 | "vdup.16\t%q0, %1" | |
3503 | [(set_attr "type" "neon_from_gp_q")] | |
3504 | ) | |
3505 | ||
;; vec_duplicate of a BFmode scalar (constraint "r") into a V4BF
;; destination; emitted as vdup.16 with the destination printed via %P0.
17a13507 MI |
3506 | (define_insn "neon_vdup_nv4bf" |
3507 | [(set (match_operand:V4BF 0 "s_register_operand" "=w") | |
3508 | (vec_duplicate:V4BF (match_operand:BF 1 "s_register_operand" "r")))] | |
3509 | "TARGET_NEON" | |
3510 | "vdup.16\t%P0, %1" | |
3511 | [(set_attr "type" "neon_from_gp")] | |
3512 | ) | |
3513 | ||
;; vec_duplicate of a BFmode scalar (constraint "r") into a V8BF
;; destination; emitted as vdup.16 with the destination printed via %q0.
3514 | (define_insn "neon_vdup_nv8bf" | |
3515 | [(set (match_operand:V8BF 0 "s_register_operand" "=w") | |
3516 | (vec_duplicate:V8BF (match_operand:BF 1 "s_register_operand" "r")))] | |
3517 | "TARGET_NEON" | |
3518 | "vdup.16\t%q0, %1" | |
3519 | [(set_attr "type" "neon_from_gp_q")] | |
3520 | ) | |
3521 | ||
;; vec_duplicate for V32 modes with two alternatives for the scalar source:
;; constraint "r" (printed as %1, type neon_from_gp<q>) or constraint "t"
;; (printed with the %y1 modifier, type neon_dup<q>).
814a4c3b DJ |
3522 | (define_insn "neon_vdup_n<mode>" |
3523 | [(set (match_operand:V32 0 "s_register_operand" "=w,w") | |
a277dd9b | 3524 | (vec_duplicate:V32 (match_operand:<V_elem> 1 "s_register_operand" "r,t")))] |
814a4c3b DJ |
3525 | "TARGET_NEON" |
3526 | "@ | |
c3b1709a RR |
3527 | vdup.<V_sz_elem>\t%<V_reg>0, %1 |
3528 | vdup.<V_sz_elem>\t%<V_reg>0, %y1" | |
f7379e5e | 3529 | [(set_attr "type" "neon_from_gp<q>,neon_dup<q>")] |
814a4c3b DJ |
3530 | ) |
3531 | ||
;; Expander for the DImode dup builtin: source and destination are both
;; DImode, so the expansion is a plain move of operand 1 into operand 0.
a277dd9b | 3532 | (define_expand "neon_vdup_ndi" |
cd65e265 DZ |
3533 | [(match_operand:DI 0 "s_register_operand") |
3534 | (match_operand:DI 1 "s_register_operand")] | |
88f77cba | 3535 | "TARGET_NEON" |
a277dd9b SL |
3536 | { |
3537 | emit_move_insn (operands[0], operands[1]); | |
3538 | DONE; | |
3539 | } | |
c956e102 | 3540 | ) |
88f77cba JB |
3541 | |
;; vec_duplicate of a DImode value into V2DI.  Both alternatives (source
;; constraint "r" or "w") emit two vmov instructions, one writing %e0 and
;; one writing %f0, hence the "length" of 8 and type "multiple".
3542 | (define_insn "neon_vdup_nv2di" | |
a277dd9b SL |
3543 | [(set (match_operand:V2DI 0 "s_register_operand" "=w,w") |
3544 | (vec_duplicate:V2DI (match_operand:DI 1 "s_register_operand" "r,w")))] | |
88f77cba | 3545 | "TARGET_NEON" |
a277dd9b | 3546 | "@ |
c3b1709a RR |
3547 | vmov\t%e0, %Q1, %R1\;vmov\t%f0, %Q1, %R1 |
3548 | vmov\t%e0, %P1\;vmov\t%f0, %P1" | |
3549 | [(set_attr "length" "8") | |
f7379e5e | 3550 | (set_attr "type" "multiple")] |
c956e102 | 3551 | ) |
88f77cba | 3552 | |
;; Duplicate the element of operand 1 selected by immediate operand 2 into
;; every element of a VDQW-mode destination.  When BYTES_BIG_ENDIAN the lane
;; index is mirrored (NUNITS - 1 - elt) before it is printed.  The
;; destination is printed with %P0 when <Is_d_reg> holds, otherwise with %q0.
a277dd9b SL |
3553 | (define_insn "neon_vdup_lane<mode>_internal" |
3554 | [(set (match_operand:VDQW 0 "s_register_operand" "=w") | |
3555 | (vec_duplicate:VDQW | |
3556 | (vec_select:<V_elem> | |
3557 | (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w") | |
3558 | (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] | |
88f77cba | 3559 | "TARGET_NEON" |
b617fc71 | 3560 | { |
a277dd9b SL |
3561 | if (BYTES_BIG_ENDIAN) |
3562 | { | |
3563 | int elt = INTVAL (operands[2]); | |
3564 | elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt; | |
3565 | operands[2] = GEN_INT (elt); | |
3566 | } | |
3567 | if (<Is_d_reg>) | |
3568 | return "vdup.<V_sz_elem>\t%P0, %P1[%c2]"; | |
3569 | else | |
3570 | return "vdup.<V_sz_elem>\t%q0, %P1[%c2]"; | |
b617fc71 | 3571 | } |
f7379e5e | 3572 | [(set_attr "type" "neon_dup<q>")] |
c956e102 | 3573 | ) |
88f77cba | 3574 | |
;; VHFBF-mode counterpart of the lane-duplicate insn: same RTL shape and
;; output logic, but additionally gated on TARGET_FP16 or TARGET_BF16_SIMD.
;; When BYTES_BIG_ENDIAN the lane index is mirrored (NUNITS - 1 - elt)
;; before printing.
b1a970a5 | 3575 | (define_insn "neon_vdup_lane<mode>_internal" |
17a13507 MI |
3576 | [(set (match_operand:VHFBF 0 "s_register_operand" "=w") |
3577 | (vec_duplicate:VHFBF | |
b1a970a5 MW |
3578 | (vec_select:<V_elem> |
3579 | (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w") | |
3580 | (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] | |
17a13507 | 3581 | "TARGET_NEON && (TARGET_FP16 || TARGET_BF16_SIMD)" |
b1a970a5 MW |
3582 | { |
3583 | if (BYTES_BIG_ENDIAN) | |
3584 | { | |
3585 | int elt = INTVAL (operands[2]); | |
3586 | elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt; | |
3587 | operands[2] = GEN_INT (elt); | |
3588 | } | |
3589 | if (<Is_d_reg>) | |
3590 | return "vdup.<V_sz_elem>\t%P0, %P1[%c2]"; | |
3591 | else | |
3592 | return "vdup.<V_sz_elem>\t%q0, %P1[%c2]"; | |
3593 | } | |
3594 | [(set_attr "type" "neon_dup<q>")] | |
3595 | ) | |
3596 | ||
;; Expander for the lane-duplicate builtin on VDQW modes.  When
;; BYTES_BIG_ENDIAN the lane index is XORed with (elements per 64 bits - 1)
;; before being handed to the matching _internal insn.
a277dd9b | 3597 | (define_expand "neon_vdup_lane<mode>" |
cd65e265 DZ |
3598 | [(match_operand:VDQW 0 "s_register_operand") |
3599 | (match_operand:<V_double_vector_mode> 1 "s_register_operand") | |
3600 | (match_operand:SI 2 "immediate_operand")] | |
88f77cba | 3601 | "TARGET_NEON" |
b617fc71 | 3602 | { |
a277dd9b SL |
3603 | if (BYTES_BIG_ENDIAN) |
3604 | { | |
3605 | unsigned int elt = INTVAL (operands[2]); | |
3606 | unsigned int reg_nelts | |
6c825cd4 | 3607 | = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode); |
a277dd9b SL |
3608 | elt ^= reg_nelts - 1; |
3609 | operands[2] = GEN_INT (elt); | |
3610 | } | |
3611 | emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1], | |
3612 | operands[2])); | |
3613 | DONE; | |
3614 | }) | |
88f77cba | 3615 | |
;; Expander for the lane-duplicate builtin on VHFBF modes; gated on
;; TARGET_FP16 or TARGET_BF16_SIMD.  When BYTES_BIG_ENDIAN the lane index is
;; XORed with (elements per 64 bits - 1) before the _internal insn is emitted.
b1a970a5 | 3616 | (define_expand "neon_vdup_lane<mode>" |
17a13507 | 3617 | [(match_operand:VHFBF 0 "s_register_operand") |
b1a970a5 MW |
3618 | (match_operand:<V_double_vector_mode> 1 "s_register_operand") |
3619 | (match_operand:SI 2 "immediate_operand")] | |
17a13507 | 3620 | "TARGET_NEON && (TARGET_FP16 || TARGET_BF16_SIMD)" |
b1a970a5 MW |
3621 | { |
3622 | if (BYTES_BIG_ENDIAN) | |
3623 | { | |
3624 | unsigned int elt = INTVAL (operands[2]); | |
3625 | unsigned int reg_nelts | |
3626 | = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode); | |
3627 | elt ^= reg_nelts - 1; | |
3628 | operands[2] = GEN_INT (elt); | |
3629 | } | |
3630 | emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1], | |
3631 | operands[2])); | |
3632 | DONE; | |
3633 | }) | |
3634 | ||
88f77cba JB |
3635 | ; Scalar index is ignored, since only zero is valid here. |
;; DImode lane-duplicate expander: operand 2 is never referenced and the
;; expansion is a plain move of operand 1 into operand 0.
3636 | (define_expand "neon_vdup_lanedi" | |
cd65e265 DZ |
3637 | [(match_operand:DI 0 "s_register_operand") |
3638 | (match_operand:DI 1 "s_register_operand") | |
3639 | (match_operand:SI 2 "immediate_operand")] | |
88f77cba JB |
3640 | "TARGET_NEON" |
3641 | { | |
3642 | emit_move_insn (operands[0], operands[1]); | |
3643 | DONE; | |
3644 | }) | |
3645 | ||
a277dd9b SL |
3646 | ; Likewise for v2di, as the DImode second operand has only a single element. |
;; V2DI lane-duplicate expander: operand 2 is never referenced; the work is
;; delegated to gen_neon_vdup_nv2di on operands 0 and 1.
3647 | (define_expand "neon_vdup_lanev2di" | |
cd65e265 DZ |
3648 | [(match_operand:V2DI 0 "s_register_operand") |
3649 | (match_operand:DI 1 "s_register_operand") | |
3650 | (match_operand:SI 2 "immediate_operand")] | |
88f77cba | 3651 | "TARGET_NEON" |
b617fc71 | 3652 | { |
a277dd9b SL |
3653 | emit_insn (gen_neon_vdup_nv2di (operands[0], operands[1])); |
3654 | DONE; | |
3655 | }) | |
88f77cba | 3656 | |
b440f324 RH |
3657 | ; Disabled before reload because we don't want combine doing something silly, |
3658 | ; but used by the post-reload expansion of neon_vcombine. | |
;; Two parallel sets that exchange operands 0 and 1 (both "+w", i.e. read
;; and written); only matches once reload_completed is set.  Emits vswp.
3659 | (define_insn "*neon_vswp<mode>" | |
3660 | [(set (match_operand:VDQX 0 "s_register_operand" "+w") | |
3661 | (match_operand:VDQX 1 "s_register_operand" "+w")) | |
3662 | (set (match_dup 1) (match_dup 0))] | |
3663 | "TARGET_NEON && reload_completed" | |
dc2c7a52 | 3664 | "vswp\t%<V_reg>0, %<V_reg>1" |
f7379e5e | 3665 | [(set_attr "type" "neon_permute<q>")] |
b440f324 RH |
3666 | ) |
3667 | ||
88f77cba JB |
3668 | ;; In this insn, operand 1 should be low, and operand 2 the high part of the |
3669 | ;; dest vector. | |
3670 | ;; FIXME: A different implementation of this builtin could make it much | |
3671 | ;; more likely that we wouldn't actually need to output anything (we could make | |
3672 | ;; it so that the reg allocator puts things in the right places magically | |
3673 | ;; instead). Lack of subregs for vectors makes that tricky though, I think. | |
3674 | ||
;; vec_concat of two VDX operands into a <V_DOUBLE> destination.  The output
;; template is "#", so no assembly is printed directly; once reload_completed
;; holds the insn is split by calling neon_split_vcombine on the operands.
b440f324 | 3675 | (define_insn_and_split "neon_vcombine<mode>" |
88f77cba | 3676 | [(set (match_operand:<V_DOUBLE> 0 "s_register_operand" "=w") |
b440f324 RH |
3677 | (vec_concat:<V_DOUBLE> |
3678 | (match_operand:VDX 1 "s_register_operand" "w") | |
3679 | (match_operand:VDX 2 "s_register_operand" "w")))] | |
88f77cba | 3680 | "TARGET_NEON" |
b440f324 RH |
3681 | "#" |
3682 | "&& reload_completed" | |
3683 | [(const_int 0)] | |
88f77cba | 3684 | { |
b440f324 RH |
3685 | neon_split_vcombine (operands); |
3686 | DONE; | |
f7379e5e JG |
3687 | } |
3688 | [(set_attr "type" "multiple")] | |
3689 | ) | |
88f77cba | 3690 | |
;; Expander extracting the half of operand 1 that lives at byte offset
;; GET_MODE_SIZE (<V_HALF>mode): a <V_HALF>mode subreg at that offset is
;; moved into operand 0.
ddfd2edf RS |
3691 | (define_expand "neon_vget_high<mode>" |
3692 | [(match_operand:<V_HALF> 0 "s_register_operand") | |
2d22ab64 | 3693 | (match_operand:VQXBF 1 "s_register_operand")] |
a277dd9b SL |
3694 | "TARGET_NEON" |
3695 | { | |
ddfd2edf RS |
3696 | emit_move_insn (operands[0], |
3697 | simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, | |
3698 | GET_MODE_SIZE (<V_HALF>mode))); | |
3699 | DONE; | |
3700 | }) | |
a277dd9b | 3701 | |
;; Expander extracting the half of operand 1 that lives at byte offset 0:
;; a <V_HALF>mode subreg at offset 0 is moved into operand 0.
;; NOTE(review): this uses the VQX iterator while neon_vget_high uses VQXBF;
;; confirm whether the difference is intended.
ddfd2edf RS |
3702 | (define_expand "neon_vget_low<mode>" |
3703 | [(match_operand:<V_HALF> 0 "s_register_operand") | |
3704 | (match_operand:VQX 1 "s_register_operand")] | |
88f77cba JB |
3705 | "TARGET_NEON" |
3706 | { | |
ddfd2edf RS |
3707 | emit_move_insn (operands[0], |
3708 | simplify_gen_subreg (<V_HALF>mode, operands[1], | |
3709 | <MODE>mode, 0)); | |
3710 | DONE; | |
3711 | }) | |
88f77cba | 3712 | |
;; Signed integer vector (VCVTI) to floating-point (<V_CVTTO>) conversion,
;; emitted as vcvt.f32.s32.  Not available when flag_rounding_math is set.
5bf4dcf2 DP |
3713 | (define_insn "float<mode><V_cvtto>2" |
3714 | [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w") | |
3715 | (float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))] | |
3716 | "TARGET_NEON && !flag_rounding_math" | |
3717 | "vcvt.f32.s32\t%<V_reg>0, %<V_reg>1" | |
f7379e5e | 3718 | [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")] |
5bf4dcf2 DP |
3719 | ) |
3720 | ||
;; Unsigned integer vector (VCVTI) to floating-point (<V_CVTTO>) conversion,
;; emitted as vcvt.f32.u32.  Not available when flag_rounding_math is set.
3721 | (define_insn "floatuns<mode><V_cvtto>2" | |
3722 | [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w") | |
3723 | (unsigned_float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))] | |
3724 | "TARGET_NEON && !flag_rounding_math" | |
3725 | "vcvt.f32.u32\t%<V_reg>0, %<V_reg>1" | |
f7379e5e | 3726 | [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")] |
5bf4dcf2 DP |
3727 | ) |
3728 | ||
;; Floating-point vector (VCVTF) to signed integer (<V_CVTTO>) conversion
;; expressed with the RTL "fix" code; emitted as vcvt.s32.f32.
3729 | (define_insn "fix_trunc<mode><V_cvtto>2" | |
3730 | [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w") | |
3731 | (fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))] | |
3732 | "TARGET_NEON" | |
3733 | "vcvt.s32.f32\t%<V_reg>0, %<V_reg>1" | |
f7379e5e | 3734 | [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")] |
5bf4dcf2 DP |
3735 | ) |
3736 | ||
;; Floating-point vector (VCVTF) to unsigned integer (<V_CVTTO>) conversion
;; expressed with the RTL "unsigned_fix" code; emitted as vcvt.u32.f32.
3737 | (define_insn "fixuns_trunc<mode><V_cvtto>2" | |
3738 | [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w") | |
3739 | (unsigned_fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))] | |
3740 | "TARGET_NEON" | |
3741 | "vcvt.u32.f32\t%<V_reg>0, %<V_reg>1" | |
f7379e5e | 3742 | [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")] |
5bf4dcf2 DP |
3743 | ) |
3744 | ||
;; Builtin vcvt from a VCVTF operand to <V_CVTTO>, modelled as a VCVT_US
;; unspec; the <sup> attribute supplies the signedness letter in the
;; "vcvt.<sup>32.f32" mnemonic.
94f0f2cc | 3745 | (define_insn "neon_vcvt<sup><mode>" |
88f77cba | 3746 | [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w") |
94f0f2cc JG |
3747 | (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")] |
3748 | VCVT_US))] | |
88f77cba | 3749 | "TARGET_NEON" |
94f0f2cc | 3750 | "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1" |
f7379e5e | 3751 | [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")] |
c956e102 | 3752 | ) |
88f77cba | 3753 | |
;; Builtin vcvt from a VCVTI operand to <V_CVTTO>, modelled as a VCVT_US
;; unspec; the <sup> attribute supplies the signedness letter in the
;; "vcvt.f32.<sup>32" mnemonic.
94f0f2cc | 3754 | (define_insn "neon_vcvt<sup><mode>" |
88f77cba | 3755 | [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w") |
94f0f2cc JG |
3756 | (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")] |
3757 | VCVT_US))] | |
88f77cba | 3758 | "TARGET_NEON" |
94f0f2cc | 3759 | "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1" |
f7379e5e | 3760 | [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")] |
c956e102 | 3761 | ) |
88f77cba | 3762 | |
;; V4HF to V4SF conversion (UNSPEC_VCVT), emitted as vcvt.f32.f16 with the
;; destination printed via %q0 and the source via %P1.  Requires TARGET_FP16.
5819f96f KT |
3763 | (define_insn "neon_vcvtv4sfv4hf" |
3764 | [(set (match_operand:V4SF 0 "s_register_operand" "=w") | |
3765 | (unspec:V4SF [(match_operand:V4HF 1 "s_register_operand" "w")] | |
3766 | UNSPEC_VCVT))] | |
3767 | "TARGET_NEON && TARGET_FP16" | |
3768 | "vcvt.f32.f16\t%q0, %P1" | |
f7379e5e | 3769 | [(set_attr "type" "neon_fp_cvt_widen_h")] |
5819f96f KT |
3770 | ) |
3771 | ||
;; V4SF to V4HF conversion (UNSPEC_VCVT), emitted as vcvt.f16.f32 with the
;; destination printed via %P0 and the source via %q1.  Requires TARGET_FP16.
3772 | (define_insn "neon_vcvtv4hfv4sf" | |
3773 | [(set (match_operand:V4HF 0 "s_register_operand" "=w") | |
3774 | (unspec:V4HF [(match_operand:V4SF 1 "s_register_operand" "w")] | |
3775 | UNSPEC_VCVT))] | |
3776 | "TARGET_NEON && TARGET_FP16" | |
3777 | "vcvt.f16.f32\t%P0, %q1" | |
f7379e5e | 3778 | [(set_attr "type" "neon_fp_cvt_narrow_s_q")] |
5819f96f KT |
3779 | ) |
3780 | ||
;; Builtin vcvt from a VCVTHI operand to <VH_CVTTO> (VCVT_US unspec),
;; emitted as "vcvt.f16.<sup>16".  Requires TARGET_NEON_FP16INST.
55a9b91b MW |
3781 | (define_insn "neon_vcvt<sup><mode>" |
3782 | [(set | |
3783 | (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w") | |
3784 | (unspec:<VH_CVTTO> | |
3785 | [(match_operand:VCVTHI 1 "s_register_operand" "w")] | |
3786 | VCVT_US))] | |
3787 | "TARGET_NEON_FP16INST" | |
3788 | "vcvt.f16.<sup>%#16\t%<V_reg>0, %<V_reg>1" | |
3789 | [(set_attr "type" "neon_int_to_fp_<VH_elem_ch><q>")] | |
3790 | ) | |
3791 | ||
;; Builtin vcvt from a VH operand to <VH_CVTTO> (VCVT_US unspec), emitted
;; as "vcvt.<sup>16.f16".  Requires TARGET_NEON_FP16INST.
3792 | (define_insn "neon_vcvt<sup><mode>" | |
3793 | [(set | |
3794 | (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w") | |
3795 | (unspec:<VH_CVTTO> | |
3796 | [(match_operand:VH 1 "s_register_operand" "w")] | |
3797 | VCVT_US))] | |
3798 | "TARGET_NEON_FP16INST" | |
3799 | "vcvt.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1" | |
3800 | [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")] | |
3801 | ) | |
3802 | ||
;; vcvt with an immediate third operand (VCVT_US_N unspec) from a VCVTF
;; operand.  Operand 2 is validated with arm_const_bounds (..., 1, 33)
;; before the instruction is printed with %2 as its last operand.
94f0f2cc | 3803 | (define_insn "neon_vcvt<sup>_n<mode>" |
88f77cba JB |
3804 | [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w") |
3805 | (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w") | |
94f0f2cc JG |
3806 | (match_operand:SI 2 "immediate_operand" "i")] |
3807 | VCVT_US_N))] | |
88f77cba | 3808 | "TARGET_NEON" |
b617fc71 | 3809 | { |
d57daa0c | 3810 | arm_const_bounds (operands[2], 1, 33); |
94f0f2cc | 3811 | return "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1, %2"; |
b617fc71 | 3812 | } |
f7379e5e | 3813 | [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")] |
c956e102 | 3814 | ) |
88f77cba | 3815 | |
;; vcvt with an immediate third operand (VCVT_US_N unspec) from a VH
;; operand to <VH_CVTTO>.  Requires TARGET_NEON_FP16INST.
;; Operand 2 is the fraction-bit count.  The bounds passed to
;; arm_const_bounds follow the same shape as the 32-bit patterns, which
;; use (1, 33): the lower bound is 1 because a count of 0 cannot be
;; encoded for 16-bit elements (presumably the upper bound is exclusive,
;; giving 1..16 -- confirm against arm_const_bounds).
55a9b91b MW |
3816 | (define_insn "neon_vcvt<sup>_n<mode>" |
3817 | [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w") | |
3818 | (unspec:<VH_CVTTO> | |
3819 | [(match_operand:VH 1 "s_register_operand" "w") | |
3820 | (match_operand:SI 2 "immediate_operand" "i")] | |
3821 | VCVT_US_N))] | |
3822 | "TARGET_NEON_FP16INST" | |
3823 | { | |
d57daa0c | 3824 | arm_const_bounds (operands[2], 1, 17); |
55a9b91b MW |
3825 | return "vcvt.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1, %2"; |
3826 | } | |
3827 | [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")] | |
3828 | ) | |
3829 | ||
;; vcvt with an immediate third operand (VCVT_US_N unspec) from a VCVTI
;; operand.  Operand 2 is validated with arm_const_bounds (..., 1, 33)
;; before the instruction is printed with %2 as its last operand.
94f0f2cc | 3830 | (define_insn "neon_vcvt<sup>_n<mode>" |
88f77cba JB |
3831 | [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w") |
3832 | (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w") | |
94f0f2cc JG |
3833 | (match_operand:SI 2 "immediate_operand" "i")] |
3834 | VCVT_US_N))] | |
88f77cba | 3835 | "TARGET_NEON" |
b617fc71 | 3836 | { |
d57daa0c | 3837 | arm_const_bounds (operands[2], 1, 33); |
94f0f2cc | 3838 | return "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1, %2"; |
b617fc71 | 3839 | } |
f7379e5e | 3840 | [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")] |
c956e102 | 3841 | ) |
88f77cba | 3842 | |
;; vcvt with an immediate third operand (VCVT_US_N unspec) from a VCVTHI
;; operand to <VH_CVTTO>.  Requires TARGET_NEON_FP16INST.
;; Operand 2 is the fraction-bit count.  The bounds passed to
;; arm_const_bounds follow the same shape as the 32-bit patterns, which
;; use (1, 33): the lower bound is 1 because a count of 0 cannot be
;; encoded for 16-bit elements (presumably the upper bound is exclusive,
;; giving 1..16 -- confirm against arm_const_bounds).
55a9b91b MW |
3843 | (define_insn "neon_vcvt<sup>_n<mode>" |
3844 | [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w") | |
3845 | (unspec:<VH_CVTTO> | |
3846 | [(match_operand:VCVTHI 1 "s_register_operand" "w") | |
3847 | (match_operand:SI 2 "immediate_operand" "i")] | |
3848 | VCVT_US_N))] | |
3849 | "TARGET_NEON_FP16INST" | |
3850 | { | |
d57daa0c | 3851 | arm_const_bounds (operands[2], 1, 17); |
55a9b91b MW |
3852 | return "vcvt.f16.<sup>%#16\t%<V_reg>0, %<V_reg>1, %2"; |
3853 | } | |
3854 | [(set_attr "type" "neon_int_to_fp_<VH_elem_ch><q>")] | |
3855 | ) | |
3856 | ||
;; VH to <VH_CVTTO> conversion family over the VCVT_HF_US unspec iterator;
;; <vcvth_op> is appended to the "vcvt" mnemonic and <sup> supplies the
;; signedness letter.  Requires TARGET_NEON_FP16INST.
3857 | (define_insn "neon_vcvt<vcvth_op><sup><mode>" | |
3858 | [(set | |
3859 | (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w") | |
3860 | (unspec:<VH_CVTTO> | |
3861 | [(match_operand:VH 1 "s_register_operand" "w")] | |
3862 | VCVT_HF_US))] | |
3863 | "TARGET_NEON_FP16INST" | |
3864 | "vcvt<vcvth_op>.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1" | |
3865 | [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")] | |
3866 | ) | |
3867 | ||
;; Narrowing move (UNSPEC_VMOVN) from a VN operand to <V_narrow>; emitted as
;; vmovn with the destination printed via %P0 and the source via %q1.
88f77cba JB |
3868 | (define_insn "neon_vmovn<mode>" |
3869 | [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w") | |
94f0f2cc | 3870 | (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")] |
88f77cba JB |
3871 | UNSPEC_VMOVN))] |
3872 | "TARGET_NEON" | |
c956e102 | 3873 | "vmovn.<V_if_elem>\t%P0, %q1" |
f7379e5e | 3874 | [(set_attr "type" "neon_shift_imm_narrow_q")] |
c956e102 | 3875 | ) |
88f77cba | 3876 | |
;; vqmovn over the VQMOVN unspec iterator, from a VN operand to <V_narrow>;
;; <sup> supplies the signedness letter in the element suffix.
94f0f2cc | 3877 | (define_insn "neon_vqmovn<sup><mode>" |
88f77cba | 3878 | [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w") |
94f0f2cc JG |
3879 | (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")] |
3880 | VQMOVN))] | |
88f77cba | 3881 | "TARGET_NEON" |
94f0f2cc | 3882 | "vqmovn.<sup>%#<V_sz_elem>\t%P0, %q1" |
f7379e5e | 3883 | [(set_attr "type" "neon_sat_shift_imm_narrow_q")] |
c956e102 | 3884 | ) |
88f77cba JB |
3885 | |
;; vqmovun (UNSPEC_VQMOVUN) from a VN operand to <V_narrow>; the element
;; suffix comes from <V_s_elem>.
3886 | (define_insn "neon_vqmovun<mode>" | |
3887 | [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w") | |
94f0f2cc | 3888 | (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")] |
88f77cba JB |
3889 | UNSPEC_VQMOVUN))] |
3890 | "TARGET_NEON" | |
c956e102 | 3891 | "vqmovun.<V_s_elem>\t%P0, %q1" |
f7379e5e | 3892 | [(set_attr "type" "neon_sat_shift_imm_narrow_q")] |
c956e102 | 3893 | ) |
88f77cba | 3894 | |
;; vmovl over the VMOVL unspec iterator, from a VW operand to <V_widen>;
;; destination printed via %q0, source via %P1.
94f0f2cc | 3895 | (define_insn "neon_vmovl<sup><mode>" |
88f77cba | 3896 | [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") |
94f0f2cc JG |
3897 | (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")] |
3898 | VMOVL))] | |
88f77cba | 3899 | "TARGET_NEON" |
94f0f2cc | 3900 | "vmovl.<sup>%#<V_sz_elem>\t%q0, %P1" |
f7379e5e | 3901 | [(set_attr "type" "neon_shift_imm_long")] |
c956e102 | 3902 | ) |
88f77cba JB |
3903 | |
;; Multiply VMD operand 1 by the lane of operand 2 selected by immediate
;; operand 3 (UNSPEC_VMUL_LANE).  All registers are printed with %P.  The
;; "type" attribute depends on <Is_float_mode>.
3904 | (define_insn "neon_vmul_lane<mode>" | |
3905 | [(set (match_operand:VMD 0 "s_register_operand" "=w") | |
3906 | (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "w") | |
3907 | (match_operand:VMD 2 "s_register_operand" | |
3908 | "<scalar_mul_constraint>") | |
94f0f2cc | 3909 | (match_operand:SI 3 "immediate_operand" "i")] |
88f77cba JB |
3910 | UNSPEC_VMUL_LANE))] |
3911 | "TARGET_NEON" | |
b617fc71 | 3912 | { |
b617fc71 JB |
3913 | return "vmul.<V_if_elem>\t%P0, %P1, %P2[%c3]"; |
3914 | } | |
003bb7f3 | 3915 | [(set (attr "type") |
b75b1be2 | 3916 | (if_then_else (match_test "<Is_float_mode>") |
f7379e5e JG |
3917 | (const_string "neon_fp_mul_s_scalar<q>") |
3918 | (const_string "neon_mul_<V_elem_ch>_scalar<q>")))] | |
c956e102 | 3919 | ) |
88f77cba JB |
3920 | |
;; Multiply VMQ operand 1 by the lane of <V_HALF> operand 2 selected by
;; immediate operand 3 (UNSPEC_VMUL_LANE).  Operands 0 and 1 are printed
;; with %q, the scalar source with %P.
3921 | (define_insn "neon_vmul_lane<mode>" | |
3922 | [(set (match_operand:VMQ 0 "s_register_operand" "=w") | |
3923 | (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "w") | |
3924 | (match_operand:<V_HALF> 2 "s_register_operand" | |
3925 | "<scalar_mul_constraint>") | |
94f0f2cc | 3926 | (match_operand:SI 3 "immediate_operand" "i")] |
88f77cba JB |
3927 | UNSPEC_VMUL_LANE))] |
3928 | "TARGET_NEON" | |
b617fc71 | 3929 | { |
b617fc71 JB |
3930 | return "vmul.<V_if_elem>\t%q0, %q1, %P2[%c3]"; |
3931 | } | |
003bb7f3 | 3932 | [(set (attr "type") |
b75b1be2 | 3933 | (if_then_else (match_test "<Is_float_mode>") |
f7379e5e JG |
3934 | (const_string "neon_fp_mul_s_scalar<q>") |
3935 | (const_string "neon_mul_<V_elem_ch>_scalar<q>")))] | |
c956e102 | 3936 | ) |
88f77cba | 3937 | |
;; Half-precision multiply-by-lane: VH operand 1 times the lane of V4HF
;; operand 2 selected by immediate operand 3; emitted as vmul.f16.
;; Requires TARGET_NEON_FP16INST.
55a9b91b MW |
3938 | (define_insn "neon_vmul_lane<mode>" |
3939 | [(set (match_operand:VH 0 "s_register_operand" "=w") | |
3940 | (unspec:VH [(match_operand:VH 1 "s_register_operand" "w") | |
3941 | (match_operand:V4HF 2 "s_register_operand" | |
3942 | "<scalar_mul_constraint>") | |
3943 | (match_operand:SI 3 "immediate_operand" "i")] | |
3944 | UNSPEC_VMUL_LANE))] | |
3945 | "TARGET_NEON_FP16INST" | |
3946 | "vmul.f16\t%<V_reg>0, %<V_reg>1, %P2[%c3]" | |
3947 | [(set_attr "type" "neon_fp_mul_s_scalar<q>")] | |
3948 | ) | |
3949 | ||
;; vmull by lane over the VMULL_LANE unspec iterator: VMDI operands 1 and 2
;; produce a <V_widen> result; operand 3 is the immediate lane index.
94f0f2cc | 3950 | (define_insn "neon_vmull<sup>_lane<mode>" |
88f77cba JB |
3951 | [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") |
3952 | (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w") | |
3953 | (match_operand:VMDI 2 "s_register_operand" | |
3954 | "<scalar_mul_constraint>") | |
94f0f2cc JG |
3955 | (match_operand:SI 3 "immediate_operand" "i")] |
3956 | VMULL_LANE))] | |
88f77cba | 3957 | "TARGET_NEON" |
b617fc71 | 3958 | { |
94f0f2cc | 3959 | return "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2[%c3]"; |
b617fc71 | 3960 | } |
f7379e5e | 3961 | [(set_attr "type" "neon_mul_<V_elem_ch>_scalar_long")] |
c956e102 | 3962 | ) |
88f77cba JB |
3963 | |
;; vqdmull by lane (UNSPEC_VQDMULL_LANE): VMDI operands 1 and 2 produce a
;; <V_widen> result; operand 3 is the immediate lane index.
3964 | (define_insn "neon_vqdmull_lane<mode>" | |
3965 | [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") | |
3966 | (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w") | |
3967 | (match_operand:VMDI 2 "s_register_operand" | |
3968 | "<scalar_mul_constraint>") | |
94f0f2cc | 3969 | (match_operand:SI 3 "immediate_operand" "i")] |
88f77cba JB |
3970 | UNSPEC_VQDMULL_LANE))] |
3971 | "TARGET_NEON" | |
b617fc71 | 3972 | { |
b617fc71 JB |
3973 | return "vqdmull.<V_s_elem>\t%q0, %P1, %P2[%c3]"; |
3974 | } | |
f7379e5e | 3975 | [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_long")] |
c956e102 | 3976 | ) |
88f77cba | 3977 | |
;; vq<r>dmulh by lane over the VQDMULH_LANE unspec iterator for VMQI modes:
;; operand 2 is a <V_HALF> vector, operand 3 the immediate lane index.
;; Operands 0 and 1 are printed with %q.
94f0f2cc | 3978 | (define_insn "neon_vq<r>dmulh_lane<mode>" |
88f77cba JB |
3979 | [(set (match_operand:VMQI 0 "s_register_operand" "=w") |
3980 | (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "w") | |
3981 | (match_operand:<V_HALF> 2 "s_register_operand" | |
3982 | "<scalar_mul_constraint>") | |
94f0f2cc JG |
3983 | (match_operand:SI 3 "immediate_operand" "i")] |
3984 | VQDMULH_LANE))] | |
88f77cba | 3985 | "TARGET_NEON" |
b617fc71 | 3986 | { |
94f0f2cc | 3987 | return "vq<r>dmulh.<V_s_elem>\t%q0, %q1, %P2[%c3]"; |
b617fc71 | 3988 | } |
f7379e5e | 3989 | [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")] |
c956e102 | 3990 | ) |
88f77cba | 3991 | |
;; vq<r>dmulh by lane over the VQDMULH_LANE unspec iterator for VMDI modes:
;; all vector operands are VMDI and printed with %P; operand 3 is the
;; immediate lane index.
;; NOTE(review): the "type" attribute carries the "_q" suffix even though
;; every register here is printed with %P -- confirm whether the non-"_q"
;; scheduling type was intended.
94f0f2cc | 3992 | (define_insn "neon_vq<r>dmulh_lane<mode>" |
88f77cba JB |
3993 | [(set (match_operand:VMDI 0 "s_register_operand" "=w") |
3994 | (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "w") | |
3995 | (match_operand:VMDI 2 "s_register_operand" | |
3996 | "<scalar_mul_constraint>") | |
94f0f2cc JG |
3997 | (match_operand:SI 3 "immediate_operand" "i")] |
3998 | VQDMULH_LANE))] | |
88f77cba | 3999 | "TARGET_NEON" |
b617fc71 | 4000 | { |
94f0f2cc | 4001 | return "vq<r>dmulh.<V_s_elem>\t%P0, %P1, %P2[%c3]"; |
b617fc71 | 4002 | } |
f7379e5e | 4003 | [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")] |
c956e102 | 4004 | ) |
88f77cba | 4005 | |
5f2ca3b2 MW |
4006 | ;; vqrdmlah_lane, vqrdmlsh_lane |
;; VMQI form over the VQRDMLH_AS unspec iterator.  Operand 1 is tied to the
;; destination by the "0" constraint and is not printed; the template uses
;; operands 0, 2, 3 and the immediate lane index in operand 4.
;; Requires TARGET_NEON_RDMA.
4007 | (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>" | |
4008 | [(set (match_operand:VMQI 0 "s_register_operand" "=w") | |
4009 | (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "0") | |
4010 | (match_operand:VMQI 2 "s_register_operand" "w") | |
4011 | (match_operand:<V_HALF> 3 "s_register_operand" | |
4012 | "<scalar_mul_constraint>") | |
4013 | (match_operand:SI 4 "immediate_operand" "i")] | |
4014 | VQRDMLH_AS))] | |
4015 | "TARGET_NEON_RDMA" | |
4016 | { | |
4017 | return | |
4018 | "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%q0, %q2, %P3[%c4]"; | |
4019 | } | |
4020 | [(set_attr "type" "neon_mla_<V_elem_ch>_scalar<q>")] | |
4021 | ) | |
4022 | ||
;; VMDI form over the VQRDMLH_AS unspec iterator.  Operand 1 is tied to the
;; destination by the "0" constraint and is not printed; all printed vector
;; operands use %P.  Requires TARGET_NEON_RDMA.
4023 | (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>" | |
4024 | [(set (match_operand:VMDI 0 "s_register_operand" "=w") | |
4025 | (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "0") | |
4026 | (match_operand:VMDI 2 "s_register_operand" "w") | |
4027 | (match_operand:VMDI 3 "s_register_operand" | |
4028 | "<scalar_mul_constraint>") | |
4029 | (match_operand:SI 4 "immediate_operand" "i")] | |
4030 | VQRDMLH_AS))] | |
4031 | "TARGET_NEON_RDMA" | |
4032 | { | |
4033 | return | |
4034 | "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%P0, %P2, %P3[%c4]"; | |
4035 | } | |
4036 | [(set_attr "type" "neon_mla_<V_elem_ch>_scalar")] | |
4037 | ) | |
4038 | ||
;; vmla by lane for VMD modes (UNSPEC_VMLA_LANE).  Operand 1 is tied to the
;; destination by the "0" constraint; the template prints operands 0, 2, 3
;; and the immediate lane index in operand 4.  The "type" attribute depends
;; on <Is_float_mode>.
88f77cba JB |
4039 | (define_insn "neon_vmla_lane<mode>" |
4040 | [(set (match_operand:VMD 0 "s_register_operand" "=w") | |
4041 | (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0") | |
4042 | (match_operand:VMD 2 "s_register_operand" "w") | |
4043 | (match_operand:VMD 3 "s_register_operand" | |
4044 | "<scalar_mul_constraint>") | |
94f0f2cc | 4045 | (match_operand:SI 4 "immediate_operand" "i")] |
88f77cba JB |
4046 | UNSPEC_VMLA_LANE))] |
4047 | "TARGET_NEON" | |
b617fc71 | 4048 | { |
b617fc71 JB |
4049 | return "vmla.<V_if_elem>\t%P0, %P2, %P3[%c4]"; |
4050 | } | |
003bb7f3 | 4051 | [(set (attr "type") |
b75b1be2 | 4052 | (if_then_else (match_test "<Is_float_mode>") |
f7379e5e JG |
4053 | (const_string "neon_fp_mla_s_scalar<q>") |
4054 | (const_string "neon_mla_<V_elem_ch>_scalar<q>")))] | |
c956e102 | 4055 | ) |
88f77cba JB |
4056 | |
;; vmla by lane for VMQ modes (UNSPEC_VMLA_LANE).  Operand 1 is tied to the
;; destination by the "0" constraint; operand 3 is a <V_HALF> vector printed
;; with %P, operands 0 and 2 are printed with %q.
4057 | (define_insn "neon_vmla_lane<mode>" | |
4058 | [(set (match_operand:VMQ 0 "s_register_operand" "=w") | |
4059 | (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0") | |
4060 | (match_operand:VMQ 2 "s_register_operand" "w") | |
4061 | (match_operand:<V_HALF> 3 "s_register_operand" | |
4062 | "<scalar_mul_constraint>") | |
94f0f2cc | 4063 | (match_operand:SI 4 "immediate_operand" "i")] |
88f77cba JB |
4064 | UNSPEC_VMLA_LANE))] |
4065 | "TARGET_NEON" | |
b617fc71 | 4066 | { |
b617fc71 JB |
4067 | return "vmla.<V_if_elem>\t%q0, %q2, %P3[%c4]"; |
4068 | } | |
003bb7f3 | 4069 | [(set (attr "type") |
b75b1be2 | 4070 | (if_then_else (match_test "<Is_float_mode>") |
f7379e5e JG |
4071 | (const_string "neon_fp_mla_s_scalar<q>") |
4072 | (const_string "neon_mla_<V_elem_ch>_scalar<q>")))] | |
c956e102 | 4073 | ) |
88f77cba | 4074 | |
;; vmlal by lane over the VMLAL_LANE unspec iterator.  <V_widen> operand 1
;; is tied to the destination by the "0" constraint; VMDI operands 2 and 3
;; are printed with %P and operand 4 is the immediate lane index.
94f0f2cc | 4075 | (define_insn "neon_vmlal<sup>_lane<mode>" |
88f77cba JB |
4076 | [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") |
4077 | (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0") | |
4078 | (match_operand:VMDI 2 "s_register_operand" "w") | |
4079 | (match_operand:VMDI 3 "s_register_operand" | |
4080 | "<scalar_mul_constraint>") | |
94f0f2cc JG |
4081 | (match_operand:SI 4 "immediate_operand" "i")] |
4082 | VMLAL_LANE))] | |
88f77cba | 4083 | "TARGET_NEON" |
b617fc71 | 4084 | { |
94f0f2cc | 4085 | return "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]"; |
b617fc71 | 4086 | } |
f7379e5e | 4087 | [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")] |
c956e102 | 4088 | ) |
88f77cba JB |
4089 | |
;; vqdmlal by lane (UNSPEC_VQDMLAL_LANE).  <V_widen> operand 1 is tied to
;; the destination by the "0" constraint; VMDI operands 2 and 3 are printed
;; with %P and operand 4 is the immediate lane index.
4090 | (define_insn "neon_vqdmlal_lane<mode>" | |
4091 | [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") | |
4092 | (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0") | |
4093 | (match_operand:VMDI 2 "s_register_operand" "w") | |
4094 | (match_operand:VMDI 3 "s_register_operand" | |
4095 | "<scalar_mul_constraint>") | |
94f0f2cc | 4096 | (match_operand:SI 4 "immediate_operand" "i")] |
88f77cba JB |
4097 | UNSPEC_VQDMLAL_LANE))] |
4098 | "TARGET_NEON" | |
b617fc71 | 4099 | { |
b617fc71 JB |
4100 | return "vqdmlal.<V_s_elem>\t%q0, %P2, %P3[%c4]"; |
4101 | } | |
f7379e5e | 4102 | [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")] |
c956e102 | 4103 | ) |
88f77cba JB |
4104 | |
;; vmls by lane for VMD modes (UNSPEC_VMLS_LANE).  Operand 1 is tied to the
;; destination by the "0" constraint; the template prints operands 0, 2, 3
;; and the immediate lane index in operand 4.  The "type" attribute depends
;; on <Is_float_mode>.
4105 | (define_insn "neon_vmls_lane<mode>" | |
4106 | [(set (match_operand:VMD 0 "s_register_operand" "=w") | |
4107 | (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0") | |
4108 | (match_operand:VMD 2 "s_register_operand" "w") | |
4109 | (match_operand:VMD 3 "s_register_operand" | |
4110 | "<scalar_mul_constraint>") | |
94f0f2cc | 4111 | (match_operand:SI 4 "immediate_operand" "i")] |
88f77cba JB |
4112 | UNSPEC_VMLS_LANE))] |
4113 | "TARGET_NEON" | |
b617fc71 | 4114 | { |
b617fc71 JB |
4115 | return "vmls.<V_if_elem>\t%P0, %P2, %P3[%c4]"; |
4116 | } | |
003bb7f3 | 4117 | [(set (attr "type") |
b75b1be2 | 4118 | (if_then_else (match_test "<Is_float_mode>") |
f7379e5e JG |
4119 | (const_string "neon_fp_mla_s_scalar<q>") |
4120 | (const_string "neon_mla_<V_elem_ch>_scalar<q>")))] | |
c956e102 | 4121 | ) |
88f77cba JB |
4122 | |
;; vmls by lane for VMQ modes (UNSPEC_VMLS_LANE).  Operand 1 is tied to the
;; destination by the "0" constraint; operand 3 is a <V_HALF> vector printed
;; with %P, operands 0 and 2 are printed with %q.
4123 | (define_insn "neon_vmls_lane<mode>" | |
4124 | [(set (match_operand:VMQ 0 "s_register_operand" "=w") | |
4125 | (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0") | |
4126 | (match_operand:VMQ 2 "s_register_operand" "w") | |
4127 | (match_operand:<V_HALF> 3 "s_register_operand" | |
4128 | "<scalar_mul_constraint>") | |
94f0f2cc | 4129 | (match_operand:SI 4 "immediate_operand" "i")] |
88f77cba JB |
4130 | UNSPEC_VMLS_LANE))] |
4131 | "TARGET_NEON" | |
b617fc71 | 4132 | { |
b617fc71 JB |
4133 | return "vmls.<V_if_elem>\t%q0, %q2, %P3[%c4]"; |
4134 | } | |
003bb7f3 | 4135 | [(set (attr "type") |
b75b1be2 | 4136 | (if_then_else (match_test "<Is_float_mode>") |
f7379e5e JG |
4137 | (const_string "neon_fp_mla_s_scalar<q>") |
4138 | (const_string "neon_mla_<V_elem_ch>_scalar<q>")))] | |
c956e102 | 4139 | ) |
88f77cba | 4140 | |
;; vmlsl by lane over the VMLSL_LANE unspec iterator.  <V_widen> operand 1
;; is tied to the destination by the "0" constraint; VMDI operands 2 and 3
;; are printed with %P and operand 4 is the immediate lane index.
94f0f2cc | 4141 | (define_insn "neon_vmlsl<sup>_lane<mode>" |
88f77cba JB |
4142 | [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") |
4143 | (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0") | |
4144 | (match_operand:VMDI 2 "s_register_operand" "w") | |
4145 | (match_operand:VMDI 3 "s_register_operand" | |
4146 | "<scalar_mul_constraint>") | |
94f0f2cc JG |
4147 | (match_operand:SI 4 "immediate_operand" "i")] |
4148 | VMLSL_LANE))] | |
88f77cba | 4149 | "TARGET_NEON" |
b617fc71 | 4150 | { |
94f0f2cc | 4151 | return "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]"; |
b617fc71 | 4152 | } |
f7379e5e | 4153 | [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")] |
c956e102 | 4154 | ) |
88f77cba JB |
4155 | |
;; vqdmlsl by lane (UNSPEC_VQDMLSL_LANE).  <V_widen> operand 1 is tied to
;; the destination by the "0" constraint; VMDI operands 2 and 3 are printed
;; with %P and operand 4 is the immediate lane index.
4156 | (define_insn "neon_vqdmlsl_lane<mode>" | |
4157 | [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") | |
4158 | (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0") | |
4159 | (match_operand:VMDI 2 "s_register_operand" "w") | |
4160 | (match_operand:VMDI 3 "s_register_operand" | |
4161 | "<scalar_mul_constraint>") | |
94f0f2cc | 4162 | (match_operand:SI 4 "immediate_operand" "i")] |
88f77cba JB |
4163 | UNSPEC_VQDMLSL_LANE))] |
4164 | "TARGET_NEON" | |
b617fc71 | 4165 | { |
b617fc71 JB |
4166 | return "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3[%c4]"; |
4167 | } | |
f7379e5e | 4168 | [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")] |
c956e102 | 4169 | ) |
88f77cba JB |
4170 | |
4171 | ; FIXME: For the "_n" multiply/multiply-accumulate insns, we copy a value in a | |
4172 | ; core register into a temp register, then use a scalar taken from that. This | |
4173 | ; isn't an optimal solution if e.g. the scalar has just been read from memory | |
4174 | ; or extracted from another vector. In the latter case it's currently better to | |
4175 | ; use the "_lane" variant, and the former case can probably be implemented | |
4176 | ; using vld1_lane, but that hasn't been done yet. | |
4177 | ||
;; Multiply-by-scalar expander for VMD modes: a fresh <MODE>mode temporary
;; gets scalar operand 2 written into lane 0 via gen_neon_vset_lane<mode>,
;; then gen_neon_vmul_lane<mode> multiplies operand 1 by lane 0 of it.
4178 | (define_expand "neon_vmul_n<mode>" | |
cd65e265 DZ |
4179 | [(match_operand:VMD 0 "s_register_operand") |
4180 | (match_operand:VMD 1 "s_register_operand") | |
4181 | (match_operand:<V_elem> 2 "s_register_operand")] | |
88f77cba JB |
4182 | "TARGET_NEON" |
4183 | { | |
4184 | rtx tmp = gen_reg_rtx (<MODE>mode); | |
4185 | emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx)); | |
4186 | emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp, | |
94f0f2cc | 4187 | const0_rtx)); |
88f77cba JB |
4188 | DONE; |
4189 | }) | |
4190 | ||
;; Multiply-by-scalar expander for VMQ modes: the temporary is allocated in
;; <V_HALF>mode, scalar operand 2 is written into its lane 0 via
;; gen_neon_vset_lane<V_half>, then gen_neon_vmul_lane<mode> is emitted
;; with lane index 0.
4191 | (define_expand "neon_vmul_n<mode>" | |
cd65e265 DZ |
4192 | [(match_operand:VMQ 0 "s_register_operand") |
4193 | (match_operand:VMQ 1 "s_register_operand") | |
4194 | (match_operand:<V_elem> 2 "s_register_operand")] | |
88f77cba JB |
4195 | "TARGET_NEON" |
4196 | { | |
4197 | rtx tmp = gen_reg_rtx (<V_HALF>mode); | |
4198 | emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx)); | |
4199 | emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp, | |
94f0f2cc | 4200 | const0_rtx)); |
88f77cba JB |
4201 | DONE; |
4202 | }) | |
4203 | ||
55a9b91b MW |
;; "_n" (scalar operand) form of vmul for VH modes, enabled under
;; TARGET_NEON_FP16INST.  The scalar always goes through a V4HFmode
;; temporary, whatever <MODE> is.
(define_expand "neon_vmul_n<mode>"
  [(match_operand:VH 0 "s_register_operand")
   (match_operand:VH 1 "s_register_operand")
   (match_operand:<V_elem> 2 "s_register_operand")]
  "TARGET_NEON_FP16INST"
{
  rtx scalar_vec = gen_reg_rtx (V4HFmode);

  /* Stage the scalar in lane 0 of the V4HF temporary.  */
  emit_insn (gen_neon_vset_lanev4hf (scalar_vec, operands[2],
                                     scalar_vec, const0_rtx));
  /* Reuse the by-lane pattern, selecting lane 0.  */
  emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1],
                                       scalar_vec, const0_rtx));
  DONE;
})
4216 | ||
;; "_n" form of vmulls: VMDI input (operand 1), <V_widen> result (operand 0).
;; The scalar operand 2 is staged in lane 0 of a <MODE>mode temporary and
;; neon_vmulls_lane is used with lane index 0.
(define_expand "neon_vmulls_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand")
   (match_operand:VMDI 1 "s_register_operand")
   (match_operand:<V_elem> 2 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  /* Stage the scalar in lane 0 of SCALAR_VEC.  */
  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2],
                                       scalar_vec, const0_rtx));
  /* Reuse the by-lane pattern, selecting lane 0.  */
  emit_insn (gen_neon_vmulls_lane<mode> (operands[0], operands[1],
                                         scalar_vec, const0_rtx));
  DONE;
})
4229 | ||
;; "_n" form of vmullu: VMDI input (operand 1), <V_widen> result (operand 0).
;; The scalar operand 2 is staged in lane 0 of a <MODE>mode temporary and
;; neon_vmullu_lane is used with lane index 0.
(define_expand "neon_vmullu_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand")
   (match_operand:VMDI 1 "s_register_operand")
   (match_operand:<V_elem> 2 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  /* Stage the scalar in lane 0 of SCALAR_VEC.  */
  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2],
                                       scalar_vec, const0_rtx));
  /* Reuse the by-lane pattern, selecting lane 0.  */
  emit_insn (gen_neon_vmullu_lane<mode> (operands[0], operands[1],
                                         scalar_vec, const0_rtx));
  DONE;
})
4242 | ||
;; "_n" form of vqdmull: VMDI input (operand 1), <V_widen> result
;; (operand 0).  The scalar operand 2 is staged in lane 0 of a <MODE>mode
;; temporary and neon_vqdmull_lane is used with lane index 0.
(define_expand "neon_vqdmull_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand")
   (match_operand:VMDI 1 "s_register_operand")
   (match_operand:<V_elem> 2 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  /* Stage the scalar in lane 0 of SCALAR_VEC.  */
  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2],
                                       scalar_vec, const0_rtx));
  /* Reuse the by-lane pattern, selecting lane 0.  */
  emit_insn (gen_neon_vqdmull_lane<mode> (operands[0], operands[1],
                                          scalar_vec, const0_rtx));
  DONE;
})
4255 | ||
;; "_n" form of vqdmulh for VMDI modes.  The scalar operand 2 is staged in
;; lane 0 of a <MODE>mode temporary and neon_vqdmulh_lane is used with lane
;; index 0.
(define_expand "neon_vqdmulh_n<mode>"
  [(match_operand:VMDI 0 "s_register_operand")
   (match_operand:VMDI 1 "s_register_operand")
   (match_operand:<V_elem> 2 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  /* Stage the scalar in lane 0 of SCALAR_VEC.  */
  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2],
                                       scalar_vec, const0_rtx));
  /* Reuse the by-lane pattern, selecting lane 0.  */
  emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1],
                                          scalar_vec, const0_rtx));
  DONE;
})
4268 | ||
;; "_n" form of vqrdmulh for VMDI modes.  The scalar operand 2 is staged in
;; lane 0 of a <MODE>mode temporary and neon_vqrdmulh_lane is used with lane
;; index 0.
(define_expand "neon_vqrdmulh_n<mode>"
  [(match_operand:VMDI 0 "s_register_operand")
   (match_operand:VMDI 1 "s_register_operand")
   (match_operand:<V_elem> 2 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  /* Stage the scalar in lane 0 of SCALAR_VEC.  */
  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[2],
                                       scalar_vec, const0_rtx));
  /* Reuse the by-lane pattern, selecting lane 0.  */
  emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1],
                                           scalar_vec, const0_rtx));
  DONE;
})
4281 | ||
;; "_n" form of vqdmulh for VMQI modes.  The scalar operand 2 is staged in
;; lane 0 of a <V_HALF>mode temporary and neon_vqdmulh_lane is used with lane
;; index 0.
(define_expand "neon_vqdmulh_n<mode>"
  [(match_operand:VMQI 0 "s_register_operand")
   (match_operand:VMQI 1 "s_register_operand")
   (match_operand:<V_elem> 2 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<V_HALF>mode);

  /* Stage the scalar in lane 0 of the half-width temporary.  */
  emit_insn (gen_neon_vset_lane<V_half> (scalar_vec, operands[2],
                                         scalar_vec, const0_rtx));
  /* Reuse the by-lane pattern, selecting lane 0.  */
  emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1],
                                          scalar_vec, const0_rtx));
  DONE;
})
4294 | ||
;; "_n" form of vqrdmulh for VMQI modes.  The scalar operand 2 is staged in
;; lane 0 of a <V_HALF>mode temporary and neon_vqrdmulh_lane is used with
;; lane index 0.
(define_expand "neon_vqrdmulh_n<mode>"
  [(match_operand:VMQI 0 "s_register_operand")
   (match_operand:VMQI 1 "s_register_operand")
   (match_operand:<V_elem> 2 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<V_HALF>mode);

  /* Stage the scalar in lane 0 of the half-width temporary.  */
  emit_insn (gen_neon_vset_lane<V_half> (scalar_vec, operands[2],
                                         scalar_vec, const0_rtx));
  /* Reuse the by-lane pattern, selecting lane 0.  */
  emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1],
                                           scalar_vec, const0_rtx));
  DONE;
})
4307 | ||
;; "_n" form of vmla for VMD modes.  The scalar operand 3 is staged in lane 0
;; of a <MODE>mode temporary; operands 0-2 are forwarded unchanged to
;; neon_vmla_lane together with that temporary and lane index 0.
(define_expand "neon_vmla_n<mode>"
  [(match_operand:VMD 0 "s_register_operand")
   (match_operand:VMD 1 "s_register_operand")
   (match_operand:VMD 2 "s_register_operand")
   (match_operand:<V_elem> 3 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  /* Stage the scalar in lane 0 of SCALAR_VEC.  */
  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3],
                                       scalar_vec, const0_rtx));
  /* Reuse the by-lane pattern, selecting lane 0.  */
  emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1],
                                       operands[2], scalar_vec,
                                       const0_rtx));
  DONE;
})
4321 | ||
;; "_n" form of vmla for VMQ modes.  The scalar operand 3 is staged in lane 0
;; of a <V_HALF>mode temporary; operands 0-2 are forwarded unchanged to
;; neon_vmla_lane together with that temporary and lane index 0.
(define_expand "neon_vmla_n<mode>"
  [(match_operand:VMQ 0 "s_register_operand")
   (match_operand:VMQ 1 "s_register_operand")
   (match_operand:VMQ 2 "s_register_operand")
   (match_operand:<V_elem> 3 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<V_HALF>mode);

  /* Stage the scalar in lane 0 of the half-width temporary.  */
  emit_insn (gen_neon_vset_lane<V_half> (scalar_vec, operands[3],
                                         scalar_vec, const0_rtx));
  /* Reuse the by-lane pattern, selecting lane 0.  */
  emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1],
                                       operands[2], scalar_vec,
                                       const0_rtx));
  DONE;
})
4335 | ||
;; "_n" form of vmlals: <V_widen> operands 0 and 1, VMDI operand 2.  The
;; scalar operand 3 is staged in lane 0 of a <MODE>mode temporary and
;; neon_vmlals_lane is used with lane index 0.
(define_expand "neon_vmlals_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand")
   (match_operand:<V_widen> 1 "s_register_operand")
   (match_operand:VMDI 2 "s_register_operand")
   (match_operand:<V_elem> 3 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  /* Stage the scalar in lane 0 of SCALAR_VEC.  */
  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3],
                                       scalar_vec, const0_rtx));
  /* Reuse the by-lane pattern, selecting lane 0.  */
  emit_insn (gen_neon_vmlals_lane<mode> (operands[0], operands[1],
                                         operands[2], scalar_vec,
                                         const0_rtx));
  DONE;
})
4349 | ||
;; "_n" form of vmlalu: <V_widen> operands 0 and 1, VMDI operand 2.  The
;; scalar operand 3 is staged in lane 0 of a <MODE>mode temporary and
;; neon_vmlalu_lane is used with lane index 0.
(define_expand "neon_vmlalu_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand")
   (match_operand:<V_widen> 1 "s_register_operand")
   (match_operand:VMDI 2 "s_register_operand")
   (match_operand:<V_elem> 3 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  /* Stage the scalar in lane 0 of SCALAR_VEC.  */
  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3],
                                       scalar_vec, const0_rtx));
  /* Reuse the by-lane pattern, selecting lane 0.  */
  emit_insn (gen_neon_vmlalu_lane<mode> (operands[0], operands[1],
                                         operands[2], scalar_vec,
                                         const0_rtx));
  DONE;
})
4363 | ||
;; "_n" form of vqdmlal: <V_widen> operands 0 and 1, VMDI operand 2.  The
;; scalar operand 3 is staged in lane 0 of a <MODE>mode temporary and
;; neon_vqdmlal_lane is used with lane index 0.
(define_expand "neon_vqdmlal_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand")
   (match_operand:<V_widen> 1 "s_register_operand")
   (match_operand:VMDI 2 "s_register_operand")
   (match_operand:<V_elem> 3 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  /* Stage the scalar in lane 0 of SCALAR_VEC.  */
  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3],
                                       scalar_vec, const0_rtx));
  /* Reuse the by-lane pattern, selecting lane 0.  */
  emit_insn (gen_neon_vqdmlal_lane<mode> (operands[0], operands[1],
                                          operands[2], scalar_vec,
                                          const0_rtx));
  DONE;
})
4377 | ||
;; "_n" form of vmls for VMD modes.  The scalar operand 3 is staged in lane 0
;; of a <MODE>mode temporary; operands 0-2 are forwarded unchanged to
;; neon_vmls_lane together with that temporary and lane index 0.
(define_expand "neon_vmls_n<mode>"
  [(match_operand:VMD 0 "s_register_operand")
   (match_operand:VMD 1 "s_register_operand")
   (match_operand:VMD 2 "s_register_operand")
   (match_operand:<V_elem> 3 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  /* Stage the scalar in lane 0 of SCALAR_VEC.  */
  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3],
                                       scalar_vec, const0_rtx));
  /* Reuse the by-lane pattern, selecting lane 0.  */
  emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1],
                                       operands[2], scalar_vec,
                                       const0_rtx));
  DONE;
})
4391 | ||
;; "_n" form of vmls for VMQ modes.  The scalar operand 3 is staged in lane 0
;; of a <V_HALF>mode temporary; operands 0-2 are forwarded unchanged to
;; neon_vmls_lane together with that temporary and lane index 0.
(define_expand "neon_vmls_n<mode>"
  [(match_operand:VMQ 0 "s_register_operand")
   (match_operand:VMQ 1 "s_register_operand")
   (match_operand:VMQ 2 "s_register_operand")
   (match_operand:<V_elem> 3 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<V_HALF>mode);

  /* Stage the scalar in lane 0 of the half-width temporary.  */
  emit_insn (gen_neon_vset_lane<V_half> (scalar_vec, operands[3],
                                         scalar_vec, const0_rtx));
  /* Reuse the by-lane pattern, selecting lane 0.  */
  emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1],
                                       operands[2], scalar_vec,
                                       const0_rtx));
  DONE;
})
4405 | ||
;; "_n" form of vmlsls: <V_widen> operands 0 and 1, VMDI operand 2.  The
;; scalar operand 3 is staged in lane 0 of a <MODE>mode temporary and
;; neon_vmlsls_lane is used with lane index 0.
(define_expand "neon_vmlsls_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand")
   (match_operand:<V_widen> 1 "s_register_operand")
   (match_operand:VMDI 2 "s_register_operand")
   (match_operand:<V_elem> 3 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  /* Stage the scalar in lane 0 of SCALAR_VEC.  */
  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3],
                                       scalar_vec, const0_rtx));
  /* Reuse the by-lane pattern, selecting lane 0.  */
  emit_insn (gen_neon_vmlsls_lane<mode> (operands[0], operands[1],
                                         operands[2], scalar_vec,
                                         const0_rtx));
  DONE;
})
4419 | ||
;; "_n" form of vmlslu: <V_widen> operands 0 and 1, VMDI operand 2.  The
;; scalar operand 3 is staged in lane 0 of a <MODE>mode temporary and
;; neon_vmlslu_lane is used with lane index 0.
(define_expand "neon_vmlslu_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand")
   (match_operand:<V_widen> 1 "s_register_operand")
   (match_operand:VMDI 2 "s_register_operand")
   (match_operand:<V_elem> 3 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  /* Stage the scalar in lane 0 of SCALAR_VEC.  */
  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3],
                                       scalar_vec, const0_rtx));
  /* Reuse the by-lane pattern, selecting lane 0.  */
  emit_insn (gen_neon_vmlslu_lane<mode> (operands[0], operands[1],
                                         operands[2], scalar_vec,
                                         const0_rtx));
  DONE;
})
4433 | ||
;; "_n" form of vqdmlsl: <V_widen> operands 0 and 1, VMDI operand 2.  The
;; scalar operand 3 is staged in lane 0 of a <MODE>mode temporary and
;; neon_vqdmlsl_lane is used with lane index 0.
(define_expand "neon_vqdmlsl_n<mode>"
  [(match_operand:<V_widen> 0 "s_register_operand")
   (match_operand:<V_widen> 1 "s_register_operand")
   (match_operand:VMDI 2 "s_register_operand")
   (match_operand:<V_elem> 3 "s_register_operand")]
  "TARGET_NEON"
{
  rtx scalar_vec = gen_reg_rtx (<MODE>mode);

  /* Stage the scalar in lane 0 of SCALAR_VEC.  */
  emit_insn (gen_neon_vset_lane<mode> (scalar_vec, operands[3],
                                       scalar_vec, const0_rtx));
  /* Reuse the by-lane pattern, selecting lane 0.  */
  emit_insn (gen_neon_vqdmlsl_lane<mode> (operands[0], operands[1],
                                          operands[2], scalar_vec,
                                          const0_rtx));
  DONE;
})
4447 | ||
;; VEXT for VDQX modes, modelled as UNSPEC_VEXT of operands 1-3.  Operand 3 is
;; an immediate; before the instruction text is produced it is passed to
;; arm_const_bounds with limits 0 and GET_MODE_NUNITS (<MODE>mode).
ff03f2d1 | 4448 | (define_insn "@neon_vext<mode>"
88f77cba JB |
4449 | [(set (match_operand:VDQX 0 "s_register_operand" "=w")
4450 | (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w") | |
4451 | (match_operand:VDQX 2 "s_register_operand" "w") | |
4452 | (match_operand:SI 3 "immediate_operand" "i")] | |
4453 | UNSPEC_VEXT))] | |
4454 | "TARGET_NEON" | |
b617fc71 | 4455 | {
d57daa0c | 4456 | arm_const_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
b617fc71 JB |
4457 | return "vext.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, %3";
4458 | }
f7379e5e | 4459 | [(set_attr "type" "neon_ext<q>")]
c956e102 | 4460 | )
88f77cba | 4461 | |
;; VREV64 for VDQ modes, modelled as UNSPEC_VREV64 of operand 1.  The element
;; size suffix of the mnemonic comes from <V_sz_elem>.
ff03f2d1 | 4462 | (define_insn "@neon_vrev64<mode>"
88f77cba | 4463 | [(set (match_operand:VDQ 0 "s_register_operand" "=w")
94f0f2cc | 4464 | (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")]
88f77cba JB |
4465 | UNSPEC_VREV64))]
4466 | "TARGET_NEON" | |
c956e102 | 4467 | "vrev64.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
f7379e5e | 4468 | [(set_attr "type" "neon_rev<q>")]
c956e102 | 4469 | )
88f77cba | 4470 | |
;; VREV32 for VX modes, modelled as UNSPEC_VREV32 of operand 1.  The element
;; size suffix of the mnemonic comes from <V_sz_elem>.
ff03f2d1 | 4471 | (define_insn "@neon_vrev32<mode>"
88f77cba | 4472 | [(set (match_operand:VX 0 "s_register_operand" "=w")
94f0f2cc | 4473 | (unspec:VX [(match_operand:VX 1 "s_register_operand" "w")]
88f77cba JB |
4474 | UNSPEC_VREV32))]
4475 | "TARGET_NEON" | |
c956e102 | 4476 | "vrev32.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
f7379e5e | 4477 | [(set_attr "type" "neon_rev<q>")]
c956e102 | 4478 | )
88f77cba | 4479 | |
;; VREV16 for VE modes, modelled as UNSPEC_VREV16 of operand 1.  The element
;; size suffix of the mnemonic comes from <V_sz_elem>.
ff03f2d1 | 4480 | (define_insn "@neon_vrev16<mode>"
88f77cba | 4481 | [(set (match_operand:VE 0 "s_register_operand" "=w")
94f0f2cc | 4482 | (unspec:VE [(match_operand:VE 1 "s_register_operand" "w")]
88f77cba JB |
4483 | UNSPEC_VREV16))]
4484 | "TARGET_NEON" | |
c956e102 | 4485 | "vrev16.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
f7379e5e | 4486 | [(set_attr "type" "neon_rev<q>")]
c956e102 | 4487 | )
88f77cba JB |
4488 | |
4489 | ; vbsl_* intrinsics may compile to any of vbsl/vbif/vbit depending on register | |
4490 | ; allocation. For an intrinsic of form: | |
4491 | ; rD = vbsl_* (rS, rN, rM) | |
4492 | ; We can use any of: | |
4493 | ; vbsl rS, rN, rM (if D = S) | |
4494 | ; vbit rD, rN, rS (if D = M, so 1-bits in rS choose bits from rN, else rM) | |
4495 | ; vbif rD, rM, rS (if D = N, so 0-bits in rS choose bits from rM, else rN) | |
4496 | ||
;; Three constraint alternatives, one per mnemonic:
;;   1. operand 1 tied to operand 0 -> vbsl %0, %2, %3
;;   2. operand 3 tied to operand 0 -> vbit %0, %2, %1
;;   3. operand 2 tied to operand 0 -> vbif %0, %3, %1
4497 | (define_insn "neon_vbsl<mode>_internal" | |
4498 | [(set (match_operand:VDQX 0 "s_register_operand" "=w,w,w") | |
4499 | (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" " 0,w,w") | |
4500 | (match_operand:VDQX 2 "s_register_operand" " w,w,0") | |
4501 | (match_operand:VDQX 3 "s_register_operand" " w,0,w")] | |
4502 | UNSPEC_VBSL))] | |
4503 | "TARGET_NEON" | |
4504 | "@ | |
4505 | vbsl\t%<V_reg>0, %<V_reg>2, %<V_reg>3 | |
4506 | vbit\t%<V_reg>0, %<V_reg>2, %<V_reg>1 | |
c956e102 | 4507 | vbif\t%<V_reg>0, %<V_reg>3, %<V_reg>1"
f7379e5e | 4508 | [(set_attr "type" "neon_bsl<q>")]
c956e102 | 4509 | )
88f77cba | 4510 | |
;; Expander producing an UNSPEC_VBSL set over VDQX modes.  Operand 1 is
;; declared in <V_cmp_result> mode; the preparation statement replaces it with
;; its <MODE>mode lowpart so that every unspec operand has the same mode.
c2978b34 | 4511 | (define_expand "@neon_vbsl<mode>"
cd65e265 DZ |
4512 | [(set (match_operand:VDQX 0 "s_register_operand")
4513 | (unspec:VDQX [(match_operand:<V_cmp_result> 1 "s_register_operand") | |
4514 | (match_operand:VDQX 2 "s_register_operand") | |
4515 | (match_operand:VDQX 3 "s_register_operand")] | |
88f77cba JB |
4516 | UNSPEC_VBSL))]
4517 | "TARGET_NEON" | |
4518 | { | |
4519 | /* We can't alias operands together if they have different modes. */ | |
4520 | operands[1] = gen_lowpart (<MODE>mode, operands[1]); | |
4521 | }) | |
4522 | ||
94f0f2cc JG |
;; Patterns generated from the VSHL unspec iterator.  Operands 1 and 2 are
;; both VDQIX registers; the mnemonic is built from <shift_op> and <sup>.
;; NOTE(review): the type attribute is neon_shift_imm<q> although operand 2 is
;; a register rather than an immediate -- confirm the intended scheduling
;; class.
4523 | ;; vshl, vrshl | |
4524 | (define_insn "neon_v<shift_op><sup><mode>" | |
88f77cba JB |
4525 | [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4526 | (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w") | |
94f0f2cc JG |
4527 | (match_operand:VDQIX 2 "s_register_operand" "w")]
4528 | VSHL))] | |
88f77cba | 4529 | "TARGET_NEON"
94f0f2cc | 4530 | "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
f7379e5e | 4531 | [(set_attr "type" "neon_shift_imm<q>")]
c956e102 | 4532 | )
88f77cba | 4533 | |
94f0f2cc JG |
;; Patterns generated from the VQSHL unspec iterator.  Operands 1 and 2 are
;; both VDQIX registers; the mnemonic is built from <shift_op> and <sup>.
;; NOTE(review): the type attribute is neon_sat_shift_imm<q> although operand
;; 2 is a register rather than an immediate -- confirm the intended
;; scheduling class.
4534 | ;; vqshl, vqrshl | |
4535 | (define_insn "neon_v<shift_op><sup><mode>" | |
88f77cba JB |
4536 | [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4537 | (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w") | |
94f0f2cc JG |
4538 | (match_operand:VDQIX 2 "s_register_operand" "w")]
4539 | VQSHL))] | |
88f77cba | 4540 | "TARGET_NEON"
94f0f2cc | 4541 | "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
f7379e5e | 4542 | [(set_attr "type" "neon_sat_shift_imm<q>")]
c956e102 | 4543 | )
88f77cba | 4544 | |
94f0f2cc JG |
;; Patterns generated from the VSHR_N unspec iterator over VDQIX modes.
;; Operand 2 is an immediate; it is passed to arm_const_bounds with limits 1
;; and neon_element_bits (<MODE>mode) + 1 before the template is returned.
4545 | ;; vshr_n, vrshr_n | |
4546 | (define_insn "neon_v<shift_op><sup>_n<mode>" | |
88f77cba JB |
4547 | [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4548 | (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w") | |
94f0f2cc JG |
4549 | (match_operand:SI 2 "immediate_operand" "i")]
4550 | VSHR_N))] | |
88f77cba | 4551 | "TARGET_NEON"
b617fc71 | 4552 | {
d57daa0c | 4553 | arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) + 1);
94f0f2cc | 4554 | return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
b617fc71 | 4555 | }
f7379e5e | 4556 | [(set_attr "type" "neon_shift_imm<q>")]
c956e102 | 4557 | )
88f77cba | 4558 | |
94f0f2cc JG |
;; Patterns generated from the VSHRN_N unspec iterator: VN-mode input
;; (operand 1, printed with %q) and <V_narrow>-mode result (operand 0, printed
;; with %P).  The immediate operand 2 is passed to arm_const_bounds with
;; limits 1 and neon_element_bits (<MODE>mode) / 2 + 1.
4559 | ;; vshrn_n, vrshrn_n | |
4560 | (define_insn "neon_v<shift_op>_n<mode>" | |
88f77cba JB |
4561 | [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4562 | (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w") | |
94f0f2cc JG |
4563 | (match_operand:SI 2 "immediate_operand" "i")]
4564 | VSHRN_N))] | |
88f77cba | 4565 | "TARGET_NEON"
b617fc71 | 4566 | {
d57daa0c | 4567 | arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
94f0f2cc | 4568 | return "v<shift_op>.<V_if_elem>\t%P0, %q1, %2";
b617fc71 | 4569 | }
f7379e5e | 4570 | [(set_attr "type" "neon_shift_imm_narrow_q")]
c956e102 | 4571 | )
88f77cba | 4572 | |
94f0f2cc JG |
;; Patterns generated from the VQSHRN_N unspec iterator: VN-mode input
;; (operand 1, printed with %q) and <V_narrow>-mode result (operand 0, printed
;; with %P).  The immediate operand 2 is passed to arm_const_bounds with
;; limits 1 and neon_element_bits (<MODE>mode) / 2 + 1.
4573 | ;; vqshrn_n, vqrshrn_n | |
4574 | (define_insn "neon_v<shift_op><sup>_n<mode>" | |
88f77cba JB |
4575 | [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4576 | (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w") | |
94f0f2cc JG |
4577 | (match_operand:SI 2 "immediate_operand" "i")]
4578 | VQSHRN_N))] | |
88f77cba | 4579 | "TARGET_NEON"
b617fc71 | 4580 | {
d57daa0c | 4581 | arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
94f0f2cc | 4582 | return "v<shift_op>.<sup>%#<V_sz_elem>\t%P0, %q1, %2";
b617fc71 | 4583 | }
f7379e5e | 4584 | [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
c956e102 | 4585 | )
88f77cba | 4586 | |
94f0f2cc JG |
;; Patterns generated from the VQSHRUN_N unspec iterator: VN-mode input
;; (operand 1, printed with %q) and <V_narrow>-mode result (operand 0, printed
;; with %P).  The immediate operand 2 is passed to arm_const_bounds with
;; limits 1 and neon_element_bits (<MODE>mode) / 2 + 1.
4587 | ;; vqshrun_n, vqrshrun_n | |
4588 | (define_insn "neon_v<shift_op>_n<mode>" | |
88f77cba JB |
4589 | [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4590 | (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w") | |
94f0f2cc JG |
4591 | (match_operand:SI 2 "immediate_operand" "i")]
4592 | VQSHRUN_N))] | |
88f77cba | 4593 | "TARGET_NEON"
b617fc71 | 4594 | {
d57daa0c | 4595 | arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
94f0f2cc | 4596 | return "v<shift_op>.<V_s_elem>\t%P0, %q1, %2";
b617fc71 | 4597 | }
f7379e5e | 4598 | [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
c956e102 | 4599 | )
88f77cba JB |
4600 | |
;; vshl by immediate for VDQIX modes, modelled as UNSPEC_VSHL_N.  The
;; immediate operand 2 is passed to arm_const_bounds with limits 0 and
;; neon_element_bits (<MODE>mode) before the template is returned.
4601 | (define_insn "neon_vshl_n<mode>" | |
4602 | [(set (match_operand:VDQIX 0 "s_register_operand" "=w") | |
4603 | (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w") | |
94f0f2cc | 4604 | (match_operand:SI 2 "immediate_operand" "i")]
88f77cba JB |
4605 | UNSPEC_VSHL_N))]
4606 | "TARGET_NEON" | |
b617fc71 | 4607 | {
d57daa0c | 4608 | arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
b617fc71 JB |
4609 | return "vshl.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %2";
4610 | }
f7379e5e | 4611 | [(set_attr "type" "neon_shift_imm<q>")]
c956e102 | 4612 | )
88f77cba | 4613 | |
;; vqshl by immediate for VDQIX modes, generated from the VQSHL_N unspec
;; iterator.  The immediate operand 2 is passed to arm_const_bounds with
;; limits 0 and neon_element_bits (<MODE>mode) before the template is
;; returned.
94f0f2cc | 4614 | (define_insn "neon_vqshl_<sup>_n<mode>"
88f77cba JB |
4615 | [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4616 | (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w") | |
94f0f2cc JG |
4617 | (match_operand:SI 2 "immediate_operand" "i")]
4618 | VQSHL_N))] | |
88f77cba | 4619 | "TARGET_NEON"
b617fc71 | 4620 | {
d57daa0c | 4621 | arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
94f0f2cc | 4622 | return "vqshl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
b617fc71 | 4623 | }
f7379e5e | 4624 | [(set_attr "type" "neon_sat_shift_imm<q>")]
c956e102 | 4625 | )
88f77cba JB |
4626 | |
;; vqshlu by immediate for VDQIX modes, modelled as UNSPEC_VQSHLU_N.  The
;; immediate operand 2 is passed to arm_const_bounds with limits 0 and
;; neon_element_bits (<MODE>mode) before the template is returned.
4627 | (define_insn "neon_vqshlu_n<mode>" | |
4628 | [(set (match_operand:VDQIX 0 "s_register_operand" "=w") | |
4629 | (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w") | |
94f0f2cc | 4630 | (match_operand:SI 2 "immediate_operand" "i")]
88f77cba JB |
4631 | UNSPEC_VQSHLU_N))]
4632 | "TARGET_NEON" | |
b617fc71 | 4633 | {
d57daa0c | 4634 | arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
94f0f2cc | 4635 | return "vqshlu.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %2";
b617fc71 | 4636 | }
f7379e5e | 4637 | [(set_attr "type" "neon_sat_shift_imm<q>")]
c956e102 | 4638 | )
88f77cba | 4639 | |
;; vshll by immediate, generated from the VSHLL_N unspec iterator: VW-mode
;; input (operand 1, printed with %P) and <V_widen>-mode result (operand 0,
;; printed with %q).  The immediate operand 2 is passed to arm_const_bounds
;; with limits 0 and neon_element_bits (<MODE>mode) + 1.
;; NOTE(review): the inline comment states "0 < imm", yet the call passes 0 as
;; the lower limit exactly as neon_vshl_n does, whereas the shift-right
;; patterns pass 1.  The comment and the lower bound look inconsistent --
;; confirm against arm_const_bounds which one is intended.
94f0f2cc | 4640 | (define_insn "neon_vshll<sup>_n<mode>"
88f77cba JB |
4641 | [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4642 | (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w") | |
94f0f2cc JG |
4643 | (match_operand:SI 2 "immediate_operand" "i")]
4644 | VSHLL_N))] | |
88f77cba | 4645 | "TARGET_NEON"
b617fc71 | 4646 | {
8cb32ff2 | 4647 | /* The boundaries are: 0 < imm <= size. */
d57daa0c | 4648 | arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode) + 1);
94f0f2cc | 4649 | return "vshll.<sup>%#<V_sz_elem>\t%q0, %P1, %2";
b617fc71 | 4650 | }
f7379e5e | 4651 | [(set_attr "type" "neon_shift_imm_long")]
c956e102 | 4652 | )
88f77cba | 4653 | |
94f0f2cc JG |
;; Patterns generated from the VSRA_N unspec iterator over VDQIX modes.
;; Operand 1 is tied to the output by its "0" constraint, so only operands 0,
;; 2 and 3 appear in the template.  The immediate operand 3 is passed to
;; arm_const_bounds with limits 1 and neon_element_bits (<MODE>mode) + 1.
4654 | ;; vsra_n, vrsra_n | |
4655 | (define_insn "neon_v<shift_op><sup>_n<mode>" | |
88f77cba JB |
4656 | [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4657 | (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0") | |
4658 | (match_operand:VDQIX 2 "s_register_operand" "w") | |
94f0f2cc JG |
4659 | (match_operand:SI 3 "immediate_operand" "i")]
4660 | VSRA_N))] | |
88f77cba | 4661 | "TARGET_NEON"
b617fc71 | 4662 | {
d57daa0c | 4663 | arm_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
94f0f2cc | 4664 | return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
b617fc71 | 4665 | }
f7379e5e | 4666 | [(set_attr "type" "neon_shift_acc<q>")]
c956e102 | 4667 | )
88f77cba JB |
4668 | |
;; vsri for VDQIX modes, modelled as UNSPEC_VSRI.  Operand 1 is tied to the
;; output by its "0" constraint, so only operands 0, 2 and 3 appear in the
;; template.  The immediate operand 3 is passed to arm_const_bounds with
;; limits 1 and neon_element_bits (<MODE>mode) + 1.
;; NOTE(review): the type attribute is neon_shift_reg<q> although the shift
;; amount is an immediate -- confirm the intended scheduling class.
4669 | (define_insn "neon_vsri_n<mode>" | |
4670 | [(set (match_operand:VDQIX 0 "s_register_operand" "=w") | |
4671 | (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0") | |
4672 | (match_operand:VDQIX 2 "s_register_operand" "w") | |
4673 | (match_operand:SI 3 "immediate_operand" "i")] | |
4674 | UNSPEC_VSRI))] | |
4675 | "TARGET_NEON" | |
b617fc71 | 4676 | {
d57daa0c | 4677 | arm_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
b617fc71 JB |
4678 | return "vsri.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
4679 | }
f7379e5e | 4680 | [(set_attr "type" "neon_shift_reg<q>")]
c956e102 | 4681 | )
88f77cba JB |
4682 | |
;; vsli for VDQIX modes, modelled as UNSPEC_VSLI.  Operand 1 is tied to the
;; output by its "0" constraint, so only operands 0, 2 and 3 appear in the
;; template.  The immediate operand 3 is passed to arm_const_bounds with
;; limits 0 and neon_element_bits (<MODE>mode).
;; NOTE(review): the type attribute is neon_shift_reg<q> although the shift
;; amount is an immediate -- confirm the intended scheduling class.
4683 | (define_insn "neon_vsli_n<mode>" | |
4684 | [(set (match_operand:VDQIX 0 "s_register_operand" "=w") | |
4685 | (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0") | |
4686 | (match_operand:VDQIX 2 "s_register_operand" "w") | |
4687 | (match_operand:SI 3 "immediate_operand" "i")] | |
4688 | UNSPEC_VSLI))] | |
4689 | "TARGET_NEON" | |
b617fc71 | 4690 | {
d57daa0c | 4691 | arm_const_bounds (operands[3], 0, neon_element_bits (<MODE>mode));
b617fc71 JB |
4692 | return "vsli.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
4693 | }
f7379e5e | 4694 | [(set_attr "type" "neon_shift_reg<q>")]
c956e102 | 4695 | )
88f77cba JB |
4696 | |
;; vtbl.8 with a one-register list, modelled as UNSPEC_VTBL.  Operand 1 (V8QI)
;; is printed inside the register-list braces and operand 2 (V8QI) follows
;; the list.
4697 | (define_insn "neon_vtbl1v8qi" | |
4698 | [(set (match_operand:V8QI 0 "s_register_operand" "=w") | |
4699 | (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "w") | |
4700 | (match_operand:V8QI 2 "s_register_operand" "w")] | |
4701 | UNSPEC_VTBL))] | |
4702 | "TARGET_NEON" | |
c956e102 | 4703 | "vtbl.8\t%P0, {%P1}, %P2"
f7379e5e | 4704 | [(set_attr "type" "neon_tbl1")]
c956e102 | 4705 | )
88f77cba JB |
4706 | |
;; vtbl.8 with a two-register list.  Operand 1 is one TImode register; the
;; output code names its V8QI pieces explicitly so they can be printed as a
;; register list.
(define_insn "neon_vtbl2v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI [(match_operand:TI 1 "s_register_operand" "w")
                      (match_operand:V8QI 2 "s_register_operand" "w")]
                     UNSPEC_VTBL))]
  "TARGET_NEON"
{
  int table_regno = REGNO (operands[1]);
  rtx asm_ops[4];

  asm_ops[0] = operands[0];
  /* List entries sit at hard register numbers base, base + 2.  */
  for (int i = 0; i < 2; i++)
    asm_ops[1 + i] = gen_rtx_REG (V8QImode, table_regno + 2 * i);
  asm_ops[3] = operands[2];
  output_asm_insn ("vtbl.8\t%P0, {%P1, %P2}, %P3", asm_ops);

  /* Everything has already been printed.  */
  return "";
}
  [(set_attr "type" "neon_tbl2")]
)
88f77cba JB |
4727 | |
;; vtbl.8 with a three-register list.  Operand 1 is one EImode register; the
;; output code names its V8QI pieces explicitly so they can be printed as a
;; register list.
(define_insn "neon_vtbl3v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI [(match_operand:EI 1 "s_register_operand" "w")
                      (match_operand:V8QI 2 "s_register_operand" "w")]
                     UNSPEC_VTBL))]
  "TARGET_NEON"
{
  int table_regno = REGNO (operands[1]);
  rtx asm_ops[5];

  asm_ops[0] = operands[0];
  /* List entries sit at hard register numbers base, base + 2, base + 4.  */
  for (int i = 0; i < 3; i++)
    asm_ops[1 + i] = gen_rtx_REG (V8QImode, table_regno + 2 * i);
  asm_ops[4] = operands[2];
  output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3}, %P4", asm_ops);

  /* Everything has already been printed.  */
  return "";
}
  [(set_attr "type" "neon_tbl3")]
)
88f77cba JB |
4749 | |
;; vtbl.8 with a four-register list.  Operand 1 is one OImode register; the
;; output code names its V8QI pieces explicitly so they can be printed as a
;; register list.
(define_insn "neon_vtbl4v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI [(match_operand:OI 1 "s_register_operand" "w")
                      (match_operand:V8QI 2 "s_register_operand" "w")]
                     UNSPEC_VTBL))]
  "TARGET_NEON"
{
  int table_regno = REGNO (operands[1]);
  rtx asm_ops[6];

  asm_ops[0] = operands[0];
  /* List entries sit at hard register numbers base, base + 2, ...,
     base + 6.  */
  for (int i = 0; i < 4; i++)
    asm_ops[1 + i] = gen_rtx_REG (V8QImode, table_regno + 2 * i);
  asm_ops[5] = operands[2];
  output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", asm_ops);

  /* Everything has already been printed.  */
  return "";
}
  [(set_attr "type" "neon_tbl4")]
)
88f77cba | 4772 | |
b440f324 RH |
4773 | ;; These three are used by the vec_perm infrastructure for V16QImode. |
;; V16QI table lookup with a V16QI table.  Output as "#" and split once
;; reload has completed into two neon_vtbl2v8qi insns, one per V8QI half of
;; the destination and selector; the table is viewed as a TImode value.
(define_insn_and_split "neon_vtbl1v16qi"
  [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
        (unspec:V16QI [(match_operand:V16QI 1 "s_register_operand" "w")
                       (match_operand:V16QI 2 "s_register_operand" "w")]
                      UNSPEC_VTBL))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx dest = operands[0];
  rtx table = gen_lowpart (TImode, operands[1]);
  rtx selector = operands[2];
  rtx dest_half, selector_half;
  unsigned half_ofs;

  /* Low halves of destination and selector.  */
  half_ofs = subreg_lowpart_offset (V8QImode, V16QImode);
  dest_half = simplify_subreg (V8QImode, dest, V16QImode, half_ofs);
  selector_half = simplify_subreg (V8QImode, selector, V16QImode, half_ofs);
  emit_insn (gen_neon_vtbl2v8qi (dest_half, table, selector_half));

  /* High halves of destination and selector.  */
  half_ofs = subreg_highpart_offset (V8QImode, V16QImode);
  dest_half = simplify_subreg (V8QImode, dest, V16QImode, half_ofs);
  selector_half = simplify_subreg (V8QImode, selector, V16QImode, half_ofs);
  emit_insn (gen_neon_vtbl2v8qi (dest_half, table, selector_half));
  DONE;
}
  [(set_attr "type" "multiple")]
)
b440f324 RH |
4804 | |
;; V16QI table lookup with an OImode table.  Output as "#" and split once
;; reload has completed into two insns, one per V8QI half of the destination
;; and selector.
;; NOTE(review): the split calls gen_neon_vtbl2v8qi with the OImode table,
;; while neon_vtbl2v8qi declares operand 1 as TI and neon_vtbl4v8qi declares
;; it as OI -- confirm which pattern the emitted RTL is expected to match.
(define_insn_and_split "neon_vtbl2v16qi"
  [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
        (unspec:V16QI [(match_operand:OI 1 "s_register_operand" "w")
                       (match_operand:V16QI 2 "s_register_operand" "w")]
                      UNSPEC_VTBL))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx dest = operands[0];
  rtx table = operands[1];
  rtx selector = operands[2];
  rtx dest_half, selector_half;
  unsigned half_ofs;

  /* Low halves of destination and selector.  */
  half_ofs = subreg_lowpart_offset (V8QImode, V16QImode);
  dest_half = simplify_subreg (V8QImode, dest, V16QImode, half_ofs);
  selector_half = simplify_subreg (V8QImode, selector, V16QImode, half_ofs);
  emit_insn (gen_neon_vtbl2v8qi (dest_half, table, selector_half));

  /* High halves of destination and selector.  */
  half_ofs = subreg_highpart_offset (V8QImode, V16QImode);
  dest_half = simplify_subreg (V8QImode, dest, V16QImode, half_ofs);
  selector_half = simplify_subreg (V8QImode, selector, V16QImode, half_ofs);
  emit_insn (gen_neon_vtbl2v8qi (dest_half, table, selector_half));
  DONE;
}
  [(set_attr "type" "multiple")]
)
b440f324 RH |
4835 | |
4836 | ;; ??? Logically we should extend the regular neon_vcombine pattern to | |
4837 | ;; handle quad-word input modes, producing octa-word output modes. But | |
4838 | ;; that requires us to add support for octa-word vector modes in moves. | |
4839 | ;; That seems overkill for this one use in vec_perm. | |
;; Combines two V16QI registers (operands 1 and 2) into one OImode register,
;; modelled as UNSPEC_VCONCAT.  Output as "#" and split once reload has
;; completed; the split body delegates to neon_split_vcombine (operands).
4840 | (define_insn_and_split "neon_vcombinev16qi" | |
4841 | [(set (match_operand:OI 0 "s_register_operand" "=w") | |
4842 | (unspec:OI [(match_operand:V16QI 1 "s_register_operand" "w") | |
4843 | (match_operand:V16QI 2 "s_register_operand" "w")] | |
4844 | UNSPEC_VCONCAT))] | |
4845 | "TARGET_NEON" | |
4846 | "#" | |
4847 | "&& reload_completed" | |
4848 | [(const_int 0)] | |
4849 | { | |
4850 | neon_split_vcombine (operands); | |
4851 | DONE; | |
f7379e5e JG |
4852 | }
4853 | [(set_attr "type" "multiple")] | |
4854 | ) | |
b440f324 | 4855 | |
88f77cba JB |
;; vtbx.8 with a one-register list, modelled as UNSPEC_VTBX.  Operand 1 is
;; tied to the output by its "0" constraint, so the template only prints
;; operand 0, the table operand 2 (inside the braces) and operand 3.
4856 | (define_insn "neon_vtbx1v8qi" | |
4857 | [(set (match_operand:V8QI 0 "s_register_operand" "=w") | |
4858 | (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0") | |
4859 | (match_operand:V8QI 2 "s_register_operand" "w") | |
4860 | (match_operand:V8QI 3 "s_register_operand" "w")] | |
4861 | UNSPEC_VTBX))] | |
4862 | "TARGET_NEON" | |
c956e102 | 4863 | "vtbx.8\t%P0, {%P2}, %P3"
f7379e5e | 4864 | [(set_attr "type" "neon_tbl1")]
c956e102 | 4865 | )
88f77cba JB |
4866 | |
;; vtbx.8 with a two-register list.  Operand 1 is tied to the output; operand
;; 2 is one TImode register whose V8QI pieces are named explicitly so they
;; can be printed as a register list.
(define_insn "neon_vtbx2v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
                      (match_operand:TI 2 "s_register_operand" "w")
                      (match_operand:V8QI 3 "s_register_operand" "w")]
                     UNSPEC_VTBX))]
  "TARGET_NEON"
{
  int table_regno = REGNO (operands[2]);
  rtx asm_ops[4];

  asm_ops[0] = operands[0];
  /* List entries sit at hard register numbers base, base + 2.  */
  for (int i = 0; i < 2; i++)
    asm_ops[1 + i] = gen_rtx_REG (V8QImode, table_regno + 2 * i);
  asm_ops[3] = operands[3];
  output_asm_insn ("vtbx.8\t%P0, {%P1, %P2}, %P3", asm_ops);

  /* Everything has already been printed.  */
  return "";
}
  [(set_attr "type" "neon_tbl2")]
)
88f77cba JB |
4888 | |
;; vtbx.8 with a three-register list.  Operand 1 is tied to the output;
;; operand 2 is one EImode register whose V8QI pieces are named explicitly so
;; they can be printed as a register list.
(define_insn "neon_vtbx3v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
                      (match_operand:EI 2 "s_register_operand" "w")
                      (match_operand:V8QI 3 "s_register_operand" "w")]
                     UNSPEC_VTBX))]
  "TARGET_NEON"
{
  int table_regno = REGNO (operands[2]);
  rtx asm_ops[5];

  asm_ops[0] = operands[0];
  /* List entries sit at hard register numbers base, base + 2, base + 4.  */
  for (int i = 0; i < 3; i++)
    asm_ops[1 + i] = gen_rtx_REG (V8QImode, table_regno + 2 * i);
  asm_ops[4] = operands[3];
  output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3}, %P4", asm_ops);

  /* Everything has already been printed.  */
  return "";
}
  [(set_attr "type" "neon_tbl3")]
)
88f77cba JB |
4911 | |
;; VTBX (UNSPEC_VTBX) pattern with a four-register table.  Operand 1 is tied
;; to the destination, operand 2 is the table held in an OImode register and
;; operand 3 is the V8QI index vector.  The C body makes V8QImode register
;; views at REGNO (operands[2]), + 2, + 4 and + 6 and prints the "vtbx.8"
;; template with that four-entry register list.
;; NOTE(review): the step of 2 between table registers presumably reflects
;; how D registers map onto hard register numbers -- confirm.
4912 | (define_insn "neon_vtbx4v8qi" | |
4913 | [(set (match_operand:V8QI 0 "s_register_operand" "=w") | |
4914 | (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0") | |
4915 | (match_operand:OI 2 "s_register_operand" "w") | |
4916 | (match_operand:V8QI 3 "s_register_operand" "w")] | |
4917 | UNSPEC_VTBX))] | |
4918 | "TARGET_NEON" | |
4919 | { | |
4920 | rtx ops[6]; | |
4921 | int tabbase = REGNO (operands[2]); | |
4922 | ||
4923 | ops[0] = operands[0]; | |
4924 | ops[1] = gen_rtx_REG (V8QImode, tabbase); | |
4925 | ops[2] = gen_rtx_REG (V8QImode, tabbase + 2); | |
4926 | ops[3] = gen_rtx_REG (V8QImode, tabbase + 4); | |
4927 | ops[4] = gen_rtx_REG (V8QImode, tabbase + 6); | |
4928 | ops[5] = operands[3]; | |
4929 | output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops); | |
4930 | ||
4931 | return ""; | |
c956e102 | 4932 | } |
f7379e5e | 4933 | [(set_attr "type" "neon_tbl4")] |
c956e102 | 4934 | ) |
88f77cba | 4935 | |
;; Expander for the two-result VTRN form over the VDQWH modes.  It emits a
;; PARALLEL of two sets: operand 0 receives UNSPEC_VTRN1 of operands 1 and 2,
;; and operand 3 receives UNSPEC_VTRN2 of the same two inputs.  The empty
;; preparation string means the RTL template is emitted unchanged.
;; NOTE(review): the leading "@" marks a parameterized name (see the GCC
;; internals manual), so callers presumably pass the mode explicitly.
ff03f2d1 | 4936 | (define_expand "@neon_vtrn<mode>_internal" |
28131dfe | 4937 | [(parallel |
b1a970a5 MW |
4938 | [(set (match_operand:VDQWH 0 "s_register_operand") | |
4939 | (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand") | |
4940 | (match_operand:VDQWH 2 "s_register_operand")] | |
28131dfe | 4941 | UNSPEC_VTRN1)) |
b1a970a5 MW |
4942 | (set (match_operand:VDQWH 3 "s_register_operand") | |
4943 | (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])] | |
28131dfe RE |
4944 | "TARGET_NEON" | |
4945 | "" | |
4946 | ) | |
4947 | ||
;; Matcher for the two-set VTRN form.  The outputs are operands 0 and 2, both
;; early-clobber ("=&w"); input operand 1 is tied to output 0 and input
;; operand 3 is tied to output 2.  Because of the ties the single instruction
;; printed, "vtrn.<V_sz_elem>", names only operands 0 and 2.
4948 | ;; Note: Different operand numbering to handle tied registers correctly. | |
4949 | (define_insn "*neon_vtrn<mode>_insn" | |
b1a970a5 MW |
4950 | [(set (match_operand:VDQWH 0 "s_register_operand" "=&w") | |
4951 | (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0") | |
4952 | (match_operand:VDQWH 3 "s_register_operand" "2")] | |
4953 | UNSPEC_VTRN1)) | |
4954 | (set (match_operand:VDQWH 2 "s_register_operand" "=&w") | |
4955 | (unspec:VDQWH [(match_dup 1) (match_dup 3)] | |
4956 | UNSPEC_VTRN2))] | |
7e7cfcf6 | 4957 | "TARGET_NEON" |
28131dfe | 4958 | "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2" |
f7379e5e | 4959 | [(set_attr "type" "neon_permute<q>")] |
c956e102 | 4960 | ) |
88f77cba | 4961 | |
;; Expander for the two-result VZIP form over the VDQWH modes.  It emits a
;; PARALLEL of two sets: operand 0 receives UNSPEC_VZIP1 of operands 1 and 2,
;; and operand 3 receives UNSPEC_VZIP2 of the same two inputs.  The empty
;; preparation string means the RTL template is emitted unchanged.
;; NOTE(review): the leading "@" marks a parameterized name (see the GCC
;; internals manual), so callers presumably pass the mode explicitly.
ff03f2d1 | 4962 | (define_expand "@neon_vzip<mode>_internal" |
28131dfe | 4963 | [(parallel |
b1a970a5 MW |
4964 | [(set (match_operand:VDQWH 0 "s_register_operand") | |
4965 | (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand") | |
4966 | (match_operand:VDQWH 2 "s_register_operand")] | |
4967 | UNSPEC_VZIP1)) | |
4968 | (set (match_operand:VDQWH 3 "s_register_operand") | |
4969 | (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])] | |
28131dfe RE |
4970 | "TARGET_NEON" | |
4971 | "" | |
4972 | ) | |
4973 | ||
;; Matcher for the two-set VZIP form.  The outputs are operands 0 and 2, both
;; early-clobber ("=&w"); input operand 1 is tied to output 0 and input
;; operand 3 is tied to output 2.  Because of the ties the single instruction
;; printed, "vzip.<V_sz_elem>", names only operands 0 and 2.
4974 | ;; Note: Different operand numbering to handle tied registers correctly. | |
4975 | (define_insn "*neon_vzip<mode>_insn" | |
b1a970a5 MW |
4976 | [(set (match_operand:VDQWH 0 "s_register_operand" "=&w") | |
4977 | (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0") | |
4978 | (match_operand:VDQWH 3 "s_register_operand" "2")] | |
4979 | UNSPEC_VZIP1)) | |
4980 | (set (match_operand:VDQWH 2 "s_register_operand" "=&w") | |
4981 | (unspec:VDQWH [(match_dup 1) (match_dup 3)] | |
4982 | UNSPEC_VZIP2))] | |
7e7cfcf6 | 4983 | "TARGET_NEON" |
28131dfe | 4984 | "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2" |
f7379e5e | 4985 | [(set_attr "type" "neon_zip<q>")] |
c956e102 | 4986 | ) |
88f77cba | 4987 | |
;; Expander for the two-result VUZP form over the VDQWH modes.  It emits a
;; PARALLEL of two sets: operand 0 receives UNSPEC_VUZP1 of operands 1 and 2,
;; and operand 3 receives UNSPEC_VUZP2 of the same two inputs.  The empty
;; preparation string means the RTL template is emitted unchanged.
;; NOTE(review): the leading "@" marks a parameterized name (see the GCC
;; internals manual), so callers presumably pass the mode explicitly.
ff03f2d1 | 4988 | (define_expand "@neon_vuzp<mode>_internal" |
28131dfe | 4989 | [(parallel |
b1a970a5 MW |
4990 | [(set (match_operand:VDQWH 0 "s_register_operand") | |
4991 | (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand") | |
4992 | (match_operand:VDQWH 2 "s_register_operand")] | |
28131dfe | 4993 | UNSPEC_VUZP1)) |
cd65e265 | 4994 | (set (match_operand:VDQWH 3 "s_register_operand") |
b1a970a5 | 4995 | (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])] |
28131dfe RE |
4996 | "TARGET_NEON" | |
4997 | "" | |
4998 | ) | |
4999 | ||
;; Matcher for the two-set VUZP form.  The outputs are operands 0 and 2, both
;; early-clobber ("=&w"); input operand 1 is tied to output 0 and input
;; operand 3 is tied to output 2.  Because of the ties the single instruction
;; printed, "vuzp.<V_sz_elem>", names only operands 0 and 2.
5000 | ;; Note: Different operand numbering to handle tied registers correctly. | |
5001 | (define_insn "*neon_vuzp<mode>_insn" | |
b1a970a5 MW |
5002 | [(set (match_operand:VDQWH 0 "s_register_operand" "=&w") | |
5003 | (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0") | |
5004 | (match_operand:VDQWH 3 "s_register_operand" "2")] | |
5005 | UNSPEC_VUZP1)) | |
5006 | (set (match_operand:VDQWH 2 "s_register_operand" "=&w") | |
5007 | (unspec:VDQWH [(match_dup 1) (match_dup 3)] | |
5008 | UNSPEC_VUZP2))] | |
88f77cba | 5009 | "TARGET_NEON" |
28131dfe | 5010 | "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2" |
f7379e5e | 5011 | [(set_attr "type" "neon_zip<q>")] |
c956e102 | 5012 | ) |
88f77cba | 5013 | |
;; Expander named vec_load_lanes<mode><mode> over the VDQX modes: sets a
;; register from UNSPEC_VLD1 of a neon_struct_operand memory of the same
;; mode.  There is no preparation code, so the template is emitted as is.
;; NOTE(review): the emitted RTL has the same shape as neon_vld1<mode>, which
;; is presumably the insn that matches it -- confirm.
3188ed59 RS |
5014 | (define_expand "vec_load_lanes<mode><mode>" | |
5015 | [(set (match_operand:VDQX 0 "s_register_operand") | |
5016 | (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand")] | |
5017 | UNSPEC_VLD1))] | |
5018 | "TARGET_NEON") | |
5019 | ||
;; Whole-vector load for the VDQX modes: operand 0 is the destination
;; register ("=w") and operand 1 the memory source ("Um").  Prints
;; "vld1.<V_sz_elem>" with the register operand under the %h modifier and the
;; memory operand under %A.  Scheduling type is neon_load1_1reg<q>.
88f77cba JB |
5020 | (define_insn "neon_vld1<mode>" | |
5021 | [(set (match_operand:VDQX 0 "s_register_operand" "=w") | |
6308e208 | 5022 | (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand" "Um")] |
88f77cba JB |
5023 | UNSPEC_VLD1))] | |
5024 | "TARGET_NEON" | |
6308e208 | 5025 | "vld1.<V_sz_elem>\t%h0, %A1" |
f7379e5e | 5026 | [(set_attr "type" "neon_load1_1reg<q>")] |
c956e102 | 5027 | ) |
88f77cba | 5028 | |
;; Single-lane load for the VDX modes.  Operand 1 is a <V_elem> memory
;; source, operand 2 is the previous vector value tied to the destination
;; ("0") and operand 3 is the immediate lane index.  The index is passed
;; through NEON_ENDIAN_LANE_N and written back to operands[3] before
;; printing.  When the mode has a single element (GET_MODE_NUNITS == 1) the
;; plain "vld1 %P0" form is printed; otherwise the "{%P0[%c3]}" lane form.
22f9db64 CB |
5029 | ;; The lane numbers in the RTL are in GCC lane order, having been flipped | |
5030 | ;; in arm_expand_neon_args. The lane numbers are restored to architectural | |
5031 | ;; lane order here. | |
88f77cba JB |
5032 | (define_insn "neon_vld1_lane<mode>" | |
5033 | [(set (match_operand:VDX 0 "s_register_operand" "=w") | |
6308e208 | 5034 | (unspec:VDX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um") |
88f77cba JB |
5035 | (match_operand:VDX 2 "s_register_operand" "0") | |
5036 | (match_operand:SI 3 "immediate_operand" "i")] | |
5037 | UNSPEC_VLD1_LANE))] | |
5038 | "TARGET_NEON" | |
5039 | { | |
22f9db64 | 5040 | HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3])); |
88f77cba | 5041 | HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode); |
22f9db64 | 5042 | operands[3] = GEN_INT (lane); |
88f77cba | 5043 | if (max == 1) |
6308e208 | 5044 | return "vld1.<V_sz_elem>\t%P0, %A1"; |
88f77cba | 5045 | else |
6308e208 | 5046 | return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1"; |
c956e102 | 5047 | } |
f7379e5e | 5048 | [(set_attr "type" "neon_load1_one_lane<q>")] |
c956e102 | 5049 | ) |
88f77cba | 5050 | |
;; Single-lane load for the VQX modes.  After the lane index is converted
;; with NEON_ENDIAN_LANE_N, a lane in the upper half (lane >= max / 2) is
;; rebased by max / 2 and the register number is advanced by 2.  Operand 0 is
;; then rewritten as a <V_HALF>mode register at that number, so the printed
;; instruction addresses only the selected half.  With two elements in the
;; mode (max == 2) the half is loaded whole, without a lane suffix.
22f9db64 CB |
5051 | ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed | |
5052 | ;; here on big endian targets. | |
88f77cba JB |
5053 | (define_insn "neon_vld1_lane<mode>" | |
5054 | [(set (match_operand:VQX 0 "s_register_operand" "=w") | |
6308e208 | 5055 | (unspec:VQX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um") |
88f77cba JB |
5056 | (match_operand:VQX 2 "s_register_operand" "0") | |
5057 | (match_operand:SI 3 "immediate_operand" "i")] | |
5058 | UNSPEC_VLD1_LANE))] | |
5059 | "TARGET_NEON" | |
5060 | { | |
22f9db64 | 5061 | HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3])); |
88f77cba | 5062 | HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode); |
22f9db64 | 5063 | operands[3] = GEN_INT (lane); |
88f77cba | 5064 | int regno = REGNO (operands[0]); |
e68ffe57 | 5065 | if (lane >= max / 2) |
88f77cba JB |
5066 | { | |
5067 | lane -= max / 2; | |
5068 | regno += 2; | |
5069 | operands[3] = GEN_INT (lane); | |
5070 | } | |
5071 | operands[0] = gen_rtx_REG (<V_HALF>mode, regno); | |
5072 | if (max == 2) | |
6308e208 | 5073 | return "vld1.<V_sz_elem>\t%P0, %A1"; |
88f77cba | 5074 | else |
6308e208 | 5075 | return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1"; |
c956e102 | 5076 | } |
f7379e5e | 5077 | [(set_attr "type" "neon_load1_one_lane<q>")] |
c956e102 | 5078 | ) |
88f77cba JB |
5079 | |
;; Load-and-duplicate for the VD_LANE modes: the destination is set to a
;; vec_duplicate of the <V_elem> memory operand.  Prints "vld1.<V_sz_elem>"
;; with the all-lanes register form "{%P0[]}".
5080 | (define_insn "neon_vld1_dup<mode>" | |
92422235 CL |
5081 | [(set (match_operand:VD_LANE 0 "s_register_operand" "=w") | |
5082 | (vec_duplicate:VD_LANE (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))] | |
88f77cba | 5083 | "TARGET_NEON" |
27d2e612 | 5084 | "vld1.<V_sz_elem>\t{%P0[]}, %A1" |
f7379e5e | 5085 | [(set_attr "type" "neon_load1_all_lanes<q>")] |
27d2e612 RE |
5086 | ) | |
5087 | ||
;; Expands to an UNSPEC_VLD1 set of a DImode register from a DImode
;; neon_struct_operand; there is no preparation code.
5088 | ;; Special case for DImode. Treat it exactly like a simple load. | |
5089 | (define_expand "neon_vld1_dupdi" | |
cd65e265 DZ |
5090 | [(set (match_operand:DI 0 "s_register_operand") | |
5091 | (unspec:DI [(match_operand:DI 1 "neon_struct_operand")] | |
27d2e612 RE |
5092 | UNSPEC_VLD1))] | |
5093 | "TARGET_NEON" | |
5094 | "" | |
c956e102 | 5095 | ) |
88f77cba JB |
5096 | |
;; Load-and-duplicate for the VQ2 modes: the destination is set to a
;; vec_duplicate of the <V_elem> memory operand.  Prints a single
;; "vld1.<V_sz_elem>" whose register list is "{%e0[], %f0[]}".
;; NOTE(review): %e0 and %f0 presumably print the low and high halves of
;; operand 0 -- confirm against the operand-printing code.
5097 | (define_insn "neon_vld1_dup<mode>" | |
92422235 CL |
5098 | [(set (match_operand:VQ2 0 "s_register_operand" "=w") | |
5099 | (vec_duplicate:VQ2 (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))] | |
88f77cba JB |
5100 | "TARGET_NEON" | |
5101 | { | |
8490252a | 5102 | return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1"; |
c956e102 | 5103 | } |
f7379e5e | 5104 | [(set_attr "type" "neon_load1_all_lanes<q>")] |
8490252a CL |
5105 | ) | |
5106 | ||
;; Load-and-duplicate of a DImode value into V2DI.  The insn itself prints
;; "#" and is split once reload_completed holds: the low DImode part of
;; operand 0 is loaded with gen_neon_vld1_dupdi and then copied into the high
;; DImode part with emit_move_insn.  The length attribute is 8.
;; NOTE(review): 8 presumably accounts for the two instructions the split
;; produces -- confirm.
5107 | (define_insn_and_split "neon_vld1_dupv2di" | |
5108 | [(set (match_operand:V2DI 0 "s_register_operand" "=w") | |
5109 | (vec_duplicate:V2DI (match_operand:DI 1 "neon_struct_operand" "Um")))] | |
5110 | "TARGET_NEON" | |
5111 | "#" | |
5112 | "&& reload_completed" | |
5113 | [(const_int 0)] | |
5114 | { | |
5115 | rtx tmprtx = gen_lowpart (DImode, operands[0]); | |
5116 | emit_insn (gen_neon_vld1_dupdi (tmprtx, operands[1])); | |
5117 | emit_move_insn (gen_highpart (DImode, operands[0]), tmprtx ); | |
5118 | DONE; | |
5119 | } | |
5120 | [(set_attr "length" "8") | |
f7379e5e | 5121 | (set_attr "type" "neon_load1_all_lanes_q")] |
c956e102 | 5122 | ) |
88f77cba | 5123 | |
;; Expander named vec_store_lanes<mode><mode> over the VDQX modes: sets a
;; neon_struct_operand memory from UNSPEC_VST1 of a register of the same
;; mode.  There is no preparation code, so the template is emitted as is.
;; NOTE(review): the emitted RTL has the same shape as neon_vst1<mode>, which
;; is presumably the insn that matches it -- confirm.
3188ed59 RS |
5124 | (define_expand "vec_store_lanes<mode><mode>" | |
5125 | [(set (match_operand:VDQX 0 "neon_struct_operand") | |
5126 | (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand")] | |
5127 | UNSPEC_VST1))] | |
5128 | "TARGET_NEON") | |
5129 | ||
;; Whole-vector store for the VDQX modes: operand 0 is the memory
;; destination ("=Um") and operand 1 the source register ("w").  Prints
;; "vst1.<V_sz_elem>" with the register operand under the %h modifier and the
;; memory operand under %A.  Scheduling type is neon_store1_1reg<q>.
88f77cba | 5130 | (define_insn "neon_vst1<mode>" |
6308e208 | 5131 | [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um") |
88f77cba JB |
5132 | (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")] | |
5133 | UNSPEC_VST1))] | |
5134 | "TARGET_NEON" | |
6308e208 | 5135 | "vst1.<V_sz_elem>\t%h1, %A0" |
f7379e5e | 5136 | [(set_attr "type" "neon_store1_1reg<q>")]) |
88f77cba | 5137 | |
;; Single-lane store for the VDX modes.  Operand 0 is a <V_elem> memory
;; destination, operand 1 the source vector and operand 2 the immediate lane
;; index.  The index is passed through NEON_ENDIAN_LANE_N and written back to
;; operands[2] before printing.  When the mode has a single element
;; (GET_MODE_NUNITS == 1) the "{%P1}" form is printed; otherwise the
;; "{%P1[%c2]}" lane form.
22f9db64 CB |
5138 | ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed | |
5139 | ;; here on big endian targets. | |
88f77cba | 5140 | (define_insn "neon_vst1_lane<mode>" |
6308e208 | 5141 | [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um") |
058e2674 UW |
5142 | (unspec:<V_elem> | |
5143 | [(match_operand:VDX 1 "s_register_operand" "w") | |
5144 | (match_operand:SI 2 "immediate_operand" "i")] | |
5145 | UNSPEC_VST1_LANE))] | |
88f77cba JB |
5146 | "TARGET_NEON" | |
5147 | { | |
22f9db64 | 5148 | HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2])); |
88f77cba | 5149 | HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode); |
22f9db64 | 5150 | operands[2] = GEN_INT (lane); |
88f77cba | 5151 | if (max == 1) |
6308e208 | 5152 | return "vst1.<V_sz_elem>\t{%P1}, %A0"; |
88f77cba | 5153 | else |
6308e208 | 5154 | return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0"; |
c956e102 | 5155 | } |
f7379e5e JG |
5156 | [(set_attr "type" "neon_store1_one_lane<q>")] | |
5157 | ) | |
88f77cba | 5158 | |
;; Single-lane store for the VQX modes.  After the lane index is converted
;; with NEON_ENDIAN_LANE_N, a lane in the upper half (lane >= max / 2) is
;; rebased by max / 2 and the register number is advanced by 2.  Operand 1 is
;; then rewritten as a <V_HALF>mode register at that number and operand 2 as
;; the rebased lane.  With two elements in the mode (max == 2) the half is
;; stored whole, without a lane suffix.
22f9db64 CB |
5159 | ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed | |
5160 | ;; here on big endian targets. | |
88f77cba | 5161 | (define_insn "neon_vst1_lane<mode>" |
6308e208 | 5162 | [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um") |
058e2674 UW |
5163 | (unspec:<V_elem> | |
5164 | [(match_operand:VQX 1 "s_register_operand" "w") | |
5165 | (match_operand:SI 2 "immediate_operand" "i")] | |
5166 | UNSPEC_VST1_LANE))] | |
88f77cba JB |
5167 | "TARGET_NEON" | |
5168 | { | |
22f9db64 | 5169 | HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2])); |
88f77cba JB |
5170 | HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode); | |
5171 | int regno = REGNO (operands[1]); | |
e68ffe57 | 5172 | if (lane >= max / 2) |
88f77cba JB |
5173 | { | |
5174 | lane -= max / 2; | |
5175 | regno += 2; | |
88f77cba | 5176 | } |
22f9db64 | 5177 | operands[2] = GEN_INT (lane); |
88f77cba JB |
5178 | operands[1] = gen_rtx_REG (<V_HALF>mode, regno); | |
5179 | if (max == 2) | |
6308e208 | 5180 | return "vst1.<V_sz_elem>\t{%P1}, %A0"; |
88f77cba | 5181 | else |
6308e208 | 5182 | return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0"; |
c956e102 | 5183 | } |
f7379e5e | 5184 | [(set_attr "type" "neon_store1_one_lane<q>")] |
c956e102 | 5185 | ) |
88f77cba | 5186 | |
;; Expander named vec_load_lanesti<mode>: sets a TImode register from
;; UNSPEC_VLD2 of a TImode memory operand.  The inner UNSPEC_VSTRUCTDUMMY
;; unspec has no operands of its own; it carries the vector mode iterator.
;; NOTE(review): this expander iterates over VDX whereas the TImode
;; neon_vld2<mode> insn iterates over VDXBF -- confirm that is intended.
3188ed59 RS |
5187 | (define_expand "vec_load_lanesti<mode>" | |
5188 | [(set (match_operand:TI 0 "s_register_operand") | |
5189 | (unspec:TI [(match_operand:TI 1 "neon_struct_operand") | |
5190 | (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] | |
5191 | UNSPEC_VLD2))] | |
5192 | "TARGET_NEON") | |
5193 | ||
;; Two-vector structure load into a TImode register for the VDXBF modes.
;; When <V_sz_elem> is 64 the pattern prints "vld1.64"; otherwise it prints
;; "vld2.<V_sz_elem>".  The type attribute is chosen by the same test:
;; neon_load1_2reg<q> for 64-bit elements, neon_load2_2reg<q> otherwise.
88f77cba JB |
5194 | (define_insn "neon_vld2<mode>" | |
5195 | [(set (match_operand:TI 0 "s_register_operand" "=w") | |
6308e208 | 5196 | (unspec:TI [(match_operand:TI 1 "neon_struct_operand" "Um") |
eb637e76 | 5197 | (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
88f77cba JB |
5198 | UNSPEC_VLD2))] | |
5199 | "TARGET_NEON" | |
5200 | { | |
5201 | if (<V_sz_elem> == 64) | |
6308e208 | 5202 | return "vld1.64\t%h0, %A1"; |
88f77cba | 5203 | else |
6308e208 | 5204 | return "vld2.<V_sz_elem>\t%h0, %A1"; |
c956e102 | 5205 | } |
003bb7f3 | 5206 | [(set (attr "type") |
c956e102 | 5207 | (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64")) |
f7379e5e JG |
5208 | (const_string "neon_load1_2reg<q>") | |
5209 | (const_string "neon_load2_2reg<q>")))] | |
c956e102 | 5210 | ) |
88f77cba | 5211 | |
;; Expander named vec_load_lanesoi<mode>: sets an OImode register from
;; UNSPEC_VLD2 of an OImode memory operand.  The inner UNSPEC_VSTRUCTDUMMY
;; unspec has no operands of its own; it carries the vector mode iterator.
;; NOTE(review): this expander iterates over VQ2 whereas the OImode
;; neon_vld2<mode> insn iterates over VQ2BF -- confirm that is intended.
3188ed59 RS |
5212 | (define_expand "vec_load_lanesoi<mode>" | |
5213 | [(set (match_operand:OI 0 "s_register_operand") | |
5214 | (unspec:OI [(match_operand:OI 1 "neon_struct_operand") | |
4b644867 | 5215 | (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
3188ed59 RS |
5216 | UNSPEC_VLD2))] | |
5217 | "TARGET_NEON") | |
5218 | ||
;; Two-vector structure load into an OImode register for the VQ2BF modes.
;; Always prints "vld2.<V_sz_elem>" with the register operand under %h and
;; the memory operand under %A.  Scheduling type is neon_load2_2reg_q.
88f77cba JB |
5219 | (define_insn "neon_vld2<mode>" | |
5220 | [(set (match_operand:OI 0 "s_register_operand" "=w") | |
6308e208 | 5221 | (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um") |
eb637e76 | 5222 | (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
88f77cba JB |
5223 | UNSPEC_VLD2))] | |
5224 | "TARGET_NEON" | |
6308e208 | 5225 | "vld2.<V_sz_elem>\t%h0, %A1" |
f7379e5e | 5226 | [(set_attr "type" "neon_load2_2reg_q")]) |
88f77cba | 5227 | |
;; Two-element single-lane load into a TImode register pair (VD_LANE modes).
;; Operand 1 is a <V_two_elem> memory source, operand 2 the previous register
;; contents tied to the destination and operand 3 the immediate lane index.
;; The C body converts the lane with NEON_ENDIAN_LANE_N, builds DImode
;; register views at regno and regno + 2, and prints a "vld2" that applies
;; the same lane index to both registers.
22f9db64 CB |
5228 | ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed | |
5229 | ;; here on big endian targets. | |
88f77cba JB |
5230 | (define_insn "neon_vld2_lane<mode>" | |
5231 | [(set (match_operand:TI 0 "s_register_operand" "=w") | |
6308e208 | 5232 | (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um") |
88f77cba JB |
5233 | (match_operand:TI 2 "s_register_operand" "0") | |
5234 | (match_operand:SI 3 "immediate_operand" "i") | |
4b644867 | 5235 | (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
88f77cba JB |
5236 | UNSPEC_VLD2_LANE))] | |
5237 | "TARGET_NEON" | |
5238 | { | |
22f9db64 | 5239 | HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3])); |
88f77cba JB |
5240 | int regno = REGNO (operands[0]); | |
5241 | rtx ops[4]; | |
88f77cba JB |
5242 | ops[0] = gen_rtx_REG (DImode, regno); | |
5243 | ops[1] = gen_rtx_REG (DImode, regno + 2); | |
5244 | ops[2] = operands[1]; | |
22f9db64 | 5245 | ops[3] = GEN_INT (lane); |
6308e208 | 5246 | output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops); |
88f77cba | 5247 | return ""; |
c956e102 | 5248 | } |
f7379e5e | 5249 | [(set_attr "type" "neon_load2_one_lane<q>")] |
c956e102 | 5250 | ) |
88f77cba | 5251 | |
;; Two-element single-lane load into an OImode register group (VQ_HS modes).
;; After NEON_ENDIAN_LANE_N conversion, a lane in the upper half
;; (lane >= max / 2) is rebased by max / 2 and the base register number is
;; advanced by 2.  The C body then builds DImode register views at regno and
;; regno + 4 and prints a "vld2" that applies the same lane index to both.
22f9db64 CB |
5252 | ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed | |
5253 | ;; here on big endian targets. | |
88f77cba JB |
5254 | (define_insn "neon_vld2_lane<mode>" | |
5255 | [(set (match_operand:OI 0 "s_register_operand" "=w") | |
6308e208 | 5256 | (unspec:OI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um") |
88f77cba JB |
5257 | (match_operand:OI 2 "s_register_operand" "0") | |
5258 | (match_operand:SI 3 "immediate_operand" "i") | |
4b644867 | 5259 | (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
88f77cba JB |
5260 | UNSPEC_VLD2_LANE))] | |
5261 | "TARGET_NEON" | |
5262 | { | |
22f9db64 | 5263 | HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3])); |
88f77cba JB |
5264 | HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode); | |
5265 | int regno = REGNO (operands[0]); | |
5266 | rtx ops[4]; | |
e68ffe57 | 5267 | if (lane >= max / 2) |
88f77cba JB |
5268 | { | |
5269 | lane -= max / 2; | |
5270 | regno += 2; | |
5271 | } | |
5272 | ops[0] = gen_rtx_REG (DImode, regno); | |
5273 | ops[1] = gen_rtx_REG (DImode, regno + 4); | |
5274 | ops[2] = operands[1]; | |
5275 | ops[3] = GEN_INT (lane); | |
6308e208 | 5276 | output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops); |
88f77cba | 5277 | return ""; |
c956e102 | 5278 | } |
f7379e5e | 5279 | [(set_attr "type" "neon_load2_one_lane<q>")] |
c956e102 | 5280 | ) |
88f77cba JB |
5281 | |
;; Two-element all-lanes load into a TImode register for the VDXBF modes.
;; When the mode has more than one element the pattern prints
;; "vld2.<V_sz_elem>" with the all-lanes list "{%e0[], %f0[]}"; for a
;; single-element mode it prints a plain "vld1.<V_sz_elem> %h0".  The type
;; attribute makes the same distinction using <V_mode_nunits>.
5282 | (define_insn "neon_vld2_dup<mode>" | |
5283 | [(set (match_operand:TI 0 "s_register_operand" "=w") | |
6308e208 | 5284 | (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um") |
eb637e76 | 5285 | (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
88f77cba JB |
5286 | UNSPEC_VLD2_DUP))] | |
5287 | "TARGET_NEON" | |
5288 | { | |
5289 | if (GET_MODE_NUNITS (<MODE>mode) > 1) | |
6308e208 | 5290 | return "vld2.<V_sz_elem>\t{%e0[], %f0[]}, %A1"; |
88f77cba | 5291 | else |
6308e208 | 5292 | return "vld1.<V_sz_elem>\t%h0, %A1"; |
c956e102 | 5293 | } |
003bb7f3 | 5294 | [(set (attr "type") |
c956e102 | 5295 | (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1")) |
f7379e5e JG |
5296 | (const_string "neon_load2_all_lanes<q>") | |
5297 | (const_string "neon_load1_1reg<q>")))] | |
c956e102 | 5298 | ) |
88f77cba | 5299 | |
;; V8BF variant of the two-element dup load, enabled under TARGET_BF16_SIMD.
;; The destination is an OImode register; the C body builds V4BFmode views of
;; it at tabbase, + 2, + 4 and + 6 and prints
;; "vld2.16 {%P0, %P1, %P2, %P3}, %A4".
;; NOTE(review): unlike neon_vld2_dup<mode>, the printed register list has no
;; "[]" all-lanes suffix, although the pattern uses UNSPEC_VLD2_DUP and a
;; V2BF memory operand -- confirm this is the intended instruction.
eb637e76 DB |
5300 | (define_insn "neon_vld2_dupv8bf" | |
5301 | [(set (match_operand:OI 0 "s_register_operand" "=w") | |
5302 | (unspec:OI [(match_operand:V2BF 1 "neon_struct_operand" "Um") | |
5303 | (unspec:V8BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] | |
5304 | UNSPEC_VLD2_DUP))] | |
5305 | "TARGET_BF16_SIMD" | |
5306 | { | |
5307 | rtx ops[5]; | |
5308 | int tabbase = REGNO (operands[0]); | |
5309 | ||
5310 | ops[4] = operands[1]; | |
5311 | ops[0] = gen_rtx_REG (V4BFmode, tabbase); | |
5312 | ops[1] = gen_rtx_REG (V4BFmode, tabbase + 2); | |
5313 | ops[2] = gen_rtx_REG (V4BFmode, tabbase + 4); | |
5314 | ops[3] = gen_rtx_REG (V4BFmode, tabbase + 6); | |
5315 | output_asm_insn ("vld2.16\t{%P0, %P1, %P2, %P3}, %A4", ops); | |
5316 | return ""; | |
5317 | } | |
5318 | [(set_attr "type" "neon_load2_all_lanes_q")] | |
5319 | ) | |
5320 | ||
;; Expander named vec_store_lanesti<mode>: sets a TImode memory operand from
;; UNSPEC_VST2 of a TImode register.  The inner UNSPEC_VSTRUCTDUMMY unspec
;; has no operands of its own; it carries the vector mode iterator.
;; NOTE(review): this expander iterates over VDX whereas the TImode
;; neon_vst2<mode> insn iterates over VDXBF -- confirm that is intended.
3188ed59 RS |
5321 | (define_expand "vec_store_lanesti<mode>" | |
5322 | [(set (match_operand:TI 0 "neon_struct_operand") | |
5323 | (unspec:TI [(match_operand:TI 1 "s_register_operand") | |
5324 | (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] | |
5325 | UNSPEC_VST2))] | |
5326 | "TARGET_NEON") | |
5327 | ||
;; Two-vector structure store from a TImode register for the VDXBF modes.
;; When <V_sz_elem> is 64 the pattern prints "vst1.64"; otherwise it prints
;; "vst2.<V_sz_elem>".  The type attribute is chosen by the same test.
;; NOTE(review): the non-64-bit type is neon_store2_one_lane<q> although the
;; template stores whole registers (the TImode neon_vld2<mode> load uses
;; neon_load2_2reg<q>) -- confirm whether a whole-register store type was
;; intended.
88f77cba | 5328 | (define_insn "neon_vst2<mode>" |
6308e208 | 5329 | [(set (match_operand:TI 0 "neon_struct_operand" "=Um") |
88f77cba | 5330 | (unspec:TI [(match_operand:TI 1 "s_register_operand" "w") |
ff229375 | 5331 | (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
88f77cba JB |
5332 | UNSPEC_VST2))] | |
5333 | "TARGET_NEON" | |
5334 | { | |
5335 | if (<V_sz_elem> == 64) | |
6308e208 | 5336 | return "vst1.64\t%h1, %A0"; |
88f77cba | 5337 | else |
6308e208 | 5338 | return "vst2.<V_sz_elem>\t%h1, %A0"; |
c956e102 | 5339 | } |
003bb7f3 | 5340 | [(set (attr "type") |
c956e102 | 5341 | (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64")) |
f7379e5e JG |
5342 | (const_string "neon_store1_2reg<q>") | |
5343 | (const_string "neon_store2_one_lane<q>")))] | |
c956e102 | 5344 | ) |
88f77cba | 5345 | |
;; Expander named vec_store_lanesoi<mode>: sets an OImode memory operand from
;; UNSPEC_VST2 of an OImode register.  The inner UNSPEC_VSTRUCTDUMMY unspec
;; has no operands of its own; it carries the vector mode iterator.
;; NOTE(review): this expander iterates over VQ2 whereas the OImode
;; neon_vst2<mode> insn iterates over VQ2BF -- confirm that is intended.
3188ed59 RS |
5346 | (define_expand "vec_store_lanesoi<mode>" | |
5347 | [(set (match_operand:OI 0 "neon_struct_operand") | |
5348 | (unspec:OI [(match_operand:OI 1 "s_register_operand") | |
4b644867 | 5349 | (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
3188ed59 RS |
5350 | UNSPEC_VST2))] | |
5351 | "TARGET_NEON") | |
5352 | ||
;; Two-vector structure store from an OImode register for the VQ2BF modes.
;; Always prints "vst2.<V_sz_elem>" with the register operand under %h and
;; the memory operand under %A.  Scheduling type is neon_store2_4reg<q>.
88f77cba | 5353 | (define_insn "neon_vst2<mode>" |
6308e208 | 5354 | [(set (match_operand:OI 0 "neon_struct_operand" "=Um") |
88f77cba | 5355 | (unspec:OI [(match_operand:OI 1 "s_register_operand" "w") |
ff229375 | 5356 | (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
88f77cba JB |
5357 | UNSPEC_VST2))] | |
5358 | "TARGET_NEON" | |
6308e208 | 5359 | "vst2.<V_sz_elem>\t%h1, %A0" |
f7379e5e | 5360 | [(set_attr "type" "neon_store2_4reg<q>")] |
c956e102 | 5361 | ) |
88f77cba | 5362 | |
;; Two-element single-lane store from a TImode register pair (VD_LANE modes).
;; Operand 0 is a <V_two_elem> memory destination, operand 1 the source
;; registers and operand 2 the immediate lane index.  The C body converts the
;; lane with NEON_ENDIAN_LANE_N, builds DImode register views at regno and
;; regno + 2, and prints a "vst2" that applies the same lane index to both.
22f9db64 CB |
5363 | ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed | |
5364 | ;; here on big endian targets. | |
88f77cba | 5365 | (define_insn "neon_vst2_lane<mode>" |
6308e208 | 5366 | [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um") |
88f77cba JB |
5367 | (unspec:<V_two_elem> | |
5368 | [(match_operand:TI 1 "s_register_operand" "w") | |
5369 | (match_operand:SI 2 "immediate_operand" "i") | |
4b644867 | 5370 | (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
88f77cba JB |
5371 | UNSPEC_VST2_LANE))] | |
5372 | "TARGET_NEON" | |
5373 | { | |
22f9db64 | 5374 | HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2])); |
88f77cba JB |
5375 | int regno = REGNO (operands[1]); | |
5376 | rtx ops[4]; | |
88f77cba JB |
5377 | ops[0] = operands[0]; | |
5378 | ops[1] = gen_rtx_REG (DImode, regno); | |
5379 | ops[2] = gen_rtx_REG (DImode, regno + 2); | |
22f9db64 | 5380 | ops[3] = GEN_INT (lane); |
6308e208 | 5381 | output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops); |
88f77cba | 5382 | return ""; |
c956e102 | 5383 | } |
f7379e5e | 5384 | [(set_attr "type" "neon_store2_one_lane<q>")] |
c956e102 | 5385 | ) |
88f77cba | 5386 | |
;; Two-element single-lane store from an OImode register group (VQ_HS
;; modes).  After NEON_ENDIAN_LANE_N conversion, a lane in the upper half
;; (lane >= max / 2) is rebased by max / 2 and the base register number is
;; advanced by 2.  The C body then builds DImode register views at regno and
;; regno + 4 and prints a "vst2" that applies the same lane index to both.
22f9db64 CB |
5387 | ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed | |
5388 | ;; here on big endian targets. | |
88f77cba | 5389 | (define_insn "neon_vst2_lane<mode>" |
6308e208 | 5390 | [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um") |
88f77cba JB |
5391 | (unspec:<V_two_elem> | |
5392 | [(match_operand:OI 1 "s_register_operand" "w") | |
5393 | (match_operand:SI 2 "immediate_operand" "i") | |
4b644867 | 5394 | (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
88f77cba JB |
5395 | UNSPEC_VST2_LANE))] | |
5396 | "TARGET_NEON" | |
5397 | { | |
22f9db64 | 5398 | HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2])); |
88f77cba JB |
5399 | HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode); | |
5400 | int regno = REGNO (operands[1]); | |
5401 | rtx ops[4]; | |
e68ffe57 | 5402 | if (lane >= max / 2) |
88f77cba JB |
5403 | { | |
5404 | lane -= max / 2; | |
5405 | regno += 2; | |
5406 | } | |
5407 | ops[0] = operands[0]; | |
5408 | ops[1] = gen_rtx_REG (DImode, regno); | |
5409 | ops[2] = gen_rtx_REG (DImode, regno + 4); | |
5410 | ops[3] = GEN_INT (lane); | |
6308e208 | 5411 | output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops); |
88f77cba | 5412 | return ""; |
c956e102 | 5413 | } |
f7379e5e | 5414 | [(set_attr "type" "neon_store2_one_lane<q>")] |
c956e102 | 5415 | ) |
88f77cba | 5416 | |
;; Expander named vec_load_lanesei<mode>: sets an EImode register from
;; UNSPEC_VLD3 of an EImode memory operand.  The inner UNSPEC_VSTRUCTDUMMY
;; unspec has no operands of its own; it carries the vector mode iterator.
;; NOTE(review): this expander iterates over VDX whereas the EImode
;; neon_vld3<mode> insn iterates over VDXBF -- confirm that is intended.
3188ed59 RS |
5417 | (define_expand "vec_load_lanesei<mode>" | |
5418 | [(set (match_operand:EI 0 "s_register_operand") | |
5419 | (unspec:EI [(match_operand:EI 1 "neon_struct_operand") | |
5420 | (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] | |
5421 | UNSPEC_VLD3))] | |
5422 | "TARGET_NEON") | |
5423 | ||
;; Three-vector structure load into an EImode register for the VDXBF modes.
;; When <V_sz_elem> is 64 the pattern prints "vld1.64"; otherwise it prints
;; "vld3.<V_sz_elem>".  The type attribute is chosen by the same test:
;; neon_load1_3reg<q> for 64-bit elements, neon_load3_3reg<q> otherwise.
88f77cba JB |
5424 | (define_insn "neon_vld3<mode>" | |
5425 | [(set (match_operand:EI 0 "s_register_operand" "=w") | |
6308e208 | 5426 | (unspec:EI [(match_operand:EI 1 "neon_struct_operand" "Um") |
eb637e76 | 5427 | (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
88f77cba JB |
5428 | UNSPEC_VLD3))] | |
5429 | "TARGET_NEON" | |
5430 | { | |
5431 | if (<V_sz_elem> == 64) | |
6308e208 | 5432 | return "vld1.64\t%h0, %A1"; |
88f77cba | 5433 | else |
6308e208 | 5434 | return "vld3.<V_sz_elem>\t%h0, %A1"; |
c956e102 | 5435 | } |
003bb7f3 | 5436 | [(set (attr "type") |
c956e102 | 5437 | (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64")) |
f7379e5e JG |
5438 | (const_string "neon_load1_3reg<q>") | |
5439 | (const_string "neon_load3_3reg<q>")))] | |
c956e102 | 5440 | ) |
88f77cba | 5441 | |
;; Expander named vec_load_lanesci<mode>, iterated over VQ2, taking a CImode
;; register destination and a CImode memory source.  The preparation code
;; forwards both operands to gen_neon_vld3<mode> and ends with DONE, so the
;; operand template itself is never emitted.
3188ed59 RS |
5442 | (define_expand "vec_load_lanesci<mode>" | |
5443 | [(match_operand:CI 0 "s_register_operand") | |
5444 | (match_operand:CI 1 "neon_struct_operand") | |
4b644867 | 5445 | (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
3188ed59 RS |
5446 | "TARGET_NEON" | |
5447 | { | |
5448 | emit_insn (gen_neon_vld3<mode> (operands[0], operands[1])); | |
5449 | DONE; | |
5450 | }) | |
5451 | ||
;; Expander for the VLD3 with a CImode destination (VQ2BF iterator).  The
;; load is emitted as two insns: gen_neon_vld3qa<mode> reads the memory
;; operand re-addressed as EImode at offset 0, and gen_neon_vld3qb<mode>
;; reads the following EImode piece at offset GET_MODE_SIZE (EImode).  The
;; second call passes operands[0] again as its third argument, which the
;; neon_vld3qb<mode> pattern ties to its output.
88f77cba | 5452 | (define_expand "neon_vld3<mode>" |
6308e208 RS |
5453 | [(match_operand:CI 0 "s_register_operand") | |
5454 | (match_operand:CI 1 "neon_struct_operand") | |
eb637e76 | 5455 | (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
88f77cba JB |
5456 | "TARGET_NEON" | |
5457 | { | |
6308e208 RS |
5458 | rtx mem; | |
5459 | ||
5460 | mem = adjust_address (operands[1], EImode, 0); | |
5461 | emit_insn (gen_neon_vld3qa<mode> (operands[0], mem)); | |
5462 | mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode)); | |
5463 | emit_insn (gen_neon_vld3qb<mode> (operands[0], mem, operands[0])); | |
88f77cba JB |
5464 | DONE; | |
5465 | }) | |
5466 | ||
;; First half of the CImode VLD3 (UNSPEC_VLD3A).  Operand 1 is an EImode
;; memory source.  The C body builds DImode register views of the destination
;; at regno, regno + 4 and regno + 8 and prints "vld3.<V_sz_elem>" with that
;; three-register list.
5467 | (define_insn "neon_vld3qa<mode>" | |
5468 | [(set (match_operand:CI 0 "s_register_operand" "=w") | |
6308e208 | 5469 | (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um") |
eb637e76 | 5470 | (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
6308e208 | 5471 | UNSPEC_VLD3A))] |
88f77cba JB |
5472 | "TARGET_NEON" | |
5473 | { | |
5474 | int regno = REGNO (operands[0]); | |
5475 | rtx ops[4]; | |
5476 | ops[0] = gen_rtx_REG (DImode, regno); | |
5477 | ops[1] = gen_rtx_REG (DImode, regno + 4); | |
5478 | ops[2] = gen_rtx_REG (DImode, regno + 8); | |
a6217191 | 5479 | ops[3] = operands[1]; |
6308e208 | 5480 | output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops); |
88f77cba | 5481 | return ""; |
c956e102 | 5482 | } |
f7379e5e | 5483 | [(set_attr "type" "neon_load3_3reg<q>")] |
c956e102 | 5484 | ) |
88f77cba JB |
5485 | |
;; Second half of the CImode VLD3 (UNSPEC_VLD3B).  Operand 1 is an EImode
;; memory source and operand 2 is the previous CImode value, tied to the
;; destination ("0").  The C body builds DImode register views at regno + 2,
;; regno + 6 and regno + 10 and prints "vld3.<V_sz_elem>" with that
;; three-register list.
5486 | (define_insn "neon_vld3qb<mode>" | |
5487 | [(set (match_operand:CI 0 "s_register_operand" "=w") | |
6308e208 RS |
5488 | (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um") | |
5489 | (match_operand:CI 2 "s_register_operand" "0") | |
eb637e76 | 5490 | (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
6308e208 | 5491 | UNSPEC_VLD3B))] |
88f77cba JB |
5492 | "TARGET_NEON" | |
5493 | { | |
5494 | int regno = REGNO (operands[0]); | |
5495 | rtx ops[4]; | |
5496 | ops[0] = gen_rtx_REG (DImode, regno + 2); | |
5497 | ops[1] = gen_rtx_REG (DImode, regno + 6); | |
5498 | ops[2] = gen_rtx_REG (DImode, regno + 10); | |
6308e208 RS |
5499 | ops[3] = operands[1]; | |
5500 | output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops); | |
88f77cba | 5501 | return ""; |
c956e102 | 5502 | } |
f7379e5e | 5503 | [(set_attr "type" "neon_load3_3reg<q>")] |
c956e102 | 5504 | ) |
88f77cba | 5505 | |
;; Three-element single-lane load into an EImode register group (VD_LANE
;; modes).  Operand 1 is a <V_three_elem> memory source, operand 2 the
;; previous register contents tied to the destination and operand 3 the
;; immediate lane index.  The C body converts the lane with
;; NEON_ENDIAN_LANE_N, builds DImode register views at regno, + 2 and + 4,
;; and prints a "vld3" that applies the same lane index to all three.
;; NOTE(review): the memory operand is printed as plain "%3", without the
;; "A" modifier the vld1/vld2 patterns use -- presumably deliberate; confirm.
22f9db64 CB |
5506 | ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed | |
5507 | ;; here on big endian targets. | |
88f77cba JB |
5508 | (define_insn "neon_vld3_lane<mode>" | |
5509 | [(set (match_operand:EI 0 "s_register_operand" "=w") | |
6308e208 | 5510 | (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um") |
88f77cba JB |
5511 | (match_operand:EI 2 "s_register_operand" "0") | |
5512 | (match_operand:SI 3 "immediate_operand" "i") | |
4b644867 | 5513 | (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
88f77cba JB |
5514 | UNSPEC_VLD3_LANE))] | |
5515 | "TARGET_NEON" | |
5516 | { | |
22f9db64 | 5517 | HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])); |
88f77cba JB |
5518 | int regno = REGNO (operands[0]); | |
5519 | rtx ops[5]; | |
88f77cba JB |
5520 | ops[0] = gen_rtx_REG (DImode, regno); | |
5521 | ops[1] = gen_rtx_REG (DImode, regno + 2); | |
5522 | ops[2] = gen_rtx_REG (DImode, regno + 4); | |
5523 | ops[3] = operands[1]; | |
22f9db64 | 5524 | ops[4] = GEN_INT (lane); |
9594fe5e | 5525 | output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3", |
88f77cba JB |
5526 | ops); | |
5527 | return ""; | |
c956e102 | 5528 | } |
f7379e5e | 5529 | [(set_attr "type" "neon_load3_one_lane<q>")] |
c956e102 | 5530 | ) |
88f77cba | 5531 | |
;; Three-element single-lane load into a CImode register group (VQ_HS
;; modes).  After NEON_ENDIAN_LANE_N conversion, a lane in the upper half
;; (lane >= max / 2) is rebased by max / 2 and the base register number is
;; advanced by 2.  The C body then builds DImode register views at regno,
;; regno + 4 and regno + 8 and prints a "vld3" that applies the same lane
;; index to all three.
;; NOTE(review): the memory operand is printed as plain "%3", without the
;; "A" modifier the vld1/vld2 patterns use -- presumably deliberate; confirm.
22f9db64 CB |
5532 | ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed | |
5533 | ;; here on big endian targets. | |
88f77cba JB |
5534 | (define_insn "neon_vld3_lane<mode>" | |
5535 | [(set (match_operand:CI 0 "s_register_operand" "=w") | |
6308e208 | 5536 | (unspec:CI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um") |
88f77cba JB |
5537 | (match_operand:CI 2 "s_register_operand" "0") | |
5538 | (match_operand:SI 3 "immediate_operand" "i") | |
4b644867 | 5539 | (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
88f77cba JB |
5540 | UNSPEC_VLD3_LANE))] | |
5541 | "TARGET_NEON" | |
5542 | { | |
22f9db64 | 5543 | HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3])); |
88f77cba JB |
5544 | HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode); | |
5545 | int regno = REGNO (operands[0]); | |
5546 | rtx ops[5]; | |
e68ffe57 | 5547 | if (lane >= max / 2) |
88f77cba JB |
5548 | { | |
5549 | lane -= max / 2; | |
5550 | regno += 2; | |
5551 | } | |
5552 | ops[0] = gen_rtx_REG (DImode, regno); | |
5553 | ops[1] = gen_rtx_REG (DImode, regno + 4); | |
5554 | ops[2] = gen_rtx_REG (DImode, regno + 8); | |
5555 | ops[3] = operands[1]; | |
5556 | ops[4] = GEN_INT (lane); | |
9594fe5e | 5557 | output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3", |
88f77cba JB |
5558 | ops); | |
5559 | return ""; | |
c956e102 | 5560 | } |
f7379e5e | 5561 | [(set_attr "type" "neon_load3_one_lane<q>")] |
c956e102 | 5562 | ) |
88f77cba JB |
5563 | |
;; Three-element all-lanes load into an EImode register group for the VDXBF
;; modes.  When the mode has more than one element the C body builds DImode
;; register views at regno, + 2 and + 4 and prints "vld3.<V_sz_elem>" with
;; the all-lanes list "{%P0[], %P1[], %P2[]}"; for a single-element mode it
;; prints a plain "vld1.<V_sz_elem> %h0".  The type attribute makes the same
;; distinction using <V_mode_nunits>.
;; NOTE(review): in the multi-element arm the memory operand is printed as
;; plain "%3" rather than "%A3" -- presumably deliberate; confirm.
5564 | (define_insn "neon_vld3_dup<mode>" | |
5565 | [(set (match_operand:EI 0 "s_register_operand" "=w") | |
6308e208 | 5566 | (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um") |
eb637e76 | 5567 | (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
88f77cba JB |
5568 | UNSPEC_VLD3_DUP))] | |
5569 | "TARGET_NEON" | |
5570 | { | |
5571 | if (GET_MODE_NUNITS (<MODE>mode) > 1) | |
5572 | { | |
5573 | int regno = REGNO (operands[0]); | |
5574 | rtx ops[4]; | |
5575 | ops[0] = gen_rtx_REG (DImode, regno); | |
5576 | ops[1] = gen_rtx_REG (DImode, regno + 2); | |
5577 | ops[2] = gen_rtx_REG (DImode, regno + 4); | |
5578 | ops[3] = operands[1]; | |
9594fe5e | 5579 | output_asm_insn ("vld3.<V_sz_elem>\t{%P0[], %P1[], %P2[]}, %3", ops); |
88f77cba JB |
5580 | return ""; | |
5581 | } | |
5582 | else | |
6308e208 | 5583 | return "vld1.<V_sz_elem>\t%h0, %A1"; |
c956e102 | 5584 | } |
003bb7f3 | 5585 | [(set (attr "type") |
c956e102 | 5586 | (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1")) |
f7379e5e JG |
5587 | (const_string "neon_load3_all_lanes<q>") | |
5588 | (const_string "neon_load1_1reg<q>")))]) | |
88f77cba | 5589 | |
;; V8BF variant of the three-element all-lanes load, enabled under
;; TARGET_BF16_SIMD.  The destination is a CImode register; the C body builds
;; V4BFmode views of it at tabbase, tabbase + 2 and tabbase + 4 and prints
;; "vld3.16 {%P0[], %P1[], %P2[]}, %A3".
;; The RTL is tagged UNSPEC_VLD3_DUP, the same unspec neon_vld3_dup<mode>
;; uses, so the pattern describes a three-element load.
;; NOTE(review): the memory operand is V2BF although three elements are
;; loaded, and only three of the registers covered by CImode are named in the
;; template -- confirm both against the intended all-lanes semantics.
eb637e76 DB |
5590 | (define_insn "neon_vld3_dupv8bf" | |
5591 | [(set (match_operand:CI 0 "s_register_operand" "=w") | |
5592 | (unspec:CI [(match_operand:V2BF 1 "neon_struct_operand" "Um") | |
5593 | (unspec:V8BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] | |
5594 | UNSPEC_VLD3_DUP))] | |
5595 | "TARGET_BF16_SIMD" | |
5596 | { | |
5597 | rtx ops[4]; | |
5598 | int tabbase = REGNO (operands[0]); | |
5599 | ||
5600 | ops[3] = operands[1]; | |
5601 | ops[0] = gen_rtx_REG (V4BFmode, tabbase); | |
5602 | ops[1] = gen_rtx_REG (V4BFmode, tabbase + 2); | |
5603 | ops[2] = gen_rtx_REG (V4BFmode, tabbase + 4); | |
5604 | output_asm_insn ("vld3.16\t{%P0[], %P1[], %P2[]}, %A3", ops); | |
5605 | return ""; | |
5606 | } | |
5607 | [(set_attr "type" "neon_load3_all_lanes_q")] | |
5608 | ) | |
5609 | ||
3188ed59 RS |
5610 | (define_expand "vec_store_lanesei<mode>" ;; Expander with no C body: the EI-mode UNSPEC_VST3 store template below is emitted as written for VDX modes. | |
5611 | [(set (match_operand:EI 0 "neon_struct_operand") | |
5612 | (unspec:EI [(match_operand:EI 1 "s_register_operand") | |
5613 | (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] | |
5614 | UNSPEC_VST3))] | |
5615 | "TARGET_NEON") | |
5616 | ||
88f77cba | 5617 | (define_insn "neon_vst3<mode>" ;; Store an EI-mode register group as 3-element structures: vst3, or vst1.64 when the element size is 64 bits. |
6308e208 | 5618 | [(set (match_operand:EI 0 "neon_struct_operand" "=Um") |
88f77cba | 5619 | (unspec:EI [(match_operand:EI 1 "s_register_operand" "w") |
ff229375 | 5620 | (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
88f77cba JB |
5621 | UNSPEC_VST3))] | |
5622 | "TARGET_NEON" | |
5623 | { | |
5624 | if (<V_sz_elem> == 64) /* 64-bit elements take the vst1.64 form.  */ | |
6308e208 | 5625 | return "vst1.64\t%h1, %A0"; |
88f77cba | 5626 | else |
6308e208 | 5627 | return "vst3.<V_sz_elem>\t%h1, %A0"; |
c956e102 | 5628 | } |
003bb7f3 | 5629 | [(set (attr "type") |
c956e102 | 5630 | (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64")) |
f7379e5e JG |
5631 | (const_string "neon_store1_3reg<q>") | |
5632 | (const_string "neon_store3_3reg<q>")))]) ;; Whole-register vst3, the same type as neon_vst3qa/neon_vst3qb. | |
88f77cba | 5633 | |
3188ed59 RS |
5634 | (define_expand "vec_store_lanesci<mode>" ;; CI-mode store expander for VQ2 modes; it only forwards both operands to gen_neon_vst3<mode>. | |
5635 | [(match_operand:CI 0 "neon_struct_operand") | |
5636 | (match_operand:CI 1 "s_register_operand") | |
4b644867 | 5637 | (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
3188ed59 RS |
5638 | "TARGET_NEON" | |
5639 | { | |
5640 | emit_insn (gen_neon_vst3<mode> (operands[0], operands[1])); | |
5641 | DONE; | |
5642 | }) | |
5643 | ||
88f77cba | 5644 | (define_expand "neon_vst3<mode>" ;; Expand a CI-mode vst3 into two EI-sized stores: neon_vst3qa at offset 0, then neon_vst3qb at the next EI-sized offset. |
6308e208 RS |
5645 | [(match_operand:CI 0 "neon_struct_operand") | |
5646 | (match_operand:CI 1 "s_register_operand") | |
ff229375 | 5647 | (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
88f77cba JB |
5648 | "TARGET_NEON" | |
5649 | { | |
6308e208 RS |
5650 | rtx mem; | |
5651 | |
5652 | mem = adjust_address (operands[0], EImode, 0); /* First EI-sized part of the destination.  */ | |
5653 | emit_insn (gen_neon_vst3qa<mode> (mem, operands[1])); | |
5654 | mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode)); /* Step forward by one EImode size to the second part.  */ | |
5655 | emit_insn (gen_neon_vst3qb<mode> (mem, operands[1])); | |
88f77cba JB |
5656 | DONE; | |
5657 | }) | |
5658 | ||
5659 | (define_insn "neon_vst3qa<mode>" ;; First half of the CI-mode vst3: stores the DImode registers at offsets 0, 4 and 8 from operand 1's first register number. | |
6308e208 RS |
5660 | [(set (match_operand:EI 0 "neon_struct_operand" "=Um") | |
5661 | (unspec:EI [(match_operand:CI 1 "s_register_operand" "w") | |
ff229375 | 5662 | (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
6308e208 | 5663 | UNSPEC_VST3A))] |
88f77cba JB |
5664 | "TARGET_NEON" | |
5665 | { | |
6308e208 | 5666 | int regno = REGNO (operands[1]); |
88f77cba JB |
5667 | rtx ops[4]; /* ops[0] is the memory operand, ops[1..3] the register list.  */ | |
5668 | ops[0] = operands[0]; | |
5669 | ops[1] = gen_rtx_REG (DImode, regno); | |
5670 | ops[2] = gen_rtx_REG (DImode, regno + 4); | |
5671 | ops[3] = gen_rtx_REG (DImode, regno + 8); | |
6308e208 | 5672 | output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops); |
88f77cba | 5673 | return ""; |
c956e102 | 5674 | } |
f7379e5e | 5675 | [(set_attr "type" "neon_store3_3reg<q>")] |
c956e102 | 5676 | ) |
88f77cba JB |
5677 | |
5678 | (define_insn "neon_vst3qb<mode>" ;; Second half of the CI-mode vst3: stores the DImode registers at offsets 2, 6 and 10 from operand 1's first register number. | |
6308e208 RS |
5679 | [(set (match_operand:EI 0 "neon_struct_operand" "=Um") | |
5680 | (unspec:EI [(match_operand:CI 1 "s_register_operand" "w") | |
ff229375 | 5681 | (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
6308e208 | 5682 | UNSPEC_VST3B))] |
88f77cba JB |
5683 | "TARGET_NEON" | |
5684 | { | |
6308e208 | 5685 | int regno = REGNO (operands[1]); |
88f77cba JB |
5686 | rtx ops[4]; /* ops[0] is the memory operand, ops[1..3] the register list.  */ | |
5687 | ops[0] = operands[0]; | |
5688 | ops[1] = gen_rtx_REG (DImode, regno + 2); | |
5689 | ops[2] = gen_rtx_REG (DImode, regno + 6); | |
5690 | ops[3] = gen_rtx_REG (DImode, regno + 10); | |
6308e208 | 5691 | output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops); |
88f77cba | 5692 | return ""; |
c956e102 | 5693 | } |
f7379e5e | 5694 | [(set_attr "type" "neon_store3_3reg<q>")] |
c956e102 | 5695 | ) |
88f77cba | 5696 | |
22f9db64 CB |
5697 | ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed |
5698 | ;; here on big endian targets. | |
88f77cba | 5699 | (define_insn "neon_vst3_lane<mode>" ;; VD_LANE form: store lane operand 2 from each of three registers of an EI group with one vst3 lane store. |
6308e208 | 5700 | [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um") |
88f77cba JB |
5701 | (unspec:<V_three_elem> | |
5702 | [(match_operand:EI 1 "s_register_operand" "w") | |
5703 | (match_operand:SI 2 "immediate_operand" "i") | |
4b644867 | 5704 | (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
88f77cba JB |
5705 | UNSPEC_VST3_LANE))] | |
5706 | "TARGET_NEON" | |
5707 | { | |
22f9db64 | 5708 | HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2])); /* Lane index after the endian adjustment.  */ |
88f77cba JB |
5709 | int regno = REGNO (operands[1]); | |
5710 | rtx ops[5]; /* ops[0] memory, ops[1..3] registers, ops[4] lane number.  */ | |
88f77cba JB |
5711 | ops[0] = operands[0]; | |
5712 | ops[1] = gen_rtx_REG (DImode, regno); | |
5713 | ops[2] = gen_rtx_REG (DImode, regno + 2); | |
5714 | ops[3] = gen_rtx_REG (DImode, regno + 4); | |
22f9db64 | 5715 | ops[4] = GEN_INT (lane); |
9594fe5e | 5716 | output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0", |
88f77cba JB |
5717 | ops); | |
5718 | return ""; | |
c956e102 | 5719 | } |
f7379e5e | 5720 | [(set_attr "type" "neon_store3_one_lane<q>")] |
c956e102 | 5721 | ) |
88f77cba | 5722 | |
22f9db64 CB |
5723 | ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed |
5724 | ;; here on big endian targets. | |
88f77cba | 5725 | (define_insn "neon_vst3_lane<mode>" ;; VQ_HS form: store lane operand 2 from each of three registers picked out of a CI group. |
6308e208 | 5726 | [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um") |
88f77cba JB |
5727 | (unspec:<V_three_elem> | |
5728 | [(match_operand:CI 1 "s_register_operand" "w") | |
5729 | (match_operand:SI 2 "immediate_operand" "i") | |
4b644867 | 5730 | (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
88f77cba JB |
5731 | UNSPEC_VST3_LANE))] | |
5732 | "TARGET_NEON" | |
5733 | { | |
22f9db64 | 5734 | HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2])); /* Lane index after the endian adjustment.  */ |
88f77cba JB |
5735 | HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode); | |
5736 | int regno = REGNO (operands[1]); | |
5737 | rtx ops[5]; /* ops[0] memory, ops[1..3] registers, ops[4] lane number.  */ | |
e68ffe57 | 5738 | if (lane >= max / 2) /* Upper-half lanes are renumbered from 0 and the register list shifts by 2.  */ |
88f77cba JB |
5739 | { | |
5740 | lane -= max / 2; | |
5741 | regno += 2; | |
5742 | } | |
5743 | ops[0] = operands[0]; | |
5744 | ops[1] = gen_rtx_REG (DImode, regno); | |
5745 | ops[2] = gen_rtx_REG (DImode, regno + 4); | |
5746 | ops[3] = gen_rtx_REG (DImode, regno + 8); | |
5747 | ops[4] = GEN_INT (lane); | |
9594fe5e | 5748 | output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0", |
88f77cba JB |
5749 | ops); | |
5750 | return ""; | |
c956e102 | 5751 | } |
f7379e5e JG |
5752 | [(set_attr "type" "neon_store3_one_lane<q>")] | |
5753 | ) | |
88f77cba | 5754 | |
3188ed59 RS |
5755 | (define_expand "vec_load_lanesoi<mode>" ;; Expander with no C body: the OI-mode UNSPEC_VLD4 load template below is emitted as written for VDX modes. | |
5756 | [(set (match_operand:OI 0 "s_register_operand") | |
5757 | (unspec:OI [(match_operand:OI 1 "neon_struct_operand") | |
5758 | (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] | |
5759 | UNSPEC_VLD4))] | |
5760 | "TARGET_NEON") | |
5761 | ||
88f77cba JB |
5762 | (define_insn "neon_vld4<mode>" ;; Load an OI-mode register group from 4-element structures: vld4, or vld1.64 when the element size is 64 bits. | |
5763 | [(set (match_operand:OI 0 "s_register_operand" "=w") | |
6308e208 | 5764 | (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um") |
eb637e76 | 5765 | (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
88f77cba JB |
5766 | UNSPEC_VLD4))] | |
5767 | "TARGET_NEON" | |
5768 | { | |
5769 | if (<V_sz_elem> == 64) /* 64-bit elements take the vld1.64 form.  */ | |
6308e208 | 5770 | return "vld1.64\t%h0, %A1"; |
88f77cba | 5771 | else |
6308e208 | 5772 | return "vld4.<V_sz_elem>\t%h0, %A1"; |
c956e102 | 5773 | } |
003bb7f3 | 5774 | [(set (attr "type") |
c956e102 | 5775 | (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64")) |
f7379e5e JG |
5776 | (const_string "neon_load1_4reg<q>") | |
5777 | (const_string "neon_load4_4reg<q>")))] | |
c956e102 | 5778 | ) |
88f77cba | 5779 | |
3188ed59 RS |
5780 | (define_expand "vec_load_lanesxi<mode>" ;; XI-mode load expander for VQ2 modes; it only forwards both operands to gen_neon_vld4<mode>. | |
5781 | [(match_operand:XI 0 "s_register_operand") | |
5782 | (match_operand:XI 1 "neon_struct_operand") | |
4b644867 | 5783 | (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
3188ed59 RS |
5784 | "TARGET_NEON" | |
5785 | { | |
5786 | emit_insn (gen_neon_vld4<mode> (operands[0], operands[1])); | |
5787 | DONE; | |
5788 | }) | |
5789 | ||
88f77cba | 5790 | (define_expand "neon_vld4<mode>" ;; Expand an XI-mode vld4 into two OI-sized loads: neon_vld4qa at offset 0, then neon_vld4qb at the next OI-sized offset. |
6308e208 RS |
5791 | [(match_operand:XI 0 "s_register_operand") | |
5792 | (match_operand:XI 1 "neon_struct_operand") | |
eb637e76 | 5793 | (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
88f77cba JB |
5794 | "TARGET_NEON" | |
5795 | { | |
6308e208 RS |
5796 | rtx mem; | |
5797 | |
5798 | mem = adjust_address (operands[1], OImode, 0); /* First OI-sized part of the source.  */ | |
5799 | emit_insn (gen_neon_vld4qa<mode> (operands[0], mem)); | |
5800 | mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode)); /* Step forward by one OImode size to the second part.  */ | |
5801 | emit_insn (gen_neon_vld4qb<mode> (operands[0], mem, operands[0])); /* operands[0] is passed again as neon_vld4qb's tied input.  */ | |
88f77cba JB |
5802 | DONE; | |
5803 | }) | |
5804 | ||
5805 | (define_insn "neon_vld4qa<mode>" ;; First half of the XI-mode vld4: loads the DImode registers at offsets 0, 4, 8 and 12 from operand 0's first register number. | |
5806 | [(set (match_operand:XI 0 "s_register_operand" "=w") | |
6308e208 | 5807 | (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um") |
eb637e76 | 5808 | (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
6308e208 | 5809 | UNSPEC_VLD4A))] |
88f77cba JB |
5810 | "TARGET_NEON" | |
5811 | { | |
5812 | int regno = REGNO (operands[0]); | |
5813 | rtx ops[5]; /* ops[0..3] register list, ops[4] memory operand.  */ | |
5814 | ops[0] = gen_rtx_REG (DImode, regno); | |
5815 | ops[1] = gen_rtx_REG (DImode, regno + 4); | |
5816 | ops[2] = gen_rtx_REG (DImode, regno + 8); | |
5817 | ops[3] = gen_rtx_REG (DImode, regno + 12); | |
a6217191 | 5818 | ops[4] = operands[1]; |
6308e208 | 5819 | output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops); |
88f77cba | 5820 | return ""; |
c956e102 | 5821 | } |
f7379e5e | 5822 | [(set_attr "type" "neon_load4_4reg<q>")] |
c956e102 | 5823 | ) |
88f77cba JB |
5824 | |
5825 | (define_insn "neon_vld4qb<mode>" ;; Second half of the XI-mode vld4: loads the DImode registers at offsets 2, 6, 10 and 14 from operand 0's first register number. | |
5826 | [(set (match_operand:XI 0 "s_register_operand" "=w") | |
6308e208 RS |
5827 | (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um") | |
5828 | (match_operand:XI 2 "s_register_operand" "0") ;; Tied to operand 0 by the matching constraint. | |
eb637e76 | 5829 | (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
6308e208 | 5830 | UNSPEC_VLD4B))] |
88f77cba JB |
5831 | "TARGET_NEON" | |
5832 | { | |
5833 | int regno = REGNO (operands[0]); | |
5834 | rtx ops[5]; /* ops[0..3] register list, ops[4] memory operand.  */ | |
5835 | ops[0] = gen_rtx_REG (DImode, regno + 2); | |
5836 | ops[1] = gen_rtx_REG (DImode, regno + 6); | |
5837 | ops[2] = gen_rtx_REG (DImode, regno + 10); | |
5838 | ops[3] = gen_rtx_REG (DImode, regno + 14); | |
6308e208 RS |
5839 | ops[4] = operands[1]; | |
5840 | output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops); | |
88f77cba | 5841 | return ""; |
c956e102 | 5842 | } |
f7379e5e | 5843 | [(set_attr "type" "neon_load4_4reg<q>")] |
c956e102 | 5844 | ) |
88f77cba | 5845 | |
22f9db64 CB |
5846 | ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed |
5847 | ;; here on big endian targets. | |
88f77cba JB |
5848 | (define_insn "neon_vld4_lane<mode>" ;; VD_LANE form: load one 4-element structure into lane operand 3 of four registers of an OI group. | |
5849 | [(set (match_operand:OI 0 "s_register_operand" "=w") | |
6308e208 | 5850 | (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um") |
88f77cba JB |
5851 | (match_operand:OI 2 "s_register_operand" "0") ;; Tied to operand 0 by the matching constraint. | |
5852 | (match_operand:SI 3 "immediate_operand" "i") | |
4b644867 | 5853 | (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
88f77cba JB |
5854 | UNSPEC_VLD4_LANE))] | |
5855 | "TARGET_NEON" | |
5856 | { | |
22f9db64 | 5857 | HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3])); /* Lane index after the endian adjustment.  */ |
88f77cba JB |
5858 | int regno = REGNO (operands[0]); | |
5859 | rtx ops[6]; /* ops[0..3] registers, ops[4] memory, ops[5] lane number.  */ | |
88f77cba JB |
5860 | ops[0] = gen_rtx_REG (DImode, regno); | |
5861 | ops[1] = gen_rtx_REG (DImode, regno + 2); | |
5862 | ops[2] = gen_rtx_REG (DImode, regno + 4); | |
5863 | ops[3] = gen_rtx_REG (DImode, regno + 6); | |
5864 | ops[4] = operands[1]; | |
22f9db64 | 5865 | ops[5] = GEN_INT (lane); |
6308e208 | 5866 | output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4", |
88f77cba JB |
5867 | ops); | |
5868 | return ""; | |
c956e102 | 5869 | } |
f7379e5e | 5870 | [(set_attr "type" "neon_load4_one_lane<q>")] |
c956e102 | 5871 | ) |
88f77cba | 5872 | |
22f9db64 CB |
5873 | ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed |
5874 | ;; here on big endian targets. | |
88f77cba JB |
5875 | (define_insn "neon_vld4_lane<mode>" ;; VQ_HS form: load one 4-element structure into lane operand 3 of four registers picked out of an XI group. | |
5876 | [(set (match_operand:XI 0 "s_register_operand" "=w") | |
6308e208 | 5877 | (unspec:XI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um") |
88f77cba JB |
5878 | (match_operand:XI 2 "s_register_operand" "0") ;; Tied to operand 0 by the matching constraint. | |
5879 | (match_operand:SI 3 "immediate_operand" "i") | |
4b644867 | 5880 | (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
88f77cba JB |
5881 | UNSPEC_VLD4_LANE))] | |
5882 | "TARGET_NEON" | |
5883 | { | |
22f9db64 | 5884 | HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3])); /* Lane index after the endian adjustment.  */ |
88f77cba JB |
5885 | HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode); | |
5886 | int regno = REGNO (operands[0]); | |
5887 | rtx ops[6]; /* ops[0..3] registers, ops[4] memory, ops[5] lane number.  */ | |
e68ffe57 | 5888 | if (lane >= max / 2) /* Upper-half lanes are renumbered from 0 and the register list shifts by 2.  */ |
88f77cba JB |
5889 | { | |
5890 | lane -= max / 2; | |
5891 | regno += 2; | |
5892 | } | |
5893 | ops[0] = gen_rtx_REG (DImode, regno); | |
5894 | ops[1] = gen_rtx_REG (DImode, regno + 4); | |
5895 | ops[2] = gen_rtx_REG (DImode, regno + 8); | |
5896 | ops[3] = gen_rtx_REG (DImode, regno + 12); | |
5897 | ops[4] = operands[1]; | |
5898 | ops[5] = GEN_INT (lane); | |
6308e208 | 5899 | output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4", |
88f77cba JB |
5900 | ops); | |
5901 | return ""; | |
c956e102 | 5902 | } |
f7379e5e | 5903 | [(set_attr "type" "neon_load4_one_lane<q>")] |
c956e102 | 5904 | ) |
88f77cba JB |
5905 | |
5906 | (define_insn "neon_vld4_dup<mode>" ;; Load one 4-element structure and replicate it to all lanes of four registers (vld4 with empty [] lane brackets). | |
5907 | [(set (match_operand:OI 0 "s_register_operand" "=w") | |
6308e208 | 5908 | (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um") |
eb637e76 | 5909 | (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
88f77cba JB |
5910 | UNSPEC_VLD4_DUP))] | |
5911 | "TARGET_NEON" | |
5912 | { | |
5913 | if (GET_MODE_NUNITS (<MODE>mode) > 1) /* Multi-element modes use vld4; the else arm emits vld1 for single-element modes.  */ | |
5914 | { | |
5915 | int regno = REGNO (operands[0]); /* The register list is built from operand 0's first register number in steps of 2.  */ | |
5916 | rtx ops[5]; | |
5917 | ops[0] = gen_rtx_REG (DImode, regno); | |
5918 | ops[1] = gen_rtx_REG (DImode, regno + 2); | |
5919 | ops[2] = gen_rtx_REG (DImode, regno + 4); | |
5920 | ops[3] = gen_rtx_REG (DImode, regno + 6); | |
5921 | ops[4] = operands[1]; | |
6308e208 | 5922 | output_asm_insn ("vld4.<V_sz_elem>\t{%P0[], %P1[], %P2[], %P3[]}, %A4", |
88f77cba JB |
5923 | ops); | |
5924 | return ""; | |
5925 | } | |
5926 | else | |
6308e208 | 5927 | return "vld1.<V_sz_elem>\t%h0, %A1"; |
c956e102 | 5928 | } |
003bb7f3 | 5929 | [(set (attr "type") |
c956e102 | 5930 | (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1")) |
f7379e5e JG |
5931 | (const_string "neon_load4_all_lanes<q>") | |
5932 | (const_string "neon_load1_1reg<q>")))] | |
c956e102 | 5933 | ) |
88f77cba | 5934 | |
eb637e76 DB |
5935 | (define_insn "neon_vld4_dupv8bf" ;; BFloat16 (V8BF) form of the vld4 all-lanes load, enabled by TARGET_BF16_SIMD; always emits vld4.16. | |
5936 | [(set (match_operand:XI 0 "s_register_operand" "=w") | |
5937 | (unspec:XI [(match_operand:V2BF 1 "neon_struct_operand" "Um") | |
5938 | (unspec:V8BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] | |
5939 | UNSPEC_VLD4_DUP))] ;; Four-element dup load, the same unspec as neon_vld4_dup<mode>. | |
5940 | "TARGET_BF16_SIMD" | |
5941 | { | |
5942 | rtx ops[5]; | |
5943 | int tabbase = REGNO (operands[0]); /* First register number of the XI destination.  */ | |
5944 ||
5945 | ops[4] = operands[1]; | |
5946 | ops[0] = gen_rtx_REG (V4BFmode, tabbase); | |
5947 | ops[1] = gen_rtx_REG (V4BFmode, tabbase + 2); | |
5948 | ops[2] = gen_rtx_REG (V4BFmode, tabbase + 4); | |
5949 | ops[3] = gen_rtx_REG (V4BFmode, tabbase + 6); | |
5950 | output_asm_insn ("vld4.16\t{%P0[], %P1[], %P2[], %P3[]}, %A4", ops); | |
5951 | return ""; | |
5952 | } | |
5953 | [(set_attr "type" "neon_load4_all_lanes_q")] | |
5954 | ) | |
5955 | ||
3188ed59 RS |
5956 | (define_expand "vec_store_lanesoi<mode>" ;; Expander with no C body: the OI-mode UNSPEC_VST4 store template below is emitted as written for VDX modes. | |
5957 | [(set (match_operand:OI 0 "neon_struct_operand") | |
5958 | (unspec:OI [(match_operand:OI 1 "s_register_operand") | |
5959 | (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] | |
5960 | UNSPEC_VST4))] | |
5961 | "TARGET_NEON") | |
5962 | ||
88f77cba | 5963 | (define_insn "neon_vst4<mode>" ;; Store an OI-mode register group as 4-element structures: vst4, or vst1.64 when the element size is 64 bits. |
6308e208 | 5964 | [(set (match_operand:OI 0 "neon_struct_operand" "=Um") |
88f77cba | 5965 | (unspec:OI [(match_operand:OI 1 "s_register_operand" "w") |
ff229375 | 5966 | (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
88f77cba JB |
5967 | UNSPEC_VST4))] | |
5968 | "TARGET_NEON" | |
5969 | { | |
5970 | if (<V_sz_elem> == 64) /* 64-bit elements take the vst1.64 form.  */ | |
6308e208 | 5971 | return "vst1.64\t%h1, %A0"; |
88f77cba | 5972 | else |
6308e208 | 5973 | return "vst4.<V_sz_elem>\t%h1, %A0"; |
c956e102 | 5974 | } |
003bb7f3 | 5975 | [(set (attr "type") |
c956e102 | 5976 | (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64")) |
f7379e5e JG |
5977 | (const_string "neon_store1_4reg<q>") | |
5978 | (const_string "neon_store4_4reg<q>")))] | |
c956e102 | 5979 | ) |
88f77cba | 5980 | |
3188ed59 RS |
5981 | (define_expand "vec_store_lanesxi<mode>" ;; XI-mode store expander for VQ2 modes; it only forwards both operands to gen_neon_vst4<mode>. | |
5982 | [(match_operand:XI 0 "neon_struct_operand") | |
5983 | (match_operand:XI 1 "s_register_operand") | |
4b644867 | 5984 | (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
3188ed59 RS |
5985 | "TARGET_NEON" | |
5986 | { | |
5987 | emit_insn (gen_neon_vst4<mode> (operands[0], operands[1])); | |
5988 | DONE; | |
5989 | }) | |
5990 | ||
88f77cba | 5991 | (define_expand "neon_vst4<mode>" ;; Expand an XI-mode vst4 into two OI-sized stores: neon_vst4qa at offset 0, then neon_vst4qb at the next OI-sized offset. |
6308e208 RS |
5992 | [(match_operand:XI 0 "neon_struct_operand") | |
5993 | (match_operand:XI 1 "s_register_operand") | |
ff229375 | 5994 | (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
88f77cba JB |
5995 | "TARGET_NEON" | |
5996 | { | |
6308e208 RS |
5997 | rtx mem; | |
5998 | |
5999 | mem = adjust_address (operands[0], OImode, 0); /* First OI-sized part of the destination.  */ | |
6000 | emit_insn (gen_neon_vst4qa<mode> (mem, operands[1])); | |
6001 | mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode)); /* Step forward by one OImode size to the second part.  */ | |
6002 | emit_insn (gen_neon_vst4qb<mode> (mem, operands[1])); | |
88f77cba JB |
6003 | DONE; | |
6004 | }) | |
6005 | ||
6006 | (define_insn "neon_vst4qa<mode>" ;; First half of the XI-mode vst4: stores the DImode registers at offsets 0, 4, 8 and 12 from operand 1's first register number. | |
6308e208 RS |
6007 | [(set (match_operand:OI 0 "neon_struct_operand" "=Um") | |
6008 | (unspec:OI [(match_operand:XI 1 "s_register_operand" "w") | |
ff229375 | 6009 | (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
6308e208 | 6010 | UNSPEC_VST4A))] |
88f77cba JB |
6011 | "TARGET_NEON" | |
6012 | { | |
6308e208 | 6013 | int regno = REGNO (operands[1]); |
88f77cba JB |
6014 | rtx ops[5]; /* ops[0] is the memory operand, ops[1..4] the register list.  */ | |
6015 | ops[0] = operands[0]; | |
6016 | ops[1] = gen_rtx_REG (DImode, regno); | |
6017 | ops[2] = gen_rtx_REG (DImode, regno + 4); | |
6018 | ops[3] = gen_rtx_REG (DImode, regno + 8); | |
6019 | ops[4] = gen_rtx_REG (DImode, regno + 12); | |
6308e208 | 6020 | output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops); |
88f77cba | 6021 | return ""; |
c956e102 | 6022 | } |
f7379e5e | 6023 | [(set_attr "type" "neon_store4_4reg<q>")] |
c956e102 | 6024 | ) |
88f77cba JB |
6025 | |
6026 | (define_insn "neon_vst4qb<mode>" ;; Second half of the XI-mode vst4: stores the DImode registers at offsets 2, 6, 10 and 14 from operand 1's first register number. | |
6308e208 RS |
6027 | [(set (match_operand:OI 0 "neon_struct_operand" "=Um") | |
6028 | (unspec:OI [(match_operand:XI 1 "s_register_operand" "w") | |
ff229375 | 6029 | (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
6308e208 | 6030 | UNSPEC_VST4B))] |
88f77cba JB |
6031 | "TARGET_NEON" | |
6032 | { | |
6308e208 | 6033 | int regno = REGNO (operands[1]); |
88f77cba JB |
6034 | rtx ops[5]; /* ops[0] is the memory operand, ops[1..4] the register list.  */ | |
6035 | ops[0] = operands[0]; | |
6036 | ops[1] = gen_rtx_REG (DImode, regno + 2); | |
6037 | ops[2] = gen_rtx_REG (DImode, regno + 6); | |
6038 | ops[3] = gen_rtx_REG (DImode, regno + 10); | |
6039 | ops[4] = gen_rtx_REG (DImode, regno + 14); | |
6308e208 | 6040 | output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops); |
88f77cba | 6041 | return ""; |
c956e102 | 6042 | } |
f7379e5e | 6043 | [(set_attr "type" "neon_store4_4reg<q>")] |
c956e102 | 6044 | ) |
88f77cba | 6045 | |
22f9db64 CB |
6046 | ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed |
6047 | ;; here on big endian targets. | |
88f77cba | 6048 | (define_insn "neon_vst4_lane<mode>" ;; VD_LANE form: store lane operand 2 from each of four registers of an OI group with one vst4 lane store. |
6308e208 | 6049 | [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um") |
88f77cba JB |
6050 | (unspec:<V_four_elem> | |
6051 | [(match_operand:OI 1 "s_register_operand" "w") | |
6052 | (match_operand:SI 2 "immediate_operand" "i") | |
4b644867 | 6053 | (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
88f77cba JB |
6054 | UNSPEC_VST4_LANE))] | |
6055 | "TARGET_NEON" | |
6056 | { | |
22f9db64 | 6057 | HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2])); /* Lane index after the endian adjustment.  */ |
88f77cba JB |
6058 | int regno = REGNO (operands[1]); | |
6059 | rtx ops[6]; /* ops[0] memory, ops[1..4] registers, ops[5] lane number.  */ | |
88f77cba JB |
6060 | ops[0] = operands[0]; | |
6061 | ops[1] = gen_rtx_REG (DImode, regno); | |
6062 | ops[2] = gen_rtx_REG (DImode, regno + 2); | |
6063 | ops[3] = gen_rtx_REG (DImode, regno + 4); | |
6064 | ops[4] = gen_rtx_REG (DImode, regno + 6); | |
22f9db64 | 6065 | ops[5] = GEN_INT (lane); |
6308e208 | 6066 | output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0", |
88f77cba JB |
6067 | ops); | |
6068 | return ""; | |
c956e102 | 6069 | } |
f7379e5e | 6070 | [(set_attr "type" "neon_store4_one_lane<q>")] |
c956e102 | 6071 | ) |
88f77cba | 6072 | |
22f9db64 CB |
6073 | ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed |
6074 | ;; here on big endian targets. | |
88f77cba | 6075 | (define_insn "neon_vst4_lane<mode>" ;; VQ_HS form: store lane operand 2 from each of four registers picked out of an XI group. |
6308e208 | 6076 | [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um") |
88f77cba JB |
6077 | (unspec:<V_four_elem> | |
6078 | [(match_operand:XI 1 "s_register_operand" "w") | |
6079 | (match_operand:SI 2 "immediate_operand" "i") | |
4b644867 | 6080 | (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
88f77cba JB |
6081 | UNSPEC_VST4_LANE))] | |
6082 | "TARGET_NEON" | |
6083 | { | |
22f9db64 | 6084 | HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2])); /* Lane index after the endian adjustment.  */ |
88f77cba JB |
6085 | HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode); | |
6086 | int regno = REGNO (operands[1]); | |
6087 | rtx ops[6]; /* ops[0] memory, ops[1..4] registers, ops[5] lane number.  */ | |
e68ffe57 | 6088 | if (lane >= max / 2) /* Upper-half lanes are renumbered from 0 and the register list shifts by 2.  */ |
88f77cba JB |
6089 | { | |
6090 | lane -= max / 2; | |
6091 | regno += 2; | |
6092 | } | |
6093 | ops[0] = operands[0]; | |
6094 | ops[1] = gen_rtx_REG (DImode, regno); | |
6095 | ops[2] = gen_rtx_REG (DImode, regno + 4); | |
6096 | ops[3] = gen_rtx_REG (DImode, regno + 8); | |
6097 | ops[4] = gen_rtx_REG (DImode, regno + 12); | |
6098 | ops[5] = GEN_INT (lane); | |
6308e208 | 6099 | output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0", |
88f77cba JB |
6100 | ops); | |
6101 | return ""; | |
c956e102 | 6102 | } |
f7379e5e | 6103 | [(set_attr "type" "neon_store4_one_lane<q>")] ;; Single-lane store, the same type as the VD_LANE form of this pattern. |
c956e102 | 6104 | ) |
88f77cba | 6105 | |
46b57af1 TB |
6106 | (define_insn "neon_vec_unpack<US>_lo_<mode>" ;; Extend the elements of operand 1 selected by a vect_par_constant_low parallel; emits vmovl on %e1. Disabled for big-endian. | |
6107 | [(set (match_operand:<V_unpack> 0 "register_operand" "=w") | |
6108 | (SE:<V_unpack> (vec_select:<V_HALF> | |
6109 | (match_operand:VU 1 "register_operand" "w") | |
6110 | (match_operand:VU 2 "vect_par_constant_low" ""))))] | |
0094f21b | 6111 | "TARGET_NEON && !BYTES_BIG_ENDIAN" |
46b57af1 | 6112 | "vmovl.<US><V_sz_elem> %q0, %e1" |
f7379e5e | 6113 | [(set_attr "type" "neon_shift_imm_long")] |
46b57af1 TB |
6114 | ) | |
6115 | ||
6116 | (define_insn "neon_vec_unpack<US>_hi_<mode>" ;; Extend the elements of operand 1 selected by a vect_par_constant_high parallel; emits vmovl on %f1. Disabled for big-endian. | |
6117 | [(set (match_operand:<V_unpack> 0 "register_operand" "=w") | |
6118 | (SE:<V_unpack> (vec_select:<V_HALF> | |
6119 | (match_operand:VU 1 "register_operand" "w") | |
6120 | (match_operand:VU 2 "vect_par_constant_high" ""))))] | |
0094f21b | 6121 | "TARGET_NEON && !BYTES_BIG_ENDIAN" |
46b57af1 | 6122 | "vmovl.<US><V_sz_elem> %q0, %f1" |
f7379e5e | 6123 | [(set_attr "type" "neon_shift_imm_long")] |
46b57af1 TB |
6124 | ) | |
6125 | ||
6126 | (define_expand "vec_unpack<US>_hi_<mode>" ;; VU form: build a PARALLEL of the upper-half element indices and hand it to gen_neon_vec_unpack<US>_hi_<mode>. | |
cd65e265 | 6127 | [(match_operand:<V_unpack> 0 "register_operand") |
46b57af1 | 6128 | (SE:<V_unpack> (match_operand:VU 1 "register_operand"))] |
0094f21b | 6129 | "TARGET_NEON && !BYTES_BIG_ENDIAN" |
46b57af1 TB |
6130 | { | |
6131 | rtvec v = rtvec_alloc (<V_mode_nunits>/2) ; | |
6132 | rtx t1; | |
6133 | int i; | |
6134 | for (i = 0; i < (<V_mode_nunits>/2); i++) /* Indices nunits/2 .. nunits-1.  */ | |
6135 | RTVEC_ELT (v, i) = GEN_INT ((<V_mode_nunits>/2) + i); | |
6136 ||
6137 | t1 = gen_rtx_PARALLEL (<MODE>mode, v); | |
6138 | emit_insn (gen_neon_vec_unpack<US>_hi_<mode> (operands[0], | |
6139 | operands[1], | |
6140 | t1)); | |
6141 | DONE; | |
6142 | } | |
6143 | ) | |
6144 | ||
6145 | (define_expand "vec_unpack<US>_lo_<mode>" ;; VU form: build a PARALLEL of the lower-half element indices and hand it to gen_neon_vec_unpack<US>_lo_<mode>. | |
cd65e265 DZ |
6146 | [(match_operand:<V_unpack> 0 "register_operand") | |
6147 | (SE:<V_unpack> (match_operand:VU 1 "register_operand"))] | |
0094f21b | 6148 | "TARGET_NEON && !BYTES_BIG_ENDIAN" |
46b57af1 TB |
6149 | { | |
6150 | rtvec v = rtvec_alloc (<V_mode_nunits>/2) ; | |
6151 | rtx t1; | |
6152 | int i; | |
6153 | for (i = 0; i < (<V_mode_nunits>/2) ; i++) /* Indices 0 .. nunits/2-1.  */ | |
6154 | RTVEC_ELT (v, i) = GEN_INT (i); | |
6155 | t1 = gen_rtx_PARALLEL (<MODE>mode, v); | |
6156 | emit_insn (gen_neon_vec_unpack<US>_lo_<mode> (operands[0], | |
6157 | operands[1], | |
6158 | t1)); | |
6159 | DONE; | |
6160 | } | |
6161 | ) | |
6162 | ||
6163 | (define_insn "neon_vec_<US>mult_lo_<mode>" ;; Widening multiply of the elements of operands 1 and 3 selected by the same vect_par_constant_low parallel; emits vmull on %e1, %e3. | |
6164 | [(set (match_operand:<V_unpack> 0 "register_operand" "=w") | |
6165 | (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF> | |
6166 | (match_operand:VU 1 "register_operand" "w") | |
6167 | (match_operand:VU 2 "vect_par_constant_low" ""))) | |
6168 | (SE:<V_unpack> (vec_select:<V_HALF> | |
6169 | (match_operand:VU 3 "register_operand" "w") | |
6170 | (match_dup 2)))))] | |
0094f21b | 6171 | "TARGET_NEON && !BYTES_BIG_ENDIAN" |
46b57af1 | 6172 | "vmull.<US><V_sz_elem> %q0, %e1, %e3" |
f7379e5e | 6173 | [(set_attr "type" "neon_mul_<V_elem_ch>_long")] |
46b57af1 TB |
6174 | ) | |
6175 | ||
6176 | (define_expand "vec_widen_<US>mult_lo_<mode>" ;; VU form: build a PARALLEL of the lower-half element indices and emit gen_neon_vec_<US>mult_lo_<mode> on operands 1 and 2. | |
cd65e265 DZ |
6177 | [(match_operand:<V_unpack> 0 "register_operand") | |
6178 | (SE:<V_unpack> (match_operand:VU 1 "register_operand")) | |
6179 | (SE:<V_unpack> (match_operand:VU 2 "register_operand"))] | |
0094f21b | 6180 | "TARGET_NEON && !BYTES_BIG_ENDIAN" |
46b57af1 TB |
6181 | { | |
6182 | rtvec v = rtvec_alloc (<V_mode_nunits>/2) ; | |
6183 | rtx t1; | |
6184 | int i; | |
6185 | for (i = 0; i < (<V_mode_nunits>/2) ; i++) /* Indices 0 .. nunits/2-1.  */ | |
6186 | RTVEC_ELT (v, i) = GEN_INT (i); | |
6187 | t1 = gen_rtx_PARALLEL (<MODE>mode, v); | |
6188 ||
6189 | emit_insn (gen_neon_vec_<US>mult_lo_<mode> (operands[0], | |
6190 | operands[1], | |
6191 | t1, | |
6192 | operands[2])); | |
6193 | DONE; | |
6194 | } | |
6195 | ) | |
6196 | ||
6197 | (define_insn "neon_vec_<US>mult_hi_<mode>" ;; Widening multiply of the elements of operands 1 and 3 selected by the same vect_par_constant_high parallel; emits vmull on %f1, %f3. | |
6198 | [(set (match_operand:<V_unpack> 0 "register_operand" "=w") | |
6199 | (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF> | |
6200 | (match_operand:VU 1 "register_operand" "w") | |
6201 | (match_operand:VU 2 "vect_par_constant_high" ""))) | |
6202 | (SE:<V_unpack> (vec_select:<V_HALF> | |
6203 | (match_operand:VU 3 "register_operand" "w") | |
6204 | (match_dup 2)))))] | |
0094f21b | 6205 | "TARGET_NEON && !BYTES_BIG_ENDIAN" |
46b57af1 | 6206 | "vmull.<US><V_sz_elem> %q0, %f1, %f3" |
f7379e5e | 6207 | [(set_attr "type" "neon_mul_<V_elem_ch>_long")] |
46b57af1 TB |
6208 | ) | |
6209 | ||
6210 | (define_expand "vec_widen_<US>mult_hi_<mode>" ;; VU form: build a PARALLEL of the upper-half element indices and emit gen_neon_vec_<US>mult_hi_<mode> on operands 1 and 2. | |
cd65e265 DZ |
6211 | [(match_operand:<V_unpack> 0 "register_operand") | |
6212 | (SE:<V_unpack> (match_operand:VU 1 "register_operand")) | |
6213 | (SE:<V_unpack> (match_operand:VU 2 "register_operand"))] | |
0094f21b | 6214 | "TARGET_NEON && !BYTES_BIG_ENDIAN" |
46b57af1 TB |
6215 | { | |
6216 | rtvec v = rtvec_alloc (<V_mode_nunits>/2) ; | |
6217 | rtx t1; | |
6218 | int i; | |
6219 | for (i = 0; i < (<V_mode_nunits>/2) ; i++) /* Indices nunits/2 .. nunits-1.  */ | |
6220 | RTVEC_ELT (v, i) = GEN_INT (<V_mode_nunits>/2 + i); | |
6221 | t1 = gen_rtx_PARALLEL (<MODE>mode, v); | |
6222 ||
6223 | emit_insn (gen_neon_vec_<US>mult_hi_<mode> (operands[0], | |
6224 | operands[1], | |
6225 | t1, | |
6226 | operands[2])); | |
6227 | DONE; | |
6228 ||
6229 | } | |
6230 | ) | |
6231 | ||
36ba4aae IR |
6232 | (define_insn "neon_vec_<US>shiftl_<mode>" ;; Emits vshll for the <V_widen> extension of VW operand 1 shifted left by constant operand 2. | |
6233 | [(set (match_operand:<V_widen> 0 "register_operand" "=w") | |
6234 | (SE:<V_widen> (ashift:VW (match_operand:VW 1 "register_operand" "w") | |
6235 | (match_operand:<V_innermode> 2 "const_neon_scalar_shift_amount_operand" ""))))] | |
6236 | "TARGET_NEON" | |
6237 | { | |
6238 | return "vshll.<US><V_sz_elem> %q0, %P1, %2"; | |
6239 | } | |
f7379e5e | 6240 | [(set_attr "type" "neon_shift_imm_long")] |
36ba4aae IR |
6241 | ) | |
6242 | ||
6243 | (define_expand "vec_widen_<US>shiftl_lo_<mode>" ;; VU form: apply gen_neon_vec_<US>shiftl_<V_half> to the <V_HALF>mode subreg of operand 1 at byte offset 0. | |
cd65e265 DZ |
6244 | [(match_operand:<V_unpack> 0 "register_operand") | |
6245 | (SE:<V_unpack> (match_operand:VU 1 "register_operand")) | |
6246 | (match_operand:SI 2 "immediate_operand")] | |
36ba4aae IR |
6247 | "TARGET_NEON && !BYTES_BIG_ENDIAN" | |
6248 | { | |
6249 | emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0], | |
6250 | simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, 0), | |
6251 | operands[2])); | |
6252 | DONE; | |
6253 | } | |
6254 | ) | |
6255 | ||
6256 | (define_expand "vec_widen_<US>shiftl_hi_<mode>" ;; VU form: apply gen_neon_vec_<US>shiftl_<V_half> to the <V_HALF>mode subreg of operand 1 at byte offset GET_MODE_SIZE (<V_HALF>mode). | |
cd65e265 DZ |
6257 | [(match_operand:<V_unpack> 0 "register_operand") | |
6258 | (SE:<V_unpack> (match_operand:VU 1 "register_operand")) | |
6259 | (match_operand:SI 2 "immediate_operand")] | |
36ba4aae IR |
6260 | "TARGET_NEON && !BYTES_BIG_ENDIAN" | |
6261 | { | |
6262 | emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0], | |
6263 | simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, | |
6264 | GET_MODE_SIZE (<V_HALF>mode)), | |
6265 | operands[2])); | |
6266 | DONE; | |
6267 | } | |
6268 | ) | |
6269 | ||
46b57af1 TB |
6270 | ;; Vectorize for non-neon-quad case |
6271 | (define_insn "neon_unpack<US>_<mode>" ;; Extend a whole VDI vector (operand 1) to <V_widen> with vmovl. | |
6272 | [(set (match_operand:<V_widen> 0 "register_operand" "=w") | |
7f27ec08 | 6273 | (SE:<V_widen> (match_operand:VDI 1 "register_operand" "w")))] |
46b57af1 | 6274 | "TARGET_NEON" |
7f27ec08 | 6275 | "vmovl.<US><V_sz_elem> %q0, %P1" |
f7379e5e | 6276 | [(set_attr "type" "neon_move")] ;; NOTE(review): the neon_vec_unpack<US>_lo/hi vmovl patterns use neon_shift_imm_long; confirm neon_move is intended here. |
46b57af1 TB |
6277 | ) | |
6278 | ||
6279 | (define_expand "vec_unpack<US>_lo_<mode>" ;; VDI form: extend all of operand 1 into a <V_widen> temporary, then take the result from it with gen_neon_vget_low. | |
cd65e265 | 6280 | [(match_operand:<V_double_width> 0 "register_operand") |
46b57af1 TB |
6281 | (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))] | |
6282 | "TARGET_NEON" | |
6283 | { | |
6284 | rtx tmpreg = gen_reg_rtx (<V_widen>mode); /* Fresh register for the full-width extension.  */ | |
6285 | emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1])); | |
6286 | emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg)); | |
6287 ||
6288 | DONE; | |
6289 | } | |
6290 | ) | |
6291 | ||
6292 | (define_expand "vec_unpack<US>_hi_<mode>" ;; VDI form: extend all of operand 1 into a <V_widen> temporary, then take the result from it with gen_neon_vget_high. | |
cd65e265 | 6293 | [(match_operand:<V_double_width> 0 "register_operand") |
46b57af1 TB |
6294 | (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))] | |
6295 | "TARGET_NEON" | |
6296 | { | |
6297 | rtx tmpreg = gen_reg_rtx (<V_widen>mode); /* Fresh register for the full-width extension.  */ | |
6298 | emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1])); | |
6299 | emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg)); | |
6300 ||
6301 | DONE; | |
6302 | } | |
6303 | ) | |
6304 | ||
6305 | (define_insn "neon_vec_<US>mult_<mode>" ;; Widening multiply of two whole VDI vectors (operands 1 and 2) into a <V_widen> result; emits vmull. | |
6306 | [(set (match_operand:<V_widen> 0 "register_operand" "=w") | |
6307 | (mult:<V_widen> (SE:<V_widen> | |
6308 | (match_operand:VDI 1 "register_operand" "w")) | |
6309 | (SE:<V_widen> | |
6310 | (match_operand:VDI 2 "register_operand" "w"))))] | |
6311 | "TARGET_NEON" | |
7f27ec08 | 6312 | "vmull.<US><V_sz_elem> %q0, %P1, %P2" |
f7379e5e | 6313 | [(set_attr "type" "neon_mul_<V_elem_ch>_long")] |
46b57af1 TB |
6314 | ) | |
6315 | ||
;; Expander for the high half of a widening multiply of two VDI vectors.
;; The full <V_widen> product is computed into a fresh temporary with
;; neon_vec_<US>mult_<mode>; its high half is then copied into operand 0
;; with neon_vget_high<V_widen_l>.
6316 | (define_expand "vec_widen_<US>mult_hi_<mode>" | |
cd65e265 DZ |
6317 | [(match_operand:<V_double_width> 0 "register_operand") |
6318 | (SE:<V_double_width> (match_operand:VDI 1 "register_operand")) | |
6319 | (SE:<V_double_width> (match_operand:VDI 2 "register_operand"))] | |
46b57af1 TB |
6320 | "TARGET_NEON" |
6321 | { | |
6322 | rtx tmpreg = gen_reg_rtx (<V_widen>mode); | |
6323 | emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2])); | |
6324 | emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg)); | |
6325 | ||
6326 | DONE; | |
6327 | ||
6328 | } | |
6329 | ) | |
6330 | ||
;; Expander for the low half of a widening multiply of two VDI vectors.
;; The full <V_widen> product is computed into a fresh temporary with
;; neon_vec_<US>mult_<mode>; its low half is then copied into operand 0
;; with neon_vget_low<V_widen_l>.
6331 | (define_expand "vec_widen_<US>mult_lo_<mode>" | |
cd65e265 DZ |
6332 | [(match_operand:<V_double_width> 0 "register_operand") |
6333 | (SE:<V_double_width> (match_operand:VDI 1 "register_operand")) | |
6334 | (SE:<V_double_width> (match_operand:VDI 2 "register_operand"))] | |
46b57af1 TB |
6335 | "TARGET_NEON" |
6336 | { | |
6337 | rtx tmpreg = gen_reg_rtx (<V_widen>mode); | |
6338 | emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2])); | |
6339 | emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg)); | |
6340 | ||
6341 | DONE; | |
6342 | ||
6343 | } | |
6344 | ) | |
0f38f229 | 6345 | |
;; Expander for the high half of a widening left shift of a VDI vector by
;; the immediate in operand 2.  The full <V_widen> result is computed into
;; a fresh temporary with neon_vec_<US>shiftl_<mode>; its high half is then
;; copied into operand 0 with neon_vget_high<V_widen_l>.
36ba4aae | 6346 | (define_expand "vec_widen_<US>shiftl_hi_<mode>" |
cd65e265 DZ |
6347 | [(match_operand:<V_double_width> 0 "register_operand") |
6348 | (SE:<V_double_width> (match_operand:VDI 1 "register_operand")) | |
6349 | (match_operand:SI 2 "immediate_operand")] | |
36ba4aae IR |
6350 | "TARGET_NEON" |
6351 | { | |
6352 | rtx tmpreg = gen_reg_rtx (<V_widen>mode); | |
6353 | emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2])); | |
6354 | emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg)); | |
6355 | ||
6356 | DONE; | |
6357 | } | |
6358 | ) | |
6359 | ||
;; Expander for the low half of a widening left shift of a VDI vector by
;; the immediate in operand 2.  The full <V_widen> result is computed into
;; a fresh temporary with neon_vec_<US>shiftl_<mode>; its low half is then
;; copied into operand 0 with neon_vget_low<V_widen_l>.
6360 | (define_expand "vec_widen_<US>shiftl_lo_<mode>" | |
cd65e265 DZ |
6361 | [(match_operand:<V_double_width> 0 "register_operand") |
6362 | (SE:<V_double_width> (match_operand:VDI 1 "register_operand")) | |
6363 | (match_operand:SI 2 "immediate_operand")] | |
36ba4aae IR |
6364 | "TARGET_NEON" |
6365 | { | |
6366 | rtx tmpreg = gen_reg_rtx (<V_widen>mode); | |
6367 | emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2])); | |
6368 | emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg)); | |
6369 | ||
6370 | DONE; | |
6371 | } | |
6372 | ) | |
6373 | ||
0094f21b JB |
6374 | ; FIXME: These instruction patterns can't be used safely in big-endian mode |
6375 | ; because the ordering of vector elements in Q registers is different from what | |
6376 | ; the semantics of the instructions require. | |
6377 | ||
0f38f229 TB |
;; Pack two VN-mode vectors into one <V_narrow_pack> vector: operands 1 and 2
;; are each truncated to <V_narrow> and concatenated.  Emitted as two VMOVN
;; instructions (hence length 8), one writing %e0 and one writing %f0.
;; The output is early-clobber ("=&w"), presumably so that it cannot overlap
;; operand 2, which is still read after the first VMOVN has written %e0.
;; Only enabled for little-endian.
6378 | (define_insn "vec_pack_trunc_<mode>" | |
6379 | [(set (match_operand:<V_narrow_pack> 0 "register_operand" "=&w") | |
6380 | (vec_concat:<V_narrow_pack> | |
6381 | (truncate:<V_narrow> | |
6382 | (match_operand:VN 1 "register_operand" "w")) | |
6383 | (truncate:<V_narrow> | |
6384 | (match_operand:VN 2 "register_operand" "w"))))] | |
0094f21b | 6385 | "TARGET_NEON && !BYTES_BIG_ENDIAN" |
30cecf17 | 6386 | "vmovn.i<V_sz_elem>\t%e0, %q1\;vmovn.i<V_sz_elem>\t%f0, %q2" |
f7379e5e | 6387 | [(set_attr "type" "multiple") |
30cecf17 | 6388 | (set_attr "length" "8")] |
0f38f229 TB |
6389 | ) |
6390 | ||
6391 | ;; For the non-quad case. |
;; Narrow a single VN-mode vector (operand 1) to <V_narrow> (operand 0) with
;; one VMOVN.  Only enabled for little-endian.
6392 | (define_insn "neon_vec_pack_trunc_<mode>" | |
6393 | [(set (match_operand:<V_narrow> 0 "register_operand" "=w") | |
7f27ec08 | 6394 | (truncate:<V_narrow> (match_operand:VN 1 "register_operand" "w")))] |
0094f21b | 6395 | "TARGET_NEON && !BYTES_BIG_ENDIAN" |
7f27ec08 | 6396 | "vmovn.i<V_sz_elem>\t%P0, %q1" |
f7379e5e | 6397 | [(set_attr "type" "neon_move_narrow_q")] |
0f38f229 TB |
6398 | ) |
6399 | ||
;; Expander for packing two VSHFT-mode vectors into one <V_narrow_pack>
;; vector.  Operands 1 and 2 are placed into the low and high halves of a
;; fresh <V_DOUBLE> temporary (move_lo_quad / move_hi_quad), which is then
;; narrowed into operand 0 with neon_vec_pack_trunc_<V_double>.
;; Only enabled for little-endian.
6400 | (define_expand "vec_pack_trunc_<mode>" | |
cd65e265 DZ |
6401 | [(match_operand:<V_narrow_pack> 0 "register_operand") |
6402 | (match_operand:VSHFT 1 "register_operand") | |
0f38f229 | 6403 | (match_operand:VSHFT 2 "register_operand")] |
0094f21b | 6404 | "TARGET_NEON && !BYTES_BIG_ENDIAN" |
0f38f229 TB |
6405 | { |
6406 | rtx tempreg = gen_reg_rtx (<V_DOUBLE>mode); | |
6407 | ||
6408 | emit_insn (gen_move_lo_quad_<V_double> (tempreg, operands[1])); | |
6409 | emit_insn (gen_move_hi_quad_<V_double> (tempreg, operands[2])); | |
6410 | emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], tempreg)); | |
6411 | DONE; | |
6412 | }) | |
bd1aa4f4 SS |
6413 | |
;; Absolute difference for VF modes, matching the RTL form
;; abs (minus (op1, op2)) and emitting a single VABD.
6414 | (define_insn "neon_vabd<mode>_2" | |
d0b6b5a7 KT |
6415 | [(set (match_operand:VF 0 "s_register_operand" "=w") |
6416 | (abs:VF (minus:VF (match_operand:VF 1 "s_register_operand" "w") | |
6417 | (match_operand:VF 2 "s_register_operand" "w"))))] | |
bb78e587 | 6418 | "ARM_HAVE_NEON_<MODE>_ARITH" |
bd1aa4f4 | 6419 | "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2" |
d0b6b5a7 | 6420 | [(set_attr "type" "neon_fp_abd_s<q>")] |
bd1aa4f4 SS |
6421 | ) |
6422 | ||
;; Absolute difference for VF modes, matching the form where the
;; subtraction is expressed as an UNSPEC_VSUB unspec wrapped in abs.
;; Emits a single VABD.
6423 | (define_insn "neon_vabd<mode>_3" | |
d0b6b5a7 KT |
6424 | [(set (match_operand:VF 0 "s_register_operand" "=w") |
6425 | (abs:VF (unspec:VF [(match_operand:VF 1 "s_register_operand" "w") | |
6426 | (match_operand:VF 2 "s_register_operand" "w")] | |
6427 | UNSPEC_VSUB)))] | |
bb78e587 | 6428 | "ARM_HAVE_NEON_<MODE>_ARITH" |
bd1aa4f4 | 6429 | "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2" |
d0b6b5a7 | 6430 | [(set_attr "type" "neon_fp_abd_s<q>")] |
bd1aa4f4 | 6431 | ) |
436016f4 DZ |
6432 | |
;; 8-bit integer matrix multiply-accumulate (requires TARGET_I8MM).
;; The MATMUL unspec of the two V16QI inputs (operands 2 and 3) is added to
;; the V4SI accumulator in operand 1, which is tied to the output register
;; (constraint "0").
6433 | (define_insn "neon_<sup>mmlav16qi" | |
6434 | [(set (match_operand:V4SI 0 "register_operand" "=w") | |
6435 | (plus:V4SI | |
6436 | (unspec:V4SI [(match_operand:V16QI 2 "register_operand" "w") | |
6437 | (match_operand:V16QI 3 "register_operand" "w")] MATMUL) | |
6438 | (match_operand:V4SI 1 "register_operand" "0")))] | |
6439 | "TARGET_I8MM" | |
6440 | "v<sup>mmla.<mmla_sfx>\t%q0, %q2, %q3" | |
6441 | [(set_attr "type" "neon_mla_s_q")] | |
6442 | ) | |
eb7ba6c3 DZ |
6443 | |
;; BFloat16 dot product with accumulate (requires TARGET_BF16_SIMD).
;; The UNSPEC_DOT_S of the two <VSF2BF> inputs (operands 2 and 3) is added
;; to the VCVTF accumulator in operand 1, which is tied to the output
;; register (constraint "0").  Emitted as VDOT.BF16.
6444 | (define_insn "neon_vbfdot<VCVTF:mode>" | |
6445 | [(set (match_operand:VCVTF 0 "register_operand" "=w") | |
6446 | (plus:VCVTF (match_operand:VCVTF 1 "register_operand" "0") | |
6447 | (unspec:VCVTF [ | |
6448 | (match_operand:<VSF2BF> 2 "register_operand" "w") | |
6449 | (match_operand:<VSF2BF> 3 "register_operand" "w")] | |
6450 | UNSPEC_DOT_S)))] | |
6451 | "TARGET_BF16_SIMD" | |
6452 | "vdot.bf16\\t%<V_reg>0, %<V_reg>2, %<V_reg>3" | |
6453 | [(set_attr "type" "neon_dot<q>")] | |
6454 | ) | |
6455 | ||
;; By-lane BFloat16 dot product with accumulate, where the indexed operand
;; (operand 3) is a V4BF register restricted to the "x" register class.
;; Operand 4 is the immediate lane index, printed as %P3[%c4].  Operand 1 is
;; the accumulator and is tied to the output register.
6456 | (define_insn "neon_vbfdot_lanev4bf<VCVTF:mode>" | |
6457 | [(set (match_operand:VCVTF 0 "register_operand" "=w") | |
6458 | (plus:VCVTF (match_operand:VCVTF 1 "register_operand" "0") | |
6459 | (unspec:VCVTF [ | |
6460 | (match_operand:<VSF2BF> 2 "register_operand" "w") | |
6461 | (match_operand:V4BF 3 "register_operand" "x") | |
6462 | (match_operand:SI 4 "immediate_operand" "i")] | |
6463 | UNSPEC_DOT_S)))] | |
6464 | "TARGET_BF16_SIMD" | |
6465 | "vdot.bf16\\t%<V_reg>0, %<V_reg>2, %P3[%c4]" | |
6466 | [(set_attr "type" "neon_dot<q>")] | |
6467 | ) | |
6468 | ||
;; By-lane BFloat16 dot product with accumulate, where the indexed operand
;; (operand 3) is a V8BF register restricted to the "x" register class.
;; The output code splits the lane range in two: "half" is the number of
;; units in operand 3's mode divided by 4.  A lane index below "half" is
;; printed against %e3 unchanged; otherwise operand 4 is rewritten to
;; lane - half and printed against %f3.
6469 | (define_insn "neon_vbfdot_lanev8bf<VCVTF:mode>" | |
6470 | [(set (match_operand:VCVTF 0 "register_operand" "=w") | |
6471 | (plus:VCVTF (match_operand:VCVTF 1 "register_operand" "0") | |
6472 | (unspec:VCVTF [ | |
6473 | (match_operand:<VSF2BF> 2 "register_operand" "w") | |
6474 | (match_operand:V8BF 3 "register_operand" "x") | |
6475 | (match_operand:SI 4 "immediate_operand" "i")] | |
6476 | UNSPEC_DOT_S)))] | |
6477 | "TARGET_BF16_SIMD" | |
6478 | { | |
6479 | int lane = INTVAL (operands[4]); | |
6480 | int half = GET_MODE_NUNITS (GET_MODE (operands[3])) / 4; | |
6481 | if (lane < half) | |
6482 | return "vdot.bf16\\t%<V_reg>0, %<V_reg>2, %e3[%c4]"; | |
6483 | else | |
6484 | { | |
6485 | operands[4] = GEN_INT (lane - half); | |
6486 | return "vdot.bf16\\t%<V_reg>0, %<V_reg>2, %f3[%c4]"; | |
6487 | } | |
6488 | } | |
6489 | [(set_attr "type" "neon_dot<q>")] | |
6490 | ) | |
8e6d0dba DZ |
6491 | |
;; Convert a V4SF vector (operand 1) to BFloat16, writing a VBFCVT-mode
;; result (operand 0).  Emitted as VCVT.BF16.F32; the destination is printed
;; through the <V_bf_low> mode attribute.
6492 | (define_insn "neon_vbfcvtv4sf<VBFCVT:mode>" | |
6493 | [(set (match_operand:VBFCVT 0 "register_operand" "=w") | |
6494 | (unspec:VBFCVT [(match_operand:V4SF 1 "register_operand" "w")] | |
6495 | UNSPEC_BFCVT))] | |
6496 | "TARGET_BF16_SIMD" | |
6497 | "vcvt.bf16.f32\\t%<V_bf_low>0, %q1" | |
6498 | [(set_attr "type" "neon_fp_cvt_narrow_s_q")] | |
6499 | ) | |
6500 | ||
;; Convert a V4SF vector (operand 2) to BFloat16 and write it to the %f0
;; half of the V8BF destination.  Operand 1 is the existing V8BF value and
;; is tied to the output register (constraint "0"), so the rest of the
;; register is carried through.
6501 | (define_insn "neon_vbfcvtv4sf_highv8bf" | |
6502 | [(set (match_operand:V8BF 0 "register_operand" "=w") | |
6503 | (unspec:V8BF [(match_operand:V8BF 1 "register_operand" "0") | |
6504 | (match_operand:V4SF 2 "register_operand" "w")] | |
6505 | UNSPEC_BFCVT_HIGH))] | |
6506 | "TARGET_BF16_SIMD" | |
6507 | "vcvt.bf16.f32\\t%f0, %q2" | |
6508 | [(set_attr "type" "neon_fp_cvt_narrow_s_q")] | |
6509 | ) | |
6510 | ||
;; Scalar SF -> BF conversion (requires TARGET_BF16_FP).  Both operands use
;; the "t" register constraint; emitted as VCVTB.BF16.F32.
6511 | (define_insn "neon_vbfcvtsf" | |
6512 | [(set (match_operand:BF 0 "register_operand" "=t") | |
6513 | (unspec:BF [(match_operand:SF 1 "register_operand" "t")] | |
6514 | UNSPEC_BFCVT))] | |
6515 | "TARGET_BF16_FP" | |
6516 | "vcvtb.bf16.f32\\t%0, %1" | |
6517 | [(set_attr "type" "f_cvt")] | |
6518 | ) | |
6519 | ||
;; Convert a BFloat16 vector (operand 1, VBFCVT mode) to V4SF (operand 0) by
;; a widening left shift of 16 bits (VSHLL ... #16).  The source is printed
;; through the <V_bf_low> mode attribute.
;; NOTE(review): VSHLL's size suffix normally names the *source* element
;; width.  Confirm that ".u32" (rather than a 16-bit element size) is the
;; intended suffix for widening 16-bit bf16 elements to 32-bit floats.
6520 | (define_insn "neon_vbfcvt<VBFCVT:mode>" | |
6521 | [(set (match_operand:V4SF 0 "register_operand" "=w") | |
6522 | (unspec:V4SF [(match_operand:VBFCVT 1 "register_operand" "w")] | |
6523 | UNSPEC_BFCVT))] | |
6524 | "TARGET_BF16_SIMD" | |
6525 | "vshll.u32\\t%q0, %<V_bf_low>1, #16" | |
6526 | [(set_attr "type" "neon_shift_imm_q")] | |
6527 | ) | |
6528 | ||
;; Convert the %f1 half of a V8BF vector (operand 1) to V4SF (operand 0) by
;; a widening left shift of 16 bits (VSHLL ... #16).
;; NOTE(review): VSHLL's size suffix normally names the *source* element
;; width.  Confirm that ".u32" (rather than a 16-bit element size) is the
;; intended suffix for widening 16-bit bf16 elements to 32-bit floats.
6529 | (define_insn "neon_vbfcvt_highv8bf" | |
6530 | [(set (match_operand:V4SF 0 "register_operand" "=w") | |
6531 | (unspec:V4SF [(match_operand:V8BF 1 "register_operand" "w")] | |
6532 | UNSPEC_BFCVT_HIGH))] | |
6533 | "TARGET_BF16_SIMD" | |
6534 | "vshll.u32\\t%q0, %f1, #16" | |
6535 | [(set_attr "type" "neon_shift_imm_q")] | |
6536 | ) | |
6537 | ||
6538 | ;; Convert a BF scalar operand to SF via VSHL. |
6539 | ;; VSHL doesn't accept 32-bit registers where the BF and SF scalar operands |
6540 | ;; would be allocated, therefore the operands must be converted to intermediate |
6541 | ;; vectors (i.e. V2SI) in order to apply 64-bit registers. |
;; The expander emits three insns: BF -> V2SI (neon_vbfcvtbf_cvtmodev2si),
;; a V2SI shift left by 16 (neon_vshl_nv2si), and V2SI -> SF
;; (neon_vbfcvtbf_cvtmodesf) into operand 0.
6542 | (define_expand "neon_vbfcvtbf" | |
6543 | [(match_operand:SF 0 "register_operand") | |
6544 | (unspec:SF [(match_operand:BF 1 "register_operand")] UNSPEC_BFCVT)] | |
6545 | "TARGET_BF16_FP" | |
6546 | { | |
6547 | rtx op0 = gen_reg_rtx (V2SImode); | |
6548 | rtx op1 = gen_reg_rtx (V2SImode); | |
6549 | emit_insn (gen_neon_vbfcvtbf_cvtmodev2si (op1, operands[1])); | |
6550 | emit_insn (gen_neon_vshl_nv2si (op0, op1, gen_int_mode(16, SImode))); | |
6551 | emit_insn (gen_neon_vbfcvtbf_cvtmodesf (operands[0], op0)); | |
6552 | DONE; | |
6553 | }) | |
6554 | ||
6555 | ;; Convert BF mode to V2SI and V2SI to SF. |
6556 | ;; Implement this by allocating a 32-bit operand in the low half of a 64-bit |
6557 | ;; register indexed by a 32-bit sub-register number. |
6558 | ;; This will generate reloads, but the compiler can optimize out the moves. |
6559 | ;; Use the 'x' constraint to keep the 32-bit sub-registers in an indexable |
6560 | ;; range, so as to avoid extra moves. |
;; The output template is empty and operand 1 is tied to operand 0
;; (constraint "0"), so this insn itself emits no assembly.
6561 | (define_insn "neon_vbfcvtbf_cvtmode<mode>" | |
6562 | [(set (match_operand:VBFCVTM 0 "register_operand" "=x") | |
6563 | (unspec:VBFCVTM [(match_operand:<V_bf_cvt_m> 1 "register_operand" "0")] | |
6564 | UNSPEC_BFCVT))] | |
6565 | "TARGET_BF16_FP" | |
6566 | "" | |
6567 | ) | |
2d22ab64 KT |
6568 | |
;; BFloat16 matrix multiply-accumulate (requires TARGET_BF16_SIMD).
;; The UNSPEC_BFMMLA of the two V8BF inputs (operands 2 and 3) is added to
;; the V4SF accumulator in operand 1, which is tied to the output register
;; (constraint "0").  Emitted as VMMLA.BF16.
6569 | (define_insn "neon_vmmlav8bf" | |
6570 | [(set (match_operand:V4SF 0 "register_operand" "=w") | |
6571 | (plus:V4SF (match_operand:V4SF 1 "register_operand" "0") | |
6572 | (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w") | |
6573 | (match_operand:V8BF 3 "register_operand" "w")] | |
6574 | UNSPEC_BFMMLA)))] | |
6575 | "TARGET_BF16_SIMD" | |
6576 | "vmmla.bf16\\t%q0, %q2, %q3" | |
6577 | [(set_attr "type" "neon_fp_mla_s_q")] | |
6578 | ) | |
6579 | ||
;; BFloat16 fused multiply-accumulate into V4SF, one pattern per BF_MA
;; unspec (the <bt> attribute supplies the mnemonic suffix).  The unspec of
;; the two V8BF inputs (operands 2 and 3) is added to the accumulator in
;; operand 1, which is tied to the output register (constraint "0").
6580 | (define_insn "neon_vfma<bt>v8bf" | |
6581 | [(set (match_operand:V4SF 0 "register_operand" "=w") | |
6582 | (plus: V4SF (match_operand:V4SF 1 "register_operand" "0") | |
6583 | (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w") | |
6584 | (match_operand:V8BF 3 "register_operand" "w")] | |
6585 | BF_MA)))] | |
6586 | "TARGET_BF16_SIMD" | |
6587 | "vfma<bt>.bf16\\t%q0, %q2, %q3" | |
6588 | [(set_attr "type" "neon_fp_mla_s_q")] | |
6589 | ) | |
6590 | ||
;; By-lane BFloat16 fused multiply-accumulate into V4SF.  Operand 3 is a
;; V4BF register restricted to the "x" register class and operand 4 is the
;; constant lane index, printed as %P3[%c4].  Operand 1 is the accumulator
;; and is tied to the output register (constraint "0").
6591 | (define_insn "neon_vfma<bt>_lanev8bf" | |
6592 | [(set (match_operand:V4SF 0 "register_operand" "=w") | |
6593 | (plus: V4SF (match_operand:V4SF 1 "register_operand" "0") | |
6594 | (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w") | |
6595 | (match_operand:V4BF 3 "register_operand" "x") | |
6596 | (match_operand:SI 4 "const_int_operand" "n")] | |
6597 | BF_MA)))] | |
6598 | "TARGET_BF16_SIMD" | |
6599 | "vfma<bt>.bf16\\t%q0, %q2, %P3[%c4]" | |
6600 | [(set_attr "type" "neon_fp_mla_s_scalar_q")] | |
6601 | ) | |
6602 | ||
;; Expander for the by-lane BFloat16 fused multiply-accumulate where the
;; indexed operand (operand 3) is a full V8BF vector.  The lane index in
;; operand 4 is asserted to be in [0, 7].
;;   * lane < 4: operands are passed straight to neon_vfma<bt>_lanev8bf.
;;   * lane >= 4: the high half of operand 3 is extracted into a fresh V4BF
;;     register with neon_vget_highv8bf, the lane is rebased to lane - 4,
;;     and neon_vfma<bt>_lanev8bf is emitted on that half.
;; NOTE(review): in the lane < 4 path operand 3 is still V8BF, while
;; neon_vfma<bt>_lanev8bf declares its operand 3 as V4BF.  Confirm the mode
;; mismatch is acceptable, or whether the low half should be extracted first.
6603 | (define_expand "neon_vfma<bt>_laneqv8bf" | |
6604 | [(set (match_operand:V4SF 0 "register_operand" "=w") | |
6605 | (plus: V4SF (match_operand:V4SF 1 "register_operand" "0") | |
6606 | (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w") | |
6607 | (match_operand:V8BF 3 "register_operand" "x") | |
6608 | (match_operand:SI 4 "const_int_operand" "n")] | |
6609 | BF_MA)))] | |
6610 | "TARGET_BF16_SIMD" | |
6611 | { | |
6612 | int lane = INTVAL (operands[4]); | |
6613 | gcc_assert (IN_RANGE(lane, 0, 7)); | |
6614 | if (lane < 4) | |
6615 | { | |
6616 | emit_insn (gen_neon_vfma<bt>_lanev8bf (operands[0], operands[1], operands[2], operands[3], operands[4])); | |
6617 | } | |
6618 | else | |
6619 | { | |
6620 | rtx op_highpart = gen_reg_rtx (V4BFmode); | |
6621 | emit_insn (gen_neon_vget_highv8bf (op_highpart, operands[3])); | |
6622 | operands[4] = GEN_INT (lane - 4); | |
6623 | emit_insn (gen_neon_vfma<bt>_lanev8bf (operands[0], operands[1], operands[2], op_highpart, operands[4])); | |
6624 | } | |
6625 | DONE; | |
6626 | } | |
6627 | [(set_attr "type" "neon_fp_mla_s_scalar_q")] | |
6628 | ) |