]>
Commit | Line | Data |
---|---|---|
d98a3884 | 1 | ;; ARM NEON coprocessor Machine Description |
f1717362 | 2 | ;; Copyright (C) 2006-2016 Free Software Foundation, Inc. |
d98a3884 | 3 | ;; Written by CodeSourcery. |
4 | ;; | |
5 | ;; This file is part of GCC. | |
6 | ;; | |
7 | ;; GCC is free software; you can redistribute it and/or modify it | |
8 | ;; under the terms of the GNU General Public License as published by | |
038d1e19 | 9 | ;; the Free Software Foundation; either version 3, or (at your option) |
d98a3884 | 10 | ;; any later version. |
11 | ;; | |
12 | ;; GCC is distributed in the hope that it will be useful, but | |
13 | ;; WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 | ;; General Public License for more details. | |
16 | ;; | |
17 | ;; You should have received a copy of the GNU General Public License | |
038d1e19 | 18 | ;; along with GCC; see the file COPYING3. If not see |
19 | ;; <http://www.gnu.org/licenses/>. | |
d98a3884 | 20 | |
d98a3884 | 21 | |
bcaec148 | 22 | ;; Attribute used to permit string comparisons against <VQH_mnem> in |
52432540 | 23 | ;; type attribute definitions. |
;; Permitted values are vadd, vmin and vmax; the default is vadd.
bcaec148 | 24 | (define_attr "vqh_mnem" "vadd,vmin,vmax" (const_string "vadd")) |
25 | ||
;; Move pattern for the VDX modes, enabled under TARGET_NEON when at least
;; one of the two operands is a register.
;; Alternative 2 (source constraint Dn) is handled first:
;; neon_immediate_valid_for_move is given pointers to operands[1] and to
;; width; a width of 0 selects the fixed vmov.f32 template, any other width
;; is formatted into the static buffer as vmov.i<width>.
;; Alternative 0 is a plain vmov, alternatives 1 and 3 are emitted by
;; output_move_neon, alternatives 4 and 5 use three-operand vmov templates,
;; and the remaining alternatives are emitted by output_move_double.
d98a3884 | 26 | (define_insn "*neon_mov<mode>" |
87f800b2 | 27 | [(set (match_operand:VDX 0 "nonimmediate_operand" |
e7a3ea01 | 28 | "=w,Un,w, w, ?r,?w,?r,?r, ?Us") |
87f800b2 | 29 | (match_operand:VDX 1 "general_operand" |
e7a3ea01 | 30 | " w,w, Dn,Uni, w, r, r, Usi,r"))] |
fdd8cbd8 | 31 | "TARGET_NEON |
32 | && (register_operand (operands[0], <MODE>mode) | |
33 | || register_operand (operands[1], <MODE>mode))" | |
d98a3884 | 34 | { |
35 | if (which_alternative == 2) | |
36 | { | |
37 | int width, is_valid; | |
38 | static char templ[40]; | |
39 | ||
40 | is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode, | |
41 | &operands[1], &width); | |
42 | ||
43 | gcc_assert (is_valid != 0); | |
44 | ||
45 | if (width == 0) | |
46 | return "vmov.f32\t%P0, %1 @ <mode>"; | |
47 | else | |
87f800b2 | 48 | sprintf (templ, "vmov.i%d\t%%P0, %%x1 @ <mode>", width); |
d98a3884 | 49 | |
50 | return templ; | |
51 | } | |
52 | ||
d98a3884 | 53 | switch (which_alternative) |
54 | { | |
55 | case 0: return "vmov\t%P0, %P1 @ <mode>"; | |
e7a3ea01 | 56 | case 1: case 3: return output_move_neon (operands); |
d98a3884 | 57 | case 2: gcc_unreachable (); |
58 | case 4: return "vmov\t%Q0, %R0, %P1 @ <mode>"; | |
59 | case 5: return "vmov\t%P0, %Q1, %R1 @ <mode>"; | |
26ff80c0 | 60 | default: return output_move_double (operands, true, NULL); |
d98a3884 | 61 | } |
62 | } | |
32093010 | 63 | [(set_attr "type" "neon_move<q>,neon_store1_1reg,neon_move<q>,\ |
64 | neon_load1_1reg, neon_to_gp<q>,neon_from_gp<q>,mov_reg,\ | |
65 | neon_load1_2reg, neon_store1_2reg") | |
bcaec148 | 66 | (set_attr "length" "4,4,4,4,4,4,8,8,8") |
42e1de19 | 67 | (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*") |
68 | (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*") | |
8848d797 | 69 | (set_attr "neg_pool_range" "*,*,*,1004,*,*,*,1004,*")]) |
d98a3884 | 70 | |
;; Move pattern for the VQXMOV modes, enabled under TARGET_NEON when at
;; least one of the two operands is a register.
;; Alternative 2 (source constraint Dn) is handled first:
;; neon_immediate_valid_for_move is given pointers to operands[1] and to
;; width; a width of 0 selects the fixed vmov.f32 template, any other width
;; is formatted into the static buffer as vmov.i<width>.
;; Alternative 0 is a plain vmov, alternatives 1 and 3 are emitted by
;; output_move_neon, alternatives 4 and 5 each return a template holding
;; two vmov instructions, and the remaining alternatives are emitted by
;; output_move_quad.
71 | (define_insn "*neon_mov<mode>" | |
72 | [(set (match_operand:VQXMOV 0 "nonimmediate_operand" | |
73 | "=w,Un,w, w, ?r,?w,?r,?r, ?Us") | |
74 | (match_operand:VQXMOV 1 "general_operand" | |
75 | " w,w, Dn,Uni, w, r, r, Usi, r"))] | |
fdd8cbd8 | 76 | "TARGET_NEON |
77 | && (register_operand (operands[0], <MODE>mode) | |
78 | || register_operand (operands[1], <MODE>mode))" | |
d98a3884 | 79 | { |
80 | if (which_alternative == 2) | |
81 | { | |
82 | int width, is_valid; | |
83 | static char templ[40]; | |
84 | ||
85 | is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode, | |
86 | &operands[1], &width); | |
87 | ||
88 | gcc_assert (is_valid != 0); | |
89 | ||
90 | if (width == 0) | |
91 | return "vmov.f32\t%q0, %1 @ <mode>"; | |
92 | else | |
93 | sprintf (templ, "vmov.i%d\t%%q0, %%1 @ <mode>", width); | |
94 | ||
95 | return templ; | |
96 | } | |
97 | ||
98 | switch (which_alternative) | |
99 | { | |
100 | case 0: return "vmov\t%q0, %q1 @ <mode>"; | |
101 | case 1: case 3: return output_move_neon (operands); | |
102 | case 2: gcc_unreachable (); | |
103 | case 4: return "vmov\t%Q0, %R0, %e1 @ <mode>\;vmov\t%J0, %K0, %f1"; | |
104 | case 5: return "vmov\t%e0, %Q1, %R1 @ <mode>\;vmov\t%f0, %J1, %K1"; | |
105 | default: return output_move_quad (operands); | |
106 | } | |
107 | } | |
32093010 | 108 | [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_move_q,\ |
109 | neon_load2_2reg_q,neon_to_gp_q,neon_from_gp_q,\ | |
110 | mov_reg,neon_load1_4reg,neon_store1_4reg") | |
d98a3884 | 111 | (set_attr "length" "4,8,4,8,8,8,16,8,16") |
42e1de19 | 112 | (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*") |
113 | (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*") | |
8848d797 | 114 | (set_attr "neg_pool_range" "*,*,*,996,*,*,*,996,*")]) |
d98a3884 | 115 | |
;; Expander for TImode moves under TARGET_NEON.  While pseudos can still be
;; created, a move whose destination is not a register has its source forced
;; into a TImode register first.  No DONE is issued, so the set in the
;; template is what gets emitted.
116 | (define_expand "movti" | |
117 | [(set (match_operand:TI 0 "nonimmediate_operand" "") | |
118 | (match_operand:TI 1 "general_operand" ""))] | |
119 | "TARGET_NEON" | |
120 | { | |
fdd8cbd8 | 121 | if (can_create_pseudo_p ()) |
122 | { | |
0438d37f | 123 | if (!REG_P (operands[0])) |
fdd8cbd8 | 124 | operands[1] = force_reg (TImode, operands[1]); |
125 | } | |
d98a3884 | 126 | }) |
127 | ||
;; Expander for moves in the VSTRUCT modes under TARGET_NEON.  While pseudos
;; can still be created, a move whose destination is not a register has its
;; source forced into a register of the same mode first.  No DONE is issued,
;; so the set in the template is what gets emitted.
128 | (define_expand "mov<mode>" | |
129 | [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "") | |
130 | (match_operand:VSTRUCT 1 "general_operand" ""))] | |
131 | "TARGET_NEON" | |
132 | { | |
fdd8cbd8 | 133 | if (can_create_pseudo_p ()) |
134 | { | |
0438d37f | 135 | if (!REG_P (operands[0])) |
fdd8cbd8 | 136 | operands[1] = force_reg (<MODE>mode, operands[1]); |
137 | } | |
d98a3884 | 138 | }) |
139 | ||
;; Move pattern for the VSTRUCT modes, enabled under TARGET_NEON when at
;; least one operand is a register.  Alternative 0 (register to register)
;; returns "#", GCC's marker for an insn that is split rather than output
;; directly.  Alternatives 1 and 2 (the Ut-constrained forms) are emitted by
;; output_move_neon.  The length attribute is computed per insn by
;; arm_attr_length_move_neon.
140 | (define_insn "*neon_mov<mode>" | |
141 | [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "=w,Ut,w") | |
142 | (match_operand:VSTRUCT 1 "general_operand" " w,w, Ut"))] | |
fdd8cbd8 | 143 | "TARGET_NEON |
144 | && (register_operand (operands[0], <MODE>mode) | |
145 | || register_operand (operands[1], <MODE>mode))" | |
d98a3884 | 146 | { |
147 | switch (which_alternative) | |
148 | { | |
149 | case 0: return "#"; | |
150 | case 1: case 2: return output_move_neon (operands); | |
151 | default: gcc_unreachable (); | |
152 | } | |
153 | } | |
32093010 | 154 | [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_load2_2reg_q") |
ba6c018a | 155 | (set (attr "length") (symbol_ref "arm_attr_length_move_neon (insn)"))]) |
d98a3884 | 156 | |
;; After reload, split an EImode register-to-register copy into two sets.
;; The pieces are a TImode register at the original register number and a
;; DImode register at register number + 4, for both destination and source.
;; neon_disambiguate_copy receives operands, the piece arrays and the count.
;; NOTE(review): presumably it fills operands[0..3] in an order that is safe
;; when the source and destination ranges overlap -- confirm in arm.c.
157 | (define_split | |
158 | [(set (match_operand:EI 0 "s_register_operand" "") | |
159 | (match_operand:EI 1 "s_register_operand" ""))] | |
160 | "TARGET_NEON && reload_completed" | |
161 | [(set (match_dup 0) (match_dup 1)) | |
162 | (set (match_dup 2) (match_dup 3))] | |
163 | { | |
164 | int rdest = REGNO (operands[0]); | |
165 | int rsrc = REGNO (operands[1]); | |
166 | rtx dest[2], src[2]; | |
167 | ||
168 | dest[0] = gen_rtx_REG (TImode, rdest); | |
169 | src[0] = gen_rtx_REG (TImode, rsrc); | |
170 | dest[1] = gen_rtx_REG (DImode, rdest + 4); | |
171 | src[1] = gen_rtx_REG (DImode, rsrc + 4); | |
172 | ||
173 | neon_disambiguate_copy (operands, dest, src, 2); | |
174 | }) | |
175 | ||
;; After reload, split an OImode register-to-register copy into two sets.
;; The pieces are TImode registers at the original register number and at
;; register number + 4, for both destination and source.
;; neon_disambiguate_copy receives operands, the piece arrays and the count.
;; NOTE(review): presumably it fills operands[0..3] in an order that is safe
;; when the source and destination ranges overlap -- confirm in arm.c.
176 | (define_split | |
177 | [(set (match_operand:OI 0 "s_register_operand" "") | |
178 | (match_operand:OI 1 "s_register_operand" ""))] | |
179 | "TARGET_NEON && reload_completed" | |
180 | [(set (match_dup 0) (match_dup 1)) | |
181 | (set (match_dup 2) (match_dup 3))] | |
182 | { | |
183 | int rdest = REGNO (operands[0]); | |
184 | int rsrc = REGNO (operands[1]); | |
185 | rtx dest[2], src[2]; | |
186 | ||
187 | dest[0] = gen_rtx_REG (TImode, rdest); | |
188 | src[0] = gen_rtx_REG (TImode, rsrc); | |
189 | dest[1] = gen_rtx_REG (TImode, rdest + 4); | |
190 | src[1] = gen_rtx_REG (TImode, rsrc + 4); | |
191 | ||
192 | neon_disambiguate_copy (operands, dest, src, 2); | |
193 | }) | |
194 | ||
;; After reload, split a CImode register-to-register copy into three sets.
;; The pieces are TImode registers at the original register number and at
;; register number + 4 and + 8, for both destination and source.
;; neon_disambiguate_copy receives operands, the piece arrays and the count.
;; NOTE(review): presumably it fills operands[0..5] in an order that is safe
;; when the source and destination ranges overlap -- confirm in arm.c.
195 | (define_split | |
196 | [(set (match_operand:CI 0 "s_register_operand" "") | |
197 | (match_operand:CI 1 "s_register_operand" ""))] | |
198 | "TARGET_NEON && reload_completed" | |
199 | [(set (match_dup 0) (match_dup 1)) | |
200 | (set (match_dup 2) (match_dup 3)) | |
201 | (set (match_dup 4) (match_dup 5))] | |
202 | { | |
203 | int rdest = REGNO (operands[0]); | |
204 | int rsrc = REGNO (operands[1]); | |
205 | rtx dest[3], src[3]; | |
206 | ||
207 | dest[0] = gen_rtx_REG (TImode, rdest); | |
208 | src[0] = gen_rtx_REG (TImode, rsrc); | |
209 | dest[1] = gen_rtx_REG (TImode, rdest + 4); | |
210 | src[1] = gen_rtx_REG (TImode, rsrc + 4); | |
211 | dest[2] = gen_rtx_REG (TImode, rdest + 8); | |
212 | src[2] = gen_rtx_REG (TImode, rsrc + 8); | |
213 | ||
214 | neon_disambiguate_copy (operands, dest, src, 3); | |
215 | }) | |
216 | ||
;; After reload, split an XImode register-to-register copy into four sets.
;; The pieces are TImode registers at the original register number and at
;; register number + 4, + 8 and + 12, for both destination and source.
;; neon_disambiguate_copy receives operands, the piece arrays and the count.
;; NOTE(review): presumably it fills operands[0..7] in an order that is safe
;; when the source and destination ranges overlap -- confirm in arm.c.
217 | (define_split | |
218 | [(set (match_operand:XI 0 "s_register_operand" "") | |
219 | (match_operand:XI 1 "s_register_operand" ""))] | |
220 | "TARGET_NEON && reload_completed" | |
221 | [(set (match_dup 0) (match_dup 1)) | |
222 | (set (match_dup 2) (match_dup 3)) | |
223 | (set (match_dup 4) (match_dup 5)) | |
224 | (set (match_dup 6) (match_dup 7))] | |
225 | { | |
226 | int rdest = REGNO (operands[0]); | |
227 | int rsrc = REGNO (operands[1]); | |
228 | rtx dest[4], src[4]; | |
229 | ||
230 | dest[0] = gen_rtx_REG (TImode, rdest); | |
231 | src[0] = gen_rtx_REG (TImode, rsrc); | |
232 | dest[1] = gen_rtx_REG (TImode, rdest + 4); | |
233 | src[1] = gen_rtx_REG (TImode, rsrc + 4); | |
234 | dest[2] = gen_rtx_REG (TImode, rdest + 8); | |
235 | src[2] = gen_rtx_REG (TImode, rsrc + 8); | |
236 | dest[3] = gen_rtx_REG (TImode, rdest + 12); | |
237 | src[3] = gen_rtx_REG (TImode, rsrc + 12); | |
238 | ||
239 | neon_disambiguate_copy (operands, dest, src, 4); | |
240 | }) | |
241 | ||
;; Expander for misaligned moves in the VDQX modes.  It is enabled only for
;; TARGET_NEON, little-endian byte order and unaligned_access.
;; Step 1: if neither operand is a register, operand 1 is forced into one.
;; Step 2: adjust_mem is operand 1 when operand 0 is a register, otherwise
;; operand 0; if neon_vector_mem_operand rejects it, its address is replaced
;; in place by a Pmode register holding that address.
dd0cb1e8 | 242 | (define_expand "movmisalign<mode>" |
1c2054e4 | 243 | [(set (match_operand:VDQX 0 "neon_perm_struct_or_reg_operand") |
244 | (unspec:VDQX [(match_operand:VDQX 1 "neon_perm_struct_or_reg_operand")] | |
dd0cb1e8 | 245 | UNSPEC_MISALIGNED_ACCESS))] |
80c69b15 | 246 | "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access" |
dd0cb1e8 | 247 | { |
877744d0 | 248 | rtx adjust_mem; |
dd0cb1e8 | 249 | /* This pattern is not permitted to fail during expansion: if both arguments |
250 | are non-registers (e.g. memory := constant, which can be created by the | |
251 | auto-vectorizer), force operand 1 into a register. */ | |
252 | if (!s_register_operand (operands[0], <MODE>mode) | |
253 | && !s_register_operand (operands[1], <MODE>mode)) | |
254 | operands[1] = force_reg (<MODE>mode, operands[1]); | |
877744d0 | 255 | |
256 | if (s_register_operand (operands[0], <MODE>mode)) | |
257 | adjust_mem = operands[1]; | |
258 | else | |
259 | adjust_mem = operands[0]; | |
260 | ||
261 | /* Legitimize address. */ | |
262 | if (!neon_vector_mem_operand (adjust_mem, 2, true)) | |
263 | XEXP (adjust_mem, 0) = force_reg (Pmode, XEXP (adjust_mem, 0)); | |
264 | ||
dd0cb1e8 | 265 | }) |
266 | ||
;; Misaligned store for the VDX modes: register operand 1 is written to the
;; Um-constrained memory operand 0 with vst1, using the same enabling
;; condition as the movmisalign expander.
267 | (define_insn "*movmisalign<mode>_neon_store" | |
1c2054e4 | 268 | [(set (match_operand:VDX 0 "neon_permissive_struct_operand" "=Um") |
dd0cb1e8 | 269 | (unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")] |
270 | UNSPEC_MISALIGNED_ACCESS))] | |
80c69b15 | 271 | "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access" |
dd0cb1e8 | 272 | "vst1.<V_sz_elem>\t{%P1}, %A0" |
32093010 | 273 | [(set_attr "type" "neon_store1_1reg<q>")]) |
dd0cb1e8 | 274 | |
;; Misaligned load for the VDX modes: register operand 0 is read from the
;; Um-constrained memory operand 1 with vld1, using the same enabling
;; condition as the movmisalign expander.
275 | (define_insn "*movmisalign<mode>_neon_load" | |
1c2054e4 | 276 | [(set (match_operand:VDX 0 "s_register_operand" "=w") |
277 | (unspec:VDX [(match_operand:VDX 1 "neon_permissive_struct_operand" | |
278 | " Um")] | |
dd0cb1e8 | 279 | UNSPEC_MISALIGNED_ACCESS))] |
80c69b15 | 280 | "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access" |
dd0cb1e8 | 281 | "vld1.<V_sz_elem>\t{%P0}, %A1" |
32093010 | 282 | [(set_attr "type" "neon_load1_1reg<q>")]) |
dd0cb1e8 | 283 | |
;; Misaligned store for the VQX modes: register operand 1 (printed with %q)
;; is written to the Um-constrained memory operand 0 with vst1, using the
;; same enabling condition as the movmisalign expander.
284 | (define_insn "*movmisalign<mode>_neon_store" | |
1c2054e4 | 285 | [(set (match_operand:VQX 0 "neon_permissive_struct_operand" "=Um") |
dd0cb1e8 | 286 | (unspec:VQX [(match_operand:VQX 1 "s_register_operand" " w")] |
287 | UNSPEC_MISALIGNED_ACCESS))] | |
80c69b15 | 288 | "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access" |
dd0cb1e8 | 289 | "vst1.<V_sz_elem>\t{%q1}, %A0" |
32093010 | 290 | [(set_attr "type" "neon_store1_1reg<q>")]) |
dd0cb1e8 | 291 | |
;; Misaligned load for the VQX modes: register operand 0 (printed with %q)
;; is read from the Um-constrained memory operand 1 with vld1, using the
;; same enabling condition as the movmisalign expander.
292 | (define_insn "*movmisalign<mode>_neon_load" | |
1c2054e4 | 293 | [(set (match_operand:VQX 0 "s_register_operand" "=w") |
294 | (unspec:VQX [(match_operand:VQX 1 "neon_permissive_struct_operand" | |
295 | " Um")] | |
dd0cb1e8 | 296 | UNSPEC_MISALIGNED_ACCESS))] |
80c69b15 | 297 | "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access" |
dd0cb1e8 | 298 | "vld1.<V_sz_elem>\t{%q0}, %A1" |
14453814 | 299 | [(set_attr "type" "neon_load1_1reg<q>")]) |
dd0cb1e8 | 300 | |
;; Insert scalar operand 1 into one lane of a VD-mode vector; operand 3 is
;; tied to the output (constraint 0).  The lane number is ffs (operand 2) - 1,
;; i.e. the position of the lowest set bit of operand 2, and is mirrored
;; within the vector when BYTES_BIG_ENDIAN.  operands[2] is overwritten with
;; the lane number before the template is returned.
;; Alternative 0 takes the scalar from memory (vld1 to a lane); alternative 1
;; takes it from an r-constrained register (vmov to a lane).
eecb5747 | 301 | (define_insn "vec_set<mode>_internal" |
7867e2bd | 302 | [(set (match_operand:VD 0 "s_register_operand" "=w,w") |
d98a3884 | 303 | (vec_merge:VD |
d98a3884 | 304 | (vec_duplicate:VD |
7867e2bd | 305 | (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r")) |
306 | (match_operand:VD 3 "s_register_operand" "0,0") | |
307 | (match_operand:SI 2 "immediate_operand" "i,i")))] | |
d98a3884 | 308 | "TARGET_NEON" |
eecb5747 | 309 | { |
e3c4ab51 | 310 | int elt = ffs ((int) INTVAL (operands[2])) - 1; |
cdf93281 | 311 | if (BYTES_BIG_ENDIAN) |
312 | elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt; | |
313 | operands[2] = GEN_INT (elt); | |
7867e2bd | 314 | |
315 | if (which_alternative == 0) | |
316 | return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1"; | |
317 | else | |
318 | return "vmov.<V_sz_elem>\t%P0[%c2], %1"; | |
eecb5747 | 319 | } |
32093010 | 320 | [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")]) |
d98a3884 | 321 | |
;; Insert scalar operand 1 into one lane of a VQ2-mode vector; operand 3 is
;; tied to the output (constraint 0).  The element number is
;; ffs (operand 2) - 1.  It is split into a half selector and a lane within
;; that half: hi = (elem / half_elts) * 2 is added to the register number of
;; operand 0 to build a <V_HALF>mode register, and elt = elem % half_elts is
;; the lane (mirrored within the half when BYTES_BIG_ENDIAN).  operands[0]
;; and operands[2] are overwritten accordingly before the template is
;; returned.
;; Alternative 0 takes the scalar from memory (vld1 to a lane); alternative 1
;; takes it from an r-constrained register (vmov to a lane).
eecb5747 | 322 | (define_insn "vec_set<mode>_internal" |
fb5f110d | 323 | [(set (match_operand:VQ2 0 "s_register_operand" "=w,w") |
324 | (vec_merge:VQ2 | |
325 | (vec_duplicate:VQ2 | |
7867e2bd | 326 | (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r")) |
fb5f110d | 327 | (match_operand:VQ2 3 "s_register_operand" "0,0") |
7867e2bd | 328 | (match_operand:SI 2 "immediate_operand" "i,i")))] |
d98a3884 | 329 | "TARGET_NEON" |
330 | { | |
d3c9aa05 | 331 | HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1; |
d98a3884 | 332 | int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2; |
eecb5747 | 333 | int elt = elem % half_elts; |
334 | int hi = (elem / half_elts) * 2; | |
d98a3884 | 335 | int regno = REGNO (operands[0]); |
336 | ||
cdf93281 | 337 | if (BYTES_BIG_ENDIAN) |
338 | elt = half_elts - 1 - elt; | |
339 | ||
d98a3884 | 340 | operands[0] = gen_rtx_REG (<V_HALF>mode, regno + hi); |
341 | operands[2] = GEN_INT (elt); | |
342 | ||
7867e2bd | 343 | if (which_alternative == 0) |
344 | return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1"; | |
345 | else | |
346 | return "vmov.<V_sz_elem>\t%P0[%c2], %1"; | |
d98a3884 | 347 | } |
32093010 | 348 | [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")] |
bcaec148 | 349 | ) |
d98a3884 | 350 | |
;; Insert DImode operand 1 into one element of a V2DI vector; operand 3 is
;; tied to the output (constraint 0).  The element number is
;; ffs (operand 2) - 1, and operands[0] is replaced by the DImode register at
;; the original register number + 2 * element.
;; Alternative 0 loads that register from memory with vld1.64; alternative 1
;; fills it from an r-constrained operand with a three-operand vmov.
eecb5747 | 351 | (define_insn "vec_setv2di_internal" |
7867e2bd | 352 | [(set (match_operand:V2DI 0 "s_register_operand" "=w,w") |
d98a3884 | 353 | (vec_merge:V2DI |
d98a3884 | 354 | (vec_duplicate:V2DI |
7867e2bd | 355 | (match_operand:DI 1 "nonimmediate_operand" "Um,r")) |
356 | (match_operand:V2DI 3 "s_register_operand" "0,0") | |
357 | (match_operand:SI 2 "immediate_operand" "i,i")))] | |
d98a3884 | 358 | "TARGET_NEON" |
359 | { | |
d3c9aa05 | 360 | HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1; |
eecb5747 | 361 | int regno = REGNO (operands[0]) + 2 * elem; |
d98a3884 | 362 | |
363 | operands[0] = gen_rtx_REG (DImode, regno); | |
364 | ||
7867e2bd | 365 | if (which_alternative == 0) |
366 | return "vld1.64\t%P0, %A1"; | |
367 | else | |
368 | return "vmov\t%P0, %Q1, %R1"; | |
d98a3884 | 369 | } |
32093010 | 370 | [(set_attr "type" "neon_load1_all_lanes_q,neon_from_gp_q")] |
bcaec148 | 371 | ) |
d98a3884 | 372 | |
;; Expander for vec_set on the VDQ modes.  Operand 2 is an element index; it
;; is converted to the single-bit value 1 << index and passed to
;; gen_vec_set<mode>_internal, with operand 0 supplied both as the
;; destination and as the last argument.
eecb5747 | 373 | (define_expand "vec_set<mode>" |
374 | [(match_operand:VDQ 0 "s_register_operand" "") | |
375 | (match_operand:<V_elem> 1 "s_register_operand" "") | |
376 | (match_operand:SI 2 "immediate_operand" "")] | |
377 | "TARGET_NEON" | |
378 | { | |
379 | HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]); | |
380 | emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1], | |
381 | GEN_INT (elem), operands[0])); | |
382 | DONE; | |
383 | }) | |
384 | ||
;; Extract the lane selected by immediate operand 2 from a VD-mode register.
;; When BYTES_BIG_ENDIAN the lane number is mirrored within the vector and
;; written back to operands[2].
;; Alternative 0 stores the lane to memory with vst1; alternative 1 moves it
;; to an r-constrained register with vmov.
d98a3884 | 385 | (define_insn "vec_extract<mode>" |
7867e2bd | 386 | [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r") |
d98a3884 | 387 | (vec_select:<V_elem> |
7867e2bd | 388 | (match_operand:VD 1 "s_register_operand" "w,w") |
389 | (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))] | |
d98a3884 | 390 | "TARGET_NEON" |
cdf93281 | 391 | { |
392 | if (BYTES_BIG_ENDIAN) | |
393 | { | |
394 | int elt = INTVAL (operands[2]); | |
395 | elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt; | |
396 | operands[2] = GEN_INT (elt); | |
397 | } | |
7867e2bd | 398 | |
399 | if (which_alternative == 0) | |
400 | return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0"; | |
401 | else | |
402 | return "vmov.<V_uf_sclr>\t%0, %P1[%c2]"; | |
cdf93281 | 403 | } |
32093010 | 404 | [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")] |
bcaec148 | 405 | ) |
d98a3884 | 406 | |
;; Extract the element selected by immediate operand 2 from a VQ2-mode
;; register.  The index is split into a half selector and a lane within that
;; half: hi = (index / half_elts) * 2 is added to the register number of
;; operand 1 to build a <V_HALF>mode register, and elt = index % half_elts is
;; the lane (mirrored within the half when BYTES_BIG_ENDIAN).  operands[1]
;; and operands[2] are overwritten accordingly before the template is
;; returned.
;; Alternative 0 stores the lane to memory with vst1; alternative 1 moves it
;; to an r-constrained register with vmov.
407 | (define_insn "vec_extract<mode>" | |
7867e2bd | 408 | [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r") |
d98a3884 | 409 | (vec_select:<V_elem> |
fb5f110d | 410 | (match_operand:VQ2 1 "s_register_operand" "w,w") |
7867e2bd | 411 | (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))] |
d98a3884 | 412 | "TARGET_NEON" |
413 | { | |
414 | int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2; | |
415 | int elt = INTVAL (operands[2]) % half_elts; | |
416 | int hi = (INTVAL (operands[2]) / half_elts) * 2; | |
417 | int regno = REGNO (operands[1]); | |
418 | ||
cdf93281 | 419 | if (BYTES_BIG_ENDIAN) |
420 | elt = half_elts - 1 - elt; | |
421 | ||
d98a3884 | 422 | operands[1] = gen_rtx_REG (<V_HALF>mode, regno + hi); |
423 | operands[2] = GEN_INT (elt); | |
424 | ||
7867e2bd | 425 | if (which_alternative == 0) |
426 | return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0"; | |
427 | else | |
428 | return "vmov.<V_uf_sclr>\t%0, %P1[%c2]"; | |
d98a3884 | 429 | } |
32093010 | 430 | [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")] |
bcaec148 | 431 | ) |
d98a3884 | 432 | |
;; Extract the DImode element selected by immediate operand 2 from a V2DI
;; register.  operands[1] is replaced by the DImode register at the original
;; register number + 2 * index.
;; Alternative 0 stores that register to memory with vst1.64; alternative 1
;; moves it to an r-constrained operand with a three-operand vmov.
433 | (define_insn "vec_extractv2di" | |
7867e2bd | 434 | [(set (match_operand:DI 0 "nonimmediate_operand" "=Um,r") |
d98a3884 | 435 | (vec_select:DI |
7867e2bd | 436 | (match_operand:V2DI 1 "s_register_operand" "w,w") |
437 | (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))] | |
d98a3884 | 438 | "TARGET_NEON" |
439 | { | |
d0f1d3d4 | 440 | int regno = REGNO (operands[1]) + 2 * INTVAL (operands[2]); |
d98a3884 | 441 | |
442 | operands[1] = gen_rtx_REG (DImode, regno); | |
443 | ||
7867e2bd | 444 | if (which_alternative == 0) |
445 | return "vst1.64\t{%P1}, %A0 @ v2di"; | |
446 | else | |
447 | return "vmov\t%Q0, %R0, %P1 @ v2di"; | |
d98a3884 | 448 | } |
32093010 | 449 | [(set_attr "type" "neon_store1_one_lane_q,neon_to_gp_q")] |
bcaec148 | 450 | ) |
d98a3884 | 451 | |
;; Expander for vec_init on the VDQ modes: all work is delegated to
;; neon_expand_vector_init (operands[0], operands[1]), after which DONE
;; ends the expansion.
452 | (define_expand "vec_init<mode>" | |
453 | [(match_operand:VDQ 0 "s_register_operand" "") | |
454 | (match_operand 1 "" "")] | |
455 | "TARGET_NEON" | |
456 | { | |
457 | neon_expand_vector_init (operands[0], operands[1]); | |
458 | DONE; | |
459 | }) | |
460 | ||
461 | ;; Doubleword and quadword arithmetic. | |
462 | ||
94829feb | 463 | ;; NOTE: some other instructions also support 64-bit integer |
464 | ;; element size, which we could potentially use for "long long" operations. | |
d98a3884 | 465 | |
;; Vector addition for the VDQ modes (vadd).  For float modes the pattern is
;; enabled only with flag_unsafe_math_optimizations.  The type attribute is
;; neon_fp_addsub_s<q> for float modes and neon_add<q> otherwise.
466 | (define_insn "*add<mode>3_neon" | |
467 | [(set (match_operand:VDQ 0 "s_register_operand" "=w") | |
468 | (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w") | |
469 | (match_operand:VDQ 2 "s_register_operand" "w")))] | |
fe4048be | 470 | "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" |
bcaec148 | 471 | "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
52432540 | 472 | [(set (attr "type") |
0bf497f5 | 473 | (if_then_else (match_test "<Is_float_mode>") |
32093010 | 474 | (const_string "neon_fp_addsub_s<q>") |
475 | (const_string "neon_add<q>")))] | |
bcaec148 | 476 | ) |
d98a3884 | 477 | |
;; DImode addition with seven alternatives; the pattern clobbers CC_REGNUM.
;; Alternatives 0 and 3 (all operands constrained to w) emit vadd.i64.
;; Every other alternative returns "#", GCC's marker for an insn that is
;; split rather than output directly, and is marked conds=clob, length 8.
;; The arch attribute ties alternative 0 to neon_for_64bits and
;; alternative 3 to avoid_neon_for_64bits.
94829feb | 478 | (define_insn "adddi3_neon" |
10e5ccd5 | 479 | [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w,?&r,?&r,?&r") |
480 | (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w,r,0,r") | |
481 | (match_operand:DI 2 "arm_adddi_operand" "w,r,0,w,r,Dd,Dd"))) | |
94829feb | 482 | (clobber (reg:CC CC_REGNUM))] |
483 | "TARGET_NEON" | |
484 | { | |
485 | switch (which_alternative) | |
486 | { | |
a651f34d | 487 | case 0: /* fall through */ |
488 | case 3: return "vadd.i64\t%P0, %P1, %P2"; | |
94829feb | 489 | case 1: return "#"; |
490 | case 2: return "#"; | |
10e5ccd5 | 491 | case 4: return "#"; |
492 | case 5: return "#"; | |
493 | case 6: return "#"; | |
94829feb | 494 | default: gcc_unreachable (); |
495 | } | |
496 | } | |
32093010 | 497 | [(set_attr "type" "neon_add,multiple,multiple,neon_add,\ |
498 | multiple,multiple,multiple") | |
10e5ccd5 | 499 | (set_attr "conds" "*,clob,clob,*,clob,clob,clob") |
500 | (set_attr "length" "*,8,8,*,8,8,8") | |
b6779ddc | 501 | (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits,*,*,*")] |
94829feb | 502 | ) |
503 | ||
;; Vector subtraction for the VDQ modes (vsub).  For float modes the pattern
;; is enabled only with flag_unsafe_math_optimizations.  The type attribute
;; is neon_fp_addsub_s<q> for float modes and neon_sub<q> otherwise.
d98a3884 | 504 | (define_insn "*sub<mode>3_neon" |
505 | [(set (match_operand:VDQ 0 "s_register_operand" "=w") | |
506 | (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w") | |
507 | (match_operand:VDQ 2 "s_register_operand" "w")))] | |
fe4048be | 508 | "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" |
bcaec148 | 509 | "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
52432540 | 510 | [(set (attr "type") |
0bf497f5 | 511 | (if_then_else (match_test "<Is_float_mode>") |
32093010 | 512 | (const_string "neon_fp_addsub_s<q>") |
513 | (const_string "neon_sub<q>")))] | |
bcaec148 | 514 | ) |
d98a3884 | 515 | |
;; DImode subtraction with five alternatives; the pattern clobbers CC_REGNUM.
;; Alternatives 0 and 4 (all operands constrained to w) emit vsub.i64.
;; Alternatives 1 to 3 (r-constrained destination) emit a subs/sbc pair and
;; are marked conds=clob, length 8.
;; The arch attribute ties alternative 0 to neon_for_64bits and
;; alternative 4 to avoid_neon_for_64bits.
94829feb | 516 | (define_insn "subdi3_neon" |
a651f34d | 517 | [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r,?w") |
518 | (minus:DI (match_operand:DI 1 "s_register_operand" "w,0,r,0,w") | |
519 | (match_operand:DI 2 "s_register_operand" "w,r,0,0,w"))) | |
94829feb | 520 | (clobber (reg:CC CC_REGNUM))] |
521 | "TARGET_NEON" | |
522 | { | |
523 | switch (which_alternative) | |
524 | { | |
a651f34d | 525 | case 0: /* fall through */ |
526 | case 4: return "vsub.i64\t%P0, %P1, %P2"; | |
94829feb | 527 | case 1: /* fall through */ |
528 | case 2: /* fall through */ | |
529 | case 3: return "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2"; | |
530 | default: gcc_unreachable (); | |
531 | } | |
532 | } | |
32093010 | 533 | [(set_attr "type" "neon_sub,multiple,multiple,multiple,neon_sub") |
a651f34d | 534 | (set_attr "conds" "*,clob,clob,clob,*") |
535 | (set_attr "length" "*,8,8,8,*") | |
b6779ddc | 536 | (set_attr "arch" "neon_for_64bits,*,*,*,avoid_neon_for_64bits")] |
94829feb | 537 | ) |
538 | ||
;; Vector multiplication for the VDQW modes (vmul).  For float modes the
;; pattern is enabled only with flag_unsafe_math_optimizations.  The type
;; attribute is neon_fp_mul_s<q> for float modes and
;; neon_mul_<V_elem_ch><q> otherwise.
d98a3884 | 539 | (define_insn "*mul<mode>3_neon" |
32093010 | 540 | [(set (match_operand:VDQW 0 "s_register_operand" "=w") |
541 | (mult:VDQW (match_operand:VDQW 1 "s_register_operand" "w") | |
542 | (match_operand:VDQW 2 "s_register_operand" "w")))] | |
fe4048be | 543 | "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" |
bcaec148 | 544 | "vmul.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
52432540 | 545 | [(set (attr "type") |
0bf497f5 | 546 | (if_then_else (match_test "<Is_float_mode>") |
32093010 | 547 | (const_string "neon_fp_mul_s<q>") |
548 | (const_string "neon_mul_<V_elem_ch><q>")))] | |
bcaec148 | 549 | ) |
d98a3884 | 550 | |
;; Multiply-accumulate for the VDQW modes: operand 0 = operand 2 * operand 3
;; + operand 1, emitted as vmla.  Operand 1 is tied to the output
;; (constraint 0), which is why it does not appear in the template.  For
;; float modes the pattern is enabled only with
;; flag_unsafe_math_optimizations.
94829feb | 551 | (define_insn "mul<mode>3add<mode>_neon" |
32093010 | 552 | [(set (match_operand:VDQW 0 "s_register_operand" "=w") |
553 | (plus:VDQW (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w") | |
554 | (match_operand:VDQW 3 "s_register_operand" "w")) | |
555 | (match_operand:VDQW 1 "s_register_operand" "0")))] | |
fe4048be | 556 | "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" |
3d8bad71 | 557 | "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3" |
52432540 | 558 | [(set (attr "type") |
0bf497f5 | 559 | (if_then_else (match_test "<Is_float_mode>") |
32093010 | 560 | (const_string "neon_fp_mla_s<q>") |
561 | (const_string "neon_mla_<V_elem_ch><q>")))] | |
3d8bad71 | 562 | ) |
563 | ||
;; Multiply-subtract for the VDQW modes: operand 0 = operand 1 - operand 2 *
;; operand 3, emitted as vmls.  Operand 1 is tied to the output
;; (constraint 0), which is why it does not appear in the template.  For
;; float modes the pattern is enabled only with
;; flag_unsafe_math_optimizations.
94829feb | 564 | (define_insn "mul<mode>3neg<mode>add<mode>_neon" |
32093010 | 565 | [(set (match_operand:VDQW 0 "s_register_operand" "=w") |
566 | (minus:VDQW (match_operand:VDQW 1 "s_register_operand" "0") | |
567 | (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w") | |
568 | (match_operand:VDQW 3 "s_register_operand" "w"))))] | |
fe4048be | 569 | "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" |
3d8bad71 | 570 | "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3" |
52432540 | 571 | [(set (attr "type") |
0bf497f5 | 572 | (if_then_else (match_test "<Is_float_mode>") |
32093010 | 573 | (const_string "neon_fp_mla_s<q>") |
574 | (const_string "neon_mla_<V_elem_ch><q>")))] | |
3d8bad71 | 575 | ) |
576 | ||
c6bf9c68 | 577 | ;; Fused multiply-accumulate |
9abf7cc0 | 578 | ;; We define each insn twice here: |
579 | ;; 1: with flag_unsafe_math_optimizations for the widening multiply phase | |
580 | ;; to be able to use when converting to FMA. | |
581 | ;; 2: without flag_unsafe_math_optimizations for the intrinsics to use. | |
;; Fused multiply-add for the VCVTF modes (vfma); the accumulator, operand 3,
;; is tied to the output.  This variant additionally requires
;; flag_unsafe_math_optimizations.
c6bf9c68 | 582 | (define_insn "fma<VCVTF:mode>4" |
583 | [(set (match_operand:VCVTF 0 "register_operand" "=w") | |
584 | (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w") | |
585 | (match_operand:VCVTF 2 "register_operand" "w") | |
586 | (match_operand:VCVTF 3 "register_operand" "0")))] | |
587 | "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations" | |
588 | "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2" | |
32093010 | 589 | [(set_attr "type" "neon_fp_mla_s<q>")] |
c6bf9c68 | 590 | ) |
591 | ||
;; Fused multiply-add for the VCVTF modes (vfma); the accumulator, operand 3,
;; is tied to the output.  This variant needs only TARGET_NEON and
;; TARGET_FMA, without flag_unsafe_math_optimizations.
9abf7cc0 | 592 | (define_insn "fma<VCVTF:mode>4_intrinsic" |
593 | [(set (match_operand:VCVTF 0 "register_operand" "=w") | |
594 | (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w") | |
595 | (match_operand:VCVTF 2 "register_operand" "w") | |
596 | (match_operand:VCVTF 3 "register_operand" "0")))] | |
597 | "TARGET_NEON && TARGET_FMA" | |
598 | "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2" | |
32093010 | 599 | [(set_attr "type" "neon_fp_mla_s<q>")] |
9abf7cc0 | 600 | ) |
601 | ||
;; Fused multiply-subtract for the VCVTF modes (vfms): an fma whose first
;; multiplicand is negated; the accumulator, operand 3, is tied to the
;; output.  This variant additionally requires
;; flag_unsafe_math_optimizations.
c6bf9c68 | 602 | (define_insn "*fmsub<VCVTF:mode>4" |
603 | [(set (match_operand:VCVTF 0 "register_operand" "=w") | |
604 | (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w")) | |
605 | (match_operand:VCVTF 2 "register_operand" "w") | |
606 | (match_operand:VCVTF 3 "register_operand" "0")))] | |
607 | "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations" | |
608 | "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2" | |
32093010 | 609 | [(set_attr "type" "neon_fp_mla_s<q>")] |
c6bf9c68 | 610 | ) |
611 | ||
;; Fused multiply-subtract for the VCVTF modes (vfms): an fma whose first
;; multiplicand is negated; the accumulator, operand 3, is tied to the
;; output.  This variant needs only TARGET_NEON and TARGET_FMA, without
;; flag_unsafe_math_optimizations.
9abf7cc0 | 612 | (define_insn "fmsub<VCVTF:mode>4_intrinsic" |
613 | [(set (match_operand:VCVTF 0 "register_operand" "=w") | |
614 | (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w")) | |
615 | (match_operand:VCVTF 2 "register_operand" "w") | |
616 | (match_operand:VCVTF 3 "register_operand" "0")))] | |
617 | "TARGET_NEON && TARGET_FMA" | |
618 | "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2" | |
32093010 | 619 | [(set_attr "type" "neon_fp_mla_s<q>")] |
9abf7cc0 | 620 | ) |
621 | ||
;; vrint family for the VCVTF modes, one pattern per NEON_VRINT unspec; the
;; mnemonic suffix comes from <nvrint_variant> and the element type in the
;; template is fixed to .f32.  Requires TARGET_NEON and TARGET_FPU_ARMV8.
4182b724 | 622 | (define_insn "neon_vrint<NEON_VRINT:nvrint_variant><VCVTF:mode>" |
623 | [(set (match_operand:VCVTF 0 "s_register_operand" "=w") | |
624 | (unspec:VCVTF [(match_operand:VCVTF 1 | |
625 | "s_register_operand" "w")] | |
626 | NEON_VRINT))] | |
627 | "TARGET_NEON && TARGET_FPU_ARMV8" | |
628 | "vrint<nvrint_variant>%?.f32\\t%<V_reg>0, %<V_reg>1" | |
32093010 | 629 | [(set_attr "type" "neon_fp_round_<V_elem_ch><q>")] |
4182b724 | 630 | ) |
631 | ||
;; vcvt family converting a VCVTF-mode register to <V_cmp_result> mode via
;; the FIXUORS code iterator applied to a NEON_VCVT unspec.  The template
;; fixes the element types to <su>32 and f32.  Requires TARGET_NEON and
;; TARGET_FPU_ARMV8; the pattern is marked as not predicable.
48bd6927 | 632 | (define_insn "neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode><v_cmp_result>" |
633 | [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w") | |
634 | (FIXUORS:<V_cmp_result> (unspec:VCVTF | |
635 | [(match_operand:VCVTF 1 "register_operand" "w")] | |
636 | NEON_VCVT)))] | |
637 | "TARGET_NEON && TARGET_FPU_ARMV8" | |
638 | "vcvt<nvrint_variant>.<su>32.f32\\t%<V_reg>0, %<V_reg>1" | |
639 | [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>") | |
640 | (set_attr "predicable" "no")] | |
641 | ) | |
642 | ||
;; Bitwise OR for the VDQ modes.  Alternative 0 is the register form (vorr).
;; Alternative 1 takes a Dl-constrained operand 2 with operand 1 tied to the
;; output; its text is produced by neon_output_logic_immediate, called with
;; "vorr", a pointer to operands[2], the mode, 0, and whether the mode is a
;; valid quad-register mode.
d98a3884 | 643 | (define_insn "ior<mode>3" |
644 | [(set (match_operand:VDQ 0 "s_register_operand" "=w,w") | |
645 | (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0") | |
646 | (match_operand:VDQ 2 "neon_logic_op2" "w,Dl")))] | |
647 | "TARGET_NEON" | |
648 | { | |
649 | switch (which_alternative) | |
650 | { | |
651 | case 0: return "vorr\t%<V_reg>0, %<V_reg>1, %<V_reg>2"; | |
652 | case 1: return neon_output_logic_immediate ("vorr", &operands[2], | |
653 | <MODE>mode, 0, VALID_NEON_QREG_MODE (<MODE>mode)); | |
654 | default: gcc_unreachable (); | |
655 | } | |
bcaec148 | 656 | } |
32093010 | 657 | [(set_attr "type" "neon_logic<q>")] |
bcaec148 | 658 | ) |
d98a3884 | 659 | |
d98a3884 | 660 | ;; The concrete forms of the Neon immediate-logic instructions are vbic and |
661 | ;; vorr. We support the pseudo-instruction vand instead, because that | |
662 | ;; corresponds to the canonical form the middle-end expects to use for | |
663 | ;; immediate bitwise-ANDs. | |
664 | ||
;; Bitwise AND for the VDQ modes.  Alternative 0 is the register form (vand).
;; Alternative 1 takes a DL-constrained operand 2 with operand 1 tied to the
;; output; its text is produced by neon_output_logic_immediate, called with
;; "vand", a pointer to operands[2], the mode, 1, and whether the mode is a
;; valid quad-register mode.
665 | (define_insn "and<mode>3" | |
666 | [(set (match_operand:VDQ 0 "s_register_operand" "=w,w") | |
667 | (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0") | |
668 | (match_operand:VDQ 2 "neon_inv_logic_op2" "w,DL")))] | |
669 | "TARGET_NEON" | |
670 | { | |
671 | switch (which_alternative) | |
672 | { | |
673 | case 0: return "vand\t%<V_reg>0, %<V_reg>1, %<V_reg>2"; | |
674 | case 1: return neon_output_logic_immediate ("vand", &operands[2], | |
675 | <MODE>mode, 1, VALID_NEON_QREG_MODE (<MODE>mode)); | |
676 | default: gcc_unreachable (); | |
677 | } | |
bcaec148 | 678 | } |
32093010 | 679 | [(set_attr "type" "neon_logic<q>")] |
bcaec148 | 680 | ) |
d98a3884 | 681 | |
;; OR-NOT for the VDQ modes: operand 0 = operand 1 | ~operand 2 (vorn).
;; Note that the complemented operand is listed first in the RTL but is
;; operand 2, so the template prints the operands in numeric order.
d98a3884 | 682 | (define_insn "orn<mode>3_neon" |
683 | [(set (match_operand:VDQ 0 "s_register_operand" "=w") | |
04931b44 | 684 | (ior:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w")) |
685 | (match_operand:VDQ 1 "s_register_operand" "w")))] | |
d98a3884 | 686 | "TARGET_NEON" |
bcaec148 | 687 | "vorn\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
32093010 | 688 | [(set_attr "type" "neon_logic<q>")] |
bcaec148 | 689 | ) |
d98a3884 | 690 | |
2eb9782e | 691 | ;; TODO: investigate whether we should disable |
692 | ;; this and bicdi3_neon for the A8 in line with the other | |
693 | ;; changes above. | |
;; DImode OR-NOT: operand 0 = operand 1 | ~operand 2.
;; Alternative 0 (all operands constrained to w) emits vorn directly; the
;; other three alternatives output "#" and are handled by the split below.
;; The split applies after reload when operand 0 is not a VFP register.
;; Under TARGET_THUMB2 the operands are rewritten to SImode low and high
;; parts (operands 3, 4 and 5 hold the high parts of operands 0, 2 and 1)
;; and the two SImode sets in the split template are used.  Otherwise the
;; split emits gen_one_cmpldi2 into operand 0 followed by gen_iordi3, and
;; DONE suppresses the template.
;; The arch attribute restricts alternative 1 to "a" and alternatives 2 and
;; 3 to "t2".
694 | (define_insn_and_split "orndi3_neon" | |
695 | [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r") | |
696 | (ior:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,0,0,r")) | |
697 | (match_operand:DI 1 "s_register_operand" "w,r,r,0")))] | |
d98a3884 | 698 | "TARGET_NEON" |
e2669ea7 | 699 | "@ |
700 | vorn\t%P0, %P1, %P2 | |
701 | # | |
2eb9782e | 702 | # |
e2669ea7 | 703 | #" |
2eb9782e | 704 | "reload_completed && |
705 | (TARGET_NEON && !(IS_VFP_REGNUM (REGNO (operands[0]))))" | |
706 | [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1))) | |
707 | (set (match_dup 3) (ior:SI (not:SI (match_dup 4)) (match_dup 5)))] | |
708 | " | |
709 | { | |
710 | if (TARGET_THUMB2) | |
711 | { | |
712 | operands[3] = gen_highpart (SImode, operands[0]); | |
713 | operands[0] = gen_lowpart (SImode, operands[0]); | |
714 | operands[4] = gen_highpart (SImode, operands[2]); | |
715 | operands[2] = gen_lowpart (SImode, operands[2]); | |
716 | operands[5] = gen_highpart (SImode, operands[1]); | |
717 | operands[1] = gen_lowpart (SImode, operands[1]); | |
718 | } | |
719 | else | |
720 | { | |
721 | emit_insn (gen_one_cmpldi2 (operands[0], operands[2])); | |
722 | emit_insn (gen_iordi3 (operands[0], operands[1], operands[0])); | |
723 | DONE; | |
724 | } | |
725 | }" | |
32093010 | 726 | [(set_attr "type" "neon_logic,multiple,multiple,multiple") |
2eb9782e | 727 | (set_attr "length" "*,16,8,8") |
728 | (set_attr "arch" "any,a,t2,t2")] | |
bcaec148 | 729 | ) |
d98a3884 | 730 | |
731 | (define_insn "bic<mode>3_neon" ; Vector bit-clear: op0 = op1 AND (NOT op2), emitted as one vbic. | |
732 | [(set (match_operand:VDQ 0 "s_register_operand" "=w") | |
04931b44 | 733 | (and:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w")) ; operand 2 is the complemented input |
734 | (match_operand:VDQ 1 "s_register_operand" "w")))] | |
d98a3884 | 735 | "TARGET_NEON" |
bcaec148 | 736 | "vbic\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
32093010 | 737 | [(set_attr "type" "neon_logic<q>")] |
bcaec148 | 738 | ) |
d98a3884 | 739 | |
e2669ea7 | 740 | ;; Compare to *anddi_notdi_di. |
d98a3884 | 741 | (define_insn "bicdi3_neon" ; DImode bit-clear: op0 = op1 AND (NOT op2). Alt 0 is a single vbic; the core-register alts output # and carry no splitter in this pattern. |
205c3b0a | 742 | [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r") |
e2669ea7 | 743 | (and:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,r,0")) |
744 | (match_operand:DI 1 "s_register_operand" "w,0,r")))] | |
d98a3884 | 745 | "TARGET_NEON" |
e2669ea7 | 746 | "@ |
747 | vbic\t%P0, %P1, %P2 | |
748 | # | |
749 | #" | |
32093010 | 750 | [(set_attr "type" "neon_logic,multiple,multiple") |
e2669ea7 | 751 | (set_attr "length" "*,8,8")] ; the two # alternatives are 8 bytes each |
bcaec148 | 752 | ) |
d98a3884 | 753 | |
754 | (define_insn "xor<mode>3" ; Vector exclusive-OR of op1 and op2 into op0, emitted as veor. | |
755 | [(set (match_operand:VDQ 0 "s_register_operand" "=w") | |
756 | (xor:VDQ (match_operand:VDQ 1 "s_register_operand" "w") | |
757 | (match_operand:VDQ 2 "s_register_operand" "w")))] | |
758 | "TARGET_NEON" | |
bcaec148 | 759 | "veor\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
32093010 | 760 | [(set_attr "type" "neon_logic<q>")] |
bcaec148 | 761 | ) |
d98a3884 | 762 | |
d98a3884 | 763 | (define_insn "one_cmpl<mode>2" ; Vector bitwise NOT of op1 into op0, emitted as vmvn. |
764 | [(set (match_operand:VDQ 0 "s_register_operand" "=w") | |
765 | (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))] | |
766 | "TARGET_NEON" | |
bcaec148 | 767 | "vmvn\t%<V_reg>0, %<V_reg>1" |
32093010 | 768 | [(set_attr "type" "neon_move<q>")] |
bcaec148 | 769 | ) |
d98a3884 | 770 | |
771 | (define_insn "abs<mode>2" ; Element-wise absolute value of op1 into op0, emitted as vabs. | |
772 | [(set (match_operand:VDQW 0 "s_register_operand" "=w") | |
773 | (abs:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))] | |
774 | "TARGET_NEON" | |
bcaec148 | 775 | "vabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1" |
52432540 | 776 | [(set (attr "type") ; scheduling type depends on whether the mode is floating-point |
0bf497f5 | 777 | (if_then_else (match_test "<Is_float_mode>") |
32093010 | 778 | (const_string "neon_fp_abs_s<q>") |
779 | (const_string "neon_abs<q>")))] | |
bcaec148 | 780 | ) |
d98a3884 | 781 | |
782 | (define_insn "neg<mode>2" ; Element-wise negation of op1 into op0, emitted as vneg. | |
783 | [(set (match_operand:VDQW 0 "s_register_operand" "=w") | |
784 | (neg:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))] | |
785 | "TARGET_NEON" | |
bcaec148 | 786 | "vneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1" |
52432540 | 787 | [(set (attr "type") ; scheduling type depends on whether the mode is floating-point |
0bf497f5 | 788 | (if_then_else (match_test "<Is_float_mode>") |
32093010 | 789 | (const_string "neon_fp_neg_s<q>") |
790 | (const_string "neon_neg<q>")))] | |
bcaec148 | 791 | ) |
d98a3884 | 792 | |
774d2fbb | 793 | (define_insn "negdi2_neon" ; DImode negate for NEON (w) or core (r) registers; always output as # so it must be split before final output. |
794 | [(set (match_operand:DI 0 "s_register_operand" "=&w, w,r,&r") | |
795 | (neg:DI (match_operand:DI 1 "s_register_operand" " w, w,0, r"))) | |
796 | (clobber (match_scratch:DI 2 "= X,&w,X, X")) ; only alternative 1 requests a NEON scratch register | |
797 | (clobber (reg:CC CC_REGNUM))] | |
798 | "TARGET_NEON" | |
799 | "#" | |
32093010 | 800 | [(set_attr "length" "8") |
801 | (set_attr "type" "multiple")] | |
774d2fbb | 802 | ) |
803 | ||
804 | ; Split negdi2_neon for vfp registers | |
805 | (define_split ; After reload, with op0 in a VFP register: rewrite the negate as scratch = 0; op0 = scratch - op1. | |
806 | [(set (match_operand:DI 0 "s_register_operand" "") | |
807 | (neg:DI (match_operand:DI 1 "s_register_operand" ""))) | |
808 | (clobber (match_scratch:DI 2 "")) | |
809 | (clobber (reg:CC CC_REGNUM))] | |
810 | "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))" | |
811 | [(set (match_dup 2) (const_int 0)) ; zero the temporary | |
812 | (parallel [(set (match_dup 0) (minus:DI (match_dup 2) (match_dup 1))) ; op0 = 0 - op1 | |
813 | (clobber (reg:CC CC_REGNUM))])] | |
814 | { | |
815 | if (!REG_P (operands[2])) | |
816 | operands[2] = operands[0]; | |
817 | } | |
818 | ) ; the preparation code above substitutes op0 as the temporary when no scratch register was allocated | |
819 | ||
820 | ; Split negdi2_neon for core registers | |
821 | (define_split ; After reload, with op0 in a core register: re-emit the neg:DI keeping only the CC clobber (the scratch is dropped). | |
822 | [(set (match_operand:DI 0 "s_register_operand" "") | |
823 | (neg:DI (match_operand:DI 1 "s_register_operand" ""))) | |
824 | (clobber (match_scratch:DI 2 "")) | |
825 | (clobber (reg:CC CC_REGNUM))] | |
826 | "TARGET_32BIT && reload_completed | |
827 | && arm_general_register_operand (operands[0], DImode)" | |
828 | [(parallel [(set (match_dup 0) (neg:DI (match_dup 1))) | |
829 | (clobber (reg:CC CC_REGNUM))])] | |
830 | "" | |
831 | ) | |
832 | ||
d98a3884 | 833 | (define_insn "*umin<mode>3_neon" ; Element-wise unsigned minimum of op1 and op2, emitted as vmin with the unsigned element suffix. |
834 | [(set (match_operand:VDQIW 0 "s_register_operand" "=w") | |
835 | (umin:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w") | |
836 | (match_operand:VDQIW 2 "s_register_operand" "w")))] | |
837 | "TARGET_NEON" | |
bcaec148 | 838 | "vmin.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
32093010 | 839 | [(set_attr "type" "neon_minmax<q>")] |
bcaec148 | 840 | ) |
d98a3884 | 841 | |
842 | (define_insn "*umax<mode>3_neon" ; Element-wise unsigned maximum of op1 and op2, emitted as vmax with the unsigned element suffix. | |
843 | [(set (match_operand:VDQIW 0 "s_register_operand" "=w") | |
844 | (umax:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w") | |
845 | (match_operand:VDQIW 2 "s_register_operand" "w")))] | |
846 | "TARGET_NEON" | |
bcaec148 | 847 | "vmax.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
32093010 | 848 | [(set_attr "type" "neon_minmax<q>")] |
bcaec148 | 849 | ) |
d98a3884 | 850 | |
851 | (define_insn "*smin<mode>3_neon" ; Element-wise signed minimum of op1 and op2, emitted as vmin with the signed element suffix. | |
852 | [(set (match_operand:VDQW 0 "s_register_operand" "=w") | |
853 | (smin:VDQW (match_operand:VDQW 1 "s_register_operand" "w") | |
854 | (match_operand:VDQW 2 "s_register_operand" "w")))] | |
855 | "TARGET_NEON" | |
bcaec148 | 856 | "vmin.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
52432540 | 857 | [(set (attr "type") ; scheduling type depends on whether the mode is floating-point |
0bf497f5 | 858 | (if_then_else (match_test "<Is_float_mode>") |
32093010 | 859 | (const_string "neon_fp_minmax_s<q>") |
860 | (const_string "neon_minmax<q>")))] | |
bcaec148 | 861 | ) |
d98a3884 | 862 | |
863 | (define_insn "*smax<mode>3_neon" ; Element-wise signed maximum of op1 and op2, emitted as vmax with the signed element suffix. | |
864 | [(set (match_operand:VDQW 0 "s_register_operand" "=w") | |
865 | (smax:VDQW (match_operand:VDQW 1 "s_register_operand" "w") | |
866 | (match_operand:VDQW 2 "s_register_operand" "w")))] | |
867 | "TARGET_NEON" | |
bcaec148 | 868 | "vmax.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
52432540 | 869 | [(set (attr "type") ; scheduling type depends on whether the mode is floating-point |
0bf497f5 | 870 | (if_then_else (match_test "<Is_float_mode>") |
32093010 | 871 | (const_string "neon_fp_minmax_s<q>") |
872 | (const_string "neon_minmax<q>")))] | |
bcaec148 | 873 | ) |
d98a3884 | 874 | |
875 | ; TODO: V2DI shifts are currently disabled because there are bugs in the | |
876 | ; generic vectorizer code. It ends up creating a V2DI constructor with | |
877 | ; SImode elements. | |
878 | ||
6fbc081b | 879 | (define_insn "vashl<mode>3" ; Vector left shift of op1 by op2. Alt 0 takes a register count (vshl); alt 1 takes an immediate via neon_output_shift_immediate. |
73f20323 | 880 | [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w") |
881 | (ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w,w") | |
882 | (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Dn")))] | |
883 | "TARGET_NEON" | |
884 | { | |
885 | switch (which_alternative) | |
886 | { | |
887 | case 0: return "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"; | |
888 | case 1: return neon_output_shift_immediate ("vshl", 'i', &operands[2], | |
889 | <MODE>mode, | |
890 | VALID_NEON_QREG_MODE (<MODE>mode), | |
891 | true); | |
892 | default: gcc_unreachable (); | |
893 | } | |
894 | } | |
32093010 | 895 | [(set_attr "type" "neon_shift_reg<q>, neon_shift_imm<q>")] ; one type per alternative: register count, then immediate |
73f20323 | 896 | ) |
897 | ||
898 | (define_insn "vashr<mode>3_imm" ; Vector arithmetic right shift of op1 by an immediate op2; output is built by neon_output_shift_immediate ("vshr", 's'). | |
d98a3884 | 899 | [(set (match_operand:VDQIW 0 "s_register_operand" "=w") |
73f20323 | 900 | (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w") |
901 | (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))] | |
d98a3884 | 902 | "TARGET_NEON" |
73f20323 | 903 | { |
904 | return neon_output_shift_immediate ("vshr", 's', &operands[2], | |
905 | <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode), | |
906 | false); | |
907 | } | |
32093010 | 908 | [(set_attr "type" "neon_shift_imm<q>")] |
bcaec148 | 909 | ) |
d98a3884 | 910 | |
73f20323 | 911 | (define_insn "vlshr<mode>3_imm" ; Vector logical right shift of op1 by an immediate op2; output is built by neon_output_shift_immediate ("vshr", 'u'). |
912 | [(set (match_operand:VDQIW 0 "s_register_operand" "=w") | |
913 | (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w") | |
914 | (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))] | |
915 | "TARGET_NEON" | |
916 | { | |
917 | return neon_output_shift_immediate ("vshr", 'u', &operands[2], | |
918 | <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode), | |
919 | false); | |
920 | } | |
32093010 | 921 | [(set_attr "type" "neon_shift_imm<q>")] |
73f20323 | 922 | ) |
923 | ||
d98a3884 | 924 | ; Used for implementing arithmetic shift-right, which is a left-shift by a negative |
925 | ; amount, with signed operands. This is essentially the same as vashl<mode>3 | |
926 | ; above, but using an unspec in case GCC tries anything tricky with negative | |
927 | ; shift amounts. | |
928 | ||
929 | (define_insn "ashl<mode>3_signed" ; Register-count vshl with the signed element suffix, wrapped in UNSPEC_ASHIFT_SIGNED. | |
930 | [(set (match_operand:VDQI 0 "s_register_operand" "=w") | |
931 | (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w") ; value to shift | |
932 | (match_operand:VDQI 2 "s_register_operand" "w")] ; shift counts | |
933 | UNSPEC_ASHIFT_SIGNED))] | |
934 | "TARGET_NEON" | |
bcaec148 | 935 | "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
32093010 | 936 | [(set_attr "type" "neon_shift_reg<q>")] |
bcaec148 | 937 | ) |
d98a3884 | 938 | |
939 | ; Used for implementing logical shift-right, which is a left-shift by a negative | |
940 | ; amount, with unsigned operands. | |
941 | ||
942 | (define_insn "ashl<mode>3_unsigned" ; Register-count vshl with the unsigned element suffix, wrapped in UNSPEC_ASHIFT_UNSIGNED. | |
943 | [(set (match_operand:VDQI 0 "s_register_operand" "=w") | |
944 | (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w") ; value to shift | |
945 | (match_operand:VDQI 2 "s_register_operand" "w")] ; shift counts | |
946 | UNSPEC_ASHIFT_UNSIGNED))] | |
947 | "TARGET_NEON" | |
bcaec148 | 948 | "vshl.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
32093010 | 949 | [(set_attr "type" "neon_shift_reg<q>")] |
bcaec148 | 950 | ) |
d98a3884 | 951 | |
6fbc081b | 952 | (define_expand "vashr<mode>3" ; Vector arithmetic right shift. A register count is negated and fed to ashl<mode>3_signed; any other count goes to vashr<mode>3_imm. |
d98a3884 | 953 | [(set (match_operand:VDQIW 0 "s_register_operand" "") |
954 | (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "") | |
73f20323 | 955 | (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))] |
d98a3884 | 956 | "TARGET_NEON" |
957 | { | |
7510cf26 | 958 | if (s_register_operand (operands[2], <MODE>mode)) |
73f20323 | 959 | { |
7510cf26 | 960 | rtx neg = gen_reg_rtx (<MODE>mode); |
73f20323 | 961 | emit_insn (gen_neg<mode>2 (neg, operands[2])); |
962 | emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1], neg)); | |
963 | } | |
964 | else | |
965 | emit_insn (gen_vashr<mode>3_imm (operands[0], operands[1], operands[2])); | |
d98a3884 | 966 | DONE; |
967 | }) | |
968 | ||
6fbc081b | 969 | (define_expand "vlshr<mode>3" ; Vector logical right shift. A register count is negated and fed to ashl<mode>3_unsigned; any other count goes to vlshr<mode>3_imm. |
d98a3884 | 970 | [(set (match_operand:VDQIW 0 "s_register_operand" "") |
971 | (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "") | |
73f20323 | 972 | (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))] |
d98a3884 | 973 | "TARGET_NEON" |
974 | { | |
7510cf26 | 975 | if (s_register_operand (operands[2], <MODE>mode)) |
73f20323 | 976 | { |
7510cf26 | 977 | rtx neg = gen_reg_rtx (<MODE>mode); |
73f20323 | 978 | emit_insn (gen_neg<mode>2 (neg, operands[2])); |
979 | emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1], neg)); | |
980 | } | |
981 | else | |
982 | emit_insn (gen_vlshr<mode>3_imm (operands[0], operands[1], operands[2])); | |
d98a3884 | 983 | DONE; |
984 | }) | |
985 | ||
aa06c51c | 986 | ;; 64-bit shifts |
987 | ||
988 | ;; This pattern loads a 32-bit shift count into a 64-bit NEON register, | |
989 | ;; leaving the upper half uninitialized. This is OK since the shift | |
990 | ;; instruction only looks at the low 8 bits anyway. To avoid confusing | |
991 | ;; data flow analysis however, we pretend the full register is set | |
992 | ;; using an unspec. | |
993 | (define_insn "neon_load_count" ; Put an SImode shift count into lane 0 of a DImode NEON register; modelled as UNSPEC_LOAD_COUNT. | |
994 | [(set (match_operand:DI 0 "s_register_operand" "=w,w") | |
995 | (unspec:DI [(match_operand:SI 1 "nonimmediate_operand" "Um,r")] ; alt 0: memory source, alt 1: core register source | |
996 | UNSPEC_LOAD_COUNT))] | |
997 | "TARGET_NEON" | |
998 | "@ | |
999 | vld1.32\t{%P0[0]}, %A1 | |
1000 | vmov.32\t%P0[0], %1" | |
32093010 | 1001 | [(set_attr "type" "neon_load1_1reg,neon_from_gp")] |
aa06c51c | 1002 | ) |
1003 | ||
1004 | (define_insn "ashldi3_neon_noclobber" ; Post-reload DImode left shift in a NEON register with no clobbers; a constant count must lie in 0..63. | |
1005 | [(set (match_operand:DI 0 "s_register_operand" "=w,w") | |
1006 | (ashift:DI (match_operand:DI 1 "s_register_operand" " w,w") | |
1007 | (match_operand:DI 2 "reg_or_int_operand" " i,w")))] ; alt 0: immediate count, alt 1: count in a NEON register | |
1008 | "TARGET_NEON && reload_completed | |
1009 | && (!CONST_INT_P (operands[2]) | |
1010 | || (INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) < 64))" | |
1011 | "@ | |
1012 | vshl.u64\t%P0, %P1, %2 | |
1013 | vshl.u64\t%P0, %P1, %P2" | |
32093010 | 1014 | [(set_attr "type" "neon_shift_imm, neon_shift_reg")] |
aa06c51c | 1015 | ) |
1016 | ||
1017 | (define_insn_and_split "ashldi3_neon" ; DImode left shift for NEON or core registers; always output as # and expanded by the post-reload split code below. | |
1018 | [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r, ?w,w") | |
1019 | (ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r, 0w,w") | |
1020 | (match_operand:SI 2 "general_operand" "rUm, i, r, i,rUm,i"))) | |
1021 | (clobber (match_scratch:SI 3 "= X, X,?&r, X, X,X")) ; core scratch, requested only by alternative 2 | |
1022 | (clobber (match_scratch:SI 4 "= X, X,?&r, X, X,X")) ; core scratch, requested only by alternative 2 | |
1023 | (clobber (match_scratch:DI 5 "=&w, X, X, X, &w,X")) ; NEON scratch that receives the count via neon_load_count | |
1024 | (clobber (reg:CC_C CC_REGNUM))] | |
1025 | "TARGET_NEON" | |
1026 | "#" | |
1027 | "TARGET_NEON && reload_completed" | |
1028 | [(const_int 0)] ; placeholder template: the C code below emits every insn and ends with DONE | |
1029 | " | |
1030 | { | |
1031 | if (IS_VFP_REGNUM (REGNO (operands[0]))) | |
1032 | { | |
1033 | if (CONST_INT_P (operands[2])) | |
1034 | { | |
1035 | if (INTVAL (operands[2]) < 1) | |
1036 | { | |
1037 | emit_insn (gen_movdi (operands[0], operands[1])); | |
1038 | DONE; | |
1039 | } | |
1040 | else if (INTVAL (operands[2]) > 63) | |
1041 | operands[2] = gen_rtx_CONST_INT (VOIDmode, 63); | |
1042 | } | |
1043 | else | |
1044 | { | |
1045 | emit_insn (gen_neon_load_count (operands[5], operands[2])); | |
1046 | operands[2] = operands[5]; | |
1047 | } | |
1048 | | |
1049 | /* Ditch the unnecessary clobbers. */ | |
1050 | emit_insn (gen_ashldi3_neon_noclobber (operands[0], operands[1], | |
1051 | operands[2])); | |
1052 | } | |
1053 | else | |
1054 | { | |
f0f38f16 | 1055 | if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1 |
1056 | && (!reg_overlap_mentioned_p (operands[0], operands[1]) | |
1057 | || REGNO (operands[0]) == REGNO (operands[1]))) | |
aa06c51c | 1058 | /* This clobbers CC. */ |
1059 | emit_insn (gen_arm_ashldi3_1bit (operands[0], operands[1])); | |
1060 | else | |
1061 | arm_emit_coreregs_64bit_shift (ASHIFT, operands[0], operands[1], | |
1062 | operands[2], operands[3], operands[4]); | |
1063 | } | |
1064 | DONE; | |
1065 | }" | |
b6779ddc | 1066 | [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits") |
32093010 | 1067 | (set_attr "opt" "*,*,speed,speed,*,*") |
1068 | (set_attr "type" "multiple")] | |
aa06c51c | 1069 | ) |
1070 | ||
1071 | ; The shift amount needs to be negated for right-shifts | |
1072 | (define_insn "signed_shift_di3_neon" ; Post-reload vshl.s64 of op1 by the count in NEON register op2, modelled as UNSPEC_ASHIFT_SIGNED. | |
1073 | [(set (match_operand:DI 0 "s_register_operand" "=w") | |
1074 | (unspec:DI [(match_operand:DI 1 "s_register_operand" " w") | |
1075 | (match_operand:DI 2 "s_register_operand" " w")] | |
1076 | UNSPEC_ASHIFT_SIGNED))] | |
1077 | "TARGET_NEON && reload_completed" | |
1078 | "vshl.s64\t%P0, %P1, %P2" | |
32093010 | 1079 | [(set_attr "type" "neon_shift_reg")] |
aa06c51c | 1080 | ) |
1081 | ||
1082 | ; The shift amount needs to be negated for right-shifts | |
1083 | (define_insn "unsigned_shift_di3_neon" ; Post-reload vshl.u64 of op1 by the count in NEON register op2, modelled as UNSPEC_ASHIFT_UNSIGNED. | |
1084 | [(set (match_operand:DI 0 "s_register_operand" "=w") | |
1085 | (unspec:DI [(match_operand:DI 1 "s_register_operand" " w") | |
1086 | (match_operand:DI 2 "s_register_operand" " w")] | |
1087 | UNSPEC_ASHIFT_UNSIGNED))] | |
1088 | "TARGET_NEON && reload_completed" | |
1089 | "vshl.u64\t%P0, %P1, %P2" | |
32093010 | 1090 | [(set_attr "type" "neon_shift_reg")] |
aa06c51c | 1091 | ) |
1092 | ||
1093 | (define_insn "ashrdi3_neon_imm_noclobber" ; Post-reload DImode arithmetic right shift by an immediate in 1..64, emitted as vshr.s64 with no clobbers. | |
1094 | [(set (match_operand:DI 0 "s_register_operand" "=w") | |
1095 | (ashiftrt:DI (match_operand:DI 1 "s_register_operand" " w") | |
1096 | (match_operand:DI 2 "const_int_operand" " i")))] | |
1097 | "TARGET_NEON && reload_completed | |
1098 | && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64" | |
1099 | "vshr.s64\t%P0, %P1, %2" | |
32093010 | 1100 | [(set_attr "type" "neon_shift_imm")] |
aa06c51c | 1101 | ) |
1102 | ||
1103 | (define_insn "lshrdi3_neon_imm_noclobber" ; Post-reload DImode logical right shift by an immediate in 1..64, emitted as vshr.u64 with no clobbers. | |
1104 | [(set (match_operand:DI 0 "s_register_operand" "=w") | |
1105 | (lshiftrt:DI (match_operand:DI 1 "s_register_operand" " w") | |
1106 | (match_operand:DI 2 "const_int_operand" " i")))] | |
1107 | "TARGET_NEON && reload_completed | |
1108 | && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64" | |
1109 | "vshr.u64\t%P0, %P1, %2" | |
32093010 | 1110 | [(set_attr "type" "neon_shift_imm")] |
aa06c51c | 1111 | ) |
1112 | ||
1113 | ;; ashrdi3_neon | |
1114 | ;; lshrdi3_neon | |
1115 | (define_insn_and_split "<shift>di3_neon" ; DImode right shifts (RSHIFTS iterator) for NEON or core registers; always output as # and expanded by the post-reload split code below. | |
1116 | [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r,?w,?w") | |
9b23f0a7 | 1117 | (RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r,0w, w") |
aa06c51c | 1118 | (match_operand:SI 2 "reg_or_int_operand" " r, i, r, i, r, i"))) |
1119 | (clobber (match_scratch:SI 3 "=2r, X, &r, X,2r, X")) ; core scratch; receives the negated count on the NEON register-count path | |
1120 | (clobber (match_scratch:SI 4 "= X, X, &r, X, X, X")) ; second core scratch, requested only by alternative 2 | |
1121 | (clobber (match_scratch:DI 5 "=&w, X, X, X,&w, X")) ; NEON scratch that receives the count via neon_load_count | |
1122 | (clobber (reg:CC CC_REGNUM))] | |
1123 | "TARGET_NEON" | |
1124 | "#" | |
1125 | "TARGET_NEON && reload_completed" | |
1126 | [(const_int 0)] ; placeholder template: the C code below emits every insn and ends with DONE | |
1127 | " | |
1128 | { | |
1129 | if (IS_VFP_REGNUM (REGNO (operands[0]))) | |
1130 | { | |
1131 | if (CONST_INT_P (operands[2])) | |
1132 | { | |
1133 | if (INTVAL (operands[2]) < 1) | |
1134 | { | |
1135 | emit_insn (gen_movdi (operands[0], operands[1])); | |
1136 | DONE; | |
1137 | } | |
1138 | else if (INTVAL (operands[2]) > 64) | |
1139 | operands[2] = gen_rtx_CONST_INT (VOIDmode, 64); | |
1140 | | |
1141 | /* Ditch the unnecessary clobbers. */ | |
1142 | emit_insn (gen_<shift>di3_neon_imm_noclobber (operands[0], | |
1143 | operands[1], | |
1144 | operands[2])); | |
1145 | } | |
1146 | else | |
1147 | { | |
1148 | /* We must use a negative left-shift. */ | |
1149 | emit_insn (gen_negsi2 (operands[3], operands[2])); | |
1150 | emit_insn (gen_neon_load_count (operands[5], operands[3])); | |
1151 | emit_insn (gen_<shifttype>_shift_di3_neon (operands[0], operands[1], | |
1152 | operands[5])); | |
1153 | } | |
1154 | } | |
1155 | else | |
1156 | { | |
f0f38f16 | 1157 | if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1 |
1158 | && (!reg_overlap_mentioned_p (operands[0], operands[1]) | |
1159 | || REGNO (operands[0]) == REGNO (operands[1]))) | |
aa06c51c | 1160 | /* This clobbers CC. */ |
1161 | emit_insn (gen_arm_<shift>di3_1bit (operands[0], operands[1])); | |
1162 | else | |
1163 | /* This clobbers CC (ASHIFTRT by register only). */ | |
1164 | arm_emit_coreregs_64bit_shift (<CODE>, operands[0], operands[1], | |
1165 | operands[2], operands[3], operands[4]); | |
1166 | } | |
1167 | | |
1168 | DONE; | |
1169 | }" | |
b6779ddc | 1170 | [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits") |
32093010 | 1171 | (set_attr "opt" "*,*,speed,speed,*,*") |
1172 | (set_attr "type" "multiple")] | |
aa06c51c | 1173 | ) |
1174 | ||
d98a3884 | 1175 | ;; Widening operations |
1176 | ||
1177 | (define_insn "widen_ssum<mode>3" ; Widening signed sum: op0 = sign_extend (op1) + op2, emitted as vaddw with the signed element suffix. | |
1178 | [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") | |
1179 | (plus:<V_widen> (sign_extend:<V_widen> | |
1180 | (match_operand:VW 1 "s_register_operand" "w")) ; narrow input; not marked commutative because op2 has a different (widened) mode | |
1181 | (match_operand:<V_widen> 2 "s_register_operand" "w")))] ; wide accumulator input | |
1182 | "TARGET_NEON" | |
bcaec148 | 1183 | "vaddw.<V_s_elem>\t%q0, %q2, %P1" |
32093010 | 1184 | [(set_attr "type" "neon_add_widen")] |
bcaec148 | 1185 | ) |
d98a3884 | 1186 | |
1187 | (define_insn "widen_usum<mode>3" ; Widening unsigned sum: op0 = zero_extend (op1) + op2, emitted as vaddw with the unsigned element suffix. | |
1188 | [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") | |
1189 | (plus:<V_widen> (zero_extend:<V_widen> | |
1190 | (match_operand:VW 1 "s_register_operand" "w")) ; narrow input; not marked commutative because op2 has a different (widened) mode | |
1191 | (match_operand:<V_widen> 2 "s_register_operand" "w")))] ; wide accumulator input | |
1192 | "TARGET_NEON" | |
bcaec148 | 1193 | "vaddw.<V_u_elem>\t%q0, %q2, %P1" |
32093010 | 1194 | [(set_attr "type" "neon_add_widen")] |
bcaec148 | 1195 | ) |
d98a3884 | 1196 | |
d98a3884 | 1197 | ;; Helpers for quad-word reduction operations |
1198 | ||
1199 | ; Add (or smin, smax...) the low N/2 elements of the N-element vector | |
1200 | ; operand[1] to the high N/2 elements of same. Put the result in operand[0], an | |
1201 | ; N/2-element vector. | |
1202 | ||
1203 | (define_insn "quad_halves_<code>v4si" ; Combine elements 0-1 of the V4SI op1 with elements 2-3 using a VQH_OPS operation, giving a V2SI result. | |
1204 | [(set (match_operand:V2SI 0 "s_register_operand" "=w") | |
9b23f0a7 | 1205 | (VQH_OPS:V2SI |
d98a3884 | 1206 | (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w") |
1207 | (parallel [(const_int 0) (const_int 1)])) | |
1208 | (vec_select:V2SI (match_dup 1) | |
1209 | (parallel [(const_int 2) (const_int 3)]))))] | |
1210 | "TARGET_NEON" | |
bcaec148 | 1211 | "<VQH_mnem>.<VQH_sign>32\t%P0, %e1, %f1" |
1212 | [(set_attr "vqh_mnem" "<VQH_mnem>") | |
32093010 | 1213 | (set_attr "type" "neon_reduc_<VQH_type>_q")] |
bcaec148 | 1214 | ) |
d98a3884 | 1215 | |
1216 | (define_insn "quad_halves_<code>v4sf" ; Combine elements 0-1 of the V4SF op1 with elements 2-3 using a VQHS_OPS operation, giving a V2SF result. | |
1217 | [(set (match_operand:V2SF 0 "s_register_operand" "=w") | |
9b23f0a7 | 1218 | (VQHS_OPS:V2SF |
d98a3884 | 1219 | (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w") |
1220 | (parallel [(const_int 0) (const_int 1)])) | |
1221 | (vec_select:V2SF (match_dup 1) | |
1222 | (parallel [(const_int 2) (const_int 3)]))))] | |
fe4048be | 1223 | "TARGET_NEON && flag_unsafe_math_optimizations" ; float variant is enabled only under flag_unsafe_math_optimizations |
bcaec148 | 1224 | "<VQH_mnem>.f32\t%P0, %e1, %f1" |
1225 | [(set_attr "vqh_mnem" "<VQH_mnem>") | |
32093010 | 1226 | (set_attr "type" "neon_fp_reduc_<VQH_type>_s_q")] |
bcaec148 | 1227 | ) |
d98a3884 | 1228 | |
1229 | (define_insn "quad_halves_<code>v8hi" ; Combine elements 0-3 of the V8HI op1 with elements 4-7 using a VQH_OPS operation, giving a V4HI result. | |
1230 | [(set (match_operand:V4HI 0 "s_register_operand" "=w") ; write-only destination, as in the v4si and v4sf variants | |
9b23f0a7 | 1231 | (VQH_OPS:V4HI |
d98a3884 | 1232 | (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w") |
1233 | (parallel [(const_int 0) (const_int 1) | |
1234 | (const_int 2) (const_int 3)])) | |
1235 | (vec_select:V4HI (match_dup 1) | |
1236 | (parallel [(const_int 4) (const_int 5) | |
1237 | (const_int 6) (const_int 7)]))))] | |
1238 | "TARGET_NEON" | |
bcaec148 | 1239 | "<VQH_mnem>.<VQH_sign>16\t%P0, %e1, %f1" |
1240 | [(set_attr "vqh_mnem" "<VQH_mnem>") | |
32093010 | 1241 | (set_attr "type" "neon_reduc_<VQH_type>_q")] |
bcaec148 | 1242 | ) |
d98a3884 | 1243 | |
1244 | (define_insn "quad_halves_<code>v16qi" ; Combine elements 0-7 of the V16QI op1 with elements 8-15 using a VQH_OPS operation, giving a V8QI result. | |
1245 | [(set (match_operand:V8QI 0 "s_register_operand" "=w") ; write-only destination, as in the v4si and v4sf variants | |
9b23f0a7 | 1246 | (VQH_OPS:V8QI |
d98a3884 | 1247 | (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w") |
1248 | (parallel [(const_int 0) (const_int 1) | |
1249 | (const_int 2) (const_int 3) | |
1250 | (const_int 4) (const_int 5) | |
1251 | (const_int 6) (const_int 7)])) | |
1252 | (vec_select:V8QI (match_dup 1) | |
1253 | (parallel [(const_int 8) (const_int 9) | |
1254 | (const_int 10) (const_int 11) | |
1255 | (const_int 12) (const_int 13) | |
1256 | (const_int 14) (const_int 15)]))))] | |
1257 | "TARGET_NEON" | |
bcaec148 | 1258 | "<VQH_mnem>.<VQH_sign>8\t%P0, %e1, %f1" |
1259 | [(set_attr "vqh_mnem" "<VQH_mnem>") | |
32093010 | 1260 | (set_attr "type" "neon_reduc_<VQH_type>_q")] |
bcaec148 | 1261 | ) |
d98a3884 | 1262 | |
a62cc977 | 1263 | (define_expand "move_hi_quad_<mode>" ; Move the <V_HALF> value op1 into the subreg of op0 at byte offset GET_MODE_SIZE (<V_HALF>mode). |
1264 | [(match_operand:ANY128 0 "s_register_operand" "") | |
1265 | (match_operand:<V_HALF> 1 "s_register_operand" "")] | |
1266 | "TARGET_NEON" | |
d98a3884 | 1267 | { |
be7a395b | 1268 | emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0], <MODE>mode, |
1269 | GET_MODE_SIZE (<V_HALF>mode)), | |
1270 | operands[1]); | |
a62cc977 | 1271 | DONE; |
1272 | }) | |
1273 | ||
1274 | (define_expand "move_lo_quad_<mode>" ; Move the <V_HALF> value op1 into the subreg of op0 at byte offset 0. | |
1275 | [(match_operand:ANY128 0 "s_register_operand" "") | |
1276 | (match_operand:<V_HALF> 1 "s_register_operand" "")] | |
1277 | "TARGET_NEON" | |
d98a3884 | 1278 | { |
be7a395b | 1279 | emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0], |
1280 | <MODE>mode, 0), | |
1281 | operands[1]); | |
a62cc977 | 1282 | DONE; |
1283 | }) | |
d98a3884 | 1284 | |
1285 | ;; Reduction operations | |
1286 | ||
0d2d31ff | 1287 | (define_expand "reduc_plus_scal_<mode>" ; Sum-reduce the VD-mode vector op1 to scalar op0: pairwise reduce with vpadd, then extract element 0. |
1288 | [(match_operand:<V_elem> 0 "nonimmediate_operand" "") | |
d98a3884 | 1289 | (match_operand:VD 1 "s_register_operand" "")] |
fe4048be | 1290 | "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" ; float modes need flag_unsafe_math_optimizations |
d98a3884 | 1291 | { |
0d2d31ff | 1292 | rtx vec = gen_reg_rtx (<MODE>mode); |
1293 | neon_pairwise_reduce (vec, operands[1], <MODE>mode, | |
d98a3884 | 1294 | &gen_neon_vpadd_internal<mode>); |
0d2d31ff | 1295 | /* The same result is actually computed into every element. */ |
1296 | emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx)); | |
d98a3884 | 1297 | DONE; |
1298 | }) | |
1299 | ||
0d2d31ff | 1300 | (define_expand "reduc_plus_scal_<mode>" ; Sum-reduce the VQ-mode vector op1: fold its two halves with quad_halves_plus<mode>, then reuse the <V_half> reduction. |
1301 | [(match_operand:<V_elem> 0 "nonimmediate_operand" "") | |
d98a3884 | 1302 | (match_operand:VQ 1 "s_register_operand" "")] |
b46a36c7 | 1303 | "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations) |
1304 | && !BYTES_BIG_ENDIAN" | |
d98a3884 | 1305 | { |
1306 | rtx step1 = gen_reg_rtx (<V_HALF>mode); | |
d98a3884 | 1307 | |
1308 | emit_insn (gen_quad_halves_plus<mode> (step1, operands[1])); | |
0d2d31ff | 1309 | emit_insn (gen_reduc_plus_scal_<V_half> (operands[0], step1)); |
d98a3884 | 1310 | |
1311 | DONE; | |
1312 | }) | |
1313 | ||
0d2d31ff | 1314 | (define_expand "reduc_plus_scal_v2di" ; Sum-reduce the V2DI vector op1 to DImode op0: arm_reduc_plus_internal_v2di, then extract element 0. Little-endian only. |
1315 | [(match_operand:DI 0 "nonimmediate_operand" "") | |
1316 | (match_operand:V2DI 1 "s_register_operand" "")] | |
1317 | "TARGET_NEON && !BYTES_BIG_ENDIAN" | |
1318 | { | |
1319 | rtx vec = gen_reg_rtx (V2DImode); | |
1320 | | |
1321 | emit_insn (gen_arm_reduc_plus_internal_v2di (vec, operands[1])); | |
1322 | emit_insn (gen_vec_extractv2di (operands[0], vec, const0_rtx)); | |
1323 | | |
1324 | DONE; | |
1325 | }) | |
1326 | ||
1327 | (define_insn "arm_reduc_plus_internal_v2di" ; V2DI add-reduction step, modelled as UNSPEC_VPADD: vadd.i64 of %e1 and %f1 into %e0. Little-endian only. | |
d98a3884 | 1328 | [(set (match_operand:V2DI 0 "s_register_operand" "=w") |
1329 | (unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w")] | |
1330 | UNSPEC_VPADD))] | |
b46a36c7 | 1331 | "TARGET_NEON && !BYTES_BIG_ENDIAN" |
bcaec148 | 1332 | "vadd.i64\t%e0, %e1, %f1" |
32093010 | 1333 | [(set_attr "type" "neon_add_q")] |
bcaec148 | 1334 | ) |
d98a3884 | 1335 | |
375c3982 | 1336 | (define_expand "reduc_smin_scal_<mode>" ; Signed-minimum reduction of the VD-mode vector op1 to scalar op0: pairwise reduce with neon_vpsmin, then extract element 0. |
1337 | [(match_operand:<V_elem> 0 "nonimmediate_operand" "") | |
d98a3884 | 1338 | (match_operand:VD 1 "s_register_operand" "")] |
fe4048be | 1339 | "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" ; float modes need flag_unsafe_math_optimizations |
d98a3884 | 1340 | { |
375c3982 | 1341 | rtx vec = gen_reg_rtx (<MODE>mode); |
1342 | | |
1343 | neon_pairwise_reduce (vec, operands[1], <MODE>mode, | |
d98a3884 | 1344 | &gen_neon_vpsmin<mode>); |
375c3982 | 1345 | /* The result is computed into every element of the vector. */ |
1346 | emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx)); | |
d98a3884 | 1347 | DONE; |
1348 | }) | |
1349 | ||
375c3982 | 1350 | (define_expand "reduc_smin_scal_<mode>" ; Signed-minimum reduction of the VQ-mode vector op1: fold its halves with quad_halves_smin<mode>, then reuse the <V_half> reduction. |
1351 | [(match_operand:<V_elem> 0 "nonimmediate_operand" "") | |
d98a3884 | 1352 | (match_operand:VQ 1 "s_register_operand" "")] |
b46a36c7 | 1353 | "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations) |
1354 | && !BYTES_BIG_ENDIAN" | |
d98a3884 | 1355 | { |
1356 | rtx step1 = gen_reg_rtx (<V_HALF>mode); | |
d98a3884 | 1357 | |
1358 | emit_insn (gen_quad_halves_smin<mode> (step1, operands[1])); | |
375c3982 | 1359 | emit_insn (gen_reduc_smin_scal_<V_half> (operands[0], step1)); |
d98a3884 | 1360 | |
1361 | DONE; | |
1362 | }) | |
1363 | ||
375c3982 | 1364 | (define_expand "reduc_smax_scal_<mode>" ; Signed-maximum reduction of the VD-mode vector op1 to scalar op0: pairwise reduce with neon_vpsmax, then extract element 0. |
1365 | [(match_operand:<V_elem> 0 "nonimmediate_operand" "") | |
d98a3884 | 1366 | (match_operand:VD 1 "s_register_operand" "")] |
fe4048be | 1367 | "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" ; float modes need flag_unsafe_math_optimizations |
d98a3884 | 1368 | { |
375c3982 | 1369 | rtx vec = gen_reg_rtx (<MODE>mode); |
1370 | neon_pairwise_reduce (vec, operands[1], <MODE>mode, | |
d98a3884 | 1371 | &gen_neon_vpsmax<mode>); |
375c3982 | 1372 | /* The result is computed into every element of the vector. */ |
1373 | emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx)); | |
d98a3884 | 1374 | DONE; |
1375 | }) | |
1376 | ||
375c3982 | 1377 | (define_expand "reduc_smax_scal_<mode>" ; Signed-maximum reduction of the VQ-mode vector op1: fold its halves with quad_halves_smax<mode>, then reuse the <V_half> reduction. |
1378 | [(match_operand:<V_elem> 0 "nonimmediate_operand" "") | |
d98a3884 | 1379 | (match_operand:VQ 1 "s_register_operand" "")] |
b46a36c7 | 1380 | "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations) |
1381 | && !BYTES_BIG_ENDIAN" | |
d98a3884 | 1382 | { |
1383 | rtx step1 = gen_reg_rtx (<V_HALF>mode); | |
d98a3884 | 1384 | |
1385 | emit_insn (gen_quad_halves_smax<mode> (step1, operands[1])); | |
375c3982 | 1386 | emit_insn (gen_reduc_smax_scal_<V_half> (operands[0], step1)); |
d98a3884 | 1387 | |
1388 | DONE; | |
1389 | }) | |
1390 | ||
375c3982 | 1391 | (define_expand "reduc_umin_scal_<mode>" ; Unsigned-minimum reduction of the VDI-mode vector op1 to scalar op0: pairwise reduce with neon_vpumin, then extract element 0. |
1392 | [(match_operand:<V_elem> 0 "nonimmediate_operand" "") | |
d98a3884 | 1393 | (match_operand:VDI 1 "s_register_operand" "")] |
1394 | "TARGET_NEON" | |
1395 | { | |
375c3982 | 1396 | rtx vec = gen_reg_rtx (<MODE>mode); |
1397 | neon_pairwise_reduce (vec, operands[1], <MODE>mode, | |
d98a3884 | 1398 | &gen_neon_vpumin<mode>); |
375c3982 | 1399 | /* The result is computed into every element of the vector. */ |
1400 | emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx)); | |
d98a3884 | 1401 | DONE; |
1402 | }) | |
1403 | ||
375c3982 | 1404 | (define_expand "reduc_umin_scal_<mode>" ; Unsigned-minimum reduction of the VQI-mode vector op1: fold its halves with quad_halves_umin<mode>, then reuse the <V_half> reduction. |
1405 | [(match_operand:<V_elem> 0 "nonimmediate_operand" "") | |
d98a3884 | 1406 | (match_operand:VQI 1 "s_register_operand" "")] |
b46a36c7 | 1407 | "TARGET_NEON && !BYTES_BIG_ENDIAN" |
d98a3884 | 1408 | { |
1409 | rtx step1 = gen_reg_rtx (<V_HALF>mode); | |
d98a3884 | 1410 | |
1411 | emit_insn (gen_quad_halves_umin<mode> (step1, operands[1])); | |
375c3982 | 1412 | emit_insn (gen_reduc_umin_scal_<V_half> (operands[0], step1)); |
d98a3884 | 1413 | |
1414 | DONE; | |
1415 | }) | |
1416 | ||
375c3982 | 1417 | (define_expand "reduc_umax_scal_<mode>" ; Unsigned-maximum reduction of the VDI-mode vector op1 to scalar op0: pairwise reduce with neon_vpumax, then extract element 0. |
1418 | [(match_operand:<V_elem> 0 "nonimmediate_operand" "") | |
d98a3884 | 1419 | (match_operand:VDI 1 "s_register_operand" "")] |
1420 | "TARGET_NEON" | |
1421 | { | |
375c3982 | 1422 | rtx vec = gen_reg_rtx (<MODE>mode); |
1423 | neon_pairwise_reduce (vec, operands[1], <MODE>mode, | |
d98a3884 | 1424 | &gen_neon_vpumax<mode>); |
375c3982 | 1425 | /* The result is computed into every element of the vector. */ |
1426 | emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx)); | |
d98a3884 | 1427 | DONE; |
1428 | }) | |
1429 | ||
375c3982 | 1430 | (define_expand "reduc_umax_scal_<mode>" ; Unsigned-maximum reduction of the VQI-mode vector op1: fold its halves with quad_halves_umax<mode>, then reuse the <V_half> reduction. |
1431 | [(match_operand:<V_elem> 0 "nonimmediate_operand" "") | |
d98a3884 | 1432 | (match_operand:VQI 1 "s_register_operand" "")] |
b46a36c7 | 1433 | "TARGET_NEON && !BYTES_BIG_ENDIAN" |
d98a3884 | 1434 | { |
1435 | rtx step1 = gen_reg_rtx (<V_HALF>mode); | |
d98a3884 | 1436 | |
1437 | emit_insn (gen_quad_halves_umax<mode> (step1, operands[1])); | |
375c3982 | 1438 | emit_insn (gen_reduc_umax_scal_<V_half> (operands[0], step1)); |
d98a3884 | 1439 | |
1440 | DONE; | |
1441 | }) | |
1442 | ||
1443 | (define_insn "neon_vpadd_internal<mode>" | |
1444 | [(set (match_operand:VD 0 "s_register_operand" "=w") | |
1445 | (unspec:VD [(match_operand:VD 1 "s_register_operand" "w") | |
1446 | (match_operand:VD 2 "s_register_operand" "w")] | |
1447 | UNSPEC_VPADD))] | |
1448 | "TARGET_NEON" | |
bcaec148 | 1449 | "vpadd.<V_if_elem>\t%P0, %P1, %P2" |
1450 | ;; Assume this schedules like vadd. | |
52432540 | 1451 | [(set (attr "type") |
0bf497f5 | 1452 | (if_then_else (match_test "<Is_float_mode>") |
32093010 | 1453 | (const_string "neon_fp_reduc_add_s<q>") |
1454 | (const_string "neon_reduc_add<q>")))] | |
bcaec148 | 1455 | ) |
d98a3884 | 1456 | |
1457 | (define_insn "neon_vpsmin<mode>" | |
1458 | [(set (match_operand:VD 0 "s_register_operand" "=w") | |
1459 | (unspec:VD [(match_operand:VD 1 "s_register_operand" "w") | |
1460 | (match_operand:VD 2 "s_register_operand" "w")] | |
1461 | UNSPEC_VPSMIN))] | |
1462 | "TARGET_NEON" | |
bcaec148 | 1463 | "vpmin.<V_s_elem>\t%P0, %P1, %P2" |
52432540 | 1464 | [(set (attr "type") |
0bf497f5 | 1465 | (if_then_else (match_test "<Is_float_mode>") |
32093010 | 1466 | (const_string "neon_fp_reduc_minmax_s<q>") |
1467 | (const_string "neon_reduc_minmax<q>")))] | |
bcaec148 | 1468 | ) |
d98a3884 | 1469 | |
1470 | (define_insn "neon_vpsmax<mode>" | |
1471 | [(set (match_operand:VD 0 "s_register_operand" "=w") | |
1472 | (unspec:VD [(match_operand:VD 1 "s_register_operand" "w") | |
1473 | (match_operand:VD 2 "s_register_operand" "w")] | |
1474 | UNSPEC_VPSMAX))] | |
1475 | "TARGET_NEON" | |
bcaec148 | 1476 | "vpmax.<V_s_elem>\t%P0, %P1, %P2" |
52432540 | 1477 | [(set (attr "type") |
0bf497f5 | 1478 | (if_then_else (match_test "<Is_float_mode>") |
32093010 | 1479 | (const_string "neon_fp_reduc_minmax_s<q>") |
1480 | (const_string "neon_reduc_minmax<q>")))] | |
bcaec148 | 1481 | ) |
d98a3884 | 1482 | |
1483 | (define_insn "neon_vpumin<mode>" | |
1484 | [(set (match_operand:VDI 0 "s_register_operand" "=w") | |
1485 | (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w") | |
1486 | (match_operand:VDI 2 "s_register_operand" "w")] | |
1487 | UNSPEC_VPUMIN))] | |
1488 | "TARGET_NEON" | |
bcaec148 | 1489 | "vpmin.<V_u_elem>\t%P0, %P1, %P2" |
32093010 | 1490 | [(set_attr "type" "neon_reduc_minmax<q>")] |
bcaec148 | 1491 | ) |
d98a3884 | 1492 | |
1493 | (define_insn "neon_vpumax<mode>" | |
1494 | [(set (match_operand:VDI 0 "s_register_operand" "=w") | |
1495 | (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w") | |
1496 | (match_operand:VDI 2 "s_register_operand" "w")] | |
1497 | UNSPEC_VPUMAX))] | |
1498 | "TARGET_NEON" | |
bcaec148 | 1499 | "vpmax.<V_u_elem>\t%P0, %P1, %P2" |
32093010 | 1500 | [(set_attr "type" "neon_reduc_minmax<q>")] |
bcaec148 | 1501 | ) |
d98a3884 | 1502 | |
1503 | ;; Saturating arithmetic | |
1504 | ||
1505 | ; NOTE: Neon supports many more saturating variants of instructions than the | |
1506 | ; following, but these are all GCC currently understands. | |
1507 | ; FIXME: Actually, GCC doesn't know how to create saturating add/sub by itself | |
1508 | ; yet either, although these patterns may be used by intrinsics when they're | |
1509 | ; added. | |
1510 | ||
1511 | (define_insn "*ss_add<mode>_neon" | |
1512 | [(set (match_operand:VD 0 "s_register_operand" "=w") | |
1513 | (ss_plus:VD (match_operand:VD 1 "s_register_operand" "w") | |
1514 | (match_operand:VD 2 "s_register_operand" "w")))] | |
1515 | "TARGET_NEON" | |
bcaec148 | 1516 | "vqadd.<V_s_elem>\t%P0, %P1, %P2" |
32093010 | 1517 | [(set_attr "type" "neon_qadd<q>")] |
bcaec148 | 1518 | ) |
d98a3884 | 1519 | |
1520 | (define_insn "*us_add<mode>_neon" | |
1521 | [(set (match_operand:VD 0 "s_register_operand" "=w") | |
1522 | (us_plus:VD (match_operand:VD 1 "s_register_operand" "w") | |
1523 | (match_operand:VD 2 "s_register_operand" "w")))] | |
1524 | "TARGET_NEON" | |
bcaec148 | 1525 | "vqadd.<V_u_elem>\t%P0, %P1, %P2" |
32093010 | 1526 | [(set_attr "type" "neon_qadd<q>")] |
bcaec148 | 1527 | ) |
d98a3884 | 1528 | |
1529 | (define_insn "*ss_sub<mode>_neon" | |
1530 | [(set (match_operand:VD 0 "s_register_operand" "=w") | |
1531 | (ss_minus:VD (match_operand:VD 1 "s_register_operand" "w") | |
1532 | (match_operand:VD 2 "s_register_operand" "w")))] | |
1533 | "TARGET_NEON" | |
bcaec148 | 1534 | "vqsub.<V_s_elem>\t%P0, %P1, %P2" |
32093010 | 1535 | [(set_attr "type" "neon_qsub<q>")] |
bcaec148 | 1536 | ) |
d98a3884 | 1537 | |
1538 | (define_insn "*us_sub<mode>_neon" | |
1539 | [(set (match_operand:VD 0 "s_register_operand" "=w") | |
1540 | (us_minus:VD (match_operand:VD 1 "s_register_operand" "w") | |
1541 | (match_operand:VD 2 "s_register_operand" "w")))] | |
1542 | "TARGET_NEON" | |
bcaec148 | 1543 | "vqsub.<V_u_elem>\t%P0, %P1, %P2" |
32093010 | 1544 | [(set_attr "type" "neon_qsub<q>")] |
bcaec148 | 1545 | ) |
d98a3884 | 1546 | |
0a987353 | 1547 | ;; Conditional instructions. These are comparisons with conditional moves for |
1548 | ;; vectors. They perform the assignment: | |
1549 | ;; | |
1550 | ;; Vop0 = (Vop4 <op3> Vop5) ? Vop1 : Vop2; | |
1551 | ;; | |
1552 | ;; where op3 is <, <=, ==, !=, >= or > (vcond<mode><mode> also handles | |
1553 | ;; ORDERED, UNORDERED, UNEQ, UNLT, UNLE, UNGT and UNGE). Operations are performed element-wise. | |
1554 | ||
d6b19f6b | 1555 | (define_expand "vcond<mode><mode>" |
0a987353 | 1556 | [(set (match_operand:VDQW 0 "s_register_operand" "") |
1557 | (if_then_else:VDQW | |
2f17f336 | 1558 | (match_operator 3 "comparison_operator" |
0a987353 | 1559 | [(match_operand:VDQW 4 "s_register_operand" "") |
1560 | (match_operand:VDQW 5 "nonmemory_operand" "")]) | |
1561 | (match_operand:VDQW 1 "s_register_operand" "") | |
1562 | (match_operand:VDQW 2 "s_register_operand" "")))] | |
1563 | "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" | |
1564 | { | |
2f17f336 | 1565 | int inverse = 0; |
53e6ff93 | 1566 | int use_zero_form = 0; /* Set when operand 5 is the zero vector and the comparison is GE, GT, LE, LT or EQ.  */ |
2f17f336 | 1567 | int swap_bsl_operands = 0; /* Set when MASK holds the inverse of the requested condition, so operands 1 and 2 are exchanged in the final vbsl.  */ |
1568 | rtx mask = gen_reg_rtx (<V_cmp_result>mode); | |
1569 | rtx tmp = gen_reg_rtx (<V_cmp_result>mode); | |
1570 | |
25a124b3 | 1571 | rtx (*base_comparison) (rtx, rtx, rtx); |
1572 | rtx (*complimentary_comparison) (rtx, rtx, rtx); | |
2f17f336 | 1573 | |
0a987353 | 1574 | switch (GET_CODE (operands[3])) |
1575 | { | |
1576 | case GE: | |
53e6ff93 | 1577 | case GT: |
2f17f336 | 1578 | case LE: |
53e6ff93 | 1579 | case LT: |
2f17f336 | 1580 | case EQ: |
53e6ff93 | 1581 | if (operands[5] == CONST0_RTX (<MODE>mode)) |
1582 | { | |
1583 | use_zero_form = 1; | |
1584 | break; | |
1585 | } | |
1586 | /* Fall through. */ | |
2f17f336 | 1587 | default: |
1588 | if (!REG_P (operands[5])) | |
1589 | operands[5] = force_reg (<MODE>mode, operands[5]); | |
1590 | } | |
1591 | |
1592 | switch (GET_CODE (operands[3])) | |
1593 | { | |
1594 | case LT: | |
1595 | case UNLT: | |
1596 | inverse = 1; | |
1597 | /* Fall through. */ | |
1598 | case GE: | |
1599 | case UNGE: | |
1600 | case ORDERED: | |
1601 | case UNORDERED: | |
1602 | base_comparison = gen_neon_vcge<mode>; | |
1603 | complimentary_comparison = gen_neon_vcgt<mode>; | |
1604 | break; | |
1605 | case LE: | |
1606 | case UNLE: | |
1607 | inverse = 1; | |
1608 | /* Fall through. */ | |
0a987353 | 1609 | case GT: |
2f17f336 | 1610 | case UNGT: |
1611 | base_comparison = gen_neon_vcgt<mode>; | |
1612 | complimentary_comparison = gen_neon_vcge<mode>; | |
0a987353 | 1613 | break; |
0a987353 | 1614 | case EQ: |
2f17f336 | 1615 | case NE: |
1616 | case UNEQ: | |
1617 | base_comparison = gen_neon_vceq<mode>; | |
1618 | complimentary_comparison = gen_neon_vceq<mode>; | |
0a987353 | 1619 | break; |
2f17f336 | 1620 | default: |
1621 | gcc_unreachable (); | |
1622 | } | |
1623 | |
1624 | switch (GET_CODE (operands[3])) | |
1625 | { | |
1626 | case LT: | |
0a987353 | 1627 | case LE: |
2f17f336 | 1628 | case GT: |
1629 | case GE: | |
1630 | case EQ: | |
1631 | /* The easy case. Here we emit one of vcge, vcgt or vceq. | |
1632 | As a LT b <=> b GT a && a LE b <=> b GE a. Our transformations are: | |
1633 | a GE b -> a GE b | |
1634 | a GT b -> a GT b | |
1635 | a LE b -> b GE a | |
1636 | a LT b -> b GT a | |
53e6ff93 | 1637 | a EQ b -> a EQ b |
1638 | Note that there also exist direct comparison against 0 forms, | |
1639 | so catch those as a special case. */ | |
1640 | if (use_zero_form) | |
1641 | { | |
1642 | inverse = 0; | |
1643 | switch (GET_CODE (operands[3])) | |
1644 | { | |
1645 | case LT: | |
1646 | base_comparison = gen_neon_vclt<mode>; | |
1647 | break; | |
1648 | case LE: | |
1649 | base_comparison = gen_neon_vcle<mode>; | |
1650 | break; | |
1651 | default: | |
1652 | /* Do nothing, other zero form cases already have the correct | |
1653 | base_comparison. */ | |
1654 | break; | |
1655 | } | |
1656 | } | |
2f17f336 | 1657 | |
1658 | if (!inverse) | |
25a124b3 | 1659 | emit_insn (base_comparison (mask, operands[4], operands[5])); |
0a987353 | 1660 | else |
25a124b3 | 1661 | emit_insn (complimentary_comparison (mask, operands[5], operands[4])); |
0a987353 | 1662 | break; |
2f17f336 | 1663 | case UNLT: |
1664 | case UNLE: | |
1665 | case UNGT: | |
1666 | case UNGE: | |
1667 | case NE: | |
1668 | /* Vector compare returns false for lanes which are unordered, so if we use | |
1669 | the inverse of the comparison we actually want to emit, then | |
1670 | swap the operands to BSL, we will end up with the correct result. | |
1671 | Note that a NE NaN and NaN NE b are true for all a, b. | |
1672 | |
1673 | Our transformations are: | |
1674 | a UNGE b -> !(b GT a) | |
1675 | a UNGT b -> !(b GE a) | |
1676 | a UNLE b -> !(a GT b) | |
1677 | a UNLT b -> !(a GE b) | |
1678 | a NE b -> !(a EQ b) */ | |
1679 | |
1680 | if (inverse) | |
25a124b3 | 1681 | emit_insn (base_comparison (mask, operands[4], operands[5])); |
0a987353 | 1682 | else |
25a124b3 | 1683 | emit_insn (complimentary_comparison (mask, operands[5], operands[4])); |
2f17f336 | 1684 | |
1685 | swap_bsl_operands = 1; | |
0a987353 | 1686 | break; |
2f17f336 | 1687 | case UNEQ: |
1688 | /* We check (a > b || b > a). Combining these comparisons gives us | |
1689 | true iff (a != b && a ORDERED b), swapping the operands to BSL | |
1690 | will then give us (a == b || a UNORDERED b) as intended. */ | |
1691 | |
25a124b3 | 1692 | emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5])); |
1693 | emit_insn (gen_neon_vcgt<mode> (tmp, operands[5], operands[4])); | |
2f17f336 | 1694 | emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp)); |
1695 | swap_bsl_operands = 1; | |
1696 | break; | |
1697 | case UNORDERED: | |
1698 | /* Operands are ORDERED iff (a > b || b >= a). | |
1699 | Swapping the operands to BSL will give the UNORDERED case. */ | |
1700 | swap_bsl_operands = 1; | |
1701 | /* Fall through. */ | |
1702 | case ORDERED: | |
25a124b3 | 1703 | emit_insn (gen_neon_vcgt<mode> (tmp, operands[4], operands[5])); |
1704 | emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4])); | |
2f17f336 | 1705 | emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp)); |
0a987353 | 1706 | break; |
0a987353 | 1707 | default: |
1708 | gcc_unreachable (); | |
1709 | } | |
2f17f336 | 1710 | |
1711 | if (swap_bsl_operands) | |
0a987353 | 1712 | emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2], |
1713 | operands[1])); | |
1714 | else | |
1715 | emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1], | |
1716 | operands[2])); | |
0a987353 | 1717 | DONE; |
1718 | }) | |
1719 | ||
d6b19f6b | 1720 | (define_expand "vcondu<mode><mode>" |
0a987353 | 1721 | [(set (match_operand:VDQIW 0 "s_register_operand" "") |
1722 | (if_then_else:VDQIW | |
1723 | (match_operator 3 "arm_comparison_operator" | |
1724 | [(match_operand:VDQIW 4 "s_register_operand" "") | |
1725 | (match_operand:VDQIW 5 "s_register_operand" "")]) | |
1726 | (match_operand:VDQIW 1 "s_register_operand" "") | |
1727 | (match_operand:VDQIW 2 "s_register_operand" "")))] | |
1728 | "TARGET_NEON" | |
1729 | { | |
1730 | rtx mask; | |
1731 | int inverse = 0, immediate_zero = 0; | |
1732 | |
1733 | mask = gen_reg_rtx (<V_cmp_result>mode); | |
1734 | |
1735 | if (operands[5] == CONST0_RTX (<MODE>mode)) /* NOTE(review): operand 5 is declared s_register_operand, so this may never be true; the vcle/vclt zero forms used for LEU/LTU below look like signed compares -- TODO confirm.  */ | |
1736 | immediate_zero = 1; | |
1737 | else if (!REG_P (operands[5])) | |
1738 | operands[5] = force_reg (<MODE>mode, operands[5]); | |
1739 | |
1740 | switch (GET_CODE (operands[3])) | |
1741 | { | |
1742 | case GEU: | |
25a124b3 | 1743 | emit_insn (gen_neon_vcgeu<mode> (mask, operands[4], operands[5])); |
0a987353 | 1744 | break; |
1745 | |
1746 | case GTU: | |
25a124b3 | 1747 | emit_insn (gen_neon_vcgtu<mode> (mask, operands[4], operands[5])); |
0a987353 | 1748 | break; |
1749 | |
1750 | case EQ: | |
25a124b3 | 1751 | emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5])); |
0a987353 | 1752 | break; |
1753 | |
1754 | case LEU: | |
1755 | if (immediate_zero) | |
25a124b3 | 1756 | emit_insn (gen_neon_vcle<mode> (mask, operands[4], operands[5])); |
0a987353 | 1757 | else |
25a124b3 | 1758 | emit_insn (gen_neon_vcgeu<mode> (mask, operands[5], operands[4])); |
0a987353 | 1759 | break; |
1760 | |
1761 | case LTU: | |
1762 | if (immediate_zero) | |
25a124b3 | 1763 | emit_insn (gen_neon_vclt<mode> (mask, operands[4], operands[5])); |
0a987353 | 1764 | else |
25a124b3 | 1765 | emit_insn (gen_neon_vcgtu<mode> (mask, operands[5], operands[4])); |
0a987353 | 1766 | break; |
1767 | |
1768 | case NE: | |
25a124b3 | 1769 | emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5])); |
0a987353 | 1770 | inverse = 1; /* MASK holds EQ; exchanging operands 1 and 2 in the vbsl below yields NE.  */ |
1771 | break; | |
1772 | |
1773 | default: | |
1774 | gcc_unreachable (); | |
1775 | } | |
1776 | |
1777 | if (inverse) | |
1778 | emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2], | |
1779 | operands[1])); | |
1780 | else | |
1781 | emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1], | |
1782 | operands[2])); | |
1783 | |
1784 | DONE; | |
1785 | }) | |
1786 | ||
d98a3884 | 1787 | ;; Patterns for builtins. |
1788 | ||
1789 | ; good for plain vadd, vaddq. | |
1790 | ||
94829feb | 1791 | (define_expand "neon_vadd<mode>" |
b237f2f6 | 1792 | [(match_operand:VCVTF 0 "s_register_operand" "=w") |
1793 | (match_operand:VCVTF 1 "s_register_operand" "w") | |
25a124b3 | 1794 | (match_operand:VCVTF 2 "s_register_operand" "w")] |
94829feb | 1795 | "TARGET_NEON" |
1796 | { | |
1797 | if (!<Is_float_mode> || flag_unsafe_math_optimizations) | |
1798 | emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2])); | |
1799 | else | |
1800 | emit_insn (gen_neon_vadd<mode>_unspec (operands[0], operands[1], | |
1801 | operands[2])); | |
1802 | DONE; | |
1803 | }) | |
1804 | ||
1805 | ; Note that NEON operations don't support the full IEEE 754 standard: in | |
1806 | ; particular, denormal values are flushed to zero. This means that GCC cannot | |
1807 | ; use those instructions for autovectorization, etc. unless | |
1808 | ; -funsafe-math-optimizations is in effect (in which case flush-to-zero | |
1809 | ; behaviour is permissible). Intrinsic operations (provided by the arm_neon.h | |
1810 | ; header) must work in either case: if -funsafe-math-optimizations is given, | |
1811 | ; intrinsics expand to "canonical" RTL where possible, otherwise intrinsics | |
1812 | ; expand to unspecs (which may potentially limit the extent to which they might | |
1813 | ; be optimized by generic code). | |
1814 | ||
1815 | ; Used for intrinsics when flag_unsafe_math_optimizations is false. | |
1816 | ||
1817 | (define_insn "neon_vadd<mode>_unspec" | |
b237f2f6 | 1818 | [(set (match_operand:VCVTF 0 "s_register_operand" "=w") |
1819 | (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w") | |
1820 | (match_operand:VCVTF 2 "s_register_operand" "w")] | |
d98a3884 | 1821 | UNSPEC_VADD))] |
1822 | "TARGET_NEON" | |
bcaec148 | 1823 | "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
52432540 | 1824 | [(set (attr "type") |
0bf497f5 | 1825 | (if_then_else (match_test "<Is_float_mode>") |
32093010 | 1826 | (const_string "neon_fp_addsub_s<q>") |
1827 | (const_string "neon_add<q>")))] | |
bcaec148 | 1828 | ) |
d98a3884 | 1829 | |
25a124b3 | 1830 | (define_insn "neon_vaddl<sup><mode>" |
d98a3884 | 1831 | [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") |
1832 | (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w") | |
25a124b3 | 1833 | (match_operand:VDI 2 "s_register_operand" "w")] |
1834 | VADDL))] | |
d98a3884 | 1835 | "TARGET_NEON" |
25a124b3 | 1836 | "vaddl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2" |
32093010 | 1837 | [(set_attr "type" "neon_add_long")] |
bcaec148 | 1838 | ) |
d98a3884 | 1839 | |
25a124b3 | 1840 | (define_insn "neon_vaddw<sup><mode>" |
d98a3884 | 1841 | [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") |
1842 | (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w") | |
25a124b3 | 1843 | (match_operand:VDI 2 "s_register_operand" "w")] |
1844 | VADDW))] | |
d98a3884 | 1845 | "TARGET_NEON" |
25a124b3 | 1846 | "vaddw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2" |
32093010 | 1847 | [(set_attr "type" "neon_add_widen")] |
bcaec148 | 1848 | ) |
d98a3884 | 1849 | |
1850 | ; vhadd and vrhadd. | |
1851 | ||
25a124b3 | 1852 | (define_insn "neon_v<r>hadd<sup><mode>" |
d98a3884 | 1853 | [(set (match_operand:VDQIW 0 "s_register_operand" "=w") |
1854 | (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w") | |
25a124b3 | 1855 | (match_operand:VDQIW 2 "s_register_operand" "w")] |
1856 | VHADD))] | |
d98a3884 | 1857 | "TARGET_NEON" |
25a124b3 | 1858 | "v<r>hadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
32093010 | 1859 | [(set_attr "type" "neon_add_halve_q")] |
bcaec148 | 1860 | ) |
d98a3884 | 1861 | |
25a124b3 | 1862 | (define_insn "neon_vqadd<sup><mode>" |
d98a3884 | 1863 | [(set (match_operand:VDQIX 0 "s_register_operand" "=w") |
1864 | (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w") | |
25a124b3 | 1865 | (match_operand:VDQIX 2 "s_register_operand" "w")] |
1866 | VQADD))] | |
d98a3884 | 1867 | "TARGET_NEON" |
25a124b3 | 1868 | "vqadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
32093010 | 1869 | [(set_attr "type" "neon_qadd<q>")] |
bcaec148 | 1870 | ) |
d98a3884 | 1871 | |
25a124b3 | 1872 | (define_insn "neon_v<r>addhn<mode>" |
d98a3884 | 1873 | [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w") |
1874 | (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w") | |
25a124b3 | 1875 | (match_operand:VN 2 "s_register_operand" "w")] |
1876 | VADDHN))] | |
d98a3884 | 1877 | "TARGET_NEON" |
25a124b3 | 1878 | "v<r>addhn.<V_if_elem>\t%P0, %q1, %q2" |
32093010 | 1879 | [(set_attr "type" "neon_add_halve_narrow_q")] |
bcaec148 | 1880 | ) |
d98a3884 | 1881 | |
25a124b3 | 1882 | ;; Polynomial and Float multiplication. |
1883 | (define_insn "neon_vmul<pf><mode>" | |
1884 | [(set (match_operand:VPF 0 "s_register_operand" "=w") | |
1885 | (unspec:VPF [(match_operand:VPF 1 "s_register_operand" "w") | |
1886 | (match_operand:VPF 2 "s_register_operand" "w")] | |
d98a3884 | 1887 | UNSPEC_VMUL))] |
1888 | "TARGET_NEON" | |
25a124b3 | 1889 | "vmul.<pf>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
52432540 | 1890 | [(set (attr "type") |
0bf497f5 | 1891 | (if_then_else (match_test "<Is_float_mode>") |
32093010 | 1892 | (const_string "neon_fp_mul_s<q>") |
1893 | (const_string "neon_mul_<V_elem_ch><q>")))] | |
bcaec148 | 1894 | ) |
d98a3884 | 1895 | |
94829feb | 1896 | (define_expand "neon_vmla<mode>" |
1897 | [(match_operand:VDQW 0 "s_register_operand" "=w") | |
1898 | (match_operand:VDQW 1 "s_register_operand" "0") | |
1899 | (match_operand:VDQW 2 "s_register_operand" "w") | |
25a124b3 | 1900 | (match_operand:VDQW 3 "s_register_operand" "w")] |
94829feb | 1901 | "TARGET_NEON" |
1902 | { | |
1903 | if (!<Is_float_mode> || flag_unsafe_math_optimizations) | |
1904 | emit_insn (gen_mul<mode>3add<mode>_neon (operands[0], operands[1], | |
1905 | operands[2], operands[3])); | |
1906 | else | |
1907 | emit_insn (gen_neon_vmla<mode>_unspec (operands[0], operands[1], | |
1908 | operands[2], operands[3])); | |
1909 | DONE; | |
1910 | }) | |
1911 | ||
9abf7cc0 | 1912 | (define_expand "neon_vfma<VCVTF:mode>" |
1913 | [(match_operand:VCVTF 0 "s_register_operand") | |
1914 | (match_operand:VCVTF 1 "s_register_operand") | |
1915 | (match_operand:VCVTF 2 "s_register_operand") | |
25a124b3 | 1916 | (match_operand:VCVTF 3 "s_register_operand")] |
9abf7cc0 | 1917 | "TARGET_NEON && TARGET_FMA" |
1918 | { | |
1919 | emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3], | |
1920 | operands[1])); | |
1921 | DONE; | |
1922 | }) | |
1923 | ||
1924 | (define_expand "neon_vfms<VCVTF:mode>" | |
1925 | [(match_operand:VCVTF 0 "s_register_operand") | |
1926 | (match_operand:VCVTF 1 "s_register_operand") | |
1927 | (match_operand:VCVTF 2 "s_register_operand") | |
25a124b3 | 1928 | (match_operand:VCVTF 3 "s_register_operand")] |
9abf7cc0 | 1929 | "TARGET_NEON && TARGET_FMA" |
1930 | { | |
1931 | emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3], | |
1932 | operands[1])); | |
1933 | DONE; | |
1934 | }) | |
1935 | ||
94829feb | 1936 | ; Used for intrinsics when flag_unsafe_math_optimizations is false. |
1937 | ||
1938 | (define_insn "neon_vmla<mode>_unspec" | |
32093010 | 1939 | [(set (match_operand:VDQW 0 "s_register_operand" "=w") |
1940 | (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0") | |
1941 | (match_operand:VDQW 2 "s_register_operand" "w") | |
1942 | (match_operand:VDQW 3 "s_register_operand" "w")] | |
94829feb | 1943 | UNSPEC_VMLA))] |
d98a3884 | 1944 | "TARGET_NEON" |
bcaec148 | 1945 | "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3" |
52432540 | 1946 | [(set (attr "type") |
0bf497f5 | 1947 | (if_then_else (match_test "<Is_float_mode>") |
32093010 | 1948 | (const_string "neon_fp_mla_s<q>") |
1949 | (const_string "neon_mla_<V_elem_ch><q>")))] | |
bcaec148 | 1950 | ) |
d98a3884 | 1951 | |
25a124b3 | 1952 | (define_insn "neon_vmlal<sup><mode>" |
d98a3884 | 1953 | [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") |
1954 | (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0") | |
1955 | (match_operand:VW 2 "s_register_operand" "w") | |
25a124b3 | 1956 | (match_operand:VW 3 "s_register_operand" "w")] |
1957 | VMLAL))] | |
d98a3884 | 1958 | "TARGET_NEON" |
25a124b3 | 1959 | "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3" |
32093010 | 1960 | [(set_attr "type" "neon_mla_<V_elem_ch>_long")] |
bcaec148 | 1961 | ) |
d98a3884 | 1962 | |
94829feb | 1963 | (define_expand "neon_vmls<mode>" |
1964 | [(match_operand:VDQW 0 "s_register_operand" "=w") | |
1965 | (match_operand:VDQW 1 "s_register_operand" "0") | |
1966 | (match_operand:VDQW 2 "s_register_operand" "w") | |
25a124b3 | 1967 | (match_operand:VDQW 3 "s_register_operand" "w")] |
94829feb | 1968 | "TARGET_NEON" |
1969 | { | |
1970 | if (!<Is_float_mode> || flag_unsafe_math_optimizations) | |
1971 | emit_insn (gen_mul<mode>3neg<mode>add<mode>_neon (operands[0], | |
1972 | operands[1], operands[2], operands[3])); | |
1973 | else | |
1974 | emit_insn (gen_neon_vmls<mode>_unspec (operands[0], operands[1], | |
1975 | operands[2], operands[3])); | |
1976 | DONE; | |
1977 | }) | |
1978 | ||
1979 | ; Used for intrinsics when flag_unsafe_math_optimizations is false. | |
1980 | ||
1981 | (define_insn "neon_vmls<mode>_unspec" | |
32093010 | 1982 | [(set (match_operand:VDQW 0 "s_register_operand" "=w") |
1983 | (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0") | |
1984 | (match_operand:VDQW 2 "s_register_operand" "w") | |
1985 | (match_operand:VDQW 3 "s_register_operand" "w")] | |
94829feb | 1986 | UNSPEC_VMLS))] |
d98a3884 | 1987 | "TARGET_NEON" |
bcaec148 | 1988 | "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3" |
52432540 | 1989 | [(set (attr "type") |
0bf497f5 | 1990 | (if_then_else (match_test "<Is_float_mode>") |
32093010 | 1991 | (const_string "neon_fp_mla_s<q>") |
1992 | (const_string "neon_mla_<V_elem_ch><q>")))] | |
bcaec148 | 1993 | ) |
d98a3884 | 1994 | |
25a124b3 | 1995 | (define_insn "neon_vmlsl<sup><mode>" |
d98a3884 | 1996 | [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") |
1997 | (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0") | |
1998 | (match_operand:VW 2 "s_register_operand" "w") | |
25a124b3 | 1999 | (match_operand:VW 3 "s_register_operand" "w")] |
2000 | VMLSL))] | |
d98a3884 | 2001 | "TARGET_NEON" |
25a124b3 | 2002 | "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3" |
32093010 | 2003 | [(set_attr "type" "neon_mla_<V_elem_ch>_long")] |
bcaec148 | 2004 | ) |
d98a3884 | 2005 | |
25a124b3 | 2006 | ;; vqdmulh, vqrdmulh |
2007 | (define_insn "neon_vq<r>dmulh<mode>" | |
d98a3884 | 2008 | [(set (match_operand:VMDQI 0 "s_register_operand" "=w") |
2009 | (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "w") | |
25a124b3 | 2010 | (match_operand:VMDQI 2 "s_register_operand" "w")] |
2011 | VQDMULH))] | |
d98a3884 | 2012 | "TARGET_NEON" |
25a124b3 | 2013 | "vq<r>dmulh.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
32093010 | 2014 | [(set_attr "type" "neon_sat_mul_<V_elem_ch><q>")] |
bcaec148 | 2015 | ) |
d98a3884 | 2016 | |
32833c04 | 2017 | ;; vqrdmlah, vqrdmlsh |
2018 | (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h<mode>" | |
2019 | [(set (match_operand:VMDQI 0 "s_register_operand" "=w") | |
2020 | (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "0") | |
2021 | (match_operand:VMDQI 2 "s_register_operand" "w") | |
2022 | (match_operand:VMDQI 3 "s_register_operand" "w")] | |
2023 | VQRDMLH_AS))] | |
2024 | "TARGET_NEON_RDMA" | |
2025 | "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3" | |
2026 | [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")] | |
2027 | ) | |
2028 | ||
d98a3884 | 2029 | (define_insn "neon_vqdmlal<mode>" |
2030 | [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") | |
2031 | (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0") | |
2032 | (match_operand:VMDI 2 "s_register_operand" "w") | |
25a124b3 | 2033 | (match_operand:VMDI 3 "s_register_operand" "w")] |
d98a3884 | 2034 | UNSPEC_VQDMLAL))] |
2035 | "TARGET_NEON" | |
bcaec148 | 2036 | "vqdmlal.<V_s_elem>\t%q0, %P2, %P3" |
32093010 | 2037 | [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")] |
bcaec148 | 2038 | ) |
d98a3884 | 2039 | |
2040 | (define_insn "neon_vqdmlsl<mode>" | |
2041 | [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") | |
2042 | (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0") | |
2043 | (match_operand:VMDI 2 "s_register_operand" "w") | |
25a124b3 | 2044 | (match_operand:VMDI 3 "s_register_operand" "w")] |
d98a3884 | 2045 | UNSPEC_VQDMLSL))] |
2046 | "TARGET_NEON" | |
bcaec148 | 2047 | "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3" |
32093010 | 2048 | [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")] |
bcaec148 | 2049 | ) |
d98a3884 | 2050 | |
25a124b3 | 2051 | (define_insn "neon_vmull<sup><mode>" |
d98a3884 | 2052 | [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") |
2053 | (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w") | |
25a124b3 | 2054 | (match_operand:VW 2 "s_register_operand" "w")] |
2055 | VMULL))] | |
d98a3884 | 2056 | "TARGET_NEON" |
25a124b3 | 2057 | "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2" |
32093010 | 2058 | [(set_attr "type" "neon_mul_<V_elem_ch>_long")] |
bcaec148 | 2059 | ) |
d98a3884 | 2060 | |
2061 | (define_insn "neon_vqdmull<mode>" | |
2062 | [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") | |
2063 | (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w") | |
25a124b3 | 2064 | (match_operand:VMDI 2 "s_register_operand" "w")] |
d98a3884 | 2065 | UNSPEC_VQDMULL))] |
2066 | "TARGET_NEON" | |
bcaec148 | 2067 | "vqdmull.<V_s_elem>\t%q0, %P1, %P2" |
32093010 | 2068 | [(set_attr "type" "neon_sat_mul_<V_elem_ch>_long")] |
bcaec148 | 2069 | ) |
d98a3884 | 2070 | |
94829feb | 2071 | (define_expand "neon_vsub<mode>" |
b237f2f6 | 2072 | [(match_operand:VCVTF 0 "s_register_operand" "=w") |
2073 | (match_operand:VCVTF 1 "s_register_operand" "w") | |
25a124b3 | 2074 | (match_operand:VCVTF 2 "s_register_operand" "w")] |
94829feb | 2075 | "TARGET_NEON" |
2076 | { | |
2077 | if (!<Is_float_mode> || flag_unsafe_math_optimizations) | |
2078 | emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2])); | |
2079 | else | |
2080 | emit_insn (gen_neon_vsub<mode>_unspec (operands[0], operands[1], | |
2081 | operands[2])); | |
2082 | DONE; | |
2083 | }) | |
2084 | ||
2085 | ; Used for intrinsics when flag_unsafe_math_optimizations is false. | |
2086 | ||
2087 | (define_insn "neon_vsub<mode>_unspec" | |
b237f2f6 | 2088 | [(set (match_operand:VCVTF 0 "s_register_operand" "=w") |
2089 | (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w") | |
2090 | (match_operand:VCVTF 2 "s_register_operand" "w")] | |
d98a3884 | 2091 | UNSPEC_VSUB))] |
2092 | "TARGET_NEON" | |
bcaec148 | 2093 | "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
52432540 | 2094 | [(set (attr "type") |
0bf497f5 | 2095 | (if_then_else (match_test "<Is_float_mode>") |
32093010 | 2096 | (const_string "neon_fp_addsub_s<q>") |
2097 | (const_string "neon_sub<q>")))] | |
bcaec148 | 2098 | ) |
d98a3884 | 2099 | |
25a124b3 | 2100 | (define_insn "neon_vsubl<sup><mode>" |
d98a3884 | 2101 | [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") |
2102 | (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w") | |
25a124b3 | 2103 | (match_operand:VDI 2 "s_register_operand" "w")] |
2104 | VSUBL))] | |
d98a3884 | 2105 | "TARGET_NEON" |
25a124b3 | 2106 | "vsubl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2" |
32093010 | 2107 | [(set_attr "type" "neon_sub_long")] |
bcaec148 | 2108 | ) |
d98a3884 | 2109 | |
25a124b3 | 2110 | (define_insn "neon_vsubw<sup><mode>" |
d98a3884 | 2111 | [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") |
2112 | (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w") | |
25a124b3 | 2113 | (match_operand:VDI 2 "s_register_operand" "w")] |
2114 | VSUBW))] | |
d98a3884 | 2115 | "TARGET_NEON" |
25a124b3 | 2116 | "vsubw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2" |
32093010 | 2117 | [(set_attr "type" "neon_sub_widen")] |
bcaec148 | 2118 | ) |
d98a3884 | 2119 | |
25a124b3 | 2120 | (define_insn "neon_vqsub<sup><mode>" |
d98a3884 | 2121 | [(set (match_operand:VDQIX 0 "s_register_operand" "=w") |
2122 | (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w") | |
25a124b3 | 2123 | (match_operand:VDQIX 2 "s_register_operand" "w")] |
2124 | VQSUB))] | |
d98a3884 | 2125 | "TARGET_NEON" |
25a124b3 | 2126 | "vqsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
32093010 | 2127 | [(set_attr "type" "neon_qsub<q>")] |
bcaec148 | 2128 | ) |
d98a3884 | 2129 | |
25a124b3 | 2130 | (define_insn "neon_vhsub<sup><mode>" |
d98a3884 | 2131 | [(set (match_operand:VDQIW 0 "s_register_operand" "=w") |
2132 | (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w") | |
25a124b3 | 2133 | (match_operand:VDQIW 2 "s_register_operand" "w")] |
2134 | VHSUB))] | |
d98a3884 | 2135 | "TARGET_NEON" |
25a124b3 | 2136 | "vhsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
32093010 | 2137 | [(set_attr "type" "neon_sub_halve<q>")] |
bcaec148 | 2138 | ) |
d98a3884 | 2139 | |
25a124b3 | 2140 | (define_insn "neon_v<r>subhn<mode>" |
d98a3884 | 2141 | [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w") |
2142 | (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w") | |
25a124b3 | 2143 | (match_operand:VN 2 "s_register_operand" "w")] |
2144 | VSUBHN))] | |
d98a3884 | 2145 | "TARGET_NEON" |
25a124b3 | 2146 | "v<r>subhn.<V_if_elem>\t%P0, %q1, %q2" |
32093010 | 2147 | [(set_attr "type" "neon_sub_halve_narrow_q")] |
bcaec148 | 2148 | ) |
d98a3884 | 2149 | |
996c516f | 2150 | ;; These may expand to an UNSPEC pattern when a floating point mode is used |
2151 | ;; without unsafe math optimizations. | |
2152 | (define_expand "neon_vc<cmp_op><mode>" | |
2153 | [(match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w") | |
2154 | (neg:<V_cmp_result> | |
2155 | (COMPARISONS:VDQW (match_operand:VDQW 1 "s_register_operand" "w,w") | |
2156 | (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")))] | |
d98a3884 | 2157 | "TARGET_NEON" |
996c516f | 2158 | { |
2159 | /* For FP comparisons use UNSPECs unless -funsafe-math-optimizations | |
2160 | is enabled: neon_vc<cmp_op><mode>_insn rejects float modes otherwise. */ | |
2161 | if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT | |
2162 | && !flag_unsafe_math_optimizations) | |
2163 | { | |
2164 | /* We don't just emit a gen_neon_vc<cmp_op><mode>_insn_unspec because | |
2165 | we define gen_neon_vceq<mode>_insn_unspec only for float modes | |
2166 | whereas this expander iterates over the integer modes as well, | |
2167 | but we will never expand to UNSPECs for the integer comparisons. */ | |
2168 | switch (<MODE>mode) | |
2169 | { | |
2170 | case V2SFmode: | |
2171 | emit_insn (gen_neon_vc<cmp_op>v2sf_insn_unspec (operands[0], | |
2172 | operands[1], | |
2173 | operands[2])); | |
2174 | break; | |
2175 | case V4SFmode: | |
2176 | emit_insn (gen_neon_vc<cmp_op>v4sf_insn_unspec (operands[0], | |
2177 | operands[1], | |
2178 | operands[2])); | |
2179 | break; | |
2180 | default: | |
2181 | gcc_unreachable (); | |
2182 | } | |
2183 | } | |
2184 | else | |
2185 | emit_insn (gen_neon_vc<cmp_op><mode>_insn (operands[0], | |
2186 | operands[1], | |
2187 | operands[2])); | |
2188 | DONE; | |
2189 | } | |
bcaec148 | 2190 | ) |
d98a3884 | 2191 | |
996c516f | 2192 | (define_insn "neon_vc<cmp_op><mode>_insn" |
0a987353 | 2193 | [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w") |
996c516f | 2194 | (neg:<V_cmp_result> |
2195 | (COMPARISONS:<V_cmp_result> | |
2196 | (match_operand:VDQW 1 "s_register_operand" "w,w") | |
2197 | (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz"))))] | |
2198 | "TARGET_NEON && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT | |
2199 | && !flag_unsafe_math_optimizations)" | |
2200 | { | |
2201 | char pattern[100]; | |
2202 | sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0," | |
2203 | " %%<V_reg>1, %s", | |
2204 | GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT | |
2205 | ? "f" : "<cmp_type>", | |
2206 | which_alternative == 0 | |
2207 | ? "%<V_reg>2" : "#0"); | |
2208 | output_asm_insn (pattern, operands); | |
2209 | return ""; | |
2210 | } | |
52432540 | 2211 | [(set (attr "type") |
996c516f | 2212 | (if_then_else (match_operand 2 "zero_operand") |
32093010 | 2213 | (const_string "neon_compare_zero<q>") |
996c516f | 2214 | (const_string "neon_compare<q>")))] |
bcaec148 | 2215 | ) |
d98a3884 | 2216 | |
996c516f | 2217 | (define_insn "neon_vc<cmp_op_unsp><mode>_insn_unspec" |
0a987353 | 2218 | [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w") |
2219 | (unspec:<V_cmp_result> | |
996c516f | 2220 | [(match_operand:VCVTF 1 "s_register_operand" "w,w") |
2221 | (match_operand:VCVTF 2 "reg_or_zero_operand" "w,Dz")] | |
2222 | NEON_VCMP))] | |
d98a3884 | 2223 | "TARGET_NEON" |
996c516f | 2224 | { |
2225 | char pattern[100]; | |
2226 | sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0," | |
2227 | " %%<V_reg>1, %s", | |
2228 | which_alternative == 0 | |
2229 | ? "%<V_reg>2" : "#0"); | |
2230 | output_asm_insn (pattern, operands); | |
2231 | return ""; | |
2232 | } | |
2233 | [(set_attr "type" "neon_fp_compare_s<q>")] | |
bcaec148 | 2234 | ) |
d98a3884 | 2235 | |
996c516f | 2236 | (define_insn "neon_vc<cmp_op>u<mode>" |
ca6c837f | 2237 | [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w") |
996c516f | 2238 | (neg:<V_cmp_result> |
2239 | (GTUGEU:<V_cmp_result> | |
2240 | (match_operand:VDQIW 1 "s_register_operand" "w") | |
2241 | (match_operand:VDQIW 2 "s_register_operand" "w"))))] | |
ca6c837f | 2242 | "TARGET_NEON" |
996c516f | 2243 | "vc<cmp_op>.u%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
32093010 | 2244 | [(set_attr "type" "neon_compare<q>")] |
ca6c837f | 2245 | ) |
2246 | ||
996c516f | 2247 | (define_expand "neon_vca<cmp_op><mode>" |
2248 | [(set (match_operand:<V_cmp_result> 0 "s_register_operand") | |
2249 | (neg:<V_cmp_result> | |
2250 | (GTGE:<V_cmp_result> | |
2251 | (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand")) | |
2252 | (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand")))))] | |
0a987353 | 2253 | "TARGET_NEON" |
996c516f | 2254 | { |
2255 | if (flag_unsafe_math_optimizations) | |
2256 | emit_insn (gen_neon_vca<cmp_op><mode>_insn (operands[0], operands[1], | |
2257 | operands[2])); | |
2258 | else | |
2259 | emit_insn (gen_neon_vca<cmp_op><mode>_insn_unspec (operands[0], | |
2260 | operands[1], | |
2261 | operands[2])); | |
2262 | DONE; | |
2263 | } | |
0a987353 | 2264 | ) |
2265 | ||
996c516f | 2266 | (define_insn "neon_vca<cmp_op><mode>_insn" |
d98a3884 | 2267 | [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w") |
996c516f | 2268 | (neg:<V_cmp_result> |
2269 | (GTGE:<V_cmp_result> | |
2270 | (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand" "w")) | |
2271 | (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand" "w")))))] | |
2272 | "TARGET_NEON && flag_unsafe_math_optimizations" | |
2273 | "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" | |
32093010 | 2274 | [(set_attr "type" "neon_fp_compare_s<q>")] |
bcaec148 | 2275 | ) |
d98a3884 | 2276 | |
996c516f | 2277 | (define_insn "neon_vca<cmp_op_unsp><mode>_insn_unspec" |
d98a3884 | 2278 | [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w") |
2279 | (unspec:<V_cmp_result> [(match_operand:VCVTF 1 "s_register_operand" "w") | |
25a124b3 | 2280 | (match_operand:VCVTF 2 "s_register_operand" "w")] |
996c516f | 2281 | NEON_VACMP))] |
d98a3884 | 2282 | "TARGET_NEON" |
996c516f | 2283 | "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
32093010 | 2284 | [(set_attr "type" "neon_fp_compare_s<q>")] |
bcaec148 | 2285 | ) |
d98a3884 | 2286 | |
2287 | (define_insn "neon_vtst<mode>" | |
2288 | [(set (match_operand:VDQIW 0 "s_register_operand" "=w") | |
2289 | (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w") | |
25a124b3 | 2290 | (match_operand:VDQIW 2 "s_register_operand" "w")] |
d98a3884 | 2291 | UNSPEC_VTST))] |
2292 | "TARGET_NEON" | |
bcaec148 | 2293 | "vtst.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
32093010 | 2294 | [(set_attr "type" "neon_tst<q>")] |
bcaec148 | 2295 | ) |
d98a3884 | 2296 | |
25a124b3 | 2297 | (define_insn "neon_vabd<sup><mode>" |
2298 | [(set (match_operand:VDQIW 0 "s_register_operand" "=w") | |
2299 | (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w") | |
2300 | (match_operand:VDQIW 2 "s_register_operand" "w")] | |
2301 | VABD))] | |
d98a3884 | 2302 | "TARGET_NEON" |
25a124b3 | 2303 | "vabd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
2304 | [(set_attr "type" "neon_abd<q>")] | |
bcaec148 | 2305 | ) |
d98a3884 | 2306 | |
25a124b3 | 2307 | (define_insn "neon_vabdf<mode>" |
2308 | [(set (match_operand:VCVTF 0 "s_register_operand" "=w") | |
2309 | (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w") | |
2310 | (match_operand:VCVTF 2 "s_register_operand" "w")] | |
2311 | UNSPEC_VABD_F))] | |
2312 | "TARGET_NEON" | |
2313 | "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" | |
2314 | [(set_attr "type" "neon_fp_abd_s<q>")] | |
2315 | ) | |
2316 | ||
2317 | (define_insn "neon_vabdl<sup><mode>" | |
d98a3884 | 2318 | [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") |
2319 | (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w") | |
25a124b3 | 2320 | (match_operand:VW 2 "s_register_operand" "w")] |
2321 | VABDL))] | |
d98a3884 | 2322 | "TARGET_NEON" |
25a124b3 | 2323 | "vabdl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2" |
32093010 | 2324 | [(set_attr "type" "neon_abd_long")] |
bcaec148 | 2325 | ) |
d98a3884 | 2326 | |
25a124b3 | 2327 | (define_insn "neon_vaba<sup><mode>" |
d98a3884 | 2328 | [(set (match_operand:VDQIW 0 "s_register_operand" "=w") |
8c619ffb | 2329 | (plus:VDQIW (unspec:VDQIW [(match_operand:VDQIW 2 "s_register_operand" "w") |
25a124b3 | 2330 | (match_operand:VDQIW 3 "s_register_operand" "w")] |
2331 | VABD) | |
8c619ffb | 2332 | (match_operand:VDQIW 1 "s_register_operand" "0")))] |
d98a3884 | 2333 | "TARGET_NEON" |
25a124b3 | 2334 | "vaba.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3" |
32093010 | 2335 | [(set_attr "type" "neon_arith_acc<q>")] |
bcaec148 | 2336 | ) |
d98a3884 | 2337 | |
25a124b3 | 2338 | (define_insn "neon_vabal<sup><mode>" |
d98a3884 | 2339 | [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") |
8c619ffb | 2340 | (plus:<V_widen> (unspec:<V_widen> [(match_operand:VW 2 "s_register_operand" "w") |
25a124b3 | 2341 | (match_operand:VW 3 "s_register_operand" "w")] |
2342 | VABDL) | |
8c619ffb | 2343 | (match_operand:<V_widen> 1 "s_register_operand" "0")))] |
d98a3884 | 2344 | "TARGET_NEON" |
25a124b3 | 2345 | "vabal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3" |
32093010 | 2346 | [(set_attr "type" "neon_arith_acc<q>")] |
bcaec148 | 2347 | ) |
d98a3884 | 2348 | |
25a124b3 | 2349 | (define_insn "neon_v<maxmin><sup><mode>" |
2350 | [(set (match_operand:VDQIW 0 "s_register_operand" "=w") | |
2351 | (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w") | |
2352 | (match_operand:VDQIW 2 "s_register_operand" "w")] | |
2353 | VMAXMIN))] | |
d98a3884 | 2354 | "TARGET_NEON" |
25a124b3 | 2355 | "v<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
2356 | [(set_attr "type" "neon_minmax<q>")] | |
bcaec148 | 2357 | ) |
d98a3884 | 2358 | |
25a124b3 | 2359 | (define_insn "neon_v<maxmin>f<mode>" |
2360 | [(set (match_operand:VCVTF 0 "s_register_operand" "=w") | |
2361 | (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w") | |
2362 | (match_operand:VCVTF 2 "s_register_operand" "w")] | |
2363 | VMAXMINF))] | |
d98a3884 | 2364 | "TARGET_NEON" |
25a124b3 | 2365 | "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
a76d6a35 | 2366 | [(set_attr "type" "neon_fp_minmax_s<q>")] |
2367 | ) | |
2368 | ||
2369 | ;; Vector forms for the IEEE-754 fmax()/fmin() functions | |
2370 | (define_insn "<fmaxmin><mode>3" | |
2371 | [(set (match_operand:VCVTF 0 "s_register_operand" "=w") | |
2372 | (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w") | |
2373 | (match_operand:VCVTF 2 "s_register_operand" "w")] | |
2374 | VMAXMINFNM))] | |
2375 | "TARGET_NEON && TARGET_FPU_ARMV8" | |
2376 | "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" | |
25a124b3 | 2377 | [(set_attr "type" "neon_fp_minmax_s<q>")] |
bcaec148 | 2378 | ) |
d98a3884 | 2379 | |
2380 | (define_expand "neon_vpadd<mode>" | |
2381 | [(match_operand:VD 0 "s_register_operand" "=w") | |
2382 | (match_operand:VD 1 "s_register_operand" "w") | |
25a124b3 | 2383 | (match_operand:VD 2 "s_register_operand" "w")] |
d98a3884 | 2384 | "TARGET_NEON" |
2385 | { | |
2386 | emit_insn (gen_neon_vpadd_internal<mode> (operands[0], operands[1], | |
2387 | operands[2])); | |
2388 | DONE; | |
2389 | }) | |
2390 | ||
25a124b3 | 2391 | (define_insn "neon_vpaddl<sup><mode>" |
d98a3884 | 2392 | [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w") |
25a124b3 | 2393 | (unspec:<V_double_width> [(match_operand:VDQIW 1 "s_register_operand" "w")] |
2394 | VPADDL))] | |
d98a3884 | 2395 | "TARGET_NEON" |
25a124b3 | 2396 | "vpaddl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1" |
32093010 | 2397 | [(set_attr "type" "neon_reduc_add_long")] |
bcaec148 | 2398 | ) |
d98a3884 | 2399 | |
25a124b3 | 2400 | (define_insn "neon_vpadal<sup><mode>" |
d98a3884 | 2401 | [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w") |
2402 | (unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0") | |
25a124b3 | 2403 | (match_operand:VDQIW 2 "s_register_operand" "w")] |
2404 | VPADAL))] | |
d98a3884 | 2405 | "TARGET_NEON" |
25a124b3 | 2406 | "vpadal.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2" |
32093010 | 2407 | [(set_attr "type" "neon_reduc_add_acc")] |
bcaec148 | 2408 | ) |
d98a3884 | 2409 | |
25a124b3 | 2410 | (define_insn "neon_vp<maxmin><sup><mode>" |
2411 | [(set (match_operand:VDI 0 "s_register_operand" "=w") | |
2412 | (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w") | |
2413 | (match_operand:VDI 2 "s_register_operand" "w")] | |
2414 | VPMAXMIN))] | |
d98a3884 | 2415 | "TARGET_NEON" |
25a124b3 | 2416 | "vp<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
2417 | [(set_attr "type" "neon_reduc_minmax<q>")] | |
bcaec148 | 2418 | ) |
d98a3884 | 2419 | |
25a124b3 | 2420 | (define_insn "neon_vp<maxmin>f<mode>" |
2421 | [(set (match_operand:VCVTF 0 "s_register_operand" "=w") | |
2422 | (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w") | |
2423 | (match_operand:VCVTF 2 "s_register_operand" "w")] | |
2424 | VPMAXMINF))] | |
d98a3884 | 2425 | "TARGET_NEON" |
25a124b3 | 2426 | "vp<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
2427 | [(set_attr "type" "neon_fp_reduc_minmax_s<q>")] | |
bcaec148 | 2428 | ) |
d98a3884 | 2429 | |
2430 | (define_insn "neon_vrecps<mode>" | |
2431 | [(set (match_operand:VCVTF 0 "s_register_operand" "=w") | |
2432 | (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w") | |
25a124b3 | 2433 | (match_operand:VCVTF 2 "s_register_operand" "w")] |
d98a3884 | 2434 | UNSPEC_VRECPS))] |
2435 | "TARGET_NEON" | |
bcaec148 | 2436 | "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
32093010 | 2437 | [(set_attr "type" "neon_fp_recps_s<q>")] |
bcaec148 | 2438 | ) |
d98a3884 | 2439 | |
2440 | (define_insn "neon_vrsqrts<mode>" | |
2441 | [(set (match_operand:VCVTF 0 "s_register_operand" "=w") | |
2442 | (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w") | |
25a124b3 | 2443 | (match_operand:VCVTF 2 "s_register_operand" "w")] |
d98a3884 | 2444 | UNSPEC_VRSQRTS))] |
2445 | "TARGET_NEON" | |
bcaec148 | 2446 | "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
32093010 | 2447 | [(set_attr "type" "neon_fp_rsqrts_s<q>")] |
bcaec148 | 2448 | ) |
d98a3884 | 2449 | |
94829feb | 2450 | (define_expand "neon_vabs<mode>" |
2451 | [(match_operand:VDQW 0 "s_register_operand" "") | |
25a124b3 | 2452 | (match_operand:VDQW 1 "s_register_operand" "")] |
d98a3884 | 2453 | "TARGET_NEON" |
94829feb | 2454 | { |
2455 | emit_insn (gen_abs<mode>2 (operands[0], operands[1])); | |
2456 | DONE; | |
2457 | }) | |
d98a3884 | 2458 | |
2459 | (define_insn "neon_vqabs<mode>" | |
2460 | [(set (match_operand:VDQIW 0 "s_register_operand" "=w") | |
25a124b3 | 2461 | (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")] |
d98a3884 | 2462 | UNSPEC_VQABS))] |
2463 | "TARGET_NEON" | |
bcaec148 | 2464 | "vqabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1" |
32093010 | 2465 | [(set_attr "type" "neon_qabs<q>")] |
bcaec148 | 2466 | ) |
d98a3884 | 2467 | |
0a8b68fa | 2468 | (define_insn "neon_bswap<mode>" |
2469 | [(set (match_operand:VDQHSD 0 "register_operand" "=w") | |
2470 | (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))] | |
2471 | "TARGET_NEON" | |
2472 | "vrev<V_sz_elem>.8\\t%<V_reg>0, %<V_reg>1" | |
2473 | [(set_attr "type" "neon_rev<q>")] | |
2474 | ) | |
2475 | ||
d98a3884 | 2476 | (define_expand "neon_vneg<mode>" |
2477 | [(match_operand:VDQW 0 "s_register_operand" "") | |
25a124b3 | 2478 | (match_operand:VDQW 1 "s_register_operand" "")] |
d98a3884 | 2479 | "TARGET_NEON" |
2480 | { | |
2481 | emit_insn (gen_neg<mode>2 (operands[0], operands[1])); | |
2482 | DONE; | |
2483 | }) | |
2484 | ||
ff1be5c1 | 2485 | (define_expand "neon_copysignf<mode>" |
2486 | [(match_operand:VCVTF 0 "register_operand") | |
2487 | (match_operand:VCVTF 1 "register_operand") | |
2488 | (match_operand:VCVTF 2 "register_operand")] | |
2489 | "TARGET_NEON" | |
2490 | "{ | |
2491 | rtx v_bitmask_cast; | |
2492 | rtx v_bitmask = gen_reg_rtx (<VCVTF:V_cmp_result>mode); | |
2493 | int i, n_elt = GET_MODE_NUNITS (<MODE>mode); | |
2494 | rtvec v = rtvec_alloc (n_elt); | |
2495 | ||
2496 | /* Create bitmask for vector select: only the sign bit (bit 31) of each element is set. Use gen_int_mode rather than GEN_INT so the CONST_INT is sign-extended as SImode requires. */ | |
2497 | for (i = 0; i < n_elt; ++i) | |
2498 | RTVEC_ELT (v, i) = gen_int_mode (0x80000000, SImode); | |
2499 | ||
2500 | emit_move_insn (v_bitmask, | |
2501 | gen_rtx_CONST_VECTOR (<VCVTF:V_cmp_result>mode, v)); | |
2502 | emit_move_insn (operands[0], operands[2]); | |
2503 | v_bitmask_cast = simplify_gen_subreg (<MODE>mode, v_bitmask, | |
2504 | <VCVTF:V_cmp_result>mode, 0); | |
2505 | emit_insn (gen_neon_vbsl<mode> (operands[0], v_bitmask_cast, operands[0], | |
2506 | operands[1])); /* presumably: masked (sign) bits come from operand 2, already copied into operand 0, the rest from operand 1 -- confirm against neon_vbsl */ | |
2507 | ||
2508 | DONE; | |
2509 | }" | |
2510 | ) | |
2511 | ||
d98a3884 | 2512 | (define_insn "neon_vqneg<mode>" |
2513 | [(set (match_operand:VDQIW 0 "s_register_operand" "=w") | |
25a124b3 | 2514 | (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")] |
d98a3884 | 2515 | UNSPEC_VQNEG))] |
2516 | "TARGET_NEON" | |
bcaec148 | 2517 | "vqneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1" |
32093010 | 2518 | [(set_attr "type" "neon_qneg<q>")] |
bcaec148 | 2519 | ) |
d98a3884 | 2520 | |
2521 | (define_insn "neon_vcls<mode>" | |
2522 | [(set (match_operand:VDQIW 0 "s_register_operand" "=w") | |
25a124b3 | 2523 | (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")] |
d98a3884 | 2524 | UNSPEC_VCLS))] |
2525 | "TARGET_NEON" | |
bcaec148 | 2526 | "vcls.<V_s_elem>\t%<V_reg>0, %<V_reg>1" |
32093010 | 2527 | [(set_attr "type" "neon_cls<q>")] |
bcaec148 | 2528 | ) |
d98a3884 | 2529 | |
e2669ea7 | 2530 | (define_insn "clz<mode>2" |
d98a3884 | 2531 | [(set (match_operand:VDQIW 0 "s_register_operand" "=w") |
e2669ea7 | 2532 | (clz:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")))] |
d98a3884 | 2533 | "TARGET_NEON" |
bcaec148 | 2534 | "vclz.<V_if_elem>\t%<V_reg>0, %<V_reg>1" |
32093010 | 2535 | [(set_attr "type" "neon_cnt<q>")] |
bcaec148 | 2536 | ) |
d98a3884 | 2537 | |
e2669ea7 | 2538 | (define_expand "neon_vclz<mode>" |
2539 | [(match_operand:VDQIW 0 "s_register_operand" "") | |
25a124b3 | 2540 | (match_operand:VDQIW 1 "s_register_operand" "")] |
e2669ea7 | 2541 | "TARGET_NEON" |
2542 | { | |
2543 | emit_insn (gen_clz<mode>2 (operands[0], operands[1])); | |
2544 | DONE; | |
2545 | }) | |
2546 | ||
2547 | (define_insn "popcount<mode>2" | |
d98a3884 | 2548 | [(set (match_operand:VE 0 "s_register_operand" "=w") |
e2669ea7 | 2549 | (popcount:VE (match_operand:VE 1 "s_register_operand" "w")))] |
d98a3884 | 2550 | "TARGET_NEON" |
bcaec148 | 2551 | "vcnt.<V_sz_elem>\t%<V_reg>0, %<V_reg>1" |
32093010 | 2552 | [(set_attr "type" "neon_cnt<q>")] |
bcaec148 | 2553 | ) |
d98a3884 | 2554 | |
e2669ea7 | 2555 | (define_expand "neon_vcnt<mode>" |
2556 | [(match_operand:VE 0 "s_register_operand" "=w") | |
25a124b3 | 2557 | (match_operand:VE 1 "s_register_operand" "w")] |
e2669ea7 | 2558 | "TARGET_NEON" |
2559 | { | |
2560 | emit_insn (gen_popcount<mode>2 (operands[0], operands[1])); | |
2561 | DONE; | |
2562 | }) | |
2563 | ||
d98a3884 | 2564 | (define_insn "neon_vrecpe<mode>" |
2565 | [(set (match_operand:V32 0 "s_register_operand" "=w") | |
25a124b3 | 2566 | (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")] |
d98a3884 | 2567 | UNSPEC_VRECPE))] |
2568 | "TARGET_NEON" | |
bcaec148 | 2569 | "vrecpe.<V_u_elem>\t%<V_reg>0, %<V_reg>1" |
32093010 | 2570 | [(set_attr "type" "neon_fp_recpe_s<q>")] |
bcaec148 | 2571 | ) |
d98a3884 | 2572 | |
2573 | (define_insn "neon_vrsqrte<mode>" | |
2574 | [(set (match_operand:V32 0 "s_register_operand" "=w") | |
25a124b3 | 2575 | (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")] |
d98a3884 | 2576 | UNSPEC_VRSQRTE))] |
2577 | "TARGET_NEON" | |
bcaec148 | 2578 | "vrsqrte.<V_u_elem>\t%<V_reg>0, %<V_reg>1" |
32093010 | 2579 | [(set_attr "type" "neon_fp_rsqrte_s<q>")] |
bcaec148 | 2580 | ) |
d98a3884 | 2581 | |
2582 | (define_expand "neon_vmvn<mode>" | |
2583 | [(match_operand:VDQIW 0 "s_register_operand" "") | |
25a124b3 | 2584 | (match_operand:VDQIW 1 "s_register_operand" "")] |
d98a3884 | 2585 | "TARGET_NEON" |
2586 | { | |
2587 | emit_insn (gen_one_cmpl<mode>2 (operands[0], operands[1])); | |
2588 | DONE; | |
2589 | }) | |
2590 | ||
4c0b79b4 | 2591 | (define_insn "neon_vget_lane<mode>_sext_internal" |
2592 | [(set (match_operand:SI 0 "s_register_operand" "=r") | |
2593 | (sign_extend:SI | |
2594 | (vec_select:<V_elem> | |
2595 | (match_operand:VD 1 "s_register_operand" "w") | |
2596 | (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] | |
d98a3884 | 2597 | "TARGET_NEON" |
cdf93281 | 2598 | { |
2599 | if (BYTES_BIG_ENDIAN) | |
2600 | { | |
2601 | int elt = INTVAL (operands[2]); | |
2602 | elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt; | |
2603 | operands[2] = GEN_INT (elt); | |
2604 | } | |
17ae1a66 | 2605 | return "vmov.s<V_sz_elem>\t%0, %P1[%c2]"; |
cdf93281 | 2606 | } |
32093010 | 2607 | [(set_attr "type" "neon_to_gp")] |
bcaec148 | 2608 | ) |
d98a3884 | 2609 | |
4c0b79b4 | 2610 | (define_insn "neon_vget_lane<mode>_zext_internal" |
2611 | [(set (match_operand:SI 0 "s_register_operand" "=r") | |
2612 | (zero_extend:SI | |
2613 | (vec_select:<V_elem> | |
2614 | (match_operand:VD 1 "s_register_operand" "w") | |
2615 | (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] | |
2616 | "TARGET_NEON" | |
cdf93281 | 2617 | { |
2618 | if (BYTES_BIG_ENDIAN) | |
2619 | { | |
2620 | int elt = INTVAL (operands[2]); | |
2621 | elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt; | |
2622 | operands[2] = GEN_INT (elt); | |
2623 | } | |
17ae1a66 | 2624 | return "vmov.u<V_sz_elem>\t%0, %P1[%c2]"; |
cdf93281 | 2625 | } |
32093010 | 2626 | [(set_attr "type" "neon_to_gp")] |
4c0b79b4 | 2627 | ) |
d98a3884 | 2628 | |
4c0b79b4 | 2629 | (define_insn "neon_vget_lane<mode>_sext_internal" |
2630 | [(set (match_operand:SI 0 "s_register_operand" "=r") | |
2631 | (sign_extend:SI | |
2632 | (vec_select:<V_elem> | |
fb5f110d | 2633 | (match_operand:VQ2 1 "s_register_operand" "w") |
4c0b79b4 | 2634 | (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] |
d98a3884 | 2635 | "TARGET_NEON" |
8521669a | 2636 | { |
4c0b79b4 | 2637 | rtx ops[3]; |
2638 | int regno = REGNO (operands[1]); | |
2639 | unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2; /* lanes in each half of the vector */ | |
2640 | unsigned int elt = INTVAL (operands[2]); | |
cdf93281 | 2641 | unsigned int elt_adj = elt % halfelts; /* lane index within its half */ |
2642 | ||
2643 | if (BYTES_BIG_ENDIAN) | |
2644 | elt_adj = halfelts - 1 - elt_adj; /* mirror the index within the half */ | |
4c0b79b4 | 2645 | |
2646 | ops[0] = operands[0]; | |
2647 | ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts)); /* half-width register containing lane ELT; the step of 2 register numbers per half is presumably the D-register stride -- confirm */ | |
cdf93281 | 2648 | ops[2] = GEN_INT (elt_adj); |
17ae1a66 | 2649 | output_asm_insn ("vmov.s<V_sz_elem>\t%0, %P1[%c2]", ops); |
4c0b79b4 | 2650 | |
2651 | return ""; /* the instruction was already emitted by output_asm_insn above */ | |
8521669a | 2652 | } |
32093010 | 2653 | [(set_attr "type" "neon_to_gp_q")] |
bcaec148 | 2654 | ) |
d98a3884 | 2655 | |
4c0b79b4 | 2656 | (define_insn "neon_vget_lane<mode>_zext_internal" |
2657 | [(set (match_operand:SI 0 "s_register_operand" "=r") | |
2658 | (zero_extend:SI | |
2659 | (vec_select:<V_elem> | |
fb5f110d | 2660 | (match_operand:VQ2 1 "s_register_operand" "w") |
4c0b79b4 | 2661 | (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] |
d98a3884 | 2662 | "TARGET_NEON" |
2663 | { | |
4c0b79b4 | 2664 | rtx ops[3]; |
d98a3884 | 2665 | int regno = REGNO (operands[1]); |
2666 | unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2; /* lanes in each half of the vector */ | |
2667 | unsigned int elt = INTVAL (operands[2]); | |
cdf93281 | 2668 | unsigned int elt_adj = elt % halfelts; /* lane index within its half */ |
2669 | ||
2670 | if (BYTES_BIG_ENDIAN) | |
2671 | elt_adj = halfelts - 1 - elt_adj; /* mirror the index within the half */ | |
d98a3884 | 2672 | |
2673 | ops[0] = operands[0]; | |
2674 | ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts)); /* half-width register containing lane ELT; the step of 2 register numbers per half is presumably the D-register stride -- confirm */ | |
cdf93281 | 2675 | ops[2] = GEN_INT (elt_adj); |
17ae1a66 | 2676 | output_asm_insn ("vmov.u<V_sz_elem>\t%0, %P1[%c2]", ops); |
d98a3884 | 2677 | |
2678 | return ""; /* the instruction was already emitted by output_asm_insn above */ | |
2679 | } | |
32093010 | 2680 | [(set_attr "type" "neon_to_gp_q")] |
4c0b79b4 | 2681 | ) |
2682 | ||
2683 | (define_expand "neon_vget_lane<mode>" | |
2684 | [(match_operand:<V_ext> 0 "s_register_operand" "") | |
2685 | (match_operand:VDQW 1 "s_register_operand" "") | |
25a124b3 | 2686 | (match_operand:SI 2 "immediate_operand" "")] |
4c0b79b4 | 2687 | "TARGET_NEON" |
2688 | { | |
cdf93281 | 2689 | if (BYTES_BIG_ENDIAN) |
2690 | { | |
2691 | /* The intrinsics are defined in terms of a model where the | |
2692 | element ordering in memory is vldm order, whereas the generic | |
2693 | RTL is defined in terms of a model where the element ordering | |
2694 | in memory is array order. Convert the lane number to conform | |
2695 | to this model. */ | |
2696 | unsigned int elt = INTVAL (operands[2]); | |
2697 | unsigned int reg_nelts | |
0436eae5 | 2698 | = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode); |
cdf93281 | 2699 | elt ^= reg_nelts - 1; |
2700 | operands[2] = GEN_INT (elt); | |
2701 | } | |
2702 | ||
0436eae5 | 2703 | if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32) |
25a124b3 | 2704 | emit_insn (gen_vec_extract<mode> (operands[0], operands[1], operands[2])); |
4c0b79b4 | 2705 | else |
25a124b3 | 2706 | emit_insn (gen_neon_vget_lane<mode>_sext_internal (operands[0], |
2707 | operands[1], | |
2708 | operands[2])); | |
2709 | DONE; | |
2710 | }) | |
2711 | ||
2712 | (define_expand "neon_vget_laneu<mode>" | |
2713 | [(match_operand:<V_ext> 0 "s_register_operand" "") | |
2714 | (match_operand:VDQIW 1 "s_register_operand" "") | |
2715 | (match_operand:SI 2 "immediate_operand" "")] | |
2716 | "TARGET_NEON" | |
2717 | { | |
25a124b3 | 2718 | if (BYTES_BIG_ENDIAN) |
4c0b79b4 | 2719 | { |
25a124b3 | 2720 | /* The intrinsics are defined in terms of a model where the |
2721 | element ordering in memory is vldm order, whereas the generic | |
2722 | RTL is defined in terms of a model where the element ordering | |
2723 | in memory is array order. Convert the lane number to conform | |
2724 | to this model. */ | |
2725 | unsigned int elt = INTVAL (operands[2]); | |
2726 | unsigned int reg_nelts | |
0436eae5 | 2727 | = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode); |
25a124b3 | 2728 | elt ^= reg_nelts - 1; |
2729 | operands[2] = GEN_INT (elt); | |
4c0b79b4 | 2730 | } |
25a124b3 | 2731 | |
0436eae5 | 2732 | if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32) |
25a124b3 | 2733 | emit_insn (gen_vec_extract<mode> (operands[0], operands[1], operands[2])); |
2734 | else | |
2735 | emit_insn (gen_neon_vget_lane<mode>_zext_internal (operands[0], | |
2736 | operands[1], | |
2737 | operands[2])); | |
4c0b79b4 | 2738 | DONE; |
2739 | }) | |
2740 | ||
79a83503 | 2741 | (define_expand "neon_vget_lanedi" |
2742 | [(match_operand:DI 0 "s_register_operand" "=r") | |
2743 | (match_operand:DI 1 "s_register_operand" "w") | |
25a124b3 | 2744 | (match_operand:SI 2 "immediate_operand" "")] |
4c0b79b4 | 2745 | "TARGET_NEON" |
2746 | { | |
79a83503 | 2747 | emit_move_insn (operands[0], operands[1]); |
2748 | DONE; | |
2749 | }) | |
d98a3884 | 2750 | |
79a83503 | 2751 | (define_expand "neon_vget_lanev2di" |
cd4e07db | 2752 | [(match_operand:DI 0 "s_register_operand" "") |
2753 | (match_operand:V2DI 1 "s_register_operand" "") | |
25a124b3 | 2754 | (match_operand:SI 2 "immediate_operand" "")] |
d98a3884 | 2755 | "TARGET_NEON" |
2756 | { | |
16f72d2f | 2757 | int lane; |
2758 | ||
2759 | if (BYTES_BIG_ENDIAN) | |
2760 | { | |
2761 | /* The intrinsics are defined in terms of a model where the | |
2762 | element ordering in memory is vldm order, whereas the generic | |
2763 | RTL is defined in terms of a model where the element ordering | |
2764 | in memory is array order. Convert the lane number to conform | |
2765 | to this model. */ | |
2766 | unsigned int elt = INTVAL (operands[2]); | |
2767 | unsigned int reg_nelts = 2; | |
2768 | elt ^= reg_nelts - 1; | |
2769 | operands[2] = GEN_INT (elt); | |
2770 | } | |
2771 | ||
2772 | lane = INTVAL (operands[2]); | |
505e1f91 | 2773 | gcc_assert ((lane ==0) || (lane == 1)); |
2774 | emit_move_insn (operands[0], lane == 0 | |
2775 | ? gen_lowpart (DImode, operands[1]) | |
2776 | : gen_highpart (DImode, operands[1])); | |
79a83503 | 2777 | DONE; |
2778 | }) | |
8521669a | 2779 | |
79a83503 | 2780 | (define_expand "neon_vset_lane<mode>" |
2781 | [(match_operand:VDQ 0 "s_register_operand" "=w") | |
2782 | (match_operand:<V_elem> 1 "s_register_operand" "r") | |
2783 | (match_operand:VDQ 2 "s_register_operand" "0") | |
2784 | (match_operand:SI 3 "immediate_operand" "i")] | |
d98a3884 | 2785 | "TARGET_NEON" |
2786 | { | |
d98a3884 | 2787 | unsigned int elt = INTVAL (operands[3]); |
2788 | ||
79a83503 | 2789 | if (BYTES_BIG_ENDIAN) |
2790 | { | |
2791 | unsigned int reg_nelts | |
0436eae5 | 2792 | = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode); /* elements per 64 bits */ |
79a83503 | 2793 | elt ^= reg_nelts - 1; /* reverse the lane index within its 64-bit group, as the neon_vget_lane expanders do */ |
2794 | } | |
8521669a | 2795 | |
79a83503 | 2796 | emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1], |
2797 | GEN_INT (1 << elt), operands[2])); /* the lane is passed as a one-hot mask (1 << elt), not as an index */ | |
2798 | DONE; | |
2799 | }) | |
d98a3884 | 2800 | |
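79a83503 | 2801 | ; As with neon_vget_lanedi, the vector and lane operands (2 & 3 here) are ignored: a DImode value has only a single element, so operand 1 simply replaces the whole result. |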
d98a3884 | 2802 | |
79a83503 | 2803 | (define_expand "neon_vset_lanedi" |
2804 | [(match_operand:DI 0 "s_register_operand" "=w") | |
2805 | (match_operand:DI 1 "s_register_operand" "r") | |
2806 | (match_operand:DI 2 "s_register_operand" "0") | |
2807 | (match_operand:SI 3 "immediate_operand" "i")] | |
d98a3884 | 2808 | "TARGET_NEON" |
2809 | { | |
79a83503 | 2810 | emit_move_insn (operands[0], operands[1]); |
2811 | DONE; | |
2812 | }) | |
d98a3884 | 2813 | |
2814 | (define_expand "neon_vcreate<mode>" | |
fb5f110d | 2815 | [(match_operand:VD_RE 0 "s_register_operand" "") |
d98a3884 | 2816 | (match_operand:DI 1 "general_operand" "")] |
2817 | "TARGET_NEON" | |
2818 | { | |
2819 | rtx src = gen_lowpart (<MODE>mode, operands[1]); | |
2820 | emit_move_insn (operands[0], src); | |
2821 | DONE; | |
2822 | }) | |
2823 | ||
2824 | (define_insn "neon_vdup_n<mode>" | |
c747abbb | 2825 | [(set (match_operand:VX 0 "s_register_operand" "=w") |
79a83503 | 2826 | (vec_duplicate:VX (match_operand:<V_elem> 1 "s_register_operand" "r")))] |
d98a3884 | 2827 | "TARGET_NEON" |
17ae1a66 | 2828 | "vdup.<V_sz_elem>\t%<V_reg>0, %1" |
32093010 | 2829 | [(set_attr "type" "neon_from_gp<q>")] |
bcaec148 | 2830 | ) |
d98a3884 | 2831 | |
c747abbb | 2832 | (define_insn "neon_vdup_n<mode>" |
2833 | [(set (match_operand:V32 0 "s_register_operand" "=w,w") | |
79a83503 | 2834 | (vec_duplicate:V32 (match_operand:<V_elem> 1 "s_register_operand" "r,t")))] |
c747abbb | 2835 | "TARGET_NEON" |
2836 | "@ | |
17ae1a66 | 2837 | vdup.<V_sz_elem>\t%<V_reg>0, %1 |
2838 | vdup.<V_sz_elem>\t%<V_reg>0, %y1" | |
32093010 | 2839 | [(set_attr "type" "neon_from_gp<q>,neon_dup<q>")] |
c747abbb | 2840 | ) |
2841 | ||
79a83503 | 2842 | (define_expand "neon_vdup_ndi" |
2843 | [(match_operand:DI 0 "s_register_operand" "=w") | |
2844 | (match_operand:DI 1 "s_register_operand" "r")] | |
d98a3884 | 2845 | "TARGET_NEON" |
79a83503 | 2846 | { |
2847 | emit_move_insn (operands[0], operands[1]); | |
2848 | DONE; | |
2849 | } | |
bcaec148 | 2850 | ) |
d98a3884 | 2851 | |
2852 | (define_insn "neon_vdup_nv2di" | |
79a83503 | 2853 | [(set (match_operand:V2DI 0 "s_register_operand" "=w,w") |
2854 | (vec_duplicate:V2DI (match_operand:DI 1 "s_register_operand" "r,w")))] | |
d98a3884 | 2855 | "TARGET_NEON" |
79a83503 | 2856 | "@ |
17ae1a66 | 2857 | vmov\t%e0, %Q1, %R1\;vmov\t%f0, %Q1, %R1 |
2858 | vmov\t%e0, %P1\;vmov\t%f0, %P1" | |
2859 | [(set_attr "length" "8") | |
32093010 | 2860 | (set_attr "type" "multiple")] |
bcaec148 | 2861 | ) |
d98a3884 | 2862 | |
;; Duplicate one lane of operand 1 (selected by immediate operand 2) into
;; every element of operand 0.  When BYTES_BIG_ENDIAN the lane number is
;; mirrored (nunits - 1 - lane) before it is printed.  The destination is
;; printed as a D register (%P0) when <Is_d_reg> holds and as a Q register
;; (%q0) otherwise; the source is always printed with %P1.
79a83503 | 2863 | (define_insn "neon_vdup_lane<mode>_internal" |
2864 | [(set (match_operand:VDQW 0 "s_register_operand" "=w") | |
2865 | (vec_duplicate:VDQW | |
2866 | (vec_select:<V_elem> | |
2867 | (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w") | |
2868 | (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] | |
d98a3884 | 2869 | "TARGET_NEON" |
8521669a | 2870 | { |
79a83503 | 2871 | if (BYTES_BIG_ENDIAN) |
2872 | { | |
2873 | int elt = INTVAL (operands[2]); | |
2874 | elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt; | |
2875 | operands[2] = GEN_INT (elt); | |
2876 | } | |
2877 | if (<Is_d_reg>) | |
2878 | return "vdup.<V_sz_elem>\t%P0, %P1[%c2]"; | |
2879 | else | |
2880 | return "vdup.<V_sz_elem>\t%q0, %P1[%c2]"; | |
8521669a | 2881 | } |
32093010 | 2882 | [(set_attr "type" "neon_dup<q>")] |
bcaec148 | 2883 | ) |
d98a3884 | 2884 | |
;; Expander for vdup_lane.  When BYTES_BIG_ENDIAN the lane index is XORed
;; with (number of elements in 64 bits - 1) before the
;; neon_vdup_lane<mode>_internal insn is emitted with the adjusted index.
;; NOTE(review): the internal insn applies its own big-endian adjustment at
;; output time; presumably the two steps are meant to compose -- confirm.
79a83503 | 2885 | (define_expand "neon_vdup_lane<mode>" |
2886 | [(match_operand:VDQW 0 "s_register_operand" "=w") | |
2887 | (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w") | |
2888 | (match_operand:SI 2 "immediate_operand" "i")] | |
d98a3884 | 2889 | "TARGET_NEON" |
8521669a | 2890 | { |
79a83503 | 2891 | if (BYTES_BIG_ENDIAN) |
2892 | { | |
2893 | unsigned int elt = INTVAL (operands[2]); | |
2894 | unsigned int reg_nelts | |
0436eae5 | 2895 | = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode); |
79a83503 | 2896 | elt ^= reg_nelts - 1; |
2897 | operands[2] = GEN_INT (elt); | |
2898 | } | |
2899 | emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1], | |
2900 | operands[2])); | |
2901 | DONE; | |
2902 | }) | |
d98a3884 | 2903 | |
2904 | ; Scalar index is ignored, since only zero is valid here. | |
;; The DImode expansion is therefore a plain move of operand 1 into
;; operand 0; operand 2 is accepted but never read.
2905 | (define_expand "neon_vdup_lanedi" | |
79a83503 | 2906 | [(match_operand:DI 0 "s_register_operand" "=w") |
2907 | (match_operand:DI 1 "s_register_operand" "w") | |
2908 | (match_operand:SI 2 "immediate_operand" "i")] | |
d98a3884 | 2909 | "TARGET_NEON" |
2910 | { | |
2911 | emit_move_insn (operands[0], operands[1]); | |
2912 | DONE; | |
2913 | }) | |
2914 | ||
79a83503 | 2915 | ; Likewise for v2di, as the DImode second operand has only a single element. |
;; The lane index (operand 2) is not read; the expansion simply emits
;; neon_vdup_nv2di on operands 0 and 1.
2916 | (define_expand "neon_vdup_lanev2di" | |
2917 | [(match_operand:V2DI 0 "s_register_operand" "=w") | |
2918 | (match_operand:DI 1 "s_register_operand" "w") | |
2919 | (match_operand:SI 2 "immediate_operand" "i")] | |
d98a3884 | 2920 | "TARGET_NEON" |
8521669a | 2921 | { |
79a83503 | 2922 | emit_insn (gen_neon_vdup_nv2di (operands[0], operands[1])); |
2923 | DONE; | |
2924 | }) | |
d98a3884 | 2925 | |
47ddcd6b | 2926 | ; Disabled before reload because we don't want combine doing something silly, |
2927 | ; but used by the post-reload expansion of neon_vcombine. | |
;; The pattern is a pair of sets exchanging operands 0 and 1 (both "+w",
;; i.e. read and written) and is output as a single vswp.
2928 | (define_insn "*neon_vswp<mode>" | |
2929 | [(set (match_operand:VDQX 0 "s_register_operand" "+w") | |
2930 | (match_operand:VDQX 1 "s_register_operand" "+w")) | |
2931 | (set (match_dup 1) (match_dup 0))] | |
2932 | "TARGET_NEON && reload_completed" | |
042161e8 | 2933 | "vswp\t%<V_reg>0, %<V_reg>1" |
32093010 | 2934 | [(set_attr "type" "neon_permute<q>")] |
47ddcd6b | 2935 | ) |
2936 | ||
d98a3884 | 2937 | ;; In this insn, operand 1 should be low, and operand 2 the high part of the |
2938 | ;; dest vector. | |
2939 | ;; FIXME: A different implementation of this builtin could make it much | |
2940 | ;; more likely that we wouldn't actually need to output anything (we could make | |
2941 | ;; it so that the reg allocator puts things in the right places magically | |
2942 | ;; instead). Lack of subregs for vectors makes that tricky though, I think. | |
2943 | ||
;; Concatenate two VDX vectors (operand 1, operand 2) into one <V_DOUBLE>
;; vector.  The insn has no direct assembly ("#"); once reload has completed
;; it is split by calling neon_split_vcombine on the operands.
47ddcd6b | 2944 | (define_insn_and_split "neon_vcombine<mode>" |
d98a3884 | 2945 | [(set (match_operand:<V_DOUBLE> 0 "s_register_operand" "=w") |
47ddcd6b | 2946 | (vec_concat:<V_DOUBLE> |
2947 | (match_operand:VDX 1 "s_register_operand" "w") | |
2948 | (match_operand:VDX 2 "s_register_operand" "w")))] | |
d98a3884 | 2949 | "TARGET_NEON" |
47ddcd6b | 2950 | "#" |
2951 | "&& reload_completed" | |
2952 | [(const_int 0)] | |
d98a3884 | 2953 | { |
47ddcd6b | 2954 | neon_split_vcombine (operands); |
2955 | DONE; | |
32093010 | 2956 | } |
2957 | [(set_attr "type" "multiple")] | |
2958 | ) | |
d98a3884 | 2959 | |
;; Extract a <V_HALF> piece of a VQX vector: moves into operand 0 the subreg
;; of operand 1 located at byte offset GET_MODE_SIZE (<V_HALF>mode).
33aeac4f | 2960 | (define_expand "neon_vget_high<mode>" |
2961 | [(match_operand:<V_HALF> 0 "s_register_operand") | |
2962 | (match_operand:VQX 1 "s_register_operand")] | |
79a83503 | 2963 | "TARGET_NEON" |
2964 | { | |
33aeac4f | 2965 | emit_move_insn (operands[0], |
2966 | simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, | |
2967 | GET_MODE_SIZE (<V_HALF>mode))); | |
2968 | DONE; | |
2969 | }) | |
79a83503 | 2970 | |
;; Extract a <V_HALF> piece of a VQX vector: moves into operand 0 the subreg
;; of operand 1 located at byte offset 0.
33aeac4f | 2971 | (define_expand "neon_vget_low<mode>" |
2972 | [(match_operand:<V_HALF> 0 "s_register_operand") | |
2973 | (match_operand:VQX 1 "s_register_operand")] | |
d98a3884 | 2974 | "TARGET_NEON" |
2975 | { | |
33aeac4f | 2976 | emit_move_insn (operands[0], |
2977 | simplify_gen_subreg (<V_HALF>mode, operands[1], | |
2978 | <MODE>mode, 0)); | |
2979 | DONE; | |
2980 | }) | |
d98a3884 | 2981 | |
;; Signed integer vector to floating-point vector conversion (RTL "float"),
;; output as vcvt.f32.s32.  Not available under -frounding-math
;; (the condition requires !flag_rounding_math).
741cdc29 | 2982 | (define_insn "float<mode><V_cvtto>2" |
2983 | [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w") | |
2984 | (float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))] | |
2985 | "TARGET_NEON && !flag_rounding_math" | |
2986 | "vcvt.f32.s32\t%<V_reg>0, %<V_reg>1" | |
32093010 | 2987 | [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")] |
741cdc29 | 2988 | ) |
2989 | ||
;; Unsigned integer vector to floating-point vector conversion (RTL
;; "unsigned_float"), output as vcvt.f32.u32.  Requires !flag_rounding_math.
2990 | (define_insn "floatuns<mode><V_cvtto>2" | |
2991 | [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w") | |
2992 | (unsigned_float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))] | |
2993 | "TARGET_NEON && !flag_rounding_math" | |
2994 | "vcvt.f32.u32\t%<V_reg>0, %<V_reg>1" | |
32093010 | 2995 | [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")] |
741cdc29 | 2996 | ) |
2997 | ||
;; Floating-point vector to signed integer vector conversion (RTL "fix"),
;; output as vcvt.s32.f32.
2998 | (define_insn "fix_trunc<mode><V_cvtto>2" | |
2999 | [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w") | |
3000 | (fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))] | |
3001 | "TARGET_NEON" | |
3002 | "vcvt.s32.f32\t%<V_reg>0, %<V_reg>1" | |
32093010 | 3003 | [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")] |
741cdc29 | 3004 | ) |
3005 | ||
;; Floating-point vector to unsigned integer vector conversion (RTL
;; "unsigned_fix"), output as vcvt.u32.f32.
3006 | (define_insn "fixuns_trunc<mode><V_cvtto>2" | |
3007 | [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w") | |
3008 | (unsigned_fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))] | |
3009 | "TARGET_NEON" | |
3010 | "vcvt.u32.f32\t%<V_reg>0, %<V_reg>1" | |
32093010 | 3011 | [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")] |
741cdc29 | 3012 | ) |
3013 | ||
;; Float-to-integer vcvt expressed as an unspec over the VCVT_US iterator
;; (VCVTF source modes).  <sup> supplies the signedness letter in both the
;; pattern name and the vcvt.<sup>32.f32 mnemonic.
25a124b3 | 3014 | (define_insn "neon_vcvt<sup><mode>" |
d98a3884 | 3015 | [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w") |
25a124b3 | 3016 | (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")] |
3017 | VCVT_US))] | |
d98a3884 | 3018 | "TARGET_NEON" |
25a124b3 | 3019 | "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1" |
32093010 | 3020 | [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")] |
bcaec148 | 3021 | ) |
d98a3884 | 3022 | |
;; Integer-to-float vcvt expressed as an unspec over the VCVT_US iterator
;; (VCVTI source modes).  <sup> supplies the signedness letter in both the
;; pattern name and the vcvt.f32.<sup>32 mnemonic.
25a124b3 | 3023 | (define_insn "neon_vcvt<sup><mode>" |
d98a3884 | 3024 | [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w") |
25a124b3 | 3025 | (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")] |
3026 | VCVT_US))] | |
d98a3884 | 3027 | "TARGET_NEON" |
25a124b3 | 3028 | "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1" |
32093010 | 3029 | [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")] |
bcaec148 | 3030 | ) |
d98a3884 | 3031 | |
;; Widening conversion from V4HF (D register, %P1) to V4SF (Q register, %q0),
;; output as vcvt.f32.f16.  Requires TARGET_FP16 in addition to TARGET_NEON.
8518cada | 3032 | (define_insn "neon_vcvtv4sfv4hf" |
3033 | [(set (match_operand:V4SF 0 "s_register_operand" "=w") | |
3034 | (unspec:V4SF [(match_operand:V4HF 1 "s_register_operand" "w")] | |
3035 | UNSPEC_VCVT))] | |
3036 | "TARGET_NEON && TARGET_FP16" | |
3037 | "vcvt.f32.f16\t%q0, %P1" | |
32093010 | 3038 | [(set_attr "type" "neon_fp_cvt_widen_h")] |
8518cada | 3039 | ) |
3040 | ||
;; Narrowing conversion from V4SF (Q register, %q1) to V4HF (D register, %P0),
;; output as vcvt.f16.f32.  Requires TARGET_FP16 in addition to TARGET_NEON.
3041 | (define_insn "neon_vcvtv4hfv4sf" | |
3042 | [(set (match_operand:V4HF 0 "s_register_operand" "=w") | |
3043 | (unspec:V4HF [(match_operand:V4SF 1 "s_register_operand" "w")] | |
3044 | UNSPEC_VCVT))] | |
3045 | "TARGET_NEON && TARGET_FP16" | |
3046 | "vcvt.f16.f32\t%P0, %q1" | |
32093010 | 3047 | [(set_attr "type" "neon_fp_cvt_narrow_s_q")] |
8518cada | 3048 | ) |
3049 | ||
;; Float-to-integer vcvt with an extra immediate (operand 2) that is printed
;; as the third assembly operand.  Before output, operand 2 is passed to
;; neon_const_bounds with bounds 1 and 33 -- presumably a range check with an
;; exclusive upper bound; confirm against neon_const_bounds.
25a124b3 | 3050 | (define_insn "neon_vcvt<sup>_n<mode>" |
d98a3884 | 3051 | [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w") |
3052 | (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w") | |
25a124b3 | 3053 | (match_operand:SI 2 "immediate_operand" "i")] |
3054 | VCVT_US_N))] | |
d98a3884 | 3055 | "TARGET_NEON" |
8521669a | 3056 | { |
3057 | neon_const_bounds (operands[2], 1, 33); | |
25a124b3 | 3058 | return "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1, %2"; |
8521669a | 3059 | } |
32093010 | 3060 | [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")] |
bcaec148 | 3061 | ) |
d98a3884 | 3062 | |
;; Integer-to-float vcvt with an extra immediate (operand 2) that is printed
;; as the third assembly operand.  Before output, operand 2 is passed to
;; neon_const_bounds with bounds 1 and 33 -- presumably a range check with an
;; exclusive upper bound; confirm against neon_const_bounds.
25a124b3 | 3063 | (define_insn "neon_vcvt<sup>_n<mode>" |
d98a3884 | 3064 | [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w") |
3065 | (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w") | |
25a124b3 | 3066 | (match_operand:SI 2 "immediate_operand" "i")] |
3067 | VCVT_US_N))] | |
d98a3884 | 3068 | "TARGET_NEON" |
8521669a | 3069 | { |
3070 | neon_const_bounds (operands[2], 1, 33); | |
25a124b3 | 3071 | return "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1, %2"; |
8521669a | 3072 | } |
32093010 | 3073 | [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")] |
bcaec148 | 3074 | ) |
d98a3884 | 3075 | |
;; Narrowing move: VN-mode source in a Q register (%q1), <V_narrow> result in
;; a D register (%P0), output as vmovn.
3076 | (define_insn "neon_vmovn<mode>" | |
3077 | [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w") | |
25a124b3 | 3078 | (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")] |
d98a3884 | 3079 | UNSPEC_VMOVN))] |
3080 | "TARGET_NEON" | |
bcaec148 | 3081 | "vmovn.<V_if_elem>\t%P0, %q1" |
32093010 | 3082 | [(set_attr "type" "neon_shift_imm_narrow_q")] |
bcaec148 | 3083 | ) |
d98a3884 | 3084 | |
;; Saturating narrowing move over the VQMOVN iterator: Q-register source
;; (%q1), D-register result (%P0); <sup> supplies the signedness letter of
;; the vqmovn mnemonic.
25a124b3 | 3085 | (define_insn "neon_vqmovn<sup><mode>" |
d98a3884 | 3086 | [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w") |
25a124b3 | 3087 | (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")] |
3088 | VQMOVN))] | |
d98a3884 | 3089 | "TARGET_NEON" |
25a124b3 | 3090 | "vqmovn.<sup>%#<V_sz_elem>\t%P0, %q1" |
32093010 | 3091 | [(set_attr "type" "neon_sat_shift_imm_narrow_q")] |
bcaec148 | 3092 | ) |
d98a3884 | 3093 | |
;; vqmovun: Q-register source (%q1), D-register <V_narrow> result (%P0).
;; The element type in the mnemonic comes from <V_s_elem>.
3094 | (define_insn "neon_vqmovun<mode>" | |
3095 | [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w") | |
25a124b3 | 3096 | (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")] |
d98a3884 | 3097 | UNSPEC_VQMOVUN))] |
3098 | "TARGET_NEON" | |
bcaec148 | 3099 | "vqmovun.<V_s_elem>\t%P0, %q1" |
32093010 | 3100 | [(set_attr "type" "neon_sat_shift_imm_narrow_q")] |
bcaec148 | 3101 | ) |
d98a3884 | 3102 | |
;; Widening move over the VMOVL iterator: D-register VW source (%P1),
;; Q-register <V_widen> result (%q0); <sup> supplies the signedness letter.
25a124b3 | 3103 | (define_insn "neon_vmovl<sup><mode>" |
d98a3884 | 3104 | [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") |
25a124b3 | 3105 | (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")] |
3106 | VMOVL))] | |
d98a3884 | 3107 | "TARGET_NEON" |
25a124b3 | 3108 | "vmovl.<sup>%#<V_sz_elem>\t%q0, %P1" |
32093010 | 3109 | [(set_attr "type" "neon_shift_imm_long")] |
bcaec148 | 3110 | ) |
d98a3884 | 3111 | |
;; vmul by scalar, D-register (VMD) form: operand 1 times lane %c3 of
;; operand 2.  Operand 2 uses <scalar_mul_constraint> rather than plain "w".
;; The type attribute selects the fp or the integer scalar-multiply class
;; depending on <Is_float_mode>.
3112 | (define_insn "neon_vmul_lane<mode>" | |
3113 | [(set (match_operand:VMD 0 "s_register_operand" "=w") | |
3114 | (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "w") | |
3115 | (match_operand:VMD 2 "s_register_operand" | |
3116 | "<scalar_mul_constraint>") | |
25a124b3 | 3117 | (match_operand:SI 3 "immediate_operand" "i")] |
d98a3884 | 3118 | UNSPEC_VMUL_LANE))] |
3119 | "TARGET_NEON" | |
8521669a | 3120 | { |
8521669a | 3121 | return "vmul.<V_if_elem>\t%P0, %P1, %P2[%c3]"; |
3122 | } | |
52432540 | 3123 | [(set (attr "type") |
0bf497f5 | 3124 | (if_then_else (match_test "<Is_float_mode>") |
32093010 | 3125 | (const_string "neon_fp_mul_s_scalar<q>") |
3126 | (const_string "neon_mul_<V_elem_ch>_scalar<q>")))] | |
bcaec148 | 3127 | ) |
d98a3884 | 3128 | |
;; vmul by scalar, Q-register (VMQ) form: operands 0 and 1 are printed as Q
;; registers, while the scalar source (operand 2) has mode <V_HALF> and is
;; printed as a D register with lane index %c3.
3129 | (define_insn "neon_vmul_lane<mode>" | |
3130 | [(set (match_operand:VMQ 0 "s_register_operand" "=w") | |
3131 | (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "w") | |
3132 | (match_operand:<V_HALF> 2 "s_register_operand" | |
3133 | "<scalar_mul_constraint>") | |
25a124b3 | 3134 | (match_operand:SI 3 "immediate_operand" "i")] |
d98a3884 | 3135 | UNSPEC_VMUL_LANE))] |
3136 | "TARGET_NEON" | |
8521669a | 3137 | { |
8521669a | 3138 | return "vmul.<V_if_elem>\t%q0, %q1, %P2[%c3]"; |
3139 | } | |
52432540 | 3140 | [(set (attr "type") |
0bf497f5 | 3141 | (if_then_else (match_test "<Is_float_mode>") |
32093010 | 3142 | (const_string "neon_fp_mul_s_scalar<q>") |
3143 | (const_string "neon_mul_<V_elem_ch>_scalar<q>")))] | |
bcaec148 | 3144 | ) |
d98a3884 | 3145 | |
;; Widening vmull by scalar over the VMULL_LANE iterator: D-register VMDI
;; sources (operand 1 and lane %c3 of operand 2), Q-register <V_widen>
;; result; <sup> supplies the signedness letter.
25a124b3 | 3146 | (define_insn "neon_vmull<sup>_lane<mode>" |
d98a3884 | 3147 | [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") |
3148 | (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w") | |
3149 | (match_operand:VMDI 2 "s_register_operand" | |
3150 | "<scalar_mul_constraint>") | |
25a124b3 | 3151 | (match_operand:SI 3 "immediate_operand" "i")] |
3152 | VMULL_LANE))] | |
d98a3884 | 3153 | "TARGET_NEON" |
8521669a | 3154 | { |
25a124b3 | 3155 | return "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2[%c3]"; |
8521669a | 3156 | } |
32093010 | 3157 | [(set_attr "type" "neon_mul_<V_elem_ch>_scalar_long")] |
bcaec148 | 3158 | ) |
d98a3884 | 3159 | |
;; vqdmull by scalar: D-register VMDI sources (operand 1 and lane %c3 of
;; operand 2), Q-register <V_widen> result.
3160 | (define_insn "neon_vqdmull_lane<mode>" | |
3161 | [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") | |
3162 | (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w") | |
3163 | (match_operand:VMDI 2 "s_register_operand" | |
3164 | "<scalar_mul_constraint>") | |
25a124b3 | 3165 | (match_operand:SI 3 "immediate_operand" "i")] |
d98a3884 | 3166 | UNSPEC_VQDMULL_LANE))] |
3167 | "TARGET_NEON" | |
8521669a | 3168 | { |
8521669a | 3169 | return "vqdmull.<V_s_elem>\t%q0, %P1, %P2[%c3]"; |
3170 | } | |
32093010 | 3171 | [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_long")] |
bcaec148 | 3172 | ) |
d98a3884 | 3173 | |
;; vqdmulh / vqrdmulh by scalar (selected by <r> over VQDMULH_LANE),
;; Q-register (VMQI) form; the scalar source is a <V_HALF> D register with
;; lane index %c3.
25a124b3 | 3174 | (define_insn "neon_vq<r>dmulh_lane<mode>" |
d98a3884 | 3175 | [(set (match_operand:VMQI 0 "s_register_operand" "=w") |
3176 | (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "w") | |
3177 | (match_operand:<V_HALF> 2 "s_register_operand" | |
3178 | "<scalar_mul_constraint>") | |
25a124b3 | 3179 | (match_operand:SI 3 "immediate_operand" "i")] |
3180 | VQDMULH_LANE))] | |
d98a3884 | 3181 | "TARGET_NEON" |
8521669a | 3182 | { |
25a124b3 | 3183 | return "vq<r>dmulh.<V_s_elem>\t%q0, %q1, %P2[%c3]"; |
8521669a | 3184 | } |
32093010 | 3185 | [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")] |
bcaec148 | 3186 | ) |
d98a3884 | 3187 | |
;; vqdmulh / vqrdmulh by scalar (selected by <r> over VQDMULH_LANE),
;; D-register (VMDI) form; all three registers are printed with %P.
;; NOTE(review): the type attribute below carries the "_q" suffix even though
;; this is the D-register form (the VMQI form of this pattern uses the
;; identical string) -- confirm whether "neon_sat_mul_<V_elem_ch>_scalar"
;; was intended.
25a124b3 | 3188 | (define_insn "neon_vq<r>dmulh_lane<mode>" |
d98a3884 | 3189 | [(set (match_operand:VMDI 0 "s_register_operand" "=w") |
3190 | (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "w") | |
3191 | (match_operand:VMDI 2 "s_register_operand" | |
3192 | "<scalar_mul_constraint>") | |
25a124b3 | 3193 | (match_operand:SI 3 "immediate_operand" "i")] |
3194 | VQDMULH_LANE))] | |
d98a3884 | 3195 | "TARGET_NEON" |
8521669a | 3196 | { |
25a124b3 | 3197 | return "vq<r>dmulh.<V_s_elem>\t%P0, %P1, %P2[%c3]"; |
8521669a | 3198 | } |
32093010 | 3199 | [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")] |
bcaec148 | 3200 | ) |
d98a3884 | 3201 | |
32833c04 | 3202 | ;; vqrdmlah_lane, vqrdmlsh_lane |
;; Q-register (VMQI) form.  Operand 1 is tied to the destination by the "0"
;; constraint and is not printed; operand 2 is the vector source and lane
;; %c4 of the <V_HALF> operand 3 is the scalar.  Requires TARGET_NEON_RDMA.
3203 | (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>" | |
3204 | [(set (match_operand:VMQI 0 "s_register_operand" "=w") | |
3205 | (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "0") | |
3206 | (match_operand:VMQI 2 "s_register_operand" "w") | |
3207 | (match_operand:<V_HALF> 3 "s_register_operand" | |
3208 | "<scalar_mul_constraint>") | |
3209 | (match_operand:SI 4 "immediate_operand" "i")] | |
3210 | VQRDMLH_AS))] | |
3211 | "TARGET_NEON_RDMA" | |
3212 | { | |
3213 | return | |
3214 | "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%q0, %q2, %P3[%c4]"; | |
3215 | } | |
3216 | [(set_attr "type" "neon_mla_<V_elem_ch>_scalar<q>")] | |
3217 | ) | |
3218 | ||
;; vqrdmlah_lane / vqrdmlsh_lane, D-register (VMDI) form.  Operand 1 is tied
;; to the destination by the "0" constraint and is not printed; operand 2 is
;; the vector source and lane %c4 of operand 3 is the scalar.
;; Requires TARGET_NEON_RDMA.
3219 | (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>" | |
3220 | [(set (match_operand:VMDI 0 "s_register_operand" "=w") | |
3221 | (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "0") | |
3222 | (match_operand:VMDI 2 "s_register_operand" "w") | |
3223 | (match_operand:VMDI 3 "s_register_operand" | |
3224 | "<scalar_mul_constraint>") | |
3225 | (match_operand:SI 4 "immediate_operand" "i")] | |
3226 | VQRDMLH_AS))] | |
3227 | "TARGET_NEON_RDMA" | |
3228 | { | |
3229 | return | |
3230 | "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%P0, %P2, %P3[%c4]"; | |
3231 | } | |
3232 | [(set_attr "type" "neon_mla_<V_elem_ch>_scalar")] | |
3233 | ) | |
3234 | ||
;; vmla by scalar, D-register (VMD) form.  Operand 1 is the accumulator, tied
;; to the destination by the "0" constraint and therefore not printed;
;; operand 2 is the vector source and lane %c4 of operand 3 is the scalar.
;; The type attribute is chosen by <Is_float_mode>.
d98a3884 | 3235 | (define_insn "neon_vmla_lane<mode>" |
3236 | [(set (match_operand:VMD 0 "s_register_operand" "=w") | |
3237 | (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0") | |
3238 | (match_operand:VMD 2 "s_register_operand" "w") | |
3239 | (match_operand:VMD 3 "s_register_operand" | |
3240 | "<scalar_mul_constraint>") | |
25a124b3 | 3241 | (match_operand:SI 4 "immediate_operand" "i")] |
d98a3884 | 3242 | UNSPEC_VMLA_LANE))] |
3243 | "TARGET_NEON" | |
8521669a | 3244 | { |
8521669a | 3245 | return "vmla.<V_if_elem>\t%P0, %P2, %P3[%c4]"; |
3246 | } | |
52432540 | 3247 | [(set (attr "type") |
0bf497f5 | 3248 | (if_then_else (match_test "<Is_float_mode>") |
32093010 | 3249 | (const_string "neon_fp_mla_s_scalar<q>") |
3250 | (const_string "neon_mla_<V_elem_ch>_scalar<q>")))] | |
bcaec148 | 3251 | ) |
d98a3884 | 3252 | |
;; vmla by scalar, Q-register (VMQ) form.  Operand 1 is the accumulator tied
;; to the destination ("0"); operand 2 is printed as a Q register and the
;; scalar comes from lane %c4 of the <V_HALF> operand 3 (a D register).
3253 | (define_insn "neon_vmla_lane<mode>" | |
3254 | [(set (match_operand:VMQ 0 "s_register_operand" "=w") | |
3255 | (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0") | |
3256 | (match_operand:VMQ 2 "s_register_operand" "w") | |
3257 | (match_operand:<V_HALF> 3 "s_register_operand" | |
3258 | "<scalar_mul_constraint>") | |
25a124b3 | 3259 | (match_operand:SI 4 "immediate_operand" "i")] |
d98a3884 | 3260 | UNSPEC_VMLA_LANE))] |
3261 | "TARGET_NEON" | |
8521669a | 3262 | { |
8521669a | 3263 | return "vmla.<V_if_elem>\t%q0, %q2, %P3[%c4]"; |
3264 | } | |
52432540 | 3265 | [(set (attr "type") |
0bf497f5 | 3266 | (if_then_else (match_test "<Is_float_mode>") |
32093010 | 3267 | (const_string "neon_fp_mla_s_scalar<q>") |
3268 | (const_string "neon_mla_<V_elem_ch>_scalar<q>")))] | |
bcaec148 | 3269 | ) |
d98a3884 | 3270 | |
;; Widening vmlal by scalar over the VMLAL_LANE iterator.  The <V_widen>
;; accumulator (operand 1) is tied to the Q-register destination; operands 2
;; and 3 are D-register VMDI sources, with lane %c4 of operand 3 as scalar.
25a124b3 | 3271 | (define_insn "neon_vmlal<sup>_lane<mode>" |
d98a3884 | 3272 | [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") |
3273 | (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0") | |
3274 | (match_operand:VMDI 2 "s_register_operand" "w") | |
3275 | (match_operand:VMDI 3 "s_register_operand" | |
3276 | "<scalar_mul_constraint>") | |
25a124b3 | 3277 | (match_operand:SI 4 "immediate_operand" "i")] |
3278 | VMLAL_LANE))] | |
d98a3884 | 3279 | "TARGET_NEON" |
8521669a | 3280 | { |
25a124b3 | 3281 | return "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]"; |
8521669a | 3282 | } |
32093010 | 3283 | [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")] |
bcaec148 | 3284 | ) |
d98a3884 | 3285 | |
;; vqdmlal by scalar.  The <V_widen> accumulator (operand 1) is tied to the
;; Q-register destination; operands 2 and 3 are D-register VMDI sources, with
;; lane %c4 of operand 3 as the scalar.
3286 | (define_insn "neon_vqdmlal_lane<mode>" | |
3287 | [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") | |
3288 | (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0") | |
3289 | (match_operand:VMDI 2 "s_register_operand" "w") | |
3290 | (match_operand:VMDI 3 "s_register_operand" | |
3291 | "<scalar_mul_constraint>") | |
25a124b3 | 3292 | (match_operand:SI 4 "immediate_operand" "i")] |
d98a3884 | 3293 | UNSPEC_VQDMLAL_LANE))] |
3294 | "TARGET_NEON" | |
8521669a | 3295 | { |
8521669a | 3296 | return "vqdmlal.<V_s_elem>\t%q0, %P2, %P3[%c4]"; |
3297 | } | |
32093010 | 3298 | [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")] |
bcaec148 | 3299 | ) |
d98a3884 | 3300 | |
;; vmls by scalar, D-register (VMD) form.  Operand 1 is the accumulator tied
;; to the destination ("0") and not printed; operand 2 is the vector source
;; and lane %c4 of operand 3 is the scalar.  Type chosen by <Is_float_mode>.
3301 | (define_insn "neon_vmls_lane<mode>" | |
3302 | [(set (match_operand:VMD 0 "s_register_operand" "=w") | |
3303 | (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0") | |
3304 | (match_operand:VMD 2 "s_register_operand" "w") | |
3305 | (match_operand:VMD 3 "s_register_operand" | |
3306 | "<scalar_mul_constraint>") | |
25a124b3 | 3307 | (match_operand:SI 4 "immediate_operand" "i")] |
d98a3884 | 3308 | UNSPEC_VMLS_LANE))] |
3309 | "TARGET_NEON" | |
8521669a | 3310 | { |
8521669a | 3311 | return "vmls.<V_if_elem>\t%P0, %P2, %P3[%c4]"; |
3312 | } | |
52432540 | 3313 | [(set (attr "type") |
0bf497f5 | 3314 | (if_then_else (match_test "<Is_float_mode>") |
32093010 | 3315 | (const_string "neon_fp_mla_s_scalar<q>") |
3316 | (const_string "neon_mla_<V_elem_ch>_scalar<q>")))] | |
bcaec148 | 3317 | ) |
d98a3884 | 3318 | |
;; vmls by scalar, Q-register (VMQ) form.  Operand 1 is the accumulator tied
;; to the destination ("0"); operand 2 is printed as a Q register and the
;; scalar comes from lane %c4 of the <V_HALF> operand 3 (a D register).
3319 | (define_insn "neon_vmls_lane<mode>" | |
3320 | [(set (match_operand:VMQ 0 "s_register_operand" "=w") | |
3321 | (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0") | |
3322 | (match_operand:VMQ 2 "s_register_operand" "w") | |
3323 | (match_operand:<V_HALF> 3 "s_register_operand" | |
3324 | "<scalar_mul_constraint>") | |
25a124b3 | 3325 | (match_operand:SI 4 "immediate_operand" "i")] |
d98a3884 | 3326 | UNSPEC_VMLS_LANE))] |
3327 | "TARGET_NEON" | |
8521669a | 3328 | { |
8521669a | 3329 | return "vmls.<V_if_elem>\t%q0, %q2, %P3[%c4]"; |
3330 | } | |
52432540 | 3331 | [(set (attr "type") |
0bf497f5 | 3332 | (if_then_else (match_test "<Is_float_mode>") |
32093010 | 3333 | (const_string "neon_fp_mla_s_scalar<q>") |
3334 | (const_string "neon_mla_<V_elem_ch>_scalar<q>")))] | |
bcaec148 | 3335 | ) |
d98a3884 | 3336 | |
;; Widening vmlsl by scalar over the VMLSL_LANE iterator.  The <V_widen>
;; accumulator (operand 1) is tied to the Q-register destination; operands 2
;; and 3 are D-register VMDI sources, with lane %c4 of operand 3 as scalar.
25a124b3 | 3337 | (define_insn "neon_vmlsl<sup>_lane<mode>" |
d98a3884 | 3338 | [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") |
3339 | (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0") | |
3340 | (match_operand:VMDI 2 "s_register_operand" "w") | |
3341 | (match_operand:VMDI 3 "s_register_operand" | |
3342 | "<scalar_mul_constraint>") | |
25a124b3 | 3343 | (match_operand:SI 4 "immediate_operand" "i")] |
3344 | VMLSL_LANE))] | |
d98a3884 | 3345 | "TARGET_NEON" |
8521669a | 3346 | { |
25a124b3 | 3347 | return "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]"; |
8521669a | 3348 | } |
32093010 | 3349 | [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")] |
bcaec148 | 3350 | ) |
d98a3884 | 3351 | |
;; vqdmlsl by scalar.  The <V_widen> accumulator (operand 1) is tied to the
;; Q-register destination; operands 2 and 3 are D-register VMDI sources, with
;; lane %c4 of operand 3 as the scalar.
3352 | (define_insn "neon_vqdmlsl_lane<mode>" | |
3353 | [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") | |
3354 | (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0") | |
3355 | (match_operand:VMDI 2 "s_register_operand" "w") | |
3356 | (match_operand:VMDI 3 "s_register_operand" | |
3357 | "<scalar_mul_constraint>") | |
25a124b3 | 3358 | (match_operand:SI 4 "immediate_operand" "i")] |
d98a3884 | 3359 | UNSPEC_VQDMLSL_LANE))] |
3360 | "TARGET_NEON" | |
8521669a | 3361 | { |
8521669a | 3362 | return "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3[%c4]"; |
3363 | } | |
32093010 | 3364 | [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")] |
bcaec148 | 3365 | ) |
d98a3884 | 3366 | |
3367 | ; FIXME: For the "_n" multiply/multiply-accumulate insns, we copy a value in a | |
3368 | ; core register into a temp register, then use a scalar taken from that. This | |
3369 | ; isn't an optimal solution if e.g. the scalar has just been read from memory | |
3370 | ; or extracted from another vector. In the latter case it's currently better to | |
3371 | ; use the "_lane" variant, and the former case can probably be implemented | |
3372 | ; using vld1_lane, but that hasn't been done yet. | |
3373 | ||
;; vmul_n, D-register (VMD) form: allocate a fresh <MODE> temporary, insert
;; the scalar operand 2 into its lane 0 with neon_vset_lane, then emit
;; neon_vmul_lane using lane 0 of that temporary.
3374 | (define_expand "neon_vmul_n<mode>" | |
3375 | [(match_operand:VMD 0 "s_register_operand" "") | |
3376 | (match_operand:VMD 1 "s_register_operand" "") | |
25a124b3 | 3377 | (match_operand:<V_elem> 2 "s_register_operand" "")] |
d98a3884 | 3378 | "TARGET_NEON" |
3379 | { | |
3380 | rtx tmp = gen_reg_rtx (<MODE>mode); | |
3381 | emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx)); | |
3382 | emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp, | |
25a124b3 | 3383 | const0_rtx)); |
d98a3884 | 3384 | DONE; |
3385 | }) | |
3386 | ||
;; vmul_n, Q-register (VMQ) form: the scalar operand 2 is inserted into lane
;; 0 of a fresh <V_HALF> temporary, which is then used as the lane source of
;; neon_vmul_lane with lane index 0.
3387 | (define_expand "neon_vmul_n<mode>" | |
3388 | [(match_operand:VMQ 0 "s_register_operand" "") | |
3389 | (match_operand:VMQ 1 "s_register_operand" "") | |
25a124b3 | 3390 | (match_operand:<V_elem> 2 "s_register_operand" "")] |
d98a3884 | 3391 | "TARGET_NEON" |
3392 | { | |
3393 | rtx tmp = gen_reg_rtx (<V_HALF>mode); | |
3394 | emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx)); | |
3395 | emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp, | |
25a124b3 | 3396 | const0_rtx)); |
d98a3884 | 3397 | DONE; |
3398 | }) | |
3399 | ||
;; vmulls_n: insert the scalar operand 2 into lane 0 of a fresh <MODE>
;; temporary, then emit neon_vmulls_lane with lane index 0.
25a124b3 | 3400 | (define_expand "neon_vmulls_n<mode>" |
d98a3884 | 3401 | [(match_operand:<V_widen> 0 "s_register_operand" "") |
3402 | (match_operand:VMDI 1 "s_register_operand" "") | |
25a124b3 | 3403 | (match_operand:<V_elem> 2 "s_register_operand" "")] |
d98a3884 | 3404 | "TARGET_NEON" |
3405 | { | |
3406 | rtx tmp = gen_reg_rtx (<MODE>mode); | |
3407 | emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx)); | |
25a124b3 | 3408 | emit_insn (gen_neon_vmulls_lane<mode> (operands[0], operands[1], tmp, |
3409 | const0_rtx)); | |
3410 | DONE; | |
3411 | }) | |
3412 | ||
;; vmullu_n: insert the scalar operand 2 into lane 0 of a fresh <MODE>
;; temporary, then emit neon_vmullu_lane with lane index 0.
3413 | (define_expand "neon_vmullu_n<mode>" | |
3414 | [(match_operand:<V_widen> 0 "s_register_operand" "") | |
3415 | (match_operand:VMDI 1 "s_register_operand" "") | |
3416 | (match_operand:<V_elem> 2 "s_register_operand" "")] | |
3417 | "TARGET_NEON" | |
3418 | { | |
3419 | rtx tmp = gen_reg_rtx (<MODE>mode); | |
3420 | emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx)); | |
3421 | emit_insn (gen_neon_vmullu_lane<mode> (operands[0], operands[1], tmp, | |
3422 | const0_rtx)); | |
d98a3884 | 3423 | DONE; |
3424 | }) | |
3425 | ||
;; vqdmull_n: insert the scalar operand 2 into lane 0 of a fresh <MODE>
;; temporary, then emit neon_vqdmull_lane with lane index 0.
3426 | (define_expand "neon_vqdmull_n<mode>" | |
3427 | [(match_operand:<V_widen> 0 "s_register_operand" "") | |
3428 | (match_operand:VMDI 1 "s_register_operand" "") | |
25a124b3 | 3429 | (match_operand:<V_elem> 2 "s_register_operand" "")] |
d98a3884 | 3430 | "TARGET_NEON" |
3431 | { | |
3432 | rtx tmp = gen_reg_rtx (<MODE>mode); | |
3433 | emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx)); | |
3434 | emit_insn (gen_neon_vqdmull_lane<mode> (operands[0], operands[1], tmp, | |
25a124b3 | 3435 | const0_rtx)); |
d98a3884 | 3436 | DONE; |
3437 | }) | |
3438 | ||
;; vqdmulh_n, D-register (VMDI) form: insert the scalar operand 2 into lane 0
;; of a fresh <MODE> temporary, then emit neon_vqdmulh_lane with lane 0.
3439 | (define_expand "neon_vqdmulh_n<mode>" | |
3440 | [(match_operand:VMDI 0 "s_register_operand" "") | |
3441 | (match_operand:VMDI 1 "s_register_operand" "") | |
25a124b3 | 3442 | (match_operand:<V_elem> 2 "s_register_operand" "")] |
d98a3884 | 3443 | "TARGET_NEON" |
3444 | { | |
3445 | rtx tmp = gen_reg_rtx (<MODE>mode); | |
3446 | emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx)); | |
3447 | emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp, | |
25a124b3 | 3448 | const0_rtx)); |
3449 | DONE; | |
3450 | }) | |
3451 | ||
;; vqrdmulh_n, D-register (VMDI) form: insert the scalar operand 2 into lane
;; 0 of a fresh <MODE> temporary, then emit neon_vqrdmulh_lane with lane 0.
3452 | (define_expand "neon_vqrdmulh_n<mode>" | |
3453 | [(match_operand:VMDI 0 "s_register_operand" "") | |
3454 | (match_operand:VMDI 1 "s_register_operand" "") | |
3455 | (match_operand:<V_elem> 2 "s_register_operand" "")] | |
3456 | "TARGET_NEON" | |
3457 | { | |
3458 | rtx tmp = gen_reg_rtx (<MODE>mode); | |
3459 | emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx)); | |
3460 | emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp, | |
3461 | const0_rtx)); | |
d98a3884 | 3462 | DONE; |
3463 | }) | |
3464 | ||
;; vqdmulh_n, Q-register (VMQI) form: the scalar operand 2 is inserted into
;; lane 0 of a fresh <V_HALF> temporary, which then serves as the lane source
;; of neon_vqdmulh_lane with lane index 0.
3465 | (define_expand "neon_vqdmulh_n<mode>" | |
3466 | [(match_operand:VMQI 0 "s_register_operand" "") | |
3467 | (match_operand:VMQI 1 "s_register_operand" "") | |
25a124b3 | 3468 | (match_operand:<V_elem> 2 "s_register_operand" "")] |
d98a3884 | 3469 | "TARGET_NEON" |
3470 | { | |
3471 | rtx tmp = gen_reg_rtx (<V_HALF>mode); | |
3472 | emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx)); | |
3473 | emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp, | |
25a124b3 | 3474 | const0_rtx)); |
3475 | DONE; | |
3476 | }) | |
3477 | ||
;; vqrdmulh_n, Q-register (VMQI) form: the scalar operand 2 is inserted into
;; lane 0 of a fresh <V_HALF> temporary, which then serves as the lane source
;; of neon_vqrdmulh_lane with lane index 0.
3478 | (define_expand "neon_vqrdmulh_n<mode>" | |
3479 | [(match_operand:VMQI 0 "s_register_operand" "") | |
3480 | (match_operand:VMQI 1 "s_register_operand" "") | |
3481 | (match_operand:<V_elem> 2 "s_register_operand" "")] | |
3482 | "TARGET_NEON" | |
3483 | { | |
3484 | rtx tmp = gen_reg_rtx (<V_HALF>mode); | |
3485 | emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx)); | |
3486 | emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp, | |
3487 | const0_rtx)); | |
d98a3884 | 3488 | DONE; |
3489 | }) | |
3490 | ||
;; vmla_n, D-register (VMD) form: insert the scalar operand 3 into lane 0 of
;; a fresh <MODE> temporary, then emit neon_vmla_lane on operands 0-2 with
;; that temporary and lane index 0.
3491 | (define_expand "neon_vmla_n<mode>" | |
3492 | [(match_operand:VMD 0 "s_register_operand" "") | |
3493 | (match_operand:VMD 1 "s_register_operand" "") | |
3494 | (match_operand:VMD 2 "s_register_operand" "") | |
25a124b3 | 3495 | (match_operand:<V_elem> 3 "s_register_operand" "")] |
d98a3884 | 3496 | "TARGET_NEON" |
3497 | { | |
3498 | rtx tmp = gen_reg_rtx (<MODE>mode); | |
3499 | emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx)); | |
3500 | emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2], | |
25a124b3 | 3501 | tmp, const0_rtx)); |
d98a3884 | 3502 | DONE; |
3503 | }) | |
3504 | ||
;; vmla_n, Q-register (VMQ) form: the scalar operand 3 is inserted into lane
;; 0 of a fresh <V_HALF> temporary, which is then passed to neon_vmla_lane
;; together with operands 0-2 and lane index 0.
3505 | (define_expand "neon_vmla_n<mode>" | |
3506 | [(match_operand:VMQ 0 "s_register_operand" "") | |
3507 | (match_operand:VMQ 1 "s_register_operand" "") | |
3508 | (match_operand:VMQ 2 "s_register_operand" "") | |
25a124b3 | 3509 | (match_operand:<V_elem> 3 "s_register_operand" "")] |
d98a3884 | 3510 | "TARGET_NEON" |
3511 | { | |
3512 | rtx tmp = gen_reg_rtx (<V_HALF>mode); | |
3513 | emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx)); | |
3514 | emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2], | |
25a124b3 | 3515 | tmp, const0_rtx)); |
d98a3884 | 3516 | DONE; |
3517 | }) | |
3518 | ||
;; vmlals_n: insert the scalar operand 3 into lane 0 of a fresh <MODE>
;; temporary, then emit neon_vmlals_lane on operands 0-2 with lane index 0.
25a124b3 | 3519 | (define_expand "neon_vmlals_n<mode>" |
d98a3884 | 3520 | [(match_operand:<V_widen> 0 "s_register_operand" "") |
3521 | (match_operand:<V_widen> 1 "s_register_operand" "") | |
3522 | (match_operand:VMDI 2 "s_register_operand" "") | |
25a124b3 | 3523 | (match_operand:<V_elem> 3 "s_register_operand" "")] |
d98a3884 | 3524 | "TARGET_NEON" |
3525 | { | |
3526 | rtx tmp = gen_reg_rtx (<MODE>mode); | |
3527 | emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx)); | |
25a124b3 | 3528 | emit_insn (gen_neon_vmlals_lane<mode> (operands[0], operands[1], operands[2], |
3529 | tmp, const0_rtx)); | |
3530 | DONE; | |
3531 | }) | |
3532 | ||
;; vmlalu_n: insert the scalar operand 3 into lane 0 of a fresh <MODE>
;; temporary, then emit neon_vmlalu_lane on operands 0-2 with lane index 0.
3533 | (define_expand "neon_vmlalu_n<mode>" | |
3534 | [(match_operand:<V_widen> 0 "s_register_operand" "") | |
3535 | (match_operand:<V_widen> 1 "s_register_operand" "") | |
3536 | (match_operand:VMDI 2 "s_register_operand" "") | |
3537 | (match_operand:<V_elem> 3 "s_register_operand" "")] | |
3538 | "TARGET_NEON" | |
3539 | { | |
3540 | rtx tmp = gen_reg_rtx (<MODE>mode); | |
3541 | emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx)); | |
3542 | emit_insn (gen_neon_vmlalu_lane<mode> (operands[0], operands[1], operands[2], | |
3543 | tmp, const0_rtx)); | |
d98a3884 | 3544 | DONE; |
3545 | }) | |
3546 | ||
;; vqdmlal_n: insert the scalar operand 3 into lane 0 of a fresh <MODE>
;; temporary, then emit neon_vqdmlal_lane on operands 0-2 with lane index 0.
3547 | (define_expand "neon_vqdmlal_n<mode>" | |
3548 | [(match_operand:<V_widen> 0 "s_register_operand" "") | |
3549 | (match_operand:<V_widen> 1 "s_register_operand" "") | |
3550 | (match_operand:VMDI 2 "s_register_operand" "") | |
25a124b3 | 3551 | (match_operand:<V_elem> 3 "s_register_operand" "")] |
d98a3884 | 3552 | "TARGET_NEON" |
3553 | { | |
3554 | rtx tmp = gen_reg_rtx (<MODE>mode); | |
3555 | emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx)); | |
3556 | emit_insn (gen_neon_vqdmlal_lane<mode> (operands[0], operands[1], operands[2], | |
25a124b3 | 3557 | tmp, const0_rtx)); |
d98a3884 | 3558 | DONE; |
3559 | }) | |
3560 | ||
;; vmls_n, D-register (VMD) form: insert the scalar operand 3 into lane 0 of
;; a fresh <MODE> temporary, then emit neon_vmls_lane on operands 0-2 with
;; that temporary and lane index 0.
3561 | (define_expand "neon_vmls_n<mode>" | |
3562 | [(match_operand:VMD 0 "s_register_operand" "") | |
3563 | (match_operand:VMD 1 "s_register_operand" "") | |
3564 | (match_operand:VMD 2 "s_register_operand" "") | |
25a124b3 | 3565 | (match_operand:<V_elem> 3 "s_register_operand" "")] |
d98a3884 | 3566 | "TARGET_NEON" |
3567 | { | |
3568 | rtx tmp = gen_reg_rtx (<MODE>mode); | |
3569 | emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx)); | |
3570 | emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2], | |
25a124b3 | 3571 | tmp, const0_rtx)); |
d98a3884 | 3572 | DONE; |
3573 | }) | |
3574 | ||
;; Expander for the scalar-operand (_n) form of neon_vmls over the VMQ modes.
;; Unlike the VMD variant, the temporary that carries the scalar is created
;; in <V_HALF>mode and filled with neon_vset_lane<V_half> (lane index
;; const0_rtx); it is then passed to neon_vmls_lane<mode> together with
;; operands 0-2.  The body ends with DONE.
3575 | (define_expand "neon_vmls_n<mode>" | |
3576 | [(match_operand:VMQ 0 "s_register_operand" "") | |
3577 | (match_operand:VMQ 1 "s_register_operand" "") | |
3578 | (match_operand:VMQ 2 "s_register_operand" "") | |
25a124b3 | 3579 | (match_operand:<V_elem> 3 "s_register_operand" "")] |
d98a3884 | 3580 | "TARGET_NEON" |
3581 | { | |
3582 | rtx tmp = gen_reg_rtx (<V_HALF>mode); | |
3583 | emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx)); | |
3584 | emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2], | |
25a124b3 | 3585 | tmp, const0_rtx)); |
3586 | DONE; | |
3587 | }) | |
3588 | ||
;; Expander for the scalar-operand (_n) form of neon_vmlsls.  Operand 3
;; (mode <V_elem>) is written into a fresh <MODE>mode register with
;; neon_vset_lane at lane index const0_rtx; neon_vmlsls_lane<mode> is then
;; emitted on operands 0-2 plus that register, selecting lane const0_rtx.
;; The body ends with DONE, so only the insns it emits make up the expansion.
3589 | (define_expand "neon_vmlsls_n<mode>" | |
3590 | [(match_operand:<V_widen> 0 "s_register_operand" "") | |
3591 | (match_operand:<V_widen> 1 "s_register_operand" "") | |
3592 | (match_operand:VMDI 2 "s_register_operand" "") | |
3593 | (match_operand:<V_elem> 3 "s_register_operand" "")] | |
3594 | "TARGET_NEON" | |
3595 | { | |
3596 | rtx tmp = gen_reg_rtx (<MODE>mode); | |
3597 | emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx)); | |
3598 | emit_insn (gen_neon_vmlsls_lane<mode> (operands[0], operands[1], operands[2], | |
3599 | tmp, const0_rtx)); | |
d98a3884 | 3600 | DONE; |
3601 | }) | |
3602 | ||
;; Expander for the scalar-operand (_n) form of neon_vmlslu.  Operand 3
;; (mode <V_elem>) is written into a fresh <MODE>mode register with
;; neon_vset_lane at lane index const0_rtx; neon_vmlslu_lane<mode> is then
;; emitted on operands 0-2 plus that register, selecting lane const0_rtx.
;; The body ends with DONE, so only the insns it emits make up the expansion.
25a124b3 | 3603 | (define_expand "neon_vmlslu_n<mode>" |
d98a3884 | 3604 | [(match_operand:<V_widen> 0 "s_register_operand" "") |
3605 | (match_operand:<V_widen> 1 "s_register_operand" "") | |
3606 | (match_operand:VMDI 2 "s_register_operand" "") | |
25a124b3 | 3607 | (match_operand:<V_elem> 3 "s_register_operand" "")] |
d98a3884 | 3608 | "TARGET_NEON" |
3609 | { | |
3610 | rtx tmp = gen_reg_rtx (<MODE>mode); | |
3611 | emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx)); | |
25a124b3 | 3612 | emit_insn (gen_neon_vmlslu_lane<mode> (operands[0], operands[1], operands[2], |
3613 | tmp, const0_rtx)); | |
d98a3884 | 3614 | DONE; |
3615 | }) | |
3616 | ||
;; Expander for the scalar-operand (_n) form of neon_vqdmlsl.  Operand 3
;; (mode <V_elem>) is written into a fresh <MODE>mode register with
;; neon_vset_lane at lane index const0_rtx; neon_vqdmlsl_lane<mode> is then
;; emitted on operands 0-2 plus that register, selecting lane const0_rtx.
;; The body ends with DONE, so only the insns it emits make up the expansion.
3617 | (define_expand "neon_vqdmlsl_n<mode>" | |
3618 | [(match_operand:<V_widen> 0 "s_register_operand" "") | |
3619 | (match_operand:<V_widen> 1 "s_register_operand" "") | |
3620 | (match_operand:VMDI 2 "s_register_operand" "") | |
25a124b3 | 3621 | (match_operand:<V_elem> 3 "s_register_operand" "")] |
d98a3884 | 3622 | "TARGET_NEON" |
3623 | { | |
3624 | rtx tmp = gen_reg_rtx (<MODE>mode); | |
3625 | emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx)); | |
3626 | emit_insn (gen_neon_vqdmlsl_lane<mode> (operands[0], operands[1], operands[2], | |
25a124b3 | 3627 | tmp, const0_rtx)); |
d98a3884 | 3628 | DONE; |
3629 | }) | |
3630 | ||
;; Emits "vext.<V_sz_elem>" with destination operand 0, register sources
;; operands 1 and 2, and immediate operand 3.  Before returning the template,
;; the C body passes operand 3 to neon_const_bounds with bounds 0 and
;; GET_MODE_NUNITS (<MODE>mode), i.e. the check is against the number of
;; elements in the vector mode.  Modeled as UNSPEC_VEXT; type neon_ext<q>.
3631 | (define_insn "neon_vext<mode>" | |
3632 | [(set (match_operand:VDQX 0 "s_register_operand" "=w") | |
3633 | (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w") | |
3634 | (match_operand:VDQX 2 "s_register_operand" "w") | |
3635 | (match_operand:SI 3 "immediate_operand" "i")] | |
3636 | UNSPEC_VEXT))] | |
3637 | "TARGET_NEON" | |
8521669a | 3638 | { |
3639 | neon_const_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode)); | |
3640 | return "vext.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, %3"; | |
3641 | } | |
32093010 | 3642 | [(set_attr "type" "neon_ext<q>")] |
bcaec148 | 3643 | ) |
d98a3884 | 3644 | |
;; Single-instruction pattern over the VDQ modes: emits "vrev64.<V_sz_elem>"
;; from register operand 1 into register operand 0.  The operation is opaque
;; to the RTL optimizers (UNSPEC_VREV64); scheduling type is neon_rev<q>.
3645 | (define_insn "neon_vrev64<mode>" | |
3646 | [(set (match_operand:VDQ 0 "s_register_operand" "=w") | |
25a124b3 | 3647 | (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")] |
d98a3884 | 3648 | UNSPEC_VREV64))] |
3649 | "TARGET_NEON" | |
bcaec148 | 3650 | "vrev64.<V_sz_elem>\t%<V_reg>0, %<V_reg>1" |
32093010 | 3651 | [(set_attr "type" "neon_rev<q>")] |
bcaec148 | 3652 | ) |
d98a3884 | 3653 | |
;; Single-instruction pattern over the VX modes: emits "vrev32.<V_sz_elem>"
;; from register operand 1 into register operand 0.  The operation is opaque
;; to the RTL optimizers (UNSPEC_VREV32); scheduling type is neon_rev<q>.
3654 | (define_insn "neon_vrev32<mode>" | |
3655 | [(set (match_operand:VX 0 "s_register_operand" "=w") | |
25a124b3 | 3656 | (unspec:VX [(match_operand:VX 1 "s_register_operand" "w")] |
d98a3884 | 3657 | UNSPEC_VREV32))] |
3658 | "TARGET_NEON" | |
bcaec148 | 3659 | "vrev32.<V_sz_elem>\t%<V_reg>0, %<V_reg>1" |
32093010 | 3660 | [(set_attr "type" "neon_rev<q>")] |
bcaec148 | 3661 | ) |
d98a3884 | 3662 | |
;; Single-instruction pattern over the VE modes: emits "vrev16.<V_sz_elem>"
;; from register operand 1 into register operand 0.  The operation is opaque
;; to the RTL optimizers (UNSPEC_VREV16); scheduling type is neon_rev<q>.
3663 | (define_insn "neon_vrev16<mode>" | |
3664 | [(set (match_operand:VE 0 "s_register_operand" "=w") | |
25a124b3 | 3665 | (unspec:VE [(match_operand:VE 1 "s_register_operand" "w")] |
d98a3884 | 3666 | UNSPEC_VREV16))] |
3667 | "TARGET_NEON" | |
bcaec148 | 3668 | "vrev16.<V_sz_elem>\t%<V_reg>0, %<V_reg>1" |
32093010 | 3669 | [(set_attr "type" "neon_rev<q>")] |
bcaec148 | 3670 | ) |
d98a3884 | 3671 | |
3672 | ; vbsl_* intrinsics may compile to any of vbsl/vbif/vbit depending on register | |
3673 | ; allocation. For an intrinsic of form: | |
3674 | ; rD = vbsl_* (rS, rN, rM) | |
3675 | ; We can use any of: | |
3676 | ; vbsl rS, rN, rM (if D = S) | |
3677 | ; vbit rD, rN, rS (if D = M, so 1-bits in rS choose bits from rN, else rM) | |
3678 | ; vbif rD, rM, rS (if D = N, so 0-bits in rS choose bits from rM, else rN) | |
3679 | ||
;; Bit-select insn with three constraint alternatives; in each, exactly one
;; input is tied to the output (constraint "0") and that choice picks the
;; mnemonic:
;;   alt 0: operand 1 tied -> vbsl %0, %2, %3
;;   alt 1: operand 3 tied -> vbit %0, %2, %1
;;   alt 2: operand 2 tied -> vbif %0, %3, %1
;; The order of the "@" template lines must stay in step with the order of
;; the constraint alternatives.
3680 | (define_insn "neon_vbsl<mode>_internal" | |
3681 | [(set (match_operand:VDQX 0 "s_register_operand" "=w,w,w") | |
3682 | (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" " 0,w,w") | |
3683 | (match_operand:VDQX 2 "s_register_operand" " w,w,0") | |
3684 | (match_operand:VDQX 3 "s_register_operand" " w,0,w")] | |
3685 | UNSPEC_VBSL))] | |
3686 | "TARGET_NEON" | |
3687 | "@ | |
3688 | vbsl\t%<V_reg>0, %<V_reg>2, %<V_reg>3 | |
3689 | vbit\t%<V_reg>0, %<V_reg>2, %<V_reg>1 | |
bcaec148 | 3690 | vbif\t%<V_reg>0, %<V_reg>3, %<V_reg>1" |
32093010 | 3691 | [(set_attr "type" "neon_bsl<q>")] |
bcaec148 | 3692 | ) |
d98a3884 | 3693 | |
;; Public expander for vbsl.  Operand 1 arrives in <V_cmp_result> mode while
;; operands 0, 2 and 3 are in the iterator mode; the body replaces operand 1
;; with its gen_lowpart in <MODE>mode so that all unspec inputs share one
;; mode.  There is no DONE, so the UNSPEC_VBSL template above is what gets
;; emitted (presumably matched by neon_vbsl<mode>_internal, whose operand 1
;; is in the iterator mode).
3694 | (define_expand "neon_vbsl<mode>" | |
3695 | [(set (match_operand:VDQX 0 "s_register_operand" "") | |
3696 | (unspec:VDQX [(match_operand:<V_cmp_result> 1 "s_register_operand" "") | |
3697 | (match_operand:VDQX 2 "s_register_operand" "") | |
3698 | (match_operand:VDQX 3 "s_register_operand" "")] | |
3699 | UNSPEC_VBSL))] | |
3700 | "TARGET_NEON" | |
3701 | { | |
3702 | /* We can't alias operands together if they have different modes. */ | |
3703 | operands[1] = gen_lowpart (<MODE>mode, operands[1]); | |
3704 | }) | |
3705 | ||
25a124b3 | 3706 | ;; vshl, vrshl |
;; Shift by a per-element count held in a register: operand 2 is an
;; s_register_operand in the same mode as operand 1, not an immediate.
;; The pattern name and mnemonic are instantiated from the VSHL unspec
;; set through <shift_op> and <sup>.
;; NOTE(review): the type attribute is neon_shift_imm<q> even though the
;; count is a register, whereas the immediate-count neon_vsri_n/neon_vsli_n
;; patterns in this file use neon_shift_reg<q> -- confirm the scheduling
;; types are as intended.
3707 | (define_insn "neon_v<shift_op><sup><mode>" | |
d98a3884 | 3708 | [(set (match_operand:VDQIX 0 "s_register_operand" "=w") |
3709 | (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w") | |
25a124b3 | 3710 | (match_operand:VDQIX 2 "s_register_operand" "w")] |
3711 | VSHL))] | |
d98a3884 | 3712 | "TARGET_NEON" |
25a124b3 | 3713 | "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
32093010 | 3714 | [(set_attr "type" "neon_shift_imm<q>")] |
bcaec148 | 3715 | ) |
d98a3884 | 3716 | |
25a124b3 | 3717 | ;; vqshl, vqrshl |
;; Saturating counterpart of the register-count shift pattern: same operand
;; layout (operand 2 is a register in the vector mode), instantiated from
;; the VQSHL unspec set through <shift_op> and <sup>.
;; NOTE(review): typed neon_sat_shift_imm<q> although the count is a
;; register operand -- confirm this is the intended scheduling type.
3718 | (define_insn "neon_v<shift_op><sup><mode>" | |
d98a3884 | 3719 | [(set (match_operand:VDQIX 0 "s_register_operand" "=w") |
3720 | (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w") | |
25a124b3 | 3721 | (match_operand:VDQIX 2 "s_register_operand" "w")] |
3722 | VQSHL))] | |
d98a3884 | 3723 | "TARGET_NEON" |
25a124b3 | 3724 | "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" |
32093010 | 3725 | [(set_attr "type" "neon_sat_shift_imm<q>")] |
bcaec148 | 3726 | ) |
d98a3884 | 3727 | |
25a124b3 | 3728 | ;; vshr_n, vrshr_n |
;; Right shift by immediate (VSHR_N unspec set).  Operand 2 is an SImode
;; immediate; the body passes it to neon_const_bounds with bounds 1 and
;; neon_element_bits (<MODE>mode) + 1 before returning the template, which
;; prints the immediate as the last assembler operand.
3729 | (define_insn "neon_v<shift_op><sup>_n<mode>" | |
d98a3884 | 3730 | [(set (match_operand:VDQIX 0 "s_register_operand" "=w") |
3731 | (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w") | |
25a124b3 | 3732 | (match_operand:SI 2 "immediate_operand" "i")] |
3733 | VSHR_N))] | |
d98a3884 | 3734 | "TARGET_NEON" |
8521669a | 3735 | { |
3736 | neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) + 1); | |
25a124b3 | 3737 | return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2"; |
8521669a | 3738 | } |
32093010 | 3739 | [(set_attr "type" "neon_shift_imm<q>")] |
bcaec148 | 3740 | ) |
d98a3884 | 3741 | |
25a124b3 | 3742 | ;; vshrn_n, vrshrn_n |
;; Narrowing right shift by immediate (VSHRN_N unspec set).  The source
;; (operand 1) is in the VN iterator mode and the result in <V_narrow>.
;; The immediate is checked with neon_const_bounds using bounds 1 and
;; neon_element_bits (<MODE>mode) / 2 + 1, where <MODE> is the source mode.
;; The template prints the destination with %P and the source with %q.
3743 | (define_insn "neon_v<shift_op>_n<mode>" | |
d98a3884 | 3744 | [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w") |
3745 | (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w") | |
25a124b3 | 3746 | (match_operand:SI 2 "immediate_operand" "i")] |
3747 | VSHRN_N))] | |
d98a3884 | 3748 | "TARGET_NEON" |
8521669a | 3749 | { |
3750 | neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1); | |
25a124b3 | 3751 | return "v<shift_op>.<V_if_elem>\t%P0, %q1, %2"; |
8521669a | 3752 | } |
32093010 | 3753 | [(set_attr "type" "neon_shift_imm_narrow_q")] |
bcaec148 | 3754 | ) |
d98a3884 | 3755 | |
25a124b3 | 3756 | ;; vqshrn_n, vqrshrn_n |
;; Saturating narrowing right shift by immediate (VQSHRN_N unspec set).
;; Source operand 1 is in the VN iterator mode, result in <V_narrow>.  The
;; immediate is checked with neon_const_bounds using bounds 1 and
;; neon_element_bits (<MODE>mode) / 2 + 1.  The element suffix is built
;; from <sup> and <V_sz_elem>.
3757 | (define_insn "neon_v<shift_op><sup>_n<mode>" | |
d98a3884 | 3758 | [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w") |
3759 | (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w") | |
25a124b3 | 3760 | (match_operand:SI 2 "immediate_operand" "i")] |
3761 | VQSHRN_N))] | |
d98a3884 | 3762 | "TARGET_NEON" |
8521669a | 3763 | { |
3764 | neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1); | |
25a124b3 | 3765 | return "v<shift_op>.<sup>%#<V_sz_elem>\t%P0, %q1, %2"; |
8521669a | 3766 | } |
32093010 | 3767 | [(set_attr "type" "neon_sat_shift_imm_narrow_q")] |
bcaec148 | 3768 | ) |
d98a3884 | 3769 | |
25a124b3 | 3770 | ;; vqshrun_n, vqrshrun_n |
;; Saturating narrowing right shift by immediate for the VQSHRUN_N unspec
;; set.  Same operand layout and neon_const_bounds check (bounds 1 and
;; neon_element_bits (<MODE>mode) / 2 + 1) as the other narrowing shifts,
;; but the element suffix printed is <V_s_elem>.
3771 | (define_insn "neon_v<shift_op>_n<mode>" | |
d98a3884 | 3772 | [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w") |
3773 | (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w") | |
25a124b3 | 3774 | (match_operand:SI 2 "immediate_operand" "i")] |
3775 | VQSHRUN_N))] | |
d98a3884 | 3776 | "TARGET_NEON" |
8521669a | 3777 | { |
3778 | neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1); | |
25a124b3 | 3779 | return "v<shift_op>.<V_s_elem>\t%P0, %q1, %2"; |
8521669a | 3780 | } |
32093010 | 3781 | [(set_attr "type" "neon_sat_shift_imm_narrow_q")] |
bcaec148 | 3782 | ) |
d98a3884 | 3783 | |
;; Left shift by immediate (UNSPEC_VSHL_N).  The immediate (operand 2) is
;; checked with neon_const_bounds using bounds 0 and
;; neon_element_bits (<MODE>mode); note the lower bound is 0 here, unlike
;; the right-shift patterns which pass 1.  Emits "vshl.<V_if_elem>".
3784 | (define_insn "neon_vshl_n<mode>" | |
3785 | [(set (match_operand:VDQIX 0 "s_register_operand" "=w") | |
3786 | (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w") | |
25a124b3 | 3787 | (match_operand:SI 2 "immediate_operand" "i")] |
d98a3884 | 3788 | UNSPEC_VSHL_N))] |
3789 | "TARGET_NEON" | |
8521669a | 3790 | { |
3791 | neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode)); | |
3792 | return "vshl.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %2"; | |
3793 | } | |
32093010 | 3794 | [(set_attr "type" "neon_shift_imm<q>")] |
bcaec148 | 3795 | ) |
d98a3884 | 3796 | |
;; Saturating left shift by immediate, instantiated over the VQSHL_N unspec
;; set (the <sup> substitution appears in both the pattern name and the
;; element suffix).  The immediate is checked with neon_const_bounds using
;; bounds 0 and neon_element_bits (<MODE>mode).
25a124b3 | 3797 | (define_insn "neon_vqshl_<sup>_n<mode>" |
d98a3884 | 3798 | [(set (match_operand:VDQIX 0 "s_register_operand" "=w") |
3799 | (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w") | |
25a124b3 | 3800 | (match_operand:SI 2 "immediate_operand" "i")] |
3801 | VQSHL_N))] | |
d98a3884 | 3802 | "TARGET_NEON" |
8521669a | 3803 | { |
3804 | neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode)); | |
25a124b3 | 3805 | return "vqshl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2"; |
8521669a | 3806 | } |
32093010 | 3807 | [(set_attr "type" "neon_sat_shift_imm<q>")] |
bcaec148 | 3808 | ) |
d98a3884 | 3809 | |
;; Emits "vqshlu.<V_s_elem>" with an immediate shift count (operand 2),
;; modeled as UNSPEC_VQSHLU_N.  The immediate is checked with
;; neon_const_bounds using bounds 0 and neon_element_bits (<MODE>mode).
3810 | (define_insn "neon_vqshlu_n<mode>" | |
3811 | [(set (match_operand:VDQIX 0 "s_register_operand" "=w") | |
3812 | (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w") | |
25a124b3 | 3813 | (match_operand:SI 2 "immediate_operand" "i")] |
d98a3884 | 3814 | UNSPEC_VQSHLU_N))] |
3815 | "TARGET_NEON" | |
8521669a | 3816 | { |
3817 | neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode)); | |
25a124b3 | 3818 | return "vqshlu.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %2"; |
8521669a | 3819 | } |
32093010 | 3820 | [(set_attr "type" "neon_sat_shift_imm<q>")] |
bcaec148 | 3821 | ) |
d98a3884 | 3822 | |
;; Widening left shift by immediate (VSHLL_N unspec set): source operand 1
;; is in the VW iterator mode, the result in <V_widen>.  The template prints
;; the destination with %q and the source with %P.
;; NOTE(review): the in-body comment states "0 < imm <= size", but the lower
;; bound handed to neon_const_bounds is 0, whereas the patterns in this file
;; that start their range at 1 (e.g. the right shifts) pass 1.  Confirm
;; against neon_const_bounds whether imm == 0 is meant to be accepted, and
;; align the comment or the bound accordingly.
25a124b3 | 3823 | (define_insn "neon_vshll<sup>_n<mode>" |
d98a3884 | 3824 | [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") |
3825 | (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w") | |
25a124b3 | 3826 | (match_operand:SI 2 "immediate_operand" "i")] |
3827 | VSHLL_N))] | |
d98a3884 | 3828 | "TARGET_NEON" |
8521669a | 3829 | { |
61725b11 | 3830 | /* The boundaries are: 0 < imm <= size. */ |
3831 | neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode) + 1); | |
25a124b3 | 3832 | return "vshll.<sup>%#<V_sz_elem>\t%q0, %P1, %2"; |
8521669a | 3833 | } |
32093010 | 3834 | [(set_attr "type" "neon_shift_imm_long")] |
bcaec148 | 3835 | ) |
d98a3884 | 3836 | |
25a124b3 | 3837 | ;; vsra_n, vrsra_n |
;; Shift-right-and-accumulate by immediate (VSRA_N unspec set).  Operand 1
;; is tied to the output (constraint "0") and therefore does not appear in
;; the assembler template; operand 2 is the vector being shifted and operand
;; 3 the immediate, checked with neon_const_bounds using bounds 1 and
;; neon_element_bits (<MODE>mode) + 1.
3838 | (define_insn "neon_v<shift_op><sup>_n<mode>" | |
d98a3884 | 3839 | [(set (match_operand:VDQIX 0 "s_register_operand" "=w") |
3840 | (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0") | |
3841 | (match_operand:VDQIX 2 "s_register_operand" "w") | |
25a124b3 | 3842 | (match_operand:SI 3 "immediate_operand" "i")] |
3843 | VSRA_N))] | |
d98a3884 | 3844 | "TARGET_NEON" |
8521669a | 3845 | { |
3846 | neon_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1); | |
25a124b3 | 3847 | return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3"; |
8521669a | 3848 | } |
32093010 | 3849 | [(set_attr "type" "neon_shift_acc<q>")] |
bcaec148 | 3850 | ) |
d98a3884 | 3851 | |
;; Emits "vsri.<V_sz_elem>" (UNSPEC_VSRI).  Operand 1 is tied to the output
;; (constraint "0") and is not printed; operand 2 is the register source
;; and operand 3 the immediate, checked with neon_const_bounds using bounds
;; 1 and neon_element_bits (<MODE>mode) + 1.
3852 | (define_insn "neon_vsri_n<mode>" | |
3853 | [(set (match_operand:VDQIX 0 "s_register_operand" "=w") | |
3854 | (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0") | |
3855 | (match_operand:VDQIX 2 "s_register_operand" "w") | |
3856 | (match_operand:SI 3 "immediate_operand" "i")] | |
3857 | UNSPEC_VSRI))] | |
3858 | "TARGET_NEON" | |
8521669a | 3859 | { |
3860 | neon_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1); | |
3861 | return "vsri.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3"; | |
3862 | } | |
32093010 | 3863 | [(set_attr "type" "neon_shift_reg<q>")] |
bcaec148 | 3864 | ) |
d98a3884 | 3865 | |
;; Emits "vsli.<V_sz_elem>" (UNSPEC_VSLI).  Operand 1 is tied to the output
;; (constraint "0") and is not printed; operand 2 is the register source
;; and operand 3 the immediate, checked with neon_const_bounds using bounds
;; 0 and neon_element_bits (<MODE>mode).
3866 | (define_insn "neon_vsli_n<mode>" | |
3867 | [(set (match_operand:VDQIX 0 "s_register_operand" "=w") | |
3868 | (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0") | |
3869 | (match_operand:VDQIX 2 "s_register_operand" "w") | |
3870 | (match_operand:SI 3 "immediate_operand" "i")] | |
3871 | UNSPEC_VSLI))] | |
3872 | "TARGET_NEON" | |
8521669a | 3873 | { |
3874 | neon_const_bounds (operands[3], 0, neon_element_bits (<MODE>mode)); | |
3875 | return "vsli.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3"; | |
3876 | } | |
32093010 | 3877 | [(set_attr "type" "neon_shift_reg<q>")] |
bcaec148 | 3878 | ) |
d98a3884 | 3879 | |
;; Table lookup with a one-register table: emits "vtbl.8" with operand 1 as
;; the single-entry register list and operand 2 as the index vector.  All
;; operands are V8QImode; modeled as UNSPEC_VTBL, type neon_tbl1.
3880 | (define_insn "neon_vtbl1v8qi" | |
3881 | [(set (match_operand:V8QI 0 "s_register_operand" "=w") | |
3882 | (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "w") | |
3883 | (match_operand:V8QI 2 "s_register_operand" "w")] | |
3884 | UNSPEC_VTBL))] | |
3885 | "TARGET_NEON" | |
bcaec148 | 3886 | "vtbl.8\t%P0, {%P1}, %P2" |
32093010 | 3887 | [(set_attr "type" "neon_tbl1")] |
bcaec148 | 3888 | ) |
d98a3884 | 3889 | |
;; Table lookup with a two-register table held in one TImode operand
;; (operand 1).  The output code takes that operand's hard register number
;; and builds V8QImode REG rtxes at tabbase and tabbase + 2 so the table can
;; be printed as a two-entry register list; operand 2 is the index vector.
;; The instruction is written with output_asm_insn on the local ops[] array
;; and "" is returned so nothing further is emitted.
3890 | (define_insn "neon_vtbl2v8qi" | |
3891 | [(set (match_operand:V8QI 0 "s_register_operand" "=w") | |
3892 | (unspec:V8QI [(match_operand:TI 1 "s_register_operand" "w") | |
3893 | (match_operand:V8QI 2 "s_register_operand" "w")] | |
3894 | UNSPEC_VTBL))] | |
3895 | "TARGET_NEON" | |
3896 | { | |
3897 | rtx ops[4]; | |
3898 | int tabbase = REGNO (operands[1]); | |
3899 | ||
3900 | ops[0] = operands[0]; | |
3901 | ops[1] = gen_rtx_REG (V8QImode, tabbase); | |
3902 | ops[2] = gen_rtx_REG (V8QImode, tabbase + 2); | |
3903 | ops[3] = operands[2]; | |
3904 | output_asm_insn ("vtbl.8\t%P0, {%P1, %P2}, %P3", ops); | |
3905 | ||
3906 | return ""; | |
bcaec148 | 3907 | } |
32093010 | 3908 | [(set_attr "type" "neon_tbl2")] |
bcaec148 | 3909 | ) |
d98a3884 | 3910 | |
;; Table lookup with a three-register table held in one EImode operand
;; (operand 1).  The output code builds V8QImode REG rtxes at tabbase,
;; tabbase + 2 and tabbase + 4 from that operand's hard register number and
;; prints them as the register list; operand 2 is the index vector.  The
;; instruction is written with output_asm_insn and "" is returned.
3911 | (define_insn "neon_vtbl3v8qi" | |
3912 | [(set (match_operand:V8QI 0 "s_register_operand" "=w") | |
3913 | (unspec:V8QI [(match_operand:EI 1 "s_register_operand" "w") | |
3914 | (match_operand:V8QI 2 "s_register_operand" "w")] | |
3915 | UNSPEC_VTBL))] | |
3916 | "TARGET_NEON" | |
3917 | { | |
3918 | rtx ops[5]; | |
3919 | int tabbase = REGNO (operands[1]); | |
3920 | ||
3921 | ops[0] = operands[0]; | |
3922 | ops[1] = gen_rtx_REG (V8QImode, tabbase); | |
3923 | ops[2] = gen_rtx_REG (V8QImode, tabbase + 2); | |
3924 | ops[3] = gen_rtx_REG (V8QImode, tabbase + 4); | |
3925 | ops[4] = operands[2]; | |
3926 | output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3}, %P4", ops); | |
3927 | ||
3928 | return ""; | |
bcaec148 | 3929 | } |
32093010 | 3930 | [(set_attr "type" "neon_tbl3")] |
bcaec148 | 3931 | ) |
d98a3884 | 3932 | |
;; Table lookup with a four-register table held in one OImode operand
;; (operand 1).  The output code builds V8QImode REG rtxes at tabbase,
;; tabbase + 2, tabbase + 4 and tabbase + 6 from that operand's hard
;; register number and prints them as the register list; operand 2 is the
;; index vector.  The instruction is written with output_asm_insn and ""
;; is returned.
3933 | (define_insn "neon_vtbl4v8qi" | |
3934 | [(set (match_operand:V8QI 0 "s_register_operand" "=w") | |
3935 | (unspec:V8QI [(match_operand:OI 1 "s_register_operand" "w") | |
3936 | (match_operand:V8QI 2 "s_register_operand" "w")] | |
3937 | UNSPEC_VTBL))] | |
3938 | "TARGET_NEON" | |
3939 | { | |
3940 | rtx ops[6]; | |
3941 | int tabbase = REGNO (operands[1]); | |
3942 | ||
3943 | ops[0] = operands[0]; | |
3944 | ops[1] = gen_rtx_REG (V8QImode, tabbase); | |
3945 | ops[2] = gen_rtx_REG (V8QImode, tabbase + 2); | |
3946 | ops[3] = gen_rtx_REG (V8QImode, tabbase + 4); | |
3947 | ops[4] = gen_rtx_REG (V8QImode, tabbase + 6); | |
3948 | ops[5] = operands[2]; | |
3949 | output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops); | |
3950 | ||
3951 | return ""; | |
bcaec148 | 3952 | } |
32093010 | 3953 | [(set_attr "type" "neon_tbl4")] |
bcaec148 | 3954 | ) |
d98a3884 | 3955 | |
47ddcd6b | 3956 | ;; These three are used by the vec_perm infrastructure for V16QImode. |
;; V16QImode table lookup with a V16QImode table.  Output as "#" and split
;; once reload_completed into two neon_vtbl2v8qi insns: one for the low and
;; one for the high V8QImode half of operands 0 and 2 (halves obtained with
;; subreg_lowpart_offset / subreg_highpart_offset and simplify_subreg).
;; Both halves use the whole of operand 1, viewed as TImode via gen_lowpart,
;; as the two-register table.  The output is earlyclobber ("=&w"),
;; presumably because the first half-result is written while the table and
;; the remaining indices are still needed by the second insn.
3957 | (define_insn_and_split "neon_vtbl1v16qi" | |
3958 | [(set (match_operand:V16QI 0 "s_register_operand" "=&w") | |
3959 | (unspec:V16QI [(match_operand:V16QI 1 "s_register_operand" "w") | |
3960 | (match_operand:V16QI 2 "s_register_operand" "w")] | |
3961 | UNSPEC_VTBL))] | |
3962 | "TARGET_NEON" | |
3963 | "#" | |
3964 | "&& reload_completed" | |
3965 | [(const_int 0)] | |
3966 | { | |
3967 | rtx op0, op1, op2, part0, part2; | |
3968 | unsigned ofs; | |
3969 | ||
3970 | op0 = operands[0]; | |
3971 | op1 = gen_lowpart (TImode, operands[1]); | |
3972 | op2 = operands[2]; | |
3973 | ||
3974 | ofs = subreg_lowpart_offset (V8QImode, V16QImode); | |
3975 | part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs); | |
3976 | part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs); | |
3977 | emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2)); | |
3978 | ||
3979 | ofs = subreg_highpart_offset (V8QImode, V16QImode); | |
3980 | part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs); | |
3981 | part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs); | |
3982 | emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2)); | |
3983 | DONE; | |
32093010 | 3984 | } |
3985 | [(set_attr "type" "multiple")] | |
3986 | ) | |
47ddcd6b | 3987 | |
;; V16QImode table lookup with an OImode table (operand 1).  Output as "#"
;; and split once reload_completed into two insns, one per V8QImode half of
;; operands 0 and 2 (halves obtained with subreg_lowpart_offset /
;; subreg_highpart_offset and simplify_subreg); operand 1 is passed to both
;; unchanged.  The output is earlyclobber ("=&w").
;; NOTE(review): the split calls gen_neon_vtbl2v8qi, whose table operand is
;; declared TImode in this file, with the OImode operand 1 -- confirm that
;; this is the intended generator (rather than neon_vtbl4v8qi, which takes
;; an OImode table).
3988 | (define_insn_and_split "neon_vtbl2v16qi" | |
3989 | [(set (match_operand:V16QI 0 "s_register_operand" "=&w") | |
3990 | (unspec:V16QI [(match_operand:OI 1 "s_register_operand" "w") | |
3991 | (match_operand:V16QI 2 "s_register_operand" "w")] | |
3992 | UNSPEC_VTBL))] | |
3993 | "TARGET_NEON" | |
3994 | "#" | |
3995 | "&& reload_completed" | |
3996 | [(const_int 0)] | |
3997 | { | |
3998 | rtx op0, op1, op2, part0, part2; | |
3999 | unsigned ofs; | |
4000 | ||
4001 | op0 = operands[0]; | |
4002 | op1 = operands[1]; | |
4003 | op2 = operands[2]; | |
4004 | ||
4005 | ofs = subreg_lowpart_offset (V8QImode, V16QImode); | |
4006 | part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs); | |
4007 | part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs); | |
4008 | emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2)); | |
4009 | ||
4010 | ofs = subreg_highpart_offset (V8QImode, V16QImode); | |
4011 | part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs); | |
4012 | part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs); | |
4013 | emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2)); | |
4014 | DONE; | |
32093010 | 4015 | } |
4016 | [(set_attr "type" "multiple")] | |
4017 | ) | |
47ddcd6b | 4018 | |
4019 | ;; ??? Logically we should extend the regular neon_vcombine pattern to | |
4020 | ;; handle quad-word input modes, producing octa-word output modes. But | |
4021 | ;; that requires us to add support for octa-word vector modes in moves. | |
4022 | ;; That seems overkill for this one use in vec_perm. | |
;; Combines two V16QImode registers (operands 1 and 2) into one OImode
;; result, modeled as UNSPEC_VCONCAT.  Output as "#" and split once
;; reload_completed; the split body hands the operands array to
;; neon_split_vcombine and finishes with DONE.
4023 | (define_insn_and_split "neon_vcombinev16qi" | |
4024 | [(set (match_operand:OI 0 "s_register_operand" "=w") | |
4025 | (unspec:OI [(match_operand:V16QI 1 "s_register_operand" "w") | |
4026 | (match_operand:V16QI 2 "s_register_operand" "w")] | |
4027 | UNSPEC_VCONCAT))] | |
4028 | "TARGET_NEON" | |
4029 | "#" | |
4030 | "&& reload_completed" | |
4031 | [(const_int 0)] | |
4032 | { | |
4033 | neon_split_vcombine (operands); | |
4034 | DONE; | |
32093010 | 4035 | } |
4036 | [(set_attr "type" "multiple")] | |
4037 | ) | |
47ddcd6b | 4038 | |
;; Table-lookup-extension with a one-register table: emits "vtbx.8" with
;; operand 2 as the single-entry register list and operand 3 as the index
;; vector.  Operand 1 is tied to the output (constraint "0") and is not
;; printed.  Modeled as UNSPEC_VTBX; type neon_tbl1.
d98a3884 | 4039 | (define_insn "neon_vtbx1v8qi" |
4040 | [(set (match_operand:V8QI 0 "s_register_operand" "=w") | |
4041 | (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0") | |
4042 | (match_operand:V8QI 2 "s_register_operand" "w") | |
4043 | (match_operand:V8QI 3 "s_register_operand" "w")] | |
4044 | UNSPEC_VTBX))] | |
4045 | "TARGET_NEON" | |
bcaec148 | 4046 | "vtbx.8\t%P0, {%P2}, %P3" |
32093010 | 4047 | [(set_attr "type" "neon_tbl1")] |
bcaec148 | 4048 | ) |
d98a3884 | 4049 | |
;; Table-lookup-extension with a two-register table held in one TImode
;; operand (operand 2).  Operand 1 is tied to the output (constraint "0").
;; The output code builds V8QImode REG rtxes at tabbase and tabbase + 2 from
;; operand 2's hard register number and prints them as the register list;
;; operand 3 is the index vector.  The instruction is written with
;; output_asm_insn and "" is returned.
4050 | (define_insn "neon_vtbx2v8qi" | |
4051 | [(set (match_operand:V8QI 0 "s_register_operand" "=w") | |
4052 | (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0") | |
4053 | (match_operand:TI 2 "s_register_operand" "w") | |
4054 | (match_operand:V8QI 3 "s_register_operand" "w")] | |
4055 | UNSPEC_VTBX))] | |
4056 | "TARGET_NEON" | |
4057 | { | |
4058 | rtx ops[4]; | |
4059 | int tabbase = REGNO (operands[2]); | |
4060 | ||
4061 | ops[0] = operands[0]; | |
4062 | ops[1] = gen_rtx_REG (V8QImode, tabbase); | |
4063 | ops[2] = gen_rtx_REG (V8QImode, tabbase + 2); | |
4064 | ops[3] = operands[3]; | |
4065 | output_asm_insn ("vtbx.8\t%P0, {%P1, %P2}, %P3", ops); | |
4066 | ||
4067 | return ""; | |
bcaec148 | 4068 | } |
32093010 | 4069 | [(set_attr "type" "neon_tbl2")] |
bcaec148 | 4070 | ) |
d98a3884 | 4071 | |
;; Table-lookup-extension with a three-register table held in one EImode
;; operand (operand 2).  Operand 1 is tied to the output (constraint "0").
;; The output code builds V8QImode REG rtxes at tabbase, tabbase + 2 and
;; tabbase + 4 from operand 2's hard register number and prints them as the
;; register list; operand 3 is the index vector.  The instruction is
;; written with output_asm_insn and "" is returned.
4072 | (define_insn "neon_vtbx3v8qi" | |
4073 | [(set (match_operand:V8QI 0 "s_register_operand" "=w") | |
4074 | (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0") | |
4075 | (match_operand:EI 2 "s_register_operand" "w") | |
4076 | (match_operand:V8QI 3 "s_register_operand" "w")] | |
4077 | UNSPEC_VTBX))] | |
4078 | "TARGET_NEON" | |
4079 | { | |
4080 | rtx ops[5]; | |
4081 | int tabbase = REGNO (operands[2]); | |
4082 | ||
4083 | ops[0] = operands[0]; | |
4084 | ops[1] = gen_rtx_REG (V8QImode, tabbase); | |
4085 | ops[2] = gen_rtx_REG (V8QImode, tabbase + 2); | |
4086 | ops[3] = gen_rtx_REG (V8QImode, tabbase + 4); | |
4087 | ops[4] = operands[3]; | |
4088 | output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3}, %P4", ops); | |
4089 | ||
4090 | return ""; | |
bcaec148 | 4091 | } |
32093010 | 4092 | [(set_attr "type" "neon_tbl3")] |
bcaec148 | 4093 | ) |
d98a3884 | 4094 | |
;; Table-lookup-extension with a four-register table held in one OImode
;; operand (operand 2).  Operand 1 is tied to the output (constraint "0").
;; The output code builds V8QImode REG rtxes at tabbase, tabbase + 2,
;; tabbase + 4 and tabbase + 6 from operand 2's hard register number and
;; prints them as the register list; operand 3 is the index vector.  The
;; instruction is written with output_asm_insn and "" is returned.
4095 | (define_insn "neon_vtbx4v8qi" | |
4096 | [(set (match_operand:V8QI 0 "s_register_operand" "=w") | |
4097 | (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0") | |
4098 | (match_operand:OI 2 "s_register_operand" "w") | |
4099 | (match_operand:V8QI 3 "s_register_operand" "w")] | |
4100 | UNSPEC_VTBX))] | |
4101 | "TARGET_NEON" | |
4102 | { | |
4103 | rtx ops[6]; | |
4104 | int tabbase = REGNO (operands[2]); | |
4105 | ||
4106 | ops[0] = operands[0]; | |
4107 | ops[1] = gen_rtx_REG (V8QImode, tabbase); | |
4108 | ops[2] = gen_rtx_REG (V8QImode, tabbase + 2); | |
4109 | ops[3] = gen_rtx_REG (V8QImode, tabbase + 4); | |
4110 | ops[4] = gen_rtx_REG (V8QImode, tabbase + 6); | |
4111 | ops[5] = operands[3]; | |
4112 | output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops); | |
4113 | ||
4114 | return ""; | |
bcaec148 | 4115 | } |
32093010 | 4116 | [(set_attr "type" "neon_tbl4")] |
bcaec148 | 4117 | ) |
d98a3884 | 4118 | |
;; Expander producing a two-output parallel for vtrn: operand 0 receives
;; UNSPEC_VTRN1 and operand 3 receives UNSPEC_VTRN2, both computed from the
;; same inputs (operands 1 and 2).  The preparation statement is empty, so
;; the parallel is emitted as written.  Note the operand numbering here
;; (outputs 0 and 3) differs from the *neon_vtrn<mode>_insn pattern.
189fec2b | 4119 | (define_expand "neon_vtrn<mode>_internal" |
4120 | [(parallel | |
4121 | [(set (match_operand:VDQW 0 "s_register_operand" "") | |
4122 | (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "") | |
4123 | (match_operand:VDQW 2 "s_register_operand" "")] | |
4124 | UNSPEC_VTRN1)) | |
4125 | (set (match_operand:VDQW 3 "s_register_operand" "") | |
4126 | (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])] | |
4127 | "TARGET_NEON" | |
4128 | "" | |
4129 | ) | |
4130 | ||
4131 | ;; Note: Different operand numbering to handle tied registers correctly. | |
;; Matching insn for the vtrn parallel.  Operands are numbered so that the
;; two outputs are 0 and 2 and each input is tied to one of them: operand 1
;; uses constraint "0" and operand 3 uses constraint "2".  Both outputs are
;; earlyclobber ("=&w").  Only the two output registers are printed, since
;; the inputs occupy the same registers.
4132 | (define_insn "*neon_vtrn<mode>_insn" | |
f66a1e76 | 4133 | [(set (match_operand:VDQW 0 "s_register_operand" "=&w") |
fd4b5409 | 4134 | (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0") |
189fec2b | 4135 | (match_operand:VDQW 3 "s_register_operand" "2")] |
fd4b5409 | 4136 | UNSPEC_VTRN1)) |
f66a1e76 | 4137 | (set (match_operand:VDQW 2 "s_register_operand" "=&w") |
189fec2b | 4138 | (unspec:VDQW [(match_dup 1) (match_dup 3)] |
fd4b5409 | 4139 | UNSPEC_VTRN2))] |
4140 | "TARGET_NEON" | |
189fec2b | 4141 | "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2" |
32093010 | 4142 | [(set_attr "type" "neon_permute<q>")] |
bcaec148 | 4143 | ) |
d98a3884 | 4144 | |
;; Expander producing a two-output parallel for vzip: operand 0 receives
;; UNSPEC_VZIP1 and operand 3 receives UNSPEC_VZIP2, both computed from the
;; same inputs (operands 1 and 2).  The preparation statement is empty, so
;; the parallel is emitted as written.  Note the operand numbering here
;; (outputs 0 and 3) differs from the *neon_vzip<mode>_insn pattern.
189fec2b | 4145 | (define_expand "neon_vzip<mode>_internal" |
4146 | [(parallel | |
4147 | [(set (match_operand:VDQW 0 "s_register_operand" "") | |
4148 | (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "") | |
4149 | (match_operand:VDQW 2 "s_register_operand" "")] | |
4150 | UNSPEC_VZIP1)) | |
4151 | (set (match_operand:VDQW 3 "s_register_operand" "") | |
4152 | (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])] | |
4153 | "TARGET_NEON" | |
4154 | "" | |
4155 | ) | |
4156 | ||
4157 | ;; Note: Different operand numbering to handle tied registers correctly. | |
;; Matching insn for the vzip parallel.  Operands are numbered so that the
;; two outputs are 0 and 2 and each input is tied to one of them: operand 1
;; uses constraint "0" and operand 3 uses constraint "2".  Both outputs are
;; earlyclobber ("=&w").  Only the two output registers are printed, since
;; the inputs occupy the same registers.
4158 | (define_insn "*neon_vzip<mode>_insn" | |
f66a1e76 | 4159 | [(set (match_operand:VDQW 0 "s_register_operand" "=&w") |
fd4b5409 | 4160 | (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0") |
189fec2b | 4161 | (match_operand:VDQW 3 "s_register_operand" "2")] |
fd4b5409 | 4162 | UNSPEC_VZIP1)) |
f66a1e76 | 4163 | (set (match_operand:VDQW 2 "s_register_operand" "=&w") |
189fec2b | 4164 | (unspec:VDQW [(match_dup 1) (match_dup 3)] |
fd4b5409 | 4165 | UNSPEC_VZIP2))] |
4166 | "TARGET_NEON" | |
189fec2b | 4167 | "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2" |
32093010 | 4168 | [(set_attr "type" "neon_zip<q>")] |
bcaec148 | 4169 | ) |
d98a3884 | 4170 | |
;; Expander producing a two-output parallel for vuzp: operand 0 receives
;; UNSPEC_VUZP1 and operand 3 receives UNSPEC_VUZP2, both computed from the
;; same inputs (operands 1 and 2).  The preparation statement is empty, so
;; the parallel is emitted as written.  Note the operand numbering here
;; (outputs 0 and 3) differs from the *neon_vuzp<mode>_insn pattern.
189fec2b | 4171 | (define_expand "neon_vuzp<mode>_internal" |
4172 | [(parallel | |
4173 | [(set (match_operand:VDQW 0 "s_register_operand" "") | |
4174 | (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "") | |
4175 | (match_operand:VDQW 2 "s_register_operand" "")] | |
4176 | UNSPEC_VUZP1)) | |
4177 | (set (match_operand:VDQW 3 "s_register_operand" "") | |
4178 | (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])] | |
4179 | "TARGET_NEON" | |
4180 | "" | |
4181 | ) | |
4182 | ||
4183 | ;; Note: Different operand numbering to handle tied registers correctly. | |
;; Matching insn for the vuzp parallel.  Operands are numbered so that the
;; two outputs are 0 and 2 and each input is tied to one of them: operand 1
;; uses constraint "0" and operand 3 uses constraint "2".  Both outputs are
;; earlyclobber ("=&w").  Only the two output registers are printed, since
;; the inputs occupy the same registers.
4184 | (define_insn "*neon_vuzp<mode>_insn" | |
f66a1e76 | 4185 | [(set (match_operand:VDQW 0 "s_register_operand" "=&w") |
fd4b5409 | 4186 | (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0") |
189fec2b | 4187 | (match_operand:VDQW 3 "s_register_operand" "2")] |
d98a3884 | 4188 | UNSPEC_VUZP1)) |
f66a1e76 | 4189 | (set (match_operand:VDQW 2 "s_register_operand" "=&w") |
189fec2b | 4190 | (unspec:VDQW [(match_dup 1) (match_dup 3)] |
fd4b5409 | 4191 | UNSPEC_VUZP2))] |
d98a3884 | 4192 | "TARGET_NEON" |
189fec2b | 4193 | "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2" |
32093010 | 4194 | [(set_attr "type" "neon_zip<q>")] |
bcaec148 | 4195 | ) |
d98a3884 | 4196 | |
;; Reinterpret any VD_RE-mode register (operand 1) as V8QImode (operand 0).
;; The whole expansion is delegated to neon_reinterpret; the body ends with
;; DONE, so no RTL template is emitted.
d98a3884 | 4197 | (define_expand "neon_vreinterpretv8qi<mode>" |
4198 | [(match_operand:V8QI 0 "s_register_operand" "") | |
fb5f110d | 4199 | (match_operand:VD_RE 1 "s_register_operand" "")] |
d98a3884 | 4200 | "TARGET_NEON" |
4201 | { | |
4202 | neon_reinterpret (operands[0], operands[1]); | |
4203 | DONE; | |
4204 | }) | |
4205 | ||
;; Reinterpret any VD_RE-mode register (operand 1) as V4HImode (operand 0).
;; The whole expansion is delegated to neon_reinterpret; the body ends with
;; DONE, so no RTL template is emitted.
4206 | (define_expand "neon_vreinterpretv4hi<mode>" | |
4207 | [(match_operand:V4HI 0 "s_register_operand" "") | |
fb5f110d | 4208 | (match_operand:VD_RE 1 "s_register_operand" "")] |
d98a3884 | 4209 | "TARGET_NEON" |
4210 | { | |
4211 | neon_reinterpret (operands[0], operands[1]); | |
4212 | DONE; | |
4213 | }) | |
4214 | ||
;; Reinterpret any VD_RE-mode register (operand 1) as V2SImode (operand 0).
;; The whole expansion is delegated to neon_reinterpret; the body ends with
;; DONE, so no RTL template is emitted.
4215 | (define_expand "neon_vreinterpretv2si<mode>" | |
4216 | [(match_operand:V2SI 0 "s_register_operand" "") | |
fb5f110d | 4217 | (match_operand:VD_RE 1 "s_register_operand" "")] |
d98a3884 | 4218 | "TARGET_NEON" |
4219 | { | |
4220 | neon_reinterpret (operands[0], operands[1]); | |
4221 | DONE; | |
4222 | }) | |
4223 | ||
;; Reinterpret any VD_RE-mode register (operand 1) as V2SFmode (operand 0).
;; The whole expansion is delegated to neon_reinterpret; the body ends with
;; DONE, so no RTL template is emitted.
4224 | (define_expand "neon_vreinterpretv2sf<mode>" | |
4225 | [(match_operand:V2SF 0 "s_register_operand" "") | |
fb5f110d | 4226 | (match_operand:VD_RE 1 "s_register_operand" "")] |
d98a3884 | 4227 | "TARGET_NEON" |
4228 | { | |
4229 | neon_reinterpret (operands[0], operands[1]); | |
4230 | DONE; | |
4231 | }) | |
4232 | ||
;; Reinterpret any VD_RE-mode register (operand 1) as DImode (operand 0).
;; The whole expansion is delegated to neon_reinterpret; the body ends with
;; DONE, so no RTL template is emitted.
4233 | (define_expand "neon_vreinterpretdi<mode>" | |
4234 | [(match_operand:DI 0 "s_register_operand" "") | |
fb5f110d | 4235 | (match_operand:VD_RE 1 "s_register_operand" "")] |
d98a3884 | 4236 | "TARGET_NEON" |
4237 | { | |
4238 | neon_reinterpret (operands[0], operands[1]); | |
4239 | DONE; | |
4240 | }) | |
4241 | ||
;; Reinterpret any VQXMOV-mode register (operand 1) as TImode (operand 0).
;; The whole expansion is delegated to neon_reinterpret; the body ends with
;; DONE, so no RTL template is emitted.
e84fdf6e | 4242 | (define_expand "neon_vreinterpretti<mode>" |
4243 | [(match_operand:TI 0 "s_register_operand" "") | |
4244 | (match_operand:VQXMOV 1 "s_register_operand" "")] | |
4245 | "TARGET_NEON" | |
4246 | { | |
4247 | neon_reinterpret (operands[0], operands[1]); | |
4248 | DONE; | |
4249 | }) | |
4250 | ||
4251 | ||
;; Reinterpret any VQXMOV-mode register (operand 1) as V16QImode (operand
;; 0).  The whole expansion is delegated to neon_reinterpret; the body ends
;; with DONE, so no RTL template is emitted.
d98a3884 | 4252 | (define_expand "neon_vreinterpretv16qi<mode>" |
4253 | [(match_operand:V16QI 0 "s_register_operand" "") | |
e84fdf6e | 4254 | (match_operand:VQXMOV 1 "s_register_operand" "")] |
d98a3884 | 4255 | "TARGET_NEON" |
4256 | { | |
4257 | neon_reinterpret (operands[0], operands[1]); | |
4258 | DONE; | |
4259 | }) | |
4260 | ||
;; Reinterpret any VQXMOV-mode register (operand 1) as V8HImode (operand 0).
;; The whole expansion is delegated to neon_reinterpret; the body ends with
;; DONE, so no RTL template is emitted.
4261 | (define_expand "neon_vreinterpretv8hi<mode>" | |
4262 | [(match_operand:V8HI 0 "s_register_operand" "") | |
e84fdf6e | 4263 | (match_operand:VQXMOV 1 "s_register_operand" "")] |
d98a3884 | 4264 | "TARGET_NEON" |
4265 | { | |
4266 | neon_reinterpret (operands[0], operands[1]); | |
4267 | DONE; | |
4268 | }) | |
4269 | ||
;; Reinterpret any VQXMOV-mode register (operand 1) as V4SImode (operand 0).
;; The whole expansion is delegated to neon_reinterpret; the body ends with
;; DONE, so no RTL template is emitted.
4270 | (define_expand "neon_vreinterpretv4si<mode>" | |
4271 | [(match_operand:V4SI 0 "s_register_operand" "") | |
e84fdf6e | 4272 | (match_operand:VQXMOV 1 "s_register_operand" "")] |
d98a3884 | 4273 | "TARGET_NEON" |
4274 | { | |
4275 | neon_reinterpret (operands[0], operands[1]); | |
4276 | DONE; | |
4277 | }) | |
4278 | ||
;; Reinterpret any VQXMOV-mode register (operand 1) as V4SFmode (operand 0).
;; The whole expansion is delegated to neon_reinterpret; the body ends with
;; DONE, so no RTL template is emitted.
4279 | (define_expand "neon_vreinterpretv4sf<mode>" | |
4280 | [(match_operand:V4SF 0 "s_register_operand" "") | |
e84fdf6e | 4281 | (match_operand:VQXMOV 1 "s_register_operand" "")] |
d98a3884 | 4282 | "TARGET_NEON" |
4283 | { | |
4284 | neon_reinterpret (operands[0], operands[1]); | |
4285 | DONE; | |
4286 | }) | |
4287 | ||
;; Reinterpret any VQXMOV-mode register (operand 1) as V2DImode (operand 0).
;; The whole expansion is delegated to neon_reinterpret; the body ends with
;; DONE, so no RTL template is emitted.
4288 | (define_expand "neon_vreinterpretv2di<mode>" | |
4289 | [(match_operand:V2DI 0 "s_register_operand" "") | |
e84fdf6e | 4290 | (match_operand:VQXMOV 1 "s_register_operand" "")] |
d98a3884 | 4291 | "TARGET_NEON" |
4292 | { | |
4293 | neon_reinterpret (operands[0], operands[1]); | |
4294 | DONE; | |
4295 | }) | |
4296 | ||
;; Expander named vec_load_lanes<mode><mode> (source and destination modes
;; are the same).  It has no preparation code: it simply emits a set of
;; register operand 0 from an UNSPEC_VLD1 of neon_struct_operand 1, the same
;; RTL shape as the neon_vld1<mode> insn.
672b3f5b | 4297 | (define_expand "vec_load_lanes<mode><mode>" |
4298 | [(set (match_operand:VDQX 0 "s_register_operand") | |
4299 | (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand")] | |
4300 | UNSPEC_VLD1))] | |
4301 | "TARGET_NEON") | |
4302 | ||
;; Whole-vector load: emits "vld1.<V_sz_elem>" into register operand 0 from
;; memory operand 1 (predicate neon_struct_operand, constraint "Um").  The
;; destination is printed with the %h modifier and the address with %A.
;; Modeled as UNSPEC_VLD1; type neon_load1_1reg<q>.
d98a3884 | 4303 | (define_insn "neon_vld1<mode>" |
4304 | [(set (match_operand:VDQX 0 "s_register_operand" "=w") | |
e5bf7a7a | 4305 | (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand" "Um")] |
d98a3884 | 4306 | UNSPEC_VLD1))] |
4307 | "TARGET_NEON" | |
e5bf7a7a | 4308 | "vld1.<V_sz_elem>\t%h0, %A1" |
32093010 | 4309 | [(set_attr "type" "neon_load1_1reg<q>")] |
bcaec148 | 4310 | ) |
d98a3884 | 4311 | |
fe8542d5 | 4312 | ;; The lane numbers in the RTL are in GCC lane order, having been flipped |
4313 | ;; in arm_expand_neon_args. The lane numbers are restored to architectural | |
4314 | ;; lane order here. | |
d98a3884 | 4315 | (define_insn "neon_vld1_lane<mode>" ; VDX variant: load one <V_elem> from operand 1 into lane operand 3. |
4316 | [(set (match_operand:VDX 0 "s_register_operand" "=w") | |
e5bf7a7a | 4317 | (unspec:VDX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um") |
d98a3884 | 4318 | (match_operand:VDX 2 "s_register_operand" "0") ; Input vector, tied to operand 0 by the matching constraint. |
4319 | (match_operand:SI 3 "immediate_operand" "i")] | |
4320 | UNSPEC_VLD1_LANE))] | |
4321 | "TARGET_NEON" | |
4322 | { | |
fe8542d5 | 4323 | HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3])); |
d98a3884 | 4324 | HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode); |
fe8542d5 | 4325 | operands[3] = GEN_INT (lane); /* Store the remapped lane back so that %c3 prints it. */ |
d98a3884 | 4326 | if (max == 1) /* Single-element mode: the template has no lane index. */ |
e5bf7a7a | 4327 | return "vld1.<V_sz_elem>\t%P0, %A1"; |
d98a3884 | 4328 | else |
e5bf7a7a | 4329 | return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1"; |
bcaec148 | 4330 | } |
32093010 | 4331 | [(set_attr "type" "neon_load1_one_lane<q>")] |
bcaec148 | 4332 | ) |
d98a3884 | 4333 | |
fe8542d5 | 4334 | ;; See the comment on the VDX variant of neon_vld1_lane above for why the lane |
4335 | ;; numbers are reversed here on big-endian targets. | |
d98a3884 | 4336 | (define_insn "neon_vld1_lane<mode>" ; VQX variant: the lane is addressed through a <V_HALF>mode register. |
4337 | [(set (match_operand:VQX 0 "s_register_operand" "=w") | |
e5bf7a7a | 4338 | (unspec:VQX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um") |
d98a3884 | 4339 | (match_operand:VQX 2 "s_register_operand" "0") |
4340 | (match_operand:SI 3 "immediate_operand" "i")] | |
4341 | UNSPEC_VLD1_LANE))] | |
4342 | "TARGET_NEON" | |
4343 | { | |
fe8542d5 | 4344 | HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3])); |
d98a3884 | 4345 | HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode); |
fe8542d5 | 4346 | operands[3] = GEN_INT (lane); |
d98a3884 | 4347 | int regno = REGNO (operands[0]); |
2989d25d | 4348 | if (lane >= max / 2) /* Lane falls in the upper half of the vector. */ |
d98a3884 | 4349 | { |
4350 | lane -= max / 2; /* Make the index relative to that half. */ | |
4351 | regno += 2; /* Advance the register number to the upper half. */ | |
4352 | operands[3] = GEN_INT (lane); | |
4353 | } | |
4354 | operands[0] = gen_rtx_REG (<V_HALF>mode, regno); /* Print only the half-width register that holds the lane. */ | |
4355 | if (max == 2) /* One element per half: the template has no lane index. */ | |
e5bf7a7a | 4356 | return "vld1.<V_sz_elem>\t%P0, %A1"; |
d98a3884 | 4357 | else |
e5bf7a7a | 4358 | return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1"; |
bcaec148 | 4359 | } |
32093010 | 4360 | [(set_attr "type" "neon_load1_one_lane<q>")] |
bcaec148 | 4361 | ) |
d98a3884 | 4362 | |
4363 | (define_insn "neon_vld1_dup<mode>" ; VD variant: operand 0 is a vec_duplicate of the <V_elem> in memory operand 1. | |
ad504e50 | 4364 | [(set (match_operand:VD 0 "s_register_operand" "=w") |
4365 | (vec_duplicate:VD (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))] | |
d98a3884 | 4366 | "TARGET_NEON" |
ad504e50 | 4367 | "vld1.<V_sz_elem>\t{%P0[]}, %A1" |
32093010 | 4368 | [(set_attr "type" "neon_load1_all_lanes<q>")] |
ad504e50 | 4369 | ) |
4370 | ||
4371 | ;; Special case for DImode. Treat it exactly like a simple load. | |
4372 | (define_expand "neon_vld1_dupdi" ; Expands to the pattern below as written; it uses UNSPEC_VLD1, as neon_vld1<mode> does. | |
4373 | [(set (match_operand:DI 0 "s_register_operand" "") | |
4374 | (unspec:DI [(match_operand:DI 1 "neon_struct_operand" "")] | |
4375 | UNSPEC_VLD1))] | |
4376 | "TARGET_NEON" | |
4377 | "" ; No preparation statements. | |
bcaec148 | 4378 | ) |
d98a3884 | 4379 | |
4380 | (define_insn "neon_vld1_dup<mode>" ; VQ variant: operand 0 is a vec_duplicate of the <V_elem> in memory operand 1. | |
f983358a | 4381 | [(set (match_operand:VQ 0 "s_register_operand" "=w") |
4382 | (vec_duplicate:VQ (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))] | |
d98a3884 | 4383 | "TARGET_NEON" |
4384 | { | |
f983358a | 4385 | return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1"; /* Both %e0 and %f0 of operand 0 are listed, each with an empty lane index. */ |
bcaec148 | 4386 | } |
32093010 | 4387 | [(set_attr "type" "neon_load1_all_lanes<q>")] |
f983358a | 4388 | ) |
4389 | ||
4390 | (define_insn_and_split "neon_vld1_dupv2di" ; V2DI duplicate load, implemented as a post-reload split. | |
4391 | [(set (match_operand:V2DI 0 "s_register_operand" "=w") | |
4392 | (vec_duplicate:V2DI (match_operand:DI 1 "neon_struct_operand" "Um")))] | |
4393 | "TARGET_NEON" | |
4394 | "#" ; No assembly template of its own; the insn is expected to be split. | |
4395 | "&& reload_completed" ; Split condition, in addition to the insn condition above. | |
4396 | [(const_int 0)] | |
4397 | { | |
4398 | rtx tmprtx = gen_lowpart (DImode, operands[0]); /* DImode low part of the destination. */ | |
4399 | emit_insn (gen_neon_vld1_dupdi (tmprtx, operands[1])); /* Load the value into the low part... */ | |
4400 | emit_move_insn (gen_highpart (DImode, operands[0]), tmprtx ); /* ...then copy it to the high part. */ | |
4401 | DONE; | |
4402 | } | |
4403 | [(set_attr "length" "8") | |
32093010 | 4404 | (set_attr "type" "neon_load1_all_lanes_q")] |
bcaec148 | 4405 | ) |
d98a3884 | 4406 | |
672b3f5b | 4407 | (define_expand "vec_store_lanes<mode><mode>" ; Same UNSPEC_VST1 set as neon_vst1<mode>, written without constraints. |
4408 | [(set (match_operand:VDQX 0 "neon_struct_operand") | |
4409 | (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand")] | |
4410 | UNSPEC_VST1))] | |
4411 | "TARGET_NEON") | |
4412 | ||
d98a3884 | 4413 | (define_insn "neon_vst1<mode>" ; One vst1 of register operand 1 to memory operand 0. |
e5bf7a7a | 4414 | [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um") |
d98a3884 | 4415 | (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")] |
4416 | UNSPEC_VST1))] | |
4417 | "TARGET_NEON" | |
e5bf7a7a | 4418 | "vst1.<V_sz_elem>\t%h1, %A0" |
32093010 | 4419 | [(set_attr "type" "neon_store1_1reg<q>")]) |
d98a3884 | 4420 | |
fe8542d5 | 4421 | ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed |
4422 | ;; here on big endian targets. | |
d98a3884 | 4423 | (define_insn "neon_vst1_lane<mode>" ; VDX variant: store lane operand 2 of register operand 1 to memory operand 0. |
e5bf7a7a | 4424 | [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um") |
7867e2bd | 4425 | (unspec:<V_elem> |
4426 | [(match_operand:VDX 1 "s_register_operand" "w") | |
4427 | (match_operand:SI 2 "immediate_operand" "i")] | |
4428 | UNSPEC_VST1_LANE))] | |
d98a3884 | 4429 | "TARGET_NEON" |
4430 | { | |
fe8542d5 | 4431 | HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2])); |
d98a3884 | 4432 | HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode); |
fe8542d5 | 4433 | operands[2] = GEN_INT (lane); /* Store the remapped lane back so that %c2 prints it. */ |
d98a3884 | 4434 | if (max == 1) /* Single-element mode: the template has no lane index. */ |
e5bf7a7a | 4435 | return "vst1.<V_sz_elem>\t{%P1}, %A0"; |
d98a3884 | 4436 | else |
e5bf7a7a | 4437 | return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0"; |
bcaec148 | 4438 | } |
32093010 | 4439 | [(set_attr "type" "neon_store1_one_lane<q>")] |
4440 | ) | |
d98a3884 | 4441 | |
fe8542d5 | 4442 | ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed |
4443 | ;; here on big endian targets. | |
d98a3884 | 4444 | (define_insn "neon_vst1_lane<mode>" ; VQX variant: the lane is addressed through a <V_HALF>mode register. |
e5bf7a7a | 4445 | [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um") |
7867e2bd | 4446 | (unspec:<V_elem> |
4447 | [(match_operand:VQX 1 "s_register_operand" "w") | |
4448 | (match_operand:SI 2 "immediate_operand" "i")] | |
4449 | UNSPEC_VST1_LANE))] | |
d98a3884 | 4450 | "TARGET_NEON" |
4451 | { | |
fe8542d5 | 4452 | HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2])); |
d98a3884 | 4453 | HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode); |
4454 | int regno = REGNO (operands[1]); | |
2989d25d | 4455 | if (lane >= max / 2) /* Lane falls in the upper half of the vector. */ |
d98a3884 | 4456 | { |
4457 | lane -= max / 2; /* Make the index relative to that half. */ | |
4458 | regno += 2; /* Advance the register number to the upper half. */ | |
d98a3884 | 4459 | } |
fe8542d5 | 4460 | operands[2] = GEN_INT (lane); /* Final lane index, printed by %c2. */ |
d98a3884 | 4461 | operands[1] = gen_rtx_REG (<V_HALF>mode, regno); /* Print only the half-width register that holds the lane. */ |
4462 | if (max == 2) /* One element per half: the template has no lane index. */ | |
e5bf7a7a | 4463 | return "vst1.<V_sz_elem>\t{%P1}, %A0"; |
d98a3884 | 4464 | else |
e5bf7a7a | 4465 | return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0"; |
bcaec148 | 4466 | } |
32093010 | 4467 | [(set_attr "type" "neon_store1_one_lane<q>")] |
bcaec148 | 4468 | ) |
d98a3884 | 4469 | |
672b3f5b | 4470 | (define_expand "vec_load_lanesti<mode>" ; Same UNSPEC_VLD2 set as the TI-mode neon_vld2<mode>, written without constraints. |
4471 | [(set (match_operand:TI 0 "s_register_operand") | |
4472 | (unspec:TI [(match_operand:TI 1 "neon_struct_operand") | |
4473 | (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] | |
4474 | UNSPEC_VLD2))] | |
4475 | "TARGET_NEON") | |
4476 | ||
d98a3884 | 4477 | (define_insn "neon_vld2<mode>" ; TI-mode structure load, iterated over VDX. |
4478 | [(set (match_operand:TI 0 "s_register_operand" "=w") | |
e5bf7a7a | 4479 | (unspec:TI [(match_operand:TI 1 "neon_struct_operand" "Um") |
d98a3884 | 4480 | (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
4481 | UNSPEC_VLD2))] | |
4482 | "TARGET_NEON" | |
4483 | { | |
4484 | if (<V_sz_elem> == 64) /* 64-bit elements are loaded with vld1.64 instead of vld2. */ | |
e5bf7a7a | 4485 | return "vld1.64\t%h0, %A1"; |
d98a3884 | 4486 | else |
e5bf7a7a | 4487 | return "vld2.<V_sz_elem>\t%h0, %A1"; |
bcaec148 | 4488 | } |
52432540 | 4489 | [(set (attr "type") ; The type attribute makes the same 64-bit distinction as the template. |
bcaec148 | 4490 | (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64")) |
32093010 | 4491 | (const_string "neon_load1_2reg<q>") |
4492 | (const_string "neon_load2_2reg<q>")))] | |
bcaec148 | 4493 | ) |
d98a3884 | 4494 | |
672b3f5b | 4495 | (define_expand "vec_load_lanesoi<mode>" ; Same UNSPEC_VLD2 set as the OI-mode neon_vld2<mode>, written without constraints. |
4496 | [(set (match_operand:OI 0 "s_register_operand") | |
4497 | (unspec:OI [(match_operand:OI 1 "neon_struct_operand") | |
fb5f110d | 4498 | (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
672b3f5b | 4499 | UNSPEC_VLD2))] |
4500 | "TARGET_NEON") | |
4501 | ||
d98a3884 | 4502 | (define_insn "neon_vld2<mode>" ; OI-mode structure load, iterated over VQ2; always emits vld2. |
4503 | [(set (match_operand:OI 0 "s_register_operand" "=w") | |
e5bf7a7a | 4504 | (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um") |
fb5f110d | 4505 | (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
d98a3884 | 4506 | UNSPEC_VLD2))] |
4507 | "TARGET_NEON" | |
e5bf7a7a | 4508 | "vld2.<V_sz_elem>\t%h0, %A1" |
32093010 | 4509 | [(set_attr "type" "neon_load2_2reg_q")]) |
d98a3884 | 4510 | |
fe8542d5 | 4511 | ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed |
4512 | ;; here on big endian targets. | |
d98a3884 | 4513 | (define_insn "neon_vld2_lane<mode>" ; TI-mode variant, iterated over VD_LANE. |
4514 | [(set (match_operand:TI 0 "s_register_operand" "=w") | |
e5bf7a7a | 4515 | (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um") |
d98a3884 | 4516 | (match_operand:TI 2 "s_register_operand" "0") |
4517 | (match_operand:SI 3 "immediate_operand" "i") | |
fb5f110d | 4518 | (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
d98a3884 | 4519 | UNSPEC_VLD2_LANE))] |
4520 | "TARGET_NEON" | |
4521 | { | |
fe8542d5 | 4522 | HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3])); |
d98a3884 | 4523 | int regno = REGNO (operands[0]); |
4524 | rtx ops[4]; /* Private operand array for output_asm_insn. */ | |
d98a3884 | 4525 | ops[0] = gen_rtx_REG (DImode, regno); /* First register of the pair. */ |
4526 | ops[1] = gen_rtx_REG (DImode, regno + 2); /* Second register, two register numbers on. */ | |
4527 | ops[2] = operands[1]; /* Memory operand. */ | |
fe8542d5 | 4528 | ops[3] = GEN_INT (lane); /* Remapped lane index. */ |
e5bf7a7a | 4529 | output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops); |
d98a3884 | 4530 | return ""; /* The instruction text was already emitted above. */ |
bcaec148 | 4531 | } |
32093010 | 4532 | [(set_attr "type" "neon_load2_one_lane<q>")] |
bcaec148 | 4533 | ) |
d98a3884 | 4534 | |
fe8542d5 | 4535 | ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed |
4536 | ;; here on big endian targets. | |
d98a3884 | 4537 | (define_insn "neon_vld2_lane<mode>" ; OI-mode variant, iterated over VQ_HS. |
4538 | [(set (match_operand:OI 0 "s_register_operand" "=w") | |
e5bf7a7a | 4539 | (unspec:OI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um") |
d98a3884 | 4540 | (match_operand:OI 2 "s_register_operand" "0") |
4541 | (match_operand:SI 3 "immediate_operand" "i") | |
fb5f110d | 4542 | (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
d98a3884 | 4543 | UNSPEC_VLD2_LANE))] |
4544 | "TARGET_NEON" | |
4545 | { | |
fe8542d5 | 4546 | HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3])); |
d98a3884 | 4547 | HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode); |
4548 | int regno = REGNO (operands[0]); | |
4549 | rtx ops[4]; /* Private operand array for output_asm_insn. */ | |
2989d25d | 4550 | if (lane >= max / 2) /* Lane falls in the upper half of each vector. */ |
d98a3884 | 4551 | { |
4552 | lane -= max / 2; /* Make the index relative to that half. */ | |
4553 | regno += 2; /* Start from the upper-half register instead. */ | |
4554 | } | |
4555 | ops[0] = gen_rtx_REG (DImode, regno); | |
4556 | ops[1] = gen_rtx_REG (DImode, regno + 4); /* Same half of the second vector, four register numbers on. */ | |
4557 | ops[2] = operands[1]; | |
4558 | ops[3] = GEN_INT (lane); | |
e5bf7a7a | 4559 | output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops); |
d98a3884 | 4560 | return ""; /* The instruction text was already emitted above. */ |
bcaec148 | 4561 | } |
32093010 | 4562 | [(set_attr "type" "neon_load2_one_lane<q>")] |
bcaec148 | 4563 | ) |
d98a3884 | 4564 | |
4565 | (define_insn "neon_vld2_dup<mode>" ; TI-mode result from a <V_two_elem> memory operand, iterated over VDX. | |
4566 | [(set (match_operand:TI 0 "s_register_operand" "=w") | |
e5bf7a7a | 4567 | (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um") |
d98a3884 | 4568 | (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
4569 | UNSPEC_VLD2_DUP))] | |
4570 | "TARGET_NEON" | |
4571 | { | |
4572 | if (GET_MODE_NUNITS (<MODE>mode) > 1) /* Multi-element modes use vld2 with empty lane indices. */ | |
e5bf7a7a | 4573 | return "vld2.<V_sz_elem>\t{%e0[], %f0[]}, %A1"; |
d98a3884 | 4574 | else /* Single-element modes fall back to a plain vld1. */ |
e5bf7a7a | 4575 | return "vld1.<V_sz_elem>\t%h0, %A1"; |
bcaec148 | 4576 | } |
52432540 | 4577 | [(set (attr "type") ; The type attribute makes the same element-count distinction as the template. |
bcaec148 | 4578 | (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1")) |
32093010 | 4579 | (const_string "neon_load2_all_lanes<q>") |
4580 | (const_string "neon_load1_1reg<q>")))] | |
bcaec148 | 4581 | ) |
d98a3884 | 4582 | |
672b3f5b | 4583 | (define_expand "vec_store_lanesti<mode>" ; Same UNSPEC_VST2 set as the TI-mode neon_vst2<mode>, written without constraints. |
4584 | [(set (match_operand:TI 0 "neon_struct_operand") | |
4585 | (unspec:TI [(match_operand:TI 1 "s_register_operand") | |
4586 | (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] | |
4587 | UNSPEC_VST2))] | |
4588 | "TARGET_NEON") | |
4589 | ||
d98a3884 | 4590 | (define_insn "neon_vst2<mode>" ; TI-mode structure store, iterated over VDX. |
e5bf7a7a | 4591 | [(set (match_operand:TI 0 "neon_struct_operand" "=Um") |
d98a3884 | 4592 | (unspec:TI [(match_operand:TI 1 "s_register_operand" "w") |
4593 | (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] | |
4594 | UNSPEC_VST2))] | |
4595 | "TARGET_NEON" | |
4596 | { | |
4597 | if (<V_sz_elem> == 64) /* 64-bit elements are stored with vst1.64 instead of vst2. */ | |
e5bf7a7a | 4598 | return "vst1.64\t%h1, %A0"; |
d98a3884 | 4599 | else |
e5bf7a7a | 4600 | return "vst2.<V_sz_elem>\t%h1, %A0"; |
bcaec148 | 4601 | } |
52432540 | 4602 | [(set (attr "type") ; NOTE(review): the non-64-bit type is a one_lane type although this is not a lane store -- confirm intended. |
bcaec148 | 4603 | (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64")) |
32093010 | 4604 | (const_string "neon_store1_2reg<q>") |
4605 | (const_string "neon_store2_one_lane<q>")))] | |
bcaec148 | 4606 | ) |
d98a3884 | 4607 | |
672b3f5b | 4608 | (define_expand "vec_store_lanesoi<mode>" ; Same UNSPEC_VST2 set as the OI-mode neon_vst2<mode>, written without constraints. |
4609 | [(set (match_operand:OI 0 "neon_struct_operand") | |
4610 | (unspec:OI [(match_operand:OI 1 "s_register_operand") | |
fb5f110d | 4611 | (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
672b3f5b | 4612 | UNSPEC_VST2))] |
4613 | "TARGET_NEON") | |
4614 | ||
d98a3884 | 4615 | (define_insn "neon_vst2<mode>" ; OI-mode structure store, iterated over VQ2; always emits vst2. |
e5bf7a7a | 4616 | [(set (match_operand:OI 0 "neon_struct_operand" "=Um") |
d98a3884 | 4617 | (unspec:OI [(match_operand:OI 1 "s_register_operand" "w") |
fb5f110d | 4618 | (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
d98a3884 | 4619 | UNSPEC_VST2))] |
4620 | "TARGET_NEON" | |
e5bf7a7a | 4621 | "vst2.<V_sz_elem>\t%h1, %A0" |
32093010 | 4622 | [(set_attr "type" "neon_store2_4reg<q>")] |
bcaec148 | 4623 | ) |
d98a3884 | 4624 | |
fe8542d5 | 4625 | ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed |
4626 | ;; here on big endian targets. | |
d98a3884 | 4627 | (define_insn "neon_vst2_lane<mode>" ; TI-mode variant, iterated over VD_LANE. |
e5bf7a7a | 4628 | [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um") |
d98a3884 | 4629 | (unspec:<V_two_elem> |
4630 | [(match_operand:TI 1 "s_register_operand" "w") | |
4631 | (match_operand:SI 2 "immediate_operand" "i") | |
fb5f110d | 4632 | (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
d98a3884 | 4633 | UNSPEC_VST2_LANE))] |
4634 | "TARGET_NEON" | |
4635 | { | |
fe8542d5 | 4636 | HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2])); |
d98a3884 | 4637 | int regno = REGNO (operands[1]); |
4638 | rtx ops[4]; /* Private operand array for output_asm_insn. */ | |
d98a3884 | 4639 | ops[0] = operands[0]; /* Memory operand. */ |
4640 | ops[1] = gen_rtx_REG (DImode, regno); /* First register of the pair. */ | |
4641 | ops[2] = gen_rtx_REG (DImode, regno + 2); /* Second register, two register numbers on. */ | |
fe8542d5 | 4642 | ops[3] = GEN_INT (lane); /* Remapped lane index. */ |
e5bf7a7a | 4643 | output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops); |
d98a3884 | 4644 | return ""; /* The instruction text was already emitted above. */ |
bcaec148 | 4645 | } |
32093010 | 4646 | [(set_attr "type" "neon_store2_one_lane<q>")] |
bcaec148 | 4647 | ) |
d98a3884 | 4648 | |
fe8542d5 | 4649 | ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed |
4650 | ;; here on big endian targets. | |
d98a3884 | 4651 | (define_insn "neon_vst2_lane<mode>" ; OI-mode variant, iterated over VQ_HS. |
e5bf7a7a | 4652 | [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um") |
d98a3884 | 4653 | (unspec:<V_two_elem> |
4654 | [(match_operand:OI 1 "s_register_operand" "w") | |
4655 | (match_operand:SI 2 "immediate_operand" "i") | |
fb5f110d | 4656 | (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
d98a3884 | 4657 | UNSPEC_VST2_LANE))] |
4658 | "TARGET_NEON" | |
4659 | { | |
fe8542d5 | 4660 | HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2])); |
d98a3884 | 4661 | HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode); |
4662 | int regno = REGNO (operands[1]); | |
4663 | rtx ops[4]; /* Private operand array for output_asm_insn. */ | |
2989d25d | 4664 | if (lane >= max / 2) /* Lane falls in the upper half of each vector. */ |
d98a3884 | 4665 | { |
4666 | lane -= max / 2; /* Make the index relative to that half. */ | |
4667 | regno += 2; /* Start from the upper-half register instead. */ | |
4668 | } | |
4669 | ops[0] = operands[0]; | |
4670 | ops[1] = gen_rtx_REG (DImode, regno); | |
4671 | ops[2] = gen_rtx_REG (DImode, regno + 4); /* Same half of the second vector, four register numbers on. */ | |
4672 | ops[3] = GEN_INT (lane); | |
e5bf7a7a | 4673 | output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops); |
d98a3884 | 4674 | return ""; /* The instruction text was already emitted above. */ |
bcaec148 | 4675 | } |
32093010 | 4676 | [(set_attr "type" "neon_store2_one_lane<q>")] |
bcaec148 | 4677 | ) |
d98a3884 | 4678 | |
672b3f5b | 4679 | (define_expand "vec_load_lanesei<mode>" ; Same UNSPEC_VLD3 set as the EI-mode neon_vld3<mode>, written without constraints. |
4680 | [(set (match_operand:EI 0 "s_register_operand") | |
4681 | (unspec:EI [(match_operand:EI 1 "neon_struct_operand") | |
4682 | (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] | |
4683 | UNSPEC_VLD3))] | |
4684 | "TARGET_NEON") | |
4685 | ||
d98a3884 | 4686 | (define_insn "neon_vld3<mode>" ; EI-mode structure load, iterated over VDX. |
4687 | [(set (match_operand:EI 0 "s_register_operand" "=w") | |
e5bf7a7a | 4688 | (unspec:EI [(match_operand:EI 1 "neon_struct_operand" "Um") |
d98a3884 | 4689 | (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
4690 | UNSPEC_VLD3))] | |
4691 | "TARGET_NEON" | |
4692 | { | |
4693 | if (<V_sz_elem> == 64) /* 64-bit elements are loaded with vld1.64 instead of vld3. */ | |
e5bf7a7a | 4694 | return "vld1.64\t%h0, %A1"; |
d98a3884 | 4695 | else |
e5bf7a7a | 4696 | return "vld3.<V_sz_elem>\t%h0, %A1"; |
bcaec148 | 4697 | } |
52432540 | 4698 | [(set (attr "type") ; The type attribute makes the same 64-bit distinction as the template. |
bcaec148 | 4699 | (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64")) |
32093010 | 4700 | (const_string "neon_load1_3reg<q>") |
4701 | (const_string "neon_load3_3reg<q>")))] | |
bcaec148 | 4702 | ) |
d98a3884 | 4703 | |
672b3f5b | 4704 | (define_expand "vec_load_lanesci<mode>" ; CI-mode load, iterated over VQ2. |
4705 | [(match_operand:CI 0 "s_register_operand") | |
4706 | (match_operand:CI 1 "neon_struct_operand") | |
fb5f110d | 4707 | (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
672b3f5b | 4708 | "TARGET_NEON" |
4709 | { | |
4710 | emit_insn (gen_neon_vld3<mode> (operands[0], operands[1])); /* Forward both operands to the neon_vld3<mode> generator. */ | |
4711 | DONE; | |
4712 | }) | |
4713 | ||
d98a3884 | 4714 | (define_expand "neon_vld3<mode>" ; CI-mode load emitted as two insns, neon_vld3qa then neon_vld3qb. |
e5bf7a7a | 4715 | [(match_operand:CI 0 "s_register_operand") |
4716 | (match_operand:CI 1 "neon_struct_operand") | |
fb5f110d | 4717 | (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
d98a3884 | 4718 | "TARGET_NEON" |
4719 | { | |
e5bf7a7a | 4720 | rtx mem; |
4721 | ||
4722 | mem = adjust_address (operands[1], EImode, 0); /* First EImode-sized chunk, at offset 0. */ | |
4723 | emit_insn (gen_neon_vld3qa<mode> (operands[0], mem)); | |
4724 | mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode)); /* Second chunk, one EImode size further on. */ | |
4725 | emit_insn (gen_neon_vld3qb<mode> (operands[0], mem, operands[0])); /* operands[0] is passed again as the register input of vld3qb. */ | |
d98a3884 | 4726 | DONE; |
4727 | }) | |
4728 | ||
4729 | (define_insn "neon_vld3qa<mode>" ; First of the two vld3 insns emitted by the CI-mode neon_vld3<mode> expander. | |
4730 | [(set (match_operand:CI 0 "s_register_operand" "=w") | |
e5bf7a7a | 4731 | (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um") |
fb5f110d | 4732 | (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
e5bf7a7a | 4733 | UNSPEC_VLD3A))] |
d98a3884 | 4734 | "TARGET_NEON" |
4735 | { | |
4736 | int regno = REGNO (operands[0]); | |
4737 | rtx ops[4]; /* Private operand array for output_asm_insn. */ | |
4738 | ops[0] = gen_rtx_REG (DImode, regno); /* Destination registers are regno, regno + 4 and regno + 8. */ | |
4739 | ops[1] = gen_rtx_REG (DImode, regno + 4); | |
4740 | ops[2] = gen_rtx_REG (DImode, regno + 8); | |
92d6c32b | 4741 | ops[3] = operands[1]; /* Memory operand. */ |
e5bf7a7a | 4742 | output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops); |
d98a3884 | 4743 | return ""; /* The instruction text was already emitted above. */ |
bcaec148 | 4744 | } |
32093010 | 4745 | [(set_attr "type" "neon_load3_3reg<q>")] |
bcaec148 | 4746 | ) |
d98a3884 | 4747 | |
4748 | (define_insn "neon_vld3qb<mode>" ; Second of the two vld3 insns emitted by the CI-mode neon_vld3<mode> expander. | |
4749 | [(set (match_operand:CI 0 "s_register_operand" "=w") | |
e5bf7a7a | 4750 | (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um") |
4751 | (match_operand:CI 2 "s_register_operand" "0") ; Register input tied to operand 0 by the matching constraint. | |
fb5f110d | 4752 | (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
e5bf7a7a | 4753 | UNSPEC_VLD3B))] |
d98a3884 | 4754 | "TARGET_NEON" |
4755 | { | |
4756 | int regno = REGNO (operands[0]); | |
4757 | rtx ops[4]; /* Private operand array for output_asm_insn. */ | |
4758 | ops[0] = gen_rtx_REG (DImode, regno + 2); /* Destination registers are regno + 2, regno + 6 and regno + 10. */ | |
4759 | ops[1] = gen_rtx_REG (DImode, regno + 6); | |
4760 | ops[2] = gen_rtx_REG (DImode, regno + 10); | |
e5bf7a7a | 4761 | ops[3] = operands[1]; /* Memory operand. */ |
4762 | output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops); | |
d98a3884 | 4763 | return ""; /* The instruction text was already emitted above. */ |
bcaec148 | 4764 | } |
32093010 | 4765 | [(set_attr "type" "neon_load3_3reg<q>")] |
bcaec148 | 4766 | ) |
d98a3884 | 4767 | |
fe8542d5 | 4768 | ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed |
4769 | ;; here on big endian targets. | |
d98a3884 | 4770 | (define_insn "neon_vld3_lane<mode>" ; EI-mode variant, iterated over VD_LANE. |
4771 | [(set (match_operand:EI 0 "s_register_operand" "=w") | |
e5bf7a7a | 4772 | (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um") |
d98a3884 | 4773 | (match_operand:EI 2 "s_register_operand" "0") |
4774 | (match_operand:SI 3 "immediate_operand" "i") | |
fb5f110d | 4775 | (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
d98a3884 | 4776 | UNSPEC_VLD3_LANE))] |
4777 | "TARGET_NEON" | |
4778 | { | |
fe8542d5 | 4779 | HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])); |
d98a3884 | 4780 | int regno = REGNO (operands[0]); |
4781 | rtx ops[5]; /* Private operand array for output_asm_insn. */ | |
d98a3884 | 4782 | ops[0] = gen_rtx_REG (DImode, regno); /* Three registers, two register numbers apart. */ |
4783 | ops[1] = gen_rtx_REG (DImode, regno + 2); | |
4784 | ops[2] = gen_rtx_REG (DImode, regno + 4); | |
4785 | ops[3] = operands[1]; /* Memory operand. */ | |
fe8542d5 | 4786 | ops[4] = GEN_INT (lane); /* Remapped lane index. */ |
0b66dd3d | 4787 | output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3", /* NOTE(review): address printed with %3, not %A3 as in the vld1/vld2 lane patterns. */ |
d98a3884 | 4788 | ops); |
4789 | return ""; /* The instruction text was already emitted above. */ | |
bcaec148 | 4790 | } |
32093010 | 4791 | [(set_attr "type" "neon_load3_one_lane<q>")] |
bcaec148 | 4792 | ) |
d98a3884 | 4793 | |
fe8542d5 | 4794 | ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed |
4795 | ;; here on big endian targets. | |
d98a3884 | 4796 | (define_insn "neon_vld3_lane<mode>" ; CI-mode variant, iterated over VQ_HS. |
4797 | [(set (match_operand:CI 0 "s_register_operand" "=w") | |
e5bf7a7a | 4798 | (unspec:CI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um") |
d98a3884 | 4799 | (match_operand:CI 2 "s_register_operand" "0") |
4800 | (match_operand:SI 3 "immediate_operand" "i") | |
fb5f110d | 4801 | (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
d98a3884 | 4802 | UNSPEC_VLD3_LANE))] |
4803 | "TARGET_NEON" | |
4804 | { | |
fe8542d5 | 4805 | HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3])); |
d98a3884 | 4806 | HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode); |
4807 | int regno = REGNO (operands[0]); | |
4808 | rtx ops[5]; /* Private operand array for output_asm_insn. */ | |
2989d25d | 4809 | if (lane >= max / 2) /* Lane falls in the upper half of each vector. */ |
d98a3884 | 4810 | { |
4811 | lane -= max / 2; /* Make the index relative to that half. */ | |
4812 | regno += 2; /* Start from the upper-half register instead. */ | |
4813 | } | |
4814 | ops[0] = gen_rtx_REG (DImode, regno); /* Three registers, four register numbers apart. */ | |
4815 | ops[1] = gen_rtx_REG (DImode, regno + 4); | |
4816 | ops[2] = gen_rtx_REG (DImode, regno + 8); | |
4817 | ops[3] = operands[1]; | |
4818 | ops[4] = GEN_INT (lane); | |
0b66dd3d | 4819 | output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3", /* NOTE(review): address printed with %3, not %A3 as in the vld1/vld2 lane patterns. */ |
d98a3884 | 4820 | ops); |
4821 | return ""; /* The instruction text was already emitted above. */ | |
bcaec148 | 4822 | } |
32093010 | 4823 | [(set_attr "type" "neon_load3_one_lane<q>")] |
bcaec148 | 4824 | ) |
d98a3884 | 4825 | |
4826 | (define_insn "neon_vld3_dup<mode>" ; EI-mode result from a <V_three_elem> memory operand, iterated over VDX. | |
4827 | [(set (match_operand:EI 0 "s_register_operand" "=w") | |
e5bf7a7a | 4828 | (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um") |
d98a3884 | 4829 | (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
4830 | UNSPEC_VLD3_DUP))] | |
4831 | "TARGET_NEON" | |
4832 | { | |
4833 | if (GET_MODE_NUNITS (<MODE>mode) > 1) /* Multi-element modes use vld3 with empty lane indices. */ | |
4834 | { | |
4835 | int regno = REGNO (operands[0]); | |
4836 | rtx ops[4]; /* Private operand array for output_asm_insn. */ | |
4837 | ops[0] = gen_rtx_REG (DImode, regno); /* Three registers, two register numbers apart. */ | |
4838 | ops[1] = gen_rtx_REG (DImode, regno + 2); | |
4839 | ops[2] = gen_rtx_REG (DImode, regno + 4); | |
4840 | ops[3] = operands[1]; | |
0b66dd3d | 4841 | output_asm_insn ("vld3.<V_sz_elem>\t{%P0[], %P1[], %P2[]}, %3", ops); /* NOTE(review): address printed with %3 here but %A1 in the vld1 branch below. */ |
d98a3884 | 4842 | return ""; |
4843 | } | |
4844 | else /* Single-element modes fall back to a plain vld1. */ | |
e5bf7a7a | 4845 | return "vld1.<V_sz_elem>\t%h0, %A1"; |
bcaec148 | 4846 | } |
52432540 | 4847 | [(set (attr "type") ; The type attribute makes the same element-count distinction as the template. |
bcaec148 | 4848 | (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1")) |
32093010 | 4849 | (const_string "neon_load3_all_lanes<q>") |
4850 | (const_string "neon_load1_1reg<q>")))]) | |
d98a3884 | 4851 | |
672b3f5b | 4852 | (define_expand "vec_store_lanesei<mode>" ; Same UNSPEC_VST3 set as the EI-mode neon_vst3<mode>, written without constraints. |
4853 | [(set (match_operand:EI 0 "neon_struct_operand") | |
4854 | (unspec:EI [(match_operand:EI 1 "s_register_operand") | |
4855 | (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] | |
4856 | UNSPEC_VST3))] | |
4857 | "TARGET_NEON") | |
4858 | ||
d98a3884 | 4859 | (define_insn "neon_vst3<mode>" ; EI-mode structure store, iterated over VDX. |
e5bf7a7a | 4860 | [(set (match_operand:EI 0 "neon_struct_operand" "=Um") |
d98a3884 | 4861 | (unspec:EI [(match_operand:EI 1 "s_register_operand" "w") |
4862 | (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] | |
4863 | UNSPEC_VST3))] | |
4864 | "TARGET_NEON" | |
4865 | { | |
4866 | if (<V_sz_elem> == 64) /* 64-bit elements are stored with vst1.64 instead of vst3. */ | |
e5bf7a7a | 4867 | return "vst1.64\t%h1, %A0"; |
d98a3884 | 4868 | else |
e5bf7a7a | 4869 | return "vst3.<V_sz_elem>\t%h1, %A0"; |
bcaec148 | 4870 | } |
52432540 | 4871 | [(set (attr "type") ; NOTE(review): the non-64-bit type is a one_lane type although this is not a lane store -- confirm intended. |
bcaec148 | 4872 | (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64")) |
32093010 | 4873 | (const_string "neon_store1_3reg<q>") |
4874 | (const_string "neon_store3_one_lane<q>")))]) | |
d98a3884 | 4875 | |
672b3f5b | 4876 | (define_expand "vec_store_lanesci<mode>" ; CI-mode store, iterated over VQ2. |
4877 | [(match_operand:CI 0 "neon_struct_operand") | |
4878 | (match_operand:CI 1 "s_register_operand") | |
fb5f110d | 4879 | (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
672b3f5b | 4880 | "TARGET_NEON" |
4881 | { | |
4882 | emit_insn (gen_neon_vst3<mode> (operands[0], operands[1])); /* Forward both operands to the neon_vst3<mode> generator. */ | |
4883 | DONE; | |
4884 | }) | |
4885 | ||
d98a3884 | 4886 | (define_expand "neon_vst3<mode>" ; CI-mode store emitted as two insns, neon_vst3qa then neon_vst3qb. |
e5bf7a7a | 4887 | [(match_operand:CI 0 "neon_struct_operand") |
4888 | (match_operand:CI 1 "s_register_operand") | |
fb5f110d | 4889 | (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
d98a3884 | 4890 | "TARGET_NEON" |
4891 | { | |
e5bf7a7a | 4892 | rtx mem; |
4893 | ||
4894 | mem = adjust_address (operands[0], EImode, 0); /* First EImode-sized chunk, at offset 0. */ | |
4895 | emit_insn (gen_neon_vst3qa<mode> (mem, operands[1])); | |
4896 | mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode)); /* Second chunk, one EImode size further on. */ | |
4897 | emit_insn (gen_neon_vst3qb<mode> (mem, operands[1])); | |
d98a3884 | 4898 | DONE; |
4899 | }) | |
4900 | ||
4901 | (define_insn "neon_vst3qa<mode>" ; First of the two vst3 insns emitted by the CI-mode neon_vst3<mode> expander. | |
e5bf7a7a | 4902 | [(set (match_operand:EI 0 "neon_struct_operand" "=Um") |
4903 | (unspec:EI [(match_operand:CI 1 "s_register_operand" "w") | |
fb5f110d | 4904 | (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
e5bf7a7a | 4905 | UNSPEC_VST3A))] |
d98a3884 | 4906 | "TARGET_NEON" |
4907 | { | |
e5bf7a7a | 4908 | int regno = REGNO (operands[1]); |
d98a3884 | 4909 | rtx ops[4]; /* Private operand array for output_asm_insn. */ |
4910 | ops[0] = operands[0]; /* Memory operand. */ | |
4911 | ops[1] = gen_rtx_REG (DImode, regno); /* Source registers are regno, regno + 4 and regno + 8. */ | |
4912 | ops[2] = gen_rtx_REG (DImode, regno + 4); | |
4913 | ops[3] = gen_rtx_REG (DImode, regno + 8); | |
e5bf7a7a | 4914 | output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops); |
d98a3884 | 4915 | return ""; /* The instruction text was already emitted above. */ |
bcaec148 | 4916 | } |
32093010 | 4917 | [(set_attr "type" "neon_store3_3reg<q>")] |
bcaec148 | 4918 | ) |
d98a3884 | 4919 | |
4920 | (define_insn "neon_vst3qb<mode>" ; Second of the two vst3 insns emitted by the CI-mode neon_vst3<mode> expander. | |
e5bf7a7a | 4921 | [(set (match_operand:EI 0 "neon_struct_operand" "=Um") |
4922 | (unspec:EI [(match_operand:CI 1 "s_register_operand" "w") | |
fb5f110d | 4923 | (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
e5bf7a7a | 4924 | UNSPEC_VST3B))] |
d98a3884 | 4925 | "TARGET_NEON" |
4926 | { | |
e5bf7a7a | 4927 | int regno = REGNO (operands[1]); |
d98a3884 | 4928 | rtx ops[4]; /* Private operand array for output_asm_insn. */ |
4929 | ops[0] = operands[0]; /* Memory operand. */ | |
4930 | ops[1] = gen_rtx_REG (DImode, regno + 2); /* Source registers are regno + 2, regno + 6 and regno + 10. */ | |
4931 | ops[2] = gen_rtx_REG (DImode, regno + 6); | |
4932 | ops[3] = gen_rtx_REG (DImode, regno + 10); | |
e5bf7a7a | 4933 | output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops); |
d98a3884 | 4934 | return ""; /* The instruction text was already emitted above. */ |
bcaec148 | 4935 | } |
32093010 | 4936 | [(set_attr "type" "neon_store3_3reg<q>")] |
bcaec148 | 4937 | ) |
d98a3884 | 4938 | |
fe8542d5 | 4939 | ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed |
4940 | ;; here on big endian targets. | |
d98a3884 | 4941 | (define_insn "neon_vst3_lane<mode>" ; EI-mode variant, iterated over VD_LANE. |
e5bf7a7a | 4942 | [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um") |
d98a3884 | 4943 | (unspec:<V_three_elem> |
4944 | [(match_operand:EI 1 "s_register_operand" "w") | |
4945 | (match_operand:SI 2 "immediate_operand" "i") | |
fb5f110d | 4946 | (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
d98a3884 | 4947 | UNSPEC_VST3_LANE))] |
4948 | "TARGET_NEON" | |
4949 | { | |
fe8542d5 | 4950 | HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2])); |
d98a3884 | 4951 | int regno = REGNO (operands[1]); |
4952 | rtx ops[5]; /* Private operand array for output_asm_insn. */ | |
d98a3884 | 4953 | ops[0] = operands[0]; /* Memory operand. */ |
4954 | ops[1] = gen_rtx_REG (DImode, regno); /* Three registers, two register numbers apart. */ | |
4955 | ops[2] = gen_rtx_REG (DImode, regno + 2); | |
4956 | ops[3] = gen_rtx_REG (DImode, regno + 4); | |
fe8542d5 | 4957 | ops[4] = GEN_INT (lane); /* Remapped lane index. */ |
0b66dd3d | 4958 | output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0", /* NOTE(review): address printed with %0, not %A0 as in the vst1/vst2 lane patterns. */ |
d98a3884 | 4959 | ops); |
4960 | return ""; /* The instruction text was already emitted above. */ | |
bcaec148 | 4961 | } |
32093010 | 4962 | [(set_attr "type" "neon_store3_one_lane<q>")] |
bcaec148 | 4963 | ) |
d98a3884 | 4964 | |
fe8542d5 | 4965 | ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed |
4966 | ;; here on big endian targets. | |
d98a3884 | 4967 | (define_insn "neon_vst3_lane<mode>" ; CI-mode variant, iterated over VQ_HS. |
e5bf7a7a | 4968 | [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um") |
d98a3884 | 4969 | (unspec:<V_three_elem> |
4970 | [(match_operand:CI 1 "s_register_operand" "w") | |
4971 | (match_operand:SI 2 "immediate_operand" "i") | |
fb5f110d | 4972 | (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
d98a3884 | 4973 | UNSPEC_VST3_LANE))] |
4974 | "TARGET_NEON" | |
4975 | { | |
fe8542d5 | 4976 | HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2])); |
d98a3884 | 4977 | HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode); |
4978 | int regno = REGNO (operands[1]); | |
4979 | rtx ops[5]; /* Private operand array for output_asm_insn. */ | |
2989d25d | 4980 | if (lane >= max / 2) /* Lane falls in the upper half of each vector. */ |
d98a3884 | 4981 | { |
4982 | lane -= max / 2; /* Make the index relative to that half. */ | |
4983 | regno += 2; /* Start from the upper-half register instead. */ | |
4984 | } | |
4985 | ops[0] = operands[0]; | |
4986 | ops[1] = gen_rtx_REG (DImode, regno); /* Three registers, four register numbers apart. */ | |
4987 | ops[2] = gen_rtx_REG (DImode, regno + 4); | |
4988 | ops[3] = gen_rtx_REG (DImode, regno + 8); | |
4989 | ops[4] = GEN_INT (lane); | |
0b66dd3d | 4990 | output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0", /* NOTE(review): address printed with %0, not %A0 as in the vst1/vst2 lane patterns. */ |
d98a3884 | 4991 | ops); |
4992 | return ""; /* The instruction text was already emitted above. */ | |
bcaec148 | 4993 | } |
32093010 | 4994 | [(set_attr "type" "neon_store3_one_lane<q>")] |
4995 | ) | |
d98a3884 | 4996 | |
672b3f5b | 4997 | (define_expand "vec_load_lanesoi<mode>" ; VDX iteration: same UNSPEC_VLD4 set as the OI-mode neon_vld4<mode>, written without constraints. |
4998 | [(set (match_operand:OI 0 "s_register_operand") | |
4999 | (unspec:OI [(match_operand:OI 1 "neon_struct_operand") | |
5000 | (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] | |
5001 | UNSPEC_VLD4))] | |
5002 | "TARGET_NEON") | |
5003 | ||
d98a3884 | 5004 | (define_insn "neon_vld4<mode>" ; OI-mode structure load, iterated over VDX. |
5005 | [(set (match_operand:OI 0 "s_register_operand" "=w") | |
e5bf7a7a | 5006 | (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um") |
d98a3884 | 5007 | (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
5008 | UNSPEC_VLD4))] | |
5009 | "TARGET_NEON" | |
5010 | { | |
5011 | if (<V_sz_elem> == 64) /* 64-bit elements are loaded with vld1.64 instead of vld4. */ | |
e5bf7a7a | 5012 | return "vld1.64\t%h0, %A1"; |
d98a3884 | 5013 | else |
e5bf7a7a | 5014 | return "vld4.<V_sz_elem>\t%h0, %A1"; |
bcaec148 | 5015 | } |
52432540 | 5016 | [(set (attr "type") ; The type attribute makes the same 64-bit distinction as the template. |
bcaec148 | 5017 | (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64")) |
32093010 | 5018 | (const_string "neon_load1_4reg<q>") |
5019 | (const_string "neon_load4_4reg<q>")))] | |
bcaec148 | 5020 | ) |
d98a3884 | 5021 | |
;; XImode variant for the VQ2 modes.  The expander does no work of its own:
;; it forwards operands 0 and 1 to gen_neon_vld4<mode> and finishes with DONE,
;; so the template above the C block is never emitted directly.
672b3f5b | 5022 | (define_expand "vec_load_lanesxi<mode>" |
5023 | [(match_operand:XI 0 "s_register_operand") |
5024 | (match_operand:XI 1 "neon_struct_operand") |
fb5f110d | 5025 | (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
672b3f5b | 5026 | "TARGET_NEON" |
5027 | { |
5028 | emit_insn (gen_neon_vld4<mode> (operands[0], operands[1])); |
5029 | DONE; |
5030 | }) |
5031 | ||
;; XImode vld4 for the VQ2 modes, expanded as two insns.  The XImode memory
;; operand is re-addressed as two consecutive OImode pieces: neon_vld4qa loads
;; from offset 0, then neon_vld4qb loads from offset GET_MODE_SIZE (OImode).
;; The second insn also receives operands[0] as an input so that it is tied
;; to the result of the first.
d98a3884 | 5032 | (define_expand "neon_vld4<mode>" |
e5bf7a7a | 5033 | [(match_operand:XI 0 "s_register_operand") |
5034 | (match_operand:XI 1 "neon_struct_operand") |
fb5f110d | 5035 | (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
d98a3884 | 5036 | "TARGET_NEON" |
5037 | { |
e5bf7a7a | 5038 | rtx mem; |
5039 | |
5040 | mem = adjust_address (operands[1], OImode, 0); |
5041 | emit_insn (gen_neon_vld4qa<mode> (operands[0], mem)); |
5042 | mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode)); |
5043 | emit_insn (gen_neon_vld4qb<mode> (operands[0], mem, operands[0])); |
d98a3884 | 5044 | DONE; |
5045 | }) |
5046 | ||
;; First half of the XImode vld4 sequence (UNSPEC_VLD4A).  Emits one vld4
;; whose register list is the DImode regs at regno, regno + 4, regno + 8 and
;; regno + 12, where regno is the base register of operand 0; the OImode
;; memory operand supplies the address.
5047 | (define_insn "neon_vld4qa<mode>" |
5048 | [(set (match_operand:XI 0 "s_register_operand" "=w") |
e5bf7a7a | 5049 | (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um") |
fb5f110d | 5050 | (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
e5bf7a7a | 5051 | UNSPEC_VLD4A))] |
d98a3884 | 5052 | "TARGET_NEON" |
5053 | { |
5054 | int regno = REGNO (operands[0]); |
5055 | rtx ops[5]; |
5056 | ops[0] = gen_rtx_REG (DImode, regno); |
5057 | ops[1] = gen_rtx_REG (DImode, regno + 4); |
5058 | ops[2] = gen_rtx_REG (DImode, regno + 8); |
5059 | ops[3] = gen_rtx_REG (DImode, regno + 12); |
92d6c32b | 5060 | ops[4] = operands[1]; |
e5bf7a7a | 5061 | output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops); |
d98a3884 | 5062 | return ""; |
bcaec148 | 5063 | } |
32093010 | 5064 | [(set_attr "type" "neon_load4_4reg<q>")] |
bcaec148 | 5065 | ) |
d98a3884 | 5066 | |
;; Second half of the XImode vld4 sequence (UNSPEC_VLD4B).  Operand 2 is the
;; previous XImode value and is tied to the output by the "0" constraint.
;; The register list is the DImode regs at regno + 2, regno + 6, regno + 10
;; and regno + 14, i.e. the registers interleaved with those written by
;; neon_vld4qa.
5067 | (define_insn "neon_vld4qb<mode>" |
5068 | [(set (match_operand:XI 0 "s_register_operand" "=w") |
e5bf7a7a | 5069 | (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um") |
5070 | (match_operand:XI 2 "s_register_operand" "0") |
fb5f110d | 5071 | (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
e5bf7a7a | 5072 | UNSPEC_VLD4B))] |
d98a3884 | 5073 | "TARGET_NEON" |
5074 | { |
5075 | int regno = REGNO (operands[0]); |
5076 | rtx ops[5]; |
5077 | ops[0] = gen_rtx_REG (DImode, regno + 2); |
5078 | ops[1] = gen_rtx_REG (DImode, regno + 6); |
5079 | ops[2] = gen_rtx_REG (DImode, regno + 10); |
5080 | ops[3] = gen_rtx_REG (DImode, regno + 14); |
e5bf7a7a | 5081 | ops[4] = operands[1]; |
5082 | output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops); |
d98a3884 | 5083 | return ""; |
bcaec148 | 5084 | } |
32093010 | 5085 | [(set_attr "type" "neon_load4_4reg<q>")] |
bcaec148 | 5086 | ) |
d98a3884 | 5087 | |
fe8542d5 | 5088 | ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed |
5089 | ;; here on big endian targets. | |
;; Single-lane vld4 for the VD_LANE modes.  Operand 2 is the previous OImode
;; value, tied to the output by the "0" constraint; operand 3 is the lane
;; immediate, mapped through NEON_ENDIAN_LANE_N before printing.  The register
;; list is the DImode regs at regno, regno + 2, regno + 4 and regno + 6.
d98a3884 | 5090 | (define_insn "neon_vld4_lane<mode>" |
5091 | [(set (match_operand:OI 0 "s_register_operand" "=w") |
e5bf7a7a | 5092 | (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um") |
d98a3884 | 5093 | (match_operand:OI 2 "s_register_operand" "0") |
5094 | (match_operand:SI 3 "immediate_operand" "i") |
fb5f110d | 5095 | (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
d98a3884 | 5096 | UNSPEC_VLD4_LANE))] |
5097 | "TARGET_NEON" |
5098 | { |
fe8542d5 | 5099 | HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3])); |
d98a3884 | 5100 | int regno = REGNO (operands[0]); |
5101 | rtx ops[6]; |
d98a3884 | 5102 | ops[0] = gen_rtx_REG (DImode, regno); |
5103 | ops[1] = gen_rtx_REG (DImode, regno + 2); |
5104 | ops[2] = gen_rtx_REG (DImode, regno + 4); |
5105 | ops[3] = gen_rtx_REG (DImode, regno + 6); |
5106 | ops[4] = operands[1]; |
fe8542d5 | 5107 | ops[5] = GEN_INT (lane); |
e5bf7a7a | 5108 | output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4", |
d98a3884 | 5109 | ops); |
5110 | return ""; |
bcaec148 | 5111 | } |
32093010 | 5112 | [(set_attr "type" "neon_load4_one_lane<q>")] |
bcaec148 | 5113 | ) |
d98a3884 | 5114 | |
fe8542d5 | 5115 | ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed |
5116 | ;; here on big endian targets. | |
;; Single-lane vld4 for the VQ_HS modes, operating on an XImode register
;; group.  After mapping the lane through NEON_ENDIAN_LANE_N, a lane in the
;; upper half of the vector (>= nunits / 2) is rebased by nunits / 2 and the
;; base register number advanced by 2.  The register list is the DImode regs
;; at regno, regno + 4, regno + 8 and regno + 12.
d98a3884 | 5117 | (define_insn "neon_vld4_lane<mode>" |
5118 | [(set (match_operand:XI 0 "s_register_operand" "=w") |
e5bf7a7a | 5119 | (unspec:XI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um") |
d98a3884 | 5120 | (match_operand:XI 2 "s_register_operand" "0") |
5121 | (match_operand:SI 3 "immediate_operand" "i") |
fb5f110d | 5122 | (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
d98a3884 | 5123 | UNSPEC_VLD4_LANE))] |
5124 | "TARGET_NEON" |
5125 | { |
fe8542d5 | 5126 | HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3])); |
d98a3884 | 5127 | HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode); |
5128 | int regno = REGNO (operands[0]); |
5129 | rtx ops[6]; |
2989d25d | 5130 | if (lane >= max / 2) |
d98a3884 | 5131 | { |
5132 | lane -= max / 2; |
5133 | regno += 2; |
5134 | } |
5135 | ops[0] = gen_rtx_REG (DImode, regno); |
5136 | ops[1] = gen_rtx_REG (DImode, regno + 4); |
5137 | ops[2] = gen_rtx_REG (DImode, regno + 8); |
5138 | ops[3] = gen_rtx_REG (DImode, regno + 12); |
5139 | ops[4] = operands[1]; |
5140 | ops[5] = GEN_INT (lane); |
e5bf7a7a | 5141 | output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4", |
d98a3884 | 5142 | ops); |
5143 | return ""; |
bcaec148 | 5144 | } |
32093010 | 5145 | [(set_attr "type" "neon_load4_one_lane<q>")] |
bcaec148 | 5146 | ) |
d98a3884 | 5147 | |
;; vld4 "all lanes" form for the VDX modes (UNSPEC_VLD4_DUP).  For modes with
;; more than one element it prints a vld4 with the "[]" lane syntax on the
;; DImode regs at regno, regno + 2, regno + 4 and regno + 6.  For
;; single-element modes it falls back to a plain "vld1".  The "type"
;; attribute is selected on the same nunits > 1 test.
5148 | (define_insn "neon_vld4_dup<mode>" |
5149 | [(set (match_operand:OI 0 "s_register_operand" "=w") |
e5bf7a7a | 5150 | (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um") |
d98a3884 | 5151 | (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
5152 | UNSPEC_VLD4_DUP))] |
5153 | "TARGET_NEON" |
5154 | { |
5155 | if (GET_MODE_NUNITS (<MODE>mode) > 1) |
5156 | { |
5157 | int regno = REGNO (operands[0]); |
5158 | rtx ops[5]; |
5159 | ops[0] = gen_rtx_REG (DImode, regno); |
5160 | ops[1] = gen_rtx_REG (DImode, regno + 2); |
5161 | ops[2] = gen_rtx_REG (DImode, regno + 4); |
5162 | ops[3] = gen_rtx_REG (DImode, regno + 6); |
5163 | ops[4] = operands[1]; |
e5bf7a7a | 5164 | output_asm_insn ("vld4.<V_sz_elem>\t{%P0[], %P1[], %P2[], %P3[]}, %A4", |
d98a3884 | 5165 | ops); |
5166 | return ""; |
5167 | } |
5168 | else |
e5bf7a7a | 5169 | return "vld1.<V_sz_elem>\t%h0, %A1"; |
bcaec148 | 5170 | } |
52432540 | 5171 | [(set (attr "type") |
bcaec148 | 5172 | (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1")) |
32093010 | 5173 | (const_string "neon_load4_all_lanes<q>") |
5174 | (const_string "neon_load1_1reg<q>")))] |
bcaec148 | 5175 | ) |
d98a3884 | 5176 | |
;; Expander with no preparation statements, enabled under TARGET_NEON.  Its
;; template is an OImode UNSPEC_VST4 store to a structure memory operand,
;; iterated over the VDX modes -- the same RTL shape as the neon_vst4<mode>
;; insn.
672b3f5b | 5177 | (define_expand "vec_store_lanesoi<mode>" |
5178 | [(set (match_operand:OI 0 "neon_struct_operand") |
5179 | (unspec:OI [(match_operand:OI 1 "s_register_operand") |
5180 | (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
5181 | UNSPEC_VST4))] |
5182 | "TARGET_NEON") |
5183 | ||
;; Four-register structure store from an OImode register group (VDX modes).
;; When the element size is 64 the output is "vst1.64"; otherwise it is
;; "vst4.<V_sz_elem>".  The "type" attribute makes the same distinction:
;; neon_store1_4reg<q> for 64-bit elements, neon_store4_4reg<q> otherwise.
d98a3884 | 5184 | (define_insn "neon_vst4<mode>" |
e5bf7a7a | 5185 | [(set (match_operand:OI 0 "neon_struct_operand" "=Um") |
d98a3884 | 5186 | (unspec:OI [(match_operand:OI 1 "s_register_operand" "w") |
5187 | (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
5188 | UNSPEC_VST4))] |
5189 | "TARGET_NEON" |
5190 | { |
5191 | if (<V_sz_elem> == 64) |
e5bf7a7a | 5192 | return "vst1.64\t%h1, %A0"; |
d98a3884 | 5193 | else |
e5bf7a7a | 5194 | return "vst4.<V_sz_elem>\t%h1, %A0"; |
bcaec148 | 5195 | } |
52432540 | 5196 | [(set (attr "type") |
bcaec148 | 5197 | (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64")) |
32093010 | 5198 | (const_string "neon_store1_4reg<q>") |
5199 | (const_string "neon_store4_4reg<q>")))] |
bcaec148 | 5200 | ) |
d98a3884 | 5201 | |
;; XImode variant for the VQ2 modes.  The expander does no work of its own:
;; it forwards operands 0 and 1 to gen_neon_vst4<mode> and finishes with DONE,
;; so the template above the C block is never emitted directly.
672b3f5b | 5202 | (define_expand "vec_store_lanesxi<mode>" |
5203 | [(match_operand:XI 0 "neon_struct_operand") |
5204 | (match_operand:XI 1 "s_register_operand") |
fb5f110d | 5205 | (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
672b3f5b | 5206 | "TARGET_NEON" |
5207 | { |
5208 | emit_insn (gen_neon_vst4<mode> (operands[0], operands[1])); |
5209 | DONE; |
5210 | }) |
5211 | ||
;; XImode vst4 for the VQ2 modes, expanded as two insns.  The XImode memory
;; operand is re-addressed as two consecutive OImode pieces: neon_vst4qa
;; stores to offset 0, then neon_vst4qb stores to offset
;; GET_MODE_SIZE (OImode).  Both take the full XImode source register group.
d98a3884 | 5212 | (define_expand "neon_vst4<mode>" |
e5bf7a7a | 5213 | [(match_operand:XI 0 "neon_struct_operand") |
5214 | (match_operand:XI 1 "s_register_operand") |
fb5f110d | 5215 | (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
d98a3884 | 5216 | "TARGET_NEON" |
5217 | { |
e5bf7a7a | 5218 | rtx mem; |
5219 | |
5220 | mem = adjust_address (operands[0], OImode, 0); |
5221 | emit_insn (gen_neon_vst4qa<mode> (mem, operands[1])); |
5222 | mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode)); |
5223 | emit_insn (gen_neon_vst4qb<mode> (mem, operands[1])); |
d98a3884 | 5224 | DONE; |
5225 | }) |
5226 | ||
;; First half of the XImode vst4 sequence (UNSPEC_VST4A).  Emits one vst4
;; whose register list is the DImode regs at regno, regno + 4, regno + 8 and
;; regno + 12, where regno is the base register of the XImode source
;; (operand 1); the OImode memory operand supplies the address.
5227 | (define_insn "neon_vst4qa<mode>" |
e5bf7a7a | 5228 | [(set (match_operand:OI 0 "neon_struct_operand" "=Um") |
5229 | (unspec:OI [(match_operand:XI 1 "s_register_operand" "w") |
fb5f110d | 5230 | (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
e5bf7a7a | 5231 | UNSPEC_VST4A))] |
d98a3884 | 5232 | "TARGET_NEON" |
5233 | { |
e5bf7a7a | 5234 | int regno = REGNO (operands[1]); |
d98a3884 | 5235 | rtx ops[5]; |
5236 | ops[0] = operands[0]; |
5237 | ops[1] = gen_rtx_REG (DImode, regno); |
5238 | ops[2] = gen_rtx_REG (DImode, regno + 4); |
5239 | ops[3] = gen_rtx_REG (DImode, regno + 8); |
5240 | ops[4] = gen_rtx_REG (DImode, regno + 12); |
e5bf7a7a | 5241 | output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops); |
d98a3884 | 5242 | return ""; |
bcaec148 | 5243 | } |
32093010 | 5244 | [(set_attr "type" "neon_store4_4reg<q>")] |
bcaec148 | 5245 | ) |
d98a3884 | 5246 | |
;; Second half of the XImode vst4 sequence (UNSPEC_VST4B).  The register list
;; is the DImode regs at regno + 2, regno + 6, regno + 10 and regno + 14,
;; i.e. the registers interleaved with those stored by neon_vst4qa.
5247 | (define_insn "neon_vst4qb<mode>" |
e5bf7a7a | 5248 | [(set (match_operand:OI 0 "neon_struct_operand" "=Um") |
5249 | (unspec:OI [(match_operand:XI 1 "s_register_operand" "w") |
fb5f110d | 5250 | (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
e5bf7a7a | 5251 | UNSPEC_VST4B))] |
d98a3884 | 5252 | "TARGET_NEON" |
5253 | { |
e5bf7a7a | 5254 | int regno = REGNO (operands[1]); |
d98a3884 | 5255 | rtx ops[5]; |
5256 | ops[0] = operands[0]; |
5257 | ops[1] = gen_rtx_REG (DImode, regno + 2); |
5258 | ops[2] = gen_rtx_REG (DImode, regno + 6); |
5259 | ops[3] = gen_rtx_REG (DImode, regno + 10); |
5260 | ops[4] = gen_rtx_REG (DImode, regno + 14); |
e5bf7a7a | 5261 | output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops); |
d98a3884 | 5262 | return ""; |
bcaec148 | 5263 | } |
32093010 | 5264 | [(set_attr "type" "neon_store4_4reg<q>")] |
bcaec148 | 5265 | ) |
d98a3884 | 5266 | |
fe8542d5 | 5267 | ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed |
5268 | ;; here on big endian targets. | |
;; Single-lane vst4 for the VD_LANE modes.  Operand 1 is an OImode register
;; group and operand 2 the lane immediate, mapped through NEON_ENDIAN_LANE_N
;; before printing.  The register list is the DImode regs at regno,
;; regno + 2, regno + 4 and regno + 6.
d98a3884 | 5269 | (define_insn "neon_vst4_lane<mode>" |
e5bf7a7a | 5270 | [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um") |
d98a3884 | 5271 | (unspec:<V_four_elem> |
5272 | [(match_operand:OI 1 "s_register_operand" "w") |
5273 | (match_operand:SI 2 "immediate_operand" "i") |
fb5f110d | 5274 | (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
d98a3884 | 5275 | UNSPEC_VST4_LANE))] |
5276 | "TARGET_NEON" |
5277 | { |
fe8542d5 | 5278 | HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2])); |
d98a3884 | 5279 | int regno = REGNO (operands[1]); |
5280 | rtx ops[6]; |
d98a3884 | 5281 | ops[0] = operands[0]; |
5282 | ops[1] = gen_rtx_REG (DImode, regno); |
5283 | ops[2] = gen_rtx_REG (DImode, regno + 2); |
5284 | ops[3] = gen_rtx_REG (DImode, regno + 4); |
5285 | ops[4] = gen_rtx_REG (DImode, regno + 6); |
fe8542d5 | 5286 | ops[5] = GEN_INT (lane); |
e5bf7a7a | 5287 | output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0", |
d98a3884 | 5288 | ops); |
5289 | return ""; |
bcaec148 | 5290 | } |
32093010 | 5291 | [(set_attr "type" "neon_store4_one_lane<q>")] |
bcaec148 | 5292 | ) |
d98a3884 | 5293 | |
fe8542d5 | 5294 | ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed |
5295 | ;; here on big endian targets. | |
;; Single-lane vst4 for the VQ_HS modes, reading an XImode register group.
;; After mapping the lane through NEON_ENDIAN_LANE_N, a lane in the upper
;; half of the vector (>= nunits / 2) is rebased by nunits / 2 and the base
;; register number advanced by 2.  The register list is the DImode regs at
;; regno, regno + 4, regno + 8 and regno + 12.
;; The "type" attribute is the one-lane store type, matching the VD_LANE
;; variant of this pattern and the vst3/vld4 lane patterns; the output is a
;; single-lane store, not a full four-register store.
d98a3884 | 5296 | (define_insn "neon_vst4_lane<mode>" |
e5bf7a7a | 5297 | [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um") |
d98a3884 | 5298 | (unspec:<V_four_elem> |
5299 | [(match_operand:XI 1 "s_register_operand" "w") |
5300 | (match_operand:SI 2 "immediate_operand" "i") |
fb5f110d | 5301 | (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
d98a3884 | 5302 | UNSPEC_VST4_LANE))] |
5303 | "TARGET_NEON" |
5304 | { |
fe8542d5 | 5305 | HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2])); |
d98a3884 | 5306 | HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode); |
5307 | int regno = REGNO (operands[1]); |
5308 | rtx ops[6]; |
2989d25d | 5309 | if (lane >= max / 2) |
d98a3884 | 5310 | { |
5311 | lane -= max / 2; |
5312 | regno += 2; |
5313 | } |
5314 | ops[0] = operands[0]; |
5315 | ops[1] = gen_rtx_REG (DImode, regno); |
5316 | ops[2] = gen_rtx_REG (DImode, regno + 4); |
5317 | ops[3] = gen_rtx_REG (DImode, regno + 8); |
5318 | ops[4] = gen_rtx_REG (DImode, regno + 12); |
5319 | ops[5] = GEN_INT (lane); |
e5bf7a7a | 5320 | output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0", |
d98a3884 | 5321 | ops); |
5322 | return ""; |
bcaec148 | 5323 | } |
32093010 | 5324 | [(set_attr "type" "neon_store4_one_lane<q>")] |
bcaec148 | 5325 | ) |
d98a3884 | 5326 | |
;; Extends (SE) the half of a VU-mode vector selected by operand 2, which
;; must satisfy vect_par_constant_low, and emits a single vmovl reading %e1.
;; Only enabled when !BYTES_BIG_ENDIAN.
6e4376d7 | 5327 | (define_insn "neon_vec_unpack<US>_lo_<mode>" |
5328 | [(set (match_operand:<V_unpack> 0 "register_operand" "=w") |
5329 | (SE:<V_unpack> (vec_select:<V_HALF> |
5330 | (match_operand:VU 1 "register_operand" "w") |
5331 | (match_operand:VU 2 "vect_par_constant_low" ""))))] |
b46a36c7 | 5332 | "TARGET_NEON && !BYTES_BIG_ENDIAN" |
6e4376d7 | 5333 | "vmovl.<US><V_sz_elem> %q0, %e1" |
32093010 | 5334 | [(set_attr "type" "neon_shift_imm_long")] |
6e4376d7 | 5335 | ) |
5336 | ||
;; Extends (SE) the half of a VU-mode vector selected by operand 2, which
;; must satisfy vect_par_constant_high, and emits a single vmovl reading %f1.
;; Only enabled when !BYTES_BIG_ENDIAN.
5337 | (define_insn "neon_vec_unpack<US>_hi_<mode>" |
5338 | [(set (match_operand:<V_unpack> 0 "register_operand" "=w") |
5339 | (SE:<V_unpack> (vec_select:<V_HALF> |
5340 | (match_operand:VU 1 "register_operand" "w") |
5341 | (match_operand:VU 2 "vect_par_constant_high" ""))))] |
b46a36c7 | 5342 | "TARGET_NEON && !BYTES_BIG_ENDIAN" |
6e4376d7 | 5343 | "vmovl.<US><V_sz_elem> %q0, %f1" |
32093010 | 5344 | [(set_attr "type" "neon_shift_imm_long")] |
6e4376d7 | 5345 | ) |
5346 | ||
;; Expander for the VU modes.  Builds a PARALLEL holding the element indices
;; nunits/2 .. nunits-1 (the high half) and passes it as the selector operand
;; to neon_vec_unpack<US>_hi_<mode>.  Only enabled when !BYTES_BIG_ENDIAN.
5347 | (define_expand "vec_unpack<US>_hi_<mode>" |
5348 | [(match_operand:<V_unpack> 0 "register_operand" "") |
5349 | (SE:<V_unpack> (match_operand:VU 1 "register_operand"))] |
b46a36c7 | 5350 | "TARGET_NEON && !BYTES_BIG_ENDIAN" |
6e4376d7 | 5351 | { |
5352 | rtvec v = rtvec_alloc (<V_mode_nunits>/2) ; |
5353 | rtx t1; |
5354 | int i; |
5355 | for (i = 0; i < (<V_mode_nunits>/2); i++) |
5356 | RTVEC_ELT (v, i) = GEN_INT ((<V_mode_nunits>/2) + i); |
5357 | |
5358 | t1 = gen_rtx_PARALLEL (<MODE>mode, v); |
5359 | emit_insn (gen_neon_vec_unpack<US>_hi_<mode> (operands[0], |
5360 | operands[1], |
5361 | t1)); |
5362 | DONE; |
5363 | } |
5364 | ) |
5365 | ||
;; Expander for the VU modes.  Builds a PARALLEL holding the element indices
;; 0 .. nunits/2-1 (the low half) and passes it as the selector operand to
;; neon_vec_unpack<US>_lo_<mode>.  Only enabled when !BYTES_BIG_ENDIAN.
5366 | (define_expand "vec_unpack<US>_lo_<mode>" |
5367 | [(match_operand:<V_unpack> 0 "register_operand" "") |
5368 | (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))] |
b46a36c7 | 5369 | "TARGET_NEON && !BYTES_BIG_ENDIAN" |
6e4376d7 | 5370 | { |
5371 | rtvec v = rtvec_alloc (<V_mode_nunits>/2) ; |
5372 | rtx t1; |
5373 | int i; |
5374 | for (i = 0; i < (<V_mode_nunits>/2) ; i++) |
5375 | RTVEC_ELT (v, i) = GEN_INT (i); |
5376 | t1 = gen_rtx_PARALLEL (<MODE>mode, v); |
5377 | emit_insn (gen_neon_vec_unpack<US>_lo_<mode> (operands[0], |
5378 | operands[1], |
5379 | t1)); |
5380 | DONE; |
5381 | } |
5382 | ) |
5383 | ||
;; Widening multiply of the halves of operands 1 and 3 selected by operand 2
;; (vect_par_constant_low); the same selector is reused for both inputs via
;; match_dup.  Emits one vmull reading %e1 and %e3.  Only enabled when
;; !BYTES_BIG_ENDIAN.
5384 | (define_insn "neon_vec_<US>mult_lo_<mode>" |
5385 | [(set (match_operand:<V_unpack> 0 "register_operand" "=w") |
5386 | (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF> |
5387 | (match_operand:VU 1 "register_operand" "w") |
5388 | (match_operand:VU 2 "vect_par_constant_low" ""))) |
5389 | (SE:<V_unpack> (vec_select:<V_HALF> |
5390 | (match_operand:VU 3 "register_operand" "w") |
5391 | (match_dup 2)))))] |
b46a36c7 | 5392 | "TARGET_NEON && !BYTES_BIG_ENDIAN" |
6e4376d7 | 5393 | "vmull.<US><V_sz_elem> %q0, %e1, %e3" |
32093010 | 5394 | [(set_attr "type" "neon_mul_<V_elem_ch>_long")] |
6e4376d7 | 5395 | ) |
5396 | ||
;; Expander for the VU modes.  Builds a PARALLEL of the low-half element
;; indices 0 .. nunits/2-1 and emits neon_vec_<US>mult_lo_<mode>.  Note the
;; argument order: the selector is the insn's operand 2, so the expander's
;; second input (operands[2]) is passed last.  Only enabled when
;; !BYTES_BIG_ENDIAN.
5397 | (define_expand "vec_widen_<US>mult_lo_<mode>" |
5398 | [(match_operand:<V_unpack> 0 "register_operand" "") |
5399 | (SE:<V_unpack> (match_operand:VU 1 "register_operand" "")) |
5400 | (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))] |
b46a36c7 | 5401 | "TARGET_NEON && !BYTES_BIG_ENDIAN" |
6e4376d7 | 5402 | { |
5403 | rtvec v = rtvec_alloc (<V_mode_nunits>/2) ; |
5404 | rtx t1; |
5405 | int i; |
5406 | for (i = 0; i < (<V_mode_nunits>/2) ; i++) |
5407 | RTVEC_ELT (v, i) = GEN_INT (i); |
5408 | t1 = gen_rtx_PARALLEL (<MODE>mode, v); |
5409 | |
5410 | emit_insn (gen_neon_vec_<US>mult_lo_<mode> (operands[0], |
5411 | operands[1], |
5412 | t1, |
5413 | operands[2])); |
5414 | DONE; |
5415 | } |
5416 | ) |
5417 | ||
;; Widening multiply of the halves of operands 1 and 3 selected by operand 2
;; (vect_par_constant_high); the same selector is reused for both inputs via
;; match_dup.  Emits one vmull reading %f1 and %f3.  Only enabled when
;; !BYTES_BIG_ENDIAN.
5418 | (define_insn "neon_vec_<US>mult_hi_<mode>" |
5419 | [(set (match_operand:<V_unpack> 0 "register_operand" "=w") |
5420 | (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF> |
5421 | (match_operand:VU 1 "register_operand" "w") |
5422 | (match_operand:VU 2 "vect_par_constant_high" ""))) |
5423 | (SE:<V_unpack> (vec_select:<V_HALF> |
5424 | (match_operand:VU 3 "register_operand" "w") |
5425 | (match_dup 2)))))] |
b46a36c7 | 5426 | "TARGET_NEON && !BYTES_BIG_ENDIAN" |
6e4376d7 | 5427 | "vmull.<US><V_sz_elem> %q0, %f1, %f3" |
32093010 | 5428 | [(set_attr "type" "neon_mul_<V_elem_ch>_long")] |
6e4376d7 | 5429 | ) |
5430 | ||
;; Expander for the VU modes.  Builds a PARALLEL of the high-half element
;; indices nunits/2 .. nunits-1 and emits neon_vec_<US>mult_hi_<mode>.  The
;; selector is the insn's operand 2, so the expander's second input
;; (operands[2]) is passed last.  Only enabled when !BYTES_BIG_ENDIAN.
5431 | (define_expand "vec_widen_<US>mult_hi_<mode>" |
5432 | [(match_operand:<V_unpack> 0 "register_operand" "") |
5433 | (SE:<V_unpack> (match_operand:VU 1 "register_operand" "")) |
5434 | (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))] |
b46a36c7 | 5435 | "TARGET_NEON && !BYTES_BIG_ENDIAN" |
6e4376d7 | 5436 | { |
5437 | rtvec v = rtvec_alloc (<V_mode_nunits>/2) ; |
5438 | rtx t1; |
5439 | int i; |
5440 | for (i = 0; i < (<V_mode_nunits>/2) ; i++) |
5441 | RTVEC_ELT (v, i) = GEN_INT (<V_mode_nunits>/2 + i); |
5442 | t1 = gen_rtx_PARALLEL (<MODE>mode, v); |
5443 | |
5444 | emit_insn (gen_neon_vec_<US>mult_hi_<mode> (operands[0], |
5445 | operands[1], |
5446 | t1, |
5447 | operands[2])); |
5448 | DONE; |
5449 | |
5450 | } |
5451 | ) |
5452 | ||
;; Widening shift left for the VW modes: the RTL is an extension (SE) of an
;; ashift by a constant that must satisfy
;; const_neon_scalar_shift_amount_operand.  Emits a single vshll.  Unlike the
;; neighbouring patterns, the condition here is just TARGET_NEON.
6083c152 | 5453 | (define_insn "neon_vec_<US>shiftl_<mode>" |
5454 | [(set (match_operand:<V_widen> 0 "register_operand" "=w") |
5455 | (SE:<V_widen> (ashift:VW (match_operand:VW 1 "register_operand" "w") |
5456 | (match_operand:<V_innermode> 2 "const_neon_scalar_shift_amount_operand" ""))))] |
5457 | "TARGET_NEON" |
5458 | { |
5459 | return "vshll.<US><V_sz_elem> %q0, %P1, %2"; |
5460 | } |
32093010 | 5461 | [(set_attr "type" "neon_shift_imm_long")] |
6083c152 | 5462 | ) |
5463 | ||
;; Expander for the VU modes.  Takes the <V_HALF>mode subreg of operand 1 at
;; byte offset 0 and feeds it, with the shift amount, to the half-width
;; neon_vec_<US>shiftl pattern.  Only enabled when !BYTES_BIG_ENDIAN.
5464 | (define_expand "vec_widen_<US>shiftl_lo_<mode>" |
5465 | [(match_operand:<V_unpack> 0 "register_operand" "") |
5466 | (SE:<V_unpack> (match_operand:VU 1 "register_operand" "")) |
5467 | (match_operand:SI 2 "immediate_operand" "i")] |
5468 | "TARGET_NEON && !BYTES_BIG_ENDIAN" |
5469 | { |
5470 | emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0], |
5471 | simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, 0), |
5472 | operands[2])); |
5473 | DONE; |
5474 | } |
5475 | ) |
5476 | ||
;; Expander for the VU modes.  Takes the <V_HALF>mode subreg of operand 1 at
;; byte offset GET_MODE_SIZE (<V_HALF>mode) -- the second half -- and feeds
;; it, with the shift amount, to the half-width neon_vec_<US>shiftl pattern.
;; Only enabled when !BYTES_BIG_ENDIAN.
5477 | (define_expand "vec_widen_<US>shiftl_hi_<mode>" |
5478 | [(match_operand:<V_unpack> 0 "register_operand" "") |
5479 | (SE:<V_unpack> (match_operand:VU 1 "register_operand" "")) |
5480 | (match_operand:SI 2 "immediate_operand" "i")] |
5481 | "TARGET_NEON && !BYTES_BIG_ENDIAN" |
5482 | { |
5483 | emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0], |
5484 | simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, |
5485 | GET_MODE_SIZE (<V_HALF>mode)), |
5486 | operands[2])); |
5487 | DONE; |
5488 | } |
5489 | ) |
5490 | ||
6e4376d7 | 5491 | ;; Vectorize for non-neon-quad case |
;; Extends (SE) a whole VDI-mode vector to <V_widen> with a single vmovl.
;; NOTE(review): the "type" here is neon_move, while the other vmovl patterns
;; in this file (neon_vec_unpack<US>_lo/hi) use neon_shift_imm_long --
;; confirm whether the difference is intentional.
5492 | (define_insn "neon_unpack<US>_<mode>" |
5493 | [(set (match_operand:<V_widen> 0 "register_operand" "=w") |
80d18bad | 5494 | (SE:<V_widen> (match_operand:VDI 1 "register_operand" "w")))] |
6e4376d7 | 5495 | "TARGET_NEON" |
80d18bad | 5496 | "vmovl.<US><V_sz_elem> %q0, %P1" |
32093010 | 5497 | [(set_attr "type" "neon_move")] |
6e4376d7 | 5498 | ) |
5499 | ||
;; Expander for the VDI modes.  Extends the whole input into a fresh
;; <V_widen>mode temporary via neon_unpack<US>_<mode>, then copies the low
;; half of that temporary into operand 0 with neon_vget_low.
5500 | (define_expand "vec_unpack<US>_lo_<mode>" |
5501 | [(match_operand:<V_double_width> 0 "register_operand" "") |
5502 | (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))] |
5503 | "TARGET_NEON" |
5504 | { |
5505 | rtx tmpreg = gen_reg_rtx (<V_widen>mode); |
5506 | emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1])); |
5507 | emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg)); |
5508 | |
5509 | DONE; |
5510 | } |
5511 | ) |
5512 | ||
;; Expander for the VDI modes.  Extends the whole input into a fresh
;; <V_widen>mode temporary via neon_unpack<US>_<mode>, then copies the high
;; half of that temporary into operand 0 with neon_vget_high.
5513 | (define_expand "vec_unpack<US>_hi_<mode>" |
5514 | [(match_operand:<V_double_width> 0 "register_operand" "") |
5515 | (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))] |
5516 | "TARGET_NEON" |
5517 | { |
5518 | rtx tmpreg = gen_reg_rtx (<V_widen>mode); |
5519 | emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1])); |
5520 | emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg)); |
5521 | |
5522 | DONE; |
5523 | } |
5524 | ) |
5525 | ||
;; Widening multiply of two whole VDI-mode vectors: both inputs are extended
;; (SE) to <V_widen> and multiplied.  Emits a single vmull.
5526 | (define_insn "neon_vec_<US>mult_<mode>" |
5527 | [(set (match_operand:<V_widen> 0 "register_operand" "=w") |
5528 | (mult:<V_widen> (SE:<V_widen> |
5529 | (match_operand:VDI 1 "register_operand" "w")) |
5530 | (SE:<V_widen> |
5531 | (match_operand:VDI 2 "register_operand" "w"))))] |
5532 | "TARGET_NEON" |
80d18bad | 5533 | "vmull.<US><V_sz_elem> %q0, %P1, %P2" |
32093010 | 5534 | [(set_attr "type" "neon_mul_<V_elem_ch>_long")] |
6e4376d7 | 5535 | ) |
5536 | ||
;; Expander for the VDI modes.  Computes the full widening product into a
;; fresh <V_widen>mode temporary via neon_vec_<US>mult_<mode>, then copies
;; the high half of that temporary into operand 0 with neon_vget_high.
5537 | (define_expand "vec_widen_<US>mult_hi_<mode>" |
5538 | [(match_operand:<V_double_width> 0 "register_operand" "") |
5539 | (SE:<V_double_width> (match_operand:VDI 1 "register_operand" "")) |
5540 | (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))] |
5541 | "TARGET_NEON" |
5542 | { |
5543 | rtx tmpreg = gen_reg_rtx (<V_widen>mode); |
5544 | emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2])); |
5545 | emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg)); |
5546 | |
5547 | DONE; |
5548 | |
5549 | } |
5550 | ) |
5551 | ||
;; Expander for the VDI modes.  Computes the full widening product into a
;; fresh <V_widen>mode temporary via neon_vec_<US>mult_<mode>, then copies
;; the low half of that temporary into operand 0 with neon_vget_low.
5552 | (define_expand "vec_widen_<US>mult_lo_<mode>" |
5553 | [(match_operand:<V_double_width> 0 "register_operand" "") |
5554 | (SE:<V_double_width> (match_operand:VDI 1 "register_operand" "")) |
5555 | (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))] |
5556 | "TARGET_NEON" |
5557 | { |
5558 | rtx tmpreg = gen_reg_rtx (<V_widen>mode); |
5559 | emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2])); |
5560 | emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg)); |
5561 | |
5562 | DONE; |
5563 | |
5564 | } |
5565 | ) |
a62cc977 | 5566 | |
;; Expander for the VDI modes.  Performs the widening shift into a fresh
;; <V_widen>mode temporary via neon_vec_<US>shiftl_<mode>, then copies the
;; high half of that temporary into operand 0 with neon_vget_high.
6083c152 | 5567 | (define_expand "vec_widen_<US>shiftl_hi_<mode>" |
5568 | [(match_operand:<V_double_width> 0 "register_operand" "") |
5569 | (SE:<V_double_width> (match_operand:VDI 1 "register_operand" "")) |
5570 | (match_operand:SI 2 "immediate_operand" "i")] |
5571 | "TARGET_NEON" |
5572 | { |
5573 | rtx tmpreg = gen_reg_rtx (<V_widen>mode); |
5574 | emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2])); |
5575 | emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg)); |
5576 | |
5577 | DONE; |
5578 | } |
5579 | ) |
5580 | ||
;; Expander for the VDI modes.  Performs the widening shift into a fresh
;; <V_widen>mode temporary via neon_vec_<US>shiftl_<mode>, then copies the
;; low half of that temporary into operand 0 with neon_vget_low.
5581 | (define_expand "vec_widen_<US>shiftl_lo_<mode>" |
5582 | [(match_operand:<V_double_width> 0 "register_operand" "") |
5583 | (SE:<V_double_width> (match_operand:VDI 1 "register_operand" "")) |
5584 | (match_operand:SI 2 "immediate_operand" "i")] |
5585 | "TARGET_NEON" |
5586 | { |
5587 | rtx tmpreg = gen_reg_rtx (<V_widen>mode); |
5588 | emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2])); |
5589 | emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg)); |
5590 | |
5591 | DONE; |
5592 | } |
5593 | ) |
5594 | ||
b46a36c7 | 5595 | ; FIXME: These instruction patterns can't be used safely in big-endian mode |
5596 | ; because the ordering of vector elements in Q registers is different from what | |
5597 | ; the semantics of the instructions require. | |
5598 | ||
;; Truncates two VN-mode inputs and concatenates the results.  Implemented
;; as two vmovn instructions writing %e0 and %f0 respectively, hence
;; length 8 and type "multiple".  The output is earlyclobber ("=&w") since
;; the first vmovn writes the destination before operand 2 has been read.
;; Only enabled when !BYTES_BIG_ENDIAN.
a62cc977 | 5599 | (define_insn "vec_pack_trunc_<mode>" |
5600 | [(set (match_operand:<V_narrow_pack> 0 "register_operand" "=&w") |
5601 | (vec_concat:<V_narrow_pack> |
5602 | (truncate:<V_narrow> |
5603 | (match_operand:VN 1 "register_operand" "w")) |
5604 | (truncate:<V_narrow> |
5605 | (match_operand:VN 2 "register_operand" "w"))))] |
b46a36c7 | 5606 | "TARGET_NEON && !BYTES_BIG_ENDIAN" |
11371434 | 5607 | "vmovn.i<V_sz_elem>\t%e0, %q1\;vmovn.i<V_sz_elem>\t%f0, %q2" |
32093010 | 5608 | [(set_attr "type" "multiple") |
11371434 | 5609 | (set_attr "length" "8")] |
a62cc977 | 5610 | ) |
5611 | ||
5612 | ;; For the non-quad case. | |
;; Truncates one VN-mode vector to <V_narrow> with a single vmovn.  Only
;; enabled when !BYTES_BIG_ENDIAN.
5613 | (define_insn "neon_vec_pack_trunc_<mode>" |
5614 | [(set (match_operand:<V_narrow> 0 "register_operand" "=w") |
80d18bad | 5615 | (truncate:<V_narrow> (match_operand:VN 1 "register_operand" "w")))] |
b46a36c7 | 5616 | "TARGET_NEON && !BYTES_BIG_ENDIAN" |
80d18bad | 5617 | "vmovn.i<V_sz_elem>\t%P0, %q1" |
32093010 | 5618 | [(set_attr "type" "neon_move_narrow_q")] |
a62cc977 | 5619 | ) |
5620 | ||
;; Expander for the VSHFT modes.  Assembles the two inputs into one
;; <V_DOUBLE>mode temporary (operand 1 via move_lo_quad, operand 2 via
;; move_hi_quad) and then narrows that temporary into operand 0 with
;; neon_vec_pack_trunc.  Only enabled when !BYTES_BIG_ENDIAN.
5621 | (define_expand "vec_pack_trunc_<mode>" |
5622 | [(match_operand:<V_narrow_pack> 0 "register_operand" "") |
5623 | (match_operand:VSHFT 1 "register_operand" "") |
5624 | (match_operand:VSHFT 2 "register_operand")] |
b46a36c7 | 5625 | "TARGET_NEON && !BYTES_BIG_ENDIAN" |
a62cc977 | 5626 | { |
5627 | rtx tempreg = gen_reg_rtx (<V_DOUBLE>mode); |
5628 | |
5629 | emit_insn (gen_move_lo_quad_<V_double> (tempreg, operands[1])); |
5630 | emit_insn (gen_move_hi_quad_<V_double> (tempreg, operands[2])); |
5631 | emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], tempreg)); |
5632 | DONE; |
5633 | }) |
99c1d5bc | 5634 | |
;; Matches abs (minus (op1, op2)) over the VDQ modes and emits a single vabd.
;; Float modes are only accepted under flag_unsafe_math_optimizations.  The
;; "type" attribute is neon_fp_abd_s<q> for float modes, neon_abd<q>
;; otherwise.
5635 | (define_insn "neon_vabd<mode>_2" |
5636 | [(set (match_operand:VDQ 0 "s_register_operand" "=w") |
5637 | (abs:VDQ (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w") |
5638 | (match_operand:VDQ 2 "s_register_operand" "w"))))] |
5639 | "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" |
5640 | "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2" |
52432540 | 5641 | [(set (attr "type") |
99c1d5bc | 5642 | (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0)) |
32093010 | 5643 | (const_string "neon_fp_abd_s<q>") |
5644 | (const_string "neon_abd<q>")))] |
99c1d5bc | 5645 | ) |
5646 | ||
;; Same as neon_vabd<mode>_2 but matches the subtraction in its
;; UNSPEC_VSUB form: abs (unspec [op1, op2] UNSPEC_VSUB).  Note the element
;; suffix here is <V_if_elem>, whereas the _2 pattern uses <V_s_elem>.
5647 | (define_insn "neon_vabd<mode>_3" |
5648 | [(set (match_operand:VDQ 0 "s_register_operand" "=w") |
5649 | (abs:VDQ (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w") |
5650 | (match_operand:VDQ 2 "s_register_operand" "w")] |
5651 | UNSPEC_VSUB)))] |
5652 | "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" |
5653 | "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2" |
52432540 | 5654 | [(set (attr "type") |
99c1d5bc | 5655 | (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0)) |
32093010 | 5656 | (const_string "neon_fp_abd_s<q>") |
5657 | (const_string "neon_abd<q>")))] |
99c1d5bc | 5658 | ) |
8012d2c2 | 5659 | |
5660 | ;; Copy from core-to-neon regs, then extend, not vice-versa | |
5661 | ||
;; SI -> DI sign extension.  Applies after reload when the destination
;; satisfies IS_VFP_REGNUM.  The source is duplicated into the destination
;; register viewed as V2SImode (operand 2 is built from operand 0's register
;; number), then the DImode destination is arithmetic-shifted right by 32.
5662 | (define_split |
5663 | [(set (match_operand:DI 0 "s_register_operand" "") |
5664 | (sign_extend:DI (match_operand:SI 1 "s_register_operand" "")))] |
5665 | "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))" |
5666 | [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1))) |
5667 | (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 32)))] |
5668 | { |
5669 | operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0])); |
5670 | }) |
5671 | ||
;; HI -> DI sign extension.  Applies after reload when the destination
;; satisfies IS_VFP_REGNUM.  The source is duplicated into the destination
;; register viewed as V4HImode, then the DImode destination is
;; arithmetic-shifted right by 48.
5672 | (define_split |
5673 | [(set (match_operand:DI 0 "s_register_operand" "") |
5674 | (sign_extend:DI (match_operand:HI 1 "s_register_operand" "")))] |
5675 | "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))" |
5676 | [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1))) |
5677 | (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 48)))] |
5678 | { |
5679 | operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0])); |
5680 | }) |
5681 | ||
;; QI -> DI sign extension.  Applies after reload when the destination
;; satisfies IS_VFP_REGNUM.  The source is duplicated into the destination
;; register viewed as V8QImode, then the DImode destination is
;; arithmetic-shifted right by 56.
5682 | (define_split |
5683 | [(set (match_operand:DI 0 "s_register_operand" "") |
5684 | (sign_extend:DI (match_operand:QI 1 "s_register_operand" "")))] |
5685 | "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))" |
5686 | [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1))) |
5687 | (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 56)))] |
5688 | { |
5689 | operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0])); |
5690 | }) |
5691 | ||
;; SI -> DI zero extension.  Applies after reload when the destination
;; satisfies IS_VFP_REGNUM.  The source is duplicated into the destination
;; register viewed as V2SImode, then the DImode destination is
;; logical-shifted right by 32.
5692 | (define_split |
5693 | [(set (match_operand:DI 0 "s_register_operand" "") |
5694 | (zero_extend:DI (match_operand:SI 1 "s_register_operand" "")))] |
5695 | "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))" |
5696 | [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1))) |
5697 | (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 32)))] |
5698 | { |
5699 | operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0])); |
5700 | }) |
5701 | ||
;; HI -> DI zero extension.  Applies after reload when the destination
;; satisfies IS_VFP_REGNUM.  The source is duplicated into the destination
;; register viewed as V4HImode, then the DImode destination is
;; logical-shifted right by 48.
5702 | (define_split |
5703 | [(set (match_operand:DI 0 "s_register_operand" "") |
5704 | (zero_extend:DI (match_operand:HI 1 "s_register_operand" "")))] |
5705 | "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))" |
5706 | [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1))) |
5707 | (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 48)))] |
5708 | { |
5709 | operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0])); |
5710 | }) |
5711 | ||
;; QI -> DI zero extension.  Applies after reload when the destination
;; satisfies IS_VFP_REGNUM.  The source is duplicated into the destination
;; register viewed as V8QImode, then the DImode destination is
;; logical-shifted right by 56.
5712 | (define_split |
5713 | [(set (match_operand:DI 0 "s_register_operand" "") |
5714 | (zero_extend:DI (match_operand:QI 1 "s_register_operand" "")))] |
5715 | "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))" |
5716 | [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1))) |
5717 | (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 56)))] |
5718 | { |
5719 | operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0])); |
5720 | }) |