]>
Commit | Line | Data |
---|---|---|
df401d54 | 1 | ;; Machine description for AArch64 AdvSIMD architecture. |
f1717362 | 2 | ;; Copyright (C) 2011-2016 Free Software Foundation, Inc. |
df401d54 | 3 | ;; Contributed by ARM Ltd. |
4 | ;; | |
5 | ;; This file is part of GCC. | |
6 | ;; | |
7 | ;; GCC is free software; you can redistribute it and/or modify it | |
8 | ;; under the terms of the GNU General Public License as published by | |
9 | ;; the Free Software Foundation; either version 3, or (at your option) | |
10 | ;; any later version. | |
11 | ;; | |
12 | ;; GCC is distributed in the hope that it will be useful, but | |
13 | ;; WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 | ;; General Public License for more details. | |
16 | ;; | |
17 | ;; You should have received a copy of the GNU General Public License | |
18 | ;; along with GCC; see the file COPYING3. If not see | |
19 | ;; <http://www.gnu.org/licenses/>. | |
20 | ||
;; Move expander for the VALL_F16 modes.  When the destination is a MEM,
;; operand 1 is first forced into a register, so a memory destination is
;; only ever written from a register.
df401d54 | 21 | (define_expand "mov<mode>" |
aea31229 | 22 | [(set (match_operand:VALL_F16 0 "nonimmediate_operand" "") |
23 | (match_operand:VALL_F16 1 "general_operand" ""))] | |
df401d54 | 24 | "TARGET_SIMD" |
25 | " | |
26 | if (GET_CODE (operands[0]) == MEM) | |
27 | operands[1] = force_reg (<MODE>mode, operands[1]); | |
28 | " | |
29 | ) | |
30 | ||
;; Misaligned-move expander for the VALL modes.  Operand 1 is forced into
;; a register only when neither operand is already a register.
31 | (define_expand "movmisalign<mode>" | |
8013ad1b | 32 | [(set (match_operand:VALL 0 "nonimmediate_operand" "") |
33 | (match_operand:VALL 1 "general_operand" ""))] | |
df401d54 | 34 | "TARGET_SIMD" |
35 | { | |
36 | /* This pattern is not permitted to fail during expansion: if both arguments | |
37 | are non-registers (e.g. memory := constant, which can be created by the | |
38 | auto-vectorizer), force operand 1 into a register. */ | |
39 | if (!register_operand (operands[0], <MODE>mode) | |
40 | && !register_operand (operands[1], <MODE>mode)) | |
41 | operands[1] = force_reg (<MODE>mode, operands[1]); | |
42 | }) | |
43 | ||
;; Duplicate a <VEL> scalar into every lane of a VDQ_I vector.
;; Alternative 0 takes the scalar from an "r"-constraint register,
;; alternative 1 from lane 0 of a "w"-constraint register.
44 | (define_insn "aarch64_simd_dup<mode>" | |
658fa7f6 | 45 | [(set (match_operand:VDQ_I 0 "register_operand" "=w, w") |
46 | (vec_duplicate:VDQ_I | |
47 | (match_operand:<VEL> 1 "register_operand" "r, w")))] | |
df401d54 | 48 | "TARGET_SIMD" |
2d67c34c | 49 | "@ |
50 | dup\\t%0.<Vtype>, %<vw>1 | |
51 | dup\\t%0.<Vtype>, %1.<Vetype>[0]" | |
52e95e58 | 52 | [(set_attr "type" "neon_from_gp<q>, neon_dup<q>")] |
2d67c34c | 53 | ) |
54 | ||
;; Duplicate a <VEL> scalar into every lane of a VDQF_F16 vector.  The
;; only alternative reads lane 0 of a "w"-constraint register.
55 | (define_insn "aarch64_simd_dup<mode>" | |
57887f75 | 56 | [(set (match_operand:VDQF_F16 0 "register_operand" "=w") |
57 | (vec_duplicate:VDQF_F16 | |
58 | (match_operand:<VEL> 1 "register_operand" "w")))] | |
2d67c34c | 59 | "TARGET_SIMD" |
60 | "dup\\t%0.<Vtype>, %1.<Vetype>[0]" | |
52e95e58 | 61 | [(set_attr "type" "neon_dup<q>")] |
df401d54 | 62 | ) |
63 | ||
;; Duplicate one selected lane of operand 1 across the whole result.
;; The lane index in operand 2 is remapped with ENDIAN_LANE_N before it
;; is printed in the assembly template.
64 | (define_insn "aarch64_dup_lane<mode>" | |
57887f75 | 65 | [(set (match_operand:VALL_F16 0 "register_operand" "=w") |
66 | (vec_duplicate:VALL_F16 | |
df401d54 | 67 | (vec_select:<VEL> |
57887f75 | 68 | (match_operand:VALL_F16 1 "register_operand" "w") |
df401d54 | 69 | (parallel [(match_operand:SI 2 "immediate_operand" "i")]) |
70 | )))] | |
71 | "TARGET_SIMD" | |
c034024c | 72 | { |
73 | operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]))); | |
74 | return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]"; | |
75 | } | |
52e95e58 | 76 | [(set_attr "type" "neon_dup<q>")] |
df401d54 | 77 | ) |
78 | ||
;; Duplicate one selected lane across the result when the source vector
;; has mode <VSWAP_WIDTH> rather than the result mode.  The lane index is
;; therefore remapped with ENDIAN_LANE_N in <VSWAP_WIDTH>mode.
2d67c34c | 79 | (define_insn "aarch64_dup_lane_<vswap_width_name><mode>" |
57887f75 | 80 | [(set (match_operand:VALL_F16 0 "register_operand" "=w") |
81 | (vec_duplicate:VALL_F16 | |
2d67c34c | 82 | (vec_select:<VEL> |
83 | (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w") | |
84 | (parallel [(match_operand:SI 2 "immediate_operand" "i")]) | |
85 | )))] | |
df401d54 | 86 | "TARGET_SIMD" |
c034024c | 87 | { |
88 | operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode, | |
89 | INTVAL (operands[2]))); | |
90 | return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]"; | |
91 | } | |
52e95e58 | 92 | [(set_attr "type" "neon_dup<q>")] |
df401d54 | 93 | ) |
94 | ||
;; Move insn for the VD modes.  At least one operand must be a register.
;; The alternatives are, in order: load (ldr), store (str), "w" to "w"
;; copy (orr), "w" to "r" (umov), "r" to "w" (ins), "r" to "r" (mov) and
;; a Dn-constraint immediate whose assembly is produced by
;; aarch64_output_simd_mov_immediate (called with a width of 64).
95 | (define_insn "*aarch64_simd_mov<mode>" | |
8013ad1b | 96 | [(set (match_operand:VD 0 "nonimmediate_operand" |
571ec0ac | 97 | "=w, m, w, ?r, ?w, ?r, w") |
8013ad1b | 98 | (match_operand:VD 1 "general_operand" |
571ec0ac | 99 | "m, w, w, w, r, r, Dn"))] |
df401d54 | 100 | "TARGET_SIMD |
101 | && (register_operand (operands[0], <MODE>mode) | |
102 | || register_operand (operands[1], <MODE>mode))" | |
103 | { | |
104 | switch (which_alternative) | |
105 | { | |
571ec0ac | 106 | case 0: return "ldr\\t%d0, %1"; |
107 | case 1: return "str\\t%d1, %0"; | |
df401d54 | 108 | case 2: return "orr\t%0.<Vbtype>, %1.<Vbtype>, %1.<Vbtype>"; |
109 | case 3: return "umov\t%0, %1.d[0]"; | |
110 | case 4: return "ins\t%0.d[0], %1"; | |
111 | case 5: return "mov\t%0, %1"; | |
112 | case 6: | |
04b042b2 | 113 | return aarch64_output_simd_mov_immediate (operands[1], |
72841352 | 114 | <MODE>mode, 64); |
df401d54 | 115 | default: gcc_unreachable (); |
116 | } | |
117 | } | |
52e95e58 | 118 | [(set_attr "type" "neon_load1_1reg<q>, neon_store1_1reg<q>,\ |
5efe19ee | 119 | neon_logic<q>, neon_to_gp<q>, neon_from_gp<q>,\ |
52e95e58 | 120 | mov_reg, neon_move<q>")] |
df401d54 | 121 | ) |
122 | ||
;; Move insn for the VQ modes.  At least one operand must be a register.
;; Alternatives 0-2 are load (ldr), store (str) and "w" to "w" copy (orr).
;; Alternatives 3-5 (an "r"-constraint register on either side) return "#"
;; and are given length 8; they are expected to be broken up by the
;; define_splits on VQ register moves later in this file.  Alternative 6
;; is a Dn-constraint immediate printed by
;; aarch64_output_simd_mov_immediate (called with a width of 128).
123 | (define_insn "*aarch64_simd_mov<mode>" | |
8013ad1b | 124 | [(set (match_operand:VQ 0 "nonimmediate_operand" |
571ec0ac | 125 | "=w, m, w, ?r, ?w, ?r, w") |
8013ad1b | 126 | (match_operand:VQ 1 "general_operand" |
571ec0ac | 127 | "m, w, w, w, r, r, Dn"))] |
df401d54 | 128 | "TARGET_SIMD |
129 | && (register_operand (operands[0], <MODE>mode) | |
130 | || register_operand (operands[1], <MODE>mode))" | |
131 | { | |
72841352 | 132 | switch (which_alternative) |
133 | { | |
e30d3162 | 134 | case 0: |
571ec0ac | 135 | return "ldr\\t%q0, %1"; |
e30d3162 | 136 | case 1: |
571ec0ac | 137 | return "str\\t%q1, %0"; |
e30d3162 | 138 | case 2: |
139 | return "orr\t%0.<Vbtype>, %1.<Vbtype>, %1.<Vbtype>"; | |
140 | case 3: | |
141 | case 4: | |
142 | case 5: | |
143 | return "#"; | |
72841352 | 144 | case 6: |
04b042b2 | 145 | return aarch64_output_simd_mov_immediate (operands[1], <MODE>mode, 128); |
e30d3162 | 146 | default: |
147 | gcc_unreachable (); | |
72841352 | 148 | } |
df401d54 | 149 | } |
52e95e58 | 150 | [(set_attr "type" "neon_load1_1reg<q>, neon_store1_1reg<q>,\ |
5efe19ee | 151 | neon_logic<q>, multiple, multiple, multiple,\ |
152 | neon_move<q>") | |
df401d54 | 153 | (set_attr "length" "4,4,4,8,8,8,4")] |
154 | ) | |
155 | ||
;; Load two VD registers with a single ldp.  The insn condition requires
;; the address of operand 3 to equal the address of operand 1 plus
;; GET_MODE_SIZE (<MODE>mode), i.e. the second slot directly follows the
;; first.
9202af54 | 156 | (define_insn "load_pair<mode>" |
157 | [(set (match_operand:VD 0 "register_operand" "=w") | |
158 | (match_operand:VD 1 "aarch64_mem_pair_operand" "Ump")) | |
159 | (set (match_operand:VD 2 "register_operand" "=w") | |
160 | (match_operand:VD 3 "memory_operand" "m"))] | |
161 | "TARGET_SIMD | |
162 | && rtx_equal_p (XEXP (operands[3], 0), | |
163 | plus_constant (Pmode, | |
164 | XEXP (operands[1], 0), | |
165 | GET_MODE_SIZE (<MODE>mode)))" | |
166 | "ldp\\t%d0, %d2, %1" | |
167 | [(set_attr "type" "neon_ldp")] | |
168 | ) | |
169 | ||
;; Store two VD registers with a single stp.  The insn condition requires
;; the address of operand 2 to equal the address of operand 0 plus
;; GET_MODE_SIZE (<MODE>mode), i.e. the second slot directly follows the
;; first.
170 | (define_insn "store_pair<mode>" | |
171 | [(set (match_operand:VD 0 "aarch64_mem_pair_operand" "=Ump") | |
172 | (match_operand:VD 1 "register_operand" "w")) | |
173 | (set (match_operand:VD 2 "memory_operand" "=m") | |
174 | (match_operand:VD 3 "register_operand" "w"))] | |
175 | "TARGET_SIMD | |
176 | && rtx_equal_p (XEXP (operands[2], 0), | |
177 | plus_constant (Pmode, | |
178 | XEXP (operands[0], 0), | |
179 | GET_MODE_SIZE (<MODE>mode)))" | |
180 | "stp\\t%d1, %d3, %0" | |
181 | [(set_attr "type" "neon_stp")] | |
182 | ) | |
183 | ||
;; After reload, split a VQ register move whose source and destination
;; both satisfy GP_REGNUM_P.  The replacement insns are emitted by
;; aarch64_simd_emit_reg_reg_move, called with DImode and 2.
df401d54 | 184 | (define_split |
185 | [(set (match_operand:VQ 0 "register_operand" "") | |
186 | (match_operand:VQ 1 "register_operand" ""))] | |
187 | "TARGET_SIMD && reload_completed | |
188 | && GP_REGNUM_P (REGNO (operands[0])) | |
189 | && GP_REGNUM_P (REGNO (operands[1]))" | |
8bcdf19e | 190 | [(const_int 0)] |
df401d54 | 191 | { |
8bcdf19e | 192 | aarch64_simd_emit_reg_reg_move (operands, DImode, 2); |
193 | DONE; | |
df401d54 | 194 | }) |
195 | ||
;; After reload, split a VQ register move that crosses between an
;; FP_REGNUM_P register and a GP_REGNUM_P register (in either direction)
;; by calling aarch64_split_simd_move.
e0e03aa1 | 196 | (define_split |
197 | [(set (match_operand:VQ 0 "register_operand" "") | |
198 | (match_operand:VQ 1 "register_operand" ""))] | |
199 | "TARGET_SIMD && reload_completed | |
200 | && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1]))) | |
201 | || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))" | |
202 | [(const_int 0)] | |
203 | { | |
204 | aarch64_split_simd_move (operands[0], operands[1]); | |
205 | DONE; | |
206 | }) | |
207 | ||
;; Expand a VQ move as two <VHALF>-mode halves.  When the source register
;; satisfies GP_REGNUM_P, the low and high parts of the source are written
;; into the destination with move_lo_quad_<mode> and move_hi_quad_<mode>.
;; Otherwise the low and high parts of the destination are filled from the
;; source by aarch64_simd_mov_from_<mode>low and
;; aarch64_simd_mov_from_<mode>high, using the parallels returned by
;; aarch64_simd_vect_par_cnst_half.
e30d3162 | 208 | (define_expand "aarch64_split_simd_mov<mode>" |
e0e03aa1 | 209 | [(set (match_operand:VQ 0) |
210 | (match_operand:VQ 1))] | |
211 | "TARGET_SIMD" | |
212 | { | |
213 | rtx dst = operands[0]; | |
214 | rtx src = operands[1]; | |
215 | ||
216 | if (GP_REGNUM_P (REGNO (src))) | |
217 | { | |
e30d3162 | 218 | rtx src_low_part = gen_lowpart (<VHALF>mode, src); |
219 | rtx src_high_part = gen_highpart (<VHALF>mode, src); | |
e0e03aa1 | 220 | |
221 | emit_insn | |
e30d3162 | 222 | (gen_move_lo_quad_<mode> (dst, src_low_part)); |
e0e03aa1 | 223 | emit_insn |
e30d3162 | 224 | (gen_move_hi_quad_<mode> (dst, src_high_part)); |
e0e03aa1 | 225 | } |
226 | ||
227 | else | |
228 | { | |
e30d3162 | 229 | rtx dst_low_part = gen_lowpart (<VHALF>mode, dst); |
230 | rtx dst_high_part = gen_highpart (<VHALF>mode, dst); | |
231 | rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, false); | |
232 | rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); | |
e0e03aa1 | 233 | |
234 | emit_insn | |
e30d3162 | 235 | (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo)); |
e0e03aa1 | 236 | emit_insn |
e30d3162 | 237 | (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi)); |
e0e03aa1 | 238 | } |
239 | DONE; | |
240 | } | |
241 | ) | |
242 | ||
;; Copy the half of a VQ register selected by a vect_par_cnst_lo_half
;; parallel into an "r"-constraint register, using umov from d[0].  Only
;; available once reload has completed.
e0e03aa1 | 243 | (define_insn "aarch64_simd_mov_from_<mode>low" |
244 | [(set (match_operand:<VHALF> 0 "register_operand" "=r") | |
245 | (vec_select:<VHALF> | |
246 | (match_operand:VQ 1 "register_operand" "w") | |
247 | (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))] | |
248 | "TARGET_SIMD && reload_completed" | |
249 | "umov\t%0, %1.d[0]" | |
52e95e58 | 250 | [(set_attr "type" "neon_to_gp<q>") |
e0e03aa1 | 251 | (set_attr "length" "4") |
252 | ]) | |
253 | ||
;; Copy the half of a VQ register selected by a vect_par_cnst_hi_half
;; parallel into an "r"-constraint register, using umov from d[1].  Only
;; available once reload has completed.
254 | (define_insn "aarch64_simd_mov_from_<mode>high" | |
255 | [(set (match_operand:<VHALF> 0 "register_operand" "=r") | |
256 | (vec_select:<VHALF> | |
257 | (match_operand:VQ 1 "register_operand" "w") | |
258 | (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))] | |
259 | "TARGET_SIMD && reload_completed" | |
260 | "umov\t%0, %1.d[1]" | |
52e95e58 | 261 | [(set_attr "type" "neon_to_gp<q>") |
e0e03aa1 | 262 | (set_attr "length" "4") |
263 | ]) | |
264 | ||
;; Vector OR-NOT: operand 0 = (~operand 1) | operand 2.  Note that the
;; assembly template lists operand 2 before operand 1.
df401d54 | 265 | (define_insn "orn<mode>3" |
658fa7f6 | 266 | [(set (match_operand:VDQ_I 0 "register_operand" "=w") |
267 | (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")) | |
268 | (match_operand:VDQ_I 2 "register_operand" "w")))] | |
df401d54 | 269 | "TARGET_SIMD" |
270 | "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>" | |
52e95e58 | 271 | [(set_attr "type" "neon_logic<q>")] |
df401d54 | 272 | ) |
273 | ||
;; Vector bit clear: operand 0 = (~operand 1) & operand 2.  Note that the
;; assembly template lists operand 2 before operand 1.
274 | (define_insn "bic<mode>3" | |
658fa7f6 | 275 | [(set (match_operand:VDQ_I 0 "register_operand" "=w") |
276 | (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")) | |
277 | (match_operand:VDQ_I 2 "register_operand" "w")))] | |
df401d54 | 278 | "TARGET_SIMD" |
279 | "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>" | |
52e95e58 | 280 | [(set_attr "type" "neon_logic<q>")] |
df401d54 | 281 | ) |
282 | ||
;; Element-wise vector addition for the VDQ_I modes.
283 | (define_insn "add<mode>3" | |
658fa7f6 | 284 | [(set (match_operand:VDQ_I 0 "register_operand" "=w") |
285 | (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w") | |
286 | (match_operand:VDQ_I 2 "register_operand" "w")))] | |
df401d54 | 287 | "TARGET_SIMD" |
288 | "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" | |
52e95e58 | 289 | [(set_attr "type" "neon_add<q>")] |
df401d54 | 290 | ) |
291 | ||
;; Element-wise vector subtraction (operand 1 - operand 2) for the VDQ_I
;; modes.
292 | (define_insn "sub<mode>3" | |
658fa7f6 | 293 | [(set (match_operand:VDQ_I 0 "register_operand" "=w") |
294 | (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w") | |
295 | (match_operand:VDQ_I 2 "register_operand" "w")))] | |
df401d54 | 296 | "TARGET_SIMD" |
297 | "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" | |
52e95e58 | 298 | [(set_attr "type" "neon_sub<q>")] |
df401d54 | 299 | ) |
300 | ||
;; Element-wise vector multiplication for the VDQ_BHSI modes.
301 | (define_insn "mul<mode>3" | |
658fa7f6 | 302 | [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w") |
303 | (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w") | |
304 | (match_operand:VDQ_BHSI 2 "register_operand" "w")))] | |
df401d54 | 305 | "TARGET_SIMD" |
306 | "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" | |
52e95e58 | 307 | [(set_attr "type" "neon_mul_<Vetype><q>")] |
df401d54 | 308 | ) |
309 | ||
;; Byte swap of each element of a VDQHSD vector, emitted as
;; rev<Vrevsuff> on the <Vbtype> arrangement.
0870dc95 | 310 | (define_insn "bswap<mode>2" |
e98e78fa | 311 | [(set (match_operand:VDQHSD 0 "register_operand" "=w") |
312 | (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))] | |
313 | "TARGET_SIMD" | |
314 | "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>" | |
315 | [(set_attr "type" "neon_rev<q>")] | |
316 | ) | |
317 | ||
;; Emit rbit on a VB vector.  The operation is represented in RTL as
;; UNSPEC_RBIT.
8dc079f0 | 318 | (define_insn "aarch64_rbit<mode>" |
319 | [(set (match_operand:VB 0 "register_operand" "=w") | |
320 | (unspec:VB [(match_operand:VB 1 "register_operand" "w")] | |
321 | UNSPEC_RBIT))] | |
322 | "TARGET_SIMD" | |
323 | "rbit\\t%0.<Vbtype>, %1.<Vbtype>" | |
324 | [(set_attr "type" "neon_rbit")] | |
325 | ) | |
326 | ||
;; Count trailing zeros for the VS modes, built from three insns: a bswap
;; of operand 1 into operand 0, an rbit applied to operand 0 viewed through
;; a <VS:VSI2QI>-mode subreg, and finally a clz of operand 0 into itself.
caa5509e | 327 | (define_expand "ctz<mode>2" |
328 | [(set (match_operand:VS 0 "register_operand") | |
329 | (ctz:VS (match_operand:VS 1 "register_operand")))] | |
330 | "TARGET_SIMD" | |
331 | { | |
0870dc95 | 332 | emit_insn (gen_bswap<mode>2 (operands[0], operands[1])); |
caa5509e | 333 | rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0], |
334 | <MODE>mode, 0); | |
335 | emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi)); | |
336 | emit_insn (gen_clz<mode>2 (operands[0], operands[0])); | |
337 | DONE; | |
338 | } | |
339 | ) | |
340 | ||
;; Multiply operand 3 by a single lane of operand 1 broadcast to every
;; lane.  The lane index (operand 2) is remapped with ENDIAN_LANE_N before
;; it is printed.
5839dcd4 | 341 | (define_insn "*aarch64_mul3_elt<mode>" |
342 | [(set (match_operand:VMUL 0 "register_operand" "=w") | |
343 | (mult:VMUL | |
344 | (vec_duplicate:VMUL | |
345 | (vec_select:<VEL> | |
346 | (match_operand:VMUL 1 "register_operand" "<h_con>") | |
347 | (parallel [(match_operand:SI 2 "immediate_operand")]))) | |
348 | (match_operand:VMUL 3 "register_operand" "w")))] | |
349 | "TARGET_SIMD" | |
c034024c | 350 | { |
351 | operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]))); | |
352 | return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]"; | |
353 | } | |
52e95e58 | 354 | [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")] |
5839dcd4 | 355 | ) |
356 | ||
;; Multiply operand 3 by a single broadcast lane taken from a source
;; vector of mode <VSWAP_WIDTH>.  The lane index (operand 2) is remapped
;; with ENDIAN_LANE_N in <VSWAP_WIDTH>mode before it is printed.
357 | (define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>" | |
358 | [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w") | |
359 | (mult:VMUL_CHANGE_NLANES | |
360 | (vec_duplicate:VMUL_CHANGE_NLANES | |
361 | (vec_select:<VEL> | |
362 | (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>") | |
363 | (parallel [(match_operand:SI 2 "immediate_operand")]))) | |
364 | (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))] | |
365 | "TARGET_SIMD" | |
c034024c | 366 | { |
367 | operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode, | |
368 | INTVAL (operands[2]))); | |
369 | return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]"; | |
370 | } | |
52e95e58 | 371 | [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")] |
5839dcd4 | 372 | ) |
373 | ||
;; V2DF multiply of operand 1 by a DF scalar (operand 2) duplicated across
;; the vector; emitted as a by-element fmul using lane 0 of operand 2.
374 | (define_insn "*aarch64_mul3_elt_to_128df" | |
375 | [(set (match_operand:V2DF 0 "register_operand" "=w") | |
376 | (mult:V2DF | |
377 | (vec_duplicate:V2DF | |
378 | (match_operand:DF 2 "register_operand" "w")) | |
379 | (match_operand:V2DF 1 "register_operand" "w")))] | |
380 | "TARGET_SIMD" | |
381 | "fmul\\t%0.2d, %1.2d, %2.d[0]" | |
52e95e58 | 382 | [(set_attr "type" "neon_fp_mul_d_scalar_q")] |
5839dcd4 | 383 | ) |
384 | ||
;; Emit frsqrte on a VALLF operand.  The operation is represented in RTL
;; as UNSPEC_RSQRTE.
e1a2ea91 | 385 | (define_insn "aarch64_rsqrte_<mode>2" |
386 | [(set (match_operand:VALLF 0 "register_operand" "=w") | |
387 | (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")] | |
388 | UNSPEC_RSQRTE))] | |
389 | "TARGET_SIMD" | |
390 | "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>" | |
391 | [(set_attr "type" "neon_fp_rsqrte_<Vetype><q>")]) | |
392 | ||
;; Emit frsqrts on two VALLF operands.  The operation is represented in
;; RTL as UNSPEC_RSQRTS.
393 | (define_insn "aarch64_rsqrts_<mode>3" | |
394 | [(set (match_operand:VALLF 0 "register_operand" "=w") | |
395 | (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w") | |
396 | (match_operand:VALLF 2 "register_operand" "w")] | |
397 | UNSPEC_RSQRTS))] | |
398 | "TARGET_SIMD" | |
399 | "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>" | |
400 | [(set_attr "type" "neon_fp_rsqrts_<Vetype><q>")]) | |
401 | ||
;; Expander for rsqrt on the VALLF modes.  All of the work is delegated to
;; aarch64_emit_swrsqrt, called with the destination and source operands.
4cfd27a5 | 402 | (define_expand "rsqrt<mode>2" |
e1a2ea91 | 403 | [(set (match_operand:VALLF 0 "register_operand" "=w") |
404 | (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")] | |
405 | UNSPEC_RSQRT))] | |
406 | "TARGET_SIMD" | |
407 | { | |
408 | aarch64_emit_swrsqrt (operands[0], operands[1]); | |
409 | DONE; | |
410 | }) | |
411 | ||
;; DF multiply of operand 3 by one lane selected from a V2DF register.
;; The lane index is remapped with ENDIAN_LANE_N in V2DFmode, and the
;; instruction is printed with the .2d arrangement on operands 0 and 3.
5839dcd4 | 412 | (define_insn "*aarch64_mul3_elt_to_64v2df" |
413 | [(set (match_operand:DF 0 "register_operand" "=w") | |
414 | (mult:DF | |
415 | (vec_select:DF | |
416 | (match_operand:V2DF 1 "register_operand" "w") | |
417 | (parallel [(match_operand:SI 2 "immediate_operand")])) | |
418 | (match_operand:DF 3 "register_operand" "w")))] | |
419 | "TARGET_SIMD" | |
c034024c | 420 | { |
421 | operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2]))); | |
422 | return "fmul\\t%0.2d, %3.2d, %1.d[%2]"; | |
423 | } | |
52e95e58 | 424 | [(set_attr "type" "neon_fp_mul_d_scalar_q")] |
5839dcd4 | 425 | ) |
426 | ||
;; Element-wise vector negation for the VDQ_I modes.
df401d54 | 427 | (define_insn "neg<mode>2" |
658fa7f6 | 428 | [(set (match_operand:VDQ_I 0 "register_operand" "=w") |
429 | (neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))] | |
df401d54 | 430 | "TARGET_SIMD" |
431 | "neg\t%0.<Vtype>, %1.<Vtype>" | |
52e95e58 | 432 | [(set_attr "type" "neon_neg<q>")] |
df401d54 | 433 | ) |
434 | ||
;; Element-wise vector absolute value for the VDQ_I modes, expressed with
;; the abs rtx code.
435 | (define_insn "abs<mode>2" | |
658fa7f6 | 436 | [(set (match_operand:VDQ_I 0 "register_operand" "=w") |
437 | (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))] | |
df401d54 | 438 | "TARGET_SIMD" |
439 | "abs\t%0.<Vtype>, %1.<Vtype>" | |
52e95e58 | 440 | [(set_attr "type" "neon_abs<q>")] |
df401d54 | 441 | ) |
442 | ||
260d579b | 443 | ;; The intrinsic version of integer ABS must not be allowed to |
444 | ;; combine with any operation with an integrated ABS step, such | |
445 | ;; as SABD. | |
;; This pattern expresses ABS as UNSPEC_ABS rather than with the abs rtx
;; code, and covers the VSDQ_I_DI modes.
446 | (define_insn "aarch64_abs<mode>" | |
447 | [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w") | |
448 | (unspec:VSDQ_I_DI | |
449 | [(match_operand:VSDQ_I_DI 1 "register_operand" "w")] | |
450 | UNSPEC_ABS))] | |
451 | "TARGET_SIMD" | |
452 | "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>" | |
453 | [(set_attr "type" "neon_abs<q>")] | |
454 | ) | |
455 | ||
;; Absolute difference: abs (operand 1 - operand 2), emitted as sabd.
02ab8c72 | 456 | (define_insn "abd<mode>_3" |
457 | [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w") | |
458 | (abs:VDQ_BHSI (minus:VDQ_BHSI | |
459 | (match_operand:VDQ_BHSI 1 "register_operand" "w") | |
460 | (match_operand:VDQ_BHSI 2 "register_operand" "w"))))] | |
461 | "TARGET_SIMD" | |
462 | "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" | |
52e95e58 | 463 | [(set_attr "type" "neon_abd<q>")] |
02ab8c72 | 464 | ) |
465 | ||
;; Absolute difference and accumulate: abs (operand 1 - operand 2) plus
;; operand 3, emitted as saba.  Operand 3 is tied to the output register
;; by the "0" constraint.
466 | (define_insn "aba<mode>_3" | |
467 | [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w") | |
468 | (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI | |
469 | (match_operand:VDQ_BHSI 1 "register_operand" "w") | |
470 | (match_operand:VDQ_BHSI 2 "register_operand" "w"))) | |
471 | (match_operand:VDQ_BHSI 3 "register_operand" "0")))] | |
472 | "TARGET_SIMD" | |
473 | "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" | |
52e95e58 | 474 | [(set_attr "type" "neon_arith_acc<q>")] |
02ab8c72 | 475 | ) |
476 | ||
;; Floating-point absolute difference on VDQF vectors:
;; abs (operand 1 - operand 2), emitted as fabd.
477 | (define_insn "fabd<mode>_3" | |
478 | [(set (match_operand:VDQF 0 "register_operand" "=w") | |
479 | (abs:VDQF (minus:VDQF | |
480 | (match_operand:VDQF 1 "register_operand" "w") | |
481 | (match_operand:VDQF 2 "register_operand" "w"))))] | |
482 | "TARGET_SIMD" | |
483 | "fabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" | |
52e95e58 | 484 | [(set_attr "type" "neon_fp_abd_<Vetype><q>")] |
02ab8c72 | 485 | ) |
486 | ||
;; Scalar form of the floating-point absolute difference for the GPF
;; modes: abs (operand 1 - operand 2), emitted as fabd on <s> registers.
4d9d91e8 | 487 | (define_insn "*fabd_scalar<mode>3" |
488 | [(set (match_operand:GPF 0 "register_operand" "=w") | |
489 | (abs:GPF (minus:GPF | |
490 | (match_operand:GPF 1 "register_operand" "w") | |
491 | (match_operand:GPF 2 "register_operand" "w"))))] | |
492 | "TARGET_SIMD" | |
493 | "fabd\t%<s>0, %<s>1, %<s>2" | |
52e95e58 | 494 | [(set_attr "type" "neon_fp_abd_<Vetype><q>")] |
4d9d91e8 | 495 | ) |
496 | ||
;; Bitwise AND of two VDQ_I vectors, printed with the <Vbtype> arrangement.
df401d54 | 497 | (define_insn "and<mode>3" |
658fa7f6 | 498 | [(set (match_operand:VDQ_I 0 "register_operand" "=w") |
499 | (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w") | |
500 | (match_operand:VDQ_I 2 "register_operand" "w")))] | |
df401d54 | 501 | "TARGET_SIMD" |
502 | "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>" | |
52e95e58 | 503 | [(set_attr "type" "neon_logic<q>")] |
df401d54 | 504 | ) |
505 | ||
;; Bitwise inclusive OR of two VDQ_I vectors, emitted as orr.
506 | (define_insn "ior<mode>3" | |
658fa7f6 | 507 | [(set (match_operand:VDQ_I 0 "register_operand" "=w") |
508 | (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w") | |
509 | (match_operand:VDQ_I 2 "register_operand" "w")))] | |
df401d54 | 510 | "TARGET_SIMD" |
511 | "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>" | |
52e95e58 | 512 | [(set_attr "type" "neon_logic<q>")] |
df401d54 | 513 | ) |
514 | ||
;; Bitwise exclusive OR of two VDQ_I vectors, emitted as eor.
515 | (define_insn "xor<mode>3" | |
658fa7f6 | 516 | [(set (match_operand:VDQ_I 0 "register_operand" "=w") |
517 | (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w") | |
518 | (match_operand:VDQ_I 2 "register_operand" "w")))] | |
df401d54 | 519 | "TARGET_SIMD" |
520 | "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>" | |
52e95e58 | 521 | [(set_attr "type" "neon_logic<q>")] |
df401d54 | 522 | ) |
523 | ||
;; Bitwise complement of a VDQ_I vector, emitted as not.
524 | (define_insn "one_cmpl<mode>2" | |
658fa7f6 | 525 | [(set (match_operand:VDQ_I 0 "register_operand" "=w") |
526 | (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))] | |
df401d54 | 527 | "TARGET_SIMD" |
528 | "not\t%0.<Vbtype>, %1.<Vbtype>" | |
52e95e58 | 529 | [(set_attr "type" "neon_logic<q>")] |
df401d54 | 530 | ) |
531 | ||
;; Insert scalar operand 1 into one lane of operand 3, which is tied to the
;; output by the "0" constraint.  Operand 2 is the vec_merge mask: the code
;; takes its exact_log2 to get a lane number, remaps that lane with
;; ENDIAN_LANE_N and rebuilds a one-bit mask from the result.
;; Alternatives, in order: ins from an "r" register, ins from lane 0 of a
;; "w" register, and ld1 from a "Utv"-constraint operand.
532 | (define_insn "aarch64_simd_vec_set<mode>" | |
658fa7f6 | 533 | [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w,w,w") |
534 | (vec_merge:VDQ_BHSI | |
535 | (vec_duplicate:VDQ_BHSI | |
63dfb934 | 536 | (match_operand:<VEL> 1 "aarch64_simd_general_operand" "r,w,Utv")) |
658fa7f6 | 537 | (match_operand:VDQ_BHSI 3 "register_operand" "0,0,0") |
63dfb934 | 538 | (match_operand:SI 2 "immediate_operand" "i,i,i")))] |
df401d54 | 539 | "TARGET_SIMD" |
56234646 | 540 | { |
541 | int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2]))); | |
542 | operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt); | |
543 | switch (which_alternative) | |
544 | { | |
545 | case 0: | |
546 | return "ins\\t%0.<Vetype>[%p2], %w1"; | |
547 | case 1: | |
548 | return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]"; | |
63dfb934 | 549 | case 2: |
550 | return "ld1\\t{%0.<Vetype>}[%p2], %1"; | |
56234646 | 551 | default: |
552 | gcc_unreachable (); | |
553 | } | |
554 | } | |
63dfb934 | 555 | [(set_attr "type" "neon_from_gp<q>, neon_ins<q>, neon_load1_1reg<q>")] |
df401d54 | 556 | ) |
557 | ||
;; Logical shift right of a VDQ_I vector by an immediate vector accepted by
;; aarch64_simd_rshift_imm; emitted as ushr.
558 | (define_insn "aarch64_simd_lshr<mode>" | |
658fa7f6 | 559 | [(set (match_operand:VDQ_I 0 "register_operand" "=w") |
560 | (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w") | |
561 | (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))] | |
df401d54 | 562 | "TARGET_SIMD" |
563 | "ushr\t%0.<Vtype>, %1.<Vtype>, %2" | |
52e95e58 | 564 | [(set_attr "type" "neon_shift_imm<q>")] |
df401d54 | 565 | ) |
566 | ||
;; Arithmetic shift right of a VDQ_I vector by an immediate vector accepted
;; by aarch64_simd_rshift_imm; emitted as sshr.
567 | (define_insn "aarch64_simd_ashr<mode>" | |
658fa7f6 | 568 | [(set (match_operand:VDQ_I 0 "register_operand" "=w") |
569 | (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w") | |
570 | (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))] | |
df401d54 | 571 | "TARGET_SIMD" |
572 | "sshr\t%0.<Vtype>, %1.<Vtype>, %2" | |
52e95e58 | 573 | [(set_attr "type" "neon_shift_imm<q>")] |
df401d54 | 574 | ) |
575 | ||
;; Shift left of a VDQ_I vector by an immediate vector accepted by
;; aarch64_simd_lshift_imm; emitted as shl.
576 | (define_insn "aarch64_simd_imm_shl<mode>" | |
658fa7f6 | 577 | [(set (match_operand:VDQ_I 0 "register_operand" "=w") |
578 | (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w") | |
579 | (match_operand:VDQ_I 2 "aarch64_simd_lshift_imm" "Dl")))] | |
df401d54 | 580 | "TARGET_SIMD" |
581 | "shl\t%0.<Vtype>, %1.<Vtype>, %2" | |
52e95e58 | 582 | [(set_attr "type" "neon_shift_imm<q>")] |
df401d54 | 583 | ) |
584 | ||
;; Shift left of a VDQ_I vector by per-lane counts held in a register,
;; expressed with the ashift rtx code and emitted as sshl.
585 | (define_insn "aarch64_simd_reg_sshl<mode>" | |
658fa7f6 | 586 | [(set (match_operand:VDQ_I 0 "register_operand" "=w") |
587 | (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w") | |
588 | (match_operand:VDQ_I 2 "register_operand" "w")))] | |
df401d54 | 589 | "TARGET_SIMD" |
590 | "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" | |
52e95e58 | 591 | [(set_attr "type" "neon_shift_reg<q>")] |
df401d54 | 592 | ) |
593 | ||
;; Register-controlled ushl on VDQ_I vectors, represented as
;; UNSPEC_ASHIFT_UNSIGNED rather than a shift rtx code.
594 | (define_insn "aarch64_simd_reg_shl<mode>_unsigned" | |
658fa7f6 | 595 | [(set (match_operand:VDQ_I 0 "register_operand" "=w") |
596 | (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w") | |
597 | (match_operand:VDQ_I 2 "register_operand" "w")] | |
df401d54 | 598 | UNSPEC_ASHIFT_UNSIGNED))] |
599 | "TARGET_SIMD" | |
600 | "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" | |
52e95e58 | 601 | [(set_attr "type" "neon_shift_reg<q>")] |
df401d54 | 602 | ) |
603 | ||
;; Register-controlled sshl on VDQ_I vectors, represented as
;; UNSPEC_ASHIFT_SIGNED rather than a shift rtx code.
604 | (define_insn "aarch64_simd_reg_shl<mode>_signed" | |
658fa7f6 | 605 | [(set (match_operand:VDQ_I 0 "register_operand" "=w") |
606 | (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w") | |
607 | (match_operand:VDQ_I 2 "register_operand" "w")] | |
df401d54 | 608 | UNSPEC_ASHIFT_SIGNED))] |
609 | "TARGET_SIMD" | |
610 | "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" | |
52e95e58 | 611 | [(set_attr "type" "neon_shift_reg<q>")] |
df401d54 | 612 | ) |
613 | ||
;; Vector shift left by a scalar SImode amount.  A constant amount in the
;; range [0, element bit width) uses the immediate shl pattern with the
;; amount duplicated into a constant vector.  Any other constant, or a MEM,
;; is forced into an SImode register.  A register amount is converted to
;; <VEL>mode, duplicated across a vector and applied with
;; aarch64_simd_reg_sshl<mode>.  Any remaining operand form FAILs.
614 | (define_expand "ashl<mode>3" | |
658fa7f6 | 615 | [(match_operand:VDQ_I 0 "register_operand" "") |
616 | (match_operand:VDQ_I 1 "register_operand" "") | |
df401d54 | 617 | (match_operand:SI 2 "general_operand" "")] |
618 | "TARGET_SIMD" | |
619 | { | |
620 | int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT; | |
621 | int shift_amount; | |
622 | ||
623 | if (CONST_INT_P (operands[2])) | |
624 | { | |
625 | shift_amount = INTVAL (operands[2]); | |
626 | if (shift_amount >= 0 && shift_amount < bit_width) | |
627 | { | |
628 | rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode, | |
629 | shift_amount); | |
630 | emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0], | |
631 | operands[1], | |
632 | tmp)); | |
633 | DONE; | |
634 | } | |
635 | else | |
636 | { | |
637 | operands[2] = force_reg (SImode, operands[2]); | |
638 | } | |
639 | } | |
640 | else if (MEM_P (operands[2])) | |
641 | { | |
642 | operands[2] = force_reg (SImode, operands[2]); | |
643 | } | |
644 | ||
645 | if (REG_P (operands[2])) | |
646 | { | |
647 | rtx tmp = gen_reg_rtx (<MODE>mode); | |
648 | emit_insn (gen_aarch64_simd_dup<mode> (tmp, | |
649 | convert_to_mode (<VEL>mode, | |
650 | operands[2], | |
651 | 0))); | |
652 | emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1], | |
653 | tmp)); | |
654 | DONE; | |
655 | } | |
656 | else | |
657 | FAIL; | |
658 | } | |
659 | ) | |
660 | ||
;; Vector logical shift right by a scalar SImode amount.  A constant in the
;; range [1, element bit width] uses the immediate aarch64_simd_lshr<mode>
;; pattern.  Any other constant, or a MEM, is forced into an SImode
;; register.  A register amount is negated, converted to <VEL>mode,
;; duplicated across a vector and applied with
;; aarch64_simd_reg_shl<mode>_unsigned.  Any remaining operand form FAILs.
661 | (define_expand "lshr<mode>3" | |
658fa7f6 | 662 | [(match_operand:VDQ_I 0 "register_operand" "") |
663 | (match_operand:VDQ_I 1 "register_operand" "") | |
df401d54 | 664 | (match_operand:SI 2 "general_operand" "")] |
665 | "TARGET_SIMD" | |
666 | { | |
667 | int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT; | |
668 | int shift_amount; | |
669 | ||
670 | if (CONST_INT_P (operands[2])) | |
671 | { | |
672 | shift_amount = INTVAL (operands[2]); | |
673 | if (shift_amount > 0 && shift_amount <= bit_width) | |
674 | { | |
675 | rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode, | |
676 | shift_amount); | |
677 | emit_insn (gen_aarch64_simd_lshr<mode> (operands[0], | |
678 | operands[1], | |
679 | tmp)); | |
680 | DONE; | |
681 | } | |
682 | else | |
683 | operands[2] = force_reg (SImode, operands[2]); | |
684 | } | |
685 | else if (MEM_P (operands[2])) | |
686 | { | |
687 | operands[2] = force_reg (SImode, operands[2]); | |
688 | } | |
689 | ||
690 | if (REG_P (operands[2])) | |
691 | { | |
692 | rtx tmp = gen_reg_rtx (SImode); | |
693 | rtx tmp1 = gen_reg_rtx (<MODE>mode); | |
694 | emit_insn (gen_negsi2 (tmp, operands[2])); | |
695 | emit_insn (gen_aarch64_simd_dup<mode> (tmp1, | |
696 | convert_to_mode (<VEL>mode, | |
697 | tmp, 0))); | |
698 | emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], | |
699 | operands[1], | |
700 | tmp1)); | |
701 | DONE; | |
702 | } | |
703 | else | |
704 | FAIL; | |
705 | } | |
706 | ) | |
707 | ||
;; Vector arithmetic shift right by a scalar SImode amount.  A constant in
;; the range [1, element bit width] uses the immediate
;; aarch64_simd_ashr<mode> pattern.  Any other constant, or a MEM, is
;; forced into an SImode register.  A register amount is negated,
;; converted to <VEL>mode, duplicated across a vector and applied with
;; aarch64_simd_reg_shl<mode>_signed.  Any remaining operand form FAILs.
708 | (define_expand "ashr<mode>3" | |
658fa7f6 | 709 | [(match_operand:VDQ_I 0 "register_operand" "") |
710 | (match_operand:VDQ_I 1 "register_operand" "") | |
df401d54 | 711 | (match_operand:SI 2 "general_operand" "")] |
712 | "TARGET_SIMD" | |
713 | { | |
714 | int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT; | |
715 | int shift_amount; | |
716 | ||
717 | if (CONST_INT_P (operands[2])) | |
718 | { | |
719 | shift_amount = INTVAL (operands[2]); | |
720 | if (shift_amount > 0 && shift_amount <= bit_width) | |
721 | { | |
722 | rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode, | |
723 | shift_amount); | |
724 | emit_insn (gen_aarch64_simd_ashr<mode> (operands[0], | |
725 | operands[1], | |
726 | tmp)); | |
727 | DONE; | |
728 | } | |
729 | else | |
730 | operands[2] = force_reg (SImode, operands[2]); | |
731 | } | |
732 | else if (MEM_P (operands[2])) | |
733 | { | |
734 | operands[2] = force_reg (SImode, operands[2]); | |
735 | } | |
736 | ||
737 | if (REG_P (operands[2])) | |
738 | { | |
739 | rtx tmp = gen_reg_rtx (SImode); | |
740 | rtx tmp1 = gen_reg_rtx (<MODE>mode); | |
741 | emit_insn (gen_negsi2 (tmp, operands[2])); | |
742 | emit_insn (gen_aarch64_simd_dup<mode> (tmp1, | |
743 | convert_to_mode (<VEL>mode, | |
744 | tmp, 0))); | |
745 | emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], | |
746 | operands[1], | |
747 | tmp1)); | |
748 | DONE; | |
749 | } | |
750 | else | |
751 | FAIL; | |
752 | } | |
753 | ) | |
754 | ||
;; Vector-by-vector shift left for the VDQ_I modes: the operands are passed
;; straight through to aarch64_simd_reg_sshl<mode>.
755 | (define_expand "vashl<mode>3" | |
658fa7f6 | 756 | [(match_operand:VDQ_I 0 "register_operand" "") |
757 | (match_operand:VDQ_I 1 "register_operand" "") | |
758 | (match_operand:VDQ_I 2 "register_operand" "")] | |
df401d54 | 759 | "TARGET_SIMD" |
760 | { | |
761 | emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1], | |
762 | operands[2])); | |
763 | DONE; | |
764 | }) | |
765 | ||
658fa7f6 | 766 | ;; Using mode VDQ_BHSI as there is no V2DImode neg! |
df401d54 | 767 | ;; Negating individual lanes most certainly offsets the |
768 | ;; gain from vectorization. | |
;; Vector-by-vector arithmetic shift right: the per-lane counts in operand 2
;; are negated into a fresh register, which is then used as the shift
;; operand of aarch64_simd_reg_shl<mode>_signed.
769 | (define_expand "vashr<mode>3" | |
658fa7f6 | 770 | [(match_operand:VDQ_BHSI 0 "register_operand" "") |
771 | (match_operand:VDQ_BHSI 1 "register_operand" "") | |
772 | (match_operand:VDQ_BHSI 2 "register_operand" "")] | |
df401d54 | 773 | "TARGET_SIMD" |
774 | { | |
775 | rtx neg = gen_reg_rtx (<MODE>mode); | |
776 | emit (gen_neg<mode>2 (neg, operands[2])); | |
777 | emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1], | |
778 | neg)); | |
779 | DONE; | |
780 | }) | |
781 | ||
33fb2095 | 782 | ;; DI vector shift |
;; DImode arithmetic shift right by an immediate accepted by
;; aarch64_shift_imm64_di.  A count of 64 is rewritten to 63 before the
;; shift is emitted through ashrdi3.
783 | (define_expand "aarch64_ashr_simddi" | |
784 | [(match_operand:DI 0 "register_operand" "=w") | |
785 | (match_operand:DI 1 "register_operand" "w") | |
ade3aaf0 | 786 | (match_operand:SI 2 "aarch64_shift_imm64_di" "")] |
33fb2095 | 787 | "TARGET_SIMD" |
788 | { | |
ea4bea42 | 789 | /* An arithmetic shift right by 64 fills the result with copies of the sign |
790 | bit, just like asr by 63 - however the standard pattern does not handle | |
791 | a shift by 64. */ | |
33fb2095 | 792 | if (INTVAL (operands[2]) == 64) |
ea4bea42 | 793 | operands[2] = GEN_INT (63); |
794 | emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2])); | |
33fb2095 | 795 | DONE; |
796 | } | |
797 | ) | |
798 | ||
;; Expander "vlshr<mode>3" for the VDQ_BHSI modes.  Operand 2 is negated
;; into a fresh pseudo with neg<mode>2, and the negated value is handed to
;; aarch64_simd_reg_shl<mode>_unsigned together with operands 0 and 1.
df401d54 | 799 | (define_expand "vlshr<mode>3" |
658fa7f6 | 800 | [(match_operand:VDQ_BHSI 0 "register_operand" "") |
801 | (match_operand:VDQ_BHSI 1 "register_operand" "") | |
802 | (match_operand:VDQ_BHSI 2 "register_operand" "")] | |
df401d54 | 803 | "TARGET_SIMD" |
804 | { | |
805 | rtx neg = gen_reg_rtx (<MODE>mode); | |
806 | emit (gen_neg<mode>2 (neg, operands[2])); | |
807 | emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1], | |
808 | neg)); | |
809 | DONE; | |
810 | }) | |
811 | ||
;; Expander with a DI destination (operand 0), a DI source (operand 1) and an
;; SI operand 2 accepted by the aarch64_shift_imm64_di predicate.  A count of
;; 64 moves const0_rtx into operand 0; any other count is forwarded to
;; lshrdi3.
bed34e83 | 812 | (define_expand "aarch64_lshr_simddi" |
813 | [(match_operand:DI 0 "register_operand" "=w") | |
814 | (match_operand:DI 1 "register_operand" "w") | |
815 | (match_operand:SI 2 "aarch64_shift_imm64_di" "")] | |
816 | "TARGET_SIMD" | |
817 | { | |
818 | if (INTVAL (operands[2]) == 64) | |
7c260af3 | 819 | emit_move_insn (operands[0], const0_rtx); |
bed34e83 | 820 | else |
821 | emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2])); | |
822 | DONE; | |
823 | } | |
824 | ) | |
825 | ||
;; Expander "vec_set<mode>" for the VDQ_BHSI modes.  Operand 2 is an
;; immediate index; it is turned into the value 1 << index before being
;; passed on.  aarch64_simd_vec_set<mode> receives operand 0 as both its
;; first and its last argument.
df401d54 | 826 | (define_expand "vec_set<mode>" |
658fa7f6 | 827 | [(match_operand:VDQ_BHSI 0 "register_operand") |
e76492cb | 828 | (match_operand:<VEL> 1 "register_operand") |
829 | (match_operand:SI 2 "immediate_operand")] | |
df401d54 | 830 | "TARGET_SIMD" |
831 | { | |
832 | HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]); | |
833 | emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1], | |
834 | GEN_INT (elem), operands[0])); | |
835 | DONE; | |
836 | } | |
837 | ) | |
838 | ||
586a8300 | 839 | ;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero. |
;; Insn for UNSPEC_VEC_SHR on the VD modes: operand 1 is a register and
;; operand 2 an immediate.  The output code prints "shl" on the D registers
;; when BYTES_BIG_ENDIAN and "ushr" otherwise, with operand 2 as the count.
840 | (define_insn "vec_shr_<mode>" | |
841 | [(set (match_operand:VD 0 "register_operand" "=w") | |
5bea7977 | 842 | (unspec:VD [(match_operand:VD 1 "register_operand" "w") |
843 | (match_operand:SI 2 "immediate_operand" "i")] | |
844 | UNSPEC_VEC_SHR))] | |
586a8300 | 845 | "TARGET_SIMD" |
846 | { | |
847 | if (BYTES_BIG_ENDIAN) | |
5bea7977 | 848 | return "shl %d0, %d1, %2"; |
586a8300 | 849 | else |
850 | return "ushr %d0, %d1, %2"; | |
851 | } | |
852 | [(set_attr "type" "neon_shift_imm")] | |
853 | ) | |
854 | ||
;; Insn: vec_merge of DI operand 1 (duplicated to V2DI) into V2DI operand 3,
;; with immediate operand 2 as the merge mask.  Operand 3 is tied to the
;; destination by the "0" constraint.  There are two alternatives, with
;; operand 1 under the "r" or the "w" constraint.
;; The output code takes exact_log2 of the mask, remaps the result with
;; ENDIAN_LANE_N, re-encodes it as 1 << lane in operands[2], and prints an
;; "ins" whose lane is written through %p2.
df401d54 | 855 | (define_insn "aarch64_simd_vec_setv2di" |
e76492cb | 856 | [(set (match_operand:V2DI 0 "register_operand" "=w,w") |
df401d54 | 857 | (vec_merge:V2DI |
858 | (vec_duplicate:V2DI | |
e76492cb | 859 | (match_operand:DI 1 "register_operand" "r,w")) |
860 | (match_operand:V2DI 3 "register_operand" "0,0") | |
861 | (match_operand:SI 2 "immediate_operand" "i,i")))] | |
df401d54 | 862 | "TARGET_SIMD" |
56234646 | 863 | { |
864 | int elt = ENDIAN_LANE_N (V2DImode, exact_log2 (INTVAL (operands[2]))); | |
865 | operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt); | |
866 | switch (which_alternative) | |
867 | { | |
868 | case 0: | |
869 | return "ins\\t%0.d[%p2], %1"; | |
870 | case 1: | |
871 | return "ins\\t%0.d[%p2], %1.d[0]"; | |
872 | default: | |
873 | gcc_unreachable (); | |
874 | } | |
875 | } | |
52e95e58 | 876 | [(set_attr "type" "neon_from_gp, neon_ins_q")] |
df401d54 | 877 | ) |
878 | ||
;; Expander "vec_setv2di".  Operand 2 is an immediate index; it is turned
;; into the value 1 << index before being passed on.
;; aarch64_simd_vec_setv2di receives operand 0 as both its first and its
;; last argument.
879 | (define_expand "vec_setv2di" | |
e76492cb | 880 | [(match_operand:V2DI 0 "register_operand") |
881 | (match_operand:DI 1 "register_operand") | |
882 | (match_operand:SI 2 "immediate_operand")] | |
df401d54 | 883 | "TARGET_SIMD" |
884 | { | |
885 | HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]); | |
886 | emit_insn (gen_aarch64_simd_vec_setv2di (operands[0], operands[1], | |
887 | GEN_INT (elem), operands[0])); | |
888 | DONE; | |
889 | } | |
890 | ) | |
891 | ||
;; Insn for the VDQF_F16 modes: vec_merge of <VEL> operand 1 (duplicated to
;; the vector mode) into operand 3, with immediate operand 2 as the merge
;; mask.  Operand 3 is tied to the destination by the "0" constraint.
;; The output code takes exact_log2 of the mask, remaps the result with
;; ENDIAN_LANE_N, re-encodes it as 1 << lane in operands[2], and prints an
;; "ins" from lane 0 of operand 1, with the destination lane written
;; through %p2.
892 | (define_insn "aarch64_simd_vec_set<mode>" | |
57887f75 | 893 | [(set (match_operand:VDQF_F16 0 "register_operand" "=w") |
894 | (vec_merge:VDQF_F16 | |
895 | (vec_duplicate:VDQF_F16 | |
df401d54 | 896 | (match_operand:<VEL> 1 "register_operand" "w")) |
57887f75 | 897 | (match_operand:VDQF_F16 3 "register_operand" "0") |
df401d54 | 898 | (match_operand:SI 2 "immediate_operand" "i")))] |
899 | "TARGET_SIMD" | |
56234646 | 900 | { |
901 | int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2]))); | |
902 | ||
903 | operands[2] = GEN_INT ((HOST_WIDE_INT)1 << elt); | |
904 | return "ins\t%0.<Vetype>[%p2], %1.<Vetype>[0]"; | |
905 | } | |
52e95e58 | 906 | [(set_attr "type" "neon_ins<q>")] |
df401d54 | 907 | ) |
908 | ||
;; Expander "vec_set<mode>" for the VDQF_F16 modes.  Operand 2 is an
;; immediate index; it is turned into the value 1 << index before being
;; passed on.  aarch64_simd_vec_set<mode> receives operand 0 as both its
;; first and its last argument.
909 | (define_expand "vec_set<mode>" | |
57887f75 | 910 | [(match_operand:VDQF_F16 0 "register_operand" "+w") |
df401d54 | 911 | (match_operand:<VEL> 1 "register_operand" "w") |
912 | (match_operand:SI 2 "immediate_operand" "")] | |
913 | "TARGET_SIMD" | |
914 | { | |
915 | HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]); | |
916 | emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1], | |
917 | GEN_INT (elem), operands[0])); | |
918 | DONE; | |
919 | } | |
920 | ) | |
921 | ||
922 | ||
;; Insn for the VDQ_BHSI modes: operand 0 = operand 2 * operand 3 + operand 1.
;; The addend (operand 1) is tied to the destination by the "0" constraint.
;; Printed as "mla".
923 | (define_insn "aarch64_mla<mode>" | |
658fa7f6 | 924 | [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w") |
925 | (plus:VDQ_BHSI (mult:VDQ_BHSI | |
926 | (match_operand:VDQ_BHSI 2 "register_operand" "w") | |
927 | (match_operand:VDQ_BHSI 3 "register_operand" "w")) | |
928 | (match_operand:VDQ_BHSI 1 "register_operand" "0")))] | |
df401d54 | 929 | "TARGET_SIMD" |
930 | "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>" | |
52e95e58 | 931 | [(set_attr "type" "neon_mla_<Vetype><q>")] |
df401d54 | 932 | ) |
933 | ||
;; Insn for the VDQHS modes: multiply-accumulate in which one multiplicand
;; is a single element of operand 1, selected by immediate operand 2 and
;; duplicated across the vector.  The other multiplicand is operand 3; the
;; addend (operand 4) is tied to the destination by the "0" constraint.
;; The lane index is remapped with ENDIAN_LANE_N before it is printed.
5b44c420 | 934 | (define_insn "*aarch64_mla_elt<mode>" |
935 | [(set (match_operand:VDQHS 0 "register_operand" "=w") | |
936 | (plus:VDQHS | |
937 | (mult:VDQHS | |
938 | (vec_duplicate:VDQHS | |
939 | (vec_select:<VEL> | |
940 | (match_operand:VDQHS 1 "register_operand" "<h_con>") | |
941 | (parallel [(match_operand:SI 2 "immediate_operand")]))) | |
942 | (match_operand:VDQHS 3 "register_operand" "w")) | |
943 | (match_operand:VDQHS 4 "register_operand" "0")))] | |
944 | "TARGET_SIMD" | |
c034024c | 945 | { |
946 | operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]))); | |
947 | return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]"; | |
948 | } | |
52e95e58 | 949 | [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")] |
5b44c420 | 950 | ) |
951 | ||
;; Insn for the VDQHS modes: multiply-accumulate by element in which the
;; vector supplying the element (operand 1) has mode <VSWAP_WIDTH> rather
;; than the pattern mode.  The selected element (immediate operand 2) is
;; duplicated and multiplied by operand 3; the addend (operand 4) is tied to
;; the destination.  The lane index is remapped with ENDIAN_LANE_N for
;; <VSWAP_WIDTH>mode before it is printed.
952 | (define_insn "*aarch64_mla_elt_<vswap_width_name><mode>" | |
953 | [(set (match_operand:VDQHS 0 "register_operand" "=w") | |
954 | (plus:VDQHS | |
955 | (mult:VDQHS | |
956 | (vec_duplicate:VDQHS | |
957 | (vec_select:<VEL> | |
958 | (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>") | |
959 | (parallel [(match_operand:SI 2 "immediate_operand")]))) | |
960 | (match_operand:VDQHS 3 "register_operand" "w")) | |
961 | (match_operand:VDQHS 4 "register_operand" "0")))] | |
962 | "TARGET_SIMD" | |
c034024c | 963 | { |
964 | operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode, | |
965 | INTVAL (operands[2]))); | |
966 | return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]"; | |
967 | } | |
52e95e58 | 968 | [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")] |
5b44c420 | 969 | ) |
970 | ||
;; Insn for the VDQ_BHSI modes: operand 0 = operand 1 - operand 2 * operand 3.
;; The minuend (operand 1) is tied to the destination by the "0" constraint.
;; Printed as "mls".
df401d54 | 971 | (define_insn "aarch64_mls<mode>" |
658fa7f6 | 972 | [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w") |
973 | (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0") | |
974 | (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w") | |
975 | (match_operand:VDQ_BHSI 3 "register_operand" "w"))))] | |
df401d54 | 976 | "TARGET_SIMD" |
977 | "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>" | |
52e95e58 | 978 | [(set_attr "type" "neon_mla_<Vetype><q>")] |
df401d54 | 979 | ) |
980 | ||
;; Insn for the VDQHS modes: multiply-subtract in which one multiplicand is
;; a single element of operand 1, selected by immediate operand 2 and
;; duplicated across the vector.  The product with operand 3 is subtracted
;; from operand 4, which is tied to the destination by the "0" constraint.
;; The lane index is remapped with ENDIAN_LANE_N before it is printed.
5b44c420 | 981 | (define_insn "*aarch64_mls_elt<mode>" |
982 | [(set (match_operand:VDQHS 0 "register_operand" "=w") | |
983 | (minus:VDQHS | |
984 | (match_operand:VDQHS 4 "register_operand" "0") | |
985 | (mult:VDQHS | |
986 | (vec_duplicate:VDQHS | |
987 | (vec_select:<VEL> | |
988 | (match_operand:VDQHS 1 "register_operand" "<h_con>") | |
989 | (parallel [(match_operand:SI 2 "immediate_operand")]))) | |
990 | (match_operand:VDQHS 3 "register_operand" "w"))))] | |
991 | "TARGET_SIMD" | |
c034024c | 992 | { |
993 | operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]))); | |
994 | return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]"; | |
995 | } | |
52e95e58 | 996 | [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")] |
5b44c420 | 997 | ) |
998 | ||
;; Insn for the VDQHS modes: multiply-subtract by element in which the
;; vector supplying the element (operand 1) has mode <VSWAP_WIDTH> rather
;; than the pattern mode.  The product of the duplicated element and
;; operand 3 is subtracted from operand 4, which is tied to the destination.
;; The lane index is remapped with ENDIAN_LANE_N for <VSWAP_WIDTH>mode
;; before it is printed.
999 | (define_insn "*aarch64_mls_elt_<vswap_width_name><mode>" | |
1000 | [(set (match_operand:VDQHS 0 "register_operand" "=w") | |
1001 | (minus:VDQHS | |
1002 | (match_operand:VDQHS 4 "register_operand" "0") | |
1003 | (mult:VDQHS | |
1004 | (vec_duplicate:VDQHS | |
1005 | (vec_select:<VEL> | |
1006 | (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>") | |
1007 | (parallel [(match_operand:SI 2 "immediate_operand")]))) | |
1008 | (match_operand:VDQHS 3 "register_operand" "w"))))] | |
1009 | "TARGET_SIMD" | |
c034024c | 1010 | { |
1011 | operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode, | |
1012 | INTVAL (operands[2]))); | |
1013 | return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]"; | |
1014 | } | |
52e95e58 | 1015 | [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")] |
5b44c420 | 1016 | ) |
1017 | ||
df401d54 | 1018 | ;; Max/Min operations. |
;; Insn for each MAXMIN code on the VDQ_BHSI modes: operand 0 is the MAXMIN
;; operation applied to register operands 1 and 2.  The mnemonic is formed
;; from the <su> and <maxmin> attributes.
2520d4b7 | 1019 | (define_insn "<su><maxmin><mode>3" |
658fa7f6 | 1020 | [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w") |
1021 | (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w") | |
1022 | (match_operand:VDQ_BHSI 2 "register_operand" "w")))] | |
df401d54 | 1023 | "TARGET_SIMD" |
2520d4b7 | 1024 | "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" |
52e95e58 | 1025 | [(set_attr "type" "neon_minmax<q>")] |
df401d54 | 1026 | ) |
1027 | ||
;; Expander for each MAXMIN code in V2DI.  <CODE> is mapped to a comparison
;; code (UMIN -> LTU, SMIN -> LT, UMAX -> GTU, SMAX -> GT); that comparison
;; of operands 1 and 2 is built with gen_rtx_fmt_ee and passed, together
;; with the operands, to aarch64_vcond_internalv2div2di.
aa9243d5 | 1028 | (define_expand "<su><maxmin>v2di3" |
1029 | [(set (match_operand:V2DI 0 "register_operand" "") | |
1030 | (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand" "") | |
1031 | (match_operand:V2DI 2 "register_operand" "")))] | |
1032 | "TARGET_SIMD" | |
1033 | { | |
1034 | enum rtx_code cmp_operator; | |
1035 | rtx cmp_fmt; | |
1036 | ||
1037 | switch (<CODE>) | |
1038 | { | |
1039 | case UMIN: | |
1040 | cmp_operator = LTU; | |
1041 | break; | |
1042 | case SMIN: | |
1043 | cmp_operator = LT; | |
1044 | break; | |
1045 | case UMAX: | |
1046 | cmp_operator = GTU; | |
1047 | break; | |
1048 | case SMAX: | |
1049 | cmp_operator = GT; | |
1050 | break; | |
1051 | default: | |
1052 | gcc_unreachable (); | |
1053 | } | |
1054 | ||
1055 | cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]); | |
1056 | emit_insn (gen_aarch64_vcond_internalv2div2di (operands[0], operands[1], | |
1057 | operands[2], cmp_fmt, operands[1], operands[2])); | |
1058 | DONE; | |
1059 | }) | |
1060 | ||
ef416414 | 1061 | ;; Pairwise Integer Max/Min operations. |
;; Insn for the VDQ_BHSI modes: operand 0 is an unspec from the MAXMINV
;; iterator applied to register operands 1 and 2.  The mnemonic is
;; <maxmin_uns_op> with a "p" suffix.
1062 | (define_insn "aarch64_<maxmin_uns>p<mode>" | |
1063 | [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w") | |
1064 | (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w") | |
1065 | (match_operand:VDQ_BHSI 2 "register_operand" "w")] | |
1066 | MAXMINV))] | |
1067 | "TARGET_SIMD" | |
1068 | "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" | |
1069 | [(set_attr "type" "neon_minmax<q>")] | |
1070 | ) | |
1071 | ||
1072 | ;; Pairwise FP Max/Min operations. | |
;; Insn for the VDQF modes: operand 0 is an unspec from the FMAXMINV
;; iterator applied to register operands 1 and 2.  The mnemonic is
;; <maxmin_uns_op> with a "p" suffix.
1073 | (define_insn "aarch64_<maxmin_uns>p<mode>" | |
1074 | [(set (match_operand:VDQF 0 "register_operand" "=w") | |
1075 | (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w") | |
1076 | (match_operand:VDQF 2 "register_operand" "w")] | |
1077 | FMAXMINV))] | |
1078 | "TARGET_SIMD" | |
1079 | "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" | |
1080 | [(set_attr "type" "neon_minmax<q>")] | |
1081 | ) | |
1082 | ||
09853ca2 | 1083 | ;; vec_concat gives a new vector with the low elements from operand 1, and |
1084 | ;; the high elements from operand 2. That is to say, given op1 = { a, b } | |
1085 | ;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }. | |
1086 | ;; This means that the RTL descriptions of the patterns below | |
1087 | ;; need to change depending on endianness. | |
df401d54 | 1088 | |
09853ca2 | 1089 | ;; Move to the low architectural bits of the register. |
1090 | ;; On little-endian this is { operand, zeroes } | |
1091 | ;; On big-endian this is { zeroes, operand } | |
1092 | ||
;; VQ_NO2E form, enabled only when !BYTES_BIG_ENDIAN: operand 0 is the
;; vec_concat of <VHALF> operand 1 followed by a <VHALF> vector of zeros.
;; There are three alternatives, with operand 1 under the "w", "r" and "r"
;; constraints, printed as dup (from lane d[0]), fmov and dup respectively.
1093 | (define_insn "move_lo_quad_internal_<mode>" | |
138af246 | 1094 | [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w") |
1095 | (vec_concat:VQ_NO2E | |
e30d3162 | 1096 | (match_operand:<VHALF> 1 "register_operand" "w,r,r") |
df401d54 | 1097 | (vec_duplicate:<VHALF> (const_int 0))))] |
09853ca2 | 1098 | "TARGET_SIMD && !BYTES_BIG_ENDIAN" |
1099 | "@ | |
1100 | dup\\t%d0, %1.d[0] | |
1101 | fmov\\t%d0, %1 | |
1102 | dup\\t%d0, %1" | |
1103 | [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>") | |
1104 | (set_attr "simd" "yes,*,yes") | |
1105 | (set_attr "fp" "*,yes,*") | |
1106 | (set_attr "length" "4")] | |
1107 | ) | |
1108 | ||
;; VQ_2E form, enabled only when !BYTES_BIG_ENDIAN: operand 0 is the
;; vec_concat of <VHALF> operand 1 followed by (const_int 0).  The
;; alternatives and output templates are the same three as in the VQ_NO2E
;; form of this pattern.
138af246 | 1109 | (define_insn "move_lo_quad_internal_<mode>" |
1110 | [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w") | |
1111 | (vec_concat:VQ_2E | |
1112 | (match_operand:<VHALF> 1 "register_operand" "w,r,r") | |
1113 | (const_int 0)))] | |
1114 | "TARGET_SIMD && !BYTES_BIG_ENDIAN" | |
1115 | "@ | |
1116 | dup\\t%d0, %1.d[0] | |
1117 | fmov\\t%d0, %1 | |
1118 | dup\\t%d0, %1" | |
1119 | [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>") | |
1120 | (set_attr "simd" "yes,*,yes") | |
1121 | (set_attr "fp" "*,yes,*") | |
1122 | (set_attr "length" "4")] | |
1123 | ) | |
1124 | ||
;; VQ_NO2E form, enabled only when BYTES_BIG_ENDIAN: the vec_concat halves
;; are swapped, so the <VHALF> zero vector comes first and <VHALF> operand 1
;; second.  The three output templates are identical to those of the
;; little-endian pattern.
09853ca2 | 1125 | (define_insn "move_lo_quad_internal_be_<mode>" |
138af246 | 1126 | [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w") |
1127 | (vec_concat:VQ_NO2E | |
09853ca2 | 1128 | (vec_duplicate:<VHALF> (const_int 0)) |
1129 | (match_operand:<VHALF> 1 "register_operand" "w,r,r")))] | |
1130 | "TARGET_SIMD && BYTES_BIG_ENDIAN" | |
e30d3162 | 1131 | "@ |
20431bc8 | 1132 | dup\\t%d0, %1.d[0] |
1133 | fmov\\t%d0, %1 | |
1134 | dup\\t%d0, %1" | |
94f2c0a7 | 1135 | [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>") |
e30d3162 | 1136 | (set_attr "simd" "yes,*,yes") |
1137 | (set_attr "fp" "*,yes,*") | |
1138 | (set_attr "length" "4")] | |
df401d54 | 1139 | ) |
1140 | ||
;; VQ_2E form, enabled only when BYTES_BIG_ENDIAN: the vec_concat halves are
;; swapped, so (const_int 0) comes first and <VHALF> operand 1 second.  The
;; three output templates are identical to those of the little-endian
;; pattern.
138af246 | 1141 | (define_insn "move_lo_quad_internal_be_<mode>" |
1142 | [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w") | |
1143 | (vec_concat:VQ_2E | |
1144 | (const_int 0) | |
1145 | (match_operand:<VHALF> 1 "register_operand" "w,r,r")))] | |
1146 | "TARGET_SIMD && BYTES_BIG_ENDIAN" | |
1147 | "@ | |
1148 | dup\\t%d0, %1.d[0] | |
1149 | fmov\\t%d0, %1 | |
1150 | dup\\t%d0, %1" | |
1151 | [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>") | |
1152 | (set_attr "simd" "yes,*,yes") | |
1153 | (set_attr "fp" "*,yes,*") | |
1154 | (set_attr "length" "4")] | |
1155 | ) | |
1156 | ||
;; Expander that picks the endian-specific insn: it emits
;; move_lo_quad_internal_be_<mode> when BYTES_BIG_ENDIAN and
;; move_lo_quad_internal_<mode> otherwise, forwarding operands 0 and 1
;; unchanged.
;; Operand 1 is declared in mode <VHALF>: that is the mode both internal
;; insns require for their operand 1, and it matches the declaration used
;; by move_hi_quad_<mode>.
09853ca2 | 1157 | (define_expand "move_lo_quad_<mode>" |
1158 | [(match_operand:VQ 0 "register_operand") | |
1159 | (match_operand:<VHALF> 1 "register_operand")] | |
1160 | "TARGET_SIMD" | |
1161 | { | |
1162 | if (BYTES_BIG_ENDIAN) | |
1163 | emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1])); | |
1164 | else | |
1165 | emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1])); | |
1166 | DONE; | |
1167 | } | |
1168 | ) | |
1169 | ||
1170 | ;; Move operand1 to the high architectural bits of the register, keeping | |
1171 | ;; the low architectural bits of operand2. | |
1172 | ;; For little-endian this is { operand2, operand1 } | |
1173 | ;; For big-endian this is { operand1, operand2 } | |
df401d54 | 1174 | |
;; Enabled only when !BYTES_BIG_ENDIAN.  Operand 0 is read and written
;; ("+w"): the new value is the vec_concat of a vec_select of operand 0
;; itself (selector operand 2, predicate vect_par_cnst_lo_half) followed by
;; <VHALF> operand 1.  There are two alternatives, with operand 1 under the
;; "w" or the "r" constraint; both print an "ins" into lane d[1].
1175 | (define_insn "aarch64_simd_move_hi_quad_<mode>" | |
e30d3162 | 1176 | [(set (match_operand:VQ 0 "register_operand" "+w,w") |
df401d54 | 1177 | (vec_concat:VQ |
1178 | (vec_select:<VHALF> | |
1179 | (match_dup 0) | |
1180 | (match_operand:VQ 2 "vect_par_cnst_lo_half" "")) | |
e30d3162 | 1181 | (match_operand:<VHALF> 1 "register_operand" "w,r")))] |
09853ca2 | 1182 | "TARGET_SIMD && !BYTES_BIG_ENDIAN" |
e30d3162 | 1183 | "@ |
1184 | ins\\t%0.d[1], %1.d[0] | |
1185 | ins\\t%0.d[1], %1" | |
09853ca2 | 1186 | [(set_attr "type" "neon_ins")] |
1187 | ) | |
1188 | ||
;; Enabled only when BYTES_BIG_ENDIAN.  Same operands as the little-endian
;; pattern but with the vec_concat halves swapped: <VHALF> operand 1 comes
;; first, followed by the vec_select of operand 0 (selector operand 2,
;; predicate vect_par_cnst_lo_half).  The output templates are unchanged.
1189 | (define_insn "aarch64_simd_move_hi_quad_be_<mode>" | |
1190 | [(set (match_operand:VQ 0 "register_operand" "+w,w") | |
1191 | (vec_concat:VQ | |
1192 | (match_operand:<VHALF> 1 "register_operand" "w,r") | |
1193 | (vec_select:<VHALF> | |
1194 | (match_dup 0) | |
e71cd518 | 1195 | (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))] |
09853ca2 | 1196 | "TARGET_SIMD && BYTES_BIG_ENDIAN" |
1197 | "@ | |
1198 | ins\\t%0.d[1], %1.d[0] | |
1199 | ins\\t%0.d[1], %1" | |
1200 | [(set_attr "type" "neon_ins")] | |
df401d54 | 1201 | ) |
1202 | ||
;; Expander that picks the endian-specific insn.  It builds the selector
;; with aarch64_simd_vect_par_cnst_half (<MODE>mode, false) and passes it as
;; the third argument to aarch64_simd_move_hi_quad_be_<mode> when
;; BYTES_BIG_ENDIAN, or to aarch64_simd_move_hi_quad_<mode> otherwise.
1203 | (define_expand "move_hi_quad_<mode>" | |
1204 | [(match_operand:VQ 0 "register_operand" "") | |
1205 | (match_operand:<VHALF> 1 "register_operand" "")] | |
1206 | "TARGET_SIMD" | |
1207 | { | |
e71cd518 | 1208 | rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false); |
09853ca2 | 1209 | if (BYTES_BIG_ENDIAN) |
1210 | emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0], | |
1211 | operands[1], p)); | |
1212 | else | |
1213 | emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0], | |
1214 | operands[1], p)); | |
df401d54 | 1215 | DONE; |
1216 | }) | |
1217 | ||
1218 | ;; Narrowing operations. | |
1219 | ||
1220 | ;; For doubles. | |
;; Insn: operand 0 (mode <VNARROWQ>) is the truncate of VQN operand 1.
;; Printed as "xtn".
1221 | (define_insn "aarch64_simd_vec_pack_trunc_<mode>" | |
1222 | [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w") | |
1223 | (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))] | |
1224 | "TARGET_SIMD" | |
1225 | "xtn\\t%0.<Vntype>, %1.<Vtype>" | |
52e95e58 | 1226 | [(set_attr "type" "neon_shift_imm_narrow_q")] |
df401d54 | 1227 | ) |
1228 | ||
;; Expander for the VDN modes.  It fills a fresh <VDBL> pseudo from the two
;; inputs with move_lo_quad_<Vdbl> and move_hi_quad_<Vdbl>: operand 1 goes
;; to "lo" and operand 2 to "hi", and the two are swapped when
;; BYTES_BIG_ENDIAN.  The pseudo is then truncated into operand 0 with
;; aarch64_simd_vec_pack_trunc_<Vdbl>.
1229 | (define_expand "vec_pack_trunc_<mode>" | |
1230 | [(match_operand:<VNARROWD> 0 "register_operand" "") | |
1231 | (match_operand:VDN 1 "register_operand" "") | |
1232 | (match_operand:VDN 2 "register_operand" "")] | |
1233 | "TARGET_SIMD" | |
1234 | { | |
1235 | rtx tempreg = gen_reg_rtx (<VDBL>mode); | |
961b4271 | 1236 | int lo = BYTES_BIG_ENDIAN ? 2 : 1; |
1237 | int hi = BYTES_BIG_ENDIAN ? 1 : 2; | |
df401d54 | 1238 | |
961b4271 | 1239 | emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo])); |
1240 | emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi])); | |
df401d54 | 1241 | emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg)); |
1242 | DONE; | |
1243 | }) | |
1244 | ||
1245 | ;; For quads. | |
1246 | ||
;; Insn for the VQN modes: operand 0 (mode <VNARROWQ2>) is the vec_concat of
;; the truncations of operands 1 and 2.  The output is a two-instruction
;; sequence, "xtn" then "xtn2" (length attribute 8).  The destination
;; constraint is "=&w".  When BYTES_BIG_ENDIAN the two source operands
;; trade places in the printed sequence.
1247 | (define_insn "vec_pack_trunc_<mode>" | |
c0a0cc5e | 1248 | [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w") |
df401d54 | 1249 | (vec_concat:<VNARROWQ2> |
1250 | (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")) | |
1251 | (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))] | |
1252 | "TARGET_SIMD" | |
961b4271 | 1253 | { |
1254 | if (BYTES_BIG_ENDIAN) | |
1255 | return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>"; | |
1256 | else | |
1257 | return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>"; | |
1258 | } | |
52e95e58 | 1259 | [(set_attr "type" "multiple") |
df401d54 | 1260 | (set_attr "length" "8")] |
1261 | ) | |
1262 | ||
1263 | ;; Widening operations. | |
1264 | ||
;; Insn: operand 0 (mode <VWIDE>) is the ANY_EXTEND of a vec_select of VQW
;; operand 1, with selector operand 2 matching vect_par_cnst_lo_half.
;; Printed as "<su>shll" with an immediate of 0.
1265 | (define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>" | |
1266 | [(set (match_operand:<VWIDE> 0 "register_operand" "=w") | |
1267 | (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> | |
1268 | (match_operand:VQW 1 "register_operand" "w") | |
1269 | (match_operand:VQW 2 "vect_par_cnst_lo_half" "") | |
1270 | )))] | |
1271 | "TARGET_SIMD" | |
00225f5c | 1272 | "<su>shll\t%0.<Vwtype>, %1.<Vhalftype>, 0" |
52e95e58 | 1273 | [(set_attr "type" "neon_shift_imm_long")] |
df401d54 | 1274 | ) |
1275 | ||
;; Insn: operand 0 (mode <VWIDE>) is the ANY_EXTEND of a vec_select of VQW
;; operand 1, with selector operand 2 matching vect_par_cnst_hi_half.
;; Printed as "<su>shll2" with an immediate of 0.
1276 | (define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>" | |
1277 | [(set (match_operand:<VWIDE> 0 "register_operand" "=w") | |
1278 | (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> | |
1279 | (match_operand:VQW 1 "register_operand" "w") | |
1280 | (match_operand:VQW 2 "vect_par_cnst_hi_half" "") | |
1281 | )))] | |
1282 | "TARGET_SIMD" | |
00225f5c | 1283 | "<su>shll2\t%0.<Vwtype>, %1.<Vtype>, 0" |
52e95e58 | 1284 | [(set_attr "type" "neon_shift_imm_long")] |
df401d54 | 1285 | ) |
1286 | ||
;; Expander for the VQW modes: builds the selector with
;; aarch64_simd_vect_par_cnst_half (<MODE>mode, true) and emits
;; aarch64_simd_vec_unpack<su>_hi_<mode> on operands 0 and 1 with it.
1287 | (define_expand "vec_unpack<su>_hi_<mode>" | |
1288 | [(match_operand:<VWIDE> 0 "register_operand" "") | |
1289 | (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))] | |
1290 | "TARGET_SIMD" | |
1291 | { | |
1292 | rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); | |
1293 | emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0], | |
1294 | operands[1], p)); | |
1295 | DONE; | |
1296 | } | |
1297 | ) | |
1298 | ||
;; Expander for the VQW modes: builds the selector with
;; aarch64_simd_vect_par_cnst_half (<MODE>mode, false) and emits
;; aarch64_simd_vec_unpack<su>_lo_<mode> on operands 0 and 1 with it.
1299 | (define_expand "vec_unpack<su>_lo_<mode>" | |
1300 | [(match_operand:<VWIDE> 0 "register_operand" "") | |
1301 | (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))] | |
1302 | "TARGET_SIMD" | |
1303 | { | |
1304 | rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false); | |
1305 | emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0], | |
1306 | operands[1], p)); | |
1307 | DONE; | |
1308 | } | |
1309 | ) | |
1310 | ||
1311 | ;; Widening arithmetic. | |
1312 | ||
;; Insn: operand 0 = ext (sel (operand 2)) * ext (sel (operand 4)) + operand 1.
;; "ext" is ANY_EXTEND to <VWIDE>; "sel" is a vec_select using the same
;; selector (operand 3, predicate vect_par_cnst_lo_half) for both inputs.
;; The addend (operand 1) is tied to the destination.  Printed as "<su>mlal".
74375e70 | 1313 | (define_insn "*aarch64_<su>mlal_lo<mode>" |
1314 | [(set (match_operand:<VWIDE> 0 "register_operand" "=w") | |
1315 | (plus:<VWIDE> | |
1316 | (mult:<VWIDE> | |
1317 | (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> | |
1318 | (match_operand:VQW 2 "register_operand" "w") | |
1319 | (match_operand:VQW 3 "vect_par_cnst_lo_half" ""))) | |
1320 | (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> | |
1321 | (match_operand:VQW 4 "register_operand" "w") | |
1322 | (match_dup 3)))) | |
1323 | (match_operand:<VWIDE> 1 "register_operand" "0")))] | |
1324 | "TARGET_SIMD" | |
1325 | "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>" | |
52e95e58 | 1326 | [(set_attr "type" "neon_mla_<Vetype>_long")] |
74375e70 | 1327 | ) |
1328 | ||
;; Insn: operand 0 = ext (sel (operand 2)) * ext (sel (operand 4)) + operand 1.
;; "ext" is ANY_EXTEND to <VWIDE>; "sel" is a vec_select using the same
;; selector (operand 3, predicate vect_par_cnst_hi_half) for both inputs.
;; The addend (operand 1) is tied to the destination.  Printed as "<su>mlal2".
1329 | (define_insn "*aarch64_<su>mlal_hi<mode>" | |
1330 | [(set (match_operand:<VWIDE> 0 "register_operand" "=w") | |
1331 | (plus:<VWIDE> | |
1332 | (mult:<VWIDE> | |
1333 | (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> | |
1334 | (match_operand:VQW 2 "register_operand" "w") | |
1335 | (match_operand:VQW 3 "vect_par_cnst_hi_half" ""))) | |
1336 | (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> | |
1337 | (match_operand:VQW 4 "register_operand" "w") | |
1338 | (match_dup 3)))) | |
1339 | (match_operand:<VWIDE> 1 "register_operand" "0")))] | |
1340 | "TARGET_SIMD" | |
1341 | "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>" | |
52e95e58 | 1342 | [(set_attr "type" "neon_mla_<Vetype>_long")] |
74375e70 | 1343 | ) |
1344 | ||
;; Insn: operand 0 = operand 1 - ext (sel (operand 2)) * ext (sel (operand 4)).
;; "ext" is ANY_EXTEND to <VWIDE>; "sel" is a vec_select using the same
;; selector (operand 3, predicate vect_par_cnst_lo_half) for both inputs.
;; The minuend (operand 1) is tied to the destination.  Printed as
;; "<su>mlsl".
1345 | (define_insn "*aarch64_<su>mlsl_lo<mode>" | |
1346 | [(set (match_operand:<VWIDE> 0 "register_operand" "=w") | |
1347 | (minus:<VWIDE> | |
1348 | (match_operand:<VWIDE> 1 "register_operand" "0") | |
1349 | (mult:<VWIDE> | |
1350 | (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> | |
1351 | (match_operand:VQW 2 "register_operand" "w") | |
1352 | (match_operand:VQW 3 "vect_par_cnst_lo_half" ""))) | |
1353 | (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> | |
1354 | (match_operand:VQW 4 "register_operand" "w") | |
1355 | (match_dup 3))))))] | |
1356 | "TARGET_SIMD" | |
1357 | "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>" | |
52e95e58 | 1358 | [(set_attr "type" "neon_mla_<Vetype>_long")] |
74375e70 | 1359 | ) |
1360 | ||
;; Insn: operand 0 = operand 1 - ext (sel (operand 2)) * ext (sel (operand 4)).
;; "ext" is ANY_EXTEND to <VWIDE>; "sel" is a vec_select using the same
;; selector (operand 3, predicate vect_par_cnst_hi_half) for both inputs.
;; The minuend (operand 1) is tied to the destination.  Printed as
;; "<su>mlsl2".
1361 | (define_insn "*aarch64_<su>mlsl_hi<mode>" | |
1362 | [(set (match_operand:<VWIDE> 0 "register_operand" "=w") | |
1363 | (minus:<VWIDE> | |
1364 | (match_operand:<VWIDE> 1 "register_operand" "0") | |
1365 | (mult:<VWIDE> | |
1366 | (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> | |
1367 | (match_operand:VQW 2 "register_operand" "w") | |
1368 | (match_operand:VQW 3 "vect_par_cnst_hi_half" ""))) | |
1369 | (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> | |
1370 | (match_operand:VQW 4 "register_operand" "w") | |
1371 | (match_dup 3))))))] | |
1372 | "TARGET_SIMD" | |
1373 | "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>" | |
52e95e58 | 1374 | [(set_attr "type" "neon_mla_<Vetype>_long")] |
74375e70 | 1375 | ) |
1376 | ||
;; Insn for the VD_BHSI modes: operand 0 (mode <VWIDE>) =
;; ext (operand 1) * ext (operand 2) + operand 3, where "ext" is ANY_EXTEND
;; to <VWIDE>.  The addend (operand 3) is tied to the destination.
;; Printed as "<su>mlal".
1377 | (define_insn "*aarch64_<su>mlal<mode>" | |
1378 | [(set (match_operand:<VWIDE> 0 "register_operand" "=w") | |
1379 | (plus:<VWIDE> | |
1380 | (mult:<VWIDE> | |
1381 | (ANY_EXTEND:<VWIDE> | |
658fa7f6 | 1382 | (match_operand:VD_BHSI 1 "register_operand" "w")) |
74375e70 | 1383 | (ANY_EXTEND:<VWIDE> |
658fa7f6 | 1384 | (match_operand:VD_BHSI 2 "register_operand" "w"))) |
74375e70 | 1385 | (match_operand:<VWIDE> 3 "register_operand" "0")))] |
1386 | "TARGET_SIMD" | |
1387 | "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>" | |
52e95e58 | 1388 | [(set_attr "type" "neon_mla_<Vetype>_long")] |
74375e70 | 1389 | ) |
1390 | ||
;; Insn for the VD_BHSI modes: operand 0 (mode <VWIDE>) =
;; operand 1 - ext (operand 2) * ext (operand 3), where "ext" is ANY_EXTEND
;; to <VWIDE>.  The minuend (operand 1) is tied to the destination.
;; Printed as "<su>mlsl".
1391 | (define_insn "*aarch64_<su>mlsl<mode>" | |
1392 | [(set (match_operand:<VWIDE> 0 "register_operand" "=w") | |
1393 | (minus:<VWIDE> | |
1394 | (match_operand:<VWIDE> 1 "register_operand" "0") | |
1395 | (mult:<VWIDE> | |
1396 | (ANY_EXTEND:<VWIDE> | |
658fa7f6 | 1397 | (match_operand:VD_BHSI 2 "register_operand" "w")) |
74375e70 | 1398 | (ANY_EXTEND:<VWIDE> |
658fa7f6 | 1399 | (match_operand:VD_BHSI 3 "register_operand" "w")))))] |
74375e70 | 1400 | "TARGET_SIMD" |
1401 | "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>" | |
52e95e58 | 1402 | [(set_attr "type" "neon_mla_<Vetype>_long")] |
74375e70 | 1403 | ) |
1404 | ||
;; Insn: operand 0 (mode <VWIDE>) = ext (sel (operand 1)) * ext (sel (operand 2)).
;; "ext" is ANY_EXTEND to <VWIDE>; "sel" is a vec_select using the same
;; selector (operand 3, predicate vect_par_cnst_lo_half) for both inputs.
;; Printed as "<su>mull".
df401d54 | 1405 | (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>" |
1406 | [(set (match_operand:<VWIDE> 0 "register_operand" "=w") | |
1407 | (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> | |
1408 | (match_operand:VQW 1 "register_operand" "w") | |
1409 | (match_operand:VQW 3 "vect_par_cnst_lo_half" ""))) | |
1410 | (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> | |
1411 | (match_operand:VQW 2 "register_operand" "w") | |
1412 | (match_dup 3)))))] | |
1413 | "TARGET_SIMD" | |
b3f1c89d | 1414 | "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>" |
52e95e58 | 1415 | [(set_attr "type" "neon_mul_<Vetype>_long")] |
df401d54 | 1416 | ) |
1417 | ||
;; Expander for the VQW modes: builds the selector with
;; aarch64_simd_vect_par_cnst_half (<MODE>mode, false) and emits
;; aarch64_simd_vec_<su>mult_lo_<mode> on operands 0, 1 and 2 with it.
1418 | (define_expand "vec_widen_<su>mult_lo_<mode>" | |
1419 | [(match_operand:<VWIDE> 0 "register_operand" "") | |
1420 | (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" "")) | |
1421 | (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))] | |
1422 | "TARGET_SIMD" | |
1423 | { | |
1424 | rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false); | |
1425 | emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0], | |
1426 | operands[1], | |
1427 | operands[2], p)); | |
1428 | DONE; | |
1429 | } | |
1430 | ) | |
1431 | ||
;; Insn: operand 0 (mode <VWIDE>) = ext (sel (operand 1)) * ext (sel (operand 2)).
;; "ext" is ANY_EXTEND to <VWIDE>; "sel" is a vec_select using the same
;; selector (operand 3, predicate vect_par_cnst_hi_half) for both inputs.
;; Printed as "<su>mull2".
1432 | (define_insn "aarch64_simd_vec_<su>mult_hi_<mode>" | |
1433 | [(set (match_operand:<VWIDE> 0 "register_operand" "=w") | |
1434 | (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> | |
1435 | (match_operand:VQW 1 "register_operand" "w") | |
1436 | (match_operand:VQW 3 "vect_par_cnst_hi_half" ""))) | |
1437 | (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> | |
1438 | (match_operand:VQW 2 "register_operand" "w") | |
1439 | (match_dup 3)))))] | |
1440 | "TARGET_SIMD" | |
b3f1c89d | 1441 | "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>" |
52e95e58 | 1442 | [(set_attr "type" "neon_mul_<Vetype>_long")] |
df401d54 | 1443 | ) |
1444 | ||
;; Expander for the VQW modes: builds the selector with
;; aarch64_simd_vect_par_cnst_half (<MODE>mode, true) and emits
;; aarch64_simd_vec_<su>mult_hi_<mode> on operands 0, 1 and 2 with it.
1445 | (define_expand "vec_widen_<su>mult_hi_<mode>" | |
1446 | [(match_operand:<VWIDE> 0 "register_operand" "") | |
1447 | (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" "")) | |
1448 | (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))] | |
1449 | "TARGET_SIMD" | |
1450 | { | |
1451 | rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); | |
1452 | emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0], | |
1453 | operands[1], | |
1454 | operands[2], p)); | |
1455 | DONE; | |
1456 | ||
1457 | } | |
1458 | ) | |
1459 | ||
1460 | ;; FP vector operations. | |
1461 | ;; AArch64 AdvSIMD supports single-precision (32-bit) and | |
1462 | ;; double-precision (64-bit) floating-point data types and arithmetic as | |
1463 | ;; defined by the IEEE 754-2008 standard. This makes them vectorizable | |
1464 | ;; without the need for -ffast-math or -funsafe-math-optimizations. | |
1465 | ;; | |
1466 | ;; Floating-point operations can raise an exception. Vectorizing such | |
1467 | ;; operations is safe for the reasons explained below. | |
1468 | ;; | |
1469 | ;; ARMv8 permits an extension to enable trapped floating-point | |
1470 | ;; exception handling, however this is an optional feature. In the | |
1471 | ;; event of a floating-point exception being raised by vectorised | |
1472 | ;; code then: | |
1473 | ;; 1. If trapped floating-point exceptions are available, then a trap | |
1474 | ;; will be taken when any lane raises an enabled exception. A trap | |
1475 | ;; handler may determine which lane raised the exception. | |
1476 | ;; 2. Alternatively a sticky exception flag is set in the | |
1477 | ;; floating-point status register (FPSR). Software may explicitly | |
1478 | ;; test the exception flags, in which case the tests will either | |
1479 | ;; prevent vectorisation, allowing precise identification of the | |
1480 | ;; failing operation, or if tested outside of vectorisable regions | |
1481 | ;; then the specific operation and lane are not of interest. | |
1482 | ||
1483 | ;; FP arithmetic operations. | |
1484 | ||
;; Insn for the VDQF modes: operand 0 = operand 1 + operand 2.
;; Printed as "fadd".
1485 | (define_insn "add<mode>3" | |
1486 | [(set (match_operand:VDQF 0 "register_operand" "=w") | |
1487 | (plus:VDQF (match_operand:VDQF 1 "register_operand" "w") | |
1488 | (match_operand:VDQF 2 "register_operand" "w")))] | |
1489 | "TARGET_SIMD" | |
1490 | "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" | |
52e95e58 | 1491 | [(set_attr "type" "neon_fp_addsub_<Vetype><q>")] |
df401d54 | 1492 | ) |
1493 | ||
;; Insn for the VDQF modes: operand 0 = operand 1 - operand 2.
;; Printed as "fsub".
1494 | (define_insn "sub<mode>3" | |
1495 | [(set (match_operand:VDQF 0 "register_operand" "=w") | |
1496 | (minus:VDQF (match_operand:VDQF 1 "register_operand" "w") | |
1497 | (match_operand:VDQF 2 "register_operand" "w")))] | |
1498 | "TARGET_SIMD" | |
1499 | "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" | |
52e95e58 | 1500 | [(set_attr "type" "neon_fp_addsub_<Vetype><q>")] |
df401d54 | 1501 | ) |
1502 | ||
;; Insn for the VDQF modes: operand 0 = operand 1 * operand 2.
;; Printed as "fmul".
1503 | (define_insn "mul<mode>3" | |
1504 | [(set (match_operand:VDQF 0 "register_operand" "=w") | |
1505 | (mult:VDQF (match_operand:VDQF 1 "register_operand" "w") | |
1506 | (match_operand:VDQF 2 "register_operand" "w")))] | |
1507 | "TARGET_SIMD" | |
1508 | "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" | |
52e95e58 | 1509 | [(set_attr "type" "neon_fp_mul_<Vetype><q>")] |
df401d54 | 1510 | ) |
1511 | ||
;; Insn for the VDQF modes: operand 0 = operand 1 / operand 2.
;; Printed as "fdiv".
1512 | (define_insn "div<mode>3" | |
1513 | [(set (match_operand:VDQF 0 "register_operand" "=w") | |
1514 | (div:VDQF (match_operand:VDQF 1 "register_operand" "w") | |
1515 | (match_operand:VDQF 2 "register_operand" "w")))] | |
1516 | "TARGET_SIMD" | |
1517 | "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" | |
52e95e58 | 1518 | [(set_attr "type" "neon_fp_div_<Vetype><q>")] |
df401d54 | 1519 | ) |
1520 | ||
;; Insn for the VDQF modes: operand 0 = neg (operand 1).  Printed as "fneg".
1521 | (define_insn "neg<mode>2" | |
1522 | [(set (match_operand:VDQF 0 "register_operand" "=w") | |
1523 | (neg:VDQF (match_operand:VDQF 1 "register_operand" "w")))] | |
1524 | "TARGET_SIMD" | |
1525 | "fneg\\t%0.<Vtype>, %1.<Vtype>" | |
52e95e58 | 1526 | [(set_attr "type" "neon_fp_neg_<Vetype><q>")] |
df401d54 | 1527 | ) |
1528 | ||
;; Insn for the VDQF modes: operand 0 = abs (operand 1).  Printed as "fabs".
1529 | (define_insn "abs<mode>2" | |
1530 | [(set (match_operand:VDQF 0 "register_operand" "=w") | |
1531 | (abs:VDQF (match_operand:VDQF 1 "register_operand" "w")))] | |
1532 | "TARGET_SIMD" | |
1533 | "fabs\\t%0.<Vtype>, %1.<Vtype>" | |
52e95e58 | 1534 | [(set_attr "type" "neon_fp_abs_<Vetype><q>")] |
df401d54 | 1535 | ) |
1536 | ||
;; Vector fused multiply-add: operand 0 = fma (operand 1, operand 2, operand 3).
;; The addend (operand 3) is tied to the destination by the "0" constraint,
;; so the FMLA template only names the destination and the two multiplicands.
1537 | (define_insn "fma<mode>4" | |
1538 | [(set (match_operand:VDQF 0 "register_operand" "=w") | |
1539 | (fma:VDQF (match_operand:VDQF 1 "register_operand" "w") | |
1540 | (match_operand:VDQF 2 "register_operand" "w") | |
1541 | (match_operand:VDQF 3 "register_operand" "0")))] | |
1542 | "TARGET_SIMD" | |
1543 | "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" | |
52e95e58 | 1544 | [(set_attr "type" "neon_fp_mla_<Vetype><q>")] |
df401d54 | 1545 | ) |
1546 | ||
;; FMA in which the first multiplicand is one lane of operand 1 (lane number
;; in immediate operand 2) duplicated across the vector.  Operand 3 is the
;; other multiplicand; operand 4 is the accumulator, tied to the destination.
;; The lane number is rewritten through ENDIAN_LANE_N for <MODE>mode before
;; the by-element FMLA is printed.
5b44c420 | 1547 | (define_insn "*aarch64_fma4_elt<mode>" |
1548 | [(set (match_operand:VDQF 0 "register_operand" "=w") | |
1549 | (fma:VDQF | |
1550 | (vec_duplicate:VDQF | |
1551 | (vec_select:<VEL> | |
1552 | (match_operand:VDQF 1 "register_operand" "<h_con>") | |
1553 | (parallel [(match_operand:SI 2 "immediate_operand")]))) | |
1554 | (match_operand:VDQF 3 "register_operand" "w") | |
1555 | (match_operand:VDQF 4 "register_operand" "0")))] | |
1556 | "TARGET_SIMD" | |
c034024c | 1557 | { |
1558 | operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]))); | |
1559 | return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]"; | |
1560 | } | |
52e95e58 | 1561 | [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")] |
5b44c420 | 1562 | ) |
1563 | ||
;; As the by-element FMA above in this file, but the lane is selected from a
;; register of mode <VSWAP_WIDTH> rather than the result mode.  The lane number
;; is therefore rewritten through ENDIAN_LANE_N for <VSWAP_WIDTH>mode.
;; Operand 4 (the accumulator) is tied to the destination.
1564 | (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>" | |
1565 | [(set (match_operand:VDQSF 0 "register_operand" "=w") | |
1566 | (fma:VDQSF | |
1567 | (vec_duplicate:VDQSF | |
1568 | (vec_select:<VEL> | |
1569 | (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>") | |
1570 | (parallel [(match_operand:SI 2 "immediate_operand")]))) | |
1571 | (match_operand:VDQSF 3 "register_operand" "w") | |
1572 | (match_operand:VDQSF 4 "register_operand" "0")))] | |
1573 | "TARGET_SIMD" | |
c034024c | 1574 | { |
1575 | operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode, | |
1576 | INTVAL (operands[2]))); | |
1577 | return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]"; | |
1578 | } | |
52e95e58 | 1579 | [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")] |
5b44c420 | 1580 | ) |
1581 | ||
;; V2DF FMA whose first multiplicand is the DF scalar operand 1 duplicated
;; across the vector.  Operand 3 (the accumulator) is tied to the destination;
;; the scalar is printed as lane 0 of operand 1.
1582 | (define_insn "*aarch64_fma4_elt_to_128df" | |
1583 | [(set (match_operand:V2DF 0 "register_operand" "=w") | |
1584 | (fma:V2DF | |
1585 | (vec_duplicate:V2DF | |
1586 | (match_operand:DF 1 "register_operand" "w")) | |
1587 | (match_operand:V2DF 2 "register_operand" "w") | |
1588 | (match_operand:V2DF 3 "register_operand" "0")))] | |
1589 | "TARGET_SIMD" | |
1590 | "fmla\\t%0.2d, %2.2d, %1.2d[0]" | |
52e95e58 | 1591 | [(set_attr "type" "neon_fp_mla_d_scalar_q")] |
5b44c420 | 1592 | ) |
1593 | ||
;; DF FMA whose first multiplicand is the lane of V2DF operand 1 selected by
;; immediate operand 2 (rewritten through ENDIAN_LANE_N for V2DFmode).
;; Operand 4 (the accumulator) is tied to the destination.  Note that the
;; template prints every operand with the .2d arrangement, including the DF
;; operands 0 and 3.
1594 | (define_insn "*aarch64_fma4_elt_to_64v2df" | |
1595 | [(set (match_operand:DF 0 "register_operand" "=w") | |
1596 | (fma:DF | |
1597 | (vec_select:DF | |
1598 | (match_operand:V2DF 1 "register_operand" "w") | |
1599 | (parallel [(match_operand:SI 2 "immediate_operand")])) | |
1600 | (match_operand:DF 3 "register_operand" "w") | |
1601 | (match_operand:DF 4 "register_operand" "0")))] | |
1602 | "TARGET_SIMD" | |
c034024c | 1603 | { |
1604 | operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2]))); | |
1605 | return "fmla\\t%0.2d, %3.2d, %1.2d[%2]"; | |
1606 | } | |
52e95e58 | 1607 | [(set_attr "type" "neon_fp_mla_d_scalar_q")] |
5b44c420 | 1608 | ) |
1609 | ||
;; Vector fused multiply-subtract:
;; operand 0 = fma (operand 1, -operand 2, operand 3), emitted as FMLS.
;; The addend (operand 3) is tied to the destination by the "0" constraint.
1610 | (define_insn "fnma<mode>4" | |
1611 | [(set (match_operand:VDQF 0 "register_operand" "=w") | |
1612 | (fma:VDQF | |
1613 | (match_operand:VDQF 1 "register_operand" "w") | |
1614 | (neg:VDQF | |
1615 | (match_operand:VDQF 2 "register_operand" "w")) | |
1616 | (match_operand:VDQF 3 "register_operand" "0")))] | |
1617 | "TARGET_SIMD" | |
1618 | "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" | |
52e95e58 | 1619 | [(set_attr "type" "neon_fp_mla_<Vetype><q>")] |
5b44c420 | 1620 | ) |
1621 | ||
;; By-element fused multiply-subtract: the negated operand 3 is multiplied by
;; one lane of operand 1 (lane number in immediate operand 2) duplicated across
;; the vector, and added to operand 4, which is tied to the destination.
;; The lane number is rewritten through ENDIAN_LANE_N for <MODE>mode before
;; the FMLS is printed.
1622 | (define_insn "*aarch64_fnma4_elt<mode>" | |
1623 | [(set (match_operand:VDQF 0 "register_operand" "=w") | |
1624 | (fma:VDQF | |
1625 | (neg:VDQF | |
1626 | (match_operand:VDQF 3 "register_operand" "w")) | |
1627 | (vec_duplicate:VDQF | |
1628 | (vec_select:<VEL> | |
1629 | (match_operand:VDQF 1 "register_operand" "<h_con>") | |
1630 | (parallel [(match_operand:SI 2 "immediate_operand")]))) | |
1631 | (match_operand:VDQF 4 "register_operand" "0")))] | |
1632 | "TARGET_SIMD" | |
c034024c | 1633 | { |
1634 | operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]))); | |
1635 | return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]"; | |
1636 | } | |
52e95e58 | 1637 | [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")] |
5b44c420 | 1638 | ) |
1639 | ||
;; By-element fused multiply-subtract where the lane comes from a register of
;; mode <VSWAP_WIDTH> rather than the result mode.  The lane number (immediate
;; operand 2) is rewritten through ENDIAN_LANE_N for <VSWAP_WIDTH>mode.
;; Operand 4 (the accumulator) is tied to the destination.
1640 | (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>" | |
1641 | [(set (match_operand:VDQSF 0 "register_operand" "=w") | |
1642 | (fma:VDQSF | |
1643 | (neg:VDQSF | |
1644 | (match_operand:VDQSF 3 "register_operand" "w")) | |
1645 | (vec_duplicate:VDQSF | |
1646 | (vec_select:<VEL> | |
1647 | (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>") | |
1648 | (parallel [(match_operand:SI 2 "immediate_operand")]))) | |
1649 | (match_operand:VDQSF 4 "register_operand" "0")))] | |
1650 | "TARGET_SIMD" | |
c034024c | 1651 | { |
1652 | operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode, | |
1653 | INTVAL (operands[2]))); | |
1654 | return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]"; | |
1655 | } | |
52e95e58 | 1656 | [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")] |
5b44c420 | 1657 | ) |
1658 | ||
;; V2DF fused multiply-subtract: the negated operand 2 is multiplied by the DF
;; scalar operand 1 duplicated across the vector and added to operand 3, which
;; is tied to the destination.  The scalar is printed as lane 0 of operand 1.
1659 | (define_insn "*aarch64_fnma4_elt_to_128df" | |
1660 | [(set (match_operand:V2DF 0 "register_operand" "=w") | |
1661 | (fma:V2DF | |
1662 | (neg:V2DF | |
1663 | (match_operand:V2DF 2 "register_operand" "w")) | |
1664 | (vec_duplicate:V2DF | |
1665 | (match_operand:DF 1 "register_operand" "w")) | |
1666 | (match_operand:V2DF 3 "register_operand" "0")))] | |
1667 | "TARGET_SIMD" | |
1668 | "fmls\\t%0.2d, %2.2d, %1.2d[0]" | |
52e95e58 | 1669 | [(set_attr "type" "neon_fp_mla_d_scalar_q")] |
5b44c420 | 1670 | ) |
1671 | ||
;; DF fused multiply-subtract: the lane of V2DF operand 1 selected by immediate
;; operand 2 (rewritten through ENDIAN_LANE_N for V2DFmode) is multiplied by
;; the negated DF operand 3 and added to operand 4, which is tied to the
;; destination.  The template prints every operand with the .2d arrangement,
;; including the DF operands 0 and 3.
1672 | (define_insn "*aarch64_fnma4_elt_to_64v2df" | |
1673 | [(set (match_operand:DF 0 "register_operand" "=w") | |
1674 | (fma:DF | |
1675 | (vec_select:DF | |
1676 | (match_operand:V2DF 1 "register_operand" "w") | |
1677 | (parallel [(match_operand:SI 2 "immediate_operand")])) | |
1678 | (neg:DF | |
1679 | (match_operand:DF 3 "register_operand" "w")) | |
1680 | (match_operand:DF 4 "register_operand" "0")))] | |
1681 | "TARGET_SIMD" | |
c034024c | 1682 | { |
1683 | operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2]))); | |
1684 | return "fmls\\t%0.2d, %3.2d, %1.2d[%2]"; | |
1685 | } | |
52e95e58 | 1686 | [(set_attr "type" "neon_fp_mla_d_scalar_q")] |
5b44c420 | 1687 | ) |
1688 | ||
abb2e961 | 1689 | ;; Vector versions of the floating-point frint patterns. |
90231a68 | 1690 | ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn. |
;; One insn per member of the FRINT unspec iterator: the pattern name comes
;; from <frint_pattern> and the mnemonic is FRINT followed by <frint_suffix>.
;; Source and destination share the same VDQF mode.
abb2e961 | 1691 | (define_insn "<frint_pattern><mode>2" |
ba640418 | 1692 | [(set (match_operand:VDQF 0 "register_operand" "=w") |
1693 | (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")] | |
1694 | FRINT))] | |
1695 | "TARGET_SIMD" | |
1696 | "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>" | |
52e95e58 | 1697 | [(set_attr "type" "neon_fp_round_<Vetype><q>")] |
ba640418 | 1698 | ) |
1699 | ||
2fc9b3fc | 1700 | ;; Vector versions of the fcvt standard patterns. |
1701 | ;; Expands to lbtrunc, lround, lceil, lfloor | |
;; Float-to-integer conversion: FIXUORS applied to the FCVT unspec of the VDQF
;; operand 1, producing <FCVT_TARGET>.  Emitted as FCVT followed by
;; <frint_suffix> and the <su> signedness letter.
1702 | (define_insn "l<fcvt_pattern><su_optab><VDQF:mode><fcvt_target>2" | |
ba640418 | 1703 | [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w") |
1704 | (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET> | |
1705 | [(match_operand:VDQF 1 "register_operand" "w")] | |
1706 | FCVT)))] | |
1707 | "TARGET_SIMD" | |
1708 | "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>" | |
52e95e58 | 1709 | [(set_attr "type" "neon_fp_to_int_<Vetype><q>")] |
ba640418 | 1710 | ) |
1711 | ||
;; Matches a truncating (UNSPEC_FRINTZ) float-to-integer conversion of
;; operand 1 multiplied by operand 2, where operand 2 satisfies
;; aarch64_fp_vec_pow2.  The insn condition additionally requires
;; aarch64_vec_fpconst_pow_of_2 (operand 2) to lie between 1 and the element
;; width in bits.  That value is printed as the "#fbits" immediate of FCVTZ<su>;
;; the instruction text is formatted into a local buffer and emitted with
;; output_asm_insn, so the template itself returns the empty string.
ac80c076 | 1712 | (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult" |
1713 | [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w") | |
1714 | (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET> | |
1715 | [(mult:VDQF | |
1716 | (match_operand:VDQF 1 "register_operand" "w") | |
1717 | (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))] | |
1718 | UNSPEC_FRINTZ)))] | |
1719 | "TARGET_SIMD | |
1720 | && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1, | |
1721 | GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))" | |
1722 | { | |
1723 | int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]); | |
1724 | char buf[64]; | |
1725 | snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits); | |
1726 | output_asm_insn (buf, operands); | |
1727 | return ""; | |
1728 | } | |
1729 | [(set_attr "type" "neon_fp_to_int_<Vetype><q>")] | |
1730 | ) | |
1731 | ||
;; Expander named by <optab> for the FIXUORS codes: converts VDQF operand 1 to
;; <FCVT_TARGET> through the UNSPEC_FRINTZ form.  The preparation block is
;; empty, so the RTL template above it is what gets emitted.
c14c74cf | 1732 | (define_expand "<optab><VDQF:mode><fcvt_target>2" |
1733 | [(set (match_operand:<FCVT_TARGET> 0 "register_operand") | |
1734 | (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET> | |
1735 | [(match_operand:VDQF 1 "register_operand")] | |
1736 | UNSPEC_FRINTZ)))] | |
1737 | "TARGET_SIMD" | |
1738 | {}) | |
1739 | ||
;; Same RTL template as the <optab> expander above in this file, exposed under
;; the <fix_trunc_optab> name.  The preparation block is empty, so the template
;; is emitted unchanged.
1740 | (define_expand "<fix_trunc_optab><VDQF:mode><fcvt_target>2" | |
1741 | [(set (match_operand:<FCVT_TARGET> 0 "register_operand") | |
1742 | (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET> | |
1743 | [(match_operand:VDQF 1 "register_operand")] | |
1744 | UNSPEC_FRINTZ)))] | |
1745 | "TARGET_SIMD" | |
1746 | {}) | |
1747 | ||
;; Expander for ftrunc on the VDQF modes: emits the UNSPEC_FRINTZ form of
;; operand 1 with the result in the same mode.  The preparation block is empty.
1748 | (define_expand "ftrunc<VDQF:mode>2" | |
1749 | [(set (match_operand:VDQF 0 "register_operand") | |
1750 | (unspec:VDQF [(match_operand:VDQF 1 "register_operand")] | |
1751 | UNSPEC_FRINTZ))] | |
1752 | "TARGET_SIMD" | |
1753 | {}) | |
1754 | ||
;; Integer-to-float conversion: FLOATUORS applied to the <FCVT_TARGET>
;; operand 1, producing a VDQF result.  Emitted as <su_optab>cvtf.
865dd64d | 1755 | (define_insn "<optab><fcvt_target><VDQF:mode>2" |
1756 | [(set (match_operand:VDQF 0 "register_operand" "=w") | |
1757 | (FLOATUORS:VDQF | |
1758 | (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))] | |
1759 | "TARGET_SIMD" | |
1760 | "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>" | |
52e95e58 | 1761 | [(set_attr "type" "neon_int_to_fp_<Vetype><q>")] |
865dd64d | 1762 | ) |
1763 | ||
9f224c46 | 1764 | ;; Conversions between vectors of floats and doubles. |
1765 | ;; Contains a mix of patterns to match standard pattern names | |
1766 | ;; and those for intrinsics. | |
1767 | ||
1768 | ;; Float widening operations. | |
1769 | ||
;; Widen the half of VQ_HSF operand 1 selected by operand 2, which must
;; satisfy vect_par_cnst_lo_half, to <VWIDE>.  Emitted as FCVTL reading the
;; <Vhalftype> arrangement of operand 1.
90d05d2d | 1770 | (define_insn "aarch64_simd_vec_unpacks_lo_<mode>" |
1771 | [(set (match_operand:<VWIDE> 0 "register_operand" "=w") | |
1772 | (float_extend:<VWIDE> (vec_select:<VHALF> | |
1773 | (match_operand:VQ_HSF 1 "register_operand" "w") | |
1774 | (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "") | |
1775 | )))] | |
9f224c46 | 1776 | "TARGET_SIMD" |
90d05d2d | 1777 | "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>" |
52e95e58 | 1778 | [(set_attr "type" "neon_fp_cvt_widen_s")] |
9f224c46 | 1779 | ) |
1780 | ||
6e3d7d46 | 1781 | ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns |
1782 | ;; is inconsistent with vector ordering elsewhere in the compiler, in that | |
1783 | ;; the meaning of HI and LO changes depending on the target endianness. | |
1784 | ;; While elsewhere we map the higher numbered elements of a vector to | |
1785 | ;; the lower architectural lanes of the vector, for these patterns we want | |
1786 | ;; to always treat "hi" as referring to the higher architectural lanes. | |
1787 | ;; Consequently, while the patterns below look inconsistent with our | |
1788 | ;; other big-endian patterns, their behaviour is as required. | |
1789 | ||
;; Standard-name expander: builds the selector with
;; aarch64_simd_vect_par_cnst_half (<MODE>mode, false) and emits
;; aarch64_simd_vec_unpacks_lo_<mode> with it as operand 2.
90d05d2d | 1790 | (define_expand "vec_unpacks_lo_<mode>" |
1791 | [(match_operand:<VWIDE> 0 "register_operand" "") | |
1792 | (match_operand:VQ_HSF 1 "register_operand" "")] | |
9f224c46 | 1793 | "TARGET_SIMD" |
90d05d2d | 1794 | { |
1795 | rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false); | |
1796 | emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0], | |
1797 | operands[1], p)); | |
1798 | DONE; | |
1799 | } | |
1800 | ) | |
1801 | ||
;; Widen the half of VQ_HSF operand 1 selected by operand 2, which must
;; satisfy vect_par_cnst_hi_half, to <VWIDE>.  Emitted as FCVTL2 reading the
;; full <Vtype> arrangement of operand 1.
1802 | (define_insn "aarch64_simd_vec_unpacks_hi_<mode>" | |
1803 | [(set (match_operand:<VWIDE> 0 "register_operand" "=w") | |
1804 | (float_extend:<VWIDE> (vec_select:<VHALF> | |
1805 | (match_operand:VQ_HSF 1 "register_operand" "w") | |
1806 | (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "") | |
1807 | )))] | |
1808 | "TARGET_SIMD" | |
1809 | "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>" | |
52e95e58 | 1810 | [(set_attr "type" "neon_fp_cvt_widen_s")] |
9f224c46 | 1811 | ) |
1812 | ||
;; Standard-name expander: builds the selector with
;; aarch64_simd_vect_par_cnst_half (<MODE>mode, true) and emits
;; aarch64_simd_vec_unpacks_hi_<mode>, the insn whose operand 2 predicate
;; (vect_par_cnst_hi_half) is the one written for this selector.  The _lo_ and
;; _hi_ insn templates differ only in that predicate, so the two generators
;; should build the same RTL; naming the _hi_ one keeps the expander
;; consistent with the pattern it is meant to emit.
90d05d2d | 1813 | (define_expand "vec_unpacks_hi_<mode>" |
1814 | [(match_operand:<VWIDE> 0 "register_operand" "") | |
1815 | (match_operand:VQ_HSF 1 "register_operand" "")] | |
1816 | "TARGET_SIMD" | |
1817 | { | |
1818 | rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); | |
1819 | emit_insn (gen_aarch64_simd_vec_unpacks_hi_<mode> (operands[0], | |
1820 | operands[1], p)); | |
1821 | DONE; | |
1822 | } | |
1823 | ) | |
;; Widen a whole VDF vector (operand 1) to <VWIDE>; emitted as FCVTL with the
;; <Vmwtype> / <Vmtype> arrangement suffixes.
1824 | (define_insn "aarch64_float_extend_lo_<Vwide>" | |
1825 | [(set (match_operand:<VWIDE> 0 "register_operand" "=w") | |
1826 | (float_extend:<VWIDE> | |
1827 | (match_operand:VDF 1 "register_operand" "w")))] | |
9f224c46 | 1828 | "TARGET_SIMD" |
90d05d2d | 1829 | "fcvtl\\t%0<Vmwtype>, %1<Vmtype>" |
52e95e58 | 1830 | [(set_attr "type" "neon_fp_cvt_widen_s")] |
9f224c46 | 1831 | ) |
1832 | ||
1833 | ;; Float narrowing operations. | |
1834 | ||
;; Narrow the <VWIDE> operand 1 to a VDF result; emitted as FCVTN.
96bf1c67 | 1835 | (define_insn "aarch64_float_truncate_lo_<mode>" |
1836 | [(set (match_operand:VDF 0 "register_operand" "=w") | |
1837 | (float_truncate:VDF | |
1838 | (match_operand:<VWIDE> 1 "register_operand" "w")))] | |
9f224c46 | 1839 | "TARGET_SIMD" |
96bf1c67 | 1840 | "fcvtn\\t%0.<Vtype>, %1<Vmwtype>" |
52e95e58 | 1841 | [(set_attr "type" "neon_fp_cvt_narrow_d_q")] |
9f224c46 | 1842 | ) |
1843 | ||
;; Little-endian form (!BYTES_BIG_ENDIAN): the <VDBL> result is the
;; concatenation of operand 1 (tied to the destination) followed by the
;; narrowed <VWIDE> operand 2.  Emitted as FCVTN2.
6e3d7d46 | 1844 | (define_insn "aarch64_float_truncate_hi_<Vdbl>_le" |
96bf1c67 | 1845 | [(set (match_operand:<VDBL> 0 "register_operand" "=w") |
1846 | (vec_concat:<VDBL> | |
1847 | (match_operand:VDF 1 "register_operand" "0") | |
1848 | (float_truncate:VDF | |
1849 | (match_operand:<VWIDE> 2 "register_operand" "w"))))] | |
6e3d7d46 | 1850 | "TARGET_SIMD && !BYTES_BIG_ENDIAN" |
96bf1c67 | 1851 | "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>" |
52e95e58 | 1852 | [(set_attr "type" "neon_fp_cvt_narrow_d_q")] |
9f224c46 | 1853 | ) |
1854 | ||
;; Big-endian form (BYTES_BIG_ENDIAN): same instruction as the _le pattern,
;; but the vec_concat operands are in the opposite order (narrowed operand 2
;; first, then operand 1, which is tied to the destination).
6e3d7d46 | 1855 | (define_insn "aarch64_float_truncate_hi_<Vdbl>_be" |
1856 | [(set (match_operand:<VDBL> 0 "register_operand" "=w") | |
1857 | (vec_concat:<VDBL> | |
1858 | (float_truncate:VDF | |
1859 | (match_operand:<VWIDE> 2 "register_operand" "w")) | |
1860 | (match_operand:VDF 1 "register_operand" "0")))] | |
1861 | "TARGET_SIMD && BYTES_BIG_ENDIAN" | |
1862 | "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>" | |
1863 | [(set_attr "type" "neon_fp_cvt_narrow_d_q")] | |
1864 | ) | |
1865 | ||
;; Endianness-neutral entry point: picks the _be or _le insn generator
;; according to BYTES_BIG_ENDIAN and emits it with the three operands
;; unchanged.
1866 | (define_expand "aarch64_float_truncate_hi_<Vdbl>" | |
1867 | [(match_operand:<VDBL> 0 "register_operand" "=w") | |
1868 | (match_operand:VDF 1 "register_operand" "0") | |
1869 | (match_operand:<VWIDE> 2 "register_operand" "w")] | |
1870 | "TARGET_SIMD" | |
1871 | { | |
1872 | rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN | |
1873 | ? gen_aarch64_float_truncate_hi_<Vdbl>_be | |
1874 | : gen_aarch64_float_truncate_hi_<Vdbl>_le; | |
1875 | emit_insn (gen (operands[0], operands[1], operands[2])); | |
1876 | DONE; | |
1877 | } | |
1878 | ) | |
1879 | ||
;; Pack two V2DF vectors into one V4SF.  One input is narrowed into a fresh
;; V2SF temporary with aarch64_float_truncate_lo_v2sf; the other is then
;; narrowed and combined with that temporary by
;; aarch64_float_truncate_hi_v4sf.  Which input plays which role is swapped
;; when BYTES_BIG_ENDIAN (lo = operand 2, hi = operand 1).
9f224c46 | 1880 | (define_expand "vec_pack_trunc_v2df" |
1881 | [(set (match_operand:V4SF 0 "register_operand") | |
1882 | (vec_concat:V4SF | |
1883 | (float_truncate:V2SF | |
1884 | (match_operand:V2DF 1 "register_operand")) | |
1885 | (float_truncate:V2SF | |
1886 | (match_operand:V2DF 2 "register_operand")) | |
1887 | ))] | |
1888 | "TARGET_SIMD" | |
1889 | { | |
1890 | rtx tmp = gen_reg_rtx (V2SFmode); | |
961b4271 | 1891 | int lo = BYTES_BIG_ENDIAN ? 2 : 1; |
1892 | int hi = BYTES_BIG_ENDIAN ? 1 : 2; | |
1893 | ||
1894 | emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo])); | |
9f224c46 | 1895 | emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0], |
961b4271 | 1896 | tmp, operands[hi])); |
9f224c46 | 1897 | DONE; |
1898 | } | |
1899 | ) | |
1900 | ||
;; Pack two DF scalars into one V2SF.  The two scalars are first placed in the
;; low and high halves of a V2DF temporary with move_lo_quad_v2df and
;; move_hi_quad_v2df (roles swapped when BYTES_BIG_ENDIAN), and the temporary
;; is then narrowed with aarch64_float_truncate_lo_v2sf.
;; The temporary must be V2DFmode: it is the destination of the _v2df quad
;; moves and the <VWIDE> source operand of aarch64_float_truncate_lo_v2sf.
1901 | (define_expand "vec_pack_trunc_df" | |
1902 | [(set (match_operand:V2SF 0 "register_operand") | |
1903 | (vec_concat:V2SF | |
1904 | (float_truncate:SF | |
1905 | (match_operand:DF 1 "register_operand")) | |
1906 | (float_truncate:SF | |
1907 | (match_operand:DF 2 "register_operand")) | |
1908 | ))] | |
1909 | "TARGET_SIMD" | |
1910 | { | |
1911 | rtx tmp = gen_reg_rtx (V2DFmode); | |
961b4271 | 1912 | int lo = BYTES_BIG_ENDIAN ? 2 : 1; |
1913 | int hi = BYTES_BIG_ENDIAN ? 1 : 2; | |
1914 | ||
1915 | emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo])); | |
1916 | emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi])); | |
9f224c46 | 1917 | emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp)); |
1918 | DONE; | |
1919 | } | |
1920 | ) | |
1921 | ||
;; operand 0 = operand 1 - (operand 2 * operand 3), written as a separate
;; minus and mult in RTL and emitted as FMLS.  Operand 1 is tied to the
;; destination by the "0" constraint.
;; NOTE(review): the type attribute uses the "_scalar" variant although the
;; template is the plain vector form (compare fnma<mode>4) -- confirm intended.
df401d54 | 1922 | (define_insn "aarch64_vmls<mode>" |
1923 | [(set (match_operand:VDQF 0 "register_operand" "=w") | |
1924 | (minus:VDQF (match_operand:VDQF 1 "register_operand" "0") | |
1925 | (mult:VDQF (match_operand:VDQF 2 "register_operand" "w") | |
1926 | (match_operand:VDQF 3 "register_operand" "w"))))] | |
1927 | "TARGET_SIMD" | |
1928 | "fmls\\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>" | |
52e95e58 | 1929 | [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")] |
df401d54 | 1930 | ) |
1931 | ||
1932 | ;; FP Max/Min | |
1933 | ;; Max/Min are introduced by idiom recognition by GCC's mid-end. An | |
1934 | ;; expression like: | |
1935 | ;; a = (b < c) ? b : c; | |
1936 | ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only is enabled | |
1937 | ;; either explicitly or indirectly via -ffast-math. | |
1938 | ;; | |
1939 | ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL. | |
1940 | ;; The 'smax' and 'smin' RTL standard pattern names do not specify which | |
1941 | ;; operand will be returned when both operands are zero (i.e. they may not | |
1942 | ;; honour signed zeroes), or when either operand is NaN. Therefore GCC | |
1943 | ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring | |
1944 | ;; NaNs. | |
1945 | ||
;; Max/min of two VDQF vectors for each code in the FMAXMIN code iterator;
;; emitted as F<maxmin>NM.
2520d4b7 | 1946 | (define_insn "<su><maxmin><mode>3" |
df401d54 | 1947 | [(set (match_operand:VDQF 0 "register_operand" "=w") |
2520d4b7 | 1948 | (FMAXMIN:VDQF (match_operand:VDQF 1 "register_operand" "w") |
df401d54 | 1949 | (match_operand:VDQF 2 "register_operand" "w")))] |
1950 | "TARGET_SIMD" | |
2520d4b7 | 1951 | "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" |
52e95e58 | 1952 | [(set_attr "type" "neon_fp_minmax_<Vetype><q>")] |
df401d54 | 1953 | ) |
1954 | ||
;; Max/min of two VDQF vectors expressed as an unspec from the FMAXMIN_UNS
;; iterator; the mnemonic is taken from <maxmin_uns_op>.
2520d4b7 | 1955 | (define_insn "<maxmin_uns><mode>3" |
df401d54 | 1956 | [(set (match_operand:VDQF 0 "register_operand" "=w") |
2520d4b7 | 1957 | (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w") |
1958 | (match_operand:VDQF 2 "register_operand" "w")] | |
1959 | FMAXMIN_UNS))] | |
df401d54 | 1960 | "TARGET_SIMD" |
2520d4b7 | 1961 | "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" |
52e95e58 | 1962 | [(set_attr "type" "neon_fp_minmax_<Vetype><q>")] |
df401d54 | 1963 | ) |
1964 | ||
04edfb83 | 1965 | ;; Auto-vectorized forms for the IEEE-754 fmax()/fmin() functions |
;; Two-operand VDQF max/min expressed as an unspec from the FMAXMIN iterator;
;; the mnemonic is taken from <fmaxmin_op>.
1966 | (define_insn "<fmaxmin><mode>3" | |
1967 | [(set (match_operand:VDQF 0 "register_operand" "=w") | |
1968 | (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w") | |
1969 | (match_operand:VDQF 2 "register_operand" "w")] | |
1970 | FMAXMIN))] | |
1971 | "TARGET_SIMD" | |
1972 | "<fmaxmin_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" | |
1973 | [(set_attr "type" "neon_fp_minmax_<Vetype><q>")] | |
1974 | ) | |
1975 | ||
4491e251 | 1976 | ;; 'across lanes' add. |
df401d54 | 1977 | |
;; Sum-reduction of a VDQ_I vector to a scalar of mode <VEL>.  The reduction
;; insn writes a vector-mode scratch register; the scalar result is then read
;; from lane ENDIAN_LANE_N (<MODE>mode, 0) of that scratch with
;; aarch64_get_lane<mode>.
06329b36 | 1978 | (define_expand "reduc_plus_scal_<mode>" |
1979 | [(match_operand:<VEL> 0 "register_operand" "=w") | |
658fa7f6 | 1980 | (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")] |
06329b36 | 1981 | UNSPEC_ADDV)] |
1982 | "TARGET_SIMD" | |
1983 | { | |
1984 | rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0)); | |
1985 | rtx scratch = gen_reg_rtx (<MODE>mode); | |
1986 | emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1])); | |
1987 | emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt)); | |
1988 | DONE; | |
1989 | } | |
1990 | ) | |
1991 | ||
;; Sum-reduction of a V2F vector to a scalar of mode <VEL>, using the same
;; two-step sequence as the integer expander: reduce into a vector-mode
;; scratch, then extract lane ENDIAN_LANE_N (<MODE>mode, 0).
1992 | (define_expand "reduc_plus_scal_<mode>" | |
1993 | [(match_operand:<VEL> 0 "register_operand" "=w") | |
1994 | (match_operand:V2F 1 "register_operand" "w")] | |
1995 | "TARGET_SIMD" | |
1996 | { | |
1997 | rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0)); | |
1998 | rtx scratch = gen_reg_rtx (<MODE>mode); | |
1999 | emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1])); | |
2000 | emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt)); | |
2001 | DONE; | |
2002 | } | |
2003 | ) | |
2004 | ||
;; UNSPEC_ADDV reduction for the VDQV modes.  The mnemonic is ADD followed by
;; <VDQV:vp>; the destination is printed as a scalar register (%<Vetype>0)
;; while the source uses the <Vtype> vector arrangement.
2005 | (define_insn "aarch64_reduc_plus_internal<mode>" | |
df401d54 | 2006 | [(set (match_operand:VDQV 0 "register_operand" "=w") |
2007 | (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")] | |
06329b36 | 2008 | UNSPEC_ADDV))] |
df401d54 | 2009 | "TARGET_SIMD" |
63e88a0d | 2010 | "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>" |
52e95e58 | 2011 | [(set_attr "type" "neon_reduc_add<q>")] |
df401d54 | 2012 | ) |
2013 | ||
;; UNSPEC_ADDV reduction for V2SI, implemented with a pairwise ADDP that uses
;; operand 1 as both sources.
06329b36 | 2014 | (define_insn "aarch64_reduc_plus_internalv2si" |
df401d54 | 2015 | [(set (match_operand:V2SI 0 "register_operand" "=w") |
2016 | (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")] | |
06329b36 | 2017 | UNSPEC_ADDV))] |
df401d54 | 2018 | "TARGET_SIMD" |
2019 | "addp\\t%0.2s, %1.2s, %1.2s" | |
52e95e58 | 2020 | [(set_attr "type" "neon_reduc_add")] |
df401d54 | 2021 | ) |
2022 | ||
;; UNSPEC_FADDV reduction for the V2F modes, emitted as FADDP with a scalar
;; destination (%<Vetype>0) and a <Vtype> vector source.
06329b36 | 2023 | (define_insn "aarch64_reduc_plus_internal<mode>" |
4491e251 | 2024 | [(set (match_operand:V2F 0 "register_operand" "=w") |
2025 | (unspec:V2F [(match_operand:V2F 1 "register_operand" "w")] | |
63e88a0d | 2026 | UNSPEC_FADDV))] |
df401d54 | 2027 | "TARGET_SIMD" |
4491e251 | 2028 | "faddp\\t%<Vetype>0, %1.<Vtype>" |
52e95e58 | 2029 | [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")] |
df401d54 | 2030 | ) |
2031 | ||
;; One pairwise FADDP step on V4SF, using operand 1 as both sources.  It is
;; applied twice by reduc_plus_scal_v4sf to complete a four-lane sum.
4491e251 | 2032 | (define_insn "aarch64_addpv4sf" |
2033 | [(set (match_operand:V4SF 0 "register_operand" "=w") | |
2034 | (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "w")] | |
2035 | UNSPEC_FADDV))] | |
df401d54 | 2036 | "TARGET_SIMD" |
4491e251 | 2037 | "faddp\\t%0.4s, %1.4s, %1.4s" |
52e95e58 | 2038 | [(set_attr "type" "neon_fp_reduc_add_s_q")] |
df401d54 | 2039 | ) |
2040 | ||
;; Sum-reduction of a V4SF vector to an SF scalar: two aarch64_addpv4sf steps
;; through a V4SF scratch (the second one in place), then extraction of lane
;; ENDIAN_LANE_N (V4SFmode, 0) with aarch64_get_lanev4sf.
06329b36 | 2041 | (define_expand "reduc_plus_scal_v4sf" |
2042 | [(set (match_operand:SF 0 "register_operand") | |
4491e251 | 2043 | (unspec:V4SF [(match_operand:V4SF 1 "register_operand")] |
63e88a0d | 2044 | UNSPEC_FADDV))] |
4491e251 | 2045 | "TARGET_SIMD" |
2046 | { | |
06329b36 | 2047 | rtx elt = GEN_INT (ENDIAN_LANE_N (V4SFmode, 0)); |
2048 | rtx scratch = gen_reg_rtx (V4SFmode); | |
2049 | emit_insn (gen_aarch64_addpv4sf (scratch, operands[1])); | |
2050 | emit_insn (gen_aarch64_addpv4sf (scratch, scratch)); | |
2051 | emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt)); | |
4491e251 | 2052 | DONE; |
2053 | }) | |
2054 | ||
;; Per-element clrsb for the VDQ_BHSI modes, emitted as CLS.
c10c436e | 2055 | (define_insn "clrsb<mode>2" |
2056 | [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w") | |
2057 | (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))] | |
2058 | "TARGET_SIMD" | |
2059 | "cls\\t%0.<Vtype>, %1.<Vtype>" | |
2060 | [(set_attr "type" "neon_cls<q>")] | |
2061 | ) | |
2062 | ||
;; Per-element count-leading-zeros for the VDQ_BHSI modes, emitted as CLZ.
;; It carries the same "neon_cls<q>" type attribute as clrsb<mode>2.
9cacc1ad | 2063 | (define_insn "clz<mode>2" |
2064 | [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w") | |
2065 | (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))] | |
2066 | "TARGET_SIMD" | |
2067 | "clz\\t%0.<Vtype>, %1.<Vtype>" | |
52e95e58 | 2068 | [(set_attr "type" "neon_cls<q>")] |
9cacc1ad | 2069 | ) |
2070 | ||
;; Per-element population count for the VB modes, emitted as CNT on the
;; <Vbtype> arrangement.
c10c436e | 2071 | (define_insn "popcount<mode>2" |
2072 | [(set (match_operand:VB 0 "register_operand" "=w") | |
2073 | (popcount:VB (match_operand:VB 1 "register_operand" "w")))] | |
2074 | "TARGET_SIMD" | |
2075 | "cnt\\t%0.<Vbtype>, %1.<Vbtype>" | |
2076 | [(set_attr "type" "neon_cnt<q>")] | |
2077 | ) | |
2078 | ||
2520d4b7 | 2079 | ;; 'across lanes' max and min ops. |
2080 | ||
89debc6a | 2081 | ;; Template for outputting a scalar, so we can create __builtins which can be |
2082 | ;; gimple_fold'd to the REDUC_(MAX|MIN)_EXPR tree code. (This is FP smax/smin). | |
;; Max/min reduction of a VDQF vector to a <VEL> scalar for each member of
;; FMAXMINV: reduce into a vector-mode scratch with
;; aarch64_reduc_<maxmin_uns>_internal<mode>, then extract lane
;; ENDIAN_LANE_N (<MODE>mode, 0) with aarch64_get_lane<mode>.
2083 | (define_expand "reduc_<maxmin_uns>_scal_<mode>" | |
2084 | [(match_operand:<VEL> 0 "register_operand") | |
2085 | (unspec:VDQF [(match_operand:VDQF 1 "register_operand")] | |
2086 | FMAXMINV)] | |
2087 | "TARGET_SIMD" | |
2088 | { | |
2089 | rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0)); | |
2090 | rtx scratch = gen_reg_rtx (<MODE>mode); | |
2091 | emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch, | |
2092 | operands[1])); | |
2093 | emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt)); | |
2094 | DONE; | |
2095 | } | |
2096 | ) | |
2097 | ||
2098 | ;; Likewise for integer cases, signed and unsigned. | |
;; Max/min reduction of a VDQ_BHSI vector to a <VEL> scalar for each member of
;; MAXMINV, using the same reduce-into-scratch then extract-lane sequence as
;; the floating-point expander.
2099 | (define_expand "reduc_<maxmin_uns>_scal_<mode>" | |
2100 | [(match_operand:<VEL> 0 "register_operand") | |
2101 | (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")] | |
2102 | MAXMINV)] | |
2103 | "TARGET_SIMD" | |
2104 | { | |
2105 | rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0)); | |
2106 | rtx scratch = gen_reg_rtx (<MODE>mode); | |
2107 | emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch, | |
2108 | operands[1])); | |
2109 | emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt)); | |
2110 | DONE; | |
2111 | } | |
2112 | ) | |
2113 | ||
;; Across-lanes integer max/min for the VDQV_S modes: mnemonic is
;; <maxmin_uns_op> with a "v" suffix, with a scalar destination (%<Vetype>0)
;; and a <Vtype> vector source.
2114 | (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>" | |
63e88a0d | 2115 | [(set (match_operand:VDQV_S 0 "register_operand" "=w") |
2116 | (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")] | |
df401d54 | 2117 | MAXMINV))] |
2118 | "TARGET_SIMD" | |
2520d4b7 | 2119 | "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>" |
52e95e58 | 2120 | [(set_attr "type" "neon_reduc_minmax<q>")] |
df401d54 | 2121 | ) |
2122 | ||
;; V2SI max/min reduction, implemented with the pairwise form
;; (<maxmin_uns_op> with a "p" suffix) using operand 1 as both sources.
89debc6a | 2123 | (define_insn "aarch64_reduc_<maxmin_uns>_internalv2si" |
df401d54 | 2124 | [(set (match_operand:V2SI 0 "register_operand" "=w") |
2125 | (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")] | |
2126 | MAXMINV))] | |
2127 | "TARGET_SIMD" | |
2520d4b7 | 2128 | "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s" |
52e95e58 | 2129 | [(set_attr "type" "neon_reduc_minmax")] |
df401d54 | 2130 | ) |
2131 | ||
;; Floating-point max/min reduction for the VDQF modes and the FMAXMINV
;; unspecs: mnemonic is <maxmin_uns_op> followed by <vp>, with a scalar
;; destination (%<Vetype>0) and a <Vtype> vector source.
89debc6a | 2132 | (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>" |
2133 | [(set (match_operand:VDQF 0 "register_operand" "=w") | |
2134 | (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")] | |
2520d4b7 | 2135 | FMAXMINV))] |
2136 | "TARGET_SIMD" | |
89debc6a | 2137 | "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>" |
52e95e58 | 2138 | [(set_attr "type" "neon_fp_reduc_minmax_<Vetype><q>")] |
2520d4b7 | 2139 | ) |
2140 | ||
0a627f67 | 2141 | ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register |
2142 | ;; allocation. | |
2143 | ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which | |
2144 | ;; to select. | |
2145 | ;; | |
2146 | ;; Thus our BSL is of the form: | |
2147 | ;; op0 = bsl (mask, op2, op3) | |
7b93616c | 2148 | ;; We can use any of: |
0a627f67 | 2149 | ;; |
2150 | ;; if (op0 = mask) | |
2151 | ;; bsl mask, op2, op3 | |
2152 | ;; if (op0 = op3) (so 1-bits in mask choose bits from op2, else op0) | |
2153 | ;; bit op0, op2, mask | |
2154 | ;; if (op0 = op2) (so 0-bits in mask choose bits from op3, else op0) | |
2155 | ;; bif op0, op3, mask | |
5ca941ee | 2156 | ;; |
2157 | ;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander. | |
2158 | ;; Some forms of straight-line code may generate the equivalent form | |
2159 | ;; in *aarch64_simd_bsl<mode>_alt. | |
7b93616c | 2160 | |
;; Bitwise select written as ((op3 ^ op2) & op1) ^ op3, i.e. bits of operand 2
;; where the mask (operand 1) is set and bits of operand 3 elsewhere.
;; The three alternatives tie the destination to the mask, to operand 3, or to
;; operand 2 respectively, and emit BSL, BIT or BIF accordingly.
2161 | (define_insn "aarch64_simd_bsl<mode>_internal" | |
de661a1a | 2162 | [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w,w,w") |
2163 | (xor:VSDQ_I_DI | |
c4d0de0b | 2164 | (and:VSDQ_I_DI |
de661a1a | 2165 | (xor:VSDQ_I_DI |
2166 | (match_operand:<V_cmp_result> 3 "register_operand" "w,0,w") | |
2167 | (match_operand:VSDQ_I_DI 2 "register_operand" "w,w,0")) | |
2168 | (match_operand:VSDQ_I_DI 1 "register_operand" "0,w,w")) | |
2169 | (match_dup:<V_cmp_result> 3) | |
0a627f67 | 2170 | ))] |
7b93616c | 2171 | "TARGET_SIMD" |
2172 | "@ | |
2173 | bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype> | |
2174 | bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype> | |
2175 | bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>" | |
52e95e58 | 2176 | [(set_attr "type" "neon_bsl<q>")] |
7b93616c | 2177 | ) |
2178 | ||
5ca941ee | 2179 | ;; We need this form in addition to the above pattern to match the case |
2180 | ;; when combine tries merging three insns such that the second operand of | |
2181 | ;; the outer XOR matches the second operand of the inner XOR rather than | |
2182 | ;; the first. The two are equivalent but since recog doesn't try all | |
2183 | ;; permutations of commutative operations, we have to have a separate pattern. | |
2184 | ||
;; Alternative spelling of the bitwise select in which the outer XOR repeats
;; operand 2 instead of operand 3: ((op3 ^ op2) & op1) ^ op2, i.e. bits of
;; operand 3 where the mask (operand 1) is set and bits of operand 2 elsewhere.
;; The three alternatives tie the destination to the mask, to operand 2, or to
;; operand 3 respectively, and emit BSL, BIT or BIF accordingly.
2185 | (define_insn "*aarch64_simd_bsl<mode>_alt" | |
2186 | [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w,w,w") | |
2187 | (xor:VSDQ_I_DI | |
2188 | (and:VSDQ_I_DI | |
2189 | (xor:VSDQ_I_DI | |
2190 | (match_operand:VSDQ_I_DI 3 "register_operand" "w,w,0") | |
2191 | (match_operand:VSDQ_I_DI 2 "register_operand" "w,0,w")) | |
2192 | (match_operand:VSDQ_I_DI 1 "register_operand" "0,w,w")) | |
2193 | (match_dup:VSDQ_I_DI 2)))] | |
2194 | "TARGET_SIMD" | |
2195 | "@ | |
2196 | bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype> | |
2197 | bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype> | |
2198 | bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>" | |
2199 | [(set_attr "type" "neon_bsl<q>")] | |
2200 | ) | |
2201 | ||
;; Expander for bitwise select over the VALLDIF modes.  The internal insn is
;; invoked in <V_cmp_result>mode, so for floating-point modes operands 2 and 3
;; are re-viewed with gen_lowpart and the result is produced in a fresh
;; <V_cmp_result>mode temporary, then moved back into operand 0 through
;; gen_lowpart.  For other modes the result is written to operand 0 directly.
;; The mask (operand 1) is always passed through gen_lowpart.
7b93616c | 2202 | (define_expand "aarch64_simd_bsl<mode>" |
408987fb | 2203 | [(match_operand:VALLDIF 0 "register_operand") |
0a627f67 | 2204 | (match_operand:<V_cmp_result> 1 "register_operand") |
408987fb | 2205 | (match_operand:VALLDIF 2 "register_operand") |
2206 | (match_operand:VALLDIF 3 "register_operand")] | |
0a627f67 | 2207 | "TARGET_SIMD" |
7b93616c | 2208 | { |
2209 | /* We can't alias operands together if they have different modes. */ | |
c4d0de0b | 2210 | rtx tmp = operands[0]; |
2211 | if (FLOAT_MODE_P (<MODE>mode)) | |
2212 | { | |
2213 | operands[2] = gen_lowpart (<V_cmp_result>mode, operands[2]); | |
2214 | operands[3] = gen_lowpart (<V_cmp_result>mode, operands[3]); | |
2215 | tmp = gen_reg_rtx (<V_cmp_result>mode); | |
2216 | } | |
1eb525c3 | 2217 | operands[1] = gen_lowpart (<V_cmp_result>mode, operands[1]); |
c4d0de0b | 2218 | emit_insn (gen_aarch64_simd_bsl<v_cmp_result>_internal (tmp, |
2219 | operands[1], | |
2220 | operands[2], | |
2221 | operands[3])); | |
2222 | if (tmp != operands[0]) | |
2223 | emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp)); | |
2224 | ||
0a627f67 | 2225 | DONE; |
7b93616c | 2226 | }) |
2227 | ||
| | ;; Integer vector conditional select: |
| | ;;   operand 0 = (operand 4 <operator 3> operand 5) ? operand 1 : operand 2. |
| | ;; A lane mask is produced by one of the cm* compare patterns; it is then |
| | ;; either moved straight into operand 0 (the -1/0 selection) or used as the |
| | ;; selector of aarch64_simd_bsl<mode>. |
2fcc3fb4 | 2228 | (define_expand "aarch64_vcond_internal<mode><mode>" |
d81e9009 | 2229 | [(set (match_operand:VSDQ_I_DI 0 "register_operand") |
2230 | (if_then_else:VSDQ_I_DI | |
7b93616c | 2231 | (match_operator 3 "comparison_operator" |
d81e9009 | 2232 | [(match_operand:VSDQ_I_DI 4 "register_operand") |
2233 | (match_operand:VSDQ_I_DI 5 "nonmemory_operand")]) | |
2234 | (match_operand:VSDQ_I_DI 1 "nonmemory_operand") | |
2235 | (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))] | |
7b93616c | 2236 | "TARGET_SIMD" |
2237 | { | |
| | /* op1/op2 are the values selected when the comparison is true/false; |
| | they may be exchanged below, in which case CODE is inverted to match. */ |
a793ba7d | 2238 | rtx op1 = operands[1]; |
2239 | rtx op2 = operands[2]; | |
7b93616c | 2240 | rtx mask = gen_reg_rtx (<MODE>mode); |
bead021f | 2241 | enum rtx_code code = GET_CODE (operands[3]); |
2242 | ||
2243 | /* Switching OP1 and OP2 is necessary for NE (to output a cmeq insn), | |
2244 | and desirable for other comparisons if it results in FOO ? -1 : 0 | |
2245 | (this allows direct use of the comparison result without a bsl). */ | |
2246 | if (code == NE | |
2247 | || (code != EQ | |
2248 | && op1 == CONST0_RTX (<V_cmp_result>mode) | |
2249 | && op2 == CONSTM1_RTX (<V_cmp_result>mode))) | |
2250 | { | |
2251 | op1 = operands[2]; | |
2252 | op2 = operands[1]; | |
| | /* Replace CODE by its logical inverse so that the exchanged |
| | OP1/OP2 still select the same lanes. */ |
2253 | switch (code) | |
2254 | { | |
2255 | case LE: code = GT; break; | |
2256 | case LT: code = GE; break; | |
2257 | case GE: code = LT; break; | |
2258 | case GT: code = LE; break; | |
2259 | /* No case EQ. */ | |
2260 | case NE: code = EQ; break; | |
2261 | case LTU: code = GEU; break; | |
2262 | case LEU: code = GTU; break; | |
2263 | case GTU: code = LEU; break; | |
2264 | case GEU: code = LTU; break; | |
2265 | default: gcc_unreachable (); | |
2266 | } | |
2267 | } | |
7b93616c | 2268 |
bead021f | 2269 | /* Make sure we can handle the last operand. */ |
2270 | switch (code) | |
7b93616c | 2271 | { |
bead021f | 2272 | case NE: |
2273 | /* Normalized to EQ above. */ | |
2274 | gcc_unreachable (); | |
2275 | ||
7b93616c | 2276 | case LE: |
2277 | case LT: | |
7b93616c | 2278 | case GE: |
2279 | case GT: | |
2280 | case EQ: | |
bead021f | 2281 | /* These instructions have a form taking an immediate zero. */ |
2282 | if (operands[5] == CONST0_RTX (<MODE>mode)) | |
2283 | break; | |
2284 | /* Fall through, as may need to load into register. */ | |
7b93616c | 2285 | default: |
bead021f | 2286 | if (!REG_P (operands[5])) |
2287 | operands[5] = force_reg (<MODE>mode, operands[5]); | |
7b93616c | 2288 | break; |
2289 | } | |
2290 | ||
| | /* Emit the compare that fills MASK. LTU and LEU are emitted as cmgtu |
| | and cmgeu with operands 4 and 5 exchanged. */ |
bead021f | 2291 | switch (code) |
7b93616c | 2292 | { |
2293 | case LT: | |
bead021f | 2294 | emit_insn (gen_aarch64_cmlt<mode> (mask, operands[4], operands[5])); |
2295 | break; | |
2296 | ||
7b93616c | 2297 | case GE: |
2298 | emit_insn (gen_aarch64_cmge<mode> (mask, operands[4], operands[5])); | |
2299 | break; | |
2300 | ||
2301 | case LE: | |
bead021f | 2302 | emit_insn (gen_aarch64_cmle<mode> (mask, operands[4], operands[5])); |
2303 | break; | |
2304 | ||
7b93616c | 2305 | case GT: |
2306 | emit_insn (gen_aarch64_cmgt<mode> (mask, operands[4], operands[5])); | |
2307 | break; | |
2308 | ||
2309 | case LTU: | |
bead021f | 2310 | emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[5], operands[4])); |
2311 | break; | |
2312 | ||
7b93616c | 2313 | case GEU: |
6ba1316b | 2314 | emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[4], operands[5])); |
7b93616c | 2315 | break; |
2316 | ||
2317 | case LEU: | |
bead021f | 2318 | emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[5], operands[4])); |
2319 | break; | |
2320 | ||
7b93616c | 2321 | case GTU: |
6ba1316b | 2322 | emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[4], operands[5])); |
7b93616c | 2323 | break; |
2324 | ||
bead021f | 2325 | /* NE has been normalized to EQ above. */ |
7b93616c | 2326 | case EQ: |
2327 | emit_insn (gen_aarch64_cmeq<mode> (mask, operands[4], operands[5])); | |
2328 | break; | |
2329 | ||
2330 | default: | |
2331 | gcc_unreachable (); | |
2332 | } | |
2333 | ||
a793ba7d | 2334 | /* If we have (a = (b CMP c) ? -1 : 0); |
2335 | Then we can simply move the generated mask. */ | |
2336 | ||
2337 | if (op1 == CONSTM1_RTX (<V_cmp_result>mode) | |
2338 | && op2 == CONST0_RTX (<V_cmp_result>mode)) | |
2339 | emit_move_insn (operands[0], mask); | |
2340 | else | |
2341 | { | |
| | /* The bsl expander is given registers only, so load any constant |
| | selection value first. */ |
2342 | if (!REG_P (op1)) | |
2343 | op1 = force_reg (<MODE>mode, op1); | |
2344 | if (!REG_P (op2)) | |
2345 | op2 = force_reg (<MODE>mode, op2); | |
2346 | emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], mask, | |
2347 | op1, op2)); | |
2348 | } | |
7b93616c | 2349 |
2350 | DONE; | |
2351 | }) | |
2352 | ||
| | ;; Conditional select driven by a floating-point vector comparison: |
| | ;;   operand 0 = (operand 4 <operator 3> operand 5) ? operand 1 : operand 2. |
| | ;; The comparison is done in a VDQF mode while the selected data has a |
| | ;; VDQF_COND mode. The mask is built from the cmge/cmgt/cmeq patterns (or |
| | ;; the cmlt/cmle compare-with-zero forms); unordered-aware codes are handled |
| | ;; by emitting the inverse ordered comparison and exchanging the two |
| | ;; selection operands passed to aarch64_simd_bsl. |
2fcc3fb4 | 2353 | (define_expand "aarch64_vcond_internal<VDQF_COND:mode><VDQF:mode>" |
2354 | [(set (match_operand:VDQF_COND 0 "register_operand") | |
1eb525c3 | 2355 | (if_then_else:VDQF |
2356 | (match_operator 3 "comparison_operator" | |
2357 | [(match_operand:VDQF 4 "register_operand") | |
2358 | (match_operand:VDQF 5 "nonmemory_operand")]) | |
2fcc3fb4 | 2359 | (match_operand:VDQF_COND 1 "nonmemory_operand") |
2360 | (match_operand:VDQF_COND 2 "nonmemory_operand")))] | |
1eb525c3 | 2361 | "TARGET_SIMD" |
2362 | { | |
2363 | int inverse = 0; | |
3b11c427 | 2364 | int use_zero_form = 0; |
15953bb4 | 2365 | int swap_bsl_operands = 0; |
a793ba7d | 2366 | rtx op1 = operands[1]; |
2367 | rtx op2 = operands[2]; | |
2fcc3fb4 | 2368 | rtx mask = gen_reg_rtx (<VDQF_COND:V_cmp_result>mode); |
2369 | rtx tmp = gen_reg_rtx (<VDQF_COND:V_cmp_result>mode); | |
1eb525c3 | 2370 |
15953bb4 | 2371 | rtx (*base_comparison) (rtx, rtx, rtx); |
2372 | rtx (*complimentary_comparison) (rtx, rtx, rtx); | |
2373 | ||
| | /* Decide whether operand 5 can stay as a literal zero. It has a VDQF |
| | mode, so the zero constant must be looked up in <VDQF:MODE>mode; an |
| | unqualified <MODE> is ambiguous in this two-iterator pattern. */ |
2374 | switch (GET_CODE (operands[3])) | |
2375 | { | |
2376 | case GE: | |
3b11c427 | 2377 | case GT: |
15953bb4 | 2378 | case LE: |
3b11c427 | 2379 | case LT: |
15953bb4 | 2380 | case EQ: |
3b11c427 | 2381 | if (operands[5] == CONST0_RTX (<VDQF:MODE>mode)) |
2382 | { | |
2383 | use_zero_form = 1; | |
2384 | break; | |
2385 | } | |
2386 | /* Fall through. */ | |
15953bb4 | 2387 | default: |
2388 | if (!REG_P (operands[5])) | |
2fcc3fb4 | 2389 | operands[5] = force_reg (<VDQF:MODE>mode, operands[5]); |
15953bb4 | 2390 | } |
1eb525c3 | 2391 |
| | /* Pick the pair of compare generators for this code. INVERSE marks the |
| | "less than" flavours, which are emitted with the operands exchanged. */ |
2392 | switch (GET_CODE (operands[3])) | |
2393 | { | |
2394 | case LT: | |
15953bb4 | 2395 | case UNLT: |
1eb525c3 | 2396 | inverse = 1; |
2397 | /* Fall through. */ | |
2398 | case GE: | |
15953bb4 | 2399 | case UNGE: |
2400 | case ORDERED: | |
2401 | case UNORDERED: | |
2fcc3fb4 | 2402 | base_comparison = gen_aarch64_cmge<VDQF:mode>; |
2403 | complimentary_comparison = gen_aarch64_cmgt<VDQF:mode>; | |
1eb525c3 | 2404 | break; |
2405 | case LE: | |
15953bb4 | 2406 | case UNLE: |
1eb525c3 | 2407 | inverse = 1; |
2408 | /* Fall through. */ | |
2409 | case GT: | |
15953bb4 | 2410 | case UNGT: |
2fcc3fb4 | 2411 | base_comparison = gen_aarch64_cmgt<VDQF:mode>; |
2412 | complimentary_comparison = gen_aarch64_cmge<VDQF:mode>; | |
1eb525c3 | 2413 | break; |
15953bb4 | 2414 | case EQ: |
1eb525c3 | 2415 | case NE: |
15953bb4 | 2416 | case UNEQ: |
2fcc3fb4 | 2417 | base_comparison = gen_aarch64_cmeq<VDQF:mode>; |
2418 | complimentary_comparison = gen_aarch64_cmeq<VDQF:mode>; | |
15953bb4 | 2419 | break; |
2420 | default: | |
2421 | gcc_unreachable (); | |
2422 | } | |
2423 | ||
2424 | switch (GET_CODE (operands[3])) | |
2425 | { | |
2426 | case LT: | |
2427 | case LE: | |
2428 | case GT: | |
2429 | case GE: | |
1eb525c3 | 2430 | case EQ: |
15953bb4 | 2431 | /* The easy case. Here we emit one of FCMGE, FCMGT or FCMEQ. |
2432 | As a LT b <=> b GT a && a LE b <=> b GE a. Our transformations are: | |
2433 | a GE b -> a GE b | |
2434 | a GT b -> a GT b | |
2435 | a LE b -> b GE a | |
2436 | a LT b -> b GT a | |
3b11c427 | 2437 | a EQ b -> a EQ b |
2438 | Note that there also exist direct comparison against 0 forms, | |
2439 | so catch those as a special case. */ | |
2440 | if (use_zero_form) | |
2441 | { | |
2442 | inverse = 0; | |
2443 | switch (GET_CODE (operands[3])) | |
2444 | { | |
2445 | case LT: | |
2fcc3fb4 | 2446 | base_comparison = gen_aarch64_cmlt<VDQF:mode>; |
3b11c427 | 2447 | break; |
2448 | case LE: | |
2fcc3fb4 | 2449 | base_comparison = gen_aarch64_cmle<VDQF:mode>; |
3b11c427 | 2450 | break; |
2451 | default: | |
2452 | /* Do nothing, other zero form cases already have the correct | |
2453 | base_comparison. */ | |
2454 | break; | |
2455 | } | |
2456 | } | |
15953bb4 | 2457 |
2458 | if (!inverse) | |
2459 | emit_insn (base_comparison (mask, operands[4], operands[5])); | |
2460 | else | |
2461 | emit_insn (complimentary_comparison (mask, operands[5], operands[4])); | |
2462 | break; | |
2463 | case UNLT: | |
2464 | case UNLE: | |
2465 | case UNGT: | |
2466 | case UNGE: | |
2467 | case NE: | |
2468 | /* FCM returns false for lanes which are unordered, so if we use | |
2469 | the inverse of the comparison we actually want to emit, then | |
2470 | swap the operands to BSL, we will end up with the correct result. | |
2471 | Note that a NE NaN and NaN NE b are true for all a, b. | |
2472 | ||
2473 | Our transformations are: | |
2474 | a UNGE b -> !(b GT a) | |
2475 | a UNGT b -> !(b GE a) | |
2476 | a UNLE b -> !(a GT b) | |
2477 | a UNLT b -> !(a GE b) | |
2478 | a NE b -> !(a EQ b) */ | |
2479 | ||
2480 | if (inverse) | |
2481 | emit_insn (base_comparison (mask, operands[4], operands[5])); | |
2482 | else | |
2483 | emit_insn (complimentary_comparison (mask, operands[5], operands[4])); | |
2484 | ||
2485 | swap_bsl_operands = 1; | |
2486 | break; | |
2487 | case UNEQ: | |
2488 | /* We check (a > b || b > a). combining these comparisons give us | |
2489 | true iff (a != b && a ORDERED b), swapping the operands to BSL | |
2490 | will then give us (a == b || a UNORDERED b) as intended. */ | |
2491 | ||
2fcc3fb4 | 2492 | emit_insn (gen_aarch64_cmgt<VDQF:mode> (mask, operands[4], operands[5])); |
2493 | emit_insn (gen_aarch64_cmgt<VDQF:mode> (tmp, operands[5], operands[4])); | |
2494 | emit_insn (gen_ior<VDQF_COND:v_cmp_result>3 (mask, mask, tmp)); | |
15953bb4 | 2495 | swap_bsl_operands = 1; |
2496 | break; | |
2497 | case UNORDERED: | |
2498 | /* Operands are ORDERED iff (a > b || b >= a). | |
2499 | Swapping the operands to BSL will give the UNORDERED case. */ | |
2500 | swap_bsl_operands = 1; | |
2501 | /* Fall through. */ | |
2502 | case ORDERED: | |
2fcc3fb4 | 2503 | emit_insn (gen_aarch64_cmgt<VDQF:mode> (tmp, operands[4], operands[5])); |
2504 | emit_insn (gen_aarch64_cmge<VDQF:mode> (mask, operands[5], operands[4])); | |
2505 | emit_insn (gen_ior<VDQF_COND:v_cmp_result>3 (mask, mask, tmp)); | |
1eb525c3 | 2506 | break; |
2507 | default: | |
2508 | gcc_unreachable (); | |
2509 | } | |
2510 | ||
15953bb4 | 2511 | if (swap_bsl_operands) |
a793ba7d | 2512 | { |
2513 | op1 = operands[2]; | |
2514 | op2 = operands[1]; | |
2515 | } | |
2516 | ||
2517 | /* If we have (a = (b CMP c) ? -1 : 0); | |
2518 | Then we can simply move the generated mask. */ | |
2519 | ||
2fcc3fb4 | 2520 | if (op1 == CONSTM1_RTX (<VDQF_COND:V_cmp_result>mode) |
2521 | && op2 == CONST0_RTX (<VDQF_COND:V_cmp_result>mode)) | |
a793ba7d | 2522 | emit_move_insn (operands[0], mask); |
2523 | else | |
2524 | { | |
2525 | if (!REG_P (op1)) | |
2fcc3fb4 | 2526 | op1 = force_reg (<VDQF_COND:MODE>mode, op1); |
a793ba7d | 2527 | if (!REG_P (op2)) |
2fcc3fb4 | 2528 | op2 = force_reg (<VDQF_COND:MODE>mode, op2); |
2529 | emit_insn (gen_aarch64_simd_bsl<VDQF_COND:mode> (operands[0], mask, | |
a793ba7d | 2530 | op1, op2)); |
2531 | } | |
2532 | ||
1eb525c3 | 2533 | DONE; |
2534 | }) | |
2535 | ||
| | ;; vcond entry point for the case where the selected data and the compared |
| | ;; values share one VALLDI mode. All six operands are forwarded unchanged |
| | ;; to aarch64_vcond_internal<mode><mode>. |
7b93616c | 2536 | (define_expand "vcond<mode><mode>" |
d81e9009 | 2537 | [(set (match_operand:VALLDI 0 "register_operand") |
2538 | (if_then_else:VALLDI | |
7b93616c | 2539 | (match_operator 3 "comparison_operator" |
d81e9009 | 2540 | [(match_operand:VALLDI 4 "register_operand") |
2541 | (match_operand:VALLDI 5 "nonmemory_operand")]) | |
2542 | (match_operand:VALLDI 1 "nonmemory_operand") | |
2543 | (match_operand:VALLDI 2 "nonmemory_operand")))] | |
7b93616c | 2544 | "TARGET_SIMD" |
2545 | { | |
2fcc3fb4 | 2546 | emit_insn (gen_aarch64_vcond_internal<mode><mode> (operands[0], operands[1], |
7b93616c | 2547 | operands[2], operands[3], |
2548 | operands[4], operands[5])); | |
2549 | DONE; | |
2550 | }) | |
2551 | ||
| | ;; vcond entry point selecting <V_cmp_result> data under a VDQF comparison |
| | ;; (operands 4 and 5). Forwards all operands unchanged to |
| | ;; aarch64_vcond_internal<v_cmp_result><mode>. |
a793ba7d | 2552 | (define_expand "vcond<v_cmp_result><mode>" |
2553 | [(set (match_operand:<V_cmp_result> 0 "register_operand") | |
2554 | (if_then_else:<V_cmp_result> | |
2555 | (match_operator 3 "comparison_operator" | |
2556 | [(match_operand:VDQF 4 "register_operand") | |
2557 | (match_operand:VDQF 5 "nonmemory_operand")]) | |
2558 | (match_operand:<V_cmp_result> 1 "nonmemory_operand") | |
2559 | (match_operand:<V_cmp_result> 2 "nonmemory_operand")))] | |
2560 | "TARGET_SIMD" | |
2561 | { | |
2fcc3fb4 | 2562 | emit_insn (gen_aarch64_vcond_internal<v_cmp_result><mode> ( |
a793ba7d | 2563 | operands[0], operands[1], |
2564 | operands[2], operands[3], | |
2565 | operands[4], operands[5])); | |
2566 | DONE; | |
2567 | }) | |
7b93616c | 2568 | |
2569 | (define_expand "vcondu<mode><mode>" | |
d81e9009 | 2570 | [(set (match_operand:VSDQ_I_DI 0 "register_operand") |
2571 | (if_then_else:VSDQ_I_DI | |
7b93616c | 2572 | (match_operator 3 "comparison_operator" |
d81e9009 | 2573 | [(match_operand:VSDQ_I_DI 4 "register_operand") |
2574 | (match_operand:VSDQ_I_DI 5 "nonmemory_operand")]) | |
2575 | (match_operand:VSDQ_I_DI 1 "nonmemory_operand") | |
2576 | (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))] | |
7b93616c | 2577 | "TARGET_SIMD" |
2578 | { | |
2fcc3fb4 | 2579 | emit_insn (gen_aarch64_vcond_internal<mode><mode> (operands[0], operands[1], |
7b93616c | 2580 | operands[2], operands[3], |
2581 | operands[4], operands[5])); | |
2582 | DONE; | |
2583 | }) | |
2584 | ||
df401d54 | 2585 | ;; Patterns for AArch64 SIMD Intrinsics. |
2586 | ||
5e7bd7ac | 2587 | ;; Lane extraction with sign extension to general purpose register. |
| | ;; Extract lane operand 2 of a VDQQH vector and sign-extend it into a GPI |
| | ;; general purpose register with smov. |
2588 | (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>" | |
2589 | [(set (match_operand:GPI 0 "register_operand" "=r") | |
2590 | (sign_extend:GPI | |
df401d54 | 2591 | (vec_select:<VEL> |
5e7bd7ac | 2592 | (match_operand:VDQQH 1 "register_operand" "w") |
df401d54 | 2593 | (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] |
2594 | "TARGET_SIMD" | |
56234646 | 2595 | { |
| | /* The lane index must be remapped using the mode of the vector being |
| | indexed, so name the VDQQH iterator explicitly: this pattern also |
| | iterates over GPI, which makes a bare <MODE> ambiguous. */ |
2596 | operands[2] = GEN_INT (ENDIAN_LANE_N (<VDQQH:MODE>mode, INTVAL (operands[2]))); | |
2597 | return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]"; | |
2598 | } | |
52e95e58 | 2599 | [(set_attr "type" "neon_to_gp<q>")] |
df401d54 | 2600 | ) |
2601 | ||
| | ;; Extract lane operand 2 of a VDQQH vector and zero-extend it into an SI |
| | ;; general purpose register with umov. The lane index is remapped through |
| | ;; ENDIAN_LANE_N before it is printed. |
5e7bd7ac | 2602 | (define_insn "*aarch64_get_lane_zero_extendsi<mode>" |
2603 | [(set (match_operand:SI 0 "register_operand" "=r") | |
2604 | (zero_extend:SI | |
df401d54 | 2605 | (vec_select:<VEL> |
5e7bd7ac | 2606 | (match_operand:VDQQH 1 "register_operand" "w") |
df401d54 | 2607 | (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] |
2608 | "TARGET_SIMD" | |
56234646 | 2609 | { |
2610 | operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]))); | |
2611 | return "umov\\t%w0, %1.<Vetype>[%2]"; | |
2612 | } | |
52e95e58 | 2613 | [(set_attr "type" "neon_to_gp<q>")] |
df401d54 | 2614 | ) |
2615 | ||
5e7bd7ac | 2616 | ;; Lane extraction of a value, neither sign nor zero extension |
2617 | ;; is guaranteed so upper bits should be considered undefined. | |
12e851f9 | 2618 | ;; RTL uses GCC vector extension indices throughout so flip only for assembly. |
| | ;; Three alternatives, chosen by the destination constraint: |
| | ;;   0 ("r")   - umov into a general purpose register, |
| | ;;   1 ("w")   - dup into a SIMD register, |
| | ;;   2 ("Utv") - st1 of the single lane to the destination operand. |
df401d54 | 2619 | (define_insn "aarch64_get_lane<mode>" |
56234646 | 2620 | [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv") |
df401d54 | 2621 | (vec_select:<VEL> |
aea31229 | 2622 | (match_operand:VALL_F16 1 "register_operand" "w, w, w") |
56234646 | 2623 | (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))] |
df401d54 | 2624 | "TARGET_SIMD" |
56234646 | 2625 | { |
| | /* Convert the GCC-vector-extension lane number to the number used in |
| | the assembly output. */ |
2626 | operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]))); | |
2627 | switch (which_alternative) | |
2628 | { | |
2629 | case 0: | |
2630 | return "umov\\t%<vwcore>0, %1.<Vetype>[%2]"; | |
2631 | case 1: | |
2632 | return "dup\\t%<Vetype>0, %1.<Vetype>[%2]"; | |
2633 | case 2: | |
2634 | return "st1\\t{%1.<Vetype>}[%2], %0"; | |
2635 | default: | |
2636 | gcc_unreachable (); | |
2637 | } | |
2638 | } | |
2639 | [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")] | |
df401d54 | 2640 | ) |
2641 | ||
df401d54 | 2642 | ;; In this insn, operand 1 should be low, and operand 2 the high part of the |
2643 | ;; dest vector. | |
2644 | ||
| | ;; Little-endian only (!BYTES_BIG_ENDIAN): concatenate operand 1 with a zero |
| | ;; vector (operand 2) to form <VDBL>. The three alternatives take operand 1 |
| | ;; from a SIMD register (mov), a general register (fmov) or memory (ldr). |
2645 | (define_insn "*aarch64_combinez<mode>" | |
cf40f9ad | 2646 | [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w") |
df401d54 | 2647 | (vec_concat:<VDBL> |
cf40f9ad | 2648 | (match_operand:VD_BHSI 1 "general_operand" "w,r,m") |
2649 | (match_operand:VD_BHSI 2 "aarch64_simd_imm_zero" "Dz,Dz,Dz")))] | |
09853ca2 | 2650 | "TARGET_SIMD && !BYTES_BIG_ENDIAN" |
cf40f9ad | 2651 | "@ |
2652 | mov\\t%0.8b, %1.8b | |
2653 | fmov\t%d0, %1 | |
2654 | ldr\\t%d0, %1" | |
2655 | [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg") | |
2656 | (set_attr "simd" "yes,*,yes") | |
2657 | (set_attr "fp" "*,yes,*")] | |
09853ca2 | 2658 | ) |
2659 | ||
| | ;; Big-endian counterpart of *aarch64_combinez<mode>: the zero vector |
| | ;; (operand 2) comes first in the vec_concat and operand 1 second. The |
| | ;; output templates are the same as in the little-endian pattern. |
2660 | (define_insn "*aarch64_combinez_be<mode>" | |
cf40f9ad | 2661 | [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w") |
09853ca2 | 2662 | (vec_concat:<VDBL> |
cf40f9ad | 2663 | (match_operand:VD_BHSI 2 "aarch64_simd_imm_zero" "Dz,Dz,Dz") |
2664 | (match_operand:VD_BHSI 1 "general_operand" "w,r,m")))] | |
09853ca2 | 2665 | "TARGET_SIMD && BYTES_BIG_ENDIAN" |
cf40f9ad | 2666 | "@ |
2667 | mov\\t%0.8b, %1.8b | |
2668 | fmov\t%d0, %1 | |
2669 | ldr\\t%d0, %1" | |
2670 | [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg") | |
2671 | (set_attr "simd" "yes,*,yes") | |
2672 | (set_attr "fp" "*,yes,*")] | |
df401d54 | 2673 | ) |
2674 | ||
| | ;; Combine two VDC vectors (operands 1 and 2) into the <VDBL> operand 0. |
| | ;; On big-endian the two inputs are exchanged before being handed to |
| | ;; aarch64_combine_internal<mode>. |
09853ca2 | 2675 | (define_expand "aarch64_combine<mode>" |
2676 | [(match_operand:<VDBL> 0 "register_operand") | |
2677 | (match_operand:VDC 1 "register_operand") | |
2678 | (match_operand:VDC 2 "register_operand")] | |
2679 | "TARGET_SIMD" | |
2680 | { | |
2681 | rtx op1, op2; | |
2682 | if (BYTES_BIG_ENDIAN) | |
2683 | { | |
2684 | op1 = operands[2]; | |
2685 | op2 = operands[1]; | |
2686 | } | |
2687 | else | |
2688 | { | |
2689 | op1 = operands[1]; | |
2690 | op2 = operands[2]; | |
2691 | } | |
2692 | emit_insn (gen_aarch64_combine_internal<mode> (operands[0], op1, op2)); | |
2693 | DONE; | |
2694 | } | |
2695 | ) | |
2696 | ||
| | ;; vec_concat of two VDC registers into an early-clobber ("=&w") <VDBL> |
| | ;; destination. It has no assembly of its own ("#"): after reload it is |
| | ;; split by aarch64_split_simd_combine, which on big-endian receives |
| | ;; operands 2 and 1 in exchanged order. |
2697 | (define_insn_and_split "aarch64_combine_internal<mode>" | |
df401d54 | 2698 | [(set (match_operand:<VDBL> 0 "register_operand" "=&w") |
2699 | (vec_concat:<VDBL> (match_operand:VDC 1 "register_operand" "w") | |
2700 | (match_operand:VDC 2 "register_operand" "w")))] | |
2701 | "TARGET_SIMD" | |
d820433c | 2702 | "#" |
2703 | "&& reload_completed" | |
2704 | [(const_int 0)] | |
2705 | { | |
09853ca2 | 2706 | if (BYTES_BIG_ENDIAN) |
2707 | aarch64_split_simd_combine (operands[0], operands[2], operands[1]); | |
2708 | else | |
2709 | aarch64_split_simd_combine (operands[0], operands[1], operands[2]); | |
d820433c | 2710 | DONE; |
5efe19ee | 2711 | } |
2712 | [(set_attr "type" "multiple")] | |
2713 | ) | |
d820433c | 2714 | |
2715 | (define_expand "aarch64_simd_combine<mode>" | |
09853ca2 | 2716 | [(match_operand:<VDBL> 0 "register_operand") |
2717 | (match_operand:VDC 1 "register_operand") | |
2718 | (match_operand:VDC 2 "register_operand")] | |
d820433c | 2719 | "TARGET_SIMD" |
2720 | { | |
2721 | emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1])); | |
2722 | emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2])); | |
2723 | DONE; | |
5efe19ee | 2724 | } |
2725 | [(set_attr "type" "multiple")] | |
2726 | ) | |
df401d54 | 2727 | |
2728 | ;; <su><addsub>l<q>. | |
2729 | ||
| | ;; Widening add/subtract of the high halves of two VQW vectors. Operand 3 |
| | ;; is the lane selector (a vect_par_cnst_hi_half) and is shared by both |
| | ;; vec_selects through match_dup. Emits the "l2" instruction form. |
d68f29d7 | 2730 | (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal" |
df401d54 | 2731 | [(set (match_operand:<VWIDE> 0 "register_operand" "=w") |
2732 | (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> | |
2733 | (match_operand:VQW 1 "register_operand" "w") | |
2734 | (match_operand:VQW 3 "vect_par_cnst_hi_half" ""))) | |
2735 | (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> | |
2736 | (match_operand:VQW 2 "register_operand" "w") | |
2737 | (match_dup 3)))))] | |
2738 | "TARGET_SIMD" | |
d68f29d7 | 2739 | "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>" |
52e95e58 | 2740 | [(set_attr "type" "neon_<ADDSUB:optab>_long")] |
df401d54 | 2741 | ) |
2742 | ||
| | ;; Widening add/subtract of the low halves of two VQW vectors. Operand 3 |
| | ;; is the lane selector (a vect_par_cnst_lo_half). Emits the "l" form with |
| | ;; the sources printed as <Vhalftype>. |
d68f29d7 | 2743 | (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal" |
2744 | [(set (match_operand:<VWIDE> 0 "register_operand" "=w") | |
2745 | (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> | |
2746 | (match_operand:VQW 1 "register_operand" "w") | |
2747 | (match_operand:VQW 3 "vect_par_cnst_lo_half" ""))) | |
2748 | (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> | |
2749 | (match_operand:VQW 2 "register_operand" "w") | |
2750 | (match_dup 3)))))] | |
2751 | "TARGET_SIMD" | |
2752 | "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>" | |
52e95e58 | 2753 | [(set_attr "type" "neon_<ADDSUB:optab>_long")] |
d68f29d7 | 2754 | ) |
2755 | ||
2756 | ||
| | ;; saddl2 expander: builds the half selector with |
| | ;; aarch64_simd_vect_par_cnst_half (<MODE>mode, true) and emits |
| | ;; aarch64_saddl<mode>_hi_internal with it as operand 3. |
df401d54 | 2757 | (define_expand "aarch64_saddl2<mode>" |
2758 | [(match_operand:<VWIDE> 0 "register_operand" "=w") | |
2759 | (match_operand:VQW 1 "register_operand" "w") | |
2760 | (match_operand:VQW 2 "register_operand" "w")] | |
2761 | "TARGET_SIMD" | |
2762 | { | |
2763 | rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); | |
d68f29d7 | 2764 | emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1], |
2765 | operands[2], p)); | |
df401d54 | 2766 | DONE; |
2767 | }) | |
2768 | ||
| | ;; uaddl2 expander: builds the half selector with |
| | ;; aarch64_simd_vect_par_cnst_half (<MODE>mode, true) and emits |
| | ;; aarch64_uaddl<mode>_hi_internal with it as operand 3. |
2769 | (define_expand "aarch64_uaddl2<mode>" | |
2770 | [(match_operand:<VWIDE> 0 "register_operand" "=w") | |
2771 | (match_operand:VQW 1 "register_operand" "w") | |
2772 | (match_operand:VQW 2 "register_operand" "w")] | |
2773 | "TARGET_SIMD" | |
2774 | { | |
2775 | rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); | |
d68f29d7 | 2776 | emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1], |
2777 | operands[2], p)); | |
df401d54 | 2778 | DONE; |
2779 | }) | |
2780 | ||
| | ;; ssubl2 expander: builds the half selector with |
| | ;; aarch64_simd_vect_par_cnst_half (<MODE>mode, true) and emits |
| | ;; aarch64_ssubl<mode>_hi_internal with it as operand 3. |
2781 | (define_expand "aarch64_ssubl2<mode>" | |
2782 | [(match_operand:<VWIDE> 0 "register_operand" "=w") | |
2783 | (match_operand:VQW 1 "register_operand" "w") | |
2784 | (match_operand:VQW 2 "register_operand" "w")] | |
2785 | "TARGET_SIMD" | |
2786 | { | |
2787 | rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); | |
d68f29d7 | 2788 | emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1], |
df401d54 | 2789 | operands[2], p)); |
2790 | DONE; | |
2791 | }) | |
2792 | ||
| | ;; usubl2 expander: builds the half selector with |
| | ;; aarch64_simd_vect_par_cnst_half (<MODE>mode, true) and emits |
| | ;; aarch64_usubl<mode>_hi_internal with it as operand 3. |
2793 | (define_expand "aarch64_usubl2<mode>" | |
2794 | [(match_operand:<VWIDE> 0 "register_operand" "=w") | |
2795 | (match_operand:VQW 1 "register_operand" "w") | |
2796 | (match_operand:VQW 2 "register_operand" "w")] | |
2797 | "TARGET_SIMD" | |
2798 | { | |
2799 | rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); | |
d68f29d7 | 2800 | emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1], |
df401d54 | 2801 | operands[2], p)); |
2802 | DONE; | |
2803 | }) | |
2804 | ||
| | ;; Widening add/subtract of two whole VD_BHSI vectors: both sources are |
| | ;; sign- or zero-extended (ANY_EXTEND) to <VWIDE> before the ADDSUB. |
2805 | (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>" | |
2806 | [(set (match_operand:<VWIDE> 0 "register_operand" "=w") | |
2807 | (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> | |
658fa7f6 | 2808 | (match_operand:VD_BHSI 1 "register_operand" "w")) |
df401d54 | 2809 | (ANY_EXTEND:<VWIDE> |
658fa7f6 | 2810 | (match_operand:VD_BHSI 2 "register_operand" "w"))))] |
df401d54 | 2811 | "TARGET_SIMD" |
00225f5c | 2812 | "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>" |
52e95e58 | 2813 | [(set_attr "type" "neon_<ADDSUB:optab>_long")] |
df401d54 | 2814 | ) |
2815 | ||
2816 | ;; <su><addsub>w<q>. | |
2817 | ||
| | ;; Signed widening sum for VQW inputs: operand 0 = operand 2 + |
| | ;; sign_extend (operand 1), with a <VDBLW> result. Done in two steps: the |
| | ;; low half of operand 1 is added to the accumulator into a temporary, then |
| | ;; saddw2 adds operand 1 again into the final result. |
166b3aa6 | 2818 | (define_expand "widen_ssum<mode>3" |
2819 | [(set (match_operand:<VDBLW> 0 "register_operand" "") | |
2820 | (plus:<VDBLW> (sign_extend:<VDBLW> | |
2821 | (match_operand:VQW 1 "register_operand" "")) | |
2822 | (match_operand:<VDBLW> 2 "register_operand" "")))] | |
2823 | "TARGET_SIMD" | |
2824 | { | |
| | /* Selector built with `false'; it is passed to the _internal pattern |
| | whose operand 3 must be a vect_par_cnst_lo_half. */ |
2825 | rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false); | |
2826 | rtx temp = gen_reg_rtx (GET_MODE (operands[0])); | |
2827 | ||
2828 | emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2], | |
2829 | operands[1], p)); | |
2830 | emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1])); | |
2831 | DONE; | |
2832 | } | |
2833 | ) | |
2834 | ||
| | ;; Signed widening sum for VD_BHSI inputs: a single saddw. The accumulator |
| | ;; (operand 2) is passed first and the narrow vector (operand 1) second, |
| | ;; matching the operand order of aarch64_saddw<mode>. |
2835 | (define_expand "widen_ssum<mode>3" | |
2836 | [(set (match_operand:<VWIDE> 0 "register_operand" "") | |
2837 | (plus:<VWIDE> (sign_extend:<VWIDE> | |
2838 | (match_operand:VD_BHSI 1 "register_operand" "")) | |
2839 | (match_operand:<VWIDE> 2 "register_operand" "")))] | |
2840 | "TARGET_SIMD" | |
2841 | { | |
2842 | emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1])); | |
2843 | DONE; | |
2844 | }) | |
2845 | ||
| | ;; Unsigned widening sum for VQW inputs: operand 0 = operand 2 + |
| | ;; zero_extend (operand 1), with a <VDBLW> result. Done in two steps: the |
| | ;; low half of operand 1 is added to the accumulator into a temporary, then |
| | ;; uaddw2 adds operand 1 again into the final result. |
2846 | (define_expand "widen_usum<mode>3" | |
2847 | [(set (match_operand:<VDBLW> 0 "register_operand" "") | |
2848 | (plus:<VDBLW> (zero_extend:<VDBLW> | |
2849 | (match_operand:VQW 1 "register_operand" "")) | |
2850 | (match_operand:<VDBLW> 2 "register_operand" "")))] | |
2851 | "TARGET_SIMD" | |
2852 | { | |
| | /* Selector built with `false'; it is passed to the _internal pattern |
| | whose operand 3 must be a vect_par_cnst_lo_half. */ |
2853 | rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false); | |
2854 | rtx temp = gen_reg_rtx (GET_MODE (operands[0])); | |
2855 | ||
2856 | emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2], | |
2857 | operands[1], p)); | |
2858 | emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1])); | |
2859 | DONE; | |
2860 | } | |
2861 | ) | |
2862 | ||
| | ;; Unsigned widening sum for VD_BHSI inputs: a single uaddw. The |
| | ;; accumulator (operand 2) is passed first and the narrow vector |
| | ;; (operand 1) second, matching the operand order of aarch64_uaddw<mode>. |
2863 | (define_expand "widen_usum<mode>3" | |
2864 | [(set (match_operand:<VWIDE> 0 "register_operand" "") | |
2865 | (plus:<VWIDE> (zero_extend:<VWIDE> | |
2866 | (match_operand:VD_BHSI 1 "register_operand" "")) | |
2867 | (match_operand:<VWIDE> 2 "register_operand" "")))] | |
2868 | "TARGET_SIMD" | |
2869 | { | |
2870 | emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1])); | |
2871 | DONE; | |
2872 | }) | |
2873 | ||
| | ;; Wide add/subtract: operand 1 is already <VWIDE>; operand 2 is a VD_BHSI |
| | ;; vector that is sign- or zero-extended to <VWIDE> before the ADDSUB. |
df401d54 | 2874 | (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>" |
2875 | [(set (match_operand:<VWIDE> 0 "register_operand" "=w") | |
2876 | (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w") | |
2877 | (ANY_EXTEND:<VWIDE> | |
658fa7f6 | 2878 | (match_operand:VD_BHSI 2 "register_operand" "w"))))] |
df401d54 | 2879 | "TARGET_SIMD" |
2880 | "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>" | |
52e95e58 | 2881 | [(set_attr "type" "neon_<ADDSUB:optab>_widen")] |
df401d54 | 2882 | ) |
2883 | ||
| | ;; Wide add/subtract using the low half of a VQW vector: operand 3 is the |
| | ;; lane selector (a vect_par_cnst_lo_half). Emits the "w" form with |
| | ;; operand 2 printed as <Vhalftype>. |
166b3aa6 | 2884 | (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>_internal" |
2885 | [(set (match_operand:<VWIDE> 0 "register_operand" "=w") | |
2886 | (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w") | |
2887 | (ANY_EXTEND:<VWIDE> | |
2888 | (vec_select:<VHALF> | |
2889 | (match_operand:VQW 2 "register_operand" "w") | |
2890 | (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))] | |
2891 | "TARGET_SIMD" | |
2892 | "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>" | |
2893 | [(set_attr "type" "neon_<ADDSUB:optab>_widen")] | |
2894 | ) | |
2895 | ||
| | ;; Wide add/subtract using the high half of a VQW vector: operand 3 is the |
| | ;; lane selector (a vect_par_cnst_hi_half). Emits the "w2" form. |
df401d54 | 2896 | (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>_internal" |
2897 | [(set (match_operand:<VWIDE> 0 "register_operand" "=w") | |
2898 | (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w") | |
2899 | (ANY_EXTEND:<VWIDE> | |
2900 | (vec_select:<VHALF> | |
2901 | (match_operand:VQW 2 "register_operand" "w") | |
2902 | (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))] | |
2903 | "TARGET_SIMD" | |
2904 | "<ANY_EXTEND:su><ADDSUB:optab>w2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>" | |
52e95e58 | 2905 | [(set_attr "type" "neon_<ADDSUB:optab>_widen")] |
df401d54 | 2906 | ) |
2907 | ||
| | ;; saddw2 expander: builds the half selector with |
| | ;; aarch64_simd_vect_par_cnst_half (<MODE>mode, true) and emits |
| | ;; aarch64_saddw2<mode>_internal with it as operand 3. |
2908 | (define_expand "aarch64_saddw2<mode>" | |
2909 | [(match_operand:<VWIDE> 0 "register_operand" "=w") | |
2910 | (match_operand:<VWIDE> 1 "register_operand" "w") | |
2911 | (match_operand:VQW 2 "register_operand" "w")] | |
2912 | "TARGET_SIMD" | |
2913 | { | |
2914 | rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); | |
2915 | emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1], | |
2916 | operands[2], p)); | |
2917 | DONE; | |
2918 | }) | |
2919 | ||
| | ;; uaddw2 expander: builds the half selector with |
| | ;; aarch64_simd_vect_par_cnst_half (<MODE>mode, true) and emits |
| | ;; aarch64_uaddw2<mode>_internal with it as operand 3. |
2920 | (define_expand "aarch64_uaddw2<mode>" | |
2921 | [(match_operand:<VWIDE> 0 "register_operand" "=w") | |
2922 | (match_operand:<VWIDE> 1 "register_operand" "w") | |
2923 | (match_operand:VQW 2 "register_operand" "w")] | |
2924 | "TARGET_SIMD" | |
2925 | { | |
2926 | rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); | |
2927 | emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1], | |
2928 | operands[2], p)); | |
2929 | DONE; | |
2930 | }) | |
2931 | ||
2932 | ||
| | ;; ssubw2 expander: builds the half selector with |
| | ;; aarch64_simd_vect_par_cnst_half (<MODE>mode, true) and emits |
| | ;; aarch64_ssubw2<mode>_internal with it as operand 3. |
2933 | (define_expand "aarch64_ssubw2<mode>" | |
2934 | [(match_operand:<VWIDE> 0 "register_operand" "=w") | |
2935 | (match_operand:<VWIDE> 1 "register_operand" "w") | |
2936 | (match_operand:VQW 2 "register_operand" "w")] | |
2937 | "TARGET_SIMD" | |
2938 | { | |
2939 | rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); | |
2940 | emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1], | |
2941 | operands[2], p)); | |
2942 | DONE; | |
2943 | }) | |
2944 | ||
| | ;; usubw2 expander: builds the half selector with |
| | ;; aarch64_simd_vect_par_cnst_half (<MODE>mode, true) and emits |
| | ;; aarch64_usubw2<mode>_internal with it as operand 3. |
2945 | (define_expand "aarch64_usubw2<mode>" | |
2946 | [(match_operand:<VWIDE> 0 "register_operand" "=w") | |
2947 | (match_operand:<VWIDE> 1 "register_operand" "w") | |
2948 | (match_operand:VQW 2 "register_operand" "w")] | |
2949 | "TARGET_SIMD" | |
2950 | { | |
2951 | rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); | |
2952 | emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1], | |
2953 | operands[2], p)); | |
2954 | DONE; | |
2955 | }) | |
2956 | ||
2957 | ;; <su><r>h<addsub>. | |
2958 | ||
| | ;; Halving add/subtract family on VDQ_BHSI vectors, modelled as an unspec |
| | ;; over the HADDSUB iterator; <sur> and <addsub> choose the mnemonic. |
2959 | (define_insn "aarch64_<sur>h<addsub><mode>" | |
658fa7f6 | 2960 | [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w") |
2961 | (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w") | |
2962 | (match_operand:VDQ_BHSI 2 "register_operand" "w")] | |
df401d54 | 2963 | HADDSUB))] |
2964 | "TARGET_SIMD" | |
2965 | "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" | |
52e95e58 | 2966 | [(set_attr "type" "neon_<addsub>_halve<q>")] |
df401d54 | 2967 | ) |
2968 | ||
2969 | ;; <r><addsub>hn<q>. | |
2970 | ||
| | ;; Add/subtract "hn" family: two VQN sources give a <VNARROWQ> result, |
| | ;; modelled as an unspec over the ADDSUBHN iterator. |
2971 | (define_insn "aarch64_<sur><addsub>hn<mode>" | |
2972 | [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w") | |
2973 | (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w") | |
2974 | (match_operand:VQN 2 "register_operand" "w")] | |
2975 | ADDSUBHN))] | |
2976 | "TARGET_SIMD" | |
2977 | "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>" | |
52e95e58 | 2978 | [(set_attr "type" "neon_<addsub>_halve_narrow_q")] |
df401d54 | 2979 | ) |
2980 | ||
| | ;; "hn2" variant: operand 1 (<VNARROWQ>) is tied to the destination by the |
| | ;; "0" constraint and therefore does not appear in the assembly template; |
| | ;; operands 2 and 3 are the VQN sources. The result mode is <VNARROWQ2>. |
2981 | (define_insn "aarch64_<sur><addsub>hn2<mode>" | |
2982 | [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w") | |
2983 | (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0") | |
2984 | (match_operand:VQN 2 "register_operand" "w") | |
2985 | (match_operand:VQN 3 "register_operand" "w")] | |
2986 | ADDSUBHN2))] | |
2987 | "TARGET_SIMD" | |
2988 | "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>" | |
52e95e58 | 2989 | [(set_attr "type" "neon_<addsub>_halve_narrow_q")] |
df401d54 | 2990 | ) |
2991 | ||
2992 | ;; pmul. | |
2993 | ||
| | ;; pmul on VB vectors, modelled as UNSPEC_PMUL of operands 1 and 2. |
2994 | (define_insn "aarch64_pmul<mode>" | |
2995 | [(set (match_operand:VB 0 "register_operand" "=w") | |
2996 | (unspec:VB [(match_operand:VB 1 "register_operand" "w") | |
2997 | (match_operand:VB 2 "register_operand" "w")] | |
2998 | UNSPEC_PMUL))] | |
2999 | "TARGET_SIMD" | |
3000 | "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" | |
52e95e58 | 3001 | [(set_attr "type" "neon_mul_<Vetype><q>")] |
df401d54 | 3002 | ) |
3003 | ||
4f03d0e0 | 3004 | ;; fmulx. |
3005 | ||
| | ;; fmulx on VALLF modes, modelled as UNSPEC_FMULX of operands 1 and 2. |
| | ;; The %<v> / <Vmtype> pair selects how each register is printed for the |
| | ;; current mode. |
3006 | (define_insn "aarch64_fmulx<mode>" | |
3007 | [(set (match_operand:VALLF 0 "register_operand" "=w") | |
3008 | (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w") | |
3009 | (match_operand:VALLF 2 "register_operand" "w")] | |
3010 | UNSPEC_FMULX))] | |
3011 | "TARGET_SIMD" | |
3012 | "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>" | |
3013 | [(set_attr "type" "neon_fp_mul_<Vetype>")] | |
3014 | ) | |
3015 | ||
2ec3c6ff | 3016 | ;; vmulxq_lane_f32, and vmulx_laneq_f32 |
3017 | ||
| | ;; fmulx of a VDQSF vector (operand 1) by one lane of operand 2, where |
| | ;; operand 2 has the <VSWAP_WIDTH> mode rather than the mode of operand 1. |
| | ;; The selected lane is duplicated across the vector in the RTL. |
3018 | (define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>" | |
3019 | [(set (match_operand:VDQSF 0 "register_operand" "=w") | |
3020 | (unspec:VDQSF | |
3021 | [(match_operand:VDQSF 1 "register_operand" "w") | |
3022 | (vec_duplicate:VDQSF | |
3023 | (vec_select:<VEL> | |
3024 | (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w") | |
3025 | (parallel [(match_operand:SI 3 "immediate_operand" "i")])))] | |
3026 | UNSPEC_FMULX))] | |
3027 | "TARGET_SIMD" | |
3028 | { | |
| | /* The lane belongs to operand 2, so remap it using <VSWAP_WIDTH>mode. */ |
3029 | operands[3] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode, | |
3030 | INTVAL (operands[3]))); | |
3031 | return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]"; | |
3032 | } | |
3033 | [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")] | |
3034 | ) | |
3035 | ||
3036 | ;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32 | |
3037 | ||
;; FMULX by element where the lane source (operand 2) has the same VDQF mode
;; as the result.  Operand 1 is combined, under UNSPEC_FMULX, with a
;; vec_duplicate of lane operand 3 of operand 2.  The lane index is remapped
;; with ENDIAN_LANE_N on <MODE>mode before it is printed.
3038 | (define_insn "*aarch64_mulx_elt<mode>" | |
3039 | [(set (match_operand:VDQF 0 "register_operand" "=w") | |
3040 | (unspec:VDQF | |
3041 | [(match_operand:VDQF 1 "register_operand" "w") | |
3042 | (vec_duplicate:VDQF | |
3043 | (vec_select:<VEL> | |
3044 | (match_operand:VDQF 2 "register_operand" "w") | |
3045 | (parallel [(match_operand:SI 3 "immediate_operand" "i")])))] | |
3046 | UNSPEC_FMULX))] | |
3047 | "TARGET_SIMD" | |
3048 | { | |
3049 | operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]))); | |
3050 | return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]"; | |
3051 | } | |
3052 | [(set_attr "type" "neon_fp_mul_<Vetype><q>")] | |
3053 | ) | |
3054 | ||
3055 | ;; vmulxq_lane_f64 | |
3056 | ||
;; FMULX of a V2DF vector (operand 1) by a DF register value (operand 2) that
;; is vec_duplicate'd to V2DF.  There is no lane operand: the output template
;; always names element 0 of operand 2 (%2.d[0]).
3057 | (define_insn "*aarch64_mulx_elt_to_64v2df" | |
3058 | [(set (match_operand:V2DF 0 "register_operand" "=w") | |
3059 | (unspec:V2DF | |
3060 | [(match_operand:V2DF 1 "register_operand" "w") | |
3061 | (vec_duplicate:V2DF | |
3062 | (match_operand:DF 2 "register_operand" "w"))] | |
3063 | UNSPEC_FMULX))] | |
3064 | "TARGET_SIMD" | |
3065 | { | |
3066 | return "fmulx\t%0.2d, %1.2d, %2.d[0]"; | |
3067 | } | |
3068 | [(set_attr "type" "neon_fp_mul_d_scalar_q")] | |
3069 | ) | |
3070 | ||
3071 | ;; vmulxs_lane_f32, vmulxs_laneq_f32 | |
3072 | ;; vmulxd_lane_f64 == vmulx_lane_f64 | |
3073 | ;; vmulxd_laneq_f64 == vmulx_laneq_f64 | |
3074 | ||
;; Scalar FMULX by element.  The result and operand 1 have the element mode
;; <VEL>; the second input is lane operand 3 selected (vec_select) from the
;; VDQF_DF vector operand 2.  The lane index is remapped with ENDIAN_LANE_N
;; on <MODE>mode before it is printed.
3075 | (define_insn "*aarch64_vgetfmulx<mode>" | |
3076 | [(set (match_operand:<VEL> 0 "register_operand" "=w") | |
3077 | (unspec:<VEL> | |
3078 | [(match_operand:<VEL> 1 "register_operand" "w") | |
3079 | (vec_select:<VEL> | |
3080 | (match_operand:VDQF_DF 2 "register_operand" "w") | |
3081 | (parallel [(match_operand:SI 3 "immediate_operand" "i")]))] | |
3082 | UNSPEC_FMULX))] | |
3083 | "TARGET_SIMD" | |
3084 | { | |
3085 | operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]))); | |
3086 | return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]"; | |
3087 | } | |
3088 | [(set_attr "type" "fmul<Vetype>")] | |
3089 | ) | |
df401d54 | 3090 | ;; <su>q<addsub> |
3091 | ||
;; One pattern per code in the BINQOPS iterator, over the VSDQ_I modes:
;; operand 0 = BINQOPS (operand 1, operand 2).  Both the pattern name and the
;; mnemonic are assembled from the <su_optab> and <optab> attributes.
3092 | (define_insn "aarch64_<su_optab><optab><mode>" | |
3093 | [(set (match_operand:VSDQ_I 0 "register_operand" "=w") | |
3094 | (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w") | |
3095 | (match_operand:VSDQ_I 2 "register_operand" "w")))] | |
3096 | "TARGET_SIMD" | |
3097 | "<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>" | |
52e95e58 | 3098 | [(set_attr "type" "neon_<optab><q>")] |
df401d54 | 3099 | ) |
3100 | ||
3101 | ;; suqadd and usqadd | |
3102 | ||
;; SUQADD / USQADD (selected by the USSUQADD unspec iterator) on the VSDQ_I
;; modes.  Operand 1 carries the "0" constraint, i.e. it must live in the
;; destination register, which is why the template prints only operands 0
;; and 2.
3103 | (define_insn "aarch64_<sur>qadd<mode>" | |
3104 | [(set (match_operand:VSDQ_I 0 "register_operand" "=w") | |
3105 | (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0") | |
3106 | (match_operand:VSDQ_I 2 "register_operand" "w")] | |
3107 | USSUQADD))] | |
3108 | "TARGET_SIMD" | |
3109 | "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>" | |
52e95e58 | 3110 | [(set_attr "type" "neon_qadd<q>")] |
df401d54 | 3111 | ) |
3112 | ||
3113 | ;; sqmovun | |
3114 | ||
;; SQXTUN: operand 0, in the <VNARROWQ> mode, is UNSPEC_SQXTUN of the
;; VSQN_HSDI operand 1.  The destination is printed with the narrow
;; <vn2>/<Vmntype> syntax and the source with <v>/<Vmtype>.
3115 | (define_insn "aarch64_sqmovun<mode>" | |
3116 | [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w") | |
3117 | (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")] | |
3118 | UNSPEC_SQXTUN))] | |
3119 | "TARGET_SIMD" | |
3120 | "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>" | |
52e95e58 | 3121 | [(set_attr "type" "neon_sat_shift_imm_narrow_q")] |
03032325 | 3122 | ) |
df401d54 | 3123 | |
3124 | ;; sqmovn and uqmovn | |
3125 | ||
;; <sur>QXTN (one pattern per SUQMOVN unspec): operand 0, in the <VNARROWQ>
;; mode, is the unspec of the VSQN_HSDI operand 1.  The destination is
;; printed with the narrow <vn2>/<Vmntype> syntax.
3126 | (define_insn "aarch64_<sur>qmovn<mode>" | |
3127 | [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w") | |
3128 | (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")] | |
3129 | SUQMOVN))] | |
3130 | "TARGET_SIMD" | |
3131 | "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>" | |
52e95e58 | 3132 | [(set_attr "type" "neon_sat_shift_imm_narrow_q")] |
03032325 | 3133 | ) |
df401d54 | 3134 | |
3135 | ;; <su>q<absneg> | |
3136 | ||
;; Unary operations from the UNQOPS code iterator on the VSDQ_I modes:
;; operand 0 = UNQOPS (operand 1).  The mnemonic is "s" followed by <optab>.
3137 | (define_insn "aarch64_s<optab><mode>" | |
a9aae8ec | 3138 | [(set (match_operand:VSDQ_I 0 "register_operand" "=w") |
3139 | (UNQOPS:VSDQ_I | |
3140 | (match_operand:VSDQ_I 1 "register_operand" "w")))] | |
df401d54 | 3141 | "TARGET_SIMD" |
3142 | "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>" | |
52e95e58 | 3143 | [(set_attr "type" "neon_<optab><q>")] |
df401d54 | 3144 | ) |
3145 | ||
3146 | ;; sq<r>dmulh. | |
3147 | ||
;; SQDMULH / SQRDMULH (the <r> attribute follows the VQDMULH unspec iterator)
;; on the VSDQ_HSI modes: operand 0 is the unspec of register operands 1
;; and 2.
3148 | (define_insn "aarch64_sq<r>dmulh<mode>" | |
3149 | [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w") | |
3150 | (unspec:VSDQ_HSI | |
3151 | [(match_operand:VSDQ_HSI 1 "register_operand" "w") | |
3152 | (match_operand:VSDQ_HSI 2 "register_operand" "w")] | |
3153 | VQDMULH))] | |
3154 | "TARGET_SIMD" | |
3155 | "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>" | |
52e95e58 | 3156 | [(set_attr "type" "neon_sat_mul_<Vetype><q>")] |
df401d54 | 3157 | ) |
3158 | ||
3159 | ;; sq<r>dmulh_lane | |
3160 | ||
;; SQ[R]DMULH by lane, vector (VDQHS) form, lane taken from a <VCOND>
;; register.  Operand 1 is combined, under the VQDMULH unspec, with lane
;; operand 3 of operand 2.  The output code is written as a braced C block:
;; it remaps the lane index with ENDIAN_LANE_N on <VCOND>mode and then
;; returns the assembler template.
491d11ba | 3161 | (define_insn "aarch64_sq<r>dmulh_lane<mode>" |
431fc079 | 3162 | [(set (match_operand:VDQHS 0 "register_operand" "=w") |
3163 | (unspec:VDQHS | |
3164 | [(match_operand:VDQHS 1 "register_operand" "w") | |
3165 | (vec_select:<VEL> | |
3166 | (match_operand:<VCOND> 2 "register_operand" "<vwx>") | |
3167 | (parallel [(match_operand:SI 3 "immediate_operand" "i")]))] | |
3168 | VQDMULH))] | |
3169 | "TARGET_SIMD" | |
3170 | { | |
c034024c | 3171 | operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3]))); |
431fc079 | 3172 | return "sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]"; } |
52e95e58 | 3173 | [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")] |
431fc079 | 3174 | ) |
3175 | ||
;; SQ[R]DMULH by lane, vector (VDQHS) form, lane taken from a <VCONQ>
;; register.  Operand 1 is combined, under the VQDMULH unspec, with lane
;; operand 3 of operand 2.  The output code is written as a braced C block:
;; it remaps the lane index with ENDIAN_LANE_N on <VCONQ>mode and then
;; returns the assembler template.
491d11ba | 3176 | (define_insn "aarch64_sq<r>dmulh_laneq<mode>" |
431fc079 | 3177 | [(set (match_operand:VDQHS 0 "register_operand" "=w") |
3178 | (unspec:VDQHS | |
3179 | [(match_operand:VDQHS 1 "register_operand" "w") | |
3180 | (vec_select:<VEL> | |
3181 | (match_operand:<VCONQ> 2 "register_operand" "<vwx>") | |
3182 | (parallel [(match_operand:SI 3 "immediate_operand" "i")]))] | |
3183 | VQDMULH))] | |
3184 | "TARGET_SIMD" | |
3185 | { | |
c034024c | 3186 | operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3]))); |
431fc079 | 3187 | return "sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]"; } |
52e95e58 | 3188 | [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")] |
431fc079 | 3189 | ) |
3190 | ||
;; SQ[R]DMULH by lane, scalar (SD_HSI) form, lane taken from a <VCOND>
;; register.  Operand 1 is combined, under the VQDMULH unspec, with lane
;; operand 3 of operand 2.  The output code is written as a braced C block:
;; it remaps the lane index with ENDIAN_LANE_N on <VCOND>mode and then
;; returns the assembler template.
491d11ba | 3191 | (define_insn "aarch64_sq<r>dmulh_lane<mode>" |
431fc079 | 3192 | [(set (match_operand:SD_HSI 0 "register_operand" "=w") |
3193 | (unspec:SD_HSI | |
3194 | [(match_operand:SD_HSI 1 "register_operand" "w") | |
df401d54 | 3195 | (vec_select:<VEL> |
c7a2a326 | 3196 | (match_operand:<VCOND> 2 "register_operand" "<vwx>") |
df401d54 | 3197 | (parallel [(match_operand:SI 3 "immediate_operand" "i")]))] |
3198 | VQDMULH))] | |
3199 | "TARGET_SIMD" | |
3200 | { | |
c7a2a326 | 3201 | operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3]))); |
431fc079 | 3202 | return "sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]"; } |
52e95e58 | 3203 | [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")] |
df401d54 | 3204 | ) |
3205 | ||
;; SQ[R]DMULH by lane, scalar (SD_HSI) form, lane taken from a <VCONQ>
;; register.  Operand 1 is combined, under the VQDMULH unspec, with lane
;; operand 3 of operand 2.  The output code is written as a braced C block:
;; it remaps the lane index with ENDIAN_LANE_N on <VCONQ>mode and then
;; returns the assembler template.
491d11ba | 3206 | (define_insn "aarch64_sq<r>dmulh_laneq<mode>" |
ac016904 | 3207 | [(set (match_operand:SD_HSI 0 "register_operand" "=w") |
3208 | (unspec:SD_HSI | |
3209 | [(match_operand:SD_HSI 1 "register_operand" "w") | |
3210 | (vec_select:<VEL> | |
3211 | (match_operand:<VCONQ> 2 "register_operand" "<vwx>") | |
3212 | (parallel [(match_operand:SI 3 "immediate_operand" "i")]))] | |
3213 | VQDMULH))] | |
3214 | "TARGET_SIMD" | |
3215 | { | |
3216 | operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3]))); | |
3217 | return "sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]"; } | |
3218 | [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")] | |
3219 | ) | |
3220 | ||
03032325 | 3221 | ;; sqrdml[as]h. |
3222 | ||
;; SQRDMLAH / SQRDMLSH (selected by the SQRDMLH_AS unspec iterator) on the
;; VSDQ_HSI modes.  Operand 1 carries the "0" constraint, so it shares the
;; destination register and is not printed; operands 2 and 3 are the other
;; two inputs.  Enabled by TARGET_SIMD_RDMA rather than plain TARGET_SIMD.
3223 | (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>" | |
3224 | [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w") | |
3225 | (unspec:VSDQ_HSI | |
3226 | [(match_operand:VSDQ_HSI 1 "register_operand" "0") | |
3227 | (match_operand:VSDQ_HSI 2 "register_operand" "w") | |
3228 | (match_operand:VSDQ_HSI 3 "register_operand" "w")] | |
3229 | SQRDMLH_AS))] | |
3230 | "TARGET_SIMD_RDMA" | |
3231 | "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>" | |
3232 | [(set_attr "type" "neon_sat_mla_<Vetype>_long")] | |
3233 | ) | |
3234 | ||
3235 | ;; sqrdml[as]h_lane. | |
3236 | ||
;; SQRDML[AS]H by lane, vector (VDQHS) form, lane taken from a <VCOND>
;; register.  Operand 1 carries the "0" constraint, so it shares the
;; destination register and is not printed.  Operand 3, the lane source, uses
;; the <vwx> constraint like the other HI/SI by-lane patterns in this file
;; (e.g. aarch64_sq<r>dmulh_lane<mode>) instead of plain "w": with 16-bit
;; elements the by-element encoding can only name the low vector registers,
;; so "w" could let the register allocator pick one the assembler rejects.
;; The lane index is remapped with ENDIAN_LANE_N on <VCOND>mode before it is
;; printed.
3237 | (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>" | |
3238 | [(set (match_operand:VDQHS 0 "register_operand" "=w") | |
3239 | (unspec:VDQHS | |
3240 | [(match_operand:VDQHS 1 "register_operand" "0") | |
3241 | (match_operand:VDQHS 2 "register_operand" "w") | |
3242 | (vec_select:<VEL> | |
3243 | (match_operand:<VCOND> 3 "register_operand" "<vwx>") | |
3244 | (parallel [(match_operand:SI 4 "immediate_operand" "i")]))] | |
3245 | SQRDMLH_AS))] | |
3246 | "TARGET_SIMD_RDMA" | |
3247 | { | |
3248 | operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4]))); | |
3249 | return | |
3250 | "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]"; | |
3251 | } | |
3252 | [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] | |
3253 | ) | |
3254 | ||
;; SQRDML[AS]H by lane, scalar (SD_HSI) form, lane taken from a <VCOND>
;; register.  Operand 1 carries the "0" constraint, so it shares the
;; destination register and is not printed.  Operand 3, the lane source, uses
;; the <vwx> constraint like the other HI/SI by-lane patterns in this file
;; (e.g. the SD_HSI aarch64_sq<r>dmulh_lane<mode>) instead of plain "w": with
;; 16-bit elements the by-element encoding can only name the low vector
;; registers, so "w" could let the register allocator pick one the assembler
;; rejects.  The lane index is remapped with ENDIAN_LANE_N on <VCOND>mode
;; before it is printed.
3255 | (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>" | |
3256 | [(set (match_operand:SD_HSI 0 "register_operand" "=w") | |
3257 | (unspec:SD_HSI | |
3258 | [(match_operand:SD_HSI 1 "register_operand" "0") | |
3259 | (match_operand:SD_HSI 2 "register_operand" "w") | |
3260 | (vec_select:<VEL> | |
3261 | (match_operand:<VCOND> 3 "register_operand" "<vwx>") | |
3262 | (parallel [(match_operand:SI 4 "immediate_operand" "i")]))] | |
3263 | SQRDMLH_AS))] | |
3264 | "TARGET_SIMD_RDMA" | |
3265 | { | |
3266 | operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4]))); | |
3267 | return | |
3268 | "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]"; | |
3269 | } | |
3270 | [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] | |
3271 | ) | |
3272 | ||
3273 | ;; sqrdml[as]h_laneq. | |
3274 | ||
;; SQRDML[AS]H by lane, vector (VDQHS) form, lane taken from a <VCONQ>
;; register.  Operand 1 carries the "0" constraint, so it shares the
;; destination register and is not printed.  Operand 3, the lane source, uses
;; the <vwx> constraint like the other HI/SI by-lane patterns in this file
;; (e.g. aarch64_sq<r>dmulh_laneq<mode>) instead of plain "w": with 16-bit
;; elements the by-element encoding can only name the low vector registers,
;; so "w" could let the register allocator pick one the assembler rejects.
;; The lane index is remapped with ENDIAN_LANE_N on <VCONQ>mode before it is
;; printed.
3275 | (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>" | |
3276 | [(set (match_operand:VDQHS 0 "register_operand" "=w") | |
3277 | (unspec:VDQHS | |
3278 | [(match_operand:VDQHS 1 "register_operand" "0") | |
3279 | (match_operand:VDQHS 2 "register_operand" "w") | |
3280 | (vec_select:<VEL> | |
3281 | (match_operand:<VCONQ> 3 "register_operand" "<vwx>") | |
3282 | (parallel [(match_operand:SI 4 "immediate_operand" "i")]))] | |
3283 | SQRDMLH_AS))] | |
3284 | "TARGET_SIMD_RDMA" | |
3285 | { | |
3286 | operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4]))); | |
3287 | return | |
3288 | "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]"; | |
3289 | } | |
3290 | [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] | |
3291 | ) | |
3292 | ||
;; SQRDML[AS]H by lane, scalar (SD_HSI) form, lane taken from a <VCONQ>
;; register.  Operand 1 carries the "0" constraint, so it shares the
;; destination register and is not printed.  Operand 3, the lane source, uses
;; the <vwx> constraint like the other HI/SI by-lane patterns in this file
;; (e.g. the SD_HSI aarch64_sq<r>dmulh_laneq<mode>) instead of plain "w":
;; with 16-bit elements the by-element encoding can only name the low vector
;; registers, so "w" could let the register allocator pick one the assembler
;; rejects.  The lane index is remapped with ENDIAN_LANE_N on <VCONQ>mode
;; before it is printed.
3293 | (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>" | |
3294 | [(set (match_operand:SD_HSI 0 "register_operand" "=w") | |
3295 | (unspec:SD_HSI | |
3296 | [(match_operand:SD_HSI 1 "register_operand" "0") | |
3297 | (match_operand:SD_HSI 2 "register_operand" "w") | |
3298 | (vec_select:<VEL> | |
3299 | (match_operand:<VCONQ> 3 "register_operand" "<vwx>") | |
3300 | (parallel [(match_operand:SI 4 "immediate_operand" "i")]))] | |
3301 | SQRDMLH_AS))] | |
3302 | "TARGET_SIMD_RDMA" | |
3303 | { | |
3304 | operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4]))); | |
3305 | return | |
3306 | "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]"; | |
3307 | } | |
3308 | [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] | |
3309 | ) | |
3310 | ||
df401d54 | 3311 | ;; vqdml[sa]l |
3312 | ||
;; SQDMLAL / SQDMLSL (one pattern per SBINQOPS code).  In the <VWIDE> mode:
;;   op0 = op1 SBINQOPS ss_ashift (sign_extend (op2) * sign_extend (op3), 1)
;; Operand 1 carries the "0" constraint, so it shares the destination
;; register; operands 2 and 3 are VSD_HSI registers.
3313 | (define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>" | |
3314 | [(set (match_operand:<VWIDE> 0 "register_operand" "=w") | |
3315 | (SBINQOPS:<VWIDE> | |
3316 | (match_operand:<VWIDE> 1 "register_operand" "0") | |
3317 | (ss_ashift:<VWIDE> | |
3318 | (mult:<VWIDE> | |
3319 | (sign_extend:<VWIDE> | |
3320 | (match_operand:VSD_HSI 2 "register_operand" "w")) | |
3321 | (sign_extend:<VWIDE> | |
3322 | (match_operand:VSD_HSI 3 "register_operand" "w"))) | |
3323 | (const_int 1))))] | |
3324 | "TARGET_SIMD" | |
3325 | "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>" | |
52e95e58 | 3326 | [(set_attr "type" "neon_sat_mla_<Vetype>_long")] |
df401d54 | 3327 | ) |
3328 | ||
3329 | ;; vqdml[sa]l_lane | |
3330 | ||
;; SQDMLAL / SQDMLSL by lane, VD_HSI multiplicand, lane taken from a <VCOND>
;; register.  The second multiplicand is lane operand 4 of operand 3,
;; vec_duplicate'd to VD_HSI; operand 1 shares the destination register
;; ("0" constraint).  The lane index is remapped with ENDIAN_LANE_N on
;; <VCOND>mode before it is printed.
491d11ba | 3331 | (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>" |
df401d54 | 3332 | [(set (match_operand:<VWIDE> 0 "register_operand" "=w") |
3333 | (SBINQOPS:<VWIDE> | |
3334 | (match_operand:<VWIDE> 1 "register_operand" "0") | |
3335 | (ss_ashift:<VWIDE> | |
3336 | (mult:<VWIDE> | |
3337 | (sign_extend:<VWIDE> | |
3338 | (match_operand:VD_HSI 2 "register_operand" "w")) | |
3339 | (sign_extend:<VWIDE> | |
3340 | (vec_duplicate:VD_HSI | |
3341 | (vec_select:<VEL> | |
c7a2a326 | 3342 | (match_operand:<VCOND> 3 "register_operand" "<vwx>") |
3343 | (parallel [(match_operand:SI 4 "immediate_operand" "i")]))) | |
3344 | )) | |
3345 | (const_int 1))))] | |
3346 | "TARGET_SIMD" | |
3347 | { | |
3348 | operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4]))); | |
3349 | return | |
3350 | "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; | |
3351 | } | |
3352 | [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] | |
3353 | ) | |
3354 | ||
;; SQDMLAL / SQDMLSL by lane, VD_HSI multiplicand, lane taken from a <VCONQ>
;; register.  The second multiplicand is lane operand 4 of operand 3,
;; vec_duplicate'd to VD_HSI; operand 1 shares the destination register
;; ("0" constraint).  The lane index is remapped with ENDIAN_LANE_N on
;; <VCONQ>mode before it is printed.
491d11ba | 3355 | (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>" |
c7a2a326 | 3356 | [(set (match_operand:<VWIDE> 0 "register_operand" "=w") |
3357 | (SBINQOPS:<VWIDE> | |
3358 | (match_operand:<VWIDE> 1 "register_operand" "0") | |
3359 | (ss_ashift:<VWIDE> | |
3360 | (mult:<VWIDE> | |
3361 | (sign_extend:<VWIDE> | |
3362 | (match_operand:VD_HSI 2 "register_operand" "w")) | |
3363 | (sign_extend:<VWIDE> | |
3364 | (vec_duplicate:VD_HSI | |
3365 | (vec_select:<VEL> | |
3366 | (match_operand:<VCONQ> 3 "register_operand" "<vwx>") | |
df401d54 | 3367 | (parallel [(match_operand:SI 4 "immediate_operand" "i")]))) |
3368 | )) | |
3369 | (const_int 1))))] | |
3370 | "TARGET_SIMD" | |
c034024c | 3371 | { |
3372 | operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4]))); | |
3373 | return | |
3374 | "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; | |
3375 | } | |
52e95e58 | 3376 | [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] |
df401d54 | 3377 | ) |
3378 | ||
;; SQDMLAL / SQDMLSL by lane, scalar (SD_HSI) multiplicand, lane taken from a
;; <VCOND> register.  The second multiplicand is lane operand 4 of operand 3
;; (no vec_duplicate in this form); operand 1 shares the destination register
;; ("0" constraint).  The lane index is remapped with ENDIAN_LANE_N on
;; <VCOND>mode before it is printed.
491d11ba | 3379 | (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>" |
df401d54 | 3380 | [(set (match_operand:<VWIDE> 0 "register_operand" "=w") |
3381 | (SBINQOPS:<VWIDE> | |
3382 | (match_operand:<VWIDE> 1 "register_operand" "0") | |
3383 | (ss_ashift:<VWIDE> | |
3384 | (mult:<VWIDE> | |
3385 | (sign_extend:<VWIDE> | |
3386 | (match_operand:SD_HSI 2 "register_operand" "w")) | |
3387 | (sign_extend:<VWIDE> | |
3388 | (vec_select:<VEL> | |
c7a2a326 | 3389 | (match_operand:<VCOND> 3 "register_operand" "<vwx>") |
3390 | (parallel [(match_operand:SI 4 "immediate_operand" "i")]))) | |
3391 | ) | |
3392 | (const_int 1))))] | |
3393 | "TARGET_SIMD" | |
3394 | { | |
3395 | operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4]))); | |
3396 | return | |
3397 | "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; | |
3398 | } | |
3399 | [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] | |
3400 | ) | |
3401 | ||
;; SQDMLAL / SQDMLSL by lane, scalar (SD_HSI) multiplicand, lane taken from a
;; <VCONQ> register.  The second multiplicand is lane operand 4 of operand 3
;; (no vec_duplicate in this form); operand 1 shares the destination register
;; ("0" constraint).  The lane index is remapped with ENDIAN_LANE_N on
;; <VCONQ>mode before it is printed.
491d11ba | 3402 | (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>" |
c7a2a326 | 3403 | [(set (match_operand:<VWIDE> 0 "register_operand" "=w") |
3404 | (SBINQOPS:<VWIDE> | |
3405 | (match_operand:<VWIDE> 1 "register_operand" "0") | |
3406 | (ss_ashift:<VWIDE> | |
3407 | (mult:<VWIDE> | |
3408 | (sign_extend:<VWIDE> | |
3409 | (match_operand:SD_HSI 2 "register_operand" "w")) | |
3410 | (sign_extend:<VWIDE> | |
3411 | (vec_select:<VEL> | |
3412 | (match_operand:<VCONQ> 3 "register_operand" "<vwx>") | |
df401d54 | 3413 | (parallel [(match_operand:SI 4 "immediate_operand" "i")]))) |
3414 | ) | |
3415 | (const_int 1))))] | |
3416 | "TARGET_SIMD" | |
c034024c | 3417 | { |
3418 | operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4]))); | |
3419 | return | |
3420 | "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; | |
3421 | } | |
52e95e58 | 3422 | [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] |
df401d54 | 3423 | ) |
3424 | ||
df401d54 | 3425 | ;; vqdml[sa]l_n |
3426 | ||
;; SQDMLAL / SQDMLSL with a scalar multiplier.  Operand 3 is a <VEL> register
;; that is vec_duplicate'd to VD_HSI; the template always prints it as
;; element 0 (%3.<Vetype>[0]).  Operand 1 shares the destination register
;; ("0" constraint).
3427 | (define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>" | |
3428 | [(set (match_operand:<VWIDE> 0 "register_operand" "=w") | |
3429 | (SBINQOPS:<VWIDE> | |
3430 | (match_operand:<VWIDE> 1 "register_operand" "0") | |
3431 | (ss_ashift:<VWIDE> | |
3432 | (mult:<VWIDE> | |
3433 | (sign_extend:<VWIDE> | |
3434 | (match_operand:VD_HSI 2 "register_operand" "w")) | |
3435 | (sign_extend:<VWIDE> | |
3436 | (vec_duplicate:VD_HSI | |
9d37257a | 3437 | (match_operand:<VEL> 3 "register_operand" "<vwx>")))) |
df401d54 | 3438 | (const_int 1))))] |
3439 | "TARGET_SIMD" | |
3440 | "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]" | |
52e95e58 | 3441 | [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] |
df401d54 | 3442 | ) |
3443 | ||
3444 | ;; sqdml[as]l2 | |
3445 | ||
;; SQDMLAL2 / SQDMLSL2.  Both multiplicands are <VHALF> halves vec_select'ed
;; from the VQ_HSI operands 2 and 3 using the same selector, operand 4, which
;; must satisfy vect_par_cnst_hi_half (the second use is a match_dup).
;; Operand 1 shares the destination register ("0" constraint).  Operand 4 is
;; not printed; the "2" mnemonic suffix stands in for it.
3446 | (define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal" | |
3447 | [(set (match_operand:<VWIDE> 0 "register_operand" "=w") | |
3448 | (SBINQOPS:<VWIDE> | |
3449 | (match_operand:<VWIDE> 1 "register_operand" "0") | |
3450 | (ss_ashift:<VWIDE> | |
3451 | (mult:<VWIDE> | |
3452 | (sign_extend:<VWIDE> | |
3453 | (vec_select:<VHALF> | |
3454 | (match_operand:VQ_HSI 2 "register_operand" "w") | |
3455 | (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" ""))) | |
3456 | (sign_extend:<VWIDE> | |
3457 | (vec_select:<VHALF> | |
3458 | (match_operand:VQ_HSI 3 "register_operand" "w") | |
3459 | (match_dup 4)))) | |
3460 | (const_int 1))))] | |
3461 | "TARGET_SIMD" | |
3462 | "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>" | |
52e95e58 | 3463 | [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] |
df401d54 | 3464 | ) |
3465 | ||
;; Expander for SQDMLAL2.  Takes the destination, the accumulator and two
;; VQ_HSI inputs, builds the half selector with
;; aarch64_simd_vect_par_cnst_half (<MODE>mode, true), and emits
;; aarch64_sqdmlal2<mode>_internal with that selector as its extra operand.
3466 | (define_expand "aarch64_sqdmlal2<mode>" | |
3467 | [(match_operand:<VWIDE> 0 "register_operand" "=w") | |
3468 | (match_operand:<VWIDE> 1 "register_operand" "w") | |
3469 | (match_operand:VQ_HSI 2 "register_operand" "w") | |
3470 | (match_operand:VQ_HSI 3 "register_operand" "w")] | |
3471 | "TARGET_SIMD" | |
3472 | { | |
3473 | rtx hi_half = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); | |
3474 | emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1], | |
3475 | operands[2], operands[3], hi_half)); | |
3476 | DONE; | |
3477 | }) | |
3478 | ||
;; Expander for SQDMLSL2.  Takes the destination, the accumulator and two
;; VQ_HSI inputs, builds the half selector with
;; aarch64_simd_vect_par_cnst_half (<MODE>mode, true), and emits
;; aarch64_sqdmlsl2<mode>_internal with that selector as its extra operand.
3479 | (define_expand "aarch64_sqdmlsl2<mode>" | |
3480 | [(match_operand:<VWIDE> 0 "register_operand" "=w") | |
3481 | (match_operand:<VWIDE> 1 "register_operand" "w") | |
3482 | (match_operand:VQ_HSI 2 "register_operand" "w") | |
3483 | (match_operand:VQ_HSI 3 "register_operand" "w")] | |
3484 | "TARGET_SIMD" | |
3485 | { | |
3486 | rtx hi_half = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); | |
3487 | emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1], | |
3488 | operands[2], operands[3], hi_half)); | |
3489 | DONE; | |
3490 | }) | |
3491 | ||
3492 | ;; vqdml[sa]l2_lane | |
3493 | ||
;; SQDMLAL2 / SQDMLSL2 by lane, lane taken from a <VCOND> register.  The
;; first multiplicand is the <VHALF> half of operand 2 selected by operand 5
;; (vect_par_cnst_hi_half); the second is lane operand 4 of operand 3,
;; vec_duplicate'd to <VHALF>.  Operand 1 shares the destination register.
;; The lane index is remapped with ENDIAN_LANE_N on <VCOND>mode before it is
;; printed; operand 5 is not printed.
3494 | (define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal" | |
3495 | [(set (match_operand:<VWIDE> 0 "register_operand" "=w") | |
3496 | (SBINQOPS:<VWIDE> | |
3497 | (match_operand:<VWIDE> 1 "register_operand" "0") | |
3498 | (ss_ashift:<VWIDE> | |
3499 | (mult:<VWIDE> | |
3500 | (sign_extend:<VWIDE> | |
3501 | (vec_select:<VHALF> | |
3502 | (match_operand:VQ_HSI 2 "register_operand" "w") | |
3503 | (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" ""))) | |
3504 | (sign_extend:<VWIDE> | |
3505 | (vec_duplicate:<VHALF> | |
3506 | (vec_select:<VEL> | |
c7a2a326 | 3507 | (match_operand:<VCOND> 3 "register_operand" "<vwx>") |
3508 | (parallel [(match_operand:SI 4 "immediate_operand" "i")]) | |
3509 | )))) | |
3510 | (const_int 1))))] | |
3511 | "TARGET_SIMD" | |
3512 | { | |
3513 | operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4]))); | |
3514 | return | |
3515 | "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; | |
3516 | } | |
3517 | [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] | |
3518 | ) | |
3519 | ||
;; SQDMLAL2 / SQDMLSL2 by lane, lane taken from a <VCONQ> register.  The
;; first multiplicand is the <VHALF> half of operand 2 selected by operand 5
;; (vect_par_cnst_hi_half); the second is lane operand 4 of operand 3,
;; vec_duplicate'd to <VHALF>.  Operand 1 shares the destination register.
;; The lane index is remapped with ENDIAN_LANE_N on <VCONQ>mode before it is
;; printed; operand 5 is not printed.
3520 | (define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal" | |
3521 | [(set (match_operand:<VWIDE> 0 "register_operand" "=w") | |
3522 | (SBINQOPS:<VWIDE> | |
3523 | (match_operand:<VWIDE> 1 "register_operand" "0") | |
3524 | (ss_ashift:<VWIDE> | |
3525 | (mult:<VWIDE> | |
3526 | (sign_extend:<VWIDE> | |
3527 | (vec_select:<VHALF> | |
3528 | (match_operand:VQ_HSI 2 "register_operand" "w") | |
3529 | (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" ""))) | |
3530 | (sign_extend:<VWIDE> | |
3531 | (vec_duplicate:<VHALF> | |
3532 | (vec_select:<VEL> | |
3533 | (match_operand:<VCONQ> 3 "register_operand" "<vwx>") | |
df401d54 | 3534 | (parallel [(match_operand:SI 4 "immediate_operand" "i")]) |
3535 | )))) | |
3536 | (const_int 1))))] | |
3537 | "TARGET_SIMD" | |
c034024c | 3538 | { |
3539 | operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4]))); | |
3540 | return | |
3541 | "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; | |
3542 | } | |
52e95e58 | 3543 | [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] |
df401d54 | 3544 | ) |
3545 | ||
;; Expander for SQDMLAL2 by lane (<VCOND> lane source).  Builds the half
;; selector with aarch64_simd_vect_par_cnst_half (<MODE>mode, true) and emits
;; aarch64_sqdmlal2_lane<mode>_internal, forwarding operands 0-4 unchanged
;; and passing the selector as the final operand.
3546 | (define_expand "aarch64_sqdmlal2_lane<mode>" | |
3547 | [(match_operand:<VWIDE> 0 "register_operand" "=w") | |
3548 | (match_operand:<VWIDE> 1 "register_operand" "w") | |
3549 | (match_operand:VQ_HSI 2 "register_operand" "w") | |
c7a2a326 | 3550 | (match_operand:<VCOND> 3 "register_operand" "<vwx>") |
df401d54 | 3551 | (match_operand:SI 4 "immediate_operand" "i")] |
3552 | "TARGET_SIMD" | |
3553 | { | |
3554 | rtx hi_half = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); | |
df401d54 | 3555 | emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1], |
3556 | operands[2], operands[3], | |
3557 | operands[4], hi_half)); | |
3558 | DONE; | |
3559 | }) | |
3560 | ||
;; Expander for SQDMLAL2 by lane (<VCONQ> lane source).  Builds the half
;; selector with aarch64_simd_vect_par_cnst_half (<MODE>mode, true) and emits
;; aarch64_sqdmlal2_laneq<mode>_internal, forwarding operands 0-4 unchanged
;; and passing the selector as the final operand.
3561 | (define_expand "aarch64_sqdmlal2_laneq<mode>" | |
3562 | [(match_operand:<VWIDE> 0 "register_operand" "=w") | |
3563 | (match_operand:<VWIDE> 1 "register_operand" "w") | |
3564 | (match_operand:VQ_HSI 2 "register_operand" "w") | |
c7a2a326 | 3565 | (match_operand:<VCONQ> 3 "register_operand" "<vwx>") |
df401d54 | 3566 | (match_operand:SI 4 "immediate_operand" "i")] |
3567 | "TARGET_SIMD" | |
3568 | { | |
3569 | rtx hi_half = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); | |
c7a2a326 | 3570 | emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1], |
df401d54 | 3571 | operands[2], operands[3], |
3572 | operands[4], hi_half)); | |
3573 | DONE; | |
3574 | }) | |
3575 | ||
;; Expander for SQDMLSL2 by lane (<VCOND> lane source).  Builds the half
;; selector with aarch64_simd_vect_par_cnst_half (<MODE>mode, true) and emits
;; aarch64_sqdmlsl2_lane<mode>_internal, forwarding operands 0-4 unchanged
;; and passing the selector as the final operand.
3576 | (define_expand "aarch64_sqdmlsl2_lane<mode>" | |
3577 | [(match_operand:<VWIDE> 0 "register_operand" "=w") | |
3578 | (match_operand:<VWIDE> 1 "register_operand" "w") | |
3579 | (match_operand:VQ_HSI 2 "register_operand" "w") | |
c7a2a326 | 3580 | (match_operand:<VCOND> 3 "register_operand" "<vwx>") |
df401d54 | 3581 | (match_operand:SI 4 "immediate_operand" "i")] |
3582 | "TARGET_SIMD" | |
3583 | { | |
3584 | rtx hi_half = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); | |
df401d54 | 3585 | emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1], |
3586 | operands[2], operands[3], | |
3587 | operands[4], hi_half)); | |
3588 | DONE; | |
3589 | }) | |
3590 | ||
;; Expander for SQDMLSL2 by lane (<VCONQ> lane source).  Builds the half
;; selector with aarch64_simd_vect_par_cnst_half (<MODE>mode, true) and emits
;; aarch64_sqdmlsl2_laneq<mode>_internal, forwarding operands 0-4 unchanged
;; and passing the selector as the final operand.
3591 | (define_expand "aarch64_sqdmlsl2_laneq<mode>" | |
3592 | [(match_operand:<VWIDE> 0 "register_operand" "=w") | |
3593 | (match_operand:<VWIDE> 1 "register_operand" "w") | |
3594 | (match_operand:VQ_HSI 2 "register_operand" "w") | |
c7a2a326 | 3595 | (match_operand:<VCONQ> 3 "register_operand" "<vwx>") |
df401d54 | 3596 | (match_operand:SI 4 "immediate_operand" "i")] |
3597 | "TARGET_SIMD" | |
3598 | { | |
3599 | rtx hi_half = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); | |
c7a2a326 | 3600 | emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1], |
df401d54 | 3601 | operands[2], operands[3], |
3602 | operands[4], hi_half)); | |
3603 | DONE; | |
3604 | }) | |
3605 | ||
;; SQDMLAL2 / SQDMLSL2 with a scalar multiplier.  The first multiplicand is
;; the <VHALF> half of operand 2 selected by operand 4
;; (vect_par_cnst_hi_half); the second is the <VEL> register operand 3,
;; vec_duplicate'd to <VHALF> and always printed as element 0.  Operand 1
;; shares the destination register ("0" constraint).
3606 | (define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal" | |
3607 | [(set (match_operand:<VWIDE> 0 "register_operand" "=w") | |
3608 | (SBINQOPS:<VWIDE> | |
3609 | (match_operand:<VWIDE> 1 "register_operand" "0") | |
3610 | (ss_ashift:<VWIDE> | |
3611 | (mult:<VWIDE> | |
3612 | (sign_extend:<VWIDE> | |
3613 | (vec_select:<VHALF> | |
3614 | (match_operand:VQ_HSI 2 "register_operand" "w") | |
3615 | (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" ""))) | |
3616 | (sign_extend:<VWIDE> | |
3617 | (vec_duplicate:<VHALF> | |
9d37257a | 3618 | (match_operand:<VEL> 3 "register_operand" "<vwx>")))) |
df401d54 | 3619 | (const_int 1))))] |
3620 | "TARGET_SIMD" | |
3621 | "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]" | |
52e95e58 | 3622 | [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] |
df401d54 | 3623 | ) |
3624 | ||
;; Expander for SQDMLAL2 with a scalar (<VEL>) multiplier.  Builds the half
;; selector with aarch64_simd_vect_par_cnst_half (<MODE>mode, true) and emits
;; aarch64_sqdmlal2_n<mode>_internal, forwarding operands 0-3 unchanged and
;; passing the selector as the final operand.
3625 | (define_expand "aarch64_sqdmlal2_n<mode>" | |
3626 | [(match_operand:<VWIDE> 0 "register_operand" "=w") | |
3627 | (match_operand:<VWIDE> 1 "register_operand" "w") | |
3628 | (match_operand:VQ_HSI 2 "register_operand" "w") | |
3629 | (match_operand:<VEL> 3 "register_operand" "w")] | |
3630 | "TARGET_SIMD" | |
3631 | { | |
3632 | rtx hi_half = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); | |
3633 | emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1], | |
3634 | operands[2], operands[3], | |
3635 | hi_half)); | |
3636 | DONE; | |
3637 | }) | |
3638 | ||
;; Expander for SQDMLSL2 with a scalar (<VEL>) multiplier.  Builds the half
;; selector with aarch64_simd_vect_par_cnst_half (<MODE>mode, true) and emits
;; aarch64_sqdmlsl2_n<mode>_internal, forwarding operands 0-3 unchanged and
;; passing the selector as the final operand.
3639 | (define_expand "aarch64_sqdmlsl2_n<mode>" | |
3640 | [(match_operand:<VWIDE> 0 "register_operand" "=w") | |
3641 | (match_operand:<VWIDE> 1 "register_operand" "w") | |
3642 | (match_operand:VQ_HSI 2 "register_operand" "w") | |
3643 | (match_operand:<VEL> 3 "register_operand" "w")] | |
3644 | "TARGET_SIMD" | |
3645 | { | |
3646 | rtx hi_half = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); | |
3647 | emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1], | |
3648 | operands[2], operands[3], | |
3649 | hi_half)); | |
3650 | DONE; | |
3651 | }) | |
3652 | ||
3653 | ;; vqdmull | |
3654 | ||
;; SQDMULL.  In the <VWIDE> mode:
;;   op0 = ss_ashift (sign_extend (op1) * sign_extend (op2), 1)
;; with both inputs being VSD_HSI registers.
3655 | (define_insn "aarch64_sqdmull<mode>" | |
3656 | [(set (match_operand:<VWIDE> 0 "register_operand" "=w") | |
3657 | (ss_ashift:<VWIDE> | |
3658 | (mult:<VWIDE> | |
3659 | (sign_extend:<VWIDE> | |
3660 | (match_operand:VSD_HSI 1 "register_operand" "w")) | |
3661 | (sign_extend:<VWIDE> | |
3662 | (match_operand:VSD_HSI 2 "register_operand" "w"))) | |
3663 | (const_int 1)))] | |
3664 | "TARGET_SIMD" | |
3665 | "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>" | |
52e95e58 | 3666 | [(set_attr "type" "neon_sat_mul_<Vetype>_long")] |
df401d54 | 3667 | ) |
3668 | ||
3669 | ;; vqdmull_lane | |
3670 | ||
;; SQDMULL by lane, VD_HSI multiplicand, lane taken from a <VCOND> register.
;; The second multiplicand is lane operand 3 of operand 2, vec_duplicate'd to
;; VD_HSI.  The lane index is remapped with ENDIAN_LANE_N on <VCOND>mode
;; before it is printed.
491d11ba | 3671 | (define_insn "aarch64_sqdmull_lane<mode>" |
df401d54 | 3672 | [(set (match_operand:<VWIDE> 0 "register_operand" "=w") |
3673 | (ss_ashift:<VWIDE> | |
3674 | (mult:<VWIDE> | |
3675 | (sign_extend:<VWIDE> | |
3676 | (match_operand:VD_HSI 1 "register_operand" "w")) | |
3677 | (sign_extend:<VWIDE> | |
3678 | (vec_duplicate:VD_HSI | |
3679 | (vec_select:<VEL> | |
c7a2a326 | 3680 | (match_operand:<VCOND> 2 "register_operand" "<vwx>") |
3681 | (parallel [(match_operand:SI 3 "immediate_operand" "i")]))) | |
3682 | )) | |
3683 | (const_int 1)))] | |
3684 | "TARGET_SIMD" | |
3685 | { | |
3686 | operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3]))); | |
3687 | return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]"; | |
3688 | } | |
3689 | [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")] | |
3690 | ) | |
3691 | ||
;; SQDMULL by lane, VD_HSI multiplicand, lane taken from a <VCONQ> register.
;; The second multiplicand is lane operand 3 of operand 2, vec_duplicate'd to
;; VD_HSI.  The lane index is remapped with ENDIAN_LANE_N on <VCONQ>mode
;; before it is printed.
491d11ba | 3692 | (define_insn "aarch64_sqdmull_laneq<mode>" |
c7a2a326 | 3693 | [(set (match_operand:<VWIDE> 0 "register_operand" "=w") |
3694 | (ss_ashift:<VWIDE> | |
3695 | (mult:<VWIDE> | |
3696 | (sign_extend:<VWIDE> | |
3697 | (match_operand:VD_HSI 1 "register_operand" "w")) | |
3698 | (sign_extend:<VWIDE> | |
3699 | (vec_duplicate:VD_HSI | |
3700 | (vec_select:<VEL> | |
3701 | (match_operand:<VCONQ> 2 "register_operand" "<vwx>") | |
df401d54 | 3702 | (parallel [(match_operand:SI 3 "immediate_operand" "i")]))) |
3703 | )) | |
3704 | (const_int 1)))] | |
3705 | "TARGET_SIMD" | |
c034024c | 3706 | { |
3707 | operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3]))); | |
3708 | return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]"; | |
3709 | } | |
52e95e58 | 3710 | [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")] |
df401d54 | 3711 | ) |
3712 | ||
;; SQDMULL by lane, scalar (SD_HSI) multiplicand, lane taken from a <VCOND>
;; register.  The second multiplicand is lane operand 3 of operand 2 (no
;; vec_duplicate in this form).  The lane index is remapped with
;; ENDIAN_LANE_N on <VCOND>mode before it is printed.
491d11ba | 3713 | (define_insn "aarch64_sqdmull_lane<mode>" |
df401d54 | 3714 | [(set (match_operand:<VWIDE> 0 "register_operand" "=w") |
3715 | (ss_ashift:<VWIDE> | |
3716 | (mult:<VWIDE> | |
3717 | (sign_extend:<VWIDE> | |
3718 | (match_operand:SD_HSI 1 "register_operand" "w")) | |
3719 | (sign_extend:<VWIDE> | |
3720 | (vec_select:<VEL> | |
c7a2a326 | 3721 | (match_operand:<VCOND> 2 "register_operand" "<vwx>") |
3722 | (parallel [(match_operand:SI 3 "immediate_operand" "i")])) | |
3723 | )) | |
3724 | (const_int 1)))] | |
3725 | "TARGET_SIMD" | |
3726 | { | |
3727 | operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3]))); | |
3728 | return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]"; | |
3729 | } | |
3730 | [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")] | |
3731 | ) | |
3732 | ||
;; SQDMULL by lane, scalar (SD_HSI) multiplicand, lane taken from a <VCONQ>
;; register.  The second multiplicand is lane operand 3 of operand 2 (no
;; vec_duplicate in this form).  The lane index is remapped with
;; ENDIAN_LANE_N on <VCONQ>mode before it is printed.
491d11ba | 3733 | (define_insn "aarch64_sqdmull_laneq<mode>" |
c7a2a326 | 3734 | [(set (match_operand:<VWIDE> 0 "register_operand" "=w") |
3735 | (ss_ashift:<VWIDE> | |
3736 | (mult:<VWIDE> | |
3737 | (sign_extend:<VWIDE> | |
3738 | (match_operand:SD_HSI 1 "register_operand" "w")) | |
3739 | (sign_extend:<VWIDE> | |
3740 | (vec_select:<VEL> | |
3741 | (match_operand:<VCONQ> 2 "register_operand" "<vwx>") | |
df401d54 | 3742 | (parallel [(match_operand:SI 3 "immediate_operand" "i")])) |
3743 | )) | |
3744 | (const_int 1)))] | |
3745 | "TARGET_SIMD" | |
c034024c | 3746 | { |
3747 | operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3]))); | |
3748 | return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]"; | |
3749 | } | |
52e95e58 | 3750 | [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")] |
df401d54 | 3751 | ) |
3752 | ||
df401d54 | 3753 | ;; vqdmull_n |
3754 | ||
;; SQDMULL with a scalar multiplier.  Operand 2 is a <VEL> register that is
;; vec_duplicate'd to VD_HSI; the template always prints it as element 0
;; (%2.<Vetype>[0]).
3755 | (define_insn "aarch64_sqdmull_n<mode>" | |
3756 | [(set (match_operand:<VWIDE> 0 "register_operand" "=w") | |
3757 | (ss_ashift:<VWIDE> | |
3758 | (mult:<VWIDE> | |
3759 | (sign_extend:<VWIDE> | |
3760 | (match_operand:VD_HSI 1 "register_operand" "w")) | |
3761 | (sign_extend:<VWIDE> | |
3762 | (vec_duplicate:VD_HSI | |
9d37257a | 3763 | (match_operand:<VEL> 2 "register_operand" "<vwx>"))) |
df401d54 | 3764 | ) |
3765 | (const_int 1)))] | |
3766 | "TARGET_SIMD" | |
3767 | "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]" | |
52e95e58 | 3768 | [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")] |
df401d54 | 3769 | ) |
3770 | ||
3771 | ;; vqdmull2 | |
3772 | ||
3773 | ||
3774 | ||
;; SQDMULL2.  Both multiplicands are <VHALF> halves vec_select'ed from the
;; VQ_HSI operands 1 and 2 using the same selector, operand 3, which must
;; satisfy vect_par_cnst_hi_half (the second use is a match_dup).  Operand 3
;; is not printed; the "2" mnemonic suffix stands in for it.
3775 | (define_insn "aarch64_sqdmull2<mode>_internal" | |
3776 | [(set (match_operand:<VWIDE> 0 "register_operand" "=w") | |
3777 | (ss_ashift:<VWIDE> | |
3778 | (mult:<VWIDE> | |
3779 | (sign_extend:<VWIDE> | |
3780 | (vec_select:<VHALF> | |
3781 | (match_operand:VQ_HSI 1 "register_operand" "w") | |
3782 | (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" ""))) | |
3783 | (sign_extend:<VWIDE> | |
3784 | (vec_select:<VHALF> | |
3785 | (match_operand:VQ_HSI 2 "register_operand" "w") | |
3786 | (match_dup 3))) | |
3787 | ) | |
3788 | (const_int 1)))] | |
3789 | "TARGET_SIMD" | |
3790 | "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>" | |
52e95e58 | 3791 | [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")] |
df401d54 | 3792 | ) |
3793 | ||
;; Expander for the three-operand form: builds the lane-selection parallel
;; with aarch64_simd_vect_par_cnst_half (<MODE>mode, true) and emits
;; aarch64_sqdmull2<mode>_internal with that parallel as its fourth operand.
3794 | (define_expand "aarch64_sqdmull2<mode>" | |
3795 | [(match_operand:<VWIDE> 0 "register_operand" "=w") | |
3796 | (match_operand:VQ_HSI 1 "register_operand" "w") | |
c7a2a326 | 3797 | (match_operand:VQ_HSI 2 "register_operand" "w")] |
df401d54 | 3798 | "TARGET_SIMD" |
3799 | { | |
3800 | rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); | |
3801 | emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1], | |
3802 | operands[2], p)); | |
3803 | DONE; | |
3804 | }) | |
3805 | ||
3806 | ;; vqdmull2_lane | |
3807 | ||
;; SQDMULL2 by lane of a <VCOND> vector: the half of operand 1 selected by
;; operand 4 is multiplied by lane operand 3 of operand 2 (duplicated across
;; a <VHALF>), and the product is saturating-shifted left by one.
;; The lane number is remapped through ENDIAN_LANE_N in the output code, just
;; before the assembly template is returned.
3808 | (define_insn "aarch64_sqdmull2_lane<mode>_internal" | |
3809 | [(set (match_operand:<VWIDE> 0 "register_operand" "=w") | |
3810 | (ss_ashift:<VWIDE> | |
3811 | (mult:<VWIDE> | |
3812 | (sign_extend:<VWIDE> | |
3813 | (vec_select:<VHALF> | |
3814 | (match_operand:VQ_HSI 1 "register_operand" "w") | |
3815 | (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" ""))) | |
3816 | (sign_extend:<VWIDE> | |
3817 | (vec_duplicate:<VHALF> | |
3818 | (vec_select:<VEL> | |
c7a2a326 | 3819 | (match_operand:<VCOND> 2 "register_operand" "<vwx>") |
3820 | (parallel [(match_operand:SI 3 "immediate_operand" "i")]))) | |
3821 | )) | |
3822 | (const_int 1)))] | |
3823 | "TARGET_SIMD" | |
3824 | { | |
3825 | operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3]))); | |
3826 | return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]"; | |
3827 | } | |
3828 | [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")] | |
3829 | ) | |
3830 | ||
;; SQDMULL2 by lane of a <VCONQ> vector: same RTL shape as the _lane variant,
;; but operand 2 has mode <VCONQ> and the lane number is remapped with
;; ENDIAN_LANE_N (<VCONQ>mode, ...) in the output code.
3831 | (define_insn "aarch64_sqdmull2_laneq<mode>_internal" | |
3832 | [(set (match_operand:<VWIDE> 0 "register_operand" "=w") | |
3833 | (ss_ashift:<VWIDE> | |
3834 | (mult:<VWIDE> | |
3835 | (sign_extend:<VWIDE> | |
3836 | (vec_select:<VHALF> | |
3837 | (match_operand:VQ_HSI 1 "register_operand" "w") | |
3838 | (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" ""))) | |
3839 | (sign_extend:<VWIDE> | |
3840 | (vec_duplicate:<VHALF> | |
3841 | (vec_select:<VEL> | |
3842 | (match_operand:<VCONQ> 2 "register_operand" "<vwx>") | |
df401d54 | 3843 | (parallel [(match_operand:SI 3 "immediate_operand" "i")]))) |
3844 | )) | |
3845 | (const_int 1)))] | |
3846 | "TARGET_SIMD" | |
c034024c | 3847 | { |
3848 | operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3]))); | |
3849 | return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]"; | |
3850 | } | |
52e95e58 | 3851 | [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")] |
df401d54 | 3852 | ) |
3853 | ||
;; Expander for the <VCOND>-lane form: builds the lane-selection parallel
;; with aarch64_simd_vect_par_cnst_half (<MODE>mode, true) and passes it as
;; the fifth operand of aarch64_sqdmull2_lane<mode>_internal.  The lane index
;; (operand 3) is forwarded unchanged.
3854 | (define_expand "aarch64_sqdmull2_lane<mode>" | |
3855 | [(match_operand:<VWIDE> 0 "register_operand" "=w") | |
3856 | (match_operand:VQ_HSI 1 "register_operand" "w") | |
c7a2a326 | 3857 | (match_operand:<VCOND> 2 "register_operand" "<vwx>") |
df401d54 | 3858 | (match_operand:SI 3 "immediate_operand" "i")] |
3859 | "TARGET_SIMD" | |
3860 | { | |
3861 | rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); | |
df401d54 | 3862 | emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1], |
3863 | operands[2], operands[3], | |
3864 | p)); | |
3865 | DONE; | |
3866 | }) | |
3867 | ||
;; Expander for the <VCONQ>-lane form: builds the lane-selection parallel
;; with aarch64_simd_vect_par_cnst_half (<MODE>mode, true) and passes it as
;; the fifth operand of aarch64_sqdmull2_laneq<mode>_internal.  The lane index
;; (operand 3) is forwarded unchanged.
3868 | (define_expand "aarch64_sqdmull2_laneq<mode>" | |
3869 | [(match_operand:<VWIDE> 0 "register_operand" "=w") | |
3870 | (match_operand:VQ_HSI 1 "register_operand" "w") | |
c7a2a326 | 3871 | (match_operand:<VCONQ> 2 "register_operand" "<vwx>") |
df401d54 | 3872 | (match_operand:SI 3 "immediate_operand" "i")] |
3873 | "TARGET_SIMD" | |
3874 | { | |
3875 | rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); | |
c7a2a326 | 3876 | emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1], |
df401d54 | 3877 | operands[2], operands[3], |
3878 | p)); | |
3879 | DONE; | |
3880 | }) | |
3881 | ||
3882 | ;; vqdmull2_n | |
3883 | ||
;; SQDMULL2 by scalar: the half of operand 1 selected by operand 3 is
;; multiplied by a <VHALF> duplicate of the <VEL> scalar operand 2, and the
;; product is saturating-shifted left by one.  The assembly template always
;; names lane [0] of operand 2.
3884 | (define_insn "aarch64_sqdmull2_n<mode>_internal" | |
3885 | [(set (match_operand:<VWIDE> 0 "register_operand" "=w") | |
3886 | (ss_ashift:<VWIDE> | |
3887 | (mult:<VWIDE> | |
3888 | (sign_extend:<VWIDE> | |
3889 | (vec_select:<VHALF> | |
3890 | (match_operand:VQ_HSI 1 "register_operand" "w") | |
3891 | (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" ""))) | |
3892 | (sign_extend:<VWIDE> | |
3893 | (vec_duplicate:<VHALF> | |
9d37257a | 3894 | (match_operand:<VEL> 2 "register_operand" "<vwx>"))) |
df401d54 | 3895 | ) |
3896 | (const_int 1)))] | |
3897 | "TARGET_SIMD" | |
3898 | "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]" | |
52e95e58 | 3899 | [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")] |
df401d54 | 3900 | ) |
3901 | ||
;; Expander for the by-scalar form: builds the lane-selection parallel with
;; aarch64_simd_vect_par_cnst_half (<MODE>mode, true) and emits
;; aarch64_sqdmull2_n<mode>_internal with it as the fourth operand.
3902 | (define_expand "aarch64_sqdmull2_n<mode>" | |
3903 | [(match_operand:<VWIDE> 0 "register_operand" "=w") | |
3904 | (match_operand:VQ_HSI 1 "register_operand" "w") | |
3905 | (match_operand:<VEL> 2 "register_operand" "w")] | |
3906 | "TARGET_SIMD" | |
3907 | { | |
3908 | rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); | |
3909 | emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1], | |
3910 | operands[2], p)); | |
3911 | DONE; | |
3912 | }) | |
3913 | ||
3914 | ;; vshl | |
3915 | ||
;; Shift by register, modelled as an unspec over the VSHL iterator: operands
;; 1 and 2 are both VSDQ_I_DI registers and the mnemonic prefix comes from
;; the <sur> attribute.  (The stray C-style ';' that used to follow the
;; output template has been dropped; it was only parsed as an md comment.)
3916 | (define_insn "aarch64_<sur>shl<mode>" | |
3917 | [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w") | |
3918 | (unspec:VSDQ_I_DI | |
3919 | [(match_operand:VSDQ_I_DI 1 "register_operand" "w") | |
3920 | (match_operand:VSDQ_I_DI 2 "register_operand" "w")] | |
3921 | VSHL))] | |
3922 | "TARGET_SIMD" | |
3923 | "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>" | |
52e95e58 | 3924 | [(set_attr "type" "neon_shift_reg<q>")] |
df401d54 | 3925 | ) |
3926 | ||
3927 | ||
3928 | ;; vqshl | |
3929 | ||
;; Saturating (optionally rounding, per <r>) shift by register, modelled as
;; an unspec over the VQSHL iterator: operands 1 and 2 are both VSDQ_I
;; registers.  (The stray C-style ';' that used to follow the output template
;; has been dropped; it was only parsed as an md comment.)
3930 | (define_insn "aarch64_<sur>q<r>shl<mode>" | |
3931 | [(set (match_operand:VSDQ_I 0 "register_operand" "=w") | |
3932 | (unspec:VSDQ_I | |
3933 | [(match_operand:VSDQ_I 1 "register_operand" "w") | |
3934 | (match_operand:VSDQ_I 2 "register_operand" "w")] | |
3935 | VQSHL))] | |
3936 | "TARGET_SIMD" | |
3937 | "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>" | |
52e95e58 | 3938 | [(set_attr "type" "neon_sat_shift_reg<q>")] |
df401d54 | 3939 | ) |
3940 | ||
df401d54 | 3941 | ;; vshll_n |
3942 | ||
;; Widening shift left by immediate (VSHLL unspec) of a VD_BHSI operand into
;; <VWIDE>.  The output code prints plain "shll" when the shift amount equals
;; the element width in bits (GET_MODE_UNIT_SIZE * BITS_PER_UNIT), and
;; "<sur>shll" for every other amount.
3943 | (define_insn "aarch64_<sur>shll_n<mode>" | |
3944 | [(set (match_operand:<VWIDE> 0 "register_operand" "=w") | |
658fa7f6 | 3945 | (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w") |
a92326b6 | 3946 | (match_operand:SI 2 |
3947 | "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")] | |
df401d54 | 3948 | VSHLL))] |
3949 | "TARGET_SIMD" | |
3950 | "* | |
3951 | int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT; | |
df401d54 | 3952 | if (INTVAL (operands[2]) == bit_width) |
3953 | { | |
3954 | return \"shll\\t%0.<Vwtype>, %1.<Vtype>, %2\"; | |
3955 | } | |
3956 | else { | |
3957 | return \"<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2\"; | |
3958 | }" | |
52e95e58 | 3959 | [(set_attr "type" "neon_shift_imm_long")] |
df401d54 | 3960 | ) |
3961 | ||
3962 | ;; vshll_high_n | |
3963 | ||
;; Widening shift left by immediate (VSHLL unspec) of a VQW operand into
;; <VWIDE>, printed as the "2" form of the instruction.  The output code
;; prints plain "shll2" when the shift amount equals the element width in
;; bits, and "<sur>shll2" otherwise.  Unlike the shll_n pattern, operand 2
;; only has to satisfy immediate_operand.
3964 | (define_insn "aarch64_<sur>shll2_n<mode>" | |
3965 | [(set (match_operand:<VWIDE> 0 "register_operand" "=w") | |
3966 | (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w") | |
3967 | (match_operand:SI 2 "immediate_operand" "i")] | |
3968 | VSHLL))] | |
3969 | "TARGET_SIMD" | |
3970 | "* | |
3971 | int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT; | |
df401d54 | 3972 | if (INTVAL (operands[2]) == bit_width) |
3973 | { | |
3974 | return \"shll2\\t%0.<Vwtype>, %1.<Vtype>, %2\"; | |
3975 | } | |
3976 | else { | |
3977 | return \"<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2\"; | |
3978 | }" | |
52e95e58 | 3979 | [(set_attr "type" "neon_shift_imm_long")] |
df401d54 | 3980 | ) |
3981 | ||
df401d54 | 3982 | ;; vrshr_n |
3983 | ||
;; Shift right by immediate, modelled as an unspec over the VRSHR_N iterator.
;; Operand 2 must satisfy aarch64_simd_shift_imm_offset_<ve_mode> and is
;; printed verbatim as the shift amount.
;; NOTE(review): the "type" attribute is neon_sat_shift_imm<q> although the
;; mnemonic carries no saturating "q" -- confirm this is intended.
3984 | (define_insn "aarch64_<sur>shr_n<mode>" | |
3985 | [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w") | |
3986 | (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w") | |
a92326b6 | 3987 | (match_operand:SI 2 |
3988 | "aarch64_simd_shift_imm_offset_<ve_mode>" "i")] | |
df401d54 | 3989 | VRSHR_N))] |
3990 | "TARGET_SIMD" | |
a92326b6 | 3991 | "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2" |
52e95e58 | 3992 | [(set_attr "type" "neon_sat_shift_imm<q>")] |
df401d54 | 3993 | ) |
3994 | ||
3995 | ;; v(r)sra_n | |
3996 | ||
;; <sur>sra by immediate (VSRA unspec).  Operand 1 is tied to the destination
;; through constraint "0" and therefore does not appear in the template;
;; operand 2 is the register that is shifted by immediate operand 3.
3997 | (define_insn "aarch64_<sur>sra_n<mode>" | |
3998 | [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w") | |
3999 | (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0") | |
4000 | (match_operand:VSDQ_I_DI 2 "register_operand" "w") | |
a92326b6 | 4001 | (match_operand:SI 3 |
4002 | "aarch64_simd_shift_imm_offset_<ve_mode>" "i")] | |
df401d54 | 4003 | VSRA))] |
4004 | "TARGET_SIMD" | |
a92326b6 | 4005 | "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3" |
52e95e58 | 4006 | [(set_attr "type" "neon_shift_acc<q>")] |
df401d54 | 4007 | ) |
4008 | ||
4009 | ;; vs<lr>i_n | |
4010 | ||
;; s<lr>i by immediate (VSLRI unspec).  Operand 1 is tied to the destination
;; through constraint "0"; operand 2 and immediate operand 3 are the ones
;; printed.  The immediate predicate name is built from <offsetlr> and
;; <ve_mode>.  Although the pattern name contains <sur>, the emitted mnemonic
;; does not.
4011 | (define_insn "aarch64_<sur>s<lr>i_n<mode>" | |
4012 | [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w") | |
4013 | (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0") | |
4014 | (match_operand:VSDQ_I_DI 2 "register_operand" "w") | |
a92326b6 | 4015 | (match_operand:SI 3 |
4016 | "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")] | |
df401d54 | 4017 | VSLRI))] |
4018 | "TARGET_SIMD" | |
a92326b6 | 4019 | "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3" |
52e95e58 | 4020 | [(set_attr "type" "neon_shift_imm<q>")] |
df401d54 | 4021 | ) |
4022 | ||
4023 | ;; vqshl(u) | |
4024 | ||
;; Saturating shift left by immediate (VQSHL_N unspec) on VSDQ_I modes.
;; Operand 2 must satisfy aarch64_simd_shift_imm_<ve_mode> and is printed
;; verbatim as the shift amount.
4025 | (define_insn "aarch64_<sur>qshl<u>_n<mode>" | |
4026 | [(set (match_operand:VSDQ_I 0 "register_operand" "=w") | |
4027 | (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w") | |
a92326b6 | 4028 | (match_operand:SI 2 |
4029 | "aarch64_simd_shift_imm_<ve_mode>" "i")] | |
df401d54 | 4030 | VQSHL_N))] |
4031 | "TARGET_SIMD" | |
a92326b6 | 4032 | "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2" |
52e95e58 | 4033 | [(set_attr "type" "neon_sat_shift_imm<q>")] |
df401d54 | 4034 | ) |
4035 | ||
4036 | ||
4037 | ;; vq(r)shr(u)n_n | |
4038 | ||
;; Saturating narrowing shift right by immediate (VQSHRN_N unspec): the
;; VSQN_HSDI source is written to a <VNARROWQ> destination.  Operand 2 must
;; satisfy aarch64_simd_shift_imm_offset_<ve_mode>.
4039 | (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>" | |
4040 | [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w") | |
4041 | (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w") | |
a92326b6 | 4042 | (match_operand:SI 2 |
4043 | "aarch64_simd_shift_imm_offset_<ve_mode>" "i")] | |
df401d54 | 4044 | VQSHRN_N))] |
4045 | "TARGET_SIMD" | |
a92326b6 | 4046 | "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2" |
52e95e58 | 4047 | [(set_attr "type" "neon_sat_shift_imm_narrow_q")] |
df401d54 | 4048 | ) |
4049 | ||
4050 | ||
6ba1316b | 4051 | ;; cm(eq|ge|gt|lt|le) |
4052 | ;; Note, we have constraints for Dz and Z as different expanders | |
4053 | ;; have different ideas of what should be passed to this pattern. | |
df401d54 | 4054 | |
;; Integer vector compare over the COMPARISONS iterator; the result is the
;; negated comparison in mode <V_cmp_result>.
;; Alternative 0 compares two registers and prints cm<n_optab> with the
;; operands in <cmp_1>/<cmp_2> order; alternative 1 (constraint "ZDz")
;; compares operand 1 against zero and prints cm<optab> ..., #0.
6ba1316b | 4055 | (define_insn "aarch64_cm<optab><mode>" |
df401d54 | 4056 | [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w") |
6ba1316b | 4057 | (neg:<V_cmp_result> |
4058 | (COMPARISONS:<V_cmp_result> | |
658fa7f6 | 4059 | (match_operand:VDQ_I 1 "register_operand" "w,w") |
4060 | (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz") | |
6ba1316b | 4061 | )))] |
df401d54 | 4062 | "TARGET_SIMD" |
4063 | "@ | |
6ba1316b | 4064 | cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype> |
4065 | cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0" | |
52e95e58 | 4066 | [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")] |
df401d54 | 4067 | ) |
4068 | ||
;; DImode compare over the COMPARISONS iterator.  It has two "w" alternatives
;; and one general-register ("r") alternative, and it clobbers CC_REGNUM.
;; The output template is "#", so the insn is always split once
;; reload_completed holds.  If operands 0 and 1 are both general-purpose
;; registers, the split emits a flag-setting compare
;; (aarch64_gen_compare_reg) followed by gen_cstoredi_neg.  Otherwise the
;; preparation code falls through and the replacement is the same set without
;; the CC clobber.
6ba1316b | 4069 | (define_insn_and_split "aarch64_cm<optab>di" |
4070 | [(set (match_operand:DI 0 "register_operand" "=w,w,r") | |
4071 | (neg:DI | |
4072 | (COMPARISONS:DI | |
4073 | (match_operand:DI 1 "register_operand" "w,w,r") | |
4074 | (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r") | |
b776525d | 4075 | ))) |
4076 | (clobber (reg:CC CC_REGNUM))] | |
6ba1316b | 4077 | "TARGET_SIMD" |
daebd468 | 4078 | "#" |
4079 | "reload_completed" | |
4080 | [(set (match_operand:DI 0 "register_operand") | |
4081 | (neg:DI | |
4082 | (COMPARISONS:DI | |
4083 | (match_operand:DI 1 "register_operand") | |
4084 | (match_operand:DI 2 "aarch64_simd_reg_or_zero") | |
4085 | )))] | |
6ba1316b | 4086 | { |
daebd468 | 4087 | /* If we are in the general purpose register file, |
4088 | we split to a sequence of comparison and store. */ | |
4089 | if (GP_REGNUM_P (REGNO (operands[0])) | |
4090 | && GP_REGNUM_P (REGNO (operands[1]))) | |
4091 | { | |
3754d046 | 4092 | machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]); |
daebd468 | 4093 | rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]); |
4094 | rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]); | |
4095 | emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg)); | |
4096 | DONE; | |
4097 | } | |
4098 | /* Otherwise, we expand to a similar pattern which does not | |
4099 | clobber CC_REGNUM. */ | |
6ba1316b | 4100 | } |
52e95e58 | 4101 | [(set_attr "type" "neon_compare, neon_compare_zero, multiple")] |
6ba1316b | 4102 | ) |
4103 | ||
;; Post-reload DImode compare over COMPARISONS with no CC clobber: it is only
;; enabled when reload_completed, and both alternatives use SIMD ("w")
;; registers.  It prints the D-register form (%d) of cm<n_optab> for
;; register-register compares, or cm<optab> ..., #0 for compares against
;; zero.
daebd468 | 4104 | (define_insn "*aarch64_cm<optab>di" |
4105 | [(set (match_operand:DI 0 "register_operand" "=w,w") | |
4106 | (neg:DI | |
4107 | (COMPARISONS:DI | |
4108 | (match_operand:DI 1 "register_operand" "w,w") | |
4109 | (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz") | |
4110 | )))] | |
4111 | "TARGET_SIMD && reload_completed" | |
4112 | "@ | |
4113 | cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2> | |
4114 | cm<optab>\t%d0, %d1, #0" | |
4115 | [(set_attr "type" "neon_compare, neon_compare_zero")] | |
4116 | ) | |
4117 | ||
6ba1316b | 4118 | ;; cm(hs|hi) |
df401d54 | 4119 | |
;; Integer vector compare over the UCOMPARISONS iterator; the result is the
;; negated comparison in mode <V_cmp_result>.  There is only a
;; register-register alternative, printed as cm<n_optab> with the operands in
;; <cmp_1>/<cmp_2> order.
6ba1316b | 4120 | (define_insn "aarch64_cm<optab><mode>" |
df401d54 | 4121 | [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w") |
6ba1316b | 4122 | (neg:<V_cmp_result> |
4123 | (UCOMPARISONS:<V_cmp_result> | |
658fa7f6 | 4124 | (match_operand:VDQ_I 1 "register_operand" "w") |
4125 | (match_operand:VDQ_I 2 "register_operand" "w") | |
6ba1316b | 4126 | )))] |
df401d54 | 4127 | "TARGET_SIMD" |
6ba1316b | 4128 | "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>" |
52e95e58 | 4129 | [(set_attr "type" "neon_compare<q>")] |
df401d54 | 4130 | ) |
4131 | ||
;; DImode compare over the UCOMPARISONS iterator, with one "w" and one "r"
;; alternative; it clobbers CC_REGNUM.  The output template is "#", so the
;; insn is always split once reload_completed holds.  If operands 0 and 1 are
;; both general-purpose registers, the split emits aarch64_gen_compare_reg
;; followed by gen_cstoredi_neg, building the comparison rtx in CCmode.
;; Otherwise the preparation code falls through and the replacement is the
;; same set without the CC clobber.
6ba1316b | 4132 | (define_insn_and_split "aarch64_cm<optab>di" |
4133 | [(set (match_operand:DI 0 "register_operand" "=w,r") | |
4134 | (neg:DI | |
4135 | (UCOMPARISONS:DI | |
4136 | (match_operand:DI 1 "register_operand" "w,r") | |
4137 | (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r") | |
b776525d | 4138 | ))) |
4139 | (clobber (reg:CC CC_REGNUM))] | |
6ba1316b | 4140 | "TARGET_SIMD" |
daebd468 | 4141 | "#" |
4142 | "reload_completed" | |
4143 | [(set (match_operand:DI 0 "register_operand") | |
4144 | (neg:DI | |
4145 | (UCOMPARISONS:DI | |
4146 | (match_operand:DI 1 "register_operand") | |
4147 | (match_operand:DI 2 "aarch64_simd_reg_or_zero") | |
4148 | )))] | |
6ba1316b | 4149 | { |
daebd468 | 4150 | /* If we are in the general purpose register file, |
4151 | we split to a sequence of comparison and store. */ | |
4152 | if (GP_REGNUM_P (REGNO (operands[0])) | |
4153 | && GP_REGNUM_P (REGNO (operands[1]))) | |
4154 | { | |
3754d046 | 4155 | machine_mode mode = CCmode; |
daebd468 | 4156 | rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]); |
4157 | rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]); | |
4158 | emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg)); | |
4159 | DONE; | |
4160 | } | |
4161 | /* Otherwise, we expand to a similar pattern which does not | |
4162 | clobber CC_REGNUM. */ | |
6ba1316b | 4163 | } |
daebd468 | 4164 | [(set_attr "type" "neon_compare,multiple")] |
4165 | ) | |
4166 | ||
;; Post-reload DImode compare over UCOMPARISONS with no CC clobber: it is
;; only enabled when reload_completed, uses SIMD ("w") registers, and prints
;; the D-register form (%d) of cm<n_optab>.
4167 | (define_insn "*aarch64_cm<optab>di" | |
4168 | [(set (match_operand:DI 0 "register_operand" "=w") | |
4169 | (neg:DI | |
4170 | (UCOMPARISONS:DI | |
4171 | (match_operand:DI 1 "register_operand" "w") | |
4172 | (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w") | |
4173 | )))] | |
4174 | "TARGET_SIMD && reload_completed" | |
4175 | "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>" | |
4176 | [(set_attr "type" "neon_compare")] | |
6ba1316b | 4177 | ) |
1eb525c3 | 4178 | |
6ba1316b | 4179 | ;; cmtst |
4180 | ||
bead021f | 4181 | ;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst, |
4182 | ;; we don't have any insns using ne, and aarch64_vcond_internal outputs | |
4183 | ;; not (neg (eq (and x y) 0)) | |
4184 | ;; which is rewritten by simplify_rtx as | |
4185 | ;; plus (eq (and x y) 0) -1. | |
4186 | ||
;; Vector CMTST, matched in the form plus (eq (and x y) zero) minus_one.
;; Operand 3 must satisfy aarch64_simd_imm_zero and operand 4 must satisfy
;; aarch64_simd_imm_minus_one; neither is printed.  Only operands 0-2 appear
;; in the assembly template.
6ba1316b | 4187 | (define_insn "aarch64_cmtst<mode>" |
4188 | [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w") | |
bead021f | 4189 | (plus:<V_cmp_result> |
4190 | (eq:<V_cmp_result> | |
658fa7f6 | 4191 | (and:VDQ_I |
4192 | (match_operand:VDQ_I 1 "register_operand" "w") | |
4193 | (match_operand:VDQ_I 2 "register_operand" "w")) | |
4194 | (match_operand:VDQ_I 3 "aarch64_simd_imm_zero")) | |
bead021f | 4195 | (match_operand:<V_cmp_result> 4 "aarch64_simd_imm_minus_one"))) |
4196 | ] | |
6ba1316b | 4197 | "TARGET_SIMD" |
4198 | "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>" | |
52e95e58 | 4199 | [(set_attr "type" "neon_tst<q>")] |
6ba1316b | 4200 | ) |
4201 | ||
;; DImode CMTST expressed as neg (ne (and x y) 0), with one "w" and one "r"
;; alternative; it clobbers CC_REGNUM.  The output template is "#", so the
;; insn is always split once reload_completed holds.  If operands 0 and 1 are
;; both general-purpose registers, the split builds the AND rtx, emits a
;; flag-setting compare of it against zero (aarch64_gen_compare_reg), and
;; then emits gen_cstoredi_neg.  Otherwise the preparation code falls through
;; and the replacement is the same set without the CC clobber.
4202 | (define_insn_and_split "aarch64_cmtstdi" | |
4203 | [(set (match_operand:DI 0 "register_operand" "=w,r") | |
4204 | (neg:DI | |
4205 | (ne:DI | |
4206 | (and:DI | |
4207 | (match_operand:DI 1 "register_operand" "w,r") | |
4208 | (match_operand:DI 2 "register_operand" "w,r")) | |
b776525d | 4209 | (const_int 0)))) |
4210 | (clobber (reg:CC CC_REGNUM))] | |
6ba1316b | 4211 | "TARGET_SIMD" |
daebd468 | 4212 | "#" |
4213 | "reload_completed" | |
4214 | [(set (match_operand:DI 0 "register_operand") | |
4215 | (neg:DI | |
4216 | (ne:DI | |
4217 | (and:DI | |
4218 | (match_operand:DI 1 "register_operand") | |
4219 | (match_operand:DI 2 "register_operand")) | |
4220 | (const_int 0))))] | |
6ba1316b | 4221 | { |
daebd468 | 4222 | /* If we are in the general purpose register file, |
4223 | we split to a sequence of comparison and store. */ | |
4224 | if (GP_REGNUM_P (REGNO (operands[0])) | |
4225 | && GP_REGNUM_P (REGNO (operands[1]))) | |
4226 | { | |
4227 | rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]); | |
3754d046 | 4228 | machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx); |
daebd468 | 4229 | rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx); |
4230 | rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx); | |
4231 | emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg)); | |
4232 | DONE; | |
4233 | } | |
4234 | /* Otherwise, we expand to a similar pattern which does not | |
4235 | clobber CC_REGNUM. */ | |
6ba1316b | 4236 | } |
daebd468 | 4237 | [(set_attr "type" "neon_tst,multiple")] |
4238 | ) | |
4239 | ||
;; DImode CMTST with no CC clobber, using SIMD ("w") registers only and
;; printing the D-register form (%d) of cmtst.
;; NOTE(review): the condition is plain TARGET_SIMD, whereas the
;; *aarch64_cm<optab>di patterns also require reload_completed -- confirm
;; the difference is intended.
4240 | (define_insn "*aarch64_cmtstdi" | |
4241 | [(set (match_operand:DI 0 "register_operand" "=w") | |
4242 | (neg:DI | |
4243 | (ne:DI | |
4244 | (and:DI | |
4245 | (match_operand:DI 1 "register_operand" "w") | |
4246 | (match_operand:DI 2 "register_operand" "w")) | |
4247 | (const_int 0))))] | |
4248 | "TARGET_SIMD" | |
4249 | "cmtst\t%d0, %d1, %d2" | |
52e95e58 | 4250 | [(set_attr "type" "neon_tst")] |
6ba1316b | 4251 | ) |
4252 | ||
4253 | ;; fcm(eq|ge|gt|le|lt) | |
4254 | ||
;; Floating-point compare over COMPARISONS for VALLF modes; the result is the
;; negated comparison in mode <V_cmp_result>.
;; Alternative 0 compares two registers and prints fcm<n_optab> with the
;; operands in <cmp_1>/<cmp_2> order; alternative 1 (constraint "YDz")
;; compares operand 1 against zero and prints fcm<optab> ..., 0.
4255 | (define_insn "aarch64_cm<optab><mode>" | |
1eb525c3 | 4256 | [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w") |
6ba1316b | 4257 | (neg:<V_cmp_result> |
4258 | (COMPARISONS:<V_cmp_result> | |
4259 | (match_operand:VALLF 1 "register_operand" "w,w") | |
4260 | (match_operand:VALLF 2 "aarch64_simd_reg_or_zero" "w,YDz") | |
4261 | )))] | |
1eb525c3 | 4262 | "TARGET_SIMD" |
4263 | "@ | |
6ba1316b | 4264 | fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype> |
4265 | fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0" | |
52e95e58 | 4266 | [(set_attr "type" "neon_fp_compare_<Vetype><q>")] |
1eb525c3 | 4267 | ) |
4268 | ||
3310ab7f | 4269 | ;; fac(ge|gt) |
4270 | ;; Note we can also handle what would be fac(le|lt) by | |
4271 | ;; generating fac(ge|gt). | |
4272 | ||
;; Floating-point absolute compare: matches a FAC_COMPARISONS comparison
;; whose two VALLF operands are each wrapped in abs, and prints fac<n_optab>
;; with the operands in <cmp_1>/<cmp_2> order.
4273 | (define_insn "*aarch64_fac<optab><mode>" | |
4274 | [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w") | |
4275 | (neg:<V_cmp_result> | |
4276 | (FAC_COMPARISONS:<V_cmp_result> | |
4277 | (abs:VALLF (match_operand:VALLF 1 "register_operand" "w")) | |
4278 | (abs:VALLF (match_operand:VALLF 2 "register_operand" "w")) | |
4279 | )))] | |
4280 | "TARGET_SIMD" | |
4281 | "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>" | |
52e95e58 | 4282 | [(set_attr "type" "neon_fp_compare_<Vetype><q>")] |
3310ab7f | 4283 | ) |
4284 | ||
df401d54 | 4285 | ;; addp |
4286 | ||
;; ADDP on two VD_BHSI registers, modelled as UNSPEC_ADDP; the result has the
;; same mode as the inputs.
4287 | (define_insn "aarch64_addp<mode>" | |
4288 | [(set (match_operand:VD_BHSI 0 "register_operand" "=w") | |
4289 | (unspec:VD_BHSI | |
4290 | [(match_operand:VD_BHSI 1 "register_operand" "w") | |
4291 | (match_operand:VD_BHSI 2 "register_operand" "w")] | |
4292 | UNSPEC_ADDP))] | |
4293 | "TARGET_SIMD" | |
4294 | "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>" | |
52e95e58 | 4295 | [(set_attr "type" "neon_reduc_add<q>")] |
df401d54 | 4296 | ) |
4297 | ||
;; Single-input ADDP: takes one V2DI register and produces a DI result
;; (UNSPEC_ADDP), printed as "addp %d0, %1.2d".
4298 | (define_insn "aarch64_addpdi" | |
4299 | [(set (match_operand:DI 0 "register_operand" "=w") | |
4300 | (unspec:DI | |
4301 | [(match_operand:V2DI 1 "register_operand" "w")] | |
4302 | UNSPEC_ADDP))] | |
4303 | "TARGET_SIMD" | |
4304 | "addp\t%d0, %1.2d" | |
52e95e58 | 4305 | [(set_attr "type" "neon_reduc_add")] |
df401d54 | 4306 | ) |
4307 | ||
df401d54 | 4308 | ;; sqrt |
4309 | ||
;; Vector square root for VDQF modes, expressed with the generic sqrt rtx
;; and printed as fsqrt.
4310 | (define_insn "sqrt<mode>2" | |
4311 | [(set (match_operand:VDQF 0 "register_operand" "=w") | |
4312 | (sqrt:VDQF (match_operand:VDQF 1 "register_operand" "w")))] | |
4313 | "TARGET_SIMD" | |
4314 | "fsqrt\\t%0.<Vtype>, %1.<Vtype>" | |
52e95e58 | 4315 | [(set_attr "type" "neon_fp_sqrt_<Vetype><q>")] |
df401d54 | 4316 | ) |
4317 | ||
df401d54 | 4318 | ;; Patterns for vector struct loads and stores. |
4319 | ||
;; LD2 structure load: OImode memory operand 1 (constraint "Utv") is loaded
;; into the OImode register list printed as %S0 - %T0.
;; The inner VQ unspec with UNSPEC_VSTRUCTDUMMY has no operands; presumably
;; it exists only to bring the VQ mode iterator into the pattern.
a91cc579 | 4320 | (define_insn "aarch64_simd_ld2<mode>" |
df401d54 | 4321 | [(set (match_operand:OI 0 "register_operand" "=w") |
4322 | (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv") | |
4323 | (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] | |
4324 | UNSPEC_LD2))] | |
4325 | "TARGET_SIMD" | |
4326 | "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1" | |
52e95e58 | 4327 | [(set_attr "type" "neon_load2_2reg<q>")] |
4328 | ) | |
df401d54 | 4329 | |
;; LD2R (UNSPEC_LD2_DUP): BLKmode memory operand 1 is loaded into the OImode
;; register list printed as %S0 - %T0.  The pattern iterates over VALLDIF
;; through the dummy unspec.
20bce7af | 4330 | (define_insn "aarch64_simd_ld2r<mode>" |
4331 | [(set (match_operand:OI 0 "register_operand" "=w") | |
fda7ad8b | 4332 | (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv") |
20bce7af | 4333 | (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ] |
4334 | UNSPEC_LD2_DUP))] | |
4335 | "TARGET_SIMD" | |
4336 | "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1" | |
4337 | [(set_attr "type" "neon_load2_all_lanes<q>")] | |
4338 | ) | |
4339 | ||
;; Single-lane LD2 (UNSPEC_LD2_LANE): operand 2 is the existing OImode
;; register list, tied to the destination through constraint "0", and
;; operand 3 is the lane index.  The index is remapped through ENDIAN_LANE_N
;; in the output code, just before the assembly template is returned.
37a7d2c8 | 4340 | (define_insn "aarch64_vec_load_lanesoi_lane<mode>" |
4341 | [(set (match_operand:OI 0 "register_operand" "=w") | |
fda7ad8b | 4342 | (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv") |
37a7d2c8 | 4343 | (match_operand:OI 2 "register_operand" "0") |
4344 | (match_operand:SI 3 "immediate_operand" "i") | |
0146686c | 4345 | (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ] |
37a7d2c8 | 4346 | UNSPEC_LD2_LANE))] |
4347 | "TARGET_SIMD" | |
0146686c | 4348 | { |
4349 | operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]))); | |
4350 | return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1"; | |
4351 | } | |
37a7d2c8 | 4352 | [(set_attr "type" "neon_load2_one_lane")] |
4353 | ) | |
4354 | ||
;; Expander for a two-vector structure load.  When BYTES_BIG_ENDIAN, it
;; loads with aarch64_simd_ld2<mode> into a fresh OImode temporary and then
;; emits gen_aarch64_rev_reglistoi with the mask from aarch64_reverse_mask
;; to produce operand 0.  Otherwise it emits aarch64_simd_ld2<mode> directly
;; into operand 0.
a91cc579 | 4355 | (define_expand "vec_load_lanesoi<mode>" |
4356 | [(set (match_operand:OI 0 "register_operand" "=w") | |
4357 | (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv") | |
4358 | (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] | |
4359 | UNSPEC_LD2))] | |
4360 | "TARGET_SIMD" | |
4361 | { | |
4362 | if (BYTES_BIG_ENDIAN) | |
4363 | { | |
4364 | rtx tmp = gen_reg_rtx (OImode); | |
4365 | rtx mask = aarch64_reverse_mask (<MODE>mode); | |
4366 | emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1])); | |
4367 | emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask)); | |
4368 | } | |
4369 | else | |
4370 | emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1])); | |
4371 | DONE; | |
4372 | }) | |
4373 | ||
;; ST2 structure store: the OImode register list printed as %S1 - %T1 is
;; stored to OImode memory operand 0 (constraint "=Utv").
4374 | (define_insn "aarch64_simd_st2<mode>" | |
df401d54 | 4375 | [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv") |
4376 | (unspec:OI [(match_operand:OI 1 "register_operand" "w") | |
4377 | (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] | |
4378 | UNSPEC_ST2))] | |
4379 | "TARGET_SIMD" | |
4380 | "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0" | |
52e95e58 | 4381 | [(set_attr "type" "neon_store2_2reg<q>")] |
4382 | ) | |
df401d54 | 4383 | |
59d865b5 | 4384 | ;; RTL uses GCC vector extension indices, so flip only for assembly. |
;; Single-lane ST2 (UNSPEC_ST2_LANE): stores lane operand 2 of the OImode
;; register list operand 1 to BLKmode memory operand 0.  The lane index is
;; remapped through ENDIAN_LANE_N in the output code, just before the
;; assembly template is returned.
06bd64dc | 4385 | (define_insn "aarch64_vec_store_lanesoi_lane<mode>" |
fda7ad8b | 4386 | [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv") |
4387 | (unspec:BLK [(match_operand:OI 1 "register_operand" "w") | |
0146686c | 4388 | (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) |
657d877a | 4389 | (match_operand:SI 2 "immediate_operand" "i")] |
0146686c | 4390 | UNSPEC_ST2_LANE))] |
657d877a | 4391 | "TARGET_SIMD" |
59d865b5 | 4392 | { |
4393 | operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]))); | |
4394 | return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0"; | |
4395 | } | |
bf3bbdf8 | 4396 | [(set_attr "type" "neon_store2_one_lane<q>")] |
657d877a | 4397 | ) |
4398 | ||
;; Expander for a two-vector structure store.  When BYTES_BIG_ENDIAN, it
;; first emits gen_aarch64_rev_reglistoi from operand 1 into a fresh OImode
;; temporary, using the mask from aarch64_reverse_mask, and then stores the
;; temporary with aarch64_simd_st2<mode>.  Otherwise it stores operand 1
;; directly.
a91cc579 | 4399 | (define_expand "vec_store_lanesoi<mode>" |
4400 | [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv") | |
4401 | (unspec:OI [(match_operand:OI 1 "register_operand" "w") | |
4402 | (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] | |
4403 | UNSPEC_ST2))] | |
4404 | "TARGET_SIMD" | |
4405 | { | |
4406 | if (BYTES_BIG_ENDIAN) | |
4407 | { | |
4408 | rtx tmp = gen_reg_rtx (OImode); | |
4409 | rtx mask = aarch64_reverse_mask (<MODE>mode); | |
4410 | emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask)); | |
4411 | emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp)); | |
4412 | } | |
4413 | else | |
4414 | emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1])); | |
4415 | DONE; | |
4416 | }) | |
4417 | ||
;; LD3 structure load: CImode memory operand 1 (constraint "Utv") is loaded
;; into the CImode register list printed as %S0 - %U0.
4418 | (define_insn "aarch64_simd_ld3<mode>" | |
df401d54 | 4419 | [(set (match_operand:CI 0 "register_operand" "=w") |
4420 | (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv") | |
4421 | (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] | |
4422 | UNSPEC_LD3))] | |
4423 | "TARGET_SIMD" | |
4424 | "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1" | |
52e95e58 | 4425 | [(set_attr "type" "neon_load3_3reg<q>")] |
4426 | ) | |
df401d54 | 4427 | |
;; LD3R (UNSPEC_LD3_DUP): BLKmode memory operand 1 is loaded into the CImode
;; register list printed as %S0 - %U0.  The pattern iterates over VALLDIF
;; through the dummy unspec.
20bce7af | 4428 | (define_insn "aarch64_simd_ld3r<mode>" |
4429 | [(set (match_operand:CI 0 "register_operand" "=w") | |
b630b9b5 | 4430 | (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv") |
20bce7af | 4431 | (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ] |
4432 | UNSPEC_LD3_DUP))] | |
4433 | "TARGET_SIMD" | |
4434 | "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1" | |
4435 | [(set_attr "type" "neon_load3_all_lanes<q>")] | |
4436 | ) | |
4437 | ||
;; Single-lane LD3 (UNSPEC_LD3_LANE): operand 2 is the existing CImode
;; register list, tied to the destination through constraint "0", and
;; operand 3 is the lane index.  The index is remapped through ENDIAN_LANE_N
;; in the output code, just before the assembly template is returned.
37a7d2c8 | 4438 | (define_insn "aarch64_vec_load_lanesci_lane<mode>" |
4439 | [(set (match_operand:CI 0 "register_operand" "=w") | |
b630b9b5 | 4440 | (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv") |
37a7d2c8 | 4441 | (match_operand:CI 2 "register_operand" "0") |
4442 | (match_operand:SI 3 "immediate_operand" "i") | |
0146686c | 4443 | (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
37a7d2c8 | 4444 | UNSPEC_LD3_LANE))] |
4445 | "TARGET_SIMD" | |
0146686c | 4446 | { |
4447 | operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]))); | |
4448 | return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1"; | |
4449 | } | |
37a7d2c8 | 4450 | [(set_attr "type" "neon_load3_one_lane")] |
4451 | ) | |
4452 | ||
;; Expander for a three-vector structure load.  When BYTES_BIG_ENDIAN, it
;; loads with aarch64_simd_ld3<mode> into a fresh CImode temporary and then
;; emits gen_aarch64_rev_reglistci with the mask from aarch64_reverse_mask
;; to produce operand 0.  Otherwise it emits aarch64_simd_ld3<mode> directly
;; into operand 0.
a91cc579 | 4453 | (define_expand "vec_load_lanesci<mode>" |
4454 | [(set (match_operand:CI 0 "register_operand" "=w") | |
4455 | (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv") | |
4456 | (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] | |
4457 | UNSPEC_LD3))] | |
4458 | "TARGET_SIMD" | |
4459 | { | |
4460 | if (BYTES_BIG_ENDIAN) | |
4461 | { | |
4462 | rtx tmp = gen_reg_rtx (CImode); | |
4463 | rtx mask = aarch64_reverse_mask (<MODE>mode); | |
4464 | emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1])); | |
4465 | emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask)); | |
4466 | } | |
4467 | else | |
4468 | emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1])); | |
4469 | DONE; | |
4470 | }) | |
4471 | ||
;; ST3 structure store: the CImode register list printed as %S1 - %U1 is
;; stored to CImode memory operand 0 (constraint "=Utv").
4472 | (define_insn "aarch64_simd_st3<mode>" | |
df401d54 | 4473 | [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv") |
4474 | (unspec:CI [(match_operand:CI 1 "register_operand" "w") | |
4475 | (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] | |
4476 | UNSPEC_ST3))] | |
4477 | "TARGET_SIMD" | |
4478 | "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0" | |
52e95e58 | 4479 | [(set_attr "type" "neon_store3_3reg<q>")] |
4480 | ) | |
df401d54 | 4481 | |
59d865b5 | 4482 | ;; RTL uses GCC vector extension indices, so flip only for assembly. |
;; Single-lane ST3 (UNSPEC_ST3_LANE): stores lane operand 2 of the CImode
;; register list operand 1 to BLKmode memory operand 0.  The lane index is
;; remapped through ENDIAN_LANE_N in the output code, just before the
;; assembly template is returned.
06bd64dc | 4483 | (define_insn "aarch64_vec_store_lanesci_lane<mode>" |
b630b9b5 | 4484 | [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv") |
4485 | (unspec:BLK [(match_operand:CI 1 "register_operand" "w") | |
4486 | (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) | |
4487 | (match_operand:SI 2 "immediate_operand" "i")] | |
4488 | UNSPEC_ST3_LANE))] | |
657d877a | 4489 | "TARGET_SIMD" |
59d865b5 | 4490 | { |
4491 | operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]))); | |
4492 | return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0"; | |
4493 | } | |
657d877a | 4494 | [(set_attr "type" "neon_store3_one_lane<q>")] |
4495 | ) | |
4496 | ||
;; Expander for a three-vector structure store.  When BYTES_BIG_ENDIAN, it
;; first emits gen_aarch64_rev_reglistci from operand 1 into a fresh CImode
;; temporary, using the mask from aarch64_reverse_mask, and then stores the
;; temporary with aarch64_simd_st3<mode>.  Otherwise it stores operand 1
;; directly.
a91cc579 | 4497 | (define_expand "vec_store_lanesci<mode>" |
4498 | [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv") | |
4499 | (unspec:CI [(match_operand:CI 1 "register_operand" "w") | |
4500 | (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] | |
4501 | UNSPEC_ST3))] | |
4502 | "TARGET_SIMD" | |
4503 | { | |
4504 | if (BYTES_BIG_ENDIAN) | |
4505 | { | |
4506 | rtx tmp = gen_reg_rtx (CImode); | |
4507 | rtx mask = aarch64_reverse_mask (<MODE>mode); | |
4508 | emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask)); | |
4509 | emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp)); | |
4510 | } | |
4511 | else | |
4512 | emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1])); | |
4513 | DONE; | |
4514 | }) | |
4515 | ||
;; LD4 structure load: XImode memory operand 1 (constraint "Utv") is loaded
;; into the XImode register list printed as %S0 - %V0.
4516 | (define_insn "aarch64_simd_ld4<mode>" | |
df401d54 | 4517 | [(set (match_operand:XI 0 "register_operand" "=w") |
4518 | (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv") | |
4519 | (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] | |
4520 | UNSPEC_LD4))] | |
4521 | "TARGET_SIMD" | |
4522 | "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1" | |
52e95e58 | 4523 | [(set_attr "type" "neon_load4_4reg<q>")] |
4524 | ) | |
df401d54 | 4525 | |
;; LD4R (UNSPEC_LD4_DUP): BLKmode memory operand 1 is loaded into the XImode
;; register list printed as %S0 - %V0.  The pattern iterates over VALLDIF
;; through the dummy unspec.
20bce7af | 4526 | (define_insn "aarch64_simd_ld4r<mode>" |
4527 | [(set (match_operand:XI 0 "register_operand" "=w") | |
bdd0de51 | 4528 | (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv") |
20bce7af | 4529 | (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ] |
4530 | UNSPEC_LD4_DUP))] | |
4531 | "TARGET_SIMD" | |
4532 | "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1" | |
4533 | [(set_attr "type" "neon_load4_all_lanes<q>")] | |
4534 | ) | |
4535 | ||
;; Single-lane LD4: load one lane of each register of the XI list from the
;; BLK-mode memory operand 1.  Operand 2 is the incoming register list and is
;; tied to the output (constraint "0"); operand 3 is the lane index.
;; The lane index is rewritten with ENDIAN_LANE_N only when the assembly is
;; output, immediately before it is printed as [%3].
37a7d2c8 | 4536 | (define_insn "aarch64_vec_load_lanesxi_lane<mode>" |
4537 | [(set (match_operand:XI 0 "register_operand" "=w") | |
bdd0de51 | 4538 | (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv") |
37a7d2c8 | 4539 | (match_operand:XI 2 "register_operand" "0") |
4540 | (match_operand:SI 3 "immediate_operand" "i") | |
0146686c | 4541 | (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
37a7d2c8 | 4542 | UNSPEC_LD4_LANE))] |
4543 | "TARGET_SIMD" | |
0146686c | 4544 | { |
4545 | operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]))); | |
4546 | return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1"; | |
4547 | } | |
37a7d2c8 | 4548 | [(set_attr "type" "neon_load4_one_lane")] |
4549 | ) | |
4550 | ||
;; Expander for vec_load_lanesxi<mode>: load the XI-mode register operand 0
;; from struct memory operand 1 by emitting aarch64_simd_ld4<mode>.
;; When BYTES_BIG_ENDIAN, the load goes into a temporary which is then passed
;; through aarch64_rev_reglistxi (mask from aarch64_reverse_mask) to produce
;; operand 0.
a91cc579 | 4551 | (define_expand "vec_load_lanesxi<mode>" |
4552 | [(set (match_operand:XI 0 "register_operand" "=w") | |
4553 | (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv") | |
4554 | (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] | |
4555 | UNSPEC_LD4))] | |
4556 | "TARGET_SIMD" | |
4557 | { | |
4558 | if (BYTES_BIG_ENDIAN) | |
4559 | { | |
4560 | rtx tmp = gen_reg_rtx (XImode); | |
4561 | rtx mask = aarch64_reverse_mask (<MODE>mode); | |
4562 | emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1])); | |
4563 | emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask)); | |
4564 | } | |
4565 | else | |
4566 | emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1])); | |
4567 | DONE; | |
4568 | }) | |
4569 | ||
;; ST4 of the XI-mode register list operand 1 (printed as %S1 .. %V1) to the
;; struct memory operand 0.  The UNSPEC_VSTRUCTDUMMY sub-unspec carries the VQ
;; iterator that selects <Vtype>.
4570 | (define_insn "aarch64_simd_st4<mode>" | |
df401d54 | 4571 | [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv") |
4572 | (unspec:XI [(match_operand:XI 1 "register_operand" "w") | |
4573 | (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] | |
4574 | UNSPEC_ST4))] | |
4575 | "TARGET_SIMD" | |
4576 | "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0" | |
52e95e58 | 4577 | [(set_attr "type" "neon_store4_4reg<q>")] |
4578 | ) | |
df401d54 | 4579 | |
59d865b5 | 4580 | ;; RTL uses GCC vector extension indices, so flip only for assembly. |
;; Single-lane ST4: store lane operand 2 of each register in the XI list
;; operand 1 to the BLK-mode memory operand 0.  The lane index is converted
;; with ENDIAN_LANE_N in the output code, just before it is printed as [%2].
06bd64dc | 4581 | (define_insn "aarch64_vec_store_lanesxi_lane<mode>" |
bdd0de51 | 4582 | [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv") |
4583 | (unspec:BLK [(match_operand:XI 1 "register_operand" "w") | |
4584 | (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) | |
4585 | (match_operand:SI 2 "immediate_operand" "i")] | |
4586 | UNSPEC_ST4_LANE))] | |
657d877a | 4587 | "TARGET_SIMD" |
59d865b5 | 4588 | { |
4589 | operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]))); | |
4590 | return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0"; | |
4591 | } | |
657d877a | 4592 | [(set_attr "type" "neon_store4_one_lane<q>")] |
4593 | ) | |
4594 | ||
;; Expander for vec_store_lanesxi<mode>: store the XI-mode register operand 1
;; to struct memory operand 0 by emitting aarch64_simd_st4<mode>.
;; When BYTES_BIG_ENDIAN, operand 1 is first run through
;; aarch64_rev_reglistxi with the mask from aarch64_reverse_mask, and the
;; permuted temporary is stored instead.
a91cc579 | 4595 | (define_expand "vec_store_lanesxi<mode>" |
4596 | [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv") | |
4597 | (unspec:XI [(match_operand:XI 1 "register_operand" "w") | |
4598 | (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] | |
4599 | UNSPEC_ST4))] | |
4600 | "TARGET_SIMD" | |
4601 | { | |
4602 | if (BYTES_BIG_ENDIAN) | |
4603 | { | |
4604 | rtx tmp = gen_reg_rtx (XImode); | |
4605 | rtx mask = aarch64_reverse_mask (<MODE>mode); | |
4606 | emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask)); | |
4607 | emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp)); | |
4608 | } | |
4609 | else | |
4610 | emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1])); | |
4611 | DONE; | |
4612 | }) | |
4613 | ||
;; Apply the V16QI permute mask operand 2 to every vector register of the
;; VSTRUCT register list operand 1, writing the list operand 0.
;; The insn is output as "#" and is split once reload has completed: the
;; split emits one aarch64_tbl1v16qi per register, where the register count
;; is GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG and consecutive hard
;; register numbers are used starting from REGNO of each operand.
;; Operand 0 is an earlyclobber ("=&w").
4614 | (define_insn_and_split "aarch64_rev_reglist<mode>" | |
4615 | [(set (match_operand:VSTRUCT 0 "register_operand" "=&w") | |
4616 | (unspec:VSTRUCT | |
4617 | [(match_operand:VSTRUCT 1 "register_operand" "w") | |
4618 | (match_operand:V16QI 2 "register_operand" "w")] | |
4619 | UNSPEC_REV_REGLIST))] | |
4620 | "TARGET_SIMD" | |
4621 | "#" | |
4622 | "&& reload_completed" | |
4623 | [(const_int 0)] | |
4624 | { | |
4625 | int i; | |
4626 | int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG; | |
4627 | for (i = 0; i < nregs; i++) | |
4628 | { | |
4629 | rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i); | |
4630 | rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i); | |
4631 | emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2])); | |
4632 | } | |
4633 | DONE; | |
4634 | } | |
4635 | [(set_attr "type" "neon_tbl1_q") | |
4636 | (set_attr "length" "<insn_count>")] | |
4637 | ) | |
4638 | ||
df401d54 | 4639 | ;; Reload patterns for AdvSIMD register list operands. |
4640 | ||
;; Move expander for the VSTRUCT modes.  While new pseudos may still be
;; created, a destination that is not a REG causes the source to be forced
;; into a register first; otherwise the operands are left untouched and the
;; (set ...) template is emitted as-is.
4641 | (define_expand "mov<mode>" | |
8bcdf19e | 4642 | [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "") |
4643 | (match_operand:VSTRUCT 1 "general_operand" ""))] | |
df401d54 | 4644 | "TARGET_SIMD" |
4645 | { | |
4646 | if (can_create_pseudo_p ()) | |
4647 | { | |
4648 | if (GET_CODE (operands[0]) != REG) | |
4649 | operands[1] = force_reg (<MODE>mode, operands[1]); | |
4650 | } | |
4651 | }) | |
4652 | ||
;; VSTRUCT move for !BYTES_BIG_ENDIAN; at least one operand must be a
;; register.  Alternatives:
;;   w <- w    output as "#" (type "multiple"); presumably handled by the
;;             post-reload define_splits for OI/CI/XI in this file
;;   Utv <- w  ST1 of the .16b register list %S1 .. %<Vendreg>1
;;   w <- Utv  LD1 of the .16b register list %S0 .. %<Vendreg>0
;; The length attribute is computed by aarch64_simd_attr_length_move.
4653 | (define_insn "*aarch64_mov<mode>" | |
4654 | [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w") | |
8bcdf19e | 4655 | (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))] |
4656 | "TARGET_SIMD && !BYTES_BIG_ENDIAN | |
df401d54 | 4657 | && (register_operand (operands[0], <MODE>mode) |
4658 | || register_operand (operands[1], <MODE>mode))" | |
8bcdf19e | 4659 | "@ |
4660 | # | |
4661 | st1\\t{%S1.16b - %<Vendreg>1.16b}, %0 | |
4662 | ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1" | |
4663 | [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\ | |
4664 | neon_load<nregs>_<nregs>reg_q") | |
52e95e58 | 4665 | (set (attr "length") (symbol_ref "aarch64_simd_attr_length_move (insn)"))] |
4666 | ) | |
df401d54 | 4667 | |
;; LD1 of a single vector register (operand 0) from struct memory operand 1,
;; wrapped in UNSPEC_LD1 rather than expressed as a plain move.  The
;; aarch64_ld1 expander in this file emits it when BYTES_BIG_ENDIAN.
c2259249 | 4668 | (define_insn "aarch64_be_ld1<mode>" |
aea31229 | 4669 | [(set (match_operand:VALLDI_F16 0 "register_operand" "=w") |
4670 | (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 | |
4671 | "aarch64_simd_struct_operand" "Utv")] | |
c2259249 | 4672 | UNSPEC_LD1))] |
4673 | "TARGET_SIMD" | |
4674 | "ld1\\t{%0<Vmtype>}, %1" | |
4675 | [(set_attr "type" "neon_load1_1reg<q>")] | |
4676 | ) | |
4677 | ||
;; ST1 of a single vector register (operand 1) to struct memory operand 0,
;; wrapped in UNSPEC_ST1 rather than expressed as a plain move.  The
;; aarch64_st1 expander in this file emits it when BYTES_BIG_ENDIAN.
4678 | (define_insn "aarch64_be_st1<mode>" | |
aea31229 | 4679 | [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv") |
4680 | (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")] | |
c2259249 | 4681 | UNSPEC_ST1))] |
4682 | "TARGET_SIMD" | |
4683 | "st1\\t{%1<Vmtype>}, %0" | |
4684 | [(set_attr "type" "neon_store1_1reg<q>")] | |
4685 | ) | |
4686 | ||
;; OI-mode move for BYTES_BIG_ENDIAN; at least one operand must be a register.
;; Alternatives:
;;   w <- w  output as "#" (type "multiple")
;;   m <- w  STP of %q1 and %R1
;;   w <- m  LDP into %q0 and %R0
;; The length attribute is computed by aarch64_simd_attr_length_move.
8bcdf19e | 4687 | (define_insn "*aarch64_be_movoi" |
4688 | [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w") | |
4689 | (match_operand:OI 1 "general_operand" " w,w,m"))] | |
4690 | "TARGET_SIMD && BYTES_BIG_ENDIAN | |
4691 | && (register_operand (operands[0], OImode) | |
4692 | || register_operand (operands[1], OImode))" | |
4693 | "@ | |
4694 | # | |
4695 | stp\\t%q1, %R1, %0 | |
4696 | ldp\\t%q0, %R0, %1" | |
648c8771 | 4697 | [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q") |
8bcdf19e | 4698 | (set (attr "length") (symbol_ref "aarch64_simd_attr_length_move (insn)"))] |
4699 | ) | |
4700 | ||
;; CI-mode move for BYTES_BIG_ENDIAN; at least one operand must be a register.
;; All three alternatives (w<-w, o<-w, w<-o) are output as "#", so the insn
;; relies on being split (see the CI define_split in this file).
4701 | (define_insn "*aarch64_be_movci" | |
4702 | [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w") | |
4703 | (match_operand:CI 1 "general_operand" " w,w,o"))] | |
4704 | "TARGET_SIMD && BYTES_BIG_ENDIAN | |
4705 | && (register_operand (operands[0], CImode) | |
4706 | || register_operand (operands[1], CImode))" | |
4707 | "#" | |
4708 | [(set_attr "type" "multiple") | |
4709 | (set (attr "length") (symbol_ref "aarch64_simd_attr_length_move (insn)"))] | |
4710 | ) | |
4711 | ||
;; XI-mode move for BYTES_BIG_ENDIAN; at least one operand must be a register.
;; All three alternatives (w<-w, o<-w, w<-o) are output as "#", so the insn
;; relies on being split (see the XI define_split in this file).
4712 | (define_insn "*aarch64_be_movxi" | |
4713 | [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w") | |
4714 | (match_operand:XI 1 "general_operand" " w,w,o"))] | |
4715 | "TARGET_SIMD && BYTES_BIG_ENDIAN | |
4716 | && (register_operand (operands[0], XImode) | |
4717 | || register_operand (operands[1], XImode))" | |
4718 | "#" | |
4719 | [(set_attr "type" "multiple") | |
4720 | (set (attr "length") (symbol_ref "aarch64_simd_attr_length_move (insn)"))] | |
4721 | ) | |
4722 | ||
;; After reload, split a register-to-register OI move by calling
;; aarch64_simd_emit_reg_reg_move with TImode and a count of 2.
df401d54 | 4723 | (define_split |
8bcdf19e | 4724 | [(set (match_operand:OI 0 "register_operand") |
4725 | (match_operand:OI 1 "register_operand"))] | |
df401d54 | 4726 | "TARGET_SIMD && reload_completed" |
8bcdf19e | 4727 | [(const_int 0)] |
df401d54 | 4728 | { |
8bcdf19e | 4729 | aarch64_simd_emit_reg_reg_move (operands, TImode, 2); |
4730 | DONE; | |
df401d54 | 4731 | }) |
4732 | ||
;; After reload, split a CI move.
;;  - Both operands registers: aarch64_simd_emit_reg_reg_move with TImode and
;;    a count of 3.
;;  - Otherwise, when BYTES_BIG_ENDIAN: move the OI subreg at byte offset 0,
;;    then move the TI subreg at byte offset 32 viewed as V16QI via
;;    gen_lowpart.
;;  - Otherwise the split FAILs and the insn is left as it is.
4733 | (define_split | |
8bcdf19e | 4734 | [(set (match_operand:CI 0 "nonimmediate_operand") |
4735 | (match_operand:CI 1 "general_operand"))] | |
df401d54 | 4736 | "TARGET_SIMD && reload_completed" |
8bcdf19e | 4737 | [(const_int 0)] |
df401d54 | 4738 | { |
8bcdf19e | 4739 | if (register_operand (operands[0], CImode) |
4740 | && register_operand (operands[1], CImode)) | |
4741 | { | |
4742 | aarch64_simd_emit_reg_reg_move (operands, TImode, 3); | |
4743 | DONE; | |
4744 | } | |
4745 | else if (BYTES_BIG_ENDIAN) | |
4746 | { | |
4747 | emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0), | |
4748 | simplify_gen_subreg (OImode, operands[1], CImode, 0)); | |
4749 | emit_move_insn (gen_lowpart (V16QImode, | |
4750 | simplify_gen_subreg (TImode, operands[0], | |
4751 | CImode, 32)), | |
4752 | gen_lowpart (V16QImode, | |
4753 | simplify_gen_subreg (TImode, operands[1], | |
4754 | CImode, 32))); | |
4755 | DONE; | |
4756 | } | |
4757 | else | |
4758 | FAIL; | |
df401d54 | 4759 | }) |
4760 | ||
;; After reload, split an XI move.
;;  - Both operands registers: aarch64_simd_emit_reg_reg_move with TImode and
;;    a count of 4.
;;  - Otherwise, when BYTES_BIG_ENDIAN: emit two OI moves, for the subregs at
;;    byte offsets 0 and 32.
;;  - Otherwise the split FAILs and the insn is left as it is.
4761 | (define_split | |
8bcdf19e | 4762 | [(set (match_operand:XI 0 "nonimmediate_operand") |
4763 | (match_operand:XI 1 "general_operand"))] | |
df401d54 | 4764 | "TARGET_SIMD && reload_completed" |
8bcdf19e | 4765 | [(const_int 0)] |
df401d54 | 4766 | { |
8bcdf19e | 4767 | if (register_operand (operands[0], XImode) |
4768 | && register_operand (operands[1], XImode)) | |
4769 | { | |
4770 | aarch64_simd_emit_reg_reg_move (operands, TImode, 4); | |
4771 | DONE; | |
4772 | } | |
4773 | else if (BYTES_BIG_ENDIAN) | |
4774 | { | |
4775 | emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0), | |
4776 | simplify_gen_subreg (OImode, operands[1], XImode, 0)); | |
4777 | emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32), | |
4778 | simplify_gen_subreg (OImode, operands[1], XImode, 32)); | |
4779 | DONE; | |
4780 | } | |
4781 | else | |
4782 | FAIL; | |
df401d54 | 4783 | }) |
4784 | ||
;; Expander for the LD<n>R patterns: operand 0 is the VSTRUCT destination,
;; operand 1 a DI register holding the address.  It builds a BLKmode MEM at
;; that address, sets its size to (element size of the VALLDIF mode) *
;; <VSTRUCT:nregs>, and emits aarch64_simd_ld<nregs>r<mode> on it.
;; NOTE(review): operand 1 carries constraint "w" here, whereas the
;; aarch64_ld<nregs><VDC:mode> and <VQ:mode> expanders in this file use "r"
;; for the address register -- confirm whether the difference is intentional.
e788bb64 | 4785 | (define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>" |
4786 | [(match_operand:VSTRUCT 0 "register_operand" "=w") | |
20bce7af | 4787 | (match_operand:DI 1 "register_operand" "w") |
4788 | (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] | |
4789 | "TARGET_SIMD" | |
4790 | { | |
fda7ad8b | 4791 | rtx mem = gen_rtx_MEM (BLKmode, operands[1]); |
e788bb64 | 4792 | set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode)) |
4793 | * <VSTRUCT:nregs>); | |
20bce7af | 4794 | |
e788bb64 | 4795 | emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0], |
4796 | mem)); | |
20bce7af | 4797 | DONE; |
4798 | }) | |
4799 | ||
;; LD2 for the VD modes.  The OI destination is described as a subreg of a
;; <VRL2> vec_concat of two <VDBL> halves; each half is an UNSPEC_LD2 of the
;; BLK memory operand 1 concatenated with a zero VD vector.
df401d54 | 4800 | (define_insn "aarch64_ld2<mode>_dreg" |
4801 | [(set (match_operand:OI 0 "register_operand" "=w") | |
4802 | (subreg:OI | |
4803 | (vec_concat:<VRL2> | |
4804 | (vec_concat:<VDBL> | |
c425c861 | 4805 | (unspec:VD |
4806 | [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")] | |
4807 | UNSPEC_LD2) | |
df401d54 | 4808 | (vec_duplicate:VD (const_int 0))) |
4809 | (vec_concat:<VDBL> | |
4810 | (unspec:VD [(match_dup 1)] | |
4811 | UNSPEC_LD2) | |
4812 | (vec_duplicate:VD (const_int 0)))) 0))] | |
4813 | "TARGET_SIMD" | |
4814 | "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1" | |
52e95e58 | 4815 | [(set_attr "type" "neon_load2_2reg<q>")] |
4816 | ) | |
df401d54 | 4817 | |
;; DX-mode counterpart of aarch64_ld2<mode>_dreg.  Same RTL shape, with each
;; half concatenated with (const_int 0); it is output as an LD1 of the
;; two-register .1d list %S0 .. %T0 rather than as LD2.
4818 | (define_insn "aarch64_ld2<mode>_dreg" | |
4819 | [(set (match_operand:OI 0 "register_operand" "=w") | |
4820 | (subreg:OI | |
4821 | (vec_concat:<VRL2> | |
4822 | (vec_concat:<VDBL> | |
c425c861 | 4823 | (unspec:DX |
4824 | [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")] | |
4825 | UNSPEC_LD2) | |
df401d54 | 4826 | (const_int 0)) |
4827 | (vec_concat:<VDBL> | |
4828 | (unspec:DX [(match_dup 1)] | |
4829 | UNSPEC_LD2) | |
4830 | (const_int 0))) 0))] | |
4831 | "TARGET_SIMD" | |
4832 | "ld1\\t{%S0.1d - %T0.1d}, %1" | |
52e95e58 | 4833 | [(set_attr "type" "neon_load1_2reg<q>")] |
4834 | ) | |
df401d54 | 4835 | |
;; LD3 for the VD modes.  The CI destination is described as a subreg of a
;; <VRL3> vec_concat built from three <VDBL> parts; each part is an
;; UNSPEC_LD3 of the BLK memory operand 1 concatenated with a zero VD vector.
4836 | (define_insn "aarch64_ld3<mode>_dreg" | |
4837 | [(set (match_operand:CI 0 "register_operand" "=w") | |
4838 | (subreg:CI | |
4839 | (vec_concat:<VRL3> | |
4840 | (vec_concat:<VRL2> | |
4841 | (vec_concat:<VDBL> | |
c425c861 | 4842 | (unspec:VD |
4843 | [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")] | |
4844 | UNSPEC_LD3) | |
df401d54 | 4845 | (vec_duplicate:VD (const_int 0))) |
4846 | (vec_concat:<VDBL> | |
4847 | (unspec:VD [(match_dup 1)] | |
4848 | UNSPEC_LD3) | |
4849 | (vec_duplicate:VD (const_int 0)))) | |
4850 | (vec_concat:<VDBL> | |
4851 | (unspec:VD [(match_dup 1)] | |
4852 | UNSPEC_LD3) | |
4853 | (vec_duplicate:VD (const_int 0)))) 0))] | |
4854 | "TARGET_SIMD" | |
4855 | "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1" | |
52e95e58 | 4856 | [(set_attr "type" "neon_load3_3reg<q>")] |
4857 | ) | |
df401d54 | 4858 | |
;; DX-mode counterpart of aarch64_ld3<mode>_dreg.  Same RTL shape, with each
;; part concatenated with (const_int 0); it is output as an LD1 of the
;; three-register .1d list %S0 .. %U0 rather than as LD3.
4859 | (define_insn "aarch64_ld3<mode>_dreg" | |
4860 | [(set (match_operand:CI 0 "register_operand" "=w") | |
4861 | (subreg:CI | |
4862 | (vec_concat:<VRL3> | |
4863 | (vec_concat:<VRL2> | |
4864 | (vec_concat:<VDBL> | |
c425c861 | 4865 | (unspec:DX |
4866 | [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")] | |
4867 | UNSPEC_LD3) | |
df401d54 | 4868 | (const_int 0)) |
4869 | (vec_concat:<VDBL> | |
4870 | (unspec:DX [(match_dup 1)] | |
4871 | UNSPEC_LD3) | |
4872 | (const_int 0))) | |
4873 | (vec_concat:<VDBL> | |
4874 | (unspec:DX [(match_dup 1)] | |
4875 | UNSPEC_LD3) | |
4876 | (const_int 0))) 0))] | |
4877 | "TARGET_SIMD" | |
4878 | "ld1\\t{%S0.1d - %U0.1d}, %1" | |
52e95e58 | 4879 | [(set_attr "type" "neon_load1_3reg<q>")] |
4880 | ) | |
df401d54 | 4881 | |
;; LD4 for the VD modes.  The XI destination is described as a subreg of a
;; <VRL4> vec_concat of two <VRL2> pairs, i.e. four <VDBL> parts; each part
;; is an UNSPEC_LD4 of the BLK memory operand 1 concatenated with a zero VD
;; vector.
4882 | (define_insn "aarch64_ld4<mode>_dreg" | |
4883 | [(set (match_operand:XI 0 "register_operand" "=w") | |
4884 | (subreg:XI | |
4885 | (vec_concat:<VRL4> | |
4886 | (vec_concat:<VRL2> | |
4887 | (vec_concat:<VDBL> | |
c425c861 | 4888 | (unspec:VD |
4889 | [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")] | |
4890 | UNSPEC_LD4) | |
df401d54 | 4891 | (vec_duplicate:VD (const_int 0))) |
4892 | (vec_concat:<VDBL> | |
4893 | (unspec:VD [(match_dup 1)] | |
4894 | UNSPEC_LD4) | |
4895 | (vec_duplicate:VD (const_int 0)))) | |
4896 | (vec_concat:<VRL2> | |
4897 | (vec_concat:<VDBL> | |
4898 | (unspec:VD [(match_dup 1)] | |
4899 | UNSPEC_LD4) | |
4900 | (vec_duplicate:VD (const_int 0))) | |
4901 | (vec_concat:<VDBL> | |
4902 | (unspec:VD [(match_dup 1)] | |
4903 | UNSPEC_LD4) | |
4904 | (vec_duplicate:VD (const_int 0))))) 0))] | |
4905 | "TARGET_SIMD" | |
4906 | "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1" | |
52e95e58 | 4907 | [(set_attr "type" "neon_load4_4reg<q>")] |
4908 | ) | |
df401d54 | 4909 | |
;; DX-mode counterpart of aarch64_ld4<mode>_dreg.  Same RTL shape, with each
;; part concatenated with (const_int 0); it is output as an LD1 of the
;; four-register .1d list %S0 .. %V0 rather than as LD4.
4910 | (define_insn "aarch64_ld4<mode>_dreg" | |
4911 | [(set (match_operand:XI 0 "register_operand" "=w") | |
4912 | (subreg:XI | |
4913 | (vec_concat:<VRL4> | |
4914 | (vec_concat:<VRL2> | |
4915 | (vec_concat:<VDBL> | |
c425c861 | 4916 | (unspec:DX |
4917 | [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")] | |
4918 | UNSPEC_LD4) | |
df401d54 | 4919 | (const_int 0)) |
4920 | (vec_concat:<VDBL> | |
4921 | (unspec:DX [(match_dup 1)] | |
4922 | UNSPEC_LD4) | |
4923 | (const_int 0))) | |
4924 | (vec_concat:<VRL2> | |
4925 | (vec_concat:<VDBL> | |
4926 | (unspec:DX [(match_dup 1)] | |
4927 | UNSPEC_LD4) | |
4928 | (const_int 0)) | |
4929 | (vec_concat:<VDBL> | |
4930 | (unspec:DX [(match_dup 1)] | |
4931 | UNSPEC_LD4) | |
4932 | (const_int 0)))) 0))] | |
4933 | "TARGET_SIMD" | |
4934 | "ld1\\t{%S0.1d - %V0.1d}, %1" | |
52e95e58 | 4935 | [(set_attr "type" "neon_load1_4reg<q>")] |
4936 | ) | |
df401d54 | 4937 | |
;; Expander for structure loads with VDC element modes: operand 0 is the
;; VSTRUCT destination, operand 1 a DI register holding the address.  It
;; builds a BLKmode MEM at that address with size <VSTRUCT:nregs> * 8 bytes
;; and emits the matching aarch64_ld<nregs><mode>_dreg pattern.
4938 | (define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>" | |
4939 | [(match_operand:VSTRUCT 0 "register_operand" "=w") | |
4940 | (match_operand:DI 1 "register_operand" "r") | |
4941 | (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] | |
4942 | "TARGET_SIMD" | |
4943 | { | |
c425c861 | 4944 | rtx mem = gen_rtx_MEM (BLKmode, operands[1]); |
4945 | set_mem_size (mem, <VSTRUCT:nregs> * 8); | |
df401d54 | 4946 | |
4947 | emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem)); | |
4948 | DONE; | |
4949 | }) | |
4950 | ||
;; Expander for a single-vector load: operand 0 is the destination vector
;; register, operand 1 a DI register holding the address.  It builds a MEM of
;; the vector mode at that address; when BYTES_BIG_ENDIAN it emits
;; aarch64_be_ld1<mode>, otherwise an ordinary move from the MEM.
aea31229 | 4951 | (define_expand "aarch64_ld1<VALL_F16:mode>" |
4952 | [(match_operand:VALL_F16 0 "register_operand") | |
88c9a347 | 4953 | (match_operand:DI 1 "register_operand")] |
4954 | "TARGET_SIMD" | |
4955 | { | |
aea31229 | 4956 | machine_mode mode = <VALL_F16:MODE>mode; |
88c9a347 | 4957 | rtx mem = gen_rtx_MEM (mode, operands[1]); |
c2259249 | 4958 | |
4959 | if (BYTES_BIG_ENDIAN) | |
aea31229 | 4960 | emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem)); |
c2259249 | 4961 | else |
4962 | emit_move_insn (operands[0], mem); | |
88c9a347 | 4963 | DONE; |
4964 | }) | |
4965 | ||
;; Expander for structure loads with VQ element modes: operand 0 is the
;; VSTRUCT destination, operand 1 a DI register holding the address.  It
;; builds a MEM in the VSTRUCT mode at that address and emits
;; aarch64_simd_ld<nregs><mode> on it.
df401d54 | 4966 | (define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>" |
4967 | [(match_operand:VSTRUCT 0 "register_operand" "=w") | |
4968 | (match_operand:DI 1 "register_operand" "r") | |
4969 | (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] | |
4970 | "TARGET_SIMD" | |
4971 | { | |
3754d046 | 4972 | machine_mode mode = <VSTRUCT:MODE>mode; |
df401d54 | 4973 | rtx mem = gen_rtx_MEM (mode, operands[1]); |
4974 | ||
5002f9a0 | 4975 | emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem)); |
df401d54 | 4976 | DONE; |
4977 | }) | |
4978 | ||
;; Expander for single-lane structure loads.  Operands: 0 = VSTRUCT
;; destination, 1 = DI address register, 2 = incoming VSTRUCT value,
;; 3 = lane index.  It builds a BLKmode MEM of size (element size) *
;; <VSTRUCT:nregs>, checks operand 3 with aarch64_simd_lane_bounds against
;; the bounds 0 and GET_MODE_NUNITS of the vector mode, then emits the
;; matching aarch64_vec_load_lanes<mode>_lane<mode> pattern.
e788bb64 | 4979 | (define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>" |
4980 | [(match_operand:VSTRUCT 0 "register_operand" "=w") | |
37a7d2c8 | 4981 | (match_operand:DI 1 "register_operand" "w") |
e788bb64 | 4982 | (match_operand:VSTRUCT 2 "register_operand" "0") |
37a7d2c8 | 4983 | (match_operand:SI 3 "immediate_operand" "i") |
0146686c | 4984 | (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
37a7d2c8 | 4985 | "TARGET_SIMD" |
4986 | { | |
b630b9b5 | 4987 | rtx mem = gen_rtx_MEM (BLKmode, operands[1]); |
e788bb64 | 4988 | set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode)) |
4989 | * <VSTRUCT:nregs>); | |
4990 | ||
4991 | aarch64_simd_lane_bounds (operands[3], 0, | |
4992 | GET_MODE_NUNITS (<VALLDIF:MODE>mode), | |
4993 | NULL); | |
4994 | emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> ( | |
4995 | operands[0], mem, operands[2], operands[3])); | |
37a7d2c8 | 4996 | DONE; |
4997 | }) | |
4998 | ||
df401d54 | 4999 | ;; Expanders for builtins to extract vector registers from large |
5000 | ;; opaque integer modes. | |
5001 | ||
5002 | ;; D-register list. | |
5003 | ||
;; Extract part number operand 2 of the VSTRUCT register list operand 1 as a
;; VDC value in operand 0.  The <VDBL>-mode subreg at byte offset part * 16
;; is first moved into a fresh temporary, and operand 0 is then set from the
;; low part of that temporary.
5004 | (define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>" | |
5005 | [(match_operand:VDC 0 "register_operand" "=w") | |
5006 | (match_operand:VSTRUCT 1 "register_operand" "w") | |
5007 | (match_operand:SI 2 "immediate_operand" "i")] | |
5008 | "TARGET_SIMD" | |
5009 | { | |
5010 | int part = INTVAL (operands[2]); | |
5011 | rtx temp = gen_reg_rtx (<VDC:VDBL>mode); | |
5012 | int offset = part * 16; | |
5013 | ||
5014 | emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset)); | |
5015 | emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp)); | |
5016 | DONE; | |
5017 | }) | |
5018 | ||
5019 | ;; Q-register list. | |
5020 | ||
;; Extract part number operand 2 of the VSTRUCT register list operand 1 as a
;; VQ value in operand 0, by moving the VQ-mode subreg at byte offset
;; part * 16.
5021 | (define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>" | |
5022 | [(match_operand:VQ 0 "register_operand" "=w") | |
5023 | (match_operand:VSTRUCT 1 "register_operand" "w") | |
5024 | (match_operand:SI 2 "immediate_operand" "i")] | |
5025 | "TARGET_SIMD" | |
5026 | { | |
5027 | int part = INTVAL (operands[2]); | |
5028 | int offset = part * 16; | |
5029 | ||
5030 | emit_move_insn (operands[0], | |
5031 | gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset)); | |
5032 | DONE; | |
5033 | }) | |
5034 | ||
5035 | ;; Permuted-store expanders for neon intrinsics. | |
5036 | ||
5de1fcdb | 5037 | ;; Permute instructions |
5038 | ||
5039 | ;; vec_perm support | |
5040 | ||
;; Expander for vec_perm_const<mode>.  All the work is delegated to
;; aarch64_expand_vec_perm_const (destination, two source vectors, selector
;; operand 3); the expansion is DONE when that returns nonzero and FAILs
;; otherwise.
5041 | (define_expand "vec_perm_const<mode>" | |
57887f75 | 5042 | [(match_operand:VALL_F16 0 "register_operand") |
5043 | (match_operand:VALL_F16 1 "register_operand") | |
5044 | (match_operand:VALL_F16 2 "register_operand") | |
5de1fcdb | 5045 | (match_operand:<V_cmp_result> 3)] |
5046 | "TARGET_SIMD" | |
5047 | { | |
5048 | if (aarch64_expand_vec_perm_const (operands[0], operands[1], | |
5049 | operands[2], operands[3])) | |
5050 | DONE; | |
5051 | else | |
5052 | FAIL; | |
5053 | }) | |
5054 | ||
;; Expander for vec_perm<mode> on the VB modes with a register selector
;; (operand 3).  It unconditionally delegates to aarch64_expand_vec_perm and
;; never FAILs.
5055 | (define_expand "vec_perm<mode>" | |
5056 | [(match_operand:VB 0 "register_operand") | |
5057 | (match_operand:VB 1 "register_operand") | |
5058 | (match_operand:VB 2 "register_operand") | |
5059 | (match_operand:VB 3 "register_operand")] | |
27cdb632 | 5060 | "TARGET_SIMD" |
5de1fcdb | 5061 | { |
5062 | aarch64_expand_vec_perm (operands[0], operands[1], | |
5063 | operands[2], operands[3]); | |
5064 | DONE; | |
5065 | }) | |
5066 | ||
;; TBL with a single V16QI table register (operand 1, printed as {%1.16b});
;; operand 2 supplies the indices and operand 0 receives the result, both in
;; a VB mode.
5067 | (define_insn "aarch64_tbl1<mode>" | |
5068 | [(set (match_operand:VB 0 "register_operand" "=w") | |
5069 | (unspec:VB [(match_operand:V16QI 1 "register_operand" "w") | |
5070 | (match_operand:VB 2 "register_operand" "w")] | |
5071 | UNSPEC_TBL))] | |
5072 | "TARGET_SIMD" | |
5073 | "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>" | |
52e95e58 | 5074 | [(set_attr "type" "neon_tbl1<q>")] |
5de1fcdb | 5075 | ) |
5076 | ||
5077 | ;; Two source registers. | |
5078 | ||
;; V16QI TBL whose table is the OI-mode register pair operand 1 (printed as
;; {%S1.16b - %T1.16b}); operand 2 supplies the indices.
5079 | (define_insn "aarch64_tbl2v16qi" | |
5080 | [(set (match_operand:V16QI 0 "register_operand" "=w") | |
5081 | (unspec:V16QI [(match_operand:OI 1 "register_operand" "w") | |
5082 | (match_operand:V16QI 2 "register_operand" "w")] | |
5083 | UNSPEC_TBL))] | |
5084 | "TARGET_SIMD" | |
5085 | "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b" | |
52e95e58 | 5086 | [(set_attr "type" "neon_tbl2_q")] |
5de1fcdb | 5087 | ) |
5088 | ||
;; VB-mode TBL whose table is the OI-mode register list operand 1, printed as
;; the two-register list {%S1.16b - %T1.16b}; operand 2 supplies the indices.
;; NOTE(review): the table operand is a two-register (OI) list, yet the
;; pattern is named "tbl3" and its type attribute is "neon_tbl3", while
;; aarch64_tbl2v16qi uses "neon_tbl2_q" for the same table shape -- confirm
;; the scheduling type is intended.
b631bffd | 5089 | (define_insn "aarch64_tbl3<mode>" |
5090 | [(set (match_operand:VB 0 "register_operand" "=w") | |
5091 | (unspec:VB [(match_operand:OI 1 "register_operand" "w") | |
5092 | (match_operand:VB 2 "register_operand" "w")] | |
297c6263 | 5093 | UNSPEC_TBL))] |
5094 | "TARGET_SIMD" | |
b631bffd | 5095 | "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>" |
297c6263 | 5096 | [(set_attr "type" "neon_tbl3")] |
5097 | ) | |
5098 | ||
;; VB-mode TBX whose table is the OI-mode register list operand 2, printed as
;; the two-register list {%S2.16b - %T2.16b}.  Operand 1 is tied to the
;; output (constraint "0"); operand 3 supplies the indices.
;; NOTE(review): the table operand is a two-register (OI) list, yet the
;; pattern is named "tbx4" and its type attribute is "neon_tbl4" -- confirm
;; the scheduling type is intended.
b631bffd | 5099 | (define_insn "aarch64_tbx4<mode>" |
5100 | [(set (match_operand:VB 0 "register_operand" "=w") | |
5101 | (unspec:VB [(match_operand:VB 1 "register_operand" "0") | |
297c6263 | 5102 | (match_operand:OI 2 "register_operand" "w") |
b631bffd | 5103 | (match_operand:VB 3 "register_operand" "w")] |
5104 | UNSPEC_TBX))] | |
5105 | "TARGET_SIMD" | |
5106 | "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>" | |
5107 | [(set_attr "type" "neon_tbl4")] | |
5108 | ) | |
5109 | ||
5110 | ;; Three source registers. | |
5111 | ||
;; VB-mode TBL whose table is the CI-mode register list operand 1, printed as
;; {%S1.16b - %U1.16b}; operand 2 supplies the indices.
5112 | (define_insn "aarch64_qtbl3<mode>" | |
5113 | [(set (match_operand:VB 0 "register_operand" "=w") | |
5114 | (unspec:VB [(match_operand:CI 1 "register_operand" "w") | |
5115 | (match_operand:VB 2 "register_operand" "w")] | |
5116 | UNSPEC_TBL))] | |
5117 | "TARGET_SIMD" | |
5118 | "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>" | |
5119 | [(set_attr "type" "neon_tbl3")] | |
5120 | ) | |
5121 | ||
;; VB-mode TBX whose table is the CI-mode register list operand 2, printed as
;; {%S2.16b - %U2.16b}.  Operand 1 is tied to the output (constraint "0");
;; operand 3 supplies the indices.
5122 | (define_insn "aarch64_qtbx3<mode>" | |
5123 | [(set (match_operand:VB 0 "register_operand" "=w") | |
5124 | (unspec:VB [(match_operand:VB 1 "register_operand" "0") | |
5125 | (match_operand:CI 2 "register_operand" "w") | |
5126 | (match_operand:VB 3 "register_operand" "w")] | |
5127 | UNSPEC_TBX))] | |
5128 | "TARGET_SIMD" | |
5129 | "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>" | |
5130 | [(set_attr "type" "neon_tbl3")] | |
5131 | ) | |
5132 | ||
5133 | ;; Four source registers. | |
5134 | ||
;; VB-mode TBL whose table is the XI-mode register list operand 1, printed as
;; {%S1.16b - %V1.16b}; operand 2 supplies the indices.
5135 | (define_insn "aarch64_qtbl4<mode>" | |
5136 | [(set (match_operand:VB 0 "register_operand" "=w") | |
5137 | (unspec:VB [(match_operand:XI 1 "register_operand" "w") | |
5138 | (match_operand:VB 2 "register_operand" "w")] | |
5139 | UNSPEC_TBL))] | |
5140 | "TARGET_SIMD" | |
5141 | "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>" | |
5142 | [(set_attr "type" "neon_tbl4")] | |
5143 | ) | |
5144 | ||
;; VB-mode TBX whose table is the XI-mode register list operand 2, printed as
;; {%S2.16b - %V2.16b}.  Operand 1 is tied to the output (constraint "0");
;; operand 3 supplies the indices.
5145 | (define_insn "aarch64_qtbx4<mode>" | |
5146 | [(set (match_operand:VB 0 "register_operand" "=w") | |
5147 | (unspec:VB [(match_operand:VB 1 "register_operand" "0") | |
5148 | (match_operand:XI 2 "register_operand" "w") | |
5149 | (match_operand:VB 3 "register_operand" "w")] | |
297c6263 | 5150 | UNSPEC_TBX))] |
5151 | "TARGET_SIMD" | |
b631bffd | 5152 | "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>" |
297c6263 | 5153 | [(set_attr "type" "neon_tbl4")] |
5154 | ) | |
5155 | ||
;; Combine two V16QI registers (operands 1 and 2) into the OI-mode register
;; operand 0 (UNSPEC_CONCAT).  The insn is output as "#" and is split after
;; reload by aarch64_split_combinev16qi.
5de1fcdb | 5156 | (define_insn_and_split "aarch64_combinev16qi" |
5157 | [(set (match_operand:OI 0 "register_operand" "=w") | |
5158 | (unspec:OI [(match_operand:V16QI 1 "register_operand" "w") | |
5159 | (match_operand:V16QI 2 "register_operand" "w")] | |
5160 | UNSPEC_CONCAT))] | |
5161 | "TARGET_SIMD" | |
5162 | "#" | |
5163 | "&& reload_completed" | |
5164 | [(const_int 0)] | |
5165 | { | |
5166 | aarch64_split_combinev16qi (operands); | |
5167 | DONE; | |
5efe19ee | 5168 | } |
5169 | [(set_attr "type" "multiple")] | |
5170 | ) | |
5de1fcdb | 5171 | |
;; Two-input permute instructions, generated for every member of the PERMUTE
;; unspec iterator.  The mnemonic is built from <perm_insn> followed by
;; <perm_hilo>, and all three operands are printed with the <Vtype>
;; arrangement.
14799b23 | 5172 | (define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>" |
5173 | [(set (match_operand:VALL 0 "register_operand" "=w") | |
5174 | (unspec:VALL [(match_operand:VALL 1 "register_operand" "w") | |
5175 | (match_operand:VALL 2 "register_operand" "w")] | |
5176 | PERMUTE))] | |
5177 | "TARGET_SIMD" | |
5178 | "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" | |
52e95e58 | 5179 | [(set_attr "type" "neon_permute<q>")] |
14799b23 | 5180 | ) |
5181 | ||
582dc50a | 5182 | ;; Note immediate (third) operand is lane index not byte index. |
;; EXT on two VALL vectors.  At output time operand 3 is multiplied by
;; GET_MODE_UNIT_SIZE (<MODE>mode) to turn the lane index into the byte
;; immediate printed as #%3; the registers are printed with the <Vbtype>
;; arrangement.
5183 | (define_insn "aarch64_ext<mode>" | |
5184 | [(set (match_operand:VALL 0 "register_operand" "=w") | |
5185 | (unspec:VALL [(match_operand:VALL 1 "register_operand" "w") | |
5186 | (match_operand:VALL 2 "register_operand" "w") | |
5187 | (match_operand:SI 3 "immediate_operand" "i")] | |
5188 | UNSPEC_EXT))] | |
5189 | "TARGET_SIMD" | |
5190 | { | |
5191 | operands[3] = GEN_INT (INTVAL (operands[3]) | |
6e256598 | 5192 | * GET_MODE_UNIT_SIZE (<MODE>mode)); |
582dc50a | 5193 | return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3"; |
5194 | } | |
5195 | [(set_attr "type" "neon_ext<q>")] | |
5196 | ) | |
5197 | ||
;; REV<n> instructions, generated for every member of the REVERSE unspec
;; iterator; <rev_op> supplies the mnemonic suffix.
5f5fccf7 | 5198 | (define_insn "aarch64_rev<REVERSE:rev_op><mode>" |
5199 | [(set (match_operand:VALL 0 "register_operand" "=w") | |
5200 | (unspec:VALL [(match_operand:VALL 1 "register_operand" "w")] | |
5201 | REVERSE))] | |
5202 | "TARGET_SIMD" | |
5203 | "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>" | |
5204 | [(set_attr "type" "neon_rev<q>")] | |
5205 | ) | |
5206 | ||
;; ST2 for the VD modes: store the OI-mode register list operand 1 (printed
;; as %S1 .. %T1) to the BLK-mode memory operand 0.
df401d54 | 5207 | (define_insn "aarch64_st2<mode>_dreg" |
c425c861 | 5208 | [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv") |
5209 | (unspec:BLK [(match_operand:OI 1 "register_operand" "w") | |
df401d54 | 5210 | (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
5211 | UNSPEC_ST2))] | |
5212 | "TARGET_SIMD" | |
5213 | "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0" | |
52e95e58 | 5214 | [(set_attr "type" "neon_store2_2reg")] |
5215 | ) | |
df401d54 | 5216 | |
;; DX-mode counterpart of aarch64_st2<mode>_dreg; it is output as an ST1 of
;; the two-register .1d list %S1 .. %T1 rather than as ST2.
5217 | (define_insn "aarch64_st2<mode>_dreg" | |
c425c861 | 5218 | [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv") |
5219 | (unspec:BLK [(match_operand:OI 1 "register_operand" "w") | |
df401d54 | 5220 | (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
5221 | UNSPEC_ST2))] | |
5222 | "TARGET_SIMD" | |
5223 | "st1\\t{%S1.1d - %T1.1d}, %0" | |
52e95e58 | 5224 | [(set_attr "type" "neon_store1_2reg")] |
5225 | ) | |
df401d54 | 5226 | |
;; ST3 for the VD modes: store the CI-mode register list operand 1 (printed
;; as %S1 .. %U1) to the BLK-mode memory operand 0.
5227 | (define_insn "aarch64_st3<mode>_dreg" | |
c425c861 | 5228 | [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv") |
5229 | (unspec:BLK [(match_operand:CI 1 "register_operand" "w") | |
df401d54 | 5230 | (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
5231 | UNSPEC_ST3))] | |
5232 | "TARGET_SIMD" | |
5233 | "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0" | |
52e95e58 | 5234 | [(set_attr "type" "neon_store3_3reg")] |
5235 | ) | |
df401d54 | 5236 | |
;; DX-mode counterpart of aarch64_st3<mode>_dreg; it is output as an ST1 of
;; the three-register .1d list %S1 .. %U1 rather than as ST3.
5237 | (define_insn "aarch64_st3<mode>_dreg" | |
c425c861 | 5238 | [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv") |
5239 | (unspec:BLK [(match_operand:CI 1 "register_operand" "w") | |
df401d54 | 5240 | (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
5241 | UNSPEC_ST3))] | |
5242 | "TARGET_SIMD" | |
5243 | "st1\\t{%S1.1d - %U1.1d}, %0" | |
52e95e58 | 5244 | [(set_attr "type" "neon_store1_3reg")] |
5245 | ) | |
df401d54 | 5246 | |
;; ST4 for the VD modes: store the XI-mode register list operand 1 (printed
;; as %S1 .. %V1) to the BLK-mode memory operand 0.
5247 | (define_insn "aarch64_st4<mode>_dreg" | |
c425c861 | 5248 | [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv") |
5249 | (unspec:BLK [(match_operand:XI 1 "register_operand" "w") | |
df401d54 | 5250 | (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
5251 | UNSPEC_ST4))] | |
5252 | "TARGET_SIMD" | |
5253 | "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0" | |
52e95e58 | 5254 | [(set_attr "type" "neon_store4_4reg")] |
5255 | ) | |
df401d54 | 5256 | |
;; DX-mode counterpart of aarch64_st4<mode>_dreg; it is output as an ST1 of
;; the four-register .1d list %S1 .. %V1 rather than as ST4.
5257 | (define_insn "aarch64_st4<mode>_dreg" | |
c425c861 | 5258 | [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv") |
5259 | (unspec:BLK [(match_operand:XI 1 "register_operand" "w") | |
df401d54 | 5260 | (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
5261 | UNSPEC_ST4))] | |
5262 | "TARGET_SIMD" | |
5263 | "st1\\t{%S1.1d - %V1.1d}, %0" | |
52e95e58 | 5264 | [(set_attr "type" "neon_store1_4reg")] |
5265 | ) | |
df401d54 | 5266 | |
;; Expander for structure stores with VDC element modes: operand 0 is a DI
;; register holding the address, operand 1 the VSTRUCT value.  It builds a
;; BLKmode MEM at that address with size <VSTRUCT:nregs> * 8 bytes and emits
;; the matching aarch64_st<nregs><mode>_dreg pattern.
5267 | (define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>" | |
5268 | [(match_operand:DI 0 "register_operand" "r") | |
5269 | (match_operand:VSTRUCT 1 "register_operand" "w") | |
5270 | (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] | |
5271 | "TARGET_SIMD" | |
5272 | { | |
c425c861 | 5273 | rtx mem = gen_rtx_MEM (BLKmode, operands[0]); |
5274 | set_mem_size (mem, <VSTRUCT:nregs> * 8); | |
df401d54 | 5275 | |
5276 | emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1])); | |
5277 | DONE; | |
5278 | }) | |
5279 | ||
;; Expander for structure stores with VQ element modes: operand 0 is a DI
;; register holding the address, operand 1 the VSTRUCT value.  It builds a
;; MEM in the VSTRUCT mode at that address and emits
;; aarch64_simd_st<nregs><mode> on it.
5280 | (define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>" | |
5281 | [(match_operand:DI 0 "register_operand" "r") | |
5282 | (match_operand:VSTRUCT 1 "register_operand" "w") | |
5283 | (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] | |
5284 | "TARGET_SIMD" | |
5285 | { | |
3754d046 | 5286 | machine_mode mode = <VSTRUCT:MODE>mode; |
df401d54 | 5287 | rtx mem = gen_rtx_MEM (mode, operands[0]); |
5288 | ||
5002f9a0 | 5289 | emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1])); |
df401d54 | 5290 | DONE; |
5291 | }) | |
5292 | ||
;; Expander for single-lane structure stores.  Operands: 0 = DI address
;; register, 1 = VSTRUCT value, 2 = lane index.  It builds a BLKmode MEM of
;; size (element size) * <VSTRUCT:nregs> and emits the matching
;; aarch64_vec_store_lanes<mode>_lane<mode> pattern.
;; NOTE(review): unlike the aarch64_ld<nregs>_lane expander in this file,
;; no aarch64_simd_lane_bounds check is made on the lane index here --
;; presumably it is validated elsewhere; confirm.
e788bb64 | 5293 | (define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>" |
657d877a | 5294 | [(match_operand:DI 0 "register_operand" "r") |
e788bb64 | 5295 | (match_operand:VSTRUCT 1 "register_operand" "w") |
0146686c | 5296 | (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) |
657d877a | 5297 | (match_operand:SI 2 "immediate_operand")] |
5298 | "TARGET_SIMD" | |
5299 | { | |
bdd0de51 | 5300 | rtx mem = gen_rtx_MEM (BLKmode, operands[0]); |
e788bb64 | 5301 | set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode)) |
5302 | * <VSTRUCT:nregs>); | |
657d877a | 5303 | |
e788bb64 | 5304 | emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> ( |
5305 | mem, operands[1], operands[2])); | |
657d877a | 5306 | DONE; |
5307 | }) | |
5308 | ||
;; Expander for a single-vector store: operand 0 is a DI register holding
;; the address, operand 1 the vector register to store.  It builds a MEM of
;; the vector mode at that address; when BYTES_BIG_ENDIAN it emits
;; aarch64_be_st1<mode>, otherwise an ordinary move to the MEM.
aea31229 | 5309 | (define_expand "aarch64_st1<VALL_F16:mode>" |
88c9a347 | 5310 | [(match_operand:DI 0 "register_operand") |
aea31229 | 5311 | (match_operand:VALL_F16 1 "register_operand")] |
88c9a347 | 5312 | "TARGET_SIMD" |
5313 | { | |
aea31229 | 5314 | machine_mode mode = <VALL_F16:MODE>mode; |
88c9a347 | 5315 | rtx mem = gen_rtx_MEM (mode, operands[0]); |
c2259249 | 5316 | |
5317 | if (BYTES_BIG_ENDIAN) | |
aea31229 | 5318 | emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1])); |
c2259249 | 5319 | else |
5320 | emit_move_insn (mem, operands[1]); | |
88c9a347 | 5321 | DONE; |
5322 | }) | |
5323 | ||
df401d54 | 5324 | ;; Expander for builtins to insert vector registers into large |
5325 | ;; opaque integer modes. | |
5326 | ||
5327 | ;; Q-register list. We don't need a D-reg inserter as we zero | |
5328 | ;; extend them in arm_neon.h and insert the resulting Q-regs. | |
5329 | ||
;; Operand 0 (VSTRUCT) first receives a full copy of operand 1 (VSTRUCT);
;; the VQ-mode register in operand 2 is then written into operand 0 through
;; a SUBREG at byte offset 16 * operand 3.  Operand 3 is an SImode immediate
;; read with INTVAL, so it selects which 16-byte part is replaced.
5330 | (define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>" | |
5331 | [(match_operand:VSTRUCT 0 "register_operand" "+w") | |
5332 | (match_operand:VSTRUCT 1 "register_operand" "0") | |
5333 | (match_operand:VQ 2 "register_operand" "w") | |
5334 | (match_operand:SI 3 "immediate_operand" "i")] | |
5335 | "TARGET_SIMD" | |
5336 | { | |
5337 | int part = INTVAL (operands[3]); | |
5338 | int offset = part * 16; | |
5339 | | |
5340 | emit_move_insn (operands[0], operands[1]); | |
5341 | emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset), | |
5342 | operands[2]); | |
5343 | DONE; | |
5344 | }) | |
5345 | ||
501336f7 | 5346 | ;; Standard pattern name vec_init<mode>. |
5347 | ||
;; Operand 0 is the VALL_F16 destination register; operand 1 carries no
;; mode or predicate in this pattern.  All of the work is delegated to
;; aarch64_expand_vector_init, which receives both operands unchanged.
5348 | (define_expand "vec_init<mode>" | |
57887f75 | 5349 | [(match_operand:VALL_F16 0 "register_operand" "") |
501336f7 | 5350 | (match_operand 1 "" "")] |
5351 | "TARGET_SIMD" | |
5352 | { | |
5353 | aarch64_expand_vector_init (operands[0], operands[1]); | |
5354 | DONE; | |
5355 | }) | |
5356 | ||
;; Matches a vec_duplicate of a single <VEL> element (operand 1, predicate
;; aarch64_simd_struct_operand, constraint Utv) into a VALL_F16 register and
;; emits one LD1R.  The leading '*' in the name means no gen_ function is
;; created for this pattern; it is only reached through RTL matching.
df83fa4d | 5357 | (define_insn "*aarch64_simd_ld1r<mode>" |
57887f75 | 5358 | [(set (match_operand:VALL_F16 0 "register_operand" "=w") |
5359 | (vec_duplicate:VALL_F16 | |
df83fa4d | 5360 | (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))] |
5361 | "TARGET_SIMD" | |
5362 | "ld1r\\t{%0.<Vtype>}, %1" | |
52e95e58 | 5363 | [(set_attr "type" "neon_load1_all_lanes")] |
5364 | ) | |
7b990298 | 5365 | |
;; Vector FRECPE over the VDQF modes, modelled as UNSPEC_FRECPE applied to
;; the single register input in operand 1.  Both operands are printed with
;; the <Vtype> arrangement suffix.
5366 | (define_insn "aarch64_frecpe<mode>" | |
5367 | [(set (match_operand:VDQF 0 "register_operand" "=w") | |
5368 | (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")] | |
5369 | UNSPEC_FRECPE))] | |
5370 | "TARGET_SIMD" | |
5371 | "frecpe\\t%0.<Vtype>, %1.<Vtype>" | |
52e95e58 | 5372 | [(set_attr "type" "neon_fp_recpe_<Vetype><q>")] |
7b990298 | 5373 | ) |
5374 | ||
;; Scalar forms over the GPF modes.  One insn is generated for each member
;; of the FRECP unspec iterator; the mnemonic and the "type" attribute both
;; take their suffix from <FRECP:frecp_suffix>.  Operands are printed as
;; scalar registers via the %<s> modifier.
b5f423ee | 5375 | (define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>" |
5376 | [(set (match_operand:GPF 0 "register_operand" "=w") | |
5377 | (unspec:GPF [(match_operand:GPF 1 "register_operand" "w")] | |
5378 | FRECP))] | |
5379 | "TARGET_SIMD" | |
5380 | "frecp<FRECP:frecp_suffix>\\t%<s>0, %<s>1" | |
52e95e58 | 5381 | [(set_attr "type" "neon_fp_recp<FRECP:frecp_suffix>_<GPF:Vetype><GPF:q>")] |
b5f423ee | 5382 | ) |
5383 | ||
;; FRECPS over the VALLF modes: a two-input UNSPEC_FRECPS of register
;; operands 1 and 2.  The %<v> prefix and <Vmtype> suffix are mode
;; attributes that select how each register is printed for the given mode.
7b990298 | 5384 | (define_insn "aarch64_frecps<mode>" |
b5f423ee | 5385 | [(set (match_operand:VALLF 0 "register_operand" "=w") |
5386 | (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w") | |
5387 | (match_operand:VALLF 2 "register_operand" "w")] | |
7b990298 | 5388 | UNSPEC_FRECPS))] |
5389 | "TARGET_SIMD" | |
b5f423ee | 5390 | "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>" |
52e95e58 | 5391 | [(set_attr "type" "neon_fp_recps_<Vetype><q>")] |
7b990298 | 5392 | ) |
5393 | ||
;; URECPE over the VDQ_SI integer modes, modelled as UNSPEC_URECPE of
;; register operand 1.  Note that the "type" attribute reuses the
;; neon_fp_recpe_<Vetype><q> scheduling class rather than an integer one.
6fa49e69 | 5394 | (define_insn "aarch64_urecpe<mode>" |
5395 | [(set (match_operand:VDQ_SI 0 "register_operand" "=w") | |
5396 | (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")] | |
5397 | UNSPEC_URECPE))] | |
5398 | "TARGET_SIMD" | |
5399 | "urecpe\\t%0.<Vtype>, %1.<Vtype>" | |
5400 | [(set_attr "type" "neon_fp_recpe_<Vetype><q>")]) | |
5401 | ||
8a0ffa1d | 5402 | ;; Standard pattern name vec_extract<mode>. |
5403 | ||
;; Operand 0 is the <VEL>-mode destination, operand 1 the VALL_F16 source
;; register and operand 2 an SImode immediate.  The expander does no work of
;; its own: all three operands are forwarded to aarch64_get_lane<mode>.
56234646 | 5404 | (define_expand "vec_extract<mode>" |
5405 | [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "") | |
57887f75 | 5406 | (match_operand:VALL_F16 1 "register_operand" "") |
56234646 | 5407 | (match_operand:SI 2 "immediate_operand" "")] |
8a0ffa1d | 5408 | "TARGET_SIMD" |
56234646 | 5409 | { |
5410 | emit_insn | |
5411 | (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2])); | |
5412 | DONE; | |
5413 | }) | |
90513b03 | 5414 | |
5415 | ;; aes | |
5416 | ||
;; One insn per member of the CRYPTO_AES unspec iterator, with the mnemonic
;; suffix taken from <aes_op>.  Operand 1 is tied to the destination
;; (constraint "0"), so the output template names only %0 and %2.
;; Requires both TARGET_SIMD and TARGET_CRYPTO.
5417 | (define_insn "aarch64_crypto_aes<aes_op>v16qi" | |
5418 | [(set (match_operand:V16QI 0 "register_operand" "=w") | |
5419 | (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0") | |
5420 | (match_operand:V16QI 2 "register_operand" "w")] | |
5421 | CRYPTO_AES))] | |
5422 | "TARGET_SIMD && TARGET_CRYPTO" | |
5423 | "aes<aes_op>\\t%0.16b, %2.16b" | |
31c72114 | 5424 | [(set_attr "type" "crypto_aese")] |
90513b03 | 5425 | ) |
5426 | ||
;; One insn per member of the CRYPTO_AESMC unspec iterator, with the
;; mnemonic suffix taken from <aesmc_op>.  Unlike the two-input AES pattern
;; there is a single, untied V16QI source in operand 1.
;; Requires both TARGET_SIMD and TARGET_CRYPTO.
5427 | (define_insn "aarch64_crypto_aes<aesmc_op>v16qi" | |
5428 | [(set (match_operand:V16QI 0 "register_operand" "=w") | |
5429 | (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "w")] | |
5430 | CRYPTO_AESMC))] | |
5431 | "TARGET_SIMD && TARGET_CRYPTO" | |
5432 | "aes<aesmc_op>\\t%0.16b, %1.16b" | |
31c72114 | 5433 | [(set_attr "type" "crypto_aesmc")] |
90513b03 | 5434 | ) |
5435 | ||
feff02e4 | 5436 | ;; sha1 |
5437 | ||
;; SHA1H on a scalar SImode value: UNSPEC_SHA1H of register operand 1.
;; Both operands use the "w" constraint and are printed with the %s
;; modifier.  Requires both TARGET_SIMD and TARGET_CRYPTO.
5438 | (define_insn "aarch64_crypto_sha1hsi" | |
5439 | [(set (match_operand:SI 0 "register_operand" "=w") | |
5440 | (unspec:SI [(match_operand:SI 1 | |
5441 | "register_operand" "w")] | |
5442 | UNSPEC_SHA1H))] | |
5443 | "TARGET_SIMD && TARGET_CRYPTO" | |
5444 | "sha1h\\t%s0, %s1" | |
5445 | [(set_attr "type" "crypto_sha1_fast")] | |
5446 | ) | |
5447 | ||
;; SHA1SU1 on V4SI: UNSPEC_SHA1SU1 of operands 1 and 2.  Operand 1 is tied
;; to the destination (constraint "0"), so only %0 and %2 appear in the
;; output template.  Requires both TARGET_SIMD and TARGET_CRYPTO.
5448 | (define_insn "aarch64_crypto_sha1su1v4si" | |
5449 | [(set (match_operand:V4SI 0 "register_operand" "=w") | |
5450 | (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0") | |
5451 | (match_operand:V4SI 2 "register_operand" "w")] | |
5452 | UNSPEC_SHA1SU1))] | |
5453 | "TARGET_SIMD && TARGET_CRYPTO" | |
5454 | "sha1su1\\t%0.4s, %2.4s" | |
5455 | [(set_attr "type" "crypto_sha1_fast")] | |
5456 | ) | |
5457 | ||
;; One insn per member of the CRYPTO_SHA1 unspec iterator, with the
;; mnemonic suffix taken from <sha1_op>.  Operand 1 (V4SI) is tied to the
;; destination and printed as %q0; operand 2 is a scalar SImode value
;; printed as %s2; operand 3 is a V4SI vector printed as %3.4s.
;; Requires both TARGET_SIMD and TARGET_CRYPTO.
5458 | (define_insn "aarch64_crypto_sha1<sha1_op>v4si" | |
5459 | [(set (match_operand:V4SI 0 "register_operand" "=w") | |
5460 | (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0") | |
5461 | (match_operand:SI 2 "register_operand" "w") | |
5462 | (match_operand:V4SI 3 "register_operand" "w")] | |
5463 | CRYPTO_SHA1))] | |
5464 | "TARGET_SIMD && TARGET_CRYPTO" | |
5465 | "sha1<sha1_op>\\t%q0, %s2, %3.4s" | |
5466 | [(set_attr "type" "crypto_sha1_slow")] | |
5467 | ) | |
5468 | ||
;; SHA1SU0 on V4SI: UNSPEC_SHA1SU0 of operands 1, 2 and 3.  Operand 1 is
;; tied to the destination (constraint "0"), so the output template names
;; %0, %2 and %3.  Requires both TARGET_SIMD and TARGET_CRYPTO.
5469 | (define_insn "aarch64_crypto_sha1su0v4si" | |
5470 | [(set (match_operand:V4SI 0 "register_operand" "=w") | |
5471 | (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0") | |
5472 | (match_operand:V4SI 2 "register_operand" "w") | |
5473 | (match_operand:V4SI 3 "register_operand" "w")] | |
5474 | UNSPEC_SHA1SU0))] | |
5475 | "TARGET_SIMD && TARGET_CRYPTO" | |
5476 | "sha1su0\\t%0.4s, %2.4s, %3.4s" | |
5477 | [(set_attr "type" "crypto_sha1_xor")] | |
5478 | ) | |
7462aa99 | 5479 | |
5480 | ;; sha256 | |
5481 | ||
;; One insn per member of the CRYPTO_SHA256 unspec iterator, with the
;; mnemonic suffix taken from <sha256_op>.  Operand 1 is tied to the
;; destination; the template prints operands 0 and 2 with the %q modifier
;; and operand 3 as %3.4s.  Requires both TARGET_SIMD and TARGET_CRYPTO.
5482 | (define_insn "aarch64_crypto_sha256h<sha256_op>v4si" | |
5483 | [(set (match_operand:V4SI 0 "register_operand" "=w") | |
5484 | (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0") | |
5485 | (match_operand:V4SI 2 "register_operand" "w") | |
5486 | (match_operand:V4SI 3 "register_operand" "w")] | |
5487 | CRYPTO_SHA256))] | |
5488 | "TARGET_SIMD && TARGET_CRYPTO" | |
5489 | "sha256h<sha256_op>\\t%q0, %q2, %3.4s" | |
5490 | [(set_attr "type" "crypto_sha256_slow")] | |
5491 | ) | |
5492 | ||
;; SHA256SU0 on V4SI: UNSPEC_SHA256SU0 of operands 1 and 2.  Operand 1 is
;; tied to the destination (constraint "0"), so only %0 and %2 appear in
;; the output template.  Requires both TARGET_SIMD and TARGET_CRYPTO.
5493 | (define_insn "aarch64_crypto_sha256su0v4si" | |
5494 | [(set (match_operand:V4SI 0 "register_operand" "=w") | |
5495 | (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0") | |
5496 | (match_operand:V4SI 2 "register_operand" "w")] | |
5497 | UNSPEC_SHA256SU0))] | |
5498 | "TARGET_SIMD && TARGET_CRYPTO" | |
5499 | "sha256su0\\t%0.4s, %2.4s" | |
5500 | [(set_attr "type" "crypto_sha256_fast")] | |
5501 | ) | |
5502 | ||
;; SHA256SU1 on V4SI: UNSPEC_SHA256SU1 of operands 1, 2 and 3.  Operand 1
;; is tied to the destination (constraint "0"), so the output template
;; names %0, %2 and %3.  Requires both TARGET_SIMD and TARGET_CRYPTO.
5503 | (define_insn "aarch64_crypto_sha256su1v4si" | |
5504 | [(set (match_operand:V4SI 0 "register_operand" "=w") | |
5505 | (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0") | |
5506 | (match_operand:V4SI 2 "register_operand" "w") | |
5507 | (match_operand:V4SI 3 "register_operand" "w")] | |
5508 | UNSPEC_SHA256SU1))] | |
5509 | "TARGET_SIMD && TARGET_CRYPTO" | |
5510 | "sha256su1\\t%0.4s, %2.4s, %3.4s" | |
5511 | [(set_attr "type" "crypto_sha256_slow")] | |
5512 | ) | |
e45446c4 | 5513 | |
5514 | ;; pmull | |
5515 | ||
;; PMULL with two DImode register inputs producing a TImode result,
;; modelled as UNSPEC_PMULL.  The template prints the result as %0.1q and
;; the inputs as .1d.  Requires both TARGET_SIMD and TARGET_CRYPTO.
5516 | (define_insn "aarch64_crypto_pmulldi" | |
5517 | [(set (match_operand:TI 0 "register_operand" "=w") | |
5518 | (unspec:TI [(match_operand:DI 1 "register_operand" "w") | |
5519 | (match_operand:DI 2 "register_operand" "w")] | |
5520 | UNSPEC_PMULL))] | |
5521 | "TARGET_SIMD && TARGET_CRYPTO" | |
5522 | "pmull\\t%0.1q, %1.1d, %2.1d" | |
5523 | [(set_attr "type" "neon_mul_d_long")] | |
5524 | ) | |
5525 | ||
;; PMULL2 with two V2DI register inputs producing a TImode result,
;; modelled as UNSPEC_PMULL2.  The template prints the result as %0.1q and
;; the inputs as .2d.  Requires both TARGET_SIMD and TARGET_CRYPTO.
5526 | (define_insn "aarch64_crypto_pmullv2di" | |
5527 | [(set (match_operand:TI 0 "register_operand" "=w") | |
5528 | (unspec:TI [(match_operand:V2DI 1 "register_operand" "w") | |
5529 | (match_operand:V2DI 2 "register_operand" "w")] | |
5530 | UNSPEC_PMULL2))] | |
5531 | "TARGET_SIMD && TARGET_CRYPTO" | |
5532 | "pmull2\\t%0.1q, %1.2d, %2.2d" | |
5533 | [(set_attr "type" "neon_mul_d_long")] | |
5534 | ) | |