]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/config/i386/sse.md
rtl.h (always_void_p): New function.
[thirdparty/gcc.git] / gcc / config / i386 / sse.md
CommitLineData
ef719a44 1;; GCC machine description for SSE instructions
5624e564 2;; Copyright (C) 2005-2015 Free Software Foundation, Inc.
ef719a44
RH
3;;
4;; This file is part of GCC.
5;;
6;; GCC is free software; you can redistribute it and/or modify
7;; it under the terms of the GNU General Public License as published by
2f83c7d6 8;; the Free Software Foundation; either version 3, or (at your option)
ef719a44
RH
9;; any later version.
10;;
11;; GCC is distributed in the hope that it will be useful,
12;; but WITHOUT ANY WARRANTY; without even the implied warranty of
13;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14;; GNU General Public License for more details.
15;;
16;; You should have received a copy of the GNU General Public License
2f83c7d6
NC
17;; along with GCC; see the file COPYING3. If not see
18;; <http://www.gnu.org/licenses/>.
ef719a44 19
dc9945a4
UB
20(define_c_enum "unspec" [
21 ;; SSE
22 UNSPEC_MOVNT
860f5e77
UB
23 UNSPEC_LOADU
24 UNSPEC_STOREU
dc9945a4
UB
25
26 ;; SSE3
27 UNSPEC_LDDQU
28
29 ;; SSSE3
30 UNSPEC_PSHUFB
31 UNSPEC_PSIGN
32 UNSPEC_PALIGNR
33
34 ;; For SSE4A support
35 UNSPEC_EXTRQI
36 UNSPEC_EXTRQ
37 UNSPEC_INSERTQI
38 UNSPEC_INSERTQ
39
40 ;; For SSE4.1 support
41 UNSPEC_BLENDV
42 UNSPEC_INSERTPS
43 UNSPEC_DP
44 UNSPEC_MOVNTDQA
45 UNSPEC_MPSADBW
46 UNSPEC_PHMINPOSUW
47 UNSPEC_PTEST
48
49 ;; For SSE4.2 support
50 UNSPEC_PCMPESTR
51 UNSPEC_PCMPISTR
52
53 ;; For FMA4 support
54 UNSPEC_FMADDSUB
55 UNSPEC_XOP_UNSIGNED_CMP
56 UNSPEC_XOP_TRUEFALSE
57 UNSPEC_XOP_PERMUTE
58 UNSPEC_FRCZ
59
60 ;; For AES support
61 UNSPEC_AESENC
62 UNSPEC_AESENCLAST
63 UNSPEC_AESDEC
64 UNSPEC_AESDECLAST
65 UNSPEC_AESIMC
66 UNSPEC_AESKEYGENASSIST
67
68 ;; For PCLMUL support
69 UNSPEC_PCLMUL
70
71 ;; For AVX support
72 UNSPEC_PCMP
73 UNSPEC_VPERMIL
74 UNSPEC_VPERMIL2
75 UNSPEC_VPERMIL2F128
76 UNSPEC_CAST
77 UNSPEC_VTESTP
78 UNSPEC_VCVTPH2PS
79 UNSPEC_VCVTPS2PH
80
81 ;; For AVX2 support
2ff5ea2d 82 UNSPEC_VPERMVAR
dc9945a4
UB
83 UNSPEC_VPERMTI
84 UNSPEC_GATHER
85 UNSPEC_VSIBADDR
ab931c71
AI
86
87 ;; For AVX512F support
88 UNSPEC_VPERMI2
89 UNSPEC_VPERMT2
47490470 90 UNSPEC_VPERMI2_MASK
c003c6d6 91 UNSPEC_UNSIGNED_FIX_NOTRUNC
0fe65b75
AI
92 UNSPEC_UNSIGNED_PCMP
93 UNSPEC_TESTM
94 UNSPEC_TESTNM
ab931c71 95 UNSPEC_SCATTER
afb4ac68
AI
96 UNSPEC_RCP14
97 UNSPEC_RSQRT14
98 UNSPEC_FIXUPIMM
99 UNSPEC_SCALEF
0fe65b75 100 UNSPEC_VTERNLOG
afb4ac68
AI
101 UNSPEC_GETEXP
102 UNSPEC_GETMANT
0fe65b75
AI
103 UNSPEC_ALIGN
104 UNSPEC_CONFLICT
47490470
AI
105 UNSPEC_COMPRESS
106 UNSPEC_COMPRESS_STORE
107 UNSPEC_EXPAND
0fe65b75
AI
108 UNSPEC_MASKED_EQ
109 UNSPEC_MASKED_GT
110
47490470
AI
111 ;; For embed. rounding feature
112 UNSPEC_EMBEDDED_ROUNDING
113
0fe65b75
AI
114 ;; For AVX512PF support
115 UNSPEC_GATHER_PREFETCH
116 UNSPEC_SCATTER_PREFETCH
afb4ac68
AI
117
118 ;; For AVX512ER support
119 UNSPEC_EXP2
120 UNSPEC_RCP28
121 UNSPEC_RSQRT28
c1618f82
AI
122
123 ;; For SHA support
124 UNSPEC_SHA1MSG1
125 UNSPEC_SHA1MSG2
126 UNSPEC_SHA1NEXTE
127 UNSPEC_SHA1RNDS4
128 UNSPEC_SHA256MSG1
129 UNSPEC_SHA256MSG2
130 UNSPEC_SHA256RNDS2
b9826286 131
41755b52 132 ;; For AVX512BW support
5f64b496
AI
133 UNSPEC_DBPSADBW
134 UNSPEC_PMADDUBSW512
ed3e611e 135 UNSPEC_PMADDWD512
41755b52
AI
136 UNSPEC_PSHUFHW
137 UNSPEC_PSHUFLW
2be4091a 138 UNSPEC_CVTINT2MASK
41755b52 139
b9826286
AI
140 ;; For AVX512DQ support
141 UNSPEC_REDUCE
142 UNSPEC_FPCLASS
143 UNSPEC_RANGE
4190ea38
IT
144
145 ;; For AVX512IFMA support
146 UNSPEC_VPMADD52LUQ
147 UNSPEC_VPMADD52HUQ
3dcc8af5
IT
148
149 ;; For AVX512VBMI support
150 UNSPEC_VPMULTISHIFT
dc9945a4
UB
151])
152
153(define_c_enum "unspecv" [
154 UNSPECV_LDMXCSR
155 UNSPECV_STMXCSR
156 UNSPECV_CLFLUSH
157 UNSPECV_MONITOR
158 UNSPECV_MWAIT
159 UNSPECV_VZEROALL
160 UNSPECV_VZEROUPPER
161])
162
e1faf150 163;; All vector modes including V?TImode, used in move patterns.
c7ecdec6 164(define_mode_iterator VMOVE
b86f6e9e
AI
165 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
166 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
167 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
168 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
e0aacde4 169 (V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX") V1TI
b86f6e9e
AI
170 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
171 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
ef719a44 172
7cbdc87d
KY
173;; All AVX-512{F,VL} vector modes. Assumes TARGET_AVX512F as the baseline.
174(define_mode_iterator V48_AVX512VL
175 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
176 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
177 V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
178 V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
179
180;; 1- and 2-byte element AVX-512{BW,VL} vector modes. Assumes TARGET_AVX512BW as the baseline.
181(define_mode_iterator VI12_AVX512VL
182 [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")
183 V32HI (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
e0aacde4 184
3dcc8af5
IT
185(define_mode_iterator VI1_AVX512VL
186 [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")])
187
6bec6c98
UB
188;; All vector modes
189(define_mode_iterator V
190 [(V32QI "TARGET_AVX") V16QI
191 (V16HI "TARGET_AVX") V8HI
ec5e777c
AI
192 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
193 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
194 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
195 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
6bec6c98
UB
196
197;; All 128bit vector modes
198(define_mode_iterator V_128
199 [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])
200
201;; All 256bit vector modes
202(define_mode_iterator V_256
203 [V32QI V16HI V8SI V4DI V8SF V4DF])
204
f62ce24f
AI
205;; All 512bit vector modes
206(define_mode_iterator V_512 [V64QI V32HI V16SI V8DI V16SF V8DF])
207
ec5e777c
AI
208;; All 256bit and 512bit vector modes
209(define_mode_iterator V_256_512
210 [V32QI V16HI V8SI V4DI V8SF V4DF
211 (V64QI "TARGET_AVX512F") (V32HI "TARGET_AVX512F") (V16SI "TARGET_AVX512F")
212 (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
213
07c0852e
UB
214;; All vector float modes
215(define_mode_iterator VF
b86f6e9e
AI
216 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
217 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
218
219;; 128- and 256-bit float vector modes
220(define_mode_iterator VF_128_256
6bec6c98
UB
221 [(V8SF "TARGET_AVX") V4SF
222 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
07c0852e
UB
223
224;; All SFmode vector float modes
225(define_mode_iterator VF1
a9ccbba2
AI
226 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF])
227
228;; 128- and 256-bit SF vector modes
229(define_mode_iterator VF1_128_256
6bec6c98 230 [(V8SF "TARGET_AVX") V4SF])
07c0852e 231
39012b09
AI
232(define_mode_iterator VF1_128_256VL
233 [V8SF (V4SF "TARGET_AVX512VL")])
234
07c0852e
UB
235;; All DFmode vector float modes
236(define_mode_iterator VF2
ec5e777c
AI
237 [(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
238
239;; 128- and 256-bit DF vector modes
240(define_mode_iterator VF2_128_256
6bec6c98 241 [(V4DF "TARGET_AVX") V2DF])
07c0852e 242
ec5e777c 243(define_mode_iterator VF2_512_256
39012b09
AI
244 [(V8DF "TARGET_AVX512F") V4DF])
245
246(define_mode_iterator VF2_512_256VL
247 [V8DF (V4DF "TARGET_AVX512VL")])
ec5e777c 248
07c0852e
UB
249;; All 128bit vector float modes
250(define_mode_iterator VF_128
6bec6c98
UB
251 [V4SF (V2DF "TARGET_SSE2")])
252
253;; All 256bit vector float modes
254(define_mode_iterator VF_256
255 [V8SF V4DF])
07c0852e 256
b86f6e9e
AI
257;; All 512bit vector float modes
258(define_mode_iterator VF_512
259 [V16SF V8DF])
260
ca9b264e
AI
261(define_mode_iterator VI48_AVX512VL
262 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
263 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
264
e274629e
AI
265(define_mode_iterator VF_AVX512VL
266 [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
267 V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
268
3bcf35e7
AI
269(define_mode_iterator VF2_AVX512VL
270 [V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
271
4769c826
AI
272(define_mode_iterator VF1_AVX512VL
273 [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")])
274
d8700b1c
UB
275;; All vector integer modes
276(define_mode_iterator VI
a9ccbba2 277 [(V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
9945a432
AI
278 (V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
279 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
d8700b1c
UB
280 (V8SI "TARGET_AVX") V4SI
281 (V4DI "TARGET_AVX") V2DI])
282
1707583b 283(define_mode_iterator VI_AVX2
700e2919
AI
284 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
285 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
a9ccbba2
AI
286 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
287 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
1707583b 288
e81b8564
UB
289;; All QImode vector integer modes
290(define_mode_iterator VI1
291 [(V32QI "TARGET_AVX") V16QI])
292
ca9b264e
AI
293(define_mode_iterator VI_ULOADSTORE_BW_AVX512VL
294 [V64QI
295 V32HI (V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL")])
296
297(define_mode_iterator VI_ULOADSTORE_F_AVX512VL
298 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
299 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
b86f6e9e 300
e81b8564
UB
301;; All DImode vector integer modes
302(define_mode_iterator VI8
a9ccbba2 303 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
e81b8564 304
98725d44
AI
305(define_mode_iterator VI8_AVX512VL
306 [V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
307
dc3b8d27
AI
308(define_mode_iterator VI8_256_512
309 [V8DI (V4DI "TARGET_AVX512VL")])
310
977e83a3
KY
311(define_mode_iterator VI1_AVX2
312 [(V32QI "TARGET_AVX2") V16QI])
313
f5db965f
IT
314(define_mode_iterator VI1_AVX512
315 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI])
316
977e83a3 317(define_mode_iterator VI2_AVX2
ed3e611e 318 [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
977e83a3 319
3bdf6340
AI
320(define_mode_iterator VI2_AVX512F
321 [(V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI])
322
50e60d7d
AI
323(define_mode_iterator VI4_AVX
324 [(V8SI "TARGET_AVX") V4SI])
325
977e83a3
KY
326(define_mode_iterator VI4_AVX2
327 [(V8SI "TARGET_AVX2") V4SI])
328
f5f41d88
AI
329(define_mode_iterator VI4_AVX512F
330 [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
331
21c924ac
AI
332(define_mode_iterator VI4_AVX512VL
333 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")])
334
335(define_mode_iterator VI48_AVX512F_AVX512VL
336 [V4SI V8SI (V16SI "TARGET_AVX512F")
337 (V2DI "TARGET_AVX512VL") (V4DI "TARGET_AVX512VL") (V8DI "TARGET_AVX512F")])
338
339(define_mode_iterator VI2_AVX512VL
340 [(V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL") V32HI])
5348cff8 341
44f59829
AI
342(define_mode_iterator VI8_AVX2_AVX512BW
343 [(V8DI "TARGET_AVX512BW") (V4DI "TARGET_AVX2") V2DI])
344
977e83a3
KY
345(define_mode_iterator VI8_AVX2
346 [(V4DI "TARGET_AVX2") V2DI])
347
f5f41d88
AI
348(define_mode_iterator VI8_AVX2_AVX512F
349 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
350
4a90ee35
AI
351(define_mode_iterator VI4_128_8_256
352 [V4SI V4DI])
353
2e2206fa
AI
354;; All V8D* modes
355(define_mode_iterator V8FI
356 [V8DF V8DI])
357
358;; All V16S* modes
359(define_mode_iterator V16FI
360 [V16SF V16SI])
361
e1faf150 362;; ??? We should probably use TImode instead.
977e83a3 363(define_mode_iterator VIMAX_AVX2
98ee4d9b 364 [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") V1TI])
977e83a3 365
e1faf150 366;; ??? This should probably be dropped in favor of VIMAX_AVX2.
977e83a3 367(define_mode_iterator SSESCALARMODE
b99ba39a 368 [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") TI])
977e83a3
KY
369
370(define_mode_iterator VI12_AVX2
c9b17fa5
AI
371 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
372 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
977e83a3
KY
373
374(define_mode_iterator VI24_AVX2
375 [(V16HI "TARGET_AVX2") V8HI
376 (V8SI "TARGET_AVX2") V4SI])
377
3bdf6340
AI
378(define_mode_iterator VI124_AVX512F
379 [(V32QI "TARGET_AVX2") V16QI
380 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI
381 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
382
977e83a3
KY
383(define_mode_iterator VI124_AVX2
384 [(V32QI "TARGET_AVX2") V16QI
385 (V16HI "TARGET_AVX2") V8HI
386 (V8SI "TARGET_AVX2") V4SI])
387
3616dc70
AI
388(define_mode_iterator VI2_AVX2_AVX512BW
389 [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
390
391(define_mode_iterator VI48_AVX2
392 [(V8SI "TARGET_AVX2") V4SI
977e83a3
KY
393 (V4DI "TARGET_AVX2") V2DI])
394
e8d08206
AI
395(define_mode_iterator VI248_AVX2_8_AVX512F
396 [(V16HI "TARGET_AVX2") V8HI
397 (V8SI "TARGET_AVX2") V4SI
398 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
399
28e9a294
AI
400(define_mode_iterator VI248_AVX512BW_AVX512VL
401 [(V32HI "TARGET_AVX512BW")
402 (V4DI "TARGET_AVX512VL") V16SI V8DI])
403
404;; Assumes TARGET_AVX512VL as the baseline.
405(define_mode_iterator VI24_AVX512BW_1
406 [(V16HI "TARGET_AVX512BW") (V8HI "TARGET_AVX512BW")
407 V8SI V4SI])
408
38f4b550
AI
409(define_mode_iterator VI48_AVX512F
410 [(V16SI "TARGET_AVX512F") V8SI V4SI
411 (V8DI "TARGET_AVX512F") V4DI V2DI])
977e83a3
KY
412
413(define_mode_iterator V48_AVX2
1707583b
UB
414 [V4SF V2DF
415 V8SF V4DF
977e83a3
KY
416 (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
417 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
418
8b994297
AI
419(define_mode_attr avx512
420 [(V16QI "avx512vl") (V32QI "avx512vl") (V64QI "avx512bw")
421 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")
422 (V4SI "avx512vl") (V8SI "avx512vl") (V16SI "avx512f")
423 (V2DI "avx512vl") (V4DI "avx512vl") (V8DI "avx512f")
424 (V4SF "avx512vl") (V8SF "avx512vl") (V16SF "avx512f")
425 (V2DF "avx512vl") (V4DF "avx512vl") (V8DF "avx512f")])
426
b86f6e9e
AI
427(define_mode_attr sse2_avx_avx512f
428 [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
8b994297 429 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")
b86f6e9e 430 (V4SI "sse2") (V8SI "avx") (V16SI "avx512f")
8b994297 431 (V2DI "avx512vl") (V4DI "avx512vl") (V8DI "avx512f")
b86f6e9e
AI
432 (V16SF "avx512f") (V8SF "avx") (V4SF "avx")
433 (V8DF "avx512f") (V4DF "avx") (V2DF "avx")])
434
977e83a3 435(define_mode_attr sse2_avx2
8b994297
AI
436 [(V16QI "sse2") (V32QI "avx2") (V64QI "avx512bw")
437 (V8HI "sse2") (V16HI "avx2") (V32HI "avx512bw")
b86f6e9e
AI
438 (V4SI "sse2") (V8SI "avx2") (V16SI "avx512f")
439 (V2DI "sse2") (V4DI "avx2") (V8DI "avx512f")
8b994297 440 (V1TI "sse2") (V2TI "avx2") (V4TI "avx512bw")])
977e83a3
KY
441
442(define_mode_attr ssse3_avx2
8b994297
AI
443 [(V16QI "ssse3") (V32QI "avx2") (V64QI "avx512bw")
444 (V4HI "ssse3") (V8HI "ssse3") (V16HI "avx2") (V32HI "avx512bw")
977e83a3
KY
445 (V4SI "ssse3") (V8SI "avx2")
446 (V2DI "ssse3") (V4DI "avx2")
8b994297 447 (TI "ssse3") (V2TI "avx2") (V4TI "avx512bw")])
977e83a3
KY
448
449(define_mode_attr sse4_1_avx2
8b994297
AI
450 [(V16QI "sse4_1") (V32QI "avx2") (V64QI "avx512bw")
451 (V8HI "sse4_1") (V16HI "avx2") (V32HI "avx512bw")
b86f6e9e 452 (V4SI "sse4_1") (V8SI "avx2") (V16SI "avx512f")
8b994297 453 (V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512dq")])
977e83a3
KY
454
455(define_mode_attr avx_avx2
456 [(V4SF "avx") (V2DF "avx")
457 (V8SF "avx") (V4DF "avx")
458 (V4SI "avx2") (V2DI "avx2")
459 (V8SI "avx2") (V4DI "avx2")])
460
f2289672
JJ
461(define_mode_attr vec_avx2
462 [(V16QI "vec") (V32QI "avx2")
463 (V8HI "vec") (V16HI "avx2")
464 (V4SI "vec") (V8SI "avx2")
465 (V2DI "vec") (V4DI "avx2")])
466
cf92ae7f 467(define_mode_attr avx2_avx512
8b994297
AI
468 [(V4SI "avx2") (V8SI "avx2") (V16SI "avx512f")
469 (V2DI "avx2") (V4DI "avx2") (V8DI "avx512f")
470 (V4SF "avx2") (V8SF "avx2") (V16SF "avx512f")
471 (V2DF "avx2") (V4DF "avx2") (V8DF "avx512f")
472 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")])
473
3f97cb0b
AI
;; Mnemonic infix used to pick the float ("f") or integer ("i") flavour of
;; instructions whose name is built as vextract<shuffletype>64x4,
;; vextract<shuffletype>32x4, vbroadcast<shuffletype>64x4 and
;; vbroadcast<shuffletype>32x4 (see *mov<mode>_internal).  Only "f" and "i"
;; yield real instructions, so every integer vector mode, V16HI included,
;; must map to "i".
474(define_mode_attr shuffletype
475 [(V16SF "f") (V16SI "i") (V8DF "f") (V8DI "i")
476 (V8SF "f") (V8SI "i") (V4DF "f") (V4DI "i")
477 (V4SF "f") (V4SI "i") (V2DF "f") (V2DI "i")
478 (V32QI "i") (V16HI "i") (V16QI "i") (V8HI "i")
479 (V64QI "i") (V1TI "i") (V2TI "i")])
480
2e2206fa
AI
481(define_mode_attr ssequartermode
482 [(V16SF "V4SF") (V8DF "V2DF") (V16SI "V4SI") (V8DI "V2DI")])
483
8b994297
AI
484(define_mode_attr ssedoublemodelower
485 [(V16QI "v16hi") (V32QI "v32hi") (V64QI "v64hi")
486 (V8HI "v8si") (V16HI "v16si") (V32HI "v32si")
487 (V4SI "v4di") (V8SI "v8di") (V16SI "v16di")])
488
977e83a3 489(define_mode_attr ssedoublemode
2e2206fa 490 [(V16SF "V32SF") (V16SI "V32SI") (V8DI "V16DI") (V8DF "V16DF")
8b994297
AI
491 (V8SF "V16SF") (V8SI "V16SI") (V4DI "V8DI") (V4DF "V8DF")
492 (V16HI "V16SI") (V8HI "V8SI") (V4HI "V4SI") (V4SI "V4DI")
493 (V32HI "V32SI") (V32QI "V32HI") (V16QI "V16HI") (V64QI "V64HI")])
977e83a3
KY
494
495(define_mode_attr ssebytemode
8b994297 496 [(V8DI "V64QI") (V4DI "V32QI") (V2DI "V16QI")])
977e83a3 497
798dd0ba
UB
498;; All 128bit vector integer modes
499(define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
500
977e83a3
KY
501;; All 256bit vector integer modes
502(define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI])
503
f62ce24f
AI
504;; All 512bit vector integer modes
505(define_mode_iterator VI_512 [V64QI V32HI V16SI V8DI])
506
507;; Various 128bit vector integer mode combinations
798dd0ba
UB
508(define_mode_iterator VI12_128 [V16QI V8HI])
509(define_mode_iterator VI14_128 [V16QI V4SI])
510(define_mode_iterator VI124_128 [V16QI V8HI V4SI])
511(define_mode_iterator VI24_128 [V8HI V4SI])
512(define_mode_iterator VI248_128 [V8HI V4SI V2DI])
ee3b466d 513(define_mode_iterator VI48_128 [V4SI V2DI])
07c0852e 514
e8d08206 515;; Various 256bit and 512bit vector integer mode combinations
575d952c
AI
516(define_mode_iterator VI124_256 [V32QI V16HI V8SI])
517(define_mode_iterator VI124_256_AVX512F_AVX512BW
518 [V32QI V16HI V8SI
519 (V64QI "TARGET_AVX512BW")
520 (V32HI "TARGET_AVX512BW")
521 (V16SI "TARGET_AVX512F")])
ee3b466d 522(define_mode_iterator VI48_256 [V8SI V4DI])
0fe65b75 523(define_mode_iterator VI48_512 [V16SI V8DI])
e711dffd 524(define_mode_iterator VI4_256_8_512 [V8SI V8DI])
0ab03ea0
AI
525(define_mode_iterator VI_AVX512BW
526 [V16SI V8DI (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")])
977e83a3 527
6bec6c98
UB
528;; Int-float size matches
529(define_mode_iterator VI4F_128 [V4SI V4SF])
530(define_mode_iterator VI8F_128 [V2DI V2DF])
531(define_mode_iterator VI4F_256 [V8SI V8SF])
532(define_mode_iterator VI8F_256 [V4DI V4DF])
16821545
AI
533(define_mode_iterator VI8F_256_512
534 [V4DI V4DF (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
3c87b77b
AI
535(define_mode_iterator VI48F_256_512
536 [V8SI V8SF
537 (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
cf92ae7f
AI
538 (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
539 (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")])
bf584ca0
AI
540(define_mode_iterator VF48_I1248
541 [V16SI V16SF V8DI V8DF V32HI V64QI])
f7be73c8
AI
542(define_mode_iterator VI48F
543 [V16SI V16SF V8DI V8DF
544 (V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL")
545 (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
546 (V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
547 (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
0774c160 548(define_mode_iterator VI48F_256 [V8SI V8SF V4DI V4DF])
6bec6c98 549
8dfb9f16
UB
550;; Mapping from float mode to required SSE level
551(define_mode_attr sse
552 [(SF "sse") (DF "sse2")
553 (V4SF "sse") (V2DF "sse2")
b86f6e9e
AI
554 (V16SF "avx512f") (V8SF "avx")
555 (V8DF "avx512f") (V4DF "avx")])
8dfb9f16
UB
556
557(define_mode_attr sse2
b86f6e9e
AI
558 [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
559 (V2DI "sse2") (V4DI "avx") (V8DI "avx512f")])
8dfb9f16
UB
560
561(define_mode_attr sse3
562 [(V16QI "sse3") (V32QI "avx")])
563
564(define_mode_attr sse4_1
565 [(V4SF "sse4_1") (V2DF "sse4_1")
b86f6e9e
AI
566 (V8SF "avx") (V4DF "avx")
567 (V8DF "avx512f")])
8dfb9f16 568
cbb734aa 569(define_mode_attr avxsizesuffix
b86f6e9e
AI
570 [(V64QI "512") (V32HI "512") (V16SI "512") (V8DI "512")
571 (V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
6bec6c98 572 (V16QI "") (V8HI "") (V4SI "") (V2DI "")
b86f6e9e 573 (V16SF "512") (V8DF "512")
cbb734aa
UB
574 (V8SF "256") (V4DF "256")
575 (V4SF "") (V2DF "")])
6cf9eb27 576
cbb734aa
UB
577;; SSE instruction mode
578(define_mode_attr sseinsnmode
8b994297 579 [(V64QI "XI") (V32HI "XI") (V16SI "XI") (V8DI "XI") (V4TI "XI")
3f97cb0b 580 (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI")
cbb734aa 581 (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
3f97cb0b 582 (V16SF "V16SF") (V8DF "V8DF")
cbb734aa 583 (V8SF "V8SF") (V4DF "V4DF")
977e83a3 584 (V4SF "V4SF") (V2DF "V2DF")
601a5d76 585 (TI "TI")])
cbb734aa 586
ab931c71
AI
587;; Mapping of vector modes to corresponding mask size
588(define_mode_attr avx512fmaskmode
2534573e
AI
589 [(V64QI "DI") (V32QI "SI") (V16QI "HI")
590 (V32HI "SI") (V16HI "HI") (V8HI "QI") (V4HI "QI")
ab931c71
AI
591 (V16SI "HI") (V8SI "QI") (V4SI "QI")
592 (V8DI "QI") (V4DI "QI") (V2DI "QI")
593 (V16SF "HI") (V8SF "QI") (V4SF "QI")
594 (V8DF "QI") (V4DF "QI") (V2DF "QI")])
595
cbb734aa
UB
596;; Mapping of vector float modes to an integer mode of the same size
597(define_mode_attr sseintvecmode
b86f6e9e
AI
598 [(V16SF "V16SI") (V8DF "V8DI")
599 (V8SF "V8SI") (V4DF "V4DI")
600 (V4SF "V4SI") (V2DF "V2DI")
601 (V16SI "V16SI") (V8DI "V8DI")
602 (V8SI "V8SI") (V4DI "V4DI")
603 (V4SI "V4SI") (V2DI "V2DI")
604 (V16HI "V16HI") (V8HI "V8HI")
8b994297 605 (V32HI "V32HI") (V64QI "V64QI")
7b45b87f 606 (V32QI "V32QI") (V16QI "V16QI")])
cbb734aa 607
3bcf35e7
AI
608(define_mode_attr sseintvecmode2
609 [(V8DF "XI") (V4DF "OI") (V2DF "TI")
610 (V8SF "OI") (V4SF "TI")])
611
406d683e 612(define_mode_attr sseintvecmodelower
8b994297 613 [(V16SF "v16si") (V8DF "v8di")
a9ccbba2 614 (V8SF "v8si") (V4DF "v4di")
406d683e
JJ
615 (V4SF "v4si") (V2DF "v2di")
616 (V8SI "v8si") (V4DI "v4di")
617 (V4SI "v4si") (V2DI "v2di")
618 (V16HI "v16hi") (V8HI "v8hi")
619 (V32QI "v32qi") (V16QI "v16qi")])
620
cbb734aa
UB
621;; Mapping of vector modes to a vector mode of double size
622(define_mode_attr ssedoublevecmode
623 [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
624 (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
625 (V8SF "V16SF") (V4DF "V8DF")
626 (V4SF "V8SF") (V2DF "V4DF")])
627
628;; Mapping of vector modes to a vector mode of half size
629(define_mode_attr ssehalfvecmode
ec5e777c
AI
630 [(V64QI "V32QI") (V32HI "V16HI") (V16SI "V8SI") (V8DI "V4DI")
631 (V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
632 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI")
633 (V16SF "V8SF") (V8DF "V4DF")
634 (V8SF "V4SF") (V4DF "V2DF")
635 (V4SF "V2SF")])
cbb734aa 636
e338c25c
UB
637;; Mapping of vector modes to packed single mode of the same size
638(define_mode_attr ssePSmode
b86f6e9e
AI
639 [(V16SI "V16SF") (V8DF "V16SF")
640 (V16SF "V16SF") (V8DI "V16SF")
641 (V64QI "V16SF") (V32QI "V8SF") (V16QI "V4SF")
642 (V32HI "V16SF") (V16HI "V8SF") (V8HI "V4SF")
e338c25c
UB
643 (V8SI "V8SF") (V4SI "V4SF")
644 (V4DI "V8SF") (V2DI "V4SF")
8b994297 645 (V4TI "V16SF") (V2TI "V8SF") (V1TI "V4SF")
e338c25c
UB
646 (V8SF "V8SF") (V4SF "V4SF")
647 (V4DF "V8SF") (V2DF "V4SF")])
648
8b994297
AI
649(define_mode_attr ssePSmode2
650 [(V8DI "V8SF") (V4DI "V4SF")])
651
cbb734aa
UB
652;; Mapping of vector modes back to the scalar modes
653(define_mode_attr ssescalarmode
a9ccbba2
AI
654 [(V64QI "QI") (V32QI "QI") (V16QI "QI")
655 (V32HI "HI") (V16HI "HI") (V8HI "HI")
656 (V16SI "SI") (V8SI "SI") (V4SI "SI")
657 (V8DI "DI") (V4DI "DI") (V2DI "DI")
658 (V16SF "SF") (V8SF "SF") (V4SF "SF")
659 (V8DF "DF") (V4DF "DF") (V2DF "DF")])
660
661;; Mapping of vector modes to the 128bit modes
662(define_mode_attr ssexmmmode
663 [(V64QI "V16QI") (V32QI "V16QI") (V16QI "V16QI")
664 (V32HI "V8HI") (V16HI "V8HI") (V8HI "V8HI")
665 (V16SI "V4SI") (V8SI "V4SI") (V4SI "V4SI")
666 (V8DI "V2DI") (V4DI "V2DI") (V2DI "V2DI")
667 (V16SF "V4SF") (V8SF "V4SF") (V4SF "V4SF")
668 (V8DF "V2DF") (V4DF "V2DF") (V2DF "V2DF")])
cbb734aa 669
eabb5f48
UB
670;; Pointer size override for scalar modes (Intel asm dialect)
671(define_mode_attr iptr
672 [(V32QI "b") (V16HI "w") (V8SI "k") (V4DI "q")
673 (V16QI "b") (V8HI "w") (V4SI "k") (V2DI "q")
674 (V8SF "k") (V4DF "q")
675 (V4SF "k") (V2DF "q")
676 (SF "k") (DF "q")])
677
cbb734aa
UB
678;; Number of scalar elements in each vector type
679(define_mode_attr ssescalarnum
a9ccbba2
AI
680 [(V64QI "64") (V16SI "16") (V8DI "8")
681 (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
cbb734aa 682 (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
a9ccbba2 683 (V16SF "16") (V8DF "8")
cbb734aa
UB
684 (V8SF "8") (V4DF "4")
685 (V4SF "4") (V2DF "2")])
686
3f5783ea
UB
687;; Mask of scalar elements in each vector type
688(define_mode_attr ssescalarnummask
689 [(V32QI "31") (V16HI "15") (V8SI "7") (V4DI "3")
690 (V16QI "15") (V8HI "7") (V4SI "3") (V2DI "1")
691 (V8SF "7") (V4DF "3")
692 (V4SF "3") (V2DF "1")])
693
47490470
AI
694(define_mode_attr ssescalarsize
695 [(V8DI "64") (V4DI "64") (V2DI "64")
8b994297 696 (V64QI "8") (V32QI "8") (V16QI "8")
47490470
AI
697 (V32HI "16") (V16HI "16") (V8HI "16")
698 (V16SI "32") (V8SI "32") (V4SI "32")
699 (V16SF "32") (V8DF "64")])
700
7b45b87f
UB
701;; SSE prefix for integer vector modes
702(define_mode_attr sseintprefix
ab931c71
AI
703 [(V2DI "p") (V2DF "")
704 (V4DI "p") (V4DF "")
705 (V8DI "p") (V8DF "")
706 (V4SI "p") (V4SF "")
707 (V8SI "p") (V8SF "")
8b994297
AI
708 (V16SI "p") (V16SF "")
709 (V16QI "p") (V8HI "p")
710 (V32QI "p") (V16HI "p")
711 (V64QI "p") (V32HI "p")])
7b45b87f 712
cbb734aa
UB
713;; SSE scalar suffix for vector modes
714(define_mode_attr ssescalarmodesuffix
977e83a3
KY
715 [(SF "ss") (DF "sd")
716 (V8SF "ss") (V4DF "sd")
cbb734aa
UB
717 (V4SF "ss") (V2DF "sd")
718 (V8SI "ss") (V4DI "sd")
719 (V4SI "d")])
720
8dfb9f16
UB
721;; Pack/unpack vector modes
722(define_mode_attr sseunpackmode
977e83a3 723 [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")
3bdf6340
AI
724 (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")
725 (V32HI "V16SI") (V64QI "V32HI") (V16SI "V8DI")])
8dfb9f16
UB
726
727(define_mode_attr ssepackmode
977e83a3 728 [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")
e8d08206
AI
729 (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")
730 (V32HI "V64QI") (V16SI "V32HI") (V8DI "V16SI")])
8dfb9f16 731
cbb734aa
UB
732;; Mapping of the max integer size for xop rotate immediate constraint
733(define_mode_attr sserotatemax
734 [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
8dfb9f16 735
cd7c6bc5 736;; Mapping of mode to cast intrinsic name
275be1da
IT
737(define_mode_attr castmode
738 [(V8SI "si") (V8SF "ps") (V4DF "pd")
739 (V16SI "si") (V16SF "ps") (V8DF "pd")])
cd7c6bc5 740
ee9dd92e
UB
741;; Instruction suffix for sign and zero extensions.
742(define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
743
1db4406e 744;; i128 for integer vectors and TARGET_AVX2, f128 otherwise.
ec5e777c 745;; i64x4 or f64x4 for 512bit modes.
1db4406e 746(define_mode_attr i128
ec5e777c
AI
747 [(V16SF "f64x4") (V8SF "f128") (V8DF "f64x4") (V4DF "f128")
748 (V64QI "i64x4") (V32QI "%~128") (V32HI "i64x4") (V16HI "%~128")
749 (V16SI "i64x4") (V8SI "%~128") (V8DI "i64x4") (V4DI "%~128")])
1db4406e 750
ef719a44 751;; Mix-n-match
95879c72 752(define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
275be1da 753(define_mode_iterator AVX512MODE2P [V16SI V16SF V8DF])
95879c72 754
8b994297
AI
755;; Mapping for dbpsadbw modes
756(define_mode_attr dbpsadbwmode
757 [(V32HI "V64QI") (V16HI "V32QI") (V8HI "V16QI")])
c96b4102 758
ab931c71
AI
759;; Mapping suffixes for broadcast
760(define_mode_attr bcstscalarsuff
8b994297
AI
761 [(V64QI "b") (V32QI "b") (V16QI "b")
762 (V32HI "w") (V16HI "w") (V8HI "w")
763 (V16SI "d") (V8SI "d") (V4SI "d")
764 (V8DI "q") (V4DI "q") (V2DI "q")
765 (V16SF "ss") (V8SF "ss") (V4SF "ss")
766 (V8DF "sd") (V4DF "sd") (V2DF "sd")])
ab931c71 767
4854de0d
AI
768;; Tie mode of assembler operand to mode iterator
769(define_mode_attr concat_tg_mode
770 [(V32QI "t") (V16HI "t") (V8SI "t") (V4DI "t") (V8SF "t") (V4DF "t")
771 (V64QI "g") (V32HI "g") (V16SI "g") (V8DI "g") (V16SF "g") (V8DF "g")])
772
773
47490470
AI
774;; Include define_subst patterns for instructions with mask
775(include "subst.md")
776
ef719a44
RH
777;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
778
779;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
780;;
781;; Move patterns
782;;
783;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
784
e81b8564
UB
785;; All of these patterns are enabled for SSE1 as well as SSE2.
786;; This is essential for maintaining stable calling conventions.
787
;; Standard-named move expander, instantiated once per VMOVE mode and
;; enabled under TARGET_SSE.  It emits no RTL of its own: the whole
;; expansion is delegated to ix86_expand_vector_move, after which DONE
;; tells the generator that the expander has already produced the insns.
95879c72 788(define_expand "mov<mode>"
c7ecdec6
KY
789 [(set (match_operand:VMOVE 0 "nonimmediate_operand")
790 (match_operand:VMOVE 1 "nonimmediate_operand"))]
e81b8564 791 "TARGET_SSE"
95879c72
L
792{
793 ix86_expand_vector_move (<MODE>mode, operands);
794 DONE;
795})
796
;; Move insn for every VMOVE mode.  The insn condition requires TARGET_SSE
;; and that at least one of the two operands is a register.
;;
;; Alternatives (destination <- source):
;;   0: v <- C    constant load; the template comes from
;;                standard_sse_constant_opcode.
;;   1: v <- vm   register or memory into register.
;;   2: m <- v    register into memory.
;;
;; For alternatives 1 and 2 the output code first handles the AVX512F
;; without AVX512VL case for modes narrower than 64 bytes when either
;; operand is an EXT_REX_SSE register: stores use
;; vextract<shuffletype>{64x4,32x4}, loads use
;; vbroadcast<shuffletype>{64x4,32x4}, and register-to-register moves use
;; an aligned move on the %g-widened operands.  Otherwise the mnemonic is
;; selected from the insn's "mode" attribute, choosing the unaligned form
;; when misaligned_operand holds for either operand (for the non-XI modes
;; that check is additionally gated on TARGET_AVX).
;;
;; The "mode" attribute itself is computed by the cond at the end of the
;; pattern, tested in order: <ssePSmode> for 16-byte modes when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL holds or when alternative 2 is
;; used with TARGET_SSE_TYPELESS_STORES; <sseinsnmode> under TARGET_AVX;
;; V4SF without TARGET_SSE2 or when optimizing the function for size; TI
;; for alternative 0 with TARGET_SSE_LOAD0_BY_PXOR; else <sseinsnmode>.
e81b8564 797(define_insn "*mov<mode>_internal"
3f97cb0b
AI
798 [(set (match_operand:VMOVE 0 "nonimmediate_operand" "=v,v ,m")
799 (match_operand:VMOVE 1 "nonimmediate_or_sse_const_operand" "C ,vm,v"))]
e81b8564 800 "TARGET_SSE
95879c72
L
801 && (register_operand (operands[0], <MODE>mode)
802 || register_operand (operands[1], <MODE>mode))"
803{
3f97cb0b 804 int mode = get_attr_mode (insn);
95879c72
L
805 switch (which_alternative)
806 {
807 case 0:
808 return standard_sse_constant_opcode (insn, operands[1]);
809 case 1:
810 case 2:
3f97cb0b
AI
811 /* There is no evex-encoded vmov* for sizes smaller than 64-bytes
812 in avx512f, so we need to use workarounds, to access sse registers
e0aacde4 813 16-31, which are evex-only. In avx512vl we don't need workarounds. */
f2864cc4 814 if (TARGET_AVX512F && <MODE_SIZE> < 64 && !TARGET_AVX512VL
e0aacde4
AI
815 && ((REG_P (operands[0]) && EXT_REX_SSE_REGNO_P (REGNO (operands[0])))
816 || (REG_P (operands[1]) && EXT_REX_SSE_REGNO_P (REGNO (operands[1])))))
3f97cb0b
AI
817 {
818 if (memory_operand (operands[0], <MODE>mode))
819 {
039eee3f 820 if (<MODE_SIZE> == 32)
3f97cb0b 821 return "vextract<shuffletype>64x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
039eee3f 822 else if (<MODE_SIZE> == 16)
3f97cb0b
AI
823 return "vextract<shuffletype>32x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
824 else
825 gcc_unreachable ();
826 }
827 else if (memory_operand (operands[1], <MODE>mode))
828 {
039eee3f 829 if (<MODE_SIZE> == 32)
3f97cb0b 830 return "vbroadcast<shuffletype>64x4\t{%1, %g0|%g0, %1}";
039eee3f 831 else if (<MODE_SIZE> == 16)
3f97cb0b
AI
832 return "vbroadcast<shuffletype>32x4\t{%1, %g0|%g0, %1}";
833 else
834 gcc_unreachable ();
835 }
836 else
837 /* Reg -> reg move is always aligned. Just use wider move. */
838 switch (mode)
839 {
840 case MODE_V8SF:
841 case MODE_V4SF:
842 return "vmovaps\t{%g1, %g0|%g0, %g1}";
843 case MODE_V4DF:
844 case MODE_V2DF:
845 return "vmovapd\t{%g1, %g0|%g0, %g1}";
846 case MODE_OI:
847 case MODE_TI:
848 return "vmovdqa64\t{%g1, %g0|%g0, %g1}";
849 default:
850 gcc_unreachable ();
851 }
852 }
853 switch (mode)
977e83a3 854 {
3f97cb0b 855 case MODE_V16SF:
95879c72
L
856 case MODE_V8SF:
857 case MODE_V4SF:
e81b8564
UB
858 if (TARGET_AVX
859 && (misaligned_operand (operands[0], <MODE>mode)
860 || misaligned_operand (operands[1], <MODE>mode)))
d253656a
L
861 return "vmovups\t{%1, %0|%0, %1}";
862 else
e81b8564
UB
863 return "%vmovaps\t{%1, %0|%0, %1}";
864
3f97cb0b 865 case MODE_V8DF:
95879c72
L
866 case MODE_V4DF:
867 case MODE_V2DF:
e81b8564
UB
868 if (TARGET_AVX
869 && (misaligned_operand (operands[0], <MODE>mode)
870 || misaligned_operand (operands[1], <MODE>mode)))
d253656a 871 return "vmovupd\t{%1, %0|%0, %1}";
1133125e 872 else
e81b8564
UB
873 return "%vmovapd\t{%1, %0|%0, %1}";
874
875 case MODE_OI:
876 case MODE_TI:
877 if (TARGET_AVX
878 && (misaligned_operand (operands[0], <MODE>mode)
879 || misaligned_operand (operands[1], <MODE>mode)))
e0aacde4
AI
880 return TARGET_AVX512VL ? "vmovdqu64\t{%1, %0|%0, %1}"
881 : "vmovdqu\t{%1, %0|%0, %1}";
1133125e 882 else
e0aacde4
AI
883 return TARGET_AVX512VL ? "vmovdqa64\t{%1, %0|%0, %1}"
884 : "%vmovdqa\t{%1, %0|%0, %1}";
3f97cb0b
AI
885 case MODE_XI:
886 if (misaligned_operand (operands[0], <MODE>mode)
887 || misaligned_operand (operands[1], <MODE>mode))
888 return "vmovdqu64\t{%1, %0|%0, %1}";
889 else
890 return "vmovdqa64\t{%1, %0|%0, %1}";
ef719a44 891
a5e11364 892 default:
e81b8564 893 gcc_unreachable ();
a5e11364 894 }
ef719a44 895 default:
7637e42c 896 gcc_unreachable ();
ef719a44
RH
897 }
898}
899 [(set_attr "type" "sselog1,ssemov,ssemov")
e81b8564 900 (set_attr "prefix" "maybe_vex")
ef719a44 901 (set (attr "mode")
659c0e68
JM
902 (cond [(and (match_test "<MODE_SIZE> == 16")
903 (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
904 (and (eq_attr "alternative" "2")
905 (match_test "TARGET_SSE_TYPELESS_STORES"))))
e338c25c 906 (const_string "<ssePSmode>")
20f9034b 907 (match_test "TARGET_AVX")
977e83a3 908 (const_string "<sseinsnmode>")
e338c25c
UB
909 (ior (not (match_test "TARGET_SSE2"))
910 (match_test "optimize_function_for_size_p (cfun)"))
a5e11364 911 (const_string "V4SF")
63705578
UB
912 (and (eq_attr "alternative" "0")
913 (match_test "TARGET_SSE_LOAD0_BY_PXOR"))
914 (const_string "TI")
a5e11364 915 ]
e338c25c 916 (const_string "<sseinsnmode>")))])
ef719a44 917
;; Masked load for the V48_AVX512VL modes: operand 0 receives operand 1
;; merged with operand 2 under the mask register in operand 3.  The
;; mnemonic is assembled at output time from three pieces: a stem chosen
;; by element class, an "a"/"u" alignment letter and an element suffix.
(define_insn "<avx512>_load<mode>_mask"
  [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v")
        (vec_merge:V48_AVX512VL
          (match_operand:V48_AVX512VL 1 "nonimmediate_operand" "v,m")
          (match_operand:V48_AVX512VL 2 "vector_move_operand" "0C,0C")
          (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
  "TARGET_AVX512F"
{
  /* The returned template lives in static storage.  */
  static char templ [64];

  const bool fp_elt_p = FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode));
  const char *stem = fp_elt_p ? "vmov" : "vmovdq";
  const char *elt_suffix = fp_elt_p ? "<ssemodesuffix>" : "<ssescalarsize>";
  /* Operand 1 is the source and may be memory.  */
  const char *align_letter
    = misaligned_operand (operands[1], <MODE>mode) ? "u" : "a";

  snprintf (templ, sizeof (templ), "%s%s%s\t{%%1, %%0%%{%%3%%}%%N2|%%0%%{%%3%%}%%N2, %%1}",
            stem, align_letter, elt_suffix);
  return templ;
}
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "memory" "none,load")
   (set_attr "mode" "<sseinsnmode>")])
955
7cbdc87d
KY
;; Masked load for the VI12_AVX512VL modes: operand 0 receives operand 1
;; merged with operand 2 under the mask register in operand 3.  The
;; template always uses the vmovdqu form.
(define_insn "<avx512>_load<mode>_mask"
  [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v")
        (vec_merge:VI12_AVX512VL
          (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "v,m")
          (match_operand:VI12_AVX512VL 2 "vector_move_operand" "0C,0C")
          (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
  "TARGET_AVX512BW"
  "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "memory" "none,load")
   (set_attr "mode" "<sseinsnmode>")])
968
51e14b05
AI
;; Mask-controlled blend for the V48_AVX512VL modes: operand 0 is the
;; vec_merge of operand 2 and operand 1 under the mask register in
;; operand 3.
(define_insn "<avx512>_blendm<mode>"
  [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
        (vec_merge:V48_AVX512VL
          (match_operand:V48_AVX512VL 2 "nonimmediate_operand" "vm")
          (match_operand:V48_AVX512VL 1 "register_operand" "v")
          (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
  "TARGET_AVX512F"
{
  /* The iterator covers both floating-point and integer element modes
     (the masked load/store patterns on the same iterator make the same
     distinction).  Floating-point blends are spelled vblendmps/pd, while
     the integer forms carry the "p" prefix (vpblendmd/q), as the
     VI12_AVX512VL sibling pattern already does.  */
  if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
    return "vblendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}";
  else
    return "vpblendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}";
}
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
980
;; Mask-controlled blend for the VI12_AVX512VL modes: operand 0 is the
;; vec_merge of operand 2 and operand 1 under the mask register in
;; operand 3, emitted as vpblendm with the element suffix.
(define_insn "<avx512>_blendm<mode>"
  [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
        (vec_merge:VI12_AVX512VL
          (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
          (match_operand:VI12_AVX512VL 1 "register_operand" "v")
          (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
  "TARGET_AVX512BW"
  "vpblendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
992
;; Masked store for the V48_AVX512VL modes: memory operand 0 receives
;; register operand 1 merged with its own previous contents under the
;; mask register in operand 2.  The mnemonic is assembled at output time
;; from a stem chosen by element class, an "a"/"u" alignment letter and
;; an element suffix.
(define_insn "<avx512>_store<mode>_mask"
  [(set (match_operand:V48_AVX512VL 0 "memory_operand" "=m")
        (vec_merge:V48_AVX512VL
          (match_operand:V48_AVX512VL 1 "register_operand" "v")
          (match_dup 0)
          (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
  "TARGET_AVX512F"
{
  /* The returned template lives in static storage.  */
  static char buf [64];

  const char *insn_op;
  const char *sse_suffix;
  const char *align;
  if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
    {
      insn_op = "vmov";
      sse_suffix = "<ssemodesuffix>";
    }
  else
    {
      insn_op = "vmovdq";
      sse_suffix = "<ssescalarsize>";
    }

  /* The memory reference of a store is the destination, operand 0.
     Operand 1 is constrained to a register, so testing it could never
     select the unaligned form.  */
  if (misaligned_operand (operands[0], <MODE>mode))
    align = "u";
  else
    align = "a";

  snprintf (buf, sizeof (buf), "%s%s%s\t{%%1, %%0%%{%%2%%}|%%0%%{%%2%%}, %%1}",
            insn_op, align, sse_suffix);
  return buf;
}
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "memory" "store")
   (set_attr "mode" "<sseinsnmode>")])
1030
7cbdc87d
KY
;; Masked store for the VI12_AVX512VL modes: memory operand 0 receives
;; register operand 1 merged with its own previous contents under the
;; mask register in operand 2.  The template always uses the vmovdqu form.
(define_insn "<avx512>_store<mode>_mask"
  [(set (match_operand:VI12_AVX512VL 0 "memory_operand" "=m")
        (vec_merge:VI12_AVX512VL
          (match_operand:VI12_AVX512VL 1 "register_operand" "v")
          (match_dup 0)
          (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
  "TARGET_AVX512BW"
  "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "memory" "store")
   (set_attr "mode" "<sseinsnmode>")])
1043
e81b8564
UB
;; Build a V2DI whose first element is element 0 of operand 1 and whose
;; second element is zero.
(define_insn "sse2_movq128"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (vec_concat:V2DI
          (vec_select:DI
            (match_operand:V2DI 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0)]))
          (const_int 0)))]
  "TARGET_SSE2"
  "%vmovq\t{%1, %0|%0, %q1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
1056
ebff937c
SH
1057;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
1058;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
1059;; from memory, we'd prefer to load the memory directly into the %xmm
1060;; register. To facilitate this happy circumstance, this pattern won't
1061;; split until after register allocation. If the 64-bit value didn't
1062;; come from memory, this is the best we can do. This is much better
1063;; than storing %edx:%eax into a stack temporary and loading an %xmm
1064;; from there.
1065
;; Operand 0: V4SI destination register.
;; Operand 1: DImode source, either an integer register pair or memory.
;; Operand 2: V4SI scratch, only needed for the register-pair alternative.
;; The insn is emitted as "#" and split once reload has completed.
(define_insn_and_split "movdi_to_sse"
  [(parallel
    [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
          (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
     (clobber (match_scratch:V4SI 2 "=&x,X"))])]
  "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  if (register_operand (operands[1], DImode))
    {
      /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
         Assemble the 64-bit DImode value in an xmm register: load each
         32-bit half into its own vector register, then interleave.  */
      emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
                                  gen_rtx_SUBREG (SImode, operands[1], 0)));
      emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
                                  gen_rtx_SUBREG (SImode, operands[1], 4)));
      emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
                                             operands[2]));
    }
  else if (memory_operand (operands[1], DImode))
    /* This split only runs after reload, when new pseudo registers can
       no longer be created, so build the zero-extended value directly
       in the destination viewed as V2DI instead of in a temporary.  */
    emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
                                   operands[1], const0_rtx));
  else
    gcc_unreachable ();

  /* All replacement insns were emitted above; the (const_int 0)
     template is only a placeholder and must not be emitted.  */
  DONE;
})
1096
;; After reload, rewrite a V4SF register set from a
;; zero_extended_scalar_load_operand as a vec_merge (mask 1) of the
;; duplicated SFmode part of that operand with an all-zero vector.
(define_split
  [(set (match_operand:V4SF 0 "register_operand")
        (match_operand:V4SF 1 "zero_extended_scalar_load_operand"))]
  "TARGET_SSE && reload_completed"
  [(set (match_dup 0)
        (vec_merge:V4SF
          (vec_duplicate:V4SF (match_dup 1))
          (match_dup 2)
          (const_int 1)))]
{
  /* Operand 2 is the zero vector; operand 1 is narrowed to its SFmode
     subreg at byte offset 0.  */
  operands[2] = CONST0_RTX (V4SFmode);
  operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
})
1110
;; After reload, rewrite a V2DF register set from a
;; zero_extended_scalar_load_operand as a vec_concat of the DFmode part
;; of that operand with a DFmode zero.
(define_split
  [(set (match_operand:V2DF 0 "register_operand")
        (match_operand:V2DF 1 "zero_extended_scalar_load_operand"))]
  "TARGET_SSE2 && reload_completed"
  [(set (match_dup 0)
        (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
{
  /* Operand 2 is the scalar zero; operand 1 is narrowed to its DFmode
     subreg at byte offset 0.  */
  operands[2] = CONST0_RTX (DFmode);
  operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
})
1120
;; Misaligned vector move for the VMOVE modes.  All the work is handed
;; to ix86_expand_vector_move_misalign; the RTL template below is never
;; emitted because the expander finishes with DONE.
(define_expand "movmisalign<mode>"
  [(set (match_operand:VMOVE 0 "nonimmediate_operand")
        (match_operand:VMOVE 1 "nonimmediate_operand"))]
  "TARGET_SSE"
{
  ix86_expand_vector_move_misalign (<MODE>mode, operands);
  DONE;
})
1129
90be6e46
JJ
;; Expander for the unaligned floating-point vector load (UNSPEC_LOADU)
;; on the VF modes, optionally masked.
(define_expand "<sse>_loadu<ssemodesuffix><avxsizesuffix><mask_name>"
  [(set (match_operand:VF 0 "register_operand")
        (unspec:VF [(match_operand:VF 1 "nonimmediate_operand")]
                   UNSPEC_LOADU))]
  "TARGET_SSE && <mask_mode512bit_condition>"
{
  /* With AVX the ordinary *mov<mode>_internal pattern copes with a
     misaligned source, and a plain SET, unlike the UNSPEC, can be
     combined with arithmetic instructions.  So when the source really
     is misaligned, emit a plain (possibly masked) move.  Otherwise fall
     through and emit UNSPEC_LOADU to honor the user's request for a
     misaligned load.  */
  if (TARGET_AVX
      && misaligned_operand (operands[1], <MODE>mode))
    {
      rtx value = (<mask_applied>
                   ? gen_rtx_VEC_MERGE (<MODE>mode, operands[1],
                                        operands[2 * <mask_applied>],
                                        operands[3 * <mask_applied>])
                   : operands[1]);
      emit_insn (gen_rtx_SET (operands[0], value));
      DONE;
    }
})
1153
;; Unaligned floating-point vector load (UNSPEC_LOADU) on the VF modes.
;; The single-precision mnemonic is used whenever the insn's "mode"
;; attribute is one of the SF vector modes.
(define_insn "*<sse>_loadu<ssemodesuffix><avxsizesuffix><mask_name>"
  [(set (match_operand:VF 0 "register_operand" "=v")
        (unspec:VF
          [(match_operand:VF 1 "nonimmediate_operand" "vm")]
          UNSPEC_LOADU))]
  "TARGET_SSE && <mask_mode512bit_condition>"
{
  switch (get_attr_mode (insn))
    {
    case MODE_V16SF:
    case MODE_V8SF:
    case MODE_V4SF:
      return "%vmovups\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
    default:
      break;
    }

  /* Every other mode attribute uses the natural suffix of the mode.  */
  return "%vmovu<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
}
  [(set_attr "type" "ssemov")
   (set_attr "movu" "1")
   (set_attr "ssememalign" "8")
   (set_attr "prefix" "maybe_vex")
   (set (attr "mode")
        (cond [(and (match_test "<MODE_SIZE> == 16")
                    (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
                 (const_string "<ssePSmode>")
               (match_test "TARGET_AVX")
                 (const_string "<MODE>")
               (match_test "optimize_function_for_size_p (cfun)")
                 (const_string "V4SF")
              ]
              (const_string "<MODE>")))])
ef719a44 1185
97afef00
UB
1186;; Merge movsd/movhpd to movupd for TARGET_SSE_UNALIGNED_LOAD_OPTIMAL targets.
(define_peephole2
  ;; First insn: operand 0 = { mem operand 1, zero (operand 4) }.
  [(set (match_operand:V2DF 0 "register_operand")
        (vec_concat:V2DF (match_operand:DF 1 "memory_operand")
                         (match_operand:DF 4 "const0_operand")))
   ;; Second insn: operand 2 = { element 0 of operand 2, mem operand 3 }.
   (set (match_operand:V2DF 2 "register_operand")
        (vec_concat:V2DF (vec_select:DF (match_dup 2)
                                        (parallel [(const_int 0)]))
                         (match_operand:DF 3 "memory_operand")))]
  "TARGET_SSE2 && TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
   && ix86_operands_ok_for_move_multiple (operands, true, DFmode)"
  [(set (match_dup 2)
        (unspec:V2DF [(match_dup 4)] UNSPEC_LOADU))]
{
  /* Operand 4 is reused: it becomes the V2DF view of the first memory
     operand, at offset 0.  */
  operands[4] = adjust_address (operands[1], V2DFmode, 0);
})
1200
860f5e77
UB
;; Unaligned floating-point vector store (UNSPEC_STOREU) on the VF modes.
;; The single-precision mnemonic is used whenever the insn's "mode"
;; attribute is one of the SF vector modes.
(define_insn "<sse>_storeu<ssemodesuffix><avxsizesuffix>"
  [(set (match_operand:VF 0 "memory_operand" "=m")
        (unspec:VF
          [(match_operand:VF 1 "register_operand" "v")]
          UNSPEC_STOREU))]
  "TARGET_SSE"
{
  switch (get_attr_mode (insn))
    {
    case MODE_V16SF:
    case MODE_V8SF:
    case MODE_V4SF:
      return "%vmovups\t{%1, %0|%0, %1}";
    default:
      break;
    }

  /* Every other mode attribute uses the natural suffix of the mode.  */
  return "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}";
}
  [(set_attr "type" "ssemov")
   (set_attr "movu" "1")
   (set_attr "ssememalign" "8")
   (set_attr "prefix" "maybe_vex")
   (set (attr "mode")
        (cond [(and (match_test "<MODE_SIZE> == 16")
                    (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
                         (match_test "TARGET_SSE_TYPELESS_STORES")))
                 (const_string "<ssePSmode>")
               (match_test "TARGET_AVX")
                 (const_string "<MODE>")
               (match_test "optimize_function_for_size_p (cfun)")
                 (const_string "V4SF")
              ]
              (const_string "<MODE>")))])
1233
b040ded3
AI
;; Masked unaligned floating-point store on the VF_AVX512VL modes:
;; memory operand 0 receives the UNSPEC_STOREU of register operand 1
;; merged with its own previous contents under mask register operand 2.
(define_insn "<avx512>_storeu<ssemodesuffix><avxsizesuffix>_mask"
  [(set (match_operand:VF_AVX512VL 0 "memory_operand" "=m")
        (vec_merge:VF_AVX512VL
          (unspec:VF_AVX512VL
            [(match_operand:VF_AVX512VL 1 "register_operand" "v")]
            UNSPEC_STOREU)
          (match_dup 0)
          (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
  "TARGET_AVX512F"
{
  switch (get_attr_mode (insn))
    {
    case MODE_V16SF:
    case MODE_V8SF:
    case MODE_V4SF:
      return "vmovups\t{%1, %0%{%2%}|%0%{%2%}, %1}";
    default:
      break;
    }

  /* Every other mode attribute uses the natural suffix of the mode.  */
  return "vmovu<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
}
  [(set_attr "type" "ssemov")
   (set_attr "movu" "1")
   (set_attr "memory" "store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
1259
97afef00
UB
1260;; Merge movlpd/movhpd to movupd for TARGET_SSE_UNALIGNED_STORE_OPTIMAL targets.
(define_peephole2
  ;; First insn: store element 0 of register operand 1 to mem operand 0.
  [(set (match_operand:DF 0 "memory_operand")
        (vec_select:DF (match_operand:V2DF 1 "register_operand")
                       (parallel [(const_int 0)])))
   ;; Second insn: store element 1 of register operand 3 to mem operand 2.
   (set (match_operand:DF 2 "memory_operand")
        (vec_select:DF (match_operand:V2DF 3 "register_operand")
                       (parallel [(const_int 1)])))]
  "TARGET_SSE2 && TARGET_SSE_UNALIGNED_STORE_OPTIMAL
   && ix86_operands_ok_for_move_multiple (operands, false, DFmode)"
  [(set (match_dup 4)
        (unspec:V2DF [(match_dup 1)] UNSPEC_STOREU))]
{
  /* Operand 4 is the V2DF view of the first memory operand, at
     offset 0.  */
  operands[4] = adjust_address (operands[0], V2DFmode, 0);
})
1273
ca9b264e
AI
1274/* For AVX, normal *mov<mode>_internal pattern will handle unaligned loads
1275 just fine if misaligned_operand is true, and without the UNSPEC it can
1276 be combined with arithmetic instructions. If misaligned_operand is
1277 false, still emit UNSPEC_LOADU insn to honor user's request for
1278 misaligned load. */
(define_expand "<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
  [(set (match_operand:VI1 0 "register_operand")
        (unspec:VI1
          [(match_operand:VI1 1 "nonimmediate_operand")]
          UNSPEC_LOADU))]
  "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
{
  /* Under AVX, a source that really is misaligned is loaded with a
     plain (possibly masked) SET; otherwise fall through and emit the
     UNSPEC_LOADU template.  */
  if (TARGET_AVX
      && misaligned_operand (operands[1], <MODE>mode))
    {
      rtx value = (<mask_applied>
                   ? gen_rtx_VEC_MERGE (<MODE>mode, operands[1],
                                        operands[2 * <mask_applied>],
                                        operands[3 * <mask_applied>])
                   : operands[1]);
      emit_insn (gen_rtx_SET (operands[0], value));
      DONE;
    }
})
1298
ca9b264e
AI
;; Expander for the unaligned integer vector load (UNSPEC_LOADU) on the
;; VI_ULOADSTORE_BW_AVX512VL modes, optionally masked.
(define_expand "<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
  [(set (match_operand:VI_ULOADSTORE_BW_AVX512VL 0 "register_operand")
        (unspec:VI_ULOADSTORE_BW_AVX512VL
          [(match_operand:VI_ULOADSTORE_BW_AVX512VL 1 "nonimmediate_operand")]
          UNSPEC_LOADU))]
  "TARGET_AVX512BW"
{
  /* A source that really is misaligned is loaded with a plain (possibly
     masked) SET; otherwise fall through and emit the UNSPEC_LOADU
     template.  */
  if (misaligned_operand (operands[1], <MODE>mode))
    {
      rtx value = (<mask_applied>
                   ? gen_rtx_VEC_MERGE (<MODE>mode, operands[1],
                                        operands[2 * <mask_applied>],
                                        operands[3 * <mask_applied>])
                   : operands[1]);
      emit_insn (gen_rtx_SET (operands[0], value));
      DONE;
    }
})
1317
;; Expander for the unaligned integer vector load (UNSPEC_LOADU) on the
;; VI_ULOADSTORE_F_AVX512VL modes, optionally masked.
(define_expand "<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
  [(set (match_operand:VI_ULOADSTORE_F_AVX512VL 0 "register_operand")
        (unspec:VI_ULOADSTORE_F_AVX512VL
          [(match_operand:VI_ULOADSTORE_F_AVX512VL 1 "nonimmediate_operand")]
          UNSPEC_LOADU))]
  "TARGET_AVX512F"
{
  /* A source that really is misaligned is loaded with a plain (possibly
     masked) SET; otherwise fall through and emit the UNSPEC_LOADU
     template.  */
  if (misaligned_operand (operands[1], <MODE>mode))
    {
      rtx value = (<mask_applied>
                   ? gen_rtx_VEC_MERGE (<MODE>mode, operands[1],
                                        operands[2 * <mask_applied>],
                                        operands[3 * <mask_applied>])
                   : operands[1]);
      emit_insn (gen_rtx_SET (operands[0], value));
      DONE;
    }
})
1336
;; Unaligned integer vector load (UNSPEC_LOADU) on the VI1 modes.
(define_insn "*<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
  [(set (match_operand:VI1 0 "register_operand" "=v")
        (unspec:VI1
          [(match_operand:VI1 1 "nonimmediate_operand" "vm")]
          UNSPEC_LOADU))]
  "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
{
  switch (get_attr_mode (insn))
    {
    case MODE_V8SF:
    case MODE_V4SF:
      return "%vmovups\t{%1, %0|%0, %1}";
    default:
      break;
    }

  /* The suffixed, maskable form is only used when both AVX512VL and
     AVX512BW are enabled.  */
  if (TARGET_AVX512VL && TARGET_AVX512BW)
    return "vmovdqu<ssescalarsize>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";

  return "%vmovdqu\t{%1, %0|%0, %1}";
}
  [(set_attr "type" "ssemov")
   (set_attr "movu" "1")
   (set_attr "ssememalign" "8")
   (set (attr "prefix_data16")
     (if_then_else
       (match_test "TARGET_AVX")
     (const_string "*")
     (const_string "1")))
   (set_attr "prefix" "maybe_vex")
   (set (attr "mode")
        (cond [(and (match_test "<MODE_SIZE> == 16")
                    (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
                 (const_string "<ssePSmode>")
               (match_test "TARGET_AVX")
                 (const_string "<sseinsnmode>")
               (match_test "optimize_function_for_size_p (cfun)")
                 (const_string "V4SF")
              ]
              (const_string "<sseinsnmode>")))])
1375
ca9b264e
AI
;; Unaligned integer vector load (UNSPEC_LOADU) on the
;; VI_ULOADSTORE_BW_AVX512VL modes, optionally masked.
(define_insn "*<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
  [(set (match_operand:VI_ULOADSTORE_BW_AVX512VL 0 "register_operand" "=v")
        (unspec:VI_ULOADSTORE_BW_AVX512VL
          [(match_operand:VI_ULOADSTORE_BW_AVX512VL 1 "nonimmediate_operand" "vm")]
          UNSPEC_LOADU))]
  "TARGET_AVX512BW"
  "vmovdqu<ssescalarsize>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "movu" "1")
   (set_attr "ssememalign" "8")
   (set_attr "prefix" "maybe_evex")])
1387
;; Unaligned integer vector load (UNSPEC_LOADU) on the
;; VI_ULOADSTORE_F_AVX512VL modes, optionally masked.
(define_insn "*<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
  [(set (match_operand:VI_ULOADSTORE_F_AVX512VL 0 "register_operand" "=v")
        (unspec:VI_ULOADSTORE_F_AVX512VL
          [(match_operand:VI_ULOADSTORE_F_AVX512VL 1 "nonimmediate_operand" "vm")]
          UNSPEC_LOADU))]
  "TARGET_AVX512F"
  "vmovdqu<ssescalarsize>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "movu" "1")
   (set_attr "ssememalign" "8")
   (set_attr "prefix" "maybe_evex")])
1399
;; Unaligned integer vector store (UNSPEC_STOREU) on the VI1 modes.
(define_insn "<sse2_avx_avx512f>_storedqu<mode>"
  [(set (match_operand:VI1 0 "memory_operand" "=m")
        (unspec:VI1
          [(match_operand:VI1 1 "register_operand" "v")]
          UNSPEC_STOREU))]
  "TARGET_SSE2"
{
  switch (get_attr_mode (insn))
    {
    case MODE_V16SF:
    case MODE_V8SF:
    case MODE_V4SF:
      return "%vmovups\t{%1, %0|%0, %1}";
    default:
      break;
    }

  /* The 256-bit and 128-bit byte vectors use the unsuffixed mnemonic
     unless both AVX512VL and AVX512BW are enabled; everything else
     uses the suffixed form.  */
  if ((<MODE>mode == V32QImode || <MODE>mode == V16QImode)
      && !(TARGET_AVX512VL && TARGET_AVX512BW))
    return "%vmovdqu\t{%1, %0|%0, %1}";

  return "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}";
}
  [(set_attr "type" "ssemov")
   (set_attr "movu" "1")
   (set_attr "ssememalign" "8")
   (set (attr "prefix_data16")
     (if_then_else
       (match_test "TARGET_AVX")
     (const_string "*")
     (const_string "1")))
   (set_attr "prefix" "maybe_vex")
   (set (attr "mode")
        (cond [(and (match_test "<MODE_SIZE> == 16")
                    (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
                         (match_test "TARGET_SSE_TYPELESS_STORES")))
                 (const_string "<ssePSmode>")
               (match_test "TARGET_AVX")
                 (const_string "<sseinsnmode>")
               (match_test "optimize_function_for_size_p (cfun)")
                 (const_string "V4SF")
              ]
              (const_string "<sseinsnmode>")))])
95879c72 1445
ca9b264e
AI
;; Unaligned integer vector store (UNSPEC_STOREU) on the
;; VI_ULOADSTORE_BW_AVX512VL modes.
(define_insn "<sse2_avx_avx512f>_storedqu<mode>"
  [(set (match_operand:VI_ULOADSTORE_BW_AVX512VL 0 "memory_operand" "=m")
        (unspec:VI_ULOADSTORE_BW_AVX512VL
          [(match_operand:VI_ULOADSTORE_BW_AVX512VL 1 "register_operand" "v")]
          UNSPEC_STOREU))]
  "TARGET_AVX512BW"
  "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "movu" "1")
   (set_attr "ssememalign" "8")
   (set_attr "prefix" "maybe_evex")])
1457
;; Unaligned integer vector store (UNSPEC_STOREU) on the
;; VI_ULOADSTORE_F_AVX512VL modes.
(define_insn "<sse2_avx_avx512f>_storedqu<mode>"
  [(set (match_operand:VI_ULOADSTORE_F_AVX512VL 0 "memory_operand" "=m")
        (unspec:VI_ULOADSTORE_F_AVX512VL
          [(match_operand:VI_ULOADSTORE_F_AVX512VL 1 "register_operand" "v")]
          UNSPEC_STOREU))]
  "TARGET_AVX512F"
  "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "movu" "1")
   (set_attr "ssememalign" "8")
   ;; Same prefix class as the matching load pattern and the
   ;; VI_ULOADSTORE_BW_AVX512VL load/store patterns, which emit the same
   ;; suffixed vmovdqu mnemonic.
   (set_attr "prefix" "maybe_evex")])
1469
;; Masked unaligned integer store on the VI48_AVX512VL modes: memory
;; operand 0 receives the UNSPEC_STOREU of register operand 1 merged
;; with its own previous contents under mask register operand 2.
(define_insn "<avx512>_storedqu<mode>_mask"
  [(set (match_operand:VI48_AVX512VL 0 "memory_operand" "=m")
        (vec_merge:VI48_AVX512VL
          (unspec:VI48_AVX512VL
            [(match_operand:VI48_AVX512VL 1 "register_operand" "v")]
            UNSPEC_STOREU)
          (match_dup 0)
          (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
  "TARGET_AVX512F"
  "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "movu" "1")
   (set_attr "memory" "store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
1485
;; Masked unaligned integer store on the VI12_AVX512VL modes: memory
;; operand 0 receives the UNSPEC_STOREU of register operand 1 merged
;; with its own previous contents under mask register operand 2.
(define_insn "<avx512>_storedqu<mode>_mask"
  [(set (match_operand:VI12_AVX512VL 0 "memory_operand" "=m")
        (vec_merge:VI12_AVX512VL
          (unspec:VI12_AVX512VL
            [(match_operand:VI12_AVX512VL 1 "register_operand" "v")]
            UNSPEC_STOREU)
          (match_dup 0)
          (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
  "TARGET_AVX512BW"
  "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "movu" "1")
   (set_attr "memory" "store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
1501
;; lddqu: load memory operand 1 into register operand 0 for the VI1
;; modes, represented as UNSPEC_LDDQU.
(define_insn "<sse3>_lddqu<avxsizesuffix>"
  [(set (match_operand:VI1 0 "register_operand" "=x")
        (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
                    UNSPEC_LDDQU))]
  "TARGET_SSE3"
  "%vlddqu\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "movu" "1")
   (set_attr "ssememalign" "8")
   ;; Both explicit prefix attributes fall back to "*" under AVX.
   (set (attr "prefix_data16")
     (if_then_else
       (match_test "TARGET_AVX")
     (const_string "*")
     (const_string "0")))
   (set (attr "prefix_rep")
     (if_then_else
       (match_test "TARGET_AVX")
     (const_string "*")
     (const_string "1")))
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<sseinsnmode>")])
65b82caa 1523
f32c951e
L
;; movnti: store integer register operand 1 to memory operand 0 for the
;; SWI48 modes, represented as UNSPEC_MOVNT.
(define_insn "sse2_movnti<mode>"
  [(set (match_operand:SWI48 0 "memory_operand" "=m")
        (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")]
                      UNSPEC_MOVNT))]
  "TARGET_SSE2"
  "movnti\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_data16" "0")
   (set_attr "mode" "<MODE>")])
ef719a44 1533
e81b8564
UB
;; movntps/movntpd: store vector register operand 1 to memory operand 0
;; for the VF modes, represented as UNSPEC_MOVNT.
(define_insn "<sse>_movnt<mode>"
  [(set (match_operand:VF 0 "memory_operand" "=m")
        (unspec:VF
          [(match_operand:VF 1 "register_operand" "v")]
          UNSPEC_MOVNT))]
  "TARGET_SSE"
  "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<MODE>")])
1544
;; movntdq: store vector register operand 1 to memory operand 0 for the
;; VI8 modes, represented as UNSPEC_MOVNT.
(define_insn "<sse2>_movnt<mode>"
  [(set (match_operand:VI8 0 "memory_operand" "=m")
        (unspec:VI8 [(match_operand:VI8 1 "register_operand" "v")]
                    UNSPEC_MOVNT))]
  "TARGET_SSE2"
  "%vmovntdq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   ;; The explicit data16 prefix falls back to "*" under AVX.
   (set (attr "prefix_data16")
     (if_then_else
       (match_test "TARGET_AVX")
     (const_string "*")
     (const_string "1")))
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<sseinsnmode>")])
95879c72 1559
79f5e442
ZD
1560; Expand patterns for non-temporal stores. At the moment, only those
1561; that directly map to insns are defined; it would be possible to
1562; define patterns for other modes that would expand to several insns.
1563
6bec6c98
UB
1564;; Modes handled by storent patterns.
(define_mode_iterator STORENT_MODE
  [;; Scalar integer modes.
   (DI "TARGET_SSE2 && TARGET_64BIT")
   (SI "TARGET_SSE2")
   ;; Scalar floating-point modes.
   (SF "TARGET_SSE4A")
   (DF "TARGET_SSE4A")
   ;; DImode-element vectors.
   (V8DI "TARGET_AVX512F")
   (V4DI "TARGET_AVX")
   (V2DI "TARGET_SSE2")
   ;; SFmode-element vectors; V4SF has no extra condition.
   (V16SF "TARGET_AVX512F")
   (V8SF "TARGET_AVX")
   V4SF
   ;; DFmode-element vectors.
   (V8DF "TARGET_AVX512F")
   (V4DF "TARGET_AVX")
   (V2DF "TARGET_SSE2")])
6bec6c98 1571
;; Non-temporal store expander for every mode in STORENT_MODE.  There is
;; no preparation code: the UNSPEC_MOVNT set is emitted as written.
(define_expand "storent<mode>"
  [(set (match_operand:STORENT_MODE 0 "memory_operand")
        (unspec:STORENT_MODE
          [(match_operand:STORENT_MODE 1 "register_operand")]
          UNSPEC_MOVNT))]
  "TARGET_SSE")
79f5e442 1578
ef719a44
RH
1579;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1580;;
d6023b50 1581;; Parallel floating point arithmetic
ef719a44
RH
1582;;
1583;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1584
;; abs/neg expander for the VF modes.  All the work is handed to
;; ix86_expand_fp_absneg_operator; the RTL template below is never
;; emitted because the expander finishes with DONE.
(define_expand "<code><mode>2"
  [(set (match_operand:VF 0 "register_operand")
        (absneg:VF
          (match_operand:VF 1 "register_operand")))]
  "TARGET_SSE"
{
  ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands);
  DONE;
})
ef719a44 1591
;; abs/neg on the VF modes, kept as "#" until reload has completed and
;; then split into a single logical operation between the value
;; (operand 1) and the vector in operand 2: XOR for NEG, AND otherwise.
(define_insn_and_split "*absneg<mode>2"
  [(set (match_operand:VF 0 "register_operand" "=x,x,v,v")
        (match_operator:VF 3 "absneg_operator"
          [(match_operand:VF 1 "nonimmediate_operand" "0, xm, v, m")]))
   (use (match_operand:VF 2 "nonimmediate_operand" "xm, 0, vm,v"))]
  "TARGET_SSE"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx lhs, rhs;

  if (TARGET_AVX)
    {
      /* When operand 1 is in memory the two inputs are swapped.  */
      const bool swap_p = MEM_P (operands[1]);
      lhs = swap_p ? operands[2] : operands[1];
      rhs = swap_p ? operands[1] : operands[2];
    }
  else
    {
      /* The destination is the first input; the second input is
         whichever of operands 1 and 2 is not tied to it.  */
      lhs = operands[0];
      rhs = (rtx_equal_p (operands[0], operands[1])
             ? operands[2] : operands[1]);
    }

  const enum rtx_code logic_code
    = GET_CODE (operands[3]) == NEG ? XOR : AND;
  rtx logic_rtx = gen_rtx_fmt_ee (logic_code, <MODE>mode, lhs, rhs);
  emit_insn (gen_rtx_SET (operands[0], logic_rtx));
  DONE;
}
  [(set_attr "isa" "noavx,noavx,avx,avx")])
95879c72 1629
;; Vector add/sub expander for the VF modes, with the optional mask and
;; rounding variants.  The operands are passed through
;; ix86_fixup_binary_operands_no_copy before the template is emitted.
(define_expand "<plusminus_insn><mode>3<mask_name><round_name>"
  [(set (match_operand:VF 0 "register_operand")
        (plusminus:VF
          (match_operand:VF 1 "<round_nimm_predicate>")
          (match_operand:VF 2 "<round_nimm_predicate>")))]
  "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
{
  ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
})
ef719a44 1637
;; Vector add/sub on the VF modes.  Alternative 0 is the two-operand
;; form (isa "noavx"); alternative 1 is the three-operand v-prefixed
;; form (isa "avx") that also carries the mask and rounding operands.
(define_insn "*<plusminus_insn><mode>3<mask_name><round_name>"
  [(set (match_operand:VF 0 "register_operand" "=x,v")
        (plusminus:VF
          (match_operand:VF 1 "<round_nimm_predicate>" "<comm>0,v")
          (match_operand:VF 2 "<round_nimm_predicate>" "xm,<round_constraint>")))]
  "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) && <mask_mode512bit_condition> && <round_mode512bit_condition>"
  "@
   <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
   v<plusminus_mnemonic><ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseadd")
   (set_attr "prefix" "<mask_prefix3>")
   (set_attr "mode" "<MODE>")])
ef719a44 1651
;; Add/sub on the VF_128 modes where only element 0 takes the result:
;; the vec_merge with mask 1 keeps the remaining elements of operand 1.
(define_insn "<sse>_vm<plusminus_insn><mode>3<round_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=x,v")
        (vec_merge:VF_128
          (plusminus:VF_128
            (match_operand:VF_128 1 "register_operand" "0,v")
            (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_constraint>"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE"
  "@
   <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
   v<plusminus_mnemonic><ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %<iptr>2<round_op3>}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseadd")
   (set_attr "prefix" "<round_prefix>")
   (set_attr "mode" "<ssescalarmode>")])
ef719a44 1668
;; Vector multiply expander for the VF modes, with the optional mask and
;; rounding variants.  The operands are passed through
;; ix86_fixup_binary_operands_no_copy before the template is emitted.
(define_expand "mul<mode>3<mask_name><round_name>"
  [(set (match_operand:VF 0 "register_operand")
        (mult:VF
          (match_operand:VF 1 "<round_nimm_predicate>")
          (match_operand:VF 2 "<round_nimm_predicate>")))]
  "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
{
  ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
})
1676
;; Vector multiply on the VF modes.  Alternative 0 is the two-operand
;; form (isa "noavx"); alternative 1 is the three-operand v-prefixed
;; form (isa "avx") that also carries the mask and rounding operands.
(define_insn "*mul<mode>3<mask_name><round_name>"
  [(set (match_operand:VF 0 "register_operand" "=x,v")
        (mult:VF
          (match_operand:VF 1 "<round_nimm_predicate>" "%0,v")
          (match_operand:VF 2 "<round_nimm_predicate>" "xm,<round_constraint>")))]
  "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands) && <mask_mode512bit_condition> && <round_mode512bit_condition>"
  "@
   mul<ssemodesuffix>\t{%2, %0|%0, %2}
   vmul<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssemul")
   (set_attr "prefix" "<mask_prefix3>")
   (set_attr "btver2_decode" "direct,double")
   (set_attr "mode" "<MODE>")])
ef719a44 1691
;; Scalar multiply/divide on 128-bit vectors: the multdiv result is
;; merged into element 0 (vec_merge mask 1); the remaining elements are
;; taken from operand 1.
(define_insn "<sse>_vm<multdiv_mnemonic><mode>3<round_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=x,v")
        (vec_merge:VF_128
          (multdiv:VF_128
            (match_operand:VF_128 1 "register_operand" "0,v")
            (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_constraint>"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE"
  "@
   <multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
   v<multdiv_mnemonic><ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %<iptr>2<round_op3>}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sse<multdiv_mnemonic>")
   (set_attr "prefix" "<round_prefix>")
   (set_attr "btver2_decode" "direct,double")
   (set_attr "mode" "<ssescalarmode>")])
ef719a44 1709
;; Expander for vector divide over the VF2 modes; requires TARGET_SSE2.
;; Operands are passed through ix86_fixup_binary_operands_no_copy.
(define_expand "div<mode>3"
  [(set (match_operand:VF2 0 "register_operand")
        (div:VF2 (match_operand:VF2 1 "register_operand")
                 (match_operand:VF2 2 "nonimmediate_operand")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")
1716
;; Expander for vector divide over the VF1 modes.  When all of the
;; math-flag conditions tested below hold, the division is expanded via
;; ix86_emit_swdivsf instead of the plain div pattern.
(define_expand "div<mode>3"
  [(set (match_operand:VF1 0 "register_operand")
        (div:VF1 (match_operand:VF1 1 "register_operand")
                 (match_operand:VF1 2 "nonimmediate_operand")))]
  "TARGET_SSE"
{
  ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);

  if (TARGET_SSE_MATH
      && TARGET_RECIP_VEC_DIV
      && !optimize_insn_for_size_p ()
      && flag_finite_math_only && !flag_trapping_math
      && flag_unsafe_math_optimizations)
    {
      ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
      DONE;
    }
})
ef719a44 1735
;; Floating-point vector divide.  Operand 1 must be a register and is
;; not commutative; alternative 0 ties it to the destination.
(define_insn "<sse>_div<mode>3<mask_name><round_name>"
  [(set (match_operand:VF 0 "register_operand" "=x,v")
        (div:VF
          (match_operand:VF 1 "register_operand" "0,v")
          (match_operand:VF 2 "<round_nimm_predicate>" "xm,<round_constraint>")))]
  "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
  "@
   div<ssemodesuffix>\t{%2, %0|%0, %2}
   vdiv<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssediv")
   (set_attr "prefix" "<mask_prefix3>")
   (set_attr "mode" "<MODE>")])
ef719a44 1749
;; UNSPEC_RCP on the VF1_128_256 modes, emitted as %vrcpps.
(define_insn "<sse>_rcp<mode>2"
  [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
        (unspec:VF1_128_256
          [(match_operand:VF1_128_256 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
  "TARGET_SSE"
  "%vrcpps\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "atom_sse_attr" "rcp")
   (set_attr "btver2_sse_attr" "rcp")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<MODE>")])
95879c72 1761
;; Scalar UNSPEC_RCP on V4SF: the result for element 0 is merged
;; (vec_merge mask 1) with the remaining elements of operand 2.
(define_insn "sse_vmrcpv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
        (vec_merge:V4SF
          (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
                       UNSPEC_RCP)
          (match_operand:V4SF 2 "register_operand" "0,x")
          (const_int 1)))]
  "TARGET_SSE"
  "@
   rcpss\t{%1, %0|%0, %k1}
   vrcpss\t{%1, %2, %0|%0, %2, %k1}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sse")
   (set_attr "ssememalign" "32")
   (set_attr "atom_sse_attr" "rcp")
   (set_attr "btver2_sse_attr" "rcp")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "SF")])
1780
;; UNSPEC_RCP14 on the VF_AVX512VL modes, emitted as vrcp14 with an
;; optional mask operand; requires TARGET_AVX512F.
(define_insn "<mask_codefor>rcp14<mode><mask_name>"
  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
        (unspec:VF_AVX512VL
          [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")]
          UNSPEC_RCP14))]
  "TARGET_AVX512F"
  "vrcp14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "sse")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])
1791
;; Scalar UNSPEC_RCP14 on 128-bit vectors: element 0 comes from the
;; unspec of operand 1, the other elements from operand 2.
(define_insn "srcp14<mode>"
  [(set (match_operand:VF_128 0 "register_operand" "=v")
        (vec_merge:VF_128
          (unspec:VF_128
            [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
            UNSPEC_RCP14)
          (match_operand:VF_128 2 "register_operand" "v")
          (const_int 1)))]
  "TARGET_AVX512F"
  "vrcp14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
  [(set_attr "type" "sse")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])
1805
;; Expander for vector square root over the VF2 modes; requires
;; TARGET_SSE2 and has no preparation statements.
(define_expand "sqrt<mode>2"
  [(set (match_operand:VF2 0 "register_operand")
        (sqrt:VF2 (match_operand:VF2 1 "nonimmediate_operand")))]
  "TARGET_SSE2")
95879c72 1810
;; Expander for vector square root over the VF1 modes.  When all of the
;; math-flag conditions tested below hold, the expansion goes through
;; ix86_emit_swsqrtsf (last argument false) instead of the sqrt pattern.
(define_expand "sqrt<mode>2"
  [(set (match_operand:VF1 0 "register_operand")
        (sqrt:VF1 (match_operand:VF1 1 "nonimmediate_operand")))]
  "TARGET_SSE"
{
  if (TARGET_SSE_MATH
      && TARGET_RECIP_VEC_SQRT
      && !optimize_insn_for_size_p ()
      && flag_finite_math_only && !flag_trapping_math
      && flag_unsafe_math_optimizations)
    {
      ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
      DONE;
    }
})
1826
;; Floating-point vector square root, emitted as %vsqrt with optional
;; mask and rounding operands.
(define_insn "<sse>_sqrt<mode>2<mask_name><round_name>"
  [(set (match_operand:VF 0 "register_operand" "=v")
        (sqrt:VF (match_operand:VF 1 "<round_nimm_predicate>" "<round_constraint>")))]
  "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
  "%vsqrt<ssemodesuffix>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
  [(set_attr "type" "sse")
   (set_attr "atom_sse_attr" "sqrt")
   (set_attr "btver2_sse_attr" "sqrt")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<MODE>")])
95879c72 1837
;; Scalar square root on 128-bit vectors: sqrt of operand 1 is merged
;; into element 0 (vec_merge mask 1); the other elements come from
;; operand 2.
(define_insn "<sse>_vmsqrt<mode>2<round_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=x,v")
        (vec_merge:VF_128
          (sqrt:VF_128
            (match_operand:VF_128 1 "nonimmediate_operand" "xm,<round_constraint>"))
          (match_operand:VF_128 2 "register_operand" "0,v")
          (const_int 1)))]
  "TARGET_SSE"
  "@
   sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}
   vsqrt<ssescalarmodesuffix>\t{<round_op3>%1, %2, %0|%0, %2, %<iptr>1<round_op3>}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sse")
   (set_attr "atom_sse_attr" "sqrt")
   (set_attr "prefix" "<round_prefix>")
   (set_attr "btver2_sse_attr" "sqrt")
   (set_attr "mode" "<ssescalarmode>")])
1855
;; Expander for UNSPEC_RSQRT over the VF1_128_256 modes.  Always expands
;; through ix86_emit_swsqrtsf (last argument true) and then finishes
;; with DONE, so the RTL template itself is never emitted.
(define_expand "rsqrt<mode>2"
  [(set (match_operand:VF1_128_256 0 "register_operand")
        (unspec:VF1_128_256
          [(match_operand:VF1_128_256 1 "nonimmediate_operand")] UNSPEC_RSQRT))]
  "TARGET_SSE_MATH"
{
  ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
  DONE;
})
1865
;; UNSPEC_RSQRT on the VF1_128_256 modes, emitted as %vrsqrtps.
(define_insn "<sse>_rsqrt<mode>2"
  [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
        (unspec:VF1_128_256
          [(match_operand:VF1_128_256 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
  "TARGET_SSE"
  "%vrsqrtps\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<MODE>")])
95879c72 1875
;; UNSPEC_RSQRT14 on the VF_AVX512VL modes, emitted as vrsqrt14 with an
;; optional mask operand; requires TARGET_AVX512F.
(define_insn "<mask_codefor>rsqrt14<mode><mask_name>"
  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
        (unspec:VF_AVX512VL
          [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")]
          UNSPEC_RSQRT14))]
  "TARGET_AVX512F"
  "vrsqrt14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "sse")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])
1886
;; Scalar UNSPEC_RSQRT14 on 128-bit vectors: element 0 comes from the
;; unspec of operand 1, the other elements from operand 2.
(define_insn "rsqrt14<mode>"
  [(set (match_operand:VF_128 0 "register_operand" "=v")
        (vec_merge:VF_128
          (unspec:VF_128
            [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
            UNSPEC_RSQRT14)
          (match_operand:VF_128 2 "register_operand" "v")
          (const_int 1)))]
  "TARGET_AVX512F"
  "vrsqrt14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
  [(set_attr "type" "sse")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])
1900
;; Scalar UNSPEC_RSQRT on V4SF: the result for element 0 is merged
;; (vec_merge mask 1) with the remaining elements of operand 2.
(define_insn "sse_vmrsqrtv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
        (vec_merge:V4SF
          (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
                       UNSPEC_RSQRT)
          (match_operand:V4SF 2 "register_operand" "0,x")
          (const_int 1)))]
  "TARGET_SSE"
  "@
   rsqrtss\t{%1, %0|%0, %k1}
   vrsqrtss\t{%1, %2, %0|%0, %2, %k1}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sse")
   (set_attr "ssememalign" "32")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "SF")])
1917
115a33c2 1918;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
4f3f76e6 1919;; isn't really correct, as those rtl operators aren't defined when
115a33c2
RH
1920;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
1921
;; Expander for floating-point vector smin/smax.  Without
;; flag_finite_math_only operand 1 is forced into a register before the
;; operands are passed to ix86_fixup_binary_operands_no_copy.
(define_expand "<code><mode>3<mask_name><round_saeonly_name>"
  [(set (match_operand:VF 0 "register_operand")
        (smaxmin:VF
          (match_operand:VF 1 "<round_saeonly_nimm_predicate>")
          (match_operand:VF 2 "<round_saeonly_nimm_predicate>")))]
  "TARGET_SSE && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
{
  if (!flag_finite_math_only)
    operands[1] = force_reg (<MODE>mode, operands[1]);
  ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
})
ef719a44 1933
;; Floating-point vector smin/smax, matched only under
;; flag_finite_math_only.  Operand 1 is marked commutative ("%") here.
(define_insn "*<code><mode>3_finite<mask_name><round_saeonly_name>"
  [(set (match_operand:VF 0 "register_operand" "=x,v")
        (smaxmin:VF
          (match_operand:VF 1 "<round_saeonly_nimm_predicate>" "%0,v")
          (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xm,<round_saeonly_constraint>")))]
  "TARGET_SSE && flag_finite_math_only
   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
   && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
  "@
   <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
   v<maxmin_float><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseadd")
   (set_attr "btver2_sse_attr" "maxmin")
   (set_attr "prefix" "<mask_prefix3>")
   (set_attr "mode" "<MODE>")])
ef719a44 1950
;; Floating-point vector smin/smax, matched only when
;; flag_finite_math_only is off.  Operand 1 must be a register and is
;; not marked commutative.
(define_insn "*<code><mode>3<mask_name><round_saeonly_name>"
  [(set (match_operand:VF 0 "register_operand" "=x,v")
        (smaxmin:VF
          (match_operand:VF 1 "register_operand" "0,v")
          (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xm,<round_saeonly_constraint>")))]
  "TARGET_SSE && !flag_finite_math_only
   && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
  "@
   <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
   v<maxmin_float><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseadd")
   (set_attr "btver2_sse_attr" "maxmin")
   (set_attr "prefix" "<mask_prefix3>")
   (set_attr "mode" "<MODE>")])
115a33c2 1966
;; Scalar smin/smax on 128-bit vectors: the result is merged into
;; element 0 (vec_merge mask 1); the remaining elements are taken from
;; operand 1.
(define_insn "<sse>_vm<code><mode>3<round_saeonly_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=x,v")
        (vec_merge:VF_128
          (smaxmin:VF_128
            (match_operand:VF_128 1 "register_operand" "0,v")
            (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_saeonly_constraint>"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE"
  "@
   <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
   v<maxmin_float><ssescalarmodesuffix>\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %<iptr>2<round_saeonly_op3>}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sse")
   (set_attr "btver2_sse_attr" "maxmin")
   (set_attr "prefix" "<round_saeonly_prefix>")
   (set_attr "mode" "<ssescalarmode>")])
ef719a44 1984
79e72538
UB
1985;; These versions of the min/max patterns implement exactly the operations
1986;; min = (op1 < op2 ? op1 : op2)
1987;; max = (!(op1 < op2) ? op1 : op2)
1988;; Their operands are not commutative, and thus they may be used in the
1989;; presence of -0.0 and NaN.
1990
;; Non-commutative minimum expressed as UNSPEC_IEEE_MIN, so the operand
;; order is preserved; emitted as min/vmin.
(define_insn "*ieee_smin<mode>3"
  [(set (match_operand:VF 0 "register_operand" "=v,v")
        (unspec:VF
          [(match_operand:VF 1 "register_operand" "0,v")
           (match_operand:VF 2 "nonimmediate_operand" "vm,vm")]
          UNSPEC_IEEE_MIN))]
  "TARGET_SSE"
  "@
   min<ssemodesuffix>\t{%2, %0|%0, %2}
   vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseadd")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<MODE>")])
79e72538 2005
;; Non-commutative maximum expressed as UNSPEC_IEEE_MAX, so the operand
;; order is preserved; emitted as max/vmax.
(define_insn "*ieee_smax<mode>3"
  [(set (match_operand:VF 0 "register_operand" "=v,v")
        (unspec:VF
          [(match_operand:VF 1 "register_operand" "0,v")
           (match_operand:VF 2 "nonimmediate_operand" "vm,vm")]
          UNSPEC_IEEE_MAX))]
  "TARGET_SSE"
  "@
   max<ssemodesuffix>\t{%2, %0|%0, %2}
   vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseadd")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<MODE>")])
79e72538 2020
;; V4DF add/sub: vec_merge mask 10 (binary 1010) takes elements 1 and 3
;; from the plus and elements 0 and 2 from the minus.
(define_insn "avx_addsubv4df3"
  [(set (match_operand:V4DF 0 "register_operand" "=x")
        (vec_merge:V4DF
          (plus:V4DF
            (match_operand:V4DF 1 "register_operand" "x")
            (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
          (minus:V4DF (match_dup 1) (match_dup 2))
          (const_int 10)))]
  "TARGET_AVX"
  "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4DF")])
2034
;; V2DF add/sub: vec_merge mask 2 (binary 10) takes element 1 from the
;; plus and element 0 from the minus.
(define_insn "sse3_addsubv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
        (vec_merge:V2DF
          (plus:V2DF
            (match_operand:V2DF 1 "register_operand" "0,x")
            (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm"))
          (minus:V2DF (match_dup 1) (match_dup 2))
          (const_int 2)))]
  "TARGET_SSE3"
  "@
   addsubpd\t{%2, %0|%0, %2}
   vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "V2DF")])
2052
;; V8SF add/sub: vec_merge mask 170 (binary 10101010) takes the
;; odd-numbered elements from the plus and the even-numbered ones from
;; the minus.
(define_insn "avx_addsubv8sf3"
  [(set (match_operand:V8SF 0 "register_operand" "=x")
        (vec_merge:V8SF
          (plus:V8SF
            (match_operand:V8SF 1 "register_operand" "x")
            (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
          (minus:V8SF (match_dup 1) (match_dup 2))
          (const_int 170)))]
  "TARGET_AVX"
  "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])
95879c72 2066
;; V4SF add/sub: vec_merge mask 10 (binary 1010) takes elements 1 and 3
;; from the plus and elements 0 and 2 from the minus.
(define_insn "sse3_addsubv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
        (vec_merge:V4SF
          (plus:V4SF
            (match_operand:V4SF 1 "register_operand" "0,x")
            (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
          (minus:V4SF (match_dup 1) (match_dup 2))
          (const_int 10)))]
  "TARGET_SSE3"
  "@
   addsubps\t{%2, %0|%0, %2}
   vaddsubps\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseadd")
   (set_attr "prefix" "orig,vex")
   (set_attr "prefix_rep" "1,*")
   (set_attr "mode" "V4SF")])
2084
;; V4DF horizontal add/sub.  Result elements, in order:
;;   op1[0] OP op1[1], op2[0] OP op2[1], op1[2] OP op1[3], op2[2] OP op2[3].
(define_insn "avx_h<plusminus_insn>v4df3"
  [(set (match_operand:V4DF 0 "register_operand" "=x")
        (vec_concat:V4DF
          (vec_concat:V2DF
            (plusminus:DF
              (vec_select:DF
                (match_operand:V4DF 1 "register_operand" "x")
                (parallel [(const_int 0)]))
              (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
            (plusminus:DF
              (vec_select:DF
                (match_operand:V4DF 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 0)]))
              (vec_select:DF (match_dup 2) (parallel [(const_int 1)]))))
          (vec_concat:V2DF
            (plusminus:DF
              (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:DF (match_dup 1) (parallel [(const_int 3)])))
            (plusminus:DF
              (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_AVX"
  "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4DF")])
2111
;; Expander for V2DF horizontal add with fixed element selectors:
;; result = { op1[0] + op1[1], op2[0] + op2[1] }.
(define_expand "sse3_haddv2df3"
  [(set (match_operand:V2DF 0 "register_operand")
        (vec_concat:V2DF
          (plus:DF
            (vec_select:DF
              (match_operand:V2DF 1 "register_operand")
              (parallel [(const_int 0)]))
            (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
          (plus:DF
            (vec_select:DF
              (match_operand:V2DF 2 "nonimmediate_operand")
              (parallel [(const_int 0)]))
            (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
  "TARGET_SSE3")
2126
;; V2DF horizontal add.  The element selectors are matched as operands
;; 3..6 (each 0 or 1); the insn condition requires the two selectors of
;; each sum to differ, so either ordering of the addends is accepted.
(define_insn "*sse3_haddv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
        (vec_concat:V2DF
          (plus:DF
            (vec_select:DF
              (match_operand:V2DF 1 "register_operand" "0,x")
              (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))
            (vec_select:DF
              (match_dup 1)
              (parallel [(match_operand:SI 4 "const_0_to_1_operand")])))
          (plus:DF
            (vec_select:DF
              (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
              (parallel [(match_operand:SI 5 "const_0_to_1_operand")]))
            (vec_select:DF
              (match_dup 2)
              (parallel [(match_operand:SI 6 "const_0_to_1_operand")])))))]
  "TARGET_SSE3
   && INTVAL (operands[3]) != INTVAL (operands[4])
   && INTVAL (operands[5]) != INTVAL (operands[6])"
  "@
   haddpd\t{%2, %0|%0, %2}
   vhaddpd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseadd")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "V2DF")])
2154
;; V2DF horizontal subtract with fixed element selectors:
;; result = { op1[0] - op1[1], op2[0] - op2[1] }.
(define_insn "sse3_hsubv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
        (vec_concat:V2DF
          (minus:DF
            (vec_select:DF
              (match_operand:V2DF 1 "register_operand" "0,x")
              (parallel [(const_int 0)]))
            (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
          (minus:DF
            (vec_select:DF
              (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
              (parallel [(const_int 0)]))
            (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
  "TARGET_SSE3"
  "@
   hsubpd\t{%2, %0|%0, %2}
   vhsubpd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseadd")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "V2DF")])
2176
;; DF sum of the two elements of a V2DF register, emitted as a
;; horizontal add of the register with itself.  The two selectors
;; (operands 2 and 3) must differ.
(define_insn "*sse3_haddv2df3_low"
  [(set (match_operand:DF 0 "register_operand" "=x,x")
        (plus:DF
          (vec_select:DF
            (match_operand:V2DF 1 "register_operand" "0,x")
            (parallel [(match_operand:SI 2 "const_0_to_1_operand")]))
          (vec_select:DF
            (match_dup 1)
            (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))))]
  "TARGET_SSE3
   && INTVAL (operands[2]) != INTVAL (operands[3])"
  "@
   haddpd\t{%0, %0|%0, %0}
   vhaddpd\t{%1, %1, %0|%0, %1, %1}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseadd1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "V2DF")])
2195
;; DF difference op1[0] - op1[1] of a V2DF register, emitted as a
;; horizontal subtract of the register with itself.
(define_insn "*sse3_hsubv2df3_low"
  [(set (match_operand:DF 0 "register_operand" "=x,x")
        (minus:DF
          (vec_select:DF
            (match_operand:V2DF 1 "register_operand" "0,x")
            (parallel [(const_int 0)]))
          (vec_select:DF
            (match_dup 1)
            (parallel [(const_int 1)]))))]
  "TARGET_SSE3"
  "@
   hsubpd\t{%0, %0|%0, %0}
   vhsubpd\t{%1, %1, %0|%0, %1, %1}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseadd1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "V8SF")])
2213
;; V8SF horizontal add/sub.  Each V4SF half of the result holds the
;; pairwise results of operand 1 followed by those of operand 2: the
;; first half uses elements 0..3, the second half elements 4..7.
(define_insn "avx_h<plusminus_insn>v8sf3"
  [(set (match_operand:V8SF 0 "register_operand" "=x")
        (vec_concat:V8SF
          (vec_concat:V4SF
            (vec_concat:V2SF
              (plusminus:SF
                (vec_select:SF
                  (match_operand:V8SF 1 "register_operand" "x")
                  (parallel [(const_int 0)]))
                (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
              (plusminus:SF
                (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
                (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
            (vec_concat:V2SF
              (plusminus:SF
                (vec_select:SF
                  (match_operand:V8SF 2 "nonimmediate_operand" "xm")
                  (parallel [(const_int 0)]))
                (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
              (plusminus:SF
                (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
                (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
          (vec_concat:V4SF
            (vec_concat:V2SF
              (plusminus:SF
                (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
                (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
              (plusminus:SF
                (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
                (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
            (vec_concat:V2SF
              (plusminus:SF
                (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
                (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
              (plusminus:SF
                (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
                (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_AVX"
  "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])
2256
;; V4SF horizontal add/sub.  Result elements, in order:
;;   op1[0] OP op1[1], op1[2] OP op1[3], op2[0] OP op2[1], op2[2] OP op2[3].
(define_insn "sse3_h<plusminus_insn>v4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
        (vec_concat:V4SF
          (vec_concat:V2SF
            (plusminus:SF
              (vec_select:SF
                (match_operand:V4SF 1 "register_operand" "0,x")
                (parallel [(const_int 0)]))
              (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
            (plusminus:SF
              (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
          (vec_concat:V2SF
            (plusminus:SF
              (vec_select:SF
                (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
                (parallel [(const_int 0)]))
              (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
            (plusminus:SF
              (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSE3"
  "@
   h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
   vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix" "orig,vex")
   (set_attr "prefix_rep" "1,*")
   (set_attr "mode" "V4SF")])
2288
;; V8DF sum reduction, delegated to ix86_expand_reduc with gen_addv8df3.
(define_expand "reduc_splus_v8df"
  [(match_operand:V8DF 0 "register_operand")
   (match_operand:V8DF 1 "register_operand")]
  "TARGET_AVX512F"
{
  ix86_expand_reduc (gen_addv8df3, operands[0], operands[1]);
  DONE;
})
2297
;; V4DF sum reduction in three steps: a horizontal add of the input
;; with itself, a vperm2f128 of that result with immediate 1, then a
;; vector add of the two temporaries.
(define_expand "reduc_splus_v4df"
  [(match_operand:V4DF 0 "register_operand")
   (match_operand:V4DF 1 "register_operand")]
  "TARGET_AVX"
{
  rtx tmp = gen_reg_rtx (V4DFmode);
  rtx tmp2 = gen_reg_rtx (V4DFmode);
  emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
  emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1)));
  emit_insn (gen_addv4df3 (operands[0], tmp, tmp2));
  DONE;
})
95879c72 2310
;; V2DF sum reduction: a single horizontal add of the input with itself.
(define_expand "reduc_splus_v2df"
  [(match_operand:V2DF 0 "register_operand")
   (match_operand:V2DF 1 "register_operand")]
  "TARGET_SSE3"
{
  emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
  DONE;
})
d6023b50 2319
;; V16SF sum reduction, delegated to ix86_expand_reduc with
;; gen_addv16sf3.
(define_expand "reduc_splus_v16sf"
  [(match_operand:V16SF 0 "register_operand")
   (match_operand:V16SF 1 "register_operand")]
  "TARGET_AVX512F"
{
  ix86_expand_reduc (gen_addv16sf3, operands[0], operands[1]);
  DONE;
})
2328
;; V8SF sum reduction: two horizontal adds, a vperm2f128 with
;; immediate 1, then a vector add.  Note that tmp is reused as the
;; destination of the permute after its first value has been consumed.
(define_expand "reduc_splus_v8sf"
  [(match_operand:V8SF 0 "register_operand")
   (match_operand:V8SF 1 "register_operand")]
  "TARGET_AVX"
{
  rtx tmp = gen_reg_rtx (V8SFmode);
  rtx tmp2 = gen_reg_rtx (V8SFmode);
  emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
  emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp));
  emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1)));
  emit_insn (gen_addv8sf3 (operands[0], tmp, tmp2));
  DONE;
})
2342
;; V4SF sum reduction.  With TARGET_SSE3 it is two chained horizontal
;; adds; otherwise it is delegated to ix86_expand_reduc with
;; gen_addv4sf3.
(define_expand "reduc_splus_v4sf"
  [(match_operand:V4SF 0 "register_operand")
   (match_operand:V4SF 1 "register_operand")]
  "TARGET_SSE"
{
  if (TARGET_SSE3)
    {
      rtx tmp = gen_reg_rtx (V4SFmode);
      emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
      emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
    }
  else
    ix86_expand_reduc (gen_addv4sf3, operands[0], operands[1]);
  DONE;
})
2358
8a0436cb
JJ
2359;; Modes handled by reduc_sm{in,ax}* patterns.
;; Each mode is paired with the target condition under which it is
;; enabled.
(define_mode_iterator REDUC_SMINMAX_MODE
  [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
   (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
   (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
   (V4SF "TARGET_SSE") (V64QI "TARGET_AVX512BW")
   (V32HI "TARGET_AVX512BW") (V16SI "TARGET_AVX512F")
   (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
   (V8DF "TARGET_AVX512F")])
8a0436cb
JJ
2368
;; Signed min/max reduction over REDUC_SMINMAX_MODE, delegated to
;; ix86_expand_reduc.  The pattern condition is empty; enabling is
;; governed by the per-mode conditions of the iterator.
(define_expand "reduc_<code>_<mode>"
  [(smaxmin:REDUC_SMINMAX_MODE
     (match_operand:REDUC_SMINMAX_MODE 0 "register_operand")
     (match_operand:REDUC_SMINMAX_MODE 1 "register_operand"))]
  ""
{
  ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
  DONE;
})
2378
;; Unsigned min/max reduction over the VI_AVX512BW modes, delegated to
;; ix86_expand_reduc.
(define_expand "reduc_<code>_<mode>"
  [(umaxmin:VI_AVX512BW
     (match_operand:VI_AVX512BW 0 "register_operand")
     (match_operand:VI_AVX512BW 1 "register_operand"))]
  "TARGET_AVX512F"
{
  ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
  DONE;
})
2388
;; Unsigned min/max reduction over the VI_256 modes, delegated to
;; ix86_expand_reduc.
(define_expand "reduc_<code>_<mode>"
  [(umaxmin:VI_256
     (match_operand:VI_256 0 "register_operand")
     (match_operand:VI_256 1 "register_operand"))]
  "TARGET_AVX2"
{
  ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
  DONE;
})
2398
;; V8HI unsigned-minimum reduction, delegated to ix86_expand_reduc with
;; gen_uminv8hi3; requires TARGET_SSE4_1.
(define_expand "reduc_umin_v8hi"
  [(umin:V8HI
     (match_operand:V8HI 0 "register_operand")
     (match_operand:V8HI 1 "register_operand"))]
  "TARGET_SSE4_1"
{
  ix86_expand_reduc (gen_uminv8hi3, operands[0], operands[1]);
  DONE;
})
2408
;; UNSPEC_REDUCE on the VF_AVX512VL modes with an immediate operand in
;; the range 0..255, emitted as vreduce; requires TARGET_AVX512DQ.
(define_insn "<mask_codefor>reducep<mode><mask_name>"
  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
        (unspec:VF_AVX512VL
          [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")
           (match_operand:SI 2 "const_0_to_255_operand")]
          UNSPEC_REDUCE))]
  "TARGET_AVX512DQ"
  "vreduce<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sse")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])
2420
;; Scalar UNSPEC_REDUCE on 128-bit vectors: the result is merged into
;; element 0 (vec_merge mask 1); the remaining elements are taken from
;; operand 1.  Operand 3 is an immediate in the range 0..255.
(define_insn "reduces<mode>"
  [(set (match_operand:VF_128 0 "register_operand" "=v")
        (vec_merge:VF_128
          (unspec:VF_128
            [(match_operand:VF_128 1 "register_operand" "v")
             (match_operand:VF_128 2 "nonimmediate_operand" "vm")
             (match_operand:SI 3 "const_0_to_255_operand")]
            UNSPEC_REDUCE)
          (match_dup 1)
          (const_int 1)))]
  "TARGET_AVX512DQ"
  "vreduce<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "sse")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])
2436
ef719a44
RH
2437;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2438;;
d6023b50 2439;; Parallel floating point comparisons
ef719a44
RH
2440;;
2441;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2442
;; Vector FP compare expressed as UNSPEC_PCMP; operand 3 is an
;; immediate in the range 0..31 and is passed through to vcmp.
(define_insn "avx_cmp<mode>3"
  [(set (match_operand:VF_128_256 0 "register_operand" "=x")
        (unspec:VF_128_256
          [(match_operand:VF_128_256 1 "register_operand" "x")
           (match_operand:VF_128_256 2 "nonimmediate_operand" "xm")
           (match_operand:SI 3 "const_0_to_31_operand" "n")]
          UNSPEC_PCMP))]
  "TARGET_AVX"
  "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssecmp")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<MODE>")])
2456
;; Scalar FP compare expressed as UNSPEC_PCMP: the result is merged
;; into element 0 (vec_merge mask 1); the remaining elements are taken
;; from operand 1.  Operand 3 is an immediate in the range 0..31.
(define_insn "avx_vmcmp<mode>3"
  [(set (match_operand:VF_128 0 "register_operand" "=x")
        (vec_merge:VF_128
          (unspec:VF_128
            [(match_operand:VF_128 1 "register_operand" "x")
             (match_operand:VF_128 2 "nonimmediate_operand" "xm")
             (match_operand:SI 3 "const_0_to_31_operand" "n")]
            UNSPEC_PCMP)
          (match_dup 1)
          (const_int 1)))]
  "TARGET_AVX"
  "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}"
  [(set_attr "type" "ssecmp")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<ssescalarmode>")])
2473
;; Vector FP mask compare for comparison codes whose RTX class is
;; RTX_COMM_COMPARE; operand 1 is marked commutative ("%").
(define_insn "*<sse>_maskcmp<mode>3_comm"
  [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
        (match_operator:VF_128_256 3 "sse_comparison_operator"
          [(match_operand:VF_128_256 1 "register_operand" "%0,x")
           (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")]))]
  "TARGET_SSE
   && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
  "@
   cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
   vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssecmp")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<MODE>")])
2489
;; Packed FP compare using the comparison operator in operand 3.
;; Alternative 0 is the two-operand (noavx) form, alternative 1 the
;; three-operand VEX form.
(define_insn "<sse>_maskcmp<mode>3"
  [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
        (match_operator:VF_128_256 3 "sse_comparison_operator"
          [(match_operand:VF_128_256 1 "register_operand" "0,x")
           (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")]))]
  "TARGET_SSE"
  "@
   cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
   vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssecmp")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<MODE>")])
d096ecdd 2504
;; Scalar FP compare using the comparison operator in operand 3; the
;; result is combined with operand 1 via vec_merge with selector
;; (const_int 1).
(define_insn "<sse>_vmmaskcmp<mode>3"
  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
        (vec_merge:VF_128
          (match_operator:VF_128 3 "sse_comparison_operator"
            [(match_operand:VF_128 1 "register_operand" "0,x")
             (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")])
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE"
  "@
   cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
   vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssecmp")
   (set_attr "length_immediate" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<ssescalarmode>")])
ef719a44 2522
;; Predicate for the comparison immediate, selected by vector mode.
;; Floating-point vector modes accept 0..31; integer vector modes
;; accept 0..7.
(define_mode_attr cmp_imm_predicate
  [;; Floating-point modes.
   (V16SF "const_0_to_31_operand") (V8DF "const_0_to_31_operand")
   (V8SF "const_0_to_31_operand") (V4DF "const_0_to_31_operand")
   (V4SF "const_0_to_31_operand") (V2DF "const_0_to_31_operand")
   ;; Integer modes.
   (V16SI "const_0_to_7_operand") (V8DI "const_0_to_7_operand")
   (V8SI "const_0_to_7_operand") (V4DI "const_0_to_7_operand")
   (V4SI "const_0_to_7_operand") (V2DI "const_0_to_7_operand")
   (V32HI "const_0_to_7_operand") (V64QI "const_0_to_7_operand")
   (V16HI "const_0_to_7_operand") (V32QI "const_0_to_7_operand")
   (V8HI "const_0_to_7_operand") (V16QI "const_0_to_7_operand")])
2533
;; AVX-512 compare of V48_AVX512VL vectors into a mask register.  The
;; immediate predicate for operand 3 comes from cmp_imm_predicate.
(define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name><round_saeonly_name>"
  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
        (unspec:<avx512fmaskmode>
          [(match_operand:V48_AVX512VL 1 "register_operand" "v")
           (match_operand:V48_AVX512VL 2 "nonimmediate_operand" "<round_saeonly_constraint>")
           (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
          UNSPEC_PCMP))]
  "TARGET_AVX512F && <round_saeonly_mode512bit_condition>"
  "v<sseintprefix>cmp<ssemodesuffix>\t{%3, <round_saeonly_mask_scalar_merge_op4>%2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2<round_saeonly_mask_scalar_merge_op4>, %3}"
  [(set_attr "type" "ssecmp")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
2547
51e14b05
AI
;; AVX-512BW compare (vpcmp) of VI12_AVX512VL vectors into a mask register.
(define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name>"
  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
        (unspec:<avx512fmaskmode>
          [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
           (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
           (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
          UNSPEC_PCMP))]
  "TARGET_AVX512BW"
  "vpcmp<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
  [(set_attr "type" "ssecmp")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
2561
;; AVX-512BW unsigned compare (vpcmpu) of VI12_AVX512VL vectors into a
;; mask register; operand 3 is an immediate in 0..7.
(define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
        (unspec:<avx512fmaskmode>
          [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
           (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
           (match_operand:SI 3 "const_0_to_7_operand" "n")]
          UNSPEC_UNSIGNED_PCMP))]
  "TARGET_AVX512BW"
  "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
  [(set_attr "type" "ssecmp")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
2575
;; AVX-512F unsigned compare (vpcmpu) of VI48_AVX512VL vectors into a
;; mask register; operand 3 is an immediate in 0..7.
(define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
        (unspec:<avx512fmaskmode>
          [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
           (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
           (match_operand:SI 3 "const_0_to_7_operand" "n")]
          UNSPEC_UNSIGNED_PCMP))]
  "TARGET_AVX512F"
  "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
  [(set_attr "type" "ssecmp")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
2589
;; AVX-512F scalar FP compare into a mask register.  The unspec result is
;; ANDed with (const_int 1).
;; The Intel-syntax operand 2 is printed with %<iptr>, matching the other
;; scalar compare patterns in this file (avx_vmcmp<mode>3,
;; <sse>_vmmaskcmp<mode>3, <sse>_comi); without it a memory operand
;; would not use the scalar pointer-size override those patterns apply.
(define_insn "avx512f_vmcmp<mode>3<round_saeonly_name>"
  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
        (and:<avx512fmaskmode>
          (unspec:<avx512fmaskmode>
            [(match_operand:VF_128 1 "register_operand" "v")
             (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
             (match_operand:SI 3 "const_0_to_31_operand" "n")]
            UNSPEC_PCMP)
          (const_int 1)))]
  "TARGET_AVX512F"
  "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %<iptr>2<round_saeonly_op4>, %3}"
  [(set_attr "type" "ssecmp")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<ssescalarmode>")])
2605
;; Masked AVX-512F scalar FP compare into a mask register.  The unspec
;; result is ANDed with (operand 4 & 1), where operand 4 is the write mask.
;; The Intel-syntax operand 2 is printed with %<iptr>, matching the other
;; scalar compare patterns in this file (avx_vmcmp<mode>3,
;; <sse>_vmmaskcmp<mode>3, <sse>_comi).
(define_insn "avx512f_vmcmp<mode>3_mask<round_saeonly_name>"
  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
        (and:<avx512fmaskmode>
          (unspec:<avx512fmaskmode>
            [(match_operand:VF_128 1 "register_operand" "v")
             (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
             (match_operand:SI 3 "const_0_to_31_operand" "n")]
            UNSPEC_PCMP)
          (and:<avx512fmaskmode>
            (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")
            (const_int 1))))]
  "TARGET_AVX512F"
  "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %<iptr>2<round_saeonly_op5>, %3}"
  [(set_attr "type" "ssecmp")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<ssescalarmode>")])
2623
;; FP vector compare, via the comparison operator in operand 3, into a
;; mask register.  The destination uses the "Yk" constraint and the insn
;; is marked EVEX-prefixed, so it must be gated on TARGET_AVX512F rather
;; than plain TARGET_SSE.
;; NOTE(review): the 128/256-bit modes of the VF iterator presumably need
;; AVX512VL as well for this form -- confirm, and consider switching the
;; iterator to VF_AVX512VL.
(define_insn "avx512f_maskcmp<mode>3"
  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
        (match_operator:<avx512fmaskmode> 3 "sse_comparison_operator"
          [(match_operand:VF 1 "register_operand" "v")
           (match_operand:VF 2 "nonimmediate_operand" "vm")]))]
  "TARGET_AVX512F"
  "vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "ssecmp")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
2635
;; Scalar FP compare that sets FLAGS_REG in CCFP mode, comparing element 0
;; of operand 0 against element 0 of operand 1.
(define_insn "<sse>_comi<round_saeonly_name>"
  [(set (reg:CCFP FLAGS_REG)
        (compare:CCFP
          (vec_select:MODEF
            (match_operand:<ssevecmode> 0 "register_operand" "v")
            (parallel [(const_int 0)]))
          (vec_select:MODEF
            (match_operand:<ssevecmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
            (parallel [(const_int 0)]))))]
  "SSE_FLOAT_MODE_P (<MODE>mode)"
  "%vcomi<ssemodesuffix>\t{<round_saeonly_op2>%1, %0|%0, %<iptr>1<round_saeonly_op2>}"
  [(set_attr "type" "ssecomi")
   (set_attr "prefix" "maybe_vex")
   (set_attr "prefix_rep" "0")
   ;; prefix_data16 is 1 only for DF mode.
   (set (attr "prefix_data16")
        (if_then_else (eq_attr "mode" "DF")
                      (const_string "1")
                      (const_string "0")))
   (set_attr "mode" "<MODE>")])
ef719a44 2655
;; Scalar FP compare that sets FLAGS_REG in CCFPU mode, comparing element 0
;; of operand 0 against element 0 of operand 1.
(define_insn "<sse>_ucomi<round_saeonly_name>"
  [(set (reg:CCFPU FLAGS_REG)
        (compare:CCFPU
          (vec_select:MODEF
            (match_operand:<ssevecmode> 0 "register_operand" "v")
            (parallel [(const_int 0)]))
          (vec_select:MODEF
            (match_operand:<ssevecmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
            (parallel [(const_int 0)]))))]
  "SSE_FLOAT_MODE_P (<MODE>mode)"
  "%vucomi<ssemodesuffix>\t{<round_saeonly_op2>%1, %0|%0, %<iptr>1<round_saeonly_op2>}"
  [(set_attr "type" "ssecomi")
   (set_attr "prefix" "maybe_vex")
   (set_attr "prefix_rep" "0")
   ;; prefix_data16 is 1 only for DF mode.
   (set (attr "prefix_data16")
        (if_then_else (eq_attr "mode" "DF")
                      (const_string "1")
                      (const_string "0")))
   (set_attr "mode" "<MODE>")])
ef719a44 2675
f62ce24f
AI
;; 512-bit vector conditional select driven by an FP comparison.  Enabled
;; only when the data mode and the comparison mode have the same number
;; of units.  Expansion is delegated to ix86_expand_fp_vcond and is
;; asserted to succeed.
(define_expand "vcond<V_512:mode><VF_512:mode>"
  [(set (match_operand:V_512 0 "register_operand")
        (if_then_else:V_512
          (match_operator 3 ""
            [(match_operand:VF_512 4 "nonimmediate_operand")
             (match_operand:VF_512 5 "nonimmediate_operand")])
          (match_operand:V_512 1 "general_operand")
          (match_operand:V_512 2 "general_operand")))]
  "TARGET_AVX512F
   && (GET_MODE_NUNITS (<V_512:MODE>mode)
       == GET_MODE_NUNITS (<VF_512:MODE>mode))"
{
  const bool expanded = ix86_expand_fp_vcond (operands);
  gcc_assert (expanded);
  DONE;
})
2692
;; 256-bit vector conditional select driven by an FP comparison.  Enabled
;; only when the data mode and the comparison mode have the same number
;; of units.  Expansion is delegated to ix86_expand_fp_vcond and is
;; asserted to succeed.
(define_expand "vcond<V_256:mode><VF_256:mode>"
  [(set (match_operand:V_256 0 "register_operand")
        (if_then_else:V_256
          (match_operator 3 ""
            [(match_operand:VF_256 4 "nonimmediate_operand")
             (match_operand:VF_256 5 "nonimmediate_operand")])
          (match_operand:V_256 1 "general_operand")
          (match_operand:V_256 2 "general_operand")))]
  "TARGET_AVX
   && (GET_MODE_NUNITS (<V_256:MODE>mode)
       == GET_MODE_NUNITS (<VF_256:MODE>mode))"
{
  const bool expanded = ix86_expand_fp_vcond (operands);
  gcc_assert (expanded);
  DONE;
})
2709
;; 128-bit vector conditional select driven by an FP comparison.  Enabled
;; only when the data mode and the comparison mode have the same number
;; of units.  Expansion is delegated to ix86_expand_fp_vcond and is
;; asserted to succeed.
(define_expand "vcond<V_128:mode><VF_128:mode>"
  [(set (match_operand:V_128 0 "register_operand")
        (if_then_else:V_128
          (match_operator 3 ""
            [(match_operand:VF_128 4 "nonimmediate_operand")
             (match_operand:VF_128 5 "nonimmediate_operand")])
          (match_operand:V_128 1 "general_operand")
          (match_operand:V_128 2 "general_operand")))]
  "TARGET_SSE
   && (GET_MODE_NUNITS (<V_128:MODE>mode)
       == GET_MODE_NUNITS (<VF_128:MODE>mode))"
{
  const bool expanded = ix86_expand_fp_vcond (operands);
  gcc_assert (expanded);
  DONE;
})
2726
ef719a44
RH
2727;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2728;;
d6023b50 2729;; Parallel floating point logical operations
ef719a44
RH
2730;;
2731;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2732
26358fb6
AI
;; (~op1) & op2 on 128/256-bit FP vectors.  The mnemonic is assembled at
;; output time: the suffix depends on the insn's "mode" attribute, the
;; template on the matched alternative, and a masked insn without
;; AVX512DQ falls back to the integer vpandn[dq] form.
(define_insn "<sse>_andnot<mode>3<mask_name>"
  [(set (match_operand:VF_128_256 0 "register_operand" "=x,v")
        (and:VF_128_256
          (not:VF_128_256
            (match_operand:VF_128_256 1 "register_operand" "0,v"))
          (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,vm")))]
  "TARGET_SSE && <mask_avx512vl_condition>"
{
  static char buf[128];
  const char *tmpl;
  const char *sfx;
  enum attr_mode amode = get_attr_mode (insn);

  /* A packed-single insn mode forces the "ps" suffix.  */
  if (amode == MODE_V8SF || amode == MODE_V4SF)
    sfx = "ps";
  else
    sfx = "<ssemodesuffix>";

  /* Alternative 0 is the two-operand form, alternative 1 the
     three-operand form.  */
  if (which_alternative == 0)
    tmpl = "andn%s\t{%%2, %%0|%%0, %%2}";
  else if (which_alternative == 1)
    tmpl = "vandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
  else
    gcc_unreachable ();

  /* There is no vandnp[sd] in avx512f.  Use vpandn[qd].  */
  if (<mask_applied> && !TARGET_AVX512DQ)
    {
      sfx = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
      tmpl = "vpandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
    }

  snprintf (buf, sizeof (buf), tmpl, sfx);
  return buf;
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix" "orig,maybe_evex")
   (set (attr "mode")
        (cond [(and (match_test "<MODE_SIZE> == 16")
                    (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
                 (const_string "<ssePSmode>")
               (match_test "TARGET_AVX")
                 (const_string "<MODE>")
               (match_test "optimize_function_for_size_p (cfun)")
                 (const_string "V4SF")
              ]
              (const_string "<MODE>")))])
95879c72 2790
26358fb6
AI
2791
;; (~op1) & op2 on 512-bit FP vectors.  Without AVX512DQ the integer
;; vpandn[dq] form is emitted instead of vandnp[sd].
(define_insn "<sse>_andnot<mode>3<mask_name>"
  [(set (match_operand:VF_512 0 "register_operand" "=v")
        (and:VF_512
          (not:VF_512
            (match_operand:VF_512 1 "register_operand" "v"))
          (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512F"
{
  static char buf[128];
  const char *int_prefix = "";
  const char *sfx = "<ssemodesuffix>";

  /* There is no vandnp[sd] in avx512f.  Use vpandn[qd].  */
  if (!TARGET_AVX512DQ)
    {
      int_prefix = "p";
      sfx = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
    }

  snprintf (buf, sizeof (buf),
            "v%sandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}",
            int_prefix, sfx);
  return buf;
}
  [(set_attr "type" "sselog")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
2822
;; Expander for the any_logic operations on 128/256-bit FP vectors.
;; Operands are passed through ix86_fixup_binary_operands_no_copy before
;; the pattern is emitted.
(define_expand "<code><mode>3<mask_name>"
  [(set (match_operand:VF_128_256 0 "register_operand")
        (any_logic:VF_128_256
          (match_operand:VF_128_256 1 "nonimmediate_operand")
          (match_operand:VF_128_256 2 "nonimmediate_operand")))]
  "TARGET_SSE && <mask_avx512vl_condition>"
  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
ef719a44 2830
;; Expander for the any_logic operations on 512-bit FP vectors.
;; Operands are passed through ix86_fixup_binary_operands_no_copy before
;; the pattern is emitted.
(define_expand "<code><mode>3<mask_name>"
  [(set (match_operand:VF_512 0 "register_operand")
        (any_logic:VF_512
          (match_operand:VF_512 1 "nonimmediate_operand")
          (match_operand:VF_512 2 "nonimmediate_operand")))]
  "TARGET_AVX512F"
  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
2838
26358fb6
AI
;; any_logic operations on 128/256-bit FP vectors.  The mnemonic is
;; assembled at output time: the suffix depends on the insn's "mode"
;; attribute, the template on the matched alternative, and a masked insn
;; without AVX512DQ falls back to the integer vp<logic>[dq] form.
(define_insn "*<code><mode>3<mask_name>"
  [(set (match_operand:VF_128_256 0 "register_operand" "=x,v")
        (any_logic:VF_128_256
          (match_operand:VF_128_256 1 "nonimmediate_operand" "%0,v")
          (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,vm")))]
  "TARGET_SSE && <mask_avx512vl_condition>
   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
{
  static char buf[128];
  const char *tmpl;
  const char *sfx;
  enum attr_mode amode = get_attr_mode (insn);

  /* A packed-single insn mode forces the "ps" suffix.  */
  if (amode == MODE_V8SF || amode == MODE_V4SF)
    sfx = "ps";
  else
    sfx = "<ssemodesuffix>";

  /* Alternative 0 is the two-operand form, alternative 1 the
     three-operand form.  */
  if (which_alternative == 0)
    tmpl = "<logic>%s\t{%%2, %%0|%%0, %%2}";
  else if (which_alternative == 1)
    tmpl = "v<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
  else
    gcc_unreachable ();

  /* There is no v<logic>p[sd] in avx512f.  Use vp<logic>[dq].  */
  if (<mask_applied> && !TARGET_AVX512DQ)
    {
      sfx = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
      tmpl = "vp<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
    }

  snprintf (buf, sizeof (buf), tmpl, sfx);
  return buf;
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix" "orig,maybe_evex")
   (set (attr "mode")
        (cond [(and (match_test "<MODE_SIZE> == 16")
                    (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
                 (const_string "<ssePSmode>")
               (match_test "TARGET_AVX")
                 (const_string "<MODE>")
               (match_test "optimize_function_for_size_p (cfun)")
                 (const_string "V4SF")
              ]
              (const_string "<MODE>")))])
ef719a44 2896
26358fb6
AI
;; any_logic operations on 512-bit FP vectors.  Without AVX512DQ the
;; integer vp<logic>[dq] form is emitted when the mode is 64 bytes wide
;; or a mask is applied.
(define_insn "*<code><mode>3<mask_name>"
  [(set (match_operand:VF_512 0 "register_operand" "=v")
        (any_logic:VF_512
          (match_operand:VF_512 1 "nonimmediate_operand" "%v")
          (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512F && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
{
  static char buf[128];
  const char *int_prefix = "";
  const char *sfx = "<ssemodesuffix>";

  /* There is no v<logic>p[sd] in avx512f.  Use vp<logic>[dq].  */
  if ((<MODE_SIZE> == 64 || <mask_applied>) && !TARGET_AVX512DQ)
    {
      int_prefix = "p";
      sfx = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
    }

  snprintf (buf, sizeof (buf),
            "v%s<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}",
            int_prefix, sfx);
  return buf;
}
  [(set_attr "type" "sselog")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
2926
;; copysign on FP vectors, emitted as three sets:
;;   op4 = ~op3 & op1
;;   op5 =  op3 & op2
;;   op0 =  op4 | op5
;; where op3 is the value returned by ix86_build_signbit_mask and op4/op5
;; are fresh pseudo registers.
(define_expand "copysign<mode>3"
  [(set (match_dup 4)
        (and:VF
          (not:VF (match_dup 3))
          (match_operand:VF 1 "nonimmediate_operand")))
   (set (match_dup 5)
        (and:VF (match_dup 3)
                (match_operand:VF 2 "nonimmediate_operand")))
   (set (match_operand:VF 0 "register_operand")
        (ior:VF (match_dup 4) (match_dup 5)))]
  "TARGET_SSE"
{
  int tmp_idx;

  operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);

  /* Temporaries for the two masked halves (operands 4 and 5).  */
  for (tmp_idx = 4; tmp_idx <= 5; tmp_idx++)
    operands[tmp_idx] = gen_reg_rtx (<MODE>mode);
})
2944
ab8efbd8 2945;; Also define scalar versions. These are used for abs, neg, and
41f717fb 2946;; conditional move. Using subregs into vector modes causes register
ab8efbd8
RH
2947;; allocation lossage. These patterns do not allow memory operands
2948;; because the native instructions read the full 128-bits.
2949
;; Scalar (MODEF) and-not carried out in vector registers; both inputs
;; must be registers.  The suffix is "ps" when the insn's mode attribute
;; is V4SF, otherwise <ssevecmodesuffix>.
(define_insn "*andnot<mode>3"
  [(set (match_operand:MODEF 0 "register_operand" "=x,x")
        (and:MODEF
          (not:MODEF
            (match_operand:MODEF 1 "register_operand" "0,x"))
          (match_operand:MODEF 2 "register_operand" "x,x")))]
  "SSE_FLOAT_MODE_P (<MODE>mode)"
{
  static char buf[32];
  const char *tmpl;
  const char *sfx;

  if (get_attr_mode (insn) == MODE_V4SF)
    sfx = "ps";
  else
    sfx = "<ssevecmodesuffix>";

  /* Alternative 0 is the two-operand form, alternative 1 the
     three-operand form.  */
  if (which_alternative == 0)
    tmpl = "andn%s\t{%%2, %%0|%%0, %%2}";
  else if (which_alternative == 1)
    tmpl = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
  else
    gcc_unreachable ();

  snprintf (buf, sizeof (buf), tmpl, sfx);
  return buf;
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix" "orig,vex")
   (set (attr "mode")
        (cond [(and (match_test "<MODE_SIZE> == 16")
                    (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
                 (const_string "V4SF")
               (match_test "TARGET_AVX")
                 (const_string "<ssevecmode>")
               (match_test "optimize_function_for_size_p (cfun)")
                 (const_string "V4SF")
              ]
              (const_string "<ssevecmode>")))])
2991
;; TFmode and-not.  Emits andnps when the insn's mode attribute is V4SF
;; and pandn otherwise; alternative 1 prepends "v" and uses three operands.
(define_insn "*andnottf3"
  [(set (match_operand:TF 0 "register_operand" "=x,x")
        (and:TF
          (not:TF (match_operand:TF 1 "register_operand" "0,x"))
          (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
  "TARGET_SSE"
{
  static char buf[32];
  const char *tmpl;
  const char *mnemonic;

  if (get_attr_mode (insn) == MODE_V4SF)
    mnemonic = "andnps";
  else
    mnemonic = "pandn";

  if (which_alternative == 0)
    tmpl = "%s\t{%%2, %%0|%%0, %%2}";
  else if (which_alternative == 1)
    tmpl = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
  else
    gcc_unreachable ();

  snprintf (buf, sizeof (buf), tmpl, mnemonic);
  return buf;
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   ;; prefix_data16 is 1 only for alternative 0 in TI mode.
   (set (attr "prefix_data16")
        (if_then_else
          (and (eq_attr "alternative" "0")
               (eq_attr "mode" "TI"))
          (const_string "1")
          (const_string "*")))
   (set_attr "prefix" "orig,vex")
   (set (attr "mode")
        (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
                 (const_string "V4SF")
               (match_test "TARGET_AVX")
                 (const_string "TI")
               (ior (not (match_test "TARGET_SSE2"))
                    (match_test "optimize_function_for_size_p (cfun)"))
                 (const_string "V4SF")
              ]
              (const_string "TI")))])
95879c72 3038
;; Scalar (MODEF) any_logic operation carried out in vector registers;
;; both inputs must be registers.  The suffix is "ps" when the insn's
;; mode attribute is V4SF, otherwise <ssevecmodesuffix>.
(define_insn "*<code><mode>3"
  [(set (match_operand:MODEF 0 "register_operand" "=x,x")
        (any_logic:MODEF
          (match_operand:MODEF 1 "register_operand" "%0,x")
          (match_operand:MODEF 2 "register_operand" "x,x")))]
  "SSE_FLOAT_MODE_P (<MODE>mode)"
{
  static char buf[32];
  const char *tmpl;
  const char *sfx;

  if (get_attr_mode (insn) == MODE_V4SF)
    sfx = "ps";
  else
    sfx = "<ssevecmodesuffix>";

  /* Alternative 0 is the two-operand form, alternative 1 the
     three-operand form.  */
  if (which_alternative == 0)
    tmpl = "<logic>%s\t{%%2, %%0|%%0, %%2}";
  else if (which_alternative == 1)
    tmpl = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
  else
    gcc_unreachable ();

  snprintf (buf, sizeof (buf), tmpl, sfx);
  return buf;
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix" "orig,vex")
   (set (attr "mode")
        (cond [(and (match_test "<MODE_SIZE> == 16")
                    (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
                 (const_string "V4SF")
               (match_test "TARGET_AVX")
                 (const_string "<ssevecmode>")
               (match_test "optimize_function_for_size_p (cfun)")
                 (const_string "V4SF")
              ]
              (const_string "<ssevecmode>")))])
3079
;; Expander for the any_logic operations on TFmode.  Operands are passed
;; through ix86_fixup_binary_operands_no_copy before the pattern is emitted.
(define_expand "<code>tf3"
  [(set (match_operand:TF 0 "register_operand")
        (any_logic:TF
          (match_operand:TF 1 "nonimmediate_operand")
          (match_operand:TF 2 "nonimmediate_operand")))]
  "TARGET_SSE"
  "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
3087
;; TFmode any_logic operation.  Emits <logic>ps when the insn's mode
;; attribute is V4SF and p<logic> otherwise; alternative 1 prepends "v"
;; and uses three operands.
(define_insn "*<code>tf3"
  [(set (match_operand:TF 0 "register_operand" "=x,x")
        (any_logic:TF
          (match_operand:TF 1 "nonimmediate_operand" "%0,x")
          (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
  "TARGET_SSE
   && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
{
  static char buf[32];
  const char *tmpl;
  const char *mnemonic;

  if (get_attr_mode (insn) == MODE_V4SF)
    mnemonic = "<logic>ps";
  else
    mnemonic = "p<logic>";

  if (which_alternative == 0)
    tmpl = "%s\t{%%2, %%0|%%0, %%2}";
  else if (which_alternative == 1)
    tmpl = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
  else
    gcc_unreachable ();

  snprintf (buf, sizeof (buf), tmpl, mnemonic);
  return buf;
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   ;; prefix_data16 is 1 only for alternative 0 in TI mode.
   (set (attr "prefix_data16")
        (if_then_else
          (and (eq_attr "alternative" "0")
               (eq_attr "mode" "TI"))
          (const_string "1")
          (const_string "*")))
   (set_attr "prefix" "orig,vex")
   (set (attr "mode")
        (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
                 (const_string "V4SF")
               (match_test "TARGET_AVX")
                 (const_string "TI")
               (ior (not (match_test "TARGET_SSE2"))
                    (match_test "optimize_function_for_size_p (cfun)"))
                 (const_string "V4SF")
              ]
              (const_string "TI")))])
ab8efbd8 3135
cbf2e4d4
HJ
3136;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3137;;
b0d5396c
UB
3138;; FMA floating point multiply/accumulate instructions. These include
3139;; scalar versions of the instructions as well as vector versions.
cbf2e4d4
HJ
3140;;
3141;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3142
19c5f6e6 3143;; The standard names for scalar FMA are only available with SSE math enabled.
558d9f79
AI
3144;; CPUID bit AVX512F enables evex encoded scalar and 512-bit fma. It doesn't
3145;; care about FMA bit, so we enable fma for TARGET_AVX512F even when TARGET_FMA
3146;; and TARGET_FMA4 are both false.
3147;; TODO: In theory AVX512F does not automatically imply FMA, and without FMA
3148;; one must force the EVEX encoding of the fma insns. Ideally we'd improve
3149;; GAS to allow proper prefix selection. However, for the moment all hardware
3150;; that supports AVX512F also supports FMA so we can ignore this for now.
;; Modes for the standard-named FMA expanders.  The scalar entries
;; additionally require TARGET_SSE_MATH; the 512-bit entries require
;; TARGET_AVX512F.
(define_mode_iterator FMAMODEM
  [;; Scalar modes.
   (SF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
   (DF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
   ;; 128-bit and 256-bit vector modes.
   (V4SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
   (V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
   (V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
   (V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
   ;; 512-bit vector modes.
   (V16SF "TARGET_AVX512F")
   (V8DF "TARGET_AVX512F")])
b0d5396c 3160
;; Standard name: op0 = fma (op1, op2, op3).
(define_expand "fma<mode>4"
  [(set (match_operand:FMAMODEM 0 "register_operand")
        (fma:FMAMODEM
          (match_operand:FMAMODEM 1 "nonimmediate_operand")
          (match_operand:FMAMODEM 2 "nonimmediate_operand")
          (match_operand:FMAMODEM 3 "nonimmediate_operand")))])
351877cf 3167
;; Standard name: op0 = fma (op1, op2, -op3).
(define_expand "fms<mode>4"
  [(set (match_operand:FMAMODEM 0 "register_operand")
        (fma:FMAMODEM
          (match_operand:FMAMODEM 1 "nonimmediate_operand")
          (match_operand:FMAMODEM 2 "nonimmediate_operand")
          (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
16949072
RG
3174
;; Standard name: op0 = fma (-op1, op2, op3).
(define_expand "fnma<mode>4"
  [(set (match_operand:FMAMODEM 0 "register_operand")
        (fma:FMAMODEM
          (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
          (match_operand:FMAMODEM 2 "nonimmediate_operand")
          (match_operand:FMAMODEM 3 "nonimmediate_operand")))])
16949072
RG
3181
;; Standard name: op0 = fma (-op1, op2, -op3).
(define_expand "fnms<mode>4"
  [(set (match_operand:FMAMODEM 0 "register_operand")
        (fma:FMAMODEM
          (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
          (match_operand:FMAMODEM 2 "nonimmediate_operand")
          (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
19c5f6e6
UB
3188
3189;; The builtins for intrinsics are not constrained by SSE math enabled.
e274629e
AI
;; Modes for the FMA builtin expanders.  Same modes as FMAMODEM, but the
;; scalar entries carry no TARGET_SSE_MATH requirement.
(define_mode_iterator FMAMODE_AVX512
  [;; Scalar modes.
   (SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
   (DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
   ;; 128-bit and 256-bit vector modes.
   (V4SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
   (V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
   (V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
   (V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
   ;; 512-bit vector modes.
   (V16SF "TARGET_AVX512F")
   (V8DF "TARGET_AVX512F")])
3199
;; Scalar, 128-bit and 256-bit FP modes, with no per-mode conditions.
(define_mode_iterator FMAMODE
  [SF DF
   V4SF V2DF
   V8SF V4DF])
16949072 3202
;; Builtin expander: op0 = fma (op1, op2, op3) over FMAMODE_AVX512.
(define_expand "fma4i_fmadd_<mode>"
  [(set (match_operand:FMAMODE_AVX512 0 "register_operand")
        (fma:FMAMODE_AVX512
          (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand")
          (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
          (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand")))])
3209
;; Expander for the maskz form of fmadd.  It forwards operands 0-3, a
;; zero vector (CONST0_RTX) and the mask in operand 4 to the
;; fma_fmadd_<mode>_maskz_1 generator.
(define_expand "<avx512>_fmadd_<mode>_maskz<round_expand_name>"
  [(match_operand:VF_AVX512VL 0 "register_operand")
   (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
   (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
   (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512F && <round_mode512bit_condition>"
{
  rtx pat
    = gen_fma_fmadd_<mode>_maskz_1<round_expand_name> (
        operands[0], operands[1], operands[2], operands[3],
        CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>);

  emit_insn (pat);
  DONE;
})
3223
e274629e
AI
;; op0 = fma (op1, op2, op3) for FMA and FMA4.  Alternatives 0-2 are the
;; FMA 132/213/231 forms; alternatives 3-4 are the four-operand FMA4 form.
(define_insn "*fma_fmadd_<mode>"
  [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
        (fma:FMAMODE
          (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x")
          (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
          (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x")))]
  "TARGET_FMA || TARGET_FMA4"
  "@
   vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
   vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
   vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
   vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
   vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
   (set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
3240
e274629e
AI
3241;; Assume AVX-512F as the baseline.
;; Scalar and vector FP modes; the scalar and 512-bit modes are
;; unconditional, the 128/256-bit vector modes require TARGET_AVX512VL.
(define_mode_iterator VF_SF_AVX512VL
  [SF V16SF
   (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
   DF V8DF
   (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
3245
;; AVX-512 op0 = fma (op1, op2, op3), with sd_maskz and rounding variants.
;; The three alternatives are the 132/213/231 forms.
(define_insn "<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name><round_name>"
  [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
        (fma:VF_SF_AVX512VL
          (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
          (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
          (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")))]
  "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
  "@
   vfmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
   vfmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
   vfmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
3259
e274629e
AI
;; Merge-masked fmadd: the fma result is vec_merged with operand 1 under
;; the mask in operand 4.  Operand 1 is tied to the destination, so only
;; the 132 and 213 forms are offered.
(define_insn "<avx512>_fmadd_<mode>_mask<round_name>"
  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
        (vec_merge:VF_AVX512VL
          (fma:VF_AVX512VL
            (match_operand:VF_AVX512VL 1 "register_operand" "0,0")
            (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
            (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>"))
          (match_dup 1)
          (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
  "TARGET_AVX512F && <round_mode512bit_condition>"
  "@
   vfmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
   vfmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
  [(set_attr "isa" "fma_avx512f,fma_avx512f")
   (set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
3276
e274629e
AI
;; Merge-masked fused multiply-add, "mask3" form.  Lanes selected by mask
;; operand 4 receive op1 * op2 + op3; the remaining lanes keep operand 3,
;; which is tied to the destination (231 encoding).
;;
;; Operands 0 and 1 use the "v" constraint and the condition includes
;; <round_mode512bit_condition>, matching <avx512>_fmsub_<mode>_mask3 and
;; <avx512>_fnmadd_<mode>_mask3: "x" would needlessly restrict this EVEX
;; instruction to the legacy SSE register class, and the rounding variant
;; must be limited by the same mode check as its siblings.
(define_insn "<avx512>_fmadd_<mode>_mask3<round_name>"
  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
        (vec_merge:VF_AVX512VL
          (fma:VF_AVX512VL
            (match_operand:VF_AVX512VL 1 "register_operand" "v")
            (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
            (match_operand:VF_AVX512VL 3 "register_operand" "0"))
          ;; Pass-through value for unselected lanes.
          (match_dup 3)
          (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512F && <round_mode512bit_condition>"
  "vfmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
  [(set_attr "isa" "fma_avx512f")
   (set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
3291
e274629e
AI
;; Fused multiply-subtract: op0 = op1 * op2 - op3, expressed as an fma
;; whose addend is negated.  Alternatives 0-2 are the FMA3 132/213/231
;; encodings; alternatives 3-4 are the four-operand FMA4 form.
(define_insn "*fma_fmsub_<mode>"
  [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
        (fma:FMAMODE
          (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x")
          (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
          (neg:FMAMODE
            (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x"))))]
  "TARGET_FMA || TARGET_FMA4"
  "@
   vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
   vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
   vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
   vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
   vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
   (set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
3309
;; AVX-512 fused multiply-subtract: op0 = op1 * op2 - op3.
;; Alternatives select the 132, 213 or 231 encoding; the <sd_mask...> and
;; <round...> attributes are filled in by define_subst machinery defined
;; elsewhere.
(define_insn "<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name><round_name>"
  [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
        (fma:VF_SF_AVX512VL
          (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
          (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
          (neg:VF_SF_AVX512VL
            (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))]
  "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
  "@
   vfmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
   vfmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
   vfmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
3324
e274629e
AI
;; Merge-masked fused multiply-subtract.  Lanes selected by mask operand 4
;; receive op1 * op2 - op3; the remaining lanes keep operand 1, which is
;; tied to the destination in both alternatives.
;;
;; The condition includes <round_mode512bit_condition> so that the
;; <round_name> variant is limited by the same mode check as the
;; <avx512>_fmadd_<mode>_mask and <avx512>_fnmadd_<mode>_mask patterns.
(define_insn "<avx512>_fmsub_<mode>_mask<round_name>"
  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
        (vec_merge:VF_AVX512VL
          (fma:VF_AVX512VL
            (match_operand:VF_AVX512VL 1 "register_operand" "0,0")
            (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
            (neg:VF_AVX512VL
              (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>")))
          ;; Pass-through value for unselected lanes.
          (match_dup 1)
          (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
  "TARGET_AVX512F && <round_mode512bit_condition>"
  "@
   vfmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
   vfmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
  [(set_attr "isa" "fma_avx512f,fma_avx512f")
   (set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
3342
e274629e
AI
;; Merge-masked fused multiply-subtract, "mask3" form.  Lanes selected by
;; mask operand 4 receive op1 * op2 - op3; the remaining lanes keep
;; operand 3, which is tied to the destination (231 encoding).
(define_insn "<avx512>_fmsub_<mode>_mask3<round_name>"
  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
        (vec_merge:VF_AVX512VL
          (fma:VF_AVX512VL
            (match_operand:VF_AVX512VL 1 "register_operand" "v")
            (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
            (neg:VF_AVX512VL
              (match_operand:VF_AVX512VL 3 "register_operand" "0")))
          ;; Pass-through value for unselected lanes.
          (match_dup 3)
          (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512F && <round_mode512bit_condition>"
  "vfmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
  [(set_attr "isa" "fma_avx512f")
   (set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
3358
e274629e
AI
;; Negated fused multiply-add: op0 = -(op1) * op2 + op3, expressed as an
;; fma whose first multiplicand is negated.  Alternatives 0-2 are the FMA3
;; 132/213/231 encodings; alternatives 3-4 are the four-operand FMA4 form.
(define_insn "*fma_fnmadd_<mode>"
  [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
        (fma:FMAMODE
          (neg:FMAMODE
            (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x"))
          (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
          (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x")))]
  "TARGET_FMA || TARGET_FMA4"
  "@
   vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
   vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
   vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
   vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
   vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
   (set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
3376
;; AVX-512 negated fused multiply-add: op0 = -(op1) * op2 + op3.
;; Alternatives select the 132, 213 or 231 encoding; the <sd_mask...> and
;; <round...> attributes are filled in by define_subst machinery defined
;; elsewhere.
(define_insn "<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name><round_name>"
  [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
        (fma:VF_SF_AVX512VL
          (neg:VF_SF_AVX512VL
            (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v"))
          (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
          (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")))]
  "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
  "@
   vfnmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
   vfnmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
   vfnmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
3391
e274629e
AI
;; Merge-masked negated fused multiply-add.  Lanes selected by mask
;; operand 4 receive -(op1) * op2 + op3; the remaining lanes keep
;; operand 1, which is tied to the destination in both alternatives.
(define_insn "<avx512>_fnmadd_<mode>_mask<round_name>"
  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
        (vec_merge:VF_AVX512VL
          (fma:VF_AVX512VL
            (neg:VF_AVX512VL
              (match_operand:VF_AVX512VL 1 "register_operand" "0,0"))
            (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
            (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>"))
          ;; Pass-through value for unselected lanes.
          (match_dup 1)
          (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
  "TARGET_AVX512F && <round_mode512bit_condition>"
  "@
   vfnmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
   vfnmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
  [(set_attr "isa" "fma_avx512f,fma_avx512f")
   (set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
3409
e274629e
AI
;; Merge-masked negated fused multiply-add, "mask3" form.  Lanes selected
;; by mask operand 4 receive -(op1) * op2 + op3; the remaining lanes keep
;; operand 3, which is tied to the destination (231 encoding).
(define_insn "<avx512>_fnmadd_<mode>_mask3<round_name>"
  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
        (vec_merge:VF_AVX512VL
          (fma:VF_AVX512VL
            (neg:VF_AVX512VL
              (match_operand:VF_AVX512VL 1 "register_operand" "v"))
            (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
            (match_operand:VF_AVX512VL 3 "register_operand" "0"))
          ;; Pass-through value for unselected lanes.
          (match_dup 3)
          (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512F && <round_mode512bit_condition>"
  "vfnmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
  [(set_attr "isa" "fma_avx512f")
   (set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
3425
e274629e
AI
;; Negated fused multiply-subtract: op0 = -(op1) * op2 - op3, expressed as
;; an fma with both the first multiplicand and the addend negated.
;; Alternatives 0-2 are the FMA3 132/213/231 encodings; alternatives 3-4
;; are the four-operand FMA4 form.
;;
;; The FMA3 templates deliberately contain no <round_sd_mask_op4> or
;; <sd_mask_op4> attributes.  This is the plain FMA/FMA4 pattern (FMAMODE,
;; "TARGET_FMA || TARGET_FMA4"), like *fma_fmadd_<mode>, *fma_fmsub_<mode>
;; and *fma_fnmadd_<mode>; masking and embedded rounding belong only to the
;; AVX-512 pattern fma_fnmsub_<mode><sd_maskz_name><round_name>.
(define_insn "*fma_fnmsub_<mode>"
  [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
        (fma:FMAMODE
          (neg:FMAMODE
            (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x"))
          (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
          (neg:FMAMODE
            (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x"))))]
  "TARGET_FMA || TARGET_FMA4"
  "@
   vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
   vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
   vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
   vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
   vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
   (set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
3444
;; AVX-512 negated fused multiply-subtract: op0 = -(op1) * op2 - op3.
;; Alternatives select the 132, 213 or 231 encoding; the <sd_mask...> and
;; <round...> attributes are filled in by define_subst machinery defined
;; elsewhere.
(define_insn "<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name><round_name>"
  [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
        (fma:VF_SF_AVX512VL
          (neg:VF_SF_AVX512VL
            (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v"))
          (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
          (neg:VF_SF_AVX512VL
            (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))]
  "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
  "@
   vfnmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
   vfnmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
   vfnmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
3460
e274629e
AI
;; Merge-masked negated fused multiply-subtract.  Lanes selected by mask
;; operand 4 receive -(op1) * op2 - op3; the remaining lanes keep
;; operand 1, which is tied to the destination in both alternatives.
(define_insn "<avx512>_fnmsub_<mode>_mask<round_name>"
  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
        (vec_merge:VF_AVX512VL
          (fma:VF_AVX512VL
            (neg:VF_AVX512VL
              (match_operand:VF_AVX512VL 1 "register_operand" "0,0"))
            (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
            (neg:VF_AVX512VL
              (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>")))
          ;; Pass-through value for unselected lanes.
          (match_dup 1)
          (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
  "TARGET_AVX512F && <round_mode512bit_condition>"
  "@
   vfnmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
   vfnmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
  [(set_attr "isa" "fma_avx512f,fma_avx512f")
   (set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
3479
e274629e
AI
;; Merge-masked negated fused multiply-subtract, "mask3" form.  Lanes
;; selected by mask operand 4 receive -(op1) * op2 - op3; the remaining
;; lanes keep operand 3, which is tied to the destination (231 encoding).
;;
;; The condition includes <round_mode512bit_condition> so that the
;; <round_name> variant is limited by the same mode check as
;; <avx512>_fmsub_<mode>_mask3 and <avx512>_fnmadd_<mode>_mask3.
(define_insn "<avx512>_fnmsub_<mode>_mask3<round_name>"
  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
        (vec_merge:VF_AVX512VL
          (fma:VF_AVX512VL
            (neg:VF_AVX512VL
              (match_operand:VF_AVX512VL 1 "register_operand" "v"))
            (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
            (neg:VF_AVX512VL
              (match_operand:VF_AVX512VL 3 "register_operand" "0")))
          ;; Pass-through value for unselected lanes.
          (match_dup 3)
          (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512F && <round_mode512bit_condition>"
  "vfnmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
  [(set_attr "isa" "fma_avx512f")
   (set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
3496
b0d5396c 3497;; FMA parallel floating point multiply addsub and subadd operations.
89509419 3498
b0d5396c
UB
3499;; It would be possible to represent these without the UNSPEC as
3500;;
3501;; (vec_merge
3502;; (fma op1 op2 op3)
3503;; (fma op1 op2 (neg op3))
3504;; (merge-const))
3505;;
3506;; But this doesn't seem useful in practice.
3507
;; Named expander for the alternating multiply add/subtract operation.
;; It only builds the UNSPEC_FMADDSUB rtx over operands 1-3; one of the
;; define_insn patterns for that unspec must then match it.
(define_expand "fmaddsub_<mode>"
  [(set (match_operand:VF 0 "register_operand")
        (unspec:VF
          [(match_operand:VF 1 "nonimmediate_operand")
           (match_operand:VF 2 "nonimmediate_operand")
           (match_operand:VF 3 "nonimmediate_operand")]
          UNSPEC_FMADDSUB))]
  "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
b0d5396c 3516
e274629e
AI
;; Zero-masking entry point for fmaddsub.  It forwards operands 0-4 to the
;; generated fma_fmaddsub_<mode>_maskz_1 pattern, passing an all-zero
;; vector of the operation mode between operand 3 and the mask.
(define_expand "<avx512>_fmaddsub_<mode>_maskz<round_expand_name>"
  [(match_operand:VF_AVX512VL 0 "register_operand")
   (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
   (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
   (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512F"
{
  rtx zero_vec = CONST0_RTX (<MODE>mode);

  emit_insn (gen_fma_fmaddsub_<mode>_maskz_1<round_expand_name> (
    operands[0], operands[1], operands[2], operands[3],
    zero_vec, operands[4]<round_expand_operand>));
  DONE;
})
3530
;; FMA3/FMA4 alternating multiply add/subtract on 128-bit and 256-bit
;; vectors, represented as UNSPEC_FMADDSUB over operands 1-3.
;; Alternatives 0-2 are the FMA3 132/213/231 encodings; alternatives 3-4
;; are the four-operand FMA4 form.
(define_insn "*fma_fmaddsub_<mode>"
  [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
        (unspec:VF_128_256
          [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,v,x,x")
           (match_operand:VF_128_256 2 "nonimmediate_operand" "vm,v,vm,x,m")
           (match_operand:VF_128_256 3 "nonimmediate_operand" "v,vm,0,xm,x")]
          UNSPEC_FMADDSUB))]
  "TARGET_FMA || TARGET_FMA4"
  "@
   vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
   vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
   vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
   vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
   vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
   (set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
3548
;; AVX-512 alternating multiply add/subtract (UNSPEC_FMADDSUB).
;; Alternatives select the 132, 213 or 231 encoding; the <sd_mask...> and
;; <round...> attributes are filled in by define_subst machinery defined
;; elsewhere.
(define_insn "<sd_mask_codefor>fma_fmaddsub_<mode><sd_maskz_name><round_name>"
  [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
        (unspec:VF_SF_AVX512VL
          [(match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
           (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
           (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")]
          UNSPEC_FMADDSUB))]
  "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
  "@
   vfmaddsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
   vfmaddsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
   vfmaddsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
3563
e274629e
AI
;; Merge-masked alternating multiply add/subtract (UNSPEC_FMADDSUB).
;; Lanes selected by mask operand 4 receive the unspec result; the
;; remaining lanes keep operand 1, which is tied to the destination.
;;
;; The condition includes <round_mode512bit_condition> so that the
;; <round_name> variant is limited by the same mode check as the unmasked
;; fma_fmaddsub_<mode><sd_maskz_name><round_name> pattern.
(define_insn "<avx512>_fmaddsub_<mode>_mask<round_name>"
  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
        (vec_merge:VF_AVX512VL
          (unspec:VF_AVX512VL
            [(match_operand:VF_AVX512VL 1 "register_operand" "0,0")
             (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
             (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>")]
            UNSPEC_FMADDSUB)
          ;; Pass-through value for unselected lanes.
          (match_dup 1)
          (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
  "TARGET_AVX512F && <round_mode512bit_condition>"
  "@
   vfmaddsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
   vfmaddsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
  [(set_attr "isa" "fma_avx512f,fma_avx512f")
   (set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
3581
e274629e
AI
;; Merge-masked alternating multiply add/subtract, "mask3" form.  Lanes
;; selected by mask operand 4 receive the UNSPEC_FMADDSUB result; the
;; remaining lanes keep operand 3, which is tied to the destination
;; (231 encoding).
;;
;; The condition includes <round_mode512bit_condition> so that the
;; <round_name> variant is limited by the same mode check as the unmasked
;; fma_fmaddsub_<mode><sd_maskz_name><round_name> pattern.
(define_insn "<avx512>_fmaddsub_<mode>_mask3<round_name>"
  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
        (vec_merge:VF_AVX512VL
          (unspec:VF_AVX512VL
            [(match_operand:VF_AVX512VL 1 "register_operand" "v")
             (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
             (match_operand:VF_AVX512VL 3 "register_operand" "0")]
            UNSPEC_FMADDSUB)
          ;; Pass-through value for unselected lanes.
          (match_dup 3)
          (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512F && <round_mode512bit_condition>"
  "vfmaddsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
  [(set_attr "isa" "fma_avx512f")
   (set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
3597
;; FMA3/FMA4 alternating multiply subtract/add on 128-bit and 256-bit
;; vectors: the same UNSPEC_FMADDSUB as fmaddsub, but with operand 3
;; negated.  Alternatives 0-2 are the FMA3 132/213/231 encodings;
;; alternatives 3-4 are the four-operand FMA4 form.
(define_insn "*fma_fmsubadd_<mode>"
  [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
        (unspec:VF_128_256
          [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,v,x,x")
           (match_operand:VF_128_256 2 "nonimmediate_operand" "vm,v,vm,x,m")
           (neg:VF_128_256
             (match_operand:VF_128_256 3 "nonimmediate_operand" "v,vm,0,xm,x"))]
          UNSPEC_FMADDSUB))]
  "TARGET_FMA || TARGET_FMA4"
  "@
   vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
   vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
   vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
   vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
   vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
   (set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
3616
;; AVX-512 alternating multiply subtract/add: UNSPEC_FMADDSUB with
;; operand 3 negated.  Alternatives select the 132, 213 or 231 encoding;
;; the <sd_mask...> and <round...> attributes are filled in by define_subst
;; machinery defined elsewhere.
(define_insn "<sd_mask_codefor>fma_fmsubadd_<mode><sd_maskz_name><round_name>"
  [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
        (unspec:VF_SF_AVX512VL
          [(match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
           (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
           (neg:VF_SF_AVX512VL
             (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))]
          UNSPEC_FMADDSUB))]
  "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
  "@
   vfmsubadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
   vfmsubadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
   vfmsubadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
3632
e274629e
AI
;; Merge-masked alternating multiply subtract/add (UNSPEC_FMADDSUB with
;; operand 3 negated).  Lanes selected by mask operand 4 receive the
;; unspec result; the remaining lanes keep operand 1, which is tied to the
;; destination.
;;
;; The condition includes <round_mode512bit_condition> so that the
;; <round_name> variant is limited by the same mode check as the unmasked
;; fma_fmsubadd_<mode><sd_maskz_name><round_name> pattern.
(define_insn "<avx512>_fmsubadd_<mode>_mask<round_name>"
  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
        (vec_merge:VF_AVX512VL
          (unspec:VF_AVX512VL
            [(match_operand:VF_AVX512VL 1 "register_operand" "0,0")
             (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
             (neg:VF_AVX512VL
               (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>"))]
            UNSPEC_FMADDSUB)
          ;; Pass-through value for unselected lanes.
          (match_dup 1)
          (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
  "TARGET_AVX512F && <round_mode512bit_condition>"
  "@
   vfmsubadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
   vfmsubadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
  [(set_attr "isa" "fma_avx512f,fma_avx512f")
   (set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
3651
e274629e
AI
;; Merge-masked alternating multiply subtract/add, "mask3" form
;; (UNSPEC_FMADDSUB with operand 3 negated).  Lanes selected by mask
;; operand 4 receive the unspec result; the remaining lanes keep operand 3,
;; which is tied to the destination (231 encoding).
;;
;; The condition includes <round_mode512bit_condition> so that the
;; <round_name> variant is limited by the same mode check as the unmasked
;; fma_fmsubadd_<mode><sd_maskz_name><round_name> pattern.
(define_insn "<avx512>_fmsubadd_<mode>_mask3<round_name>"
  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
        (vec_merge:VF_AVX512VL
          (unspec:VF_AVX512VL
            [(match_operand:VF_AVX512VL 1 "register_operand" "v")
             (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
             (neg:VF_AVX512VL
               (match_operand:VF_AVX512VL 3 "register_operand" "0"))]
            UNSPEC_FMADDSUB)
          ;; Pass-through value for unselected lanes.
          (match_dup 3)
          (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512F && <round_mode512bit_condition>"
  "vfmsubadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
  [(set_attr "isa" "fma_avx512f")
   (set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
3668
b0d5396c
UB
3669;; FMA3 floating point scalar intrinsics. These merge result with
3670;; high-order elements from the destination register.
cbf2e4d4 3671
;; Named expander for the FMA3 scalar intrinsic.  Element 0 of the result
;; is op1 * op2 + op3 (vec_merge mask 1); all other elements are copied
;; from operand 1.
(define_expand "fmai_vmfmadd_<mode><round_name>"
  [(set (match_operand:VF_128 0 "register_operand")
        (vec_merge:VF_128
          (fma:VF_128
            (match_operand:VF_128 1 "<round_nimm_predicate>")
            (match_operand:VF_128 2 "<round_nimm_predicate>")
            (match_operand:VF_128 3 "<round_nimm_predicate>"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_FMA")
3682
;; FMA3 scalar multiply-add.  Element 0 is op1 * op2 + op3; the upper
;; elements come from operand 1, which is tied to the destination.
;; The two alternatives are the 132 and 213 encodings.
(define_insn "*fmai_fmadd_<mode>"
  [(set (match_operand:VF_128 0 "register_operand" "=v,v")
        (vec_merge:VF_128
          (fma:VF_128
            (match_operand:VF_128 1 "<round_nimm_predicate>" " 0, 0")
            (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>, v")
            (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>"))
          ;; Source of the upper elements.
          (match_dup 1)
          (const_int 1)))]
  "TARGET_FMA || TARGET_AVX512F"
  "@
   vfmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
   vfmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
3698
;; FMA3 scalar multiply-subtract.  Element 0 is op1 * op2 - op3; the upper
;; elements come from operand 1, which is tied to the destination.
;; The two alternatives are the 132 and 213 encodings.
(define_insn "*fmai_fmsub_<mode>"
  [(set (match_operand:VF_128 0 "register_operand" "=v,v")
        (vec_merge:VF_128
          (fma:VF_128
            (match_operand:VF_128 1 "<round_nimm_predicate>" "0,0")
            (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v")
            (neg:VF_128
              (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>")))
          ;; Source of the upper elements.
          (match_dup 1)
          (const_int 1)))]
  "TARGET_FMA || TARGET_AVX512F"
  "@
   vfmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
   vfmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
3715
;; FMA3 scalar negated multiply-add.  Element 0 is -(op2) * op1 + op3; the
;; upper elements come from operand 1, which is tied to the destination.
;; Note the negated multiplicand is operand 2 here, listed first in the
;; fma rtx.  The two alternatives are the 132 and 213 encodings.
(define_insn "*fmai_fnmadd_<mode><round_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=v,v")
        (vec_merge:VF_128
          (fma:VF_128
            (neg:VF_128
              (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v"))
            (match_operand:VF_128 1 "<round_nimm_predicate>" "0,0")
            (match_operand:VF_128 3 "<round_nimm_predicate>" "v,<round_constraint>"))
          ;; Source of the upper elements.
          (match_dup 1)
          (const_int 1)))]
  "TARGET_FMA || TARGET_AVX512F"
  "@
   vfnmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
   vfnmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
3732
;; FMA3 scalar negated multiply-subtract.  Element 0 is
;; -(op2) * op1 - op3; the upper elements come from operand 1, which is
;; tied to the destination.  The two alternatives are the 132 and 213
;; encodings.
(define_insn "*fmai_fnmsub_<mode><round_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=v,v")
        (vec_merge:VF_128
          (fma:VF_128
            (neg:VF_128
              (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>, v"))
            (match_operand:VF_128 1 "<round_nimm_predicate>" " 0, 0")
            (neg:VF_128
              (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>")))
          ;; Source of the upper elements.
          (match_dup 1)
          (const_int 1)))]
  "TARGET_FMA || TARGET_AVX512F"
  "@
   vfnmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
   vfnmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
3750
b0d5396c
UB
3751;; FMA4 floating point scalar intrinsics. These write the
3752;; entire destination register, with the high-order elements zeroed.
3753
;; Named expander for the FMA4 scalar intrinsic.  Element 0 of the result
;; is op1 * op2 + op3 (vec_merge mask 1); the other elements come from
;; operand 4, which the preparation code sets to the all-zero vector.
(define_expand "fma4i_vmfmadd_<mode>"
  [(set (match_operand:VF_128 0 "register_operand")
        (vec_merge:VF_128
          (fma:VF_128
            (match_operand:VF_128 1 "nonimmediate_operand")
            (match_operand:VF_128 2 "nonimmediate_operand")
            (match_operand:VF_128 3 "nonimmediate_operand"))
          (match_dup 4)
          (const_int 1)))]
  "TARGET_FMA4"
{
  operands[4] = CONST0_RTX (<MODE>mode);
})
3765
;; FMA4 scalar multiply-add.  Element 0 is op1 * op2 + op3; the upper
;; elements are taken from operand 4, which must be a zero constant.
(define_insn "*fma4i_vmfmadd_<mode>"
  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
        (vec_merge:VF_128
          (fma:VF_128
            (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
            (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
            (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
          ;; Zero vector supplying the upper elements.
          (match_operand:VF_128 4 "const0_operand")
          (const_int 1)))]
  "TARGET_FMA4"
  "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
3779
;; FMA4 scalar multiply-subtract.  Element 0 is op1 * op2 - op3; the upper
;; elements are taken from operand 4, which must be a zero constant.
(define_insn "*fma4i_vmfmsub_<mode>"
  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
        (vec_merge:VF_128
          (fma:VF_128
            (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
            (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
            (neg:VF_128
              (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
          ;; Zero vector supplying the upper elements.
          (match_operand:VF_128 4 "const0_operand")
          (const_int 1)))]
  "TARGET_FMA4"
  "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
3794
;; FMA4 scalar negated multiply-add.  Element 0 is -(op1) * op2 + op3; the
;; upper elements are taken from operand 4, which must be a zero constant.
(define_insn "*fma4i_vmfnmadd_<mode>"
  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
        (vec_merge:VF_128
          (fma:VF_128
            (neg:VF_128
              (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
            (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
            (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
          ;; Zero vector supplying the upper elements.
          (match_operand:VF_128 4 "const0_operand")
          (const_int 1)))]
  "TARGET_FMA4"
  "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
3809
;; FMA4 scalar negated multiply-subtract.  Element 0 is
;; -(op1) * op2 - op3; the upper elements are taken from operand 4, which
;; must be a zero constant.
(define_insn "*fma4i_vmfnmsub_<mode>"
  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
        (vec_merge:VF_128
          (fma:VF_128
            (neg:VF_128
              (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
            (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
            (neg:VF_128
              (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
          ;; Zero vector supplying the upper elements.
          (match_operand:VF_128 4 "const0_operand")
          (const_int 1)))]
  "TARGET_FMA4"
  "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
3825
351877cf 3826;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
ef719a44 3827;;
d6023b50
UB
3828;; Parallel single-precision floating point conversion operations
3829;;
3830;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3831
;; Convert two packed signed integers (operand 2, constraint "ym") to
;; single precision and place them in elements 0 and 1 of the result
;; (vec_merge mask 3).  Elements 2 and 3 are kept from operand 1, which is
;; tied to the destination.
(define_insn "sse_cvtpi2ps"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (vec_duplicate:V4SF
            (float:V2SF
              (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
          (match_operand:V4SF 1 "register_operand" "0")
          (const_int 3)))]
  "TARGET_SSE"
  "cvtpi2ps\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "V4SF")])
3843
;; Convert the two low single-precision elements of operand 1 to integers
;; without truncation (UNSPEC_FIX_NOTRUNC), writing a V2SI result to a
;; register of constraint class "y".
(define_insn "sse_cvtps2pi"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
        (vec_select:V2SI
          (unspec:V4SI
            [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
            UNSPEC_FIX_NOTRUNC)
          (parallel [(const_int 0) (const_int 1)])))]
  "TARGET_SSE"
  "cvtps2pi\t{%1, %0|%0, %q1}"
  [(set_attr "type" "ssecvt")
   (set_attr "unit" "mmx")
   (set_attr "mode" "DI")])
3855
;; Truncating variant of sse_cvtps2pi: the two low single-precision
;; elements of operand 1 are converted with the RTL `fix' operation and
;; written as a V2SI result to a register of constraint class "y".
(define_insn "sse_cvttps2pi"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
        (vec_select:V2SI
          (fix:V4SI
            (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
          (parallel [(const_int 0) (const_int 1)])))]
  "TARGET_SSE"
  "cvttps2pi\t{%1, %0|%0, %q1}"
  [(set_attr "type" "ssecvt")
   (set_attr "unit" "mmx")
   (set_attr "prefix_rep" "0")
   (set_attr "mode" "SF")])
3867
;; Convert a 32-bit integer (operand 2) to single precision and insert it
;; into element 0 of the result (vec_merge mask 1); the other elements
;; come from operand 1.  Alternatives 0-1 are the two-operand non-AVX form
;; (register / memory source); alternative 2 is the three-operand VEX/EVEX
;; form.
(define_insn "sse_cvtsi2ss<round_name>"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
        (vec_merge:V4SF
          (vec_duplicate:V4SF
            (float:SF
              (match_operand:SI 2 "<round_nimm_predicate>" "r,m,<round_constraint3>")))
          (match_operand:V4SF 1 "register_operand" "0,0,v")
          (const_int 1)))]
  "TARGET_SSE"
  "@
   cvtsi2ss\t{%2, %0|%0, %2}
   cvtsi2ss\t{%2, %0|%0, %2}
   vcvtsi2ss\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "vector,double,*")
   (set_attr "amdfam10_decode" "vector,double,*")
   (set_attr "bdver1_decode" "double,direct,*")
   (set_attr "btver2_decode" "double,double,double")
   (set_attr "prefix" "orig,orig,maybe_evex")
   (set_attr "mode" "SF")])
3888
;; 64-bit counterpart of sse_cvtsi2ss: convert a DImode integer
;; (operand 2) to single precision and insert it into element 0 of the
;; result; the other elements come from operand 1.  Only enabled for
;; 64-bit targets.
(define_insn "sse_cvtsi2ssq<round_name>"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
        (vec_merge:V4SF
          (vec_duplicate:V4SF
            (float:SF
              (match_operand:DI 2 "<round_nimm_predicate>" "r,m,<round_constraint3>")))
          (match_operand:V4SF 1 "register_operand" "0,0,v")
          (const_int 1)))]
  "TARGET_SSE && TARGET_64BIT"
  "@
   cvtsi2ssq\t{%2, %0|%0, %2}
   cvtsi2ssq\t{%2, %0|%0, %2}
   vcvtsi2ssq\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "vector,double,*")
   (set_attr "amdfam10_decode" "vector,double,*")
   (set_attr "bdver1_decode" "double,direct,*")
   (set_attr "btver2_decode" "double,double,double")
   (set_attr "length_vex" "*,*,4")
   (set_attr "prefix_rex" "1,1,*")
   (set_attr "prefix" "orig,orig,maybe_evex")
   (set_attr "mode" "SF")])
3911
;; Non-truncating SF -> SImode conversion (UNSPEC_FIX_NOTRUNC) of element 0
;; of the V4SF operand 1, emitted as [v]cvtss2si.  The source is wrapped in a
;; vec_select, so this form takes the whole vector as its operand.
06bc9e41 3912(define_insn "sse_cvtss2si<round_name>"
d6023b50
UB
3913 [(set (match_operand:SI 0 "register_operand" "=r,r")
3914 (unspec:SI
3915 [(vec_select:SF
06bc9e41 3916 (match_operand:V4SF 1 "<round_nimm_predicate>" "v,<round_constraint2>")
d6023b50
UB
3917 (parallel [(const_int 0)]))]
3918 UNSPEC_FIX_NOTRUNC))]
3919 "TARGET_SSE"
06bc9e41 3920 "%vcvtss2si\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
d6023b50
UB
3921 [(set_attr "type" "sseicvt")
3922 (set_attr "athlon_decode" "double,vector")
6a08ffca 3923 (set_attr "bdver1_decode" "double,double")
d6023b50 3924 (set_attr "prefix_rep" "1")
95879c72 3925 (set_attr "prefix" "maybe_vex")
d6023b50
UB
3926 (set_attr "mode" "SI")])
3927
;; Same [v]cvtss2si conversion as the vec_select form, but matching a plain
;; SFmode operand 1 (register or memory) instead of a V4SF vector.  This
;; variant has no <round_name> substitution.
3928(define_insn "sse_cvtss2si_2"
3929 [(set (match_operand:SI 0 "register_operand" "=r,r")
3f97cb0b 3930 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "v,m")]
d6023b50
UB
3931 UNSPEC_FIX_NOTRUNC))]
3932 "TARGET_SSE"
eabb5f48 3933 "%vcvtss2si\t{%1, %0|%0, %k1}"
d6023b50
UB
3934 [(set_attr "type" "sseicvt")
3935 (set_attr "athlon_decode" "double,vector")
3936 (set_attr "amdfam10_decode" "double,double")
6a08ffca 3937 (set_attr "bdver1_decode" "double,double")
d6023b50 3938 (set_attr "prefix_rep" "1")
95879c72 3939 (set_attr "prefix" "maybe_vex")
d6023b50
UB
3940 (set_attr "mode" "SI")])
3941
;; DImode-result variant of the vec_select cvtss2si pattern; requires
;; TARGET_64BIT.  The template adds a {q} suffix to the mnemonic.
06bc9e41 3942(define_insn "sse_cvtss2siq<round_name>"
d6023b50
UB
3943 [(set (match_operand:DI 0 "register_operand" "=r,r")
3944 (unspec:DI
3945 [(vec_select:SF
06bc9e41 3946 (match_operand:V4SF 1 "<round_nimm_predicate>" "v,<round_constraint2>")
d6023b50
UB
3947 (parallel [(const_int 0)]))]
3948 UNSPEC_FIX_NOTRUNC))]
3949 "TARGET_SSE && TARGET_64BIT"
06bc9e41 3950 "%vcvtss2si{q}\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
d6023b50
UB
3951 [(set_attr "type" "sseicvt")
3952 (set_attr "athlon_decode" "double,vector")
6a08ffca 3953 (set_attr "bdver1_decode" "double,double")
d6023b50 3954 (set_attr "prefix_rep" "1")
95879c72 3955 (set_attr "prefix" "maybe_vex")
d6023b50
UB
3956 (set_attr "mode" "DI")])
3957
;; DImode-result [v]cvtss2si{q} on a plain SFmode operand (register or
;; memory); requires TARGET_64BIT.  No <round_name> substitution.
3958(define_insn "sse_cvtss2siq_2"
3959 [(set (match_operand:DI 0 "register_operand" "=r,r")
2b1ebb0c 3960 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "v,m")]
d6023b50
UB
3961 UNSPEC_FIX_NOTRUNC))]
3962 "TARGET_SSE && TARGET_64BIT"
eabb5f48 3963 "%vcvtss2si{q}\t{%1, %0|%0, %k1}"
d6023b50
UB
3964 [(set_attr "type" "sseicvt")
3965 (set_attr "athlon_decode" "double,vector")
3966 (set_attr "amdfam10_decode" "double,double")
6a08ffca 3967 (set_attr "bdver1_decode" "double,double")
d6023b50 3968 (set_attr "prefix_rep" "1")
95879c72 3969 (set_attr "prefix" "maybe_vex")
d6023b50
UB
3970 (set_attr "mode" "DI")])
3971
;; Truncating SF -> SImode conversion (RTL `fix') of element 0 of the V4SF
;; operand 1, emitted as [v]cvttss2si.  Uses the <round_saeonly_*>
;; substitution family rather than <round_*>.
8a6ef760 3972(define_insn "sse_cvttss2si<round_saeonly_name>"
d6023b50
UB
3973 [(set (match_operand:SI 0 "register_operand" "=r,r")
3974 (fix:SI
3975 (vec_select:SF
8a6ef760 3976 (match_operand:V4SF 1 "<round_saeonly_nimm_predicate>" "v,<round_saeonly_constraint2>")
d6023b50
UB
3977 (parallel [(const_int 0)]))))]
3978 "TARGET_SSE"
8a6ef760 3979 "%vcvttss2si\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
d6023b50
UB
3980 [(set_attr "type" "sseicvt")
3981 (set_attr "athlon_decode" "double,vector")
3982 (set_attr "amdfam10_decode" "double,double")
6a08ffca 3983 (set_attr "bdver1_decode" "double,double")
d6023b50 3984 (set_attr "prefix_rep" "1")
95879c72 3985 (set_attr "prefix" "maybe_vex")
d6023b50
UB
3986 (set_attr "mode" "SI")])
3987
;; Truncating SF -> DImode conversion (RTL `fix') of element 0 of the V4SF
;; operand 1, emitted as [v]cvttss2si{q}; requires TARGET_64BIT.
;; The second alternative uses <round_saeonly_constraint2>, matching the
;; SImode sibling pattern and the cvttsd2si patterns, so that the two
;; alternatives stay distinct (register first, then the substituted
;; constraint) and the per-alternative decode attributes line up.
8a6ef760 3988(define_insn "sse_cvttss2siq<round_saeonly_name>"
d6023b50
UB
3989 [(set (match_operand:DI 0 "register_operand" "=r,r")
3990 (fix:DI
3991 (vec_select:SF
8a6ef760 3992 (match_operand:V4SF 1 "<round_saeonly_nimm_predicate>" "v,<round_saeonly_constraint2>")
d6023b50
UB
3993 (parallel [(const_int 0)]))))]
3994 "TARGET_SSE && TARGET_64BIT"
8a6ef760 3995 "%vcvttss2si{q}\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
d6023b50
UB
3996 [(set_attr "type" "sseicvt")
3997 (set_attr "athlon_decode" "double,vector")
3998 (set_attr "amdfam10_decode" "double,double")
6a08ffca 3999 (set_attr "bdver1_decode" "double,double")
d6023b50 4000 (set_attr "prefix_rep" "1")
95879c72 4001 (set_attr "prefix" "maybe_vex")
d6023b50
UB
4002 (set_attr "mode" "DI")])
4003
;; Unsigned SImode -> scalar float conversion (RTL `unsigned_float') for the
;; VF_128 modes, merged with operand 1 under mask (const_int 1) and emitted
;; as vcvtusi2<suffix>.  Enabled for TARGET_AVX512F, further gated by
;; <round_modev4sf_condition> (defined outside this section).
06bc9e41 4004(define_insn "cvtusi2<ssescalarmodesuffix>32<round_name>"
c003c6d6
AI
4005 [(set (match_operand:VF_128 0 "register_operand" "=v")
4006 (vec_merge:VF_128
4007 (vec_duplicate:VF_128
4008 (unsigned_float:<ssescalarmode>
06bc9e41 4009 (match_operand:SI 2 "<round_nimm_predicate>" "<round_constraint3>")))
c003c6d6
AI
4010 (match_operand:VF_128 1 "register_operand" "v")
4011 (const_int 1)))]
06bc9e41
AI
4012 "TARGET_AVX512F && <round_modev4sf_condition>"
4013 "vcvtusi2<ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
c003c6d6
AI
4014 [(set_attr "type" "sseicvt")
4015 (set_attr "prefix" "evex")
4016 (set_attr "mode" "<ssescalarmode>")])
4017
;; Unsigned DImode -> scalar float conversion for the VF_128 modes, emitted
;; as vcvtusi2<suffix>.  Requires TARGET_AVX512F and TARGET_64BIT; unlike the
;; 32-bit-source pattern there is no extra <round_*> condition here.
06bc9e41 4018(define_insn "cvtusi2<ssescalarmodesuffix>64<round_name>"
c003c6d6
AI
4019 [(set (match_operand:VF_128 0 "register_operand" "=v")
4020 (vec_merge:VF_128
4021 (vec_duplicate:VF_128
4022 (unsigned_float:<ssescalarmode>
06bc9e41 4023 (match_operand:DI 2 "<round_nimm_predicate>" "<round_constraint3>")))
c003c6d6
AI
4024 (match_operand:VF_128 1 "register_operand" "v")
4025 (const_int 1)))]
4026 "TARGET_AVX512F && TARGET_64BIT"
06bc9e41 4027 "vcvtusi2<ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
c003c6d6
AI
4028 [(set_attr "type" "sseicvt")
4029 (set_attr "prefix" "evex")
4030 (set_attr "mode" "<ssescalarmode>")])
4031
;; Packed signed integer -> single-precision conversion for every VF1 mode,
;; emitted as [v]cvtdq2ps.  Supports the <mask_name> and <round_name>
;; substitutions; their extra conditions restrict which modes they apply to.
06bc9e41 4032(define_insn "float<sseintvecmodelower><mode>2<mask_name><round_name>"
3f97cb0b 4033 [(set (match_operand:VF1 0 "register_operand" "=v")
406d683e 4034 (float:VF1
06bc9e41
AI
4035 (match_operand:<sseintvecmode> 1 "<round_nimm_predicate>" "<round_constraint>")))]
4036 "TARGET_SSE2 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
4037 "%vcvtdq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
d6023b50 4038 [(set_attr "type" "ssecvt")
a95d4000 4039 (set_attr "prefix" "maybe_vex")
406d683e 4040 (set_attr "mode" "<sseinsnmode>")])
d6023b50 4041
4769c826
AI
;; Packed unsigned integer -> single-precision conversion (vcvtudq2ps) over
;; the VF1_AVX512VL modes; always EVEX-encoded and gated on TARGET_AVX512F.
;; NOTE(review): operand 1 uses "nonimmediate_operand" with
;; <round_constraint>, whereas the signed pattern uses
;; <round_nimm_predicate> -- confirm this difference is intended.
4042(define_insn "ufloat<sseintvecmodelower><mode>2<mask_name><round_name>"
4043 [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
4044 (unsigned_float:VF1_AVX512VL
4045 (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
c003c6d6 4046 "TARGET_AVX512F"
06bc9e41 4047 "vcvtudq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
c003c6d6
AI
4048 [(set_attr "type" "ssecvt")
4049 (set_attr "prefix" "evex")
4769c826 4050 (set_attr "mode" "<MODE>")])
c003c6d6 4051
;; Expander for unsigned vector integer -> single-precision conversion.
;; Enabled for V4SF with TARGET_SSE2, and for the other VF1 modes only when
;; TARGET_AVX2 also holds.  Dispatch in the preparation code:
;;   - V16SF: emit the ufloatv16siv16sf2 insn;
;;   - TARGET_AVX512VL: emit ufloatv4siv4sf2 (V4SF) or ufloatv8siv8sf2;
;;   - otherwise: call ix86_expand_vector_convert_uns_vsivsf.
;; The expander always finishes with DONE, so the RTL template is never used.
406d683e 4052(define_expand "floatuns<sseintvecmodelower><mode>2"
82e86dc6
UB
4053 [(match_operand:VF1 0 "register_operand")
4054 (match_operand:<sseintvecmode> 1 "register_operand")]
635c6321 4055 "TARGET_SSE2 && (<MODE>mode == V4SFmode || TARGET_AVX2)"
54e86f6b 4056{
d3c2fee0
AI
4057 if (<MODE>mode == V16SFmode)
4058 emit_insn (gen_ufloatv16siv16sf2 (operands[0], operands[1]));
2fa9ee8f
AI
4059 else
4060 if (TARGET_AVX512VL)
4061 {
4062 if (<MODE>mode == V4SFmode)
4063 emit_insn (gen_ufloatv4siv4sf2 (operands[0], operands[1]));
4064 else
4065 emit_insn (gen_ufloatv8siv8sf2 (operands[0], operands[1]));
4066 }
d3c2fee0
AI
4067 else
4068 ix86_expand_vector_convert_uns_vsivsf (operands[0], operands[1]);
4069
635c6321 4070 DONE;
54e86f6b
UB
4071})
4072
95879c72 4073
50e60d7d
AI
4074;; For the <sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode> insn pattern:
;; maps each SI vector mode to the lower-case name of the SF vector mode
;; with the same element count, for use in the insn name.
4075(define_mode_attr sf2simodelower
4076 [(V16SI "v16sf") (V8SI "v8sf") (V4SI "v4sf")])
4077
;; Packed non-truncating single-precision -> SI conversion
;; (UNSPEC_FIX_NOTRUNC) over the VI4_AVX modes, emitted as [v]cvtps2dq.
;; "prefix_data16" is "1" unless TARGET_AVX, where it is left as "*".
415ebad0 4078(define_insn "<sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode><mask_name>"
50e60d7d
AI
4079 [(set (match_operand:VI4_AVX 0 "register_operand" "=v")
4080 (unspec:VI4_AVX
4081 [(match_operand:<ssePSmode> 1 "nonimmediate_operand" "vm")]
4082 UNSPEC_FIX_NOTRUNC))]
415ebad0
AI
4083 "TARGET_SSE2 && <mask_mode512bit_condition>"
4084 "%vcvtps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
d6023b50 4085 [(set_attr "type" "ssecvt")
a95d4000
UB
4086 (set (attr "prefix_data16")
4087 (if_then_else
67b2c493 4088 (match_test "TARGET_AVX")
a95d4000
UB
4089 (const_string "*")
4090 (const_string "1")))
4091 (set_attr "prefix" "maybe_vex")
50e60d7d 4092 (set_attr "mode" "<sseinsnmode>")])
d6023b50 4093
;; 512-bit non-truncating V16SF -> V16SI conversion (vcvtps2dq), EVEX only,
;; with <mask_name> and <round_name> substitutions.
06bc9e41 4094(define_insn "<mask_codefor>avx512f_fix_notruncv16sfv16si<mask_name><round_name>"
c003c6d6
AI
4095 [(set (match_operand:V16SI 0 "register_operand" "=v")
4096 (unspec:V16SI
06bc9e41 4097 [(match_operand:V16SF 1 "<round_nimm_predicate>" "<round_constraint>")]
c003c6d6
AI
4098 UNSPEC_FIX_NOTRUNC))]
4099 "TARGET_AVX512F"
06bc9e41 4100 "vcvtps2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
c003c6d6
AI
4101 [(set_attr "type" "ssecvt")
4102 (set_attr "prefix" "evex")
4103 (set_attr "mode" "XI")])
4104
21c924ac
AI
;; Packed non-truncating single-precision -> unsigned SI conversion
;; (UNSPEC_UNSIGNED_FIX_NOTRUNC) over the VI4_AVX512VL modes, emitted as
;; vcvtps2udq; EVEX only.
4105(define_insn "<mask_codefor><avx512>_ufix_notrunc<sf2simodelower><mode><mask_name><round_name>"
4106 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
4107 (unspec:VI4_AVX512VL
4108 [(match_operand:<ssePSmode> 1 "nonimmediate_operand" "<round_constraint>")]
c003c6d6
AI
4109 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4110 "TARGET_AVX512F"
06bc9e41 4111 "vcvtps2udq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
c003c6d6
AI
4112 [(set_attr "type" "ssecvt")
4113 (set_attr "prefix" "evex")
21c924ac 4114 (set_attr "mode" "<sseinsnmode>")])
c003c6d6 4115
dc3b8d27
AI
;; Packed non-truncating single-precision -> signed DI conversion
;; (vcvtps2qq) for the VI8_256_512 modes; requires TARGET_AVX512DQ.
4116(define_insn "<mask_codefor>avx512dq_cvtps2qq<mode><mask_name><round_name>"
4117 [(set (match_operand:VI8_256_512 0 "register_operand" "=v")
4118 (unspec:VI8_256_512 [(match_operand:<ssePSmode2> 1 "nonimmediate_operand" "<round_constraint>")]
4119 UNSPEC_FIX_NOTRUNC))]
4120 "TARGET_AVX512DQ && <round_mode512bit_condition>"
4121 "vcvtps2qq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4122 [(set_attr "type" "ssecvt")
4123 (set_attr "prefix" "evex")
4124 (set_attr "mode" "<sseinsnmode>")])
4125
;; 128-bit vcvtps2qq: converts elements 0 and 1 of the V4SF operand 1
;; (selected via vec_select) to a V2DI result.  Requires both
;; TARGET_AVX512DQ and TARGET_AVX512VL; no rounding substitution.
4126(define_insn "<mask_codefor>avx512dq_cvtps2qqv2di<mask_name>"
4127 [(set (match_operand:V2DI 0 "register_operand" "=v")
4128 (unspec:V2DI
4129 [(vec_select:V2SF
4130 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
4131 (parallel [(const_int 0) (const_int 1)]))]
4132 UNSPEC_FIX_NOTRUNC))]
4133 "TARGET_AVX512DQ && TARGET_AVX512VL"
4134 "vcvtps2qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4135 [(set_attr "type" "ssecvt")
4136 (set_attr "prefix" "evex")
4137 (set_attr "mode" "TI")])
4138
;; Packed non-truncating single-precision -> unsigned DI conversion
;; (vcvtps2uqq) for the VI8_256_512 modes; requires TARGET_AVX512DQ.
4139(define_insn "<mask_codefor>avx512dq_cvtps2uqq<mode><mask_name><round_name>"
4140 [(set (match_operand:VI8_256_512 0 "register_operand" "=v")
4141 (unspec:VI8_256_512 [(match_operand:<ssePSmode2> 1 "nonimmediate_operand" "<round_constraint>")]
4142 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4143 "TARGET_AVX512DQ && <round_mode512bit_condition>"
4144 "vcvtps2uqq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4145 [(set_attr "type" "ssecvt")
4146 (set_attr "prefix" "evex")
4147 (set_attr "mode" "<sseinsnmode>")])
4148
;; 128-bit vcvtps2uqq: converts elements 0 and 1 of the V4SF operand 1 to an
;; unsigned V2DI result.  Requires TARGET_AVX512DQ and TARGET_AVX512VL.
4149(define_insn "<mask_codefor>avx512dq_cvtps2uqqv2di<mask_name>"
4150 [(set (match_operand:V2DI 0 "register_operand" "=v")
4151 (unspec:V2DI
4152 [(vec_select:V2SF
4153 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
4154 (parallel [(const_int 0) (const_int 1)]))]
4155 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4156 "TARGET_AVX512DQ && TARGET_AVX512VL"
4157 "vcvtps2uqq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4158 [(set_attr "type" "ssecvt")
4159 (set_attr "prefix" "evex")
4160 (set_attr "mode" "TI")])
4161
;; 512-bit truncating V16SF -> V16SI conversion over the any_fix code
;; iterator; <fixsuffix> is spliced into both the insn name and the
;; vcvttps2<fixsuffix>dq mnemonic.  EVEX only, TARGET_AVX512F.
8a6ef760 4162(define_insn "<fixsuffix>fix_truncv16sfv16si2<mask_name><round_saeonly_name>"
a9ccbba2
AI
4163 [(set (match_operand:V16SI 0 "register_operand" "=v")
4164 (any_fix:V16SI
8a6ef760 4165 (match_operand:V16SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
a9ccbba2 4166 "TARGET_AVX512F"
8a6ef760 4167 "vcvttps2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
a9ccbba2
AI
4168 [(set_attr "type" "ssecvt")
4169 (set_attr "prefix" "evex")
4170 (set_attr "mode" "XI")])
4171
415ebad0
AI
;; 256-bit truncating V8SF -> V8SI conversion (vcvttps2dq); requires
;; TARGET_AVX, with the masked variant further gated by
;; <mask_avx512vl_condition>.
4172(define_insn "fix_truncv8sfv8si2<mask_name>"
4173 [(set (match_operand:V8SI 0 "register_operand" "=v")
4174 (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "vm")))]
4175 "TARGET_AVX && <mask_avx512vl_condition>"
4176 "vcvttps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
95879c72 4177 [(set_attr "type" "ssecvt")
415ebad0 4178 (set_attr "prefix" "<mask_prefix>")
a95d4000 4179 (set_attr "mode" "OI")])
95879c72 4180
415ebad0
AI
;; 128-bit truncating V4SF -> V4SI conversion ([v]cvttps2dq); requires
;; TARGET_SSE2.  "prefix_rep" is "1" for the non-AVX encoding and "*" under
;; TARGET_AVX.
;; NOTE(review): "prefix_data16" is assigned twice below -- once through the
;; TARGET_AVX conditional ("*" / "0") and once unconditionally to "0".
;; Confirm which definition is intended and drop the other.
4181(define_insn "fix_truncv4sfv4si2<mask_name>"
4182 [(set (match_operand:V4SI 0 "register_operand" "=v")
4183 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "vm")))]
4184 "TARGET_SSE2 && <mask_avx512vl_condition>"
4185 "%vcvttps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
d6023b50 4186 [(set_attr "type" "ssecvt")
a95d4000
UB
4187 (set (attr "prefix_rep")
4188 (if_then_else
67b2c493 4189 (match_test "TARGET_AVX")
a95d4000
UB
4190 (const_string "*")
4191 (const_string "1")))
4192 (set (attr "prefix_data16")
4193 (if_then_else
67b2c493 4194 (match_test "TARGET_AVX")
a95d4000
UB
4195 (const_string "*")
4196 (const_string "0")))
725fd454 4197 (set_attr "prefix_data16" "0")
415ebad0 4198 (set_attr "prefix" "<mask_prefix2>")
d6023b50
UB
4199 (set_attr "mode" "TI")])
4200
;; Expander for truncating single-precision -> unsigned integer vector
;; conversion.  For V16SF it emits the ufix_truncv16sfv16si2 insn directly.
;; For the other VF1 modes it calls ix86_expand_adjust_ufix_to_sfix_si,
;; which returns an adjusted input (tmp[0]) and fills in tmp[2]; a signed
;; fix_trunc of tmp[0] is placed in tmp[1], and operand 0 receives
;; tmp[1] XOR tmp[2].  Always ends with DONE.
03e0010d 4201(define_expand "fixuns_trunc<mode><sseintvecmodelower>2"
82e86dc6
UB
4202 [(match_operand:<sseintvecmode> 0 "register_operand")
4203 (match_operand:VF1 1 "register_operand")]
2f2da9e9 4204 "TARGET_SSE2"
03e0010d 4205{
d3c2fee0
AI
4206 if (<MODE>mode == V16SFmode)
4207 emit_insn (gen_ufix_truncv16sfv16si2 (operands[0],
4208 operands[1]));
4209 else
4210 {
4211 rtx tmp[3];
4212 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
4213 tmp[1] = gen_reg_rtx (<sseintvecmode>mode);
4214 emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (tmp[1], tmp[0]));
4215 emit_insn (gen_xor<sseintvecmodelower>3 (operands[0], tmp[1], tmp[2]));
4216 }
6bf39801 4217 DONE;
03e0010d
JJ
4218})
4219
d6023b50
UB
4220;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4221;;
4222;; Parallel double-precision floating point conversion operations
ef719a44
RH
4223;;
4224;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4225
d6023b50
UB
;; V2SI -> V2DF conversion (cvtpi2pd).  Alternative 0 takes the source from
;; a "y" register and is tagged unit "mmx" with a data16 prefix; alternative
;; 1 takes it from memory.
4226(define_insn "sse2_cvtpi2pd"
4227 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
4228 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
ef719a44 4229 "TARGET_SSE2"
d6023b50
UB
4230 "cvtpi2pd\t{%1, %0|%0, %1}"
4231 [(set_attr "type" "ssecvt")
4232 (set_attr "unit" "mmx,*")
725fd454 4233 (set_attr "prefix_data16" "1,*")
d6023b50 4234 (set_attr "mode" "V2DF")])
ef719a44 4235
d6023b50
UB
;; Non-truncating V2DF -> V2SI conversion (cvtpd2pi) into a "y" register.
;; NOTE(review): "mode" is "DI" here while the truncating sse2_cvttpd2pi
;; pattern uses "TI" -- confirm whether the difference is intentional.
4236(define_insn "sse2_cvtpd2pi"
4237 [(set (match_operand:V2SI 0 "register_operand" "=y")
4238 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
4239 UNSPEC_FIX_NOTRUNC))]
ef719a44 4240 "TARGET_SSE2"
d6023b50
UB
4241 "cvtpd2pi\t{%1, %0|%0, %1}"
4242 [(set_attr "type" "ssecvt")
4243 (set_attr "unit" "mmx")
a95d4000 4244 (set_attr "bdver1_decode" "double")
01284895 4245 (set_attr "btver2_decode" "direct")
d6023b50 4246 (set_attr "prefix_data16" "1")
a95d4000 4247 (set_attr "mode" "DI")])
ef719a44 4248
d6023b50
UB
;; Truncating V2DF -> V2SI conversion (cvttpd2pi, RTL `fix') into a "y"
;; register; tagged unit "mmx" with a data16 prefix.
4249(define_insn "sse2_cvttpd2pi"
4250 [(set (match_operand:V2SI 0 "register_operand" "=y")
4251 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
ef719a44 4252 "TARGET_SSE2"
d6023b50
UB
4253 "cvttpd2pi\t{%1, %0|%0, %1}"
4254 [(set_attr "type" "ssecvt")
4255 (set_attr "unit" "mmx")
a95d4000 4256 (set_attr "bdver1_decode" "double")
d6023b50 4257 (set_attr "prefix_data16" "1")
a95d4000 4258 (set_attr "mode" "TI")])
95879c72 4259
;; Scalar signed SImode -> DFmode conversion merged with operand 1 under
;; mask (const_int 1).  Alternatives 0 and 1 emit two-operand cvtsi2sd
;; (non-AVX); alternative 2 emits three-operand vcvtsi2sd.  This pattern has
;; no <round_name> substitution.
d6023b50 4260(define_insn "sse2_cvtsi2sd"
460f31ee 4261 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
ef719a44 4262 (vec_merge:V2DF
d6023b50 4263 (vec_duplicate:V2DF
a95d4000 4264 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
460f31ee 4265 (match_operand:V2DF 1 "register_operand" "0,0,v")
ef719a44 4266 (const_int 1)))]
ef719a44 4267 "TARGET_SSE2"
a95d4000
UB
4268 "@
4269 cvtsi2sd\t{%2, %0|%0, %2}
4270 cvtsi2sd\t{%2, %0|%0, %2}
4271 vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
4272 [(set_attr "isa" "noavx,noavx,avx")
4273 (set_attr "type" "sseicvt")
4274 (set_attr "athlon_decode" "double,direct,*")
4275 (set_attr "amdfam10_decode" "vector,double,*")
4276 (set_attr "bdver1_decode" "double,direct,*")
01284895 4277 (set_attr "btver2_decode" "double,double,double")
460f31ee 4278 (set_attr "prefix" "orig,orig,maybe_evex")
95879c72
L
4279 (set_attr "mode" "DF")])
4280
;; Scalar signed DImode -> DFmode conversion (cvtsi2sdq / vcvtsi2sdq);
;; requires TARGET_64BIT.  Only the AVX alternative carries the <round_op3>
;; operand in its template.
06bc9e41 4281(define_insn "sse2_cvtsi2sdq<round_name>"
2b1ebb0c 4282 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
ef719a44 4283 (vec_merge:V2DF
d6023b50 4284 (vec_duplicate:V2DF
06bc9e41 4285 (float:DF (match_operand:DI 2 "<round_nimm_predicate>" "r,m,<round_constraint3>")))
2b1ebb0c 4286 (match_operand:V2DF 1 "register_operand" "0,0,v")
ef719a44 4287 (const_int 1)))]
d6023b50 4288 "TARGET_SSE2 && TARGET_64BIT"
a95d4000
UB
4289 "@
4290 cvtsi2sdq\t{%2, %0|%0, %2}
4291 cvtsi2sdq\t{%2, %0|%0, %2}
06bc9e41 4292 vcvtsi2sdq\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
a95d4000
UB
4293 [(set_attr "isa" "noavx,noavx,avx")
4294 (set_attr "type" "sseicvt")
4295 (set_attr "athlon_decode" "double,direct,*")
4296 (set_attr "amdfam10_decode" "vector,double,*")
4297 (set_attr "bdver1_decode" "double,direct,*")
4298 (set_attr "length_vex" "*,*,4")
4299 (set_attr "prefix_rex" "1,1,*")
2b1ebb0c 4300 (set_attr "prefix" "orig,orig,maybe_evex")
a95d4000 4301 (set_attr "mode" "DF")])
d6023b50 4302
;; Non-truncating SF -> unsigned SImode conversion
;; (UNSPEC_UNSIGNED_FIX_NOTRUNC) of element 0 of the V4SF operand 1, emitted
;; as vcvtss2usi.  TARGET_AVX512F, EVEX only.
06bc9e41 4303(define_insn "avx512f_vcvtss2usi<round_name>"
c003c6d6
AI
4304 [(set (match_operand:SI 0 "register_operand" "=r")
4305 (unspec:SI
4306 [(vec_select:SF
06bc9e41 4307 (match_operand:V4SF 1 "<round_nimm_predicate>" "<round_constraint>")
c003c6d6
AI
4308 (parallel [(const_int 0)]))]
4309 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4310 "TARGET_AVX512F"
06bc9e41 4311 "vcvtss2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
c003c6d6
AI
4312 [(set_attr "type" "sseicvt")
4313 (set_attr "prefix" "evex")
4314 (set_attr "mode" "SI")])
4315
;; DImode-result variant of vcvtss2usi; requires TARGET_64BIT.  The template
;; uses the same mnemonic as the SImode pattern, with no size suffix.
06bc9e41 4316(define_insn "avx512f_vcvtss2usiq<round_name>"
c003c6d6
AI
4317 [(set (match_operand:DI 0 "register_operand" "=r")
4318 (unspec:DI
4319 [(vec_select:SF
06bc9e41 4320 (match_operand:V4SF 1 "<round_nimm_predicate>" "<round_constraint>")
c003c6d6
AI
4321 (parallel [(const_int 0)]))]
4322 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4323 "TARGET_AVX512F && TARGET_64BIT"
06bc9e41 4324 "vcvtss2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
c003c6d6
AI
4325 [(set_attr "type" "sseicvt")
4326 (set_attr "prefix" "evex")
4327 (set_attr "mode" "DI")])
4328
;; Truncating SF -> unsigned SImode conversion (RTL `unsigned_fix') of
;; element 0 of the V4SF operand 1, emitted as vcvttss2usi.  TARGET_AVX512F.
8a6ef760 4329(define_insn "avx512f_vcvttss2usi<round_saeonly_name>"
c003c6d6
AI
4330 [(set (match_operand:SI 0 "register_operand" "=r")
4331 (unsigned_fix:SI
4332 (vec_select:SF
8a6ef760 4333 (match_operand:V4SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
c003c6d6
AI
4334 (parallel [(const_int 0)]))))]
4335 "TARGET_AVX512F"
8a6ef760 4336 "vcvttss2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
c003c6d6
AI
4337 [(set_attr "type" "sseicvt")
4338 (set_attr "prefix" "evex")
4339 (set_attr "mode" "SI")])
4340
;; DImode-result variant of vcvttss2usi; requires TARGET_64BIT.
8a6ef760 4341(define_insn "avx512f_vcvttss2usiq<round_saeonly_name>"
c003c6d6
AI
4342 [(set (match_operand:DI 0 "register_operand" "=r")
4343 (unsigned_fix:DI
4344 (vec_select:SF
8a6ef760 4345 (match_operand:V4SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
c003c6d6
AI
4346 (parallel [(const_int 0)]))))]
4347 "TARGET_AVX512F && TARGET_64BIT"
8a6ef760 4348 "vcvttss2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
c003c6d6
AI
4349 [(set_attr "type" "sseicvt")
4350 (set_attr "prefix" "evex")
4351 (set_attr "mode" "DI")])
4352
;; Non-truncating DF -> unsigned SImode conversion of element 0 of the V2DF
;; operand 1, emitted as vcvtsd2usi.  TARGET_AVX512F, EVEX only.
06bc9e41 4353(define_insn "avx512f_vcvtsd2usi<round_name>"
c003c6d6
AI
4354 [(set (match_operand:SI 0 "register_operand" "=r")
4355 (unspec:SI
4356 [(vec_select:DF
06bc9e41 4357 (match_operand:V2DF 1 "<round_nimm_predicate>" "<round_constraint>")
c003c6d6
AI
4358 (parallel [(const_int 0)]))]
4359 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4360 "TARGET_AVX512F"
06bc9e41 4361 "vcvtsd2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
c003c6d6
AI
4362 [(set_attr "type" "sseicvt")
4363 (set_attr "prefix" "evex")
4364 (set_attr "mode" "SI")])
4365
;; DImode-result variant of vcvtsd2usi; requires TARGET_64BIT.
06bc9e41 4366(define_insn "avx512f_vcvtsd2usiq<round_name>"
c003c6d6
AI
4367 [(set (match_operand:DI 0 "register_operand" "=r")
4368 (unspec:DI
4369 [(vec_select:DF
06bc9e41 4370 (match_operand:V2DF 1 "<round_nimm_predicate>" "<round_constraint>")
c003c6d6
AI
4371 (parallel [(const_int 0)]))]
4372 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4373 "TARGET_AVX512F && TARGET_64BIT"
06bc9e41 4374 "vcvtsd2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
c003c6d6
AI
4375 [(set_attr "type" "sseicvt")
4376 (set_attr "prefix" "evex")
4377 (set_attr "mode" "DI")])
4378
;; Truncating DF -> unsigned SImode conversion (RTL `unsigned_fix') of
;; element 0 of the V2DF operand 1, emitted as vcvttsd2usi.  TARGET_AVX512F.
8a6ef760 4379(define_insn "avx512f_vcvttsd2usi<round_saeonly_name>"
c003c6d6
AI
4380 [(set (match_operand:SI 0 "register_operand" "=r")
4381 (unsigned_fix:SI
4382 (vec_select:DF
8a6ef760 4383 (match_operand:V2DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
c003c6d6
AI
4384 (parallel [(const_int 0)]))))]
4385 "TARGET_AVX512F"
8a6ef760 4386 "vcvttsd2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
c003c6d6
AI
4387 [(set_attr "type" "sseicvt")
4388 (set_attr "prefix" "evex")
4389 (set_attr "mode" "SI")])
4390
;; DImode-result variant of vcvttsd2usi; requires TARGET_64BIT.
8a6ef760 4391(define_insn "avx512f_vcvttsd2usiq<round_saeonly_name>"
c003c6d6
AI
4392 [(set (match_operand:DI 0 "register_operand" "=r")
4393 (unsigned_fix:DI
4394 (vec_select:DF
8a6ef760 4395 (match_operand:V2DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
c003c6d6
AI
4396 (parallel [(const_int 0)]))))]
4397 "TARGET_AVX512F && TARGET_64BIT"
8a6ef760 4398 "vcvttsd2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
c003c6d6
AI
4399 [(set_attr "type" "sseicvt")
4400 (set_attr "prefix" "evex")
4401 (set_attr "mode" "DI")])
4402
;; Non-truncating DF -> SImode conversion (UNSPEC_FIX_NOTRUNC) of element 0
;; of the V2DF operand 1, emitted as [v]cvtsd2si.
06bc9e41 4403(define_insn "sse2_cvtsd2si<round_name>"
d6023b50
UB
4404 [(set (match_operand:SI 0 "register_operand" "=r,r")
4405 (unspec:SI
4406 [(vec_select:DF
06bc9e41 4407 (match_operand:V2DF 1 "<round_nimm_predicate>" "v,<round_constraint2>")
d6023b50
UB
4408 (parallel [(const_int 0)]))]
4409 UNSPEC_FIX_NOTRUNC))]
ef719a44 4410 "TARGET_SSE2"
06bc9e41 4411 "%vcvtsd2si\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
d6023b50
UB
4412 [(set_attr "type" "sseicvt")
4413 (set_attr "athlon_decode" "double,vector")
6a08ffca 4414 (set_attr "bdver1_decode" "double,double")
01284895 4415 (set_attr "btver2_decode" "double,double")
d6023b50 4416 (set_attr "prefix_rep" "1")
95879c72 4417 (set_attr "prefix" "maybe_vex")
d6023b50 4418 (set_attr "mode" "SI")])
ef719a44 4419
d6023b50
UB
;; [v]cvtsd2si on a plain DFmode operand (register or memory) rather than a
;; V2DF vector.  No <round_name> substitution.
4420(define_insn "sse2_cvtsd2si_2"
4421 [(set (match_operand:SI 0 "register_operand" "=r,r")
3f97cb0b 4422 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
d6023b50 4423 UNSPEC_FIX_NOTRUNC))]
ef719a44 4424 "TARGET_SSE2"
eabb5f48 4425 "%vcvtsd2si\t{%1, %0|%0, %q1}"
d6023b50
UB
4426 [(set_attr "type" "sseicvt")
4427 (set_attr "athlon_decode" "double,vector")
4428 (set_attr "amdfam10_decode" "double,double")
6a08ffca 4429 (set_attr "bdver1_decode" "double,double")
d6023b50 4430 (set_attr "prefix_rep" "1")
95879c72 4431 (set_attr "prefix" "maybe_vex")
d6023b50 4432 (set_attr "mode" "SI")])
ef719a44 4433
;; DImode-result variant of the vec_select cvtsd2si pattern; requires
;; TARGET_64BIT and adds a {q} suffix in the template.
06bc9e41 4434(define_insn "sse2_cvtsd2siq<round_name>"
d6023b50
UB
4435 [(set (match_operand:DI 0 "register_operand" "=r,r")
4436 (unspec:DI
4437 [(vec_select:DF
06bc9e41 4438 (match_operand:V2DF 1 "<round_nimm_predicate>" "v,<round_constraint2>")
d6023b50
UB
4439 (parallel [(const_int 0)]))]
4440 UNSPEC_FIX_NOTRUNC))]
4441 "TARGET_SSE2 && TARGET_64BIT"
06bc9e41 4442 "%vcvtsd2si{q}\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
d6023b50
UB
4443 [(set_attr "type" "sseicvt")
4444 (set_attr "athlon_decode" "double,vector")
6a08ffca 4445 (set_attr "bdver1_decode" "double,double")
d6023b50 4446 (set_attr "prefix_rep" "1")
95879c72 4447 (set_attr "prefix" "maybe_vex")
d6023b50 4448 (set_attr "mode" "DI")])
ef719a44 4449
d6023b50
UB
;; DImode-result [v]cvtsd2si{q} on a plain DFmode operand (register or
;; memory); requires TARGET_64BIT.  No <round_name> substitution.
4450(define_insn "sse2_cvtsd2siq_2"
4451 [(set (match_operand:DI 0 "register_operand" "=r,r")
3f97cb0b 4452 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
d6023b50
UB
4453 UNSPEC_FIX_NOTRUNC))]
4454 "TARGET_SSE2 && TARGET_64BIT"
eabb5f48 4455 "%vcvtsd2si{q}\t{%1, %0|%0, %q1}"
d6023b50
UB
4456 [(set_attr "type" "sseicvt")
4457 (set_attr "athlon_decode" "double,vector")
4458 (set_attr "amdfam10_decode" "double,double")
6a08ffca 4459 (set_attr "bdver1_decode" "double,double")
d6023b50 4460 (set_attr "prefix_rep" "1")
95879c72 4461 (set_attr "prefix" "maybe_vex")
d6023b50 4462 (set_attr "mode" "DI")])
ef719a44 4463
;; Truncating DF -> SImode conversion (RTL `fix') of element 0 of the V2DF
;; operand 1, emitted as [v]cvttsd2si.
8a6ef760 4464(define_insn "sse2_cvttsd2si<round_saeonly_name>"
d6023b50
UB
4465 [(set (match_operand:SI 0 "register_operand" "=r,r")
4466 (fix:SI
4467 (vec_select:DF
8a6ef760 4468 (match_operand:V2DF 1 "<round_saeonly_nimm_predicate>" "v,<round_saeonly_constraint2>")
d6023b50 4469 (parallel [(const_int 0)]))))]
ef719a44 4470 "TARGET_SSE2"
8a6ef760 4471 "%vcvttsd2si\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
d6023b50 4472 [(set_attr "type" "sseicvt")
d6023b50 4473 (set_attr "athlon_decode" "double,vector")
6a08ffca 4474 (set_attr "amdfam10_decode" "double,double")
a95d4000 4475 (set_attr "bdver1_decode" "double,double")
01284895 4476 (set_attr "btver2_decode" "double,double")
a95d4000
UB
4477 (set_attr "prefix_rep" "1")
4478 (set_attr "prefix" "maybe_vex")
4479 (set_attr "mode" "SI")])
d6023b50 4480
;; Truncating DF -> DImode conversion of element 0 of the V2DF operand 1,
;; emitted as [v]cvttsd2si{q}; requires TARGET_64BIT.
8a6ef760 4481(define_insn "sse2_cvttsd2siq<round_saeonly_name>"
d6023b50
UB
4482 [(set (match_operand:DI 0 "register_operand" "=r,r")
4483 (fix:DI
4484 (vec_select:DF
8a6ef760 4485 (match_operand:V2DF 1 "<round_saeonly_nimm_predicate>" "v,<round_saeonly_constraint2>")
d6023b50
UB
4486 (parallel [(const_int 0)]))))]
4487 "TARGET_SSE2 && TARGET_64BIT"
8a6ef760 4488 "%vcvttsd2si{q}\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
d6023b50 4489 [(set_attr "type" "sseicvt")
d6023b50 4490 (set_attr "athlon_decode" "double,vector")
6a08ffca 4491 (set_attr "amdfam10_decode" "double,double")
a95d4000
UB
4492 (set_attr "bdver1_decode" "double,double")
4493 (set_attr "prefix_rep" "1")
4494 (set_attr "prefix" "maybe_vex")
4495 (set_attr "mode" "DI")])
ef719a44 4496
ec5e777c
AI
4497;; For the float<si2dfmodelower><mode>2 and ufloat<si2dfmodelower><mode>2
;; insn patterns: map a DF vector mode to the SI vector mode with the same
;; element count (si2dfmode) and to that mode's lower-case name
;; (si2dfmodelower).
4498(define_mode_attr si2dfmode
4499 [(V8DF "V8SI") (V4DF "V4SI")])
4500(define_mode_attr si2dfmodelower
4501 [(V8DF "v8si") (V4DF "v4si")])
4502
;; Packed signed SI -> double-precision conversion (vcvtdq2pd) for the
;; VF2_512_256 modes; the source mode comes from the si2dfmode attribute.
47490470 4503(define_insn "float<si2dfmodelower><mode>2<mask_name>"
ec5e777c
AI
4504 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
4505 (float:VF2_512_256 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
47490470
AI
4506 "TARGET_AVX && <mask_mode512bit_condition>"
4507 "vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
95879c72 4508 [(set_attr "type" "ssecvt")
ec5e777c
AI
4509 (set_attr "prefix" "maybe_vex")
4510 (set_attr "mode" "<MODE>")])
95879c72 4511
39012b09
AI
;; Packed signed/unsigned DI -> double-precision conversion
;; (vcvt<floatsuffix>qq2pd) over the VF2_AVX512VL modes, iterating over
;; any_float.  Requires TARGET_AVX512DQ; EVEX only.
;; Operand 1 uses <round_constraint> rather than a literal "vm": the pattern
;; is subject to the <round_name> substitution and prints <round_mask_op2>,
;; so the constraint has to follow the substitution like the other
;; <round_name> patterns in this file do, instead of always offering a
;; memory alternative.
4512(define_insn "<floatsuffix>float<sseintvecmodelower><mode>2<mask_name><round_name>"
4513 [(set (match_operand:VF2_AVX512VL 0 "register_operand" "=v")
4514 (any_float:VF2_AVX512VL
4515 (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
4516 "TARGET_AVX512DQ"
4517 "vcvt<floatsuffix>qq2pd\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4518 [(set_attr "type" "ssecvt")
4519 (set_attr "prefix" "evex")
4520 (set_attr "mode" "<MODE>")])
4521
4522;; For <floatsuffix>float<sselongvecmodelower><mode>2 insn patterns
;; qq2pssuff is the mnemonic suffix appended to vcvt[u]qq2ps: empty for
;; V8SF, "{y}" for V4SF.
4523(define_mode_attr qq2pssuff
4524 [(V8SF "") (V4SF "{y}")])
4525
;; DI vector mode with the same element count as the SF result mode.
4526(define_mode_attr sselongvecmode
4527 [(V8SF "V8DI") (V4SF "V4DI")])
4528
;; Lower-case spelling of sselongvecmode, for use in insn names.
4529(define_mode_attr sselongvecmodelower
4530 [(V8SF "v8di") (V4SF "v4di")])
4531
;; NOTE(review): no pattern in this section of the file references
;; sseintvecmode3 -- check for users elsewhere before changing it.
4532(define_mode_attr sseintvecmode3
4533 [(V8SF "XI") (V4SF "OI")
4534 (V8DF "OI") (V4DF "TI")])
4535
;; Packed signed/unsigned DI -> single-precision conversion
;; (vcvt<floatsuffix>qq2ps plus the qq2pssuff suffix) for the VF1_128_256VL
;; modes; the source is the wider <sselongvecmode> vector.  Requires
;; TARGET_AVX512DQ and <round_modev8sf_condition>.
4536(define_insn "<floatsuffix>float<sselongvecmodelower><mode>2<mask_name><round_name>"
4537 [(set (match_operand:VF1_128_256VL 0 "register_operand" "=v")
4538 (any_float:VF1_128_256VL
4539 (match_operand:<sselongvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
4540 "TARGET_AVX512DQ && <round_modev8sf_condition>"
4541 "vcvt<floatsuffix>qq2ps<qq2pssuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4542 [(set_attr "type" "ssecvt")
4543 (set_attr "prefix" "evex")
4544 (set_attr "mode" "<MODE>")])
4545
;; V2DI -> V2SF conversion (vcvt<floatsuffix>qq2ps{x}) whose two results are
;; concatenated with a zero V2SF to form the V4SF destination.  Requires
;; TARGET_AVX512DQ and TARGET_AVX512VL.
4546(define_insn "*<floatsuffix>floatv2div2sf2"
4547 [(set (match_operand:V4SF 0 "register_operand" "=v")
4548 (vec_concat:V4SF
4549 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
4550 (const_vector:V2SF [(const_int 0) (const_int 0)])))]
4551 "TARGET_AVX512DQ && TARGET_AVX512VL"
4552 "vcvt<floatsuffix>qq2ps{x}\t{%1, %0|%0, %1}"
4553 [(set_attr "type" "ssecvt")
4554 (set_attr "prefix" "evex")
4555 (set_attr "mode" "V4SF")])
4556
;; Masked form of the V2DI -> V2SF conversion.  The converted pair is
;; vec_merged with elements 0 and 1 of operand 2 under the QImode mask in
;; operand 3 ("Yk" constraint), then concatenated with a zero V2SF.
;; Operand 2 is constrained to "0C", i.e. tied to the destination or
;; matching the "C" constraint.
4557(define_insn "<floatsuffix>floatv2div2sf2_mask"
4558 [(set (match_operand:V4SF 0 "register_operand" "=v")
4559 (vec_concat:V4SF
4560 (vec_merge:V2SF
4561 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
4562 (vec_select:V2SF
4563 (match_operand:V4SF 2 "vector_move_operand" "0C")
4564 (parallel [(const_int 0) (const_int 1)]))
4565 (match_operand:QI 3 "register_operand" "Yk"))
4566 (const_vector:V2SF [(const_int 0) (const_int 0)])))]
4567 "TARGET_AVX512DQ && TARGET_AVX512VL"
4568 "vcvt<floatsuffix>qq2ps{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
4569 [(set_attr "type" "ssecvt")
4570 (set_attr "prefix" "evex")
4571 (set_attr "mode" "V4SF")])
4572
;; Packed unsigned SI -> double-precision conversion (vcvtudq2pd) for the
;; VF2_512_256VL modes; TARGET_AVX512F, EVEX only.
4573(define_insn "ufloat<si2dfmodelower><mode>2<mask_name>"
4574 [(set (match_operand:VF2_512_256VL 0 "register_operand" "=v")
4575 (unsigned_float:VF2_512_256VL
4576 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
4577 "TARGET_AVX512F"
4578 "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4579 [(set_attr "type" "ssecvt")
4580 (set_attr "prefix" "evex")
4581 (set_attr "mode" "<MODE>")])
4582
;; 128-bit vcvtudq2pd: converts elements 0 and 1 of the V4SI operand 1
;; (unsigned) to a V2DF result.  Requires TARGET_AVX512VL.
4583(define_insn "ufloatv2siv2df2<mask_name>"
4584 [(set (match_operand:V2DF 0 "register_operand" "=v")
4585 (unsigned_float:V2DF
4586 (vec_select:V2SI
4587 (match_operand:V4SI 1 "nonimmediate_operand" "vm")
4588 (parallel [(const_int 0) (const_int 1)]))))]
4589 "TARGET_AVX512VL"
47490470 4590 "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
c003c6d6
AI
4591 [(set_attr "type" "ssecvt")
4592 (set_attr "prefix" "evex")
39012b09 4593 (set_attr "mode" "V2DF")])
c003c6d6
AI
4594
;; vcvtdq2pd of elements 0-7 of a V16SI operand to V8DF.  The template
;; prints operand 1 with the %t modifier.
4595(define_insn "avx512f_cvtdq2pd512_2"
4596 [(set (match_operand:V8DF 0 "register_operand" "=v")
4597 (float:V8DF
4598 (vec_select:V8SI
4599 (match_operand:V16SI 1 "nonimmediate_operand" "vm")
4600 (parallel [(const_int 0) (const_int 1)
4601 (const_int 2) (const_int 3)
4602 (const_int 4) (const_int 5)
4603 (const_int 6) (const_int 7)]))))]
42815c48 4604 "TARGET_AVX512F"
c003c6d6
AI
4605 "vcvtdq2pd\t{%t1, %0|%0, %t1}"
4606 [(set_attr "type" "ssecvt")
4607 (set_attr "prefix" "evex")
4608 (set_attr "mode" "V8DF")])
4609
;; vcvtdq2pd of elements 0-3 of a V8SI operand to V4DF.  The template prints
;; operand 1 with the %x modifier.  Requires TARGET_AVX.
4fbe3b8a 4610(define_insn "avx_cvtdq2pd256_2"
42815c48 4611 [(set (match_operand:V4DF 0 "register_operand" "=v")
1e27129f
L
4612 (float:V4DF
4613 (vec_select:V4SI
42815c48 4614 (match_operand:V8SI 1 "nonimmediate_operand" "vm")
a95d4000
UB
4615 (parallel [(const_int 0) (const_int 1)
4616 (const_int 2) (const_int 3)]))))]
1e27129f
L
4617 "TARGET_AVX"
4618 "vcvtdq2pd\t{%x1, %0|%0, %x1}"
4619 [(set_attr "type" "ssecvt")
42815c48 4620 (set_attr "prefix" "maybe_evex")
1e27129f
L
4621 (set_attr "mode" "V4DF")])
4622
42815c48
AI
;; [v]cvtdq2pd of elements 0 and 1 of a V4SI operand to V2DF.  Requires
;; TARGET_SSE2; the masked variant is gated by <mask_avx512vl_condition>.
;; The "ssememalign" attribute is set to "64".
4623(define_insn "sse2_cvtdq2pd<mask_name>"
4624 [(set (match_operand:V2DF 0 "register_operand" "=v")
d6023b50
UB
4625 (float:V2DF
4626 (vec_select:V2SI
42815c48 4627 (match_operand:V4SI 1 "nonimmediate_operand" "vm")
d6023b50 4628 (parallel [(const_int 0) (const_int 1)]))))]
42815c48
AI
4629 "TARGET_SSE2 && <mask_avx512vl_condition>"
4630 "%vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
d6023b50 4631 [(set_attr "type" "ssecvt")
95879c72 4632 (set_attr "prefix" "maybe_vex")
f220a4f4 4633 (set_attr "ssememalign" "64")
ef719a44
RH
4634 (set_attr "mode" "V2DF")])
4635
;; V8DF -> V8SI conversion expressed as UNSPEC_FIX_NOTRUNC, emitted as
;; vcvtpd2dq; operand 1's predicate and constraint come from the round
;; substitution attributes.
(define_insn "<mask_codefor>avx512f_cvtpd2dq512<mask_name><round_name>"
  [(set (match_operand:V8SI 0 "register_operand" "=v")
        (unspec:V8SI
          [(match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")]
          UNSPEC_FIX_NOTRUNC))]
  "TARGET_AVX512F"
  "vcvtpd2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "OI")])
4646
;; V4DF -> V4SI conversion (UNSPEC_FIX_NOTRUNC), emitted as
;; vcvtpd2dq{y}.
(define_insn "avx_cvtpd2dq256<mask_name>"
  [(set (match_operand:V4SI 0 "register_operand" "=v")
        (unspec:V4SI
          [(match_operand:V4DF 1 "nonimmediate_operand" "vm")]
          UNSPEC_FIX_NOTRUNC))]
  "TARGET_AVX && <mask_avx512vl_condition>"
  "vcvtpd2dq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "<mask_prefix>")
   (set_attr "mode" "OI")])
4656
;; Expander producing a V8SI whose first half is the V4DF -> V4SI
;; UNSPEC_FIX_NOTRUNC conversion and whose second half is operand 2,
;; which the preparation code sets to the V4SI zero constant.
(define_expand "avx_cvtpd2dq256_2"
  [(set (match_operand:V8SI 0 "register_operand")
        (vec_concat:V8SI
          (unspec:V4SI
            [(match_operand:V4DF 1 "nonimmediate_operand")]
            UNSPEC_FIX_NOTRUNC)
          (match_dup 2)))]
  "TARGET_AVX"
{
  operands[2] = CONST0_RTX (V4SImode);
})
4665
;; Insn form of the V8SI concat of a V4DF -> V4SI UNSPEC_FIX_NOTRUNC
;; conversion with a const0_operand second half; emitted as
;; vcvtpd2dq{y}.
(define_insn "*avx_cvtpd2dq256_2"
  [(set (match_operand:V8SI 0 "register_operand" "=x")
        (vec_concat:V8SI
          (unspec:V4SI
            [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
            UNSPEC_FIX_NOTRUNC)
          (match_operand:V4SI 2 "const0_operand")))]
  "TARGET_AVX"
  "vcvtpd2dq{y}\t{%1, %x0|%x0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "btver2_decode" "vector")
   (set_attr "mode" "OI")])
4678
;; V2DF -> V2SI conversion (UNSPEC_FIX_NOTRUNC) placed in the first half
;; of a V4SI result whose second half is a zero V2SI constant.  The
;; output code picks the "v"-prefixed template when TARGET_AVX is set
;; and the legacy cvtpd2dq spelling otherwise.
(define_insn "sse2_cvtpd2dq<mask_name>"
  [(set (match_operand:V4SI 0 "register_operand" "=v")
        (vec_concat:V4SI
          (unspec:V2SI
            [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
            UNSPEC_FIX_NOTRUNC)
          (const_vector:V2SI [(const_int 0) (const_int 0)])))]
  "TARGET_SSE2 && <mask_avx512vl_condition>"
{
  if (!TARGET_AVX)
    return "cvtpd2dq\t{%1, %0|%0, %1}";

  return "vcvtpd2dq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
}
  [(set_attr "type" "ssecvt")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")
   (set_attr "amdfam10_decode" "double")
   (set_attr "athlon_decode" "vector")
   (set_attr "bdver1_decode" "double")])
ef719a44 4700
;; Mnemonic suffix used by the ufix_notrunc* insn patterns:
;; empty for V8DF, "{y}" for V4DF.
(define_mode_attr pd2udqsuff
  [(V8DF "")
   (V4DF "{y}")])
4704
;; Double-precision vector -> unsigned integer vector (mode <si2dfmode>)
;; conversion expressed as UNSPEC_UNSIGNED_FIX_NOTRUNC; emitted as
;; vcvtpd2udq with the <pd2udqsuff> suffix.
(define_insn "ufix_notrunc<mode><si2dfmodelower>2<mask_name><round_name>"
  [(set (match_operand:<si2dfmode> 0 "register_operand" "=v")
        (unspec:<si2dfmode>
          [(match_operand:VF2_512_256VL 1 "nonimmediate_operand" "<round_constraint>")]
          UNSPEC_UNSIGNED_FIX_NOTRUNC))]
  "TARGET_AVX512F"
  "vcvtpd2udq<pd2udqsuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
4715
;; V2DF -> V2SI UNSPEC_UNSIGNED_FIX_NOTRUNC conversion concatenated with
;; a zero V2SI constant into a V4SI result (vcvtpd2udq{x}).
(define_insn "ufix_notruncv2dfv2si2<mask_name>"
  [(set (match_operand:V4SI 0 "register_operand" "=v")
        (vec_concat:V4SI
          (unspec:V2SI
            [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
            UNSPEC_UNSIGNED_FIX_NOTRUNC)
          (const_vector:V2SI [(const_int 0) (const_int 0)])))]
  "TARGET_AVX512VL"
  "vcvtpd2udq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
c003c6d6 4728
;; V8DF -> V8SI conversion over the any_fix code iterator; the mnemonic
;; is vcvttpd2<fixsuffix>dq.
(define_insn "<fixsuffix>fix_truncv8dfv8si2<mask_name><round_saeonly_name>"
  [(set (match_operand:V8SI 0 "register_operand" "=v")
        (any_fix:V8SI
          (match_operand:V8DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
  "TARGET_AVX512F"
  "vcvttpd2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "OI")])
4738
;; unsigned_fix of a V2DF operand to V2SI, concatenated with a zero
;; V2SI constant into a V4SI result (vcvttpd2udq{x}).
(define_insn "ufix_truncv2dfv2si2<mask_name>"
  [(set (match_operand:V4SI 0 "register_operand" "=v")
        (vec_concat:V4SI
          (unsigned_fix:V2SI
            (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
          (const_vector:V2SI [(const_int 0) (const_int 0)])))]
  "TARGET_AVX512VL"
  "vcvttpd2udq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
4749
;; Signed fix of a V4DF operand to V4SI (vcvttpd2dq{y}).
(define_insn "fix_truncv4dfv4si2<mask_name>"
  [(set (match_operand:V4SI 0 "register_operand" "=v")
        (fix:V4SI
          (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
  "TARGET_AVX || (TARGET_AVX512VL && TARGET_AVX512F)"
  "vcvttpd2dq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "OI")])
4758
;; unsigned_fix of a V4DF operand to V4SI (vcvttpd2udq{y}).
(define_insn "ufix_truncv4dfv4si2<mask_name>"
  [(set (match_operand:V4SI 0 "register_operand" "=v")
        (unsigned_fix:V4SI
          (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512VL && TARGET_AVX512F"
  "vcvttpd2udq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "OI")])
4767
;; any_fix of a VF2_AVX512VL operand to its <sseintvecmode> integer
;; vector mode; the mnemonic is vcvttpd2<fixsuffix>qq.
(define_insn "<fixsuffix>fix_trunc<mode><sseintvecmodelower>2<mask_name><round_saeonly_name>"
  [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
        (any_fix:<sseintvecmode>
          (match_operand:VF2_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
  "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>"
  "vcvttpd2<fixsuffix>qq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseintvecmode2>")])
4777
;; UNSPEC_FIX_NOTRUNC conversion of a VF2_AVX512VL operand to its
;; <sseintvecmode> integer vector mode (vcvtpd2qq).
(define_insn "fix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>"
  [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
        (unspec:<sseintvecmode>
          [(match_operand:VF2_AVX512VL 1 "<round_nimm_predicate>" "<round_constraint>")]
          UNSPEC_FIX_NOTRUNC))]
  "TARGET_AVX512DQ && <round_mode512bit_condition>"
  "vcvtpd2qq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseintvecmode2>")])
4788
;; UNSPEC_UNSIGNED_FIX_NOTRUNC conversion of a VF2_AVX512VL operand to
;; its <sseintvecmode> integer vector mode (vcvtpd2uqq).
(define_insn "ufix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>"
  [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
        (unspec:<sseintvecmode>
          [(match_operand:VF2_AVX512VL 1 "nonimmediate_operand" "<round_constraint>")]
          UNSPEC_UNSIGNED_FIX_NOTRUNC))]
  "TARGET_AVX512DQ && <round_mode512bit_condition>"
  "vcvtpd2uqq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseintvecmode2>")])
4799
;; any_fix of a VF1_128_256VL operand to its <sselongvecmode> mode;
;; the mnemonic is vcvttps2<fixsuffix>qq.
(define_insn "<fixsuffix>fix_trunc<mode><sselongvecmodelower>2<mask_name><round_saeonly_name>"
  [(set (match_operand:<sselongvecmode> 0 "register_operand" "=v")
        (any_fix:<sselongvecmode>
          (match_operand:VF1_128_256VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
  "TARGET_AVX512DQ && <round_saeonly_modev8sf_condition>"
  "vcvttps2<fixsuffix>qq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseintvecmode3>")])
4809
;; any_fix of elements 0 and 1 of a V4SF operand to V2DI; the mnemonic
;; is vcvttps2<fixsuffix>qq.
(define_insn "<fixsuffix>fix_truncv2sfv2di2<mask_name>"
  [(set (match_operand:V2DI 0 "register_operand" "=v")
        (any_fix:V2DI
          (vec_select:V2SF
            (match_operand:V4SF 1 "nonimmediate_operand" "vm")
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_AVX512DQ && TARGET_AVX512VL"
  "vcvttps2<fixsuffix>qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
4821
;; unsigned_fix of a VF1_128_256VL operand to its <sseintvecmode>
;; integer vector mode (vcvttps2udq).
(define_insn "ufix_trunc<mode><sseintvecmodelower>2<mask_name>"
  [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
        (unsigned_fix:<sseintvecmode>
          (match_operand:VF1_128_256VL 1 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512VL"
  "vcvttps2udq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseintvecmode2>")])
4831
;; Expander producing a V8SI whose first half is the signed fix of a
;; V4DF operand and whose second half is operand 2, which the
;; preparation code sets to the V4SI zero constant.
(define_expand "avx_cvttpd2dq256_2"
  [(set (match_operand:V8SI 0 "register_operand")
        (vec_concat:V8SI
          (fix:V4SI
            (match_operand:V4DF 1 "nonimmediate_operand"))
          (match_dup 2)))]
  "TARGET_AVX"
{
  operands[2] = CONST0_RTX (V4SImode);
})
4839
;; Signed fix of a V2DF operand to V2SI, concatenated with a zero V2SI
;; constant into a V4SI result.  The output code picks the "v"-prefixed
;; template when TARGET_AVX is set and cvttpd2dq otherwise.
(define_insn "sse2_cvttpd2dq<mask_name>"
  [(set (match_operand:V4SI 0 "register_operand" "=v")
        (vec_concat:V4SI
          (fix:V2SI
            (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
          (const_vector:V2SI [(const_int 0) (const_int 0)])))]
  "TARGET_SSE2 && <mask_avx512vl_condition>"
{
  if (!TARGET_AVX)
    return "cvttpd2dq\t{%1, %0|%0, %1}";

  return "vcvttpd2dq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
}
  [(set_attr "type" "ssecvt")
   (set_attr "amdfam10_decode" "double")
   (set_attr "athlon_decode" "vector")
   (set_attr "bdver1_decode" "double")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
95879c72 4858
;; Merge (vec_merge mask 1) the float_truncate of operand 2 (V2DF),
;; duplicated into V4SF, with operand 1.  Three alternatives: two legacy
;; cvtsd2ss forms (register / memory source) and the AVX vcvtsd2ss form.
(define_insn "sse2_cvtsd2ss<round_name>"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
        (vec_merge:V4SF
          (vec_duplicate:V4SF
            (float_truncate:V2SF
              (match_operand:V2DF 2 "nonimmediate_operand" "x,m,<round_constraint>")))
          (match_operand:V4SF 1 "register_operand" "0,0,v")
          (const_int 1)))]
  "TARGET_SSE2"
  "@
   cvtsd2ss\t{%2, %0|%0, %2}
   cvtsd2ss\t{%2, %0|%0, %q2}
   vcvtsd2ss\t{<round_op3>%2, %1, %0|%0, %1, %q2<round_op3>}"
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "ssecvt")
   (set_attr "athlon_decode" "vector,double,*")
   (set_attr "amdfam10_decode" "vector,double,*")
   (set_attr "bdver1_decode" "direct,direct,*")
   (set_attr "btver2_decode" "double,double,double")
   (set_attr "prefix" "orig,orig,<round_prefix>")
   (set_attr "mode" "SF")])
115a33c2 4880
;; Merge (vec_merge mask 1) the float_extend of elements 0 and 1 of
;; operand 2 (V4SF) with operand 1 (V2DF).  Three alternatives: two
;; legacy cvtss2sd forms (register / memory source) and the AVX
;; vcvtss2sd form.
(define_insn "sse2_cvtss2sd<round_saeonly_name>"
  [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
        (vec_merge:V2DF
          (float_extend:V2DF
            (vec_select:V2SF
              (match_operand:V4SF 2 "<round_saeonly_nimm_predicate>" "x,m,<round_saeonly_constraint>")
              (parallel [(const_int 0) (const_int 1)])))
          (match_operand:V2DF 1 "register_operand" "0,0,v")
          (const_int 1)))]
  "TARGET_SSE2"
  "@
   cvtss2sd\t{%2, %0|%0, %2}
   cvtss2sd\t{%2, %0|%0, %k2}
   vcvtss2sd\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %k2<round_saeonly_op3>}"
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "ssecvt")
   (set_attr "amdfam10_decode" "vector,double,*")
   (set_attr "athlon_decode" "direct,direct,*")
   (set_attr "bdver1_decode" "direct,direct,*")
   (set_attr "btver2_decode" "double,double,double")
   (set_attr "prefix" "orig,orig,<round_saeonly_prefix>")
   (set_attr "mode" "DF")])
4903
;; float_truncate of a V8DF operand to V8SF (vcvtpd2ps, EVEX encoded).
(define_insn "<mask_codefor>avx512f_cvtpd2ps512<mask_name><round_name>"
  [(set (match_operand:V8SF 0 "register_operand" "=v")
        (float_truncate:V8SF
          (match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")))]
  "TARGET_AVX512F"
  "vcvtpd2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "V8SF")])
4913
;; float_truncate of a V4DF operand to V4SF (vcvtpd2ps{y}).
(define_insn "avx_cvtpd2ps256<mask_name>"
  [(set (match_operand:V4SF 0 "register_operand" "=v")
        (float_truncate:V4SF
          (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
  "TARGET_AVX && <mask_avx512vl_condition>"
  "vcvtpd2ps{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "maybe_evex")
   (set_attr "btver2_decode" "vector")
   (set_attr "mode" "V4SF")])
4924
;; Expander: V4SF result whose first half is the float_truncate of a
;; V2DF operand and whose second half is operand 2, set by the
;; preparation code to the V2SF zero constant.
(define_expand "sse2_cvtpd2ps"
  [(set (match_operand:V4SF 0 "register_operand")
        (vec_concat:V4SF
          (float_truncate:V2SF
            (match_operand:V2DF 1 "nonimmediate_operand"))
          (match_dup 2)))]
  "TARGET_SSE2"
{
  operands[2] = CONST0_RTX (V2SFmode);
})
ef719a44 4933
;; Masked expander: vec_merge of the V2DF -> V2SF truncation (padded to
;; V4SF with operand 4) with operand 2, selected by the QImode operand 3.
;; The preparation code sets operand 4 to the V2SF zero constant.
(define_expand "sse2_cvtpd2ps_mask"
  [(set (match_operand:V4SF 0 "register_operand")
        (vec_merge:V4SF
          (vec_concat:V4SF
            (float_truncate:V2SF
              (match_operand:V2DF 1 "nonimmediate_operand"))
            (match_dup 4))
          (match_operand:V4SF 2 "register_operand")
          (match_operand:QI 3 "register_operand")))]
  "TARGET_SSE2"
{
  operands[4] = CONST0_RTX (V2SFmode);
})
4945
;; Insn for the V2DF -> V2SF truncation concatenated with a
;; const0_operand into V4SF.  Operand 2 is the zero half, which is why
;; the mask text uses <mask_operand3>.  The output code picks the
;; "v"-prefixed template when TARGET_AVX is set and cvtpd2ps otherwise.
(define_insn "*sse2_cvtpd2ps<mask_name>"
  [(set (match_operand:V4SF 0 "register_operand" "=v")
        (vec_concat:V4SF
          (float_truncate:V2SF
            (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
          (match_operand:V2SF 2 "const0_operand")))]
  "TARGET_SSE2 && <mask_avx512vl_condition>"
{
  if (!TARGET_AVX)
    return "cvtpd2ps\t{%1, %0|%0, %1}";

  return "vcvtpd2ps{x}\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}";
}
  [(set_attr "type" "ssecvt")
   (set_attr "amdfam10_decode" "double")
   (set_attr "athlon_decode" "vector")
   (set_attr "bdver1_decode" "double")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "V4SF")])
115a33c2 4966
;; Source mode for the <sse2_avx_avx512f>_cvtps2pd<avxsizesuffix> insn
;; pattern: V8DF pairs with V8SF, V4DF with V4SF.
(define_mode_attr sf2dfmode
  [(V8DF "V8SF")
   (V4DF "V4SF")])
4970
;; float_extend of an <sf2dfmode> operand to the VF2_512_256 modes
;; (vcvtps2pd).
(define_insn "<sse2_avx_avx512f>_cvtps2pd<avxsizesuffix><mask_name><round_saeonly_name>"
  [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
        (float_extend:VF2_512_256
          (match_operand:<sf2dfmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
  "TARGET_AVX && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
  "vcvtps2pd\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<MODE>")])
95879c72 4980
;; float_extend of elements 0..3 of a V8SF operand to V4DF (vcvtps2pd).
(define_insn "*avx_cvtps2pd256_2"
  [(set (match_operand:V4DF 0 "register_operand" "=x")
        (float_extend:V4DF
          (vec_select:V4SF
            (match_operand:V8SF 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)]))))]
  "TARGET_AVX"
  "vcvtps2pd\t{%x1, %0|%0, %x1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4DF")])
4993
;; float_extend of elements 0..7 of a V16SF operand to V8DF
;; (vcvtps2pd, EVEX encoded).
(define_insn "vec_unpacks_lo_v16sf"
  [(set (match_operand:V8DF 0 "register_operand" "=v")
        (float_extend:V8DF
          (vec_select:V8SF
            (match_operand:V16SF 1 "nonimmediate_operand" "vm")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)
                       (const_int 4) (const_int 5)
                       (const_int 6) (const_int 7)]))))]
  "TARGET_AVX512F"
  "vcvtps2pd\t{%t1, %0|%0, %t1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "V8DF")])
5008
;; VI12_AVX512VL vector -> mask register ("Yk") conversion expressed as
;; UNSPEC_CVTINT2MASK; emitted as vpmov<ssemodesuffix>2m.  Requires
;; AVX512BW.
(define_insn "<avx512>_cvt<ssemodesuffix>2mask<mode>"
  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
        (unspec:<avx512fmaskmode>
          [(match_operand:VI12_AVX512VL 1 "register_operand" "v")]
          UNSPEC_CVTINT2MASK))]
  "TARGET_AVX512BW"
  "vpmov<ssemodesuffix>2m\t{%1, %0|%0, %1}"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
5018
;; VI48_AVX512VL vector -> mask register ("Yk") conversion expressed as
;; UNSPEC_CVTINT2MASK; emitted as vpmov<ssemodesuffix>2m.  Requires
;; AVX512DQ.
(define_insn "<avx512>_cvt<ssemodesuffix>2mask<mode>"
  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
        (unspec:<avx512fmaskmode>
          [(match_operand:VI48_AVX512VL 1 "register_operand" "v")]
          UNSPEC_CVTINT2MASK))]
  "TARGET_AVX512DQ"
  "vpmov<ssemodesuffix>2m\t{%1, %0|%0, %1}"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
5028
;; Mask -> VI12_AVX512VL vector expander: a vec_merge, controlled by the
;; mask operand 1, of operand 2 (set to the all-ones constant) and
;; operand 3 (set to the zero constant).
(define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>"
  [(set (match_operand:VI12_AVX512VL 0 "register_operand")
        (vec_merge:VI12_AVX512VL
          (match_dup 2)
          (match_dup 3)
          (match_operand:<avx512fmaskmode> 1 "register_operand")))]
  "TARGET_AVX512BW"
{
  operands[2] = CONSTM1_RTX (<MODE>mode);
  operands[3] = CONST0_RTX (<MODE>mode);
})
5040
;; Insn for the mask -> VI12_AVX512VL vec_merge of a constm1_operand and
;; a const0_operand; emitted as vpmovm2<ssemodesuffix>.
(define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>"
  [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
        (vec_merge:VI12_AVX512VL
          (match_operand:VI12_AVX512VL 2 "constm1_operand")
          (match_operand:VI12_AVX512VL 3 "const0_operand")
          (match_operand:<avx512fmaskmode> 1 "register_operand" "Yk")))]
  "TARGET_AVX512BW"
  "vpmovm2<ssemodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
5051
;; Mask -> VI48_AVX512VL vector expander: a vec_merge, controlled by the
;; mask operand 1, of operand 2 (set to the all-ones constant) and
;; operand 3 (set to the zero constant).
(define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>"
  [(set (match_operand:VI48_AVX512VL 0 "register_operand")
        (vec_merge:VI48_AVX512VL
          (match_dup 2)
          (match_dup 3)
          (match_operand:<avx512fmaskmode> 1 "register_operand")))]
  "TARGET_AVX512DQ"
{
  operands[2] = CONSTM1_RTX (<MODE>mode);
  operands[3] = CONST0_RTX (<MODE>mode);
})
5063
;; Insn for the mask -> VI48_AVX512VL vec_merge of a constm1_operand and
;; a const0_operand; emitted as vpmovm2<ssemodesuffix>.
(define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>"
  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
        (vec_merge:VI48_AVX512VL
          (match_operand:VI48_AVX512VL 2 "constm1_operand")
          (match_operand:VI48_AVX512VL 3 "const0_operand")
          (match_operand:<avx512fmaskmode> 1 "register_operand" "Yk")))]
  "TARGET_AVX512DQ"
  "vpmovm2<ssemodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
5074
;; float_extend of elements 0 and 1 of a V4SF operand to V2DF.  The
;; template starts with "%v", so one pattern serves the cvtps2pd and
;; vcvtps2pd spellings.
(define_insn "sse2_cvtps2pd<mask_name>"
  [(set (match_operand:V2DF 0 "register_operand" "=v")
        (float_extend:V2DF
          (vec_select:V2SF
            (match_operand:V4SF 1 "nonimmediate_operand" "vm")
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE2 && <mask_avx512vl_condition>"
  "%vcvtps2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
  [(set_attr "type" "ssecvt")
   (set_attr "amdfam10_decode" "direct")
   (set_attr "athlon_decode" "double")
   (set_attr "bdver1_decode" "double")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "V2DF")])
ef719a44 5090
;; Two-step expander.  First, a fresh V4SF temporary (operand 2) receives
;; elements 6, 7, 2, 3 of the V8SF concatenation of itself with
;; operand 1, i.e. elements 2 and 3 of operand 1 land in positions 0
;; and 1.  Second, elements 0 and 1 of the temporary are float-extended
;; to V2DF.
(define_expand "vec_unpacks_hi_v4sf"
  [(set (match_dup 2)
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_dup 2)
            (match_operand:V4SF 1 "nonimmediate_operand"))
          (parallel [(const_int 6) (const_int 7)
                     (const_int 2) (const_int 3)])))
   (set (match_operand:V2DF 0 "register_operand")
        (float_extend:V2DF
          (vec_select:V2SF
            (match_dup 2)
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE2"
{
  operands[2] = gen_reg_rtx (V4SFmode);
})
e20524fa 5106
;; Extract elements 4..7 of the V8SF operand into a fresh V4SF
;; temporary (operand 2), then float-extend that temporary to V4DF.
(define_expand "vec_unpacks_hi_v8sf"
  [(set (match_dup 2)
        (vec_select:V4SF
          (match_operand:V8SF 1 "register_operand")
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))
   (set (match_operand:V4DF 0 "register_operand")
        (float_extend:V4DF
          (match_dup 2)))]
  "TARGET_AVX"
{
  operands[2] = gen_reg_rtx (V4SFmode);
})
1e27129f 5118
;; Extract elements 8..15 of the V16SF operand into a fresh V8SF
;; temporary (operand 2), then float-extend that temporary to V8DF.
(define_expand "vec_unpacks_hi_v16sf"
  [(set (match_dup 2)
        (vec_select:V8SF
          (match_operand:V16SF 1 "register_operand")
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)])))
   (set (match_operand:V8DF 0 "register_operand")
        (float_extend:V8DF
          (match_dup 2)))]
  "TARGET_AVX512F"
{
  operands[2] = gen_reg_rtx (V8SFmode);
})
5132
;; float_extend of elements 0 and 1 of a V4SF operand to V2DF; no
;; preparation code is needed.
(define_expand "vec_unpacks_lo_v4sf"
  [(set (match_operand:V2DF 0 "register_operand")
        (float_extend:V2DF
          (vec_select:V2SF
            (match_operand:V4SF 1 "nonimmediate_operand")
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE2")
ef719a44 5140
;; float_extend of elements 0..3 of a V8SF operand to V4DF; no
;; preparation code is needed.
(define_expand "vec_unpacks_lo_v8sf"
  [(set (match_operand:V4DF 0 "register_operand")
        (float_extend:V4DF
          (vec_select:V4SF
            (match_operand:V8SF 1 "nonimmediate_operand")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)]))))]
  "TARGET_AVX")
5149
;; Floating-point result mode of the vec_unpack*_float_* expanders for
;; each integer source mode.
(define_mode_attr sseunpackfltmode
  [(V8HI "V4SF")
   (V4SI "V2DF")
   (V16HI "V8SF")
   (V8SI "V4DF")
   (V32HI "V16SF")
   (V16SI "V8DF")])
4fbe3b8a
JJ
5153
;; Run gen_vec_unpacks_hi_<mode> on operand 1 into a <sseunpackmode>
;; temporary, then emit a FLOAT conversion of that temporary into
;; operand 0.
(define_expand "vec_unpacks_float_hi_<mode>"
  [(match_operand:<sseunpackfltmode> 0 "register_operand")
   (match_operand:VI2_AVX512F 1 "register_operand")]
  "TARGET_SSE2"
{
  rtx widened = gen_reg_rtx (<sseunpackmode>mode);
  rtx cvt;

  emit_insn (gen_vec_unpacks_hi_<mode> (widened, operands[1]));
  cvt = gen_rtx_FLOAT (<sseunpackfltmode>mode, widened);
  emit_insn (gen_rtx_SET (operands[0], cvt));
  DONE;
})
ef719a44 5166
;; Run gen_vec_unpacks_lo_<mode> on operand 1 into a <sseunpackmode>
;; temporary, then emit a FLOAT conversion of that temporary into
;; operand 0.
(define_expand "vec_unpacks_float_lo_<mode>"
  [(match_operand:<sseunpackfltmode> 0 "register_operand")
   (match_operand:VI2_AVX512F 1 "register_operand")]
  "TARGET_SSE2"
{
  rtx widened = gen_reg_rtx (<sseunpackmode>mode);
  rtx cvt;

  emit_insn (gen_vec_unpacks_lo_<mode> (widened, operands[1]));
  cvt = gen_rtx_FLOAT (<sseunpackfltmode>mode, widened);
  emit_insn (gen_rtx_SET (operands[0], cvt));
  DONE;
})
ef719a44 5179
;; Run gen_vec_unpacku_hi_<mode> on operand 1 into a <sseunpackmode>
;; temporary, then emit a FLOAT conversion of that temporary into
;; operand 0.
(define_expand "vec_unpacku_float_hi_<mode>"
  [(match_operand:<sseunpackfltmode> 0 "register_operand")
   (match_operand:VI2_AVX512F 1 "register_operand")]
  "TARGET_SSE2"
{
  rtx widened = gen_reg_rtx (<sseunpackmode>mode);
  rtx cvt;

  emit_insn (gen_vec_unpacku_hi_<mode> (widened, operands[1]));
  cvt = gen_rtx_FLOAT (<sseunpackfltmode>mode, widened);
  emit_insn (gen_rtx_SET (operands[0], cvt));
  DONE;
})
ef719a44 5192
;; Run gen_vec_unpacku_lo_<mode> on operand 1 into a <sseunpackmode>
;; temporary, then emit a FLOAT conversion of that temporary into
;; operand 0.
(define_expand "vec_unpacku_float_lo_<mode>"
  [(match_operand:<sseunpackfltmode> 0 "register_operand")
   (match_operand:VI2_AVX512F 1 "register_operand")]
  "TARGET_SSE2"
{
  rtx widened = gen_reg_rtx (<sseunpackmode>mode);
  rtx cvt;

  emit_insn (gen_vec_unpacku_lo_<mode> (widened, operands[1]));
  cvt = gen_rtx_FLOAT (<sseunpackfltmode>mode, widened);
  emit_insn (gen_rtx_SET (operands[0], cvt));
  DONE;
})
ef719a44 5205
;; Select elements 2, 3, 2, 3 of the V4SI operand into a fresh V4SI
;; temporary (operand 2), then convert elements 0 and 1 of that
;; temporary, as signed integers, to V2DF.
(define_expand "vec_unpacks_float_hi_v4si"
  [(set (match_dup 2)
        (vec_select:V4SI
          (match_operand:V4SI 1 "nonimmediate_operand")
          (parallel [(const_int 2) (const_int 3)
                     (const_int 2) (const_int 3)])))
   (set (match_operand:V2DF 0 "register_operand")
        (float:V2DF
          (vec_select:V2SI
            (match_dup 2)
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE2"
{
  operands[2] = gen_reg_rtx (V4SImode);
})
ef719a44 5219
;; Signed conversion of elements 0 and 1 of a V4SI operand to V2DF; no
;; preparation code is needed.
(define_expand "vec_unpacks_float_lo_v4si"
  [(set (match_operand:V2DF 0 "register_operand")
        (float:V2DF
          (vec_select:V2SI
            (match_operand:V4SI 1 "nonimmediate_operand")
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE2")
5227
;; Extract elements 4..7 of the V8SI operand into a fresh V4SI
;; temporary (operand 2), then convert it, as signed integers, to V4DF.
(define_expand "vec_unpacks_float_hi_v8si"
  [(set (match_dup 2)
        (vec_select:V4SI
          (match_operand:V8SI 1 "nonimmediate_operand")
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))
   (set (match_operand:V4DF 0 "register_operand")
        (float:V4DF
          (match_dup 2)))]
  "TARGET_AVX"
{
  operands[2] = gen_reg_rtx (V4SImode);
})
5239
;; Signed conversion of elements 0..3 of a V8SI operand to V4DF; no
;; preparation code is needed.
(define_expand "vec_unpacks_float_lo_v8si"
  [(set (match_operand:V4DF 0 "register_operand")
        (float:V4DF
          (vec_select:V4SI
            (match_operand:V8SI 1 "nonimmediate_operand")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)]))))]
  "TARGET_AVX")
5248
;; Extract elements 8..15 of the V16SI operand into a fresh V8SI
;; temporary (operand 2), then convert it, as signed integers, to V8DF.
(define_expand "vec_unpacks_float_hi_v16si"
  [(set (match_dup 2)
        (vec_select:V8SI
          (match_operand:V16SI 1 "nonimmediate_operand")
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)])))
   (set (match_operand:V8DF 0 "register_operand")
        (float:V8DF
          (match_dup 2)))]
  "TARGET_AVX512F"
{
  operands[2] = gen_reg_rtx (V8SImode);
})
5262
;; Signed conversion of elements 0..7 of a V16SI operand to V8DF; no
;; preparation code is needed.
(define_expand "vec_unpacks_float_lo_v16si"
  [(set (match_operand:V8DF 0 "register_operand")
        (float:V8DF
          (vec_select:V8SI
            (match_operand:V16SI 1 "nonimmediate_operand")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)
                       (const_int 4) (const_int 5)
                       (const_int 6) (const_int 7)]))))]
  "TARGET_AVX512F")
5273
;; Unsigned V4SI (elements 2 and 3) -> V2DF conversion built from the
;; signed one:
;;   op5 = elements 2,3,2,3 of operand 1
;;   op6 = signed float of elements 0,1 of op5
;;   op7 = (op6 < op3)        op3 is the V2DF zero constant
;;   op8 = op7 & op4          op4 is a V2DF constant built from 2**32
;;   op0 = op6 + op8
(define_expand "vec_unpacku_float_hi_v4si"
  [(set (match_dup 5)
        (vec_select:V4SI
          (match_operand:V4SI 1 "nonimmediate_operand")
          (parallel [(const_int 2) (const_int 3)
                     (const_int 2) (const_int 3)])))
   (set (match_dup 6)
        (float:V2DF
          (vec_select:V2SI
            (match_dup 5)
            (parallel [(const_int 0) (const_int 1)]))))
   (set (match_dup 7)
        (lt:V2DF (match_dup 6) (match_dup 3)))
   (set (match_dup 8)
        (and:V2DF (match_dup 7) (match_dup 4)))
   (set (match_operand:V2DF 0 "register_operand")
        (plus:V2DF (match_dup 6) (match_dup 8)))]
  "TARGET_SSE2"
{
  REAL_VALUE_TYPE two_pow32;
  rtx scalar;

  /* dconst1 scaled by 2**32, as a DFmode constant.  */
  real_ldexp (&two_pow32, &dconst1, 32);
  scalar = const_double_from_real_value (two_pow32, DFmode);

  operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
  operands[4] = force_reg (V2DFmode,
                           ix86_build_const_vector (V2DFmode, 1, scalar));

  operands[5] = gen_reg_rtx (V4SImode);

  /* Scratch registers for the three V2DF intermediates.  */
  operands[6] = gen_reg_rtx (V2DFmode);
  operands[7] = gen_reg_rtx (V2DFmode);
  operands[8] = gen_reg_rtx (V2DFmode);
})
5309
;; Unsigned V4SI (elements 0 and 1) -> V2DF conversion built from the
;; signed one:
;;   op5 = signed float of elements 0,1 of operand 1
;;   op6 = (op5 < op3)        op3 is the V2DF zero constant
;;   op7 = op6 & op4          op4 is a V2DF constant built from 2**32
;;   op0 = op5 + op7
(define_expand "vec_unpacku_float_lo_v4si"
  [(set (match_dup 5)
        (float:V2DF
          (vec_select:V2SI
            (match_operand:V4SI 1 "nonimmediate_operand")
            (parallel [(const_int 0) (const_int 1)]))))
   (set (match_dup 6)
        (lt:V2DF (match_dup 5) (match_dup 3)))
   (set (match_dup 7)
        (and:V2DF (match_dup 6) (match_dup 4)))
   (set (match_operand:V2DF 0 "register_operand")
        (plus:V2DF (match_dup 5) (match_dup 7)))]
  "TARGET_SSE2"
{
  REAL_VALUE_TYPE two_pow32;
  rtx scalar;

  /* dconst1 scaled by 2**32, as a DFmode constant.  */
  real_ldexp (&two_pow32, &dconst1, 32);
  scalar = const_double_from_real_value (two_pow32, DFmode);

  operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
  operands[4] = force_reg (V2DFmode,
                           ix86_build_const_vector (V2DFmode, 1, scalar));

  /* Scratch registers for the three V2DF intermediates.  */
  operands[5] = gen_reg_rtx (V2DFmode);
  operands[6] = gen_reg_rtx (V2DFmode);
  operands[7] = gen_reg_rtx (V2DFmode);
})
5338
;; Unsigned conversion of the high half of a V8SI operand to V4DF:
;; extract the high V4SI half, convert it as signed, then add a V4DF
;; constant built from 2**32, masked by the (converted < 0) comparison.
(define_expand "vec_unpacku_float_hi_v8si"
  [(match_operand:V4DF 0 "register_operand")
   (match_operand:V8SI 1 "register_operand")]
  "TARGET_AVX"
{
  REAL_VALUE_TYPE two_pow32;
  rtx scalar, zero, bias, hi_half, cvt, neg_mask, fixup;

  /* dconst1 scaled by 2**32, as a DFmode constant.  */
  real_ldexp (&two_pow32, &dconst1, 32);
  scalar = const_double_from_real_value (two_pow32, DFmode);

  zero = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
  bias = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, scalar));
  hi_half = gen_reg_rtx (V4SImode);

  cvt = gen_reg_rtx (V4DFmode);
  neg_mask = gen_reg_rtx (V4DFmode);
  fixup = gen_reg_rtx (V4DFmode);

  emit_insn (gen_vec_extract_hi_v8si (hi_half, operands[1]));
  emit_insn (gen_floatv4siv4df2 (cvt, hi_half));
  emit_insn (gen_rtx_SET (neg_mask, gen_rtx_LT (V4DFmode, cvt, zero)));
  emit_insn (gen_andv4df3 (fixup, neg_mask, bias));
  emit_insn (gen_addv4df3 (operands[0], cvt, fixup));
  DONE;
})
5364
;; Unsigned conversion of the high half of a V16SI operand to V8DF:
;; extract the high V8SI half, convert it as signed, compute a QImode
;; mask of (converted < 0), and use gen_addv8df3_mask to add a V8DF
;; constant built from 2**32 under that mask.
(define_expand "vec_unpacku_float_hi_v16si"
  [(match_operand:V8DF 0 "register_operand")
   (match_operand:V16SI 1 "register_operand")]
  "TARGET_AVX512F"
{
  REAL_VALUE_TYPE two_pow32;
  rtx scalar, zero, bias, cvt, hi_half, neg_mask;

  /* dconst1 scaled by 2**32, as a DFmode constant.  */
  real_ldexp (&two_pow32, &dconst1, 32);
  scalar = const_double_from_real_value (two_pow32, DFmode);

  zero = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
  bias = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, scalar));
  cvt = gen_reg_rtx (V8DFmode);
  hi_half = gen_reg_rtx (V8SImode);
  neg_mask = gen_reg_rtx (QImode);

  emit_insn (gen_vec_extract_hi_v16si (hi_half, operands[1]));
  emit_insn (gen_floatv8siv8df2 (cvt, hi_half));
  emit_insn (gen_rtx_SET (neg_mask, gen_rtx_LT (QImode, cvt, zero)));
  emit_insn (gen_addv8df3_mask (cvt, cvt, bias, cvt, neg_mask));
  emit_move_insn (operands[0], cvt);
  DONE;
})
5389
;; Unsigned conversion of the low half of a V8SI operand to V4DF:
;; convert with gen_avx_cvtdq2pd256_2 (signed), then add a V4DF constant
;; built from 2**32, masked by the (converted < 0) comparison.
(define_expand "vec_unpacku_float_lo_v8si"
  [(match_operand:V4DF 0 "register_operand")
   (match_operand:V8SI 1 "nonimmediate_operand")]
  "TARGET_AVX"
{
  REAL_VALUE_TYPE two_pow32;
  rtx scalar, zero, bias, cvt, neg_mask, fixup;

  /* dconst1 scaled by 2**32, as a DFmode constant.  */
  real_ldexp (&two_pow32, &dconst1, 32);
  scalar = const_double_from_real_value (two_pow32, DFmode);

  zero = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
  bias = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, scalar));

  cvt = gen_reg_rtx (V4DFmode);
  neg_mask = gen_reg_rtx (V4DFmode);
  fixup = gen_reg_rtx (V4DFmode);

  emit_insn (gen_avx_cvtdq2pd256_2 (cvt, operands[1]));
  emit_insn (gen_rtx_SET (neg_mask, gen_rtx_LT (V4DFmode, cvt, zero)));
  emit_insn (gen_andv4df3 (fixup, neg_mask, bias));
  emit_insn (gen_addv4df3 (operands[0], cvt, fixup));
  DONE;
})
5413
;; Expander: convert the low half of V16SI operand 1, treated as unsigned,
;; to V8DF operand 0 (TARGET_AVX512F).  Same scheme as the _hi_ variant, but
;; the low half is converted directly with avx512f_cvtdq2pd512_2: compare
;; the signed result against zero into mask k, then do a masked add of a
;; vector of 2**32 and move the result to operand 0.
47490470
AI
5414(define_expand "vec_unpacku_float_lo_v16si"
5415 [(match_operand:V8DF 0 "register_operand")
5416 (match_operand:V16SI 1 "nonimmediate_operand")]
5417 "TARGET_AVX512F"
5418{
5419 REAL_VALUE_TYPE TWO32r;
5420 rtx k, x, tmp[3];
5421
5422 real_ldexp (&TWO32r, &dconst1, 32);
5423 x = const_double_from_real_value (TWO32r, DFmode);
5424
5425 tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
5426 tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
5427 tmp[2] = gen_reg_rtx (V8DFmode);
5428 k = gen_reg_rtx (QImode);
5429
5430 emit_insn (gen_avx512f_cvtdq2pd512_2 (tmp[2], operands[1]));
f7df4a84 5431 emit_insn (gen_rtx_SET (k, gen_rtx_LT (QImode, tmp[2], tmp[0])));
47490470
AI
5432 emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
5433 emit_move_insn (operands[0], tmp[2]);
5434 DONE;
5435})
5436
;; Expander for the VF2_512_256 modes (TARGET_AVX): float_truncate operands
;; 1 and 2 separately into two fresh <sf2dfmode> registers (operands 3 and
;; 4, allocated in the preparation statements), then vec_concat those two
;; halves into operand 0 of mode <ssePSmode>.
ec5e777c 5437(define_expand "vec_pack_trunc_<mode>"
1e27129f 5438 [(set (match_dup 3)
ec5e777c
AI
5439 (float_truncate:<sf2dfmode>
5440 (match_operand:VF2_512_256 1 "nonimmediate_operand")))
1e27129f 5441 (set (match_dup 4)
ec5e777c
AI
5442 (float_truncate:<sf2dfmode>
5443 (match_operand:VF2_512_256 2 "nonimmediate_operand")))
5444 (set (match_operand:<ssePSmode> 0 "register_operand")
5445 (vec_concat:<ssePSmode>
1e27129f
L
5446 (match_dup 3)
5447 (match_dup 4)))]
5448 "TARGET_AVX"
5449{
ec5e777c
AI
5450 operands[3] = gen_reg_rtx (<sf2dfmode>mode);
5451 operands[4] = gen_reg_rtx (<sf2dfmode>mode);
1e27129f
L
5452})
5453
;; Expander: pack two V2DF operands into one V4SF result (TARGET_SSE2).
;; When AVX is available, 128-bit AVX is not preferred and the insn is being
;; optimized for speed, the two inputs are concatenated into a V4DF and
;; converted by one avx_cvtpd2ps256.  Otherwise each input is converted with
;; sse2_cvtpd2ps into its own V4SF temporary and the two temporaries are
;; combined with sse_movlhps.
d6023b50 5454(define_expand "vec_pack_trunc_v2df"
82e86dc6
UB
5455 [(match_operand:V4SF 0 "register_operand")
5456 (match_operand:V2DF 1 "nonimmediate_operand")
5457 (match_operand:V2DF 2 "nonimmediate_operand")]
ef719a44 5458 "TARGET_SSE2"
d6023b50 5459{
76a2c904 5460 rtx tmp0, tmp1;
ef719a44 5461
a1aff58f 5462 if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
76a2c904
UB
5463 {
5464 tmp0 = gen_reg_rtx (V4DFmode);
5465 tmp1 = force_reg (V2DFmode, operands[1]);
ef719a44 5466
76a2c904
UB
5467 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
5468 emit_insn (gen_avx_cvtpd2ps256 (operands[0], tmp0));
5469 }
5470 else
5471 {
5472 tmp0 = gen_reg_rtx (V4SFmode);
5473 tmp1 = gen_reg_rtx (V4SFmode);
5474
5475 emit_insn (gen_sse2_cvtpd2ps (tmp0, operands[1]));
5476 emit_insn (gen_sse2_cvtpd2ps (tmp1, operands[2]));
5477 emit_insn (gen_sse_movlhps (operands[0], tmp0, tmp1));
5478 }
d6023b50
UB
5479 DONE;
5480})
ab8efbd8 5481
;; Expander: pack two V8DF operands into one V16SI result using the
;; truncating signed conversion (TARGET_AVX512F).  Each input is converted
;; to a V8SI temporary with fix_truncv8dfv8si2 and the two temporaries are
;; concatenated into operand 0.
ec5e777c
AI
5482(define_expand "vec_pack_sfix_trunc_v8df"
5483 [(match_operand:V16SI 0 "register_operand")
5484 (match_operand:V8DF 1 "nonimmediate_operand")
5485 (match_operand:V8DF 2 "nonimmediate_operand")]
5486 "TARGET_AVX512F"
5487{
5488 rtx r1, r2;
5489
5490 r1 = gen_reg_rtx (V8SImode);
5491 r2 = gen_reg_rtx (V8SImode);
5492
5493 emit_insn (gen_fix_truncv8dfv8si2 (r1, operands[1]));
5494 emit_insn (gen_fix_truncv8dfv8si2 (r2, operands[2]));
5495 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
5496 DONE;
5497})
5498
;; Expander: pack two V4DF operands into one V8SI result using the
;; truncating signed conversion (TARGET_AVX).  Each input is converted to a
;; V4SI temporary with fix_truncv4dfv4si2 and the two temporaries are
;; concatenated into operand 0.
1ee48839 5499(define_expand "vec_pack_sfix_trunc_v4df"
82e86dc6
UB
5500 [(match_operand:V8SI 0 "register_operand")
5501 (match_operand:V4DF 1 "nonimmediate_operand")
5502 (match_operand:V4DF 2 "nonimmediate_operand")]
1ee48839
JJ
5503 "TARGET_AVX"
5504{
5505 rtx r1, r2;
5506
76a2c904
UB
5507 r1 = gen_reg_rtx (V4SImode);
5508 r2 = gen_reg_rtx (V4SImode);
1ee48839 5509
76a2c904
UB
5510 emit_insn (gen_fix_truncv4dfv4si2 (r1, operands[1]));
5511 emit_insn (gen_fix_truncv4dfv4si2 (r2, operands[2]));
5512 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
1ee48839
JJ
5513 DONE;
5514})
5515
;; Expander: pack two V2DF operands into one V4SI result using the
;; truncating signed conversion (TARGET_SSE2).
;; AVX path (AVX enabled, 128-bit AVX not preferred, optimizing for speed):
;; concatenate the inputs into a V4DF and convert once with
;; fix_truncv4dfv4si2.
;; Otherwise: convert each input with sse2_cvttpd2dq into a V4SI temporary,
;; interleave the low V2DI elements of the two temporaries, and move the
;; V4SI view of that interleave into operand 0.
d6023b50 5516(define_expand "vec_pack_sfix_trunc_v2df"
82e86dc6
UB
5517 [(match_operand:V4SI 0 "register_operand")
5518 (match_operand:V2DF 1 "nonimmediate_operand")
5519 (match_operand:V2DF 2 "nonimmediate_operand")]
ab8efbd8 5520 "TARGET_SSE2"
d6023b50 5521{
d8c84975 5522 rtx tmp0, tmp1, tmp2;
ab8efbd8 5523
a1aff58f 5524 if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
76a2c904
UB
5525 {
5526 tmp0 = gen_reg_rtx (V4DFmode);
5527 tmp1 = force_reg (V2DFmode, operands[1]);
ab8efbd8 5528
76a2c904
UB
5529 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
5530 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp0));
5531 }
5532 else
5533 {
5534 tmp0 = gen_reg_rtx (V4SImode);
5535 tmp1 = gen_reg_rtx (V4SImode);
d8c84975 5536 tmp2 = gen_reg_rtx (V2DImode);
76a2c904
UB
5537
5538 emit_insn (gen_sse2_cvttpd2dq (tmp0, operands[1]));
5539 emit_insn (gen_sse2_cvttpd2dq (tmp1, operands[2]));
d8c84975
JJ
5540 emit_insn (gen_vec_interleave_lowv2di (tmp2,
5541 gen_lowpart (V2DImode, tmp0),
5542 gen_lowpart (V2DImode, tmp1)));
5543 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
76a2c904 5544 }
d6023b50
UB
5545 DONE;
5546})
ab8efbd8 5547
;; Maps a double-precision vector mode to the SImode vector mode with twice
;; as many elements; used as the result mode of vec_pack_ufix_trunc_<mode>.
6bf39801 5548(define_mode_attr ssepackfltmode
ec5e777c 5549 [(V8DF "V16SI") (V4DF "V8SI") (V2DF "V4SI")])
6bf39801
JJ
5550
;; Expander: pack two VF2 operands into one <ssepackfltmode> result using
;; unsigned truncating conversion (TARGET_SSE2).
;; V8DF: convert each input with ufix_truncv8dfv8si2 and concatenate.
;; Other modes: each input is adjusted by ix86_expand_adjust_ufix_to_sfix_si,
;; which also hands back a second value through its pointer argument
;; (tmp[2] / tmp[3]).  The adjusted inputs are packed with the signed
;; vec_pack_sfix_trunc_<mode>; ix86_expand_vec_extract_even_odd (last
;; argument 0 -- presumably "even"; confirm against its definition) combines
;; tmp[2] and tmp[3], going through V8SFmode views when the result is not
;; V4SI and AVX2 is unavailable; the packed value is finally XORed with that
;; combined vector into operand 0.
5551(define_expand "vec_pack_ufix_trunc_<mode>"
82e86dc6 5552 [(match_operand:<ssepackfltmode> 0 "register_operand")
d3c2fee0
AI
5553 (match_operand:VF2 1 "register_operand")
5554 (match_operand:VF2 2 "register_operand")]
2f2da9e9 5555 "TARGET_SSE2"
6bf39801 5556{
d3c2fee0 5557 if (<MODE>mode == V8DFmode)
2f2da9e9 5558 {
d3c2fee0
AI
5559 rtx r1, r2;
5560
5561 r1 = gen_reg_rtx (V8SImode);
5562 r2 = gen_reg_rtx (V8SImode);
5563
5564 emit_insn (gen_ufix_truncv8dfv8si2 (r1, operands[1]));
5565 emit_insn (gen_ufix_truncv8dfv8si2 (r2, operands[2]));
5566 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
2f2da9e9
JJ
5567 }
5568 else
5569 {
d3c2fee0
AI
5570 rtx tmp[7];
5571 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
5572 tmp[1] = ix86_expand_adjust_ufix_to_sfix_si (operands[2], &tmp[3]);
5573 tmp[4] = gen_reg_rtx (<ssepackfltmode>mode);
5574 emit_insn (gen_vec_pack_sfix_trunc_<mode> (tmp[4], tmp[0], tmp[1]));
5575 if (<ssepackfltmode>mode == V4SImode || TARGET_AVX2)
5576 {
5577 tmp[5] = gen_reg_rtx (<ssepackfltmode>mode);
5578 ix86_expand_vec_extract_even_odd (tmp[5], tmp[2], tmp[3], 0);
5579 }
5580 else
5581 {
5582 tmp[5] = gen_reg_rtx (V8SFmode);
5583 ix86_expand_vec_extract_even_odd (tmp[5], gen_lowpart (V8SFmode, tmp[2]),
5584 gen_lowpart (V8SFmode, tmp[3]), 0);
5585 tmp[5] = gen_lowpart (V8SImode, tmp[5]);
5586 }
5587 tmp[6] = expand_simple_binop (<ssepackfltmode>mode, XOR, tmp[4], tmp[5],
5588 operands[0], 0, OPTAB_DIRECT);
5589 if (tmp[6] != operands[0])
5590 emit_move_insn (operands[0], tmp[6]);
2f2da9e9 5591 }
d3c2fee0 5592
6bf39801
JJ
5593 DONE;
5594})
5595
;; Expander: pack two V4DF operands into one V8SI result (TARGET_AVX).
;; Unlike vec_pack_sfix_trunc_v4df this uses avx_cvtpd2dq256 (the
;; non-"trunc" conversion pattern) for each input before concatenating the
;; two V4SI temporaries into operand 0.
1ee48839 5596(define_expand "vec_pack_sfix_v4df"
82e86dc6
UB
5597 [(match_operand:V8SI 0 "register_operand")
5598 (match_operand:V4DF 1 "nonimmediate_operand")
5599 (match_operand:V4DF 2 "nonimmediate_operand")]
1ee48839
JJ
5600 "TARGET_AVX"
5601{
5602 rtx r1, r2;
5603
76a2c904
UB
5604 r1 = gen_reg_rtx (V4SImode);
5605 r2 = gen_reg_rtx (V4SImode);
1ee48839 5606
76a2c904
UB
5607 emit_insn (gen_avx_cvtpd2dq256 (r1, operands[1]));
5608 emit_insn (gen_avx_cvtpd2dq256 (r2, operands[2]));
5609 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
1ee48839
JJ
5610 DONE;
5611})
5612
;; Expander: pack two V2DF operands into one V4SI result using the
;; non-"trunc" conversion patterns (TARGET_SSE2).
;; AVX path (AVX enabled, 128-bit AVX not preferred, optimizing for speed):
;; concatenate the inputs into a V4DF and convert once with avx_cvtpd2dq256.
;; Otherwise: convert each input with sse2_cvtpd2dq into a V4SI temporary,
;; interleave the low V2DI elements of the two temporaries, and move the
;; V4SI view of that interleave into operand 0.
d6023b50 5613(define_expand "vec_pack_sfix_v2df"
82e86dc6
UB
5614 [(match_operand:V4SI 0 "register_operand")
5615 (match_operand:V2DF 1 "nonimmediate_operand")
5616 (match_operand:V2DF 2 "nonimmediate_operand")]
ab8efbd8 5617 "TARGET_SSE2"
d6023b50 5618{
d8c84975 5619 rtx tmp0, tmp1, tmp2;
d6023b50 5620
a1aff58f 5621 if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
76a2c904
UB
5622 {
5623 tmp0 = gen_reg_rtx (V4DFmode);
5624 tmp1 = force_reg (V2DFmode, operands[1]);
d6023b50 5625
76a2c904
UB
5626 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
5627 emit_insn (gen_avx_cvtpd2dq256 (operands[0], tmp0));
5628 }
5629 else
5630 {
5631 tmp0 = gen_reg_rtx (V4SImode);
5632 tmp1 = gen_reg_rtx (V4SImode);
d8c84975 5633 tmp2 = gen_reg_rtx (V2DImode);
76a2c904
UB
5634
5635 emit_insn (gen_sse2_cvtpd2dq (tmp0, operands[1]));
5636 emit_insn (gen_sse2_cvtpd2dq (tmp1, operands[2]));
d8c84975
JJ
5637 emit_insn (gen_vec_interleave_lowv2di (tmp2,
5638 gen_lowpart (V2DImode, tmp0),
5639 gen_lowpart (V2DImode, tmp1)));
5640 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
76a2c904 5641 }
d6023b50
UB
5642 DONE;
5643})
ab8efbd8 5644
ef719a44
RH
5645;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5646;;
d6023b50 5647;; Parallel single-precision floating point element swizzling
ef719a44
RH
5648;;
5649;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5650
;; Expander wrapper around the sse_movhlps insn (TARGET_SSE).  The operands
;; are passed through ix86_fixup_binary_operands, the insn is emitted with
;; the destination that helper returns, and the result is copied into
;; operand 0 afterwards if the helper picked a different destination.
3a3f9d87 5651(define_expand "sse_movhlps_exp"
82e86dc6 5652 [(set (match_operand:V4SF 0 "nonimmediate_operand")
ffbaf337
UB
5653 (vec_select:V4SF
5654 (vec_concat:V8SF
82e86dc6
UB
5655 (match_operand:V4SF 1 "nonimmediate_operand")
5656 (match_operand:V4SF 2 "nonimmediate_operand"))
ffbaf337
UB
5657 (parallel [(const_int 6)
5658 (const_int 7)
5659 (const_int 2)
5660 (const_int 3)])))]
5661 "TARGET_SSE"
f17aa4ad
UB
5662{
5663 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
6cf9eb27 5664
f17aa4ad
UB
5665 emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));
5666
5667 /* Fix up the destination if needed. */
5668 if (dst != operands[0])
5669 emit_move_insn (operands[0], dst);
5670
5671 DONE;
5672})
ffbaf337 5673
;; Insn: operand 0 = { op2[2], op2[3], op1[2], op1[3] }, i.e. elements
;; 6, 7, 2, 3 of the concatenation of operands 1 and 2.  The insn condition
;; rejects the case where operands 1 and 2 are both memory.
;; Alternatives, in order: movhlps / vmovhlps (register operand 2),
;; movlps / vmovlps reading operand 2 from offsettable memory through the
;; %H2 modifier, and movhps storing register operand 2 to a memory
;; destination.
3a3f9d87 5674(define_insn "sse_movhlps"
3729983c 5675 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
d6023b50
UB
5676 (vec_select:V4SF
5677 (vec_concat:V8SF
3729983c
UB
5678 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
5679 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,o,o,x"))
d6023b50
UB
5680 (parallel [(const_int 6)
5681 (const_int 7)
5682 (const_int 2)
5683 (const_int 3)])))]
2fe4dc01 5684 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
d6023b50
UB
5685 "@
5686 movhlps\t{%2, %0|%0, %2}
3729983c 5687 vmovhlps\t{%2, %1, %0|%0, %1, %2}
d6023b50 5688 movlps\t{%H2, %0|%0, %H2}
3729983c 5689 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
eabb5f48 5690 %vmovhps\t{%2, %0|%q0, %2}"
ba94c7af 5691 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3729983c 5692 (set_attr "type" "ssemov")
f220a4f4 5693 (set_attr "ssememalign" "64")
3729983c
UB
5694 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
5695 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
ef719a44 5696
;; Expander wrapper around the sse_movlhps insn (TARGET_SSE).  The operands
;; are passed through ix86_fixup_binary_operands, the insn is emitted with
;; the destination that helper returns, and the result is copied into
;; operand 0 afterwards if the helper picked a different destination.
3a3f9d87 5697(define_expand "sse_movlhps_exp"
82e86dc6 5698 [(set (match_operand:V4SF 0 "nonimmediate_operand")
ffbaf337
UB
5699 (vec_select:V4SF
5700 (vec_concat:V8SF
82e86dc6
UB
5701 (match_operand:V4SF 1 "nonimmediate_operand")
5702 (match_operand:V4SF 2 "nonimmediate_operand"))
ffbaf337
UB
5703 (parallel [(const_int 0)
5704 (const_int 1)
5705 (const_int 4)
5706 (const_int 5)])))]
5707 "TARGET_SSE"
f17aa4ad
UB
5708{
5709 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
6cf9eb27 5710
f17aa4ad
UB
5711 emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));
5712
5713 /* Fix up the destination if needed. */
5714 if (dst != operands[0])
5715 emit_move_insn (operands[0], dst);
5716
5717 DONE;
5718})
ffbaf337 5719
;; Insn: operand 0 = { op1[0], op1[1], op2[0], op2[1] }, i.e. elements
;; 0, 1, 4, 5 of the concatenation of operands 1 and 2.  Operand validity is
;; delegated to ix86_binary_operator_ok.
;; Alternatives, in order: movlhps / vmovlhps (register operand 2),
;; movhps / vmovhps with memory operand 2, and movlps storing register
;; operand 2 to the %H0 part of an offsettable memory destination.
3a3f9d87 5720(define_insn "sse_movlhps"
3729983c 5721 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
d6023b50
UB
5722 (vec_select:V4SF
5723 (vec_concat:V8SF
3729983c 5724 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
eabb5f48 5725 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,m,m,x"))
d6023b50
UB
5726 (parallel [(const_int 0)
5727 (const_int 1)
5728 (const_int 4)
5729 (const_int 5)])))]
5730 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
5731 "@
5732 movlhps\t{%2, %0|%0, %2}
3729983c 5733 vmovlhps\t{%2, %1, %0|%0, %1, %2}
eabb5f48
UB
5734 movhps\t{%2, %0|%0, %q2}
5735 vmovhps\t{%2, %1, %0|%0, %1, %q2}
3729983c 5736 %vmovlps\t{%2, %H0|%H0, %2}"
ba94c7af 5737 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3729983c 5738 (set_attr "type" "ssemov")
f220a4f4 5739 (set_attr "ssememalign" "64")
3729983c
UB
5740 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
5741 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
ef719a44 5742
;; Insn (TARGET_AVX512F): 512-bit vunpckhps.  Within each group of four
;; elements, elements 2 and 3 of operand 1 are interleaved with elements 2
;; and 3 of operand 2 (indices 16..31 of the V32SF concatenation refer to
;; operand 2).  <mask_name> / <mask_operand3> supply the optional masked
;; variant.
47490470 5743(define_insn "<mask_codefor>avx512f_unpckhps512<mask_name>"
c003c6d6
AI
5744 [(set (match_operand:V16SF 0 "register_operand" "=v")
5745 (vec_select:V16SF
5746 (vec_concat:V32SF
5747 (match_operand:V16SF 1 "register_operand" "v")
5748 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
5749 (parallel [(const_int 2) (const_int 18)
5750 (const_int 3) (const_int 19)
5751 (const_int 6) (const_int 22)
5752 (const_int 7) (const_int 23)
5753 (const_int 10) (const_int 26)
5754 (const_int 11) (const_int 27)
5755 (const_int 14) (const_int 30)
5756 (const_int 15) (const_int 31)])))]
5757 "TARGET_AVX512F"
47490470 5758 "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
c003c6d6
AI
5759 [(set_attr "type" "sselog")
5760 (set_attr "prefix" "evex")
5761 (set_attr "mode" "V16SF")])
5762
;; Insn (TARGET_AVX, plus <mask_avx512vl_condition> for the masked form):
;; 256-bit vunpckhps.  In each 128-bit lane, elements 2 and 3 of operand 1
;; are interleaved with elements 2 and 3 of operand 2 (indices 8..15 of the
;; V16SF concatenation refer to operand 2).
b0d49a6e 5763;; Recall that the 256-bit unpck insns only shuffle within their lanes.
47145255
AI
5764(define_insn "avx_unpckhps256<mask_name>"
5765 [(set (match_operand:V8SF 0 "register_operand" "=v")
95879c72
L
5766 (vec_select:V8SF
5767 (vec_concat:V16SF
47145255
AI
5768 (match_operand:V8SF 1 "register_operand" "v")
5769 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
c4d3f42f
L
5770 (parallel [(const_int 2) (const_int 10)
5771 (const_int 3) (const_int 11)
5772 (const_int 6) (const_int 14)
5773 (const_int 7) (const_int 15)])))]
47145255
AI
5774 "TARGET_AVX && <mask_avx512vl_condition>"
5775 "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
95879c72
L
5776 [(set_attr "type" "sselog")
5777 (set_attr "prefix" "vex")
5778 (set_attr "mode" "V8SF")])
5779
;; Expander (TARGET_AVX): full-width "interleave high" of two V8SF vectors,
;; built from the in-lane unpck patterns.  Operand 3 receives the in-lane
;; low interleave of operands 1 and 2, operand 4 the in-lane high
;; interleave; the final set picks elements 4..7 of operand 3 followed by
;; elements 4..7 of operand 4 (indices 12..15 of the concatenation).
;; Operands 3 and 4 are fresh V8SF pseudos created in the preparation
;; statements.  Operands carry predicates only, like the other expanders
;; in this file.
1e27129f
L
5780(define_expand "vec_interleave_highv8sf"
5781 [(set (match_dup 3)
5782 (vec_select:V8SF
5783 (vec_concat:V16SF
5784 (match_operand:V8SF 1 "register_operand")
5785 (match_operand:V8SF 2 "nonimmediate_operand"))
5786 (parallel [(const_int 0) (const_int 8)
5787 (const_int 1) (const_int 9)
5788 (const_int 4) (const_int 12)
5789 (const_int 5) (const_int 13)])))
5790 (set (match_dup 4)
5791 (vec_select:V8SF
5792 (vec_concat:V16SF
5793 (match_dup 1)
5794 (match_dup 2))
5795 (parallel [(const_int 2) (const_int 10)
5796 (const_int 3) (const_int 11)
5797 (const_int 6) (const_int 14)
5798 (const_int 7) (const_int 15)])))
82e86dc6 5799 (set (match_operand:V8SF 0 "register_operand")
2a4337c0
UB
5800 (vec_select:V8SF
5801 (vec_concat:V16SF
1e27129f 5802 (match_dup 3)
2a4337c0
UB
5803 (match_dup 4))
5804 (parallel [(const_int 4) (const_int 5)
5805 (const_int 6) (const_int 7)
5806 (const_int 12) (const_int 13)
5807 (const_int 14) (const_int 15)])))]
1e27129f
L
5808 "TARGET_AVX"
5809{
5810 operands[3] = gen_reg_rtx (V8SFmode);
5811 operands[4] = gen_reg_rtx (V8SFmode);
5812})
5813
;; Insn (TARGET_SSE, plus <mask_avx512vl_condition> for the masked form):
;; operand 0 = { op1[2], op2[2], op1[3], op2[3] }.  Alternative 0 is the
;; two-operand unpckhps (operand 1 tied to operand 0); alternative 1 is the
;; three-operand vunpckhps.
47145255
AI
5814(define_insn "vec_interleave_highv4sf<mask_name>"
5815 [(set (match_operand:V4SF 0 "register_operand" "=x,v")
d6023b50
UB
5816 (vec_select:V4SF
5817 (vec_concat:V8SF
47145255
AI
5818 (match_operand:V4SF 1 "register_operand" "0,v")
5819 (match_operand:V4SF 2 "nonimmediate_operand" "xm,vm"))
d6023b50
UB
5820 (parallel [(const_int 2) (const_int 6)
5821 (const_int 3) (const_int 7)])))]
47145255 5822 "TARGET_SSE && <mask_avx512vl_condition>"
3729983c
UB
5823 "@
5824 unpckhps\t{%2, %0|%0, %2}
47145255 5825 vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
3729983c
UB
5826 [(set_attr "isa" "noavx,avx")
5827 (set_attr "type" "sselog")
5828 (set_attr "prefix" "orig,vex")
d6023b50 5829 (set_attr "mode" "V4SF")])
ef719a44 5830
;; Insn (TARGET_AVX512F): 512-bit vunpcklps.  Within each group of four
;; elements, elements 0 and 1 of operand 1 are interleaved with elements 0
;; and 1 of operand 2 (indices 16..31 of the V32SF concatenation refer to
;; operand 2).  <mask_name> / <mask_operand3> supply the optional masked
;; variant.
47490470 5831(define_insn "<mask_codefor>avx512f_unpcklps512<mask_name>"
c003c6d6
AI
5832 [(set (match_operand:V16SF 0 "register_operand" "=v")
5833 (vec_select:V16SF
5834 (vec_concat:V32SF
5835 (match_operand:V16SF 1 "register_operand" "v")
5836 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
5837 (parallel [(const_int 0) (const_int 16)
5838 (const_int 1) (const_int 17)
5839 (const_int 4) (const_int 20)
5840 (const_int 5) (const_int 21)
5841 (const_int 8) (const_int 24)
5842 (const_int 9) (const_int 25)
5843 (const_int 12) (const_int 28)
5844 (const_int 13) (const_int 29)])))]
5845 "TARGET_AVX512F"
47490470 5846 "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
c003c6d6
AI
5847 [(set_attr "type" "sselog")
5848 (set_attr "prefix" "evex")
5849 (set_attr "mode" "V16SF")])
5850
;; Insn (TARGET_AVX, plus <mask_avx512vl_condition> for the masked form):
;; 256-bit vunpcklps.  In each 128-bit lane, elements 0 and 1 of operand 1
;; are interleaved with elements 0 and 1 of operand 2 (indices 8..15 of the
;; V16SF concatenation refer to operand 2).
b0d49a6e 5851;; Recall that the 256-bit unpck insns only shuffle within their lanes.
47145255
AI
5852(define_insn "avx_unpcklps256<mask_name>"
5853 [(set (match_operand:V8SF 0 "register_operand" "=v")
95879c72
L
5854 (vec_select:V8SF
5855 (vec_concat:V16SF
47145255
AI
5856 (match_operand:V8SF 1 "register_operand" "v")
5857 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
c4d3f42f
L
5858 (parallel [(const_int 0) (const_int 8)
5859 (const_int 1) (const_int 9)
5860 (const_int 4) (const_int 12)
5861 (const_int 5) (const_int 13)])))]
47145255
AI
5862 "TARGET_AVX && <mask_avx512vl_condition>"
5863 "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
95879c72
L
5864 [(set_attr "type" "sselog")
5865 (set_attr "prefix" "vex")
5866 (set_attr "mode" "V8SF")])
5867
;; Insn (TARGET_AVX512VL): masked 128-bit vunpcklps.  The interleave
;; { op1[0], op2[0], op1[1], op2[1] } is vec_merged with operand 3 (tied to
;; the destination or a constant accepted by "C") under the QImode mask
;; register in operand 4.
47145255
AI
5868(define_insn "unpcklps128_mask"
5869 [(set (match_operand:V4SF 0 "register_operand" "=v")
5870 (vec_merge:V4SF
5871 (vec_select:V4SF
5872 (vec_concat:V8SF
5873 (match_operand:V4SF 1 "register_operand" "v")
5874 (match_operand:V4SF 2 "nonimmediate_operand" "vm"))
5875 (parallel [(const_int 0) (const_int 4)
5876 (const_int 1) (const_int 5)]))
5877 (match_operand:V4SF 3 "vector_move_operand" "0C")
5878 (match_operand:QI 4 "register_operand" "Yk")))]
5879 "TARGET_AVX512VL"
5880 "vunpcklps\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
5881 [(set_attr "type" "sselog")
5882 (set_attr "prefix" "evex")
5883 (set_attr "mode" "V4SF")])
5884
;; Expander (TARGET_AVX): full-width "interleave low" of two V8SF vectors,
;; built from the in-lane unpck patterns.  Operand 3 receives the in-lane
;; low interleave of operands 1 and 2, operand 4 the in-lane high
;; interleave; the final set picks elements 0..3 of operand 3 followed by
;; elements 0..3 of operand 4 (indices 8..11 of the concatenation).
;; Operands 3 and 4 are fresh V8SF pseudos created in the preparation
;; statements.  Operands carry predicates only, like the other expanders
;; in this file.
1e27129f
L
5885(define_expand "vec_interleave_lowv8sf"
5886 [(set (match_dup 3)
5887 (vec_select:V8SF
5888 (vec_concat:V16SF
5889 (match_operand:V8SF 1 "register_operand")
5890 (match_operand:V8SF 2 "nonimmediate_operand"))
5891 (parallel [(const_int 0) (const_int 8)
5892 (const_int 1) (const_int 9)
5893 (const_int 4) (const_int 12)
5894 (const_int 5) (const_int 13)])))
5895 (set (match_dup 4)
5896 (vec_select:V8SF
5897 (vec_concat:V16SF
5898 (match_dup 1)
5899 (match_dup 2))
5900 (parallel [(const_int 2) (const_int 10)
5901 (const_int 3) (const_int 11)
5902 (const_int 6) (const_int 14)
5903 (const_int 7) (const_int 15)])))
82e86dc6 5904 (set (match_operand:V8SF 0 "register_operand")
2a4337c0
UB
5905 (vec_select:V8SF
5906 (vec_concat:V16SF
1e27129f 5907 (match_dup 3)
2a4337c0
UB
5908 (match_dup 4))
5909 (parallel [(const_int 0) (const_int 1)
5910 (const_int 2) (const_int 3)
5911 (const_int 8) (const_int 9)
5912 (const_int 10) (const_int 11)])))]
1e27129f
L
5913 "TARGET_AVX"
5914{
5915 operands[3] = gen_reg_rtx (V8SFmode);
5916 operands[4] = gen_reg_rtx (V8SFmode);
5917})
5918
;; Insn (TARGET_SSE): operand 0 = { op1[0], op2[0], op1[1], op2[1] }.
;; Alternative 0 is the two-operand unpcklps (operand 1 tied to operand 0);
;; alternative 1 is the three-operand vunpcklps.
b0d49a6e 5919(define_insn "vec_interleave_lowv4sf"
3729983c 5920 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
d6023b50
UB
5921 (vec_select:V4SF
5922 (vec_concat:V8SF
3729983c
UB
5923 (match_operand:V4SF 1 "register_operand" "0,x")
5924 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
d6023b50
UB
5925 (parallel [(const_int 0) (const_int 4)
5926 (const_int 1) (const_int 5)])))]
5927 "TARGET_SSE"
3729983c
UB
5928 "@
5929 unpcklps\t{%2, %0|%0, %2}
5930 vunpcklps\t{%2, %1, %0|%0, %1, %2}"
5931 [(set_attr "isa" "noavx,avx")
5932 (set_attr "type" "sselog")
5933 (set_attr "prefix" "orig,vex")
d6023b50 5934 (set_attr "mode" "V4SF")])
ef719a44 5935
;; Insn (TARGET_AVX, plus <mask_avx512vl_condition> for the masked form):
;; 256-bit vmovshdup.  Operand 1 is concatenated with itself and each
;; odd-indexed element is selected twice: { 1, 1, 3, 3, 5, 5, 7, 7 }.
d6023b50
UB
5936;; These are modeled with the same vec_concat as the others so that we
5937;; capture users of shufps that can use the new instructions
6eacd27c
AI
5938(define_insn "avx_movshdup256<mask_name>"
5939 [(set (match_operand:V8SF 0 "register_operand" "=v")
95879c72
L
5940 (vec_select:V8SF
5941 (vec_concat:V16SF
6eacd27c 5942 (match_operand:V8SF 1 "nonimmediate_operand" "vm")
95879c72
L
5943 (match_dup 1))
5944 (parallel [(const_int 1) (const_int 1)
5945 (const_int 3) (const_int 3)
5946 (const_int 5) (const_int 5)
5947 (const_int 7) (const_int 7)])))]
6eacd27c
AI
5948 "TARGET_AVX && <mask_avx512vl_condition>"
5949 "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
95879c72
L
5950 [(set_attr "type" "sse")
5951 (set_attr "prefix" "vex")
5952 (set_attr "mode" "V8SF")])
5953
;; Insn (TARGET_SSE3, plus <mask_avx512vl_condition> for the masked form):
;; 128-bit movshdup.  Operand 1 is concatenated with itself into a V8SF and
;; elements 1, 1, 7, 7 are selected; index 7 is element 3 of the second
;; copy, so the result is { op1[1], op1[1], op1[3], op1[3] }.
6eacd27c
AI
5954(define_insn "sse3_movshdup<mask_name>"
5955 [(set (match_operand:V4SF 0 "register_operand" "=v")
d6023b50
UB
5956 (vec_select:V4SF
5957 (vec_concat:V8SF
6eacd27c 5958 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
d6023b50
UB
5959 (match_dup 1))
5960 (parallel [(const_int 1)
5961 (const_int 1)
5962 (const_int 7)
5963 (const_int 7)])))]
6eacd27c
AI
5964 "TARGET_SSE3 && <mask_avx512vl_condition>"
5965 "%vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
d6023b50 5966 [(set_attr "type" "sse")
10e4d956 5967 (set_attr "prefix_rep" "1")
95879c72 5968 (set_attr "prefix" "maybe_vex")
d6023b50 5969 (set_attr "mode" "V4SF")])
ef719a44 5970
;; Insn (TARGET_AVX512F): 512-bit vmovshdup.  Operand 1 is concatenated
;; with itself and each odd-indexed element 1, 3, ..., 15 is selected
;; twice.  <mask_name> / <mask_operand2> supply the optional masked variant.
47490470 5971(define_insn "<mask_codefor>avx512f_movshdup512<mask_name>"
c003c6d6
AI
5972 [(set (match_operand:V16SF 0 "register_operand" "=v")
5973 (vec_select:V16SF
5974 (vec_concat:V32SF
5975 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
5976 (match_dup 1))
5977 (parallel [(const_int 1) (const_int 1)
5978 (const_int 3) (const_int 3)
5979 (const_int 5) (const_int 5)
5980 (const_int 7) (const_int 7)
5981 (const_int 9) (const_int 9)
5982 (const_int 11) (const_int 11)
5983 (const_int 13) (const_int 13)
5984 (const_int 15) (const_int 15)])))]
5985 "TARGET_AVX512F"
47490470 5986 "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
c003c6d6
AI
5987 [(set_attr "type" "sse")
5988 (set_attr "prefix" "evex")
5989 (set_attr "mode" "V16SF")])
5990
;; Insn (TARGET_AVX, plus <mask_avx512vl_condition> for the masked form):
;; 256-bit vmovsldup.  Operand 1 is concatenated with itself and each
;; even-indexed element is selected twice: { 0, 0, 2, 2, 4, 4, 6, 6 }.
6eacd27c
AI
5991(define_insn "avx_movsldup256<mask_name>"
5992 [(set (match_operand:V8SF 0 "register_operand" "=v")
95879c72
L
5993 (vec_select:V8SF
5994 (vec_concat:V16SF
6eacd27c 5995 (match_operand:V8SF 1 "nonimmediate_operand" "vm")
95879c72
L
5996 (match_dup 1))
5997 (parallel [(const_int 0) (const_int 0)
5998 (const_int 2) (const_int 2)
5999 (const_int 4) (const_int 4)
6000 (const_int 6) (const_int 6)])))]
6eacd27c
AI
6001 "TARGET_AVX && <mask_avx512vl_condition>"
6002 "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
95879c72
L
6003 [(set_attr "type" "sse")
6004 (set_attr "prefix" "vex")
6005 (set_attr "mode" "V8SF")])
6006
;; Insn (TARGET_SSE3, plus <mask_avx512vl_condition> for the masked form):
;; 128-bit movsldup.  Operand 1 is concatenated with itself into a V8SF and
;; elements 0, 0, 6, 6 are selected; index 6 is element 2 of the second
;; copy, so the result is { op1[0], op1[0], op1[2], op1[2] }.
6eacd27c
AI
6007(define_insn "sse3_movsldup<mask_name>"
6008 [(set (match_operand:V4SF 0 "register_operand" "=v")
d6023b50
UB
6009 (vec_select:V4SF
6010 (vec_concat:V8SF
6eacd27c 6011 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
d6023b50
UB
6012 (match_dup 1))
6013 (parallel [(const_int 0)
6014 (const_int 0)
6015 (const_int 6)
6016 (const_int 6)])))]
6eacd27c
AI
6017 "TARGET_SSE3 && <mask_avx512vl_condition>"
6018 "%vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
d6023b50 6019 [(set_attr "type" "sse")
10e4d956 6020 (set_attr "prefix_rep" "1")
95879c72 6021 (set_attr "prefix" "maybe_vex")
d6023b50 6022 (set_attr "mode" "V4SF")])
ef719a44 6023
;; Insn (TARGET_AVX512F): 512-bit vmovsldup.  Operand 1 is concatenated
;; with itself and each even-indexed element 0, 2, ..., 14 is selected
;; twice.  <mask_name> / <mask_operand2> supply the optional masked variant.
47490470 6024(define_insn "<mask_codefor>avx512f_movsldup512<mask_name>"
c003c6d6
AI
6025 [(set (match_operand:V16SF 0 "register_operand" "=v")
6026 (vec_select:V16SF
6027 (vec_concat:V32SF
6028 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
6029 (match_dup 1))
6030 (parallel [(const_int 0) (const_int 0)
6031 (const_int 2) (const_int 2)
6032 (const_int 4) (const_int 4)
6033 (const_int 6) (const_int 6)
6034 (const_int 8) (const_int 8)
6035 (const_int 10) (const_int 10)
6036 (const_int 12) (const_int 12)
6037 (const_int 14) (const_int 14)])))]
6038 "TARGET_AVX512F"
47490470 6039 "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
c003c6d6
AI
6040 [(set_attr "type" "sse")
6041 (set_attr "prefix" "evex")
6042 (set_attr "mode" "V16SF")])
6043
;; Expander (TARGET_AVX): take the shuffle immediate in operand 3 and
;; decode it into the eight explicit element selectors expected by
;; avx_shufps256_1.  The 2-bit fields at bits 0 and 2 become selectors into
;; operand 1 (offset 0 for the first four results, +4 for the last four);
;; the fields at bits 4 and 6 become selectors into operand 2 (offsets +8
;; and +12 in the V16SF concatenation).
fc01a1ac 6044(define_expand "avx_shufps256<mask_expand4_name>"
82e86dc6
UB
6045 [(match_operand:V8SF 0 "register_operand")
6046 (match_operand:V8SF 1 "register_operand")
6047 (match_operand:V8SF 2 "nonimmediate_operand")
6048 (match_operand:SI 3 "const_int_operand")]
95879c72
L
6049 "TARGET_AVX"
6050{
6051 int mask = INTVAL (operands[3]);
fc01a1ac
AI
6052 emit_insn (gen_avx_shufps256_1<mask_expand4_name> (operands[0],
6053 operands[1],
6054 operands[2],
6055 GEN_INT ((mask >> 0) & 3),
6056 GEN_INT ((mask >> 2) & 3),
6057 GEN_INT (((mask >> 4) & 3) + 8),
6058 GEN_INT (((mask >> 6) & 3) + 8),
6059 GEN_INT (((mask >> 0) & 3) + 4),
6060 GEN_INT (((mask >> 2) & 3) + 4),
6061 GEN_INT (((mask >> 4) & 3) + 12),
6062 GEN_INT (((mask >> 6) & 3) + 12)
6063 <mask_expand4_args>));
95879c72
L
6064 DONE;
6065})
6066
;; Insn: 256-bit vshufps with the shuffle spelled out as eight selectors
;; (operands 3..10).  The insn condition requires selectors 7..10 to equal
;; selectors 3..6 plus 4, so both halves of the result use the same
;; pattern.  The output code re-packs operands 3..6 into the 8-bit
;; immediate and overwrites operands[3] with it before printing.
6067;; Each 2-bit field of the mask selects 2 elements (one per result half).
fc01a1ac
AI
6068(define_insn "avx_shufps256_1<mask_name>"
6069 [(set (match_operand:V8SF 0 "register_operand" "=v")
95879c72
L
6070 (vec_select:V8SF
6071 (vec_concat:V16SF
fc01a1ac
AI
6072 (match_operand:V8SF 1 "register_operand" "v")
6073 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
82e86dc6
UB
6074 (parallel [(match_operand 3 "const_0_to_3_operand" )
6075 (match_operand 4 "const_0_to_3_operand" )
6076 (match_operand 5 "const_8_to_11_operand" )
6077 (match_operand 6 "const_8_to_11_operand" )
6078 (match_operand 7 "const_4_to_7_operand" )
6079 (match_operand 8 "const_4_to_7_operand" )
6080 (match_operand 9 "const_12_to_15_operand")
6081 (match_operand 10 "const_12_to_15_operand")])))]
95879c72 6082 "TARGET_AVX
fc01a1ac 6083 && <mask_avx512vl_condition>
95879c72
L
6084 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
6085 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
6086 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
6087 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
6088{
6089 int mask;
6090 mask = INTVAL (operands[3]);
6091 mask |= INTVAL (operands[4]) << 2;
6092 mask |= (INTVAL (operands[5]) - 8) << 4;
6093 mask |= (INTVAL (operands[6]) - 8) << 6;
6094 operands[3] = GEN_INT (mask);
6095
fc01a1ac 6096 return "vshufps\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
95879c72 6097}
eb2f2b44 6098 [(set_attr "type" "sseshuf")
725fd454 6099 (set_attr "length_immediate" "1")
fc01a1ac 6100 (set_attr "prefix" "<mask_prefix>")
95879c72
L
6101 (set_attr "mode" "V8SF")])
6102
;; Expander (TARGET_SSE): take the shuffle immediate in operand 3 and
;; decode it into the four explicit element selectors expected by
;; sse_shufps_v4sf.  The 2-bit fields at bits 0 and 2 select from operand 1;
;; the fields at bits 4 and 6 select from operand 2 (offset +4 in the V8SF
;; concatenation).
fc01a1ac 6103(define_expand "sse_shufps<mask_expand4_name>"
82e86dc6
UB
6104 [(match_operand:V4SF 0 "register_operand")
6105 (match_operand:V4SF 1 "register_operand")
6106 (match_operand:V4SF 2 "nonimmediate_operand")
6107 (match_operand:SI 3 "const_int_operand")]
d6023b50
UB
6108 "TARGET_SSE"
6109{
6110 int mask = INTVAL (operands[3]);
fc01a1ac
AI
6111 emit_insn (gen_sse_shufps_v4sf<mask_expand4_name> (operands[0],
6112 operands[1],
6113 operands[2],
6114 GEN_INT ((mask >> 0) & 3),
6115 GEN_INT ((mask >> 2) & 3),
6116 GEN_INT (((mask >> 4) & 3) + 4),
6117 GEN_INT (((mask >> 6) & 3) + 4)
6118 <mask_expand4_args>));
d6023b50
UB
6119 DONE;
6120})
ef719a44 6121
;; Insn (TARGET_AVX512VL): masked 128-bit vshufps.  The shuffle selected by
;; operands 3..6 is vec_merged with operand 7 (tied to the destination or a
;; constant accepted by "C") under the QImode mask register in operand 8.
;; The output code re-packs operands 3..6 into the 8-bit immediate and
;; overwrites operands[3] with it before printing.
fc01a1ac
AI
6122(define_insn "sse_shufps_v4sf_mask"
6123 [(set (match_operand:V4SF 0 "register_operand" "=v")
6124 (vec_merge:V4SF
6125 (vec_select:V4SF
6126 (vec_concat:V8SF
6127 (match_operand:V4SF 1 "register_operand" "v")
6128 (match_operand:V4SF 2 "nonimmediate_operand" "vm"))
6129 (parallel [(match_operand 3 "const_0_to_3_operand")
6130 (match_operand 4 "const_0_to_3_operand")
6131 (match_operand 5 "const_4_to_7_operand")
6132 (match_operand 6 "const_4_to_7_operand")]))
6133 (match_operand:V4SF 7 "vector_move_operand" "0C")
6134 (match_operand:QI 8 "register_operand" "Yk")))]
6135 "TARGET_AVX512VL"
6136{
6137 int mask = 0;
6138 mask |= INTVAL (operands[3]) << 0;
6139 mask |= INTVAL (operands[4]) << 2;
6140 mask |= (INTVAL (operands[5]) - 4) << 4;
6141 mask |= (INTVAL (operands[6]) - 4) << 6;
6142 operands[3] = GEN_INT (mask);
6143
6144 return "vshufps\t{%3, %2, %1, %0%{%8%}%N7|%0%{%8%}%N7, %1, %2, %3}";
6145}
6146 [(set_attr "type" "sseshuf")
6147 (set_attr "length_immediate" "1")
6148 (set_attr "prefix" "evex")
6149 (set_attr "mode" "V4SF")])
6150
;; Insn (TARGET_SSE) for the VI4F_128 modes: shufps with the shuffle spelled
;; out as four selectors (operands 3 and 4 index operand 1, operands 5 and 6
;; index operand 2 via offsets 4..7).  The output code re-packs the
;; selectors into the 8-bit immediate in operands[3], then emits the
;; two-operand shufps for alternative 0 or the three-operand vshufps for
;; alternative 1.
ba63dfb9 6151(define_insn "sse_shufps_<mode>"
6bec6c98
UB
6152 [(set (match_operand:VI4F_128 0 "register_operand" "=x,x")
6153 (vec_select:VI4F_128
cbb734aa 6154 (vec_concat:<ssedoublevecmode>
6bec6c98
UB
6155 (match_operand:VI4F_128 1 "register_operand" "0,x")
6156 (match_operand:VI4F_128 2 "nonimmediate_operand" "xm,xm"))
82e86dc6
UB
6157 (parallel [(match_operand 3 "const_0_to_3_operand")
6158 (match_operand 4 "const_0_to_3_operand")
6159 (match_operand 5 "const_4_to_7_operand")
6160 (match_operand 6 "const_4_to_7_operand")])))]
d6023b50
UB
6161 "TARGET_SSE"
6162{
6163 int mask = 0;
6164 mask |= INTVAL (operands[3]) << 0;
6165 mask |= INTVAL (operands[4]) << 2;
6166 mask |= (INTVAL (operands[5]) - 4) << 4;
6167 mask |= (INTVAL (operands[6]) - 4) << 6;
6168 operands[3] = GEN_INT (mask);
ef719a44 6169
3729983c
UB
6170 switch (which_alternative)
6171 {
6172 case 0:
6173 return "shufps\t{%3, %2, %0|%0, %2, %3}";
6174 case 1:
6175 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6176 default:
6177 gcc_unreachable ();
6178 }
d6023b50 6179}
3729983c 6180 [(set_attr "isa" "noavx,avx")
eb2f2b44 6181 (set_attr "type" "sseshuf")
725fd454 6182 (set_attr "length_immediate" "1")
3729983c 6183 (set_attr "prefix" "orig,vex")
d6023b50 6184 (set_attr "mode" "V4SF")])
ef719a44 6185
;; Insn (TARGET_SSE): operand 0 (V2SF) = elements 2 and 3 of V4SF operand 1.
;; Alternatives, in order: movhps storing a register to memory, movhlps
;; between registers, and movlps loading through the %H1 modifier from
;; offsettable memory.
d6023b50
UB
6186(define_insn "sse_storehps"
6187 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
6188 (vec_select:V2SF
6189 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
6190 (parallel [(const_int 2) (const_int 3)])))]
6191 "TARGET_SSE"
6192 "@
eabb5f48 6193 %vmovhps\t{%1, %0|%q0, %1}
95879c72
L
6194 %vmovhlps\t{%1, %d0|%d0, %1}
6195 %vmovlps\t{%H1, %d0|%d0, %H1}"
d6023b50 6196 [(set_attr "type" "ssemov")
f220a4f4 6197 (set_attr "ssememalign" "64")
95879c72 6198 (set_attr "prefix" "maybe_vex")
d6023b50
UB
6199 (set_attr "mode" "V2SF,V4SF,V2SF")])
6200
;; Expander wrapper around the sse_loadhps insn (TARGET_SSE).  The operands
;; are passed through ix86_fixup_binary_operands, the insn is emitted with
;; the destination that helper returns, and the result is copied into
;; operand 0 afterwards if the helper picked a different destination.
3a3f9d87 6201(define_expand "sse_loadhps_exp"
82e86dc6 6202 [(set (match_operand:V4SF 0 "nonimmediate_operand")
ffbaf337
UB
6203 (vec_concat:V4SF
6204 (vec_select:V2SF
82e86dc6 6205 (match_operand:V4SF 1 "nonimmediate_operand")
ffbaf337 6206 (parallel [(const_int 0) (const_int 1)]))
82e86dc6 6207 (match_operand:V2SF 2 "nonimmediate_operand")))]
ffbaf337 6208 "TARGET_SSE"
f17aa4ad
UB
6209{
6210 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
6cf9eb27 6211
f17aa4ad
UB
6212 emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));
6213
6214 /* Fix up the destination if needed. */
6215 if (dst != operands[0])
6216 emit_move_insn (operands[0], dst);
6217
6218 DONE;
6219})
ffbaf337 6220
;; Insn (TARGET_SSE): operand 0 = { op1[0], op1[1], op2[0], op2[1] }, i.e.
;; the low half of V4SF operand 1 concatenated with V2SF operand 2.
;; Alternatives, in order: movhps / vmovhps with memory operand 2,
;; movlhps / vmovlhps with register operand 2, and movlps storing register
;; operand 2 to the %H0 part of an offsettable memory destination.
3a3f9d87 6221(define_insn "sse_loadhps"
3729983c 6222 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
ef719a44 6223 (vec_concat:V4SF
d6023b50 6224 (vec_select:V2SF
3729983c 6225 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
d6023b50 6226 (parallel [(const_int 0) (const_int 1)]))
3729983c 6227 (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,x,x")))]
2fe4dc01 6228 "TARGET_SSE"
d6023b50 6229 "@
eabb5f48
UB
6230 movhps\t{%2, %0|%0, %q2}
6231 vmovhps\t{%2, %1, %0|%0, %1, %q2}
d6023b50 6232 movlhps\t{%2, %0|%0, %2}
3729983c
UB
6233 vmovlhps\t{%2, %1, %0|%0, %1, %2}
6234 %vmovlps\t{%2, %H0|%H0, %2}"
ba94c7af 6235 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3729983c 6236 (set_attr "type" "ssemov")
f220a4f4 6237 (set_attr "ssememalign" "64")
3729983c
UB
6238 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
6239 (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
95879c72 6240
;; Extract elements 0-1 of a V4SF into a V2SF destination.
;; Alternative 0 stores to memory, alternative 1 is a register-to-register
;; move, alternative 2 loads from memory.
(define_insn "sse_storelps"
  [(set (match_operand:V2SF 0 "nonimmediate_operand"   "=m,x,x")
        (vec_select:V2SF
          (match_operand:V4SF 1 "nonimmediate_operand" " x,x,m")
          (parallel [(const_int 0) (const_int 1)])))]
  "TARGET_SSE"
  "@
   %vmovlps\t{%1, %0|%q0, %1}
   %vmovaps\t{%1, %0|%0, %1}
   %vmovlps\t{%1, %d0|%d0, %q1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "V2SF,V4SF,V2SF")])
6254
;; Expander for sse_loadlps: the result is the V2SF operand 2
;; concatenated with elements 2-3 of operand 1.  The operands are first
;; passed through ix86_fixup_binary_operands; if that returns a
;; destination other than operands[0], the result is copied back.
(define_expand "sse_loadlps_exp"
  [(set (match_operand:V4SF 0 "nonimmediate_operand")
        (vec_concat:V4SF
          (match_operand:V2SF 2 "nonimmediate_operand")
          (vec_select:V2SF
            (match_operand:V4SF 1 "nonimmediate_operand")
            (parallel [(const_int 2) (const_int 3)]))))]
  "TARGET_SSE"
{
  rtx target = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);

  emit_insn (gen_sse_loadlps (target, operands[1], operands[2]));

  /* The fixup may have substituted a different destination; if so,
     move the result to where the caller asked for it.  */
  if (target != operands[0])
    emit_move_insn (operands[0], target);

  DONE;
})
ffbaf337 6274
;; V4SF result = { V2SF operand 2, operand 1 elements 2-3 }.
;; Alternatives 0-1 use (v)shufps with immediate 0xe4, alternatives 2-3
;; load operand 2 from memory with (v)movlps, and alternative 4 has a
;; memory destination tied to operand 1.
(define_insn "sse_loadlps"
  [(set (match_operand:V4SF 0 "nonimmediate_operand"     "=x,x,x,x,m")
        (vec_concat:V4SF
          (match_operand:V2SF 2 "nonimmediate_operand"   " 0,x,m,m,x")
          (vec_select:V2SF
            (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0,x,0")
            (parallel [(const_int 2) (const_int 3)]))))]
  "TARGET_SSE"
  "@
   shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
   vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
   movlps\t{%2, %0|%0, %q2}
   vmovlps\t{%2, %1, %0|%0, %1, %q2}
   %vmovlps\t{%2, %0|%q0, %2}"
  [(set_attr "isa" "noavx,avx,noavx,avx,*")
   (set_attr "type" "sseshuf,sseshuf,ssemov,ssemov,ssemov")
   (set_attr "ssememalign" "64")
   (set_attr "length_immediate" "1,1,*,*,*")
   (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
   (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
95879c72 6295
;; Merge element 0 of operand 2 into operand 1 (vec_merge mask 1):
;; element 0 comes from operand 2, the remaining elements from operand 1.
(define_insn "sse_movss"
  [(set (match_operand:V4SF 0 "register_operand"   "=x,x")
        (vec_merge:V4SF
          (match_operand:V4SF 2 "register_operand" " x,x")
          (match_operand:V4SF 1 "register_operand" " 0,x")
          (const_int 1)))]
  "TARGET_SSE"
  "@
   movss\t{%2, %0|%0, %2}
   vmovss\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssemov")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "SF")])
8115817b 6310
;; Broadcast element 0 of a V4SF register to every element of a
;; VF1_128_256 destination.
(define_insn "avx2_vec_dup<mode>"
  [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
        (vec_duplicate:VF1_128_256
          (vec_select:SF
            (match_operand:V4SF 1 "register_operand" "x")
            (parallel [(const_int 0)]))))]
  "TARGET_AVX2"
  "vbroadcastss\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<MODE>")])
977e83a3 6322
6945a32e
JJ
;; Broadcast element 0 of a V8SF register to all elements of a V8SF
;; destination; the source is printed with the %x modifier.
(define_insn "avx2_vec_dupv8sf_1"
  [(set (match_operand:V8SF 0 "register_operand" "=x")
        (vec_duplicate:V8SF
          (vec_select:SF
            (match_operand:V8SF 1 "register_operand" "x")
            (parallel [(const_int 0)]))))]
  "TARGET_AVX2"
  "vbroadcastss\t{%x1, %0|%0, %x1}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])
6334
b92883d6
IT
;; Broadcast element 0 of a VF_512 register to all elements of a
;; destination of the same mode.
(define_insn "avx512f_vec_dup<mode>_1"
  [(set (match_operand:VF_512 0 "register_operand" "=v")
        (vec_duplicate:VF_512
          (vec_select:<ssescalarmode>
            (match_operand:VF_512 1 "register_operand" "v")
            (parallel [(const_int 0)]))))]
  "TARGET_AVX512F"
  "vbroadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])
6346
6784c6e0
UB
;; Concatenate two SF values into a V2SF (SSE4.1 variant).
;; insertps also accepts a register source, but unpcklps is preferred
;; for register sources because its encoding is shorter.  The pattern
;; is rejected when both inputs are in memory.
(define_insn "*vec_concatv2sf_sse4_1"
  [(set (match_operand:V2SF 0 "register_operand"
          "=Yr,*x,x,Yr,*x,x,x,*y ,*y")
        (vec_concat:V2SF
          (match_operand:SF 1 "nonimmediate_operand"
          " 0, 0,x, 0,0, x,m, 0 , m")
          (match_operand:SF 2 "vector_move_operand"
          " Yr,*x,x, m,m, m,C,*ym, C")))]
  "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   unpcklps\t{%2, %0|%0, %2}
   unpcklps\t{%2, %0|%0, %2}
   vunpcklps\t{%2, %1, %0|%0, %1, %2}
   insertps\t{$0x10, %2, %0|%0, %2, 0x10}
   insertps\t{$0x10, %2, %0|%0, %2, 0x10}
   vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
   %vmovss\t{%1, %0|%0, %1}
   punpckldq\t{%2, %0|%0, %2}
   movd\t{%1, %0|%0, %1}"
  [(set_attr "isa" "noavx,noavx,avx,noavx,noavx,avx,*,*,*")
   (set_attr "type" "sselog,sselog,sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
   (set_attr "prefix_data16" "*,*,*,1,1,*,*,*,*")
   (set_attr "prefix_extra" "*,*,*,1,1,1,*,*,*")
   (set_attr "length_immediate" "*,*,*,1,1,1,*,*,*")
   (set_attr "prefix" "orig,orig,vex,orig,orig,vex,maybe_vex,orig,orig")
   (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
6784c6e0 6375
d6023b50
UB
;; Concatenate two SF values into a V2SF (plain SSE variant).
;; ??? In theory the MMX alternative could accept memory for operand 2,
;; but using nonimmediate_operand there while the SSE alternatives do
;; *not* accept memory would pretty much force the MMX alternative to
;; be chosen.
(define_insn "*vec_concatv2sf_sse"
  [(set (match_operand:V2SF 0 "register_operand"     "=x,x,*y,*y")
        (vec_concat:V2SF
          (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
          (match_operand:SF 2 "reg_or_0_operand"     " x,C,*y, C")))]
  "TARGET_SSE"
  "@
   unpcklps\t{%2, %0|%0, %2}
   movss\t{%1, %0|%0, %1}
   punpckldq\t{%2, %0|%0, %2}
   movd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
   (set_attr "mode" "V4SF,SF,DI,DI")])
d9987fb4 6392
;; Concatenate two V2SF values into a V4SF.  Operand 1 is always a
;; register; operand 2 is a register (alternatives 0-1) or memory
;; (alternatives 2-3).
(define_insn "*vec_concatv4sf"
  [(set (match_operand:V4SF 0 "register_operand"       "=x,x,x,x")
        (vec_concat:V4SF
          (match_operand:V2SF 1 "register_operand"     " 0,x,0,x")
          (match_operand:V2SF 2 "nonimmediate_operand" " x,x,m,m")))]
  "TARGET_SSE"
  "@
   movlhps\t{%2, %0|%0, %2}
   vmovlhps\t{%2, %1, %0|%0, %1, %2}
   movhps\t{%2, %0|%0, %q2}
   vmovhps\t{%2, %1, %0|%0, %1, %q2}"
  [(set_attr "isa" "noavx,avx,noavx,avx")
   (set_attr "type" "ssemov")
   (set_attr "prefix" "orig,vex,orig,vex")
   (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
d9987fb4 6408
;; Vector initialisation for the V_128 modes: all the work is delegated
;; to ix86_expand_vector_init, called with `false' as first argument.
(define_expand "vec_init<mode>"
  [(match_operand:V_128 0 "register_operand")
   (match_operand 1)]
  "TARGET_SSE"
{
  ix86_expand_vector_init (false, operands[0], operands[1]);
  DONE;
})
6417
3729983c
UB
;; Set element 0 of a VI4F_128 vector to scalar operand 2, taking the
;; remaining elements from operand 1 (vec_merge mask 1).
;; Registers from different units are deliberately kept in separate
;; alternatives; see the comment above the inline_secondary_memory_needed
;; function in i386.c.  The last three alternatives (memory destination)
;; are emitted as "#" and rely on a splitter.
(define_insn "vec_set<mode>_0"
  [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
          "=Yr,*v,v,Yi,x,x,v,Yr ,*x ,x ,m ,m ,m")
        (vec_merge:VI4F_128
          (vec_duplicate:VI4F_128
            (match_operand:<ssescalarmode> 2 "general_operand"
          " Yr,*v,m,r ,m,x,v,*rm,*rm,*rm,!x,!*re,!*fF"))
          (match_operand:VI4F_128 1 "vector_move_operand"
          " C , C,C,C ,C,0,v,0 ,0 ,x ,0 ,0 ,0")
          (const_int 1)))]
  "TARGET_SSE"
  "@
   %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe}
   %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe}
   %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
   %vmovd\t{%2, %0|%0, %2}
   movss\t{%2, %0|%0, %2}
   movss\t{%2, %0|%0, %2}
   vmovss\t{%2, %1, %0|%0, %1, %2}
   pinsrd\t{$0, %2, %0|%0, %2, 0}
   pinsrd\t{$0, %2, %0|%0, %2, 0}
   vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
   #
   #
   #"
  [(set_attr "isa" "sse4,sse4,sse2,sse2,noavx,noavx,avx,sse4_noavx,sse4_noavx,avx,*,*,*")
   ;; Everything not listed explicitly below is an "ssemov".
   (set (attr "type")
     (cond [(eq_attr "alternative" "0,1,7,8,9")
              (const_string "sselog")
            (eq_attr "alternative" "11")
              (const_string "imov")
            (eq_attr "alternative" "12")
              (const_string "fmov")
           ]
           (const_string "ssemov")))
   (set_attr "prefix_extra" "*,*,*,*,*,*,*,1,1,1,*,*,*")
   (set_attr "length_immediate" "*,*,*,*,*,*,*,1,1,1,*,*,*")
   (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,maybe_vex,orig,orig,vex,orig,orig,vex,*,*,*")
   (set_attr "mode" "SF,SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,TI,*,*,*")])
d6023b50
UB
6459
;; Insert SF operand 2 into one element of V4SF operand 1.  Operand 3 is
;; the vec_merge mask; the insn condition requires it to be a power of
;; two whose log2 is below the number of V4SF elements.
;; A subset is vec_setv4sf.
(define_insn "*vec_setv4sf_sse4_1"
  [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,x")
        (vec_merge:V4SF
          (vec_duplicate:V4SF
            (match_operand:SF 2 "nonimmediate_operand" "Yrm,*xm,xm"))
          (match_operand:V4SF 1 "register_operand" "0,0,x")
          (match_operand:SI 3 "const_int_operand")))]
  "TARGET_SSE4_1
   && ((unsigned) exact_log2 (INTVAL (operands[3]))
       < GET_MODE_NUNITS (V4SFmode))"
{
  /* Turn the merge mask into the immediate that is printed: the element
     index (log2 of the mask) shifted left by 4.  */
  operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);

  switch (which_alternative)
    {
    case 2:
      return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
    case 0:
    case 1:
      return "insertps\t{%3, %2, %0|%0, %2, %3}";
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix_data16" "1,1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "orig,orig,vex")
   (set_attr "mode" "V4SF")])
6491
;; insertps as an unspec: operand 2 is the source vector, operand 1 the
;; vector being inserted into, operand 3 the 8-bit immediate.
(define_insn "sse4_1_insertps"
  [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,x")
        (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "Yrm,*xm,xm")
                      (match_operand:V4SF 1 "register_operand" "0,0,x")
                      (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
                     UNSPEC_INSERTPS))]
  "TARGET_SSE4_1"
{
  if (MEM_P (operands[2]))
    {
      /* For a memory source, bits 7:6 of the immediate are folded into
         the address (4 bytes per step) and cleared from the immediate;
         the operand is then referenced as an SFmode location.  */
      unsigned src_elt = INTVAL (operands[3]) >> 6;
      if (src_elt)
        operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
      operands[2] = adjust_address_nv (operands[2], SFmode, src_elt * 4);
    }

  switch (which_alternative)
    {
    case 2:
      return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
    case 0:
    case 1:
      return "insertps\t{%3, %2, %0|%0, %2, %3}";
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix_data16" "1,1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "orig,orig,vex")
   (set_attr "mode" "V4SF")])
d9987fb4 6525
;; After reload, setting element 0 of a vector that lives in memory
;; (merging with the same memory, mask 1) becomes a plain scalar store
;; to offset 0 of that memory.
(define_split
  [(set (match_operand:VI4F_128 0 "memory_operand")
        (vec_merge:VI4F_128
          (vec_duplicate:VI4F_128
            (match_operand:<ssescalarmode> 1 "nonmemory_operand"))
          (match_dup 0)
          (const_int 1)))]
  "TARGET_SSE && reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  operands[0] = adjust_address (operands[0], <ssescalarmode>mode, 0);
})
d9987fb4 6536
;; Set one element of a vector: operand 0 is the vector, operand 1 the
;; scalar value and operand 2 the constant element index.  All the work
;; is delegated to ix86_expand_vector_set.
(define_expand "vec_set<mode>"
  [(match_operand:V 0 "register_operand")
   (match_operand:<ssescalarmode> 1 "register_operand")
   (match_operand 2 "const_int_operand")]
  "TARGET_SSE"
{
  ix86_expand_vector_set (false, operands[0], operands[1],
                          INTVAL (operands[2]));
  DONE;
})
6547
;; Extract element 0 of a V4SF.  Always emitted as "#"; after reload it
;; is split into a plain SFmode move from the same register number or
;; from offset 0 of the same memory.
(define_insn_and_split "*vec_extractv4sf_0"
  [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
        (vec_select:SF
          (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
          (parallel [(const_int 0)])))]
  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  /* Re-express the vector source as an SFmode operand.  */
  operands[1] = (REG_P (operands[1])
                 ? gen_rtx_REG (SFmode, REGNO (operands[1]))
                 : adjust_address (operands[1], SFmode, 0));
})
6563
;; Extract element operand 2 (0..3) of a V4SF.  Alternatives 0-1 emit
;; extractps directly; alternatives 2-3 (SSE register destination) are
;; emitted as "#" and split after reload into a shuffle that moves the
;; wanted element into element 0 of the destination register.
(define_insn_and_split "*sse4_1_extractps"
  [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,rm,x,x")
        (vec_select:SF
          (match_operand:V4SF 1 "register_operand" "Yr,*x,0,x")
          (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n,n")])))]
  "TARGET_SSE4_1"
  "@
   %vextractps\t{%2, %1, %0|%0, %1, %2}
   %vextractps\t{%2, %1, %0|%0, %1, %2}
   #
   #"
  "&& reload_completed && SSE_REG_P (operands[0])"
  [(const_int 0)]
{
  rtx vdest = gen_rtx_REG (V4SFmode, REGNO (operands[0]));
  rtx src = operands[1];
  rtx sel = operands[2];

  switch (INTVAL (sel))
    {
    case 2:
      emit_insn (gen_vec_interleave_highv4sf (vdest, src, src));
      break;

    case 1:
    case 3:
      {
        /* Same source for both shufps inputs; the last two selectors
           are the element index plus 4.  */
        rtx sel_plus4_a = GEN_INT (INTVAL (sel) + 4);
        rtx sel_plus4_b = GEN_INT (INTVAL (sel) + 4);
        emit_insn (gen_sse_shufps_v4sf (vdest, src, src, sel, sel,
                                        sel_plus4_a, sel_plus4_b));
      }
      break;

    default:
      /* 0 should be handled by the *vec_extractv4sf_0 pattern above.  */
      gcc_unreachable ();
    }
  DONE;
}
  [(set_attr "isa" "*,*,noavx,avx")
   (set_attr "type" "sselog,sselog,*,*")
   (set_attr "prefix_data16" "1,1,*,*")
   (set_attr "prefix_extra" "1,1,*,*")
   (set_attr "length_immediate" "1,1,*,*")
   (set_attr "prefix" "maybe_vex,maybe_vex,*,*")
   (set_attr "mode" "V4SF,V4SF,*,*")])
424c8389 6604
;; Extract element operand 2 (0..3) from a V4SF in offsettable memory.
;; Always "#"; after reload it becomes a scalar SFmode load from the
;; address offset by 4 * index bytes.
(define_insn_and_split "*vec_extractv4sf_mem"
  [(set (match_operand:SF 0 "register_operand" "=x,*r,f")
        (vec_select:SF
          (match_operand:V4SF 1 "memory_operand" "o,o,o")
          (parallel [(match_operand 2 "const_0_to_3_operand" "n,n,n")])))]
  "TARGET_SSE"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  operands[1]
    = adjust_address (operands[1], SFmode, INTVAL (operands[2]) * 4);
})
6617
0774c160
AI
;; Name prefix used for the quarter-vector extract expander, per mode.
(define_mode_attr extract_type
  [(V16SF "avx512f")
   (V16SI "avx512f")
   (V8DF "avx512dq")
   (V8DI "avx512dq")])

;; Instruction-name suffix used for the quarter-vector extract, per mode.
(define_mode_attr extract_suf
  [(V16SF "32x4")
   (V16SI "32x4")
   (V8DF "64x2")
   (V8DI "64x2")])

;; 512-bit modes with a quarter-vector extract; the 64-bit element modes
;; are only enabled under TARGET_AVX512DQ.
(define_mode_iterator AVX512_VEC
  [(V8DF "TARGET_AVX512DQ")
   (V8DI "TARGET_AVX512DQ")
   V16SF
   V16SI])
6626
;; Masked extraction of a quarter of a 512-bit vector.
;;   operand 0: destination (quarter-width vector)
;;   operand 1: 512-bit source register
;;   operand 2: which quarter to extract (0..3)
;;   operand 3: merge source (a constant vector selects zero-masking)
;;   operand 4: QImode mask register
(define_expand "<extract_type>_vextract<shuffletype><extract_suf>_mask"
  [(match_operand:<ssequartermode> 0 "nonimmediate_operand")
   (match_operand:AVX512_VEC 1 "register_operand")
   (match_operand:SI 2 "const_0_to_3_operand")
   (match_operand:<ssequartermode> 3 "nonimmediate_operand")
   (match_operand:QI 4 "register_operand")]
  "TARGET_AVX512F"
{
  int mask = INTVAL (operands[2]);
  rtx dest = operands[0];

  /* Memory destination with a constant-vector merge source: compute the
     result in a fresh pseudo and store it afterwards.  Calling force_reg
     on the destination would merely load the old memory contents into a
     temporary, and the extracted value would never reach operands[0].  */
  if (MEM_P (dest) && GET_CODE (operands[3]) == CONST_VECTOR)
    dest = gen_reg_rtx (<ssequartermode>mode);

  /* Translate the quarter number into explicit element indices.  */
  if (<MODE>mode == V16SImode || <MODE>mode == V16SFmode)
    emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (dest,
        operands[1], GEN_INT (mask * 4), GEN_INT (mask * 4 + 1),
        GEN_INT (mask * 4 + 2), GEN_INT (mask * 4 + 3), operands[3],
        operands[4]));
  else
    emit_insn (gen_avx512dq_vextract<shuffletype>64x2_1_mask (dest,
        operands[1], GEN_INT (mask * 2), GEN_INT (mask * 2 + 1), operands[3],
        operands[4]));

  /* Write the result back if a temporary was used.  */
  if (dest != operands[0])
    emit_move_insn (operands[0], dest);
  DONE;
})
6652
0774c160
AI
;; Masked store of two consecutive 64-bit elements of a V8FI register to
;; memory; the merge source (operand 4) must be the destination memory
;; itself.  Operands 2/3 are the element indices: operand 2 must be even
;; and operand 3 must be operand 2 + 1.
;; The mask operand uses "Yk", like the other *_maskm patterns in this
;; file, instead of "k": in the i386 constraint set "k" also admits k0,
;; which cannot act as a write mask.
(define_insn "avx512dq_vextract<shuffletype>64x2_1_maskm"
  [(set (match_operand:<ssequartermode> 0 "memory_operand" "=m")
        (vec_merge:<ssequartermode>
          (vec_select:<ssequartermode>
            (match_operand:V8FI 1 "register_operand" "v")
            (parallel [(match_operand 2 "const_0_to_7_operand")
                       (match_operand 3 "const_0_to_7_operand")]))
          (match_operand:<ssequartermode> 4 "memory_operand" "0")
          (match_operand:QI 5 "register_operand" "Yk")))]
  "TARGET_AVX512DQ
   && (INTVAL (operands[2]) % 2 == 0)
   && (INTVAL (operands[2]) == INTVAL (operands[3]) - 1)
   && rtx_equal_p (operands[4], operands[0])"
{
  /* Convert the first element index into the 128-bit lane number.  */
  operands[2] = GEN_INT ((INTVAL (operands[2])) >> 1);
  return "vextract<shuffletype>64x2\t{%2, %1, %0%{%5%}|%0%{%5%}, %1, %2}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "memory" "store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
6676
47490470
AI
;; Masked store of four consecutive 32-bit elements of a V16FI register
;; to memory; the merge source (operand 6) must be the destination memory
;; itself.  Operands 2-5 are the element indices: operand 2 must be a
;; multiple of 4 and each following index must be one larger than the
;; previous one.
(define_insn "avx512f_vextract<shuffletype>32x4_1_maskm"
  [(set (match_operand:<ssequartermode> 0 "memory_operand" "=m")
        (vec_merge:<ssequartermode>
          (vec_select:<ssequartermode>
            (match_operand:V16FI 1 "register_operand" "v")
            (parallel [(match_operand 2 "const_0_to_15_operand")
                       (match_operand 3 "const_0_to_15_operand")
                       (match_operand 4 "const_0_to_15_operand")
                       (match_operand 5 "const_0_to_15_operand")]))
          (match_operand:<ssequartermode> 6 "memory_operand" "0")
          (match_operand:QI 7 "register_operand" "Yk")))]
  "TARGET_AVX512F
   && ((INTVAL (operands[2]) % 4 == 0)
       && INTVAL (operands[2]) == (INTVAL (operands[3]) - 1)
       && INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
       && INTVAL (operands[4]) == (INTVAL (operands[5]) - 1))
   && rtx_equal_p (operands[6], operands[0])"
{
  /* Convert the first element index into the 128-bit lane number.  */
  operands[2] = GEN_INT ((INTVAL (operands[2])) >> 2);
  return "vextract<shuffletype>32x4\t{%2, %1, %0%{%7%}|%0%{%7%}, %1, %2}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "memory" "store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
6704
0774c160
AI
;; Extract two consecutive 64-bit elements of a V8FI register (optionally
;; masked via <mask_name>).  Operands 2/3 are the element indices.
;; The condition requires operand 2 to be even as well as operand 3 to be
;; operand 2 + 1, matching the _maskm variant: the output code derives
;; the lane immediate with ">> 1", so an unaligned pair such as (1, 2)
;; would otherwise match and silently extract lane 0.
(define_insn "<mask_codefor>avx512dq_vextract<shuffletype>64x2_1<mask_name>"
  [(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
        (vec_select:<ssequartermode>
          (match_operand:V8FI 1 "register_operand" "v")
          (parallel [(match_operand 2 "const_0_to_7_operand")
                     (match_operand 3 "const_0_to_7_operand")])))]
  "TARGET_AVX512DQ
   && (INTVAL (operands[2]) % 2 == 0)
   && (INTVAL (operands[2]) == INTVAL (operands[3]) - 1)"
{
  /* Convert the first element index into the 128-bit lane number.  */
  operands[2] = GEN_INT ((INTVAL (operands[2])) >> 1);
  return "vextract<shuffletype>64x2\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}";
}
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
6721
47490470
AI
;; Extract four consecutive 32-bit elements of a V16FI register
;; (optionally masked via <mask_name>).  Operands 2-5 are the element
;; indices.
;; The condition requires operand 2 to be a multiple of 4 in addition to
;; the indices being consecutive, matching the _maskm variant: the output
;; code derives the lane immediate with ">> 2", so an unaligned run such
;; as (1, 2, 3, 4) would otherwise match and silently extract lane 0.
(define_insn "<mask_codefor>avx512f_vextract<shuffletype>32x4_1<mask_name>"
  [(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
        (vec_select:<ssequartermode>
          (match_operand:V16FI 1 "register_operand" "v")
          (parallel [(match_operand 2 "const_0_to_15_operand")
                     (match_operand 3 "const_0_to_15_operand")
                     (match_operand 4 "const_0_to_15_operand")
                     (match_operand 5 "const_0_to_15_operand")])))]
  "TARGET_AVX512F
   && ((INTVAL (operands[2]) % 4 == 0)
       && INTVAL (operands[2]) == (INTVAL (operands[3]) - 1)
       && INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
       && INTVAL (operands[4]) == (INTVAL (operands[5]) - 1))"
{
  /* Convert the first element index into the 128-bit lane number.  */
  operands[2] = GEN_INT ((INTVAL (operands[2])) >> 2);
  return "vextract<shuffletype>32x4\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
}
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
6743
0774c160
AI
;; Name prefix used for the half-vector extract expander, per mode.
(define_mode_attr extract_type_2
  [(V16SF "avx512dq")
   (V16SI "avx512dq")
   (V8DF "avx512f")
   (V8DI "avx512f")])

;; Instruction-name suffix used for the half-vector extract, per mode.
(define_mode_attr extract_suf_2
  [(V16SF "32x8")
   (V16SI "32x8")
   (V8DF "64x4")
   (V8DI "64x4")])

;; 512-bit modes with a half-vector extract; the 32-bit element modes
;; are only enabled under TARGET_AVX512DQ.
(define_mode_iterator AVX512_VEC_2
  [(V16SF "TARGET_AVX512DQ")
   (V16SI "TARGET_AVX512DQ")
   V8DF
   V8DI])
6752
;; Masked extraction of one half of a 512-bit vector.
;;   operand 0: destination (half-width vector)
;;   operand 1: 512-bit source register
;;   operand 2: which half to extract (0 = low, 1 = high)
;;   operand 3: merge source (a constant vector selects zero-masking)
;;   operand 4: QImode mask register
(define_expand "<extract_type_2>_vextract<shuffletype><extract_suf_2>_mask"
  [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
   (match_operand:AVX512_VEC_2 1 "register_operand")
   (match_operand:SI 2 "const_0_to_1_operand")
   (match_operand:<ssehalfvecmode> 3 "nonimmediate_operand")
   (match_operand:QI 4 "register_operand")]
  "TARGET_AVX512F"
{
  rtx (*insn)(rtx, rtx, rtx, rtx);
  rtx dest = operands[0];

  /* Memory destination with a constant-vector merge source: compute the
     result in a fresh pseudo and store it afterwards.  The temporary
     must have the mode of operand 0, i.e. <ssehalfvecmode>, not the
     quarter-vector mode; and force_reg on the destination would only
     load the old memory contents, so the extracted value would never
     reach operands[0].  */
  if (MEM_P (dest) && GET_CODE (operands[3]) == CONST_VECTOR)
    dest = gen_reg_rtx (<ssehalfvecmode>mode);

  switch (INTVAL (operands[2]))
    {
    case 0:
      insn = gen_vec_extract_lo_<mode>_mask;
      break;
    case 1:
      insn = gen_vec_extract_hi_<mode>_mask;
      break;
    default:
      gcc_unreachable ();
    }

  emit_insn (insn (dest, operands[1], operands[3], operands[4]));

  /* Write the result back if a temporary was used.  */
  if (dest != operands[0])
    emit_move_insn (operands[0], dest);
  DONE;
})
6781
2e2206fa
AI
;; After reload, extracting elements 0-3 of a V8FI value is just a move
;; of its low half: the same register number in the half-width mode, or
;; the low part of the memory operand.
(define_split
  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
        (vec_select:<ssehalfvecmode>
          (match_operand:V8FI 1 "nonimmediate_operand")
          (parallel [(const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))]
  "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
   && reload_completed"
  [(const_int 0)]
{
  rtx src = operands[1];
  rtx low_half = (REG_P (src)
                  ? gen_rtx_REG (<ssehalfvecmode>mode, REGNO (src))
                  : gen_lowpart (<ssehalfvecmode>mode, src));

  emit_move_insn (operands[0], low_half);
  DONE;
})
6800
47490470
AI
;; Masked store of elements 0-3 of a V8FI register to memory; the merge
;; source (operand 2) must be the destination memory itself.
(define_insn "vec_extract_lo_<mode>_maskm"
  [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
        (vec_merge:<ssehalfvecmode>
          (vec_select:<ssehalfvecmode>
            (match_operand:V8FI 1 "register_operand" "v")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)]))
          (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
          (match_operand:QI 3 "register_operand" "Yk")))]
  "TARGET_AVX512F
   && rtx_equal_p (operands[2], operands[0])"
  "vextract<shuffletype>64x4\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
6818
;; Extract elements 0-3 of a V8FI value (optionally masked via
;; <mask_name>).  Only the masked form prints an instruction; the
;; unmasked form returns "#".
(define_insn "vec_extract_lo_<mode><mask_name>"
  [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>,v")
        (vec_select:<ssehalfvecmode>
          (match_operand:V8FI 1 "nonimmediate_operand" "v,m")
          (parallel [(const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))]
  "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
  if (!(<mask_applied>))
    return "#";

  return "vextract<shuffletype>64x4\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
}
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
6837
47490470
AI
;; Masked store of elements 4-7 of a V8FI register to memory; the merge
;; source (operand 2) must be the destination memory itself.
(define_insn "vec_extract_hi_<mode>_maskm"
  [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
        (vec_merge:<ssehalfvecmode>
          (vec_select:<ssehalfvecmode>
            (match_operand:V8FI 1 "register_operand" "v")
            (parallel [(const_int 4) (const_int 5)
                       (const_int 6) (const_int 7)]))
          (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
          (match_operand:QI 3 "register_operand" "Yk")))]
  "TARGET_AVX512F
   && rtx_equal_p (operands[2], operands[0])"
  "vextract<shuffletype>64x4\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "memory" "store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
6856
;; Extract elements 4-7 of a V8FI register (optionally masked via
;; <mask_name>).
(define_insn "vec_extract_hi_<mode><mask_name>"
  [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
        (vec_select:<ssehalfvecmode>
          (match_operand:V8FI 1 "register_operand" "v")
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))]
  "TARGET_AVX512F"
  "vextract<shuffletype>64x4\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
6870
;; Masked store of elements 8-15 of a V16FI register to memory; the
;; merge source (operand 2) must be the destination memory itself.
;; The mask operand uses "Yk", like the V8FI *_maskm patterns in this
;; file, instead of "k": in the i386 constraint set "k" also admits k0,
;; which cannot act as a write mask.
(define_insn "vec_extract_hi_<mode>_maskm"
  [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
        (vec_merge:<ssehalfvecmode>
          (vec_select:<ssehalfvecmode>
            (match_operand:V16FI 1 "register_operand" "v")
            (parallel [(const_int 8) (const_int 9)
                       (const_int 10) (const_int 11)
                       (const_int 12) (const_int 13)
                       (const_int 14) (const_int 15)]))
          (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
          (match_operand:QI 3 "register_operand" "Yk")))]
  "TARGET_AVX512DQ
   && rtx_equal_p (operands[2], operands[0])"
  "vextract<shuffletype>32x8\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
6890
0148f0b6
AI
;; Extract elements 8-15 of a V16FI register (optionally masked via
;; <mask_name>).  Alternative 0 is the AVX512DQ 32x8 form and carries the
;; mask operand; alternative 1 is used without AVX512DQ and prints
;; vextracti64x4 with no mask.
(define_insn "vec_extract_hi_<mode><mask_name>"
  [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>,vm")
        (vec_select:<ssehalfvecmode>
          (match_operand:V16FI 1 "register_operand" "v,v")
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)])))]
  "TARGET_AVX512F && <mask_avx512dq_condition>"
  "@
   vextract<shuffletype>32x8\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}
   vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "isa" "avx512dq,noavx512dq")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
6909
0774c160
AI
;; Masked extraction of one 128-bit half of a 256-bit vector.
;;   operand 0: destination (half-width vector)
;;   operand 1: 256-bit source register
;;   operand 2: which half to extract (0 = low, 1 = high)
;;   operand 3: merge source (a constant vector selects zero-masking)
;;   operand 4: QImode mask register
(define_expand "avx512vl_vextractf128<mode>"
  [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
   (match_operand:VI48F_256 1 "register_operand")
   (match_operand:SI 2 "const_0_to_1_operand")
   (match_operand:<ssehalfvecmode> 3 "vector_move_operand")
   (match_operand:QI 4 "register_operand")]
  "TARGET_AVX512DQ && TARGET_AVX512VL"
{
  rtx (*insn)(rtx, rtx, rtx, rtx);
  rtx dest = operands[0];

  /* Memory destination with a constant-vector merge source: compute the
     result in a fresh pseudo and store it afterwards.  Calling force_reg
     on the destination would merely load the old memory contents into a
     temporary, and the extracted value would never reach operands[0].  */
  if (MEM_P (dest) && GET_CODE (operands[3]) == CONST_VECTOR)
    dest = gen_reg_rtx (<ssehalfvecmode>mode);

  switch (INTVAL (operands[2]))
    {
    case 0:
      insn = gen_vec_extract_lo_<mode>_mask;
      break;
    case 1:
      insn = gen_vec_extract_hi_<mode>_mask;
      break;
    default:
      gcc_unreachable ();
    }

  emit_insn (insn (dest, operands[1], operands[3], operands[4]));

  /* Write the result back if a temporary was used.  */
  if (dest != operands[0])
    emit_move_insn (operands[0], dest);
  DONE;
})
6938
;; Extract one 128-bit half of a 256-bit vector: operand 2 selects the
;; low (0) or high (1) half, dispatching to the matching
;; vec_extract_lo/hi generator.
(define_expand "avx_vextractf128<mode>"
  [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
   (match_operand:V_256 1 "register_operand")
   (match_operand:SI 2 "const_0_to_1_operand")]
  "TARGET_AVX"
{
  rtx (*gen_half) (rtx, rtx);

  if (INTVAL (operands[2]) == 0)
    gen_half = gen_vec_extract_lo_<mode>;
  else if (INTVAL (operands[2]) == 1)
    gen_half = gen_vec_extract_hi_<mode>;
  else
    gcc_unreachable ();

  emit_insn (gen_half (operands[0], operands[1]));
  DONE;
})
6962
;; Extract elements 0-7 of a V16FI value (optionally masked via
;; <mask_name>).  Only the masked form prints an instruction; the
;; unmasked form returns "#".  No attributes are set on this pattern.
(define_insn "vec_extract_lo_<mode><mask_name>"
  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
        (vec_select:<ssehalfvecmode>
          (match_operand:V16FI 1 "nonimmediate_operand" "vm,v")
          (parallel [(const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))]
  "TARGET_AVX512F
   && <mask_mode512bit_condition>
   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
  if (!(<mask_applied>))
    return "#";

  return "vextract<shuffletype>32x8\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
})
6980
;; After reload, extracting elements 0-7 of a V16FI value is just a move
;; of its low half: the same register number in the half-width mode, or
;; the low part of the memory operand.
(define_split
  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
        (vec_select:<ssehalfvecmode>
          (match_operand:V16FI 1 "nonimmediate_operand")
          (parallel [(const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))]
  "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
   && reload_completed"
  [(const_int 0)]
{
  rtx src = operands[1];
  rtx low_half = (REG_P (src)
                  ? gen_rtx_REG (<ssehalfvecmode>mode, REGNO (src))
                  : gen_lowpart (<ssehalfvecmode>mode, src));

  emit_move_insn (operands[0], low_half);
  DONE;
})
7001
0774c160
AI
;; Extract elements 0-1 of a VI8F_256 value (optionally masked via
;; <mask_name>).  Only the masked form prints an instruction; the
;; unmasked form returns "#".
;; The mask is printed with <mask_operand2>, as in the other
;; vec_extract_{lo,hi}_<mode><mask_name> patterns in this file, rather
;; than a hard-coded "%{%3%}", so that the whole mask suffix supplied by
;; the subst attribute is emitted.
;; NOTE(review): the "mode" attribute is XI here while the sibling
;; patterns use <sseinsnmode> -- confirm this is intended.
(define_insn "vec_extract_lo_<mode><mask_name>"
  [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=v,m")
        (vec_select:<ssehalfvecmode>
          (match_operand:VI8F_256 1 "nonimmediate_operand" "vm,v")
          (parallel [(const_int 0) (const_int 1)])))]
  "TARGET_AVX
   && <mask_avx512vl_condition> && <mask_avx512dq_condition>
   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
  if (<mask_applied>)
    return "vextract<shuffletype>64x2\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
  else
    return "#";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "memory" "none,store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
7022
;; After reload, extracting elements 0-1 of a VI8F_256 value is just a
;; move of its low half: the same register number in the half-width
;; mode, or the low part of the memory operand.
(define_split
  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
        (vec_select:<ssehalfvecmode>
          (match_operand:VI8F_256 1 "nonimmediate_operand")
          (parallel [(const_int 0) (const_int 1)])))]
  "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))
   && reload_completed"
  [(const_int 0)]
{
  rtx src = operands[1];
  rtx low_half = (REG_P (src)
                  ? gen_rtx_REG (<ssehalfvecmode>mode, REGNO (src))
                  : gen_lowpart (<ssehalfvecmode>mode, src));

  emit_move_insn (operands[0], low_half);
  DONE;
})
95879c72 7040
0774c160
AI
;; Select elements 2 and 3 of a VI8F_256 vector.
;; The mnemonic depends on the enabled ISA: the 64x2 form when both
;; AVX512VL and AVX512DQ are on, the 32x4 form with AVX512VL alone, and
;; the vextract<i128> form otherwise.
;; The "prefix" attribute follows the same split as the VI4F_256 "hi"
;; pattern: the AVX512VL mnemonics are EVEX, so it must not be "vex"
;; unconditionally.
(define_insn "vec_extract_hi_<mode><mask_name>"
  [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=v,<store_mask_constraint>")
        (vec_select:<ssehalfvecmode>
          (match_operand:VI8F_256 1 "register_operand" "v,v")
          (parallel [(const_int 2) (const_int 3)])))]
  "TARGET_AVX && <mask_avx512vl_condition> && <mask_avx512dq_condition>"
{
  if (TARGET_AVX512VL)
    {
      if (TARGET_AVX512DQ)
        return "vextract<shuffletype>64x2\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}";
      else
        return "vextract<shuffletype>32x4\t{$0x1, %1, %0|%0, %1, 0x1}";
    }
  else
    return "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "memory" "none,store")
   (set (attr "prefix")
     (if_then_else
       (match_test "TARGET_AVX512VL")
       (const_string "evex")
       (const_string "vex")))
   (set_attr "mode" "<sseinsnmode>")])
95879c72 7064
0774c160
AI
;; After reload, selecting elements 0..3 of a VI4F_256 value becomes a
;; move from the low part of the source.
(define_split
  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
        (vec_select:<ssehalfvecmode>
          (match_operand:VI4F_256 1 "nonimmediate_operand")
          (parallel [(const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))]
  "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1])) && reload_completed"
  [(const_int 0)]
{
  rtx src = operands[1];
  /* Hard registers are renamed into the half-width mode directly;
     other operands go through gen_lowpart.  */
  rtx low_half = (REG_P (src)
                  ? gen_rtx_REG (<ssehalfvecmode>mode, REGNO (src))
                  : gen_lowpart (<ssehalfvecmode>mode, src));
  emit_move_insn (operands[0], low_half);
  DONE;
})
95879c72 7082
0774c160
AI
7083
;; Select elements 0..3 of a VI4F_256 register.
;; Only the masked form prints an instruction; the unmasked form
;; returns "#" so that it is split.
(define_insn "vec_extract_lo_<mode><mask_name>"
  [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
        (vec_select:<ssehalfvecmode>
          (match_operand:VI4F_256 1 "register_operand" "v")
          (parallel [(const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))]
  "TARGET_AVX && <mask_avx512vl_condition> && <mask_avx512dq_condition>"
{
  if (!<mask_applied>)
    return "#";

  return "vextract<shuffletype>32x4\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
}
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
7102
;; Masked store of elements 0..3 of a VI4F_256 register into memory.
;; Operand 2 is the merge source and must be the same memory as the
;; destination (tied by constraint "0" and checked with rtx_equal_p).
;; Operand 3 is the write mask; it uses "Yk" like the other mask
;; operands in this file, because plain "k" also admits k0, which
;; cannot be used as a write mask.
(define_insn "vec_extract_lo_<mode>_maskm"
  [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
        (vec_merge:<ssehalfvecmode>
          (vec_select:<ssehalfvecmode>
            (match_operand:VI4F_256 1 "register_operand" "v")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)]))
          (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
          (match_operand:QI 3 "register_operand" "Yk")))]
  "TARGET_AVX512VL && TARGET_AVX512F
   && rtx_equal_p (operands[2], operands[0])"
  "vextract<shuffletype>32x4\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
7120
;; Masked store of elements 4..7 of a VI4F_256 register into memory.
;; Operand 2 is the merge source and must be the same memory as the
;; destination (tied by constraint "0" and checked with rtx_equal_p).
;; Operand 3 is the write mask.  It is a QImode mask register, matching
;; the "lo" counterpart of this pattern; it is not a vector, so it must
;; not be declared in <ssehalfvecmode>.  "Yk" is used instead of "k" so
;; that k0, which cannot act as a write mask, is never chosen.
(define_insn "vec_extract_hi_<mode>_maskm"
  [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
        (vec_merge:<ssehalfvecmode>
          (vec_select:<ssehalfvecmode>
            (match_operand:VI4F_256 1 "register_operand" "v")
            (parallel [(const_int 4) (const_int 5)
                       (const_int 6) (const_int 7)]))
          (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
          (match_operand:QI 3 "register_operand" "Yk")))]
  "TARGET_AVX512F && TARGET_AVX512VL
   && rtx_equal_p (operands[2], operands[0])"
  "vextract<shuffletype>32x4\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
7138
;; Select elements 4..7 of a VI4F_256 register.
;; With AVX512VL the 32x4 form (optionally masked) is printed; without
;; it the vextract<i128> form is used.  The "prefix" attribute is
;; chosen by the same test.
(define_insn "vec_extract_hi_<mode><mask_name>"
  [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
        (vec_select:<ssehalfvecmode>
          (match_operand:VI4F_256 1 "register_operand" "v")
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))]
  "TARGET_AVX && <mask_avx512vl_condition>"
{
  if (!TARGET_AVX512VL)
    return "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}";

  return "vextract<shuffletype>32x4\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}";
}
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set (attr "prefix")
     (if_then_else
       (match_test "TARGET_AVX512VL")
       (const_string "evex")
       (const_string "vex")))
   (set_attr "mode" "<sseinsnmode>")])
95879c72 7161
c003c6d6
AI
;; Select elements 0..15 of a V32HI value.  Never printed directly:
;; after reload it is split into a V16HI move from the low part of the
;; source.
(define_insn_and_split "vec_extract_lo_v32hi"
  [(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,m")
        (vec_select:V16HI
          (match_operand:V32HI 1 "nonimmediate_operand" "vm,v")
          (parallel [(const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)])))]
  "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  /* Re-express the source in the half-width mode: by register number
     for a register, at offset 0 for memory.  */
  operands[1] = (REG_P (operands[1])
                 ? gen_rtx_REG (V16HImode, REGNO (operands[1]))
                 : adjust_address (operands[1], V16HImode, 0));
})
7184
;; Select elements 16..31 of a V32HI register into a register or
;; memory, printed as vextracti64x4 with immediate 1.
(define_insn "vec_extract_hi_v32hi"
  [(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,m")
        (vec_select:V16HI
          (match_operand:V32HI 1 "register_operand" "v,v")
          (parallel [(const_int 16) (const_int 17)
                     (const_int 18) (const_int 19)
                     (const_int 20) (const_int 21)
                     (const_int 22) (const_int 23)
                     (const_int 24) (const_int 25)
                     (const_int 26) (const_int 27)
                     (const_int 28) (const_int 29)
                     (const_int 30) (const_int 31)])))]
  "TARGET_AVX512F"
  "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "memory" "none,store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
7205
;; Select elements 0..7 of a V16HI value.  Never printed directly:
;; after reload it is split into a V8HI move from the low part of the
;; source.
(define_insn_and_split "vec_extract_lo_v16hi"
  [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
        (vec_select:V8HI
          (match_operand:V16HI 1 "nonimmediate_operand" "xm,x")
          (parallel [(const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))]
  "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  /* Re-express the source in the half-width mode: by register number
     for a register, at offset 0 for memory.  */
  operands[1] = (REG_P (operands[1])
                 ? gen_rtx_REG (V8HImode, REGNO (operands[1]))
                 : adjust_address (operands[1], V8HImode, 0));
})
95879c72
L
7224
;; Select elements 8..15 of a V16HI register into a register or memory,
;; printed as vextract%~128 with immediate 1.
(define_insn "vec_extract_hi_v16hi"
  [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
        (vec_select:V8HI
          (match_operand:V16HI 1 "register_operand" "x,x")
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)])))]
  "TARGET_AVX"
  "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "memory" "none,store")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
95879c72 7241
c003c6d6
AI
;; Select elements 0..31 of a V64QI value.  Never printed directly:
;; after reload it is split into a V32QI move from the low part of the
;; source.
(define_insn_and_split "vec_extract_lo_v64qi"
  [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
        (vec_select:V32QI
          (match_operand:V64QI 1 "nonimmediate_operand" "vm,v")
          (parallel [(const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 16) (const_int 17)
                     (const_int 18) (const_int 19)
                     (const_int 20) (const_int 21)
                     (const_int 22) (const_int 23)
                     (const_int 24) (const_int 25)
                     (const_int 26) (const_int 27)
                     (const_int 28) (const_int 29)
                     (const_int 30) (const_int 31)])))]
  "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  /* Re-express the source in the half-width mode: by register number
     for a register, at offset 0 for memory.  */
  operands[1] = (REG_P (operands[1])
                 ? gen_rtx_REG (V32QImode, REGNO (operands[1]))
                 : adjust_address (operands[1], V32QImode, 0));
})
7272
;; Select elements 32..63 of a V64QI register into a register or
;; memory, printed as vextracti64x4 with immediate 1.
(define_insn "vec_extract_hi_v64qi"
  [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
        (vec_select:V32QI
          (match_operand:V64QI 1 "register_operand" "v,v")
          (parallel [(const_int 32) (const_int 33)
                     (const_int 34) (const_int 35)
                     (const_int 36) (const_int 37)
                     (const_int 38) (const_int 39)
                     (const_int 40) (const_int 41)
                     (const_int 42) (const_int 43)
                     (const_int 44) (const_int 45)
                     (const_int 46) (const_int 47)
                     (const_int 48) (const_int 49)
                     (const_int 50) (const_int 51)
                     (const_int 52) (const_int 53)
                     (const_int 54) (const_int 55)
                     (const_int 56) (const_int 57)
                     (const_int 58) (const_int 59)
                     (const_int 60) (const_int 61)
                     (const_int 62) (const_int 63)])))]
  "TARGET_AVX512F"
  "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "memory" "none,store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
7301
;; Select elements 0..15 of a V32QI value.  Never printed directly:
;; after reload it is split into a V16QI move from the low part of the
;; source.
(define_insn_and_split "vec_extract_lo_v32qi"
  [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
        (vec_select:V16QI
          (match_operand:V32QI 1 "nonimmediate_operand" "xm,x")
          (parallel [(const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)])))]
  "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  /* Re-express the source in the half-width mode: by register number
     for a register, at offset 0 for memory.  */
  operands[1] = (REG_P (operands[1])
                 ? gen_rtx_REG (V16QImode, REGNO (operands[1]))
                 : adjust_address (operands[1], V16QImode, 0));
})
95879c72
L
7324
;; Select elements 16..31 of a V32QI register into a register or
;; memory, printed as vextract%~128 with immediate 1.
(define_insn "vec_extract_hi_v32qi"
  [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
        (vec_select:V16QI
          (match_operand:V32QI 1 "register_operand" "x,x")
          (parallel [(const_int 16) (const_int 17)
                     (const_int 18) (const_int 19)
                     (const_int 20) (const_int 21)
                     (const_int 22) (const_int 23)
                     (const_int 24) (const_int 25)
                     (const_int 26) (const_int 27)
                     (const_int 28) (const_int 29)
                     (const_int 30) (const_int 31)])))]
  "TARGET_AVX"
  "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "memory" "none,store")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
95879c72 7345
6bec6c98
UB
;; Modes handled by vec_extract patterns.
;; One row per element type, widest vector first; each mode that
;; carries a condition string is enabled only under that condition.
(define_mode_iterator VEC_EXTRACT_MODE
  [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
   (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
   (V16SI "TARGET_AVX512F")  (V8SI "TARGET_AVX")  V4SI
   (V8DI "TARGET_AVX512F")   (V4DI "TARGET_AVX")  V2DI
   (V16SF "TARGET_AVX512F")  (V8SF "TARGET_AVX")  V4SF
   (V8DF "TARGET_AVX512F")   (V4DF "TARGET_AVX")  V2DF])
6bec6c98 7354
;; Extract the scalar element whose index is operand 2 from vector
;; operand 1 into operand 0.  All the work is delegated to
;; ix86_expand_vector_extract.
(define_expand "vec_extract<mode>"
  [(match_operand:<ssescalarmode> 0 "register_operand")
   (match_operand:VEC_EXTRACT_MODE 1 "register_operand")
   (match_operand 2 "const_int_operand")]
  "TARGET_SSE"
{
  rtx dest = operands[0];
  rtx vec = operands[1];
  ix86_expand_vector_extract (false, dest, vec, INTVAL (operands[2]));
  DONE;
})
7365
ef719a44
RH
7366;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7367;;
7368;; Parallel double-precision floating point element swizzling
7369;;
7370;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7371
;; Interleave the odd-indexed elements of two V8DF operands:
;; from the concatenation of operands 1 and 2, pick 1,9,3,11,5,13,7,15.
(define_insn "<mask_codefor>avx512f_unpckhpd512<mask_name>"
  [(set (match_operand:V8DF 0 "register_operand" "=v")
        (vec_select:V8DF
          (vec_concat:V16DF
            (match_operand:V8DF 1 "register_operand" "v")
            (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
          (parallel [(const_int 1) (const_int 9)
                     (const_int 3) (const_int 11)
                     (const_int 5) (const_int 13)
                     (const_int 7) (const_int 15)])))]
  "TARGET_AVX512F"
  "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "evex")
   (set_attr "mode" "V8DF")])
7387
;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; From the concatenation of operands 1 and 2 this picks 1,5,3,7.
(define_insn "avx_unpckhpd256<mask_name>"
  [(set (match_operand:V4DF 0 "register_operand" "=v")
        (vec_select:V4DF
          (vec_concat:V8DF
            (match_operand:V4DF 1 "register_operand" "v")
            (match_operand:V4DF 2 "nonimmediate_operand" "vm"))
          (parallel [(const_int 1) (const_int 5)
                     (const_int 3) (const_int 7)])))]
  "TARGET_AVX && <mask_avx512vl_condition>"
  "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4DF")])
7402
1e27129f
L
;; High interleave of two V4DF vectors, built in three steps:
;;   tmp3 = elements 0,4,2,6 of (op1 ++ op2)
;;   tmp4 = elements 1,5,3,7 of (op1 ++ op2)
;;   op0  = elements 2,3,6,7 of (tmp3 ++ tmp4)
;; The two temporaries are fresh pseudos created in the preparation
;; statements.
(define_expand "vec_interleave_highv4df"
  [(set (match_dup 3)
        (vec_select:V4DF
          (vec_concat:V8DF
            (match_operand:V4DF 1 "register_operand" "x")
            (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 0) (const_int 4)
                     (const_int 2) (const_int 6)])))
   (set (match_dup 4)
        (vec_select:V4DF
          (vec_concat:V8DF
            (match_dup 1)
            (match_dup 2))
          (parallel [(const_int 1) (const_int 5)
                     (const_int 3) (const_int 7)])))
   (set (match_operand:V4DF 0 "register_operand")
        (vec_select:V4DF
          (vec_concat:V8DF
            (match_dup 3)
            (match_dup 4))
          (parallel [(const_int 2) (const_int 3)
                     (const_int 6) (const_int 7)])))]
  "TARGET_AVX"
{
  for (int tmp = 3; tmp <= 4; tmp++)
    operands[tmp] = gen_reg_rtx (V4DFmode);
})
7430
7431
8fd83a54
AI
;; Masked V2DF high unpack: elements 1 and 3 of (op1 ++ op2), merged
;; with operand 3 under the mask in operand 4.
(define_insn "avx512vl_unpckhpd128_mask"
  [(set (match_operand:V2DF 0 "register_operand" "=v")
        (vec_merge:V2DF
          (vec_select:V2DF
            (vec_concat:V4DF
              (match_operand:V2DF 1 "register_operand" "v")
              (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
            (parallel [(const_int 1) (const_int 3)]))
          (match_operand:V2DF 3 "vector_move_operand" "0C")
          (match_operand:QI 4 "register_operand" "Yk")))]
  "TARGET_AVX512VL"
  "vunpckhpd\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "evex")
   (set_attr "mode" "V2DF")])
7447
;; V2DF high interleave: elements 1 and 3 of (op1 ++ op2).
;; If ix86_vec_interleave_v2df_operator_ok rejects the operand
;; combination, operand 2 is forced into a register.
(define_expand "vec_interleave_highv2df"
  [(set (match_operand:V2DF 0 "register_operand")
        (vec_select:V2DF
          (vec_concat:V4DF
            (match_operand:V2DF 1 "nonimmediate_operand")
            (match_operand:V2DF 2 "nonimmediate_operand"))
          (parallel [(const_int 1)
                     (const_int 3)])))]
  "TARGET_SSE2"
{
  bool operands_ok = ix86_vec_interleave_v2df_operator_ok (operands, 1);
  if (!operands_ok)
    operands[2] = force_reg (V2DFmode, operands[2]);
})
ffbaf337 7461
;; V2DF high interleave with six alternatives; the "isa" attribute
;; gates each one (noavx, avx, sse3, noavx, avx, any).  The operand
;; combination must also be accepted by
;; ix86_vec_interleave_v2df_operator_ok.
(define_insn "*vec_interleave_highv2df"
  [(set (match_operand:V2DF 0 "nonimmediate_operand"     "=x,x,x,x,x,m")
        (vec_select:V2DF
          (vec_concat:V4DF
            (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,o,o,o,x")
            (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,0,x,0"))
          (parallel [(const_int 1)
                     (const_int 3)])))]
  "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
  "@
   unpckhpd\t{%2, %0|%0, %2}
   vunpckhpd\t{%2, %1, %0|%0, %1, %2}
   %vmovddup\t{%H1, %0|%0, %H1}
   movlpd\t{%H1, %0|%0, %H1}
   vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
   %vmovhpd\t{%1, %0|%q0, %1}"
  [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
   (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
   (set_attr "ssememalign" "64")
   (set_attr "prefix_data16" "*,*,*,1,*,1")
   (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
   (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
95879c72 7484
;; Duplicate the even-indexed elements of a V8DF operand: elements
;; 0,8,2,10,4,12,6,14 of the operand concatenated with itself.
(define_expand "avx512f_movddup512<mask_name>"
  [(set (match_operand:V8DF 0 "register_operand")
        (vec_select:V8DF
          (vec_concat:V16DF
            (match_operand:V8DF 1 "nonimmediate_operand")
            (match_dup 1))
          (parallel [(const_int 0) (const_int 8)
                     (const_int 2) (const_int 10)
                     (const_int 4) (const_int 12)
                     (const_int 6) (const_int 14)])))]
  "TARGET_AVX512F")
7496
;; Interleave the even-indexed elements of two V8DF operands:
;; elements 0,8,2,10,4,12,6,14 of (op1 ++ op2).
(define_expand "avx512f_unpcklpd512<mask_name>"
  [(set (match_operand:V8DF 0 "register_operand")
        (vec_select:V8DF
          (vec_concat:V16DF
            (match_operand:V8DF 1 "register_operand")
            (match_operand:V8DF 2 "nonimmediate_operand"))
          (parallel [(const_int 0) (const_int 8)
                     (const_int 2) (const_int 10)
                     (const_int 4) (const_int 12)
                     (const_int 6) (const_int 14)])))]
  "TARGET_AVX512F")
7508
;; Insn behind both 512-bit expanders above.
;; Alternative 0: operand 2 is tied to operand 1 -> vmovddup.
;; Alternative 1: two distinct sources -> vunpcklpd.
(define_insn "*avx512f_unpcklpd512<mask_name>"
  [(set (match_operand:V8DF 0 "register_operand" "=v,v")
        (vec_select:V8DF
          (vec_concat:V16DF
            (match_operand:V8DF 1 "nonimmediate_operand" "vm, v")
            (match_operand:V8DF 2 "nonimmediate_operand" "1 ,vm"))
          (parallel [(const_int 0) (const_int 8)
                     (const_int 2) (const_int 10)
                     (const_int 4) (const_int 12)
                     (const_int 6) (const_int 14)])))]
  "TARGET_AVX512F"
  "@
   vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}
   vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "evex")
   (set_attr "mode" "V8DF")])
7526
;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Elements 0,4,2,6 of the operand concatenated with itself.
(define_expand "avx_movddup256<mask_name>"
  [(set (match_operand:V4DF 0 "register_operand")
        (vec_select:V4DF
          (vec_concat:V8DF
            (match_operand:V4DF 1 "nonimmediate_operand")
            (match_dup 1))
          (parallel [(const_int 0) (const_int 4)
                     (const_int 2) (const_int 6)])))]
  "TARGET_AVX && <mask_avx512vl_condition>")
ef719a44 7537
;; 256-bit low unpack: elements 0,4,2,6 of (op1 ++ op2).
(define_expand "avx_unpcklpd256<mask_name>"
  [(set (match_operand:V4DF 0 "register_operand")
        (vec_select:V4DF
          (vec_concat:V8DF
            (match_operand:V4DF 1 "register_operand")
            (match_operand:V4DF 2 "nonimmediate_operand"))
          (parallel [(const_int 0) (const_int 4)
                     (const_int 2) (const_int 6)])))]
  "TARGET_AVX && <mask_avx512vl_condition>")
5e04b3b6 7547
8fd83a54
AI
;; Insn behind both 256-bit expanders above.
;; Alternative 0: two distinct sources -> vunpcklpd.
;; Alternative 1: operand 2 is tied to memory operand 1 -> vmovddup.
(define_insn "*avx_unpcklpd256<mask_name>"
  [(set (match_operand:V4DF 0 "register_operand"         "=v,v")
        (vec_select:V4DF
          (vec_concat:V8DF
            (match_operand:V4DF 1 "nonimmediate_operand" " v,m")
            (match_operand:V4DF 2 "nonimmediate_operand" "vm,1"))
          (parallel [(const_int 0) (const_int 4)
                     (const_int 2) (const_int 6)])))]
  "TARGET_AVX && <mask_avx512vl_condition>"
  "@
   vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
   vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4DF")])
7563
1e27129f
L
;; Low interleave of two V4DF vectors, built in three steps:
;;   tmp3 = elements 0,4,2,6 of (op1 ++ op2)
;;   tmp4 = elements 1,5,3,7 of (op1 ++ op2)
;;   op0  = elements 0,1,4,5 of (tmp3 ++ tmp4)
;; The two temporaries are fresh pseudos created in the preparation
;; statements.
(define_expand "vec_interleave_lowv4df"
  [(set (match_dup 3)
        (vec_select:V4DF
          (vec_concat:V8DF
            (match_operand:V4DF 1 "register_operand" "x")
            (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 0) (const_int 4)
                     (const_int 2) (const_int 6)])))
   (set (match_dup 4)
        (vec_select:V4DF
          (vec_concat:V8DF
            (match_dup 1)
            (match_dup 2))
          (parallel [(const_int 1) (const_int 5)
                     (const_int 3) (const_int 7)])))
   (set (match_operand:V4DF 0 "register_operand")
        (vec_select:V4DF
          (vec_concat:V8DF
            (match_dup 3)
            (match_dup 4))
          (parallel [(const_int 0) (const_int 1)
                     (const_int 4) (const_int 5)])))]
  "TARGET_AVX"
{
  for (int tmp = 3; tmp <= 4; tmp++)
    operands[tmp] = gen_reg_rtx (V4DFmode);
})
7591
8fd83a54
AI
;; Masked V2DF low unpack: elements 0 and 2 of (op1 ++ op2), merged
;; with operand 3 under the mask in operand 4.
(define_insn "avx512vl_unpcklpd128_mask"
  [(set (match_operand:V2DF 0 "register_operand" "=v")
        (vec_merge:V2DF
          (vec_select:V2DF
            (vec_concat:V4DF
              (match_operand:V2DF 1 "register_operand" "v")
              (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
            (parallel [(const_int 0) (const_int 2)]))
          (match_operand:V2DF 3 "vector_move_operand" "0C")
          (match_operand:QI 4 "register_operand" "Yk")))]
  "TARGET_AVX512VL"
  "vunpcklpd\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "evex")
   (set_attr "mode" "V2DF")])
7607
;; V2DF low interleave: elements 0 and 2 of (op1 ++ op2).
;; If ix86_vec_interleave_v2df_operator_ok rejects the operand
;; combination, operand 1 is forced into a register.
(define_expand "vec_interleave_lowv2df"
  [(set (match_operand:V2DF 0 "register_operand")
        (vec_select:V2DF
          (vec_concat:V4DF
            (match_operand:V2DF 1 "nonimmediate_operand")
            (match_operand:V2DF 2 "nonimmediate_operand"))
          (parallel [(const_int 0)
                     (const_int 2)])))]
  "TARGET_SSE2"
{
  bool operands_ok = ix86_vec_interleave_v2df_operator_ok (operands, 0);
  if (!operands_ok)
    operands[1] = force_reg (V2DFmode, operands[1]);
})
ffbaf337 7621
;; V2DF low interleave with six alternatives; the "isa" attribute
;; gates each one (noavx, avx, sse3, noavx, avx, any).  The operand
;; combination must also be accepted by
;; ix86_vec_interleave_v2df_operator_ok.
(define_insn "*vec_interleave_lowv2df"
  [(set (match_operand:V2DF 0 "nonimmediate_operand"     "=x,x,x,x,x,o")
        (vec_select:V2DF
          (vec_concat:V4DF
            (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,m,0,x,0")
            (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,m,m,x"))
          (parallel [(const_int 0)
                     (const_int 2)])))]
  "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
  "@
   unpcklpd\t{%2, %0|%0, %2}
   vunpcklpd\t{%2, %1, %0|%0, %1, %2}
   %vmovddup\t{%1, %0|%0, %q1}
   movhpd\t{%2, %0|%0, %q2}
   vmovhpd\t{%2, %1, %0|%0, %1, %q2}
   %vmovlpd\t{%2, %H0|%H0, %2}"
  [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
   (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
   (set_attr "ssememalign" "64")
   (set_attr "prefix_data16" "*,*,*,1,*,1")
   (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
   (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
95879c72 7644
;; Storing a register's element 0 duplicated into both halves of a V2DF
;; memory location: after reload, emit two DFmode stores of the same
;; register, at byte offsets 0 and 8.
(define_split
  [(set (match_operand:V2DF 0 "memory_operand")
        (vec_select:V2DF
          (vec_concat:V4DF
            (match_operand:V2DF 1 "register_operand")
            (match_dup 1))
          (parallel [(const_int 0)
                     (const_int 2)])))]
  "TARGET_SSE3 && reload_completed"
  [(const_int 0)]
{
  rtx elt0 = gen_rtx_REG (DFmode, REGNO (operands[1]));
  for (int byte_off = 0; byte_off < 16; byte_off += 8)
    emit_move_insn (adjust_address (operands[0], DFmode, byte_off), elt0);
  DONE;
})
7661
;; Selecting the same element (index N and N + 2, N in 0..1) from a
;; V2DF memory operand concatenated with itself is a vec_duplicate of
;; the DFmode element at byte offset N * 8.
(define_split
  [(set (match_operand:V2DF 0 "register_operand")
        (vec_select:V2DF
          (vec_concat:V4DF
            (match_operand:V2DF 1 "memory_operand")
            (match_dup 1))
          (parallel [(match_operand:SI 2 "const_0_to_1_operand")
                     (match_operand:SI 3 "const_int_operand")])))]
  "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
  [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
{
  rtx mem = operands[1];
  operands[1] = adjust_address (mem, DFmode, INTVAL (operands[2]) * 8);
})
7675
;; Scalar scalef: element 0 comes from UNSPEC_SCALEF of operands 1 and
;; 2; the remaining elements are taken from operand 1 (vec_merge with
;; mask 1).
(define_insn "avx512f_vmscalef<mode><round_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=v")
        (vec_merge:VF_128
          (unspec:VF_128
            [(match_operand:VF_128 1 "register_operand" "v")
             (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>")]
            UNSPEC_SCALEF)
          (match_dup 1)
          (const_int 1)))]
  "TARGET_AVX512F"
  "vscalef<ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<ssescalarmode>")])
7689
b040ded3
AI
;; Packed scalef: UNSPEC_SCALEF of operands 1 and 2, with optional
;; masking and rounding variants generated through the name's
;; <mask_name> and <round_name> substitutions.
(define_insn "<avx512>_scalef<mode><mask_name><round_name>"
  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
        (unspec:VF_AVX512VL
          [(match_operand:VF_AVX512VL 1 "register_operand" "v")
           (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")]
          UNSPEC_SCALEF))]
  "TARGET_AVX512F"
  "vscalef<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])
7700
6ead0238
AI
;; Zero-masking vpternlog: forwards to the _maskz_1 generator, passing
;; an all-zero vector as the merge source ahead of the mask operand.
(define_expand "<avx512>_vternlog<mode>_maskz"
  [(match_operand:VI48_AVX512VL 0 "register_operand")
   (match_operand:VI48_AVX512VL 1 "register_operand")
   (match_operand:VI48_AVX512VL 2 "register_operand")
   (match_operand:VI48_AVX512VL 3 "nonimmediate_operand")
   (match_operand:SI 4 "const_0_to_255_operand")
   (match_operand:<avx512fmaskmode> 5 "register_operand")]
  "TARGET_AVX512F"
{
  rtx zero_vec = CONST0_RTX (<MODE>mode);
  emit_insn (gen_<avx512>_vternlog<mode>_maskz_1 (
    operands[0], operands[1], operands[2], operands[3],
    operands[4], zero_vec, operands[5]));
  DONE;
})
7715
6ead0238
AI
;; vpternlog: UNSPEC_VTERNLOG of three vector operands and an 8-bit
;; immediate.  Operand 1 is tied to the destination ("0").
(define_insn "<avx512>_vternlog<mode><sd_maskz_name>"
  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
        (unspec:VI48_AVX512VL
          [(match_operand:VI48_AVX512VL 1 "register_operand" "0")
           (match_operand:VI48_AVX512VL 2 "register_operand" "v")
           (match_operand:VI48_AVX512VL 3 "nonimmediate_operand" "vm")
           (match_operand:SI 4 "const_0_to_255_operand")]
          UNSPEC_VTERNLOG))]
  "TARGET_AVX512F"
  "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3, %4}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
7729
6ead0238
AI
;; Merge-masking vpternlog: the UNSPEC_VTERNLOG result is merged with
;; operand 1 (also tied to the destination) under the mask in
;; operand 5.
(define_insn "<avx512>_vternlog<mode>_mask"
  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
        (vec_merge:VI48_AVX512VL
          (unspec:VI48_AVX512VL
            [(match_operand:VI48_AVX512VL 1 "register_operand" "0")
             (match_operand:VI48_AVX512VL 2 "register_operand" "v")
             (match_operand:VI48_AVX512VL 3 "nonimmediate_operand" "vm")
             (match_operand:SI 4 "const_0_to_255_operand")]
            UNSPEC_VTERNLOG)
          (match_dup 1)
          (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
  "TARGET_AVX512F"
  "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0%{%5%}|%0%{%5%}, %2, %3, %4}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
7746
b040ded3
AI
;; Packed getexp: UNSPEC_GETEXP of operand 1, with optional masking and
;; SAE variants generated through the name's substitutions.
(define_insn "<avx512>_getexp<mode><mask_name><round_saeonly_name>"
  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
        (unspec:VF_AVX512VL
          [(match_operand:VF_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
          UNSPEC_GETEXP))]
  "TARGET_AVX512F"
  "vgetexp<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])
7755
;; Scalar getexp: element 0 comes from UNSPEC_GETEXP of operands 1 and
;; 2; the remaining elements are taken from operand 1 (vec_merge with
;; mask 1).
(define_insn "avx512f_sgetexp<mode><round_saeonly_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=v")
        (vec_merge:VF_128
          (unspec:VF_128
            [(match_operand:VF_128 1 "register_operand" "v")
             (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
            UNSPEC_GETEXP)
          (match_dup 1)
          (const_int 1)))]
  "TARGET_AVX512F"
  "vgetexp<ssescalarmodesuffix>\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %2<round_saeonly_op3>}"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<ssescalarmode>")])
7769
6ead0238
AI
;; valign: UNSPEC_ALIGN of two vector operands and an 8-bit immediate,
;; with an optional masked variant.
(define_insn "<mask_codefor><avx512>_align<mode><mask_name>"
  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
        (unspec:VI48_AVX512VL
          [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
           (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
           (match_operand:SI 3 "const_0_to_255_operand")]
          UNSPEC_ALIGN))]
  "TARGET_AVX512F"
  "valign<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
7780
47490470
AI
;; Masked 512-bit shufps.  The 8-bit immediate in operand 3 holds four
;; 2-bit selectors.  They are expanded into sixteen explicit element
;; indices for gen_avx512f_shufps512_1_mask: per group of four result
;; elements, selectors 0 and 1 are offset by 0/4/8/12 and selectors 2
;; and 3 by 16/20/24/28.
(define_expand "avx512f_shufps512_mask"
  [(match_operand:V16SF 0 "register_operand")
   (match_operand:V16SF 1 "register_operand")
   (match_operand:V16SF 2 "nonimmediate_operand")
   (match_operand:SI 3 "const_0_to_255_operand")
   (match_operand:V16SF 4 "register_operand")
   (match_operand:HI 5 "register_operand")]
  "TARGET_AVX512F"
{
  const int imm = INTVAL (operands[3]);
  /* The four 2-bit fields of the immediate, lowest first.  */
  const int sel0 = (imm >> 0) & 3;
  const int sel1 = (imm >> 2) & 3;
  const int sel2 = (imm >> 4) & 3;
  const int sel3 = (imm >> 6) & 3;

  emit_insn (gen_avx512f_shufps512_1_mask (operands[0], operands[1], operands[2],
                                          GEN_INT (sel0),
                                          GEN_INT (sel1),
                                          GEN_INT (sel2 + 16),
                                          GEN_INT (sel3 + 16),
                                          GEN_INT (sel0 + 4),
                                          GEN_INT (sel1 + 4),
                                          GEN_INT (sel2 + 20),
                                          GEN_INT (sel3 + 20),
                                          GEN_INT (sel0 + 8),
                                          GEN_INT (sel1 + 8),
                                          GEN_INT (sel2 + 24),
                                          GEN_INT (sel3 + 24),
                                          GEN_INT (sel0 + 12),
                                          GEN_INT (sel1 + 12),
                                          GEN_INT (sel2 + 28),
                                          GEN_INT (sel3 + 28),
                                          operands[4], operands[5]));
  DONE;
})
7811
8b08db1e 7812
b040ded3
AI
;; Zero-masking vfixupimm: forwards to the _maskz_1 generator, passing
;; an all-zero vector as the merge source ahead of the mask operand.
;; <round_saeonly_expand_operand6> appends the extra operand of the
;; SAE variant when that variant is being generated.
(define_expand "<avx512>_fixupimm<mode>_maskz<round_saeonly_expand_name>"
  [(match_operand:VF_AVX512VL 0 "register_operand")
   (match_operand:VF_AVX512VL 1 "register_operand")
   (match_operand:VF_AVX512VL 2 "register_operand")
   (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>")
   (match_operand:SI 4 "const_0_to_255_operand")
   (match_operand:<avx512fmaskmode> 5 "register_operand")]
  "TARGET_AVX512F"
{
  rtx zero_vec = CONST0_RTX (<MODE>mode);
  emit_insn (gen_<avx512>_fixupimm<mode>_maskz_1<round_saeonly_expand_name> (
    operands[0], operands[1], operands[2], operands[3],
    operands[4], zero_vec, operands[5]
    <round_saeonly_expand_operand6>));
  DONE;
})
7828
b040ded3
AI
;; Vector fixupimm, expressed as an opaque UNSPEC_FIXUPIMM.  Operand 1 is
;; tied to the destination ("0"); operand 3 is the integer-vector source and
;; operand 4 the 8-bit immediate.  Masking and SAE text are spliced into the
;; template by the <sd_maskz_name> / <round_saeonly_name> substitutions.
(define_insn "<avx512>_fixupimm<mode><sd_maskz_name><round_saeonly_name>"
  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
        (unspec:VF_AVX512VL
          [(match_operand:VF_AVX512VL 1 "register_operand" "0")
           (match_operand:VF_AVX512VL 2 "register_operand" "v")
           (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>")
           (match_operand:SI 4 "const_0_to_255_operand")]
          UNSPEC_FIXUPIMM))]
  "TARGET_AVX512F"
  "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3<round_saeonly_sd_mask_op5>, %4}"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])
7841
b040ded3
AI
;; Merge-masked vector fixupimm: the UNSPEC_FIXUPIMM result is vec_merge'd
;; with operand 1 (which is also the tied destination) under the mask
;; register in operand 5.
(define_insn "<avx512>_fixupimm<mode>_mask<round_saeonly_name>"
  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
        (vec_merge:VF_AVX512VL
          (unspec:VF_AVX512VL
            [(match_operand:VF_AVX512VL 1 "register_operand" "0")
             (match_operand:VF_AVX512VL 2 "register_operand" "v")
             (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>")
             (match_operand:SI 4 "const_0_to_255_operand")]
            UNSPEC_FIXUPIMM)
          (match_dup 1)
          (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
  "TARGET_AVX512F"
  "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %3<round_saeonly_op6>, %4}"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])
7857
;; Zero-masking expander for scalar fixupimm (VF_128 modes).  It forwards to
;; the *_maskz_1 pattern, inserting an all-zero vector of the operation mode
;; between the immediate (operand 4) and the mask register (operand 5).
(define_expand "avx512f_sfixupimm<mode>_maskz<round_saeonly_expand_name>"
  [(match_operand:VF_128 0 "register_operand")
   (match_operand:VF_128 1 "register_operand")
   (match_operand:VF_128 2 "register_operand")
   (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>")
   (match_operand:SI 4 "const_0_to_255_operand")
   (match_operand:<avx512fmaskmode> 5 "register_operand")]
  "TARGET_AVX512F"
{
  rtx zero_vec = CONST0_RTX (<MODE>mode);

  emit_insn (gen_avx512f_sfixupimm<mode>_maskz_1<round_saeonly_expand_name> (
        operands[0], operands[1], operands[2], operands[3],
        operands[4], zero_vec, operands[5]
        <round_saeonly_expand_operand6>));
  DONE;
})
7873
;; Scalar fixupimm.  The outer vec_merge with (const_int 1) takes only
;; element 0 from the UNSPEC_FIXUPIMM result; the remaining elements come
;; from operand 1, which is tied to the destination.
(define_insn "avx512f_sfixupimm<mode><sd_maskz_name><round_saeonly_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=v")
        (vec_merge:VF_128
          (unspec:VF_128
            [(match_operand:VF_128 1 "register_operand" "0")
             (match_operand:VF_128 2 "register_operand" "v")
             (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
             (match_operand:SI 4 "const_0_to_255_operand")]
            UNSPEC_FIXUPIMM)
          (match_dup 1)
          (const_int 1)))]
  "TARGET_AVX512F"
  "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3<round_saeonly_sd_mask_op5>, %4}"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<ssescalarmode>")])
7889
;; Merge-masked scalar fixupimm.  The inner vec_merge (const_int 1) keeps
;; only element 0 of the UNSPEC_FIXUPIMM result; the outer vec_merge then
;; blends that with operand 1 under the mask register in operand 5.
(define_insn "avx512f_sfixupimm<mode>_mask<round_saeonly_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=v")
        (vec_merge:VF_128
          (vec_merge:VF_128
            (unspec:VF_128
              [(match_operand:VF_128 1 "register_operand" "0")
               (match_operand:VF_128 2 "register_operand" "v")
               (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
               (match_operand:SI 4 "const_0_to_255_operand")]
              UNSPEC_FIXUPIMM)
            (match_dup 1)
            (const_int 1))
          (match_dup 1)
          (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
  "TARGET_AVX512F"
  "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %3<round_saeonly_op6>, %4}"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<ssescalarmode>")])
7908
b040ded3
AI
;; Vector vrndscale, expressed as UNSPEC_ROUND of one vector source
;; (operand 1) and an 8-bit immediate (operand 2).  Optional masking and SAE
;; text come from the <mask_name> / <round_saeonly_name> substitutions.
(define_insn "<avx512>_rndscale<mode><mask_name><round_saeonly_name>"
  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
        (unspec:VF_AVX512VL
          [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>")
           (match_operand:SI 2 "const_0_to_255_operand")]
          UNSPEC_ROUND))]
  "TARGET_AVX512F"
  "vrndscale<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}"
  [(set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])
7920
;; Scalar vrndscale.  The vec_merge with (const_int 1) takes element 0 from
;; the UNSPEC_ROUND result and all other elements from operand 1.
(define_insn "avx512f_rndscale<mode><round_saeonly_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=v")
        (vec_merge:VF_128
          (unspec:VF_128
            [(match_operand:VF_128 1 "register_operand" "v")
             (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
             (match_operand:SI 3 "const_0_to_255_operand")]
            UNSPEC_ROUND)
          (match_dup 1)
          (const_int 1)))]
  "TARGET_AVX512F"
  "vrndscale<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}"
  [(set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])
7936
c003c6d6 7937;; One bit in mask selects 2 elements.
;; 512-bit vshufps as a vec_select from the 32-element concatenation of
;; operands 1 and 2.  Operands 3..18 are the sixteen element indices.  The
;; insn condition requires every later group of four indices to equal the
;; first group (operands 3..6) plus 4, 8 and 12 respectively, so that the
;; whole selection can be encoded in a single 8-bit immediate.
(define_insn "avx512f_shufps512_1<mask_name>"
  [(set (match_operand:V16SF 0 "register_operand" "=v")
        (vec_select:V16SF
          (vec_concat:V32SF
            (match_operand:V16SF 1 "register_operand" "v")
            (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
          (parallel [(match_operand 3  "const_0_to_3_operand")
                     (match_operand 4  "const_0_to_3_operand")
                     (match_operand 5  "const_16_to_19_operand")
                     (match_operand 6  "const_16_to_19_operand")
                     (match_operand 7  "const_4_to_7_operand")
                     (match_operand 8  "const_4_to_7_operand")
                     (match_operand 9  "const_20_to_23_operand")
                     (match_operand 10 "const_20_to_23_operand")
                     (match_operand 11 "const_8_to_11_operand")
                     (match_operand 12 "const_8_to_11_operand")
                     (match_operand 13 "const_24_to_27_operand")
                     (match_operand 14 "const_24_to_27_operand")
                     (match_operand 15 "const_12_to_15_operand")
                     (match_operand 16 "const_12_to_15_operand")
                     (match_operand 17 "const_28_to_31_operand")
                     (match_operand 18 "const_28_to_31_operand")])))]
  "TARGET_AVX512F
   && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
       && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
       && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
       && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4)
       && INTVAL (operands[3]) == (INTVAL (operands[11]) - 8)
       && INTVAL (operands[4]) == (INTVAL (operands[12]) - 8)
       && INTVAL (operands[5]) == (INTVAL (operands[13]) - 8)
       && INTVAL (operands[6]) == (INTVAL (operands[14]) - 8)
       && INTVAL (operands[3]) == (INTVAL (operands[15]) - 12)
       && INTVAL (operands[4]) == (INTVAL (operands[16]) - 12)
       && INTVAL (operands[5]) == (INTVAL (operands[17]) - 12)
       && INTVAL (operands[6]) == (INTVAL (operands[18]) - 12))"
{
  /* Pack the first four indices back into the 2-bit fields of the
     immediate; operands 5 and 6 index into operand 2, hence the -16.  */
  int imm = INTVAL (operands[3]);
  imm |= INTVAL (operands[4]) << 2;
  imm |= (INTVAL (operands[5]) - 16) << 4;
  imm |= (INTVAL (operands[6]) - 16) << 6;

  /* Operand 3 is reused to print the immediate.  */
  operands[3] = GEN_INT (imm);

  return "vshufps\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "V16SF")])
7987
47490470
AI
;; Expander for the masked 512-bit shufpd builtin.  Each bit of the 8-bit
;; immediate (operand 3) picks one of two candidate indices for one result
;; element; the eight resulting indices are handed to
;; avx512f_shufpd512_1_mask together with operands 4 and 5.
(define_expand "avx512f_shufpd512_mask"
  [(match_operand:V8DF 0 "register_operand")
   (match_operand:V8DF 1 "register_operand")
   (match_operand:V8DF 2 "nonimmediate_operand")
   (match_operand:SI 3 "const_0_to_255_operand")
   (match_operand:V8DF 4 "register_operand")
   (match_operand:QI 5 "register_operand")]
  "TARGET_AVX512F"
{
  int imm = INTVAL (operands[3]);

  /* Even result elements index into operand 1 (0..7), odd ones into
     operand 2 (8..15 of the concatenation).  */
  rtx idx0 = GEN_INT (imm & 1);
  rtx idx1 = GEN_INT (imm & 2 ? 9 : 8);
  rtx idx2 = GEN_INT (imm & 4 ? 3 : 2);
  rtx idx3 = GEN_INT (imm & 8 ? 11 : 10);
  rtx idx4 = GEN_INT (imm & 16 ? 5 : 4);
  rtx idx5 = GEN_INT (imm & 32 ? 13 : 12);
  rtx idx6 = GEN_INT (imm & 64 ? 7 : 6);
  rtx idx7 = GEN_INT (imm & 128 ? 15 : 14);

  emit_insn (gen_avx512f_shufpd512_1_mask (operands[0], operands[1],
                                           operands[2],
                                           idx0, idx1, idx2, idx3,
                                           idx4, idx5, idx6, idx7,
                                           operands[4], operands[5]));
  DONE;
})
8010
;; 512-bit vshufpd as a vec_select from the 16-element concatenation of
;; operands 1 and 2.  Operands 3..10 are the eight element indices; each
;; predicate admits exactly two adjacent values, so every index contributes
;; one bit of the immediate.
(define_insn "avx512f_shufpd512_1<mask_name>"
  [(set (match_operand:V8DF 0 "register_operand" "=v")
        (vec_select:V8DF
          (vec_concat:V16DF
            (match_operand:V8DF 1 "register_operand" "v")
            (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
          (parallel [(match_operand 3  "const_0_to_1_operand")
                     (match_operand 4  "const_8_to_9_operand")
                     (match_operand 5  "const_2_to_3_operand")
                     (match_operand 6  "const_10_to_11_operand")
                     (match_operand 7  "const_4_to_5_operand")
                     (match_operand 8  "const_12_to_13_operand")
                     (match_operand 9  "const_6_to_7_operand")
                     (match_operand 10 "const_14_to_15_operand")])))]
  "TARGET_AVX512F"
{
  /* Lowest value accepted by the predicate of operands 3..10.  */
  static const int idx_base[8] = { 0, 8, 2, 10, 4, 12, 6, 14 };
  int imm = 0;
  int bit;

  /* Bit N of the immediate is operand (3 + N) minus its base index.  */
  for (bit = 0; bit < 8; bit++)
    imm |= (INTVAL (operands[3 + bit]) - idx_base[bit]) << bit;

  /* Operand 3 is reused to print the immediate.  */
  operands[3] = GEN_INT (imm);

  return "vshufpd\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "V8DF")])
8044
;; Expander for 256-bit shufpd.  The low four bits of the immediate
;; (operand 3) each pick one of two candidate indices; the resulting four
;; indices are passed to avx_shufpd256_1.  <mask_expand4_name> and
;; <mask_expand4_args> add the masked variant's extra arguments.
(define_expand "avx_shufpd256<mask_expand4_name>"
  [(match_operand:V4DF 0 "register_operand")
   (match_operand:V4DF 1 "register_operand")
   (match_operand:V4DF 2 "nonimmediate_operand")
   (match_operand:SI 3 "const_int_operand")]
  "TARGET_AVX"
{
  int imm = INTVAL (operands[3]);

  /* Even result elements index into operand 1 (0..3), odd ones into
     operand 2 (4..7 of the concatenation).  */
  rtx idx0 = GEN_INT (imm & 1);
  rtx idx1 = GEN_INT (imm & 2 ? 5 : 4);
  rtx idx2 = GEN_INT (imm & 4 ? 3 : 2);
  rtx idx3 = GEN_INT (imm & 8 ? 7 : 6);

  emit_insn (gen_avx_shufpd256_1<mask_expand4_name> (operands[0],
                                                     operands[1],
                                                     operands[2],
                                                     idx0, idx1, idx2, idx3
                                                     <mask_expand4_args>));
  DONE;
})
8063
b2d623e5
AI
;; 256-bit vshufpd as a vec_select from the 8-element concatenation of
;; operands 1 and 2.  Operands 3..6 are the four element indices; each
;; predicate admits two adjacent values, i.e. one immediate bit per index.
(define_insn "avx_shufpd256_1<mask_name>"
  [(set (match_operand:V4DF 0 "register_operand" "=v")
        (vec_select:V4DF
          (vec_concat:V8DF
            (match_operand:V4DF 1 "register_operand" "v")
            (match_operand:V4DF 2 "nonimmediate_operand" "vm"))
          (parallel [(match_operand 3 "const_0_to_1_operand")
                     (match_operand 4 "const_4_to_5_operand")
                     (match_operand 5 "const_2_to_3_operand")
                     (match_operand 6 "const_6_to_7_operand")])))]
  "TARGET_AVX && <mask_avx512vl_condition>"
{
  /* Lowest value accepted by the predicate of operands 3..6.  */
  static const int idx_base[4] = { 0, 4, 2, 6 };
  int imm = 0;
  int bit;

  /* Bit N of the immediate is operand (3 + N) minus its base index.  */
  for (bit = 0; bit < 4; bit++)
    imm |= (INTVAL (operands[3 + bit]) - idx_base[bit]) << bit;

  /* Operand 3 is reused to print the immediate.  */
  operands[3] = GEN_INT (imm);

  return "vshufpd\t{%3, %2, %1, %0<mask_operand7>|%0<mask_operand7>, %1, %2, %3}";
}
  [(set_attr "type" "sseshuf")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4DF")])
8089
;; Expander for 128-bit shufpd.  The low two bits of the immediate
;; (operand 3) are turned into two explicit element indices for
;; sse2_shufpd_v2df; <mask_expand4_args> appends the masked variant's extra
;; arguments.
(define_expand "sse2_shufpd<mask_expand4_name>"
  [(match_operand:V2DF 0 "register_operand")
   (match_operand:V2DF 1 "register_operand")
   (match_operand:V2DF 2 "nonimmediate_operand")
   (match_operand:SI 3 "const_int_operand")]
  "TARGET_SSE2"
{
  int imm = INTVAL (operands[3]);

  /* Element 0 indexes into operand 1 (0..1), element 1 into operand 2
     (2..3 of the concatenation).  */
  rtx idx0 = GEN_INT (imm & 1);
  rtx idx1 = GEN_INT (imm & 2 ? 3 : 2);

  emit_insn (gen_sse2_shufpd_v2df<mask_expand4_name> (operands[0], operands[1],
                                                      operands[2], idx0, idx1
                                                      <mask_expand4_args>));
  DONE;
})
8104
b2d623e5
AI
;; Masked 128-bit vshufpd.  The vec_select picks two elements from the
;; 4-element concatenation of operands 1 and 2 (indices in operands 3 and
;; 4); the vec_merge then blends that result with operand 5 (tied to the
;; destination, or a constant accepted by "C") under the mask register in
;; operand 6.
(define_insn "sse2_shufpd_v2df_mask"
  [(set (match_operand:V2DF 0 "register_operand" "=v")
        (vec_merge:V2DF
          (vec_select:V2DF
            (vec_concat:V4DF
              (match_operand:V2DF 1 "register_operand" "v")
              (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
            (parallel [(match_operand 3 "const_0_to_1_operand")
                       (match_operand 4 "const_2_to_3_operand")]))
          (match_operand:V2DF 5 "vector_move_operand" "0C")
          (match_operand:QI 6 "register_operand" "Yk")))]
  "TARGET_AVX512VL"
{
  /* Pack the two element indices into the 2-bit immediate; operand 4
     indexes into operand 2, hence the -2.  */
  int mask;
  mask = INTVAL (operands[3]);
  mask |= (INTVAL (operands[4]) - 2) << 1;

  /* Operand 3 is reused to print the immediate.  */
  operands[3] = GEN_INT (mask);

  /* Both dialect halves must print the mask register as %{%6%}.  The Intel
     half used to read "%{6%}", which emitted a literal "{6}" instead of
     the mask register operand.  */
  return "vshufpd\t{%3, %2, %1, %0%{%6%}%N5|%0%{%6%}%N5, %1, %2, %3}";
}
  [(set_attr "type" "sseshuf")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "V2DF")])
8129
b8aaf506 8130;; punpcklqdq and punpckhqdq are shorter than shufpd.
8cb0a27f
AI
;; 256-bit vpunpckhqdq: selects elements 1, 5, 3, 7 of the 8-element
;; concatenation of operands 1 and 2.
(define_insn "avx2_interleave_highv4di<mask_name>"
  [(set (match_operand:V4DI 0 "register_operand" "=v")
        (vec_select:V4DI
          (vec_concat:V8DI
            (match_operand:V4DI 1 "register_operand" "v")
            (match_operand:V4DI 2 "nonimmediate_operand" "vm"))
          (parallel [(const_int 1) (const_int 5)
                     (const_int 3) (const_int 7)])))]
  "TARGET_AVX2 && <mask_avx512vl_condition>"
  "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
95879c72 8146
;; 512-bit vpunpckhqdq: selects the odd-numbered elements of operand 1
;; (1, 3, 5, 7) interleaved with the matching elements of operand 2
;; (9, 11, 13, 15 of the 16-element concatenation).
(define_insn "<mask_codefor>avx512f_interleave_highv8di<mask_name>"
  [(set (match_operand:V8DI 0 "register_operand" "=v")
        (vec_select:V8DI
          (vec_concat:V16DI
            (match_operand:V8DI 1 "register_operand" "v")
            (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
          (parallel [(const_int 1)
                     (const_int 9)
                     (const_int 3)
                     (const_int 11)
                     (const_int 5)
                     (const_int 13)
                     (const_int 7)
                     (const_int 15)])))]
  "TARGET_AVX512F"
  "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
8162
8cb0a27f
AI
;; 128-bit punpckhqdq: selects elements 1 and 3 of the 4-element
;; concatenation of operands 1 and 2.  Alternative 0 is the two-operand
;; non-AVX form (operand 1 tied to the destination); alternative 1 is the
;; three-operand AVX form, optionally masked.
(define_insn "vec_interleave_highv2di<mask_name>"
  [(set (match_operand:V2DI 0 "register_operand" "=x,v")
        (vec_select:V2DI
          (vec_concat:V4DI
            (match_operand:V2DI 1 "register_operand" "0,v")
            (match_operand:V2DI 2 "nonimmediate_operand" "xm,vm"))
          (parallel [(const_int 1) (const_int 3)])))]
  "TARGET_SSE2 && <mask_avx512vl_condition>"
  "@
   punpckhqdq\t{%2, %0|%0, %2}
   vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,<mask_prefix>")
   (set_attr "mode" "TI")])
8180
8cb0a27f
AI
;; 256-bit vpunpcklqdq: selects elements 0, 4, 2, 6 of the 8-element
;; concatenation of operands 1 and 2.
(define_insn "avx2_interleave_lowv4di<mask_name>"
  [(set (match_operand:V4DI 0 "register_operand" "=v")
        (vec_select:V4DI
          (vec_concat:V8DI
            (match_operand:V4DI 1 "register_operand" "v")
            (match_operand:V4DI 2 "nonimmediate_operand" "vm"))
          (parallel [(const_int 0) (const_int 4)
                     (const_int 2) (const_int 6)])))]
  "TARGET_AVX2 && <mask_avx512vl_condition>"
  "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
8196
;; 512-bit vpunpcklqdq: selects the even-numbered elements of operand 1
;; (0, 2, 4, 6) interleaved with the matching elements of operand 2
;; (8, 10, 12, 14 of the 16-element concatenation).
(define_insn "<mask_codefor>avx512f_interleave_lowv8di<mask_name>"
  [(set (match_operand:V8DI 0 "register_operand" "=v")
        (vec_select:V8DI
          (vec_concat:V16DI
            (match_operand:V8DI 1 "register_operand" "v")
            (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
          (parallel [(const_int 0)
                     (const_int 8)
                     (const_int 2)
                     (const_int 10)
                     (const_int 4)
                     (const_int 12)
                     (const_int 6)
                     (const_int 14)])))]
  "TARGET_AVX512F"
  "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
8212
8cb0a27f
AI
;; 128-bit punpcklqdq: selects elements 0 and 2 of the 4-element
;; concatenation of operands 1 and 2.  Alternative 0 is the two-operand
;; non-AVX form (operand 1 tied to the destination); alternative 1 is the
;; three-operand AVX form, optionally masked via <mask_name>.
(define_insn "vec_interleave_lowv2di<mask_name>"
  [(set (match_operand:V2DI 0 "register_operand" "=x,v")
        (vec_select:V2DI
          (vec_concat:V4DI
            (match_operand:V2DI 1 "register_operand" "0,v")
            (match_operand:V2DI 2 "nonimmediate_operand" "xm,vm"))
          (parallel [(const_int 0)
                     (const_int 2)])))]
  "TARGET_SSE2 && <mask_avx512vl_condition>"
  "@
   punpcklqdq\t{%2, %0|%0, %2}
   vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix_data16" "1,*")
   ;; Use <mask_prefix> rather than a hard-coded "vex", matching
   ;; vec_interleave_highv2di<mask_name>, so the masked variant generated
   ;; by <mask_name> gets the substitution's prefix value.
   (set_attr "prefix" "orig,<mask_prefix>")
   (set_attr "mode" "TI")])
8230
;; 128-bit shufpd for the VI8F_128 modes: selects two elements from the
;; concatenation of operands 1 and 2 (indices in operands 3 and 4).
;; Alternative 0 is the two-operand non-AVX form, alternative 1 the
;; three-operand AVX form.
(define_insn "sse2_shufpd_<mode>"
  [(set (match_operand:VI8F_128 0 "register_operand" "=x,x")
        (vec_select:VI8F_128
          (vec_concat:<ssedoublevecmode>
            (match_operand:VI8F_128 1 "register_operand" "0,x")
            (match_operand:VI8F_128 2 "nonimmediate_operand" "xm,xm"))
          (parallel [(match_operand 3 "const_0_to_1_operand")
                     (match_operand 4 "const_2_to_3_operand")])))]
  "TARGET_SSE2"
{
  /* Pack the two element indices into the 2-bit immediate; operand 4
     indexes into operand 2, hence the -2.  */
  int imm = INTVAL (operands[3]);
  imm |= (INTVAL (operands[4]) - 2) << 1;

  /* Operand 3 is reused to print the immediate.  */
  operands[3] = GEN_INT (imm);

  if (which_alternative == 0)
    return "shufpd\t{%3, %2, %0|%0, %2, %3}";
  if (which_alternative == 1)
    return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";

  gcc_unreachable ();
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseshuf")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "V2DF")])
8261
a3d4a22b
UB
8262;; Avoid combining registers from different units in a single alternative,
8263;; see comment above inline_secondary_memory_needed function in i386.c
;; Extract element 1 (the high double) of a V2DF into a DF destination.
;; Alternatives, in order: store to memory from an SSE register; in-place
;; unpckhpd (non-AVX); vunpckhpd (AVX); and three "#" alternatives whose
;; source is offsettable memory and whose destination is an SSE, x87 or
;; general register.  The "#" alternatives emit no assembly here and rely
;; on a define_split.
(define_insn "sse2_storehpd"
  [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,x,*f,r")
        (vec_select:DF
          (match_operand:V2DF 1 "nonimmediate_operand" " x,0,x,o,o,o")
          (parallel [(const_int 1)])))]
  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   %vmovhpd\t{%1, %0|%0, %1}
   unpckhpd\t%0, %0
   vunpckhpd\t{%d1, %0|%0, %d1}
   #
   #
   #"
  [(set_attr "isa" "*,noavx,avx,*,*,*")
   (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov")
   ;; Only the store alternative, and only when not targeting AVX, carries
   ;; prefix_data16.
   (set (attr "prefix_data16")
     (if_then_else
       (and (eq_attr "alternative" "0")
            (not (match_test "TARGET_AVX")))
       (const_string "1")
       (const_string "*")))
   (set_attr "prefix" "maybe_vex,orig,vex,*,*,*")
   (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")])
ef719a44
RH
8287
;; After reload, extracting element 1 of a V2DF held in memory into a
;; register becomes a plain DF load from byte offset 8 of that memory.
(define_split
  [(set (match_operand:DF 0 "register_operand")
        (vec_select:DF
          (match_operand:V2DF 1 "memory_operand")
          (parallel [(const_int 1)])))]
  "TARGET_SSE2 && reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  operands[1] = adjust_address (operands[1], DFmode, 8);
})
ef719a44 8296
ba94c7af
UB
;; SSE1-only (no SSE2) extraction of element 1 of a V2DF.  Alternatives:
;; movhps store to memory, movhlps register to register, and movlps load
;; from the high half (%H1) of an offsettable memory source.
(define_insn "*vec_extractv2df_1_sse"
  [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
        (vec_select:DF
          (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
          (parallel [(const_int 1)])))]
  "!TARGET_SSE2 && TARGET_SSE
   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   movhps\t{%1, %0|%q0, %1}
   movhlps\t{%1, %0|%0, %1}
   movlps\t{%H1, %0|%0, %H1}"
  [(set_attr "type" "ssemov")
   (set_attr "ssememalign" "64")
   (set_attr "mode" "V2SF,V4SF,V2SF")])
8311
a3d4a22b
UB
8312;; Avoid combining registers from different units in a single alternative,
8313;; see comment above inline_secondary_memory_needed function in i386.c
;; Extract element 0 (the low double) of a V2DF into a DF destination.
;; Only the first alternative (store to memory from an SSE register) emits
;; an instruction directly; the remaining "#" alternatives rely on a
;; define_split.
(define_insn "sse2_storelpd"
  [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
        (vec_select:DF
          (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
          (parallel [(const_int 0)])))]
  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   %vmovlpd\t{%1, %0|%0, %1}
   #
   #
   #
   #"
  [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
   (set_attr "prefix_data16" "1,*,*,*,*")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "V1DF,DF,DF,DF,DF")])
ef719a44
RH
8330
;; After reload, extracting element 0 of a V2DF into a register becomes a
;; plain DF move: a register source is re-expressed as the same hard
;; register in DFmode, a memory source as a DF reference at offset 0.
(define_split
  [(set (match_operand:DF 0 "register_operand")
        (vec_select:DF
          (match_operand:V2DF 1 "nonimmediate_operand")
          (parallel [(const_int 0)])))]
  "TARGET_SSE2 && reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  operands[1] = (REG_P (operands[1])
                 ? gen_rtx_REG (DFmode, REGNO (operands[1]))
                 : adjust_address (operands[1], DFmode, 0));
})
8344
ba94c7af
UB
;; SSE1-only (no SSE2) extraction of element 0 of a V2DF.  Alternatives:
;; movlps store to memory, movaps register to register, movlps load from
;; memory.
(define_insn "*vec_extractv2df_0_sse"
  [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
        (vec_select:DF
          (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
          (parallel [(const_int 0)])))]
  "!TARGET_SSE2 && TARGET_SSE
   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   movlps\t{%1, %0|%0, %1}
   movaps\t{%1, %0|%0, %1}
   movlps\t{%1, %0|%0, %q1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V2SF,V4SF,V2SF")])
8358
;; Expander wrapper around sse2_loadhpd: build a V2DF whose element 0 is
;; element 0 of operand 1 and whose element 1 is the DF in operand 2.
;; ix86_fixup_binary_operands is called first and returns the destination
;; to use; if that differs from operand 0 the result is copied afterwards.
(define_expand "sse2_loadhpd_exp"
  [(set (match_operand:V2DF 0 "nonimmediate_operand")
        (vec_concat:V2DF
          (vec_select:DF
            (match_operand:V2DF 1 "nonimmediate_operand")
            (parallel [(const_int 0)]))
          (match_operand:DF 2 "nonimmediate_operand")))]
  "TARGET_SSE2"
{
  rtx target = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);

  emit_insn (gen_sse2_loadhpd (target, operands[1], operands[2]));

  /* Copy into the requested destination when a different one was used.  */
  if (target != operands[0])
    emit_move_insn (operands[0], target);

  DONE;
})
ffbaf337 8378
a3d4a22b
UB
8379;; Avoid combining registers from different units in a single alternative,
8380;; see comment above inline_secondary_memory_needed function in i386.c
;; Replace the high double of a V2DF: result element 0 is element 0 of
;; operand 1, result element 1 is the DF in operand 2.  Alternatives 0-3
;; are movhpd/unpcklpd in non-AVX and AVX forms; alternatives 4-6 have an
;; offsettable memory destination, emit "#" and rely on a define_split.
(define_insn "sse2_loadhpd"
  [(set (match_operand:V2DF 0 "nonimmediate_operand"
          "=x,x,x,x,o,o ,o")
        (vec_concat:V2DF
          (vec_select:DF
            (match_operand:V2DF 1 "nonimmediate_operand"
              " 0,x,0,x,0,0 ,0")
            (parallel [(const_int 0)]))
          (match_operand:DF 2 "nonimmediate_operand"
            " m,m,x,x,x,*f,r")))]
  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   movhpd\t{%2, %0|%0, %2}
   vmovhpd\t{%2, %1, %0|%0, %1, %2}
   unpcklpd\t{%2, %0|%0, %2}
   vunpcklpd\t{%2, %1, %0|%0, %1, %2}
   #
   #
   #"
  [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
   (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
   (set_attr "ssememalign" "64")
   (set_attr "prefix_data16" "1,*,*,*,*,*,*")
   (set_attr "prefix" "orig,vex,orig,vex,*,*,*")
   (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")])
ef719a44
RH
8406
;; After reload, replacing the high double of a V2DF that lives in memory
;; with a register value is just a DF store to byte offset 8 of that memory.
(define_split
  [(set (match_operand:V2DF 0 "memory_operand")
        (vec_concat:V2DF
          (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
          (match_operand:DF 1 "register_operand")))]
  "TARGET_SSE2 && reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  operands[0] = adjust_address (operands[0], DFmode, 8);
})
ef719a44 8415
;; Expander wrapper around sse2_loadlpd: build a V2DF whose element 0 is
;; the DF in operand 2 and whose element 1 is element 1 of operand 1.
;; ix86_fixup_binary_operands is called first and returns the destination
;; to use; if that differs from operand 0 the result is copied afterwards.
(define_expand "sse2_loadlpd_exp"
  [(set (match_operand:V2DF 0 "nonimmediate_operand")
        (vec_concat:V2DF
          (match_operand:DF 2 "nonimmediate_operand")
          (vec_select:DF
            (match_operand:V2DF 1 "nonimmediate_operand")
            (parallel [(const_int 1)]))))]
  "TARGET_SSE2"
{
  rtx target = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);

  emit_insn (gen_sse2_loadlpd (target, operands[1], operands[2]));

  /* Copy into the requested destination when a different one was used.  */
  if (target != operands[0])
    emit_move_insn (operands[0], target);

  DONE;
})
ffbaf337 8435
a3d4a22b
UB
8436;; Avoid combining registers from different units in a single alternative,
8437;; see comment above inline_secondary_memory_needed function in i386.c
;; Replace the low double of a V2DF: result element 0 is the DF in operand
;; 2, result element 1 is element 1 of operand 1.  Operand 1 may also be a
;; constant accepted by the "C" constraint (alternative 0).  Alternatives
;; 8-10 have a memory destination, emit "#" and rely on a define_split.
(define_insn "sse2_loadlpd"
  [(set (match_operand:V2DF 0 "nonimmediate_operand"
          "=x,x,x,x,x,x,x,x,m,m ,m")
        (vec_concat:V2DF
          (match_operand:DF 2 "nonimmediate_operand"
            " m,m,m,x,x,0,0,x,x,*f,r")
          (vec_select:DF
            (match_operand:V2DF 1 "vector_move_operand"
              " C,0,x,0,x,x,o,o,0,0 ,0")
            (parallel [(const_int 1)]))))]
  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   %vmovsd\t{%2, %0|%0, %2}
   movlpd\t{%2, %0|%0, %2}
   vmovlpd\t{%2, %1, %0|%0, %1, %2}
   movsd\t{%2, %0|%0, %2}
   vmovsd\t{%2, %1, %0|%0, %1, %2}
   shufpd\t{$2, %1, %0|%0, %1, 2}
   movhpd\t{%H1, %0|%0, %H1}
   vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
   #
   #
   #"
  [(set_attr "isa" "*,noavx,avx,noavx,avx,noavx,noavx,avx,*,*,*")
   ;; Everything is "ssemov" except the shufpd alternative (5) and the two
   ;; alternatives whose source is an x87 (9) or general (10) register.
   (set (attr "type")
     (cond [(eq_attr "alternative" "5")
              (const_string "sselog")
            (eq_attr "alternative" "9")
              (const_string "fmov")
            (eq_attr "alternative" "10")
              (const_string "imov")
           ]
           (const_string "ssemov")))
   (set_attr "ssememalign" "64")
   (set_attr "prefix_data16" "*,1,*,*,*,*,1,*,*,*,*")
   (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*,*,*")
   (set_attr "prefix" "maybe_vex,orig,vex,orig,vex,orig,orig,vex,*,*,*")
   (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")])
ef719a44
RH
8476
;; After reload, replacing the low double of a V2DF that lives in memory
;; with a register value is just a DF store to byte offset 0 of that memory.
(define_split
  [(set (match_operand:V2DF 0 "memory_operand")
        (vec_concat:V2DF
          (match_operand:DF 1 "register_operand")
          (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
  "TARGET_SSE2 && reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  operands[0] = adjust_address (operands[0], DFmode, 0);
})
ef719a44 8485
;; movsd-style merge: vec_merge with (const_int 1) takes element 0 from
;; operand 2 and element 1 from operand 1.  Note that operand 2 appears
;; before operand 1 in the pattern.  The nine alternatives cover
;; register/memory combinations in non-AVX and AVX forms.
(define_insn "sse2_movsd"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,x,x,x,o")
        (vec_merge:V2DF
          (match_operand:V2DF 2 "nonimmediate_operand" " x,x,m,m,x,0,0,x,0")
          (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,0,x,0,x,o,o,x")
          (const_int 1)))]
  "TARGET_SSE2"
  "@
   movsd\t{%2, %0|%0, %2}
   vmovsd\t{%2, %1, %0|%0, %1, %2}
   movlpd\t{%2, %0|%0, %q2}
   vmovlpd\t{%2, %1, %0|%0, %1, %q2}
   %vmovlpd\t{%2, %0|%q0, %2}
   shufpd\t{$2, %1, %0|%0, %1, 2}
   movhps\t{%H1, %0|%0, %H1}
   vmovhps\t{%H1, %2, %0|%0, %2, %H1}
   %vmovhps\t{%1, %H0|%H0, %1}"
  [(set_attr "isa" "noavx,avx,noavx,avx,*,noavx,noavx,avx,*")
   ;; Only the shufpd alternative (5) is "sselog".
   (set (attr "type")
     (if_then_else
       (eq_attr "alternative" "5")
       (const_string "sselog")
       (const_string "ssemov")))
   ;; Alternatives 2 and 4 carry prefix_data16 only when not targeting AVX.
   (set (attr "prefix_data16")
     (if_then_else
       (and (eq_attr "alternative" "2,4")
            (not (match_test "TARGET_AVX")))
       (const_string "1")
       (const_string "*")))
   (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*")
   (set_attr "ssememalign" "64")
   (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig,vex,maybe_vex")
   (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
ef719a44 8519
6eacd27c
AI
;; Broadcast a DF value into both elements of a V2DF.  Alternative 0
;; (non-AVX) duplicates in place with unpcklpd, the source being tied to
;; the destination; alternative 1 (isa "sse3") uses movddup from a register
;; or memory source, with optional masking text from <mask_operand2>.
(define_insn "vec_dupv2df<mask_name>"
  [(set (match_operand:V2DF 0 "register_operand" "=x,v")
        (vec_duplicate:V2DF
          (match_operand:DF 1 "nonimmediate_operand" " 0,vm")))]
  "TARGET_SSE2 && <mask_avx512vl_condition>"
  "@
   unpcklpd\t%0, %0
   %vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "isa" "noavx,sse3")
   (set_attr "type" "sselog1")
   (set_attr "prefix" "orig,maybe_vex")
   (set_attr "mode" "V2DF,DF")])
eb701deb 8532
;; Build a V2DF from two DF values (operand 1 low, operand 2 high).
;; Operand 2 may be a constant accepted by "C" (alternative 5).  Both
;; operands may be memory only when SSE3 is available and they are the same
;; rtx, which is the movddup alternative (2, constraint "1" ties operand 2
;; to operand 1).
(define_insn "*vec_concatv2df"
  [(set (match_operand:V2DF 0 "register_operand" "=x,v,v,x,x,v,x,x")
        (vec_concat:V2DF
          (match_operand:DF 1 "nonimmediate_operand" " 0,v,m,0,x,m,0,0")
          (match_operand:DF 2 "vector_move_operand" " x,v,1,m,m,C,x,m")))]
  "TARGET_SSE
   && (!(MEM_P (operands[1]) && MEM_P (operands[2]))
       || (TARGET_SSE3 && rtx_equal_p (operands[1], operands[2])))"
  "@
   unpcklpd\t{%2, %0|%0, %2}
   vunpcklpd\t{%2, %1, %0|%0, %1, %2}
   %vmovddup\t{%1, %0|%0, %1}
   movhpd\t{%2, %0|%0, %2}
   vmovhpd\t{%2, %1, %0|%0, %1, %2}
   %vmovsd\t{%1, %0|%0, %1}
   movlhps\t{%2, %0|%0, %2}
   movhps\t{%2, %0|%0, %2}"
  [(set_attr "isa" "sse2_noavx,avx,sse3,sse2_noavx,avx,sse2,noavx,noavx")
   ;; The first three alternatives are "sselog", the rest "ssemov".
   (set (attr "type")
     (if_then_else
       (eq_attr "alternative" "0,1,2")
       (const_string "sselog")
       (const_string "ssemov")))
   (set_attr "prefix_data16" "*,*,*,1,*,*,*,*")
   (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex,orig,orig")
   (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,DF,V4SF,V2SF")])
eb701deb 8559
c003c6d6
AI
8560;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8561;;
8562;; Parallel integer down-conversion operations
8563;;
8564;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8565
;; Destination modes of the truncating vpmov patterns below.
(define_mode_iterator PMOV_DST_MODE_1
  [V16QI V16HI V8SI V8HI])

;; Source mode paired with each destination mode.
(define_mode_attr pmov_src_mode
  [(V16QI "V16SI")
   (V16HI "V16SI")
   (V8SI "V8DI")
   (V8HI "V8DI")])

;; The same source modes in lower case, for use in pattern names.
(define_mode_attr pmov_src_lower
  [(V16QI "v16si")
   (V16HI "v16si")
   (V8SI "v8di")
   (V8HI "v8di")])

;; Mnemonic suffix appended after vpmov<trunsuffix> for each destination.
(define_mode_attr pmov_suff_1
  [(V16QI "db")
   (V16HI "dw")
   (V8SI "qd")
   (V8HI "qw")])
8573
;; Unmasked down-conversion of a 512-bit source register (operand 1) to a
;; PMOV_DST_MODE_1 destination that is either a register or memory.
;; any_truncate and <trunsuffix> are defined outside this chunk.
(define_insn "*avx512f_<code><pmov_src_lower><mode>2"
  [(set
     (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
     (any_truncate:PMOV_DST_MODE_1
       (match_operand:<pmov_src_mode> 1 "register_operand" "v,v")))]
  "TARGET_AVX512F"
  "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "memory" "none,store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
8584
;; Masked form of the 512-bit-source down-conversion.  Operand 3 is the
;; mask; operand 2 is the vec_merge fallback value: tied to the
;; destination, or (register alternative only) a "C" constant.
(define_insn "avx512f_<code><pmov_src_lower><mode>2_mask"
  [(set
     (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
     (vec_merge:PMOV_DST_MODE_1
       (any_truncate:PMOV_DST_MODE_1
         (match_operand:<pmov_src_mode> 1 "register_operand" "v,v"))
       (match_operand:PMOV_DST_MODE_1 2 "vector_move_operand" "0C,0")
       (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
  "TARGET_AVX512F"
  "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "memory" "none,store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
8598
;; Expander for a masked down-converting store: memory operand 0 is both
;; the destination and (via match_dup) the vec_merge fallback value, so
;; the emitted RTL has the shape of the _mask insn with operand 2 == 0.
(define_expand "avx512f_<code><pmov_src_lower><mode>2_mask_store"
  [(set
     (match_operand:PMOV_DST_MODE_1 0 "memory_operand")
     (vec_merge:PMOV_DST_MODE_1
       (any_truncate:PMOV_DST_MODE_1
         (match_operand:<pmov_src_mode> 1 "register_operand"))
       (match_dup 0)
       (match_operand:<avx512fmaskmode> 2 "register_operand")))]
  "TARGET_AVX512F")
8607
4a90ee35
AI
;; Unmasked V32HI -> V32QI down-conversion (vpmov*wb) into a register or
;; memory destination.
(define_insn "*avx512bw_<code>v32hiv32qi2"
  [(set
     (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
     (any_truncate:V32QI
       (match_operand:V32HI 1 "register_operand" "v,v")))]
  "TARGET_AVX512BW"
  "vpmov<trunsuffix>wb\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "memory" "none,store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
8618
;; Masked V32HI -> V32QI down-conversion.  Operand 3 is the SImode mask
;; (one bit per result byte); operand 2 is the vec_merge fallback value.
(define_insn "avx512bw_<code>v32hiv32qi2_mask"
  [(set
     (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
     (vec_merge:V32QI
       (any_truncate:V32QI
         (match_operand:V32HI 1 "register_operand" "v,v"))
       (match_operand:V32QI 2 "vector_move_operand" "0C,0")
       (match_operand:SI 3 "register_operand" "Yk,Yk")))]
  "TARGET_AVX512BW"
  "vpmov<trunsuffix>wb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "memory" "none,store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
8632
;; Expander for the masked V32HI -> V32QI conversion in which operand 0
;; doubles as the vec_merge fallback value (match_dup 0).
(define_expand "avx512bw_<code>v32hiv32qi2_mask_store"
  [(set
     (match_operand:V32QI 0 "nonimmediate_operand")
     (vec_merge:V32QI
       (any_truncate:V32QI
         (match_operand:V32HI 1 "register_operand"))
       (match_dup 0)
       (match_operand:SI 2 "register_operand")))]
  "TARGET_AVX512BW")
8641
;; 128-bit destination modes whose source is the double-width vector
;; (<ssedoublemode>); the V16QI case additionally requires AVX512BW.
;; pmov_suff_2 gives the vpmov* mnemonic suffix per destination mode.
(define_mode_iterator PMOV_DST_MODE_2
  [V4SI
   V8HI
   (V16QI "TARGET_AVX512BW")])

(define_mode_attr pmov_suff_2
  [(V16QI "wb")
   (V8HI "dw")
   (V4SI "qd")])
8646
;; Unmasked down-conversion from <ssedoublemode> to a PMOV_DST_MODE_2
;; register or memory destination.
(define_insn "*avx512vl_<code><ssedoublemodelower><mode>2"
  [(set
     (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m")
     (any_truncate:PMOV_DST_MODE_2
       (match_operand:<ssedoublemode> 1 "register_operand" "v,v")))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix><pmov_suff_2>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "memory" "none,store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
8657
;; Masked down-conversion from <ssedoublemode> to PMOV_DST_MODE_2.
;; Operand 3 is the mask, operand 2 the vec_merge fallback value.
(define_insn "<avx512>_<code><ssedoublemodelower><mode>2_mask"
  [(set
     (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m")
     (vec_merge:PMOV_DST_MODE_2
       (any_truncate:PMOV_DST_MODE_2
         (match_operand:<ssedoublemode> 1 "register_operand" "v,v"))
       (match_operand:PMOV_DST_MODE_2 2 "vector_move_operand" "0C,0")
       (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix><pmov_suff_2>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "memory" "none,store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
8671
;; Expander for the masked <ssedoublemode> -> PMOV_DST_MODE_2 conversion
;; in which operand 0 is also the vec_merge fallback value (match_dup 0).
(define_expand "<avx512>_<code><ssedoublemodelower><mode>2_mask_store"
  [(set
     (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand")
     (vec_merge:PMOV_DST_MODE_2
       (any_truncate:PMOV_DST_MODE_2
         (match_operand:<ssedoublemode> 1 "register_operand"))
       (match_dup 0)
       (match_operand:<avx512fmaskmode> 2 "register_operand")))]
  "TARGET_AVX512VL")
8680
;; Source modes that are down-converted to QI elements inside a V16QI
;; result (V8HI only with AVX512BW).  Per source mode: the truncated
;; vector mode, the mode of the zero padding that completes the 16
;; bytes, and the vpmov* mnemonic suffix.
(define_mode_iterator PMOV_SRC_MODE_3
  [V4DI V2DI V8SI V4SI (V8HI "TARGET_AVX512BW")])

(define_mode_attr pmov_dst_3
  [(V4DI "V4QI")
   (V2DI "V2QI")
   (V8SI "V8QI")
   (V4SI "V4QI")
   (V8HI "V8QI")])

(define_mode_attr pmov_dst_zeroed_3
  [(V4DI "V12QI")
   (V2DI "V14QI")
   (V8SI "V8QI")
   (V4SI "V12QI")
   (V8HI "V8QI")])

(define_mode_attr pmov_suff_3
  [(V4DI "qb")
   (V2DI "qb")
   (V8SI "db")
   (V4SI "db")
   (V8HI "wb")])
8688
;; Register form: the truncated elements occupy the first part of the
;; V16QI result and the remainder (operand 2) must be constant zero.
(define_insn "*avx512vl_<code><mode>v<ssescalarnum>qi2"
  [(set
     (match_operand:V16QI 0 "register_operand" "=v")
     (vec_concat:V16QI
       (any_truncate:<pmov_dst_3>
         (match_operand:PMOV_SRC_MODE_3 1 "register_operand" "v"))
       (match_operand:<pmov_dst_zeroed_3> 2 "const0_operand")))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
8700
;; Store form of V2DI -> V2QI: elements 0-1 of memory operand 0 receive
;; the truncated values, elements 2-15 are re-selected from operand 0
;; itself, i.e. they keep their previous contents.
(define_insn "*avx512vl_<code>v2div2qi2_store"
  [(set
     (match_operand:V16QI 0 "memory_operand" "=m")
     (vec_concat:V16QI
       (any_truncate:V2QI
         (match_operand:V2DI 1 "register_operand" "v"))
       (vec_select:V14QI
         (match_dup 0)
         (parallel
           [(const_int 2) (const_int 3) (const_int 4) (const_int 5)
            (const_int 6) (const_int 7) (const_int 8) (const_int 9)
            (const_int 10) (const_int 11) (const_int 12) (const_int 13)
            (const_int 14) (const_int 15)]))))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "memory" "store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
8721
;; Masked register form of V2DI -> V2QI.  The two result bytes are merged
;; under mask operand 3 with elements 0-1 of operand 2; the remaining 14
;; bytes of the result are constant zero.
(define_insn "avx512vl_<code>v2div2qi2_mask"
  [(set
     (match_operand:V16QI 0 "register_operand" "=v")
     (vec_concat:V16QI
       (vec_merge:V2QI
         (any_truncate:V2QI
           (match_operand:V2DI 1 "register_operand" "v"))
         (vec_select:V2QI
           (match_operand:V16QI 2 "vector_move_operand" "0C")
           (parallel [(const_int 0) (const_int 1)]))
         (match_operand:QI 3 "register_operand" "Yk"))
       (const_vector:V14QI
         [(const_int 0) (const_int 0) (const_int 0) (const_int 0)
          (const_int 0) (const_int 0) (const_int 0) (const_int 0)
          (const_int 0) (const_int 0) (const_int 0) (const_int 0)
          (const_int 0) (const_int 0)])))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
8744
;; Masked store form of V2DI -> V2QI.  Elements 0-1 of memory operand 0
;; are merged under mask operand 2 with their previous contents;
;; elements 2-15 are re-selected from operand 0 unchanged.
(define_insn "avx512vl_<code>v2div2qi2_mask_store"
  [(set
     (match_operand:V16QI 0 "memory_operand" "=m")
     (vec_concat:V16QI
       (vec_merge:V2QI
         (any_truncate:V2QI
           (match_operand:V2DI 1 "register_operand" "v"))
         (vec_select:V2QI
           (match_dup 0)
           (parallel [(const_int 0) (const_int 1)]))
         (match_operand:QI 2 "register_operand" "Yk"))
       (vec_select:V14QI
         (match_dup 0)
         (parallel
           [(const_int 2) (const_int 3) (const_int 4) (const_int 5)
            (const_int 6) (const_int 7) (const_int 8) (const_int 9)
            (const_int 10) (const_int 11) (const_int 12) (const_int 13)
            (const_int 14) (const_int 15)]))))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%0%{%2%}, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "memory" "store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
8770
;; Store form producing four QI elements: elements 0-3 of memory
;; operand 0 receive the truncated values, elements 4-15 are re-selected
;; from operand 0 unchanged.  VI4_128_8_256 is defined outside this chunk.
(define_insn "*avx512vl_<code><mode>v4qi2_store"
  [(set
     (match_operand:V16QI 0 "memory_operand" "=m")
     (vec_concat:V16QI
       (any_truncate:V4QI
         (match_operand:VI4_128_8_256 1 "register_operand" "v"))
       (vec_select:V12QI
         (match_dup 0)
         (parallel
           [(const_int 4) (const_int 5) (const_int 6) (const_int 7)
            (const_int 8) (const_int 9) (const_int 10) (const_int 11)
            (const_int 12) (const_int 13) (const_int 14) (const_int 15)]))))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "memory" "store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
8790
;; Masked register form producing four QI elements, merged under mask
;; operand 3 with elements 0-3 of operand 2; the upper 12 bytes of the
;; result are constant zero.
(define_insn "avx512vl_<code><mode>v4qi2_mask"
  [(set
     (match_operand:V16QI 0 "register_operand" "=v")
     (vec_concat:V16QI
       (vec_merge:V4QI
         (any_truncate:V4QI
           (match_operand:VI4_128_8_256 1 "register_operand" "v"))
         (vec_select:V4QI
           (match_operand:V16QI 2 "vector_move_operand" "0C")
           (parallel
             [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))
         (match_operand:QI 3 "register_operand" "Yk"))
       (const_vector:V12QI
         [(const_int 0) (const_int 0) (const_int 0) (const_int 0)
          (const_int 0) (const_int 0) (const_int 0) (const_int 0)
          (const_int 0) (const_int 0) (const_int 0) (const_int 0)])))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
8813
;; Masked store form producing four QI elements.  Elements 0-3 of memory
;; operand 0 are merged under mask operand 2 with their previous
;; contents; elements 4-15 are re-selected from operand 0 unchanged.
(define_insn "avx512vl_<code><mode>v4qi2_mask_store"
  [(set
     (match_operand:V16QI 0 "memory_operand" "=m")
     (vec_concat:V16QI
       (vec_merge:V4QI
         (any_truncate:V4QI
           (match_operand:VI4_128_8_256 1 "register_operand" "v"))
         (vec_select:V4QI
           (match_dup 0)
           (parallel
             [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))
         (match_operand:QI 2 "register_operand" "Yk"))
       (vec_select:V12QI
         (match_dup 0)
         (parallel
           [(const_int 4) (const_int 5) (const_int 6) (const_int 7)
            (const_int 8) (const_int 9) (const_int 10) (const_int 11)
            (const_int 12) (const_int 13) (const_int 14) (const_int 15)]))))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "memory" "store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
8839
;; Eight-element source modes for the V8QI down-conversions below; the
;; V8HI case is only enabled with AVX512BW.
(define_mode_iterator VI2_128_BW_4_256
  [(V8HI "TARGET_AVX512BW")
   V8SI])
8842
;; Store form producing eight QI elements: elements 0-7 of memory
;; operand 0 receive the truncated values, elements 8-15 are re-selected
;; from operand 0 unchanged.
(define_insn "*avx512vl_<code><mode>v8qi2_store"
  [(set
     (match_operand:V16QI 0 "memory_operand" "=m")
     (vec_concat:V16QI
       (any_truncate:V8QI
         (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
       (vec_select:V8QI
         (match_dup 0)
         (parallel
           [(const_int 8) (const_int 9) (const_int 10) (const_int 11)
            (const_int 12) (const_int 13) (const_int 14) (const_int 15)]))))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "memory" "store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
8860
;; Masked register form producing eight QI elements, merged under mask
;; operand 3 with elements 0-7 of operand 2; the upper eight bytes of
;; the result are constant zero.
(define_insn "avx512vl_<code><mode>v8qi2_mask"
  [(set
     (match_operand:V16QI 0 "register_operand" "=v")
     (vec_concat:V16QI
       (vec_merge:V8QI
         (any_truncate:V8QI
           (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
         (vec_select:V8QI
           (match_operand:V16QI 2 "vector_move_operand" "0C")
           (parallel
             [(const_int 0) (const_int 1) (const_int 2) (const_int 3)
              (const_int 4) (const_int 5) (const_int 6) (const_int 7)]))
         (match_operand:QI 3 "register_operand" "Yk"))
       (const_vector:V8QI
         [(const_int 0) (const_int 0) (const_int 0) (const_int 0)
          (const_int 0) (const_int 0) (const_int 0) (const_int 0)])))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
8883
;; Masked store form producing eight QI elements.  Elements 0-7 of memory
;; operand 0 are merged under mask operand 2 with their previous
;; contents; elements 8-15 are re-selected from operand 0 unchanged.
(define_insn "avx512vl_<code><mode>v8qi2_mask_store"
  [(set
     (match_operand:V16QI 0 "memory_operand" "=m")
     (vec_concat:V16QI
       (vec_merge:V8QI
         (any_truncate:V8QI
           (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
         (vec_select:V8QI
           (match_dup 0)
           (parallel
             [(const_int 0) (const_int 1) (const_int 2) (const_int 3)
              (const_int 4) (const_int 5) (const_int 6) (const_int 7)]))
         (match_operand:QI 2 "register_operand" "Yk"))
       (vec_select:V8QI
         (match_dup 0)
         (parallel
           [(const_int 8) (const_int 9) (const_int 10) (const_int 11)
            (const_int 12) (const_int 13) (const_int 14) (const_int 15)]))))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "memory" "store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
8909
;; Source modes that are down-converted to HI elements inside a V8HI
;; result.  Per source mode: the truncated vector mode, the mode of the
;; zero padding that completes the eight words, and the mnemonic suffix.
(define_mode_iterator PMOV_SRC_MODE_4
  [V4DI V2DI V4SI])

(define_mode_attr pmov_dst_4
  [(V4DI "V4HI")
   (V2DI "V2HI")
   (V4SI "V4HI")])

(define_mode_attr pmov_dst_zeroed_4
  [(V4DI "V4HI")
   (V2DI "V6HI")
   (V4SI "V4HI")])

(define_mode_attr pmov_suff_4
  [(V4DI "qw")
   (V2DI "qw")
   (V4SI "dw")])
8917
;; Register form: the truncated HI elements occupy the first part of the
;; V8HI result and the remainder (operand 2) must be constant zero.
(define_insn "*avx512vl_<code><mode>v<ssescalarnum>hi2"
  [(set
     (match_operand:V8HI 0 "register_operand" "=v")
     (vec_concat:V8HI
       (any_truncate:<pmov_dst_4>
         (match_operand:PMOV_SRC_MODE_4 1 "register_operand" "v"))
       (match_operand:<pmov_dst_zeroed_4> 2 "const0_operand")))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
8929
;; Store form producing four HI elements: elements 0-3 of memory
;; operand 0 receive the truncated values, elements 4-7 are re-selected
;; from operand 0 unchanged.
(define_insn "*avx512vl_<code><mode>v4hi2_store"
  [(set
     (match_operand:V8HI 0 "memory_operand" "=m")
     (vec_concat:V8HI
       (any_truncate:V4HI
         (match_operand:VI4_128_8_256 1 "register_operand" "v"))
       (vec_select:V4HI
         (match_dup 0)
         (parallel
           [(const_int 4) (const_int 5) (const_int 6) (const_int 7)]))))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "memory" "store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
8945
;; Masked register form producing four HI elements, merged under mask
;; operand 3 with elements 0-3 of operand 2; the upper four words of the
;; result are constant zero.
(define_insn "avx512vl_<code><mode>v4hi2_mask"
  [(set
     (match_operand:V8HI 0 "register_operand" "=v")
     (vec_concat:V8HI
       (vec_merge:V4HI
         (any_truncate:V4HI
           (match_operand:VI4_128_8_256 1 "register_operand" "v"))
         (vec_select:V4HI
           (match_operand:V8HI 2 "vector_move_operand" "0C")
           (parallel
             [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))
         (match_operand:QI 3 "register_operand" "Yk"))
       (const_vector:V4HI
         [(const_int 0) (const_int 0) (const_int 0) (const_int 0)])))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
8964
;; Masked store form producing four HI elements.  Elements 0-3 of memory
;; operand 0 are merged under mask operand 2 with their previous
;; contents; elements 4-7 are re-selected from operand 0 unchanged.
(define_insn "avx512vl_<code><mode>v4hi2_mask_store"
  [(set
     (match_operand:V8HI 0 "memory_operand" "=m")
     (vec_concat:V8HI
       (vec_merge:V4HI
         (any_truncate:V4HI
           (match_operand:VI4_128_8_256 1 "register_operand" "v"))
         (vec_select:V4HI
           (match_dup 0)
           (parallel
             [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))
         (match_operand:QI 2 "register_operand" "Yk"))
       (vec_select:V4HI
         (match_dup 0)
         (parallel
           [(const_int 4) (const_int 5) (const_int 6) (const_int 7)]))))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "memory" "store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
8986
;; Store form of V2DI -> V2HI: elements 0-1 of memory operand 0 receive
;; the truncated values, elements 2-7 are re-selected from operand 0
;; unchanged.
(define_insn "*avx512vl_<code>v2div2hi2_store"
  [(set
     (match_operand:V8HI 0 "memory_operand" "=m")
     (vec_concat:V8HI
       (any_truncate:V2HI
         (match_operand:V2DI 1 "register_operand" "v"))
       (vec_select:V6HI
         (match_dup 0)
         (parallel
           [(const_int 2) (const_int 3) (const_int 4)
            (const_int 5) (const_int 6) (const_int 7)]))))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix>qw\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "memory" "store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
9003
;; Masked register form of V2DI -> V2HI, merged under mask operand 3
;; with elements 0-1 of operand 2; the upper six words of the result are
;; constant zero.
(define_insn "avx512vl_<code>v2div2hi2_mask"
  [(set
     (match_operand:V8HI 0 "register_operand" "=v")
     (vec_concat:V8HI
       (vec_merge:V2HI
         (any_truncate:V2HI
           (match_operand:V2DI 1 "register_operand" "v"))
         (vec_select:V2HI
           (match_operand:V8HI 2 "vector_move_operand" "0C")
           (parallel [(const_int 0) (const_int 1)]))
         (match_operand:QI 3 "register_operand" "Yk"))
       (const_vector:V6HI
         [(const_int 0) (const_int 0) (const_int 0)
          (const_int 0) (const_int 0) (const_int 0)])))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix>qw\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
9022
;; Masked store form of V2DI -> V2HI.  Elements 0-1 of memory operand 0
;; are merged under mask operand 2 with their previous contents;
;; elements 2-7 are re-selected from operand 0 unchanged.
(define_insn "avx512vl_<code>v2div2hi2_mask_store"
  [(set
     (match_operand:V8HI 0 "memory_operand" "=m")
     (vec_concat:V8HI
       (vec_merge:V2HI
         (any_truncate:V2HI
           (match_operand:V2DI 1 "register_operand" "v"))
         (vec_select:V2HI
           (match_dup 0)
           (parallel [(const_int 0) (const_int 1)]))
         (match_operand:QI 2 "register_operand" "Yk"))
       (vec_select:V6HI
         (match_dup 0)
         (parallel
           [(const_int 2) (const_int 3) (const_int 4)
            (const_int 5) (const_int 6) (const_int 7)]))))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix>qw\t{%1, %0%{%2%}|%0%{%2%}, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "memory" "store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
9044
;; Register form of V2DI -> V2SI: the two truncated elements come first
;; in the V4SI result and the other half (operand 2) must be constant
;; zero.
(define_insn "*avx512vl_<code>v2div2si2"
  [(set
     (match_operand:V4SI 0 "register_operand" "=v")
     (vec_concat:V4SI
       (any_truncate:V2SI
         (match_operand:V2DI 1 "register_operand" "v"))
       (match_operand:V2SI 2 "const0_operand")))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix>qd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
9056
;; Store form of V2DI -> V2SI: elements 0-1 of memory operand 0 receive
;; the truncated values, elements 2-3 are re-selected from operand 0
;; unchanged.
(define_insn "*avx512vl_<code>v2div2si2_store"
  [(set
     (match_operand:V4SI 0 "memory_operand" "=m")
     (vec_concat:V4SI
       (any_truncate:V2SI
         (match_operand:V2DI 1 "register_operand" "v"))
       (vec_select:V2SI
         (match_dup 0)
         (parallel [(const_int 2) (const_int 3)]))))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix>qd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "memory" "store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
9071
;; Masked register form of V2DI -> V2SI, merged under mask operand 3
;; with elements 0-1 of operand 2; the upper two elements of the result
;; are constant zero.
(define_insn "avx512vl_<code>v2div2si2_mask"
  [(set
     (match_operand:V4SI 0 "register_operand" "=v")
     (vec_concat:V4SI
       (vec_merge:V2SI
         (any_truncate:V2SI
           (match_operand:V2DI 1 "register_operand" "v"))
         (vec_select:V2SI
           (match_operand:V4SI 2 "vector_move_operand" "0C")
           (parallel [(const_int 0) (const_int 1)]))
         (match_operand:QI 3 "register_operand" "Yk"))
       (const_vector:V2SI
         [(const_int 0) (const_int 0)])))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix>qd\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
9088
;; Masked store form of V2DI -> V2SI.  Elements 0-1 of memory operand 0
;; are merged under mask operand 2 with their previous contents;
;; elements 2-3 are re-selected from operand 0 unchanged.
(define_insn "avx512vl_<code>v2div2si2_mask_store"
  [(set
     (match_operand:V4SI 0 "memory_operand" "=m")
     (vec_concat:V4SI
       (vec_merge:V2SI
         (any_truncate:V2SI
           (match_operand:V2DI 1 "register_operand" "v"))
         (vec_select:V2SI
           (match_dup 0)
           (parallel [(const_int 0) (const_int 1)]))
         (match_operand:QI 2 "register_operand" "Yk"))
       (vec_select:V2SI
         (match_dup 0)
         (parallel [(const_int 2) (const_int 3)]))))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix>qd\t{%1, %0%{%2%}|%0%{%2%}, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "memory" "store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
9108
c003c6d6
AI
;; Register form of V8DI -> V8QI: the eight truncated bytes come first in
;; the V16QI result and the upper eight bytes are constant zero.
(define_insn "*avx512f_<code>v8div16qi2"
  [(set
     (match_operand:V16QI 0 "register_operand" "=v")
     (vec_concat:V16QI
       (any_truncate:V8QI
         (match_operand:V8DI 1 "register_operand" "v"))
       (const_vector:V8QI
         [(const_int 0) (const_int 0) (const_int 0) (const_int 0)
          (const_int 0) (const_int 0) (const_int 0) (const_int 0)])))]
  "TARGET_AVX512F"
  "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
9123
;; Store form of V8DI -> V8QI: elements 0-7 of memory operand 0 receive
;; the truncated values, elements 8-15 are re-selected from operand 0
;; unchanged.
(define_insn "*avx512f_<code>v8div16qi2_store"
  [(set
     (match_operand:V16QI 0 "memory_operand" "=m")
     (vec_concat:V16QI
       (any_truncate:V8QI
         (match_operand:V8DI 1 "register_operand" "v"))
       (vec_select:V8QI
         (match_dup 0)
         (parallel
           [(const_int 8) (const_int 9) (const_int 10) (const_int 11)
            (const_int 12) (const_int 13) (const_int 14) (const_int 15)]))))]
  "TARGET_AVX512F"
  "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "memory" "store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
9141
47490470
AI
;; Masked register form of V8DI -> V8QI, merged under mask operand 3
;; with elements 0-7 of operand 2; the upper eight bytes of the result
;; are constant zero.
(define_insn "avx512f_<code>v8div16qi2_mask"
  [(set
     (match_operand:V16QI 0 "register_operand" "=v")
     (vec_concat:V16QI
       (vec_merge:V8QI
         (any_truncate:V8QI
           (match_operand:V8DI 1 "register_operand" "v"))
         (vec_select:V8QI
           (match_operand:V16QI 2 "vector_move_operand" "0C")
           (parallel
             [(const_int 0) (const_int 1) (const_int 2) (const_int 3)
              (const_int 4) (const_int 5) (const_int 6) (const_int 7)]))
         (match_operand:QI 3 "register_operand" "Yk"))
       (const_vector:V8QI
         [(const_int 0) (const_int 0) (const_int 0) (const_int 0)
          (const_int 0) (const_int 0) (const_int 0) (const_int 0)])))]
  "TARGET_AVX512F"
  "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
9164
;; Masked store form of V8DI -> V8QI.  Elements 0-7 of memory operand 0
;; are merged under mask operand 2 with their previous contents;
;; elements 8-15 are re-selected from operand 0 unchanged.
(define_insn "avx512f_<code>v8div16qi2_mask_store"
  [(set
     (match_operand:V16QI 0 "memory_operand" "=m")
     (vec_concat:V16QI
       (vec_merge:V8QI
         (any_truncate:V8QI
           (match_operand:V8DI 1 "register_operand" "v"))
         (vec_select:V8QI
           (match_dup 0)
           (parallel
             [(const_int 0) (const_int 1) (const_int 2) (const_int 3)
              (const_int 4) (const_int 5) (const_int 6) (const_int 7)]))
         (match_operand:QI 2 "register_operand" "Yk"))
       (vec_select:V8QI
         (match_dup 0)
         (parallel
           [(const_int 8) (const_int 9) (const_int 10) (const_int 11)
            (const_int 12) (const_int 13) (const_int 14) (const_int 15)]))))]
  "TARGET_AVX512F"
  "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%0%{%2%}, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "memory" "store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
9190
ef719a44
RH
9191;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9192;;
9193;; Parallel integral arithmetic
9194;;
9195;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9196
;; Vector integer negation, expanded as (0 - operand 1).  The zero
;; vector is forced into a register and becomes operand 2.
(define_expand "neg<mode>2"
  [(set
     (match_operand:VI_AVX2 0 "register_operand")
     (minus:VI_AVX2
       (match_dup 2)
       (match_operand:VI_AVX2 1 "nonimmediate_operand")))]
  "TARGET_SSE2"
  "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
9204
;; Named expander for vector integer add/subtract.  The operands are
;; passed through ix86_fixup_binary_operands_no_copy before the pattern
;; is emitted.
(define_expand "<plusminus_insn><mode>3"
  [(set
     (match_operand:VI_AVX2 0 "register_operand")
     (plusminus:VI_AVX2
       (match_operand:VI_AVX2 1 "nonimmediate_operand")
       (match_operand:VI_AVX2 2 "nonimmediate_operand")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
9212
;; Masked add/subtract expander for the 32/64-bit element modes
;; (VI48_AVX512VL).  Operand 3 is the vec_merge fallback value and
;; operand 4 the mask.
(define_expand "<plusminus_insn><mode>3_mask"
  [(set
     (match_operand:VI48_AVX512VL 0 "register_operand")
     (vec_merge:VI48_AVX512VL
       (plusminus:VI48_AVX512VL
         (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
         (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
       (match_operand:VI48_AVX512VL 3 "vector_move_operand")
       (match_operand:<avx512fmaskmode> 4 "register_operand")))]
  "TARGET_AVX512F"
  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
9223
;; Masked add/subtract expander for the 8/16-bit element modes
;; (VI12_AVX512VL), which require AVX512BW.  Operand 3 is the vec_merge
;; fallback value and operand 4 the mask.
(define_expand "<plusminus_insn><mode>3_mask"
  [(set
     (match_operand:VI12_AVX512VL 0 "register_operand")
     (vec_merge:VI12_AVX512VL
       (plusminus:VI12_AVX512VL
         (match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
         (match_operand:VI12_AVX512VL 2 "nonimmediate_operand"))
       (match_operand:VI12_AVX512VL 3 "vector_move_operand")
       (match_operand:<avx512fmaskmode> 4 "register_operand")))]
  "TARGET_AVX512BW"
  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
ef719a44 9234
;; Unmasked vector integer add/subtract: the two-operand legacy SSE form
;; and the three-operand AVX form.
;;
;; This pattern deliberately uses no "mask" subst attributes.  Its name
;; carries no <mask_name>, and merge-masked add/subtract is described by
;; the dedicated *<plusminus_insn><mode>3_mask patterns, which carry the
;; proper per-mode conditions.  Referring to <mask_operand3> or
;; <mask_prefix3> here would still activate the subst and create masked
;; variants for every VI_AVX2 mode guarded only by the subst's own
;; condition.
;; NOTE(review): the subst-activation behaviour is taken from the
;; gensupport/read-rtl documentation; confirm against the generator.
;;
;; The second alternative accepts any "v" register, so its prefix is
;; maybe_evex, matching the saturating add/subtract pattern in this file.
(define_insn "*<plusminus_insn><mode>3"
  [(set (match_operand:VI_AVX2 0 "register_operand" "=x,v")
        (plusminus:VI_AVX2
          (match_operand:VI_AVX2 1 "nonimmediate_operand" "<comm>0,v")
          (match_operand:VI_AVX2 2 "nonimmediate_operand" "xm,vm")))]
  "TARGET_SSE2
   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
  "@
   p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
   vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,maybe_evex")
   (set_attr "mode" "<sseinsnmode>")])
ef719a44 9250
700e2919
AI
;; Masked add/subtract for the 32/64-bit element modes.  Operand 4 is the
;; mask; operand 3 is the vec_merge fallback value, either tied to the
;; destination or a "C" constant.
(define_insn "*<plusminus_insn><mode>3_mask"
  [(set
     (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
     (vec_merge:VI48_AVX512VL
       (plusminus:VI48_AVX512VL
         (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "<comm>v")
         (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm"))
       (match_operand:VI48_AVX512VL 3 "vector_move_operand" "0C")
       (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512F
   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
  "vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
9265
;; Masked add/subtract for the 8/16-bit element modes (AVX512BW).
;; Operand 4 is the mask; operand 3 is the vec_merge fallback value,
;; either tied to the destination or a "C" constant.
(define_insn "*<plusminus_insn><mode>3_mask"
  [(set
     (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
     (vec_merge:VI12_AVX512VL
       (plusminus:VI12_AVX512VL
         (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "<comm>v")
         (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm"))
       (match_operand:VI12_AVX512VL 3 "vector_move_operand" "0C")
       (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512BW && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
  "vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
9279
;; Expander for saturating add/subtract on 8/16-bit element vectors
;; (sat_plusminus), with an optional masked variant through <mask_name>.
(define_expand "<sse2_avx2>_<plusminus_insn><mode>3<mask_name>"
  [(set
     (match_operand:VI12_AVX2 0 "register_operand")
     (sat_plusminus:VI12_AVX2
       (match_operand:VI12_AVX2 1 "nonimmediate_operand")
       (match_operand:VI12_AVX2 2 "nonimmediate_operand")))]
  "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
9287
;; Saturating add/subtract on 8/16-bit element vectors: two-operand SSE2
;; form and three-operand AVX form, the latter optionally masked via
;; <mask_operand3>.
(define_insn "*<sse2_avx2>_<plusminus_insn><mode>3<mask_name>"
  [(set
     (match_operand:VI12_AVX2 0 "register_operand" "=x,v")
     (sat_plusminus:VI12_AVX2
       (match_operand:VI12_AVX2 1 "nonimmediate_operand" "<comm>0,v")
       (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,vm")))]
  "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
  "@
   p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
   vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,maybe_evex")
   (set_attr "mode" "TI")])
9303
;; Byte-element vector multiply.  No insn pattern is emitted from the
;; template; the whole expansion is delegated to ix86_expand_vecop_qihi.
(define_expand "mul<mode>3<mask_name>"
  [(set
     (match_operand:VI1_AVX512 0 "register_operand")
     (mult:VI1_AVX512
       (match_operand:VI1_AVX512 1 "register_operand")
       (match_operand:VI1_AVX512 2 "register_operand")))]
  "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
{
  ix86_expand_vecop_qihi (MULT, operands[0], operands[1], operands[2]);
  DONE;
})
9313
;; Word-element vector multiply expander; operands are passed through
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
(define_expand "mul<mode>3<mask_name>"
  [(set
     (match_operand:VI2_AVX2 0 "register_operand")
     (mult:VI2_AVX2
       (match_operand:VI2_AVX2 1 "nonimmediate_operand")
       (match_operand:VI2_AVX2 2 "nonimmediate_operand")))]
  "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
  "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
ef719a44 9320
ed3e611e
AI
;; Word-element vector multiply keeping the low half of each product:
;; pmullw (two-operand) or vpmullw (three-operand, optionally masked).
(define_insn "*mul<mode>3<mask_name>"
  [(set
     (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
     (mult:VI2_AVX2
       (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,v")
       (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,vm")))]
  "TARGET_SSE2
   && ix86_binary_operator_ok (MULT, <MODE>mode, operands)
   && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
  "@
   pmullw\t{%2, %0|%0, %2}
   vpmullw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseimul")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<sseinsnmode>")])
ef719a44 9336
;; Expander for the high half of a widening word multiply: both inputs
;; are extended to <ssedoublemode>, multiplied, shifted right by 16 and
;; truncated back to the element width.
(define_expand "<s>mul<mode>3_highpart<mask_name>"
  [(set
     (match_operand:VI2_AVX2 0 "register_operand")
     (truncate:VI2_AVX2
       (lshiftrt:<ssedoublemode>
         (mult:<ssedoublemode>
           (any_extend:<ssedoublemode>
             (match_operand:VI2_AVX2 1 "nonimmediate_operand"))
           (any_extend:<ssedoublemode>
             (match_operand:VI2_AVX2 2 "nonimmediate_operand")))
         (const_int 16))))]
  "TARGET_SSE2
   && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
  "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
89d67cca 9350
ed3e611e
AI
;; High half of a widening word multiply: pmulh<u>w (two-operand) or
;; vpmulh<u>w (three-operand, optionally masked).  The RTL extends both
;; inputs, multiplies, shifts right by 16 and truncates.
(define_insn "*<s>mul<mode>3_highpart<mask_name>"
  [(set
     (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
     (truncate:VI2_AVX2
       (lshiftrt:<ssedoublemode>
         (mult:<ssedoublemode>
           (any_extend:<ssedoublemode>
             (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,v"))
           (any_extend:<ssedoublemode>
             (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,vm")))
         (const_int 16))))]
  "TARGET_SSE2
   && ix86_binary_operator_ok (MULT, <MODE>mode, operands)
   && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
  "@
   pmulh<u>w\t{%2, %0|%0, %2}
   vpmulh<u>w\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseimul")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<sseinsnmode>")])
9372
;; Widening unsigned multiply of the even-indexed SImode elements
;; (0, 2, ..., 14) of two V16SI vectors, producing V8DI.
(define_expand "vec_widen_umult_even_v16si<mask_name>"
  [(set (match_operand:V8DI 0 "register_operand")
        (mult:V8DI
          (zero_extend:V8DI
            (vec_select:V8SI
              (match_operand:V16SI 1 "nonimmediate_operand")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)
                         (const_int 8) (const_int 10)
                         (const_int 12) (const_int 14)])))
          (zero_extend:V8DI
            (vec_select:V8SI
              (match_operand:V16SI 2 "nonimmediate_operand")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)
                         (const_int 8) (const_int 10)
                         (const_int 12) (const_int 14)])))))]
  "TARGET_AVX512F"
  "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
9392
;; 512-bit vpmuludq: matches the even-element unsigned widening multiply
;; generated by the expander of the same name.
(define_insn "*vec_widen_umult_even_v16si<mask_name>"
  [(set (match_operand:V8DI 0 "register_operand" "=v")
        (mult:V8DI
          (zero_extend:V8DI
            (vec_select:V8SI
              (match_operand:V16SI 1 "nonimmediate_operand" "%v")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)
                         (const_int 8) (const_int 10)
                         (const_int 12) (const_int 14)])))
          (zero_extend:V8DI
            (vec_select:V8SI
              (match_operand:V16SI 2 "nonimmediate_operand" "vm")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)
                         (const_int 8) (const_int 10)
                         (const_int 12) (const_int 14)])))))]
  "TARGET_AVX512F && ix86_binary_operator_ok (MULT, V16SImode, operands)"
  "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "avx512f")
   (set_attr "type" "sseimul")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
9417
;; Widening unsigned multiply of the even-indexed SImode elements
;; (0, 2, 4, 6) of two V8SI vectors, producing V4DI.
(define_expand "vec_widen_umult_even_v8si<mask_name>"
  [(set (match_operand:V4DI 0 "register_operand")
        (mult:V4DI
          (zero_extend:V4DI
            (vec_select:V4SI
              (match_operand:V8SI 1 "nonimmediate_operand")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)])))
          (zero_extend:V4DI
            (vec_select:V4SI
              (match_operand:V8SI 2 "nonimmediate_operand")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)])))))]
  "TARGET_AVX2 && <mask_avx512vl_condition>"
  "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
9433
f1df7a1c
AI
;; 256-bit vpmuludq: even-element unsigned widening multiply, optionally
;; masked (the masked variant is gated by <mask_avx512vl_condition>).
(define_insn "*vec_widen_umult_even_v8si<mask_name>"
  [(set (match_operand:V4DI 0 "register_operand" "=v")
        (mult:V4DI
          (zero_extend:V4DI
            (vec_select:V4SI
              (match_operand:V8SI 1 "nonimmediate_operand" "%v")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)])))
          (zero_extend:V4DI
            (vec_select:V4SI
              (match_operand:V8SI 2 "nonimmediate_operand" "vm")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)])))))]
  "TARGET_AVX2 && <mask_avx512vl_condition>
   && ix86_binary_operator_ok (MULT, V8SImode, operands)"
  "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "OI")])
ef719a44 9453
;; Widening unsigned multiply of SImode elements 0 and 2 of two V4SI
;; vectors, producing V2DI.
(define_expand "vec_widen_umult_even_v4si<mask_name>"
  [(set (match_operand:V2DI 0 "register_operand")
        (mult:V2DI
          (zero_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 1 "nonimmediate_operand")
              (parallel [(const_int 0) (const_int 2)])))
          (zero_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 2 "nonimmediate_operand")
              (parallel [(const_int 0) (const_int 2)])))))]
  "TARGET_SSE2 && <mask_avx512vl_condition>"
  "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
9467
f1df7a1c
AI
;; 128-bit pmuludq/vpmuludq.  Alternative 0 is the destructive legacy
;; encoding; alternative 1 is the three-operand form, optionally masked.
(define_insn "*vec_widen_umult_even_v4si<mask_name>"
  [(set (match_operand:V2DI 0 "register_operand" "=x,v")
        (mult:V2DI
          (zero_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 1 "nonimmediate_operand" "%0,v")
              (parallel [(const_int 0) (const_int 2)])))
          (zero_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 2 "nonimmediate_operand" "xm,vm")
              (parallel [(const_int 0) (const_int 2)])))))]
  "TARGET_SSE2 && <mask_avx512vl_condition>
   && ix86_binary_operator_ok (MULT, V4SImode, operands)"
  "@
   pmuludq\t{%2, %0|%0, %2}
   vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseimul")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,maybe_evex")
   (set_attr "mode" "TI")])
9489
;; Widening signed multiply of the even-indexed SImode elements
;; (0, 2, ..., 14) of two V16SI vectors, producing V8DI.
(define_expand "vec_widen_smult_even_v16si<mask_name>"
  [(set (match_operand:V8DI 0 "register_operand")
        (mult:V8DI
          (sign_extend:V8DI
            (vec_select:V8SI
              (match_operand:V16SI 1 "nonimmediate_operand")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)
                         (const_int 8) (const_int 10)
                         (const_int 12) (const_int 14)])))
          (sign_extend:V8DI
            (vec_select:V8SI
              (match_operand:V16SI 2 "nonimmediate_operand")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)
                         (const_int 8) (const_int 10)
                         (const_int 12) (const_int 14)])))))]
  "TARGET_AVX512F"
  "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
9509
;; 512-bit vpmuldq: matches the even-element signed widening multiply.
(define_insn "*vec_widen_smult_even_v16si<mask_name>"
  [(set (match_operand:V8DI 0 "register_operand" "=v")
        (mult:V8DI
          (sign_extend:V8DI
            (vec_select:V8SI
              (match_operand:V16SI 1 "nonimmediate_operand" "%v")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)
                         (const_int 8) (const_int 10)
                         (const_int 12) (const_int 14)])))
          (sign_extend:V8DI
            (vec_select:V8SI
              (match_operand:V16SI 2 "nonimmediate_operand" "vm")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)
                         (const_int 8) (const_int 10)
                         (const_int 12) (const_int 14)])))))]
  "TARGET_AVX512F && ix86_binary_operator_ok (MULT, V16SImode, operands)"
  "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "avx512f")
   (set_attr "type" "sseimul")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
9534
;; Widening signed multiply of the even-indexed SImode elements
;; (0, 2, 4, 6) of two V8SI vectors, producing V4DI.
(define_expand "vec_widen_smult_even_v8si<mask_name>"
  [(set (match_operand:V4DI 0 "register_operand")
        (mult:V4DI
          (sign_extend:V4DI
            (vec_select:V4SI
              (match_operand:V8SI 1 "nonimmediate_operand")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)])))
          (sign_extend:V4DI
            (vec_select:V4SI
              (match_operand:V8SI 2 "nonimmediate_operand")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)])))))]
  "TARGET_AVX2 && <mask_avx512vl_condition>"
  "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
9550
f1df7a1c
AI
;; 256-bit vpmuldq: even-element signed widening multiply, optionally
;; masked.  The condition carries <mask_avx512vl_condition> so that the
;; masked variant is gated the same way as in the expander
;; vec_widen_smult_even_v8si<mask_name> and in the unsigned counterpart
;; *vec_widen_umult_even_v8si<mask_name>; previously this insn was the
;; only one of the group that omitted it.
(define_insn "*vec_widen_smult_even_v8si<mask_name>"
  [(set (match_operand:V4DI 0 "register_operand" "=v")
        (mult:V4DI
          (sign_extend:V4DI
            (vec_select:V4SI
              (match_operand:V8SI 1 "nonimmediate_operand" "%v")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)])))
          (sign_extend:V4DI
            (vec_select:V4SI
              (match_operand:V8SI 2 "nonimmediate_operand" "vm")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)])))))]
  "TARGET_AVX2 && <mask_avx512vl_condition>
   && ix86_binary_operator_ok (MULT, V8SImode, operands)"
  "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
9571
;; Widening signed multiply of SImode elements 0 and 2 of two V4SI
;; vectors, producing V2DI.
(define_expand "sse4_1_mulv2siv2di3<mask_name>"
  [(set (match_operand:V2DI 0 "register_operand")
        (mult:V2DI
          (sign_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 1 "nonimmediate_operand")
              (parallel [(const_int 0) (const_int 2)])))
          (sign_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 2 "nonimmediate_operand")
              (parallel [(const_int 0) (const_int 2)])))))]
  "TARGET_SSE4_1 && <mask_avx512vl_condition>"
  "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
95879c72 9585
;; 128-bit pmuldq/vpmuldq.  Alternatives 0 and 1 are the destructive
;; legacy encoding (differing only in register class); alternative 2 is
;; the three-operand form, optionally masked.
(define_insn "*sse4_1_mulv2siv2di3<mask_name>"
  [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
        (mult:V2DI
          (sign_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 1 "nonimmediate_operand" "%0,0,v")
              (parallel [(const_int 0) (const_int 2)])))
          (sign_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 2 "nonimmediate_operand" "Yrm,*xm,vm")
              (parallel [(const_int 0) (const_int 2)])))))]
  "TARGET_SSE4_1 && <mask_avx512vl_condition>
   && ix86_binary_operator_ok (MULT, V4SImode, operands)"
  "@
   pmuldq\t{%2, %0|%0, %2}
   pmuldq\t{%2, %0|%0, %2}
   vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "sseimul")
   (set_attr "prefix_data16" "1,1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,orig,vex")
   (set_attr "mode" "TI")])
9609
ed3e611e
AI
;; vpmaddwd expressed as an unspec (UNSPEC_PMADDWD512) over two 16-bit
;; element vectors, producing the unpacked (double-width element) mode.
;; A stray C-style ';' that followed the output template has been
;; dropped; in .md syntax it merely opened an empty comment.
(define_insn "avx512bw_pmaddwd512<mode><mask_name>"
  [(set (match_operand:<sseunpackmode> 0 "register_operand" "=v")
        (unspec:<sseunpackmode>
          [(match_operand:VI2_AVX2 1 "register_operand" "v")
           (match_operand:VI2_AVX2 2 "nonimmediate_operand" "vm")]
          UNSPEC_PMADDWD512))]
  "TARGET_AVX512BW && <mask_mode512bit_condition>"
  "vpmaddwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
9621
;; 256-bit multiply-add of 16-bit elements: the sign-extended products of
;; the even-indexed elements are added to the sign-extended products of
;; the odd-indexed elements, giving V8SI.
(define_expand "avx2_pmaddwd"
  [(set (match_operand:V8SI 0 "register_operand")
        (plus:V8SI
          (mult:V8SI
            (sign_extend:V8SI
              (vec_select:V8HI
                (match_operand:V16HI 1 "nonimmediate_operand")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)
                           (const_int 8) (const_int 10)
                           (const_int 12) (const_int 14)])))
            (sign_extend:V8SI
              (vec_select:V8HI
                (match_operand:V16HI 2 "nonimmediate_operand")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)
                           (const_int 8) (const_int 10)
                           (const_int 12) (const_int 14)]))))
          (mult:V8SI
            (sign_extend:V8SI
              (vec_select:V8HI (match_dup 1)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)
                           (const_int 9) (const_int 11)
                           (const_int 13) (const_int 15)])))
            (sign_extend:V8SI
              (vec_select:V8HI (match_dup 2)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)
                           (const_int 9) (const_int 11)
                           (const_int 13) (const_int 15)]))))))]
  "TARGET_AVX2"
  "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
9655
977e83a3
KY
;; Matcher for the 256-bit vpmaddwd generated by avx2_pmaddwd.
(define_insn "*avx2_pmaddwd"
  [(set (match_operand:V8SI 0 "register_operand" "=x")
        (plus:V8SI
          (mult:V8SI
            (sign_extend:V8SI
              (vec_select:V8HI
                (match_operand:V16HI 1 "nonimmediate_operand" "%x")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)
                           (const_int 8) (const_int 10)
                           (const_int 12) (const_int 14)])))
            (sign_extend:V8SI
              (vec_select:V8HI
                (match_operand:V16HI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)
                           (const_int 8) (const_int 10)
                           (const_int 12) (const_int 14)]))))
          (mult:V8SI
            (sign_extend:V8SI
              (vec_select:V8HI (match_dup 1)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)
                           (const_int 9) (const_int 11)
                           (const_int 13) (const_int 15)])))
            (sign_extend:V8SI
              (vec_select:V8HI (match_dup 2)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)
                           (const_int 9) (const_int 11)
                           (const_int 13) (const_int 15)]))))))]
  "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)"
  "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
9692
608dccd7
UB
;; 128-bit multiply-add of 16-bit elements: sign-extended products of the
;; even-indexed elements plus those of the odd-indexed elements, giving
;; V4SI.
(define_expand "sse2_pmaddwd"
  [(set (match_operand:V4SI 0 "register_operand")
        (plus:V4SI
          (mult:V4SI
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_operand:V8HI 1 "nonimmediate_operand")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)])))
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_operand:V8HI 2 "nonimmediate_operand")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)]))))
          (mult:V4SI
            (sign_extend:V4SI
              (vec_select:V4HI (match_dup 1)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)])))
            (sign_extend:V4SI
              (vec_select:V4HI (match_dup 2)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)]))))))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
9718
;; Matcher for the 128-bit pmaddwd.  Alternative 0 is the destructive
;; legacy encoding, alternative 1 the three-operand VEX encoding.
(define_insn "*sse2_pmaddwd"
  [(set (match_operand:V4SI 0 "register_operand" "=x,x")
        (plus:V4SI
          (mult:V4SI
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)])))
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)]))))
          (mult:V4SI
            (sign_extend:V4SI
              (vec_select:V4HI (match_dup 1)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)])))
            (sign_extend:V4SI
              (vec_select:V4HI (match_dup 2)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)]))))))]
  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
  "@
   pmaddwd\t{%2, %0|%0, %2}
   vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "atom_unit" "simul")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])
9752
f1df7a1c
AI
;; Element-wise multiply of 64-bit integer vectors via vpmullq,
;; optionally masked.
(define_insn "avx512dq_mul<mode>3<mask_name>"
  [(set (match_operand:VI8 0 "register_operand" "=v")
        (mult:VI8
          (match_operand:VI8 1 "register_operand" "v")
          (match_operand:VI8 2 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512DQ && <mask_mode512bit_condition>"
  "vpmullq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
9763
;; Element-wise multiply of 32-bit integer vectors.  Without SSE4.1 the
;; whole expansion is delegated to ix86_expand_sse2_mulv4si3; otherwise
;; any operand that is not a register or memory is forced into a
;; register and the pattern below is emitted.
(define_expand "mul<mode>3<mask_name>"
  [(set (match_operand:VI4_AVX512F 0 "register_operand")
        (mult:VI4_AVX512F
          (match_operand:VI4_AVX512F 1 "general_vector_operand")
          (match_operand:VI4_AVX512F 2 "general_vector_operand")))]
  "TARGET_SSE2 && <mask_mode512bit_condition>"
{
  if (!TARGET_SSE4_1)
    {
      ix86_expand_sse2_mulv4si3 (operands[0], operands[1], operands[2]);
      DONE;
    }

  /* Legitimize the two sources in order, then fix up the pair.  */
  for (int opno = 1; opno <= 2; opno++)
    if (!nonimmediate_operand (operands[opno], <MODE>mode))
      operands[opno] = force_reg (<MODE>mode, operands[opno]);

  ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
})
9785
;; pmulld/vpmulld.  Alternatives 0 and 1 are the destructive legacy
;; encoding; alternative 2 is the three-operand form, optionally masked.
(define_insn "*<sse4_1_avx2>_mul<mode>3<mask_name>"
  [(set (match_operand:VI4_AVX512F 0 "register_operand" "=Yr,*x,v")
        (mult:VI4_AVX512F
          (match_operand:VI4_AVX512F 1 "nonimmediate_operand" "%0,0,v")
          (match_operand:VI4_AVX512F 2 "nonimmediate_operand" "Yrm,*xm,vm")))]
  "TARGET_SSE4_1
   && ix86_binary_operator_ok (MULT, <MODE>mode, operands)
   && <mask_mode512bit_condition>"
  "@
   pmulld\t{%2, %0|%0, %2}
   pmulld\t{%2, %0|%0, %2}
   vpmulld\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "sseimul")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "<mask_prefix4>")
   (set_attr "btver2_decode" "vector,vector,vector")
   (set_attr "mode" "<sseinsnmode>")])
9a5cee02 9802
;; Element-wise multiply of 64-bit integer vectors; the expansion is
;; produced entirely by ix86_expand_sse2_mulvxdi3, so the RTL template
;; below only describes the operands.
(define_expand "mul<mode>3"
  [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand")
        (mult:VI8_AVX2_AVX512F
          (match_operand:VI8_AVX2_AVX512F 1 "register_operand")
          (match_operand:VI8_AVX2_AVX512F 2 "register_operand")))]
  "TARGET_SSE2"
{
  ix86_expand_sse2_mulvxdi3 (operands[0], operands[1], operands[2]);
  DONE;
})
9813
;; Widening multiply, "hi" variant: delegates to
;; ix86_expand_mul_widen_hilo with <u_bool> and a final argument of true.
(define_expand "vec_widen_<s>mult_hi_<mode>"
  [(match_operand:<sseunpackmode> 0 "register_operand")
   (any_extend:<sseunpackmode>
     (match_operand:VI124_AVX2 1 "register_operand"))
   (match_operand:VI124_AVX2 2 "register_operand")]
  "TARGET_SSE2"
{
  ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
                              <u_bool>, true);
  DONE;
})
9825
;; Widening multiply, "lo" variant: delegates to
;; ix86_expand_mul_widen_hilo with <u_bool> and a final argument of false.
(define_expand "vec_widen_<s>mult_lo_<mode>"
  [(match_operand:<sseunpackmode> 0 "register_operand")
   (any_extend:<sseunpackmode>
     (match_operand:VI124_AVX2 1 "register_operand"))
   (match_operand:VI124_AVX2 2 "register_operand")]
  "TARGET_SSE2"
{
  ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
                              <u_bool>, false);
  DONE;
})
9837
87b78516
RH
9838;; Most widen_<s>mult_even_<mode> can be handled directly from other
9839;; named patterns, but signed V4SI needs special help for plain SSE2.
;; Unmasked signed V4SI even-element widening multiply, available from
;; plain SSE2; expansion is delegated to ix86_expand_mul_widen_evenodd
;; with both boolean arguments false.
(define_expand "vec_widen_smult_even_v4si"
  [(match_operand:V2DI 0 "register_operand")
   (match_operand:V4SI 1 "nonimmediate_operand")
   (match_operand:V4SI 2 "nonimmediate_operand")]
  "TARGET_SSE2"
{
  ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
                                 false, false);
  DONE;
})
9850
93703e79
RH
;; Odd-element widening multiply: delegates to
;; ix86_expand_mul_widen_evenodd with <u_bool> and a final argument of
;; true.
(define_expand "vec_widen_<s>mult_odd_<mode>"
  [(match_operand:<sseunpackmode> 0 "register_operand")
   (any_extend:<sseunpackmode>
     (match_operand:VI4_AVX512F 1 "general_vector_operand"))
   (match_operand:VI4_AVX512F 2 "general_vector_operand")]
  "TARGET_SSE2"
{
  ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
                                 <u_bool>, true);
  DONE;
})
9862
ed3e611e
AI
;; Suffix appended to the pmaddwd generator name used by sdot_prod<mode>;
;; only V32HI needs a non-empty suffix.
(define_mode_attr SDOT_PMADD_SUF
  [(V32HI "512v32hi")
   (V16HI "")
   (V8HI "")])
9865
;; Signed dot product with accumulate: emits pmaddwd of operands 1 and 2
;; into a fresh register, then sets operand 0 to operand 3 plus that
;; result.
(define_expand "sdot_prod<mode>"
  [(match_operand:<sseunpackmode> 0 "register_operand")
   (match_operand:VI2_AVX2 1 "register_operand")
   (match_operand:VI2_AVX2 2 "register_operand")
   (match_operand:<sseunpackmode> 3 "register_operand")]
  "TARGET_SSE2"
{
  rtx madd = gen_reg_rtx (<sseunpackmode>mode);
  emit_insn (gen_<sse2_avx2>_pmaddwd<SDOT_PMADD_SUF> (madd, operands[1],
                                                       operands[2]));

  rtx sum = gen_rtx_PLUS (<sseunpackmode>mode, operands[3], madd);
  emit_insn (gen_rtx_SET (operands[0], sum));
  DONE;
})
9880
a2051b26
RH
9881;; Normally we use widen_mul_even/odd, but combine can't quite get it all
9882;; back together when madd is available.
;; XOP dot product of V4SI into V2DI: xop_pmacsdqh accumulates onto
;; operand 3 into a temporary, then xop_pmacsdql accumulates onto that
;; temporary into operand 0.
(define_expand "sdot_prodv4si"
  [(match_operand:V2DI 0 "register_operand")
   (match_operand:V4SI 1 "register_operand")
   (match_operand:V4SI 2 "register_operand")
   (match_operand:V2DI 3 "register_operand")]
  "TARGET_XOP"
{
  rtx partial = gen_reg_rtx (V2DImode);
  emit_insn (gen_xop_pmacsdqh (partial, operands[1], operands[2],
                               operands[3]));
  emit_insn (gen_xop_pmacsdql (operands[0], operands[1], operands[2],
                               partial));
  DONE;
})
9895
79d652a5
CH
;; Sum of absolute differences with accumulate for V16QI: psadbw yields
;; V2DI, which is moved into a V4SI register via convert_move and then
;; added to operand 3.
(define_expand "usadv16qi"
  [(match_operand:V4SI 0 "register_operand")
   (match_operand:V16QI 1 "register_operand")
   (match_operand:V16QI 2 "nonimmediate_operand")
   (match_operand:V4SI 3 "nonimmediate_operand")]
  "TARGET_SSE2"
{
  rtx sad = gen_reg_rtx (V2DImode);
  rtx sad_si = gen_reg_rtx (V4SImode);

  emit_insn (gen_sse2_psadbw (sad, operands[1], operands[2]));
  convert_move (sad_si, sad, 0);
  emit_insn (gen_addv4si3 (operands[0], sad_si, operands[3]));
  DONE;
})
9910
;; Sum of absolute differences with accumulate for V32QI: psadbw yields
;; V4DI, which is moved into a V8SI register via convert_move and then
;; added to operand 3.
(define_expand "usadv32qi"
  [(match_operand:V8SI 0 "register_operand")
   (match_operand:V32QI 1 "register_operand")
   (match_operand:V32QI 2 "nonimmediate_operand")
   (match_operand:V8SI 3 "nonimmediate_operand")]
  "TARGET_AVX2"
{
  rtx sad = gen_reg_rtx (V4DImode);
  rtx sad_si = gen_reg_rtx (V8SImode);

  emit_insn (gen_avx2_psadbw (sad, operands[1], operands[2]));
  convert_move (sad_si, sad, 0);
  emit_insn (gen_addv8si3 (operands[0], sad_si, operands[3]));
  DONE;
})
9925
;; Arithmetic right shift of 16/32-bit element vectors by a count held in
;; a register or given as an immediate.  length_immediate is 1 only when
;; operand 2 is a const_int.
(define_insn "ashr<mode>3"
  [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
        (ashiftrt:VI24_AVX2
          (match_operand:VI24_AVX2 1 "register_operand" "0,x")
          (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
  "TARGET_SSE2"
  "@
   psra<ssemodesuffix>\t{%2, %0|%0, %2}
   vpsra<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseishft")
   (set (attr "length_immediate")
     (if_then_else (match_operand 2 "const_int_operand")
       (const_string "1")
       (const_string "0")))
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<sseinsnmode>")])
9944
28e9a294
AI
;; AVX512VL arithmetic right shift, optionally masked.  Alternative 0
;; takes the count from a register; alternative 1 takes an immediate
;; count and also accepts a memory source.
(define_insn "<mask_codefor>ashr<mode>3<mask_name>"
  [(set (match_operand:VI24_AVX512BW_1 0 "register_operand" "=v,v")
        (ashiftrt:VI24_AVX512BW_1
          (match_operand:VI24_AVX512BW_1 1 "nonimmediate_operand" "v,vm")
          (match_operand:SI 2 "nonmemory_operand" "v,N")))]
  "TARGET_AVX512VL"
  "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sseishft")
   (set (attr "length_immediate")
     (if_then_else (match_operand 2 "const_int_operand")
       (const_string "1")
       (const_string "0")))
   (set_attr "mode" "<sseinsnmode>")])
9958
;; AVX512VL arithmetic right shift of V2DI (vpsraq), optionally masked;
;; the count operand is DImode here.
(define_insn "<mask_codefor>ashrv2di3<mask_name>"
  [(set (match_operand:V2DI 0 "register_operand" "=v,v")
        (ashiftrt:V2DI
          (match_operand:V2DI 1 "nonimmediate_operand" "v,vm")
          (match_operand:DI 2 "nonmemory_operand" "v,N")))]
  "TARGET_AVX512VL"
  "vpsraq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sseishft")
   (set (attr "length_immediate")
     (if_then_else (match_operand 2 "const_int_operand")
       (const_string "1")
       (const_string "0")))
   (set_attr "mode" "TI")])
9972
;; AVX512F arithmetic right shift over the VI248_AVX512BW_AVX512VL modes,
;; optionally masked; count in a register or as an immediate.
(define_insn "ashr<mode>3<mask_name>"
  [(set (match_operand:VI248_AVX512BW_AVX512VL 0 "register_operand" "=v,v")
        (ashiftrt:VI248_AVX512BW_AVX512VL
          (match_operand:VI248_AVX512BW_AVX512VL 1 "nonimmediate_operand" "v,vm")
          (match_operand:SI 2 "nonmemory_operand" "v,N")))]
  "TARGET_AVX512F"
  "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sseishft")
   (set (attr "length_immediate")
     (if_then_else (match_operand 2 "const_int_operand")
       (const_string "1")
       (const_string "0")))
   (set_attr "mode" "<sseinsnmode>")])
9986
3616dc70
AI
;; Logical shifts (any_lshift) of 16-bit element vectors.  Alternative 0
;; is the destructive legacy encoding; alternative 1 is the three-operand
;; form, optionally masked.
(define_insn "<shift_insn><mode>3<mask_name>"
  [(set (match_operand:VI2_AVX2_AVX512BW 0 "register_operand" "=x,v")
        (any_lshift:VI2_AVX2_AVX512BW
          (match_operand:VI2_AVX2_AVX512BW 1 "register_operand" "0,v")
          (match_operand:SI 2 "nonmemory_operand" "xN,vN")))]
  "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
  "@
   p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
   vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseishft")
   (set (attr "length_immediate")
     (if_then_else (match_operand 2 "const_int_operand")
       (const_string "1")
       (const_string "0")))
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<sseinsnmode>")])
10005
;; Logical shifts (any_lshift) of 32/64-bit element vectors in the
;; VI48_AVX2 modes.  Alternative 0 is the destructive legacy encoding;
;; alternative 1 is the three-operand form, optionally masked.
(define_insn "<shift_insn><mode>3<mask_name>"
  [(set (match_operand:VI48_AVX2 0 "register_operand" "=x,v")
        (any_lshift:VI48_AVX2
          (match_operand:VI48_AVX2 1 "register_operand" "0,v")
          (match_operand:SI 2 "nonmemory_operand" "xN,vN")))]
  "TARGET_SSE2 && <mask_mode512bit_condition>"
  "@
   p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
   vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseishft")
   (set (attr "length_immediate")
     (if_then_else (match_operand 2 "const_int_operand")
       (const_string "1")
       (const_string "0")))
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<sseinsnmode>")])
ef719a44 10024
;; Logical shifts (any_lshift) of the VI48_512 modes.  Alternative 0
;; shifts a register by a register or immediate count; alternative 1
;; shifts a memory source by an immediate count.
(define_insn "<shift_insn><mode>3<mask_name>"
  [(set (match_operand:VI48_512 0 "register_operand" "=v,v")
        (any_lshift:VI48_512
          (match_operand:VI48_512 1 "nonimmediate_operand" "v,m")
          (match_operand:SI 2 "nonmemory_operand" "vN,N")))]
  "TARGET_AVX512F && <mask_mode512bit_condition>"
  "vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "avx512f")
   (set_attr "type" "sseishft")
   (set (attr "length_immediate")
     (if_then_else (match_operand 2 "const_int_operand")
       (const_string "1")
       (const_string "0")))
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
10040
47490470 10041
;; Whole-vector left shift of a 128-bit integer vector.  The source is
;; viewed as V1TI, shifted into a fresh V1TI temporary (operand 3), and
;; the temporary's lowpart in the original mode (operand 4) is copied to
;; the destination.
(define_expand "vec_shl_<mode>"
  [(set (match_dup 3)
        (ashift:V1TI
          (match_operand:VI_128 1 "register_operand")
          (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
   (set (match_operand:VI_128 0 "register_operand") (match_dup 4))]
  "TARGET_SSE2"
{
  operands[1] = gen_lowpart (V1TImode, operands[1]);

  rtx shifted = gen_reg_rtx (V1TImode);
  operands[3] = shifted;
  operands[4] = gen_lowpart (<MODE>mode, shifted);
})
10054
;; Whole-register left shift (pslldq/vpslldq).  Operand 2 arrives as a
;; bit count that is a multiple of 8 and is divided by 8 before the
;; instruction is printed.
(define_insn "<sse2_avx2>_ashl<mode>3"
  [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,v")
        (ashift:VIMAX_AVX2
          (match_operand:VIMAX_AVX2 1 "register_operand" "0,v")
          (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
  "TARGET_SSE2"
{
  operands[2] = GEN_INT (INTVAL (operands[2]) / 8);

  if (which_alternative == 0)
    return "pslldq\t{%2, %0|%0, %2}";
  if (which_alternative == 1)
    return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
  gcc_unreachable ();
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseishft")
   (set_attr "length_immediate" "1")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<sseinsnmode>")])
798dd0ba 10080
;; Whole-vector logical right shift of a 128-bit integer vector.  The
;; source is viewed as V1TI, shifted into a fresh V1TI temporary
;; (operand 3), and the temporary's lowpart in the original mode
;; (operand 4) is copied to the destination.
(define_expand "vec_shr_<mode>"
  [(set (match_dup 3)
        (lshiftrt:V1TI
          (match_operand:VI_128 1 "register_operand")
          (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
   (set (match_operand:VI_128 0 "register_operand") (match_dup 4))]
  "TARGET_SSE2"
{
  operands[1] = gen_lowpart (V1TImode, operands[1]);

  rtx shifted = gen_reg_rtx (V1TImode);
  operands[3] = shifted;
  operands[4] = gen_lowpart (<MODE>mode, shifted);
})
10093
;; Whole-register logical right shift (psrldq/vpsrldq).  Operand 2
;; arrives as a bit count that is a multiple of 8 and is divided by 8
;; before the instruction is printed.
(define_insn "<sse2_avx2>_lshr<mode>3"
  [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,v")
        (lshiftrt:VIMAX_AVX2
          (match_operand:VIMAX_AVX2 1 "register_operand" "0,v")
          (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
  "TARGET_SSE2"
{
  operands[2] = GEN_INT (INTVAL (operands[2]) / 8);

  if (which_alternative == 0)
    return "psrldq\t{%2, %0|%0, %2}";
  if (which_alternative == 1)
    return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
  gcc_unreachable ();
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseishft")
   (set_attr "length_immediate" "1")
   (set_attr "atom_unit" "sishuf")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<sseinsnmode>")])
10120
6ead0238
AI
;; Rotate with per-element counts taken from a vector operand,
;; optionally masked.
(define_insn "<avx512>_<rotate>v<mode><mask_name>"
  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
        (any_rotate:VI48_AVX512VL
          (match_operand:VI48_AVX512VL 1 "register_operand" "v")
          (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512F"
  "vp<rotate>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
10130
6ead0238
AI
;; Rotate by an immediate count in the range accepted by
;; const_0_to_255_operand, optionally masked.
(define_insn "<avx512>_<rotate><mode><mask_name>"
  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
        (any_rotate:VI48_AVX512VL
          (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")
          (match_operand:SI 2 "const_0_to_255_operand")))]
  "TARGET_AVX512F"
  "vp<rotate><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
52325f2c 10140
575d952c
AI
;; Integer max/min expander over the VI124_256_AVX512F_AVX512BW modes;
;; the preparation statement legitimizes the operands for the binary
;; <CODE> operation.
(define_expand "<code><mode>3"
  [(set (match_operand:VI124_256_AVX512F_AVX512BW 0 "register_operand")
        (maxmin:VI124_256_AVX512F_AVX512BW
          (match_operand:VI124_256_AVX512F_AVX512BW 1 "nonimmediate_operand")
          (match_operand:VI124_256_AVX512F_AVX512BW 2 "nonimmediate_operand")))]
  "TARGET_AVX2"
  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
10148
575d952c
AI
10149(define_insn "*avx2_<code><mode>3"
10150 [(set (match_operand:VI124_256 0 "register_operand" "=v")
10151 (maxmin:VI124_256
10152 (match_operand:VI124_256 1 "nonimmediate_operand" "%v")
10153 (match_operand:VI124_256 2 "nonimmediate_operand" "vm")))]
10154 "TARGET_AVX2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10155 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8a0436cb
JJ
10156 [(set_attr "type" "sseiadd")
10157 (set_attr "prefix_extra" "1")
575d952c 10158 (set_attr "prefix" "vex")
8a0436cb 10159 (set_attr "mode" "OI")])
ef719a44 10160
575d952c
AI
10161(define_expand "<code><mode>3_mask"
10162 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
10163 (vec_merge:VI48_AVX512VL
10164 (maxmin:VI48_AVX512VL
10165 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
10166 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
10167 (match_operand:VI48_AVX512VL 3 "vector_move_operand")
10168 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
10169 "TARGET_AVX512F"
10170 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
10171
;; Integer max/min on the VI48_AVX512VL modes.  Operand 1 is marked
;; commutative ("%"), and ix86_binary_operator_ok is part of the insn
;; condition.
;; NOTE(review): the pattern name says "avx512bw", but the condition is
;; TARGET_AVX512F and the mode iterator is the VI48 (dword/qword) one --
;; the name looks like a misnomer for an AVX512F pattern; confirm before
;; relying on it when reading RTL dumps.
10172(define_insn "*avx512bw_<code><mode>3<mask_name>"
10173 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
10174 (maxmin:VI48_AVX512VL
10175 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "%v")
10176 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")))]
10177 "TARGET_AVX512F && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10178 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10179 [(set_attr "type" "sseiadd")
10180 (set_attr "prefix_extra" "1")
10181 (set_attr "prefix" "maybe_evex")
10182 (set_attr "mode" "<sseinsnmode>")])
10183
e7b533a2 10184(define_insn "<mask_codefor><code><mode>3<mask_name>"
575d952c
AI
10185 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
10186 (maxmin:VI12_AVX512VL
10187 (match_operand:VI12_AVX512VL 1 "register_operand" "v")
10188 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")))]
10189 "TARGET_AVX512BW"
e7b533a2
AI
10190 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10191 [(set_attr "type" "sseiadd")
10192 (set_attr "prefix" "evex")
10193 (set_attr "mode" "<sseinsnmode>")])
10194
;; Expander for signed/unsigned max/min on the VI8_AVX2_AVX512BW modes.
;; With TARGET_AVX512F (and either V8DImode or TARGET_AVX512VL) the
;; operands are only fixed up and the standard pattern is emitted.
;; Otherwise the operation is rewritten as a compare-and-select and handed
;; to ix86_expand_int_vcond.
32469ccc 10195(define_expand "<code><mode>3"
575d952c
AI
10196 [(set (match_operand:VI8_AVX2_AVX512BW 0 "register_operand")
10197 (maxmin:VI8_AVX2_AVX512BW
10198 (match_operand:VI8_AVX2_AVX512BW 1 "register_operand")
10199 (match_operand:VI8_AVX2_AVX512BW 2 "register_operand")))]
52325f2c
UB
10200 "TARGET_SSE4_2"
10201{
575d952c
AI
10202 if (TARGET_AVX512F
10203 && (<MODE>mode == V8DImode || TARGET_AVX512VL))
10204 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
10205 else
10206 {
10207 enum rtx_code code;
10208 rtx xops[6];
10209 bool ok;
977e83a3 10210
52325f2c 10211
  /* xops uses the same layout as the vcond expanders in this file:
     [0] destination, [1] value if the comparison holds, [2] value
     otherwise, [3] comparison rtx, [4]/[5] the compared operands.  */
575d952c 10212 xops[0] = operands[0];
52325f2c 10213
  /* The comparison below is always "op1 > op2": max selects op1 when it
     holds, min selects op2 (hence the swapped arms).  */
575d952c
AI
10214 if (<CODE> == SMAX || <CODE> == UMAX)
10215 {
10216 xops[1] = operands[1];
10217 xops[2] = operands[2];
10218 }
10219 else
10220 {
10221 xops[1] = operands[2];
10222 xops[2] = operands[1];
10223 }
52325f2c 10224
  /* Unsigned codes need the unsigned greater-than comparison.  */
575d952c 10225 code = (<CODE> == UMAX || <CODE> == UMIN) ? GTU : GT;
52325f2c 10226
575d952c
AI
10227 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
10228 xops[4] = operands[1];
10229 xops[5] = operands[2];
10230
10231 ok = ix86_expand_int_vcond (xops);
10232 gcc_assert (ok);
10233 DONE;
10234 }
52325f2c
UB
10235})
10236
10237(define_expand "<code><mode>3"
82e86dc6 10238 [(set (match_operand:VI124_128 0 "register_operand")
f327a48e 10239 (smaxmin:VI124_128
82e86dc6
UB
10240 (match_operand:VI124_128 1 "nonimmediate_operand")
10241 (match_operand:VI124_128 2 "nonimmediate_operand")))]
52325f2c
UB
10242 "TARGET_SSE2"
10243{
10244 if (TARGET_SSE4_1 || <MODE>mode == V8HImode)
10245 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
10246 else
10247 {
10248 rtx xops[6];
10249 bool ok;
10250
10251 xops[0] = operands[0];
bdbebb7d
JJ
10252 operands[1] = force_reg (<MODE>mode, operands[1]);
10253 operands[2] = force_reg (<MODE>mode, operands[2]);
52325f2c
UB
10254
10255 if (<CODE> == SMAX)
10256 {
10257 xops[1] = operands[1];
10258 xops[2] = operands[2];
10259 }
10260 else
10261 {
10262 xops[1] = operands[2];
10263 xops[2] = operands[1];
10264 }
10265
10266 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
10267 xops[4] = operands[1];
10268 xops[5] = operands[2];
10269
10270 ok = ix86_expand_int_vcond (xops);
10271 gcc_assert (ok);
10272 DONE;
10273 }
10274})
977e83a3 10275
c305ca7f 10276(define_insn "*sse4_1_<code><mode>3<mask_name>"
45392c76 10277 [(set (match_operand:VI14_128 0 "register_operand" "=Yr,*x,v")
798dd0ba 10278 (smaxmin:VI14_128
45392c76
IE
10279 (match_operand:VI14_128 1 "nonimmediate_operand" "%0,0,v")
10280 (match_operand:VI14_128 2 "nonimmediate_operand" "Yrm,*xm,vm")))]
c305ca7f
AI
10281 "TARGET_SSE4_1
10282 && <mask_mode512bit_condition>
10283 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
798dd0ba 10284 "@
45392c76 10285 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
cbb734aa 10286 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
c305ca7f 10287 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
45392c76 10288 [(set_attr "isa" "noavx,noavx,avx")
798dd0ba 10289 (set_attr "type" "sseiadd")
45392c76
IE
10290 (set_attr "prefix_extra" "1,1,*")
10291 (set_attr "prefix" "orig,orig,vex")
4150f926
UB
10292 (set_attr "mode" "TI")])
10293
78e8956b 10294(define_insn "*<code>v8hi3"
798dd0ba 10295 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
78e8956b 10296 (smaxmin:V8HI
798dd0ba
UB
10297 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
10298 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))]
78e8956b 10299 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
798dd0ba
UB
10300 "@
10301 p<maxmin_int>w\t{%2, %0|%0, %2}
10302 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
10303 [(set_attr "isa" "noavx,avx")
10304 (set_attr "type" "sseiadd")
10305 (set_attr "prefix_data16" "1,*")
10306 (set_attr "prefix_extra" "*,1")
10307 (set_attr "prefix" "orig,vex")
ef719a44
RH
10308 (set_attr "mode" "TI")])
10309
;; Expander for unsigned max/min on 128-bit byte/word/dword vectors.
;; Three strategies, chosen in order:
;;   1. TARGET_SSE4_1, or V16QImode: fix up the operands and emit the
;;      standard pattern.
;;   2. UMAX on V8HImode: a subtract/add sequence (see below).
;;   3. Anything else: an unsigned compare-and-select through
;;      ix86_expand_int_vcond.
32469ccc 10310(define_expand "<code><mode>3"
82e86dc6 10311 [(set (match_operand:VI124_128 0 "register_operand")
f327a48e 10312 (umaxmin:VI124_128
82e86dc6
UB
10313 (match_operand:VI124_128 1 "nonimmediate_operand")
10314 (match_operand:VI124_128 2 "nonimmediate_operand")))]
9fb93f89
RH
10315 "TARGET_SSE2"
10316{
52325f2c 10317 if (TARGET_SSE4_1 || <MODE>mode == V16QImode)
32469ccc 10318 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
52325f2c
UB
10319 else if (<CODE> == UMAX && <MODE>mode == V8HImode)
10320 {
  /* Emits op3 = ussub (op1, op2) followed by op0 = op3 + op2.  Going by
     the generator name, the first insn is an unsigned saturating
     subtract, which makes the sum equal to umax (op1, op2).  */
10321 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
bdbebb7d 10322 operands[1] = force_reg (<MODE>mode, operands[1]);
  /* If the destination is the same rtx as op2, writing the intermediate
     result into it would overwrite op2 before the add reads it, so a
     fresh pseudo is used for the intermediate value instead.  */
52325f2c
UB
10323 if (rtx_equal_p (op3, op2))
10324 op3 = gen_reg_rtx (V8HImode);
10325 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
10326 emit_insn (gen_addv8hi3 (op0, op3, op2));
10327 DONE;
10328 }
9a5cee02 10329 else
a427621f
UB
10330 {
10331 rtx xops[6];
10332 bool ok;
10333
bdbebb7d
JJ
10334 operands[1] = force_reg (<MODE>mode, operands[1]);
10335 operands[2] = force_reg (<MODE>mode, operands[2]);
10336
  /* xops layout: [0] destination, [1] value if "op1 >u op2" holds,
     [2] value otherwise, [3] the comparison, [4]/[5] its operands.  */
a427621f 10337 xops[0] = operands[0];
52325f2c
UB
10338
10339 if (<CODE> == UMAX)
10340 {
10341 xops[1] = operands[1];
10342 xops[2] = operands[2];
10343 }
10344 else
10345 {
10346 xops[1] = operands[2];
10347 xops[2] = operands[1];
10348 }
10349
10350 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
a427621f
UB
10351 xops[4] = operands[1];
10352 xops[5] = operands[2];
52325f2c 10353
a427621f
UB
10354 ok = ix86_expand_int_vcond (xops);
10355 gcc_assert (ok);
10356 DONE;
10357 }
69a2964c
RH
10358})
10359
c305ca7f 10360(define_insn "*sse4_1_<code><mode>3<mask_name>"
45392c76 10361 [(set (match_operand:VI24_128 0 "register_operand" "=Yr,*x,v")
798dd0ba 10362 (umaxmin:VI24_128
45392c76
IE
10363 (match_operand:VI24_128 1 "nonimmediate_operand" "%0,0,v")
10364 (match_operand:VI24_128 2 "nonimmediate_operand" "Yrm,*xm,vm")))]
c305ca7f
AI
10365 "TARGET_SSE4_1
10366 && <mask_mode512bit_condition>
10367 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
798dd0ba 10368 "@
45392c76 10369 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
cbb734aa 10370 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
c305ca7f 10371 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
45392c76 10372 [(set_attr "isa" "noavx,noavx,avx")
798dd0ba 10373 (set_attr "type" "sseiadd")
45392c76
IE
10374 (set_attr "prefix_extra" "1,1,*")
10375 (set_attr "prefix" "orig,orig,vex")
798dd0ba
UB
10376 (set_attr "mode" "TI")])
10377
10378(define_insn "*<code>v16qi3"
10379 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
10380 (umaxmin:V16QI
10381 (match_operand:V16QI 1 "nonimmediate_operand" "%0,x")
10382 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))]
10383 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
10384 "@
10385 p<maxmin_int>b\t{%2, %0|%0, %2}
10386 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
10387 [(set_attr "isa" "noavx,avx")
10388 (set_attr "type" "sseiadd")
10389 (set_attr "prefix_data16" "1,*")
10390 (set_attr "prefix_extra" "*,1")
10391 (set_attr "prefix" "orig,vex")
10392 (set_attr "mode" "TI")])
10393
ef719a44
RH
10394;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10395;;
10396;; Parallel integral comparisons
10397;;
10398;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10399
977e83a3 10400(define_expand "avx2_eq<mode>3"
82e86dc6 10401 [(set (match_operand:VI_256 0 "register_operand")
b5344bf4 10402 (eq:VI_256
82e86dc6
UB
10403 (match_operand:VI_256 1 "nonimmediate_operand")
10404 (match_operand:VI_256 2 "nonimmediate_operand")))]
977e83a3
KY
10405 "TARGET_AVX2"
10406 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
10407
10408(define_insn "*avx2_eq<mode>3"
b5344bf4
UB
10409 [(set (match_operand:VI_256 0 "register_operand" "=x")
10410 (eq:VI_256
10411 (match_operand:VI_256 1 "nonimmediate_operand" "%x")
10412 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
977e83a3
KY
10413 "TARGET_AVX2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
10414 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10415 [(set_attr "type" "ssecmp")
10416 (set_attr "prefix_extra" "1")
10417 (set_attr "prefix" "vex")
10418 (set_attr "mode" "OI")])
10419
54967fb0
AI
10420(define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>"
10421 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
10422 (unspec:<avx512fmaskmode>
10423 [(match_operand:VI12_AVX512VL 1 "register_operand")
10424 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand")]
10425 UNSPEC_MASKED_EQ))]
10426 "TARGET_AVX512BW"
10427 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
10428
10429(define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>"
0fe65b75
AI
10430 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
10431 (unspec:<avx512fmaskmode>
54967fb0
AI
10432 [(match_operand:VI48_AVX512VL 1 "register_operand")
10433 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand")]
0fe65b75
AI
10434 UNSPEC_MASKED_EQ))]
10435 "TARGET_AVX512F"
10436 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
10437
;; Equality compare of byte/word vectors (VI12_AVX512VL) producing a mask:
;; operand 0 is a mask register ("Yk"), operands 1 and 2 are the compared
;; vectors (operand 1 is marked commutative).
;; The insn is gated on TARGET_AVX512BW, the same condition used by the
;; <avx512>_eq<mode>3<mask_scalar_merge_name> expander for this mode
;; iterator.  Gating on TARGET_AVX512F alone would let the insn match for
;; byte/word modes on targets that lack the byte/word EVEX compares.
54967fb0 10438(define_insn "<avx512>_eq<mode>3<mask_scalar_merge_name>_1"
be792bce 10439 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
0fe65b75 10440 (unspec:<avx512fmaskmode>
54967fb0
AI
10441 [(match_operand:VI12_AVX512VL 1 "register_operand" "%v")
10442 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")]
10443 UNSPEC_MASKED_EQ))]
10444 "TARGET_AVX512BW && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
10445 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
10446 [(set_attr "type" "ssecmp")
10447 (set_attr "prefix_extra" "1")
10448 (set_attr "prefix" "evex")
10449 (set_attr "mode" "<sseinsnmode>")])
10450
10451(define_insn "<avx512>_eq<mode>3<mask_scalar_merge_name>_1"
10452 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
10453 (unspec:<avx512fmaskmode>
10454 [(match_operand:VI48_AVX512VL 1 "register_operand" "%v")
10455 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")]
0fe65b75
AI
10456 UNSPEC_MASKED_EQ))]
10457 "TARGET_AVX512F && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
a95ec517 10458 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
0fe65b75
AI
10459 [(set_attr "type" "ssecmp")
10460 (set_attr "prefix_extra" "1")
10461 (set_attr "prefix" "evex")
10462 (set_attr "mode" "<sseinsnmode>")])
10463
798dd0ba 10464(define_insn "*sse4_1_eqv2di3"
45392c76 10465 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,x")
798dd0ba 10466 (eq:V2DI
45392c76
IE
10467 (match_operand:V2DI 1 "nonimmediate_operand" "%0,0,x")
10468 (match_operand:V2DI 2 "nonimmediate_operand" "Yrm,*xm,xm")))]
798dd0ba
UB
10469 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
10470 "@
45392c76 10471 pcmpeqq\t{%2, %0|%0, %2}
798dd0ba
UB
10472 pcmpeqq\t{%2, %0|%0, %2}
10473 vpcmpeqq\t{%2, %1, %0|%0, %1, %2}"
45392c76 10474 [(set_attr "isa" "noavx,noavx,avx")
798dd0ba
UB
10475 (set_attr "type" "ssecmp")
10476 (set_attr "prefix_extra" "1")
45392c76 10477 (set_attr "prefix" "orig,orig,vex")
95879c72
L
10478 (set_attr "mode" "TI")])
10479
ffbaf337 10480(define_insn "*sse2_eq<mode>3"
798dd0ba
UB
10481 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
10482 (eq:VI124_128
10483 (match_operand:VI124_128 1 "nonimmediate_operand" "%0,x")
10484 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
43a8b705 10485 "TARGET_SSE2 && !TARGET_XOP
04e1d06b 10486 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
798dd0ba 10487 "@
cbb734aa
UB
10488 pcmpeq<ssemodesuffix>\t{%2, %0|%0, %2}
10489 vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
798dd0ba
UB
10490 [(set_attr "isa" "noavx,avx")
10491 (set_attr "type" "ssecmp")
10492 (set_attr "prefix_data16" "1,*")
10493 (set_attr "prefix" "orig,vex")
ef719a44
RH
10494 (set_attr "mode" "TI")])
10495
798dd0ba 10496(define_expand "sse2_eq<mode>3"
82e86dc6 10497 [(set (match_operand:VI124_128 0 "register_operand")
798dd0ba 10498 (eq:VI124_128
82e86dc6
UB
10499 (match_operand:VI124_128 1 "nonimmediate_operand")
10500 (match_operand:VI124_128 2 "nonimmediate_operand")))]
798dd0ba
UB
10501 "TARGET_SSE2 && !TARGET_XOP "
10502 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
10503
ffbaf337 10504(define_expand "sse4_1_eqv2di3"
82e86dc6 10505 [(set (match_operand:V2DI 0 "register_operand")
ffbaf337 10506 (eq:V2DI
82e86dc6
UB
10507 (match_operand:V2DI 1 "nonimmediate_operand")
10508 (match_operand:V2DI 2 "nonimmediate_operand")))]
ffbaf337
UB
10509 "TARGET_SSE4_1"
10510 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
10511
798dd0ba 10512(define_insn "sse4_2_gtv2di3"
45392c76 10513 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,x")
798dd0ba 10514 (gt:V2DI
45392c76
IE
10515 (match_operand:V2DI 1 "register_operand" "0,0,x")
10516 (match_operand:V2DI 2 "nonimmediate_operand" "Yrm,*xm,xm")))]
798dd0ba
UB
10517 "TARGET_SSE4_2"
10518 "@
45392c76 10519 pcmpgtq\t{%2, %0|%0, %2}
798dd0ba
UB
10520 pcmpgtq\t{%2, %0|%0, %2}
10521 vpcmpgtq\t{%2, %1, %0|%0, %1, %2}"
45392c76 10522 [(set_attr "isa" "noavx,noavx,avx")
798dd0ba 10523 (set_attr "type" "ssecmp")
9a5cee02 10524 (set_attr "prefix_extra" "1")
45392c76 10525 (set_attr "prefix" "orig,orig,vex")
95879c72
L
10526 (set_attr "mode" "TI")])
10527
977e83a3 10528(define_insn "avx2_gt<mode>3"
b5344bf4
UB
10529 [(set (match_operand:VI_256 0 "register_operand" "=x")
10530 (gt:VI_256
10531 (match_operand:VI_256 1 "register_operand" "x")
10532 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
977e83a3
KY
10533 "TARGET_AVX2"
10534 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10535 [(set_attr "type" "ssecmp")
10536 (set_attr "prefix_extra" "1")
10537 (set_attr "prefix" "vex")
10538 (set_attr "mode" "OI")])
10539
54967fb0 10540(define_insn "<avx512>_gt<mode>3<mask_scalar_merge_name>"
be792bce 10541 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
0fe65b75 10542 (unspec:<avx512fmaskmode>
54967fb0
AI
10543 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
10544 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))]
0fe65b75 10545 "TARGET_AVX512F"
a95ec517 10546 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
0fe65b75
AI
10547 [(set_attr "type" "ssecmp")
10548 (set_attr "prefix_extra" "1")
10549 (set_attr "prefix" "evex")
10550 (set_attr "mode" "<sseinsnmode>")])
10551
54967fb0
AI
10552(define_insn "<avx512>_gt<mode>3<mask_scalar_merge_name>"
10553 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
10554 (unspec:<avx512fmaskmode>
10555 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
10556 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))]
10557 "TARGET_AVX512BW"
10558 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
10559 [(set_attr "type" "ssecmp")
10560 (set_attr "prefix_extra" "1")
10561 (set_attr "prefix" "evex")
10562 (set_attr "mode" "<sseinsnmode>")])
10563
ef719a44 10564(define_insn "sse2_gt<mode>3"
798dd0ba
UB
10565 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
10566 (gt:VI124_128
10567 (match_operand:VI124_128 1 "register_operand" "0,x")
10568 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
43a8b705 10569 "TARGET_SSE2 && !TARGET_XOP"
798dd0ba 10570 "@
cbb734aa
UB
10571 pcmpgt<ssemodesuffix>\t{%2, %0|%0, %2}
10572 vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
798dd0ba
UB
10573 [(set_attr "isa" "noavx,avx")
10574 (set_attr "type" "ssecmp")
10575 (set_attr "prefix_data16" "1,*")
10576 (set_attr "prefix" "orig,vex")
3b8dd071
L
10577 (set_attr "mode" "TI")])
10578
f62ce24f
AI
10579(define_expand "vcond<V_512:mode><VI_512:mode>"
10580 [(set (match_operand:V_512 0 "register_operand")
10581 (if_then_else:V_512
10582 (match_operator 3 ""
10583 [(match_operand:VI_512 4 "nonimmediate_operand")
10584 (match_operand:VI_512 5 "general_operand")])
10585 (match_operand:V_512 1)
10586 (match_operand:V_512 2)))]
10587 "TARGET_AVX512F
10588 && (GET_MODE_NUNITS (<V_512:MODE>mode)
10589 == GET_MODE_NUNITS (<VI_512:MODE>mode))"
10590{
10591 bool ok = ix86_expand_int_vcond (operands);
10592 gcc_assert (ok);
10593 DONE;
10594})
10595
32469ccc 10596(define_expand "vcond<V_256:mode><VI_256:mode>"
82e86dc6 10597 [(set (match_operand:V_256 0 "register_operand")
32469ccc
JJ
10598 (if_then_else:V_256
10599 (match_operator 3 ""
82e86dc6
UB
10600 [(match_operand:VI_256 4 "nonimmediate_operand")
10601 (match_operand:VI_256 5 "general_operand")])
10602 (match_operand:V_256 1)
10603 (match_operand:V_256 2)))]
32469ccc
JJ
10604 "TARGET_AVX2
10605 && (GET_MODE_NUNITS (<V_256:MODE>mode)
10606 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
10607{
10608 bool ok = ix86_expand_int_vcond (operands);
10609 gcc_assert (ok);
10610 DONE;
10611})
10612
e9e1d143 10613(define_expand "vcond<V_128:mode><VI124_128:mode>"
82e86dc6 10614 [(set (match_operand:V_128 0 "register_operand")
e9e1d143 10615 (if_then_else:V_128
977e83a3 10616 (match_operator 3 ""
82e86dc6
UB
10617 [(match_operand:VI124_128 4 "nonimmediate_operand")
10618 (match_operand:VI124_128 5 "general_operand")])
10619 (match_operand:V_128 1)
10620 (match_operand:V_128 2)))]
e9e1d143
RG
10621 "TARGET_SSE2
10622 && (GET_MODE_NUNITS (<V_128:MODE>mode)
10623 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
ae46a07a 10624{
1262fd02
UB
10625 bool ok = ix86_expand_int_vcond (operands);
10626 gcc_assert (ok);
10627 DONE;
ae46a07a
RH
10628})
10629
e9e1d143 10630(define_expand "vcond<VI8F_128:mode>v2di"
82e86dc6 10631 [(set (match_operand:VI8F_128 0 "register_operand")
e9e1d143 10632 (if_then_else:VI8F_128
977e83a3 10633 (match_operator 3 ""
82e86dc6
UB
10634 [(match_operand:V2DI 4 "nonimmediate_operand")
10635 (match_operand:V2DI 5 "general_operand")])
10636 (match_operand:VI8F_128 1)
10637 (match_operand:VI8F_128 2)))]
798dd0ba
UB
10638 "TARGET_SSE4_2"
10639{
10640 bool ok = ix86_expand_int_vcond (operands);
10641 gcc_assert (ok);
10642 DONE;
10643})
10644
f62ce24f
AI
10645(define_expand "vcondu<V_512:mode><VI_512:mode>"
10646 [(set (match_operand:V_512 0 "register_operand")
10647 (if_then_else:V_512
10648 (match_operator 3 ""
10649 [(match_operand:VI_512 4 "nonimmediate_operand")
10650 (match_operand:VI_512 5 "nonimmediate_operand")])
10651 (match_operand:V_512 1 "general_operand")
10652 (match_operand:V_512 2 "general_operand")))]
10653 "TARGET_AVX512F
10654 && (GET_MODE_NUNITS (<V_512:MODE>mode)
10655 == GET_MODE_NUNITS (<VI_512:MODE>mode))"
10656{
10657 bool ok = ix86_expand_int_vcond (operands);
10658 gcc_assert (ok);
10659 DONE;
10660})
10661
32469ccc 10662(define_expand "vcondu<V_256:mode><VI_256:mode>"
82e86dc6 10663 [(set (match_operand:V_256 0 "register_operand")
32469ccc
JJ
10664 (if_then_else:V_256
10665 (match_operator 3 ""
82e86dc6
UB
10666 [(match_operand:VI_256 4 "nonimmediate_operand")
10667 (match_operand:VI_256 5 "nonimmediate_operand")])
10668 (match_operand:V_256 1 "general_operand")
10669 (match_operand:V_256 2 "general_operand")))]
32469ccc
JJ
10670 "TARGET_AVX2
10671 && (GET_MODE_NUNITS (<V_256:MODE>mode)
10672 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
10673{
10674 bool ok = ix86_expand_int_vcond (operands);
10675 gcc_assert (ok);
10676 DONE;
10677})
10678
e9e1d143 10679(define_expand "vcondu<V_128:mode><VI124_128:mode>"
82e86dc6 10680 [(set (match_operand:V_128 0 "register_operand")
e9e1d143 10681 (if_then_else:V_128
977e83a3 10682 (match_operator 3 ""
82e86dc6
UB
10683 [(match_operand:VI124_128 4 "nonimmediate_operand")
10684 (match_operand:VI124_128 5 "nonimmediate_operand")])
10685 (match_operand:V_128 1 "general_operand")
10686 (match_operand:V_128 2 "general_operand")))]
e9e1d143
RG
10687 "TARGET_SSE2
10688 && (GET_MODE_NUNITS (<V_128:MODE>mode)
10689 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
ae46a07a 10690{
1262fd02
UB
10691 bool ok = ix86_expand_int_vcond (operands);
10692 gcc_assert (ok);
10693 DONE;
ae46a07a
RH
10694})
10695
e9e1d143 10696(define_expand "vcondu<VI8F_128:mode>v2di"
82e86dc6 10697 [(set (match_operand:VI8F_128 0 "register_operand")
e9e1d143 10698 (if_then_else:VI8F_128
977e83a3 10699 (match_operator 3 ""
82e86dc6
UB
10700 [(match_operand:V2DI 4 "nonimmediate_operand")
10701 (match_operand:V2DI 5 "nonimmediate_operand")])
10702 (match_operand:VI8F_128 1 "general_operand")
10703 (match_operand:VI8F_128 2 "general_operand")))]
798dd0ba
UB
10704 "TARGET_SSE4_2"
10705{
10706 bool ok = ix86_expand_int_vcond (operands);
10707 gcc_assert (ok);
10708 DONE;
10709})
10710
2205ed25 10711(define_mode_iterator VEC_PERM_AVX2
44167383 10712 [V16QI V8HI V4SI V2DI V4SF V2DF
0c7189ae 10713 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
44167383 10714 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
c003c6d6
AI
10715 (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")
10716 (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
f5db965f 10717 (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
28adf6e7 10718 (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512VBMI")])
44167383 10719
2205ed25 10720(define_expand "vec_perm<mode>"
82e86dc6
UB
10721 [(match_operand:VEC_PERM_AVX2 0 "register_operand")
10722 (match_operand:VEC_PERM_AVX2 1 "register_operand")
10723 (match_operand:VEC_PERM_AVX2 2 "register_operand")
10724 (match_operand:<sseintvecmode> 3 "register_operand")]
44167383 10725 "TARGET_SSSE3 || TARGET_AVX || TARGET_XOP"
f90e8e2e 10726{
2205ed25 10727 ix86_expand_vec_perm (operands);
f90e8e2e
AS
10728 DONE;
10729})
10730
0772d476
RH
10731(define_mode_iterator VEC_PERM_CONST
10732 [(V4SF "TARGET_SSE") (V4SI "TARGET_SSE")
10733 (V2DF "TARGET_SSE") (V2DI "TARGET_SSE")
10734 (V16QI "TARGET_SSE2") (V8HI "TARGET_SSE2")
10735 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
10736 (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
c003c6d6
AI
10737 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
10738 (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
f5db965f 10739 (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
9f9f6115 10740 (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")])
0772d476
RH
10741
;; Constant-selector vector permute for the VEC_PERM_CONST modes.
;; Operands: 0 destination, 1 and 2 the input vectors, 3 the selector in
;; the corresponding integer vector mode (no predicate is applied to it).
;; The expander condition itself is empty; per-mode enabling comes from
;; the conditions attached to the VEC_PERM_CONST iterator entries.
;; Expansion is delegated to ix86_expand_vec_perm_const and FAILs when
;; that helper returns false.
10742(define_expand "vec_perm_const<mode>"
82e86dc6
UB
10743 [(match_operand:VEC_PERM_CONST 0 "register_operand")
10744 (match_operand:VEC_PERM_CONST 1 "register_operand")
10745 (match_operand:VEC_PERM_CONST 2 "register_operand")
10746 (match_operand:<sseintvecmode> 3)]
0772d476
RH
10747 ""
10748{
10749 if (ix86_expand_vec_perm_const (operands))
10750 DONE;
10751 else
10752 FAIL;
10753})
10754
ef719a44
RH
10755;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10756;;
edc5bbcd 10757;; Parallel bitwise logical operations
ef719a44
RH
10758;;
10759;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10760
;; Vector one's complement, expressed as XOR of operand 1 with an all-ones
;; vector.  The preparation code builds that constant and stores it in
;; operands[2], which the template refers to through (match_dup 2).
10761(define_expand "one_cmpl<mode>2"
82e86dc6
UB
10762 [(set (match_operand:VI 0 "register_operand")
10763 (xor:VI (match_operand:VI 1 "nonimmediate_operand")
d8700b1c
UB
10764 (match_dup 2)))]
10765 "TARGET_SSE"
ef719a44
RH
10766{
10767 int i, n = GET_MODE_NUNITS (<MODE>mode);
10768 rtvec v = rtvec_alloc (n);
10769
  /* Fill every element of the constant vector with -1.  */
10770 for (i = 0; i < n; ++i)
10771 RTVEC_ELT (v, i) = constm1_rtx;
10772
  /* The constant is forced into a register before being used as the
     second XOR operand.  */
10773 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
10774})
10775
700e2919 10776(define_expand "<sse2_avx2>_andnot<mode>3"
82e86dc6 10777 [(set (match_operand:VI_AVX2 0 "register_operand")
1707583b 10778 (and:VI_AVX2
82e86dc6
UB
10779 (not:VI_AVX2 (match_operand:VI_AVX2 1 "register_operand"))
10780 (match_operand:VI_AVX2 2 "nonimmediate_operand")))]
700e2919 10781 "TARGET_SSE2")
35f3782f 10782
700e2919
AI
10783(define_expand "<sse2_avx2>_andnot<mode>3_mask"
10784 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
10785 (vec_merge:VI48_AVX512VL
10786 (and:VI48_AVX512VL
10787 (not:VI48_AVX512VL
10788 (match_operand:VI48_AVX512VL 1 "register_operand"))
10789 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
10790 (match_operand:VI48_AVX512VL 3 "vector_move_operand")
10791 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
10792 "TARGET_AVX512F")
10793
10794(define_expand "<sse2_avx2>_andnot<mode>3_mask"
10795 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
10796 (vec_merge:VI12_AVX512VL
10797 (and:VI12_AVX512VL
10798 (not:VI12_AVX512VL
10799 (match_operand:VI12_AVX512VL 1 "register_operand"))
10800 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand"))
10801 (match_operand:VI12_AVX512VL 3 "vector_move_operand")
10802 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
10803 "TARGET_AVX512BW")
10804
;; Vector AND-NOT: operand 0 = (~operand 1) & operand 2 for every VI mode.
;; Alternative 0 is the two-operand form (operand 1 tied to the
;; destination, isa "noavx"); alternative 1 is the three-operand
;; "v"-prefixed form (isa "avx").
;; The output code picks the mnemonic from the insn's "mode" attribute and
;; the vector mode, then formats it into a static buffer.
;; NOTE(review): the alternative-1 template embeds <mask_operand3_1>
;; although this pattern's name carries no <mask_name>, unlike the
;; any_logic insn later in the file -- confirm that this is intended.
;; NOTE(review): "prefix" is "orig,vex" here while that any_logic insn uses
;; "<mask_prefix3>"; confirm that "vex" is right when alternative 1 emits
;; the suffixed pandn<ssemodesuffix>/pandnq forms.
10805(define_insn "*andnot<mode>3"
3f97cb0b 10806 [(set (match_operand:VI 0 "register_operand" "=x,v")
d8700b1c 10807 (and:VI
3f97cb0b
AI
10808 (not:VI (match_operand:VI 1 "register_operand" "0,v"))
10809 (match_operand:VI 2 "nonimmediate_operand" "xm,vm")))]
700e2919 10810 "TARGET_SSE"
d8700b1c 10811{
  /* The returned template lives in this static buffer, so it is
     overwritten by the next call.  */
a9ccbba2 10812 static char buf[64];
d8700b1c 10813 const char *ops;
1707583b
UB
10814 const char *tmp;
10815
  /* The case labels below fall through on purpose: a wider mode runs its
     own assertion and then the assertions of the narrower modes before
     reaching the shared mnemonic selection.  */
10816 switch (get_attr_mode (insn))
10817 {
a9ccbba2
AI
10818 case MODE_XI:
10819 gcc_assert (TARGET_AVX512F);
1707583b 10820 case MODE_OI:
26358fb6 10821 gcc_assert (TARGET_AVX2 || TARGET_AVX512VL);
1707583b 10822 case MODE_TI:
26358fb6
AI
10823 gcc_assert (TARGET_SSE2 || TARGET_AVX512VL);
  /* Dword/qword element modes get the element-size-suffixed mnemonic
     when the relevant AVX-512 feature is enabled.  When the feature
     test fails, control falls through to the next group and finally to
     the default, which uses "pandnq" under TARGET_AVX512VL and the
     unsuffixed "pandn" otherwise.  */
10824 switch (<MODE>mode)
10825 {
10826 case V16SImode:
10827 case V8DImode:
10828 if (TARGET_AVX512F)
10829 {
10830 tmp = "pandn<ssemodesuffix>";
10831 break;
10832 }
10833 case V8SImode:
10834 case V4DImode:
10835 case V4SImode:
10836 case V2DImode:
10837 if (TARGET_AVX512VL)
10838 {
10839 tmp = "pandn<ssemodesuffix>";
10840 break;
10841 }
10842 default:
10843 tmp = TARGET_AVX512VL ? "pandnq" : "pandn";
10844 }
1707583b
UB
10845 break;
10846
  /* Packed-single insn modes all use "andnps"; the labels again fall
     through so that each wider mode also checks the narrower
     requirements.  */
8586e4bd
UB
10847 case MODE_V16SF:
10848 gcc_assert (TARGET_AVX512F);
1707583b
UB
10849 case MODE_V8SF:
10850 gcc_assert (TARGET_AVX);
10851 case MODE_V4SF:
10852 gcc_assert (TARGET_SSE);
10853
10854 tmp = "andnps";
10855 break;
10856
10857 default:
10858 gcc_unreachable ();
10859 }
95879c72 10860
  /* Operand references are written with doubled '%' because the string
     is first run through snprintf, which inserts the mnemonic at %s.  */
d8700b1c
UB
10861 switch (which_alternative)
10862 {
10863 case 0:
10864 ops = "%s\t{%%2, %%0|%%0, %%2}";
10865 break;
10866 case 1:
47490470 10867 ops = "v%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
d8700b1c
UB
10868 break;
10869 default:
10870 gcc_unreachable ();
10871 }
ef719a44 10872
d8700b1c
UB
10873 snprintf (buf, sizeof (buf), ops, tmp);
10874 return buf;
10875}
10876 [(set_attr "isa" "noavx,avx")
10877 (set_attr "type" "sselog")
10878 (set (attr "prefix_data16")
10879 (if_then_else
10880 (and (eq_attr "alternative" "0")
10881 (eq_attr "mode" "TI"))
10882 (const_string "1")
10883 (const_string "*")))
700e2919 10884 (set_attr "prefix" "orig,vex")
d8700b1c 10885 (set (attr "mode")
659c0e68
JM
10886 (cond [(and (match_test "<MODE_SIZE> == 16")
10887 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
daa51295
UB
10888 (const_string "<ssePSmode>")
10889 (match_test "TARGET_AVX2")
10890 (const_string "<sseinsnmode>")
10891 (match_test "TARGET_AVX")
10892 (if_then_else
039eee3f 10893 (match_test "<MODE_SIZE> > 16")
daa51295
UB
10894 (const_string "V8SF")
10895 (const_string "<sseinsnmode>"))
10896 (ior (not (match_test "TARGET_SSE2"))
10897 (match_test "optimize_function_for_size_p (cfun)"))
10898 (const_string "V4SF")
10899 ]
10900 (const_string "<sseinsnmode>")))])
edc5bbcd 10901
700e2919
AI
;; Masked AND-NOT for the VI48_AVX512VL modes: (~operand 1) & operand 2 is
;; vec_merge'd with operand 3 under mask register operand 4 ("Yk").
;; Operand 3 is either tied to the destination ("0") or matches the "C"
;; constraint.  Enabled under TARGET_AVX512F.
10902(define_insn "*andnot<mode>3_mask"
10903 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
10904 (vec_merge:VI48_AVX512VL
10905 (and:VI48_AVX512VL
10906 (not:VI48_AVX512VL
10907 (match_operand:VI48_AVX512VL 1 "register_operand" "v"))
10908 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm"))
10909 (match_operand:VI48_AVX512VL 3 "vector_move_operand" "0C")
10910 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
10911 "TARGET_AVX512F"
10912 "vpandn<ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}";
10913 [(set_attr "type" "sselog")
10914 (set_attr "prefix" "evex")
10915 (set_attr "mode" "<sseinsnmode>")])
10916
;; Masked AND-NOT for the byte/word VI12_AVX512VL modes: (~operand 1) &
;; operand 2 is vec_merge'd with operand 3 under mask register operand 4.
;; Enabled under TARGET_AVX512BW.
;; NOTE(review): the template prints vpandn<ssemodesuffix>.  If
;; <ssemodesuffix> yields "b"/"w" for these modes, the emitted mnemonics
;; would be vpandnb/vpandnw, whereas the Intel SDM appears to define only
;; VPANDND and VPANDNQ -- confirm that this pattern can actually assemble.
10917(define_insn "*andnot<mode>3_mask"
10918 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
10919 (vec_merge:VI12_AVX512VL
10920 (and:VI12_AVX512VL
10921 (not:VI12_AVX512VL
10922 (match_operand:VI12_AVX512VL 1 "register_operand" "v"))
10923 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm"))
10924 (match_operand:VI12_AVX512VL 3 "vector_move_operand" "0C")
10925 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
10926 "TARGET_AVX512BW"
10927 "vpandn<ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}";
10928 [(set_attr "type" "sselog")
10929 (set_attr "prefix" "evex")
10930 (set_attr "mode" "<sseinsnmode>")])
10931
94237c92 10932(define_expand "<code><mode>3"
82e86dc6 10933 [(set (match_operand:VI 0 "register_operand")
d8700b1c 10934 (any_logic:VI
42bace41
JJ
10935 (match_operand:VI 1 "nonimmediate_or_const_vector_operand")
10936 (match_operand:VI 2 "nonimmediate_or_const_vector_operand")))]
35f3782f 10937 "TARGET_SSE"
42bace41
JJ
10938{
10939 ix86_expand_vector_logical_operator (<CODE>, <MODE>mode, operands);
10940 DONE;
10941})
ef719a44 10942
;; Vector logical instruction (any_logic) for VI modes.  Alternative 0 is
;; the two-operand form, alternative 1 the three-operand "v"-prefixed form
;; (which also carries the optional mask operand text).  The C body picks
;; the mnemonic stem from the insn's "mode" attribute, then formats it into
;; a static buffer together with the per-alternative operand template.
47490470 10943(define_insn "<mask_codefor><code><mode>3<mask_name>"
3f97cb0b 10944 [(set (match_operand:VI 0 "register_operand" "=x,v")
d8700b1c 10945 (any_logic:VI
3f97cb0b
AI
10946 (match_operand:VI 1 "nonimmediate_operand" "%0,v")
10947 (match_operand:VI 2 "nonimmediate_operand" "xm,vm")))]
47490470 10948 "TARGET_SSE && <mask_mode512bit_condition>
94237c92 10949 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
d8700b1c 10950{
a9ccbba2 10951 static char buf[64];
d8700b1c 10952 const char *ops;
1707583b
UB
10953 const char *tmp;
10954
10955 switch (get_attr_mode (insn))
10956 {
a9ccbba2
AI
10957 case MODE_XI:
10958 gcc_assert (TARGET_AVX512F);
/* FALLTHRU: the wider integer modes also run the narrower modes' asserts.  */
1707583b 10959 case MODE_OI:
26358fb6 10960 gcc_assert (TARGET_AVX2 || TARGET_AVX512VL);
/* FALLTHRU */
1707583b 10961 case MODE_TI:
26358fb6
AI
10962 gcc_assert (TARGET_SSE2 || TARGET_AVX512VL);
10963 switch (<MODE>mode)
10964 {
10965 case V16SImode:
10966 case V8DImode:
10967 if (TARGET_AVX512F)
10968 {
10969 tmp = "p<logic><ssemodesuffix>";
10970 break;
10971 }
/* FALLTHRU when TARGET_AVX512F is not set.  */
10972 case V8SImode:
10973 case V4DImode:
10974 case V4SImode:
10975 case V2DImode:
10976 if (TARGET_AVX512VL)
10977 {
10978 tmp = "p<logic><ssemodesuffix>";
10979 break;
10980 }
/* FALLTHRU when TARGET_AVX512VL is not set.  */
10981 default:
10982 tmp = TARGET_AVX512VL ? "p<logic>q" : "p<logic>";
10983 }
1707583b
UB
10984 break;
10985
a9ccbba2
AI
10986 case MODE_V16SF:
10987 gcc_assert (TARGET_AVX512F);
/* FALLTHRU */
1707583b
UB
10988 case MODE_V8SF:
10989 gcc_assert (TARGET_AVX);
/* FALLTHRU */
10990 case MODE_V4SF:
10991 gcc_assert (TARGET_SSE);
10992
10993 tmp = "<logic>ps";
10994 break;
10995
10996 default:
10997 gcc_unreachable ();
10998 }
35f3782f 10999
d8700b1c
UB
11000 switch (which_alternative)
11001 {
11002 case 0:
11003 ops = "%s\t{%%2, %%0|%%0, %%2}";
11004 break;
11005 case 1:
47490470 11006 ops = "v%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
d8700b1c
UB
11007 break;
11008 default:
11009 gcc_unreachable ();
11010 }
95879c72 11011
/* Substitute the chosen mnemonic stem for the %s in the operand template.  */
d8700b1c
UB
11012 snprintf (buf, sizeof (buf), ops, tmp);
11013 return buf;
11014}
11015 [(set_attr "isa" "noavx,avx")
11016 (set_attr "type" "sselog")
11017 (set (attr "prefix_data16")
11018 (if_then_else
11019 (and (eq_attr "alternative" "0")
11020 (eq_attr "mode" "TI"))
11021 (const_string "1")
11022 (const_string "*")))
47490470 11023 (set_attr "prefix" "<mask_prefix3>")
d8700b1c 11024 (set (attr "mode")
659c0e68
JM
11025 (cond [(and (match_test "<MODE_SIZE> == 16")
11026 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
daa51295
UB
11027 (const_string "<ssePSmode>")
11028 (match_test "TARGET_AVX2")
11029 (const_string "<sseinsnmode>")
11030 (match_test "TARGET_AVX")
11031 (if_then_else
039eee3f 11032 (match_test "<MODE_SIZE> > 16")
daa51295
UB
11033 (const_string "V8SF")
11034 (const_string "<sseinsnmode>"))
11035 (ior (not (match_test "TARGET_SSE2"))
11036 (match_test "optimize_function_for_size_p (cfun)"))
11037 (const_string "V4SF")
11038 ]
11039 (const_string "<sseinsnmode>")))])
d8700b1c 11040
;; vptestm for the VI12_AVX512VL modes (TARGET_AVX512BW): writes the
;; UNSPEC_TESTM result of operands 1 and 2 to the mask register operand 0.
54967fb0 11041(define_insn "<avx512>_testm<mode>3<mask_scalar_merge_name>"
be792bce 11042 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
0fe65b75 11043 (unspec:<avx512fmaskmode>
54967fb0
AI
11044 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
11045 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")]
11046 UNSPEC_TESTM))]
11047 "TARGET_AVX512BW"
11048 "vptestm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
11049 [(set_attr "prefix" "evex")
11050 (set_attr "mode" "<sseinsnmode>")])
11051
;; vptestm for the VI48_AVX512VL modes (TARGET_AVX512F): same shape as the
;; VI12 pattern, differing only in the mode iterator and enabling condition.
11052(define_insn "<avx512>_testm<mode>3<mask_scalar_merge_name>"
11053 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
11054 (unspec:<avx512fmaskmode>
11055 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
11056 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")]
0fe65b75
AI
11057 UNSPEC_TESTM))]
11058 "TARGET_AVX512F"
a95ec517 11059 "vptestm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
0fe65b75
AI
11060 [(set_attr "prefix" "evex")
11061 (set_attr "mode" "<sseinsnmode>")])
11062
54967fb0
AI
;; vptestnm for the VI12_AVX512VL modes (TARGET_AVX512BW): writes the
;; UNSPEC_TESTNM result of operands 1 and 2 to the mask register operand 0.
11063(define_insn "<avx512>_testnm<mode>3<mask_scalar_merge_name>"
11064 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
11065 (unspec:<avx512fmaskmode>
11066 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
11067 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")]
11068 UNSPEC_TESTNM))]
11069 "TARGET_AVX512BW"
11070 "vptestnm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
11071 [(set_attr "prefix" "evex")
11072 (set_attr "mode" "<sseinsnmode>")])
11073
;; vptestnm for the VI48_AVX512VL modes (TARGET_AVX512F): same shape as the
;; VI12 pattern, differing only in the mode iterator and enabling condition.
11074(define_insn "<avx512>_testnm<mode>3<mask_scalar_merge_name>"
be792bce 11075 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
0fe65b75 11076 (unspec:<avx512fmaskmode>
54967fb0
AI
11077 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
11078 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")]
0fe65b75 11079 UNSPEC_TESTNM))]
260d3642
IT
11080 "TARGET_AVX512F"
11081 "vptestnm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
0fe65b75
AI
11082 [(set_attr "prefix" "evex")
11083 (set_attr "mode" "<sseinsnmode>")])
11084
ef719a44
RH
11085;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11086;;
11087;; Parallel integral element swizzling
11088;;
11089;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11090
;; Truncating pack: both wide inputs are reinterpreted (gen_lowpart) in the
;; narrower-element <ssepackmode>, and ix86_expand_vec_extract_even_odd is
;; called with a final argument of 0 to build the result in operand 0.
8dfb9f16 11091(define_expand "vec_pack_trunc_<mode>"
82e86dc6 11092 [(match_operand:<ssepackmode> 0 "register_operand")
e8d08206
AI
11093 (match_operand:VI248_AVX2_8_AVX512F 1 "register_operand")
11094 (match_operand:VI248_AVX2_8_AVX512F 2 "register_operand")]
89d67cca
DN
11095 "TARGET_SSE2"
11096{
8dfb9f16
UB
11097 rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]);
11098 rtx op2 = gen_lowpart (<ssepackmode>mode, operands[2]);
0fac5151 11099 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
89d67cca
DN
11100 DONE;
11101})
11102
;; Signed-saturating pack to bytes: each input is ss_truncate'd to half the
;; result vector and the halves are concatenated.  Alternative 0 is the
;; two-operand packsswb, alternative 1 the three-operand vpacksswb with the
;; optional mask operand text.
;; NOTE(review): operand 0 uses "x" in both alternatives although operands
;; 1/2 accept "v" in alternative 1 -- confirm whether that is intentional.
d281ef42 11103(define_insn "<sse2_avx2>_packsswb<mask_name>"
f5db965f
IT
11104 [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x")
11105 (vec_concat:VI1_AVX512
977e83a3 11106 (ss_truncate:<ssehalfvecmode>
d281ef42 11107 (match_operand:<sseunpackmode> 1 "register_operand" "0,v"))
977e83a3 11108 (ss_truncate:<ssehalfvecmode>
d281ef42
AI
11109 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,vm"))))]
11110 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
1ee8b298
UB
11111 "@
11112 packsswb\t{%2, %0|%0, %2}
d281ef42 11113 vpacksswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
1ee8b298
UB
11114 [(set_attr "isa" "noavx,avx")
11115 (set_attr "type" "sselog")
11116 (set_attr "prefix_data16" "1,*")
d281ef42 11117 (set_attr "prefix" "orig,maybe_evex")
977e83a3 11118 (set_attr "mode" "<sseinsnmode>")])
95879c72 11119
ed3e611e
AI
;; Signed-saturating pack of dwords to words: each input is ss_truncate'd to
;; half the result vector and the halves are concatenated.  Alternative 0 is
;; the two-operand packssdw, alternative 1 the three-operand vpackssdw with
;; the optional mask operand text.
;; The "prefix" attribute for alternative 1 is "maybe_evex" (as on the
;; packsswb pattern): that alternative accepts "v" registers and a mask
;; operand, so it is not restricted to the VEX encoding.
11120(define_insn "<sse2_avx2>_packssdw<mask_name>"
11121 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
977e83a3
KY
11122 (vec_concat:VI2_AVX2
11123 (ss_truncate:<ssehalfvecmode>
ed3e611e 11124 (match_operand:<sseunpackmode> 1 "register_operand" "0,v"))
977e83a3 11125 (ss_truncate:<ssehalfvecmode>
ed3e611e
AI
11126 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,vm"))))]
11127 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
1ee8b298
UB
11128 "@
11129 packssdw\t{%2, %0|%0, %2}
ed3e611e 11130 vpackssdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
1ee8b298
UB
11131 [(set_attr "isa" "noavx,avx")
11132 (set_attr "type" "sselog")
11133 (set_attr "prefix_data16" "1,*")
11134 (set_attr "prefix" "orig,maybe_evex")
977e83a3 11135 (set_attr "mode" "<sseinsnmode>")])
95879c72 11136
;; Unsigned-saturating pack to bytes: each input is us_truncate'd to half
;; the result vector and the halves are concatenated.  Alternative 0 is the
;; two-operand packuswb, alternative 1 the three-operand vpackuswb with the
;; optional mask operand text.
;; The "prefix" attribute for alternative 1 is "maybe_evex", matching the
;; otherwise identical packsswb pattern (same mode iterator, same condition,
;; same mask substitution).
d281ef42 11137(define_insn "<sse2_avx2>_packuswb<mask_name>"
f5db965f
IT
11138 [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x")
11139 (vec_concat:VI1_AVX512
977e83a3 11140 (us_truncate:<ssehalfvecmode>
d281ef42 11141 (match_operand:<sseunpackmode> 1 "register_operand" "0,v"))
977e83a3 11142 (us_truncate:<ssehalfvecmode>
d281ef42
AI
11143 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,vm"))))]
11144 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
1ee8b298
UB
11145 "@
11146 packuswb\t{%2, %0|%0, %2}
d281ef42 11147 vpackuswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
1ee8b298
UB
11148 [(set_attr "isa" "noavx,avx")
11149 (set_attr "type" "sselog")
11150 (set_attr "prefix_data16" "1,*")
11151 (set_attr "prefix" "orig,maybe_evex")
977e83a3 11152 (set_attr "mode" "<sseinsnmode>")])
95879c72 11153
6edf4f24
AI
;; 512-bit vpunpckhbw: within each 16-byte group, bytes 8-15 of operand 1
;; are interleaved with the same bytes of operand 2 (indices 64-127 of the
;; V128QI concatenation address operand 2).
11154(define_insn "avx512bw_interleave_highv64qi<mask_name>"
11155 [(set (match_operand:V64QI 0 "register_operand" "=v")
11156 (vec_select:V64QI
11157 (vec_concat:V128QI
11158 (match_operand:V64QI 1 "register_operand" "v")
11159 (match_operand:V64QI 2 "nonimmediate_operand" "vm"))
11160 (parallel [(const_int 8) (const_int 72)
11161 (const_int 9) (const_int 73)
11162 (const_int 10) (const_int 74)
11163 (const_int 11) (const_int 75)
11164 (const_int 12) (const_int 76)
11165 (const_int 13) (const_int 77)
11166 (const_int 14) (const_int 78)
11167 (const_int 15) (const_int 79)
11168 (const_int 24) (const_int 88)
11169 (const_int 25) (const_int 89)
11170 (const_int 26) (const_int 90)
11171 (const_int 27) (const_int 91)
11172 (const_int 28) (const_int 92)
11173 (const_int 29) (const_int 93)
11174 (const_int 30) (const_int 94)
11175 (const_int 31) (const_int 95)
11176 (const_int 40) (const_int 104)
11177 (const_int 41) (const_int 105)
11178 (const_int 42) (const_int 106)
11179 (const_int 43) (const_int 107)
11180 (const_int 44) (const_int 108)
11181 (const_int 45) (const_int 109)
11182 (const_int 46) (const_int 110)
11183 (const_int 47) (const_int 111)
11184 (const_int 56) (const_int 120)
11185 (const_int 57) (const_int 121)
11186 (const_int 58) (const_int 122)
11187 (const_int 59) (const_int 123)
11188 (const_int 60) (const_int 124)
11189 (const_int 61) (const_int 125)
11190 (const_int 62) (const_int 126)
11191 (const_int 63) (const_int 127)])))]
11192 "TARGET_AVX512BW"
11193 "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11194 [(set_attr "type" "sselog")
11195 (set_attr "prefix" "evex")
11196 (set_attr "mode" "XI")])
11197
;; 256-bit vpunpckhbw: within each 16-byte group, bytes 8-15 of operand 1
;; are interleaved with the same bytes of operand 2 (indices 32-63 of the
;; V64QI concatenation address operand 2).
;; The condition includes <mask_avx512bw_condition>, like the matching
;; avx2_interleave_lowv32qi pattern, so the masked byte-element variant is
;; only enabled together with AVX512BW.
11198(define_insn "avx2_interleave_highv32qi<mask_name>"
11199 [(set (match_operand:V32QI 0 "register_operand" "=v")
977e83a3
KY
11200 (vec_select:V32QI
11201 (vec_concat:V64QI
6edf4f24
AI
11202 (match_operand:V32QI 1 "register_operand" "v")
11203 (match_operand:V32QI 2 "nonimmediate_operand" "vm"))
977e83a3
KY
11204 (parallel [(const_int 8) (const_int 40)
11205 (const_int 9) (const_int 41)
11206 (const_int 10) (const_int 42)
11207 (const_int 11) (const_int 43)
11208 (const_int 12) (const_int 44)
11209 (const_int 13) (const_int 45)
11210 (const_int 14) (const_int 46)
11211 (const_int 15) (const_int 47)
11212 (const_int 24) (const_int 56)
11213 (const_int 25) (const_int 57)
11214 (const_int 26) (const_int 58)
11215 (const_int 27) (const_int 59)
11216 (const_int 28) (const_int 60)
11217 (const_int 29) (const_int 61)
11218 (const_int 30) (const_int 62)
0c7189ae 11219 (const_int 31) (const_int 63)])))]
6edf4f24
AI
11220 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
11221 "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
977e83a3 11222 [(set_attr "type" "sselog")
6edf4f24 11223 (set_attr "prefix" "<mask_prefix>")
977e83a3
KY
11224 (set_attr "mode" "OI")])
11225
6edf4f24
AI
;; 128-bit punpckhbw/vpunpckhbw: bytes 8-15 of operand 1 are interleaved
;; with bytes 8-15 of operand 2 (indices 16-31 of the V32QI concatenation
;; address operand 2).  Alternative 0 is the two-operand form, alternative 1
;; the three-operand form with the optional mask operand text.
;; The condition includes <mask_avx512bw_condition>, like the matching
;; vec_interleave_lowv16qi pattern, so the masked byte-element variant is
;; only enabled together with AVX512BW.
11226(define_insn "vec_interleave_highv16qi<mask_name>"
11227 [(set (match_operand:V16QI 0 "register_operand" "=x,v")
ef719a44
RH
11228 (vec_select:V16QI
11229 (vec_concat:V32QI
6edf4f24
AI
11230 (match_operand:V16QI 1 "register_operand" "0,v")
11231 (match_operand:V16QI 2 "nonimmediate_operand" "xm,vm"))
ef719a44
RH
11232 (parallel [(const_int 8) (const_int 24)
11233 (const_int 9) (const_int 25)
11234 (const_int 10) (const_int 26)
11235 (const_int 11) (const_int 27)
4f3f76e6 11236 (const_int 12) (const_int 28)
ef719a44
RH
11237 (const_int 13) (const_int 29)
11238 (const_int 14) (const_int 30)
11239 (const_int 15) (const_int 31)])))]
6edf4f24 11240 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
1ee8b298
UB
11241 "@
11242 punpckhbw\t{%2, %0|%0, %2}
6edf4f24 11243 vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
1ee8b298
UB
11244 [(set_attr "isa" "noavx,avx")
11245 (set_attr "type" "sselog")
11246 (set_attr "prefix_data16" "1,*")
6edf4f24 11247 (set_attr "prefix" "orig,<mask_prefix>")
95879c72
L
11248 (set_attr "mode" "TI")])
11249
6edf4f24
AI
;; 512-bit vpunpcklbw: within each 16-byte group, bytes 0-7 of operand 1 are
;; interleaved with the same bytes of operand 2 (indices 64-127 of the
;; V128QI concatenation address operand 2).
11250(define_insn "avx512bw_interleave_lowv64qi<mask_name>"
11251 [(set (match_operand:V64QI 0 "register_operand" "=v")
11252 (vec_select:V64QI
11253 (vec_concat:V128QI
11254 (match_operand:V64QI 1 "register_operand" "v")
11255 (match_operand:V64QI 2 "nonimmediate_operand" "vm"))
11256 (parallel [(const_int 0) (const_int 64)
11257 (const_int 1) (const_int 65)
11258 (const_int 2) (const_int 66)
11259 (const_int 3) (const_int 67)
11260 (const_int 4) (const_int 68)
11261 (const_int 5) (const_int 69)
11262 (const_int 6) (const_int 70)
11263 (const_int 7) (const_int 71)
11264 (const_int 16) (const_int 80)
11265 (const_int 17) (const_int 81)
11266 (const_int 18) (const_int 82)
11267 (const_int 19) (const_int 83)
11268 (const_int 20) (const_int 84)
11269 (const_int 21) (const_int 85)
11270 (const_int 22) (const_int 86)
11271 (const_int 23) (const_int 87)
11272 (const_int 32) (const_int 96)
11273 (const_int 33) (const_int 97)
11274 (const_int 34) (const_int 98)
11275 (const_int 35) (const_int 99)
11276 (const_int 36) (const_int 100)
11277 (const_int 37) (const_int 101)
11278 (const_int 38) (const_int 102)
11279 (const_int 39) (const_int 103)
11280 (const_int 48) (const_int 112)
11281 (const_int 49) (const_int 113)
11282 (const_int 50) (const_int 114)
11283 (const_int 51) (const_int 115)
11284 (const_int 52) (const_int 116)
11285 (const_int 53) (const_int 117)
11286 (const_int 54) (const_int 118)
11287 (const_int 55) (const_int 119)])))]
11288 "TARGET_AVX512BW"
11289 "vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11290 [(set_attr "type" "sselog")
11291 (set_attr "prefix" "evex")
11292 (set_attr "mode" "XI")])
11293
;; 256-bit vpunpcklbw: within each 16-byte group, bytes 0-7 of operand 1 are
;; interleaved with the same bytes of operand 2 (indices 32-63 of the V64QI
;; concatenation address operand 2).
;; NOTE(review): "prefix" is "maybe_vex" here while the highv32qi sibling
;; uses "<mask_prefix>" -- confirm which is intended for the masked variant.
11294(define_insn "avx2_interleave_lowv32qi<mask_name>"
11295 [(set (match_operand:V32QI 0 "register_operand" "=v")
977e83a3
KY
11296 (vec_select:V32QI
11297 (vec_concat:V64QI
6edf4f24
AI
11298 (match_operand:V32QI 1 "register_operand" "v")
11299 (match_operand:V32QI 2 "nonimmediate_operand" "vm"))
977e83a3
KY
11300 (parallel [(const_int 0) (const_int 32)
11301 (const_int 1) (const_int 33)
11302 (const_int 2) (const_int 34)
11303 (const_int 3) (const_int 35)
11304 (const_int 4) (const_int 36)
11305 (const_int 5) (const_int 37)
11306 (const_int 6) (const_int 38)
11307 (const_int 7) (const_int 39)
977e83a3
KY
11308 (const_int 16) (const_int 48)
11309 (const_int 17) (const_int 49)
11310 (const_int 18) (const_int 50)
11311 (const_int 19) (const_int 51)
11312 (const_int 20) (const_int 52)
11313 (const_int 21) (const_int 53)
11314 (const_int 22) (const_int 54)
11315 (const_int 23) (const_int 55)])))]
6edf4f24
AI
11316 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
11317 "vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
977e83a3 11318 [(set_attr "type" "sselog")
6edf4f24 11319 (set_attr "prefix" "maybe_vex")
977e83a3
KY
11320 (set_attr "mode" "OI")])
11321
6edf4f24
AI
;; 128-bit punpcklbw/vpunpcklbw: bytes 0-7 of operand 1 are interleaved with
;; bytes 0-7 of operand 2 (indices 16-31 of the V32QI concatenation address
;; operand 2).  Alternative 0 is the two-operand form, alternative 1 the
;; three-operand form with the optional mask operand text.
11322(define_insn "vec_interleave_lowv16qi<mask_name>"
11323 [(set (match_operand:V16QI 0 "register_operand" "=x,v")
ef719a44
RH
11324 (vec_select:V16QI
11325 (vec_concat:V32QI
6edf4f24
AI
11326 (match_operand:V16QI 1 "register_operand" "0,v")
11327 (match_operand:V16QI 2 "nonimmediate_operand" "xm,vm"))
ef719a44
RH
11328 (parallel [(const_int 0) (const_int 16)
11329 (const_int 1) (const_int 17)
11330 (const_int 2) (const_int 18)
11331 (const_int 3) (const_int 19)
11332 (const_int 4) (const_int 20)
11333 (const_int 5) (const_int 21)
11334 (const_int 6) (const_int 22)
11335 (const_int 7) (const_int 23)])))]
6edf4f24 11336 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
1ee8b298
UB
11337 "@
11338 punpcklbw\t{%2, %0|%0, %2}
6edf4f24 11339 vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
1ee8b298
UB
11340 [(set_attr "isa" "noavx,avx")
11341 (set_attr "type" "sselog")
11342 (set_attr "prefix_data16" "1,*")
11343 (set_attr "prefix" "orig,vex")
95879c72
L
11344 (set_attr "mode" "TI")])
11345
6edf4f24
AI
;; 512-bit vpunpckhwd: within each group of eight words, words 4-7 of
;; operand 1 are interleaved with the same words of operand 2 (indices
;; 32-63 of the V64HI concatenation address operand 2).
11346(define_insn "avx512bw_interleave_highv32hi<mask_name>"
11347 [(set (match_operand:V32HI 0 "register_operand" "=v")
11348 (vec_select:V32HI
11349 (vec_concat:V64HI
11350 (match_operand:V32HI 1 "register_operand" "v")
11351 (match_operand:V32HI 2 "nonimmediate_operand" "vm"))
11352 (parallel [(const_int 4) (const_int 36)
11353 (const_int 5) (const_int 37)
11354 (const_int 6) (const_int 38)
11355 (const_int 7) (const_int 39)
11356 (const_int 12) (const_int 44)
11357 (const_int 13) (const_int 45)
11358 (const_int 14) (const_int 46)
11359 (const_int 15) (const_int 47)
11360 (const_int 20) (const_int 52)
11361 (const_int 21) (const_int 53)
11362 (const_int 22) (const_int 54)
11363 (const_int 23) (const_int 55)
11364 (const_int 28) (const_int 60)
11365 (const_int 29) (const_int 61)
11366 (const_int 30) (const_int 62)
11367 (const_int 31) (const_int 63)])))]
11368 "TARGET_AVX512BW"
11369 "vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11370 [(set_attr "type" "sselog")
11371 (set_attr "prefix" "evex")
11372 (set_attr "mode" "XI")])
11373
;; 256-bit vpunpckhwd: within each group of eight words, words 4-7 of
;; operand 1 are interleaved with the same words of operand 2 (indices
;; 16-31 of the V32HI concatenation address operand 2).
11374(define_insn "avx2_interleave_highv16hi<mask_name>"
11375 [(set (match_operand:V16HI 0 "register_operand" "=v")
977e83a3
KY
11376 (vec_select:V16HI
11377 (vec_concat:V32HI
6edf4f24
AI
11378 (match_operand:V16HI 1 "register_operand" "v")
11379 (match_operand:V16HI 2 "nonimmediate_operand" "vm"))
977e83a3
KY
11380 (parallel [(const_int 4) (const_int 20)
11381 (const_int 5) (const_int 21)
11382 (const_int 6) (const_int 22)
11383 (const_int 7) (const_int 23)
11384 (const_int 12) (const_int 28)
11385 (const_int 13) (const_int 29)
11386 (const_int 14) (const_int 30)
11387 (const_int 15) (const_int 31)])))]
6edf4f24
AI
11388 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
11389 "vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
977e83a3 11390 [(set_attr "type" "sselog")
6edf4f24 11391 (set_attr "prefix" "maybe_evex")
977e83a3
KY
11392 (set_attr "mode" "OI")])
11393
6edf4f24
AI
;; 128-bit punpckhwd/vpunpckhwd: words 4-7 of operand 1 are interleaved with
;; words 4-7 of operand 2 (indices 8-15 of the V16HI concatenation address
;; operand 2).  Alternative 0 is the two-operand form, alternative 1 the
;; three-operand form with the optional mask operand text.
11394(define_insn "vec_interleave_highv8hi<mask_name>"
11395 [(set (match_operand:V8HI 0 "register_operand" "=x,v")
ef719a44
RH
11396 (vec_select:V8HI
11397 (vec_concat:V16HI
6edf4f24
AI
11398 (match_operand:V8HI 1 "register_operand" "0,v")
11399 (match_operand:V8HI 2 "nonimmediate_operand" "xm,vm"))
ef719a44
RH
11400 (parallel [(const_int 4) (const_int 12)
11401 (const_int 5) (const_int 13)
11402 (const_int 6) (const_int 14)
11403 (const_int 7) (const_int 15)])))]
6edf4f24 11404 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
1ee8b298
UB
11405 "@
11406 punpckhwd\t{%2, %0|%0, %2}
6edf4f24 11407 vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
1ee8b298
UB
11408 [(set_attr "isa" "noavx,avx")
11409 (set_attr "type" "sselog")
11410 (set_attr "prefix_data16" "1,*")
6edf4f24 11411 (set_attr "prefix" "orig,maybe_vex")
95879c72
L
11412 (set_attr "mode" "TI")])
11413
6edf4f24
AI
;; 512-bit vpunpcklwd: within each group of eight words, words 0-3 of
;; operand 1 are interleaved with the same words of operand 2 (indices
;; 32-63 of the V64HI concatenation address operand 2).
;; NOTE(review): unlike the highv32hi sibling, this name carries a
;; <mask_codefor> prefix -- confirm the difference is intended.
11414(define_insn "<mask_codefor>avx512bw_interleave_lowv32hi<mask_name>"
11415 [(set (match_operand:V32HI 0 "register_operand" "=v")
11416 (vec_select:V32HI
11417 (vec_concat:V64HI
11418 (match_operand:V32HI 1 "register_operand" "v")
11419 (match_operand:V32HI 2 "nonimmediate_operand" "vm"))
11420 (parallel [(const_int 0) (const_int 32)
11421 (const_int 1) (const_int 33)
11422 (const_int 2) (const_int 34)
11423 (const_int 3) (const_int 35)
11424 (const_int 8) (const_int 40)
11425 (const_int 9) (const_int 41)
11426 (const_int 10) (const_int 42)
11427 (const_int 11) (const_int 43)
11428 (const_int 16) (const_int 48)
11429 (const_int 17) (const_int 49)
11430 (const_int 18) (const_int 50)
11431 (const_int 19) (const_int 51)
11432 (const_int 24) (const_int 56)
11433 (const_int 25) (const_int 57)
11434 (const_int 26) (const_int 58)
11435 (const_int 27) (const_int 59)])))]
11436 "TARGET_AVX512BW"
11437 "vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11438 [(set_attr "type" "sselog")
11439 (set_attr "prefix" "evex")
11440 (set_attr "mode" "XI")])
11441
;; 256-bit vpunpcklwd: within each group of eight words, words 0-3 of
;; operand 1 are interleaved with the same words of operand 2 (indices
;; 16-31 of the V32HI concatenation address operand 2).
11442(define_insn "avx2_interleave_lowv16hi<mask_name>"
11443 [(set (match_operand:V16HI 0 "register_operand" "=v")
977e83a3
KY
11444 (vec_select:V16HI
11445 (vec_concat:V32HI
6edf4f24
AI
11446 (match_operand:V16HI 1 "register_operand" "v")
11447 (match_operand:V16HI 2 "nonimmediate_operand" "vm"))
977e83a3
KY
11448 (parallel [(const_int 0) (const_int 16)
11449 (const_int 1) (const_int 17)
11450 (const_int 2) (const_int 18)
11451 (const_int 3) (const_int 19)
11452 (const_int 8) (const_int 24)
11453 (const_int 9) (const_int 25)
11454 (const_int 10) (const_int 26)
11455 (const_int 11) (const_int 27)])))]
6edf4f24
AI
11456 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
11457 "vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
977e83a3 11458 [(set_attr "type" "sselog")
6edf4f24 11459 (set_attr "prefix" "maybe_evex")
977e83a3
KY
11460 (set_attr "mode" "OI")])
11461
6edf4f24
AI
;; 128-bit punpcklwd/vpunpcklwd: words 0-3 of operand 1 are interleaved with
;; words 0-3 of operand 2 (indices 8-15 of the V16HI concatenation address
;; operand 2).  Alternative 0 is the two-operand form, alternative 1 the
;; three-operand form with the optional mask operand text.
11462(define_insn "vec_interleave_lowv8hi<mask_name>"
11463 [(set (match_operand:V8HI 0 "register_operand" "=x,v")
ef719a44
RH
11464 (vec_select:V8HI
11465 (vec_concat:V16HI
6edf4f24
AI
11466 (match_operand:V8HI 1 "register_operand" "0,v")
11467 (match_operand:V8HI 2 "nonimmediate_operand" "xm,vm"))
ef719a44
RH
11468 (parallel [(const_int 0) (const_int 8)
11469 (const_int 1) (const_int 9)
11470 (const_int 2) (const_int 10)
11471 (const_int 3) (const_int 11)])))]
6edf4f24 11472 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
1ee8b298
UB
11473 "@
11474 punpcklwd\t{%2, %0|%0, %2}
6edf4f24 11475 vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
1ee8b298
UB
11476 [(set_attr "isa" "noavx,avx")
11477 (set_attr "type" "sselog")
11478 (set_attr "prefix_data16" "1,*")
6edf4f24 11479 (set_attr "prefix" "orig,maybe_evex")
95879c72
L
11480 (set_attr "mode" "TI")])
11481
6edf4f24
AI
;; 256-bit vpunpckhdq: within each group of four dwords, dwords 2-3 of
;; operand 1 are interleaved with the same dwords of operand 2 (indices
;; 8-15 of the V16SI concatenation address operand 2).
11482(define_insn "avx2_interleave_highv8si<mask_name>"
11483 [(set (match_operand:V8SI 0 "register_operand" "=v")
977e83a3
KY
11484 (vec_select:V8SI
11485 (vec_concat:V16SI
6edf4f24
AI
11486 (match_operand:V8SI 1 "register_operand" "v")
11487 (match_operand:V8SI 2 "nonimmediate_operand" "vm"))
977e83a3
KY
11488 (parallel [(const_int 2) (const_int 10)
11489 (const_int 3) (const_int 11)
11490 (const_int 6) (const_int 14)
11491 (const_int 7) (const_int 15)])))]
6edf4f24
AI
11492 "TARGET_AVX2 && <mask_avx512vl_condition>"
11493 "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
977e83a3 11494 [(set_attr "type" "sselog")
6edf4f24 11495 (set_attr "prefix" "maybe_evex")
977e83a3
KY
11496 (set_attr "mode" "OI")])
11497
;; 512-bit vpunpckhdq: within each group of four dwords, dwords 2-3 of
;; operand 1 are interleaved with the same dwords of operand 2 (indices
;; 16-31 of the V32SI concatenation address operand 2).
47490470 11498(define_insn "<mask_codefor>avx512f_interleave_highv16si<mask_name>"
c003c6d6
AI
11499 [(set (match_operand:V16SI 0 "register_operand" "=v")
11500 (vec_select:V16SI
11501 (vec_concat:V32SI
11502 (match_operand:V16SI 1 "register_operand" "v")
11503 (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
11504 (parallel [(const_int 2) (const_int 18)
11505 (const_int 3) (const_int 19)
11506 (const_int 6) (const_int 22)
11507 (const_int 7) (const_int 23)
11508 (const_int 10) (const_int 26)
11509 (const_int 11) (const_int 27)
11510 (const_int 14) (const_int 30)
11511 (const_int 15) (const_int 31)])))]
11512 "TARGET_AVX512F"
47490470 11513 "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
c003c6d6
AI
11514 [(set_attr "type" "sselog")
11515 (set_attr "prefix" "evex")
11516 (set_attr "mode" "XI")])
11517
11518
6edf4f24
AI
;; 128-bit punpckhdq/vpunpckhdq: dwords 2-3 of operand 1 are interleaved
;; with dwords 2-3 of operand 2 (indices 4-7 of the V8SI concatenation
;; address operand 2).  Alternative 0 is the two-operand form, alternative 1
;; the three-operand form with the optional mask operand text.
11519(define_insn "vec_interleave_highv4si<mask_name>"
11520 [(set (match_operand:V4SI 0 "register_operand" "=x,v")
ef719a44
RH
11521 (vec_select:V4SI
11522 (vec_concat:V8SI
6edf4f24
AI
11523 (match_operand:V4SI 1 "register_operand" "0,v")
11524 (match_operand:V4SI 2 "nonimmediate_operand" "xm,vm"))
ef719a44
RH
11525 (parallel [(const_int 2) (const_int 6)
11526 (const_int 3) (const_int 7)])))]
6edf4f24 11527 "TARGET_SSE2 && <mask_avx512vl_condition>"
1ee8b298
UB
11528 "@
11529 punpckhdq\t{%2, %0|%0, %2}
6edf4f24 11530 vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
1ee8b298
UB
11531 [(set_attr "isa" "noavx,avx")
11532 (set_attr "type" "sselog")
11533 (set_attr "prefix_data16" "1,*")
6edf4f24 11534 (set_attr "prefix" "orig,maybe_vex")
95879c72
L
11535 (set_attr "mode" "TI")])
11536
6edf4f24
AI
;; 256-bit vpunpckldq: within each group of four dwords, dwords 0-1 of
;; operand 1 are interleaved with the same dwords of operand 2 (indices
;; 8-15 of the V16SI concatenation address operand 2).
11537(define_insn "avx2_interleave_lowv8si<mask_name>"
11538 [(set (match_operand:V8SI 0 "register_operand" "=v")
977e83a3
KY
11539 (vec_select:V8SI
11540 (vec_concat:V16SI
6edf4f24
AI
11541 (match_operand:V8SI 1 "register_operand" "v")
11542 (match_operand:V8SI 2 "nonimmediate_operand" "vm"))
977e83a3
KY
11543 (parallel [(const_int 0) (const_int 8)
11544 (const_int 1) (const_int 9)
11545 (const_int 4) (const_int 12)
11546 (const_int 5) (const_int 13)])))]
6edf4f24
AI
11547 "TARGET_AVX2 && <mask_avx512vl_condition>"
11548 "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
977e83a3 11549 [(set_attr "type" "sselog")
6edf4f24 11550 (set_attr "prefix" "maybe_evex")
977e83a3
KY
11551 (set_attr "mode" "OI")])
11552
;; 512-bit vpunpckldq: within each group of four dwords, dwords 0-1 of
;; operand 1 are interleaved with the same dwords of operand 2 (indices
;; 16-31 of the V32SI concatenation address operand 2).
47490470 11553(define_insn "<mask_codefor>avx512f_interleave_lowv16si<mask_name>"
c003c6d6
AI
11554 [(set (match_operand:V16SI 0 "register_operand" "=v")
11555 (vec_select:V16SI
11556 (vec_concat:V32SI
11557 (match_operand:V16SI 1 "register_operand" "v")
11558 (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
11559 (parallel [(const_int 0) (const_int 16)
11560 (const_int 1) (const_int 17)
11561 (const_int 4) (const_int 20)
11562 (const_int 5) (const_int 21)
11563 (const_int 8) (const_int 24)
11564 (const_int 9) (const_int 25)
11565 (const_int 12) (const_int 28)
11566 (const_int 13) (const_int 29)])))]
11567 "TARGET_AVX512F"
47490470 11568 "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
c003c6d6
AI
11569 [(set_attr "type" "sselog")
11570 (set_attr "prefix" "evex")
11571 (set_attr "mode" "XI")])
11572
6edf4f24
AI
;; 128-bit punpckldq/vpunpckldq: dwords 0-1 of operand 1 are interleaved
;; with dwords 0-1 of operand 2 (indices 4-7 of the V8SI concatenation
;; address operand 2).  Alternative 0 is the two-operand form, alternative 1
;; the three-operand form with the optional mask operand text.
11573(define_insn "vec_interleave_lowv4si<mask_name>"
11574 [(set (match_operand:V4SI 0 "register_operand" "=x,v")
ef719a44
RH
11575 (vec_select:V4SI
11576 (vec_concat:V8SI
6edf4f24
AI
11577 (match_operand:V4SI 1 "register_operand" "0,v")
11578 (match_operand:V4SI 2 "nonimmediate_operand" "xm,vm"))
ef719a44
RH
11579 (parallel [(const_int 0) (const_int 4)
11580 (const_int 1) (const_int 5)])))]
6edf4f24 11581 "TARGET_SSE2 && <mask_avx512vl_condition>"
1ee8b298
UB
11582 "@
11583 punpckldq\t{%2, %0|%0, %2}
6edf4f24 11584 vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
1ee8b298
UB
11585 [(set_attr "isa" "noavx,avx")
11586 (set_attr "type" "sselog")
11587 (set_attr "prefix_data16" "1,*")
11588 (set_attr "prefix" "orig,vex")
95879c72
L
11589 (set_attr "mode" "TI")])
11590
2e2accf8
JJ
;; Whole-vector high interleave for VI_256 modes.  It emits both the
;; avx2_interleave_low and avx2_interleave_high patterns into temporaries,
;; then combines their V4DImode lowparts with avx2_permv2ti using the
;; immediate 1 + (3 << 4), and finally moves the result into operand 0.
11591(define_expand "vec_interleave_high<mode>"
11592 [(match_operand:VI_256 0 "register_operand" "=x")
11593 (match_operand:VI_256 1 "register_operand" "x")
11594 (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
11595 "TARGET_AVX2"
11596{
11597 rtx t1 = gen_reg_rtx (<MODE>mode);
11598 rtx t2 = gen_reg_rtx (<MODE>mode);
d8c84975 11599 rtx t3 = gen_reg_rtx (V4DImode);
2e2accf8
JJ
11600 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
11601 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
d8c84975
JJ
11602 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
11603 gen_lowpart (V4DImode, t2),
11604 GEN_INT (1 + (3 << 4))));
11605 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
2e2accf8
JJ
11606 DONE;
11607})
11608
;; Whole-vector low interleave for VI_256 modes.  Identical in structure to
;; vec_interleave_high<mode>; only the avx2_permv2ti immediate differs,
;; here 0 + (2 << 4).
11609(define_expand "vec_interleave_low<mode>"
11610 [(match_operand:VI_256 0 "register_operand" "=x")
11611 (match_operand:VI_256 1 "register_operand" "x")
11612 (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
11613 "TARGET_AVX2"
11614{
11615 rtx t1 = gen_reg_rtx (<MODE>mode);
11616 rtx t2 = gen_reg_rtx (<MODE>mode);
d8c84975 11617 rtx t3 = gen_reg_rtx (V4DImode);
2e2accf8
JJ
11618 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
11619 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
d8c84975
JJ
11620 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
11621 gen_lowpart (V4DImode, t2),
11622 GEN_INT (0 + (2 << 4))));
11623 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
2e2accf8
JJ
11624 DONE;
11625})
11626
51e7f377
UB
11627;; Modes handled by pinsr patterns.
;; V8HI carries no extra condition; V16QI and V4SI require TARGET_SSE4_1,
;; and V2DI additionally requires TARGET_64BIT.
11628(define_mode_iterator PINSR_MODE
11629 [(V16QI "TARGET_SSE4_1") V8HI
11630 (V4SI "TARGET_SSE4_1")
11631 (V2DI "TARGET_SSE4_1 && TARGET_64BIT")])
11632
;; Per-mode name prefix for the pinsr insn below: "sse2" for V8HI and
;; "sse4_1" for the other modes.
11633(define_mode_attr sse2p4_1
11634 [(V16QI "sse4_1") (V8HI "sse2")
11635 (V4SI "sse4_1") (V2DI "sse4_1")])
11636
11637;; sse4_1_pinsrd must come before sse2_loadld since it is preferred.
;; Element insert.  Operand 3 is a vec_merge mask with a single bit set; the
;; insn condition requires exact_log2 of it to be a valid element index, and
;; the output code rewrites operand 3 to that index before printing.
;; Alternatives 0/1 are the two-operand pinsr forms (register / memory
;; source), alternatives 2/3 the three-operand vpinsr forms.
11638(define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"
11639 [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x")
11640 (vec_merge:PINSR_MODE
11641 (vec_duplicate:PINSR_MODE
11642 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m"))
11643 (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x")
82e86dc6 11644 (match_operand:SI 3 "const_int_operand")))]
51e7f377
UB
11645 "TARGET_SSE2
11646 && ((unsigned) exact_log2 (INTVAL (operands[3]))
11647 < GET_MODE_NUNITS (<MODE>mode))"
ef719a44
RH
11648{
/* Convert the one-hot merge mask into the element index for the immediate.  */
11649 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
1ee8b298
UB
11650
11651 switch (which_alternative)
11652 {
11653 case 0:
/* Register source narrower than SImode: print it with the %k2 form.  */
51e7f377 11654 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
977e83a3 11655 return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}";
51e7f377 11656 /* FALLTHRU */
1ee8b298 11657 case 1:
51e7f377 11658 return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}";
1ee8b298 11659 case 2:
51e7f377 11660 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
977e83a3 11661 return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
51e7f377 11662 /* FALLTHRU */
1ee8b298 11663 case 3:
51e7f377 11664 return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
1ee8b298
UB
11665 default:
11666 gcc_unreachable ();
11667 }
ef719a44 11668}
1ee8b298
UB
11669 [(set_attr "isa" "noavx,noavx,avx,avx")
11670 (set_attr "type" "sselog")
51e7f377
UB
11671 (set (attr "prefix_rex")
11672 (if_then_else
67b2c493 11673 (and (not (match_test "TARGET_AVX"))
51e7f377
UB
11674 (eq (const_string "<MODE>mode") (const_string "V2DImode")))
11675 (const_string "1")
11676 (const_string "*")))
11677 (set (attr "prefix_data16")
11678 (if_then_else
67b2c493 11679 (and (not (match_test "TARGET_AVX"))
51e7f377
UB
11680 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
11681 (const_string "1")
11682 (const_string "*")))
11683 (set (attr "prefix_extra")
11684 (if_then_else
67b2c493 11685 (and (not (match_test "TARGET_AVX"))
51e7f377
UB
11686 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
11687 (const_string "*")
11688 (const_string "1")))
725fd454 11689 (set_attr "length_immediate" "1")
1ee8b298 11690 (set_attr "prefix" "orig,orig,vex,vex")
ef719a44
RH
11691 (set_attr "mode" "TI")])
11692
d0337ddc
AI
;; Masked quarter-vector insert.  Operand 3 is the quarter index (0..3); it
;; is converted into the vec_merge selector expected by the _1_mask insn:
;; 0xFFFF ^ (0xF000 >> index * 4) for 4-byte elements, otherwise
;; 0xFF ^ (0xC0 >> index * 2).
;; NOTE(review): for index 0 this clears the *top* selector bits (0x0FFF /
;; 0x3F), which in vec_merge terms would take the highest quarter from the
;; duplicated operand 2.  The _1 insn decodes the same encoding, so the two
;; agree with each other, but confirm the RTL matches the lane the
;; instruction actually writes.
11693(define_expand "<extract_type>_vinsert<shuffletype><extract_suf>_mask"
11694 [(match_operand:AVX512_VEC 0 "register_operand")
11695 (match_operand:AVX512_VEC 1 "register_operand")
47490470
AI
11696 (match_operand:<ssequartermode> 2 "nonimmediate_operand")
11697 (match_operand:SI 3 "const_0_to_3_operand")
d0337ddc 11698 (match_operand:AVX512_VEC 4 "register_operand")
47490470
AI
11699 (match_operand:<avx512fmaskmode> 5 "register_operand")]
11700 "TARGET_AVX512F"
11701{
d0337ddc
AI
11702 int mask,selector;
11703 mask = INTVAL (operands[3]);
11704 selector = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 4 ?
11705 0xFFFF ^ (0xF000 >> mask * 4)
11706 : 0xFF ^ (0xC0 >> mask * 2);
11707 emit_insn (gen_<extract_type>_vinsert<shuffletype><extract_suf>_1_mask
11708 (operands[0], operands[1], operands[2], GEN_INT (selector),
11709 operands[4], operands[5]));
47490470 11710 DONE;
47490470
AI
11711})
11712
d0337ddc
AI
;; Quarter-vector insert expressed as a vec_merge of operand 1 with the
;; duplicated operand 2.  Operand 3 is the vec_merge selector; the output
;; code maps the eight accepted selector values back to an immediate 0..3
;; and aborts (gcc_unreachable) on anything else.
;; NOTE(review): selectors 0x0FFF / 0x3F (top quarter cleared) are mapped to
;; immediate 0 and 0xFFF0 / 0xFC (bottom quarter cleared) to immediate 3.
;; That looks inverted relative to vec_merge bit order -- confirm against
;; the instruction's lane numbering and against every generator of this
;; selector before changing either side.
11713(define_insn "<mask_codefor><extract_type>_vinsert<shuffletype><extract_suf>_1<mask_name>"
11714 [(set (match_operand:AVX512_VEC 0 "register_operand" "=v")
11715 (vec_merge:AVX512_VEC
11716 (match_operand:AVX512_VEC 1 "register_operand" "v")
11717 (vec_duplicate:AVX512_VEC
2e2206fa
AI
11718 (match_operand:<ssequartermode> 2 "nonimmediate_operand" "vm"))
11719 (match_operand:SI 3 "const_int_operand" "n")))]
11720 "TARGET_AVX512F"
11721{
11722 int mask;
d0337ddc
AI
11723 int selector = INTVAL (operands[3]);
11724
11725 if (selector == 0xFFF || selector == 0x3F)
11726 mask = 0;
11727 else if ( selector == 0xF0FF || selector == 0xCF)
11728 mask = 1;
11729 else if ( selector == 0xFF0F || selector == 0xF3)
11730 mask = 2;
11731 else if ( selector == 0xFFF0 || selector == 0xFC)
11732 mask = 3;
2e2206fa
AI
11733 else
11734 gcc_unreachable ();
11735
/* Replace the selector with the decoded immediate before printing.  */
11736 operands[3] = GEN_INT (mask);
11737
d0337ddc 11738 return "vinsert<shuffletype><extract_suf>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
2e2206fa
AI
11739}
11740 [(set_attr "type" "sselog")
11741 (set_attr "length_immediate" "1")
11742 (set_attr "prefix" "evex")
11743 (set_attr "mode" "<sseinsnmode>")])
11744
d0337ddc
AI
;; Masked insert of a half-width vector (operand 2) into the half of
;; operand 1 chosen by operand 3 (0 or 1).  Dispatches to the masked
;; vec_set_lo / vec_set_hi insns, forwarding operands 4 and 5.
(define_expand "<extract_type_2>_vinsert<shuffletype><extract_suf_2>_mask"
  [(match_operand:AVX512_VEC_2 0 "register_operand")
   (match_operand:AVX512_VEC_2 1 "register_operand")
   (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
   (match_operand:SI 3 "const_0_to_1_operand")
   (match_operand:AVX512_VEC_2 4 "register_operand")
   (match_operand:<avx512fmaskmode> 5 "register_operand")]
  "TARGET_AVX512F"
{
  const int half = INTVAL (operands[3]);

  /* A non-zero immediate selects the "hi" pattern, zero the "lo" one.  */
  if (half != 0)
    emit_insn (gen_vec_set_hi_<mode>_mask (operands[0], operands[1],
                                           operands[2], operands[4],
                                           operands[5]));
  else
    emit_insn (gen_vec_set_lo_<mode>_mask (operands[0], operands[1],
                                           operands[2], operands[4],
                                           operands[5]));
  DONE;
})
11765
d0337ddc
AI
;; Replace the low half of operand 1 with operand 2: the result is
;; operand 2 followed by elements 8..15 of operand 1.
;; The immediate is fixed, so it is spelled out in the template; the
;; Intel-dialect half must not carry the AT&T "$" immediate prefix.
(define_insn "vec_set_lo_<mode><mask_name>"
  [(set (match_operand:V16FI 0 "register_operand" "=v")
        (vec_concat:V16FI
          (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
          (vec_select:<ssehalfvecmode>
            (match_operand:V16FI 1 "register_operand" "v")
            (parallel [(const_int 8) (const_int 9)
                       (const_int 10) (const_int 11)
                       (const_int 12) (const_int 13)
                       (const_int 14) (const_int 15)]))))]
  "TARGET_AVX512DQ"
  "vinsert<shuffletype>32x8\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}"
  [(set_attr "type" "sselog")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
11782
;; Replace the high half of operand 1 with operand 2: the result is
;; elements 0..7 of operand 1 followed by operand 2.
;; vec_concat puts its first operand in the low-numbered elements, so
;; the preserved low half must come first and operand 2 second.
;; The immediate is fixed, so it is spelled out in the template; the
;; Intel-dialect half must not carry the AT&T "$" immediate prefix.
(define_insn "vec_set_hi_<mode><mask_name>"
  [(set (match_operand:V16FI 0 "register_operand" "=v")
        (vec_concat:V16FI
          (vec_select:<ssehalfvecmode>
            (match_operand:V16FI 1 "register_operand" "v")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)
                       (const_int 4) (const_int 5)
                       (const_int 6) (const_int 7)]))
          (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512DQ"
  "vinsert<shuffletype>32x8\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}"
  [(set_attr "type" "sselog")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
11799
;; Replace the low half of operand 1 with operand 2: the result is
;; operand 2 followed by elements 4..7 of operand 1.
;; The immediate is fixed, so it is spelled out in the template; the
;; Intel-dialect half must not carry the AT&T "$" immediate prefix.
(define_insn "vec_set_lo_<mode><mask_name>"
  [(set (match_operand:V8FI 0 "register_operand" "=v")
        (vec_concat:V8FI
          (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
          (vec_select:<ssehalfvecmode>
            (match_operand:V8FI 1 "register_operand" "v")
            (parallel [(const_int 4) (const_int 5)
                       (const_int 6) (const_int 7)]))))]
  "TARGET_AVX512F"
  "vinsert<shuffletype>64x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}"
  [(set_attr "type" "sselog")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
11814
;; Replace the high half of operand 1 with operand 2: the result is
;; elements 0..3 of operand 1 followed by operand 2.
;; vec_concat puts its first operand in the low-numbered elements, so
;; the preserved low half must come first and operand 2 second.
;; The immediate is fixed, so it is spelled out in the template; the
;; Intel-dialect half must not carry the AT&T "$" immediate prefix.
(define_insn "vec_set_hi_<mode><mask_name>"
  [(set (match_operand:V8FI 0 "register_operand" "=v")
        (vec_concat:V8FI
          (vec_select:<ssehalfvecmode>
            (match_operand:V8FI 1 "register_operand" "v")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)]))
          (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512F"
  "vinsert<shuffletype>64x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}"
  [(set_attr "type" "sselog")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
11829
d286410b
AI
;; Masked shuffle expander: turns the 2-bit immediate (operand 3) into
;; the four explicit element indices expected by the "_1_mask" insn.
;; Bit 0 picks a pair among indices 0..3, bit 1 a pair among 4..7.
(define_expand "avx512dq_shuf_<shuffletype>64x2_mask"
  [(match_operand:VI8F_256 0 "register_operand")
   (match_operand:VI8F_256 1 "register_operand")
   (match_operand:VI8F_256 2 "nonimmediate_operand")
   (match_operand:SI 3 "const_0_to_3_operand")
   (match_operand:VI8F_256 4 "register_operand")
   (match_operand:QI 5 "register_operand")]
  "TARGET_AVX512DQ"
{
  const int imm = INTVAL (operands[3]);
  /* Index of the first element of each selected pair.  */
  const int pair0 = (imm & 1) * 2;
  const int pair1 = ((imm >> 1) & 1) * 2 + 4;

  emit_insn (gen_avx512dq_shuf_<shuffletype>64x2_1_mask
    (operands[0], operands[1], operands[2],
     GEN_INT (pair0), GEN_INT (pair0 + 1),
     GEN_INT (pair1), GEN_INT (pair1 + 1),
     operands[4], operands[5]));
  DONE;
})
11849
;; Shuffle described as a vec_select over the concatenation of operands
;; 1 and 2.  Operands 3/4 and 5/6 must each be a consecutive pair that
;; starts on an even index: the immediate is computed as index / 2, so
;; a pair such as (1, 2) would otherwise be accepted and silently
;; emitted as the shuffle for (0, 1).
(define_insn "<mask_codefor>avx512dq_shuf_<shuffletype>64x2_1<mask_name>"
  [(set (match_operand:VI8F_256 0 "register_operand" "=v")
        (vec_select:VI8F_256
          (vec_concat:<ssedoublemode>
            (match_operand:VI8F_256 1 "register_operand" "v")
            (match_operand:VI8F_256 2 "nonimmediate_operand" "vm"))
          (parallel [(match_operand 3 "const_0_to_3_operand")
                     (match_operand 4 "const_0_to_3_operand")
                     (match_operand 5 "const_4_to_7_operand")
                     (match_operand 6 "const_4_to_7_operand")])))]
  "TARGET_AVX512VL
   && (INTVAL (operands[3]) & 1) == 0
   && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
   && (INTVAL (operands[5]) & 1) == 0
   && INTVAL (operands[5]) == INTVAL (operands[6]) - 1"
{
  int mask;
  /* One immediate bit per pair; the second pair is rebased from 4.  */
  mask = INTVAL (operands[3]) / 2;
  mask |= (INTVAL (operands[5]) - 4) / 2 << 1;
  operands[3] = GEN_INT (mask);
  return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand7>|%0<mask_operand7>, %1, %2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
11874
47490470
AI
;; Masked shuffle expander: splits the 8-bit immediate (operand 3) into
;; four 2-bit fields and passes the resulting eight element indices to
;; the "_1_mask" insn.  The two low fields index 0..7, the two high
;; fields are offset by 8.
(define_expand "avx512f_shuf_<shuffletype>64x2_mask"
  [(match_operand:V8FI 0 "register_operand")
   (match_operand:V8FI 1 "register_operand")
   (match_operand:V8FI 2 "nonimmediate_operand")
   (match_operand:SI 3 "const_0_to_255_operand")
   (match_operand:V8FI 4 "register_operand")
   (match_operand:QI 5 "register_operand")]
  "TARGET_AVX512F"
{
  const int imm = INTVAL (operands[3]);
  /* Index of the first element of each selected pair.  */
  const int pair0 = (imm & 3) * 2;
  const int pair1 = ((imm >> 2) & 3) * 2;
  const int pair2 = ((imm >> 4) & 3) * 2 + 8;
  const int pair3 = ((imm >> 6) & 3) * 2 + 8;

  emit_insn (gen_avx512f_shuf_<shuffletype>64x2_1_mask
    (operands[0], operands[1], operands[2],
     GEN_INT (pair0), GEN_INT (pair0 + 1),
     GEN_INT (pair1), GEN_INT (pair1 + 1),
     GEN_INT (pair2), GEN_INT (pair2 + 1),
     GEN_INT (pair3), GEN_INT (pair3 + 1),
     operands[4], operands[5]));
  DONE;
})
11898
;; Shuffle described as a vec_select over the concatenation of operands
;; 1 and 2.  Each of the four index pairs must be consecutive and start
;; on an even index: the immediate is computed as index / 2, so a pair
;; such as (1, 2) would otherwise be accepted and silently emitted as
;; the shuffle for (0, 1).
(define_insn "avx512f_shuf_<shuffletype>64x2_1<mask_name>"
  [(set (match_operand:V8FI 0 "register_operand" "=v")
        (vec_select:V8FI
          (vec_concat:<ssedoublemode>
            (match_operand:V8FI 1 "register_operand" "v")
            (match_operand:V8FI 2 "nonimmediate_operand" "vm"))
          (parallel [(match_operand 3 "const_0_to_7_operand")
                     (match_operand 4 "const_0_to_7_operand")
                     (match_operand 5 "const_0_to_7_operand")
                     (match_operand 6 "const_0_to_7_operand")
                     (match_operand 7 "const_8_to_15_operand")
                     (match_operand 8 "const_8_to_15_operand")
                     (match_operand 9 "const_8_to_15_operand")
                     (match_operand 10 "const_8_to_15_operand")])))]
  "TARGET_AVX512F
   && (INTVAL (operands[3]) & 1) == 0
   && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
   && (INTVAL (operands[5]) & 1) == 0
   && INTVAL (operands[5]) == INTVAL (operands[6]) - 1
   && (INTVAL (operands[7]) & 1) == 0
   && INTVAL (operands[7]) == INTVAL (operands[8]) - 1
   && (INTVAL (operands[9]) & 1) == 0
   && INTVAL (operands[9]) == INTVAL (operands[10]) - 1"
{
  int mask;
  /* Two immediate bits per pair; the upper pairs are rebased from 8.  */
  mask = INTVAL (operands[3]) / 2;
  mask |= INTVAL (operands[5]) / 2 << 2;
  mask |= (INTVAL (operands[7]) - 8) / 2 << 4;
  mask |= (INTVAL (operands[9]) - 8) / 2 << 6;
  operands[3] = GEN_INT (mask);

  return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
11932
d286410b
AI
;; Masked shuffle expander: turns the 2-bit immediate (operand 3) into
;; the eight explicit element indices expected by the "_1_mask" insn.
;; Bit 0 picks a run of four among indices 0..7, bit 1 among 8..15.
(define_expand "avx512vl_shuf_<shuffletype>32x4_mask"
  [(match_operand:VI4F_256 0 "register_operand")
   (match_operand:VI4F_256 1 "register_operand")
   (match_operand:VI4F_256 2 "nonimmediate_operand")
   (match_operand:SI 3 "const_0_to_3_operand")
   (match_operand:VI4F_256 4 "register_operand")
   (match_operand:QI 5 "register_operand")]
  "TARGET_AVX512VL"
{
  const int imm = INTVAL (operands[3]);
  /* Index of the first element of each selected run of four.  */
  const int quad0 = (imm & 1) * 4;
  const int quad1 = ((imm >> 1) & 1) * 4 + 8;

  emit_insn (gen_avx512vl_shuf_<shuffletype>32x4_1_mask
    (operands[0], operands[1], operands[2],
     GEN_INT (quad0), GEN_INT (quad0 + 1),
     GEN_INT (quad0 + 2), GEN_INT (quad0 + 3),
     GEN_INT (quad1), GEN_INT (quad1 + 1),
     GEN_INT (quad1 + 2), GEN_INT (quad1 + 3),
     operands[4], operands[5]));
  DONE;
})
11956
;; Shuffle described as a vec_select over the concatenation of operands
;; 1 and 2.  Operands 3..6 and 7..10 must each be four consecutive
;; indices starting on a multiple of 4: the immediate is computed as
;; index / 4, so a run such as (1, 2, 3, 4) would otherwise be accepted
;; and silently emitted as the shuffle for (0, 1, 2, 3).
(define_insn "<mask_codefor>avx512vl_shuf_<shuffletype>32x4_1<mask_name>"
  [(set (match_operand:VI4F_256 0 "register_operand" "=v")
        (vec_select:VI4F_256
          (vec_concat:<ssedoublemode>
            (match_operand:VI4F_256 1 "register_operand" "v")
            (match_operand:VI4F_256 2 "nonimmediate_operand" "vm"))
          (parallel [(match_operand 3 "const_0_to_7_operand")
                     (match_operand 4 "const_0_to_7_operand")
                     (match_operand 5 "const_0_to_7_operand")
                     (match_operand 6 "const_0_to_7_operand")
                     (match_operand 7 "const_8_to_15_operand")
                     (match_operand 8 "const_8_to_15_operand")
                     (match_operand 9 "const_8_to_15_operand")
                     (match_operand 10 "const_8_to_15_operand")])))]
  "TARGET_AVX512VL
   && (INTVAL (operands[3]) & 3) == 0
   && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
   && INTVAL (operands[3]) == INTVAL (operands[5]) - 2
   && INTVAL (operands[3]) == INTVAL (operands[6]) - 3
   && (INTVAL (operands[7]) & 3) == 0
   && INTVAL (operands[7]) == INTVAL (operands[8]) - 1
   && INTVAL (operands[7]) == INTVAL (operands[9]) - 2
   && INTVAL (operands[7]) == INTVAL (operands[10]) - 3"
{
  int mask;
  /* One immediate bit per run; the second run is rebased from 8.  */
  mask = INTVAL (operands[3]) / 4;
  mask |= (INTVAL (operands[7]) - 8) / 4 << 1;
  operands[3] = GEN_INT (mask);

  return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
11990
47490470
AI
;; Masked shuffle expander: splits the 8-bit immediate (operand 3) into
;; four 2-bit fields and passes the resulting sixteen element indices
;; to the "_1_mask" insn.  The two low fields index 0..15, the two high
;; fields are offset by 16.
(define_expand "avx512f_shuf_<shuffletype>32x4_mask"
  [(match_operand:V16FI 0 "register_operand")
   (match_operand:V16FI 1 "register_operand")
   (match_operand:V16FI 2 "nonimmediate_operand")
   (match_operand:SI 3 "const_0_to_255_operand")
   (match_operand:V16FI 4 "register_operand")
   (match_operand:HI 5 "register_operand")]
  "TARGET_AVX512F"
{
  const int imm = INTVAL (operands[3]);
  /* Index of the first element of each selected run of four.  */
  const int quad0 = (imm & 3) * 4;
  const int quad1 = ((imm >> 2) & 3) * 4;
  const int quad2 = ((imm >> 4) & 3) * 4 + 16;
  const int quad3 = ((imm >> 6) & 3) * 4 + 16;

  emit_insn (gen_avx512f_shuf_<shuffletype>32x4_1_mask
    (operands[0], operands[1], operands[2],
     GEN_INT (quad0), GEN_INT (quad0 + 1),
     GEN_INT (quad0 + 2), GEN_INT (quad0 + 3),
     GEN_INT (quad1), GEN_INT (quad1 + 1),
     GEN_INT (quad1 + 2), GEN_INT (quad1 + 3),
     GEN_INT (quad2), GEN_INT (quad2 + 1),
     GEN_INT (quad2 + 2), GEN_INT (quad2 + 3),
     GEN_INT (quad3), GEN_INT (quad3 + 1),
     GEN_INT (quad3 + 2), GEN_INT (quad3 + 3),
     operands[4], operands[5]));
  DONE;
})
12022
;; Shuffle described as a vec_select over the concatenation of operands
;; 1 and 2.  Operands 3..6, 7..10, 11..14 and 15..18 must each be four
;; consecutive indices starting on a multiple of 4: the immediate is
;; computed as index / 4, so a run such as (1, 2, 3, 4) would otherwise
;; be accepted and silently emitted as the shuffle for (0, 1, 2, 3).
(define_insn "avx512f_shuf_<shuffletype>32x4_1<mask_name>"
  [(set (match_operand:V16FI 0 "register_operand" "=v")
        (vec_select:V16FI
          (vec_concat:<ssedoublemode>
            (match_operand:V16FI 1 "register_operand" "v")
            (match_operand:V16FI 2 "nonimmediate_operand" "vm"))
          (parallel [(match_operand 3 "const_0_to_15_operand")
                     (match_operand 4 "const_0_to_15_operand")
                     (match_operand 5 "const_0_to_15_operand")
                     (match_operand 6 "const_0_to_15_operand")
                     (match_operand 7 "const_0_to_15_operand")
                     (match_operand 8 "const_0_to_15_operand")
                     (match_operand 9 "const_0_to_15_operand")
                     (match_operand 10 "const_0_to_15_operand")
                     (match_operand 11 "const_16_to_31_operand")
                     (match_operand 12 "const_16_to_31_operand")
                     (match_operand 13 "const_16_to_31_operand")
                     (match_operand 14 "const_16_to_31_operand")
                     (match_operand 15 "const_16_to_31_operand")
                     (match_operand 16 "const_16_to_31_operand")
                     (match_operand 17 "const_16_to_31_operand")
                     (match_operand 18 "const_16_to_31_operand")])))]
  "TARGET_AVX512F
   && (INTVAL (operands[3]) & 3) == 0
   && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
   && INTVAL (operands[3]) == INTVAL (operands[5]) - 2
   && INTVAL (operands[3]) == INTVAL (operands[6]) - 3
   && (INTVAL (operands[7]) & 3) == 0
   && INTVAL (operands[7]) == INTVAL (operands[8]) - 1
   && INTVAL (operands[7]) == INTVAL (operands[9]) - 2
   && INTVAL (operands[7]) == INTVAL (operands[10]) - 3
   && (INTVAL (operands[11]) & 3) == 0
   && INTVAL (operands[11]) == INTVAL (operands[12]) - 1
   && INTVAL (operands[11]) == INTVAL (operands[13]) - 2
   && INTVAL (operands[11]) == INTVAL (operands[14]) - 3
   && (INTVAL (operands[15]) & 3) == 0
   && INTVAL (operands[15]) == INTVAL (operands[16]) - 1
   && INTVAL (operands[15]) == INTVAL (operands[17]) - 2
   && INTVAL (operands[15]) == INTVAL (operands[18]) - 3"
{
  int mask;
  /* Two immediate bits per run; the upper runs are rebased from 16.  */
  mask = INTVAL (operands[3]) / 4;
  mask |= INTVAL (operands[7]) / 4 << 2;
  mask |= (INTVAL (operands[11]) - 16) / 4 << 4;
  mask |= (INTVAL (operands[15]) - 16) / 4 << 6;
  operands[3] = GEN_INT (mask);

  return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
12072
47490470
AI
;; Masked vpshufd expander for V16SI: splits the 8-bit immediate
;; (operand 2) into four 2-bit fields and repeats them at offsets
;; 0, 4, 8 and 12 to form the sixteen indices of the "_1_mask" insn.
(define_expand "avx512f_pshufdv3_mask"
  [(match_operand:V16SI 0 "register_operand")
   (match_operand:V16SI 1 "nonimmediate_operand")
   (match_operand:SI 2 "const_0_to_255_operand")
   (match_operand:V16SI 3 "register_operand")
   (match_operand:HI 4 "register_operand")]
  "TARGET_AVX512F"
{
  const int imm = INTVAL (operands[2]);
  /* The four 2-bit selector fields of the immediate.  */
  const int s0 = imm & 3;
  const int s1 = (imm >> 2) & 3;
  const int s2 = (imm >> 4) & 3;
  const int s3 = (imm >> 6) & 3;

  emit_insn (gen_avx512f_pshufd_1_mask (operands[0], operands[1],
                                        GEN_INT (s0), GEN_INT (s1),
                                        GEN_INT (s2), GEN_INT (s3),
                                        GEN_INT (s0 + 4), GEN_INT (s1 + 4),
                                        GEN_INT (s2 + 4), GEN_INT (s3 + 4),
                                        GEN_INT (s0 + 8), GEN_INT (s1 + 8),
                                        GEN_INT (s2 + 8), GEN_INT (s3 + 8),
                                        GEN_INT (s0 + 12), GEN_INT (s1 + 12),
                                        GEN_INT (s2 + 12), GEN_INT (s3 + 12),
                                        operands[3], operands[4]));
  DONE;
})
12102
;; vpshufd on V16SI expressed as a vec_select.  The condition requires
;; the indices in operands 6..17 to repeat operands 2..5 at offsets 4,
;; 8 and 12, so only operands 2..5 are needed to rebuild the immediate.
(define_insn "avx512f_pshufd_1<mask_name>"
  [(set (match_operand:V16SI 0 "register_operand" "=v")
        (vec_select:V16SI
          (match_operand:V16SI 1 "nonimmediate_operand" "vm")
          (parallel [(match_operand 2 "const_0_to_3_operand")
                     (match_operand 3 "const_0_to_3_operand")
                     (match_operand 4 "const_0_to_3_operand")
                     (match_operand 5 "const_0_to_3_operand")
                     (match_operand 6 "const_4_to_7_operand")
                     (match_operand 7 "const_4_to_7_operand")
                     (match_operand 8 "const_4_to_7_operand")
                     (match_operand 9 "const_4_to_7_operand")
                     (match_operand 10 "const_8_to_11_operand")
                     (match_operand 11 "const_8_to_11_operand")
                     (match_operand 12 "const_8_to_11_operand")
                     (match_operand 13 "const_8_to_11_operand")
                     (match_operand 14 "const_12_to_15_operand")
                     (match_operand 15 "const_12_to_15_operand")
                     (match_operand 16 "const_12_to_15_operand")
                     (match_operand 17 "const_12_to_15_operand")])))]
  "TARGET_AVX512F
   && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
   && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
   && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
   && INTVAL (operands[5]) + 4 == INTVAL (operands[9])
   && INTVAL (operands[2]) + 8 == INTVAL (operands[10])
   && INTVAL (operands[3]) + 8 == INTVAL (operands[11])
   && INTVAL (operands[4]) + 8 == INTVAL (operands[12])
   && INTVAL (operands[5]) + 8 == INTVAL (operands[13])
   && INTVAL (operands[2]) + 12 == INTVAL (operands[14])
   && INTVAL (operands[3]) + 12 == INTVAL (operands[15])
   && INTVAL (operands[4]) + 12 == INTVAL (operands[16])
   && INTVAL (operands[5]) + 12 == INTVAL (operands[17])"
{
  /* Pack the four 2-bit indices back into one immediate, lowest first.  */
  int imm8 = (INTVAL (operands[2])
              | (INTVAL (operands[3]) << 2)
              | (INTVAL (operands[4]) << 4)
              | (INTVAL (operands[5]) << 6));
  operands[2] = GEN_INT (imm8);

  return "vpshufd\t{%2, %1, %0<mask_operand18>|%0<mask_operand18>, %1, %2}";
}
  [(set_attr "type" "sselog1")
   (set_attr "prefix" "evex")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "XI")])
12150
d286410b
AI
;; Masked vpshufd expander for V8SI: splits the 8-bit immediate
;; (operand 2) into four 2-bit fields and repeats them at offsets 0 and
;; 4 to form the eight indices of the masked avx2_pshufd_1 insn.
(define_expand "avx512vl_pshufdv3_mask"
  [(match_operand:V8SI 0 "register_operand")
   (match_operand:V8SI 1 "nonimmediate_operand")
   (match_operand:SI 2 "const_0_to_255_operand")
   (match_operand:V8SI 3 "register_operand")
   (match_operand:QI 4 "register_operand")]
  "TARGET_AVX512VL"
{
  const int imm = INTVAL (operands[2]);
  /* The four 2-bit selector fields of the immediate.  */
  const int s0 = imm & 3;
  const int s1 = (imm >> 2) & 3;
  const int s2 = (imm >> 4) & 3;
  const int s3 = (imm >> 6) & 3;

  emit_insn (gen_avx2_pshufd_1_mask (operands[0], operands[1],
                                     GEN_INT (s0), GEN_INT (s1),
                                     GEN_INT (s2), GEN_INT (s3),
                                     GEN_INT (s0 + 4), GEN_INT (s1 + 4),
                                     GEN_INT (s2 + 4), GEN_INT (s3 + 4),
                                     operands[3], operands[4]));
  DONE;
})
12172
;; vpshufd expander for V8SI: splits the 8-bit immediate (operand 2)
;; into four 2-bit fields and repeats them at offsets 0 and 4 to form
;; the eight indices of the avx2_pshufd_1 insn.
(define_expand "avx2_pshufdv3"
  [(match_operand:V8SI 0 "register_operand")
   (match_operand:V8SI 1 "nonimmediate_operand")
   (match_operand:SI 2 "const_0_to_255_operand")]
  "TARGET_AVX2"
{
  const int imm = INTVAL (operands[2]);
  /* The four 2-bit selector fields of the immediate.  */
  const int s0 = imm & 3;
  const int s1 = (imm >> 2) & 3;
  const int s2 = (imm >> 4) & 3;
  const int s3 = (imm >> 6) & 3;

  emit_insn (gen_avx2_pshufd_1 (operands[0], operands[1],
                                GEN_INT (s0), GEN_INT (s1),
                                GEN_INT (s2), GEN_INT (s3),
                                GEN_INT (s0 + 4), GEN_INT (s1 + 4),
                                GEN_INT (s2 + 4), GEN_INT (s3 + 4)));
  DONE;
})
12191
d286410b
AI
;; vpshufd on V8SI expressed as a vec_select.  The condition requires
;; operands 6..9 to repeat operands 2..5 at offset 4, so only operands
;; 2..5 are needed to rebuild the immediate.
(define_insn "avx2_pshufd_1<mask_name>"
  [(set (match_operand:V8SI 0 "register_operand" "=v")
        (vec_select:V8SI
          (match_operand:V8SI 1 "nonimmediate_operand" "vm")
          (parallel [(match_operand 2 "const_0_to_3_operand")
                     (match_operand 3 "const_0_to_3_operand")
                     (match_operand 4 "const_0_to_3_operand")
                     (match_operand 5 "const_0_to_3_operand")
                     (match_operand 6 "const_4_to_7_operand")
                     (match_operand 7 "const_4_to_7_operand")
                     (match_operand 8 "const_4_to_7_operand")
                     (match_operand 9 "const_4_to_7_operand")])))]
  "TARGET_AVX2
   && <mask_avx512vl_condition>
   && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
   && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
   && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
   && INTVAL (operands[5]) + 4 == INTVAL (operands[9])"
{
  /* Pack the four 2-bit indices back into one immediate, lowest first.  */
  int imm8 = (INTVAL (operands[2])
              | (INTVAL (operands[3]) << 2)
              | (INTVAL (operands[4]) << 4)
              | (INTVAL (operands[5]) << 6));
  operands[2] = GEN_INT (imm8);

  return "vpshufd\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
}
  [(set_attr "type" "sselog1")
   (set_attr "prefix" "maybe_evex")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "OI")])
12224
d286410b
AI
;; Masked pshufd expander for V4SI: splits the 8-bit immediate
;; (operand 2) into the four 2-bit indices of the masked
;; sse2_pshufd_1 insn.
(define_expand "avx512vl_pshufd_mask"
  [(match_operand:V4SI 0 "register_operand")
   (match_operand:V4SI 1 "nonimmediate_operand")
   (match_operand:SI 2 "const_0_to_255_operand")
   (match_operand:V4SI 3 "register_operand")
   (match_operand:QI 4 "register_operand")]
  "TARGET_AVX512VL"
{
  const int imm = INTVAL (operands[2]);
  /* The four 2-bit selector fields of the immediate.  */
  const int s0 = imm & 3;
  const int s1 = (imm >> 2) & 3;
  const int s2 = (imm >> 4) & 3;
  const int s3 = (imm >> 6) & 3;

  emit_insn (gen_sse2_pshufd_1_mask (operands[0], operands[1],
                                     GEN_INT (s0), GEN_INT (s1),
                                     GEN_INT (s2), GEN_INT (s3),
                                     operands[3], operands[4]));
  DONE;
})
12242
;; pshufd expander for V4SI: splits the immediate (operand 2) into the
;; four 2-bit indices of the sse2_pshufd_1 insn.  Only the low eight
;; bits of the immediate are used.
(define_expand "sse2_pshufd"
  [(match_operand:V4SI 0 "register_operand")
   (match_operand:V4SI 1 "nonimmediate_operand")
   (match_operand:SI 2 "const_int_operand")]
  "TARGET_SSE2"
{
  const int imm = INTVAL (operands[2]);
  /* The four 2-bit selector fields of the immediate.  */
  const int s0 = imm & 3;
  const int s1 = (imm >> 2) & 3;
  const int s2 = (imm >> 4) & 3;
  const int s3 = (imm >> 6) & 3;

  emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
                                GEN_INT (s0), GEN_INT (s1),
                                GEN_INT (s2), GEN_INT (s3)));
  DONE;
})
12257
d286410b
AI
;; pshufd on V4SI expressed as a vec_select; the four indices in
;; operands 2..5 are packed back into the instruction immediate.
(define_insn "sse2_pshufd_1<mask_name>"
  [(set (match_operand:V4SI 0 "register_operand" "=v")
        (vec_select:V4SI
          (match_operand:V4SI 1 "nonimmediate_operand" "vm")
          (parallel [(match_operand 2 "const_0_to_3_operand")
                     (match_operand 3 "const_0_to_3_operand")
                     (match_operand 4 "const_0_to_3_operand")
                     (match_operand 5 "const_0_to_3_operand")])))]
  "TARGET_SSE2 && <mask_avx512vl_condition>"
{
  /* Pack the four 2-bit indices back into one immediate, lowest first.  */
  int imm8 = (INTVAL (operands[2])
              | (INTVAL (operands[3]) << 2)
              | (INTVAL (operands[4]) << 4)
              | (INTVAL (operands[5]) << 6));
  operands[2] = GEN_INT (imm8);

  return "%vpshufd\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
}
  [(set_attr "type" "sselog1")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix" "<mask_prefix2>")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
12282
41755b52
AI
;; vpshuflw on V32HI, kept as an unspec with the immediate (operand 2)
;; passed straight through to the instruction.
;; The instruction carries a one-byte immediate, so length_immediate is
;; set like on the other shuffle-with-immediate insns in this file.
(define_insn "<mask_codefor>avx512bw_pshuflwv32hi<mask_name>"
  [(set (match_operand:V32HI 0 "register_operand" "=v")
        (unspec:V32HI
          [(match_operand:V32HI 1 "nonimmediate_operand" "vm")
           (match_operand:SI 2 "const_0_to_255_operand" "n")]
          UNSPEC_PSHUFLW))]
  "TARGET_AVX512BW"
  "vpshuflw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "evex")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "XI")])
12294
;; Masked vpshuflw expander for V16HI: splits the 8-bit immediate
;; (operand 2) into four 2-bit fields and repeats them at offsets 0 and
;; 8 to form the eight variable indices of the masked avx2_pshuflw_1
;; insn.
(define_expand "avx512vl_pshuflwv3_mask"
  [(match_operand:V16HI 0 "register_operand")
   (match_operand:V16HI 1 "nonimmediate_operand")
   (match_operand:SI 2 "const_0_to_255_operand")
   (match_operand:V16HI 3 "register_operand")
   (match_operand:HI 4 "register_operand")]
  "TARGET_AVX512VL && TARGET_AVX512BW"
{
  const int imm = INTVAL (operands[2]);
  /* The four 2-bit selector fields of the immediate.  */
  const int s0 = imm & 3;
  const int s1 = (imm >> 2) & 3;
  const int s2 = (imm >> 4) & 3;
  const int s3 = (imm >> 6) & 3;

  emit_insn (gen_avx2_pshuflw_1_mask (operands[0], operands[1],
                                      GEN_INT (s0), GEN_INT (s1),
                                      GEN_INT (s2), GEN_INT (s3),
                                      GEN_INT (s0 + 8), GEN_INT (s1 + 8),
                                      GEN_INT (s2 + 8), GEN_INT (s3 + 8),
                                      operands[3], operands[4]));
  DONE;
})
12316
;; vpshuflw expander for V16HI: splits the 8-bit immediate (operand 2)
;; into four 2-bit fields and repeats them at offsets 0 and 8 to form
;; the eight variable indices of the avx2_pshuflw_1 insn.
(define_expand "avx2_pshuflwv3"
  [(match_operand:V16HI 0 "register_operand")
   (match_operand:V16HI 1 "nonimmediate_operand")
   (match_operand:SI 2 "const_0_to_255_operand")]
  "TARGET_AVX2"
{
  const int imm = INTVAL (operands[2]);
  /* The four 2-bit selector fields of the immediate.  */
  const int s0 = imm & 3;
  const int s1 = (imm >> 2) & 3;
  const int s2 = (imm >> 4) & 3;
  const int s3 = (imm >> 6) & 3;

  emit_insn (gen_avx2_pshuflw_1 (operands[0], operands[1],
                                 GEN_INT (s0), GEN_INT (s1),
                                 GEN_INT (s2), GEN_INT (s3),
                                 GEN_INT (s0 + 8), GEN_INT (s1 + 8),
                                 GEN_INT (s2 + 8), GEN_INT (s3 + 8)));
  DONE;
})
12335
41755b52
AI
;; vpshuflw on V16HI expressed as a vec_select.  Elements 4..7 and
;; 12..15 are passed through unchanged; the condition requires operands
;; 6..9 to repeat operands 2..5 at offset 8, so only operands 2..5 are
;; needed to rebuild the immediate.
(define_insn "avx2_pshuflw_1<mask_name>"
  [(set (match_operand:V16HI 0 "register_operand" "=v")
        (vec_select:V16HI
          (match_operand:V16HI 1 "nonimmediate_operand" "vm")
          (parallel [(match_operand 2 "const_0_to_3_operand")
                     (match_operand 3 "const_0_to_3_operand")
                     (match_operand 4 "const_0_to_3_operand")
                     (match_operand 5 "const_0_to_3_operand")
                     (const_int 4)
                     (const_int 5)
                     (const_int 6)
                     (const_int 7)
                     (match_operand 6 "const_8_to_11_operand")
                     (match_operand 7 "const_8_to_11_operand")
                     (match_operand 8 "const_8_to_11_operand")
                     (match_operand 9 "const_8_to_11_operand")
                     (const_int 12)
                     (const_int 13)
                     (const_int 14)
                     (const_int 15)])))]
  "TARGET_AVX2
   && <mask_avx512bw_condition> && <mask_avx512vl_condition>
   && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
   && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
   && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
   && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
{
  /* Pack the four 2-bit indices back into one immediate, lowest first.  */
  int imm8 = (INTVAL (operands[2])
              | (INTVAL (operands[3]) << 2)
              | (INTVAL (operands[4]) << 4)
              | (INTVAL (operands[5]) << 6));
  operands[2] = GEN_INT (imm8);

  return "vpshuflw\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix" "maybe_evex")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "OI")])
12376
41755b52
AI
;; Masked pshuflw expander for V8HI: splits the 8-bit immediate
;; (operand 2) into the four 2-bit indices of the masked
;; sse2_pshuflw_1 insn.
(define_expand "avx512vl_pshuflw_mask"
  [(match_operand:V8HI 0 "register_operand")
   (match_operand:V8HI 1 "nonimmediate_operand")
   (match_operand:SI 2 "const_0_to_255_operand")
   (match_operand:V8HI 3 "register_operand")
   (match_operand:QI 4 "register_operand")]
  "TARGET_AVX512VL && TARGET_AVX512BW"
{
  const int imm = INTVAL (operands[2]);
  /* The four 2-bit selector fields of the immediate.  */
  const int s0 = imm & 3;
  const int s1 = (imm >> 2) & 3;
  const int s2 = (imm >> 4) & 3;
  const int s3 = (imm >> 6) & 3;

  emit_insn (gen_sse2_pshuflw_1_mask (operands[0], operands[1],
                                      GEN_INT (s0), GEN_INT (s1),
                                      GEN_INT (s2), GEN_INT (s3),
                                      operands[3], operands[4]));
  DONE;
})
12394
;; pshuflw expander for V8HI: splits the immediate (operand 2) into the
;; four 2-bit indices of the sse2_pshuflw_1 insn.  Only the low eight
;; bits of the immediate are used.
(define_expand "sse2_pshuflw"
  [(match_operand:V8HI 0 "register_operand")
   (match_operand:V8HI 1 "nonimmediate_operand")
   (match_operand:SI 2 "const_int_operand")]
  "TARGET_SSE2"
{
  const int imm = INTVAL (operands[2]);
  /* The four 2-bit selector fields of the immediate.  */
  const int s0 = imm & 3;
  const int s1 = (imm >> 2) & 3;
  const int s2 = (imm >> 4) & 3;
  const int s3 = (imm >> 6) & 3;

  emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
                                 GEN_INT (s0), GEN_INT (s1),
                                 GEN_INT (s2), GEN_INT (s3)));
  DONE;
})
12409
41755b52
AI
;; pshuflw on V8HI expressed as a vec_select.  Elements 4..7 are passed
;; through unchanged; the four indices in operands 2..5 are packed back
;; into the instruction immediate.
(define_insn "sse2_pshuflw_1<mask_name>"
  [(set (match_operand:V8HI 0 "register_operand" "=v")
        (vec_select:V8HI
          (match_operand:V8HI 1 "nonimmediate_operand" "vm")
          (parallel [(match_operand 2 "const_0_to_3_operand")
                     (match_operand 3 "const_0_to_3_operand")
                     (match_operand 4 "const_0_to_3_operand")
                     (match_operand 5 "const_0_to_3_operand")
                     (const_int 4)
                     (const_int 5)
                     (const_int 6)
                     (const_int 7)])))]
  "TARGET_SSE2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
{
  /* Pack the four 2-bit indices back into one immediate, lowest first.  */
  int imm8 = (INTVAL (operands[2])
              | (INTVAL (operands[3]) << 2)
              | (INTVAL (operands[4]) << 4)
              | (INTVAL (operands[5]) << 6));
  operands[2] = GEN_INT (imm8);

  return "%vpshuflw\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
12439
;; vpshufhw expander for V16HI: splits the 8-bit immediate (operand 2)
;; into four 2-bit fields and offsets them by 4 and by 12 to form the
;; eight variable indices of the avx2_pshufhw_1 insn.
(define_expand "avx2_pshufhwv3"
  [(match_operand:V16HI 0 "register_operand")
   (match_operand:V16HI 1 "nonimmediate_operand")
   (match_operand:SI 2 "const_0_to_255_operand")]
  "TARGET_AVX2"
{
  const int imm = INTVAL (operands[2]);
  /* The four 2-bit selector fields of the immediate.  */
  const int s0 = imm & 3;
  const int s1 = (imm >> 2) & 3;
  const int s2 = (imm >> 4) & 3;
  const int s3 = (imm >> 6) & 3;

  emit_insn (gen_avx2_pshufhw_1 (operands[0], operands[1],
                                 GEN_INT (s0 + 4), GEN_INT (s1 + 4),
                                 GEN_INT (s2 + 4), GEN_INT (s3 + 4),
                                 GEN_INT (s0 + 12), GEN_INT (s1 + 12),
                                 GEN_INT (s2 + 12), GEN_INT (s3 + 12)));
  DONE;
})
12458
41755b52
AI
;; vpshufhw on V32HI, kept as an unspec with the immediate (operand 2)
;; passed straight through to the instruction.
;; The instruction carries a one-byte immediate, so length_immediate is
;; set like on the other shuffle-with-immediate insns in this file.
(define_insn "<mask_codefor>avx512bw_pshufhwv32hi<mask_name>"
  [(set (match_operand:V32HI 0 "register_operand" "=v")
        (unspec:V32HI
          [(match_operand:V32HI 1 "nonimmediate_operand" "vm")
           (match_operand:SI 2 "const_0_to_255_operand" "n")]
          UNSPEC_PSHUFHW))]
  "TARGET_AVX512BW"
  "vpshufhw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "evex")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "XI")])
12470
;; Masked vpshufhw expander for V16HI: splits the 8-bit immediate
;; (operand 2) into four 2-bit fields and offsets them by 4 and by 12
;; to form the eight variable indices of the masked avx2_pshufhw_1
;; insn.
(define_expand "avx512vl_pshufhwv3_mask"
  [(match_operand:V16HI 0 "register_operand")
   (match_operand:V16HI 1 "nonimmediate_operand")
   (match_operand:SI 2 "const_0_to_255_operand")
   (match_operand:V16HI 3 "register_operand")
   (match_operand:HI 4 "register_operand")]
  "TARGET_AVX512VL && TARGET_AVX512BW"
{
  const int imm = INTVAL (operands[2]);
  /* The four 2-bit selector fields of the immediate.  */
  const int s0 = imm & 3;
  const int s1 = (imm >> 2) & 3;
  const int s2 = (imm >> 4) & 3;
  const int s3 = (imm >> 6) & 3;

  emit_insn (gen_avx2_pshufhw_1_mask (operands[0], operands[1],
                                      GEN_INT (s0 + 4), GEN_INT (s1 + 4),
                                      GEN_INT (s2 + 4), GEN_INT (s3 + 4),
                                      GEN_INT (s0 + 12), GEN_INT (s1 + 12),
                                      GEN_INT (s2 + 12), GEN_INT (s3 + 12),
                                      operands[3], operands[4]));
  DONE;
})
12492
;; vpshufhw on V16HI expressed as a vec_select.  Elements 0..3 and
;; 8..11 are passed through unchanged; the condition requires operands
;; 6..9 to repeat operands 2..5 at offset 8, so only operands 2..5 are
;; needed to rebuild the immediate.
(define_insn "avx2_pshufhw_1<mask_name>"
  [(set (match_operand:V16HI 0 "register_operand" "=v")
        (vec_select:V16HI
          (match_operand:V16HI 1 "nonimmediate_operand" "vm")
          (parallel [(const_int 0)
                     (const_int 1)
                     (const_int 2)
                     (const_int 3)
                     (match_operand 2 "const_4_to_7_operand")
                     (match_operand 3 "const_4_to_7_operand")
                     (match_operand 4 "const_4_to_7_operand")
                     (match_operand 5 "const_4_to_7_operand")
                     (const_int 8)
                     (const_int 9)
                     (const_int 10)
                     (const_int 11)
                     (match_operand 6 "const_12_to_15_operand")
                     (match_operand 7 "const_12_to_15_operand")
                     (match_operand 8 "const_12_to_15_operand")
                     (match_operand 9 "const_12_to_15_operand")])))]
  "TARGET_AVX2
   && <mask_avx512bw_condition> && <mask_avx512vl_condition>
   && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
   && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
   && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
   && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
{
  /* Rebase each index from 4..7 to 0..3, then pack the four 2-bit
     values into one immediate, lowest first.  */
  int imm8 = ((INTVAL (operands[2]) - 4)
              | ((INTVAL (operands[3]) - 4) << 2)
              | ((INTVAL (operands[4]) - 4) << 4)
              | ((INTVAL (operands[5]) - 4) << 6));
  operands[2] = GEN_INT (imm8);

  return "vpshufhw\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix" "maybe_evex")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "OI")])
12533
41755b52
AI
;; Masked 128-bit (V8HI) pshufhw expander.  The 8-bit immediate in
;; operand 2 is split into four 2-bit selectors, each biased by 4, and
;; passed to sse2_pshufhw_1_mask.  Operands 3 and 4 are forwarded unchanged
;; as the trailing arguments (presumably merge source and write mask --
;; confirm against the mask subst).
12534(define_expand "avx512vl_pshufhw_mask"
12535 [(match_operand:V8HI 0 "register_operand")
12536 (match_operand:V8HI 1 "nonimmediate_operand")
12537 (match_operand:SI 2 "const_0_to_255_operand")
12538 (match_operand:V8HI 3 "register_operand")
12539 (match_operand:QI 4 "register_operand")]
12540 "TARGET_AVX512VL && TARGET_AVX512BW"
12541{
12542 int mask = INTVAL (operands[2]);
12543 emit_insn (gen_sse2_pshufhw_1_mask (operands[0], operands[1],
12544 GEN_INT (((mask >> 0) & 3) + 4),
12545 GEN_INT (((mask >> 2) & 3) + 4),
12546 GEN_INT (((mask >> 4) & 3) + 4),
12547 GEN_INT (((mask >> 6) & 3) + 4),
12548 operands[3], operands[4]));
12549 DONE;
12550})
12551
;; Unmasked 128-bit (V8HI) pshufhw expander.  Operand 2 is an integer
;; constant; only its low 8 bits are used, as four 2-bit selectors that
;; are each biased by 4 and passed to sse2_pshufhw_1.
;; NOTE(review): operand 2 uses const_int_operand here, whereas the masked
;; expanders in this file use const_0_to_255_operand.
ef719a44 12552(define_expand "sse2_pshufhw"
82e86dc6
UB
12553 [(match_operand:V8HI 0 "register_operand")
12554 (match_operand:V8HI 1 "nonimmediate_operand")
12555 (match_operand:SI 2 "const_int_operand")]
ef719a44
RH
12556 "TARGET_SSE2"
12557{
12558 int mask = INTVAL (operands[2]);
12559 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
12560 GEN_INT (((mask >> 0) & 3) + 4),
12561 GEN_INT (((mask >> 2) & 3) + 4),
12562 GEN_INT (((mask >> 4) & 3) + 4),
12563 GEN_INT (((mask >> 6) & 3) + 4)));
12564 DONE;
12565})
12566
41755b52
AI
;; vec_select form of the 128-bit pshufhw (optionally masked).
;; Elements 0-3 stay in place; operands 2-5 each select one of elements
;; 4-7.  The output code packs the four selectors (minus 4, two bits each)
;; back into an 8-bit immediate, stores it in operands[2] and prints the
;; instruction through the %v prefix modifier.
12567(define_insn "sse2_pshufhw_1<mask_name>"
12568 [(set (match_operand:V8HI 0 "register_operand" "=v")
ef719a44 12569 (vec_select:V8HI
41755b52 12570 (match_operand:V8HI 1 "nonimmediate_operand" "vm")
ef719a44
RH
12571 (parallel [(const_int 0)
12572 (const_int 1)
12573 (const_int 2)
12574 (const_int 3)
82e86dc6
UB
12575 (match_operand 2 "const_4_to_7_operand")
12576 (match_operand 3 "const_4_to_7_operand")
12577 (match_operand 4 "const_4_to_7_operand")
12578 (match_operand 5 "const_4_to_7_operand")])))]
41755b52 12579 "TARGET_SSE2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
ef719a44
RH
12580{
12581 int mask = 0;
12582 mask |= (INTVAL (operands[2]) - 4) << 0;
12583 mask |= (INTVAL (operands[3]) - 4) << 2;
12584 mask |= (INTVAL (operands[4]) - 4) << 4;
12585 mask |= (INTVAL (operands[5]) - 4) << 6;
12586 operands[2] = GEN_INT (mask);
12587
41755b52 12588 return "%vpshufhw\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
ef719a44
RH
12589}
12590 [(set_attr "type" "sselog")
10e4d956 12591 (set_attr "prefix_rep" "1")
725fd454 12592 (set_attr "prefix_data16" "0")
95879c72 12593 (set_attr "prefix" "maybe_vex")
725fd454 12594 (set_attr "length_immediate" "1")
ef719a44
RH
12595 (set_attr "mode" "TI")])
12596
;; Expander that builds a V4SI from the SImode operand 1: a vec_merge,
;; under merge mask (const_int 1), of the duplicated scalar with operand 2.
;; The preparation statement sets operands[2] to the V4SI zero vector, so
;; callers supply only operands 0 and 1.
12597(define_expand "sse2_loadd"
82e86dc6 12598 [(set (match_operand:V4SI 0 "register_operand")
ef719a44
RH
12599 (vec_merge:V4SI
12600 (vec_duplicate:V4SI
82e86dc6 12601 (match_operand:SI 1 "nonimmediate_operand"))
ef719a44
RH
12602 (match_dup 2)
12603 (const_int 1)))]
eb701deb 12604 "TARGET_SSE"
ef719a44
RH
12605 "operands[2] = CONST0_RTX (V4SImode);")
12606
;; Merge the SImode operand 2 into a V4SI under merge mask (const_int 1);
;; the other input is operand 1 (a register or zero).
;; Alternatives, in order:
;;   0,1  operand 1 is zero ("C"): movd from memory / from a GPR (SSE2)
;;   2    operand 1 is zero: movss from memory (non-AVX)
;;   3    operand 1 tied to the destination: movss from a register (non-AVX)
;;   4    three-operand vmovss (AVX)
12607(define_insn "sse2_loadld"
a02f398d 12608 [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x,x,x")
ef719a44
RH
12609 (vec_merge:V4SI
12610 (vec_duplicate:V4SI
a02f398d
UB
12611 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,x"))
12612 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0,x")
ef719a44 12613 (const_int 1)))]
eb701deb 12614 "TARGET_SSE"
ef719a44 12615 "@
1ee8b298
UB
12616 %vmovd\t{%2, %0|%0, %2}
12617 %vmovd\t{%2, %0|%0, %2}
eb701deb 12618 movss\t{%2, %0|%0, %2}
1ee8b298
UB
12619 movss\t{%2, %0|%0, %2}
12620 vmovss\t{%2, %1, %0|%0, %1, %2}"
b2d7aa9a 12621 [(set_attr "isa" "sse2,sse2,noavx,noavx,avx")
1ee8b298
UB
12622 (set_attr "type" "ssemov")
12623 (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,vex")
12624 (set_attr "mode" "TI,TI,V4SF,SF,SF")])
ef719a44 12625
3f5783ea
UB
;; SSE4.1 extraction of one element (constant index in operand 2) from a
;; VI12_128 vector register with pextr<suffix>.  Alternative 0 writes a
;; general register (printed with %k0), alternative 1 writes memory.
;; prefix_data16 is "1" and prefix_extra is left at its default only for
;; alternative 0 in V8HImode; every other case is the other way round
;; (prefix_data16 default, prefix_extra "1").
12626(define_insn "*vec_extract<mode>"
12627 [(set (match_operand:<ssescalarmode> 0 "nonimmediate_operand" "=r,m")
12628 (vec_select:<ssescalarmode>
e61e7d28 12629 (match_operand:VI12_128 1 "register_operand" "x,x")
3f5783ea
UB
12630 (parallel
12631 [(match_operand:SI 2 "const_0_to_<ssescalarnummask>_operand")])))]
12632 "TARGET_SSE4_1"
12633 "@
12634 %vpextr<ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
12635 %vpextr<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12636 [(set_attr "type" "sselog1")
12637 (set (attr "prefix_data16")
12638 (if_then_else
12639 (and (eq_attr "alternative" "0")
12640 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
12641 (const_string "1")
12642 (const_string "*")))
12643 (set (attr "prefix_extra")
12644 (if_then_else
12645 (and (eq_attr "alternative" "0")
12646 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
12647 (const_string "*")
12648 (const_string "1")))
12649 (set_attr "length_immediate" "1")
12650 (set_attr "prefix" "maybe_vex")
12651 (set_attr "mode" "TI")])
12652
;; Extract one HImode element (index 0..7) of a V8HI register into a
;; general register with pextrw.  Only enabled when SSE2 is available and
;; SSE4.1 is not.
12653(define_insn "*vec_extractv8hi_sse2"
12654 [(set (match_operand:HI 0 "register_operand" "=r")
12655 (vec_select:HI
12656 (match_operand:V8HI 1 "register_operand" "x")
12657 (parallel
12658 [(match_operand:SI 2 "const_0_to_7_operand")])))]
12659 "TARGET_SSE2 && !TARGET_SSE4_1"
12660 "pextrw\t{%2, %1, %k0|%k0, %1, %2}"
12661 [(set_attr "type" "sselog1")
12662 (set_attr "prefix_data16" "1")
12663 (set_attr "length_immediate" "1")
12664 (set_attr "mode" "TI")])
12665
;; Extract one QImode element (index 0..15) of a V16QI register and
;; zero-extend it to SImode or DImode (SWI48) in a general register,
;; using a single pextrb (SSE4.1).
12666(define_insn "*vec_extractv16qi_zext"
12667 [(set (match_operand:SWI48 0 "register_operand" "=r")
12668 (zero_extend:SWI48
12669 (vec_select:QI
12670 (match_operand:V16QI 1 "register_operand" "x")
12671 (parallel
12672 [(match_operand:SI 2 "const_0_to_15_operand")]))))]
12673 "TARGET_SSE4_1"
12674 "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
12675 [(set_attr "type" "sselog1")
12676 (set_attr "prefix_extra" "1")
12677 (set_attr "length_immediate" "1")
12678 (set_attr "prefix" "maybe_vex")
12679 (set_attr "mode" "TI")])
12680
;; Extract one HImode element (index 0..7) of a V8HI register and
;; zero-extend it to SImode or DImode (SWI48) in a general register,
;; using a single pextrw (SSE2).
12681(define_insn "*vec_extractv8hi_zext"
12682 [(set (match_operand:SWI48 0 "register_operand" "=r")
12683 (zero_extend:SWI48
12684 (vec_select:HI
12685 (match_operand:V8HI 1 "register_operand" "x")
12686 (parallel
12687 [(match_operand:SI 2 "const_0_to_7_operand")]))))]
12688 "TARGET_SSE2"
12689 "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}"
12690 [(set_attr "type" "sselog1")
12691 (set_attr "prefix_data16" "1")
12692 (set_attr "length_immediate" "1")
12693 (set_attr "prefix" "maybe_vex")
12694 (set_attr "mode" "TI")])
12695
;; Element extraction from a VI12_128 vector that lives in offsettable
;; memory ("o") into a general register.  The "#" template means no
;; assembly is emitted directly; the pattern must be split (see the
;; memory-source define_split later in this file).
e61e7d28 12696(define_insn "*vec_extract<mode>_mem"
3f5783ea
UB
12697 [(set (match_operand:<ssescalarmode> 0 "register_operand" "=r")
12698 (vec_select:<ssescalarmode>
e61e7d28 12699 (match_operand:VI12_128 1 "memory_operand" "o")
3f5783ea
UB
12700 (parallel
12701 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
12702 "TARGET_SSE"
e61e7d28 12703 "#")
3f5783ea 12704
;; Extraction of element 0 as an SImode/DImode (SWI48) value.  The
;; condition rejects memory-to-memory; the "#" template means the pattern
;; is always split rather than printed.  Only the second alternative is
;; restricted by the "isa" attribute (sse4).
3095685e 12705(define_insn "*vec_extract<ssevecmodelower>_0"
3f5783ea 12706 [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r ,r,x ,m")
3095685e 12707 (vec_select:SWI48
3f5783ea 12708 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "mYj,x,xm,x")
ef719a44 12709 (parallel [(const_int 0)])))]
3095685e 12710 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
f75e6a51 12711 "#"
3f5783ea 12712 [(set_attr "isa" "*,sse4,*,*")])
ef719a44 12713
60ca9a65
UB
;; Zero-extend element 0 of a V4SI register into a DImode general
;; register (64-bit, SSE2, inter-unit moves from vector registers enabled).
;; After reload it is split into a plain zero_extend:DI whose source is
;; the same hard register re-expressed in SImode.
12714(define_insn_and_split "*vec_extractv4si_0_zext"
12715 [(set (match_operand:DI 0 "register_operand" "=r")
12716 (zero_extend:DI
12717 (vec_select:SI
12718 (match_operand:V4SI 1 "register_operand" "x")
12719 (parallel [(const_int 0)]))))]
12720 "TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_FROM_VEC"
12721 "#"
12722 "&& reload_completed"
12723 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
12724 "operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));")
12725
3095685e
UB
;; 32-bit-target variant: extract element 0 of a V2DI as DImode into an
;; SSE register or memory.  Memory-to-memory is rejected; the "#" template
;; means the pattern is always split.
12726(define_insn "*vec_extractv2di_0_sse"
12727 [(set (match_operand:DI 0 "nonimmediate_operand" "=x,m")
ed69105c 12728 (vec_select:DI
3095685e 12729 (match_operand:V2DI 1 "nonimmediate_operand" "xm,x")
ed69105c 12730 (parallel [(const_int 0)])))]
3095685e
UB
12731 "TARGET_SSE && !TARGET_64BIT
12732 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
12733 "#")
ed69105c 12734
;; After reload, extracting element 0 from a vector *register* becomes a
;; plain scalar move: the source is replaced by the same hard register
;; number viewed in the scalar mode <MODE>mode.
3095685e 12735(define_split
f75e6a51 12736 [(set (match_operand:SWI48x 0 "nonimmediate_operand")
3095685e 12737 (vec_select:SWI48x
e61e7d28 12738 (match_operand:<ssevecmode> 1 "register_operand")
ef719a44 12739 (parallel [(const_int 0)])))]
3095685e
UB
12740 "TARGET_SSE && reload_completed"
12741 [(set (match_dup 0) (match_dup 1))]
e61e7d28 12742 "operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1]));")
ef719a44 12743
;; Extract one SImode element (index 0..3) from a V4SI register (SSE4.1).
;;   alt 0    : pextrd to a general register or memory.
;;   alt 1, 2 : destination is tied to the source ("0"); psrldq shifts the
;;              register right by index * 4 bytes (non-AVX).
;;   alt 3    : same byte shift with the three-operand vpsrldq (AVX).
;; For the shift alternatives operands[2] is rewritten from an element
;; index to a byte count before the template is printed.
f75e6a51 12744(define_insn "*vec_extractv4si"
45392c76 12745 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,Yr,*x,x")
f75e6a51 12746 (vec_select:SI
45392c76 12747 (match_operand:V4SI 1 "register_operand" "x,0,0,x")
f75e6a51
UB
12748 (parallel [(match_operand:SI 2 "const_0_to_3_operand")])))]
12749 "TARGET_SSE4_1"
60ca9a65
UB
12750{
12751 switch (which_alternative)
12752 {
12753 case 0:
12754 return "%vpextrd\t{%2, %1, %0|%0, %1, %2}";
12755
12756 case 1:
45392c76 12757 case 2:
60ca9a65
UB
12758 operands [2] = GEN_INT (INTVAL (operands[2]) * 4);
12759 return "psrldq\t{%2, %0|%0, %2}";
12760
45392c76 12761 case 3:
60ca9a65
UB
12762 operands [2] = GEN_INT (INTVAL (operands[2]) * 4);
12763 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
12764
12765 default:
12766 gcc_unreachable ();
12767 }
12768}
45392c76
IE
12769 [(set_attr "isa" "*,noavx,noavx,avx")
12770 (set_attr "type" "sselog1,sseishft1,sseishft1,sseishft1")
12771 (set_attr "prefix_extra" "1,*,*,*")
f75e6a51 12772 (set_attr "length_immediate" "1")
45392c76 12773 (set_attr "prefix" "maybe_vex,orig,orig,vex")
f75e6a51
UB
12774 (set_attr "mode" "TI")])
12775
;; Extract one SImode element (index 0..3) of a V4SI register and
;; zero-extend it to DImode in a general register.  A single pextrd
;; writing the 32-bit register name (%k0) is emitted; 64-bit SSE4.1 only.
12776(define_insn "*vec_extractv4si_zext"
12777 [(set (match_operand:DI 0 "register_operand" "=r")
12778 (zero_extend:DI
12779 (vec_select:SI
12780 (match_operand:V4SI 1 "register_operand" "x")
12781 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
12782 "TARGET_64BIT && TARGET_SSE4_1"
12783 "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
12784 [(set_attr "type" "sselog1")
12785 (set_attr "prefix_extra" "1")
12786 (set_attr "length_immediate" "1")
12787 (set_attr "prefix" "maybe_vex")
12788 (set_attr "mode" "TI")])
3095685e 12789
;; Extract one SImode element from a V4SI held in offsettable memory into
;; an SSE or general register.  Always split ("#" template); see the
;; memory-source define_split later in this file.
e61e7d28 12790(define_insn "*vec_extractv4si_mem"
3095685e
UB
12791 [(set (match_operand:SI 0 "register_operand" "=x,r")
12792 (vec_select:SI
12793 (match_operand:V4SI 1 "memory_operand" "o,o")
12794 (parallel [(match_operand 2 "const_0_to_3_operand")])))]
12795 "TARGET_SSE"
e61e7d28 12796 "#")
ef719a44 12797
60ca9a65
UB
;; Zero-extending extraction of one SImode element from a V4SI in
;; offsettable memory (64-bit only).  After reload it is split into a
;; zero_extend:DI of an SImode memory reference at byte offset
;; index * 4 from the original address.
12798(define_insn_and_split "*vec_extractv4si_zext_mem"
12799 [(set (match_operand:DI 0 "register_operand" "=x,r")
12800 (zero_extend:DI
12801 (vec_select:SI
12802 (match_operand:V4SI 1 "memory_operand" "o,o")
12803 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
12804 "TARGET_64BIT && TARGET_SSE"
12805 "#"
12806 "&& reload_completed"
12807 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
12808{
12809 operands[1] = adjust_address (operands[1], SImode, INTVAL (operands[2]) * 4);
12810})
12811
;; Extract element 1 (the upper DImode half) of a V2DI.  Memory-to-memory
;; is rejected.  Alternatives, in order:
;;   0  pextrq $1 to a general register or memory (x64 + SSE4)
;;   1  movhps store to memory
;;   2  psrldq $8 in place (SSE2, non-AVX; source tied to destination)
;;   3  vpsrldq $8, three-operand (AVX)
;;   4  movhlps register to register (non-AVX)
;;   5  source in offsettable memory, SSE destination: split ("#")
;;   6  source in offsettable memory, general destination: split (x64)
aad61732 12812(define_insn "*vec_extractv2di_1"
f75e6a51 12813 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,m,x,x,x,x,r")
c49c0c39 12814 (vec_select:DI
f75e6a51 12815 (match_operand:V2DI 1 "nonimmediate_operand" "x ,x,0,x,x,o,o")
c49c0c39 12816 (parallel [(const_int 1)])))]
3095685e 12817 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
c49c0c39 12818 "@
f75e6a51 12819 %vpextrq\t{$1, %1, %0|%0, %1, 1}
1ee8b298 12820 %vmovhps\t{%1, %0|%0, %1}
77315816 12821 psrldq\t{$8, %0|%0, 8}
1ee8b298 12822 vpsrldq\t{$8, %1, %0|%0, %1, 8}
4d9cab74 12823 movhlps\t{%1, %0|%0, %1}
3095685e
UB
12824 #
12825 #"
f75e6a51
UB
12826 [(set_attr "isa" "x64_sse4,*,sse2_noavx,avx,noavx,*,x64")
12827 (set_attr "type" "sselog1,ssemov,sseishft1,sseishft1,ssemov,ssemov,imov")
12828 (set_attr "length_immediate" "1,*,1,1,*,*,*")
f75e6a51
UB
12829 (set_attr "prefix_rex" "1,*,*,*,*,*,*")
12830 (set_attr "prefix_extra" "1,*,*,*,*,*,*")
12831 (set_attr "prefix" "maybe_vex,maybe_vex,orig,vex,orig,*,*")
12832 (set_attr "mode" "TI,V2SF,TI,TI,V4SF,DI,DI")])
3095685e
UB
12833
;; After reload, extracting a constant-index element from a 128-bit
;; integer vector in *memory* becomes a scalar load: the memory reference
;; is narrowed to the element mode at byte offset
;; index * GET_MODE_SIZE (element mode).
12834(define_split
e61e7d28
UB
12835 [(set (match_operand:<ssescalarmode> 0 "register_operand")
12836 (vec_select:<ssescalarmode>
12837 (match_operand:VI_128 1 "memory_operand")
12838 (parallel
12839 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
3095685e
UB
12840 "TARGET_SSE && reload_completed"
12841 [(set (match_dup 0) (match_dup 1))]
e61e7d28
UB
12842{
12843 int offs = INTVAL (operands[2]) * GET_MODE_SIZE (<ssescalarmode>mode);
12844
12845 operands[1] = adjust_address (operands[1], <ssescalarmode>mode, offs);
12846})
c49c0c39 12847
fbf524de
JJ
12848;; Turn SImode or DImode extraction from arbitrary SSE/AVX/AVX512F
12849;; vector modes into vec_extract*.
;; Matches a scalar move whose source is a byte-offset-0 SUBREG of a
;; vector-mode register (integer or float vector; 16 bytes, 32 bytes with
;; AVX, or 64 bytes with AVX512F) while pseudos can still be created.
;; DImode is only handled on 64-bit targets or when storing to memory.
;; The preparation code strips the SUBREG and narrows wide vectors step by
;; step: a 64-byte source is first reduced to its low 256 bits, then
;; (falling through) a 32-byte source to its low 128 bits, each time via
;; the matching vec_extract_lo_* pattern into a fresh pseudo.  A 16-byte
;; source is simply re-viewed in <ssevecmode>.  The replacement insn then
;; selects element 0 of the resulting vector.
12850(define_split
12851 [(set (match_operand:SWI48x 0 "nonimmediate_operand")
12852 (match_operand:SWI48x 1 "register_operand"))]
12853 "can_create_pseudo_p ()
12854 && GET_CODE (operands[1]) == SUBREG
12855 && REG_P (SUBREG_REG (operands[1]))
12856 && (GET_MODE_CLASS (GET_MODE (SUBREG_REG (operands[1]))) == MODE_VECTOR_INT
12857 || (GET_MODE_CLASS (GET_MODE (SUBREG_REG (operands[1])))
12858 == MODE_VECTOR_FLOAT))
12859 && SUBREG_BYTE (operands[1]) == 0
12860 && TARGET_SSE
12861 && (GET_MODE_SIZE (GET_MODE (SUBREG_REG (operands[1]))) == 16
12862 || (GET_MODE_SIZE (GET_MODE (SUBREG_REG (operands[1]))) == 32
12863 && TARGET_AVX)
12864 || (GET_MODE_SIZE (GET_MODE (SUBREG_REG (operands[1]))) == 64
12865 && TARGET_AVX512F))
12866 && (<MODE>mode == SImode || TARGET_64BIT || MEM_P (operands[0]))"
12867 [(set (match_dup 0) (vec_select:SWI48x (match_dup 1)
12868 (parallel [(const_int 0)])))]
12869{
12870 rtx tmp;
12871 operands[1] = SUBREG_REG (operands[1]);
12872 switch (GET_MODE_SIZE (GET_MODE (operands[1])))
12873 {
12874 case 64:
12875 if (<MODE>mode == SImode)
12876 {
12877 tmp = gen_reg_rtx (V8SImode);
12878 emit_insn (gen_vec_extract_lo_v16si (tmp,
12879 gen_lowpart (V16SImode,
12880 operands[1])));
12881 }
12882 else
12883 {
12884 tmp = gen_reg_rtx (V4DImode);
12885 emit_insn (gen_vec_extract_lo_v8di (tmp,
12886 gen_lowpart (V8DImode,
12887 operands[1])));
12888 }
12889 operands[1] = tmp;
12890 /* FALLTHRU */
12891 case 32:
12892 tmp = gen_reg_rtx (<ssevecmode>mode);
12893 if (<MODE>mode == SImode)
12894 emit_insn (gen_vec_extract_lo_v8si (tmp, gen_lowpart (V8SImode,
12895 operands[1])));
12896 else
12897 emit_insn (gen_vec_extract_lo_v4di (tmp, gen_lowpart (V4DImode,
12898 operands[1])));
12899 operands[1] = tmp;
12900 break;
12901 case 16:
12902 operands[1] = gen_lowpart (<ssevecmode>mode, operands[1]);
12903 break;
12904 }
12905})
12906
;; Build a V2SI from two SImode values when SSE4.1 is available; at most
;; one of the two inputs may be in memory.  Alternatives, in order:
;;   0-2  pinsrd / vpinsrd $1: insert operand 2 as element 1
;;   3-5  punpckldq / vpunpckldq: interleave two SSE registers
;;   6    movd of operand 1 when operand 2 is zero ("C")
;;   7    MMX punpckldq
;;   8    MMX movd of operand 1 when operand 2 is zero
fcc9fe1e 12907(define_insn "*vec_concatv2si_sse4_1"
ee768d85
UB
12908 [(set (match_operand:V2SI 0 "register_operand"
12909 "=Yr,*x,x, Yr,*x,x, x, *y,*y")
fcc9fe1e 12910 (vec_concat:V2SI
ee768d85
UB
12911 (match_operand:SI 1 "nonimmediate_operand"
12912 " 0, 0,x, 0,0, x,rm, 0,rm")
12913 (match_operand:SI 2 "vector_move_operand"
12914 " rm,rm,rm,Yr,*x,x, C,*ym, C")))]
12915 "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
fcc9fe1e 12916 "@
45392c76 12917 pinsrd\t{$1, %2, %0|%0, %2, 1}
aad61732
UB
12918 pinsrd\t{$1, %2, %0|%0, %2, 1}
12919 vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
6784c6e0 12920 punpckldq\t{%2, %0|%0, %2}
45392c76 12921 punpckldq\t{%2, %0|%0, %2}
1ee8b298
UB
12922 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
12923 %vmovd\t{%1, %0|%0, %1}
6784c6e0
UB
12924 punpckldq\t{%2, %0|%0, %2}
12925 movd\t{%1, %0|%0, %1}"
45392c76
IE
12926 [(set_attr "isa" "noavx,noavx,avx,noavx,noavx,avx,*,*,*")
12927 (set_attr "type" "sselog,sselog,sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
12928 (set_attr "prefix_extra" "1,1,1,*,*,*,*,*,*")
12929 (set_attr "length_immediate" "1,1,1,*,*,*,*,*,*")
12930 (set_attr "prefix" "orig,orig,vex,orig,orig,vex,maybe_vex,orig,orig")
12931 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,DI,DI")])
fcc9fe1e 12932
eb701deb
RH
12933;; ??? In theory we can match memory for the MMX alternative, but allowing
12934;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
12935;; alternatives pretty much forces the MMX alternative to be chosen.
3c21604f
UB
;; Pre-SSE4.1 construction of a V2SI from two SImode values (operand 2
;; is a register or zero).  Alternatives, in order:
;;   0  punpckldq on SSE registers (SSE2)
;;   1  movd into an SSE register, operand 2 zero (SSE2)
;;   2  movd into an MMX register, operand 2 zero (SSE2)
;;   3  unpcklps on SSE registers
;;   4  movss load from memory, operand 2 zero
;;   5  MMX punpckldq
;;   6  MMX movd, operand 2 zero
12936(define_insn "*vec_concatv2si"
12937 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,x,x,*y,*y")
eb701deb 12938 (vec_concat:V2SI
3c21604f
UB
12939 (match_operand:SI 1 "nonimmediate_operand" " 0,rm,rm,0,m, 0,*rm")
12940 (match_operand:SI 2 "reg_or_0_operand" " x,C ,C, x,C,*y,C")))]
12941 "TARGET_SSE && !TARGET_SSE4_1"
eb701deb
RH
12942 "@
12943 punpckldq\t{%2, %0|%0, %2}
12944 movd\t{%1, %0|%0, %1}
3c21604f 12945 movd\t{%1, %0|%0, %1}
eb701deb
RH
12946 unpcklps\t{%2, %0|%0, %2}
12947 movss\t{%1, %0|%0, %1}
12948 punpckldq\t{%2, %0|%0, %2}
12949 movd\t{%1, %0|%0, %1}"
3c21604f
UB
12950 [(set_attr "isa" "sse2,sse2,sse2,*,*,*,*")
12951 (set_attr "type" "sselog,ssemov,mmxmov,sselog,ssemov,mmxcvt,mmxmov")
12952 (set_attr "mode" "TI,TI,DI,V4SF,SF,DI,DI")])
eb701deb 12953
;; Concatenate two V2SI halves into a V4SI.  Alternatives, in order:
;;   0  punpcklqdq (SSE2, non-AVX)      1  vpunpcklqdq (AVX)
;;   2  movlhps register form (non-AVX)
;;   3  movhps from memory (non-AVX)    4  vmovhps from memory (AVX)
;; In the memory alternatives the Intel-syntax operand is printed with the
;; %q modifier.
fb55d62e 12954(define_insn "*vec_concatv4si"
a02f398d 12955 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x,x,x")
95879c72 12956 (vec_concat:V4SI
a02f398d
UB
12957 (match_operand:V2SI 1 "register_operand" " 0,x,0,0,x")
12958 (match_operand:V2SI 2 "nonimmediate_operand" " x,x,x,m,m")))]
eb701deb
RH
12959 "TARGET_SSE"
12960 "@
12961 punpcklqdq\t{%2, %0|%0, %2}
fb55d62e 12962 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
eb701deb 12963 movlhps\t{%2, %0|%0, %2}
eabb5f48
UB
12964 movhps\t{%2, %0|%0, %q2}
12965 vmovhps\t{%2, %1, %0|%0, %1, %q2}"
a02f398d 12966 [(set_attr "isa" "sse2_noavx,avx,noavx,noavx,avx")
fb55d62e
UB
12967 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
12968 (set_attr "prefix" "orig,vex,orig,orig,vex")
12969 (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")])
eb701deb 12970
843b6915 12971;; movd instead of movq is required to handle broken assemblers.
;; Build a V2DI from two DImode values.  Alternatives, in order:
;;   0-2   pinsrq / vpinsrq $1: insert operand 2 as element 1 (x64 SSE4)
;;   3     GPR -> SSE move with operand 2 zero; emits movq or movd
;;         depending on HAVE_AS_IX86_INTERUNIT_MOVQ (x64)
;;   4     movq from an SSE register or memory, operand 2 zero (SSE2)
;;   5     movq2dq from an MMX register, operand 2 zero (SSE2)
;;   6,7   punpcklqdq / vpunpcklqdq
;;   8     movlhps register form
;;   9,10  movhps / vmovhps with operand 2 in memory
;; The "type" attribute is sselog for alternatives 0,1,2,6,7 and ssemov
;; for all others.
3c21604f 12972(define_insn "vec_concatv2di"
1ee8b298 12973 [(set (match_operand:V2DI 0 "register_operand"
45392c76 12974 "=Yr,*x,x ,Yi,x ,!x,x,x,x,x,x")
6784c6e0 12975 (vec_concat:V2DI
1ee8b298 12976 (match_operand:DI 1 "nonimmediate_operand"
45392c76 12977 " 0, 0,x ,r ,xm,*y,0,x,0,0,x")
1ee8b298 12978 (match_operand:DI 2 "vector_move_operand"
45392c76 12979 "*rm,rm,rm,C ,C ,C ,x,x,x,m,m")))]
3c21604f 12980 "TARGET_SSE"
6784c6e0 12981 "@
45392c76 12982 pinsrq\t{$1, %2, %0|%0, %2, 1}
aad61732
UB
12983 pinsrq\t{$1, %2, %0|%0, %2, 1}
12984 vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
13a26a7d 12985 * return HAVE_AS_IX86_INTERUNIT_MOVQ ? \"%vmovq\t{%1, %0|%0, %1}\" : \"%vmovd\t{%1, %0|%0, %1}\";
3c21604f 12986 %vmovq\t{%1, %0|%0, %1}
6784c6e0
UB
12987 movq2dq\t{%1, %0|%0, %1}
12988 punpcklqdq\t{%2, %0|%0, %2}
1ee8b298 12989 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
3c21604f 12990 movlhps\t{%2, %0|%0, %2}
1ee8b298
UB
12991 movhps\t{%2, %0|%0, %2}
12992 vmovhps\t{%2, %1, %0|%0, %1, %2}"
45392c76 12993 [(set_attr "isa" "x64_sse4_noavx,x64_sse4_noavx,x64_avx,x64,sse2,sse2,sse2_noavx,avx,noavx,noavx,avx")
aad61732
UB
12994 (set (attr "type")
12995 (if_then_else
45392c76 12996 (eq_attr "alternative" "0,1,2,6,7")
aad61732
UB
12997 (const_string "sselog")
12998 (const_string "ssemov")))
45392c76
IE
12999 (set_attr "prefix_rex" "1,1,1,1,*,*,*,*,*,*,*")
13000 (set_attr "prefix_extra" "1,1,1,*,*,*,*,*,*,*,*")
13001 (set_attr "length_immediate" "1,1,1,*,*,*,*,*,*,*,*")
13002 (set_attr "prefix" "orig,orig,vex,maybe_vex,maybe_vex,orig,orig,vex,orig,orig,vex")
13003 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")])
1ee8b298 13004
;; Signed unpack of the low half: delegates to ix86_expand_sse_unpack
;; with its third and fourth arguments (false, false) -- presumably
;; "unsigned_p" and "high_p"; confirm against the helper's definition.
8dfb9f16 13005(define_expand "vec_unpacks_lo_<mode>"
82e86dc6 13006 [(match_operand:<sseunpackmode> 0 "register_operand")
3bdf6340 13007 (match_operand:VI124_AVX512F 1 "register_operand")]
89d67cca 13008 "TARGET_SSE2"
7b532118 13009 "ix86_expand_sse_unpack (operands[0], operands[1], false, false); DONE;")
89d67cca 13010
;; Signed unpack of the high half: delegates to ix86_expand_sse_unpack
;; with its third and fourth arguments (false, true) -- presumably
;; "unsigned_p" and "high_p"; confirm against the helper's definition.
8dfb9f16 13011(define_expand "vec_unpacks_hi_<mode>"
82e86dc6 13012 [(match_operand:<sseunpackmode> 0 "register_operand")
3bdf6340 13013 (match_operand:VI124_AVX512F 1 "register_operand")]
89d67cca 13014 "TARGET_SSE2"
7b532118 13015 "ix86_expand_sse_unpack (operands[0], operands[1], false, true); DONE;")
89d67cca 13016
;; Unsigned unpack of the low half: delegates to ix86_expand_sse_unpack
;; with its third and fourth arguments (true, false) -- presumably
;; "unsigned_p" and "high_p"; confirm against the helper's definition.
8dfb9f16 13017(define_expand "vec_unpacku_lo_<mode>"
82e86dc6 13018 [(match_operand:<sseunpackmode> 0 "register_operand")
3bdf6340 13019 (match_operand:VI124_AVX512F 1 "register_operand")]
89d67cca 13020 "TARGET_SSE2"
7b532118 13021 "ix86_expand_sse_unpack (operands[0], operands[1], true, false); DONE;")
89d67cca 13022
;; Unsigned unpack of the high half: delegates to ix86_expand_sse_unpack
;; with its third and fourth arguments (true, true) -- presumably
;; "unsigned_p" and "high_p"; confirm against the helper's definition.
8dfb9f16 13023(define_expand "vec_unpacku_hi_<mode>"
82e86dc6 13024 [(match_operand:<sseunpackmode> 0 "register_operand")
3bdf6340 13025 (match_operand:VI124_AVX512F 1 "register_operand")]
89d67cca 13026 "TARGET_SSE2"
7b532118 13027 "ix86_expand_sse_unpack (operands[0], operands[1], true, true); DONE;")
89d67cca 13028
ef719a44
RH
13029;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13030;;
35fd3193 13031;; Miscellaneous
ef719a44
RH
13032;;
13033;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13034
;; Expander for the unsigned rounding average of byte/word vectors,
;; modelled as truncate (((zext a) + (zext b) + <const>) >> 1) in the
;; double-width mode.  The constant is supplied by the preparation code as
;; CONST1_RTX (<MODE>mode).
;; The preparation code writes that constant into operands[3] and runs
;; ix86_fixup_binary_operands_no_copy.  When <mask_applied> is true,
;; operands[3] already holds a caller operand: it is saved in tmp first,
;; and afterwards the constant is moved to operands[5] and operands[3]
;; is restored.  (<mask_expand_op3> is presumably 3 or 5 accordingly --
;; confirm against the mask subst attributes.)
c9b17fa5 13035(define_expand "<sse2_avx2>_uavg<mode>3<mask_name>"
880ab4be
AT
13036 [(set (match_operand:VI12_AVX2 0 "register_operand")
13037 (truncate:VI12_AVX2
13038 (lshiftrt:<ssedoublemode>
13039 (plus:<ssedoublemode>
13040 (plus:<ssedoublemode>
13041 (zero_extend:<ssedoublemode>
13042 (match_operand:VI12_AVX2 1 "nonimmediate_operand"))
13043 (zero_extend:<ssedoublemode>
13044 (match_operand:VI12_AVX2 2 "nonimmediate_operand")))
c9b17fa5 13045 (match_dup <mask_expand_op3>))
ffbaf337 13046 (const_int 1))))]
c9b17fa5 13047 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
880ab4be 13048{
c9b17fa5
AI
13049 rtx tmp;
13050 if (<mask_applied>)
13051 tmp = operands[3];
880ab4be
AT
13052 operands[3] = CONST1_RTX(<MODE>mode);
13053 ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);
c9b17fa5
AI
13054
13055 if (<mask_applied>)
13056 {
13057 operands[5] = operands[3];
13058 operands[3] = tmp;
13059 }
880ab4be 13060})
977e83a3 13061
c9b17fa5
AI
;; Matcher for the unsigned rounding average built by the expander of the
;; same name: truncate (((zext op1) + (zext op2) + const1) >> 1).
;; Operand 1 is marked commutative ("%").  Alternative 0 is the
;; two-operand legacy pavg<suffix> (destination tied to operand 1);
;; alternative 1 is the three-operand vpavg<suffix>, which also carries
;; the optional mask operand in its template.
13062(define_insn "*<sse2_avx2>_uavg<mode>3<mask_name>"
13063 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v")
880ab4be
AT
13064 (truncate:VI12_AVX2
13065 (lshiftrt:<ssedoublemode>
13066 (plus:<ssedoublemode>
13067 (plus:<ssedoublemode>
13068 (zero_extend:<ssedoublemode>
c9b17fa5 13069 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "%0,v"))
880ab4be 13070 (zero_extend:<ssedoublemode>
c9b17fa5
AI
13071 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,vm")))
13072 (match_operand:VI12_AVX2 <mask_expand_op3> "const1_operand"))
ef719a44 13073 (const_int 1))))]
c9b17fa5
AI
13074 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
13075 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
81b1e7eb 13076 "@
880ab4be 13077 pavg<ssemodesuffix>\t{%2, %0|%0, %2}
c9b17fa5 13078 vpavg<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
81b1e7eb
UB
13079 [(set_attr "isa" "noavx,avx")
13080 (set_attr "type" "sseiadd")
13081 (set_attr "prefix_data16" "1,*")
c9b17fa5 13082 (set_attr "prefix" "orig,<mask_prefix>")
880ab4be 13083 (set_attr "mode" "<sseinsnmode>")])
ef719a44 13084
4f3f76e6 13085;; The correct representation for this is absolutely enormous, and
ef719a44 13086;; surely not generally useful.
;; psadbw / vpsadbw, kept opaque as UNSPEC_PSADBW.  The two inputs are
;; byte vectors (<ssebytemode>); the result is a vector of 64-bit
;; elements (VI8_AVX2_AVX512BW).  Alternative 0 is the two-operand legacy
;; form, alternative 1 the three-operand VEX/EVEX form.
977e83a3 13087(define_insn "<sse2_avx2>_psadbw"
44f59829
AI
13088 [(set (match_operand:VI8_AVX2_AVX512BW 0 "register_operand" "=x,v")
13089 (unspec:VI8_AVX2_AVX512BW
13090 [(match_operand:<ssebytemode> 1 "register_operand" "0,v")
13091 (match_operand:<ssebytemode> 2 "nonimmediate_operand" "xm,vm")]
8861ba4d 13092 UNSPEC_PSADBW))]
ef719a44 13093 "TARGET_SSE2"
81b1e7eb
UB
13094 "@
13095 psadbw\t{%2, %0|%0, %2}
13096 vpsadbw\t{%2, %1, %0|%0, %1, %2}"
13097 [(set_attr "isa" "noavx,avx")
13098 (set_attr "type" "sseiadd")
b6837b94 13099 (set_attr "atom_unit" "simul")
81b1e7eb 13100 (set_attr "prefix_data16" "1,*")
44f59829 13101 (set_attr "prefix" "orig,maybe_evex")
977e83a3 13102 (set_attr "mode" "<sseinsnmode>")])
ef719a44 13103
;; movmskps / movmskpd (and VEX forms): produce an SImode value in a
;; general register from a 128- or 256-bit float vector register.
;; Modelled as UNSPEC_MOVMSK.
cbb734aa 13104(define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>"
95879c72
L
13105 [(set (match_operand:SI 0 "register_operand" "=r")
13106 (unspec:SI
b86f6e9e 13107 [(match_operand:VF_128_256 1 "register_operand" "x")]
95879c72 13108 UNSPEC_MOVMSK))]
6bec6c98 13109 "TARGET_SSE"
1c154a23 13110 "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
b6837b94 13111 [(set_attr "type" "ssemov")
95879c72 13112 (set_attr "prefix" "maybe_vex")
85845bb9 13113 (set_attr "mode" "<MODE>")])
ef719a44 13114
977e83a3
KY
;; 256-bit vpmovmskb: SImode result in a general register from a V32QI
;; register, modelled as UNSPEC_MOVMSK.
;; NOTE(review): the "mode" attribute is "DI" here while sse2_pmovmskb
;; uses "SI" -- confirm this difference is intentional.
13115(define_insn "avx2_pmovmskb"
13116 [(set (match_operand:SI 0 "register_operand" "=r")
13117 (unspec:SI [(match_operand:V32QI 1 "register_operand" "x")]
13118 UNSPEC_MOVMSK))]
13119 "TARGET_AVX2"
13120 "vpmovmskb\t{%1, %0|%0, %1}"
13121 [(set_attr "type" "ssemov")
13122 (set_attr "prefix" "vex")
13123 (set_attr "mode" "DI")])
13124
ef719a44
RH
;; 128-bit pmovmskb (VEX-encoded when AVX is enabled, via %v): SImode
;; result in a general register from a V16QI register, modelled as
;; UNSPEC_MOVMSK.
13125(define_insn "sse2_pmovmskb"
13126 [(set (match_operand:SI 0 "register_operand" "=r")
13127 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
13128 UNSPEC_MOVMSK))]
13129 "TARGET_SSE2"
95879c72 13130 "%vpmovmskb\t{%1, %0|%0, %1}"
b6837b94 13131 [(set_attr "type" "ssemov")
10e4d956 13132 (set_attr "prefix_data16" "1")
95879c72 13133 (set_attr "prefix" "maybe_vex")
10e4d956 13134 (set_attr "mode" "SI")])
ef719a44
RH
13135
;; Expander for maskmovdqu.  The V16QI memory destination (operand 0)
;; also appears as an input of the UNSPEC_MASKMOV, alongside the two
;; register operands 1 and 2.  There is no preparation code; the pattern
;; is emitted as written.
13136(define_expand "sse2_maskmovdqu"
82e86dc6
UB
13137 [(set (match_operand:V16QI 0 "memory_operand")
13138 (unspec:V16QI [(match_operand:V16QI 1 "register_operand")
13139 (match_operand:V16QI 2 "register_operand")
ef719a44
RH
13140 (match_dup 0)]
13141 UNSPEC_MASKMOV))]
a427621f 13142 "TARGET_SSE2")
ef719a44
RH
13143
;; Matcher for maskmovdqu.  The store address is operand 0, a Pmode
;; register constrained to "D"; it is not printed in the template, which
;; lists only operands 1 and 2.
;; When Pmode differs from word_mode an "addr32" prefix is written
;; directly to the assembly file before the template is returned, and the
;; length_address attribute accounts for it.
13144(define_insn "*sse2_maskmovdqu"
f60c2554 13145 [(set (mem:V16QI (match_operand:P 0 "register_operand" "D"))
ef719a44
RH
13146 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
13147 (match_operand:V16QI 2 "register_operand" "x")
13148 (mem:V16QI (match_dup 0))]
13149 UNSPEC_MASKMOV))]
f60c2554 13150 "TARGET_SSE2"
061eff6d
UB
13151{
13152 /* We can't use %^ here due to ASM_OUTPUT_OPCODE processing
13153 that requires %v to be at the beginning of the opcode name. */
13154 if (Pmode != word_mode)
13155 fputs ("\taddr32", asm_out_file);
13156 return "%vmaskmovdqu\t{%2, %1|%1, %2}";
13157}
b6837b94 13158 [(set_attr "type" "ssemov")
10e4d956 13159 (set_attr "prefix_data16" "1")
061eff6d
UB
13160 (set (attr "length_address")
13161 (symbol_ref ("Pmode != word_mode")))
725fd454
JJ
13162 ;; The implicit %rdi operand confuses default length_vex computation.
13163 (set (attr "length_vex")
f60c2554 13164 (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))")))
95879c72 13165 (set_attr "prefix" "maybe_vex")
ef719a44
RH
13166 (set_attr "mode" "TI")])
13167
80e8bb90
RH
;; ldmxcsr / vldmxcsr: load MXCSR from the SImode memory operand 0.
;; Modelled as an unspec_volatile (UNSPECV_LDMXCSR); the "memory"
;; attribute is "load".
13168(define_insn "sse_ldmxcsr"
13169 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
13170 UNSPECV_LDMXCSR)]
13171 "TARGET_SSE"
95879c72 13172 "%vldmxcsr\t%0"
80e8bb90 13173 [(set_attr "type" "sse")
b6837b94 13174 (set_attr "atom_sse_attr" "mxcsr")
95879c72 13175 (set_attr "prefix" "maybe_vex")
80e8bb90
RH
13176 (set_attr "memory" "load")])
13177
;; stmxcsr / vstmxcsr: store MXCSR into the SImode memory operand 0.
;; The stored value is an unspec_volatile (UNSPECV_STMXCSR) with a dummy
;; (const_int 0) input; the "memory" attribute is "store".
13178(define_insn "sse_stmxcsr"
13179 [(set (match_operand:SI 0 "memory_operand" "=m")
13180 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
13181 "TARGET_SSE"
95879c72 13182 "%vstmxcsr\t%0"
80e8bb90 13183 [(set_attr "type" "sse")
b6837b94 13184 (set_attr "atom_sse_attr" "mxcsr")
95879c72 13185 (set_attr "prefix" "maybe_vex")
80e8bb90
RH
13186 (set_attr "memory" "store")])
13187
ef719a44
RH
;; clflush on the address given by operand 0 (any valid address, printed
;; with the %a modifier).  Modelled as an unspec_volatile
;; (UNSPECV_CLFLUSH); the "memory" attribute is "unknown".
13188(define_insn "sse2_clflush"
13189 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
13190 UNSPECV_CLFLUSH)]
13191 "TARGET_SSE2"
13192 "clflush\t%a0"
13193 [(set_attr "type" "sse")
b6837b94 13194 (set_attr "atom_sse_attr" "fence")
ef719a44
RH
13195 (set_attr "memory" "unknown")])
13196
ef719a44
RH
13197
;; mwait: both inputs are SImode values pinned to fixed registers by the
;; "a" and "c" constraints; the template prints the bare mnemonic because
;; the operands are implicit.  The length is fixed at 3 bytes.
13198(define_insn "sse3_mwait"
13199 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
13200 (match_operand:SI 1 "register_operand" "c")]
13201 UNSPECV_MWAIT)]
13202 "TARGET_SSE3"
67a4b391
L
13203;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
13204;; Since 32bit register operands are implicitly zero extended to 64bit,
13205;; we only need to set up 32bit registers.
13206 "mwait"
ef719a44
RH
13207 [(set_attr "length" "3")])
13208
;; monitor: operand 0 is a pointer-mode (P) value pinned by the "a"
;; constraint; operands 1 and 2 are SImode values pinned by "c" and "d".
;; The template prints the mnemonic through the %^ modifier, and the
;; length is 3 bytes plus one when Pmode differs from word_mode.
061eff6d 13209(define_insn "sse3_monitor_<mode>"
986b6423 13210 [(unspec_volatile [(match_operand:P 0 "register_operand" "a")
67a4b391
L
13211 (match_operand:SI 1 "register_operand" "c")
13212 (match_operand:SI 2 "register_operand" "d")]
13213 UNSPECV_MONITOR)]
061eff6d 13214 "TARGET_SSE3"
67a4b391
L
13215;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
13216;; RCX and RDX are used. Since 32bit register operands are implicitly
13217;; zero extended to 64bit, we only need to set up 32bit registers.
061eff6d
UB
13218 "%^monitor"
13219 [(set (attr "length")
13220 (symbol_ref ("(Pmode != word_mode) + 3")))])
b1875f52 13221
85845bb9
UB
13222;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13223;;
13224;; SSSE3 instructions
13225;;
13226;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13227
;; Code iterator over the four RTL codes plus, ss_plus, minus and
;; ss_minus; the horizontal add/subtract pattern that follows is
;; instantiated once per code.
8861ba4d 13228(define_code_iterator ssse3_plusminus [plus ss_plus minus ss_minus])
b1875f52 13229
;; 256-bit horizontal add/subtract of adjacent HImode element pairs.
;;
;; The 256-bit instruction works independently on each 128-bit lane:
;; within a lane, the low four results come from operand 1 and the high
;; four results come from operand 2.  The RTL therefore interleaves the
;; operands per lane (op1 low lane, op2 low lane, op1 high lane, op2 high
;; lane) rather than concatenating all of operand 1 followed by all of
;; operand 2, so that RTL simplification sees the real semantics.
(define_insn "avx2_ph<plusminus_mnemonic>wv16hi3"
  [(set (match_operand:V16HI 0 "register_operand" "=x")
        (vec_concat:V16HI
          ;; Result elements 0-7: low 128-bit lane.
          (vec_concat:V8HI
            ;; Elements 0-3: pairs taken from elements 0-7 of operand 1.
            (vec_concat:V4HI
              (vec_concat:V2HI
                (ssse3_plusminus:HI
                  (vec_select:HI
                    (match_operand:V16HI 1 "register_operand" "x")
                    (parallel [(const_int 0)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
                (ssse3_plusminus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
              (vec_concat:V2HI
                (ssse3_plusminus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
                (ssse3_plusminus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
            ;; Elements 4-7: pairs taken from elements 0-7 of operand 2.
            (vec_concat:V4HI
              (vec_concat:V2HI
                (ssse3_plusminus:HI
                  (vec_select:HI
                    (match_operand:V16HI 2 "nonimmediate_operand" "xm")
                    (parallel [(const_int 0)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
                (ssse3_plusminus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
              (vec_concat:V2HI
                (ssse3_plusminus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
                (ssse3_plusminus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 7)]))))))
          ;; Result elements 8-15: high 128-bit lane.
          (vec_concat:V8HI
            ;; Elements 8-11: pairs taken from elements 8-15 of operand 1.
            (vec_concat:V4HI
              (vec_concat:V2HI
                (ssse3_plusminus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
                (ssse3_plusminus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
              (vec_concat:V2HI
                (ssse3_plusminus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
                (ssse3_plusminus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 15)])))))
            ;; Elements 12-15: pairs taken from elements 8-15 of operand 2.
            (vec_concat:V4HI
              (vec_concat:V2HI
                (ssse3_plusminus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
                (ssse3_plusminus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
              (vec_concat:V2HI
                (ssse3_plusminus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
                (ssse3_plusminus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
  "TARGET_AVX2"
  "vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
13305
;; 128-bit horizontal add/subtract of adjacent HImode element pairs.
;; Result elements 0-3 are formed from the pairs of operand 1, result
;; elements 4-7 from the pairs of operand 2.  Alternative 0 is the
;; two-operand legacy encoding (operand 1 tied to the destination),
;; alternative 1 the three-operand VEX encoding.
(define_insn "ssse3_ph<plusminus_mnemonic>wv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
        (vec_concat:V8HI
          ;; Pairs from operand 1.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (ssse3_plusminus:HI
                (vec_select:HI
                  (match_operand:V8HI 1 "register_operand" "0,x")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
              (ssse3_plusminus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (ssse3_plusminus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
              (ssse3_plusminus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
          ;; Pairs from operand 2.
          (vec_concat:V4HI
            (vec_concat:V2HI
              (ssse3_plusminus:HI
                (vec_select:HI
                  (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
              (ssse3_plusminus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (ssse3_plusminus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
              (ssse3_plusminus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_SSSE3"
  "@
   ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}
   vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])
13354
;; 64-bit (MMX register, constraint "y") horizontal add/subtract of
;; adjacent HImode element pairs.  Result elements 0-1 come from the
;; pairs of operand 1 (tied to the destination), elements 2-3 from the
;; pairs of operand 2.
(define_insn "ssse3_ph<plusminus_mnemonic>wv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (vec_concat:V4HI
          ;; Pairs from operand 1.
          (vec_concat:V2HI
            (ssse3_plusminus:HI
              (vec_select:HI
                (match_operand:V4HI 1 "register_operand" "0")
                (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
            (ssse3_plusminus:HI
              (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
          ;; Pairs from operand 2.
          (vec_concat:V2HI
            (ssse3_plusminus:HI
              (vec_select:HI
                (match_operand:V4HI 2 "nonimmediate_operand" "ym")
                (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
            (ssse3_plusminus:HI
              (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSSE3"
  "ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_extra" "1")
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
13383
;; 256-bit horizontal add/subtract of adjacent SImode element pairs.
;;
;; The 256-bit instruction works independently on each 128-bit lane:
;; within a lane, the low two results come from operand 1 and the high
;; two results come from operand 2.  The RTL therefore interleaves the
;; operands per lane (op1 low lane, op2 low lane, op1 high lane, op2 high
;; lane) rather than concatenating all of operand 1 followed by all of
;; operand 2, so that RTL simplification sees the real semantics.
(define_insn "avx2_ph<plusminus_mnemonic>dv8si3"
  [(set (match_operand:V8SI 0 "register_operand" "=x")
        (vec_concat:V8SI
          ;; Result elements 0-3: low 128-bit lane.
          (vec_concat:V4SI
            ;; Elements 0-1: pairs taken from elements 0-3 of operand 1.
            (vec_concat:V2SI
              (plusminus:SI
                (vec_select:SI
                  (match_operand:V8SI 1 "register_operand" "x")
                  (parallel [(const_int 0)]))
                (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
              (plusminus:SI
                (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
                (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
            ;; Elements 2-3: pairs taken from elements 0-3 of operand 2.
            (vec_concat:V2SI
              (plusminus:SI
                (vec_select:SI
                  (match_operand:V8SI 2 "nonimmediate_operand" "xm")
                  (parallel [(const_int 0)]))
                (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
              (plusminus:SI
                (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
                (vec_select:SI (match_dup 2) (parallel [(const_int 3)])))))
          ;; Result elements 4-7: high 128-bit lane.
          (vec_concat:V4SI
            ;; Elements 4-5: pairs taken from elements 4-7 of operand 1.
            (vec_concat:V2SI
              (plusminus:SI
                (vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
                (vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
              (plusminus:SI
                (vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
                (vec_select:SI (match_dup 1) (parallel [(const_int 7)]))))
            ;; Elements 6-7: pairs taken from elements 4-7 of operand 2.
            (vec_concat:V2SI
              (plusminus:SI
                (vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
                (vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
              (plusminus:SI
                (vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
                (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_AVX2"
  "vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
13427
;; 128-bit horizontal add/subtract of adjacent SImode element pairs.
;; Result elements 0-1 are formed from the pairs of operand 1, result
;; elements 2-3 from the pairs of operand 2.  Alternative 0 is the
;; two-operand legacy encoding, alternative 1 the VEX encoding.
(define_insn "ssse3_ph<plusminus_mnemonic>dv4si3"
  [(set (match_operand:V4SI 0 "register_operand" "=x,x")
        (vec_concat:V4SI
          ;; Pairs from operand 1.
          (vec_concat:V2SI
            (plusminus:SI
              (vec_select:SI
                (match_operand:V4SI 1 "register_operand" "0,x")
                (parallel [(const_int 0)]))
              (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
            (plusminus:SI
              (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
          ;; Pairs from operand 2.
          (vec_concat:V2SI
            (plusminus:SI
              (vec_select:SI
                (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
                (parallel [(const_int 0)]))
              (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
            (plusminus:SI
              (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSSE3"
  "@
   ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}
   vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])
13460
;; 64-bit (MMX register, constraint "y") horizontal add/subtract of
;; SImode pairs: result element 0 combines the two elements of operand 1
;; (tied to the destination), result element 1 the two of operand 2.
(define_insn "ssse3_ph<plusminus_mnemonic>dv2si3"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
        (vec_concat:V2SI
          (plusminus:SI
            (vec_select:SI
              (match_operand:V2SI 1 "register_operand" "0")
              (parallel [(const_int 0)]))
            (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
          (plusminus:SI
            (vec_select:SI
              (match_operand:V2SI 2 "nonimmediate_operand" "ym")
              (parallel [(const_int 0)]))
            (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
  "TARGET_SSSE3"
  "ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_extra" "1")
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
13481
977e83a3
KY
;; 256-bit multiply-and-add of byte pairs with signed saturation.
;; The bytes of operand 1 are zero-extended and those of operand 2
;; sign-extended to HImode; the products of the even-indexed bytes and
;; of the odd-indexed bytes are then combined with a saturating add.
(define_insn "avx2_pmaddubsw256"
  [(set (match_operand:V16HI 0 "register_operand" "=x")
        (ss_plus:V16HI
          ;; Products of the even-indexed bytes.
          (mult:V16HI
            (zero_extend:V16HI
              (vec_select:V16QI
                (match_operand:V32QI 1 "register_operand" "x")
                (parallel
                  [(const_int 0) (const_int 2) (const_int 4) (const_int 6)
                   (const_int 8) (const_int 10) (const_int 12) (const_int 14)
                   (const_int 16) (const_int 18) (const_int 20) (const_int 22)
                   (const_int 24) (const_int 26) (const_int 28) (const_int 30)])))
            (sign_extend:V16HI
              (vec_select:V16QI
                (match_operand:V32QI 2 "nonimmediate_operand" "xm")
                (parallel
                  [(const_int 0) (const_int 2) (const_int 4) (const_int 6)
                   (const_int 8) (const_int 10) (const_int 12) (const_int 14)
                   (const_int 16) (const_int 18) (const_int 20) (const_int 22)
                   (const_int 24) (const_int 26) (const_int 28) (const_int 30)]))))
          ;; Products of the odd-indexed bytes.
          (mult:V16HI
            (zero_extend:V16HI
              (vec_select:V16QI (match_dup 1)
                (parallel
                  [(const_int 1) (const_int 3) (const_int 5) (const_int 7)
                   (const_int 9) (const_int 11) (const_int 13) (const_int 15)
                   (const_int 17) (const_int 19) (const_int 21) (const_int 23)
                   (const_int 25) (const_int 27) (const_int 29) (const_int 31)])))
            (sign_extend:V16HI
              (vec_select:V16QI (match_dup 2)
                (parallel
                  [(const_int 1) (const_int 3) (const_int 5) (const_int 7)
                   (const_int 9) (const_int 11) (const_int 13) (const_int 15)
                   (const_int 17) (const_int 19) (const_int 21) (const_int 23)
                   (const_int 25) (const_int 27) (const_int 29) (const_int 31)]))))))]
  "TARGET_AVX2"
  "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
13535
5f64b496
AI
;; The correct representation for this is absolutely enormous, and
;; surely not generally useful, so the operation is modelled as an
;; opaque unspec over the two <dbpsadbwmode> inputs.
(define_insn "avx512bw_pmaddubsw512<mode><mask_name>"
  [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
        (unspec:VI2_AVX512VL
          [(match_operand:<dbpsadbwmode> 1 "register_operand" "v")
           (match_operand:<dbpsadbwmode> 2 "nonimmediate_operand" "vm")]
          UNSPEC_PMADDUBSW512))]
  "TARGET_AVX512BW"
  ;; No trailing ';' after the template: in a machine description it
  ;; would only start an (empty) comment and reads like a C leftover.
  "vpmaddubsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
13549
cf25c309
AI
;; 512-bit rounding high-half multiply of HImode elements: both inputs
;; are sign-extended to SImode and multiplied, the product is shifted
;; right by 14, a vector of ones is added, and the sum is shifted right
;; by 1 and truncated back to HImode.
(define_insn "avx512bw_umulhrswv32hi3<mask_name>"
  [(set (match_operand:V32HI 0 "register_operand" "=v")
        (truncate:V32HI
          (lshiftrt:V32SI
            (plus:V32SI
              (lshiftrt:V32SI
                (mult:V32SI
                  (sign_extend:V32SI
                    (match_operand:V32HI 1 "nonimmediate_operand" "%v"))
                  (sign_extend:V32SI
                    (match_operand:V32HI 2 "nonimmediate_operand" "vm")))
                (const_int 14))
              ;; Rounding term: thirty-two ones.
              (const_vector:V32HI
                [(const_int 1) (const_int 1) (const_int 1) (const_int 1)
                 (const_int 1) (const_int 1) (const_int 1) (const_int 1)
                 (const_int 1) (const_int 1) (const_int 1) (const_int 1)
                 (const_int 1) (const_int 1) (const_int 1) (const_int 1)
                 (const_int 1) (const_int 1) (const_int 1) (const_int 1)
                 (const_int 1) (const_int 1) (const_int 1) (const_int 1)
                 (const_int 1) (const_int 1) (const_int 1) (const_int 1)
                 (const_int 1) (const_int 1) (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_AVX512BW"
  "vpmulhrsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
13584
;; 128-bit multiply-and-add of byte pairs with signed saturation.
;; The bytes of operand 1 are zero-extended and those of operand 2
;; sign-extended to HImode; the products of the even-indexed bytes and
;; of the odd-indexed bytes are then combined with a saturating add.
;; Alternative 0 is the legacy encoding, alternative 1 the VEX encoding.
(define_insn "ssse3_pmaddubsw128"
  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
        (ss_plus:V8HI
          ;; Products of the even-indexed bytes.
          (mult:V8HI
            (zero_extend:V8HI
              (vec_select:V8QI
                (match_operand:V16QI 1 "register_operand" "0,x")
                (parallel
                  [(const_int 0) (const_int 2) (const_int 4) (const_int 6)
                   (const_int 8) (const_int 10) (const_int 12) (const_int 14)])))
            (sign_extend:V8HI
              (vec_select:V8QI
                (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")
                (parallel
                  [(const_int 0) (const_int 2) (const_int 4) (const_int 6)
                   (const_int 8) (const_int 10) (const_int 12) (const_int 14)]))))
          ;; Products of the odd-indexed bytes.
          (mult:V8HI
            (zero_extend:V8HI
              (vec_select:V8QI (match_dup 1)
                (parallel
                  [(const_int 1) (const_int 3) (const_int 5) (const_int 7)
                   (const_int 9) (const_int 11) (const_int 13) (const_int 15)])))
            (sign_extend:V8HI
              (vec_select:V8QI (match_dup 2)
                (parallel
                  [(const_int 1) (const_int 3) (const_int 5) (const_int 7)
                   (const_int 9) (const_int 11) (const_int 13) (const_int 15)]))))))]
  "TARGET_SSSE3"
  "@
   pmaddubsw\t{%2, %0|%0, %2}
   vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "atom_unit" "simul")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])
13627
;; 64-bit (MMX register, constraint "y") multiply-and-add of byte pairs
;; with signed saturation.  Operand 1 bytes are zero-extended, operand 2
;; bytes sign-extended; even-indexed and odd-indexed products are
;; combined with a saturating add.
(define_insn "ssse3_pmaddubsw"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (ss_plus:V4HI
          ;; Products of the even-indexed bytes.
          (mult:V4HI
            (zero_extend:V4HI
              (vec_select:V4QI
                (match_operand:V8QI 1 "register_operand" "0")
                (parallel
                  [(const_int 0) (const_int 2) (const_int 4) (const_int 6)])))
            (sign_extend:V4HI
              (vec_select:V4QI
                (match_operand:V8QI 2 "nonimmediate_operand" "ym")
                (parallel
                  [(const_int 0) (const_int 2) (const_int 4) (const_int 6)]))))
          ;; Products of the odd-indexed bytes.
          (mult:V4HI
            (zero_extend:V4HI
              (vec_select:V4QI (match_dup 1)
                (parallel
                  [(const_int 1) (const_int 3) (const_int 5) (const_int 7)])))
            (sign_extend:V4HI
              (vec_select:V4QI (match_dup 2)
                (parallel
                  [(const_int 1) (const_int 3) (const_int 5) (const_int 7)]))))))]
  "TARGET_SSSE3"
  "pmaddubsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "simul")
   (set_attr "prefix_extra" "1")
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
13658
880ab4be
AT
;; Modes handled by the pmulhrsw expanders; V16HI is only enabled
;; when AVX2 is available.
(define_mode_iterator PMULHRSW
  [V4HI
   V8HI
   (V16HI "TARGET_AVX2")])
13661
cf25c309
AI
;; Masked rounding high-half multiply.  The arithmetic result is merged
;; with operand 3 under control of mask operand 4.  Operand 5 is not
;; supplied by the caller; the preparation code sets it to the
;; all-ones-elements constant used as the rounding term.
(define_expand "<ssse3_avx2>_pmulhrsw<mode>3_mask"
  [(set (match_operand:PMULHRSW 0 "register_operand")
        (vec_merge:PMULHRSW
          (truncate:PMULHRSW
            (lshiftrt:<ssedoublemode>
              (plus:<ssedoublemode>
                (lshiftrt:<ssedoublemode>
                  (mult:<ssedoublemode>
                    (sign_extend:<ssedoublemode>
                      (match_operand:PMULHRSW 1 "nonimmediate_operand"))
                    (sign_extend:<ssedoublemode>
                      (match_operand:PMULHRSW 2 "nonimmediate_operand")))
                  (const_int 14))
                (match_dup 5))
              (const_int 1)))
          (match_operand:PMULHRSW 3 "register_operand")
          (match_operand:<avx512fmaskmode> 4 "register_operand")))]
  "TARGET_AVX512BW && TARGET_AVX512VL"
{
  /* Rounding term referenced by (match_dup 5).  */
  operands[5] = CONST1_RTX (<MODE>mode);
  ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
})
13684
880ab4be
AT
;; Rounding high-half multiply: sign-extend both inputs, multiply, shift
;; right by 14, add the rounding term, shift right by 1 and truncate.
;; Operand 3 is not supplied by the caller; the preparation code sets it
;; to the all-ones-elements constant referenced by (match_dup 3).
;;
;; The expander is enabled for SSSE3: the V4HI and V8HI variants expand
;; to insns that only require TARGET_SSSE3, and the PMULHRSW iterator
;; already restricts V16HI to TARGET_AVX2.  Requiring TARGET_AVX2 here
;; would disable the 64-bit and 128-bit named patterns on SSSE3-only
;; targets.
(define_expand "<ssse3_avx2>_pmulhrsw<mode>3"
  [(set (match_operand:PMULHRSW 0 "register_operand")
        (truncate:PMULHRSW
          (lshiftrt:<ssedoublemode>
            (plus:<ssedoublemode>
              (lshiftrt:<ssedoublemode>
                (mult:<ssedoublemode>
                  (sign_extend:<ssedoublemode>
                    (match_operand:PMULHRSW 1 "nonimmediate_operand"))
                  (sign_extend:<ssedoublemode>
                    (match_operand:PMULHRSW 2 "nonimmediate_operand")))
                (const_int 14))
              (match_dup 3))
            (const_int 1))))]
  "TARGET_SSSE3"
{
  /* Rounding term referenced by (match_dup 3).  */
  operands[3] = CONST1_RTX (<MODE>mode);
  ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
})
977e83a3 13704
ed3e611e
AI
;; Matcher for the rounding high-half multiply on the VI2_AVX2 modes.
;; Operand 3 must satisfy const1_operand (the rounding term).  Operands
;; 1 and 2 are commutative ("%"); alternative 0 is the legacy encoding
;; with operand 1 tied to the destination, alternative 1 the VEX/EVEX
;; encoding.
(define_insn "*<ssse3_avx2>_pmulhrsw<mode>3<mask_name>"
  [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
        (truncate:VI2_AVX2
          (lshiftrt:<ssedoublemode>
            (plus:<ssedoublemode>
              (lshiftrt:<ssedoublemode>
                (mult:<ssedoublemode>
                  (sign_extend:<ssedoublemode>
                    (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,v"))
                  (sign_extend:<ssedoublemode>
                    (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,vm")))
                (const_int 14))
              (match_operand:VI2_AVX2 3 "const1_operand"))
            (const_int 1))))]
  "TARGET_SSSE3 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
   && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
  "@
   pmulhrsw\t{%2, %0|%0, %2}
   vpmulhrsw\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseimul")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,maybe_evex")
   (set_attr "mode" "<sseinsnmode>")])
b1875f52 13730
;; 64-bit (MMX register, constraint "y") matcher for the rounding
;; high-half multiply.  Operand 3 must satisfy const1_operand; operands
;; 1 and 2 are commutative, with operand 1 tied to the destination.
(define_insn "*ssse3_pmulhrswv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (truncate:V4HI
          (lshiftrt:V4SI
            (plus:V4SI
              (lshiftrt:V4SI
                (mult:V4SI
                  (sign_extend:V4SI
                    (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
                  (sign_extend:V4SI
                    (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
                (const_int 14))
              (match_operand:V4HI 3 "const1_operand"))
            (const_int 1))))]
  "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
  "pmulhrsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "prefix_extra" "1")
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
13751
f5db965f
IT
;; Byte shuffle, modelled as an unspec of the data (operand 1) and the
;; control vector (operand 2).  Alternative 0 is the legacy encoding
;; with operand 1 tied to the destination, alternative 1 the VEX/EVEX
;; encoding.
(define_insn "<ssse3_avx2>_pshufb<mode>3<mask_name>"
  [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,v")
        (unspec:VI1_AVX512
          [(match_operand:VI1_AVX512 1 "register_operand" "0,v")
           (match_operand:VI1_AVX512 2 "nonimmediate_operand" "xm,vm")]
          UNSPEC_PSHUFB))]
  "TARGET_SSSE3 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
  "@
   pshufb\t{%2, %0|%0, %2}
   vpshufb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog1")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,maybe_evex")
   (set_attr "btver2_decode" "vector,vector")
   (set_attr "mode" "<sseinsnmode>")])
b1875f52
L
13769
;; 64-bit (MMX register, constraint "y") byte shuffle, modelled as an
;; unspec.  Operand 1 is tied to the destination.
(define_insn "ssse3_pshufbv8qi3"
  [(set (match_operand:V8QI 0 "register_operand" "=y")
        (unspec:V8QI
          [(match_operand:V8QI 1 "register_operand" "0")
           (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
          UNSPEC_PSHUFB))]
  "TARGET_SSSE3"
  ;; No trailing ';' after the template: in a machine description it
  ;; would only start an (empty) comment and reads like a C leftover.
  "pshufb\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
13781
977e83a3
KY
;; Packed sign operation on the VI124_AVX2 modes, modelled as an unspec
;; of its two inputs.  Alternative 0 is the legacy encoding with
;; operand 1 tied to the destination, alternative 1 the VEX encoding.
(define_insn "<ssse3_avx2>_psign<mode>3"
  [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
        (unspec:VI124_AVX2
          [(match_operand:VI124_AVX2 1 "register_operand" "0,x")
           (match_operand:VI124_AVX2 2 "nonimmediate_operand" "xm,xm")]
          UNSPEC_PSIGN))]
  "TARGET_SSSE3"
  "@
   psign<ssemodesuffix>\t{%2, %0|%0, %2}
   vpsign<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog1")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<sseinsnmode>")])
b1875f52
L
13798
;; 64-bit (MMX register, constraint "y") packed sign operation on the
;; MMXMODEI modes, modelled as an unspec.  Operand 1 is tied to the
;; destination.
(define_insn "ssse3_psign<mode>3"
  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
        (unspec:MMXMODEI
          [(match_operand:MMXMODEI 1 "register_operand" "0")
           (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
          UNSPEC_PSIGN))]
  "TARGET_SSSE3"
  ;; No trailing ';' after the template: in a machine description it
  ;; would only start an (empty) comment and reads like a C leftover.
  "psign<mmxvecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
13811
;; Masked palignr, modelled as an unspec merged with operand 4 under
;; mask operand 5.  Operand 3 is divided by 8 before being printed
;; (its predicate name suggests it arrives as a multiple of 8).
(define_insn "<ssse3_avx2>_palignr<mode>_mask"
  [(set (match_operand:VI1_AVX512 0 "register_operand" "=v")
        (vec_merge:VI1_AVX512
          (unspec:VI1_AVX512
            [(match_operand:VI1_AVX512 1 "register_operand" "v")
             (match_operand:VI1_AVX512 2 "nonimmediate_operand" "vm")
             (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
            UNSPEC_PALIGNR)
          (match_operand:VI1_AVX512 4 "vector_move_operand" "0C")
          (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
  "TARGET_AVX512BW && (<MODE_SIZE> == 64 || TARGET_AVX512VL)"
{
  /* Rescale the immediate before it is emitted.  */
  operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
  return "vpalignr\t{%3, %2, %1, %0%{%5%}%N4|%0%{%5%}%N4, %1, %2, %3}";
}
  [(set_attr "type" "sseishft")
   (set_attr "atom_unit" "sishuf")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
13833
;; Unmasked palignr, modelled as an unspec.  Operand 3 is divided by 8
;; before being printed (its predicate name suggests it arrives as a
;; multiple of 8).  Alternative 0 emits the two-operand legacy form,
;; alternative 1 the three-operand VEX form.
(define_insn "<ssse3_avx2>_palignr<mode>"
  [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,v")
        (unspec:SSESCALARMODE
          [(match_operand:SSESCALARMODE 1 "register_operand" "0,v")
           (match_operand:SSESCALARMODE 2 "nonimmediate_operand" "xm,vm")
           (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")]
          UNSPEC_PALIGNR))]
  "TARGET_SSSE3"
{
  /* Rescale the immediate before it is emitted.  */
  operands[3] = GEN_INT (INTVAL (operands[3]) / 8);

  if (which_alternative == 0)
    return "palignr\t{%3, %2, %0|%0, %2, %3}";
  if (which_alternative == 1)
    return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";

  /* Only two alternatives exist.  */
  gcc_unreachable ();
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseishft")
   (set_attr "atom_unit" "sishuf")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<sseinsnmode>")])
b1875f52
L
13863
;; 64-bit (MMX register, constraint "y") palignr on DImode, modelled as
;; an unspec with operand 1 tied to the destination.  Operand 3 is
;; divided by 8 before being printed (its predicate name suggests it
;; arrives as a multiple of 8).
(define_insn "ssse3_palignrdi"
  [(set (match_operand:DI 0 "register_operand" "=y")
        (unspec:DI
          [(match_operand:DI 1 "register_operand" "0")
           (match_operand:DI 2 "nonimmediate_operand" "ym")
           (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
          UNSPEC_PALIGNR))]
  "TARGET_SSSE3"
{
  /* Rescale the immediate before it is emitted.  */
  operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
  return "palignr\t{%3, %2, %0|%0, %2, %3}";
}
  [(set_attr "type" "sseishft")
   (set_attr "atom_unit" "sishuf")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
13881
e650a568
KY
;; Mode iterator for the vector abs patterns.  It exists because the
;; V2DI and V4DI modes have no abs instruction on pre-AVX-512 targets:
;; those two modes are enabled only with AVX512VL.
(define_mode_iterator VI1248_AVX512VL_AVX512BW
  [;; Byte elements.
   (V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
   ;; Word elements.
   (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
   ;; Doubleword elements.
   (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
   ;; Quadword elements.
   (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
13889
;; Vector absolute value; a single template serves both the legacy and
;; the VEX form through the "%v" prefix marker.
(define_insn "*abs<mode>2"
  [(set (match_operand:VI1248_AVX512VL_AVX512BW 0 "register_operand" "=v")
        (abs:VI1248_AVX512VL_AVX512BW
          (match_operand:VI1248_AVX512VL_AVX512BW 1
            "nonimmediate_operand" "vm")))]
  "TARGET_SSSE3"
  "%vpabs<ssemodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<sseinsnmode>")])
b1875f52 13901
700e2919
AI
;; Masked vector absolute value for the VI48_AVX512VL modes: the abs
;; result is merged with operand 2 under mask operand 3.
(define_insn "abs<mode>2_mask"
  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
        (vec_merge:VI48_AVX512VL
          (abs:VI48_AVX512VL
            (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm"))
          (match_operand:VI48_AVX512VL 2 "vector_move_operand" "0C")
          (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
  "TARGET_AVX512F"
  "vpabs<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
13914
;; Masked vector absolute value for the VI12_AVX512VL modes: the abs
;; result is merged with operand 2 under mask operand 3.
(define_insn "abs<mode>2_mask"
  [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
        (vec_merge:VI12_AVX512VL
          (abs:VI12_AVX512VL
            (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "vm"))
          (match_operand:VI12_AVX512VL 2 "vector_move_operand" "0C")
          (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
  "TARGET_AVX512BW"
  "vpabs<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
13927
;; Named expander for vector absolute value.  Without SSSE3 the work is
;; delegated to ix86_expand_sse2_abs; otherwise the RTL template is
;; emitted as written.
(define_expand "abs<mode>2"
  [(set (match_operand:VI1248_AVX512VL_AVX512BW 0 "register_operand")
        (abs:VI1248_AVX512VL_AVX512BW
          (match_operand:VI1248_AVX512VL_AVX512BW 1
            "nonimmediate_operand")))]
  "TARGET_SSE2"
{
  /* SSE2-only targets take the helper path.  */
  if (!TARGET_SSSE3)
    {
      ix86_expand_sse2_abs (operands[0], operands[1]);
      DONE;
    }
})
13940
b1875f52
L
;; 64-bit (MMX register, constraint "y") absolute value on the MMXMODEI
;; modes.
(define_insn "abs<mode>2"
  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
        (abs:MMXMODEI
          (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
  "TARGET_SSSE3"
  ;; No trailing ';' after the template: in a machine description it
  ;; would only start an (empty) comment and reads like a C leftover.
  "pabs<mmxvecsize>\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_rep" "0")
   (set_attr "prefix_extra" "1")
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
21efb4d4
HJ
13952
13953;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13954;;
13955;; AMD SSE4A instructions
13956;;
13957;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13958
85845bb9
UB
;; SSE4A scalar floating-point movnt store: the register operand is
;; written to memory through UNSPEC_MOVNT.
(define_insn "sse4a_movnt<mode>"
  [(set (match_operand:MODEF 0 "memory_operand" "=m")
        (unspec:MODEF
          [(match_operand:MODEF 1 "register_operand" "x")]
          UNSPEC_MOVNT))]
  "TARGET_SSE4A"
  "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "<MODE>")])
21efb4d4 13968
85845bb9
UB
;; SSE4A movnt store of element 0 of a 128-bit floating-point vector
;; register, through UNSPEC_MOVNT.
(define_insn "sse4a_vmmovnt<mode>"
  [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
        (unspec:<ssescalarmode>
          [(vec_select:<ssescalarmode>
             (match_operand:VF_128 1 "register_operand" "x")
             (parallel [(const_int 0)]))]
          UNSPEC_MOVNT))]
  "TARGET_SSE4A"
  "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "<ssescalarmode>")])
21efb4d4
HJ
13980
;; SSE4A extrq, immediate form: operand 1 is tied to the destination and
;; operands 2 and 3 are two 8-bit immediates (hence length_immediate 2).
(define_insn "sse4a_extrqi"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "register_operand" "0")
           (match_operand 2 "const_0_to_255_operand")
           (match_operand 3 "const_0_to_255_operand")]
          UNSPEC_EXTRQI))]
  "TARGET_SSE4A"
  "extrq\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "sse")
   (set_attr "prefix_data16" "1")
   (set_attr "length_immediate" "2")
   (set_attr "mode" "TI")])
13993
;; SSE4A extrq, register form: operand 1 is tied to the destination and
;; operand 2 is a V16QI register.
(define_insn "sse4a_extrq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "register_operand" "0")
           (match_operand:V16QI 2 "register_operand" "x")]
          UNSPEC_EXTRQ))]
  "TARGET_SSE4A"
  "extrq\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
14004
;; SSE4A insertq, immediate form: operand 1 is tied to the destination,
;; operand 2 is the source register, and operands 3 and 4 are two 8-bit
;; immediates (hence length_immediate 2).
(define_insn "sse4a_insertqi"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "register_operand" "0")
           (match_operand:V2DI 2 "register_operand" "x")
           (match_operand 3 "const_0_to_255_operand")
           (match_operand 4 "const_0_to_255_operand")]
          UNSPEC_INSERTQI))]
  "TARGET_SSE4A"
  "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
  [(set_attr "type" "sseins")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix_rep" "1")
   (set_attr "length_immediate" "2")
   (set_attr "mode" "TI")])
14019
;; SSE4A insertq, register form.  Operand 1 is tied to the destination and
;; operand 2 is the second V2DI source register; no immediate is encoded.
;; Modelled as UNSPEC_INSERTQ.
14020(define_insn "sse4a_insertq"
14021 [(set (match_operand:V2DI 0 "register_operand" "=x")
977e83a3
KY
14022 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
14023 (match_operand:V2DI 2 "register_operand" "x")]
14024 UNSPEC_INSERTQ))]
21efb4d4
HJ
14025 "TARGET_SSE4A"
14026 "insertq\t{%2, %0|%0, %2}"
14027 [(set_attr "type" "sseins")
725fd454 14028 (set_attr "prefix_data16" "0")
10e4d956 14029 (set_attr "prefix_rep" "1")
21efb4d4 14030 (set_attr "mode" "TI")])
9a5cee02
L
14031
14032;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14033;;
14034;; Intel SSE4.1 instructions
14035;;
14036;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14037
8b994297
AI
14038;; Mapping of immediate bits for blend instructions
;; Each entry gives, per vector mode, the upper bound that is spliced into
;; the predicate name const_0_to_<blendbits>_operand used for the blend
;; immediate (e.g. V4SF -> const_0_to_15_operand).
14039(define_mode_attr blendbits
14040 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
14041
;; Immediate-controlled floating-point blend, expressed as a vec_merge of
;; operand 2 and operand 1 under the constant mask operand 3.
;; Alternatives 0 and 1 are the two-operand non-AVX forms (operand 1 tied to
;; the destination); alternative 2 is the three-operand VEX form.
cbb734aa 14042(define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>"
45392c76 14043 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
b86f6e9e 14044 (vec_merge:VF_128_256
45392c76
IE
14045 (match_operand:VF_128_256 2 "nonimmediate_operand" "Yrm,*xm,xm")
14046 (match_operand:VF_128_256 1 "register_operand" "0,0,x")
82e86dc6 14047 (match_operand:SI 3 "const_0_to_<blendbits>_operand")))]
c96b4102 14048 "TARGET_SSE4_1"
5e60198b 14049 "@
45392c76 14050 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
5e60198b
UB
14051 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
14052 vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
45392c76 14053 [(set_attr "isa" "noavx,noavx,avx")
5e60198b 14054 (set_attr "type" "ssemov")
725fd454 14055 (set_attr "length_immediate" "1")
45392c76 14056 (set_attr "prefix_data16" "1,1,*")
5e60198b 14057 (set_attr "prefix_extra" "1")
45392c76 14058 (set_attr "prefix" "orig,orig,vex")
85845bb9 14059 (set_attr "mode" "<MODE>")])
9a5cee02 14060
;; Variable (register-controlled) floating-point blend, modelled as
;; UNSPEC_BLENDV.  Operand 3 is the selector register: the non-AVX
;; alternatives constrain it to "Yz", the AVX alternative accepts any "x"
;; register and uses the four-operand vblendv form.
cbb734aa 14061(define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>"
45392c76 14062 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
b86f6e9e 14063 (unspec:VF_128_256
45392c76
IE
14064 [(match_operand:VF_128_256 1 "register_operand" "0,0,x")
14065 (match_operand:VF_128_256 2 "nonimmediate_operand" "Yrm,*xm,xm")
14066 (match_operand:VF_128_256 3 "register_operand" "Yz,Yz,x")]
85845bb9 14067 UNSPEC_BLENDV))]
9a5cee02 14068 "TARGET_SSE4_1"
5e60198b 14069 "@
45392c76 14070 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
5e60198b
UB
14071 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
14072 vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
45392c76 14073 [(set_attr "isa" "noavx,noavx,avx")
5e60198b
UB
14074 (set_attr "type" "ssemov")
14075 (set_attr "length_immediate" "1")
45392c76 14076 (set_attr "prefix_data16" "1,1,*")
9a5cee02 14077 (set_attr "prefix_extra" "1")
45392c76
IE
14078 (set_attr "prefix" "orig,orig,vex")
14079 (set_attr "btver2_decode" "vector,vector,vector")
85845bb9 14080 (set_attr "mode" "<MODE>")])
9a5cee02 14081
;; dp<ssemodesuffix> / vdp<ssemodesuffix> with an 8-bit immediate control
;; (operand 3), modelled as UNSPEC_DP.  The "%" on operand 1 marks
;; operands 1 and 2 as commutative for register allocation.
cbb734aa 14082(define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
45392c76 14083 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
b86f6e9e 14084 (unspec:VF_128_256
45392c76
IE
14085 [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,x")
14086 (match_operand:VF_128_256 2 "nonimmediate_operand" "Yrm,*xm,xm")
14087 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
85845bb9 14088 UNSPEC_DP))]
9a5cee02 14089 "TARGET_SSE4_1"
5e60198b 14090 "@
45392c76 14091 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
5e60198b
UB
14092 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
14093 vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
45392c76 14094 [(set_attr "isa" "noavx,noavx,avx")
5e60198b 14095 (set_attr "type" "ssemul")
725fd454 14096 (set_attr "length_immediate" "1")
45392c76 14097 (set_attr "prefix_data16" "1,1,*")
5e60198b 14098 (set_attr "prefix_extra" "1")
45392c76
IE
14099 (set_attr "prefix" "orig,orig,vex")
14100 (set_attr "btver2_decode" "vector,vector,vector")
85845bb9 14101 (set_attr "mode" "<MODE>")])
9a5cee02 14102
8b994297
AI
14103;; Mode attribute used by `vmovntdqa' pattern
;; Maps each 64-bit-element integer vector mode to the ISA prefix that forms
;; the first part of the pattern name "<vi8_sse4_1_avx2_avx512>_movntdqa".
14104(define_mode_attr vi8_sse4_1_avx2_avx512
14105 [(V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512f")])
14106
;; movntdqa load: operand 1 must be memory and operand 0 a register; the
;; load is wrapped in UNSPEC_MOVNTDQA.  The "%v" in the template lets the
;; output routine pick the VEX-prefixed mnemonic when appropriate.  The
;; third alternative ("v" registers) is the EVEX-encoded form.
14107(define_insn "<vi8_sse4_1_avx2_avx512>_movntdqa"
45392c76
IE
14108 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand" "=Yr,*x, v")
14109 (unspec:VI8_AVX2_AVX512F [(match_operand:VI8_AVX2_AVX512F 1 "memory_operand" "m, m, m")]
9a5cee02
L
14110 UNSPEC_MOVNTDQA))]
14111 "TARGET_SSE4_1"
95879c72 14112 "%vmovntdqa\t{%1, %0|%0, %1}"
b6837b94 14113 [(set_attr "type" "ssemov")
45392c76
IE
14114 (set_attr "prefix_extra" "1,1,*")
14115 (set_attr "prefix" "maybe_vex,maybe_vex,evex")
977e83a3 14116 (set_attr "mode" "<sseinsnmode>")])
95879c72 14117
;; mpsadbw / vmpsadbw with an 8-bit immediate control (operand 3), modelled
;; as UNSPEC_MPSADBW.  Alternatives 0 and 1 are the destructive non-AVX
;; forms; alternative 2 is the three-operand VEX form.
977e83a3 14118(define_insn "<sse4_1_avx2>_mpsadbw"
45392c76 14119 [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
8861ba4d 14120 (unspec:VI1_AVX2
45392c76
IE
14121 [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x")
14122 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "Yrm,*xm,xm")
14123 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
8861ba4d 14124 UNSPEC_MPSADBW))]
9a5cee02 14125 "TARGET_SSE4_1"
5e60198b 14126 "@
45392c76 14127 mpsadbw\t{%3, %2, %0|%0, %2, %3}
5e60198b
UB
14128 mpsadbw\t{%3, %2, %0|%0, %2, %3}
14129 vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
45392c76 14130 [(set_attr "isa" "noavx,noavx,avx")
5e60198b 14131 (set_attr "type" "sselog1")
725fd454 14132 (set_attr "length_immediate" "1")
725fd454 14133 (set_attr "prefix_extra" "1")
45392c76
IE
14134 (set_attr "prefix" "orig,orig,vex")
14135 (set_attr "btver2_decode" "vector,vector,vector")
977e83a3
KY
14136 (set_attr "mode" "<sseinsnmode>")])
14137
;; packusdw: both <sseunpackmode> inputs are truncated with unsigned
;; saturation (us_truncate) to <ssehalfvecmode> and concatenated into the
;; VI2_AVX2 result.  The <mask_name> substitution adds an optional masked
;; variant, which is why the AVX template carries <mask_operand3> and the
;; condition includes the mask-related checks.
ed3e611e 14138(define_insn "<sse4_1_avx2>_packusdw<mask_name>"
45392c76 14139 [(set (match_operand:VI2_AVX2 0 "register_operand" "=Yr,*x,v")
ed3e611e
AI
14140 (vec_concat:VI2_AVX2
14141 (us_truncate:<ssehalfvecmode>
45392c76 14142 (match_operand:<sseunpackmode> 1 "register_operand" "0,0,v"))
ed3e611e 14143 (us_truncate:<ssehalfvecmode>
45392c76 14144 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "Yrm,*xm,vm"))))]
ed3e611e 14145 "TARGET_SSE4_1 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
5e60198b 14146 "@
45392c76 14147 packusdw\t{%2, %0|%0, %2}
5e60198b 14148 packusdw\t{%2, %0|%0, %2}
ed3e611e 14149 vpackusdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
45392c76 14150 [(set_attr "isa" "noavx,noavx,avx")
5e60198b 14151 (set_attr "type" "sselog")
725fd454 14152 (set_attr "prefix_extra" "1")
45392c76 14153 (set_attr "prefix" "orig,orig,maybe_evex")
ed3e611e 14154 (set_attr "mode" "<sseinsnmode>")])
95879c72 14155
;; Variable byte blend (pblendvb / vpblendvb), modelled as UNSPEC_BLENDV.
;; Operand 3 is the selector register: "Yz" in the non-AVX alternatives,
;; any "x" register in the AVX alternative.  Only the AVX alternative
;; counts an immediate byte (length_immediate "*,*,1").
977e83a3 14156(define_insn "<sse4_1_avx2>_pblendvb"
45392c76 14157 [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
977e83a3 14158 (unspec:VI1_AVX2
45392c76
IE
14159 [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x")
14160 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "Yrm,*xm,xm")
14161 (match_operand:VI1_AVX2 3 "register_operand" "Yz,Yz,x")]
5e60198b 14162 UNSPEC_BLENDV))]
9a5cee02 14163 "TARGET_SSE4_1"
5e60198b 14164 "@
45392c76 14165 pblendvb\t{%3, %2, %0|%0, %2, %3}
5e60198b
UB
14166 pblendvb\t{%3, %2, %0|%0, %2, %3}
14167 vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
45392c76 14168 [(set_attr "isa" "noavx,noavx,avx")
5e60198b 14169 (set_attr "type" "ssemov")
725fd454 14170 (set_attr "prefix_extra" "1")
45392c76
IE
14171 (set_attr "length_immediate" "*,*,1")
14172 (set_attr "prefix" "orig,orig,vex")
14173 (set_attr "btver2_decode" "vector,vector,vector")
977e83a3 14174 (set_attr "mode" "<sseinsnmode>")])
95879c72 14175
;; 128-bit word blend: vec_merge of operand 2 and operand 1 under the 8-bit
;; constant mask operand 3 (one mask bit per V8HI element).  Alternatives 0
;; and 1 are the destructive non-AVX forms; alternative 2 is the VEX form.
96d86115 14176(define_insn "sse4_1_pblendw"
45392c76 14177 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,x")
96d86115 14178 (vec_merge:V8HI
45392c76
IE
14179 (match_operand:V8HI 2 "nonimmediate_operand" "Yrm,*xm,xm")
14180 (match_operand:V8HI 1 "register_operand" "0,0,x")
14181 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")))]
9a5cee02 14182 "TARGET_SSE4_1"
5e60198b 14183 "@
45392c76 14184 pblendw\t{%3, %2, %0|%0, %2, %3}
5e60198b
UB
14185 pblendw\t{%3, %2, %0|%0, %2, %3}
14186 vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
45392c76 14187 [(set_attr "isa" "noavx,noavx,avx")
5e60198b 14188 (set_attr "type" "ssemov")
9a5cee02 14189 (set_attr "prefix_extra" "1")
725fd454 14190 (set_attr "length_immediate" "1")
45392c76 14191 (set_attr "prefix" "orig,orig,vex")
96d86115
RH
14192 (set_attr "mode" "TI")])
14193
14194;; The builtin uses an 8-bit immediate. Expand that.
;; The preparation code duplicates the low 8 bits of operand 3 into bits
;; 8..15 (val << 8 | val), so the vec_merge mask covers all 16 elements of
;; the V16HI vector.  The widened constant is then matched by the
;; "*avx2_pblendw" insn.
14195(define_expand "avx2_pblendw"
82e86dc6 14196 [(set (match_operand:V16HI 0 "register_operand")
96d86115 14197 (vec_merge:V16HI
82e86dc6
UB
14198 (match_operand:V16HI 2 "nonimmediate_operand")
14199 (match_operand:V16HI 1 "register_operand")
14200 (match_operand:SI 3 "const_0_to_255_operand")))]
96d86115
RH
14201 "TARGET_AVX2"
14202{
14203 HOST_WIDE_INT val = INTVAL (operands[3]) & 0xff;
14204 operands[3] = GEN_INT (val << 8 | val);
14205})
14206
;; 256-bit word blend insn.  Operand 3 must satisfy avx2_pblendw_operand;
;; at output time the constant is masked back down to its low 8 bits
;; (& 0xff) before the vpblendw instruction is printed, since the
;; instruction itself takes an 8-bit immediate.
14207(define_insn "*avx2_pblendw"
14208 [(set (match_operand:V16HI 0 "register_operand" "=x")
14209 (vec_merge:V16HI
14210 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
14211 (match_operand:V16HI 1 "register_operand" "x")
14212 (match_operand:SI 3 "avx2_pblendw_operand" "n")))]
524857ec 14213 "TARGET_AVX2"
96d86115
RH
14214{
14215 operands[3] = GEN_INT (INTVAL (operands[3]) & 0xff);
14216 return "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
14217}
14218 [(set_attr "type" "ssemov")
14219 (set_attr "prefix_extra" "1")
14220 (set_attr "length_immediate" "1")
14221 (set_attr "prefix" "vex")
14222 (set_attr "mode" "OI")])
977e83a3
KY
14223
;; AVX2 doubleword blend (vpblendd): vec_merge of operand 2 and operand 1
;; under the 8-bit constant mask operand 3, for the VI4_AVX2 modes.
;; VEX-only, three-operand form.
14224(define_insn "avx2_pblendd<mode>"
14225 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
14226 (vec_merge:VI4_AVX2
14227 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")
14228 (match_operand:VI4_AVX2 1 "register_operand" "x")
14229 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
14230 "TARGET_AVX2"
14231 "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14232 [(set_attr "type" "ssemov")
14233 (set_attr "prefix_extra" "1")
14234 (set_attr "length_immediate" "1")
14235 (set_attr "prefix" "vex")
14236 (set_attr "mode" "<sseinsnmode>")])
9a5cee02
L
14237
;; phminposuw on a V8HI source (register or memory), modelled as
;; UNSPEC_PHMINPOSUW.  "%v" selects the VEX mnemonic when appropriate
;; (prefix "maybe_vex").
14238(define_insn "sse4_1_phminposuw"
45392c76
IE
14239 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x")
14240 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "Yrm,*xm")]
9a5cee02
L
14241 UNSPEC_PHMINPOSUW))]
14242 "TARGET_SSE4_1"
95879c72 14243 "%vphminposuw\t{%1, %0|%0, %1}"
9a5cee02
L
14244 [(set_attr "type" "sselog1")
14245 (set_attr "prefix_extra" "1")
95879c72 14246 (set_attr "prefix" "maybe_vex")
9a5cee02
L
14247 (set_attr "mode" "TI")])
14248
f95dcc81
AI
;; Extend all 16 bytes of a V16QI source to V16HI (vpmov<extsuffix>bw).
;; any_extend / <code> / <extsuffix> come from a code iterator defined
;; elsewhere in the file (presumably sign_extend and zero_extend).
;; <mask_name> adds an optional masked variant gated by the mask conditions.
14249(define_insn "avx2_<code>v16qiv16hi2<mask_name>"
14250 [(set (match_operand:V16HI 0 "register_operand" "=v")
977e83a3 14251 (any_extend:V16HI
f95dcc81
AI
14252 (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
14253 "TARGET_AVX2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
14254 "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
977e83a3
KY
14255 [(set_attr "type" "ssemov")
14256 (set_attr "prefix_extra" "1")
f95dcc81 14257 (set_attr "prefix" "maybe_evex")
977e83a3
KY
14258 (set_attr "mode" "OI")])
14259
f95dcc81
AI
;; Extend all 32 bytes of a V32QI source to V32HI (vpmov<extsuffix>bw),
;; EVEX-encoded; requires TARGET_AVX512BW.
14260(define_insn "avx512bw_<code>v32qiv32hi2<mask_name>"
14261 [(set (match_operand:V32HI 0 "register_operand" "=v")
14262 (any_extend:V32HI
14263 (match_operand:V32QI 1 "nonimmediate_operand" "vm")))]
14264 "TARGET_AVX512BW"
14265 "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14266 [(set_attr "type" "ssemov")
14267 (set_attr "prefix_extra" "1")
14268 (set_attr "prefix" "evex")
14269 (set_attr "mode" "XI")])
14270
;; Extend the low 8 bytes (elements 0..7) of a V16QI source to V8HI.
;; Only 64 bits of the source are read, so the Intel-syntax operand is
;; printed with the %q modifier and ssememalign is 64.
14271(define_insn "sse4_1_<code>v8qiv8hi2<mask_name>"
45392c76 14272 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*v")
ee9dd92e 14273 (any_extend:V8HI
9a5cee02 14274 (vec_select:V8QI
45392c76 14275 (match_operand:V16QI 1 "nonimmediate_operand" "Yrm,*vm")
608dccd7
UB
14276 (parallel [(const_int 0) (const_int 1)
14277 (const_int 2) (const_int 3)
14278 (const_int 4) (const_int 5)
14279 (const_int 6) (const_int 7)]))))]
f95dcc81
AI
14280 "TARGET_SSE4_1 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
14281 "%vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
9a5cee02 14282 [(set_attr "type" "ssemov")
f220a4f4 14283 (set_attr "ssememalign" "64")
9a5cee02 14284 (set_attr "prefix_extra" "1")
95879c72 14285 (set_attr "prefix" "maybe_vex")
9a5cee02
L
14286 (set_attr "mode" "TI")])
14287
;; Extend all 16 bytes of a V16QI source to V16SI (vpmov<extsuffix>bd),
;; EVEX-encoded; requires TARGET_AVX512F.
;; NOTE(review): the Intel-syntax operand uses %q1 although the whole
;; 16-byte source is consumed; other whole-vector extension patterns in
;; this file print plain %1 -- confirm the modifier is intended.
47490470 14288(define_insn "<mask_codefor>avx512f_<code>v16qiv16si2<mask_name>"
c003c6d6
AI
14289 [(set (match_operand:V16SI 0 "register_operand" "=v")
14290 (any_extend:V16SI
14291 (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
14292 "TARGET_AVX512F"
47490470 14293 "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
c003c6d6
AI
14294 [(set_attr "type" "ssemov")
14295 (set_attr "prefix" "evex")
14296 (set_attr "mode" "XI")])
14297
f95dcc81
AI
;; Extend the low 8 bytes (elements 0..7) of a V16QI source to V8SI
;; (vpmov<extsuffix>bd).  Only 64 bits of the source are read, hence %q1
;; in the Intel-syntax template.
14298(define_insn "avx2_<code>v8qiv8si2<mask_name>"
14299 [(set (match_operand:V8SI 0 "register_operand" "=v")
977e83a3
KY
14300 (any_extend:V8SI
14301 (vec_select:V8QI
f95dcc81 14302 (match_operand:V16QI 1 "nonimmediate_operand" "vm")
608dccd7
UB
14303 (parallel [(const_int 0) (const_int 1)
14304 (const_int 2) (const_int 3)
14305 (const_int 4) (const_int 5)
14306 (const_int 6) (const_int 7)]))))]
f95dcc81
AI
14307 "TARGET_AVX2 && <mask_avx512vl_condition>"
14308 "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
977e83a3
KY
14309 [(set_attr "type" "ssemov")
14310 (set_attr "prefix_extra" "1")
f95dcc81 14311 (set_attr "prefix" "maybe_evex")
977e83a3
KY
14312 (set_attr "mode" "OI")])
14313
;; Extend the low 4 bytes (elements 0..3) of a V16QI source to V4SI.
;; Only 32 bits of the source are read, hence %k1 in the Intel-syntax
;; template and ssememalign 32.
f95dcc81 14314(define_insn "sse4_1_<code>v4qiv4si2<mask_name>"
45392c76 14315 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*v")
ee9dd92e 14316 (any_extend:V4SI
9a5cee02 14317 (vec_select:V4QI
45392c76 14318 (match_operand:V16QI 1 "nonimmediate_operand" "Yrm,*vm")
608dccd7
UB
14319 (parallel [(const_int 0) (const_int 1)
14320 (const_int 2) (const_int 3)]))))]
f95dcc81
AI
14321 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
14322 "%vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
9a5cee02 14323 [(set_attr "type" "ssemov")
f220a4f4 14324 (set_attr "ssememalign" "32")
9a5cee02 14325 (set_attr "prefix_extra" "1")
95879c72 14326 (set_attr "prefix" "maybe_vex")
9a5cee02
L
14327 (set_attr "mode" "TI")])
14328
;; Extend all 16 words of a V16HI source to V16SI (vpmov<extsuffix>wd),
;; EVEX-encoded; requires TARGET_AVX512F.
47490470 14329(define_insn "avx512f_<code>v16hiv16si2<mask_name>"
c003c6d6
AI
14330 [(set (match_operand:V16SI 0 "register_operand" "=v")
14331 (any_extend:V16SI
14332 (match_operand:V16HI 1 "nonimmediate_operand" "vm")))]
14333 "TARGET_AVX512F"
47490470 14334 "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
c003c6d6
AI
14335 [(set_attr "type" "ssemov")
14336 (set_attr "prefix" "evex")
14337 (set_attr "mode" "XI")])
14338
f95dcc81
AI
;; Extend all 8 words of a V8HI source to V8SI (vpmov<extsuffix>wd).
;; <mask_name> adds an optional masked variant gated by
;; <mask_avx512vl_condition>.
14339(define_insn "avx2_<code>v8hiv8si2<mask_name>"
14340 [(set (match_operand:V8SI 0 "register_operand" "=v")
977e83a3 14341 (any_extend:V8SI
f95dcc81
AI
14342 (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
14343 "TARGET_AVX2 && <mask_avx512vl_condition>"
14344 "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
977e83a3
KY
14345 [(set_attr "type" "ssemov")
14346 (set_attr "prefix_extra" "1")
f95dcc81 14347 (set_attr "prefix" "maybe_evex")
977e83a3
KY
14348 (set_attr "mode" "OI")])
14349
;; Extend the low 4 words (elements 0..3) of a V8HI source to V4SI.
;; Only 64 bits of the source are read, hence %q1 in the Intel-syntax
;; template and ssememalign 64.
f95dcc81 14350(define_insn "sse4_1_<code>v4hiv4si2<mask_name>"
45392c76 14351 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*v")
ee9dd92e 14352 (any_extend:V4SI
9a5cee02 14353 (vec_select:V4HI
45392c76 14354 (match_operand:V8HI 1 "nonimmediate_operand" "Yrm,*vm")
608dccd7
UB
14355 (parallel [(const_int 0) (const_int 1)
14356 (const_int 2) (const_int 3)]))))]
f95dcc81
AI
14357 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
14358 "%vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
9a5cee02 14359 [(set_attr "type" "ssemov")
f220a4f4 14360 (set_attr "ssememalign" "64")
9a5cee02 14361 (set_attr "prefix_extra" "1")
95879c72 14362 (set_attr "prefix" "maybe_vex")
9a5cee02
L
14363 (set_attr "mode" "TI")])
14364
;; Extend the low 8 bytes (elements 0..7) of a V16QI source to V8DI
;; (vpmov<extsuffix>bq), EVEX-encoded; requires TARGET_AVX512F.
;; NOTE(review): the select covers 8 QI elements (64 bits) but the
;; Intel-syntax operand is printed with %k1; the other patterns in this
;; file that select 8 bytes (sse4_1 v8qiv8hi, avx2 v8qiv8si) use %q1 --
;; confirm whether %k1 is intended here.
47490470 14365(define_insn "avx512f_<code>v8qiv8di2<mask_name>"
c003c6d6
AI
14366 [(set (match_operand:V8DI 0 "register_operand" "=v")
14367 (any_extend:V8DI
14368 (vec_select:V8QI
14369 (match_operand:V16QI 1 "nonimmediate_operand" "vm")
14370 (parallel [(const_int 0) (const_int 1)
14371 (const_int 2) (const_int 3)
14372 (const_int 4) (const_int 5)
14373 (const_int 6) (const_int 7)]))))]
14374 "TARGET_AVX512F"
47490470 14375 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
c003c6d6
AI
14376 [(set_attr "type" "ssemov")
14377 (set_attr "prefix" "evex")
14378 (set_attr "mode" "XI")])
14379
f95dcc81
AI
;; Extend the low 4 bytes (elements 0..3) of a V16QI source to V4DI
;; (vpmov<extsuffix>bq).  Only 32 bits of the source are read, hence %k1
;; in the Intel-syntax template.
14380(define_insn "avx2_<code>v4qiv4di2<mask_name>"
14381 [(set (match_operand:V4DI 0 "register_operand" "=v")
977e83a3
KY
14382 (any_extend:V4DI
14383 (vec_select:V4QI
f95dcc81 14384 (match_operand:V16QI 1 "nonimmediate_operand" "vm")
608dccd7
UB
14385 (parallel [(const_int 0) (const_int 1)
14386 (const_int 2) (const_int 3)]))))]
f95dcc81
AI
14387 "TARGET_AVX2 && <mask_avx512vl_condition>"
14388 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
977e83a3
KY
14389 [(set_attr "type" "ssemov")
14390 (set_attr "prefix_extra" "1")
f95dcc81 14391 (set_attr "prefix" "maybe_evex")
977e83a3
KY
14392 (set_attr "mode" "OI")])
14393
;; Extend the low 2 bytes (elements 0..1) of a V16QI source to V2DI.
;; Only 16 bits of the source are read, hence %w1 in the Intel-syntax
;; template and ssememalign 16.
f95dcc81 14394(define_insn "sse4_1_<code>v2qiv2di2<mask_name>"
45392c76 14395 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*v")
ee9dd92e 14396 (any_extend:V2DI
9a5cee02 14397 (vec_select:V2QI
45392c76 14398 (match_operand:V16QI 1 "nonimmediate_operand" "Yrm,*vm")
608dccd7 14399 (parallel [(const_int 0) (const_int 1)]))))]
f95dcc81
AI
14400 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
14401 "%vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %w1}"
9a5cee02 14402 [(set_attr "type" "ssemov")
f220a4f4 14403 (set_attr "ssememalign" "16")
9a5cee02 14404 (set_attr "prefix_extra" "1")
95879c72 14405 (set_attr "prefix" "maybe_vex")
9a5cee02
L
14406 (set_attr "mode" "TI")])
14407
;; Extend all 8 words of a V8HI source to V8DI (vpmov<extsuffix>wq),
;; EVEX-encoded; requires TARGET_AVX512F.
;; NOTE(review): the Intel-syntax operand uses %q1 although the whole
;; 16-byte V8HI source is consumed (no vec_select) -- confirm the modifier
;; is intended.
47490470 14408(define_insn "avx512f_<code>v8hiv8di2<mask_name>"
c003c6d6
AI
14409 [(set (match_operand:V8DI 0 "register_operand" "=v")
14410 (any_extend:V8DI
14411 (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
14412 "TARGET_AVX512F"
47490470 14413 "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
c003c6d6
AI
14414 [(set_attr "type" "ssemov")
14415 (set_attr "prefix" "evex")
14416 (set_attr "mode" "XI")])
14417
f95dcc81
AI
;; Extend the low 4 words (elements 0..3) of a V8HI source to V4DI
;; (vpmov<extsuffix>wq).  Only 64 bits of the source are read, hence %q1
;; in the Intel-syntax template.
14418(define_insn "avx2_<code>v4hiv4di2<mask_name>"
14419 [(set (match_operand:V4DI 0 "register_operand" "=v")
977e83a3
KY
14420 (any_extend:V4DI
14421 (vec_select:V4HI
f95dcc81 14422 (match_operand:V8HI 1 "nonimmediate_operand" "vm")
608dccd7
UB
14423 (parallel [(const_int 0) (const_int 1)
14424 (const_int 2) (const_int 3)]))))]
f95dcc81
AI
14425 "TARGET_AVX2 && <mask_avx512vl_condition>"
14426 "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
977e83a3
KY
14427 [(set_attr "type" "ssemov")
14428 (set_attr "prefix_extra" "1")
f95dcc81 14429 (set_attr "prefix" "maybe_evex")
977e83a3
KY
14430 (set_attr "mode" "OI")])
14431
;; Extend the low 2 words (elements 0..1) of a V8HI source to V2DI.
;; Only 32 bits of the source are read, hence %k1 in the Intel-syntax
;; template and ssememalign 32.
f95dcc81 14432(define_insn "sse4_1_<code>v2hiv2di2<mask_name>"
45392c76 14433 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*v")
ee9dd92e 14434 (any_extend:V2DI
9a5cee02 14435 (vec_select:V2HI
45392c76 14436 (match_operand:V8HI 1 "nonimmediate_operand" "Yrm,*vm")
608dccd7 14437 (parallel [(const_int 0) (const_int 1)]))))]
f95dcc81
AI
14438 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
14439 "%vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
9a5cee02 14440 [(set_attr "type" "ssemov")
f220a4f4 14441 (set_attr "ssememalign" "32")
9a5cee02 14442 (set_attr "prefix_extra" "1")
95879c72 14443 (set_attr "prefix" "maybe_vex")
9a5cee02
L
14444 (set_attr "mode" "TI")])
14445
;; Extend all 8 doublewords of a V8SI source to V8DI (vpmov<extsuffix>dq),
;; EVEX-encoded; requires TARGET_AVX512F.
47490470 14446(define_insn "avx512f_<code>v8siv8di2<mask_name>"
c003c6d6
AI
14447 [(set (match_operand:V8DI 0 "register_operand" "=v")
14448 (any_extend:V8DI
14449 (match_operand:V8SI 1 "nonimmediate_operand" "vm")))]
14450 "TARGET_AVX512F"
47490470 14451 "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
c003c6d6
AI
14452 [(set_attr "type" "ssemov")
14453 (set_attr "prefix" "evex")
14454 (set_attr "mode" "XI")])
14455
f95dcc81
AI
;; Extend all 4 doublewords of a V4SI source to V4DI (vpmov<extsuffix>dq).
;; <mask_name> adds an optional masked variant gated by
;; <mask_avx512vl_condition>.
14456(define_insn "avx2_<code>v4siv4di2<mask_name>"
14457 [(set (match_operand:V4DI 0 "register_operand" "=v")
977e83a3 14458 (any_extend:V4DI
f95dcc81
AI
14459 (match_operand:V4SI 1 "nonimmediate_operand" "vm")))]
14460 "TARGET_AVX2 && <mask_avx512vl_condition>"
14461 "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
977e83a3 14462 [(set_attr "type" "ssemov")
f95dcc81 14463 (set_attr "prefix" "maybe_evex")
977e83a3
KY
14464 (set_attr "prefix_extra" "1")
14465 (set_attr "mode" "OI")])
14466
;; Extend the low 2 doublewords (elements 0..1) of a V4SI source to V2DI.
;; Only 64 bits of the source are read, hence %q1 in the Intel-syntax
;; template and ssememalign 64.
f95dcc81 14467(define_insn "sse4_1_<code>v2siv2di2<mask_name>"
45392c76 14468 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*v")
ee9dd92e 14469 (any_extend:V2DI
9a5cee02 14470 (vec_select:V2SI
45392c76 14471 (match_operand:V4SI 1 "nonimmediate_operand" "Yrm,*vm")
608dccd7 14472 (parallel [(const_int 0) (const_int 1)]))))]
f95dcc81
AI
14473 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
14474 "%vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
9a5cee02 14475 [(set_attr "type" "ssemov")
f220a4f4 14476 (set_attr "ssememalign" "64")
9a5cee02 14477 (set_attr "prefix_extra" "1")
95879c72 14478 (set_attr "prefix" "maybe_vex")
9a5cee02
L
14479 (set_attr "mode" "TI")])
14480
95879c72
L
14481;; vtestps/vtestpd are very similar to comiss and ucomiss in that they
14482;; set FLAGS_REG, but they are not really compare instructions.
;; The pattern therefore has no register result: it only sets
;; (reg:CC FLAGS_REG) from UNSPEC_VTESTP of the two operands.
cbb734aa 14483(define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>"
95879c72 14484 [(set (reg:CC FLAGS_REG)
b86f6e9e
AI
14485 (unspec:CC [(match_operand:VF_128_256 0 "register_operand" "x")
14486 (match_operand:VF_128_256 1 "nonimmediate_operand" "xm")]
95879c72
L
14487 UNSPEC_VTESTP))]
14488 "TARGET_AVX"
1c154a23 14489 "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
95879c72 14490 [(set_attr "type" "ssecomi")
725fd454 14491 (set_attr "prefix_extra" "1")
95879c72
L
14492 (set_attr "prefix" "vex")
14493 (set_attr "mode" "<MODE>")])
14494
9a5cee02
L
14495;; ptest is very similar to comiss and ucomiss in that it sets FLAGS_REG,
14496;; but it is not really a compare instruction.
;; 256-bit (V4DI) form: VEX-only vptest, result is (reg:CC FLAGS_REG) via
;; UNSPEC_PTEST.
95879c72
L
14497(define_insn "avx_ptest256"
14498 [(set (reg:CC FLAGS_REG)
14499 (unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
14500 (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
14501 UNSPEC_PTEST))]
14502 "TARGET_AVX"
14503 "vptest\t{%1, %0|%0, %1}"
14504 [(set_attr "type" "ssecomi")
725fd454 14505 (set_attr "prefix_extra" "1")
95879c72 14506 (set_attr "prefix" "vex")
01284895 14507 (set_attr "btver2_decode" "vector")
95879c72
L
14508 (set_attr "mode" "OI")])
14509
9a5cee02
L
;; 128-bit (V2DI) ptest: sets (reg:CC FLAGS_REG) via UNSPEC_PTEST and has
;; no register result.  "%v" selects the VEX mnemonic when appropriate.
14510(define_insn "sse4_1_ptest"
14511 [(set (reg:CC FLAGS_REG)
45392c76
IE
14512 (unspec:CC [(match_operand:V2DI 0 "register_operand" "Yr,*x")
14513 (match_operand:V2DI 1 "nonimmediate_operand" "Yrm,*xm")]
9a5cee02
L
14514 UNSPEC_PTEST))]
14515 "TARGET_SSE4_1"
95879c72 14516 "%vptest\t{%1, %0|%0, %1}"
9a5cee02
L
14517 [(set_attr "type" "ssecomi")
14518 (set_attr "prefix_extra" "1")
95879c72 14519 (set_attr "prefix" "maybe_vex")
9a5cee02
L
14520 (set_attr "mode" "TI")])
14521
;; Packed round with a 4-bit immediate rounding control (operand 2,
;; 0..15), modelled as UNSPEC_ROUND.  The prefix_data16 attribute is
;; computed: "*" when TARGET_AVX is set, "1" otherwise.
cbb734aa 14522(define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
45392c76 14523 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x")
b86f6e9e 14524 (unspec:VF_128_256
45392c76
IE
14525 [(match_operand:VF_128_256 1 "nonimmediate_operand" "Yrm,*xm")
14526 (match_operand:SI 2 "const_0_to_15_operand" "n,n")]
85845bb9 14527 UNSPEC_ROUND))]
04e1d06b 14528 "TARGET_ROUND"
1c154a23 14529 "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9a5cee02 14530 [(set_attr "type" "ssecvt")
5e60198b
UB
14531 (set (attr "prefix_data16")
14532 (if_then_else
67b2c493 14533 (match_test "TARGET_AVX")
5e60198b
UB
14534 (const_string "*")
14535 (const_string "1")))
9a5cee02 14536 (set_attr "prefix_extra" "1")
725fd454 14537 (set_attr "length_immediate" "1")
95879c72
L
14538 (set_attr "prefix" "maybe_vex")
14539 (set_attr "mode" "<MODE>")])
14540
;; Round-then-convert expander: rounds operand 1 into a fresh temporary
;; using the rounding control in operand 2, then converts the temporary to
;; the integer vector operand 0 with fix_trunc.  Always emits its own
;; insns (DONE).
eab880cf 14541(define_expand "<sse4_1>_round<ssemodesuffix>_sfix<avxsizesuffix>"
82e86dc6 14542 [(match_operand:<sseintvecmode> 0 "register_operand")
a9ccbba2 14543 (match_operand:VF1_128_256 1 "nonimmediate_operand")
82e86dc6 14544 (match_operand:SI 2 "const_0_to_15_operand")]
eab880cf
UB
14545 "TARGET_ROUND"
14546{
14547 rtx tmp = gen_reg_rtx (<MODE>mode);
14548
14549 emit_insn
14550 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp, operands[1],
14551 operands[2]));
14552 emit_insn
14553 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
14554 DONE;
14555})
14556
ec5e777c
AI
;; 512-bit roundpd entry point: simply forwards all three operands to
;; gen_avx512f_rndscalev8df (defined elsewhere) and is DONE.
14557(define_expand "avx512f_roundpd512"
14558 [(match_operand:V8DF 0 "register_operand")
14559 (match_operand:V8DF 1 "nonimmediate_operand")
14560 (match_operand:SI 2 "const_0_to_15_operand")]
14561 "TARGET_AVX512F"
14562{
14563 emit_insn (gen_avx512f_rndscalev8df (operands[0], operands[1], operands[2]));
14564 DONE;
14565})
14566
;; Round two double-precision vectors (operands 1 and 2) with rounding
;; control operand 3 and pack the truncated integer results into operand 0.
;; Two strategies:
;;  - V2DF with AVX available, 256-bit vectors not disfavoured and
;;    optimising for speed: concatenate both inputs into one V4DF, round
;;    once with avx_roundpd256, then convert with fix_truncv4dfv4si2.
;;  - otherwise: round each input separately and combine with
;;    vec_pack_sfix_trunc_<mode>.
eab880cf 14567(define_expand "<sse4_1>_round<ssemodesuffix>_vec_pack_sfix<avxsizesuffix>"
82e86dc6
UB
14568 [(match_operand:<ssepackfltmode> 0 "register_operand")
14569 (match_operand:VF2 1 "nonimmediate_operand")
14570 (match_operand:VF2 2 "nonimmediate_operand")
14571 (match_operand:SI 3 "const_0_to_15_operand")]
eab880cf
UB
14572 "TARGET_ROUND"
14573{
14574 rtx tmp0, tmp1;
14575
c05e32f5 14576 if (<MODE>mode == V2DFmode
a1aff58f 14577 && TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
c05e32f5
UB
14578 {
14579 rtx tmp2 = gen_reg_rtx (V4DFmode);
eab880cf 14580
c05e32f5
UB
14581 tmp0 = gen_reg_rtx (V4DFmode);
14582 tmp1 = force_reg (V2DFmode, operands[1]);
14583
14584 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
14585 emit_insn (gen_avx_roundpd256 (tmp2, tmp0, operands[3]));
14586 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
14587 }
14588 else
14589 {
14590 tmp0 = gen_reg_rtx (<MODE>mode);
14591 tmp1 = gen_reg_rtx (<MODE>mode);
14592
14593 emit_insn
14594 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp0, operands[1],
14595 operands[3]));
14596 emit_insn
14597 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp1, operands[2],
14598 operands[3]));
14599 emit_insn
14600 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
14601 }
eab880cf
UB
14602 DONE;
14603})
14604
;; Scalar round: the rounded value of operand 2 (UNSPEC_ROUND with the
;; 0..15 control in operand 3) is merged into operand 1 under mask
;; (const_int 1), i.e. only element 0 takes the rounded result and the
;; remaining elements come from operand 1.
1c154a23 14605(define_insn "sse4_1_round<ssescalarmodesuffix>"
45392c76 14606 [(set (match_operand:VF_128 0 "register_operand" "=Yr,*x,x")
5e60198b
UB
14607 (vec_merge:VF_128
14608 (unspec:VF_128
45392c76
IE
14609 [(match_operand:VF_128 2 "register_operand" "Yr,*x,x")
14610 (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")]
85845bb9 14611 UNSPEC_ROUND)
45392c76 14612 (match_operand:VF_128 1 "register_operand" "0,0,x")
9a5cee02 14613 (const_int 1)))]
04e1d06b 14614 "TARGET_ROUND"
5e60198b 14615 "@
45392c76 14616 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
5e60198b
UB
14617 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
14618 vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
45392c76 14619 [(set_attr "isa" "noavx,noavx,avx")
5e60198b 14620 (set_attr "type" "ssecvt")
725fd454 14621 (set_attr "length_immediate" "1")
45392c76 14622 (set_attr "prefix_data16" "1,1,*")
5e60198b 14623 (set_attr "prefix_extra" "1")
45392c76 14624 (set_attr "prefix" "orig,orig,vex")
85845bb9 14625 (set_attr "mode" "<MODE>")])
06f4e35d 14626
bbeb5beb
UB
;; Vector round expander, only used when !flag_trapping_math.
;; The preparation code builds a vector whose elements are
;; nextafter (0.5, 0.0) (0.5 minus 2^(-p-1) for the element format's
;; precision p), gives it the sign of operand 1 via copysign (operand 3),
;; and the RTL template then computes
;;   operand 4 = operand 1 + operand 3
;;   operand 0 = UNSPEC_ROUND (operand 4, ROUND_TRUNC).
14627(define_expand "round<mode>2"
14628 [(set (match_dup 4)
14629 (plus:VF
82e86dc6 14630 (match_operand:VF 1 "register_operand")
bbeb5beb 14631 (match_dup 3)))
82e86dc6 14632 (set (match_operand:VF 0 "register_operand")
bbeb5beb
UB
14633 (unspec:VF
14634 [(match_dup 4) (match_dup 5)]
14635 UNSPEC_ROUND))]
14636 "TARGET_ROUND && !flag_trapping_math"
14637{
ef4bddc2 14638 machine_mode scalar_mode;
bbeb5beb
UB
14639 const struct real_format *fmt;
14640 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
14641 rtx half, vec_half;
14642
14643 scalar_mode = GET_MODE_INNER (<MODE>mode);
14644
14645 /* load nextafter (0.5, 0.0) */
14646 fmt = REAL_MODE_FORMAT (scalar_mode);
14647 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, scalar_mode);
14648 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
14649 half = const_double_from_real_value (pred_half, scalar_mode);
14650
14651 vec_half = ix86_build_const_vector (<MODE>mode, true, half);
14652 vec_half = force_reg (<MODE>mode, vec_half);
14653
14654 operands[3] = gen_reg_rtx (<MODE>mode);
14655 emit_insn (gen_copysign<mode>3 (operands[3], vec_half, operands[1]));
14656
14657 operands[4] = gen_reg_rtx (<MODE>mode);
14658 operands[5] = GEN_INT (ROUND_TRUNC);
14659})
14660
;; Round operand 1 via round<mode>2 into a temporary, then convert the
;; temporary to the integer vector operand 0 with fix_trunc.  Shares the
;; !flag_trapping_math restriction of round<mode>2.
eab880cf 14661(define_expand "round<mode>2_sfix"
82e86dc6 14662 [(match_operand:<sseintvecmode> 0 "register_operand")
a9ccbba2 14663 (match_operand:VF1_128_256 1 "register_operand")]
eab880cf
UB
14664 "TARGET_ROUND && !flag_trapping_math"
14665{
14666 rtx tmp = gen_reg_rtx (<MODE>mode);
14667
14668 emit_insn (gen_round<mode>2 (tmp, operands[1]));
14669
14670 emit_insn
14671 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
14672 DONE;
14673})
14674
;; Round two double-precision vectors (operands 1 and 2) and pack the
;; truncated integer results into operand 0.  Two strategies:
;;  - V2DF with AVX available, 256-bit vectors not disfavoured and
;;    optimising for speed: concatenate both inputs into one V4DF, round
;;    once with roundv4df2, then convert with fix_truncv4dfv4si2.
;;  - otherwise: round each input with round<mode>2 and combine with
;;    vec_pack_sfix_trunc_<mode>.
14675(define_expand "round<mode>2_vec_pack_sfix"
82e86dc6
UB
14676 [(match_operand:<ssepackfltmode> 0 "register_operand")
14677 (match_operand:VF2 1 "register_operand")
14678 (match_operand:VF2 2 "register_operand")]
eab880cf
UB
14679 "TARGET_ROUND && !flag_trapping_math"
14680{
14681 rtx tmp0, tmp1;
14682
c05e32f5 14683 if (<MODE>mode == V2DFmode
a1aff58f 14684 && TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
c05e32f5
UB
14685 {
14686 rtx tmp2 = gen_reg_rtx (V4DFmode);
eab880cf 14687
c05e32f5
UB
14688 tmp0 = gen_reg_rtx (V4DFmode);
14689 tmp1 = force_reg (V2DFmode, operands[1]);
eab880cf 14690
c05e32f5
UB
14691 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
14692 emit_insn (gen_roundv4df2 (tmp2, tmp0));
14693 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
14694 }
14695 else
14696 {
14697 tmp0 = gen_reg_rtx (<MODE>mode);
14698 tmp1 = gen_reg_rtx (<MODE>mode);
14699
14700 emit_insn (gen_round<mode>2 (tmp0, operands[1]));
14701 emit_insn (gen_round<mode>2 (tmp1, operands[2]));
14702
14703 emit_insn
14704 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
14705 }
eab880cf
UB
14706 DONE;
14707})
14708
06f4e35d
L
14709;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14710;;
14711;; Intel SSE4.2 string/text processing instructions
14712;;
14713;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14714
;; Combined pcmpestr pattern with three results: an SI index in operand 0
;; ("c" register), a V16QI mask in operand 1 ("Yz" register) and
;; FLAGS_REG, all defined by the same UNSPEC_PCMPESTR inputs.
;; It is never output directly (template "#") and is always split while
;; pseudos can still be created.  The splitter looks at REG_UNUSED notes
;; on the insn to see which results are live and emits:
;;  - sse4_2_pcmpestri if operand 0 is used,
;;  - sse4_2_pcmpestrm if operand 1 is used,
;;  - sse4_2_pcmpestr_cconly if only the flags are used,
;;  - a NOTE_INSN_DELETED note if nothing is used.
14715(define_insn_and_split "sse4_2_pcmpestr"
14716 [(set (match_operand:SI 0 "register_operand" "=c,c")
14717 (unspec:SI
305b3c9b 14718 [(match_operand:V16QI 2 "register_operand" "x,x")
06f4e35d 14719 (match_operand:SI 3 "register_operand" "a,a")
305b3c9b 14720 (match_operand:V16QI 4 "nonimmediate_operand" "x,m")
06f4e35d
L
14721 (match_operand:SI 5 "register_operand" "d,d")
14722 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
14723 UNSPEC_PCMPESTR))
e2520c41 14724 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
06f4e35d
L
14725 (unspec:V16QI
14726 [(match_dup 2)
14727 (match_dup 3)
14728 (match_dup 4)
14729 (match_dup 5)
14730 (match_dup 6)]
14731 UNSPEC_PCMPESTR))
14732 (set (reg:CC FLAGS_REG)
14733 (unspec:CC
14734 [(match_dup 2)
14735 (match_dup 3)
14736 (match_dup 4)
14737 (match_dup 5)
14738 (match_dup 6)]
14739 UNSPEC_PCMPESTR))]
14740 "TARGET_SSE4_2
5071eab7 14741 && can_create_pseudo_p ()"
06f4e35d
L
14742 "#"
14743 "&& 1"
14744 [(const_int 0)]
14745{
14746 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
14747 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
14748 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
14749
14750 if (ecx)
14751 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
14752 operands[3], operands[4],
14753 operands[5], operands[6]));
14754 if (xmm0)
14755 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
14756 operands[3], operands[4],
14757 operands[5], operands[6]));
14758 if (flags && !(ecx || xmm0))
627eb745
UB
14759 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
14760 operands[2], operands[3],
06f4e35d
L
14761 operands[4], operands[5],
14762 operands[6]));
b807694e
UB
14763 if (!(flags || ecx || xmm0))
14764 emit_note (NOTE_INSN_DELETED);
14765
06f4e35d
L
14766 DONE;
14767}
14768 [(set_attr "type" "sselog")
14769 (set_attr "prefix_data16" "1")
14770 (set_attr "prefix_extra" "1")
f220a4f4 14771 (set_attr "ssememalign" "8")
725fd454 14772 (set_attr "length_immediate" "1")
06f4e35d
L
14773 (set_attr "memory" "none,load")
14774 (set_attr "mode" "TI")])
14775
b86da593
UB
;; Variant of the combined explicit-length compare whose second string
;; (operand 4) is a memory operand wrapped in UNSPEC_LOADU.  Single
;; alternative, memory attribute is load.  The splitter is the same as in
;; sse4_2_pcmpestr: it checks REG_UNUSED notes on curr_insn and emits
;; pcmpestri, pcmpestrm, the cconly form, or a NOTE_INSN_DELETED.  The bare
;; memory operand (operands[4]) is what gets passed to the emitted insns,
;; so the UNSPEC_LOADU wrapper does not appear in the split result.
14776(define_insn_and_split "*sse4_2_pcmpestr_unaligned"
14777 [(set (match_operand:SI 0 "register_operand" "=c")
14778 (unspec:SI
305b3c9b 14779 [(match_operand:V16QI 2 "register_operand" "x")
b86da593
UB
14780 (match_operand:SI 3 "register_operand" "a")
14781 (unspec:V16QI
14782 [(match_operand:V16QI 4 "memory_operand" "m")]
860f5e77 14783 UNSPEC_LOADU)
b86da593
UB
14784 (match_operand:SI 5 "register_operand" "d")
14785 (match_operand:SI 6 "const_0_to_255_operand" "n")]
14786 UNSPEC_PCMPESTR))
14787 (set (match_operand:V16QI 1 "register_operand" "=Yz")
14788 (unspec:V16QI
14789 [(match_dup 2)
14790 (match_dup 3)
860f5e77 14791 (unspec:V16QI [(match_dup 4)] UNSPEC_LOADU)
b86da593
UB
14792 (match_dup 5)
14793 (match_dup 6)]
14794 UNSPEC_PCMPESTR))
14795 (set (reg:CC FLAGS_REG)
14796 (unspec:CC
14797 [(match_dup 2)
14798 (match_dup 3)
860f5e77 14799 (unspec:V16QI [(match_dup 4)] UNSPEC_LOADU)
b86da593
UB
14800 (match_dup 5)
14801 (match_dup 6)]
14802 UNSPEC_PCMPESTR))]
14803 "TARGET_SSE4_2
14804 && can_create_pseudo_p ()"
14805 "#"
14806 "&& 1"
14807 [(const_int 0)]
14808{
14809 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
14810 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
14811 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
14812
14813 if (ecx)
14814 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
14815 operands[3], operands[4],
14816 operands[5], operands[6]));
14817 if (xmm0)
14818 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
14819 operands[3], operands[4],
14820 operands[5], operands[6]));
14821 if (flags && !(ecx || xmm0))
14822 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
14823 operands[2], operands[3],
14824 operands[4], operands[5],
14825 operands[6]));
14826 if (!(flags || ecx || xmm0))
14827 emit_note (NOTE_INSN_DELETED);
14828
14829 DONE;
14830}
14831 [(set_attr "type" "sselog")
14832 (set_attr "prefix_data16" "1")
14833 (set_attr "prefix_extra" "1")
f220a4f4 14834 (set_attr "ssememalign" "8")
b86da593
UB
14835 (set_attr "length_immediate" "1")
14836 (set_attr "memory" "load")
14837 (set_attr "mode" "TI")])
14838
06f4e35d
L
;; Index form of the explicit-length compare: sets the SI result in
;; operand 0 (constraint c) and FLAGS_REG from the same UNSPEC_PCMPESTR
;; inputs.  Operands 2 and 4 are constrained to a and d but are not printed
;; in the template, which only names operands 1, 3 and 5.  Two alternatives:
;; operand 3 in a register or in memory (memory attribute none,load).
14839(define_insn "sse4_2_pcmpestri"
14840 [(set (match_operand:SI 0 "register_operand" "=c,c")
14841 (unspec:SI
14842 [(match_operand:V16QI 1 "register_operand" "x,x")
14843 (match_operand:SI 2 "register_operand" "a,a")
14844 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
14845 (match_operand:SI 4 "register_operand" "d,d")
14846 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
14847 UNSPEC_PCMPESTR))
14848 (set (reg:CC FLAGS_REG)
14849 (unspec:CC
14850 [(match_dup 1)
14851 (match_dup 2)
14852 (match_dup 3)
14853 (match_dup 4)
14854 (match_dup 5)]
14855 UNSPEC_PCMPESTR))]
14856 "TARGET_SSE4_2"
95879c72 14857 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
06f4e35d
L
14858 [(set_attr "type" "sselog")
14859 (set_attr "prefix_data16" "1")
14860 (set_attr "prefix_extra" "1")
95879c72 14861 (set_attr "prefix" "maybe_vex")
f220a4f4 14862 (set_attr "ssememalign" "8")
725fd454 14863 (set_attr "length_immediate" "1")
01284895 14864 (set_attr "btver2_decode" "vector")
06f4e35d
L
14865 (set_attr "memory" "none,load")
14866 (set_attr "mode" "TI")])
14867
;; Mask form of the explicit-length compare: sets the V16QI result in
;; operand 0 (constraint Yz) and FLAGS_REG from the same UNSPEC_PCMPESTR
;; inputs.  As in the index form, the a and d constrained length operands
;; (2 and 4) are implicit and not printed.  Two alternatives: operand 3 in
;; a register or in memory (memory attribute none,load).
14868(define_insn "sse4_2_pcmpestrm"
e2520c41 14869 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
06f4e35d
L
14870 (unspec:V16QI
14871 [(match_operand:V16QI 1 "register_operand" "x,x")
14872 (match_operand:SI 2 "register_operand" "a,a")
14873 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
14874 (match_operand:SI 4 "register_operand" "d,d")
14875 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
14876 UNSPEC_PCMPESTR))
14877 (set (reg:CC FLAGS_REG)
14878 (unspec:CC
14879 [(match_dup 1)
14880 (match_dup 2)
14881 (match_dup 3)
14882 (match_dup 4)
14883 (match_dup 5)]
14884 UNSPEC_PCMPESTR))]
14885 "TARGET_SSE4_2"
95879c72 14886 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
06f4e35d
L
14887 [(set_attr "type" "sselog")
14888 (set_attr "prefix_data16" "1")
14889 (set_attr "prefix_extra" "1")
f220a4f4 14890 (set_attr "ssememalign" "8")
725fd454 14891 (set_attr "length_immediate" "1")
95879c72 14892 (set_attr "prefix" "maybe_vex")
01284895 14893 (set_attr "btver2_decode" "vector")
06f4e35d
L
14894 (set_attr "memory" "none,load")
14895 (set_attr "mode" "TI")])
14896
;; Flags-only form of the explicit-length compare: the only set is
;; FLAGS_REG; operands 0 and 1 are match_scratch clobbers.  Four
;; alternatives: 0 and 1 output pcmpestrm and clobber the Yz scratch
;; (operand 0), 2 and 3 output pcmpestri and clobber the c scratch
;; (operand 1).  Within each pair operand 4 is a register or memory
;; (memory attribute none,load,none,load).
14897(define_insn "sse4_2_pcmpestr_cconly"
14898 [(set (reg:CC FLAGS_REG)
14899 (unspec:CC
627eb745
UB
14900 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
14901 (match_operand:SI 3 "register_operand" "a,a,a,a")
14902 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
14903 (match_operand:SI 5 "register_operand" "d,d,d,d")
14904 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
06f4e35d 14905 UNSPEC_PCMPESTR))
627eb745
UB
14906 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
14907 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
06f4e35d
L
14908 "TARGET_SSE4_2"
14909 "@
95879c72
L
14910 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
14911 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
14912 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
14913 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
06f4e35d
L
14914 [(set_attr "type" "sselog")
14915 (set_attr "prefix_data16" "1")
14916 (set_attr "prefix_extra" "1")
f220a4f4 14917 (set_attr "ssememalign" "8")
725fd454 14918 (set_attr "length_immediate" "1")
06f4e35d 14919 (set_attr "memory" "none,load,none,load")
01284895 14920 (set_attr "btver2_decode" "vector,vector,vector,vector")
95879c72 14921 (set_attr "prefix" "maybe_vex")
06f4e35d
L
14922 (set_attr "mode" "TI")])
14923
;; Combined implicit-length string compare.  Sets an SI result in operand 0
;; (constraint c), a V16QI result in operand 1 (constraint Yz) and
;; FLAGS_REG, all from the same UNSPEC_PCMPISTR inputs.  Never output
;; directly; only valid while can_create_pseudo_p () holds.  The splitter
;; checks REG_UNUSED notes on curr_insn and emits sse4_2_pcmpistri and/or
;; sse4_2_pcmpistrm for the results that are not marked unused, the cconly
;; form when only FLAGS_REG is not marked unused, and a NOTE_INSN_DELETED
;; when all three are marked unused.
14924(define_insn_and_split "sse4_2_pcmpistr"
14925 [(set (match_operand:SI 0 "register_operand" "=c,c")
14926 (unspec:SI
305b3c9b
UB
14927 [(match_operand:V16QI 2 "register_operand" "x,x")
14928 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
06f4e35d
L
14929 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
14930 UNSPEC_PCMPISTR))
e2520c41 14931 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
06f4e35d
L
14932 (unspec:V16QI
14933 [(match_dup 2)
14934 (match_dup 3)
14935 (match_dup 4)]
14936 UNSPEC_PCMPISTR))
14937 (set (reg:CC FLAGS_REG)
14938 (unspec:CC
14939 [(match_dup 2)
14940 (match_dup 3)
14941 (match_dup 4)]
14942 UNSPEC_PCMPISTR))]
14943 "TARGET_SSE4_2
5071eab7 14944 && can_create_pseudo_p ()"
06f4e35d
L
14945 "#"
14946 "&& 1"
14947 [(const_int 0)]
14948{
14949 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
14950 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
14951 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
14952
14953 if (ecx)
14954 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
14955 operands[3], operands[4]));
14956 if (xmm0)
14957 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
14958 operands[3], operands[4]));
14959 if (flags && !(ecx || xmm0))
627eb745
UB
14960 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
14961 operands[2], operands[3],
06f4e35d 14962 operands[4]));
b807694e
UB
14963 if (!(flags || ecx || xmm0))
14964 emit_note (NOTE_INSN_DELETED);
14965
06f4e35d
L
14966 DONE;
14967}
14968 [(set_attr "type" "sselog")
14969 (set_attr "prefix_data16" "1")
14970 (set_attr "prefix_extra" "1")
f220a4f4 14971 (set_attr "ssememalign" "8")
725fd454 14972 (set_attr "length_immediate" "1")
06f4e35d
L
14973 (set_attr "memory" "none,load")
14974 (set_attr "mode" "TI")])
14975
b86da593
UB
;; Variant of the combined implicit-length compare whose second string
;; (operand 3) is a memory operand wrapped in UNSPEC_LOADU.  Single
;; alternative, memory attribute is load.  The splitter matches the one in
;; sse4_2_pcmpistr and passes the bare memory operand (operands[3]) to the
;; emitted pcmpistri / pcmpistrm / cconly insns.
14976(define_insn_and_split "*sse4_2_pcmpistr_unaligned"
14977 [(set (match_operand:SI 0 "register_operand" "=c")
14978 (unspec:SI
305b3c9b 14979 [(match_operand:V16QI 2 "register_operand" "x")
b86da593
UB
14980 (unspec:V16QI
14981 [(match_operand:V16QI 3 "memory_operand" "m")]
860f5e77 14982 UNSPEC_LOADU)
b86da593
UB
14983 (match_operand:SI 4 "const_0_to_255_operand" "n")]
14984 UNSPEC_PCMPISTR))
14985 (set (match_operand:V16QI 1 "register_operand" "=Yz")
14986 (unspec:V16QI
14987 [(match_dup 2)
860f5e77 14988 (unspec:V16QI [(match_dup 3)] UNSPEC_LOADU)
b86da593
UB
14989 (match_dup 4)]
14990 UNSPEC_PCMPISTR))
14991 (set (reg:CC FLAGS_REG)
14992 (unspec:CC
14993 [(match_dup 2)
860f5e77 14994 (unspec:V16QI [(match_dup 3)] UNSPEC_LOADU)
b86da593
UB
14995 (match_dup 4)]
14996 UNSPEC_PCMPISTR))]
14997 "TARGET_SSE4_2
14998 && can_create_pseudo_p ()"
14999 "#"
15000 "&& 1"
15001 [(const_int 0)]
15002{
15003 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
15004 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
15005 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
15006
15007 if (ecx)
15008 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
15009 operands[3], operands[4]));
15010 if (xmm0)
15011 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
15012 operands[3], operands[4]));
15013 if (flags && !(ecx || xmm0))
15014 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
15015 operands[2], operands[3],
15016 operands[4]));
15017 if (!(flags || ecx || xmm0))
15018 emit_note (NOTE_INSN_DELETED);
15019
15020 DONE;
15021}
15022 [(set_attr "type" "sselog")
15023 (set_attr "prefix_data16" "1")
15024 (set_attr "prefix_extra" "1")
f220a4f4 15025 (set_attr "ssememalign" "8")
b86da593
UB
15026 (set_attr "length_immediate" "1")
15027 (set_attr "memory" "load")
15028 (set_attr "mode" "TI")])
15029
06f4e35d
L
;; Index form of the implicit-length compare: sets the SI result in
;; operand 0 (constraint c) and FLAGS_REG from the same UNSPEC_PCMPISTR
;; inputs.  Two alternatives: operand 2 in a register or in memory
;; (memory attribute none,load).
15030(define_insn "sse4_2_pcmpistri"
15031 [(set (match_operand:SI 0 "register_operand" "=c,c")
15032 (unspec:SI
15033 [(match_operand:V16QI 1 "register_operand" "x,x")
15034 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
15035 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
15036 UNSPEC_PCMPISTR))
15037 (set (reg:CC FLAGS_REG)
15038 (unspec:CC
15039 [(match_dup 1)
15040 (match_dup 2)
15041 (match_dup 3)]
15042 UNSPEC_PCMPISTR))]
15043 "TARGET_SSE4_2"
95879c72 15044 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
06f4e35d
L
15045 [(set_attr "type" "sselog")
15046 (set_attr "prefix_data16" "1")
15047 (set_attr "prefix_extra" "1")
f220a4f4 15048 (set_attr "ssememalign" "8")
725fd454 15049 (set_attr "length_immediate" "1")
95879c72 15050 (set_attr "prefix" "maybe_vex")
06f4e35d 15051 (set_attr "memory" "none,load")
01284895 15052 (set_attr "btver2_decode" "vector")
06f4e35d
L
15053 (set_attr "mode" "TI")])
15054
;; Mask form of the implicit-length compare: sets the V16QI result in
;; operand 0 (constraint Yz) and FLAGS_REG from the same UNSPEC_PCMPISTR
;; inputs.  Two alternatives: operand 2 in a register or in memory
;; (memory attribute none,load).
15055(define_insn "sse4_2_pcmpistrm"
e2520c41 15056 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
06f4e35d
L
15057 (unspec:V16QI
15058 [(match_operand:V16QI 1 "register_operand" "x,x")
15059 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
15060 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
15061 UNSPEC_PCMPISTR))
15062 (set (reg:CC FLAGS_REG)
15063 (unspec:CC
15064 [(match_dup 1)
15065 (match_dup 2)
15066 (match_dup 3)]
15067 UNSPEC_PCMPISTR))]
15068 "TARGET_SSE4_2"
95879c72 15069 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
06f4e35d
L
15070 [(set_attr "type" "sselog")
15071 (set_attr "prefix_data16" "1")
15072 (set_attr "prefix_extra" "1")
f220a4f4 15073 (set_attr "ssememalign" "8")
725fd454 15074 (set_attr "length_immediate" "1")
95879c72 15075 (set_attr "prefix" "maybe_vex")
06f4e35d 15076 (set_attr "memory" "none,load")
01284895 15077 (set_attr "btver2_decode" "vector")
06f4e35d
L
15078 (set_attr "mode" "TI")])
15079
;; Flags-only form of the implicit-length compare: the only set is
;; FLAGS_REG; operands 0 and 1 are match_scratch clobbers.  Alternatives 0
;; and 1 output pcmpistrm and clobber the Yz scratch (operand 0);
;; alternatives 2 and 3 output pcmpistri and clobber the c scratch
;; (operand 1).  Within each pair operand 3 is a register or memory.
15080(define_insn "sse4_2_pcmpistr_cconly"
15081 [(set (reg:CC FLAGS_REG)
15082 (unspec:CC
627eb745
UB
15083 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
15084 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
15085 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
06f4e35d 15086 UNSPEC_PCMPISTR))
627eb745
UB
15087 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
15088 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
06f4e35d
L
15089 "TARGET_SSE4_2"
15090 "@
95879c72
L
15091 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
15092 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
15093 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
15094 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
06f4e35d
L
15095 [(set_attr "type" "sselog")
15096 (set_attr "prefix_data16" "1")
15097 (set_attr "prefix_extra" "1")
f220a4f4 15098 (set_attr "ssememalign" "8")
725fd454 15099 (set_attr "length_immediate" "1")
06f4e35d 15100 (set_attr "memory" "none,load,none,load")
95879c72 15101 (set_attr "prefix" "maybe_vex")
01284895 15102 (set_attr "btver2_decode" "vector,vector,vector,vector")
06f4e35d 15103 (set_attr "mode" "TI")])
04e1d06b 15104
e711dffd
KY
15105;; Packed float variants
;; Maps the index vector mode of a packed-float gather/scatter prefetch to
;; the SF vector mode used for its memory reference: V8DI -> V8SF and
;; V16SI -> V16SF.
15106(define_mode_attr GATHER_SCATTER_SF_MEM_MODE
15107 [(V8DI "V8SF") (V16SI "V16SF")])
15108
;; Expander for the packed-float gather prefetch.  Operand 0 is the mask
;; (a register or constant -1), operand 1 the VI48_512 index vector,
;; operand 2 the base address, operand 3 the scale (const1248_operand) and
;; operand 4 the hint (const_2_to_3_operand).  The preparation code builds
;; operands[5] as a Pmode UNSPEC_VSIBADDR over (operands[2], operands[1],
;; operands[3]), which becomes the address of the mem.
15109(define_expand "avx512pf_gatherpf<mode>sf"
0fe65b75
AI
15110 [(unspec
15111 [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
e711dffd 15112 (mem:<GATHER_SCATTER_SF_MEM_MODE>
0fe65b75
AI
15113 (match_par_dup 5
15114 [(match_operand 2 "vsib_address_operand")
15115 (match_operand:VI48_512 1 "register_operand")
15116 (match_operand:SI 3 "const1248_operand")]))
22c8aab3 15117 (match_operand:SI 4 "const_2_to_3_operand")]
0fe65b75
AI
15118 UNSPEC_GATHER_PREFETCH)]
15119 "TARGET_AVX512PF"
15120{
15121 operands[5]
15122 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
15123 operands[3]), UNSPEC_VSIBADDR);
15124})
15125
;; Masked packed-float gather prefetch.  Operand 0 is the mask register
;; (constraint Yk), printed in braces after the VSIB memory operand 5.
;; The hint in operand 4 selects the mnemonic: 3 gives vgatherpf0...ps,
;; 2 gives vgatherpf1...ps; any other value is unreachable.
e711dffd 15126(define_insn "*avx512pf_gatherpf<mode>sf_mask"
0fe65b75 15127 [(unspec
be792bce 15128 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
e711dffd 15129 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
0fe65b75 15130 [(unspec:P
65e95828 15131 [(match_operand:P 2 "vsib_address_operand" "Tv")
0fe65b75
AI
15132 (match_operand:VI48_512 1 "register_operand" "v")
15133 (match_operand:SI 3 "const1248_operand" "n")]
15134 UNSPEC_VSIBADDR)])
22c8aab3 15135 (match_operand:SI 4 "const_2_to_3_operand" "n")]
0fe65b75
AI
15136 UNSPEC_GATHER_PREFETCH)]
15137 "TARGET_AVX512PF"
15138{
15139 switch (INTVAL (operands[4]))
15140 {
22c8aab3 15141 case 3:
cf73ee60
KY
15142 return "vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
15143 case 2:
0fe65b75
AI
15144 return "vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
15145 default:
15146 gcc_unreachable ();
15147 }
15148}
15149 [(set_attr "type" "sse")
15150 (set_attr "prefix" "evex")
15151 (set_attr "mode" "XI")])
15152
;; Packed-float gather prefetch with a constant -1 mask, so no mask
;; register is printed.  Operand 4 is the VSIB memory reference.  The hint
;; in operand 3 selects the mnemonic: 3 gives vgatherpf0...ps, 2 gives
;; vgatherpf1...ps; any other value is unreachable.
e711dffd 15153(define_insn "*avx512pf_gatherpf<mode>sf"
0fe65b75
AI
15154 [(unspec
15155 [(const_int -1)
e711dffd 15156 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 4 "vsib_mem_operator"
0fe65b75 15157 [(unspec:P
65e95828 15158 [(match_operand:P 1 "vsib_address_operand" "Tv")
0fe65b75
AI
15159 (match_operand:VI48_512 0 "register_operand" "v")
15160 (match_operand:SI 2 "const1248_operand" "n")]
15161 UNSPEC_VSIBADDR)])
22c8aab3 15162 (match_operand:SI 3 "const_2_to_3_operand" "n")]
0fe65b75
AI
15163 UNSPEC_GATHER_PREFETCH)]
15164 "TARGET_AVX512PF"
15165{
15166 switch (INTVAL (operands[3]))
15167 {
22c8aab3 15168 case 3:
cf73ee60
KY
15169 return "vgatherpf0<ssemodesuffix>ps\t{%4|%4}";
15170 case 2:
0fe65b75
AI
15171 return "vgatherpf1<ssemodesuffix>ps\t{%4|%4}";
15172 default:
15173 gcc_unreachable ();
15174 }
15175}
15176 [(set_attr "type" "sse")
15177 (set_attr "prefix" "evex")
15178 (set_attr "mode" "XI")])
15179
e711dffd
KY
15180;; Packed double variants
;; Expander for the packed-double gather prefetch.  The memory reference is
;; always V8DF; the index vector (operand 1) iterates over VI4_256_8_512.
;; Operand 0 is the mask (register or constant -1), operand 2 the base
;; address, operand 3 the scale and operand 4 the hint.  The preparation
;; code builds operands[5] as a Pmode UNSPEC_VSIBADDR over (operands[2],
;; operands[1], operands[3]).
15181(define_expand "avx512pf_gatherpf<mode>df"
15182 [(unspec
15183 [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
15184 (mem:V8DF
15185 (match_par_dup 5
15186 [(match_operand 2 "vsib_address_operand")
15187 (match_operand:VI4_256_8_512 1 "register_operand")
15188 (match_operand:SI 3 "const1248_operand")]))
22c8aab3 15189 (match_operand:SI 4 "const_2_to_3_operand")]
e711dffd
KY
15190 UNSPEC_GATHER_PREFETCH)]
15191 "TARGET_AVX512PF"
15192{
15193 operands[5]
15194 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
15195 operands[3]), UNSPEC_VSIBADDR);
15196})
15197
;; Masked packed-double gather prefetch.  Operand 0 is the mask register
;; (constraint Yk), printed in braces after the V8DF VSIB memory operand 5.
;; The hint in operand 4 selects the mnemonic: 3 gives vgatherpf0...pd,
;; 2 gives vgatherpf1...pd; any other value is unreachable.
15198(define_insn "*avx512pf_gatherpf<mode>df_mask"
15199 [(unspec
be792bce 15200 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
e711dffd
KY
15201 (match_operator:V8DF 5 "vsib_mem_operator"
15202 [(unspec:P
15203 [(match_operand:P 2 "vsib_address_operand" "Tv")
15204 (match_operand:VI4_256_8_512 1 "register_operand" "v")
15205 (match_operand:SI 3 "const1248_operand" "n")]
15206 UNSPEC_VSIBADDR)])
22c8aab3 15207 (match_operand:SI 4 "const_2_to_3_operand" "n")]
e711dffd
KY
15208 UNSPEC_GATHER_PREFETCH)]
15209 "TARGET_AVX512PF"
15210{
15211 switch (INTVAL (operands[4]))
15212 {
22c8aab3 15213 case 3:
cf73ee60
KY
15214 return "vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
15215 case 2:
e711dffd
KY
15216 return "vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
15217 default:
15218 gcc_unreachable ();
15219 }
15220}
15221 [(set_attr "type" "sse")
15222 (set_attr "prefix" "evex")
15223 (set_attr "mode" "XI")])
15224
;; Packed-double gather prefetch with a constant -1 mask, so no mask
;; register is printed.  Operand 4 is the V8DF VSIB memory reference.  The
;; hint in operand 3 selects the mnemonic: 3 gives vgatherpf0...pd, 2 gives
;; vgatherpf1...pd; any other value is unreachable.
15225(define_insn "*avx512pf_gatherpf<mode>df"
15226 [(unspec
15227 [(const_int -1)
15228 (match_operator:V8DF 4 "vsib_mem_operator"
15229 [(unspec:P
15230 [(match_operand:P 1 "vsib_address_operand" "Tv")
15231 (match_operand:VI4_256_8_512 0 "register_operand" "v")
15232 (match_operand:SI 2 "const1248_operand" "n")]
15233 UNSPEC_VSIBADDR)])
22c8aab3 15234 (match_operand:SI 3 "const_2_to_3_operand" "n")]
e711dffd
KY
15235 UNSPEC_GATHER_PREFETCH)]
15236 "TARGET_AVX512PF"
15237{
15238 switch (INTVAL (operands[3]))
15239 {
22c8aab3 15240 case 3:
cf73ee60
KY
15241 return "vgatherpf0<ssemodesuffix>pd\t{%4|%4}";
15242 case 2:
e711dffd
KY
15243 return "vgatherpf1<ssemodesuffix>pd\t{%4|%4}";
15244 default:
15245 gcc_unreachable ();
15246 }
15247}
15248 [(set_attr "type" "sse")
15249 (set_attr "prefix" "evex")
15250 (set_attr "mode" "XI")])
15251
15252;; Packed float variants
;; Expander for the packed-float scatter prefetch.  Same operand layout as
;; the gather expander, except that the hint (operand 4) must satisfy
;; const2367_operand and the unspec is UNSPEC_SCATTER_PREFETCH.  The
;; preparation code builds operands[5] as a Pmode UNSPEC_VSIBADDR over
;; (operands[2], operands[1], operands[3]).
15253(define_expand "avx512pf_scatterpf<mode>sf"
0fe65b75
AI
15254 [(unspec
15255 [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
e711dffd 15256 (mem:<GATHER_SCATTER_SF_MEM_MODE>
0fe65b75
AI
15257 (match_par_dup 5
15258 [(match_operand 2 "vsib_address_operand")
15259 (match_operand:VI48_512 1 "register_operand")
15260 (match_operand:SI 3 "const1248_operand")]))
66b03f81 15261 (match_operand:SI 4 "const2367_operand")]
0fe65b75
AI
15262 UNSPEC_SCATTER_PREFETCH)]
15263 "TARGET_AVX512PF"
15264{
15265 operands[5]
15266 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
15267 operands[3]), UNSPEC_VSIBADDR);
15268})
15269
;; Masked packed-float scatter prefetch.  Operand 0 is the mask register
;; (constraint Yk), printed in braces after the VSIB memory operand 5.
;; The hint in operand 4 selects the mnemonic: 3 or 7 gives
;; vscatterpf0...ps, 2 or 6 gives vscatterpf1...ps; any other value is
;; unreachable.
e711dffd 15270(define_insn "*avx512pf_scatterpf<mode>sf_mask"
0fe65b75 15271 [(unspec
be792bce 15272 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
e711dffd 15273 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
0fe65b75 15274 [(unspec:P
65e95828 15275 [(match_operand:P 2 "vsib_address_operand" "Tv")
0fe65b75
AI
15276 (match_operand:VI48_512 1 "register_operand" "v")
15277 (match_operand:SI 3 "const1248_operand" "n")]
15278 UNSPEC_VSIBADDR)])
66b03f81 15279 (match_operand:SI 4 "const2367_operand" "n")]
0fe65b75
AI
15280 UNSPEC_SCATTER_PREFETCH)]
15281 "TARGET_AVX512PF"
15282{
15283 switch (INTVAL (operands[4]))
15284 {
22c8aab3 15285 case 3:
66b03f81 15286 case 7:
cf73ee60
KY
15287 return "vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
15288 case 2:
15289 case 6:
0fe65b75
AI
15290 return "vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
15291 default:
15292 gcc_unreachable ();
15293 }
15294}
15295 [(set_attr "type" "sse")
15296 (set_attr "prefix" "evex")
15297 (set_attr "mode" "XI")])
15298
;; Packed-float scatter prefetch with a constant -1 mask, so no mask
;; register is printed.  Operand 4 is the VSIB memory reference.  The hint
;; in operand 3 selects the mnemonic: 3 or 7 gives vscatterpf0...ps, 2 or 6
;; gives vscatterpf1...ps; any other value is unreachable.
e711dffd 15299(define_insn "*avx512pf_scatterpf<mode>sf"
0fe65b75
AI
15300 [(unspec
15301 [(const_int -1)
e711dffd 15302 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 4 "vsib_mem_operator"
0fe65b75 15303 [(unspec:P
65e95828 15304 [(match_operand:P 1 "vsib_address_operand" "Tv")
0fe65b75
AI
15305 (match_operand:VI48_512 0 "register_operand" "v")
15306 (match_operand:SI 2 "const1248_operand" "n")]
15307 UNSPEC_VSIBADDR)])
66b03f81 15308 (match_operand:SI 3 "const2367_operand" "n")]
0fe65b75
AI
15309 UNSPEC_SCATTER_PREFETCH)]
15310 "TARGET_AVX512PF"
15311{
15312 switch (INTVAL (operands[3]))
15313 {
22c8aab3 15314 case 3:
66b03f81 15315 case 7:
cf73ee60
KY
15316 return "vscatterpf0<ssemodesuffix>ps\t{%4|%4}";
15317 case 2:
15318 case 6:
0fe65b75
AI
15319 return "vscatterpf1<ssemodesuffix>ps\t{%4|%4}";
15320 default:
15321 gcc_unreachable ();
15322 }
15323}
15324 [(set_attr "type" "sse")
15325 (set_attr "prefix" "evex")
15326 (set_attr "mode" "XI")])
15327
e711dffd
KY
15328;; Packed double variants
;; Expander for the packed-double scatter prefetch.  The memory reference
;; is always V8DF; the index vector (operand 1) iterates over
;; VI4_256_8_512 and the hint (operand 4) must satisfy const2367_operand.
;; The preparation code builds operands[5] as a Pmode UNSPEC_VSIBADDR over
;; (operands[2], operands[1], operands[3]).
15329(define_expand "avx512pf_scatterpf<mode>df"
15330 [(unspec
15331 [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
15332 (mem:V8DF
15333 (match_par_dup 5
15334 [(match_operand 2 "vsib_address_operand")
15335 (match_operand:VI4_256_8_512 1 "register_operand")
15336 (match_operand:SI 3 "const1248_operand")]))
66b03f81 15337 (match_operand:SI 4 "const2367_operand")]
e711dffd
KY
15338 UNSPEC_SCATTER_PREFETCH)]
15339 "TARGET_AVX512PF"
15340{
15341 operands[5]
15342 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
15343 operands[3]), UNSPEC_VSIBADDR);
15344})
15345
;; Masked packed-double scatter prefetch.  Operand 0 is the mask register
;; (constraint Yk), printed in braces after the V8DF VSIB memory operand 5.
;; The hint in operand 4 selects the mnemonic: 3 or 7 gives
;; vscatterpf0...pd, 2 or 6 gives vscatterpf1...pd; any other value is
;; unreachable.
15346(define_insn "*avx512pf_scatterpf<mode>df_mask"
15347 [(unspec
be792bce 15348 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
e711dffd
KY
15349 (match_operator:V8DF 5 "vsib_mem_operator"
15350 [(unspec:P
15351 [(match_operand:P 2 "vsib_address_operand" "Tv")
15352 (match_operand:VI4_256_8_512 1 "register_operand" "v")
15353 (match_operand:SI 3 "const1248_operand" "n")]
15354 UNSPEC_VSIBADDR)])
66b03f81 15355 (match_operand:SI 4 "const2367_operand" "n")]
e711dffd
KY
15356 UNSPEC_SCATTER_PREFETCH)]
15357 "TARGET_AVX512PF"
15358{
15359 switch (INTVAL (operands[4]))
15360 {
22c8aab3 15361 case 3:
66b03f81 15362 case 7:
cf73ee60
KY
15363 return "vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
15364 case 2:
15365 case 6:
e711dffd
KY
15366 return "vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
15367 default:
15368 gcc_unreachable ();
15369 }
15370}
15371 [(set_attr "type" "sse")
15372 (set_attr "prefix" "evex")
15373 (set_attr "mode" "XI")])
15374
;; Packed-double scatter prefetch with a constant -1 mask, so no mask
;; register is printed.  Operand 4 is the V8DF VSIB memory reference.  The
;; hint in operand 3 selects the mnemonic: 3 or 7 gives vscatterpf0...pd,
;; 2 or 6 gives vscatterpf1...pd; any other value is unreachable.
15375(define_insn "*avx512pf_scatterpf<mode>df"
15376 [(unspec
15377 [(const_int -1)
15378 (match_operator:V8DF 4 "vsib_mem_operator"
15379 [(unspec:P
15380 [(match_operand:P 1 "vsib_address_operand" "Tv")
15381 (match_operand:VI4_256_8_512 0 "register_operand" "v")
15382 (match_operand:SI 2 "const1248_operand" "n")]
15383 UNSPEC_VSIBADDR)])
66b03f81 15384 (match_operand:SI 3 "const2367_operand" "n")]
e711dffd
KY
15385 UNSPEC_SCATTER_PREFETCH)]
15386 "TARGET_AVX512PF"
15387{
15388 switch (INTVAL (operands[3]))
15389 {
22c8aab3 15390 case 3:
66b03f81 15391 case 7:
cf73ee60
KY
15392 return "vscatterpf0<ssemodesuffix>pd\t{%4|%4}";
15393 case 2:
15394 case 6:
e711dffd
KY
15395 return "vscatterpf1<ssemodesuffix>pd\t{%4|%4}";
15396 default:
15397 gcc_unreachable ();
15398 }
15399}
15400 [(set_attr "type" "sse")
15401 (set_attr "prefix" "evex")
15402 (set_attr "mode" "XI")])
15403
;; vexp2 on 512-bit float vectors (VF_512), modelled as UNSPEC_EXP2 of
;; operand 1.  The name and template carry the mask and round/SAE
;; substitution placeholders, so masked and SAE variants are expected to be
;; generated from this one pattern.  Requires TARGET_AVX512ER.
c56a42b9 15404(define_insn "avx512er_exp2<mode><mask_name><round_saeonly_name>"
afb4ac68
AI
15405 [(set (match_operand:VF_512 0 "register_operand" "=v")
15406 (unspec:VF_512
c56a42b9 15407 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
afb4ac68
AI
15408 UNSPEC_EXP2))]
15409 "TARGET_AVX512ER"
c56a42b9 15410 "vexp2<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
afb4ac68 15411 [(set_attr "prefix" "evex")
df62b4af 15412 (set_attr "type" "sse")
afb4ac68
AI
15413 (set_attr "mode" "<MODE>")])
15414
;; vrcp28 on 512-bit float vectors (VF_512), modelled as UNSPEC_RCP28 of
;; operand 1.  The name and template carry the mask and round/SAE
;; substitution placeholders.  Requires TARGET_AVX512ER.
c56a42b9 15415(define_insn "<mask_codefor>avx512er_rcp28<mode><mask_name><round_saeonly_name>"
afb4ac68
AI
15416 [(set (match_operand:VF_512 0 "register_operand" "=v")
15417 (unspec:VF_512
c56a42b9 15418 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
afb4ac68
AI
15419 UNSPEC_RCP28))]
15420 "TARGET_AVX512ER"
c56a42b9 15421 "vrcp28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
afb4ac68 15422 [(set_attr "prefix" "evex")
df62b4af 15423 (set_attr "type" "sse")
afb4ac68
AI
15424 (set_attr "mode" "<MODE>")])
15425
c56a42b9
KY
;; Scalar vrcp28 on 128-bit float vectors (VF_128): UNSPEC_RCP28 of
;; operand 1 is vec_merged with operand 2 under the mask (const_int 1).
;; Requires TARGET_AVX512ER.
;; NOTE(review): length_immediate is set to 1 although the output template
;; shows no immediate operand -- confirm this attribute is intended.
15426(define_insn "avx512er_vmrcp28<mode><round_saeonly_name>"
15427 [(set (match_operand:VF_128 0 "register_operand" "=v")
15428 (vec_merge:VF_128
15429 (unspec:VF_128
15430 [(match_operand:VF_128 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
15431 UNSPEC_RCP28)
15432 (match_operand:VF_128 2 "register_operand" "v")
15433 (const_int 1)))]
15434 "TARGET_AVX512ER"
df62b4af 15435 "vrcp28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0, %2, %1<round_saeonly_op3>}"
c56a42b9
KY
15436 [(set_attr "length_immediate" "1")
15437 (set_attr "prefix" "evex")
df62b4af 15438 (set_attr "type" "sse")
c56a42b9
KY
15439 (set_attr "mode" "<MODE>")])
15440
;; vrsqrt28 on 512-bit float vectors (VF_512), modelled as UNSPEC_RSQRT28
;; of operand 1.  The name and template carry the mask and round/SAE
;; substitution placeholders.  Requires TARGET_AVX512ER.
15441(define_insn "<mask_codefor>avx512er_rsqrt28<mode><mask_name><round_saeonly_name>"
afb4ac68
AI
15442 [(set (match_operand:VF_512 0 "register_operand" "=v")
15443 (unspec:VF_512
c56a42b9 15444 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
afb4ac68
AI
15445 UNSPEC_RSQRT28))]
15446 "TARGET_AVX512ER"
c56a42b9 15447 "vrsqrt28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
afb4ac68 15448 [(set_attr "prefix" "evex")
df62b4af 15449 (set_attr "type" "sse")
afb4ac68
AI
15450 (set_attr "mode" "<MODE>")])
15451
c56a42b9
KY
;; Scalar vrsqrt28 on 128-bit float vectors (VF_128): UNSPEC_RSQRT28 of
;; operand 1 is vec_merged with operand 2 under the mask (const_int 1).
;; Requires TARGET_AVX512ER.
;; NOTE(review): length_immediate is set to 1 although the output template
;; shows no immediate operand -- confirm this attribute is intended.
15452(define_insn "avx512er_vmrsqrt28<mode><round_saeonly_name>"
15453 [(set (match_operand:VF_128 0 "register_operand" "=v")
15454 (vec_merge:VF_128
15455 (unspec:VF_128
15456 [(match_operand:VF_128 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
15457 UNSPEC_RSQRT28)
15458 (match_operand:VF_128 2 "register_operand" "v")
15459 (const_int 1)))]
15460 "TARGET_AVX512ER"
df62b4af 15461 "vrsqrt28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0, %2, %1<round_saeonly_op3>}"
c56a42b9 15462 [(set_attr "length_immediate" "1")
df62b4af 15463 (set_attr "type" "sse")
c56a42b9
KY
15464 (set_attr "prefix" "evex")
15465 (set_attr "mode" "<MODE>")])
15466
43a8b705
HJ
15467;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15468;;
15469;; XOP instructions
15470;;
15471;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15472
8861ba4d
UB
;; Code iterator over the two accumulate operations used by the XOP
;; multiply/add patterns (plus and ss_plus), with the mnemonic fragments
;; that go with each: macs / macss and madcs / madcss.
15473(define_code_iterator xop_plus [plus ss_plus])
15474
15475(define_code_attr macs [(plus "macs") (ss_plus "macss")])
15476(define_code_attr madcs [(plus "madcs") (ss_plus "madcss")])
15477
43a8b705 15478;; XOP parallel integer multiply/add instructions.
43a8b705 15479
8861ba4d
UB
;; Element-wise multiply/accumulate on VI24_128 vectors: operand 0 =
;; xop_plus (operand 1 * operand 2, operand 3), where xop_plus is plus or
;; ss_plus.  Operands 1 and 2 are commutative (% constraint modifier);
;; only operand 2 may be in memory.
15480(define_insn "xop_p<macs><ssemodesuffix><ssemodesuffix>"
15481 [(set (match_operand:VI24_128 0 "register_operand" "=x")
15482 (xop_plus:VI24_128
15483 (mult:VI24_128
15484 (match_operand:VI24_128 1 "nonimmediate_operand" "%x")
15485 (match_operand:VI24_128 2 "nonimmediate_operand" "xm"))
920ac804 15486 (match_operand:VI24_128 3 "register_operand" "x")))]
4926bb1d 15487 "TARGET_XOP"
8861ba4d 15488 "vp<macs><ssemodesuffix><ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
43a8b705
HJ
15489 [(set_attr "type" "ssemuladd")
15490 (set_attr "mode" "TI")])
15491
;; Widening multiply/accumulate on the elements at indices 0 and 2 of two
;; V4SI inputs: each selected SI element is sign-extended to DI, the pairs
;; are multiplied, and the V2DI product is combined with operand 3 using
;; xop_plus (plus or ss_plus).
8861ba4d 15492(define_insn "xop_p<macs>dql"
4926bb1d 15493 [(set (match_operand:V2DI 0 "register_operand" "=x")
8861ba4d 15494 (xop_plus:V2DI
43a8b705
HJ
15495 (mult:V2DI
15496 (sign_extend:V2DI
15497 (vec_select:V2SI
aedff010 15498 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
d21a7b44 15499 (parallel [(const_int 0) (const_int 2)])))
43a8b705
HJ
15500 (sign_extend:V2DI
15501 (vec_select:V2SI
8861ba4d 15502 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
d21a7b44 15503 (parallel [(const_int 0) (const_int 2)]))))
920ac804 15504 (match_operand:V2DI 3 "register_operand" "x")))]
4926bb1d 15505 "TARGET_XOP"
8861ba4d 15506 "vp<macs>dql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
43a8b705
HJ
15507 [(set_attr "type" "ssemuladd")
15508 (set_attr "mode" "TI")])
15509
;; Widening multiply/accumulate on the elements at indices 1 and 3 of two
;; V4SI inputs: each selected SI element is sign-extended to DI, the pairs
;; are multiplied, and the V2DI product is combined with operand 3 using
;; xop_plus (plus or ss_plus).
8861ba4d 15510(define_insn "xop_p<macs>dqh"
4926bb1d 15511 [(set (match_operand:V2DI 0 "register_operand" "=x")
8861ba4d 15512 (xop_plus:V2DI
43a8b705
HJ
15513 (mult:V2DI
15514 (sign_extend:V2DI
15515 (vec_select:V2SI
aedff010 15516 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
d21a7b44 15517 (parallel [(const_int 1) (const_int 3)])))
43a8b705
HJ
15518 (sign_extend:V2DI
15519 (vec_select:V2SI
4926bb1d 15520 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
d21a7b44 15521 (parallel [(const_int 1) (const_int 3)]))))
920ac804 15522 (match_operand:V2DI 3 "register_operand" "x")))]
4926bb1d 15523 "TARGET_XOP"
8861ba4d 15524 "vp<macs>dqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
43a8b705
HJ
15525 [(set_attr "type" "ssemuladd")
15526 (set_attr "mode" "TI")])
15527
43a8b705 15528;; XOP parallel integer multiply/add instructions for the intrinsics
;; Widening multiply/accumulate on the elements at indices 1, 3, 5 and 7 of
;; two V8HI inputs: each selected HI element is sign-extended to SI, the
;; pairs are multiplied, and the V4SI product is combined with operand 3
;; using xop_plus (plus or ss_plus).
8861ba4d 15529(define_insn "xop_p<macs>wd"
4926bb1d 15530 [(set (match_operand:V4SI 0 "register_operand" "=x")
8861ba4d 15531 (xop_plus:V4SI
43a8b705
HJ
15532 (mult:V4SI
15533 (sign_extend:V4SI
15534 (vec_select:V4HI
aedff010 15535 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
608dccd7
UB
15536 (parallel [(const_int 1) (const_int 3)
15537 (const_int 5) (const_int 7)])))
43a8b705
HJ
15538 (sign_extend:V4SI
15539 (vec_select:V4HI
4926bb1d 15540 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
608dccd7
UB
15541 (parallel [(const_int 1) (const_int 3)
15542 (const_int 5) (const_int 7)]))))
920ac804 15543 (match_operand:V4SI 3 "register_operand" "x")))]
4926bb1d 15544 "TARGET_XOP"
8861ba4d 15545 "vp<macs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
43a8b705
HJ
15546 [(set_attr "type" "ssemuladd")
15547 (set_attr "mode" "TI")])
15548
;; Multiply, add adjacent products, then accumulate.  Two V4SI products
;; are formed from sign-extended HI elements of operands 1 and 2: one from
;; indices 0, 2, 4, 6 and one from indices 1, 3, 5, 7.  The two products
;; are added with plus, and that sum is combined with operand 3 using
;; xop_plus (plus or ss_plus).
8861ba4d 15549(define_insn "xop_p<madcs>wd"
4926bb1d 15550 [(set (match_operand:V4SI 0 "register_operand" "=x")
8861ba4d 15551 (xop_plus:V4SI
43a8b705
HJ
15552 (plus:V4SI
15553 (mult:V4SI
15554 (sign_extend:V4SI
15555 (vec_select:V4HI
aedff010 15556 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
608dccd7
UB
15557 (parallel [(const_int 0) (const_int 2)
15558 (const_int 4) (const_int 6)])))
43a8b705
HJ
15559 (sign_extend:V4SI
15560 (vec_select:V4HI
4926bb1d 15561 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
608dccd7
UB
15562 (parallel [(const_int 0) (const_int 2)
15563 (const_int 4) (const_int 6)]))))
43a8b705
HJ
15564 (mult:V4SI
15565 (sign_extend:V4SI
15566 (vec_select:V4HI
15567 (match_dup 1)
608dccd7
UB
15568 (parallel [(const_int 1) (const_int 3)
15569 (const_int 5) (const_int 7)])))
43a8b705
HJ
15570 (sign_extend:V4SI
15571 (vec_select:V4HI
15572 (match_dup 2)
608dccd7
UB
15573 (parallel [(const_int 1) (const_int 3)
15574 (const_int 5) (const_int 7)])))))
920ac804 15575 (match_operand:V4SI 3 "register_operand" "x")))]
4926bb1d 15576 "TARGET_XOP"
8861ba4d 15577 "vp<madcs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
43a8b705
HJ
15578 [(set_attr "type" "ssemuladd")
15579 (set_attr "mode" "TI")])
15580
15581;; XOP parallel XMM conditional moves
6bec6c98
UB
;; vpcmov modelled as if_then_else over the V modes: operand 3 is the
;; selector, operand 1 the value for the then arm and operand 2 the value
;; for the else arm.  Two alternatives: in the first operand 2 may be in
;; memory, in the second the selector (operand 3) is in memory.
15582(define_insn "xop_pcmov_<mode><avxsizesuffix>"
15583 [(set (match_operand:V 0 "register_operand" "=x,x")
15584 (if_then_else:V
15585 (match_operand:V 3 "nonimmediate_operand" "x,m")
bd352290
UB
15586 (match_operand:V 1 "register_operand" "x,x")
15587 (match_operand:V 2 "nonimmediate_operand" "xm,x")))]
4926bb1d 15588 "TARGET_XOP"
1a62cb3b 15589 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
43a8b705
HJ
15590 [(set_attr "type" "sse4arg")])
15591
15592;; XOP horizontal add/subtract instructions
;; Horizontal add, bytes to halfwords: the even-indexed and the odd-indexed
;; bytes of operand 1 are each widened with any_extend (sign or zero
;; extension, chosen by the iterator) and added, so result element i is
;; byte 2i + byte 2i+1.
8861ba4d 15593(define_insn "xop_phadd<u>bw"
43a8b705
HJ
15594 [(set (match_operand:V8HI 0 "register_operand" "=x")
15595 (plus:V8HI
8861ba4d 15596 (any_extend:V8HI
43a8b705
HJ
15597 (vec_select:V8QI
15598 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
608dccd7
UB
15599 (parallel [(const_int 0) (const_int 2)
15600 (const_int 4) (const_int 6)
15601 (const_int 8) (const_int 10)
15602 (const_int 12) (const_int 14)])))
8861ba4d 15603 (any_extend:V8HI
43a8b705
HJ
15604 (vec_select:V8QI
15605 (match_dup 1)
608dccd7
UB
15606 (parallel [(const_int 1) (const_int 3)
15607 (const_int 5) (const_int 7)
15608 (const_int 9) (const_int 11)
15609 (const_int 13) (const_int 15)])))))]
43a8b705 15610 "TARGET_XOP"
8861ba4d 15611 "vphadd<u>bw\t{%1, %0|%0, %1}"
43a8b705
HJ
15612 [(set_attr "type" "sseiadd1")])
15613
;; Horizontal add, bytes to doublewords: four strided byte selections
;; (start 0,1,2,3; stride 4) are widened with any_extend and summed as
;; (a + b) + (c + d), so result element i is the sum of bytes 4i..4i+3.
8861ba4d 15614(define_insn "xop_phadd<u>bd"
43a8b705
HJ
15615 [(set (match_operand:V4SI 0 "register_operand" "=x")
15616 (plus:V4SI
15617 (plus:V4SI
8861ba4d 15618 (any_extend:V4SI
43a8b705
HJ
15619 (vec_select:V4QI
15620 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
608dccd7
UB
15621 (parallel [(const_int 0) (const_int 4)
15622 (const_int 8) (const_int 12)])))
8861ba4d 15623 (any_extend:V4SI
43a8b705
HJ
15624 (vec_select:V4QI
15625 (match_dup 1)
608dccd7
UB
15626 (parallel [(const_int 1) (const_int 5)
15627 (const_int 9) (const_int 13)]))))
43a8b705 15628 (plus:V4SI
8861ba4d 15629 (any_extend:V4SI
43a8b705
HJ
15630 (vec_select:V4QI
15631 (match_dup 1)
608dccd7
UB
15632 (parallel [(const_int 2) (const_int 6)
15633 (const_int 10) (const_int 14)])))
8861ba4d 15634 (any_extend:V4SI
43a8b705
HJ
15635 (vec_select:V4QI
15636 (match_dup 1)
608dccd7
UB
15637 (parallel [(const_int 3) (const_int 7)
15638 (const_int 11) (const_int 15)]))))))]
43a8b705 15639 "TARGET_XOP"
8861ba4d 15640 "vphadd<u>bd\t{%1, %0|%0, %1}"
43a8b705
HJ
15641 [(set_attr "type" "sseiadd1")])
15642
;; Horizontal add, bytes to quadwords: eight two-element byte selections
;; (indices k and k+8 for k = 0..7) are widened with any_extend and summed
;; in a balanced tree of plus operations, so result element 0 is the sum of
;; bytes 0..7 and result element 1 is the sum of bytes 8..15.
8861ba4d 15643(define_insn "xop_phadd<u>bq"
43a8b705
HJ
15644 [(set (match_operand:V2DI 0 "register_operand" "=x")
15645 (plus:V2DI
15646 (plus:V2DI
15647 (plus:V2DI
8861ba4d 15648 (any_extend:V2DI
43a8b705
HJ
15649 (vec_select:V2QI
15650 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
920ac804 15651 (parallel [(const_int 0) (const_int 8)])))
8861ba4d 15652 (any_extend:V2DI
43a8b705
HJ
15653 (vec_select:V2QI
15654 (match_dup 1)
920ac804 15655 (parallel [(const_int 1) (const_int 9)]))))
43a8b705 15656 (plus:V2DI
8861ba4d 15657 (any_extend:V2DI
43a8b705
HJ
15658 (vec_select:V2QI
15659 (match_dup 1)
920ac804 15660 (parallel [(const_int 2) (const_int 10)])))
8861ba4d 15661 (any_extend:V2DI
43a8b705
HJ
15662 (vec_select:V2QI
15663 (match_dup 1)
920ac804 15664 (parallel [(const_int 3) (const_int 11)])))))
43a8b705
HJ
15665 (plus:V2DI
15666 (plus:V2DI
8861ba4d 15667 (any_extend:V2DI
43a8b705
HJ
15668 (vec_select:V2QI
15669 (match_dup 1)
920ac804 15670 (parallel [(const_int 4) (const_int 12)])))
8861ba4d 15671 (any_extend:V2DI
43a8b705
HJ
15672 (vec_select:V2QI
15673 (match_dup 1)
920ac804 15674 (parallel [(const_int 5) (const_int 13)]))))
43a8b705 15675 (plus:V2DI
8861ba4d 15676 (any_extend:V2DI
43a8b705
HJ
15677 (vec_select:V2QI
15678 (match_dup 1)
920ac804 15679 (parallel [(const_int 6) (const_int 14)])))
8861ba4d 15680 (any_extend:V2DI
43a8b705
HJ
15681 (vec_select:V2QI
15682 (match_dup 1)
920ac804 15683 (parallel [(const_int 7) (const_int 15)])))))))]
43a8b705 15684 "TARGET_XOP"
8861ba4d 15685 "vphadd<u>bq\t{%1, %0|%0, %1}"
43a8b705
HJ
15686 [(set_attr "type" "sseiadd1")])
15687
;; Horizontal add, halfwords to doublewords: even-indexed and odd-indexed
;; halfwords of operand 1 are widened with any_extend and added, so result
;; element i is halfword 2i + halfword 2i+1.
8861ba4d 15688(define_insn "xop_phadd<u>wd"
43a8b705
HJ
15689 [(set (match_operand:V4SI 0 "register_operand" "=x")
15690 (plus:V4SI
8861ba4d 15691 (any_extend:V4SI
43a8b705
HJ
15692 (vec_select:V4HI
15693 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
608dccd7
UB
15694 (parallel [(const_int 0) (const_int 2)
15695 (const_int 4) (const_int 6)])))
8861ba4d 15696 (any_extend:V4SI
43a8b705
HJ
15697 (vec_select:V4HI
15698 (match_dup 1)
608dccd7
UB
15699 (parallel [(const_int 1) (const_int 3)
15700 (const_int 5) (const_int 7)])))))]
43a8b705 15701 "TARGET_XOP"
8861ba4d 15702 "vphadd<u>wd\t{%1, %0|%0, %1}"
43a8b705
HJ
15703 [(set_attr "type" "sseiadd1")])
15704
;; Horizontal add, halfwords to quadwords: four two-element halfword
;; selections (indices k and k+4 for k = 0..3) are widened with any_extend
;; and summed as (a + b) + (c + d), so result element i is the sum of
;; halfwords 4i..4i+3.
8861ba4d 15705(define_insn "xop_phadd<u>wq"
43a8b705
HJ
15706 [(set (match_operand:V2DI 0 "register_operand" "=x")
15707 (plus:V2DI
15708 (plus:V2DI
8861ba4d 15709 (any_extend:V2DI
43a8b705
HJ
15710 (vec_select:V2HI
15711 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
608dccd7 15712 (parallel [(const_int 0) (const_int 4)])))
8861ba4d 15713 (any_extend:V2DI
43a8b705
HJ
15714 (vec_select:V2HI
15715 (match_dup 1)
608dccd7 15716 (parallel [(const_int 1) (const_int 5)]))))
43a8b705 15717 (plus:V2DI
8861ba4d 15718 (any_extend:V2DI
43a8b705
HJ
15719 (vec_select:V2HI
15720 (match_dup 1)
608dccd7 15721 (parallel [(const_int 2) (const_int 6)])))
8861ba4d 15722 (any_extend:V2DI
43a8b705
HJ
15723 (vec_select:V2HI
15724 (match_dup 1)
608dccd7 15725 (parallel [(const_int 3) (const_int 7)]))))))]
43a8b705 15726 "TARGET_XOP"
8861ba4d 15727 "vphadd<u>wq\t{%1, %0|%0, %1}"
43a8b705
HJ
15728 [(set_attr "type" "sseiadd1")])
15729
;; Horizontal add, doublewords to quadwords: elements (0,2) and (1,3) of
;; operand 1 are widened with any_extend and added, so result element i is
;; doubleword 2i + doubleword 2i+1.
8861ba4d 15730(define_insn "xop_phadd<u>dq"
43a8b705
HJ
15731 [(set (match_operand:V2DI 0 "register_operand" "=x")
15732 (plus:V2DI
8861ba4d 15733 (any_extend:V2DI
43a8b705
HJ
15734 (vec_select:V2SI
15735 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
608dccd7 15736 (parallel [(const_int 0) (const_int 2)])))
8861ba4d 15737 (any_extend:V2DI
43a8b705
HJ
15738 (vec_select:V2SI
15739 (match_dup 1)
608dccd7 15740 (parallel [(const_int 1) (const_int 3)])))))]
43a8b705 15741 "TARGET_XOP"
8861ba4d 15742 "vphadd<u>dq\t{%1, %0|%0, %1}"
43a8b705
HJ
15743 [(set_attr "type" "sseiadd1")])
15744
;; Horizontal subtract, bytes to halfwords: the sign-extended odd-indexed
;; bytes of operand 1 are subtracted from the sign-extended even-indexed
;; bytes, so result element i is byte 2i - byte 2i+1.  Unlike the phadd
;; patterns there is only a signed form (sign_extend, no any_extend).
15745(define_insn "xop_phsubbw"
15746 [(set (match_operand:V8HI 0 "register_operand" "=x")
15747 (minus:V8HI
15748 (sign_extend:V8HI
15749 (vec_select:V8QI
15750 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
608dccd7
UB
15751 (parallel [(const_int 0) (const_int 2)
15752 (const_int 4) (const_int 6)
15753 (const_int 8) (const_int 10)
15754 (const_int 12) (const_int 14)])))
43a8b705
HJ
15755 (sign_extend:V8HI
15756 (vec_select:V8QI
15757 (match_dup 1)
608dccd7
UB
15758 (parallel [(const_int 1) (const_int 3)
15759 (const_int 5) (const_int 7)
15760 (const_int 9) (const_int 11)
15761 (const_int 13) (const_int 15)])))))]
43a8b705
HJ
15762 "TARGET_XOP"
15763 "vphsubbw\t{%1, %0|%0, %1}"
15764 [(set_attr "type" "sseiadd1")])
15765
;; Horizontal subtract, halfwords to doublewords: result element i is the
;; sign-extended halfword 2i minus the sign-extended halfword 2i+1 of
;; operand 1.
15766(define_insn "xop_phsubwd"
15767 [(set (match_operand:V4SI 0 "register_operand" "=x")
15768 (minus:V4SI
15769 (sign_extend:V4SI
15770 (vec_select:V4HI
15771 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
608dccd7
UB
15772 (parallel [(const_int 0) (const_int 2)
15773 (const_int 4) (const_int 6)])))
43a8b705
HJ
15774 (sign_extend:V4SI
15775 (vec_select:V4HI
15776 (match_dup 1)
608dccd7
UB
15777 (parallel [(const_int 1) (const_int 3)
15778 (const_int 5) (const_int 7)])))))]
43a8b705
HJ
15779 "TARGET_XOP"
15780 "vphsubwd\t{%1, %0|%0, %1}"
15781 [(set_attr "type" "sseiadd1")])
15782
;; Horizontal subtract, doublewords to quadwords: result element i is the
;; sign-extended doubleword 2i minus the sign-extended doubleword 2i+1 of
;; operand 1.
15783(define_insn "xop_phsubdq"
15784 [(set (match_operand:V2DI 0 "register_operand" "=x")
15785 (minus:V2DI
15786 (sign_extend:V2DI
15787 (vec_select:V2SI
15788 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
608dccd7 15789 (parallel [(const_int 0) (const_int 2)])))
43a8b705
HJ
15790 (sign_extend:V2DI
15791 (vec_select:V2SI
15792 (match_dup 1)
608dccd7 15793 (parallel [(const_int 1) (const_int 3)])))))]
43a8b705
HJ
15794 "TARGET_XOP"
15795 "vphsubdq\t{%1, %0|%0, %1}"
15796 [(set_attr "type" "sseiadd1")])
15797
15798;; XOP permute instructions
;; Byte permute kept opaque as UNSPEC_XOP_PERMUTE over three V16QI inputs.
;; Operand 1 must be a register; the alternatives plus the insn condition
;; allow at most one of operands 2 and 3 to be a memory reference.
;; NOTE(review): operand 3 is presumably the per-byte selector -- confirm
;; against the vpperm instruction reference.
15799(define_insn "xop_pperm"
4926bb1d 15800 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
43a8b705 15801 (unspec:V16QI
4926bb1d
SP
15802 [(match_operand:V16QI 1 "register_operand" "x,x")
15803 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
15804 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
43a8b705 15805 UNSPEC_XOP_PERMUTE))]
4926bb1d 15806 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
43a8b705
HJ
15807 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15808 [(set_attr "type" "sse4arg")
15809 (set_attr "mode" "TI")])
15810
15811;; XOP pack instructions that combine two vectors into a smaller vector
;; Pack two V2DI vectors into one V4SI: the RTL is the concatenation of
;; the element-wise truncations of operands 1 and 2.  Operand 3 appears
;; only in a `use'; it is passed to vpperm as its first (AT&T order) source.
;; NOTE(review): the caller presumably has to supply a selector in operand 3
;; that actually performs this truncation -- the pattern does not check it.
15812(define_insn "xop_pperm_pack_v2di_v4si"
4926bb1d 15813 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
43a8b705
HJ
15814 (vec_concat:V4SI
15815 (truncate:V2SI
4926bb1d 15816 (match_operand:V2DI 1 "register_operand" "x,x"))
43a8b705 15817 (truncate:V2SI
4926bb1d
SP
15818 (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
15819 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
15820 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
43a8b705
HJ
15821 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15822 [(set_attr "type" "sse4arg")
15823 (set_attr "mode" "TI")])
15824
;; Pack two V4SI vectors into one V8HI: the RTL is the concatenation of
;; the element-wise truncations of operands 1 and 2.  Operand 3 appears
;; only in a `use' and is handed to vpperm.
;; NOTE(review): the selector in operand 3 is presumably required to encode
;; this truncation; the pattern itself does not verify that.
15825(define_insn "xop_pperm_pack_v4si_v8hi"
4926bb1d 15826 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
43a8b705
HJ
15827 (vec_concat:V8HI
15828 (truncate:V4HI
4926bb1d 15829 (match_operand:V4SI 1 "register_operand" "x,x"))
43a8b705 15830 (truncate:V4HI
4926bb1d
SP
15831 (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
15832 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
15833 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
43a8b705
HJ
15834 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15835 [(set_attr "type" "sse4arg")
15836 (set_attr "mode" "TI")])
15837
;; Pack two V8HI vectors into one V16QI: the RTL is the concatenation of
;; the element-wise truncations of operands 1 and 2.  Operand 3 appears
;; only in a `use' and is handed to vpperm.
;; NOTE(review): the selector in operand 3 is presumably required to encode
;; this truncation; the pattern itself does not verify that.
15838(define_insn "xop_pperm_pack_v8hi_v16qi"
4926bb1d 15839 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
43a8b705
HJ
15840 (vec_concat:V16QI
15841 (truncate:V8QI
4926bb1d 15842 (match_operand:V8HI 1 "register_operand" "x,x"))
43a8b705 15843 (truncate:V8QI
4926bb1d
SP
15844 (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
15845 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
15846 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
43a8b705
HJ
15847 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15848 [(set_attr "type" "sse4arg")
15849 (set_attr "mode" "TI")])
15850
15851;; XOP packed rotate instructions
;; Rotate-left expander for 128-bit integer vectors with a scalar SImode
;; count.  If the count is an immediate accepted by
;; const_0_to_<sserotatemax>_operand, nothing is emitted here and the
;; rotate RTL above is used as-is.  Otherwise the count is converted to the
;; element mode if needed, broadcast into a vector register with vec_init,
;; and the per-element variable rotate xop_vrotl<mode>3 is emitted.
15852(define_expand "rotl<mode>3"
82e86dc6 15853 [(set (match_operand:VI_128 0 "register_operand")
6bec6c98 15854 (rotate:VI_128
82e86dc6 15855 (match_operand:VI_128 1 "nonimmediate_operand")
43a8b705
HJ
15856 (match_operand:SI 2 "general_operand")))]
15857 "TARGET_XOP"
15858{
15859 /* If we were given a scalar, convert it to parallel */
15860 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
15861 {
15862 rtvec vs = rtvec_alloc (<ssescalarnum>);
15863 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
15864 rtx reg = gen_reg_rtx (<MODE>mode);
15865 rtx op2 = operands[2];
15866 int i;
15867
/* Bring the count into the vector element mode before broadcasting.  */
15868 if (GET_MODE (op2) != <ssescalarmode>mode)
977e83a3 15869 {
43a8b705
HJ
15870 op2 = gen_reg_rtx (<ssescalarmode>mode);
15871 convert_move (op2, operands[2], false);
15872 }
15873
/* Every lane of the PARALLEL receives the same count.  */
15874 for (i = 0; i < <ssescalarnum>; i++)
15875 RTVEC_ELT (vs, i) = op2;
15876
15877 emit_insn (gen_vec_init<mode> (reg, par));
15878 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
15879 DONE;
15880 }
15881})
15882
;; Rotate-right expander for 128-bit integer vectors with a scalar SImode
;; count.  An immediate accepted by const_0_to_<sserotatemax>_operand falls
;; through to the rotatert RTL above.  Otherwise the count is broadcast into
;; a vector, negated element-wise, and xop_vrotl<mode>3 is emitted; that
;; pattern treats a negative count as a rotate right.
15883(define_expand "rotr<mode>3"
82e86dc6 15884 [(set (match_operand:VI_128 0 "register_operand")
6bec6c98 15885 (rotatert:VI_128
82e86dc6 15886 (match_operand:VI_128 1 "nonimmediate_operand")
43a8b705
HJ
15887 (match_operand:SI 2 "general_operand")))]
15888 "TARGET_XOP"
15889{
15890 /* If we were given a scalar, convert it to parallel */
15891 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
15892 {
15893 rtvec vs = rtvec_alloc (<ssescalarnum>);
15894 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
15895 rtx neg = gen_reg_rtx (<MODE>mode);
15896 rtx reg = gen_reg_rtx (<MODE>mode);
15897 rtx op2 = operands[2];
15898 int i;
15899
/* Bring the count into the vector element mode before broadcasting.  */
15900 if (GET_MODE (op2) != <ssescalarmode>mode)
977e83a3 15901 {
43a8b705
HJ
15902 op2 = gen_reg_rtx (<ssescalarmode>mode);
15903 convert_move (op2, operands[2], false);
15904 }
15905
/* Every lane of the PARALLEL receives the same count.  */
15906 for (i = 0; i < <ssescalarnum>; i++)
15907 RTVEC_ELT (vs, i) = op2;
15908
15909 emit_insn (gen_vec_init<mode> (reg, par));
/* Negate the broadcast count so the left-rotate insn rotates right.  */
15910 emit_insn (gen_neg<mode>2 (neg, reg));
15911 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
15912 DONE;
15913 }
15914})
15915
;; Rotate left of every element of operand 1 by the immediate count in
;; operand 2 (restricted by const_0_to_<sserotatemax>_operand).
15916(define_insn "xop_rotl<mode>3"
6bec6c98
UB
15917 [(set (match_operand:VI_128 0 "register_operand" "=x")
15918 (rotate:VI_128
15919 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
43a8b705
HJ
15920 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
15921 "TARGET_XOP"
cbb734aa 15922 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
43a8b705
HJ
15923 [(set_attr "type" "sseishft")
15924 (set_attr "length_immediate" "1")
15925 (set_attr "mode" "TI")])
15926
;; Rotate right of every element of operand 1 by the immediate count in
;; operand 2.  The output code emits the same vprot mnemonic as the
;; rotate-left pattern, using a synthesized operand 3 equal to the element
;; width in bits minus the requested count.
15927(define_insn "xop_rotr<mode>3"
6bec6c98
UB
15928 [(set (match_operand:VI_128 0 "register_operand" "=x")
15929 (rotatert:VI_128
15930 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
43a8b705
HJ
15931 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
15932 "TARGET_XOP"
15933{
/* Express "right by n" as "left by (element bits - n)".  */
ba8011e6
JJ
15934 operands[3]
15935 = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
cbb734aa 15936 return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\";
43a8b705
HJ
15937}
15938 [(set_attr "type" "sseishft")
15939 (set_attr "length_immediate" "1")
15940 (set_attr "mode" "TI")])
15941
;; Per-element variable rotate right: negates the count vector (operand 2)
;; into a fresh register and emits xop_vrotl<mode>3, which rotates right
;; for negative counts.
15942(define_expand "vrotr<mode>3"
82e86dc6
UB
15943 [(match_operand:VI_128 0 "register_operand")
15944 (match_operand:VI_128 1 "register_operand")
15945 (match_operand:VI_128 2 "register_operand")]
43a8b705
HJ
15946 "TARGET_XOP"
15947{
15948 rtx reg = gen_reg_rtx (<MODE>mode);
15949 emit_insn (gen_neg<mode>2 (reg, operands[2]));
15950 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
15951 DONE;
15952})
15953
;; Per-element variable rotate left: forwards the three operands unchanged
;; to xop_vrotl<mode>3.
15954(define_expand "vrotl<mode>3"
82e86dc6
UB
15955 [(match_operand:VI_128 0 "register_operand")
15956 (match_operand:VI_128 1 "register_operand")
15957 (match_operand:VI_128 2 "register_operand")]
43a8b705
HJ
15958 "TARGET_XOP"
15959{
15960 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
15961 DONE;
15962})
15963
;; Per-element variable rotate.  The RTL selects on the sign of the count
;; in operand 2: a count >= 0 rotates operand 1 left by that count, a
;; negative count rotates it right by the negated count.  At most one of
;; operands 1 and 2 may be in memory (constraint alternatives plus the insn
;; condition).
15964(define_insn "xop_vrotl<mode>3"
6bec6c98
UB
15965 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
15966 (if_then_else:VI_128
15967 (ge:VI_128
15968 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
43a8b705 15969 (const_int 0))
6bec6c98
UB
15970 (rotate:VI_128
15971 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
43a8b705 15972 (match_dup 2))
6bec6c98 15973 (rotatert:VI_128
43a8b705 15974 (match_dup 1)
6bec6c98 15975 (neg:VI_128 (match_dup 2)))))]
4926bb1d 15976 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
cbb734aa 15977 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
43a8b705
HJ
15978 [(set_attr "type" "sseishft")
15979 (set_attr "prefix_data16" "0")
15980 (set_attr "prefix_extra" "2")
15981 (set_attr "mode" "TI")])
15982
15983;; XOP packed shift instructions.
;; Per-element variable logical shift right for the VI12_128 modes (XOP
;; only).  The count vector is negated and handed to xop_shl<mode>3, whose
;; RTL performs a logical right shift for negative counts.
43a8b705 15984(define_expand "vlshr<mode>3"
82e86dc6 15985 [(set (match_operand:VI12_128 0 "register_operand")
c4ab64c6 15986 (lshiftrt:VI12_128
82e86dc6
UB
15987 (match_operand:VI12_128 1 "register_operand")
15988 (match_operand:VI12_128 2 "nonimmediate_operand")))]
43a8b705
HJ
15989 "TARGET_XOP"
15990{
15991 rtx neg = gen_reg_rtx (<MODE>mode);
15992 emit_insn (gen_neg<mode>2 (neg, operands[2]));
b99f906a 15993 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
43a8b705
HJ
15994 DONE;
15995})
15996
;; Per-element variable logical shift right for the VI48_128 modes.
;; With AVX2 the lshiftrt RTL above is emitted unchanged; without AVX2
;; (i.e. XOP only) the count is negated and xop_shl<mode>3 is used instead.
;; NOTE(review): the AVX2 path presumably relies on a variable-shift insn
;; defined elsewhere in the file -- not visible in this chunk.
ee3b466d 15997(define_expand "vlshr<mode>3"
82e86dc6 15998 [(set (match_operand:VI48_128 0 "register_operand")
ee3b466d 15999 (lshiftrt:VI48_128
82e86dc6
UB
16000 (match_operand:VI48_128 1 "register_operand")
16001 (match_operand:VI48_128 2 "nonimmediate_operand")))]
ee3b466d
JJ
16002 "TARGET_AVX2 || TARGET_XOP"
16003{
16004 if (!TARGET_AVX2)
16005 {
16006 rtx neg = gen_reg_rtx (<MODE>mode);
16007 emit_insn (gen_neg<mode>2 (neg, operands[2]));
b99f906a 16008 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
ee3b466d
JJ
16009 DONE;
16010 }
16011})
16012
0fe65b75
AI
;; Per-element variable logical shift right for the VI48_512 modes under
;; AVX512F.  There is no preparation code: the lshiftrt RTL is emitted as is.
16013(define_expand "vlshr<mode>3"
16014 [(set (match_operand:VI48_512 0 "register_operand")
16015 (lshiftrt:VI48_512
16016 (match_operand:VI48_512 1 "register_operand")
16017 (match_operand:VI48_512 2 "nonimmediate_operand")))]
16018 "TARGET_AVX512F")
16019
;; Per-element variable logical shift right for the VI48_256 modes under
;; AVX2.  There is no preparation code: the lshiftrt RTL is emitted as is.
ee3b466d 16020(define_expand "vlshr<mode>3"
82e86dc6 16021 [(set (match_operand:VI48_256 0 "register_operand")
ee3b466d 16022 (lshiftrt:VI48_256
82e86dc6
UB
16023 (match_operand:VI48_256 1 "register_operand")
16024 (match_operand:VI48_256 2 "nonimmediate_operand")))]
ee3b466d
JJ
16025 "TARGET_AVX2")
16026
b92883d6
IT
;; Per-element variable arithmetic shift right for V8HI.  When XOP is
;; available the count is negated and xop_shav8hi3 is emitted (its RTL
;; shifts right arithmetically for negative counts).  Otherwise, with
;; AVX512BW and AVX512VL both enabled, the ashiftrt RTL above is emitted
;; unchanged.  XOP takes precedence when both are enabled.
16027(define_expand "vashrv8hi3<mask_name>"
16028 [(set (match_operand:V8HI 0 "register_operand")
16029 (ashiftrt:V8HI
16030 (match_operand:V8HI 1 "register_operand")
16031 (match_operand:V8HI 2 "nonimmediate_operand")))]
06ba0585 16032 "TARGET_XOP || (TARGET_AVX512BW && TARGET_AVX512VL)"
43a8b705 16033{
06ba0585
AI
16034 if (TARGET_XOP)
16035 {
b92883d6
IT
16036 rtx neg = gen_reg_rtx (V8HImode);
16037 emit_insn (gen_negv8hi2 (neg, operands[2]));
16038 emit_insn (gen_xop_shav8hi3 (operands[0], operands[1], neg));
06ba0585
AI
16039 DONE;
16040 }
16041})
16042
b92883d6
IT
;; Per-element variable arithmetic shift right for V16QI (XOP only): the
;; count vector is negated and passed to xop_shav16qi3, whose RTL shifts
;; right arithmetically for negative counts.
16043(define_expand "vashrv16qi3"
16044 [(set (match_operand:V16QI 0 "register_operand")
16045 (ashiftrt:V16QI
16046 (match_operand:V16QI 1 "register_operand")
16047 (match_operand:V16QI 2 "nonimmediate_operand")))]
16048 "TARGET_XOP"
16049{
16050 rtx neg = gen_reg_rtx (V16QImode);
16051 emit_insn (gen_negv16qi2 (neg, operands[2]));
16052 emit_insn (gen_xop_shav16qi3 (operands[0], operands[1], neg));
16053 DONE;
16054})
16055
06ba0585
AI
;; Per-element variable arithmetic shift right for V2DI.  When XOP is
;; available the count is negated and xop_shav2di3 is emitted; otherwise
;; (AVX512VL) the ashiftrt RTL above is emitted unchanged.  XOP takes
;; precedence when both are enabled.
16056(define_expand "vashrv2di3<mask_name>"
16057 [(set (match_operand:V2DI 0 "register_operand")
16058 (ashiftrt:V2DI
16059 (match_operand:V2DI 1 "register_operand")
16060 (match_operand:V2DI 2 "nonimmediate_operand")))]
16061 "TARGET_XOP || TARGET_AVX512VL"
16062{
16063 if (TARGET_XOP)
16064 {
16065 rtx neg = gen_reg_rtx (V2DImode);
16066 emit_insn (gen_negv2di2 (neg, operands[2]));
16067 emit_insn (gen_xop_shav2di3 (operands[0], operands[1], neg));
16068 DONE;
16069 }
43a8b705
HJ
16070})
16071
;; Per-element variable arithmetic shift right for V4SI.  With AVX2 the
;; ashiftrt RTL above is emitted unchanged; without AVX2 (XOP only) the
;; count is negated and xop_shav4si3 is used.  Note the precedence is the
;; opposite of the V8HI/V2DI expanders: here AVX2 wins over XOP.
ee3b466d 16072(define_expand "vashrv4si3"
82e86dc6
UB
16073 [(set (match_operand:V4SI 0 "register_operand")
16074 (ashiftrt:V4SI (match_operand:V4SI 1 "register_operand")
16075 (match_operand:V4SI 2 "nonimmediate_operand")))]
ee3b466d
JJ
16076 "TARGET_AVX2 || TARGET_XOP"
16077{
16078 if (!TARGET_AVX2)
16079 {
16080 rtx neg = gen_reg_rtx (V4SImode);
16081 emit_insn (gen_negv4si2 (neg, operands[2]));
b99f906a 16082 emit_insn (gen_xop_shav4si3 (operands[0], operands[1], neg));
ee3b466d
JJ
16083 DONE;
16084 }
16085})
16086
b868b7ca
AI
;; Per-element variable arithmetic shift right for V16SI under AVX512F.
;; No preparation code: the ashiftrt RTL is emitted as is.
16087(define_expand "vashrv16si3"
16088 [(set (match_operand:V16SI 0 "register_operand")
16089 (ashiftrt:V16SI (match_operand:V16SI 1 "register_operand")
16090 (match_operand:V16SI 2 "nonimmediate_operand")))]
16091 "TARGET_AVX512F")
16092
;; Per-element variable arithmetic shift right for V8SI under AVX2.
;; No preparation code: the ashiftrt RTL is emitted as is.
ee3b466d 16093(define_expand "vashrv8si3"
82e86dc6
UB
16094 [(set (match_operand:V8SI 0 "register_operand")
16095 (ashiftrt:V8SI (match_operand:V8SI 1 "register_operand")
16096 (match_operand:V8SI 2 "nonimmediate_operand")))]
ee3b466d
JJ
16097 "TARGET_AVX2")
16098
;; Per-element variable shift left for the VI12_128 modes (XOP only):
;; forwards the operands unchanged to xop_sha<mode>3, whose RTL shifts left
;; for non-negative counts.
43a8b705 16099(define_expand "vashl<mode>3"
82e86dc6 16100 [(set (match_operand:VI12_128 0 "register_operand")
c4ab64c6 16101 (ashift:VI12_128
82e86dc6
UB
16102 (match_operand:VI12_128 1 "register_operand")
16103 (match_operand:VI12_128 2 "nonimmediate_operand")))]
43a8b705
HJ
16104 "TARGET_XOP"
16105{
b99f906a 16106 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
43a8b705
HJ
16107 DONE;
16108})
16109
;; Per-element variable shift left for the VI48_128 modes.  With AVX2 the
;; ashift RTL above is emitted unchanged; without AVX2 (XOP only) the count
;; is forced into a register and xop_sha<mode>3 is emitted.
ee3b466d 16110(define_expand "vashl<mode>3"
82e86dc6 16111 [(set (match_operand:VI48_128 0 "register_operand")
ee3b466d 16112 (ashift:VI48_128
82e86dc6
UB
16113 (match_operand:VI48_128 1 "register_operand")
16114 (match_operand:VI48_128 2 "nonimmediate_operand")))]
ee3b466d
JJ
16115 "TARGET_AVX2 || TARGET_XOP"
16116{
16117 if (!TARGET_AVX2)
16118 {
16119 operands[2] = force_reg (<MODE>mode, operands[2]);
b99f906a 16120 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
ee3b466d
JJ
16121 DONE;
16122 }
16123})
16124
0fe65b75
AI
;; Per-element variable shift left for the VI48_512 modes under AVX512F.
;; No preparation code: the ashift RTL is emitted as is.
16125(define_expand "vashl<mode>3"
16126 [(set (match_operand:VI48_512 0 "register_operand")
16127 (ashift:VI48_512
16128 (match_operand:VI48_512 1 "register_operand")
16129 (match_operand:VI48_512 2 "nonimmediate_operand")))]
16130 "TARGET_AVX512F")
16131
;; Per-element variable shift left for the VI48_256 modes under AVX2.
;; No preparation code: the ashift RTL is emitted as is.
ee3b466d 16132(define_expand "vashl<mode>3"
82e86dc6 16133 [(set (match_operand:VI48_256 0 "register_operand")
ee3b466d 16134 (ashift:VI48_256
82e86dc6
UB
16135 (match_operand:VI48_256 1 "register_operand")
16136 (match_operand:VI48_256 2 "nonimmediate_operand")))]
ee3b466d
JJ
16137 "TARGET_AVX2")
16138
;; Per-element variable arithmetic shift.  The RTL selects on the sign of
;; the count in operand 2: a count >= 0 shifts operand 1 left, a negative
;; count shifts it right arithmetically by the negated count.  At most one
;; of operands 1 and 2 may be in memory.
b99f906a 16139(define_insn "xop_sha<mode>3"
6bec6c98
UB
16140 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
16141 (if_then_else:VI_128
16142 (ge:VI_128
16143 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
43a8b705 16144 (const_int 0))
6bec6c98
UB
16145 (ashift:VI_128
16146 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
43a8b705 16147 (match_dup 2))
6bec6c98 16148 (ashiftrt:VI_128
43a8b705 16149 (match_dup 1)
6bec6c98 16150 (neg:VI_128 (match_dup 2)))))]
4926bb1d 16151 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
cbb734aa 16152 "vpsha<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
43a8b705
HJ
16153 [(set_attr "type" "sseishft")
16154 (set_attr "prefix_data16" "0")
16155 (set_attr "prefix_extra" "2")
16156 (set_attr "mode" "TI")])
16157
;; Per-element variable logical shift.  The RTL selects on the sign of the
;; count in operand 2: a count >= 0 shifts operand 1 left, a negative count
;; shifts it right logically by the negated count.  At most one of
;; operands 1 and 2 may be in memory.
b99f906a 16158(define_insn "xop_shl<mode>3"
6bec6c98
UB
16159 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
16160 (if_then_else:VI_128
16161 (ge:VI_128
16162 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
43a8b705 16163 (const_int 0))
6bec6c98
UB
16164 (ashift:VI_128
16165 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
43a8b705 16166 (match_dup 2))
6bec6c98 16167 (lshiftrt:VI_128
43a8b705 16168 (match_dup 1)
6bec6c98 16169 (neg:VI_128 (match_dup 2)))))]
4926bb1d 16170 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
cbb734aa 16171 "vpshl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
43a8b705
HJ
16172 [(set_attr "type" "sseishft")
16173 (set_attr "prefix_data16" "0")
16174 (set_attr "prefix_extra" "2")
16175 (set_attr "mode" "TI")])
16176
;; Byte-vector shift by a scalar SImode count, for every code in any_shift.
;; With XOP and V16QImode the count is broadcast to all 16 lanes and one of
;; the sign-controlled XOP shifts is emitted: right shifts use a negated
;; count (folded at expand time for constants, negated at run time
;; otherwise), and LSHIFTRT picks xop_shlv16qi3 while ASHIFT/ASHIFTRT pick
;; xop_shav16qi3.  All other cases are delegated to
;; ix86_expand_vecop_qihi.  The expander always finishes with DONE.
2d542a9f 16177(define_expand "<shift_insn><mode>3"
f5db965f
IT
16178 [(set (match_operand:VI1_AVX512 0 "register_operand")
16179 (any_shift:VI1_AVX512
16180 (match_operand:VI1_AVX512 1 "register_operand")
82e86dc6 16181 (match_operand:SI 2 "nonmemory_operand")))]
2d542a9f 16182 "TARGET_SSE2"
43a8b705 16183{
2d542a9f
RH
16184 if (TARGET_XOP && <MODE>mode == V16QImode)
16185 {
16186 bool negate = false;
16187 rtx (*gen) (rtx, rtx, rtx);
16188 rtx tmp, par;
16189 int i;
f327a48e 16190
/* Right shifts are encoded as left shifts by a negative count.  */
2d542a9f
RH
16191 if (<CODE> != ASHIFT)
16192 {
16193 if (CONST_INT_P (operands[2]))
16194 operands[2] = GEN_INT (-INTVAL (operands[2]));
16195 else
16196 negate = true;
16197 }
/* Broadcast the (possibly pre-negated) count to all 16 byte lanes.  */
16198 par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
16199 for (i = 0; i < 16; i++)
16200 XVECEXP (par, 0, i) = operands[2];
43a8b705 16201
2d542a9f
RH
16202 tmp = gen_reg_rtx (V16QImode);
16203 emit_insn (gen_vec_initv16qi (tmp, par));
43a8b705 16204
2d542a9f
RH
16205 if (negate)
16206 emit_insn (gen_negv16qi2 (tmp, tmp));
f327a48e 16207
2d542a9f
RH
16208 gen = (<CODE> == LSHIFTRT ? gen_xop_shlv16qi3 : gen_xop_shav16qi3);
16209 emit_insn (gen (operands[0], operands[1], tmp));
16210 }
43a8b705 16211 else
2d542a9f 16212 ix86_expand_vecop_qihi (<CODE>, operands[0], operands[1], operands[2]);
43a8b705
HJ
16213 DONE;
16214})
16215
;; Arithmetic shift right of V2DI by a scalar DImode count.  With AVX512VL
;; the ashiftrt RTL above is emitted unchanged.  Otherwise (XOP only) the
;; negated count is broadcast to both lanes -- folded at expand time for a
;; constant, negated with negv2di2 otherwise -- and xop_shav2di3 is emitted.
16216(define_expand "ashrv2di3"
82e86dc6 16217 [(set (match_operand:V2DI 0 "register_operand")
1162730f 16218 (ashiftrt:V2DI
82e86dc6
UB
16219 (match_operand:V2DI 1 "register_operand")
16220 (match_operand:DI 2 "nonmemory_operand")))]
28e9a294 16221 "TARGET_XOP || TARGET_AVX512VL"
43a8b705 16222{
28e9a294
AI
16223 if (!TARGET_AVX512VL)
16224 {
16225 rtx reg = gen_reg_rtx (V2DImode);
16226 rtx par;
16227 bool negate = false;
16228 int i;
43a8b705 16229
/* A right shift is a left shift by the negated count.  */
28e9a294
AI
16230 if (CONST_INT_P (operands[2]))
16231 operands[2] = GEN_INT (-INTVAL (operands[2]));
16232 else
16233 negate = true;
f327a48e 16234
28e9a294
AI
16235 par = gen_rtx_PARALLEL (V2DImode, rtvec_alloc (2));
16236 for (i = 0; i < 2; i++)
16237 XVECEXP (par, 0, i) = operands[2];
43a8b705 16238
28e9a294 16239 emit_insn (gen_vec_initv2di (reg, par));
f327a48e 16240
28e9a294
AI
16241 if (negate)
16242 emit_insn (gen_negv2di2 (reg, reg));
f327a48e 16243
28e9a294
AI
16244 emit_insn (gen_xop_shav2di3 (operands[0], operands[1], reg));
16245 DONE;
16246 }
43a8b705
HJ
16247})
16248
16249;; XOP FRCZ support
;; Full-vector vfrcz over the FMAMODE modes, kept opaque as UNSPEC_FRCZ of
;; the single source operand 1.
;; NOTE(review): presumably "extract fraction" per the XOP ISA -- the RTL
;; here does not describe the arithmetic; confirm in the instruction manual.
43a8b705 16250(define_insn "xop_frcz<mode>2"
89509419
RH
16251 [(set (match_operand:FMAMODE 0 "register_operand" "=x")
16252 (unspec:FMAMODE
16253 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
43a8b705
HJ
16254 UNSPEC_FRCZ))]
16255 "TARGET_XOP"
1c154a23 16256 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
43a8b705
HJ
16257 [(set_attr "type" "ssecvt1")
16258 (set_attr "mode" "<MODE>")])
16259
;; Scalar (low-element) vfrcz expander.  The result is a vec_merge with
;; mask 1: element 0 comes from the UNSPEC_FRCZ of operand 1, the remaining
;; elements come from operand 2, which the preparation statement sets to
;; the zero vector of the mode.
89509419 16260(define_expand "xop_vmfrcz<mode>2"
6bec6c98
UB
16261 [(set (match_operand:VF_128 0 "register_operand")
16262 (vec_merge:VF_128
16263 (unspec:VF_128
16264 [(match_operand:VF_128 1 "nonimmediate_operand")]
43a8b705 16265 UNSPEC_FRCZ)
1287ae50 16266 (match_dup 2)
43a8b705
HJ
16267 (const_int 1)))]
16268 "TARGET_XOP"
1287ae50 16269 "operands[2] = CONST0_RTX (<MODE>mode);")
43a8b705 16270
;; Matcher for the scalar vfrcz form: element 0 is the UNSPEC_FRCZ of
;; operand 1 and the other elements must be the constant zero vector
;; (operand 2, const0_operand).  Only operands 0 and 1 appear in the
;; assembler template.
b84acf54 16271(define_insn "*xop_vmfrcz<mode>2"
6bec6c98
UB
16272 [(set (match_operand:VF_128 0 "register_operand" "=x")
16273 (vec_merge:VF_128
16274 (unspec:VF_128
16275 [(match_operand:VF_128 1 "nonimmediate_operand" "xm")]
89509419 16276 UNSPEC_FRCZ)
6bec6c98 16277 (match_operand:VF_128 2 "const0_operand")
89509419 16278 (const_int 1)))]
43a8b705 16279 "TARGET_XOP"
eabb5f48 16280 "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}"
43a8b705
HJ
16281 [(set_attr "type" "ssecvt1")
16282 (set_attr "mode" "<MODE>")])
16283
;; Signed integer vector compare.  Operator 1 (any code accepted by
;; ix86_comparison_int_operator) is applied to operands 2 and 3 and the
;; result written to operand 0.
;; NOTE(review): "%Y1" presumably prints the condition suffix derived from
;; operator 1 -- confirm against the operand-print code for the i386 port.
16284(define_insn "xop_maskcmp<mode>3"
6bec6c98
UB
16285 [(set (match_operand:VI_128 0 "register_operand" "=x")
16286 (match_operator:VI_128 1 "ix86_comparison_int_operator"
16287 [(match_operand:VI_128 2 "register_operand" "x")
16288 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
43a8b705 16289 "TARGET_XOP"
cbb734aa 16290 "vpcom%Y1<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
43a8b705
HJ
16291 [(set_attr "type" "sse4arg")
16292 (set_attr "prefix_data16" "0")
16293 (set_attr "prefix_rep" "0")
16294 (set_attr "prefix_extra" "2")
16295 (set_attr "length_immediate" "1")
16296 (set_attr "mode" "TI")])
16297
;; Unsigned integer vector compare.  Operator 1 (any code accepted by
;; ix86_comparison_uns_operator) is applied to operands 2 and 3; the
;; template differs from the signed pattern only by the "u" inserted before
;; the mode suffix.
16298(define_insn "xop_maskcmp_uns<mode>3"
6bec6c98
UB
16299 [(set (match_operand:VI_128 0 "register_operand" "=x")
16300 (match_operator:VI_128 1 "ix86_comparison_uns_operator"
16301 [(match_operand:VI_128 2 "register_operand" "x")
16302 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
43a8b705 16303 "TARGET_XOP"
cbb734aa 16304 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
43a8b705
HJ
16305 [(set_attr "type" "ssecmp")
16306 (set_attr "prefix_data16" "0")
16307 (set_attr "prefix_rep" "0")
16308 (set_attr "prefix_extra" "2")
16309 (set_attr "length_immediate" "1")
16310 (set_attr "mode" "TI")])
16311
16312;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
16313;; and pcomneu* not to be converted to the signed ones in case somebody needs
16314;; the exact instruction generated for the intrinsic.
;; Same unsigned compare as xop_maskcmp_uns<mode>3, but with the comparison
;; wrapped in UNSPEC_XOP_UNSIGNED_CMP so the RTL optimizers cannot rewrite
;; the operator.  Emits the identical vpcom...u template.
16315(define_insn "xop_maskcmp_uns2<mode>3"
6bec6c98
UB
16316 [(set (match_operand:VI_128 0 "register_operand" "=x")
16317 (unspec:VI_128
16318 [(match_operator:VI_128 1 "ix86_comparison_uns_operator"
16319 [(match_operand:VI_128 2 "register_operand" "x")
16320 (match_operand:VI_128 3 "nonimmediate_operand" "xm")])]
43a8b705
HJ
16321 UNSPEC_XOP_UNSIGNED_CMP))]
16322 "TARGET_XOP"
cbb734aa 16323 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
43a8b705
HJ
16324 [(set_attr "type" "ssecmp")
16325 (set_attr "prefix_data16" "0")
16326 (set_attr "prefix_extra" "2")
16327 (set_attr "length_immediate" "1")
16328 (set_attr "mode" "TI")])
16329
16330;; Pcomtrue and pcomfalse support. These are useless instructions, but are
16331;; being added here to be complete.
;; vpcomtrue / vpcomfalse, kept opaque as UNSPEC_XOP_TRUEFALSE.  The
;; constant in operand 3 picks the mnemonic at output time: non-zero emits
;; vpcomtrue, zero emits vpcomfalse.  Operands 1 and 2 are passed through
;; to the instruction.
16332(define_insn "xop_pcom_tf<mode>3"
6bec6c98
UB
16333 [(set (match_operand:VI_128 0 "register_operand" "=x")
16334 (unspec:VI_128
16335 [(match_operand:VI_128 1 "register_operand" "x")
16336 (match_operand:VI_128 2 "nonimmediate_operand" "xm")
43a8b705
HJ
16337 (match_operand:SI 3 "const_int_operand" "n")]
16338 UNSPEC_XOP_TRUEFALSE))]
16339 "TARGET_XOP"
16340{
16341 return ((INTVAL (operands[3]) != 0)
cbb734aa
UB
16342 ? "vpcomtrue<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
16343 : "vpcomfalse<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}");
43a8b705
HJ
16344}
16345 [(set_attr "type" "ssecmp")
16346 (set_attr "prefix_data16" "0")
16347 (set_attr "prefix_extra" "2")
16348 (set_attr "length_immediate" "1")
16349 (set_attr "mode" "TI")])
16350
;; vpermil2ps / vpermil2pd for 128- and 256-bit float vectors, kept opaque
;; as UNSPEC_VPERMIL2.  Inputs: two float vectors (operands 1 and 2), an
;; integer vector of the matching <sseintvecmode> (operand 3, may be in
;; memory) and a 2-bit immediate (operand 4, range 0..3).
;; NOTE(review): operand 3 is presumably the per-element selector and
;; operand 4 the zero-match control -- confirm in the XOP instruction
;; reference.
02edd2f6 16351(define_insn "xop_vpermil2<mode>3"
b86f6e9e
AI
16352 [(set (match_operand:VF_128_256 0 "register_operand" "=x")
16353 (unspec:VF_128_256
16354 [(match_operand:VF_128_256 1 "register_operand" "x")
16355 (match_operand:VF_128_256 2 "nonimmediate_operand" "%x")
cbb734aa 16356 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "xm")
02edd2f6
SP
16357 (match_operand:SI 4 "const_0_to_3_operand" "n")]
16358 UNSPEC_VPERMIL2))]
16359 "TARGET_XOP"
1c154a23 16360 "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
02edd2f6
SP
16361 [(set_attr "type" "sse4arg")
16362 (set_attr "length_immediate" "1")
16363 (set_attr "mode" "<MODE>")])
16364
43a8b705 16365;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
95879c72 16366
;; AES encryption round, kept opaque as UNSPEC_AESENC of operands 1 and 2.
;; Alternative 0 is the legacy two-operand form (operand 1 tied to the
;; destination by the "0" constraint); alternative 1 is the three-operand
;; VEX form.  The "isa" attribute restricts them to noavx and avx
;; respectively.
8b96a312 16367(define_insn "aesenc"
5e60198b
UB
16368 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
16369 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
16370 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
8b96a312
L
16371 UNSPEC_AESENC))]
16372 "TARGET_AES"
5e60198b
UB
16373 "@
16374 aesenc\t{%2, %0|%0, %2}
16375 vaesenc\t{%2, %1, %0|%0, %1, %2}"
16376 [(set_attr "isa" "noavx,avx")
16377 (set_attr "type" "sselog1")
725fd454 16378 (set_attr "prefix_extra" "1")
5e60198b 16379 (set_attr "prefix" "orig,vex")
01284895 16380 (set_attr "btver2_decode" "double,double")
95879c72
L
16381 (set_attr "mode" "TI")])
16382
;; Final AES encryption round, kept opaque as UNSPEC_AESENCLAST of
;; operands 1 and 2.  Alternative 0 is the legacy two-operand form
;; (operand 1 tied to the destination), alternative 1 the three-operand VEX
;; form; "isa" limits them to noavx and avx respectively.
8b96a312 16383(define_insn "aesenclast"
5e60198b
UB
16384 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
16385 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
16386 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
8b96a312
L
16387 UNSPEC_AESENCLAST))]
16388 "TARGET_AES"
5e60198b
UB
16389 "@
16390 aesenclast\t{%2, %0|%0, %2}
16391 vaesenclast\t{%2, %1, %0|%0, %1, %2}"
16392 [(set_attr "isa" "noavx,avx")
16393 (set_attr "type" "sselog1")
725fd454 16394 (set_attr "prefix_extra" "1")
5e60198b 16395 (set_attr "prefix" "orig,vex")
01284895 16396 (set_attr "btver2_decode" "double,double")
95879c72
L
16397 (set_attr "mode" "TI")])
16398
;; AES decryption round, kept opaque as UNSPEC_AESDEC of operands 1 and 2.
;; Alternative 0 is the legacy two-operand form (operand 1 tied to the
;; destination), alternative 1 the three-operand VEX form; "isa" limits
;; them to noavx and avx respectively.
8b96a312 16399(define_insn "aesdec"
5e60198b
UB
16400 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
16401 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
16402 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
8b96a312
L
16403 UNSPEC_AESDEC))]
16404 "TARGET_AES"
5e60198b
UB
16405 "@
16406 aesdec\t{%2, %0|%0, %2}
16407 vaesdec\t{%2, %1, %0|%0, %1, %2}"
16408 [(set_attr "isa" "noavx,avx")
16409 (set_attr "type" "sselog1")
725fd454 16410 (set_attr "prefix_extra" "1")
5e60198b 16411 (set_attr "prefix" "orig,vex")
01284895 16412 (set_attr "btver2_decode" "double,double")
95879c72
L
16413 (set_attr "mode" "TI")])
16414
;; Final AES decryption round, kept opaque as UNSPEC_AESDECLAST of
;; operands 1 and 2.  Alternative 0 is the legacy two-operand form
;; (operand 1 tied to the destination), alternative 1 the three-operand VEX
;; form; "isa" limits them to noavx and avx respectively.
8b96a312 16415(define_insn "aesdeclast"
5e60198b
UB
16416 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
16417 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
16418 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
8b96a312
L
16419 UNSPEC_AESDECLAST))]
16420 "TARGET_AES"
5e60198b
UB
16421 "@
16422 aesdeclast\t{%2, %0|%0, %2}
16423 vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
16424 [(set_attr "isa" "noavx,avx")
16425 (set_attr "type" "sselog1")
8b96a312 16426 (set_attr "prefix_extra" "1")
5e60198b 16427 (set_attr "prefix" "orig,vex")
01284895 16428 (set_attr "btver2_decode" "double,double")
8b96a312
L
16429 (set_attr "mode" "TI")])
16430
;; aesimc: single-source AES unspec.  The %v in the template selects the
;; VEX spelling when AVX is enabled, hence the "maybe_vex" prefix.
(define_insn "aesimc"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
          UNSPEC_AESIMC))]
  "TARGET_AES"
  "%vaesimc\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
16441
;; aeskeygenassist: operand 2 is an 8-bit immediate (0..255), so the
;; encoding carries one immediate byte.
(define_insn "aeskeygenassist"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
           (match_operand:SI 2 "const_0_to_255_operand" "n")]
          UNSPEC_AESKEYGENASSIST))]
  "TARGET_AES"
  "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
16454
;; pclmulqdq / vpclmulqdq with an 8-bit immediate selector (operand 3).
;; Alternative 0 ties operand 1 to the destination (legacy encoding);
;; alternative 1 is the three-operand VEX form.
(define_insn "pclmulqdq"
  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "register_operand" "0,x")
           (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")
           (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
          UNSPEC_PCLMUL))]
  "TARGET_PCLMUL"
  "@
   pclmulqdq\t{%3, %2, %0|%0, %2, %3}
   vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "sselog1")
   (set_attr "isa" "noavx,avx")
   (set_attr "prefix" "orig,vex")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
95879c72
L
16471
;; Build the PARALLEL for vzeroall: element 0 is the UNSPECV_VZEROALL
;; marker, followed by one (set reg 0) per SSE register in V8SImode
;; (16 registers in 64-bit mode, 8 otherwise).
(define_expand "avx_vzeroall"
  [(match_par_dup 0 [(const_int 0)])]
  "TARGET_AVX"
{
  const int n_sse = TARGET_64BIT ? 16 : 8;
  rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (n_sse + 1));
  int i;

  XVECEXP (par, 0, 0)
    = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
                               UNSPECV_VZEROALL);

  for (i = 0; i < n_sse; i++)
    {
      rtx reg = gen_rtx_REG (V8SImode, SSE_REGNO (i));
      XVECEXP (par, 0, i + 1) = gen_rtx_SET (reg, CONST0_RTX (V8SImode));
    }

  operands[0] = par;
})
16490
;; Matcher for the PARALLEL built by the avx_vzeroall expander; the
;; remaining elements are validated by the vzeroall_operation predicate.
(define_insn "*avx_vzeroall"
  [(match_parallel 0 "vzeroall_operation"
     [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
  "TARGET_AVX"
  "vzeroall"
  [(set_attr "type" "sse")
   (set_attr "prefix" "vex")
   (set_attr "modrm" "0")
   (set_attr "memory" "none")
   (set_attr "btver2_decode" "vector")
   (set_attr "mode" "OI")])
16502
2767a7f2
L
;; vzeroupper zeroes the upper 128 bits of the AVX registers.  When those
;; upper halves are not in use it behaves like a NOP.
(define_insn "avx_vzeroupper"
  [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)]
  "TARGET_AVX"
  "vzeroupper"
  [(set_attr "type" "sse")
   (set_attr "prefix" "vex")
   (set_attr "modrm" "0")
   (set_attr "memory" "none")
   (set_attr "btver2_decode" "vector")
   (set_attr "mode" "OI")])
16515
977e83a3
KY
;; Broadcast element 0 of an XMM-sized source (register or memory) to
;; every element of the destination vector.
(define_insn "avx2_pbroadcast<mode>"
  [(set (match_operand:VI 0 "register_operand" "=x")
        (vec_duplicate:VI
          (vec_select:<ssescalarmode>
            (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0)]))))]
  "TARGET_AVX2"
  "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "vex")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "<sseinsnmode>")])
16528
;; Same broadcast, but the source is a full 256-bit vector.  Alternative 0
;; reads the scalar from memory; alternative 1 uses the low XMM part of
;; the source register (%x1).
(define_insn "avx2_pbroadcast<mode>_1"
  [(set (match_operand:VI_256 0 "register_operand" "=x,x")
        (vec_duplicate:VI_256
          (vec_select:<ssescalarmode>
            (match_operand:VI_256 1 "nonimmediate_operand" "m,x")
            (parallel [(const_int 0)]))))]
  "TARGET_AVX2"
  "@
   vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}
   vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "vex")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "<sseinsnmode>")])
16543
;; Variable permute for 256/512-bit vectors with 32/64-bit elements.
;; Operand 1 is the data (register or memory), operand 2 the index vector.
(define_insn "<avx2_avx512>_permvar<mode><mask_name>"
  [(set (match_operand:VI48F_256_512 0 "register_operand" "=v")
        (unspec:VI48F_256_512
          [(match_operand:VI48F_256_512 1 "nonimmediate_operand" "vm")
           (match_operand:<sseintvecmode> 2 "register_operand" "v")]
          UNSPEC_VPERMVAR))]
  "TARGET_AVX2 && <mask_mode512bit_condition>"
  "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "<sseinsnmode>")
   (set_attr "prefix" "<mask_prefix2>")])
977e83a3 16555
3dcc8af5
IT
;; Variable permute for byte-element vectors (requires AVX512VBMI).
(define_insn "<avx512>_permvar<mode><mask_name>"
  [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
        (unspec:VI1_AVX512VL
          [(match_operand:VI1_AVX512VL 1 "nonimmediate_operand" "vm")
           (match_operand:<sseintvecmode> 2 "register_operand" "v")]
          UNSPEC_VPERMVAR))]
  "TARGET_AVX512VBMI && <mask_mode512bit_condition>"
  "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "<sseinsnmode>")
   (set_attr "prefix" "<mask_prefix2>")])
16567
cf92ae7f
AI
;; Variable permute for word-element vectors (requires AVX512BW).
(define_insn "<avx512>_permvar<mode><mask_name>"
  [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
        (unspec:VI2_AVX512VL
          [(match_operand:VI2_AVX512VL 1 "nonimmediate_operand" "vm")
           (match_operand:<sseintvecmode> 2 "register_operand" "v")]
          UNSPEC_VPERMVAR))]
  "TARGET_AVX512BW && <mask_mode512bit_condition>"
  "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "<sseinsnmode>")
   (set_attr "prefix" "<mask_prefix2>")])
16579
;; Immediate permute: split the 8-bit immediate into four 2-bit element
;; selectors and hand them to the _1 pattern.
(define_expand "<avx2_avx512>_perm<mode>"
  [(match_operand:VI8F_256_512 0 "register_operand")
   (match_operand:VI8F_256_512 1 "nonimmediate_operand")
   (match_operand:SI 2 "const_0_to_255_operand")]
  "TARGET_AVX2"
{
  const int imm = INTVAL (operands[2]);
  rtx sel[4];
  int i;

  /* Selector I lives in bits [2*I+1 : 2*I] of the immediate.  */
  for (i = 0; i < 4; i++)
    sel[i] = GEN_INT ((imm >> (2 * i)) & 3);

  emit_insn (gen_<avx2_avx512>_perm<mode>_1 (operands[0], operands[1],
                                             sel[0], sel[1],
                                             sel[2], sel[3]));
  DONE;
})
16594
e2a2165d
AI
;; Masked immediate permute: as the unmasked expander, but also forwards
;; the merge source (operand 3) and the mask register (operand 4).
(define_expand "<avx512>_perm<mode>_mask"
  [(match_operand:VI8F_256_512 0 "register_operand")
   (match_operand:VI8F_256_512 1 "nonimmediate_operand")
   (match_operand:SI 2 "const_0_to_255_operand")
   (match_operand:VI8F_256_512 3 "vector_move_operand")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512F"
{
  const int imm = INTVAL (operands[2]);
  rtx sel[4];
  int i;

  /* Selector I lives in bits [2*I+1 : 2*I] of the immediate.  */
  for (i = 0; i < 4; i++)
    sel[i] = GEN_INT ((imm >> (2 * i)) & 3);

  emit_insn (gen_<avx2_avx512>_perm<mode>_1_mask (operands[0], operands[1],
                                                  sel[0], sel[1],
                                                  sel[2], sel[3],
                                                  operands[3], operands[4]));
  DONE;
})
16612
;; Immediate permute as a vec_select with four 0..3 selectors.  At output
;; time the selectors are packed back into one 8-bit immediate, which
;; replaces operand 2.
(define_insn "<avx2_avx512>_perm<mode>_1<mask_name>"
  [(set (match_operand:VI8F_256_512 0 "register_operand" "=v")
        (vec_select:VI8F_256_512
          (match_operand:VI8F_256_512 1 "nonimmediate_operand" "vm")
          (parallel [(match_operand 2 "const_0_to_3_operand")
                     (match_operand 3 "const_0_to_3_operand")
                     (match_operand 4 "const_0_to_3_operand")
                     (match_operand 5 "const_0_to_3_operand")])))]
  "TARGET_AVX2 && <mask_mode512bit_condition>"
{
  int imm = 0;
  int i;

  /* Operands 2..5 supply bit pairs 0..3 of the immediate.  All four are
     read before operands[2] is overwritten below.  */
  for (i = 0; i < 4; i++)
    imm |= INTVAL (operands[2 + i]) << (2 * i);

  operands[2] = GEN_INT (imm);
  return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
}
  [(set_attr "type" "sselog")
   (set_attr "mode" "<sseinsnmode>")
   (set_attr "prefix" "<mask_prefix2>")])
977e83a3
KY
16634
;; vperm2i128: two V4DI sources plus an 8-bit immediate control.
(define_insn "avx2_permv2ti"
  [(set (match_operand:V4DI 0 "register_operand" "=x")
        (unspec:V4DI
          [(match_operand:V4DI 1 "register_operand" "x")
           (match_operand:V4DI 2 "nonimmediate_operand" "xm")
           (match_operand:SI 3 "const_0_to_255_operand" "n")]
          UNSPEC_VPERMTI))]
  "TARGET_AVX2"
  "vperm2i128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "OI")
   (set_attr "prefix" "vex")])
16647
;; Broadcast element 0 of a V2DF register into all four lanes of a V4DF.
(define_insn "avx2_vec_dupv4df"
  [(set (match_operand:V4DF 0 "register_operand" "=x")
        (vec_duplicate:V4DF
          (vec_select:DF
            (match_operand:V2DF 1 "register_operand" "x")
            (parallel [(const_int 0)]))))]
  "TARGET_AVX2"
  "vbroadcastsd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "V4DF")
   (set_attr "prefix" "vex")])
16659
b92883d6
IT
;; Broadcast element 0 of a full-width integer vector to every element.
;; Alternative 0 takes the source in a register and uses its low XMM part
;; (%x1); alternative 1 reads the scalar straight from memory.
;;
;; Fixes relative to the previous definition:
;;  - the selected element has the scalar mode, so the vec_select must be
;;    <ssescalarmode>, not the full vector mode;
;;  - a multi-alternative template needs the leading "@", otherwise both
;;    lines are emitted for every alternative;
;;  - the template order now follows the constraint order "v,m".
(define_insn "<avx512>_vec_dup<mode>_1"
  [(set (match_operand:VI_AVX512BW 0 "register_operand" "=v,v")
        (vec_duplicate:VI_AVX512BW
          (vec_select:<ssescalarmode>
            (match_operand:VI_AVX512BW 1 "nonimmediate_operand" "v,m")
            (parallel [(const_int 0)]))))]
  "TARGET_AVX512F"
  "@
   vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}
   vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
16672
51e14b05
AI
;; Broadcast element 0 of an XMM-sized source to a vector with 32/64-bit
;; elements; optionally masked via <mask_name>.
(define_insn "<avx512>_vec_dup<mode><mask_name>"
  [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
        (vec_duplicate:V48_AVX512VL
          (vec_select:<ssescalarmode>
            (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
            (parallel [(const_int 0)]))))]
  "TARGET_AVX512F"
  "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "<sseinsnmode>")
   (set_attr "prefix" "evex")])
16684
51e14b05
AI
;; Broadcast element 0 of an XMM-sized source to a vector with 8/16-bit
;; elements (requires AVX512BW); optionally masked via <mask_name>.
(define_insn "<avx512>_vec_dup<mode><mask_name>"
  [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
        (vec_duplicate:VI12_AVX512VL
          (vec_select:<ssescalarmode>
            (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
            (parallel [(const_int 0)]))))]
  "TARGET_AVX512BW"
  "vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "<sseinsnmode>")
   (set_attr "prefix" "evex")])
16696
;; Replicate a 128-bit value across a 512-bit vector of 32-bit elements.
;; Register source: shuffle the widened register (%g1) with itself using
;; immediate 0.  Memory source: use the 32x4 broadcast instruction.
(define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
  [(set (match_operand:V16FI 0 "register_operand" "=v,v")
        (vec_duplicate:V16FI
          (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
  "TARGET_AVX512F"
  "@
   vshuf<shuffletype>32x4\t{$0x0, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x0}
   vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "<sseinsnmode>")
   (set_attr "prefix" "evex")])
16708
;; Replicate a half-width (256-bit) value across a 512-bit vector of
;; 64-bit elements.  Register source: shuffle the widened register (%g1)
;; with itself using immediate 0x44.  Memory source: 64x4 broadcast.
(define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
  [(set (match_operand:V8FI 0 "register_operand" "=v,v")
        (vec_duplicate:V8FI
          (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
  "TARGET_AVX512F"
  "@
   vshuf<shuffletype>64x2\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
   vbroadcast<shuffletype>64x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "<sseinsnmode>")
   (set_attr "prefix" "evex")])
16720
;; Broadcast an 8/16-bit scalar.  Alternative 0 takes it from a vector
;; register or memory; alternative 1 takes it from a general register,
;; printed with the %k modifier.
(define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>"
  [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v")
        (vec_duplicate:VI12_AVX512VL
          (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))]
  "TARGET_AVX512BW"
  "@
   vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}
   vpbroadcast<bcstscalarsuff>\t{%k1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "<sseinsnmode>")
   (set_attr "prefix" "evex")])
16732
;; Broadcast a 32/64-bit scalar.  Alternative 0 takes it from a vector
;; register or memory.  Alternative 1 (general-register source) is only
;; enabled for integer scalar modes, and for DImode only on 64-bit
;; targets; see the "enabled" attribute below.
(define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>"
  [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v")
        (vec_duplicate:V48_AVX512VL
          (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))]
  "TARGET_AVX512F"
  "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set (attr "enabled")
        (if_then_else (eq_attr "alternative" "1")
          (symbol_ref "GET_MODE_CLASS (<ssescalarmode>mode) == MODE_INT
                       && (<ssescalarmode>mode != DImode || TARGET_64BIT)")
          (const_int 1)))
   (set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
ab931c71 16747
092444af
JJ
;; Splat an SF scalar into V4SF.
;;   alt 0 (AVX):    register source, vshufps with immediate 0
;;   alt 1 (AVX):    memory source, vbroadcastss
;;   alt 2 (no AVX): source tied to destination, shufps with immediate 0
(define_insn "vec_dupv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
        (vec_duplicate:V4SF
          (match_operand:SF 1 "nonimmediate_operand" "x,m,0")))]
  "TARGET_SSE"
  "@
   vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
   vbroadcastss\t{%1, %0|%0, %1}
   shufps\t{$0, %0, %0|%0, %0, 0}"
  [(set_attr "type" "sseshuf1,ssemov,sseshuf1")
   (set_attr "isa" "avx,avx,noavx")
   (set_attr "prefix" "vex,vex,orig")
   (set_attr "prefix_extra" "0,1,*")
   (set_attr "length_immediate" "1,0,1")
   (set_attr "mode" "V4SF")])
16763
;; Splat an SI scalar into V4SI.
;;   alt 0 (SSE2):   register source, pshufd/vpshufd with immediate 0
;;   alt 1 (AVX):    memory source, vbroadcastss
;;   alt 2 (no AVX): source tied to destination, shufps with immediate 0
(define_insn "*vec_dupv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
        (vec_duplicate:V4SI
          (match_operand:SI 1 "nonimmediate_operand" " x,m,0")))]
  "TARGET_SSE"
  "@
   %vpshufd\t{$0, %1, %0|%0, %1, 0}
   vbroadcastss\t{%1, %0|%0, %1}
   shufps\t{$0, %0, %0|%0, %0, 0}"
  [(set_attr "type" "sselog1,ssemov,sselog1")
   (set_attr "isa" "sse2,avx,noavx")
   (set_attr "prefix" "maybe_vex,vex,orig")
   (set_attr "prefix_extra" "0,1,*")
   (set_attr "length_immediate" "1,0,1")
   (set_attr "mode" "TI,V4SF,V4SF")])
16779
;; Splat a DI scalar into V2DI.
;;   alt 0 (SSE2, no AVX): source tied to destination, punpcklqdq
;;   alt 1 (AVX):          register source, vpunpcklqdq
;;   alt 2 (SSE3):         memory source, movddup/vmovddup
;;   alt 3 (no AVX):       source tied to destination, movlhps
(define_insn "*vec_dupv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=x,x,x,x")
        (vec_duplicate:V2DI
          (match_operand:DI 1 "nonimmediate_operand" " 0,x,m,0")))]
  "TARGET_SSE"
  "@
   punpcklqdq\t%0, %0
   vpunpcklqdq\t{%d1, %0|%0, %d1}
   %vmovddup\t{%1, %0|%0, %1}
   movlhps\t%0, %0"
  [(set_attr "type" "sselog1,sselog1,sselog1,ssemov")
   (set_attr "isa" "sse2_noavx,avx,sse3,noavx")
   (set_attr "mode" "TI,TI,DF,V4SF")
   (set_attr "prefix" "orig,vex,maybe_vex,orig")])
51e14b05 16794
977e83a3
KY
;; Load a 128-bit value from memory into both halves of a 256-bit
;; integer vector.
(define_insn "avx2_vbroadcasti128_<mode>"
  [(set (match_operand:VI_256 0 "register_operand" "=x")
        (vec_concat:VI_256
          (match_operand:<ssehalfvecmode> 1 "memory_operand" "m")
          (match_dup 1)))]
  "TARGET_AVX2"
  "vbroadcasti128\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "vex")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "OI")])
16806
7d9f1cd2
JJ
;; Vector modes covered by the AVX vec_dup patterns.
(define_mode_iterator AVX_VEC_DUP_MODE
  [V8SI V8SF
   V4DI V4DF])

;; Vector modes covered by the AVX2 vec_dup patterns.
(define_mode_iterator AVX2_VEC_DUP_MODE
  [V32QI V16QI
   V16HI V8HI
   V8SI V4SI])
16813
;; AVX2 scalar splat.  Alternatives 0 and 1 broadcast from memory and from
;; the low XMM part of a vector register.  Alternative 2 (general
;; register source) emits "#", i.e. it must be split.
(define_insn "*vec_dup<mode>"
  [(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand" "=x,x,Yi")
        (vec_duplicate:AVX2_VEC_DUP_MODE
          (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,x,$r")))]
  "TARGET_AVX2"
  "@
   v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1}
   v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}
   #"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "maybe_evex")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "<sseinsnmode>")])
16827
;; AVX scalar splat for 256-bit modes.
;;   alt 0 (AVX2):    memory source
;;   alt 1 (no AVX2): memory source, vbroadcast<ssescalarmodesuffix>
;;   alt 2 (AVX2):    register source, low XMM part (%x1)
;;   alt 3 (no AVX2): register source, emits "#" and must be split
(define_insn "vec_dup<mode>"
  [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x,v,x")
        (vec_duplicate:AVX_VEC_DUP_MODE
          (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,m,v,?x")))]
  "TARGET_AVX"
  "@
   v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1}
   vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
   v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}
   #"
  [(set_attr "type" "ssemov")
   (set_attr "isa" "avx2,noavx2,avx2,noavx2")
   (set_attr "prefix" "maybe_evex")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "<sseinsnmode>,V8SF,<sseinsnmode>,V8SF")])
16843
;; After reload, splat a general-register scalar under AVX2 in two steps:
;; move the scalar into element 0 of the destination's low V4SI part
;; (other elements zero), then broadcast from that low XMM part.
(define_split
  [(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand")
        (vec_duplicate:AVX2_VEC_DUP_MODE
          (match_operand:<ssescalarmode> 1 "register_operand")))]
  "TARGET_AVX2
   /* Do not split when an avx512vl_vec_dup_gprv*[qhs]i insn exists, since
      it broadcasts from a GPR directly.  V*[QH]I modes need both
      -mavx512vl and -mavx512bw; V*SI modes need only -mavx512vl.  */
   && !(TARGET_AVX512VL
        && (TARGET_AVX512BW || <ssescalarmode>mode == SImode))
   && reload_completed && GENERAL_REG_P (operands[1])"
  [(const_int 0)]
{
  rtx dest = operands[0];
  rtx dest_v4si = gen_lowpart (V4SImode, dest);
  rtx src_si = gen_lowpart (SImode, operands[1]);

  emit_insn (gen_vec_setv4si_0 (dest_v4si, CONST0_RTX (V4SImode), src_si));

  rtx dest_xmm = gen_lowpart (<ssexmmmode>mode, dest);
  emit_insn (gen_avx2_pbroadcast<mode> (dest, dest_xmm));
  DONE;
})
16866
;; After reload on AVX without AVX2, splat a register scalar by first
;; duplicating it into the half-width view of the destination register
;; and then concatenating that half with itself.
(define_split
  [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand")
        (vec_duplicate:AVX_VEC_DUP_MODE
          (match_operand:<ssescalarmode> 1 "register_operand")))]
  "TARGET_AVX && !TARGET_AVX2 && reload_completed"
  [(set (match_dup 2)
        (vec_duplicate:<ssehalfvecmode> (match_dup 1)))
   (set (match_dup 0)
        (vec_concat:AVX_VEC_DUP_MODE (match_dup 2) (match_dup 2)))]
{
  /* Operand 2 is the destination hard register in the half-vector mode.  */
  operands[2] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[0]));
})
8dfb9f16 16877
;; Duplicate a 128-bit value into both halves of a 256-bit vector.
;;   alt 0: memory source, vbroadcast<i128>
;;   alt 1: source tied to destination, vinsert<i128> into the upper half
;;   alt 2: other register source, vperm2<i128> with immediate 0
(define_insn "avx_vbroadcastf128_<mode>"
  [(set (match_operand:V_256 0 "register_operand" "=x,x,x")
        (vec_concat:V_256
          (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x")
          (match_dup 1)))]
  "TARGET_AVX"
  "@
   vbroadcast<i128>\t{%1, %0|%0, %1}
   vinsert<i128>\t{$1, %1, %0, %0|%0, %0, %1, 1}
   vperm2<i128>\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}"
  [(set_attr "type" "ssemov,sselog1,sselog1")
   (set_attr "prefix" "vex")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "0,1,1")
   (set_attr "mode" "<sseinsnmode>")])
5e04b3b6 16893
698ea04f
AI
;; Modes for broadcast[i|f]32x2.  Note the asymmetry: V4SI is listed but
;; there is no V4SF entry.
(define_mode_iterator VI4F_BRCST32x2
  [V16SI
   (V8SI "TARGET_AVX512VL")
   (V4SI "TARGET_AVX512VL")
   V16SF
   (V8SF "TARGET_AVX512VL")])

;; Mode of the two-element 64-bit group replicated by the 64x2 broadcasts.
(define_mode_attr 64x2mode
  [(V8DF "V2DF")
   (V8DI "V2DI")
   (V4DI "V2DI")
   (V4DF "V2DF")])

;; Mode of the two-element 32-bit group replicated by the 32x2 broadcasts.
(define_mode_attr 32x2mode
  [(V16SF "V2SF")
   (V16SI "V2SI")
   (V8SI "V2SI")
   (V8SF "V2SF")
   (V4SI "V2SI")])
16905
;; Broadcast the low two 32-bit elements of an XMM-sized source across
;; the destination vector.
(define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>"
  [(set (match_operand:VI4F_BRCST32x2 0 "register_operand" "=v")
        (vec_duplicate:VI4F_BRCST32x2
          (vec_select:<32x2mode>
            (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_AVX512DQ"
  "vbroadcast<shuffletype>32x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "<sseinsnmode>")])
16918
;; Replicate a 128-bit value across a 256-bit vector of 32-bit elements.
;; Register source: shuffle the widened register (%t1) with itself using
;; immediate 0.  Memory source: 32x4 broadcast.
(define_insn "<mask_codefor>avx512vl_broadcast<mode><mask_name>_1"
  [(set (match_operand:VI4F_256 0 "register_operand" "=v,v")
        (vec_duplicate:VI4F_256
          (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
  "TARGET_AVX512VL"
  "@
   vshuf<shuffletype>32x4\t{$0x0, %t1, %t1, %0<mask_operand2>|%0<mask_operand2>, %t1, %t1, 0x0}
   vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "<sseinsnmode>")])
16931
;; Replicate a half-width (256-bit) value across a 512-bit vector of
;; 32-bit elements.  Register source: shuffle the widened register (%g1)
;; with itself using immediate 0x44.  Memory source: 32x8 broadcast.
(define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1"
  [(set (match_operand:V16FI 0 "register_operand" "=v,v")
        (vec_duplicate:V16FI
          (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
  "TARGET_AVX512DQ"
  "@
   vshuf<shuffletype>32x4\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
   vbroadcast<shuffletype>32x8\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "<sseinsnmode>")])
16944
;; Modes for broadcast[i|f]64x2; the 256-bit modes need AVX512VL.
(define_mode_iterator VI8F_BRCST64x2
  [V8DI
   V8DF
   (V4DI "TARGET_AVX512VL")
   (V4DF "TARGET_AVX512VL")])
16948
;; Replicate a 128-bit pair of 64-bit elements across the destination.
;; Register source: shuffle the widened register with itself using
;; immediate 0.  Memory source: 64x2 broadcast.
(define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1"
  [(set (match_operand:VI8F_BRCST64x2 0 "register_operand" "=v,v")
        (vec_duplicate:VI8F_BRCST64x2
          (match_operand:<64x2mode> 1 "nonimmediate_operand" "v,m")))]
  "TARGET_AVX512DQ"
  "@
   vshuf<shuffletype>64x2\t{$0x0, %<concat_tg_mode>1, %<concat_tg_mode>1, %0<mask_operand2>|%0<mask_operand2>, %<concat_tg_mode>1, %<concat_tg_mode>1, 0x0}
   vbroadcast<shuffletype>64x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "<sseinsnmode>")])
16961
98725d44
AI
;; vpbroadcastmb2q: zero-extend a QImode mask register to DImode and
;; broadcast it to every 64-bit element.
(define_insn "avx512cd_maskb_vec_dup<mode>"
  [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
        (vec_duplicate:VI8_AVX512VL
          (zero_extend:DI
            (match_operand:QI 1 "register_operand" "Yk"))))]
  "TARGET_AVX512CD"
  "vpbroadcastmb2q\t{%1, %0|%0, %1}"
  [(set_attr "type" "mskmov")
   (set_attr "mode" "XI")
   (set_attr "prefix" "evex")])
16972
21c924ac
AI
;; vpbroadcastmw2d: zero-extend an HImode mask register to SImode and
;; broadcast it to every 32-bit element.
(define_insn "avx512cd_maskw_vec_dup<mode>"
  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
        (vec_duplicate:VI4_AVX512VL
          (zero_extend:SI
            (match_operand:HI 1 "register_operand" "Yk"))))]
  "TARGET_AVX512CD"
  "vpbroadcastmw2d\t{%1, %0|%0, %1}"
  [(set_attr "type" "mskmov")
   (set_attr "mode" "XI")
   (set_attr "prefix" "evex")])
16983
5e04b3b6
RH
;; Match a broadcast that builtin_vec_perm expressed as a vec_select.
;; A memory input is handled with vbroadcast on the selected element's
;; address; a register input uses vpermilp (the 256-bit variant of this
;; pattern additionally needs vperm2f128).
(define_insn "*avx_vperm_broadcast_v4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
        (vec_select:V4SF
          (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x")
          (match_parallel 2 "avx_vbroadcast_operand"
            [(match_operand 3 "const_int_operand" "C,n,n")])))]
  "TARGET_AVX"
{
  const int lane = INTVAL (operands[3]);

  if (which_alternative == 0 || which_alternative == 1)
    {
      /* Memory source: point operand 1 at the selected SFmode element.  */
      operands[1] = adjust_address_nv (operands[1], SFmode, lane * 4);
      return "vbroadcastss\t{%1, %0|%0, %k1}";
    }

  if (which_alternative == 2)
    {
      /* Register source: repeat the 2-bit lane index in all four fields.  */
      operands[2] = GEN_INT (lane * 0x55);
      return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
    }

  gcc_unreachable ();
}
  [(set_attr "type" "ssemov,ssemov,sselog1")
   (set_attr "prefix" "vex")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "0,0,1")
   (set_attr "mode" "SF,SF,V4SF")])
17014
;; 256-bit variant of the vec_select broadcast.  Always emitted as "#" and
;; split after reload (except for V4DF when AVX2 is available).
;;  - Memory source: rewrite operand 1 to address the selected scalar and
;;    fall through to the vec_duplicate replacement pattern.
;;  - Register source: emit the instruction sequence by hand and finish.
(define_insn_and_split "*avx_vperm_broadcast_<mode>"
  [(set (match_operand:VF_256 0 "register_operand" "=x,x,x")
        (vec_select:VF_256
          (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?x")
          (match_parallel 2 "avx_vbroadcast_operand"
            [(match_operand 3 "const_int_operand" "C,n,n")])))]
  "TARGET_AVX"
  "#"
  "&& reload_completed && (<MODE>mode != V4DFmode || !TARGET_AVX2)"
  [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
{
  rtx dest = operands[0];
  rtx src = operands[1];
  const int lane = INTVAL (operands[3]);

  if (!REG_P (src))
    {
      /* Memory source: address the selected scalar; the replacement
         pattern above then broadcasts it.  */
      operands[1] = adjust_address (src, <ssescalarmode>mode,
                                    lane * GET_MODE_SIZE (<ssescalarmode>mode));
    }
  else
    {
      int imm;

      if (TARGET_AVX2 && lane == 0)
        {
          emit_insn (gen_vec_dup<mode> (dest,
                                        gen_lowpart (<ssescalarmode>mode,
                                                     src)));
          DONE;
        }

      /* Step 1: splat the wanted element within each 128-bit lane.  The
         lane we do not need is shuffled as well, which is harmless.  */
      if (<MODE>mode == V4DFmode)
        imm = (lane & 1 ? 15 : 0);
      else
        imm = (lane & 3) * 0x55;
      emit_insn (gen_avx_vpermil<mode> (dest, src, GEN_INT (imm)));

      /* Step 2: copy the 128-bit lane holding the element to both lanes
         of the destination.  */
      imm = (lane / (<ssescalarnum> / 2)) * 0x11;
      emit_insn (gen_avx_vperm2f128<mode>3 (dest, dest, dest, GEN_INT (imm)));
      DONE;
    }
})
17057
;; vpermilpd with an immediate: turn the immediate into the explicit
;; vec_select PARALLEL.  Bit J of the immediate picks one of the two
;; elements of the 128-bit pair that element J belongs to.
(define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
  [(set (match_operand:VF2 0 "register_operand")
        (vec_select:VF2
          (match_operand:VF2 1 "nonimmediate_operand")
          (match_operand:SI 2 "const_0_to_255_operand")))]
  "TARGET_AVX && <mask_mode512bit_condition>"
{
  const int imm = INTVAL (operands[2]);
  rtx sel[<ssescalarnum>];
  int j;

  for (j = 0; j < <ssescalarnum>; j++)
    {
      /* First element index of the pair containing element J.  */
      int pair_base = j & ~1;
      sel[j] = GEN_INT (((imm >> j) & 1) + pair_base);
    }

  operands[2]
    = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, sel));
})
17078
;; vpermilps with an immediate: turn the immediate into the explicit
;; vec_select PARALLEL.  The same four 2-bit selectors are applied to
;; every group of four elements.
(define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
  [(set (match_operand:VF1 0 "register_operand")
        (vec_select:VF1
          (match_operand:VF1 1 "nonimmediate_operand")
          (match_operand:SI 2 "const_0_to_255_operand")))]
  "TARGET_AVX && <mask_mode512bit_condition>"
{
  const int imm = INTVAL (operands[2]);
  rtx sel[<ssescalarnum>];
  int j;

  for (j = 0; j < <ssescalarnum>; j++)
    {
      int slot = j & 3;             /* Position inside the group of four.  */
      int group_base = j - slot;    /* First element index of that group.  */
      sel[j] = GEN_INT (((imm >> (2 * slot)) & 3) + group_base);
    }

  operands[2]
    = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, sel));
})
17101
;; Matches a vec_select whose PARALLEL is accepted by
;; avx_vpermilp_parallel.  That helper returns the immediate plus one
;; (zero meaning "no match"), so the output code subtracts one.
(define_insn "*<sse2_avx_avx512f>_vpermilp<mode><mask_name>"
  [(set (match_operand:VF 0 "register_operand" "=v")
        (vec_select:VF
          (match_operand:VF 1 "nonimmediate_operand" "vm")
          (match_parallel 2 ""
            [(match_operand 3 "const_int_operand")])))]
  "TARGET_AVX && <mask_mode512bit_condition>
   && avx_vpermilp_parallel (operands[2], <MODE>mode)"
{
  operands[2] = GEN_INT (avx_vpermilp_parallel (operands[2], <MODE>mode) - 1);
  return "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix" "<mask_prefix>")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "<sseinsnmode>")])
95879c72 17120
;; vpermilps/vpermilpd with a variable control vector (operand 2, register
;; or memory).
(define_insn "<sse2_avx_avx512f>_vpermilvar<mode>3<mask_name>"
  [(set (match_operand:VF 0 "register_operand" "=v")
        (unspec:VF
          [(match_operand:VF 1 "register_operand" "v")
           (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "vm")]
          UNSPEC_VPERMIL))]
  "TARGET_AVX && <mask_mode512bit_condition>"
  "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "<mask_prefix>")
   (set_attr "prefix_extra" "1")
   (set_attr "btver2_decode" "vector")
   (set_attr "mode" "<sseinsnmode>")])
17134
c883e5fb
AI
;; Zero-masking vpermi2 expander for 32/64-bit element vectors: forwards
;; to the _maskz_1 pattern with an all-zero merge source.
;; Operand constraints are not used by define_expand, so none are given
;; here (the insn patterns carry the real constraints); this also matches
;; the VI1 expander of the same name.
(define_expand "<avx512>_vpermi2var<mode>3_maskz"
  [(match_operand:VI48F 0 "register_operand")
   (match_operand:VI48F 1 "register_operand")
   (match_operand:<sseintvecmode> 2 "register_operand")
   (match_operand:VI48F 3 "nonimmediate_operand")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512F"
{
  emit_insn (gen_<avx512>_vpermi2var<mode>3_maskz_1 (
        operands[0], operands[1], operands[2], operands[3],
        CONST0_RTX (<MODE>mode), operands[4]));
  DONE;
})
17148
3dcc8af5
IT
;; Zero-masking vpermi2 expander for byte-element vectors (AVX512VBMI):
;; forwards to the _maskz_1 pattern with an all-zero merge source.
(define_expand "<avx512>_vpermi2var<mode>3_maskz"
  [(match_operand:VI1_AVX512VL 0 "register_operand")
   (match_operand:VI1_AVX512VL 1 "register_operand")
   (match_operand:<sseintvecmode> 2 "register_operand")
   (match_operand:VI1_AVX512VL 3 "nonimmediate_operand")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512VBMI"
{
  rtx zero_src = CONST0_RTX (<MODE>mode);

  emit_insn (gen_<avx512>_vpermi2var<mode>3_maskz_1 (operands[0],
                                                     operands[1],
                                                     operands[2],
                                                     operands[3],
                                                     zero_src,
                                                     operands[4]));
  DONE;
})
17162
c883e5fb
AI
;; Zero-masking vpermi2 expander for word-element vectors (AVX512BW):
;; forwards to the _maskz_1 pattern with an all-zero merge source.
;; Operand constraints are not used by define_expand, so none are given
;; here (the insn patterns carry the real constraints); this also matches
;; the VI1 expander of the same name.
(define_expand "<avx512>_vpermi2var<mode>3_maskz"
  [(match_operand:VI2_AVX512VL 0 "register_operand")
   (match_operand:VI2_AVX512VL 1 "register_operand")
   (match_operand:<sseintvecmode> 2 "register_operand")
   (match_operand:VI2_AVX512VL 3 "nonimmediate_operand")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512BW"
{
  emit_insn (gen_<avx512>_vpermi2var<mode>3_maskz_1 (
        operands[0], operands[1], operands[2], operands[3],
        CONST0_RTX (<MODE>mode), operands[4]));
  DONE;
})
17176
;; vpermi2<ssemodesuffix> for VI48F modes, represented as UNSPEC_VPERMI2.
;; Operand 2 is tied to the destination register (constraint "0"), so the
;; assembly template only names %3, %1 and %0.
;; NOTE(review): <sd_maskz_name>/<sd_mask_op4> presumably come from a
;; define_subst that also produces the _maskz_1 variant -- confirm in subst.md.
17177(define_insn "<avx512>_vpermi2var<mode>3<sd_maskz_name>"
17178 [(set (match_operand:VI48F 0 "register_operand" "=v")
17179 (unspec:VI48F
17180 [(match_operand:VI48F 1 "register_operand" "v")
ab931c71 17181 (match_operand:<sseintvecmode> 2 "register_operand" "0")
c883e5fb 17182 (match_operand:VI48F 3 "nonimmediate_operand" "vm")]
ab931c71
AI
17183 UNSPEC_VPERMI2))]
17184 "TARGET_AVX512F"
8b08db1e 17185 "vpermi2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
ab931c71
AI
17186 [(set_attr "type" "sselog")
17187 (set_attr "prefix" "evex")
17188 (set_attr "mode" "<sseinsnmode>")])
17189
3dcc8af5
IT
;; vpermi2<ssemodesuffix> for VI1_AVX512VL modes (TARGET_AVX512VBMI),
;; represented as UNSPEC_VPERMI2.  Operand 2 is tied to the destination
;; register (constraint "0").
17190(define_insn "<avx512>_vpermi2var<mode>3<sd_maskz_name>"
17191 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
17192 (unspec:VI1_AVX512VL
17193 [(match_operand:VI1_AVX512VL 1 "register_operand" "v")
17194 (match_operand:<sseintvecmode> 2 "register_operand" "0")
17195 (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")]
17196 UNSPEC_VPERMI2))]
17197 "TARGET_AVX512VBMI"
17198 "vpermi2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
17199 [(set_attr "type" "sselog")
17200 (set_attr "prefix" "evex")
17201 (set_attr "mode" "<sseinsnmode>")])
17202
c883e5fb
AI
;; vpermi2<ssemodesuffix> for VI2_AVX512VL modes (TARGET_AVX512BW),
;; represented as UNSPEC_VPERMI2.  Operand 2 is tied to the destination
;; register (constraint "0").
17203(define_insn "<avx512>_vpermi2var<mode>3<sd_maskz_name>"
17204 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17205 (unspec:VI2_AVX512VL
17206 [(match_operand:VI2_AVX512VL 1 "register_operand" "v")
17207 (match_operand:<sseintvecmode> 2 "register_operand" "0")
17208 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")]
17209 UNSPEC_VPERMI2))]
17210 "TARGET_AVX512BW"
17211 "vpermi2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
17212 [(set_attr "type" "sselog")
17213 (set_attr "prefix" "evex")
17214 (set_attr "mode" "<sseinsnmode>")])
17215
;; Merge-masking form of vpermi2var for VI48F modes.  The result of the
;; UNSPEC_VPERMI2_MASK unspec is vec_merge'd with the destination itself
;; (match_dup 0) under mask register operand 4; operand 2 is tied to the
;; destination (constraint "0").  The mask is printed as {%4} on %0.
17216(define_insn "<avx512>_vpermi2var<mode>3_mask"
17217 [(set (match_operand:VI48F 0 "register_operand" "=v")
17218 (vec_merge:VI48F
17219 (unspec:VI48F
17220 [(match_operand:VI48F 1 "register_operand" "v")
47490470 17221 (match_operand:<sseintvecmode> 2 "register_operand" "0")
c883e5fb 17222 (match_operand:VI48F 3 "nonimmediate_operand" "vm")]
47490470
AI
17223 UNSPEC_VPERMI2_MASK)
17224 (match_dup 0)
be792bce 17225 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
47490470
AI
17226 "TARGET_AVX512F"
17227 "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
17228 [(set_attr "type" "sselog")
17229 (set_attr "prefix" "evex")
17230 (set_attr "mode" "<sseinsnmode>")])
17231
3dcc8af5
IT
;; Merge-masking form of vpermi2var for VI1_AVX512VL modes
;; (TARGET_AVX512VBMI).  The UNSPEC_VPERMI2_MASK result is vec_merge'd
;; with the destination (match_dup 0) under mask register operand 4.
17232(define_insn "<avx512>_vpermi2var<mode>3_mask"
17233 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
17234 (vec_merge:VI1_AVX512VL
17235 (unspec:VI1_AVX512VL
17236 [(match_operand:VI1_AVX512VL 1 "register_operand" "v")
17237 (match_operand:<sseintvecmode> 2 "register_operand" "0")
17238 (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")]
17239 UNSPEC_VPERMI2_MASK)
17240 (match_dup 0)
17241 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
17242 "TARGET_AVX512VBMI"
17243 "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
17244 [(set_attr "type" "sselog")
17245 (set_attr "prefix" "evex")
17246 (set_attr "mode" "<sseinsnmode>")])
17247
c883e5fb
AI
;; Merge-masking form of vpermi2var for VI2_AVX512VL modes
;; (TARGET_AVX512BW).  The UNSPEC_VPERMI2_MASK result is vec_merge'd
;; with the destination (match_dup 0) under mask register operand 4.
17248(define_insn "<avx512>_vpermi2var<mode>3_mask"
17249 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17250 (vec_merge:VI2_AVX512VL
17251 (unspec:VI2_AVX512VL
17252 [(match_operand:VI2_AVX512VL 1 "register_operand" "v")
17253 (match_operand:<sseintvecmode> 2 "register_operand" "0")
17254 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")]
17255 UNSPEC_VPERMI2_MASK)
17256 (match_dup 0)
17257 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
17258 "TARGET_AVX512BW"
17259 "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
17260 [(set_attr "type" "sselog")
17261 (set_attr "prefix" "evex")
17262 (set_attr "mode" "<sseinsnmode>")])
17263
;; Expander for the zero-masking form of vpermt2var (VI48F modes).
;; Forwards operands 0-3, an all-zeros vector of <MODE>mode and the mask
;; register (operand 4) to the _maskz_1 generator.  Constraint strings are
;; omitted because a define_expand only uses the operand predicates.
17264(define_expand "<avx512>_vpermt2var<mode>3_maskz"
17265 [(match_operand:VI48F 0 "register_operand")
8b08db1e 17266 (match_operand:<sseintvecmode> 1 "register_operand")
c883e5fb
AI
17267 (match_operand:VI48F 2 "register_operand")
17268 (match_operand:VI48F 3 "nonimmediate_operand")
be792bce 17269 (match_operand:<avx512fmaskmode> 4 "register_operand")]
8b08db1e
AI
17270 "TARGET_AVX512F"
17271{
c883e5fb 17272 emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 (
8b08db1e
AI
17273 operands[0], operands[1], operands[2], operands[3],
17274 CONST0_RTX (<MODE>mode), operands[4]));
17275 DONE;
17276})
17277
3dcc8af5
IT
;; Expander for the zero-masking form of vpermt2var (VI1_AVX512VL modes,
;; enabled by TARGET_AVX512VBMI).  Forwards operands 0-3, an all-zeros
;; vector of <MODE>mode and the mask register (operand 4) to the
;; _maskz_1 generator.  Constraint strings are omitted because a
;; define_expand only uses the operand predicates.
17278(define_expand "<avx512>_vpermt2var<mode>3_maskz"
17279 [(match_operand:VI1_AVX512VL 0 "register_operand")
17280 (match_operand:<sseintvecmode> 1 "register_operand")
17281 (match_operand:VI1_AVX512VL 2 "register_operand")
17282 (match_operand:VI1_AVX512VL 3 "nonimmediate_operand")
17283 (match_operand:<avx512fmaskmode> 4 "register_operand")]
17284 "TARGET_AVX512VBMI"
17285{
17286 emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 (
17287 operands[0], operands[1], operands[2], operands[3],
17288 CONST0_RTX (<MODE>mode), operands[4]));
17289 DONE;
17290})
17291
c883e5fb
AI
;; Expander for the zero-masking form of vpermt2var (VI2_AVX512VL modes,
;; enabled by TARGET_AVX512BW).  Forwards operands 0-3, an all-zeros
;; vector of <MODE>mode and the mask register (operand 4) to the
;; _maskz_1 generator.  Constraint strings are omitted because a
;; define_expand only uses the operand predicates.
17292(define_expand "<avx512>_vpermt2var<mode>3_maskz"
17293 [(match_operand:VI2_AVX512VL 0 "register_operand")
17294 (match_operand:<sseintvecmode> 1 "register_operand")
17295 (match_operand:VI2_AVX512VL 2 "register_operand")
17296 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand")
17297 (match_operand:<avx512fmaskmode> 4 "register_operand")]
17298 "TARGET_AVX512BW"
17299{
17300 emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 (
17301 operands[0], operands[1], operands[2], operands[3],
17302 CONST0_RTX (<MODE>mode), operands[4]));
17303 DONE;
17304})
17305
;; vpermt2<ssemodesuffix> for VI48F modes, represented as UNSPEC_VPERMT2.
;; Operand 1 has mode <sseintvecmode>; operand 2 is tied to the
;; destination register (constraint "0"), so the assembly template only
;; names %3, %1 and %0.
17306(define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>"
17307 [(set (match_operand:VI48F 0 "register_operand" "=v")
17308 (unspec:VI48F
ab931c71 17309 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
c883e5fb
AI
17310 (match_operand:VI48F 2 "register_operand" "0")
17311 (match_operand:VI48F 3 "nonimmediate_operand" "vm")]
ab931c71
AI
17312 UNSPEC_VPERMT2))]
17313 "TARGET_AVX512F"
8b08db1e 17314 "vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
ab931c71
AI
17315 [(set_attr "type" "sselog")
17316 (set_attr "prefix" "evex")
17317 (set_attr "mode" "<sseinsnmode>")])
95879c72 17318
3dcc8af5
IT
;; vpermt2<ssemodesuffix> for VI1_AVX512VL modes (TARGET_AVX512VBMI),
;; represented as UNSPEC_VPERMT2.  Operand 2 is tied to the destination
;; register (constraint "0").
17319(define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>"
17320 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
17321 (unspec:VI1_AVX512VL
17322 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
17323 (match_operand:VI1_AVX512VL 2 "register_operand" "0")
17324 (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")]
17325 UNSPEC_VPERMT2))]
17326 "TARGET_AVX512VBMI"
17327 "vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
17328 [(set_attr "type" "sselog")
17329 (set_attr "prefix" "evex")
17330 (set_attr "mode" "<sseinsnmode>")])
17331
c883e5fb
AI
;; vpermt2<ssemodesuffix> for VI2_AVX512VL modes (TARGET_AVX512BW),
;; represented as UNSPEC_VPERMT2.  Operand 2 is tied to the destination
;; register (constraint "0").
17332(define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>"
17333 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17334 (unspec:VI2_AVX512VL
17335 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
17336 (match_operand:VI2_AVX512VL 2 "register_operand" "0")
17337 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")]
17338 UNSPEC_VPERMT2))]
17339 "TARGET_AVX512BW"
17340 "vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
17341 [(set_attr "type" "sselog")
17342 (set_attr "prefix" "evex")
17343 (set_attr "mode" "<sseinsnmode>")])
17344
;; Merge-masking form of vpermt2var for VI48F modes.  The UNSPEC_VPERMT2
;; result is vec_merge'd with operand 2 (match_dup 2), which is also tied
;; to the destination (constraint "0"), under mask register operand 4.
17345(define_insn "<avx512>_vpermt2var<mode>3_mask"
17346 [(set (match_operand:VI48F 0 "register_operand" "=v")
17347 (vec_merge:VI48F
17348 (unspec:VI48F
47490470 17349 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
c883e5fb
AI
17350 (match_operand:VI48F 2 "register_operand" "0")
17351 (match_operand:VI48F 3 "nonimmediate_operand" "vm")]
47490470
AI
17352 UNSPEC_VPERMT2)
17353 (match_dup 2)
be792bce 17354 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
47490470
AI
17355 "TARGET_AVX512F"
17356 "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
17357 [(set_attr "type" "sselog")
17358 (set_attr "prefix" "evex")
17359 (set_attr "mode" "<sseinsnmode>")])
17360
3dcc8af5
IT
;; Merge-masking form of vpermt2var for VI1_AVX512VL modes
;; (TARGET_AVX512VBMI).  The UNSPEC_VPERMT2 result is vec_merge'd with
;; operand 2 (match_dup 2) under mask register operand 4.
17361(define_insn "<avx512>_vpermt2var<mode>3_mask"
17362 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
17363 (vec_merge:VI1_AVX512VL
17364 (unspec:VI1_AVX512VL
17365 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
17366 (match_operand:VI1_AVX512VL 2 "register_operand" "0")
17367 (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")]
17368 UNSPEC_VPERMT2)
17369 (match_dup 2)
17370 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
17371 "TARGET_AVX512VBMI"
17372 "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
17373 [(set_attr "type" "sselog")
17374 (set_attr "prefix" "evex")
17375 (set_attr "mode" "<sseinsnmode>")])
17376
c883e5fb
AI
;; Merge-masking form of vpermt2var for VI2_AVX512VL modes
;; (TARGET_AVX512BW).  The UNSPEC_VPERMT2 result is vec_merge'd with
;; operand 2 (match_dup 2) under mask register operand 4.
17377(define_insn "<avx512>_vpermt2var<mode>3_mask"
17378 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17379 (vec_merge:VI2_AVX512VL
17380 (unspec:VI2_AVX512VL
17381 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
17382 (match_operand:VI2_AVX512VL 2 "register_operand" "0")
17383 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")]
17384 UNSPEC_VPERMT2)
17385 (match_dup 2)
17386 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
17387 "TARGET_AVX512BW"
17388 "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
17389 [(set_attr "type" "sselog")
17390 (set_attr "prefix" "evex")
17391 (set_attr "mode" "<sseinsnmode>")])
17392
;; Expander for vperm2f128 with an 8-bit immediate (operand 3).
;; When neither bit 7 nor bit 3 of the immediate is set, the operation is
;; emitted as a vec_select over the vec_concat of operands 1 and 2 and the
;; expander finishes with DONE.  Otherwise the C block falls through and
;; the UNSPEC_VPERMIL2F128 template below is used as written.
ca659f6e 17393(define_expand "avx_vperm2f128<mode>3"
82e86dc6 17394 [(set (match_operand:AVX256MODE2P 0 "register_operand")
ca659f6e 17395 (unspec:AVX256MODE2P
82e86dc6
UB
17396 [(match_operand:AVX256MODE2P 1 "register_operand")
17397 (match_operand:AVX256MODE2P 2 "nonimmediate_operand")
17398 (match_operand:SI 3 "const_0_to_255_operand")]
ca659f6e
RH
17399 UNSPEC_VPERMIL2F128))]
17400 "TARGET_AVX"
17401{
a1b5171b 17402 int mask = INTVAL (operands[3]);
ca659f6e
RH
17403 if ((mask & 0x88) == 0)
17404 {
17405 rtx perm[<ssescalarnum>], t1, t2;
17406 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
17407
/* First half of the selector: nelt2 consecutive indices starting at
   (mask & 3) * nelt2.  */
17408 base = (mask & 3) * nelt2;
17409 for (i = 0; i < nelt2; ++i)
17410 perm[i] = GEN_INT (base + i);
17411
/* Second half of the selector: taken from bits 4-5 of the immediate.  */
17412 base = ((mask >> 4) & 3) * nelt2;
17413 for (i = 0; i < nelt2; ++i)
17414 perm[i + nelt2] = GEN_INT (base + i);
17415
/* Build (set op0 (vec_select (vec_concat op1 op2) (parallel perm))).  */
cbb734aa 17416 t2 = gen_rtx_VEC_CONCAT (<ssedoublevecmode>mode,
ca659f6e
RH
17417 operands[1], operands[2]);
17418 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
17419 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
f7df4a84 17420 t2 = gen_rtx_SET (operands[0], t2);
ca659f6e
RH
17421 emit_insn (t2);
17422 DONE;
17423 }
17424})
17425
17426;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
17427;; means that in order to represent this properly in rtl we'd have to
17428;; nest *another* vec_concat with a zero operand and do the select from
17429;; a 4x wide vector. That doesn't seem very nice.
;; Matches the UNSPEC_VPERMIL2F128 form with an arbitrary 8-bit immediate
;; (operand 3) and emits vperm2<i128> with all three source operands.
17430(define_insn "*avx_vperm2f128<mode>_full"
95879c72
L
17431 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
17432 (unspec:AVX256MODE2P
17433 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
17434 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
17435 (match_operand:SI 3 "const_0_to_255_operand" "n")]
17436 UNSPEC_VPERMIL2F128))]
17437 "TARGET_AVX"
1db4406e 17438 "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
95879c72 17439 [(set_attr "type" "sselog")
725fd454
JJ
17440 (set_attr "prefix_extra" "1")
17441 (set_attr "length_immediate" "1")
95879c72 17442 (set_attr "prefix" "vex")
1db4406e 17443 (set_attr "mode" "<sseinsnmode>")])
95879c72 17444
ca659f6e
RH
;; Matches a vec_select over the vec_concat of operands 1 and 2 whose
;; selector (match_parallel 3) is accepted by avx_vperm2f128_parallel.
;; The immediate is recovered as that function's return value minus 1.
;; Two immediates (0x12 and 0x20) are emitted as vinsert<i128> of the
;; %x2 form of operand 2 into operand 1; everything else is emitted as
;; vperm2<i128>.
17445(define_insn "*avx_vperm2f128<mode>_nozero"
17446 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
17447 (vec_select:AVX256MODE2P
cbb734aa 17448 (vec_concat:<ssedoublevecmode>
ca659f6e
RH
17449 (match_operand:AVX256MODE2P 1 "register_operand" "x")
17450 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
200eb7d2 17451 (match_parallel 3 ""
82e86dc6 17452 [(match_operand 4 "const_int_operand")])))]
200eb7d2
UB
17453 "TARGET_AVX
17454 && avx_vperm2f128_parallel (operands[3], <MODE>mode)"
ca659f6e
RH
17455{
17456 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
5d54daac
JJ
17457 if (mask == 0x12)
17458 return "vinsert<i128>\t{$0, %x2, %1, %0|%0, %1, %x2, 0}";
17459 if (mask == 0x20)
17460 return "vinsert<i128>\t{$1, %x2, %1, %0|%0, %1, %x2, 1}";
/* General case: replace the parallel by the immediate for printing.  */
ca659f6e 17461 operands[3] = GEN_INT (mask);
1db4406e 17462 return "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
ca659f6e
RH
17463}
17464 [(set_attr "type" "sselog")
17465 (set_attr "prefix_extra" "1")
17466 (set_attr "length_immediate" "1")
17467 (set_attr "prefix" "vex")
1db4406e 17468 (set_attr "mode" "<sseinsnmode>")])
ca659f6e 17469
edbb0749
ES
;; Matches a single-input vec_select on a V_128 register whose selector
;; satisfies palignr_operand and emits it as (v)palignr with operand 1
;; used for both sources.  The byte immediate is computed as the first
;; selector element (operand 3) times the element size in bytes.
;; Alternative 0 is the non-AVX two-operand form, alternative 1 the AVX
;; form (see the "isa" attribute).
17470(define_insn "*ssse3_palignr<mode>_perm"
17471 [(set (match_operand:V_128 0 "register_operand" "=x,x")
17472 (vec_select:V_128
17473 (match_operand:V_128 1 "register_operand" "0,x")
17474 (match_parallel 2 "palignr_operand"
17475 [(match_operand 3 "const_int_operand" "n, n")])))]
17476 "TARGET_SSSE3"
17477{
ef4bddc2 17478 machine_mode imode = GET_MODE_INNER (GET_MODE (operands[0]));
edbb0749
ES
/* Overwrite the parallel with the byte-count immediate for printing.  */
17479 operands[2] = GEN_INT (INTVAL (operands[3]) * GET_MODE_SIZE (imode));
17480
17481 switch (which_alternative)
17482 {
17483 case 0:
17484 return "palignr\t{%2, %1, %0|%0, %1, %2}";
17485 case 1:
17486 return "vpalignr\t{%2, %1, %1, %0|%0, %1, %1, %2}";
17487 default:
17488 gcc_unreachable ();
17489 }
17490}
17491 [(set_attr "isa" "noavx,avx")
17492 (set_attr "type" "sseishft")
17493 (set_attr "atom_unit" "sishuf")
17494 (set_attr "prefix_data16" "1,*")
17495 (set_attr "prefix_extra" "1")
17496 (set_attr "length_immediate" "1")
17497 (set_attr "prefix" "orig,vex")])
17498
d0337ddc
AI
;; Masked 128-bit insert into a 256-bit vector (VI48F_256 modes).
;; Operand 3 (0 or 1) selects between the vec_set_lo_<mode>_mask and
;; vec_set_hi_<mode>_mask generators; operands 4 and 5 are passed through
;; as the last two arguments of the chosen generator.
17499(define_expand "avx512vl_vinsert<mode>"
17500 [(match_operand:VI48F_256 0 "register_operand")
17501 (match_operand:VI48F_256 1 "register_operand")
17502 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
17503 (match_operand:SI 3 "const_0_to_1_operand")
17504 (match_operand:VI48F_256 4 "register_operand")
17505 (match_operand:<avx512fmaskmode> 5 "register_operand")]
17506 "TARGET_AVX512VL"
17507{
17508 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
17509
17510 switch (INTVAL (operands[3]))
17511 {
17512 case 0:
17513 insn = gen_vec_set_lo_<mode>_mask;
17514 break;
17515 case 1:
17516 insn = gen_vec_set_hi_<mode>_mask;
17517 break;
17518 default:
17519 gcc_unreachable ();
17520 }
17521
/* Operand 3 is consumed by the dispatch above and is not forwarded.  */
17522 emit_insn (insn (operands[0], operands[1], operands[2], operands[4],
17523 operands[5]));
17524 DONE;
17525})
17526
;; 128-bit insert into a 256-bit vector (V_256 modes).  Operand 3 (0 or 1)
;; selects between the vec_set_lo_<mode> and vec_set_hi_<mode> generators,
;; which receive operands 0, 1 and 2.
95879c72 17527(define_expand "avx_vinsertf128<mode>"
82e86dc6
UB
17528 [(match_operand:V_256 0 "register_operand")
17529 (match_operand:V_256 1 "register_operand")
17530 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
17531 (match_operand:SI 3 "const_0_to_1_operand")]
95879c72
L
17532 "TARGET_AVX"
17533{
16cc4440
UB
17534 rtx (*insn)(rtx, rtx, rtx);
17535
95879c72
L
17536 switch (INTVAL (operands[3]))
17537 {
17538 case 0:
16cc4440 17539 insn = gen_vec_set_lo_<mode>;
95879c72
L
17540 break;
17541 case 1:
16cc4440 17542 insn = gen_vec_set_hi_<mode>;
95879c72
L
17543 break;
17544 default:
17545 gcc_unreachable ();
17546 }
16cc4440
UB
17547
17548 emit_insn (insn (operands[0], operands[1], operands[2]));
95879c72
L
17549 DONE;
17550})
17551
d0337ddc
AI
;; Build a VI8F_256 vector whose first half is operand 2 and whose second
;; half is elements 2-3 of operand 1 (insert at position 0).
;; Mnemonic selection:
;;   AVX512VL + AVX512DQ -> vinsert<shuffletype>64x2
;;   AVX512VL only       -> vinsert<shuffletype>32x4
;;   otherwise           -> vinsert<i128>
;; The 64x2 form is an AVX512DQ instruction, so it must not be emitted
;; when only AVX512VL is enabled.
;; NOTE(review): for the <mask_name> variant the 32x4 fallback would apply
;; the mask at 32-bit granularity; presumably the masked generator is only
;; reached with AVX512DQ enabled -- confirm against the builtin tables, or
;; gate the masked variant on TARGET_AVX512DQ in the insn condition.
17552(define_insn "vec_set_lo_<mode><mask_name>"
17553 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
6bec6c98 17554 (vec_concat:VI8F_256
d0337ddc 17555 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
cbb734aa 17556 (vec_select:<ssehalfvecmode>
d0337ddc 17557 (match_operand:VI8F_256 1 "register_operand" "v")
95879c72
L
17558 (parallel [(const_int 2) (const_int 3)]))))]
17559 "TARGET_AVX"
d0337ddc
AI
17560{
17561 if (TARGET_AVX512VL && TARGET_AVX512DQ)
17562 return "vinsert<shuffletype>64x2\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
 else if (TARGET_AVX512VL)
 return "vinsert<shuffletype>32x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
17563 else
17564 return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}";
17565}
95879c72 17566 [(set_attr "type" "sselog")
725fd454
JJ
17567 (set_attr "prefix_extra" "1")
17568 (set_attr "length_immediate" "1")
95879c72 17569 (set_attr "prefix" "vex")
1db4406e 17570 (set_attr "mode" "<sseinsnmode>")])
95879c72 17571
d0337ddc
AI
;; Build a VI8F_256 vector whose first half is elements 0-1 of operand 1
;; and whose second half is operand 2 (insert at position 1).
;; Mnemonic selection:
;;   AVX512VL + AVX512DQ -> vinsert<shuffletype>64x2
;;   AVX512VL only       -> vinsert<shuffletype>32x4
;;   otherwise           -> vinsert<i128>
;; The 64x2 form is an AVX512DQ instruction, so it must not be emitted
;; when only AVX512VL is enabled.
;; NOTE(review): for the <mask_name> variant the 32x4 fallback would apply
;; the mask at 32-bit granularity; presumably the masked generator is only
;; reached with AVX512DQ enabled -- confirm against the builtin tables, or
;; gate the masked variant on TARGET_AVX512DQ in the insn condition.
17572(define_insn "vec_set_hi_<mode><mask_name>"
17573 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
6bec6c98 17574 (vec_concat:VI8F_256
cbb734aa 17575 (vec_select:<ssehalfvecmode>
d0337ddc 17576 (match_operand:VI8F_256 1 "register_operand" "v")
95879c72 17577 (parallel [(const_int 0) (const_int 1)]))
d0337ddc 17578 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
95879c72 17579 "TARGET_AVX"
d0337ddc
AI
17580{
17581 if (TARGET_AVX512VL && TARGET_AVX512DQ)
17582 return "vinsert<shuffletype>64x2\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
 else if (TARGET_AVX512VL)
 return "vinsert<shuffletype>32x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
17583 else
17584 return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}";
17585}
95879c72 17586 [(set_attr "type" "sselog")
725fd454
JJ
17587 (set_attr "prefix_extra" "1")
17588 (set_attr "length_immediate" "1")
95879c72 17589 (set_attr "prefix" "vex")
1db4406e 17590 (set_attr "mode" "<sseinsnmode>")])
95879c72 17591
d0337ddc
AI
;; Build a VI4F_256 vector whose first half is operand 2 and whose second
;; half is elements 4-7 of operand 1 (insert at position 0).  Emits
;; vinsert<shuffletype>32x4 when TARGET_AVX512VL, else vinsert<i128>.
17592(define_insn "vec_set_lo_<mode><mask_name>"
17593 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
6bec6c98 17594 (vec_concat:VI4F_256
d0337ddc 17595 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
cbb734aa 17596 (vec_select:<ssehalfvecmode>
d0337ddc 17597 (match_operand:VI4F_256 1 "register_operand" "v")
95879c72
L
17598 (parallel [(const_int 4) (const_int 5)
17599 (const_int 6) (const_int 7)]))))]
17600 "TARGET_AVX"
d0337ddc
AI
17601{
17602 if (TARGET_AVX512VL)
17603 return "vinsert<shuffletype>32x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
17604 else
17605 return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}";
17606}
95879c72 17607 [(set_attr "type" "sselog")
725fd454
JJ
17608 (set_attr "prefix_extra" "1")
17609 (set_attr "length_immediate" "1")
95879c72 17610 (set_attr "prefix" "vex")
1db4406e 17611 (set_attr "mode" "<sseinsnmode>")])
95879c72 17612
d0337ddc
AI
;; Build a VI4F_256 vector whose first half is elements 0-3 of operand 1
;; and whose second half is operand 2 (insert at position 1).  Emits
;; vinsert<shuffletype>32x4 when TARGET_AVX512VL, else vinsert<i128>.
17613(define_insn "vec_set_hi_<mode><mask_name>"
17614 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
6bec6c98 17615 (vec_concat:VI4F_256
cbb734aa 17616 (vec_select:<ssehalfvecmode>
d0337ddc 17617 (match_operand:VI4F_256 1 "register_operand" "v")
95879c72
L
17618 (parallel [(const_int 0) (const_int 1)
17619 (const_int 2) (const_int 3)]))
d0337ddc 17620 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
95879c72 17621 "TARGET_AVX"
d0337ddc
AI
17622{
17623 if (TARGET_AVX512VL)
17624 return "vinsert<shuffletype>32x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
17625 else
17626 return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}";
17627}
95879c72 17628 [(set_attr "type" "sselog")
725fd454
JJ
17629 (set_attr "prefix_extra" "1")
17630 (set_attr "length_immediate" "1")
95879c72 17631 (set_attr "prefix" "vex")
1db4406e 17632 (set_attr "mode" "<sseinsnmode>")])
95879c72
L
17633
;; Build a V16HI vector from V8HI operand 2 followed by elements 8-15 of
;; operand 1; emitted as vinsert%~128 with immediate 0.
17634(define_insn "vec_set_lo_v16hi"
17635 [(set (match_operand:V16HI 0 "register_operand" "=x")
17636 (vec_concat:V16HI
17637 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
17638 (vec_select:V8HI
17639 (match_operand:V16HI 1 "register_operand" "x")
17640 (parallel [(const_int 8) (const_int 9)
17641 (const_int 10) (const_int 11)
17642 (const_int 12) (const_int 13)
17643 (const_int 14) (const_int 15)]))))]
17644 "TARGET_AVX"
1db4406e 17645 "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
95879c72 17646 [(set_attr "type" "sselog")
725fd454
JJ
17647 (set_attr "prefix_extra" "1")
17648 (set_attr "length_immediate" "1")
95879c72 17649 (set_attr "prefix" "vex")
1db4406e 17650 (set_attr "mode" "OI")])
95879c72
L
17651
;; Build a V16HI vector from elements 0-7 of operand 1 followed by V8HI
;; operand 2; emitted as vinsert%~128 with immediate 1.
17652(define_insn "vec_set_hi_v16hi"
17653 [(set (match_operand:V16HI 0 "register_operand" "=x")
17654 (vec_concat:V16HI
17655 (vec_select:V8HI
17656 (match_operand:V16HI 1 "register_operand" "x")
17657 (parallel [(const_int 0) (const_int 1)
17658 (const_int 2) (const_int 3)
17659 (const_int 4) (const_int 5)
17660 (const_int 6) (const_int 7)]))
17661 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
17662 "TARGET_AVX"
1db4406e 17663 "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
95879c72 17664 [(set_attr "type" "sselog")
725fd454
JJ
17665 (set_attr "prefix_extra" "1")
17666 (set_attr "length_immediate" "1")
95879c72 17667 (set_attr "prefix" "vex")
1db4406e 17668 (set_attr "mode" "OI")])
95879c72
L
17669
;; Build a V32QI vector from V16QI operand 2 followed by elements 16-31 of
;; operand 1; emitted as vinsert%~128 with immediate 0.
17670(define_insn "vec_set_lo_v32qi"
17671 [(set (match_operand:V32QI 0 "register_operand" "=x")
17672 (vec_concat:V32QI
17673 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
17674 (vec_select:V16QI
17675 (match_operand:V32QI 1 "register_operand" "x")
17676 (parallel [(const_int 16) (const_int 17)
17677 (const_int 18) (const_int 19)
17678 (const_int 20) (const_int 21)
17679 (const_int 22) (const_int 23)
17680 (const_int 24) (const_int 25)
17681 (const_int 26) (const_int 27)
17682 (const_int 28) (const_int 29)
17683 (const_int 30) (const_int 31)]))))]
17684 "TARGET_AVX"
1db4406e 17685 "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
95879c72 17686 [(set_attr "type" "sselog")
725fd454
JJ
17687 (set_attr "prefix_extra" "1")
17688 (set_attr "length_immediate" "1")
95879c72 17689 (set_attr "prefix" "vex")
1db4406e 17690 (set_attr "mode" "OI")])
95879c72
L
17691
;; Build a V32QI vector from elements 0-15 of operand 1 followed by V16QI
;; operand 2; emitted as vinsert%~128 with immediate 1.
17692(define_insn "vec_set_hi_v32qi"
17693 [(set (match_operand:V32QI 0 "register_operand" "=x")
17694 (vec_concat:V32QI
17695 (vec_select:V16QI
17696 (match_operand:V32QI 1 "register_operand" "x")
17697 (parallel [(const_int 0) (const_int 1)
17698 (const_int 2) (const_int 3)
17699 (const_int 4) (const_int 5)
17700 (const_int 6) (const_int 7)
17701 (const_int 8) (const_int 9)
17702 (const_int 10) (const_int 11)
17703 (const_int 12) (const_int 13)
17704 (const_int 14) (const_int 15)]))
17705 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
17706 "TARGET_AVX"
1db4406e 17707 "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
95879c72 17708 [(set_attr "type" "sselog")
725fd454
JJ
17709 (set_attr "prefix_extra" "1")
17710 (set_attr "length_immediate" "1")
95879c72 17711 (set_attr "prefix" "vex")
1db4406e 17712 (set_attr "mode" "OI")])
95879c72 17713
7b45b87f
UB
;; Masked vector load: register operand 0 is set from an UNSPEC_MASKMOV of
;; the mask register (operand 2, mode <sseintvecmode>) and the memory
;; source (operand 1).  Emitted as v<sseintprefix>maskmov<ssemodesuffix>.
17714(define_insn "<avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>"
17715 [(set (match_operand:V48_AVX2 0 "register_operand" "=x")
977e83a3 17716 (unspec:V48_AVX2
7b45b87f
UB
17717 [(match_operand:<sseintvecmode> 2 "register_operand" "x")
17718 (match_operand:V48_AVX2 1 "memory_operand" "m")]
fe646a69
UB
17719 UNSPEC_MASKMOV))]
17720 "TARGET_AVX"
7b45b87f 17721 "v<sseintprefix>maskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
977e83a3
KY
17722 [(set_attr "type" "sselog1")
17723 (set_attr "prefix_extra" "1")
17724 (set_attr "prefix" "vex")
01284895 17725 (set_attr "btver2_decode" "vector")
977e83a3
KY
17726 (set_attr "mode" "<sseinsnmode>")])
17727
;; Masked vector store: memory operand 0 is both read and written
;; (constraint "+m", and (match_dup 0) inside the unspec).  Operand 1 is
;; the mask register (mode <sseintvecmode>) and operand 2 the data register.
7b45b87f 17728(define_insn "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>"
e4ecb922 17729 [(set (match_operand:V48_AVX2 0 "memory_operand" "+m")
7b45b87f 17730 (unspec:V48_AVX2
fe646a69 17731 [(match_operand:<sseintvecmode> 1 "register_operand" "x")
7b45b87f 17732 (match_operand:V48_AVX2 2 "register_operand" "x")
f60c2554
UB
17733 (match_dup 0)]
17734 UNSPEC_MASKMOV))]
fe646a69 17735 "TARGET_AVX"
7b45b87f 17736 "v<sseintprefix>maskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
95879c72 17737 [(set_attr "type" "sselog1")
725fd454 17738 (set_attr "prefix_extra" "1")
95879c72 17739 (set_attr "prefix" "vex")
01284895 17740 (set_attr "btver2_decode" "vector")
7b45b87f 17741 (set_attr "mode" "<sseinsnmode>")])
95879c72 17742
5ce9450f
JJ
;; Named expander "maskload<mode>": generates the UNSPEC_MASKMOV load RTL
;; directly from its template (no preparation code).  Operand 1 is the
;; memory source and operand 2 the mask register.
17743(define_expand "maskload<mode>"
17744 [(set (match_operand:V48_AVX2 0 "register_operand")
17745 (unspec:V48_AVX2
17746 [(match_operand:<sseintvecmode> 2 "register_operand")
17747 (match_operand:V48_AVX2 1 "memory_operand")]
17748 UNSPEC_MASKMOV))]
17749 "TARGET_AVX")
17750
;; Named expander "maskstore<mode>": generates the UNSPEC_MASKMOV store RTL
;; directly from its template (no preparation code).  Operand 0 is the
;; memory destination (also read via match_dup), operand 1 the data
;; register and operand 2 the mask register.
17751(define_expand "maskstore<mode>"
17752 [(set (match_operand:V48_AVX2 0 "memory_operand")
17753 (unspec:V48_AVX2
17754 [(match_operand:<sseintvecmode> 2 "register_operand")
17755 (match_operand:V48_AVX2 1 "register_operand")
17756 (match_dup 0)]
17757 UNSPEC_MASKMOV))]
17758 "TARGET_AVX")
17759
;; Cast from the half-width vector mode to the 256-bit mode (UNSPEC_CAST).
;; The insn itself emits nothing ("#") and is split after reload into a
;; single move: for a register destination the destination is re-viewed in
;; <ssehalfvecmode>; otherwise (memory destination, register source per
;; the second alternative) the source register is re-viewed in <MODE>mode.
cd7c6bc5 17760(define_insn_and_split "avx_<castmode><avxsizesuffix>_<castmode>"
9b2133cd 17761 [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
95879c72 17762 (unspec:AVX256MODE2P
cbb734aa 17763 [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
95879c72
L
17764 UNSPEC_CAST))]
17765 "TARGET_AVX"
9b2133cd
L
17766 "#"
17767 "&& reload_completed"
17768 [(const_int 0)]
95879c72 17769{
31f9eb59 17770 rtx op0 = operands[0];
9b2133cd 17771 rtx op1 = operands[1];
31f9eb59
L
17772 if (REG_P (op0))
17773 op0 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op0));
6cf9eb27 17774 else
9b2133cd 17775 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
31f9eb59 17776 emit_move_insn (op0, op1);
9b2133cd
L
17777 DONE;
17778})
95879c72
L
17779
;; vec_init for V_256 modes (TARGET_AVX): delegates to
;; ix86_expand_vector_init with operands 0 and 1.
17780(define_expand "vec_init<mode>"
82e86dc6
UB
17781 [(match_operand:V_256 0 "register_operand")
17782 (match_operand 1)]
95879c72
L
17783 "TARGET_AVX"
17784{
17785 ix86_expand_vector_init (false, operands[0], operands[1]);
17786 DONE;
17787})
17788
;; vec_init for VF48_I1248 modes (TARGET_AVX512F): delegates to
;; ix86_expand_vector_init with operands 0 and 1.
ab931c71 17789(define_expand "vec_init<mode>"
bf584ca0 17790 [(match_operand:VF48_I1248 0 "register_operand")
ab931c71
AI
17791 (match_operand 1)]
17792 "TARGET_AVX512F"
17793{
17794 ix86_expand_vector_init (false, operands[0], operands[1]);
17795 DONE;
17796})
17797
;; Per-element variable arithmetic right shift (ashiftrt with a vector
;; shift count in operand 2) for VI48_AVX512F_AVX512VL modes; emitted as
;; vpsrav<ssemodesuffix>, optionally with a mask via <mask_operand3>.
cf92ae7f 17798(define_insn "<avx2_avx512>_ashrv<mode><mask_name>"
21c924ac
AI
17799 [(set (match_operand:VI48_AVX512F_AVX512VL 0 "register_operand" "=v")
17800 (ashiftrt:VI48_AVX512F_AVX512VL
17801 (match_operand:VI48_AVX512F_AVX512VL 1 "register_operand" "v")
17802 (match_operand:VI48_AVX512F_AVX512VL 2 "nonimmediate_operand" "vm")))]
47490470
AI
17803 "TARGET_AVX2 && <mask_mode512bit_condition>"
17804 "vpsrav<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
977e83a3 17805 [(set_attr "type" "sseishft")
5348cff8 17806 (set_attr "prefix" "maybe_evex")
ee3b466d 17807 (set_attr "mode" "<sseinsnmode>")])
977e83a3 17808
;; Per-element variable arithmetic right shift for VI2_AVX512VL modes
;; (TARGET_AVX512BW); emitted as vpsravw, optionally with a mask via
;; <mask_operand3>.
cf92ae7f 17809(define_insn "<avx2_avx512>_ashrv<mode><mask_name>"
21c924ac
AI
17810 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17811 (ashiftrt:VI2_AVX512VL
17812 (match_operand:VI2_AVX512VL 1 "register_operand" "v")
17813 (match_operand:VI2_AVX512VL 2 "nonimmediate_operand" "vm")))]
17814 "TARGET_AVX512BW"
17815 "vpsravw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17816 [(set_attr "type" "sseishft")
17817 (set_attr "prefix" "maybe_evex")
17818 (set_attr "mode" "<sseinsnmode>")])
17819
;; Per-element variable shifts covered by the any_lshift code iterator,
;; for VI48_AVX512F modes; emitted as vp<vshift>v<ssemodesuffix>,
;; optionally with a mask via <mask_operand3>.
cf92ae7f 17820(define_insn "<avx2_avx512>_<shift_insn>v<mode><mask_name>"
38f4b550
AI
17821 [(set (match_operand:VI48_AVX512F 0 "register_operand" "=v")
17822 (any_lshift:VI48_AVX512F
17823 (match_operand:VI48_AVX512F 1 "register_operand" "v")
17824 (match_operand:VI48_AVX512F 2 "nonimmediate_operand" "vm")))]
47490470
AI
17825 "TARGET_AVX2 && <mask_mode512bit_condition>"
17826 "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
977e83a3 17827 [(set_attr "type" "sseishft")
5348cff8 17828 (set_attr "prefix" "maybe_evex")
977e83a3 17829 (set_attr "mode" "<sseinsnmode>")])
38f4b550 17830
;; Per-element variable shifts covered by the any_lshift code iterator,
;; for VI2_AVX512VL modes (TARGET_AVX512BW); emitted as
;; vp<vshift>v<ssemodesuffix>, optionally with a mask via <mask_operand3>.
cf92ae7f 17831(define_insn "<avx2_avx512>_<shift_insn>v<mode><mask_name>"
38f4b550
AI
17832 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17833 (any_lshift:VI2_AVX512VL
17834 (match_operand:VI2_AVX512VL 1 "register_operand" "v")
17835 (match_operand:VI2_AVX512VL 2 "nonimmediate_operand" "vm")))]
17836 "TARGET_AVX512BW"
17837 "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17838 [(set_attr "type" "sseishft")
17839 (set_attr "prefix" "maybe_evex")
17840 (set_attr "mode" "<sseinsnmode>")])
977e83a3 17841
;; Concatenate two half-width vectors (operands 1 and 2) into a V_256_512
;; register.
;; Alternative 0 (operand 2 in a register or memory): vinsert<i128> of
;; operand 2 at position 1 on top of operand 1.
;; Alternative 1 (operand 2 matches constraint "C"): a plain vector move of
;; operand 1 into the %x0 / %t0 view of the destination, with the move
;; mnemonic chosen from the insn's "mode" attribute.
;; NOTE(review): "C" is presumably the all-zeros constant, in which case
;; the move relies on the upper half being cleared -- confirm in
;; constraints.md.
44167383 17842(define_insn "avx_vec_concat<mode>"
ec5e777c
AI
17843 [(set (match_operand:V_256_512 0 "register_operand" "=x,x")
17844 (vec_concat:V_256_512
cbb734aa
UB
17845 (match_operand:<ssehalfvecmode> 1 "register_operand" "x,x")
17846 (match_operand:<ssehalfvecmode> 2 "vector_move_operand" "xm,C")))]
95879c72
L
17847 "TARGET_AVX"
17848{
17849 switch (which_alternative)
17850 {
17851 case 0:
ec5e777c 17852 return "vinsert<i128>\t{$0x1, %2, %<concat_tg_mode>1, %0|%0, %<concat_tg_mode>1, %2, 0x1}";
95879c72
L
17853 case 1:
17854 switch (get_attr_mode (insn))
977e83a3 17855 {
ec5e777c
AI
17856 case MODE_V16SF:
17857 return "vmovaps\t{%1, %t0|%t0, %1}";
17858 case MODE_V8DF:
17859 return "vmovapd\t{%1, %t0|%t0, %1}";
95879c72
L
17860 case MODE_V8SF:
17861 return "vmovaps\t{%1, %x0|%x0, %1}";
17862 case MODE_V4DF:
17863 return "vmovapd\t{%1, %x0|%x0, %1}";
ec5e777c
AI
17864 case MODE_XI:
17865 return "vmovdqa\t{%1, %t0|%t0, %1}";
17866 case MODE_OI:
95879c72 17867 return "vmovdqa\t{%1, %x0|%x0, %1}";
ec5e777c
AI
17868 default:
17869 gcc_unreachable ();
95879c72
L
17870 }
17871 default:
17872 gcc_unreachable ();
17873 }
17874}
17875 [(set_attr "type" "sselog,ssemov")
725fd454
JJ
17876 (set_attr "prefix_extra" "1,*")
17877 (set_attr "length_immediate" "1,*")
ec5e777c 17878 (set_attr "prefix" "maybe_evex")
cbb734aa 17879 (set_attr "mode" "<sseinsnmode>")])
4ee89d5f 17880
b570c6dd
AI
;; vcvtph2ps with a V8HI register source producing V4SF.  Modelled as
;; elements 0-3 of a V8SF UNSPEC_VCVTPH2PS of operand 1.
17881(define_insn "vcvtph2ps<mask_name>"
17882 [(set (match_operand:V4SF 0 "register_operand" "=v")
4ee89d5f 17883 (vec_select:V4SF
b570c6dd 17884 (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "v")]
4ee89d5f
L
17885 UNSPEC_VCVTPH2PS)
17886 (parallel [(const_int 0) (const_int 1)
0a2818d5 17887 (const_int 2) (const_int 3)])))]
b570c6dd
AI
17888 "TARGET_F16C || TARGET_AVX512VL"
17889 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4ee89d5f 17890 [(set_attr "type" "ssecvt")
b570c6dd 17891 (set_attr "prefix" "maybe_evex")
4ee89d5f
L
17892 (set_attr "mode" "V4SF")])
17893
b570c6dd
AI
;; vcvtph2ps with a V4HI memory source producing V4SF
;; (UNSPEC_VCVTPH2PS directly on the memory operand).
;; NOTE(review): "prefix" is "vex" and "mode" is V8SF here, while the
;; register-source vcvtph2ps<mask_name> pattern uses "maybe_evex"/V4SF --
;; confirm whether the difference is intentional.
17894(define_insn "*vcvtph2ps_load<mask_name>"
17895 [(set (match_operand:V4SF 0 "register_operand" "=v")
4ee89d5f
L
17896 (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
17897 UNSPEC_VCVTPH2PS))]
b570c6dd
AI
17898 "TARGET_F16C || TARGET_AVX512VL"
17899 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4ee89d5f
L
17900 [(set_attr "type" "ssecvt")
17901 (set_attr "prefix" "vex")
17902 (set_attr "mode" "V8SF")])
17903
b570c6dd
AI
;; 256-bit vcvtph2ps: V8HI register-or-memory source (operand 1) producing
;; V8SF, modelled as UNSPEC_VCVTPH2PS.
17904(define_insn "vcvtph2ps256<mask_name>"
17905 [(set (match_operand:V8SF 0 "register_operand" "=v")
17906 (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "vm")]
4ee89d5f 17907 UNSPEC_VCVTPH2PS))]
b570c6dd
AI
17908 "TARGET_F16C || TARGET_AVX512VL"
17909 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4ee89d5f
L
17910 [(set_attr "type" "ssecvt")
17911 (set_attr "prefix" "vex")
01284895 17912 (set_attr "btver2_decode" "double")
4ee89d5f
L
17913 (set_attr "mode" "V8SF")])
17914
;; 512-bit vcvtph2ps (TARGET_AVX512F): V16HI source producing V16SF,
;; modelled as UNSPEC_VCVTPH2PS.  The source predicate/constraint and the
;; extra <round_saeonly_mask_op2> text in the template are supplied by the
;; round_saeonly subst attributes.
8a6ef760 17915(define_insn "<mask_codefor>avx512f_vcvtph2ps512<mask_name><round_saeonly_name>"
c003c6d6 17916 [(set (match_operand:V16SF 0 "register_operand" "=v")
47490470 17917 (unspec:V16SF
8a6ef760 17918 [(match_operand:V16HI 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
47490470 17919 UNSPEC_VCVTPH2PS))]
c003c6d6 17920 "TARGET_AVX512F"
8a6ef760 17921 "vcvtph2ps\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
c003c6d6
AI
17922 [(set_attr "type" "ssecvt")
17923 (set_attr "prefix" "evex")
17924 (set_attr "mode" "V16SF")])
17925
b570c6dd
AI
;; Masked vcvtps2ph expander (TARGET_AVX512VL).  The V4HI conversion result
;; (UNSPEC_VCVTPS2PH of operand 1 with immediate operand 2) is concatenated
;; with operand 5, which the preparation statement sets to a V4HI zero
;; vector, and the V8HI result is vec_merge'd with operand 3 under the QI
;; mask in operand 4.
17926(define_expand "vcvtps2ph_mask"
17927 [(set (match_operand:V8HI 0 "register_operand")
17928 (vec_merge:V8HI
17929 (vec_concat:V8HI
17930 (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
17931 (match_operand:SI 2 "const_0_to_255_operand")]
17932 UNSPEC_VCVTPS2PH)
17933 (match_dup 5))
17934 (match_operand:V8HI 3 "vector_move_operand")
17935 (match_operand:QI 4 "register_operand")))]
17936 "TARGET_AVX512VL"
17937 "operands[5] = CONST0_RTX (V4HImode);")
17938
4ee89d5f 17939(define_expand "vcvtps2ph"
82e86dc6 17940 [(set (match_operand:V8HI 0 "register_operand")
4ee89d5f 17941 (vec_concat:V8HI
82e86dc6
UB
17942 (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
17943 (match_operand:SI 2 "const_0_to_255_operand")]
4ee89d5f
L
17944 UNSPEC_VCVTPS2PH)
17945 (match_dup 3)))]
17946 "TARGET_F16C"
17947 "operands[3] = CONST0_RTX (V4HImode);")
17948
b570c6dd
AI
17949(define_insn "*vcvtps2ph<mask_name>"
17950 [(set (match_operand:V8HI 0 "register_operand" "=v")
4ee89d5f 17951 (vec_concat:V8HI
b570c6dd 17952 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "v")
c96b4102 17953 (match_operand:SI 2 "const_0_to_255_operand" "N")]
4ee89d5f 17954 UNSPEC_VCVTPS2PH)
82e86dc6 17955 (match_operand:V4HI 3 "const0_operand")))]
0774c160 17956 "(TARGET_F16C || TARGET_AVX512VL) && <mask_avx512vl_condition>"
b570c6dd 17957 "vcvtps2ph\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"
4ee89d5f 17958 [(set_attr "type" "ssecvt")
b570c6dd 17959 (set_attr "prefix" "maybe_evex")
4ee89d5f
L
17960 (set_attr "mode" "V4SF")])
17961
b570c6dd 17962(define_insn "*vcvtps2ph_store<mask_name>"
4ee89d5f
L
17963 [(set (match_operand:V4HI 0 "memory_operand" "=m")
17964 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
c96b4102 17965 (match_operand:SI 2 "const_0_to_255_operand" "N")]
4ee89d5f 17966 UNSPEC_VCVTPS2PH))]
b570c6dd
AI
17967 "TARGET_F16C || TARGET_AVX512VL"
17968 "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
4ee89d5f 17969 [(set_attr "type" "ssecvt")
b570c6dd 17970 (set_attr "prefix" "maybe_evex")
4ee89d5f
L
17971 (set_attr "mode" "V4SF")])
17972
b570c6dd 17973(define_insn "vcvtps2ph256<mask_name>"
4ee89d5f
L
17974 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm")
17975 (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "x")
c96b4102 17976 (match_operand:SI 2 "const_0_to_255_operand" "N")]
4ee89d5f 17977 UNSPEC_VCVTPS2PH))]
b570c6dd
AI
17978 "TARGET_F16C || TARGET_AVX512VL"
17979 "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
4ee89d5f 17980 [(set_attr "type" "ssecvt")
b570c6dd 17981 (set_attr "prefix" "maybe_evex")
01284895 17982 (set_attr "btver2_decode" "vector")
4ee89d5f 17983 (set_attr "mode" "V8SF")])
977e83a3 17984
47490470 17985(define_insn "<mask_codefor>avx512f_vcvtps2ph512<mask_name>"
c003c6d6 17986 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=vm")
47490470
AI
17987 (unspec:V16HI
17988 [(match_operand:V16SF 1 "register_operand" "v")
17989 (match_operand:SI 2 "const_0_to_255_operand" "N")]
17990 UNSPEC_VCVTPS2PH))]
c003c6d6 17991 "TARGET_AVX512F"
47490470 17992 "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
c003c6d6
AI
17993 [(set_attr "type" "ssecvt")
17994 (set_attr "prefix" "evex")
17995 (set_attr "mode" "V16SF")])
17996
977e83a3
KY
17997;; For gather* insn patterns
17998(define_mode_iterator VEC_GATHER_MODE
17999 [V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF])
;; Mode of the index vector, made of SImode elements, for each gather/scatter
;; data mode.  The dword-indexed (gathersi/scattersi) patterns use it for their
;; index operand.  Note that the two-element data modes (V2DI/V2DF) still map
;; to a full V4SI index vector.
aec7ae7d 18000(define_mode_attr VEC_GATHER_IDXSI
ab931c71
AI
18001 [(V2DI "V4SI") (V4DI "V4SI") (V8DI "V8SI")
18002 (V2DF "V4SI") (V4DF "V4SI") (V8DF "V8SI")
18003 (V4SI "V4SI") (V8SI "V8SI") (V16SI "V16SI")
18004 (V4SF "V4SI") (V8SF "V8SI") (V16SF "V16SI")])
18005
;; Mode of the index vector, made of DImode elements, for each gather/scatter
;; data mode.  The qword-indexed (gatherdi/scatterdi) patterns use it for their
;; index operand.  For 32-bit element data modes the index vector has the same
;; total size but only half as many elements (e.g. V8SI -> V4DI), except for
;; V4SI/V4SF which map to V2DI.
aec7ae7d 18006(define_mode_attr VEC_GATHER_IDXDI
ab931c71
AI
18007 [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
18008 (V2DF "V2DI") (V4DF "V4DI") (V8DF "V8DI")
18009 (V4SI "V2DI") (V8SI "V4DI") (V16SI "V8DI")
18010 (V4SF "V2DI") (V8SF "V4DI") (V16SF "V8DI")])
18011
;; Mode of the data/mask operands that accompany a DImode index vector in the
;; qword-indexed patterns.  It equals the data mode for 64-bit elements and
;; for V4SI/V4SF; for the wider 32-bit element modes it is the half-width
;; vector (V8SI -> V4SI, V16SI -> V8SI, likewise for SF).
aec7ae7d 18012(define_mode_attr VEC_GATHER_SRCDI
ab931c71
AI
18013 [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
18014 (V2DF "V2DF") (V4DF "V4DF") (V8DF "V8DF")
18015 (V4SI "V4SI") (V8SI "V4SI") (V16SI "V8SI")
18016 (V4SF "V4SF") (V8SF "V4SF") (V16SF "V8SF")])
977e83a3
KY
18017
;; Expander for the AVX2 gathers that use a vector of SImode indices.
;;   operand 0: destination vector
;;   operand 1: vector input in the first UNSPEC_GATHER slot (the matching
;;              insn ties it to the destination)
;;   operand 2: base address, checked by vsib_address_operand
;;   operand 3: index vector, mode <VEC_GATHER_IDXSI>
;;   operand 4: vector input in the last UNSPEC_GATHER slot (the matching
;;              insn ties it to the clobbered scratch)
;;   operand 5: scale factor, checked by const1248_operand
;;   operand 6: scratch vector clobbered by the insn
;;   operand 7: created below; an UNSPEC_VSIBADDR wrapping operands 2, 3, 5
;;              that serves as the address of the gathered memory.
;; The predicate of operand 5 used to be spelled "const1248_operand " with a
;; trailing blank, which is not the name of any predicate.
(define_expand "avx2_gathersi<mode>"
  [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
                   (unspec:VEC_GATHER_MODE
                     [(match_operand:VEC_GATHER_MODE 1 "register_operand")
                      (mem:<ssescalarmode>
                        (match_par_dup 7
                          [(match_operand 2 "vsib_address_operand")
                           (match_operand:<VEC_GATHER_IDXSI>
                              3 "register_operand")
                           (match_operand:SI 5 "const1248_operand")]))
                      (mem:BLK (scratch))
                      (match_operand:VEC_GATHER_MODE 4 "register_operand")]
                     UNSPEC_GATHER))
              (clobber (match_scratch:VEC_GATHER_MODE 6))])]
  "TARGET_AVX2"
{
  /* Bundle base, index and scale into the VSIB address expression.  */
  operands[7]
    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
                                        operands[5]), UNSPEC_VSIBADDR);
})
977e83a3
KY
18038
18039(define_insn "*avx2_gathersi<mode>"
9d901b0e 18040 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
977e83a3 18041 (unspec:VEC_GATHER_MODE
9d901b0e 18042 [(match_operand:VEC_GATHER_MODE 2 "register_operand" "0")
e43451aa
JJ
18043 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
18044 [(unspec:P
65e95828 18045 [(match_operand:P 3 "vsib_address_operand" "Tv")
aec7ae7d 18046 (match_operand:<VEC_GATHER_IDXSI> 4 "register_operand" "x")
e43451aa
JJ
18047 (match_operand:SI 6 "const1248_operand" "n")]
18048 UNSPEC_VSIBADDR)])
9d901b0e 18049 (mem:BLK (scratch))
e43451aa 18050 (match_operand:VEC_GATHER_MODE 5 "register_operand" "1")]
9d901b0e
JJ
18051 UNSPEC_GATHER))
18052 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
977e83a3 18053 "TARGET_AVX2"
e43451aa 18054 "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
977e83a3
KY
18055 [(set_attr "type" "ssemov")
18056 (set_attr "prefix" "vex")
18057 (set_attr "mode" "<sseinsnmode>")])
18058
da80a646
JJ
18059(define_insn "*avx2_gathersi<mode>_2"
18060 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
18061 (unspec:VEC_GATHER_MODE
18062 [(pc)
18063 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
18064 [(unspec:P
65e95828 18065 [(match_operand:P 2 "vsib_address_operand" "Tv")
da80a646
JJ
18066 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "x")
18067 (match_operand:SI 5 "const1248_operand" "n")]
18068 UNSPEC_VSIBADDR)])
18069 (mem:BLK (scratch))
18070 (match_operand:VEC_GATHER_MODE 4 "register_operand" "1")]
18071 UNSPEC_GATHER))
18072 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
18073 "TARGET_AVX2"
18074 "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
18075 [(set_attr "type" "ssemov")
18076 (set_attr "prefix" "vex")
18077 (set_attr "mode" "<sseinsnmode>")])
18078
;; Expander for the AVX2 gathers that use a vector of DImode indices.
;;   operand 0: destination vector (full VEC_GATHER_MODE)
;;   operand 1: vector input in the first UNSPEC_GATHER slot, in mode
;;              <VEC_GATHER_SRCDI> (half width for V8SI/V8SF)
;;   operand 2: base address, checked by vsib_address_operand
;;   operand 3: index vector, mode <VEC_GATHER_IDXDI>
;;   operand 4: vector input in the last UNSPEC_GATHER slot, also in mode
;;              <VEC_GATHER_SRCDI>
;;   operand 5: scale factor, checked by const1248_operand
;;   operand 6: scratch vector clobbered by the insn
;;   operand 7: created below; an UNSPEC_VSIBADDR wrapping operands 2, 3, 5
;;              that serves as the address of the gathered memory.
;; The predicate of operand 5 used to be spelled "const1248_operand " with a
;; trailing blank, which is not the name of any predicate.
(define_expand "avx2_gatherdi<mode>"
  [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
                   (unspec:VEC_GATHER_MODE
                     [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
                      (mem:<ssescalarmode>
                        (match_par_dup 7
                          [(match_operand 2 "vsib_address_operand")
                           (match_operand:<VEC_GATHER_IDXDI>
                              3 "register_operand")
                           (match_operand:SI 5 "const1248_operand")]))
                      (mem:BLK (scratch))
                      (match_operand:<VEC_GATHER_SRCDI>
                         4 "register_operand")]
                     UNSPEC_GATHER))
              (clobber (match_scratch:VEC_GATHER_MODE 6))])]
  "TARGET_AVX2"
{
  /* Bundle base, index and scale into the VSIB address expression.  */
  operands[7]
    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
                                        operands[5]), UNSPEC_VSIBADDR);
})
977e83a3
KY
18100
18101(define_insn "*avx2_gatherdi<mode>"
aec7ae7d
JJ
18102 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
18103 (unspec:VEC_GATHER_MODE
18104 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
e43451aa
JJ
18105 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
18106 [(unspec:P
65e95828 18107 [(match_operand:P 3 "vsib_address_operand" "Tv")
aec7ae7d 18108 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
e43451aa
JJ
18109 (match_operand:SI 6 "const1248_operand" "n")]
18110 UNSPEC_VSIBADDR)])
9d901b0e 18111 (mem:BLK (scratch))
aec7ae7d 18112 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
9d901b0e 18113 UNSPEC_GATHER))
aec7ae7d 18114 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
977e83a3 18115 "TARGET_AVX2"
aec7ae7d 18116 "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"
977e83a3
KY
18117 [(set_attr "type" "ssemov")
18118 (set_attr "prefix" "vex")
18119 (set_attr "mode" "<sseinsnmode>")])
da80a646
JJ
18120
;; Variant of the qword-indexed AVX2 gather that has (pc) in the first
;; UNSPEC_GATHER slot instead of a register input.  Operand 4 is tied to the
;; clobbered scratch (operand 1) by its "1" constraint.
;; When the data mode differs from <VEC_GATHER_SRCDI> (the V8SI/V8SF cases,
;; whose SRCDI mode is V4SI/V4SF) the destination is printed through the %x
;; modifier in both assembler dialects -- presumably to name the narrower
;; register that the instruction really writes; confirm against the operand
;; printer.
18121(define_insn "*avx2_gatherdi<mode>_2"
18122 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
18123 (unspec:VEC_GATHER_MODE
18124 [(pc)
18125 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
18126 [(unspec:P
65e95828 18127 [(match_operand:P 2 "vsib_address_operand" "Tv")
da80a646
JJ
18128 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
18129 (match_operand:SI 5 "const1248_operand" "n")]
18130 UNSPEC_VSIBADDR)])
18131 (mem:BLK (scratch))
18132 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
18133 UNSPEC_GATHER))
18134 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
18135 "TARGET_AVX2"
18136{
18137 if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
18138 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";
18139 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";
18140}
18141 [(set_attr "type" "ssemov")
18142 (set_attr "prefix" "vex")
18143 (set_attr "mode" "<sseinsnmode>")])
06046046
JJ
18144
18145(define_insn "*avx2_gatherdi<mode>_3"
18146 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
18147 (vec_select:<VEC_GATHER_SRCDI>
18148 (unspec:VI4F_256
18149 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
18150 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
18151 [(unspec:P
65e95828 18152 [(match_operand:P 3 "vsib_address_operand" "Tv")
06046046
JJ
18153 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
18154 (match_operand:SI 6 "const1248_operand" "n")]
18155 UNSPEC_VSIBADDR)])
18156 (mem:BLK (scratch))
18157 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
18158 UNSPEC_GATHER)
18159 (parallel [(const_int 0) (const_int 1)
18160 (const_int 2) (const_int 3)])))
18161 (clobber (match_scratch:VI4F_256 1 "=&x"))]
18162 "TARGET_AVX2"
18163 "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"
18164 [(set_attr "type" "ssemov")
18165 (set_attr "prefix" "vex")
18166 (set_attr "mode" "<sseinsnmode>")])
18167
18168(define_insn "*avx2_gatherdi<mode>_4"
18169 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
18170 (vec_select:<VEC_GATHER_SRCDI>
18171 (unspec:VI4F_256
18172 [(pc)
18173 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
18174 [(unspec:P
65e95828 18175 [(match_operand:P 2 "vsib_address_operand" "Tv")
06046046
JJ
18176 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
18177 (match_operand:SI 5 "const1248_operand" "n")]
18178 UNSPEC_VSIBADDR)])
18179 (mem:BLK (scratch))
18180 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
18181 UNSPEC_GATHER)
18182 (parallel [(const_int 0) (const_int 1)
18183 (const_int 2) (const_int 3)])))
18184 (clobber (match_scratch:VI4F_256 1 "=&x"))]
18185 "TARGET_AVX2"
18186 "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"
18187 [(set_attr "type" "ssemov")
18188 (set_attr "prefix" "vex")
18189 (set_attr "mode" "<sseinsnmode>")])
ab931c71 18190
be746da1
AI
18191(define_expand "<avx512>_gathersi<mode>"
18192 [(parallel [(set (match_operand:VI48F 0 "register_operand")
18193 (unspec:VI48F
18194 [(match_operand:VI48F 1 "register_operand")
ab931c71
AI
18195 (match_operand:<avx512fmaskmode> 4 "register_operand")
18196 (mem:<ssescalarmode>
18197 (match_par_dup 6
18198 [(match_operand 2 "vsib_address_operand")
18199 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand")
18200 (match_operand:SI 5 "const1248_operand")]))]
18201 UNSPEC_GATHER))
18202 (clobber (match_scratch:<avx512fmaskmode> 7))])]
18203 "TARGET_AVX512F"
18204{
18205 operands[6]
18206 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
18207 operands[5]), UNSPEC_VSIBADDR);
18208})
18209
18210(define_insn "*avx512f_gathersi<mode>"
be746da1
AI
18211 [(set (match_operand:VI48F 0 "register_operand" "=&v")
18212 (unspec:VI48F
18213 [(match_operand:VI48F 1 "register_operand" "0")
ab931c71
AI
18214 (match_operand:<avx512fmaskmode> 7 "register_operand" "2")
18215 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
18216 [(unspec:P
65e95828 18217 [(match_operand:P 4 "vsib_address_operand" "Tv")
ab931c71
AI
18218 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "v")
18219 (match_operand:SI 5 "const1248_operand" "n")]
18220 UNSPEC_VSIBADDR)])]
18221 UNSPEC_GATHER))
be792bce 18222 (clobber (match_scratch:<avx512fmaskmode> 2 "=&Yk"))]
ab931c71
AI
18223 "TARGET_AVX512F"
18224 "v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %g6}"
18225 [(set_attr "type" "ssemov")
18226 (set_attr "prefix" "evex")
18227 (set_attr "mode" "<sseinsnmode>")])
18228
18229(define_insn "*avx512f_gathersi<mode>_2"
be746da1
AI
18230 [(set (match_operand:VI48F 0 "register_operand" "=&v")
18231 (unspec:VI48F
ab931c71
AI
18232 [(pc)
18233 (match_operand:<avx512fmaskmode> 6 "register_operand" "1")
18234 (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
18235 [(unspec:P
65e95828 18236 [(match_operand:P 3 "vsib_address_operand" "Tv")
ab931c71
AI
18237 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
18238 (match_operand:SI 4 "const1248_operand" "n")]
18239 UNSPEC_VSIBADDR)])]
18240 UNSPEC_GATHER))
be792bce 18241 (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
ab931c71
AI
18242 "TARGET_AVX512F"
18243 "v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %g5}"
18244 [(set_attr "type" "ssemov")
18245 (set_attr "prefix" "evex")
18246 (set_attr "mode" "<sseinsnmode>")])
18247
18248
be746da1
AI
18249(define_expand "<avx512>_gatherdi<mode>"
18250 [(parallel [(set (match_operand:VI48F 0 "register_operand")
18251 (unspec:VI48F
ab931c71
AI
18252 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
18253 (match_operand:QI 4 "register_operand")
18254 (mem:<ssescalarmode>
18255 (match_par_dup 6
18256 [(match_operand 2 "vsib_address_operand")
18257 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand")
18258 (match_operand:SI 5 "const1248_operand")]))]
18259 UNSPEC_GATHER))
18260 (clobber (match_scratch:QI 7))])]
18261 "TARGET_AVX512F"
18262{
18263 operands[6]
18264 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
18265 operands[5]), UNSPEC_VSIBADDR);
18266})
18267
18268(define_insn "*avx512f_gatherdi<mode>"
be746da1
AI
18269 [(set (match_operand:VI48F 0 "register_operand" "=&v")
18270 (unspec:VI48F
ab931c71
AI
18271 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand" "0")
18272 (match_operand:QI 7 "register_operand" "2")
18273 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
18274 [(unspec:P
65e95828 18275 [(match_operand:P 4 "vsib_address_operand" "Tv")
ab931c71
AI
18276 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "v")
18277 (match_operand:SI 5 "const1248_operand" "n")]
18278 UNSPEC_VSIBADDR)])]
18279 UNSPEC_GATHER))
be792bce 18280 (clobber (match_scratch:QI 2 "=&Yk"))]
ab931c71
AI
18281 "TARGET_AVX512F"
18282 "v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %g6}"
18283 [(set_attr "type" "ssemov")
18284 (set_attr "prefix" "evex")
18285 (set_attr "mode" "<sseinsnmode>")])
18286
;; Variant of the qword-indexed AVX-512 gather that has (pc) in the first
;; UNSPEC_GATHER slot instead of a register input.
;;   operand 0: destination vector
;;   operand 1: mask-register scratch, clobbered
;;   operand 2: index vector, mode <VEC_GATHER_IDXDI>
;;   operand 3: base address
;;   operand 4: scale factor
;;   operand 5: the VSIB memory reference as a whole
;;   operand 6: incoming QImode mask, tied to operand 1
;; When the data mode differs from <VEC_GATHER_SRCDI> (32-bit elements in a
;; vector wider than 128 bits) the destination has to be printed at the
;; narrower SRCDI width: %x0 for modes smaller than 64 bytes, %t0 for 64-byte
;; modes.  Both assembler dialects must use the same modifier; the Intel half
;; of the first template used to print %t0 while the AT&T half printed %x0.
(define_insn "*avx512f_gatherdi<mode>_2"
  [(set (match_operand:VI48F 0 "register_operand" "=&v")
        (unspec:VI48F
          [(pc)
           (match_operand:QI 6 "register_operand" "1")
           (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
             [(unspec:P
                [(match_operand:P 3 "vsib_address_operand" "Tv")
                 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
                 (match_operand:SI 4 "const1248_operand" "n")]
                UNSPEC_VSIBADDR)])]
          UNSPEC_GATHER))
   (clobber (match_scratch:QI 1 "=&Yk"))]
  "TARGET_AVX512F"
{
  if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
    {
      if (<MODE_SIZE> != 64)
        return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %x0%{%1%}|%x0%{%1%}, %g5}";
      else
        return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %g5}";
    }
  return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %g5}";
}
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
18314
be746da1
AI
18315(define_expand "<avx512>_scattersi<mode>"
18316 [(parallel [(set (mem:VI48F
ab931c71
AI
18317 (match_par_dup 5
18318 [(match_operand 0 "vsib_address_operand")
18319 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand")
18320 (match_operand:SI 4 "const1248_operand")]))
be746da1 18321 (unspec:VI48F
ab931c71 18322 [(match_operand:<avx512fmaskmode> 1 "register_operand")
be746da1 18323 (match_operand:VI48F 3 "register_operand")]
ab931c71
AI
18324 UNSPEC_SCATTER))
18325 (clobber (match_scratch:<avx512fmaskmode> 6))])]
18326 "TARGET_AVX512F"
18327{
18328 operands[5]
18329 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
18330 operands[4]), UNSPEC_VSIBADDR);
18331})
18332
18333(define_insn "*avx512f_scattersi<mode>"
be746da1 18334 [(set (match_operator:VI48F 5 "vsib_mem_operator"
ab931c71 18335 [(unspec:P
65e95828 18336 [(match_operand:P 0 "vsib_address_operand" "Tv")
ab931c71
AI
18337 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
18338 (match_operand:SI 4 "const1248_operand" "n")]
18339 UNSPEC_VSIBADDR)])
be746da1 18340 (unspec:VI48F
ab931c71 18341 [(match_operand:<avx512fmaskmode> 6 "register_operand" "1")
be746da1 18342 (match_operand:VI48F 3 "register_operand" "v")]
ab931c71 18343 UNSPEC_SCATTER))
be792bce 18344 (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
ab931c71
AI
18345 "TARGET_AVX512F"
18346 "v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%5%{%1%}, %3}"
18347 [(set_attr "type" "ssemov")
18348 (set_attr "prefix" "evex")
18349 (set_attr "mode" "<sseinsnmode>")])
18350
be746da1
AI
18351(define_expand "<avx512>_scatterdi<mode>"
18352 [(parallel [(set (mem:VI48F
ab931c71
AI
18353 (match_par_dup 5
18354 [(match_operand 0 "vsib_address_operand")
be746da1 18355 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand")
ab931c71 18356 (match_operand:SI 4 "const1248_operand")]))
be746da1 18357 (unspec:VI48F
ab931c71
AI
18358 [(match_operand:QI 1 "register_operand")
18359 (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand")]
18360 UNSPEC_SCATTER))
18361 (clobber (match_scratch:QI 6))])]
18362 "TARGET_AVX512F"
18363{
18364 operands[5]
18365 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
18366 operands[4]), UNSPEC_VSIBADDR);
18367})
18368
18369(define_insn "*avx512f_scatterdi<mode>"
be746da1 18370 [(set (match_operator:VI48F 5 "vsib_mem_operator"
ab931c71 18371 [(unspec:P
65e95828 18372 [(match_operand:P 0 "vsib_address_operand" "Tv")
be746da1 18373 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
ab931c71
AI
18374 (match_operand:SI 4 "const1248_operand" "n")]
18375 UNSPEC_VSIBADDR)])
be746da1 18376 (unspec:VI48F
ab931c71
AI
18377 [(match_operand:QI 6 "register_operand" "1")
18378 (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand" "v")]
18379 UNSPEC_SCATTER))
be792bce 18380 (clobber (match_scratch:QI 1 "=&Yk"))]
ab931c71
AI
18381 "TARGET_AVX512F"
18382 "v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%5%{%1%}, %3}"
18383 [(set_attr "type" "ssemov")
18384 (set_attr "prefix" "evex")
18385 (set_attr "mode" "<sseinsnmode>")])
0fe65b75 18386
f7be73c8
AI
18387(define_insn "<avx512>_compress<mode>_mask"
18388 [(set (match_operand:VI48F 0 "register_operand" "=v")
18389 (unspec:VI48F
18390 [(match_operand:VI48F 1 "register_operand" "v")
18391 (match_operand:VI48F 2 "vector_move_operand" "0C")
be792bce 18392 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")]
47490470
AI
18393 UNSPEC_COMPRESS))]
18394 "TARGET_AVX512F"
18395 "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
18396 [(set_attr "type" "ssemov")
18397 (set_attr "prefix" "evex")
18398 (set_attr "mode" "<sseinsnmode>")])
18399
f7be73c8
AI
18400(define_insn "<avx512>_compressstore<mode>_mask"
18401 [(set (match_operand:VI48F 0 "memory_operand" "=m")
18402 (unspec:VI48F
18403 [(match_operand:VI48F 1 "register_operand" "x")
47490470 18404 (match_dup 0)
be792bce 18405 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")]
47490470
AI
18406 UNSPEC_COMPRESS_STORE))]
18407 "TARGET_AVX512F"
18408 "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
18409 [(set_attr "type" "ssemov")
18410 (set_attr "prefix" "evex")
18411 (set_attr "memory" "store")
18412 (set_attr "mode" "<sseinsnmode>")])
18413
f7be73c8
AI
18414(define_expand "<avx512>_expand<mode>_maskz"
18415 [(set (match_operand:VI48F 0 "register_operand")
18416 (unspec:VI48F
18417 [(match_operand:VI48F 1 "nonimmediate_operand")
18418 (match_operand:VI48F 2 "vector_move_operand")
8b08db1e
AI
18419 (match_operand:<avx512fmaskmode> 3 "register_operand")]
18420 UNSPEC_EXPAND))]
18421 "TARGET_AVX512F"
18422 "operands[2] = CONST0_RTX (<MODE>mode);")
18423
f7be73c8
AI
18424(define_insn "<avx512>_expand<mode>_mask"
18425 [(set (match_operand:VI48F 0 "register_operand" "=v,v")
18426 (unspec:VI48F
18427 [(match_operand:VI48F 1 "nonimmediate_operand" "v,m")
18428 (match_operand:VI48F 2 "vector_move_operand" "0C,0C")
be792bce 18429 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")]
47490470
AI
18430 UNSPEC_EXPAND))]
18431 "TARGET_AVX512F"
18432 "v<sseintprefix>expand<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
18433 [(set_attr "type" "ssemov")
18434 (set_attr "prefix" "evex")
18435 (set_attr "memory" "none,load")
18436 (set_attr "mode" "<sseinsnmode>")])
18437
b9826286
AI
18438(define_insn "avx512dq_rangep<mode><mask_name><round_saeonly_name>"
18439 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
18440 (unspec:VF_AVX512VL
18441 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
18442 (match_operand:VF_AVX512VL 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
18443 (match_operand:SI 3 "const_0_to_15_operand")]
18444 UNSPEC_RANGE))]
18445 "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>"
18446 "vrange<ssemodesuffix>\t{<round_saeonly_mask_op4>%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3<round_saeonly_mask_op4>}"
18447 [(set_attr "type" "sse")
18448 (set_attr "prefix" "evex")
18449 (set_attr "mode" "<MODE>")])
18450
18451(define_insn "avx512dq_ranges<mode><round_saeonly_name>"
18452 [(set (match_operand:VF_128 0 "register_operand" "=v")
18453 (vec_merge:VF_128
18454 (unspec:VF_128
18455 [(match_operand:VF_128 1 "register_operand" "v")
18456 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
18457 (match_operand:SI 3 "const_0_to_15_operand")]
18458 UNSPEC_RANGE)
18459 (match_dup 1)
18460 (const_int 1)))]
18461 "TARGET_AVX512DQ"
18462 "vrange<ssescalarmodesuffix>\t{<round_saeonly_op4>%3, %2, %1, %0|%0, %1, %2, %3<round_saeonly_op4>}"
18463 [(set_attr "type" "sse")
18464 (set_attr "prefix" "evex")
18465 (set_attr "mode" "<MODE>")])
18466
;; Packed VFPCLASS: operand 0 is a mask register (constraint Yk) receiving
;; UNSPEC_FPCLASS of vector operand 1 and the 8-bit immediate operand 2.
;; The <mask_scalar_merge_...> substitutions supply the optional write mask.
(define_insn "avx512dq_fpclass<mode><mask_scalar_merge_name>"
  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
        (unspec:<avx512fmaskmode>
          [(match_operand:VF_AVX512VL 1 "register_operand" "v")
           (match_operand:QI 2 "const_0_to_255_operand" "n")]
          UNSPEC_FPCLASS))]
  "TARGET_AVX512DQ"
  "vfpclass<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
  [(set_attr "type" "sse")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])
18479
;; Scalar VFPCLASS: same UNSPEC_FPCLASS as the packed form, but applied to a
;; 128-bit vector operand and ANDed with 1 so that only bit 0 of the mask
;; result is kept.
(define_insn "avx512dq_vmfpclass<mode>"
  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
        (and:<avx512fmaskmode>
          (unspec:<avx512fmaskmode>
            [(match_operand:VF_128 1 "register_operand" "v")
             (match_operand:QI 2 "const_0_to_255_operand" "n")]
            UNSPEC_FPCLASS)
          (const_int 1)))]
  "TARGET_AVX512DQ"
  "vfpclass<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sse")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])
18494
b040ded3
AI
;; Packed VGETMANT: operand 0 receives UNSPEC_GETMANT of vector operand 1 and
;; the 4-bit immediate operand 2.  Masking and the SAE-only rounding variant
;; come from the <mask_name> / <round_saeonly_name> substitutions.
(define_insn "<avx512>_getmant<mode><mask_name><round_saeonly_name>"
  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
        (unspec:VF_AVX512VL
          [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>")
           (match_operand:SI 2 "const_0_to_15_operand")]
          UNSPEC_GETMANT))]
  "TARGET_AVX512F"
  "vgetmant<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])
18505
;; Scalar VGETMANT: UNSPEC_GETMANT of operands 1 and 2 with the 4-bit
;; immediate operand 3 is merged into operand 1 under the constant mask 1,
;; i.e. only element 0 comes from the unspec and the remaining elements are
;; copied from operand 1.
(define_insn "avx512f_vgetmant<mode><round_saeonly_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=v")
        (vec_merge:VF_128
          (unspec:VF_128
            [(match_operand:VF_128 1 "register_operand" "v")
             (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
             (match_operand:SI 3 "const_0_to_15_operand")]
            UNSPEC_GETMANT)
          (match_dup 1)
          (const_int 1)))]
  "TARGET_AVX512F"
  "vgetmant<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<ssescalarmode>")])
18520
5f64b496
AI
18521;; The correct representation for this is absolutely enormous, and
18522;; surely not generally useful.
18523(define_insn "<mask_codefor>avx512bw_dbpsadbw<mode><mask_name>"
18524 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
18525 (unspec:VI2_AVX512VL
18526 [(match_operand:<dbpsadbwmode> 1 "register_operand" "v")
18527 (match_operand:<dbpsadbwmode> 2 "nonimmediate_operand" "vm")
18528 (match_operand:SI 3 "const_0_to_255_operand")]
18529 UNSPEC_DBPSADBW))]
18530 "TARGET_AVX512BW"
18531 "vdbpsadbw\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}"
18532 [(set_attr "isa" "avx")
18533 (set_attr "type" "sselog1")
18534 (set_attr "length_immediate" "1")
18535 (set_attr "prefix" "evex")
18536 (set_attr "mode" "<sseinsnmode>")])
18537
47490470 18538(define_insn "clz<mode>2<mask_name>"
6ead0238
AI
18539 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
18540 (clz:VI48_AVX512VL
18541 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")))]
0fe65b75 18542 "TARGET_AVX512CD"
47490470 18543 "vplzcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
0fe65b75
AI
18544 [(set_attr "type" "sse")
18545 (set_attr "prefix" "evex")
18546 (set_attr "mode" "<sseinsnmode>")])
18547
47490470 18548(define_insn "<mask_codefor>conflict<mode><mask_name>"
6ead0238
AI
18549 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
18550 (unspec:VI48_AVX512VL
18551 [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")]
0fe65b75
AI
18552 UNSPEC_CONFLICT))]
18553 "TARGET_AVX512CD"
47490470 18554 "vpconflict<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
0fe65b75
AI
18555 [(set_attr "type" "sse")
18556 (set_attr "prefix" "evex")
18557 (set_attr "mode" "<sseinsnmode>")])
c1618f82
AI
18558
18559(define_insn "sha1msg1"
18560 [(set (match_operand:V4SI 0 "register_operand" "=x")
18561 (unspec:V4SI
18562 [(match_operand:V4SI 1 "register_operand" "0")
18563 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
18564 UNSPEC_SHA1MSG1))]
18565 "TARGET_SHA"
18566 "sha1msg1\t{%2, %0|%0, %2}"
18567 [(set_attr "type" "sselog1")
18568 (set_attr "mode" "TI")])
18569
18570(define_insn "sha1msg2"
18571 [(set (match_operand:V4SI 0 "register_operand" "=x")
18572 (unspec:V4SI
18573 [(match_operand:V4SI 1 "register_operand" "0")
18574 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
18575 UNSPEC_SHA1MSG2))]
18576 "TARGET_SHA"
18577 "sha1msg2\t{%2, %0|%0, %2}"
18578 [(set_attr "type" "sselog1")
18579 (set_attr "mode" "TI")])
18580
18581(define_insn "sha1nexte"
18582 [(set (match_operand:V4SI 0 "register_operand" "=x")
18583 (unspec:V4SI
18584 [(match_operand:V4SI 1 "register_operand" "0")
18585 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
18586 UNSPEC_SHA1NEXTE))]
18587 "TARGET_SHA"
18588 "sha1nexte\t{%2, %0|%0, %2}"
18589 [(set_attr "type" "sselog1")
18590 (set_attr "mode" "TI")])
18591
18592(define_insn "sha1rnds4"
18593 [(set (match_operand:V4SI 0 "register_operand" "=x")
18594 (unspec:V4SI
18595 [(match_operand:V4SI 1 "register_operand" "0")
18596 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
18597 (match_operand:SI 3 "const_0_to_3_operand" "n")]
18598 UNSPEC_SHA1RNDS4))]
18599 "TARGET_SHA"
18600 "sha1rnds4\t{%3, %2, %0|%0, %2, %3}"
18601 [(set_attr "type" "sselog1")
18602 (set_attr "length_immediate" "1")
18603 (set_attr "mode" "TI")])
18604
18605(define_insn "sha256msg1"
18606 [(set (match_operand:V4SI 0 "register_operand" "=x")
18607 (unspec:V4SI
18608 [(match_operand:V4SI 1 "register_operand" "0")
18609 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
18610 UNSPEC_SHA256MSG1))]
18611 "TARGET_SHA"
18612 "sha256msg1\t{%2, %0|%0, %2}"
18613 [(set_attr "type" "sselog1")
18614 (set_attr "mode" "TI")])
18615
18616(define_insn "sha256msg2"
18617 [(set (match_operand:V4SI 0 "register_operand" "=x")
18618 (unspec:V4SI
18619 [(match_operand:V4SI 1 "register_operand" "0")
18620 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
18621 UNSPEC_SHA256MSG2))]
18622 "TARGET_SHA"
18623 "sha256msg2\t{%2, %0|%0, %2}"
18624 [(set_attr "type" "sselog1")
18625 (set_attr "mode" "TI")])
18626
;; SHA256RNDS2.
;;   operand 0: destination, tied to operand 1
;;   operand 1: first source (register, constraint "0")
;;   operand 2: second source, register or memory
;;   operand 3: third source, a register restricted by constraint "Yz"
;;              (the instruction's implicit operand)
;; The instruction carries no immediate byte -- all three sources are
;; registers/memory -- so it must not claim length_immediate 1 (that belongs
;; to sha1rnds4, which does take an immediate).  Its opcode lives behind the
;; two-byte 0F 38 escape, which is what prefix_extra accounts for.
(define_insn "sha256rnds2"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (unspec:V4SI
          [(match_operand:V4SI 1 "register_operand" "0")
           (match_operand:V4SI 2 "nonimmediate_operand" "xm")
           (match_operand:V4SI 3 "register_operand" "Yz")]
          UNSPEC_SHA256RNDS2))]
  "TARGET_SHA"
  "sha256rnds2\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "TI")])
275be1da
IT
18639
;; Cast of a quarter-width vector (<ssequartermode>) to the full
;; AVX512MODE2P mode, represented as UNSPEC_CAST.  The insn emits "#" and is
;; split once reload has completed into a single move:
;;  - register destination: the destination register is re-created in the
;;    narrow mode and the narrow source is moved into it;
;;  - otherwise (the memory-destination alternative): the source register is
;;    re-created in the wide mode and stored.
18640(define_insn_and_split "avx512f_<castmode><avxsizesuffix>_<castmode>"
18641 [(set (match_operand:AVX512MODE2P 0 "nonimmediate_operand" "=x,m")
18642 (unspec:AVX512MODE2P
18643 [(match_operand:<ssequartermode> 1 "nonimmediate_operand" "xm,x")]
18644 UNSPEC_CAST))]
18645 "TARGET_AVX512F"
18646 "#"
18647 "&& reload_completed"
18648 [(const_int 0)]
18649{
18650 rtx op0 = operands[0];
18651 rtx op1 = operands[1];
18652 if (REG_P (op0))
18653 op0 = gen_rtx_REG (<ssequartermode>mode, REGNO (op0));
18654 else
18655 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
18656 emit_move_insn (op0, op1);
18657 DONE;
18658})
18659
;; Cast of a half-width vector (<ssehalfvecmode>) to the full AVX512MODE2P
;; mode, represented as UNSPEC_CAST.  The insn emits "#" and is split once
;; reload has completed into a single move:
;;  - register destination: the destination register is re-created in the
;;    half-width mode and the source is moved into it;
;;  - otherwise (the memory-destination alternative): the source register is
;;    re-created in the wide mode and stored.
18660(define_insn_and_split "avx512f_<castmode><avxsizesuffix>_256<castmode>"
18661 [(set (match_operand:AVX512MODE2P 0 "nonimmediate_operand" "=x,m")
18662 (unspec:AVX512MODE2P
18663 [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
18664 UNSPEC_CAST))]
18665 "TARGET_AVX512F"
18666 "#"
18667 "&& reload_completed"
18668 [(const_int 0)]
18669{
18670 rtx op0 = operands[0];
18671 rtx op1 = operands[1];
18672 if (REG_P (op0))
18673 op0 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op0));
18674 else
18675 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
18676 emit_move_insn (op0, op1);
18677 DONE;
18678})
4190ea38
IT
18679
;; Iterates over the two 52-bit multiply-add unspecs so that a single pattern
;; can describe both instructions.
18680(define_int_iterator VPMADD52
18681 [UNSPEC_VPMADD52LUQ
18682 UNSPEC_VPMADD52HUQ])
18683
;; Mnemonic suffix selected by the VPMADD52 iterator ("luq" or "huq").
18684(define_int_attr vpmadd52type
18685 [(UNSPEC_VPMADD52LUQ "luq") (UNSPEC_VPMADD52HUQ "huq")])
18686
;; Zero-masking expander for the "huq" form: forwards operands 0-3 to the
;; corresponding _maskz_1 pattern, passing an all-zero vector of <MODE> ahead
;; of the mask (operand 4).
;; NOTE(review): "vpamdd52" looks like a transposition of "vpmadd52" (the
;; spelling used by the VPMADD52 iterator and the emitted mnemonic); the name
;; is referenced from outside this pattern, so it is left untouched here.
18687(define_expand "vpamdd52huq<mode>_maskz"
18688 [(match_operand:VI8_AVX512VL 0 "register_operand")
18689 (match_operand:VI8_AVX512VL 1 "register_operand")
18690 (match_operand:VI8_AVX512VL 2 "register_operand")
18691 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand")
18692 (match_operand:<avx512fmaskmode> 4 "register_operand")]
18693 "TARGET_AVX512IFMA"
18694{
18695 emit_insn (gen_vpamdd52huq<mode>_maskz_1 (
18696 operands[0], operands[1], operands[2], operands[3],
18697 CONST0_RTX (<MODE>mode), operands[4]));
18698 DONE;
18699})
18700
;; Expander for the zero-masked "luq" form.  Takes the destination
;; (operand 0), three VI8_AVX512VL inputs (operands 1-3; operand 3 may
;; be memory) and a mask register (operand 4).  Requires
;; TARGET_AVX512IFMA.
;;
;; It forwards to gen_vpamdd52luq<mode>_maskz_1, inserting
;; CONST0_RTX (<MODE>mode) as the extra argument placed before the mask.
;; Presumably the _maskz_1 generator comes from the <sd_maskz_name>
;; substitution on the vpamdd52 define_insn - confirm against the subst
;; definitions.
;;
;; NOTE(review): the name is spelled "vpamdd52" although the instruction
;; mnemonic is "vpmadd52".  The generator called below uses the same
;; spelling, so any rename must be made consistently across all users.
18701(define_expand "vpamdd52luq<mode>_maskz"
18702 [(match_operand:VI8_AVX512VL 0 "register_operand")
18703 (match_operand:VI8_AVX512VL 1 "register_operand")
18704 (match_operand:VI8_AVX512VL 2 "register_operand")
18705 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand")
18706 (match_operand:<avx512fmaskmode> 4 "register_operand")]
18707 "TARGET_AVX512IFMA"
18708{
18709 emit_insn (gen_vpamdd52luq<mode>_maskz_1 (
18710 operands[0], operands[1], operands[2], operands[3],
18711 CONST0_RTX (<MODE>mode), operands[4]));
18712 DONE;
18713})
18714
;; vpmadd52luq / vpmadd52huq insn, instantiated for each VPMADD52 unspec
;; and each VI8_AVX512VL mode.  Requires TARGET_AVX512IFMA.
;;
;; Operand 1 is tied to the destination (constraint "0") and therefore
;; does not appear in the output template; operand 2 must be a register,
;; operand 3 may be a register or memory.
;; <sd_maskz_name> in the name and <sd_mask_op4> in the template are
;; substitution hooks defined outside this view; <sd_mask_op4> expands
;; to the text appended after %0 in both assembler dialects.
;;
;; NOTE(review): the pattern name is spelled "vpamdd52" while the emitted
;; mnemonic is "vpmadd52"; the _maskz expanders call generators derived
;; from this name, so the spelling must stay in sync with them.
18715(define_insn "vpamdd52<vpmadd52type><mode><sd_maskz_name>"
18716 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
18717 (unspec:VI8_AVX512VL
18718 [(match_operand:VI8_AVX512VL 1 "register_operand" "0")
18719 (match_operand:VI8_AVX512VL 2 "register_operand" "v")
18720 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand" "vm")]
18721 VPMADD52))]
18722 "TARGET_AVX512IFMA"
18723 "vpmadd52<vpmadd52type>\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3}"
18724 [(set_attr "type" "ssemuladd")
18725 (set_attr "prefix" "evex")
18726 (set_attr "mode" "<sseinsnmode>")])
18727
;; Merge-masked vpmadd52luq / vpmadd52huq.  Requires TARGET_AVX512IFMA.
;;
;; The VPMADD52 unspec result is combined by vec_merge with operand 1
;; itself (match_dup 1) under the mask in operand 4 (constraint "Yk").
;; Operand 1 is also tied to the destination (constraint "0"), so it is
;; not printed separately; the template prints the mask as {%4} directly
;; after %0 in both assembler dialects.
;;
;; NOTE(review): the pattern name is spelled "vpamdd52" while the emitted
;; mnemonic is "vpmadd52"; callers outside this view presumably reference
;; the generated name, so it is left unchanged.
18728(define_insn "vpamdd52<vpmadd52type><mode>_mask"
18729 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
18730 (vec_merge:VI8_AVX512VL
18731 (unspec:VI8_AVX512VL
18732 [(match_operand:VI8_AVX512VL 1 "register_operand" "0")
18733 (match_operand:VI8_AVX512VL 2 "register_operand" "v")
18734 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand" "vm")]
18735 VPMADD52)
18736 (match_dup 1)
18737 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
18738 "TARGET_AVX512IFMA"
18739 "vpmadd52<vpmadd52type>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
18740 [(set_attr "type" "ssemuladd")
18741 (set_attr "prefix" "evex")
18742 (set_attr "mode" "<sseinsnmode>")])
18743
3dcc8af5
IT
;; vpmultishiftqb insn for the VI1_AVX512VL modes, expressed as
;; UNSPEC_VPMULTISHIFT of two inputs.  Requires TARGET_AVX512VBMI.
;;
;; Operand 1 must be a register; operand 2 may be a register or memory.
;; <mask_name> in the name and <mask_operand3> in the template are
;; substitution hooks defined outside this view; <mask_operand3> expands
;; to the text appended after %0 in both assembler dialects.
18744(define_insn "vpmultishiftqb<mode><mask_name>"
18745 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
18746 (unspec:VI1_AVX512VL
18747 [(match_operand:VI1_AVX512VL 1 "register_operand" "v")
18748 (match_operand:VI1_AVX512VL 2 "nonimmediate_operand" "vm")]
18749 UNSPEC_VPMULTISHIFT))]
18750 "TARGET_AVX512VBMI"
18751 "vpmultishiftqb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
18752 [(set_attr "type" "sselog")
18753 (set_attr "prefix" "evex")
18754 (set_attr "mode" "<sseinsnmode>")])