]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/config/i386/sse.md
Update copyright years.
[thirdparty/gcc.git] / gcc / config / i386 / sse.md
CommitLineData
5802c0cb 1;; GCC machine description for SSE instructions
f1717362 2;; Copyright (C) 2005-2016 Free Software Foundation, Inc.
5802c0cb 3;;
4;; This file is part of GCC.
5;;
6;; GCC is free software; you can redistribute it and/or modify
7;; it under the terms of the GNU General Public License as published by
038d1e19 8;; the Free Software Foundation; either version 3, or (at your option)
5802c0cb 9;; any later version.
10;;
11;; GCC is distributed in the hope that it will be useful,
12;; but WITHOUT ANY WARRANTY; without even the implied warranty of
13;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14;; GNU General Public License for more details.
15;;
16;; You should have received a copy of the GNU General Public License
038d1e19 17;; along with GCC; see the file COPYING3. If not see
18;; <http://www.gnu.org/licenses/>.
5802c0cb 19
;; UNSPEC codes used by the SSE/AVX patterns, grouped by the ISA extension
;; they belong to.  Member order is significant for the generated enum, so
;; new codes should be appended to the matching group without reshuffling.
(define_c_enum "unspec" [
  ;; SSE
  UNSPEC_MOVNT
  UNSPEC_LOADU
  UNSPEC_STOREU

  ;; SSE3
  UNSPEC_LDDQU

  ;; SSSE3
  UNSPEC_PSHUFB
  UNSPEC_PSIGN
  UNSPEC_PALIGNR

  ;; SSE4A
  UNSPEC_EXTRQI
  UNSPEC_EXTRQ
  UNSPEC_INSERTQI
  UNSPEC_INSERTQ

  ;; SSE4.1
  UNSPEC_BLENDV
  UNSPEC_INSERTPS
  UNSPEC_DP
  UNSPEC_MOVNTDQA
  UNSPEC_MPSADBW
  UNSPEC_PHMINPOSUW
  UNSPEC_PTEST

  ;; SSE4.2
  UNSPEC_PCMPESTR
  UNSPEC_PCMPISTR

  ;; FMA4 (the UNSPEC_XOP_* codes are kept in this group as well)
  UNSPEC_FMADDSUB
  UNSPEC_XOP_UNSIGNED_CMP
  UNSPEC_XOP_TRUEFALSE
  UNSPEC_XOP_PERMUTE
  UNSPEC_FRCZ

  ;; AES
  UNSPEC_AESENC
  UNSPEC_AESENCLAST
  UNSPEC_AESDEC
  UNSPEC_AESDECLAST
  UNSPEC_AESIMC
  UNSPEC_AESKEYGENASSIST

  ;; PCLMUL
  UNSPEC_PCLMUL

  ;; AVX
  UNSPEC_PCMP
  UNSPEC_VPERMIL
  UNSPEC_VPERMIL2
  UNSPEC_VPERMIL2F128
  UNSPEC_CAST
  UNSPEC_VTESTP
  UNSPEC_VCVTPH2PS
  UNSPEC_VCVTPS2PH

  ;; AVX2
  UNSPEC_VPERMVAR
  UNSPEC_VPERMTI
  UNSPEC_GATHER
  UNSPEC_VSIBADDR

  ;; AVX512F
  UNSPEC_VPERMI2
  UNSPEC_VPERMT2
  UNSPEC_VPERMI2_MASK
  UNSPEC_UNSIGNED_FIX_NOTRUNC
  UNSPEC_UNSIGNED_PCMP
  UNSPEC_TESTM
  UNSPEC_TESTNM
  UNSPEC_SCATTER
  UNSPEC_RCP14
  UNSPEC_RSQRT14
  UNSPEC_FIXUPIMM
  UNSPEC_SCALEF
  UNSPEC_VTERNLOG
  UNSPEC_GETEXP
  UNSPEC_GETMANT
  UNSPEC_ALIGN
  UNSPEC_CONFLICT
  UNSPEC_COMPRESS
  UNSPEC_COMPRESS_STORE
  UNSPEC_EXPAND
  UNSPEC_MASKED_EQ
  UNSPEC_MASKED_GT

  ;; Embedded rounding feature
  UNSPEC_EMBEDDED_ROUNDING

  ;; AVX512PF
  UNSPEC_GATHER_PREFETCH
  UNSPEC_SCATTER_PREFETCH

  ;; AVX512ER
  UNSPEC_EXP2
  UNSPEC_RCP28
  UNSPEC_RSQRT28

  ;; SHA
  UNSPEC_SHA1MSG1
  UNSPEC_SHA1MSG2
  UNSPEC_SHA1NEXTE
  UNSPEC_SHA1RNDS4
  UNSPEC_SHA256MSG1
  UNSPEC_SHA256MSG2
  UNSPEC_SHA256RNDS2

  ;; AVX512BW
  UNSPEC_DBPSADBW
  UNSPEC_PMADDUBSW512
  UNSPEC_PMADDWD512
  UNSPEC_PSHUFHW
  UNSPEC_PSHUFLW
  UNSPEC_CVTINT2MASK

  ;; AVX512DQ
  UNSPEC_REDUCE
  UNSPEC_FPCLASS
  UNSPEC_RANGE

  ;; AVX512IFMA
  UNSPEC_VPMADD52LUQ
  UNSPEC_VPMADD52HUQ

  ;; AVX512VBMI
  UNSPEC_VPMULTISHIFT
])
152
;; UNSPEC_VOLATILE codes used by the SSE/AVX patterns.
(define_c_enum "unspecv" [
  UNSPECV_LDMXCSR
  UNSPECV_STMXCSR
  UNSPECV_CLFLUSH
  UNSPECV_MONITOR
  UNSPECV_MWAIT
  UNSPECV_VZEROALL
  UNSPECV_VZEROUPPER
])
162
;; Every vector mode handled by the move patterns, V?TImode included.
;; 128-bit modes are unconditional, 256-bit modes are gated on TARGET_AVX
;; and 512-bit modes on TARGET_AVX512F (TARGET_AVX512BW for V4TI).
(define_mode_iterator VMOVE
  [(V64QI "TARGET_AVX512F")  (V32QI "TARGET_AVX") V16QI
   (V32HI "TARGET_AVX512F")  (V16HI "TARGET_AVX") V8HI
   (V16SI "TARGET_AVX512F")  (V8SI "TARGET_AVX")  V4SI
   (V8DI "TARGET_AVX512F")   (V4DI "TARGET_AVX")  V2DI
   (V4TI "TARGET_AVX512BW")  (V2TI "TARGET_AVX")  V1TI
   (V16SF "TARGET_AVX512F")  (V8SF "TARGET_AVX")  V4SF
   (V8DF "TARGET_AVX512F")   (V4DF "TARGET_AVX")  V2DF])
5802c0cb 172
;; All AVX-512{F,VL} vector modes with 4- and 8-byte elements.
;; TARGET_AVX512F is assumed as the baseline; the 128/256-bit variants
;; additionally require TARGET_AVX512VL.
(define_mode_iterator V48_AVX512VL
  [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
   V8DI  (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
   V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
   V8DF  (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])

;; AVX-512{BW,VL} vector modes with 1- and 2-byte elements.
;; TARGET_AVX512BW is assumed as the baseline.
(define_mode_iterator VI12_AVX512VL
  [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")
   V32HI (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])

;; QImode-element vectors only: V64QI unconditionally, the narrower
;; ones with TARGET_AVX512VL.
(define_mode_iterator VI1_AVX512VL
  [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")])
187
;; General vector modes.  Note that V64QI and V32HI are not part of
;; this list, and V2DF is only enabled with TARGET_SSE2.
(define_mode_iterator V
  [(V32QI "TARGET_AVX") V16QI
   (V16HI "TARGET_AVX") V8HI
   (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
   (V8DI "TARGET_AVX512F")  (V4DI "TARGET_AVX") V2DI
   (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
   (V8DF "TARGET_AVX512F")  (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])

;; All 128-bit vector modes.
(define_mode_iterator V_128
  [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])

;; All 256-bit vector modes.
(define_mode_iterator V_256
  [V32QI V16HI V8SI V4DI V8SF V4DF])

;; All 512-bit vector modes.
(define_mode_iterator V_512
  [V64QI V32HI V16SI V8DI V16SF V8DF])

;; All 256-bit modes, plus all 512-bit modes when TARGET_AVX512F is set.
(define_mode_iterator V_256_512
  [V32QI V16HI V8SI V4DI V8SF V4DF
   (V64QI "TARGET_AVX512F") (V32HI "TARGET_AVX512F")
   (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
   (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
213
;; All vector float modes.
(define_mode_iterator VF
  [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
   (V8DF "TARGET_AVX512F")  (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])

;; 128- and 256-bit float vector modes.
(define_mode_iterator VF_128_256
  [(V8SF "TARGET_AVX") V4SF
   (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])

;; All vector modes with SFmode elements.
(define_mode_iterator VF1
  [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF])

;; 128- and 256-bit SF vector modes.
(define_mode_iterator VF1_128_256
  [(V8SF "TARGET_AVX") V4SF])

;; V8SF unconditionally, V4SF only with TARGET_AVX512VL.
(define_mode_iterator VF1_128_256VL
  [V8SF (V4SF "TARGET_AVX512VL")])

;; All vector modes with DFmode elements.
(define_mode_iterator VF2
  [(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])

;; 128- and 256-bit DF vector modes.
(define_mode_iterator VF2_128_256
  [(V4DF "TARGET_AVX") V2DF])

;; 256-bit DF vectors, plus the 512-bit one with TARGET_AVX512F.
(define_mode_iterator VF2_512_256
  [(V8DF "TARGET_AVX512F") V4DF])

;; V8DF unconditionally, V4DF only with TARGET_AVX512VL.
(define_mode_iterator VF2_512_256VL
  [V8DF (V4DF "TARGET_AVX512VL")])

;; All 128-bit vector float modes.
(define_mode_iterator VF_128
  [V4SF (V2DF "TARGET_SSE2")])

;; All 256-bit vector float modes.
(define_mode_iterator VF_256
  [V8SF V4DF])

;; All 512-bit vector float modes.
(define_mode_iterator VF_512
  [V16SF V8DF])
260
;; The iterators below list the 512-bit mode unconditionally and gate
;; the 256/128-bit modes on TARGET_AVX512VL.

;; SImode/DImode element integer vectors.
(define_mode_iterator VI48_AVX512VL
  [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
   V8DI  (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])

;; SFmode/DFmode element float vectors.
(define_mode_iterator VF_AVX512VL
  [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
   V8DF  (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])

;; DFmode element float vectors.
(define_mode_iterator VF2_AVX512VL
  [V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])

;; SFmode element float vectors.
(define_mode_iterator VF1_AVX512VL
  [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")])
274
;; All vector integer modes.  256-bit modes need TARGET_AVX; 512-bit
;; SI/DI vectors need TARGET_AVX512F and 512-bit QI/HI vectors need
;; TARGET_AVX512BW.
(define_mode_iterator VI
  [(V16SI "TARGET_AVX512F")  (V8DI "TARGET_AVX512F")
   (V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
   (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
   (V8SI "TARGET_AVX") V4SI
   (V4DI "TARGET_AVX") V2DI])

;; Same element sizes as VI, but 256-bit modes are gated on TARGET_AVX2.
(define_mode_iterator VI_AVX2
  [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
   (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
   (V16SI "TARGET_AVX512F")  (V8SI "TARGET_AVX2")  V4SI
   (V8DI "TARGET_AVX512F")   (V4DI "TARGET_AVX2")  V2DI])

;; All QImode vector integer modes (128- and 256-bit).
(define_mode_iterator VI1
  [(V32QI "TARGET_AVX") V16QI])

;; HI/QI element modes for the unaligned load/store patterns; the
;; 128/256-bit HImode vectors require TARGET_AVX512VL.
(define_mode_iterator VI_ULOADSTORE_BW_AVX512VL
  [V64QI
   V32HI (V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL")])

;; SI/DI element modes for the unaligned load/store patterns; the
;; 128/256-bit vectors require TARGET_AVX512VL.
(define_mode_iterator VI_ULOADSTORE_F_AVX512VL
  [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
   V8DI  (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
6a3f5f59 300
;; All 128-bit integer and float vector modes, plus their 256-bit
;; counterparts when TARGET_AVX is enabled.
(define_mode_iterator V_AVX
  [V16QI V8HI V4SI V2DI V4SF V2DF
   (V32QI "TARGET_AVX") (V16HI "TARGET_AVX")
   (V8SI "TARGET_AVX")  (V4DI "TARGET_AVX")
   (V8SF "TARGET_AVX")  (V4DF "TARGET_AVX")])

;; All DImode vector integer modes
(define_mode_iterator VI8
  [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
e5f53f2a 310
;; Single element-size integer iterators.  The digit after "VI" is the
;; element size in bytes; the rest of the name tells which target flags
;; gate the wider vectors.

(define_mode_iterator VI8_AVX512VL
  [V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])

(define_mode_iterator VI8_256_512
  [V8DI (V4DI "TARGET_AVX512VL")])

(define_mode_iterator VI1_AVX2
  [(V32QI "TARGET_AVX2") V16QI])

(define_mode_iterator VI1_AVX512
  [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI])

(define_mode_iterator VI2_AVX2
  [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])

;; Unlike VI2_AVX2, V32HI is gated on TARGET_AVX512F here.
(define_mode_iterator VI2_AVX512F
  [(V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI])

(define_mode_iterator VI4_AVX
  [(V8SI "TARGET_AVX") V4SI])

(define_mode_iterator VI4_AVX2
  [(V8SI "TARGET_AVX2") V4SI])

(define_mode_iterator VI4_AVX512F
  [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])

(define_mode_iterator VI4_AVX512VL
  [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")])

;; SI vectors up to 256 bits are unconditional; DI vectors up to 256
;; bits need TARGET_AVX512VL; 512-bit vectors need TARGET_AVX512F.
(define_mode_iterator VI48_AVX512F_AVX512VL
  [V4SI V8SI (V16SI "TARGET_AVX512F")
   (V2DI "TARGET_AVX512VL") (V4DI "TARGET_AVX512VL")
   (V8DI "TARGET_AVX512F")])

(define_mode_iterator VI2_AVX512VL
  [(V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL") V32HI])

(define_mode_iterator VI8_AVX2_AVX512BW
  [(V8DI "TARGET_AVX512BW") (V4DI "TARGET_AVX2") V2DI])

(define_mode_iterator VI8_AVX2
  [(V4DI "TARGET_AVX2") V2DI])

(define_mode_iterator VI8_AVX2_AVX512F
  [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])

;; Four-element vectors: 128-bit with SI elements, 256-bit with DI.
(define_mode_iterator VI4_128_8_256
  [V4SI V4DI])
359
;; All V8D* modes (eight 64-bit elements, float or integer).
(define_mode_iterator V8FI
  [V8DF V8DI])

;; All V16S* modes (sixteen 32-bit elements, float or integer).
(define_mode_iterator V16FI
  [V16SF V16SI])

;; TImode-element vectors.
;; ??? We should probably use TImode instead.
(define_mode_iterator VIMAX_AVX2
  [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") V1TI])

;; Same as VIMAX_AVX2 except that the 128-bit entry is scalar TI.
;; ??? This should probably be dropped in favor of VIMAX_AVX2.
(define_mode_iterator SSESCALARMODE
  [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") TI])
5deb404d 375
;; Multi element-size integer iterators.  Digits after "VI" list the
;; element sizes in bytes; 128-bit modes are unconditional unless
;; stated otherwise.

(define_mode_iterator VI12_AVX2
  [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
   (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])

(define_mode_iterator VI24_AVX2
  [(V16HI "TARGET_AVX2") V8HI
   (V8SI "TARGET_AVX2")  V4SI])

;; 512-bit HI/SI vectors need TARGET_AVX512F, the 512-bit QI vector
;; needs TARGET_AVX512BW.
(define_mode_iterator VI124_AVX2_24_AVX512F_1_AVX512BW
  [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
   (V32HI "TARGET_AVX512F")  (V16HI "TARGET_AVX2") V8HI
   (V16SI "TARGET_AVX512F")  (V8SI "TARGET_AVX2")  V4SI])

(define_mode_iterator VI124_AVX2
  [(V32QI "TARGET_AVX2") V16QI
   (V16HI "TARGET_AVX2") V8HI
   (V8SI "TARGET_AVX2")  V4SI])

(define_mode_iterator VI2_AVX2_AVX512BW
  [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])

(define_mode_iterator VI48_AVX2
  [(V8SI "TARGET_AVX2") V4SI
   (V4DI "TARGET_AVX2") V2DI])

;; 512-bit HI/SI vectors need TARGET_AVX512BW, the 512-bit DI vector
;; needs TARGET_AVX512F.
(define_mode_iterator VI248_AVX2_8_AVX512F_24_AVX512BW
  [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
   (V16SI "TARGET_AVX512BW") (V8SI "TARGET_AVX2")  V4SI
   (V8DI "TARGET_AVX512F")   (V4DI "TARGET_AVX2")  V2DI])

(define_mode_iterator VI248_AVX512BW_AVX512VL
  [(V32HI "TARGET_AVX512BW")
   (V4DI "TARGET_AVX512VL") V16SI V8DI])

;; TARGET_AVX512VL is assumed as the baseline for this iterator.
(define_mode_iterator VI24_AVX512BW_1
  [(V16HI "TARGET_AVX512BW") (V8HI "TARGET_AVX512BW")
   V8SI V4SI])

(define_mode_iterator VI48_AVX512F
  [(V16SI "TARGET_AVX512F") V8SI V4SI
   (V8DI "TARGET_AVX512F")  V4DI V2DI])

(define_mode_iterator VI48_AVX_AVX512F
  [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
   (V8DI "TARGET_AVX512F")  (V4DI "TARGET_AVX") V2DI])

(define_mode_iterator VI12_AVX_AVX512F
  [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
   (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI])

;; 4- and 8-byte element modes of 128 and 256 bits: the float modes
;; are unconditional, the integer modes need TARGET_AVX2.
(define_mode_iterator V48_AVX2
  [V4SF V2DF
   V8SF V4DF
   (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
   (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
432
;; The attributes below map a vector mode to an ISA-name string.
;; NOTE(review): presumably these are substituted into pattern names
;; so that they match the builtin tables -- confirm against the users.

;; 512-bit modes map to "avx512f"/"avx512bw", everything narrower to
;; "avx512vl".
(define_mode_attr avx512
  [(V16QI "avx512vl") (V32QI "avx512vl") (V64QI "avx512bw")
   (V8HI "avx512vl")  (V16HI "avx512vl") (V32HI "avx512bw")
   (V4SI "avx512vl")  (V8SI "avx512vl")  (V16SI "avx512f")
   (V2DI "avx512vl")  (V4DI "avx512vl")  (V8DI "avx512f")
   (V4SF "avx512vl")  (V8SF "avx512vl")  (V16SF "avx512f")
   (V2DF "avx512vl")  (V4DF "avx512vl")  (V8DF "avx512f")])

;; Note that the HImode and DImode element rows use the avx512vl /
;; avx512bw names even for the 128- and 256-bit modes.
(define_mode_attr sse2_avx_avx512f
  [(V16QI "sse2")    (V32QI "avx")      (V64QI "avx512f")
   (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")
   (V4SI "sse2")     (V8SI "avx")       (V16SI "avx512f")
   (V2DI "avx512vl") (V4DI "avx512vl")  (V8DI "avx512f")
   (V16SF "avx512f") (V8SF "avx")       (V4SF "avx")
   (V8DF "avx512f")  (V4DF "avx")       (V2DF "avx")])

(define_mode_attr sse2_avx2
  [(V16QI "sse2") (V32QI "avx2") (V64QI "avx512bw")
   (V8HI "sse2")  (V16HI "avx2") (V32HI "avx512bw")
   (V4SI "sse2")  (V8SI "avx2")  (V16SI "avx512f")
   (V2DI "sse2")  (V4DI "avx2")  (V8DI "avx512f")
   (V1TI "sse2")  (V2TI "avx2")  (V4TI "avx512bw")])

(define_mode_attr ssse3_avx2
  [(V16QI "ssse3") (V32QI "avx2") (V64QI "avx512bw")
   (V4HI "ssse3")  (V8HI "ssse3") (V16HI "avx2") (V32HI "avx512bw")
   (V4SI "ssse3")  (V8SI "avx2")
   (V2DI "ssse3")  (V4DI "avx2")
   (TI "ssse3")    (V2TI "avx2")  (V4TI "avx512bw")])

(define_mode_attr sse4_1_avx2
  [(V16QI "sse4_1") (V32QI "avx2") (V64QI "avx512bw")
   (V8HI "sse4_1")  (V16HI "avx2") (V32HI "avx512bw")
   (V4SI "sse4_1")  (V8SI "avx2")  (V16SI "avx512f")
   (V2DI "sse4_1")  (V4DI "avx2")  (V8DI "avx512dq")])

;; Float modes map to "avx", integer modes to "avx2".
(define_mode_attr avx_avx2
  [(V4SF "avx")  (V2DF "avx")
   (V8SF "avx")  (V4DF "avx")
   (V4SI "avx2") (V2DI "avx2")
   (V8SI "avx2") (V4DI "avx2")])

;; 128-bit integer modes map to "vec", 256-bit ones to "avx2".
(define_mode_attr vec_avx2
  [(V16QI "vec") (V32QI "avx2")
   (V8HI "vec")  (V16HI "avx2")
   (V4SI "vec")  (V8SI "avx2")
   (V2DI "vec")  (V4DI "avx2")])

(define_mode_attr avx2_avx512
  [(V4SI "avx2")     (V8SI "avx2")      (V16SI "avx512f")
   (V2DI "avx2")     (V4DI "avx2")      (V8DI "avx512f")
   (V4SF "avx2")     (V8SF "avx2")      (V16SF "avx512f")
   (V2DF "avx2")     (V4DF "avx2")      (V8DF "avx512f")
   (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")])
487
;; Letter selecting the float ("f") or integer ("i") flavour of the
;; shuffle-style instructions.  It is spliced straight into mnemonics
;; such as "vextract<shuffletype>64x4" and "vbroadcast<shuffletype>32x4"
;; by the vector move pattern, so every value must be "f" or "i": any
;; other letter yields a mnemonic the assembler does not know.
;; V16HI used to map to "u", which is not a valid flavour; it is an
;; integer mode and therefore maps to "i" like its siblings.
(define_mode_attr shuffletype
  [(V16SF "f") (V16SI "i") (V8DF "f") (V8DI "i")
   (V8SF "f")  (V8SI "i")  (V4DF "f") (V4DI "i")
   (V4SF "f")  (V4SI "i")  (V2DF "f") (V2DI "i")
   (V32QI "i") (V16HI "i") (V16QI "i") (V8HI "i")
   (V64QI "i") (V1TI "i")  (V2TI "i")])
494
;; 512-bit mode -> 128-bit mode with the same element type.
(define_mode_attr ssequartermode
  [(V16SF "V4SF") (V8DF "V2DF") (V16SI "V4SI") (V8DI "V2DI")])

;; Integer mode -> lower-case name of the mode with the same number of
;; elements, each twice as wide.
(define_mode_attr ssedoublemodelower
  [(V16QI "v16hi") (V32QI "v32hi") (V64QI "v64hi")
   (V8HI "v8si")   (V16HI "v16si") (V32HI "v32si")
   (V4SI "v4di")   (V8SI "v8di")   (V16SI "v16di")])

;; Mode -> mode of twice the total size.  The table is not uniform:
;; some entries double the element count (e.g. V4SF -> V8SF,
;; V8SI -> V16SI) while others keep the count and double the element
;; width (e.g. V16QI -> V16HI, V4SI -> V4DI).
(define_mode_attr ssedoublemode
  [(V4SF "V8SF")   (V8SF "V16SF")  (V16SF "V32SF")
   (V2DF "V4DF")   (V4DF "V8DF")   (V8DF "V16DF")
   (V16QI "V16HI") (V32QI "V32HI") (V64QI "V64HI")
   (V4HI "V4SI")   (V8HI "V8SI")   (V16HI "V16SI") (V32HI "V32SI")
   (V4SI "V4DI")   (V8SI "V16SI")  (V16SI "V32SI")
   (V4DI "V8DI")   (V8DI "V16DI")])

;; DImode-element vector -> QImode-element vector of the same size.
(define_mode_attr ssebytemode
  [(V8DI "V64QI") (V4DI "V32QI") (V2DI "V16QI")])
5deb404d 513
;; All 128-bit vector integer modes.
(define_mode_iterator VI_128
  [V16QI V8HI V4SI V2DI])

;; All 256-bit vector integer modes.
(define_mode_iterator VI_256
  [V32QI V16HI V8SI V4DI])

;; All 512-bit vector integer modes.
(define_mode_iterator VI_512
  [V64QI V32HI V16SI V8DI])

;; Various 128-bit vector integer mode combinations; the digits name
;; the element sizes in bytes.
(define_mode_iterator VI12_128  [V16QI V8HI])
(define_mode_iterator VI14_128  [V16QI V4SI])
(define_mode_iterator VI124_128 [V16QI V8HI V4SI])
(define_mode_iterator VI24_128  [V8HI V4SI])
(define_mode_iterator VI248_128 [V8HI V4SI V2DI])
(define_mode_iterator VI48_128  [V4SI V2DI])

;; Various 256-bit and 512-bit vector integer mode combinations.
(define_mode_iterator VI124_256 [V32QI V16HI V8SI])

(define_mode_iterator VI124_256_AVX512F_AVX512BW
  [V32QI V16HI V8SI
   (V64QI "TARGET_AVX512BW")
   (V32HI "TARGET_AVX512BW")
   (V16SI "TARGET_AVX512F")])

(define_mode_iterator VI48_256      [V8SI V4DI])
(define_mode_iterator VI48_512      [V16SI V8DI])
(define_mode_iterator VI4_256_8_512 [V8SI V8DI])

;; 512-bit integer modes; the QI/HI element ones need TARGET_AVX512BW.
(define_mode_iterator VI_AVX512BW
  [V16SI V8DI (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")])
5deb404d 543
;; Iterators pairing integer and float modes of matching element size.
(define_mode_iterator VI4F_128 [V4SI V4SF])
(define_mode_iterator VI8F_128 [V2DI V2DF])
(define_mode_iterator VI4F_256 [V8SI V8SF])
(define_mode_iterator VI8F_256 [V4DI V4DF])

(define_mode_iterator VI8F_256_512
  [V4DI V4DF
   (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])

;; 256-bit SI/SF are unconditional; 512-bit modes need TARGET_AVX512F
;; and 256-bit DI/DF need TARGET_AVX512VL.
(define_mode_iterator VI48F_256_512
  [V8SI V8SF
   (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
   (V8DI "TARGET_AVX512F")  (V8DF "TARGET_AVX512F")
   (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")])

;; All 512-bit modes: 4/8-byte float and 1/2/4/8-byte integer elements.
(define_mode_iterator VF48_I1248
  [V16SI V16SF V8DI V8DF V32HI V64QI])

;; 4- and 8-byte element modes, integer and float; everything below
;; 512 bits needs TARGET_AVX512VL.
(define_mode_iterator VI48F
  [V16SI V16SF V8DI V8DF
   (V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL")
   (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
   (V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
   (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])

(define_mode_iterator VI48F_256 [V8SI V8SF V4DI V4DF])
6fe5844b 565
;; Mapping from float mode to the name of the required SSE level.
(define_mode_attr sse
  [(SF "sse")        (DF "sse2")
   (V4SF "sse")      (V2DF "sse2")
   (V16SF "avx512f") (V8SF "avx")
   (V8DF "avx512f")  (V4DF "avx")])

;; ISA name for QI/DI element vectors, starting at "sse2".
(define_mode_attr sse2
  [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
   (V2DI "sse2")  (V4DI "avx")  (V8DI "avx512f")])

;; ISA name for QI element vectors, starting at "sse3".
(define_mode_attr sse3
  [(V16QI "sse3") (V32QI "avx")])

;; ISA name starting at "sse4_1": 128-bit modes map to "sse4_1",
;; 256-bit modes to "avx" and V8DF to "avx512f".
(define_mode_attr sse4_1
  [(V4SF "sse4_1")  (V2DF "sse4_1")
   (V8SF "avx")     (V4DF "avx")
   (V8DF "avx512f")
   (V4DI "avx")     (V2DI "sse4_1")
   (V8SI "avx")     (V4SI "sse4_1")
   (V16QI "sse4_1") (V32QI "avx")
   (V8HI "sse4_1")  (V16HI "avx")])

;; Vector size suffix: "512", "256", or empty for 128-bit modes.
(define_mode_attr avxsizesuffix
  [(V64QI "512") (V32HI "512") (V16SI "512") (V8DI "512")
   (V32QI "256") (V16HI "256") (V8SI "256")  (V4DI "256")
   (V16QI "")    (V8HI "")     (V4SI "")     (V2DI "")
   (V16SF "512") (V8DF "512")
   (V8SF "256")  (V4DF "256")
   (V4SF "")     (V2DF "")])
33541f98 596
;; SSE instruction mode.  Integer vectors map to the scalar integer
;; mode of the same total size (TI, OI or XI); float vectors and TI map
;; to themselves.
(define_mode_attr sseinsnmode
  [(V64QI "XI") (V32HI "XI") (V16SI "XI") (V8DI "XI") (V4TI "XI")
   (V32QI "OI") (V16HI "OI") (V8SI "OI")  (V4DI "OI") (V2TI "OI")
   (V16QI "TI") (V8HI "TI")  (V4SI "TI")  (V2DI "TI") (V1TI "TI")
   (V16SF "V16SF") (V8DF "V8DF")
   (V8SF "V8SF")   (V4DF "V4DF")
   (V4SF "V4SF")   (V2DF "V2DF")
   (TI "TI")])
63d5e521 606
;; Mapping of vector modes to the corresponding mask mode.  Vectors
;; with up to 8 elements use QI, 16 elements HI, 32 elements SI and
;; 64 elements DI.
(define_mode_attr avx512fmaskmode
  [(V64QI "DI") (V32QI "SI") (V16QI "HI")
   (V32HI "SI") (V16HI "HI") (V8HI "QI") (V4HI "QI")
   (V16SI "HI") (V8SI "QI")  (V4SI "QI")
   (V8DI "QI")  (V4DI "QI")  (V2DI "QI")
   (V16SF "HI") (V8SF "QI")  (V4SF "QI")
   (V8DF "QI")  (V4DF "QI")  (V2DF "QI")])

;; Same mapping as avx512fmaskmode, with the mask mode spelled in
;; lower case.
(define_mode_attr avx512fmaskmodelower
  [(V64QI "di") (V32QI "si") (V16QI "hi")
   (V32HI "si") (V16HI "hi") (V8HI "qi") (V4HI "qi")
   (V16SI "hi") (V8SI "qi")  (V4SI "qi")
   (V8DI "qi")  (V4DI "qi")  (V2DI "qi")
   (V16SF "hi") (V8SF "qi")  (V4SF "qi")
   (V8DF "qi")  (V4DF "qi")  (V2DF "qi")])
624
;; Mapping of vector modes to the integer vector mode with the same
;; element size and count; integer modes map to themselves.
(define_mode_attr sseintvecmode
  [(V16SF "V16SI") (V8DF "V8DI")
   (V8SF "V8SI")   (V4DF "V4DI")
   (V4SF "V4SI")   (V2DF "V2DI")
   (V16SI "V16SI") (V8DI "V8DI")
   (V8SI "V8SI")   (V4DI "V4DI")
   (V4SI "V4SI")   (V2DI "V2DI")
   (V16HI "V16HI") (V8HI "V8HI")
   (V32HI "V32HI") (V64QI "V64QI")
   (V32QI "V32QI") (V16QI "V16QI")])

;; Float vector mode -> scalar integer mode (TI, OI or XI).
;; NOTE(review): V8SF maps to OI and V4SF to TI while the DF rows are
;; V8DF/V4DF/V2DF, so the key is not simply "same total size" for
;; both element types -- confirm the intended pairing with the users.
(define_mode_attr sseintvecmode2
  [(V8DF "XI") (V4DF "OI") (V2DF "TI")
   (V8SF "OI") (V4SF "TI")])

;; Lower-case spelling of the matching integer vector mode.
(define_mode_attr sseintvecmodelower
  [(V16SF "v16si") (V8DF "v8di")
   (V8SF "v8si")   (V4DF "v4di")
   (V4SF "v4si")   (V2DF "v2di")
   (V8SI "v8si")   (V4DI "v4di")
   (V4SI "v4si")   (V2DI "v2di")
   (V16HI "v16hi") (V8HI "v8hi")
   (V32QI "v32qi") (V16QI "v16qi")])
649
;; Mapping of vector modes to the vector mode of double size (same
;; element type, twice as many elements).
(define_mode_attr ssedoublevecmode
  [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
   (V16QI "V32QI") (V8HI "V16HI")  (V4SI "V8SI")  (V2DI "V4DI")
   (V8SF "V16SF")  (V4DF "V8DF")
   (V4SF "V8SF")   (V2DF "V4DF")])

;; Mapping of vector modes to the vector mode of half size (same
;; element type, half as many elements).
(define_mode_attr ssehalfvecmode
  [(V64QI "V32QI") (V32HI "V16HI") (V16SI "V8SI") (V8DI "V4DI")
   (V32QI "V16QI") (V16HI "V8HI")  (V8SI "V4SI")  (V4DI "V2DI")
   (V16QI "V8QI")  (V8HI "V4HI")   (V4SI "V2SI")
   (V16SF "V8SF")  (V8DF "V4DF")
   (V8SF "V4SF")   (V4DF "V2DF")
   (V4SF "V2SF")])
63d5e521 665
;; Mapping of vector modes to the packed single mode of the same size.
(define_mode_attr ssePSmode
  [(V16SI "V16SF") (V8DF "V16SF")
   (V16SF "V16SF") (V8DI "V16SF")
   (V64QI "V16SF") (V32QI "V8SF") (V16QI "V4SF")
   (V32HI "V16SF") (V16HI "V8SF") (V8HI "V4SF")
   (V8SI "V8SF")   (V4SI "V4SF")
   (V4DI "V8SF")   (V2DI "V4SF")
   (V4TI "V16SF")  (V2TI "V8SF")  (V1TI "V4SF")
   (V8SF "V8SF")   (V4SF "V4SF")
   (V4DF "V8SF")   (V2DF "V4SF")])

;; DImode-element vector -> packed single mode with the same number of
;; elements (half the size).
(define_mode_attr ssePSmode2
  [(V8DI "V8SF") (V4DI "V4SF")])
680
;; Mapping of vector modes back to their scalar element modes.
(define_mode_attr ssescalarmode
  [(V64QI "QI") (V32QI "QI") (V16QI "QI")
   (V32HI "HI") (V16HI "HI") (V8HI "HI")
   (V16SI "SI") (V8SI "SI")  (V4SI "SI")
   (V8DI "DI")  (V4DI "DI")  (V2DI "DI")
   (V16SF "SF") (V8SF "SF")  (V4SF "SF")
   (V8DF "DF")  (V4DF "DF")  (V2DF "DF")])

;; Mapping of vector modes to the 128-bit mode with the same element
;; type.
(define_mode_attr ssexmmmode
  [(V64QI "V16QI") (V32QI "V16QI") (V16QI "V16QI")
   (V32HI "V8HI")  (V16HI "V8HI")  (V8HI "V8HI")
   (V16SI "V4SI")  (V8SI "V4SI")   (V4SI "V4SI")
   (V8DI "V2DI")   (V4DI "V2DI")   (V2DI "V2DI")
   (V16SF "V4SF")  (V8SF "V4SF")   (V4SF "V4SF")
   (V8DF "V2DF")   (V4DF "V2DF")   (V2DF "V2DF")])
63d5e521 698
;; Pointer size override for scalar modes (Intel asm dialect), keyed
;; by element size: "b", "w", "k" and "q" for 1, 2, 4 and 8 bytes.
(define_mode_attr iptr
  [(V32QI "b") (V16HI "w") (V8SI "k") (V4DI "q")
   (V16QI "b") (V8HI "w")  (V4SI "k") (V2DI "q")
   (V8SF "k")  (V4DF "q")
   (V4SF "k")  (V2DF "q")
   (SF "k")    (DF "q")])

;; Number of scalar elements in each vector type.
(define_mode_attr ssescalarnum
  [(V64QI "64") (V16SI "16") (V8DI "8")
   (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
   (V16QI "16") (V8HI "8")   (V4SI "4") (V2DI "2")
   (V16SF "16") (V8DF "8")
   (V8SF "8")   (V4DF "4")
   (V4SF "4")   (V2DF "2")])

;; Mask of scalar elements in each vector type (element count - 1).
(define_mode_attr ssescalarnummask
  [(V32QI "31") (V16HI "15") (V8SI "7") (V4DI "3")
   (V16QI "15") (V8HI "7")   (V4SI "3") (V2DI "1")
   (V8SF "7")   (V4DF "3")
   (V4SF "3")   (V2DF "1")])

;; Size in bits of one element of each vector type.
(define_mode_attr ssescalarsize
  [(V8DI "64")  (V4DI "64")  (V2DI "64")
   (V64QI "8")  (V32QI "8")  (V16QI "8")
   (V32HI "16") (V16HI "16") (V8HI "16")
   (V16SI "32") (V8SI "32")  (V4SI "32")
   (V16SF "32") (V8DF "64")])
729
;; SSE prefix for integer vector modes: "p" for integer modes, empty
;; for float modes.
(define_mode_attr sseintprefix
  [(V2DI "p")  (V2DF "")
   (V4DI "p")  (V4DF "")
   (V8DI "p")  (V8DF "")
   (V4SI "p")  (V4SF "")
   (V8SI "p")  (V8SF "")
   (V16SI "p") (V16SF "")
   (V16QI "p") (V8HI "p")
   (V32QI "p") (V16HI "p")
   (V64QI "p") (V32HI "p")])

;; SSE scalar suffix for vector modes.
(define_mode_attr ssescalarmodesuffix
  [(SF "ss")   (DF "sd")
   (V8SF "ss") (V4DF "sd")
   (V4SF "ss") (V2DF "sd")
   (V8SI "ss") (V4DI "sd")
   (V4SI "d")])
749
;; Pack/unpack vector modes.
;; Unpack: same total size, half as many elements of twice the width.
(define_mode_attr sseunpackmode
  [(V16QI "V8HI")  (V8HI "V4SI")   (V4SI "V2DI")
   (V32QI "V16HI") (V16HI "V8SI")  (V8SI "V4DI")
   (V32HI "V16SI") (V64QI "V32HI") (V16SI "V8DI")])

;; Pack: same total size, twice as many elements of half the width.
(define_mode_attr ssepackmode
  [(V8HI "V16QI")  (V4SI "V8HI")   (V2DI "V4SI")
   (V16HI "V32QI") (V8SI "V16HI")  (V4DI "V8SI")
   (V32HI "V64QI") (V16SI "V32HI") (V8DI "V16SI")])

;; Mapping of the max integer size for the xop rotate immediate
;; constraint (element width in bits minus one).
(define_mode_attr sserotatemax
  [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])

;; Mapping of mode to cast intrinsic name.
(define_mode_attr castmode
  [(V8SI "si")  (V8SF "ps")  (V4DF "pd")
   (V16SI "si") (V16SF "ps") (V8DF "pd")])
675d6e0d 769
;; Instruction suffix for sign and zero extensions.
(define_code_attr extsuffix
  [(sign_extend "sx") (zero_extend "zx")])

;; 256-bit modes: i128 for integer vectors and TARGET_AVX2, f128
;; otherwise (integer entries are spelled "%~128").
;; 512-bit modes: i64x4 or f64x4.
(define_mode_attr i128
  [(V16SF "f64x4") (V8SF "f128")
   (V8DF "f64x4")  (V4DF "f128")
   (V64QI "i64x4") (V32QI "%~128")
   (V32HI "i64x4") (V16HI "%~128")
   (V16SI "i64x4") (V8SI "%~128")
   (V8DI "i64x4")  (V4DI "%~128")])

;; Mix-n-match
(define_mode_iterator AVX256MODE2P
  [V8SI V8SF V4DF])

(define_mode_iterator AVX512MODE2P
  [V16SI V16SF V8DF])
ed30e0a6 783
;; Mapping for dbpsadbw modes: HImode-element vector to the
;; QImode-element vector of the same size.
(define_mode_attr dbpsadbwmode
  [(V32HI "V64QI") (V16HI "V32QI") (V8HI "V16QI")])

;; Mapping suffixes for broadcast, keyed by element type.
(define_mode_attr bcstscalarsuff
  [(V64QI "b")  (V32QI "b")  (V16QI "b")
   (V32HI "w")  (V16HI "w")  (V8HI "w")
   (V16SI "d")  (V8SI "d")   (V4SI "d")
   (V8DI "q")   (V4DI "q")   (V2DI "q")
   (V16SF "ss") (V8SF "ss")  (V4SF "ss")
   (V8DF "sd")  (V4DF "sd")  (V2DF "sd")])

;; Tie mode of assembler operand to mode iterator: "t" for 256-bit
;; modes, "g" for 512-bit modes.
(define_mode_attr concat_tg_mode
  [(V32QI "t") (V16HI "t") (V8SI "t")  (V4DI "t") (V8SF "t")  (V4DF "t")
   (V64QI "g") (V32HI "g") (V16SI "g") (V8DI "g") (V16SF "g") (V8DF "g")])

;; Half mask mode for unpacks.
(define_mode_attr HALFMASKMODE
  [(DI "SI") (SI "HI")])

;; Double mask mode for packs.
(define_mode_attr DOUBLEMASKMODE
  [(HI "SI") (SI "DI")])
809
b6840105 810
5220cab6 811;; Include define_subst patterns for instructions with mask
812(include "subst.md")
813
5802c0cb 814;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
815
816;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
817;;
818;; Move patterns
819;;
820;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
821
e5f53f2a 822;; All of these patterns are enabled for SSE1 as well as SSE2.
823;; This is essential for maintaining stable calling conventions.
824
;; Move expander for every mode in VMOVE.  Available whenever TARGET_SSE
;; is set; the whole expansion is delegated to ix86_expand_vector_move.
(define_expand "mov<mode>"
  [(set
     (match_operand:VMOVE 0 "nonimmediate_operand")
     (match_operand:VMOVE 1 "nonimmediate_operand"))]
  "TARGET_SSE"
{
  /* The helper emits the move itself, so nothing is left for the
     pattern template to generate.  */
  ix86_expand_vector_move (<MODE>mode, operands);
  DONE;
})
833
;; Whole-vector move for every mode of the VMOVE iterator.
;;   alternative 0: constant source (constraint "C") into a vector register
;;   alternative 1: register-or-memory source into a vector register
;;   alternative 2: vector register into memory
;; The insn condition requires at least one of the two operands to be a
;; register.  The output code selects the mnemonic from the insn's "mode"
;; attribute (computed by the cond at the end of the pattern) and from
;; misaligned_operand on either operand.
e5f53f2a 834(define_insn "*mov<mode>_internal"
e13e1b39 835 [(set (match_operand:VMOVE 0 "nonimmediate_operand" "=v,v ,m")
836 (match_operand:VMOVE 1 "nonimmediate_or_sse_const_operand" "C ,vm,v"))]
e5f53f2a 837 "TARGET_SSE
ed30e0a6 838 && (register_operand (operands[0], <MODE>mode)
839 || register_operand (operands[1], <MODE>mode))"
840{
e13e1b39 841 int mode = get_attr_mode (insn);
ed30e0a6 842 switch (which_alternative)
843 {
844 case 0:
845 return standard_sse_constant_opcode (insn, operands[1]);
846 case 1:
847 case 2:
e13e1b39 848 /* There is no evex-encoded vmov* for sizes smaller than 64-bytes
849 in avx512f, so we need to use workarounds, to access sse registers
3f4222c1 850 16-31, which are evex-only. In avx512vl we don't need workarounds. */
996b47b0 851 if (TARGET_AVX512F && <MODE_SIZE> < 64 && !TARGET_AVX512VL
3f4222c1 852 && ((REG_P (operands[0]) && EXT_REX_SSE_REGNO_P (REGNO (operands[0])))
853 || (REG_P (operands[1]) && EXT_REX_SSE_REGNO_P (REGNO (operands[1])))))
e13e1b39 854 {
/* Store: extract chunk 0 of the register, printed with the %g
   operand modifier, into the narrower memory destination.  */
855 if (memory_operand (operands[0], <MODE>mode))
856 {
ca94bc0d 857 if (<MODE_SIZE> == 32)
e13e1b39 858 return "vextract<shuffletype>64x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
ca94bc0d 859 else if (<MODE_SIZE> == 16)
e13e1b39 860 return "vextract<shuffletype>32x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
861 else
862 gcc_unreachable ();
863 }
/* Load: broadcast the memory chunk into the register printed
   with the %g operand modifier.  */
864 else if (memory_operand (operands[1], <MODE>mode))
865 {
ca94bc0d 866 if (<MODE_SIZE> == 32)
e13e1b39 867 return "vbroadcast<shuffletype>64x4\t{%1, %g0|%g0, %1}";
ca94bc0d 868 else if (<MODE_SIZE> == 16)
e13e1b39 869 return "vbroadcast<shuffletype>32x4\t{%1, %g0|%g0, %1}";
870 else
871 gcc_unreachable ();
872 }
873 else
874 /* Reg -> reg move is always aligned. Just use wider move. */
875 switch (mode)
876 {
877 case MODE_V8SF:
878 case MODE_V4SF:
879 return "vmovaps\t{%g1, %g0|%g0, %g1}";
880 case MODE_V4DF:
881 case MODE_V2DF:
882 return "vmovapd\t{%g1, %g0|%g0, %g1}";
883 case MODE_OI:
884 case MODE_TI:
885 return "vmovdqa64\t{%g1, %g0|%g0, %g1}";
886 default:
887 gcc_unreachable ();
888 }
889 }
/* No workaround needed: choose the aligned or unaligned mnemonic
   for the attribute mode.  Except for MODE_XI, the unaligned form
   is only selected when TARGET_AVX or TARGET_IAMCU is set.  */
890 switch (mode)
5deb404d 891 {
e13e1b39 892 case MODE_V16SF:
ed30e0a6 893 case MODE_V8SF:
894 case MODE_V4SF:
55ec9861 895 if ((TARGET_AVX || TARGET_IAMCU)
e5f53f2a 896 && (misaligned_operand (operands[0], <MODE>mode)
897 || misaligned_operand (operands[1], <MODE>mode)))
55ec9861 898 return "%vmovups\t{%1, %0|%0, %1}";
b8cb8d52 899 else
e5f53f2a 900 return "%vmovaps\t{%1, %0|%0, %1}";
901
e13e1b39 902 case MODE_V8DF:
ed30e0a6 903 case MODE_V4DF:
904 case MODE_V2DF:
55ec9861 905 if ((TARGET_AVX || TARGET_IAMCU)
e5f53f2a 906 && (misaligned_operand (operands[0], <MODE>mode)
907 || misaligned_operand (operands[1], <MODE>mode)))
55ec9861 908 return "%vmovupd\t{%1, %0|%0, %1}";
6fc76bb0 909 else
e5f53f2a 910 return "%vmovapd\t{%1, %0|%0, %1}";
911
912 case MODE_OI:
913 case MODE_TI:
55ec9861 914 if ((TARGET_AVX || TARGET_IAMCU)
e5f53f2a 915 && (misaligned_operand (operands[0], <MODE>mode)
916 || misaligned_operand (operands[1], <MODE>mode)))
3f4222c1 917 return TARGET_AVX512VL ? "vmovdqu64\t{%1, %0|%0, %1}"
55ec9861 918 : "%vmovdqu\t{%1, %0|%0, %1}";
6fc76bb0 919 else
3f4222c1 920 return TARGET_AVX512VL ? "vmovdqa64\t{%1, %0|%0, %1}"
921 : "%vmovdqa\t{%1, %0|%0, %1}";
e13e1b39 922 case MODE_XI:
923 if (misaligned_operand (operands[0], <MODE>mode)
924 || misaligned_operand (operands[1], <MODE>mode))
925 return "vmovdqu64\t{%1, %0|%0, %1}";
926 else
927 return "vmovdqa64\t{%1, %0|%0, %1}";
5802c0cb 928
f96e219b 929 default:
e5f53f2a 930 gcc_unreachable ();
f96e219b 931 }
5802c0cb 932 default:
8c3c4cd9 933 gcc_unreachable ();
5802c0cb 934 }
935}
936 [(set_attr "type" "sselog1,ssemov,ssemov")
e5f53f2a 937 (set_attr "prefix" "maybe_vex")
;; "mode" attribute: the first matching test wins; the trailing
;; const_string is the default when none of the tests match.
5802c0cb 938 (set (attr "mode")
7d460314 939 (cond [(and (match_test "<MODE_SIZE> == 16")
940 (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
941 (and (eq_attr "alternative" "2")
942 (match_test "TARGET_SSE_TYPELESS_STORES"))))
dab25421 943 (const_string "<ssePSmode>")
034788fc 944 (match_test "TARGET_AVX")
5deb404d 945 (const_string "<sseinsnmode>")
dab25421 946 (ior (not (match_test "TARGET_SSE2"))
947 (match_test "optimize_function_for_size_p (cfun)"))
f96e219b 948 (const_string "V4SF")
7c93cdac 949 (and (eq_attr "alternative" "0")
950 (match_test "TARGET_SSE_LOAD0_BY_PXOR"))
951 (const_string "TI")
f96e219b 952 ]
dab25421 953 (const_string "<sseinsnmode>")))])
5802c0cb 954
;; Masked move into a register for V48_AVX512VL modes: a vec_merge of
;; operand 1 (register or memory) and operand 2 under mask register
;; operand 3.  Operand 2 is constrained to "0C", i.e. either the
;; destination itself or a constant accepted by "C".
;; The mnemonic is built at output time into a static buffer:
;;   float element mode:  "vmov"   + a/u + <ssemodesuffix>
;;   otherwise:           "vmovdq" + a/u + <ssescalarsize>
;; where "u" is used when operand 1 is a misaligned_operand, else "a".
3f4222c1 955(define_insn "<avx512>_load<mode>_mask"
dbddc172 956 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v")
957 (vec_merge:V48_AVX512VL
958 (match_operand:V48_AVX512VL 1 "nonimmediate_operand" "v,m")
959 (match_operand:V48_AVX512VL 2 "vector_move_operand" "0C,0C")
a31e7f46 960 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
5220cab6 961 "TARGET_AVX512F"
962{
/* The returned template lives in this static buffer, so each call
   overwrites the previous result.  */
dbddc172 963 static char buf [64];
964
965 const char *insn_op;
966 const char *sse_suffix;
967 const char *align;
968 if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
5220cab6 969 {
dbddc172 970 insn_op = "vmov";
971 sse_suffix = "<ssemodesuffix>";
972 }
973 else
974 {
975 insn_op = "vmovdq";
976 sse_suffix = "<ssescalarsize>";
5220cab6 977 }
dbddc172 978
979 if (misaligned_operand (operands[1], <MODE>mode))
980 align = "u";
981 else
982 align = "a";
983
/* "%%" yields a literal '%' in the generated template.  */
984 snprintf (buf, sizeof (buf), "%s%s%s\t{%%1, %%0%%{%%3%%}%%N2|%%0%%{%%3%%}%%N2, %%1}",
985 insn_op, align, sse_suffix);
986 return buf;
5220cab6 987}
988 [(set_attr "type" "ssemov")
989 (set_attr "prefix" "evex")
990 (set_attr "memory" "none,load")
991 (set_attr "mode" "<sseinsnmode>")])
992
;; Masked move into a register for VI12_AVX512VL modes, enabled by
;; TARGET_AVX512BW.  Same RTL shape as the V48_AVX512VL pattern of the
;; same name, but the template is fixed: always the unaligned
;; "vmovdqu<ssescalarsize>" form.
dbddc172 993(define_insn "<avx512>_load<mode>_mask"
994 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v")
995 (vec_merge:VI12_AVX512VL
996 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "v,m")
997 (match_operand:VI12_AVX512VL 2 "vector_move_operand" "0C,0C")
998 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
999 "TARGET_AVX512BW"
1000 "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
1001 [(set_attr "type" "ssemov")
1002 (set_attr "prefix" "evex")
1003 (set_attr "memory" "none,load")
1004 (set_attr "mode" "<sseinsnmode>")])
1005
;; Mask-controlled blend for V48_AVX512VL modes: a vec_merge of operand 2
;; (register or memory) and operand 1 (register) under mask register
;; operand 3.  Note the operand numbering: operand 2 is the first
;; vec_merge arm and operand 1 the second.
f50aa6e9 1006(define_insn "<avx512>_blendm<mode>"
1007 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
1008 (vec_merge:V48_AVX512VL
1009 (match_operand:V48_AVX512VL 2 "nonimmediate_operand" "vm")
1010 (match_operand:V48_AVX512VL 1 "register_operand" "v")
a31e7f46 1011 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
8e6b975f 1012 "TARGET_AVX512F"
f50aa6e9 1013 "vblendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
1014 [(set_attr "type" "ssemov")
1015 (set_attr "prefix" "evex")
1016 (set_attr "mode" "<sseinsnmode>")])
1017
;; Mask-controlled blend for VI12_AVX512VL modes, enabled by
;; TARGET_AVX512BW.  Same RTL shape as the V48_AVX512VL pattern of the
;; same name; emits "vpblendm<ssemodesuffix>".
1018(define_insn "<avx512>_blendm<mode>"
1019 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
1020 (vec_merge:VI12_AVX512VL
1021 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
1022 (match_operand:VI12_AVX512VL 1 "register_operand" "v")
1023 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
1024 "TARGET_AVX512BW"
1025 "vpblendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
8e6b975f 1026 [(set_attr "type" "ssemov")
1027 (set_attr "prefix" "evex")
1028 (set_attr "mode" "<sseinsnmode>")])
1029
;; Masked store for V48_AVX512VL modes: memory operand 0 is set to a
;; vec_merge of register operand 1 and its own previous contents
;; (match_dup 0) under mask register operand 2.
;; The mnemonic is built at output time into a static buffer:
;;   float element mode:  "vmov"   + a/u + <ssemodesuffix>
;;   otherwise:           "vmovdq" + a/u + <ssescalarsize>
;; where "u" is used when operand 0 is a misaligned_operand, else "a".
3f4222c1 1030(define_insn "<avx512>_store<mode>_mask"
dbddc172 1031 [(set (match_operand:V48_AVX512VL 0 "memory_operand" "=m")
1032 (vec_merge:V48_AVX512VL
1033 (match_operand:V48_AVX512VL 1 "register_operand" "v")
5220cab6 1034 (match_dup 0)
a31e7f46 1035 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
5220cab6 1036 "TARGET_AVX512F"
1037{
/* The returned template lives in this static buffer, so each call
   overwrites the previous result.  */
dbddc172 1038 static char buf [64];
1039
1040 const char *insn_op;
1041 const char *sse_suffix;
1042 const char *align;
1043 if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
5220cab6 1044 {
dbddc172 1045 insn_op = "vmov";
1046 sse_suffix = "<ssemodesuffix>";
1047 }
1048 else
1049 {
1050 insn_op = "vmovdq";
1051 sse_suffix = "<ssescalarsize>";
5220cab6 1052 }
dbddc172 1053
/* Alignment is judged on the memory destination here.  */
bdda5a33 1054 if (misaligned_operand (operands[0], <MODE>mode))
dbddc172 1055 align = "u";
1056 else
1057 align = "a";
1058
/* "%%" yields a literal '%' in the generated template.  */
1059 snprintf (buf, sizeof (buf), "%s%s%s\t{%%1, %%0%%{%%2%%}|%%0%%{%%2%%}, %%1}",
1060 insn_op, align, sse_suffix);
1061 return buf;
5220cab6 1062}
1063 [(set_attr "type" "ssemov")
1064 (set_attr "prefix" "evex")
1065 (set_attr "memory" "store")
1066 (set_attr "mode" "<sseinsnmode>")])
1067
;; Masked store for VI12_AVX512VL modes, enabled by TARGET_AVX512BW.
;; Same RTL shape as the V48_AVX512VL pattern of the same name, but the
;; template is fixed: always the unaligned "vmovdqu<ssescalarsize>" form.
dbddc172 1068(define_insn "<avx512>_store<mode>_mask"
1069 [(set (match_operand:VI12_AVX512VL 0 "memory_operand" "=m")
1070 (vec_merge:VI12_AVX512VL
1071 (match_operand:VI12_AVX512VL 1 "register_operand" "v")
1072 (match_dup 0)
1073 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1074 "TARGET_AVX512BW"
1075 "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
1076 [(set_attr "type" "ssemov")
1077 (set_attr "prefix" "evex")
1078 (set_attr "memory" "store")
1079 (set_attr "mode" "<sseinsnmode>")])
1080
;; Build a V2DI whose element 0 is element 0 of operand 1 (register or
;; memory) and whose element 1 is constant zero.  Emitted as "%vmovq";
;; in the Intel-syntax half of the template operand 1 is printed with
;; the %q modifier.
e5f53f2a 1081(define_insn "sse2_movq128"
1082 [(set (match_operand:V2DI 0 "register_operand" "=x")
1083 (vec_concat:V2DI
1084 (vec_select:DI
1085 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
1086 (parallel [(const_int 0)]))
1087 (const_int 0)))]
1088 "TARGET_SSE2"
c358a059 1089 "%vmovq\t{%1, %0|%0, %q1}"
e5f53f2a 1090 [(set_attr "type" "ssemov")
1091 (set_attr "prefix" "maybe_vex")
1092 (set_attr "mode" "TI")])
1093
751bdb92 1094;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
1095;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
1096;; from memory, we'd prefer to load the memory directly into the %xmm
1097;; register. To facilitate this happy circumstance, this pattern won't
1098;; split until after register allocation. If the 64-bit value didn't
1099;; come from memory, this is the best we can do. This is much better
1100;; than storing %edx:%eax into a stack temporary and loading an %xmm
1101;; from there.
1102
1103(define_insn_and_split "movdi_to_sse"
1104 [(parallel
1105 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
1106 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
1107 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
2a631a93 1108 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC"
751bdb92 1109 "#"
1110 "&& reload_completed"
1111 [(const_int 0)]
1112{
0bb48c33 1113 if (register_operand (operands[1], DImode))
1114 {
751bdb92 1115 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
1116 Assemble the 64-bit DImode value in an xmm register. */
1117 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
b82498f8 1118 gen_lowpart (SImode, operands[1])));
751bdb92 1119 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
b82498f8 1120 gen_highpart (SImode, operands[1])));
d6e05290 1121 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
5deb404d 1122 operands[2]));
09e640e6 1123 }
0bb48c33 1124 else if (memory_operand (operands[1], DImode))
09e640e6 1125 {
1126 rtx tmp = gen_reg_rtx (V2DImode);
1127 emit_insn (gen_vec_concatv2di (tmp, operands[1], const0_rtx));
1128 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp));
1129 }
0bb48c33 1130 else
d6e05290 1131 gcc_unreachable ();
751bdb92 1132})
1133
;; After reload, rewrite a V4SF register set from an operand accepted by
;; zero_extended_scalar_load_operand as a vec_merge that takes element 0
;; (mask const_int 1) from a duplicated SF value and the remaining
;; elements from the V4SF zero constant.  The preparation code replaces
;; operand 1 by its SFmode subreg at byte offset 0 and sets operand 2 to
;; CONST0_RTX (V4SFmode).
5802c0cb 1134(define_split
abd4f58b 1135 [(set (match_operand:V4SF 0 "register_operand")
1136 (match_operand:V4SF 1 "zero_extended_scalar_load_operand"))]
5802c0cb 1137 "TARGET_SSE && reload_completed"
ad2c46cf 1138 [(set (match_dup 0)
1139 (vec_merge:V4SF
1140 (vec_duplicate:V4SF (match_dup 1))
1141 (match_dup 2)
1142 (const_int 1)))]
5802c0cb 1143{
ad2c46cf 1144 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
1145 operands[2] = CONST0_RTX (V4SFmode);
5802c0cb 1146})
1147
;; V2DF counterpart of the V4SF split: after reload, rewrite the set as a
;; vec_concat of the DFmode subreg (byte offset 0) of operand 1 with the
;; DFmode zero constant placed in operand 2.
5802c0cb 1148(define_split
abd4f58b 1149 [(set (match_operand:V2DF 0 "register_operand")
1150 (match_operand:V2DF 1 "zero_extended_scalar_load_operand"))]
5802c0cb 1151 "TARGET_SSE2 && reload_completed"
ad2c46cf 1152 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
5802c0cb 1153{
ad2c46cf 1154 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
1155 operands[2] = CONST0_RTX (DFmode);
5802c0cb 1156})
1157
;; Expander for the movmisalign<mode> named pattern for all VMOVE modes.
;; All the work is delegated to ix86_expand_vector_move_misalign; the RTL
;; template itself is never emitted (DONE).
ed30e0a6 1158(define_expand "movmisalign<mode>"
8671b6cc 1159 [(set (match_operand:VMOVE 0 "nonimmediate_operand")
1160 (match_operand:VMOVE 1 "nonimmediate_operand"))]
5802c0cb 1161 "TARGET_SSE"
1162{
1163 ix86_expand_vector_move_misalign (<MODE>mode, operands);
1164 DONE;
1165})
1166
;; Expander for an explicit unaligned load (UNSPEC_LOADU) of a VF mode.
;; With TARGET_AVX and a misaligned source, a plain SET is emitted
;; instead of the unspec; for the masked variant the source is first
;; wrapped in a VEC_MERGE with operands 2 and 3.  In every other case the
;; expander falls through and the UNSPEC_LOADU template is emitted.
58fb74ce 1167(define_expand "<sse>_loadu<ssemodesuffix><avxsizesuffix><mask_name>"
1168 [(set (match_operand:VF 0 "register_operand")
1169 (unspec:VF [(match_operand:VF 1 "nonimmediate_operand")]
1170 UNSPEC_LOADU))]
1171 "TARGET_SSE && <mask_mode512bit_condition>"
1172{
1173 /* For AVX, normal *mov<mode>_internal pattern will handle unaligned loads
1174 just fine if misaligned_operand is true, and without the UNSPEC it can
1175 be combined with arithmetic instructions. If misaligned_operand is
1176 false, still emit UNSPEC_LOADU insn to honor user's request for
1177 misaligned load. */
1178 if (TARGET_AVX
8688c545 1179 && misaligned_operand (operands[1], <MODE>mode))
58fb74ce 1180 {
8688c545 1181 rtx src = operands[1];
/* The operand indices are multiplied by <mask_applied> so that the
   unmasked variant only ever names operands[0].  */
1182 if (<mask_applied>)
1183 src = gen_rtx_VEC_MERGE (<MODE>mode, operands[1],
1184 operands[2 * <mask_applied>],
1185 operands[3 * <mask_applied>]);
d1f9b275 1186 emit_insn (gen_rtx_SET (operands[0], src));
58fb74ce 1187 DONE;
1188 }
1189})
1190
1191(define_insn "*<sse>_loadu<ssemodesuffix><avxsizesuffix><mask_name>"
e13e1b39 1192 [(set (match_operand:VF 0 "register_operand" "=v")
e5f53f2a 1193 (unspec:VF
6a3f5f59 1194 [(match_operand:VF 1 "nonimmediate_operand" "vm")]
00820ea0 1195 UNSPEC_LOADU))]
5220cab6 1196 "TARGET_SSE && <mask_mode512bit_condition>"
034788fc 1197{
1198 switch (get_attr_mode (insn))
1199 {
6a3f5f59 1200 case MODE_V16SF:
034788fc 1201 case MODE_V8SF:
1202 case MODE_V4SF:
5220cab6 1203 return "%vmovups\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
034788fc 1204 default:
5220cab6 1205 return "%vmovu<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
034788fc 1206 }
1207}
ed30e0a6 1208 [(set_attr "type" "ssemov")
fbfe006e 1209 (set_attr "movu" "1")
8c1dfa94 1210 (set_attr "ssememalign" "8")
ed30e0a6 1211 (set_attr "prefix" "maybe_vex")
034788fc 1212 (set (attr "mode")
7d460314 1213 (cond [(and (match_test "<MODE_SIZE> == 16")
1214 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
dab25421 1215 (const_string "<ssePSmode>")
034788fc 1216 (match_test "TARGET_AVX")
1217 (const_string "<MODE>")
dab25421 1218 (match_test "optimize_function_for_size_p (cfun)")
1219 (const_string "V4SF")
034788fc 1220 ]
dab25421 1221 (const_string "<MODE>")))])
5802c0cb 1222
6a5f6dde 1223;; Merge movsd/movhpd to movupd for TARGET_SSE_UNALIGNED_LOAD_OPTIMAL targets.
;; Matches two consecutive sets: (1) V2DF register operand 0 := concat of
;; DF memory operand 1 with zero (operand 4); (2) V2DF register operand 2
;; := concat of its own element 0 with DF memory operand 3.  They are
;; replaced by one UNSPEC_LOADU into operand 2 from operand 1's address
;; re-addressed as V2DFmode (stored back into operands[4]).
;; NOTE(review): the relationship required between the operands (same
;; register, adjacent memory) is presumably checked by
;; ix86_operands_ok_for_move_multiple, which is not visible here.
1224(define_peephole2
1225 [(set (match_operand:V2DF 0 "register_operand")
1226 (vec_concat:V2DF (match_operand:DF 1 "memory_operand")
1227 (match_operand:DF 4 "const0_operand")))
1228 (set (match_operand:V2DF 2 "register_operand")
1229 (vec_concat:V2DF (vec_select:DF (match_dup 2)
1230 (parallel [(const_int 0)]))
1231 (match_operand:DF 3 "memory_operand")))]
1232 "TARGET_SSE2 && TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
1233 && ix86_operands_ok_for_move_multiple (operands, true, DFmode)"
1234 [(set (match_dup 2)
1235 (unspec:V2DF [(match_dup 4)] UNSPEC_LOADU))]
1236 "operands[4] = adjust_address (operands[1], V2DFmode, 0);")
1237
;; Insn for an explicit unaligned store (UNSPEC_STOREU) of a VF-mode
;; register to memory.  Emits "%vmovups" when the "mode" attribute is one
;; of the packed-single modes, otherwise "%vmovu<ssemodesuffix>".
00820ea0 1238(define_insn "<sse>_storeu<ssemodesuffix><avxsizesuffix>"
1239 [(set (match_operand:VF 0 "memory_operand" "=m")
1240 (unspec:VF
6a3f5f59 1241 [(match_operand:VF 1 "register_operand" "v")]
00820ea0 1242 UNSPEC_STOREU))]
1243 "TARGET_SSE"
1244{
1245 switch (get_attr_mode (insn))
1246 {
6a3f5f59 1247 case MODE_V16SF:
00820ea0 1248 case MODE_V8SF:
1249 case MODE_V4SF:
1250 return "%vmovups\t{%1, %0|%0, %1}";
1251 default:
1252 return "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}";
1253 }
1254}
1255 [(set_attr "type" "ssemov")
1256 (set_attr "movu" "1")
8c1dfa94 1257 (set_attr "ssememalign" "8")
00820ea0 1258 (set_attr "prefix" "maybe_vex")
;; "mode" attribute: unlike the matching load insn, the 16-byte test
;; here also accepts TARGET_SSE_TYPELESS_STORES.
1259 (set (attr "mode")
ca94bc0d 1260 (cond [(and (match_test "<MODE_SIZE> == 16")
1706116d 1261 (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
1262 (match_test "TARGET_SSE_TYPELESS_STORES")))
00820ea0 1263 (const_string "<ssePSmode>")
1264 (match_test "TARGET_AVX")
1265 (const_string "<MODE>")
1266 (match_test "optimize_function_for_size_p (cfun)")
1267 (const_string "V4SF")
1268 ]
1269 (const_string "<MODE>")))])
1270
;; Masked unaligned store for VF_AVX512VL modes: memory operand 0 is set
;; to a vec_merge of UNSPEC_STOREU (register operand 1) and its own
;; previous contents (match_dup 0) under mask register operand 2.
;; Emits "vmovups" or "vmovu<ssemodesuffix>" according to the "mode"
;; attribute, with the mask printed on the destination.
250533c0 1271(define_insn "<avx512>_storeu<ssemodesuffix><avxsizesuffix>_mask"
1272 [(set (match_operand:VF_AVX512VL 0 "memory_operand" "=m")
1273 (vec_merge:VF_AVX512VL
1274 (unspec:VF_AVX512VL
1275 [(match_operand:VF_AVX512VL 1 "register_operand" "v")]
5220cab6 1276 UNSPEC_STOREU)
1277 (match_dup 0)
a31e7f46 1278 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
5220cab6 1279 "TARGET_AVX512F"
1280{
1281 switch (get_attr_mode (insn))
1282 {
1283 case MODE_V16SF:
250533c0 1284 case MODE_V8SF:
1285 case MODE_V4SF:
5220cab6 1286 return "vmovups\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1287 default:
1288 return "vmovu<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1289 }
1290}
1291 [(set_attr "type" "ssemov")
1292 (set_attr "movu" "1")
1293 (set_attr "memory" "store")
1294 (set_attr "prefix" "evex")
1295 (set_attr "mode" "<sseinsnmode>")])
1296
6a5f6dde 1297;; Merge movlpd/movhpd to movupd for TARGET_SSE_UNALIGNED_STORE_OPTIMAL targets.
;; Matches two consecutive sets: (1) DF memory operand 0 := element 0 of
;; V2DF register operand 1; (2) DF memory operand 2 := element 1 of V2DF
;; register operand 3.  They are replaced by one UNSPEC_STOREU of
;; operand 1 to operand 0's address re-addressed as V2DFmode (stored in
;; operands[4]).
;; NOTE(review): the relationship required between the operands (same
;; register, adjacent memory) is presumably checked by
;; ix86_operands_ok_for_move_multiple, which is not visible here.
1298(define_peephole2
1299 [(set (match_operand:DF 0 "memory_operand")
1300 (vec_select:DF (match_operand:V2DF 1 "register_operand")
1301 (parallel [(const_int 0)])))
1302 (set (match_operand:DF 2 "memory_operand")
1303 (vec_select:DF (match_operand:V2DF 3 "register_operand")
1304 (parallel [(const_int 1)])))]
1305 "TARGET_SSE2 && TARGET_SSE_UNALIGNED_STORE_OPTIMAL
1306 && ix86_operands_ok_for_move_multiple (operands, false, DFmode)"
1307 [(set (match_dup 4)
1308 (unspec:V2DF [(match_dup 1)] UNSPEC_STOREU))]
1309 "operands[4] = adjust_address (operands[0], V2DFmode, 0);")
1310
97173adf 1311/* For AVX, normal *mov<mode>_internal pattern will handle unaligned loads
1312 just fine if misaligned_operand is true, and without the UNSPEC it can
1313 be combined with arithmetic instructions. If misaligned_operand is
1314 false, still emit UNSPEC_LOADU insn to honor user's request for
1315 misaligned load. */
;; Expander for an explicit unaligned integer load (UNSPEC_LOADU) of a
;; VI1 mode.  With TARGET_AVX and a misaligned source, a plain SET
;; (wrapped in a VEC_MERGE for the masked variant) is emitted instead of
;; the unspec; otherwise the UNSPEC_LOADU template is emitted.
58fb74ce 1316(define_expand "<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
97173adf 1317 [(set (match_operand:VI1 0 "register_operand")
1318 (unspec:VI1
1319 [(match_operand:VI1 1 "nonimmediate_operand")]
58fb74ce 1320 UNSPEC_LOADU))]
97173adf 1321 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
58fb74ce 1322{
58fb74ce 1323 if (TARGET_AVX
8688c545 1324 && misaligned_operand (operands[1], <MODE>mode))
58fb74ce 1325 {
8688c545 1326 rtx src = operands[1];
/* The operand indices are multiplied by <mask_applied> so that the
   unmasked variant only ever names operands[0].  */
1327 if (<mask_applied>)
1328 src = gen_rtx_VEC_MERGE (<MODE>mode, operands[1],
1329 operands[2 * <mask_applied>],
1330 operands[3 * <mask_applied>]);
d1f9b275 1331 emit_insn (gen_rtx_SET (operands[0], src));
58fb74ce 1332 DONE;
1333 }
1334})
1335
;; Same expander for the VI_ULOADSTORE_BW_AVX512VL modes, enabled by
;; TARGET_AVX512BW.  Unlike the VI1 expander there is no TARGET_AVX test:
;; a misaligned source alone selects the plain SET.
97173adf 1336(define_expand "<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
1337 [(set (match_operand:VI_ULOADSTORE_BW_AVX512VL 0 "register_operand")
1338 (unspec:VI_ULOADSTORE_BW_AVX512VL
1339 [(match_operand:VI_ULOADSTORE_BW_AVX512VL 1 "nonimmediate_operand")]
1340 UNSPEC_LOADU))]
1341 "TARGET_AVX512BW"
1342{
1343 if (misaligned_operand (operands[1], <MODE>mode))
1344 {
1345 rtx src = operands[1];
1346 if (<mask_applied>)
1347 src = gen_rtx_VEC_MERGE (<MODE>mode, operands[1],
1348 operands[2 * <mask_applied>],
1349 operands[3 * <mask_applied>]);
d1f9b275 1350 emit_insn (gen_rtx_SET (operands[0], src));
97173adf 1351 DONE;
1352 }
1353})
1354
;; Same expander for the VI_ULOADSTORE_F_AVX512VL modes, enabled by
;; TARGET_AVX512F.  A misaligned source selects the plain SET (wrapped in
;; a VEC_MERGE for the masked variant); otherwise the UNSPEC_LOADU
;; template is emitted.
1355(define_expand "<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
1356 [(set (match_operand:VI_ULOADSTORE_F_AVX512VL 0 "register_operand")
1357 (unspec:VI_ULOADSTORE_F_AVX512VL
1358 [(match_operand:VI_ULOADSTORE_F_AVX512VL 1 "nonimmediate_operand")]
1359 UNSPEC_LOADU))]
1360 "TARGET_AVX512F"
1361{
1362 if (misaligned_operand (operands[1], <MODE>mode))
1363 {
1364 rtx src = operands[1];
1365 if (<mask_applied>)
1366 src = gen_rtx_VEC_MERGE (<MODE>mode, operands[1],
1367 operands[2 * <mask_applied>],
1368 operands[3 * <mask_applied>]);
d1f9b275 1369 emit_insn (gen_rtx_SET (operands[0], src));
97173adf 1370 DONE;
1371 }
1372})
1373
;; Insn for an explicit unaligned integer load (UNSPEC_LOADU) of a VI1
;; mode.  The mnemonic depends on the "mode" attribute:
;;   V8SF / V4SF, unmasked:  "%vmovups"
;;   otherwise:              "%vmovdqu" unless both AVX512VL and AVX512BW
;;                           are enabled, in which case
;;                           "vmovdqu<ssescalarsize>" with <mask_operand2>.
;; The packed-single form has no mask operand in its template, so it must
;; not be chosen for the masked variant of this pattern.
58fb74ce 1374(define_insn "*<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
97173adf 1375 [(set (match_operand:VI1 0 "register_operand" "=v")
1376 (unspec:VI1
1377 [(match_operand:VI1 1 "nonimmediate_operand" "vm")]
6a3f5f59 1378 UNSPEC_LOADU))]
97173adf 1379 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
00820ea0 1380{
1381 switch (get_attr_mode (insn))
1382 {
1383 case MODE_V8SF:
1384 case MODE_V4SF:
      /* Only use the mask-less packed-single move when no mask is
	 applied; a masked load must keep <mask_operand2>, which only
	 the integer template below prints.  */
      if (!<mask_applied>)
1385 return "%vmovups\t{%1, %0|%0, %1}";
      /* FALLTHRU */
1386 default:
97173adf 1387 if (!(TARGET_AVX512VL && TARGET_AVX512BW))
1388 return "%vmovdqu\t{%1, %0|%0, %1}";
1389 else
1390 return "vmovdqu<ssescalarsize>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
00820ea0 1391 }
1392}
1393 [(set_attr "type" "ssemov")
1394 (set_attr "movu" "1")
8c1dfa94 1395 (set_attr "ssememalign" "8")
00820ea0 1396 (set (attr "prefix_data16")
1397 (if_then_else
1398 (match_test "TARGET_AVX")
1399 (const_string "*")
1400 (const_string "1")))
1401 (set_attr "prefix" "maybe_vex")
1402 (set (attr "mode")
7d460314 1403 (cond [(and (match_test "<MODE_SIZE> == 16")
1404 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
00820ea0 1405 (const_string "<ssePSmode>")
00820ea0 1406 (match_test "TARGET_AVX")
1407 (const_string "<sseinsnmode>")
1408 (match_test "optimize_function_for_size_p (cfun)")
1409 (const_string "V4SF")
1410 ]
1411 (const_string "<sseinsnmode>")))])
1412
;; Insn for an explicit unaligned integer load (UNSPEC_LOADU) of a
;; VI_ULOADSTORE_BW_AVX512VL mode, enabled by TARGET_AVX512BW.  Always
;; emits "vmovdqu<ssescalarsize>" with <mask_operand2> on the destination.
;; The ';' after the template string starts an (empty) comment.
97173adf 1413(define_insn "*<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
1414 [(set (match_operand:VI_ULOADSTORE_BW_AVX512VL 0 "register_operand" "=v")
1415 (unspec:VI_ULOADSTORE_BW_AVX512VL
1416 [(match_operand:VI_ULOADSTORE_BW_AVX512VL 1 "nonimmediate_operand" "vm")]
1417 UNSPEC_LOADU))]
1418 "TARGET_AVX512BW"
1419 "vmovdqu<ssescalarsize>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
1420 [(set_attr "type" "ssemov")
1421 (set_attr "movu" "1")
1422 (set_attr "ssememalign" "8")
1423 (set_attr "prefix" "maybe_evex")])
1424
;; Insn for an explicit unaligned integer load (UNSPEC_LOADU) of a
;; VI_ULOADSTORE_F_AVX512VL mode, enabled by TARGET_AVX512F.  Always
;; emits "vmovdqu<ssescalarsize>" with <mask_operand2> on the destination.
;; The ';' after the template string starts an (empty) comment.
1425(define_insn "*<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
1426 [(set (match_operand:VI_ULOADSTORE_F_AVX512VL 0 "register_operand" "=v")
1427 (unspec:VI_ULOADSTORE_F_AVX512VL
1428 [(match_operand:VI_ULOADSTORE_F_AVX512VL 1 "nonimmediate_operand" "vm")]
1429 UNSPEC_LOADU))]
1430 "TARGET_AVX512F"
1431 "vmovdqu<ssescalarsize>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
1432 [(set_attr "type" "ssemov")
1433 (set_attr "movu" "1")
1434 (set_attr "ssememalign" "8")
1435 (set_attr "prefix" "maybe_evex")])
1436
;; Insn for an explicit unaligned integer store (UNSPEC_STOREU) of a VI1
;; register to memory.  Packed-single "mode" attribute values emit
;; "%vmovups"; otherwise V32QI/V16QI emit "%vmovdqu" unless both AVX512VL
;; and AVX512BW are enabled, in which case (and for any other mode)
;; "vmovdqu<ssescalarsize>" is emitted.
6a3f5f59 1437(define_insn "<sse2_avx_avx512f>_storedqu<mode>"
97173adf 1438 [(set (match_operand:VI1 0 "memory_operand" "=m")
1439 (unspec:VI1
1440 [(match_operand:VI1 1 "register_operand" "v")]
6a3f5f59 1441 UNSPEC_STOREU))]
00820ea0 1442 "TARGET_SSE2"
034788fc 1443{
1444 switch (get_attr_mode (insn))
1445 {
1706116d 1446 case MODE_V16SF:
034788fc 1447 case MODE_V8SF:
1448 case MODE_V4SF:
1449 return "%vmovups\t{%1, %0|%0, %1}";
1450 default:
97173adf 1451 switch (<MODE>mode)
1452 {
1453 case V32QImode:
1454 case V16QImode:
1455 if (!(TARGET_AVX512VL && TARGET_AVX512BW))
1456 return "%vmovdqu\t{%1, %0|%0, %1}";
/* Falls through to the default case when both AVX512VL and
   AVX512BW are enabled.  */
1457 default:
1458 return "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}";
1459 }
034788fc 1460 }
1461}
ed30e0a6 1462 [(set_attr "type" "ssemov")
fbfe006e 1463 (set_attr "movu" "1")
8c1dfa94 1464 (set_attr "ssememalign" "8")
e5f53f2a 1465 (set (attr "prefix_data16")
1466 (if_then_else
6be3efec 1467 (match_test "TARGET_AVX")
e5f53f2a 1468 (const_string "*")
1469 (const_string "1")))
1470 (set_attr "prefix" "maybe_vex")
034788fc 1471 (set (attr "mode")
ca94bc0d 1472 (cond [(and (match_test "<MODE_SIZE> == 16")
1706116d 1473 (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
1474 (match_test "TARGET_SSE_TYPELESS_STORES")))
dab25421 1475 (const_string "<ssePSmode>")
034788fc 1476 (match_test "TARGET_AVX")
1477 (const_string "<sseinsnmode>")
dab25421 1478 (match_test "optimize_function_for_size_p (cfun)")
034788fc 1479 (const_string "V4SF")
1480 ]
dab25421 1481 (const_string "<sseinsnmode>")))])
ed30e0a6 1482
;; Insn for an explicit unaligned integer store (UNSPEC_STOREU) of a
;; VI_ULOADSTORE_BW_AVX512VL register to memory, enabled by
;; TARGET_AVX512BW.  Always emits "vmovdqu<ssescalarsize>".
97173adf 1483(define_insn "<sse2_avx_avx512f>_storedqu<mode>"
1484 [(set (match_operand:VI_ULOADSTORE_BW_AVX512VL 0 "memory_operand" "=m")
1485 (unspec:VI_ULOADSTORE_BW_AVX512VL
1486 [(match_operand:VI_ULOADSTORE_BW_AVX512VL 1 "register_operand" "v")]
1487 UNSPEC_STOREU))]
1488 "TARGET_AVX512BW"
1489 "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}"
1490 [(set_attr "type" "ssemov")
1491 (set_attr "movu" "1")
1492 (set_attr "ssememalign" "8")
1493 (set_attr "prefix" "maybe_evex")])
1494
;; Insn for an explicit unaligned integer store (UNSPEC_STOREU) of a
;; VI_ULOADSTORE_F_AVX512VL register to memory, enabled by
;; TARGET_AVX512F.  Always emits "vmovdqu<ssescalarsize>".  The "prefix"
;; attribute is "maybe_evex", matching the other patterns in this file
;; that use the same vmovdqu<ssescalarsize> template.
1495(define_insn "<sse2_avx_avx512f>_storedqu<mode>"
1496 [(set (match_operand:VI_ULOADSTORE_F_AVX512VL 0 "memory_operand" "=m")
1497 (unspec:VI_ULOADSTORE_F_AVX512VL
1498 [(match_operand:VI_ULOADSTORE_F_AVX512VL 1 "register_operand" "v")]
1499 UNSPEC_STOREU))]
1500 "TARGET_AVX512F"
1501 "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}"
1502 [(set_attr "type" "ssemov")
1503 (set_attr "movu" "1")
1504 (set_attr "ssememalign" "8")
   (set_attr "prefix" "maybe_evex")])
1506
;; Masked unaligned integer store for VI48_AVX512VL modes: memory
;; operand 0 is set to a vec_merge of UNSPEC_STOREU (register operand 1)
;; and its own previous contents (match_dup 0) under mask register
;; operand 2.  Enabled by TARGET_AVX512F.
1507(define_insn "<avx512>_storedqu<mode>_mask"
1508 [(set (match_operand:VI48_AVX512VL 0 "memory_operand" "=m")
1509 (vec_merge:VI48_AVX512VL
1510 (unspec:VI48_AVX512VL
1511 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")]
5220cab6 1512 UNSPEC_STOREU)
1513 (match_dup 0)
a31e7f46 1514 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
5220cab6 1515 "TARGET_AVX512F"
97173adf 1516 "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
1517 [(set_attr "type" "ssemov")
1518 (set_attr "movu" "1")
1519 (set_attr "memory" "store")
1520 (set_attr "prefix" "evex")
1521 (set_attr "mode" "<sseinsnmode>")])
1522
;; Masked unaligned integer store for VI12_AVX512VL modes, enabled by
;; TARGET_AVX512BW.  Same RTL shape and template as the VI48_AVX512VL
;; pattern of the same name.
1523(define_insn "<avx512>_storedqu<mode>_mask"
1524 [(set (match_operand:VI12_AVX512VL 0 "memory_operand" "=m")
1525 (vec_merge:VI12_AVX512VL
1526 (unspec:VI12_AVX512VL
1527 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")]
1528 UNSPEC_STOREU)
1529 (match_dup 0)
1530 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1531 "TARGET_AVX512BW"
1532 "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
5220cab6 1533 [(set_attr "type" "ssemov")
1534 (set_attr "movu" "1")
1535 (set_attr "memory" "store")
1536 (set_attr "prefix" "evex")
1537 (set_attr "mode" "<sseinsnmode>")])
1538
;; UNSPEC_LDDQU load of a VI1 mode from memory into a register, emitted
;; as "%vlddqu".  When TARGET_AVX is not set, the prefix_data16 attribute
;; is "0" and prefix_rep is "1"; with TARGET_AVX both are "*".
63d5e521 1539(define_insn "<sse3>_lddqu<avxsizesuffix>"
e5f53f2a 1540 [(set (match_operand:VI1 0 "register_operand" "=x")
1541 (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
1542 UNSPEC_LDDQU))]
1543 "TARGET_SSE3"
1544 "%vlddqu\t{%1, %0|%0, %1}"
5802c0cb 1545 [(set_attr "type" "ssemov")
fbfe006e 1546 (set_attr "movu" "1")
8c1dfa94 1547 (set_attr "ssememalign" "8")
e5f53f2a 1548 (set (attr "prefix_data16")
1549 (if_then_else
6be3efec 1550 (match_test "TARGET_AVX")
e5f53f2a 1551 (const_string "*")
1552 (const_string "0")))
1553 (set (attr "prefix_rep")
1554 (if_then_else
6be3efec 1555 (match_test "TARGET_AVX")
e5f53f2a 1556 (const_string "*")
1557 (const_string "1")))
1558 (set_attr "prefix" "maybe_vex")
63d5e521 1559 (set_attr "mode" "<sseinsnmode>")])
b49a1e34 1560
;; UNSPEC_MOVNT store of an SWI48-mode general register (constraint "r")
;; to memory, emitted as "movnti".
821b85a2 1561(define_insn "sse2_movnti<mode>"
1562 [(set (match_operand:SWI48 0 "memory_operand" "=m")
1563 (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")]
0fd2a6af 1564 UNSPEC_MOVNT))]
5802c0cb 1565 "TARGET_SSE2"
1566 "movnti\t{%1, %0|%0, %1}"
fbfe006e 1567 [(set_attr "type" "ssemov")
00a0e418 1568 (set_attr "prefix_data16" "0")
821b85a2 1569 (set_attr "mode" "<MODE>")])
5802c0cb 1570
;; UNSPEC_MOVNT store of a VF-mode vector register to memory, emitted as
;; "%vmovnt<ssemodesuffix>".
e5f53f2a 1571(define_insn "<sse>_movnt<mode>"
1572 [(set (match_operand:VF 0 "memory_operand" "=m")
6a3f5f59 1573 (unspec:VF
1574 [(match_operand:VF 1 "register_operand" "v")]
1575 UNSPEC_MOVNT))]
e5f53f2a 1576 "TARGET_SSE"
1577 "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
1578 [(set_attr "type" "ssemov")
1579 (set_attr "prefix" "maybe_vex")
1580 (set_attr "mode" "<MODE>")])
1581
;; UNSPEC_MOVNT store of a VI8-mode vector register to memory, emitted as
;; "%vmovntdq".
;; NOTE(review): the "type" attribute is "ssecvt" here while the other
;; movnt patterns in this section use "ssemov" -- confirm this is intended.
1582(define_insn "<sse2>_movnt<mode>"
1583 [(set (match_operand:VI8 0 "memory_operand" "=m")
4c1099de 1584 (unspec:VI8 [(match_operand:VI8 1 "register_operand" "v")]
e5f53f2a 1585 UNSPEC_MOVNT))]
1586 "TARGET_SSE2"
1587 "%vmovntdq\t{%1, %0|%0, %1}"
ed30e0a6 1588 [(set_attr "type" "ssecvt")
e5f53f2a 1589 (set (attr "prefix_data16")
1590 (if_then_else
6be3efec 1591 (match_test "TARGET_AVX")
e5f53f2a 1592 (const_string "*")
1593 (const_string "1")))
1594 (set_attr "prefix" "maybe_vex")
63d5e521 1595 (set_attr "mode" "<sseinsnmode>")])
ed30e0a6 1596
5b5037b3 1597; Expand patterns for non-temporal stores. At the moment, only those
1598; that directly map to insns are defined; it would be possible to
1599; define patterns for other modes that would expand to several insns.
1600
6fe5844b 1601;; Modes handled by storent patterns.
;; Each entry pairs a mode with the target condition under which it is
;; enabled; V4SF carries no extra condition of its own.
1602(define_mode_iterator STORENT_MODE
0fd2a6af 1603 [(DI "TARGET_SSE2 && TARGET_64BIT") (SI "TARGET_SSE2")
1604 (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
6a3f5f59 1605 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2")
1606 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
1607 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
6fe5844b 1608
;; Expander for the storent<mode> named pattern over STORENT_MODE.  It has
;; no preparation code: it simply emits the UNSPEC_MOVNT set of register
;; operand 1 into memory operand 0.
2a466fea 1609(define_expand "storent<mode>"
abd4f58b 1610 [(set (match_operand:STORENT_MODE 0 "memory_operand")
8cedf886 1611 (unspec:STORENT_MODE
abd4f58b 1612 [(match_operand:STORENT_MODE 1 "register_operand")]
6fe5844b 1613 UNSPEC_MOVNT))]
1614 "TARGET_SSE")
5b5037b3 1615
5802c0cb 1616;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1617;;
2a466fea 1618;; Parallel floating point arithmetic
5802c0cb 1619;;
1620;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1621
;; Expander for the unary operations of the absneg code iterator (defined
;; elsewhere) on VF modes.  All work is delegated to
;; ix86_expand_fp_absneg_operator; the RTL template is never emitted.
f9162d6c 1622(define_expand "<code><mode>2"
abd4f58b 1623 [(set (match_operand:VF 0 "register_operand")
27e5502d 1624 (absneg:VF
abd4f58b 1625 (match_operand:VF 1 "register_operand")))]
6fe5844b 1626 "TARGET_SSE"
f9162d6c 1627 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
5802c0cb 1628
;; Vector abs/neg carrying an extra (use) of operand 2.  Emitted as "#"
;; and split after reload into a single logical operation on two inputs:
;; XOR when operator 3 is NEG, AND otherwise.
;; NOTE(review): operand 2 is presumably the sign-bit mask supplied by
;; the expander -- confirm against ix86_expand_fp_absneg_operator.
27e5502d 1629(define_insn_and_split "*absneg<mode>2"
6a3f5f59 1630 [(set (match_operand:VF 0 "register_operand" "=x,x,v,v")
27e5502d 1631 (match_operator:VF 3 "absneg_operator"
6a3f5f59 1632 [(match_operand:VF 1 "nonimmediate_operand" "0, xm, v, m")]))
1633 (use (match_operand:VF 2 "nonimmediate_operand" "xm, 0, vm,v"))]
6fe5844b 1634 "TARGET_SSE"
1c098c41 1635 "#"
a3d5479a 1636 "&& reload_completed"
1c098c41 1637 [(const_int 0)]
1638{
27e5502d 1639 enum rtx_code absneg_op;
1640 rtx op1, op2;
d8927ee1 1641 rtx t;
1642
/* With AVX, order the inputs so that a memory operand 1 ends up as the
   second input of the emitted operation.  */
27e5502d 1643 if (TARGET_AVX)
1644 {
1645 if (MEM_P (operands[1]))
1646 op1 = operands[2], op2 = operands[1];
1647 else
1648 op1 = operands[1], op2 = operands[2];
1649 }
d8927ee1 1650 else
/* Without AVX the first input is the destination itself; the second is
   whichever of operands 1 and 2 is not tied to it.  */
27e5502d 1651 {
1652 op1 = operands[0];
1653 if (rtx_equal_p (operands[0], operands[1]))
1654 op2 = operands[2];
1655 else
1656 op2 = operands[1];
1657 }
d8927ee1 1658
27e5502d 1659 absneg_op = GET_CODE (operands[3]) == NEG ? XOR : AND;
1660 t = gen_rtx_fmt_ee (absneg_op, <MODE>mode, op1, op2);
d1f9b275 1661 t = gen_rtx_SET (operands[0], t);
d8927ee1 1662 emit_insn (t);
1c098c41 1663 DONE;
27e5502d 1664}
1665 [(set_attr "isa" "noavx,noavx,avx,avx")])
ed30e0a6 1666
;; Expander for the plusminus code iterator on VF modes, with optional
;; mask and rounding variants.  The preparation code only calls
;; ix86_fixup_binary_operands_no_copy; the template is then emitted.
be60ab96 1667(define_expand "<plusminus_insn><mode>3<mask_name><round_name>"
abd4f58b 1668 [(set (match_operand:VF 0 "register_operand")
27e5502d 1669 (plusminus:VF
be60ab96 1670 (match_operand:VF 1 "<round_nimm_predicate>")
1671 (match_operand:VF 2 "<round_nimm_predicate>")))]
1672 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
9409fce7 1673 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
5802c0cb 1674
;; Insn for the plusminus code iterator on VF modes.
;;   alternative 0 ("noavx"): two-operand form, operand 1 tied to the
;;                            destination.
;;   alternative 1 ("avx"):   three-operand "v"-prefixed form, which also
;;                            prints the mask and rounding operands.
be60ab96 1675(define_insn "*<plusminus_insn><mode>3<mask_name><round_name>"
e13e1b39 1676 [(set (match_operand:VF 0 "register_operand" "=x,v")
27e5502d 1677 (plusminus:VF
be60ab96 1678 (match_operand:VF 1 "<round_nimm_predicate>" "<comm>0,v")
1679 (match_operand:VF 2 "<round_nimm_predicate>" "xm,<round_constraint>")))]
1680 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) && <mask_mode512bit_condition> && <round_mode512bit_condition>"
27e5502d 1681 "@
1682 <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
be60ab96 1683 v<plusminus_mnemonic><ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
27e5502d 1684 [(set_attr "isa" "noavx,avx")
1685 (set_attr "type" "sseadd")
5220cab6 1686 (set_attr "prefix" "<mask_prefix3>")
2a466fea 1687 (set_attr "mode" "<MODE>")])
5802c0cb 1688
;; Scalar plusminus operation on VF_128 modes: a vec_merge with mask
;; const_int 1 takes element 0 from the operation on operands 1 and 2
;; and the remaining elements from operand 1 (match_dup 1).
0b7cc9c6 1689(define_insn "<sse>_vm<plusminus_insn><mode>3<round_name>"
e13e1b39 1690 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
27e5502d 1691 (vec_merge:VF_128
1692 (plusminus:VF_128
e13e1b39 1693 (match_operand:VF_128 1 "register_operand" "0,v")
0b7cc9c6 1694 (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_constraint>"))
5802c0cb 1695 (match_dup 1)
1696 (const_int 1)))]
6fe5844b 1697 "TARGET_SSE"
27e5502d 1698 "@
c358a059 1699 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
0b7cc9c6 1700 v<plusminus_mnemonic><ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %<iptr>2<round_op3>}"
27e5502d 1701 [(set_attr "isa" "noavx,avx")
1702 (set_attr "type" "sseadd")
0b7cc9c6 1703 (set_attr "prefix" "<round_prefix>")
2a466fea 1704 (set_attr "mode" "<ssescalarmode>")])
5802c0cb 1705
;; Expander for vector multiplication on VF modes, with optional mask
;; and rounding variants.  The preparation code only calls
;; ix86_fixup_binary_operands_no_copy; the template is then emitted.
be60ab96 1706(define_expand "mul<mode>3<mask_name><round_name>"
abd4f58b 1707 [(set (match_operand:VF 0 "register_operand")
27e5502d 1708 (mult:VF
be60ab96 1709 (match_operand:VF 1 "<round_nimm_predicate>")
1710 (match_operand:VF 2 "<round_nimm_predicate>")))]
1711 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
2a466fea 1712 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
1713
;; Insn for vector multiplication on VF modes.  Operand 1 is marked
;; commutative ("%") in the non-AVX alternative.
;;   alternative 0 ("noavx"): two-operand "mul<ssemodesuffix>".
;;   alternative 1 ("avx"):   three-operand "vmul<ssemodesuffix>", which
;;                            also prints the mask and rounding operands.
be60ab96 1714(define_insn "*mul<mode>3<mask_name><round_name>"
6a3f5f59 1715 [(set (match_operand:VF 0 "register_operand" "=x,v")
27e5502d 1716 (mult:VF
be60ab96 1717 (match_operand:VF 1 "<round_nimm_predicate>" "%0,v")
1718 (match_operand:VF 2 "<round_nimm_predicate>" "xm,<round_constraint>")))]
1719 "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands) && <mask_mode512bit_condition> && <round_mode512bit_condition>"
27e5502d 1720 "@
1721 mul<ssemodesuffix>\t{%2, %0|%0, %2}
be60ab96 1722 vmul<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
27e5502d 1723 [(set_attr "isa" "noavx,avx")
1724 (set_attr "type" "ssemul")
5220cab6 1725 (set_attr "prefix" "<mask_prefix3>")
6470d004 1726 (set_attr "btver2_decode" "direct,double")
2a466fea 1727 (set_attr "mode" "<MODE>")])
5802c0cb 1728
;; Scalar multiply/divide on a 128-bit float vector (VF_128), iterated
;; over the multdiv code iterator.  The vec_merge with mask (const_int 1)
;; takes element 0 from the multdiv result and the remaining elements from
;; operand 1.  The "type" attribute is derived from the mnemonic
;; ("sse<multdiv_mnemonic>").
;; Alternative 0: two-operand non-AVX form; alternative 1: three-operand
;; "v" form with optional rounding operand.
0b7cc9c6 1729(define_insn "<sse>_vm<multdiv_mnemonic><mode>3<round_name>"
e13e1b39 1730 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
27e5502d 1731 (vec_merge:VF_128
1793b773 1732 (multdiv:VF_128
e13e1b39 1733 (match_operand:VF_128 1 "register_operand" "0,v")
0b7cc9c6 1734 (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_constraint>"))
5802c0cb 1735 (match_dup 1)
1736 (const_int 1)))]
6fe5844b 1737 "TARGET_SSE"
27e5502d 1738 "@
1793b773 1739 <multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
0b7cc9c6 1740 v<multdiv_mnemonic><ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %<iptr>2<round_op3>}"
27e5502d 1741 [(set_attr "isa" "noavx,avx")
1793b773 1742 (set_attr "type" "sse<multdiv_mnemonic>")
0b7cc9c6 1743 (set_attr "prefix" "<round_prefix>")
1793b773 1744 (set_attr "btver2_decode" "direct,double")
2a466fea 1745 (set_attr "mode" "<ssescalarmode>")])
5802c0cb 1746
;; Division expander for the VF2 modes, enabled under TARGET_SSE2.
;; It only passes the operands through
;; ix86_fixup_binary_operands_no_copy (DIV, ...); unlike the VF1 expander
;; of the same name, no alternative software sequence is emitted here.
27e5502d 1747(define_expand "div<mode>3"
abd4f58b 1748 [(set (match_operand:VF2 0 "register_operand")
1749 (div:VF2 (match_operand:VF2 1 "register_operand")
1750 (match_operand:VF2 2 "nonimmediate_operand")))]
6fe5844b 1751 "TARGET_SSE2"
27e5502d 1752 "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")
1753
;; Division expander for the VF1 modes, enabled under TARGET_SSE.
;; After fixing up the operands, it replaces the plain division with the
;; sequence generated by ix86_emit_swdivsf and finishes with DONE when
;; all of the following hold: TARGET_SSE_MATH, TARGET_RECIP_VEC_DIV, the
;; insn is not being optimized for size, flag_finite_math_only is set,
;; flag_trapping_math is clear and flag_unsafe_math_optimizations is set.
;; Otherwise the div RTL template above the condition is emitted.
1754(define_expand "div<mode>3"
abd4f58b 1755 [(set (match_operand:VF1 0 "register_operand")
1756 (div:VF1 (match_operand:VF1 1 "register_operand")
1757 (match_operand:VF1 2 "nonimmediate_operand")))]
6fe5844b 1758 "TARGET_SSE"
ed30e0a6 1759{
27e5502d 1760 ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);
ed30e0a6 1761
8acd1d98 1762 if (TARGET_SSE_MATH
1763 && TARGET_RECIP_VEC_DIV
1764 && !optimize_insn_for_size_p ()
ed30e0a6 1765 && flag_finite_math_only && !flag_trapping_math
1766 && flag_unsafe_math_optimizations)
1767 {
27e5502d 1768 ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
e174638f 1769 DONE;
1770 }
1771})
5802c0cb 1772
;; Packed floating-point divide insn for the VF modes.
;; Operand 1 (the dividend) must be a register and is not marked
;; commutative; operand 2 may be memory or carry a rounding operand.
;; Alternative 0: two-operand non-AVX form (destination tied to operand
;; 1); alternative 1: three-operand "v" form with mask/rounding support.
be60ab96 1773(define_insn "<sse>_div<mode>3<mask_name><round_name>"
e13e1b39 1774 [(set (match_operand:VF 0 "register_operand" "=x,v")
27e5502d 1775 (div:VF
e13e1b39 1776 (match_operand:VF 1 "register_operand" "0,v")
be60ab96 1777 (match_operand:VF 2 "<round_nimm_predicate>" "xm,<round_constraint>")))]
1778 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
27e5502d 1779 "@
1780 div<ssemodesuffix>\t{%2, %0|%0, %2}
be60ab96 1781 vdiv<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
27e5502d 1782 [(set_attr "isa" "noavx,avx")
1783 (set_attr "type" "ssediv")
5220cab6 1784 (set_attr "prefix" "<mask_prefix3>")
2a466fea 1785 (set_attr "mode" "<MODE>")])
5802c0cb 1786
;; Packed single-precision reciprocal (rcpps) for the VF1_128_256 modes,
;; represented as an UNSPEC_RCP of operand 1.
;; The single output template uses the "%v" prefix together with
;; prefix "maybe_vex" (presumably selecting the VEX-encoded mnemonic when
;; AVX is enabled -- confirm against the output-modifier documentation).
27e5502d 1787(define_insn "<sse>_rcp<mode>2"
03ae25dc 1788 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
1789 (unspec:VF1_128_256
1790 [(match_operand:VF1_128_256 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
6fe5844b 1791 "TARGET_SSE"
ed30e0a6 1792 "%vrcpps\t{%1, %0|%0, %1}"
5802c0cb 1793 [(set_attr "type" "sse")
fbfe006e 1794 (set_attr "atom_sse_attr" "rcp")
6470d004 1795 (set_attr "btver2_sse_attr" "rcp")
ed30e0a6 1796 (set_attr "prefix" "maybe_vex")
27e5502d 1797 (set_attr "mode" "<MODE>")])
ed30e0a6 1798
;; Scalar single-precision reciprocal (rcpss) on V4SF.
;; The vec_merge with mask (const_int 1) takes element 0 from the
;; UNSPEC_RCP of operand 1 and the remaining elements from operand 2.
;; Alternative 0: non-AVX form with the destination tied to operand 2;
;; alternative 1: three-operand vrcpss.  The Intel-syntax side prints
;; operand 1 with the "%k" modifier.
5802c0cb 1799(define_insn "sse_vmrcpv4sf2"
27e5502d 1800 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
5802c0cb 1801 (vec_merge:V4SF
27e5502d 1802 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
5802c0cb 1803 UNSPEC_RCP)
27e5502d 1804 (match_operand:V4SF 2 "register_operand" "0,x")
5802c0cb 1805 (const_int 1)))]
1806 "TARGET_SSE"
27e5502d 1807 "@
c358a059 1808 rcpss\t{%1, %0|%0, %k1}
1809 vrcpss\t{%1, %2, %0|%0, %2, %k1}"
27e5502d 1810 [(set_attr "isa" "noavx,avx")
1811 (set_attr "type" "sse")
8c1dfa94 1812 (set_attr "ssememalign" "32")
fbfe006e 1813 (set_attr "atom_sse_attr" "rcp")
6470d004 1814 (set_attr "btver2_sse_attr" "rcp")
27e5502d 1815 (set_attr "prefix" "orig,vex")
5802c0cb 1816 (set_attr "mode" "SF")])
1817
;; Packed vrcp14 for the VF_AVX512VL modes, represented as an
;; UNSPEC_RCP14 of operand 1.  Enabled under TARGET_AVX512F, always
;; EVEX-prefixed, with an optional mask decoration on the destination
;; (<mask_operand2>).
5220cab6 1818(define_insn "<mask_codefor>rcp14<mode><mask_name>"
250533c0 1819 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
1820 (unspec:VF_AVX512VL
1821 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")]
85065932 1822 UNSPEC_RCP14))]
1823 "TARGET_AVX512F"
5220cab6 1824 "vrcp14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
85065932 1825 [(set_attr "type" "sse")
1826 (set_attr "prefix" "evex")
1827 (set_attr "mode" "<MODE>")])
1828
;; Scalar vrcp14 on a 128-bit float vector (VF_128).
;; The vec_merge with mask (const_int 1) takes element 0 from the
;; UNSPEC_RCP14 of operand 1 and the remaining elements from operand 2.
;; Enabled under TARGET_AVX512F; EVEX-prefixed.
;; NOTE(review): the "mode" attribute is "<MODE>" here, whereas the other
;; scalar (vec_merge ... (const_int 1)) patterns nearby use
;; "<ssescalarmode>" -- confirm whether this is intentional.
0b7cc9c6 1829(define_insn "srcp14<mode>"
85065932 1830 [(set (match_operand:VF_128 0 "register_operand" "=v")
1831 (vec_merge:VF_128
1832 (unspec:VF_128
fbf4df62 1833 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
85065932 1834 UNSPEC_RCP14)
fbf4df62 1835 (match_operand:VF_128 2 "register_operand" "v")
85065932 1836 (const_int 1)))]
1837 "TARGET_AVX512F"
fbf4df62 1838 "vrcp14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
85065932 1839 [(set_attr "type" "sse")
1840 (set_attr "prefix" "evex")
1841 (set_attr "mode" "<MODE>")])
1842
;; Square-root expander for the VF2 modes, enabled under TARGET_SSE2.
;; It has no preparation statements: the sqrt RTL template is emitted
;; as written.
27e5502d 1843(define_expand "sqrt<mode>2"
abd4f58b 1844 [(set (match_operand:VF2 0 "register_operand")
1845 (sqrt:VF2 (match_operand:VF2 1 "nonimmediate_operand")))]
6fe5844b 1846 "TARGET_SSE2")
ed30e0a6 1847
;; Square-root expander for the VF1 modes, enabled under TARGET_SSE.
;; When TARGET_SSE_MATH and TARGET_RECIP_VEC_SQRT hold, the insn is not
;; optimized for size, flag_finite_math_only is set, flag_trapping_math is
;; clear and flag_unsafe_math_optimizations is set, the expander emits the
;; sequence from ix86_emit_swsqrtsf (last argument false) and finishes
;; with DONE.  Otherwise the sqrt RTL template is emitted.
27e5502d 1848(define_expand "sqrt<mode>2"
abd4f58b 1849 [(set (match_operand:VF1 0 "register_operand")
1850 (sqrt:VF1 (match_operand:VF1 1 "nonimmediate_operand")))]
6fe5844b 1851 "TARGET_SSE"
2a466fea 1852{
8acd1d98 1853 if (TARGET_SSE_MATH
1854 && TARGET_RECIP_VEC_SQRT
1855 && !optimize_insn_for_size_p ()
2a466fea 1856 && flag_finite_math_only && !flag_trapping_math
1857 && flag_unsafe_math_optimizations)
1858 {
27e5502d 1859 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
2a466fea 1860 DONE;
1861 }
1862})
1863
;; Packed square-root insn for the VF modes, with optional mask and
;; rounding decorations supplied by the <mask_name>/<round_name>
;; substitutions.  A single output template is used, prefixed with "%v"
;; and tagged prefix "maybe_vex".
be60ab96 1864(define_insn "<sse>_sqrt<mode>2<mask_name><round_name>"
e13e1b39 1865 [(set (match_operand:VF 0 "register_operand" "=v")
be60ab96 1866 (sqrt:VF (match_operand:VF 1 "<round_nimm_predicate>" "<round_constraint>")))]
1867 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1868 "%vsqrt<ssemodesuffix>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
2a466fea 1869 [(set_attr "type" "sse")
fbfe006e 1870 (set_attr "atom_sse_attr" "sqrt")
6470d004 1871 (set_attr "btver2_sse_attr" "sqrt")
ed30e0a6 1872 (set_attr "prefix" "maybe_vex")
27e5502d 1873 (set_attr "mode" "<MODE>")])
ed30e0a6 1874
;; Scalar square root on a 128-bit float vector (VF_128).
;; The vec_merge with mask (const_int 1) takes element 0 from the sqrt of
;; operand 1 and the remaining elements from operand 2.
;; Alternative 0: non-AVX form with the destination tied to operand 2;
;; alternative 1: three-operand "v" form with optional rounding operand.
0b7cc9c6 1875(define_insn "<sse>_vmsqrt<mode>2<round_name>"
e13e1b39 1876 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
27e5502d 1877 (vec_merge:VF_128
1878 (sqrt:VF_128
0b7cc9c6 1879 (match_operand:VF_128 1 "nonimmediate_operand" "xm,<round_constraint>"))
e13e1b39 1880 (match_operand:VF_128 2 "register_operand" "0,v")
2a466fea 1881 (const_int 1)))]
6fe5844b 1882 "TARGET_SSE"
27e5502d 1883 "@
c358a059 1884 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}
0b7cc9c6 1885 vsqrt<ssescalarmodesuffix>\t{<round_op3>%1, %2, %0|%0, %2, %<iptr>1<round_op3>}"
27e5502d 1886 [(set_attr "isa" "noavx,avx")
1887 (set_attr "type" "sse")
fbfe006e 1888 (set_attr "atom_sse_attr" "sqrt")
0b7cc9c6 1889 (set_attr "prefix" "<round_prefix>")
5220cab6 1890 (set_attr "btver2_sse_attr" "sqrt")
2a466fea 1891 (set_attr "mode" "<ssescalarmode>")])
1892
;; Reciprocal-square-root expander for the VF1_128_256 modes, enabled
;; under TARGET_SSE_MATH.  The UNSPEC_RSQRT template is never emitted
;; directly: the body unconditionally emits the sequence produced by
;; ix86_emit_swsqrtsf (last argument true) and finishes with DONE.
27e5502d 1893(define_expand "rsqrt<mode>2"
03ae25dc 1894 [(set (match_operand:VF1_128_256 0 "register_operand")
1895 (unspec:VF1_128_256
1896 [(match_operand:VF1_128_256 1 "nonimmediate_operand")] UNSPEC_RSQRT))]
1a086819 1897 "TARGET_SSE_MATH"
e174638f 1898{
27e5502d 1899 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
1a086819 1900 DONE;
e174638f 1901})
1902
;; Packed single-precision reciprocal square root (rsqrtps) for the
;; VF1_128_256 modes, represented as an UNSPEC_RSQRT of operand 1.
;; Single output template with the "%v" prefix and prefix "maybe_vex".
27e5502d 1903(define_insn "<sse>_rsqrt<mode>2"
03ae25dc 1904 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
1905 (unspec:VF1_128_256
1906 [(match_operand:VF1_128_256 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
6fe5844b 1907 "TARGET_SSE"
ed30e0a6 1908 "%vrsqrtps\t{%1, %0|%0, %1}"
d42cfd5f 1909 [(set_attr "type" "sse")
ed30e0a6 1910 (set_attr "prefix" "maybe_vex")
27e5502d 1911 (set_attr "mode" "<MODE>")])
ed30e0a6 1912
;; Packed vrsqrt14 for the VF_AVX512VL modes, represented as an
;; UNSPEC_RSQRT14 of operand 1.  Enabled under TARGET_AVX512F, always
;; EVEX-prefixed, with an optional mask decoration on the destination
;; (<mask_operand2>).
5220cab6 1913(define_insn "<mask_codefor>rsqrt14<mode><mask_name>"
250533c0 1914 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
1915 (unspec:VF_AVX512VL
1916 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")]
85065932 1917 UNSPEC_RSQRT14))]
1918 "TARGET_AVX512F"
5220cab6 1919 "vrsqrt14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
85065932 1920 [(set_attr "type" "sse")
1921 (set_attr "prefix" "evex")
1922 (set_attr "mode" "<MODE>")])
1923
;; Scalar vrsqrt14 on a 128-bit float vector (VF_128).
;; The vec_merge with mask (const_int 1) takes element 0 from the
;; UNSPEC_RSQRT14 of operand 1 and the remaining elements from operand 2.
;; Enabled under TARGET_AVX512F; EVEX-prefixed.
0b7cc9c6 1924(define_insn "rsqrt14<mode>"
85065932 1925 [(set (match_operand:VF_128 0 "register_operand" "=v")
1926 (vec_merge:VF_128
1927 (unspec:VF_128
c4f782fd 1928 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
85065932 1929 UNSPEC_RSQRT14)
c4f782fd 1930 (match_operand:VF_128 2 "register_operand" "v")
85065932 1931 (const_int 1)))]
1932 "TARGET_AVX512F"
c4f782fd 1933 "vrsqrt14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
85065932 1934 [(set_attr "type" "sse")
1935 (set_attr "prefix" "evex")
1936 (set_attr "mode" "<MODE>")])
1937
;; Scalar single-precision reciprocal square root (rsqrtss) on V4SF.
;; The vec_merge with mask (const_int 1) takes element 0 from the
;; UNSPEC_RSQRT of operand 1 and the remaining elements from operand 2.
;; Alternative 0: non-AVX form with the destination tied to operand 2;
;; alternative 1: three-operand vrsqrtss.
5802c0cb 1938(define_insn "sse_vmrsqrtv4sf2"
27e5502d 1939 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
5802c0cb 1940 (vec_merge:V4SF
27e5502d 1941 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
5802c0cb 1942 UNSPEC_RSQRT)
27e5502d 1943 (match_operand:V4SF 2 "register_operand" "0,x")
5802c0cb 1944 (const_int 1)))]
1945 "TARGET_SSE"
27e5502d 1946 "@
c358a059 1947 rsqrtss\t{%1, %0|%0, %k1}
1948 vrsqrtss\t{%1, %2, %0|%0, %2, %k1}"
27e5502d 1949 [(set_attr "isa" "noavx,avx")
1950 (set_attr "type" "sse")
8c1dfa94 1951 (set_attr "ssememalign" "32")
27e5502d 1952 (set_attr "prefix" "orig,vex")
5802c0cb 1953 (set_attr "mode" "SF")])
1954
79eddd43 1955;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
009b318f 1956;; isn't really correct, as those rtl operators aren't defined when
79eddd43 1957;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
1958
;; Expander for packed floating-point smax/smin (smaxmin code iterator)
;; over the VF modes.
;; When flag_finite_math_only is not set, operand 1 is first forced into
;; a register; this matches the non-finite insn below, whose operand 1
;; must be a register and is not commutative.  The operands are then
;; passed through ix86_fixup_binary_operands_no_copy.
dbfe84d5 1959(define_expand "<code><mode>3<mask_name><round_saeonly_name>"
abd4f58b 1960 [(set (match_operand:VF 0 "register_operand")
27e5502d 1961 (smaxmin:VF
dbfe84d5 1962 (match_operand:VF 1 "<round_saeonly_nimm_predicate>")
1963 (match_operand:VF 2 "<round_saeonly_nimm_predicate>")))]
1964 "TARGET_SSE && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
79eddd43 1965{
1966 if (!flag_finite_math_only)
2a466fea 1967 operands[1] = force_reg (<MODE>mode, operands[1]);
9409fce7 1968 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
79eddd43 1969})
5802c0cb 1970
;; Packed floating-point max/min insn used only when
;; flag_finite_math_only is set.  In that case operands 1 and 2 are
;; treated as commutative ("%" modifier on operand 1), and the insn
;; condition also requires ix86_binary_operator_ok.
;; Alternative 0: two-operand non-AVX form; alternative 1: three-operand
;; "v" form with mask and SAE decorations.
dbfe84d5 1971(define_insn "*<code><mode>3_finite<mask_name><round_saeonly_name>"
e13e1b39 1972 [(set (match_operand:VF 0 "register_operand" "=x,v")
27e5502d 1973 (smaxmin:VF
dbfe84d5 1974 (match_operand:VF 1 "<round_saeonly_nimm_predicate>" "%0,v")
1975 (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xm,<round_saeonly_constraint>")))]
6fe5844b 1976 "TARGET_SSE && flag_finite_math_only
5220cab6 1977 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
dbfe84d5 1978 && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
27e5502d 1979 "@
1980 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
dbfe84d5 1981 v<maxmin_float><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
27e5502d 1982 [(set_attr "isa" "noavx,avx")
1983 (set_attr "type" "sseadd")
6470d004 1984 (set_attr "btver2_sse_attr" "maxmin")
5220cab6 1985 (set_attr "prefix" "<mask_prefix3>")
2a466fea 1986 (set_attr "mode" "<MODE>")])
5802c0cb 1987
;; Packed floating-point max/min insn used when flag_finite_math_only is
;; NOT set.  Unlike the _finite variant, operand 1 must be a register and
;; carries no "%" modifier, so the operand order is preserved.
;; Alternative 0: two-operand non-AVX form; alternative 1: three-operand
;; "v" form with mask and SAE decorations.
dbfe84d5 1988(define_insn "*<code><mode>3<mask_name><round_saeonly_name>"
e13e1b39 1989 [(set (match_operand:VF 0 "register_operand" "=x,v")
27e5502d 1990 (smaxmin:VF
e13e1b39 1991 (match_operand:VF 1 "register_operand" "0,v")
dbfe84d5 1992 (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xm,<round_saeonly_constraint>")))]
5220cab6 1993 "TARGET_SSE && !flag_finite_math_only
dbfe84d5 1994 && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
27e5502d 1995 "@
1996 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
dbfe84d5 1997 v<maxmin_float><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
27e5502d 1998 [(set_attr "isa" "noavx,avx")
1999 (set_attr "type" "sseadd")
6470d004 2000 (set_attr "btver2_sse_attr" "maxmin")
5220cab6 2001 (set_attr "prefix" "<mask_prefix3>")
2a466fea 2002 (set_attr "mode" "<MODE>")])
79eddd43 2003
;; Scalar max/min on a 128-bit float vector (VF_128).
;; The vec_merge with mask (const_int 1) takes element 0 from the smaxmin
;; result and the remaining elements from operand 1.
;; Alternative 0: two-operand non-AVX form (destination tied to operand
;; 1); alternative 1: three-operand "v" form with optional SAE operand.
0b7cc9c6 2004(define_insn "<sse>_vm<code><mode>3<round_saeonly_name>"
e13e1b39 2005 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
27e5502d 2006 (vec_merge:VF_128
2007 (smaxmin:VF_128
e13e1b39 2008 (match_operand:VF_128 1 "register_operand" "0,v")
0b7cc9c6 2009 (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_saeonly_constraint>"))
79eddd43 2010 (match_dup 1)
2011 (const_int 1)))]
6fe5844b 2012 "TARGET_SSE"
27e5502d 2013 "@
c358a059 2014 <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
0b7cc9c6 2015 v<maxmin_float><ssescalarmodesuffix>\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %<iptr>2<round_saeonly_op3>}"
27e5502d 2016 [(set_attr "isa" "noavx,avx")
2017 (set_attr "type" "sse")
6470d004 2018 (set_attr "btver2_sse_attr" "maxmin")
0b7cc9c6 2019 (set_attr "prefix" "<round_saeonly_prefix>")
2a466fea 2020 (set_attr "mode" "<ssescalarmode>")])
5802c0cb 2021
6cab81bd 2022;; These versions of the min/max patterns implement exactly the operations
2023;; min = (op1 < op2 ? op1 : op2)
2024;; max = (!(op1 < op2) ? op1 : op2)
2025;; Their operands are not commutative, and thus they may be used in the
2026;; presence of -0.0 and NaN.
2027
;; Non-commutative packed min for the VF modes, represented as
;; UNSPEC_IEEE_MIN of (operand 1, operand 2) so that the operand order is
;; kept exactly as given.  Operand 1 must be a register.
;; Alternative 0: two-operand non-AVX form; alternative 1: three-operand
;; vmin form.
;; NOTE(review): alternative 1 uses the "v" register constraint but is
;; tagged prefix "vex"; if VF includes modes or registers that require
;; EVEX encoding, the prefix attribute may be inaccurate -- confirm.
2a466fea 2028(define_insn "*ieee_smin<mode>3"
6a3f5f59 2029 [(set (match_operand:VF 0 "register_operand" "=v,v")
27e5502d 2030 (unspec:VF
6a3f5f59 2031 [(match_operand:VF 1 "register_operand" "0,v")
2032 (match_operand:VF 2 "nonimmediate_operand" "vm,vm")]
2a466fea 2033 UNSPEC_IEEE_MIN))]
6fe5844b 2034 "TARGET_SSE"
27e5502d 2035 "@
8ba20934 2036 min<ssemodesuffix>\t{%2, %0|%0, %2}
2037 vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
27e5502d 2038 [(set_attr "isa" "noavx,avx")
2039 (set_attr "type" "sseadd")
2040 (set_attr "prefix" "orig,vex")
2a466fea 2041 (set_attr "mode" "<MODE>")])
6cab81bd 2042
;; Non-commutative packed max for the VF modes, represented as
;; UNSPEC_IEEE_MAX of (operand 1, operand 2) so that the operand order is
;; kept exactly as given.  Operand 1 must be a register.
;; Alternative 0: two-operand non-AVX form; alternative 1: three-operand
;; vmax form.
;; NOTE(review): alternative 1 uses the "v" register constraint but is
;; tagged prefix "vex"; if VF includes modes or registers that require
;; EVEX encoding, the prefix attribute may be inaccurate -- confirm.
2a466fea 2043(define_insn "*ieee_smax<mode>3"
6a3f5f59 2044 [(set (match_operand:VF 0 "register_operand" "=v,v")
27e5502d 2045 (unspec:VF
6a3f5f59 2046 [(match_operand:VF 1 "register_operand" "0,v")
2047 (match_operand:VF 2 "nonimmediate_operand" "vm,vm")]
2a466fea 2048 UNSPEC_IEEE_MAX))]
6fe5844b 2049 "TARGET_SSE"
27e5502d 2050 "@
8ba20934 2051 max<ssemodesuffix>\t{%2, %0|%0, %2}
2052 vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
27e5502d 2053 [(set_attr "isa" "noavx,avx")
2054 (set_attr "type" "sseadd")
2055 (set_attr "prefix" "orig,vex")
2a466fea 2056 (set_attr "mode" "<MODE>")])
6cab81bd 2057
;; vaddsubpd on V4DF (AVX only).
;; Modelled as a vec_merge of (minus op1 op2) and (plus op1 op2) with mask
;; (const_int 5) = 0b0101: elements 0 and 2 come from the difference,
;; elements 1 and 3 from the sum.
ed30e0a6 2058(define_insn "avx_addsubv4df3"
2059 [(set (match_operand:V4DF 0 "register_operand" "=x")
2060 (vec_merge:V4DF
06af5c80 2061 (minus:V4DF
ed30e0a6 2062 (match_operand:V4DF 1 "register_operand" "x")
2063 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
06af5c80 2064 (plus:V4DF (match_dup 1) (match_dup 2))
2065 (const_int 5)))]
6c197bf1 2066 "TARGET_AVX"
2067 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
2068 [(set_attr "type" "sseadd")
2069 (set_attr "prefix" "vex")
2070 (set_attr "mode" "V4DF")])
2071
;; addsubpd / vaddsubpd on V2DF (SSE3).
;; Modelled as a vec_merge of (minus op1 op2) and (plus op1 op2) with mask
;; (const_int 1): element 0 comes from the difference, element 1 from the
;; sum.  Alternative 0: two-operand non-AVX form; alternative 1:
;; three-operand VEX form.
27e5502d 2072(define_insn "sse3_addsubv2df3"
2073 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2074 (vec_merge:V2DF
06af5c80 2075 (minus:V2DF
27e5502d 2076 (match_operand:V2DF 1 "register_operand" "0,x")
2077 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm"))
06af5c80 2078 (plus:V2DF (match_dup 1) (match_dup 2))
2079 (const_int 1)))]
6c197bf1 2080 "TARGET_SSE3"
2081 "@
2082 addsubpd\t{%2, %0|%0, %2}
2083 vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
2084 [(set_attr "isa" "noavx,avx")
2085 (set_attr "type" "sseadd")
2086 (set_attr "atom_unit" "complex")
2087 (set_attr "prefix" "orig,vex")
2088 (set_attr "mode" "V2DF")])
2089
;; vaddsubps on V8SF (AVX only).
;; Modelled as a vec_merge of (minus op1 op2) and (plus op1 op2) with mask
;; (const_int 85) = 0b01010101: even-numbered elements come from the
;; difference, odd-numbered elements from the sum.
27e5502d 2090(define_insn "avx_addsubv8sf3"
2091 [(set (match_operand:V8SF 0 "register_operand" "=x")
2092 (vec_merge:V8SF
06af5c80 2093 (minus:V8SF
27e5502d 2094 (match_operand:V8SF 1 "register_operand" "x")
2095 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
06af5c80 2096 (plus:V8SF (match_dup 1) (match_dup 2))
2097 (const_int 85)))]
6c197bf1 2098 "TARGET_AVX"
2099 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
2100 [(set_attr "type" "sseadd")
2101 (set_attr "prefix" "vex")
2102 (set_attr "mode" "V8SF")])
2103
;; addsubps / vaddsubps on V4SF (SSE3).
;; Modelled as a vec_merge of (minus op1 op2) and (plus op1 op2) with mask
;; (const_int 5) = 0b0101: elements 0 and 2 come from the difference,
;; elements 1 and 3 from the sum.  Alternative 0: two-operand non-AVX
;; form (prefix_rep "1"); alternative 1: three-operand VEX form.
5802c0cb 2104(define_insn "sse3_addsubv4sf3"
27e5502d 2105 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
5802c0cb 2106 (vec_merge:V4SF
06af5c80 2107 (minus:V4SF
27e5502d 2108 (match_operand:V4SF 1 "register_operand" "0,x")
2109 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
06af5c80 2110 (plus:V4SF (match_dup 1) (match_dup 2))
2111 (const_int 5)))]
5802c0cb 2112 "TARGET_SSE3"
27e5502d 2113 "@
2114 addsubps\t{%2, %0|%0, %2}
2115 vaddsubps\t{%2, %1, %0|%0, %1, %2}"
2116 [(set_attr "isa" "noavx,avx")
2117 (set_attr "type" "sseadd")
6c197bf1 2118 (set_attr "prefix" "orig,vex")
2119 (set_attr "prefix_rep" "1,*")
2120 (set_attr "mode" "V4SF")])
2121
;; Split: canonicalize an "addsub_vm_operator" expression (predicate
;; defined outside this part of the file) whose first arm is a MINUS and
;; second arm a PLUS into the vec_merge (minus, plus, mask) form used by
;; the addsub insns above.
;; The condition requires pseudos to be creatable and that the PLUS
;; operands equal the MINUS operands, in either order (PLUS being
;; commutative).  The replacement reuses operands 1 and 2 for both arms
;; and keeps the mask (operand 5) unchanged.
06af5c80 2122(define_split
2123 [(set (match_operand:VF_128_256 0 "register_operand")
2124 (match_operator:VF_128_256 6 "addsub_vm_operator"
2125 [(minus:VF_128_256
2126 (match_operand:VF_128_256 1 "register_operand")
2127 (match_operand:VF_128_256 2 "nonimmediate_operand"))
2128 (plus:VF_128_256
2129 (match_operand:VF_128_256 3 "nonimmediate_operand")
2130 (match_operand:VF_128_256 4 "nonimmediate_operand"))
2131 (match_operand 5 "const_int_operand")]))]
2132 "TARGET_SSE3
2133 && can_create_pseudo_p ()
2134 && ((rtx_equal_p (operands[1], operands[3])
2135 && rtx_equal_p (operands[2], operands[4]))
2136 || (rtx_equal_p (operands[1], operands[4])
2137 && rtx_equal_p (operands[2], operands[3])))"
2138 [(set (match_dup 0)
2139 (vec_merge:VF_128_256
2140 (minus:VF_128_256 (match_dup 1) (match_dup 2))
2141 (plus:VF_128_256 (match_dup 1) (match_dup 2))
2142 (match_dup 5)))])
6c197bf1 2143
;; Split: same canonicalization as the preceding split, but for the case
;; where the PLUS arm comes first and the MINUS arm second.
;; The replacement is built from the MINUS operands (3 and 4) so the
;; subtraction order is preserved.  Because the arms are swapped relative
;; to the canonical (minus, plus) order, the preparation code complements
;; the mask in operand 5 and truncates it to GET_MODE_NUNITS (<MODE>mode)
;; bits.
06af5c80 2144(define_split
2145 [(set (match_operand:VF_128_256 0 "register_operand")
2146 (match_operator:VF_128_256 6 "addsub_vm_operator"
2147 [(plus:VF_128_256
2148 (match_operand:VF_128_256 1 "nonimmediate_operand")
2149 (match_operand:VF_128_256 2 "nonimmediate_operand"))
2150 (minus:VF_128_256
2151 (match_operand:VF_128_256 3 "register_operand")
2152 (match_operand:VF_128_256 4 "nonimmediate_operand"))
2153 (match_operand 5 "const_int_operand")]))]
2154 "TARGET_SSE3
2155 && can_create_pseudo_p ()
2156 && ((rtx_equal_p (operands[1], operands[3])
2157 && rtx_equal_p (operands[2], operands[4]))
2158 || (rtx_equal_p (operands[1], operands[4])
2159 && rtx_equal_p (operands[2], operands[3])))"
2160 [(set (match_dup 0)
2161 (vec_merge:VF_128_256
2162 (minus:VF_128_256 (match_dup 3) (match_dup 4))
2163 (plus:VF_128_256 (match_dup 3) (match_dup 4))
2164 (match_dup 5)))]
2165{
2166 /* Negate mask bits to compensate for swapped PLUS and MINUS RTXes. */
2167 operands[5]
2168 = GEN_INT (~INTVAL (operands[5])
2169 & ((HOST_WIDE_INT_1U << GET_MODE_NUNITS (<MODE>mode)) - 1));
2170})
2171
;; Split: canonicalize an "addsub_vs_operator" expression (predicate
;; defined outside this part of the file) applied to a vec_concat of
;; (minus, plus) plus a selector parallel ("addsub_vs_parallel") into the
;; vec_merge (minus, plus, mask) form.
;; The preparation code converts the selector parallel into a merge mask:
;; bit i is set when selector i is less than GET_MODE_NUNITS (<MODE>mode),
;; i.e. when it indexes the first (MINUS) half of the concatenation.
;; Operand 5 is then replaced by that integer mask.
2172(define_split
2173 [(set (match_operand:VF_128_256 0 "register_operand")
2174 (match_operator:VF_128_256 7 "addsub_vs_operator"
2175 [(vec_concat:<ssedoublemode>
2176 (minus:VF_128_256
2177 (match_operand:VF_128_256 1 "register_operand")
2178 (match_operand:VF_128_256 2 "nonimmediate_operand"))
2179 (plus:VF_128_256
2180 (match_operand:VF_128_256 3 "nonimmediate_operand")
2181 (match_operand:VF_128_256 4 "nonimmediate_operand")))
2182 (match_parallel 5 "addsub_vs_parallel"
2183 [(match_operand 6 "const_int_operand")])]))]
2184 "TARGET_SSE3
2185 && can_create_pseudo_p ()
2186 && ((rtx_equal_p (operands[1], operands[3])
2187 && rtx_equal_p (operands[2], operands[4]))
2188 || (rtx_equal_p (operands[1], operands[4])
2189 && rtx_equal_p (operands[2], operands[3])))"
2190 [(set (match_dup 0)
2191 (vec_merge:VF_128_256
2192 (minus:VF_128_256 (match_dup 1) (match_dup 2))
2193 (plus:VF_128_256 (match_dup 1) (match_dup 2))
2194 (match_dup 5)))]
2195{
2196 int i, nelt = XVECLEN (operands[5], 0);
2197 HOST_WIDE_INT ival = 0;
2198
2199 for (i = 0; i < nelt; i++)
2200 if (INTVAL (XVECEXP (operands[5], 0, i)) < GET_MODE_NUNITS (<MODE>mode))
2201 ival |= HOST_WIDE_INT_1 << i;
2202
2203 operands[5] = GEN_INT (ival);
2204})
2205
;; Split: same canonicalization as the preceding split, but for a
;; vec_concat whose first half is the PLUS and second half the MINUS.
;; The replacement is built from the MINUS operands (3 and 4).  The merge
;; mask gets bit i set when selector i is greater than or equal to
;; GET_MODE_NUNITS (<MODE>mode), i.e. when it indexes the second (MINUS)
;; half of the concatenation.
2206(define_split
2207 [(set (match_operand:VF_128_256 0 "register_operand")
2208 (match_operator:VF_128_256 7 "addsub_vs_operator"
2209 [(vec_concat:<ssedoublemode>
2210 (plus:VF_128_256
2211 (match_operand:VF_128_256 1 "nonimmediate_operand")
2212 (match_operand:VF_128_256 2 "nonimmediate_operand"))
2213 (minus:VF_128_256
2214 (match_operand:VF_128_256 3 "register_operand")
2215 (match_operand:VF_128_256 4 "nonimmediate_operand")))
2216 (match_parallel 5 "addsub_vs_parallel"
2217 [(match_operand 6 "const_int_operand")])]))]
2218 "TARGET_SSE3
2219 && can_create_pseudo_p ()
2220 && ((rtx_equal_p (operands[1], operands[3])
2221 && rtx_equal_p (operands[2], operands[4]))
2222 || (rtx_equal_p (operands[1], operands[4])
2223 && rtx_equal_p (operands[2], operands[3])))"
2224 [(set (match_dup 0)
2225 (vec_merge:VF_128_256
2226 (minus:VF_128_256 (match_dup 3) (match_dup 4))
2227 (plus:VF_128_256 (match_dup 3) (match_dup 4))
2228 (match_dup 5)))]
2229{
2230 int i, nelt = XVECLEN (operands[5], 0);
2231 HOST_WIDE_INT ival = 0;
2232
2233 for (i = 0; i < nelt; i++)
2234 if (INTVAL (XVECEXP (operands[5], 0, i)) >= GET_MODE_NUNITS (<MODE>mode))
2235 ival |= HOST_WIDE_INT_1 << i;
2236
2237 operands[5] = GEN_INT (ival);
2238})
5802c0cb 2239
;; Horizontal add/subtract on V4DF (vhaddpd / vhsubpd, AVX only).
;; Result layout, with "op" being the plusminus operation:
;;   element 0 = op1[0] op op1[1]
;;   element 1 = op2[0] op op2[1]
;;   element 2 = op1[2] op op1[3]
;;   element 3 = op2[2] op op2[3]
ed30e0a6 2240(define_insn "avx_h<plusminus_insn>v4df3"
2241 [(set (match_operand:V4DF 0 "register_operand" "=x")
2242 (vec_concat:V4DF
2243 (vec_concat:V2DF
2244 (plusminus:DF
2245 (vec_select:DF
2246 (match_operand:V4DF 1 "register_operand" "x")
2247 (parallel [(const_int 0)]))
2248 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
ed30e0a6 2249 (plusminus:DF
2250 (vec_select:DF
2251 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
2252 (parallel [(const_int 0)]))
80f058cd 2253 (vec_select:DF (match_dup 2) (parallel [(const_int 1)]))))
2254 (vec_concat:V2DF
2255 (plusminus:DF
2256 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
2257 (vec_select:DF (match_dup 1) (parallel [(const_int 3)])))
ed30e0a6 2258 (plusminus:DF
2259 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
2260 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
2261 "TARGET_AVX"
2262 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
2263 [(set_attr "type" "sseadd")
2264 (set_attr "prefix" "vex")
2265 (set_attr "mode" "V4DF")])
2266
;; Named expander for horizontal add on V2DF (SSE3).
;; Emits the canonical form with element indices 0 then 1 inside each
;; PLUS: result = { op1[0] + op1[1], op2[0] + op2[1] }.  It has no
;; preparation statements; the RTL is matched by *sse3_haddv2df3.
6095368e 2267(define_expand "sse3_haddv2df3"
2268 [(set (match_operand:V2DF 0 "register_operand")
2269 (vec_concat:V2DF
2270 (plus:DF
2271 (vec_select:DF
2272 (match_operand:V2DF 1 "register_operand")
2273 (parallel [(const_int 0)]))
2274 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2275 (plus:DF
2276 (vec_select:DF
2277 (match_operand:V2DF 2 "nonimmediate_operand")
2278 (parallel [(const_int 0)]))
2279 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
2280 "TARGET_SSE3")
2281
;; Horizontal add insn on V2DF (haddpd / vhaddpd).
;; The element indices inside each PLUS are matched as operands 3/4 and
;; 5/6 (each 0 or 1) instead of fixed constants; the insn condition only
;; requires the two indices of each pair to differ, so either order of
;; the commutative addition is accepted.
;; Alternative 0: two-operand non-AVX form; alternative 1: three-operand
;; VEX form.
2282(define_insn "*sse3_haddv2df3"
27e5502d 2283 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2284 (vec_concat:V2DF
6095368e 2285 (plus:DF
2286 (vec_select:DF
2287 (match_operand:V2DF 1 "register_operand" "0,x")
2288 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))
2289 (vec_select:DF
2290 (match_dup 1)
2291 (parallel [(match_operand:SI 4 "const_0_to_1_operand")])))
2292 (plus:DF
2293 (vec_select:DF
2294 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
2295 (parallel [(match_operand:SI 5 "const_0_to_1_operand")]))
2296 (vec_select:DF
2297 (match_dup 2)
2298 (parallel [(match_operand:SI 6 "const_0_to_1_operand")])))))]
2299 "TARGET_SSE3
2300 && INTVAL (operands[3]) != INTVAL (operands[4])
2301 && INTVAL (operands[5]) != INTVAL (operands[6])"
2302 "@
2303 haddpd\t{%2, %0|%0, %2}
2304 vhaddpd\t{%2, %1, %0|%0, %1, %2}"
2305 [(set_attr "isa" "noavx,avx")
2306 (set_attr "type" "sseadd")
2307 (set_attr "prefix" "orig,vex")
2308 (set_attr "mode" "V2DF")])
2309
;; Horizontal subtract insn on V2DF (hsubpd / vhsubpd).
;; result = { op1[0] - op1[1], op2[0] - op2[1] }.  Because subtraction is
;; not commutative the element indices are fixed constants here, unlike
;; the hadd pattern.
;; Alternative 0: two-operand non-AVX form; alternative 1: three-operand
;; VEX form.
2310(define_insn "sse3_hsubv2df3"
2311 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2312 (vec_concat:V2DF
2313 (minus:DF
27e5502d 2314 (vec_select:DF
2315 (match_operand:V2DF 1 "register_operand" "0,x")
2316 (parallel [(const_int 0)]))
2317 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
6095368e 2318 (minus:DF
27e5502d 2319 (vec_select:DF
2320 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
2321 (parallel [(const_int 0)]))
2322 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
2323 "TARGET_SSE3"
2324 "@
6095368e 2325 hsubpd\t{%2, %0|%0, %2}
2326 vhsubpd\t{%2, %1, %0|%0, %1, %2}"
27e5502d 2327 [(set_attr "isa" "noavx,avx")
2328 (set_attr "type" "sseadd")
2329 (set_attr "prefix" "orig,vex")
2330 (set_attr "mode" "V2DF")])
2331
;; Scalar DF sum of the two elements of a V2DF register, implemented with
;; haddpd / vhaddpd using the same register for both source operands.
;; The two element indices (operands 2 and 3, each 0 or 1) must differ;
;; either order is accepted.
;; Alternative 0: non-AVX form with the destination tied to operand 1;
;; alternative 1: VEX form reading operand 1 twice.
6095368e 2332(define_insn "*sse3_haddv2df3_low"
2333 [(set (match_operand:DF 0 "register_operand" "=x,x")
2334 (plus:DF
2335 (vec_select:DF
2336 (match_operand:V2DF 1 "register_operand" "0,x")
2337 (parallel [(match_operand:SI 2 "const_0_to_1_operand")]))
2338 (vec_select:DF
2339 (match_dup 1)
2340 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))))]
2341 "TARGET_SSE3
2342 && INTVAL (operands[2]) != INTVAL (operands[3])"
2343 "@
2344 haddpd\t{%0, %0|%0, %0}
2345 vhaddpd\t{%1, %1, %0|%0, %1, %1}"
2346 [(set_attr "isa" "noavx,avx")
2347 (set_attr "type" "sseadd1")
2348 (set_attr "prefix" "orig,vex")
2349 (set_attr "mode" "V2DF")])
2350
;; Scalar DF difference op1[0] - op1[1] of a V2DF register, implemented
;; with hsubpd / vhsubpd using the same register for both source operands.
;; Alternative 0: non-AVX form with the destination tied to operand 1;
;; alternative 1: VEX form reading operand 1 twice.
2351(define_insn "*sse3_hsubv2df3_low"
2352 [(set (match_operand:DF 0 "register_operand" "=x,x")
2353 (minus:DF
2354 (vec_select:DF
2355 (match_operand:V2DF 1 "register_operand" "0,x")
2356 (parallel [(const_int 0)]))
2357 (vec_select:DF
2358 (match_dup 1)
2359 (parallel [(const_int 1)]))))]
2360 "TARGET_SSE3"
2361 "@
2362 hsubpd\t{%0, %0|%0, %0}
2363 vhsubpd\t{%1, %1, %0|%0, %1, %1}"
2364 [(set_attr "isa" "noavx,avx")
2365 (set_attr "type" "sseadd1")
2366 (set_attr "prefix" "orig,vex")
2367 (set_attr "mode" "V2DF")])
2368
;; Horizontal add/subtract on V8SF (vhaddps / vhsubps, AVX only).
;; Result layout, with "op" being the plusminus operation:
;;   elements 0-1 = op1[0] op op1[1], op1[2] op op1[3]
;;   elements 2-3 = op2[0] op op2[1], op2[2] op op2[3]
;;   elements 4-5 = op1[4] op op1[5], op1[6] op op1[7]
;;   elements 6-7 = op2[4] op op2[5], op2[6] op op2[7]
ed30e0a6 2369(define_insn "avx_h<plusminus_insn>v8sf3"
2370 [(set (match_operand:V8SF 0 "register_operand" "=x")
2371 (vec_concat:V8SF
2372 (vec_concat:V4SF
2373 (vec_concat:V2SF
2374 (plusminus:SF
2375 (vec_select:SF
2376 (match_operand:V8SF 1 "register_operand" "x")
2377 (parallel [(const_int 0)]))
2378 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
2379 (plusminus:SF
2380 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
2381 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
2382 (vec_concat:V2SF
2383 (plusminus:SF
2384 (vec_select:SF
2385 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
2386 (parallel [(const_int 0)]))
2387 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
2388 (plusminus:SF
2389 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
2390 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
2391 (vec_concat:V4SF
2392 (vec_concat:V2SF
2393 (plusminus:SF
2394 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
2395 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
2396 (plusminus:SF
2397 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
2398 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
2399 (vec_concat:V2SF
2400 (plusminus:SF
2401 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
2402 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
2403 (plusminus:SF
2404 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
2405 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
2406 "TARGET_AVX"
2407 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
2408 [(set_attr "type" "sseadd")
2409 (set_attr "prefix" "vex")
2410 (set_attr "mode" "V8SF")])
2411
;; Horizontal add/subtract on V4SF (haddps / hsubps and VEX forms, SSE3).
;; Result layout, with "op" being the plusminus operation:
;;   element 0 = op1[0] op op1[1]
;;   element 1 = op1[2] op op1[3]
;;   element 2 = op2[0] op op2[1]
;;   element 3 = op2[2] op op2[3]
;; Alternative 0: two-operand non-AVX form (prefix_rep "1");
;; alternative 1: three-operand VEX form.
801ff5b2 2412(define_insn "sse3_h<plusminus_insn>v4sf3"
27e5502d 2413 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2a466fea 2414 (vec_concat:V4SF
2415 (vec_concat:V2SF
9409fce7 2416 (plusminus:SF
2a466fea 2417 (vec_select:SF
27e5502d 2418 (match_operand:V4SF 1 "register_operand" "0,x")
2a466fea 2419 (parallel [(const_int 0)]))
2420 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
9409fce7 2421 (plusminus:SF
5802c0cb 2422 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
2423 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
2424 (vec_concat:V2SF
9409fce7 2425 (plusminus:SF
5802c0cb 2426 (vec_select:SF
27e5502d 2427 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
5802c0cb 2428 (parallel [(const_int 0)]))
2429 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
9409fce7 2430 (plusminus:SF
5802c0cb 2431 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
2432 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
2433 "TARGET_SSE3"
27e5502d 2434 "@
2435 h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
2436 vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
2437 [(set_attr "isa" "noavx,avx")
2438 (set_attr "type" "sseadd")
fbfe006e 2439 (set_attr "atom_unit" "complex")
27e5502d 2440 (set_attr "prefix" "orig,vex")
2441 (set_attr "prefix_rep" "1,*")
5802c0cb 2442 (set_attr "mode" "V4SF")])
2443
;; Sum-reduction of a V8DF vector to a DF scalar (AVX512F).
;; ix86_expand_reduc is called with gen_addv8df3 to reduce operand 1 into
;; a temporary vector; element 0 of that temporary is then extracted
;; into operand 0.
b8c0e65c 2444(define_expand "reduc_plus_scal_v8df"
2445 [(match_operand:DF 0 "register_operand")
8e9989b0 2446 (match_operand:V8DF 1 "register_operand")]
2447 "TARGET_AVX512F"
2448{
b8c0e65c 2449 rtx tmp = gen_reg_rtx (V8DFmode);
2450 ix86_expand_reduc (gen_addv8df3, tmp, operands[1]);
2451 emit_insn (gen_vec_extractv8df (operands[0], tmp, const0_rtx));
8e9989b0 2452 DONE;
2453})
2454
;; Sum-reduction of a V4DF vector to a DF scalar (AVX).
;; Sequence emitted:
;;   1. tmp     = hadd (op1, op1)           -- pairwise sums
;;   2. tmp2    = vperm2f128 (tmp, tmp, 1)  -- presumably swaps the two
;;                                             128-bit halves of tmp
;;   3. vec_res = tmp + tmp2
;;   4. operand 0 = element 0 of vec_res
b8c0e65c 2455(define_expand "reduc_plus_scal_v4df"
2456 [(match_operand:DF 0 "register_operand")
abd4f58b 2457 (match_operand:V4DF 1 "register_operand")]
ed30e0a6 2458 "TARGET_AVX"
27e5502d 2459{
2460 rtx tmp = gen_reg_rtx (V4DFmode);
2461 rtx tmp2 = gen_reg_rtx (V4DFmode);
b8c0e65c 2462 rtx vec_res = gen_reg_rtx (V4DFmode);
27e5502d 2463 emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
2464 emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1)));
b8c0e65c 2465 emit_insn (gen_addv4df3 (vec_res, tmp, tmp2));
2466 emit_insn (gen_vec_extractv4df (operands[0], vec_res, const0_rtx));
27e5502d 2467 DONE;
2468})
ed30e0a6 2469
;; Sum-reduction of a V2DF vector to a DF scalar (SSE3).
;; A single horizontal add of operand 1 with itself puts op1[0] + op1[1]
;; in the temporary; element 0 is then extracted into operand 0.
b8c0e65c 2470(define_expand "reduc_plus_scal_v2df"
2471 [(match_operand:DF 0 "register_operand")
abd4f58b 2472 (match_operand:V2DF 1 "register_operand")]
2a466fea 2473 "TARGET_SSE3"
27e5502d 2474{
b8c0e65c 2475 rtx tmp = gen_reg_rtx (V2DFmode);
2476 emit_insn (gen_sse3_haddv2df3 (tmp, operands[1], operands[1]));
2477 emit_insn (gen_vec_extractv2df (operands[0], tmp, const0_rtx));
27e5502d 2478 DONE;
2479})
2a466fea 2480
;; Sum-reduction of a V16SF vector to an SF scalar (AVX512F).
;; ix86_expand_reduc is called with gen_addv16sf3 to reduce operand 1
;; into a temporary vector; element 0 of that temporary is then extracted
;; into operand 0.
b8c0e65c 2481(define_expand "reduc_plus_scal_v16sf"
2482 [(match_operand:SF 0 "register_operand")
8e9989b0 2483 (match_operand:V16SF 1 "register_operand")]
2484 "TARGET_AVX512F"
2485{
b8c0e65c 2486 rtx tmp = gen_reg_rtx (V16SFmode);
2487 ix86_expand_reduc (gen_addv16sf3, tmp, operands[1]);
2488 emit_insn (gen_vec_extractv16sf (operands[0], tmp, const0_rtx));
8e9989b0 2489 DONE;
2490})
2491
;; Sum-reduction of a V8SF vector to an SF scalar (AVX).
;; Sequence emitted:
;;   1. tmp     = hadd (op1, op1)
;;   2. tmp2    = hadd (tmp, tmp)
;;   3. tmp     = vperm2f128 (tmp2, tmp2, 1) -- presumably swaps the two
;;                                              128-bit halves of tmp2
;;   4. vec_res = tmp + tmp2
;;   5. operand 0 = element 0 of vec_res
;; Note that "tmp" is reused as the destination in step 3.
b8c0e65c 2492(define_expand "reduc_plus_scal_v8sf"
2493 [(match_operand:SF 0 "register_operand")
abd4f58b 2494 (match_operand:V8SF 1 "register_operand")]
8cedf886 2495 "TARGET_AVX"
2496{
2497 rtx tmp = gen_reg_rtx (V8SFmode);
2498 rtx tmp2 = gen_reg_rtx (V8SFmode);
b8c0e65c 2499 rtx vec_res = gen_reg_rtx (V8SFmode);
8cedf886 2500 emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
19fbdfaf 2501 emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp));
2502 emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1)));
b8c0e65c 2503 emit_insn (gen_addv8sf3 (vec_res, tmp, tmp2));
2504 emit_insn (gen_vec_extractv8sf (operands[0], vec_res, const0_rtx));
8cedf886 2505 DONE;
2506})
2507
;; Plus-reduction of a V4SF vector (operand 1) into an SF scalar (operand 0),
;; enabled under TARGET_SSE.  With TARGET_SSE3 two horizontal adds are
;; emitted; otherwise the work is delegated to ix86_expand_reduc with
;; gen_addv4sf3.  In both cases element 0 of vec_res is extracted into
;; operand 0.
b8c0e65c 2508(define_expand "reduc_plus_scal_v4sf"
2509 [(match_operand:SF 0 "register_operand")
abd4f58b 2510 (match_operand:V4SF 1 "register_operand")]
49f312aa 2511 "TARGET_SSE"
2512{
b8c0e65c 2513 rtx vec_res = gen_reg_rtx (V4SFmode);
49f312aa 2514 if (TARGET_SSE3)
2515 {
2516 rtx tmp = gen_reg_rtx (V4SFmode);
2517 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
b8c0e65c 2518 emit_insn (gen_sse3_haddv4sf3 (vec_res, tmp, tmp));
49f312aa 2519 }
2520 else
b8c0e65c 2521 ix86_expand_reduc (gen_addv4sf3, vec_res, operands[1]);
2522 emit_insn (gen_vec_extractv4sf (operands[0], vec_res, const0_rtx));
49f312aa 2523 DONE;
2524})
2525
bb7ad312 2526;; Modes handled by reduc_sm{in,ax}* patterns.
;; Each mode is paired with the target condition under which it is enabled:
;; 256-bit integer modes need AVX2, 256-bit float modes AVX, V4SF SSE,
;; V64QI/V32HI AVX512BW, and the remaining 512-bit modes AVX512F.
2527(define_mode_iterator REDUC_SMINMAX_MODE
2528 [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
2529 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
2530 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
24d7a006 2531 (V4SF "TARGET_SSE") (V64QI "TARGET_AVX512BW")
2532 (V32HI "TARGET_AVX512BW") (V16SI "TARGET_AVX512F")
d2ff59d6 2533 (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
2534 (V8DF "TARGET_AVX512F")])
bb7ad312 2535
;; Signed min/max reduction (code iterator smaxmin) over every mode in
;; REDUC_SMINMAX_MODE.  The expander condition is empty, so enablement comes
;; from the per-mode conditions of the iterator.  The reduction is delegated
;; to ix86_expand_reduc with the matching gen_<code><mode>3; element 0 of the
;; result is extracted into the scalar operand 0.
b8c0e65c 2536(define_expand "reduc_<code>_scal_<mode>"
bb7ad312 2537 [(smaxmin:REDUC_SMINMAX_MODE
b8c0e65c 2538 (match_operand:<ssescalarmode> 0 "register_operand")
abd4f58b 2539 (match_operand:REDUC_SMINMAX_MODE 1 "register_operand"))]
bb7ad312 2540 ""
49f312aa 2541{
b8c0e65c 2542 rtx tmp = gen_reg_rtx (<MODE>mode);
2543 ix86_expand_reduc (gen_<code><mode>3, tmp, operands[1]);
2544 emit_insn (gen_vec_extract<mode> (operands[0], tmp, const0_rtx));
2e954432 2545 DONE;
2546})
2547
;; Unsigned min/max reduction (code iterator umaxmin) over the VI_AVX512BW
;; integer modes, enabled under TARGET_AVX512F.  Delegates to
;; ix86_expand_reduc with gen_<code><mode>3 and extracts element 0 of the
;; result into the scalar operand 0.
b8c0e65c 2548(define_expand "reduc_<code>_scal_<mode>"
9c9987c5 2549 [(umaxmin:VI_AVX512BW
b8c0e65c 2550 (match_operand:<ssescalarmode> 0 "register_operand")
9c9987c5 2551 (match_operand:VI_AVX512BW 1 "register_operand"))]
d2ff59d6 2552 "TARGET_AVX512F"
2553{
b8c0e65c 2554 rtx tmp = gen_reg_rtx (<MODE>mode);
2555 ix86_expand_reduc (gen_<code><mode>3, tmp, operands[1]);
2556 emit_insn (gen_vec_extract<mode> (operands[0], tmp, const0_rtx));
d2ff59d6 2557 DONE;
2558})
2559
;; Unsigned min/max reduction (code iterator umaxmin) over the VI_256 integer
;; modes, enabled under TARGET_AVX2.  Delegates to ix86_expand_reduc with
;; gen_<code><mode>3 and extracts element 0 of the result into the scalar
;; operand 0.
b8c0e65c 2560(define_expand "reduc_<code>_scal_<mode>"
bb7ad312 2561 [(umaxmin:VI_256
b8c0e65c 2562 (match_operand:<ssescalarmode> 0 "register_operand")
abd4f58b 2563 (match_operand:VI_256 1 "register_operand"))]
bb7ad312 2564 "TARGET_AVX2"
2e954432 2565{
b8c0e65c 2566 rtx tmp = gen_reg_rtx (<MODE>mode);
2567 ix86_expand_reduc (gen_<code><mode>3, tmp, operands[1]);
2568 emit_insn (gen_vec_extract<mode> (operands[0], tmp, const0_rtx));
49f312aa 2569 DONE;
2570})
2571
;; Unsigned-min reduction of a V8HI vector (operand 1) into an HI scalar
;; (operand 0), enabled under TARGET_SSE4_1.  Delegates to ix86_expand_reduc
;; with gen_uminv8hi3 and extracts element 0 of the result into operand 0.
b8c0e65c 2572(define_expand "reduc_umin_scal_v8hi"
e529f590 2573 [(umin:V8HI
b8c0e65c 2574 (match_operand:HI 0 "register_operand")
abd4f58b 2575 (match_operand:V8HI 1 "register_operand"))]
e529f590 2576 "TARGET_SSE4_1"
2577{
b8c0e65c 2578 rtx tmp = gen_reg_rtx (V8HImode);
2579 ix86_expand_reduc (gen_uminv8hi3, tmp, operands[1]);
2580 emit_insn (gen_vec_extractv8hi (operands[0], tmp, const0_rtx));
e529f590 2581 DONE;
2582})
2583
;; Packed vreduce<ssemodesuffix> over the VF_AVX512VL modes, modelled as
;; UNSPEC_REDUCE of a register-or-memory source (operand 1) and an 8-bit
;; immediate (operand 2, 0..255).  Enabled under TARGET_AVX512DQ; the
;; <mask_name> substitution supplies the optional masked variant.
6164575a 2584(define_insn "<mask_codefor>reducep<mode><mask_name>"
2585 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
2586 (unspec:VF_AVX512VL
2587 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")
2588 (match_operand:SI 2 "const_0_to_255_operand")]
2589 UNSPEC_REDUCE))]
2590 "TARGET_AVX512DQ"
2591 "vreduce<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
2592 [(set_attr "type" "sse")
2593 (set_attr "prefix" "evex")
2594 (set_attr "mode" "<MODE>")])
2595
;; Scalar vreduce<ssescalarmodesuffix> over the VF_128 modes, enabled under
;; TARGET_AVX512DQ.  The UNSPEC_REDUCE result (operands 1, 2 and the 0..255
;; immediate operand 3) is vec_merged with operand 1 under mask (const_int 1),
;; i.e. only element 0 comes from the unspec and the rest from operand 1.
2596(define_insn "reduces<mode>"
2597 [(set (match_operand:VF_128 0 "register_operand" "=v")
2598 (vec_merge:VF_128
2599 (unspec:VF_128
2600 [(match_operand:VF_128 1 "register_operand" "v")
2601 (match_operand:VF_128 2 "nonimmediate_operand" "vm")
2602 (match_operand:SI 3 "const_0_to_255_operand")]
2603 UNSPEC_REDUCE)
2604 (match_dup 1)
2605 (const_int 1)))]
2606 "TARGET_AVX512DQ"
2607 "vreduce<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2608 [(set_attr "type" "sse")
2609 (set_attr "prefix" "evex")
2610 (set_attr "mode" "<MODE>")])
2611
5802c0cb 2612;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2613;;
2a466fea 2614;; Parallel floating point comparisons
5802c0cb 2615;;
2616;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2617
;; Packed AVX compare vcmp<ssemodesuffix> over the VF_128_256 modes, modelled
;; as UNSPEC_PCMP of two sources and a comparison-predicate immediate
;; (operand 3, 0..31).  Enabled under TARGET_AVX; the result is a vector in
;; the same mode as the inputs.
27e5502d 2618(define_insn "avx_cmp<mode>3"
6a3f5f59 2619 [(set (match_operand:VF_128_256 0 "register_operand" "=x")
2620 (unspec:VF_128_256
2621 [(match_operand:VF_128_256 1 "register_operand" "x")
2622 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm")
ed30e0a6 2623 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2624 UNSPEC_PCMP))]
2625 "TARGET_AVX"
0061967e 2626 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
ed30e0a6 2627 [(set_attr "type" "ssecmp")
00a0e418 2628 (set_attr "length_immediate" "1")
ed30e0a6 2629 (set_attr "prefix" "vex")
2630 (set_attr "mode" "<MODE>")])
2631
;; Scalar AVX compare vcmp<ssescalarmodesuffix> over the VF_128 modes.  The
;; UNSPEC_PCMP result is vec_merged with operand 1 under mask (const_int 1),
;; so only element 0 is taken from the comparison.  Enabled under TARGET_AVX;
;; the Intel-syntax template prints operand 2 with the <iptr> modifier.
27e5502d 2632(define_insn "avx_vmcmp<mode>3"
2633 [(set (match_operand:VF_128 0 "register_operand" "=x")
2634 (vec_merge:VF_128
2635 (unspec:VF_128
2636 [(match_operand:VF_128 1 "register_operand" "x")
2637 (match_operand:VF_128 2 "nonimmediate_operand" "xm")
ed30e0a6 2638 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2639 UNSPEC_PCMP)
2640 (match_dup 1)
2641 (const_int 1)))]
2642 "TARGET_AVX"
c358a059 2643 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}"
ed30e0a6 2644 [(set_attr "type" "ssecmp")
00a0e418 2645 (set_attr "length_immediate" "1")
ed30e0a6 2646 (set_attr "prefix" "vex")
2647 (set_attr "mode" "<ssescalarmode>")])
2648
;; Packed compare for commutative comparison codes: matches only when the
;; operator in operand 3 is of class RTX_COMM_COMPARE, which is why operand 1
;; carries the "%" commutative marker.  Alternative 0 is the two-operand SSE
;; form (destination tied to operand 1), alternative 1 the three-operand VEX
;; form; the predicate name is printed via %D3.
dd1f4650 2649(define_insn "*<sse>_maskcmp<mode>3_comm"
6a3f5f59 2650 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
2651 (match_operator:VF_128_256 3 "sse_comparison_operator"
2652 [(match_operand:VF_128_256 1 "register_operand" "%0,x")
2653 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")]))]
dd1f4650 2654 "TARGET_SSE
2655 && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
2656 "@
2657 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
2658 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2659 [(set_attr "isa" "noavx,avx")
2660 (set_attr "type" "ssecmp")
2661 (set_attr "length_immediate" "1")
2662 (set_attr "prefix" "orig,vex")
2663 (set_attr "mode" "<MODE>")])
2664
;; Packed compare producing a vector result in the VF_128_256 modes, for any
;; operator accepted by sse_comparison_operator.  Alternative 0 is the
;; two-operand SSE form (destination tied to operand 1), alternative 1 the
;; three-operand VEX form; the predicate name is printed via %D3.
ed30e0a6 2665(define_insn "<sse>_maskcmp<mode>3"
6a3f5f59 2666 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
2667 (match_operator:VF_128_256 3 "sse_comparison_operator"
2668 [(match_operand:VF_128_256 1 "register_operand" "0,x")
2669 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")]))]
6fe5844b 2670 "TARGET_SSE"
27e5502d 2671 "@
2672 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
2673 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2674 [(set_attr "isa" "noavx,avx")
2675 (set_attr "type" "ssecmp")
00a0e418 2676 (set_attr "length_immediate" "1")
27e5502d 2677 (set_attr "prefix" "orig,vex")
2a466fea 2678 (set_attr "mode" "<MODE>")])
e7fdb903 2679
;; Scalar compare over the VF_128 modes: the comparison result is vec_merged
;; with operand 1 under mask (const_int 1), so only element 0 comes from the
;; comparison.  Alternative 0 is the two-operand SSE form, alternative 1 the
;; three-operand VEX form.
;; NOTE(review): length_immediate is "1,*" here while the sibling compare
;; patterns use "1" for all alternatives -- confirm this is intentional.
2a466fea 2680(define_insn "<sse>_vmmaskcmp<mode>3"
27e5502d 2681 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2682 (vec_merge:VF_128
2683 (match_operator:VF_128 3 "sse_comparison_operator"
2684 [(match_operand:VF_128 1 "register_operand" "0,x")
2685 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")])
5802c0cb 2686 (match_dup 1)
2687 (const_int 1)))]
6fe5844b 2688 "TARGET_SSE"
27e5502d 2689 "@
c358a059 2690 cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
2691 vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
27e5502d 2692 [(set_attr "isa" "noavx,avx")
2693 (set_attr "type" "ssecmp")
2694 (set_attr "length_immediate" "1,*")
2695 (set_attr "prefix" "orig,vex")
2a466fea 2696 (set_attr "mode" "<ssescalarmode>")])
5802c0cb 2697
;; Maps a vector mode to the predicate used for the comparison immediate of
;; the AVX-512 compare patterns: const_0_to_31_operand for the floating-point
;; modes listed, const_0_to_7_operand for the integer modes.
8e6b975f 2698(define_mode_attr cmp_imm_predicate
f50aa6e9 2699 [(V16SF "const_0_to_31_operand") (V8DF "const_0_to_31_operand")
2700 (V16SI "const_0_to_7_operand") (V8DI "const_0_to_7_operand")
2701 (V8SF "const_0_to_31_operand") (V4DF "const_0_to_31_operand")
2702 (V8SI "const_0_to_7_operand") (V4DI "const_0_to_7_operand")
2703 (V4SF "const_0_to_31_operand") (V2DF "const_0_to_31_operand")
2704 (V4SI "const_0_to_7_operand") (V2DI "const_0_to_7_operand")
2705 (V32HI "const_0_to_7_operand") (V64QI "const_0_to_7_operand")
2706 (V16HI "const_0_to_7_operand") (V32QI "const_0_to_7_operand")
2707 (V8HI "const_0_to_7_operand") (V16QI "const_0_to_7_operand")])
2708
;; AVX-512 compare into a mask register ("Yk") for the V48_AVX512VL modes,
;; modelled as UNSPEC_PCMP.  The immediate predicate is chosen per mode via
;; <cmp_imm_predicate>; the mnemonic is v<sseintprefix>cmp<ssemodesuffix>.
;; The <mask_scalar_merge_name> and <round_saeonly_name> substitutions supply
;; the masked and SAE variants.
2709(define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name><round_saeonly_name>"
a31e7f46 2710 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
8e6b975f 2711 (unspec:<avx512fmaskmode>
f50aa6e9 2712 [(match_operand:V48_AVX512VL 1 "register_operand" "v")
2713 (match_operand:V48_AVX512VL 2 "nonimmediate_operand" "<round_saeonly_constraint>")
8e6b975f 2714 (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
2715 UNSPEC_PCMP))]
dbfe84d5 2716 "TARGET_AVX512F && <round_saeonly_mode512bit_condition>"
2717 "v<sseintprefix>cmp<ssemodesuffix>\t{%3, <round_saeonly_mask_scalar_merge_op4>%2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2<round_saeonly_mask_scalar_merge_op4>, %3}"
8e6b975f 2718 [(set_attr "type" "ssecmp")
2719 (set_attr "length_immediate" "1")
2720 (set_attr "prefix" "evex")
2721 (set_attr "mode" "<sseinsnmode>")])
2722
;; AVX-512BW compare vpcmp<ssemodesuffix> into a mask register for the
;; VI12_AVX512VL modes, modelled as UNSPEC_PCMP with a per-mode immediate
;; predicate.  Enabled under TARGET_AVX512BW; <mask_scalar_merge_name>
;; supplies the masked variant.
f50aa6e9 2723(define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name>"
2724 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2725 (unspec:<avx512fmaskmode>
2726 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
2727 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
2728 (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
2729 UNSPEC_PCMP))]
2730 "TARGET_AVX512BW"
2731 "vpcmp<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
2732 [(set_attr "type" "ssecmp")
2733 (set_attr "length_immediate" "1")
2734 (set_attr "prefix" "evex")
2735 (set_attr "mode" "<sseinsnmode>")])
2736
;; AVX-512BW unsigned compare vpcmpu<ssemodesuffix> into a mask register for
;; the VI12_AVX512VL modes, modelled as UNSPEC_UNSIGNED_PCMP with a 0..7
;; immediate.  Enabled under TARGET_AVX512BW; <mask_scalar_merge_name>
;; supplies the masked variant.
6b76cef2 2737(define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
a31e7f46 2738 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
d2ff59d6 2739 (unspec:<avx512fmaskmode>
6b76cef2 2740 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
2741 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
2742 (match_operand:SI 3 "const_0_to_7_operand" "n")]
2743 UNSPEC_UNSIGNED_PCMP))]
2744 "TARGET_AVX512BW"
2745 "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
2746 [(set_attr "type" "ssecmp")
2747 (set_attr "length_immediate" "1")
2748 (set_attr "prefix" "evex")
2749 (set_attr "mode" "<sseinsnmode>")])
2750
;; AVX-512F unsigned compare vpcmpu<ssemodesuffix> into a mask register for
;; the VI48_AVX512VL modes, modelled as UNSPEC_UNSIGNED_PCMP with a 0..7
;; immediate.  Enabled under TARGET_AVX512F; <mask_scalar_merge_name>
;; supplies the masked variant.
2751(define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
2752 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2753 (unspec:<avx512fmaskmode>
2754 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
2755 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
d2ff59d6 2756 (match_operand:SI 3 "const_0_to_7_operand" "n")]
2757 UNSPEC_UNSIGNED_PCMP))]
2758 "TARGET_AVX512F"
c3d9b089 2759 "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
d2ff59d6 2760 [(set_attr "type" "ssecmp")
2761 (set_attr "length_immediate" "1")
2762 (set_attr "prefix" "evex")
2763 (set_attr "mode" "<sseinsnmode>")])
2764
;; AVX-512F scalar compare vcmp<ssescalarmodesuffix> into a mask register.
;; The UNSPEC_PCMP mask result is ANDed with (const_int 1), so only bit 0 of
;; the mask is kept.  <round_saeonly_name> supplies the SAE variant.
dbfe84d5 2765(define_insn "avx512f_vmcmp<mode>3<round_saeonly_name>"
a31e7f46 2766 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2344eae2 2767 (and:<avx512fmaskmode>
2768 (unspec:<avx512fmaskmode>
2769 [(match_operand:VF_128 1 "register_operand" "v")
dbfe84d5 2770 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2344eae2 2771 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2772 UNSPEC_PCMP)
2773 (const_int 1)))]
2774 "TARGET_AVX512F"
dbfe84d5 2775 "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}"
2344eae2 2776 [(set_attr "type" "ssecmp")
2777 (set_attr "length_immediate" "1")
2778 (set_attr "prefix" "evex")
2779 (set_attr "mode" "<ssescalarmode>")])
2780
;; Masked form of the AVX-512F scalar compare: the UNSPEC_PCMP mask result is
;; ANDed with (operand 4 & 1), where operand 4 is a mask register printed as
;; the {%4} write-mask in the output template.  <round_saeonly_name> supplies
;; the SAE variant.
dbfe84d5 2781(define_insn "avx512f_vmcmp<mode>3_mask<round_saeonly_name>"
a31e7f46 2782 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
5220cab6 2783 (and:<avx512fmaskmode>
2784 (unspec:<avx512fmaskmode>
2785 [(match_operand:VF_128 1 "register_operand" "v")
dbfe84d5 2786 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
5220cab6 2787 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2788 UNSPEC_PCMP)
2789 (and:<avx512fmaskmode>
a31e7f46 2790 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")
5220cab6 2791 (const_int 1))))]
2792 "TARGET_AVX512F"
dbfe84d5 2793 "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_saeonly_op5>, %3}"
5220cab6 2794 [(set_attr "type" "ssecmp")
2795 (set_attr "length_immediate" "1")
2796 (set_attr "prefix" "evex")
2797 (set_attr "mode" "<ssescalarmode>")])
2798
;; Packed FP compare whose result is a mask register: any operator accepted
;; by sse_comparison_operator applied to two VF vectors, with the predicate
;; name printed via %D3.
;; The condition is TARGET_AVX512F (not TARGET_SSE): the destination uses the
;; "Yk" mask-register constraint and the insn is emitted with the EVEX
;; prefix, neither of which is available on a plain SSE target, so a weaker
;; condition would let the pattern match where it cannot be assembled.
2344eae2 2799(define_insn "avx512f_maskcmp<mode>3"
a31e7f46 2800 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2344eae2 2801 (match_operator:<avx512fmaskmode> 3 "sse_comparison_operator"
2802 [(match_operand:VF 1 "register_operand" "v")
2803 (match_operand:VF 2 "nonimmediate_operand" "vm")]))]
2804 "TARGET_AVX512F"
2805 "vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2806 [(set_attr "type" "ssecmp")
2807 (set_attr "length_immediate" "1")
2808 (set_attr "prefix" "evex")
2809 (set_attr "mode" "<sseinsnmode>")])
2810
;; Scalar compare setting FLAGS_REG in CCFP mode: compares element 0 of
;; operand 0 with element 0 of operand 1 and emits %vcomi<ssemodesuffix>.
;; Enabled when SSE_FLOAT_MODE_P holds for the scalar mode.  prefix_data16 is
;; 1 only for DF mode; <round_saeonly_name> supplies the SAE variant.
dbfe84d5 2811(define_insn "<sse>_comi<round_saeonly_name>"
5802c0cb 2812 [(set (reg:CCFP FLAGS_REG)
2813 (compare:CCFP
2a466fea 2814 (vec_select:MODEF
4c1099de 2815 (match_operand:<ssevecmode> 0 "register_operand" "v")
5802c0cb 2816 (parallel [(const_int 0)]))
2a466fea 2817 (vec_select:MODEF
dbfe84d5 2818 (match_operand:<ssevecmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
5802c0cb 2819 (parallel [(const_int 0)]))))]
2a466fea 2820 "SSE_FLOAT_MODE_P (<MODE>mode)"
dbfe84d5 2821 "%vcomi<ssemodesuffix>\t{<round_saeonly_op2>%1, %0|%0, %<iptr>1<round_saeonly_op2>}"
5802c0cb 2822 [(set_attr "type" "ssecomi")
ed30e0a6 2823 (set_attr "prefix" "maybe_vex")
00a0e418 2824 (set_attr "prefix_rep" "0")
2825 (set (attr "prefix_data16")
2826 (if_then_else (eq_attr "mode" "DF")
2827 (const_string "1")
2828 (const_string "0")))
2a466fea 2829 (set_attr "mode" "<MODE>")])
5802c0cb 2830
;; Scalar compare setting FLAGS_REG in CCFPU mode: compares element 0 of
;; operand 0 with element 0 of operand 1 and emits %vucomi<ssemodesuffix>.
;; Enabled when SSE_FLOAT_MODE_P holds for the scalar mode.  prefix_data16 is
;; 1 only for DF mode; <round_saeonly_name> supplies the SAE variant.
dbfe84d5 2831(define_insn "<sse>_ucomi<round_saeonly_name>"
5802c0cb 2832 [(set (reg:CCFPU FLAGS_REG)
2833 (compare:CCFPU
2a466fea 2834 (vec_select:MODEF
4c1099de 2835 (match_operand:<ssevecmode> 0 "register_operand" "v")
5802c0cb 2836 (parallel [(const_int 0)]))
2a466fea 2837 (vec_select:MODEF
dbfe84d5 2838 (match_operand:<ssevecmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
5802c0cb 2839 (parallel [(const_int 0)]))))]
2a466fea 2840 "SSE_FLOAT_MODE_P (<MODE>mode)"
dbfe84d5 2841 "%vucomi<ssemodesuffix>\t{<round_saeonly_op2>%1, %0|%0, %<iptr>1<round_saeonly_op2>}"
5802c0cb 2842 [(set_attr "type" "ssecomi")
ed30e0a6 2843 (set_attr "prefix" "maybe_vex")
00a0e418 2844 (set_attr "prefix_rep" "0")
2845 (set (attr "prefix_data16")
2846 (if_then_else (eq_attr "mode" "DF")
2847 (const_string "1")
2848 (const_string "0")))
2a466fea 2849 (set_attr "mode" "<MODE>")])
5802c0cb 2850
;; vec_cmp expander producing a mask-mode result for the V48_AVX512VL modes,
;; enabled under TARGET_AVX512F.  Operand 1 is the comparison operator (no
;; predicate restriction).  Expansion is delegated to
;; ix86_expand_mask_vec_cmp, which is asserted to succeed.
dab48979 2851(define_expand "vec_cmp<mode><avx512fmaskmodelower>"
2852 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
2853 (match_operator:<avx512fmaskmode> 1 ""
2854 [(match_operand:V48_AVX512VL 2 "register_operand")
2855 (match_operand:V48_AVX512VL 3 "nonimmediate_operand")]))]
2856 "TARGET_AVX512F"
2857{
2858 bool ok = ix86_expand_mask_vec_cmp (operands);
2859 gcc_assert (ok);
2860 DONE;
2861})
2862
;; vec_cmp expander producing a mask-mode result for the VI12_AVX512VL modes,
;; enabled under TARGET_AVX512BW.  Expansion is delegated to
;; ix86_expand_mask_vec_cmp, which is asserted to succeed.
2863(define_expand "vec_cmp<mode><avx512fmaskmodelower>"
2864 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
2865 (match_operator:<avx512fmaskmode> 1 ""
2866 [(match_operand:VI12_AVX512VL 2 "register_operand")
2867 (match_operand:VI12_AVX512VL 3 "nonimmediate_operand")]))]
2868 "TARGET_AVX512BW"
2869{
2870 bool ok = ix86_expand_mask_vec_cmp (operands);
2871 gcc_assert (ok);
2872 DONE;
2873})
2874
;; vec_cmp expander producing an integer-vector result (<sseintvecmode>) for
;; the VI_256 modes, enabled under TARGET_AVX2.  Expansion is delegated to
;; ix86_expand_int_vec_cmp, which is asserted to succeed.
2875(define_expand "vec_cmp<mode><sseintvecmodelower>"
2876 [(set (match_operand:<sseintvecmode> 0 "register_operand")
2877 (match_operator:<sseintvecmode> 1 ""
2878 [(match_operand:VI_256 2 "register_operand")
2879 (match_operand:VI_256 3 "nonimmediate_operand")]))]
2880 "TARGET_AVX2"
2881{
2882 bool ok = ix86_expand_int_vec_cmp (operands);
2883 gcc_assert (ok);
2884 DONE;
2885})
2886
;; vec_cmp expander producing an integer-vector result (<sseintvecmode>) for
;; the VI124_128 modes, enabled under TARGET_SSE2.  Expansion is delegated to
;; ix86_expand_int_vec_cmp, which is asserted to succeed.
2887(define_expand "vec_cmp<mode><sseintvecmodelower>"
2888 [(set (match_operand:<sseintvecmode> 0 "register_operand")
2889 (match_operator:<sseintvecmode> 1 ""
2890 [(match_operand:VI124_128 2 "register_operand")
2891 (match_operand:VI124_128 3 "nonimmediate_operand")]))]
2892 "TARGET_SSE2"
2893{
2894 bool ok = ix86_expand_int_vec_cmp (operands);
2895 gcc_assert (ok);
2896 DONE;
2897})
2898
;; vec_cmp expander for V2DI operands with a V2DI result, enabled under
;; TARGET_SSE4_2 (a stricter condition than the other 128-bit integer modes).
;; Expansion is delegated to ix86_expand_int_vec_cmp, which is asserted to
;; succeed.
2899(define_expand "vec_cmpv2div2di"
2900 [(set (match_operand:V2DI 0 "register_operand")
2901 (match_operator:V2DI 1 ""
2902 [(match_operand:V2DI 2 "register_operand")
2903 (match_operand:V2DI 3 "nonimmediate_operand")]))]
2904 "TARGET_SSE4_2"
2905{
2906 bool ok = ix86_expand_int_vec_cmp (operands);
2907 gcc_assert (ok);
2908 DONE;
2909})
2910
;; vec_cmp expander for the VF_256 float modes with an integer-vector result
;; (<sseintvecmode>), enabled under TARGET_AVX.  Expansion is delegated to
;; ix86_expand_fp_vec_cmp, which is asserted to succeed.
2911(define_expand "vec_cmp<mode><sseintvecmodelower>"
2912 [(set (match_operand:<sseintvecmode> 0 "register_operand")
2913 (match_operator:<sseintvecmode> 1 ""
2914 [(match_operand:VF_256 2 "register_operand")
2915 (match_operand:VF_256 3 "nonimmediate_operand")]))]
2916 "TARGET_AVX"
2917{
2918 bool ok = ix86_expand_fp_vec_cmp (operands);
2919 gcc_assert (ok);
2920 DONE;
2921})
2922
;; vec_cmp expander for the VF_128 float modes with an integer-vector result
;; (<sseintvecmode>), enabled under TARGET_SSE.  Expansion is delegated to
;; ix86_expand_fp_vec_cmp, which is asserted to succeed.
2923(define_expand "vec_cmp<mode><sseintvecmodelower>"
2924 [(set (match_operand:<sseintvecmode> 0 "register_operand")
2925 (match_operator:<sseintvecmode> 1 ""
2926 [(match_operand:VF_128 2 "register_operand")
2927 (match_operand:VF_128 3 "nonimmediate_operand")]))]
2928 "TARGET_SSE"
2929{
2930 bool ok = ix86_expand_fp_vec_cmp (operands);
2931 gcc_assert (ok);
2932 DONE;
2933})
2934
;; vec_cmpu (unsigned compare) expander producing a mask-mode result for the
;; VI48_AVX512VL modes, enabled under TARGET_AVX512F.  Expansion is delegated
;; to ix86_expand_mask_vec_cmp, which is asserted to succeed.
2935(define_expand "vec_cmpu<mode><avx512fmaskmodelower>"
2936 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
2937 (match_operator:<avx512fmaskmode> 1 ""
2938 [(match_operand:VI48_AVX512VL 2 "register_operand")
2939 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand")]))]
2940 "TARGET_AVX512F"
2941{
2942 bool ok = ix86_expand_mask_vec_cmp (operands);
2943 gcc_assert (ok);
2944 DONE;
2945})
2946
;; vec_cmpu (unsigned compare) expander producing a mask-mode result for the
;; VI12_AVX512VL modes, enabled under TARGET_AVX512BW.  Expansion is
;; delegated to ix86_expand_mask_vec_cmp, which is asserted to succeed.
2947(define_expand "vec_cmpu<mode><avx512fmaskmodelower>"
2948 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
2949 (match_operator:<avx512fmaskmode> 1 ""
2950 [(match_operand:VI12_AVX512VL 2 "register_operand")
2951 (match_operand:VI12_AVX512VL 3 "nonimmediate_operand")]))]
2952 "TARGET_AVX512BW"
2953{
2954 bool ok = ix86_expand_mask_vec_cmp (operands);
2955 gcc_assert (ok);
2956 DONE;
2957})
2958
;; vec_cmpu (unsigned compare) expander with an integer-vector result for the
;; VI_256 modes, enabled under TARGET_AVX2.  Expansion is delegated to
;; ix86_expand_int_vec_cmp, which is asserted to succeed.
2959(define_expand "vec_cmpu<mode><sseintvecmodelower>"
2960 [(set (match_operand:<sseintvecmode> 0 "register_operand")
2961 (match_operator:<sseintvecmode> 1 ""
2962 [(match_operand:VI_256 2 "register_operand")
2963 (match_operand:VI_256 3 "nonimmediate_operand")]))]
2964 "TARGET_AVX2"
2965{
2966 bool ok = ix86_expand_int_vec_cmp (operands);
2967 gcc_assert (ok);
2968 DONE;
2969})
2970
;; vec_cmpu (unsigned compare) expander with an integer-vector result for the
;; VI124_128 modes, enabled under TARGET_SSE2.  Expansion is delegated to
;; ix86_expand_int_vec_cmp, which is asserted to succeed.
2971(define_expand "vec_cmpu<mode><sseintvecmodelower>"
2972 [(set (match_operand:<sseintvecmode> 0 "register_operand")
2973 (match_operator:<sseintvecmode> 1 ""
2974 [(match_operand:VI124_128 2 "register_operand")
2975 (match_operand:VI124_128 3 "nonimmediate_operand")]))]
2976 "TARGET_SSE2"
2977{
2978 bool ok = ix86_expand_int_vec_cmp (operands);
2979 gcc_assert (ok);
2980 DONE;
2981})
2982
;; vec_cmpu (unsigned compare) expander for V2DI operands with a V2DI result,
;; enabled under TARGET_SSE4_2.  Expansion is delegated to
;; ix86_expand_int_vec_cmp, which is asserted to succeed.
2983(define_expand "vec_cmpuv2div2di"
2984 [(set (match_operand:V2DI 0 "register_operand")
2985 (match_operator:V2DI 1 ""
2986 [(match_operand:V2DI 2 "register_operand")
2987 (match_operand:V2DI 3 "nonimmediate_operand")]))]
2988 "TARGET_SSE4_2"
2989{
2990 bool ok = ix86_expand_int_vec_cmp (operands);
2991 gcc_assert (ok);
2992 DONE;
2993})
2994
;; vcond expander: selects between operands 1 and 2 (any V_512 mode) based on
;; a comparison (operand 3) of two VF_512 float vectors (operands 4 and 5).
;; Enabled under TARGET_AVX512F and only when the data mode and the
;; comparison mode have the same number of units.  Expansion is delegated to
;; ix86_expand_fp_vcond, which is asserted to succeed.
f23a3158 2995(define_expand "vcond<V_512:mode><VF_512:mode>"
2996 [(set (match_operand:V_512 0 "register_operand")
2997 (if_then_else:V_512
2998 (match_operator 3 ""
2999 [(match_operand:VF_512 4 "nonimmediate_operand")
3000 (match_operand:VF_512 5 "nonimmediate_operand")])
3001 (match_operand:V_512 1 "general_operand")
3002 (match_operand:V_512 2 "general_operand")))]
3003 "TARGET_AVX512F
3004 && (GET_MODE_NUNITS (<V_512:MODE>mode)
3005 == GET_MODE_NUNITS (<VF_512:MODE>mode))"
3006{
3007 bool ok = ix86_expand_fp_vcond (operands);
3008 gcc_assert (ok);
3009 DONE;
3010})
3011
;; vcond expander: selects between operands 1 and 2 (any V_256 mode) based on
;; a comparison (operand 3) of two VF_256 float vectors (operands 4 and 5).
;; Enabled under TARGET_AVX and only when the data mode and the comparison
;; mode have the same number of units.  Expansion is delegated to
;; ix86_expand_fp_vcond, which is asserted to succeed.
d6b19f6b 3012(define_expand "vcond<V_256:mode><VF_256:mode>"
abd4f58b 3013 [(set (match_operand:V_256 0 "register_operand")
d6b19f6b 3014 (if_then_else:V_256
5deb404d 3015 (match_operator 3 ""
abd4f58b 3016 [(match_operand:VF_256 4 "nonimmediate_operand")
3017 (match_operand:VF_256 5 "nonimmediate_operand")])
3018 (match_operand:V_256 1 "general_operand")
3019 (match_operand:V_256 2 "general_operand")))]
d6b19f6b 3020 "TARGET_AVX
3021 && (GET_MODE_NUNITS (<V_256:MODE>mode)
3022 == GET_MODE_NUNITS (<VF_256:MODE>mode))"
3023{
3024 bool ok = ix86_expand_fp_vcond (operands);
3025 gcc_assert (ok);
3026 DONE;
3027})
3028
;; vcond expander: selects between operands 1 and 2 (any V_128 mode) based on
;; a comparison (operand 3) of two VF_128 float vectors (operands 4 and 5).
;; Enabled under TARGET_SSE and only when the data mode and the comparison
;; mode have the same number of units.  Expansion is delegated to
;; ix86_expand_fp_vcond, which is asserted to succeed.
3029(define_expand "vcond<V_128:mode><VF_128:mode>"
abd4f58b 3030 [(set (match_operand:V_128 0 "register_operand")
d6b19f6b 3031 (if_then_else:V_128
3032 (match_operator 3 ""
abd4f58b 3033 [(match_operand:VF_128 4 "nonimmediate_operand")
3034 (match_operand:VF_128 5 "nonimmediate_operand")])
3035 (match_operand:V_128 1 "general_operand")
3036 (match_operand:V_128 2 "general_operand")))]
d6b19f6b 3037 "TARGET_SSE
3038 && (GET_MODE_NUNITS (<V_128:MODE>mode)
3039 == GET_MODE_NUNITS (<VF_128:MODE>mode))"
76405cce 3040{
17e313b0 3041 bool ok = ix86_expand_fp_vcond (operands);
3042 gcc_assert (ok);
3043 DONE;
76405cce 3044})
3045
;; vcond_mask expander for the V48_AVX512VL modes with a mask-register
;; selector (operand 3), enabled under TARGET_AVX512F.  There are no
;; preparation statements: the vec_merge template itself is emitted.
98da9bbe 3046(define_expand "vcond_mask_<mode><avx512fmaskmodelower>"
3047 [(set (match_operand:V48_AVX512VL 0 "register_operand")
3048 (vec_merge:V48_AVX512VL
3049 (match_operand:V48_AVX512VL 1 "nonimmediate_operand")
3050 (match_operand:V48_AVX512VL 2 "vector_move_operand")
3051 (match_operand:<avx512fmaskmode> 3 "register_operand")))]
3052 "TARGET_AVX512F")
3053
;; vcond_mask expander for the VI12_AVX512VL modes with a mask-register
;; selector (operand 3), enabled under TARGET_AVX512BW.  There are no
;; preparation statements: the vec_merge template itself is emitted.
3054(define_expand "vcond_mask_<mode><avx512fmaskmodelower>"
3055 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
3056 (vec_merge:VI12_AVX512VL
3057 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
3058 (match_operand:VI12_AVX512VL 2 "vector_move_operand")
3059 (match_operand:<avx512fmaskmode> 3 "register_operand")))]
3060 "TARGET_AVX512BW")
3061
;; vcond_mask expander for the VI_256 modes with a vector selector (operand 3
;; in <sseintvecmode>), enabled under TARGET_AVX2.  Expansion is delegated to
;; ix86_expand_sse_movcc, called as (dest, selector, operand 1, operand 2).
3062(define_expand "vcond_mask_<mode><sseintvecmodelower>"
3063 [(set (match_operand:VI_256 0 "register_operand")
3064 (vec_merge:VI_256
3065 (match_operand:VI_256 1 "nonimmediate_operand")
3066 (match_operand:VI_256 2 "vector_move_operand")
3067 (match_operand:<sseintvecmode> 3 "register_operand")))]
3068 "TARGET_AVX2"
3069{
3070 ix86_expand_sse_movcc (operands[0], operands[3],
3071 operands[1], operands[2]);
3072 DONE;
3073})
3074
;; vcond_mask expander for the VI124_128 modes with a vector selector
;; (operand 3 in <sseintvecmode>), enabled under TARGET_SSE2.  Expansion is
;; delegated to ix86_expand_sse_movcc, called as (dest, selector, operand 1,
;; operand 2).
3075(define_expand "vcond_mask_<mode><sseintvecmodelower>"
3076 [(set (match_operand:VI124_128 0 "register_operand")
3077 (vec_merge:VI124_128
3078 (match_operand:VI124_128 1 "nonimmediate_operand")
3079 (match_operand:VI124_128 2 "vector_move_operand")
3080 (match_operand:<sseintvecmode> 3 "register_operand")))]
3081 "TARGET_SSE2"
3082{
3083 ix86_expand_sse_movcc (operands[0], operands[3],
3084 operands[1], operands[2]);
3085 DONE;
3086})
3087
;; vcond_mask expander for V2DI data with a V2DI selector (operand 3),
;; enabled under TARGET_SSE4_2.  Expansion is delegated to
;; ix86_expand_sse_movcc, called as (dest, selector, operand 1, operand 2).
3088(define_expand "vcond_mask_v2div2di"
3089 [(set (match_operand:V2DI 0 "register_operand")
3090 (vec_merge:V2DI
3091 (match_operand:V2DI 1 "nonimmediate_operand")
3092 (match_operand:V2DI 2 "vector_move_operand")
3093 (match_operand:V2DI 3 "register_operand")))]
3094 "TARGET_SSE4_2"
3095{
3096 ix86_expand_sse_movcc (operands[0], operands[3],
3097 operands[1], operands[2]);
3098 DONE;
3099})
3100
;; vcond_mask expander for the VF_256 float modes with an integer-vector
;; selector (operand 3 in <sseintvecmode>), enabled under TARGET_AVX.
;; Expansion is delegated to ix86_expand_sse_movcc, called as (dest,
;; selector, operand 1, operand 2).
3101(define_expand "vcond_mask_<mode><sseintvecmodelower>"
3102 [(set (match_operand:VF_256 0 "register_operand")
3103 (vec_merge:VF_256
3104 (match_operand:VF_256 1 "nonimmediate_operand")
3105 (match_operand:VF_256 2 "vector_move_operand")
3106 (match_operand:<sseintvecmode> 3 "register_operand")))]
3107 "TARGET_AVX"
3108{
3109 ix86_expand_sse_movcc (operands[0], operands[3],
3110 operands[1], operands[2]);
3111 DONE;
3112})
3113
;; vcond_mask expander for the VF_128 float modes with an integer-vector
;; selector (operand 3 in <sseintvecmode>), enabled under TARGET_SSE.
;; Expansion is delegated to ix86_expand_sse_movcc, called as (dest,
;; selector, operand 1, operand 2).
3114(define_expand "vcond_mask_<mode><sseintvecmodelower>"
3115 [(set (match_operand:VF_128 0 "register_operand")
3116 (vec_merge:VF_128
3117 (match_operand:VF_128 1 "nonimmediate_operand")
3118 (match_operand:VF_128 2 "vector_move_operand")
3119 (match_operand:<sseintvecmode> 3 "register_operand")))]
3120 "TARGET_SSE"
3121{
3122 ix86_expand_sse_movcc (operands[0], operands[3],
3123 operands[1], operands[2]);
3124 DONE;
3125})
3126
5802c0cb 3127;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3128;;
2a466fea 3129;; Parallel floating point logical operations
5802c0cb 3130;;
3131;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3132
;; Packed floating-point and-not, (and (not op1) op2), for the VF_128_256
;; modes.  Alternative 0 is the two-operand SSE form (destination tied to
;; operand 1); alternative 1 is the three-operand AVX form, which may carry a
;; write-mask via <mask_name>.
;; The output template is assembled at output time into a static buffer:
;; the suffix is "ps" when the insn's "mode" attribute is V8SF or V4SF and
;; <ssemodesuffix> otherwise; when a mask is applied and AVX512DQ is not
;; available, the integer mnemonic vpandn[dq] is emitted instead.
0607f34b 3133(define_insn "<sse>_andnot<mode>3<mask_name>"
3134 [(set (match_operand:VF_128_256 0 "register_operand" "=x,v")
3135 (and:VF_128_256
3136 (not:VF_128_256
3137 (match_operand:VF_128_256 1 "register_operand" "0,v"))
3138 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,vm")))]
3139 "TARGET_SSE && <mask_avx512vl_condition>"
887423c0 3140{
0607f34b 3141 static char buf[128];
596112aa 3142 const char *ops;
3143 const char *suffix;
3144
3145 switch (get_attr_mode (insn))
3146 {
3147 case MODE_V8SF:
3148 case MODE_V4SF:
3149 suffix = "ps";
3150 break;
3151 default:
3152 suffix = "<ssemodesuffix>";
3153 }
5802c0cb 3154
887423c0 3155 switch (which_alternative)
3156 {
3157 case 0:
596112aa 3158 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
887423c0 3159 break;
3160 case 1:
0607f34b 3161 ops = "vandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
887423c0 3162 break;
3163 default:
3164 gcc_unreachable ();
3165 }
ed30e0a6 3166
0607f34b 3167 /* There is no vandnp[sd] in avx512f. Use vpandn[qd]. */
3168 if (<mask_applied> && !TARGET_AVX512DQ)
6a3f5f59 3169 {
0607f34b 3170 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
3171 ops = "vpandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
6a3f5f59 3172 }
3173
596112aa 3174 snprintf (buf, sizeof (buf), ops, suffix);
887423c0 3175 return buf;
6fc76bb0 3176}
887423c0 3177 [(set_attr "isa" "noavx,avx")
3178 (set_attr "type" "sselog")
6a3f5f59 3179 (set_attr "prefix" "orig,maybe_evex")
596112aa 3180 (set (attr "mode")
7d460314 3181 (cond [(and (match_test "<MODE_SIZE> == 16")
3182 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
596112aa 3183 (const_string "<ssePSmode>")
3184 (match_test "TARGET_AVX")
3185 (const_string "<MODE>")
3186 (match_test "optimize_function_for_size_p (cfun)")
3187 (const_string "V4SF")
3188 ]
3189 (const_string "<MODE>")))])
ed30e0a6 3190
0607f34b 3191
;; Packed floating-point and-not, (and (not op1) op2), for the VF_512 modes,
;; enabled under TARGET_AVX512F.  The template is assembled into a static
;; buffer as "v" + ops + "andn" + suffix: with AVX512DQ it is
;; vandn<ssemodesuffix>; without it, ops becomes "p" and the suffix "q"/"d",
;; giving the integer vpandn[qd] form.
3192(define_insn "<sse>_andnot<mode>3<mask_name>"
3193 [(set (match_operand:VF_512 0 "register_operand" "=v")
3194 (and:VF_512
3195 (not:VF_512
3196 (match_operand:VF_512 1 "register_operand" "v"))
3197 (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
3198 "TARGET_AVX512F"
3199{
3200 static char buf[128];
3201 const char *ops;
3202 const char *suffix;
3203
3204 suffix = "<ssemodesuffix>";
3205 ops = "";
3206
3207 /* There is no vandnp[sd] in avx512f. Use vpandn[qd]. */
3208 if (!TARGET_AVX512DQ)
3209 {
3210 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
3211 ops = "p";
3212 }
3213
3214 snprintf (buf, sizeof (buf),
3215 "v%sandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}",
3216 ops, suffix);
3217 return buf;
3218}
3219 [(set_attr "type" "sselog")
3220 (set_attr "prefix" "evex")
3221 (set_attr "mode" "<sseinsnmode>")])
3222
;; Expander for the packed floating-point logical operations (code iterator
;; any_logic) on the VF_128_256 modes.  Its only preparation step is a call
;; to ix86_fixup_binary_operands_no_copy on the operands; the RTL template
;; is then emitted as written.
3223(define_expand "<code><mode>3<mask_name>"
6a3f5f59 3224 [(set (match_operand:VF_128_256 0 "register_operand")
0607f34b 3225 (any_logic:VF_128_256
3226 (match_operand:VF_128_256 1 "nonimmediate_operand")
3227 (match_operand:VF_128_256 2 "nonimmediate_operand")))]
3228 "TARGET_SSE && <mask_avx512vl_condition>"
b6bc2701 3229 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
5802c0cb 3230
;; Expander for the packed floating-point logical operations (code iterator
;; any_logic) on the VF_512 modes, enabled under TARGET_AVX512F.  Its only
;; preparation step is a call to ix86_fixup_binary_operands_no_copy on the
;; operands; the RTL template is then emitted as written.
0607f34b 3231(define_expand "<code><mode>3<mask_name>"
6a3f5f59 3232 [(set (match_operand:VF_512 0 "register_operand")
0607f34b 3233 (any_logic:VF_512
6a3f5f59 3234 (match_operand:VF_512 1 "nonimmediate_operand")
3235 (match_operand:VF_512 2 "nonimmediate_operand")))]
3236 "TARGET_AVX512F"
3237 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
3238
;; Insn for the packed floating-point logical operations (any_logic) on the
;; VF_128_256 modes; operand 1 is marked commutative ("%").  Alternative 0 is
;; the two-operand SSE form, alternative 1 the three-operand AVX form, which
;; may carry a write-mask via <mask_name>.
;; The output template is assembled at output time into a static buffer:
;; the suffix is "ps" when the insn's "mode" attribute is V8SF or V4SF and
;; <ssemodesuffix> otherwise; when a mask is applied and AVX512DQ is not
;; available, the integer mnemonic vp<logic>[dq] is emitted instead.
0607f34b 3239(define_insn "*<code><mode>3<mask_name>"
3240 [(set (match_operand:VF_128_256 0 "register_operand" "=x,v")
3241 (any_logic:VF_128_256
3242 (match_operand:VF_128_256 1 "nonimmediate_operand" "%0,v")
3243 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,vm")))]
3244 "TARGET_SSE && <mask_avx512vl_condition>
3245 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6fc76bb0 3246{
0607f34b 3247 static char buf[128];
596112aa 3248 const char *ops;
3249 const char *suffix;
3250
3251 switch (get_attr_mode (insn))
3252 {
3253 case MODE_V8SF:
3254 case MODE_V4SF:
3255 suffix = "ps";
3256 break;
3257 default:
3258 suffix = "<ssemodesuffix>";
3259 }
887423c0 3260
3261 switch (which_alternative)
3262 {
3263 case 0:
596112aa 3264 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
887423c0 3265 break;
3266 case 1:
0607f34b 3267 ops = "v<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
887423c0 3268 break;
3269 default:
3270 gcc_unreachable ();
3271 }
3272
0607f34b 3273 /* There is no v<logic>p[sd] in avx512f. Use vp<logic>[dq]. */
3274 if (<mask_applied> && !TARGET_AVX512DQ)
6a3f5f59 3275 {
0607f34b 3276 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
3277 ops = "vp<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
6a3f5f59 3278 }
3279
596112aa 3280 snprintf (buf, sizeof (buf), ops, suffix);
887423c0 3281 return buf;
6fc76bb0 3282}
887423c0 3283 [(set_attr "isa" "noavx,avx")
3284 (set_attr "type" "sselog")
6a3f5f59 3285 (set_attr "prefix" "orig,maybe_evex")
596112aa 3286 (set (attr "mode")
7d460314 3287 (cond [(and (match_test "<MODE_SIZE> == 16")
3288 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
596112aa 3289 (const_string "<ssePSmode>")
3290 (match_test "TARGET_AVX")
3291 (const_string "<MODE>")
3292 (match_test "optimize_function_for_size_p (cfun)")
3293 (const_string "V4SF")
3294 ]
3295 (const_string "<MODE>")))])
5802c0cb 3296
;; Insn for the packed floating-point logical operations (any_logic) on the
;; VF_512 modes; operand 1 is marked commutative ("%").  The template is
;; assembled into a static buffer as "v" + ops + "<logic>" + suffix: when
;; AVX512DQ is unavailable and the mode is 64 bytes wide or a mask is
;; applied, ops becomes "p" and the suffix "q"/"d", giving the integer
;; vp<logic>[dq] form; otherwise v<logic><ssemodesuffix> is emitted.
0607f34b 3297(define_insn "*<code><mode>3<mask_name>"
3298 [(set (match_operand:VF_512 0 "register_operand" "=v")
3299 (any_logic:VF_512
3300 (match_operand:VF_512 1 "nonimmediate_operand" "%v")
3301 (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
3302 "TARGET_AVX512F && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
3303{
3304 static char buf[128];
3305 const char *ops;
3306 const char *suffix;
3307
3308 suffix = "<ssemodesuffix>";
3309 ops = "";
3310
3311 /* There is no v<logic>p[sd] in avx512f. Use vp<logic>[dq]. */
3312 if ((<MODE_SIZE> == 64 || <mask_applied>) && !TARGET_AVX512DQ)
3313 {
3314 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
3315 ops = "p";
3316 }
3317
3318 snprintf (buf, sizeof (buf),
3319 "v%s<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}",
3320 ops, suffix);
3321 return buf;
3322}
3323 [(set_attr "type" "sselog")
3324 (set_attr "prefix" "evex")
3325 (set_attr "mode" "<sseinsnmode>")])
3326
;; Vector copysign expander for the VF modes, enabled under TARGET_SSE.
;; Emits three sets using a mask (operand 3) obtained from
;; ix86_build_signbit_mask (<MODE>mode, 1, 0):
;;   op4 = ~mask & op1
;;   op5 =  mask & op2
;;   op0 =  op4 | op5
;; Operands 4 and 5 are fresh pseudo registers created in the preparation
;; statements.
3d86078b 3327(define_expand "copysign<mode>3"
ddb24cdb 3328 [(set (match_dup 4)
887423c0 3329 (and:VF
3330 (not:VF (match_dup 3))
abd4f58b 3331 (match_operand:VF 1 "nonimmediate_operand")))
ddb24cdb 3332 (set (match_dup 5)
887423c0 3333 (and:VF (match_dup 3)
abd4f58b 3334 (match_operand:VF 2 "nonimmediate_operand")))
3335 (set (match_operand:VF 0 "register_operand")
887423c0 3336 (ior:VF (match_dup 4) (match_dup 5)))]
6fe5844b 3337 "TARGET_SSE"
3d86078b 3338{
8cedf886 3339 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
07ddf75c 3340
ddb24cdb 3341 operands[4] = gen_reg_rtx (<MODE>mode);
07ddf75c 3342 operands[5] = gen_reg_rtx (<MODE>mode);
3d86078b 3343})
3344
8d1e0693 3345;; Also define scalar versions. These are used for abs, neg, and
cea27bec 3346;; conditional move. Using subregs into vector modes causes register
8d1e0693 3347;; allocation lossage. These patterns do not allow memory operands
3348;; because the native instructions read the full 128 bits.
3349
;; Scalar (MODEF) and-not: op0 = ~op1 & op2, registers only.
;; Alternative 0 is the two-operand non-AVX form (op1 tied to op0);
;; alternative 1 is the three-operand AVX form.  The "mode" attribute
;; chooses between V4SF and <ssevecmode>, which in turn picks the "ps"
;; suffix or <ssevecmodesuffix> at output time.
841985a7 3350(define_insn "*andnot<mode>3"
887423c0 3351 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
2a466fea 3352 (and:MODEF
3353 (not:MODEF
887423c0 3354 (match_operand:MODEF 1 "register_operand" "0,x"))
3355 (match_operand:MODEF 2 "register_operand" "x,x")))]
2a466fea 3356 "SSE_FLOAT_MODE_P (<MODE>mode)"
6fc76bb0 3357{
/* The returned template points into this static buffer.  */
887423c0 3358 static char buf[32];
596112aa 3359 const char *ops;
/* Suffix follows the insn's "mode" attribute, not the operand mode.  */
887423c0 3360 const char *suffix
596112aa 3361 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
3362
3363 switch (which_alternative)
3364 {
3365 case 0:
3366 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
3367 break;
3368 case 1:
3369 ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3370 break;
3371 default:
3372 gcc_unreachable ();
3373 }
3374
3375 snprintf (buf, sizeof (buf), ops, suffix);
3376 return buf;
3377}
3378 [(set_attr "isa" "noavx,avx")
3379 (set_attr "type" "sselog")
3380 (set_attr "prefix" "orig,vex")
3381 (set (attr "mode")
7d460314 3382 (cond [(and (match_test "<MODE_SIZE> == 16")
3383 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
596112aa 3384 (const_string "V4SF")
3385 (match_test "TARGET_AVX")
3386 (const_string "<ssevecmode>")
3387 (match_test "optimize_function_for_size_p (cfun)")
3388 (const_string "V4SF")
3389 ]
3390 (const_string "<ssevecmode>")))])
3391
;; TFmode and-not: op0 = ~op1 & op2; op2 may be a register or memory.
;; The mnemonic is andnps when the "mode" attribute is V4SF and pandn
;; otherwise; alternative 1 prepends "v" for the three-operand AVX form.
3392(define_insn "*andnottf3"
3393 [(set (match_operand:TF 0 "register_operand" "=x,x")
3394 (and:TF
3395 (not:TF (match_operand:TF 1 "register_operand" "0,x"))
3396 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
3397 "TARGET_SSE"
3398{
/* The returned template points into this static buffer.  */
3399 static char buf[32];
3400 const char *ops;
/* Base mnemonic follows the insn's "mode" attribute.  */
3401 const char *tmp
3402 = (get_attr_mode (insn) == MODE_V4SF) ? "andnps" : "pandn";
887423c0 3403
3404 switch (which_alternative)
3405 {
3406 case 0:
596112aa 3407 ops = "%s\t{%%2, %%0|%%0, %%2}";
887423c0 3408 break;
3409 case 1:
596112aa 3410 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
887423c0 3411 break;
3412 default:
3413 gcc_unreachable ();
3414 }
3415
596112aa 3416 snprintf (buf, sizeof (buf), ops, tmp);
887423c0 3417 return buf;
6fc76bb0 3418}
887423c0 3419 [(set_attr "isa" "noavx,avx")
3420 (set_attr "type" "sselog")
596112aa 3421 (set (attr "prefix_data16")
3422 (if_then_else
3423 (and (eq_attr "alternative" "0")
3424 (eq_attr "mode" "TI"))
3425 (const_string "1")
3426 (const_string "*")))
887423c0 3427 (set_attr "prefix" "orig,vex")
596112aa 3428 (set (attr "mode")
3429 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
3430 (const_string "V4SF")
3431 (match_test "TARGET_AVX")
3432 (const_string "TI")
3433 (ior (not (match_test "TARGET_SSE2"))
3434 (match_test "optimize_function_for_size_p (cfun)"))
3435 (const_string "V4SF")
3436 ]
3437 (const_string "TI")))])
ed30e0a6 3438
;; Scalar (MODEF) bitwise logic (any_logic) on registers only.
;; Alternative 0 is the two-operand non-AVX form; alternative 1 is the
;; three-operand AVX form.  The suffix is "ps" when the "mode" attribute
;; is V4SF and <ssevecmodesuffix> otherwise.
b6bc2701 3439(define_insn "*<code><mode>3"
887423c0 3440 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
5acb11ef 3441 (any_logic:MODEF
887423c0 3442 (match_operand:MODEF 1 "register_operand" "%0,x")
3443 (match_operand:MODEF 2 "register_operand" "x,x")))]
2a466fea 3444 "SSE_FLOAT_MODE_P (<MODE>mode)"
6fc76bb0 3445{
/* The returned template points into this static buffer.  */
887423c0 3446 static char buf[32];
596112aa 3447 const char *ops;
/* Suffix follows the insn's "mode" attribute, not the operand mode.  */
887423c0 3448 const char *suffix
596112aa 3449 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
3450
3451 switch (which_alternative)
3452 {
3453 case 0:
3454 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
3455 break;
3456 case 1:
3457 ops = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3458 break;
3459 default:
3460 gcc_unreachable ();
3461 }
3462
3463 snprintf (buf, sizeof (buf), ops, suffix);
3464 return buf;
3465}
3466 [(set_attr "isa" "noavx,avx")
3467 (set_attr "type" "sselog")
3468 (set_attr "prefix" "orig,vex")
3469 (set (attr "mode")
7d460314 3470 (cond [(and (match_test "<MODE_SIZE> == 16")
3471 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
596112aa 3472 (const_string "V4SF")
3473 (match_test "TARGET_AVX")
3474 (const_string "<ssevecmode>")
3475 (match_test "optimize_function_for_size_p (cfun)")
3476 (const_string "V4SF")
3477 ]
3478 (const_string "<ssevecmode>")))])
3479
;; Expander for TFmode bitwise logic (any_logic).  The only preparation is
;; a call to ix86_fixup_binary_operands_no_copy on the operands.
3480(define_expand "<code>tf3"
3481 [(set (match_operand:TF 0 "register_operand")
3482 (any_logic:TF
3483 (match_operand:TF 1 "nonimmediate_operand")
3484 (match_operand:TF 2 "nonimmediate_operand")))]
3485 "TARGET_SSE"
3486 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
3487
;; TFmode bitwise logic (any_logic); op2 may be a register or memory.
;; The mnemonic is <logic>ps when the "mode" attribute is V4SF and
;; p<logic> otherwise; alternative 1 prepends "v" for the AVX form.
3488(define_insn "*<code>tf3"
3489 [(set (match_operand:TF 0 "register_operand" "=x,x")
3490 (any_logic:TF
3491 (match_operand:TF 1 "nonimmediate_operand" "%0,x")
3492 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
3493 "TARGET_SSE
3494 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
3495{
/* The returned template points into this static buffer.  */
3496 static char buf[32];
3497 const char *ops;
/* Base mnemonic follows the insn's "mode" attribute.  */
3498 const char *tmp
3499 = (get_attr_mode (insn) == MODE_V4SF) ? "<logic>ps" : "p<logic>";
887423c0 3500
3501 switch (which_alternative)
3502 {
3503 case 0:
596112aa 3504 ops = "%s\t{%%2, %%0|%%0, %%2}";
887423c0 3505 break;
3506 case 1:
596112aa 3507 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
887423c0 3508 break;
3509 default:
3510 gcc_unreachable ();
3511 }
3512
596112aa 3513 snprintf (buf, sizeof (buf), ops, tmp);
887423c0 3514 return buf;
6fc76bb0 3515}
887423c0 3516 [(set_attr "isa" "noavx,avx")
3517 (set_attr "type" "sselog")
596112aa 3518 (set (attr "prefix_data16")
3519 (if_then_else
3520 (and (eq_attr "alternative" "0")
3521 (eq_attr "mode" "TI"))
3522 (const_string "1")
3523 (const_string "*")))
887423c0 3524 (set_attr "prefix" "orig,vex")
596112aa 3525 (set (attr "mode")
3526 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
3527 (const_string "V4SF")
3528 (match_test "TARGET_AVX")
3529 (const_string "TI")
3530 (ior (not (match_test "TARGET_SSE2"))
3531 (match_test "optimize_function_for_size_p (cfun)"))
3532 (const_string "V4SF")
3533 ]
3534 (const_string "TI")))])
8d1e0693 3535
2f212aae 3536;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3537;;
35811e21 3538;; FMA floating point multiply/accumulate instructions. These include
3539;; scalar versions of the instructions as well as vector versions.
2f212aae 3540;;
3541;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3542
21a6219e 3543;; The standard names for scalar FMA are only available with SSE math enabled.
c298e021 3544;; CPUID bit AVX512F enables evex encoded scalar and 512-bit fma. It doesn't
3545;; care about FMA bit, so we enable fma for TARGET_AVX512F even when TARGET_FMA
3546;; and TARGET_FMA4 are both false.
3547;; TODO: In theory AVX512F does not automatically imply FMA, and without FMA
3548;; one must force the EVEX encoding of the fma insns. Ideally we'd improve
3549;; GAS to allow proper prefix selection. However, for the moment all hardware
3550;; that supports AVX512F also supports FMA so we can ignore this for now.
;; Modes for the standard-name fma/fms/fnma/fnms expanders.  Scalar entries
;; additionally require TARGET_SSE_MATH; 128/256-bit vectors accept
;; AVX512VL as an alternative to FMA/FMA4; 512-bit vectors need AVX512F.
3551(define_mode_iterator FMAMODEM
3552 [(SF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
3553 (DF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
4f3da779 3554 (V4SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3555 (V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3556 (V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3557 (V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
c298e021 3558 (V16SF "TARGET_AVX512F")
3559 (V8DF "TARGET_AVX512F")])
35811e21 3560
;; op0 = op1 * op2 + op3.  No condition string or preparation code;
;; availability comes from the per-mode conditions in FMAMODEM.
a2f9d5b3 3561(define_expand "fma<mode>4"
21a6219e 3562 [(set (match_operand:FMAMODEM 0 "register_operand")
3563 (fma:FMAMODEM
3564 (match_operand:FMAMODEM 1 "nonimmediate_operand")
3565 (match_operand:FMAMODEM 2 "nonimmediate_operand")
8211f5a2 3566 (match_operand:FMAMODEM 3 "nonimmediate_operand")))])
5e2b6fd0 3567
;; op0 = op1 * op2 - op3, expressed as fma with a negated addend.
b9be572e 3568(define_expand "fms<mode>4"
21a6219e 3569 [(set (match_operand:FMAMODEM 0 "register_operand")
3570 (fma:FMAMODEM
3571 (match_operand:FMAMODEM 1 "nonimmediate_operand")
3572 (match_operand:FMAMODEM 2 "nonimmediate_operand")
8211f5a2 3573 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
b9be572e 3574
;; op0 = -op1 * op2 + op3, expressed as fma with a negated first multiplicand.
3575(define_expand "fnma<mode>4"
21a6219e 3576 [(set (match_operand:FMAMODEM 0 "register_operand")
3577 (fma:FMAMODEM
3578 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
3579 (match_operand:FMAMODEM 2 "nonimmediate_operand")
8211f5a2 3580 (match_operand:FMAMODEM 3 "nonimmediate_operand")))])
b9be572e 3581
;; op0 = -op1 * op2 - op3: both the first multiplicand and the addend are
;; negated.
3582(define_expand "fnms<mode>4"
21a6219e 3583 [(set (match_operand:FMAMODEM 0 "register_operand")
3584 (fma:FMAMODEM
3585 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
3586 (match_operand:FMAMODEM 2 "nonimmediate_operand")
8211f5a2 3587 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
21a6219e 3588
3589;; The builtins for intrinsics are not constrained by SSE math enabled.
;; Same mode list as FMAMODEM, except that the scalar SF/DF entries do not
;; test TARGET_SSE_MATH.
4f3da779 3590(define_mode_iterator FMAMODE_AVX512
3591 [(SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
3592 (DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
3593 (V4SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3594 (V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3595 (V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3596 (V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3597 (V16SF "TARGET_AVX512F")
3598 (V8DF "TARGET_AVX512F")])
3599
;; Scalar, 128-bit and 256-bit float modes with no per-mode condition;
;; the patterns that use it supply their own insn condition.
8211f5a2 3600(define_mode_iterator FMAMODE
4f3da779 3601 [SF DF V4SF V2DF V8SF V4DF])
b9be572e 3602
;; Named expander for op0 = op1 * op2 + op3 over FMAMODE_AVX512 (so, per
;; that iterator, not gated on TARGET_SSE_MATH).  No preparation code.
5e2b6fd0 3603(define_expand "fma4i_fmadd_<mode>"
4f3da779 3604 [(set (match_operand:FMAMODE_AVX512 0 "register_operand")
3605 (fma:FMAMODE_AVX512
3606 (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand")
3607 (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
3608 (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand")))])
3609
;; Zero-masking fmadd expander: forwards operands 0-3 plus a zero vector
;; (CONST0_RTX) and the mask operand 4 to the fma_fmadd_<mode>_maskz_1
;; generator, then finishes with DONE so the listed operands are only an
;; interface description.
3610(define_expand "<avx512>_fmadd_<mode>_maskz<round_expand_name>"
3611 [(match_operand:VF_AVX512VL 0 "register_operand")
3612 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
3613 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
3614 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
be60ab96 3615 (match_operand:<avx512fmaskmode> 4 "register_operand")]
4f3da779 3616 "TARGET_AVX512F && <round_mode512bit_condition>"
be60ab96 3617{
adf45678 3618 emit_insn (gen_fma_fmadd_<mode>_maskz_1<round_expand_name> (
be60ab96 3619 operands[0], operands[1], operands[2], operands[3],
adf45678 3620 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
be60ab96 3621 DONE;
3622})
3623
;; Unmasked op0 = op1 * op2 + op3 for FMAMODE.  Per the "isa" attribute,
;; alternatives 0-2 are the three-operand FMA forms (132/213/231, chosen by
;; which input is tied to the destination) and alternatives 3-4 are the
;; four-operand FMA4 form.
4f3da779 3624(define_insn "*fma_fmadd_<mode>"
3625 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
3626 (fma:FMAMODE
3627 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x")
3628 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
3629 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x")))]
3630 "TARGET_FMA || TARGET_FMA4"
be60ab96 3631 "@
4f3da779 3632 vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3633 vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3634 vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
2743953b 3635 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3636 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4f3da779 3637 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
2743953b 3638 (set_attr "type" "ssemuladd")
35811e21 3639 (set_attr "mode" "<MODE>")])
3640
4f3da779 3641;; Assume AVX-512F as the baseline: scalar and 512-bit modes carry no
;; extra condition, while the 128-bit and 256-bit vector modes additionally
;; require TARGET_AVX512VL.
3642(define_mode_iterator VF_SF_AVX512VL
3643 [SF V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
3644 DF V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
3645
;; AVX-512 op0 = op1 * op2 + op3 over VF_SF_AVX512VL.  The name, condition
;; and template carry the sd_mask*/round* subst attributes, so the masked
;; and rounding spellings are derived from this one pattern.  The three
;; alternatives are the 132/213/231 operand orders.
8211f5a2 3646(define_insn "<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name><round_name>"
4f3da779 3647 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3648 (fma:VF_SF_AVX512VL
3649 (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
3650 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3651 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")))]
3652 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
8211f5a2 3653 "@
3654 vfmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3655 vfmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3656 vfmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4f3da779 3657 [(set_attr "type" "ssemuladd")
8211f5a2 3658 (set_attr "mode" "<MODE>")])
3659
;; Merge-masked fmadd: the vec_merge takes op1 * op2 + op3 where mask
;; operand 4 selects it and operand 1 (match_dup 1) elsewhere.  Operand 1
;; is tied to the destination, so only the 132 and 213 forms appear.
4f3da779 3660(define_insn "<avx512>_fmadd_<mode>_mask<round_name>"
3661 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3662 (vec_merge:VF_AVX512VL
3663 (fma:VF_AVX512VL
3664 (match_operand:VF_AVX512VL 1 "register_operand" "0,0")
3665 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3666 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>"))
5220cab6 3667 (match_dup 1)
a31e7f46 3668 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
4f3da779 3669 "TARGET_AVX512F && <round_mode512bit_condition>"
5220cab6 3670 "@
be60ab96 3671 vfmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3672 vfmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
5220cab6 3673 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3674 (set_attr "type" "ssemuladd")
3675 (set_attr "mode" "<MODE>")])
3676
;; Merge-masked fmadd keeping the addend: the vec_merge takes
;; op1 * op2 + op3 where mask operand 4 selects it and operand 3
;; (match_dup 3) elsewhere.  Operand 3 is tied to the destination, hence
;; the 231 form.  The register constraints are "v" and the condition tests
;; <round_mode512bit_condition>, matching the other mask3 patterns in this
;; section (fmsub/fnmadd).
4f3da779 3677(define_insn "<avx512>_fmadd_<mode>_mask3<round_name>"
3678 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3679 (vec_merge:VF_AVX512VL
3680 (fma:VF_AVX512VL
3681 (match_operand:VF_AVX512VL 1 "register_operand" "v")
3682 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3683 (match_operand:VF_AVX512VL 3 "register_operand" "0"))
5220cab6 3684 (match_dup 3)
a31e7f46 3685 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
5220cab6 3686 "TARGET_AVX512F && <round_mode512bit_condition>"
be60ab96 3687 "vfmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
5220cab6 3688 [(set_attr "isa" "fma_avx512f")
3689 (set_attr "type" "ssemuladd")
3690 (set_attr "mode" "<MODE>")])
3691
;; Unmasked op0 = op1 * op2 - op3 for FMAMODE (fma with negated addend).
;; Alternatives 0-2 are the FMA 132/213/231 forms; 3-4 are the FMA4 form.
4f3da779 3692(define_insn "*fma_fmsub_<mode>"
3693 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
3694 (fma:FMAMODE
3695 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x")
3696 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
3697 (neg:FMAMODE
3698 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x"))))]
3699 "TARGET_FMA || TARGET_FMA4"
35811e21 3700 "@
4f3da779 3701 vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3702 vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3703 vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
2743953b 3704 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3705 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4f3da779 3706 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
2743953b 3707 (set_attr "type" "ssemuladd")
35811e21 3708 (set_attr "mode" "<MODE>")])
3709
;; AVX-512 op0 = op1 * op2 - op3 over VF_SF_AVX512VL, parameterized by the
;; sd_mask*/round* subst attributes; alternatives are the 132/213/231 forms.
8211f5a2 3710(define_insn "<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name><round_name>"
4f3da779 3711 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3712 (fma:VF_SF_AVX512VL
3713 (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
3714 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3715 (neg:VF_SF_AVX512VL
3716 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))]
3717 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
8211f5a2 3718 "@
3719 vfmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3720 vfmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3721 vfmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4f3da779 3722 [(set_attr "type" "ssemuladd")
8211f5a2 3723 (set_attr "mode" "<MODE>")])
3724
;; Merge-masked fmsub: the vec_merge takes op1 * op2 - op3 where mask
;; operand 4 selects it and operand 1 (match_dup 1) elsewhere.  The
;; condition tests <round_mode512bit_condition>, as the fmadd _mask and
;; fmsub _mask3 patterns do, because the name carries <round_name>.
4f3da779 3725(define_insn "<avx512>_fmsub_<mode>_mask<round_name>"
3726 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3727 (vec_merge:VF_AVX512VL
3728 (fma:VF_AVX512VL
3729 (match_operand:VF_AVX512VL 1 "register_operand" "0,0")
3730 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3731 (neg:VF_AVX512VL
3732 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>")))
5220cab6 3733 (match_dup 1)
a31e7f46 3734 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
5220cab6 3735 "TARGET_AVX512F && <round_mode512bit_condition>"
3736 "@
be60ab96 3737 vfmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3738 vfmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
5220cab6 3739 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3740 (set_attr "type" "ssemuladd")
3741 (set_attr "mode" "<MODE>")])
3742
;; Merge-masked fmsub keeping the addend: the vec_merge takes
;; op1 * op2 - op3 where mask operand 4 selects it and operand 3
;; (match_dup 3) elsewhere; operand 3 is tied to the destination (231 form).
4f3da779 3743(define_insn "<avx512>_fmsub_<mode>_mask3<round_name>"
3744 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3745 (vec_merge:VF_AVX512VL
3746 (fma:VF_AVX512VL
3747 (match_operand:VF_AVX512VL 1 "register_operand" "v")
3748 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3749 (neg:VF_AVX512VL
3750 (match_operand:VF_AVX512VL 3 "register_operand" "0")))
5220cab6 3751 (match_dup 3)
a31e7f46 3752 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4f3da779 3753 "TARGET_AVX512F && <round_mode512bit_condition>"
be60ab96 3754 "vfmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
5220cab6 3755 [(set_attr "isa" "fma_avx512f")
3756 (set_attr "type" "ssemuladd")
3757 (set_attr "mode" "<MODE>")])
3758
;; Unmasked op0 = -op1 * op2 + op3 for FMAMODE (fma with negated first
;; multiplicand).  Alternatives 0-2 are the FMA 132/213/231 forms; 3-4 are
;; the FMA4 form.
4f3da779 3759(define_insn "*fma_fnmadd_<mode>"
3760 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
3761 (fma:FMAMODE
3762 (neg:FMAMODE
3763 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x"))
3764 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
3765 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x")))]
3766 "TARGET_FMA || TARGET_FMA4"
be60ab96 3767 "@
4f3da779 3768 vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3769 vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3770 vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
2743953b 3771 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3772 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4f3da779 3773 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
2743953b 3774 (set_attr "type" "ssemuladd")
35811e21 3775 (set_attr "mode" "<MODE>")])
3776
;; AVX-512 op0 = -op1 * op2 + op3 over VF_SF_AVX512VL, parameterized by the
;; sd_mask*/round* subst attributes; alternatives are the 132/213/231 forms.
8211f5a2 3777(define_insn "<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name><round_name>"
4f3da779 3778 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3779 (fma:VF_SF_AVX512VL
3780 (neg:VF_SF_AVX512VL
3781 (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v"))
3782 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3783 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")))]
3784 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
8211f5a2 3785 "@
3786 vfnmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3787 vfnmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3788 vfnmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4f3da779 3789 [(set_attr "type" "ssemuladd")
8211f5a2 3790 (set_attr "mode" "<MODE>")])
3791
;; Merge-masked fnmadd: the vec_merge takes -op1 * op2 + op3 where mask
;; operand 4 selects it and operand 1 (match_dup 1) elsewhere.
4f3da779 3792(define_insn "<avx512>_fnmadd_<mode>_mask<round_name>"
3793 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3794 (vec_merge:VF_AVX512VL
3795 (fma:VF_AVX512VL
3796 (neg:VF_AVX512VL
3797 (match_operand:VF_AVX512VL 1 "register_operand" "0,0"))
3798 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3799 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>"))
5220cab6 3800 (match_dup 1)
a31e7f46 3801 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
4f3da779 3802 "TARGET_AVX512F && <round_mode512bit_condition>"
5220cab6 3803 "@
be60ab96 3804 vfnmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3805 vfnmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
5220cab6 3806 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3807 (set_attr "type" "ssemuladd")
3808 (set_attr "mode" "<MODE>")])
3809
;; Merge-masked fnmadd keeping the addend: the vec_merge takes
;; -op1 * op2 + op3 where mask operand 4 selects it and operand 3
;; (match_dup 3) elsewhere; operand 3 is tied to the destination (231 form).
4f3da779 3810(define_insn "<avx512>_fnmadd_<mode>_mask3<round_name>"
3811 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3812 (vec_merge:VF_AVX512VL
3813 (fma:VF_AVX512VL
3814 (neg:VF_AVX512VL
3815 (match_operand:VF_AVX512VL 1 "register_operand" "v"))
3816 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3817 (match_operand:VF_AVX512VL 3 "register_operand" "0"))
5220cab6 3818 (match_dup 3)
a31e7f46 3819 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4f3da779 3820 "TARGET_AVX512F && <round_mode512bit_condition>"
be60ab96 3821 "vfnmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
5220cab6 3822 [(set_attr "isa" "fma_avx512f")
3823 (set_attr "type" "ssemuladd")
3824 (set_attr "mode" "<MODE>")])
3825
;; Unmasked op0 = -op1 * op2 - op3 for FMAMODE (both the first multiplicand
;; and the addend negated).  Alternatives 0-2 are the FMA 132/213/231 forms;
;; 3-4 are the FMA4 form.  This pattern's name carries no mask or rounding
;; subst attribute, so its template uses plain operands like the other
;; unmasked *fma_* patterns.
4f3da779 3826(define_insn "*fma_fnmsub_<mode>"
3827 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
3828 (fma:FMAMODE
3829 (neg:FMAMODE
3830 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x"))
3831 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
3832 (neg:FMAMODE
3833 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x"))))]
3834 "TARGET_FMA || TARGET_FMA4"
35811e21 3835 "@
be60ab96 3836 vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3837 vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3838 vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
2743953b 3839 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3840 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4f3da779 3841 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
2743953b 3842 (set_attr "type" "ssemuladd")
2f212aae 3843 (set_attr "mode" "<MODE>")])
3844
;; AVX-512 op0 = -op1 * op2 - op3 over VF_SF_AVX512VL, parameterized by the
;; sd_mask*/round* subst attributes; alternatives are the 132/213/231 forms.
8211f5a2 3845(define_insn "<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name><round_name>"
4f3da779 3846 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3847 (fma:VF_SF_AVX512VL
3848 (neg:VF_SF_AVX512VL
3849 (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v"))
3850 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3851 (neg:VF_SF_AVX512VL
3852 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))]
3853 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
8211f5a2 3854 "@
3855 vfnmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3856 vfnmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3857 vfnmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4f3da779 3858 [(set_attr "type" "ssemuladd")
8211f5a2 3859 (set_attr "mode" "<MODE>")])
3860
;; Merge-masked fnmsub: the vec_merge takes -op1 * op2 - op3 where mask
;; operand 4 selects it and operand 1 (match_dup 1) elsewhere.
4f3da779 3861(define_insn "<avx512>_fnmsub_<mode>_mask<round_name>"
3862 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3863 (vec_merge:VF_AVX512VL
3864 (fma:VF_AVX512VL
3865 (neg:VF_AVX512VL
3866 (match_operand:VF_AVX512VL 1 "register_operand" "0,0"))
3867 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3868 (neg:VF_AVX512VL
3869 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>")))
5220cab6 3870 (match_dup 1)
a31e7f46 3871 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
4f3da779 3872 "TARGET_AVX512F && <round_mode512bit_condition>"
5220cab6 3873 "@
be60ab96 3874 vfnmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3875 vfnmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
5220cab6 3876 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3877 (set_attr "type" "ssemuladd")
3878 (set_attr "mode" "<MODE>")])
3879
;; Merge-masked fnmsub keeping the addend: the vec_merge takes
;; -op1 * op2 - op3 where mask operand 4 selects it and operand 3
;; (match_dup 3) elsewhere; operand 3 is tied to the destination (231 form).
;; The condition tests <round_mode512bit_condition>, as the fnmsub _mask
;; pattern does, because the name carries <round_name>.
4f3da779 3880(define_insn "<avx512>_fnmsub_<mode>_mask3<round_name>"
3881 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3882 (vec_merge:VF_AVX512VL
3883 (fma:VF_AVX512VL
3884 (neg:VF_AVX512VL
3885 (match_operand:VF_AVX512VL 1 "register_operand" "v"))
3886 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3887 (neg:VF_AVX512VL
3888 (match_operand:VF_AVX512VL 3 "register_operand" "0")))
5220cab6 3889 (match_dup 3)
a31e7f46 3890 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
5220cab6 3891 "TARGET_AVX512F && <round_mode512bit_condition>"
be60ab96 3892 "vfnmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
5220cab6 3893 [(set_attr "isa" "fma_avx512f")
3894 (set_attr "type" "ssemuladd")
3895 (set_attr "mode" "<MODE>")])
3896
35811e21 3897;; FMA parallel floating point multiply addsub and subadd operations.
a2f9d5b3 3898
35811e21 3899;; It would be possible to represent these without the UNSPEC as
3900;;
3901;; (vec_merge
3902;; (fma op1 op2 op3)
3903;; (fma op1 op2 (neg op3))
3904;; (merge-const))
3905;;
3906;; But this doesn't seem useful in practice.
3907
;; Named expander for the UNSPEC_FMADDSUB operation over float vectors (VF).
;; No preparation code; the condition accepts FMA, FMA4 or AVX512F.
3908(define_expand "fmaddsub_<mode>"
3909 [(set (match_operand:VF 0 "register_operand")
3910 (unspec:VF
3911 [(match_operand:VF 1 "nonimmediate_operand")
3912 (match_operand:VF 2 "nonimmediate_operand")
3913 (match_operand:VF 3 "nonimmediate_operand")]
3914 UNSPEC_FMADDSUB))]
c298e021 3915 "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
35811e21 3916
;; Zero-masking fmaddsub expander: forwards operands 0-3 plus a zero vector
;; (CONST0_RTX) and the mask operand 4 to the fma_fmaddsub_<mode>_maskz_1
;; generator, then finishes with DONE.
;; NOTE(review): the fmadd maskz expander's condition also tests
;; <round_mode512bit_condition>; confirm whether this one should too.
4f3da779 3917(define_expand "<avx512>_fmaddsub_<mode>_maskz<round_expand_name>"
3918 [(match_operand:VF_AVX512VL 0 "register_operand")
3919 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
3920 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
3921 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
9a5ea1d5 3922 (match_operand:<avx512fmaskmode> 4 "register_operand")]
3923 "TARGET_AVX512F"
3924{
adf45678 3925 emit_insn (gen_fma_fmaddsub_<mode>_maskz_1<round_expand_name> (
9a5ea1d5 3926 operands[0], operands[1], operands[2], operands[3],
adf45678 3927 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
9a5ea1d5 3928 DONE;
3929})
3930
;; Unmasked UNSPEC_FMADDSUB on 128/256-bit float vectors.  Alternatives 0-2
;; are the FMA 132/213/231 forms; 3-4 are the four-operand FMA4 form.
4f3da779 3931(define_insn "*fma_fmaddsub_<mode>"
8211f5a2 3932 [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
3933 (unspec:VF_128_256
4f3da779 3934 [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,v,x,x")
3935 (match_operand:VF_128_256 2 "nonimmediate_operand" "vm,v,vm,x,m")
3936 (match_operand:VF_128_256 3 "nonimmediate_operand" "v,vm,0,xm,x")]
35811e21 3937 UNSPEC_FMADDSUB))]
4f3da779 3938 "TARGET_FMA || TARGET_FMA4"
35811e21 3939 "@
4f3da779 3940 vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3941 vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3942 vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
2743953b 3943 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3944 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4f3da779 3945 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
2743953b 3946 (set_attr "type" "ssemuladd")
35811e21 3947 (set_attr "mode" "<MODE>")])
3948
;; AVX-512 UNSPEC_FMADDSUB over VF_SF_AVX512VL, parameterized by the
;; sd_mask*/round* subst attributes; alternatives are the 132/213/231 forms.
8211f5a2 3949(define_insn "<sd_mask_codefor>fma_fmaddsub_<mode><sd_maskz_name><round_name>"
4f3da779 3950 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3951 (unspec:VF_SF_AVX512VL
3952 [(match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
3953 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3954 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")]
8211f5a2 3955 UNSPEC_FMADDSUB))]
3956 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3957 "@
3958 vfmaddsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3959 vfmaddsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3960 vfmaddsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4f3da779 3961 [(set_attr "type" "ssemuladd")
8211f5a2 3962 (set_attr "mode" "<MODE>")])
3963
;; Merge-masked fmaddsub: the vec_merge takes the UNSPEC_FMADDSUB result
;; where mask operand 4 selects it and operand 1 (match_dup 1) elsewhere.
;; The condition tests <round_mode512bit_condition>, as the fmadd _mask
;; pattern does, because the name carries <round_name>.
4f3da779 3964(define_insn "<avx512>_fmaddsub_<mode>_mask<round_name>"
3965 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3966 (vec_merge:VF_AVX512VL
3967 (unspec:VF_AVX512VL
3968 [(match_operand:VF_AVX512VL 1 "register_operand" "0,0")
3969 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3970 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>")]
5220cab6 3971 UNSPEC_FMADDSUB)
3972 (match_dup 1)
a31e7f46 3973 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
5220cab6 3974 "TARGET_AVX512F && <round_mode512bit_condition>"
3975 "@
be60ab96 3976 vfmaddsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3977 vfmaddsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
5220cab6 3978 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3979 (set_attr "type" "ssemuladd")
3980 (set_attr "mode" "<MODE>")])
3981
;; Merge-masked fmaddsub keeping operand 3: the vec_merge takes the
;; UNSPEC_FMADDSUB result where mask operand 4 selects it and operand 3
;; (match_dup 3) elsewhere; operand 3 is tied to the destination (231 form).
;; The condition tests <round_mode512bit_condition>, as the fmsub and fnmadd
;; _mask3 patterns do, because the name carries <round_name>.
4f3da779 3982(define_insn "<avx512>_fmaddsub_<mode>_mask3<round_name>"
3983 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3984 (vec_merge:VF_AVX512VL
3985 (unspec:VF_AVX512VL
3986 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
3987 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3988 (match_operand:VF_AVX512VL 3 "register_operand" "0")]
5220cab6 3989 UNSPEC_FMADDSUB)
3990 (match_dup 3)
a31e7f46 3991 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
5220cab6 3992 "TARGET_AVX512F && <round_mode512bit_condition>"
be60ab96 3993 "vfmaddsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
5220cab6 3994 [(set_attr "isa" "fma_avx512f")
3995 (set_attr "type" "ssemuladd")
3996 (set_attr "mode" "<MODE>")])
3997
;; Unmasked fmsubadd on 128/256-bit float vectors, represented as
;; UNSPEC_FMADDSUB with operand 3 negated.  Alternatives 0-2 are the FMA
;; 132/213/231 forms; 3-4 are the four-operand FMA4 form.
4f3da779 3998(define_insn "*fma_fmsubadd_<mode>"
8211f5a2 3999 [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
4000 (unspec:VF_128_256
4f3da779 4001 [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,v,x,x")
4002 (match_operand:VF_128_256 2 "nonimmediate_operand" "vm,v,vm,x,m")
8211f5a2 4003 (neg:VF_128_256
4f3da779 4004 (match_operand:VF_128_256 3 "nonimmediate_operand" "v,vm,0,xm,x"))]
35811e21 4005 UNSPEC_FMADDSUB))]
4f3da779 4006 "TARGET_FMA || TARGET_FMA4"
35811e21 4007 "@
4f3da779 4008 vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
4009 vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
4010 vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
2743953b 4011 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
4012 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4f3da779 4013 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
2743953b 4014 (set_attr "type" "ssemuladd")
35811e21 4015 (set_attr "mode" "<MODE>")])
4016
;; AVX-512 fmsubadd (UNSPEC_FMADDSUB with operand 3 negated).  The name
;; carries the <sd_maskz_name> and <round_name> substitution attributes, and
;; the templates print the matching <sd_mask_op4>/<round_sd_mask_op4>
;; operands.  Three alternatives select the 132/213/231 encodings.
8211f5a2 4017(define_insn "<sd_mask_codefor>fma_fmsubadd_<mode><sd_maskz_name><round_name>"
4f3da779 4018 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
4019 (unspec:VF_SF_AVX512VL
4020 [(match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
4021 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
4022 (neg:VF_SF_AVX512VL
4023 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))]
8211f5a2 4024 UNSPEC_FMADDSUB))]
4025 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
4026 "@
4027 vfmsubadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
4028 vfmsubadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
4029 vfmsubadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4f3da779 4030 [(set_attr "type" "ssemuladd")
8211f5a2 4031 (set_attr "mode" "<MODE>")])
4032
;; Merge-masked fmsubadd, "mask" form.  The unspec result is vec_merged with
;; operand 1 (tied to the destination) under mask register operand 4.
;; The two alternatives emit the 132 and 213 encodings.
4f3da779 4033(define_insn "<avx512>_fmsubadd_<mode>_mask<round_name>"
4034 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
4035 (vec_merge:VF_AVX512VL
4036 (unspec:VF_AVX512VL
4037 [(match_operand:VF_AVX512VL 1 "register_operand" "0,0")
4038 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
4039 (neg:VF_AVX512VL
4040 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>"))]
5220cab6 4041 UNSPEC_FMADDSUB)
4042 (match_dup 1)
a31e7f46 4043 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
5220cab6 4044 "TARGET_AVX512F"
4045 "@
be60ab96 4046 vfmsubadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
4047 vfmsubadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
5220cab6 4048 [(set_attr "isa" "fma_avx512f,fma_avx512f")
4049 (set_attr "type" "ssemuladd")
4050 (set_attr "mode" "<MODE>")])
4051
;; Merge-masked fmsubadd, "mask3" form.  The unspec result is vec_merged with
;; operand 3 (the negated addend, tied to the destination by "0") under mask
;; register operand 4; only the 231 encoding is emitted.
4f3da779 4052(define_insn "<avx512>_fmsubadd_<mode>_mask3<round_name>"
4053 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
4054 (vec_merge:VF_AVX512VL
4055 (unspec:VF_AVX512VL
4056 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
4057 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
4058 (neg:VF_AVX512VL
4059 (match_operand:VF_AVX512VL 3 "register_operand" "0"))]
5220cab6 4060 UNSPEC_FMADDSUB)
4061 (match_dup 3)
a31e7f46 4062 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
5220cab6 4063 "TARGET_AVX512F"
be60ab96 4064 "vfmsubadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
5220cab6 4065 [(set_attr "isa" "fma_avx512f")
4066 (set_attr "type" "ssemuladd")
4067 (set_attr "mode" "<MODE>")])
4068
35811e21 4069;; FMA3 floating point scalar intrinsics. These merge result with
4070;; high-order elements from the destination register.
2f212aae 4071
;; Expander for the scalar FMA3 intrinsic: an fma of operands 1-3
;; vec_merged with operand 1 under the constant mask 1.  It has no
;; preparation statements; the RTL is matched by the *fmai_* insns below.
be60ab96 4072(define_expand "fmai_vmfmadd_<mode><round_name>"
65463cb8 4073 [(set (match_operand:VF_128 0 "register_operand")
4074 (vec_merge:VF_128
4075 (fma:VF_128
be60ab96 4076 (match_operand:VF_128 1 "<round_nimm_predicate>")
4077 (match_operand:VF_128 2 "<round_nimm_predicate>")
4078 (match_operand:VF_128 3 "<round_nimm_predicate>"))
092a264c 4079 (match_dup 1)
65463cb8 4080 (const_int 1)))]
4081 "TARGET_FMA")
4082
;; Scalar fmadd: fma (op1, op2, op3) merged with operand 1 (tied to the
;; destination) under the constant mask 1.  The two alternatives emit the
;; 132 and 213 encodings depending on which of operands 2/3 may be memory.
;; NOTE(review): the templates use <round_op4>/<round_constraint>, yet the
;; pattern name carries no <round_name>, unlike the fnmadd/fnmsub siblings
;; -- confirm this is intended.
4083(define_insn "*fmai_fmadd_<mode>"
e13e1b39 4084 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
65463cb8 4085 (vec_merge:VF_128
4086 (fma:VF_128
be60ab96 4087 (match_operand:VF_128 1 "<round_nimm_predicate>" " 0, 0")
4088 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>, v")
4089 (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>"))
092a264c 4090 (match_dup 1)
65463cb8 4091 (const_int 1)))]
c298e021 4092 "TARGET_FMA || TARGET_AVX512F"
65463cb8 4093 "@
be60ab96 4094 vfmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
4095 vfmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
65463cb8 4096 [(set_attr "type" "ssemuladd")
4097 (set_attr "mode" "<MODE>")])
4098
;; Scalar fmsub: fma (op1, op2, -op3) merged with operand 1 (tied to the
;; destination) under the constant mask 1; 132 and 213 encodings.
;; NOTE(review): the templates use <round_op4>/<round_constraint>, yet the
;; pattern name carries no <round_name>, unlike the fnmadd/fnmsub siblings
;; -- confirm this is intended.
4099(define_insn "*fmai_fmsub_<mode>"
e13e1b39 4100 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
65463cb8 4101 (vec_merge:VF_128
4102 (fma:VF_128
be60ab96 4103 (match_operand:VF_128 1 "<round_nimm_predicate>" "0,0")
4104 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v")
65463cb8 4105 (neg:VF_128
be60ab96 4106 (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>")))
092a264c 4107 (match_dup 1)
65463cb8 4108 (const_int 1)))]
c298e021 4109 "TARGET_FMA || TARGET_AVX512F"
65463cb8 4110 "@
be60ab96 4111 vfmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
4112 vfmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
65463cb8 4113 [(set_attr "type" "ssemuladd")
4114 (set_attr "mode" "<MODE>")])
4115
;; Scalar fnmadd: fma (-op2, op1, op3) merged with operand 1 (tied to the
;; destination) under the constant mask 1.  Note that operand 2 appears
;; first in the RTL, inside the neg; 132 and 213 encodings.
be60ab96 4116(define_insn "*fmai_fnmadd_<mode><round_name>"
e13e1b39 4117 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
65463cb8 4118 (vec_merge:VF_128
4119 (fma:VF_128
4120 (neg:VF_128
be60ab96 4121 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v"))
4122 (match_operand:VF_128 1 "<round_nimm_predicate>" "0,0")
4123 (match_operand:VF_128 3 "<round_nimm_predicate>" "v,<round_constraint>"))
092a264c 4124 (match_dup 1)
65463cb8 4125 (const_int 1)))]
c298e021 4126 "TARGET_FMA || TARGET_AVX512F"
65463cb8 4127 "@
be60ab96 4128 vfnmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
4129 vfnmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
65463cb8 4130 [(set_attr "type" "ssemuladd")
4131 (set_attr "mode" "<MODE>")])
4132
;; Scalar fnmsub: fma (-op2, op1, -op3) merged with operand 1 (tied to the
;; destination) under the constant mask 1; 132 and 213 encodings.
be60ab96 4133(define_insn "*fmai_fnmsub_<mode><round_name>"
e13e1b39 4134 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
65463cb8 4135 (vec_merge:VF_128
4136 (fma:VF_128
4137 (neg:VF_128
be60ab96 4138 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>, v"))
4139 (match_operand:VF_128 1 "<round_nimm_predicate>" " 0, 0")
65463cb8 4140 (neg:VF_128
be60ab96 4141 (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>")))
092a264c 4142 (match_dup 1)
65463cb8 4143 (const_int 1)))]
c298e021 4144 "TARGET_FMA || TARGET_AVX512F"
65463cb8 4145 "@
be60ab96 4146 vfnmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
4147 vfnmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
65463cb8 4148 [(set_attr "type" "ssemuladd")
4149 (set_attr "mode" "<MODE>")])
4150
35811e21 4151;; FMA4 floating point scalar intrinsics. These write the
4152;; entire destination register, with the high-order elements zeroed.
4153
;; Expander for the scalar FMA4 intrinsic.  Operand 4 is not supplied by the
;; caller: the preparation statement sets it to the zero vector of <MODE>,
;; so the vec_merge under mask 1 merges the fma result with zero.
4154(define_expand "fma4i_vmfmadd_<mode>"
4155 [(set (match_operand:VF_128 0 "register_operand")
4156 (vec_merge:VF_128
4157 (fma:VF_128
4158 (match_operand:VF_128 1 "nonimmediate_operand")
4159 (match_operand:VF_128 2 "nonimmediate_operand")
4160 (match_operand:VF_128 3 "nonimmediate_operand"))
4161 (match_dup 4)
4162 (const_int 1)))]
4163 "TARGET_FMA4"
4164 "operands[4] = CONST0_RTX (<MODE>mode);")
4165
;; FMA4 scalar fmadd: fma (op1, op2, op3) merged with the zero vector
;; (operand 4, const0_operand) under the constant mask 1.  Four-operand form;
;; at most one of operands 2/3 is memory per alternative.
a2f9d5b3 4166(define_insn "*fma4i_vmfmadd_<mode>"
6fe5844b 4167 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
4168 (vec_merge:VF_128
4169 (fma:VF_128
4170 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
4171 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
4172 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
abd4f58b 4173 (match_operand:VF_128 4 "const0_operand")
a2f9d5b3 4174 (const_int 1)))]
4175 "TARGET_FMA4"
c358a059 4176 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
2f212aae 4177 [(set_attr "type" "ssemuladd")
4178 (set_attr "mode" "<MODE>")])
4179
;; FMA4 scalar fmsub: fma (op1, op2, -op3) merged with the zero vector
;; (operand 4) under the constant mask 1.
a2f9d5b3 4180(define_insn "*fma4i_vmfmsub_<mode>"
6fe5844b 4181 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
4182 (vec_merge:VF_128
4183 (fma:VF_128
4184 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
4185 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
4186 (neg:VF_128
4187 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
abd4f58b 4188 (match_operand:VF_128 4 "const0_operand")
a2f9d5b3 4189 (const_int 1)))]
4190 "TARGET_FMA4"
c358a059 4191 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
2f212aae 4192 [(set_attr "type" "ssemuladd")
4193 (set_attr "mode" "<MODE>")])
4194
;; FMA4 scalar fnmadd: fma (-op1, op2, op3) merged with the zero vector
;; (operand 4) under the constant mask 1.
a2f9d5b3 4195(define_insn "*fma4i_vmfnmadd_<mode>"
6fe5844b 4196 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
4197 (vec_merge:VF_128
4198 (fma:VF_128
4199 (neg:VF_128
4200 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
4201 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
4202 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
abd4f58b 4203 (match_operand:VF_128 4 "const0_operand")
a2f9d5b3 4204 (const_int 1)))]
4205 "TARGET_FMA4"
c358a059 4206 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
2f212aae 4207 [(set_attr "type" "ssemuladd")
4208 (set_attr "mode" "<MODE>")])
4209
;; FMA4 scalar fnmsub: fma (-op1, op2, -op3) merged with the zero vector
;; (operand 4) under the constant mask 1.
a2f9d5b3 4210(define_insn "*fma4i_vmfnmsub_<mode>"
6fe5844b 4211 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
4212 (vec_merge:VF_128
4213 (fma:VF_128
4214 (neg:VF_128
4215 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
4216 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
4217 (neg:VF_128
4218 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
abd4f58b 4219 (match_operand:VF_128 4 "const0_operand")
a2f9d5b3 4220 (const_int 1)))]
aaf9db06 4221 "TARGET_FMA4"
c358a059 4222 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
2f212aae 4223 [(set_attr "type" "ssemuladd")
4224 (set_attr "mode" "<MODE>")])
4225
5e2b6fd0 4226;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5802c0cb 4227;;
2a466fea 4228;; Parallel single-precision floating point conversion operations
4229;;
4230;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4231
;; cvtpi2ps: float-convert a V2SI (MMX register or memory) to V2SF and merge
;; it into V4SF operand 1 (tied to the destination) under the constant
;; mask 3.
4232(define_insn "sse_cvtpi2ps"
4233 [(set (match_operand:V4SF 0 "register_operand" "=x")
4234 (vec_merge:V4SF
4235 (vec_duplicate:V4SF
4236 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
4237 (match_operand:V4SF 1 "register_operand" "0")
4238 (const_int 3)))]
4239 "TARGET_SSE"
4240 "cvtpi2ps\t{%2, %0|%0, %2}"
4241 [(set_attr "type" "ssecvt")
4242 (set_attr "mode" "V4SF")])
4243
;; cvtps2pi: UNSPEC_FIX_NOTRUNC conversion of a V4SF source, of which
;; elements 0 and 1 are selected into a V2SI MMX-register destination.
4244(define_insn "sse_cvtps2pi"
4245 [(set (match_operand:V2SI 0 "register_operand" "=y")
4246 (vec_select:V2SI
4247 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
4248 UNSPEC_FIX_NOTRUNC)
4249 (parallel [(const_int 0) (const_int 1)])))]
4250 "TARGET_SSE"
c358a059 4251 "cvtps2pi\t{%1, %0|%0, %q1}"
2a466fea 4252 [(set_attr "type" "ssecvt")
4253 (set_attr "unit" "mmx")
4254 (set_attr "mode" "DI")])
4255
;; cvttps2pi: truncating (fix) conversion of a V4SF source, of which
;; elements 0 and 1 are selected into a V2SI MMX-register destination.
4256(define_insn "sse_cvttps2pi"
4257 [(set (match_operand:V2SI 0 "register_operand" "=y")
4258 (vec_select:V2SI
4259 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
4260 (parallel [(const_int 0) (const_int 1)])))]
4261 "TARGET_SSE"
c358a059 4262 "cvttps2pi\t{%1, %0|%0, %q1}"
2a466fea 4263 [(set_attr "type" "ssecvt")
4264 (set_attr "unit" "mmx")
00a0e418 4265 (set_attr "prefix_rep" "0")
2a466fea 4266 (set_attr "mode" "SF")])
4267
;; cvtsi2ss: convert SImode operand 2 to SF and merge it into V4SF operand 1
;; under the constant mask 1.  Alternatives 0-1 are the two-operand legacy
;; form (register / memory source, destination tied to operand 1);
;; alternative 2 is the three-operand VEX/EVEX form, gated by the "isa"
;; attribute.
be60ab96 4268(define_insn "sse_cvtsi2ss<round_name>"
e13e1b39 4269 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
2a466fea 4270 (vec_merge:V4SF
4271 (vec_duplicate:V4SF
be60ab96 4272 (float:SF (match_operand:SI 2 "<round_nimm_predicate>" "r,m,<round_constraint3>")))
e13e1b39 4273 (match_operand:V4SF 1 "register_operand" "0,0,v")
2a466fea 4274 (const_int 1)))]
4275 "TARGET_SSE"
887423c0 4276 "@
4277 cvtsi2ss\t{%2, %0|%0, %2}
4278 cvtsi2ss\t{%2, %0|%0, %2}
88048095 4279 vcvtsi2ss\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
887423c0 4280 [(set_attr "isa" "noavx,noavx,avx")
4281 (set_attr "type" "sseicvt")
4282 (set_attr "athlon_decode" "vector,double,*")
4283 (set_attr "amdfam10_decode" "vector,double,*")
4284 (set_attr "bdver1_decode" "double,direct,*")
6470d004 4285 (set_attr "btver2_decode" "double,double,double")
4c9faaa4 4286 (set_attr "znver1_decode" "double,double,double")
4c1099de 4287 (set_attr "prefix" "orig,orig,maybe_evex")
ed30e0a6 4288 (set_attr "mode" "SF")])
4289
;; 64-bit variant of sse_cvtsi2ss: the source is DImode and the mnemonic
;; carries an explicit "q" suffix.  Only enabled for TARGET_64BIT.
;; NOTE(review): unlike sse_cvtsi2ss there is no "znver1_decode" attribute
;; here -- confirm whether that omission is deliberate.
be60ab96 4290(define_insn "sse_cvtsi2ssq<round_name>"
e13e1b39 4291 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
2a466fea 4292 (vec_merge:V4SF
4293 (vec_duplicate:V4SF
be60ab96 4294 (float:SF (match_operand:DI 2 "<round_nimm_predicate>" "r,m,<round_constraint3>")))
e13e1b39 4295 (match_operand:V4SF 1 "register_operand" "0,0,v")
2a466fea 4296 (const_int 1)))]
4297 "TARGET_SSE && TARGET_64BIT"
887423c0 4298 "@
4299 cvtsi2ssq\t{%2, %0|%0, %2}
4300 cvtsi2ssq\t{%2, %0|%0, %2}
88048095 4301 vcvtsi2ssq\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
887423c0 4302 [(set_attr "isa" "noavx,noavx,avx")
4303 (set_attr "type" "sseicvt")
4304 (set_attr "athlon_decode" "vector,double,*")
4305 (set_attr "amdfam10_decode" "vector,double,*")
4306 (set_attr "bdver1_decode" "double,direct,*")
6470d004 4307 (set_attr "btver2_decode" "double,double,double")
887423c0 4308 (set_attr "length_vex" "*,*,4")
4309 (set_attr "prefix_rex" "1,1,*")
4c1099de 4310 (set_attr "prefix" "orig,orig,maybe_evex")
2a466fea 4311 (set_attr "mode" "SF")])
4312
;; cvtss2si: UNSPEC_FIX_NOTRUNC conversion of element 0 of V4SF operand 1
;; to an SImode general register.  The Intel-syntax operand is printed
;; with the %k modifier.
be60ab96 4313(define_insn "sse_cvtss2si<round_name>"
2a466fea 4314 [(set (match_operand:SI 0 "register_operand" "=r,r")
4315 (unspec:SI
4316 [(vec_select:SF
be60ab96 4317 (match_operand:V4SF 1 "<round_nimm_predicate>" "v,<round_constraint2>")
2a466fea 4318 (parallel [(const_int 0)]))]
4319 UNSPEC_FIX_NOTRUNC))]
4320 "TARGET_SSE"
be60ab96 4321 "%vcvtss2si\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
2a466fea 4322 [(set_attr "type" "sseicvt")
4323 (set_attr "athlon_decode" "double,vector")
97436e92 4324 (set_attr "bdver1_decode" "double,double")
2a466fea 4325 (set_attr "prefix_rep" "1")
ed30e0a6 4326 (set_attr "prefix" "maybe_vex")
2a466fea 4327 (set_attr "mode" "SI")])
4328
;; Variant of sse_cvtss2si whose source is a plain SFmode operand (register
;; or memory) instead of element 0 of a V4SF vector.
4329(define_insn "sse_cvtss2si_2"
4330 [(set (match_operand:SI 0 "register_operand" "=r,r")
e13e1b39 4331 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "v,m")]
2a466fea 4332 UNSPEC_FIX_NOTRUNC))]
4333 "TARGET_SSE"
c358a059 4334 "%vcvtss2si\t{%1, %0|%0, %k1}"
2a466fea 4335 [(set_attr "type" "sseicvt")
4336 (set_attr "athlon_decode" "double,vector")
4337 (set_attr "amdfam10_decode" "double,double")
97436e92 4338 (set_attr "bdver1_decode" "double,double")
2a466fea 4339 (set_attr "prefix_rep" "1")
ed30e0a6 4340 (set_attr "prefix" "maybe_vex")
2a466fea 4341 (set_attr "mode" "SI")])
4342
;; 64-bit variant of sse_cvtss2si: DImode destination, TARGET_64BIT only.
;; The "{q}" in the template adds the q suffix in AT&T syntax only.
be60ab96 4343(define_insn "sse_cvtss2siq<round_name>"
2a466fea 4344 [(set (match_operand:DI 0 "register_operand" "=r,r")
4345 (unspec:DI
4346 [(vec_select:SF
be60ab96 4347 (match_operand:V4SF 1 "<round_nimm_predicate>" "v,<round_constraint2>")
2a466fea 4348 (parallel [(const_int 0)]))]
4349 UNSPEC_FIX_NOTRUNC))]
4350 "TARGET_SSE && TARGET_64BIT"
be60ab96 4351 "%vcvtss2si{q}\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
2a466fea 4352 [(set_attr "type" "sseicvt")
4353 (set_attr "athlon_decode" "double,vector")
97436e92 4354 (set_attr "bdver1_decode" "double,double")
2a466fea 4355 (set_attr "prefix_rep" "1")
ed30e0a6 4356 (set_attr "prefix" "maybe_vex")
2a466fea 4357 (set_attr "mode" "DI")])
4358
;; 64-bit variant of sse_cvtss2si_2: plain SFmode source, DImode
;; destination, TARGET_64BIT only.
4359(define_insn "sse_cvtss2siq_2"
4360 [(set (match_operand:DI 0 "register_operand" "=r,r")
4c1099de 4361 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "v,m")]
2a466fea 4362 UNSPEC_FIX_NOTRUNC))]
4363 "TARGET_SSE && TARGET_64BIT"
c358a059 4364 "%vcvtss2si{q}\t{%1, %0|%0, %k1}"
2a466fea 4365 [(set_attr "type" "sseicvt")
4366 (set_attr "athlon_decode" "double,vector")
4367 (set_attr "amdfam10_decode" "double,double")
97436e92 4368 (set_attr "bdver1_decode" "double,double")
2a466fea 4369 (set_attr "prefix_rep" "1")
ed30e0a6 4370 (set_attr "prefix" "maybe_vex")
2a466fea 4371 (set_attr "mode" "DI")])
4372
;; cvttss2si: truncating (fix) conversion of element 0 of V4SF operand 1 to
;; an SImode general register.  Uses the <round_saeonly_*> substitution
;; attributes rather than the <round_*> ones.
dbfe84d5 4373(define_insn "sse_cvttss2si<round_saeonly_name>"
2a466fea 4374 [(set (match_operand:SI 0 "register_operand" "=r,r")
4375 (fix:SI
4376 (vec_select:SF
dbfe84d5 4377 (match_operand:V4SF 1 "<round_saeonly_nimm_predicate>" "v,<round_saeonly_constraint2>")
2a466fea 4378 (parallel [(const_int 0)]))))]
4379 "TARGET_SSE"
dbfe84d5 4380 "%vcvttss2si\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
2a466fea 4381 [(set_attr "type" "sseicvt")
4382 (set_attr "athlon_decode" "double,vector")
4383 (set_attr "amdfam10_decode" "double,double")
97436e92 4384 (set_attr "bdver1_decode" "double,double")
2a466fea 4385 (set_attr "prefix_rep" "1")
ed30e0a6 4386 (set_attr "prefix" "maybe_vex")
2a466fea 4387 (set_attr "mode" "SI")])
4388
;; 64-bit variant of sse_cvttss2si: DImode destination, TARGET_64BIT only.
;; NOTE(review): the second alternative uses <round_saeonly_constraint>
;; where the SImode sibling uses <round_saeonly_constraint2> -- confirm the
;; difference is intentional.
dbfe84d5 4389(define_insn "sse_cvttss2siq<round_saeonly_name>"
2a466fea 4390 [(set (match_operand:DI 0 "register_operand" "=r,r")
4391 (fix:DI
4392 (vec_select:SF
dbfe84d5 4393 (match_operand:V4SF 1 "<round_saeonly_nimm_predicate>" "v,<round_saeonly_constraint>")
2a466fea 4394 (parallel [(const_int 0)]))))]
4395 "TARGET_SSE && TARGET_64BIT"
dbfe84d5 4396 "%vcvttss2si{q}\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
2a466fea 4397 [(set_attr "type" "sseicvt")
4398 (set_attr "athlon_decode" "double,vector")
4399 (set_attr "amdfam10_decode" "double,double")
97436e92 4400 (set_attr "bdver1_decode" "double,double")
2a466fea 4401 (set_attr "prefix_rep" "1")
ed30e0a6 4402 (set_attr "prefix" "maybe_vex")
2a466fea 4403 (set_attr "mode" "DI")])
4404
;; vcvtusi2ss/sd with a 32-bit source: unsigned_float of SImode operand 2,
;; merged into vector operand 1 under the constant mask 1.  EVEX only.
be60ab96 4405(define_insn "cvtusi2<ssescalarmodesuffix>32<round_name>"
697a43f8 4406 [(set (match_operand:VF_128 0 "register_operand" "=v")
4407 (vec_merge:VF_128
4408 (vec_duplicate:VF_128
4409 (unsigned_float:<ssescalarmode>
be60ab96 4410 (match_operand:SI 2 "<round_nimm_predicate>" "<round_constraint3>")))
697a43f8 4411 (match_operand:VF_128 1 "register_operand" "v")
4412 (const_int 1)))]
be60ab96 4413 "TARGET_AVX512F && <round_modev4sf_condition>"
88048095 4414 "vcvtusi2<ssescalarmodesuffix>\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
697a43f8 4415 [(set_attr "type" "sseicvt")
4416 (set_attr "prefix" "evex")
4417 (set_attr "mode" "<ssescalarmode>")])
4418
;; vcvtusi2ss/sd with a 64-bit source: unsigned_float of DImode operand 2,
;; merged into vector operand 1 under the constant mask 1.  EVEX only,
;; TARGET_64BIT only.
;; The "{q}" adds an explicit q suffix in AT&T syntax only.  Without it a
;; memory source gives the assembler nothing to distinguish this from the
;; 32-bit form; the signed 64-bit patterns (cvtsi2ssq, vcvtsi2sdq,
;; %vcvtss2si{q}) already carry the suffix.
be60ab96 4419(define_insn "cvtusi2<ssescalarmodesuffix>64<round_name>"
697a43f8 4420 [(set (match_operand:VF_128 0 "register_operand" "=v")
4421 (vec_merge:VF_128
4422 (vec_duplicate:VF_128
4423 (unsigned_float:<ssescalarmode>
be60ab96 4424 (match_operand:DI 2 "<round_nimm_predicate>" "<round_constraint3>")))
697a43f8 4425 (match_operand:VF_128 1 "register_operand" "v")
4426 (const_int 1)))]
4427 "TARGET_AVX512F && TARGET_64BIT"
88048095 4428 "vcvtusi2<ssescalarmodesuffix>{q}\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
697a43f8 4429 [(set_attr "type" "sseicvt")
4430 (set_attr "prefix" "evex")
4431 (set_attr "mode" "<ssescalarmode>")])
4432
;; Signed integer vector to single-precision float vector conversion
;; (cvtdq2ps), with optional <mask_name> and <round_name> substitutions.
be60ab96 4433(define_insn "float<sseintvecmodelower><mode>2<mask_name><round_name>"
e13e1b39 4434 [(set (match_operand:VF1 0 "register_operand" "=v")
d6b69370 4435 (float:VF1
be60ab96 4436 (match_operand:<sseintvecmode> 1 "<round_nimm_predicate>" "<round_constraint>")))]
4437 "TARGET_SSE2 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
4438 "%vcvtdq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
2a466fea 4439 [(set_attr "type" "ssecvt")
887423c0 4440 (set_attr "prefix" "maybe_vex")
d6b69370 4441 (set_attr "mode" "<sseinsnmode>")])
2a466fea 4442
;; Unsigned integer vector to single-precision float vector conversion
;; (vcvtudq2ps); EVEX only.
;; NOTE(review): the source predicate is plain "nonimmediate_operand" even
;; though the name carries <round_name>; the signed sibling uses
;; <round_nimm_predicate> -- confirm.
040236d9 4443(define_insn "ufloat<sseintvecmodelower><mode>2<mask_name><round_name>"
4444 [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
4445 (unsigned_float:VF1_AVX512VL
4446 (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
697a43f8 4447 "TARGET_AVX512F"
be60ab96 4448 "vcvtudq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
697a43f8 4449 [(set_attr "type" "ssecvt")
4450 (set_attr "prefix" "evex")
040236d9 4451 (set_attr "mode" "<MODE>")])
697a43f8 4452
;; Expander for unsigned integer vector -> float vector conversion.
;; Dispatch order: V16SF always uses the ufloat insn; otherwise, with
;; TARGET_AVX512VL, the V4SF/V8SF ufloat insns are used; in every other
;; case the work is delegated to ix86_expand_vector_convert_uns_vsivsf.
d6b69370 4453(define_expand "floatuns<sseintvecmodelower><mode>2"
abd4f58b 4454 [(match_operand:VF1 0 "register_operand")
4455 (match_operand:<sseintvecmode> 1 "register_operand")]
e39ec8de 4456 "TARGET_SSE2 && (<MODE>mode == V4SFmode || TARGET_AVX2)"
e42ab82a 4457{
1706116d 4458 if (<MODE>mode == V16SFmode)
4459 emit_insn (gen_ufloatv16siv16sf2 (operands[0], operands[1]));
f06d92e4 4460 else
4461 if (TARGET_AVX512VL)
4462 {
4463 if (<MODE>mode == V4SFmode)
4464 emit_insn (gen_ufloatv4siv4sf2 (operands[0], operands[1]));
4465 else
4466 emit_insn (gen_ufloatv8siv8sf2 (operands[0], operands[1]));
4467 }
1706116d 4468 else
4469 ix86_expand_vector_convert_uns_vsivsf (operands[0], operands[1]);
4470
e39ec8de 4471 DONE;
e42ab82a 4472})
4473
ed30e0a6 4474
bf8e1ae3 4475;; For <sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode> insn pattern
;; Maps each SI vector mode to the lower-case name of the SF vector mode
;; with the same number of elements.
4476(define_mode_attr sf2simodelower
4477 [(V16SI "v16sf") (V8SI "v8sf") (V4SI "v4sf")])
4478
;; cvtps2dq: UNSPEC_FIX_NOTRUNC conversion of a packed-single vector to a
;; packed SI vector.  prefix_data16 is forced to "1" only when TARGET_AVX is
;; off; with AVX it is left at "*".
d135c232 4479(define_insn "<sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode><mask_name>"
bf8e1ae3 4480 [(set (match_operand:VI4_AVX 0 "register_operand" "=v")
4481 (unspec:VI4_AVX
4482 [(match_operand:<ssePSmode> 1 "nonimmediate_operand" "vm")]
4483 UNSPEC_FIX_NOTRUNC))]
d135c232 4484 "TARGET_SSE2 && <mask_mode512bit_condition>"
4485 "%vcvtps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
2a466fea 4486 [(set_attr "type" "ssecvt")
887423c0 4487 (set (attr "prefix_data16")
4488 (if_then_else
6be3efec 4489 (match_test "TARGET_AVX")
887423c0 4490 (const_string "*")
4491 (const_string "1")))
4492 (set_attr "prefix" "maybe_vex")
bf8e1ae3 4493 (set_attr "mode" "<sseinsnmode>")])
2a466fea 4494
;; 512-bit vcvtps2dq: UNSPEC_FIX_NOTRUNC from V16SF to V16SI, with optional
;; <mask_name> and <round_name> substitutions.  EVEX only.
be60ab96 4495(define_insn "<mask_codefor>avx512f_fix_notruncv16sfv16si<mask_name><round_name>"
697a43f8 4496 [(set (match_operand:V16SI 0 "register_operand" "=v")
4497 (unspec:V16SI
be60ab96 4498 [(match_operand:V16SF 1 "<round_nimm_predicate>" "<round_constraint>")]
697a43f8 4499 UNSPEC_FIX_NOTRUNC))]
4500 "TARGET_AVX512F"
be60ab96 4501 "vcvtps2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
697a43f8 4502 [(set_attr "type" "ssecvt")
4503 (set_attr "prefix" "evex")
4504 (set_attr "mode" "XI")])
4505
;; vcvtps2udq: UNSPEC_UNSIGNED_FIX_NOTRUNC conversion of a packed-single
;; vector to a packed SI vector, with optional mask/round substitutions.
;; EVEX only.
7da26bee 4506(define_insn "<mask_codefor><avx512>_ufix_notrunc<sf2simodelower><mode><mask_name><round_name>"
4507 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
4508 (unspec:VI4_AVX512VL
4509 [(match_operand:<ssePSmode> 1 "nonimmediate_operand" "<round_constraint>")]
697a43f8 4510 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4511 "TARGET_AVX512F"
be60ab96 4512 "vcvtps2udq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
697a43f8 4513 [(set_attr "type" "ssecvt")
4514 (set_attr "prefix" "evex")
7da26bee 4515 (set_attr "mode" "<sseinsnmode>")])
697a43f8 4516
;; vcvtps2qq for 256/512-bit DI destinations: UNSPEC_FIX_NOTRUNC of a
;; packed-single source of mode <ssePSmode2>.  Requires TARGET_AVX512DQ.
9bb6f354 4517(define_insn "<mask_codefor>avx512dq_cvtps2qq<mode><mask_name><round_name>"
4518 [(set (match_operand:VI8_256_512 0 "register_operand" "=v")
4519 (unspec:VI8_256_512 [(match_operand:<ssePSmode2> 1 "nonimmediate_operand" "<round_constraint>")]
4520 UNSPEC_FIX_NOTRUNC))]
4521 "TARGET_AVX512DQ && <round_mode512bit_condition>"
4522 "vcvtps2qq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4523 [(set_attr "type" "ssecvt")
4524 (set_attr "prefix" "evex")
4525 (set_attr "mode" "<sseinsnmode>")])
4526
;; 128-bit vcvtps2qq: UNSPEC_FIX_NOTRUNC of elements 0 and 1 of a V4SF
;; source into a V2DI destination.  Requires AVX512DQ and AVX512VL.
4527(define_insn "<mask_codefor>avx512dq_cvtps2qqv2di<mask_name>"
4528 [(set (match_operand:V2DI 0 "register_operand" "=v")
4529 (unspec:V2DI
4530 [(vec_select:V2SF
4531 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
4532 (parallel [(const_int 0) (const_int 1)]))]
4533 UNSPEC_FIX_NOTRUNC))]
4534 "TARGET_AVX512DQ && TARGET_AVX512VL"
4535 "vcvtps2qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4536 [(set_attr "type" "ssecvt")
4537 (set_attr "prefix" "evex")
4538 (set_attr "mode" "TI")])
4539
;; vcvtps2uqq for 256/512-bit DI destinations:
;; UNSPEC_UNSIGNED_FIX_NOTRUNC of a packed-single source of mode
;; <ssePSmode2>.  Requires TARGET_AVX512DQ.
4540(define_insn "<mask_codefor>avx512dq_cvtps2uqq<mode><mask_name><round_name>"
4541 [(set (match_operand:VI8_256_512 0 "register_operand" "=v")
4542 (unspec:VI8_256_512 [(match_operand:<ssePSmode2> 1 "nonimmediate_operand" "<round_constraint>")]
4543 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4544 "TARGET_AVX512DQ && <round_mode512bit_condition>"
4545 "vcvtps2uqq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4546 [(set_attr "type" "ssecvt")
4547 (set_attr "prefix" "evex")
4548 (set_attr "mode" "<sseinsnmode>")])
4549
;; 128-bit vcvtps2uqq: UNSPEC_UNSIGNED_FIX_NOTRUNC of elements 0 and 1 of a
;; V4SF source into a V2DI destination.  Requires AVX512DQ and AVX512VL.
4550(define_insn "<mask_codefor>avx512dq_cvtps2uqqv2di<mask_name>"
4551 [(set (match_operand:V2DI 0 "register_operand" "=v")
4552 (unspec:V2DI
4553 [(vec_select:V2SF
4554 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
4555 (parallel [(const_int 0) (const_int 1)]))]
4556 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4557 "TARGET_AVX512DQ && TARGET_AVX512VL"
4558 "vcvtps2uqq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4559 [(set_attr "type" "ssecvt")
4560 (set_attr "prefix" "evex")
4561 (set_attr "mode" "TI")])
4562
;; 512-bit truncating conversion V16SF -> V16SI.  The any_fix code iterator
;; covers more than one fix code; <fixsuffix> appears in both the pattern
;; name and the mnemonic (vcvttps2<fixsuffix>dq).
dbfe84d5 4563(define_insn "<fixsuffix>fix_truncv16sfv16si2<mask_name><round_saeonly_name>"
03ae25dc 4564 [(set (match_operand:V16SI 0 "register_operand" "=v")
4565 (any_fix:V16SI
dbfe84d5 4566 (match_operand:V16SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
03ae25dc 4567 "TARGET_AVX512F"
dbfe84d5 4568 "vcvttps2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
03ae25dc 4569 [(set_attr "type" "ssecvt")
4570 (set_attr "prefix" "evex")
4571 (set_attr "mode" "XI")])
4572
;; 256-bit signed truncating conversion V8SF -> V8SI (vcvttps2dq), with an
;; optional <mask_name> substitution.
d135c232 4573(define_insn "fix_truncv8sfv8si2<mask_name>"
4574 [(set (match_operand:V8SI 0 "register_operand" "=v")
4575 (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "vm")))]
4576 "TARGET_AVX && <mask_avx512vl_condition>"
4577 "vcvttps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
ed30e0a6 4578 [(set_attr "type" "ssecvt")
d135c232 4579 (set_attr "prefix" "<mask_prefix>")
887423c0 4580 (set_attr "mode" "OI")])
ed30e0a6 4581
;; 128-bit signed truncating conversion V4SF -> V4SI (cvttps2dq), with an
;; optional <mask_name> substitution.  Without TARGET_AVX, prefix_rep is
;; "1" and prefix_data16 is "0"; with AVX both conditionals give "*".
;; NOTE(review): "prefix_data16" is specified twice below, once through the
;; TARGET_AVX conditional and once unconditionally as "0" -- confirm which
;; one is meant to take effect and drop the other.
d135c232 4582(define_insn "fix_truncv4sfv4si2<mask_name>"
4583 [(set (match_operand:V4SI 0 "register_operand" "=v")
4584 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "vm")))]
4585 "TARGET_SSE2 && <mask_avx512vl_condition>"
4586 "%vcvttps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
2a466fea 4587 [(set_attr "type" "ssecvt")
887423c0 4588 (set (attr "prefix_rep")
4589 (if_then_else
6be3efec 4590 (match_test "TARGET_AVX")
887423c0 4591 (const_string "*")
4592 (const_string "1")))
4593 (set (attr "prefix_data16")
4594 (if_then_else
6be3efec 4595 (match_test "TARGET_AVX")
887423c0 4596 (const_string "*")
4597 (const_string "0")))
00a0e418 4598 (set_attr "prefix_data16" "0")
d135c232 4599 (set_attr "prefix" "<mask_prefix2>")
2a466fea 4600 (set_attr "mode" "TI")])
4601
;; Expander for unsigned truncating float -> integer vector conversion.
;; V16SF uses the ufix_trunc insn directly.  Other modes call
;; ix86_expand_adjust_ufix_to_sfix_si, which returns an adjusted source
;; (tmp[0]) and fills in tmp[2]; the adjusted source is converted with the
;; signed fix_trunc insn and the result is XORed with tmp[2].
f155d038 4602(define_expand "fixuns_trunc<mode><sseintvecmodelower>2"
abd4f58b 4603 [(match_operand:<sseintvecmode> 0 "register_operand")
4604 (match_operand:VF1 1 "register_operand")]
da38df18 4605 "TARGET_SSE2"
f155d038 4606{
1706116d 4607 if (<MODE>mode == V16SFmode)
4608 emit_insn (gen_ufix_truncv16sfv16si2 (operands[0],
4609 operands[1]));
4610 else
4611 {
4612 rtx tmp[3];
4613 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
4614 tmp[1] = gen_reg_rtx (<sseintvecmode>mode);
4615 emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (tmp[1], tmp[0]));
4616 emit_insn (gen_xor<sseintvecmodelower>3 (operands[0], tmp[1], tmp[2]));
4617 }
c152f9e5 4618 DONE;
f155d038 4619})
4620
2a466fea 4621;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4622;;
4623;; Parallel double-precision floating point conversion operations
5802c0cb 4624;;
4625;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4626
;; cvtpi2pd: float-convert a V2SI source (MMX register or memory) to V2DF.
;; The "unit" attribute is "mmx" only for the MMX-register alternative.
2a466fea 4627(define_insn "sse2_cvtpi2pd"
4628 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
4629 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
5802c0cb 4630 "TARGET_SSE2"
2a466fea 4631 "cvtpi2pd\t{%1, %0|%0, %1}"
4632 [(set_attr "type" "ssecvt")
4633 (set_attr "unit" "mmx,*")
00a0e418 4634 (set_attr "prefix_data16" "1,*")
2a466fea 4635 (set_attr "mode" "V2DF")])
5802c0cb 4636
;; cvtpd2pi: UNSPEC_FIX_NOTRUNC conversion of a V2DF source to a V2SI
;; MMX-register destination.
2a466fea 4637(define_insn "sse2_cvtpd2pi"
4638 [(set (match_operand:V2SI 0 "register_operand" "=y")
4639 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
4640 UNSPEC_FIX_NOTRUNC))]
5802c0cb 4641 "TARGET_SSE2"
2a466fea 4642 "cvtpd2pi\t{%1, %0|%0, %1}"
4643 [(set_attr "type" "ssecvt")
4644 (set_attr "unit" "mmx")
887423c0 4645 (set_attr "bdver1_decode" "double")
6470d004 4646 (set_attr "btver2_decode" "direct")
2a466fea 4647 (set_attr "prefix_data16" "1")
887423c0 4648 (set_attr "mode" "DI")])
5802c0cb 4649
;; cvttpd2pi: truncating (fix) conversion of a V2DF source to a V2SI
;; MMX-register destination.
;; NOTE(review): "mode" is "TI" here but "DI" on sse2_cvtpd2pi -- confirm
;; the difference is intentional.
2a466fea 4650(define_insn "sse2_cvttpd2pi"
4651 [(set (match_operand:V2SI 0 "register_operand" "=y")
4652 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
5802c0cb 4653 "TARGET_SSE2"
2a466fea 4654 "cvttpd2pi\t{%1, %0|%0, %1}"
4655 [(set_attr "type" "ssecvt")
4656 (set_attr "unit" "mmx")
887423c0 4657 (set_attr "bdver1_decode" "double")
2a466fea 4658 (set_attr "prefix_data16" "1")
887423c0 4659 (set_attr "mode" "TI")])
ed30e0a6 4660
;; cvtsi2sd: convert SImode operand 2 to DF and merge it into V2DF operand 1
;; under the constant mask 1.  Alternatives 0-1 are the two-operand legacy
;; form; alternative 2 is the three-operand VEX/EVEX form.  This pattern
;; has no <round_name> variant.
2a466fea 4661(define_insn "sse2_cvtsi2sd"
3cd66da2 4662 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
5802c0cb 4663 (vec_merge:V2DF
2a466fea 4664 (vec_duplicate:V2DF
887423c0 4665 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
3cd66da2 4666 (match_operand:V2DF 1 "register_operand" "0,0,v")
5802c0cb 4667 (const_int 1)))]
5802c0cb 4668 "TARGET_SSE2"
887423c0 4669 "@
4670 cvtsi2sd\t{%2, %0|%0, %2}
4671 cvtsi2sd\t{%2, %0|%0, %2}
4672 vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
4673 [(set_attr "isa" "noavx,noavx,avx")
4674 (set_attr "type" "sseicvt")
4675 (set_attr "athlon_decode" "double,direct,*")
4676 (set_attr "amdfam10_decode" "vector,double,*")
4677 (set_attr "bdver1_decode" "double,direct,*")
6470d004 4678 (set_attr "btver2_decode" "double,double,double")
4c9faaa4 4679 (set_attr "znver1_decode" "double,double,double")
3cd66da2 4680 (set_attr "prefix" "orig,orig,maybe_evex")
ed30e0a6 4681 (set_attr "mode" "DF")])
4682
;; 64-bit variant of sse2_cvtsi2sd: DImode source, explicit "q" mnemonic
;; suffix, TARGET_64BIT only, with an optional <round_name> substitution on
;; the VEX/EVEX alternative.
be60ab96 4683(define_insn "sse2_cvtsi2sdq<round_name>"
4c1099de 4684 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
5802c0cb 4685 (vec_merge:V2DF
2a466fea 4686 (vec_duplicate:V2DF
be60ab96 4687 (float:DF (match_operand:DI 2 "<round_nimm_predicate>" "r,m,<round_constraint3>")))
4c1099de 4688 (match_operand:V2DF 1 "register_operand" "0,0,v")
5802c0cb 4689 (const_int 1)))]
2a466fea 4690 "TARGET_SSE2 && TARGET_64BIT"
887423c0 4691 "@
4692 cvtsi2sdq\t{%2, %0|%0, %2}
4693 cvtsi2sdq\t{%2, %0|%0, %2}
88048095 4694 vcvtsi2sdq\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
887423c0 4695 [(set_attr "isa" "noavx,noavx,avx")
4696 (set_attr "type" "sseicvt")
4697 (set_attr "athlon_decode" "double,direct,*")
4698 (set_attr "amdfam10_decode" "vector,double,*")
4699 (set_attr "bdver1_decode" "double,direct,*")
4700 (set_attr "length_vex" "*,*,4")
4701 (set_attr "prefix_rex" "1,1,*")
4c1099de 4702 (set_attr "prefix" "orig,orig,maybe_evex")
887423c0 4703 (set_attr "mode" "DF")])
2a466fea 4704
;; vcvtss2usi: UNSPEC_UNSIGNED_FIX_NOTRUNC conversion of element 0 of V4SF
;; operand 1 to an SImode general register.  EVEX only.
;; Only the low SF element is read, so the Intel-syntax source is printed
;; with %k1, as the signed sse_cvtss2si patterns in this file do.
be60ab96 4705(define_insn "avx512f_vcvtss2usi<round_name>"
697a43f8 4706 [(set (match_operand:SI 0 "register_operand" "=r")
4707 (unspec:SI
4708 [(vec_select:SF
be60ab96 4709 (match_operand:V4SF 1 "<round_nimm_predicate>" "<round_constraint>")
697a43f8 4710 (parallel [(const_int 0)]))]
4711 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4712 "TARGET_AVX512F"
be60ab96 4713 "vcvtss2usi\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
697a43f8 4714 [(set_attr "type" "sseicvt")
4715 (set_attr "prefix" "evex")
4716 (set_attr "mode" "SI")])
4717
;; 64-bit variant of avx512f_vcvtss2usi: DImode destination, TARGET_64BIT
;; only.
;; Only the low SF element is read, so the Intel-syntax source is printed
;; with %k1, as the signed sse_cvtss2siq pattern in this file does.
be60ab96 4718(define_insn "avx512f_vcvtss2usiq<round_name>"
697a43f8 4719 [(set (match_operand:DI 0 "register_operand" "=r")
4720 (unspec:DI
4721 [(vec_select:SF
be60ab96 4722 (match_operand:V4SF 1 "<round_nimm_predicate>" "<round_constraint>")
697a43f8 4723 (parallel [(const_int 0)]))]
4724 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4725 "TARGET_AVX512F && TARGET_64BIT"
be60ab96 4726 "vcvtss2usi\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
697a43f8 4727 [(set_attr "type" "sseicvt")
4728 (set_attr "prefix" "evex")
4729 (set_attr "mode" "DI")])
4730
;; Truncating conversion (RTL unsigned_fix) of element 0 of the V4SF operand
;; to an unsigned SImode value.  The Intel-syntax source uses %k1 so a memory
;; operand is printed with the scalar (32-bit) size instead of the full
;; vector size.
dbfe84d5 4731(define_insn "avx512f_vcvttss2usi<round_saeonly_name>"
697a43f8 4732 [(set (match_operand:SI 0 "register_operand" "=r")
4733 (unsigned_fix:SI
4734 (vec_select:SF
dbfe84d5 4735 (match_operand:V4SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
697a43f8 4736 (parallel [(const_int 0)]))))]
4737 "TARGET_AVX512F"
dbfe84d5 4738 "vcvttss2usi\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
697a43f8 4739 [(set_attr "type" "sseicvt")
4740 (set_attr "prefix" "evex")
4741 (set_attr "mode" "SI")])
4742
;; 64-bit variant: truncating conversion (RTL unsigned_fix) of element 0 of
;; the V4SF operand to an unsigned DImode value; only enabled for
;; TARGET_64BIT.  The Intel-syntax source uses %k1 so a memory operand is
;; printed with the scalar (32-bit) size instead of the full vector size.
dbfe84d5 4743(define_insn "avx512f_vcvttss2usiq<round_saeonly_name>"
697a43f8 4744 [(set (match_operand:DI 0 "register_operand" "=r")
4745 (unsigned_fix:DI
4746 (vec_select:SF
dbfe84d5 4747 (match_operand:V4SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
697a43f8 4748 (parallel [(const_int 0)]))))]
4749 "TARGET_AVX512F && TARGET_64BIT"
dbfe84d5 4750 "vcvttss2usi\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
697a43f8 4751 [(set_attr "type" "sseicvt")
4752 (set_attr "prefix" "evex")
4753 (set_attr "mode" "DI")])
4754
;; Non-truncating conversion (UNSPEC_UNSIGNED_FIX_NOTRUNC) of element 0 of the
;; V2DF operand to an unsigned SImode value.  The Intel-syntax source uses
;; %q1, like sse2_cvtsd2si, so a memory operand is printed with the scalar
;; (64-bit) size instead of the full vector size.
be60ab96 4755(define_insn "avx512f_vcvtsd2usi<round_name>"
697a43f8 4756 [(set (match_operand:SI 0 "register_operand" "=r")
4757 (unspec:SI
4758 [(vec_select:DF
be60ab96 4759 (match_operand:V2DF 1 "<round_nimm_predicate>" "<round_constraint>")
697a43f8 4760 (parallel [(const_int 0)]))]
4761 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4762 "TARGET_AVX512F"
be60ab96 4763 "vcvtsd2usi\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
697a43f8 4764 [(set_attr "type" "sseicvt")
4765 (set_attr "prefix" "evex")
4766 (set_attr "mode" "SI")])
4767
;; 64-bit variant: non-truncating conversion (UNSPEC_UNSIGNED_FIX_NOTRUNC) of
;; element 0 of the V2DF operand to an unsigned DImode value; only enabled
;; for TARGET_64BIT.  The Intel-syntax source uses %q1 so a memory operand is
;; printed with the scalar (64-bit) size instead of the full vector size.
be60ab96 4768(define_insn "avx512f_vcvtsd2usiq<round_name>"
697a43f8 4769 [(set (match_operand:DI 0 "register_operand" "=r")
4770 (unspec:DI
4771 [(vec_select:DF
be60ab96 4772 (match_operand:V2DF 1 "<round_nimm_predicate>" "<round_constraint>")
697a43f8 4773 (parallel [(const_int 0)]))]
4774 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4775 "TARGET_AVX512F && TARGET_64BIT"
be60ab96 4776 "vcvtsd2usi\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
697a43f8 4777 [(set_attr "type" "sseicvt")
4778 (set_attr "prefix" "evex")
4779 (set_attr "mode" "DI")])
4780
;; Truncating conversion (RTL unsigned_fix) of element 0 of the V2DF operand
;; to an unsigned SImode value.  The Intel-syntax source uses %q1 so a memory
;; operand is printed with the scalar (64-bit) size instead of the full
;; vector size.
dbfe84d5 4781(define_insn "avx512f_vcvttsd2usi<round_saeonly_name>"
697a43f8 4782 [(set (match_operand:SI 0 "register_operand" "=r")
4783 (unsigned_fix:SI
4784 (vec_select:DF
dbfe84d5 4785 (match_operand:V2DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
697a43f8 4786 (parallel [(const_int 0)]))))]
4787 "TARGET_AVX512F"
dbfe84d5 4788 "vcvttsd2usi\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
697a43f8 4789 [(set_attr "type" "sseicvt")
4790 (set_attr "prefix" "evex")
4791 (set_attr "mode" "SI")])
4792
;; 64-bit variant: truncating conversion (RTL unsigned_fix) of element 0 of
;; the V2DF operand to an unsigned DImode value; only enabled for
;; TARGET_64BIT.  The Intel-syntax source uses %q1 so a memory operand is
;; printed with the scalar (64-bit) size instead of the full vector size.
dbfe84d5 4793(define_insn "avx512f_vcvttsd2usiq<round_saeonly_name>"
697a43f8 4794 [(set (match_operand:DI 0 "register_operand" "=r")
4795 (unsigned_fix:DI
4796 (vec_select:DF
dbfe84d5 4797 (match_operand:V2DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
697a43f8 4798 (parallel [(const_int 0)]))))]
4799 "TARGET_AVX512F && TARGET_64BIT"
dbfe84d5 4800 "vcvttsd2usi\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
697a43f8 4801 [(set_attr "type" "sseicvt")
4802 (set_attr "prefix" "evex")
4803 (set_attr "mode" "DI")])
4804
;; Non-truncating conversion (UNSPEC_FIX_NOTRUNC) of element 0 of the V2DF
;; operand to SImode.  Two alternatives: source in a vector register, or
;; whatever <round_constraint2> allows.  The template starts with %v and the
;; Intel-syntax source is printed with the %q modifier.
be60ab96 4805(define_insn "sse2_cvtsd2si<round_name>"
2a466fea 4806 [(set (match_operand:SI 0 "register_operand" "=r,r")
4807 (unspec:SI
4808 [(vec_select:DF
be60ab96 4809 (match_operand:V2DF 1 "<round_nimm_predicate>" "v,<round_constraint2>")
2a466fea 4810 (parallel [(const_int 0)]))]
4811 UNSPEC_FIX_NOTRUNC))]
5802c0cb 4812 "TARGET_SSE2"
be60ab96 4813 "%vcvtsd2si\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
2a466fea 4814 [(set_attr "type" "sseicvt")
4815 (set_attr "athlon_decode" "double,vector")
97436e92 4816 (set_attr "bdver1_decode" "double,double")
6470d004 4817 (set_attr "btver2_decode" "double,double")
2a466fea 4818 (set_attr "prefix_rep" "1")
ed30e0a6 4819 (set_attr "prefix" "maybe_vex")
2a466fea 4820 (set_attr "mode" "SI")])
5802c0cb 4821
;; Same UNSPEC_FIX_NOTRUNC conversion to SImode, but matching a scalar DF
;; source (vector register or memory) rather than a vec_select from V2DF.
2a466fea 4822(define_insn "sse2_cvtsd2si_2"
4823 [(set (match_operand:SI 0 "register_operand" "=r,r")
e13e1b39 4824 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
2a466fea 4825 UNSPEC_FIX_NOTRUNC))]
5802c0cb 4826 "TARGET_SSE2"
c358a059 4827 "%vcvtsd2si\t{%1, %0|%0, %q1}"
2a466fea 4828 [(set_attr "type" "sseicvt")
4829 (set_attr "athlon_decode" "double,vector")
4830 (set_attr "amdfam10_decode" "double,double")
97436e92 4831 (set_attr "bdver1_decode" "double,double")
2a466fea 4832 (set_attr "prefix_rep" "1")
ed30e0a6 4833 (set_attr "prefix" "maybe_vex")
2a466fea 4834 (set_attr "mode" "SI")])
5802c0cb 4835
;; DImode counterpart of sse2_cvtsd2si: UNSPEC_FIX_NOTRUNC conversion of
;; element 0 of the V2DF operand, enabled only for TARGET_64BIT.  The "{q}"
;; in the mnemonic is an AT&T-only size suffix.
be60ab96 4836(define_insn "sse2_cvtsd2siq<round_name>"
2a466fea 4837 [(set (match_operand:DI 0 "register_operand" "=r,r")
4838 (unspec:DI
4839 [(vec_select:DF
be60ab96 4840 (match_operand:V2DF 1 "<round_nimm_predicate>" "v,<round_constraint2>")
2a466fea 4841 (parallel [(const_int 0)]))]
4842 UNSPEC_FIX_NOTRUNC))]
4843 "TARGET_SSE2 && TARGET_64BIT"
be60ab96 4844 "%vcvtsd2si{q}\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
2a466fea 4845 [(set_attr "type" "sseicvt")
4846 (set_attr "athlon_decode" "double,vector")
97436e92 4847 (set_attr "bdver1_decode" "double,double")
2a466fea 4848 (set_attr "prefix_rep" "1")
ed30e0a6 4849 (set_attr "prefix" "maybe_vex")
2a466fea 4850 (set_attr "mode" "DI")])
5802c0cb 4851
;; DImode UNSPEC_FIX_NOTRUNC conversion matching a scalar DF source (vector
;; register or memory); enabled only for TARGET_64BIT.
2a466fea 4852(define_insn "sse2_cvtsd2siq_2"
4853 [(set (match_operand:DI 0 "register_operand" "=r,r")
e13e1b39 4854 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
2a466fea 4855 UNSPEC_FIX_NOTRUNC))]
4856 "TARGET_SSE2 && TARGET_64BIT"
c358a059 4857 "%vcvtsd2si{q}\t{%1, %0|%0, %q1}"
2a466fea 4858 [(set_attr "type" "sseicvt")
4859 (set_attr "athlon_decode" "double,vector")
4860 (set_attr "amdfam10_decode" "double,double")
97436e92 4861 (set_attr "bdver1_decode" "double,double")
2a466fea 4862 (set_attr "prefix_rep" "1")
ed30e0a6 4863 (set_attr "prefix" "maybe_vex")
2a466fea 4864 (set_attr "mode" "DI")])
5802c0cb 4865
;; Truncating conversion (RTL fix) of element 0 of the V2DF operand to
;; SImode.  Alternative 0 takes the source in a vector register, alternative
;; 1 whatever <round_saeonly_constraint2> allows.
dbfe84d5 4866(define_insn "sse2_cvttsd2si<round_saeonly_name>"
2a466fea 4867 [(set (match_operand:SI 0 "register_operand" "=r,r")
4868 (fix:SI
4869 (vec_select:DF
dbfe84d5 4870 (match_operand:V2DF 1 "<round_saeonly_nimm_predicate>" "v,<round_saeonly_constraint2>")
2a466fea 4871 (parallel [(const_int 0)]))))]
5802c0cb 4872 "TARGET_SSE2"
dbfe84d5 4873 "%vcvttsd2si\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
2a466fea 4874 [(set_attr "type" "sseicvt")
2a466fea 4875 (set_attr "athlon_decode" "double,vector")
97436e92 4876 (set_attr "amdfam10_decode" "double,double")
887423c0 4877 (set_attr "bdver1_decode" "double,double")
6470d004 4878 (set_attr "btver2_decode" "double,double")
887423c0 4879 (set_attr "prefix_rep" "1")
4880 (set_attr "prefix" "maybe_vex")
4881 (set_attr "mode" "SI")])
2a466fea 4882
;; DImode counterpart of sse2_cvttsd2si: truncating conversion (RTL fix) of
;; element 0 of the V2DF operand, enabled only for TARGET_64BIT.  The "{q}"
;; in the mnemonic is an AT&T-only size suffix.
dbfe84d5 4883(define_insn "sse2_cvttsd2siq<round_saeonly_name>"
2a466fea 4884 [(set (match_operand:DI 0 "register_operand" "=r,r")
4885 (fix:DI
4886 (vec_select:DF
dbfe84d5 4887 (match_operand:V2DF 1 "<round_saeonly_nimm_predicate>" "v,<round_saeonly_constraint2>")
2a466fea 4888 (parallel [(const_int 0)]))))]
4889 "TARGET_SSE2 && TARGET_64BIT"
dbfe84d5 4890 "%vcvttsd2si{q}\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
2a466fea 4891 [(set_attr "type" "sseicvt")
2a466fea 4892 (set_attr "athlon_decode" "double,vector")
97436e92 4893 (set_attr "amdfam10_decode" "double,double")
887423c0 4894 (set_attr "bdver1_decode" "double,double")
4895 (set_attr "prefix_rep" "1")
4896 (set_attr "prefix" "maybe_vex")
4897 (set_attr "mode" "DI")])
5802c0cb 4898
6615b722 4899;; For float<si2dfmode><mode>2 insn pattern
;; si2dfmode maps a DF vector mode (V8DF, V4DF) to the SI vector mode with
;; the same number of elements; si2dfmodelower is the same mapping spelled in
;; lower case for use inside pattern names.
4900(define_mode_attr si2dfmode
4901 [(V8DF "V8SI") (V4DF "V4SI")])
4902(define_mode_attr si2dfmodelower
4903 [(V8DF "v8si") (V4DF "v4si")])
4904
;; Signed conversion of an SI vector (<si2dfmode>) to the DF vector mode with
;; the same element count, emitted as vcvtdq2pd.  <mask_name> also provides
;; a masked form, which is additionally subject to <mask_mode512bit_condition>.
5220cab6 4905(define_insn "float<si2dfmodelower><mode>2<mask_name>"
6615b722 4906 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
4907 (float:VF2_512_256 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
5220cab6 4908 "TARGET_AVX && <mask_mode512bit_condition>"
4909 "vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
ed30e0a6 4910 [(set_attr "type" "ssecvt")
6615b722 4911 (set_attr "prefix" "maybe_vex")
4912 (set_attr "mode" "<MODE>")])
ed30e0a6 4913
;; Signed/unsigned conversion (any_float) of a 64-bit integer vector to the
;; DF vector mode of the same width, emitted as vcvt[u]qq2pd.  Because the
;; pattern is also expanded through <round_name>, the source constraint is
;; <round_constraint> rather than a fixed "vm", matching the vcvt[u]qq2ps
;; pattern in this file, so the rounding variant is not given a memory source.
d3d65e42 4914(define_insn "<floatsuffix>float<sseintvecmodelower><mode>2<mask_name><round_name>"
4915 [(set (match_operand:VF2_AVX512VL 0 "register_operand" "=v")
4916 (any_float:VF2_AVX512VL
4917 (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
4918 "TARGET_AVX512DQ"
4919 "vcvt<floatsuffix>qq2pd\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4920 [(set_attr "type" "ssecvt")
4921 (set_attr "prefix" "evex")
4922 (set_attr "mode" "<MODE>")])
4923
4924;; For <floatsuffix>float<sselongvecmodelower><mode> insn patterns
;; Mnemonic suffix for the qq2ps conversions: empty for a V8SF result and the
;; AT&T-only "{y}" for a V4SF result.
4925(define_mode_attr qq2pssuff
4926 [(V8SF "") (V4SF "{y}")])
4927
;; DI vector mode with the same element count as the given SF vector mode.
4928(define_mode_attr sselongvecmode
4929 [(V8SF "V8DI") (V4SF "V4DI")])
4930
;; Lower-case spelling of sselongvecmode, for use in pattern names.
4931(define_mode_attr sselongvecmodelower
4932 [(V8SF "v8di") (V4SF "v4di")])
4933
;; Integer mode used as the "mode" attribute of the ps2qq/ps2uqq truncating
;; conversion patterns that reference <sseintvecmode3>.
4934(define_mode_attr sseintvecmode3
4935 [(V8SF "XI") (V4SF "OI")
4936 (V8DF "OI") (V4DF "TI")])
4937
;; Signed/unsigned conversion (any_float) of a DI vector (<sselongvecmode>)
;; to the SF vector with the same element count, emitted as vcvt[u]qq2ps
;; with the <qq2pssuff> suffix.  The rounding variant is restricted by
;; <round_modev8sf_condition>.
4938(define_insn "<floatsuffix>float<sselongvecmodelower><mode>2<mask_name><round_name>"
4939 [(set (match_operand:VF1_128_256VL 0 "register_operand" "=v")
4940 (any_float:VF1_128_256VL
4941 (match_operand:<sselongvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
4942 "TARGET_AVX512DQ && <round_modev8sf_condition>"
4943 "vcvt<floatsuffix>qq2ps<qq2pssuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4944 [(set_attr "type" "ssecvt")
4945 (set_attr "prefix" "evex")
4946 (set_attr "mode" "<MODE>")])
4947
;; Convert a V2DI operand to V2SF (any_float) and place it in the low half of
;; a V4SF result whose high half is the constant zero vector.  Emitted as
;; vcvt[u]qq2ps with the AT&T-only "{x}" suffix.
4948(define_insn "*<floatsuffix>floatv2div2sf2"
4949 [(set (match_operand:V4SF 0 "register_operand" "=v")
4950 (vec_concat:V4SF
4951 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
4952 (const_vector:V2SF [(const_int 0) (const_int 0)])))]
4953 "TARGET_AVX512DQ && TARGET_AVX512VL"
4954 "vcvt<floatsuffix>qq2ps{x}\t{%1, %0|%0, %1}"
4955 [(set_attr "type" "ssecvt")
4956 (set_attr "prefix" "evex")
4957 (set_attr "mode" "V4SF")])
4958
;; Masked form of the V2DI -> V2SF conversion: the converted V2SF is merged,
;; under the QImode mask in operand 3, with elements 0 and 1 of operand 2
;; (either tied to the destination or a constant, per the "0C" constraint).
;; The high half of the V4SF result is the constant zero vector.
;; NOTE(review): %N2 presumably prints the zero-masking marker when operand 2
;; is the constant alternative -- confirm against the operand printer.
4959(define_insn "<floatsuffix>floatv2div2sf2_mask"
4960 [(set (match_operand:V4SF 0 "register_operand" "=v")
4961 (vec_concat:V4SF
4962 (vec_merge:V2SF
4963 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
4964 (vec_select:V2SF
4965 (match_operand:V4SF 2 "vector_move_operand" "0C")
4966 (parallel [(const_int 0) (const_int 1)]))
4967 (match_operand:QI 3 "register_operand" "Yk"))
4968 (const_vector:V2SF [(const_int 0) (const_int 0)])))]
4969 "TARGET_AVX512DQ && TARGET_AVX512VL"
4970 "vcvt<floatsuffix>qq2ps{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
4971 [(set_attr "type" "ssecvt")
4972 (set_attr "prefix" "evex")
4973 (set_attr "mode" "V4SF")])
4974
;; Unsigned conversion (unsigned_float) of an SI vector (<si2dfmode>) to the
;; DF vector mode with the same element count, emitted as vcvtudq2pd.
4975(define_insn "ufloat<si2dfmodelower><mode>2<mask_name>"
4976 [(set (match_operand:VF2_512_256VL 0 "register_operand" "=v")
4977 (unsigned_float:VF2_512_256VL
4978 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
4979 "TARGET_AVX512F"
4980 "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4981 [(set_attr "type" "ssecvt")
4982 (set_attr "prefix" "evex")
4983 (set_attr "mode" "<MODE>")])
4984
;; Unsigned conversion of elements 0 and 1 of a V4SI operand to V2DF,
;; emitted as vcvtudq2pd.  Only the low 64 bits of the source are read, so
;; the Intel-syntax operand uses %q1 (as sse2_cvtdq2pd does) to print a
;; memory source with a 64-bit size instead of the full V4SI size.
4985(define_insn "ufloatv2siv2df2<mask_name>"
4986 [(set (match_operand:V2DF 0 "register_operand" "=v")
4987 (unsigned_float:V2DF
4988 (vec_select:V2SI
4989 (match_operand:V4SI 1 "nonimmediate_operand" "vm")
4990 (parallel [(const_int 0) (const_int 1)]))))]
4991 "TARGET_AVX512VL"
5220cab6 4992 "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
697a43f8 4993 [(set_attr "type" "ssecvt")
4994 (set_attr "prefix" "evex")
d3d65e42 4995 (set_attr "mode" "V2DF")])
697a43f8 4996
;; Signed conversion of elements 0..7 (the low half) of a V16SI operand to
;; V8DF, emitted as vcvtdq2pd.
;; NOTE(review): %t1 presumably prints operand 1 at 256-bit width to match
;; the half that is actually read -- confirm against the operand printer.
4997(define_insn "avx512f_cvtdq2pd512_2"
4998 [(set (match_operand:V8DF 0 "register_operand" "=v")
4999 (float:V8DF
5000 (vec_select:V8SI
5001 (match_operand:V16SI 1 "nonimmediate_operand" "vm")
5002 (parallel [(const_int 0) (const_int 1)
5003 (const_int 2) (const_int 3)
5004 (const_int 4) (const_int 5)
5005 (const_int 6) (const_int 7)]))))]
0c4232b3 5006 "TARGET_AVX512F"
697a43f8 5007 "vcvtdq2pd\t{%t1, %0|%0, %t1}"
5008 [(set_attr "type" "ssecvt")
5009 (set_attr "prefix" "evex")
5010 (set_attr "mode" "V8DF")])
5011
;; Signed conversion of elements 0..3 (the low half) of a V8SI operand to
;; V4DF, emitted as vcvtdq2pd.
;; NOTE(review): %x1 presumably prints operand 1 at 128-bit width to match
;; the half that is actually read -- confirm against the operand printer.
ded0808e 5012(define_insn "avx_cvtdq2pd256_2"
0c4232b3 5013 [(set (match_operand:V4DF 0 "register_operand" "=v")
8cedf886 5014 (float:V4DF
5015 (vec_select:V4SI
0c4232b3 5016 (match_operand:V8SI 1 "nonimmediate_operand" "vm")
887423c0 5017 (parallel [(const_int 0) (const_int 1)
5018 (const_int 2) (const_int 3)]))))]
8cedf886 5019 "TARGET_AVX"
5020 "vcvtdq2pd\t{%x1, %0|%0, %x1}"
5021 [(set_attr "type" "ssecvt")
0c4232b3 5022 (set_attr "prefix" "maybe_evex")
8cedf886 5023 (set_attr "mode" "V4DF")])
5024
;; Signed conversion of elements 0 and 1 of a V4SI operand to V2DF, emitted
;; as [v]cvtdq2pd.  The masked form requires <mask_avx512vl_condition>; the
;; Intel-syntax source is printed with the %q modifier.
0c4232b3 5025(define_insn "sse2_cvtdq2pd<mask_name>"
5026 [(set (match_operand:V2DF 0 "register_operand" "=v")
2a466fea 5027 (float:V2DF
5028 (vec_select:V2SI
0c4232b3 5029 (match_operand:V4SI 1 "nonimmediate_operand" "vm")
2a466fea 5030 (parallel [(const_int 0) (const_int 1)]))))]
0c4232b3 5031 "TARGET_SSE2 && <mask_avx512vl_condition>"
5032 "%vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
2a466fea 5033 [(set_attr "type" "ssecvt")
ed30e0a6 5034 (set_attr "prefix" "maybe_vex")
8c1dfa94 5035 (set_attr "ssememalign" "64")
5802c0cb 5036 (set_attr "mode" "V2DF")])
5037
;; Non-truncating conversion (UNSPEC_FIX_NOTRUNC) of a V8DF operand to V8SI,
;; emitted as vcvtpd2dq, with masked and rounding variants generated through
;; <mask_name> and <round_name>.
be60ab96 5038(define_insn "<mask_codefor>avx512f_cvtpd2dq512<mask_name><round_name>"
697a43f8 5039 [(set (match_operand:V8SI 0 "register_operand" "=v")
5220cab6 5040 (unspec:V8SI
be60ab96 5041 [(match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")]
5220cab6 5042 UNSPEC_FIX_NOTRUNC))]
697a43f8 5043 "TARGET_AVX512F"
be60ab96 5044 "vcvtpd2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
697a43f8 5045 [(set_attr "type" "ssecvt")
5046 (set_attr "prefix" "evex")
5047 (set_attr "mode" "OI")])
5048
;; Non-truncating conversion (UNSPEC_FIX_NOTRUNC) of a V4DF operand to V4SI,
;; emitted as vcvtpd2dq with the AT&T-only "{y}" suffix.  The masked form
;; requires <mask_avx512vl_condition>.
0c4232b3 5049(define_insn "avx_cvtpd2dq256<mask_name>"
5050 [(set (match_operand:V4SI 0 "register_operand" "=v")
5051 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "vm")]
ed30e0a6 5052 UNSPEC_FIX_NOTRUNC))]
0c4232b3 5053 "TARGET_AVX && <mask_avx512vl_condition>"
5054 "vcvtpd2dq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
ed30e0a6 5055 [(set_attr "type" "ssecvt")
0c4232b3 5056 (set_attr "prefix" "<mask_prefix>")
ed30e0a6 5057 (set_attr "mode" "OI")])
5058
;; Expander producing a V8SI result whose low half is the UNSPEC_FIX_NOTRUNC
;; conversion of a V4DF operand and whose high half is operand 2, which the
;; preparation statement sets to the V4SI zero constant.
83c4576f 5059(define_expand "avx_cvtpd2dq256_2"
abd4f58b 5060 [(set (match_operand:V8SI 0 "register_operand")
83c4576f 5061 (vec_concat:V8SI
abd4f58b 5062 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand")]
83c4576f 5063 UNSPEC_FIX_NOTRUNC)
5064 (match_dup 2)))]
5065 "TARGET_AVX"
5066 "operands[2] = CONST0_RTX (V4SImode);")
5067
;; Insn matching the RTL built by the avx_cvtpd2dq256_2 expander: V4DF ->
;; V4SI (UNSPEC_FIX_NOTRUNC) concatenated with a zero V4SI.  The destination
;; is printed with %x0 in the vcvtpd2dq{y} template.
5068(define_insn "*avx_cvtpd2dq256_2"
5069 [(set (match_operand:V8SI 0 "register_operand" "=x")
5070 (vec_concat:V8SI
5071 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
5072 UNSPEC_FIX_NOTRUNC)
abd4f58b 5073 (match_operand:V4SI 2 "const0_operand")))]
83c4576f 5074 "TARGET_AVX"
5075 "vcvtpd2dq{y}\t{%1, %x0|%x0, %1}"
5076 [(set_attr "type" "ssecvt")
5077 (set_attr "prefix" "vex")
6470d004 5078 (set_attr "btver2_decode" "vector")
83c4576f 5079 (set_attr "mode" "OI")])
5080
;; Non-truncating conversion (UNSPEC_FIX_NOTRUNC) of a V2DF operand to V2SI,
;; placed in the low half of a V4SI result whose high half is zero.  The C
;; output code picks the VEX-style mnemonic (with mask operand text) when
;; TARGET_AVX is set and the legacy SSE mnemonic otherwise.
0c4232b3 5081(define_insn "sse2_cvtpd2dq<mask_name>"
5082 [(set (match_operand:V4SI 0 "register_operand" "=v")
2a466fea 5083 (vec_concat:V4SI
0c4232b3 5084 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
2a466fea 5085 UNSPEC_FIX_NOTRUNC)
0c4232b3 5086 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
5087 "TARGET_SSE2 && <mask_avx512vl_condition>"
887423c0 5088{
5089 if (TARGET_AVX)
0c4232b3 5090 return "vcvtpd2dq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
887423c0 5091 else
5092 return "cvtpd2dq\t{%1, %0|%0, %1}";
5093}
2a466fea 5094 [(set_attr "type" "ssecvt")
5095 (set_attr "prefix_rep" "1")
00a0e418 5096 (set_attr "prefix_data16" "0")
ed30e0a6 5097 (set_attr "prefix" "maybe_vex")
2a466fea 5098 (set_attr "mode" "TI")
97436e92 5099 (set_attr "amdfam10_decode" "double")
68ff067d 5100 (set_attr "athlon_decode" "vector")
97436e92 5101 (set_attr "bdver1_decode" "double")])
5802c0cb 5102
0c4232b3 5103;; For ufix_notrunc* insn patterns
;; Mnemonic suffix for vcvtpd2udq: empty for a V8DF source and the AT&T-only
;; "{y}" for a V4DF source.
5104(define_mode_attr pd2udqsuff
5105 [(V8DF "") (V4DF "{y}")])
5106
;; Non-truncating unsigned conversion (UNSPEC_UNSIGNED_FIX_NOTRUNC) of a DF
;; vector to the SI vector with the same element count (<si2dfmode>),
;; emitted as vcvtpd2udq with the <pd2udqsuff> suffix.
5107(define_insn "ufix_notrunc<mode><si2dfmodelower>2<mask_name><round_name>"
5108 [(set (match_operand:<si2dfmode> 0 "register_operand" "=v")
5109 (unspec:<si2dfmode>
5110 [(match_operand:VF2_512_256VL 1 "nonimmediate_operand" "<round_constraint>")]
697a43f8 5111 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
5112 "TARGET_AVX512F"
0c4232b3 5113 "vcvtpd2udq<pd2udqsuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
697a43f8 5114 [(set_attr "type" "ssecvt")
5115 (set_attr "prefix" "evex")
0c4232b3 5116 (set_attr "mode" "<sseinsnmode>")])
5117
;; Non-truncating unsigned conversion (UNSPEC_UNSIGNED_FIX_NOTRUNC) of a V2DF
;; operand to V2SI, placed in the low half of a V4SI result whose high half
;; is zero.  Emitted as vcvtpd2udq with the AT&T-only "{x}" suffix.
5118(define_insn "ufix_notruncv2dfv2si2<mask_name>"
5119 [(set (match_operand:V4SI 0 "register_operand" "=v")
5120 (vec_concat:V4SI
5121 (unspec:V2SI
5122 [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
5123 UNSPEC_UNSIGNED_FIX_NOTRUNC)
5124 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
5125 "TARGET_AVX512VL"
5126 "vcvtpd2udq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5127 [(set_attr "type" "ssecvt")
5128 (set_attr "prefix" "evex")
5129 (set_attr "mode" "TI")])
697a43f8 5130
;; Truncating signed/unsigned conversion (any_fix) of a V8DF operand to
;; V8SI, emitted as vcvttpd2dq / vcvttpd2udq depending on <fixsuffix>.
dbfe84d5 5131(define_insn "<fixsuffix>fix_truncv8dfv8si2<mask_name><round_saeonly_name>"
6615b722 5132 [(set (match_operand:V8SI 0 "register_operand" "=v")
5220cab6 5133 (any_fix:V8SI
dbfe84d5 5134 (match_operand:V8DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
6615b722 5135 "TARGET_AVX512F"
dbfe84d5 5136 "vcvttpd2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
6615b722 5137 [(set_attr "type" "ssecvt")
5138 (set_attr "prefix" "evex")
5139 (set_attr "mode" "OI")])
5140
;; Truncating unsigned conversion (unsigned_fix) of a V2DF operand to V2SI,
;; placed in the low half of a V4SI result whose high half is zero.
;; Emitted as vcvttpd2udq with the AT&T-only "{x}" suffix.
05ecc201 5141(define_insn "ufix_truncv2dfv2si2<mask_name>"
5142 [(set (match_operand:V4SI 0 "register_operand" "=v")
5143 (vec_concat:V4SI
5144 (unsigned_fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
5145 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
5146 "TARGET_AVX512VL"
5147 "vcvttpd2udq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
ed30e0a6 5148 [(set_attr "type" "ssecvt")
05ecc201 5149 (set_attr "prefix" "evex")
5150 (set_attr "mode" "TI")])
5151
;; Truncating signed conversion (fix) of a V4DF operand to V4SI, emitted as
;; vcvttpd2dq with the AT&T-only "{y}" suffix.  The condition follows the
;; avx_cvtpd2dq256<mask_name> pattern: the plain form needs TARGET_AVX and
;; the masked form generated through <mask_name> additionally needs
;; <mask_avx512vl_condition>, so a masked 256-bit form is never enabled
;; without AVX512VL.
5152(define_insn "fix_truncv4dfv4si2<mask_name>"
5153 [(set (match_operand:V4SI 0 "register_operand" "=v")
5154 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
5155 "TARGET_AVX && <mask_avx512vl_condition>"
5156 "vcvttpd2dq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5157 [(set_attr "type" "ssecvt")
5158 (set_attr "prefix" "maybe_evex")
5159 (set_attr "mode" "OI")])
5160
;; Truncating unsigned conversion (unsigned_fix) of a V4DF operand to V4SI,
;; emitted as vcvttpd2udq with the AT&T-only "{y}" suffix.
5161(define_insn "ufix_truncv4dfv4si2<mask_name>"
5162 [(set (match_operand:V4SI 0 "register_operand" "=v")
5163 (unsigned_fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
5164 "TARGET_AVX512VL && TARGET_AVX512F"
5165 "vcvttpd2udq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5166 [(set_attr "type" "ssecvt")
5167 (set_attr "prefix" "maybe_evex")
ed30e0a6 5168 (set_attr "mode" "OI")])
5169
;; Truncating signed/unsigned conversion (any_fix) of a DF vector to the
;; integer vector mode <sseintvecmode>, emitted as vcvttpd2qq / vcvttpd2uqq.
;; The SAE variant is restricted by <round_saeonly_mode512bit_condition>.
05ecc201 5170(define_insn "<fixsuffix>fix_trunc<mode><sseintvecmodelower>2<mask_name><round_saeonly_name>"
5171 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
5172 (any_fix:<sseintvecmode>
5173 (match_operand:VF2_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
5174 "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>"
5175 "vcvttpd2<fixsuffix>qq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
5176 [(set_attr "type" "ssecvt")
5177 (set_attr "prefix" "evex")
5178 (set_attr "mode" "<sseintvecmode2>")])
5179
;; Non-truncating signed conversion (UNSPEC_FIX_NOTRUNC) of a DF vector to
;; the integer vector mode <sseintvecmode>, emitted as vcvtpd2qq.  The
;; rounding variant is restricted by <round_mode512bit_condition>.
5180(define_insn "fix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>"
5181 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
5182 (unspec:<sseintvecmode>
5183 [(match_operand:VF2_AVX512VL 1 "<round_nimm_predicate>" "<round_constraint>")]
5184 UNSPEC_FIX_NOTRUNC))]
5185 "TARGET_AVX512DQ && <round_mode512bit_condition>"
5186 "vcvtpd2qq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5187 [(set_attr "type" "ssecvt")
5188 (set_attr "prefix" "evex")
5189 (set_attr "mode" "<sseintvecmode2>")])
5190
;; Non-truncating unsigned conversion (UNSPEC_UNSIGNED_FIX_NOTRUNC) of a DF
;; vector to the integer vector mode <sseintvecmode>, emitted as vcvtpd2uqq.
;; NOTE(review): unlike the signed fix_notrunc pattern, the predicate here is
;; a fixed nonimmediate_operand rather than <round_nimm_predicate>; only the
;; constraint varies with rounding.
5191(define_insn "ufix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>"
5192 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
5193 (unspec:<sseintvecmode>
5194 [(match_operand:VF2_AVX512VL 1 "nonimmediate_operand" "<round_constraint>")]
5195 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
5196 "TARGET_AVX512DQ && <round_mode512bit_condition>"
5197 "vcvtpd2uqq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5198 [(set_attr "type" "ssecvt")
5199 (set_attr "prefix" "evex")
5200 (set_attr "mode" "<sseintvecmode2>")])
5201
;; Truncating signed/unsigned conversion (any_fix) of an SF vector to the DI
;; vector with the same element count (<sselongvecmode>), emitted as
;; vcvttps2qq / vcvttps2uqq.  The SAE variant is restricted by
;; <round_saeonly_modev8sf_condition>.
0dc2f097 5202(define_insn "<fixsuffix>fix_trunc<mode><sselongvecmodelower>2<mask_name><round_saeonly_name>"
5203 [(set (match_operand:<sselongvecmode> 0 "register_operand" "=v")
5204 (any_fix:<sselongvecmode>
5205 (match_operand:VF1_128_256VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
5206 "TARGET_AVX512DQ && <round_saeonly_modev8sf_condition>"
5207 "vcvttps2<fixsuffix>qq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
5208 [(set_attr "type" "ssecvt")
5209 (set_attr "prefix" "evex")
5210 (set_attr "mode" "<sseintvecmode3>")])
5211
;; Truncating signed/unsigned conversion (any_fix) of elements 0 and 1 of a
;; V4SF operand to V2DI, emitted as vcvttps2qq / vcvttps2uqq.  Only the low
;; 64 bits of the source are read, so the Intel-syntax operand uses %q1 (as
;; sse2_cvtps2pd does) to print a memory source with a 64-bit size instead
;; of the full V4SF size.
5212(define_insn "<fixsuffix>fix_truncv2sfv2di2<mask_name>"
5213 [(set (match_operand:V2DI 0 "register_operand" "=v")
5214 (any_fix:V2DI
5215 (vec_select:V2SF
5216 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
5217 (parallel [(const_int 0) (const_int 1)]))))]
5218 "TARGET_AVX512DQ && TARGET_AVX512VL"
5219 "vcvttps2<fixsuffix>qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
5220 [(set_attr "type" "ssecvt")
5221 (set_attr "prefix" "evex")
5222 (set_attr "mode" "TI")])
5223
;; Truncating unsigned conversion (unsigned_fix) of an SF vector to the
;; integer vector mode <sseintvecmode>, emitted as vcvttps2udq.
5224(define_insn "ufix_trunc<mode><sseintvecmodelower>2<mask_name>"
5225 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
5226 (unsigned_fix:<sseintvecmode>
5227 (match_operand:VF1_128_256VL 1 "nonimmediate_operand" "vm")))]
5228 "TARGET_AVX512VL"
5229 "vcvttps2udq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5230 [(set_attr "type" "ssecvt")
5231 (set_attr "prefix" "evex")
5232 (set_attr "mode" "<sseintvecmode2>")])
5233
;; Expander producing a V8SI result whose low half is the truncating (fix)
;; conversion of a V4DF operand and whose high half is operand 2, which the
;; preparation statement sets to the V4SI zero constant.
83c4576f 5234(define_expand "avx_cvttpd2dq256_2"
abd4f58b 5235 [(set (match_operand:V8SI 0 "register_operand")
83c4576f 5236 (vec_concat:V8SI
abd4f58b 5237 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand"))
83c4576f 5238 (match_dup 2)))]
5239 "TARGET_AVX"
5240 "operands[2] = CONST0_RTX (V4SImode);")
5241
;; Truncating signed conversion (fix) of a V2DF operand to V2SI, placed in
;; the low half of a V4SI result whose high half is zero.  The C output code
;; picks the VEX-style mnemonic (with mask operand text) when TARGET_AVX is
;; set and the legacy SSE mnemonic otherwise.
0c4232b3 5242(define_insn "sse2_cvttpd2dq<mask_name>"
5243 [(set (match_operand:V4SI 0 "register_operand" "=v")
2a466fea 5244 (vec_concat:V4SI
0c4232b3 5245 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
5246 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
5247 "TARGET_SSE2 && <mask_avx512vl_condition>"
887423c0 5248{
5249 if (TARGET_AVX)
0c4232b3 5250 return "vcvttpd2dq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
887423c0 5251 else
5252 return "cvttpd2dq\t{%1, %0|%0, %1}";
5253}
2a466fea 5254 [(set_attr "type" "ssecvt")
97436e92 5255 (set_attr "amdfam10_decode" "double")
68ff067d 5256 (set_attr "athlon_decode" "vector")
887423c0 5257 (set_attr "bdver1_decode" "double")
5258 (set_attr "prefix" "maybe_vex")
5259 (set_attr "mode" "TI")])
ed30e0a6 5260
;; Narrow operand 2 (V2DF) with float_truncate and merge the result into
;; element 0 of operand 1 (vec_merge selector 1); the remaining elements come
;; from operand 1.  Alternatives 0/1 are the two-operand SSE forms (operand 1
;; tied to the destination), alternative 2 the three-operand AVX form.
0b7cc9c6 5261(define_insn "sse2_cvtsd2ss<round_name>"
e13e1b39 5262 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
2a466fea 5263 (vec_merge:V4SF
5264 (vec_duplicate:V4SF
5265 (float_truncate:V2SF
0b7cc9c6 5266 (match_operand:V2DF 2 "nonimmediate_operand" "x,m,<round_constraint>")))
e13e1b39 5267 (match_operand:V4SF 1 "register_operand" "0,0,v")
2a466fea 5268 (const_int 1)))]
5269 "TARGET_SSE2"
887423c0 5270 "@
5271 cvtsd2ss\t{%2, %0|%0, %2}
c358a059 5272 cvtsd2ss\t{%2, %0|%0, %q2}
0b7cc9c6 5273 vcvtsd2ss\t{<round_op3>%2, %1, %0|%0, %1, %q2<round_op3>}"
887423c0 5274 [(set_attr "isa" "noavx,noavx,avx")
5275 (set_attr "type" "ssecvt")
5276 (set_attr "athlon_decode" "vector,double,*")
5277 (set_attr "amdfam10_decode" "vector,double,*")
5278 (set_attr "bdver1_decode" "direct,direct,*")
6470d004 5279 (set_attr "btver2_decode" "double,double,double")
0b7cc9c6 5280 (set_attr "prefix" "orig,orig,<round_prefix>")
2a466fea 5281 (set_attr "mode" "SF")])
79eddd43 5282
;; Widen elements 0 and 1 of operand 2 (V4SF) with float_extend and merge the
;; result into element 0 of operand 1 (vec_merge selector 1); the other
;; element comes from operand 1.  Alternatives 0/1 are the two-operand SSE
;; forms, alternative 2 the three-operand AVX form.  Memory sources are
;; printed with the %k modifier in Intel syntax.
0b7cc9c6 5283(define_insn "sse2_cvtss2sd<round_saeonly_name>"
e13e1b39 5284 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
79eddd43 5285 (vec_merge:V2DF
2a466fea 5286 (float_extend:V2DF
5287 (vec_select:V2SF
0dc2f097 5288 (match_operand:V4SF 2 "<round_saeonly_nimm_predicate>" "x,m,<round_saeonly_constraint>")
2a466fea 5289 (parallel [(const_int 0) (const_int 1)])))
e13e1b39 5290 (match_operand:V2DF 1 "register_operand" "0,0,v")
79eddd43 5291 (const_int 1)))]
5292 "TARGET_SSE2"
887423c0 5293 "@
5294 cvtss2sd\t{%2, %0|%0, %2}
c358a059 5295 cvtss2sd\t{%2, %0|%0, %k2}
0b7cc9c6 5296 vcvtss2sd\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %k2<round_saeonly_op3>}"
887423c0 5297 [(set_attr "isa" "noavx,noavx,avx")
5298 (set_attr "type" "ssecvt")
5299 (set_attr "amdfam10_decode" "vector,double,*")
5300 (set_attr "athlon_decode" "direct,direct,*")
5301 (set_attr "bdver1_decode" "direct,direct,*")
6470d004 5302 (set_attr "btver2_decode" "double,double,double")
0b7cc9c6 5303 (set_attr "prefix" "orig,orig,<round_saeonly_prefix>")
5802c0cb 5304 (set_attr "mode" "DF")])
5305
;; Narrow a V8DF operand to V8SF (float_truncate), emitted as vcvtpd2ps, with
;; masked and rounding variants generated through <mask_name>/<round_name>.
be60ab96 5306(define_insn "<mask_codefor>avx512f_cvtpd2ps512<mask_name><round_name>"
697a43f8 5307 [(set (match_operand:V8SF 0 "register_operand" "=v")
5308 (float_truncate:V8SF
be60ab96 5309 (match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")))]
697a43f8 5310 "TARGET_AVX512F"
be60ab96 5311 "vcvtpd2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
697a43f8 5312 [(set_attr "type" "ssecvt")
5313 (set_attr "prefix" "evex")
5314 (set_attr "mode" "V8SF")])
5315
;; Narrow a V4DF operand to V4SF (float_truncate), emitted as vcvtpd2ps with
;; the AT&T-only "{y}" suffix.  The masked form requires
;; <mask_avx512vl_condition>.
0dc2f097 5316(define_insn "avx_cvtpd2ps256<mask_name>"
5317 [(set (match_operand:V4SF 0 "register_operand" "=v")
ed30e0a6 5318 (float_truncate:V4SF
0dc2f097 5319 (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
5320 "TARGET_AVX && <mask_avx512vl_condition>"
5321 "vcvtpd2ps{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
ed30e0a6 5322 [(set_attr "type" "ssecvt")
0dc2f097 5323 (set_attr "prefix" "maybe_evex")
6470d004 5324 (set_attr "btver2_decode" "vector")
ed30e0a6 5325 (set_attr "mode" "V4SF")])
5326
;; Expander producing a V4SF result whose low half is the float_truncate of a
;; V2DF operand and whose high half is operand 2, which the preparation
;; statement sets to the V2SF zero constant.
2a466fea 5327(define_expand "sse2_cvtpd2ps"
abd4f58b 5328 [(set (match_operand:V4SF 0 "register_operand")
2a466fea 5329 (vec_concat:V4SF
5330 (float_truncate:V2SF
abd4f58b 5331 (match_operand:V2DF 1 "nonimmediate_operand"))
2a466fea 5332 (match_dup 2)))]
5802c0cb 5333 "TARGET_SSE2"
2a466fea 5334 "operands[2] = CONST0_RTX (V2SFmode);")
5802c0cb 5335
;; Masked expander: the V2DF -> V2SF truncation, concatenated with a zero
;; V2SF (operand 4, set by the preparation statement), is merged with
;; operand 2 under the QImode mask in operand 3.
0dc2f097 5336(define_expand "sse2_cvtpd2ps_mask"
5337 [(set (match_operand:V4SF 0 "register_operand")
5338 (vec_merge:V4SF
5339 (vec_concat:V4SF
5340 (float_truncate:V2SF
5341 (match_operand:V2DF 1 "nonimmediate_operand"))
5342 (match_dup 4))
5343 (match_operand:V4SF 2 "register_operand")
5344 (match_operand:QI 3 "register_operand")))]
5345 "TARGET_SSE2"
5346 "operands[4] = CONST0_RTX (V2SFmode);")
5347
;; Insn for the V2DF -> V2SF truncation with a zero high half (operand 2 must
;; satisfy const0_operand).  Because operand 2 is already in use, the mask
;; text is emitted through <mask_operand3>.  The C output code picks the
;; VEX-style mnemonic when TARGET_AVX is set and the legacy one otherwise.
5348(define_insn "*sse2_cvtpd2ps<mask_name>"
5349 [(set (match_operand:V4SF 0 "register_operand" "=v")
2a466fea 5350 (vec_concat:V4SF
5351 (float_truncate:V2SF
0dc2f097 5352 (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
abd4f58b 5353 (match_operand:V2SF 2 "const0_operand")))]
0dc2f097 5354 "TARGET_SSE2 && <mask_avx512vl_condition>"
887423c0 5355{
5356 if (TARGET_AVX)
0dc2f097 5357 return "vcvtpd2ps{x}\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}";
887423c0 5358 else
5359 return "cvtpd2ps\t{%1, %0|%0, %1}";
5360}
2a466fea 5361 [(set_attr "type" "ssecvt")
97436e92 5362 (set_attr "amdfam10_decode" "double")
68ff067d 5363 (set_attr "athlon_decode" "vector")
887423c0 5364 (set_attr "bdver1_decode" "double")
5365 (set_attr "prefix_data16" "1")
5366 (set_attr "prefix" "maybe_vex")
5367 (set_attr "mode" "V4SF")])
79eddd43 5368
6615b722 5369;; For <sse2_avx_avx512f>_cvtps2pd<avxsizesuffix> insn pattern
;; Maps a DF vector mode (V8DF, V4DF) to the SF vector mode with the same
;; number of elements.
5370(define_mode_attr sf2dfmode
5371 [(V8DF "V8SF") (V4DF "V4SF")])
5372
;; Widen an SF vector (<sf2dfmode>) to the DF vector with the same element
;; count (float_extend), emitted as vcvtps2pd.  The masked and SAE variants
;; are restricted by <mask_mode512bit_condition> and
;; <round_saeonly_mode512bit_condition> respectively.
dbfe84d5 5373(define_insn "<sse2_avx_avx512f>_cvtps2pd<avxsizesuffix><mask_name><round_saeonly_name>"
6615b722 5374 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
5375 (float_extend:VF2_512_256
dbfe84d5 5376 (match_operand:<sf2dfmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
5377 "TARGET_AVX && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
5378 "vcvtps2pd\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
ed30e0a6 5379 [(set_attr "type" "ssecvt")
6615b722 5380 (set_attr "prefix" "maybe_vex")
5381 (set_attr "mode" "<MODE>")])
ed30e0a6 5382
;; Widen elements 0..3 (the low half) of a V8SF operand to V4DF, emitted as
;; vcvtps2pd.
;; NOTE(review): %x1 presumably prints operand 1 at 128-bit width to match
;; the half that is actually read -- confirm against the operand printer.
8cedf886 5383(define_insn "*avx_cvtps2pd256_2"
5384 [(set (match_operand:V4DF 0 "register_operand" "=x")
5385 (float_extend:V4DF
5386 (vec_select:V4SF
5387 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
887423c0 5388 (parallel [(const_int 0) (const_int 1)
5389 (const_int 2) (const_int 3)]))))]
8cedf886 5390 "TARGET_AVX"
5391 "vcvtps2pd\t{%x1, %0|%0, %x1}"
5392 [(set_attr "type" "ssecvt")
5393 (set_attr "prefix" "vex")
5394 (set_attr "mode" "V4DF")])
5395
;; Widen elements 0..7 (the low half) of a V16SF operand to V8DF, emitted as
;; vcvtps2pd.
;; NOTE(review): %t1 presumably prints operand 1 at 256-bit width to match
;; the half that is actually read -- confirm against the operand printer.
697a43f8 5396(define_insn "vec_unpacks_lo_v16sf"
5397 [(set (match_operand:V8DF 0 "register_operand" "=v")
5398 (float_extend:V8DF
5399 (vec_select:V8SF
5400 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
5401 (parallel [(const_int 0) (const_int 1)
5402 (const_int 2) (const_int 3)
5403 (const_int 4) (const_int 5)
5404 (const_int 6) (const_int 7)]))))]
5405 "TARGET_AVX512F"
5406 "vcvtps2pd\t{%t1, %0|%0, %t1}"
5407 [(set_attr "type" "ssecvt")
5408 (set_attr "prefix" "evex")
5409 (set_attr "mode" "V8DF")])
5410
;; Vector-to-mask conversion (UNSPEC_CVTINT2MASK) for the VI12_AVX512VL
;; modes: produces a mask-register value from a vector register, emitted as
;; vpmov<ssemodesuffix>2m.  Requires TARGET_AVX512BW.
54f53cd0 5411(define_insn "<avx512>_cvt<ssemodesuffix>2mask<mode>"
5412 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
5413 (unspec:<avx512fmaskmode>
5414 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")]
5415 UNSPEC_CVTINT2MASK))]
5416 "TARGET_AVX512BW"
5417 "vpmov<ssemodesuffix>2m\t{%1, %0|%0, %1}"
5418 [(set_attr "prefix" "evex")
5419 (set_attr "mode" "<sseinsnmode>")])
5420
;; Vector-to-mask conversion (UNSPEC_CVTINT2MASK) for the VI48_AVX512VL
;; modes; same template as the VI12 pattern but requires TARGET_AVX512DQ.
5421(define_insn "<avx512>_cvt<ssemodesuffix>2mask<mode>"
5422 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
5423 (unspec:<avx512fmaskmode>
5424 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")]
5425 UNSPEC_CVTINT2MASK))]
5426 "TARGET_AVX512DQ"
5427 "vpmov<ssemodesuffix>2m\t{%1, %0|%0, %1}"
5428 [(set_attr "prefix" "evex")
5429 (set_attr "mode" "<sseinsnmode>")])
5430
;; Mask-to-vector expander for the VI12_AVX512VL modes: builds a vec_merge
;; that selects, under the mask in operand 1, between an all-ones vector
;; (operand 2) and a zero vector (operand 3); both constants are created in
;; the preparation statements.
5431(define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>"
5432 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
5433 (vec_merge:VI12_AVX512VL
5434 (match_dup 2)
5435 (match_dup 3)
5436 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
5437 "TARGET_AVX512BW"
5438 {
5439 operands[2] = CONSTM1_RTX (<MODE>mode);
5440 operands[3] = CONST0_RTX (<MODE>mode);
5441 })
5442
;; Insn matching the mask-to-vector vec_merge for the VI12_AVX512VL modes:
;; operand 2 must be all-ones and operand 3 zero; emitted as
;; vpmovm2<ssemodesuffix>.
5443(define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>"
5444 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
5445 (vec_merge:VI12_AVX512VL
5446 (match_operand:VI12_AVX512VL 2 "constm1_operand")
5447 (match_operand:VI12_AVX512VL 3 "const0_operand")
5448 (match_operand:<avx512fmaskmode> 1 "register_operand" "Yk")))]
5449 "TARGET_AVX512BW"
5450 "vpmovm2<ssemodesuffix>\t{%1, %0|%0, %1}"
5451 [(set_attr "prefix" "evex")
5452 (set_attr "mode" "<sseinsnmode>")])
5453
;; Mask-to-vector expander for the VI48_AVX512VL modes: builds a vec_merge
;; that selects, under the mask in operand 1, between an all-ones vector
;; (operand 2) and a zero vector (operand 3).  Requires TARGET_AVX512DQ.
5454(define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>"
5455 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
5456 (vec_merge:VI48_AVX512VL
5457 (match_dup 2)
5458 (match_dup 3)
5459 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
5460 "TARGET_AVX512DQ"
5461 "{
5462 operands[2] = CONSTM1_RTX (<MODE>mode);
5463 operands[3] = CONST0_RTX (<MODE>mode);
5464 }")
5465
;; Matcher for the mask-to-vector vec_merge over VI48_AVX512VL: operand 2
;; must satisfy constm1_operand and operand 3 const0_operand, with mask
;; register operand 1 ("Yk") as the selector.  Emits vpmovm2<ssemodesuffix>;
;; enabled by TARGET_AVX512DQ.
5466(define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>"
5467 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
5468 (vec_merge:VI48_AVX512VL
5469 (match_operand:VI48_AVX512VL 2 "constm1_operand")
5470 (match_operand:VI48_AVX512VL 3 "const0_operand")
5471 (match_operand:<avx512fmaskmode> 1 "register_operand" "Yk")))]
5472 "TARGET_AVX512DQ"
5473 "vpmovm2<ssemodesuffix>\t{%1, %0|%0, %1}"
5474 [(set_attr "prefix" "evex")
5475 (set_attr "mode" "<sseinsnmode>")])
5476
;; Float-extend elements 0 and 1 of V4SF operand 1 (register or memory)
;; into V2DF register operand 0.  Emits (v)cvtps2pd; the <mask_name> /
;; <mask_operand2> substitutions come from a subst defined elsewhere.
0dc2f097 5477(define_insn "sse2_cvtps2pd<mask_name>"
5478 [(set (match_operand:V2DF 0 "register_operand" "=v")
2a466fea 5479 (float_extend:V2DF
5480 (vec_select:V2SF
0dc2f097 5481 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
2a466fea 5482 (parallel [(const_int 0) (const_int 1)]))))]
0dc2f097 5483 "TARGET_SSE2 && <mask_avx512vl_condition>"
5484 "%vcvtps2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
2a466fea 5485 [(set_attr "type" "ssecvt")
97436e92 5486 (set_attr "amdfam10_decode" "direct")
68ff067d 5487 (set_attr "athlon_decode" "double")
887423c0 5488 (set_attr "bdver1_decode" "double")
5489 (set_attr "prefix_data16" "0")
5490 (set_attr "prefix" "maybe_vex")
5491 (set_attr "mode" "V2DF")])
5802c0cb 5492
;; Float-extend the high two elements of V4SF operand 1 to V2DF.
;; Step 1: a fresh V4SF scratch (operand 2) receives elements 6,7,2,3 of
;; the concatenation (scratch, operand 1), i.e. elements 2 and 3 of
;; operand 1 land in scratch positions 0 and 1.
;; Step 2: elements 0 and 1 of the scratch are float-extended into operand 0.
2a466fea 5493(define_expand "vec_unpacks_hi_v4sf"
5494 [(set (match_dup 2)
5495 (vec_select:V4SF
5496 (vec_concat:V8SF
5497 (match_dup 2)
abd4f58b 5498 (match_operand:V4SF 1 "nonimmediate_operand"))
887423c0 5499 (parallel [(const_int 6) (const_int 7)
5500 (const_int 2) (const_int 3)])))
abd4f58b 5501 (set (match_operand:V2DF 0 "register_operand")
2a466fea 5502 (float_extend:V2DF
5503 (vec_select:V2SF
5504 (match_dup 2)
5505 (parallel [(const_int 0) (const_int 1)]))))]
5bd1ff1d 5506 "TARGET_SSE2"
5507 "operands[2] = gen_reg_rtx (V4SFmode);")
d5d869ee 5508
;; Float-extend the high half of V8SF operand 1 to V4DF: elements 4..7 are
;; selected into a fresh V4SF scratch (operand 2), which is then
;; float-extended into operand 0.  Requires TARGET_AVX.
8cedf886 5509(define_expand "vec_unpacks_hi_v8sf"
5510 [(set (match_dup 2)
5511 (vec_select:V4SF
9f8bc6e8 5512 (match_operand:V8SF 1 "register_operand")
887423c0 5513 (parallel [(const_int 4) (const_int 5)
5514 (const_int 6) (const_int 7)])))
abd4f58b 5515 (set (match_operand:V4DF 0 "register_operand")
8cedf886 5516 (float_extend:V4DF
5517 (match_dup 2)))]
5518 "TARGET_AVX"
887423c0 5519 "operands[2] = gen_reg_rtx (V4SFmode);")
8cedf886 5520
;; Float-extend the high half of V16SF operand 1 to V8DF: elements 8..15
;; are selected into a fresh V8SF scratch (operand 2), which is then
;; float-extended into operand 0.  Requires TARGET_AVX512F.
697a43f8 5521(define_expand "vec_unpacks_hi_v16sf"
5522 [(set (match_dup 2)
5523 (vec_select:V8SF
9f8bc6e8 5524 (match_operand:V16SF 1 "register_operand")
697a43f8 5525 (parallel [(const_int 8) (const_int 9)
5526 (const_int 10) (const_int 11)
5527 (const_int 12) (const_int 13)
5528 (const_int 14) (const_int 15)])))
5529 (set (match_operand:V8DF 0 "register_operand")
5530 (float_extend:V8DF
5531 (match_dup 2)))]
5532"TARGET_AVX512F"
5533"operands[2] = gen_reg_rtx (V8SFmode);")
5534
;; Float-extend elements 0 and 1 of V4SF operand 1 into V2DF operand 0.
;; No preparation code; the generated RTL has the same shape as the
;; sse2_cvtps2pd insn pattern.
2a466fea 5535(define_expand "vec_unpacks_lo_v4sf"
abd4f58b 5536 [(set (match_operand:V2DF 0 "register_operand")
2a466fea 5537 (float_extend:V2DF
5538 (vec_select:V2SF
abd4f58b 5539 (match_operand:V4SF 1 "nonimmediate_operand")
2a466fea 5540 (parallel [(const_int 0) (const_int 1)]))))]
5541 "TARGET_SSE2")
5802c0cb 5542
;; Float-extend elements 0..3 of V8SF operand 1 into V4DF operand 0.
;; No preparation code.  Requires TARGET_AVX.
8cedf886 5543(define_expand "vec_unpacks_lo_v8sf"
abd4f58b 5544 [(set (match_operand:V4DF 0 "register_operand")
8cedf886 5545 (float_extend:V4DF
5546 (vec_select:V4SF
abd4f58b 5547 (match_operand:V8SF 1 "nonimmediate_operand")
887423c0 5548 (parallel [(const_int 0) (const_int 1)
5549 (const_int 2) (const_int 3)]))))]
8cedf886 5550 "TARGET_AVX")
5551
;; Maps an integer vector mode to a floating-point vector mode with half as
;; many elements.  Used as the destination mode (and as the FLOAT result
;; mode) by the vec_unpack{s,u}_float_{hi,lo}_<mode> expanders.
ded0808e 5552(define_mode_attr sseunpackfltmode
9abbf9e6 5553 [(V8HI "V4SF") (V4SI "V2DF") (V16HI "V8SF")
5554 (V8SI "V4DF") (V32HI "V16SF") (V16SI "V8DF")])
ded0808e 5555
5556(define_expand "vec_unpacks_float_hi_<mode>"
abd4f58b 5557 [(match_operand:<sseunpackfltmode> 0 "register_operand")
9abbf9e6 5558 (match_operand:VI2_AVX512F 1 "register_operand")]
5802c0cb 5559 "TARGET_SSE2"
2a466fea 5560{
ded0808e 5561 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
5802c0cb 5562
ded0808e 5563 emit_insn (gen_vec_unpacks_hi_<mode> (tmp, operands[1]));
d1f9b275 5564 emit_insn (gen_rtx_SET (operands[0],
ded0808e 5565 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
2a466fea 5566 DONE;
5567})
5802c0cb 5568
;; Over VI2_AVX512F: unpack operand 1 with gen_vec_unpacks_lo_<mode> into a
;; temporary of <sseunpackmode>mode, then set operand 0 to the FLOAT
;; conversion of that temporary in <sseunpackfltmode>mode.
ded0808e 5569(define_expand "vec_unpacks_float_lo_<mode>"
abd4f58b 5570 [(match_operand:<sseunpackfltmode> 0 "register_operand")
9abbf9e6 5571 (match_operand:VI2_AVX512F 1 "register_operand")]
76405cce 5572 "TARGET_SSE2"
5573{
ded0808e 5574 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
76405cce 5575
ded0808e 5576 emit_insn (gen_vec_unpacks_lo_<mode> (tmp, operands[1]));
d1f9b275 5577 emit_insn (gen_rtx_SET (operands[0],
ded0808e 5578 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
2a466fea 5579 DONE;
5580})
5802c0cb 5581
;; Over VI2_AVX512F: unpack operand 1 with gen_vec_unpacku_hi_<mode> into a
;; temporary of <sseunpackmode>mode, then set operand 0 to the FLOAT
;; conversion of that temporary in <sseunpackfltmode>mode.
ded0808e 5582(define_expand "vec_unpacku_float_hi_<mode>"
abd4f58b 5583 [(match_operand:<sseunpackfltmode> 0 "register_operand")
9abbf9e6 5584 (match_operand:VI2_AVX512F 1 "register_operand")]
5802c0cb 5585 "TARGET_SSE2"
2a466fea 5586{
ded0808e 5587 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
5802c0cb 5588
ded0808e 5589 emit_insn (gen_vec_unpacku_hi_<mode> (tmp, operands[1]));
d1f9b275 5590 emit_insn (gen_rtx_SET (operands[0],
ded0808e 5591 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
2a466fea 5592 DONE;
5593})
5802c0cb 5594
;; Over VI2_AVX512F: unpack operand 1 with gen_vec_unpacku_lo_<mode> into a
;; temporary of <sseunpackmode>mode, then set operand 0 to the FLOAT
;; conversion of that temporary in <sseunpackfltmode>mode.
ded0808e 5595(define_expand "vec_unpacku_float_lo_<mode>"
abd4f58b 5596 [(match_operand:<sseunpackfltmode> 0 "register_operand")
9abbf9e6 5597 (match_operand:VI2_AVX512F 1 "register_operand")]
5802c0cb 5598 "TARGET_SSE2"
2a466fea 5599{
ded0808e 5600 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
5802c0cb 5601
ded0808e 5602 emit_insn (gen_vec_unpacku_lo_<mode> (tmp, operands[1]));
d1f9b275 5603 emit_insn (gen_rtx_SET (operands[0],
ded0808e 5604 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
2a466fea 5605 DONE;
5606})
5802c0cb 5607
;; Convert the high two elements of V4SI operand 1 to V2DF.
;; Step 1: a fresh V4SI scratch (operand 2) receives elements 2,3,2,3 of
;; operand 1, so the high pair sits in scratch positions 0 and 1.
;; Step 2: elements 0 and 1 of the scratch are converted with `float'.
2a466fea 5608(define_expand "vec_unpacks_float_hi_v4si"
5609 [(set (match_dup 2)
5610 (vec_select:V4SI
abd4f58b 5611 (match_operand:V4SI 1 "nonimmediate_operand")
887423c0 5612 (parallel [(const_int 2) (const_int 3)
5613 (const_int 2) (const_int 3)])))
abd4f58b 5614 (set (match_operand:V2DF 0 "register_operand")
5deb404d 5615 (float:V2DF
2a466fea 5616 (vec_select:V2SI
5617 (match_dup 2)
5618 (parallel [(const_int 0) (const_int 1)]))))]
5bd1ff1d 5619 "TARGET_SSE2"
5620 "operands[2] = gen_reg_rtx (V4SImode);")
5802c0cb 5621
;; Convert elements 0 and 1 of V4SI operand 1 to V2DF operand 0 with
;; `float'.  No preparation code.
2a466fea 5622(define_expand "vec_unpacks_float_lo_v4si"
abd4f58b 5623 [(set (match_operand:V2DF 0 "register_operand")
2a466fea 5624 (float:V2DF
5625 (vec_select:V2SI
abd4f58b 5626 (match_operand:V4SI 1 "nonimmediate_operand")
2a466fea 5627 (parallel [(const_int 0) (const_int 1)]))))]
5628 "TARGET_SSE2")
5629
;; Convert the high half of V8SI operand 1 to V4DF: elements 4..7 are
;; selected into a fresh V4SI scratch (operand 2), which is then converted
;; with `float' into operand 0.  Requires TARGET_AVX.
8cedf886 5630(define_expand "vec_unpacks_float_hi_v8si"
5631 [(set (match_dup 2)
5632 (vec_select:V4SI
abd4f58b 5633 (match_operand:V8SI 1 "nonimmediate_operand")
887423c0 5634 (parallel [(const_int 4) (const_int 5)
5635 (const_int 6) (const_int 7)])))
abd4f58b 5636 (set (match_operand:V4DF 0 "register_operand")
5deb404d 5637 (float:V4DF
8cedf886 5638 (match_dup 2)))]
5639 "TARGET_AVX"
5640 "operands[2] = gen_reg_rtx (V4SImode);")
5641
;; Convert elements 0..3 of V8SI operand 1 to V4DF operand 0 with `float'.
;; No preparation code.  Requires TARGET_AVX.
5642(define_expand "vec_unpacks_float_lo_v8si"
abd4f58b 5643 [(set (match_operand:V4DF 0 "register_operand")
8cedf886 5644 (float:V4DF
5645 (vec_select:V4SI
abd4f58b 5646 (match_operand:V8SI 1 "nonimmediate_operand")
887423c0 5647 (parallel [(const_int 0) (const_int 1)
5648 (const_int 2) (const_int 3)]))))]
8cedf886 5649 "TARGET_AVX")
5650
;; Convert the high half of V16SI operand 1 to V8DF: elements 8..15 are
;; selected into a fresh V8SI scratch (operand 2), which is then converted
;; with `float' into operand 0.  Requires TARGET_AVX512F.
697a43f8 5651(define_expand "vec_unpacks_float_hi_v16si"
5652 [(set (match_dup 2)
5653 (vec_select:V8SI
5654 (match_operand:V16SI 1 "nonimmediate_operand")
5655 (parallel [(const_int 8) (const_int 9)
5656 (const_int 10) (const_int 11)
5657 (const_int 12) (const_int 13)
5658 (const_int 14) (const_int 15)])))
5659 (set (match_operand:V8DF 0 "register_operand")
5660 (float:V8DF
5661 (match_dup 2)))]
5662 "TARGET_AVX512F"
5663 "operands[2] = gen_reg_rtx (V8SImode);")
5664
;; Convert elements 0..7 of V16SI operand 1 to V8DF operand 0 with `float'.
;; No preparation code.  Requires TARGET_AVX512F.
5665(define_expand "vec_unpacks_float_lo_v16si"
5666 [(set (match_operand:V8DF 0 "register_operand")
5667 (float:V8DF
5668 (vec_select:V8SI
5669 (match_operand:V16SI 1 "nonimmediate_operand")
5670 (parallel [(const_int 0) (const_int 1)
5671 (const_int 2) (const_int 3)
5672 (const_int 4) (const_int 5)
5673 (const_int 6) (const_int 7)]))))]
5674 "TARGET_AVX512F")
5675
;; Convert the high two elements of V4SI operand 1 to V2DF, with a fix-up
;; step after the signed `float' conversion.  Emitted sequence:
;;   op5 = elements 2,3,2,3 of operand 1            (V4SI scratch)
;;   op6 = float (elements 0,1 of op5)              (V2DF)
;;   op7 = op6 < op3, where op3 is the zero vector
;;   op8 = op7 & op4, where op4 is built from the constant 2**32
;;   op0 = op6 + op8
;; NOTE(review): presumably this implements unsigned conversion by adding
;; 2**32 to lanes that converted as negative -- confirm against the `lt'
;; pattern's result encoding.
ea3bb9f4 5676(define_expand "vec_unpacku_float_hi_v4si"
5677 [(set (match_dup 5)
5678 (vec_select:V4SI
abd4f58b 5679 (match_operand:V4SI 1 "nonimmediate_operand")
887423c0 5680 (parallel [(const_int 2) (const_int 3)
5681 (const_int 2) (const_int 3)])))
ea3bb9f4 5682 (set (match_dup 6)
5deb404d 5683 (float:V2DF
ea3bb9f4 5684 (vec_select:V2SI
5685 (match_dup 5)
5686 (parallel [(const_int 0) (const_int 1)]))))
5687 (set (match_dup 7)
5688 (lt:V2DF (match_dup 6) (match_dup 3)))
5689 (set (match_dup 8)
5690 (and:V2DF (match_dup 7) (match_dup 4)))
abd4f58b 5691 (set (match_operand:V2DF 0 "register_operand")
ea3bb9f4 5692 (plus:V2DF (match_dup 6) (match_dup 8)))]
5bd1ff1d 5693 "TARGET_SSE2"
ea3bb9f4 5694{
5695 REAL_VALUE_TYPE TWO32r;
5696 rtx x;
5697 int i;
5698
/* TWO32r = 1.0 * 2**32, materialized as a DFmode constant.  */
5699 real_ldexp (&TWO32r, &dconst1, 32);
5700 x = const_double_from_real_value (TWO32r, DFmode);
5701
5702 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
8cedf886 5703 operands[4] = force_reg (V2DFmode,
5704 ix86_build_const_vector (V2DFmode, 1, x));
ea3bb9f4 5705
5706 operands[5] = gen_reg_rtx (V4SImode);
33541f98 5707
/* Operands 6..8 are fresh V2DF intermediates.  */
ea3bb9f4 5708 for (i = 6; i < 9; i++)
5709 operands[i] = gen_reg_rtx (V2DFmode);
5710})
5711
;; Convert elements 0 and 1 of V4SI operand 1 to V2DF, with a fix-up step
;; after the signed `float' conversion.  Emitted sequence:
;;   op5 = float (elements 0,1 of operand 1)        (V2DF)
;;   op6 = op5 < op3, where op3 is the zero vector
;;   op7 = op6 & op4, where op4 is built from the constant 2**32
;;   op0 = op5 + op7
;; NOTE(review): presumably an unsigned-conversion fix-up -- confirm.
5712(define_expand "vec_unpacku_float_lo_v4si"
5713 [(set (match_dup 5)
5714 (float:V2DF
5715 (vec_select:V2SI
abd4f58b 5716 (match_operand:V4SI 1 "nonimmediate_operand")
ea3bb9f4 5717 (parallel [(const_int 0) (const_int 1)]))))
5718 (set (match_dup 6)
5719 (lt:V2DF (match_dup 5) (match_dup 3)))
5720 (set (match_dup 7)
5721 (and:V2DF (match_dup 6) (match_dup 4)))
abd4f58b 5722 (set (match_operand:V2DF 0 "register_operand")
ea3bb9f4 5723 (plus:V2DF (match_dup 5) (match_dup 7)))]
5724 "TARGET_SSE2"
5725{
5726 REAL_VALUE_TYPE TWO32r;
5727 rtx x;
5728 int i;
5729
/* TWO32r = 1.0 * 2**32, materialized as a DFmode constant.  */
5730 real_ldexp (&TWO32r, &dconst1, 32);
5731 x = const_double_from_real_value (TWO32r, DFmode);
5732
5733 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
8cedf886 5734 operands[4] = force_reg (V2DFmode,
5735 ix86_build_const_vector (V2DFmode, 1, x));
ea3bb9f4 5736
/* Operands 5..7 are fresh V2DF intermediates.  */
5737 for (i = 5; i < 8; i++)
5738 operands[i] = gen_reg_rtx (V2DFmode);
5739})
5740
;; Convert the high half of V8SI operand 1 to V4DF, with a fix-up step
;; after the signed conversion.  Temporaries:
;;   tmp[0] zero vector, tmp[1] vector built from 2**32, tmp[5] V4SI high
;;   half, tmp[2] converted value, tmp[3] (tmp[2] < 0) result,
;;   tmp[4] = tmp[3] & tmp[1]; operand 0 = tmp[2] + tmp[4].
ded0808e 5741(define_expand "vec_unpacku_float_hi_v8si"
abd4f58b 5742 [(match_operand:V4DF 0 "register_operand")
5743 (match_operand:V8SI 1 "register_operand")]
ded0808e 5744 "TARGET_AVX"
5745{
5746 REAL_VALUE_TYPE TWO32r;
5747 rtx x, tmp[6];
5748 int i;
5749
5750 real_ldexp (&TWO32r, &dconst1, 32);
5751 x = const_double_from_real_value (TWO32r, DFmode);
5752
5753 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
5754 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
5755 tmp[5] = gen_reg_rtx (V4SImode);
5756
5757 for (i = 2; i < 5; i++)
5758 tmp[i] = gen_reg_rtx (V4DFmode);
5759 emit_insn (gen_vec_extract_hi_v8si (tmp[5], operands[1]));
5f813a0a 5760 emit_insn (gen_floatv4siv4df2 (tmp[2], tmp[5]));
d1f9b275 5761 emit_insn (gen_rtx_SET (tmp[3], gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
ded0808e 5762 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
5763 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
5764 DONE;
5765})
5766
;; Convert the high half of V16SI operand 1 to V8DF, with a masked fix-up.
;; tmp[3] receives the high V8SI half, tmp[2] its converted V8DF value, and
;; k (QImode) the result of tmp[2] < zero vector (tmp[0]).  A masked add
;; (gen_addv8df3_mask) then combines tmp[2] with the 2**32 vector tmp[1]
;; under k, and the result is moved to operand 0.
;; NOTE(review): presumably tmp[2] is kept in lanes where k is clear --
;; confirm against the addv8df3_mask operand order.
1706116d 5767(define_expand "vec_unpacku_float_hi_v16si"
5768 [(match_operand:V8DF 0 "register_operand")
5769 (match_operand:V16SI 1 "register_operand")]
5770 "TARGET_AVX512F"
5771{
5772 REAL_VALUE_TYPE TWO32r;
5773 rtx k, x, tmp[4];
5774
5775 real_ldexp (&TWO32r, &dconst1, 32);
5776 x = const_double_from_real_value (TWO32r, DFmode);
5777
5778 tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
5779 tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
5780 tmp[2] = gen_reg_rtx (V8DFmode);
5781 tmp[3] = gen_reg_rtx (V8SImode);
5782 k = gen_reg_rtx (QImode);
5783
5784 emit_insn (gen_vec_extract_hi_v16si (tmp[3], operands[1]));
5785 emit_insn (gen_floatv8siv8df2 (tmp[2], tmp[3]));
d1f9b275 5786 emit_insn (gen_rtx_SET (k, gen_rtx_LT (QImode, tmp[2], tmp[0])));
1706116d 5787 emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
5788 emit_move_insn (operands[0], tmp[2]);
5789 DONE;
5790})
5791
;; Convert V8SI operand 1 to V4DF via gen_avx_cvtdq2pd256_2, with a fix-up
;; step after the conversion.  tmp[0] is the zero vector, tmp[1] a vector
;; built from 2**32, tmp[2] the converted value, tmp[3] the (tmp[2] < 0)
;; result and tmp[4] = tmp[3] & tmp[1]; operand 0 = tmp[2] + tmp[4].
;; NOTE(review): avx_cvtdq2pd256_2 presumably converts the low four
;; elements -- confirm against its definition.
ded0808e 5792(define_expand "vec_unpacku_float_lo_v8si"
abd4f58b 5793 [(match_operand:V4DF 0 "register_operand")
5794 (match_operand:V8SI 1 "nonimmediate_operand")]
ded0808e 5795 "TARGET_AVX"
5796{
5797 REAL_VALUE_TYPE TWO32r;
5798 rtx x, tmp[5];
5799 int i;
5800
5801 real_ldexp (&TWO32r, &dconst1, 32);
5802 x = const_double_from_real_value (TWO32r, DFmode);
5803
5804 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
5805 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
5806
5807 for (i = 2; i < 5; i++)
5808 tmp[i] = gen_reg_rtx (V4DFmode);
5809 emit_insn (gen_avx_cvtdq2pd256_2 (tmp[2], operands[1]));
d1f9b275 5810 emit_insn (gen_rtx_SET (tmp[3], gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
ded0808e 5811 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
5812 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
5813 DONE;
5814})
5815
;; Convert V16SI operand 1 to V8DF via gen_avx512f_cvtdq2pd512_2, with a
;; masked fix-up.  k (QImode) holds the result of tmp[2] < zero vector;
;; a masked add (gen_addv8df3_mask) combines tmp[2] with the 2**32 vector
;; tmp[1] under k, and the result is moved to operand 0.
;; NOTE(review): avx512f_cvtdq2pd512_2 presumably converts the low eight
;; elements -- confirm against its definition.
5220cab6 5816(define_expand "vec_unpacku_float_lo_v16si"
5817 [(match_operand:V8DF 0 "register_operand")
5818 (match_operand:V16SI 1 "nonimmediate_operand")]
5819 "TARGET_AVX512F"
5820{
5821 REAL_VALUE_TYPE TWO32r;
5822 rtx k, x, tmp[3];
5823
5824 real_ldexp (&TWO32r, &dconst1, 32);
5825 x = const_double_from_real_value (TWO32r, DFmode);
5826
5827 tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
5828 tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
5829 tmp[2] = gen_reg_rtx (V8DFmode);
5830 k = gen_reg_rtx (QImode);
5831
5832 emit_insn (gen_avx512f_cvtdq2pd512_2 (tmp[2], operands[1]));
d1f9b275 5833 emit_insn (gen_rtx_SET (k, gen_rtx_LT (QImode, tmp[2], tmp[0])));
5220cab6 5834 emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
5835 emit_move_insn (operands[0], tmp[2]);
5836 DONE;
5837})
5838
;; Over VF2_512_256: float_truncate operands 1 and 2 separately into fresh
;; <sf2dfmode> scratches (operands 3 and 4), then vec_concat the two
;; scratches into operand 0 of mode <ssePSmode>.  Requires TARGET_AVX.
6615b722 5839(define_expand "vec_pack_trunc_<mode>"
8cedf886 5840 [(set (match_dup 3)
6615b722 5841 (float_truncate:<sf2dfmode>
5842 (match_operand:VF2_512_256 1 "nonimmediate_operand")))
8cedf886 5843 (set (match_dup 4)
6615b722 5844 (float_truncate:<sf2dfmode>
5845 (match_operand:VF2_512_256 2 "nonimmediate_operand")))
5846 (set (match_operand:<ssePSmode> 0 "register_operand")
5847 (vec_concat:<ssePSmode>
8cedf886 5848 (match_dup 3)
5849 (match_dup 4)))]
5850 "TARGET_AVX"
5851{
6615b722 5852 operands[3] = gen_reg_rtx (<sf2dfmode>mode);
5853 operands[4] = gen_reg_rtx (<sf2dfmode>mode);
8cedf886 5854})
5855
;; Pack two V2DF operands (1 and 2) into V4SF operand 0.  Two strategies:
;;  - With AVX, when 128-bit AVX is not preferred and optimizing for speed:
;;    concatenate both inputs into one V4DF and convert it with
;;    gen_avx_cvtpd2ps256.
;;  - Otherwise: convert each input with gen_sse2_cvtpd2ps into its own
;;    V4SF temporary and combine the two with gen_sse_movlhps.
2a466fea 5856(define_expand "vec_pack_trunc_v2df"
abd4f58b 5857 [(match_operand:V4SF 0 "register_operand")
5858 (match_operand:V2DF 1 "nonimmediate_operand")
5859 (match_operand:V2DF 2 "nonimmediate_operand")]
5802c0cb 5860 "TARGET_SSE2"
2a466fea 5861{
71eeaf66 5862 rtx tmp0, tmp1;
5802c0cb 5863
f00377d6 5864 if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
71eeaf66 5865 {
5866 tmp0 = gen_reg_rtx (V4DFmode);
5867 tmp1 = force_reg (V2DFmode, operands[1]);
5802c0cb 5868
71eeaf66 5869 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
5870 emit_insn (gen_avx_cvtpd2ps256 (operands[0], tmp0));
5871 }
5872 else
5873 {
5874 tmp0 = gen_reg_rtx (V4SFmode);
5875 tmp1 = gen_reg_rtx (V4SFmode);
5876
5877 emit_insn (gen_sse2_cvtpd2ps (tmp0, operands[1]));
5878 emit_insn (gen_sse2_cvtpd2ps (tmp1, operands[2]));
5879 emit_insn (gen_sse_movlhps (operands[0], tmp0, tmp1));
5880 }
2a466fea 5881 DONE;
5882})
8d1e0693 5883
;; Pack two V8DF operands into V16SI operand 0: each input is converted
;; with gen_fix_truncv8dfv8si2 into a V8SI temporary, and the temporaries
;; are joined with gen_avx_vec_concatv16si.  Requires TARGET_AVX512F.
6615b722 5884(define_expand "vec_pack_sfix_trunc_v8df"
5885 [(match_operand:V16SI 0 "register_operand")
5886 (match_operand:V8DF 1 "nonimmediate_operand")
5887 (match_operand:V8DF 2 "nonimmediate_operand")]
5888 "TARGET_AVX512F"
5889{
5890 rtx r1, r2;
5891
5892 r1 = gen_reg_rtx (V8SImode);
5893 r2 = gen_reg_rtx (V8SImode);
5894
5895 emit_insn (gen_fix_truncv8dfv8si2 (r1, operands[1]));
5896 emit_insn (gen_fix_truncv8dfv8si2 (r2, operands[2]));
5897 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
5898 DONE;
5899})
5900
;; Pack two V4DF operands into V8SI operand 0: each input is converted
;; with gen_fix_truncv4dfv4si2 into a V4SI temporary, and the temporaries
;; are joined with gen_avx_vec_concatv8si.  Requires TARGET_AVX.
83c4576f 5901(define_expand "vec_pack_sfix_trunc_v4df"
abd4f58b 5902 [(match_operand:V8SI 0 "register_operand")
5903 (match_operand:V4DF 1 "nonimmediate_operand")
5904 (match_operand:V4DF 2 "nonimmediate_operand")]
83c4576f 5905 "TARGET_AVX"
5906{
5907 rtx r1, r2;
5908
71eeaf66 5909 r1 = gen_reg_rtx (V4SImode);
5910 r2 = gen_reg_rtx (V4SImode);
83c4576f 5911
71eeaf66 5912 emit_insn (gen_fix_truncv4dfv4si2 (r1, operands[1]));
5913 emit_insn (gen_fix_truncv4dfv4si2 (r2, operands[2]));
5914 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
83c4576f 5915 DONE;
5916})
5917
;; Pack two V2DF operands into V4SI operand 0 using truncating conversion.
;;  - With AVX, when 128-bit AVX is not preferred and optimizing for speed:
;;    concatenate both inputs into one V4DF and convert it with
;;    gen_fix_truncv4dfv4si2.
;;  - Otherwise: convert each input with gen_sse2_cvttpd2dq into a V4SI
;;    temporary, interleave the low V2DI elements of the two temporaries
;;    (gen_vec_interleave_lowv2di), and move the result, viewed as V4SI,
;;    into operand 0.
2a466fea 5918(define_expand "vec_pack_sfix_trunc_v2df"
abd4f58b 5919 [(match_operand:V4SI 0 "register_operand")
5920 (match_operand:V2DF 1 "nonimmediate_operand")
5921 (match_operand:V2DF 2 "nonimmediate_operand")]
8d1e0693 5922 "TARGET_SSE2"
2a466fea 5923{
09e640e6 5924 rtx tmp0, tmp1, tmp2;
8d1e0693 5925
f00377d6 5926 if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
71eeaf66 5927 {
5928 tmp0 = gen_reg_rtx (V4DFmode);
5929 tmp1 = force_reg (V2DFmode, operands[1]);
8d1e0693 5930
71eeaf66 5931 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
5932 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp0));
5933 }
5934 else
5935 {
5936 tmp0 = gen_reg_rtx (V4SImode);
5937 tmp1 = gen_reg_rtx (V4SImode);
09e640e6 5938 tmp2 = gen_reg_rtx (V2DImode);
71eeaf66 5939
5940 emit_insn (gen_sse2_cvttpd2dq (tmp0, operands[1]));
5941 emit_insn (gen_sse2_cvttpd2dq (tmp1, operands[2]));
09e640e6 5942 emit_insn (gen_vec_interleave_lowv2di (tmp2,
5943 gen_lowpart (V2DImode, tmp0),
5944 gen_lowpart (V2DImode, tmp1)));
5945 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
71eeaf66 5946 }
2a466fea 5947 DONE;
5948})
8d1e0693 5949
;; Maps a double-precision vector mode to the SImode vector mode with
;; twice as many elements; the destination mode of
;; vec_pack_ufix_trunc_<mode>.
c152f9e5 5950(define_mode_attr ssepackfltmode
6615b722 5951 [(V8DF "V16SI") (V4DF "V8SI") (V2DF "V4SI")])
c152f9e5 5952
;; Pack two VF2 operands into operand 0 of mode <ssepackfltmode>.
;;  - V8DFmode: convert each input with gen_ufix_truncv8dfv8si2 and join
;;    the V8SI results with gen_avx_vec_concatv16si.
;;  - Other modes: adjust each input with
;;    ix86_expand_adjust_ufix_to_sfix_si (which also fills tmp[2] / tmp[3]),
;;    pack the adjusted values with the signed vec_pack_sfix_trunc_<mode>
;;    expander into tmp[4], build tmp[5] from tmp[2] and tmp[3] with
;;    ix86_expand_vec_extract_even_odd (last argument 0), and XOR tmp[4]
;;    with tmp[5] to form the result.
;; NOTE(review): tmp[2]/tmp[3] are presumably per-element correction
;; values produced by the adjust helper -- confirm against i386.c.
5953(define_expand "vec_pack_ufix_trunc_<mode>"
abd4f58b 5954 [(match_operand:<ssepackfltmode> 0 "register_operand")
1706116d 5955 (match_operand:VF2 1 "register_operand")
5956 (match_operand:VF2 2 "register_operand")]
da38df18 5957 "TARGET_SSE2"
c152f9e5 5958{
1706116d 5959 if (<MODE>mode == V8DFmode)
da38df18 5960 {
1706116d 5961 rtx r1, r2;
5962
5963 r1 = gen_reg_rtx (V8SImode);
5964 r2 = gen_reg_rtx (V8SImode);
5965
5966 emit_insn (gen_ufix_truncv8dfv8si2 (r1, operands[1]));
5967 emit_insn (gen_ufix_truncv8dfv8si2 (r2, operands[2]));
5968 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
da38df18 5969 }
5970 else
5971 {
1706116d 5972 rtx tmp[7];
5973 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
5974 tmp[1] = ix86_expand_adjust_ufix_to_sfix_si (operands[2], &tmp[3]);
5975 tmp[4] = gen_reg_rtx (<ssepackfltmode>mode);
5976 emit_insn (gen_vec_pack_sfix_trunc_<mode> (tmp[4], tmp[0], tmp[1]));
/* V8SI destination without AVX2 goes through V8SF views of tmp[2] and
   tmp[3] for the even/odd extraction.  */
5977 if (<ssepackfltmode>mode == V4SImode || TARGET_AVX2)
5978 {
5979 tmp[5] = gen_reg_rtx (<ssepackfltmode>mode);
5980 ix86_expand_vec_extract_even_odd (tmp[5], tmp[2], tmp[3], 0);
5981 }
5982 else
5983 {
5984 tmp[5] = gen_reg_rtx (V8SFmode);
5985 ix86_expand_vec_extract_even_odd (tmp[5], gen_lowpart (V8SFmode, tmp[2]),
5986 gen_lowpart (V8SFmode, tmp[3]), 0);
5987 tmp[5] = gen_lowpart (V8SImode, tmp[5]);
5988 }
5989 tmp[6] = expand_simple_binop (<ssepackfltmode>mode, XOR, tmp[4], tmp[5],
5990 operands[0], 0, OPTAB_DIRECT);
/* expand_simple_binop may return a register other than operands[0].  */
5991 if (tmp[6] != operands[0])
5992 emit_move_insn (operands[0], tmp[6]);
da38df18 5993 }
1706116d 5994
c152f9e5 5995 DONE;
5996})
5997
;; Pack two V4DF operands into V8SI operand 0: each input is converted
;; with gen_avx_cvtpd2dq256 into a V4SI temporary, and the temporaries are
;; joined with gen_avx_vec_concatv8si.  Requires TARGET_AVX.
83c4576f 5998(define_expand "vec_pack_sfix_v4df"
abd4f58b 5999 [(match_operand:V8SI 0 "register_operand")
6000 (match_operand:V4DF 1 "nonimmediate_operand")
6001 (match_operand:V4DF 2 "nonimmediate_operand")]
83c4576f 6002 "TARGET_AVX"
6003{
6004 rtx r1, r2;
6005
71eeaf66 6006 r1 = gen_reg_rtx (V4SImode);
6007 r2 = gen_reg_rtx (V4SImode);
83c4576f 6008
71eeaf66 6009 emit_insn (gen_avx_cvtpd2dq256 (r1, operands[1]));
6010 emit_insn (gen_avx_cvtpd2dq256 (r2, operands[2]));
6011 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
83c4576f 6012 DONE;
6013})
6014
;; Pack two V2DF operands into V4SI operand 0 using the cvtpd2dq
;; conversions (not the truncating cvttpd2dq ones).
;;  - With AVX, when 128-bit AVX is not preferred and optimizing for speed:
;;    concatenate both inputs into one V4DF and convert it with
;;    gen_avx_cvtpd2dq256.
;;  - Otherwise: convert each input with gen_sse2_cvtpd2dq into a V4SI
;;    temporary, interleave the low V2DI elements of the two temporaries,
;;    and move the result, viewed as V4SI, into operand 0.
2a466fea 6015(define_expand "vec_pack_sfix_v2df"
abd4f58b 6016 [(match_operand:V4SI 0 "register_operand")
6017 (match_operand:V2DF 1 "nonimmediate_operand")
6018 (match_operand:V2DF 2 "nonimmediate_operand")]
8d1e0693 6019 "TARGET_SSE2"
2a466fea 6020{
09e640e6 6021 rtx tmp0, tmp1, tmp2;
2a466fea 6022
f00377d6 6023 if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
71eeaf66 6024 {
6025 tmp0 = gen_reg_rtx (V4DFmode);
6026 tmp1 = force_reg (V2DFmode, operands[1]);
2a466fea 6027
71eeaf66 6028 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
6029 emit_insn (gen_avx_cvtpd2dq256 (operands[0], tmp0));
6030 }
6031 else
6032 {
6033 tmp0 = gen_reg_rtx (V4SImode);
6034 tmp1 = gen_reg_rtx (V4SImode);
09e640e6 6035 tmp2 = gen_reg_rtx (V2DImode);
71eeaf66 6036
6037 emit_insn (gen_sse2_cvtpd2dq (tmp0, operands[1]));
6038 emit_insn (gen_sse2_cvtpd2dq (tmp1, operands[2]));
09e640e6 6039 emit_insn (gen_vec_interleave_lowv2di (tmp2,
6040 gen_lowpart (V2DImode, tmp0),
6041 gen_lowpart (V2DImode, tmp1)));
6042 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
71eeaf66 6043 }
2a466fea 6044 DONE;
6045})
8d1e0693 6046
5802c0cb 6047;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6048;;
2a466fea 6049;; Parallel single-precision floating point element swizzling
5802c0cb 6050;;
6051;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6052
;; Expander wrapper around the sse_movhlps insn.  The operands are first
;; passed through ix86_fixup_binary_operands, which returns the destination
;; to use; sse_movhlps is emitted on that destination, and the result is
;; copied to operand 0 if the returned destination differs from it.
2485795e 6053(define_expand "sse_movhlps_exp"
abd4f58b 6054 [(set (match_operand:V4SF 0 "nonimmediate_operand")
7c839b3f 6055 (vec_select:V4SF
6056 (vec_concat:V8SF
abd4f58b 6057 (match_operand:V4SF 1 "nonimmediate_operand")
6058 (match_operand:V4SF 2 "nonimmediate_operand"))
7c839b3f 6059 (parallel [(const_int 6)
6060 (const_int 7)
6061 (const_int 2)
6062 (const_int 3)])))]
6063 "TARGET_SSE"
cc05a422 6064{
6065 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
33541f98 6066
cc05a422 6067 emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));
6068
6069 /* Fix up the destination if needed. */
6070 if (dst != operands[0])
6071 emit_move_insn (operands[0], dst);
6072
6073 DONE;
6074})
7c839b3f 6075
;; Operand 0 = elements 6,7,2,3 of the concatenation (operand 1, operand 2),
;; i.e. { op2[2], op2[3], op1[2], op1[3] }.  At most one of operands 1 and 2
;; may be in memory.  Five alternatives:
;;   0/1: register forms, movhlps / vmovhlps;
;;   2/3: operand 2 in offsettable memory, movlps / vmovlps using %H2;
;;   4:   memory destination, (v)movhps storing operand 2.
2485795e 6076(define_insn "sse_movhlps"
45c0368c 6077 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
2a466fea 6078 (vec_select:V4SF
6079 (vec_concat:V8SF
45c0368c 6080 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
6081 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,o,o,x"))
2a466fea 6082 (parallel [(const_int 6)
6083 (const_int 7)
6084 (const_int 2)
6085 (const_int 3)])))]
5c752e47 6086 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2a466fea 6087 "@
6088 movhlps\t{%2, %0|%0, %2}
45c0368c 6089 vmovhlps\t{%2, %1, %0|%0, %1, %2}
2a466fea 6090 movlps\t{%H2, %0|%0, %H2}
45c0368c 6091 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
c358a059 6092 %vmovhps\t{%2, %0|%q0, %2}"
d1c8b778 6093 [(set_attr "isa" "noavx,avx,noavx,avx,*")
45c0368c 6094 (set_attr "type" "ssemov")
8c1dfa94 6095 (set_attr "ssememalign" "64")
45c0368c 6096 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
6097 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
5802c0cb 6098
;; Expander wrapper around the sse_movlhps insn.  The operands are first
;; passed through ix86_fixup_binary_operands, which returns the destination
;; to use; sse_movlhps is emitted on that destination, and the result is
;; copied to operand 0 if the returned destination differs from it.
2485795e 6099(define_expand "sse_movlhps_exp"
abd4f58b 6100 [(set (match_operand:V4SF 0 "nonimmediate_operand")
7c839b3f 6101 (vec_select:V4SF
6102 (vec_concat:V8SF
abd4f58b 6103 (match_operand:V4SF 1 "nonimmediate_operand")
6104 (match_operand:V4SF 2 "nonimmediate_operand"))
7c839b3f 6105 (parallel [(const_int 0)
6106 (const_int 1)
6107 (const_int 4)
6108 (const_int 5)])))]
6109 "TARGET_SSE"
cc05a422 6110{
6111 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
33541f98 6112
cc05a422 6113 emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));
6114
6115 /* Fix up the destination if needed. */
6116 if (dst != operands[0])
6117 emit_move_insn (operands[0], dst);
6118
6119 DONE;
6120})
7c839b3f 6121
;; Operand 0 = elements 0,1,4,5 of the concatenation (operand 1, operand 2),
;; i.e. { op1[0], op1[1], op2[0], op2[1] }.  Operand validity is checked
;; with ix86_binary_operator_ok.  Five alternatives:
;;   0/1: register forms, movlhps / vmovlhps;
;;   2/3: operand 2 in memory, movhps / vmovhps;
;;   4:   offsettable memory destination, (v)movlps storing operand 2 to %H0.
2485795e 6122(define_insn "sse_movlhps"
45c0368c 6123 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
2a466fea 6124 (vec_select:V4SF
6125 (vec_concat:V8SF
45c0368c 6126 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
c358a059 6127 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,m,m,x"))
2a466fea 6128 (parallel [(const_int 0)
6129 (const_int 1)
6130 (const_int 4)
6131 (const_int 5)])))]
6132 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
6133 "@
6134 movlhps\t{%2, %0|%0, %2}
45c0368c 6135 vmovlhps\t{%2, %1, %0|%0, %1, %2}
c358a059 6136 movhps\t{%2, %0|%0, %q2}
6137 vmovhps\t{%2, %1, %0|%0, %1, %q2}
45c0368c 6138 %vmovlps\t{%2, %H0|%H0, %2}"
d1c8b778 6139 [(set_attr "isa" "noavx,avx,noavx,avx,*")
45c0368c 6140 (set_attr "type" "ssemov")
8c1dfa94 6141 (set_attr "ssememalign" "64")
45c0368c 6142 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
6143 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
5802c0cb 6144
;; 512-bit vunpckhps.  Within each group of four elements, elements 2 and 3
;; of operand 1 are interleaved with elements 2 and 3 of operand 2 (which
;; appear at indices 16..31 of the V32SF concatenation).
5220cab6 6145(define_insn "<mask_codefor>avx512f_unpckhps512<mask_name>"
697a43f8 6146 [(set (match_operand:V16SF 0 "register_operand" "=v")
6147 (vec_select:V16SF
6148 (vec_concat:V32SF
6149 (match_operand:V16SF 1 "register_operand" "v")
6150 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
6151 (parallel [(const_int 2) (const_int 18)
6152 (const_int 3) (const_int 19)
6153 (const_int 6) (const_int 22)
6154 (const_int 7) (const_int 23)
6155 (const_int 10) (const_int 26)
6156 (const_int 11) (const_int 27)
6157 (const_int 14) (const_int 30)
6158 (const_int 15) (const_int 31)])))]
6159 "TARGET_AVX512F"
5220cab6 6160 "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
697a43f8 6161 [(set_attr "type" "sselog")
6162 (set_attr "prefix" "evex")
6163 (set_attr "mode" "V16SF")])
6164
d6e05290 6165;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit vunpckhps: for each group of four elements, elements 2 and 3 of
;; operand 1 are interleaved with elements 2 and 3 of operand 2 (indices
;; 8..15 of the V16SF concatenation).
ee780bf5 6166(define_insn "avx_unpckhps256<mask_name>"
6167 [(set (match_operand:V8SF 0 "register_operand" "=v")
ed30e0a6 6168 (vec_select:V8SF
6169 (vec_concat:V16SF
ee780bf5 6170 (match_operand:V8SF 1 "register_operand" "v")
6171 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
5e56456b 6172 (parallel [(const_int 2) (const_int 10)
6173 (const_int 3) (const_int 11)
6174 (const_int 6) (const_int 14)
6175 (const_int 7) (const_int 15)])))]
ee780bf5 6176 "TARGET_AVX && <mask_avx512vl_condition>"
6177 "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
ed30e0a6 6178 [(set_attr "type" "sselog")
6179 (set_attr "prefix" "vex")
6180 (set_attr "mode" "V8SF")])
6181
;; Interleave the high halves of V8SF operands 1 and 2 into operand 0.
;; Step 1: scratch operand 3 = elements 0,8,1,9,4,12,5,13 of (op1, op2),
;;         the same selection as the avx_unpcklps256 pattern.
;; Step 2: scratch operand 4 = elements 2,10,3,11,6,14,7,15 of (op1, op2),
;;         the same selection as the avx_unpckhps256 pattern.
;; Step 3: operand 0 = elements 4..7 of operand 3 followed by elements
;;         4..7 of operand 4, giving
;;         { op1[4], op2[4], op1[5], op2[5], op1[6], op2[6], op1[7], op2[7] }.
;; Operands 1 and 2 carry predicates only, like the other expanders here.
8cedf886 6182(define_expand "vec_interleave_highv8sf"
6183 [(set (match_dup 3)
6184 (vec_select:V8SF
6185 (vec_concat:V16SF
6186 (match_operand:V8SF 1 "register_operand")
6187 (match_operand:V8SF 2 "nonimmediate_operand"))
6188 (parallel [(const_int 0) (const_int 8)
6189 (const_int 1) (const_int 9)
6190 (const_int 4) (const_int 12)
6191 (const_int 5) (const_int 13)])))
6192 (set (match_dup 4)
6193 (vec_select:V8SF
6194 (vec_concat:V16SF
6195 (match_dup 1)
6196 (match_dup 2))
6197 (parallel [(const_int 2) (const_int 10)
6198 (const_int 3) (const_int 11)
6199 (const_int 6) (const_int 14)
6200 (const_int 7) (const_int 15)])))
abd4f58b 6201 (set (match_operand:V8SF 0 "register_operand")
33d0986a 6202 (vec_select:V8SF
6203 (vec_concat:V16SF
8cedf886 6204 (match_dup 3)
33d0986a 6205 (match_dup 4))
6206 (parallel [(const_int 4) (const_int 5)
6207 (const_int 6) (const_int 7)
6208 (const_int 12) (const_int 13)
6209 (const_int 14) (const_int 15)])))]
8cedf886 6210 "TARGET_AVX"
6211{
6212 operands[3] = gen_reg_rtx (V8SFmode);
6213 operands[4] = gen_reg_rtx (V8SFmode);
6214})
6215
;; Operand 0 = elements 2,6,3,7 of the concatenation (operand 1, operand 2),
;; i.e. { op1[2], op2[2], op1[3], op2[3] }.  Alternative 0 is the
;; two-operand unpckhps (isa noavx), alternative 1 the three-operand
;; vunpckhps (isa avx).
ee780bf5 6216(define_insn "vec_interleave_highv4sf<mask_name>"
6217 [(set (match_operand:V4SF 0 "register_operand" "=x,v")
2a466fea 6218 (vec_select:V4SF
6219 (vec_concat:V8SF
ee780bf5 6220 (match_operand:V4SF 1 "register_operand" "0,v")
6221 (match_operand:V4SF 2 "nonimmediate_operand" "xm,vm"))
2a466fea 6222 (parallel [(const_int 2) (const_int 6)
6223 (const_int 3) (const_int 7)])))]
ee780bf5 6224 "TARGET_SSE && <mask_avx512vl_condition>"
45c0368c 6225 "@
6226 unpckhps\t{%2, %0|%0, %2}
ee780bf5 6227 vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
45c0368c 6228 [(set_attr "isa" "noavx,avx")
6229 (set_attr "type" "sselog")
6230 (set_attr "prefix" "orig,vex")
2a466fea 6231 (set_attr "mode" "V4SF")])
5802c0cb 6232
;; 512-bit vunpcklps.  Within each group of four elements, elements 0 and 1
;; of operand 1 are interleaved with elements 0 and 1 of operand 2 (which
;; appear at indices 16..31 of the V32SF concatenation).
5220cab6 6233(define_insn "<mask_codefor>avx512f_unpcklps512<mask_name>"
697a43f8 6234 [(set (match_operand:V16SF 0 "register_operand" "=v")
6235 (vec_select:V16SF
6236 (vec_concat:V32SF
6237 (match_operand:V16SF 1 "register_operand" "v")
6238 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
6239 (parallel [(const_int 0) (const_int 16)
6240 (const_int 1) (const_int 17)
6241 (const_int 4) (const_int 20)
6242 (const_int 5) (const_int 21)
6243 (const_int 8) (const_int 24)
6244 (const_int 9) (const_int 25)
6245 (const_int 12) (const_int 28)
6246 (const_int 13) (const_int 29)])))]
6247 "TARGET_AVX512F"
5220cab6 6248 "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
697a43f8 6249 [(set_attr "type" "sselog")
6250 (set_attr "prefix" "evex")
6251 (set_attr "mode" "V16SF")])
6252
d6e05290 6253;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit vunpcklps: for each group of four elements, elements 0 and 1 of
;; operand 1 are interleaved with elements 0 and 1 of operand 2 (indices
;; 8..15 of the V16SF concatenation).
ee780bf5 6254(define_insn "avx_unpcklps256<mask_name>"
6255 [(set (match_operand:V8SF 0 "register_operand" "=v")
ed30e0a6 6256 (vec_select:V8SF
6257 (vec_concat:V16SF
ee780bf5 6258 (match_operand:V8SF 1 "register_operand" "v")
6259 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
5e56456b 6260 (parallel [(const_int 0) (const_int 8)
6261 (const_int 1) (const_int 9)
6262 (const_int 4) (const_int 12)
6263 (const_int 5) (const_int 13)])))]
ee780bf5 6264 "TARGET_AVX && <mask_avx512vl_condition>"
6265 "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
ed30e0a6 6266 [(set_attr "type" "sselog")
6267 (set_attr "prefix" "vex")
6268 (set_attr "mode" "V8SF")])
6269
;; Masked 128-bit vunpcklps (TARGET_AVX512VL).  The interleave
;; { op1[0], op2[0], op1[1], op2[1] } is merged with operand 3 (tied to the
;; destination, or a constant per "0C") under QImode mask register
;; operand 4.
ee780bf5 6270(define_insn "unpcklps128_mask"
6271 [(set (match_operand:V4SF 0 "register_operand" "=v")
6272 (vec_merge:V4SF
6273 (vec_select:V4SF
6274 (vec_concat:V8SF
6275 (match_operand:V4SF 1 "register_operand" "v")
6276 (match_operand:V4SF 2 "nonimmediate_operand" "vm"))
6277 (parallel [(const_int 0) (const_int 4)
6278 (const_int 1) (const_int 5)]))
6279 (match_operand:V4SF 3 "vector_move_operand" "0C")
6280 (match_operand:QI 4 "register_operand" "Yk")))]
6281 "TARGET_AVX512VL"
6282 "vunpcklps\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
6283 [(set_attr "type" "sselog")
6284 (set_attr "prefix" "evex")
6285 (set_attr "mode" "V4SF")])
6286
;; Interleave the low halves of V8SF operands 1 and 2 into operand 0.
;; Step 1: scratch operand 3 = elements 0,8,1,9,4,12,5,13 of (op1, op2),
;;         the same selection as the avx_unpcklps256 pattern.
;; Step 2: scratch operand 4 = elements 2,10,3,11,6,14,7,15 of (op1, op2),
;;         the same selection as the avx_unpckhps256 pattern.
;; Step 3: operand 0 = elements 0..3 of operand 3 followed by elements
;;         0..3 of operand 4, giving
;;         { op1[0], op2[0], op1[1], op2[1], op1[2], op2[2], op1[3], op2[3] }.
;; Operands 1 and 2 carry predicates only, like the other expanders here.
8cedf886 6287(define_expand "vec_interleave_lowv8sf"
6288 [(set (match_dup 3)
6289 (vec_select:V8SF
6290 (vec_concat:V16SF
6291 (match_operand:V8SF 1 "register_operand")
6292 (match_operand:V8SF 2 "nonimmediate_operand"))
6293 (parallel [(const_int 0) (const_int 8)
6294 (const_int 1) (const_int 9)
6295 (const_int 4) (const_int 12)
6296 (const_int 5) (const_int 13)])))
6297 (set (match_dup 4)
6298 (vec_select:V8SF
6299 (vec_concat:V16SF
6300 (match_dup 1)
6301 (match_dup 2))
6302 (parallel [(const_int 2) (const_int 10)
6303 (const_int 3) (const_int 11)
6304 (const_int 6) (const_int 14)
6305 (const_int 7) (const_int 15)])))
abd4f58b 6306 (set (match_operand:V8SF 0 "register_operand")
33d0986a 6307 (vec_select:V8SF
6308 (vec_concat:V16SF
8cedf886 6309 (match_dup 3)
33d0986a 6310 (match_dup 4))
6311 (parallel [(const_int 0) (const_int 1)
6312 (const_int 2) (const_int 3)
6313 (const_int 8) (const_int 9)
6314 (const_int 10) (const_int 11)])))]
8cedf886 6315 "TARGET_AVX"
6316{
6317 operands[3] = gen_reg_rtx (V8SFmode);
6318 operands[4] = gen_reg_rtx (V8SFmode);
6319})
6320
;; Operand 0 = elements 0,4,1,5 of the concatenation (operand 1, operand 2),
;; i.e. { op1[0], op2[0], op1[1], op2[1] }.  Alternative 0 is the
;; two-operand unpcklps (isa noavx), alternative 1 the three-operand
;; vunpcklps (isa avx).
d6e05290 6321(define_insn "vec_interleave_lowv4sf"
45c0368c 6322 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2a466fea 6323 (vec_select:V4SF
6324 (vec_concat:V8SF
45c0368c 6325 (match_operand:V4SF 1 "register_operand" "0,x")
6326 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
2a466fea 6327 (parallel [(const_int 0) (const_int 4)
6328 (const_int 1) (const_int 5)])))]
6329 "TARGET_SSE"
45c0368c 6330 "@
6331 unpcklps\t{%2, %0|%0, %2}
6332 vunpcklps\t{%2, %1, %0|%0, %1, %2}"
6333 [(set_attr "isa" "noavx,avx")
6334 (set_attr "type" "sselog")
6335 (set_attr "prefix" "orig,vex")
2a466fea 6336 (set_attr "mode" "V4SF")])
5802c0cb 6337
2a466fea 6338;; These are modeled with the same vec_concat as the others so that we
6339;; capture users of shufps that can use the new instructions
;;
;; 256-bit vmovshdup: every odd-indexed SF element of operand 1 is
;; duplicated into the even slot below it (1,1,3,3,5,5,7,7).  Both
;; halves of the vec_concat are operand 1.
;; NOTE(review): <mask_name>, <mask_operand2> and
;; <mask_avx512vl_condition> are presumably provided by a masking
;; define_subst defined elsewhere in the file -- confirm.
adea432f 6340(define_insn "avx_movshdup256<mask_name>"
6341 [(set (match_operand:V8SF 0 "register_operand" "=v")
ed30e0a6 6342 (vec_select:V8SF
6343 (vec_concat:V16SF
adea432f 6344 (match_operand:V8SF 1 "nonimmediate_operand" "vm")
ed30e0a6 6345 (match_dup 1))
6346 (parallel [(const_int 1) (const_int 1)
6347 (const_int 3) (const_int 3)
6348 (const_int 5) (const_int 5)
6349 (const_int 7) (const_int 7)])))]
adea432f 6350 "TARGET_AVX && <mask_avx512vl_condition>"
6351 "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
ed30e0a6 6352 [(set_attr "type" "sse")
6353 (set_attr "prefix" "vex")
6354 (set_attr "mode" "V8SF")])
6355
;; 128-bit movshdup: result is elements 1,1,3,3 of operand 1.
;; The selector is written 1,1,7,7 over (op1 ++ op1); because both
;; halves of the concatenation are operand 1, index 7 is element 3.
adea432f 6356(define_insn "sse3_movshdup<mask_name>"
6357 [(set (match_operand:V4SF 0 "register_operand" "=v")
2a466fea 6358 (vec_select:V4SF
6359 (vec_concat:V8SF
adea432f 6360 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
2a466fea 6361 (match_dup 1))
6362 (parallel [(const_int 1)
6363 (const_int 1)
6364 (const_int 7)
6365 (const_int 7)])))]
adea432f 6366 "TARGET_SSE3 && <mask_avx512vl_condition>"
6367 "%vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
2a466fea 6368 [(set_attr "type" "sse")
1f346cbc 6369 (set_attr "prefix_rep" "1")
ed30e0a6 6370 (set_attr "prefix" "maybe_vex")
2a466fea 6371 (set_attr "mode" "V4SF")])
5802c0cb 6372
;; 512-bit vmovshdup: every odd-indexed SF element of operand 1 is
;; duplicated into the even slot below it (1,1,3,3,...,15,15).
5220cab6 6373(define_insn "<mask_codefor>avx512f_movshdup512<mask_name>"
697a43f8 6374 [(set (match_operand:V16SF 0 "register_operand" "=v")
6375 (vec_select:V16SF
6376 (vec_concat:V32SF
6377 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
6378 (match_dup 1))
6379 (parallel [(const_int 1) (const_int 1)
6380 (const_int 3) (const_int 3)
6381 (const_int 5) (const_int 5)
6382 (const_int 7) (const_int 7)
6383 (const_int 9) (const_int 9)
6384 (const_int 11) (const_int 11)
6385 (const_int 13) (const_int 13)
6386 (const_int 15) (const_int 15)])))]
6387 "TARGET_AVX512F"
5220cab6 6388 "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
697a43f8 6389 [(set_attr "type" "sse")
6390 (set_attr "prefix" "evex")
6391 (set_attr "mode" "V16SF")])
6392
;; 256-bit vmovsldup: every even-indexed SF element of operand 1 is
;; duplicated into the odd slot above it (0,0,2,2,4,4,6,6).
adea432f 6393(define_insn "avx_movsldup256<mask_name>"
6394 [(set (match_operand:V8SF 0 "register_operand" "=v")
ed30e0a6 6395 (vec_select:V8SF
6396 (vec_concat:V16SF
adea432f 6397 (match_operand:V8SF 1 "nonimmediate_operand" "vm")
ed30e0a6 6398 (match_dup 1))
6399 (parallel [(const_int 0) (const_int 0)
6400 (const_int 2) (const_int 2)
6401 (const_int 4) (const_int 4)
6402 (const_int 6) (const_int 6)])))]
adea432f 6403 "TARGET_AVX && <mask_avx512vl_condition>"
6404 "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
ed30e0a6 6405 [(set_attr "type" "sse")
6406 (set_attr "prefix" "vex")
6407 (set_attr "mode" "V8SF")])
6408
;; 128-bit movsldup: result is elements 0,0,2,2 of operand 1.
;; The selector is written 0,0,6,6 over (op1 ++ op1); because both
;; halves of the concatenation are operand 1, index 6 is element 2.
adea432f 6409(define_insn "sse3_movsldup<mask_name>"
6410 [(set (match_operand:V4SF 0 "register_operand" "=v")
2a466fea 6411 (vec_select:V4SF
6412 (vec_concat:V8SF
adea432f 6413 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
2a466fea 6414 (match_dup 1))
6415 (parallel [(const_int 0)
6416 (const_int 0)
6417 (const_int 6)
6418 (const_int 6)])))]
adea432f 6419 "TARGET_SSE3 && <mask_avx512vl_condition>"
6420 "%vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
2a466fea 6421 [(set_attr "type" "sse")
1f346cbc 6422 (set_attr "prefix_rep" "1")
ed30e0a6 6423 (set_attr "prefix" "maybe_vex")
2a466fea 6424 (set_attr "mode" "V4SF")])
5802c0cb 6425
;; 512-bit vmovsldup: every even-indexed SF element of operand 1 is
;; duplicated into the odd slot above it (0,0,2,2,...,14,14).
5220cab6 6426(define_insn "<mask_codefor>avx512f_movsldup512<mask_name>"
697a43f8 6427 [(set (match_operand:V16SF 0 "register_operand" "=v")
6428 (vec_select:V16SF
6429 (vec_concat:V32SF
6430 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
6431 (match_dup 1))
6432 (parallel [(const_int 0) (const_int 0)
6433 (const_int 2) (const_int 2)
6434 (const_int 4) (const_int 4)
6435 (const_int 6) (const_int 6)
6436 (const_int 8) (const_int 8)
6437 (const_int 10) (const_int 10)
6438 (const_int 12) (const_int 12)
6439 (const_int 14) (const_int 14)])))]
6440 "TARGET_AVX512F"
5220cab6 6441 "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
697a43f8 6442 [(set_attr "type" "sse")
6443 (set_attr "prefix" "evex")
6444 (set_attr "mode" "V16SF")])
6445
;; Expander for 256-bit shufps taking the raw 8-bit immediate in
;; operand 3.  The immediate is split into four 2-bit fields and turned
;; into the eight explicit element selectors expected by
;; avx_shufps256_1: fields 0 and 1 index operand 1 (0..3), fields 2 and
;; 3 index operand 2 (+8); the same four selectors are then repeated
;; with +4 for the upper four result elements.
bb2fa3d8 6446(define_expand "avx_shufps256<mask_expand4_name>"
abd4f58b 6447 [(match_operand:V8SF 0 "register_operand")
6448 (match_operand:V8SF 1 "register_operand")
6449 (match_operand:V8SF 2 "nonimmediate_operand")
6450 (match_operand:SI 3 "const_int_operand")]
ed30e0a6 6451 "TARGET_AVX"
6452{
6453 int mask = INTVAL (operands[3]);
bb2fa3d8 6454 emit_insn (gen_avx_shufps256_1<mask_expand4_name> (operands[0],
6455 operands[1],
6456 operands[2],
6457 GEN_INT ((mask >> 0) & 3),
6458 GEN_INT ((mask >> 2) & 3),
6459 GEN_INT (((mask >> 4) & 3) + 8),
6460 GEN_INT (((mask >> 6) & 3) + 8),
6461 GEN_INT (((mask >> 0) & 3) + 4),
6462 GEN_INT (((mask >> 2) & 3) + 4),
6463 GEN_INT (((mask >> 4) & 3) + 12),
6464 GEN_INT (((mask >> 6) & 3) + 12)
6465 <mask_expand4_args>));
ed30e0a6 6466 DONE;
6467})
6468
6469;; Each 2-bit field of the mask selects 2 elements: one in the low four-element half of the result and the matching one (+4) in the high half.
;; The insn condition enforces that pairing (operands 7..10 must equal
;; operands 3..6 plus 4).  The output code rebuilds the 8-bit immediate
;; from operands 3..6 and stores it back into operands[3] for printing.
bb2fa3d8 6470(define_insn "avx_shufps256_1<mask_name>"
6471 [(set (match_operand:V8SF 0 "register_operand" "=v")
ed30e0a6 6472 (vec_select:V8SF
6473 (vec_concat:V16SF
bb2fa3d8 6474 (match_operand:V8SF 1 "register_operand" "v")
6475 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
abd4f58b 6476 (parallel [(match_operand 3 "const_0_to_3_operand" )
6477 (match_operand 4 "const_0_to_3_operand" )
6478 (match_operand 5 "const_8_to_11_operand" )
6479 (match_operand 6 "const_8_to_11_operand" )
6480 (match_operand 7 "const_4_to_7_operand" )
6481 (match_operand 8 "const_4_to_7_operand" )
6482 (match_operand 9 "const_12_to_15_operand")
6483 (match_operand 10 "const_12_to_15_operand")])))]
ed30e0a6 6484 "TARGET_AVX
bb2fa3d8 6485 && <mask_avx512vl_condition>
ed30e0a6 6486 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
6487 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
6488 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
6489 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
6490{
6491 int mask;
6492 mask = INTVAL (operands[3]);
6493 mask |= INTVAL (operands[4]) << 2;
6494 mask |= (INTVAL (operands[5]) - 8) << 4;
6495 mask |= (INTVAL (operands[6]) - 8) << 6;
6496 operands[3] = GEN_INT (mask);
6497
bb2fa3d8 6498 return "vshufps\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
ed30e0a6 6499}
77aff08f 6500 [(set_attr "type" "sseshuf")
00a0e418 6501 (set_attr "length_immediate" "1")
bb2fa3d8 6502 (set_attr "prefix" "<mask_prefix>")
ed30e0a6 6503 (set_attr "mode" "V8SF")])
6504
;; Expander for 128-bit shufps taking the raw 8-bit immediate in
;; operand 3.  The immediate is split into four 2-bit fields: fields 0
;; and 1 become selectors into operand 1 (0..3), fields 2 and 3 become
;; selectors into operand 2 (+4), and gen_sse_shufps_v4sf is emitted.
bb2fa3d8 6505(define_expand "sse_shufps<mask_expand4_name>"
abd4f58b 6506 [(match_operand:V4SF 0 "register_operand")
6507 (match_operand:V4SF 1 "register_operand")
6508 (match_operand:V4SF 2 "nonimmediate_operand")
6509 (match_operand:SI 3 "const_int_operand")]
2a466fea 6510 "TARGET_SSE"
6511{
6512 int mask = INTVAL (operands[3]);
bb2fa3d8 6513 emit_insn (gen_sse_shufps_v4sf<mask_expand4_name> (operands[0],
6514 operands[1],
6515 operands[2],
6516 GEN_INT ((mask >> 0) & 3),
6517 GEN_INT ((mask >> 2) & 3),
6518 GEN_INT (((mask >> 4) & 3) + 4),
6519 GEN_INT (((mask >> 6) & 3) + 4)
6520 <mask_expand4_args>));
2a466fea 6521 DONE;
6522})
5802c0cb 6523
;; Masked 128-bit vshufps (AVX512VL).  The shuffle result is merged
;; with operand 7 (either the destination itself or zero, per the "0C"
;; constraint) under the mask register in operand 8.  The output code
;; rebuilds the 8-bit immediate from selectors 3..6 and stores it back
;; into operands[3] for printing.
bb2fa3d8 6524(define_insn "sse_shufps_v4sf_mask"
6525 [(set (match_operand:V4SF 0 "register_operand" "=v")
6526 (vec_merge:V4SF
6527 (vec_select:V4SF
6528 (vec_concat:V8SF
6529 (match_operand:V4SF 1 "register_operand" "v")
6530 (match_operand:V4SF 2 "nonimmediate_operand" "vm"))
6531 (parallel [(match_operand 3 "const_0_to_3_operand")
6532 (match_operand 4 "const_0_to_3_operand")
6533 (match_operand 5 "const_4_to_7_operand")
6534 (match_operand 6 "const_4_to_7_operand")]))
6535 (match_operand:V4SF 7 "vector_move_operand" "0C")
6536 (match_operand:QI 8 "register_operand" "Yk")))]
6537 "TARGET_AVX512VL"
6538{
6539 int mask = 0;
6540 mask |= INTVAL (operands[3]) << 0;
6541 mask |= INTVAL (operands[4]) << 2;
6542 mask |= (INTVAL (operands[5]) - 4) << 4;
6543 mask |= (INTVAL (operands[6]) - 4) << 6;
6544 operands[3] = GEN_INT (mask);
6545
6546 return "vshufps\t{%3, %2, %1, %0%{%8%}%N7|%0%{%8%}%N7, %1, %2, %3}";
6547}
6548 [(set_attr "type" "sseshuf")
6549 (set_attr "length_immediate" "1")
6550 (set_attr "prefix" "evex")
6551 (set_attr "mode" "V4SF")])
6552
;; Unmasked 128-bit shufps for every mode in VI4F_128.  Selectors 3 and
;; 4 pick from operand 1 (0..3), selectors 5 and 6 from operand 2
;; (4..7).  The output code rebuilds the 8-bit immediate into
;; operands[3]; alternative 0 prints the two-operand SSE form,
;; alternative 1 the three-operand AVX form.
56c7c824 6553(define_insn "sse_shufps_<mode>"
6fe5844b 6554 [(set (match_operand:VI4F_128 0 "register_operand" "=x,x")
6555 (vec_select:VI4F_128
63d5e521 6556 (vec_concat:<ssedoublevecmode>
6fe5844b 6557 (match_operand:VI4F_128 1 "register_operand" "0,x")
6558 (match_operand:VI4F_128 2 "nonimmediate_operand" "xm,xm"))
abd4f58b 6559 (parallel [(match_operand 3 "const_0_to_3_operand")
6560 (match_operand 4 "const_0_to_3_operand")
6561 (match_operand 5 "const_4_to_7_operand")
6562 (match_operand 6 "const_4_to_7_operand")])))]
2a466fea 6563 "TARGET_SSE"
6564{
6565 int mask = 0;
6566 mask |= INTVAL (operands[3]) << 0;
6567 mask |= INTVAL (operands[4]) << 2;
6568 mask |= (INTVAL (operands[5]) - 4) << 4;
6569 mask |= (INTVAL (operands[6]) - 4) << 6;
6570 operands[3] = GEN_INT (mask);
5802c0cb 6571
45c0368c 6572 switch (which_alternative)
6573 {
6574 case 0:
6575 return "shufps\t{%3, %2, %0|%0, %2, %3}";
6576 case 1:
6577 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6578 default:
6579 gcc_unreachable ();
6580 }
2a466fea 6581}
45c0368c 6582 [(set_attr "isa" "noavx,avx")
77aff08f 6583 (set_attr "type" "sseshuf")
00a0e418 6584 (set_attr "length_immediate" "1")
45c0368c 6585 (set_attr "prefix" "orig,vex")
2a466fea 6586 (set_attr "mode" "V4SF")])
5802c0cb 6587
;; Extract the high pair (elements 2 and 3) of a V4SF into a V2SF.
;; Alternatives: register -> memory (movhps), register -> register
;; (movhlps), offsettable memory -> register (movlps on %H1).
2a466fea 6588(define_insn "sse_storehps"
6589 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
6590 (vec_select:V2SF
6591 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
6592 (parallel [(const_int 2) (const_int 3)])))]
6593 "TARGET_SSE"
6594 "@
c358a059 6595 %vmovhps\t{%1, %0|%q0, %1}
ed30e0a6 6596 %vmovhlps\t{%1, %d0|%d0, %1}
6597 %vmovlps\t{%H1, %d0|%d0, %H1}"
2a466fea 6598 [(set_attr "type" "ssemov")
8c1dfa94 6599 (set_attr "ssememalign" "64")
ed30e0a6 6600 (set_attr "prefix" "maybe_vex")
2a466fea 6601 (set_attr "mode" "V2SF,V4SF,V2SF")])
6602
;; Expander wrapping sse_loadhps: result is the low pair of operand 1
;; followed by operand 2.  The operands are first passed through
;; ix86_fixup_binary_operands; if that returns a destination different
;; from operands[0], the result is copied to operands[0] afterwards.
2485795e 6603(define_expand "sse_loadhps_exp"
abd4f58b 6604 [(set (match_operand:V4SF 0 "nonimmediate_operand")
7c839b3f 6605 (vec_concat:V4SF
6606 (vec_select:V2SF
abd4f58b 6607 (match_operand:V4SF 1 "nonimmediate_operand")
7c839b3f 6608 (parallel [(const_int 0) (const_int 1)]))
abd4f58b 6609 (match_operand:V2SF 2 "nonimmediate_operand")))]
7c839b3f 6610 "TARGET_SSE"
cc05a422 6611{
6612 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
33541f98 6613
cc05a422 6614 emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));
6615
6616 /* Fix up the destination if needed. */
6617 if (dst != operands[0])
6618 emit_move_insn (operands[0], dst);
6619
6620 DONE;
6621})
7c839b3f 6622
;; Replace the high pair of a V4SF: result is elements 0,1 of operand 1
;; followed by the V2SF in operand 2.
;; Alternatives 0-1 load operand 2 from memory (movhps), 2-3 take it
;; from a register (movlhps), and alternative 4 handles an offsettable
;; memory destination by storing operand 2 to its high half (%H0).
2485795e 6623(define_insn "sse_loadhps"
45c0368c 6624 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
5802c0cb 6625 (vec_concat:V4SF
2a466fea 6626 (vec_select:V2SF
45c0368c 6627 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
2a466fea 6628 (parallel [(const_int 0) (const_int 1)]))
45c0368c 6629 (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,x,x")))]
5c752e47 6630 "TARGET_SSE"
2a466fea 6631 "@
c358a059 6632 movhps\t{%2, %0|%0, %q2}
6633 vmovhps\t{%2, %1, %0|%0, %1, %q2}
2a466fea 6634 movlhps\t{%2, %0|%0, %2}
45c0368c 6635 vmovlhps\t{%2, %1, %0|%0, %1, %2}
6636 %vmovlps\t{%2, %H0|%H0, %2}"
d1c8b778 6637 [(set_attr "isa" "noavx,avx,noavx,avx,*")
45c0368c 6638 (set_attr "type" "ssemov")
8c1dfa94 6639 (set_attr "ssememalign" "64")
45c0368c 6640 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
6641 (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
ed30e0a6 6642
;; Extract the low pair (elements 0 and 1) of a V4SF into a V2SF.
;; Alternatives: register -> memory (movlps), register -> register
;; (movaps), memory -> register (movlps).
2a466fea 6643(define_insn "sse_storelps"
45c0368c 6644 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
2a466fea 6645 (vec_select:V2SF
45c0368c 6646 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,m")
2a466fea 6647 (parallel [(const_int 0) (const_int 1)])))]
5c752e47 6648 "TARGET_SSE"
2a466fea 6649 "@
c358a059 6650 %vmovlps\t{%1, %0|%q0, %1}
45c0368c 6651 %vmovaps\t{%1, %0|%0, %1}
c358a059 6652 %vmovlps\t{%1, %d0|%d0, %q1}"
2a466fea 6653 [(set_attr "type" "ssemov")
45c0368c 6654 (set_attr "prefix" "maybe_vex")
2a466fea 6655 (set_attr "mode" "V2SF,V4SF,V2SF")])
6656
;; Expander wrapping sse_loadlps: result is operand 2 followed by the
;; high pair (elements 2,3) of operand 1.  The operands are first passed
;; through ix86_fixup_binary_operands; if that returns a destination
;; different from operands[0], the result is copied there afterwards.
2485795e 6657(define_expand "sse_loadlps_exp"
abd4f58b 6658 [(set (match_operand:V4SF 0 "nonimmediate_operand")
7c839b3f 6659 (vec_concat:V4SF
abd4f58b 6660 (match_operand:V2SF 2 "nonimmediate_operand")
7c839b3f 6661 (vec_select:V2SF
abd4f58b 6662 (match_operand:V4SF 1 "nonimmediate_operand")
7c839b3f 6663 (parallel [(const_int 2) (const_int 3)]))))]
6664 "TARGET_SSE"
cc05a422 6665{
6666 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
33541f98 6667
cc05a422 6668 emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));
6669
6670 /* Fix up the destination if needed. */
6671 if (dst != operands[0])
6672 emit_move_insn (operands[0], dst);
6673
6674 DONE;
6675})
7c839b3f 6676
;; Replace the low pair of a V4SF: result is the V2SF in operand 2
;; followed by elements 2,3 of operand 1.
;; Alternatives 0-1 take operand 2 from a register and use shufps with
;; the fixed immediate 0xe4; alternatives 2-3 load operand 2 from memory
;; with movlps; alternative 4 stores operand 2 to a memory destination.
2485795e 6677(define_insn "sse_loadlps"
45c0368c 6678 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
2a466fea 6679 (vec_concat:V4SF
50c10b91 6680 (match_operand:V2SF 2 "nonimmediate_operand" " 0,x,m,m,x")
5802c0cb 6681 (vec_select:V2SF
45c0368c 6682 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0,x,0")
2a466fea 6683 (parallel [(const_int 2) (const_int 3)]))))]
5c752e47 6684 "TARGET_SSE"
2a466fea 6685 "@
6686 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
45c0368c 6687 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
c358a059 6688 movlps\t{%2, %0|%0, %q2}
6689 vmovlps\t{%2, %1, %0|%0, %1, %q2}
6690 %vmovlps\t{%2, %0|%q0, %2}"
d1c8b778 6691 [(set_attr "isa" "noavx,avx,noavx,avx,*")
77aff08f 6692 (set_attr "type" "sseshuf,sseshuf,ssemov,ssemov,ssemov")
8c1dfa94 6693 (set_attr "ssememalign" "64")
45c0368c 6694 (set_attr "length_immediate" "1,1,*,*,*")
6695 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
6696 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
ed30e0a6 6697
;; movss register form: vec_merge with mask 1 takes element 0 from
;; operand 2 and the remaining elements from operand 1.
;; Alternative 0 is the SSE form (operand 1 tied to the destination),
;; alternative 1 the three-operand AVX form.
2a466fea 6698(define_insn "sse_movss"
45c0368c 6699 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2a466fea 6700 (vec_merge:V4SF
45c0368c 6701 (match_operand:V4SF 2 "register_operand" " x,x")
6702 (match_operand:V4SF 1 "register_operand" " 0,x")
2a466fea 6703 (const_int 1)))]
6704 "TARGET_SSE"
45c0368c 6705 "@
6706 movss\t{%2, %0|%0, %2}
6707 vmovss\t{%2, %1, %0|%0, %1, %2}"
6708 [(set_attr "isa" "noavx,avx")
6709 (set_attr "type" "ssemov")
6710 (set_attr "prefix" "orig,vex")
2a466fea 6711 (set_attr "mode" "SF")])
bb8107e7 6712
;; Broadcast element 0 of a V4SF register to every element of a
;; VF1_128_256 destination (vbroadcastss, register source, AVX2).
eea5ff47 6713(define_insn "avx2_vec_dup<mode>"
03ae25dc 6714 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
6715 (vec_duplicate:VF1_128_256
5deb404d 6716 (vec_select:SF
6717 (match_operand:V4SF 1 "register_operand" "x")
6718 (parallel [(const_int 0)]))))]
6719 "TARGET_AVX2"
6720 "vbroadcastss\t{%1, %0|%0, %1}"
6721 [(set_attr "type" "sselog1")
6722 (set_attr "prefix" "vex")
eea5ff47 6723 (set_attr "mode" "<MODE>")])
5deb404d 6724
;; Broadcast element 0 of a V8SF register to all eight elements of a
;; V8SF destination.  The source is printed with the %x modifier.
;; NOTE(review): %x presumably prints the 128-bit (xmm) name of the
;; register -- confirm against the operand-printing code.
541e350d 6725(define_insn "avx2_vec_dupv8sf_1"
6726 [(set (match_operand:V8SF 0 "register_operand" "=x")
6727 (vec_duplicate:V8SF
6728 (vec_select:SF
6729 (match_operand:V8SF 1 "register_operand" "x")
6730 (parallel [(const_int 0)]))))]
6731 "TARGET_AVX2"
6732 "vbroadcastss\t{%x1, %0|%0, %x1}"
6733 [(set_attr "type" "sselog1")
6734 (set_attr "prefix" "vex")
6735 (set_attr "mode" "V8SF")])
6736
;; Broadcast element 0 of a 512-bit float vector register (VF_512) to
;; every element of a destination of the same mode.  The mnemonic
;; suffix comes from <bcstscalarsuff>.
05e7532b 6737(define_insn "avx512f_vec_dup<mode>_1"
6738 [(set (match_operand:VF_512 0 "register_operand" "=v")
6739 (vec_duplicate:VF_512
6740 (vec_select:<ssescalarmode>
6741 (match_operand:VF_512 1 "register_operand" "v")
6742 (parallel [(const_int 0)]))))]
6743 "TARGET_AVX512F"
6744 "vbroadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}"
6745 [(set_attr "type" "sselog1")
6746 (set_attr "prefix" "evex")
6747 (set_attr "mode" "<MODE>")])
6748
d3d9aac1 6749;; Although insertps takes register source, we prefer
6750;; unpcklps with register source since it is shorter.
;;
;; Build a V2SF from two SF scalars (SSE4.1 and later).  The insn
;; condition rejects the case where both inputs are in memory.
;; Alternatives, in order: three register/register unpcklps forms,
;; three insertps forms with operand 2 in memory, a movss load when
;; operand 2 is the constant "C", and two MMX forms (punpckldq, movd).
6751(define_insn "*vec_concatv2sf_sse4_1"
0a281fd0 6752 [(set (match_operand:V2SF 0 "register_operand"
6753 "=Yr,*x,x,Yr,*x,x,x,*y ,*y")
d3d9aac1 6754 (vec_concat:V2SF
0a281fd0 6755 (match_operand:SF 1 "nonimmediate_operand"
6756 " 0, 0,x, 0,0, x,m, 0 , m")
6757 (match_operand:SF 2 "vector_move_operand"
6758 " Yr,*x,x, m,m, m,C,*ym, C")))]
6759 "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
d3d9aac1 6760 "@
0a32b282 6761 unpcklps\t{%2, %0|%0, %2}
d3d9aac1 6762 unpcklps\t{%2, %0|%0, %2}
45c0368c 6763 vunpcklps\t{%2, %1, %0|%0, %1, %2}
d3d9aac1 6764 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
0a32b282 6765 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
45c0368c 6766 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
6767 %vmovss\t{%1, %0|%0, %1}
d3d9aac1 6768 punpckldq\t{%2, %0|%0, %2}
6769 movd\t{%1, %0|%0, %1}"
0a32b282 6770 [(set_attr "isa" "noavx,noavx,avx,noavx,noavx,avx,*,*,*")
6771 (set_attr "type" "sselog,sselog,sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
6772 (set_attr "prefix_data16" "*,*,*,1,1,*,*,*,*")
6773 (set_attr "prefix_extra" "*,*,*,1,1,1,*,*,*")
6774 (set_attr "length_immediate" "*,*,*,1,1,1,*,*,*")
6775 (set_attr "prefix" "orig,orig,vex,orig,orig,vex,maybe_vex,orig,orig")
6776 (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
d3d9aac1 6777
2a466fea 6778;; ??? In theory we can match memory for the MMX alternative, but allowing
6779;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
6780;; alternatives pretty much forces the MMX alternative to be chosen.
;;
;; Build a V2SF from two SF scalars with plain SSE.  Alternatives:
;; register/register unpcklps, movss load when operand 2 is the
;; constant "C", and the two MMX equivalents (punpckldq, movd).
b4a46c88 6781(define_insn "*vec_concatv2sf_sse"
2a466fea 6782 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
6783 (vec_concat:V2SF
6784 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
6785 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
6786 "TARGET_SSE"
6787 "@
6788 unpcklps\t{%2, %0|%0, %2}
6789 movss\t{%1, %0|%0, %1}
6790 punpckldq\t{%2, %0|%0, %2}
6791 movd\t{%1, %0|%0, %1}"
6792 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
6793 (set_attr "mode" "V4SF,SF,DI,DI")])
8aa4e142 6794
;; Build a V4SF from two V2SF halves: operand 1 is the low half and
;; operand 2 the high half.  Operand 2 in a register uses movlhps,
;; operand 2 in memory uses movhps; each has an SSE and an AVX form.
d3d379e7 6795(define_insn "*vec_concatv4sf"
45c0368c 6796 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x,x")
2a466fea 6797 (vec_concat:V4SF
45c0368c 6798 (match_operand:V2SF 1 "register_operand" " 0,x,0,x")
6799 (match_operand:V2SF 2 "nonimmediate_operand" " x,x,m,m")))]
2a466fea 6800 "TARGET_SSE"
6801 "@
6802 movlhps\t{%2, %0|%0, %2}
45c0368c 6803 vmovlhps\t{%2, %1, %0|%0, %1, %2}
c358a059 6804 movhps\t{%2, %0|%0, %q2}
6805 vmovhps\t{%2, %1, %0|%0, %1, %q2}"
45c0368c 6806 [(set_attr "isa" "noavx,avx,noavx,avx")
6807 (set_attr "type" "ssemov")
6808 (set_attr "prefix" "orig,vex,orig,vex")
6809 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
8aa4e142 6810
;; Vector initialisation expander for every mode in V_128.  All the
;; work is delegated to ix86_expand_vector_init, called with `false'
;; as its first argument, the destination, and operand 1.
c262fafb 6811(define_expand "vec_init<mode>"
abd4f58b 6812 [(match_operand:V_128 0 "register_operand")
6813 (match_operand 1)]
2a466fea 6814 "TARGET_SSE"
8aa4e142 6815{
2a466fea 6816 ix86_expand_vector_init (false, operands[0], operands[1]);
8aa4e142 6817 DONE;
6818})
6819
45c0368c 6820;; Avoid combining registers from different units in a single alternative,
6821;; see comment above inline_secondary_memory_needed function in i386.c
;;
;; Set element 0 of a VI4F_128 vector: vec_merge with mask 1 takes
;; element 0 from the duplicated scalar in operand 2 and the remaining
;; elements from operand 1 (a vector or the constant "C").
;; The thirteen alternatives fall into groups:
;;   0-4   operand 1 is "C": insertps / scalar move / movd / movss load
;;   5-6   register/register movss (SSE and AVX forms)
;;   7-9   pinsrd with immediate 0 (SSE4.1 and AVX forms)
;;   10-12 memory destination with operand 1 tied to it; these output
;;         "#" and so rely on a splitter.
1e541240 6822(define_insn "vec_set<mode>_0"
6fe5844b 6823 [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
43483afb 6824 "=Yr,*v,v,Yi,x,x,v,Yr ,*x ,x ,m ,m ,m")
6fe5844b 6825 (vec_merge:VI4F_128
6826 (vec_duplicate:VI4F_128
45c0368c 6827 (match_operand:<ssescalarmode> 2 "general_operand"
43483afb 6828 " Yr,*v,m,r ,m,x,v,*rm,*rm,*rm,!x,!*re,!*fF"))
6fe5844b 6829 (match_operand:VI4F_128 1 "vector_move_operand"
0a32b282 6830 " C , C,C,C ,C,0,v,0 ,0 ,x ,0 ,0 ,0")
04e14b44 6831 (const_int 1)))]
1e541240 6832 "TARGET_SSE"
04e14b44 6833 "@
0a32b282 6834 %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe}
45c0368c 6835 %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe}
6836 %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
6837 %vmovd\t{%2, %0|%0, %2}
04e14b44 6838 movss\t{%2, %0|%0, %2}
1e541240 6839 movss\t{%2, %0|%0, %2}
45c0368c 6840 vmovss\t{%2, %1, %0|%0, %1, %2}
04e14b44 6841 pinsrd\t{$0, %2, %0|%0, %2, 0}
0a32b282 6842 pinsrd\t{$0, %2, %0|%0, %2, 0}
45c0368c 6843 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
6844 #
6845 #
04e14b44 6846 #"
0a32b282 6847 [(set_attr "isa" "sse4,sse4,sse2,sse2,noavx,noavx,avx,sse4_noavx,sse4_noavx,avx,*,*,*")
d1c8b778 6848 (set (attr "type")
0a32b282 6849 (cond [(eq_attr "alternative" "0,1,7,8,9")
d1c8b778 6850 (const_string "sselog")
0a32b282 6851 (eq_attr "alternative" "11")
d1c8b778 6852 (const_string "imov")
0a32b282 6853 (eq_attr "alternative" "12")
2c9cbc56 6854 (const_string "fmov")
d1c8b778 6855 ]
6856 (const_string "ssemov")))
0a32b282 6857 (set_attr "prefix_extra" "*,*,*,*,*,*,*,1,1,1,*,*,*")
6858 (set_attr "length_immediate" "*,*,*,*,*,*,*,1,1,1,*,*,*")
6859 (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,maybe_vex,orig,orig,vex,orig,orig,vex,*,*,*")
6860 (set_attr "mode" "SF,SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,TI,*,*,*")])
2a466fea 6861
6862;; A subset is vec_setv4sf.
;;
;; Insert the SF scalar in operand 2 into one element of operand 1.
;; Operand 3 is the vec_merge mask; the insn condition only accepts a
;; single set bit whose position is below the number of V4SF elements.
;; The output code replaces operands[3] with that bit position shifted
;; left by 4, which is the immediate printed for insertps.
6863(define_insn "*vec_setv4sf_sse4_1"
0a32b282 6864 [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,x")
2a466fea 6865 (vec_merge:V4SF
6866 (vec_duplicate:V4SF
0a32b282 6867 (match_operand:SF 2 "nonimmediate_operand" "Yrm,*xm,xm"))
6868 (match_operand:V4SF 1 "register_operand" "0,0,x")
abd4f58b 6869 (match_operand:SI 3 "const_int_operand")))]
d3d379e7 6870 "TARGET_SSE4_1
6871 && ((unsigned) exact_log2 (INTVAL (operands[3]))
6872 < GET_MODE_NUNITS (V4SFmode))"
8aa4e142 6873{
2a466fea 6874 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
45c0368c 6875 switch (which_alternative)
6876 {
6877 case 0:
45c0368c 6878 case 1:
0a32b282 6879 return "insertps\t{%3, %2, %0|%0, %2, %3}";
6880 case 2:
45c0368c 6881 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6882 default:
6883 gcc_unreachable ();
6884 }
2a466fea 6885}
0a32b282 6886 [(set_attr "isa" "noavx,noavx,avx")
45c0368c 6887 (set_attr "type" "sselog")
0a32b282 6888 (set_attr "prefix_data16" "1,1,*")
00a0e418 6889 (set_attr "prefix_extra" "1")
6890 (set_attr "length_immediate" "1")
0a32b282 6891 (set_attr "prefix" "orig,orig,vex")
ed30e0a6 6892 (set_attr "mode" "V4SF")])
6893
;; insertps with an arbitrary 8-bit immediate, modelled as an unspec.
;; When operand 2 is in memory, the output code takes the top two bits
;; of the immediate (count_s), clears them from the printed immediate,
;; and instead advances the memory address by count_s * 4 bytes,
;; re-typing it as an SFmode reference.
2a466fea 6894(define_insn "sse4_1_insertps"
0a32b282 6895 [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,x")
6896 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "Yrm,*xm,xm")
6897 (match_operand:V4SF 1 "register_operand" "0,0,x")
6898 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
2a466fea 6899 UNSPEC_INSERTPS))]
6900 "TARGET_SSE4_1"
908f63e8 6901{
6902 if (MEM_P (operands[2]))
6903 {
6904 unsigned count_s = INTVAL (operands[3]) >> 6;
6905 if (count_s)
6906 operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
6907 operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
6908 }
6909 switch (which_alternative)
6910 {
6911 case 0:
908f63e8 6912 case 1:
0a32b282 6913 return "insertps\t{%3, %2, %0|%0, %2, %3}";
6914 case 2:
908f63e8 6915 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6916 default:
6917 gcc_unreachable ();
6918 }
6919}
0a32b282 6920 [(set_attr "isa" "noavx,noavx,avx")
45c0368c 6921 (set_attr "type" "sselog")
0a32b282 6922 (set_attr "prefix_data16" "1,1,*")
2a466fea 6923 (set_attr "prefix_extra" "1")
00a0e418 6924 (set_attr "length_immediate" "1")
0a32b282 6925 (set_attr "prefix" "orig,orig,vex")
2a466fea 6926 (set_attr "mode" "V4SF")])
8aa4e142 6927
;; After reload, setting element 0 of a vector that lives in memory
;; (the merge source is the destination itself) becomes a plain scalar
;; store: the destination is re-typed as a scalar-mode reference at
;; offset 0 and operand 1 is stored into it.
2a466fea 6928(define_split
abd4f58b 6929 [(set (match_operand:VI4F_128 0 "memory_operand")
6fe5844b 6930 (vec_merge:VI4F_128
6931 (vec_duplicate:VI4F_128
abd4f58b 6932 (match_operand:<ssescalarmode> 1 "nonmemory_operand"))
2a466fea 6933 (match_dup 0)
6934 (const_int 1)))]
6935 "TARGET_SSE && reload_completed"
823a2ddd 6936 [(set (match_dup 0) (match_dup 1))]
6937 "operands[0] = adjust_address (operands[0], <ssescalarmode>mode, 0);")
8aa4e142 6938
;; Element-set expander for every mode in V: operand 0 is the vector,
;; operand 1 the scalar value, operand 2 the constant element index.
;; All the work is delegated to ix86_expand_vector_set, called with
;; `false' as its first argument.
77d27e3a 6939(define_expand "vec_set<mode>"
abd4f58b 6940 [(match_operand:V 0 "register_operand")
6941 (match_operand:<ssescalarmode> 1 "register_operand")
6942 (match_operand 2 "const_int_operand")]
2a466fea 6943 "TARGET_SSE"
8aa4e142 6944{
2a466fea 6945 ix86_expand_vector_set (false, operands[0], operands[1],
6946 INTVAL (operands[2]));
6947 DONE;
8aa4e142 6948})
6949
;; Extract element 0 of a V4SF.  Always output as "#" and split after
;; reload into a plain SFmode move: a register source is re-expressed
;; as the same hard register in SFmode, a memory source as an SFmode
;; reference at offset 0.  Memory-to-memory is rejected by the
;; condition.
2a466fea 6950(define_insn_and_split "*vec_extractv4sf_0"
6be36710 6951 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
2a466fea 6952 (vec_select:SF
6be36710 6953 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
2a466fea 6954 (parallel [(const_int 0)])))]
6955 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6956 "#"
6957 "&& reload_completed"
573c5512 6958 [(set (match_dup 0) (match_dup 1))]
bb8107e7 6959{
573c5512 6960 if (REG_P (operands[1]))
6961 operands[1] = gen_rtx_REG (SFmode, REGNO (operands[1]));
2a466fea 6962 else
573c5512 6963 operands[1] = adjust_address (operands[1], SFmode, 0);
bb8107e7 6964})
6965
;; Extract element N (operand 2, 0..3) of a V4SF register.
;; Alternatives 0-1 (general register or memory destination) emit
;; extractps directly.  Alternatives 2-3 (SSE register destination)
;; output "#" and are split after reload: the destination is viewed as
;; a V4SF register and filled by a shufps that replicates element N
;; (N = 1 or 3) or by an interleave-high of the source with itself
;; (N = 2).  N = 0 is not expected here (see the comment in the code).
9525a1bb 6966(define_insn_and_split "*sse4_1_extractps"
0a32b282 6967 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,rm,x,x")
9525a1bb 6968 (vec_select:SF
0a32b282 6969 (match_operand:V4SF 1 "register_operand" "Yr,*x,0,x")
6970 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n,n")])))]
9525a1bb 6971 "TARGET_SSE4_1"
6972 "@
0a32b282 6973 %vextractps\t{%2, %1, %0|%0, %1, %2}
9525a1bb 6974 %vextractps\t{%2, %1, %0|%0, %1, %2}
6975 #
6976 #"
6977 "&& reload_completed && SSE_REG_P (operands[0])"
6978 [(const_int 0)]
6979{
6980 rtx dest = gen_rtx_REG (V4SFmode, REGNO (operands[0]));
6981 switch (INTVAL (operands[2]))
6982 {
6983 case 1:
6984 case 3:
6985 emit_insn (gen_sse_shufps_v4sf (dest, operands[1], operands[1],
6986 operands[2], operands[2],
6987 GEN_INT (INTVAL (operands[2]) + 4),
6988 GEN_INT (INTVAL (operands[2]) + 4)));
6989 break;
6990 case 2:
6991 emit_insn (gen_vec_interleave_highv4sf (dest, operands[1], operands[1]));
6992 break;
6993 default:
6994 /* 0 should be handled by the *vec_extractv4sf_0 pattern above. */
6995 gcc_unreachable ();
6996 }
6997 DONE;
6998}
0a32b282 6999 [(set_attr "isa" "*,*,noavx,avx")
7000 (set_attr "type" "sselog,sselog,*,*")
7001 (set_attr "prefix_data16" "1,1,*,*")
7002 (set_attr "prefix_extra" "1,1,*,*")
7003 (set_attr "length_immediate" "1,1,*,*")
7004 (set_attr "prefix" "maybe_vex,maybe_vex,*,*")
7005 (set_attr "mode" "V4SF,V4SF,*,*")])
9525a1bb 7006
;; Extract element N (operand 2, 0..3) of a V4SF that lives in
;; offsettable memory.  Always output as "#" and split after reload
;; into a scalar SFmode load from byte offset N * 4.
fe4df2ce 7007(define_insn_and_split "*vec_extractv4sf_mem"
9525a1bb 7008 [(set (match_operand:SF 0 "register_operand" "=x,*r,f")
fe4df2ce 7009 (vec_select:SF
7010 (match_operand:V4SF 1 "memory_operand" "o,o,o")
7011 (parallel [(match_operand 2 "const_0_to_3_operand" "n,n,n")])))]
9525a1bb 7012 "TARGET_SSE"
7013 "#"
7014 "&& reload_completed"
823a2ddd 7015 [(set (match_dup 0) (match_dup 1))]
9525a1bb 7016{
823a2ddd 7017 operands[1] = adjust_address (operands[1], SFmode, INTVAL (operands[2]) * 4);
9525a1bb 7018})
7019
;; Name prefix used by the quarter-width (128-bit) extract expander:
;; 32-bit-element modes map to "avx512f", 64-bit-element modes to
;; "avx512dq".
fd1fee28 7020(define_mode_attr extract_type
7021 [(V16SF "avx512f") (V16SI "avx512f") (V8DF "avx512dq") (V8DI "avx512dq")])
7022
;; Instruction-name suffix matching extract_type: "32x4" for the
;; 32-bit-element modes, "64x2" for the 64-bit-element modes.
7023(define_mode_attr extract_suf
7024 [(V16SF "32x4") (V16SI "32x4") (V8DF "64x2") (V8DI "64x2")])
7025
;; 512-bit modes the quarter-width extract expander is generated for;
;; the V8DF/V8DI entries are only enabled under TARGET_AVX512DQ.
7026(define_mode_iterator AVX512_VEC
7027 [(V8DF "TARGET_AVX512DQ") (V8DI "TARGET_AVX512DQ") V16SF V16SI])
7028
;; Expander for the masked extract of one 128-bit quarter of a 512-bit
;; vector.  Operand 0 is the destination, operand 1 the source vector,
;; operand 2 the quarter index (0..3), operand 3 the merge source and
;; operand 4 the mask register.  The quarter index is turned into the
;; explicit element selectors expected by the *_1_mask patterns.
;;
;; A memory destination can only be written directly when the merge
;; source is that very memory location (the *_1_maskm patterns require
;; rtx_equal_p on them).  In every other case the extract is computed
;; into a fresh register and then stored to the real destination.
7029(define_expand "<extract_type>_vextract<shuffletype><extract_suf>_mask"
5220cab6 7030 [(match_operand:<ssequartermode> 0 "nonimmediate_operand")
fd1fee28 7031 (match_operand:AVX512_VEC 1 "register_operand")
5220cab6 7032 (match_operand:SI 2 "const_0_to_3_operand")
7033 (match_operand:<ssequartermode> 3 "nonimmediate_operand")
7034 (match_operand:QI 4 "register_operand")]
7035 "TARGET_AVX512F"
7036{
fd1fee28 7037 int mask;
7038 mask = INTVAL (operands[2]);
  rtx dest = operands[0];
7039
  /* Do not force_reg the destination: that would redirect the result
     into a pseudo that is never stored back.  Use a temporary and
     copy it to the real destination below.  */
5220cab6 7040 if (MEM_P (operands[0]) && !rtx_equal_p (operands[0], operands[3]))
7041 dest = gen_reg_rtx (<ssequartermode>mode);
fd1fee28 7042
7043 if (<MODE>mode == V16SImode || <MODE>mode == V16SFmode)
7044 emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (dest,
7045 operands[1], GEN_INT (mask * 4), GEN_INT (mask * 4 + 1),
7046 GEN_INT (mask * 4 + 2), GEN_INT (mask * 4 + 3), operands[3],
7047 operands[4]));
7048 else
7049 emit_insn (gen_avx512dq_vextract<shuffletype>64x2_1_mask (dest,
7050 operands[1], GEN_INT (mask * 2), GEN_INT (mask * 2 + 1), operands[3],
7051 operands[4]));
  if (dest != operands[0])
    emit_move_insn (operands[0], dest);
5220cab6 7052 DONE;
7053})
7054
;; Masked store form of the 64x2 extract: two consecutive 64-bit
;; elements of operand 1 are merged, under mask operand 5, into the
;; memory destination.  The condition requires the first selector to
;; be even, the second to be first + 1, and the merge source (operand
;; 4) to be the destination itself.  The output code converts the
;; first selector into the instruction immediate (selector >> 1).
fd1fee28 7055(define_insn "avx512dq_vextract<shuffletype>64x2_1_maskm"
7056 [(set (match_operand:<ssequartermode> 0 "memory_operand" "=m")
7057 (vec_merge:<ssequartermode>
7058 (vec_select:<ssequartermode>
7059 (match_operand:V8FI 1 "register_operand" "v")
7060 (parallel [(match_operand 2 "const_0_to_7_operand")
7061 (match_operand 3 "const_0_to_7_operand")]))
7062 (match_operand:<ssequartermode> 4 "memory_operand" "0")
7063 (match_operand:QI 5 "register_operand" "k")))]
7064 "TARGET_AVX512DQ
7065 && (INTVAL (operands[2]) % 2 == 0)
648b0c25 7066 && (INTVAL (operands[2]) == INTVAL (operands[3]) - 1)
7067 && rtx_equal_p (operands[4], operands[0])"
fd1fee28 7068{
7069 operands[2] = GEN_INT ((INTVAL (operands[2])) >> 1);
7070 return "vextract<shuffletype>64x2\t{%2, %1, %0%{%5%}|%0%{%5%}, %1, %2}";
7071}
7072 [(set_attr "type" "sselog")
7073 (set_attr "prefix_extra" "1")
7074 (set_attr "length_immediate" "1")
7075 (set_attr "memory" "store")
7076 (set_attr "prefix" "evex")
7077 (set_attr "mode" "<sseinsnmode>")])
7078
;; Masked store form of the 32x4 extract: four consecutive 32-bit
;; elements of operand 1 are merged, under mask operand 7, into the
;; memory destination.  The condition requires the first selector to
;; be a multiple of 4, the four selectors to be consecutive, and the
;; merge source (operand 6) to be the destination itself.  The output
;; code converts the first selector into the immediate (selector >> 2).
5220cab6 7079(define_insn "avx512f_vextract<shuffletype>32x4_1_maskm"
7080 [(set (match_operand:<ssequartermode> 0 "memory_operand" "=m")
7081 (vec_merge:<ssequartermode>
7082 (vec_select:<ssequartermode>
7083 (match_operand:V16FI 1 "register_operand" "v")
7084 (parallel [(match_operand 2 "const_0_to_15_operand")
7085 (match_operand 3 "const_0_to_15_operand")
7086 (match_operand 4 "const_0_to_15_operand")
7087 (match_operand 5 "const_0_to_15_operand")]))
7088 (match_operand:<ssequartermode> 6 "memory_operand" "0")
a31e7f46 7089 (match_operand:QI 7 "register_operand" "Yk")))]
d948b265 7090 "TARGET_AVX512F
fd1fee28 7091 && ((INTVAL (operands[2]) % 4 == 0)
7092 && INTVAL (operands[2]) == (INTVAL (operands[3]) - 1)
d948b265 7093 && INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
648b0c25 7094 && INTVAL (operands[4]) == (INTVAL (operands[5]) - 1))
7095 && rtx_equal_p (operands[6], operands[0])"
5220cab6 7096{
7097 operands[2] = GEN_INT ((INTVAL (operands[2])) >> 2);
7098 return "vextract<shuffletype>32x4\t{%2, %1, %0%{%7%}|%0%{%7%}, %1, %2}";
7099}
7100 [(set_attr "type" "sselog")
7101 (set_attr "prefix_extra" "1")
7102 (set_attr "length_immediate" "1")
7103 (set_attr "memory" "store")
7104 (set_attr "prefix" "evex")
7105 (set_attr "mode" "<sseinsnmode>")])
7106
;; Extract two consecutive 64-bit elements of operand 1 as one 128-bit
;; quarter.  The output code derives the instruction immediate as
;; (first selector >> 1), so the first selector must be even: without
;; that check a selector pair such as {1, 2} would match and be emitted
;; with immediate 0, which extracts elements {0, 1} instead.  The
;; _maskm variant of this pattern carries the same alignment check.
fd1fee28 7107(define_insn "<mask_codefor>avx512dq_vextract<shuffletype>64x2_1<mask_name>"
7108 [(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
7109 (vec_select:<ssequartermode>
7110 (match_operand:V8FI 1 "register_operand" "v")
7111 (parallel [(match_operand 2 "const_0_to_7_operand")
7112 (match_operand 3 "const_0_to_7_operand")])))]
7113 "TARGET_AVX512DQ
   && INTVAL (operands[2]) % 2 == 0
   && INTVAL (operands[2]) == INTVAL (operands[3]) - 1"
7114{
7115 operands[2] = GEN_INT ((INTVAL (operands[2])) >> 1);
7116 return "vextract<shuffletype>64x2\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}";
7117}
7118 [(set_attr "type" "sselog1")
7119 (set_attr "prefix_extra" "1")
7120 (set_attr "length_immediate" "1")
7121 (set_attr "prefix" "evex")
7122 (set_attr "mode" "<sseinsnmode>")])
7123
;; Extract four consecutive 32-bit elements of operand 1 as one 128-bit
;; quarter.  The output code derives the instruction immediate as
;; (first selector >> 2), so the first selector must be a multiple of
;; 4: without that check a run such as {1, 2, 3, 4} would match and be
;; emitted with immediate 0, which extracts elements {0, 1, 2, 3}
;; instead.  The _maskm variant of this pattern carries the same
;; alignment check.
5220cab6 7124(define_insn "<mask_codefor>avx512f_vextract<shuffletype>32x4_1<mask_name>"
7125 [(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
8e9989b0 7126 (vec_select:<ssequartermode>
7127 (match_operand:V16FI 1 "register_operand" "v")
7128 (parallel [(match_operand 2 "const_0_to_15_operand")
7129 (match_operand 3 "const_0_to_15_operand")
7130 (match_operand 4 "const_0_to_15_operand")
7131 (match_operand 5 "const_0_to_15_operand")])))]
d948b265 7132 "TARGET_AVX512F
   && INTVAL (operands[2]) % 4 == 0
7133 && (INTVAL (operands[2]) == (INTVAL (operands[3]) - 1)
7134 && INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
7135 && INTVAL (operands[4]) == (INTVAL (operands[5]) - 1))"
8e9989b0 7136{
7137 operands[2] = GEN_INT ((INTVAL (operands[2])) >> 2);
5220cab6 7138 return "vextract<shuffletype>32x4\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
8e9989b0 7139}
fd1fee28 7140 [(set_attr "type" "sselog1")
8e9989b0 7141 (set_attr "prefix_extra" "1")
7142 (set_attr "length_immediate" "1")
8e9989b0 7143 (set_attr "prefix" "evex")
7144 (set_attr "mode" "<sseinsnmode>")])
7145
;; ISA name prefix used in the name of the half-vector extract expander below:
;; the 32-bit element modes map to "avx512dq", the 64-bit ones to "avx512f".
fd1fee28 7146(define_mode_attr extract_type_2
7147 [(V16SF "avx512dq") (V16SI "avx512dq") (V8DF "avx512f") (V8DI "avx512f")])
7148
;; Mnemonic suffix (element width x element count) for the same expander.
7149(define_mode_attr extract_suf_2
7150 [(V16SF "32x8") (V16SI "32x8") (V8DF "64x4") (V8DI "64x4")])
7151
;; Modes the half-vector extract expander is instantiated for; the V16SF and
;; V16SI entries are only enabled when TARGET_AVX512DQ holds.
7152(define_mode_iterator AVX512_VEC_2
7153 [(V16SF "TARGET_AVX512DQ") (V16SI "TARGET_AVX512DQ") V8DF V8DI])
7154
;; Masked extraction of the low (operand 2 == 0) or high (operand 2 == 1) half
;; of an AVX512_VEC_2 vector.  Operand 3 is the value supplied for masked-off
;; elements and operand 4 is the mask register.  The work is delegated to
;; gen_vec_extract_{lo,hi}_<mode>_mask.
;;
;; When the destination is memory and operand 3 is a CONST_VECTOR, the result
;; is first computed into a fresh <ssehalfvecmode> register and then stored to
;; operand 0.  (Previously the temporary was created in <ssequartermode> via
;; force_reg and the computed value was never written back to memory.)
7155(define_expand "<extract_type_2>_vextract<shuffletype><extract_suf_2>_mask"
5220cab6 7156 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
fd1fee28 7157 (match_operand:AVX512_VEC_2 1 "register_operand")
5220cab6 7158 (match_operand:SI 2 "const_0_to_1_operand")
7159 (match_operand:<ssehalfvecmode> 3 "nonimmediate_operand")
7160 (match_operand:QI 4 "register_operand")]
7161 "TARGET_AVX512F"
7162{
7163 rtx (*insn)(rtx, rtx, rtx, rtx);
  rtx dest = operands[0];
7164
7165 if (MEM_P (dest) && GET_CODE (operands[3]) == CONST_VECTOR)
7166 dest = gen_reg_rtx (<ssehalfvecmode>mode);
7167
7168 switch (INTVAL (operands[2]))
7169 {
7170 case 0:
7171 insn = gen_vec_extract_lo_<mode>_mask;
7172 break;
7173 case 1:
7174 insn = gen_vec_extract_hi_<mode>_mask;
7175 break;
7176 default:
7177 gcc_unreachable ();
7178 }
7179
7180 emit_insn (insn (dest, operands[1], operands[3], operands[4]));
  /* Store the temporary back if the real destination was memory.  */
  if (dest != operands[0])
    emit_move_insn (operands[0], dest);
7181 DONE;
7182})
7183
;; After reload, replace the extraction of elements 0-3 of a V8FI operand by a
;; plain move of its low half.  A register source is re-expressed as the same
;; register number in <ssehalfvecmode>; any other source goes through
;; gen_lowpart.  The split is only done when TARGET_AVX512VL holds, or when
;; the destination is a register and the source is not an EXT_REX SSE register.
8e9989b0 7184(define_split
7185 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
7186 (vec_select:<ssehalfvecmode>
7187 (match_operand:V8FI 1 "nonimmediate_operand")
7188 (parallel [(const_int 0) (const_int 1)
7189 (const_int 2) (const_int 3)])))]
7190 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
e6007a65 7191 && reload_completed
7192 && (TARGET_AVX512VL || (REG_P (operands[0]) && !EXT_REX_SSE_REG_P (operands[1])))"
8e9989b0 7193 [(const_int 0)]
7194{
7195 rtx op1 = operands[1];
7196 if (REG_P (op1))
7197 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
7198 else
7199 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
7200 emit_move_insn (operands[0], op1);
7201 DONE;
7202})
7203
;; Masked store of elements 0-3 of a V8FI register to memory.  Operand 2 is
;; the merge source and must be the very same memory as operand 0 (constraint
;; "0" plus the rtx_equal_p check); operand 3 is the mask register.
5220cab6 7204(define_insn "vec_extract_lo_<mode>_maskm"
7205 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
7206 (vec_merge:<ssehalfvecmode>
7207 (vec_select:<ssehalfvecmode>
7208 (match_operand:V8FI 1 "register_operand" "v")
7209 (parallel [(const_int 0) (const_int 1)
7210 (const_int 2) (const_int 3)]))
7211 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
a31e7f46 7212 (match_operand:QI 3 "register_operand" "Yk")))]
648b0c25 7213 "TARGET_AVX512F
7214 && rtx_equal_p (operands[2], operands[0])"
fd1fee28 7215 "vextract<shuffletype>64x4\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}"
7216 [(set_attr "type" "sselog1")
5220cab6 7217 (set_attr "prefix_extra" "1")
7218 (set_attr "length_immediate" "1")
7219 (set_attr "prefix" "evex")
7220 (set_attr "mode" "<sseinsnmode>")])
7221
;; Extract elements 0-3 of a V8FI operand, optionally masked via <mask_name>.
;; A real vextract{f,i}64x4 with immediate 0 is emitted when a mask is applied
;; or when AVX512VL is not available; otherwise "#" is returned so that the
;; insn gets split (see the define_split on the same vec_select shape).
7222(define_insn "vec_extract_lo_<mode><mask_name>"
d442e138 7223 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>,v")
8e9989b0 7224 (vec_select:<ssehalfvecmode>
d442e138 7225 (match_operand:V8FI 1 "nonimmediate_operand" "v,m")
8e9989b0 7226 (parallel [(const_int 0) (const_int 1)
7227 (const_int 2) (const_int 3)])))]
7228 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
5220cab6 7229{
e6007a65 7230 if (<mask_applied> || !TARGET_AVX512VL)
5220cab6 7231 return "vextract<shuffletype>64x4\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
7232 else
7233 return "#";
7234}
fd1fee28 7235 [(set_attr "type" "sselog1")
8e9989b0 7236 (set_attr "prefix_extra" "1")
7237 (set_attr "length_immediate" "1")
8e9989b0 7238 (set_attr "prefix" "evex")
7239 (set_attr "mode" "<sseinsnmode>")])
7240
;; Masked store of elements 4-7 of a V8FI register to memory.  Operand 2 is
;; the merge source and must be the very same memory as operand 0 (constraint
;; "0" plus the rtx_equal_p check); operand 3 is the mask register.
5220cab6 7241(define_insn "vec_extract_hi_<mode>_maskm"
7242 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
7243 (vec_merge:<ssehalfvecmode>
7244 (vec_select:<ssehalfvecmode>
7245 (match_operand:V8FI 1 "register_operand" "v")
7246 (parallel [(const_int 4) (const_int 5)
7247 (const_int 6) (const_int 7)]))
7248 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
a31e7f46 7249 (match_operand:QI 3 "register_operand" "Yk")))]
648b0c25 7250 "TARGET_AVX512F
7251 && rtx_equal_p (operands[2], operands[0])"
5220cab6 7252 "vextract<shuffletype>64x4\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
7253 [(set_attr "type" "sselog")
7254 (set_attr "prefix_extra" "1")
7255 (set_attr "length_immediate" "1")
7256 (set_attr "memory" "store")
7257 (set_attr "prefix" "evex")
7258 (set_attr "mode" "<sseinsnmode>")])
7259
;; Extract elements 4-7 of a V8FI register with vextract{f,i}64x4 and
;; immediate 1, optionally masked via <mask_name>.
7260(define_insn "vec_extract_hi_<mode><mask_name>"
7261 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
8e9989b0 7262 (vec_select:<ssehalfvecmode>
7263 (match_operand:V8FI 1 "register_operand" "v")
7264 (parallel [(const_int 4) (const_int 5)
7265 (const_int 6) (const_int 7)])))]
7266 "TARGET_AVX512F"
5220cab6 7267 "vextract<shuffletype>64x4\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}"
fd1fee28 7268 [(set_attr "type" "sselog1")
7269 (set_attr "prefix_extra" "1")
7270 (set_attr "length_immediate" "1")
7271 (set_attr "prefix" "evex")
7272 (set_attr "mode" "<sseinsnmode>")])
7273
;; Masked store of elements 8-15 of a V16FI register to memory with
;; vextract{f,i}32x8.  Operand 2 is the merge source and must be the very same
;; memory as operand 0; operand 3 is the mask register.  The mask uses
;; constraint "Yk" like the other masked patterns in this file, so that only
;; registers usable as a predicate are allocated (plain "k" would also allow
;; k0).
7274(define_insn "vec_extract_hi_<mode>_maskm"
7275 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
7276 (vec_merge:<ssehalfvecmode>
7277 (vec_select:<ssehalfvecmode>
7278 (match_operand:V16FI 1 "register_operand" "v")
7279 (parallel [(const_int 8) (const_int 9)
7280 (const_int 10) (const_int 11)
7281 (const_int 12) (const_int 13)
7282 (const_int 14) (const_int 15)]))
7283 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
7284 (match_operand:QI 3 "register_operand" "Yk")))]
648b0c25 7285 "TARGET_AVX512DQ
7286 && rtx_equal_p (operands[2], operands[0])"
fd1fee28 7287 "vextract<shuffletype>32x8\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
7288 [(set_attr "type" "sselog1")
8e9989b0 7289 (set_attr "prefix_extra" "1")
7290 (set_attr "length_immediate" "1")
8e9989b0 7291 (set_attr "prefix" "evex")
7292 (set_attr "mode" "<sseinsnmode>")])
7293
;; Extract elements 8-15 of a V16FI register, optionally masked via
;; <mask_name>.  The first alternative (isa avx512dq) emits the maskable
;; vextract{f,i}32x8; the second (isa noavx512dq) emits an unmasked
;; vextracti64x4.  Both use immediate 1.
30874fa3 7294(define_insn "vec_extract_hi_<mode><mask_name>"
7295 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>,vm")
7296 (vec_select:<ssehalfvecmode>
7297 (match_operand:V16FI 1 "register_operand" "v,v")
7298 (parallel [(const_int 8) (const_int 9)
7299 (const_int 10) (const_int 11)
7300 (const_int 12) (const_int 13)
7301 (const_int 14) (const_int 15)])))]
fd1fee28 7302 "TARGET_AVX512F && <mask_avx512dq_condition>"
30874fa3 7303 "@
7304 vextract<shuffletype>32x8\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}
7305 vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
7306 [(set_attr "type" "sselog1")
7307 (set_attr "prefix_extra" "1")
7308 (set_attr "isa" "avx512dq,noavx512dq")
7309 (set_attr "length_immediate" "1")
7310 (set_attr "prefix" "evex")
7311 (set_attr "mode" "<sseinsnmode>")])
7312
;; Masked extraction of the low (operand 2 == 0) or high (operand 2 == 1) half
;; of a VI48F_256 vector.  Operand 3 is the value supplied for masked-off
;; elements and operand 4 is the mask register.  The work is delegated to
;; gen_vec_extract_{lo,hi}_<mode>_mask.
;;
;; When the destination is memory and operand 3 is a CONST_VECTOR, the result
;; is computed into a fresh register and then stored to operand 0.
;; (Previously force_reg replaced operands[0] by a register copy of the memory
;; and the computed value was never written back.)
fd1fee28 7313(define_expand "avx512vl_vextractf128<mode>"
7314 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
7315 (match_operand:VI48F_256 1 "register_operand")
7316 (match_operand:SI 2 "const_0_to_1_operand")
7317 (match_operand:<ssehalfvecmode> 3 "vector_move_operand")
7318 (match_operand:QI 4 "register_operand")]
7319 "TARGET_AVX512DQ && TARGET_AVX512VL"
7320{
7321 rtx (*insn)(rtx, rtx, rtx, rtx);
  rtx dest = operands[0];
7322
7323 if (MEM_P (dest) && GET_CODE (operands[3]) == CONST_VECTOR)
7324 dest = gen_reg_rtx (<ssehalfvecmode>mode);
7325
7326 switch (INTVAL (operands[2]))
7327 {
7328 case 0:
7329 insn = gen_vec_extract_lo_<mode>_mask;
7330 break;
7331 case 1:
7332 insn = gen_vec_extract_hi_<mode>_mask;
7333 break;
7334 default:
7335 gcc_unreachable ();
7336 }
7337
7338 emit_insn (insn (dest, operands[1], operands[3], operands[4]));
  /* Store the temporary back if the real destination was memory.  */
  if (dest != operands[0])
    emit_move_insn (operands[0], dest);
7339 DONE;
7340})
7341
;; Unmasked extraction of one half of a V_256 vector: operand 2 selects the
;; low (0) or high (1) half and the matching gen_vec_extract_{lo,hi}_<mode>
;; generator is called with operands 0 and 1.
ed30e0a6 7342(define_expand "avx_vextractf128<mode>"
abd4f58b 7343 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
7344 (match_operand:V_256 1 "register_operand")
7345 (match_operand:SI 2 "const_0_to_1_operand")]
ed30e0a6 7346 "TARGET_AVX"
7347{
c3fa352f 7348 rtx (*insn)(rtx, rtx);
7349
ed30e0a6 7350 switch (INTVAL (operands[2]))
7351 {
7352 case 0:
c3fa352f 7353 insn = gen_vec_extract_lo_<mode>;
ed30e0a6 7354 break;
7355 case 1:
c3fa352f 7356 insn = gen_vec_extract_hi_<mode>;
ed30e0a6 7357 break;
7358 default:
7359 gcc_unreachable ();
7360 }
c3fa352f 7361
7362 emit_insn (insn (operands[0], operands[1]));
ed30e0a6 7363 DONE;
7364})
7365
;; Extract elements 0-7 of a V16FI operand, optionally masked via <mask_name>.
;; With a mask applied a vextract{f,i}32x8 with immediate 0 is emitted;
;; without one "#" is returned so the insn is split after reload (see the
;; define_split on the same vec_select shape).
;; NOTE(review): the first alternative allows a memory source ("vm") even in
;; the masked form, where the vextract template is used -- confirm that this
;; combination cannot be selected.
fd1fee28 7366(define_insn "vec_extract_lo_<mode><mask_name>"
8e9989b0 7367 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
7368 (vec_select:<ssehalfvecmode>
7369 (match_operand:V16FI 1 "nonimmediate_operand" "vm,v")
7370 (parallel [(const_int 0) (const_int 1)
7371 (const_int 2) (const_int 3)
7372 (const_int 4) (const_int 5)
7373 (const_int 6) (const_int 7)])))]
fd1fee28 7374 "TARGET_AVX512F
7375 && <mask_mode512bit_condition>
7376 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8e9989b0 7377{
fd1fee28 7378 if (<mask_applied>)
7379 return "vextract<shuffletype>32x8\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
7380 else
7381 return "#";
7382})
7383
;; After reload, replace the extraction of elements 0-7 of a V16FI operand by
;; a plain move of its low half.  A register source is re-expressed as the
;; same register number in <ssehalfvecmode>; any other source goes through
;; gen_lowpart.
7384(define_split
7385 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
7386 (vec_select:<ssehalfvecmode>
7387 (match_operand:V16FI 1 "nonimmediate_operand")
7388 (parallel [(const_int 0) (const_int 1)
7389 (const_int 2) (const_int 3)
7390 (const_int 4) (const_int 5)
7391 (const_int 6) (const_int 7)])))]
7392 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
7393 && reload_completed"
7394 [(const_int 0)]
7395 {
8e9989b0 7396 rtx op1 = operands[1];
7397 if (REG_P (op1))
7398 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
7399 else
7400 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
7401 emit_move_insn (operands[0], op1);
7402 DONE;
7403})
7404
;; Extract elements 0-1 of a VI8F_256 operand, optionally masked via
;; <mask_name>.  With a mask applied a vextract{f,i}64x2 with immediate 0 is
;; emitted; without one "#" is returned so the insn is split after reload.
;; The masked template uses <mask_operand2>, like the sibling extract
;; patterns, so that the zero-masking modifier (%N2) is printed in addition
;; to the mask register; the former hard-coded "%{%3%}" dropped it.
fd1fee28 7405(define_insn "vec_extract_lo_<mode><mask_name>"
7406 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=v,m")
63d5e521 7407 (vec_select:<ssehalfvecmode>
fd1fee28 7408 (match_operand:VI8F_256 1 "nonimmediate_operand" "vm,v")
ed30e0a6 7409 (parallel [(const_int 0) (const_int 1)])))]
fd1fee28 7410 "TARGET_AVX
7411 && <mask_avx512vl_condition> && <mask_avx512dq_condition>
7412 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3d1a0207 7413{
fd1fee28 7414 if (<mask_applied>)
7415 return "vextract<shuffletype>64x2\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
3d1a0207 7416 else
fd1fee28 7417 return "#";
7418}
7419 [(set_attr "type" "sselog")
7420 (set_attr "prefix_extra" "1")
7421 (set_attr "length_immediate" "1")
7422 (set_attr "memory" "none,store")
7423 (set_attr "prefix" "evex")
7424 (set_attr "mode" "XI")])
7425
;; After reload, replace the extraction of elements 0-1 of a VI8F_256 operand
;; by a plain move of its low half.  A register source is re-expressed as the
;; same register number in <ssehalfvecmode>; any other source goes through
;; gen_lowpart.
7426(define_split
7427 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
7428 (vec_select:<ssehalfvecmode>
7429 (match_operand:VI8F_256 1 "nonimmediate_operand")
7430 (parallel [(const_int 0) (const_int 1)])))]
7431 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))
7432 && reload_completed"
7433 [(const_int 0)]
7434{
7435 rtx op1 = operands[1];
7436 if (REG_P (op1))
7437 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
7438 else
7439 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
7440 emit_move_insn (operands[0], op1);
7441 DONE;
7442})
ed30e0a6 7443
;; Extract elements 2-3 of a VI8F_256 register, optionally masked via
;; <mask_name>.  The mnemonic is chosen at output time: the maskable
;; vextract{f,i}64x2 with AVX512VL and AVX512DQ, vextract{f,i}32x4 with
;; AVX512VL only, and vextract<i128> otherwise.  All use immediate 1.
fd1fee28 7444(define_insn "vec_extract_hi_<mode><mask_name>"
7445 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=v,<store_mask_constraint>")
63d5e521 7446 (vec_select:<ssehalfvecmode>
fd1fee28 7447 (match_operand:VI8F_256 1 "register_operand" "v,v")
ed30e0a6 7448 (parallel [(const_int 2) (const_int 3)])))]
9fbb8dbd 7449 "TARGET_AVX && <mask_avx512vl_condition> && <mask_avx512dq_condition>"
fd1fee28 7450{
9fbb8dbd 7451 if (TARGET_AVX512VL)
7452 {
7453 if (TARGET_AVX512DQ)
7454 return "vextract<shuffletype>64x2\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}";
7455 else
7456 return "vextract<shuffletype>32x4\t{$0x1, %1, %0|%0, %1, 0x1}";
7457 }
fd1fee28 7458 else
7459 return "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}";
7460}
ed30e0a6 7461 [(set_attr "type" "sselog")
00a0e418 7462 (set_attr "prefix_extra" "1")
7463 (set_attr "length_immediate" "1")
ed30e0a6 7464 (set_attr "memory" "none,store")
7465 (set_attr "prefix" "vex")
154d1782 7466 (set_attr "mode" "<sseinsnmode>")])
ed30e0a6 7467
;; After reload, replace the extraction of elements 0-3 of a VI4F_256 operand
;; by a plain move of its low half.  A register source is re-expressed as the
;; same register number in <ssehalfvecmode>; any other source goes through
;; gen_lowpart.
fd1fee28 7468(define_split
7469 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
63d5e521 7470 (vec_select:<ssehalfvecmode>
fd1fee28 7471 (match_operand:VI4F_256 1 "nonimmediate_operand")
ed30e0a6 7472 (parallel [(const_int 0) (const_int 1)
7473 (const_int 2) (const_int 3)])))]
fd1fee28 7474 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1])) && reload_completed"
7475 [(const_int 0)]
3d1a0207 7476{
fd1fee28 7477 rtx op1 = operands[1];
7478 if (REG_P (op1))
7479 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
3d1a0207 7480 else
fd1fee28 7481 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
7482 emit_move_insn (operands[0], op1);
7483 DONE;
3d1a0207 7484})
ed30e0a6 7485
fd1fee28 7486
;; Extract elements 0-3 of a VI4F_256 register, optionally masked via
;; <mask_name>.  With a mask applied a vextract{f,i}32x4 with immediate 0 is
;; emitted; without one "#" is returned so the insn is split after reload.
7487(define_insn "vec_extract_lo_<mode><mask_name>"
7488 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
7489 (vec_select:<ssehalfvecmode>
0a281fd0 7490 (match_operand:VI4F_256 1 "register_operand" "v")
fd1fee28 7491 (parallel [(const_int 0) (const_int 1)
7492 (const_int 2) (const_int 3)])))]
7493 "TARGET_AVX && <mask_avx512vl_condition> && <mask_avx512dq_condition>"
7494{
7495 if (<mask_applied>)
7496 return "vextract<shuffletype>32x4\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
7497 else
7498 return "#";
7499}
7500 [(set_attr "type" "sselog1")
7501 (set_attr "prefix_extra" "1")
7502 (set_attr "length_immediate" "1")
7503 (set_attr "prefix" "evex")
7504 (set_attr "mode" "<sseinsnmode>")])
7505
;; Masked store of elements 0-3 of a VI4F_256 register to memory with
;; vextract{f,i}32x4.  Operand 2 is the merge source and must be the very same
;; memory as operand 0; operand 3 is the mask register.  The mask uses
;; constraint "Yk" like the other masked patterns in this file, so that only
;; registers usable as a predicate are allocated (plain "k" would also allow
;; k0).
7506(define_insn "vec_extract_lo_<mode>_maskm"
7507 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
7508 (vec_merge:<ssehalfvecmode>
7509 (vec_select:<ssehalfvecmode>
7510 (match_operand:VI4F_256 1 "register_operand" "v")
7511 (parallel [(const_int 0) (const_int 1)
7512 (const_int 2) (const_int 3)]))
7513 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
7514 (match_operand:QI 3 "register_operand" "Yk")))]
648b0c25 7515 "TARGET_AVX512VL && TARGET_AVX512F
7516 && rtx_equal_p (operands[2], operands[0])"
7517 "vextract<shuffletype>32x4\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}"
fd1fee28 7518 [(set_attr "type" "sselog1")
7519 (set_attr "prefix_extra" "1")
7520 (set_attr "length_immediate" "1")
7521 (set_attr "prefix" "evex")
7522 (set_attr "mode" "<sseinsnmode>")])
7523
;; Masked store of elements 4-7 of a VI4F_256 register to memory with
;; vextract{f,i}32x4.  Operand 2 is the merge source and must be the very same
;; memory as operand 0; operand 3 is the mask register.  The mask is a QImode
;; operand with constraint "Yk", matching vec_extract_lo_<mode>_maskm; it was
;; previously declared in the vector mode <ssehalfvecmode> with constraint
;; "k", which is not a mask mode and would also allow k0.
7524(define_insn "vec_extract_hi_<mode>_maskm"
7525 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
7526 (vec_merge:<ssehalfvecmode>
7527 (vec_select:<ssehalfvecmode>
7528 (match_operand:VI4F_256 1 "register_operand" "v")
7529 (parallel [(const_int 4) (const_int 5)
7530 (const_int 6) (const_int 7)]))
7531 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
7532 (match_operand:QI 3 "register_operand" "Yk")))]
648b0c25 7533 "TARGET_AVX512F && TARGET_AVX512VL
7534 && rtx_equal_p (operands[2], operands[0])"
7535 "vextract<shuffletype>32x4\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
fd1fee28 7536 [(set_attr "type" "sselog1")
fd1fee28 7537 (set_attr "length_immediate" "1")
7538 (set_attr "prefix" "evex")
7539 (set_attr "mode" "<sseinsnmode>")])
7540
;; Masked extraction of elements 4-7 of a VI4F_256 register into a register.
;; Operand 2 supplies the masked-off elements (either the destination itself
;; or a constant, constraint "0C"); operand 3 is the mask register.  %N2 in
;; the template is applied to operand 2 to print the masking modifier.
4d1088aa 7541(define_insn "vec_extract_hi_<mode>_mask"
7542 [(set (match_operand:<ssehalfvecmode> 0 "register_operand" "=v")
7543 (vec_merge:<ssehalfvecmode>
7544 (vec_select:<ssehalfvecmode>
7545 (match_operand:VI4F_256 1 "register_operand" "v")
7546 (parallel [(const_int 4) (const_int 5)
7547 (const_int 6) (const_int 7)]))
7548 (match_operand:<ssehalfvecmode> 2 "vector_move_operand" "0C")
7549 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
7550 "TARGET_AVX512VL"
7551 "vextract<shuffletype>32x4\t{$0x1, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x1}"
7552 [(set_attr "type" "sselog1")
7553 (set_attr "length_immediate" "1")
7554 (set_attr "prefix" "evex")
7555 (set_attr "mode" "<sseinsnmode>")])
7556
;; Unmasked extraction of elements 4-7 of a VI4F_256 register.  The first
;; alternative ("x" registers, VEX prefix) emits vextract<i128>; the second
;; ("v" registers, isa avx512vl, EVEX prefix) emits vextract{f,i}32x4.
7557(define_insn "vec_extract_hi_<mode>"
7558 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=xm, vm")
63d5e521 7559 (vec_select:<ssehalfvecmode>
4d1088aa 7560 (match_operand:VI4F_256 1 "register_operand" "x, v")
ed30e0a6 7561 (parallel [(const_int 4) (const_int 5)
7562 (const_int 6) (const_int 7)])))]
4d1088aa 7563 "TARGET_AVX"
7564 "@
7565 vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}
7566 vextract<shuffletype>32x4\t{$0x1, %1, %0|%0, %1, 0x1}"
7567 [(set_attr "isa" "*, avx512vl")
7568 (set_attr "prefix" "vex, evex")
7569 (set_attr "type" "sselog1")
00a0e418 7570 (set_attr "length_immediate" "1")
154d1782 7571 (set_attr "mode" "<sseinsnmode>")])
ed30e0a6 7572
;; Extract elements 0-15 of a V32HI operand as V16HI.  No instruction is
;; emitted directly ("#"); after reload the insn is split into a plain move
;; whose source is operand 1 re-expressed in V16HImode (same register number,
;; or the memory at offset 0).
697a43f8 7573(define_insn_and_split "vec_extract_lo_v32hi"
7574 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,m")
7575 (vec_select:V16HI
7576 (match_operand:V32HI 1 "nonimmediate_operand" "vm,v")
7577 (parallel [(const_int 0) (const_int 1)
7578 (const_int 2) (const_int 3)
7579 (const_int 4) (const_int 5)
7580 (const_int 6) (const_int 7)
7581 (const_int 8) (const_int 9)
7582 (const_int 10) (const_int 11)
7583 (const_int 12) (const_int 13)
7584 (const_int 14) (const_int 15)])))]
7585 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7586 "#"
7587 "&& reload_completed"
7588 [(set (match_dup 0) (match_dup 1))]
7589{
7590 if (REG_P (operands[1]))
7591 operands[1] = gen_rtx_REG (V16HImode, REGNO (operands[1]));
7592 else
7593 operands[1] = adjust_address (operands[1], V16HImode, 0);
7594})
7595
;; Extract elements 16-31 of a V32HI register as V16HI using vextracti64x4
;; with immediate 1; the destination may be a register or memory.
7596(define_insn "vec_extract_hi_v32hi"
7597 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,m")
7598 (vec_select:V16HI
0a281fd0 7599 (match_operand:V32HI 1 "register_operand" "v,v")
697a43f8 7600 (parallel [(const_int 16) (const_int 17)
7601 (const_int 18) (const_int 19)
7602 (const_int 20) (const_int 21)
7603 (const_int 22) (const_int 23)
7604 (const_int 24) (const_int 25)
7605 (const_int 26) (const_int 27)
7606 (const_int 28) (const_int 29)
7607 (const_int 30) (const_int 31)])))]
7608 "TARGET_AVX512F"
7609 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
7610 [(set_attr "type" "sselog")
7611 (set_attr "prefix_extra" "1")
7612 (set_attr "length_immediate" "1")
7613 (set_attr "memory" "none,store")
7614 (set_attr "prefix" "evex")
7615 (set_attr "mode" "XI")])
7616
;; Extract elements 0-7 of a V16HI operand as V8HI.  No instruction is emitted
;; directly ("#"); after reload the insn is split into a plain move whose
;; source is operand 1 re-expressed in V8HImode (same register number, or the
;; memory at offset 0).
3d1a0207 7617(define_insn_and_split "vec_extract_lo_v16hi"
ed30e0a6 7618 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
7619 (vec_select:V8HI
3d1a0207 7620 (match_operand:V16HI 1 "nonimmediate_operand" "xm,x")
ed30e0a6 7621 (parallel [(const_int 0) (const_int 1)
7622 (const_int 2) (const_int 3)
7623 (const_int 4) (const_int 5)
7624 (const_int 6) (const_int 7)])))]
a3d5479a 7625 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3d1a0207 7626 "#"
7627 "&& reload_completed"
573c5512 7628 [(set (match_dup 0) (match_dup 1))]
3d1a0207 7629{
573c5512 7630 if (REG_P (operands[1]))
7631 operands[1] = gen_rtx_REG (V8HImode, REGNO (operands[1]));
3d1a0207 7632 else
573c5512 7633 operands[1] = adjust_address (operands[1], V8HImode, 0);
3d1a0207 7634})
ed30e0a6 7635
;; Extract elements 8-15 of a V16HI register as V8HI using the vextract%~128
;; template with immediate 1; the destination may be a register or memory.
7636(define_insn "vec_extract_hi_v16hi"
7637 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
7638 (vec_select:V8HI
7639 (match_operand:V16HI 1 "register_operand" "x,x")
7640 (parallel [(const_int 8) (const_int 9)
7641 (const_int 10) (const_int 11)
7642 (const_int 12) (const_int 13)
7643 (const_int 14) (const_int 15)])))]
7644 "TARGET_AVX"
154d1782 7645 "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
ed30e0a6 7646 [(set_attr "type" "sselog")
00a0e418 7647 (set_attr "prefix_extra" "1")
7648 (set_attr "length_immediate" "1")
ed30e0a6 7649 (set_attr "memory" "none,store")
7650 (set_attr "prefix" "vex")
154d1782 7651 (set_attr "mode" "OI")])
ed30e0a6 7652
;; Extract elements 0-31 of a V64QI operand as V32QI.  No instruction is
;; emitted directly ("#"); after reload the insn is split into a plain move
;; whose source is operand 1 re-expressed in V32QImode (same register number,
;; or the memory at offset 0).
697a43f8 7653(define_insn_and_split "vec_extract_lo_v64qi"
7654 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
7655 (vec_select:V32QI
7656 (match_operand:V64QI 1 "nonimmediate_operand" "vm,v")
7657 (parallel [(const_int 0) (const_int 1)
7658 (const_int 2) (const_int 3)
7659 (const_int 4) (const_int 5)
7660 (const_int 6) (const_int 7)
7661 (const_int 8) (const_int 9)
7662 (const_int 10) (const_int 11)
7663 (const_int 12) (const_int 13)
7664 (const_int 14) (const_int 15)
7665 (const_int 16) (const_int 17)
7666 (const_int 18) (const_int 19)
7667 (const_int 20) (const_int 21)
7668 (const_int 22) (const_int 23)
7669 (const_int 24) (const_int 25)
7670 (const_int 26) (const_int 27)
7671 (const_int 28) (const_int 29)
7672 (const_int 30) (const_int 31)])))]
7673 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7674 "#"
7675 "&& reload_completed"
7676 [(set (match_dup 0) (match_dup 1))]
7677{
7678 if (REG_P (operands[1]))
7679 operands[1] = gen_rtx_REG (V32QImode, REGNO (operands[1]));
7680 else
7681 operands[1] = adjust_address (operands[1], V32QImode, 0);
7682})
7683
;; Extract elements 32-63 of a V64QI register as V32QI using vextracti64x4
;; with immediate 1; the destination may be a register or memory.
7684(define_insn "vec_extract_hi_v64qi"
7685 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
7686 (vec_select:V32QI
0a281fd0 7687 (match_operand:V64QI 1 "register_operand" "v,v")
697a43f8 7688 (parallel [(const_int 32) (const_int 33)
7689 (const_int 34) (const_int 35)
7690 (const_int 36) (const_int 37)
7691 (const_int 38) (const_int 39)
7692 (const_int 40) (const_int 41)
7693 (const_int 42) (const_int 43)
7694 (const_int 44) (const_int 45)
7695 (const_int 46) (const_int 47)
7696 (const_int 48) (const_int 49)
7697 (const_int 50) (const_int 51)
7698 (const_int 52) (const_int 53)
7699 (const_int 54) (const_int 55)
7700 (const_int 56) (const_int 57)
7701 (const_int 58) (const_int 59)
7702 (const_int 60) (const_int 61)
7703 (const_int 62) (const_int 63)])))]
7704 "TARGET_AVX512F"
7705 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
7706 [(set_attr "type" "sselog")
7707 (set_attr "prefix_extra" "1")
7708 (set_attr "length_immediate" "1")
7709 (set_attr "memory" "none,store")
7710 (set_attr "prefix" "evex")
7711 (set_attr "mode" "XI")])
7712
;; Extract elements 0-15 of a V32QI operand as V16QI.  No instruction is
;; emitted directly ("#"); after reload the insn is split into a plain move
;; whose source is operand 1 re-expressed in V16QImode (same register number,
;; or the memory at offset 0).
3d1a0207 7713(define_insn_and_split "vec_extract_lo_v32qi"
ed30e0a6 7714 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
7715 (vec_select:V16QI
3d1a0207 7716 (match_operand:V32QI 1 "nonimmediate_operand" "xm,x")
ed30e0a6 7717 (parallel [(const_int 0) (const_int 1)
7718 (const_int 2) (const_int 3)
7719 (const_int 4) (const_int 5)
7720 (const_int 6) (const_int 7)
7721 (const_int 8) (const_int 9)
7722 (const_int 10) (const_int 11)
7723 (const_int 12) (const_int 13)
7724 (const_int 14) (const_int 15)])))]
a3d5479a 7725 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3d1a0207 7726 "#"
7727 "&& reload_completed"
573c5512 7728 [(set (match_dup 0) (match_dup 1))]
3d1a0207 7729{
573c5512 7730 if (REG_P (operands[1]))
7731 operands[1] = gen_rtx_REG (V16QImode, REGNO (operands[1]));
3d1a0207 7732 else
573c5512 7733 operands[1] = adjust_address (operands[1], V16QImode, 0);
3d1a0207 7734})
ed30e0a6 7735
;; Extract elements 16-31 of a V32QI register as V16QI using the
;; vextract%~128 template with immediate 1; the destination may be a register
;; or memory.
7736(define_insn "vec_extract_hi_v32qi"
7737 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
7738 (vec_select:V16QI
7739 (match_operand:V32QI 1 "register_operand" "x,x")
7740 (parallel [(const_int 16) (const_int 17)
7741 (const_int 18) (const_int 19)
7742 (const_int 20) (const_int 21)
7743 (const_int 22) (const_int 23)
7744 (const_int 24) (const_int 25)
7745 (const_int 26) (const_int 27)
7746 (const_int 28) (const_int 29)
7747 (const_int 30) (const_int 31)])))]
7748 "TARGET_AVX"
154d1782 7749 "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
ed30e0a6 7750 [(set_attr "type" "sselog")
00a0e418 7751 (set_attr "prefix_extra" "1")
7752 (set_attr "length_immediate" "1")
ed30e0a6 7753 (set_attr "memory" "none,store")
7754 (set_attr "prefix" "vex")
154d1782 7755 (set_attr "mode" "OI")])
ed30e0a6 7756
6fe5844b 7757;; Modes handled by vec_extract patterns.
;; Each row lists the three vector widths of one element type.  The widest
;; mode needs TARGET_AVX512BW (QI/HI elements) or TARGET_AVX512F (others), the
;; middle one needs TARGET_AVX, and the narrowest is unconditional here.
7758(define_mode_iterator VEC_EXTRACT_MODE
fd1fee28 7759 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
7760 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
697a43f8 7761 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
7762 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
7763 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
7764 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
6fe5844b 7765
;; Extract the scalar element with constant index operand 2 from vector
;; operand 1 into register operand 0.  All the work is delegated to
;; ix86_expand_vector_extract, called with `false' as its first argument.
3ce6ef72 7766(define_expand "vec_extract<mode>"
abd4f58b 7767 [(match_operand:<ssescalarmode> 0 "register_operand")
7768 (match_operand:VEC_EXTRACT_MODE 1 "register_operand")
7769 (match_operand 2 "const_int_operand")]
2a466fea 7770 "TARGET_SSE"
887b0069 7771{
2a466fea 7772 ix86_expand_vector_extract (false, operands[0], operands[1],
7773 INTVAL (operands[2]));
887b0069 7774 DONE;
7775})
7776
5802c0cb 7777;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7778;;
7779;; Parallel double-precision floating point element swizzling
7780;;
7781;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7782
;; vunpckhpd on V8DF, optionally masked via <mask_name>: from the
;; concatenation of operands 1 and 2, select the odd-indexed elements of
;; operand 1 (1,3,5,7) interleaved with the odd-indexed elements of operand 2
;; (9,11,13,15 in the concatenated numbering).
5220cab6 7783(define_insn "<mask_codefor>avx512f_unpckhpd512<mask_name>"
697a43f8 7784 [(set (match_operand:V8DF 0 "register_operand" "=v")
7785 (vec_select:V8DF
7786 (vec_concat:V16DF
0a281fd0 7787 (match_operand:V8DF 1 "register_operand" "v")
697a43f8 7788 (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
7789 (parallel [(const_int 1) (const_int 9)
7790 (const_int 3) (const_int 11)
7791 (const_int 5) (const_int 13)
7792 (const_int 7) (const_int 15)])))]
7793 "TARGET_AVX512F"
5220cab6 7794 "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
697a43f8 7795 [(set_attr "type" "sselog")
7796 (set_attr "prefix" "evex")
7797 (set_attr "mode" "V8DF")])
7798
d6e05290 7799;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; vunpckhpd on V4DF, optionally masked via <mask_name>: selects elements
;; 1,5,3,7 of the concatenation of operands 1 and 2.
fd4e0ec4 7800(define_insn "avx_unpckhpd256<mask_name>"
7801 [(set (match_operand:V4DF 0 "register_operand" "=v")
ed30e0a6 7802 (vec_select:V4DF
7803 (vec_concat:V8DF
fd4e0ec4 7804 (match_operand:V4DF 1 "register_operand" "v")
7805 (match_operand:V4DF 2 "nonimmediate_operand" "vm"))
5e56456b 7806 (parallel [(const_int 1) (const_int 5)
ed30e0a6 7807 (const_int 3) (const_int 7)])))]
fd4e0ec4 7808 "TARGET_AVX && <mask_avx512vl_condition>"
7809 "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
ed30e0a6 7810 [(set_attr "type" "sselog")
7811 (set_attr "prefix" "vex")
7812 (set_attr "mode" "V4DF")])
7813
;; Interleave the high halves of two V4DF vectors using three steps:
;;   operand 3 = elements 0,4,2,6 of (op1 ++ op2)
;;   operand 4 = elements 1,5,3,7 of (op1 ++ op2)
;;   operand 0 = elements 2,3,6,7 of (op3 ++ op4)
;; Operands 3 and 4 are fresh V4DF pseudo registers created in the
;; preparation statements.
8cedf886 7814(define_expand "vec_interleave_highv4df"
7815 [(set (match_dup 3)
7816 (vec_select:V4DF
7817 (vec_concat:V8DF
7818 (match_operand:V4DF 1 "register_operand" "x")
7819 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
7820 (parallel [(const_int 0) (const_int 4)
7821 (const_int 2) (const_int 6)])))
7822 (set (match_dup 4)
7823 (vec_select:V4DF
7824 (vec_concat:V8DF
7825 (match_dup 1)
7826 (match_dup 2))
7827 (parallel [(const_int 1) (const_int 5)
7828 (const_int 3) (const_int 7)])))
abd4f58b 7829 (set (match_operand:V4DF 0 "register_operand")
33d0986a 7830 (vec_select:V4DF
7831 (vec_concat:V8DF
8cedf886 7832 (match_dup 3)
33d0986a 7833 (match_dup 4))
7834 (parallel [(const_int 2) (const_int 3)
7835 (const_int 6) (const_int 7)])))]
8cedf886 7836 "TARGET_AVX"
7837{
7838 operands[3] = gen_reg_rtx (V4DFmode);
7839 operands[4] = gen_reg_rtx (V4DFmode);
7840})
7841
7842
;; Masked vunpckhpd on V2DF: selects elements 1 and 3 of the concatenation of
;; operands 1 and 2.  Operand 3 supplies the masked-off elements (the
;; destination itself or a constant, constraint "0C") and operand 4 is the
;; mask register.
fd4e0ec4 7843(define_insn "avx512vl_unpckhpd128_mask"
7844 [(set (match_operand:V2DF 0 "register_operand" "=v")
7845 (vec_merge:V2DF
7846 (vec_select:V2DF
7847 (vec_concat:V4DF
7848 (match_operand:V2DF 1 "register_operand" "v")
7849 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
7850 (parallel [(const_int 1) (const_int 3)]))
7851 (match_operand:V2DF 3 "vector_move_operand" "0C")
7852 (match_operand:QI 4 "register_operand" "Yk")))]
7853 "TARGET_AVX512VL"
7854 "vunpckhpd\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
7855 [(set_attr "type" "sselog")
7856 (set_attr "prefix" "evex")
7857 (set_attr "mode" "V2DF")])
7858
;; Select elements 1 and 3 of the concatenation of two V2DF operands.  If
;; ix86_vec_interleave_v2df_operator_ok rejects the operand combination,
;; operand 2 is forced into a register before the pattern is emitted.
d6e05290 7859(define_expand "vec_interleave_highv2df"
abd4f58b 7860 [(set (match_operand:V2DF 0 "register_operand")
7c839b3f 7861 (vec_select:V2DF
7862 (vec_concat:V4DF
abd4f58b 7863 (match_operand:V2DF 1 "nonimmediate_operand")
7864 (match_operand:V2DF 2 "nonimmediate_operand"))
7c839b3f 7865 (parallel [(const_int 1)
7866 (const_int 3)])))]
7867 "TARGET_SSE2"
04e14b44 7868{
7869 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
7870 operands[2] = force_reg (V2DFmode, operands[2]);
7871})
7c839b3f 7872
;; Insn matching the V2DF high interleave (elements 1 and 3 of op1 ++ op2).
;; There are six alternatives, in constraint order:
;;   0: unpckhpd   (noavx, op1 tied to the destination)
;;   1: vunpckhpd  (avx, three-operand form)
;;   2: movddup    (sse3, op1 is offsettable memory and op2 is the same as op1)
;;   3: movlpd     (noavx, op1 in memory, op2 tied to the destination)
;;   4: vmovlpd    (avx, op1 in memory, op2 in a register)
;;   5: movhpd     (store form: destination in memory, op2 tied to it)
;; %H1 prints the high part of memory operand 1.
01624f90 7873(define_insn "*vec_interleave_highv2df"
f30b3ad6 7874 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
04e14b44 7875 (vec_select:V2DF
7876 (vec_concat:V4DF
f30b3ad6 7877 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,o,o,o,x")
7878 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,0,x,0"))
04e14b44 7879 (parallel [(const_int 1)
7880 (const_int 3)])))]
01624f90 7881 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
04e14b44 7882 "@
7883 unpckhpd\t{%2, %0|%0, %2}
45c0368c 7884 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
7885 %vmovddup\t{%H1, %0|%0, %H1}
04e14b44 7886 movlpd\t{%H1, %0|%0, %H1}
45c0368c 7887 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
c358a059 7888 %vmovhpd\t{%1, %0|%q0, %1}"
f30b3ad6 7889 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
8c1dfa94 7890 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
7891 (set_attr "ssememalign" "64")
45c0368c 7892 (set_attr "prefix_data16" "*,*,*,1,*,1")
7893 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
889d67a8 7894 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
ed30e0a6 7895
;; Duplicate the even-indexed elements of a V8DF operand: operand 1 is
;; concatenated with itself and elements 0,8,2,10,4,12,6,14 are selected.
;; Optionally masked via <mask_name>.
5220cab6 7896(define_expand "avx512f_movddup512<mask_name>"
697a43f8 7897 [(set (match_operand:V8DF 0 "register_operand")
7898 (vec_select:V8DF
7899 (vec_concat:V16DF
7900 (match_operand:V8DF 1 "nonimmediate_operand")
7901 (match_dup 1))
7902 (parallel [(const_int 0) (const_int 8)
7903 (const_int 2) (const_int 10)
7904 (const_int 4) (const_int 12)
7905 (const_int 6) (const_int 14)])))]
7906 "TARGET_AVX512F")
7907
;; Named expander for the V8DF low unpack: selects elements
;; 0,8,2,10,4,12,6,14 of the concatenation of operands 1 and 2.  Optionally
;; masked via <mask_name>.
5220cab6 7908(define_expand "avx512f_unpcklpd512<mask_name>"
697a43f8 7909 [(set (match_operand:V8DF 0 "register_operand")
7910 (vec_select:V8DF
7911 (vec_concat:V16DF
7912 (match_operand:V8DF 1 "register_operand")
7913 (match_operand:V8DF 2 "nonimmediate_operand"))
7914 (parallel [(const_int 0) (const_int 8)
7915 (const_int 2) (const_int 10)
7916 (const_int 4) (const_int 12)
7917 (const_int 6) (const_int 14)])))]
7918 "TARGET_AVX512F")
7919
;; Insn for the V8DF low unpack, optionally masked via <mask_name>.  When
;; operand 2 is the same as operand 1 (constraint "1", first alternative) it
;; is emitted as vmovddup, which also accepts a memory source; otherwise as
;; vunpcklpd.
5220cab6 7920(define_insn "*avx512f_unpcklpd512<mask_name>"
697a43f8 7921 [(set (match_operand:V8DF 0 "register_operand" "=v,v")
7922 (vec_select:V8DF
7923 (vec_concat:V16DF
7924 (match_operand:V8DF 1 "nonimmediate_operand" "vm, v")
7925 (match_operand:V8DF 2 "nonimmediate_operand" "1 ,vm"))
7926 (parallel [(const_int 0) (const_int 8)
7927 (const_int 2) (const_int 10)
7928 (const_int 4) (const_int 12)
7929 (const_int 6) (const_int 14)])))]
7930 "TARGET_AVX512F"
7931 "@
5220cab6 7932 vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}
7933 vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
697a43f8 7934 [(set_attr "type" "sselog")
7935 (set_attr "prefix" "evex")
7936 (set_attr "mode" "V8DF")])
7937
04e14b44 7938;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Duplicate the even-indexed elements of a V4DF operand: operand 1 is
;; concatenated with itself and elements 0,4,2,6 are selected.  Optionally
;; masked via <mask_name>.
fd4e0ec4 7939(define_expand "avx_movddup256<mask_name>"
abd4f58b 7940 [(set (match_operand:V4DF 0 "register_operand")
ed30e0a6 7941 (vec_select:V4DF
7942 (vec_concat:V8DF
abd4f58b 7943 (match_operand:V4DF 1 "nonimmediate_operand")
ed30e0a6 7944 (match_dup 1))
04e14b44 7945 (parallel [(const_int 0) (const_int 4)
7946 (const_int 2) (const_int 6)])))]
fd4e0ec4 7947 "TARGET_AVX && <mask_avx512vl_condition>")
5802c0cb 7948
;; Expander for the 256-bit unpcklpd pattern: interleaves elements 0 and 2 of
;; operand 1 with elements 0 and 2 of operand 2 (concat indices 4 and 6).
fd4e0ec4 7949(define_expand "avx_unpcklpd256<mask_name>"
abd4f58b 7950 [(set (match_operand:V4DF 0 "register_operand")
ed30e0a6 7951 (vec_select:V4DF
7952 (vec_concat:V8DF
abd4f58b 7953 (match_operand:V4DF 1 "register_operand")
7954 (match_operand:V4DF 2 "nonimmediate_operand"))
ed30e0a6 7955 (parallel [(const_int 0) (const_int 4)
5e56456b 7956 (const_int 2) (const_int 6)])))]
fd4e0ec4 7957 "TARGET_AVX && <mask_avx512vl_condition>")
04e14b44 7958
;; Matcher for the two 256-bit expanders above.  Note the alternative order
;; is the reverse of the 512-bit pattern: alternative 0 is vunpcklpd
;; (register operand 1, register-or-memory operand 2); alternative 1 is
;; vmovddup (memory operand 1 with operand 2 tied to it via "1").
fd4e0ec4 7959(define_insn "*avx_unpcklpd256<mask_name>"
7960 [(set (match_operand:V4DF 0 "register_operand" "=v,v")
04e14b44 7961 (vec_select:V4DF
7962 (vec_concat:V8DF
fd4e0ec4 7963 (match_operand:V4DF 1 "nonimmediate_operand" " v,m")
7964 (match_operand:V4DF 2 "nonimmediate_operand" "vm,1"))
04e14b44 7965 (parallel [(const_int 0) (const_int 4)
7966 (const_int 2) (const_int 6)])))]
fd4e0ec4 7967 "TARGET_AVX && <mask_avx512vl_condition>"
04e14b44 7968 "@
fd4e0ec4 7969 vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
7970 vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}"
ed30e0a6 7971 [(set_attr "type" "sselog")
7972 (set_attr "prefix" "vex")
7973 (set_attr "mode" "V4DF")])
7974
;; Full (cross-lane) low interleave of two V4DF vectors, built from three
;; steps because the 256-bit unpck insns only shuffle within their lanes:
;;   operand 3 (temp) = elements 0,4,2,6 of {op1,op2}  (in-lane low unpack)
;;   operand 4 (temp) = elements 1,5,3,7 of {op1,op2}  (in-lane high unpack)
;;   operand 0        = elements 0,1 of operand 3 followed by elements 0,1
;;                      of operand 4 (concat indices 4,5).
;; The preparation code allocates the two temporaries.  Operands 1 and 2
;; carry predicates only, like every other define_expand in this file; the
;; constraint strings that used to be here served no purpose in an expander.
8cedf886 7975(define_expand "vec_interleave_lowv4df"
7976 [(set (match_dup 3)
7977 (vec_select:V4DF
7978 (vec_concat:V8DF
7979 (match_operand:V4DF 1 "register_operand")
7980 (match_operand:V4DF 2 "nonimmediate_operand"))
7981 (parallel [(const_int 0) (const_int 4)
7982 (const_int 2) (const_int 6)])))
7983 (set (match_dup 4)
7984 (vec_select:V4DF
7985 (vec_concat:V8DF
7986 (match_dup 1)
7987 (match_dup 2))
7988 (parallel [(const_int 1) (const_int 5)
7989 (const_int 3) (const_int 7)])))
abd4f58b 7990 (set (match_operand:V4DF 0 "register_operand")
33d0986a 7991 (vec_select:V4DF
7992 (vec_concat:V8DF
8cedf886 7993 (match_dup 3)
33d0986a 7994 (match_dup 4))
7995 (parallel [(const_int 0) (const_int 1)
5deb404d 7996 (const_int 4) (const_int 5)])))]
8cedf886 7997 "TARGET_AVX"
7998{
7999 operands[3] = gen_reg_rtx (V4DFmode);
8000 operands[4] = gen_reg_rtx (V4DFmode);
8001})
8002
;; Masked 128-bit vunpcklpd.  The unpack result (element 0 of operand 1 and
;; element 0 of operand 2) is merged with operand 3 under mask register
;; operand 4.  Operand 3 is either tied to the destination ("0") or matches
;; the "C" constraint; %N3 in the template depends on which one was chosen.
fd4e0ec4 8003(define_insn "avx512vl_unpcklpd128_mask"
8004 [(set (match_operand:V2DF 0 "register_operand" "=v")
8005 (vec_merge:V2DF
8006 (vec_select:V2DF
8007 (vec_concat:V4DF
8008 (match_operand:V2DF 1 "register_operand" "v")
8009 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
8010 (parallel [(const_int 0) (const_int 2)]))
8011 (match_operand:V2DF 3 "vector_move_operand" "0C")
8012 (match_operand:QI 4 "register_operand" "Yk")))]
8013 "TARGET_AVX512VL"
8014 "vunpcklpd\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
8015 [(set_attr "type" "sselog")
8016 (set_attr "prefix" "evex")
8017 (set_attr "mode" "V2DF")])
8018
;; Named expander for the V2DF low interleave (elements 0 and 2 of the
;; concatenation, i.e. element 0 of each input).  Both inputs may be memory
;; here; if ix86_vec_interleave_v2df_operator_ok rejects the combination,
;; operand 1 is forced into a register so the insn below can match.
d6e05290 8019(define_expand "vec_interleave_lowv2df"
abd4f58b 8020 [(set (match_operand:V2DF 0 "register_operand")
7c839b3f 8021 (vec_select:V2DF
8022 (vec_concat:V4DF
abd4f58b 8023 (match_operand:V2DF 1 "nonimmediate_operand")
8024 (match_operand:V2DF 2 "nonimmediate_operand"))
7c839b3f 8025 (parallel [(const_int 0)
8026 (const_int 2)])))]
8027 "TARGET_SSE2"
04e14b44 8028{
8029 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
8030 operands[1] = force_reg (V2DFmode, operands[1]);
8031})
7c839b3f 8032
;; Matcher for the V2DF low interleave.  Alternatives, in order:
;;   0: unpcklpd   (SSE2, destination tied to operand 1)
;;   1: vunpcklpd  (AVX three-operand form)
;;   2: movddup    (SSE3, memory operand 1 with operand 2 tied to it)
;;   3: movhpd     (load operand 2 from memory, destination tied to operand 1)
;;   4: vmovhpd    (AVX form of alternative 3)
;;   5: movlpd     (memory destination; stores operand 2 to %H0)
;; The "isa" attribute restricts each alternative to the matching ISA.
01624f90 8033(define_insn "*vec_interleave_lowv2df"
f30b3ad6 8034 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,o")
04e14b44 8035 (vec_select:V2DF
8036 (vec_concat:V4DF
f30b3ad6 8037 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,m,0,x,0")
8038 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,m,m,x"))
04e14b44 8039 (parallel [(const_int 0)
8040 (const_int 2)])))]
01624f90 8041 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
04e14b44 8042 "@
8043 unpcklpd\t{%2, %0|%0, %2}
45c0368c 8044 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
c358a059 8045 %vmovddup\t{%1, %0|%0, %q1}
8046 movhpd\t{%2, %0|%0, %q2}
8047 vmovhpd\t{%2, %1, %0|%0, %1, %q2}
45c0368c 8048 %vmovlpd\t{%2, %H0|%H0, %2}"
f30b3ad6 8049 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
45c0368c 8050 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
8c1dfa94 8051 (set_attr "ssememalign" "64")
45c0368c 8052 (set_attr "prefix_data16" "*,*,*,1,*,1")
8053 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
889d67a8 8054 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
ed30e0a6 8055
;; After reload, a store to memory of the low element of register operand 1
;; duplicated into both halves is split into two DFmode stores of that low
;; element, at byte offsets 0 and 8 of the destination.
04e14b44 8056(define_split
abd4f58b 8057 [(set (match_operand:V2DF 0 "memory_operand")
04e14b44 8058 (vec_select:V2DF
8059 (vec_concat:V4DF
abd4f58b 8060 (match_operand:V2DF 1 "register_operand")
04e14b44 8061 (match_dup 1))
8062 (parallel [(const_int 0)
8063 (const_int 2)])))]
8064 "TARGET_SSE3 && reload_completed"
8065 [(const_int 0)]
8066{
8067 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
8068 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
8069 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
8070 DONE;
8071})
8072
;; Duplicating one element of a V2DF memory operand: when the two selectors
;; name the same element of both copies (operand 3 == operand 2 + 2), rewrite
;; as a vec_duplicate of the DFmode value at byte offset operand 2 * 8.
8073(define_split
abd4f58b 8074 [(set (match_operand:V2DF 0 "register_operand")
04e14b44 8075 (vec_select:V2DF
8076 (vec_concat:V4DF
abd4f58b 8077 (match_operand:V2DF 1 "memory_operand")
04e14b44 8078 (match_dup 1))
abd4f58b 8079 (parallel [(match_operand:SI 2 "const_0_to_1_operand")
8080 (match_operand:SI 3 "const_int_operand")])))]
04e14b44 8081 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
8082 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
8083{
8084 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
8085})
8086
;; Scalar vscalef on 128-bit vectors.  The vec_merge with (const_int 1) takes
;; element 0 from the UNSPEC_SCALEF result and the remaining elements from
;; operand 1.  <round_name> adds the embedded-rounding variant.
0b7cc9c6 8087(define_insn "avx512f_vmscalef<mode><round_name>"
85065932 8088 [(set (match_operand:VF_128 0 "register_operand" "=v")
8089 (vec_merge:VF_128
5220cab6 8090 (unspec:VF_128
8091 [(match_operand:VF_128 1 "register_operand" "v")
fbf4df62 8092 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>")]
5220cab6 8093 UNSPEC_SCALEF)
85065932 8094 (match_dup 1)
8095 (const_int 1)))]
8096 "TARGET_AVX512F"
f46a34a6 8097 "vscalef<ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
85065932 8098 [(set_attr "prefix" "evex")
8099 (set_attr "mode" "<ssescalarmode>")])
8100
;; Packed vscalef over the VF_AVX512VL modes, modelled as UNSPEC_SCALEF of
;; operands 1 and 2.  <mask_name> and <round_name> generate the masked and
;; embedded-rounding variants.
250533c0 8101(define_insn "<avx512>_scalef<mode><mask_name><round_name>"
8102 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
8103 (unspec:VF_AVX512VL
8104 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
8105 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")]
5220cab6 8106 UNSPEC_SCALEF))]
85065932 8107 "TARGET_AVX512F"
f46a34a6 8108 "vscalef<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
85065932 8109 [(set_attr "prefix" "evex")
8110 (set_attr "mode" "<MODE>")])
8111
;; Zero-masking entry point for vpternlog.  Forwards operands 0-4 to the
;; _maskz_1 pattern, inserting a zero vector (CONST0_RTX) ahead of the mask
;; register operand 5.
3d038641 8112(define_expand "<avx512>_vternlog<mode>_maskz"
8113 [(match_operand:VI48_AVX512VL 0 "register_operand")
8114 (match_operand:VI48_AVX512VL 1 "register_operand")
8115 (match_operand:VI48_AVX512VL 2 "register_operand")
8116 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand")
9a5ea1d5 8117 (match_operand:SI 4 "const_0_to_255_operand")
8118 (match_operand:<avx512fmaskmode> 5 "register_operand")]
8119 "TARGET_AVX512F"
8120{
3d038641 8121 emit_insn (gen_<avx512>_vternlog<mode>_maskz_1 (
9a5ea1d5 8122 operands[0], operands[1], operands[2], operands[3],
8123 operands[4], CONST0_RTX (<MODE>mode), operands[5]));
8124 DONE;
8125})
8126
;; vpternlog: UNSPEC_VTERNLOG of three vector inputs and an 8-bit immediate
;; (operand 4).  Operand 1 is tied to the destination ("0"), so the template
;; prints only %0, %2, %3 and the immediate.
3d038641 8127(define_insn "<avx512>_vternlog<mode><sd_maskz_name>"
8128 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
8129 (unspec:VI48_AVX512VL
8130 [(match_operand:VI48_AVX512VL 1 "register_operand" "0")
8131 (match_operand:VI48_AVX512VL 2 "register_operand" "v")
8132 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand" "vm")
d2ff59d6 8133 (match_operand:SI 4 "const_0_to_255_operand")]
8134 UNSPEC_VTERNLOG))]
8135 "TARGET_AVX512F"
9a5ea1d5 8136 "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3, %4}"
d2ff59d6 8137 [(set_attr "type" "sselog")
8138 (set_attr "prefix" "evex")
8139 (set_attr "mode" "<sseinsnmode>")])
8140
;; Merge-masking vpternlog: the UNSPEC_VTERNLOG result is merged with
;; operand 1 (which is tied to the destination) under mask register
;; operand 5.
3d038641 8141(define_insn "<avx512>_vternlog<mode>_mask"
8142 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
8143 (vec_merge:VI48_AVX512VL
8144 (unspec:VI48_AVX512VL
8145 [(match_operand:VI48_AVX512VL 1 "register_operand" "0")
8146 (match_operand:VI48_AVX512VL 2 "register_operand" "v")
8147 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand" "vm")
5220cab6 8148 (match_operand:SI 4 "const_0_to_255_operand")]
8149 UNSPEC_VTERNLOG)
8150 (match_dup 1)
a31e7f46 8151 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
5220cab6 8152 "TARGET_AVX512F"
8153 "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0%{%5%}|%0%{%5%}, %2, %3, %4}"
8154 [(set_attr "type" "sselog")
8155 (set_attr "prefix" "evex")
8156 (set_attr "mode" "<sseinsnmode>")])
8157
;; Packed vgetexp: UNSPEC_GETEXP of operand 1, with masked and SAE-only
;; variants generated through <mask_name> and <round_saeonly_name>.
;; NOTE(review): the ';' after the template string starts an .md comment and
;; is harmless, but looks unintentional.
250533c0 8158(define_insn "<avx512>_getexp<mode><mask_name><round_saeonly_name>"
8159 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
8160 (unspec:VF_AVX512VL [(match_operand:VF_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
85065932 8161 UNSPEC_GETEXP))]
8162 "TARGET_AVX512F"
dbfe84d5 8163 "vgetexp<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}";
85065932 8164 [(set_attr "prefix" "evex")
8165 (set_attr "mode" "<MODE>")])
8166
;; Scalar vgetexp: element 0 comes from UNSPEC_GETEXP of operands 1 and 2,
;; the remaining elements from operand 1 (vec_merge with mask 1).
0b7cc9c6 8167(define_insn "avx512f_sgetexp<mode><round_saeonly_name>"
85065932 8168 [(set (match_operand:VF_128 0 "register_operand" "=v")
8169 (vec_merge:VF_128
5220cab6 8170 (unspec:VF_128
8171 [(match_operand:VF_128 1 "register_operand" "v")
fbf4df62 8172 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
5220cab6 8173 UNSPEC_GETEXP)
85065932 8174 (match_dup 1)
8175 (const_int 1)))]
8176 "TARGET_AVX512F"
0b7cc9c6 8177 "vgetexp<ssescalarmodesuffix>\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %2<round_saeonly_op3>}";
85065932 8178 [(set_attr "prefix" "evex")
8179 (set_attr "mode" "<ssescalarmode>")])
8180
;; valign: UNSPEC_ALIGN of two vector inputs and an 8-bit immediate
;; (operand 3), with an optional mask via <mask_name>.
3d038641 8181(define_insn "<mask_codefor><avx512>_align<mode><mask_name>"
8182 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
8183 (unspec:VI48_AVX512VL [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
8184 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
8185 (match_operand:SI 3 "const_0_to_255_operand")]
8186 UNSPEC_ALIGN))]
d2ff59d6 8187 "TARGET_AVX512F"
5220cab6 8188 "valign<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
d2ff59d6 8189 [(set_attr "prefix" "evex")
8190 (set_attr "mode" "<sseinsnmode>")])
8191
;; Masked 512-bit vshufps entry point taking the 8-bit immediate (operand 3).
;; The immediate's four 2-bit fields are expanded into 16 explicit element
;; selectors for avx512f_shufps512_1_mask: for each group of four result
;; elements (base 0, 4, 8, 12) the first two fields index operand 1
;; (base + field) and the last two index operand 2 (16 + base + field).
;; Operands 4 and 5 (merge source and mask) are passed through unchanged.
5220cab6 8192(define_expand "avx512f_shufps512_mask"
8193 [(match_operand:V16SF 0 "register_operand")
8194 (match_operand:V16SF 1 "register_operand")
8195 (match_operand:V16SF 2 "nonimmediate_operand")
8196 (match_operand:SI 3 "const_0_to_255_operand")
8197 (match_operand:V16SF 4 "register_operand")
8198 (match_operand:HI 5 "register_operand")]
8199 "TARGET_AVX512F"
8200{
8201 int mask = INTVAL (operands[3]);
8202 emit_insn (gen_avx512f_shufps512_1_mask (operands[0], operands[1], operands[2],
8203 GEN_INT ((mask >> 0) & 3),
8204 GEN_INT ((mask >> 2) & 3),
8205 GEN_INT (((mask >> 4) & 3) + 16),
8206 GEN_INT (((mask >> 6) & 3) + 16),
8207 GEN_INT (((mask >> 0) & 3) + 4),
8208 GEN_INT (((mask >> 2) & 3) + 4),
8209 GEN_INT (((mask >> 4) & 3) + 20),
8210 GEN_INT (((mask >> 6) & 3) + 20),
8211 GEN_INT (((mask >> 0) & 3) + 8),
8212 GEN_INT (((mask >> 2) & 3) + 8),
8213 GEN_INT (((mask >> 4) & 3) + 24),
8214 GEN_INT (((mask >> 6) & 3) + 24),
8215 GEN_INT (((mask >> 0) & 3) + 12),
8216 GEN_INT (((mask >> 2) & 3) + 12),
8217 GEN_INT (((mask >> 4) & 3) + 28),
8218 GEN_INT (((mask >> 6) & 3) + 28),
8219 operands[4], operands[5]));
8220 DONE;
8221})
8222
9a5ea1d5 8223
;; Zero-masking entry point for packed vfixupimm.  Forwards operands 0-4 to
;; the _maskz_1 pattern with a zero vector (CONST0_RTX) inserted before the
;; mask operand 5; <round_saeonly_expand_operand6> appends the SAE operand
;; for the rounding variant.
250533c0 8224(define_expand "<avx512>_fixupimm<mode>_maskz<round_saeonly_expand_name>"
8225 [(match_operand:VF_AVX512VL 0 "register_operand")
8226 (match_operand:VF_AVX512VL 1 "register_operand")
8227 (match_operand:VF_AVX512VL 2 "register_operand")
affa436a 8228 (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>")
9a5ea1d5 8229 (match_operand:SI 4 "const_0_to_255_operand")
8230 (match_operand:<avx512fmaskmode> 5 "register_operand")]
8231 "TARGET_AVX512F"
8232{
250533c0 8233 emit_insn (gen_<avx512>_fixupimm<mode>_maskz_1<round_saeonly_expand_name> (
9a5ea1d5 8234 operands[0], operands[1], operands[2], operands[3],
affa436a 8235 operands[4], CONST0_RTX (<MODE>mode), operands[5]
8236 <round_saeonly_expand_operand6>));
9a5ea1d5 8237 DONE;
8238})
8239
;; Packed vfixupimm: UNSPEC_FIXUPIMM of two float vectors, an integer vector
;; (operand 3, mode <sseintvecmode>) and an 8-bit immediate.  Operand 1 is
;; tied to the destination, so the template prints %0, %2, %3, %4 only.
250533c0 8240(define_insn "<avx512>_fixupimm<mode><sd_maskz_name><round_saeonly_name>"
8241 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
8242 (unspec:VF_AVX512VL
8243 [(match_operand:VF_AVX512VL 1 "register_operand" "0")
8244 (match_operand:VF_AVX512VL 2 "register_operand" "v")
8245 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>")
85065932 8246 (match_operand:SI 4 "const_0_to_255_operand")]
8247 UNSPEC_FIXUPIMM))]
8248 "TARGET_AVX512F"
dbfe84d5 8249 "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3<round_saeonly_sd_mask_op5>, %4}";
85065932 8250 [(set_attr "prefix" "evex")
8251 (set_attr "mode" "<MODE>")])
8252
;; Merge-masking packed vfixupimm: the UNSPEC_FIXUPIMM result is merged with
;; operand 1 (tied to the destination) under mask register operand 5.
250533c0 8253(define_insn "<avx512>_fixupimm<mode>_mask<round_saeonly_name>"
8254 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
8255 (vec_merge:VF_AVX512VL
8256 (unspec:VF_AVX512VL
8257 [(match_operand:VF_AVX512VL 1 "register_operand" "0")
8258 (match_operand:VF_AVX512VL 2 "register_operand" "v")
8259 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>")
5220cab6 8260 (match_operand:SI 4 "const_0_to_255_operand")]
8261 UNSPEC_FIXUPIMM)
8262 (match_dup 1)
a31e7f46 8263 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
5220cab6 8264 "TARGET_AVX512F"
dbfe84d5 8265 "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %3<round_saeonly_op6>, %4}";
5220cab6 8266 [(set_attr "prefix" "evex")
8267 (set_attr "mode" "<MODE>")])
8268
;; Zero-masking entry point for scalar vfixupimm; same operand forwarding as
;; the packed _maskz expander, targeting avx512f_sfixupimm<mode>_maskz_1.
affa436a 8269(define_expand "avx512f_sfixupimm<mode>_maskz<round_saeonly_expand_name>"
9a5ea1d5 8270 [(match_operand:VF_128 0 "register_operand")
8271 (match_operand:VF_128 1 "register_operand")
8272 (match_operand:VF_128 2 "register_operand")
affa436a 8273 (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>")
9a5ea1d5 8274 (match_operand:SI 4 "const_0_to_255_operand")
8275 (match_operand:<avx512fmaskmode> 5 "register_operand")]
8276 "TARGET_AVX512F"
8277{
affa436a 8278 emit_insn (gen_avx512f_sfixupimm<mode>_maskz_1<round_saeonly_expand_name> (
9a5ea1d5 8279 operands[0], operands[1], operands[2], operands[3],
affa436a 8280 operands[4], CONST0_RTX (<MODE>mode), operands[5]
8281 <round_saeonly_expand_operand6>));
9a5ea1d5 8282 DONE;
8283})
8284
;; Scalar vfixupimm: element 0 comes from the UNSPEC_FIXUPIMM result, the
;; remaining elements from operand 1 (vec_merge with mask 1).  Operand 1 is
;; tied to the destination.
dbfe84d5 8285(define_insn "avx512f_sfixupimm<mode><sd_maskz_name><round_saeonly_name>"
85065932 8286 [(set (match_operand:VF_128 0 "register_operand" "=v")
8287 (vec_merge:VF_128
8288 (unspec:VF_128
8289 [(match_operand:VF_128 1 "register_operand" "0")
8290 (match_operand:VF_128 2 "register_operand" "v")
dbfe84d5 8291 (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
85065932 8292 (match_operand:SI 4 "const_0_to_255_operand")]
8293 UNSPEC_FIXUPIMM)
8294 (match_dup 1)
8295 (const_int 1)))]
8296 "TARGET_AVX512F"
dbfe84d5 8297 "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3<round_saeonly_sd_mask_op5>, %4}";
85065932 8298 [(set_attr "prefix" "evex")
8299 (set_attr "mode" "<ssescalarmode>")])
8300
;; Merge-masking scalar vfixupimm: the inner vec_merge (mask 1) forms the
;; scalar result as in the unmasked pattern; the outer vec_merge then merges
;; that with operand 1 under mask register operand 5.
dbfe84d5 8301(define_insn "avx512f_sfixupimm<mode>_mask<round_saeonly_name>"
5220cab6 8302 [(set (match_operand:VF_128 0 "register_operand" "=v")
8303 (vec_merge:VF_128
8304 (vec_merge:VF_128
8305 (unspec:VF_128
8306 [(match_operand:VF_128 1 "register_operand" "0")
8307 (match_operand:VF_128 2 "register_operand" "v")
dbfe84d5 8308 (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
5220cab6 8309 (match_operand:SI 4 "const_0_to_255_operand")]
8310 UNSPEC_FIXUPIMM)
8311 (match_dup 1)
8312 (const_int 1))
8313 (match_dup 1)
a31e7f46 8314 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
5220cab6 8315 "TARGET_AVX512F"
dbfe84d5 8316 "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %3<round_saeonly_op6>, %4}";
5220cab6 8317 [(set_attr "prefix" "evex")
8318 (set_attr "mode" "<ssescalarmode>")])
8319
;; Packed vrndscale: UNSPEC_ROUND of operand 1 with an 8-bit immediate
;; (operand 2); masked and SAE-only variants come from the subst attributes.
250533c0 8320(define_insn "<avx512>_rndscale<mode><mask_name><round_saeonly_name>"
8321 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
8322 (unspec:VF_AVX512VL
8323 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>")
85065932 8324 (match_operand:SI 2 "const_0_to_255_operand")]
8325 UNSPEC_ROUND))]
6615b722 8326 "TARGET_AVX512F"
dbfe84d5 8327 "vrndscale<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}"
6615b722 8328 [(set_attr "length_immediate" "1")
8329 (set_attr "prefix" "evex")
8330 (set_attr "mode" "<MODE>")])
8331
;; Scalar vrndscale: element 0 comes from UNSPEC_ROUND of operands 1, 2 and
;; the immediate operand 3; the remaining elements come from operand 1
;; (vec_merge with mask 1).
;; NOTE(review): "mode" is <MODE> here while the other scalar patterns nearby
;; use <ssescalarmode> -- confirm whether that is intentional.
0b7cc9c6 8332(define_insn "avx512f_rndscale<mode><round_saeonly_name>"
85065932 8333 [(set (match_operand:VF_128 0 "register_operand" "=v")
8334 (vec_merge:VF_128
8335 (unspec:VF_128
8336 [(match_operand:VF_128 1 "register_operand" "v")
fbf4df62 8337 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
85065932 8338 (match_operand:SI 3 "const_0_to_255_operand")]
8339 UNSPEC_ROUND)
8340 (match_dup 1)
8341 (const_int 1)))]
8342 "TARGET_AVX512F"
0b7cc9c6 8343 "vrndscale<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}"
85065932 8344 [(set_attr "length_immediate" "1")
8345 (set_attr "prefix" "evex")
8346 (set_attr "mode" "<MODE>")])
8347
697a43f8 8348;; One bit in mask selects 2 elements.
;; 512-bit vshufps with the shuffle spelled out as 16 element selectors
;; (operands 3-18).  The insn condition requires the selectors of the second,
;; third and fourth groups (operands 7-10, 11-14, 15-18) to equal those of
;; the first group (operands 3-6) plus 4, 8 and 12 respectively, since the
;; instruction has a single immediate.  The output code rebuilds that 8-bit
;; immediate from operands 3-6 and stores it back into operands[3].
5220cab6 8349(define_insn "avx512f_shufps512_1<mask_name>"
697a43f8 8350 [(set (match_operand:V16SF 0 "register_operand" "=v")
8351 (vec_select:V16SF
8352 (vec_concat:V32SF
8353 (match_operand:V16SF 1 "register_operand" "v")
8354 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
8355 (parallel [(match_operand 3 "const_0_to_3_operand")
8356 (match_operand 4 "const_0_to_3_operand")
8357 (match_operand 5 "const_16_to_19_operand")
8358 (match_operand 6 "const_16_to_19_operand")
8359 (match_operand 7 "const_4_to_7_operand")
8360 (match_operand 8 "const_4_to_7_operand")
8361 (match_operand 9 "const_20_to_23_operand")
8362 (match_operand 10 "const_20_to_23_operand")
8363 (match_operand 11 "const_8_to_11_operand")
8364 (match_operand 12 "const_8_to_11_operand")
8365 (match_operand 13 "const_24_to_27_operand")
8366 (match_operand 14 "const_24_to_27_operand")
8367 (match_operand 15 "const_12_to_15_operand")
8368 (match_operand 16 "const_12_to_15_operand")
8369 (match_operand 17 "const_28_to_31_operand")
8370 (match_operand 18 "const_28_to_31_operand")])))]
8371 "TARGET_AVX512F
8372 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
8373 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
8374 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
8375 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4)
8376 && INTVAL (operands[3]) == (INTVAL (operands[11]) - 8)
8377 && INTVAL (operands[4]) == (INTVAL (operands[12]) - 8)
8378 && INTVAL (operands[5]) == (INTVAL (operands[13]) - 8)
8379 && INTVAL (operands[6]) == (INTVAL (operands[14]) - 8)
8380 && INTVAL (operands[3]) == (INTVAL (operands[15]) - 12)
8381 && INTVAL (operands[4]) == (INTVAL (operands[16]) - 12)
8382 && INTVAL (operands[5]) == (INTVAL (operands[17]) - 12)
8383 && INTVAL (operands[6]) == (INTVAL (operands[18]) - 12))"
8384{
8385 int mask;
8386 mask = INTVAL (operands[3]);
8387 mask |= INTVAL (operands[4]) << 2;
8388 mask |= (INTVAL (operands[5]) - 16) << 4;
8389 mask |= (INTVAL (operands[6]) - 16) << 6;
8390 operands[3] = GEN_INT (mask);
8391
5220cab6 8392 return "vshufps\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
697a43f8 8393}
8394 [(set_attr "type" "sselog")
8395 (set_attr "length_immediate" "1")
8396 (set_attr "prefix" "evex")
8397 (set_attr "mode" "V16SF")])
8398
;; Masked 512-bit vshufpd entry point taking the 8-bit immediate (operand 3).
;; Each immediate bit picks the low or high element of one pair: even result
;; positions choose from operand 1 (indices 0-7), odd positions from
;; operand 2 (indices 8-15).  Operands 4 and 5 (merge source and mask) are
;; passed through to avx512f_shufpd512_1_mask.
5220cab6 8399(define_expand "avx512f_shufpd512_mask"
8400 [(match_operand:V8DF 0 "register_operand")
8401 (match_operand:V8DF 1 "register_operand")
8402 (match_operand:V8DF 2 "nonimmediate_operand")
8403 (match_operand:SI 3 "const_0_to_255_operand")
8404 (match_operand:V8DF 4 "register_operand")
8405 (match_operand:QI 5 "register_operand")]
8406 "TARGET_AVX512F"
8407{
8408 int mask = INTVAL (operands[3]);
8409 emit_insn (gen_avx512f_shufpd512_1_mask (operands[0], operands[1], operands[2],
8410 GEN_INT (mask & 1),
8411 GEN_INT (mask & 2 ? 9 : 8),
8412 GEN_INT (mask & 4 ? 3 : 2),
8413 GEN_INT (mask & 8 ? 11 : 10),
8414 GEN_INT (mask & 16 ? 5 : 4),
8415 GEN_INT (mask & 32 ? 13 : 12),
8416 GEN_INT (mask & 64 ? 7 : 6),
8417 GEN_INT (mask & 128 ? 15 : 14),
8418 operands[4], operands[5]));
8419 DONE;
8420})
8421
;; 512-bit vshufpd with the shuffle spelled out as eight element selectors
;; (operands 3-10), each restricted by its predicate to a two-value range.
;; The output code subtracts each range's base to recover one bit per
;; selector, packs the bits into the 8-bit immediate and stores it back into
;; operands[3] for printing.
8422(define_insn "avx512f_shufpd512_1<mask_name>"
697a43f8 8423 [(set (match_operand:V8DF 0 "register_operand" "=v")
8424 (vec_select:V8DF
8425 (vec_concat:V16DF
8426 (match_operand:V8DF 1 "register_operand" "v")
8427 (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
8428 (parallel [(match_operand 3 "const_0_to_1_operand")
8429 (match_operand 4 "const_8_to_9_operand")
8430 (match_operand 5 "const_2_to_3_operand")
8431 (match_operand 6 "const_10_to_11_operand")
8432 (match_operand 7 "const_4_to_5_operand")
8433 (match_operand 8 "const_12_to_13_operand")
8434 (match_operand 9 "const_6_to_7_operand")
8435 (match_operand 10 "const_14_to_15_operand")])))]
8436 "TARGET_AVX512F"
8437{
8438 int mask;
8439 mask = INTVAL (operands[3]);
8440 mask |= (INTVAL (operands[4]) - 8) << 1;
8441 mask |= (INTVAL (operands[5]) - 2) << 2;
8442 mask |= (INTVAL (operands[6]) - 10) << 3;
8443 mask |= (INTVAL (operands[7]) - 4) << 4;
8444 mask |= (INTVAL (operands[8]) - 12) << 5;
8445 mask |= (INTVAL (operands[9]) - 6) << 6;
8446 mask |= (INTVAL (operands[10]) - 14) << 7;
8447 operands[3] = GEN_INT (mask);
8448
5220cab6 8449 return "vshufpd\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
697a43f8 8450}
8451 [(set_attr "type" "sselog")
8452 (set_attr "length_immediate" "1")
8453 (set_attr "prefix" "evex")
8454 (set_attr "mode" "V8DF")])
8455
;; 256-bit vshufpd entry point taking the immediate (operand 3).  The low
;; four immediate bits are expanded into element selectors for
;; avx_shufpd256_1: bits 0 and 2 choose within operand 1 (0/1 and 2/3),
;; bits 1 and 3 choose within operand 2 (4/5 and 6/7).
;; <mask_expand4_args> appends the mask operands for the masked variant.
a17ccedb 8456(define_expand "avx_shufpd256<mask_expand4_name>"
abd4f58b 8457 [(match_operand:V4DF 0 "register_operand")
8458 (match_operand:V4DF 1 "register_operand")
8459 (match_operand:V4DF 2 "nonimmediate_operand")
8460 (match_operand:SI 3 "const_int_operand")]
ed30e0a6 8461 "TARGET_AVX"
8462{
8463 int mask = INTVAL (operands[3]);
a17ccedb 8464 emit_insn (gen_avx_shufpd256_1<mask_expand4_name> (operands[0],
8465 operands[1],
8466 operands[2],
8467 GEN_INT (mask & 1),
8468 GEN_INT (mask & 2 ? 5 : 4),
8469 GEN_INT (mask & 4 ? 3 : 2),
8470 GEN_INT (mask & 8 ? 7 : 6)
8471 <mask_expand4_args>));
ed30e0a6 8472 DONE;
8473})
8474
;; 256-bit vshufpd with four explicit element selectors (operands 3-6).  The
;; output code converts the selectors back into the 4-bit immediate and
;; stores it into operands[3] for printing.
a17ccedb 8475(define_insn "avx_shufpd256_1<mask_name>"
8476 [(set (match_operand:V4DF 0 "register_operand" "=v")
ed30e0a6 8477 (vec_select:V4DF
8478 (vec_concat:V8DF
a17ccedb 8479 (match_operand:V4DF 1 "register_operand" "v")
8480 (match_operand:V4DF 2 "nonimmediate_operand" "vm"))
abd4f58b 8481 (parallel [(match_operand 3 "const_0_to_1_operand")
8482 (match_operand 4 "const_4_to_5_operand")
8483 (match_operand 5 "const_2_to_3_operand")
8484 (match_operand 6 "const_6_to_7_operand")])))]
a17ccedb 8485 "TARGET_AVX && <mask_avx512vl_condition>"
ed30e0a6 8486{
8487 int mask;
8488 mask = INTVAL (operands[3]);
8489 mask |= (INTVAL (operands[4]) - 4) << 1;
8490 mask |= (INTVAL (operands[5]) - 2) << 2;
8491 mask |= (INTVAL (operands[6]) - 6) << 3;
8492 operands[3] = GEN_INT (mask);
8493
a17ccedb 8494 return "vshufpd\t{%3, %2, %1, %0<mask_operand7>|%0<mask_operand7>, %1, %2, %3}";
ed30e0a6 8495}
77aff08f 8496 [(set_attr "type" "sseshuf")
00a0e418 8497 (set_attr "length_immediate" "1")
ed30e0a6 8498 (set_attr "prefix" "vex")
8499 (set_attr "mode" "V4DF")])
8500
;; 128-bit shufpd entry point taking the immediate (operand 3).  Bit 0
;; selects element 0 or 1 of operand 1; bit 1 selects element 0 or 1 of
;; operand 2 (concat index 2 or 3).  Expands to sse2_shufpd_v2df, or its
;; masked variant via <mask_expand4_name>/<mask_expand4_args>.
a17ccedb 8501(define_expand "sse2_shufpd<mask_expand4_name>"
abd4f58b 8502 [(match_operand:V2DF 0 "register_operand")
8503 (match_operand:V2DF 1 "register_operand")
8504 (match_operand:V2DF 2 "nonimmediate_operand")
8505 (match_operand:SI 3 "const_int_operand")]
5802c0cb 8506 "TARGET_SSE2"
8507{
8508 int mask = INTVAL (operands[3]);
a17ccedb 8509 emit_insn (gen_sse2_shufpd_v2df<mask_expand4_name> (operands[0], operands[1],
8510 operands[2], GEN_INT (mask & 1),
8511 GEN_INT (mask & 2 ? 3 : 2)
8512 <mask_expand4_args>));
5802c0cb 8513 DONE;
8514})
8515
;; Masked 128-bit vshufpd.  The shuffle of operands 1 and 2 (selectors in
;; operands 3 and 4) is merged with operand 5 under mask register operand 6.
;; The output code folds the two selectors into the 2-bit immediate and
;; stores it into operands[3] for printing.
;; Both the AT&T and the Intel half of the template must print the mask
;; register as %{%6%}; the Intel half previously read %{6%}, which emitted a
;; literal "6" instead of the mask register name.
a17ccedb 8516(define_insn "sse2_shufpd_v2df_mask"
8517 [(set (match_operand:V2DF 0 "register_operand" "=v")
8518 (vec_merge:V2DF
8519 (vec_select:V2DF
8520 (vec_concat:V4DF
8521 (match_operand:V2DF 1 "register_operand" "v")
8522 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
8523 (parallel [(match_operand 3 "const_0_to_1_operand")
8524 (match_operand 4 "const_2_to_3_operand")]))
8525 (match_operand:V2DF 5 "vector_move_operand" "0C")
8526 (match_operand:QI 6 "register_operand" "Yk")))]
8527 "TARGET_AVX512VL"
8528{
8529 int mask;
8530 mask = INTVAL (operands[3]);
8531 mask |= (INTVAL (operands[4]) - 2) << 1;
8532 operands[3] = GEN_INT (mask);
8533
8534 return "vshufpd\t{%3, %2, %1, %0%{%6%}%N5|%0%{%6%}%N5, %1, %2, %3}";
8535}
8536 [(set_attr "type" "sseshuf")
8537 (set_attr "length_immediate" "1")
8538 (set_attr "prefix" "evex")
8539 (set_attr "mode" "V2DF")])
8540
a6142438 8541;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; 256-bit vpunpckhqdq: selects the odd-indexed elements of operand 1
;; (1 and 3) interleaved with the odd-indexed elements of operand 2
;; (concat indices 5 and 7).
d5f65ad4 8542(define_insn "avx2_interleave_highv4di<mask_name>"
8543 [(set (match_operand:V4DI 0 "register_operand" "=v")
5deb404d 8544 (vec_select:V4DI
8545 (vec_concat:V8DI
d5f65ad4 8546 (match_operand:V4DI 1 "register_operand" "v")
8547 (match_operand:V4DI 2 "nonimmediate_operand" "vm"))
5deb404d 8548 (parallel [(const_int 1)
8549 (const_int 5)
8550 (const_int 3)
8551 (const_int 7)])))]
d5f65ad4 8552 "TARGET_AVX2 && <mask_avx512vl_condition>"
8553 "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
5deb404d 8554 [(set_attr "type" "sselog")
8555 (set_attr "prefix" "vex")
8556 (set_attr "mode" "OI")])
ed30e0a6 8557
;; 512-bit vpunpckhqdq: selects the odd-indexed elements of operand 1
;; (1,3,5,7) interleaved with the odd-indexed elements of operand 2
;; (concat indices 9,11,13,15).
5220cab6 8558(define_insn "<mask_codefor>avx512f_interleave_highv8di<mask_name>"
697a43f8 8559 [(set (match_operand:V8DI 0 "register_operand" "=v")
8560 (vec_select:V8DI
8561 (vec_concat:V16DI
8562 (match_operand:V8DI 1 "register_operand" "v")
8563 (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
8564 (parallel [(const_int 1) (const_int 9)
8565 (const_int 3) (const_int 11)
8566 (const_int 5) (const_int 13)
8567 (const_int 7) (const_int 15)])))]
8568 "TARGET_AVX512F"
5220cab6 8569 "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
697a43f8 8570 [(set_attr "type" "sselog")
8571 (set_attr "prefix" "evex")
8572 (set_attr "mode" "XI")])
8573
;; 128-bit high interleave of V2DI: element 1 of operand 1 followed by
;; element 1 of operand 2.  Alternative 0 is the two-operand SSE2 punpckhqdq
;; (destination tied to operand 1); alternative 1 is the three-operand
;; vpunpckhqdq, which also carries the optional mask operand.
d5f65ad4 8574(define_insn "vec_interleave_highv2di<mask_name>"
8575 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
a6142438 8576 (vec_select:V2DI
8577 (vec_concat:V4DI
d5f65ad4 8578 (match_operand:V2DI 1 "register_operand" "0,v")
8579 (match_operand:V2DI 2 "nonimmediate_operand" "xm,vm"))
a6142438 8580 (parallel [(const_int 1)
8581 (const_int 3)])))]
d5f65ad4 8582 "TARGET_SSE2 && <mask_avx512vl_condition>"
45c0368c 8583 "@
8584 punpckhqdq\t{%2, %0|%0, %2}
d5f65ad4 8585 vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
45c0368c 8586 [(set_attr "isa" "noavx,avx")
8587 (set_attr "type" "sselog")
8588 (set_attr "prefix_data16" "1,*")
d5f65ad4 8589 (set_attr "prefix" "orig,<mask_prefix>")
ed30e0a6 8590 (set_attr "mode" "TI")])
8591
;; 256-bit vpunpcklqdq: selects the even-indexed elements of operand 1
;; (0 and 2) interleaved with the even-indexed elements of operand 2
;; (concat indices 4 and 6).
d5f65ad4 8592(define_insn "avx2_interleave_lowv4di<mask_name>"
8593 [(set (match_operand:V4DI 0 "register_operand" "=v")
5deb404d 8594 (vec_select:V4DI
8595 (vec_concat:V8DI
d5f65ad4 8596 (match_operand:V4DI 1 "register_operand" "v")
8597 (match_operand:V4DI 2 "nonimmediate_operand" "vm"))
5deb404d 8598 (parallel [(const_int 0)
8599 (const_int 4)
8600 (const_int 2)
8601 (const_int 6)])))]
d5f65ad4 8602 "TARGET_AVX2 && <mask_avx512vl_condition>"
8603 "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
5deb404d 8604 [(set_attr "type" "sselog")
8605 (set_attr "prefix" "vex")
8606 (set_attr "mode" "OI")])
8607
;; 512-bit vpunpcklqdq: selects the even-indexed elements of operand 1
;; (0,2,4,6) interleaved with the even-indexed elements of operand 2
;; (concat indices 8,10,12,14).
5220cab6 8608(define_insn "<mask_codefor>avx512f_interleave_lowv8di<mask_name>"
697a43f8 8609 [(set (match_operand:V8DI 0 "register_operand" "=v")
8610 (vec_select:V8DI
8611 (vec_concat:V16DI
8612 (match_operand:V8DI 1 "register_operand" "v")
8613 (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
8614 (parallel [(const_int 0) (const_int 8)
8615 (const_int 2) (const_int 10)
8616 (const_int 4) (const_int 12)
8617 (const_int 6) (const_int 14)])))]
8618 "TARGET_AVX512F"
5220cab6 8619 "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
697a43f8 8620 [(set_attr "type" "sselog")
8621 (set_attr "prefix" "evex")
8622 (set_attr "mode" "XI")])
8623
;; 128-bit low interleave of V2DI: element 0 of operand 1 followed by
;; element 0 of operand 2.  Alternative 0 is the two-operand SSE2 punpcklqdq
;; (destination tied to operand 1); alternative 1 is the three-operand
;; vpunpcklqdq, which also carries the optional mask operand.
;; The "prefix" attribute of alternative 1 uses <mask_prefix>, matching
;; vec_interleave_highv2di<mask_name>, so that the masked variant is not
;; described as VEX-encoded.
d5f65ad4 8624(define_insn "vec_interleave_lowv2di<mask_name>"
8625 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
a6142438 8626 (vec_select:V2DI
8627 (vec_concat:V4DI
d5f65ad4 8628 (match_operand:V2DI 1 "register_operand" "0,v")
8629 (match_operand:V2DI 2 "nonimmediate_operand" "xm,vm"))
a6142438 8630 (parallel [(const_int 0)
8631 (const_int 2)])))]
d5f65ad4 8632 "TARGET_SSE2 && <mask_avx512vl_condition>"
45c0368c 8633 "@
8634 punpcklqdq\t{%2, %0|%0, %2}
d5f65ad4 8635 vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
45c0368c 8636 [(set_attr "isa" "noavx,avx")
8637 (set_attr "type" "sselog")
8638 (set_attr "prefix_data16" "1,*")
8639 (set_attr "prefix" "orig,<mask_prefix>")
a6142438 8640 (set_attr "mode" "TI")])
8641
;; Unmasked 128-bit shufpd over the VI8F_128 modes, with the shuffle given as
;; two element selectors (operands 3 and 4).  The output code folds them into
;; the 2-bit immediate, then emits the two-operand SSE2 form for
;; alternative 0 (destination tied to operand 1) or the three-operand AVX
;; form for alternative 1.
56c7c824 8642(define_insn "sse2_shufpd_<mode>"
6fe5844b 8643 [(set (match_operand:VI8F_128 0 "register_operand" "=x,x")
8644 (vec_select:VI8F_128
63d5e521 8645 (vec_concat:<ssedoublevecmode>
6fe5844b 8646 (match_operand:VI8F_128 1 "register_operand" "0,x")
8647 (match_operand:VI8F_128 2 "nonimmediate_operand" "xm,xm"))
abd4f58b 8648 (parallel [(match_operand 3 "const_0_to_1_operand")
8649 (match_operand 4 "const_2_to_3_operand")])))]
5802c0cb 8650 "TARGET_SSE2"
8651{
8652 int mask;
8653 mask = INTVAL (operands[3]);
8654 mask |= (INTVAL (operands[4]) - 2) << 1;
8655 operands[3] = GEN_INT (mask);
8656
45c0368c 8657 switch (which_alternative)
8658 {
8659 case 0:
8660 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
8661 case 1:
8662 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
8663 default:
8664 gcc_unreachable ();
8665 }
5802c0cb 8666}
45c0368c 8667 [(set_attr "isa" "noavx,avx")
77aff08f 8668 (set_attr "type" "sseshuf")
00a0e418 8669 (set_attr "length_immediate" "1")
45c0368c 8670 (set_attr "prefix" "orig,vex")
5802c0cb 8671 (set_attr "mode" "V2DF")])
8672
6be36710 8673;; Avoid combining registers from different units in a single alternative,
8674;; see comment above inline_secondary_memory_needed function in i386.c
;; Extract element 1 (the high double) of a V2DF into a DF destination.
;; Alternatives, in order:
;;   0: register -> memory, movhpd
;;   1: in-place unpckhpd (source tied to destination, non-AVX)
;;   2: vunpckhpd (AVX)
;;   3-5: offsettable memory source into an SSE, x87 or integer register;
;;        these emit "#" and rely on being split (presumably by the
;;        define_split that follows this pattern).
;; The condition rejects memory-to-memory.
5802c0cb 8675(define_insn "sse2_storehpd"
45c0368c 8676 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,x,*f,r")
5802c0cb 8677 (vec_select:DF
45c0368c 8678 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,x,o,o,o")
5802c0cb 8679 (parallel [(const_int 1)])))]
8680 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8681 "@
45c0368c 8682 %vmovhpd\t{%1, %0|%0, %1}
5802c0cb 8683 unpckhpd\t%0, %0
45c0368c 8684 vunpckhpd\t{%d1, %0|%0, %d1}
6be36710 8685 #
8686 #
5802c0cb 8687 #"
d1c8b778 8688 [(set_attr "isa" "*,noavx,avx,*,*,*")
45c0368c 8689 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov")
8690 (set (attr "prefix_data16")
8691 (if_then_else
8692 (and (eq_attr "alternative" "0")
6be3efec 8693 (not (match_test "TARGET_AVX")))
45c0368c 8694 (const_string "1")
8695 (const_string "*")))
8696 (set_attr "prefix" "maybe_vex,orig,vex,*,*,*")
8697 (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")])
5802c0cb 8698
;; After reload, extracting element 1 of a V2DF memory operand into a
;; register becomes a plain DFmode load from byte offset 8 of that memory.
8699(define_split
abd4f58b 8700 [(set (match_operand:DF 0 "register_operand")
5802c0cb 8701 (vec_select:DF
abd4f58b 8702 (match_operand:V2DF 1 "memory_operand")
5802c0cb 8703 (parallel [(const_int 1)])))]
8704 "TARGET_SSE2 && reload_completed"
8705 [(set (match_dup 0) (match_dup 1))]
5bd1ff1d 8706 "operands[1] = adjust_address (operands[1], DFmode, 8);")
5802c0cb 8707
;; Extract element 1 of V2DF operand 1 into DF operand 0 when only SSE
;; (TARGET_SSE without TARGET_SSE2) is available.  The three alternatives
;; use movhps (store), movhlps (register) and movlps with the %H1 operand
;; modifier (offsettable memory source).  Memory-to-memory is rejected.
d1c8b778 8708(define_insn "*vec_extractv2df_1_sse"
8709 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
8710 (vec_select:DF
8711 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
8712 (parallel [(const_int 1)])))]
8713 "!TARGET_SSE2 && TARGET_SSE
8714 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8715 "@
c358a059 8716 movhps\t{%1, %0|%q0, %1}
d1c8b778 8717 movhlps\t{%1, %0|%0, %1}
8718 movlps\t{%H1, %0|%0, %H1}"
8719 [(set_attr "type" "ssemov")
8c1dfa94 8720 (set_attr "ssememalign" "64")
d1c8b778 8721 (set_attr "mode" "V2SF,V4SF,V2SF")])
8722
;; sse2_storelpd: set DF operand 0 to element 0 of V2DF operand 1.
;; Enabled for TARGET_SSE2 unless both operands are memory.  Only
;; alternative 0 (memory destination, SSE register source) has a movlpd
;; template; the other four alternatives output "#".
6be36710 8723;; Avoid combining registers from different units in a single alternative,
8724;; see comment above inline_secondary_memory_needed function in i386.c
5802c0cb 8725(define_insn "sse2_storelpd"
6be36710 8726 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
5802c0cb 8727 (vec_select:DF
6be36710 8728 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
5802c0cb 8729 (parallel [(const_int 0)])))]
8730 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8731 "@
ed30e0a6 8732 %vmovlpd\t{%1, %0|%0, %1}
5802c0cb 8733 #
6be36710 8734 #
8735 #
5802c0cb 8736 #"
6be36710 8737 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
00a0e418 8738 (set_attr "prefix_data16" "1,*,*,*,*")
ed30e0a6 8739 (set_attr "prefix" "maybe_vex")
6be36710 8740 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
5802c0cb 8741
;; Once reload has completed, rewrite "DF register = element 0 of a V2DF
;; operand" as a plain DF move.  A register source becomes a DFmode REG
;; with the same register number; any other source is re-addressed in
;; DFmode at offset 0.
8742(define_split
abd4f58b 8743 [(set (match_operand:DF 0 "register_operand")
5802c0cb 8744 (vec_select:DF
abd4f58b 8745 (match_operand:V2DF 1 "nonimmediate_operand")
5802c0cb 8746 (parallel [(const_int 0)])))]
8747 "TARGET_SSE2 && reload_completed"
573c5512 8748 [(set (match_dup 0) (match_dup 1))]
5802c0cb 8749{
573c5512 8750 if (REG_P (operands[1]))
8751 operands[1] = gen_rtx_REG (DFmode, REGNO (operands[1]));
ad2c46cf 8752 else
573c5512 8753 operands[1] = adjust_address (operands[1], DFmode, 0);
5802c0cb 8754})
8755
;; Extract element 0 of V2DF operand 1 into DF operand 0 when only SSE
;; (TARGET_SSE without TARGET_SSE2) is available, using movlps for the
;; memory alternatives and movaps for register-to-register.
;; Memory-to-memory is rejected by the insn condition.
d1c8b778 8756(define_insn "*vec_extractv2df_0_sse"
8757 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
8758 (vec_select:DF
8759 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
8760 (parallel [(const_int 0)])))]
8761 "!TARGET_SSE2 && TARGET_SSE
8762 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8763 "@
8764 movlps\t{%1, %0|%0, %1}
8765 movaps\t{%1, %0|%0, %1}
c358a059 8766 movlps\t{%1, %0|%0, %q1}"
d1c8b778 8767 [(set_attr "type" "ssemov")
8768 (set_attr "mode" "V2SF,V4SF,V2SF")])
8769
;; Expander wrapping sse2_loadhpd.  It passes the operands through
;; ix86_fixup_binary_operands, emits sse2_loadhpd into the destination
;; that call returns, and then copies that destination to operands[0] if
;; the two differ.
2485795e 8770(define_expand "sse2_loadhpd_exp"
abd4f58b 8771 [(set (match_operand:V2DF 0 "nonimmediate_operand")
7c839b3f 8772 (vec_concat:V2DF
8773 (vec_select:DF
abd4f58b 8774 (match_operand:V2DF 1 "nonimmediate_operand")
7c839b3f 8775 (parallel [(const_int 0)]))
abd4f58b 8776 (match_operand:DF 2 "nonimmediate_operand")))]
7c839b3f 8777 "TARGET_SSE2"
cc05a422 8778{
8779 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
33541f98 8780
cc05a422 8781 emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2]));
8782
8783 /* Fix up the destination if needed. */
8784 if (dst != operands[0])
8785 emit_move_insn (operands[0], dst);
8786
8787 DONE;
8788})
7c839b3f 8789
;; sse2_loadhpd: operand 0 = { element 0 of V2DF operand 1, DF operand 2 }.
;; Enabled for TARGET_SSE2 unless operands 1 and 2 are both memory.
;; Alternatives 0-3 use (v)movhpd / (v)unpcklpd; alternatives 4-6
;; (offsettable memory destination tied to operand 1) output "#".
6be36710 8790;; Avoid combining registers from different units in a single alternative,
8791;; see comment above inline_secondary_memory_needed function in i386.c
2485795e 8792(define_insn "sse2_loadhpd"
45c0368c 8793 [(set (match_operand:V2DF 0 "nonimmediate_operand"
23372c6e 8794 "=x,x,x,x,o,o ,o")
5802c0cb 8795 (vec_concat:V2DF
8796 (vec_select:DF
45c0368c 8797 (match_operand:V2DF 1 "nonimmediate_operand"
23372c6e 8798 " 0,x,0,x,0,0 ,0")
5802c0cb 8799 (parallel [(const_int 0)]))
45c0368c 8800 (match_operand:DF 2 "nonimmediate_operand"
23372c6e 8801 " m,m,x,x,x,*f,r")))]
5c752e47 8802 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
5802c0cb 8803 "@
8804 movhpd\t{%2, %0|%0, %2}
45c0368c 8805 vmovhpd\t{%2, %1, %0|%0, %1, %2}
5802c0cb 8806 unpcklpd\t{%2, %0|%0, %2}
45c0368c 8807 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
6be36710 8808 #
8809 #
5802c0cb 8810 #"
d1c8b778 8811 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
23372c6e 8812 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
8c1dfa94 8813 (set_attr "ssememalign" "64")
23372c6e 8814 (set_attr "prefix_data16" "1,*,*,*,*,*,*")
8815 (set_attr "prefix" "orig,vex,orig,vex,*,*,*")
8816 (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")])
5802c0cb 8817
;; Once reload has completed, a V2DF memory destination whose element 0
;; is kept (match_dup 0) and whose element 1 comes from a DF register is
;; rewritten as a plain DF store to that memory re-addressed at offset 8.
8818(define_split
abd4f58b 8819 [(set (match_operand:V2DF 0 "memory_operand")
5802c0cb 8820 (vec_concat:V2DF
8821 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
abd4f58b 8822 (match_operand:DF 1 "register_operand")))]
5802c0cb 8823 "TARGET_SSE2 && reload_completed"
8824 [(set (match_dup 0) (match_dup 1))]
5bd1ff1d 8825 "operands[0] = adjust_address (operands[0], DFmode, 8);")
5802c0cb 8826
;; Expander wrapping sse2_loadlpd.  It passes the operands through
;; ix86_fixup_binary_operands, emits sse2_loadlpd into the destination
;; that call returns, and then copies that destination to operands[0] if
;; the two differ.
2485795e 8827(define_expand "sse2_loadlpd_exp"
abd4f58b 8828 [(set (match_operand:V2DF 0 "nonimmediate_operand")
7c839b3f 8829 (vec_concat:V2DF
abd4f58b 8830 (match_operand:DF 2 "nonimmediate_operand")
7c839b3f 8831 (vec_select:DF
abd4f58b 8832 (match_operand:V2DF 1 "nonimmediate_operand")
7c839b3f 8833 (parallel [(const_int 1)]))))]
8834 "TARGET_SSE2"
cc05a422 8835{
8836 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
33541f98 8837
cc05a422 8838 emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2]));
8839
8840 /* Fix up the destination if needed. */
8841 if (dst != operands[0])
8842 emit_move_insn (operands[0], dst);
8843
8844 DONE;
8845})
7c839b3f 8846
;; sse2_loadlpd: operand 0 = { DF operand 2, element 1 of V2DF operand 1 }.
;; Enabled for TARGET_SSE2 unless operands 1 and 2 are both memory.
;; Eleven alternatives: 0-7 have movsd/movlpd/shufpd/movhpd templates
;; (alternative 5, shufpd, is the only one with an immediate and type
;; "sselog"); alternatives 8-10 (memory destination) output "#".
6be36710 8847;; Avoid combining registers from different units in a single alternative,
8848;; see comment above inline_secondary_memory_needed function in i386.c
2485795e 8849(define_insn "sse2_loadlpd"
45c0368c 8850 [(set (match_operand:V2DF 0 "nonimmediate_operand"
8851 "=x,x,x,x,x,x,x,x,m,m ,m")
5802c0cb 8852 (vec_concat:V2DF
45c0368c 8853 (match_operand:DF 2 "nonimmediate_operand"
8854 " m,m,m,x,x,0,0,x,x,*f,r")
5802c0cb 8855 (vec_select:DF
45c0368c 8856 (match_operand:V2DF 1 "vector_move_operand"
8857 " C,0,x,0,x,x,o,o,0,0 ,0")
5802c0cb 8858 (parallel [(const_int 1)]))))]
8859 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
8860 "@
45c0368c 8861 %vmovsd\t{%2, %0|%0, %2}
5802c0cb 8862 movlpd\t{%2, %0|%0, %2}
45c0368c 8863 vmovlpd\t{%2, %1, %0|%0, %1, %2}
5802c0cb 8864 movsd\t{%2, %0|%0, %2}
45c0368c 8865 vmovsd\t{%2, %1, %0|%0, %1, %2}
ff6a33df 8866 shufpd\t{$2, %1, %0|%0, %1, 2}
5802c0cb 8867 movhpd\t{%H1, %0|%0, %H1}
45c0368c 8868 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
6be36710 8869 #
8870 #
5802c0cb 8871 #"
d1c8b778 8872 [(set_attr "isa" "*,noavx,avx,noavx,avx,noavx,noavx,avx,*,*,*")
8873 (set (attr "type")
8874 (cond [(eq_attr "alternative" "5")
8875 (const_string "sselog")
8876 (eq_attr "alternative" "9")
8877 (const_string "fmov")
8878 (eq_attr "alternative" "10")
8879 (const_string "imov")
8880 ]
8881 (const_string "ssemov")))
8c1dfa94 8882 (set_attr "ssememalign" "64")
45c0368c 8883 (set_attr "prefix_data16" "*,1,*,*,*,*,1,*,*,*,*")
8884 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*,*,*")
8885 (set_attr "prefix" "maybe_vex,orig,vex,orig,vex,orig,orig,vex,*,*,*")
8886 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")])
5802c0cb 8887
;; Once reload has completed, a V2DF memory destination whose element 1
;; is kept (match_dup 0) and whose element 0 comes from a DF register is
;; rewritten as a plain DF store to that memory re-addressed at offset 0.
8888(define_split
abd4f58b 8889 [(set (match_operand:V2DF 0 "memory_operand")
5802c0cb 8890 (vec_concat:V2DF
abd4f58b 8891 (match_operand:DF 1 "register_operand")
5802c0cb 8892 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
8893 "TARGET_SSE2 && reload_completed"
8894 [(set (match_dup 0) (match_dup 1))]
9af8c7c5 8895 "operands[0] = adjust_address (operands[0], DFmode, 0);")
5802c0cb 8896
;; sse2_movsd: vec_merge of V2DF operand 2 and V2DF operand 1 with the
;; constant mask 1, so element 0 is taken from operand 2 and element 1
;; from operand 1.  Nine alternatives cover register and memory forms via
;; movsd, movlpd, shufpd and movhps.  prefix_data16 is set to 1 only for
;; alternatives 2 and 4 when TARGET_AVX is off.
5802c0cb 8897(define_insn "sse2_movsd"
45c0368c 8898 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,x,x,x,o")
5802c0cb 8899 (vec_merge:V2DF
45c0368c 8900 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,m,m,x,0,0,x,0")
8901 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,0,x,0,x,o,o,x")
5802c0cb 8902 (const_int 1)))]
8903 "TARGET_SSE2"
8904 "@
8905 movsd\t{%2, %0|%0, %2}
45c0368c 8906 vmovsd\t{%2, %1, %0|%0, %1, %2}
c358a059 8907 movlpd\t{%2, %0|%0, %q2}
8908 vmovlpd\t{%2, %1, %0|%0, %1, %q2}
8909 %vmovlpd\t{%2, %0|%q0, %2}
ff6a33df 8910 shufpd\t{$2, %1, %0|%0, %1, 2}
b2266391 8911 movhps\t{%H1, %0|%0, %H1}
45c0368c 8912 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
8913 %vmovhps\t{%1, %H0|%H0, %1}"
d1c8b778 8914 [(set_attr "isa" "noavx,avx,noavx,avx,*,noavx,noavx,avx,*")
8915 (set (attr "type")
8916 (if_then_else
8917 (eq_attr "alternative" "5")
8918 (const_string "sselog")
8919 (const_string "ssemov")))
45c0368c 8920 (set (attr "prefix_data16")
8921 (if_then_else
8922 (and (eq_attr "alternative" "2,4")
6be3efec 8923 (not (match_test "TARGET_AVX")))
45c0368c 8924 (const_string "1")
8925 (const_string "*")))
8926 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*")
8c1dfa94 8927 (set_attr "ssememalign" "64")
45c0368c 8928 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig,vex,maybe_vex")
8929 (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
5802c0cb 8930
;; vec_dupv2df: duplicate DF operand 1 into both elements of V2DF register
;; operand 0.  Alternative 0 (noavx, source tied to destination) uses
;; unpcklpd; alternatives 1 (sse3) and 2 (avx512vl) use (v)movddup.
;; NOTE(review): <mask_name>, <mask_operand2> and <mask_avx512vl_condition>
;; are defined outside this view -- presumably a masking define_subst.
adea432f 8931(define_insn "vec_dupv2df<mask_name>"
b2eda4e9 8932 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
a17124a0 8933 (vec_duplicate:V2DF
b2eda4e9 8934 (match_operand:DF 1 "nonimmediate_operand" " 0,xm,vm")))]
adea432f 8935 "TARGET_SSE2 && <mask_avx512vl_condition>"
eea5ff47 8936 "@
8937 unpcklpd\t%0, %0
b2eda4e9 8938 %vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}
8939 vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
8940 [(set_attr "isa" "noavx,sse3,avx512vl")
eea5ff47 8941 (set_attr "type" "sselog1")
b2eda4e9 8942 (set_attr "prefix" "orig,maybe_vex,evex")
8943 (set_attr "mode" "V2DF,DF,DF")])
ad2c46cf 8944
;; *vec_concatv2df: operand 0 = { DF operand 1, DF operand 2 }.
;; Enabled for TARGET_SSE when operands 1 and 2 are not both memory, or,
;; with TARGET_SSE3, when they are rtx_equal_p (alternatives 3 and 4 tie
;; operand 2 to operand 1 and use movddup).  Alternatives 0-4 are typed
;; "sselog", the rest "ssemov"; only alternative 5 sets prefix_data16.
ad2c46cf 8945(define_insn "*vec_concatv2df"
b2eda4e9 8946 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v,x,v,x,x,v,x,x")
ad2c46cf 8947 (vec_concat:V2DF
b2eda4e9 8948 (match_operand:DF 1 "nonimmediate_operand" " 0,x,v,m,m,0,x,m,0,0")
8949 (match_operand:DF 2 "vector_move_operand" " x,x,v,1,1,m,m,C,x,m")))]
0a281fd0 8950 "TARGET_SSE
8951 && (!(MEM_P (operands[1]) && MEM_P (operands[2]))
8952 || (TARGET_SSE3 && rtx_equal_p (operands[1], operands[2])))"
ad2c46cf 8953 "@
8954 unpcklpd\t{%2, %0|%0, %2}
45c0368c 8955 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
b2eda4e9 8956 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
eea5ff47 8957 %vmovddup\t{%1, %0|%0, %1}
b2eda4e9 8958 vmovddup\t{%1, %0|%0, %1}
ad2c46cf 8959 movhpd\t{%2, %0|%0, %2}
45c0368c 8960 vmovhpd\t{%2, %1, %0|%0, %1, %2}
8961 %vmovsd\t{%1, %0|%0, %1}
ad2c46cf 8962 movlhps\t{%2, %0|%0, %2}
8963 movhps\t{%2, %0|%0, %2}"
b2eda4e9 8964 [(set_attr "isa" "sse2_noavx,avx,avx512vl,sse3,avx512vl,sse2_noavx,avx,sse2,noavx,noavx")
d1c8b778 8965 (set (attr "type")
8966 (if_then_else
7bb3b827 8967 (eq_attr "alternative" "0,1,2,3,4")
d1c8b778 8968 (const_string "sselog")
8969 (const_string "ssemov")))
b2eda4e9 8970 (set (attr "prefix_data16")
8971 (if_then_else (eq_attr "alternative" "5")
8972 (const_string "1")
8973 (const_string "*")))
8974 (set_attr "prefix" "orig,vex,evex,maybe_vex,evex,orig,vex,maybe_vex,orig,orig")
8975 (set_attr "mode" "V2DF,V2DF,V2DF, DF, DF, V1DF,V1DF,DF,V4SF,V2SF")])
ad2c46cf 8976
697a43f8 8977;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8978;;
8979;; Parallel integer down-conversion operations
8980;;
8981;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8982
;; Destination modes for truncations whose source is V16SI or V8DI, plus
;; per-mode attributes: the source mode in upper case (pmov_src_mode) and
;; lower case (pmov_src_lower), and the two-letter vpmov mnemonic suffix
;; (pmov_suff_1).
da982d5c 8983(define_mode_iterator PMOV_DST_MODE_1 [V16QI V16HI V8SI V8HI])
697a43f8 8984(define_mode_attr pmov_src_mode
8985 [(V16QI "V16SI") (V16HI "V16SI") (V8SI "V8DI") (V8HI "V8DI")])
8986(define_mode_attr pmov_src_lower
8987 [(V16QI "v16si") (V16HI "v16si") (V8SI "v8di") (V8HI "v8di")])
da982d5c 8988(define_mode_attr pmov_suff_1
697a43f8 8989 [(V16QI "db") (V16HI "dw") (V8SI "qd") (V8HI "qw")])
8990
;; Unmasked AVX512F truncation: apply any_truncate to a <pmov_src_mode>
;; register and write the PMOV_DST_MODE_1 result to a register
;; (alternative 0) or to memory (alternative 1, "memory" attr "store").
;; NOTE(review): any_truncate and <trunsuffix> are defined outside this view.
8991(define_insn "*avx512f_<code><pmov_src_lower><mode>2"
da982d5c 8992 [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
8993 (any_truncate:PMOV_DST_MODE_1
697a43f8 8994 (match_operand:<pmov_src_mode> 1 "register_operand" "v,v")))]
8995 "TARGET_AVX512F"
da982d5c 8996 "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0|%0, %1}"
697a43f8 8997 [(set_attr "type" "ssemov")
8998 (set_attr "memory" "none,store")
8999 (set_attr "prefix" "evex")
9000 (set_attr "mode" "<sseinsnmode>")])
9001
;; Masked AVX512F truncation: vec_merge of the truncated operand 1 with
;; operand 2 under mask register operand 3 ("Yk").  With a register
;; destination, operand 2 may be tied to the destination or match "C";
;; with a memory destination it must be tied to the destination.
;; NOTE(review): "C" and the %N2 modifier presumably select zero-masking
;; -- confirm against constraints.md and the operand printer.
5220cab6 9002(define_insn "avx512f_<code><pmov_src_lower><mode>2_mask"
da982d5c 9003 [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
9004 (vec_merge:PMOV_DST_MODE_1
9005 (any_truncate:PMOV_DST_MODE_1
5220cab6 9006 (match_operand:<pmov_src_mode> 1 "register_operand" "v,v"))
da982d5c 9007 (match_operand:PMOV_DST_MODE_1 2 "vector_move_operand" "0C,0")
a31e7f46 9008 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
5220cab6 9009 "TARGET_AVX512F"
da982d5c 9010 "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
5220cab6 9011 [(set_attr "type" "ssemov")
9012 (set_attr "memory" "none,store")
9013 (set_attr "prefix" "evex")
9014 (set_attr "mode" "<sseinsnmode>")])
9015
;; Expander for a masked truncating store: the memory destination is also
;; the merge source (match_dup 0), and operand 2 is the mask register.
;; There is no preparation code; the pattern is emitted as written.
f4a19f2a 9016(define_expand "avx512f_<code><pmov_src_lower><mode>2_mask_store"
da982d5c 9017 [(set (match_operand:PMOV_DST_MODE_1 0 "memory_operand")
9018 (vec_merge:PMOV_DST_MODE_1
9019 (any_truncate:PMOV_DST_MODE_1
f4a19f2a 9020 (match_operand:<pmov_src_mode> 1 "register_operand"))
9021 (match_dup 0)
9022 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
9023 "TARGET_AVX512F")
9024
;; Unmasked AVX512BW truncation of a V32HI register to V32QI, written to
;; a register (alternative 0) or to memory (alternative 1), via vpmov*wb.
8f83f53e 9025(define_insn "avx512bw_<code>v32hiv32qi2"
da982d5c 9026 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
9027 (any_truncate:V32QI
9028 (match_operand:V32HI 1 "register_operand" "v,v")))]
9029 "TARGET_AVX512BW"
9030 "vpmov<trunsuffix>wb\t{%1, %0|%0, %1}"
9031 [(set_attr "type" "ssemov")
9032 (set_attr "memory" "none,store")
9033 (set_attr "prefix" "evex")
9034 (set_attr "mode" "XI")])
9035
;; Masked AVX512BW truncation of V32HI to V32QI: vec_merge of the
;; truncated operand 1 with operand 2 under the SImode mask register
;; operand 3 ("Yk").  Operand 2 is tied to the destination or matches "C"
;; for a register destination, and is tied for a memory destination.
9036(define_insn "avx512bw_<code>v32hiv32qi2_mask"
9037 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
9038 (vec_merge:V32QI
9039 (any_truncate:V32QI
9040 (match_operand:V32HI 1 "register_operand" "v,v"))
9041 (match_operand:V32QI 2 "vector_move_operand" "0C,0")
9042 (match_operand:SI 3 "register_operand" "Yk,Yk")))]
9043 "TARGET_AVX512BW"
9044 "vpmov<trunsuffix>wb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9045 [(set_attr "type" "ssemov")
9046 (set_attr "memory" "none,store")
9047 (set_attr "prefix" "evex")
9048 (set_attr "mode" "XI")])
9049
;; Expander for the masked V32HI to V32QI truncation in which the
;; destination is also the merge source (match_dup 0); operand 2 is the
;; SImode mask.  No preparation code.
;; NOTE(review): operand 0 uses nonimmediate_operand here, whereas the
;; AVX512F *_mask_store expander uses memory_operand -- confirm intended.
9050(define_expand "avx512bw_<code>v32hiv32qi2_mask_store"
9051 [(set (match_operand:V32QI 0 "nonimmediate_operand")
9052 (vec_merge:V32QI
9053 (any_truncate:V32QI
9054 (match_operand:V32HI 1 "register_operand"))
9055 (match_dup 0)
9056 (match_operand:SI 2 "register_operand")))]
9057 "TARGET_AVX512BW")
9058
;; Destination modes for the AVX512VL truncations from <ssedoublemode>
;; (V16QI only when TARGET_AVX512BW), with the matching vpmov mnemonic
;; suffix for each mode.
9059(define_mode_iterator PMOV_DST_MODE_2
9060 [V4SI V8HI (V16QI "TARGET_AVX512BW")])
9061(define_mode_attr pmov_suff_2
9062 [(V16QI "wb") (V8HI "dw") (V4SI "qd")])
9063
;; Unmasked AVX512VL truncation of an <ssedoublemode> register to
;; PMOV_DST_MODE_2, written to a register (alternative 0) or to memory
;; (alternative 1).
9064(define_insn "*avx512vl_<code><ssedoublemodelower><mode>2"
9065 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m")
9066 (any_truncate:PMOV_DST_MODE_2
9067 (match_operand:<ssedoublemode> 1 "register_operand" "v,v")))]
9068 "TARGET_AVX512VL"
9069 "vpmov<trunsuffix><pmov_suff_2>\t{%1, %0|%0, %1}"
9070 [(set_attr "type" "ssemov")
9071 (set_attr "memory" "none,store")
9072 (set_attr "prefix" "evex")
9073 (set_attr "mode" "<sseinsnmode>")])
9074
;; Masked AVX512VL truncation of <ssedoublemode> to PMOV_DST_MODE_2:
;; vec_merge of the truncated operand 1 with operand 2 under mask register
;; operand 3 ("Yk").  Operand 2 is tied to the destination or matches "C"
;; for a register destination, and is tied for a memory destination.
9075(define_insn "<avx512>_<code><ssedoublemodelower><mode>2_mask"
9076 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m")
9077 (vec_merge:PMOV_DST_MODE_2
9078 (any_truncate:PMOV_DST_MODE_2
9079 (match_operand:<ssedoublemode> 1 "register_operand" "v,v"))
9080 (match_operand:PMOV_DST_MODE_2 2 "vector_move_operand" "0C,0")
9081 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
9082 "TARGET_AVX512VL"
9083 "vpmov<trunsuffix><pmov_suff_2>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9084 [(set_attr "type" "ssemov")
9085 (set_attr "memory" "none,store")
9086 (set_attr "prefix" "evex")
9087 (set_attr "mode" "<sseinsnmode>")])
9088
;; Expander for the masked AVX512VL truncation in which the destination
;; is also the merge source (match_dup 0); operand 2 is the mask register.
;; No preparation code.
9089(define_expand "<avx512>_<code><ssedoublemodelower><mode>2_mask_store"
9090 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand")
9091 (vec_merge:PMOV_DST_MODE_2
9092 (any_truncate:PMOV_DST_MODE_2
9093 (match_operand:<ssedoublemode> 1 "register_operand"))
9094 (match_dup 0)
9095 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
9096 "TARGET_AVX512VL")
9097
;; Source modes for truncations to QI elements that fill only part of a
;; V16QI result (V8HI only when TARGET_AVX512BW).  Per source mode:
;; pmov_dst_3 is the mode of the truncated part, pmov_dst_zeroed_3 the
;; mode of the remaining part of the V16QI, and pmov_suff_3 the vpmov
;; mnemonic suffix.
9098(define_mode_iterator PMOV_SRC_MODE_3 [V4DI V2DI V8SI V4SI (V8HI "TARGET_AVX512BW")])
9099(define_mode_attr pmov_dst_3
9100 [(V4DI "V4QI") (V2DI "V2QI") (V8SI "V8QI") (V4SI "V4QI") (V8HI "V8QI")])
9101(define_mode_attr pmov_dst_zeroed_3
9102 [(V4DI "V12QI") (V2DI "V14QI") (V8SI "V8QI") (V4SI "V12QI") (V8HI "V8QI")])
9103(define_mode_attr pmov_suff_3
9104 [(V4DI "qb") (V2DI "qb") (V8SI "db") (V4SI "db") (V8HI "wb")])
9105
;; Unmasked register-to-register truncation to QI elements: the V16QI
;; result is the truncated source (<pmov_dst_3>) concatenated with a
;; constant-zero <pmov_dst_zeroed_3> operand (const0_operand).
9106(define_insn "*avx512vl_<code><mode>v<ssescalarnum>qi2"
9107 [(set (match_operand:V16QI 0 "register_operand" "=v")
9108 (vec_concat:V16QI
9109 (any_truncate:<pmov_dst_3>
9110 (match_operand:PMOV_SRC_MODE_3 1 "register_operand" "v"))
9111 (match_operand:<pmov_dst_zeroed_3> 2 "const0_operand")))]
9112 "TARGET_AVX512VL"
9113 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
9114 [(set_attr "type" "ssemov")
9115 (set_attr "prefix" "evex")
9116 (set_attr "mode" "TI")])
9117
;; Unmasked V2DI to V2QI truncating store.  The V16QI memory destination
;; is described as the two truncated elements followed by elements 2..15
;; of the same memory (match_dup 0), i.e. those elements keep their value.
9118(define_insn "*avx512vl_<code>v2div2qi2_store"
9119 [(set (match_operand:V16QI 0 "memory_operand" "=m")
9120 (vec_concat:V16QI
9121 (any_truncate:V2QI
9122 (match_operand:V2DI 1 "register_operand" "v"))
9123 (vec_select:V14QI
9124 (match_dup 0)
9125 (parallel [(const_int 2) (const_int 3)
9126 (const_int 4) (const_int 5)
9127 (const_int 6) (const_int 7)
9128 (const_int 8) (const_int 9)
9129 (const_int 10) (const_int 11)
9130 (const_int 12) (const_int 13)
9131 (const_int 14) (const_int 15)]))))]
9132 "TARGET_AVX512VL"
9133 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
9134 [(set_attr "type" "ssemov")
9135 (set_attr "memory" "store")
9136 (set_attr "prefix" "evex")
9137 (set_attr "mode" "TI")])
9138
;; Masked V2DI to V2QI truncation into a V16QI register.  Elements 0..1
;; are a vec_merge, under QImode mask operand 3, of the truncated source
;; and elements 0..1 of operand 2 (tied to the destination or matching
;; "C"); the remaining 14 elements are constant zero.
9139(define_insn "avx512vl_<code>v2div2qi2_mask"
9140 [(set (match_operand:V16QI 0 "register_operand" "=v")
9141 (vec_concat:V16QI
9142 (vec_merge:V2QI
9143 (any_truncate:V2QI
9144 (match_operand:V2DI 1 "register_operand" "v"))
9145 (vec_select:V2QI
9146 (match_operand:V16QI 2 "vector_move_operand" "0C")
9147 (parallel [(const_int 0) (const_int 1)]))
9148 (match_operand:QI 3 "register_operand" "Yk"))
9149 (const_vector:V14QI [(const_int 0) (const_int 0)
9150 (const_int 0) (const_int 0)
9151 (const_int 0) (const_int 0)
9152 (const_int 0) (const_int 0)
9153 (const_int 0) (const_int 0)
9154 (const_int 0) (const_int 0)
9155 (const_int 0) (const_int 0)])))]
9156 "TARGET_AVX512VL"
9157 "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9158 [(set_attr "type" "ssemov")
9159 (set_attr "prefix" "evex")
9160 (set_attr "mode" "TI")])
9161
;; Masked V2DI to V2QI truncating store.  Elements 0..1 of the V16QI
;; memory destination are a vec_merge, under QImode mask operand 2, of the
;; truncated source and the memory's own elements 0..1; elements 2..15 are
;; taken from the same memory (match_dup 0), i.e. they keep their value.
9162(define_insn "avx512vl_<code>v2div2qi2_mask_store"
9163 [(set (match_operand:V16QI 0 "memory_operand" "=m")
9164 (vec_concat:V16QI
9165 (vec_merge:V2QI
9166 (any_truncate:V2QI
9167 (match_operand:V2DI 1 "register_operand" "v"))
9168 (vec_select:V2QI
9169 (match_dup 0)
9170 (parallel [(const_int 0) (const_int 1)]))
9171 (match_operand:QI 2 "register_operand" "Yk"))
9172 (vec_select:V14QI
9173 (match_dup 0)
9174 (parallel [(const_int 2) (const_int 3)
9175 (const_int 4) (const_int 5)
9176 (const_int 6) (const_int 7)
9177 (const_int 8) (const_int 9)
9178 (const_int 10) (const_int 11)
9179 (const_int 12) (const_int 13)
9180 (const_int 14) (const_int 15)]))))]
9181 "TARGET_AVX512VL"
9182 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%0%{%2%}, %1}"
9183 [(set_attr "type" "ssemov")
9184 (set_attr "memory" "store")
9185 (set_attr "prefix" "evex")
9186 (set_attr "mode" "TI")])
9187
;; Unmasked truncating store of a VI4_128_8_256 register to V4QI.  The
;; V16QI memory destination is described as the four truncated elements
;; followed by elements 4..15 of the same memory (match_dup 0).
;; NOTE(review): VI4_128_8_256 is defined outside this view.
9188(define_insn "*avx512vl_<code><mode>v4qi2_store"
9189 [(set (match_operand:V16QI 0 "memory_operand" "=m")
9190 (vec_concat:V16QI
9191 (any_truncate:V4QI
9192 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
9193 (vec_select:V12QI
9194 (match_dup 0)
9195 (parallel [(const_int 4) (const_int 5)
9196 (const_int 6) (const_int 7)
9197 (const_int 8) (const_int 9)
9198 (const_int 10) (const_int 11)
9199 (const_int 12) (const_int 13)
9200 (const_int 14) (const_int 15)]))))]
9201 "TARGET_AVX512VL"
9202 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
9203 [(set_attr "type" "ssemov")
9204 (set_attr "memory" "store")
9205 (set_attr "prefix" "evex")
9206 (set_attr "mode" "TI")])
9207
;; Masked truncation of a VI4_128_8_256 register to V4QI in a V16QI
;; register.  Elements 0..3 are a vec_merge, under QImode mask operand 3,
;; of the truncated source and elements 0..3 of operand 2 (tied to the
;; destination or matching "C"); the remaining 12 elements are zero.
9208(define_insn "avx512vl_<code><mode>v4qi2_mask"
9209 [(set (match_operand:V16QI 0 "register_operand" "=v")
9210 (vec_concat:V16QI
9211 (vec_merge:V4QI
9212 (any_truncate:V4QI
9213 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
9214 (vec_select:V4QI
9215 (match_operand:V16QI 2 "vector_move_operand" "0C")
9216 (parallel [(const_int 0) (const_int 1)
9217 (const_int 2) (const_int 3)]))
9218 (match_operand:QI 3 "register_operand" "Yk"))
9219 (const_vector:V12QI [(const_int 0) (const_int 0)
9220 (const_int 0) (const_int 0)
9221 (const_int 0) (const_int 0)
9222 (const_int 0) (const_int 0)
9223 (const_int 0) (const_int 0)
9224 (const_int 0) (const_int 0)])))]
9225 "TARGET_AVX512VL"
9226 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9227 [(set_attr "type" "ssemov")
9228 (set_attr "prefix" "evex")
9229 (set_attr "mode" "TI")])
9230
;; Masked truncating store of a VI4_128_8_256 register to V4QI.  Elements
;; 0..3 of the V16QI memory destination are a vec_merge, under QImode mask
;; operand 2, of the truncated source and the memory's own elements 0..3;
;; elements 4..15 are taken from the same memory (match_dup 0).
9231(define_insn "avx512vl_<code><mode>v4qi2_mask_store"
9232 [(set (match_operand:V16QI 0 "memory_operand" "=m")
9233 (vec_concat:V16QI
9234 (vec_merge:V4QI
9235 (any_truncate:V4QI
9236 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
9237 (vec_select:V4QI
9238 (match_dup 0)
9239 (parallel [(const_int 0) (const_int 1)
9240 (const_int 2) (const_int 3)]))
9241 (match_operand:QI 2 "register_operand" "Yk"))
9242 (vec_select:V12QI
9243 (match_dup 0)
9244 (parallel [(const_int 4) (const_int 5)
9245 (const_int 6) (const_int 7)
9246 (const_int 8) (const_int 9)
9247 (const_int 10) (const_int 11)
9248 (const_int 12) (const_int 13)
9249 (const_int 14) (const_int 15)]))))]
9250 "TARGET_AVX512VL"
9251 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
9252 [(set_attr "type" "ssemov")
9253 (set_attr "memory" "store")
9254 (set_attr "prefix" "evex")
9255 (set_attr "mode" "TI")])
9256
;; Source modes with eight elements used by the truncate-to-V8QI
;; patterns: V8SI always, V8HI only when TARGET_AVX512BW.
9257(define_mode_iterator VI2_128_BW_4_256
9258 [(V8HI "TARGET_AVX512BW") V8SI])
9259
;; Unmasked truncating store of a VI2_128_BW_4_256 register to V8QI.  The
;; V16QI memory destination is described as the eight truncated elements
;; followed by elements 8..15 of the same memory (match_dup 0).
9260(define_insn "*avx512vl_<code><mode>v8qi2_store"
9261 [(set (match_operand:V16QI 0 "memory_operand" "=m")
9262 (vec_concat:V16QI
9263 (any_truncate:V8QI
9264 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
9265 (vec_select:V8QI
9266 (match_dup 0)
9267 (parallel [(const_int 8) (const_int 9)
9268 (const_int 10) (const_int 11)
9269 (const_int 12) (const_int 13)
9270 (const_int 14) (const_int 15)]))))]
9271 "TARGET_AVX512VL"
9272 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
9273 [(set_attr "type" "ssemov")
9274 (set_attr "memory" "store")
9275 (set_attr "prefix" "evex")
9276 (set_attr "mode" "TI")])
9277
;; Masked truncation of a VI2_128_BW_4_256 register to V8QI in a V16QI
;; register.  Elements 0..7 are a vec_merge, under QImode mask operand 3,
;; of the truncated source and elements 0..7 of operand 2 (tied to the
;; destination or matching "C"); the remaining 8 elements are zero.
9278(define_insn "avx512vl_<code><mode>v8qi2_mask"
9279 [(set (match_operand:V16QI 0 "register_operand" "=v")
9280 (vec_concat:V16QI
9281 (vec_merge:V8QI
9282 (any_truncate:V8QI
9283 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
9284 (vec_select:V8QI
9285 (match_operand:V16QI 2 "vector_move_operand" "0C")
9286 (parallel [(const_int 0) (const_int 1)
9287 (const_int 2) (const_int 3)
9288 (const_int 4) (const_int 5)
9289 (const_int 6) (const_int 7)]))
9290 (match_operand:QI 3 "register_operand" "Yk"))
9291 (const_vector:V8QI [(const_int 0) (const_int 0)
9292 (const_int 0) (const_int 0)
9293 (const_int 0) (const_int 0)
9294 (const_int 0) (const_int 0)])))]
9295 "TARGET_AVX512VL"
9296 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9297 [(set_attr "type" "ssemov")
9298 (set_attr "prefix" "evex")
9299 (set_attr "mode" "TI")])
9300
;; Masked truncating store of a VI2_128_BW_4_256 register to V8QI.
;; Elements 0..7 of the V16QI memory destination are a vec_merge, under
;; QImode mask operand 2, of the truncated source and the memory's own
;; elements 0..7; elements 8..15 are taken from the same memory.
9301(define_insn "avx512vl_<code><mode>v8qi2_mask_store"
9302 [(set (match_operand:V16QI 0 "memory_operand" "=m")
9303 (vec_concat:V16QI
9304 (vec_merge:V8QI
9305 (any_truncate:V8QI
9306 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
9307 (vec_select:V8QI
9308 (match_dup 0)
9309 (parallel [(const_int 0) (const_int 1)
9310 (const_int 2) (const_int 3)
9311 (const_int 4) (const_int 5)
9312 (const_int 6) (const_int 7)]))
9313 (match_operand:QI 2 "register_operand" "Yk"))
9314 (vec_select:V8QI
9315 (match_dup 0)
9316 (parallel [(const_int 8) (const_int 9)
9317 (const_int 10) (const_int 11)
9318 (const_int 12) (const_int 13)
9319 (const_int 14) (const_int 15)]))))]
9320 "TARGET_AVX512VL"
9321 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
9322 [(set_attr "type" "ssemov")
9323 (set_attr "memory" "store")
9324 (set_attr "prefix" "evex")
9325 (set_attr "mode" "TI")])
9326
;; Source modes for truncations to HI elements that fill only part of a
;; V8HI result.  Per source mode: pmov_dst_4 is the mode of the truncated
;; part, pmov_dst_zeroed_4 the mode of the remaining part of the V8HI, and
;; pmov_suff_4 the vpmov mnemonic suffix.
9327(define_mode_iterator PMOV_SRC_MODE_4 [V4DI V2DI V4SI])
9328(define_mode_attr pmov_dst_4
9329 [(V4DI "V4HI") (V2DI "V2HI") (V4SI "V4HI")])
9330(define_mode_attr pmov_dst_zeroed_4
9331 [(V4DI "V4HI") (V2DI "V6HI") (V4SI "V4HI")])
9332(define_mode_attr pmov_suff_4
9333 [(V4DI "qw") (V2DI "qw") (V4SI "dw")])
9334
;; Unmasked register-to-register truncation to HI elements: the V8HI
;; result is the truncated source (<pmov_dst_4>) concatenated with a
;; constant-zero <pmov_dst_zeroed_4> operand (const0_operand).
9335(define_insn "*avx512vl_<code><mode>v<ssescalarnum>hi2"
9336 [(set (match_operand:V8HI 0 "register_operand" "=v")
9337 (vec_concat:V8HI
9338 (any_truncate:<pmov_dst_4>
9339 (match_operand:PMOV_SRC_MODE_4 1 "register_operand" "v"))
9340 (match_operand:<pmov_dst_zeroed_4> 2 "const0_operand")))]
9341 "TARGET_AVX512VL"
9342 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0|%0, %1}"
9343 [(set_attr "type" "ssemov")
9344 (set_attr "prefix" "evex")
9345 (set_attr "mode" "TI")])
9346
;; Unmasked truncating store of a VI4_128_8_256 register to V4HI.  The
;; V8HI memory destination is described as the four truncated elements
;; followed by elements 4..7 of the same memory (match_dup 0).
9347(define_insn "*avx512vl_<code><mode>v4hi2_store"
9348 [(set (match_operand:V8HI 0 "memory_operand" "=m")
9349 (vec_concat:V8HI
9350 (any_truncate:V4HI
9351 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
9352 (vec_select:V4HI
9353 (match_dup 0)
9354 (parallel [(const_int 4) (const_int 5)
9355 (const_int 6) (const_int 7)]))))]
9356 "TARGET_AVX512VL"
9357 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0|%0, %1}"
9358 [(set_attr "type" "ssemov")
9359 (set_attr "memory" "store")
9360 (set_attr "prefix" "evex")
9361 (set_attr "mode" "TI")])
9362
;; Masked truncation of a VI4_128_8_256 register to V4HI in a V8HI
;; register.  Elements 0..3 are a vec_merge, under QImode mask operand 3,
;; of the truncated source and elements 0..3 of operand 2 (tied to the
;; destination or matching "C"); the remaining 4 elements are zero.
9363(define_insn "avx512vl_<code><mode>v4hi2_mask"
9364 [(set (match_operand:V8HI 0 "register_operand" "=v")
9365 (vec_concat:V8HI
9366 (vec_merge:V4HI
9367 (any_truncate:V4HI
9368 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
9369 (vec_select:V4HI
9370 (match_operand:V8HI 2 "vector_move_operand" "0C")
9371 (parallel [(const_int 0) (const_int 1)
9372 (const_int 2) (const_int 3)]))
9373 (match_operand:QI 3 "register_operand" "Yk"))
9374 (const_vector:V4HI [(const_int 0) (const_int 0)
9375 (const_int 0) (const_int 0)])))]
9376 "TARGET_AVX512VL"
9377 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9378 [(set_attr "type" "ssemov")
9379 (set_attr "prefix" "evex")
9380 (set_attr "mode" "TI")])
9381
;; Masked truncating store of a VI4_128_8_256 register to V4HI.  Elements
;; 0..3 of the V8HI memory destination are a vec_merge, under QImode mask
;; operand 2, of the truncated source and the memory's own elements 0..3;
;; elements 4..7 are taken from the same memory (match_dup 0).
9382(define_insn "avx512vl_<code><mode>v4hi2_mask_store"
9383 [(set (match_operand:V8HI 0 "memory_operand" "=m")
9384 (vec_concat:V8HI
9385 (vec_merge:V4HI
9386 (any_truncate:V4HI
9387 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
9388 (vec_select:V4HI
9389 (match_dup 0)
9390 (parallel [(const_int 0) (const_int 1)
9391 (const_int 2) (const_int 3)]))
9392 (match_operand:QI 2 "register_operand" "Yk"))
9393 (vec_select:V4HI
9394 (match_dup 0)
9395 (parallel [(const_int 4) (const_int 5)
9396 (const_int 6) (const_int 7)]))))]
9397 "TARGET_AVX512VL"
9398 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
9399 [(set_attr "type" "ssemov")
9400 (set_attr "memory" "store")
9401 (set_attr "prefix" "evex")
9402 (set_attr "mode" "TI")])
9403
;; Unmasked V2DI to V2HI truncating store.  The V8HI memory destination
;; is described as the two truncated elements followed by elements 2..7
;; of the same memory (match_dup 0).
9404(define_insn "*avx512vl_<code>v2div2hi2_store"
9405 [(set (match_operand:V8HI 0 "memory_operand" "=m")
9406 (vec_concat:V8HI
9407 (any_truncate:V2HI
9408 (match_operand:V2DI 1 "register_operand" "v"))
9409 (vec_select:V6HI
9410 (match_dup 0)
9411 (parallel [(const_int 2) (const_int 3)
9412 (const_int 4) (const_int 5)
9413 (const_int 6) (const_int 7)]))))]
9414 "TARGET_AVX512VL"
9415 "vpmov<trunsuffix>qw\t{%1, %0|%0, %1}"
9416 [(set_attr "type" "ssemov")
9417 (set_attr "memory" "store")
9418 (set_attr "prefix" "evex")
9419 (set_attr "mode" "TI")])
9420
;; Masked V2DI to V2HI truncation into a V8HI register.  Elements 0..1
;; are a vec_merge, under QImode mask operand 3, of the truncated source
;; and elements 0..1 of operand 2 (tied to the destination or matching
;; "C"); the remaining 6 elements are constant zero.
9421(define_insn "avx512vl_<code>v2div2hi2_mask"
9422 [(set (match_operand:V8HI 0 "register_operand" "=v")
9423 (vec_concat:V8HI
9424 (vec_merge:V2HI
9425 (any_truncate:V2HI
9426 (match_operand:V2DI 1 "register_operand" "v"))
9427 (vec_select:V2HI
9428 (match_operand:V8HI 2 "vector_move_operand" "0C")
9429 (parallel [(const_int 0) (const_int 1)]))
9430 (match_operand:QI 3 "register_operand" "Yk"))
9431 (const_vector:V6HI [(const_int 0) (const_int 0)
9432 (const_int 0) (const_int 0)
9433 (const_int 0) (const_int 0)])))]
9434 "TARGET_AVX512VL"
9435 "vpmov<trunsuffix>qw\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9436 [(set_attr "type" "ssemov")
9437 (set_attr "prefix" "evex")
9438 (set_attr "mode" "TI")])
9439
;; Masked V2DI to V2HI truncating store.  Elements 0..1 of the V8HI
;; memory destination are a vec_merge, under QImode mask operand 2, of the
;; truncated source and the memory's own elements 0..1; elements 2..7 are
;; taken from the same memory (match_dup 0).
9440(define_insn "avx512vl_<code>v2div2hi2_mask_store"
9441 [(set (match_operand:V8HI 0 "memory_operand" "=m")
9442 (vec_concat:V8HI
9443 (vec_merge:V2HI
9444 (any_truncate:V2HI
9445 (match_operand:V2DI 1 "register_operand" "v"))
9446 (vec_select:V2HI
9447 (match_dup 0)
9448 (parallel [(const_int 0) (const_int 1)]))
9449 (match_operand:QI 2 "register_operand" "Yk"))
9450 (vec_select:V6HI
9451 (match_dup 0)
9452 (parallel [(const_int 2) (const_int 3)
9453 (const_int 4) (const_int 5)
9454 (const_int 6) (const_int 7)]))))]
9455 "TARGET_AVX512VL"
9456 "vpmov<trunsuffix>qw\t{%1, %0%{%2%}|%0%{%2%}, %1}"
9457 [(set_attr "type" "ssemov")
9458 (set_attr "memory" "store")
9459 (set_attr "prefix" "evex")
9460 (set_attr "mode" "TI")])
9461
;; Unmasked register-to-register V2DI to V2SI truncation: the V4SI result
;; is the truncated source concatenated with a constant-zero V2SI operand
;; (const0_operand).
9462(define_insn "*avx512vl_<code>v2div2si2"
9463 [(set (match_operand:V4SI 0 "register_operand" "=v")
9464 (vec_concat:V4SI
9465 (any_truncate:V2SI
9466 (match_operand:V2DI 1 "register_operand" "v"))
9467 (match_operand:V2SI 2 "const0_operand")))]
9468 "TARGET_AVX512VL"
9469 "vpmov<trunsuffix>qd\t{%1, %0|%0, %1}"
9470 [(set_attr "type" "ssemov")
9471 (set_attr "prefix" "evex")
9472 (set_attr "mode" "TI")])
9473
;; Unmasked V2DI to V2SI truncating store.  The V4SI memory destination
;; is described as the two truncated elements followed by elements 2..3
;; of the same memory (match_dup 0).
9474(define_insn "*avx512vl_<code>v2div2si2_store"
9475 [(set (match_operand:V4SI 0 "memory_operand" "=m")
9476 (vec_concat:V4SI
9477 (any_truncate:V2SI
9478 (match_operand:V2DI 1 "register_operand" "v"))
9479 (vec_select:V2SI
9480 (match_dup 0)
9481 (parallel [(const_int 2) (const_int 3)]))))]
9482 "TARGET_AVX512VL"
9483 "vpmov<trunsuffix>qd\t{%1, %0|%0, %1}"
9484 [(set_attr "type" "ssemov")
9485 (set_attr "memory" "store")
9486 (set_attr "prefix" "evex")
9487 (set_attr "mode" "TI")])
9488
;; Masked V2DI to V2SI truncation into a V4SI register.  Elements 0..1
;; are a vec_merge, under QImode mask operand 3, of the truncated source
;; and elements 0..1 of operand 2 (tied to the destination or matching
;; "C"); the remaining 2 elements are constant zero.
9489(define_insn "avx512vl_<code>v2div2si2_mask"
9490 [(set (match_operand:V4SI 0 "register_operand" "=v")
9491 (vec_concat:V4SI
9492 (vec_merge:V2SI
9493 (any_truncate:V2SI
9494 (match_operand:V2DI 1 "register_operand" "v"))
9495 (vec_select:V2SI
9496 (match_operand:V4SI 2 "vector_move_operand" "0C")
9497 (parallel [(const_int 0) (const_int 1)]))
9498 (match_operand:QI 3 "register_operand" "Yk"))
9499 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
9500 "TARGET_AVX512VL"
9501 "vpmov<trunsuffix>qd\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9502 [(set_attr "type" "ssemov")
9503 (set_attr "prefix" "evex")
9504 (set_attr "mode" "TI")])
9505
;; Masked V2DI to V2SI truncating store.  Elements 0..1 of the V4SI
;; memory destination are a vec_merge, under QImode mask operand 2, of the
;; truncated source and the memory's own elements 0..1; elements 2..3 are
;; taken from the same memory (match_dup 0).
9506(define_insn "avx512vl_<code>v2div2si2_mask_store"
9507 [(set (match_operand:V4SI 0 "memory_operand" "=m")
9508 (vec_concat:V4SI
9509 (vec_merge:V2SI
9510 (any_truncate:V2SI
9511 (match_operand:V2DI 1 "register_operand" "v"))
9512 (vec_select:V2SI
9513 (match_dup 0)
9514 (parallel [(const_int 0) (const_int 1)]))
9515 (match_operand:QI 2 "register_operand" "Yk"))
9516 (vec_select:V2SI
9517 (match_dup 0)
9518 (parallel [(const_int 2) (const_int 3)]))))]
9519 "TARGET_AVX512VL"
9520 "vpmov<trunsuffix>qd\t{%1, %0%{%2%}|%0%{%2%}, %1}"
9521 [(set_attr "type" "ssemov")
9522 (set_attr "memory" "store")
9523 (set_attr "prefix" "evex")
9524 (set_attr "mode" "TI")])
9525
;; Unmasked AVX512F register-to-register truncation of V8DI to V8QI: the
;; V16QI result is the eight truncated elements followed by a literal
;; all-zero V8QI const_vector.
697a43f8 9526(define_insn "*avx512f_<code>v8div16qi2"
9527 [(set (match_operand:V16QI 0 "register_operand" "=v")
9528 (vec_concat:V16QI
9529 (any_truncate:V8QI
9530 (match_operand:V8DI 1 "register_operand" "v"))
9531 (const_vector:V8QI [(const_int 0) (const_int 0)
9532 (const_int 0) (const_int 0)
9533 (const_int 0) (const_int 0)
9534 (const_int 0) (const_int 0)])))]
9535 "TARGET_AVX512F"
9536 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
9537 [(set_attr "type" "ssemov")
9538 (set_attr "prefix" "evex")
9539 (set_attr "mode" "TI")])
9540
;; Store form of the V8DI -> V8QI truncation.  The truncated operand 1
;; supplies elements 0-7 of the V16QI memory destination; elements 8-15
;; are selected from the destination itself (match_dup 0), i.e. they are
;; described as unchanged.
9541(define_insn "*avx512f_<code>v8div16qi2_store"
9542 [(set (match_operand:V16QI 0 "memory_operand" "=m")
9543 (vec_concat:V16QI
9544 (any_truncate:V8QI
9545 (match_operand:V8DI 1 "register_operand" "v"))
9546 (vec_select:V8QI
9547 (match_dup 0)
9548 (parallel [(const_int 8) (const_int 9)
9549 (const_int 10) (const_int 11)
9550 (const_int 12) (const_int 13)
9551 (const_int 14) (const_int 15)]))))]
9552 "TARGET_AVX512F"
9553 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
9554 [(set_attr "type" "ssemov")
9555 (set_attr "memory" "store")
9556 (set_attr "prefix" "evex")
9557 (set_attr "mode" "TI")])
9558
;; Masked register form of the V8DI -> V8QI truncation.  Elements 0-7 of
;; the V16QI result are a vec_merge, under mask operand 3, of the truncated
;; operand 1 and elements 0-7 of operand 2 (constraint "0C": tied to the
;; destination or a constant accepted by "C"); elements 8-15 are zero.
;; NOTE(review): %N2 presumably prints the zero-masking suffix when
;; operand 2 is the zero constant -- confirm against the operand printer.
5220cab6 9559(define_insn "avx512f_<code>v8div16qi2_mask"
9560 [(set (match_operand:V16QI 0 "register_operand" "=v")
9561 (vec_concat:V16QI
9562 (vec_merge:V8QI
9563 (any_truncate:V8QI
9564 (match_operand:V8DI 1 "register_operand" "v"))
9565 (vec_select:V8QI
9566 (match_operand:V16QI 2 "vector_move_operand" "0C")
9567 (parallel [(const_int 0) (const_int 1)
9568 (const_int 2) (const_int 3)
9569 (const_int 4) (const_int 5)
9570 (const_int 6) (const_int 7)]))
a31e7f46 9571 (match_operand:QI 3 "register_operand" "Yk"))
5220cab6 9572 (const_vector:V8QI [(const_int 0) (const_int 0)
9573 (const_int 0) (const_int 0)
9574 (const_int 0) (const_int 0)
9575 (const_int 0) (const_int 0)])))]
9576 "TARGET_AVX512F"
9577 "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9578 [(set_attr "type" "ssemov")
9579 (set_attr "prefix" "evex")
9580 (set_attr "mode" "TI")])
9581
;; Masked store form of the V8DI -> V8QI truncation.  Elements 0-7 of the
;; V16QI memory destination are a vec_merge, under mask operand 2, of the
;; truncated operand 1 and the previous contents of those elements
;; (match_dup 0); elements 8-15 of the destination are kept as they were.
f4a19f2a 9582(define_insn "avx512f_<code>v8div16qi2_mask_store"
5220cab6 9583 [(set (match_operand:V16QI 0 "memory_operand" "=m")
9584 (vec_concat:V16QI
9585 (vec_merge:V8QI
9586 (any_truncate:V8QI
9587 (match_operand:V8DI 1 "register_operand" "v"))
9588 (vec_select:V8QI
9589 (match_dup 0)
9590 (parallel [(const_int 0) (const_int 1)
9591 (const_int 2) (const_int 3)
9592 (const_int 4) (const_int 5)
9593 (const_int 6) (const_int 7)]))
a31e7f46 9594 (match_operand:QI 2 "register_operand" "Yk"))
5220cab6 9595 (vec_select:V8QI
9596 (match_dup 0)
9597 (parallel [(const_int 8) (const_int 9)
9598 (const_int 10) (const_int 11)
9599 (const_int 12) (const_int 13)
9600 (const_int 14) (const_int 15)]))))]
9601 "TARGET_AVX512F"
9602 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%0%{%2%}, %1}"
9603 [(set_attr "type" "ssemov")
9604 (set_attr "memory" "store")
9605 (set_attr "prefix" "evex")
9606 (set_attr "mode" "TI")])
9607
5802c0cb 9608;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9609;;
9610;; Parallel integral arithmetic
9611;;
9612;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9613
;; Integer vector negation, expanded as a subtraction (operand 2 - operand 1).
;; Operand 2 is not supplied by the caller: the preparation statement sets
;; it to a register holding the zero vector of <MODE>mode.
9614(define_expand "neg<mode>2"
abd4f58b 9615 [(set (match_operand:VI_AVX2 0 "register_operand")
d2c249f5 9616 (minus:VI_AVX2
5802c0cb 9617 (match_dup 2)
abd4f58b 9618 (match_operand:VI_AVX2 1 "nonimmediate_operand")))]
5802c0cb 9619 "TARGET_SSE2"
9620 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
9621
;; Expander for integer vector add/subtract (plusminus code iterator) over
;; the VI_AVX2 modes.  Both sources may be registers or memory; the
;; operands are passed through ix86_fixup_binary_operands_no_copy (defined
;; elsewhere) before the RTL template is emitted.
12803fe0 9622(define_expand "<plusminus_insn><mode>3"
abd4f58b 9623 [(set (match_operand:VI_AVX2 0 "register_operand")
c4530783 9624 (plusminus:VI_AVX2
abd4f58b 9625 (match_operand:VI_AVX2 1 "nonimmediate_operand")
9626 (match_operand:VI_AVX2 2 "nonimmediate_operand")))]
12803fe0 9627 "TARGET_SSE2"
9628 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
9629
;; Masked add/subtract expander for the VI48_AVX512VL modes.  The result
;; is a vec_merge, under mask operand 4, of (operand 1 op operand 2) and
;; operand 3.  Enabled for TARGET_AVX512F; the operands are passed through
;; ix86_fixup_binary_operands_no_copy before the template is emitted.
9630(define_expand "<plusminus_insn><mode>3_mask"
9631 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
9632 (vec_merge:VI48_AVX512VL
9633 (plusminus:VI48_AVX512VL
9634 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
9635 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
9636 (match_operand:VI48_AVX512VL 3 "vector_move_operand")
9637 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
9638 "TARGET_AVX512F"
9639 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
9640
;; Masked add/subtract expander for the VI12_AVX512VL modes; same shape as
;; the VI48_AVX512VL expander of the same name, but enabled for
;; TARGET_AVX512BW.  The result is a vec_merge, under mask operand 4, of
;; (operand 1 op operand 2) and operand 3.
9641(define_expand "<plusminus_insn><mode>3_mask"
9642 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
9643 (vec_merge:VI12_AVX512VL
9644 (plusminus:VI12_AVX512VL
9645 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
9646 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand"))
9647 (match_operand:VI12_AVX512VL 3 "vector_move_operand")
9648 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
9649 "TARGET_AVX512BW"
801ff5b2 9650 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
5802c0cb 9651
;; Matcher for unmasked integer vector add/subtract over the VI_AVX2 modes.
;; Alternative 0 (isa noavx) is the two-operand form with operand 1 tied to
;; the destination; alternative 1 (isa avx) is the three-operand v-prefixed
;; form.  The insn only matches when ix86_binary_operator_ok accepts the
;; operands.
;; NOTE(review): the pattern name carries no <mask_name>, yet the template
;; and the "prefix" attribute use <mask_operand3> / <mask_prefix3> --
;; confirm these expand to their non-masked defaults here.
12803fe0 9652(define_insn "*<plusminus_insn><mode>3"
e13e1b39 9653 [(set (match_operand:VI_AVX2 0 "register_operand" "=x,v")
c4530783 9654 (plusminus:VI_AVX2
e13e1b39 9655 (match_operand:VI_AVX2 1 "nonimmediate_operand" "<comm>0,v")
9656 (match_operand:VI_AVX2 2 "nonimmediate_operand" "xm,vm")))]
12803fe0 9657 "TARGET_SSE2
9658 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
d8f82f6b 9659 "@
63d5e521 9660 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
5220cab6 9661 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
d8f82f6b 9662 [(set_attr "isa" "noavx,avx")
9663 (set_attr "type" "sseiadd")
9664 (set_attr "prefix_data16" "1,*")
5220cab6 9665 (set_attr "prefix" "<mask_prefix3>")
5deb404d 9666 (set_attr "mode" "<sseinsnmode>")])
5802c0cb 9667
;; Matcher for masked add/subtract over the VI48_AVX512VL modes.  The
;; result is a vec_merge, under mask operand 4, of (operand 1 op operand 2)
;; and operand 3 (constraint "0C": tied to the destination or a constant
;; accepted by "C").  Requires TARGET_AVX512F and ix86_binary_operator_ok.
;; NOTE(review): %N3 presumably prints the zero-masking suffix when
;; operand 3 is the zero constant -- confirm against the operand printer.
12803fe0 9668(define_insn "*<plusminus_insn><mode>3_mask"
9669 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
9670 (vec_merge:VI48_AVX512VL
9671 (plusminus:VI48_AVX512VL
9672 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "<comm>v")
9673 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm"))
9674 (match_operand:VI48_AVX512VL 3 "vector_move_operand" "0C")
9675 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
9676 "TARGET_AVX512F
9677 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
9678 "vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
9679 [(set_attr "type" "sseiadd")
9680 (set_attr "prefix" "evex")
9681 (set_attr "mode" "<sseinsnmode>")])
9682
;; Matcher for masked add/subtract over the VI12_AVX512VL modes; same RTL
;; shape and output template as the VI48_AVX512VL matcher of the same
;; name, but enabled for TARGET_AVX512BW.
9683(define_insn "*<plusminus_insn><mode>3_mask"
9684 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
9685 (vec_merge:VI12_AVX512VL
9686 (plusminus:VI12_AVX512VL
9687 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "<comm>v")
9688 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm"))
9689 (match_operand:VI12_AVX512VL 3 "vector_move_operand" "0C")
9690 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
9691 "TARGET_AVX512BW && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
9692 "vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
9693 [(set_attr "type" "sseiadd")
9694 (set_attr "prefix" "evex")
9695 (set_attr "mode" "<sseinsnmode>")])
9696
;; Expander for the sat_plusminus codes over the VI12_AVX2 modes, with an
;; optional masked variant via <mask_name>.  The condition combines
;; TARGET_SSE2 with the <mask_mode512bit_condition> and
;; <mask_avx512bw_condition> substitution conditions.  Operands are passed
;; through ix86_fixup_binary_operands_no_copy before the template is emitted.
293fd15f 9697(define_expand "<sse2_avx2>_<plusminus_insn><mode>3<mask_name>"
abd4f58b 9698 [(set (match_operand:VI12_AVX2 0 "register_operand")
5deb404d 9699 (sat_plusminus:VI12_AVX2
abd4f58b 9700 (match_operand:VI12_AVX2 1 "nonimmediate_operand")
9701 (match_operand:VI12_AVX2 2 "nonimmediate_operand")))]
293fd15f 9702 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
7c839b3f 9703 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
9704
;; Matcher for the sat_plusminus codes over the VI12_AVX2 modes (masked
;; variant via <mask_name>).  Alternative 0 (isa noavx) is the two-operand
;; form with operand 1 tied to the destination; alternative 1 (isa avx) is
;; the three-operand v-prefixed form, which appends <mask_operand3> to the
;; destination.
293fd15f 9705(define_insn "*<sse2_avx2>_<plusminus_insn><mode>3<mask_name>"
e13e1b39 9706 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v")
5deb404d 9707 (sat_plusminus:VI12_AVX2
e13e1b39 9708 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "<comm>0,v")
9709 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,vm")))]
293fd15f 9710 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
9711 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
d8f82f6b 9712 "@
63d5e521 9713 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
293fd15f 9714 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
d8f82f6b 9715 [(set_attr "isa" "noavx,avx")
9716 (set_attr "type" "sseiadd")
9717 (set_attr "prefix_data16" "1,*")
293fd15f 9718 (set_attr "prefix" "orig,maybe_evex")
5802c0cb 9719 (set_attr "mode" "TI")])
9720
;; Multiplication expander for the VI1_AVX512 modes.  The RTL template is
;; never emitted: the body hands the three register operands to
;; ix86_expand_vecop_qihi with code MULT (helper defined elsewhere) and
;; finishes with DONE.
05bea2df 9721(define_expand "mul<mode>3<mask_name>"
201f262d 9722 [(set (match_operand:VI1_AVX512 0 "register_operand")
9723 (mult:VI1_AVX512 (match_operand:VI1_AVX512 1 "register_operand")
9724 (match_operand:VI1_AVX512 2 "register_operand")))]
05bea2df 9725 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
4b26818b 9726{
b1b4d742 9727 ix86_expand_vecop_qihi (MULT, operands[0], operands[1], operands[2]);
4b26818b 9728 DONE;
9729})
9730
;; Multiplication expander for the VI2_AVX2 modes (masked variant via
;; <mask_name>).  Operands are passed through
;; ix86_fixup_binary_operands_no_copy with code MULT before the template
;; is emitted.
2d71b728 9731(define_expand "mul<mode>3<mask_name>"
abd4f58b 9732 [(set (match_operand:VI2_AVX2 0 "register_operand")
9733 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand")
9734 (match_operand:VI2_AVX2 2 "nonimmediate_operand")))]
2d71b728 9735 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
5deb404d 9736 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
5802c0cb 9737
;; Matcher for VI2_AVX2 multiplication; emits pmullw (isa noavx,
;; operand 1 tied to the destination) or vpmullw (isa avx, three operands,
;; with <mask_operand3> appended to the destination).
;; NOTE(review): the "prefix" attribute is "orig,vex" while the "v"
;; alternative also carries the masked variant; the saturating add/sub
;; matcher uses "orig,maybe_evex" for the same shape -- confirm intent.
2d71b728 9738(define_insn "*mul<mode>3<mask_name>"
9739 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
9740 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,v")
9741 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,vm")))]
9742 "TARGET_SSE2
9743 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)
9744 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
d8f82f6b 9745 "@
9746 pmullw\t{%2, %0|%0, %2}
2d71b728 9747 vpmullw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
d8f82f6b 9748 [(set_attr "isa" "noavx,avx")
9749 (set_attr "type" "sseimul")
9750 (set_attr "prefix_data16" "1,*")
9751 (set_attr "prefix" "orig,vex")
5deb404d 9752 (set_attr "mode" "<sseinsnmode>")])
5802c0cb 9753
;; High-part multiplication expander for the VI2_AVX2 modes.  Both
;; operands are extended (any_extend) to <ssedoublemode>, multiplied, the
;; product is shifted right logically by 16 and truncated back to the
;; element mode.  Operands are passed through
;; ix86_fixup_binary_operands_no_copy before the template is emitted.
2d71b728 9754(define_expand "<s>mul<mode>3_highpart<mask_name>"
abd4f58b 9755 [(set (match_operand:VI2_AVX2 0 "register_operand")
5deb404d 9756 (truncate:VI2_AVX2
9757 (lshiftrt:<ssedoublemode>
9758 (mult:<ssedoublemode>
9759 (any_extend:<ssedoublemode>
abd4f58b 9760 (match_operand:VI2_AVX2 1 "nonimmediate_operand"))
5deb404d 9761 (any_extend:<ssedoublemode>
abd4f58b 9762 (match_operand:VI2_AVX2 2 "nonimmediate_operand")))
5deb404d 9763 (const_int 16))))]
2d71b728 9764 "TARGET_SSE2
9765 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
1004a2c1 9766 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
c6c91d61 9767
;; Matcher for the VI2_AVX2 high-part multiplication: truncate of
;; ((extend op1 * extend op2) >> 16).  Emits pmulh<u>w (isa noavx,
;; operand 1 tied to the destination) or vpmulh<u>w (isa avx, three
;; operands, with <mask_operand3> appended to the destination).
2d71b728 9768(define_insn "*<s>mul<mode>3_highpart<mask_name>"
9769 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
5deb404d 9770 (truncate:VI2_AVX2
9771 (lshiftrt:<ssedoublemode>
9772 (mult:<ssedoublemode>
9773 (any_extend:<ssedoublemode>
2d71b728 9774 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,v"))
5deb404d 9775 (any_extend:<ssedoublemode>
2d71b728 9776 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,vm")))
5802c0cb 9777 (const_int 16))))]
2d71b728 9778 "TARGET_SSE2
9779 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)
9780 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
d8f82f6b 9781 "@
9782 pmulh<u>w\t{%2, %0|%0, %2}
2d71b728 9783 vpmulh<u>w\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
d8f82f6b 9784 [(set_attr "isa" "noavx,avx")
9785 (set_attr "type" "sseimul")
9786 (set_attr "prefix_data16" "1,*")
9787 (set_attr "prefix" "orig,vex")
5deb404d 9788 (set_attr "mode" "<sseinsnmode>")])
9789
;; Unsigned widening multiply of the even-indexed elements of two V16SI
;; operands: elements 0,2,...,14 of each source are zero-extended to
;; DImode and multiplied, giving a V8DI result.  Enabled for
;; TARGET_AVX512F; operands are passed through
;; ix86_fixup_binary_operands_no_copy before the template is emitted.
5220cab6 9790(define_expand "vec_widen_umult_even_v16si<mask_name>"
697a43f8 9791 [(set (match_operand:V8DI 0 "register_operand")
9792 (mult:V8DI
9793 (zero_extend:V8DI
9794 (vec_select:V8SI
9795 (match_operand:V16SI 1 "nonimmediate_operand")
9796 (parallel [(const_int 0) (const_int 2)
9797 (const_int 4) (const_int 6)
9798 (const_int 8) (const_int 10)
9799 (const_int 12) (const_int 14)])))
9800 (zero_extend:V8DI
9801 (vec_select:V8SI
9802 (match_operand:V16SI 2 "nonimmediate_operand")
9803 (parallel [(const_int 0) (const_int 2)
9804 (const_int 4) (const_int 6)
9805 (const_int 8) (const_int 10)
9806 (const_int 12) (const_int 14)])))))]
9807 "TARGET_AVX512F"
9808 "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
9809
;; Matcher for the V16SI unsigned even-element widening multiply; emits
;; vpmuludq with <mask_operand3> appended to the destination.  Operand 1
;; carries the "%" constraint modifier (commutative with operand 2).
5220cab6 9810(define_insn "*vec_widen_umult_even_v16si<mask_name>"
697a43f8 9811 [(set (match_operand:V8DI 0 "register_operand" "=v")
9812 (mult:V8DI
9813 (zero_extend:V8DI
9814 (vec_select:V8SI
9815 (match_operand:V16SI 1 "nonimmediate_operand" "%v")
9816 (parallel [(const_int 0) (const_int 2)
9817 (const_int 4) (const_int 6)
9818 (const_int 8) (const_int 10)
9819 (const_int 12) (const_int 14)])))
9820 (zero_extend:V8DI
9821 (vec_select:V8SI
9822 (match_operand:V16SI 2 "nonimmediate_operand" "vm")
9823 (parallel [(const_int 0) (const_int 2)
9824 (const_int 4) (const_int 6)
9825 (const_int 8) (const_int 10)
9826 (const_int 12) (const_int 14)])))))]
9827 "TARGET_AVX512F && ix86_binary_operator_ok (MULT, V16SImode, operands)"
5220cab6 9828 "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
697a43f8 9829 [(set_attr "isa" "avx512f")
9830 (set_attr "type" "sseimul")
9831 (set_attr "prefix_extra" "1")
9832 (set_attr "prefix" "evex")
9833 (set_attr "mode" "XI")])
9834
;; Unsigned widening multiply of the even-indexed elements (0,2,4,6) of
;; two V8SI operands, giving a V4DI result.  Enabled for TARGET_AVX2; the
;; <mask_avx512vl_condition> substitution condition is also part of the
;; condition string.
4c79b3a9 9835(define_expand "vec_widen_umult_even_v8si<mask_name>"
abd4f58b 9836 [(set (match_operand:V4DI 0 "register_operand")
5deb404d 9837 (mult:V4DI
9838 (zero_extend:V4DI
9839 (vec_select:V4SI
abd4f58b 9840 (match_operand:V8SI 1 "nonimmediate_operand")
5deb404d 9841 (parallel [(const_int 0) (const_int 2)
9842 (const_int 4) (const_int 6)])))
9843 (zero_extend:V4DI
9844 (vec_select:V4SI
abd4f58b 9845 (match_operand:V8SI 2 "nonimmediate_operand")
5deb404d 9846 (parallel [(const_int 0) (const_int 2)
9847 (const_int 4) (const_int 6)])))))]
4c79b3a9 9848 "TARGET_AVX2 && <mask_avx512vl_condition>"
5deb404d 9849 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
9850
;; Matcher for the V8SI unsigned even-element widening multiply; emits
;; vpmuludq with <mask_operand3> appended to the destination.  The
;; condition includes <mask_avx512vl_condition> and the "prefix"
;; attribute is "maybe_evex".
4c79b3a9 9851(define_insn "*vec_widen_umult_even_v8si<mask_name>"
9852 [(set (match_operand:V4DI 0 "register_operand" "=v")
5deb404d 9853 (mult:V4DI
9854 (zero_extend:V4DI
9855 (vec_select:V4SI
4c79b3a9 9856 (match_operand:V8SI 1 "nonimmediate_operand" "%v")
5deb404d 9857 (parallel [(const_int 0) (const_int 2)
9858 (const_int 4) (const_int 6)])))
9859 (zero_extend:V4DI
9860 (vec_select:V4SI
4c79b3a9 9861 (match_operand:V8SI 2 "nonimmediate_operand" "vm")
5deb404d 9862 (parallel [(const_int 0) (const_int 2)
9863 (const_int 4) (const_int 6)])))))]
4c79b3a9 9864 "TARGET_AVX2 && <mask_avx512vl_condition>
9865 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
9866 "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
5deb404d 9867 [(set_attr "type" "sseimul")
4c79b3a9 9868 (set_attr "prefix" "maybe_evex")
5deb404d 9869 (set_attr "mode" "OI")])
5802c0cb 9870
;; Unsigned widening multiply of elements 0 and 2 of two V4SI operands,
;; giving a V2DI result.  Enabled for TARGET_SSE2; the
;; <mask_avx512vl_condition> substitution condition is also part of the
;; condition string.
4c79b3a9 9871(define_expand "vec_widen_umult_even_v4si<mask_name>"
abd4f58b 9872 [(set (match_operand:V2DI 0 "register_operand")
7c839b3f 9873 (mult:V2DI
9874 (zero_extend:V2DI
9875 (vec_select:V2SI
abd4f58b 9876 (match_operand:V4SI 1 "nonimmediate_operand")
7c839b3f 9877 (parallel [(const_int 0) (const_int 2)])))
9878 (zero_extend:V2DI
9879 (vec_select:V2SI
abd4f58b 9880 (match_operand:V4SI 2 "nonimmediate_operand")
7c839b3f 9881 (parallel [(const_int 0) (const_int 2)])))))]
4c79b3a9 9882 "TARGET_SSE2 && <mask_avx512vl_condition>"
7c839b3f 9883 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
9884
;; Matcher for the V4SI unsigned even-element widening multiply.
;; Alternative 0 (isa noavx) emits the two-operand pmuludq with operand 1
;; tied to the destination; alternative 1 (isa avx) emits the
;; three-operand vpmuludq with <mask_operand3> appended to the destination.
4c79b3a9 9885(define_insn "*vec_widen_umult_even_v4si<mask_name>"
9886 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
5802c0cb 9887 (mult:V2DI
9888 (zero_extend:V2DI
9889 (vec_select:V2SI
4c79b3a9 9890 (match_operand:V4SI 1 "nonimmediate_operand" "%0,v")
5802c0cb 9891 (parallel [(const_int 0) (const_int 2)])))
9892 (zero_extend:V2DI
9893 (vec_select:V2SI
4c79b3a9 9894 (match_operand:V4SI 2 "nonimmediate_operand" "xm,vm")
5802c0cb 9895 (parallel [(const_int 0) (const_int 2)])))))]
4c79b3a9 9896 "TARGET_SSE2 && <mask_avx512vl_condition>
9897 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
d8f82f6b 9898 "@
9899 pmuludq\t{%2, %0|%0, %2}
4c79b3a9 9900 vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
d8f82f6b 9901 [(set_attr "isa" "noavx,avx")
9902 (set_attr "type" "sseimul")
9903 (set_attr "prefix_data16" "1,*")
4c79b3a9 9904 (set_attr "prefix" "orig,maybe_evex")
5802c0cb 9905 (set_attr "mode" "TI")])
9906
;; Signed widening multiply of the even-indexed elements of two V16SI
;; operands: elements 0,2,...,14 of each source are sign-extended to
;; DImode and multiplied, giving a V8DI result.  Enabled for
;; TARGET_AVX512F; operands are passed through
;; ix86_fixup_binary_operands_no_copy before the template is emitted.
5220cab6 9907(define_expand "vec_widen_smult_even_v16si<mask_name>"
697a43f8 9908 [(set (match_operand:V8DI 0 "register_operand")
9909 (mult:V8DI
9910 (sign_extend:V8DI
9911 (vec_select:V8SI
9912 (match_operand:V16SI 1 "nonimmediate_operand")
9913 (parallel [(const_int 0) (const_int 2)
9914 (const_int 4) (const_int 6)
9915 (const_int 8) (const_int 10)
9916 (const_int 12) (const_int 14)])))
9917 (sign_extend:V8DI
9918 (vec_select:V8SI
9919 (match_operand:V16SI 2 "nonimmediate_operand")
9920 (parallel [(const_int 0) (const_int 2)
9921 (const_int 4) (const_int 6)
9922 (const_int 8) (const_int 10)
9923 (const_int 12) (const_int 14)])))))]
9924 "TARGET_AVX512F"
9925 "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
9926
;; Matcher for the V16SI signed even-element widening multiply; emits
;; vpmuldq with <mask_operand3> appended to the destination.  Operand 1
;; carries the "%" constraint modifier (commutative with operand 2).
5220cab6 9927(define_insn "*vec_widen_smult_even_v16si<mask_name>"
697a43f8 9928 [(set (match_operand:V8DI 0 "register_operand" "=v")
9929 (mult:V8DI
9930 (sign_extend:V8DI
9931 (vec_select:V8SI
9932 (match_operand:V16SI 1 "nonimmediate_operand" "%v")
9933 (parallel [(const_int 0) (const_int 2)
9934 (const_int 4) (const_int 6)
9935 (const_int 8) (const_int 10)
9936 (const_int 12) (const_int 14)])))
9937 (sign_extend:V8DI
9938 (vec_select:V8SI
9939 (match_operand:V16SI 2 "nonimmediate_operand" "vm")
9940 (parallel [(const_int 0) (const_int 2)
9941 (const_int 4) (const_int 6)
9942 (const_int 8) (const_int 10)
9943 (const_int 12) (const_int 14)])))))]
9944 "TARGET_AVX512F && ix86_binary_operator_ok (MULT, V16SImode, operands)"
5220cab6 9945 "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
697a43f8 9946 [(set_attr "isa" "avx512f")
9947 (set_attr "type" "sseimul")
9948 (set_attr "prefix_extra" "1")
9949 (set_attr "prefix" "evex")
9950 (set_attr "mode" "XI")])
9951
;; Signed widening multiply of the even-indexed elements (0,2,4,6) of two
;; V8SI operands, giving a V4DI result.  Enabled for TARGET_AVX2; the
;; <mask_avx512vl_condition> substitution condition is also part of the
;; condition string.
4c79b3a9 9952(define_expand "vec_widen_smult_even_v8si<mask_name>"
abd4f58b 9953 [(set (match_operand:V4DI 0 "register_operand")
5deb404d 9954 (mult:V4DI
9955 (sign_extend:V4DI
9956 (vec_select:V4SI
abd4f58b 9957 (match_operand:V8SI 1 "nonimmediate_operand")
5deb404d 9958 (parallel [(const_int 0) (const_int 2)
9959 (const_int 4) (const_int 6)])))
9960 (sign_extend:V4DI
9961 (vec_select:V4SI
abd4f58b 9962 (match_operand:V8SI 2 "nonimmediate_operand")
5deb404d 9963 (parallel [(const_int 0) (const_int 2)
9964 (const_int 4) (const_int 6)])))))]
4c79b3a9 9965 "TARGET_AVX2 && <mask_avx512vl_condition>"
5deb404d 9966 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
9967
;; Matcher for the V8SI signed even-element widening multiply; emits
;; vpmuldq with <mask_operand3> appended to the destination.
;; The condition includes <mask_avx512vl_condition> so that the masked
;; variant generated through <mask_name> is gated the same way as the
;; expander of the same name and as the unsigned V8SI matcher.
;; NOTE(review): the "prefix" attribute is "vex" although the unsigned
;; V8SI matcher uses "maybe_evex" for the same operand shape -- confirm
;; whether this should be "maybe_evex" as well.
4c79b3a9 9968(define_insn "*vec_widen_smult_even_v8si<mask_name>"
9969 [(set (match_operand:V4DI 0 "register_operand" "=v")
5deb404d 9970 (mult:V4DI
9971 (sign_extend:V4DI
9972 (vec_select:V4SI
0a281fd0 9973 (match_operand:V8SI 1 "nonimmediate_operand" "%v")
5deb404d 9974 (parallel [(const_int 0) (const_int 2)
9975 (const_int 4) (const_int 6)])))
9976 (sign_extend:V4DI
9977 (vec_select:V4SI
4c79b3a9 9978 (match_operand:V8SI 2 "nonimmediate_operand" "vm")
5deb404d 9979 (parallel [(const_int 0) (const_int 2)
9980 (const_int 4) (const_int 6)])))))]
4c79b3a9 9981 "TARGET_AVX2 && <mask_avx512vl_condition>
9982 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
9983 "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
66e3f7be 9984 [(set_attr "type" "sseimul")
5deb404d 9985 (set_attr "prefix_extra" "1")
9986 (set_attr "prefix" "vex")
9987 (set_attr "mode" "OI")])
9988
;; Signed widening multiply of elements 0 and 2 of two V4SI operands,
;; giving a V2DI result.  Enabled for TARGET_SSE4_1; the
;; <mask_avx512vl_condition> substitution condition is also part of the
;; condition string.
4c79b3a9 9989(define_expand "sse4_1_mulv2siv2di3<mask_name>"
abd4f58b 9990 [(set (match_operand:V2DI 0 "register_operand")
7c839b3f 9991 (mult:V2DI
9992 (sign_extend:V2DI
9993 (vec_select:V2SI
abd4f58b 9994 (match_operand:V4SI 1 "nonimmediate_operand")
7c839b3f 9995 (parallel [(const_int 0) (const_int 2)])))
9996 (sign_extend:V2DI
9997 (vec_select:V2SI
abd4f58b 9998 (match_operand:V4SI 2 "nonimmediate_operand")
7c839b3f 9999 (parallel [(const_int 0) (const_int 2)])))))]
4c79b3a9 10000 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
7c839b3f 10001 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
ed30e0a6 10002
;; Matcher for the V4SI signed even-element widening multiply.  The first
;; two alternatives (isa noavx) emit the two-operand pmuldq with operand 1
;; tied to the destination; the third (isa avx) emits the three-operand
;; vpmuldq with <mask_operand3> appended to the destination.
;; NOTE(review): the "prefix" attribute is "orig,orig,vex" whereas the
;; unsigned V4SI matcher uses "maybe_evex" for its "v" alternative --
;; confirm intent.
4c79b3a9 10003(define_insn "*sse4_1_mulv2siv2di3<mask_name>"
0a32b282 10004 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
ed30e0a6 10005 (mult:V2DI
10006 (sign_extend:V2DI
10007 (vec_select:V2SI
0a32b282 10008 (match_operand:V4SI 1 "nonimmediate_operand" "%0,0,v")
ed30e0a6 10009 (parallel [(const_int 0) (const_int 2)])))
10010 (sign_extend:V2DI
10011 (vec_select:V2SI
0a32b282 10012 (match_operand:V4SI 2 "nonimmediate_operand" "Yrm,*xm,vm")
ed30e0a6 10013 (parallel [(const_int 0) (const_int 2)])))))]
4c79b3a9 10014 "TARGET_SSE4_1 && <mask_avx512vl_condition>
10015 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
d8f82f6b 10016 "@
0a32b282 10017 pmuldq\t{%2, %0|%0, %2}
d8f82f6b 10018 pmuldq\t{%2, %0|%0, %2}
4c79b3a9 10019 vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
0a32b282 10020 [(set_attr "isa" "noavx,noavx,avx")
d8f82f6b 10021 (set_attr "type" "sseimul")
0a32b282 10022 (set_attr "prefix_data16" "1,1,*")
ed30e0a6 10023 (set_attr "prefix_extra" "1")
0a32b282 10024 (set_attr "prefix" "orig,orig,vex")
ed30e0a6 10025 (set_attr "mode" "TI")])
10026
;; vpmaddwd described as an opaque UNSPEC_PMADDWD512 over two VI2_AVX2
;; operands, producing a <sseunpackmode> result.  Enabled for
;; TARGET_AVX512BW together with <mask_mode512bit_condition>.
;; NOTE(review): the ';' following the output template starts an empty
;; comment; it looks unintended but has no effect on the pattern.
2d71b728 10027(define_insn "avx512bw_pmaddwd512<mode><mask_name>"
10028 [(set (match_operand:<sseunpackmode> 0 "register_operand" "=v")
10029 (unspec:<sseunpackmode>
10030 [(match_operand:VI2_AVX2 1 "register_operand" "v")
10031 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "vm")]
10032 UNSPEC_PMADDWD512))]
10033 "TARGET_AVX512BW && <mask_mode512bit_condition>"
10034 "vpmaddwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}";
10035 [(set_attr "type" "sseiadd")
10036 (set_attr "prefix" "evex")
10037 (set_attr "mode" "XI")])
10038
;; Expander for the 256-bit multiply-and-add of V16HI operands.  The
;; V8SI result is the sum of two products: the sign-extended
;; even-indexed elements (0,2,...,14) of operands 1 and 2 multiplied
;; together, plus the sign-extended odd-indexed elements (1,3,...,15) of
;; the same operands multiplied together.  Enabled for TARGET_AVX2.
5deb404d 10039(define_expand "avx2_pmaddwd"
abd4f58b 10040 [(set (match_operand:V8SI 0 "register_operand")
5deb404d 10041 (plus:V8SI
10042 (mult:V8SI
10043 (sign_extend:V8SI
10044 (vec_select:V8HI
abd4f58b 10045 (match_operand:V16HI 1 "nonimmediate_operand")
04d95c72 10046 (parallel [(const_int 0) (const_int 2)
10047 (const_int 4) (const_int 6)
10048 (const_int 8) (const_int 10)
10049 (const_int 12) (const_int 14)])))
5deb404d 10050 (sign_extend:V8SI
10051 (vec_select:V8HI
abd4f58b 10052 (match_operand:V16HI 2 "nonimmediate_operand")
04d95c72 10053 (parallel [(const_int 0) (const_int 2)
10054 (const_int 4) (const_int 6)
10055 (const_int 8) (const_int 10)
10056 (const_int 12) (const_int 14)]))))
5deb404d 10057 (mult:V8SI
10058 (sign_extend:V8SI
10059 (vec_select:V8HI (match_dup 1)
04d95c72 10060 (parallel [(const_int 1) (const_int 3)
10061 (const_int 5) (const_int 7)
10062 (const_int 9) (const_int 11)
10063 (const_int 13) (const_int 15)])))
5deb404d 10064 (sign_extend:V8SI
10065 (vec_select:V8HI (match_dup 2)
04d95c72 10066 (parallel [(const_int 1) (const_int 3)
10067 (const_int 5) (const_int 7)
10068 (const_int 9) (const_int 11)
10069 (const_int 13) (const_int 15)]))))))]
5deb404d 10070 "TARGET_AVX2"
10071 "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
10072
;; Matcher for the 256-bit multiply-and-add: sum of the products of the
;; sign-extended even-indexed and odd-indexed V16HI elements of operands
;; 1 and 2, giving V8SI.  Emits the three-operand vpmaddwd; operand 1
;; carries the "%" constraint modifier (commutative with operand 2).
5deb404d 10073(define_insn "*avx2_pmaddwd"
10074 [(set (match_operand:V8SI 0 "register_operand" "=x")
10075 (plus:V8SI
10076 (mult:V8SI
10077 (sign_extend:V8SI
10078 (vec_select:V8HI
10079 (match_operand:V16HI 1 "nonimmediate_operand" "%x")
04d95c72 10080 (parallel [(const_int 0) (const_int 2)
10081 (const_int 4) (const_int 6)
10082 (const_int 8) (const_int 10)
10083 (const_int 12) (const_int 14)])))
5deb404d 10084 (sign_extend:V8SI
10085 (vec_select:V8HI
10086 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
04d95c72 10087 (parallel [(const_int 0) (const_int 2)
10088 (const_int 4) (const_int 6)
10089 (const_int 8) (const_int 10)
10090 (const_int 12) (const_int 14)]))))
5deb404d 10091 (mult:V8SI
10092 (sign_extend:V8SI
10093 (vec_select:V8HI (match_dup 1)
04d95c72 10094 (parallel [(const_int 1) (const_int 3)
10095 (const_int 5) (const_int 7)
10096 (const_int 9) (const_int 11)
10097 (const_int 13) (const_int 15)])))
5deb404d 10098 (sign_extend:V8SI
10099 (vec_select:V8HI (match_dup 2)
04d95c72 10100 (parallel [(const_int 1) (const_int 3)
10101 (const_int 5) (const_int 7)
10102 (const_int 9) (const_int 11)
10103 (const_int 13) (const_int 15)]))))))]
5deb404d 10104 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)"
10105 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
10106 [(set_attr "type" "sseiadd")
10107 (set_attr "prefix" "vex")
10108 (set_attr "mode" "OI")])
10109
;; Expander for the 128-bit multiply-and-add of V8HI operands.  The V4SI
;; result is the sum of two products: the sign-extended even-indexed
;; elements (0,2,4,6) of operands 1 and 2 multiplied together, plus the
;; sign-extended odd-indexed elements (1,3,5,7) multiplied together.
;; Enabled for TARGET_SSE2.
04d95c72 10110(define_expand "sse2_pmaddwd"
10111 [(set (match_operand:V4SI 0 "register_operand")
10112 (plus:V4SI
10113 (mult:V4SI
10114 (sign_extend:V4SI
10115 (vec_select:V4HI
10116 (match_operand:V8HI 1 "nonimmediate_operand")
10117 (parallel [(const_int 0) (const_int 2)
10118 (const_int 4) (const_int 6)])))
10119 (sign_extend:V4SI
10120 (vec_select:V4HI
10121 (match_operand:V8HI 2 "nonimmediate_operand")
10122 (parallel [(const_int 0) (const_int 2)
10123 (const_int 4) (const_int 6)]))))
10124 (mult:V4SI
10125 (sign_extend:V4SI
10126 (vec_select:V4HI (match_dup 1)
10127 (parallel [(const_int 1) (const_int 3)
10128 (const_int 5) (const_int 7)])))
10129 (sign_extend:V4SI
10130 (vec_select:V4HI (match_dup 2)
10131 (parallel [(const_int 1) (const_int 3)
10132 (const_int 5) (const_int 7)]))))))]
10133 "TARGET_SSE2"
10134 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
10135
;; Matcher for the 128-bit multiply-and-add: sum of the products of the
;; sign-extended even-indexed and odd-indexed V8HI elements of operands 1
;; and 2, giving V4SI.  Alternative 0 (isa noavx) emits the two-operand
;; pmaddwd with operand 1 tied to the destination; alternative 1 (isa avx)
;; emits the three-operand vpmaddwd.
7c839b3f 10136(define_insn "*sse2_pmaddwd"
d8f82f6b 10137 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
5802c0cb 10138 (plus:V4SI
10139 (mult:V4SI
10140 (sign_extend:V4SI
10141 (vec_select:V4HI
d8f82f6b 10142 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
04d95c72 10143 (parallel [(const_int 0) (const_int 2)
10144 (const_int 4) (const_int 6)])))
5802c0cb 10145 (sign_extend:V4SI
10146 (vec_select:V4HI
d8f82f6b 10147 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
04d95c72 10148 (parallel [(const_int 0) (const_int 2)
10149 (const_int 4) (const_int 6)]))))
5802c0cb 10150 (mult:V4SI
10151 (sign_extend:V4SI
10152 (vec_select:V4HI (match_dup 1)
04d95c72 10153 (parallel [(const_int 1) (const_int 3)
10154 (const_int 5) (const_int 7)])))
5802c0cb 10155 (sign_extend:V4SI
10156 (vec_select:V4HI (match_dup 2)
04d95c72 10157 (parallel [(const_int 1) (const_int 3)
10158 (const_int 5) (const_int 7)]))))))]
70169283 10159 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
d8f82f6b 10160 "@
10161 pmaddwd\t{%2, %0|%0, %2}
10162 vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
10163 [(set_attr "isa" "noavx,avx")
10164 (set_attr "type" "sseiadd")
fbfe006e 10165 (set_attr "atom_unit" "simul")
d8f82f6b 10166 (set_attr "prefix_data16" "1,*")
10167 (set_attr "prefix" "orig,vex")
5802c0cb 10168 (set_attr "mode" "TI")])
10169
;; Element-wise multiplication over the VI8 modes, emitted as vpmullq
;; with <mask_operand3> appended to the destination.  Operand 1 must be a
;; register; operand 2 may be a register or memory.  Enabled for
;; TARGET_AVX512DQ together with <mask_mode512bit_condition>.
4c79b3a9 10170(define_insn "avx512dq_mul<mode>3<mask_name>"
10171 [(set (match_operand:VI8 0 "register_operand" "=v")
10172 (mult:VI8
10173 (match_operand:VI8 1 "register_operand" "v")
10174 (match_operand:VI8 2 "nonimmediate_operand" "vm")))]
10175 "TARGET_AVX512DQ && <mask_mode512bit_condition>"
10176 "vpmullq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10177 [(set_attr "type" "sseimul")
10178 (set_attr "prefix" "evex")
10179 (set_attr "mode" "<sseinsnmode>")])
10180
;; Multiplication expander for the VI4_AVX512F modes.
;; With TARGET_SSE4_1, any source operand that is not a
;; nonimmediate_operand is first forced into a register, the operands are
;; passed through ix86_fixup_binary_operands_no_copy, and the RTL template
;; is emitted.  Without SSE4.1 the work is delegated to
;; ix86_expand_sse2_mulv4si3 (defined elsewhere) and the template is not
;; emitted (DONE).
5220cab6 10181(define_expand "mul<mode>3<mask_name>"
c6cff444 10182 [(set (match_operand:VI4_AVX512F 0 "register_operand")
10183 (mult:VI4_AVX512F
10184 (match_operand:VI4_AVX512F 1 "general_vector_operand")
10185 (match_operand:VI4_AVX512F 2 "general_vector_operand")))]
5220cab6 10186 "TARGET_SSE2 && <mask_mode512bit_condition>"
c195473e 10187{
3737d3e4 10188 if (TARGET_SSE4_1)
087cf0d3 10189 {
ebdfd365 10190 if (!nonimmediate_operand (operands[1], <MODE>mode))
10191 operands[1] = force_reg (<MODE>mode, operands[1]);
10192 if (!nonimmediate_operand (operands[2], <MODE>mode))
10193 operands[2] = force_reg (<MODE>mode, operands[2]);
087cf0d3 10194 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
10195 }
10196 else
10197 {
10198 ix86_expand_sse2_mulv4si3 (operands[0], operands[1], operands[2]);
10199 DONE;
10200 }
c195473e 10201})
10202
;; Matcher for VI4_AVX512F multiplication.  The first two alternatives
;; (isa noavx) emit the two-operand pmulld with operand 1 tied to the
;; destination; the third (isa avx) emits the three-operand vpmulld with
;; <mask_operand3> appended to the destination.  Requires TARGET_SSE4_1.
5220cab6 10203(define_insn "*<sse4_1_avx2>_mul<mode>3<mask_name>"
0a32b282 10204 [(set (match_operand:VI4_AVX512F 0 "register_operand" "=Yr,*x,v")
c6cff444 10205 (mult:VI4_AVX512F
0a32b282 10206 (match_operand:VI4_AVX512F 1 "nonimmediate_operand" "%0,0,v")
10207 (match_operand:VI4_AVX512F 2 "nonimmediate_operand" "Yrm,*xm,vm")))]
5220cab6 10208 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, <MODE>mode, operands) && <mask_mode512bit_condition>"
d8f82f6b 10209 "@
0a32b282 10210 pmulld\t{%2, %0|%0, %2}
d8f82f6b 10211 pmulld\t{%2, %0|%0, %2}
5220cab6 10212 vpmulld\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
0a32b282 10213 [(set_attr "isa" "noavx,noavx,avx")
d8f82f6b 10214 (set_attr "type" "sseimul")
2d771892 10215 (set_attr "prefix_extra" "1")
0a32b282 10216 (set_attr "prefix" "<mask_prefix4>")
10217 (set_attr "btver2_decode" "vector,vector,vector")
5deb404d 10218 (set_attr "mode" "<sseinsnmode>")])
2d771892 10219
;; Multiplication expander for the VI8_AVX2_AVX512F modes.  The RTL
;; template is never emitted: the body always delegates to
;; ix86_expand_sse2_mulvxdi3 (defined elsewhere) and finishes with DONE.
04c4f045 10220(define_expand "mul<mode>3"
c6cff444 10221 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand")
10222 (mult:VI8_AVX2_AVX512F
10223 (match_operand:VI8_AVX2_AVX512F 1 "register_operand")
10224 (match_operand:VI8_AVX2_AVX512F 2 "register_operand")))]
04c4f045 10225 "TARGET_SSE2"
4b26818b 10226{
04c4f045 10227 ix86_expand_sse2_mulvxdi3 (operands[0], operands[1], operands[2]);
4b26818b 10228 DONE;
10229})
10230
;; "hi" widening multiply expander for the VI124_AVX2 modes (signed and
;; unsigned via any_extend).  The template only declares the operands; the
;; body calls ix86_expand_mul_widen_hilo with <u_bool> and `true' as the
;; last two arguments and finishes with DONE.
;; NOTE(review): the final argument presumably selects the high half
;; (the _lo expander passes `false') -- confirm against the helper.
16e84e52 10231(define_expand "vec_widen_<s>mult_hi_<mode>"
abd4f58b 10232 [(match_operand:<sseunpackmode> 0 "register_operand")
16e84e52 10233 (any_extend:<sseunpackmode>
3737d3e4 10234 (match_operand:VI124_AVX2 1 "register_operand"))
10235 (match_operand:VI124_AVX2 2 "register_operand")]
5cd92c37 10236 "TARGET_SSE2"
1f428eb0 10237{
3737d3e4 10238 ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
10239 <u_bool>, true);
1f428eb0 10240 DONE;
10241})
10242
;; "lo" widening multiply expander for the VI124_AVX2 modes (signed and
;; unsigned via any_extend).  The template only declares the operands; the
;; body calls ix86_expand_mul_widen_hilo with <u_bool> and `false' as the
;; last two arguments and finishes with DONE.
;; NOTE(review): the final argument presumably selects the low half
;; (the _hi expander passes `true') -- confirm against the helper.
16e84e52 10243(define_expand "vec_widen_<s>mult_lo_<mode>"
abd4f58b 10244 [(match_operand:<sseunpackmode> 0 "register_operand")
16e84e52 10245 (any_extend:<sseunpackmode>
3737d3e4 10246 (match_operand:VI124_AVX2 1 "register_operand"))
10247 (match_operand:VI124_AVX2 2 "register_operand")]
5cd92c37 10248 "TARGET_SSE2"
c6c91d61 10249{
3737d3e4 10250 ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
10251 <u_bool>, false);
c6c91d61 10252 DONE;
10253})
10254
5cd92c37 10255;; Most widen_<s>mult_even_<mode> can be handled directly from other
10256;; named patterns, but signed V4SI needs special help for plain SSE2.
;; Signed V4SI even-element widening multiply for TARGET_SSE2.  The
;; template only declares the operands; the body calls
;; ix86_expand_mul_widen_evenodd with `false, false' as the last two
;; arguments and finishes with DONE.
;; NOTE(review): the two flags presumably mean "unsigned" and "odd"
;; respectively (the _odd expander passes <u_bool>, true) -- confirm
;; against the helper.
10257(define_expand "vec_widen_smult_even_v4si"
10258 [(match_operand:V2DI 0 "register_operand")
ebdfd365 10259 (match_operand:V4SI 1 "nonimmediate_operand")
10260 (match_operand:V4SI 2 "nonimmediate_operand")]
5cd92c37 10261 "TARGET_SSE2"
10262{
10263 ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
10264 false, false);
10265 DONE;
10266})
10267
;; Odd-element widening multiply expander for the VI4_AVX512F modes
;; (signed and unsigned via any_extend).  The template only declares the
;; operands; the body calls ix86_expand_mul_widen_evenodd with <u_bool>
;; and `true' as the last two arguments and finishes with DONE.
de9b8545 10268(define_expand "vec_widen_<s>mult_odd_<mode>"
10269 [(match_operand:<sseunpackmode> 0 "register_operand")
10270 (any_extend:<sseunpackmode>
c6cff444 10271 (match_operand:VI4_AVX512F 1 "general_vector_operand"))
10272 (match_operand:VI4_AVX512F 2 "general_vector_operand")]
5cd92c37 10273 "TARGET_SSE2"
de9b8545 10274{
10275 ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
10276 <u_bool>, true);
10277 DONE;
10278})
10279
;; Suffix appended to the pmaddwd generator name used by sdot_prod<mode>:
;; "512v32hi" for V32HI, empty for V16HI and V8HI.
2d71b728 10280(define_mode_attr SDOT_PMADD_SUF
10281 [(V32HI "512v32hi") (V16HI "") (V8HI "")])
10282
;; Signed dot-product expander for the VI2_AVX2 modes.
;; A fresh <sseunpackmode> register t receives the pmaddwd of operands 1
;; and 2 (generator selected by <sse2_avx2> and SDOT_PMADD_SUF); operand 0
;; is then set to operand 3 + t.  The template only declares the operands.
16e84e52 10283(define_expand "sdot_prod<mode>"
abd4f58b 10284 [(match_operand:<sseunpackmode> 0 "register_operand")
10285 (match_operand:VI2_AVX2 1 "register_operand")
10286 (match_operand:VI2_AVX2 2 "register_operand")
10287 (match_operand:<sseunpackmode> 3 "register_operand")]
4a61a337 10288 "TARGET_SSE2"
10289{
16e84e52 10290 rtx t = gen_reg_rtx (<sseunpackmode>mode);
2d71b728 10291 emit_insn (gen_<sse2_avx2>_pmaddwd<SDOT_PMADD_SUF> (t, operands[1], operands[2]));
d1f9b275 10292 emit_insn (gen_rtx_SET (operands[0],
16e84e52 10293 gen_rtx_PLUS (<sseunpackmode>mode,
10294 operands[3], t)));
4a61a337 10295 DONE;
10296})
10297
48bb0beb 10298;; Normally we use widen_mul_even/odd, but combine can't quite get it all
10299;; back together when madd is available.
10300(define_expand "sdot_prodv4si"
abd4f58b 10301 [(match_operand:V2DI 0 "register_operand")
48bb0beb 10302 (match_operand:V4SI 1 "register_operand")
abd4f58b 10303 (match_operand:V4SI 2 "register_operand")
10304 (match_operand:V2DI 3 "register_operand")]
48bb0beb 10305 "TARGET_XOP"
16e84e52 10306{
48bb0beb 10307 rtx t = gen_reg_rtx (V2DImode);
10308 emit_insn (gen_xop_pmacsdqh (t, operands[1], operands[2], operands[3]));
10309 emit_insn (gen_xop_pmacsdql (operands[0], operands[1], operands[2], t));
16e84e52 10310 DONE;
10311})
10312
;; usad expander for V16QI inputs and a V4SI result (TARGET_SSE2).
;; Sequence emitted by the body:
;;   t1 (V2DI) = sse2_psadbw (op1, op2)
;;   t2 (V4SI) = convert_move of t1 (third argument 0)
;;   op0       = addv4si3 (t2, op3)
;; Operand 1 must be a register; operands 2 and 3 may be memory.
a2287001 10313(define_expand "usadv16qi"
10314 [(match_operand:V4SI 0 "register_operand")
10315 (match_operand:V16QI 1 "register_operand")
10316 (match_operand:V16QI 2 "nonimmediate_operand")
10317 (match_operand:V4SI 3 "nonimmediate_operand")]
10318 "TARGET_SSE2"
10319{
10320 rtx t1 = gen_reg_rtx (V2DImode);
10321 rtx t2 = gen_reg_rtx (V4SImode);
10322 emit_insn (gen_sse2_psadbw (t1, operands[1], operands[2]));
10323 convert_move (t2, t1, 0);
10324 emit_insn (gen_addv4si3 (operands[0], t2, operands[3]));
10325 DONE;
10326})
10327
;; 256-bit counterpart of usadv16qi, enabled under TARGET_AVX2.
;; Sequence emitted by the body:
;;   t1 (V4DI) = avx2_psadbw (op1, op2)
;;   t2 (V8SI) = convert_move of t1 (third argument 0)
;;   op0       = addv8si3 (t2, op3)
10328(define_expand "usadv32qi"
10329 [(match_operand:V8SI 0 "register_operand")
10330 (match_operand:V32QI 1 "register_operand")
10331 (match_operand:V32QI 2 "nonimmediate_operand")
10332 (match_operand:V8SI 3 "nonimmediate_operand")]
10333 "TARGET_AVX2"
10334{
10335 rtx t1 = gen_reg_rtx (V4DImode);
10336 rtx t2 = gen_reg_rtx (V8SImode);
10337 emit_insn (gen_avx2_psadbw (t1, operands[1], operands[2]));
10338 convert_move (t2, t1, 0);
10339 emit_insn (gen_addv8si3 (operands[0], t2, operands[3]));
10340 DONE;
10341})
10342
;; Arithmetic right shift for the VI24_AVX2 modes; the SImode count is a
;; register or an immediate ("xN").
;;   alternative 0 (isa noavx): two-operand psra<suffix>, destination tied
;;                              to operand 1 by the "0" constraint
;;   alternative 1 (isa avx):   three-operand vpsra<suffix>
;; length_immediate is 1 when operand 2 is a const_int, otherwise 0.
5802c0cb 10343(define_insn "ashr<mode>3"
5deb404d 10344 [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
10345 (ashiftrt:VI24_AVX2
10346 (match_operand:VI24_AVX2 1 "register_operand" "0,x")
d8f82f6b 10347 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
5802c0cb 10348 "TARGET_SSE2"
d8f82f6b 10349 "@
63d5e521 10350 psra<ssemodesuffix>\t{%2, %0|%0, %2}
10351 vpsra<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
d8f82f6b 10352 [(set_attr "isa" "noavx,avx")
10353 (set_attr "type" "sseishft")
00a0e418 10354 (set (attr "length_immediate")
abd4f58b 10355 (if_then_else (match_operand 2 "const_int_operand")
00a0e418 10356 (const_string "1")
10357 (const_string "0")))
d8f82f6b 10358 (set_attr "prefix_data16" "1,*")
10359 (set_attr "prefix" "orig,vex")
5deb404d 10360 (set_attr "mode" "<sseinsnmode>")])
10361
;; Arithmetic right shift for the VI24_AVX512BW_1 modes, gated on
;; TARGET_AVX512VL.  Alternative 0 takes the count in a "v" register with
;; a register source; alternative 1 takes an immediate count ("N") and
;; allows the source in memory ("vm").  <mask_operand3> is appended to the
;; destination in the output template.
4f545baf 10362(define_insn "<mask_codefor>ashr<mode>3<mask_name>"
10363 [(set (match_operand:VI24_AVX512BW_1 0 "register_operand" "=v,v")
10364 (ashiftrt:VI24_AVX512BW_1
10365 (match_operand:VI24_AVX512BW_1 1 "nonimmediate_operand" "v,vm")
10366 (match_operand:SI 2 "nonmemory_operand" "v,N")))]
10367 "TARGET_AVX512VL"
10368 "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10369 [(set_attr "type" "sseishft")
10370 (set (attr "length_immediate")
10371 (if_then_else (match_operand 2 "const_int_operand")
10372 (const_string "1")
10373 (const_string "0")))
10374 (set_attr "mode" "<sseinsnmode>")])
10375
;; V2DI arithmetic right shift (vpsraq), gated on TARGET_AVX512VL.  The
;; count is DImode here: either a "v" register (alternative 0) or an
;; immediate "N" (alternative 1, which also allows a memory source).
;; The insn mode attribute is fixed to TI.
10376(define_insn "<mask_codefor>ashrv2di3<mask_name>"
10377 [(set (match_operand:V2DI 0 "register_operand" "=v,v")
10378 (ashiftrt:V2DI
10379 (match_operand:V2DI 1 "nonimmediate_operand" "v,vm")
10380 (match_operand:DI 2 "nonmemory_operand" "v,N")))]
10381 "TARGET_AVX512VL"
10382 "vpsraq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10383 [(set_attr "type" "sseishft")
10384 (set (attr "length_immediate")
10385 (if_then_else (match_operand 2 "const_int_operand")
10386 (const_string "1")
10387 (const_string "0")))
10388 (set_attr "mode" "TI")])
10389
;; Arithmetic right shift for the VI248_AVX512BW_AVX512VL modes, gated on
;; TARGET_AVX512F.  Same operand layout as the other EVEX vpsra patterns:
;; register count with register source, or immediate count with a
;; register-or-memory source.  length_immediate is 1 only for a const_int
;; count.
5220cab6 10390(define_insn "ashr<mode>3<mask_name>"
4f545baf 10391 [(set (match_operand:VI248_AVX512BW_AVX512VL 0 "register_operand" "=v,v")
10392 (ashiftrt:VI248_AVX512BW_AVX512VL
10393 (match_operand:VI248_AVX512BW_AVX512VL 1 "nonimmediate_operand" "v,vm")
5220cab6 10394 (match_operand:SI 2 "nonmemory_operand" "v,N")))]
4f545baf 10395 "TARGET_AVX512F"
5220cab6 10396 "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10397 [(set_attr "type" "sseishft")
10398 (set (attr "length_immediate")
10399 (if_then_else (match_operand 2 "const_int_operand")
10400 (const_string "1")
10401 (const_string "0")))
10402 (set_attr "mode" "<sseinsnmode>")])
10403
;; Logical shifts (any_lshift) for the VI2_AVX2_AVX512BW modes.  The
;; condition combines TARGET_SSE2 with <mask_mode512bit_condition> and
;; <mask_avx512bw_condition>.
;;   alternative 0 (noavx): two-operand p<vshift><suffix>, dest tied to op 1
;;   alternative 1 (avx):   three-operand vp<vshift><suffix>, with
;;                          <mask_operand3> appended to the destination
;; The SImode count is a register or an immediate in both alternatives.
4055e076 10404(define_insn "<shift_insn><mode>3<mask_name>"
10405 [(set (match_operand:VI2_AVX2_AVX512BW 0 "register_operand" "=x,v")
10406 (any_lshift:VI2_AVX2_AVX512BW
10407 (match_operand:VI2_AVX2_AVX512BW 1 "register_operand" "0,v")
10408 (match_operand:SI 2 "nonmemory_operand" "xN,vN")))]
10409 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
10410 "@
10411 p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
10412 vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10413 [(set_attr "isa" "noavx,avx")
10414 (set_attr "type" "sseishft")
10415 (set (attr "length_immediate")
10416 (if_then_else (match_operand 2 "const_int_operand")
10417 (const_string "1")
10418 (const_string "0")))
10419 (set_attr "prefix_data16" "1,*")
10420 (set_attr "prefix" "orig,vex")
10421 (set_attr "mode" "<sseinsnmode>")])
10422
;; Logical shifts (any_lshift) for the VI48_AVX2 modes.  Same shape as the
;; VI2_AVX2_AVX512BW pattern above, but the condition is only
;; TARGET_SSE2 && <mask_mode512bit_condition>.
;;   alternative 0 (noavx): two-operand form, dest tied to operand 1
;;   alternative 1 (avx):   three-operand form with <mask_operand3>
10423(define_insn "<shift_insn><mode>3<mask_name>"
10424 [(set (match_operand:VI48_AVX2 0 "register_operand" "=x,v")
10425 (any_lshift:VI48_AVX2
10426 (match_operand:VI48_AVX2 1 "register_operand" "0,v")
10427 (match_operand:SI 2 "nonmemory_operand" "xN,vN")))]
10428 "TARGET_SSE2 && <mask_mode512bit_condition>"
d8f82f6b 10429 "@
3297e0a4 10430 p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
4055e076 10431 vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
d8f82f6b 10432 [(set_attr "isa" "noavx,avx")
10433 (set_attr "type" "sseishft")
00a0e418 10434 (set (attr "length_immediate")
abd4f58b 10435 (if_then_else (match_operand 2 "const_int_operand")
00a0e418 10436 (const_string "1")
10437 (const_string "0")))
d8f82f6b 10438 (set_attr "prefix_data16" "1,*")
10439 (set_attr "prefix" "orig,vex")
335adffd 10440 (set_attr "mode" "<sseinsnmode>")])
5802c0cb 10441
;; Logical shifts (any_lshift) for the VI48_512 modes, gated on
;; TARGET_AVX512F && <mask_mode512bit_condition>.
;;   alternative 0: register source, count in a register or immediate
;;   alternative 1: memory source ("m"), immediate count only
;; Both alternatives use the single three-operand vp<vshift> template and
;; are marked isa avx512f / prefix evex.
5220cab6 10442(define_insn "<shift_insn><mode>3<mask_name>"
d2ff59d6 10443 [(set (match_operand:VI48_512 0 "register_operand" "=v,v")
10444 (any_lshift:VI48_512
23afdab7 10445 (match_operand:VI48_512 1 "nonimmediate_operand" "v,m")
d2ff59d6 10446 (match_operand:SI 2 "nonmemory_operand" "vN,N")))]
5220cab6 10447 "TARGET_AVX512F && <mask_mode512bit_condition>"
10448 "vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
d2ff59d6 10449 [(set_attr "isa" "avx512f")
10450 (set_attr "type" "sseishft")
10451 (set (attr "length_immediate")
10452 (if_then_else (match_operand 2 "const_int_operand")
10453 (const_string "1")
10454 (const_string "0")))
10455 (set_attr "prefix" "evex")
10456 (set_attr "mode" "<sseinsnmode>")])
10457
5220cab6 10458
;; Whole-vector left shift for the VI_128 modes, performed in V1TImode.
;; The preparation code:
;;   - replaces operand 1 by its V1TImode lowpart,
;;   - allocates operand 3 as a fresh V1TI register receiving the shift,
;;   - makes operand 4 the <MODE> lowpart of operand 3, which the second
;;     set copies into operand 0.
;; Operand 2 must satisfy const_0_to_255_mul_8_operand.
41b1d2cc 10459(define_expand "vec_shl_<mode>"
09e640e6 10460 [(set (match_dup 3)
5deb404d 10461 (ashift:V1TI
abd4f58b 10462 (match_operand:VI_128 1 "register_operand")
09e640e6 10463 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
10464 (set (match_operand:VI_128 0 "register_operand") (match_dup 4))]
41b1d2cc 10465 "TARGET_SSE2"
10466{
c98fd3f6 10467 operands[1] = gen_lowpart (V1TImode, operands[1]);
09e640e6 10468 operands[3] = gen_reg_rtx (V1TImode);
10469 operands[4] = gen_lowpart (<MODE>mode, operands[3]);
41b1d2cc 10470})
10471
;; Left shift of a VIMAX_AVX2 value by an immediate count.  The output
;; code divides operand 2 by 8 before printing it, then selects the
;; mnemonic by alternative: pslldq (two-operand, noavx) or vpslldq
;; (three-operand, avx).
;; NOTE(review): the division suggests the RTL count is in bits and the
;; instruction takes bytes -- consistent with the predicate name, but
;; confirm against const_0_to_255_mul_8_operand.
5deb404d 10472(define_insn "<sse2_avx2>_ashl<mode>3"
fd6b07be 10473 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,v")
5deb404d 10474 (ashift:VIMAX_AVX2
fd6b07be 10475 (match_operand:VIMAX_AVX2 1 "register_operand" "0,v")
d8f82f6b 10476 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
10477 "TARGET_SSE2"
10478{
10479 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
10480
10481 switch (which_alternative)
10482 {
10483 case 0:
10484 return "pslldq\t{%2, %0|%0, %2}";
10485 case 1:
10486 return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
10487 default:
10488 gcc_unreachable ();
10489 }
10490}
10491 [(set_attr "isa" "noavx,avx")
10492 (set_attr "type" "sseishft")
10493 (set_attr "length_immediate" "1")
10494 (set_attr "prefix_data16" "1,*")
10495 (set_attr "prefix" "orig,vex")
5deb404d 10496 (set_attr "mode" "<sseinsnmode>")])
d8f82f6b 10497
;; Whole-vector logical right shift for the VI_128 modes, performed in
;; V1TImode.  Mirrors vec_shl_<mode>: operand 1 is viewed as V1TI, the
;; lshiftrt result goes to a fresh V1TI register (operand 3), and its
;; <MODE> lowpart (operand 4) is copied into operand 0.
41b1d2cc 10498(define_expand "vec_shr_<mode>"
09e640e6 10499 [(set (match_dup 3)
5deb404d 10500 (lshiftrt:V1TI
abd4f58b 10501 (match_operand:VI_128 1 "register_operand")
09e640e6 10502 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
10503 (set (match_operand:VI_128 0 "register_operand") (match_dup 4))]
41b1d2cc 10504 "TARGET_SSE2"
10505{
c98fd3f6 10506 operands[1] = gen_lowpart (V1TImode, operands[1]);
09e640e6 10507 operands[3] = gen_reg_rtx (V1TImode);
10508 operands[4] = gen_lowpart (<MODE>mode, operands[3]);
41b1d2cc 10509})
10510
;; Logical right shift of a VIMAX_AVX2 value by an immediate count.  As in
;; the ashl pattern, operand 2 is divided by 8 before printing; the
;; mnemonic is psrldq (noavx, two-operand) or vpsrldq (avx, three-operand).
;; Unlike the ashl pattern this one also sets atom_unit to "sishuf".
bb7ad312 10511(define_insn "<sse2_avx2>_lshr<mode>3"
fd6b07be 10512 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,v")
bb7ad312 10513 (lshiftrt:VIMAX_AVX2
fd6b07be 10514 (match_operand:VIMAX_AVX2 1 "register_operand" "0,v")
d8f82f6b 10515 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
5802c0cb 10516 "TARGET_SSE2"
d8f82f6b 10517{
10518 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5802c0cb 10519
d8f82f6b 10520 switch (which_alternative)
10521 {
10522 case 0:
10523 return "psrldq\t{%2, %0|%0, %2}";
10524 case 1:
10525 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
10526 default:
10527 gcc_unreachable ();
10528 }
10529}
10530 [(set_attr "isa" "noavx,avx")
10531 (set_attr "type" "sseishft")
10532 (set_attr "length_immediate" "1")
10533 (set_attr "atom_unit" "sishuf")
10534 (set_attr "prefix_data16" "1,*")
10535 (set_attr "prefix" "orig,vex")
bb7ad312 10536 (set_attr "mode" "<sseinsnmode>")])
10537
;; Variable rotate (any_rotate) for the VI48_AVX512VL modes: the rotate
;; count is itself a vector operand of the same mode (register or memory).
;; Emits vp<rotate>v<suffix> with <mask_operand3> on the destination.
3d038641 10538(define_insn "<avx512>_<rotate>v<mode><mask_name>"
10539 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
10540 (any_rotate:VI48_AVX512VL
10541 (match_operand:VI48_AVX512VL 1 "register_operand" "v")
10542 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")))]
d2ff59d6 10543 "TARGET_AVX512F"
5220cab6 10544 "vp<rotate>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
d2ff59d6 10545 [(set_attr "prefix" "evex")
10546 (set_attr "mode" "<sseinsnmode>")])
10547
;; Rotate-by-immediate (any_rotate) for the VI48_AVX512VL modes.  The
;; source may be a register or memory; the SImode count must satisfy
;; const_0_to_255_operand and carries no constraint string.
3d038641 10548(define_insn "<avx512>_<rotate><mode><mask_name>"
10549 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
10550 (any_rotate:VI48_AVX512VL
10551 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")
d2ff59d6 10552 (match_operand:SI 2 "const_0_to_255_operand")))]
10553 "TARGET_AVX512F"
5220cab6 10554 "vp<rotate><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
d2ff59d6 10555 [(set_attr "prefix" "evex")
10556 (set_attr "mode" "<sseinsnmode>")])
2af8cce6 10557
;; Integer max/min expander (maxmin codes) for the
;; VI124_256_AVX512F_AVX512BW modes, gated on TARGET_AVX2.  The only
;; preparation is ix86_fixup_binary_operands_no_copy; the RTL template is
;; then emitted as written.
5dd4f649 10558(define_expand "<code><mode>3"
10559 [(set (match_operand:VI124_256_AVX512F_AVX512BW 0 "register_operand")
10560 (maxmin:VI124_256_AVX512F_AVX512BW
10561 (match_operand:VI124_256_AVX512F_AVX512BW 1 "nonimmediate_operand")
10562 (match_operand:VI124_256_AVX512F_AVX512BW 2 "nonimmediate_operand")))]
10563 "TARGET_AVX2"
bb7ad312 10564 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
10565
;; AVX2 integer max/min insn for the VI124_256 modes.  Operand 1 is marked
;; commutative ("%"); the condition additionally requires
;; ix86_binary_operator_ok.  Emits the three-operand vp<maxmin_int><suffix>
;; form with a VEX prefix and insn mode OI.
5dd4f649 10566(define_insn "*avx2_<code><mode>3"
10567 [(set (match_operand:VI124_256 0 "register_operand" "=v")
10568 (maxmin:VI124_256
10569 (match_operand:VI124_256 1 "nonimmediate_operand" "%v")
10570 (match_operand:VI124_256 2 "nonimmediate_operand" "vm")))]
10571 "TARGET_AVX2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10572 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
bb7ad312 10573 [(set_attr "type" "sseiadd")
10574 (set_attr "prefix_extra" "1")
5dd4f649 10575 (set_attr "prefix" "vex")
bb7ad312 10576 (set_attr "mode" "OI")])
5802c0cb 10577
;; Masked integer max/min expander for the VI48_AVX512VL modes.  The
;; template is a vec_merge of the maxmin result with operand 3
;; (vector_move_operand) under the mask register in operand 4.
;; Preparation is limited to ix86_fixup_binary_operands_no_copy.
5dd4f649 10578(define_expand "<code><mode>3_mask"
10579 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
10580 (vec_merge:VI48_AVX512VL
10581 (maxmin:VI48_AVX512VL
10582 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
10583 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
10584 (match_operand:VI48_AVX512VL 3 "vector_move_operand")
10585 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
10586 "TARGET_AVX512F"
10587 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
10588
;; EVEX integer max/min insn for the VI48_AVX512VL modes, with operand 1
;; commutative and <mask_operand3> appended to the destination.
;; NOTE(review): the pattern name says "avx512bw" but the condition is
;; TARGET_AVX512F and the iterator is the dword/qword one; the name looks
;; historical -- confirm before relying on it.
10589(define_insn "*avx512bw_<code><mode>3<mask_name>"
10590 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
10591 (maxmin:VI48_AVX512VL
10592 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "%v")
10593 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")))]
10594 "TARGET_AVX512F && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10595 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10596 [(set_attr "type" "sseiadd")
10597 (set_attr "prefix_extra" "1")
10598 (set_attr "prefix" "maybe_evex")
10599 (set_attr "mode" "<sseinsnmode>")])
10600
;; EVEX integer max/min insn for the VI12_AVX512VL (byte/word) modes,
;; gated on TARGET_AVX512BW.  Operand 1 must be a register and is not
;; marked commutative; operand 2 may be memory.
b02673c1 10601(define_insn "<mask_codefor><code><mode>3<mask_name>"
5dd4f649 10602 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
10603 (maxmin:VI12_AVX512VL
10604 (match_operand:VI12_AVX512VL 1 "register_operand" "v")
10605 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")))]
10606 "TARGET_AVX512BW"
b02673c1 10607 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10608 [(set_attr "type" "sseiadd")
10609 (set_attr "prefix" "evex")
10610 (set_attr "mode" "<sseinsnmode>")])
10611
;; Integer max/min expander for the VI8_AVX2_AVX512BW modes
;; (TARGET_SSE4_2).
;;  - With TARGET_AVX512F and either V8DImode or TARGET_AVX512VL, the
;;    operands are fixed up and the maxmin template is emitted directly.
;;  - Otherwise the operation is lowered to a vector conditional through
;;    ix86_expand_int_vcond: the comparison is operand 1 GT/GTU operand 2
;;    (GTU for UMAX/UMIN), and the two selected values are (op1, op2) for
;;    max or (op2, op1) for min.  Failure of the helper is asserted
;;    against.
230eb963 10612(define_expand "<code><mode>3"
5dd4f649 10613 [(set (match_operand:VI8_AVX2_AVX512BW 0 "register_operand")
10614 (maxmin:VI8_AVX2_AVX512BW
10615 (match_operand:VI8_AVX2_AVX512BW 1 "register_operand")
10616 (match_operand:VI8_AVX2_AVX512BW 2 "register_operand")))]
2af8cce6 10617 "TARGET_SSE4_2"
10618{
5dd4f649 10619 if (TARGET_AVX512F
10620 && (<MODE>mode == V8DImode || TARGET_AVX512VL))
10621 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
10622 else
10623 {
10624 enum rtx_code code;
10625 rtx xops[6];
10626 bool ok;
5deb404d 10627
2af8cce6 10628
5dd4f649 10629 xops[0] = operands[0];
2af8cce6 10630
5dd4f649 10631 if (<CODE> == SMAX || <CODE> == UMAX)
10632 {
10633 xops[1] = operands[1];
10634 xops[2] = operands[2];
10635 }
10636 else
10637 {
10638 xops[1] = operands[2];
10639 xops[2] = operands[1];
10640 }
2af8cce6 10641
5dd4f649 10642 code = (<CODE> == UMAX || <CODE> == UMIN) ? GTU : GT;
2af8cce6 10643
5dd4f649 10644 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
10645 xops[4] = operands[1];
10646 xops[5] = operands[2];
10647
10648 ok = ix86_expand_int_vcond (xops);
10649 gcc_assert (ok);
10650 DONE;
10651 }
2af8cce6 10652})
10653
;; Signed max/min expander (smaxmin) for the VI124_128 modes
;; (TARGET_SSE2).
;;  - With TARGET_SSE4_1, or for V8HImode, the operands are fixed up and
;;    the smaxmin template is emitted directly.
;;  - Otherwise both inputs are forced into registers and the operation is
;;    lowered through ix86_expand_int_vcond using a GT comparison of
;;    operand 1 against operand 2, selecting (op1, op2) for SMAX and
;;    (op2, op1) for SMIN.
10654(define_expand "<code><mode>3"
abd4f58b 10655 [(set (match_operand:VI124_128 0 "register_operand")
885c8b76 10656 (smaxmin:VI124_128
abd4f58b 10657 (match_operand:VI124_128 1 "nonimmediate_operand")
10658 (match_operand:VI124_128 2 "nonimmediate_operand")))]
2af8cce6 10659 "TARGET_SSE2"
10660{
10661 if (TARGET_SSE4_1 || <MODE>mode == V8HImode)
10662 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
10663 else
10664 {
10665 rtx xops[6];
10666 bool ok;
10667
10668 xops[0] = operands[0];
23d77b6d 10669 operands[1] = force_reg (<MODE>mode, operands[1]);
10670 operands[2] = force_reg (<MODE>mode, operands[2]);
2af8cce6 10671
10672 if (<CODE> == SMAX)
10673 {
10674 xops[1] = operands[1];
10675 xops[2] = operands[2];
10676 }
10677 else
10678 {
10679 xops[1] = operands[2];
10680 xops[2] = operands[1];
10681 }
10682
10683 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
10684 xops[4] = operands[1];
10685 xops[5] = operands[2];
10686
10687 ok = ix86_expand_int_vcond (xops);
10688 gcc_assert (ok);
10689 DONE;
10690 }
10691})
5deb404d 10692
;; SSE4.1 signed max/min insn for the VI14_128 modes.  Three alternatives:
;; two legacy two-operand forms (constraints "Yr" and "*x", destination
;; tied to operand 1) and one AVX three-operand form on "v" registers
;; that appends <mask_operand3>.  prefix_extra is 1 only for the legacy
;; alternatives.
0bdab484 10693(define_insn "*sse4_1_<code><mode>3<mask_name>"
0a32b282 10694 [(set (match_operand:VI14_128 0 "register_operand" "=Yr,*x,v")
d8f82f6b 10695 (smaxmin:VI14_128
0a32b282 10696 (match_operand:VI14_128 1 "nonimmediate_operand" "%0,0,v")
10697 (match_operand:VI14_128 2 "nonimmediate_operand" "Yrm,*xm,vm")))]
0bdab484 10698 "TARGET_SSE4_1
10699 && <mask_mode512bit_condition>
10700 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
d8f82f6b 10701 "@
0a32b282 10702 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
63d5e521 10703 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
0bdab484 10704 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
0a32b282 10705 [(set_attr "isa" "noavx,noavx,avx")
d8f82f6b 10706 (set_attr "type" "sseiadd")
0a32b282 10707 (set_attr "prefix_extra" "1,1,*")
10708 (set_attr "prefix" "orig,orig,vex")
1f46c1d5 10709 (set_attr "mode" "TI")])
10710
;; Signed max/min insn for V8HI, available from plain SSE2.  Alternative 0
;; is the legacy two-operand p<maxmin_int>w (destination tied to operand
;; 1); alternative 1 is the AVX three-operand vp<maxmin_int>w.
9409fce7 10711(define_insn "*<code>v8hi3"
d8f82f6b 10712 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9409fce7 10713 (smaxmin:V8HI
d8f82f6b 10714 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
10715 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))]
9409fce7 10716 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
d8f82f6b 10717 "@
10718 p<maxmin_int>w\t{%2, %0|%0, %2}
10719 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
10720 [(set_attr "isa" "noavx,avx")
10721 (set_attr "type" "sseiadd")
10722 (set_attr "prefix_data16" "1,*")
10723 (set_attr "prefix_extra" "*,1")
10724 (set_attr "prefix" "orig,vex")
5802c0cb 10725 (set_attr "mode" "TI")])
10726
;; Unsigned max/min expander (umaxmin) for the VI124_128 modes
;; (TARGET_SSE2).  Three strategies, tried in order:
;;  1. TARGET_SSE4_1 or V16QImode: fix up the operands and emit the
;;     umaxmin template directly.
;;  2. UMAX on V8HImode: emit sse2_ussubv8hi3 (op3 = op1 - op2) followed by
;;     addv8hi3 (op0 = op3 + op2).  A fresh temporary is used for op3 when
;;     operand 0 is rtx_equal_p to operand 2, so the second insn still
;;     reads the original operand 2.
;;     NOTE(review): this relies on ussub being an unsigned saturating
;;     subtract, as the name suggests -- confirm.
;;  3. Everything else: force both inputs into registers and lower through
;;     ix86_expand_int_vcond with a GTU comparison, selecting (op1, op2)
;;     for UMAX and (op2, op1) for UMIN.
230eb963 10727(define_expand "<code><mode>3"
abd4f58b 10728 [(set (match_operand:VI124_128 0 "register_operand")
885c8b76 10729 (umaxmin:VI124_128
abd4f58b 10730 (match_operand:VI124_128 1 "nonimmediate_operand")
10731 (match_operand:VI124_128 2 "nonimmediate_operand")))]
e313c83f 10732 "TARGET_SSE2"
10733{
2af8cce6 10734 if (TARGET_SSE4_1 || <MODE>mode == V16QImode)
230eb963 10735 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
2af8cce6 10736 else if (<CODE> == UMAX && <MODE>mode == V8HImode)
10737 {
10738 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
23d77b6d 10739 operands[1] = force_reg (<MODE>mode, operands[1]);
2af8cce6 10740 if (rtx_equal_p (op3, op2))
10741 op3 = gen_reg_rtx (V8HImode);
10742 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
10743 emit_insn (gen_addv8hi3 (op0, op3, op2));
10744 DONE;
10745 }
2d771892 10746 else
5bd1ff1d 10747 {
10748 rtx xops[6];
10749 bool ok;
10750
23d77b6d 10751 operands[1] = force_reg (<MODE>mode, operands[1]);
10752 operands[2] = force_reg (<MODE>mode, operands[2]);
10753
5bd1ff1d 10754 xops[0] = operands[0];
2af8cce6 10755
10756 if (<CODE> == UMAX)
10757 {
10758 xops[1] = operands[1];
10759 xops[2] = operands[2];
10760 }
10761 else
10762 {
10763 xops[1] = operands[2];
10764 xops[2] = operands[1];
10765 }
10766
10767 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
5bd1ff1d 10768 xops[4] = operands[1];
10769 xops[5] = operands[2];
2af8cce6 10770
5bd1ff1d 10771 ok = ix86_expand_int_vcond (xops);
10772 gcc_assert (ok);
10773 DONE;
10774 }
41b1d2cc 10775})
10776
;; SSE4.1 unsigned max/min insn for the VI24_128 modes.  Same layout as
;; the signed VI14_128 pattern: two legacy two-operand alternatives ("Yr"
;; and "*x", destination tied to operand 1) plus one AVX three-operand
;; alternative on "v" registers with <mask_operand3>.
0bdab484 10777(define_insn "*sse4_1_<code><mode>3<mask_name>"
0a32b282 10778 [(set (match_operand:VI24_128 0 "register_operand" "=Yr,*x,v")
d8f82f6b 10779 (umaxmin:VI24_128
0a32b282 10780 (match_operand:VI24_128 1 "nonimmediate_operand" "%0,0,v")
10781 (match_operand:VI24_128 2 "nonimmediate_operand" "Yrm,*xm,vm")))]
0bdab484 10782 "TARGET_SSE4_1
10783 && <mask_mode512bit_condition>
10784 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
d8f82f6b 10785 "@
0a32b282 10786 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
63d5e521 10787 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
0bdab484 10788 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
0a32b282 10789 [(set_attr "isa" "noavx,noavx,avx")
d8f82f6b 10790 (set_attr "type" "sseiadd")
0a32b282 10791 (set_attr "prefix_extra" "1,1,*")
10792 (set_attr "prefix" "orig,orig,vex")
d8f82f6b 10793 (set_attr "mode" "TI")])
10794
;; Unsigned max/min insn for V16QI, available from plain SSE2.
;; Alternative 0 is the legacy two-operand p<maxmin_int>b (destination
;; tied to operand 1); alternative 1 is the AVX three-operand
;; vp<maxmin_int>b.
10795(define_insn "*<code>v16qi3"
10796 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
10797 (umaxmin:V16QI
10798 (match_operand:V16QI 1 "nonimmediate_operand" "%0,x")
10799 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))]
10800 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
10801 "@
10802 p<maxmin_int>b\t{%2, %0|%0, %2}
10803 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
10804 [(set_attr "isa" "noavx,avx")
10805 (set_attr "type" "sseiadd")
10806 (set_attr "prefix_data16" "1,*")
10807 (set_attr "prefix_extra" "*,1")
10808 (set_attr "prefix" "orig,vex")
10809 (set_attr "mode" "TI")])
10810
5802c0cb 10811;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10812;;
10813;; Parallel integral comparisons
10814;;
10815;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10816
;; Named expander for element-wise equality on the VI_256 modes
;; (TARGET_AVX2).  It only fixes up the operands for the EQ code; the eq
;; template is then emitted as written.
5deb404d 10817(define_expand "avx2_eq<mode>3"
abd4f58b 10818 [(set (match_operand:VI_256 0 "register_operand")
18594fc6 10819 (eq:VI_256
abd4f58b 10820 (match_operand:VI_256 1 "nonimmediate_operand")
10821 (match_operand:VI_256 2 "nonimmediate_operand")))]
5deb404d 10822 "TARGET_AVX2"
10823 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
10824
;; AVX2 vpcmpeq insn for the VI_256 modes.  Operand 1 is commutative
;; ("%x"), operand 2 may be memory, and the condition also requires
;; ix86_binary_operator_ok for EQ.
10825(define_insn "*avx2_eq<mode>3"
18594fc6 10826 [(set (match_operand:VI_256 0 "register_operand" "=x")
10827 (eq:VI_256
10828 (match_operand:VI_256 1 "nonimmediate_operand" "%x")
10829 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
5deb404d 10830 "TARGET_AVX2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
10831 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10832 [(set_attr "type" "ssecmp")
10833 (set_attr "prefix_extra" "1")
10834 (set_attr "prefix" "vex")
10835 (set_attr "mode" "OI")])
10836
;; Expander for equality compare into a mask register, byte/word modes
;; (VI12_AVX512VL), gated on TARGET_AVX512BW.  The comparison is wrapped
;; in an UNSPEC_MASKED_EQ unspec producing <avx512fmaskmode>; preparation
;; is limited to ix86_fixup_binary_operands_no_copy.
6b76cef2 10837(define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>"
10838 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
10839 (unspec:<avx512fmaskmode>
10840 [(match_operand:VI12_AVX512VL 1 "register_operand")
10841 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand")]
10842 UNSPEC_MASKED_EQ))]
10843 "TARGET_AVX512BW"
10844 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
10845
;; Expander for equality compare into a mask register, dword/qword modes
;; (VI48_AVX512VL), gated on TARGET_AVX512F.  Same structure as the
;; byte/word expander: an UNSPEC_MASKED_EQ unspec with operand fix-up as
;; the only preparation.
10846(define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>"
d2ff59d6 10847 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
10848 (unspec:<avx512fmaskmode>
6b76cef2 10849 [(match_operand:VI48_AVX512VL 1 "register_operand")
10850 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand")]
d2ff59d6 10851 UNSPEC_MASKED_EQ))]
10852 "TARGET_AVX512F"
10853 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
10854
;; Insn for equality compare into a mask register ("Yk"), byte/word modes
;; (VI12_AVX512VL).  Emits vpcmpeq<suffix> with
;; <mask_scalar_merge_operand3> appended to the destination.
;; The condition requires TARGET_AVX512BW, matching the expander of the
;; same name and the other VI12_AVX512VL mask-compare/max-min patterns in
;; this file: the byte/word EVEX compares are AVX512BW instructions, so
;; gating this insn on TARGET_AVX512F alone would let it match on targets
;; that cannot encode it.
6b76cef2 10855(define_insn "<avx512>_eq<mode>3<mask_scalar_merge_name>_1"
a31e7f46 10856 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
d2ff59d6 10857 (unspec:<avx512fmaskmode>
6b76cef2 10858 [(match_operand:VI12_AVX512VL 1 "register_operand" "%v")
10859 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")]
10860 UNSPEC_MASKED_EQ))]
10861 "TARGET_AVX512BW && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
10862 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
10863 [(set_attr "type" "ssecmp")
10864 (set_attr "prefix_extra" "1")
10865 (set_attr "prefix" "evex")
10866 (set_attr "mode" "<sseinsnmode>")])
10867
;; Insn for equality compare into a mask register ("Yk"), dword/qword
;; modes (VI48_AVX512VL), gated on TARGET_AVX512F plus
;; ix86_binary_operator_ok.  Operand 1 is commutative; operand 2 may be
;; memory.
10868(define_insn "<avx512>_eq<mode>3<mask_scalar_merge_name>_1"
10869 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
10870 (unspec:<avx512fmaskmode>
10871 [(match_operand:VI48_AVX512VL 1 "register_operand" "%v")
10872 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")]
d2ff59d6 10873 UNSPEC_MASKED_EQ))]
10874 "TARGET_AVX512F && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
c3d9b089 10875 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
d2ff59d6 10876 [(set_attr "type" "ssecmp")
10877 (set_attr "prefix_extra" "1")
10878 (set_attr "prefix" "evex")
10879 (set_attr "mode" "<sseinsnmode>")])
10880
;; V2DI equality insn (pcmpeqq), gated on TARGET_SSE4_1.  Two legacy
;; two-operand alternatives ("Yr" and "*x", destination tied to operand 1)
;; and one AVX three-operand alternative (vpcmpeqq).
d8f82f6b 10881(define_insn "*sse4_1_eqv2di3"
0a32b282 10882 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,x")
d8f82f6b 10883 (eq:V2DI
0a32b282 10884 (match_operand:V2DI 1 "nonimmediate_operand" "%0,0,x")
10885 (match_operand:V2DI 2 "nonimmediate_operand" "Yrm,*xm,xm")))]
d8f82f6b 10886 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
10887 "@
0a32b282 10888 pcmpeqq\t{%2, %0|%0, %2}
d8f82f6b 10889 pcmpeqq\t{%2, %0|%0, %2}
10890 vpcmpeqq\t{%2, %1, %0|%0, %1, %2}"
0a32b282 10891 [(set_attr "isa" "noavx,noavx,avx")
d8f82f6b 10892 (set_attr "type" "ssecmp")
10893 (set_attr "prefix_extra" "1")
0a32b282 10894 (set_attr "prefix" "orig,orig,vex")
ed30e0a6 10895 (set_attr "mode" "TI")])
10896
;; SSE2 equality insn for the VI124_128 modes.  Disabled when TARGET_XOP
;; is set.  Alternative 0 is the legacy two-operand pcmpeq<suffix>;
;; alternative 1 is the AVX three-operand vpcmpeq<suffix>.
7c839b3f 10897(define_insn "*sse2_eq<mode>3"
d8f82f6b 10898 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
10899 (eq:VI124_128
10900 (match_operand:VI124_128 1 "nonimmediate_operand" "%0,x")
10901 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
18525343 10902 "TARGET_SSE2 && !TARGET_XOP
448e99f5 10903 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
d8f82f6b 10904 "@
63d5e521 10905 pcmpeq<ssemodesuffix>\t{%2, %0|%0, %2}
10906 vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
d8f82f6b 10907 [(set_attr "isa" "noavx,avx")
10908 (set_attr "type" "ssecmp")
10909 (set_attr "prefix_data16" "1,*")
10910 (set_attr "prefix" "orig,vex")
5802c0cb 10911 (set_attr "mode" "TI")])
10912
;; Named expander for equality on the VI124_128 modes, enabled for SSE2
;; without XOP.  It only fixes up the operands for EQ before the eq
;; template is emitted.
d8f82f6b 10913(define_expand "sse2_eq<mode>3"
abd4f58b 10914 [(set (match_operand:VI124_128 0 "register_operand")
d8f82f6b 10915 (eq:VI124_128
abd4f58b 10916 (match_operand:VI124_128 1 "nonimmediate_operand")
10917 (match_operand:VI124_128 2 "nonimmediate_operand")))]
d8f82f6b 10918 "TARGET_SSE2 && !TARGET_XOP "
10919 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
10920
;; Named expander for V2DI equality, enabled under TARGET_SSE4_1.  It only
;; fixes up the operands for EQ before the eq template is emitted.
7c839b3f 10921(define_expand "sse4_1_eqv2di3"
abd4f58b 10922 [(set (match_operand:V2DI 0 "register_operand")
7c839b3f 10923 (eq:V2DI
abd4f58b 10924 (match_operand:V2DI 1 "nonimmediate_operand")
10925 (match_operand:V2DI 2 "nonimmediate_operand")))]
7c839b3f 10926 "TARGET_SSE4_1"
10927 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
10928
;; V2DI signed greater-than insn (pcmpgtq), gated on TARGET_SSE4_2.
;; Operand 1 must be a register and is not commutative.  Two legacy
;; two-operand alternatives ("Yr", "*x") and one AVX three-operand
;; alternative (vpcmpgtq).
d8f82f6b 10929(define_insn "sse4_2_gtv2di3"
0a32b282 10930 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,x")
d8f82f6b 10931 (gt:V2DI
0a32b282 10932 (match_operand:V2DI 1 "register_operand" "0,0,x")
10933 (match_operand:V2DI 2 "nonimmediate_operand" "Yrm,*xm,xm")))]
d8f82f6b 10934 "TARGET_SSE4_2"
10935 "@
0a32b282 10936 pcmpgtq\t{%2, %0|%0, %2}
d8f82f6b 10937 pcmpgtq\t{%2, %0|%0, %2}
10938 vpcmpgtq\t{%2, %1, %0|%0, %1, %2}"
0a32b282 10939 [(set_attr "isa" "noavx,noavx,avx")
d8f82f6b 10940 (set_attr "type" "ssecmp")
2d771892 10941 (set_attr "prefix_extra" "1")
0a32b282 10942 (set_attr "prefix" "orig,orig,vex")
ed30e0a6 10943 (set_attr "mode" "TI")])
10944
;; AVX2 signed greater-than insn for the VI_256 modes.  Operand 1 is a
;; register, operand 2 a register or memory; emits the three-operand
;; vpcmpgt<suffix> with insn mode OI.
5deb404d 10945(define_insn "avx2_gt<mode>3"
18594fc6 10946 [(set (match_operand:VI_256 0 "register_operand" "=x")
10947 (gt:VI_256
10948 (match_operand:VI_256 1 "register_operand" "x")
10949 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
5deb404d 10950 "TARGET_AVX2"
10951 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10952 [(set_attr "type" "ssecmp")
10953 (set_attr "prefix_extra" "1")
10954 (set_attr "prefix" "vex")
10955 (set_attr "mode" "OI")])
10956
;; Greater-than compare into a mask register ("Yk") for the dword/qword
;; modes (VI48_AVX512VL), gated on TARGET_AVX512F.  The comparison is an
;; UNSPEC_MASKED_GT unspec; the output is vpcmpgt<suffix> with
;; <mask_scalar_merge_operand3> on the destination.
6b76cef2 10957(define_insn "<avx512>_gt<mode>3<mask_scalar_merge_name>"
a31e7f46 10958 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
d2ff59d6 10959 (unspec:<avx512fmaskmode>
6b76cef2 10960 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
10961 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))]
d2ff59d6 10962 "TARGET_AVX512F"
c3d9b089 10963 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
d2ff59d6 10964 [(set_attr "type" "ssecmp")
10965 (set_attr "prefix_extra" "1")
10966 (set_attr "prefix" "evex")
10967 (set_attr "mode" "<sseinsnmode>")])
10968
;; Greater-than compare into a mask register ("Yk") for the byte/word
;; modes (VI12_AVX512VL), gated on TARGET_AVX512BW.  Otherwise identical
;; in shape to the VI48_AVX512VL pattern of the same name.
6b76cef2 10969(define_insn "<avx512>_gt<mode>3<mask_scalar_merge_name>"
10970 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
10971 (unspec:<avx512fmaskmode>
10972 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
10973 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))]
10974 "TARGET_AVX512BW"
10975 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
10976 [(set_attr "type" "ssecmp")
10977 (set_attr "prefix_extra" "1")
10978 (set_attr "prefix" "evex")
10979 (set_attr "mode" "<sseinsnmode>")])
10980
;; SSE2 signed greater-than insn for the VI124_128 modes, disabled when
;; TARGET_XOP is set.  Alternative 0 is the legacy two-operand
;; pcmpgt<suffix> (destination tied to operand 1); alternative 1 is the
;; AVX three-operand vpcmpgt<suffix>.
5802c0cb 10981(define_insn "sse2_gt<mode>3"
d8f82f6b 10982 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
10983 (gt:VI124_128
10984 (match_operand:VI124_128 1 "register_operand" "0,x")
10985 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
18525343 10986 "TARGET_SSE2 && !TARGET_XOP"
d8f82f6b 10987 "@
63d5e521 10988 pcmpgt<ssemodesuffix>\t{%2, %0|%0, %2}
10989 vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
d8f82f6b 10990 [(set_attr "isa" "noavx,avx")
10991 (set_attr "type" "ssecmp")
10992 (set_attr "prefix_data16" "1,*")
10993 (set_attr "prefix" "orig,vex")
f25d51c3 10994 (set_attr "mode" "TI")])
10995
;; Vector conditional expander: data in a V_512 mode, comparison operands
;; in a VI_512 mode.
;;   operand 0: result register
;;   operands 1, 2: the two V_512 values selected between (no predicate)
;;   operand 3: comparison operator applied to operands 4 and 5
;; Enabled only under TARGET_AVX512F and when both modes have the same
;; number of units.  The body passes the whole operand array to
;; ix86_expand_int_vcond and asserts that it succeeds.
f23a3158 10996(define_expand "vcond<V_512:mode><VI_512:mode>"
10997 [(set (match_operand:V_512 0 "register_operand")
10998 (if_then_else:V_512
10999 (match_operator 3 ""
11000 [(match_operand:VI_512 4 "nonimmediate_operand")
11001 (match_operand:VI_512 5 "general_operand")])
11002 (match_operand:V_512 1)
11003 (match_operand:V_512 2)))]
11004 "TARGET_AVX512F
11005 && (GET_MODE_NUNITS (<V_512:MODE>mode)
11006 == GET_MODE_NUNITS (<VI_512:MODE>mode))"
11007{
11008 bool ok = ix86_expand_int_vcond (operands);
11009 gcc_assert (ok);
11010 DONE;
11011})
11012
;; Vector conditional expander: data in a V_256 mode, comparison operands
;; in a VI_256 mode.  Enabled under TARGET_AVX2 when both modes have the
;; same number of units.  The body delegates to ix86_expand_int_vcond and
;; asserts success.
230eb963 11013(define_expand "vcond<V_256:mode><VI_256:mode>"
abd4f58b 11014 [(set (match_operand:V_256 0 "register_operand")
230eb963 11015 (if_then_else:V_256
11016 (match_operator 3 ""
abd4f58b 11017 [(match_operand:VI_256 4 "nonimmediate_operand")
11018 (match_operand:VI_256 5 "general_operand")])
11019 (match_operand:V_256 1)
11020 (match_operand:V_256 2)))]
230eb963 11021 "TARGET_AVX2
11022 && (GET_MODE_NUNITS (<V_256:MODE>mode)
11023 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
11024{
11025 bool ok = ix86_expand_int_vcond (operands);
11026 gcc_assert (ok);
11027 DONE;
11028})
11029
;; Vector conditional expander: data in a V_128 mode, comparison operands
;; in a VI124_128 mode.  Enabled under TARGET_SSE2 when both modes have
;; the same number of units.  The body delegates to ix86_expand_int_vcond
;; and asserts success.
d6b19f6b 11030(define_expand "vcond<V_128:mode><VI124_128:mode>"
abd4f58b 11031 [(set (match_operand:V_128 0 "register_operand")
d6b19f6b 11032 (if_then_else:V_128
5deb404d 11033 (match_operator 3 ""
abd4f58b 11034 [(match_operand:VI124_128 4 "nonimmediate_operand")
11035 (match_operand:VI124_128 5 "general_operand")])
11036 (match_operand:V_128 1)
11037 (match_operand:V_128 2)))]
d6b19f6b 11038 "TARGET_SSE2
11039 && (GET_MODE_NUNITS (<V_128:MODE>mode)
11040 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
76405cce 11041{
17e313b0 11042 bool ok = ix86_expand_int_vcond (operands);
11043 gcc_assert (ok);
11044 DONE;
76405cce 11045})
11046
;; Vector conditional expander with a V2DI comparison and data in a
;; VI8F_128 mode.  Enabled under TARGET_SSE4_2; no unit-count check is
;; needed in the condition here.  The body delegates to
;; ix86_expand_int_vcond and asserts success.
d6b19f6b 11047(define_expand "vcond<VI8F_128:mode>v2di"
abd4f58b 11048 [(set (match_operand:VI8F_128 0 "register_operand")
d6b19f6b 11049 (if_then_else:VI8F_128
5deb404d 11050 (match_operator 3 ""
abd4f58b 11051 [(match_operand:V2DI 4 "nonimmediate_operand")
11052 (match_operand:V2DI 5 "general_operand")])
11053 (match_operand:VI8F_128 1)
11054 (match_operand:VI8F_128 2)))]
d8f82f6b 11055 "TARGET_SSE4_2"
11056{
11057 bool ok = ix86_expand_int_vcond (operands);
11058 gcc_assert (ok);
11059 DONE;
11060})
11061
;; Unsigned-comparison vector conditional expander: data in a V_512 mode,
;; comparison operands in a VI_512 mode.  Compared with the vcond pattern
;; for the same modes, operand 5 is restricted to nonimmediate_operand and
;; operands 1 and 2 carry the general_operand predicate.  Enabled under
;; TARGET_AVX512F when both modes have the same number of units; the body
;; delegates to ix86_expand_int_vcond and asserts success.
f23a3158 11062(define_expand "vcondu<V_512:mode><VI_512:mode>"
11063 [(set (match_operand:V_512 0 "register_operand")
11064 (if_then_else:V_512
11065 (match_operator 3 ""
11066 [(match_operand:VI_512 4 "nonimmediate_operand")
11067 (match_operand:VI_512 5 "nonimmediate_operand")])
11068 (match_operand:V_512 1 "general_operand")
11069 (match_operand:V_512 2 "general_operand")))]
11070 "TARGET_AVX512F
11071 && (GET_MODE_NUNITS (<V_512:MODE>mode)
11072 == GET_MODE_NUNITS (<VI_512:MODE>mode))"
11073{
11074 bool ok = ix86_expand_int_vcond (operands);
11075 gcc_assert (ok);
11076 DONE;
11077})
11078
;; "vcondu" counterpart for V_256 data: conditional select of operands 1
;; and 2 controlled by a comparison, operand 3, of two VI_256 vectors
;; (operands 4 and 5).  Enabled for AVX2 only when both modes have the
;; same number of elements.  Expansion is delegated to
;; ix86_expand_int_vcond, which must succeed.
(define_expand "vcondu<V_256:mode><VI_256:mode>"
  [(set (match_operand:V_256 0 "register_operand")
        (if_then_else:V_256
          (match_operator 3 ""
            [(match_operand:VI_256 4 "nonimmediate_operand")
             (match_operand:VI_256 5 "nonimmediate_operand")])
          (match_operand:V_256 1 "general_operand")
          (match_operand:V_256 2 "general_operand")))]
  "TARGET_AVX2
   && (GET_MODE_NUNITS (<V_256:MODE>mode)
       == GET_MODE_NUNITS (<VI_256:MODE>mode))"
{
  /* The call stays outside the assertion so it is always evaluated.  */
  bool expanded = ix86_expand_int_vcond (operands);
  gcc_assert (expanded);
  DONE;
})
11095
;; "vcondu" counterpart for V_128 data: conditional select of operands 1
;; and 2 controlled by a comparison, operand 3, of two VI124_128 vectors
;; (operands 4 and 5).  Enabled for SSE2 only when both modes have the
;; same number of elements.  Expansion is delegated to
;; ix86_expand_int_vcond, which must succeed.
(define_expand "vcondu<V_128:mode><VI124_128:mode>"
  [(set (match_operand:V_128 0 "register_operand")
        (if_then_else:V_128
          (match_operator 3 ""
            [(match_operand:VI124_128 4 "nonimmediate_operand")
             (match_operand:VI124_128 5 "nonimmediate_operand")])
          (match_operand:V_128 1 "general_operand")
          (match_operand:V_128 2 "general_operand")))]
  "TARGET_SSE2
   && (GET_MODE_NUNITS (<V_128:MODE>mode)
       == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
{
  /* The call stays outside the assertion so it is always evaluated.  */
  bool expanded = ix86_expand_int_vcond (operands);
  gcc_assert (expanded);
  DONE;
})
11112
;; "vcondu" counterpart for VI8F_128 data: conditional select of operands
;; 1 and 2 controlled by a comparison, operand 3, of two V2DI vectors
;; (operands 4 and 5).  Requires SSE4.2.  Expansion is delegated to
;; ix86_expand_int_vcond, which must succeed.
(define_expand "vcondu<VI8F_128:mode>v2di"
  [(set (match_operand:VI8F_128 0 "register_operand")
        (if_then_else:VI8F_128
          (match_operator 3 ""
            [(match_operand:V2DI 4 "nonimmediate_operand")
             (match_operand:V2DI 5 "nonimmediate_operand")])
          (match_operand:VI8F_128 1 "general_operand")
          (match_operand:VI8F_128 2 "general_operand")))]
  "TARGET_SSE4_2"
{
  /* The call stays outside the assertion so it is always evaluated.  */
  bool expanded = ix86_expand_int_vcond (operands);
  gcc_assert (expanded);
  DONE;
})
11127
;; Modes handled by the vec_perm<mode> expander.  The first six modes are
;; unconditional; every other mode is enabled by the target test paired
;; with it.
(define_mode_iterator VEC_PERM_AVX2
  [V16QI V8HI V4SI V2DI V4SF V2DF
   (V32QI "TARGET_AVX2")
   (V16HI "TARGET_AVX2")
   (V8SI "TARGET_AVX2")
   (V4DI "TARGET_AVX2")
   (V8SF "TARGET_AVX2")
   (V4DF "TARGET_AVX2")
   (V16SF "TARGET_AVX512F")
   (V8DF "TARGET_AVX512F")
   (V16SI "TARGET_AVX512F")
   (V8DI "TARGET_AVX512F")
   (V32HI "TARGET_AVX512BW")
   (V64QI "TARGET_AVX512VBMI")])
12cbfa26 11136
;; Permutation of VEC_PERM_AVX2 vectors: operand 0 is the destination,
;; operands 1 and 2 are the inputs and operand 3 is a register holding the
;; selector in the matching integer vector mode.  All work is done by
;; ix86_expand_vec_perm.
(define_expand "vec_perm<mode>"
  [(match_operand:VEC_PERM_AVX2 0 "register_operand")
   (match_operand:VEC_PERM_AVX2 1 "register_operand")
   (match_operand:VEC_PERM_AVX2 2 "register_operand")
   (match_operand:<sseintvecmode> 3 "register_operand")]
  "TARGET_SSSE3 || TARGET_AVX || TARGET_XOP"
{
  /* No failure path: the helper's result is not inspected.  */
  ix86_expand_vec_perm (operands);
  DONE;
})
11147
;; Modes handled by the vec_perm_const<mode> expander; each mode is
;; enabled by the target test paired with it.
(define_mode_iterator VEC_PERM_CONST
  [(V4SF "TARGET_SSE")
   (V4SI "TARGET_SSE")
   (V2DF "TARGET_SSE")
   (V2DI "TARGET_SSE")
   (V16QI "TARGET_SSE2")
   (V8HI "TARGET_SSE2")
   (V8SF "TARGET_AVX")
   (V4DF "TARGET_AVX")
   (V8SI "TARGET_AVX")
   (V4DI "TARGET_AVX")
   (V32QI "TARGET_AVX2")
   (V16HI "TARGET_AVX2")
   (V16SI "TARGET_AVX512F")
   (V8DI "TARGET_AVX512F")
   (V16SF "TARGET_AVX512F")
   (V8DF "TARGET_AVX512F")
   (V32HI "TARGET_AVX512BW")
   (V64QI "TARGET_AVX512BW")])
6ae3cabe 11158
;; Permutation of VEC_PERM_CONST vectors where the selector (operand 3)
;; carries no predicate.  The pattern itself has an empty condition; the
;; per-mode tests live in the iterator.  The expansion FAILs whenever
;; ix86_expand_vec_perm_const reports that it could not handle the
;; request.
(define_expand "vec_perm_const<mode>"
  [(match_operand:VEC_PERM_CONST 0 "register_operand")
   (match_operand:VEC_PERM_CONST 1 "register_operand")
   (match_operand:VEC_PERM_CONST 2 "register_operand")
   (match_operand:<sseintvecmode> 3)]
  ""
{
  if (!ix86_expand_vec_perm_const (operands))
    FAIL;
  DONE;
})
11171
5802c0cb 11172;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11173;;
349d9d0e 11174;; Parallel bitwise logical operations
5802c0cb 11175;;
11176;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11177
;; Bitwise complement of an integer vector, expressed as an XOR with a
;; vector whose elements are all constm1_rtx.  The preparation code
;; builds that constant, forces it into a register and stores it as
;; operand 2 for the match_dup.
(define_expand "one_cmpl<mode>2"
  [(set (match_operand:VI 0 "register_operand")
        (xor:VI (match_operand:VI 1 "nonimmediate_operand")
                (match_dup 2)))]
  "TARGET_SSE"
{
  int nunits = GET_MODE_NUNITS (<MODE>mode);
  rtvec ones = rtvec_alloc (nunits);
  int idx;

  /* Every element of the constant is -1.  */
  for (idx = 0; idx < nunits; idx++)
    RTVEC_ELT (ones, idx) = constm1_rtx;

  operands[2]
    = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, ones));
})
11192
;; Named expander for and-not on VI_AVX2 vectors:
;; operand 0 = (~operand 1) & operand 2.  It has no preparation code, so
;; the RTL template is emitted as written.
(define_expand "<sse2_avx2>_andnot<mode>3"
  [(set (match_operand:VI_AVX2 0 "register_operand")
        (and:VI_AVX2
          (not:VI_AVX2
            (match_operand:VI_AVX2 1 "register_operand"))
          (match_operand:VI_AVX2 2 "nonimmediate_operand")))]
  "TARGET_SSE2")
3a950715 11199
;; Masked and-not expander for VI48_AVX512VL modes: the result of
;; (~operand 1) & operand 2 is merged with operand 3 under the mask in
;; operand 4.  There is no preparation code.
(define_expand "<sse2_avx2>_andnot<mode>3_mask"
  [(set (match_operand:VI48_AVX512VL 0 "register_operand")
        (vec_merge:VI48_AVX512VL
          (and:VI48_AVX512VL
            (not:VI48_AVX512VL
              (match_operand:VI48_AVX512VL 1 "register_operand"))
            (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
          (match_operand:VI48_AVX512VL 3 "vector_move_operand")
          (match_operand:<avx512fmaskmode> 4 "register_operand")))]
  "TARGET_AVX512F")
11210
;; Masked and-not expander for VI12_AVX512VL modes: the result of
;; (~operand 1) & operand 2 is merged with operand 3 under the mask in
;; operand 4.  Requires AVX512BW.  There is no preparation code.
(define_expand "<sse2_avx2>_andnot<mode>3_mask"
  [(set (match_operand:VI12_AVX512VL 0 "register_operand")
        (vec_merge:VI12_AVX512VL
          (and:VI12_AVX512VL
            (not:VI12_AVX512VL
              (match_operand:VI12_AVX512VL 1 "register_operand"))
            (match_operand:VI12_AVX512VL 2 "nonimmediate_operand"))
          (match_operand:VI12_AVX512VL 3 "vector_move_operand")
          (match_operand:<avx512fmaskmode> 4 "register_operand")))]
  "TARGET_AVX512BW")
11221
;; And-not on any VI vector: operand 0 = (~operand 1) & operand 2.
;; Alternative 0 is the two-operand legacy form (destination tied to
;; operand 1); alternative 1 is the three-operand "v"-prefixed form.
;; The mnemonic is chosen from the insn's "mode" attribute (integer
;; pandn* versus floating-point andnps) and then from the vector mode.
(define_insn "*andnot<mode>3"
  [(set (match_operand:VI 0 "register_operand" "=x,v")
        (and:VI
          (not:VI (match_operand:VI 1 "register_operand" "0,v"))
          (match_operand:VI 2 "nonimmediate_operand" "xm,vm")))]
  "TARGET_SSE"
{
  static char buf[64];
  const char *ops;
  const char *tmp;

  switch (get_attr_mode (insn))
    {
    case MODE_XI:
      gcc_assert (TARGET_AVX512F);
      /* FALLTHRU */
    case MODE_OI:
      gcc_assert (TARGET_AVX2 || TARGET_AVX512VL);
      /* FALLTHRU */
    case MODE_TI:
      gcc_assert (TARGET_SSE2 || TARGET_AVX512VL);
      switch (<MODE>mode)
        {
        case V64QImode:
        case V32HImode:
          /* There is no byte or word flavour of vpandn, and the 512-bit
             form has no suffix-less encoding, so always use the "q"
             flavour here.  The operation is bitwise, so the element
             size does not matter.  Previously these modes reached the
             default arm and produced a bare "pandn" unless AVX512VL was
             enabled.  */
          tmp = "pandnq";
          break;
        case V16SImode:
        case V8DImode:
          if (TARGET_AVX512F)
            {
              tmp = "pandn<ssemodesuffix>";
              break;
            }
          /* FALLTHRU */
        case V8SImode:
        case V4DImode:
        case V4SImode:
        case V2DImode:
          if (TARGET_AVX512VL)
            {
              tmp = "pandn<ssemodesuffix>";
              break;
            }
          /* FALLTHRU */
        default:
          tmp = TARGET_AVX512VL ? "pandnq" : "pandn";
        }
      break;

    case MODE_V16SF:
      gcc_assert (TARGET_AVX512F);
      /* FALLTHRU */
    case MODE_V8SF:
      gcc_assert (TARGET_AVX);
      /* FALLTHRU */
    case MODE_V4SF:
      gcc_assert (TARGET_SSE);

      tmp = "andnps";
      break;

    default:
      gcc_unreachable ();
    }

  /* Pick the operand layout for the selected alternative; the mnemonic
     chosen above is substituted for the %s.  */
  switch (which_alternative)
    {
    case 0:
      ops = "%s\t{%%2, %%0|%%0, %%2}";
      break;
    case 1:
      ops = "v%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
      break;
    default:
      gcc_unreachable ();
    }

  snprintf (buf, sizeof (buf), ops, tmp);
  return buf;
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set (attr "prefix_data16")
     (if_then_else
       (and (eq_attr "alternative" "0")
            (eq_attr "mode" "TI"))
       (const_string "1")
       (const_string "*")))
   (set_attr "prefix" "orig,vex")
   (set (attr "mode")
        (cond [(and (match_test "<MODE_SIZE> == 16")
                    (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
                 (const_string "<ssePSmode>")
               (match_test "TARGET_AVX2")
                 (const_string "<sseinsnmode>")
               (match_test "TARGET_AVX")
                 (if_then_else
                   (match_test "<MODE_SIZE> > 16")
                   (const_string "V8SF")
                   (const_string "<sseinsnmode>"))
               (ior (not (match_test "TARGET_SSE2"))
                    (match_test "optimize_function_for_size_p (cfun)"))
                 (const_string "V4SF")
              ]
              (const_string "<sseinsnmode>")))])
349d9d0e 11318
;; Masked and-not for VI48_AVX512VL modes.  Operand 3 supplies the merge
;; source (tied to the destination, or the "C" constant alternative) and
;; operand 4 is the mask register.
(define_insn "*andnot<mode>3_mask"
  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
        (vec_merge:VI48_AVX512VL
          (and:VI48_AVX512VL
            (not:VI48_AVX512VL
              (match_operand:VI48_AVX512VL 1 "register_operand" "v"))
            (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm"))
          (match_operand:VI48_AVX512VL 3 "vector_move_operand" "0C")
          (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512F"
  "vpandn<ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
11333
;; Masked and-not for VI12_AVX512VL modes; requires AVX512BW.  Operand 3
;; supplies the merge source (tied to the destination, or the "C"
;; constant alternative) and operand 4 is the mask register.
(define_insn "*andnot<mode>3_mask"
  [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
        (vec_merge:VI12_AVX512VL
          (and:VI12_AVX512VL
            (not:VI12_AVX512VL
              (match_operand:VI12_AVX512VL 1 "register_operand" "v"))
            (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm"))
          (match_operand:VI12_AVX512VL 3 "vector_move_operand" "0C")
          (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512BW"
  "vpandn<ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
11348
;; Expander for the any_logic operations on VI vectors.  Either input may
;; be a constant vector; ix86_expand_vector_logical_operator emits the
;; actual instructions.
(define_expand "<code><mode>3"
  [(set (match_operand:VI 0 "register_operand")
        (any_logic:VI
          (match_operand:VI 1 "nonimmediate_or_const_vector_operand")
          (match_operand:VI 2 "nonimmediate_or_const_vector_operand")))]
  "TARGET_SSE"
{
  /* Hand the rtx code and vector mode of this instance to the helper.  */
  ix86_expand_vector_logical_operator (<CODE>, <MODE>mode, operands);
  DONE;
})
5802c0cb 11359
;; any_logic operations on VI48_AVX_AVX512F vectors, optionally masked
;; through the "mask" substitution.  Alternative 0 ties the destination
;; to operand 1; alternative 1 is the three-operand "v"-prefixed form.
;; The mnemonic depends on the insn's "mode" attribute and on the vector
;; mode.
(define_insn "<mask_codefor><code><mode>3<mask_name>"
  [(set (match_operand:VI48_AVX_AVX512F 0 "register_operand" "=x,v")
        (any_logic:VI48_AVX_AVX512F
          (match_operand:VI48_AVX_AVX512F 1 "nonimmediate_operand" "%0,v")
          (match_operand:VI48_AVX_AVX512F 2 "nonimmediate_operand" "xm,vm")))]
  "TARGET_SSE && <mask_mode512bit_condition>
   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
{
  static char insn_buf[64];
  const char *fmt;
  const char *mnemonic;

  switch (get_attr_mode (insn))
    {
    case MODE_XI:
      gcc_assert (TARGET_AVX512F);
      /* FALLTHRU */
    case MODE_OI:
      gcc_assert (TARGET_AVX2 || TARGET_AVX512VL);
      /* FALLTHRU */
    case MODE_TI:
      gcc_assert (TARGET_SSE2 || TARGET_AVX512VL);
      switch (<MODE>mode)
        {
        case V16SImode:
        case V8DImode:
          if (TARGET_AVX512F)
            {
              mnemonic = "p<logic><ssemodesuffix>";
              break;
            }
          /* FALLTHRU */
        case V8SImode:
        case V4DImode:
        case V4SImode:
        case V2DImode:
          /* The element-size suffix is only used with AVX512VL.  */
          if (TARGET_AVX512VL)
            mnemonic = "p<logic><ssemodesuffix>";
          else
            mnemonic = "p<logic>";
          break;
        default:
          gcc_unreachable ();
        }
      break;

    case MODE_V8SF:
      gcc_assert (TARGET_AVX);
      /* FALLTHRU */
    case MODE_V4SF:
      gcc_assert (TARGET_SSE);
      /* The packed-single spelling is never used for a masked insn.  */
      gcc_assert (!<mask_applied>);
      mnemonic = "<logic>ps";
      break;

    default:
      gcc_unreachable ();
    }

  /* Select the operand layout; the mnemonic replaces the %s.  */
  switch (which_alternative)
    {
    case 0:
      if (<mask_applied>)
        fmt = "v%s\t{%%2, %%0, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%0, %%2}";
      else
        fmt = "%s\t{%%2, %%0|%%0, %%2}";
      break;
    case 1:
      fmt = "v%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
      break;
    default:
      gcc_unreachable ();
    }

  snprintf (insn_buf, sizeof (insn_buf), fmt, mnemonic);
  return insn_buf;
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set (attr "prefix_data16")
     (if_then_else
       (and (eq_attr "alternative" "0")
            (eq_attr "mode" "TI"))
       (const_string "1")
       (const_string "*")))
   (set_attr "prefix" "<mask_prefix3>")
   (set (attr "mode")
        (cond [(and (match_test "<MODE_SIZE> == 16")
                    (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
                 (const_string "<ssePSmode>")
               (match_test "TARGET_AVX2")
                 (const_string "<sseinsnmode>")
               (match_test "TARGET_AVX")
                 (if_then_else
                   (match_test "<MODE_SIZE> > 16")
                   (const_string "V8SF")
                   (const_string "<sseinsnmode>"))
               (ior (not (match_test "TARGET_SSE2"))
                    (match_test "optimize_function_for_size_p (cfun)"))
                 (const_string "V4SF")
              ]
              (const_string "<sseinsnmode>")))])
11455
;; any_logic operations on VI12_AVX_AVX512F vectors.  Alternative 0 ties
;; the destination to operand 1; alternative 1 is the three-operand
;; "v"-prefixed form, which also appends the suffix chosen below.
(define_insn "*<code><mode>3"
  [(set (match_operand:VI12_AVX_AVX512F 0 "register_operand" "=x,v")
        (any_logic:VI12_AVX_AVX512F
          (match_operand:VI12_AVX_AVX512F 1 "nonimmediate_operand" "%0,v")
          (match_operand:VI12_AVX_AVX512F 2 "nonimmediate_operand" "xm,vm")))]
  "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
{
  static char insn_buf[64];
  const char *fmt;
  const char *mnemonic;
  const char *suffix;

  switch (get_attr_mode (insn))
    {
    case MODE_XI:
      gcc_assert (TARGET_AVX512F);
      /* FALLTHRU */
    case MODE_OI:
      gcc_assert (TARGET_AVX2 || TARGET_AVX512VL);
      /* FALLTHRU */
    case MODE_TI:
      gcc_assert (TARGET_SSE2 || TARGET_AVX512VL);
      switch (<MODE>mode)
        {
        case V64QImode:
        case V32HImode:
          if (TARGET_AVX512F)
            {
              mnemonic = "p<logic>";
              suffix = "q";
              break;
            }
          /* FALLTHRU */
        case V32QImode:
        case V16HImode:
        case V16QImode:
        case V8HImode:
          if (TARGET_AVX512VL || TARGET_AVX2 || TARGET_SSE2)
            {
              mnemonic = "p<logic>";
              if (TARGET_AVX512VL)
                suffix = "q";
              else
                suffix = "";
              break;
            }
          /* FALLTHRU */
        default:
          gcc_unreachable ();
        }
      break;

    case MODE_V8SF:
      gcc_assert (TARGET_AVX);
      /* FALLTHRU */
    case MODE_V4SF:
      gcc_assert (TARGET_SSE);
      mnemonic = "<logic>ps";
      suffix = "";
      break;

    default:
      gcc_unreachable ();
    }

  /* Only the three-operand alternative prints the suffix.  */
  switch (which_alternative)
    {
    case 0:
      fmt = "%s\t{%%2, %%0|%%0, %%2}";
      snprintf (insn_buf, sizeof (insn_buf), fmt, mnemonic);
      break;
    case 1:
      fmt = "v%s%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
      snprintf (insn_buf, sizeof (insn_buf), fmt, mnemonic, suffix);
      break;
    default:
      gcc_unreachable ();
    }

  return insn_buf;
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set (attr "prefix_data16")
     (if_then_else
       (and (eq_attr "alternative" "0")
            (eq_attr "mode" "TI"))
       (const_string "1")
       (const_string "*")))
   (set_attr "prefix" "<mask_prefix3>")
   (set (attr "mode")
        (cond [(and (match_test "<MODE_SIZE> == 16")
                    (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
                 (const_string "<ssePSmode>")
               (match_test "TARGET_AVX2")
                 (const_string "<sseinsnmode>")
               (match_test "TARGET_AVX")
                 (if_then_else
                   (match_test "<MODE_SIZE> > 16")
                   (const_string "V8SF")
                   (const_string "<sseinsnmode>"))
               (ior (not (match_test "TARGET_SSE2"))
                    (match_test "optimize_function_for_size_p (cfun)"))
                 (const_string "V4SF")
              ]
              (const_string "<sseinsnmode>")))])
ba2558f8 11554
;; vptestm on VI12_AVX512VL vectors: writes a mask register (operand 0)
;; computed from operands 1 and 2, represented as UNSPEC_TESTM.  Requires
;; AVX512BW.
(define_insn "<avx512>_testm<mode>3<mask_scalar_merge_name>"
  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
        (unspec:<avx512fmaskmode>
          [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
           (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")]
          UNSPEC_TESTM))]
  "TARGET_AVX512BW"
  "vptestm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
11565
;; vptestm on VI48_AVX512VL vectors: writes a mask register (operand 0)
;; computed from operands 1 and 2, represented as UNSPEC_TESTM.  Requires
;; AVX512F.
(define_insn "<avx512>_testm<mode>3<mask_scalar_merge_name>"
  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
        (unspec:<avx512fmaskmode>
          [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
           (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")]
          UNSPEC_TESTM))]
  "TARGET_AVX512F"
  "vptestm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
11576
;; vptestnm on VI12_AVX512VL vectors: writes a mask register (operand 0)
;; computed from operands 1 and 2, represented as UNSPEC_TESTNM.
;; Requires AVX512BW.
(define_insn "<avx512>_testnm<mode>3<mask_scalar_merge_name>"
  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
        (unspec:<avx512fmaskmode>
          [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
           (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")]
          UNSPEC_TESTNM))]
  "TARGET_AVX512BW"
  "vptestnm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
11587
;; vptestnm on VI48_AVX512VL vectors: writes a mask register (operand 0)
;; computed from operands 1 and 2, represented as UNSPEC_TESTNM.
;; Requires AVX512F.
(define_insn "<avx512>_testnm<mode>3<mask_scalar_merge_name>"
  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
        (unspec:<avx512fmaskmode>
          [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
           (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")]
          UNSPEC_TESTNM))]
  "TARGET_AVX512F"
  "vptestnm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
11598
5802c0cb 11599;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11600;;
11601;; Parallel integral element swizzling
11602;;
11603;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11604
;; Truncating pack of two wide integer vectors (operands 1 and 2) into
;; one vector of the pack mode (operand 0).  Both inputs are viewed in
;; the pack mode via gen_lowpart and handed to
;; ix86_expand_vec_extract_even_odd with a final argument of 0.
;; NOTE(review): that argument presumably selects the even elements;
;; confirm against the helper.
(define_expand "vec_pack_trunc_<mode>"
  [(match_operand:<ssepackmode> 0 "register_operand")
   (match_operand:VI248_AVX2_8_AVX512F_24_AVX512BW 1 "register_operand")
   (match_operand:VI248_AVX2_8_AVX512F_24_AVX512BW 2 "register_operand")]
  "TARGET_SSE2"
{
  rtx src1 = gen_lowpart (<ssepackmode>mode, operands[1]);
  rtx src2 = gen_lowpart (<ssepackmode>mode, operands[2]);

  ix86_expand_vec_extract_even_odd (operands[0], src1, src2, 0);
  DONE;
})
11616
;; Combine two QImode values into one HImode value: operand 1 is
;; zero-extended and shifted left by 8, then ORed with the zero-extended
;; operand 2.  Requires AVX512F.
(define_expand "vec_pack_trunc_qi"
  [(set (match_operand:HI 0 "register_operand")
        (ior:HI
          (ashift:HI
            (zero_extend:HI (match_operand:QI 1 "register_operand"))
            (const_int 8))
          (zero_extend:HI (match_operand:QI 2 "register_operand"))))]
  "TARGET_AVX512F")
11623
;; Combine two SWI24 values into one value of the double-width mask mode:
;; operand 1 is zero-extended and shifted left by the bit size of the
;; input mode (supplied as operand 3), then ORed with the zero-extended
;; operand 2.  Requires AVX512BW.
(define_expand "vec_pack_trunc_<mode>"
  [(set (match_operand:<DOUBLEMASKMODE> 0 "register_operand")
        (ior:<DOUBLEMASKMODE>
          (ashift:<DOUBLEMASKMODE>
            (zero_extend:<DOUBLEMASKMODE>
              (match_operand:SWI24 1 "register_operand"))
            (match_dup 3))
          (zero_extend:<DOUBLEMASKMODE>
            (match_operand:SWI24 2 "register_operand"))))]
  "TARGET_AVX512BW"
{
  /* The shift count is the width in bits of one input.  */
  operands[3] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));
})
11633
;; Signed-saturating pack of two word vectors (operands 1 and 2) into one
;; byte vector: each input is narrowed with ss_truncate and the halves
;; are concatenated.  Optionally masked through the "mask" substitution.
;; NOTE(review): the destination of the second alternative is "x" while
;; its inputs are "v", unlike the packssdw pattern which uses "=x,v";
;; confirm whether the narrower output class is intentional.
(define_insn "<sse2_avx2>_packsswb<mask_name>"
  [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x")
        (vec_concat:VI1_AVX512
          (ss_truncate:<ssehalfvecmode>
            (match_operand:<sseunpackmode> 1 "register_operand" "0,v"))
          (ss_truncate:<ssehalfvecmode>
            (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,vm"))))]
  "TARGET_SSE2
   && <mask_mode512bit_condition>
   && <mask_avx512bw_condition>"
  "@
   packsswb\t{%2, %0|%0, %2}
   vpacksswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,maybe_evex")
   (set_attr "mode" "<sseinsnmode>")])
ed30e0a6 11650
;; Signed-saturating pack of two doubleword vectors (operands 1 and 2)
;; into one word vector: each input is narrowed with ss_truncate and the
;; halves are concatenated.  Optionally masked through the "mask"
;; substitution.
;; The second alternative accepts "v" registers and a mask operand, so
;; it may need an EVEX encoding; its "prefix" attribute is therefore
;; "maybe_evex" (as on the packsswb pattern) rather than "vex".
(define_insn "<sse2_avx2>_packssdw<mask_name>"
  [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
        (vec_concat:VI2_AVX2
          (ss_truncate:<ssehalfvecmode>
            (match_operand:<sseunpackmode> 1 "register_operand" "0,v"))
          (ss_truncate:<ssehalfvecmode>
            (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,vm"))))]
  "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
  "@
   packssdw\t{%2, %0|%0, %2}
   vpackssdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,maybe_evex")
   (set_attr "mode" "<sseinsnmode>")])
ed30e0a6 11667
;; Unsigned-saturating pack of two word vectors (operands 1 and 2) into
;; one byte vector: each input is narrowed with us_truncate and the
;; halves are concatenated.  Optionally masked through the "mask"
;; substitution.
;; The second alternative accepts "v" input registers and a mask operand,
;; so it may need an EVEX encoding; its "prefix" attribute is therefore
;; "maybe_evex" (as on the packsswb pattern) rather than "vex".
;; NOTE(review): the destination of the second alternative is "x" while
;; its inputs are "v", unlike the packssdw pattern which uses "=x,v";
;; confirm whether the narrower output class is intentional.
(define_insn "<sse2_avx2>_packuswb<mask_name>"
  [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x")
        (vec_concat:VI1_AVX512
          (us_truncate:<ssehalfvecmode>
            (match_operand:<sseunpackmode> 1 "register_operand" "0,v"))
          (us_truncate:<ssehalfvecmode>
            (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,vm"))))]
  "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
  "@
   packuswb\t{%2, %0|%0, %2}
   vpackuswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,maybe_evex")
   (set_attr "mode" "<sseinsnmode>")])
ed30e0a6 11684
;; vpunpckhbw on V64QI: interleaves bytes of operands 1 and 2.  In the
;; concatenation, indices 0-63 refer to operand 1 and 64-127 to operand
;; 2; within each group of 16 bytes the upper eight (8-15, 24-31, 40-47,
;; 56-63) are selected and paired with the same positions of operand 2.
(define_insn "avx512bw_interleave_highv64qi<mask_name>"
  [(set (match_operand:V64QI 0 "register_operand" "=v")
        (vec_select:V64QI
          (vec_concat:V128QI
            (match_operand:V64QI 1 "register_operand" "v")
            (match_operand:V64QI 2 "nonimmediate_operand" "vm"))
          (parallel
            [(const_int 8) (const_int 72) (const_int 9) (const_int 73)
             (const_int 10) (const_int 74) (const_int 11) (const_int 75)
             (const_int 12) (const_int 76) (const_int 13) (const_int 77)
             (const_int 14) (const_int 78) (const_int 15) (const_int 79)
             (const_int 24) (const_int 88) (const_int 25) (const_int 89)
             (const_int 26) (const_int 90) (const_int 27) (const_int 91)
             (const_int 28) (const_int 92) (const_int 29) (const_int 93)
             (const_int 30) (const_int 94) (const_int 31) (const_int 95)
             (const_int 40) (const_int 104) (const_int 41) (const_int 105)
             (const_int 42) (const_int 106) (const_int 43) (const_int 107)
             (const_int 44) (const_int 108) (const_int 45) (const_int 109)
             (const_int 46) (const_int 110) (const_int 47) (const_int 111)
             (const_int 56) (const_int 120) (const_int 57) (const_int 121)
             (const_int 58) (const_int 122) (const_int 59) (const_int 123)
             (const_int 60) (const_int 124) (const_int 61) (const_int 125)
             (const_int 62) (const_int 126) (const_int 63) (const_int 127)])))]
  "TARGET_AVX512BW"
  "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
11728
;; vpunpckhbw on V32QI: interleaves bytes of operands 1 and 2.  In the
;; concatenation, indices 0-31 refer to operand 1 and 32-63 to operand 2;
;; within each group of 16 bytes the upper eight (8-15, 24-31) are
;; selected and paired with the same positions of operand 2.
;; The masked variant works on byte elements, so the condition also
;; includes the AVX512BW mask test, exactly as the
;; avx2_interleave_lowv32qi pattern does.
(define_insn "avx2_interleave_highv32qi<mask_name>"
  [(set (match_operand:V32QI 0 "register_operand" "=v")
        (vec_select:V32QI
          (vec_concat:V64QI
            (match_operand:V32QI 1 "register_operand" "v")
            (match_operand:V32QI 2 "nonimmediate_operand" "vm"))
          (parallel [(const_int 8) (const_int 40)
                     (const_int 9) (const_int 41)
                     (const_int 10) (const_int 42)
                     (const_int 11) (const_int 43)
                     (const_int 12) (const_int 44)
                     (const_int 13) (const_int 45)
                     (const_int 14) (const_int 46)
                     (const_int 15) (const_int 47)
                     (const_int 24) (const_int 56)
                     (const_int 25) (const_int 57)
                     (const_int 26) (const_int 58)
                     (const_int 27) (const_int 59)
                     (const_int 28) (const_int 60)
                     (const_int 29) (const_int 61)
                     (const_int 30) (const_int 62)
                     (const_int 31) (const_int 63)])))]
  "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
  "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "<mask_prefix>")
   (set_attr "mode" "OI")])
11756
;; punpckhbw / vpunpckhbw on V16QI: interleaves the upper eight bytes of
;; operand 1 (indices 8-15) with the upper eight bytes of operand 2
;; (indices 24-31 of the concatenation).
;; The masked variant works on byte elements, so the condition also
;; includes the AVX512BW mask test, exactly as the
;; vec_interleave_lowv16qi pattern does.
(define_insn "vec_interleave_highv16qi<mask_name>"
  [(set (match_operand:V16QI 0 "register_operand" "=x,v")
        (vec_select:V16QI
          (vec_concat:V32QI
            (match_operand:V16QI 1 "register_operand" "0,v")
            (match_operand:V16QI 2 "nonimmediate_operand" "xm,vm"))
          (parallel [(const_int 8) (const_int 24)
                     (const_int 9) (const_int 25)
                     (const_int 10) (const_int 26)
                     (const_int 11) (const_int 27)
                     (const_int 12) (const_int 28)
                     (const_int 13) (const_int 29)
                     (const_int 14) (const_int 30)
                     (const_int 15) (const_int 31)])))]
  "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
  "@
   punpckhbw\t{%2, %0|%0, %2}
   vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,<mask_prefix>")
   (set_attr "mode" "TI")])
11780
;; vpunpcklbw on V64QI: interleaves bytes of operands 1 and 2.  In the
;; concatenation, indices 0-63 refer to operand 1 and 64-127 to operand
;; 2; within each group of 16 bytes the lower eight (0-7, 16-23, 32-39,
;; 48-55) are selected and paired with the same positions of operand 2.
(define_insn "avx512bw_interleave_lowv64qi<mask_name>"
  [(set (match_operand:V64QI 0 "register_operand" "=v")
        (vec_select:V64QI
          (vec_concat:V128QI
            (match_operand:V64QI 1 "register_operand" "v")
            (match_operand:V64QI 2 "nonimmediate_operand" "vm"))
          (parallel
            [(const_int 0) (const_int 64) (const_int 1) (const_int 65)
             (const_int 2) (const_int 66) (const_int 3) (const_int 67)
             (const_int 4) (const_int 68) (const_int 5) (const_int 69)
             (const_int 6) (const_int 70) (const_int 7) (const_int 71)
             (const_int 16) (const_int 80) (const_int 17) (const_int 81)
             (const_int 18) (const_int 82) (const_int 19) (const_int 83)
             (const_int 20) (const_int 84) (const_int 21) (const_int 85)
             (const_int 22) (const_int 86) (const_int 23) (const_int 87)
             (const_int 32) (const_int 96) (const_int 33) (const_int 97)
             (const_int 34) (const_int 98) (const_int 35) (const_int 99)
             (const_int 36) (const_int 100) (const_int 37) (const_int 101)
             (const_int 38) (const_int 102) (const_int 39) (const_int 103)
             (const_int 48) (const_int 112) (const_int 49) (const_int 113)
             (const_int 50) (const_int 114) (const_int 51) (const_int 115)
             (const_int 52) (const_int 116) (const_int 53) (const_int 117)
             (const_int 54) (const_int 118) (const_int 55) (const_int 119)])))]
  "TARGET_AVX512BW"
  "vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
11824
;; vpunpcklbw on V32QI: interleaves bytes of operands 1 and 2.  In the
;; concatenation, indices 0-31 refer to operand 1 and 32-63 to operand 2;
;; within each group of 16 bytes the lower eight (0-7, 16-23) are
;; selected and paired with the same positions of operand 2.
;; The pattern accepts "v" registers and a mask operand, so it may need
;; an EVEX encoding; the "prefix" attribute is "maybe_evex", as on the
;; avx2_interleave_highv16hi pattern, rather than "maybe_vex".
(define_insn "avx2_interleave_lowv32qi<mask_name>"
  [(set (match_operand:V32QI 0 "register_operand" "=v")
        (vec_select:V32QI
          (vec_concat:V64QI
            (match_operand:V32QI 1 "register_operand" "v")
            (match_operand:V32QI 2 "nonimmediate_operand" "vm"))
          (parallel [(const_int 0) (const_int 32)
                     (const_int 1) (const_int 33)
                     (const_int 2) (const_int 34)
                     (const_int 3) (const_int 35)
                     (const_int 4) (const_int 36)
                     (const_int 5) (const_int 37)
                     (const_int 6) (const_int 38)
                     (const_int 7) (const_int 39)
                     (const_int 16) (const_int 48)
                     (const_int 17) (const_int 49)
                     (const_int 18) (const_int 50)
                     (const_int 19) (const_int 51)
                     (const_int 20) (const_int 52)
                     (const_int 21) (const_int 53)
                     (const_int 22) (const_int 54)
                     (const_int 23) (const_int 55)])))]
  "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
  "vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "OI")])
11852
;; punpcklbw / vpunpcklbw on V16QI: interleaves the lower eight bytes of
;; operand 1 (indices 0-7) with the lower eight bytes of operand 2
;; (indices 16-23 of the concatenation).
;; The second alternative accepts "v" registers and a mask operand, so
;; it may need an EVEX encoding; its "prefix" attribute is "maybe_evex"
;; rather than "vex".
(define_insn "vec_interleave_lowv16qi<mask_name>"
  [(set (match_operand:V16QI 0 "register_operand" "=x,v")
        (vec_select:V16QI
          (vec_concat:V32QI
            (match_operand:V16QI 1 "register_operand" "0,v")
            (match_operand:V16QI 2 "nonimmediate_operand" "xm,vm"))
          (parallel [(const_int 0) (const_int 16)
                     (const_int 1) (const_int 17)
                     (const_int 2) (const_int 18)
                     (const_int 3) (const_int 19)
                     (const_int 4) (const_int 20)
                     (const_int 5) (const_int 21)
                     (const_int 6) (const_int 22)
                     (const_int 7) (const_int 23)])))]
  "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
  "@
   punpcklbw\t{%2, %0|%0, %2}
   vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,maybe_evex")
   (set_attr "mode" "TI")])
11876
;; vpunpckhwd on V32HI: interleaves words of operands 1 and 2.  In the
;; concatenation, indices 0-31 refer to operand 1 and 32-63 to operand 2;
;; within each group of eight words the upper four (4-7, 12-15, 20-23,
;; 28-31) are selected and paired with the same positions of operand 2.
(define_insn "avx512bw_interleave_highv32hi<mask_name>"
  [(set (match_operand:V32HI 0 "register_operand" "=v")
        (vec_select:V32HI
          (vec_concat:V64HI
            (match_operand:V32HI 1 "register_operand" "v")
            (match_operand:V32HI 2 "nonimmediate_operand" "vm"))
          (parallel
            [(const_int 4) (const_int 36) (const_int 5) (const_int 37)
             (const_int 6) (const_int 38) (const_int 7) (const_int 39)
             (const_int 12) (const_int 44) (const_int 13) (const_int 45)
             (const_int 14) (const_int 46) (const_int 15) (const_int 47)
             (const_int 20) (const_int 52) (const_int 21) (const_int 53)
             (const_int 22) (const_int 54) (const_int 23) (const_int 55)
             (const_int 28) (const_int 60) (const_int 29) (const_int 61)
             (const_int 30) (const_int 62) (const_int 31) (const_int 63)])))]
  "TARGET_AVX512BW"
  "vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
11904
;; vpunpckhwd on V16HI: interleaves words of operands 1 and 2.  In the
;; concatenation, indices 0-15 refer to operand 1 and 16-31 to operand 2;
;; within each group of eight words the upper four (4-7, 12-15) are
;; selected and paired with the same positions of operand 2.
(define_insn "avx2_interleave_highv16hi<mask_name>"
  [(set (match_operand:V16HI 0 "register_operand" "=v")
        (vec_select:V16HI
          (vec_concat:V32HI
            (match_operand:V16HI 1 "register_operand" "v")
            (match_operand:V16HI 2 "nonimmediate_operand" "vm"))
          (parallel
            [(const_int 4) (const_int 20) (const_int 5) (const_int 21)
             (const_int 6) (const_int 22) (const_int 7) (const_int 23)
             (const_int 12) (const_int 28) (const_int 13) (const_int 29)
             (const_int 14) (const_int 30) (const_int 15) (const_int 31)])))]
  "TARGET_AVX2
   && <mask_avx512vl_condition>
   && <mask_avx512bw_condition>"
  "vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "OI")])
11924
;; 128-bit high word interleave: elements 4-7 of operand 1 alternate with
;; elements 4-7 of operand 2 (indices 12-15 of the V16HI concatenation).
;; Alternative 0 is the two-operand SSE2 form (operand 1 tied to the
;; destination, "isa" noavx); alternative 1 is the three-operand AVX form,
;; which is also the only one that prints <mask_operand3>.
8c409b91 11925(define_insn "vec_interleave_highv8hi<mask_name>"
11926 [(set (match_operand:V8HI 0 "register_operand" "=x,v")
5802c0cb 11927 (vec_select:V8HI
11928 (vec_concat:V16HI
8c409b91 11929 (match_operand:V8HI 1 "register_operand" "0,v")
11930 (match_operand:V8HI 2 "nonimmediate_operand" "xm,vm"))
5802c0cb 11931 (parallel [(const_int 4) (const_int 12)
11932 (const_int 5) (const_int 13)
11933 (const_int 6) (const_int 14)
11934 (const_int 7) (const_int 15)])))]
8c409b91 11935 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
b11a97b3 11936 "@
11937 punpckhwd\t{%2, %0|%0, %2}
8c409b91 11938 vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
b11a97b3 11939 [(set_attr "isa" "noavx,avx")
11940 (set_attr "type" "sselog")
11941 (set_attr "prefix_data16" "1,*")
8c409b91 11942 (set_attr "prefix" "orig,maybe_vex")
ed30e0a6 11943 (set_attr "mode" "TI")])
11944
;; 512-bit low word interleave, emitted as vpunpcklwd.
;; In the V64HI concatenation indices 0-31 are operand 1 and 32-63 are
;; operand 2; for every group of eight HImode elements the selector
;; alternates elements 0-3 of operand 1 with the matching elements of
;; operand 2.
8c409b91 11945(define_insn "<mask_codefor>avx512bw_interleave_lowv32hi<mask_name>"
11946 [(set (match_operand:V32HI 0 "register_operand" "=v")
11947 (vec_select:V32HI
11948 (vec_concat:V64HI
11949 (match_operand:V32HI 1 "register_operand" "v")
11950 (match_operand:V32HI 2 "nonimmediate_operand" "vm"))
11951 (parallel [(const_int 0) (const_int 32)
11952 (const_int 1) (const_int 33)
11953 (const_int 2) (const_int 34)
11954 (const_int 3) (const_int 35)
11955 (const_int 8) (const_int 40)
11956 (const_int 9) (const_int 41)
11957 (const_int 10) (const_int 42)
11958 (const_int 11) (const_int 43)
11959 (const_int 16) (const_int 48)
11960 (const_int 17) (const_int 49)
11961 (const_int 18) (const_int 50)
11962 (const_int 19) (const_int 51)
11963 (const_int 24) (const_int 56)
11964 (const_int 25) (const_int 57)
11965 (const_int 26) (const_int 58)
11966 (const_int 27) (const_int 59)])))]
11967 "TARGET_AVX512BW"
11968 "vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11969 [(set_attr "type" "sselog")
11970 (set_attr "prefix" "evex")
11971 (set_attr "mode" "XI")])
11972
;; 256-bit low word interleave, emitted as vpunpcklwd.
;; For each group of eight HImode elements the selector alternates
;; elements 0-3 of operand 1 (indices 0-15 of the V32HI concatenation)
;; with the matching elements of operand 2 (indices 16-31).
11973(define_insn "avx2_interleave_lowv16hi<mask_name>"
11974 [(set (match_operand:V16HI 0 "register_operand" "=v")
5deb404d 11975 (vec_select:V16HI
11976 (vec_concat:V32HI
8c409b91 11977 (match_operand:V16HI 1 "register_operand" "v")
11978 (match_operand:V16HI 2 "nonimmediate_operand" "vm"))
5deb404d 11979 (parallel [(const_int 0) (const_int 16)
11980 (const_int 1) (const_int 17)
11981 (const_int 2) (const_int 18)
11982 (const_int 3) (const_int 19)
11983 (const_int 8) (const_int 24)
11984 (const_int 9) (const_int 25)
11985 (const_int 10) (const_int 26)
11986 (const_int 11) (const_int 27)])))]
8c409b91 11987 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
11988 "vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
5deb404d 11989 [(set_attr "type" "sselog")
8c409b91 11990 (set_attr "prefix" "maybe_evex")
5deb404d 11991 (set_attr "mode" "OI")])
11992
;; 128-bit low word interleave: elements 0-3 of operand 1 alternate with
;; elements 0-3 of operand 2 (indices 8-11 of the V16HI concatenation).
;; Alternative 0 is the destructive SSE2 punpcklwd, alternative 1 the
;; three-operand vpunpcklwd that can also carry <mask_operand3>.
8c409b91 11993(define_insn "vec_interleave_lowv8hi<mask_name>"
11994 [(set (match_operand:V8HI 0 "register_operand" "=x,v")
5802c0cb 11995 (vec_select:V8HI
11996 (vec_concat:V16HI
8c409b91 11997 (match_operand:V8HI 1 "register_operand" "0,v")
11998 (match_operand:V8HI 2 "nonimmediate_operand" "xm,vm"))
5802c0cb 11999 (parallel [(const_int 0) (const_int 8)
12000 (const_int 1) (const_int 9)
12001 (const_int 2) (const_int 10)
12002 (const_int 3) (const_int 11)])))]
8c409b91 12003 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
b11a97b3 12004 "@
12005 punpcklwd\t{%2, %0|%0, %2}
8c409b91 12006 vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
b11a97b3 12007 [(set_attr "isa" "noavx,avx")
12008 (set_attr "type" "sselog")
12009 (set_attr "prefix_data16" "1,*")
8c409b91 12010 (set_attr "prefix" "orig,maybe_evex")
ed30e0a6 12011 (set_attr "mode" "TI")])
12012
;; 256-bit high doubleword interleave, emitted as vpunpckhdq.
;; For each group of four SImode elements the selector alternates
;; elements 2-3 of operand 1 (indices 0-7 of the V16SI concatenation)
;; with the matching elements of operand 2 (indices 8-15).
8c409b91 12013(define_insn "avx2_interleave_highv8si<mask_name>"
12014 [(set (match_operand:V8SI 0 "register_operand" "=v")
5deb404d 12015 (vec_select:V8SI
12016 (vec_concat:V16SI
8c409b91 12017 (match_operand:V8SI 1 "register_operand" "v")
12018 (match_operand:V8SI 2 "nonimmediate_operand" "vm"))
5deb404d 12019 (parallel [(const_int 2) (const_int 10)
12020 (const_int 3) (const_int 11)
12021 (const_int 6) (const_int 14)
12022 (const_int 7) (const_int 15)])))]
8c409b91 12023 "TARGET_AVX2 && <mask_avx512vl_condition>"
12024 "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
5deb404d 12025 [(set_attr "type" "sselog")
8c409b91 12026 (set_attr "prefix" "maybe_evex")
5deb404d 12027 (set_attr "mode" "OI")])
12028
;; 512-bit high doubleword interleave, emitted as vpunpckhdq.
;; For each group of four SImode elements the selector alternates
;; elements 2-3 of operand 1 (indices 0-15 of the V32SI concatenation)
;; with the matching elements of operand 2 (indices 16-31).
5220cab6 12029(define_insn "<mask_codefor>avx512f_interleave_highv16si<mask_name>"
697a43f8 12030 [(set (match_operand:V16SI 0 "register_operand" "=v")
12031 (vec_select:V16SI
12032 (vec_concat:V32SI
12033 (match_operand:V16SI 1 "register_operand" "v")
12034 (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
12035 (parallel [(const_int 2) (const_int 18)
12036 (const_int 3) (const_int 19)
12037 (const_int 6) (const_int 22)
12038 (const_int 7) (const_int 23)
12039 (const_int 10) (const_int 26)
12040 (const_int 11) (const_int 27)
12041 (const_int 14) (const_int 30)
12042 (const_int 15) (const_int 31)])))]
12043 "TARGET_AVX512F"
5220cab6 12044 "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
697a43f8 12045 [(set_attr "type" "sselog")
12046 (set_attr "prefix" "evex")
12047 (set_attr "mode" "XI")])
12048
12049
;; 128-bit high doubleword interleave: the result is
;; { op1[2], op2[2], op1[3], op2[3] } (indices 6-7 address operand 2 in
;; the V8SI concatenation).
;; Alternative 0 is the destructive SSE2 punpckhdq, alternative 1 the
;; three-operand vpunpckhdq that can also carry <mask_operand3>.
8c409b91 12050(define_insn "vec_interleave_highv4si<mask_name>"
12051 [(set (match_operand:V4SI 0 "register_operand" "=x,v")
5802c0cb 12052 (vec_select:V4SI
12053 (vec_concat:V8SI
8c409b91 12054 (match_operand:V4SI 1 "register_operand" "0,v")
12055 (match_operand:V4SI 2 "nonimmediate_operand" "xm,vm"))
5802c0cb 12056 (parallel [(const_int 2) (const_int 6)
12057 (const_int 3) (const_int 7)])))]
8c409b91 12058 "TARGET_SSE2 && <mask_avx512vl_condition>"
b11a97b3 12059 "@
12060 punpckhdq\t{%2, %0|%0, %2}
8c409b91 12061 vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
b11a97b3 12062 [(set_attr "isa" "noavx,avx")
12063 (set_attr "type" "sselog")
12064 (set_attr "prefix_data16" "1,*")
8c409b91 12065 (set_attr "prefix" "orig,maybe_vex")
ed30e0a6 12066 (set_attr "mode" "TI")])
12067
;; 256-bit low doubleword interleave, emitted as vpunpckldq.
;; For each group of four SImode elements the selector alternates
;; elements 0-1 of operand 1 (indices 0-7 of the V16SI concatenation)
;; with the matching elements of operand 2 (indices 8-15).
8c409b91 12068(define_insn "avx2_interleave_lowv8si<mask_name>"
12069 [(set (match_operand:V8SI 0 "register_operand" "=v")
5deb404d 12070 (vec_select:V8SI
12071 (vec_concat:V16SI
8c409b91 12072 (match_operand:V8SI 1 "register_operand" "v")
12073 (match_operand:V8SI 2 "nonimmediate_operand" "vm"))
5deb404d 12074 (parallel [(const_int 0) (const_int 8)
12075 (const_int 1) (const_int 9)
12076 (const_int 4) (const_int 12)
12077 (const_int 5) (const_int 13)])))]
8c409b91 12078 "TARGET_AVX2 && <mask_avx512vl_condition>"
12079 "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
5deb404d 12080 [(set_attr "type" "sselog")
8c409b91 12081 (set_attr "prefix" "maybe_evex")
5deb404d 12082 (set_attr "mode" "OI")])
12083
;; 512-bit low doubleword interleave, emitted as vpunpckldq.
;; For each group of four SImode elements the selector alternates
;; elements 0-1 of operand 1 (indices 0-15 of the V32SI concatenation)
;; with the matching elements of operand 2 (indices 16-31).
5220cab6 12084(define_insn "<mask_codefor>avx512f_interleave_lowv16si<mask_name>"
697a43f8 12085 [(set (match_operand:V16SI 0 "register_operand" "=v")
12086 (vec_select:V16SI
12087 (vec_concat:V32SI
12088 (match_operand:V16SI 1 "register_operand" "v")
12089 (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
12090 (parallel [(const_int 0) (const_int 16)
12091 (const_int 1) (const_int 17)
12092 (const_int 4) (const_int 20)
12093 (const_int 5) (const_int 21)
12094 (const_int 8) (const_int 24)
12095 (const_int 9) (const_int 25)
12096 (const_int 12) (const_int 28)
12097 (const_int 13) (const_int 29)])))]
12098 "TARGET_AVX512F"
5220cab6 12099 "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
697a43f8 12100 [(set_attr "type" "sselog")
12101 (set_attr "prefix" "evex")
12102 (set_attr "mode" "XI")])
12103
;; 128-bit low doubleword interleave: the result is
;; { op1[0], op2[0], op1[1], op2[1] } (indices 4-5 address operand 2 in
;; the V8SI concatenation).
;; Alternative 0 is the destructive SSE2 punpckldq, alternative 1 the
;; three-operand vpunpckldq that can also carry <mask_operand3>.
;; NOTE(review): the "prefix" attribute here is "orig,vex" while the
;; otherwise parallel vec_interleave_highv4si uses "orig,maybe_vex";
;; confirm which is intended for the "v"-constrained masked alternative.
8c409b91 12104(define_insn "vec_interleave_lowv4si<mask_name>"
12105 [(set (match_operand:V4SI 0 "register_operand" "=x,v")
5802c0cb 12106 (vec_select:V4SI
12107 (vec_concat:V8SI
8c409b91 12108 (match_operand:V4SI 1 "register_operand" "0,v")
12109 (match_operand:V4SI 2 "nonimmediate_operand" "xm,vm"))
5802c0cb 12110 (parallel [(const_int 0) (const_int 4)
12111 (const_int 1) (const_int 5)])))]
8c409b91 12112 "TARGET_SSE2 && <mask_avx512vl_condition>"
b11a97b3 12113 "@
12114 punpckldq\t{%2, %0|%0, %2}
8c409b91 12115 vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
b11a97b3 12116 [(set_attr "isa" "noavx,avx")
12117 (set_attr "type" "sselog")
12118 (set_attr "prefix_data16" "1,*")
12119 (set_attr "prefix" "orig,vex")
ed30e0a6 12120 (set_attr "mode" "TI")])
12121
;; Full-width (cross-lane) high interleave for 256-bit integer vectors.
;; The AVX2 unpack patterns work on each 128-bit half separately, so both
;; the per-lane low and per-lane high interleaves are computed and the
;; upper 128-bit halves of the two results are then glued together.
;; Operand constraints are omitted: they have no effect in a define_expand.
c241ed0e 12122(define_expand "vec_interleave_high<mode>"
12123 [(match_operand:VI_256 0 "register_operand")
12124 (match_operand:VI_256 1 "register_operand")
12125 (match_operand:VI_256 2 "nonimmediate_operand")]
12126 "TARGET_AVX2"
12127{
12128 rtx t1 = gen_reg_rtx (<MODE>mode);
12129 rtx t2 = gen_reg_rtx (<MODE>mode);
09e640e6 12130 rtx t3 = gen_reg_rtx (V4DImode);
  /* t1 = per-lane low interleave, t2 = per-lane high interleave.  */
c241ed0e 12131 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
12132 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
  /* Selector 0x31: low result half = upper half of t1, high result half
     = upper half of t2.  The permute is done in V4DImode, hence the
     gen_lowpart reinterpretations.  */
09e640e6 12133 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
12134 gen_lowpart (V4DImode, t2),
12135 GEN_INT (1 + (3 << 4))));
12136 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
c241ed0e 12137 DONE;
12138})
12139
;; Full-width (cross-lane) low interleave for 256-bit integer vectors.
;; The AVX2 unpack patterns work on each 128-bit half separately, so both
;; the per-lane low and per-lane high interleaves are computed and the
;; lower 128-bit halves of the two results are then glued together.
;; Operand constraints are omitted: they have no effect in a define_expand.
12140(define_expand "vec_interleave_low<mode>"
12141 [(match_operand:VI_256 0 "register_operand")
12142 (match_operand:VI_256 1 "register_operand")
12143 (match_operand:VI_256 2 "nonimmediate_operand")]
12144 "TARGET_AVX2"
12145{
12146 rtx t1 = gen_reg_rtx (<MODE>mode);
12147 rtx t2 = gen_reg_rtx (<MODE>mode);
09e640e6 12148 rtx t3 = gen_reg_rtx (V4DImode);
  /* t1 = per-lane low interleave, t2 = per-lane high interleave.  */
c241ed0e 12149 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
12150 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
  /* Selector 0x20: low result half = lower half of t1, high result half
     = lower half of t2.  The permute is done in V4DImode, hence the
     gen_lowpart reinterpretations.  */
09e640e6 12151 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
12152 gen_lowpart (V4DImode, t2),
12153 GEN_INT (0 + (2 << 4))));
12154 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
c241ed0e 12155 DONE;
12156})
12157
d3d379e7 12158;; Modes handled by pinsr patterns.
;; V8HI carries no extra condition of its own; the byte, doubleword and
;; quadword element variants require SSE4.1, and the quadword variant
;; additionally requires a 64-bit target.
12159(define_mode_iterator PINSR_MODE
12160 [(V16QI "TARGET_SSE4_1") V8HI
12161 (V4SI "TARGET_SSE4_1")
12162 (V2DI "TARGET_SSE4_1 && TARGET_64BIT")])
12163
;; Pattern-name prefix for the pinsr insn of each PINSR_MODE mode:
;; "sse2" for V8HI, "sse4_1" for the other three modes.
12164(define_mode_attr sse2p4_1
12165 [(V16QI "sse4_1") (V8HI "sse2")
12166 (V4SI "sse4_1") (V2DI "sse4_1")])
12167
12168;; sse4_1_pinsrd must come before sse2_loadld since it is preferred.
;; Insert scalar operand 2 into one element of vector operand 1.
;; Operand 3 is the vec_merge mask and must be a single set bit whose
;; position is a valid element number (enforced by the insn condition);
;; the output code rewrites it into that element number for the
;; instruction's immediate.
;; Alternatives: 0 = SSE, source in a general register; 1 = SSE, source in
;; memory; 2 = AVX, register source; 3 = AVX, memory source.
12169(define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"
12170 [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x")
12171 (vec_merge:PINSR_MODE
12172 (vec_duplicate:PINSR_MODE
12173 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m"))
12174 (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x")
abd4f58b 12175 (match_operand:SI 3 "const_int_operand")))]
d3d379e7 12176 "TARGET_SSE2
12177 && ((unsigned) exact_log2 (INTVAL (operands[3]))
12178 < GET_MODE_NUNITS (<MODE>mode))"
5802c0cb 12179{
  /* Convert the one-hot merge mask into the element index.  */
12180 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
b11a97b3 12181
12182 switch (which_alternative)
12183 {
12184 case 0:
  /* Register source narrower than SImode: print it with the %k
     modifier (presumably the 32-bit register name -- confirm against
     the i386 operand-modifier documentation).  */
d3d379e7 12185 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
5deb404d 12186 return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}";
d3d379e7 12187 /* FALLTHRU */
b11a97b3 12188 case 1:
d3d379e7 12189 return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}";
b11a97b3 12190 case 2:
  /* Same register-width handling as case 0, three-operand AVX form.  */
d3d379e7 12191 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
5deb404d 12192 return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
d3d379e7 12193 /* FALLTHRU */
b11a97b3 12194 case 3:
d3d379e7 12195 return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
b11a97b3 12196 default:
12197 gcc_unreachable ();
12198 }
5802c0cb 12199}
b11a97b3 12200 [(set_attr "isa" "noavx,noavx,avx,avx")
12201 (set_attr "type" "sselog")
;; The three computed attributes below only take their special value when
;; TARGET_AVX is off: prefix_rex is "1" for V2DI, prefix_data16 is "1" for
;; V8HI, and prefix_extra is "*" for V8HI and "1" in every other case.
d3d379e7 12202 (set (attr "prefix_rex")
12203 (if_then_else
6be3efec 12204 (and (not (match_test "TARGET_AVX"))
d3d379e7 12205 (eq (const_string "<MODE>mode") (const_string "V2DImode")))
12206 (const_string "1")
12207 (const_string "*")))
12208 (set (attr "prefix_data16")
12209 (if_then_else
6be3efec 12210 (and (not (match_test "TARGET_AVX"))
d3d379e7 12211 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
12212 (const_string "1")
12213 (const_string "*")))
12214 (set (attr "prefix_extra")
12215 (if_then_else
6be3efec 12216 (and (not (match_test "TARGET_AVX"))
d3d379e7 12217 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
12218 (const_string "*")
12219 (const_string "1")))
00a0e418 12220 (set_attr "length_immediate" "1")
b11a97b3 12221 (set_attr "prefix" "orig,orig,vex,vex")
5802c0cb 12222 (set_attr "mode" "TI")])
12223
;; Masked insertion of a quarter-width vector (operand 2) into quarter
;; number operand 3 (0-3) of operand 1; operand 4 is the merge source and
;; operand 5 the write mask.
;;
;; The expander turns the quarter number into a vec_merge selector for the
;; "_1" insn below.  In a vec_merge a set bit I means "element I comes from
;; the first vector", and bit 0 corresponds to element 0, so the bits that
;; must be CLEARED are the ones of the quarter being replaced, counted from
;; the least significant end: quarter 0 -> 0xFFF0 (16 elements) or 0xFC
;; (8 elements), ..., quarter 3 -> 0x0FFF or 0x3F.  The insn decodes the
;; same table back into the instruction immediate.
;; NOTE(review): any other code that builds the "_1" pattern directly must
;; use this same selector encoding -- confirm there are no such users.
4e303d3a 12224(define_expand "<extract_type>_vinsert<shuffletype><extract_suf>_mask"
12225 [(match_operand:AVX512_VEC 0 "register_operand")
12226 (match_operand:AVX512_VEC 1 "register_operand")
5220cab6 12227 (match_operand:<ssequartermode> 2 "nonimmediate_operand")
12228 (match_operand:SI 3 "const_0_to_3_operand")
4e303d3a 12229 (match_operand:AVX512_VEC 4 "register_operand")
5220cab6 12230 (match_operand:<avx512fmaskmode> 5 "register_operand")]
12231 "TARGET_AVX512F"
12232{
4e303d3a 12233 int mask, selector;
12234 mask = INTVAL (operands[3]);
  /* Clear the selector bits of the quarter that operand 2 replaces.  */
6e256598 12235 selector = GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ?
4e303d3a 12236 0xFFFF ^ (0x000F << mask * 4)
12237 : 0xFF ^ (0x03 << mask * 2);
12238 emit_insn (gen_<extract_type>_vinsert<shuffletype><extract_suf>_1_mask
12239 (operands[0], operands[1], operands[2], GEN_INT (selector),
12240 operands[4], operands[5]));
5220cab6 12241 DONE;
5220cab6 12242})
12243
4e303d3a 12244(define_insn "<mask_codefor><extract_type>_vinsert<shuffletype><extract_suf>_1<mask_name>"
12245 [(set (match_operand:AVX512_VEC 0 "register_operand" "=v")
12246 (vec_merge:AVX512_VEC
12247 (match_operand:AVX512_VEC 1 "register_operand" "v")
12248 (vec_duplicate:AVX512_VEC
8e9989b0 12249 (match_operand:<ssequartermode> 2 "nonimmediate_operand" "vm"))
12250 (match_operand:SI 3 "const_int_operand" "n")))]
12251 "TARGET_AVX512F"
12252{
12253 int mask;
4e303d3a 12254 int selector = INTVAL (operands[3]);
12255
  /* Map the vec_merge selector back to the quarter number; the element
     count of the mode decides which column of the table applies.  */
12256 if (selector == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0xFFF0 : 0xFC))
12257 mask = 0;
12258 else if (selector == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0xFF0F : 0xF3))
12259 mask = 1;
12260 else if (selector == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0xF0FF : 0xCF))
12261 mask = 2;
12262 else if (selector == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0x0FFF : 0x3F))
12263 mask = 3;
8e9989b0 12264 else
12265 gcc_unreachable ();
12266
12267 operands[3] = GEN_INT (mask);
12268
4e303d3a 12269 return "vinsert<shuffletype><extract_suf>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
8e9989b0 12270}
12271 [(set_attr "type" "sselog")
12272 (set_attr "length_immediate" "1")
12273 (set_attr "prefix" "evex")
12274 (set_attr "mode" "<sseinsnmode>")])
12275
;; Masked insertion of a half-width vector (operand 2) into operand 1.
;; Operand 3 (0 or 1) chooses the half: 0 dispatches to the vec_set_lo
;; pattern, 1 to the vec_set_hi pattern.  Operand 4 and operand 5 are
;; passed through unchanged as the trailing two arguments of the masked
;; pattern (presumably merge source and write mask).
4e303d3a 12276(define_expand "<extract_type_2>_vinsert<shuffletype><extract_suf_2>_mask"
12277 [(match_operand:AVX512_VEC_2 0 "register_operand")
12278 (match_operand:AVX512_VEC_2 1 "register_operand")
5220cab6 12279 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
12280 (match_operand:SI 3 "const_0_to_1_operand")
4e303d3a 12281 (match_operand:AVX512_VEC_2 4 "register_operand")
5220cab6 12282 (match_operand:<avx512fmaskmode> 5 "register_operand")]
12283 "TARGET_AVX512F"
12284{
12285 int mask = INTVAL (operands[3]);
12286 if (mask == 0)
12287 emit_insn (gen_vec_set_lo_<mode>_mask
12288 (operands[0], operands[1], operands[2],
12289 operands[4], operands[5]));
12290 else
12291 emit_insn (gen_vec_set_hi_<mode>_mask
12292 (operands[0], operands[1], operands[2],
12293 operands[4], operands[5]));
12294 DONE;
12295})
12296
;; Replace the low half of a 16-element 512-bit vector: the result is
;; operand 2 (low half) concatenated with elements 8-15 of operand 1
;; (high half).  Emitted as vinsert<shuffletype>32x8 with immediate 0.
4e303d3a 12297(define_insn "vec_set_lo_<mode><mask_name>"
12298 [(set (match_operand:V16FI 0 "register_operand" "=v")
12299 (vec_concat:V16FI
12300 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
12301 (vec_select:<ssehalfvecmode>
12302 (match_operand:V16FI 1 "register_operand" "v")
12303 (parallel [(const_int 8) (const_int 9)
12304 (const_int 10) (const_int 11)
12305 (const_int 12) (const_int 13)
12306 (const_int 14) (const_int 15)]))))]
12307 "TARGET_AVX512DQ"
12308 "vinsert<shuffletype>32x8\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, $0x0}"
12309 [(set_attr "type" "sselog")
12310 (set_attr "length_immediate" "1")
12311 (set_attr "prefix" "evex")
12312 (set_attr "mode" "<sseinsnmode>")])
12313
;; Replace the high half of a 16-element 512-bit vector: the result keeps
;; elements 0-7 of operand 1 as its low half and takes operand 2 as its
;; high half.  Emitted as vinsert<shuffletype>32x8 with immediate 1.
;; The first vec_concat operand is the low half, so the preserved part of
;; operand 1 has to come first and the inserted operand 2 second.
12314(define_insn "vec_set_hi_<mode><mask_name>"
12315 [(set (match_operand:V16FI 0 "register_operand" "=v")
12316 (vec_concat:V16FI
12317 (vec_select:<ssehalfvecmode>
12318 (match_operand:V16FI 1 "register_operand" "v")
12319 (parallel [(const_int 0) (const_int 1)
12320 (const_int 2) (const_int 3)
12321 (const_int 4) (const_int 5)
12322 (const_int 6) (const_int 7)]))
12323 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
12324 "TARGET_AVX512DQ"
12325 "vinsert<shuffletype>32x8\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, $0x1}"
12326 [(set_attr "type" "sselog")
12327 (set_attr "length_immediate" "1")
12328 (set_attr "prefix" "evex")
12329 (set_attr "mode" "<sseinsnmode>")])
12330
;; Replace the low half of an 8-element 512-bit vector: the result is
;; operand 2 (low half) concatenated with elements 4-7 of operand 1
;; (high half).  Emitted as vinsert<shuffletype>64x4 with immediate 0.
5220cab6 12331(define_insn "vec_set_lo_<mode><mask_name>"
8e9989b0 12332 [(set (match_operand:V8FI 0 "register_operand" "=v")
12333 (vec_concat:V8FI
12334 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
12335 (vec_select:<ssehalfvecmode>
12336 (match_operand:V8FI 1 "register_operand" "v")
12337 (parallel [(const_int 4) (const_int 5)
12338 (const_int 6) (const_int 7)]))))]
12339 "TARGET_AVX512F"
5220cab6 12340 "vinsert<shuffletype>64x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, $0x0}"
8e9989b0 12341 [(set_attr "type" "sselog")
12342 (set_attr "length_immediate" "1")
12343 (set_attr "prefix" "evex")
12344 (set_attr "mode" "XI")])
12345
;; Replace the high half of an 8-element 512-bit vector: the result keeps
;; elements 0-3 of operand 1 as its low half and takes operand 2 as its
;; high half.  Emitted as vinsert<shuffletype>64x4 with immediate 1.
;; The first vec_concat operand is the low half, so the preserved part of
;; operand 1 has to come first and the inserted operand 2 second.
5220cab6 12346(define_insn "vec_set_hi_<mode><mask_name>"
8e9989b0 12347 [(set (match_operand:V8FI 0 "register_operand" "=v")
12348 (vec_concat:V8FI
12349 (vec_select:<ssehalfvecmode>
12350 (match_operand:V8FI 1 "register_operand" "v")
12351 (parallel [(const_int 0) (const_int 1)
12352 (const_int 2) (const_int 3)]))
12353 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
12354 "TARGET_AVX512F"
5220cab6 12355 "vinsert<shuffletype>64x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, $0x1}"
8e9989b0 12356 [(set_attr "type" "sselog")
12357 (set_attr "length_immediate" "1")
12358 (set_attr "prefix" "evex")
12359 (set_attr "mode" "XI")])
12360
;; Masked 256-bit vshuf{f,i}64x2 expander.
;; Operand 3 is a 2-bit immediate.  Bit 0 picks which pair of elements of
;; operand 1 (0-1 or 2-3) forms the low half of the result; bit 1 picks
;; which pair of operand 2 (4-5 or 6-7 in the concatenation) forms the
;; high half.  The immediate is expanded into the four explicit element
;; indices expected by the "_1_mask" insn; operands 4 and 5 are forwarded
;; unchanged.
050e0a37 12361(define_expand "avx512dq_shuf_<shuffletype>64x2_mask"
12362 [(match_operand:VI8F_256 0 "register_operand")
12363 (match_operand:VI8F_256 1 "register_operand")
12364 (match_operand:VI8F_256 2 "nonimmediate_operand")
12365 (match_operand:SI 3 "const_0_to_3_operand")
12366 (match_operand:VI8F_256 4 "register_operand")
12367 (match_operand:QI 5 "register_operand")]
12368 "TARGET_AVX512DQ"
12369{
12370 int mask = INTVAL (operands[3]);
12371 emit_insn (gen_avx512dq_shuf_<shuffletype>64x2_1_mask
12372 (operands[0], operands[1], operands[2],
12373 GEN_INT (((mask >> 0) & 1) * 2 + 0),
12374 GEN_INT (((mask >> 0) & 1) * 2 + 1),
12375 GEN_INT (((mask >> 1) & 1) * 2 + 4),
12376 GEN_INT (((mask >> 1) & 1) * 2 + 5),
12377 operands[4], operands[5]));
12378 DONE;
12379})
12380
;; 256-bit vshuf{f,i}64x2 with the selection spelled out as element
;; indices.  Operands 3/4 select from operand 1 (0-3) and operands 5/6
;; from operand 2 (4-7); the insn condition requires each pair to be
;; consecutive (second index = first index + 1).  The output code folds
;; the first index of each pair back into the 2-bit immediate.
;; NOTE(review): the condition tests TARGET_AVX512VL while the expander
;; that generates this insn tests TARGET_AVX512DQ, and the "mode"
;; attribute is "XI" although the operands are 256-bit -- confirm both
;; are intended.
12381(define_insn "<mask_codefor>avx512dq_shuf_<shuffletype>64x2_1<mask_name>"
12382 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
12383 (vec_select:VI8F_256
12384 (vec_concat:<ssedoublemode>
12385 (match_operand:VI8F_256 1 "register_operand" "v")
12386 (match_operand:VI8F_256 2 "nonimmediate_operand" "vm"))
12387 (parallel [(match_operand 3 "const_0_to_3_operand")
12388 (match_operand 4 "const_0_to_3_operand")
12389 (match_operand 5 "const_4_to_7_operand")
12390 (match_operand 6 "const_4_to_7_operand")])))]
12391 "TARGET_AVX512VL
12392 && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
12393 && INTVAL (operands[5]) == (INTVAL (operands[6]) - 1))"
12394{
12395 int mask;
  /* Bit 0: pair taken from operand 1; bit 1: pair taken from operand 2.  */
12396 mask = INTVAL (operands[3]) / 2;
12397 mask |= (INTVAL (operands[5]) - 4) / 2 << 1;
12398 operands[3] = GEN_INT (mask);
12399 return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand7>|%0<mask_operand7>, %1, %2, %3}";
12400}
12401 [(set_attr "type" "sselog")
12402 (set_attr "length_immediate" "1")
12403 (set_attr "prefix" "evex")
12404 (set_attr "mode" "XI")])
12405
;; Masked 512-bit vshuf{f,i}64x2 expander.
;; Operand 3 is an 8-bit immediate made of four 2-bit fields.  Fields 0
;; and 1 each choose a pair of elements of operand 1 (indices 0-7);
;; fields 2 and 3 each choose a pair of operand 2 (indices 8-15 in the
;; concatenation).  The fields are expanded into the eight explicit
;; element indices expected by the "_1_mask" insn; operands 4 and 5 are
;; forwarded unchanged.
5220cab6 12406(define_expand "avx512f_shuf_<shuffletype>64x2_mask"
12407 [(match_operand:V8FI 0 "register_operand")
12408 (match_operand:V8FI 1 "register_operand")
12409 (match_operand:V8FI 2 "nonimmediate_operand")
12410 (match_operand:SI 3 "const_0_to_255_operand")
12411 (match_operand:V8FI 4 "register_operand")
12412 (match_operand:QI 5 "register_operand")]
12413 "TARGET_AVX512F"
12414{
12415 int mask = INTVAL (operands[3]);
12416 emit_insn (gen_avx512f_shuf_<shuffletype>64x2_1_mask
12417 (operands[0], operands[1], operands[2],
12418 GEN_INT (((mask >> 0) & 3) * 2),
12419 GEN_INT (((mask >> 0) & 3) * 2 + 1),
12420 GEN_INT (((mask >> 2) & 3) * 2),
12421 GEN_INT (((mask >> 2) & 3) * 2 + 1),
12422 GEN_INT (((mask >> 4) & 3) * 2 + 8),
12423 GEN_INT (((mask >> 4) & 3) * 2 + 9),
12424 GEN_INT (((mask >> 6) & 3) * 2 + 8),
12425 GEN_INT (((mask >> 6) & 3) * 2 + 9),
12426 operands[4], operands[5]));
12427 DONE;
12428})
12429
;; 512-bit vshuf{f,i}64x2 with the selection spelled out as element
;; indices.  Operands 3-6 select from operand 1 (0-7) and operands 7-10
;; from operand 2 (8-15).  The insn condition requires the indices to form
;; four consecutive pairs (3/4, 5/6, 7/8, 9/10).  The output code packs the
;; first index of each pair into the four 2-bit fields of the immediate.
12430(define_insn "avx512f_shuf_<shuffletype>64x2_1<mask_name>"
8e9989b0 12431 [(set (match_operand:V8FI 0 "register_operand" "=v")
12432 (vec_select:V8FI
12433 (vec_concat:<ssedoublemode>
12434 (match_operand:V8FI 1 "register_operand" "v")
12435 (match_operand:V8FI 2 "nonimmediate_operand" "vm"))
12436 (parallel [(match_operand 3 "const_0_to_7_operand")
12437 (match_operand 4 "const_0_to_7_operand")
12438 (match_operand 5 "const_0_to_7_operand")
12439 (match_operand 6 "const_0_to_7_operand")
12440 (match_operand 7 "const_8_to_15_operand")
12441 (match_operand 8 "const_8_to_15_operand")
12442 (match_operand 9 "const_8_to_15_operand")
12443 (match_operand 10 "const_8_to_15_operand")])))]
12444 "TARGET_AVX512F
12445 && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
12446 && INTVAL (operands[5]) == (INTVAL (operands[6]) - 1)
12447 && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
12448 && INTVAL (operands[9]) == (INTVAL (operands[10]) - 1))"
12449{
12450 int mask;
  /* Two bits per result pair; operand-2 indices are rebased by 8.  */
12451 mask = INTVAL (operands[3]) / 2;
12452 mask |= INTVAL (operands[5]) / 2 << 2;
12453 mask |= (INTVAL (operands[7]) - 8) / 2 << 4;
12454 mask |= (INTVAL (operands[9]) - 8) / 2 << 6;
12455 operands[3] = GEN_INT (mask);
12456
5220cab6 12457 return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
8e9989b0 12458}
12459 [(set_attr "type" "sselog")
12460 (set_attr "length_immediate" "1")
12461 (set_attr "prefix" "evex")
12462 (set_attr "mode" "<sseinsnmode>")])
12463
;; Masked 256-bit vshuf{f,i}32x4 expander.
;; Operand 3 is a 2-bit immediate.  Bit 0 picks which group of four
;; elements of operand 1 (0-3 or 4-7) forms the low half of the result;
;; bit 1 picks which group of operand 2 (8-11 or 12-15 in the
;; concatenation) forms the high half.  The immediate is expanded into the
;; eight explicit element indices expected by the "_1_mask" insn; operands
;; 4 and 5 are forwarded unchanged.
050e0a37 12464(define_expand "avx512vl_shuf_<shuffletype>32x4_mask"
12465 [(match_operand:VI4F_256 0 "register_operand")
12466 (match_operand:VI4F_256 1 "register_operand")
12467 (match_operand:VI4F_256 2 "nonimmediate_operand")
12468 (match_operand:SI 3 "const_0_to_3_operand")
12469 (match_operand:VI4F_256 4 "register_operand")
12470 (match_operand:QI 5 "register_operand")]
12471 "TARGET_AVX512VL"
12472{
12473 int mask = INTVAL (operands[3]);
12474 emit_insn (gen_avx512vl_shuf_<shuffletype>32x4_1_mask
12475 (operands[0], operands[1], operands[2],
12476 GEN_INT (((mask >> 0) & 1) * 4 + 0),
12477 GEN_INT (((mask >> 0) & 1) * 4 + 1),
12478 GEN_INT (((mask >> 0) & 1) * 4 + 2),
12479 GEN_INT (((mask >> 0) & 1) * 4 + 3),
12480 GEN_INT (((mask >> 1) & 1) * 4 + 8),
12481 GEN_INT (((mask >> 1) & 1) * 4 + 9),
12482 GEN_INT (((mask >> 1) & 1) * 4 + 10),
12483 GEN_INT (((mask >> 1) & 1) * 4 + 11),
12484 operands[4], operands[5]));
12485 DONE;
12486})
12487
;; 256-bit vshuf{f,i}32x4 with the selection spelled out as element
;; indices.  Operands 3-6 select from operand 1 (0-7) and operands 7-10
;; from operand 2 (8-15).  The insn condition requires operands 3-6 and
;; operands 7-10 to each be a run of four consecutive indices.  The output
;; code turns the first index of each run into one bit of the immediate.
12488(define_insn "<mask_codefor>avx512vl_shuf_<shuffletype>32x4_1<mask_name>"
12489 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
12490 (vec_select:VI4F_256
12491 (vec_concat:<ssedoublemode>
12492 (match_operand:VI4F_256 1 "register_operand" "v")
12493 (match_operand:VI4F_256 2 "nonimmediate_operand" "vm"))
12494 (parallel [(match_operand 3 "const_0_to_7_operand")
12495 (match_operand 4 "const_0_to_7_operand")
12496 (match_operand 5 "const_0_to_7_operand")
12497 (match_operand 6 "const_0_to_7_operand")
12498 (match_operand 7 "const_8_to_15_operand")
12499 (match_operand 8 "const_8_to_15_operand")
12500 (match_operand 9 "const_8_to_15_operand")
12501 (match_operand 10 "const_8_to_15_operand")])))]
12502 "TARGET_AVX512VL
12503 && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
12504 && INTVAL (operands[3]) == (INTVAL (operands[5]) - 2)
12505 && INTVAL (operands[3]) == (INTVAL (operands[6]) - 3)
12506 && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
12507 && INTVAL (operands[7]) == (INTVAL (operands[9]) - 2)
12508 && INTVAL (operands[7]) == (INTVAL (operands[10]) - 3))"
12509{
12510 int mask;
  /* Bit 0: group taken from operand 1; bit 1: group taken from operand 2
     (indices rebased by 8).  */
12511 mask = INTVAL (operands[3]) / 4;
12512 mask |= (INTVAL (operands[7]) - 8) / 4 << 1;
12513 operands[3] = GEN_INT (mask);
12514
12515 return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
12516}
12517 [(set_attr "type" "sselog")
12518 (set_attr "length_immediate" "1")
12519 (set_attr "prefix" "evex")
12520 (set_attr "mode" "<sseinsnmode>")])
12521
;; Masked 512-bit vshuf{f,i}32x4 expander.
;; Operand 3 is an 8-bit immediate made of four 2-bit fields.  Fields 0
;; and 1 each choose a group of four elements of operand 1 (indices
;; 0-15); fields 2 and 3 each choose a group of operand 2 (indices 16-31
;; in the concatenation).  The fields are expanded into the sixteen
;; explicit element indices expected by the "_1_mask" insn; operands 4 and
;; 5 are forwarded unchanged.
5220cab6 12522(define_expand "avx512f_shuf_<shuffletype>32x4_mask"
12523 [(match_operand:V16FI 0 "register_operand")
12524 (match_operand:V16FI 1 "register_operand")
12525 (match_operand:V16FI 2 "nonimmediate_operand")
12526 (match_operand:SI 3 "const_0_to_255_operand")
12527 (match_operand:V16FI 4 "register_operand")
12528 (match_operand:HI 5 "register_operand")]
12529 "TARGET_AVX512F"
12530{
12531 int mask = INTVAL (operands[3]);
12532 emit_insn (gen_avx512f_shuf_<shuffletype>32x4_1_mask
12533 (operands[0], operands[1], operands[2],
12534 GEN_INT (((mask >> 0) & 3) * 4),
12535 GEN_INT (((mask >> 0) & 3) * 4 + 1),
12536 GEN_INT (((mask >> 0) & 3) * 4 + 2),
12537 GEN_INT (((mask >> 0) & 3) * 4 + 3),
12538 GEN_INT (((mask >> 2) & 3) * 4),
12539 GEN_INT (((mask >> 2) & 3) * 4 + 1),
12540 GEN_INT (((mask >> 2) & 3) * 4 + 2),
12541 GEN_INT (((mask >> 2) & 3) * 4 + 3),
12542 GEN_INT (((mask >> 4) & 3) * 4 + 16),
12543 GEN_INT (((mask >> 4) & 3) * 4 + 17),
12544 GEN_INT (((mask >> 4) & 3) * 4 + 18),
12545 GEN_INT (((mask >> 4) & 3) * 4 + 19),
12546 GEN_INT (((mask >> 6) & 3) * 4 + 16),
12547 GEN_INT (((mask >> 6) & 3) * 4 + 17),
12548 GEN_INT (((mask >> 6) & 3) * 4 + 18),
12549 GEN_INT (((mask >> 6) & 3) * 4 + 19),
12550 operands[4], operands[5]));
12551 DONE;
12552})
12553
;; 512-bit vshuf{f,i}32x4 with the selection spelled out as element
;; indices.  Operands 3-10 select from operand 1 (0-15) and operands
;; 11-18 from operand 2 (16-31).  The insn condition requires the sixteen
;; indices to form four runs of four consecutive values (3-6, 7-10, 11-14,
;; 15-18).  The output code packs the first index of each run into the
;; four 2-bit fields of the immediate.
12554(define_insn "avx512f_shuf_<shuffletype>32x4_1<mask_name>"
8e9989b0 12555 [(set (match_operand:V16FI 0 "register_operand" "=v")
12556 (vec_select:V16FI
12557 (vec_concat:<ssedoublemode>
12558 (match_operand:V16FI 1 "register_operand" "v")
12559 (match_operand:V16FI 2 "nonimmediate_operand" "vm"))
12560 (parallel [(match_operand 3 "const_0_to_15_operand")
12561 (match_operand 4 "const_0_to_15_operand")
12562 (match_operand 5 "const_0_to_15_operand")
12563 (match_operand 6 "const_0_to_15_operand")
12564 (match_operand 7 "const_0_to_15_operand")
12565 (match_operand 8 "const_0_to_15_operand")
12566 (match_operand 9 "const_0_to_15_operand")
12567 (match_operand 10 "const_0_to_15_operand")
12568 (match_operand 11 "const_16_to_31_operand")
12569 (match_operand 12 "const_16_to_31_operand")
12570 (match_operand 13 "const_16_to_31_operand")
12571 (match_operand 14 "const_16_to_31_operand")
12572 (match_operand 15 "const_16_to_31_operand")
12573 (match_operand 16 "const_16_to_31_operand")
12574 (match_operand 17 "const_16_to_31_operand")
12575 (match_operand 18 "const_16_to_31_operand")])))]
12576 "TARGET_AVX512F
12577 && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
12578 && INTVAL (operands[3]) == (INTVAL (operands[5]) - 2)
12579 && INTVAL (operands[3]) == (INTVAL (operands[6]) - 3)
12580 && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
12581 && INTVAL (operands[7]) == (INTVAL (operands[9]) - 2)
12582 && INTVAL (operands[7]) == (INTVAL (operands[10]) - 3)
12583 && INTVAL (operands[11]) == (INTVAL (operands[12]) - 1)
12584 && INTVAL (operands[11]) == (INTVAL (operands[13]) - 2)
12585 && INTVAL (operands[11]) == (INTVAL (operands[14]) - 3)
12586 && INTVAL (operands[15]) == (INTVAL (operands[16]) - 1)
12587 && INTVAL (operands[15]) == (INTVAL (operands[17]) - 2)
12588 && INTVAL (operands[15]) == (INTVAL (operands[18]) - 3))"
12589{
12590 int mask;
  /* Two bits per run of four; operand-2 indices are rebased by 16.  */
12591 mask = INTVAL (operands[3]) / 4;
12592 mask |= INTVAL (operands[7]) / 4 << 2;
12593 mask |= (INTVAL (operands[11]) - 16) / 4 << 4;
12594 mask |= (INTVAL (operands[15]) - 16) / 4 << 6;
12595 operands[3] = GEN_INT (mask);
12596
5220cab6 12597 return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
8e9989b0 12598}
12599 [(set_attr "type" "sselog")
12600 (set_attr "length_immediate" "1")
12601 (set_attr "prefix" "evex")
12602 (set_attr "mode" "<sseinsnmode>")])
12603
;; Masked 512-bit vpshufd expander.
;; Operand 2 is an 8-bit immediate of four 2-bit element selectors.  The
;; same four selectors are applied to each group of four SImode elements,
;; so they are replicated with offsets 0, 4, 8 and 12 to produce the
;; sixteen explicit indices expected by avx512f_pshufd_1_mask.  Operands 3
;; and 4 are forwarded unchanged.
5220cab6 12604(define_expand "avx512f_pshufdv3_mask"
12605 [(match_operand:V16SI 0 "register_operand")
12606 (match_operand:V16SI 1 "nonimmediate_operand")
12607 (match_operand:SI 2 "const_0_to_255_operand")
12608 (match_operand:V16SI 3 "register_operand")
12609 (match_operand:HI 4 "register_operand")]
12610 "TARGET_AVX512F"
12611{
12612 int mask = INTVAL (operands[2]);
12613 emit_insn (gen_avx512f_pshufd_1_mask (operands[0], operands[1],
12614 GEN_INT ((mask >> 0) & 3),
12615 GEN_INT ((mask >> 2) & 3),
12616 GEN_INT ((mask >> 4) & 3),
12617 GEN_INT ((mask >> 6) & 3),
12618 GEN_INT (((mask >> 0) & 3) + 4),
12619 GEN_INT (((mask >> 2) & 3) + 4),
12620 GEN_INT (((mask >> 4) & 3) + 4),
12621 GEN_INT (((mask >> 6) & 3) + 4),
12622 GEN_INT (((mask >> 0) & 3) + 8),
12623 GEN_INT (((mask >> 2) & 3) + 8),
12624 GEN_INT (((mask >> 4) & 3) + 8),
12625 GEN_INT (((mask >> 6) & 3) + 8),
12626 GEN_INT (((mask >> 0) & 3) + 12),
12627 GEN_INT (((mask >> 2) & 3) + 12),
12628 GEN_INT (((mask >> 4) & 3) + 12),
12629 GEN_INT (((mask >> 6) & 3) + 12),
12630 operands[3], operands[4]));
12631 DONE;
12632})
12633
;; 512-bit vpshufd with the selection spelled out as sixteen element
;; indices.  Operands 2-5 are the selectors for the first group of four
;; elements (0-3); the insn condition requires operands 6-9, 10-13 and
;; 14-17 to be the same selectors offset by 4, 8 and 12 respectively.
;; The output code packs operands 2-5 into the 8-bit immediate.
12634(define_insn "avx512f_pshufd_1<mask_name>"
8e9989b0 12635 [(set (match_operand:V16SI 0 "register_operand" "=v")
12636 (vec_select:V16SI
12637 (match_operand:V16SI 1 "nonimmediate_operand" "vm")
12638 (parallel [(match_operand 2 "const_0_to_3_operand")
12639 (match_operand 3 "const_0_to_3_operand")
12640 (match_operand 4 "const_0_to_3_operand")
12641 (match_operand 5 "const_0_to_3_operand")
12642 (match_operand 6 "const_4_to_7_operand")
12643 (match_operand 7 "const_4_to_7_operand")
12644 (match_operand 8 "const_4_to_7_operand")
12645 (match_operand 9 "const_4_to_7_operand")
12646 (match_operand 10 "const_8_to_11_operand")
12647 (match_operand 11 "const_8_to_11_operand")
12648 (match_operand 12 "const_8_to_11_operand")
12649 (match_operand 13 "const_8_to_11_operand")
12650 (match_operand 14 "const_12_to_15_operand")
12651 (match_operand 15 "const_12_to_15_operand")
12652 (match_operand 16 "const_12_to_15_operand")
12653 (match_operand 17 "const_12_to_15_operand")])))]
12654 "TARGET_AVX512F
12655 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
12656 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
12657 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
12658 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])
12659 && INTVAL (operands[2]) + 8 == INTVAL (operands[10])
12660 && INTVAL (operands[3]) + 8 == INTVAL (operands[11])
12661 && INTVAL (operands[4]) + 8 == INTVAL (operands[12])
12662 && INTVAL (operands[5]) + 8 == INTVAL (operands[13])
12663 && INTVAL (operands[2]) + 12 == INTVAL (operands[14])
12664 && INTVAL (operands[3]) + 12 == INTVAL (operands[15])
12665 && INTVAL (operands[4]) + 12 == INTVAL (operands[16])
12666 && INTVAL (operands[5]) + 12 == INTVAL (operands[17])"
12667{
  /* Two bits per selector, lowest element first.  */
12668 int mask = 0;
12669 mask |= INTVAL (operands[2]) << 0;
12670 mask |= INTVAL (operands[3]) << 2;
12671 mask |= INTVAL (operands[4]) << 4;
12672 mask |= INTVAL (operands[5]) << 6;
12673 operands[2] = GEN_INT (mask);
12674
5220cab6 12675 return "vpshufd\t{%2, %1, %0<mask_operand18>|%0<mask_operand18>, %1, %2}";
8e9989b0 12676}
12677 [(set_attr "type" "sselog1")
12678 (set_attr "prefix" "evex")
12679 (set_attr "length_immediate" "1")
12680 (set_attr "mode" "XI")])
12681
;; Expander for the V8SI form taking an 8-bit immediate (operand 2).  It splits
;; the immediate into four 2-bit selectors, repeats them offset by 4 for the
;; upper four indices, and forwards everything to gen_avx2_pshufd_1_mask.
;; NOTE(review): operands 3 and 4 are presumably the merge source and the
;; write mask expected by the _mask variant -- confirm against the subst.
050e0a37 12682(define_expand "avx512vl_pshufdv3_mask"
12683 [(match_operand:V8SI 0 "register_operand")
12684 (match_operand:V8SI 1 "nonimmediate_operand")
12685 (match_operand:SI 2 "const_0_to_255_operand")
12686 (match_operand:V8SI 3 "register_operand")
12687 (match_operand:QI 4 "register_operand")]
12688 "TARGET_AVX512VL"
12689{
12690 int mask = INTVAL (operands[2]);
12691 emit_insn (gen_avx2_pshufd_1_mask (operands[0], operands[1],
12692 GEN_INT ((mask >> 0) & 3),
12693 GEN_INT ((mask >> 2) & 3),
12694 GEN_INT ((mask >> 4) & 3),
12695 GEN_INT ((mask >> 6) & 3),
12696 GEN_INT (((mask >> 0) & 3) + 4),
12697 GEN_INT (((mask >> 2) & 3) + 4),
12698 GEN_INT (((mask >> 4) & 3) + 4),
12699 GEN_INT (((mask >> 6) & 3) + 4),
12700 operands[3], operands[4]));
12701 DONE;
12702})
12703
;; Expander for the unmasked V8SI form.  The 8-bit immediate in operand 2 is
;; decoded into four 2-bit selectors; the same selectors plus 4 are used for
;; the upper four indices, and the result is emitted via gen_avx2_pshufd_1.
5deb404d 12704(define_expand "avx2_pshufdv3"
abd4f58b 12705 [(match_operand:V8SI 0 "register_operand")
12706 (match_operand:V8SI 1 "nonimmediate_operand")
12707 (match_operand:SI 2 "const_0_to_255_operand")]
5deb404d 12708 "TARGET_AVX2"
12709{
12710 int mask = INTVAL (operands[2]);
12711 emit_insn (gen_avx2_pshufd_1 (operands[0], operands[1],
12712 GEN_INT ((mask >> 0) & 3),
12713 GEN_INT ((mask >> 2) & 3),
12714 GEN_INT ((mask >> 4) & 3),
a9e4de7b 12715 GEN_INT ((mask >> 6) & 3),
12716 GEN_INT (((mask >> 0) & 3) + 4),
12717 GEN_INT (((mask >> 2) & 3) + 4),
12718 GEN_INT (((mask >> 4) & 3) + 4),
12719 GEN_INT (((mask >> 6) & 3) + 4)));
5deb404d 12720 DONE;
12721})
12722
;; V8SI vec_select with eight constant indices.  Operands 2-5 are selectors in
;; 0..3 and the insn condition requires operands 6-9 to be the same selectors
;; plus 4.  The output code rebuilds the immediate from operands 2-5 (two bits
;; each), overwrites operands[2] with it and emits vpshufd.
050e0a37 12723(define_insn "avx2_pshufd_1<mask_name>"
12724 [(set (match_operand:V8SI 0 "register_operand" "=v")
5deb404d 12725 (vec_select:V8SI
050e0a37 12726 (match_operand:V8SI 1 "nonimmediate_operand" "vm")
abd4f58b 12727 (parallel [(match_operand 2 "const_0_to_3_operand")
12728 (match_operand 3 "const_0_to_3_operand")
12729 (match_operand 4 "const_0_to_3_operand")
12730 (match_operand 5 "const_0_to_3_operand")
12731 (match_operand 6 "const_4_to_7_operand")
12732 (match_operand 7 "const_4_to_7_operand")
12733 (match_operand 8 "const_4_to_7_operand")
12734 (match_operand 9 "const_4_to_7_operand")])))]
a9e4de7b 12735 "TARGET_AVX2
050e0a37 12736 && <mask_avx512vl_condition>
a9e4de7b 12737 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
12738 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
12739 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
12740 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])"
5deb404d 12741{
12742 int mask = 0;
12743 mask |= INTVAL (operands[2]) << 0;
12744 mask |= INTVAL (operands[3]) << 2;
12745 mask |= INTVAL (operands[4]) << 4;
12746 mask |= INTVAL (operands[5]) << 6;
12747 operands[2] = GEN_INT (mask);
12748
050e0a37 12749 return "vpshufd\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
5deb404d 12750}
12751 [(set_attr "type" "sselog1")
050e0a37 12752 (set_attr "prefix" "maybe_evex")
5deb404d 12753 (set_attr "length_immediate" "1")
12754 (set_attr "mode" "OI")])
12755
;; Expander for the V4SI form taking an 8-bit immediate (operand 2).  The
;; immediate is split into four 2-bit selectors that are passed, together with
;; operands 3 and 4, to gen_sse2_pshufd_1_mask.
;; NOTE(review): operands 3 and 4 are presumably the merge source and the
;; write mask -- confirm against the subst that creates the _mask variant.
050e0a37 12756(define_expand "avx512vl_pshufd_mask"
12757 [(match_operand:V4SI 0 "register_operand")
12758 (match_operand:V4SI 1 "nonimmediate_operand")
12759 (match_operand:SI 2 "const_0_to_255_operand")
12760 (match_operand:V4SI 3 "register_operand")
12761 (match_operand:QI 4 "register_operand")]
12762 "TARGET_AVX512VL"
12763{
12764 int mask = INTVAL (operands[2]);
12765 emit_insn (gen_sse2_pshufd_1_mask (operands[0], operands[1],
12766 GEN_INT ((mask >> 0) & 3),
12767 GEN_INT ((mask >> 2) & 3),
12768 GEN_INT ((mask >> 4) & 3),
12769 GEN_INT ((mask >> 6) & 3),
12770 operands[3], operands[4]));
12771 DONE;
12772})
12773
;; Expander for the unmasked V4SI form.  Operand 2 is any const_int; only its
;; low eight bits are used, decoded as four 2-bit selectors that are handed to
;; gen_sse2_pshufd_1.
5802c0cb 12774(define_expand "sse2_pshufd"
abd4f58b 12775 [(match_operand:V4SI 0 "register_operand")
12776 (match_operand:V4SI 1 "nonimmediate_operand")
12777 (match_operand:SI 2 "const_int_operand")]
5802c0cb 12778 "TARGET_SSE2"
12779{
12780 int mask = INTVAL (operands[2]);
12781 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
12782 GEN_INT ((mask >> 0) & 3),
12783 GEN_INT ((mask >> 2) & 3),
12784 GEN_INT ((mask >> 4) & 3),
12785 GEN_INT ((mask >> 6) & 3)));
12786 DONE;
12787})
12788
;; V4SI vec_select with four constant indices, each in 0..3 (operands 2-5).
;; The output code packs the four selectors into one immediate (two bits each,
;; operand 2 in the low bits), stores it in operands[2] and emits %vpshufd.
050e0a37 12789(define_insn "sse2_pshufd_1<mask_name>"
12790 [(set (match_operand:V4SI 0 "register_operand" "=v")
5802c0cb 12791 (vec_select:V4SI
050e0a37 12792 (match_operand:V4SI 1 "nonimmediate_operand" "vm")
abd4f58b 12793 (parallel [(match_operand 2 "const_0_to_3_operand")
12794 (match_operand 3 "const_0_to_3_operand")
12795 (match_operand 4 "const_0_to_3_operand")
12796 (match_operand 5 "const_0_to_3_operand")])))]
050e0a37 12797 "TARGET_SSE2 && <mask_avx512vl_condition>"
5802c0cb 12798{
12799 int mask = 0;
12800 mask |= INTVAL (operands[2]) << 0;
12801 mask |= INTVAL (operands[3]) << 2;
12802 mask |= INTVAL (operands[4]) << 4;
12803 mask |= INTVAL (operands[5]) << 6;
12804 operands[2] = GEN_INT (mask);
12805
050e0a37 12806 return "%vpshufd\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
5802c0cb 12807}
12808 [(set_attr "type" "sselog1")
1f346cbc 12809 (set_attr "prefix_data16" "1")
050e0a37 12810 (set_attr "prefix" "<mask_prefix2>")
00a0e418 12811 (set_attr "length_immediate" "1")
5802c0cb 12812 (set_attr "mode" "TI")])
12813
;; V32HI form of vpshuflw.  Unlike the narrower patterns it is modelled as an
;; unspec (UNSPEC_PSHUFLW) over the source vector and an immediate in 0..255,
;; and the immediate is printed unchanged.
7b988cc3 12814(define_insn "<mask_codefor>avx512bw_pshuflwv32hi<mask_name>"
12815 [(set (match_operand:V32HI 0 "register_operand" "=v")
12816 (unspec:V32HI
12817 [(match_operand:V32HI 1 "nonimmediate_operand" "vm")
12818 (match_operand:SI 2 "const_0_to_255_operand" "n")]
12819 UNSPEC_PSHUFLW))]
12820 "TARGET_AVX512BW"
12821 "vpshuflw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12822 [(set_attr "type" "sselog")
12823 (set_attr "prefix" "evex")
12824 (set_attr "mode" "XI")])
12825
;; Expander for the V16HI vpshuflw form taking an 8-bit immediate (operand 2).
;; The four 2-bit selectors are passed once as-is and once offset by 8, then
;; forwarded with operands 3 and 4 to gen_avx2_pshuflw_1_mask.
;; NOTE(review): operands 3 and 4 are presumably the merge source and the
;; write mask -- confirm against the subst that creates the _mask variant.
12826(define_expand "avx512vl_pshuflwv3_mask"
12827 [(match_operand:V16HI 0 "register_operand")
12828 (match_operand:V16HI 1 "nonimmediate_operand")
12829 (match_operand:SI 2 "const_0_to_255_operand")
12830 (match_operand:V16HI 3 "register_operand")
12831 (match_operand:HI 4 "register_operand")]
12832 "TARGET_AVX512VL && TARGET_AVX512BW"
12833{
12834 int mask = INTVAL (operands[2]);
12835 emit_insn (gen_avx2_pshuflw_1_mask (operands[0], operands[1],
12836 GEN_INT ((mask >> 0) & 3),
12837 GEN_INT ((mask >> 2) & 3),
12838 GEN_INT ((mask >> 4) & 3),
12839 GEN_INT ((mask >> 6) & 3),
12840 GEN_INT (((mask >> 0) & 3) + 8),
12841 GEN_INT (((mask >> 2) & 3) + 8),
12842 GEN_INT (((mask >> 4) & 3) + 8),
12843 GEN_INT (((mask >> 6) & 3) + 8),
12844 operands[3], operands[4]));
12845 DONE;
12846})
12847
;; Expander for the unmasked V16HI vpshuflw form.  The 8-bit immediate in
;; operand 2 is decoded into four 2-bit selectors, which are passed as-is and
;; again offset by 8 to gen_avx2_pshuflw_1.
5deb404d 12848(define_expand "avx2_pshuflwv3"
abd4f58b 12849 [(match_operand:V16HI 0 "register_operand")
12850 (match_operand:V16HI 1 "nonimmediate_operand")
12851 (match_operand:SI 2 "const_0_to_255_operand")]
5deb404d 12852 "TARGET_AVX2"
12853{
12854 int mask = INTVAL (operands[2]);
12855 emit_insn (gen_avx2_pshuflw_1 (operands[0], operands[1],
12856 GEN_INT ((mask >> 0) & 3),
12857 GEN_INT ((mask >> 2) & 3),
12858 GEN_INT ((mask >> 4) & 3),
a9e4de7b 12859 GEN_INT ((mask >> 6) & 3),
12860 GEN_INT (((mask >> 0) & 3) + 8),
12861 GEN_INT (((mask >> 2) & 3) + 8),
12862 GEN_INT (((mask >> 4) & 3) + 8),
12863 GEN_INT (((mask >> 6) & 3) + 8)));
5deb404d 12864 DONE;
12865})
12866
;; V16HI vec_select in which indices 4-7 and 12-15 are fixed to themselves and
;; only the low four elements of each group of eight are permuted.  Operands
;; 2-5 are selectors in 0..3; the condition requires operands 6-9 to be the
;; same selectors plus 8.  The output code packs operands 2-5 into one
;; immediate (two bits each), stores it in operands[2] and emits vpshuflw.
7b988cc3 12867(define_insn "avx2_pshuflw_1<mask_name>"
12868 [(set (match_operand:V16HI 0 "register_operand" "=v")
5deb404d 12869 (vec_select:V16HI
7b988cc3 12870 (match_operand:V16HI 1 "nonimmediate_operand" "vm")
abd4f58b 12871 (parallel [(match_operand 2 "const_0_to_3_operand")
12872 (match_operand 3 "const_0_to_3_operand")
12873 (match_operand 4 "const_0_to_3_operand")
12874 (match_operand 5 "const_0_to_3_operand")
5deb404d 12875 (const_int 4)
12876 (const_int 5)
12877 (const_int 6)
12878 (const_int 7)
abd4f58b 12879 (match_operand 6 "const_8_to_11_operand")
12880 (match_operand 7 "const_8_to_11_operand")
12881 (match_operand 8 "const_8_to_11_operand")
12882 (match_operand 9 "const_8_to_11_operand")
5deb404d 12883 (const_int 12)
12884 (const_int 13)
12885 (const_int 14)
12886 (const_int 15)])))]
a9e4de7b 12887 "TARGET_AVX2
7b988cc3 12888 && <mask_avx512bw_condition> && <mask_avx512vl_condition>
a9e4de7b 12889 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
12890 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
12891 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
12892 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
5deb404d 12893{
12894 int mask = 0;
12895 mask |= INTVAL (operands[2]) << 0;
12896 mask |= INTVAL (operands[3]) << 2;
12897 mask |= INTVAL (operands[4]) << 4;
12898 mask |= INTVAL (operands[5]) << 6;
12899 operands[2] = GEN_INT (mask);
12900
7b988cc3 12901 return "vpshuflw\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
5deb404d 12902}
12903 [(set_attr "type" "sselog")
7b988cc3 12904 (set_attr "prefix" "maybe_evex")
5deb404d 12905 (set_attr "length_immediate" "1")
12906 (set_attr "mode" "OI")])
12907
;; Expander for the V8HI vpshuflw form taking an 8-bit immediate (operand 2).
;; The immediate is split into four 2-bit selectors that are passed, together
;; with operands 3 and 4, to gen_sse2_pshuflw_1_mask.
;; NOTE(review): operands 3 and 4 are presumably the merge source and the
;; write mask -- confirm against the subst that creates the _mask variant.
7b988cc3 12908(define_expand "avx512vl_pshuflw_mask"
12909 [(match_operand:V8HI 0 "register_operand")
12910 (match_operand:V8HI 1 "nonimmediate_operand")
12911 (match_operand:SI 2 "const_0_to_255_operand")
12912 (match_operand:V8HI 3 "register_operand")
12913 (match_operand:QI 4 "register_operand")]
12914 "TARGET_AVX512VL && TARGET_AVX512BW"
12915{
12916 int mask = INTVAL (operands[2]);
12917 emit_insn (gen_sse2_pshuflw_1_mask (operands[0], operands[1],
12918 GEN_INT ((mask >> 0) & 3),
12919 GEN_INT ((mask >> 2) & 3),
12920 GEN_INT ((mask >> 4) & 3),
12921 GEN_INT ((mask >> 6) & 3),
12922 operands[3], operands[4]));
12923 DONE;
12924})
12925
;; Expander for the unmasked V8HI pshuflw form.  Operand 2 is any const_int;
;; only its low eight bits are used, decoded as four 2-bit selectors that are
;; handed to gen_sse2_pshuflw_1.
5802c0cb 12926(define_expand "sse2_pshuflw"
abd4f58b 12927 [(match_operand:V8HI 0 "register_operand")
12928 (match_operand:V8HI 1 "nonimmediate_operand")
12929 (match_operand:SI 2 "const_int_operand")]
5802c0cb 12930 "TARGET_SSE2"
12931{
12932 int mask = INTVAL (operands[2]);
12933 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
12934 GEN_INT ((mask >> 0) & 3),
12935 GEN_INT ((mask >> 2) & 3),
12936 GEN_INT ((mask >> 4) & 3),
12937 GEN_INT ((mask >> 6) & 3)));
12938 DONE;
12939})
12940
;; V8HI vec_select that permutes the low four elements (operands 2-5, each in
;; 0..3) and keeps elements 4-7 in place.  The output code packs the selectors
;; into one immediate (two bits each), stores it in operands[2] and emits
;; %vpshuflw.
7b988cc3 12941(define_insn "sse2_pshuflw_1<mask_name>"
12942 [(set (match_operand:V8HI 0 "register_operand" "=v")
5802c0cb 12943 (vec_select:V8HI
7b988cc3 12944 (match_operand:V8HI 1 "nonimmediate_operand" "vm")
abd4f58b 12945 (parallel [(match_operand 2 "const_0_to_3_operand")
12946 (match_operand 3 "const_0_to_3_operand")
12947 (match_operand 4 "const_0_to_3_operand")
12948 (match_operand 5 "const_0_to_3_operand")
5802c0cb 12949 (const_int 4)
12950 (const_int 5)
12951 (const_int 6)
12952 (const_int 7)])))]
7b988cc3 12953 "TARGET_SSE2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
5802c0cb 12954{
12955 int mask = 0;
12956 mask |= INTVAL (operands[2]) << 0;
12957 mask |= INTVAL (operands[3]) << 2;
12958 mask |= INTVAL (operands[4]) << 4;
12959 mask |= INTVAL (operands[5]) << 6;
12960 operands[2] = GEN_INT (mask);
12961
7b988cc3 12962 return "%vpshuflw\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
5802c0cb 12963}
12964 [(set_attr "type" "sselog")
00a0e418 12965 (set_attr "prefix_data16" "0")
1f346cbc 12966 (set_attr "prefix_rep" "1")
ed30e0a6 12967 (set_attr "prefix" "maybe_vex")
00a0e418 12968 (set_attr "length_immediate" "1")
5802c0cb 12969 (set_attr "mode" "TI")])
12970
;; Expander for the unmasked V16HI vpshufhw form.  The 8-bit immediate in
;; operand 2 is decoded into four 2-bit selectors, which are passed offset by
;; 4 and again offset by 12 to gen_avx2_pshufhw_1.
5deb404d 12971(define_expand "avx2_pshufhwv3"
abd4f58b 12972 [(match_operand:V16HI 0 "register_operand")
12973 (match_operand:V16HI 1 "nonimmediate_operand")
12974 (match_operand:SI 2 "const_0_to_255_operand")]
5deb404d 12975 "TARGET_AVX2"
12976{
12977 int mask = INTVAL (operands[2]);
12978 emit_insn (gen_avx2_pshufhw_1 (operands[0], operands[1],
12979 GEN_INT (((mask >> 0) & 3) + 4),
12980 GEN_INT (((mask >> 2) & 3) + 4),
12981 GEN_INT (((mask >> 4) & 3) + 4),
a9e4de7b 12982 GEN_INT (((mask >> 6) & 3) + 4),
12983 GEN_INT (((mask >> 0) & 3) + 12),
12984 GEN_INT (((mask >> 2) & 3) + 12),
12985 GEN_INT (((mask >> 4) & 3) + 12),
12986 GEN_INT (((mask >> 6) & 3) + 12)));
5deb404d 12987 DONE;
12988})
12989
;; V32HI form of vpshufhw.  It is modelled as an unspec (UNSPEC_PSHUFHW) over
;; the source vector and an immediate in 0..255, and the immediate is printed
;; unchanged.
7b988cc3 12990(define_insn "<mask_codefor>avx512bw_pshufhwv32hi<mask_name>"
12991 [(set (match_operand:V32HI 0 "register_operand" "=v")
12992 (unspec:V32HI
12993 [(match_operand:V32HI 1 "nonimmediate_operand" "vm")
12994 (match_operand:SI 2 "const_0_to_255_operand" "n")]
12995 UNSPEC_PSHUFHW))]
12996 "TARGET_AVX512BW"
12997 "vpshufhw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12998 [(set_attr "type" "sselog")
12999 (set_attr "prefix" "evex")
13000 (set_attr "mode" "XI")])
13001
;; Expander for the V16HI vpshufhw form taking an 8-bit immediate (operand 2).
;; The four 2-bit selectors are passed offset by 4 and again offset by 12,
;; then forwarded with operands 3 and 4 to gen_avx2_pshufhw_1_mask.
;; NOTE(review): operands 3 and 4 are presumably the merge source and the
;; write mask -- confirm against the subst that creates the _mask variant.
13002(define_expand "avx512vl_pshufhwv3_mask"
13003 [(match_operand:V16HI 0 "register_operand")
13004 (match_operand:V16HI 1 "nonimmediate_operand")
13005 (match_operand:SI 2 "const_0_to_255_operand")
13006 (match_operand:V16HI 3 "register_operand")
13007 (match_operand:HI 4 "register_operand")]
13008 "TARGET_AVX512VL && TARGET_AVX512BW"
13009{
13010 int mask = INTVAL (operands[2]);
13011 emit_insn (gen_avx2_pshufhw_1_mask (operands[0], operands[1],
13012 GEN_INT (((mask >> 0) & 3) + 4),
13013 GEN_INT (((mask >> 2) & 3) + 4),
13014 GEN_INT (((mask >> 4) & 3) + 4),
13015 GEN_INT (((mask >> 6) & 3) + 4),
13016 GEN_INT (((mask >> 0) & 3) + 12),
13017 GEN_INT (((mask >> 2) & 3) + 12),
13018 GEN_INT (((mask >> 4) & 3) + 12),
13019 GEN_INT (((mask >> 6) & 3) + 12),
13020 operands[3], operands[4]));
13021 DONE;
13022})
13023
;; V16HI vec_select in which indices 0-3 and 8-11 are fixed to themselves and
;; only the high four elements of each group of eight are permuted.  Operands
;; 2-5 are indices in 4..7; the condition requires operands 6-9 to be the same
;; indices plus 8.  The output code subtracts 4 from operands 2-5 to recover
;; the 2-bit selectors, packs them into one immediate stored in operands[2],
;; and emits vpshufhw.
13024(define_insn "avx2_pshufhw_1<mask_name>"
13025 [(set (match_operand:V16HI 0 "register_operand" "=v")
5deb404d 13026 (vec_select:V16HI
7b988cc3 13027 (match_operand:V16HI 1 "nonimmediate_operand" "vm")
5deb404d 13028 (parallel [(const_int 0)
13029 (const_int 1)
13030 (const_int 2)
13031 (const_int 3)
abd4f58b 13032 (match_operand 2 "const_4_to_7_operand")
13033 (match_operand 3 "const_4_to_7_operand")
13034 (match_operand 4 "const_4_to_7_operand")
13035 (match_operand 5 "const_4_to_7_operand")
5deb404d 13036 (const_int 8)
13037 (const_int 9)
13038 (const_int 10)
13039 (const_int 11)
abd4f58b 13040 (match_operand 6 "const_12_to_15_operand")
13041 (match_operand 7 "const_12_to_15_operand")
13042 (match_operand 8 "const_12_to_15_operand")
13043 (match_operand 9 "const_12_to_15_operand")])))]
a9e4de7b 13044 "TARGET_AVX2
7b988cc3 13045 && <mask_avx512bw_condition> && <mask_avx512vl_condition>
a9e4de7b 13046 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
13047 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
13048 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
13049 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
5deb404d 13050{
13051 int mask = 0;
13052 mask |= (INTVAL (operands[2]) - 4) << 0;
13053 mask |= (INTVAL (operands[3]) - 4) << 2;
13054 mask |= (INTVAL (operands[4]) - 4) << 4;
13055 mask |= (INTVAL (operands[5]) - 4) << 6;
13056 operands[2] = GEN_INT (mask);
13057
7b988cc3 13058 return "vpshufhw\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
5deb404d 13059}
13060 [(set_attr "type" "sselog")
7b988cc3 13061 (set_attr "prefix" "maybe_evex")
5deb404d 13062 (set_attr "length_immediate" "1")
13063 (set_attr "mode" "OI")])
13064
;; Expander for the V8HI vpshufhw form taking an 8-bit immediate (operand 2).
;; The four 2-bit selectors are offset by 4 and passed, together with operands
;; 3 and 4, to gen_sse2_pshufhw_1_mask.
;; NOTE(review): operands 3 and 4 are presumably the merge source and the
;; write mask -- confirm against the subst that creates the _mask variant.
7b988cc3 13065(define_expand "avx512vl_pshufhw_mask"
13066 [(match_operand:V8HI 0 "register_operand")
13067 (match_operand:V8HI 1 "nonimmediate_operand")
13068 (match_operand:SI 2 "const_0_to_255_operand")
13069 (match_operand:V8HI 3 "register_operand")
13070 (match_operand:QI 4 "register_operand")]
13071 "TARGET_AVX512VL && TARGET_AVX512BW"
13072{
13073 int mask = INTVAL (operands[2]);
13074 emit_insn (gen_sse2_pshufhw_1_mask (operands[0], operands[1],
13075 GEN_INT (((mask >> 0) & 3) + 4),
13076 GEN_INT (((mask >> 2) & 3) + 4),
13077 GEN_INT (((mask >> 4) & 3) + 4),
13078 GEN_INT (((mask >> 6) & 3) + 4),
13079 operands[3], operands[4]));
13080 DONE;
13081})
13082
;; Expander for the unmasked V8HI pshufhw form.  Operand 2 is any const_int;
;; only its low eight bits are used, decoded as four 2-bit selectors that are
;; offset by 4 and handed to gen_sse2_pshufhw_1.
5802c0cb 13083(define_expand "sse2_pshufhw"
abd4f58b 13084 [(match_operand:V8HI 0 "register_operand")
13085 (match_operand:V8HI 1 "nonimmediate_operand")
13086 (match_operand:SI 2 "const_int_operand")]
5802c0cb 13087 "TARGET_SSE2"
13088{
13089 int mask = INTVAL (operands[2]);
13090 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
13091 GEN_INT (((mask >> 0) & 3) + 4),
13092 GEN_INT (((mask >> 2) & 3) + 4),
13093 GEN_INT (((mask >> 4) & 3) + 4),
13094 GEN_INT (((mask >> 6) & 3) + 4)));
13095 DONE;
13096})
13097
;; V8HI vec_select that keeps elements 0-3 in place and permutes the high four
;; elements (operands 2-5, each an index in 4..7).  The output code subtracts
;; 4 from each index to recover the 2-bit selectors, packs them into one
;; immediate stored in operands[2], and emits %vpshufhw.
7b988cc3 13098(define_insn "sse2_pshufhw_1<mask_name>"
13099 [(set (match_operand:V8HI 0 "register_operand" "=v")
5802c0cb 13100 (vec_select:V8HI
7b988cc3 13101 (match_operand:V8HI 1 "nonimmediate_operand" "vm")
5802c0cb 13102 (parallel [(const_int 0)
13103 (const_int 1)
13104 (const_int 2)
13105 (const_int 3)
abd4f58b 13106 (match_operand 2 "const_4_to_7_operand")
13107 (match_operand 3 "const_4_to_7_operand")
13108 (match_operand 4 "const_4_to_7_operand")
13109 (match_operand 5 "const_4_to_7_operand")])))]
7b988cc3 13110 "TARGET_SSE2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
5802c0cb 13111{
13112 int mask = 0;
13113 mask |= (INTVAL (operands[2]) - 4) << 0;
13114 mask |= (INTVAL (operands[3]) - 4) << 2;
13115 mask |= (INTVAL (operands[4]) - 4) << 4;
13116 mask |= (INTVAL (operands[5]) - 4) << 6;
13117 operands[2] = GEN_INT (mask);
13118
7b988cc3 13119 return "%vpshufhw\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
5802c0cb 13120}
13121 [(set_attr "type" "sselog")
1f346cbc 13122 (set_attr "prefix_rep" "1")
00a0e418 13123 (set_attr "prefix_data16" "0")
ed30e0a6 13124 (set_attr "prefix" "maybe_vex")
00a0e418 13125 (set_attr "length_immediate" "1")
5802c0cb 13126 (set_attr "mode" "TI")])
13127
;; Expander that builds a V4SI from an SImode value: a vec_merge (selector
;; const_int 1) of the duplicated scalar operand 1 with operand 2, where the
;; preparation statement sets operand 2 to the V4SI zero vector.
13128(define_expand "sse2_loadd"
abd4f58b 13129 [(set (match_operand:V4SI 0 "register_operand")
5802c0cb 13130 (vec_merge:V4SI
13131 (vec_duplicate:V4SI
abd4f58b 13132 (match_operand:SI 1 "nonimmediate_operand"))
5802c0cb 13133 (match_dup 2)
13134 (const_int 1)))]
ad2c46cf 13135 "TARGET_SSE"
5802c0cb 13136 "operands[2] = CONST0_RTX (V4SImode);")
13137
;; vec_merge (selector const_int 1) of a duplicated SImode operand 2 into the
;; V4SI operand 1, which is either zero (constraint C) or a register.
;; Alternatives 0-1 emit %vmovd (isa sse2), 2-3 emit movss (noavx) and 4 emits
;; the three-operand vmovss (avx).
13138(define_insn "sse2_loadld"
f30b3ad6 13139 [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x,x,x")
5802c0cb 13140 (vec_merge:V4SI
13141 (vec_duplicate:V4SI
f30b3ad6 13142 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,x"))
13143 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0,x")
5802c0cb 13144 (const_int 1)))]
ad2c46cf 13145 "TARGET_SSE"
5802c0cb 13146 "@
b11a97b3 13147 %vmovd\t{%2, %0|%0, %2}
13148 %vmovd\t{%2, %0|%0, %2}
ad2c46cf 13149 movss\t{%2, %0|%0, %2}
b11a97b3 13150 movss\t{%2, %0|%0, %2}
13151 vmovss\t{%2, %1, %0|%0, %1, %2}"
dea561ed 13152 [(set_attr "isa" "sse2,sse2,noavx,noavx,avx")
b11a97b3 13153 (set_attr "type" "ssemov")
13154 (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,vex")
13155 (set_attr "mode" "TI,TI,V4SF,SF,SF")])
5802c0cb 13156
7d9c40e2 13157;; QI and HI modes handled by pextr patterns.
;; Iterates over V16QI (only when TARGET_SSE4_1) and V8HI.
13158(define_mode_iterator PEXTR_MODE12
13159 [(V16QI "TARGET_SSE4_1") V8HI])
13160
;; Extracts one element (constant index operand 2) of a PEXTR_MODE12 vector
;; register into a general register (alternative 0) or memory (alternative 1,
;; isa sse4) using %vpextr<ssemodesuffix>.  prefix_extra is "*" only for
;; alternative 0 in V8HImode and "1" otherwise.
1087c60b 13161(define_insn "*vec_extract<mode>"
7d9c40e2 13162 [(set (match_operand:<ssescalarmode> 0 "register_sse4nonimm_operand" "=r,m")
1087c60b 13163 (vec_select:<ssescalarmode>
7d9c40e2 13164 (match_operand:PEXTR_MODE12 1 "register_operand" "x,x")
1087c60b 13165 (parallel
13166 [(match_operand:SI 2 "const_0_to_<ssescalarnummask>_operand")])))]
7d9c40e2 13167 "TARGET_SSE2"
13168 "%vpextr<ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}"
13169 [(set_attr "isa" "*,sse4")
13170 (set_attr "type" "sselog1")
13171 (set_attr "prefix_data16" "1")
1087c60b 13172 (set (attr "prefix_extra")
13173 (if_then_else
13174 (and (eq_attr "alternative" "0")
13175 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
13176 (const_string "*")
13177 (const_string "1")))
13178 (set_attr "length_immediate" "1")
13179 (set_attr "prefix" "maybe_vex")
13180 (set_attr "mode" "TI")])
13181
;; Zero-extending variant: extracts one element of a PEXTR_MODE12 vector
;; register into an SWI48 general register with %vpextr, printing the
;; destination with the %k modifier.  prefix_extra is "*" for V8HImode and
;; "1" otherwise.
7d9c40e2 13182(define_insn "*vec_extract<PEXTR_MODE12:mode>_zext"
1087c60b 13183 [(set (match_operand:SWI48 0 "register_operand" "=r")
13184 (zero_extend:SWI48
7d9c40e2 13185 (vec_select:<PEXTR_MODE12:ssescalarmode>
13186 (match_operand:PEXTR_MODE12 1 "register_operand" "x")
1087c60b 13187 (parallel
7d9c40e2 13188 [(match_operand:SI 2
13189 "const_0_to_<PEXTR_MODE12:ssescalarnummask>_operand")]))))]
1087c60b 13190 "TARGET_SSE2"
7d9c40e2 13191 "%vpextr<PEXTR_MODE12:ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}"
1087c60b 13192 [(set_attr "type" "sselog1")
13193 (set_attr "prefix_data16" "1")
7d9c40e2 13194 (set (attr "prefix_extra")
13195 (if_then_else
13196 (eq (const_string "<PEXTR_MODE12:MODE>mode") (const_string "V8HImode"))
13197 (const_string "*")
13198 (const_string "1")))
1087c60b 13199 (set_attr "length_immediate" "1")
13200 (set_attr "prefix" "maybe_vex")
13201 (set_attr "mode" "TI")])
13202
;; Element extraction from a VI12_128 vector in offsettable memory into a
;; general register.  The output template is "#", so this insn is never
;; printed directly and has to be split before final output.
27fc86e0 13203(define_insn "*vec_extract<mode>_mem"
1087c60b 13204 [(set (match_operand:<ssescalarmode> 0 "register_operand" "=r")
13205 (vec_select:<ssescalarmode>
27fc86e0 13206 (match_operand:VI12_128 1 "memory_operand" "o")
1087c60b 13207 (parallel
13208 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
13209 "TARGET_SSE"
27fc86e0 13210 "#")
1087c60b 13211
;; Extraction of element 0 into an SWI48 destination.  The condition rejects
;; the memory-to-memory case; the output template is "#", so the insn has to
;; be split before final output.  Alternative 1 is restricted to isa sse4.
fe4df2ce 13212(define_insn "*vec_extract<ssevecmodelower>_0"
1087c60b 13213 [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r ,r,x ,m")
fe4df2ce 13214 (vec_select:SWI48
1087c60b 13215 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "mYj,x,xm,x")
5802c0cb 13216 (parallel [(const_int 0)])))]
fe4df2ce 13217 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2071a184 13218 "#"
1087c60b 13219 [(set_attr "isa" "*,sse4,*,*")])
5802c0cb 13220
;; Zero-extended extraction of element 0 of a V4SI register into a DImode
;; general register.  After reload it is split into a plain zero_extend whose
;; source is operand 1 re-created as an SImode REG with the same register
;; number.
b8403b2e 13221(define_insn_and_split "*vec_extractv4si_0_zext"
13222 [(set (match_operand:DI 0 "register_operand" "=r")
13223 (zero_extend:DI
13224 (vec_select:SI
13225 (match_operand:V4SI 1 "register_operand" "x")
13226 (parallel [(const_int 0)]))))]
13227 "TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_FROM_VEC"
13228 "#"
13229 "&& reload_completed"
13230 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
13231 "operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));")
13232
;; Extraction of element 0 of a V2DI vector into a DImode SSE register or
;; memory, enabled only when !TARGET_64BIT.  The memory-to-memory case is
;; rejected and the "#" template means the insn has to be split.
fe4df2ce 13233(define_insn "*vec_extractv2di_0_sse"
13234 [(set (match_operand:DI 0 "nonimmediate_operand" "=x,m")
f957796f 13235 (vec_select:DI
fe4df2ce 13236 (match_operand:V2DI 1 "nonimmediate_operand" "xm,x")
f957796f 13237 (parallel [(const_int 0)])))]
fe4df2ce 13238 "TARGET_SSE && !TARGET_64BIT
13239 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
13240 "#")
f957796f 13241
;; After reload, turn extraction of element 0 from a vector register into a
;; plain scalar move: operand 1 is re-created as a <MODE>mode REG with the
;; same register number.
fe4df2ce 13242(define_split
2071a184 13243 [(set (match_operand:SWI48x 0 "nonimmediate_operand")
fe4df2ce 13244 (vec_select:SWI48x
27fc86e0 13245 (match_operand:<ssevecmode> 1 "register_operand")
5802c0cb 13246 (parallel [(const_int 0)])))]
fe4df2ce 13247 "TARGET_SSE && reload_completed"
13248 [(set (match_dup 0) (match_dup 1))]
27fc86e0 13249 "operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1]));")
5802c0cb 13250
;; Extraction of one SImode element (constant index operand 2) from a V4SI
;; register under TARGET_SSE4_1.  Alternative 0 emits %vpextrd.  Alternatives
;; 1-2 (noavx, operand 1 tied to operand 0) and 3 (avx) instead shift the
;; vector right by index * 4 bytes with psrldq / vpsrldq, rewriting
;; operands[2] to that byte count.
2071a184 13251(define_insn "*vec_extractv4si"
0a32b282 13252 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,Yr,*x,x")
2071a184 13253 (vec_select:SI
0a32b282 13254 (match_operand:V4SI 1 "register_operand" "x,0,0,x")
2071a184 13255 (parallel [(match_operand:SI 2 "const_0_to_3_operand")])))]
13256 "TARGET_SSE4_1"
b8403b2e 13257{
13258 switch (which_alternative)
13259 {
13260 case 0:
13261 return "%vpextrd\t{%2, %1, %0|%0, %1, %2}";
13262
13263 case 1:
0a32b282 13264 case 2:
b8403b2e 13265 operands [2] = GEN_INT (INTVAL (operands[2]) * 4);
13266 return "psrldq\t{%2, %0|%0, %2}";
13267
0a32b282 13268 case 3:
b8403b2e 13269 operands [2] = GEN_INT (INTVAL (operands[2]) * 4);
13270 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
13271
13272 default:
13273 gcc_unreachable ();
13274 }
13275}
0a32b282 13276 [(set_attr "isa" "*,noavx,noavx,avx")
13277 (set_attr "type" "sselog1,sseishft1,sseishft1,sseishft1")
13278 (set_attr "prefix_extra" "1,*,*,*")
2071a184 13279 (set_attr "length_immediate" "1")
0a32b282 13280 (set_attr "prefix" "maybe_vex,orig,orig,vex")
2071a184 13281 (set_attr "mode" "TI")])
13282
;; Zero-extended extraction of one SImode element of a V4SI register into a
;; DImode general register (64-bit, SSE4.1 only), emitted as %vpextrd with the
;; destination printed through the %k modifier.
13283(define_insn "*vec_extractv4si_zext"
13284 [(set (match_operand:DI 0 "register_operand" "=r")
13285 (zero_extend:DI
13286 (vec_select:SI
13287 (match_operand:V4SI 1 "register_operand" "x")
13288 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
13289 "TARGET_64BIT && TARGET_SSE4_1"
13290 "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
13291 [(set_attr "type" "sselog1")
13292 (set_attr "prefix_extra" "1")
13293 (set_attr "length_immediate" "1")
13294 (set_attr "prefix" "maybe_vex")
13295 (set_attr "mode" "TI")])
fe4df2ce 13296
;; Extraction of one SImode element from a V4SI vector in offsettable memory
;; into an SSE or general register.  The output template is "#", so the insn
;; has to be split before final output.
27fc86e0 13297(define_insn "*vec_extractv4si_mem"
fe4df2ce 13298 [(set (match_operand:SI 0 "register_operand" "=x,r")
13299 (vec_select:SI
13300 (match_operand:V4SI 1 "memory_operand" "o,o")
13301 (parallel [(match_operand 2 "const_0_to_3_operand")])))]
13302 "TARGET_SSE"
27fc86e0 13303 "#")
5802c0cb 13304
;; Zero-extended extraction of one SImode element from a V4SI vector in
;; offsettable memory.  After reload it is split into a zero_extend of an
;; SImode memory reference at byte offset index * 4 from the original address.
b8403b2e 13305(define_insn_and_split "*vec_extractv4si_zext_mem"
13306 [(set (match_operand:DI 0 "register_operand" "=x,r")
13307 (zero_extend:DI
13308 (vec_select:SI
13309 (match_operand:V4SI 1 "memory_operand" "o,o")
13310 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
13311 "TARGET_64BIT && TARGET_SSE"
13312 "#"
13313 "&& reload_completed"
13314 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
13315{
13316 operands[1] = adjust_address (operands[1], SImode, INTVAL (operands[2]) * 4);
13317})
13318
;; Extraction of element 1 (the high DImode half) of a V2DI vector; the
;; memory-to-memory case is rejected.  By alternative: 0 %vpextrq $1
;; (x64_sse4), 1 %vmovhps store to memory, 2 psrldq $8 with operand 1 tied to
;; operand 0 (sse2_noavx), 3 vpsrldq $8 (avx), 4 movhlps (noavx), and 5-6 "#"
;; for an offsettable-memory source, which have to be split.
1e541240 13319(define_insn "*vec_extractv2di_1"
2071a184 13320 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,m,x,x,x,x,r")
a150ee15 13321 (vec_select:DI
2071a184 13322 (match_operand:V2DI 1 "nonimmediate_operand" "x ,x,0,x,x,o,o")
a150ee15 13323 (parallel [(const_int 1)])))]
fe4df2ce 13324 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
a150ee15 13325 "@
2071a184 13326 %vpextrq\t{$1, %1, %0|%0, %1, 1}
b11a97b3 13327 %vmovhps\t{%1, %0|%0, %1}
c5823964 13328 psrldq\t{$8, %0|%0, 8}
b11a97b3 13329 vpsrldq\t{$8, %1, %0|%0, %1, 8}
daa8e621 13330 movhlps\t{%1, %0|%0, %1}
fe4df2ce 13331 #
13332 #"
2071a184 13333 [(set_attr "isa" "x64_sse4,*,sse2_noavx,avx,noavx,*,x64")
13334 (set_attr "type" "sselog1,ssemov,sseishft1,sseishft1,ssemov,ssemov,imov")
13335 (set_attr "length_immediate" "1,*,1,1,*,*,*")
2071a184 13336 (set_attr "prefix_rex" "1,*,*,*,*,*,*")
13337 (set_attr "prefix_extra" "1,*,*,*,*,*,*")
13338 (set_attr "prefix" "maybe_vex,maybe_vex,orig,vex,orig,*,*")
13339 (set_attr "mode" "TI,V2SF,TI,TI,V4SF,DI,DI")])
fe4df2ce 13340
13341(define_split
27fc86e0 13342 [(set (match_operand:<ssescalarmode> 0 "register_operand")
13343 (vec_select:<ssescalarmode>
13344 (match_operand:VI_128 1 "memory_operand")
13345 (parallel
13346 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
fe4df2ce 13347 "TARGET_SSE && reload_completed"
13348 [(set (match_dup 0) (match_dup 1))]
27fc86e0 13349{
13350 int offs = INTVAL (operands[2]) * GET_MODE_SIZE (<ssescalarmode>mode);
13351
13352 operands[1] = adjust_address (operands[1], <ssescalarmode>mode, offs);
13353})
a150ee15 13354
f02daedb 13355;; Turn SImode or DImode extraction from arbitrary SSE/AVX/AVX512F
13356;; vector modes into vec_extract*.
;; Matches a scalar move whose source is a byte-offset-0 SUBREG of a 16-, 32-
;; or 64-byte integer or float vector register (32 needs TARGET_AVX, 64 needs
;; TARGET_AVX512F) and rewrites it as a vec_select of element 0.  DImode is
;; handled only on 64-bit targets or when the destination is memory.
;; A 64-byte source is first narrowed to 32 bytes with vec_extract_lo_v16si /
;; vec_extract_lo_v8di and then falls through to the 32-byte case, which
;; narrows to <ssevecmode> with vec_extract_lo_v8si / vec_extract_lo_v4di.  A
;; 16-byte source is reinterpreted with gen_lowpart.
13357(define_split
13358 [(set (match_operand:SWI48x 0 "nonimmediate_operand")
13359 (match_operand:SWI48x 1 "register_operand"))]
13360 "can_create_pseudo_p ()
e15c0942 13361 && SUBREG_P (operands[1])
f02daedb 13362 && REG_P (SUBREG_REG (operands[1]))
13363 && (GET_MODE_CLASS (GET_MODE (SUBREG_REG (operands[1]))) == MODE_VECTOR_INT
13364 || (GET_MODE_CLASS (GET_MODE (SUBREG_REG (operands[1])))
13365 == MODE_VECTOR_FLOAT))
13366 && SUBREG_BYTE (operands[1]) == 0
13367 && TARGET_SSE
13368 && (GET_MODE_SIZE (GET_MODE (SUBREG_REG (operands[1]))) == 16
13369 || (GET_MODE_SIZE (GET_MODE (SUBREG_REG (operands[1]))) == 32
13370 && TARGET_AVX)
13371 || (GET_MODE_SIZE (GET_MODE (SUBREG_REG (operands[1]))) == 64
13372 && TARGET_AVX512F))
13373 && (<MODE>mode == SImode || TARGET_64BIT || MEM_P (operands[0]))"
13374 [(set (match_dup 0) (vec_select:SWI48x (match_dup 1)
13375 (parallel [(const_int 0)])))]
13376{
13377 rtx tmp;
13378 operands[1] = SUBREG_REG (operands[1]);
13379 switch (GET_MODE_SIZE (GET_MODE (operands[1])))
13380 {
13381 case 64:
13382 if (<MODE>mode == SImode)
13383 {
13384 tmp = gen_reg_rtx (V8SImode);
13385 emit_insn (gen_vec_extract_lo_v16si (tmp,
13386 gen_lowpart (V16SImode,
13387 operands[1])));
13388 }
13389 else
13390 {
13391 tmp = gen_reg_rtx (V4DImode);
13392 emit_insn (gen_vec_extract_lo_v8di (tmp,
13393 gen_lowpart (V8DImode,
13394 operands[1])));
13395 }
13396 operands[1] = tmp;
13397 /* FALLTHRU */
13398 case 32:
13399 tmp = gen_reg_rtx (<ssevecmode>mode);
13400 if (<MODE>mode == SImode)
13401 emit_insn (gen_vec_extract_lo_v8si (tmp, gen_lowpart (V8SImode,
13402 operands[1])));
13403 else
13404 emit_insn (gen_vec_extract_lo_v4di (tmp, gen_lowpart (V4DImode,
13405 operands[1])));
13406 operands[1] = tmp;
13407 break;
13408 case 16:
13409 operands[1] = gen_lowpart (<ssevecmode>mode, operands[1]);
13410 break;
13411 }
13412})
13413
;; Concatenation of two SImode values into a V2SI register when SSE4.1 is
;; available; two memory inputs are rejected.  By alternative: 0-1 pinsrd $1
;; (noavx), 2 vpinsrd $1 (avx), 3-4 punpckldq (noavx), 5 vpunpckldq (avx),
;; 6 %vmovd of operand 1 when operand 2 matches constraint C, and 7-8 the
;; *y-register forms (punpckldq and movd, types mmxcvt and mmxmov).
b4a46c88 13414(define_insn "*vec_concatv2si_sse4_1"
0a281fd0 13415 [(set (match_operand:V2SI 0 "register_operand"
13416 "=Yr,*x,x, Yr,*x,x, x, *y,*y")
b4a46c88 13417 (vec_concat:V2SI
0a281fd0 13418 (match_operand:SI 1 "nonimmediate_operand"
13419 " 0, 0,x, 0,0, x,rm, 0,rm")
13420 (match_operand:SI 2 "vector_move_operand"
13421 " rm,rm,rm,Yr,*x,x, C,*ym, C")))]
13422 "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
b4a46c88 13423 "@
0a32b282 13424 pinsrd\t{$1, %2, %0|%0, %2, 1}
1e541240 13425 pinsrd\t{$1, %2, %0|%0, %2, 1}
13426 vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
d3d9aac1 13427 punpckldq\t{%2, %0|%0, %2}
0a32b282 13428 punpckldq\t{%2, %0|%0, %2}
b11a97b3 13429 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
13430 %vmovd\t{%1, %0|%0, %1}
d3d9aac1 13431 punpckldq\t{%2, %0|%0, %2}
13432 movd\t{%1, %0|%0, %1}"
0a32b282 13433 [(set_attr "isa" "noavx,noavx,avx,noavx,noavx,avx,*,*,*")
13434 (set_attr "type" "sselog,sselog,sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
13435 (set_attr "prefix_extra" "1,1,1,*,*,*,*,*,*")
13436 (set_attr "length_immediate" "1,1,1,*,*,*,*,*,*")
13437 (set_attr "prefix" "orig,orig,vex,orig,orig,vex,maybe_vex,orig,orig")
13438 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,DI,DI")])
b4a46c88 13439
ad2c46cf 13440;; ??? In theory we can match memory for the MMX alternative, but allowing
13441;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
13442;; alternatives pretty much forces the MMX alternative to be chosen.
;; Concatenation of two SImode values into a V2SI register for SSE targets
;; without SSE4.1.  Alternatives 0-2 (isa sse2) use punpckldq or movd, 3-4 use
;; unpcklps or movss, and 5-6 are the *y-register forms (types mmxcvt and
;; mmxmov).  The alternatives that only move operand 1 require operand 2 to
;; match constraint C.
65c52515 13443(define_insn "*vec_concatv2si"
13444 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,x,x,*y,*y")
ad2c46cf 13445 (vec_concat:V2SI
65c52515 13446 (match_operand:SI 1 "nonimmediate_operand" " 0,rm,rm,0,m, 0,*rm")
13447 (match_operand:SI 2 "reg_or_0_operand" " x,C ,C, x,C,*y,C")))]
13448 "TARGET_SSE && !TARGET_SSE4_1"
ad2c46cf 13449 "@
13450 punpckldq\t{%2, %0|%0, %2}
13451 movd\t{%1, %0|%0, %1}
65c52515 13452 movd\t{%1, %0|%0, %1}
ad2c46cf 13453 unpcklps\t{%2, %0|%0, %2}
13454 movss\t{%1, %0|%0, %1}
13455 punpckldq\t{%2, %0|%0, %2}
13456 movd\t{%1, %0|%0, %1}"
65c52515 13457 [(set_attr "isa" "sse2,sse2,sse2,*,*,*,*")
13458 (set_attr "type" "sselog,ssemov,mmxmov,sselog,ssemov,mmxcvt,mmxmov")
13459 (set_attr "mode" "TI,TI,DI,V4SF,SF,DI,DI")])
ad2c46cf 13460
;; Concatenation of two V2SI halves into a V4SI register.  By alternative:
;; 0 punpcklqdq (sse2_noavx), 1 vpunpcklqdq (avx), 2 movlhps (noavx), and 3-4
;; movhps / vmovhps when operand 2 is in memory (printed with %q2 in the
;; Intel-syntax half of the template).
18c3cd78 13461(define_insn "*vec_concatv4si"
f30b3ad6 13462 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x,x,x")
ed30e0a6 13463 (vec_concat:V4SI
f30b3ad6 13464 (match_operand:V2SI 1 "register_operand" " 0,x,0,0,x")
13465 (match_operand:V2SI 2 "nonimmediate_operand" " x,x,x,m,m")))]
ad2c46cf 13466 "TARGET_SSE"
13467 "@
13468 punpcklqdq\t{%2, %0|%0, %2}
18c3cd78 13469 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
ad2c46cf 13470 movlhps\t{%2, %0|%0, %2}
c358a059 13471 movhps\t{%2, %0|%0, %q2}
13472 vmovhps\t{%2, %1, %0|%0, %1, %q2}"
f30b3ad6 13473 [(set_attr "isa" "sse2_noavx,avx,noavx,noavx,avx")
18c3cd78 13474 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
13475 (set_attr "prefix" "orig,vex,orig,orig,vex")
13476 (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")])
ad2c46cf 13477
dd196988 13478;; movd instead of movq is required to handle broken assemblers.
65c52515 13479(define_insn "vec_concatv2di"
b11a97b3 13480 [(set (match_operand:V2DI 0 "register_operand"
0a32b282 13481 "=Yr,*x,x ,Yi,x ,!x,x,x,x,x,x")
d3d9aac1 13482 (vec_concat:V2DI
b11a97b3 13483 (match_operand:DI 1 "nonimmediate_operand"
0a32b282 13484 " 0, 0,x ,r ,xm,*y,0,x,0,0,x")
b11a97b3 13485 (match_operand:DI 2 "vector_move_operand"
0a32b282 13486 "*rm,rm,rm,C ,C ,C ,x,x,x,m,m")))]
65c52515 13487 "TARGET_SSE"
d3d9aac1 13488 "@
0a32b282 13489 pinsrq\t{$1, %2, %0|%0, %2, 1}
1e541240 13490 pinsrq\t{$1, %2, %0|%0, %2, 1}
13491 vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
f17a6c34 13492 * return HAVE_AS_IX86_INTERUNIT_MOVQ ? \"%vmovq\t{%1, %0|%0, %1}\" : \"%vmovd\t{%1, %0|%0, %1}\";
65c52515 13493 %vmovq\t{%1, %0|%0, %1}
d3d9aac1 13494 movq2dq\t{%1, %0|%0, %1}
13495 punpcklqdq\t{%2, %0|%0, %2}
b11a97b3 13496 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
65c52515 13497 movlhps\t{%2, %0|%0, %2}
b11a97b3 13498 movhps\t{%2, %0|%0, %2}
13499 vmovhps\t{%2, %1, %0|%0, %1, %2}"
0a32b282 13500 [(set_attr "isa" "x64_sse4_noavx,x64_sse4_noavx,x64_avx,x64,sse2,sse2,sse2_noavx,avx,noavx,noavx,avx")
1e541240 13501 (set (attr "type")
13502 (if_then_else
0a32b282 13503 (eq_attr "alternative" "0,1,2,6,7")
1e541240 13504 (const_string "sselog")
13505 (const_string "ssemov")))
0a32b282 13506 (set_attr "prefix_rex" "1,1,1,1,*,*,*,*,*,*,*")
13507 (set_attr "prefix_extra" "1,1,1,*,*,*,*,*,*,*,*")
13508 (set_attr "length_immediate" "1,1,1,*,*,*,*,*,*,*,*")
13509 (set_attr "prefix" "orig,orig,vex,maybe_vex,maybe_vex,orig,orig,vex,orig,orig,vex")
13510 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")])
b11a97b3 13511
b6fc7168 13512(define_expand "vec_unpacks_lo_<mode>" ;; Expander that hands all work to ix86_expand_sse_unpack; DONE means the operand list below is never emitted as RTL itself.
abd4f58b 13513 [(match_operand:<sseunpackmode> 0 "register_operand") ;; operand 0: result register, in mode <sseunpackmode>
8f83f53e 13514 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")] ;; operand 1: source vector register
c6c91d61 13515 "TARGET_SSE2"
3b87d2ec 13516 "ix86_expand_sse_unpack (operands[0], operands[1], false, false); DONE;") ;; NOTE(review): the two flags are presumably (unsigned_p, high_p) = (false, false), i.e. signed / low half -- confirm against ix86_expand_sse_unpack.
c6c91d61 13517
b6fc7168 13518(define_expand "vec_unpacks_hi_<mode>" ;; Expander that hands all work to ix86_expand_sse_unpack; DONE means the operand list below is never emitted as RTL itself.
abd4f58b 13519 [(match_operand:<sseunpackmode> 0 "register_operand") ;; operand 0: result register, in mode <sseunpackmode>
8f83f53e 13520 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")] ;; operand 1: source vector register
c6c91d61 13521 "TARGET_SSE2"
3b87d2ec 13522 "ix86_expand_sse_unpack (operands[0], operands[1], false, true); DONE;") ;; NOTE(review): the two flags are presumably (unsigned_p, high_p) = (false, true), i.e. signed / high half -- confirm against ix86_expand_sse_unpack.
c6c91d61 13523
b6fc7168 13524(define_expand "vec_unpacku_lo_<mode>" ;; Expander that hands all work to ix86_expand_sse_unpack; DONE means the operand list below is never emitted as RTL itself.
abd4f58b 13525 [(match_operand:<sseunpackmode> 0 "register_operand") ;; operand 0: result register, in mode <sseunpackmode>
8f83f53e 13526 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")] ;; operand 1: source vector register
c6c91d61 13527 "TARGET_SSE2"
3b87d2ec 13528 "ix86_expand_sse_unpack (operands[0], operands[1], true, false); DONE;") ;; NOTE(review): the two flags are presumably (unsigned_p, high_p) = (true, false), i.e. unsigned / low half -- confirm against ix86_expand_sse_unpack.
c6c91d61 13529
0852690b 13530(define_expand "vec_unpacks_lo_hi" ;; Sets the QImode operand 0 to the QImode subreg at byte offset 0 of the HImode operand 1.
13531 [(set (match_operand:QI 0 "register_operand")
13532 (subreg:QI (match_operand:HI 1 "register_operand") 0))]
13533 "TARGET_AVX512DQ") ;; NOTE(review): vec_unpacks_hi_hi is enabled under TARGET_AVX512F while this one requires TARGET_AVX512DQ -- presumably deliberate, confirm.
13534
13535(define_expand "vec_unpacks_lo_si" ;; Sets the HImode operand 0 to the HImode subreg at byte offset 0 of the SImode operand 1.
13536 [(set (match_operand:HI 0 "register_operand")
13537 (subreg:HI (match_operand:SI 1 "register_operand") 0))]
13538 "TARGET_AVX512F") ;; no preparation code: the template above is emitted as written
13539
13540(define_expand "vec_unpacks_lo_di" ;; Sets the SImode operand 0 to the SImode subreg at byte offset 0 of the DImode operand 1.
13541 [(set (match_operand:SI 0 "register_operand")
13542 (subreg:SI (match_operand:DI 1 "register_operand") 0))]
13543 "TARGET_AVX512BW") ;; no preparation code: the template above is emitted as written
13544
b6fc7168 13545(define_expand "vec_unpacku_hi_<mode>" ;; Expander that hands all work to ix86_expand_sse_unpack; DONE means the operand list below is never emitted as RTL itself.
abd4f58b 13546 [(match_operand:<sseunpackmode> 0 "register_operand") ;; operand 0: result register, in mode <sseunpackmode>
8f83f53e 13547 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")] ;; operand 1: source vector register
c6c91d61 13548 "TARGET_SSE2"
3b87d2ec 13549 "ix86_expand_sse_unpack (operands[0], operands[1], true, true); DONE;") ;; NOTE(review): the two flags are presumably (unsigned_p, high_p) = (true, true), i.e. unsigned / high half -- confirm against ix86_expand_sse_unpack.
c6c91d61 13550
0852690b 13551(define_expand "vec_unpacks_hi_hi" ;; Stores operand 1 logically shifted right by 8 bits into operand 0, which is accessed as HImode.
13552 [(set (subreg:HI (match_operand:QI 0 "register_operand") 0) ;; destination: the QImode operand 0 viewed through an HImode subreg at offset 0
13553 (lshiftrt:HI (match_operand:HI 1 "register_operand")
13554 (const_int 8)))] ;; shift count 8 moves the upper byte of the HImode source into the low byte
13555 "TARGET_AVX512F")
13556
13557(define_expand "vec_unpacks_hi_<mode>" ;; SWI48x counterpart of vec_unpacks_hi_hi: logical right shift of operand 1 stored into operand 0 viewed as SWI48x.
13558 [(set (subreg:SWI48x (match_operand:<HALFMASKMODE> 0 "register_operand") 0) ;; destination: the <HALFMASKMODE> operand 0 viewed through a full-width subreg at offset 0
13559 (lshiftrt:SWI48x (match_operand:SWI48x 1 "register_operand")
13560 (match_dup 2)))] ;; operand 2 is not supplied by the caller; the preparation code below creates it
13561 "TARGET_AVX512BW" ;; preparation code: the shift count is the bit size of <HALFMASKMODE>
13562{
13563 operands[2] = GEN_INT (GET_MODE_BITSIZE (<HALFMASKMODE>mode));
13564})
13565
5802c0cb 13566;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13567;;
0975351b 13568;; Miscellaneous
5802c0cb 13569;;
13570;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13571
293fd15f 13572(define_expand "<sse2_avx2>_uavg<mode>3<mask_name>" ;; Unsigned rounding average: truncate ((zext (op1) + zext (op2) + 1) >> 1), with the sum formed in <ssedoublemode>.
e4048f11 13573 [(set (match_operand:VI12_AVX2 0 "register_operand")
13574 (truncate:VI12_AVX2
13575 (lshiftrt:<ssedoublemode>
13576 (plus:<ssedoublemode>
13577 (plus:<ssedoublemode>
13578 (zero_extend:<ssedoublemode>
13579 (match_operand:VI12_AVX2 1 "nonimmediate_operand"))
13580 (zero_extend:<ssedoublemode>
13581 (match_operand:VI12_AVX2 2 "nonimmediate_operand")))
293fd15f 13582 (match_dup <mask_expand_op3>)) ;; the rounding addend; filled in by the preparation code with CONST1_RTX
7c839b3f 13583 (const_int 1))))]
293fd15f 13584 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>" ;; preparation code: puts the constant-1 vector in operands[3], fixes up the operands for PLUS, and when <mask_applied> moves the constant to operands[5] and restores the saved operands[3]
e4048f11 13585{
293fd15f 13586 rtx tmp;
13587 if (<mask_applied>)
13588 tmp = operands[3];
e4048f11 13589 operands[3] = CONST1_RTX(<MODE>mode);
13590 ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);
293fd15f 13591
13592 if (<mask_applied>)
13593 {
13594 operands[5] = operands[3];
13595 operands[3] = tmp;
13596 }
e4048f11 13597})
5deb404d 13598
293fd15f 13599(define_insn "*<sse2_avx2>_uavg<mode>3<mask_name>"
13600 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v")
e4048f11 13601 (truncate:VI12_AVX2
13602 (lshiftrt:<ssedoublemode>
13603 (plus:<ssedoublemode>
13604 (plus:<ssedoublemode>
13605 (zero_extend:<ssedoublemode>
293fd15f 13606 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "%0,v"))
e4048f11 13607 (zero_extend:<ssedoublemode>
293fd15f 13608 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,vm")))
13609 (match_operand:VI12_AVX2 <mask_expand_op3> "const1_operand"))
5802c0cb 13610 (const_int 1))))]
293fd15f 13611 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
13612 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
908dc1fc 13613 "@
e4048f11 13614 pavg<ssemodesuffix>\t{%2, %0|%0, %2}
293fd15f 13615 vpavg<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
908dc1fc 13616 [(set_attr "isa" "noavx,avx")
13617 (set_attr "type" "sseiadd")
13618 (set_attr "prefix_data16" "1,*")
293fd15f 13619 (set_attr "prefix" "orig,<mask_prefix>")
e4048f11 13620 (set_attr "mode" "<sseinsnmode>")])
5802c0cb 13621
009b318f 13622;; The correct representation for this is absolutely enormous, and
5802c0cb 13623;; surely not generally useful.
5deb404d 13624(define_insn "<sse2_avx2>_psadbw" ;; psadbw/vpsadbw, modeled as an opaque UNSPEC_PSADBW instead of explicit arithmetic RTL.
5f3ec3a3 13625 [(set (match_operand:VI8_AVX2_AVX512BW 0 "register_operand" "=x,v") ;; operand 0: result, in a VI8_AVX2_AVX512BW mode
13626 (unspec:VI8_AVX2_AVX512BW
13627 [(match_operand:<ssebytemode> 1 "register_operand" "0,v") ;; operand 1: <ssebytemode> input; tied to operand 0 in alternative 0
13628 (match_operand:<ssebytemode> 2 "nonimmediate_operand" "xm,vm")] ;; operand 2: <ssebytemode> input, register or memory
fd65bafc 13629 UNSPEC_PSADBW))]
5802c0cb 13630 "TARGET_SSE2"
908dc1fc 13631 "@
13632 psadbw\t{%2, %0|%0, %2}
13633 vpsadbw\t{%2, %1, %0|%0, %1, %2}"
13634 [(set_attr "isa" "noavx,avx") ;; alternative 0 is the two-operand form, alternative 1 the three-operand v-prefixed form
13635 (set_attr "type" "sseiadd")
fbfe006e 13636 (set_attr "atom_unit" "simul")
908dc1fc 13637 (set_attr "prefix_data16" "1,*")
5f3ec3a3 13638 (set_attr "prefix" "orig,maybe_evex")
5deb404d 13639 (set_attr "mode" "<sseinsnmode>")])
5802c0cb 13640
63d5e521 13641(define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>" ;; movmsk<ssemodesuffix>: vector register in, SImode general register out, modeled as UNSPEC_MOVMSK.
ed30e0a6 13642 [(set (match_operand:SI 0 "register_operand" "=r") ;; operand 0: SImode result in a general register
13643 (unspec:SI
6a3f5f59 13644 [(match_operand:VF_128_256 1 "register_operand" "x")] ;; operand 1: VF_128_256 source in an SSE register
ed30e0a6 13645 UNSPEC_MOVMSK))]
6fe5844b 13646 "TARGET_SSE"
0061967e 13647 "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
fbfe006e 13648 [(set_attr "type" "ssemov")
ed30e0a6 13649 (set_attr "prefix" "maybe_vex")
3da2a73c 13650 (set_attr "mode" "<MODE>")])
5802c0cb 13651
b1d9adac 13652(define_insn "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext" ;; Same UNSPEC_MOVMSK as the SImode pattern, but matched when the result is zero-extended to DImode.
13653 [(set (match_operand:DI 0 "register_operand" "=r") ;; operand 0: DImode result in a general register
13654 (zero_extend:DI
13655 (unspec:SI
13656 [(match_operand:VF_128_256 1 "register_operand" "x")]
13657 UNSPEC_MOVMSK)))]
13658 "TARGET_64BIT && TARGET_SSE"
13659 "%vmovmsk<ssemodesuffix>\t{%1, %k0|%k0, %1}" ;; %k0 prints operand 0 with the k modifier -- NOTE(review): presumably its 32-bit register name, confirm.
5deb404d 13660 [(set_attr "type" "ssemov")
b1d9adac 13661 (set_attr "prefix" "maybe_vex")
13662 (set_attr "mode" "<MODE>")])
5deb404d 13663
b1d9adac 13664(define_insn "<sse2_avx2>_pmovmskb"
5802c0cb 13665 [(set (match_operand:SI 0 "register_operand" "=r")
b1d9adac 13666 (unspec:SI
13667 [(match_operand:VI1_AVX2 1 "register_operand" "x")]
13668 UNSPEC_MOVMSK))]
5802c0cb 13669 "TARGET_SSE2"
ed30e0a6 13670 "%vpmovmskb\t{%1, %0|%0, %1}"
fbfe006e 13671 [(set_attr "type" "ssemov")
b1d9adac 13672 (set (attr "prefix_data16")
13673 (if_then_else
13674 (match_test "TARGET_AVX")
13675 (const_string "*")
13676 (const_string "1")))
13677 (set_attr "prefix" "maybe_vex")
13678 (set_attr "mode" "SI")])
13679
13680(define_insn "*<sse2_avx2>_pmovmskb_zext"
13681 [(set (match_operand:DI 0 "register_operand" "=r")
13682 (zero_extend:DI
13683 (unspec:SI
13684 [(match_operand:VI1_AVX2 1 "register_operand" "x")]
13685 UNSPEC_MOVMSK)))]
13686 "TARGET_64BIT && TARGET_SSE2"
13687 "%vpmovmskb\t{%1, %k0|%k0, %1}"
13688 [(set_attr "type" "ssemov")
13689 (set (attr "prefix_data16")
13690 (if_then_else
13691 (match_test "TARGET_AVX")
13692 (const_string "*")
13693 (const_string "1")))
ed30e0a6 13694 (set_attr "prefix" "maybe_vex")
1f346cbc 13695 (set_attr "mode" "SI")])
5802c0cb 13696
13697(define_expand "sse2_maskmovdqu" ;; Named expander with no preparation code: emits the UNSPEC_MASKMOV set below as written.
abd4f58b 13698 [(set (match_operand:V16QI 0 "memory_operand") ;; operand 0: V16QI memory destination
13699 (unspec:V16QI [(match_operand:V16QI 1 "register_operand")
13700 (match_operand:V16QI 2 "register_operand")
5802c0cb 13701 (match_dup 0)] ;; the destination also appears as an input of the unspec
13702 UNSPEC_MASKMOV))]
5bd1ff1d 13703 "TARGET_SSE2")
5802c0cb 13704
13705(define_insn "*sse2_maskmovdqu" ;; Matches the UNSPEC_MASKMOV store; the destination address is a Pmode register restricted by constraint "D".
dcab66ec 13706 [(set (mem:V16QI (match_operand:P 0 "register_operand" "D")) ;; operand 0: address register; it is not printed in the output template
5802c0cb 13707 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
13708 (match_operand:V16QI 2 "register_operand" "x")
13709 (mem:V16QI (match_dup 0))] ;; the same memory location is also an input of the unspec
13710 UNSPEC_MASKMOV))]
dcab66ec 13711 "TARGET_SSE2" ;; output code: writes an "addr32" prefix directly to asm_out_file when Pmode != word_mode, then returns the template
4a2a161f 13712{
13713 /* We can't use %^ here due to ASM_OUTPUT_OPCODE processing
13714 that requires %v to be at the beginning of the opcode name. */
13715 if (Pmode != word_mode)
13716 fputs ("\taddr32", asm_out_file);
13717 return "%vmaskmovdqu\t{%2, %1|%1, %2}";
13718}
fbfe006e 13719 [(set_attr "type" "ssemov")
1f346cbc 13720 (set_attr "prefix_data16" "1")
4a2a161f 13721 (set (attr "length_address")
13722 (symbol_ref ("Pmode != word_mode"))) ;; 1 when the addr32 prefix is emitted above, otherwise 0
00a0e418 13723 ;; The implicit %rdi operand confuses default length_vex computation.
13724 (set (attr "length_vex")
dcab66ec 13725 (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))"))) ;; explicit override: 3, plus 1 when operand 2 satisfies REX_SSE_REGNO_P
ed30e0a6 13726 (set_attr "prefix" "maybe_vex")
4c9faaa4 13727 (set_attr "znver1_decode" "vector")
5802c0cb 13728 (set_attr "mode" "TI")])
13729
32513a88 13730(define_insn "sse_ldmxcsr" ;; ldmxcsr from an SImode memory operand, modeled as a volatile unspec (UNSPECV_LDMXCSR).
13731 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")] ;; operand 0: SImode memory source
13732 UNSPECV_LDMXCSR)]
13733 "TARGET_SSE"
ed30e0a6 13734 "%vldmxcsr\t%0"
32513a88 13735 [(set_attr "type" "sse")
fbfe006e 13736 (set_attr "atom_sse_attr" "mxcsr")
ed30e0a6 13737 (set_attr "prefix" "maybe_vex")
32513a88 13738 (set_attr "memory" "load")]) ;; operand 0 is an input only
13739
13740(define_insn "sse_stmxcsr" ;; stmxcsr to an SImode memory operand; the stored value is modeled as a volatile unspec (UNSPECV_STMXCSR).
13741 [(set (match_operand:SI 0 "memory_operand" "=m") ;; operand 0: SImode memory destination
13742 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))] ;; (const_int 0) is only a placeholder input of the unspec
13743 "TARGET_SSE"
ed30e0a6 13744 "%vstmxcsr\t%0"
32513a88 13745 [(set_attr "type" "sse")
fbfe006e 13746 (set_attr "atom_sse_attr" "mxcsr")
ed30e0a6 13747 (set_attr "prefix" "maybe_vex")
32513a88 13748 (set_attr "memory" "store")])
13749
5802c0cb 13750(define_insn "sse2_clflush" ;; clflush on an address operand, modeled as a volatile unspec (UNSPECV_CLFLUSH).
13751 [(unspec_volatile [(match_operand 0 "address_operand" "p")] ;; operand 0: an address (no mode given), not a memory reference
13752 UNSPECV_CLFLUSH)]
13753 "TARGET_SSE2"
13754 "clflush\t%a0" ;; %a0 prints operand 0 as a memory address
13755 [(set_attr "type" "sse")
fbfe006e 13756 (set_attr "atom_sse_attr" "fence")
5802c0cb 13757 (set_attr "memory" "unknown")])
13758
ff6e6cb6 13759;; As per the AMD and Intel ISA manuals, the first operand holds the
13760;; extensions and goes in %ecx; the second operand holds the hints and
13761;; goes in %eax.
5802c0cb 13762(define_insn "sse3_mwait" ;; Emits the bare "mwait" mnemonic; both inputs are implicit and pinned to fixed registers by their constraints.
ff6e6cb6 13763 [(unspec_volatile [(match_operand:SI 0 "register_operand" "c") ;; operand 0: constraint "c" (the cx register)
13764 (match_operand:SI 1 "register_operand" "a")] ;; operand 1: constraint "a" (the ax register)
5802c0cb 13765 UNSPECV_MWAIT)]
13766 "TARGET_SSE3"
106eecb3 13767;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
13768;; Since 32bit register operands are implicitly zero extended to 64bit,
13769;; we only need to set up 32bit registers.
13770 "mwait"
5802c0cb 13771 [(set_attr "length" "3")]) ;; length is given explicitly rather than computed from prefix attributes
13772
4a2a161f 13773(define_insn "sse3_monitor_<mode>" ;; Emits "monitor" with all three inputs implicit; <mode> iterates over the mode of the address operand (P).
bf0a02ba 13774 [(unspec_volatile [(match_operand:P 0 "register_operand" "a") ;; operand 0: pointer-mode value, constraint "a" (the ax register)
106eecb3 13775 (match_operand:SI 1 "register_operand" "c") ;; operand 1: constraint "c" (the cx register)
13776 (match_operand:SI 2 "register_operand" "d")] ;; operand 2: constraint "d" (the dx register)
13777 UNSPECV_MONITOR)]
4a2a161f 13778 "TARGET_SSE3"
106eecb3 13779;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
13780;; RCX and RDX are used. Since 32bit register operands are implicitly
13781;; zero extended to 64bit, we only need to set up 32bit registers.
4a2a161f 13782 "%^monitor"
13783 [(set (attr "length")
13784 (symbol_ref ("(Pmode != word_mode) + 3")))]) ;; 3 bytes, plus 1 when Pmode differs from word_mode -- NOTE(review): presumably for the prefix %^ emits, confirm.
2b4894c5 13785
3da2a73c 13786;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13787;;
13788;; SSSE3 instructions
13789;;
13790;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13791
fd65bafc 13792(define_code_iterator ssse3_plusminus [plus ss_plus minus ss_minus])
2b4894c5 13793
fd65bafc 13794(define_insn "avx2_ph<plusminus_mnemonic>wv16hi3"
5deb404d 13795 [(set (match_operand:V16HI 0 "register_operand" "=x")
13796 (vec_concat:V16HI
13797 (vec_concat:V8HI
13798 (vec_concat:V4HI
13799 (vec_concat:V2HI
fd65bafc 13800 (ssse3_plusminus:HI
5deb404d 13801 (vec_select:HI
13802 (match_operand:V16HI 1 "register_operand" "x")
13803 (parallel [(const_int 0)]))
13804 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
fd65bafc 13805 (ssse3_plusminus:HI
5deb404d 13806 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
13807 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
13808 (vec_concat:V2HI
fd65bafc 13809 (ssse3_plusminus:HI
5deb404d 13810 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
13811 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
fd65bafc 13812 (ssse3_plusminus:HI
5deb404d 13813 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
13814 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
13815 (vec_concat:V4HI
13816 (vec_concat:V2HI
fd65bafc 13817 (ssse3_plusminus:HI
5deb404d 13818 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
13819 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
fd65bafc 13820 (ssse3_plusminus:HI
5deb404d 13821 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
13822 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
13823 (vec_concat:V2HI
fd65bafc 13824 (ssse3_plusminus:HI
5deb404d 13825 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
13826 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
fd65bafc 13827 (ssse3_plusminus:HI
5deb404d 13828 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
13829 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
13830 (vec_concat:V8HI
13831 (vec_concat:V4HI
13832 (vec_concat:V2HI
fd65bafc 13833 (ssse3_plusminus:HI
5deb404d 13834 (vec_select:HI
13835 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
13836 (parallel [(const_int 0)]))
13837 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
fd65bafc 13838 (ssse3_plusminus:HI
5deb404d 13839 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
13840 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
13841 (vec_concat:V2HI
fd65bafc 13842 (ssse3_plusminus:HI
5deb404d 13843 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
13844 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
fd65bafc 13845 (ssse3_plusminus:HI
5deb404d 13846 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
13847 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
13848 (vec_concat:V4HI
13849 (vec_concat:V2HI
fd65bafc 13850 (ssse3_plusminus:HI
5deb404d 13851 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
13852 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
fd65bafc 13853 (ssse3_plusminus:HI
5deb404d 13854 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
13855 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
13856 (vec_concat:V2HI
fd65bafc 13857 (ssse3_plusminus:HI
5deb404d 13858 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
13859 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
fd65bafc 13860 (ssse3_plusminus:HI
5deb404d 13861 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
13862 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
13863 "TARGET_AVX2"
fd65bafc 13864 "vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
5deb404d 13865 [(set_attr "type" "sseiadd")
13866 (set_attr "prefix_extra" "1")
13867 (set_attr "prefix" "vex")
13868 (set_attr "mode" "OI")])
13869
fd65bafc 13870(define_insn "ssse3_ph<plusminus_mnemonic>wv8hi3"
908dc1fc 13871 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
2b4894c5 13872 (vec_concat:V8HI
13873 (vec_concat:V4HI
13874 (vec_concat:V2HI
fd65bafc 13875 (ssse3_plusminus:HI
2b4894c5 13876 (vec_select:HI
908dc1fc 13877 (match_operand:V8HI 1 "register_operand" "0,x")
2b4894c5 13878 (parallel [(const_int 0)]))
13879 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
fd65bafc 13880 (ssse3_plusminus:HI
2b4894c5 13881 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
13882 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
13883 (vec_concat:V2HI
fd65bafc 13884 (ssse3_plusminus:HI
2b4894c5 13885 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
13886 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
fd65bafc 13887 (ssse3_plusminus:HI
2b4894c5 13888 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
13889 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
13890 (vec_concat:V4HI
13891 (vec_concat:V2HI
fd65bafc 13892 (ssse3_plusminus:HI
2b4894c5 13893 (vec_select:HI
908dc1fc 13894 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
2b4894c5 13895 (parallel [(const_int 0)]))
13896 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
fd65bafc 13897 (ssse3_plusminus:HI
2b4894c5 13898 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
13899 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
13900 (vec_concat:V2HI
fd65bafc 13901 (ssse3_plusminus:HI
2b4894c5 13902 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
13903 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
fd65bafc 13904 (ssse3_plusminus:HI
2b4894c5 13905 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
13906 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
13907 "TARGET_SSSE3"
908dc1fc 13908 "@
fd65bafc 13909 ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}
13910 vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
908dc1fc 13911 [(set_attr "isa" "noavx,avx")
13912 (set_attr "type" "sseiadd")
fbfe006e 13913 (set_attr "atom_unit" "complex")
908dc1fc 13914 (set_attr "prefix_data16" "1,*")
1f346cbc 13915 (set_attr "prefix_extra" "1")
908dc1fc 13916 (set_attr "prefix" "orig,vex")
2b4894c5 13917 (set_attr "mode" "TI")])
13918
fd65bafc 13919(define_insn "ssse3_ph<plusminus_mnemonic>wv4hi3"
2b4894c5 13920 [(set (match_operand:V4HI 0 "register_operand" "=y")
13921 (vec_concat:V4HI
13922 (vec_concat:V2HI
fd65bafc 13923 (ssse3_plusminus:HI
2b4894c5 13924 (vec_select:HI
13925 (match_operand:V4HI 1 "register_operand" "0")
13926 (parallel [(const_int 0)]))
13927 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
fd65bafc 13928 (ssse3_plusminus:HI
2b4894c5 13929 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
13930 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
13931 (vec_concat:V2HI
fd65bafc 13932 (ssse3_plusminus:HI
2b4894c5 13933 (vec_select:HI
13934 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
13935 (parallel [(const_int 0)]))
13936 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
fd65bafc 13937 (ssse3_plusminus:HI
2b4894c5 13938 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
13939 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
13940 "TARGET_SSSE3"
fd65bafc 13941 "ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}"
2b4894c5 13942 [(set_attr "type" "sseiadd")
fbfe006e 13943 (set_attr "atom_unit" "complex")
1f346cbc 13944 (set_attr "prefix_extra" "1")
00a0e418 13945 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
2b4894c5 13946 (set_attr "mode" "DI")])
13947
fd65bafc 13948(define_insn "avx2_ph<plusminus_mnemonic>dv8si3"
5deb404d 13949 [(set (match_operand:V8SI 0 "register_operand" "=x")
13950 (vec_concat:V8SI
13951 (vec_concat:V4SI
13952 (vec_concat:V2SI
fd65bafc 13953 (plusminus:SI
5deb404d 13954 (vec_select:SI
13955 (match_operand:V8SI 1 "register_operand" "x")
13956 (parallel [(const_int 0)]))
13957 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
fd65bafc 13958 (plusminus:SI
5deb404d 13959 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
13960 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
13961 (vec_concat:V2SI
fd65bafc 13962 (plusminus:SI
5deb404d 13963 (vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
13964 (vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
fd65bafc 13965 (plusminus:SI
5deb404d 13966 (vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
13967 (vec_select:SI (match_dup 1) (parallel [(const_int 7)])))))
13968 (vec_concat:V4SI
13969 (vec_concat:V2SI
fd65bafc 13970 (plusminus:SI
5deb404d 13971 (vec_select:SI
13972 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
13973 (parallel [(const_int 0)]))
13974 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
fd65bafc 13975 (plusminus:SI
5deb404d 13976 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
13977 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))
13978 (vec_concat:V2SI
fd65bafc 13979 (plusminus:SI
5deb404d 13980 (vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
13981 (vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
fd65bafc 13982 (plusminus:SI
5deb404d 13983 (vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
13984 (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
13985 "TARGET_AVX2"
fd65bafc 13986 "vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
5deb404d 13987 [(set_attr "type" "sseiadd")
13988 (set_attr "prefix_extra" "1")
13989 (set_attr "prefix" "vex")
13990 (set_attr "mode" "OI")])
13991
fd65bafc 13992(define_insn "ssse3_ph<plusminus_mnemonic>dv4si3"
908dc1fc 13993 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
2b4894c5 13994 (vec_concat:V4SI
13995 (vec_concat:V2SI
fd65bafc 13996 (plusminus:SI
2b4894c5 13997 (vec_select:SI
908dc1fc 13998 (match_operand:V4SI 1 "register_operand" "0,x")
2b4894c5 13999 (parallel [(const_int 0)]))
14000 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
fd65bafc 14001 (plusminus:SI
2b4894c5 14002 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
14003 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
14004 (vec_concat:V2SI
fd65bafc 14005 (plusminus:SI
2b4894c5 14006 (vec_select:SI
908dc1fc 14007 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
2b4894c5 14008 (parallel [(const_int 0)]))
14009 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
fd65bafc 14010 (plusminus:SI
2b4894c5 14011 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
14012 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
14013 "TARGET_SSSE3"
908dc1fc 14014 "@
fd65bafc 14015 ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}
14016 vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
908dc1fc 14017 [(set_attr "isa" "noavx,avx")
14018 (set_attr "type" "sseiadd")
fbfe006e 14019 (set_attr "atom_unit" "complex")
908dc1fc 14020 (set_attr "prefix_data16" "1,*")
1f346cbc 14021 (set_attr "prefix_extra" "1")
908dc1fc 14022 (set_attr "prefix" "orig,vex")
2b4894c5 14023 (set_attr "mode" "TI")])
14024
fd65bafc 14025(define_insn "ssse3_ph<plusminus_mnemonic>dv2si3"
2b4894c5 14026 [(set (match_operand:V2SI 0 "register_operand" "=y")
14027 (vec_concat:V2SI
fd65bafc 14028 (plusminus:SI
2b4894c5 14029 (vec_select:SI
14030 (match_operand:V2SI 1 "register_operand" "0")
14031 (parallel [(const_int 0)]))
14032 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
fd65bafc 14033 (plusminus:SI
2b4894c5 14034 (vec_select:SI
14035 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
14036 (parallel [(const_int 0)]))
14037 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
14038 "TARGET_SSSE3"
fd65bafc 14039 "ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}"
2b4894c5 14040 [(set_attr "type" "sseiadd")
fbfe006e 14041 (set_attr "atom_unit" "complex")
ed30e0a6 14042 (set_attr "prefix_extra" "1")
00a0e418 14043 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
ed30e0a6 14044 (set_attr "mode" "DI")])
14045
5deb404d 14046(define_insn "avx2_pmaddubsw256"
14047 [(set (match_operand:V16HI 0 "register_operand" "=x")
14048 (ss_plus:V16HI
14049 (mult:V16HI
14050 (zero_extend:V16HI
14051 (vec_select:V16QI
14052 (match_operand:V32QI 1 "register_operand" "x")
04d95c72 14053 (parallel [(const_int 0) (const_int 2)
14054 (const_int 4) (const_int 6)
14055 (const_int 8) (const_int 10)
14056 (const_int 12) (const_int 14)
14057 (const_int 16) (const_int 18)
14058 (const_int 20) (const_int 22)
14059 (const_int 24) (const_int 26)
14060 (const_int 28) (const_int 30)])))
5deb404d 14061 (sign_extend:V16HI
14062 (vec_select:V16QI
14063 (match_operand:V32QI 2 "nonimmediate_operand" "xm")
04d95c72 14064 (parallel [(const_int 0) (const_int 2)
14065 (const_int 4) (const_int 6)
14066 (const_int 8) (const_int 10)
14067 (const_int 12) (const_int 14)
14068 (const_int 16) (const_int 18)
14069 (const_int 20) (const_int 22)
14070 (const_int 24) (const_int 26)
14071 (const_int 28) (const_int 30)]))))
5deb404d 14072 (mult:V16HI
14073 (zero_extend:V16HI
14074 (vec_select:V16QI (match_dup 1)
04d95c72 14075 (parallel [(const_int 1) (const_int 3)
14076 (const_int 5) (const_int 7)
14077 (const_int 9) (const_int 11)
14078 (const_int 13) (const_int 15)
14079 (const_int 17) (const_int 19)
14080 (const_int 21) (const_int 23)
14081 (const_int 25) (const_int 27)
14082 (const_int 29) (const_int 31)])))
5deb404d 14083 (sign_extend:V16HI
14084 (vec_select:V16QI (match_dup 2)
04d95c72 14085 (parallel [(const_int 1) (const_int 3)
14086 (const_int 5) (const_int 7)
14087 (const_int 9) (const_int 11)
14088 (const_int 13) (const_int 15)
14089 (const_int 17) (const_int 19)
14090 (const_int 21) (const_int 23)
14091 (const_int 25) (const_int 27)
14092 (const_int 29) (const_int 31)]))))))]
5deb404d 14093 "TARGET_AVX2"
14094 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
14095 [(set_attr "type" "sseiadd")
14096 (set_attr "prefix_extra" "1")
14097 (set_attr "prefix" "vex")
14098 (set_attr "mode" "OI")])
14099
d58134c2 14100;; The correct representation for this is absolutely enormous, and
14101;; surely not generally useful.
14102(define_insn "avx512bw_pmaddubsw512<mode><mask_name>" ;; vpmaddubsw (EVEX form, optional masking via <mask_name>), modeled as an opaque UNSPEC_PMADDUBSW512.
14103 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v") ;; operand 0: result, in a VI2_AVX512VL mode
14104 (unspec:VI2_AVX512VL
14105 [(match_operand:<dbpsadbwmode> 1 "register_operand" "v") ;; operand 1: <dbpsadbwmode> input register
14106 (match_operand:<dbpsadbwmode> 2 "nonimmediate_operand" "vm")] ;; operand 2: <dbpsadbwmode> input, register or memory
14107 UNSPEC_PMADDUBSW512))]
14108 "TARGET_AVX512BW"
14109 "vpmaddubsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14110 [(set_attr "type" "sseiadd")
14111 (set_attr "prefix" "evex")
14112 (set_attr "mode" "XI")]) ;; NOTE(review): XI is hard-coded although the pattern iterates over VI2_AVX512VL; <sseinsnmode> may be intended -- confirm.
14113
20144456 14114(define_insn "avx512bw_umulhrswv32hi3<mask_name>"
14115 [(set (match_operand:V32HI 0 "register_operand" "=v")
14116 (truncate:V32HI
14117 (lshiftrt:V32SI
14118 (plus:V32SI
14119 (lshiftrt:V32SI
14120 (mult:V32SI
14121 (sign_extend:V32SI
14122 (match_operand:V32HI 1 "nonimmediate_operand" "%v"))
14123 (sign_extend:V32SI
14124 (match_operand:V32HI 2 "nonimmediate_operand" "vm")))
14125 (const_int 14))
14126 (const_vector:V32HI [(const_int 1) (const_int 1)
14127 (const_int 1) (const_int 1)
14128 (const_int 1) (const_int 1)
14129 (const_int 1) (const_int 1)
14130 (const_int 1) (const_int 1)
14131 (const_int 1) (const_int 1)
14132 (const_int 1) (const_int 1)
14133 (const_int 1) (const_int 1)
14134 (const_int 1) (const_int 1)
14135 (const_int 1) (const_int 1)
14136 (const_int 1) (const_int 1)
14137 (const_int 1) (const_int 1)
14138 (const_int 1) (const_int 1)
14139 (const_int 1) (const_int 1)
14140 (const_int 1) (const_int 1)
14141 (const_int 1) (const_int 1)]))
14142 (const_int 1))))]
14143 "TARGET_AVX512BW"
14144 "vpmulhrsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14145 [(set_attr "type" "sseimul")
14146 (set_attr "prefix" "evex")
14147 (set_attr "mode" "XI")])
14148
6f50184d 14149(define_insn "ssse3_pmaddubsw128"
908dc1fc 14150 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
2b4894c5 14151 (ss_plus:V8HI
14152 (mult:V8HI
14153 (zero_extend:V8HI
1fda60c6 14154 (vec_select:V8QI
908dc1fc 14155 (match_operand:V16QI 1 "register_operand" "0,x")
04d95c72 14156 (parallel [(const_int 0) (const_int 2)
14157 (const_int 4) (const_int 6)
14158 (const_int 8) (const_int 10)
14159 (const_int 12) (const_int 14)])))
2b4894c5 14160 (sign_extend:V8HI
14161 (vec_select:V8QI
908dc1fc 14162 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")
04d95c72 14163 (parallel [(const_int 0) (const_int 2)
14164 (const_int 4) (const_int 6)
14165 (const_int 8) (const_int 10)
14166 (const_int 12) (const_int 14)]))))
2b4894c5 14167 (mult:V8HI
14168 (zero_extend:V8HI
1fda60c6 14169 (vec_select:V8QI (match_dup 1)
04d95c72 14170 (parallel [(const_int 1) (const_int 3)
14171 (const_int 5) (const_int 7)
14172 (const_int 9) (const_int 11)
14173 (const_int 13) (const_int 15)])))
2b4894c5 14174 (sign_extend:V8HI
1fda60c6 14175 (vec_select:V8QI (match_dup 2)
04d95c72 14176 (parallel [(const_int 1) (const_int 3)
14177 (const_int 5) (const_int 7)
14178 (const_int 9) (const_int 11)
14179 (const_int 13) (const_int 15)]))))))]
2b4894c5 14180 "TARGET_SSSE3"
908dc1fc 14181 "@
14182 pmaddubsw\t{%2, %0|%0, %2}
14183 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
14184 [(set_attr "isa" "noavx,avx")
14185 (set_attr "type" "sseiadd")
fbfe006e 14186 (set_attr "atom_unit" "simul")
908dc1fc 14187 (set_attr "prefix_data16" "1,*")
1f346cbc 14188 (set_attr "prefix_extra" "1")
908dc1fc 14189 (set_attr "prefix" "orig,vex")
2b4894c5 14190 (set_attr "mode" "TI")])
14191
;; 64-bit (V4HI result) form of pmaddubsw.  The even- and odd-indexed bytes
;; of operand 1 are zero-extended and those of operand 2 sign-extended; the
;; corresponding pairs are multiplied as V4HI and the two product vectors
;; are added with signed saturation (ss_plus).  Operand 1 is tied to the
;; destination ("0" constraint).
6f50184d 14192(define_insn "ssse3_pmaddubsw"
2b4894c5 14193 [(set (match_operand:V4HI 0 "register_operand" "=y")
14194 (ss_plus:V4HI
14195 (mult:V4HI
14196 (zero_extend:V4HI
14197 (vec_select:V4QI
7c839b3f 14198 (match_operand:V8QI 1 "register_operand" "0")
04d95c72 14199 (parallel [(const_int 0) (const_int 2)
14200 (const_int 4) (const_int 6)])))
2b4894c5 14201 (sign_extend:V4HI
14202 (vec_select:V4QI
14203 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
04d95c72 14204 (parallel [(const_int 0) (const_int 2)
14205 (const_int 4) (const_int 6)]))))
2b4894c5 14206 (mult:V4HI
14207 (zero_extend:V4HI
1fda60c6 14208 (vec_select:V4QI (match_dup 1)
04d95c72 14209 (parallel [(const_int 1) (const_int 3)
14210 (const_int 5) (const_int 7)])))
2b4894c5 14211 (sign_extend:V4HI
1fda60c6 14212 (vec_select:V4QI (match_dup 2)
04d95c72 14213 (parallel [(const_int 1) (const_int 3)
14214 (const_int 5) (const_int 7)]))))))]
2b4894c5 14215 "TARGET_SSSE3"
14216 "pmaddubsw\t{%2, %0|%0, %2}"
14217 [(set_attr "type" "sseiadd")
fbfe006e 14218 (set_attr "atom_unit" "simul")
1f346cbc 14219 (set_attr "prefix_extra" "1")
5deb404d 14220 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
14221 (set_attr "mode" "DI")])
14222
;; Modes iterated over by the pmulhrsw expanders that follow; V16HI is only
;; enabled when TARGET_AVX2 holds.
e4048f11 14223(define_mode_iterator PMULHRSW
14224 [V4HI V8HI (V16HI "TARGET_AVX2")])
14225
;; Masked pmulhrsw expander.  The value computed is
;;   truncate ((((sext (op1) * sext (op2)) >> 14) + op5) >> 1)
;; which is then merged with operand 3 under the mask in operand 4.
;; Operand 5 is not supplied by the caller: the preparation code sets it to
;; CONST1_RTX of the vector mode before the binary operands are fixed up.
20144456 14226(define_expand "<ssse3_avx2>_pmulhrsw<mode>3_mask"
14227 [(set (match_operand:PMULHRSW 0 "register_operand")
14228 (vec_merge:PMULHRSW
14229 (truncate:PMULHRSW
14230 (lshiftrt:<ssedoublemode>
14231 (plus:<ssedoublemode>
14232 (lshiftrt:<ssedoublemode>
14233 (mult:<ssedoublemode>
14234 (sign_extend:<ssedoublemode>
14235 (match_operand:PMULHRSW 1 "nonimmediate_operand"))
14236 (sign_extend:<ssedoublemode>
14237 (match_operand:PMULHRSW 2 "nonimmediate_operand")))
14238 (const_int 14))
14239 (match_dup 5))
14240 (const_int 1)))
14241 (match_operand:PMULHRSW 3 "register_operand")
14242 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
14243 "TARGET_AVX512BW && TARGET_AVX512VL"
14244{
14245 operands[5] = CONST1_RTX(<MODE>mode);
14246 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
14247})
14248
;; Unmasked pmulhrsw expander.  The value computed is
;;   truncate ((((sext (op1) * sext (op2)) >> 14) + op3) >> 1)
;; where operand 3 is set by the preparation code to CONST1_RTX of the
;; vector mode.  The condition is TARGET_SSSE3: the PMULHRSW iterator
;; already restricts V16HI to TARGET_AVX2, and the insns this expands to
;; for V4HI and V8HI only require SSSE3.
e4048f11 14249(define_expand "<ssse3_avx2>_pmulhrsw<mode>3"
14250 [(set (match_operand:PMULHRSW 0 "register_operand")
14251 (truncate:PMULHRSW
14252 (lshiftrt:<ssedoublemode>
14253 (plus:<ssedoublemode>
14254 (lshiftrt:<ssedoublemode>
14255 (mult:<ssedoublemode>
14256 (sign_extend:<ssedoublemode>
14257 (match_operand:PMULHRSW 1 "nonimmediate_operand"))
14258 (sign_extend:<ssedoublemode>
14259 (match_operand:PMULHRSW 2 "nonimmediate_operand")))
5deb404d 14260 (const_int 14))
e4048f11 14261 (match_dup 3))
5deb404d 14262 (const_int 1))))]
14263 "TARGET_SSSE3"
e4048f11 14264{
14265 operands[3] = CONST1_RTX(<MODE>mode);
14266 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
14267})
5deb404d 14268
;; Matching insn for pmulhrsw on the VI2_AVX2 modes, optionally masked via
;; <mask_name>.  The RTL is
;;   truncate ((((sext (op1) * sext (op2)) >> 14) + op3) >> 1)
;; and operand 3 must satisfy const1_operand.  Alternative 0 is the
;; two-operand form (isa "noavx", operand 1 tied to the destination);
;; alternative 1 is the three-operand v-prefixed form (isa "avx").
2d71b728 14269(define_insn "*<ssse3_avx2>_pmulhrsw<mode>3<mask_name>"
14270 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
e8610aac 14271 (truncate:VI2_AVX2
14272 (lshiftrt:<ssedoublemode>
14273 (plus:<ssedoublemode>
14274 (lshiftrt:<ssedoublemode>
14275 (mult:<ssedoublemode>
14276 (sign_extend:<ssedoublemode>
2d71b728 14277 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,v"))
e8610aac 14278 (sign_extend:<ssedoublemode>
2d71b728 14279 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,vm")))
2b4894c5 14280 (const_int 14))
e8610aac 14281 (match_operand:VI2_AVX2 3 "const1_operand"))
2b4894c5 14282 (const_int 1))))]
2d71b728 14283 "TARGET_SSSE3 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
14284 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
908dc1fc 14285 "@
14286 pmulhrsw\t{%2, %0|%0, %2}
2d71b728 14287 vpmulhrsw\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"
908dc1fc 14288 [(set_attr "isa" "noavx,avx")
14289 (set_attr "type" "sseimul")
14290 (set_attr "prefix_data16" "1,*")
1f346cbc 14291 (set_attr "prefix_extra" "1")
2d71b728 14292 (set_attr "prefix" "orig,maybe_evex")
e8610aac 14293 (set_attr "mode" "<sseinsnmode>")])
2b4894c5 14294
;; 64-bit (V4HI) matching insn for pmulhrsw.  Same RTL shape as the vector
;; pattern above it in this file: the V4SI product of the sign-extended
;; inputs is shifted right by 14, operand 3 (a const1_operand) is added and
;; the sum is shifted right by 1 before truncation to V4HI.
7c839b3f 14295(define_insn "*ssse3_pmulhrswv4hi3"
2b4894c5 14296 [(set (match_operand:V4HI 0 "register_operand" "=y")
14297 (truncate:V4HI
14298 (lshiftrt:V4SI
14299 (plus:V4SI
14300 (lshiftrt:V4SI
14301 (mult:V4SI
14302 (sign_extend:V4SI
14303 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
14304 (sign_extend:V4SI
14305 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
14306 (const_int 14))
e4048f11 14307 (match_operand:V4HI 3 "const1_operand"))
2b4894c5 14308 (const_int 1))))]
14309 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
14310 "pmulhrsw\t{%2, %0|%0, %2}"
14311 [(set_attr "type" "sseimul")
1f346cbc 14312 (set_attr "prefix_extra" "1")
00a0e418 14313 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
2b4894c5 14314 (set_attr "mode" "DI")])
14315
;; pshufb on the VI1_AVX512 modes, optionally masked via <mask_name>.  The
;; operation is opaque to the RTL optimizers (UNSPEC_PSHUFB of operands 1
;; and 2).  Alternative 0 is the two-operand non-AVX form with operand 1
;; tied to the destination; alternative 1 is the three-operand vpshufb.
201f262d 14316(define_insn "<ssse3_avx2>_pshufb<mode>3<mask_name>"
14317 [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,v")
14318 (unspec:VI1_AVX512
14319 [(match_operand:VI1_AVX512 1 "register_operand" "0,v")
14320 (match_operand:VI1_AVX512 2 "nonimmediate_operand" "xm,vm")]
fd65bafc 14321 UNSPEC_PSHUFB))]
201f262d 14322 "TARGET_SSSE3 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
908dc1fc 14323 "@
14324 pshufb\t{%2, %0|%0, %2}
201f262d 14325 vpshufb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
908dc1fc 14326 [(set_attr "isa" "noavx,avx")
14327 (set_attr "type" "sselog1")
14328 (set_attr "prefix_data16" "1,*")
1f346cbc 14329 (set_attr "prefix_extra" "1")
201f262d 14330 (set_attr "prefix" "orig,maybe_evex")
6470d004 14331 (set_attr "btver2_decode" "vector,vector")
5deb404d 14332 (set_attr "mode" "<sseinsnmode>")])
2b4894c5 14333
;; 64-bit (V8QI) form of pshufb, expressed as UNSPEC_PSHUFB of operands 1
;; and 2 with operand 1 tied to the destination.
;; NOTE(review): the ';' after the output template starts a comment in the
;; .md syntax, so it looks harmless but redundant -- confirm before removing.
14334(define_insn "ssse3_pshufbv8qi3"
14335 [(set (match_operand:V8QI 0 "register_operand" "=y")
14336 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
14337 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
2a466fea 14338 UNSPEC_PSHUFB))]
2b4894c5 14339 "TARGET_SSSE3"
14340 "pshufb\t{%2, %0|%0, %2}";
14341 [(set_attr "type" "sselog1")
1f346cbc 14342 (set_attr "prefix_extra" "1")
00a0e418 14343 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
2b4894c5 14344 (set_attr "mode" "DI")])
14345
;; psign{b,w,d} on the VI124_AVX2 modes, expressed as UNSPEC_PSIGN of
;; operands 1 and 2.  Alternative 0 is the two-operand non-AVX form with
;; operand 1 tied to the destination; alternative 1 is the three-operand
;; vpsign form.  The mnemonic suffix comes from <ssemodesuffix>.
5deb404d 14346(define_insn "<ssse3_avx2>_psign<mode>3"
14347 [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
14348 (unspec:VI124_AVX2
14349 [(match_operand:VI124_AVX2 1 "register_operand" "0,x")
14350 (match_operand:VI124_AVX2 2 "nonimmediate_operand" "xm,xm")]
2a466fea 14351 UNSPEC_PSIGN))]
2b4894c5 14352 "TARGET_SSSE3"
908dc1fc 14353 "@
63d5e521 14354 psign<ssemodesuffix>\t{%2, %0|%0, %2}
14355 vpsign<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
908dc1fc 14356 [(set_attr "isa" "noavx,avx")
14357 (set_attr "type" "sselog1")
14358 (set_attr "prefix_data16" "1,*")
1f346cbc 14359 (set_attr "prefix_extra" "1")
908dc1fc 14360 (set_attr "prefix" "orig,vex")
5deb404d 14361 (set_attr "mode" "<sseinsnmode>")])
2b4894c5 14362
;; psign on the MMXMODEI modes, expressed as UNSPEC_PSIGN of operands 1 and
;; 2 with operand 1 tied to the destination.  The mnemonic suffix comes
;; from <mmxvecsize>.
;; NOTE(review): the ';' after the output template starts a comment in the
;; .md syntax, so it looks harmless but redundant -- confirm before removing.
14363(define_insn "ssse3_psign<mode>3"
14364 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
2a466fea 14365 (unspec:MMXMODEI
14366 [(match_operand:MMXMODEI 1 "register_operand" "0")
14367 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
14368 UNSPEC_PSIGN))]
2b4894c5 14369 "TARGET_SSSE3"
14370 "psign<mmxvecsize>\t{%2, %0|%0, %2}";
14371 [(set_attr "type" "sselog1")
1f346cbc 14372 (set_attr "prefix_extra" "1")
00a0e418 14373 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
2b4894c5 14374 (set_attr "mode" "DI")])
14375
;; Masked vpalignr on the VI1_AVX512 modes: the UNSPEC_PALIGNR result is
;; merged with operand 4 under the mask in operand 5.  Operand 3 must
;; satisfy const_0_to_255_mul_8_operand and is divided by 8 before the
;; instruction is printed, so the RTL value is eight times the assembly
;; immediate (presumably a shift count in bits -- confirm).
d49df830 14376(define_insn "<ssse3_avx2>_palignr<mode>_mask"
dfd41e6d 14377 [(set (match_operand:VI1_AVX512 0 "register_operand" "=v")
14378 (vec_merge:VI1_AVX512
14379 (unspec:VI1_AVX512
14380 [(match_operand:VI1_AVX512 1 "register_operand" "v")
14381 (match_operand:VI1_AVX512 2 "nonimmediate_operand" "vm")
d49df830 14382 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
14383 UNSPEC_PALIGNR)
dfd41e6d 14384 (match_operand:VI1_AVX512 4 "vector_move_operand" "0C")
d49df830 14385 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
14386 "TARGET_AVX512BW && (<MODE_SIZE> == 64 || TARGET_AVX512VL)"
14387{
14388 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
14389 return "vpalignr\t{%3, %2, %1, %0%{%5%}%N4|%0%{%5%}%N4, %1, %2, %3}";
14390}
14391 [(set_attr "type" "sseishft")
14392 (set_attr "atom_unit" "sishuf")
14393 (set_attr "prefix_extra" "1")
14394 (set_attr "length_immediate" "1")
14395 (set_attr "prefix" "evex")
14396 (set_attr "mode" "<sseinsnmode>")])
14397
;; Unmasked palignr on the SSESCALARMODE modes (UNSPEC_PALIGNR).  Operand 3
;; is divided by 8 before printing, so the RTL value is eight times the
;; assembly immediate.  Alternative 0 prints the two-operand palignr with
;; operand 1 tied to the destination; alternative 1 prints the
;; three-operand vpalignr.
5deb404d 14398(define_insn "<ssse3_avx2>_palignr<mode>"
d49df830 14399 [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,v")
fd65bafc 14400 (unspec:SSESCALARMODE
d49df830 14401 [(match_operand:SSESCALARMODE 1 "register_operand" "0,v")
14402 (match_operand:SSESCALARMODE 2 "nonimmediate_operand" "xm,vm")
fd65bafc 14403 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")]
14404 UNSPEC_PALIGNR))]
2b4894c5 14405 "TARGET_SSSE3"
14406{
14407 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
908dc1fc 14408
14409 switch (which_alternative)
14410 {
14411 case 0:
14412 return "palignr\t{%3, %2, %0|%0, %2, %3}";
14413 case 1:
14414 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
14415 default:
14416 gcc_unreachable ();
14417 }
2b4894c5 14418}
908dc1fc 14419 [(set_attr "isa" "noavx,avx")
14420 (set_attr "type" "sseishft")
fbfe006e 14421 (set_attr "atom_unit" "sishuf")
908dc1fc 14422 (set_attr "prefix_data16" "1,*")
1f346cbc 14423 (set_attr "prefix_extra" "1")
00a0e418 14424 (set_attr "length_immediate" "1")
908dc1fc 14425 (set_attr "prefix" "orig,vex")
5deb404d 14426 (set_attr "mode" "<sseinsnmode>")])
2b4894c5 14427
;; 64-bit (DImode) form of palignr (UNSPEC_PALIGNR) with operand 1 tied to
;; the destination.  Operand 3 is divided by 8 before printing, so the RTL
;; value is eight times the assembly immediate.
14428(define_insn "ssse3_palignrdi"
14429 [(set (match_operand:DI 0 "register_operand" "=y")
14430 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
14431 (match_operand:DI 2 "nonimmediate_operand" "ym")
14432 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
2a466fea 14433 UNSPEC_PALIGNR))]
2b4894c5 14434 "TARGET_SSSE3"
14435{
14436 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
14437 return "palignr\t{%3, %2, %0|%0, %2, %3}";
14438}
14439 [(set_attr "type" "sseishft")
fbfe006e 14440 (set_attr "atom_unit" "sishuf")
1f346cbc 14441 (set_attr "prefix_extra" "1")
00a0e418 14442 (set_attr "length_immediate" "1")
14443 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
2b4894c5 14444 (set_attr "mode" "DI")])
14445
e75eecf6 14446;; Mode iterator for the abs patterns.  There is no V2DI or V4DI abs
14447;; instruction on pre-AVX-512 targets, so those modes require AVX512VL.
14448(define_mode_iterator VI1248_AVX512VL_AVX512BW
14449 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
14450 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
14451 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
14452 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
14453
;; Matching insn for vector integer abs on the VI1248_AVX512VL_AVX512BW
;; modes; prints pabs/vpabs with the element suffix from <ssemodesuffix>.
12803fe0 14454(define_insn "*abs<mode>2"
e75eecf6 14455 [(set (match_operand:VI1248_AVX512VL_AVX512BW 0 "register_operand" "=v")
14456 (abs:VI1248_AVX512VL_AVX512BW
14457 (match_operand:VI1248_AVX512VL_AVX512BW 1 "nonimmediate_operand" "vm")))]
12803fe0 14458 "TARGET_SSSE3"
14459 "%vpabs<ssemodesuffix>\t{%1, %0|%0, %1}"
2b4894c5 14460 [(set_attr "type" "sselog1")
1f346cbc 14461 (set_attr "prefix_data16" "1")
14462 (set_attr "prefix_extra" "1")
ed30e0a6 14463 (set_attr "prefix" "maybe_vex")
5deb404d 14464 (set_attr "mode" "<sseinsnmode>")])
2b4894c5 14465
;; Masked abs for the VI48_AVX512VL modes: abs of operand 1 is merged with
;; operand 2 under the mask in operand 3.
12803fe0 14466(define_insn "abs<mode>2_mask"
14467 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
14468 (vec_merge:VI48_AVX512VL
14469 (abs:VI48_AVX512VL
14470 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm"))
14471 (match_operand:VI48_AVX512VL 2 "vector_move_operand" "0C")
14472 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
14473 "TARGET_AVX512F"
14474 "vpabs<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
14475 [(set_attr "type" "sselog1")
14476 (set_attr "prefix" "evex")
14477 (set_attr "mode" "<sseinsnmode>")])
14478
;; Masked abs for the VI12_AVX512VL modes (requires TARGET_AVX512BW): abs
;; of operand 1 is merged with operand 2 under the mask in operand 3.
14479(define_insn "abs<mode>2_mask"
14480 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
14481 (vec_merge:VI12_AVX512VL
14482 (abs:VI12_AVX512VL
14483 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "vm"))
14484 (match_operand:VI12_AVX512VL 2 "vector_move_operand" "0C")
14485 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
14486 "TARGET_AVX512BW"
14487 "vpabs<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
14488 [(set_attr "type" "sselog1")
14489 (set_attr "prefix" "evex")
14490 (set_attr "mode" "<sseinsnmode>")])
14491
;; Named abs expander, available from TARGET_SSE2.  Without SSSE3 the
;; expansion is delegated to ix86_expand_sse2_abs and the pattern RTL is not
;; emitted (DONE); with SSSE3 the abs RTL in the pattern is emitted as is.
95e3231d 14492(define_expand "abs<mode>2"
e75eecf6 14493 [(set (match_operand:VI1248_AVX512VL_AVX512BW 0 "register_operand")
14494 (abs:VI1248_AVX512VL_AVX512BW
14495 (match_operand:VI1248_AVX512VL_AVX512BW 1 "nonimmediate_operand")))]
95e3231d 14496 "TARGET_SSE2"
14497{
14498 if (!TARGET_SSSE3)
14499 {
14500 ix86_expand_sse2_abs (operands[0], operands[1]);
14501 DONE;
14502 }
14503})
14504
;; abs on the MMXMODEI modes; prints pabs with the suffix from <mmxvecsize>.
;; NOTE(review): the ';' after the output template starts a comment in the
;; .md syntax, so it looks harmless but redundant -- confirm before removing.
2b4894c5 14505(define_insn "abs<mode>2"
14506 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
908dc1fc 14507 (abs:MMXMODEI
14508 (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
2b4894c5 14509 "TARGET_SSSE3"
14510 "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
14511 [(set_attr "type" "sselog1")
00a0e418 14512 (set_attr "prefix_rep" "0")
1f346cbc 14513 (set_attr "prefix_extra" "1")
00a0e418 14514 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
2b4894c5 14515 (set_attr "mode" "DI")])
3d775f8e 14516
14517;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14518;;
14519;; AMD SSE4A instructions
14520;;
14521;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14522
;; SSE4A scalar store expressed as UNSPEC_MOVNT: writes the MODEF register
;; operand 1 to the memory operand 0 using movnt<ssemodesuffix>.
3da2a73c 14523(define_insn "sse4a_movnt<mode>"
14524 [(set (match_operand:MODEF 0 "memory_operand" "=m")
14525 (unspec:MODEF
14526 [(match_operand:MODEF 1 "register_operand" "x")]
5deb404d 14527 UNSPEC_MOVNT))]
3d775f8e 14528 "TARGET_SSE4A"
63d5e521 14529 "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
3d775f8e 14530 [(set_attr "type" "ssemov")
3da2a73c 14531 (set_attr "mode" "<MODE>")])
3d775f8e 14532
;; Vector-source variant of the SSE4A UNSPEC_MOVNT store: element 0 of the
;; VF_128 register operand 1 is selected and written to the scalar memory
;; operand 0.
3da2a73c 14533(define_insn "sse4a_vmmovnt<mode>"
14534 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
14535 (unspec:<ssescalarmode>
14536 [(vec_select:<ssescalarmode>
6fe5844b 14537 (match_operand:VF_128 1 "register_operand" "x")
3da2a73c 14538 (parallel [(const_int 0)]))]
14539 UNSPEC_MOVNT))]
3d775f8e 14540 "TARGET_SSE4A"
0061967e 14541 "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
3d775f8e 14542 [(set_attr "type" "ssemov")
3da2a73c 14543 (set_attr "mode" "<ssescalarmode>")])
3d775f8e 14544
;; Immediate form of SSE4A extrq (UNSPEC_EXTRQI).  Operand 1 is tied to the
;; destination; operands 2 and 3 are two 0..255 constants, which is why
;; length_immediate is 2.
14545(define_insn "sse4a_extrqi"
14546 [(set (match_operand:V2DI 0 "register_operand" "=x")
5deb404d 14547 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
abd4f58b 14548 (match_operand 2 "const_0_to_255_operand")
14549 (match_operand 3 "const_0_to_255_operand")]
5deb404d 14550 UNSPEC_EXTRQI))]
3d775f8e 14551 "TARGET_SSE4A"
14552 "extrq\t{%3, %2, %0|%0, %2, %3}"
14553 [(set_attr "type" "sse")
1f346cbc 14554 (set_attr "prefix_data16" "1")
00a0e418 14555 (set_attr "length_immediate" "2")
3d775f8e 14556 (set_attr "mode" "TI")])
14557
;; Register form of SSE4A extrq (UNSPEC_EXTRQ): operand 1 is tied to the
;; destination and the V16QI register operand 2 is the second source.
14558(define_insn "sse4a_extrq"
14559 [(set (match_operand:V2DI 0 "register_operand" "=x")
5deb404d 14560 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
14561 (match_operand:V16QI 2 "register_operand" "x")]
14562 UNSPEC_EXTRQ))]
3d775f8e 14563 "TARGET_SSE4A"
14564 "extrq\t{%2, %0|%0, %2}"
14565 [(set_attr "type" "sse")
1f346cbc 14566 (set_attr "prefix_data16" "1")
3d775f8e 14567 (set_attr "mode" "TI")])
14568
;; Immediate form of SSE4A insertq (UNSPEC_INSERTQI).  Operand 1 is tied to
;; the destination, operand 2 is the register source, and operands 3 and 4
;; are two 0..255 constants, which is why length_immediate is 2.
14569(define_insn "sse4a_insertqi"
14570 [(set (match_operand:V2DI 0 "register_operand" "=x")
5deb404d 14571 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
14572 (match_operand:V2DI 2 "register_operand" "x")
abd4f58b 14573 (match_operand 3 "const_0_to_255_operand")
14574 (match_operand 4 "const_0_to_255_operand")]
5deb404d 14575 UNSPEC_INSERTQI))]
3d775f8e 14576 "TARGET_SSE4A"
14577 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
14578 [(set_attr "type" "sseins")
00a0e418 14579 (set_attr "prefix_data16" "0")
1f346cbc 14580 (set_attr "prefix_rep" "1")
00a0e418 14581 (set_attr "length_immediate" "2")
3d775f8e 14582 (set_attr "mode" "TI")])
14583
;; Register form of SSE4A insertq (UNSPEC_INSERTQ): operand 1 is tied to
;; the destination and the V2DI register operand 2 is the second source.
14584(define_insn "sse4a_insertq"
14585 [(set (match_operand:V2DI 0 "register_operand" "=x")
5deb404d 14586 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
14587 (match_operand:V2DI 2 "register_operand" "x")]
14588 UNSPEC_INSERTQ))]
3d775f8e 14589 "TARGET_SSE4A"
14590 "insertq\t{%2, %0|%0, %2}"
14591 [(set_attr "type" "sseins")
00a0e418 14592 (set_attr "prefix_data16" "0")
1f346cbc 14593 (set_attr "prefix_rep" "1")
3d775f8e 14594 (set_attr "mode" "TI")])
2d771892 14595
14596;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14597;;
14598;; Intel SSE4.1 instructions
14599;;
14600;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14601
18b7eecb 14602;; Mapping of immediate bits for blend instructions: for each mode, the
;; largest immediate value accepted; it is spliced into the predicate name
;; const_0_to_<blendbits>_operand used by the blend pattern below.
14603(define_mode_attr blendbits
14604 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
14605
;; Immediate blend on the VF_128_256 modes, modelled as a vec_merge of
;; operand 2 and operand 1 under the constant selector in operand 3.  The
;; first two alternatives are the non-AVX two-operand form with operand 1
;; tied to the destination; the third is the three-operand vblend form.
63d5e521 14606(define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>"
0a32b282 14607 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
6a3f5f59 14608 (vec_merge:VF_128_256
0a32b282 14609 (match_operand:VF_128_256 2 "nonimmediate_operand" "Yrm,*xm,xm")
14610 (match_operand:VF_128_256 1 "register_operand" "0,0,x")
abd4f58b 14611 (match_operand:SI 3 "const_0_to_<blendbits>_operand")))]
e16e10c8 14612 "TARGET_SSE4_1"
f6c74054 14613 "@
0a32b282 14614 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
f6c74054 14615 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
14616 vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
0a32b282 14617 [(set_attr "isa" "noavx,noavx,avx")
f6c74054 14618 (set_attr "type" "ssemov")
00a0e418 14619 (set_attr "length_immediate" "1")
0a32b282 14620 (set_attr "prefix_data16" "1,1,*")
f6c74054 14621 (set_attr "prefix_extra" "1")
0a32b282 14622 (set_attr "prefix" "orig,orig,vex")
3da2a73c 14623 (set_attr "mode" "<MODE>")])
2d771892 14624
;; Variable blend on the VF_128_256 modes (UNSPEC_BLENDV) with the selector
;; in register operand 3.  In the two non-AVX alternatives operand 1 is tied
;; to the destination and operand 3 uses the "Yz" constraint; the AVX
;; alternative prints the three-source vblendv form.
63d5e521 14625(define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>"
0a32b282 14626 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
6a3f5f59 14627 (unspec:VF_128_256
0a32b282 14628 [(match_operand:VF_128_256 1 "register_operand" "0,0,x")
14629 (match_operand:VF_128_256 2 "nonimmediate_operand" "Yrm,*xm,xm")
14630 (match_operand:VF_128_256 3 "register_operand" "Yz,Yz,x")]
3da2a73c 14631 UNSPEC_BLENDV))]
2d771892 14632 "TARGET_SSE4_1"
f6c74054 14633 "@
0a32b282 14634 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
f6c74054 14635 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
14636 vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
0a32b282 14637 [(set_attr "isa" "noavx,noavx,avx")
f6c74054 14638 (set_attr "type" "ssemov")
14639 (set_attr "length_immediate" "1")
0a32b282 14640 (set_attr "prefix_data16" "1,1,*")
2d771892 14641 (set_attr "prefix_extra" "1")
0a32b282 14642 (set_attr "prefix" "orig,orig,vex")
14643 (set_attr "btver2_decode" "vector,vector,vector")
3da2a73c 14644 (set_attr "mode" "<MODE>")])
2d771892 14645
;; dpps/dppd and their v-prefixed forms on the VF_128_256 modes, expressed
;; as UNSPEC_DP of operands 1 and 2 plus an 8-bit immediate in operand 3.
;; The first two alternatives are the non-AVX two-operand form with
;; operand 1 tied to the destination.
63d5e521 14646(define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
0a32b282 14647 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
6a3f5f59 14648 (unspec:VF_128_256
0a32b282 14649 [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,x")
14650 (match_operand:VF_128_256 2 "nonimmediate_operand" "Yrm,*xm,xm")
14651 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
3da2a73c 14652 UNSPEC_DP))]
2d771892 14653 "TARGET_SSE4_1"
f6c74054 14654 "@
0a32b282 14655 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
f6c74054 14656 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
14657 vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
0a32b282 14658 [(set_attr "isa" "noavx,noavx,avx")
f6c74054 14659 (set_attr "type" "ssemul")
00a0e418 14660 (set_attr "length_immediate" "1")
0a32b282 14661 (set_attr "prefix_data16" "1,1,*")
f6c74054 14662 (set_attr "prefix_extra" "1")
0a32b282 14663 (set_attr "prefix" "orig,orig,vex")
14664 (set_attr "btver2_decode" "vector,vector,vector")
4c9faaa4 14665 (set_attr "znver1_decode" "vector,vector,vector")
3da2a73c 14666 (set_attr "mode" "<MODE>")])
2d771892 14667
18b7eecb 14668;; Mode attribute used by `vmovntdqa' pattern: supplies the ISA prefix
;; of the pattern name for each vector width (V2DI, V4DI, V8DI).
14669(define_mode_attr vi8_sse4_1_avx2_avx512
14670 [(V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512f")])
14671
;; movntdqa load (UNSPEC_MOVNTDQA) from memory operand 1 into a vector
;; register, for the VI8_AVX2_AVX512F modes.  All three alternatives share
;; one output template; they differ only in register constraint and in the
;; prefix_extra/prefix attributes.
14672(define_insn "<vi8_sse4_1_avx2_avx512>_movntdqa"
0a32b282 14673 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand" "=Yr,*x, v")
14674 (unspec:VI8_AVX2_AVX512F [(match_operand:VI8_AVX2_AVX512F 1 "memory_operand" "m, m, m")]
2d771892 14675 UNSPEC_MOVNTDQA))]
14676 "TARGET_SSE4_1"
ed30e0a6 14677 "%vmovntdqa\t{%1, %0|%0, %1}"
fbfe006e 14678 [(set_attr "type" "ssemov")
0a32b282 14679 (set_attr "prefix_extra" "1,1,*")
14680 (set_attr "prefix" "maybe_vex,maybe_vex,evex")
5deb404d 14681 (set_attr "mode" "<sseinsnmode>")])
ed30e0a6 14682
;; mpsadbw/vmpsadbw on the VI1_AVX2 modes, expressed as UNSPEC_MPSADBW of
;; operands 1 and 2 plus an 8-bit immediate in operand 3.  The first two
;; alternatives are the non-AVX two-operand form with operand 1 tied to the
;; destination.
5deb404d 14683(define_insn "<sse4_1_avx2>_mpsadbw"
0a32b282 14684 [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
fd65bafc 14685 (unspec:VI1_AVX2
0a32b282 14686 [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x")
14687 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "Yrm,*xm,xm")
14688 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
fd65bafc 14689 UNSPEC_MPSADBW))]
2d771892 14690 "TARGET_SSE4_1"
f6c74054 14691 "@
0a32b282 14692 mpsadbw\t{%3, %2, %0|%0, %2, %3}
f6c74054 14693 mpsadbw\t{%3, %2, %0|%0, %2, %3}
14694 vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
0a32b282 14695 [(set_attr "isa" "noavx,noavx,avx")
f6c74054 14696 (set_attr "type" "sselog1")
00a0e418 14697 (set_attr "length_immediate" "1")
00a0e418 14698 (set_attr "prefix_extra" "1")
0a32b282 14699 (set_attr "prefix" "orig,orig,vex")
14700 (set_attr "btver2_decode" "vector,vector,vector")
4c9faaa4 14701 (set_attr "znver1_decode" "vector,vector,vector")
5deb404d 14702 (set_attr "mode" "<sseinsnmode>")])
14703
;; packusdw/vpackusdw, optionally masked via <mask_name>: operands 1 and 2
;; are each narrowed with unsigned saturation (us_truncate) to half-width
;; vectors, which are concatenated into the VI2_AVX2 result.
2d71b728 14704(define_insn "<sse4_1_avx2>_packusdw<mask_name>"
0a32b282 14705 [(set (match_operand:VI2_AVX2 0 "register_operand" "=Yr,*x,v")
2d71b728 14706 (vec_concat:VI2_AVX2
14707 (us_truncate:<ssehalfvecmode>
0a32b282 14708 (match_operand:<sseunpackmode> 1 "register_operand" "0,0,v"))
2d71b728 14709 (us_truncate:<ssehalfvecmode>
0a32b282 14710 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "Yrm,*xm,vm"))))]
2d71b728 14711 "TARGET_SSE4_1 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
f6c74054 14712 "@
0a32b282 14713 packusdw\t{%2, %0|%0, %2}
f6c74054 14714 packusdw\t{%2, %0|%0, %2}
2d71b728 14715 vpackusdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
0a32b282 14716 [(set_attr "isa" "noavx,noavx,avx")
f6c74054 14717 (set_attr "type" "sselog")
00a0e418 14718 (set_attr "prefix_extra" "1")
0a32b282 14719 (set_attr "prefix" "orig,orig,maybe_evex")
2d71b728 14720 (set_attr "mode" "<sseinsnmode>")])
ed30e0a6 14721
;; Variable byte blend on the VI1_AVX2 modes (UNSPEC_BLENDV) with the
;; selector in register operand 3.  In the two non-AVX alternatives
;; operand 1 is tied to the destination and operand 3 uses the "Yz"
;; constraint; only the AVX alternative has a length_immediate of 1.
5deb404d 14722(define_insn "<sse4_1_avx2>_pblendvb"
0a32b282 14723 [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
5deb404d 14724 (unspec:VI1_AVX2
0a32b282 14725 [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x")
14726 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "Yrm,*xm,xm")
14727 (match_operand:VI1_AVX2 3 "register_operand" "Yz,Yz,x")]
f6c74054 14728 UNSPEC_BLENDV))]
2d771892 14729 "TARGET_SSE4_1"
f6c74054 14730 "@
0a32b282 14731 pblendvb\t{%3, %2, %0|%0, %2, %3}
f6c74054 14732 pblendvb\t{%3, %2, %0|%0, %2, %3}
14733 vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
0a32b282 14734 [(set_attr "isa" "noavx,noavx,avx")
f6c74054 14735 (set_attr "type" "ssemov")
00a0e418 14736 (set_attr "prefix_extra" "1")
0a32b282 14737 (set_attr "length_immediate" "*,*,1")
14738 (set_attr "prefix" "orig,orig,vex")
14739 (set_attr "btver2_decode" "vector,vector,vector")
5deb404d 14740 (set_attr "mode" "<sseinsnmode>")])
ed30e0a6 14741
;; 128-bit pblendw/vpblendw, modelled as a vec_merge of operand 2 and
;; operand 1 under the 8-bit constant selector in operand 3.  The first two
;; alternatives are the non-AVX two-operand form with operand 1 tied to the
;; destination.
738630ee 14742(define_insn "sse4_1_pblendw"
0a32b282 14743 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,x")
738630ee 14744 (vec_merge:V8HI
0a32b282 14745 (match_operand:V8HI 2 "nonimmediate_operand" "Yrm,*xm,xm")
14746 (match_operand:V8HI 1 "register_operand" "0,0,x")
14747 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")))]
2d771892 14748 "TARGET_SSE4_1"
f6c74054 14749 "@
0a32b282 14750 pblendw\t{%3, %2, %0|%0, %2, %3}
f6c74054 14751 pblendw\t{%3, %2, %0|%0, %2, %3}
14752 vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
0a32b282 14753 [(set_attr "isa" "noavx,noavx,avx")
f6c74054 14754 (set_attr "type" "ssemov")
2d771892 14755 (set_attr "prefix_extra" "1")
00a0e418 14756 (set_attr "length_immediate" "1")
0a32b282 14757 (set_attr "prefix" "orig,orig,vex")
738630ee 14758 (set_attr "mode" "TI")])
14759
14760;; The builtin uses an 8-bit immediate. Expand that.
;; The low 8 bits of operand 3 are copied into bits 8-15 as well
;; (val << 8 | val), giving the vec_merge over V16HI a 16-bit selector.
14761(define_expand "avx2_pblendw"
abd4f58b 14762 [(set (match_operand:V16HI 0 "register_operand")
738630ee 14763 (vec_merge:V16HI
abd4f58b 14764 (match_operand:V16HI 2 "nonimmediate_operand")
14765 (match_operand:V16HI 1 "register_operand")
14766 (match_operand:SI 3 "const_0_to_255_operand")))]
738630ee 14767 "TARGET_AVX2"
14768{
14769 HOST_WIDE_INT val = INTVAL (operands[3]) & 0xff;
14770 operands[3] = GEN_INT (val << 8 | val);
14771})
14772
;; Matching insn for the 256-bit vpblendw.  Operand 3 must satisfy
;; avx2_pblendw_operand; only its low 8 bits are printed as the assembly
;; immediate.
14773(define_insn "*avx2_pblendw"
14774 [(set (match_operand:V16HI 0 "register_operand" "=x")
14775 (vec_merge:V16HI
14776 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
14777 (match_operand:V16HI 1 "register_operand" "x")
14778 (match_operand:SI 3 "avx2_pblendw_operand" "n")))]
c450bad4 14779 "TARGET_AVX2"
738630ee 14780{
14781 operands[3] = GEN_INT (INTVAL (operands[3]) & 0xff);
14782 return "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
14783}
14784 [(set_attr "type" "ssemov")
14785 (set_attr "prefix_extra" "1")
14786 (set_attr "length_immediate" "1")
14787 (set_attr "prefix" "vex")
14788 (set_attr "mode" "OI")])
5deb404d 14789
;; vpblendd on the VI4_AVX2 modes, modelled as a vec_merge of operand 2 and
;; operand 1 under the 8-bit constant selector in operand 3.
14790(define_insn "avx2_pblendd<mode>"
14791 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
14792 (vec_merge:VI4_AVX2
14793 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")
14794 (match_operand:VI4_AVX2 1 "register_operand" "x")
14795 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
14796 "TARGET_AVX2"
14797 "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14798 [(set_attr "type" "ssemov")
14799 (set_attr "prefix_extra" "1")
14800 (set_attr "length_immediate" "1")
14801 (set_attr "prefix" "vex")
14802 (set_attr "mode" "<sseinsnmode>")])
2d771892 14803
;; phminposuw/vphminposuw on V8HI, expressed as UNSPEC_PHMINPOSUW of the
;; single source operand 1.
14804(define_insn "sse4_1_phminposuw"
0a32b282 14805 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x")
14806 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "Yrm,*xm")]
2d771892 14807 UNSPEC_PHMINPOSUW))]
14808 "TARGET_SSE4_1"
ed30e0a6 14809 "%vphminposuw\t{%1, %0|%0, %1}"
2d771892 14810 [(set_attr "type" "sselog1")
14811 (set_attr "prefix_extra" "1")
ed30e0a6 14812 (set_attr "prefix" "maybe_vex")
2d771892 14813 (set_attr "mode" "TI")])
14814
;; Sign/zero-extend (any_extend) all sixteen bytes of the V16QI operand 1
;; to V16HI, optionally masked via <mask_name>.  The whole source operand is
;; consumed, so it is printed as plain %1 in both syntaxes.
ffd21b9d 14815(define_insn "avx2_<code>v16qiv16hi2<mask_name>"
14816 [(set (match_operand:V16HI 0 "register_operand" "=v")
5deb404d 14817 (any_extend:V16HI
ffd21b9d 14818 (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
14819 "TARGET_AVX2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
14820 "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5deb404d 14821 [(set_attr "type" "ssemov")
14822 (set_attr "prefix_extra" "1")
ffd21b9d 14823 (set_attr "prefix" "maybe_evex")
5deb404d 14824 (set_attr "mode" "OI")])
14825
;; Sign/zero-extend (any_extend) all thirty-two bytes of the V32QI
;; operand 1 to V32HI, optionally masked via <mask_name>.
ffd21b9d 14826(define_insn "avx512bw_<code>v32qiv32hi2<mask_name>"
14827 [(set (match_operand:V32HI 0 "register_operand" "=v")
14828 (any_extend:V32HI
14829 (match_operand:V32QI 1 "nonimmediate_operand" "vm")))]
14830 "TARGET_AVX512BW"
14831 "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14832 [(set_attr "type" "ssemov")
14833 (set_attr "prefix_extra" "1")
14834 (set_attr "prefix" "evex")
14835 (set_attr "mode" "XI")])
14836
;; Sign/zero-extend (any_extend) bytes 0-7 of the V16QI operand 1 to V8HI,
;; optionally masked via <mask_name>.  Only eight bytes of the source are
;; used; the Intel-syntax operand is printed with the %q1 modifier.
14837(define_insn "sse4_1_<code>v8qiv8hi2<mask_name>"
0a32b282 14838 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*v")
c868bf35 14839 (any_extend:V8HI
2d771892 14840 (vec_select:V8QI
0a32b282 14841 (match_operand:V16QI 1 "nonimmediate_operand" "Yrm,*vm")
04d95c72 14842 (parallel [(const_int 0) (const_int 1)
14843 (const_int 2) (const_int 3)
14844 (const_int 4) (const_int 5)
14845 (const_int 6) (const_int 7)]))))]
ffd21b9d 14846 "TARGET_SSE4_1 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
14847 "%vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
2d771892 14848 [(set_attr "type" "ssemov")
8c1dfa94 14849 (set_attr "ssememalign" "64")
2d771892 14850 (set_attr "prefix_extra" "1")
ed30e0a6 14851 (set_attr "prefix" "maybe_vex")
2d771892 14852 (set_attr "mode" "TI")])
14853
;; Sign/zero-extend (any_extend) all sixteen bytes of the V16QI operand 1
;; to V16SI, optionally masked via <mask_name>.  The whole 128-bit source
;; operand is consumed, so the Intel-syntax operand is printed as plain %1;
;; a %q1 override would describe a memory source as only 64 bits wide.
5220cab6 14854(define_insn "<mask_codefor>avx512f_<code>v16qiv16si2<mask_name>"
697a43f8 14855 [(set (match_operand:V16SI 0 "register_operand" "=v")
14856 (any_extend:V16SI
14857 (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
14858 "TARGET_AVX512F"
5220cab6 14859 "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
697a43f8 14860 [(set_attr "type" "ssemov")
14861 (set_attr "prefix" "evex")
14862 (set_attr "mode" "XI")])
14863
;; Sign/zero-extend (any_extend) bytes 0-7 of the V16QI operand 1 to V8SI,
;; optionally masked via <mask_name>.  Only eight bytes of the source are
;; used; the Intel-syntax operand is printed with the %q1 modifier.
ffd21b9d 14864(define_insn "avx2_<code>v8qiv8si2<mask_name>"
14865 [(set (match_operand:V8SI 0 "register_operand" "=v")
5deb404d 14866 (any_extend:V8SI
14867 (vec_select:V8QI
ffd21b9d 14868 (match_operand:V16QI 1 "nonimmediate_operand" "vm")
04d95c72 14869 (parallel [(const_int 0) (const_int 1)
14870 (const_int 2) (const_int 3)
14871 (const_int 4) (const_int 5)
14872 (const_int 6) (const_int 7)]))))]
ffd21b9d 14873 "TARGET_AVX2 && <mask_avx512vl_condition>"
14874 "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
5deb404d 14875 [(set_attr "type" "ssemov")
14876 (set_attr "prefix_extra" "1")
ffd21b9d 14877 (set_attr "prefix" "maybe_evex")
5deb404d 14878 (set_attr "mode" "OI")])
14879
;; Sign/zero-extend (any_extend) bytes 0-3 of the V16QI operand 1 to V4SI,
;; optionally masked via <mask_name>.  Only four bytes of the source are
;; used; the Intel-syntax operand is printed with the %k1 modifier.
ffd21b9d 14880(define_insn "sse4_1_<code>v4qiv4si2<mask_name>"
0a32b282 14881 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*v")
c868bf35 14882 (any_extend:V4SI
2d771892 14883 (vec_select:V4QI
0a32b282 14884 (match_operand:V16QI 1 "nonimmediate_operand" "Yrm,*vm")
04d95c72 14885 (parallel [(const_int 0) (const_int 1)
14886 (const_int 2) (const_int 3)]))))]
ffd21b9d 14887 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
14888 "%vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
2d771892 14889 [(set_attr "type" "ssemov")
8c1dfa94 14890 (set_attr "ssememalign" "32")
2d771892 14891 (set_attr "prefix_extra" "1")
ed30e0a6 14892 (set_attr "prefix" "maybe_vex")
2d771892 14893 (set_attr "mode" "TI")])
14894
;; Sign/zero-extend (any_extend) all sixteen halfwords of the V16HI
;; operand 1 to V16SI, optionally masked via <mask_name>.
5220cab6 14895(define_insn "avx512f_<code>v16hiv16si2<mask_name>"
697a43f8 14896 [(set (match_operand:V16SI 0 "register_operand" "=v")
14897 (any_extend:V16SI
14898 (match_operand:V16HI 1 "nonimmediate_operand" "vm")))]
14899 "TARGET_AVX512F"
5220cab6 14900 "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
697a43f8 14901 [(set_attr "type" "ssemov")
14902 (set_attr "prefix" "evex")
14903 (set_attr "mode" "XI")])
14904
;; Sign/zero-extend (any_extend) all eight halfwords of the V8HI operand 1
;; to V8SI, optionally masked via <mask_name>.
ffd21b9d 14905(define_insn "avx2_<code>v8hiv8si2<mask_name>"
14906 [(set (match_operand:V8SI 0 "register_operand" "=v")
5deb404d 14907 (any_extend:V8SI
ffd21b9d 14908 (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
14909 "TARGET_AVX2 && <mask_avx512vl_condition>"
14910 "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5deb404d 14911 [(set_attr "type" "ssemov")
14912 (set_attr "prefix_extra" "1")
ffd21b9d 14913 (set_attr "prefix" "maybe_evex")
5deb404d 14914 (set_attr "mode" "OI")])
14915
;; Sign/zero-extend (any_extend) halfwords 0-3 of the V8HI operand 1 to
;; V4SI, optionally masked via <mask_name>.  Only eight bytes of the source
;; are used; the Intel-syntax operand is printed with the %q1 modifier.
ffd21b9d 14916(define_insn "sse4_1_<code>v4hiv4si2<mask_name>"
0a32b282 14917 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*v")
c868bf35 14918 (any_extend:V4SI
2d771892 14919 (vec_select:V4HI
0a32b282 14920 (match_operand:V8HI 1 "nonimmediate_operand" "Yrm,*vm")
04d95c72 14921 (parallel [(const_int 0) (const_int 1)
14922 (const_int 2) (const_int 3)]))))]
ffd21b9d 14923 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
14924 "%vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
2d771892 14925 [(set_attr "type" "ssemov")
8c1dfa94 14926 (set_attr "ssememalign" "64")
2d771892 14927 (set_attr "prefix_extra" "1")
ed30e0a6 14928 (set_attr "prefix" "maybe_vex")
2d771892 14929 (set_attr "mode" "TI")])
14930
;; Sign/zero-extend (any_extend) bytes 0-7 of the V16QI operand 1 to V8DI,
;; optionally masked via <mask_name>.  Eight bytes (64 bits) of the source
;; are consumed, so the Intel-syntax operand is printed with %q1, matching
;; the other patterns in this file that select eight source bytes; %k1
;; would describe a memory source as only 32 bits wide.
5220cab6 14931(define_insn "avx512f_<code>v8qiv8di2<mask_name>"
697a43f8 14932 [(set (match_operand:V8DI 0 "register_operand" "=v")
14933 (any_extend:V8DI
14934 (vec_select:V8QI
14935 (match_operand:V16QI 1 "nonimmediate_operand" "vm")
14936 (parallel [(const_int 0) (const_int 1)
14937 (const_int 2) (const_int 3)
14938 (const_int 4) (const_int 5)
14939 (const_int 6) (const_int 7)]))))]
14940 "TARGET_AVX512F"
5220cab6 14941 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
697a43f8 14942 [(set_attr "type" "ssemov")
14943 (set_attr "prefix" "evex")
14944 (set_attr "mode" "XI")])
14945
;; Extend the low four QImode elements (indices 0-3) of the V16QI operand 1
;; to DImode, writing a V4DI register.  Four bytes = 32 bits of the source
;; are selected, hence the %k1 modifier on the Intel-syntax operand.
ffd21b9d 14946(define_insn "avx2_<code>v4qiv4di2<mask_name>"
14947 [(set (match_operand:V4DI 0 "register_operand" "=v")
5deb404d 14948 (any_extend:V4DI
14949 (vec_select:V4QI
ffd21b9d 14950 (match_operand:V16QI 1 "nonimmediate_operand" "vm")
04d95c72 14951 (parallel [(const_int 0) (const_int 1)
14952 (const_int 2) (const_int 3)]))))]
ffd21b9d 14953 "TARGET_AVX2 && <mask_avx512vl_condition>"
14954 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
5deb404d 14955 [(set_attr "type" "ssemov")
14956 (set_attr "prefix_extra" "1")
ffd21b9d 14957 (set_attr "prefix" "maybe_evex")
5deb404d 14958 (set_attr "mode" "OI")])
14959
;; Extend the low two QImode elements (indices 0-1) of the V16QI operand 1
;; to DImode, writing a V2DI register.  Two bytes = 16 bits of the source
;; are selected, hence the %w1 modifier on the Intel-syntax operand and
;; the "ssememalign" value of 16.
ffd21b9d 14960(define_insn "sse4_1_<code>v2qiv2di2<mask_name>"
0a32b282 14961 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*v")
c868bf35 14962 (any_extend:V2DI
2d771892 14963 (vec_select:V2QI
0a32b282 14964 (match_operand:V16QI 1 "nonimmediate_operand" "Yrm,*vm")
04d95c72 14965 (parallel [(const_int 0) (const_int 1)]))))]
ffd21b9d 14966 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
14967 "%vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %w1}"
2d771892 14968 [(set_attr "type" "ssemov")
8c1dfa94 14969 (set_attr "ssememalign" "16")
2d771892 14970 (set_attr "prefix_extra" "1")
ed30e0a6 14971 (set_attr "prefix" "maybe_vex")
2d771892 14972 (set_attr "mode" "TI")])
14973
;; Extend every HImode element of the V8HI operand 1 (register or memory)
;; to DImode, writing a V8DI register.  There is no vec_select here: the
;; whole 128-bit source is consumed, so the Intel-syntax operand is printed
;; with a plain %1 (a 64-bit %q1 would print the wrong memory operand size).
5220cab6 14974(define_insn "avx512f_<code>v8hiv8di2<mask_name>"
697a43f8 14975 [(set (match_operand:V8DI 0 "register_operand" "=v")
14976 (any_extend:V8DI
14977 (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
14978 "TARGET_AVX512F"
5220cab6 14979 "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
697a43f8 14980 [(set_attr "type" "ssemov")
14981 (set_attr "prefix" "evex")
14982 (set_attr "mode" "XI")])
14983
;; Extend the low four HImode elements (indices 0-3) of the V8HI operand 1
;; to DImode, writing a V4DI register.  4 x 16 = 64 bits of the source are
;; selected, hence the %q1 modifier on the Intel-syntax operand.
ffd21b9d 14984(define_insn "avx2_<code>v4hiv4di2<mask_name>"
14985 [(set (match_operand:V4DI 0 "register_operand" "=v")
5deb404d 14986 (any_extend:V4DI
14987 (vec_select:V4HI
ffd21b9d 14988 (match_operand:V8HI 1 "nonimmediate_operand" "vm")
04d95c72 14989 (parallel [(const_int 0) (const_int 1)
14990 (const_int 2) (const_int 3)]))))]
ffd21b9d 14991 "TARGET_AVX2 && <mask_avx512vl_condition>"
14992 "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
5deb404d 14993 [(set_attr "type" "ssemov")
14994 (set_attr "prefix_extra" "1")
ffd21b9d 14995 (set_attr "prefix" "maybe_evex")
5deb404d 14996 (set_attr "mode" "OI")])
14997
;; Extend the low two HImode elements (indices 0-1) of the V8HI operand 1
;; to DImode, writing a V2DI register.  2 x 16 = 32 bits of the source are
;; selected, hence the %k1 modifier on the Intel-syntax operand and the
;; "ssememalign" value of 32.
ffd21b9d 14998(define_insn "sse4_1_<code>v2hiv2di2<mask_name>"
0a32b282 14999 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*v")
c868bf35 15000 (any_extend:V2DI
2d771892 15001 (vec_select:V2HI
0a32b282 15002 (match_operand:V8HI 1 "nonimmediate_operand" "Yrm,*vm")
04d95c72 15003 (parallel [(const_int 0) (const_int 1)]))))]
ffd21b9d 15004 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
15005 "%vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
2d771892 15006 [(set_attr "type" "ssemov")
8c1dfa94 15007 (set_attr "ssememalign" "32")
2d771892 15008 (set_attr "prefix_extra" "1")
ed30e0a6 15009 (set_attr "prefix" "maybe_vex")
2d771892 15010 (set_attr "mode" "TI")])
15011
;; Extend every SImode element of the V8SI operand 1 (register or memory)
;; to DImode, writing a V8DI register.  The whole source operand is
;; consumed, so the Intel-syntax operand is a plain %1.
5220cab6 15012(define_insn "avx512f_<code>v8siv8di2<mask_name>"
697a43f8 15013 [(set (match_operand:V8DI 0 "register_operand" "=v")
15014 (any_extend:V8DI
15015 (match_operand:V8SI 1 "nonimmediate_operand" "vm")))]
15016 "TARGET_AVX512F"
5220cab6 15017 "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
697a43f8 15018 [(set_attr "type" "ssemov")
15019 (set_attr "prefix" "evex")
15020 (set_attr "mode" "XI")])
15021
;; Extend every SImode element of the V4SI operand 1 (register or memory)
;; to DImode, writing a V4DI register.  The whole source operand is
;; consumed, so the Intel-syntax operand is a plain %1.
ffd21b9d 15022(define_insn "avx2_<code>v4siv4di2<mask_name>"
15023 [(set (match_operand:V4DI 0 "register_operand" "=v")
5deb404d 15024 (any_extend:V4DI
ffd21b9d 15025 (match_operand:V4SI 1 "nonimmediate_operand" "vm")))]
15026 "TARGET_AVX2 && <mask_avx512vl_condition>"
15027 "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5deb404d 15028 [(set_attr "type" "ssemov")
ffd21b9d 15029 (set_attr "prefix" "maybe_evex")
5deb404d 15030 (set_attr "prefix_extra" "1")
15031 (set_attr "mode" "OI")])
15032
;; Extend the low two SImode elements (indices 0-1) of the V4SI operand 1
;; to DImode, writing a V2DI register.  2 x 32 = 64 bits of the source are
;; selected, hence the %q1 modifier on the Intel-syntax operand and the
;; "ssememalign" value of 64.
ffd21b9d 15033(define_insn "sse4_1_<code>v2siv2di2<mask_name>"
0a32b282 15034 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*v")
c868bf35 15035 (any_extend:V2DI
2d771892 15036 (vec_select:V2SI
0a32b282 15037 (match_operand:V4SI 1 "nonimmediate_operand" "Yrm,*vm")
04d95c72 15038 (parallel [(const_int 0) (const_int 1)]))))]
ffd21b9d 15039 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
15040 "%vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
2d771892 15041 [(set_attr "type" "ssemov")
8c1dfa94 15042 (set_attr "ssememalign" "64")
2d771892 15043 (set_attr "prefix_extra" "1")
ed30e0a6 15044 (set_attr "prefix" "maybe_vex")
2d771892 15045 (set_attr "mode" "TI")])
15046
ed30e0a6 15047;; vtestps/vtestpd are very similar to comiss and ucomiss in that they
15048;; only set FLAGS_REG, but they are not really compare instructions.
;; The pattern therefore models the flags result as an UNSPEC_VTESTP of the
;; two VF_128_256 operands (operand 0 a register, operand 1 register or
;; memory) rather than as a compare rtx.
63d5e521 15049(define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>"
ed30e0a6 15050 [(set (reg:CC FLAGS_REG)
6a3f5f59 15051 (unspec:CC [(match_operand:VF_128_256 0 "register_operand" "x")
15052 (match_operand:VF_128_256 1 "nonimmediate_operand" "xm")]
ed30e0a6 15053 UNSPEC_VTESTP))]
15054 "TARGET_AVX"
0061967e 15055 "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
ed30e0a6 15056 [(set_attr "type" "ssecomi")
00a0e418 15057 (set_attr "prefix_extra" "1")
ed30e0a6 15058 (set_attr "prefix" "vex")
15059 (set_attr "mode" "<MODE>")])
15060
2d771892 15061;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG,
15062;; but it is not really a compare instruction.
;; The flags result is modelled as an UNSPEC_PTEST of two V_AVX operands.
;; There are three alternatives; the third is restricted to AVX through the
;; "isa" attribute.  "btver2_decode" is "vector" only when <sseinsnmode>
;; is OImode.
37407f90 15063(define_insn "<sse4_1>_ptest<mode>"
2d771892 15064 [(set (reg:CC FLAGS_REG)
37407f90 15065 (unspec:CC [(match_operand:V_AVX 0 "register_operand" "Yr, *x, x")
15066 (match_operand:V_AVX 1 "nonimmediate_operand" "Yrm, *xm, xm")]
2d771892 15067 UNSPEC_PTEST))]
15068 "TARGET_SSE4_1"
ed30e0a6 15069 "%vptest\t{%1, %0|%0, %1}"
37407f90 15070 [(set_attr "isa" "*,*,avx")
15071 (set_attr "type" "ssecomi")
2d771892 15072 (set_attr "prefix_extra" "1")
ed30e0a6 15073 (set_attr "prefix" "maybe_vex")
37407f90 15074 (set (attr "btver2_decode")
15075 (if_then_else
15076 (match_test "<sseinsnmode>mode==OImode")
15077 (const_string "vector")
15078 (const_string "*")))
15079 (set_attr "mode" "<sseinsnmode>")])
2d771892 15080
;; Packed rounding: operand 0 = UNSPEC_ROUND (operand 1, operand 2), where
;; operand 1 is a VF_128_256 register or memory and operand 2 is an
;; immediate in the range 0..15 (const_0_to_15_operand).  The
;; "prefix_data16" attribute is "1" unless TARGET_AVX is set, in which case
;; it is "*".
63d5e521 15081(define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
0a32b282 15082 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x")
6a3f5f59 15083 (unspec:VF_128_256
0a32b282 15084 [(match_operand:VF_128_256 1 "nonimmediate_operand" "Yrm,*xm")
15085 (match_operand:SI 2 "const_0_to_15_operand" "n,n")]
3da2a73c 15086 UNSPEC_ROUND))]
448e99f5 15087 "TARGET_ROUND"
0061967e 15088 "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2d771892 15089 [(set_attr "type" "ssecvt")
f6c74054 15090 (set (attr "prefix_data16")
15091 (if_then_else
6be3efec 15092 (match_test "TARGET_AVX")
f6c74054 15093 (const_string "*")
15094 (const_string "1")))
2d771892 15095 (set_attr "prefix_extra" "1")
00a0e418 15096 (set_attr "length_immediate" "1")
ed30e0a6 15097 (set_attr "prefix" "maybe_vex")
15098 (set_attr "mode" "<MODE>")])
15099
;; Round-then-convert expander for VF1_128_256 modes: rounds operand 1 with
;; the rounding immediate in operand 2 into a fresh pseudo of the same
;; float mode, then emits fix_trunc from that pseudo into the integer
;; vector operand 0 (<sseintvecmode>).  Always ends with DONE, so the
;; operand list serves only as the expander's signature.
c34303ca 15100(define_expand "<sse4_1>_round<ssemodesuffix>_sfix<avxsizesuffix>"
abd4f58b 15101 [(match_operand:<sseintvecmode> 0 "register_operand")
03ae25dc 15102 (match_operand:VF1_128_256 1 "nonimmediate_operand")
abd4f58b 15103 (match_operand:SI 2 "const_0_to_15_operand")]
c34303ca 15104 "TARGET_ROUND"
15105{
15106 rtx tmp = gen_reg_rtx (<MODE>mode);
15107
15108 emit_insn
15109 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp, operands[1],
15110 operands[2]));
15111 emit_insn
15112 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
15113 DONE;
15114})
15115
;; V8DF rounding expander: forwards all three operands unchanged to
;; gen_avx512f_rndscalev8df and finishes with DONE.  Operand 2 is the
;; rounding immediate, restricted to 0..15 by const_0_to_15_operand.
6615b722 15116(define_expand "avx512f_roundpd512"
15117 [(match_operand:V8DF 0 "register_operand")
15118 (match_operand:V8DF 1 "nonimmediate_operand")
15119 (match_operand:SI 2 "const_0_to_15_operand")]
15120 "TARGET_AVX512F"
15121{
15122 emit_insn (gen_avx512f_rndscalev8df (operands[0], operands[1], operands[2]));
15123 DONE;
15124})
15125
;; Round two VF2 vectors (operands 1 and 2) with the rounding immediate in
;; operand 3, convert them to integers and pack the results into operand 0
;; (<ssepackfltmode>).
;; Two strategies:
;;  - For V2DFmode with TARGET_AVX, !TARGET_PREFER_AVX128 and when
;;    optimizing the insn for speed: concatenate both inputs into one V4DF,
;;    round once with avx_roundpd256 and convert with fix_truncv4dfv4si2.
;;  - Otherwise: round each input separately in its own mode and combine
;;    with vec_pack_sfix_trunc_<mode>.
c34303ca 15126(define_expand "<sse4_1>_round<ssemodesuffix>_vec_pack_sfix<avxsizesuffix>"
abd4f58b 15127 [(match_operand:<ssepackfltmode> 0 "register_operand")
15128 (match_operand:VF2 1 "nonimmediate_operand")
15129 (match_operand:VF2 2 "nonimmediate_operand")
15130 (match_operand:SI 3 "const_0_to_15_operand")]
c34303ca 15131 "TARGET_ROUND"
15132{
15133 rtx tmp0, tmp1;
15134
4030506f 15135 if (<MODE>mode == V2DFmode
f00377d6 15136 && TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
4030506f 15137 {
15138 rtx tmp2 = gen_reg_rtx (V4DFmode);
c34303ca 15139
4030506f 15140 tmp0 = gen_reg_rtx (V4DFmode);
15141 tmp1 = force_reg (V2DFmode, operands[1]);
15142
15143 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
15144 emit_insn (gen_avx_roundpd256 (tmp2, tmp0, operands[3]));
15145 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
15146 }
15147 else
15148 {
15149 tmp0 = gen_reg_rtx (<MODE>mode);
15150 tmp1 = gen_reg_rtx (<MODE>mode);
15151
15152 emit_insn
15153 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp0, operands[1],
15154 operands[3]));
15155 emit_insn
15156 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp1, operands[2],
15157 operands[3]));
15158 emit_insn
15159 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
15160 }
c34303ca 15161 DONE;
15162})
15163
;; Scalar rounding: a vec_merge with mask (const_int 1) takes element 0
;; from UNSPEC_ROUND (operand 2, operand 3) and the remaining elements from
;; operand 1.  The first two alternatives are non-AVX and tie operand 1 to
;; the destination (constraint "0"), emitting the two-operand round form;
;; the third is AVX-only and emits the three-operand vround form with
;; operand 1 as a separate source.
0061967e 15164(define_insn "sse4_1_round<ssescalarmodesuffix>"
0a32b282 15165 [(set (match_operand:VF_128 0 "register_operand" "=Yr,*x,x")
f6c74054 15166 (vec_merge:VF_128
15167 (unspec:VF_128
0a32b282 15168 [(match_operand:VF_128 2 "register_operand" "Yr,*x,x")
15169 (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")]
3da2a73c 15170 UNSPEC_ROUND)
0a32b282 15171 (match_operand:VF_128 1 "register_operand" "0,0,x")
2d771892 15172 (const_int 1)))]
448e99f5 15173 "TARGET_ROUND"
f6c74054 15174 "@
0a32b282 15175 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
f6c74054 15176 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
15177 vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
0a32b282 15178 [(set_attr "isa" "noavx,noavx,avx")
f6c74054 15179 (set_attr "type" "ssecvt")
00a0e418 15180 (set_attr "length_immediate" "1")
0a32b282 15181 (set_attr "prefix_data16" "1,1,*")
f6c74054 15182 (set_attr "prefix_extra" "1")
0a32b282 15183 (set_attr "prefix" "orig,orig,vex")
3da2a73c 15184 (set_attr "mode" "<MODE>")])
f0dd3deb 15185
;; Vector round expander, enabled only when TARGET_ROUND is set and
;; flag_trapping_math is clear.  The emitted sequence is:
;;   operands[3] = copysign (vec_half, operands[1])
;;   operands[4] = operands[1] + operands[3]
;;   operands[0] = UNSPEC_ROUND (operands[4], ROUND_TRUNC)
;; vec_half is a vector constant with every element equal to
;; dconsthalf - 2**(-p - 1), where p is the precision field of the scalar
;; mode's real_format; the C comment below names that value
;; nextafter (0.5, 0.0).
56b61659 15186(define_expand "round<mode>2"
15187 [(set (match_dup 4)
15188 (plus:VF
abd4f58b 15189 (match_operand:VF 1 "register_operand")
56b61659 15190 (match_dup 3)))
abd4f58b 15191 (set (match_operand:VF 0 "register_operand")
56b61659 15192 (unspec:VF
15193 [(match_dup 4) (match_dup 5)]
15194 UNSPEC_ROUND))]
15195 "TARGET_ROUND && !flag_trapping_math"
15196{
3754d046 15197 machine_mode scalar_mode;
56b61659 15198 const struct real_format *fmt;
15199 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
15200 rtx half, vec_half;
15201
15202 scalar_mode = GET_MODE_INNER (<MODE>mode);
15203
15204 /* load nextafter (0.5, 0.0) */
15205 fmt = REAL_MODE_FORMAT (scalar_mode);
15206 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, scalar_mode);
f2ad9e38 15207 real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);
56b61659 15208 half = const_double_from_real_value (pred_half, scalar_mode);
15209
15210 vec_half = ix86_build_const_vector (<MODE>mode, true, half);
15211 vec_half = force_reg (<MODE>mode, vec_half);
15212
15213 operands[3] = gen_reg_rtx (<MODE>mode);
15214 emit_insn (gen_copysign<mode>3 (operands[3], vec_half, operands[1]));
15215
15216 operands[4] = gen_reg_rtx (<MODE>mode);
15217 operands[5] = GEN_INT (ROUND_TRUNC);
15218})
15219
;; Round-then-convert expander for VF1_128_256 modes: expands round<mode>2
;; of operand 1 into a fresh pseudo, then emits fix_trunc from that pseudo
;; into the integer vector operand 0 (<sseintvecmode>).  Carries the same
;; enabling condition as round<mode>2.
c34303ca 15220(define_expand "round<mode>2_sfix"
abd4f58b 15221 [(match_operand:<sseintvecmode> 0 "register_operand")
03ae25dc 15222 (match_operand:VF1_128_256 1 "register_operand")]
c34303ca 15223 "TARGET_ROUND && !flag_trapping_math"
15224{
15225 rtx tmp = gen_reg_rtx (<MODE>mode);
15226
15227 emit_insn (gen_round<mode>2 (tmp, operands[1]));
15228
15229 emit_insn
15230 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
15231 DONE;
15232})
15233
;; Round two VF2 vectors (operands 1 and 2) via round<mode>2, convert them
;; to integers and pack the results into operand 0 (<ssepackfltmode>).
;; Two strategies:
;;  - For V2DFmode with TARGET_AVX, !TARGET_PREFER_AVX128 and when
;;    optimizing the insn for speed: concatenate both inputs into one V4DF,
;;    round once with roundv4df2 and convert with fix_truncv4dfv4si2.
;;  - Otherwise: round each input separately and combine with
;;    vec_pack_sfix_trunc_<mode>.
15234(define_expand "round<mode>2_vec_pack_sfix"
abd4f58b 15235 [(match_operand:<ssepackfltmode> 0 "register_operand")
15236 (match_operand:VF2 1 "register_operand")
15237 (match_operand:VF2 2 "register_operand")]
c34303ca 15238 "TARGET_ROUND && !flag_trapping_math"
15239{
15240 rtx tmp0, tmp1;
15241
4030506f 15242 if (<MODE>mode == V2DFmode
f00377d6 15243 && TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
4030506f 15244 {
15245 rtx tmp2 = gen_reg_rtx (V4DFmode);
c34303ca 15246
4030506f 15247 tmp0 = gen_reg_rtx (V4DFmode);
15248 tmp1 = force_reg (V2DFmode, operands[1]);
c34303ca 15249
4030506f 15250 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
15251 emit_insn (gen_roundv4df2 (tmp2, tmp0));
15252 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
15253 }
15254 else
15255 {
15256 tmp0 = gen_reg_rtx (<MODE>mode);
15257 tmp1 = gen_reg_rtx (<MODE>mode);
15258
15259 emit_insn (gen_round<mode>2 (tmp0, operands[1]));
15260 emit_insn (gen_round<mode>2 (tmp1, operands[2]));
15261
15262 emit_insn
15263 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
15264 }
c34303ca 15265 DONE;
15266})
15267
f0dd3deb 15268;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15269;;
15270;; Intel SSE4.2 string/text processing instructions
15271;;
15272;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15273
;; Combined explicit-length string compare.  One parallel describes all
;; three results of UNSPEC_PCMPESTR on operands 2..6: the SImode result in
;; operand 0 (constraint "c"), the V16QI result in operand 1 (constraint
;; "Yz") and FLAGS_REG.  The pattern is only valid while pseudos can still
;; be created and is always split ("#" template, "&& 1" split condition).
;; The splitter consults the REG_UNUSED notes on curr_insn and emits only
;; what is live:
;;   - operand 0 live                  -> sse4_2_pcmpestri
;;   - operand 1 live                  -> sse4_2_pcmpestrm
;;   - only the flags live             -> sse4_2_pcmpestr_cconly
;;   - nothing live                    -> a NOTE_INSN_DELETED note
15274(define_insn_and_split "sse4_2_pcmpestr"
15275 [(set (match_operand:SI 0 "register_operand" "=c,c")
15276 (unspec:SI
1a5eff3d 15277 [(match_operand:V16QI 2 "register_operand" "x,x")
f0dd3deb 15278 (match_operand:SI 3 "register_operand" "a,a")
1a5eff3d 15279 (match_operand:V16QI 4 "nonimmediate_operand" "x,m")
f0dd3deb 15280 (match_operand:SI 5 "register_operand" "d,d")
15281 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
15282 UNSPEC_PCMPESTR))
50c9119e 15283 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
f0dd3deb 15284 (unspec:V16QI
15285 [(match_dup 2)
15286 (match_dup 3)
15287 (match_dup 4)
15288 (match_dup 5)
15289 (match_dup 6)]
15290 UNSPEC_PCMPESTR))
15291 (set (reg:CC FLAGS_REG)
15292 (unspec:CC
15293 [(match_dup 2)
15294 (match_dup 3)
15295 (match_dup 4)
15296 (match_dup 5)
15297 (match_dup 6)]
15298 UNSPEC_PCMPESTR))]
15299 "TARGET_SSE4_2
d0b2c064 15300 && can_create_pseudo_p ()"
f0dd3deb 15301 "#"
15302 "&& 1"
15303 [(const_int 0)]
15304{
15305 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
15306 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
15307 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
15308
15309 if (ecx)
15310 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
15311 operands[3], operands[4],
15312 operands[5], operands[6]));
15313 if (xmm0)
15314 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
15315 operands[3], operands[4],
15316 operands[5], operands[6]));
15317 if (flags && !(ecx || xmm0))
18f95a36 15318 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
15319 operands[2], operands[3],
f0dd3deb 15320 operands[4], operands[5],
15321 operands[6]));
1c9cc6e6 15322 if (!(flags || ecx || xmm0))
15323 emit_note (NOTE_INSN_DELETED);
15324
f0dd3deb 15325 DONE;
15326}
15327 [(set_attr "type" "sselog")
15328 (set_attr "prefix_data16" "1")
15329 (set_attr "prefix_extra" "1")
8c1dfa94 15330 (set_attr "ssememalign" "8")
00a0e418 15331 (set_attr "length_immediate" "1")
f0dd3deb 15332 (set_attr "memory" "none,load")
15333 (set_attr "mode" "TI")])
15334
;; Variant of sse4_2_pcmpestr whose second vector input is a memory operand
;; wrapped in UNSPEC_LOADU.  The splitter is the same as in the plain
;; pattern: it checks the REG_UNUSED notes on curr_insn and emits
;; pcmpestri / pcmpestrm / the flags-only form (or a NOTE_INSN_DELETED note
;; when nothing is live), passing the bare memory operand 4 to them
;; without the UNSPEC_LOADU wrapper.
538c1aa0 15335(define_insn_and_split "*sse4_2_pcmpestr_unaligned"
15336 [(set (match_operand:SI 0 "register_operand" "=c")
15337 (unspec:SI
1a5eff3d 15338 [(match_operand:V16QI 2 "register_operand" "x")
538c1aa0 15339 (match_operand:SI 3 "register_operand" "a")
15340 (unspec:V16QI
15341 [(match_operand:V16QI 4 "memory_operand" "m")]
00820ea0 15342 UNSPEC_LOADU)
538c1aa0 15343 (match_operand:SI 5 "register_operand" "d")
15344 (match_operand:SI 6 "const_0_to_255_operand" "n")]
15345 UNSPEC_PCMPESTR))
15346 (set (match_operand:V16QI 1 "register_operand" "=Yz")
15347 (unspec:V16QI
15348 [(match_dup 2)
15349 (match_dup 3)
00820ea0 15350 (unspec:V16QI [(match_dup 4)] UNSPEC_LOADU)
538c1aa0 15351 (match_dup 5)
15352 (match_dup 6)]
15353 UNSPEC_PCMPESTR))
15354 (set (reg:CC FLAGS_REG)
15355 (unspec:CC
15356 [(match_dup 2)
15357 (match_dup 3)
00820ea0 15358 (unspec:V16QI [(match_dup 4)] UNSPEC_LOADU)
538c1aa0 15359 (match_dup 5)
15360 (match_dup 6)]
15361 UNSPEC_PCMPESTR))]
15362 "TARGET_SSE4_2
15363 && can_create_pseudo_p ()"
15364 "#"
15365 "&& 1"
15366 [(const_int 0)]
15367{
15368 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
15369 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
15370 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
15371
15372 if (ecx)
15373 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
15374 operands[3], operands[4],
15375 operands[5], operands[6]));
15376 if (xmm0)
15377 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
15378 operands[3], operands[4],
15379 operands[5], operands[6]));
15380 if (flags && !(ecx || xmm0))
15381 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
15382 operands[2], operands[3],
15383 operands[4], operands[5],
15384 operands[6]));
15385 if (!(flags || ecx || xmm0))
15386 emit_note (NOTE_INSN_DELETED);
15387
15388 DONE;
15389}
15390 [(set_attr "type" "sselog")
15391 (set_attr "prefix_data16" "1")
15392 (set_attr "prefix_extra" "1")
8c1dfa94 15393 (set_attr "ssememalign" "8")
538c1aa0 15394 (set_attr "length_immediate" "1")
15395 (set_attr "memory" "load")
15396 (set_attr "mode" "TI")])
15397
;; Index form of the explicit-length string compare: sets the SImode
;; operand 0 (constraint "c") and FLAGS_REG from UNSPEC_PCMPESTR.
;; Operands 2 and 4 are pinned to the "a" and "d" register classes and do
;; not appear in the output template; only the two vector operands (1, 3)
;; and the immediate (5) are printed.
f0dd3deb 15398(define_insn "sse4_2_pcmpestri"
15399 [(set (match_operand:SI 0 "register_operand" "=c,c")
15400 (unspec:SI
15401 [(match_operand:V16QI 1 "register_operand" "x,x")
15402 (match_operand:SI 2 "register_operand" "a,a")
15403 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
15404 (match_operand:SI 4 "register_operand" "d,d")
15405 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
15406 UNSPEC_PCMPESTR))
15407 (set (reg:CC FLAGS_REG)
15408 (unspec:CC
15409 [(match_dup 1)
15410 (match_dup 2)
15411 (match_dup 3)
15412 (match_dup 4)
15413 (match_dup 5)]
15414 UNSPEC_PCMPESTR))]
15415 "TARGET_SSE4_2"
ed30e0a6 15416 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
f0dd3deb 15417 [(set_attr "type" "sselog")
15418 (set_attr "prefix_data16" "1")
15419 (set_attr "prefix_extra" "1")
ed30e0a6 15420 (set_attr "prefix" "maybe_vex")
8c1dfa94 15421 (set_attr "ssememalign" "8")
00a0e418 15422 (set_attr "length_immediate" "1")
6470d004 15423 (set_attr "btver2_decode" "vector")
f0dd3deb 15424 (set_attr "memory" "none,load")
15425 (set_attr "mode" "TI")])
15426
;; Mask form of the explicit-length string compare: sets the V16QI
;; operand 0 (constraint "Yz") and FLAGS_REG from UNSPEC_PCMPESTR.
;; Operands 2 and 4 are pinned to the "a" and "d" register classes and do
;; not appear in the output template; only the two vector operands (1, 3)
;; and the immediate (5) are printed.
15427(define_insn "sse4_2_pcmpestrm"
50c9119e 15428 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
f0dd3deb 15429 (unspec:V16QI
15430 [(match_operand:V16QI 1 "register_operand" "x,x")
15431 (match_operand:SI 2 "register_operand" "a,a")
15432 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
15433 (match_operand:SI 4 "register_operand" "d,d")
15434 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
15435 UNSPEC_PCMPESTR))
15436 (set (reg:CC FLAGS_REG)
15437 (unspec:CC
15438 [(match_dup 1)
15439 (match_dup 2)
15440 (match_dup 3)
15441 (match_dup 4)
15442 (match_dup 5)]
15443 UNSPEC_PCMPESTR))]
15444 "TARGET_SSE4_2"
ed30e0a6 15445 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
f0dd3deb 15446 [(set_attr "type" "sselog")
15447 (set_attr "prefix_data16" "1")
15448 (set_attr "prefix_extra" "1")
8c1dfa94 15449 (set_attr "ssememalign" "8")
00a0e418 15450 (set_attr "length_immediate" "1")
ed30e0a6 15451 (set_attr "prefix" "maybe_vex")
6470d004 15452 (set_attr "btver2_decode" "vector")
f0dd3deb 15453 (set_attr "memory" "none,load")
15454 (set_attr "mode" "TI")])
15455
;; Flags-only form of the explicit-length string compare: only FLAGS_REG
;; is set; the other result is clobbered through a match_scratch.
;; Alternatives 0-1 clobber a "Yz" V16QI scratch and emit pcmpestrm;
;; alternatives 2-3 clobber a "c" SImode scratch and emit pcmpestri.  The
;; unused scratch in each alternative has constraint "X".  Within each
;; pair the second alternative takes operand 4 from memory.
15456(define_insn "sse4_2_pcmpestr_cconly"
15457 [(set (reg:CC FLAGS_REG)
15458 (unspec:CC
18f95a36 15459 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
15460 (match_operand:SI 3 "register_operand" "a,a,a,a")
15461 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
15462 (match_operand:SI 5 "register_operand" "d,d,d,d")
15463 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
f0dd3deb 15464 UNSPEC_PCMPESTR))
18f95a36 15465 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
15466 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
f0dd3deb 15467 "TARGET_SSE4_2"
15468 "@
ed30e0a6 15469 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
15470 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
15471 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
15472 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
f0dd3deb 15473 [(set_attr "type" "sselog")
15474 (set_attr "prefix_data16" "1")
15475 (set_attr "prefix_extra" "1")
8c1dfa94 15476 (set_attr "ssememalign" "8")
00a0e418 15477 (set_attr "length_immediate" "1")
f0dd3deb 15478 (set_attr "memory" "none,load,none,load")
6470d004 15479 (set_attr "btver2_decode" "vector,vector,vector,vector")
ed30e0a6 15480 (set_attr "prefix" "maybe_vex")
f0dd3deb 15481 (set_attr "mode" "TI")])
15482
;; Combined implicit-length string compare.  One parallel describes all
;; three results of UNSPEC_PCMPISTR on operands 2..4: the SImode result in
;; operand 0 (constraint "c"), the V16QI result in operand 1 (constraint
;; "Yz") and FLAGS_REG.  The pattern is only valid while pseudos can still
;; be created and is always split ("#" template, "&& 1" split condition).
;; The splitter consults the REG_UNUSED notes on curr_insn and emits only
;; what is live:
;;   - operand 0 live                  -> sse4_2_pcmpistri
;;   - operand 1 live                  -> sse4_2_pcmpistrm
;;   - only the flags live             -> sse4_2_pcmpistr_cconly
;;   - nothing live                    -> a NOTE_INSN_DELETED note
15483(define_insn_and_split "sse4_2_pcmpistr"
15484 [(set (match_operand:SI 0 "register_operand" "=c,c")
15485 (unspec:SI
1a5eff3d 15486 [(match_operand:V16QI 2 "register_operand" "x,x")
15487 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
f0dd3deb 15488 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
15489 UNSPEC_PCMPISTR))
50c9119e 15490 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
f0dd3deb 15491 (unspec:V16QI
15492 [(match_dup 2)
15493 (match_dup 3)
15494 (match_dup 4)]
15495 UNSPEC_PCMPISTR))
15496 (set (reg:CC FLAGS_REG)
15497 (unspec:CC
15498 [(match_dup 2)
15499 (match_dup 3)
15500 (match_dup 4)]
15501 UNSPEC_PCMPISTR))]
15502 "TARGET_SSE4_2
d0b2c064 15503 && can_create_pseudo_p ()"
f0dd3deb 15504 "#"
15505 "&& 1"
15506 [(const_int 0)]
15507{
15508 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
15509 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
15510 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
15511
15512 if (ecx)
15513 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
15514 operands[3], operands[4]));
15515 if (xmm0)
15516 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
15517 operands[3], operands[4]));
15518 if (flags && !(ecx || xmm0))
18f95a36 15519 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
15520 operands[2], operands[3],
f0dd3deb 15521 operands[4]));
1c9cc6e6 15522 if (!(flags || ecx || xmm0))
15523 emit_note (NOTE_INSN_DELETED);
15524
f0dd3deb 15525 DONE;
15526}
15527 [(set_attr "type" "sselog")
15528 (set_attr "prefix_data16" "1")
15529 (set_attr "prefix_extra" "1")
8c1dfa94 15530 (set_attr "ssememalign" "8")
00a0e418 15531 (set_attr "length_immediate" "1")
f0dd3deb 15532 (set_attr "memory" "none,load")
15533 (set_attr "mode" "TI")])
15534
;; Variant of sse4_2_pcmpistr whose second vector input is a memory operand
;; wrapped in UNSPEC_LOADU.  The splitter is the same as in the plain
;; pattern: it checks the REG_UNUSED notes on curr_insn and emits
;; pcmpistri / pcmpistrm / the flags-only form (or a NOTE_INSN_DELETED note
;; when nothing is live), passing the bare memory operand 3 to them
;; without the UNSPEC_LOADU wrapper.
538c1aa0 15535(define_insn_and_split "*sse4_2_pcmpistr_unaligned"
15536 [(set (match_operand:SI 0 "register_operand" "=c")
15537 (unspec:SI
1a5eff3d 15538 [(match_operand:V16QI 2 "register_operand" "x")
538c1aa0 15539 (unspec:V16QI
15540 [(match_operand:V16QI 3 "memory_operand" "m")]
00820ea0 15541 UNSPEC_LOADU)
538c1aa0 15542 (match_operand:SI 4 "const_0_to_255_operand" "n")]
15543 UNSPEC_PCMPISTR))
15544 (set (match_operand:V16QI 1 "register_operand" "=Yz")
15545 (unspec:V16QI
15546 [(match_dup 2)
00820ea0 15547 (unspec:V16QI [(match_dup 3)] UNSPEC_LOADU)
538c1aa0 15548 (match_dup 4)]
15549 UNSPEC_PCMPISTR))
15550 (set (reg:CC FLAGS_REG)
15551 (unspec:CC
15552 [(match_dup 2)
00820ea0 15553 (unspec:V16QI [(match_dup 3)] UNSPEC_LOADU)
538c1aa0 15554 (match_dup 4)]
15555 UNSPEC_PCMPISTR))]
15556 "TARGET_SSE4_2
15557 && can_create_pseudo_p ()"
15558 "#"
15559 "&& 1"
15560 [(const_int 0)]
15561{
15562 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
15563 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
15564 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
15565
15566 if (ecx)
15567 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
15568 operands[3], operands[4]));
15569 if (xmm0)
15570 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
15571 operands[3], operands[4]));
15572 if (flags && !(ecx || xmm0))
15573 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
15574 operands[2], operands[3],
15575 operands[4]));
15576 if (!(flags || ecx || xmm0))
15577 emit_note (NOTE_INSN_DELETED);
15578
15579 DONE;
15580}
15581 [(set_attr "type" "sselog")
15582 (set_attr "prefix_data16" "1")
15583 (set_attr "prefix_extra" "1")
8c1dfa94 15584 (set_attr "ssememalign" "8")
538c1aa0 15585 (set_attr "length_immediate" "1")
15586 (set_attr "memory" "load")
15587 (set_attr "mode" "TI")])
15588
;; Index form of the implicit-length string compare: sets the SImode
;; operand 0 (constraint "c") and FLAGS_REG from UNSPEC_PCMPISTR of the two
;; V16QI inputs (operand 2 may be memory) and the 0..255 immediate in
;; operand 3.
f0dd3deb 15589(define_insn "sse4_2_pcmpistri"
15590 [(set (match_operand:SI 0 "register_operand" "=c,c")
15591 (unspec:SI
15592 [(match_operand:V16QI 1 "register_operand" "x,x")
15593 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
15594 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
15595 UNSPEC_PCMPISTR))
15596 (set (reg:CC FLAGS_REG)
15597 (unspec:CC
15598 [(match_dup 1)
15599 (match_dup 2)
15600 (match_dup 3)]
15601 UNSPEC_PCMPISTR))]
15602 "TARGET_SSE4_2"
ed30e0a6 15603 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
f0dd3deb 15604 [(set_attr "type" "sselog")
15605 (set_attr "prefix_data16" "1")
15606 (set_attr "prefix_extra" "1")
8c1dfa94 15607 (set_attr "ssememalign" "8")
00a0e418 15608 (set_attr "length_immediate" "1")
ed30e0a6 15609 (set_attr "prefix" "maybe_vex")
f0dd3deb 15610 (set_attr "memory" "none,load")
6470d004 15611 (set_attr "btver2_decode" "vector")
f0dd3deb 15612 (set_attr "mode" "TI")])
15613
;; Mask form of the implicit-length string compare: sets the V16QI
;; operand 0 (constraint "Yz") and FLAGS_REG from UNSPEC_PCMPISTR of the
;; two V16QI inputs (operand 2 may be memory) and the 0..255 immediate in
;; operand 3.
15614(define_insn "sse4_2_pcmpistrm"
50c9119e 15615 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
f0dd3deb 15616 (unspec:V16QI
15617 [(match_operand:V16QI 1 "register_operand" "x,x")
15618 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
15619 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
15620 UNSPEC_PCMPISTR))
15621 (set (reg:CC FLAGS_REG)
15622 (unspec:CC
15623 [(match_dup 1)
15624 (match_dup 2)
15625 (match_dup 3)]
15626 UNSPEC_PCMPISTR))]
15627 "TARGET_SSE4_2"
ed30e0a6 15628 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
f0dd3deb 15629 [(set_attr "type" "sselog")
15630 (set_attr "prefix_data16" "1")
15631 (set_attr "prefix_extra" "1")
8c1dfa94 15632 (set_attr "ssememalign" "8")
00a0e418 15633 (set_attr "length_immediate" "1")
ed30e0a6 15634 (set_attr "prefix" "maybe_vex")
f0dd3deb 15635 (set_attr "memory" "none,load")
6470d004 15636 (set_attr "btver2_decode" "vector")
f0dd3deb 15637 (set_attr "mode" "TI")])
15638
;; Flags-only form of the implicit-length string compare: only FLAGS_REG
;; is set; the other result is clobbered through a match_scratch.
;; Alternatives 0-1 clobber a "Yz" V16QI scratch and emit pcmpistrm;
;; alternatives 2-3 clobber a "c" SImode scratch and emit pcmpistri.  The
;; unused scratch in each alternative has constraint "X".  Within each
;; pair the second alternative takes operand 3 from memory.
15639(define_insn "sse4_2_pcmpistr_cconly"
15640 [(set (reg:CC FLAGS_REG)
15641 (unspec:CC
18f95a36 15642 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
15643 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
15644 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
f0dd3deb 15645 UNSPEC_PCMPISTR))
18f95a36 15646 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
15647 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
f0dd3deb 15648 "TARGET_SSE4_2"
15649 "@
ed30e0a6 15650 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
15651 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
15652 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
15653 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
f0dd3deb 15654 [(set_attr "type" "sselog")
15655 (set_attr "prefix_data16" "1")
15656 (set_attr "prefix_extra" "1")
8c1dfa94 15657 (set_attr "ssememalign" "8")
00a0e418 15658 (set_attr "length_immediate" "1")
f0dd3deb 15659 (set_attr "memory" "none,load,none,load")
ed30e0a6 15660 (set_attr "prefix" "maybe_vex")
6470d004 15661 (set_attr "btver2_decode" "vector,vector,vector,vector")
f0dd3deb 15662 (set_attr "mode" "TI")])
448e99f5 15663
0daf3bbe 15664;; Packed float variants
;; Maps the index-vector mode of a single-precision gather/scatter prefetch
;; to the mode given to the memory reference: V8DI -> V8SF, V16SI -> V16SF.
15665(define_mode_attr GATHER_SCATTER_SF_MEM_MODE
15666 [(V8DI "V8SF") (V16SI "V16SF")])
15667
;; Expander for the single-precision gather prefetch.  Operand 0 is the
;; mask (a register or constant -1), operand 4 the hint (2 or 3).  The
;; preparation code builds operands[5] as a Pmode UNSPEC_VSIBADDR unspec
;; over (operands[2], operands[1], operands[3]) -- the address, the
;; VI48_512 vector register and the 1/2/4/8 constant -- which then forms
;; the address of the mem via match_par_dup 5.
15668(define_expand "avx512pf_gatherpf<mode>sf"
d2ff59d6 15669 [(unspec
15670 [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
0daf3bbe 15671 (mem:<GATHER_SCATTER_SF_MEM_MODE>
d2ff59d6 15672 (match_par_dup 5
15673 [(match_operand 2 "vsib_address_operand")
15674 (match_operand:VI48_512 1 "register_operand")
15675 (match_operand:SI 3 "const1248_operand")]))
3befdeb0 15676 (match_operand:SI 4 "const_2_to_3_operand")]
d2ff59d6 15677 UNSPEC_GATHER_PREFETCH)]
15678 "TARGET_AVX512PF"
15679{
15680 operands[5]
15681 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
15682 operands[3]), UNSPEC_VSIBADDR);
15683})
15684
;; Masked single-precision gather prefetch.  Operand 0 is the mask
;; register ("Yk"), printed in braces after the memory operand 5.  The
;; hint in operand 4 selects the mnemonic: 3 -> vgatherpf0...ps,
;; 2 -> vgatherpf1...ps; any other value is unreachable because the
;; predicate is const_2_to_3_operand.
0daf3bbe 15685(define_insn "*avx512pf_gatherpf<mode>sf_mask"
d2ff59d6 15686 [(unspec
a31e7f46 15687 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
0daf3bbe 15688 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
d2ff59d6 15689 [(unspec:P
1e662e65 15690 [(match_operand:P 2 "vsib_address_operand" "Tv")
d2ff59d6 15691 (match_operand:VI48_512 1 "register_operand" "v")
15692 (match_operand:SI 3 "const1248_operand" "n")]
15693 UNSPEC_VSIBADDR)])
3befdeb0 15694 (match_operand:SI 4 "const_2_to_3_operand" "n")]
d2ff59d6 15695 UNSPEC_GATHER_PREFETCH)]
15696 "TARGET_AVX512PF"
15697{
15698 switch (INTVAL (operands[4]))
15699 {
3befdeb0 15700 case 3:
23afdab7 15701 return "vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
15702 case 2:
d2ff59d6 15703 return "vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
15704 default:
15705 gcc_unreachable ();
15706 }
15707}
15708 [(set_attr "type" "sse")
15709 (set_attr "prefix" "evex")
15710 (set_attr "mode" "XI")])
15711
;; Single-precision gather prefetch with the mask position fixed to
;; (const_int -1); only the memory operand 4 is printed.  The hint in
;; operand 3 selects the mnemonic: 3 -> vgatherpf0...ps,
;; 2 -> vgatherpf1...ps; any other value is unreachable because the
;; predicate is const_2_to_3_operand.
0daf3bbe 15712(define_insn "*avx512pf_gatherpf<mode>sf"
d2ff59d6 15713 [(unspec
15714 [(const_int -1)
0daf3bbe 15715 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 4 "vsib_mem_operator"
d2ff59d6 15716 [(unspec:P
1e662e65 15717 [(match_operand:P 1 "vsib_address_operand" "Tv")
d2ff59d6 15718 (match_operand:VI48_512 0 "register_operand" "v")
15719 (match_operand:SI 2 "const1248_operand" "n")]
15720 UNSPEC_VSIBADDR)])
3befdeb0 15721 (match_operand:SI 3 "const_2_to_3_operand" "n")]
d2ff59d6 15722 UNSPEC_GATHER_PREFETCH)]
15723 "TARGET_AVX512PF"
15724{
15725 switch (INTVAL (operands[3]))
15726 {
3befdeb0 15727 case 3:
23afdab7 15728 return "vgatherpf0<ssemodesuffix>ps\t{%4|%4}";
15729 case 2:
d2ff59d6 15730 return "vgatherpf1<ssemodesuffix>ps\t{%4|%4}";
15731 default:
15732 gcc_unreachable ();
15733 }
15734}
15735 [(set_attr "type" "sse")
15736 (set_attr "prefix" "evex")
15737 (set_attr "mode" "XI")])
15738
0daf3bbe 15739;; Packed double variants
;; Expander for the double-precision gather prefetch; the memory reference
;; is always V8DF.  Operand 0 is the mask (a register or constant -1),
;; operand 4 the hint (2 or 3).  The preparation code builds operands[5]
;; as a Pmode UNSPEC_VSIBADDR unspec over (operands[2], operands[1],
;; operands[3]) -- the address, the VI4_256_8_512 vector register and the
;; 1/2/4/8 constant -- which then forms the address of the mem via
;; match_par_dup 5.
15740(define_expand "avx512pf_gatherpf<mode>df"
15741 [(unspec
15742 [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
15743 (mem:V8DF
15744 (match_par_dup 5
15745 [(match_operand 2 "vsib_address_operand")
15746 (match_operand:VI4_256_8_512 1 "register_operand")
15747 (match_operand:SI 3 "const1248_operand")]))
3befdeb0 15748 (match_operand:SI 4 "const_2_to_3_operand")]
0daf3bbe 15749 UNSPEC_GATHER_PREFETCH)]
15750 "TARGET_AVX512PF"
15751{
15752 operands[5]
15753 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
15754 operands[3]), UNSPEC_VSIBADDR);
15755})
15756
;; Masked double-precision gather prefetch.  Operand 0 is the mask
;; register ("Yk"), printed in braces after the V8DF memory operand 5.
;; The hint in operand 4 selects the mnemonic: 3 -> vgatherpf0...pd,
;; 2 -> vgatherpf1...pd; any other value is unreachable because the
;; predicate is const_2_to_3_operand.
15757(define_insn "*avx512pf_gatherpf<mode>df_mask"
15758 [(unspec
a31e7f46 15759 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
0daf3bbe 15760 (match_operator:V8DF 5 "vsib_mem_operator"
15761 [(unspec:P
15762 [(match_operand:P 2 "vsib_address_operand" "Tv")
15763 (match_operand:VI4_256_8_512 1 "register_operand" "v")
15764 (match_operand:SI 3 "const1248_operand" "n")]
15765 UNSPEC_VSIBADDR)])
3befdeb0 15766 (match_operand:SI 4 "const_2_to_3_operand" "n")]
0daf3bbe 15767 UNSPEC_GATHER_PREFETCH)]
15768 "TARGET_AVX512PF"
15769{
15770 switch (INTVAL (operands[4]))
15771 {
3befdeb0 15772 case 3:
23afdab7 15773 return "vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
15774 case 2:
0daf3bbe 15775 return "vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
15776 default:
15777 gcc_unreachable ();
15778 }
15779}
15780 [(set_attr "type" "sse")
15781 (set_attr "prefix" "evex")
15782 (set_attr "mode" "XI")])
15783
;; Packed-double gather prefetch with a constant -1 in the mask position
;; (no mask register operand; operand numbers shift down by one compared
;; with the _mask variant).  Operand 3 selects the mnemonic: 3 emits
;; vgatherpf0...pd, 2 emits vgatherpf1...pd.  Only memory operand 4 is
;; printed.
15784(define_insn "*avx512pf_gatherpf<mode>df"
15785 [(unspec
15786 [(const_int -1)
15787 (match_operator:V8DF 4 "vsib_mem_operator"
15788 [(unspec:P
15789 [(match_operand:P 1 "vsib_address_operand" "Tv")
15790 (match_operand:VI4_256_8_512 0 "register_operand" "v")
15791 (match_operand:SI 2 "const1248_operand" "n")]
15792 UNSPEC_VSIBADDR)])
3befdeb0 15793 (match_operand:SI 3 "const_2_to_3_operand" "n")]
0daf3bbe 15794 UNSPEC_GATHER_PREFETCH)]
15795 "TARGET_AVX512PF"
15796{
15797 switch (INTVAL (operands[3]))
15798 {
3befdeb0 15799 case 3:
23afdab7 15800 return "vgatherpf0<ssemodesuffix>pd\t{%4|%4}";
15801 case 2:
0daf3bbe 15802 return "vgatherpf1<ssemodesuffix>pd\t{%4|%4}";
15803 default:
15804 gcc_unreachable ();
15805 }
15806}
15807 [(set_attr "type" "sse")
15808 (set_attr "prefix" "evex")
15809 (set_attr "mode" "XI")])
15810
15811;; Packed float variants
;; Expander for the packed-float scatter prefetch.  Operand 0 is the mask
;; (register or constant -1), operand 4 the hint selector (const2367_operand).
;; The preparation code wraps operands 2, 1 and 3 (address, vector register,
;; SI constant -- presumably VSIB base, index and scale) into operand 5, an
;; UNSPEC_VSIBADDR in Pmode used as the address of the mem.
15812(define_expand "avx512pf_scatterpf<mode>sf"
d2ff59d6 15813 [(unspec
15814 [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
0daf3bbe 15815 (mem:<GATHER_SCATTER_SF_MEM_MODE>
d2ff59d6 15816 (match_par_dup 5
15817 [(match_operand 2 "vsib_address_operand")
15818 (match_operand:VI48_512 1 "register_operand")
15819 (match_operand:SI 3 "const1248_operand")]))
d418f1d9 15820 (match_operand:SI 4 "const2367_operand")]
d2ff59d6 15821 UNSPEC_SCATTER_PREFETCH)]
15822 "TARGET_AVX512PF"
15823{
15824 operands[5]
15825 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
15826 operands[3]), UNSPEC_VSIBADDR);
15827})
15828
;; Masked packed-float scatter prefetch.  Operand 0 is the mask register
;; (constraint "Yk"), printed as {%0} after the VSIB memory operand 5.
;; Operand 4 selects the mnemonic: 3 or 7 emits vscatterpf0...ps, 2 or 6
;; emits vscatterpf1...ps; any other value is unreachable.
0daf3bbe 15829(define_insn "*avx512pf_scatterpf<mode>sf_mask"
d2ff59d6 15830 [(unspec
a31e7f46 15831 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
0daf3bbe 15832 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
d2ff59d6 15833 [(unspec:P
1e662e65 15834 [(match_operand:P 2 "vsib_address_operand" "Tv")
d2ff59d6 15835 (match_operand:VI48_512 1 "register_operand" "v")
15836 (match_operand:SI 3 "const1248_operand" "n")]
15837 UNSPEC_VSIBADDR)])
d418f1d9 15838 (match_operand:SI 4 "const2367_operand" "n")]
d2ff59d6 15839 UNSPEC_SCATTER_PREFETCH)]
15840 "TARGET_AVX512PF"
15841{
15842 switch (INTVAL (operands[4]))
15843 {
3befdeb0 15844 case 3:
d418f1d9 15845 case 7:
23afdab7 15846 return "vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
15847 case 2:
15848 case 6:
d2ff59d6 15849 return "vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
15850 default:
15851 gcc_unreachable ();
15852 }
15853}
15854 [(set_attr "type" "sse")
15855 (set_attr "prefix" "evex")
15856 (set_attr "mode" "XI")])
15857
;; Packed-float scatter prefetch with a constant -1 in the mask position
;; (no mask register operand).  Operand 3 selects the mnemonic: 3 or 7
;; emits vscatterpf0...ps, 2 or 6 emits vscatterpf1...ps.  Only memory
;; operand 4 is printed.
0daf3bbe 15858(define_insn "*avx512pf_scatterpf<mode>sf"
d2ff59d6 15859 [(unspec
15860 [(const_int -1)
0daf3bbe 15861 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 4 "vsib_mem_operator"
d2ff59d6 15862 [(unspec:P
1e662e65 15863 [(match_operand:P 1 "vsib_address_operand" "Tv")
d2ff59d6 15864 (match_operand:VI48_512 0 "register_operand" "v")
15865 (match_operand:SI 2 "const1248_operand" "n")]
15866 UNSPEC_VSIBADDR)])
d418f1d9 15867 (match_operand:SI 3 "const2367_operand" "n")]
d2ff59d6 15868 UNSPEC_SCATTER_PREFETCH)]
15869 "TARGET_AVX512PF"
15870{
15871 switch (INTVAL (operands[3]))
15872 {
3befdeb0 15873 case 3:
d418f1d9 15874 case 7:
23afdab7 15875 return "vscatterpf0<ssemodesuffix>ps\t{%4|%4}";
15876 case 2:
15877 case 6:
d2ff59d6 15878 return "vscatterpf1<ssemodesuffix>ps\t{%4|%4}";
15879 default:
15880 gcc_unreachable ();
15881 }
15882}
15883 [(set_attr "type" "sse")
15884 (set_attr "prefix" "evex")
15885 (set_attr "mode" "XI")])
15886
0daf3bbe 15887;; Packed double variants
;; Expander for the packed-double scatter prefetch.  Operand 0 is the mask
;; (register or constant -1), operand 4 the hint selector (const2367_operand).
;; The preparation code wraps operands 2, 1 and 3 (address, vector register,
;; SI constant -- presumably VSIB base, index and scale) into operand 5, an
;; UNSPEC_VSIBADDR in Pmode used as the address of the V8DF mem.
15888(define_expand "avx512pf_scatterpf<mode>df"
15889 [(unspec
15890 [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
15891 (mem:V8DF
15892 (match_par_dup 5
15893 [(match_operand 2 "vsib_address_operand")
15894 (match_operand:VI4_256_8_512 1 "register_operand")
15895 (match_operand:SI 3 "const1248_operand")]))
d418f1d9 15896 (match_operand:SI 4 "const2367_operand")]
0daf3bbe 15897 UNSPEC_SCATTER_PREFETCH)]
15898 "TARGET_AVX512PF"
15899{
15900 operands[5]
15901 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
15902 operands[3]), UNSPEC_VSIBADDR);
15903})
15904
;; Masked packed-double scatter prefetch.  Operand 0 is the mask register
;; (constraint "Yk"), printed as {%0} after the VSIB memory operand 5.
;; Operand 4 selects the mnemonic: 3 or 7 emits vscatterpf0...pd, 2 or 6
;; emits vscatterpf1...pd; any other value is unreachable.
15905(define_insn "*avx512pf_scatterpf<mode>df_mask"
15906 [(unspec
a31e7f46 15907 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
0daf3bbe 15908 (match_operator:V8DF 5 "vsib_mem_operator"
15909 [(unspec:P
15910 [(match_operand:P 2 "vsib_address_operand" "Tv")
15911 (match_operand:VI4_256_8_512 1 "register_operand" "v")
15912 (match_operand:SI 3 "const1248_operand" "n")]
15913 UNSPEC_VSIBADDR)])
d418f1d9 15914 (match_operand:SI 4 "const2367_operand" "n")]
0daf3bbe 15915 UNSPEC_SCATTER_PREFETCH)]
15916 "TARGET_AVX512PF"
15917{
15918 switch (INTVAL (operands[4]))
15919 {
3befdeb0 15920 case 3:
d418f1d9 15921 case 7:
23afdab7 15922 return "vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
15923 case 2:
15924 case 6:
0daf3bbe 15925 return "vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
15926 default:
15927 gcc_unreachable ();
15928 }
15929}
15930 [(set_attr "type" "sse")
15931 (set_attr "prefix" "evex")
15932 (set_attr "mode" "XI")])
15933
;; Packed-double scatter prefetch with a constant -1 in the mask position
;; (no mask register operand).  Operand 3 selects the mnemonic: 3 or 7
;; emits vscatterpf0...pd, 2 or 6 emits vscatterpf1...pd.  Only memory
;; operand 4 is printed.
15934(define_insn "*avx512pf_scatterpf<mode>df"
15935 [(unspec
15936 [(const_int -1)
15937 (match_operator:V8DF 4 "vsib_mem_operator"
15938 [(unspec:P
15939 [(match_operand:P 1 "vsib_address_operand" "Tv")
15940 (match_operand:VI4_256_8_512 0 "register_operand" "v")
15941 (match_operand:SI 2 "const1248_operand" "n")]
15942 UNSPEC_VSIBADDR)])
d418f1d9 15943 (match_operand:SI 3 "const2367_operand" "n")]
0daf3bbe 15944 UNSPEC_SCATTER_PREFETCH)]
15945 "TARGET_AVX512PF"
15946{
15947 switch (INTVAL (operands[3]))
15948 {
3befdeb0 15949 case 3:
d418f1d9 15950 case 7:
23afdab7 15951 return "vscatterpf0<ssemodesuffix>pd\t{%4|%4}";
15952 case 2:
15953 case 6:
0daf3bbe 15954 return "vscatterpf1<ssemodesuffix>pd\t{%4|%4}";
15955 default:
15956 gcc_unreachable ();
15957 }
15958}
15959 [(set_attr "type" "sse")
15960 (set_attr "prefix" "evex")
15961 (set_attr "mode" "XI")])
15962
;; vexp2 on a 512-bit float vector (VF_512), modelled as UNSPEC_EXP2 of
;; operand 1.  The <mask_name> and <round_saeonly_name> substitutions in
;; the name supply the masking and SAE-only pieces of the output template.
fbf4df62 15963(define_insn "avx512er_exp2<mode><mask_name><round_saeonly_name>"
85065932 15964 [(set (match_operand:VF_512 0 "register_operand" "=v")
15965 (unspec:VF_512
fbf4df62 15966 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
85065932 15967 UNSPEC_EXP2))]
15968 "TARGET_AVX512ER"
fbf4df62 15969 "vexp2<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
85065932 15970 [(set_attr "prefix" "evex")
c4f782fd 15971 (set_attr "type" "sse")
85065932 15972 (set_attr "mode" "<MODE>")])
15973
;; vrcp28 on a 512-bit float vector (VF_512), modelled as UNSPEC_RCP28 of
;; operand 1.  The <mask_name> and <round_saeonly_name> substitutions in
;; the name supply the masking and SAE-only pieces of the output template.
fbf4df62 15974(define_insn "<mask_codefor>avx512er_rcp28<mode><mask_name><round_saeonly_name>"
85065932 15975 [(set (match_operand:VF_512 0 "register_operand" "=v")
15976 (unspec:VF_512
fbf4df62 15977 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
85065932 15978 UNSPEC_RCP28))]
15979 "TARGET_AVX512ER"
fbf4df62 15980 "vrcp28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
85065932 15981 [(set_attr "prefix" "evex")
c4f782fd 15982 (set_attr "type" "sse")
85065932 15983 (set_attr "mode" "<MODE>")])
15984
;; Scalar vrcp28 on a 128-bit float vector (VF_128): the vec_merge with
;; (const_int 1) takes element 0 from UNSPEC_RCP28 of operand 1 and the
;; remaining elements from operand 2.
fbf4df62 15985(define_insn "avx512er_vmrcp28<mode><round_saeonly_name>"
15986 [(set (match_operand:VF_128 0 "register_operand" "=v")
15987 (vec_merge:VF_128
15988 (unspec:VF_128
15989 [(match_operand:VF_128 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
15990 UNSPEC_RCP28)
15991 (match_operand:VF_128 2 "register_operand" "v")
15992 (const_int 1)))]
15993 "TARGET_AVX512ER"
c4f782fd 15994 "vrcp28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0, %2, %1<round_saeonly_op3>}"
fbf4df62 15995 [(set_attr "length_immediate" "1")
15996 (set_attr "prefix" "evex")
c4f782fd 15997 (set_attr "type" "sse")
fbf4df62 15998 (set_attr "mode" "<MODE>")])
15999
;; vrsqrt28 on a 512-bit float vector (VF_512), modelled as UNSPEC_RSQRT28
;; of operand 1.  The <mask_name> and <round_saeonly_name> substitutions in
;; the name supply the masking and SAE-only pieces of the output template.
16000(define_insn "<mask_codefor>avx512er_rsqrt28<mode><mask_name><round_saeonly_name>"
85065932 16001 [(set (match_operand:VF_512 0 "register_operand" "=v")
16002 (unspec:VF_512
fbf4df62 16003 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
85065932 16004 UNSPEC_RSQRT28))]
16005 "TARGET_AVX512ER"
fbf4df62 16006 "vrsqrt28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
85065932 16007 [(set_attr "prefix" "evex")
c4f782fd 16008 (set_attr "type" "sse")
85065932 16009 (set_attr "mode" "<MODE>")])
16010
;; Scalar vrsqrt28 on a 128-bit float vector (VF_128): the vec_merge with
;; (const_int 1) takes element 0 from UNSPEC_RSQRT28 of operand 1 and the
;; remaining elements from operand 2.
fbf4df62 16011(define_insn "avx512er_vmrsqrt28<mode><round_saeonly_name>"
16012 [(set (match_operand:VF_128 0 "register_operand" "=v")
16013 (vec_merge:VF_128
16014 (unspec:VF_128
16015 [(match_operand:VF_128 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
16016 UNSPEC_RSQRT28)
16017 (match_operand:VF_128 2 "register_operand" "v")
16018 (const_int 1)))]
16019 "TARGET_AVX512ER"
c4f782fd 16020 "vrsqrt28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0, %2, %1<round_saeonly_op3>}"
fbf4df62 16021 [(set_attr "length_immediate" "1")
c4f782fd 16022 (set_attr "type" "sse")
fbf4df62 16023 (set_attr "prefix" "evex")
16024 (set_attr "mode" "<MODE>")])
16025
18525343 16026;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
16027;;
16028;; XOP instructions
16029;;
16030;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
16031
;; xop_plus iterates over wrapping (plus) and signed-saturating (ss_plus)
;; addition.  <macs> and <madcs> give the matching mnemonic fragment for
;; each code: the ss_plus forms carry an extra trailing "s".
fd65bafc 16032(define_code_iterator xop_plus [plus ss_plus])
16033
16034(define_code_attr macs [(plus "macs") (ss_plus "macss")])
16035(define_code_attr madcs [(plus "madcs") (ss_plus "madcss")])
16036
18525343 16037;; XOP parallel integer multiply/add instructions.
18525343 16038
;; Element-wise multiply-accumulate on VI24_128 vectors:
;; operand 0 = xop_plus (operand 1 * operand 2, operand 3).
;; Operands 1 and 2 are commutative ("%"); only operand 2 may be memory.
fd65bafc 16039(define_insn "xop_p<macs><ssemodesuffix><ssemodesuffix>"
16040 [(set (match_operand:VI24_128 0 "register_operand" "=x")
16041 (xop_plus:VI24_128
16042 (mult:VI24_128
16043 (match_operand:VI24_128 1 "nonimmediate_operand" "%x")
16044 (match_operand:VI24_128 2 "nonimmediate_operand" "xm"))
5093b7f4 16045 (match_operand:VI24_128 3 "register_operand" "x")))]
e029cd62 16046 "TARGET_XOP"
fd65bafc 16047 "vp<macs><ssemodesuffix><ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18525343 16048 [(set_attr "type" "ssemuladd")
16049 (set_attr "mode" "TI")])
16050
;; Multiply-accumulate of the sign-extended elements 0 and 2 of the V4SI
;; operands 1 and 2 into V2DI: operand 0 = xop_plus (product, operand 3).
fd65bafc 16051(define_insn "xop_p<macs>dql"
e029cd62 16052 [(set (match_operand:V2DI 0 "register_operand" "=x")
fd65bafc 16053 (xop_plus:V2DI
18525343 16054 (mult:V2DI
16055 (sign_extend:V2DI
16056 (vec_select:V2SI
c971711a 16057 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
72b48d28 16058 (parallel [(const_int 0) (const_int 2)])))
18525343 16059 (sign_extend:V2DI
16060 (vec_select:V2SI
fd65bafc 16061 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
72b48d28 16062 (parallel [(const_int 0) (const_int 2)]))))
5093b7f4 16063 (match_operand:V2DI 3 "register_operand" "x")))]
e029cd62 16064 "TARGET_XOP"
fd65bafc 16065 "vp<macs>dql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18525343 16066 [(set_attr "type" "ssemuladd")
16067 (set_attr "mode" "TI")])
16068
;; Multiply-accumulate of the sign-extended elements 1 and 3 of the V4SI
;; operands 1 and 2 into V2DI: operand 0 = xop_plus (product, operand 3).
fd65bafc 16069(define_insn "xop_p<macs>dqh"
e029cd62 16070 [(set (match_operand:V2DI 0 "register_operand" "=x")
fd65bafc 16071 (xop_plus:V2DI
18525343 16072 (mult:V2DI
16073 (sign_extend:V2DI
16074 (vec_select:V2SI
c971711a 16075 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
72b48d28 16076 (parallel [(const_int 1) (const_int 3)])))
18525343 16077 (sign_extend:V2DI
16078 (vec_select:V2SI
e029cd62 16079 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
72b48d28 16080 (parallel [(const_int 1) (const_int 3)]))))
5093b7f4 16081 (match_operand:V2DI 3 "register_operand" "x")))]
e029cd62 16082 "TARGET_XOP"
fd65bafc 16083 "vp<macs>dqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18525343 16084 [(set_attr "type" "ssemuladd")
16085 (set_attr "mode" "TI")])
16086
18525343 16087;; XOP parallel integer multiply/add instructions for the intrinsics
;; Multiply-accumulate of the sign-extended odd elements (1, 3, 5, 7) of
;; the V8HI operands 1 and 2 into V4SI:
;; operand 0 = xop_plus (product, operand 3).
fd65bafc 16088(define_insn "xop_p<macs>wd"
e029cd62 16089 [(set (match_operand:V4SI 0 "register_operand" "=x")
fd65bafc 16090 (xop_plus:V4SI
18525343 16091 (mult:V4SI
16092 (sign_extend:V4SI
16093 (vec_select:V4HI
c971711a 16094 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
04d95c72 16095 (parallel [(const_int 1) (const_int 3)
16096 (const_int 5) (const_int 7)])))
18525343 16097 (sign_extend:V4SI
16098 (vec_select:V4HI
e029cd62 16099 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
04d95c72 16100 (parallel [(const_int 1) (const_int 3)
16101 (const_int 5) (const_int 7)]))))
5093b7f4 16102 (match_operand:V4SI 3 "register_operand" "x")))]
e029cd62 16103 "TARGET_XOP"
fd65bafc 16104 "vp<macs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18525343 16105 [(set_attr "type" "ssemuladd")
16106 (set_attr "mode" "TI")])
16107
;; Multiply-add-accumulate on V8HI inputs producing V4SI.  The products of
;; the sign-extended even elements (0, 2, 4, 6) and of the odd elements
;; (1, 3, 5, 7) of operands 1 and 2 are added together with plus, and the
;; sum is then combined with operand 3 using xop_plus.
fd65bafc 16108(define_insn "xop_p<madcs>wd"
e029cd62 16109 [(set (match_operand:V4SI 0 "register_operand" "=x")
fd65bafc 16110 (xop_plus:V4SI
18525343 16111 (plus:V4SI
16112 (mult:V4SI
16113 (sign_extend:V4SI
16114 (vec_select:V4HI
c971711a 16115 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
04d95c72 16116 (parallel [(const_int 0) (const_int 2)
16117 (const_int 4) (const_int 6)])))
18525343 16118 (sign_extend:V4SI
16119 (vec_select:V4HI
e029cd62 16120 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
04d95c72 16121 (parallel [(const_int 0) (const_int 2)
16122 (const_int 4) (const_int 6)]))))
18525343 16123 (mult:V4SI
16124 (sign_extend:V4SI
16125 (vec_select:V4HI
16126 (match_dup 1)
04d95c72 16127 (parallel [(const_int 1) (const_int 3)
16128 (const_int 5) (const_int 7)])))
18525343 16129 (sign_extend:V4SI
16130 (vec_select:V4HI
16131 (match_dup 2)
04d95c72 16132 (parallel [(const_int 1) (const_int 3)
16133 (const_int 5) (const_int 7)])))))
5093b7f4 16134 (match_operand:V4SI 3 "register_operand" "x")))]
e029cd62 16135 "TARGET_XOP"
fd65bafc 16136 "vp<madcs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18525343 16137 [(set_attr "type" "ssemuladd")
16138 (set_attr "mode" "TI")])
16139
16140;; XOP parallel XMM conditional moves
;; vpcmov: operand 0 = if_then_else (operand 3, operand 1, operand 2) on
;; any mode in the V iterator.  The two alternatives allow either the
;; selector (operand 3) or operand 2 to be in memory, but not both.
6fe5844b 16141(define_insn "xop_pcmov_<mode><avxsizesuffix>"
16142 [(set (match_operand:V 0 "register_operand" "=x,x")
16143 (if_then_else:V
16144 (match_operand:V 3 "nonimmediate_operand" "x,m")
3efe840b 16145 (match_operand:V 1 "register_operand" "x,x")
16146 (match_operand:V 2 "nonimmediate_operand" "xm,x")))]
e029cd62 16147 "TARGET_XOP"
49fce50b 16148 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18525343 16149 [(set_attr "type" "sse4arg")])
16150
16151;; XOP horizontal add/subtract instructions
;; Horizontal add, bytes to words: each V8HI result element is the sum of
;; an even byte and the following odd byte of operand 1, each extended
;; with any_extend (sign or zero, selected by <u>).
fd65bafc 16152(define_insn "xop_phadd<u>bw"
18525343 16153 [(set (match_operand:V8HI 0 "register_operand" "=x")
16154 (plus:V8HI
fd65bafc 16155 (any_extend:V8HI
18525343 16156 (vec_select:V8QI
16157 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
04d95c72 16158 (parallel [(const_int 0) (const_int 2)
16159 (const_int 4) (const_int 6)
16160 (const_int 8) (const_int 10)
16161 (const_int 12) (const_int 14)])))
fd65bafc 16162 (any_extend:V8HI
18525343 16163 (vec_select:V8QI
16164 (match_dup 1)
04d95c72 16165 (parallel [(const_int 1) (const_int 3)
16166 (const_int 5) (const_int 7)
16167 (const_int 9) (const_int 11)
16168 (const_int 13) (const_int 15)])))))]
18525343 16169 "TARGET_XOP"
fd65bafc 16170 "vphadd<u>bw\t{%1, %0|%0, %1}"
18525343 16171 [(set_attr "type" "sseiadd1")])
16172
;; Horizontal add, bytes to doublewords: V4SI result element i is the sum
;; of the four extended bytes 4i .. 4i+3 of operand 1 (selected as the
;; element sets {0,4,8,12}, {1,5,9,13}, {2,6,10,14} and {3,7,11,15}).
fd65bafc 16173(define_insn "xop_phadd<u>bd"
18525343 16174 [(set (match_operand:V4SI 0 "register_operand" "=x")
16175 (plus:V4SI
16176 (plus:V4SI
fd65bafc 16177 (any_extend:V4SI
18525343 16178 (vec_select:V4QI
16179 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
04d95c72 16180 (parallel [(const_int 0) (const_int 4)
16181 (const_int 8) (const_int 12)])))
fd65bafc 16182 (any_extend:V4SI
18525343 16183 (vec_select:V4QI
16184 (match_dup 1)
04d95c72 16185 (parallel [(const_int 1) (const_int 5)
16186 (const_int 9) (const_int 13)]))))
18525343 16187 (plus:V4SI
fd65bafc 16188 (any_extend:V4SI
18525343 16189 (vec_select:V4QI
16190 (match_dup 1)
04d95c72 16191 (parallel [(const_int 2) (const_int 6)
16192 (const_int 10) (const_int 14)])))
fd65bafc 16193 (any_extend:V4SI
18525343 16194 (vec_select:V4QI
16195 (match_dup 1)
04d95c72 16196 (parallel [(const_int 3) (const_int 7)
16197 (const_int 11) (const_int 15)]))))))]
18525343 16198 "TARGET_XOP"
fd65bafc 16199 "vphadd<u>bd\t{%1, %0|%0, %1}"
18525343 16200 [(set_attr "type" "sseiadd1")])
16201
;; Horizontal add, bytes to quadwords: V2DI result element 0 is the sum of
;; the extended bytes 0..7 of operand 1 and element 1 the sum of bytes
;; 8..15, expressed as a tree of plus over the pairs (k, k+8), k = 0..7.
fd65bafc 16202(define_insn "xop_phadd<u>bq"
18525343 16203 [(set (match_operand:V2DI 0 "register_operand" "=x")
16204 (plus:V2DI
16205 (plus:V2DI
16206 (plus:V2DI
fd65bafc 16207 (any_extend:V2DI
18525343 16208 (vec_select:V2QI
16209 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
5093b7f4 16210 (parallel [(const_int 0) (const_int 8)])))
fd65bafc 16211 (any_extend:V2DI
18525343 16212 (vec_select:V2QI
16213 (match_dup 1)
5093b7f4 16214 (parallel [(const_int 1) (const_int 9)]))))
18525343 16215 (plus:V2DI
fd65bafc 16216 (any_extend:V2DI
18525343 16217 (vec_select:V2QI
16218 (match_dup 1)
5093b7f4 16219 (parallel [(const_int 2) (const_int 10)])))
fd65bafc 16220 (any_extend:V2DI
18525343 16221 (vec_select:V2QI
16222 (match_dup 1)
5093b7f4 16223 (parallel [(const_int 3) (const_int 11)])))))
18525343 16224 (plus:V2DI
16225 (plus:V2DI
fd65bafc 16226 (any_extend:V2DI
18525343 16227 (vec_select:V2QI
16228 (match_dup 1)
5093b7f4 16229 (parallel [(const_int 4) (const_int 12)])))
fd65bafc 16230 (any_extend:V2DI
18525343 16231 (vec_select:V2QI
16232 (match_dup 1)
5093b7f4 16233 (parallel [(const_int 5) (const_int 13)]))))
18525343 16234 (plus:V2DI
fd65bafc 16235 (any_extend:V2DI
18525343 16236 (vec_select:V2QI
16237 (match_dup 1)
5093b7f4 16238 (parallel [(const_int 6) (const_int 14)])))
fd65bafc 16239 (any_extend:V2DI
18525343 16240 (vec_select:V2QI
16241 (match_dup 1)
5093b7f4 16242 (parallel [(const_int 7) (const_int 15)])))))))]
18525343 16243 "TARGET_XOP"
fd65bafc 16244 "vphadd<u>bq\t{%1, %0|%0, %1}"
18525343 16245 [(set_attr "type" "sseiadd1")])
16246
;; Horizontal add, words to doublewords: each V4SI result element is the
;; sum of an even word and the following odd word of operand 1, each
;; extended with any_extend.
fd65bafc 16247(define_insn "xop_phadd<u>wd"
18525343 16248 [(set (match_operand:V4SI 0 "register_operand" "=x")
16249 (plus:V4SI
fd65bafc 16250 (any_extend:V4SI
18525343 16251 (vec_select:V4HI
16252 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
04d95c72 16253 (parallel [(const_int 0) (const_int 2)
16254 (const_int 4) (const_int 6)])))
fd65bafc 16255 (any_extend:V4SI
18525343 16256 (vec_select:V4HI
16257 (match_dup 1)
04d95c72 16258 (parallel [(const_int 1) (const_int 3)
16259 (const_int 5) (const_int 7)])))))]
18525343 16260 "TARGET_XOP"
fd65bafc 16261 "vphadd<u>wd\t{%1, %0|%0, %1}"
18525343 16262 [(set_attr "type" "sseiadd1")])
16263
;; Horizontal add, words to quadwords: V2DI result element 0 is the sum of
;; the extended words 0..3 of operand 1 and element 1 the sum of words
;; 4..7, expressed as plus over the pairs (k, k+4), k = 0..3.
fd65bafc 16264(define_insn "xop_phadd<u>wq"
18525343 16265 [(set (match_operand:V2DI 0 "register_operand" "=x")
16266 (plus:V2DI
16267 (plus:V2DI
fd65bafc 16268 (any_extend:V2DI
18525343 16269 (vec_select:V2HI
16270 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
04d95c72 16271 (parallel [(const_int 0) (const_int 4)])))
fd65bafc 16272 (any_extend:V2DI
18525343 16273 (vec_select:V2HI
16274 (match_dup 1)
04d95c72 16275 (parallel [(const_int 1) (const_int 5)]))))
18525343 16276 (plus:V2DI
fd65bafc 16277 (any_extend:V2DI
18525343 16278 (vec_select:V2HI
16279 (match_dup 1)
04d95c72 16280 (parallel [(const_int 2) (const_int 6)])))
fd65bafc 16281 (any_extend:V2DI
18525343 16282 (vec_select:V2HI
16283 (match_dup 1)
04d95c72 16284 (parallel [(const_int 3) (const_int 7)]))))))]
18525343 16285 "TARGET_XOP"
fd65bafc 16286 "vphadd<u>wq\t{%1, %0|%0, %1}"
18525343 16287 [(set_attr "type" "sseiadd1")])
16288
;; Horizontal add, doublewords to quadwords: each V2DI result element is
;; the sum of an even element (0, 2) and the following odd element (1, 3)
;; of the V4SI operand 1, each extended with any_extend.
fd65bafc 16289(define_insn "xop_phadd<u>dq"
18525343 16290 [(set (match_operand:V2DI 0 "register_operand" "=x")
16291 (plus:V2DI
fd65bafc 16292 (any_extend:V2DI
18525343 16293 (vec_select:V2SI
16294 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
04d95c72 16295 (parallel [(const_int 0) (const_int 2)])))
fd65bafc 16296 (any_extend:V2DI
18525343 16297 (vec_select:V2SI
16298 (match_dup 1)
04d95c72 16299 (parallel [(const_int 1) (const_int 3)])))))]
18525343 16300 "TARGET_XOP"
fd65bafc 16301 "vphadd<u>dq\t{%1, %0|%0, %1}"
18525343 16302 [(set_attr "type" "sseiadd1")])
16303
;; Horizontal subtract, bytes to words: each V8HI result element is the
;; sign-extended even byte of operand 1 minus the sign-extended odd byte
;; that follows it.
16304(define_insn "xop_phsubbw"
16305 [(set (match_operand:V8HI 0 "register_operand" "=x")
16306 (minus:V8HI
16307 (sign_extend:V8HI
16308 (vec_select:V8QI
16309 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
04d95c72 16310 (parallel [(const_int 0) (const_int 2)
16311 (const_int 4) (const_int 6)
16312 (const_int 8) (const_int 10)
16313 (const_int 12) (const_int 14)])))
18525343 16314 (sign_extend:V8HI
16315 (vec_select:V8QI
16316 (match_dup 1)
04d95c72 16317 (parallel [(const_int 1) (const_int 3)
16318 (const_int 5) (const_int 7)
16319 (const_int 9) (const_int 11)
16320 (const_int 13) (const_int 15)])))))]
18525343 16321 "TARGET_XOP"
16322 "vphsubbw\t{%1, %0|%0, %1}"
16323 [(set_attr "type" "sseiadd1")])
16324
;; Horizontal subtract, words to doublewords: each V4SI result element is
;; the sign-extended even word of operand 1 minus the sign-extended odd
;; word that follows it.
16325(define_insn "xop_phsubwd"
16326 [(set (match_operand:V4SI 0 "register_operand" "=x")
16327 (minus:V4SI
16328 (sign_extend:V4SI
16329 (vec_select:V4HI
16330 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
04d95c72 16331 (parallel [(const_int 0) (const_int 2)
16332 (const_int 4) (const_int 6)])))
18525343 16333 (sign_extend:V4SI
16334 (vec_select:V4HI
16335 (match_dup 1)
04d95c72 16336 (parallel [(const_int 1) (const_int 3)
16337 (const_int 5) (const_int 7)])))))]
18525343 16338 "TARGET_XOP"
16339 "vphsubwd\t{%1, %0|%0, %1}"
16340 [(set_attr "type" "sseiadd1")])
16341
;; Horizontal subtract, doublewords to quadwords: each V2DI result element
;; is the sign-extended even element (0, 2) of the V4SI operand 1 minus the
;; sign-extended odd element (1, 3) that follows it.
16342(define_insn "xop_phsubdq"
16343 [(set (match_operand:V2DI 0 "register_operand" "=x")
16344 (minus:V2DI
16345 (sign_extend:V2DI
16346 (vec_select:V2SI
16347 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
04d95c72 16348 (parallel [(const_int 0) (const_int 2)])))
18525343 16349 (sign_extend:V2DI
16350 (vec_select:V2SI
16351 (match_dup 1)
04d95c72 16352 (parallel [(const_int 1) (const_int 3)])))))]
18525343 16353 "TARGET_XOP"
16354 "vphsubdq\t{%1, %0|%0, %1}"
16355 [(set_attr "type" "sseiadd1")])
16356
16357;; XOP permute instructions
;; vpperm, modelled as UNSPEC_XOP_PERMUTE over three V16QI operands.
;; Operand 1 must be a register; the alternatives and the insn condition
;; allow at most one of operands 2 and 3 to be a memory reference.
16358(define_insn "xop_pperm"
e029cd62 16359 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
18525343 16360 (unspec:V16QI
e029cd62 16361 [(match_operand:V16QI 1 "register_operand" "x,x")
16362 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
16363 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
18525343 16364 UNSPEC_XOP_PERMUTE))]
e029cd62 16365 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
18525343 16366 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16367 [(set_attr "type" "sse4arg")
16368 (set_attr "mode" "TI")])
16369
16370;; XOP pack instructions that combine two vectors into a smaller vector
;; Pack two V2DI vectors into one V4SI by truncating each element and
;; concatenating the halves.  Emitted as vpperm; operand 3 is the V16QI
;; selector, referenced only through (use ...).  At most one of operands
;; 2 and 3 may be memory.
16371(define_insn "xop_pperm_pack_v2di_v4si"
e029cd62 16372 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
18525343 16373 (vec_concat:V4SI
16374 (truncate:V2SI
e029cd62 16375 (match_operand:V2DI 1 "register_operand" "x,x"))
18525343 16376 (truncate:V2SI
e029cd62 16377 (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
16378 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
16379 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
18525343 16380 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16381 [(set_attr "type" "sse4arg")
16382 (set_attr "mode" "TI")])
16383
;; Pack two V4SI vectors into one V8HI by truncating each element and
;; concatenating the halves.  Emitted as vpperm; operand 3 is the V16QI
;; selector, referenced only through (use ...).  At most one of operands
;; 2 and 3 may be memory.
16384(define_insn "xop_pperm_pack_v4si_v8hi"
e029cd62 16385 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
18525343 16386 (vec_concat:V8HI
16387 (truncate:V4HI
e029cd62 16388 (match_operand:V4SI 1 "register_operand" "x,x"))
18525343 16389 (truncate:V4HI
e029cd62 16390 (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
16391 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
16392 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
18525343 16393 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16394 [(set_attr "type" "sse4arg")
16395 (set_attr "mode" "TI")])
16396
;; Pack two V8HI vectors into one V16QI by truncating each element and
;; concatenating the halves.  Emitted as vpperm; operand 3 is the V16QI
;; selector, referenced only through (use ...).  At most one of operands
;; 2 and 3 may be memory.
16397(define_insn "xop_pperm_pack_v8hi_v16qi"
e029cd62 16398 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
18525343 16399 (vec_concat:V16QI
16400 (truncate:V8QI
e029cd62 16401 (match_operand:V8HI 1 "register_operand" "x,x"))
18525343 16402 (truncate:V8QI
e029cd62 16403 (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
16404 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
16405 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
18525343 16406 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16407 [(set_attr "type" "sse4arg")
16408 (set_attr "mode" "TI")])
16409
16410;; XOP packed rotate instructions
;; Rotate-left expander for 128-bit integer vectors with an SImode count.
;; When operand 2 satisfies const_0_to_<sserotatemax>_operand nothing is
;; done here and the rotate pattern itself is emitted.  Otherwise the count
;; is converted to the element mode if necessary, replicated into every
;; element of a fresh vector via vec_init, and the rotate is emitted through
;; xop_vrotl<mode>3 with that per-element count vector.
16411(define_expand "rotl<mode>3"
abd4f58b 16412 [(set (match_operand:VI_128 0 "register_operand")
6fe5844b 16413 (rotate:VI_128
abd4f58b 16414 (match_operand:VI_128 1 "nonimmediate_operand")
18525343 16415 (match_operand:SI 2 "general_operand")))]
16416 "TARGET_XOP"
16417{
16418 /* If we were given a scalar, convert it to parallel */
16419 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
16420 {
16421 rtvec vs = rtvec_alloc (<ssescalarnum>);
16422 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
16423 rtx reg = gen_reg_rtx (<MODE>mode);
16424 rtx op2 = operands[2];
16425 int i;
16426
16427 if (GET_MODE (op2) != <ssescalarmode>mode)
5deb404d 16428 {
18525343 16429 op2 = gen_reg_rtx (<ssescalarmode>mode);
16430 convert_move (op2, operands[2], false);
16431 }
16432
16433 for (i = 0; i < <ssescalarnum>; i++)
16434 RTVEC_ELT (vs, i) = op2;
16435
16436 emit_insn (gen_vec_init<mode> (reg, par));
16437 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
16438 DONE;
16439 }
16440})
16441
;; Rotate-right expander for 128-bit integer vectors with an SImode count.
;; When operand 2 satisfies const_0_to_<sserotatemax>_operand nothing is
;; done here and the rotatert pattern itself is emitted.  Otherwise the
;; count is converted to the element mode if necessary, replicated into a
;; vector via vec_init, negated, and passed to xop_vrotl<mode>3, whose
;; pattern rotates right for negative per-element counts.
16442(define_expand "rotr<mode>3"
abd4f58b 16443 [(set (match_operand:VI_128 0 "register_operand")
6fe5844b 16444 (rotatert:VI_128
abd4f58b 16445 (match_operand:VI_128 1 "nonimmediate_operand")
18525343 16446 (match_operand:SI 2 "general_operand")))]
16447 "TARGET_XOP"
16448{
16449 /* If we were given a scalar, convert it to parallel */
16450 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
16451 {
16452 rtvec vs = rtvec_alloc (<ssescalarnum>);
16453 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
16454 rtx neg = gen_reg_rtx (<MODE>mode);
16455 rtx reg = gen_reg_rtx (<MODE>mode);
16456 rtx op2 = operands[2];
16457 int i;
16458
16459 if (GET_MODE (op2) != <ssescalarmode>mode)
5deb404d 16460 {
18525343 16461 op2 = gen_reg_rtx (<ssescalarmode>mode);
16462 convert_move (op2, operands[2], false);
16463 }
16464
16465 for (i = 0; i < <ssescalarnum>; i++)
16466 RTVEC_ELT (vs, i) = op2;
16467
16468 emit_insn (gen_vec_init<mode> (reg, par));
16469 emit_insn (gen_neg<mode>2 (neg, reg));
16470 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
16471 DONE;
16472 }
16473})
16474
;; Rotate left by an immediate: operand 2 is a constant accepted by
;; const_0_to_<sserotatemax>_operand and is printed directly as the vprot
;; count.  Operand 1 may be a register or memory.
16475(define_insn "xop_rotl<mode>3"
6fe5844b 16476 [(set (match_operand:VI_128 0 "register_operand" "=x")
16477 (rotate:VI_128
16478 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
18525343 16479 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
16480 "TARGET_XOP"
63d5e521 16481 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
18525343 16482 [(set_attr "type" "sseishft")
16483 (set_attr "length_immediate" "1")
16484 (set_attr "mode" "TI")])
16485
;; Rotate right by an immediate.  The output code stores
;; (element bit size - operand 2) in operands[3] and prints that as the
;; vprot count, i.e. the right rotate is emitted as the complementary
;; left rotate.
16486(define_insn "xop_rotr<mode>3"
6fe5844b 16487 [(set (match_operand:VI_128 0 "register_operand" "=x")
16488 (rotatert:VI_128
16489 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
18525343 16490 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
16491 "TARGET_XOP"
16492{
a31cce64 16493 operands[3]
16494 = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
63d5e521 16495 return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\";
18525343 16496}
16497 [(set_attr "type" "sseishft")
16498 (set_attr "length_immediate" "1")
16499 (set_attr "mode" "TI")])
16500
;; Per-element variable rotate right: negates the count vector (operand 2)
;; into a fresh register and emits xop_vrotl<mode>3 with it, relying on
;; that pattern rotating right for negative counts.
16501(define_expand "vrotr<mode>3"
abd4f58b 16502 [(match_operand:VI_128 0 "register_operand")
16503 (match_operand:VI_128 1 "register_operand")
16504 (match_operand:VI_128 2 "register_operand")]
18525343 16505 "TARGET_XOP"
16506{
16507 rtx reg = gen_reg_rtx (<MODE>mode);
16508 emit_insn (gen_neg<mode>2 (reg, operands[2]));
16509 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
16510 DONE;
16511})
16512
;; Per-element variable rotate left: forwards all three operands unchanged
;; to xop_vrotl<mode>3.
16513(define_expand "vrotl<mode>3"
abd4f58b 16514 [(match_operand:VI_128 0 "register_operand")
16515 (match_operand:VI_128 1 "register_operand")
16516 (match_operand:VI_128 2 "register_operand")]
18525343 16517 "TARGET_XOP"
16518{
16519 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
16520 DONE;
16521})
16522
;; Variable vprot with a vector count (operand 2).  The pattern rotates
;; operand 1 left by operand 2 where the count is >= 0 and right by the
;; negated count otherwise.  At most one of operands 1 and 2 may be memory.
16523(define_insn "xop_vrotl<mode>3"
6fe5844b 16524 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
16525 (if_then_else:VI_128
16526 (ge:VI_128
16527 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
18525343 16528 (const_int 0))
6fe5844b 16529 (rotate:VI_128
16530 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
18525343 16531 (match_dup 2))
6fe5844b 16532 (rotatert:VI_128
18525343 16533 (match_dup 1)
6fe5844b 16534 (neg:VI_128 (match_dup 2)))))]
e029cd62 16535 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
63d5e521 16536 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
18525343 16537 [(set_attr "type" "sseishft")
16538 (set_attr "prefix_data16" "0")
16539 (set_attr "prefix_extra" "2")
16540 (set_attr "mode" "TI")])
16541
16542;; XOP packed shift instructions.
;; Per-element logical shift right for VI12_128 vectors, XOP only.  The
;; count vector is negated and passed to xop_shl<mode>3 (presumably that
;; insn shifts right for negative counts; its definition is not shown here).
18525343 16543(define_expand "vlshr<mode>3"
abd4f58b 16544 [(set (match_operand:VI12_128 0 "register_operand")
8e73e3ae 16545 (lshiftrt:VI12_128
abd4f58b 16546 (match_operand:VI12_128 1 "register_operand")
16547 (match_operand:VI12_128 2 "nonimmediate_operand")))]
18525343 16548 "TARGET_XOP"
16549{
16550 rtx neg = gen_reg_rtx (<MODE>mode);
16551 emit_insn (gen_neg<mode>2 (neg, operands[2]));
9f689820 16552 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
18525343 16553 DONE;
16554})
16555
;; Per-element logical shift right for VI48_128 vectors.  With AVX2 the
;; lshiftrt pattern is emitted as is; without it (XOP only) the count is
;; negated and passed to xop_shl<mode>3.
7d079352 16556(define_expand "vlshr<mode>3"
abd4f58b 16557 [(set (match_operand:VI48_128 0 "register_operand")
7d079352 16558 (lshiftrt:VI48_128
abd4f58b 16559 (match_operand:VI48_128 1 "register_operand")
16560 (match_operand:VI48_128 2 "nonimmediate_operand")))]
7d079352 16561 "TARGET_AVX2 || TARGET_XOP"
16562{
16563 if (!TARGET_AVX2)
16564 {
16565 rtx neg = gen_reg_rtx (<MODE>mode);
16566 emit_insn (gen_neg<mode>2 (neg, operands[2]));
9f689820 16567 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
7d079352 16568 DONE;
16569 }
16570})
16571
;; Per-element logical shift right for VI48_512 vectors under AVX512F.
;; No preparation code: the lshiftrt pattern is emitted unchanged.
d2ff59d6 16572(define_expand "vlshr<mode>3"
16573 [(set (match_operand:VI48_512 0 "register_operand")
16574 (lshiftrt:VI48_512
16575 (match_operand:VI48_512 1 "register_operand")
16576 (match_operand:VI48_512 2 "nonimmediate_operand")))]
16577 "TARGET_AVX512F")
16578
;; Per-element logical shift right for VI48_256 vectors under AVX2.
;; No preparation code: the lshiftrt pattern is emitted unchanged.
7d079352 16579(define_expand "vlshr<mode>3"
abd4f58b 16580 [(set (match_operand:VI48_256 0 "register_operand")
7d079352 16581 (lshiftrt:VI48_256
abd4f58b 16582 (match_operand:VI48_256 1 "register_operand")
16583 (match_operand:VI48_256 2 "nonimmediate_operand")))]
7d079352 16584 "TARGET_AVX2")
16585
;; Per-element arithmetic shift right for V8HI.  With XOP the count is
;; negated and passed to xop_shav8hi3; otherwise (AVX512BW together with
;; AVX512VL) the ashiftrt pattern is emitted as is.
05e7532b 16586(define_expand "vashrv8hi3<mask_name>"
16587 [(set (match_operand:V8HI 0 "register_operand")
16588 (ashiftrt:V8HI
16589 (match_operand:V8HI 1 "register_operand")
16590 (match_operand:V8HI 2 "nonimmediate_operand")))]
3e41ffb2 16591 "TARGET_XOP || (TARGET_AVX512BW && TARGET_AVX512VL)"
18525343 16592{
3e41ffb2 16593 if (TARGET_XOP)
16594 {
05e7532b 16595 rtx neg = gen_reg_rtx (V8HImode);
16596 emit_insn (gen_negv8hi2 (neg, operands[2]));
16597 emit_insn (gen_xop_shav8hi3 (operands[0], operands[1], neg));
3e41ffb2 16598 DONE;
16599 }
16600})
16601
;; Per-element arithmetic shift right for V16QI, XOP only: the count is
;; negated and passed to xop_shav16qi3.
05e7532b 16602(define_expand "vashrv16qi3"
16603 [(set (match_operand:V16QI 0 "register_operand")
16604 (ashiftrt:V16QI
16605 (match_operand:V16QI 1 "register_operand")
16606 (match_operand:V16QI 2 "nonimmediate_operand")))]
16607 "TARGET_XOP"
16608{
16609 rtx neg = gen_reg_rtx (V16QImode);
16610 emit_insn (gen_negv16qi2 (neg, operands[2]));
16611 emit_insn (gen_xop_shav16qi3 (operands[0], operands[1], neg));
16612 DONE;
16613})
16614
;; Per-element arithmetic shift right for V2DI.  With XOP the count is
;; negated and passed to xop_shav2di3; otherwise (AVX512VL) the ashiftrt
;; pattern is emitted as is.
3e41ffb2 16615(define_expand "vashrv2di3<mask_name>"
16616 [(set (match_operand:V2DI 0 "register_operand")
16617 (ashiftrt:V2DI
16618 (match_operand:V2DI 1 "register_operand")
16619 (match_operand:V2DI 2 "nonimmediate_operand")))]
16620 "TARGET_XOP || TARGET_AVX512VL"
16621{
16622 if (TARGET_XOP)
16623 {
16624 rtx neg = gen_reg_rtx (V2DImode);
16625 emit_insn (gen_negv2di2 (neg, operands[2]));
16626 emit_insn (gen_xop_shav2di3 (operands[0], operands[1], neg));
16627 DONE;
16628 }
18525343 16629})
16630
;; Arithmetic right shift of V4SI by a V4SI vector of counts.
;; Enabled for AVX2 or XOP.  Unlike the V8HI/V2DI expanders above, the
;; XOP sequence (negate counts, then xop_shav4si3) is used only when AVX2
;; is NOT available; with AVX2 the ashiftrt template is emitted unchanged.
7d079352 16631(define_expand "vashrv4si3"
abd4f58b 16632 [(set (match_operand:V4SI 0 "register_operand")
16633 (ashiftrt:V4SI (match_operand:V4SI 1 "register_operand")
16634 (match_operand:V4SI 2 "nonimmediate_operand")))]
7d079352 16635 "TARGET_AVX2 || TARGET_XOP"
16636{
16637 if (!TARGET_AVX2)
16638 {
16639 rtx neg = gen_reg_rtx (V4SImode);
16640 emit_insn (gen_negv4si2 (neg, operands[2]));
9f689820 16641 emit_insn (gen_xop_shav4si3 (operands[0], operands[1], neg));
7d079352 16642 DONE;
16643 }
16644})
16645
;; Arithmetic right shift of V16SI by a V16SI vector of counts.
;; No preparation code; the template is emitted as written under
;; TARGET_AVX512F.
2344eae2 16646(define_expand "vashrv16si3"
16647 [(set (match_operand:V16SI 0 "register_operand")
16648 (ashiftrt:V16SI (match_operand:V16SI 1 "register_operand")
16649 (match_operand:V16SI 2 "nonimmediate_operand")))]
16650 "TARGET_AVX512F")
16651
;; Arithmetic right shift of V8SI by a V8SI vector of counts.
;; No preparation code; the template is emitted as written under
;; TARGET_AVX2.
7d079352 16652(define_expand "vashrv8si3"
abd4f58b 16653 [(set (match_operand:V8SI 0 "register_operand")
16654 (ashiftrt:V8SI (match_operand:V8SI 1 "register_operand")
16655 (match_operand:V8SI 2 "nonimmediate_operand")))]
7d079352 16656 "TARGET_AVX2")
16657
;; Left shift of a VI12_128 vector by a vector of counts, XOP only.
;; Always expands to xop_sha<mode>3 with the counts passed through
;; unchanged (that pattern shifts left for non-negative counts), then DONE.
18525343 16658(define_expand "vashl<mode>3"
abd4f58b 16659 [(set (match_operand:VI12_128 0 "register_operand")
8e73e3ae 16660 (ashift:VI12_128
abd4f58b 16661 (match_operand:VI12_128 1 "register_operand")
16662 (match_operand:VI12_128 2 "nonimmediate_operand")))]
18525343 16663 "TARGET_XOP"
16664{
9f689820 16665 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
18525343 16666 DONE;
16667})
16668
;; Left shift of a VI48_128 vector by a vector of counts.
;; Enabled for AVX2 or XOP.  Without AVX2 the counts are forced into a
;; register and xop_sha<mode>3 is emitted, then DONE; with AVX2 the
;; ashift template is emitted unchanged.
7d079352 16669(define_expand "vashl<mode>3"
abd4f58b 16670 [(set (match_operand:VI48_128 0 "register_operand")
7d079352 16671 (ashift:VI48_128
abd4f58b 16672 (match_operand:VI48_128 1 "register_operand")
16673 (match_operand:VI48_128 2 "nonimmediate_operand")))]
7d079352 16674 "TARGET_AVX2 || TARGET_XOP"
16675{
16676 if (!TARGET_AVX2)
16677 {
16678 operands[2] = force_reg (<MODE>mode, operands[2]);
9f689820 16679 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
7d079352 16680 DONE;
16681 }
16682})
16683
;; Left shift of a VI48_512 vector by a VI48_512 vector of counts.
;; No preparation code; the template is emitted as written under
;; TARGET_AVX512F.
d2ff59d6 16684(define_expand "vashl<mode>3"
16685 [(set (match_operand:VI48_512 0 "register_operand")
16686 (ashift:VI48_512
16687 (match_operand:VI48_512 1 "register_operand")
16688 (match_operand:VI48_512 2 "nonimmediate_operand")))]
16689 "TARGET_AVX512F")
16690
;; Left shift of a VI48_256 vector by a VI48_256 vector of counts.
;; No preparation code; the template is emitted as written under
;; TARGET_AVX2.
7d079352 16691(define_expand "vashl<mode>3"
abd4f58b 16692 [(set (match_operand:VI48_256 0 "register_operand")
7d079352 16693 (ashift:VI48_256
abd4f58b 16694 (match_operand:VI48_256 1 "register_operand")
16695 (match_operand:VI48_256 2 "nonimmediate_operand")))]
7d079352 16696 "TARGET_AVX2")
16697
;; XOP vpsha: bidirectional arithmetic shift on VI_128 vectors.
;; The RTL selects on the sign of the count (operand 2): when it is
;; >= 0 operand 1 is shifted left by it, otherwise operand 1 is shifted
;; right arithmetically by the negated count.
;; Two alternatives allow either input (but not both, per the insn
;; condition) to be in memory.
9f689820 16698(define_insn "xop_sha<mode>3"
6fe5844b 16699 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
16700 (if_then_else:VI_128
16701 (ge:VI_128
16702 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
18525343 16703 (const_int 0))
6fe5844b 16704 (ashift:VI_128
16705 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
18525343 16706 (match_dup 2))
6fe5844b 16707 (ashiftrt:VI_128
18525343 16708 (match_dup 1)
6fe5844b 16709 (neg:VI_128 (match_dup 2)))))]
e029cd62 16710 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
63d5e521 16711 "vpsha<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
18525343 16712 [(set_attr "type" "sseishft")
16713 (set_attr "prefix_data16" "0")
16714 (set_attr "prefix_extra" "2")
16715 (set_attr "mode" "TI")])
16716
;; XOP vpshl: bidirectional logical shift on VI_128 vectors.
;; Same shape as xop_sha<mode>3, except that a negative count yields a
;; logical (lshiftrt) rather than arithmetic right shift by the negated
;; count.  At most one of the two inputs may be a memory operand.
9f689820 16717(define_insn "xop_shl<mode>3"
6fe5844b 16718 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
16719 (if_then_else:VI_128
16720 (ge:VI_128
16721 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
18525343 16722 (const_int 0))
6fe5844b 16723 (ashift:VI_128
16724 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
18525343 16725 (match_dup 2))
6fe5844b 16726 (lshiftrt:VI_128
18525343 16727 (match_dup 1)
6fe5844b 16728 (neg:VI_128 (match_dup 2)))))]
e029cd62 16729 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
63d5e521 16730 "vpshl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
18525343 16731 [(set_attr "type" "sseishft")
16732 (set_attr "prefix_data16" "0")
16733 (set_attr "prefix_extra" "2")
16734 (set_attr "mode" "TI")])
16735
;; Shift of a byte vector (VI1_AVX512 modes) by a scalar SImode count,
;; for every code in the any_shift iterator.  The template is never
;; emitted: both paths end in DONE.
;;
;; XOP path (V16QImode only): the scalar count is replicated into a
;; V16QI register via vec_initv16qi.  For right shifts the count must be
;; negative for the XOP patterns, so a constant count is negated up
;; front, while a non-constant count is negated after the broadcast with
;; negv16qi2.  LSHIFTRT uses xop_shlv16qi3; ASHIFT and ASHIFTRT use
;; xop_shav16qi3.
;;
;; Otherwise the work is delegated to ix86_expand_vecop_qihi.
1f983100 16736(define_expand "<shift_insn><mode>3"
201f262d 16737 [(set (match_operand:VI1_AVX512 0 "register_operand")
16738 (any_shift:VI1_AVX512
16739 (match_operand:VI1_AVX512 1 "register_operand")
abd4f58b 16740 (match_operand:SI 2 "nonmemory_operand")))]
1f983100 16741 "TARGET_SSE2"
18525343 16742{
1f983100 16743 if (TARGET_XOP && <MODE>mode == V16QImode)
16744 {
16745 bool negate = false;
16746 rtx (*gen) (rtx, rtx, rtx);
16747 rtx tmp, par;
16748 int i;
885c8b76 16749
1f983100 16750 if (<CODE> != ASHIFT)
16751 {
16752 if (CONST_INT_P (operands[2]))
16753 operands[2] = GEN_INT (-INTVAL (operands[2]));
16754 else
16755 negate = true;
16756 }
16757 par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
16758 for (i = 0; i < 16; i++)
16759 XVECEXP (par, 0, i) = operands[2];
18525343 16760
1f983100 16761 tmp = gen_reg_rtx (V16QImode);
16762 emit_insn (gen_vec_initv16qi (tmp, par));
18525343 16763
1f983100 16764 if (negate)
16765 emit_insn (gen_negv16qi2 (tmp, tmp));
885c8b76 16766
1f983100 16767 gen = (<CODE> == LSHIFTRT ? gen_xop_shlv16qi3 : gen_xop_shav16qi3);
16768 emit_insn (gen (operands[0], operands[1], tmp));
16769 }
18525343 16770 else
1f983100 16771 ix86_expand_vecop_qihi (<CODE>, operands[0], operands[1], operands[2]);
18525343 16772 DONE;
16773})
16774
;; Arithmetic right shift of V2DI by a scalar DImode count.
;; Enabled for XOP or AVX512VL.  With AVX512VL the preparation code does
;; nothing and the ashiftrt template is emitted.  Otherwise (XOP only)
;; the count is broadcast into a V2DI register with vec_initv2di and
;; passed to xop_shav2di3, which needs a negative count to shift right:
;; a constant count is negated before the broadcast, a non-constant one
;; is negated afterwards with negv2di2.
16775(define_expand "ashrv2di3"
abd4f58b 16776 [(set (match_operand:V2DI 0 "register_operand")
3297e0a4 16777 (ashiftrt:V2DI
abd4f58b 16778 (match_operand:V2DI 1 "register_operand")
16779 (match_operand:DI 2 "nonmemory_operand")))]
4f545baf 16780 "TARGET_XOP || TARGET_AVX512VL"
18525343 16781{
4f545baf 16782 if (!TARGET_AVX512VL)
16783 {
16784 rtx reg = gen_reg_rtx (V2DImode);
16785 rtx par;
16786 bool negate = false;
16787 int i;
18525343 16788
4f545baf 16789 if (CONST_INT_P (operands[2]))
16790 operands[2] = GEN_INT (-INTVAL (operands[2]));
16791 else
16792 negate = true;
885c8b76 16793
4f545baf 16794 par = gen_rtx_PARALLEL (V2DImode, rtvec_alloc (2));
16795 for (i = 0; i < 2; i++)
16796 XVECEXP (par, 0, i) = operands[2];
18525343 16797
4f545baf 16798 emit_insn (gen_vec_initv2di (reg, par));
885c8b76 16799
4f545baf 16800 if (negate)
16801 emit_insn (gen_negv2di2 (reg, reg));
885c8b76 16802
4f545baf 16803 emit_insn (gen_xop_shav2di3 (operands[0], operands[1], reg));
16804 DONE;
16805 }
18525343 16806})
16807
16808;; XOP FRCZ support
;; Full-width vfrcz on every FMAMODE mode, modelled as UNSPEC_FRCZ of a
;; single register-or-memory input.
18525343 16809(define_insn "xop_frcz<mode>2"
a2f9d5b3 16810 [(set (match_operand:FMAMODE 0 "register_operand" "=x")
16811 (unspec:FMAMODE
16812 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
18525343 16813 UNSPEC_FRCZ))]
16814 "TARGET_XOP"
0061967e 16815 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
18525343 16816 [(set_attr "type" "ssecvt1")
16817 (set_attr "mode" "<MODE>")])
16818
;; Scalar ("vm") form of FRCZ on VF_128 modes: the UNSPEC_FRCZ result is
;; vec_merged with operand 2 under mask (const_int 1).  The preparation
;; statement sets operand 2 to the zero vector of the mode, so callers
;; supply only operands 0 and 1.
a2f9d5b3 16819(define_expand "xop_vmfrcz<mode>2"
6fe5844b 16820 [(set (match_operand:VF_128 0 "register_operand")
16821 (vec_merge:VF_128
16822 (unspec:VF_128
16823 [(match_operand:VF_128 1 "nonimmediate_operand")]
18525343 16824 UNSPEC_FRCZ)
ea47f46c 16825 (match_dup 2)
18525343 16826 (const_int 1)))]
16827 "TARGET_XOP"
ea47f46c 16828 "operands[2] = CONST0_RTX (<MODE>mode);")
18525343 16829
;; Matcher for the RTL produced by the xop_vmfrcz<mode>2 expander:
;; UNSPEC_FRCZ merged with a constant-zero vector (operand 2 must satisfy
;; const0_operand) under mask (const_int 1).  Emits the scalar vfrcz
;; mnemonic.
96188d90 16830(define_insn "*xop_vmfrcz<mode>2"
6fe5844b 16831 [(set (match_operand:VF_128 0 "register_operand" "=x")
16832 (vec_merge:VF_128
16833 (unspec:VF_128
16834 [(match_operand:VF_128 1 "nonimmediate_operand" "xm")]
a2f9d5b3 16835 UNSPEC_FRCZ)
6fe5844b 16836 (match_operand:VF_128 2 "const0_operand")
a2f9d5b3 16837 (const_int 1)))]
18525343 16838 "TARGET_XOP"
c358a059 16839 "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}"
18525343 16840 [(set_attr "type" "ssecvt1")
16841 (set_attr "mode" "<MODE>")])
16842
;; XOP vpcom with a signed integer comparison on VI_128 vectors.
;; Operator 1 must satisfy ix86_comparison_int_operator; the mnemonic is
;; formed from it through the %Y1 output modifier.  Operand 2 must be a
;; register, operand 3 may be register or memory.
16843(define_insn "xop_maskcmp<mode>3"
6fe5844b 16844 [(set (match_operand:VI_128 0 "register_operand" "=x")
16845 (match_operator:VI_128 1 "ix86_comparison_int_operator"
16846 [(match_operand:VI_128 2 "register_operand" "x")
16847 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
18525343 16848 "TARGET_XOP"
63d5e521 16849 "vpcom%Y1<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
18525343 16850 [(set_attr "type" "sse4arg")
16851 (set_attr "prefix_data16" "0")
16852 (set_attr "prefix_rep" "0")
16853 (set_attr "prefix_extra" "2")
16854 (set_attr "length_immediate" "1")
16855 (set_attr "mode" "TI")])
16856
;; XOP vpcom...u with an unsigned comparison on VI_128 vectors.
;; Same operand layout as xop_maskcmp<mode>3, but operator 1 must satisfy
;; ix86_comparison_uns_operator and the mnemonic carries a "u" before the
;; element-size suffix.
16857(define_insn "xop_maskcmp_uns<mode>3"
6fe5844b 16858 [(set (match_operand:VI_128 0 "register_operand" "=x")
16859 (match_operator:VI_128 1 "ix86_comparison_uns_operator"
16860 [(match_operand:VI_128 2 "register_operand" "x")
16861 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
18525343 16862 "TARGET_XOP"
63d5e521 16863 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
18525343 16864 [(set_attr "type" "ssecmp")
16865 (set_attr "prefix_data16" "0")
16866 (set_attr "prefix_rep" "0")
16867 (set_attr "prefix_extra" "2")
16868 (set_attr "length_immediate" "1")
16869 (set_attr "mode" "TI")])
16870
16871;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
16872;; and pcomneu* not to be converted to the signed ones in case somebody needs
16873;; the exact instruction generated for the intrinsic.
;; The comparison is wrapped in UNSPEC_XOP_UNSIGNED_CMP, which is what
;; distinguishes this pattern's RTL from xop_maskcmp_uns<mode>3; the
;; output template is the same.
16874(define_insn "xop_maskcmp_uns2<mode>3"
6fe5844b 16875 [(set (match_operand:VI_128 0 "register_operand" "=x")
16876 (unspec:VI_128
16877 [(match_operator:VI_128 1 "ix86_comparison_uns_operator"
16878 [(match_operand:VI_128 2 "register_operand" "x")
16879 (match_operand:VI_128 3 "nonimmediate_operand" "xm")])]
18525343 16880 UNSPEC_XOP_UNSIGNED_CMP))]
16881 "TARGET_XOP"
63d5e521 16882 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
18525343 16883 [(set_attr "type" "ssecmp")
16884 (set_attr "prefix_data16" "0")
16885 (set_attr "prefix_extra" "2")
16886 (set_attr "length_immediate" "1")
16887 (set_attr "mode" "TI")])
16888
16889;; Pcomtrue and pcomfalse support. These are useless instructions, but are
16890;; being added here to be complete.
;; Operand 3 is a constant selector: nonzero emits vpcomtrue, zero emits
;; vpcomfalse.  The operation is modelled as UNSPEC_XOP_TRUEFALSE over
;; the two vector inputs and the selector.
16891(define_insn "xop_pcom_tf<mode>3"
6fe5844b 16892 [(set (match_operand:VI_128 0 "register_operand" "=x")
16893 (unspec:VI_128
16894 [(match_operand:VI_128 1 "register_operand" "x")
16895 (match_operand:VI_128 2 "nonimmediate_operand" "xm")
18525343 16896 (match_operand:SI 3 "const_int_operand" "n")]
16897 UNSPEC_XOP_TRUEFALSE))]
16898 "TARGET_XOP"
16899{
16900 return ((INTVAL (operands[3]) != 0)
63d5e521 16901 ? "vpcomtrue<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
16902 : "vpcomfalse<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}");
18525343 16903}
16904 [(set_attr "type" "ssecmp")
16905 (set_attr "prefix_data16" "0")
16906 (set_attr "prefix_extra" "2")
16907 (set_attr "length_immediate" "1")
16908 (set_attr "mode" "TI")])
16909
;; XOP vpermil2 on VF_128_256 modes, modelled as UNSPEC_VPERMIL2.
;; Inputs: two float vectors (operands 1 and 2), an integer selector
;; vector of mode <sseintvecmode> (operand 3, register or memory), and an
;; immediate in the range 0..3 (operand 4).
46fd9685 16910(define_insn "xop_vpermil2<mode>3"
6a3f5f59 16911 [(set (match_operand:VF_128_256 0 "register_operand" "=x")
16912 (unspec:VF_128_256
16913 [(match_operand:VF_128_256 1 "register_operand" "x")
16914 (match_operand:VF_128_256 2 "nonimmediate_operand" "%x")
63d5e521 16915 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "xm")
46fd9685 16916 (match_operand:SI 4 "const_0_to_3_operand" "n")]
16917 UNSPEC_VPERMIL2))]
16918 "TARGET_XOP"
0061967e 16919 "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
46fd9685 16920 [(set_attr "type" "sse4arg")
16921 (set_attr "length_immediate" "1")
16922 (set_attr "mode" "<MODE>")])
16923
18525343 16924;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
ed30e0a6 16925
;; AES encryption round, modelled as UNSPEC_AESENC over two V2DI inputs.
;; Alternative 0 (isa noavx) is the two-operand form with operand 1 tied
;; to the destination; alternative 1 (isa avx) is the three-operand
;; VEX form.
1d9ef704 16926(define_insn "aesenc"
f6c74054 16927 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
16928 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
16929 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
1d9ef704 16930 UNSPEC_AESENC))]
16931 "TARGET_AES"
f6c74054 16932 "@
16933 aesenc\t{%2, %0|%0, %2}
16934 vaesenc\t{%2, %1, %0|%0, %1, %2}"
16935 [(set_attr "isa" "noavx,avx")
16936 (set_attr "type" "sselog1")
00a0e418 16937 (set_attr "prefix_extra" "1")
f6c74054 16938 (set_attr "prefix" "orig,vex")
6470d004 16939 (set_attr "btver2_decode" "double,double")
ed30e0a6 16940 (set_attr "mode" "TI")])
16941
;; Last AES encryption round, modelled as UNSPEC_AESENCLAST.
;; Alternative 0 (isa noavx) ties operand 1 to the destination;
;; alternative 1 (isa avx) is the three-operand VEX form.
1d9ef704 16942(define_insn "aesenclast"
f6c74054 16943 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
16944 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
16945 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
1d9ef704 16946 UNSPEC_AESENCLAST))]
16947 "TARGET_AES"
f6c74054 16948 "@
16949 aesenclast\t{%2, %0|%0, %2}
16950 vaesenclast\t{%2, %1, %0|%0, %1, %2}"
16951 [(set_attr "isa" "noavx,avx")
16952 (set_attr "type" "sselog1")
00a0e418 16953 (set_attr "prefix_extra" "1")
f6c74054 16954 (set_attr "prefix" "orig,vex")
6470d004 16955 (set_attr "btver2_decode" "double,double")
ed30e0a6 16956 (set_attr "mode" "TI")])
16957
;; AES decryption round, modelled as UNSPEC_AESDEC.
;; Alternative 0 (isa noavx) ties operand 1 to the destination;
;; alternative 1 (isa avx) is the three-operand VEX form.
1d9ef704 16958(define_insn "aesdec"
f6c74054 16959 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
16960 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
16961 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
1d9ef704 16962 UNSPEC_AESDEC))]
16963 "TARGET_AES"
f6c74054 16964 "@
16965 aesdec\t{%2, %0|%0, %2}
16966 vaesdec\t{%2, %1, %0|%0, %1, %2}"
16967 [(set_attr "isa" "noavx,avx")
16968 (set_attr "type" "sselog1")
00a0e418 16969 (set_attr "prefix_extra" "1")
f6c74054 16970 (set_attr "prefix" "orig,vex")
6470d004 16971 (set_attr "btver2_decode" "double,double")
ed30e0a6 16972 (set_attr "mode" "TI")])
16973
;; Last AES decryption round, modelled as UNSPEC_AESDECLAST.
;; Alternative 0 (isa noavx) ties operand 1 to the destination;
;; alternative 1 (isa avx) is the three-operand VEX form.
1d9ef704 16974(define_insn "aesdeclast"
f6c74054 16975 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
16976 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
16977 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
1d9ef704 16978 UNSPEC_AESDECLAST))]
16979 "TARGET_AES"
f6c74054 16980 "@
16981 aesdeclast\t{%2, %0|%0, %2}
16982 vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
16983 [(set_attr "isa" "noavx,avx")
16984 (set_attr "type" "sselog1")
1d9ef704 16985 (set_attr "prefix_extra" "1")
f6c74054 16986 (set_attr "prefix" "orig,vex")
6470d004 16987 (set_attr "btver2_decode" "double,double")
1d9ef704 16988 (set_attr "mode" "TI")])
16989
;; aesimc, modelled as UNSPEC_AESIMC of a single V2DI register-or-memory
;; input.  One alternative serves both encodings: the "%v" in the
;; template together with prefix "maybe_vex" selects the VEX form when
;; appropriate.
16990(define_insn "aesimc"
16991 [(set (match_operand:V2DI 0 "register_operand" "=x")
16992 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
16993 UNSPEC_AESIMC))]
16994 "TARGET_AES"
ed30e0a6 16995 "%vaesimc\t{%1, %0|%0, %1}"
1d9ef704 16996 [(set_attr "type" "sselog1")
16997 (set_attr "prefix_extra" "1")
ed30e0a6 16998 (set_attr "prefix" "maybe_vex")
1d9ef704 16999 (set_attr "mode" "TI")])
17000
;; aeskeygenassist, modelled as UNSPEC_AESKEYGENASSIST of a V2DI input
;; and an 8-bit immediate (operand 2, range 0..255).  Single alternative
;; with an optional VEX prefix ("%v" / "maybe_vex").
17001(define_insn "aeskeygenassist"
17002 [(set (match_operand:V2DI 0 "register_operand" "=x")
17003 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
17004 (match_operand:SI 2 "const_0_to_255_operand" "n")]
17005 UNSPEC_AESKEYGENASSIST))]
17006 "TARGET_AES"
ed30e0a6 17007 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
1d9ef704 17008 [(set_attr "type" "sselog1")
17009 (set_attr "prefix_extra" "1")
00a0e418 17010 (set_attr "length_immediate" "1")
ed30e0a6 17011 (set_attr "prefix" "maybe_vex")
1d9ef704 17012 (set_attr "mode" "TI")])
17013
;; pclmulqdq, modelled as UNSPEC_PCLMUL of two V2DI inputs and an 8-bit
;; immediate (operand 3, range 0..255).
;; Alternative 0 (isa noavx) ties operand 1 to the destination;
;; alternative 1 (isa avx) is the VEX form with a separate destination.
17014(define_insn "pclmulqdq"
f6c74054 17015 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
17016 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
17017 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")
17018 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
1d9ef704 17019 UNSPEC_PCLMUL))]
17020 "TARGET_PCLMUL"
f6c74054 17021 "@
17022 pclmulqdq\t{%3, %2, %0|%0, %2, %3}
17023 vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17024 [(set_attr "isa" "noavx,avx")
17025 (set_attr "type" "sselog1")
1d9ef704 17026 (set_attr "prefix_extra" "1")
00a0e418 17027 (set_attr "length_immediate" "1")
f6c74054 17028 (set_attr "prefix" "orig,vex")
1d9ef704 17029 (set_attr "mode" "TI")])
ed30e0a6 17030
17031(define_expand "avx_vzeroall"
17032 [(match_par_dup 0 [(const_int 0)])]
17033 "TARGET_AVX"
17034{
17035 int nregs = TARGET_64BIT ? 16 : 8;
17036 int regno;
17037
17038 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
17039
17040 XVECEXP (operands[0], 0, 0)
17041 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
17042 UNSPECV_VZEROALL);
17043
17044 for (regno = 0; regno < nregs; regno++)
17045 XVECEXP (operands[0], 0, regno + 1)
d1f9b275 17046 = gen_rtx_SET (gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
ed30e0a6 17047 CONST0_RTX (V8SImode));
17048})
17049
;; Matcher for the PARALLEL built by the avx_vzeroall expander.  The
;; whole parallel must satisfy the vzeroall_operation predicate and start
;; with the UNSPECV_VZEROALL unspec_volatile.  Emits a bare "vzeroall".
17050(define_insn "*avx_vzeroall"
17051 [(match_parallel 0 "vzeroall_operation"
e2c0f47e 17052 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
ed30e0a6 17053 "TARGET_AVX"
17054 "vzeroall"
17055 [(set_attr "type" "sse")
00a0e418 17056 (set_attr "modrm" "0")
ed30e0a6 17057 (set_attr "memory" "none")
17058 (set_attr "prefix" "vex")
6470d004 17059 (set_attr "btver2_decode" "vector")
ed30e0a6 17060 (set_attr "mode" "OI")])
17061
3970ad84 17062;; Clear the upper 128bits of AVX registers, equivalent to a NOP
17063;; if the upper 128bits are unused.
;; Modelled as a bare unspec_volatile UNSPECV_VZEROUPPER with no explicit
;; register operands.
17064(define_insn "avx_vzeroupper"
64f28d78 17065 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)]
e2c0f47e 17066 "TARGET_AVX"
ed30e0a6 17067 "vzeroupper"
17068 [(set_attr "type" "sse")
00a0e418 17069 (set_attr "modrm" "0")
ed30e0a6 17070 (set_attr "memory" "none")
17071 (set_attr "prefix" "vex")
6470d004 17072 (set_attr "btver2_decode" "vector")
ed30e0a6 17073 (set_attr "mode" "OI")])
17074
;; AVX2 vpbroadcast for every VI mode: element 0 of the <ssexmmmode>
;; source (register or memory) is duplicated into all elements of the
;; destination.
5deb404d 17075(define_insn "avx2_pbroadcast<mode>"
17076 [(set (match_operand:VI 0 "register_operand" "=x")
17077 (vec_duplicate:VI
17078 (vec_select:<ssescalarmode>
03ae25dc 17079 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "xm")
5deb404d 17080 (parallel [(const_int 0)]))))]
17081 "TARGET_AVX2"
c358a059 17082 "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}"
5deb404d 17083 [(set_attr "type" "ssemov")
17084 (set_attr "prefix_extra" "1")
17085 (set_attr "prefix" "vex")
17086 (set_attr "mode" "<sseinsnmode>")])
17087
;; Variant of avx2_pbroadcast<mode> whose source is a full VI_256 vector
;; rather than an <ssexmmmode> one; element 0 is still the one broadcast.
;; Alternative 0 takes the source from memory; alternative 1 takes it
;; from a register and prints it with the %x modifier
;; (NOTE(review): %x presumably prints the xmm name of the register --
;; confirm against the operand-printing code).
541e350d 17088(define_insn "avx2_pbroadcast<mode>_1"
c358a059 17089 [(set (match_operand:VI_256 0 "register_operand" "=x,x")
541e350d 17090 (vec_duplicate:VI_256
17091 (vec_select:<ssescalarmode>
c358a059 17092 (match_operand:VI_256 1 "nonimmediate_operand" "m,x")
541e350d 17093 (parallel [(const_int 0)]))))]
17094 "TARGET_AVX2"
c358a059 17095 "@
17096 vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}
17097 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}"
541e350d 17098 [(set_attr "type" "ssemov")
17099 (set_attr "prefix_extra" "1")
17100 (set_attr "prefix" "vex")
17101 (set_attr "mode" "<sseinsnmode>")])
17102
;; Variable permute (vperm<suffix>) on VI48F_256_512 modes, modelled as
;; UNSPEC_VPERMVAR.  Operand 1 is the data (register or memory) and
;; operand 2 the integer index vector (register only); note that the
;; assembly template lists them in the order %1, %2 for AT&T syntax.
fcb19554 17103(define_insn "<avx2_avx512>_permvar<mode><mask_name>"
bf24193f 17104 [(set (match_operand:VI48F_256_512 0 "register_operand" "=v")
17105 (unspec:VI48F_256_512
17106 [(match_operand:VI48F_256_512 1 "nonimmediate_operand" "vm")
17107 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
8da8a06b 17108 UNSPEC_VPERMVAR))]
5220cab6 17109 "TARGET_AVX2 && <mask_mode512bit_condition>"
17110 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
5deb404d 17111 [(set_attr "type" "sselog")
5220cab6 17112 (set_attr "prefix" "<mask_prefix2>")
bf24193f 17113 (set_attr "mode" "<sseinsnmode>")])
5deb404d 17114
;; Variable permute on byte vectors (VI1_AVX512VL modes), modelled as
;; UNSPEC_VPERMVAR.  Requires TARGET_AVX512VBMI.  Operand 1 is the data,
;; operand 2 the index vector.
afee0628 17115(define_insn "<avx512>_permvar<mode><mask_name>"
17116 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
17117 (unspec:VI1_AVX512VL
17118 [(match_operand:VI1_AVX512VL 1 "nonimmediate_operand" "vm")
17119 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
17120 UNSPEC_VPERMVAR))]
17121 "TARGET_AVX512VBMI && <mask_mode512bit_condition>"
17122 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
17123 [(set_attr "type" "sselog")
17124 (set_attr "prefix" "<mask_prefix2>")
17125 (set_attr "mode" "<sseinsnmode>")])
17126
;; Variable permute on word vectors (VI2_AVX512VL modes), modelled as
;; UNSPEC_VPERMVAR.  Requires TARGET_AVX512BW.  Operand 1 is the data,
;; operand 2 the index vector.
fcb19554 17127(define_insn "<avx512>_permvar<mode><mask_name>"
17128 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17129 (unspec:VI2_AVX512VL
17130 [(match_operand:VI2_AVX512VL 1 "nonimmediate_operand" "vm")
17131 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
17132 UNSPEC_VPERMVAR))]
17133 "TARGET_AVX512BW && <mask_mode512bit_condition>"
17134 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
17135 [(set_attr "type" "sselog")
17136 (set_attr "prefix" "<mask_prefix2>")
17137 (set_attr "mode" "<sseinsnmode>")])
17138
;; Immediate permute of VI8F_256_512 vectors.  The 8-bit immediate in
;; operand 2 is split into four 2-bit selectors (bits 0-1, 2-3, 4-5, 6-7)
;; which are passed as separate operands to the ..._perm<mode>_1 insn.
262f9173 17139(define_expand "<avx2_avx512>_perm<mode>"
feadfe94 17140 [(match_operand:VI8F_256_512 0 "register_operand")
17141 (match_operand:VI8F_256_512 1 "nonimmediate_operand")
abd4f58b 17142 (match_operand:SI 2 "const_0_to_255_operand")]
a9e4de7b 17143 "TARGET_AVX2"
17144{
17145 int mask = INTVAL (operands[2]);
262f9173 17146 emit_insn (gen_<avx2_avx512>_perm<mode>_1 (operands[0], operands[1],
5220cab6 17147 GEN_INT ((mask >> 0) & 3),
17148 GEN_INT ((mask >> 2) & 3),
17149 GEN_INT ((mask >> 4) & 3),
17150 GEN_INT ((mask >> 6) & 3)));
17151 DONE;
17152})
17153
;; Masked form of the immediate permute expander.  Splits the 8-bit
;; immediate (operand 2) into four 2-bit selectors exactly as the
;; unmasked expander does, and forwards operand 3 (a vector satisfying
;; vector_move_operand) and operand 4 (the mask register) to the
;; ..._perm<mode>_1_mask insn.
262f9173 17154(define_expand "<avx512>_perm<mode>_mask"
17155 [(match_operand:VI8F_256_512 0 "register_operand")
17156 (match_operand:VI8F_256_512 1 "nonimmediate_operand")
5220cab6 17157 (match_operand:SI 2 "const_0_to_255_operand")
262f9173 17158 (match_operand:VI8F_256_512 3 "vector_move_operand")
5220cab6 17159 (match_operand:<avx512fmaskmode> 4 "register_operand")]
17160 "TARGET_AVX512F"
17161{
17162 int mask = INTVAL (operands[2]);
262f9173 17163 emit_insn (gen_<avx2_avx512>_perm<mode>_1_mask (operands[0], operands[1],
5220cab6 17164 GEN_INT ((mask >> 0) & 3),
17165 GEN_INT ((mask >> 2) & 3),
17166 GEN_INT ((mask >> 4) & 3),
17167 GEN_INT ((mask >> 6) & 3),
17168 operands[3], operands[4]));
a9e4de7b 17169 DONE;
17170})
17171
;; Insn behind the immediate permute expanders.  The RTL is a vec_select
;; with four selector operands, each in the range 0..3.  At output time
;; the four selectors are packed back into one 8-bit immediate (2 bits
;; each, operand 2 in the low bits) and operand 2 is overwritten with
;; that value so the template can print it as %2.
262f9173 17172(define_insn "<avx2_avx512>_perm<mode>_1<mask_name>"
feadfe94 17173 [(set (match_operand:VI8F_256_512 0 "register_operand" "=v")
17174 (vec_select:VI8F_256_512
17175 (match_operand:VI8F_256_512 1 "nonimmediate_operand" "vm")
abd4f58b 17176 (parallel [(match_operand 2 "const_0_to_3_operand")
17177 (match_operand 3 "const_0_to_3_operand")
17178 (match_operand 4 "const_0_to_3_operand")
17179 (match_operand 5 "const_0_to_3_operand")])))]
5220cab6 17180 "TARGET_AVX2 && <mask_mode512bit_condition>"
a9e4de7b 17181{
17182 int mask = 0;
17183 mask |= INTVAL (operands[2]) << 0;
17184 mask |= INTVAL (operands[3]) << 2;
17185 mask |= INTVAL (operands[4]) << 4;
17186 mask |= INTVAL (operands[5]) << 6;
17187 operands[2] = GEN_INT (mask);
5220cab6 17188 return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
a9e4de7b 17189}
5deb404d 17190 [(set_attr "type" "sselog")
5220cab6 17191 (set_attr "prefix" "<mask_prefix2>")
e0c9db9b 17192 (set_attr "mode" "<sseinsnmode>")])
5deb404d 17193
17194(define_insn "avx2_permv2ti"
17195 [(set (match_operand:V4DI 0 "register_operand" "=x")
17196 (unspec:V4DI
17197 [(match_operand:V4DI 1 "register_operand" "x")
a9e4de7b 17198 (match_operand:V4DI 2 "nonimmediate_operand" "xm")
5deb404d 17199 (match_operand:SI 3 "const_0_to_255_operand" "n")]
17200 UNSPEC_VPERMTI))]
17201 "TARGET_AVX2"
17202 "vperm2i128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17203 [(set_attr "type" "sselog")
17204 (set_attr "prefix" "vex")
17205 (set_attr "mode" "OI")])
17206
;; AVX2 vbroadcastsd from a register: element 0 of a V2DF register is
;; duplicated into all four elements of a V4DF destination.
17207(define_insn "avx2_vec_dupv4df"
17208 [(set (match_operand:V4DF 0 "register_operand" "=x")
17209 (vec_duplicate:V4DF
17210 (vec_select:DF
17211 (match_operand:V2DF 1 "register_operand" "x")
17212 (parallel [(const_int 0)]))))]
17213 "TARGET_AVX2"
17214 "vbroadcastsd\t{%1, %0|%0, %1}"
17215 [(set_attr "type" "sselog1")
17216 (set_attr "prefix" "vex")
17217 (set_attr "mode" "V4DF")])
17218
;; Broadcast of element 0 of a full-width VI_AVX512BW vector to every
;; element of a destination of the same mode.
;; The inner vec_select extracts a single element (one-entry parallel),
;; so its mode is the scalar element mode <ssescalarmode>, matching
;; avx2_pbroadcast<mode>_1 and the other vec_dup patterns in this file.
;; Alternative 0 reads the source from a register, alternative 1 from
;; memory.
05e7532b 17219(define_insn "<avx512>_vec_dup<mode>_1"
17220 [(set (match_operand:VI_AVX512BW 0 "register_operand" "=v,v")
17221 (vec_duplicate:VI_AVX512BW
17222 (vec_select:<ssescalarmode>
17223 (match_operand:VI_AVX512BW 1 "nonimmediate_operand" "v,m")
17224 (parallel [(const_int 0)]))))]
17225 "TARGET_AVX512F"
9751df37 17226 "@
17227 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}
17228 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %<iptr>1}"
05e7532b 17229 [(set_attr "type" "ssemov")
17230 (set_attr "prefix" "evex")
17231 (set_attr "mode" "<sseinsnmode>")])
17232
;; AVX512F broadcast for V48_AVX512VL modes: element 0 of the
;; <ssexmmmode> source (register or memory) is duplicated into every
;; element of the destination.  Supports the <mask_name> substitution.
f50aa6e9 17233(define_insn "<avx512>_vec_dup<mode><mask_name>"
17234 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
17235 (vec_duplicate:V48_AVX512VL
8e6b975f 17236 (vec_select:<ssescalarmode>
17237 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
17238 (parallel [(const_int 0)]))))]
17239 "TARGET_AVX512F"
5220cab6 17240 "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
8e6b975f 17241 [(set_attr "type" "ssemov")
17242 (set_attr "prefix" "evex")
17243 (set_attr "mode" "<sseinsnmode>")])
17244
;; AVX512BW broadcast for byte/word vectors (VI12_AVX512VL modes):
;; element 0 of the <ssexmmmode> source is duplicated into every element
;; of the destination.  Supports the <mask_name> substitution.
f50aa6e9 17245(define_insn "<avx512>_vec_dup<mode><mask_name>"
17246 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
17247 (vec_duplicate:VI12_AVX512VL
17248 (vec_select:<ssescalarmode>
17249 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
17250 (parallel [(const_int 0)]))))]
17251 "TARGET_AVX512BW"
17252 "vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17253 [(set_attr "type" "ssemov")
17254 (set_attr "prefix" "evex")
17255 (set_attr "mode" "<sseinsnmode>")])
17256
;; Broadcast of a whole <ssexmmmode> vector into a V16FI destination
;; (vec_duplicate of a vector operand).
;; Alternative 0 (register source) is implemented as a vshuf...32x4 of
;; the source with itself and immediate 0; alternative 1 (memory source)
;; uses vbroadcast...32x4.
;; NOTE(review): the %g modifier presumably prints the 512-bit name of
;; the source register -- confirm against the operand-printing code.
5220cab6 17257(define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
8e9989b0 17258 [(set (match_operand:V16FI 0 "register_operand" "=v,v")
17259 (vec_duplicate:V16FI
17260 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
17261 "TARGET_AVX512F"
17262 "@
5220cab6 17263 vshuf<shuffletype>32x4\t{$0x0, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x0}
17264 vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
8e9989b0 17265 [(set_attr "type" "ssemov")
17266 (set_attr "prefix" "evex")
17267 (set_attr "mode" "<sseinsnmode>")])
17268
;; Broadcast of a half-width vector (<ssehalfvecmode>) into a V8FI
;; destination (vec_duplicate of a vector operand).
;; Alternative 0 (register source) is implemented as a vshuf...64x2 of
;; the source with itself and immediate 0x44; alternative 1 (memory
;; source) uses vbroadcast...64x4.
5220cab6 17269(define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
8e9989b0 17270 [(set (match_operand:V8FI 0 "register_operand" "=v,v")
17271 (vec_duplicate:V8FI
17272 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
17273 "TARGET_AVX512F"
17274 "@
5220cab6 17275 vshuf<shuffletype>64x2\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
17276 vbroadcast<shuffletype>64x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
8e9989b0 17277 [(set_attr "type" "ssemov")
17278 (set_attr "prefix" "evex")
17279 (set_attr "mode" "<sseinsnmode>")])
17280
;; AVX512BW broadcast of a scalar into byte/word vectors
;; (VI12_AVX512VL modes).  Alternative 0 takes the scalar from a vector
;; register or memory; alternative 1 takes it from a general register
;; and prints it with the %k modifier
;; (NOTE(review): %k presumably selects the 32-bit register name --
;; confirm against the operand-printing code).
f50aa6e9 17281(define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>"
dc958a45 17282 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v")
f50aa6e9 17283 (vec_duplicate:VI12_AVX512VL
dc958a45 17284 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))]
f50aa6e9 17285 "TARGET_AVX512BW"
dc958a45 17286 "@
17287 vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}
17288 vpbroadcast<bcstscalarsuff>\t{%k1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
f50aa6e9 17289 [(set_attr "type" "ssemov")
17290 (set_attr "prefix" "evex")
17291 (set_attr "mode" "<sseinsnmode>")])
17292
17293(define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>"
dc958a45 17294 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v")
f50aa6e9 17295 (vec_duplicate:V48_AVX512VL
dc958a45 17296 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))]
8e6b975f 17297 "TARGET_AVX512F"
5220cab6 17298 "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
8e6b975f 17299 [(set_attr "type" "ssemov")
17300 (set_attr "prefix" "evex")
dc958a45 17301 (set_attr "mode" "<sseinsnmode>")
17302 (set (attr "enabled")
17303 (if_then_else (eq_attr "alternative" "1")
17304 (symbol_ref "GET_MODE_CLASS (<ssescalarmode>mode) == MODE_INT
17305 && (<ssescalarmode>mode != DImode || TARGET_64BIT)")
17306 (const_int 1)))])
8e6b975f 17307
;; Broadcast of an SF scalar into V4SF.  Three alternatives:
;;   0 (avx):   register source, vshufps of the source with itself, imm 0;
;;   1 (avx):   memory source, vbroadcastss;
;;   2 (noavx): source tied to the destination, shufps in place, imm 0.
dc958a45 17308(define_insn "vec_dupv4sf"
17309 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
17310 (vec_duplicate:V4SF
17311 (match_operand:SF 1 "nonimmediate_operand" "x,m,0")))]
17312 "TARGET_SSE"
17313 "@
17314 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
17315 vbroadcastss\t{%1, %0|%0, %1}
17316 shufps\t{$0, %0, %0|%0, %0, 0}"
17317 [(set_attr "isa" "avx,avx,noavx")
17318 (set_attr "type" "sseshuf1,ssemov,sseshuf1")
17319 (set_attr "length_immediate" "1,0,1")
17320 (set_attr "prefix_extra" "0,1,*")
17321 (set_attr "prefix" "vex,vex,orig")
17322 (set_attr "mode" "V4SF")])
17323
;; Broadcast of an SI scalar into V4SI.  Three alternatives:
;;   0 (sse2):  register source, pshufd with imm 0 (VEX form optional);
;;   1 (avx):   memory source, vbroadcastss;
;;   2 (noavx): source tied to the destination, shufps in place, imm 0.
17324(define_insn "*vec_dupv4si"
17325 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
17326 (vec_duplicate:V4SI
17327 (match_operand:SI 1 "nonimmediate_operand" " x,m,0")))]
17328 "TARGET_SSE"
17329 "@
17330 %vpshufd\t{$0, %1, %0|%0, %1, 0}
17331 vbroadcastss\t{%1, %0|%0, %1}
17332 shufps\t{$0, %0, %0|%0, %0, 0}"
17333 [(set_attr "isa" "sse2,avx,noavx")
17334 (set_attr "type" "sselog1,ssemov,sselog1")
17335 (set_attr "length_immediate" "1,0,1")
17336 (set_attr "prefix_extra" "0,1,*")
17337 (set_attr "prefix" "maybe_vex,vex,orig")
17338 (set_attr "mode" "TI,V4SF,V4SF")])
17339
;; Broadcast of a DI scalar into V2DI.  Four alternatives:
;;   0 (sse2_noavx): source tied to the destination, punpcklqdq in place;
;;   1 (avx):        register source, vpunpcklqdq;
;;   2 (sse3):       memory source, movddup (VEX form optional);
;;   3 (noavx):      source tied to the destination, movlhps in place.
17340(define_insn "*vec_dupv2di"
17341 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x,x")
17342 (vec_duplicate:V2DI
17343 (match_operand:DI 1 "nonimmediate_operand" " 0,x,m,0")))]
17344 "TARGET_SSE"
17345 "@
17346 punpcklqdq\t%0, %0
17347 vpunpcklqdq\t{%d1, %0|%0, %d1}
17348 %vmovddup\t{%1, %0|%0, %1}
17349 movlhps\t%0, %0"
17350 [(set_attr "isa" "sse2_noavx,avx,sse3,noavx")
17351 (set_attr "type" "sselog1,sselog1,sselog1,ssemov")
17352 (set_attr "prefix" "orig,vex,maybe_vex,orig")
17353 (set_attr "mode" "TI,TI,DF,V4SF")])
f50aa6e9 17354
;; AVX2 vbroadcasti128: a half-width vector loaded from memory is
;; concatenated with itself to form a VI_256 result.  The source must be
;; a memory operand.
5deb404d 17355(define_insn "avx2_vbroadcasti128_<mode>"
17356 [(set (match_operand:VI_256 0 "register_operand" "=x")
17357 (vec_concat:VI_256
17358 (match_operand:<ssehalfvecmode> 1 "memory_operand" "m")
17359 (match_dup 1)))]
17360 "TARGET_AVX2"
17361 "vbroadcasti128\t{%1, %0|%0, %1}"
17362 [(set_attr "type" "ssemov")
17363 (set_attr "prefix_extra" "1")
17364 (set_attr "prefix" "vex")
17365 (set_attr "mode" "OI")])
17366
6e154e02 17367;; Modes handled by AVX vec_dup patterns.
;; Used by the vec_dup<mode> insn and by the AVX-without-AVX2 splitter.
17368(define_mode_iterator AVX_VEC_DUP_MODE
17369 [V8SI V8SF V4DI V4DF])
17370;; Modes handled by AVX2 vec_dup patterns.
;; Used by the *vec_dup<mode> insn and by the splitter that handles
;; broadcasts from general registers.
17371(define_mode_iterator AVX2_VEC_DUP_MODE
17372 [V32QI V16QI V16HI V8HI V8SI V4SI])
17373
;; AVX2 scalar broadcast for AVX2_VEC_DUP_MODE modes.
;; Alternative 0: memory source.  Alternative 1: vector-register source,
;; printed with %x.  Alternative 2: general-register source; its output
;; is "#", so it has no direct assembly and must be split (see the
;; AVX2_VEC_DUP_MODE define_split in this file).
17374(define_insn "*vec_dup<mode>"
5d896697 17375 [(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand" "=x,x,Yi")
6e154e02 17376 (vec_duplicate:AVX2_VEC_DUP_MODE
ed6272f7 17377 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,x,$r")))]
6e154e02 17378 "TARGET_AVX2"
17379 "@
17380 v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1}
17381 v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}
17382 #"
17383 [(set_attr "type" "ssemov")
17384 (set_attr "prefix_extra" "1")
17385 (set_attr "prefix" "maybe_evex")
17386 (set_attr "mode" "<sseinsnmode>")])
17387
;; AVX scalar broadcast for AVX_VEC_DUP_MODE modes.  Five alternatives,
;; gated by the "isa" attribute:
;;   0 (avx2):    memory source;
;;   1 (noavx2):  memory source, vbroadcast with the scalar-mode suffix;
;;   2 (avx2):    "x" register source, printed with %x;
;;   3 (avx512f): "v" register source, destination printed with %g;
;;   4 (noavx2):  register source, output "#" -- must be split (see the
;;                AVX_VEC_DUP_MODE define_split in this file).
17388(define_insn "vec_dup<mode>"
f40f9ee2 17389 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x,x,v,x")
6e154e02 17390 (vec_duplicate:AVX_VEC_DUP_MODE
f40f9ee2 17391 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,m,x,v,?x")))]
6e154e02 17392 "TARGET_AVX"
17393 "@
17394 v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1}
17395 vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
17396 v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}
f40f9ee2 17397 v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %g0|%g0, %x1}
6e154e02 17398 #"
17399 [(set_attr "type" "ssemov")
17400 (set_attr "prefix_extra" "1")
17401 (set_attr "prefix" "maybe_evex")
f40f9ee2 17402 (set_attr "isa" "avx2,noavx2,avx2,avx512f,noavx2")
17403 (set_attr "mode" "<sseinsnmode>,V8SF,<sseinsnmode>,<sseinsnmode>,V8SF")])
6e154e02 17404
;; Post-reload splitter for an AVX2 broadcast whose scalar source ended
;; up in a general register (and no direct AVX512VL GPR broadcast is
;; available, see the condition).  It emits two insns:
;;   1. vec_setv4si_0 targeting the V4SI low part of the destination,
;;      with a zero V4SI vector and the SImode low part of the source;
;;   2. avx2_pbroadcast<mode> from the <ssexmmmode> low part of the
;;      destination into the full destination.
17405(define_split
17406 [(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand")
17407 (vec_duplicate:AVX2_VEC_DUP_MODE
17408 (match_operand:<ssescalarmode> 1 "register_operand")))]
dc958a45 17409 "TARGET_AVX2
17410 /* Disable this splitter if avx512vl_vec_dup_gprv*[qhs]i insn is
17411 available, because then we can broadcast from GPRs directly.
17412 For V*[QH]I modes it requires both -mavx512vl and -mavx512bw,
17413 for V*SI mode it requires just -mavx512vl. */
17414 && !(TARGET_AVX512VL
17415 && (TARGET_AVX512BW || <ssescalarmode>mode == SImode))
17416 && reload_completed && GENERAL_REG_P (operands[1])"
6e154e02 17417 [(const_int 0)]
17418{
17419 emit_insn (gen_vec_setv4si_0 (gen_lowpart (V4SImode, operands[0]),
17420 CONST0_RTX (V4SImode),
17421 gen_lowpart (SImode, operands[1])));
17422 emit_insn (gen_avx2_pbroadcast<mode> (operands[0],
17423 gen_lowpart (<ssexmmmode>mode,
17424 operands[0])));
17425 DONE;
17426})
17427
;; Post-reload split of a register-source vec_duplicate when AVX is available
;; but AVX2 is not.  Operand 2 is created as the <ssehalfvecmode> register
;; with the same hard register number as operand 0.  The scalar is first
;; duplicated into that half, then the half is vec_concat'ed with itself to
;; form the full-width result.
b6fc7168 17428(define_split
abd4f58b 17429 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand")
a17124a0 17430 (vec_duplicate:AVX_VEC_DUP_MODE
abd4f58b 17431 (match_operand:<ssescalarmode> 1 "register_operand")))]
541e350d 17432 "TARGET_AVX && !TARGET_AVX2 && reload_completed"
a17124a0 17433 [(set (match_dup 2)
17434 (vec_duplicate:<ssehalfvecmode> (match_dup 1)))
17435 (set (match_dup 0)
17436 (vec_concat:AVX_VEC_DUP_MODE (match_dup 2) (match_dup 2)))]
63d5e521 17437 "operands[2] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[0]));")
b6fc7168 17438
;; Build a V_256 vector whose two halves are both operand 1.
;;   alt 0: memory source                      -> vbroadcast<i128>
;;   alt 1: source tied to the destination "0" -> vinsert<i128> with imm 1
;;   alt 2: any other register source          -> vperm2<i128> with imm 0,
;;          operand 1 printed with the %t modifier
;; "length_immediate" is 0,1,1 because only the last two alternatives carry
;; an immediate.
04e14b44 17439(define_insn "avx_vbroadcastf128_<mode>"
6fe5844b 17440 [(set (match_operand:V_256 0 "register_operand" "=x,x,x")
17441 (vec_concat:V_256
63d5e521 17442 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x")
04e14b44 17443 (match_dup 1)))]
17444 "TARGET_AVX"
17445 "@
154d1782 17446 vbroadcast<i128>\t{%1, %0|%0, %1}
17447 vinsert<i128>\t{$1, %1, %0, %0|%0, %0, %1, 1}
17448 vperm2<i128>\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}"
04e14b44 17449 [(set_attr "type" "ssemov,sselog1,sselog1")
17450 (set_attr "prefix_extra" "1")
17451 (set_attr "length_immediate" "0,1,1")
17452 (set_attr "prefix" "vex")
154d1782 17453 (set_attr "mode" "<sseinsnmode>")])
04e14b44 17454
0aac18e3 17455;; For broadcast[i|f]32x2. Yes there is no v4sf version, only v4si.
;; The 256-bit and 128-bit entries are enabled only under TARGET_AVX512VL.
17456(define_mode_iterator VI4F_BRCST32x2
17457 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
17458 V16SF (V8SF "TARGET_AVX512VL")])
17459
;; Two-element vector mode with the same element type as the given 64-bit
;; element vector; used as the source operand mode of the broadcast64x2 insn.
17460(define_mode_attr 64x2mode
17461 [(V8DF "V2DF") (V8DI "V2DI") (V4DI "V2DI") (V4DF "V2DF")])
17462
;; Two-element vector mode with the same element type as the given 32-bit
;; element vector; used as the vec_select result mode in the broadcast32x2
;; insn.
17463(define_mode_attr 32x2mode
17464 [(V16SF "V2SF") (V16SI "V2SI") (V8SI "V2SI")
17465 (V8SF "V2SF") (V4SI "V2SI")])
17466
;; vbroadcast{f,i}32x2: select elements 0 and 1 of the <ssexmmmode> source
;; (register or memory) and duplicate that pair across the whole
;; VI4F_BRCST32x2 destination.  The <mask_name> substitution also provides a
;; masked variant, which prints <mask_operand2> after the destination.
17467(define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>"
17468 [(set (match_operand:VI4F_BRCST32x2 0 "register_operand" "=v")
17469 (vec_duplicate:VI4F_BRCST32x2
17470 (vec_select:<32x2mode>
17471 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
17472 (parallel [(const_int 0) (const_int 1)]))))]
17473 "TARGET_AVX512DQ"
17474 "vbroadcast<shuffletype>32x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17475 [(set_attr "type" "ssemov")
17476 (set_attr "prefix_extra" "1")
17477 (set_attr "prefix" "evex")
17478 (set_attr "mode" "<sseinsnmode>")])
17479
;; Duplicate an <ssexmmmode> value into a VI4F_256 vector (optionally masked).
;;   alt 0: register source -> vshuf{f,i}32x4 with immediate 0x0, using the
;;          source (printed with %t) as both shuffle inputs
;;   alt 1: memory source   -> vbroadcast{f,i}32x4
17480(define_insn "<mask_codefor>avx512vl_broadcast<mode><mask_name>_1"
17481 [(set (match_operand:VI4F_256 0 "register_operand" "=v,v")
17482 (vec_duplicate:VI4F_256
17483 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
17484 "TARGET_AVX512VL"
17485 "@
17486 vshuf<shuffletype>32x4\t{$0x0, %t1, %t1, %0<mask_operand2>|%0<mask_operand2>, %t1, %t1, 0x0}
17487 vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17488 [(set_attr "type" "ssemov")
17489 (set_attr "prefix_extra" "1")
17490 (set_attr "prefix" "evex")
17491 (set_attr "mode" "<sseinsnmode>")])
17492
;; Duplicate a half-width (<ssehalfvecmode>) value into a V16FI vector
;; (optionally masked).
;;   alt 0: register source -> vshuf{f,i}32x4 with immediate 0x44, using the
;;          source (printed with %g) as both shuffle inputs
;;   alt 1: memory source   -> vbroadcast{f,i}32x8
17493(define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1"
17494 [(set (match_operand:V16FI 0 "register_operand" "=v,v")
17495 (vec_duplicate:V16FI
17496 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
17497 "TARGET_AVX512DQ"
17498 "@
17499 vshuf<shuffletype>32x4\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
17500 vbroadcast<shuffletype>32x8\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17501 [(set_attr "type" "ssemov")
17502 (set_attr "prefix_extra" "1")
17503 (set_attr "prefix" "evex")
17504 (set_attr "mode" "<sseinsnmode>")])
17505
17506;; For broadcast[i|f]64x2
;; The 256-bit entries are enabled only under TARGET_AVX512VL.
17507(define_mode_iterator VI8F_BRCST64x2
17508 [V8DI V8DF (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")])
17509
;; Duplicate a <64x2mode> value into a VI8F_BRCST64x2 vector (optionally
;; masked).
;;   alt 0: register source -> vshuf{f,i}64x2 with immediate 0x0, the source
;;          being printed through the <concat_tg_mode> operand modifier
;;   alt 1: memory source   -> vbroadcast{f,i}64x2
17510(define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1"
17511 [(set (match_operand:VI8F_BRCST64x2 0 "register_operand" "=v,v")
17512 (vec_duplicate:VI8F_BRCST64x2
17513 (match_operand:<64x2mode> 1 "nonimmediate_operand" "v,m")))]
17514 "TARGET_AVX512DQ"
17515 "@
b6840105 17516 vshuf<shuffletype>64x2\t{$0x0, %<concat_tg_mode>1, %<concat_tg_mode>1, %0<mask_operand2>|%0<mask_operand2>, %<concat_tg_mode>1, %<concat_tg_mode>1, 0x0}
0aac18e3 17517 vbroadcast<shuffletype>64x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17518 [(set_attr "type" "ssemov")
17519 (set_attr "prefix_extra" "1")
17520 (set_attr "prefix" "evex")
17521 (set_attr "mode" "<sseinsnmode>")])
17522
;; vpbroadcastmb2q: zero-extend a QImode mask register ("Yk") to DImode and
;; duplicate it into every element of a VI8_AVX512VL vector.
;; NOTE(review): the "mode" attribute is "XI" for every mode of the iterator;
;; presumably the iterator also contains narrower vectors -- confirm that a
;; fixed "XI" is intended rather than <sseinsnmode>.
1ffb4a9e 17523(define_insn "avx512cd_maskb_vec_dup<mode>"
17524 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
17525 (vec_duplicate:VI8_AVX512VL
697a43f8 17526 (zero_extend:DI
a31e7f46 17527 (match_operand:QI 1 "register_operand" "Yk"))))]
697a43f8 17528 "TARGET_AVX512CD"
17529 "vpbroadcastmb2q\t{%1, %0|%0, %1}"
17530 [(set_attr "type" "mskmov")
17531 (set_attr "prefix" "evex")
17532 (set_attr "mode" "XI")])
17533
;; vpbroadcastmw2d: zero-extend an HImode mask register ("Yk") to SImode and
;; duplicate it into every element of a VI4_AVX512VL vector.
;; NOTE(review): the "mode" attribute is "XI" for every mode of the iterator;
;; presumably the iterator also contains narrower vectors -- confirm that a
;; fixed "XI" is intended rather than <sseinsnmode>.
7da26bee 17534(define_insn "avx512cd_maskw_vec_dup<mode>"
17535 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
17536 (vec_duplicate:VI4_AVX512VL
697a43f8 17537 (zero_extend:SI
a31e7f46 17538 (match_operand:HI 1 "register_operand" "Yk"))))]
697a43f8 17539 "TARGET_AVX512CD"
17540 "vpbroadcastmw2d\t{%1, %0|%0, %1}"
17541 [(set_attr "type" "mskmov")
17542 (set_attr "prefix" "evex")
17543 (set_attr "mode" "XI")])
17544
04e14b44 17545;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
17546;; If it so happens that the input is in memory, use vbroadcast.
17547;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
;; Operand 3 is the first index of the selection parallel, i.e. the element
;; being broadcast; operand 2 (the parallel itself) is overwritten with the
;; vpermilps immediate in the register alternative.
17548(define_insn "*avx_vperm_broadcast_v4sf"
17549 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
17550 (vec_select:V4SF
17551 (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x")
17552 (match_parallel 2 "avx_vbroadcast_operand"
17553 [(match_operand 3 "const_int_operand" "C,n,n")])))]
17554 "TARGET_AVX"
17555{
17556 int elt = INTVAL (operands[3]);
17557 switch (which_alternative)
17558 {
/* Memory source: re-address operand 1 as the selected SFmode element
   (4 bytes per element) and broadcast that scalar.  */
17559 case 0:
17560 case 1:
17561 operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
c358a059 17562 return "vbroadcastss\t{%1, %0|%0, %k1}";
/* Register source: multiplying by 0x55 copies the 2-bit element index
   into each of the four 2-bit fields of the immediate.  */
04e14b44 17563 case 2:
17564 operands[2] = GEN_INT (elt * 0x55);
17565 return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
17566 default:
17567 gcc_unreachable ();
17568 }
17569}
17570 [(set_attr "type" "ssemov,ssemov,sselog1")
17571 (set_attr "prefix_extra" "1")
17572 (set_attr "length_immediate" "0,0,1")
17573 (set_attr "prefix" "vex")
17574 (set_attr "mode" "SF,SF,V4SF")])
17575
;; 256-bit (VF_256) counterpart of the V4SF broadcast-as-vec_select pattern.
;; The insn always emits "#" and is split after reload, except for V4DF when
;; AVX2 is enabled (see the split condition).
;;   - Register source with AVX2 and element 0: emit vec_dup<mode> on the
;;     scalar low part.
;;   - Other register sources: vpermil within each 128-bit lane, then
;;     vperm2f128 to copy the wanted lane into both lanes.
;;   - Memory source: operand 1 is re-addressed to the selected scalar and the
;;     split template's vec_duplicate is emitted.
17576(define_insn_and_split "*avx_vperm_broadcast_<mode>"
6fe5844b 17577 [(set (match_operand:VF_256 0 "register_operand" "=x,x,x")
17578 (vec_select:VF_256
17579 (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?x")
04e14b44 17580 (match_parallel 2 "avx_vbroadcast_operand"
17581 [(match_operand 3 "const_int_operand" "C,n,n")])))]
17582 "TARGET_AVX"
17583 "#"
541e350d 17584 "&& reload_completed && (<MODE>mode != V4DFmode || !TARGET_AVX2)"
6fe5844b 17585 [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
04e14b44 17586{
17587 rtx op0 = operands[0], op1 = operands[1];
17588 int elt = INTVAL (operands[3]);
17589
17590 if (REG_P (op1))
17591 {
17592 int mask;
17593
541e350d 17594 if (TARGET_AVX2 && elt == 0)
17595 {
17596 emit_insn (gen_vec_dup<mode> (op0, gen_lowpart (<ssescalarmode>mode,
17597 op1)));
17598 DONE;
17599 }
17600
04e14b44 17601 /* Shuffle element we care about into all elements of the 128-bit lane.
17602 The other lane gets shuffled too, but we don't care. */
17603 if (<MODE>mode == V4DFmode)
17604 mask = (elt & 1 ? 15 : 0);
17605 else
17606 mask = (elt & 3) * 0x55;
17607 emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
17608
17609 /* Shuffle the lane we care about into both lanes of the dest. */
17610 mask = (elt / (<ssescalarnum> / 2)) * 0x11;
17611 emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
17612 DONE;
17613 }
17614
/* Memory source: narrow operand 1 to the selected scalar element so that
   the vec_duplicate in the split template broadcasts it.  */
823a2ddd 17615 operands[1] = adjust_address (op1, <ssescalarmode>mode,
17616 elt * GET_MODE_SIZE (<ssescalarmode>mode));
04e14b44 17617})
17618
;; vpermil expander for the VF2 modes.  The immediate in operand 2 is
;; rewritten into an explicit vec_select PARALLEL of <ssescalarnum> element
;; indices before the pattern is emitted.
5220cab6 17619(define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
abd4f58b 17620 [(set (match_operand:VF2 0 "register_operand")
6fe5844b 17621 (vec_select:VF2
abd4f58b 17622 (match_operand:VF2 1 "nonimmediate_operand")
17623 (match_operand:SI 2 "const_0_to_255_operand")))]
5220cab6 17624 "TARGET_AVX && <mask_mode512bit_condition>"
af6d2927 17625{
17626 int mask = INTVAL (operands[2]);
17627 rtx perm[<ssescalarnum>];
17628
/* Elements are handled in pairs starting at I.  Bits I and I+1 of the
   immediate each choose, for one result element, between index I (bit
   clear) and index I + 1 (bit set).  */
6615b722 17629 int i;
17630 for (i = 0; i < <ssescalarnum>; i = i + 2)
af6d2927 17631 {
6615b722 17632 perm[i] = GEN_INT (((mask >> i) & 1) + i);
17633 perm[i + 1] = GEN_INT (((mask >> (i + 1)) & 1) + i);
af6d2927 17634 }
17635
17636 operands[2]
17637 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
17638})
17639
;; vpermil expander for the VF1 modes.  The immediate in operand 2 is
;; rewritten into an explicit vec_select PARALLEL of <ssescalarnum> element
;; indices before the pattern is emitted.
5220cab6 17640(define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
abd4f58b 17641 [(set (match_operand:VF1 0 "register_operand")
6fe5844b 17642 (vec_select:VF1
abd4f58b 17643 (match_operand:VF1 1 "nonimmediate_operand")
17644 (match_operand:SI 2 "const_0_to_255_operand")))]
5220cab6 17645 "TARGET_AVX && <mask_mode512bit_condition>"
af6d2927 17646{
17647 int mask = INTVAL (operands[2]);
17648 rtx perm[<ssescalarnum>];
17649
/* Elements are handled in groups of four starting at I.  The same four
   2-bit fields of the immediate are reused for every group, each field
   being an offset (0..3) from I.  */
03ae25dc 17650 int i;
17651 for (i = 0; i < <ssescalarnum>; i = i + 4)
af6d2927 17652 {
03ae25dc 17653 perm[i] = GEN_INT (((mask >> 0) & 3) + i);
17654 perm[i + 1] = GEN_INT (((mask >> 2) & 3) + i);
17655 perm[i + 2] = GEN_INT (((mask >> 4) & 3) + i);
17656 perm[i + 3] = GEN_INT (((mask >> 6) & 3) + i);
af6d2927 17657 }
17658
17659 operands[2]
17660 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
17661})
17662
;; Immediate form of vpermilps/vpermilpd, matched as a vec_select with an
;; explicit index PARALLEL.  avx_vpermilp_parallel is used both as the
;; acceptance test in the insn condition (a zero result rejects the parallel)
;; and, minus one, as the immediate printed in the output template.
5220cab6 17663(define_insn "*<sse2_avx_avx512f>_vpermilp<mode><mask_name>"
e13e1b39 17664 [(set (match_operand:VF 0 "register_operand" "=v")
6fe5844b 17665 (vec_select:VF
e13e1b39 17666 (match_operand:VF 1 "nonimmediate_operand" "vm")
4581fd42 17667 (match_parallel 2 ""
abd4f58b 17668 [(match_operand 3 "const_int_operand")])))]
5220cab6 17669 "TARGET_AVX && <mask_mode512bit_condition>
4581fd42 17670 && avx_vpermilp_parallel (operands[2], <MODE>mode)"
af6d2927 17671{
/* Replace the PARALLEL in operand 2 by the equivalent immediate.  */
17672 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
17673 operands[2] = GEN_INT (mask);
5220cab6 17674 return "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}";
af6d2927 17675}
ed30e0a6 17676 [(set_attr "type" "sselog")
00a0e418 17677 (set_attr "prefix_extra" "1")
17678 (set_attr "length_immediate" "1")
5220cab6 17679 (set_attr "prefix" "<mask_prefix>")
6a3f5f59 17680 (set_attr "mode" "<sseinsnmode>")])
ed30e0a6 17681
;; Variable form of vpermilps/vpermilpd: the control is an integer vector
;; (<sseintvecmode>, register or memory) rather than an immediate, so the
;; operation is represented as UNSPEC_VPERMIL instead of a vec_select.
5220cab6 17682(define_insn "<sse2_avx_avx512f>_vpermilvar<mode>3<mask_name>"
e13e1b39 17683 [(set (match_operand:VF 0 "register_operand" "=v")
6fe5844b 17684 (unspec:VF
e13e1b39 17685 [(match_operand:VF 1 "register_operand" "v")
17686 (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "vm")]
ed30e0a6 17687 UNSPEC_VPERMIL))]
5220cab6 17688 "TARGET_AVX && <mask_mode512bit_condition>"
17689 "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
ed30e0a6 17690 [(set_attr "type" "sselog")
00a0e418 17691 (set_attr "prefix_extra" "1")
6470d004 17692 (set_attr "btver2_decode" "vector")
5220cab6 17693 (set_attr "prefix" "<mask_prefix>")
6a3f5f59 17694 (set_attr "mode" "<sseinsnmode>")])
17695
;; Zero-masking expanders for vpermi2var, one per mode family:
;; VI48F (TARGET_AVX512F), VI1_AVX512VL (TARGET_AVX512VBMI) and
;; VI2_AVX512VL (TARGET_AVX512BW).  Each forwards its operands to the
;; corresponding _maskz_1 insn, inserting CONST0_RTX (<MODE>mode) ahead of
;; the mask operand.
;; NOTE(review): the first and third expanders carry constraint strings on
;; their operands while the VI1 one does not; presumably the constraints are
;; unused in a define_expand -- confirm and make the three consistent.
6500d329 17696(define_expand "<avx512>_vpermi2var<mode>3_maskz"
17697 [(match_operand:VI48F 0 "register_operand" "=v")
17698 (match_operand:VI48F 1 "register_operand" "v")
9a5ea1d5 17699 (match_operand:<sseintvecmode> 2 "register_operand" "0")
6500d329 17700 (match_operand:VI48F 3 "nonimmediate_operand" "vm")
a31e7f46 17701 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")]
9a5ea1d5 17702 "TARGET_AVX512F"
17703{
6500d329 17704 emit_insn (gen_<avx512>_vpermi2var<mode>3_maskz_1 (
9a5ea1d5 17705 operands[0], operands[1], operands[2], operands[3],
17706 CONST0_RTX (<MODE>mode), operands[4]));
17707 DONE;
17708})
17709
afee0628 17710(define_expand "<avx512>_vpermi2var<mode>3_maskz"
17711 [(match_operand:VI1_AVX512VL 0 "register_operand")
17712 (match_operand:VI1_AVX512VL 1 "register_operand")
17713 (match_operand:<sseintvecmode> 2 "register_operand")
17714 (match_operand:VI1_AVX512VL 3 "nonimmediate_operand")
17715 (match_operand:<avx512fmaskmode> 4 "register_operand")]
17716 "TARGET_AVX512VBMI"
17717{
17718 emit_insn (gen_<avx512>_vpermi2var<mode>3_maskz_1 (
17719 operands[0], operands[1], operands[2], operands[3],
17720 CONST0_RTX (<MODE>mode), operands[4]));
17721 DONE;
17722})
17723
6500d329 17724(define_expand "<avx512>_vpermi2var<mode>3_maskz"
17725 [(match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17726 (match_operand:VI2_AVX512VL 1 "register_operand" "v")
17727 (match_operand:<sseintvecmode> 2 "register_operand" "0")
17728 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")
17729 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")]
17730 "TARGET_AVX512BW"
17731{
17732 emit_insn (gen_<avx512>_vpermi2var<mode>3_maskz_1 (
17733 operands[0], operands[1], operands[2], operands[3],
17734 CONST0_RTX (<MODE>mode), operands[4]));
17735 DONE;
17736})
17737
;; vpermi2 insns (plain and, through <sd_maskz_name>, zero-masked), one per
;; mode family: VI48F (TARGET_AVX512F), VI1_AVX512VL (TARGET_AVX512VBMI) and
;; VI2_AVX512VL (TARGET_AVX512BW).  Operand 2, the <sseintvecmode> operand,
;; is tied to the destination ("0") and therefore does not appear in the
;; output template; operands 1 and 3 are printed as the two sources.
17738(define_insn "<avx512>_vpermi2var<mode>3<sd_maskz_name>"
17739 [(set (match_operand:VI48F 0 "register_operand" "=v")
17740 (unspec:VI48F
17741 [(match_operand:VI48F 1 "register_operand" "v")
8e6b975f 17742 (match_operand:<sseintvecmode> 2 "register_operand" "0")
6500d329 17743 (match_operand:VI48F 3 "nonimmediate_operand" "vm")]
8e6b975f 17744 UNSPEC_VPERMI2))]
17745 "TARGET_AVX512F"
9a5ea1d5 17746 "vpermi2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
8e6b975f 17747 [(set_attr "type" "sselog")
17748 (set_attr "prefix" "evex")
17749 (set_attr "mode" "<sseinsnmode>")])
17750
afee0628 17751(define_insn "<avx512>_vpermi2var<mode>3<sd_maskz_name>"
17752 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
17753 (unspec:VI1_AVX512VL
17754 [(match_operand:VI1_AVX512VL 1 "register_operand" "v")
17755 (match_operand:<sseintvecmode> 2 "register_operand" "0")
17756 (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")]
17757 UNSPEC_VPERMI2))]
17758 "TARGET_AVX512VBMI"
17759 "vpermi2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
17760 [(set_attr "type" "sselog")
17761 (set_attr "prefix" "evex")
17762 (set_attr "mode" "<sseinsnmode>")])
17763
6500d329 17764(define_insn "<avx512>_vpermi2var<mode>3<sd_maskz_name>"
17765 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17766 (unspec:VI2_AVX512VL
17767 [(match_operand:VI2_AVX512VL 1 "register_operand" "v")
17768 (match_operand:<sseintvecmode> 2 "register_operand" "0")
17769 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")]
17770 UNSPEC_VPERMI2))]
17771 "TARGET_AVX512BW"
17772 "vpermi2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
17773 [(set_attr "type" "sselog")
17774 (set_attr "prefix" "evex")
17775 (set_attr "mode" "<sseinsnmode>")])
17776
;; Merge-masked vpermi2 insns, one per mode family: VI48F (TARGET_AVX512F),
;; VI1_AVX512VL (TARGET_AVX512VBMI) and VI2_AVX512VL (TARGET_AVX512BW).
;; The permutation is wrapped in UNSPEC_VPERMI2_MASK and combined by
;; vec_merge under the mask register in operand 4, which the template prints
;; as {%4} after the destination.
;; NOTE(review): the vec_merge fallback value is (match_dup 0), i.e. the
;; output operand itself, whereas the vpermt2var _mask patterns in this file
;; use the tied input, (match_dup 2).  Operand 2 here is tied to operand 0
;; but has mode <sseintvecmode> -- confirm that (match_dup 0) describes the
;; intended merge source.
17777(define_insn "<avx512>_vpermi2var<mode>3_mask"
17778 [(set (match_operand:VI48F 0 "register_operand" "=v")
17779 (vec_merge:VI48F
17780 (unspec:VI48F
17781 [(match_operand:VI48F 1 "register_operand" "v")
5220cab6 17782 (match_operand:<sseintvecmode> 2 "register_operand" "0")
6500d329 17783 (match_operand:VI48F 3 "nonimmediate_operand" "vm")]
5220cab6 17784 UNSPEC_VPERMI2_MASK)
17785 (match_dup 0)
a31e7f46 17786 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
5220cab6 17787 "TARGET_AVX512F"
17788 "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
17789 [(set_attr "type" "sselog")
17790 (set_attr "prefix" "evex")
17791 (set_attr "mode" "<sseinsnmode>")])
17792
afee0628 17793(define_insn "<avx512>_vpermi2var<mode>3_mask"
17794 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
17795 (vec_merge:VI1_AVX512VL
17796 (unspec:VI1_AVX512VL
17797 [(match_operand:VI1_AVX512VL 1 "register_operand" "v")
17798 (match_operand:<sseintvecmode> 2 "register_operand" "0")
17799 (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")]
17800 UNSPEC_VPERMI2_MASK)
17801 (match_dup 0)
17802 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
17803 "TARGET_AVX512VBMI"
17804 "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
17805 [(set_attr "type" "sselog")
17806 (set_attr "prefix" "evex")
17807 (set_attr "mode" "<sseinsnmode>")])
17808
6500d329 17809(define_insn "<avx512>_vpermi2var<mode>3_mask"
17810 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17811 (vec_merge:VI2_AVX512VL
17812 (unspec:VI2_AVX512VL
17813 [(match_operand:VI2_AVX512VL 1 "register_operand" "v")
17814 (match_operand:<sseintvecmode> 2 "register_operand" "0")
17815 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")]
17816 UNSPEC_VPERMI2_MASK)
17817 (match_dup 0)
17818 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
17819 "TARGET_AVX512BW"
17820 "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
17821 [(set_attr "type" "sselog")
17822 (set_attr "prefix" "evex")
17823 (set_attr "mode" "<sseinsnmode>")])
17824
;; Zero-masking expanders for vpermt2var, one per mode family:
;; VI48F (TARGET_AVX512F), VI1_AVX512VL (TARGET_AVX512VBMI) and
;; VI2_AVX512VL (TARGET_AVX512BW).  Unlike vpermi2var, the <sseintvecmode>
;; operand is operand 1 and the data operand in position 2 is the one marked
;; "0".  Each expander forwards to the _maskz_1 insn, inserting
;; CONST0_RTX (<MODE>mode) ahead of the mask operand.
17825(define_expand "<avx512>_vpermt2var<mode>3_maskz"
17826 [(match_operand:VI48F 0 "register_operand" "=v")
9a5ea1d5 17827 (match_operand:<sseintvecmode> 1 "register_operand" "v")
6500d329 17828 (match_operand:VI48F 2 "register_operand" "0")
17829 (match_operand:VI48F 3 "nonimmediate_operand" "vm")
a31e7f46 17830 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")]
9a5ea1d5 17831 "TARGET_AVX512F"
17832{
6500d329 17833 emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 (
9a5ea1d5 17834 operands[0], operands[1], operands[2], operands[3],
17835 CONST0_RTX (<MODE>mode), operands[4]));
17836 DONE;
17837})
17838
afee0628 17839(define_expand "<avx512>_vpermt2var<mode>3_maskz"
17840 [(match_operand:VI1_AVX512VL 0 "register_operand" "=v")
17841 (match_operand:<sseintvecmode> 1 "register_operand" "v")
17842 (match_operand:VI1_AVX512VL 2 "register_operand" "0")
17843 (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")
17844 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")]
17845 "TARGET_AVX512VBMI"
17846{
17847 emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 (
17848 operands[0], operands[1], operands[2], operands[3],
17849 CONST0_RTX (<MODE>mode), operands[4]));
17850 DONE;
17851})
17852
6500d329 17853(define_expand "<avx512>_vpermt2var<mode>3_maskz"
17854 [(match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17855 (match_operand:<sseintvecmode> 1 "register_operand" "v")
17856 (match_operand:VI2_AVX512VL 2 "register_operand" "0")
17857 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")
17858 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")]
17859 "TARGET_AVX512BW"
17860{
17861 emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 (
17862 operands[0], operands[1], operands[2], operands[3],
17863 CONST0_RTX (<MODE>mode), operands[4]));
17864 DONE;
17865})
17866
;; vpermt2 insns (plain and, through <sd_maskz_name>, zero-masked), one per
;; mode family: VI48F (TARGET_AVX512F), VI1_AVX512VL (TARGET_AVX512VBMI) and
;; VI2_AVX512VL (TARGET_AVX512BW).  Operand 2, a data vector, is tied to the
;; destination ("0"); the <sseintvecmode> operand 1 and the data operand 3
;; are printed as the two sources.
17867(define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>"
17868 [(set (match_operand:VI48F 0 "register_operand" "=v")
17869 (unspec:VI48F
8e6b975f 17870 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
6500d329 17871 (match_operand:VI48F 2 "register_operand" "0")
17872 (match_operand:VI48F 3 "nonimmediate_operand" "vm")]
8e6b975f 17873 UNSPEC_VPERMT2))]
17874 "TARGET_AVX512F"
9a5ea1d5 17875 "vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
8e6b975f 17876 [(set_attr "type" "sselog")
17877 (set_attr "prefix" "evex")
17878 (set_attr "mode" "<sseinsnmode>")])
ed30e0a6 17879
afee0628 17880(define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>"
17881 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
17882 (unspec:VI1_AVX512VL
17883 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
17884 (match_operand:VI1_AVX512VL 2 "register_operand" "0")
17885 (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")]
17886 UNSPEC_VPERMT2))]
17887 "TARGET_AVX512VBMI"
17888 "vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
17889 [(set_attr "type" "sselog")
17890 (set_attr "prefix" "evex")
17891 (set_attr "mode" "<sseinsnmode>")])
17892
6500d329 17893(define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>"
17894 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17895 (unspec:VI2_AVX512VL
17896 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
17897 (match_operand:VI2_AVX512VL 2 "register_operand" "0")
17898 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")]
17899 UNSPEC_VPERMT2))]
17900 "TARGET_AVX512BW"
17901 "vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
17902 [(set_attr "type" "sselog")
17903 (set_attr "prefix" "evex")
17904 (set_attr "mode" "<sseinsnmode>")])
17905
;; Merge-masked vpermt2 insns, one per mode family: VI48F (TARGET_AVX512F),
;; VI1_AVX512VL (TARGET_AVX512VBMI) and VI2_AVX512VL (TARGET_AVX512BW).
;; The UNSPEC_VPERMT2 result is vec_merge'd with operand 2 (the data vector
;; tied to the destination) under the mask register in operand 4, which the
;; template prints as {%4} after the destination.
17906(define_insn "<avx512>_vpermt2var<mode>3_mask"
17907 [(set (match_operand:VI48F 0 "register_operand" "=v")
17908 (vec_merge:VI48F
17909 (unspec:VI48F
5220cab6 17910 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
6500d329 17911 (match_operand:VI48F 2 "register_operand" "0")
17912 (match_operand:VI48F 3 "nonimmediate_operand" "vm")]
5220cab6 17913 UNSPEC_VPERMT2)
17914 (match_dup 2)
a31e7f46 17915 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
5220cab6 17916 "TARGET_AVX512F"
17917 "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
17918 [(set_attr "type" "sselog")
17919 (set_attr "prefix" "evex")
17920 (set_attr "mode" "<sseinsnmode>")])
17921
afee0628 17922(define_insn "<avx512>_vpermt2var<mode>3_mask"
17923 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
17924 (vec_merge:VI1_AVX512VL
17925 (unspec:VI1_AVX512VL
17926 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
17927 (match_operand:VI1_AVX512VL 2 "register_operand" "0")
17928 (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")]
17929 UNSPEC_VPERMT2)
17930 (match_dup 2)
17931 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
17932 "TARGET_AVX512VBMI"
17933 "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
17934 [(set_attr "type" "sselog")
17935 (set_attr "prefix" "evex")
17936 (set_attr "mode" "<sseinsnmode>")])
17937
6500d329 17938(define_insn "<avx512>_vpermt2var<mode>3_mask"
17939 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17940 (vec_merge:VI2_AVX512VL
17941 (unspec:VI2_AVX512VL
17942 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
17943 (match_operand:VI2_AVX512VL 2 "register_operand" "0")
17944 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")]
17945 UNSPEC_VPERMT2)
17946 (match_dup 2)
17947 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
17948 "TARGET_AVX512BW"
17949 "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
17950 [(set_attr "type" "sselog")
17951 (set_attr "prefix" "evex")
17952 (set_attr "mode" "<sseinsnmode>")])
17953
;; Expander for vperm2f128 with an 8-bit immediate in operand 3.
;; If neither bit 3 nor bit 7 of the immediate is set, the operation is
;; emitted as a vec_select from the vec_concat of operands 1 and 2, with an
;; explicit index PARALLEL.  Otherwise the preparation code does nothing and
;; the UNSPEC_VPERMIL2F128 template is emitted unchanged.
f2139d4c 17954(define_expand "avx_vperm2f128<mode>3"
abd4f58b 17955 [(set (match_operand:AVX256MODE2P 0 "register_operand")
f2139d4c 17956 (unspec:AVX256MODE2P
abd4f58b 17957 [(match_operand:AVX256MODE2P 1 "register_operand")
17958 (match_operand:AVX256MODE2P 2 "nonimmediate_operand")
17959 (match_operand:SI 3 "const_0_to_255_operand")]
f2139d4c 17960 UNSPEC_VPERMIL2F128))]
17961 "TARGET_AVX"
17962{
172d9cbe 17963 int mask = INTVAL (operands[3]);
f2139d4c 17964 if ((mask & 0x88) == 0)
17965 {
17966 rtx perm[<ssescalarnum>], t1, t2;
17967 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
17968
/* Bits 0-1 pick which half of the concatenation feeds the low half of
   the result; BASE is the index of that half's first element.  */
17969 base = (mask & 3) * nelt2;
17970 for (i = 0; i < nelt2; ++i)
17971 perm[i] = GEN_INT (base + i);
17972
/* Bits 4-5 do the same for the high half of the result.  */
17973 base = ((mask >> 4) & 3) * nelt2;
17974 for (i = 0; i < nelt2; ++i)
17975 perm[i + nelt2] = GEN_INT (base + i);
17976
63d5e521 17977 t2 = gen_rtx_VEC_CONCAT (<ssedoublevecmode>mode,
f2139d4c 17978 operands[1], operands[2]);
17979 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
17980 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
d1f9b275 17981 t2 = gen_rtx_SET (operands[0], t2);
f2139d4c 17982 emit_insn (t2);
17983 DONE;
17984 }
17985})
17986
17987;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
17988;; means that in order to represent this properly in rtl we'd have to
17989;; nest *another* vec_concat with a zero operand and do the select from
17990;; a 4x wide vector. That doesn't seem very nice.
;; Hence this pattern keeps the raw immediate inside UNSPEC_VPERMIL2F128 and
;; prints it unchanged as the first operand of vperm2<i128>.
17991(define_insn "*avx_vperm2f128<mode>_full"
ed30e0a6 17992 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
17993 (unspec:AVX256MODE2P
17994 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
17995 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
17996 (match_operand:SI 3 "const_0_to_255_operand" "n")]
17997 UNSPEC_VPERMIL2F128))]
17998 "TARGET_AVX"
154d1782 17999 "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
ed30e0a6 18000 [(set_attr "type" "sselog")
00a0e418 18001 (set_attr "prefix_extra" "1")
18002 (set_attr "length_immediate" "1")
ed30e0a6 18003 (set_attr "prefix" "vex")
154d1782 18004 (set_attr "mode" "<sseinsnmode>")])
ed30e0a6 18005
;; vperm2f128 expressed as a vec_select from the vec_concat of operands 1
;; and 2, i.e. the form without lane zeroing.  avx_vperm2f128_parallel is
;; used both as the acceptance test in the insn condition and, minus one, as
;; the immediate.  Immediates 0x12 and 0x20 are emitted as vinsert<i128> of
;; operand 2 (printed with %x) into operand 1 at position 0 or 1 instead of
;; as vperm2<i128>.
f2139d4c 18006(define_insn "*avx_vperm2f128<mode>_nozero"
18007 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
18008 (vec_select:AVX256MODE2P
63d5e521 18009 (vec_concat:<ssedoublevecmode>
f2139d4c 18010 (match_operand:AVX256MODE2P 1 "register_operand" "x")
18011 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
4581fd42 18012 (match_parallel 3 ""
abd4f58b 18013 [(match_operand 4 "const_int_operand")])))]
4581fd42 18014 "TARGET_AVX
18015 && avx_vperm2f128_parallel (operands[3], <MODE>mode)"
f2139d4c 18016{
18017 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
/* Presumably MASK uses the same encoding as the avx_vperm2f128<mode>3
   expander (bits 0-1 low half, bits 4-5 high half; 0/1 = halves of
   operand 1, 2/3 = halves of operand 2) -- TODO confirm against
   avx_vperm2f128_parallel.  Under that encoding 0x12 and 0x20 replace
   exactly one half of operand 1 by the low half of operand 2.  */
7fc62525 18018 if (mask == 0x12)
18019 return "vinsert<i128>\t{$0, %x2, %1, %0|%0, %1, %x2, 0}";
18020 if (mask == 0x20)
18021 return "vinsert<i128>\t{$1, %x2, %1, %0|%0, %1, %x2, 1}";
f2139d4c 18022 operands[3] = GEN_INT (mask);
154d1782 18023 return "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
f2139d4c 18024}
18025 [(set_attr "type" "sselog")
18026 (set_attr "prefix_extra" "1")
18027 (set_attr "length_immediate" "1")
18028 (set_attr "prefix" "vex")
154d1782 18029 (set_attr "mode" "<sseinsnmode>")])
f2139d4c 18030
;; Single-input vec_select on a V_128 vector whose index parallel satisfies
;; palignr_operand, emitted as (v)palignr with the same register as both
;; sources.  Alternative 0 is the SSSE3 form with operand 1 tied to the
;; destination; alternative 1 is the AVX three-operand form.
bafd306d 18031(define_insn "*ssse3_palignr<mode>_perm"
18032 [(set (match_operand:V_128 0 "register_operand" "=x,x")
18033 (vec_select:V_128
18034 (match_operand:V_128 1 "register_operand" "0,x")
18035 (match_parallel 2 "palignr_operand"
18036 [(match_operand 3 "const_int_operand" "n, n")])))]
18037 "TARGET_SSSE3"
18038{
/* Operand 3 is the first selected element index; scale it by the element
   size to obtain the byte count used as the instruction immediate.  */
6e256598 18039 operands[2] =
18040 GEN_INT (INTVAL (operands[3]) * GET_MODE_UNIT_SIZE (GET_MODE (operands[0])));
bafd306d 18041
18042 switch (which_alternative)
18043 {
18044 case 0:
18045 return "palignr\t{%2, %1, %0|%0, %1, %2}";
18046 case 1:
18047 return "vpalignr\t{%2, %1, %1, %0|%0, %1, %1, %2}";
18048 default:
18049 gcc_unreachable ();
18050 }
18051}
18052 [(set_attr "isa" "noavx,avx")
18053 (set_attr "type" "sseishft")
18054 (set_attr "atom_unit" "sishuf")
18055 (set_attr "prefix_data16" "1,*")
18056 (set_attr "prefix_extra" "1")
18057 (set_attr "length_immediate" "1")
18058 (set_attr "prefix" "orig,vex")])
18059
;; Masked insertion of a half-width vector (operand 2) into a VI48F_256
;; vector (operand 1).  Operand 3 (0 or 1) selects the vec_set_lo_<mode>_mask
;; or vec_set_hi_<mode>_mask generator; it is not passed on.  Operands 4 and 5
;; are forwarded as the last two arguments -- presumably the merge source and
;; the mask register, given operand 5's <avx512fmaskmode>.
4e303d3a 18060(define_expand "avx512vl_vinsert<mode>"
18061 [(match_operand:VI48F_256 0 "register_operand")
18062 (match_operand:VI48F_256 1 "register_operand")
18063 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
18064 (match_operand:SI 3 "const_0_to_1_operand")
18065 (match_operand:VI48F_256 4 "register_operand")
18066 (match_operand:<avx512fmaskmode> 5 "register_operand")]
18067 "TARGET_AVX512VL"
18068{
18069 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
18070
18071 switch (INTVAL (operands[3]))
18072 {
18073 case 0:
18074 insn = gen_vec_set_lo_<mode>_mask;
18075 break;
18076 case 1:
18077 insn = gen_vec_set_hi_<mode>_mask;
18078 break;
18079 default:
18080 gcc_unreachable ();
18081 }
18082
18083 emit_insn (insn (operands[0], operands[1], operands[2], operands[4],
18084 operands[5]));
18085 DONE;
18086})
18087
;; Insert a half-width vector (operand 2) into a V_256 vector (operand 1).
;; Operand 3 (0 or 1) selects the vec_set_lo_<mode> or vec_set_hi_<mode>
;; generator and is not itself passed on.
ed30e0a6 18088(define_expand "avx_vinsertf128<mode>"
abd4f58b 18089 [(match_operand:V_256 0 "register_operand")
18090 (match_operand:V_256 1 "register_operand")
18091 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
18092 (match_operand:SI 3 "const_0_to_1_operand")]
ed30e0a6 18093 "TARGET_AVX"
18094{
c3fa352f 18095 rtx (*insn)(rtx, rtx, rtx);
18096
ed30e0a6 18097 switch (INTVAL (operands[3]))
18098 {
18099 case 0:
c3fa352f 18100 insn = gen_vec_set_lo_<mode>;
ed30e0a6 18101 break;
18102 case 1:
c3fa352f 18103 insn = gen_vec_set_hi_<mode>;
ed30e0a6 18104 break;
18105 default:
18106 gcc_unreachable ();
18107 }
c3fa352f 18108
18109 emit_insn (insn (operands[0], operands[1], operands[2]));
ed30e0a6 18110 DONE;
18111})
18112
;; Replace the low half of a VI8F_256 vector: the result is operand 2
;; concatenated with elements 2 and 3 of operand 1.  Optionally masked through
;; <mask_name>.
;; NOTE(review): vinsert{f,i}64x2 is documented by Intel as an AVX512DQ
;; instruction, yet this template selects it on TARGET_AVX512VL alone.  The
;; "prefix" attribute is also fixed at "vex" although that branch prints an
;; EVEX-only mnemonic.  Confirm both.
4e303d3a 18113(define_insn "vec_set_lo_<mode><mask_name>"
18114 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
6fe5844b 18115 (vec_concat:VI8F_256
4e303d3a 18116 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
63d5e521 18117 (vec_select:<ssehalfvecmode>
4e303d3a 18118 (match_operand:VI8F_256 1 "register_operand" "v")
ed30e0a6 18119 (parallel [(const_int 2) (const_int 3)]))))]
18120 "TARGET_AVX"
4e303d3a 18121{
18122 if (TARGET_AVX512VL)
18123 return "vinsert<shuffletype>64x2\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
18124 else
18125 return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}";
18126}
ed30e0a6 18127 [(set_attr "type" "sselog")
00a0e418 18128 (set_attr "prefix_extra" "1")
18129 (set_attr "length_immediate" "1")
ed30e0a6 18130 (set_attr "prefix" "vex")
154d1782 18131 (set_attr "mode" "<sseinsnmode>")])
ed30e0a6 18132
;; Replace the high half of a VI8F_256 vector: the result is elements 0 and 1
;; of operand 1 concatenated with operand 2.  Optionally masked through
;; <mask_name>.
;; NOTE(review): vinsert{f,i}64x2 is documented by Intel as an AVX512DQ
;; instruction, yet this template selects it on TARGET_AVX512VL alone.  The
;; "prefix" attribute is also fixed at "vex" although that branch prints an
;; EVEX-only mnemonic.  Confirm both.
4e303d3a 18133(define_insn "vec_set_hi_<mode><mask_name>"
18134 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
6fe5844b 18135 (vec_concat:VI8F_256
63d5e521 18136 (vec_select:<ssehalfvecmode>
4e303d3a 18137 (match_operand:VI8F_256 1 "register_operand" "v")
ed30e0a6 18138 (parallel [(const_int 0) (const_int 1)]))
4e303d3a 18139 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
ed30e0a6 18140 "TARGET_AVX"
4e303d3a 18141{
18142 if (TARGET_AVX512VL)
18143 return "vinsert<shuffletype>64x2\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
18144 else
18145 return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}";
18146}
ed30e0a6 18147 [(set_attr "type" "sselog")
00a0e418 18148 (set_attr "prefix_extra" "1")
18149 (set_attr "length_immediate" "1")
ed30e0a6 18150 (set_attr "prefix" "vex")
154d1782 18151 (set_attr "mode" "<sseinsnmode>")])
ed30e0a6 18152
;; Replace the low half of a VI4F_256 vector: the result is operand 2
;; concatenated with elements 4..7 of operand 1.  Optionally masked through
;; <mask_name>.  With TARGET_AVX512VL the vinsert{f,i}32x4 form is printed
;; (with the mask operand), otherwise vinsert<i128>.
;; NOTE(review): the "prefix" attribute is fixed at "vex" even for the
;; AVX512VL branch -- confirm.
4e303d3a 18153(define_insn "vec_set_lo_<mode><mask_name>"
18154 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
6fe5844b 18155 (vec_concat:VI4F_256
4e303d3a 18156 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
63d5e521 18157 (vec_select:<ssehalfvecmode>
4e303d3a 18158 (match_operand:VI4F_256 1 "register_operand" "v")
ed30e0a6 18159 (parallel [(const_int 4) (const_int 5)
18160 (const_int 6) (const_int 7)]))))]
18161 "TARGET_AVX"
4e303d3a 18162{
18163 if (TARGET_AVX512VL)
18164 return "vinsert<shuffletype>32x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
18165 else
18166 return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}";
18167}
ed30e0a6 18168 [(set_attr "type" "sselog")
00a0e418 18169 (set_attr "prefix_extra" "1")
18170 (set_attr "length_immediate" "1")
ed30e0a6 18171 (set_attr "prefix" "vex")
154d1782 18172 (set_attr "mode" "<sseinsnmode>")])
ed30e0a6 18173
;; Replace the high half of a VI4F_256 vector: the result is elements 0..3 of
;; operand 1 concatenated with operand 2.  Optionally masked through
;; <mask_name>.  With TARGET_AVX512VL the vinsert{f,i}32x4 form is printed
;; (with the mask operand), otherwise vinsert<i128>.
;; NOTE(review): the "prefix" attribute is fixed at "vex" even for the
;; AVX512VL branch -- confirm.
4e303d3a 18174(define_insn "vec_set_hi_<mode><mask_name>"
18175 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
6fe5844b 18176 (vec_concat:VI4F_256
63d5e521 18177 (vec_select:<ssehalfvecmode>
4e303d3a 18178 (match_operand:VI4F_256 1 "register_operand" "v")
ed30e0a6 18179 (parallel [(const_int 0) (const_int 1)
18180 (const_int 2) (const_int 3)]))
4e303d3a 18181 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
ed30e0a6 18182 "TARGET_AVX"
4e303d3a 18183{
18184 if (TARGET_AVX512VL)
18185 return "vinsert<shuffletype>32x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
18186 else
18187 return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}";
18188}
ed30e0a6 18189 [(set_attr "type" "sselog")
00a0e418 18190 (set_attr "prefix_extra" "1")
18191 (set_attr "length_immediate" "1")
ed30e0a6 18192 (set_attr "prefix" "vex")
154d1782 18193 (set_attr "mode" "<sseinsnmode>")])
ed30e0a6 18194
;; Replace the low V8HI half of a V16HI vector: operand 2 becomes the
;; low half and elements 8-15 of operand 1 are kept as the high half.
;; Emitted as vinsert%~128 with immediate 0x0.
18195(define_insn "vec_set_lo_v16hi"
18196 [(set (match_operand:V16HI 0 "register_operand" "=x")
18197 (vec_concat:V16HI
18198 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
18199 (vec_select:V8HI
18200 (match_operand:V16HI 1 "register_operand" "x")
18201 (parallel [(const_int 8) (const_int 9)
18202 (const_int 10) (const_int 11)
18203 (const_int 12) (const_int 13)
18204 (const_int 14) (const_int 15)]))))]
18205 "TARGET_AVX"
154d1782 18206 "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
ed30e0a6 18207 [(set_attr "type" "sselog")
00a0e418 18208 (set_attr "prefix_extra" "1")
18209 (set_attr "length_immediate" "1")
ed30e0a6 18210 (set_attr "prefix" "vex")
154d1782 18211 (set_attr "mode" "OI")])
ed30e0a6 18212
;; Replace the high V8HI half of a V16HI vector: elements 0-7 of
;; operand 1 are kept as the low half and operand 2 becomes the high
;; half.  Emitted as vinsert%~128 with immediate 0x1.
18213(define_insn "vec_set_hi_v16hi"
18214 [(set (match_operand:V16HI 0 "register_operand" "=x")
18215 (vec_concat:V16HI
18216 (vec_select:V8HI
18217 (match_operand:V16HI 1 "register_operand" "x")
18218 (parallel [(const_int 0) (const_int 1)
18219 (const_int 2) (const_int 3)
18220 (const_int 4) (const_int 5)
18221 (const_int 6) (const_int 7)]))
18222 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
18223 "TARGET_AVX"
154d1782 18224 "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
ed30e0a6 18225 [(set_attr "type" "sselog")
00a0e418 18226 (set_attr "prefix_extra" "1")
18227 (set_attr "length_immediate" "1")
ed30e0a6 18228 (set_attr "prefix" "vex")
154d1782 18229 (set_attr "mode" "OI")])
ed30e0a6 18230
;; Replace the low V16QI half of a V32QI vector: operand 2 becomes the
;; low half and elements 16-31 of operand 1 are kept as the high half.
;; Emitted as vinsert%~128 with immediate 0x0.
18231(define_insn "vec_set_lo_v32qi"
18232 [(set (match_operand:V32QI 0 "register_operand" "=x")
18233 (vec_concat:V32QI
18234 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
18235 (vec_select:V16QI
18236 (match_operand:V32QI 1 "register_operand" "x")
18237 (parallel [(const_int 16) (const_int 17)
18238 (const_int 18) (const_int 19)
18239 (const_int 20) (const_int 21)
18240 (const_int 22) (const_int 23)
18241 (const_int 24) (const_int 25)
18242 (const_int 26) (const_int 27)
18243 (const_int 28) (const_int 29)
18244 (const_int 30) (const_int 31)]))))]
18245 "TARGET_AVX"
154d1782 18246 "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
ed30e0a6 18247 [(set_attr "type" "sselog")
00a0e418 18248 (set_attr "prefix_extra" "1")
18249 (set_attr "length_immediate" "1")
ed30e0a6 18250 (set_attr "prefix" "vex")
154d1782 18251 (set_attr "mode" "OI")])
ed30e0a6 18252
;; Replace the high V16QI half of a V32QI vector: elements 0-15 of
;; operand 1 are kept as the low half and operand 2 becomes the high
;; half.  Emitted as vinsert%~128 with immediate 0x1.
18253(define_insn "vec_set_hi_v32qi"
18254 [(set (match_operand:V32QI 0 "register_operand" "=x")
18255 (vec_concat:V32QI
18256 (vec_select:V16QI
18257 (match_operand:V32QI 1 "register_operand" "x")
18258 (parallel [(const_int 0) (const_int 1)
18259 (const_int 2) (const_int 3)
18260 (const_int 4) (const_int 5)
18261 (const_int 6) (const_int 7)
18262 (const_int 8) (const_int 9)
18263 (const_int 10) (const_int 11)
18264 (const_int 12) (const_int 13)
18265 (const_int 14) (const_int 15)]))
18266 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
18267 "TARGET_AVX"
154d1782 18268 "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
ed30e0a6 18269 [(set_attr "type" "sselog")
00a0e418 18270 (set_attr "prefix_extra" "1")
18271 (set_attr "length_immediate" "1")
ed30e0a6 18272 (set_attr "prefix" "vex")
154d1782 18273 (set_attr "mode" "OI")])
ed30e0a6 18274
;; Masked vector load.  Operand 0 (register) is set to an
;; UNSPEC_MASKMOV of the mask register (operand 2, integer vector mode
;; <sseintvecmode>) and the memory source (operand 1).
c512f3a4 18275(define_insn "<avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>"
18276 [(set (match_operand:V48_AVX2 0 "register_operand" "=x")
5deb404d 18277 (unspec:V48_AVX2
c512f3a4 18278 [(match_operand:<sseintvecmode> 2 "register_operand" "x")
18279 (match_operand:V48_AVX2 1 "memory_operand" "m")]
458af25e 18280 UNSPEC_MASKMOV))]
18281 "TARGET_AVX"
c512f3a4 18282 "v<sseintprefix>maskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
5deb404d 18283 [(set_attr "type" "sselog1")
18284 (set_attr "prefix_extra" "1")
18285 (set_attr "prefix" "vex")
6470d004 18286 (set_attr "btver2_decode" "vector")
5deb404d 18287 (set_attr "mode" "<sseinsnmode>")])
18288
;; Masked vector store.  The memory destination (operand 0, "+m") is
;; also an input of the unspec via (match_dup 0), alongside the mask
;; register (operand 1) and the value register (operand 2).
c512f3a4 18289(define_insn "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>"
ba11bab4 18290 [(set (match_operand:V48_AVX2 0 "memory_operand" "+m")
c512f3a4 18291 (unspec:V48_AVX2
458af25e 18292 [(match_operand:<sseintvecmode> 1 "register_operand" "x")
c512f3a4 18293 (match_operand:V48_AVX2 2 "register_operand" "x")
dcab66ec 18294 (match_dup 0)]
18295 UNSPEC_MASKMOV))]
458af25e 18296 "TARGET_AVX"
c512f3a4 18297 "v<sseintprefix>maskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
ed30e0a6 18298 [(set_attr "type" "sselog1")
00a0e418 18299 (set_attr "prefix_extra" "1")
ed30e0a6 18300 (set_attr "prefix" "vex")
6470d004 18301 (set_attr "btver2_decode" "vector")
c512f3a4 18302 (set_attr "mode" "<sseinsnmode>")])
ed30e0a6 18303
;; maskload expander for V48_AVX2 modes with a vector-mode mask
;; (operand 2, <sseintvecmode>): generates the UNSPEC_MASKMOV load form
;; with operands in the order [mask, memory].
cf5489c1 18304(define_expand "maskload<mode><sseintvecmodelower>"
c71d3c24 18305 [(set (match_operand:V48_AVX2 0 "register_operand")
18306 (unspec:V48_AVX2
18307 [(match_operand:<sseintvecmode> 2 "register_operand")
18308 (match_operand:V48_AVX2 1 "memory_operand")]
18309 UNSPEC_MASKMOV))]
18310 "TARGET_AVX")
18311
;; maskload expander for V48_AVX512VL modes with an <avx512fmaskmode>
;; mask register (operand 2): a vec_merge of the memory source
;; (operand 1) with the previous contents of the destination
;; (match_dup 0).
cf5489c1 18312(define_expand "maskload<mode><avx512fmaskmodelower>"
18313 [(set (match_operand:V48_AVX512VL 0 "register_operand")
18314 (vec_merge:V48_AVX512VL
18315 (match_operand:V48_AVX512VL 1 "memory_operand")
18316 (match_dup 0)
18317 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
18318 "TARGET_AVX512F")
18319
;; Same vec_merge-based maskload expander, for the VI12_AVX512VL modes;
;; enabled under TARGET_AVX512BW.
18320(define_expand "maskload<mode><avx512fmaskmodelower>"
18321 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
18322 (vec_merge:VI12_AVX512VL
18323 (match_operand:VI12_AVX512VL 1 "memory_operand")
18324 (match_dup 0)
18325 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
18326 "TARGET_AVX512BW")
18327
;; maskstore expander for V48_AVX2 modes with a vector-mode mask
;; (operand 2): generates the UNSPEC_MASKMOV store form with operands
;; in the order [mask, value (operand 1), old memory contents].
18328(define_expand "maskstore<mode><sseintvecmodelower>"
c71d3c24 18329 [(set (match_operand:V48_AVX2 0 "memory_operand")
18330 (unspec:V48_AVX2
18331 [(match_operand:<sseintvecmode> 2 "register_operand")
18332 (match_operand:V48_AVX2 1 "register_operand")
18333 (match_dup 0)]
18334 UNSPEC_MASKMOV))]
18335 "TARGET_AVX")
18336
;; maskstore expander for V48_AVX512VL modes with an <avx512fmaskmode>
;; mask register (operand 2): a vec_merge of the value register
;; (operand 1) with the previous memory contents (match_dup 0).
cf5489c1 18337(define_expand "maskstore<mode><avx512fmaskmodelower>"
18338 [(set (match_operand:V48_AVX512VL 0 "memory_operand")
18339 (vec_merge:V48_AVX512VL
18340 (match_operand:V48_AVX512VL 1 "register_operand")
18341 (match_dup 0)
18342 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
18343 "TARGET_AVX512F")
18344
;; Same vec_merge-based maskstore expander, for the VI12_AVX512VL
;; modes; enabled under TARGET_AVX512BW.
18345(define_expand "maskstore<mode><avx512fmaskmodelower>"
18346 [(set (match_operand:VI12_AVX512VL 0 "memory_operand")
18347 (vec_merge:VI12_AVX512VL
18348 (match_operand:VI12_AVX512VL 1 "register_operand")
18349 (match_dup 0)
18350 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
18351 "TARGET_AVX512BW")
18352
;; Cast of a half-width vector (operand 1, <ssehalfvecmode>) to an
;; AVX256MODE2P destination via UNSPEC_CAST.  Always output as "#" and
;; split after reload into a single move:
;;  - register destination: the destination is re-expressed as a
;;    register in <ssehalfvecmode>;
;;  - otherwise: the source register is re-expressed in <MODE>mode.
675d6e0d 18353(define_insn_and_split "avx_<castmode><avxsizesuffix>_<castmode>"
3d1a0207 18354 [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
ed30e0a6 18355 (unspec:AVX256MODE2P
63d5e521 18356 [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
ed30e0a6 18357 UNSPEC_CAST))]
18358 "TARGET_AVX"
3d1a0207 18359 "#"
18360 "&& reload_completed"
18361 [(const_int 0)]
ed30e0a6 18362{
03f65847 18363 rtx op0 = operands[0];
3d1a0207 18364 rtx op1 = operands[1];
03f65847 18365 if (REG_P (op0))
18366 op0 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op0));
33541f98 18367 else
3d1a0207 18368 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
03f65847 18369 emit_move_insn (op0, op1);
3d1a0207 18370 DONE;
18371})
ed30e0a6 18372
;; vec_init expander for V_256 modes: delegates entirely to
;; ix86_expand_vector_init (operand 0 = destination register,
;; operand 1 = initializer) and emits nothing from the template (DONE).
18373(define_expand "vec_init<mode>"
abd4f58b 18374 [(match_operand:V_256 0 "register_operand")
18375 (match_operand 1)]
ed30e0a6 18376 "TARGET_AVX"
18377{
18378 ix86_expand_vector_init (false, operands[0], operands[1]);
18379 DONE;
18380})
18381
;; vec_init expander for VF48_I1248 modes under TARGET_AVX512F:
;; delegates entirely to ix86_expand_vector_init.
8e6b975f 18382(define_expand "vec_init<mode>"
da2989a5 18383 [(match_operand:VF48_I1248 0 "register_operand")
8e6b975f 18384 (match_operand 1)]
18385 "TARGET_AVX512F"
18386{
18387 ix86_expand_vector_init (false, operands[0], operands[1]);
18388 DONE;
18389})
18390
;; Variable arithmetic right shift (ashiftrt) of operand 1 by the
;; vector of counts in operand 2, for VI48_AVX512F_AVX512VL modes;
;; emitted as vpsrav<ssemodesuffix> with an optional mask operand.
fcb19554 18391(define_insn "<avx2_avx512>_ashrv<mode><mask_name>"
7da26bee 18392 [(set (match_operand:VI48_AVX512F_AVX512VL 0 "register_operand" "=v")
18393 (ashiftrt:VI48_AVX512F_AVX512VL
18394 (match_operand:VI48_AVX512F_AVX512VL 1 "register_operand" "v")
18395 (match_operand:VI48_AVX512F_AVX512VL 2 "nonimmediate_operand" "vm")))]
5220cab6 18396 "TARGET_AVX2 && <mask_mode512bit_condition>"
18397 "vpsrav<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
5deb404d 18398 [(set_attr "type" "sseishft")
f062acd7 18399 (set_attr "prefix" "maybe_evex")
7d079352 18400 (set_attr "mode" "<sseinsnmode>")])
5deb404d 18401
;; Variable arithmetic right shift for VI2_AVX512VL modes under
;; TARGET_AVX512BW; emitted as vpsravw with an optional mask operand.
fcb19554 18402(define_insn "<avx2_avx512>_ashrv<mode><mask_name>"
7da26bee 18403 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
18404 (ashiftrt:VI2_AVX512VL
18405 (match_operand:VI2_AVX512VL 1 "register_operand" "v")
18406 (match_operand:VI2_AVX512VL 2 "nonimmediate_operand" "vm")))]
18407 "TARGET_AVX512BW"
18408 "vpsravw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
18409 [(set_attr "type" "sseishft")
18410 (set_attr "prefix" "maybe_evex")
18411 (set_attr "mode" "<sseinsnmode>")])
18412
;; Variable shift for each code in the any_lshift iterator, VI48_AVX512F
;; modes: operand 1 shifted by the vector of counts in operand 2;
;; emitted as vp<vshift>v<ssemodesuffix> with an optional mask operand.
fcb19554 18413(define_insn "<avx2_avx512>_<shift_insn>v<mode><mask_name>"
db3a6e9c 18414 [(set (match_operand:VI48_AVX512F 0 "register_operand" "=v")
18415 (any_lshift:VI48_AVX512F
18416 (match_operand:VI48_AVX512F 1 "register_operand" "v")
18417 (match_operand:VI48_AVX512F 2 "nonimmediate_operand" "vm")))]
5220cab6 18418 "TARGET_AVX2 && <mask_mode512bit_condition>"
18419 "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
5deb404d 18420 [(set_attr "type" "sseishft")
f062acd7 18421 (set_attr "prefix" "maybe_evex")
5deb404d 18422 (set_attr "mode" "<sseinsnmode>")])
db3a6e9c 18423
;; Variable shift for each code in the any_lshift iterator, for
;; VI2_AVX512VL modes under TARGET_AVX512BW.
fcb19554 18424(define_insn "<avx2_avx512>_<shift_insn>v<mode><mask_name>"
db3a6e9c 18425 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
18426 (any_lshift:VI2_AVX512VL
18427 (match_operand:VI2_AVX512VL 1 "register_operand" "v")
18428 (match_operand:VI2_AVX512VL 2 "nonimmediate_operand" "vm")))]
18429 "TARGET_AVX512BW"
18430 "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
18431 [(set_attr "type" "sseishft")
18432 (set_attr "prefix" "maybe_evex")
18433 (set_attr "mode" "<sseinsnmode>")])
5deb404d 18434
;; Concatenate two half-width vectors (operand 1 low, operand 2 high)
;; into a V_256_512 register.
;;  - Alternative 0 (operand 2 in a register or memory): vinsert<i128>
;;    with immediate 0x1, inserting operand 2 above operand 1.
;;  - Alternative 1 (operand 2 matches constraint "C", presumably the
;;    zero constant -- TODO confirm): a plain move of operand 1 into
;;    the %t0/%x0 form of the destination, with the mnemonic chosen
;;    from the insn's "mode" attribute.
12cbfa26 18435(define_insn "avx_vec_concat<mode>"
6615b722 18436 [(set (match_operand:V_256_512 0 "register_operand" "=x,x")
18437 (vec_concat:V_256_512
63d5e521 18438 (match_operand:<ssehalfvecmode> 1 "register_operand" "x,x")
18439 (match_operand:<ssehalfvecmode> 2 "vector_move_operand" "xm,C")))]
ed30e0a6 18440 "TARGET_AVX"
18441{
18442 switch (which_alternative)
18443 {
18444 case 0:
6615b722 18445 return "vinsert<i128>\t{$0x1, %2, %<concat_tg_mode>1, %0|%0, %<concat_tg_mode>1, %2, 0x1}";
ed30e0a6 18446 case 1:
18447 switch (get_attr_mode (insn))
5deb404d 18448 {
6615b722 18449 case MODE_V16SF:
18450 return "vmovaps\t{%1, %t0|%t0, %1}";
18451 case MODE_V8DF:
18452 return "vmovapd\t{%1, %t0|%t0, %1}";
ed30e0a6 18453 case MODE_V8SF:
18454 return "vmovaps\t{%1, %x0|%x0, %1}";
18455 case MODE_V4DF:
18456 return "vmovapd\t{%1, %x0|%x0, %1}";
6615b722 18457 case MODE_XI:
18458 return "vmovdqa\t{%1, %t0|%t0, %1}";
18459 case MODE_OI:
ed30e0a6 18460 return "vmovdqa\t{%1, %x0|%x0, %1}";
6615b722 18461 default:
18462 gcc_unreachable ();
ed30e0a6 18463 }
18464 default:
18465 gcc_unreachable ();
18466 }
18467}
18468 [(set_attr "type" "sselog,ssemov")
00a0e418 18469 (set_attr "prefix_extra" "1,*")
18470 (set_attr "length_immediate" "1,*")
6615b722 18471 (set_attr "prefix" "maybe_evex")
63d5e521 18472 (set_attr "mode" "<sseinsnmode>")])
ec113e67 18473
;; vcvtph2ps with a V4SF result: modelled as elements 0-3 of a V8SF
;; UNSPEC_VCVTPH2PS applied to the V8HI register operand 1.
af00a412 18474(define_insn "vcvtph2ps<mask_name>"
18475 [(set (match_operand:V4SF 0 "register_operand" "=v")
ec113e67 18476 (vec_select:V4SF
af00a412 18477 (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "v")]
ec113e67 18478 UNSPEC_VCVTPH2PS)
18479 (parallel [(const_int 0) (const_int 1)
30f58bc8 18480 (const_int 2) (const_int 3)])))]
af00a412 18481 "TARGET_F16C || TARGET_AVX512VL"
18482 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
ec113e67 18483 [(set_attr "type" "ssecvt")
af00a412 18484 (set_attr "prefix" "maybe_evex")
ec113e67 18485 (set_attr "mode" "V4SF")])
18486
;; vcvtph2ps with a V4HI memory source (operand 1) and a V4SF register
;; result, expressed directly as UNSPEC_VCVTPH2PS.
af00a412 18487(define_insn "*vcvtph2ps_load<mask_name>"
18488 [(set (match_operand:V4SF 0 "register_operand" "=v")
ec113e67 18489 (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
18490 UNSPEC_VCVTPH2PS))]
af00a412 18491 "TARGET_F16C || TARGET_AVX512VL"
18492 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
ec113e67 18493 [(set_attr "type" "ssecvt")
18494 (set_attr "prefix" "vex")
18495 (set_attr "mode" "V8SF")])
18496
;; vcvtph2ps from a V8HI register-or-memory source (operand 1) to a
;; V8SF register result.
af00a412 18497(define_insn "vcvtph2ps256<mask_name>"
18498 [(set (match_operand:V8SF 0 "register_operand" "=v")
18499 (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "vm")]
ec113e67 18500 UNSPEC_VCVTPH2PS))]
af00a412 18501 "TARGET_F16C || TARGET_AVX512VL"
18502 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
ec113e67 18503 [(set_attr "type" "ssecvt")
18504 (set_attr "prefix" "vex")
6470d004 18505 (set_attr "btver2_decode" "double")
ec113e67 18506 (set_attr "mode" "V8SF")])
18507
;; vcvtph2ps from a V16HI source to a V16SF register result under
;; TARGET_AVX512F.  The source predicate/constraint and the extra
;; template operand come from the <round_saeonly_*> attributes.
dbfe84d5 18508(define_insn "<mask_codefor>avx512f_vcvtph2ps512<mask_name><round_saeonly_name>"
697a43f8 18509 [(set (match_operand:V16SF 0 "register_operand" "=v")
5220cab6 18510 (unspec:V16SF
dbfe84d5 18511 [(match_operand:V16HI 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
5220cab6 18512 UNSPEC_VCVTPH2PS))]
697a43f8 18513 "TARGET_AVX512F"
dbfe84d5 18514 "vcvtph2ps\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
697a43f8 18515 [(set_attr "type" "ssecvt")
18516 (set_attr "prefix" "evex")
18517 (set_attr "mode" "V16SF")])
18518
;; Masked vcvtps2ph expander (TARGET_AVX512VL).  The V4HI conversion
;; result is concatenated with a V4HI zero vector (operand 5, set by
;; the preparation statement) and merged with operand 3 under the QI
;; mask in operand 4.  Operand 2 is an immediate in the range 0-255.
af00a412 18519(define_expand "vcvtps2ph_mask"
18520 [(set (match_operand:V8HI 0 "register_operand")
18521 (vec_merge:V8HI
18522 (vec_concat:V8HI
18523 (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
18524 (match_operand:SI 2 "const_0_to_255_operand")]
18525 UNSPEC_VCVTPS2PH)
18526 (match_dup 5))
18527 (match_operand:V8HI 3 "vector_move_operand")
18528 (match_operand:QI 4 "register_operand")))]
18529 "TARGET_AVX512VL"
18530 "operands[5] = CONST0_RTX (V4HImode);")
18531
;; vcvtps2ph expander (TARGET_F16C).  The V4HI conversion result forms
;; the low half of the V8HI destination; the high half is a V4HI zero
;; vector (operand 3, set by the preparation statement).
ec113e67 18532(define_expand "vcvtps2ph"
abd4f58b 18533 [(set (match_operand:V8HI 0 "register_operand")
ec113e67 18534 (vec_concat:V8HI
abd4f58b 18535 (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
18536 (match_operand:SI 2 "const_0_to_255_operand")]
ec113e67 18537 UNSPEC_VCVTPS2PH)
18538 (match_dup 3)))]
18539 "TARGET_F16C"
18540 "operands[3] = CONST0_RTX (V4HImode);")
18541
;; Register-destination vcvtps2ph: the V4HI conversion of operand 1
;; (with immediate operand 2) concatenated with a zero V4HI (operand 3
;; must satisfy const0_operand).
af00a412 18542(define_insn "*vcvtps2ph<mask_name>"
18543 [(set (match_operand:V8HI 0 "register_operand" "=v")
ec113e67 18544 (vec_concat:V8HI
af00a412 18545 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "v")
e16e10c8 18546 (match_operand:SI 2 "const_0_to_255_operand" "N")]
ec113e67 18547 UNSPEC_VCVTPS2PH)
abd4f58b 18548 (match_operand:V4HI 3 "const0_operand")))]
fd1fee28 18549 "(TARGET_F16C || TARGET_AVX512VL) && <mask_avx512vl_condition>"
af00a412 18550 "vcvtps2ph\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"
ec113e67 18551 [(set_attr "type" "ssecvt")
af00a412 18552 (set_attr "prefix" "maybe_evex")
ec113e67 18553 (set_attr "mode" "V4SF")])
18554
;; vcvtps2ph storing its V4HI result directly to memory (operand 0).
af00a412 18555(define_insn "*vcvtps2ph_store<mask_name>"
ec113e67 18556 [(set (match_operand:V4HI 0 "memory_operand" "=m")
18557 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
e16e10c8 18558 (match_operand:SI 2 "const_0_to_255_operand" "N")]
ec113e67 18559 UNSPEC_VCVTPS2PH))]
af00a412 18560 "TARGET_F16C || TARGET_AVX512VL"
18561 "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
ec113e67 18562 [(set_attr "type" "ssecvt")
af00a412 18563 (set_attr "prefix" "maybe_evex")
ec113e67 18564 (set_attr "mode" "V4SF")])
18565
;; vcvtps2ph from a V8SF register (operand 1) to a V8HI register or
;; memory destination, with immediate operand 2.
af00a412 18566(define_insn "vcvtps2ph256<mask_name>"
ec113e67 18567 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm")
18568 (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "x")
e16e10c8 18569 (match_operand:SI 2 "const_0_to_255_operand" "N")]
ec113e67 18570 UNSPEC_VCVTPS2PH))]
af00a412 18571 "TARGET_F16C || TARGET_AVX512VL"
18572 "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
ec113e67 18573 [(set_attr "type" "ssecvt")
af00a412 18574 (set_attr "prefix" "maybe_evex")
6470d004 18575 (set_attr "btver2_decode" "vector")
ec113e67 18576 (set_attr "mode" "V8SF")])
5deb404d 18577
;; vcvtps2ph from a V16SF register (operand 1) to a V16HI register or
;; memory destination under TARGET_AVX512F, with immediate operand 2.
5220cab6 18578(define_insn "<mask_codefor>avx512f_vcvtps2ph512<mask_name>"
697a43f8 18579 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=vm")
5220cab6 18580 (unspec:V16HI
18581 [(match_operand:V16SF 1 "register_operand" "v")
18582 (match_operand:SI 2 "const_0_to_255_operand" "N")]
18583 UNSPEC_VCVTPS2PH))]
697a43f8 18584 "TARGET_AVX512F"
5220cab6 18585 "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
697a43f8 18586 [(set_attr "type" "ssecvt")
18587 (set_attr "prefix" "evex")
18588 (set_attr "mode" "V16SF")])
18589
5deb404d 18590;; For gather* insn patterns
;; VEC_GATHER_MODE lists the data modes the avx2_gather* patterns below
;; iterate over.
18591(define_mode_iterator VEC_GATHER_MODE
18592 [V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF])
;; Maps a data mode to the SI-element vector mode used for the index
;; operand of the *gathersi / *scattersi patterns.
16dfb112 18593(define_mode_attr VEC_GATHER_IDXSI
8e6b975f 18594 [(V2DI "V4SI") (V4DI "V4SI") (V8DI "V8SI")
18595 (V2DF "V4SI") (V4DF "V4SI") (V8DF "V8SI")
18596 (V4SI "V4SI") (V8SI "V8SI") (V16SI "V16SI")
18597 (V4SF "V4SI") (V8SF "V8SI") (V16SF "V16SI")])
18598
;; Maps a data mode to the DI-element vector mode used for the index
;; operand of the *gatherdi / *scatterdi patterns.
16dfb112 18599(define_mode_attr VEC_GATHER_IDXDI
8e6b975f 18600 [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
18601 (V2DF "V2DI") (V4DF "V4DI") (V8DF "V8DI")
18602 (V4SI "V2DI") (V8SI "V4DI") (V16SI "V8DI")
18603 (V4SF "V2DI") (V8SF "V4DI") (V16SF "V8DI")])
18604
;; Maps a data mode to the mode of the source/mask/result registers in
;; the *gatherdi / *scatterdi patterns; for the 8- and 16-element
;; SI/SF modes this is the half-width vector mode.
16dfb112 18605(define_mode_attr VEC_GATHER_SRCDI
8e6b975f 18606 [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
18607 (V2DF "V2DF") (V4DF "V4DF") (V8DF "V8DF")
18608 (V4SI "V4SI") (V8SI "V4SI") (V16SI "V8SI")
18609 (V4SF "V4SF") (V8SF "V4SF") (V16SF "V8SF")])
5deb404d 18610
18611(define_expand "avx2_gathersi<mode>"
abd4f58b 18612 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
a24c6b3c 18613 (unspec:VEC_GATHER_MODE
abd4f58b 18614 [(match_operand:VEC_GATHER_MODE 1 "register_operand")
de084923 18615 (mem:<ssescalarmode>
18616 (match_par_dup 7
abd4f58b 18617 [(match_operand 2 "vsib_address_operand")
16dfb112 18618 (match_operand:<VEC_GATHER_IDXSI>
abd4f58b 18619 3 "register_operand")
18620 (match_operand:SI 5 "const1248_operand ")]))
a24c6b3c 18621 (mem:BLK (scratch))
abd4f58b 18622 (match_operand:VEC_GATHER_MODE 4 "register_operand")]
a24c6b3c 18623 UNSPEC_GATHER))
abd4f58b 18624 (clobber (match_scratch:VEC_GATHER_MODE 6))])]
de084923 18625 "TARGET_AVX2"
18626{
18627 operands[7]
18628 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
18629 operands[5]), UNSPEC_VSIBADDR);
18630})
5deb404d 18631
18632(define_insn "*avx2_gathersi<mode>"
a24c6b3c 18633 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
5deb404d 18634 (unspec:VEC_GATHER_MODE
a24c6b3c 18635 [(match_operand:VEC_GATHER_MODE 2 "register_operand" "0")
de084923 18636 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
18637 [(unspec:P
1e662e65 18638 [(match_operand:P 3 "vsib_address_operand" "Tv")
16dfb112 18639 (match_operand:<VEC_GATHER_IDXSI> 4 "register_operand" "x")
de084923 18640 (match_operand:SI 6 "const1248_operand" "n")]
18641 UNSPEC_VSIBADDR)])
a24c6b3c 18642 (mem:BLK (scratch))
de084923 18643 (match_operand:VEC_GATHER_MODE 5 "register_operand" "1")]
a24c6b3c 18644 UNSPEC_GATHER))
18645 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
5deb404d 18646 "TARGET_AVX2"
de084923 18647 "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
5deb404d 18648 [(set_attr "type" "ssemov")
18649 (set_attr "prefix" "vex")
18650 (set_attr "mode" "<sseinsnmode>")])
18651
;; Variant of the SI-index AVX2 gather in which the first unspec
;; element is (pc) instead of a source register operand.  Operand 4
;; (mask) is tied to the clobbered scratch operand 1; operand 6 is the
;; VSIB memory reference built from base (2), index (3) and scale (5).
6296bd96 18652(define_insn "*avx2_gathersi<mode>_2"
18653 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
18654 (unspec:VEC_GATHER_MODE
18655 [(pc)
18656 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
18657 [(unspec:P
1e662e65 18658 [(match_operand:P 2 "vsib_address_operand" "Tv")
6296bd96 18659 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "x")
18660 (match_operand:SI 5 "const1248_operand" "n")]
18661 UNSPEC_VSIBADDR)])
18662 (mem:BLK (scratch))
18663 (match_operand:VEC_GATHER_MODE 4 "register_operand" "1")]
18664 UNSPEC_GATHER))
18665 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
18666 "TARGET_AVX2"
18667 "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
18668 [(set_attr "type" "ssemov")
18669 (set_attr "prefix" "vex")
18670 (set_attr "mode" "<sseinsnmode>")])
18671
;; Expander for AVX2 gathers with DI-element indices.
;;   operand 0: destination register
;;   operand 1: source (pass-through) register, mode <VEC_GATHER_SRCDI>
;;   operand 2: base address (vsib_address_operand)
;;   operand 3: index vector, mode <VEC_GATHER_IDXDI>
;;   operand 4: mask register, mode <VEC_GATHER_SRCDI>
;;   operand 5: scale, one of 1/2/4/8 (const1248_operand)
;;   operand 6: scratch clobbered by the insn
;;   operand 7: built by the preparation statement as an
;;              UNSPEC_VSIBADDR of (base, index, scale) in Pmode.
;; The scale predicate name is written without trailing whitespace so
;; that it names the same predicate as the matching insns.
5deb404d 18672(define_expand "avx2_gatherdi<mode>"
abd4f58b 18673 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
a24c6b3c 18674 (unspec:VEC_GATHER_MODE
abd4f58b 18675 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
de084923 18676 (mem:<ssescalarmode>
18677 (match_par_dup 7
abd4f58b 18678 [(match_operand 2 "vsib_address_operand")
16dfb112 18679 (match_operand:<VEC_GATHER_IDXDI>
abd4f58b 18680 3 "register_operand")
18681 (match_operand:SI 5 "const1248_operand")]))
a24c6b3c 18682 (mem:BLK (scratch))
7d9c40e2 18683 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand")]
a24c6b3c 18684 UNSPEC_GATHER))
abd4f58b 18685 (clobber (match_scratch:VEC_GATHER_MODE 6))])]
de084923 18686 "TARGET_AVX2"
18687{
18688 operands[7]
18689 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
18690 operands[5]), UNSPEC_VSIBADDR);
18691})
5deb404d 18692
;; AVX2 gather with DI-element indices.  Operand 2 (source, mode
;; <VEC_GATHER_SRCDI>) is tied to the destination and operand 5 (mask)
;; to the clobbered scratch operand 1.  The template prints operands 5
;; and 2 -- the <VEC_GATHER_SRCDI>-mode views of the tied registers --
;; rather than operands 1 and 0.
18693(define_insn "*avx2_gatherdi<mode>"
16dfb112 18694 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
18695 (unspec:VEC_GATHER_MODE
18696 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
de084923 18697 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
18698 [(unspec:P
1e662e65 18699 [(match_operand:P 3 "vsib_address_operand" "Tv")
16dfb112 18700 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
de084923 18701 (match_operand:SI 6 "const1248_operand" "n")]
18702 UNSPEC_VSIBADDR)])
a24c6b3c 18703 (mem:BLK (scratch))
16dfb112 18704 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
a24c6b3c 18705 UNSPEC_GATHER))
16dfb112 18706 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
5deb404d 18707 "TARGET_AVX2"
16dfb112 18708 "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"
5deb404d 18709 [(set_attr "type" "ssemov")
18710 (set_attr "prefix" "vex")
18711 (set_attr "mode" "<sseinsnmode>")])
6296bd96 18712
;; Variant of the DI-index AVX2 gather in which the first unspec
;; element is (pc) instead of a source register operand.  When the data
;; mode differs from <VEC_GATHER_SRCDI>, the destination is printed
;; with the %x modifier; otherwise it is printed as-is.
18713(define_insn "*avx2_gatherdi<mode>_2"
18714 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
18715 (unspec:VEC_GATHER_MODE
18716 [(pc)
18717 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
18718 [(unspec:P
1e662e65 18719 [(match_operand:P 2 "vsib_address_operand" "Tv")
6296bd96 18720 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
18721 (match_operand:SI 5 "const1248_operand" "n")]
18722 UNSPEC_VSIBADDR)])
18723 (mem:BLK (scratch))
18724 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
18725 UNSPEC_GATHER))
18726 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
18727 "TARGET_AVX2"
18728{
18729 if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
18730 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";
18731 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";
18732}
18733 [(set_attr "type" "ssemov")
18734 (set_attr "prefix" "vex")
18735 (set_attr "mode" "<sseinsnmode>")])
2cded00a 18736
;; DI-index AVX2 gather for VI4F_256 data modes where only elements 0-3
;; of the gather result are kept: the destination has mode
;; <VEC_GATHER_SRCDI> and is a vec_select of the UNSPEC_GATHER.
;; Operand 2 is tied to the destination, operand 5 to the scratch.
18737(define_insn "*avx2_gatherdi<mode>_3"
18738 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
18739 (vec_select:<VEC_GATHER_SRCDI>
18740 (unspec:VI4F_256
18741 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
18742 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
18743 [(unspec:P
1e662e65 18744 [(match_operand:P 3 "vsib_address_operand" "Tv")
2cded00a 18745 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
18746 (match_operand:SI 6 "const1248_operand" "n")]
18747 UNSPEC_VSIBADDR)])
18748 (mem:BLK (scratch))
18749 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
18750 UNSPEC_GATHER)
18751 (parallel [(const_int 0) (const_int 1)
18752 (const_int 2) (const_int 3)])))
18753 (clobber (match_scratch:VI4F_256 1 "=&x"))]
18754 "TARGET_AVX2"
18755 "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"
18756 [(set_attr "type" "ssemov")
18757 (set_attr "prefix" "vex")
18758 (set_attr "mode" "<sseinsnmode>")])
18759
;; Same as the _3 variant (elements 0-3 of a VI4F_256 DI-index gather
;; kept in a <VEC_GATHER_SRCDI> destination), but with (pc) in place of
;; the source register operand.  Operand 4 (mask) is tied to the
;; scratch operand 1.
18760(define_insn "*avx2_gatherdi<mode>_4"
18761 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
18762 (vec_select:<VEC_GATHER_SRCDI>
18763 (unspec:VI4F_256
18764 [(pc)
18765 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
18766 [(unspec:P
1e662e65 18767 [(match_operand:P 2 "vsib_address_operand" "Tv")
2cded00a 18768 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
18769 (match_operand:SI 5 "const1248_operand" "n")]
18770 UNSPEC_VSIBADDR)])
18771 (mem:BLK (scratch))
18772 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
18773 UNSPEC_GATHER)
18774 (parallel [(const_int 0) (const_int 1)
18775 (const_int 2) (const_int 3)])))
18776 (clobber (match_scratch:VI4F_256 1 "=&x"))]
18777 "TARGET_AVX2"
18778 "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"
18779 [(set_attr "type" "ssemov")
18780 (set_attr "prefix" "vex")
18781 (set_attr "mode" "<sseinsnmode>")])
8e6b975f 18782
;; Expander for AVX-512 gathers with SI-element indices.
;;   operand 0: destination, 1: source (pass-through) register,
;;   operand 2: base address, 3: index vector (<VEC_GATHER_IDXSI>),
;;   operand 4: mask register (<avx512fmaskmode>), 5: scale (1/2/4/8),
;;   operand 7: mask-mode scratch clobbered by the insn.
;; Operand 6 is built by the preparation statement as an
;; UNSPEC_VSIBADDR of (base, index, scale) in Pmode.
06ea6335 18783(define_expand "<avx512>_gathersi<mode>"
18784 [(parallel [(set (match_operand:VI48F 0 "register_operand")
18785 (unspec:VI48F
18786 [(match_operand:VI48F 1 "register_operand")
8e6b975f 18787 (match_operand:<avx512fmaskmode> 4 "register_operand")
18788 (mem:<ssescalarmode>
18789 (match_par_dup 6
18790 [(match_operand 2 "vsib_address_operand")
18791 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand")
18792 (match_operand:SI 5 "const1248_operand")]))]
18793 UNSPEC_GATHER))
18794 (clobber (match_scratch:<avx512fmaskmode> 7))])]
18795 "TARGET_AVX512F"
18796{
18797 operands[6]
18798 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
18799 operands[5]), UNSPEC_VSIBADDR);
18800})
18801
;; AVX-512 gather with SI-element indices.  Operand 1 (source) is tied
;; to the destination and operand 7 (mask) is tied to the clobbered
;; mask scratch operand 2.  Operand 6 is the VSIB memory reference
;; built from base (4), index (3) and scale (5); the Intel-syntax
;; alternative prints it with the %g modifier.
18802(define_insn "*avx512f_gathersi<mode>"
06ea6335 18803 [(set (match_operand:VI48F 0 "register_operand" "=&v")
18804 (unspec:VI48F
18805 [(match_operand:VI48F 1 "register_operand" "0")
8e6b975f 18806 (match_operand:<avx512fmaskmode> 7 "register_operand" "2")
18807 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
18808 [(unspec:P
1e662e65 18809 [(match_operand:P 4 "vsib_address_operand" "Tv")
8e6b975f 18810 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "v")
18811 (match_operand:SI 5 "const1248_operand" "n")]
18812 UNSPEC_VSIBADDR)])]
18813 UNSPEC_GATHER))
a31e7f46 18814 (clobber (match_scratch:<avx512fmaskmode> 2 "=&Yk"))]
8e6b975f 18815 "TARGET_AVX512F"
18816 "v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %g6}"
18817 [(set_attr "type" "ssemov")
18818 (set_attr "prefix" "evex")
18819 (set_attr "mode" "<sseinsnmode>")])
18820
;; Variant of the SI-index AVX-512 gather with (pc) in place of the
;; source register operand.  Operand 6 (mask) is tied to the clobbered
;; mask scratch operand 1; operand 5 is the VSIB memory reference built
;; from base (3), index (2) and scale (4).
18821(define_insn "*avx512f_gathersi<mode>_2"
06ea6335 18822 [(set (match_operand:VI48F 0 "register_operand" "=&v")
18823 (unspec:VI48F
8e6b975f 18824 [(pc)
18825 (match_operand:<avx512fmaskmode> 6 "register_operand" "1")
18826 (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
18827 [(unspec:P
1e662e65 18828 [(match_operand:P 3 "vsib_address_operand" "Tv")
8e6b975f 18829 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
18830 (match_operand:SI 4 "const1248_operand" "n")]
18831 UNSPEC_VSIBADDR)])]
18832 UNSPEC_GATHER))
a31e7f46 18833 (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
8e6b975f 18834 "TARGET_AVX512F"
18835 "v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %g5}"
18836 [(set_attr "type" "ssemov")
18837 (set_attr "prefix" "evex")
18838 (set_attr "mode" "<sseinsnmode>")])
18839
18840
;; Expander for AVX-512 gathers with DI-element indices.
;;   operand 0: destination,
;;   operand 1: source (pass-through), mode <VEC_GATHER_SRCDI>,
;;   operand 2: base address, 3: index vector (<VEC_GATHER_IDXDI>),
;;   operand 4: QImode mask register, 5: scale (1/2/4/8),
;;   operand 7: QImode scratch clobbered by the insn.
;; Operand 6 is built by the preparation statement as an
;; UNSPEC_VSIBADDR of (base, index, scale) in Pmode.
06ea6335 18841(define_expand "<avx512>_gatherdi<mode>"
18842 [(parallel [(set (match_operand:VI48F 0 "register_operand")
18843 (unspec:VI48F
8e6b975f 18844 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
18845 (match_operand:QI 4 "register_operand")
18846 (mem:<ssescalarmode>
18847 (match_par_dup 6
18848 [(match_operand 2 "vsib_address_operand")
18849 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand")
18850 (match_operand:SI 5 "const1248_operand")]))]
18851 UNSPEC_GATHER))
18852 (clobber (match_scratch:QI 7))])]
18853 "TARGET_AVX512F"
18854{
18855 operands[6]
18856 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
18857 operands[5]), UNSPEC_VSIBADDR);
18858})
18859
;; AVX-512 gather with DI-element indices.  Operand 1 (source, mode
;; <VEC_GATHER_SRCDI>) is tied to the destination and operand 7 (mask)
;; to the clobbered QImode scratch operand 2.  The template prints
;; operand 1 -- the <VEC_GATHER_SRCDI>-mode view of the tied
;; destination register -- rather than operand 0.
18860(define_insn "*avx512f_gatherdi<mode>"
06ea6335 18861 [(set (match_operand:VI48F 0 "register_operand" "=&v")
18862 (unspec:VI48F
8e6b975f 18863 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand" "0")
18864 (match_operand:QI 7 "register_operand" "2")
18865 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
18866 [(unspec:P
1e662e65 18867 [(match_operand:P 4 "vsib_address_operand" "Tv")
8e6b975f 18868 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "v")
18869 (match_operand:SI 5 "const1248_operand" "n")]
18870 UNSPEC_VSIBADDR)])]
18871 UNSPEC_GATHER))
a31e7f46 18872 (clobber (match_scratch:QI 2 "=&Yk"))]
8e6b975f 18873 "TARGET_AVX512F"
18874 "v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %g6}"
18875 [(set_attr "type" "ssemov")
18876 (set_attr "prefix" "evex")
18877 (set_attr "mode" "<sseinsnmode>")])
18878
;; Variant of the DI-index AVX-512 gather with (pc) in place of the
;; source register operand.  Operand 6 (mask) is tied to the clobbered
;; QImode scratch operand 1; operand 5 is the VSIB memory reference
;; built from base (3), index (2) and scale (4).
;; When the data mode differs from <VEC_GATHER_SRCDI>, the destination
;; is printed with a narrowing modifier: %x0 when <MODE_SIZE> != 64 and
;; %t0 otherwise.  Both assembler dialects of a template must name the
;; same register, so the Intel alternative of the %x0 case uses %x0 as
;; well (it previously printed %t0).
18879(define_insn "*avx512f_gatherdi<mode>_2"
06ea6335 18880 [(set (match_operand:VI48F 0 "register_operand" "=&v")
18881 (unspec:VI48F
8e6b975f 18882 [(pc)
18883 (match_operand:QI 6 "register_operand" "1")
18884 (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
18885 [(unspec:P
1e662e65 18886 [(match_operand:P 3 "vsib_address_operand" "Tv")
8e6b975f 18887 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
18888 (match_operand:SI 4 "const1248_operand" "n")]
18889 UNSPEC_VSIBADDR)])]
18890 UNSPEC_GATHER))
a31e7f46 18891 (clobber (match_scratch:QI 1 "=&Yk"))]
8e6b975f 18892 "TARGET_AVX512F"
18893{
18894 if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
06ea6335 18895 {
996b47b0 18896 if (<MODE_SIZE> != 64)
06ea6335 18897 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %x0%{%1%}|%x0%{%1%}, %g5}";
18898 else
18899 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %g5}";
18900 }
8e6b975f 18901 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %g5}";
18902}
18903 [(set_attr "type" "ssemov")
18904 (set_attr "prefix" "evex")
18905 (set_attr "mode" "<sseinsnmode>")])
18906
;; Expander for AVX-512 scatters with SI-element indices.
;;   operand 0: base address, 1: mask register (<avx512fmaskmode>),
;;   operand 2: index vector (<VEC_GATHER_IDXSI>),
;;   operand 3: value register to store, 4: scale (1/2/4/8),
;;   operand 6: mask-mode scratch clobbered by the insn.
;; Operand 5 is built by the preparation statement as an
;; UNSPEC_VSIBADDR of (base, index, scale) in Pmode.
06ea6335 18907(define_expand "<avx512>_scattersi<mode>"
18908 [(parallel [(set (mem:VI48F
8e6b975f 18909 (match_par_dup 5
18910 [(match_operand 0 "vsib_address_operand")
18911 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand")
18912 (match_operand:SI 4 "const1248_operand")]))
06ea6335 18913 (unspec:VI48F
8e6b975f 18914 [(match_operand:<avx512fmaskmode> 1 "register_operand")
06ea6335 18915 (match_operand:VI48F 3 "register_operand")]
8e6b975f 18916 UNSPEC_SCATTER))
18917 (clobber (match_scratch:<avx512fmaskmode> 6))])]
18918 "TARGET_AVX512F"
18919{
18920 operands[5]
18921 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
18922 operands[4]), UNSPEC_VSIBADDR);
18923})
18924
;; AVX-512 scatter with SI-element indices.  The destination is the
;; VSIB memory reference operand 5 (base 0, index 2, scale 4); operand
;; 3 is the value register and operand 6 (mask) is tied to the
;; clobbered mask scratch operand 1.
18925(define_insn "*avx512f_scattersi<mode>"
06ea6335 18926 [(set (match_operator:VI48F 5 "vsib_mem_operator"
8e6b975f 18927 [(unspec:P
1e662e65 18928 [(match_operand:P 0 "vsib_address_operand" "Tv")
8e6b975f 18929 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
18930 (match_operand:SI 4 "const1248_operand" "n")]
18931 UNSPEC_VSIBADDR)])
06ea6335 18932 (unspec:VI48F
8e6b975f 18933 [(match_operand:<avx512fmaskmode> 6 "register_operand" "1")
06ea6335 18934 (match_operand:VI48F 3 "register_operand" "v")]
8e6b975f 18935 UNSPEC_SCATTER))
a31e7f46 18936 (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
8e6b975f 18937 "TARGET_AVX512F"
18938 "v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%5%{%1%}, %3}"
18939 [(set_attr "type" "ssemov")
18940 (set_attr "prefix" "evex")
18941 (set_attr "mode" "<sseinsnmode>")])
18942
;; Expander for a masked scatter with DI-element index vectors.
;; Same operand layout as the SI-index expander, except that the mask
;; (operand 1) and the scratch (operand 6) are always QImode and the
;; stored vector (operand 3) has mode <VEC_GATHER_SRCDI>.
;; Operand 5 is built in the preparation statement as an UNSPEC_VSIBADDR
;; of (base 0, index 2, scale 4) and substituted through match_par_dup.
06ea6335 18943(define_expand "<avx512>_scatterdi<mode>"
18944 [(parallel [(set (mem:VI48F
8e6b975f 18945 (match_par_dup 5
18946 [(match_operand 0 "vsib_address_operand")
06ea6335 18947 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand")
8e6b975f 18948 (match_operand:SI 4 "const1248_operand")]))
06ea6335 18949 (unspec:VI48F
8e6b975f 18950 [(match_operand:QI 1 "register_operand")
18951 (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand")]
18952 UNSPEC_SCATTER))
18953 (clobber (match_scratch:QI 6))])]
18954 "TARGET_AVX512F"
18955{
 /* Wrap base, index and scale into the VSIB address unspec in Pmode.  */
18956 operands[5]
18957 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
18958 operands[4]), UNSPEC_VSIBADDR);
18959})
18960
;; Insn matched by the DI-index scatter expander.  Operand 5 is the VSIB
;; memory destination (base 0, index vector 2, scale 4); operand 3, in
;; mode <VEC_GATHER_SRCDI>, is the source vector.  The QImode mask
;; (operand 6) is tied to scratch operand 1, which is printed as the
;; write mask and clobbered.
18961(define_insn "*avx512f_scatterdi<mode>"
06ea6335 18962 [(set (match_operator:VI48F 5 "vsib_mem_operator"
8e6b975f 18963 [(unspec:P
1e662e65 18964 [(match_operand:P 0 "vsib_address_operand" "Tv")
06ea6335 18965 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
8e6b975f 18966 (match_operand:SI 4 "const1248_operand" "n")]
18967 UNSPEC_VSIBADDR)])
06ea6335 18968 (unspec:VI48F
8e6b975f 18969 [(match_operand:QI 6 "register_operand" "1")
18970 (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand" "v")]
18971 UNSPEC_SCATTER))
a31e7f46 18972 (clobber (match_scratch:QI 1 "=&Yk"))]
8e6b975f 18973 "TARGET_AVX512F"
18974 "v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%5%{%1%}, %3}"
18975 [(set_attr "type" "ssemov")
18976 (set_attr "prefix" "evex")
18977 (set_attr "mode" "<sseinsnmode>")])
d2ff59d6 18978
;; Masked register-to-register compress.  Operand 1 is the source vector,
;; operand 3 the mask.  Operand 2 is either tied to the destination
;; (constraint "0") or a constant accepted by constraint "C"; %N2 in the
;; template is derived from it.
;; NOTE(review): %N2 presumably prints the zero-masking suffix when
;; operand 2 is the constant alternative -- confirm in print_operand.
6ce48b02 18979(define_insn "<avx512>_compress<mode>_mask"
18980 [(set (match_operand:VI48F 0 "register_operand" "=v")
18981 (unspec:VI48F
18982 [(match_operand:VI48F 1 "register_operand" "v")
18983 (match_operand:VI48F 2 "vector_move_operand" "0C")
a31e7f46 18984 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")]
5220cab6 18985 UNSPEC_COMPRESS))]
18986 "TARGET_AVX512F"
18987 "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
18988 [(set_attr "type" "ssemov")
18989 (set_attr "prefix" "evex")
18990 (set_attr "mode" "<sseinsnmode>")])
18991
;; Masked compress with a memory destination.  Operand 1 is the source
;; register and operand 2 the mask.  The (match_dup 0) inside the unspec
;; makes the previous memory contents an input of the operation.
;; NOTE(review): the source uses constraint "x" while the register form
;; of compress uses "v" -- confirm whether the narrower class is intended.
6ce48b02 18992(define_insn "<avx512>_compressstore<mode>_mask"
18993 [(set (match_operand:VI48F 0 "memory_operand" "=m")
18994 (unspec:VI48F
18995 [(match_operand:VI48F 1 "register_operand" "x")
5220cab6 18996 (match_dup 0)
a31e7f46 18997 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")]
5220cab6 18998 UNSPEC_COMPRESS_STORE))]
18999 "TARGET_AVX512F"
19000 "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
19001 [(set_attr "type" "ssemov")
19002 (set_attr "prefix" "evex")
19003 (set_attr "memory" "store")
19004 (set_attr "mode" "<sseinsnmode>")])
19005
;; Zero-masking expander for the expand operation.  Whatever the caller
;; passes as operand 2 is overwritten with the all-zero vector
;; CONST0_RTX (<MODE>mode) before the UNSPEC_EXPAND pattern is generated.
;; Operand 1 is the source (register or memory), operand 3 the mask.
6ce48b02 19006(define_expand "<avx512>_expand<mode>_maskz"
19007 [(set (match_operand:VI48F 0 "register_operand")
19008 (unspec:VI48F
19009 [(match_operand:VI48F 1 "nonimmediate_operand")
19010 (match_operand:VI48F 2 "vector_move_operand")
9a5ea1d5 19011 (match_operand:<avx512fmaskmode> 3 "register_operand")]
19012 UNSPEC_EXPAND))]
19013 "TARGET_AVX512F"
19014 "operands[2] = CONST0_RTX (<MODE>mode);")
19015
;; Masked expand.  Two alternatives: the source (operand 1) in a register
;; or in memory; the "memory" attribute is "none" or "load" accordingly.
;; Operand 2 is either tied to the destination ("0") or a constant
;; accepted by "C"; operand 3 is the mask printed inside {%3}.
6ce48b02 19016(define_insn "<avx512>_expand<mode>_mask"
19017 [(set (match_operand:VI48F 0 "register_operand" "=v,v")
19018 (unspec:VI48F
19019 [(match_operand:VI48F 1 "nonimmediate_operand" "v,m")
19020 (match_operand:VI48F 2 "vector_move_operand" "0C,0C")
a31e7f46 19021 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")]
5220cab6 19022 UNSPEC_EXPAND))]
19023 "TARGET_AVX512F"
19024 "v<sseintprefix>expand<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
19025 [(set_attr "type" "ssemov")
19026 (set_attr "prefix" "evex")
19027 (set_attr "memory" "none,load")
19028 (set_attr "mode" "<sseinsnmode>")])
19029
;; Packed vrange: operands 1 and 2 are the vector inputs and operand 3 is
;; an immediate in the range 0..15.  The pattern is expanded through the
;; <mask_name> and <round_saeonly_name> substitutions; the insn condition
;; additionally requires <round_saeonly_mode512bit_condition>.
6164575a 19030(define_insn "avx512dq_rangep<mode><mask_name><round_saeonly_name>"
19031 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
19032 (unspec:VF_AVX512VL
19033 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
19034 (match_operand:VF_AVX512VL 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
19035 (match_operand:SI 3 "const_0_to_15_operand")]
19036 UNSPEC_RANGE))]
19037 "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>"
88048095 19038 "vrange<ssemodesuffix>\t{%3, <round_saeonly_mask_op4>%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2<round_saeonly_mask_op4>, %3}"
6164575a 19039 [(set_attr "type" "sse")
19040 (set_attr "prefix" "evex")
19041 (set_attr "mode" "<MODE>")])
19042
;; Scalar vrange on 128-bit vectors.  The UNSPEC_RANGE result is merged
;; with operand 1 under the constant mask 1: element 0 comes from the
;; range computation and the remaining elements are copied from operand 1.
;; Operand 3 is an immediate in the range 0..15.
19043(define_insn "avx512dq_ranges<mode><round_saeonly_name>"
19044 [(set (match_operand:VF_128 0 "register_operand" "=v")
19045 (vec_merge:VF_128
19046 (unspec:VF_128
19047 [(match_operand:VF_128 1 "register_operand" "v")
19048 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
19049 (match_operand:SI 3 "const_0_to_15_operand")]
19050 UNSPEC_RANGE)
19051 (match_dup 1)
19052 (const_int 1)))]
19053 "TARGET_AVX512DQ"
88048095 19054 "vrange<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}"
6164575a 19055 [(set_attr "type" "sse")
19056 (set_attr "prefix" "evex")
19057 (set_attr "mode" "<MODE>")])
19058
;; Packed vfpclass: classifies the elements of operand 1 according to the
;; 8-bit immediate operand 2 and writes the result to a mask-mode
;; destination (operand 0, constraint "Yk").  Expanded through the
;; <mask_scalar_merge_name> substitution.
;; The ';' following the output template starts an (empty) comment.
19059(define_insn "avx512dq_fpclass<mode><mask_scalar_merge_name>"
19060 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
19061 (unspec:<avx512fmaskmode>
19062 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
19063 (match_operand:QI 2 "const_0_to_255_operand" "n")]
19064 UNSPEC_FPCLASS))]
19065 "TARGET_AVX512DQ"
19066 "vfpclass<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}";
19067 [(set_attr "type" "sse")
19068 (set_attr "length_immediate" "1")
19069 (set_attr "prefix" "evex")
19070 (set_attr "mode" "<MODE>")])
19071
;; Scalar vfpclass on 128-bit vectors.  The UNSPEC_FPCLASS mask result is
;; ANDed with the constant 1, so only bit 0 of the mask destination can
;; be set.  Operand 2 is an 8-bit immediate.
19072(define_insn "avx512dq_vmfpclass<mode>"
19073 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
19074 (and:<avx512fmaskmode>
19075 (unspec:<avx512fmaskmode>
19076 [(match_operand:VF_128 1 "register_operand" "v")
19077 (match_operand:QI 2 "const_0_to_255_operand" "n")]
19078 UNSPEC_FPCLASS)
19079 (const_int 1)))]
19080 "TARGET_AVX512DQ"
19081 "vfpclass<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
19082 [(set_attr "type" "sse")
19083 (set_attr "length_immediate" "1")
19084 (set_attr "prefix" "evex")
19085 (set_attr "mode" "<MODE>")])
19086
;; Packed vgetmant: operand 1 is the source vector and operand 2 an
;; immediate in the range 0..15.  Expanded through the <mask_name> and
;; <round_saeonly_name> substitutions.
250533c0 19087(define_insn "<avx512>_getmant<mode><mask_name><round_saeonly_name>"
19088 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
19089 (unspec:VF_AVX512VL
19090 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>")
85065932 19091 (match_operand:SI 2 "const_0_to_15_operand")]
19092 UNSPEC_GETMANT))]
19093 "TARGET_AVX512F"
dbfe84d5 19094 "vgetmant<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}";
85065932 19095 [(set_attr "prefix" "evex")
19096 (set_attr "mode" "<MODE>")])
19097
;; Scalar vgetmant on 128-bit vectors.  The UNSPEC_GETMANT result is
;; merged with operand 1 under the constant mask 1: element 0 comes from
;; the unspec, the remaining elements are copied from operand 1.
;; Operand 3 is an immediate in the range 0..15.
250533c0 19098(define_insn "avx512f_vgetmant<mode><round_saeonly_name>"
85065932 19099 [(set (match_operand:VF_128 0 "register_operand" "=v")
19100 (vec_merge:VF_128
19101 (unspec:VF_128
19102 [(match_operand:VF_128 1 "register_operand" "v")
fbf4df62 19103 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
85065932 19104 (match_operand:SI 3 "const_0_to_15_operand")]
19105 UNSPEC_GETMANT)
19106 (match_dup 1)
19107 (const_int 1)))]
19108 "TARGET_AVX512F"
0b7cc9c6 19109 "vgetmant<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}";
85065932 19110 [(set_attr "prefix" "evex")
19111 (set_attr "mode" "<ssescalarmode>")])
19112
d58134c2 19113;; The correct representation for this is absolutely enormous, and
19114;; surely not generally useful.
;; The operation is therefore kept opaque as UNSPEC_DBPSADBW.  Operands 1
;; and 2 are the inputs in mode <dbpsadbwmode>, operand 3 is an 8-bit
;; immediate, and the result has the 16-bit-element mode VI2_AVX512VL.
19115(define_insn "<mask_codefor>avx512bw_dbpsadbw<mode><mask_name>"
19116 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
19117 (unspec:VI2_AVX512VL
19118 [(match_operand:<dbpsadbwmode> 1 "register_operand" "v")
19119 (match_operand:<dbpsadbwmode> 2 "nonimmediate_operand" "vm")
19120 (match_operand:SI 3 "const_0_to_255_operand")]
19121 UNSPEC_DBPSADBW))]
19122 "TARGET_AVX512BW"
19123 "vdbpsadbw\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}"
19124 [(set_attr "isa" "avx")
19125 (set_attr "type" "sselog1")
19126 (set_attr "length_immediate" "1")
19127 (set_attr "prefix" "evex")
19128 (set_attr "mode" "<sseinsnmode>")])
19129
;; Vector count-leading-zeros, expressed with the generic clz RTL code
;; and emitted as vplzcnt.  Operand 1 may be a register or memory.
;; Expanded through the <mask_name> substitution.
5220cab6 19130(define_insn "clz<mode>2<mask_name>"
3d038641 19131 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
19132 (clz:VI48_AVX512VL
19133 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")))]
d2ff59d6 19134 "TARGET_AVX512CD"
5220cab6 19135 "vplzcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
d2ff59d6 19136 [(set_attr "type" "sse")
19137 (set_attr "prefix" "evex")
19138 (set_attr "mode" "<sseinsnmode>")])
19139
;; vpconflict, kept opaque as UNSPEC_CONFLICT with a single vector input
;; (register or memory).  Expanded through the <mask_name> substitution.
5220cab6 19140(define_insn "<mask_codefor>conflict<mode><mask_name>"
3d038641 19141 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
19142 (unspec:VI48_AVX512VL
19143 [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")]
d2ff59d6 19144 UNSPEC_CONFLICT))]
19145 "TARGET_AVX512CD"
5220cab6 19146 "vpconflict<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
d2ff59d6 19147 [(set_attr "type" "sse")
19148 (set_attr "prefix" "evex")
19149 (set_attr "mode" "<sseinsnmode>")])
fc975a40 19150
;; sha1msg1: two-operand form.  Operand 1 is tied to the destination
;; (constraint "0"), operand 2 may be a register or memory.
19151(define_insn "sha1msg1"
19152 [(set (match_operand:V4SI 0 "register_operand" "=x")
19153 (unspec:V4SI
19154 [(match_operand:V4SI 1 "register_operand" "0")
19155 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
19156 UNSPEC_SHA1MSG1))]
19157 "TARGET_SHA"
19158 "sha1msg1\t{%2, %0|%0, %2}"
19159 [(set_attr "type" "sselog1")
19160 (set_attr "mode" "TI")])
19161
;; sha1msg2: two-operand form.  Operand 1 is tied to the destination
;; (constraint "0"), operand 2 may be a register or memory.
19162(define_insn "sha1msg2"
19163 [(set (match_operand:V4SI 0 "register_operand" "=x")
19164 (unspec:V4SI
19165 [(match_operand:V4SI 1 "register_operand" "0")
19166 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
19167 UNSPEC_SHA1MSG2))]
19168 "TARGET_SHA"
19169 "sha1msg2\t{%2, %0|%0, %2}"
19170 [(set_attr "type" "sselog1")
19171 (set_attr "mode" "TI")])
19172
;; sha1nexte: two-operand form.  Operand 1 is tied to the destination
;; (constraint "0"), operand 2 may be a register or memory.
19173(define_insn "sha1nexte"
19174 [(set (match_operand:V4SI 0 "register_operand" "=x")
19175 (unspec:V4SI
19176 [(match_operand:V4SI 1 "register_operand" "0")
19177 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
19178 UNSPEC_SHA1NEXTE))]
19179 "TARGET_SHA"
19180 "sha1nexte\t{%2, %0|%0, %2}"
19181 [(set_attr "type" "sselog1")
19182 (set_attr "mode" "TI")])
19183
;; sha1rnds4: operand 1 is tied to the destination, operand 2 may be a
;; register or memory, and operand 3 is an immediate in the range 0..3
;; that is printed as the first (AT&T) / last (Intel) assembler operand.
19184(define_insn "sha1rnds4"
19185 [(set (match_operand:V4SI 0 "register_operand" "=x")
19186 (unspec:V4SI
19187 [(match_operand:V4SI 1 "register_operand" "0")
19188 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
19189 (match_operand:SI 3 "const_0_to_3_operand" "n")]
19190 UNSPEC_SHA1RNDS4))]
19191 "TARGET_SHA"
19192 "sha1rnds4\t{%3, %2, %0|%0, %2, %3}"
19193 [(set_attr "type" "sselog1")
19194 (set_attr "length_immediate" "1")
19195 (set_attr "mode" "TI")])
19196
;; sha256msg1: two-operand form.  Operand 1 is tied to the destination
;; (constraint "0"), operand 2 may be a register or memory.
19197(define_insn "sha256msg1"
19198 [(set (match_operand:V4SI 0 "register_operand" "=x")
19199 (unspec:V4SI
19200 [(match_operand:V4SI 1 "register_operand" "0")
19201 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
19202 UNSPEC_SHA256MSG1))]
19203 "TARGET_SHA"
19204 "sha256msg1\t{%2, %0|%0, %2}"
19205 [(set_attr "type" "sselog1")
19206 (set_attr "mode" "TI")])
19207
;; sha256msg2: two-operand form.  Operand 1 is tied to the destination
;; (constraint "0"), operand 2 may be a register or memory.
19208(define_insn "sha256msg2"
19209 [(set (match_operand:V4SI 0 "register_operand" "=x")
19210 (unspec:V4SI
19211 [(match_operand:V4SI 1 "register_operand" "0")
19212 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
19213 UNSPEC_SHA256MSG2))]
19214 "TARGET_SHA"
19215 "sha256msg2\t{%2, %0|%0, %2}"
19216 [(set_attr "type" "sselog1")
19217 (set_attr "mode" "TI")])
19218
;; sha256rnds2: operand 1 is tied to the destination, operand 2 may be a
;; register or memory, and operand 3 is a V4SI register restricted by
;; constraint "Yz".
;; NOTE(review): "Yz" presumably pins operand 3 to the first SSE register
;; (the instruction's implicit xmm0 input) -- confirm in constraints.md.
;; NOTE(review): length_immediate is set to 1 although the template
;; prints no immediate operand (operand 3 is a register) -- confirm
;; against the instruction encoding whether this attribute is intended.
19219(define_insn "sha256rnds2"
19220 [(set (match_operand:V4SI 0 "register_operand" "=x")
19221 (unspec:V4SI
19222 [(match_operand:V4SI 1 "register_operand" "0")
19223 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
19224 (match_operand:V4SI 3 "register_operand" "Yz")]
19225 UNSPEC_SHA256RNDS2))]
19226 "TARGET_SHA"
19227 "sha256rnds2\t{%3, %2, %0|%0, %2, %3}"
19228 [(set_attr "type" "sselog1")
19229 (set_attr "length_immediate" "1")
19230 (set_attr "mode" "TI")])
889d21f6 19231
;; Cast of a <ssequartermode> value to the full-width AVX512MODE2P mode,
;; represented as UNSPEC_CAST.  The insn has no assembler output of its
;; own ("#"); after reload it is split into a single move.  Exactly one
;; side of that move is re-expressed as a hard register in the other
;; side's mode so that both sides of emit_move_insn agree.
19232(define_insn_and_split "avx512f_<castmode><avxsizesuffix>_<castmode>"
19233 [(set (match_operand:AVX512MODE2P 0 "nonimmediate_operand" "=x,m")
19234 (unspec:AVX512MODE2P
19235 [(match_operand:<ssequartermode> 1 "nonimmediate_operand" "xm,x")]
19236 UNSPEC_CAST))]
19237 "TARGET_AVX512F"
19238 "#"
19239 "&& reload_completed"
19240 [(const_int 0)]
19241{
19242 rtx op0 = operands[0];
19243 rtx op1 = operands[1];
 /* Register destination: view it in the narrow source mode.  Otherwise
    the destination is memory (second alternative) and the source is a
    register, which is viewed in the wide destination mode instead.  */
19244 if (REG_P (op0))
19245 op0 = gen_rtx_REG (<ssequartermode>mode, REGNO (op0));
19246 else
19247 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
19248 emit_move_insn (op0, op1);
19249 DONE;
19250})
19251
;; Cast of a <ssehalfvecmode> value to the full-width AVX512MODE2P mode,
;; represented as UNSPEC_CAST.  Emitted as "#" and split after reload
;; into a single move, with one side re-expressed as a hard register in
;; the other side's mode.
19252(define_insn_and_split "avx512f_<castmode><avxsizesuffix>_256<castmode>"
19253 [(set (match_operand:AVX512MODE2P 0 "nonimmediate_operand" "=x,m")
19254 (unspec:AVX512MODE2P
19255 [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
19256 UNSPEC_CAST))]
19257 "TARGET_AVX512F"
19258 "#"
19259 "&& reload_completed"
19260 [(const_int 0)]
19261{
19262 rtx op0 = operands[0];
19263 rtx op1 = operands[1];
 /* Register destination: view it in the half-width source mode.
    Otherwise the destination is memory (second alternative) and the
    register source is viewed in the wide destination mode instead.  */
19264 if (REG_P (op0))
19265 op0 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op0));
19266 else
19267 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
19268 emit_move_insn (op0, op1);
19269 DONE;
19270})
8a12b665 19271
;; Iterates over the two 52-bit multiply-add unspecs (low and high
;; variants) so that one pattern can generate both instructions.
19272(define_int_iterator VPMADD52
19273 [UNSPEC_VPMADD52LUQ
19274 UNSPEC_VPMADD52HUQ])
19275
;; Maps each VPMADD52 unspec to the "luq"/"huq" suffix used in both the
;; pattern names and the emitted mnemonic.
19276(define_int_attr vpmadd52type
19277 [(UNSPEC_VPMADD52LUQ "luq") (UNSPEC_VPMADD52HUQ "huq")])
19278
;; Zero-masking expander for the "huq" variant.  It emits nothing from
;; its own template: it forwards operands 0-3 to the generated
;; gen_vpamdd52huq<mode>_maskz_1, inserting the all-zero vector
;; CONST0_RTX (<MODE>mode) before the mask (operand 4).
;; The "vpamdd52" spelling is part of the pattern name that callers use
;; and is kept as is.
19279(define_expand "vpamdd52huq<mode>_maskz"
19280 [(match_operand:VI8_AVX512VL 0 "register_operand")
19281 (match_operand:VI8_AVX512VL 1 "register_operand")
19282 (match_operand:VI8_AVX512VL 2 "register_operand")
19283 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand")
19284 (match_operand:<avx512fmaskmode> 4 "register_operand")]
19285 "TARGET_AVX512IFMA"
19286{
19287 emit_insn (gen_vpamdd52huq<mode>_maskz_1 (
19288 operands[0], operands[1], operands[2], operands[3],
19289 CONST0_RTX (<MODE>mode), operands[4]));
19290 DONE;
19291})
19292
;; Zero-masking expander for the "luq" variant.  It forwards operands 0-3
;; to the generated gen_vpamdd52luq<mode>_maskz_1, inserting the all-zero
;; vector CONST0_RTX (<MODE>mode) before the mask (operand 4).
;; The "vpamdd52" spelling is part of the pattern name that callers use
;; and is kept as is.
19293(define_expand "vpamdd52luq<mode>_maskz"
19294 [(match_operand:VI8_AVX512VL 0 "register_operand")
19295 (match_operand:VI8_AVX512VL 1 "register_operand")
19296 (match_operand:VI8_AVX512VL 2 "register_operand")
19297 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand")
19298 (match_operand:<avx512fmaskmode> 4 "register_operand")]
19299 "TARGET_AVX512IFMA"
19300{
19301 emit_insn (gen_vpamdd52luq<mode>_maskz_1 (
19302 operands[0], operands[1], operands[2], operands[3],
19303 CONST0_RTX (<MODE>mode), operands[4]));
19304 DONE;
19305})
19306
;; vpmadd52luq / vpmadd52huq, one pattern per VPMADD52 unspec.
;; Operand 1 is tied to the destination (constraint "0") and is therefore
;; not printed; operands 2 and 3 are the remaining inputs, operand 3
;; optionally from memory.  Expanded through the <sd_maskz_name>
;; substitution.
19307(define_insn "vpamdd52<vpmadd52type><mode><sd_maskz_name>"
19308 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
19309 (unspec:VI8_AVX512VL
19310 [(match_operand:VI8_AVX512VL 1 "register_operand" "0")
19311 (match_operand:VI8_AVX512VL 2 "register_operand" "v")
19312 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand" "vm")]
19313 VPMADD52))]
19314 "TARGET_AVX512IFMA"
19315 "vpmadd52<vpmadd52type>\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3}"
19316 [(set_attr "type" "ssemuladd")
19317 (set_attr "prefix" "evex")
19318 (set_attr "mode" "<sseinsnmode>")])
19319
;; Merge-masking form of vpmadd52luq / vpmadd52huq.  The unspec result is
;; vec_merged with operand 1 (which is tied to the destination) under the
;; mask in operand 4, so unselected elements keep operand 1's value.
19320(define_insn "vpamdd52<vpmadd52type><mode>_mask"
19321 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
19322 (vec_merge:VI8_AVX512VL
19323 (unspec:VI8_AVX512VL
19324 [(match_operand:VI8_AVX512VL 1 "register_operand" "0")
19325 (match_operand:VI8_AVX512VL 2 "register_operand" "v")
19326 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand" "vm")]
19327 VPMADD52)
19328 (match_dup 1)
19329 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
19330 "TARGET_AVX512IFMA"
19331 "vpmadd52<vpmadd52type>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
19332 [(set_attr "type" "ssemuladd")
19333 (set_attr "prefix" "evex")
19334 (set_attr "mode" "<sseinsnmode>")])
19335
;; vpmultishiftqb on byte-element vectors, kept opaque as
;; UNSPEC_VPMULTISHIFT.  Operand 1 must be a register, operand 2 may be a
;; register or memory.  Expanded through the <mask_name> substitution.
afee0628 19336(define_insn "vpmultishiftqb<mode><mask_name>"
19337 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
19338 (unspec:VI1_AVX512VL
19339 [(match_operand:VI1_AVX512VL 1 "register_operand" "v")
19340 (match_operand:VI1_AVX512VL 2 "nonimmediate_operand" "vm")]
19341 UNSPEC_VPMULTISHIFT))]
19342 "TARGET_AVX512VBMI"
19343 "vpmultishiftqb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
19344 [(set_attr "type" "sselog")
19345 (set_attr "prefix" "evex")
19346 (set_attr "mode" "<sseinsnmode>")])