]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/config/i386/sse.md
Update copyright years.
[thirdparty/gcc.git] / gcc / config / i386 / sse.md
1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005-2019 Free Software Foundation, Inc.
3 ;;
4 ;; This file is part of GCC.
5 ;;
6 ;; GCC is free software; you can redistribute it and/or modify
7 ;; it under the terms of the GNU General Public License as published by
8 ;; the Free Software Foundation; either version 3, or (at your option)
9 ;; any later version.
10 ;;
11 ;; GCC is distributed in the hope that it will be useful,
12 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
13 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 ;; GNU General Public License for more details.
15 ;;
16 ;; You should have received a copy of the GNU General Public License
17 ;; along with GCC; see the file COPYING3. If not see
18 ;; <http://www.gnu.org/licenses/>.
19
;; UNSPEC_* enumerators of the "unspec" enum, grouped by the
;; instruction-set extension they belong to.
(define_c_enum "unspec" [
  ;; SSE
  UNSPEC_MOVNT

  ;; SSE2
  UNSPEC_MOVDI_TO_SSE

  ;; SSE3
  UNSPEC_LDDQU

  ;; SSSE3
  UNSPEC_PSHUFB
  UNSPEC_PSIGN
  UNSPEC_PALIGNR

  ;; SSE4A
  UNSPEC_EXTRQI
  UNSPEC_EXTRQ
  UNSPEC_INSERTQI
  UNSPEC_INSERTQ

  ;; SSE4.1
  UNSPEC_BLENDV
  UNSPEC_INSERTPS
  UNSPEC_DP
  UNSPEC_MOVNTDQA
  UNSPEC_MPSADBW
  UNSPEC_PHMINPOSUW
  UNSPEC_PTEST

  ;; SSE4.2
  UNSPEC_PCMPESTR
  UNSPEC_PCMPISTR

  ;; FMA4 (the UNSPEC_XOP_* codes are listed in this group as well)
  UNSPEC_FMADDSUB
  UNSPEC_XOP_UNSIGNED_CMP
  UNSPEC_XOP_TRUEFALSE
  UNSPEC_XOP_PERMUTE
  UNSPEC_FRCZ

  ;; AES
  UNSPEC_AESENC
  UNSPEC_AESENCLAST
  UNSPEC_AESDEC
  UNSPEC_AESDECLAST
  UNSPEC_AESIMC
  UNSPEC_AESKEYGENASSIST

  ;; PCLMUL
  UNSPEC_PCLMUL

  ;; AVX
  UNSPEC_PCMP
  UNSPEC_VPERMIL
  UNSPEC_VPERMIL2
  UNSPEC_VPERMIL2F128
  UNSPEC_CAST
  UNSPEC_VTESTP
  UNSPEC_VCVTPH2PS
  UNSPEC_VCVTPS2PH

  ;; AVX2
  UNSPEC_VPERMVAR
  UNSPEC_VPERMTI
  UNSPEC_GATHER
  UNSPEC_VSIBADDR

  ;; AVX512F
  UNSPEC_VPERMT2
  UNSPEC_UNSIGNED_FIX_NOTRUNC
  UNSPEC_UNSIGNED_PCMP
  UNSPEC_TESTM
  UNSPEC_TESTNM
  UNSPEC_SCATTER
  UNSPEC_RCP14
  UNSPEC_RSQRT14
  UNSPEC_FIXUPIMM
  UNSPEC_SFIXUPIMM
  UNSPEC_SCALEF
  UNSPEC_VTERNLOG
  UNSPEC_GETEXP
  UNSPEC_GETMANT
  UNSPEC_ALIGN
  UNSPEC_CONFLICT
  UNSPEC_COMPRESS
  UNSPEC_COMPRESS_STORE
  UNSPEC_EXPAND
  UNSPEC_MASKED_EQ
  UNSPEC_MASKED_GT

  ;; Mask operations
  UNSPEC_MASKOP
  UNSPEC_KORTEST
  UNSPEC_KTEST

  ;; Embedded rounding feature
  UNSPEC_EMBEDDED_ROUNDING

  ;; AVX512PF
  UNSPEC_GATHER_PREFETCH
  UNSPEC_SCATTER_PREFETCH

  ;; AVX512ER
  UNSPEC_EXP2
  UNSPEC_RCP28
  UNSPEC_RSQRT28

  ;; SHA
  UNSPEC_SHA1MSG1
  UNSPEC_SHA1MSG2
  UNSPEC_SHA1NEXTE
  UNSPEC_SHA1RNDS4
  UNSPEC_SHA256MSG1
  UNSPEC_SHA256MSG2
  UNSPEC_SHA256RNDS2

  ;; AVX512BW
  UNSPEC_DBPSADBW
  UNSPEC_PMADDUBSW512
  UNSPEC_PMADDWD512
  UNSPEC_PSHUFHW
  UNSPEC_PSHUFLW
  UNSPEC_CVTINT2MASK

  ;; AVX512DQ
  UNSPEC_REDUCE
  UNSPEC_FPCLASS
  UNSPEC_RANGE

  ;; AVX512IFMA
  UNSPEC_VPMADD52LUQ
  UNSPEC_VPMADD52HUQ

  ;; AVX512VBMI
  UNSPEC_VPMULTISHIFT

  ;; AVX5124FMAPS / AVX5124VNNIW
  UNSPEC_VP4FMADD
  UNSPEC_VP4FNMADD
  UNSPEC_VP4DPWSSD
  UNSPEC_VP4DPWSSDS

  ;; GFNI
  UNSPEC_GF2P8AFFINEINV
  UNSPEC_GF2P8AFFINE
  UNSPEC_GF2P8MUL

  ;; AVX512VBMI2
  UNSPEC_VPSHLD
  UNSPEC_VPSHRD
  UNSPEC_VPSHRDV
  UNSPEC_VPSHLDV

  ;; AVX512VNNI
  UNSPEC_VPMADDUBSWACCD
  UNSPEC_VPMADDUBSWACCSSD
  UNSPEC_VPMADDWDACCD
  UNSPEC_VPMADDWDACCSSD

  ;; VAES
  UNSPEC_VAESDEC
  UNSPEC_VAESDECLAST
  UNSPEC_VAESENC
  UNSPEC_VAESENCLAST

  ;; VPCLMULQDQ
  UNSPEC_VPCLMULQDQ

  ;; AVX512BITALG
  UNSPEC_VPSHUFBIT
])
192
;; UNSPECV_* enumerators of the "unspecv" enum.
(define_c_enum "unspecv" [
  UNSPECV_LDMXCSR
  UNSPECV_STMXCSR
  UNSPECV_CLFLUSH
  UNSPECV_MONITOR
  UNSPECV_MWAIT
  UNSPECV_VZEROALL
  UNSPECV_VZEROUPPER
])
202
;; Every vector mode handled by the move patterns, V?TImode included.
;; 512bit modes require AVX512F, 256bit modes require AVX.
(define_mode_iterator VMOVE
  [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
   (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
   (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
   (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
   (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX") V1TI
   (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
   (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
212
;; All AVX-512{F,VL} vector modes with 4- and 8-byte elements.
;; TARGET_AVX512F is assumed as the baseline; the 128bit and 256bit
;; modes additionally need TARGET_AVX512VL.
(define_mode_iterator V48_AVX512VL
  [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
   V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
   V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
   V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])

;; AVX-512{BW,VL} vector modes with 1- and 2-byte elements.
;; TARGET_AVX512BW is assumed as the baseline.
(define_mode_iterator VI12_AVX512VL
  [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")
   V32HI (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])

;; The same modes as VI12_AVX512VL, but without assuming
;; TARGET_AVX512BW as the baseline.
(define_mode_iterator VI12_AVX512VLBW
  [(V64QI "TARGET_AVX512BW") (V16QI "TARGET_AVX512VL")
   (V32QI "TARGET_AVX512VL && TARGET_AVX512BW") (V32HI "TARGET_AVX512BW")
   (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])

;; The QImode-element modes of VI12_AVX512VL.
(define_mode_iterator VI1_AVX512VL
  [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")])
233
;; All vector modes (integer and float; no V?TImode).
(define_mode_iterator V
  [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
   (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
   (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
   (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
   (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
   (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])

;; All 128bit vector modes.
(define_mode_iterator V_128
  [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])

;; All 256bit vector modes.
(define_mode_iterator V_256
  [V32QI V16HI V8SI V4DI V8SF V4DF])

;; All 128bit and 256bit vector modes.
(define_mode_iterator V_128_256
  [V32QI V16QI V16HI V8HI V8SI V4SI V4DI V2DI V8SF V4SF V4DF V2DF])

;; All 512bit vector modes.
(define_mode_iterator V_512
  [V64QI V32HI V16SI V8DI V16SF V8DF])

;; All 256bit and 512bit vector modes; the 512bit ones are
;; conditional on TARGET_AVX512F.
(define_mode_iterator V_256_512
  [V32QI V16HI V8SI V4DI V8SF V4DF
   (V64QI "TARGET_AVX512F") (V32HI "TARGET_AVX512F") (V16SI "TARGET_AVX512F")
   (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
263
;; All vector float modes.
(define_mode_iterator VF
  [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
   (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])

;; 128bit and 256bit vector float modes.
(define_mode_iterator VF_128_256
  [(V8SF "TARGET_AVX") V4SF
   (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])

;; All vector float modes with SFmode elements.
(define_mode_iterator VF1
  [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF])

;; 128bit and 256bit vector modes with SFmode elements.
(define_mode_iterator VF1_128_256
  [(V8SF "TARGET_AVX") V4SF])

;; As above, but V8SF is unconditional and V4SF needs TARGET_AVX512VL.
(define_mode_iterator VF1_128_256VL
  [V8SF (V4SF "TARGET_AVX512VL")])

;; All vector float modes with DFmode elements.
(define_mode_iterator VF2
  [(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])

;; 128bit and 256bit vector modes with DFmode elements.
(define_mode_iterator VF2_128_256
  [(V4DF "TARGET_AVX") V2DF])

;; 256bit and 512bit vector modes with DFmode elements.
(define_mode_iterator VF2_512_256
  [(V8DF "TARGET_AVX512F") V4DF])

;; As above, but V8DF is unconditional and V4DF needs TARGET_AVX512VL.
(define_mode_iterator VF2_512_256VL
  [V8DF (V4DF "TARGET_AVX512VL")])

;; All 128bit vector float modes.
(define_mode_iterator VF_128
  [V4SF (V2DF "TARGET_SSE2")])

;; All 256bit vector float modes.
(define_mode_iterator VF_256
  [V8SF V4DF])

;; All 512bit vector float modes.
(define_mode_iterator VF_512
  [V16SF V8DF])
310
;; In the four iterators below the 512bit modes are unconditional and
;; the 128bit/256bit modes are conditional on TARGET_AVX512VL.

;; Integer modes with 4- and 8-byte elements.
(define_mode_iterator VI48_AVX512VL
  [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
   V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])

;; Float modes, SFmode and DFmode elements.
(define_mode_iterator VF_AVX512VL
  [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
   V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])

;; Float modes, DFmode elements only.
(define_mode_iterator VF2_AVX512VL
  [V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])

;; Float modes, SFmode elements only.
(define_mode_iterator VF1_AVX512VL
  [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")])
324
;; All vector integer modes.  256bit modes only need TARGET_AVX here.
(define_mode_iterator VI
  [(V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
   (V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
   (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
   (V8SI "TARGET_AVX") V4SI
   (V4DI "TARGET_AVX") V2DI])

;; All vector integer modes, with the 256bit modes conditional on
;; TARGET_AVX2.
(define_mode_iterator VI_AVX2
  [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
   (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
   (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
   (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
338
;; 128bit and 256bit vector integer modes with QImode elements.
(define_mode_iterator VI1
  [(V32QI "TARGET_AVX") V16QI])

;; All 128bit vector modes (integer and float), plus the 256bit ones
;; when TARGET_AVX is enabled.
(define_mode_iterator V_AVX
  [V16QI V8HI V4SI V2DI V4SF V2DF
   (V32QI "TARGET_AVX") (V16HI "TARGET_AVX")
   (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
   (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")])

;; 128bit integer modes with 4- and 8-byte elements, plus the 256bit
;; ones when TARGET_AVX is enabled.
(define_mode_iterator VI48_AVX
  [V4SI V2DI
   (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")])

;; All DImode vector integer modes.
(define_mode_iterator VI8
  [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])

;; DImode vector integer modes: V4DI unconditional, V8DI with
;; TARGET_AVX512F, V2DI with TARGET_AVX512VL.
(define_mode_iterator VI8_FVL
  [(V8DI "TARGET_AVX512F") V4DI (V2DI "TARGET_AVX512VL")])

;; DImode vector integer modes: V8DI unconditional, narrower modes
;; with TARGET_AVX512VL.
(define_mode_iterator VI8_AVX512VL
  [V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])

;; 256bit and 512bit DImode vector integer modes.
(define_mode_iterator VI8_256_512
  [V8DI (V4DI "TARGET_AVX512VL")])
365
;; QImode-element integer vector modes under various ISA conditions.
(define_mode_iterator VI1_AVX2
  [(V32QI "TARGET_AVX2") V16QI])

(define_mode_iterator VI1_AVX512
  [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI])

(define_mode_iterator VI1_AVX512F
  [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI])

;; HImode-element integer vector modes under various ISA conditions.
(define_mode_iterator VI2_AVX2
  [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])

(define_mode_iterator VI2_AVX512F
  [(V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI])
380
;; SImode-element integer vector modes under various ISA conditions.
(define_mode_iterator VI4_AVX
  [(V8SI "TARGET_AVX") V4SI])

(define_mode_iterator VI4_AVX2
  [(V8SI "TARGET_AVX2") V4SI])

(define_mode_iterator VI4_AVX512F
  [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])

(define_mode_iterator VI4_AVX512VL
  [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")])

;; SImode- and DImode-element modes: 128bit/256bit SImode modes are
;; unconditional, 128bit/256bit DImode modes need TARGET_AVX512VL,
;; and the 512bit modes need TARGET_AVX512F.
(define_mode_iterator VI48_AVX512F_AVX512VL
  [V4SI V8SI (V16SI "TARGET_AVX512F")
   (V2DI "TARGET_AVX512VL") (V4DI "TARGET_AVX512VL") (V8DI "TARGET_AVX512F")])

;; HImode-element modes: V32HI unconditional, narrower modes with
;; TARGET_AVX512VL.
(define_mode_iterator VI2_AVX512VL
  [(V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL") V32HI])

;; QImode-element modes: V32QI unconditional, V16QI with
;; TARGET_AVX512VL, V64QI with TARGET_AVX512F.
(define_mode_iterator VI1_AVX512VL_F
  [V32QI (V16QI "TARGET_AVX512VL") (V64QI "TARGET_AVX512F")])
402
;; DImode-element integer vector modes under various ISA conditions.
(define_mode_iterator VI8_AVX2_AVX512BW
  [(V8DI "TARGET_AVX512BW") (V4DI "TARGET_AVX2") V2DI])

(define_mode_iterator VI8_AVX2
  [(V4DI "TARGET_AVX2") V2DI])

(define_mode_iterator VI8_AVX2_AVX512F
  [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])

(define_mode_iterator VI8_AVX_AVX512F
  [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX")])

;; V4SI (128bit, 4-byte elements) and V4DI (256bit, 8-byte elements).
(define_mode_iterator VI4_128_8_256
  [V4SI V4DI])
417
;; Both V8D* modes.
(define_mode_iterator V8FI
  [V8DF V8DI])

;; Both V16S* modes.
(define_mode_iterator V16FI
  [V16SF V16SI])

;; ??? We should probably use TImode instead.
(define_mode_iterator VIMAX_AVX2_AVX512BW
  [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") V1TI])

;; TARGET_AVX512BW is assumed as the baseline here.
(define_mode_iterator VIMAX_AVX512VL
  [V4TI (V2TI "TARGET_AVX512VL") (V1TI "TARGET_AVX512VL")])

(define_mode_iterator VIMAX_AVX2
  [(V2TI "TARGET_AVX2") V1TI])

;; ??? This should probably be dropped in favor of VIMAX_AVX2_AVX512BW.
(define_mode_iterator SSESCALARMODE
  [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") TI])
440
;; Integer vector modes selected by element size; the digits in each
;; name give the element sizes in bytes (1, 2, 4).

(define_mode_iterator VI12_AVX2
  [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
   (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])

(define_mode_iterator VI24_AVX2
  [(V16HI "TARGET_AVX2") V8HI
   (V8SI "TARGET_AVX2") V4SI])

(define_mode_iterator VI124_AVX2_24_AVX512F_1_AVX512BW
  [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
   (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI
   (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])

(define_mode_iterator VI124_AVX2
  [(V32QI "TARGET_AVX2") V16QI
   (V16HI "TARGET_AVX2") V8HI
   (V8SI "TARGET_AVX2") V4SI])

(define_mode_iterator VI2_AVX2_AVX512BW
  [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
461
;; Integer vector modes with 2-, 4- and 8-byte elements: 512bit modes
;; unconditional, narrower modes with TARGET_AVX512VL.
(define_mode_iterator VI248_AVX512VL
  [V32HI V16SI V8DI
   (V16HI "TARGET_AVX512VL") (V8SI "TARGET_AVX512VL")
   (V4DI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")
   (V4SI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])

;; 128bit modes unconditional, 256bit modes with TARGET_AVX2.
(define_mode_iterator VI48_AVX2
  [(V8SI "TARGET_AVX2") V4SI
   (V4DI "TARGET_AVX2") V2DI])

(define_mode_iterator VI248_AVX2
  [(V16HI "TARGET_AVX2") V8HI
   (V8SI "TARGET_AVX2") V4SI
   (V4DI "TARGET_AVX2") V2DI])

(define_mode_iterator VI248_AVX2_8_AVX512F_24_AVX512BW
  [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
   (V16SI "TARGET_AVX512BW") (V8SI "TARGET_AVX2") V4SI
   (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])

;; 512bit modes only; V32HI needs TARGET_AVX512BW.
(define_mode_iterator VI248_AVX512BW
  [(V32HI "TARGET_AVX512BW") V16SI V8DI])

(define_mode_iterator VI248_AVX512BW_AVX512VL
  [(V32HI "TARGET_AVX512BW")
   (V4DI "TARGET_AVX512VL") V16SI V8DI])

;; TARGET_AVX512VL is assumed as the baseline for the next two
;; iterators; they differ only in that the second one includes V4DI.
(define_mode_iterator VI248_AVX512BW_1
  [(V16HI "TARGET_AVX512BW") (V8HI "TARGET_AVX512BW")
   V8SI V4SI
   V2DI])

(define_mode_iterator VI248_AVX512BW_2
  [(V16HI "TARGET_AVX512BW") (V8HI "TARGET_AVX512BW")
   V8SI V4SI
   V4DI V2DI])
499
;; Integer modes with 4- and 8-byte elements; only the 512bit modes
;; are conditional (on TARGET_AVX512F).
(define_mode_iterator VI48_AVX512F
  [(V16SI "TARGET_AVX512F") V8SI V4SI
   (V8DI "TARGET_AVX512F") V4DI V2DI])

;; As above, but the 256bit modes are conditional on TARGET_AVX.
(define_mode_iterator VI48_AVX_AVX512F
  [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
   (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])

;; Integer modes with 1- and 2-byte elements.
(define_mode_iterator VI12_AVX_AVX512F
  [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
   (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI])

;; 128bit and 256bit modes with 4- and 8-byte elements: float modes
;; unconditional, integer modes with TARGET_AVX2.
(define_mode_iterator V48_AVX2
  [V4SF V2DF
   V8SF V4DF
   (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
   (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])

;; QImode-element modes with every condition spelled out.
(define_mode_iterator VI1_AVX512VLBW
  [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX512VL")
   (V16QI "TARGET_AVX512VL")])
521
;; Maps each vector mode to an AVX-512 ISA name string: "avx512vl" for
;; 128bit/256bit modes, "avx512bw" for 512bit QI/HI element modes and
;; "avx512f" for the remaining 512bit modes.
(define_mode_attr avx512
  [(V16QI "avx512vl") (V32QI "avx512vl") (V64QI "avx512bw")
   (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")
   (V4SI "avx512vl") (V8SI "avx512vl") (V16SI "avx512f")
   (V2DI "avx512vl") (V4DI "avx512vl") (V8DI "avx512f")
   (V4SF "avx512vl") (V8SF "avx512vl") (V16SF "avx512f")
   (V2DF "avx512vl") (V4DF "avx512vl") (V8DF "avx512f")])

;; Maps each vector mode to an ISA name string ranging from "sse2" to
;; "avx512f"/"avx512bw".
(define_mode_attr sse2_avx_avx512f
  [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
   (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")
   (V4SI "sse2") (V8SI "avx") (V16SI "avx512f")
   (V2DI "avx512vl") (V4DI "avx512vl") (V8DI "avx512f")
   (V16SF "avx512f") (V8SF "avx") (V4SF "avx")
   (V8DF "avx512f") (V4DF "avx") (V2DF "avx")])
537
;; ISA name per integer vector mode: "sse2" for 128bit, "avx2" for
;; 256bit, "avx512bw"/"avx512f" for 512bit.
(define_mode_attr sse2_avx2
  [(V16QI "sse2") (V32QI "avx2") (V64QI "avx512bw")
   (V8HI "sse2") (V16HI "avx2") (V32HI "avx512bw")
   (V4SI "sse2") (V8SI "avx2") (V16SI "avx512f")
   (V2DI "sse2") (V4DI "avx2") (V8DI "avx512f")
   (V1TI "sse2") (V2TI "avx2") (V4TI "avx512bw")])

;; ISA name per mode: "ssse3" for modes up to 128 bits, "avx2" for
;; 256bit, "avx512bw" for 512bit.
(define_mode_attr ssse3_avx2
  [(V16QI "ssse3") (V32QI "avx2") (V64QI "avx512bw")
   (V4HI "ssse3") (V8HI "ssse3") (V16HI "avx2") (V32HI "avx512bw")
   (V4SI "ssse3") (V8SI "avx2")
   (V2DI "ssse3") (V4DI "avx2")
   (TI "ssse3") (V2TI "avx2") (V4TI "avx512bw")])

;; ISA name per integer vector mode: "sse4_1" for 128bit, "avx2" for
;; 256bit, an AVX-512 name for 512bit.
(define_mode_attr sse4_1_avx2
  [(V16QI "sse4_1") (V32QI "avx2") (V64QI "avx512bw")
   (V8HI "sse4_1") (V16HI "avx2") (V32HI "avx512bw")
   (V4SI "sse4_1") (V8SI "avx2") (V16SI "avx512f")
   (V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512dq")])
557
;; "avx" for float vector modes, "avx2" for integer vector modes.
(define_mode_attr avx_avx2
  [(V4SF "avx") (V2DF "avx")
   (V8SF "avx") (V4DF "avx")
   (V4SI "avx2") (V2DI "avx2")
   (V8SI "avx2") (V4DI "avx2")])

;; "vec" for 128bit integer vector modes, "avx2" for 256bit ones.
(define_mode_attr vec_avx2
  [(V16QI "vec") (V32QI "avx2")
   (V8HI "vec") (V16HI "avx2")
   (V4SI "vec") (V8SI "avx2")
   (V2DI "vec") (V4DI "avx2")])

;; "avx2" for 128bit/256bit modes with 4- and 8-byte elements,
;; "avx512f" for their 512bit counterparts; HImode-element modes map
;; to "avx512vl"/"avx512bw".
(define_mode_attr avx2_avx512
  [(V4SI "avx2") (V8SI "avx2") (V16SI "avx512f")
   (V2DI "avx2") (V4DI "avx2") (V8DI "avx512f")
   (V4SF "avx2") (V8SF "avx2") (V16SF "avx512f")
   (V2DF "avx2") (V4DF "avx2") (V8DF "avx512f")
   (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")])
576
;; "f" for float vector modes, "i" for integer vector modes.
(define_mode_attr shuffletype
  [(V16SF "f") (V16SI "i") (V8DF "f") (V8DI "i")
   (V8SF "f") (V8SI "i") (V4DF "f") (V4DI "i")
   (V4SF "f") (V4SI "i") (V2DF "f") (V2DI "i")
   (V32HI "i") (V16HI "i") (V8HI "i")
   (V64QI "i") (V32QI "i") (V16QI "i")
   (V4TI "i") (V2TI "i") (V1TI "i")])

;; Maps a 512bit mode to the 128bit mode with the same element type.
(define_mode_attr ssequartermode
  [(V16SF "V4SF") (V8DF "V2DF") (V16SI "V4SI") (V8DI "V2DI")])

;; Like ssequartermode, but the integer modes map to TI.
(define_mode_attr ssequarterinsnmode
  [(V16SF "V4SF") (V8DF "V2DF") (V16SI "TI") (V8DI "TI")])

;; Lower-case mode name with the same element count and elements of
;; twice the width.
(define_mode_attr ssedoublemodelower
  [(V16QI "v16hi") (V32QI "v32hi") (V64QI "v64hi")
   (V8HI "v8si") (V16HI "v16si") (V32HI "v32si")
   (V4SI "v4di") (V8SI "v8di") (V16SI "v16di")])

;; Maps a vector mode to a mode of twice its size.  Note that the table
;; mixes two schemes: some entries double the element count, others
;; keep the count and double the element width.
(define_mode_attr ssedoublemode
  [(V4SF "V8SF") (V8SF "V16SF") (V16SF "V32SF")
   (V2DF "V4DF") (V4DF "V8DF") (V8DF "V16DF")
   (V16QI "V16HI") (V32QI "V32HI") (V64QI "V64HI")
   (V4HI "V4SI") (V8HI "V8SI") (V16HI "V16SI") (V32HI "V32SI")
   (V4SI "V4DI") (V8SI "V16SI") (V16SI "V32SI")
   (V4DI "V8DI") (V8DI "V16DI")])

;; Maps an SImode/DImode-element vector mode to the QImode-element
;; vector mode of the same total size.
(define_mode_attr ssebytemode
  [(V8DI "V64QI") (V4DI "V32QI") (V2DI "V16QI")
   (V16SI "V64QI") (V8SI "V32QI") (V4SI "V16QI")])
607
;; All 128bit vector integer modes.
(define_mode_iterator VI_128
  [V16QI V8HI V4SI V2DI])

;; All 256bit vector integer modes.
(define_mode_iterator VI_256
  [V32QI V16HI V8SI V4DI])

;; Subsets of the 128bit vector integer modes; the digits in each name
;; are the element sizes in bytes.
(define_mode_iterator VI12_128
  [V16QI V8HI])
(define_mode_iterator VI14_128
  [V16QI V4SI])
(define_mode_iterator VI124_128
  [V16QI V8HI V4SI])
(define_mode_iterator VI24_128
  [V8HI V4SI])
(define_mode_iterator VI248_128
  [V8HI V4SI V2DI])
(define_mode_iterator VI48_128
  [V4SI V2DI])
621
;; Subsets of the 256bit and 512bit vector integer modes.
(define_mode_iterator VI124_256
  [V32QI V16HI V8SI])
(define_mode_iterator VI124_256_AVX512F_AVX512BW
  [V32QI V16HI V8SI
   (V64QI "TARGET_AVX512BW")
   (V32HI "TARGET_AVX512BW")
   (V16SI "TARGET_AVX512F")])
(define_mode_iterator VI48_256
  [V8SI V4DI])
(define_mode_iterator VI48_512
  [V16SI V8DI])
(define_mode_iterator VI4_256_8_512
  [V8SI V8DI])
(define_mode_iterator VI_AVX512BW
  [V16SI V8DI (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")])
634
;; Integer and float modes paired by element size.
(define_mode_iterator VI4F_128
  [V4SI V4SF])
(define_mode_iterator VI8F_128
  [V2DI V2DF])
(define_mode_iterator VI4F_256
  [V8SI V8SF])
(define_mode_iterator VI8F_256
  [V4DI V4DF])
(define_mode_iterator VI4F_256_512
  [V8SI V8SF
   (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")])
(define_mode_iterator VI48F_256_512
  [V8SI V8SF
   (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
   (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
   (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")])
;; 512bit modes only: floats with 4/8-byte elements and integers with
;; 1/2/4/8-byte elements.
(define_mode_iterator VF48_I1248
  [V16SI V16SF V8DI V8DF V32HI V64QI])
;; 512bit modes unconditional, narrower modes with TARGET_AVX512VL.
(define_mode_iterator VI48F
  [V16SI V16SF V8DI V8DF
   (V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL")
   (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
   (V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
   (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
(define_mode_iterator VI48F_256
  [V8SI V8SF V4DI V4DF])
657
;; Vector float modes: 512bit unconditional, 128bit/256bit with
;; TARGET_AVX512VL.
(define_mode_iterator VF_AVX512
  [(V4SF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")
   (V8SF "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
   V16SF V8DF])
662
;; "%{1toN%}" string per vector mode, where N is the number of
;; elements in the mode.
(define_mode_attr avx512bcst
  [(V4SI "%{1to4%}") (V2DI "%{1to2%}")
   (V8SI "%{1to8%}") (V4DI "%{1to4%}")
   (V16SI "%{1to16%}") (V8DI "%{1to8%}")
   (V4SF "%{1to4%}") (V2DF "%{1to2%}")
   (V8SF "%{1to8%}") (V4DF "%{1to4%}")
   (V16SF "%{1to16%}") (V8DF "%{1to8%}")])
670
;; Required SSE level for each float mode (scalar and vector).
(define_mode_attr sse
  [(SF "sse") (DF "sse2")
   (V4SF "sse") (V2DF "sse2")
   (V16SF "avx512f") (V8SF "avx")
   (V8DF "avx512f") (V4DF "avx")])

;; ISA name for QImode/DImode-element vector modes, starting at "sse2".
(define_mode_attr sse2
  [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
   (V2DI "sse2") (V4DI "avx") (V8DI "avx512f")])

;; "sse3" for V16QI, "avx" for V32QI.
(define_mode_attr sse3
  [(V16QI "sse3") (V32QI "avx")])

;; "sse4_1" for 128bit modes, "avx" for 256bit modes, "avx512f" for
;; V8DF.
(define_mode_attr sse4_1
  [(V4SF "sse4_1") (V2DF "sse4_1")
   (V8SF "avx") (V4DF "avx")
   (V8DF "avx512f")
   (V4DI "avx") (V2DI "sse4_1")
   (V8SI "avx") (V4SI "sse4_1")
   (V16QI "sse4_1") (V32QI "avx")
   (V8HI "sse4_1") (V16HI "avx")])
693
;; Size suffix per vector mode: "512" and "256" for the wide modes,
;; the empty string for 128bit modes.
(define_mode_attr avxsizesuffix
  [(V64QI "512") (V32HI "512") (V16SI "512") (V8DI "512")
   (V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
   (V16QI "") (V8HI "") (V4SI "") (V2DI "")
   (V16SF "512") (V8DF "512")
   (V8SF "256") (V4DF "256")
   (V4SF "") (V2DF "")])
701
;; SSE instruction mode: integer vector modes map to the integer mode
;; of the same size (XI, OI or TI); float vector modes map to
;; themselves.
(define_mode_attr sseinsnmode
  [(V64QI "XI") (V32HI "XI") (V16SI "XI") (V8DI "XI") (V4TI "XI")
   (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI")
   (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
   (V16SF "V16SF") (V8DF "V8DF")
   (V8SF "V8SF") (V4DF "V4DF")
   (V4SF "V4SF") (V2DF "V2DF")
   (TI "TI")])
711
;; Integer mode of the mask that corresponds to each vector mode.
(define_mode_attr avx512fmaskmode
  [(V64QI "DI") (V32QI "SI") (V16QI "HI")
   (V32HI "SI") (V16HI "HI") (V8HI "QI") (V4HI "QI")
   (V16SI "HI") (V8SI "QI") (V4SI "QI")
   (V8DI "QI") (V4DI "QI") (V2DI "QI")
   (V16SF "HI") (V8SF "QI") (V4SF "QI")
   (V8DF "QI") (V4DF "QI") (V2DF "QI")])

;; The same mapping, spelled in lower case.
(define_mode_attr avx512fmaskmodelower
  [(V64QI "di") (V32QI "si") (V16QI "hi")
   (V32HI "si") (V16HI "hi") (V8HI "qi") (V4HI "qi")
   (V16SI "hi") (V8SI "qi") (V4SI "qi")
   (V8DI "qi") (V4DI "qi") (V2DI "qi")
   (V16SF "hi") (V8SF "qi") (V4SF "qi")
   (V8DF "qi") (V4DF "qi") (V2DF "qi")])
729
;; Maps a vector float mode to the vector integer mode of the same
;; size and element count; vector integer modes map to themselves.
(define_mode_attr sseintvecmode
  [(V16SF "V16SI") (V8DF "V8DI")
   (V8SF "V8SI") (V4DF "V4DI")
   (V4SF "V4SI") (V2DF "V2DI")
   (V16SI "V16SI") (V8DI "V8DI")
   (V8SI "V8SI") (V4DI "V4DI")
   (V4SI "V4SI") (V2DI "V2DI")
   (V16HI "V16HI") (V8HI "V8HI")
   (V32HI "V32HI") (V64QI "V64QI")
   (V32QI "V32QI") (V16QI "V16QI")])

;; Maps a vector float mode to the scalar integer mode of the same
;; size (XI, OI or TI).
(define_mode_attr sseintvecmode2
  [(V8DF "XI") (V4DF "OI") (V2DF "TI")
   (V8SF "OI") (V4SF "TI")])

;; Lower-case variant of sseintvecmode; it covers fewer modes than
;; the upper-case table.
(define_mode_attr sseintvecmodelower
  [(V16SF "v16si") (V8DF "v8di")
   (V8SF "v8si") (V4DF "v4di")
   (V4SF "v4si") (V2DF "v2di")
   (V8SI "v8si") (V4DI "v4di")
   (V4SI "v4si") (V2DI "v2di")
   (V16HI "v16hi") (V8HI "v8hi")
   (V32QI "v32qi") (V16QI "v16qi")])
754
;; Maps a vector mode to the vector mode of twice the size (same
;; element type, twice as many elements).
(define_mode_attr ssedoublevecmode
  [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
   (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
   (V8SF "V16SF") (V4DF "V8DF")
   (V4SF "V8SF") (V2DF "V4DF")])

;; Maps a vector mode to the vector mode of half the size (same
;; element type, half as many elements).
(define_mode_attr ssehalfvecmode
  [(V64QI "V32QI") (V32HI "V16HI") (V16SI "V8SI") (V8DI "V4DI") (V4TI "V2TI")
   (V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
   (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI")
   (V16SF "V8SF") (V8DF "V4DF")
   (V8SF "V4SF") (V4DF "V2DF")
   (V4SF "V2SF")])

;; The half-size mapping again, spelled in lower case.
(define_mode_attr ssehalfvecmodelower
  [(V64QI "v32qi") (V32HI "v16hi") (V16SI "v8si") (V8DI "v4di") (V4TI "v2ti")
   (V32QI "v16qi") (V16HI "v8hi") (V8SI "v4si") (V4DI "v2di")
   (V16QI "v8qi") (V8HI "v4hi") (V4SI "v2si")
   (V16SF "v8sf") (V8DF "v4df")
   (V8SF "v4sf") (V4DF "v2df")
   (V4SF "v2sf")])
778
;; Mapping of vector modes to the packed single (SFmode element) mode
;; of the same size.
(define_mode_attr ssePSmode
  [(V16SI "V16SF") (V8DF "V16SF")
   (V16SF "V16SF") (V8DI "V16SF")
   (V64QI "V16SF") (V32QI "V8SF") (V16QI "V4SF")
   (V32HI "V16SF") (V16HI "V8SF") (V8HI "V4SF")
   (V8SI "V8SF") (V4SI "V4SF")
   (V4DI "V8SF") (V2DI "V4SF")
   (V4TI "V16SF") (V2TI "V8SF") (V1TI "V4SF")
   (V8SF "V8SF") (V4SF "V4SF")
   (V4DF "V8SF") (V2DF "V4SF")])

;; Maps V8DI/V4DI to the SFmode-element mode with the same number of
;; elements.
(define_mode_attr ssePSmode2
  [(V8DI "V8SF") (V4DI "V4SF")])
793
;; Maps a vector mode to the scalar mode of its elements.
(define_mode_attr ssescalarmode
  [(V64QI "QI") (V32QI "QI") (V16QI "QI")
   (V32HI "HI") (V16HI "HI") (V8HI "HI")
   (V16SI "SI") (V8SI "SI") (V4SI "SI")
   (V8DI "DI") (V4DI "DI") (V2DI "DI")
   (V16SF "SF") (V8SF "SF") (V4SF "SF")
   (V8DF "DF") (V4DF "DF") (V2DF "DF")
   (V4TI "TI") (V2TI "TI")])

;; The same element-mode mapping, spelled in lower case.
(define_mode_attr ssescalarmodelower
  [(V64QI "qi") (V32QI "qi") (V16QI "qi")
   (V32HI "hi") (V16HI "hi") (V8HI "hi")
   (V16SI "si") (V8SI "si") (V4SI "si")
   (V8DI "di") (V4DI "di") (V2DI "di")
   (V16SF "sf") (V8SF "sf") (V4SF "sf")
   (V8DF "df") (V4DF "df") (V2DF "df")
   (V4TI "ti") (V2TI "ti")])
813
;; Maps a vector mode to the 128bit vector mode with the same element
;; type.
(define_mode_attr ssexmmmode
  [(V64QI "V16QI") (V32QI "V16QI") (V16QI "V16QI")
   (V32HI "V8HI") (V16HI "V8HI") (V8HI "V8HI")
   (V16SI "V4SI") (V8SI "V4SI") (V4SI "V4SI")
   (V8DI "V2DI") (V4DI "V2DI") (V2DI "V2DI")
   (V16SF "V4SF") (V8SF "V4SF") (V4SF "V4SF")
   (V8DF "V2DF") (V4DF "V2DF") (V2DF "V2DF")])
822
;; Pointer size override for scalar modes (Intel asm dialect), keyed
;; by element size: "b", "w", "k" and "q" for 1-, 2-, 4- and 8-byte
;; elements.
(define_mode_attr iptr
  [(V64QI "b") (V32HI "w") (V16SI "k") (V8DI "q")
   (V32QI "b") (V16HI "w") (V8SI "k") (V4DI "q")
   (V16QI "b") (V8HI "w") (V4SI "k") (V2DI "q")
   (V16SF "k") (V8DF "q")
   (V8SF "k") (V4DF "q")
   (V4SF "k") (V2DF "q")
   (SF "k") (DF "q")])
832
;; Number of scalar elements in each vector mode.
(define_mode_attr ssescalarnum
  [(V64QI "64") (V16SI "16") (V8DI "8")
   (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
   (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
   (V16SF "16") (V8DF "8")
   (V8SF "8") (V4DF "4")
   (V4SF "4") (V2DF "2")])

;; Element-index mask for each vector mode, i.e. the number of
;; elements minus one.
(define_mode_attr ssescalarnummask
  [(V32QI "31") (V16HI "15") (V8SI "7") (V4DI "3")
   (V16QI "15") (V8HI "7") (V4SI "3") (V2DI "1")
   (V8SF "7") (V4DF "3")
   (V4SF "3") (V2DF "1")])
848
;; Element size in bits for each vector mode.  Note that the V?TI
;; modes are mapped to "64" in this table.
(define_mode_attr ssescalarsize
  [(V4TI "64") (V2TI "64") (V1TI "64")
   (V8DI "64") (V4DI "64") (V2DI "64")
   (V64QI "8") (V32QI "8") (V16QI "8")
   (V32HI "16") (V16HI "16") (V8HI "16")
   (V16SI "32") (V8SI "32") (V4SI "32")
   (V16SF "32") (V8SF "32") (V4SF "32")
   (V8DF "64") (V4DF "64") (V2DF "64")])
857
;; SSE prefix: "p" for integer vector modes, empty for float vector
;; modes.
(define_mode_attr sseintprefix
  [(V2DI "p") (V2DF "")
   (V4DI "p") (V4DF "")
   (V8DI "p") (V8DF "")
   (V4SI "p") (V4SF "")
   (V8SI "p") (V8SF "")
   (V16SI "p") (V16SF "")
   (V16QI "p") (V8HI "p")
   (V32QI "p") (V16HI "p")
   (V64QI "p") (V32HI "p")])

;; SSE scalar suffix for each mode: "ss"/"sd" for float element
;; types, "d"/"q" for SImode/DImode element types.
(define_mode_attr ssescalarmodesuffix
  [(SF "ss") (DF "sd")
   (V16SF "ss") (V8DF "sd")
   (V8SF "ss") (V4DF "sd")
   (V4SF "ss") (V2DF "sd")
   (V16SI "d") (V8DI "q")
   (V8SI "d") (V4DI "q")
   (V4SI "d") (V2DI "q")])
879
;; Unpack: same total size, elements twice as wide, half as many.
(define_mode_attr sseunpackmode
  [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")
   (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")
   (V32HI "V16SI") (V64QI "V32HI") (V16SI "V8DI")])

;; Pack: same total size, elements half as wide, twice as many.
(define_mode_attr ssepackmode
  [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")
   (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")
   (V32HI "V64QI") (V16SI "V32HI") (V8DI "V16SI")])
890
;; Largest integer accepted by the xop rotate immediate constraint for
;; each mode (element width in bits minus one).
(define_mode_attr sserotatemax
  [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])

;; Cast intrinsic name component for each mode.
(define_mode_attr castmode
  [(V8SI "si") (V8SF "ps") (V4DF "pd")
   (V16SI "si") (V16SF "ps") (V8DF "pd")])
899
;; Instruction suffix for sign and zero extensions.
(define_code_attr extsuffix
  [(sign_extend "sx") (zero_extend "zx")])

;; 256bit modes: i128 for integer vectors when TARGET_AVX2 (written as
;; "%~128" in the table), f128 otherwise.
;; 512bit modes: i64x4 or f64x4.
(define_mode_attr i128
  [(V16SF "f64x4") (V8SF "f128") (V8DF "f64x4") (V4DF "f128")
   (V64QI "i64x4") (V32QI "%~128") (V32HI "i64x4") (V16HI "%~128")
   (V16SI "i64x4") (V8SI "%~128") (V8DI "i64x4") (V4DI "%~128")])

;; i32x4, f32x4, i64x2 or f64x2 suffixes for the 256bit modes, for use
;; with TARGET_AVX512VL && TARGET_AVX512DQ.
(define_mode_attr i128vldq
  [(V8SF "f32x4") (V4DF "f64x2")
   (V32QI "i32x4") (V16HI "i32x4") (V8SI "i32x4") (V4DI "i64x2")])
915
;; Mix-n-match
(define_mode_iterator AVX256MODE2P
  [V8SI V8SF V4DF])
(define_mode_iterator AVX512MODE2P
  [V16SI V16SF V8DF])

;; Mapping for dbpsadbw modes: HImode-element vector mode to the
;; QImode-element vector mode of the same size.
(define_mode_attr dbpsadbwmode
  [(V32HI "V64QI") (V16HI "V32QI") (V8HI "V16QI")])
923
;; Broadcast suffix for each vector mode, selected by element type.
(define_mode_attr bcstscalarsuff
  [(V64QI "b") (V32QI "b") (V16QI "b")
   (V32HI "w") (V16HI "w") (V8HI "w")
   (V16SI "d") (V8SI "d") (V4SI "d")
   (V8DI "q") (V4DI "q") (V2DI "q")
   (V16SF "ss") (V8SF "ss") (V4SF "ss")
   (V8DF "sd") (V4DF "sd") (V2DF "sd")])

;; Ties the mode of an assembler operand to the mode iterator:
;; "x" for 128bit, "t" for 256bit and "g" for 512bit modes.
(define_mode_attr xtg_mode
  [(V16QI "x") (V8HI "x") (V4SI "x") (V2DI "x") (V4SF "x") (V2DF "x")
   (V32QI "t") (V16HI "t") (V8SI "t") (V4DI "t") (V8SF "t") (V4DF "t")
   (V64QI "g") (V32HI "g") (V16SI "g") (V8DI "g") (V16SF "g") (V8DF "g")])
938
;; Mask mode of half the width, for unpacks.
(define_mode_attr HALFMASKMODE
  [(DI "SI") (SI "HI")])

;; Mask mode of twice the width, for packs.
(define_mode_attr DOUBLEMASKMODE
  [(HI "SI") (SI "DI")])
946
947
948 ;; Include define_subst patterns for instructions with mask
949 (include "subst.md")
950
951 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
952
953 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
954 ;;
955 ;; Move patterns
956 ;;
957 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
958
959 ;; All of these patterns are enabled for SSE1 as well as SSE2.
960 ;; This is essential for maintaining stable calling conventions.
961
;; Move expander for every VMOVE mode, available whenever TARGET_SSE.
;; The expansion itself is delegated to ix86_expand_vector_move.
(define_expand "mov<mode>"
  [(set (match_operand:VMOVE 0 "nonimmediate_operand")
        (match_operand:VMOVE 1 "nonimmediate_operand"))]
  "TARGET_SSE"
{
  ix86_expand_vector_move (<MODE>mode, operands);
  DONE;
})
970
;; Vector move insn for every VMOVE mode.  The insn condition requires at
;; least one of the two operands to be a register.
;;   Alternatives 0 and 1 (source constraints C and BC) have type sselog1
;;   and get their opcode from standard_sse_constant_opcode.
;;   Alternatives 2 and 3 (register/memory source, memory destination)
;;   have type ssemov; the mnemonic is picked from the "mode" attribute
;;   and from whether either operand is misaligned.
;; Alternative 1 is only enabled with SSE2 for 16-byte modes and with
;; AVX2 for 32-byte modes (see the "enabled" attribute).
971 (define_insn "mov<mode>_internal"
972 [(set (match_operand:VMOVE 0 "nonimmediate_operand"
973 "=v,v ,v ,m")
974 (match_operand:VMOVE 1 "nonimmediate_or_sse_const_operand"
975 " C,BC,vm,v"))]
976 "TARGET_SSE
977 && (register_operand (operands[0], <MODE>mode)
978 || register_operand (operands[1], <MODE>mode))"
979 {
980 switch (get_attr_type (insn))
981 {
982 case TYPE_SSELOG1:
983 return standard_sse_constant_opcode (insn, operands);
984
985 case TYPE_SSEMOV:
986 /* There is no evex-encoded vmov* for sizes smaller than 64-bytes
987 in avx512f, so we need to use workarounds, to access sse registers
988 16-31, which are evex-only. In avx512vl we don't need workarounds. */
989 if (TARGET_AVX512F && <MODE_SIZE> < 64 && !TARGET_AVX512VL
990 && (EXT_REX_SSE_REG_P (operands[0])
991 || EXT_REX_SSE_REG_P (operands[1])))
992 {
/* Store: extract lane 0 of the %g (widest) view of the source.  */
993 if (memory_operand (operands[0], <MODE>mode))
994 {
995 if (<MODE_SIZE> == 32)
996 return "vextract<shuffletype>64x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
997 else if (<MODE_SIZE> == 16)
998 return "vextract<shuffletype>32x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
999 else
1000 gcc_unreachable ();
1001 }
/* Load: broadcast the memory operand into the %g view of the
   destination.  */
1002 else if (memory_operand (operands[1], <MODE>mode))
1003 {
1004 if (<MODE_SIZE> == 32)
1005 return "vbroadcast<shuffletype>64x4\t{%1, %g0|%g0, %1}";
1006 else if (<MODE_SIZE> == 16)
1007 return "vbroadcast<shuffletype>32x4\t{%1, %g0|%g0, %1}";
1008 else
1009 gcc_unreachable ();
1010 }
1011 else
1012 /* Reg -> reg move is always aligned. Just use wider move. */
1013 switch (get_attr_mode (insn))
1014 {
1015 case MODE_V8SF:
1016 case MODE_V4SF:
1017 return "vmovaps\t{%g1, %g0|%g0, %g1}";
1018 case MODE_V4DF:
1019 case MODE_V2DF:
1020 return "vmovapd\t{%g1, %g0|%g0, %g1}";
1021 case MODE_OI:
1022 case MODE_TI:
1023 return "vmovdqa64\t{%g1, %g0|%g0, %g1}";
1024 default:
1025 gcc_unreachable ();
1026 }
1027 }
1028
/* No workaround needed: choose the unaligned mnemonic when either
   operand is misaligned, the aligned one otherwise.  */
1029 switch (get_attr_mode (insn))
1030 {
1031 case MODE_V16SF:
1032 case MODE_V8SF:
1033 case MODE_V4SF:
1034 if (misaligned_operand (operands[0], <MODE>mode)
1035 || misaligned_operand (operands[1], <MODE>mode))
1036 return "%vmovups\t{%1, %0|%0, %1}";
1037 else
1038 return "%vmovaps\t{%1, %0|%0, %1}";
1039
1040 case MODE_V8DF:
1041 case MODE_V4DF:
1042 case MODE_V2DF:
1043 if (misaligned_operand (operands[0], <MODE>mode)
1044 || misaligned_operand (operands[1], <MODE>mode))
1045 return "%vmovupd\t{%1, %0|%0, %1}";
1046 else
1047 return "%vmovapd\t{%1, %0|%0, %1}";
1048
1049 case MODE_OI:
1050 case MODE_TI:
1051 if (misaligned_operand (operands[0], <MODE>mode)
1052 || misaligned_operand (operands[1], <MODE>mode))
1053 return TARGET_AVX512VL
1054 && (<MODE>mode == V4SImode
1055 || <MODE>mode == V2DImode
1056 || <MODE>mode == V8SImode
1057 || <MODE>mode == V4DImode
1058 || TARGET_AVX512BW)
1059 ? "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}"
1060 : "%vmovdqu\t{%1, %0|%0, %1}";
1061 else
1062 return TARGET_AVX512VL ? "vmovdqa64\t{%1, %0|%0, %1}"
1063 : "%vmovdqa\t{%1, %0|%0, %1}";
1064 case MODE_XI:
1065 if (misaligned_operand (operands[0], <MODE>mode)
1066 || misaligned_operand (operands[1], <MODE>mode))
1067 return (<MODE>mode == V16SImode
1068 || <MODE>mode == V8DImode
1069 || TARGET_AVX512BW)
1070 ? "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}"
1071 : "vmovdqu64\t{%1, %0|%0, %1}";
1072 else
1073 return "vmovdqa64\t{%1, %0|%0, %1}";
1074
1075 default:
1076 gcc_unreachable ();
1077 }
1078
1079 default:
1080 gcc_unreachable ();
1081 }
1082 }
1083 [(set_attr "type" "sselog1,sselog1,ssemov,ssemov")
1084 (set_attr "prefix" "maybe_vex")
1085 (set (attr "mode")
1086 (cond [(and (eq_attr "alternative" "1")
1087 (match_test "TARGET_AVX512VL"))
1088 (const_string "<sseinsnmode>")
1089 (and (match_test "<MODE_SIZE> == 16")
1090 (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
1091 (and (eq_attr "alternative" "3")
1092 (match_test "TARGET_SSE_TYPELESS_STORES"))))
1093 (const_string "<ssePSmode>")
1094 (match_test "TARGET_AVX")
1095 (const_string "<sseinsnmode>")
1096 (ior (not (match_test "TARGET_SSE2"))
1097 (match_test "optimize_function_for_size_p (cfun)"))
1098 (const_string "V4SF")
1099 (and (eq_attr "alternative" "0")
1100 (match_test "TARGET_SSE_LOAD0_BY_PXOR"))
1101 (const_string "TI")
1102 ]
1103 (const_string "<sseinsnmode>")))
1104 (set (attr "enabled")
1105 (cond [(and (match_test "<MODE_SIZE> == 16")
1106 (eq_attr "alternative" "1"))
1107 (symbol_ref "TARGET_SSE2")
1108 (and (match_test "<MODE_SIZE> == 32")
1109 (eq_attr "alternative" "1"))
1110 (symbol_ref "TARGET_AVX2")
1111 ]
1112 (symbol_ref "true")))])
1113
;; Masked load/move for V48_AVX512VL modes: operand 0 is the vec_merge of
;; operand 1 (register or memory) and operand 2 (tied to operand 0 or the
;; constant matched by "C") under mask register operand 3.
;; Float-element modes emit vmovu/vmova<ssemodesuffix>, all others
;; vmovdqu/vmovdqa<ssescalarsize>; the unaligned form is used when
;; operand 1 is misaligned.
;; NOTE(review): %N2 presumably prints the zero-masking marker when
;; operand 2 is the zero constant -- confirm in the operand printer.
1114 (define_insn "<avx512>_load<mode>_mask"
1115 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v")
1116 (vec_merge:V48_AVX512VL
1117 (match_operand:V48_AVX512VL 1 "nonimmediate_operand" "v,m")
1118 (match_operand:V48_AVX512VL 2 "nonimm_or_0_operand" "0C,0C")
1119 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
1120 "TARGET_AVX512F"
1121 {
1122 if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
1123 {
1124 if (misaligned_operand (operands[1], <MODE>mode))
1125 return "vmovu<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
1126 else
1127 return "vmova<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
1128 }
1129 else
1130 {
1131 if (misaligned_operand (operands[1], <MODE>mode))
1132 return "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
1133 else
1134 return "vmovdqa<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
1135 }
1136 }
1137 [(set_attr "type" "ssemov")
1138 (set_attr "prefix" "evex")
1139 (set_attr "memory" "none,load")
1140 (set_attr "mode" "<sseinsnmode>")])
1141
;; Masked load/move for the VI12_AVX512VL modes; requires AVX512BW.
;; Same operand layout as the V48 variant, but the template is always
;; vmovdqu<ssescalarsize>, with no aligned/unaligned selection.
1142 (define_insn "<avx512>_load<mode>_mask"
1143 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v")
1144 (vec_merge:VI12_AVX512VL
1145 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "v,m")
1146 (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand" "0C,0C")
1147 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
1148 "TARGET_AVX512BW"
1149 "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
1150 [(set_attr "type" "ssemov")
1151 (set_attr "prefix" "evex")
1152 (set_attr "memory" "none,load")
1153 (set_attr "mode" "<sseinsnmode>")])
1154
;; Mask-controlled blend for V48_AVX512VL modes.  Note the operand
;; numbering: the first vec_merge arm is operand 2 (register or memory),
;; the second arm is operand 1 (register), and operand 3 is the mask.
1155 (define_insn "<avx512>_blendm<mode>"
1156 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
1157 (vec_merge:V48_AVX512VL
1158 (match_operand:V48_AVX512VL 2 "nonimmediate_operand" "vm")
1159 (match_operand:V48_AVX512VL 1 "register_operand" "v")
1160 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
1161 "TARGET_AVX512F"
1162 "v<sseintprefix>blendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
1163 [(set_attr "type" "ssemov")
1164 (set_attr "prefix" "evex")
1165 (set_attr "mode" "<sseinsnmode>")])
1166
;; Mask-controlled blend for the VI12_AVX512VL modes; requires AVX512BW
;; and always uses the vpblendm<ssemodesuffix> mnemonic.  Operand
;; numbering is the same as in the V48 variant (operand 2 is the first
;; vec_merge arm, operand 1 the second).
1167 (define_insn "<avx512>_blendm<mode>"
1168 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
1169 (vec_merge:VI12_AVX512VL
1170 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
1171 (match_operand:VI12_AVX512VL 1 "register_operand" "v")
1172 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
1173 "TARGET_AVX512BW"
1174 "vpblendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
1175 [(set_attr "type" "ssemov")
1176 (set_attr "prefix" "evex")
1177 (set_attr "mode" "<sseinsnmode>")])
1178
;; Masked store for V48_AVX512VL modes: memory operand 0 becomes the
;; vec_merge of register operand 1 and its own previous contents
;; (match_dup 0) under mask register operand 2.
;; Float-element modes emit vmovu/vmova<ssemodesuffix>, all others
;; vmovdqu/vmovdqa<ssescalarsize>; the unaligned form is used when the
;; destination is misaligned.
1179 (define_insn "<avx512>_store<mode>_mask"
1180 [(set (match_operand:V48_AVX512VL 0 "memory_operand" "=m")
1181 (vec_merge:V48_AVX512VL
1182 (match_operand:V48_AVX512VL 1 "register_operand" "v")
1183 (match_dup 0)
1184 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1185 "TARGET_AVX512F"
1186 {
1187 if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
1188 {
1189 if (misaligned_operand (operands[0], <MODE>mode))
1190 return "vmovu<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1191 else
1192 return "vmova<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1193 }
1194 else
1195 {
1196 if (misaligned_operand (operands[0], <MODE>mode))
1197 return "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1198 else
1199 return "vmovdqa<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1200 }
1201 }
1202 [(set_attr "type" "ssemov")
1203 (set_attr "prefix" "evex")
1204 (set_attr "memory" "store")
1205 (set_attr "mode" "<sseinsnmode>")])
1206
;; Masked store for the VI12_AVX512VL modes; requires AVX512BW.  Same
;; operand layout as the V48 variant, but the template is always
;; vmovdqu<ssescalarsize>.
1207 (define_insn "<avx512>_store<mode>_mask"
1208 [(set (match_operand:VI12_AVX512VL 0 "memory_operand" "=m")
1209 (vec_merge:VI12_AVX512VL
1210 (match_operand:VI12_AVX512VL 1 "register_operand" "v")
1211 (match_dup 0)
1212 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1213 "TARGET_AVX512BW"
1214 "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
1215 [(set_attr "type" "ssemov")
1216 (set_attr "prefix" "evex")
1217 (set_attr "memory" "store")
1218 (set_attr "mode" "<sseinsnmode>")])
1219
;; Build a V2DI whose element 0 is element 0 of operand 1 and whose
;; element 1 is zero (vec_concat of the selected DI with const_int 0),
;; emitted as a single (v)movq.
1220 (define_insn "sse2_movq128"
1221 [(set (match_operand:V2DI 0 "register_operand" "=v")
1222 (vec_concat:V2DI
1223 (vec_select:DI
1224 (match_operand:V2DI 1 "nonimmediate_operand" "vm")
1225 (parallel [(const_int 0)]))
1226 (const_int 0)))]
1227 "TARGET_SSE2"
1228 "%vmovq\t{%1, %0|%0, %q1}"
1229 [(set_attr "type" "ssemov")
1230 (set_attr "prefix" "maybe_vex")
1231 (set_attr "mode" "TI")])
1232
1233 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
1234 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
1235 ;; from memory, we'd prefer to load the memory directly into the %xmm
1236 ;; register. To facilitate this happy circumstance, this pattern won't
1237 ;; split until after register allocation. If the 64-bit value didn't
1238 ;; come from memory, this is the best we can do. This is much better
1239 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
1240 ;; from there.
1241
;; 32-bit-only pattern (condition includes !TARGET_64BIT) that stays as
;; "#" until reload_completed and is then split by the C code below:
;;   alternative 0 (operand 1 in integer registers): two sse2_loadld
;;     insns load the low and high SImode halves into operand 0 and the
;;     scratch operand 2, then vec_interleave_lowv4si combines them;
;;   alternative 1 (operand 1 in memory): a single vec_concatv2di of the
;;     memory operand with zero; the scratch is unused ("X").
1242 (define_insn_and_split "movdi_to_sse"
1243 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
1244 (unspec:V4SI [(match_operand:DI 1 "nonimmediate_operand" "r,m")]
1245 UNSPEC_MOVDI_TO_SSE))
1246 (clobber (match_scratch:V4SI 2 "=&x,X"))]
1247 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC"
1248 "#"
1249 "&& reload_completed"
1250 [(const_int 0)]
1251 {
1252 if (register_operand (operands[1], DImode))
1253 {
1254 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
1255 Assemble the 64-bit DImode value in an xmm register. */
1256 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
1257 gen_lowpart (SImode, operands[1])));
1258 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
1259 gen_highpart (SImode, operands[1])));
1260 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
1261 operands[2]));
1262 }
1263 else if (memory_operand (operands[1], DImode))
1264 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
1265 operands[1], const0_rtx));
1266 else
1267 gcc_unreachable ();
1268 DONE;
1269 })
1270
;; After reload, rewrite a V4SF load whose source satisfies
;; zero_extended_scalar_load_operand as a vec_merge that places the
;; SFmode low part of the source (duplicated) in element 0 (const_int 1
;; mask) and takes the remaining elements from a zero V4SF vector.
1271 (define_split
1272 [(set (match_operand:V4SF 0 "register_operand")
1273 (match_operand:V4SF 1 "zero_extended_scalar_load_operand"))]
1274 "TARGET_SSE && reload_completed"
1275 [(set (match_dup 0)
1276 (vec_merge:V4SF
1277 (vec_duplicate:V4SF (match_dup 1))
1278 (match_dup 2)
1279 (const_int 1)))]
1280 {
1281 operands[1] = gen_lowpart (SFmode, operands[1]);
1282 operands[2] = CONST0_RTX (V4SFmode);
1283 })
1284
;; After reload, rewrite a V2DF load whose source satisfies
;; zero_extended_scalar_load_operand as a vec_concat of the DFmode low
;; part of the source with a DFmode zero.
1285 (define_split
1286 [(set (match_operand:V2DF 0 "register_operand")
1287 (match_operand:V2DF 1 "zero_extended_scalar_load_operand"))]
1288 "TARGET_SSE2 && reload_completed"
1289 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
1290 {
1291 operands[1] = gen_lowpart (DFmode, operands[1]);
1292 operands[2] = CONST0_RTX (DFmode);
1293 })
1294
;; Named misaligned-move expander for every VMOVE mode.  All work is
;; delegated to ix86_expand_vector_move_misalign; the RTL template is
;; never emitted because the expansion ends with DONE.
1295 (define_expand "movmisalign<mode>"
1296 [(set (match_operand:VMOVE 0 "nonimmediate_operand")
1297 (match_operand:VMOVE 1 "nonimmediate_operand"))]
1298 "TARGET_SSE"
1299 {
1300 ix86_expand_vector_move_misalign (<MODE>mode, operands);
1301 DONE;
1302 })
1303
1304 ;; Merge movsd/movhpd to movupd for TARGET_SSE_UNALIGNED_LOAD_OPTIMAL targets.
;; Matched sequence: (1) operand 0 = concat (DF memory operand 1, zero);
;; (2) operand 2 = concat (element 0 of operand 2, DF memory operand 3).
;; When ix86_operands_ok_for_move_multiple accepts the operands, both
;; insns are replaced by one V2DF load of operand 1's address
;; (operands[5]) into operand 2.
1305 (define_peephole2
1306 [(set (match_operand:V2DF 0 "sse_reg_operand")
1307 (vec_concat:V2DF (match_operand:DF 1 "memory_operand")
1308 (match_operand:DF 4 "const0_operand")))
1309 (set (match_operand:V2DF 2 "sse_reg_operand")
1310 (vec_concat:V2DF (vec_select:DF (match_dup 2)
1311 (parallel [(const_int 0)]))
1312 (match_operand:DF 3 "memory_operand")))]
1313 "TARGET_SSE2 && TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
1314 && ix86_operands_ok_for_move_multiple (operands, true, DFmode)"
1315 [(set (match_dup 2) (match_dup 5))]
1316 "operands[5] = adjust_address (operands[1], V2DFmode, 0);")
1317
;; Variant of the load merge for a scalar first load: (1) DF register
;; operand 0 = DF memory operand 1; (2) operand 2 = concat (DF register
;; operand 4, DF memory operand 3), where operand 4 must be the same hard
;; register as operand 2.  The pair is replaced by one V2DF load from
;; operand 1's address into operand 2.
1318 (define_peephole2
1319 [(set (match_operand:DF 0 "sse_reg_operand")
1320 (match_operand:DF 1 "memory_operand"))
1321 (set (match_operand:V2DF 2 "sse_reg_operand")
1322 (vec_concat:V2DF (match_operand:DF 4 "sse_reg_operand")
1323 (match_operand:DF 3 "memory_operand")))]
1324 "TARGET_SSE2 && TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
1325 && REGNO (operands[4]) == REGNO (operands[2])
1326 && ix86_operands_ok_for_move_multiple (operands, true, DFmode)"
1327 [(set (match_dup 2) (match_dup 5))]
1328 "operands[5] = adjust_address (operands[1], V2DFmode, 0);")
1329
1330 ;; Merge movlpd/movhpd to movupd for TARGET_SSE_UNALIGNED_STORE_OPTIMAL targets.
;; Matched sequence: store element 0 of V2DF operand 1 to DF memory
;; operand 0, then store element 1 of V2DF operand 3 to DF memory
;; operand 2.  When ix86_operands_ok_for_move_multiple accepts the
;; operands, both are replaced by one V2DF store of operand 1 to operand
;; 0's address (operands[4]).
1331 (define_peephole2
1332 [(set (match_operand:DF 0 "memory_operand")
1333 (vec_select:DF (match_operand:V2DF 1 "sse_reg_operand")
1334 (parallel [(const_int 0)])))
1335 (set (match_operand:DF 2 "memory_operand")
1336 (vec_select:DF (match_operand:V2DF 3 "sse_reg_operand")
1337 (parallel [(const_int 1)])))]
1338 "TARGET_SSE2 && TARGET_SSE_UNALIGNED_STORE_OPTIMAL
1339 && ix86_operands_ok_for_move_multiple (operands, false, DFmode)"
1340 [(set (match_dup 4) (match_dup 1))]
1341 "operands[4] = adjust_address (operands[0], V2DFmode, 0);")
1342
;; (v)lddqu: load a VI1 vector from memory, modelled as UNSPEC_LDDQU.
;; The prefix_data16 and prefix_rep attributes are "*" under TARGET_AVX
;; and otherwise 0 and 1 respectively.
1343 (define_insn "<sse3>_lddqu<avxsizesuffix>"
1344 [(set (match_operand:VI1 0 "register_operand" "=x")
1345 (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
1346 UNSPEC_LDDQU))]
1347 "TARGET_SSE3"
1348 "%vlddqu\t{%1, %0|%0, %1}"
1349 [(set_attr "type" "ssemov")
1350 (set_attr "movu" "1")
1351 (set (attr "prefix_data16")
1352 (if_then_else
1353 (match_test "TARGET_AVX")
1354 (const_string "*")
1355 (const_string "0")))
1356 (set (attr "prefix_rep")
1357 (if_then_else
1358 (match_test "TARGET_AVX")
1359 (const_string "*")
1360 (const_string "1")))
1361 (set_attr "prefix" "maybe_vex")
1362 (set_attr "mode" "<sseinsnmode>")])
1363
;; movnti: store a general register (SWI48 modes) to memory, modelled as
;; UNSPEC_MOVNT.
1364 (define_insn "sse2_movnti<mode>"
1365 [(set (match_operand:SWI48 0 "memory_operand" "=m")
1366 (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")]
1367 UNSPEC_MOVNT))]
1368 "TARGET_SSE2"
1369 "movnti\t{%1, %0|%0, %1}"
1370 [(set_attr "type" "ssemov")
1371 (set_attr "prefix_data16" "0")
1372 (set_attr "mode" "<MODE>")])
1373
;; (v)movnt<ssemodesuffix>: store a VF-mode vector register to memory,
;; modelled as UNSPEC_MOVNT.
1374 (define_insn "<sse>_movnt<mode>"
1375 [(set (match_operand:VF 0 "memory_operand" "=m")
1376 (unspec:VF
1377 [(match_operand:VF 1 "register_operand" "v")]
1378 UNSPEC_MOVNT))]
1379 "TARGET_SSE"
1380 "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
1381 [(set_attr "type" "ssemov")
1382 (set_attr "prefix" "maybe_vex")
1383 (set_attr "mode" "<MODE>")])
1384
;; (v)movntdq: store a VI8-mode vector register to memory, modelled as
;; UNSPEC_MOVNT.
;; NOTE(review): the type attribute is "ssecvt" here while the sibling
;; movnt patterns use "ssemov" -- confirm whether that is intentional.
1385 (define_insn "<sse2>_movnt<mode>"
1386 [(set (match_operand:VI8 0 "memory_operand" "=m")
1387 (unspec:VI8 [(match_operand:VI8 1 "register_operand" "v")]
1388 UNSPEC_MOVNT))]
1389 "TARGET_SSE2"
1390 "%vmovntdq\t{%1, %0|%0, %1}"
1391 [(set_attr "type" "ssecvt")
1392 (set (attr "prefix_data16")
1393 (if_then_else
1394 (match_test "TARGET_AVX")
1395 (const_string "*")
1396 (const_string "1")))
1397 (set_attr "prefix" "maybe_vex")
1398 (set_attr "mode" "<sseinsnmode>")])
1399
1400 ; Expand patterns for non-temporal stores. At the moment, only those
1401 ; that directly map to insns are defined; it would be possible to
1402 ; define patterns for other modes that would expand to several insns.
1403
1404 ;; Modes handled by storent patterns.
;; Each entry carries its own enabling condition; V4SF has none, so it is
;; available whenever the using pattern's own condition holds.
1405 (define_mode_iterator STORENT_MODE
1406 [(DI "TARGET_SSE2 && TARGET_64BIT") (SI "TARGET_SSE2")
1407 (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
1408 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2")
1409 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
1410 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
1411
;; Named expander for the STORENT_MODE modes.  It has no preparation
;; code, so it emits the UNSPEC_MOVNT set exactly as written and relies
;; on an insn pattern to match it.
1412 (define_expand "storent<mode>"
1413 [(set (match_operand:STORENT_MODE 0 "memory_operand")
1414 (unspec:STORENT_MODE
1415 [(match_operand:STORENT_MODE 1 "register_operand")]
1416 UNSPEC_MOVNT))]
1417 "TARGET_SSE")
1418
1419 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1420 ;;
1421 ;; Mask operations
1422 ;;
1423 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1424
1425 ;; All integer modes with AVX512BW/DQ.
;; QI needs TARGET_AVX512DQ, SI and DI need TARGET_AVX512BW, HI is
;; unconditional.
1426 (define_mode_iterator SWI1248_AVX512BWDQ
1427 [(QI "TARGET_AVX512DQ") HI (SI "TARGET_AVX512BW") (DI "TARGET_AVX512BW")])
1428
1429 ;; All integer modes with AVX512BW, where HImode operation
1430 ;; can be used instead of QImode.
;; QI and HI are unconditional; SI and DI need TARGET_AVX512BW.
1431 (define_mode_iterator SWI1248_AVX512BW
1432 [QI HI (SI "TARGET_AVX512BW") (DI "TARGET_AVX512BW")])
1433
1434 ;; All integer modes with AVX512BW/DQ, even HImode requires DQ.
;; QI and HI need TARGET_AVX512DQ; SI and DI need TARGET_AVX512BW.
1435 (define_mode_iterator SWI1248_AVX512BWDQ2
1436 [(QI "TARGET_AVX512DQ") (HI "TARGET_AVX512DQ")
1437 (SI "TARGET_AVX512BW") (DI "TARGET_AVX512BW")])
1438
;; Named expander that emits a plain set between two SWI1248_AVX512BWDQ
;; operands.  It has no preparation code; the condition rejects the
;; memory-to-memory case.
1439 (define_expand "kmov<mskmodesuffix>"
1440 [(set (match_operand:SWI1248_AVX512BWDQ 0 "nonimmediate_operand")
1441 (match_operand:SWI1248_AVX512BWDQ 1 "nonimmediate_operand"))]
1442 "TARGET_AVX512F
1443 && !(MEM_P (operands[0]) && MEM_P (operands[1]))")
1444
;; Mask-register logic operation (any_logic code iterator) on "k"
;; registers, tagged with UNSPEC_MASKOP.  For QImode without
;; TARGET_AVX512DQ the "mode" attribute is HI and the word form
;; k<logic>w is emitted; otherwise the <mskmodesuffix> form is used.
1445 (define_insn "k<code><mode>"
1446 [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
1447 (any_logic:SWI1248_AVX512BW
1448 (match_operand:SWI1248_AVX512BW 1 "register_operand" "k")
1449 (match_operand:SWI1248_AVX512BW 2 "register_operand" "k")))
1450 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1451 "TARGET_AVX512F"
1452 {
1453 if (get_attr_mode (insn) == MODE_HI)
1454 return "k<logic>w\t{%2, %1, %0|%0, %1, %2}";
1455 else
1456 return "k<logic><mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
1457 }
1458 [(set_attr "type" "msklog")
1459 (set_attr "prefix" "vex")
1460 (set (attr "mode")
1461 (cond [(and (match_test "<MODE>mode == QImode")
1462 (not (match_test "TARGET_AVX512DQ")))
1463 (const_string "HI")
1464 ]
1465 (const_string "<MODE>")))])
1466
;; Mask-register and-not: operand 0 = (not operand 1) and operand 2,
;; tagged with UNSPEC_MASKOP.  For QImode without TARGET_AVX512DQ the
;; "mode" attribute is HI and kandnw is emitted instead of the
;; <mskmodesuffix> form.
1467 (define_insn "kandn<mode>"
1468 [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
1469 (and:SWI1248_AVX512BW
1470 (not:SWI1248_AVX512BW
1471 (match_operand:SWI1248_AVX512BW 1 "register_operand" "k"))
1472 (match_operand:SWI1248_AVX512BW 2 "register_operand" "k")))
1473 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1474 "TARGET_AVX512F"
1475 {
1476 if (get_attr_mode (insn) == MODE_HI)
1477 return "kandnw\t{%2, %1, %0|%0, %1, %2}";
1478 else
1479 return "kandn<mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
1480 }
1481 [(set_attr "type" "msklog")
1482 (set_attr "prefix" "vex")
1483 (set (attr "mode")
1484 (cond [(and (match_test "<MODE>mode == QImode")
1485 (not (match_test "TARGET_AVX512DQ")))
1486 (const_string "HI")
1487 ]
1488 (const_string "<MODE>")))])
1489
;; Mask-register xnor: operand 0 = not (operand 1 xor operand 2), tagged
;; with UNSPEC_MASKOP.  For QImode without TARGET_AVX512DQ the "mode"
;; attribute is HI and kxnorw is emitted instead of the <mskmodesuffix>
;; form.
1490 (define_insn "kxnor<mode>"
1491 [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
1492 (not:SWI1248_AVX512BW
1493 (xor:SWI1248_AVX512BW
1494 (match_operand:SWI1248_AVX512BW 1 "register_operand" "k")
1495 (match_operand:SWI1248_AVX512BW 2 "register_operand" "k"))))
1496 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1497 "TARGET_AVX512F"
1498 {
1499 if (get_attr_mode (insn) == MODE_HI)
1500 return "kxnorw\t{%2, %1, %0|%0, %1, %2}";
1501 else
1502 return "kxnor<mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
1503 }
1504 [(set_attr "type" "msklog")
1505 (set_attr "prefix" "vex")
1506 (set (attr "mode")
1507 (cond [(and (match_test "<MODE>mode == QImode")
1508 (not (match_test "TARGET_AVX512DQ")))
1509 (const_string "HI")
1510 ]
1511 (const_string "<MODE>")))])
1512
;; Mask-register not: operand 0 = not operand 1, tagged with
;; UNSPEC_MASKOP.  For QImode without TARGET_AVX512DQ the "mode"
;; attribute is HI and knotw is emitted instead of the <mskmodesuffix>
;; form.
1513 (define_insn "knot<mode>"
1514 [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
1515 (not:SWI1248_AVX512BW
1516 (match_operand:SWI1248_AVX512BW 1 "register_operand" "k")))
1517 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1518 "TARGET_AVX512F"
1519 {
1520 if (get_attr_mode (insn) == MODE_HI)
1521 return "knotw\t{%1, %0|%0, %1}";
1522 else
1523 return "knot<mskmodesuffix>\t{%1, %0|%0, %1}";
1524 }
1525 [(set_attr "type" "msklog")
1526 (set_attr "prefix" "vex")
1527 (set (attr "mode")
1528 (cond [(and (match_test "<MODE>mode == QImode")
1529 (not (match_test "TARGET_AVX512DQ")))
1530 (const_string "HI")
1531 ]
1532 (const_string "<MODE>")))])
1533
;; Mask-register add on "k" registers, tagged with UNSPEC_MASKOP.  Uses
;; the SWI1248_AVX512BWDQ2 iterator, so there is no HImode fallback: the
;; <mskmodesuffix> form is always emitted.
1534 (define_insn "kadd<mode>"
1535 [(set (match_operand:SWI1248_AVX512BWDQ2 0 "register_operand" "=k")
1536 (plus:SWI1248_AVX512BWDQ2
1537 (match_operand:SWI1248_AVX512BWDQ2 1 "register_operand" "k")
1538 (match_operand:SWI1248_AVX512BWDQ2 2 "register_operand" "k")))
1539 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1540 "TARGET_AVX512F"
1541 "kadd<mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1542 [(set_attr "type" "msklog")
1543 (set_attr "prefix" "vex")
1544 (set_attr "mode" "<MODE>")])
1545
1546 ;; Mask variant shift mnemonics
;; ashift -> "shiftl", lshiftrt -> "shiftr"; the shift pattern below
;; wraps the result as k<mshift><mskmodesuffix>.
1547 (define_code_attr mshift [(ashift "shiftl") (lshiftrt "shiftr")])
1548
;; Mask-register shift (any_lshift code iterator) of operand 1 by the
;; QImode immediate operand 2, tagged with UNSPEC_MASKOP and emitted as
;; k<mshift><mskmodesuffix>.
1549 (define_insn "k<code><mode>"
1550 [(set (match_operand:SWI1248_AVX512BWDQ 0 "register_operand" "=k")
1551 (any_lshift:SWI1248_AVX512BWDQ
1552 (match_operand:SWI1248_AVX512BWDQ 1 "register_operand" "k")
1553 (match_operand:QI 2 "immediate_operand" "n")))
1554 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1555 "TARGET_AVX512F"
1556 "k<mshift><mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1557 [(set_attr "type" "msklog")
1558 (set_attr "prefix" "vex")
1559 (set_attr "mode" "<MODE>")])
1560
;; ktest: sets FLAGS_REG (CC mode) from two mask registers, modelled as
;; UNSPEC_KTEST.  Uses the SWI1248_AVX512BWDQ2 iterator.
1561 (define_insn "ktest<mode>"
1562 [(set (reg:CC FLAGS_REG)
1563 (unspec:CC
1564 [(match_operand:SWI1248_AVX512BWDQ2 0 "register_operand" "k")
1565 (match_operand:SWI1248_AVX512BWDQ2 1 "register_operand" "k")]
1566 UNSPEC_KTEST))]
1567 "TARGET_AVX512F"
1568 "ktest<mskmodesuffix>\t{%1, %0|%0, %1}"
1569 [(set_attr "mode" "<MODE>")
1570 (set_attr "type" "msklog")
1571 (set_attr "prefix" "vex")])
1572
;; kortest: sets FLAGS_REG (CC mode) from two mask registers, modelled as
;; UNSPEC_KORTEST.  Uses the SWI1248_AVX512BWDQ iterator.
1573 (define_insn "kortest<mode>"
1574 [(set (reg:CC FLAGS_REG)
1575 (unspec:CC
1576 [(match_operand:SWI1248_AVX512BWDQ 0 "register_operand" "k")
1577 (match_operand:SWI1248_AVX512BWDQ 1 "register_operand" "k")]
1578 UNSPEC_KORTEST))]
1579 "TARGET_AVX512F"
1580 "kortest<mskmodesuffix>\t{%1, %0|%0, %1}"
1581 [(set_attr "mode" "<MODE>")
1582 (set_attr "type" "msklog")
1583 (set_attr "prefix" "vex")])
1584
;; kunpckbw: HImode result = (zero-extended QI operand 1 << 8)
;; | zero-extended QI operand 2, all in mask registers.
1585 (define_insn "kunpckhi"
1586 [(set (match_operand:HI 0 "register_operand" "=k")
1587 (ior:HI
1588 (ashift:HI
1589 (zero_extend:HI (match_operand:QI 1 "register_operand" "k"))
1590 (const_int 8))
1591 (zero_extend:HI (match_operand:QI 2 "register_operand" "k"))))]
1592 "TARGET_AVX512F"
1593 "kunpckbw\t{%2, %1, %0|%0, %1, %2}"
1594 [(set_attr "mode" "HI")
1595 (set_attr "type" "msklog")
1596 (set_attr "prefix" "vex")])
1597
;; kunpckwd: SImode result = (zero-extended HI operand 1 << 16)
;; | zero-extended HI operand 2, all in mask registers.
;; The "type" and "prefix" attributes are set the same way as on kunpckhi
;; so this insn does not fall back to the attribute defaults.
1598 (define_insn "kunpcksi"
1599 [(set (match_operand:SI 0 "register_operand" "=k")
1600 (ior:SI
1601 (ashift:SI
1602 (zero_extend:SI (match_operand:HI 1 "register_operand" "k"))
1603 (const_int 16))
1604 (zero_extend:SI (match_operand:HI 2 "register_operand" "k"))))]
1605 "TARGET_AVX512BW"
1606 "kunpckwd\t{%2, %1, %0|%0, %1, %2}"
1607 [(set_attr "mode" "SI")
(set_attr "type" "msklog")
(set_attr "prefix" "vex")])
1608
;; kunpckdq: DImode result = (zero-extended SI operand 1 << 32)
;; | zero-extended SI operand 2, all in mask registers.
;; The "type" and "prefix" attributes are set the same way as on kunpckhi
;; so this insn does not fall back to the attribute defaults.
1609 (define_insn "kunpckdi"
1610 [(set (match_operand:DI 0 "register_operand" "=k")
1611 (ior:DI
1612 (ashift:DI
1613 (zero_extend:DI (match_operand:SI 1 "register_operand" "k"))
1614 (const_int 32))
1615 (zero_extend:DI (match_operand:SI 2 "register_operand" "k"))))]
1616 "TARGET_AVX512BW"
1617 "kunpckdq\t{%2, %1, %0|%0, %1, %2}"
1618 [(set_attr "mode" "DI")
(set_attr "type" "msklog")
(set_attr "prefix" "vex")])
1619
1620
1621 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1622 ;;
1623 ;; Parallel floating point arithmetic
1624 ;;
1625 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1626
;; Named expander for the absneg code iterator on VF modes.  All work is
;; delegated to ix86_expand_fp_absneg_operator; the RTL template is never
;; emitted because the preparation code ends with DONE.
1627 (define_expand "<code><mode>2"
1628 [(set (match_operand:VF 0 "register_operand")
1629 (absneg:VF
1630 (match_operand:VF 1 "register_operand")))]
1631 "TARGET_SSE"
1632 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
1633
;; Vector abs/neg with an extra (use) operand 2.  Stays as "#" until
;; reload_completed and is then split into one logic insn:
;; operand 0 = op1 XOR op2 for NEG, op1 AND op2 otherwise.
;; NOTE(review): operand 2 is presumably the sign-bit mask prepared by
;; the expander -- confirm in ix86_expand_fp_absneg_operator.
1634 (define_insn_and_split "*absneg<mode>2"
1635 [(set (match_operand:VF 0 "register_operand" "=x,x,v,v")
1636 (match_operator:VF 3 "absneg_operator"
1637 [(match_operand:VF 1 "vector_operand" "0, xBm,v, m")]))
1638 (use (match_operand:VF 2 "vector_operand" "xBm,0, vm,v"))]
1639 "TARGET_SSE"
1640 "#"
1641 "&& reload_completed"
1642 [(const_int 0)]
1643 {
1644 enum rtx_code absneg_op;
1645 rtx op1, op2;
1646 rtx t;
1647
/* With AVX, order the two inputs so that a memory operand 1 ends up as
   the second source.  */
1648 if (TARGET_AVX)
1649 {
1650 if (MEM_P (operands[1]))
1651 op1 = operands[2], op2 = operands[1];
1652 else
1653 op1 = operands[1], op2 = operands[2];
1654 }
1655 else
1656 {
/* Without AVX the first source is the destination itself; the second
   source is whichever input is not tied to it.  */
1657 op1 = operands[0];
1658 if (rtx_equal_p (operands[0], operands[1]))
1659 op2 = operands[2];
1660 else
1661 op2 = operands[1];
1662 }
1663
1664 absneg_op = GET_CODE (operands[3]) == NEG ? XOR : AND;
1665 t = gen_rtx_fmt_ee (absneg_op, <MODE>mode, op1, op2);
1666 t = gen_rtx_SET (operands[0], t);
1667 emit_insn (t);
1668 DONE;
1669 }
1670 [(set_attr "isa" "noavx,noavx,avx,avx")])
1671
;; Named expander for vector add/sub (plusminus code iterator) on VF
;; modes.  The preparation code only calls
;; ix86_fixup_binary_operands_no_copy; the RTL template is then emitted
;; as written.
1672 (define_expand "<plusminus_insn><mode>3<mask_name><round_name>"
1673 [(set (match_operand:VF 0 "register_operand")
1674 (plusminus:VF
1675 (match_operand:VF 1 "<round_nimm_predicate>")
1676 (match_operand:VF 2 "<round_nimm_predicate>")))]
1677 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1678 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
1679
;; Vector add/sub insn for VF modes.  Alternative 0 (isa noavx) is the
;; two-operand form with operand 1 tied to operand 0; alternative 1
;; (isa avx) is the three-operand "v" form carrying the mask and rounding
;; substitutions.
1680 (define_insn "*<plusminus_insn><mode>3<mask_name><round_name>"
1681 [(set (match_operand:VF 0 "register_operand" "=x,v")
1682 (plusminus:VF
1683 (match_operand:VF 1 "<round_nimm_predicate>" "<comm>0,v")
1684 (match_operand:VF 2 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
1685 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
1686 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1687 "@
1688 <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
1689 v<plusminus_mnemonic><ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1690 [(set_attr "isa" "noavx,avx")
1691 (set_attr "type" "sseadd")
1692 (set_attr "prefix" "<mask_prefix3>")
1693 (set_attr "mode" "<MODE>")])
1694
;; Vector subtract whose second source is a scalar memory operand
;; duplicated across the vector (vec_duplicate); the <avx512bcst>
;; attribute is appended to that operand in both assembler dialects.
1695 (define_insn "*sub<mode>3<mask_name>_bcst"
1696 [(set (match_operand:VF_AVX512 0 "register_operand" "=v")
1697 (minus:VF_AVX512
1698 (match_operand:VF_AVX512 1 "register_operand" "v")
1699 (vec_duplicate:VF_AVX512
1700 (match_operand:<ssescalarmode> 2 "memory_operand" "m"))))]
1701 "TARGET_AVX512F
1702 && ix86_binary_operator_ok (MINUS, <MODE>mode, operands)
1703 && <mask_mode512bit_condition>"
1704 "vsub<ssemodesuffix>\t{%2<avx512bcst>, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<avx512bcst>}"
1705 [(set_attr "prefix" "evex")
1706 (set_attr "type" "sseadd")
1707 (set_attr "mode" "<MODE>")])
1708
;; Vector add whose first source is a scalar memory operand duplicated
;; across the vector (vec_duplicate); the <avx512bcst> attribute is
;; appended to that operand in both assembler dialects.
1709 (define_insn "*add<mode>3<mask_name>_bcst"
1710 [(set (match_operand:VF_AVX512 0 "register_operand" "=v")
1711 (plus:VF_AVX512
1712 (vec_duplicate:VF_AVX512
1713 (match_operand:<ssescalarmode> 1 "memory_operand" "m"))
1714 (match_operand:VF_AVX512 2 "register_operand" "v")))]
1715 "TARGET_AVX512F
1716 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)
1717 && <mask_mode512bit_condition>"
1718 "vadd<ssemodesuffix>\t{%1<avx512bcst>, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1<avx512bcst>}"
1719 [(set_attr "prefix" "evex")
1720 (set_attr "type" "sseadd")
1721 (set_attr "mode" "<MODE>")])
1722
;; Scalar add/sub on VF_128 modes: the vec_merge with mask (const_int 1)
;; takes element 0 from the plusminus result and the remaining elements
;; from operand 1 (match_dup 1).  Alternative 0 is the two-operand noavx
;; form, alternative 1 the three-operand avx form.
1723 (define_insn "<sse>_vm<plusminus_insn><mode>3<mask_scalar_name><round_scalar_name>"
1724 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1725 (vec_merge:VF_128
1726 (plusminus:VF_128
1727 (match_operand:VF_128 1 "register_operand" "0,v")
1728 (match_operand:VF_128 2 "vector_operand" "xBm,<round_scalar_constraint>"))
1729 (match_dup 1)
1730 (const_int 1)))]
1731 "TARGET_SSE"
1732 "@
1733 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1734 v<plusminus_mnemonic><ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_scalar_mask_op3>}"
1735 [(set_attr "isa" "noavx,avx")
1736 (set_attr "type" "sseadd")
1737 (set_attr "prefix" "<round_scalar_prefix>")
1738 (set_attr "mode" "<ssescalarmode>")])
1739
;; Named expander for vector multiply on VF modes.  The preparation code
;; only calls ix86_fixup_binary_operands_no_copy; the RTL template is
;; then emitted as written.
1740 (define_expand "mul<mode>3<mask_name><round_name>"
1741 [(set (match_operand:VF 0 "register_operand")
1742 (mult:VF
1743 (match_operand:VF 1 "<round_nimm_predicate>")
1744 (match_operand:VF 2 "<round_nimm_predicate>")))]
1745 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1746 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
1747
;; Vector multiply insn for VF modes.  Operand 1 is marked commutative
;; ("%"), and the condition rejects the case where both sources are
;; memory.  Alternative 0 is the two-operand noavx form, alternative 1
;; the three-operand avx form carrying the mask and rounding
;; substitutions.
1748 (define_insn "*mul<mode>3<mask_name><round_name>"
1749 [(set (match_operand:VF 0 "register_operand" "=x,v")
1750 (mult:VF
1751 (match_operand:VF 1 "<round_nimm_predicate>" "%0,v")
1752 (match_operand:VF 2 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
1753 "TARGET_SSE
1754 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
1755 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1756 "@
1757 mul<ssemodesuffix>\t{%2, %0|%0, %2}
1758 vmul<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1759 [(set_attr "isa" "noavx,avx")
1760 (set_attr "type" "ssemul")
1761 (set_attr "prefix" "<mask_prefix3>")
1762 (set_attr "btver2_decode" "direct,double")
1763 (set_attr "mode" "<MODE>")])
1764
;; Vector multiply whose first source is a scalar memory operand
;; duplicated across the vector (vec_duplicate).  The <avx512bcst>
;; attribute is appended to that operand in both assembler dialects; the
;; Intel half must use a single pair of angle brackets so that no literal
;; '<' or '>' is left in the output after attribute substitution.
1765 (define_insn "*mul<mode>3<mask_name>_bcst"
1766 [(set (match_operand:VF_AVX512 0 "register_operand" "=v")
1767 (mult:VF_AVX512
1768 (vec_duplicate:VF_AVX512
1769 (match_operand:<ssescalarmode> 1 "memory_operand" "m"))
1770 (match_operand:VF_AVX512 2 "register_operand" "v")))]
1771 "TARGET_AVX512F && <mask_mode512bit_condition>"
1772 "vmul<ssemodesuffix>\t{%1<avx512bcst>, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1<avx512bcst>}"
1773 [(set_attr "prefix" "evex")
1774 (set_attr "type" "ssemul")
1775 (set_attr "mode" "<MODE>")])
1776
;; Scalar multiply/divide (multdiv code iterator) on VF_128 modes: the
;; vec_merge with mask (const_int 1) takes element 0 from the multdiv
;; result and the remaining elements from operand 1 (match_dup 1).
;; Alternative 0 is the two-operand noavx form, alternative 1 the
;; three-operand avx form.  The "type" attribute is built from the
;; mnemonic ("sse<multdiv_mnemonic>").
1777 (define_insn "<sse>_vm<multdiv_mnemonic><mode>3<mask_scalar_name><round_scalar_name>"
1778 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1779 (vec_merge:VF_128
1780 (multdiv:VF_128
1781 (match_operand:VF_128 1 "register_operand" "0,v")
1782 (match_operand:VF_128 2 "vector_operand" "xBm,<round_scalar_constraint>"))
1783 (match_dup 1)
1784 (const_int 1)))]
1785 "TARGET_SSE"
1786 "@
1787 <multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1788 v<multdiv_mnemonic><ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_scalar_mask_op3>}"
1789 [(set_attr "isa" "noavx,avx")
1790 (set_attr "type" "sse<multdiv_mnemonic>")
1791 (set_attr "prefix" "<round_scalar_prefix>")
1792 (set_attr "btver2_decode" "direct,double")
1793 (set_attr "mode" "<ssescalarmode>")])
1794
;; Named expander for vector divide on VF2 modes (requires SSE2).  The
;; preparation code only calls ix86_fixup_binary_operands_no_copy; the
;; RTL template is then emitted as written.
1795 (define_expand "div<mode>3"
1796 [(set (match_operand:VF2 0 "register_operand")
1797 (div:VF2 (match_operand:VF2 1 "register_operand")
1798 (match_operand:VF2 2 "vector_operand")))]
1799 "TARGET_SSE2"
1800 "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")
1801
;; Named expander for vector divide on VF1 modes.  After fixing up the
;; operands it emits the software sequence from ix86_emit_swdivsf and
;; finishes with DONE when all of the listed conditions hold
;; (TARGET_SSE_MATH, TARGET_RECIP_VEC_DIV, not optimizing for size,
;; finite-math-only, no trapping math, unsafe math optimizations);
;; otherwise the plain div template is emitted.
1802 (define_expand "div<mode>3"
1803 [(set (match_operand:VF1 0 "register_operand")
1804 (div:VF1 (match_operand:VF1 1 "register_operand")
1805 (match_operand:VF1 2 "vector_operand")))]
1806 "TARGET_SSE"
1807 {
1808 ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);
1809
1810 if (TARGET_SSE_MATH
1811 && TARGET_RECIP_VEC_DIV
1812 && !optimize_insn_for_size_p ()
1813 && flag_finite_math_only && !flag_trapping_math
1814 && flag_unsafe_math_optimizations)
1815 {
1816 ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
1817 DONE;
1818 }
1819 })
1820
;; Vector divide insn for VF modes.  Operand 1 must be a register.
;; Alternative 0 is the two-operand noavx form with operand 1 tied to
;; operand 0, alternative 1 the three-operand avx form carrying the mask
;; and rounding substitutions.
1821 (define_insn "<sse>_div<mode>3<mask_name><round_name>"
1822 [(set (match_operand:VF 0 "register_operand" "=x,v")
1823 (div:VF
1824 (match_operand:VF 1 "register_operand" "0,v")
1825 (match_operand:VF 2 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
1826 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1827 "@
1828 div<ssemodesuffix>\t{%2, %0|%0, %2}
1829 vdiv<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1830 [(set_attr "isa" "noavx,avx")
1831 (set_attr "type" "ssediv")
1832 (set_attr "prefix" "<mask_prefix3>")
1833 (set_attr "mode" "<MODE>")])
1834
;; Division whose divisor is a scalar memory operand replicated across the
;; vector (vec_duplicate of operand 2).  Anonymous pattern (leading '*'),
;; so it is only reachable through RTL matching, not through a gen_* call.
;; Requires TARGET_AVX512F; the output template appends the <avx512bcst>
;; decoration to the memory operand and the pattern is EVEX-prefixed.
1835 (define_insn "*<avx512>_div<mode>3<mask_name>_bcst"
1836 [(set (match_operand:VF_AVX512 0 "register_operand" "=v")
1837 (div:VF_AVX512
1838 (match_operand:VF_AVX512 1 "register_operand" "v")
1839 (vec_duplicate:VF_AVX512
1840 (match_operand:<ssescalarmode> 2 "memory_operand" "m"))))]
1841 "TARGET_AVX512F && <mask_mode512bit_condition>"
1842 "vdiv<ssemodesuffix>\t{%2<avx512bcst>, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<<avx512bcst>>}"
1843 [(set_attr "prefix" "evex")
1844 (set_attr "type" "ssediv")
1845 (set_attr "mode" "<MODE>")])
1846
;; Packed reciprocal, modelled as UNSPEC_RCP over the VF1_128_256 modes.
;; Emits rcpps; the "%v" in the template presumably prints the VEX 'v'
;; prefix when AVX is enabled, consistent with prefix "maybe_vex" -- confirm
;; against the operand-printing code.
1847 (define_insn "<sse>_rcp<mode>2"
1848 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
1849 (unspec:VF1_128_256
1850 [(match_operand:VF1_128_256 1 "vector_operand" "xBm")] UNSPEC_RCP))]
1851 "TARGET_SSE"
1852 "%vrcpps\t{%1, %0|%0, %1}"
1853 [(set_attr "type" "sse")
1854 (set_attr "atom_sse_attr" "rcp")
1855 (set_attr "btver2_sse_attr" "rcp")
1856 (set_attr "prefix" "maybe_vex")
1857 (set_attr "mode" "<MODE>")])
1858
;; Scalar reciprocal on V4SF: vec_merge with mask 1 takes element 0 from
;; UNSPEC_RCP of operand 1 and the remaining elements from operand 2.
;; Alternative 0 (noavx) emits rcpss with operand 2 tied to the destination;
;; alternative 1 (avx) emits the three-operand vrcpss.
1859 (define_insn "sse_vmrcpv4sf2"
1860 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1861 (vec_merge:V4SF
1862 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
1863 UNSPEC_RCP)
1864 (match_operand:V4SF 2 "register_operand" "0,x")
1865 (const_int 1)))]
1866 "TARGET_SSE"
1867 "@
1868 rcpss\t{%1, %0|%0, %k1}
1869 vrcpss\t{%1, %2, %0|%0, %2, %k1}"
1870 [(set_attr "isa" "noavx,avx")
1871 (set_attr "type" "sse")
1872 (set_attr "atom_sse_attr" "rcp")
1873 (set_attr "btver2_sse_attr" "rcp")
1874 (set_attr "prefix" "orig,vex")
1875 (set_attr "mode" "SF")])
1876
;; Packed vrcp14 (UNSPEC_RCP14) over the VF_AVX512VL modes, with an optional
;; masked variant generated by the <mask_name> substitution.
;; Requires TARGET_AVX512F; always EVEX-encoded.
1877 (define_insn "<mask_codefor>rcp14<mode><mask_name>"
1878 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
1879 (unspec:VF_AVX512VL
1880 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")]
1881 UNSPEC_RCP14))]
1882 "TARGET_AVX512F"
1883 "vrcp14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
1884 [(set_attr "type" "sse")
1885 (set_attr "prefix" "evex")
1886 (set_attr "mode" "<MODE>")])
1887
;; Scalar vrcp14 over the VF_128 modes: element 0 comes from UNSPEC_RCP14 of
;; operand 1, the other elements from operand 2 (vec_merge mask 1).
;; Requires TARGET_AVX512F; EVEX-encoded.
1888 (define_insn "srcp14<mode>"
1889 [(set (match_operand:VF_128 0 "register_operand" "=v")
1890 (vec_merge:VF_128
1891 (unspec:VF_128
1892 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
1893 UNSPEC_RCP14)
1894 (match_operand:VF_128 2 "register_operand" "v")
1895 (const_int 1)))]
1896 "TARGET_AVX512F"
1897 "vrcp14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %<iptr>1}"
1898 [(set_attr "type" "sse")
1899 (set_attr "prefix" "evex")
1900 (set_attr "mode" "<MODE>")])
1901
;; Masked scalar vrcp14.  The inner vec_merge blends the UNSPEC_RCP14 result
;; with operand 3 under the mask register operand 4 (constraint "Yk");
;; the outer vec_merge (mask 1) then keeps element 0 of that blend and takes
;; the remaining elements from operand 2.
;; Operand 3 is either tied to the destination ("0") or matches "C"
;; (presumably the zero constant, giving zero-masking via %N3 -- confirm).
1902 (define_insn "srcp14<mode>_mask"
1903 [(set (match_operand:VF_128 0 "register_operand" "=v")
1904 (vec_merge:VF_128
1905 (vec_merge:VF_128
1906 (unspec:VF_128
1907 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
1908 UNSPEC_RCP14)
1909 (match_operand:VF_128 3 "nonimm_or_0_operand" "0C")
1910 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk"))
1911 (match_operand:VF_128 2 "register_operand" "v")
1912 (const_int 1)))]
1913 "TARGET_AVX512F"
1914 "vrcp14<ssescalarmodesuffix>\t{%1, %2, %0%{%4%}%N3|%0%{%4%}%N3, %2, %<iptr>1}"
1915 [(set_attr "type" "sse")
1916 (set_attr "prefix" "evex")
1917 (set_attr "mode" "<MODE>")])
1918
;; Expander for the named pattern "sqrt<mode>2" over the VF2 mode iterator
;; (defined outside this chunk).  Enabled under TARGET_SSE2.  There are no
;; preparation statements, so the SQRT template is emitted as written.
1919 (define_expand "sqrt<mode>2"
1920 [(set (match_operand:VF2 0 "register_operand")
1921 (sqrt:VF2 (match_operand:VF2 1 "vector_operand")))]
1922 "TARGET_SSE2")
1923
;; Expander for "sqrt<mode>2" over the VF1 mode iterator (defined outside
;; this chunk).  Enabled under TARGET_SSE.
;; When SSE math is in use, TARGET_RECIP_VEC_SQRT is set, the insn is not
;; optimized for size, and finite-math-only, non-trapping and
;; unsafe-math-optimizations are all in effect, the expansion is delegated
;; to ix86_emit_swsqrtsf with its last argument false and ends with DONE.
;; Otherwise the plain SQRT template is emitted.
1924 (define_expand "sqrt<mode>2"
1925 [(set (match_operand:VF1 0 "register_operand")
1926 (sqrt:VF1 (match_operand:VF1 1 "vector_operand")))]
1927 "TARGET_SSE"
1928 {
1929 if (TARGET_SSE_MATH
1930 && TARGET_RECIP_VEC_SQRT
1931 && !optimize_insn_for_size_p ()
1932 && flag_finite_math_only && !flag_trapping_math
1933 && flag_unsafe_math_optimizations)
1934 {
1935 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
1936 DONE;
1937 }
1938 })
1939
;; Packed square root insn over the VF mode iterator.
;; Alternative 0 (noavx) emits sqrt<suffix>; alternative 1 (avx) emits
;; vsqrt<suffix> with the optional mask and rounding decorations supplied by
;; the <mask_name> and <round_name> substitutions.
1940 (define_insn "<sse>_sqrt<mode>2<mask_name><round_name>"
1941 [(set (match_operand:VF 0 "register_operand" "=x,v")
1942 (sqrt:VF (match_operand:VF 1 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
1943 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1944 "@
1945 sqrt<ssemodesuffix>\t{%1, %0|%0, %1}
1946 vsqrt<ssemodesuffix>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
1947 [(set_attr "isa" "noavx,avx")
1948 (set_attr "type" "sse")
1949 (set_attr "atom_sse_attr" "sqrt")
1950 (set_attr "btver2_sse_attr" "sqrt")
1951 (set_attr "prefix" "maybe_vex")
1952 (set_attr "mode" "<MODE>")])
1953
;; Scalar square root over the VF_128 modes: vec_merge with mask 1 takes
;; element 0 from sqrt(operand 1) and the remaining elements from operand 2.
;; Alternative 0 (noavx) ties operand 2 to the destination; alternative 1
;; (avx) is the three-operand form with the optional scalar mask/rounding
;; decorations from the <mask_scalar_name>/<round_scalar_name> substitutions.
1954 (define_insn "<sse>_vmsqrt<mode>2<mask_scalar_name><round_scalar_name>"
1955 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1956 (vec_merge:VF_128
1957 (sqrt:VF_128
1958 (match_operand:VF_128 1 "vector_operand" "xBm,<round_scalar_constraint>"))
1959 (match_operand:VF_128 2 "register_operand" "0,v")
1960 (const_int 1)))]
1961 "TARGET_SSE"
1962 "@
1963 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}
1964 vsqrt<ssescalarmodesuffix>\t{<round_scalar_mask_op3>%1, %2, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %2, %<iptr>1<round_scalar_mask_op3>}"
1965 [(set_attr "isa" "noavx,avx")
1966 (set_attr "type" "sse")
1967 (set_attr "atom_sse_attr" "sqrt")
1968 (set_attr "prefix" "<round_scalar_prefix>")
1969 (set_attr "btver2_sse_attr" "sqrt")
1970 (set_attr "mode" "<ssescalarmode>")])
1971
;; Expander for the named pattern "rsqrt<mode>2" over the VF1_128_256 modes.
;; Enabled under TARGET_SSE_MATH.  The body always delegates to
;; ix86_emit_swsqrtsf with its last argument true and ends with DONE, so the
;; UNSPEC_RSQRT template in the pattern is never emitted by this expander.
1972 (define_expand "rsqrt<mode>2"
1973 [(set (match_operand:VF1_128_256 0 "register_operand")
1974 (unspec:VF1_128_256
1975 [(match_operand:VF1_128_256 1 "vector_operand")] UNSPEC_RSQRT))]
1976 "TARGET_SSE_MATH"
1977 {
1978 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
1979 DONE;
1980 })
1981
;; V16SF counterpart of the rsqrt expander; its template uses UNSPEC_RSQRT28.
;; Enabled only when both TARGET_SSE_MATH and TARGET_AVX512ER hold.
;; As with the narrower modes, the body always calls ix86_emit_swsqrtsf
;; (last argument true) and ends with DONE.
1982 (define_expand "rsqrtv16sf2"
1983 [(set (match_operand:V16SF 0 "register_operand")
1984 (unspec:V16SF
1985 [(match_operand:V16SF 1 "vector_operand")]
1986 UNSPEC_RSQRT28))]
1987 "TARGET_SSE_MATH && TARGET_AVX512ER"
1988 {
1989 ix86_emit_swsqrtsf (operands[0], operands[1], V16SFmode, true);
1990 DONE;
1991 })
1992
;; Packed reciprocal square root, modelled as UNSPEC_RSQRT over the
;; VF1_128_256 modes.  Emits rsqrtps, with "%v" presumably adding the VEX
;; 'v' prefix under AVX (prefix attribute is "maybe_vex") -- confirm.
1993 (define_insn "<sse>_rsqrt<mode>2"
1994 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
1995 (unspec:VF1_128_256
1996 [(match_operand:VF1_128_256 1 "vector_operand" "xBm")] UNSPEC_RSQRT))]
1997 "TARGET_SSE"
1998 "%vrsqrtps\t{%1, %0|%0, %1}"
1999 [(set_attr "type" "sse")
2000 (set_attr "prefix" "maybe_vex")
2001 (set_attr "mode" "<MODE>")])
2002
;; Packed vrsqrt14 (UNSPEC_RSQRT14) over the VF_AVX512VL modes, with an
;; optional masked variant generated by the <mask_name> substitution.
;; Requires TARGET_AVX512F; always EVEX-encoded.
2003 (define_insn "<mask_codefor>rsqrt14<mode><mask_name>"
2004 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
2005 (unspec:VF_AVX512VL
2006 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")]
2007 UNSPEC_RSQRT14))]
2008 "TARGET_AVX512F"
2009 "vrsqrt14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
2010 [(set_attr "type" "sse")
2011 (set_attr "prefix" "evex")
2012 (set_attr "mode" "<MODE>")])
2013
;; Scalar vrsqrt14 over the VF_128 modes: element 0 comes from
;; UNSPEC_RSQRT14 of operand 1, the other elements from operand 2
;; (vec_merge mask 1).  Requires TARGET_AVX512F; EVEX-encoded.
2014 (define_insn "rsqrt14<mode>"
2015 [(set (match_operand:VF_128 0 "register_operand" "=v")
2016 (vec_merge:VF_128
2017 (unspec:VF_128
2018 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
2019 UNSPEC_RSQRT14)
2020 (match_operand:VF_128 2 "register_operand" "v")
2021 (const_int 1)))]
2022 "TARGET_AVX512F"
2023 "vrsqrt14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %<iptr>1}"
2024 [(set_attr "type" "sse")
2025 (set_attr "prefix" "evex")
2026 (set_attr "mode" "<MODE>")])
2027
;; Masked scalar vrsqrt14.  The inner vec_merge blends the UNSPEC_RSQRT14
;; result with operand 3 under mask register operand 4 (constraint "Yk");
;; the outer vec_merge (mask 1) keeps element 0 of that blend and takes the
;; remaining elements from operand 2.
;; Operand 3 is either tied to the destination ("0") or matches "C"
;; (presumably the zero constant, giving zero-masking via %N3 -- confirm).
;; NOTE(review): the name has an underscore before <mode>, unlike the
;; unmasked "rsqrt14<mode>"; callers use the name exactly as spelled here.
2028 (define_insn "rsqrt14_<mode>_mask"
2029 [(set (match_operand:VF_128 0 "register_operand" "=v")
2030 (vec_merge:VF_128
2031 (vec_merge:VF_128
2032 (unspec:VF_128
2033 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
2034 UNSPEC_RSQRT14)
2035 (match_operand:VF_128 3 "nonimm_or_0_operand" "0C")
2036 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk"))
2037 (match_operand:VF_128 2 "register_operand" "v")
2038 (const_int 1)))]
2039 "TARGET_AVX512F"
2040 "vrsqrt14<ssescalarmodesuffix>\t{%1, %2, %0%{%4%}%N3|%0%{%4%}%N3, %2, %<iptr>1}"
2041 [(set_attr "type" "sse")
2042 (set_attr "prefix" "evex")
2043 (set_attr "mode" "<MODE>")])
2044
;; Scalar reciprocal square root on V4SF: vec_merge with mask 1 takes
;; element 0 from UNSPEC_RSQRT of operand 1 and the remaining elements from
;; operand 2.  Alternative 0 (noavx) emits rsqrtss with operand 2 tied to the
;; destination; alternative 1 (avx) emits the three-operand vrsqrtss.
2045 (define_insn "sse_vmrsqrtv4sf2"
2046 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2047 (vec_merge:V4SF
2048 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
2049 UNSPEC_RSQRT)
2050 (match_operand:V4SF 2 "register_operand" "0,x")
2051 (const_int 1)))]
2052 "TARGET_SSE"
2053 "@
2054 rsqrtss\t{%1, %0|%0, %k1}
2055 vrsqrtss\t{%1, %2, %0|%0, %2, %k1}"
2056 [(set_attr "isa" "noavx,avx")
2057 (set_attr "type" "sse")
2058 (set_attr "prefix" "orig,vex")
2059 (set_attr "mode" "SF")])
2060
;; Expander for floating-point smax/smin (smaxmin code iterator) over the
;; VF modes, including the masked and SAE-only variants.
;; If NaNs/infinities may occur (!flag_finite_math_only) or signed zeros
;; matter (flag_signed_zeros), operand 1 is forced into a register and the
;; corresponding ieee_* pattern is emitted instead, then DONE.
;; Otherwise the operands are passed through
;; ix86_fixup_binary_operands_no_copy and the smaxmin template is emitted.
2061 (define_expand "<code><mode>3<mask_name><round_saeonly_name>"
2062 [(set (match_operand:VF 0 "register_operand")
2063 (smaxmin:VF
2064 (match_operand:VF 1 "<round_saeonly_nimm_predicate>")
2065 (match_operand:VF 2 "<round_saeonly_nimm_predicate>")))]
2066 "TARGET_SSE && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
2067 {
2068 if (!flag_finite_math_only || flag_signed_zeros)
2069 {
2070 operands[1] = force_reg (<MODE>mode, operands[1]);
2071 emit_insn (gen_ieee_<maxmin_float><mode>3<mask_name><round_saeonly_name>
2072 (operands[0], operands[1], operands[2]
2073 <mask_operand_arg34>
2074 <round_saeonly_mask_arg3>));
2075 DONE;
2076 }
2077 else
2078 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
2079 })
2080
2081 ;; These versions of the min/max patterns are intentionally ignorant of
2082 ;; their behavior wrt -0.0 and NaN (via the commutative operand mark).
2083 ;; Since both the tree-level MAX_EXPR and the rtl-level SMAX operator
2084 ;; are undefined in this condition, we're certain this is correct.
2085
;; Commutative packed min/max insn: operand 1 carries the "%" commutative
;; mark, so operands 1 and 2 may be swapped to satisfy constraints.
;; The condition rejects the case where both inputs are memory.
;; Alternative 0 (noavx) is the two-operand form with operand 1 tied to the
;; destination; alternative 1 (avx) is the three-operand form with optional
;; mask / SAE decorations.
2086 (define_insn "*<code><mode>3<mask_name><round_saeonly_name>"
2087 [(set (match_operand:VF 0 "register_operand" "=x,v")
2088 (smaxmin:VF
2089 (match_operand:VF 1 "<round_saeonly_nimm_predicate>" "%0,v")
2090 (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xBm,<round_saeonly_constraint>")))]
2091 "TARGET_SSE
2092 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
2093 && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
2094 "@
2095 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
2096 v<maxmin_float><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
2097 [(set_attr "isa" "noavx,avx")
2098 (set_attr "type" "sseadd")
2099 (set_attr "btver2_sse_attr" "maxmin")
2100 (set_attr "prefix" "<mask_prefix3>")
2101 (set_attr "mode" "<MODE>")])
2102
2103 ;; These versions of the min/max patterns implement exactly the operations
2104 ;; min = (op1 < op2 ? op1 : op2)
2105 ;; max = (!(op1 < op2) ? op1 : op2)
2106 ;; Their operands are not commutative, and thus they may be used in the
2107 ;; presence of -0.0 and NaN.
2108
;; Non-commutative packed min/max, modelled as an unspec from the
;; IEEE_MAXMIN iterator so that operand order is preserved: operand 1 is a
;; plain register operand with no "%" mark.
;; Emits the same mnemonics as the commutative pattern; alternative 0
;; (noavx) ties operand 1 to the destination, alternative 1 (avx) is the
;; three-operand form with optional mask / SAE decorations.
2109 (define_insn "ieee_<ieee_maxmin><mode>3<mask_name><round_saeonly_name>"
2110 [(set (match_operand:VF 0 "register_operand" "=x,v")
2111 (unspec:VF
2112 [(match_operand:VF 1 "register_operand" "0,v")
2113 (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xBm,<round_saeonly_constraint>")]
2114 IEEE_MAXMIN))]
2115 "TARGET_SSE
2116 && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
2117 "@
2118 <ieee_maxmin><ssemodesuffix>\t{%2, %0|%0, %2}
2119 v<ieee_maxmin><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
2120 [(set_attr "isa" "noavx,avx")
2121 (set_attr "type" "sseadd")
2122 (set_attr "btver2_sse_attr" "maxmin")
2123 (set_attr "prefix" "<mask_prefix3>")
2124 (set_attr "mode" "<MODE>")])
2125
;; Scalar min/max over the VF_128 modes: vec_merge with mask 1 takes element
;; 0 from smaxmin(operand 1, operand 2) and the remaining elements from
;; operand 1 itself (match_dup 1).
;; Alternative 0 (noavx) ties operand 1 to the destination; alternative 1
;; (avx) is the three-operand form with optional scalar mask / SAE
;; decorations.
2126 (define_insn "<sse>_vm<code><mode>3<mask_scalar_name><round_saeonly_scalar_name>"
2127 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
2128 (vec_merge:VF_128
2129 (smaxmin:VF_128
2130 (match_operand:VF_128 1 "register_operand" "0,v")
2131 (match_operand:VF_128 2 "vector_operand" "xBm,<round_saeonly_scalar_constraint>"))
2132 (match_dup 1)
2133 (const_int 1)))]
2134 "TARGET_SSE"
2135 "@
2136 <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
2137 v<maxmin_float><ssescalarmodesuffix>\t{<round_saeonly_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_saeonly_scalar_mask_op3>}"
2138 [(set_attr "isa" "noavx,avx")
2139 (set_attr "type" "sse")
2140 (set_attr "btver2_sse_attr" "maxmin")
2141 (set_attr "prefix" "<round_saeonly_scalar_prefix>")
2142 (set_attr "mode" "<ssescalarmode>")])
2143
;; 256-bit vaddsubpd.  The vec_merge mask 5 (binary 0101) takes elements 0
;; and 2 from (operand 1 - operand 2) and elements 1 and 3 from
;; (operand 1 + operand 2).  AVX only, VEX-encoded three-operand form.
2144 (define_insn "avx_addsubv4df3"
2145 [(set (match_operand:V4DF 0 "register_operand" "=x")
2146 (vec_merge:V4DF
2147 (minus:V4DF
2148 (match_operand:V4DF 1 "register_operand" "x")
2149 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
2150 (plus:V4DF (match_dup 1) (match_dup 2))
2151 (const_int 5)))]
2152 "TARGET_AVX"
2153 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
2154 [(set_attr "type" "sseadd")
2155 (set_attr "prefix" "vex")
2156 (set_attr "mode" "V4DF")])
2157
;; 128-bit addsubpd.  The vec_merge mask 1 takes element 0 from
;; (operand 1 - operand 2) and element 1 from (operand 1 + operand 2).
;; Alternative 0 (noavx) is the two-operand SSE3 form with operand 1 tied to
;; the destination; alternative 1 (avx) is the VEX three-operand form.
2158 (define_insn "sse3_addsubv2df3"
2159 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2160 (vec_merge:V2DF
2161 (minus:V2DF
2162 (match_operand:V2DF 1 "register_operand" "0,x")
2163 (match_operand:V2DF 2 "vector_operand" "xBm,xm"))
2164 (plus:V2DF (match_dup 1) (match_dup 2))
2165 (const_int 1)))]
2166 "TARGET_SSE3"
2167 "@
2168 addsubpd\t{%2, %0|%0, %2}
2169 vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
2170 [(set_attr "isa" "noavx,avx")
2171 (set_attr "type" "sseadd")
2172 (set_attr "atom_unit" "complex")
2173 (set_attr "prefix" "orig,vex")
2174 (set_attr "mode" "V2DF")])
2175
;; 256-bit vaddsubps.  The vec_merge mask 85 (binary 01010101) takes the
;; even-numbered elements from (operand 1 - operand 2) and the odd-numbered
;; elements from (operand 1 + operand 2).  AVX only, VEX-encoded.
2176 (define_insn "avx_addsubv8sf3"
2177 [(set (match_operand:V8SF 0 "register_operand" "=x")
2178 (vec_merge:V8SF
2179 (minus:V8SF
2180 (match_operand:V8SF 1 "register_operand" "x")
2181 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
2182 (plus:V8SF (match_dup 1) (match_dup 2))
2183 (const_int 85)))]
2184 "TARGET_AVX"
2185 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
2186 [(set_attr "type" "sseadd")
2187 (set_attr "prefix" "vex")
2188 (set_attr "mode" "V8SF")])
2189
;; 128-bit addsubps.  The vec_merge mask 5 (binary 0101) takes elements 0
;; and 2 from (operand 1 - operand 2) and elements 1 and 3 from
;; (operand 1 + operand 2).
;; Alternative 0 (noavx) is the two-operand SSE3 form (prefix_rep is 1 for
;; this alternative); alternative 1 (avx) is the VEX three-operand form.
2190 (define_insn "sse3_addsubv4sf3"
2191 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2192 (vec_merge:V4SF
2193 (minus:V4SF
2194 (match_operand:V4SF 1 "register_operand" "0,x")
2195 (match_operand:V4SF 2 "vector_operand" "xBm,xm"))
2196 (plus:V4SF (match_dup 1) (match_dup 2))
2197 (const_int 5)))]
2198 "TARGET_SSE3"
2199 "@
2200 addsubps\t{%2, %0|%0, %2}
2201 vaddsubps\t{%2, %1, %0|%0, %1, %2}"
2202 [(set_attr "isa" "noavx,avx")
2203 (set_attr "type" "sseadd")
2204 (set_attr "prefix" "orig,vex")
2205 (set_attr "prefix_rep" "1,*")
2206 (set_attr "mode" "V4SF")])
2207
;; Canonicalizing split for addsub: matches an "addsub_vm_operator"
;; (predicate defined elsewhere; presumably a vec_merge -- confirm) whose
;; first arm is MINUS and second arm is PLUS, where the PLUS operands are the
;; same two values as the MINUS operands in either order (PLUS is
;; commutative, so both orders are accepted by the rtx_equal_p checks).
;; It rewrites the expression into the vec_merge (minus, plus, mask) shape
;; using operands 1 and 2 for both arms and the original mask (operand 5).
;; Only active before register allocation (can_create_pseudo_p).
2208 (define_split
2209 [(set (match_operand:VF_128_256 0 "register_operand")
2210 (match_operator:VF_128_256 6 "addsub_vm_operator"
2211 [(minus:VF_128_256
2212 (match_operand:VF_128_256 1 "register_operand")
2213 (match_operand:VF_128_256 2 "vector_operand"))
2214 (plus:VF_128_256
2215 (match_operand:VF_128_256 3 "vector_operand")
2216 (match_operand:VF_128_256 4 "vector_operand"))
2217 (match_operand 5 "const_int_operand")]))]
2218 "TARGET_SSE3
2219 && can_create_pseudo_p ()
2220 && ((rtx_equal_p (operands[1], operands[3])
2221 && rtx_equal_p (operands[2], operands[4]))
2222 || (rtx_equal_p (operands[1], operands[4])
2223 && rtx_equal_p (operands[2], operands[3])))"
2224 [(set (match_dup 0)
2225 (vec_merge:VF_128_256
2226 (minus:VF_128_256 (match_dup 1) (match_dup 2))
2227 (plus:VF_128_256 (match_dup 1) (match_dup 2))
2228 (match_dup 5)))])
2229
;; Mirror of the previous split for the case where the PLUS arm comes first
;; and the MINUS arm second.  The replacement uses the MINUS operands
;; (3 and 4) for both arms and puts MINUS first, so the selection mask has to
;; be inverted: the preparation code replaces operand 5 with its bitwise
;; complement, truncated to one bit per vector element
;; (GET_MODE_NUNITS of the mode).
;; Only active before register allocation (can_create_pseudo_p).
2230 (define_split
2231 [(set (match_operand:VF_128_256 0 "register_operand")
2232 (match_operator:VF_128_256 6 "addsub_vm_operator"
2233 [(plus:VF_128_256
2234 (match_operand:VF_128_256 1 "vector_operand")
2235 (match_operand:VF_128_256 2 "vector_operand"))
2236 (minus:VF_128_256
2237 (match_operand:VF_128_256 3 "register_operand")
2238 (match_operand:VF_128_256 4 "vector_operand"))
2239 (match_operand 5 "const_int_operand")]))]
2240 "TARGET_SSE3
2241 && can_create_pseudo_p ()
2242 && ((rtx_equal_p (operands[1], operands[3])
2243 && rtx_equal_p (operands[2], operands[4]))
2244 || (rtx_equal_p (operands[1], operands[4])
2245 && rtx_equal_p (operands[2], operands[3])))"
2246 [(set (match_dup 0)
2247 (vec_merge:VF_128_256
2248 (minus:VF_128_256 (match_dup 3) (match_dup 4))
2249 (plus:VF_128_256 (match_dup 3) (match_dup 4))
2250 (match_dup 5)))]
2251 {
2252 /* Negate mask bits to compensate for swapped PLUS and MINUS RTXes. */
2253 operands[5]
2254 = GEN_INT (~INTVAL (operands[5])
2255 & ((HOST_WIDE_INT_1U << GET_MODE_NUNITS (<MODE>mode)) - 1));
2256 })
2257
;; Split for addsub expressed as an "addsub_vs_operator" (predicate defined
;; elsewhere; presumably a vec_select -- confirm) applied to the vec_concat
;; of a MINUS and a PLUS of the same two values, with the selection indices
;; in the parallel matched as operand 5.
;; The preparation code converts that index vector into a vec_merge mask:
;; bit i is set when selector i is below the element count of the mode,
;; i.e. when result element i is taken from the first (MINUS) half of the
;; concatenation.  Operand 5 is then overwritten with that constant and the
;; canonical vec_merge (minus, plus, mask) form is emitted.
;; Only active before register allocation (can_create_pseudo_p).
2258 (define_split
2259 [(set (match_operand:VF_128_256 0 "register_operand")
2260 (match_operator:VF_128_256 7 "addsub_vs_operator"
2261 [(vec_concat:<ssedoublemode>
2262 (minus:VF_128_256
2263 (match_operand:VF_128_256 1 "register_operand")
2264 (match_operand:VF_128_256 2 "vector_operand"))
2265 (plus:VF_128_256
2266 (match_operand:VF_128_256 3 "vector_operand")
2267 (match_operand:VF_128_256 4 "vector_operand")))
2268 (match_parallel 5 "addsub_vs_parallel"
2269 [(match_operand 6 "const_int_operand")])]))]
2270 "TARGET_SSE3
2271 && can_create_pseudo_p ()
2272 && ((rtx_equal_p (operands[1], operands[3])
2273 && rtx_equal_p (operands[2], operands[4]))
2274 || (rtx_equal_p (operands[1], operands[4])
2275 && rtx_equal_p (operands[2], operands[3])))"
2276 [(set (match_dup 0)
2277 (vec_merge:VF_128_256
2278 (minus:VF_128_256 (match_dup 1) (match_dup 2))
2279 (plus:VF_128_256 (match_dup 1) (match_dup 2))
2280 (match_dup 5)))]
2281 {
2282 int i, nelt = XVECLEN (operands[5], 0);
2283 HOST_WIDE_INT ival = 0;
2284
2285 for (i = 0; i < nelt; i++)
2286 if (INTVAL (XVECEXP (operands[5], 0, i)) < GET_MODE_NUNITS (<MODE>mode))
2287 ival |= HOST_WIDE_INT_1 << i;
2288
2289 operands[5] = GEN_INT (ival);
2290 })
2291
;; Mirror of the previous split for the case where the concatenation holds
;; the PLUS first and the MINUS second.  Here the MINUS result is the second
;; half of the concatenation, so mask bit i is set when selector i is greater
;; than or equal to the element count of the mode.  The replacement uses the
;; MINUS operands (3 and 4) for both arms of the emitted vec_merge.
;; Only active before register allocation (can_create_pseudo_p).
2292 (define_split
2293 [(set (match_operand:VF_128_256 0 "register_operand")
2294 (match_operator:VF_128_256 7 "addsub_vs_operator"
2295 [(vec_concat:<ssedoublemode>
2296 (plus:VF_128_256
2297 (match_operand:VF_128_256 1 "vector_operand")
2298 (match_operand:VF_128_256 2 "vector_operand"))
2299 (minus:VF_128_256
2300 (match_operand:VF_128_256 3 "register_operand")
2301 (match_operand:VF_128_256 4 "vector_operand")))
2302 (match_parallel 5 "addsub_vs_parallel"
2303 [(match_operand 6 "const_int_operand")])]))]
2304 "TARGET_SSE3
2305 && can_create_pseudo_p ()
2306 && ((rtx_equal_p (operands[1], operands[3])
2307 && rtx_equal_p (operands[2], operands[4]))
2308 || (rtx_equal_p (operands[1], operands[4])
2309 && rtx_equal_p (operands[2], operands[3])))"
2310 [(set (match_dup 0)
2311 (vec_merge:VF_128_256
2312 (minus:VF_128_256 (match_dup 3) (match_dup 4))
2313 (plus:VF_128_256 (match_dup 3) (match_dup 4))
2314 (match_dup 5)))]
2315 {
2316 int i, nelt = XVECLEN (operands[5], 0);
2317 HOST_WIDE_INT ival = 0;
2318
2319 for (i = 0; i < nelt; i++)
2320 if (INTVAL (XVECEXP (operands[5], 0, i)) >= GET_MODE_NUNITS (<MODE>mode))
2321 ival |= HOST_WIDE_INT_1 << i;
2322
2323 operands[5] = GEN_INT (ival);
2324 })
2325
;; 256-bit horizontal add/subtract of doubles (plusminus code iterator).
;; With a = operand 1, b = operand 2 and OP = plus or minus, the result
;; elements are, in order:
;;   { a[0] OP a[1], b[0] OP b[1], a[2] OP a[3], b[2] OP b[3] }
;; AVX only; emits vhaddpd / vhsubpd via <plusminus_mnemonic>.
2326 (define_insn "avx_h<plusminus_insn>v4df3"
2327 [(set (match_operand:V4DF 0 "register_operand" "=x")
2328 (vec_concat:V4DF
2329 (vec_concat:V2DF
2330 (plusminus:DF
2331 (vec_select:DF
2332 (match_operand:V4DF 1 "register_operand" "x")
2333 (parallel [(const_int 0)]))
2334 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2335 (plusminus:DF
2336 (vec_select:DF
2337 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
2338 (parallel [(const_int 0)]))
2339 (vec_select:DF (match_dup 2) (parallel [(const_int 1)]))))
2340 (vec_concat:V2DF
2341 (plusminus:DF
2342 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
2343 (vec_select:DF (match_dup 1) (parallel [(const_int 3)])))
2344 (plusminus:DF
2345 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
2346 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
2347 "TARGET_AVX"
2348 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
2349 [(set_attr "type" "sseadd")
2350 (set_attr "prefix" "vex")
2351 (set_attr "mode" "V4DF")])
2352
;; Expander that builds the V2DF horizontal add in its fixed form:
;;   { op1[0] + op1[1], op2[0] + op2[1] }
;; It has no preparation statements; the generated RTL is expected to be
;; recognized by the "*sse3_haddv2df3" insn, which accepts either element
;; order inside each sum.
2353 (define_expand "sse3_haddv2df3"
2354 [(set (match_operand:V2DF 0 "register_operand")
2355 (vec_concat:V2DF
2356 (plus:DF
2357 (vec_select:DF
2358 (match_operand:V2DF 1 "register_operand")
2359 (parallel [(const_int 0)]))
2360 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2361 (plus:DF
2362 (vec_select:DF
2363 (match_operand:V2DF 2 "vector_operand")
2364 (parallel [(const_int 0)]))
2365 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
2366 "TARGET_SSE3")
2367
;; V2DF horizontal add insn.  The element indices inside each sum are
;; matched as operands 3/4 (for operand 1) and 5/6 (for operand 2), each
;; restricted to 0..1, and the condition only requires the two indices of a
;; pair to differ.  Because addition is commutative this accepts both
;; element orders (0,1) and (1,0) for each sum.
;; Alternative 0 (noavx) emits haddpd with operand 1 tied to the
;; destination; alternative 1 (avx) emits the three-operand vhaddpd.
2368 (define_insn "*sse3_haddv2df3"
2369 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2370 (vec_concat:V2DF
2371 (plus:DF
2372 (vec_select:DF
2373 (match_operand:V2DF 1 "register_operand" "0,x")
2374 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))
2375 (vec_select:DF
2376 (match_dup 1)
2377 (parallel [(match_operand:SI 4 "const_0_to_1_operand")])))
2378 (plus:DF
2379 (vec_select:DF
2380 (match_operand:V2DF 2 "vector_operand" "xBm,xm")
2381 (parallel [(match_operand:SI 5 "const_0_to_1_operand")]))
2382 (vec_select:DF
2383 (match_dup 2)
2384 (parallel [(match_operand:SI 6 "const_0_to_1_operand")])))))]
2385 "TARGET_SSE3
2386 && INTVAL (operands[3]) != INTVAL (operands[4])
2387 && INTVAL (operands[5]) != INTVAL (operands[6])"
2388 "@
2389 haddpd\t{%2, %0|%0, %2}
2390 vhaddpd\t{%2, %1, %0|%0, %1, %2}"
2391 [(set_attr "isa" "noavx,avx")
2392 (set_attr "type" "sseadd")
2393 (set_attr "prefix" "orig,vex")
2394 (set_attr "mode" "V2DF")])
2395
;; V2DF horizontal subtract:
;;   { op1[0] - op1[1], op2[0] - op2[1] }
;; Unlike the hadd insn, the element indices are fixed constants, since
;; subtraction is not commutative.
;; Alternative 0 (noavx) emits hsubpd with operand 1 tied to the
;; destination; alternative 1 (avx) emits the three-operand vhsubpd.
2396 (define_insn "sse3_hsubv2df3"
2397 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2398 (vec_concat:V2DF
2399 (minus:DF
2400 (vec_select:DF
2401 (match_operand:V2DF 1 "register_operand" "0,x")
2402 (parallel [(const_int 0)]))
2403 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2404 (minus:DF
2405 (vec_select:DF
2406 (match_operand:V2DF 2 "vector_operand" "xBm,xm")
2407 (parallel [(const_int 0)]))
2408 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
2409 "TARGET_SSE3"
2410 "@
2411 hsubpd\t{%2, %0|%0, %2}
2412 vhsubpd\t{%2, %1, %0|%0, %1, %2}"
2413 [(set_attr "isa" "noavx,avx")
2414 (set_attr "type" "sseadd")
2415 (set_attr "prefix" "orig,vex")
2416 (set_attr "mode" "V2DF")])
2417
;; Scalar (DF) result of adding the two elements of a single V2DF register.
;; The two element indices are operands 2 and 3 (each 0..1) and must differ,
;; so either order is accepted.
;; The instruction is emitted with the same register as both sources:
;; alternative 0 (noavx) uses operand 0 (tied to operand 1) twice,
;; alternative 1 (avx) uses operand 1 twice.
2418 (define_insn "*sse3_haddv2df3_low"
2419 [(set (match_operand:DF 0 "register_operand" "=x,x")
2420 (plus:DF
2421 (vec_select:DF
2422 (match_operand:V2DF 1 "register_operand" "0,x")
2423 (parallel [(match_operand:SI 2 "const_0_to_1_operand")]))
2424 (vec_select:DF
2425 (match_dup 1)
2426 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))))]
2427 "TARGET_SSE3
2428 && INTVAL (operands[2]) != INTVAL (operands[3])"
2429 "@
2430 haddpd\t{%0, %0|%0, %0}
2431 vhaddpd\t{%1, %1, %0|%0, %1, %1}"
2432 [(set_attr "isa" "noavx,avx")
2433 (set_attr "type" "sseadd1")
2434 (set_attr "prefix" "orig,vex")
2435 (set_attr "mode" "V2DF")])
2436
;; Scalar (DF) result of op1[0] - op1[1] for a single V2DF register.
;; Element indices are fixed (0 then 1) because subtraction is not
;; commutative.  As in the hadd "_low" pattern, the instruction is emitted
;; with the same register as both sources.
2437 (define_insn "*sse3_hsubv2df3_low"
2438 [(set (match_operand:DF 0 "register_operand" "=x,x")
2439 (minus:DF
2440 (vec_select:DF
2441 (match_operand:V2DF 1 "register_operand" "0,x")
2442 (parallel [(const_int 0)]))
2443 (vec_select:DF
2444 (match_dup 1)
2445 (parallel [(const_int 1)]))))]
2446 "TARGET_SSE3"
2447 "@
2448 hsubpd\t{%0, %0|%0, %0}
2449 vhsubpd\t{%1, %1, %0|%0, %1, %1}"
2450 [(set_attr "isa" "noavx,avx")
2451 (set_attr "type" "sseadd1")
2452 (set_attr "prefix" "orig,vex")
2453 (set_attr "mode" "V2DF")])
2454
;; 256-bit horizontal add/subtract of floats (plusminus code iterator).
;; With a = operand 1, b = operand 2 and OP = plus or minus, the result
;; elements are, in order:
;;   { a[0] OP a[1], a[2] OP a[3], b[0] OP b[1], b[2] OP b[3],
;;     a[4] OP a[5], a[6] OP a[7], b[4] OP b[5], b[6] OP b[7] }
;; AVX only; emits vhaddps / vhsubps via <plusminus_mnemonic>.
2455 (define_insn "avx_h<plusminus_insn>v8sf3"
2456 [(set (match_operand:V8SF 0 "register_operand" "=x")
2457 (vec_concat:V8SF
2458 (vec_concat:V4SF
2459 (vec_concat:V2SF
2460 (plusminus:SF
2461 (vec_select:SF
2462 (match_operand:V8SF 1 "register_operand" "x")
2463 (parallel [(const_int 0)]))
2464 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
2465 (plusminus:SF
2466 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
2467 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
2468 (vec_concat:V2SF
2469 (plusminus:SF
2470 (vec_select:SF
2471 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
2472 (parallel [(const_int 0)]))
2473 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
2474 (plusminus:SF
2475 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
2476 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
2477 (vec_concat:V4SF
2478 (vec_concat:V2SF
2479 (plusminus:SF
2480 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
2481 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
2482 (plusminus:SF
2483 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
2484 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
2485 (vec_concat:V2SF
2486 (plusminus:SF
2487 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
2488 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
2489 (plusminus:SF
2490 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
2491 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
2492 "TARGET_AVX"
2493 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
2494 [(set_attr "type" "sseadd")
2495 (set_attr "prefix" "vex")
2496 (set_attr "mode" "V8SF")])
2497
;; 128-bit horizontal add/subtract of floats (plusminus code iterator).
;; With a = operand 1, b = operand 2 and OP = plus or minus, the result
;; elements are, in order:
;;   { a[0] OP a[1], a[2] OP a[3], b[0] OP b[1], b[2] OP b[3] }
;; Alternative 0 (noavx) is the two-operand SSE3 form with operand 1 tied to
;; the destination (prefix_rep is 1 for this alternative); alternative 1
;; (avx) is the VEX three-operand form.
2498 (define_insn "sse3_h<plusminus_insn>v4sf3"
2499 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2500 (vec_concat:V4SF
2501 (vec_concat:V2SF
2502 (plusminus:SF
2503 (vec_select:SF
2504 (match_operand:V4SF 1 "register_operand" "0,x")
2505 (parallel [(const_int 0)]))
2506 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
2507 (plusminus:SF
2508 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
2509 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
2510 (vec_concat:V2SF
2511 (plusminus:SF
2512 (vec_select:SF
2513 (match_operand:V4SF 2 "vector_operand" "xBm,xm")
2514 (parallel [(const_int 0)]))
2515 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
2516 (plusminus:SF
2517 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
2518 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
2519 "TARGET_SSE3"
2520 "@
2521 h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
2522 vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
2523 [(set_attr "isa" "noavx,avx")
2524 (set_attr "type" "sseadd")
2525 (set_attr "atom_unit" "complex")
2526 (set_attr "prefix" "orig,vex")
2527 (set_attr "prefix_rep" "1,*")
2528 (set_attr "mode" "V4SF")])
2529
;; 128-bit float vector modes handled by the first reduc_plus_scal_<mode>
;; expander below; both entries are conditioned on TARGET_SSE.
;; NOTE(review): V2DF is listed under TARGET_SSE rather than TARGET_SSE2;
;; presumably V2DF is simply unavailable without SSE2 -- confirm.
2530 (define_mode_iterator REDUC_SSE_PLUS_MODE
2531 [(V2DF "TARGET_SSE") (V4SF "TARGET_SSE")])
2532
;; Sum-reduction of a 128-bit float vector (operand 1) to a scalar
;; (operand 0).  The RTL template is only a placeholder; the body always
;; ends with DONE.  It reduces into a fresh vector register with
;; ix86_expand_reduc using the vector add generator, then extracts element
;; 0 (const0_rtx) of that register as the result.
2533 (define_expand "reduc_plus_scal_<mode>"
2534 [(plus:REDUC_SSE_PLUS_MODE
2535 (match_operand:<ssescalarmode> 0 "register_operand")
2536 (match_operand:REDUC_SSE_PLUS_MODE 1 "register_operand"))]
2537 ""
2538 {
2539 rtx tmp = gen_reg_rtx (<MODE>mode);
2540 ix86_expand_reduc (gen_add<mode>3, tmp, operands[1]);
2541 emit_insn (gen_vec_extract<mode><ssescalarmodelower> (operands[0], tmp,
2542 const0_rtx));
2543 DONE;
2544 })
2545
;; Wider float vector modes handled by the second reduc_plus_scal_<mode>
;; expander below: V4DF/V8SF under TARGET_AVX, V8DF/V16SF under
;; TARGET_AVX512F.
2546 (define_mode_iterator REDUC_PLUS_MODE
2547 [(V4DF "TARGET_AVX") (V8SF "TARGET_AVX")
2548 (V8DF "TARGET_AVX512F") (V16SF "TARGET_AVX512F")])
2549
;; Sum-reduction for the wider float modes, done by halving:
;;   1. extract the high half of operand 1 into tmp;
;;   2. add it to the low half (gen_lowpart of operand 1) into tmp2;
;;   3. invoke the reduc_plus_scal expander for the half-width mode on tmp2.
;; The RTL template is only a placeholder; the body always ends with DONE.
2550 (define_expand "reduc_plus_scal_<mode>"
2551 [(plus:REDUC_PLUS_MODE
2552 (match_operand:<ssescalarmode> 0 "register_operand")
2553 (match_operand:REDUC_PLUS_MODE 1 "register_operand"))]
2554 ""
2555 {
2556 rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode);
2557 emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1]));
2558 rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode);
2559 emit_insn (gen_add<ssehalfvecmodelower>3
2560 (tmp2, tmp, gen_lowpart (<ssehalfvecmode>mode, operands[1])));
2561 emit_insn (gen_reduc_plus_scal_<ssehalfvecmodelower> (operands[0], tmp2));
2562 DONE;
2563 })
2564
2565 ;; Modes handled by reduc_sm{in,ax}* patterns.
;; 128-bit float and integer vector modes for the signed min/max reduction
;; expander that follows; every entry is conditioned on TARGET_SSE.
2566 (define_mode_iterator REDUC_SSE_SMINMAX_MODE
2567 [(V4SF "TARGET_SSE") (V2DF "TARGET_SSE")
2568 (V2DI "TARGET_SSE") (V4SI "TARGET_SSE") (V8HI "TARGET_SSE")
2569 (V16QI "TARGET_SSE")])
2570
;; Signed min/max reduction (smaxmin code iterator) of a 128-bit vector
;; (operand 1) to a scalar (operand 0).  The body reduces into a fresh
;; vector register with ix86_expand_reduc using the matching <code><mode>3
;; generator, then extracts element 0 (const0_rtx) as the result.
;; The RTL template is only a placeholder; the body always ends with DONE.
2571 (define_expand "reduc_<code>_scal_<mode>"
2572 [(smaxmin:REDUC_SSE_SMINMAX_MODE
2573 (match_operand:<ssescalarmode> 0 "register_operand")
2574 (match_operand:REDUC_SSE_SMINMAX_MODE 1 "register_operand"))]
2575 ""
2576 {
2577 rtx tmp = gen_reg_rtx (<MODE>mode);
2578 ix86_expand_reduc (gen_<code><mode>3, tmp, operands[1]);
2579 emit_insn (gen_vec_extract<mode><ssescalarmodelower> (operands[0], tmp,
2580 const0_rtx));
2581 DONE;
2582 })
2583
;; 256-bit and 512-bit modes for the wide signed min/max reduction expander
;; that follows.  256-bit integer modes need TARGET_AVX2 and 256-bit float
;; modes TARGET_AVX; V64QI/V32HI need TARGET_AVX512BW and the remaining
;; 512-bit modes TARGET_AVX512F.
2584 (define_mode_iterator REDUC_SMINMAX_MODE
2585 [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
2586 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
2587 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
2588 (V64QI "TARGET_AVX512BW")
2589 (V32HI "TARGET_AVX512BW") (V16SI "TARGET_AVX512F")
2590 (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
2591 (V8DF "TARGET_AVX512F")])
2592
;; Signed min/max reduction for the wide modes, done by halving:
;;   1. extract the high half of operand 1 into tmp;
;;   2. combine it with the low half (gen_lowpart) using <code> into tmp2;
;;   3. invoke the reduc_<code>_scal expander for the half-width mode.
;; The RTL template is only a placeholder; the body always ends with DONE.
2593 (define_expand "reduc_<code>_scal_<mode>"
2594 [(smaxmin:REDUC_SMINMAX_MODE
2595 (match_operand:<ssescalarmode> 0 "register_operand")
2596 (match_operand:REDUC_SMINMAX_MODE 1 "register_operand"))]
2597 ""
2598 {
2599 rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode);
2600 emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1]));
2601 rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode);
2602 emit_insn (gen_<code><ssehalfvecmodelower>3
2603 (tmp2, tmp, gen_lowpart (<ssehalfvecmode>mode, operands[1])));
2604 emit_insn (gen_reduc_<code>_scal_<ssehalfvecmodelower> (operands[0], tmp2));
2605 DONE;
2606 })
2607
;; Unsigned min/max reduction (umaxmin code iterator) over the VI_AVX512BW
;; modes, enabled under TARGET_AVX512F.  Same halving scheme as the signed
;; wide expander: extract the high half, combine with the low half using
;; <code>, then delegate to the reduc_<code>_scal expander of the
;; half-width mode.  The body always ends with DONE.
2608 (define_expand "reduc_<code>_scal_<mode>"
2609 [(umaxmin:VI_AVX512BW
2610 (match_operand:<ssescalarmode> 0 "register_operand")
2611 (match_operand:VI_AVX512BW 1 "register_operand"))]
2612 "TARGET_AVX512F"
2613 {
2614 rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode);
2615 emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1]));
2616 rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode);
2617 emit_insn (gen_<code><ssehalfvecmodelower>3
2618 (tmp2, tmp, gen_lowpart (<ssehalfvecmode>mode, operands[1])));
2619 emit_insn (gen_reduc_<code>_scal_<ssehalfvecmodelower> (operands[0], tmp2));
2620 DONE;
2621 })
2622
;; Unsigned min/max reduction over the VI_256 modes, enabled under
;; TARGET_AVX2.  The high and low halves are combined with <code> into tmp2.
;; Unlike the wider expanders, this one does not delegate to a half-width
;; reduc_* expander: it finishes the 128-bit reduction itself with
;; ix86_expand_reduc into tmp3 and extracts element 0 (const0_rtx).
;; The body always ends with DONE.
2623 (define_expand "reduc_<code>_scal_<mode>"
2624 [(umaxmin:VI_256
2625 (match_operand:<ssescalarmode> 0 "register_operand")
2626 (match_operand:VI_256 1 "register_operand"))]
2627 "TARGET_AVX2"
2628 {
2629 rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode);
2630 emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1]));
2631 rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode);
2632 emit_insn (gen_<code><ssehalfvecmodelower>3
2633 (tmp2, tmp, gen_lowpart (<ssehalfvecmode>mode, operands[1])));
2634 rtx tmp3 = gen_reg_rtx (<ssehalfvecmode>mode);
2635 ix86_expand_reduc (gen_<code><ssehalfvecmodelower>3, tmp3, tmp2);
2636 emit_insn (gen_vec_extract<ssehalfvecmodelower><ssescalarmodelower>
2637 (operands[0], tmp3, const0_rtx));
2638 DONE;
2639 })
2640
;; Unsigned-minimum reduction of a V8HI vector to an HI scalar, enabled under
;; TARGET_SSE4_1.  Reduces into a fresh V8HI register with ix86_expand_reduc
;; using gen_uminv8hi3, then extracts element 0 (const0_rtx).
;; The body always ends with DONE.
2641 (define_expand "reduc_umin_scal_v8hi"
2642 [(umin:V8HI
2643 (match_operand:HI 0 "register_operand")
2644 (match_operand:V8HI 1 "register_operand"))]
2645 "TARGET_SSE4_1"
2646 {
2647 rtx tmp = gen_reg_rtx (V8HImode);
2648 ix86_expand_reduc (gen_uminv8hi3, tmp, operands[1]);
2649 emit_insn (gen_vec_extractv8hihi (operands[0], tmp, const0_rtx));
2650 DONE;
2651 })
2652
;; Packed vreduce<suffix> (UNSPEC_REDUCE) over the VF_AVX512VL modes with an
;; 8-bit immediate (operand 2, range 0..255) and an optional masked variant
;; from the <mask_name> substitution.  Requires TARGET_AVX512DQ; EVEX-encoded.
;; Note that this is the AVX512DQ "reduce" instruction, not a
;; vector-to-scalar reduction like the reduc_* expanders above.
2653 (define_insn "<mask_codefor>reducep<mode><mask_name>"
2654 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
2655 (unspec:VF_AVX512VL
2656 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")
2657 (match_operand:SI 2 "const_0_to_255_operand")]
2658 UNSPEC_REDUCE))]
2659 "TARGET_AVX512DQ"
2660 "vreduce<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
2661 [(set_attr "type" "sse")
2662 (set_attr "prefix" "evex")
2663 (set_attr "mode" "<MODE>")])
2664
;; Scalar vreduce<suffix> over the VF_128 modes: vec_merge with mask 1 takes
;; element 0 from UNSPEC_REDUCE of (operand 1, operand 2, immediate operand
;; 3) and the remaining elements from operand 1 (match_dup 1).
;; Optional scalar masking comes from the <mask_scalar_name> substitution.
;; Requires TARGET_AVX512DQ; EVEX-encoded.
2665 (define_insn "reduces<mode><mask_scalar_name>"
2666 [(set (match_operand:VF_128 0 "register_operand" "=v")
2667 (vec_merge:VF_128
2668 (unspec:VF_128
2669 [(match_operand:VF_128 1 "register_operand" "v")
2670 (match_operand:VF_128 2 "nonimmediate_operand" "vm")
2671 (match_operand:SI 3 "const_0_to_255_operand")]
2672 UNSPEC_REDUCE)
2673 (match_dup 1)
2674 (const_int 1)))]
2675 "TARGET_AVX512DQ"
2676 "vreduce<ssescalarmodesuffix>\t{%3, %2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2, %3}"
2677 [(set_attr "type" "sse")
2678 (set_attr "prefix" "evex")
2679 (set_attr "mode" "<MODE>")])
2680
2681 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2682 ;;
2683 ;; Parallel floating point comparisons
2684 ;;
2685 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2686
;; Packed AVX compare with an immediate predicate: UNSPEC_PCMP of operand 1,
;; operand 2 and a 5-bit immediate (operand 3, range 0..31) over the
;; VF_128_256 modes.  Emits vcmp<suffix>; the immediate contributes one byte
;; to the encoding (length_immediate 1).
2687 (define_insn "avx_cmp<mode>3"
2688 [(set (match_operand:VF_128_256 0 "register_operand" "=x")
2689 (unspec:VF_128_256
2690 [(match_operand:VF_128_256 1 "register_operand" "x")
2691 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm")
2692 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2693 UNSPEC_PCMP))]
2694 "TARGET_AVX"
2695 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2696 [(set_attr "type" "ssecmp")
2697 (set_attr "length_immediate" "1")
2698 (set_attr "prefix" "vex")
2699 (set_attr "mode" "<MODE>")])
2700
;; Insn: AVX scalar floating-point compare, vcmp<ssescalarmodesuffix>,
;; over the VF_128 modes.  The UNSPEC_PCMP result is vec_merge'd with
;; operand 1 under mask (const_int 1).  Operand 3 is the comparison
;; immediate (const_0_to_31_operand).  Requires TARGET_AVX; the insn
;; mode attribute is the scalar mode <ssescalarmode>.
2701 (define_insn "avx_vmcmp<mode>3"
2702 [(set (match_operand:VF_128 0 "register_operand" "=x")
2703 (vec_merge:VF_128
2704 (unspec:VF_128
2705 [(match_operand:VF_128 1 "register_operand" "x")
2706 (match_operand:VF_128 2 "nonimmediate_operand" "xm")
2707 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2708 UNSPEC_PCMP)
2709 (match_dup 1)
2710 (const_int 1)))]
2711 "TARGET_AVX"
2712 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}"
2713 [(set_attr "type" "ssecmp")
2714 (set_attr "length_immediate" "1")
2715 (set_attr "prefix" "vex")
2716 (set_attr "mode" "<ssescalarmode>")])
2717
;; Insn (unnamed, leading '*'): packed floating-point compare whose
;; comparison operator (operand 3, sse_comparison_operator) belongs to
;; the RTX_COMM_COMPARE class, as checked in the insn condition.
;; Operand 1 carries the '%' commutative marker.  Two alternatives:
;;   0 - non-AVX two-operand cmp form, destination tied to operand 1;
;;   1 - AVX three-operand vcmp form.
;; The %D3 modifier on the operator supplies the part of the mnemonic
;; between "cmp" and the mode suffix.
2718 (define_insn "*<sse>_maskcmp<mode>3_comm"
2719 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
2720 (match_operator:VF_128_256 3 "sse_comparison_operator"
2721 [(match_operand:VF_128_256 1 "register_operand" "%0,x")
2722 (match_operand:VF_128_256 2 "vector_operand" "xBm,xm")]))]
2723 "TARGET_SSE
2724 && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
2725 "@
2726 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
2727 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2728 [(set_attr "isa" "noavx,avx")
2729 (set_attr "type" "ssecmp")
2730 (set_attr "length_immediate" "1")
2731 (set_attr "prefix" "orig,vex")
2732 (set_attr "mode" "<MODE>")])
2733
;; Insn: packed floating-point compare for any operator accepted by
;; sse_comparison_operator (operand 3), over the VF_128_256 modes.
;; Unlike the "_comm" variant, operand 1 has no commutative marker and
;; the condition is just TARGET_SSE.  Alternative 0 is the non-AVX
;; two-operand cmp form (destination tied to operand 1); alternative 1
;; is the AVX three-operand vcmp form.
2734 (define_insn "<sse>_maskcmp<mode>3"
2735 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
2736 (match_operator:VF_128_256 3 "sse_comparison_operator"
2737 [(match_operand:VF_128_256 1 "register_operand" "0,x")
2738 (match_operand:VF_128_256 2 "vector_operand" "xBm,xm")]))]
2739 "TARGET_SSE"
2740 "@
2741 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
2742 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2743 [(set_attr "isa" "noavx,avx")
2744 (set_attr "type" "ssecmp")
2745 (set_attr "length_immediate" "1")
2746 (set_attr "prefix" "orig,vex")
2747 (set_attr "mode" "<MODE>")])
2748
;; Insn: scalar floating-point compare over the VF_128 modes.  The
;; comparison (operator 3, sse_comparison_operator) is vec_merge'd with
;; operand 1 under mask (const_int 1).  Alternative 0 is the non-AVX
;; two-operand form, alternative 1 the AVX three-operand form; note
;; that length_immediate is given per alternative ("1,*").  The insn
;; mode attribute is the scalar mode <ssescalarmode>.
2749 (define_insn "<sse>_vmmaskcmp<mode>3"
2750 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2751 (vec_merge:VF_128
2752 (match_operator:VF_128 3 "sse_comparison_operator"
2753 [(match_operand:VF_128 1 "register_operand" "0,x")
2754 (match_operand:VF_128 2 "vector_operand" "xBm,xm")])
2755 (match_dup 1)
2756 (const_int 1)))]
2757 "TARGET_SSE"
2758 "@
2759 cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
2760 vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
2761 [(set_attr "isa" "noavx,avx")
2762 (set_attr "type" "ssecmp")
2763 (set_attr "length_immediate" "1,*")
2764 (set_attr "prefix" "orig,vex")
2765 (set_attr "mode" "<ssescalarmode>")])
2766
;; Mode attribute: maps a vector mode to the predicate used for the
;; comparison immediate of the mask-compare patterns that reference
;; <cmp_imm_predicate>.  The floating-point modes (V*SF, V*DF) map to
;; const_0_to_31_operand; all integer modes map to
;; const_0_to_7_operand.
2767 (define_mode_attr cmp_imm_predicate
2768 [(V16SF "const_0_to_31_operand") (V8DF "const_0_to_31_operand")
2769 (V16SI "const_0_to_7_operand") (V8DI "const_0_to_7_operand")
2770 (V8SF "const_0_to_31_operand") (V4DF "const_0_to_31_operand")
2771 (V8SI "const_0_to_7_operand") (V4DI "const_0_to_7_operand")
2772 (V4SF "const_0_to_31_operand") (V2DF "const_0_to_31_operand")
2773 (V4SI "const_0_to_7_operand") (V2DI "const_0_to_7_operand")
2774 (V32HI "const_0_to_7_operand") (V64QI "const_0_to_7_operand")
2775 (V16HI "const_0_to_7_operand") (V32QI "const_0_to_7_operand")
2776 (V8HI "const_0_to_7_operand") (V16QI "const_0_to_7_operand")])
2777
;; Insn: AVX-512 compare over the V48_AVX512VL modes producing a value
;; of mode <avx512fmaskmode> (destination constraint "Yk"), represented
;; as UNSPEC_PCMP.  The immediate predicate for operand 3 is chosen per
;; mode through <cmp_imm_predicate>.  The condition combines
;; TARGET_AVX512F with <round_saeonly_mode512bit_condition>; the
;; template inserts <round_saeonly_mask_scalar_merge_op4> next to
;; operand 2 and <mask_scalar_merge_operand4> after the destination.
2778 (define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name><round_saeonly_name>"
2779 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2780 (unspec:<avx512fmaskmode>
2781 [(match_operand:V48_AVX512VL 1 "register_operand" "v")
2782 (match_operand:V48_AVX512VL 2 "nonimmediate_operand" "<round_saeonly_constraint>")
2783 (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
2784 UNSPEC_PCMP))]
2785 "TARGET_AVX512F && <round_saeonly_mode512bit_condition>"
2786 "v<sseintprefix>cmp<ssemodesuffix>\t{%3, <round_saeonly_mask_scalar_merge_op4>%2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2<round_saeonly_mask_scalar_merge_op4>, %3}"
2787 [(set_attr "type" "ssecmp")
2788 (set_attr "length_immediate" "1")
2789 (set_attr "prefix" "evex")
2790 (set_attr "mode" "<sseinsnmode>")])
2791
;; Insn: vpcmp<ssemodesuffix> over the VI12_AVX512VL modes producing a
;; value of mode <avx512fmaskmode>, represented as UNSPEC_PCMP.
;; Operand 3 is the comparison immediate, with the predicate selected
;; by <cmp_imm_predicate>.  Requires TARGET_AVX512BW.
2792 (define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name>"
2793 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2794 (unspec:<avx512fmaskmode>
2795 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
2796 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
2797 (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
2798 UNSPEC_PCMP))]
2799 "TARGET_AVX512BW"
2800 "vpcmp<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
2801 [(set_attr "type" "ssecmp")
2802 (set_attr "length_immediate" "1")
2803 (set_attr "prefix" "evex")
2804 (set_attr "mode" "<sseinsnmode>")])
2805
;; Insn: vpcmpu<ssemodesuffix> over the VI12_AVX512VL modes producing
;; a value of mode <avx512fmaskmode>, represented as
;; UNSPEC_UNSIGNED_PCMP.  Operand 3 is the comparison immediate
;; (const_0_to_7_operand).  Requires TARGET_AVX512BW.
2806 (define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
2807 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2808 (unspec:<avx512fmaskmode>
2809 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
2810 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
2811 (match_operand:SI 3 "const_0_to_7_operand" "n")]
2812 UNSPEC_UNSIGNED_PCMP))]
2813 "TARGET_AVX512BW"
2814 "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
2815 [(set_attr "type" "ssecmp")
2816 (set_attr "length_immediate" "1")
2817 (set_attr "prefix" "evex")
2818 (set_attr "mode" "<sseinsnmode>")])
2819
;; Insn: vpcmpu<ssemodesuffix> over the VI48_AVX512VL modes producing
;; a value of mode <avx512fmaskmode>, represented as
;; UNSPEC_UNSIGNED_PCMP.  Same shape as the VI12_AVX512VL variant but
;; gated on TARGET_AVX512F instead of TARGET_AVX512BW.
2820 (define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
2821 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2822 (unspec:<avx512fmaskmode>
2823 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
2824 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
2825 (match_operand:SI 3 "const_0_to_7_operand" "n")]
2826 UNSPEC_UNSIGNED_PCMP))]
2827 "TARGET_AVX512F"
2828 "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
2829 [(set_attr "type" "ssecmp")
2830 (set_attr "length_immediate" "1")
2831 (set_attr "prefix" "evex")
2832 (set_attr "mode" "<sseinsnmode>")])
2833
;; Insn: AVX-512 scalar floating-point compare over the VF_128 modes,
;; producing a value of mode <avx512fmaskmode>.  The UNSPEC_PCMP result
;; is ANDed with (const_int 1).  Operand 3 is the comparison immediate
;; (const_0_to_31_operand).  Requires TARGET_AVX512F; the template
;; places <round_saeonly_op4> next to operand 2.
2834 (define_insn "avx512f_vmcmp<mode>3<round_saeonly_name>"
2835 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2836 (and:<avx512fmaskmode>
2837 (unspec:<avx512fmaskmode>
2838 [(match_operand:VF_128 1 "register_operand" "v")
2839 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2840 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2841 UNSPEC_PCMP)
2842 (const_int 1)))]
2843 "TARGET_AVX512F"
2844 "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %<iptr>2<round_saeonly_op4>, %3}"
2845 [(set_attr "type" "ssecmp")
2846 (set_attr "length_immediate" "1")
2847 (set_attr "prefix" "evex")
2848 (set_attr "mode" "<ssescalarmode>")])
2849
;; Insn: variant of the AVX-512 scalar compare that takes an additional
;; input, operand 4, of mode <avx512fmaskmode>.  The UNSPEC_PCMP result
;; is ANDed with (operand 4 AND 1).  The template prints operand 4 in
;; braces after the destination (%0%{%4%}) and uses <round_saeonly_op5>
;; because the extra operand shifts the rounding operand index.
2850 (define_insn "avx512f_vmcmp<mode>3_mask<round_saeonly_name>"
2851 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2852 (and:<avx512fmaskmode>
2853 (unspec:<avx512fmaskmode>
2854 [(match_operand:VF_128 1 "register_operand" "v")
2855 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2856 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2857 UNSPEC_PCMP)
2858 (and:<avx512fmaskmode>
2859 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")
2860 (const_int 1))))]
2861 "TARGET_AVX512F"
2862 "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %<iptr>2<round_saeonly_op5>, %3}"
2863 [(set_attr "type" "ssecmp")
2864 (set_attr "length_immediate" "1")
2865 (set_attr "prefix" "evex")
2866 (set_attr "mode" "<ssescalarmode>")])
2867
;; Insn: AVX-512 packed floating-point compare over the VF modes whose
;; result has mode <avx512fmaskmode>.  The comparison is expressed as a
;; match_operator (operand 3, sse_comparison_operator) rather than an
;; unspec with an immediate; %D3 contributes to the mnemonic.  Requires
;; TARGET_AVX512F.
2868 (define_insn "avx512f_maskcmp<mode>3"
2869 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2870 (match_operator:<avx512fmaskmode> 3 "sse_comparison_operator"
2871 [(match_operand:VF 1 "register_operand" "v")
2872 (match_operand:VF 2 "nonimmediate_operand" "vm")]))]
2873 "TARGET_AVX512F"
2874 "vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2875 [(set_attr "type" "ssecmp")
2876 (set_attr "length_immediate" "1")
2877 (set_attr "prefix" "evex")
2878 (set_attr "mode" "<sseinsnmode>")])
2879
;; Insn: <unord>comi<ssemodesuffix> scalar compare that sets FLAGS_REG
;; in CCFP mode.  Both inputs are <ssevecmode> vectors and only
;; element 0 of each (vec_select with index 0) takes part in the
;; compare.  Enabled when SSE_FLOAT_MODE_P holds for the scalar mode.
;; The prefix_data16 attribute is "1" when the insn mode is DF and "0"
;; otherwise; prefix_rep is fixed at "0".
2880 (define_insn "<sse>_<unord>comi<round_saeonly_name>"
2881 [(set (reg:CCFP FLAGS_REG)
2882 (compare:CCFP
2883 (vec_select:MODEF
2884 (match_operand:<ssevecmode> 0 "register_operand" "v")
2885 (parallel [(const_int 0)]))
2886 (vec_select:MODEF
2887 (match_operand:<ssevecmode> 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
2888 (parallel [(const_int 0)]))))]
2889 "SSE_FLOAT_MODE_P (<MODE>mode)"
2890 "%v<unord>comi<ssemodesuffix>\t{<round_saeonly_op2>%1, %0|%0, %<iptr>1<round_saeonly_op2>}"
2891 [(set_attr "type" "ssecomi")
2892 (set_attr "prefix" "maybe_vex")
2893 (set_attr "prefix_rep" "0")
2894 (set (attr "prefix_data16")
2895 (if_then_else (eq_attr "mode" "DF")
2896 (const_string "1")
2897 (const_string "0")))
2898 (set_attr "mode" "<MODE>")])
2899
;; Expander: vec_cmp for the V48_AVX512VL modes with a result of mode
;; <avx512fmaskmode>.  Operand 1 is the comparison operator (empty
;; predicate string), operands 2 and 3 its inputs.  Requires
;; TARGET_AVX512F.  All work is delegated to ix86_expand_mask_vec_cmp,
;; whose success is asserted.
2900 (define_expand "vec_cmp<mode><avx512fmaskmodelower>"
2901 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
2902 (match_operator:<avx512fmaskmode> 1 ""
2903 [(match_operand:V48_AVX512VL 2 "register_operand")
2904 (match_operand:V48_AVX512VL 3 "nonimmediate_operand")]))]
2905 "TARGET_AVX512F"
2906 {
2907 bool ok = ix86_expand_mask_vec_cmp (operands);
2908 gcc_assert (ok);
2909 DONE;
2910 })
2911
;; Expander: vec_cmp for the VI12_AVX512VL modes with a result of mode
;; <avx512fmaskmode>.  Requires TARGET_AVX512BW.  Delegates to
;; ix86_expand_mask_vec_cmp and asserts that it succeeded.
2912 (define_expand "vec_cmp<mode><avx512fmaskmodelower>"
2913 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
2914 (match_operator:<avx512fmaskmode> 1 ""
2915 [(match_operand:VI12_AVX512VL 2 "register_operand")
2916 (match_operand:VI12_AVX512VL 3 "nonimmediate_operand")]))]
2917 "TARGET_AVX512BW"
2918 {
2919 bool ok = ix86_expand_mask_vec_cmp (operands);
2920 gcc_assert (ok);
2921 DONE;
2922 })
2923
;; Expander: vec_cmp for the VI_256 integer modes; the result is a
;; vector of mode <sseintvecmode>.  Requires TARGET_AVX2.  Delegates to
;; ix86_expand_int_vec_cmp and asserts that it succeeded.
2924 (define_expand "vec_cmp<mode><sseintvecmodelower>"
2925 [(set (match_operand:<sseintvecmode> 0 "register_operand")
2926 (match_operator:<sseintvecmode> 1 ""
2927 [(match_operand:VI_256 2 "register_operand")
2928 (match_operand:VI_256 3 "nonimmediate_operand")]))]
2929 "TARGET_AVX2"
2930 {
2931 bool ok = ix86_expand_int_vec_cmp (operands);
2932 gcc_assert (ok);
2933 DONE;
2934 })
2935
;; Expander: vec_cmp for the VI124_128 integer modes; the result is a
;; vector of mode <sseintvecmode>.  Requires TARGET_SSE2.  Operand 3
;; uses vector_operand rather than nonimmediate_operand.  Delegates to
;; ix86_expand_int_vec_cmp and asserts that it succeeded.
2936 (define_expand "vec_cmp<mode><sseintvecmodelower>"
2937 [(set (match_operand:<sseintvecmode> 0 "register_operand")
2938 (match_operator:<sseintvecmode> 1 ""
2939 [(match_operand:VI124_128 2 "register_operand")
2940 (match_operand:VI124_128 3 "vector_operand")]))]
2941 "TARGET_SSE2"
2942 {
2943 bool ok = ix86_expand_int_vec_cmp (operands);
2944 gcc_assert (ok);
2945 DONE;
2946 })
2947
;; Expander: vec_cmp for V2DI inputs with a V2DI result.  Kept separate
;; from the VI124_128 expander because it is gated on TARGET_SSE4_2.
;; Delegates to ix86_expand_int_vec_cmp and asserts that it succeeded.
2948 (define_expand "vec_cmpv2div2di"
2949 [(set (match_operand:V2DI 0 "register_operand")
2950 (match_operator:V2DI 1 ""
2951 [(match_operand:V2DI 2 "register_operand")
2952 (match_operand:V2DI 3 "vector_operand")]))]
2953 "TARGET_SSE4_2"
2954 {
2955 bool ok = ix86_expand_int_vec_cmp (operands);
2956 gcc_assert (ok);
2957 DONE;
2958 })
2959
;; Expander: vec_cmp for the VF_256 floating-point modes; the result is
;; an integer vector of mode <sseintvecmode>.  Requires TARGET_AVX.
;; Delegates to ix86_expand_fp_vec_cmp and asserts that it succeeded.
2960 (define_expand "vec_cmp<mode><sseintvecmodelower>"
2961 [(set (match_operand:<sseintvecmode> 0 "register_operand")
2962 (match_operator:<sseintvecmode> 1 ""
2963 [(match_operand:VF_256 2 "register_operand")
2964 (match_operand:VF_256 3 "nonimmediate_operand")]))]
2965 "TARGET_AVX"
2966 {
2967 bool ok = ix86_expand_fp_vec_cmp (operands);
2968 gcc_assert (ok);
2969 DONE;
2970 })
2971
;; Expander: vec_cmp for the VF_128 floating-point modes; the result is
;; an integer vector of mode <sseintvecmode>.  Requires TARGET_SSE.
;; Delegates to ix86_expand_fp_vec_cmp and asserts that it succeeded.
2972 (define_expand "vec_cmp<mode><sseintvecmodelower>"
2973 [(set (match_operand:<sseintvecmode> 0 "register_operand")
2974 (match_operator:<sseintvecmode> 1 ""
2975 [(match_operand:VF_128 2 "register_operand")
2976 (match_operand:VF_128 3 "vector_operand")]))]
2977 "TARGET_SSE"
2978 {
2979 bool ok = ix86_expand_fp_vec_cmp (operands);
2980 gcc_assert (ok);
2981 DONE;
2982 })
2983
;; Expander: vec_cmpu for the VI48_AVX512VL modes with a result of
;; mode <avx512fmaskmode>.  Requires TARGET_AVX512F.  Delegates to
;; ix86_expand_mask_vec_cmp, the same helper the vec_cmp mask expanders
;; use, and asserts that it succeeded.
2984 (define_expand "vec_cmpu<mode><avx512fmaskmodelower>"
2985 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
2986 (match_operator:<avx512fmaskmode> 1 ""
2987 [(match_operand:VI48_AVX512VL 2 "register_operand")
2988 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand")]))]
2989 "TARGET_AVX512F"
2990 {
2991 bool ok = ix86_expand_mask_vec_cmp (operands);
2992 gcc_assert (ok);
2993 DONE;
2994 })
2995
;; Expander: vec_cmpu for the VI12_AVX512VL modes with a result of
;; mode <avx512fmaskmode>.  Requires TARGET_AVX512BW.  Delegates to
;; ix86_expand_mask_vec_cmp and asserts that it succeeded.
2996 (define_expand "vec_cmpu<mode><avx512fmaskmodelower>"
2997 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
2998 (match_operator:<avx512fmaskmode> 1 ""
2999 [(match_operand:VI12_AVX512VL 2 "register_operand")
3000 (match_operand:VI12_AVX512VL 3 "nonimmediate_operand")]))]
3001 "TARGET_AVX512BW"
3002 {
3003 bool ok = ix86_expand_mask_vec_cmp (operands);
3004 gcc_assert (ok);
3005 DONE;
3006 })
3007
;; Expander: vec_cmpu for the VI_256 integer modes; the result is a
;; vector of mode <sseintvecmode>.  Requires TARGET_AVX2.  Delegates to
;; ix86_expand_int_vec_cmp and asserts that it succeeded.
3008 (define_expand "vec_cmpu<mode><sseintvecmodelower>"
3009 [(set (match_operand:<sseintvecmode> 0 "register_operand")
3010 (match_operator:<sseintvecmode> 1 ""
3011 [(match_operand:VI_256 2 "register_operand")
3012 (match_operand:VI_256 3 "nonimmediate_operand")]))]
3013 "TARGET_AVX2"
3014 {
3015 bool ok = ix86_expand_int_vec_cmp (operands);
3016 gcc_assert (ok);
3017 DONE;
3018 })
3019
;; Expander: vec_cmpu for the VI124_128 integer modes; the result is a
;; vector of mode <sseintvecmode>.  Requires TARGET_SSE2.  Delegates to
;; ix86_expand_int_vec_cmp and asserts that it succeeded.
3020 (define_expand "vec_cmpu<mode><sseintvecmodelower>"
3021 [(set (match_operand:<sseintvecmode> 0 "register_operand")
3022 (match_operator:<sseintvecmode> 1 ""
3023 [(match_operand:VI124_128 2 "register_operand")
3024 (match_operand:VI124_128 3 "vector_operand")]))]
3025 "TARGET_SSE2"
3026 {
3027 bool ok = ix86_expand_int_vec_cmp (operands);
3028 gcc_assert (ok);
3029 DONE;
3030 })
3031
;; Expander: vec_cmpu for V2DI inputs with a V2DI result, gated on
;; TARGET_SSE4_2.  Delegates to ix86_expand_int_vec_cmp and asserts
;; that it succeeded.
3032 (define_expand "vec_cmpuv2div2di"
3033 [(set (match_operand:V2DI 0 "register_operand")
3034 (match_operator:V2DI 1 ""
3035 [(match_operand:V2DI 2 "register_operand")
3036 (match_operand:V2DI 3 "vector_operand")]))]
3037 "TARGET_SSE4_2"
3038 {
3039 bool ok = ix86_expand_int_vec_cmp (operands);
3040 gcc_assert (ok);
3041 DONE;
3042 })
3043
;; Expander: vec_cmpeq for V2DI inputs with a V2DI result.  Gated on
;; TARGET_SSE4_1, whereas the vec_cmpv2div2di and vec_cmpuv2div2di
;; expanders are gated on TARGET_SSE4_2.  Delegates to
;; ix86_expand_int_vec_cmp and asserts that it succeeded.
3044 (define_expand "vec_cmpeqv2div2di"
3045 [(set (match_operand:V2DI 0 "register_operand")
3046 (match_operator:V2DI 1 ""
3047 [(match_operand:V2DI 2 "register_operand")
3048 (match_operand:V2DI 3 "vector_operand")]))]
3049 "TARGET_SSE4_1"
3050 {
3051 bool ok = ix86_expand_int_vec_cmp (operands);
3052 gcc_assert (ok);
3053 DONE;
3054 })
3055
;; Expander: vcond selecting between operands 1 and 2 (any V_512 mode)
;; according to a comparison (operator 3) of the VF_512 operands 4 and
;; 5.  Besides TARGET_AVX512F the condition requires the data mode and
;; the comparison mode to have the same GET_MODE_NUNITS.  Delegates to
;; ix86_expand_fp_vcond and asserts that it succeeded.
3056 (define_expand "vcond<V_512:mode><VF_512:mode>"
3057 [(set (match_operand:V_512 0 "register_operand")
3058 (if_then_else:V_512
3059 (match_operator 3 ""
3060 [(match_operand:VF_512 4 "nonimmediate_operand")
3061 (match_operand:VF_512 5 "nonimmediate_operand")])
3062 (match_operand:V_512 1 "general_operand")
3063 (match_operand:V_512 2 "general_operand")))]
3064 "TARGET_AVX512F
3065 && (GET_MODE_NUNITS (<V_512:MODE>mode)
3066 == GET_MODE_NUNITS (<VF_512:MODE>mode))"
3067 {
3068 bool ok = ix86_expand_fp_vcond (operands);
3069 gcc_assert (ok);
3070 DONE;
3071 })
3072
;; Expander: vcond selecting between operands 1 and 2 (any V_256 mode)
;; according to a comparison (operator 3) of the VF_256 operands 4 and
;; 5.  Requires TARGET_AVX and equal GET_MODE_NUNITS for the data and
;; comparison modes.  Delegates to ix86_expand_fp_vcond and asserts
;; that it succeeded.
3073 (define_expand "vcond<V_256:mode><VF_256:mode>"
3074 [(set (match_operand:V_256 0 "register_operand")
3075 (if_then_else:V_256
3076 (match_operator 3 ""
3077 [(match_operand:VF_256 4 "nonimmediate_operand")
3078 (match_operand:VF_256 5 "nonimmediate_operand")])
3079 (match_operand:V_256 1 "general_operand")
3080 (match_operand:V_256 2 "general_operand")))]
3081 "TARGET_AVX
3082 && (GET_MODE_NUNITS (<V_256:MODE>mode)
3083 == GET_MODE_NUNITS (<VF_256:MODE>mode))"
3084 {
3085 bool ok = ix86_expand_fp_vcond (operands);
3086 gcc_assert (ok);
3087 DONE;
3088 })
3089
;; Expander: vcond selecting between operands 1 and 2 (any V_128 mode)
;; according to a comparison (operator 3) of the VF_128 operands 4 and
;; 5, which use vector_operand here.  Requires TARGET_SSE and equal
;; GET_MODE_NUNITS for the data and comparison modes.  Delegates to
;; ix86_expand_fp_vcond and asserts that it succeeded.
3090 (define_expand "vcond<V_128:mode><VF_128:mode>"
3091 [(set (match_operand:V_128 0 "register_operand")
3092 (if_then_else:V_128
3093 (match_operator 3 ""
3094 [(match_operand:VF_128 4 "vector_operand")
3095 (match_operand:VF_128 5 "vector_operand")])
3096 (match_operand:V_128 1 "general_operand")
3097 (match_operand:V_128 2 "general_operand")))]
3098 "TARGET_SSE
3099 && (GET_MODE_NUNITS (<V_128:MODE>mode)
3100 == GET_MODE_NUNITS (<VF_128:MODE>mode))"
3101 {
3102 bool ok = ix86_expand_fp_vcond (operands);
3103 gcc_assert (ok);
3104 DONE;
3105 })
3106
;; Expander: vcond_mask for the V48_AVX512VL modes with a selector
;; (operand 3) of mode <avx512fmaskmode>.  There is no preparation
;; code, so the vec_merge RTL template itself is what gets emitted.
;; Requires TARGET_AVX512F.
3107 (define_expand "vcond_mask_<mode><avx512fmaskmodelower>"
3108 [(set (match_operand:V48_AVX512VL 0 "register_operand")
3109 (vec_merge:V48_AVX512VL
3110 (match_operand:V48_AVX512VL 1 "nonimmediate_operand")
3111 (match_operand:V48_AVX512VL 2 "nonimm_or_0_operand")
3112 (match_operand:<avx512fmaskmode> 3 "register_operand")))]
3113 "TARGET_AVX512F")
3114
;; Expander: vcond_mask for the VI12_AVX512VL modes with a selector
;; (operand 3) of mode <avx512fmaskmode>.  No preparation code; the
;; vec_merge RTL template is emitted as written.  Requires
;; TARGET_AVX512BW.
3115 (define_expand "vcond_mask_<mode><avx512fmaskmodelower>"
3116 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
3117 (vec_merge:VI12_AVX512VL
3118 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
3119 (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand")
3120 (match_operand:<avx512fmaskmode> 3 "register_operand")))]
3121 "TARGET_AVX512BW")
3122
;; Expander: vcond_mask for the VI_256 modes with a vector selector
;; (operand 3) of mode <sseintvecmode>.  Requires TARGET_AVX2.  The
;; RTL template is not emitted; instead ix86_expand_sse_movcc is called
;; with (dest, selector, operand 1, operand 2).
3123 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
3124 [(set (match_operand:VI_256 0 "register_operand")
3125 (vec_merge:VI_256
3126 (match_operand:VI_256 1 "nonimmediate_operand")
3127 (match_operand:VI_256 2 "nonimm_or_0_operand")
3128 (match_operand:<sseintvecmode> 3 "register_operand")))]
3129 "TARGET_AVX2"
3130 {
3131 ix86_expand_sse_movcc (operands[0], operands[3],
3132 operands[1], operands[2]);
3133 DONE;
3134 })
3135
;; Expander: vcond_mask for the VI124_128 modes with a vector selector
;; (operand 3) of mode <sseintvecmode>.  Requires TARGET_SSE2.  Calls
;; ix86_expand_sse_movcc with (dest, selector, operand 1, operand 2)
;; and finishes with DONE.
3136 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
3137 [(set (match_operand:VI124_128 0 "register_operand")
3138 (vec_merge:VI124_128
3139 (match_operand:VI124_128 1 "vector_operand")
3140 (match_operand:VI124_128 2 "nonimm_or_0_operand")
3141 (match_operand:<sseintvecmode> 3 "register_operand")))]
3142 "TARGET_SSE2"
3143 {
3144 ix86_expand_sse_movcc (operands[0], operands[3],
3145 operands[1], operands[2]);
3146 DONE;
3147 })
3148
;; Expander: vcond_mask for V2DI data with a V2DI selector (operand 3),
;; gated on TARGET_SSE4_2.  Calls ix86_expand_sse_movcc with
;; (dest, selector, operand 1, operand 2) and finishes with DONE.
3149 (define_expand "vcond_mask_v2div2di"
3150 [(set (match_operand:V2DI 0 "register_operand")
3151 (vec_merge:V2DI
3152 (match_operand:V2DI 1 "vector_operand")
3153 (match_operand:V2DI 2 "nonimm_or_0_operand")
3154 (match_operand:V2DI 3 "register_operand")))]
3155 "TARGET_SSE4_2"
3156 {
3157 ix86_expand_sse_movcc (operands[0], operands[3],
3158 operands[1], operands[2]);
3159 DONE;
3160 })
3161
;; Expander: vcond_mask for the VF_256 floating-point modes with an
;; integer-vector selector (operand 3) of mode <sseintvecmode>.
;; Requires TARGET_AVX.  Calls ix86_expand_sse_movcc with
;; (dest, selector, operand 1, operand 2) and finishes with DONE.
3162 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
3163 [(set (match_operand:VF_256 0 "register_operand")
3164 (vec_merge:VF_256
3165 (match_operand:VF_256 1 "nonimmediate_operand")
3166 (match_operand:VF_256 2 "nonimm_or_0_operand")
3167 (match_operand:<sseintvecmode> 3 "register_operand")))]
3168 "TARGET_AVX"
3169 {
3170 ix86_expand_sse_movcc (operands[0], operands[3],
3171 operands[1], operands[2]);
3172 DONE;
3173 })
3174
;; Expander: vcond_mask for the VF_128 floating-point modes with an
;; integer-vector selector (operand 3) of mode <sseintvecmode>.
;; Requires TARGET_SSE.  Calls ix86_expand_sse_movcc with
;; (dest, selector, operand 1, operand 2) and finishes with DONE.
3175 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
3176 [(set (match_operand:VF_128 0 "register_operand")
3177 (vec_merge:VF_128
3178 (match_operand:VF_128 1 "vector_operand")
3179 (match_operand:VF_128 2 "nonimm_or_0_operand")
3180 (match_operand:<sseintvecmode> 3 "register_operand")))]
3181 "TARGET_SSE"
3182 {
3183 ix86_expand_sse_movcc (operands[0], operands[3],
3184 operands[1], operands[2]);
3185 DONE;
3186 })
3187
3188 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3189 ;;
3190 ;; Parallel floating point logical operations
3191 ;;
3192 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3193
;; Insn: (~operand 1) & operand 2 over the VF_128_256 modes.
;; Alternatives, per the "isa" attribute: 0 noavx (two-operand andn,
;; destination tied to operand 1), 1 avx, 2 avx512dq, 3 avx512f (all
;; three-operand vandn with <mask_operand3_1> after the destination).
;; The C output code builds the template in a static buffer:
;;   - the operand layout is chosen from which_alternative;
;;   - the mnemonic suffix is chosen from the insn "mode" attribute:
;;     "ps" for V8SF/V4SF, and for OI/TI the integer form vpandn with a
;;     "q" or "d" suffix depending on whether the inner mode is DFmode;
;;     any other mode keeps <ssemodesuffix>.
;; The "mode" attribute yields <sseintvecmode2> for alternative 3, and
;; for alternative 1 when <mask_applied> holds without TARGET_AVX512DQ.
;; NOTE(review): presumably <sseintvecmode2> expands to OI/TI, which is
;; what routes those cases to the vpandn branch - confirm against the
;; mode attribute's definition.
3194 (define_insn "<sse>_andnot<mode>3<mask_name>"
3195 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x,v,v")
3196 (and:VF_128_256
3197 (not:VF_128_256
3198 (match_operand:VF_128_256 1 "register_operand" "0,x,v,v"))
3199 (match_operand:VF_128_256 2 "vector_operand" "xBm,xm,vm,vm")))]
3200 "TARGET_SSE && <mask_avx512vl_condition>"
3201 {
3202 static char buf[128];
3203 const char *ops;
3204 const char *suffix;
3205
3206 switch (which_alternative)
3207 {
3208 case 0:
3209 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
3210 break;
3211 case 1:
3212 case 2:
3213 case 3:
3214 ops = "vandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
3215 break;
3216 default:
3217 gcc_unreachable ();
3218 }
3219
3220 switch (get_attr_mode (insn))
3221 {
3222 case MODE_V8SF:
3223 case MODE_V4SF:
3224 suffix = "ps";
3225 break;
3226 case MODE_OI:
3227 case MODE_TI:
3228 /* There is no vandnp[sd] in avx512f. Use vpandn[qd]. */
3229 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
3230 ops = "vpandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
3231 break;
3232 default:
3233 suffix = "<ssemodesuffix>";
3234 }
3235
3236 snprintf (buf, sizeof (buf), ops, suffix);
3237 return buf;
3238 }
3239 [(set_attr "isa" "noavx,avx,avx512dq,avx512f")
3240 (set_attr "type" "sselog")
3241 (set_attr "prefix" "orig,maybe_vex,evex,evex")
3242 (set (attr "mode")
3243 (cond [(and (match_test "<mask_applied>")
3244 (and (eq_attr "alternative" "1")
3245 (match_test "!TARGET_AVX512DQ")))
3246 (const_string "<sseintvecmode2>")
3247 (eq_attr "alternative" "3")
3248 (const_string "<sseintvecmode2>")
3249 (and (match_test "<MODE_SIZE> == 16")
3250 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
3251 (const_string "<ssePSmode>")
3252 (match_test "TARGET_AVX")
3253 (const_string "<MODE>")
3254 (match_test "optimize_function_for_size_p (cfun)")
3255 (const_string "V4SF")
3256 ]
3257 (const_string "<MODE>")))])
3258
3259
;; Insn: (~operand 1) & operand 2 over the VF_512 modes; requires
;; TARGET_AVX512F.  The C output code formats
;; "v" <ops> "andn" <suffix> into a static buffer: with TARGET_AVX512DQ
;; <ops> is empty and <suffix> is <ssemodesuffix>; without it <ops> is
;; "p" and <suffix> is "q" or "d" depending on whether the inner mode
;; is DFmode (i.e. the integer vpandn form is used).  The "mode"
;; attribute follows the same split: <sseinsnmode> with
;; TARGET_AVX512DQ, XI otherwise.
3260 (define_insn "<sse>_andnot<mode>3<mask_name>"
3261 [(set (match_operand:VF_512 0 "register_operand" "=v")
3262 (and:VF_512
3263 (not:VF_512
3264 (match_operand:VF_512 1 "register_operand" "v"))
3265 (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
3266 "TARGET_AVX512F"
3267 {
3268 static char buf[128];
3269 const char *ops;
3270 const char *suffix;
3271
3272 suffix = "<ssemodesuffix>";
3273 ops = "";
3274
3275 /* There is no vandnp[sd] in avx512f. Use vpandn[qd]. */
3276 if (!TARGET_AVX512DQ)
3277 {
3278 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
3279 ops = "p";
3280 }
3281
3282 snprintf (buf, sizeof (buf),
3283 "v%sandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}",
3284 ops, suffix);
3285 return buf;
3286 }
3287 [(set_attr "type" "sselog")
3288 (set_attr "prefix" "evex")
3289 (set (attr "mode")
3290 (if_then_else (match_test "TARGET_AVX512DQ")
3291 (const_string "<sseinsnmode>")
3292 (const_string "XI")))])
3293
;; Expander: floating-point vector logical operation (any_logic) over
;; the VF_128_256 modes.  Both inputs accept vector_operand; the
;; preparation statement passes the operands through
;; ix86_fixup_binary_operands_no_copy before the template is emitted.
;; Requires TARGET_SSE && <mask_avx512vl_condition>.
3294 (define_expand "<code><mode>3<mask_name>"
3295 [(set (match_operand:VF_128_256 0 "register_operand")
3296 (any_logic:VF_128_256
3297 (match_operand:VF_128_256 1 "vector_operand")
3298 (match_operand:VF_128_256 2 "vector_operand")))]
3299 "TARGET_SSE && <mask_avx512vl_condition>"
3300 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
3301
;; Expander: floating-point vector logical operation (any_logic) over
;; the VF_512 modes.  Both inputs accept nonimmediate_operand; the
;; preparation statement passes the operands through
;; ix86_fixup_binary_operands_no_copy.  Requires TARGET_AVX512F.
3302 (define_expand "<code><mode>3<mask_name>"
3303 [(set (match_operand:VF_512 0 "register_operand")
3304 (any_logic:VF_512
3305 (match_operand:VF_512 1 "nonimmediate_operand")
3306 (match_operand:VF_512 2 "nonimmediate_operand")))]
3307 "TARGET_AVX512F"
3308 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
3309
;; Insn (unnamed): floating-point vector logical operation (any_logic)
;; over the VF_128_256 modes.  Operand 1 carries the '%' commutative
;; marker and the condition rejects the case where both inputs are
;; memory.  Alternatives, per the "isa" attribute: 0 noavx (two-operand
;; form), 1 avx, 2 avx512dq, 3 avx512f (three-operand v<logic> form
;; with <mask_operand3_1> after the destination).
;; The C output code builds the template in a static buffer, choosing
;; the operand layout from which_alternative and the suffix from the
;; insn "mode" attribute: "ps" for V8SF/V4SF; for OI/TI the integer
;; form vp<logic> with "q" or "d" depending on whether the inner mode
;; is DFmode; otherwise <ssemodesuffix>.
;; The "prefix" attribute for alternative 1 is maybe_evex here.
3310 (define_insn "*<code><mode>3<mask_name>"
3311 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x,v,v")
3312 (any_logic:VF_128_256
3313 (match_operand:VF_128_256 1 "vector_operand" "%0,x,v,v")
3314 (match_operand:VF_128_256 2 "vector_operand" "xBm,xm,vm,vm")))]
3315 "TARGET_SSE && <mask_avx512vl_condition>
3316 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3317 {
3318 static char buf[128];
3319 const char *ops;
3320 const char *suffix;
3321
3322 switch (which_alternative)
3323 {
3324 case 0:
3325 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
3326 break;
3327 case 1:
3328 case 2:
3329 case 3:
3330 ops = "v<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
3331 break;
3332 default:
3333 gcc_unreachable ();
3334 }
3335
3336 switch (get_attr_mode (insn))
3337 {
3338 case MODE_V8SF:
3339 case MODE_V4SF:
3340 suffix = "ps";
3341 break;
3342 case MODE_OI:
3343 case MODE_TI:
3344 /* There is no v<logic>p[sd] in avx512f. Use vp<logic>[qd]. */
3345 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
3346 ops = "vp<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
3347 break;
3348 default:
3349 suffix = "<ssemodesuffix>";
3350 }
3351
3352 snprintf (buf, sizeof (buf), ops, suffix);
3353 return buf;
3354 }
3355 [(set_attr "isa" "noavx,avx,avx512dq,avx512f")
3356 (set_attr "type" "sselog")
3357 (set_attr "prefix" "orig,maybe_evex,evex,evex")
3358 (set (attr "mode")
3359 (cond [(and (match_test "<mask_applied>")
3360 (and (eq_attr "alternative" "1")
3361 (match_test "!TARGET_AVX512DQ")))
3362 (const_string "<sseintvecmode2>")
3363 (eq_attr "alternative" "3")
3364 (const_string "<sseintvecmode2>")
3365 (and (match_test "<MODE_SIZE> == 16")
3366 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
3367 (const_string "<ssePSmode>")
3368 (match_test "TARGET_AVX")
3369 (const_string "<MODE>")
3370 (match_test "optimize_function_for_size_p (cfun)")
3371 (const_string "V4SF")
3372 ]
3373 (const_string "<MODE>")))])
3374
;; Insn (unnamed): floating-point vector logical operation (any_logic)
;; over the VF_512 modes.  Requires TARGET_AVX512F and rejects the case
;; where both inputs are memory.  The C output code formats
;; "v" <ops> "<logic>" <suffix> into a static buffer: with
;; TARGET_AVX512DQ <ops> is empty and <suffix> is <ssemodesuffix>;
;; without it <ops> is "p" and <suffix> is "q" or "d" depending on
;; whether the inner mode is DFmode.  The "mode" attribute is
;; <sseinsnmode> with TARGET_AVX512DQ and XI otherwise.
3375 (define_insn "*<code><mode>3<mask_name>"
3376 [(set (match_operand:VF_512 0 "register_operand" "=v")
3377 (any_logic:VF_512
3378 (match_operand:VF_512 1 "nonimmediate_operand" "%v")
3379 (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
3380 "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3381 {
3382 static char buf[128];
3383 const char *ops;
3384 const char *suffix;
3385
3386 suffix = "<ssemodesuffix>";
3387 ops = "";
3388
3389 /* There is no v<logic>p[sd] in avx512f. Use vp<logic>[dq]. */
3390 if (!TARGET_AVX512DQ)
3391 {
3392 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
3393 ops = "p";
3394 }
3395
3396 snprintf (buf, sizeof (buf),
3397 "v%s<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}",
3398 ops, suffix);
3399 return buf;
3400 }
3401 [(set_attr "type" "sselog")
3402 (set_attr "prefix" "evex")
3403 (set (attr "mode")
3404 (if_then_else (match_test "TARGET_AVX512DQ")
3405 (const_string "<sseinsnmode>")
3406 (const_string "XI")))])
3407
;; Expander: copysign over the VF modes; requires TARGET_SSE.  The
;; preparation code sets operand 3 to the value returned by
;; ix86_build_signbit_mask (<MODE>mode, 1, 0) and allocates two fresh
;; registers, operands 4 and 5.  The emitted sequence is then
;;   op4 = ~op3 & op1;   op5 = op3 & op2;   op0 = op4 | op5.
3408 (define_expand "copysign<mode>3"
3409 [(set (match_dup 4)
3410 (and:VF
3411 (not:VF (match_dup 3))
3412 (match_operand:VF 1 "vector_operand")))
3413 (set (match_dup 5)
3414 (and:VF (match_dup 3)
3415 (match_operand:VF 2 "vector_operand")))
3416 (set (match_operand:VF 0 "register_operand")
3417 (ior:VF (match_dup 4) (match_dup 5)))]
3418 "TARGET_SSE"
3419 {
3420 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
3421
3422 operands[4] = gen_reg_rtx (<MODE>mode);
3423 operands[5] = gen_reg_rtx (<MODE>mode);
3424 })
3425
3426 ;; Also define scalar versions. These are used for abs, neg, and
3427 ;; conditional move. Using subregs into vector modes causes register
3428 ;; allocation lossage. These patterns do not allow memory operands
3429 ;; because the native instructions read the full 128-bits.
3430
;; Insn (unnamed): scalar (~operand 1) & operand 2 over the MODEF
;; modes, register operands only.  Alternatives, per the "isa"
;; attribute: 0 noavx, 1 avx, 2 avx512vl, 3 avx512f.
;; The C output code starts with suffix "ps" when the insn mode is
;; V4SF and <ssevecmodesuffix> otherwise, then per alternative:
;;   0 - two-operand andn;
;;   1 - three-operand vandn;
;;   2 - vandn with TARGET_AVX512DQ, else vpandn with "q"/"d" suffix
;;       chosen by whether the mode is DFmode;
;;   3 - as 2, but every operand is printed with the %g modifier.
;; The "mode" attribute for alternatives 2 and 3 depends on
;; TARGET_AVX512DQ (TI / XI when it is absent).
3431 (define_insn "*andnot<mode>3"
3432 [(set (match_operand:MODEF 0 "register_operand" "=x,x,v,v")
3433 (and:MODEF
3434 (not:MODEF
3435 (match_operand:MODEF 1 "register_operand" "0,x,v,v"))
3436 (match_operand:MODEF 2 "register_operand" "x,x,v,v")))]
3437 "SSE_FLOAT_MODE_P (<MODE>mode)"
3438 {
3439 static char buf[128];
3440 const char *ops;
3441 const char *suffix
3442 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
3443
3444 switch (which_alternative)
3445 {
3446 case 0:
3447 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
3448 break;
3449 case 1:
3450 ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3451 break;
3452 case 2:
3453 if (TARGET_AVX512DQ)
3454 ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3455 else
3456 {
3457 suffix = <MODE>mode == DFmode ? "q" : "d";
3458 ops = "vpandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3459 }
3460 break;
3461 case 3:
3462 if (TARGET_AVX512DQ)
3463 ops = "vandn%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3464 else
3465 {
3466 suffix = <MODE>mode == DFmode ? "q" : "d";
3467 ops = "vpandn%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3468 }
3469 break;
3470 default:
3471 gcc_unreachable ();
3472 }
3473
3474 snprintf (buf, sizeof (buf), ops, suffix);
3475 return buf;
3476 }
3477 [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
3478 (set_attr "type" "sselog")
3479 (set_attr "prefix" "orig,vex,evex,evex")
3480 (set (attr "mode")
3481 (cond [(eq_attr "alternative" "2")
3482 (if_then_else (match_test "TARGET_AVX512DQ")
3483 (const_string "<ssevecmode>")
3484 (const_string "TI"))
3485 (eq_attr "alternative" "3")
3486 (if_then_else (match_test "TARGET_AVX512DQ")
3487 (const_string "<avx512fvecmode>")
3488 (const_string "XI"))
3489 (and (match_test "<MODE_SIZE> == 16")
3490 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
3491 (const_string "V4SF")
3492 (match_test "TARGET_AVX")
3493 (const_string "<ssevecmode>")
3494 (match_test "optimize_function_for_size_p (cfun)")
3495 (const_string "V4SF")
3496 ]
3497 (const_string "<ssevecmode>")))])
3498
;; Insn (unnamed): (~operand 1) & operand 2 in TFmode; requires
;; TARGET_SSE.  Alternatives, per the "isa" attribute: 0 noavx, 1 avx,
;; 2 avx512vl, 3 avx512f.
;; The C output code picks the mnemonic stem first: "pandnq" for
;; alternatives 2 and 3, otherwise "andnps" when the insn mode is V4SF
;; and "pandn" when it is not.  It then picks the operand layout:
;; two-operand for alternative 0, three-operand with a "v" prefix for
;; 1 and 2, and three-operand with the %g modifier on every operand
;; for 3.
;; prefix_data16 is "1" only for alternative 0 in TI mode.  The "mode"
;; attribute is TI for alternative 2 and XI for alternative 3.
3499 (define_insn "*andnottf3"
3500 [(set (match_operand:TF 0 "register_operand" "=x,x,v,v")
3501 (and:TF
3502 (not:TF (match_operand:TF 1 "register_operand" "0,x,v,v"))
3503 (match_operand:TF 2 "vector_operand" "xBm,xm,vm,v")))]
3504 "TARGET_SSE"
3505 {
3506 static char buf[128];
3507 const char *ops;
3508 const char *tmp
3509 = (which_alternative >= 2 ? "pandnq"
3510 : get_attr_mode (insn) == MODE_V4SF ? "andnps" : "pandn");
3511
3512 switch (which_alternative)
3513 {
3514 case 0:
3515 ops = "%s\t{%%2, %%0|%%0, %%2}";
3516 break;
3517 case 1:
3518 case 2:
3519 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3520 break;
3521 case 3:
3522 ops = "v%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3523 break;
3524 default:
3525 gcc_unreachable ();
3526 }
3527
3528 snprintf (buf, sizeof (buf), ops, tmp);
3529 return buf;
3530 }
3531 [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
3532 (set_attr "type" "sselog")
3533 (set (attr "prefix_data16")
3534 (if_then_else
3535 (and (eq_attr "alternative" "0")
3536 (eq_attr "mode" "TI"))
3537 (const_string "1")
3538 (const_string "*")))
3539 (set_attr "prefix" "orig,vex,evex,evex")
3540 (set (attr "mode")
3541 (cond [(eq_attr "alternative" "2")
3542 (const_string "TI")
3543 (eq_attr "alternative" "3")
3544 (const_string "XI")
3545 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
3546 (const_string "V4SF")
3547 (match_test "TARGET_AVX")
3548 (const_string "TI")
3549 (ior (not (match_test "TARGET_SSE2"))
3550 (match_test "optimize_function_for_size_p (cfun)"))
3551 (const_string "V4SF")
3552 ]
3553 (const_string "TI")))])
3554
;; Insn (unnamed): scalar logical operation (any_logic) over the MODEF
;; modes, register operands only; operand 1 carries the '%'
;; commutative marker.  Alternatives, per the "isa" attribute: 0 noavx,
;; 1 avx, 2 avx512vl, 3 avx512f.
;; The C output code starts with suffix "ps" when the insn mode is
;; V4SF and <ssevecmodesuffix> otherwise.  Note the case order in the
;; switch: case 2 comes before case 1 so that, with TARGET_AVX512DQ,
;; it falls through into the plain v<logic> template of case 1;
;; without TARGET_AVX512DQ it uses vp<logic> with a "q"/"d" suffix and
;; breaks.  Case 3 mirrors case 2 but prints every operand with the %g
;; modifier.
3555 (define_insn "*<code><mode>3"
3556 [(set (match_operand:MODEF 0 "register_operand" "=x,x,v,v")
3557 (any_logic:MODEF
3558 (match_operand:MODEF 1 "register_operand" "%0,x,v,v")
3559 (match_operand:MODEF 2 "register_operand" "x,x,v,v")))]
3560 "SSE_FLOAT_MODE_P (<MODE>mode)"
3561 {
3562 static char buf[128];
3563 const char *ops;
3564 const char *suffix
3565 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
3566
3567 switch (which_alternative)
3568 {
3569 case 0:
3570 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
3571 break;
3572 case 2:
3573 if (!TARGET_AVX512DQ)
3574 {
3575 suffix = <MODE>mode == DFmode ? "q" : "d";
3576 ops = "vp<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3577 break;
3578 }
3579 /* FALLTHRU */
3580 case 1:
3581 ops = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3582 break;
3583 case 3:
3584 if (TARGET_AVX512DQ)
3585 ops = "v<logic>%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3586 else
3587 {
3588 suffix = <MODE>mode == DFmode ? "q" : "d";
3589 ops = "vp<logic>%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3590 }
3591 break;
3592 default:
3593 gcc_unreachable ();
3594 }
3595
3596 snprintf (buf, sizeof (buf), ops, suffix);
3597 return buf;
3598 }
3599 [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
3600 (set_attr "type" "sselog")
3601 (set_attr "prefix" "orig,vex,evex,evex")
3602 (set (attr "mode")
3603 (cond [(eq_attr "alternative" "2")
3604 (if_then_else (match_test "TARGET_AVX512DQ")
3605 (const_string "<ssevecmode>")
3606 (const_string "TI"))
3607 (eq_attr "alternative" "3")
3608 (if_then_else (match_test "TARGET_AVX512DQ")
3609 (const_string "<avx512fvecmode>")
3610 (const_string "XI"))
3611 (and (match_test "<MODE_SIZE> == 16")
3612 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
3613 (const_string "V4SF")
3614 (match_test "TARGET_AVX")
3615 (const_string "<ssevecmode>")
3616 (match_test "optimize_function_for_size_p (cfun)")
3617 (const_string "V4SF")
3618 ]
3619 (const_string "<ssevecmode>")))])
3620
;; Expander for the named <code>tf3 patterns: any_logic on TFmode, available
;; under TARGET_SSE.  Operand 0 must be a register; operands 1 and 2 are
;; vector_operands.  The preparation statement passes the operands through
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
3621 (define_expand "<code>tf3"
3622 [(set (match_operand:TF 0 "register_operand")
3623 (any_logic:TF
3624 (match_operand:TF 1 "vector_operand")
3625 (match_operand:TF 2 "vector_operand")))]
3626 "TARGET_SSE"
3627 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
3628
;; Bitwise logic (any_logic) on TFmode values in SSE registers.  The insn
;; condition rejects the case where operands 1 and 2 are both memory.
;; Alternatives, per the "isa" attribute: noavx (two-operand, operand 1 tied
;; to operand 0), avx, avx512vl and avx512f.  The output code picks
;;   - "p<logic>q" for the two EVEX alternatives (2 and 3),
;;   - "<logic>ps" when the mode attribute is V4SF, otherwise "p<logic>",
;; prefixes "v" for every alternative except 0, and prints the operands of
;; alternative 3 with the %g modifier.  The string is returned in a
;; function-static buffer.
3629 (define_insn "*<code>tf3"
3630 [(set (match_operand:TF 0 "register_operand" "=x,x,v,v")
3631 (any_logic:TF
3632 (match_operand:TF 1 "vector_operand" "%0,x,v,v")
3633 (match_operand:TF 2 "vector_operand" "xBm,xm,vm,v")))]
3634 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3635 {
3636 static char buf[128];
3637 const char *ops;
3638 const char *tmp
3639 = (which_alternative >= 2 ? "p<logic>q"
3640 : get_attr_mode (insn) == MODE_V4SF ? "<logic>ps" : "p<logic>");
3641
3642 switch (which_alternative)
3643 {
3644 case 0:
3645 ops = "%s\t{%%2, %%0|%%0, %%2}";
3646 break;
3647 case 1:
3648 case 2:
3649 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3650 break;
3651 case 3:
3652 ops = "v%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3653 break;
3654 default:
3655 gcc_unreachable ();
3656 }
3657
3658 snprintf (buf, sizeof (buf), ops, tmp);
3659 return buf;
3660 }
3661 [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
3662 (set_attr "type" "sselog")
3663 (set (attr "prefix_data16")
3664 (if_then_else
3665 (and (eq_attr "alternative" "0")
3666 (eq_attr "mode" "TI"))
3667 (const_string "1")
3668 (const_string "*")))
3669 (set_attr "prefix" "orig,vex,evex,evex")
3670 (set (attr "mode")
3671 (cond [(eq_attr "alternative" "2")
3672 (const_string "TI")
;; Alternative 3 emits vp<logic>q on %g-printed operands, i.e. the same
;; full-width integer operation the scalar-float logic pattern reports as XI.
;; The previous value "QI" described a one-byte operation and was a typo.
3673 (eq_attr "alternative" "3")
3674 (const_string "XI")
3675 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
3676 (const_string "V4SF")
3677 (match_test "TARGET_AVX")
3678 (const_string "TI")
3679 (ior (not (match_test "TARGET_SSE2"))
3680 (match_test "optimize_function_for_size_p (cfun)"))
3681 (const_string "V4SF")
3682 ]
3683 (const_string "TI")))])
3684
3685 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3686 ;;
3687 ;; FMA floating point multiply/accumulate instructions. These include
3688 ;; scalar versions of the instructions as well as vector versions.
3689 ;;
3690 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3691
3692 ;; The standard names for scalar FMA are only available with SSE math enabled.
3693 ;; CPUID bit AVX512F enables evex encoded scalar and 512-bit fma. It doesn't
3694 ;; care about FMA bit, so we enable fma for TARGET_AVX512F even when TARGET_FMA
3695 ;; and TARGET_FMA4 are both false.
3696 ;; TODO: In theory AVX512F does not automatically imply FMA, and without FMA
3697 ;; one must force the EVEX encoding of the fma insns. Ideally we'd improve
3698 ;; GAS to allow proper prefix selection. However, for the moment all hardware
3699 ;; that supports AVX512F also supports FMA so we can ignore this for now.
;; Modes for the standard-named fma/fms/fnma/fnms expanders.  Scalar SF/DF
;; additionally require TARGET_SSE_MATH; 128- and 256-bit vectors accept FMA,
;; FMA4 or AVX512VL; 512-bit vectors require AVX512F.
3700 (define_mode_iterator FMAMODEM
3701 [(SF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
3702 (DF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
3703 (V4SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3704 (V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3705 (V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3706 (V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3707 (V16SF "TARGET_AVX512F")
3708 (V8DF "TARGET_AVX512F")])
3709
;; Standard-named fused multiply-add: op0 = fma (op1, op2, op3).
;; There is no condition string and no preparation code, so availability is
;; decided solely by the per-mode conditions of FMAMODEM.
3710 (define_expand "fma<mode>4"
3711 [(set (match_operand:FMAMODEM 0 "register_operand")
3712 (fma:FMAMODEM
3713 (match_operand:FMAMODEM 1 "nonimmediate_operand")
3714 (match_operand:FMAMODEM 2 "nonimmediate_operand")
3715 (match_operand:FMAMODEM 3 "nonimmediate_operand")))])
3716
;; Standard-named fused multiply-subtract: op0 = fma (op1, op2, -op3).
;; No condition string or preparation code; the per-mode conditions of
;; FMAMODEM decide availability.
3717 (define_expand "fms<mode>4"
3718 [(set (match_operand:FMAMODEM 0 "register_operand")
3719 (fma:FMAMODEM
3720 (match_operand:FMAMODEM 1 "nonimmediate_operand")
3721 (match_operand:FMAMODEM 2 "nonimmediate_operand")
3722 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
3723
;; Standard-named negated multiply-add: op0 = fma (-op1, op2, op3).
;; No condition string or preparation code; the per-mode conditions of
;; FMAMODEM decide availability.
3724 (define_expand "fnma<mode>4"
3725 [(set (match_operand:FMAMODEM 0 "register_operand")
3726 (fma:FMAMODEM
3727 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
3728 (match_operand:FMAMODEM 2 "nonimmediate_operand")
3729 (match_operand:FMAMODEM 3 "nonimmediate_operand")))])
3730
;; Standard-named negated multiply-subtract: op0 = fma (-op1, op2, -op3).
;; No condition string or preparation code; the per-mode conditions of
;; FMAMODEM decide availability.
3731 (define_expand "fnms<mode>4"
3732 [(set (match_operand:FMAMODEM 0 "register_operand")
3733 (fma:FMAMODEM
3734 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
3735 (match_operand:FMAMODEM 2 "nonimmediate_operand")
3736 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
3737
;; The builtins used by the intrinsics are not constrained by whether SSE math
;; is enabled, so unlike FMAMODEM the SF/DF entries here carry no
;; TARGET_SSE_MATH test.  The vector entries are the same as in FMAMODEM.
3739 (define_mode_iterator FMAMODE_AVX512
3740 [(SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
3741 (DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
3742 (V4SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3743 (V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3744 (V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3745 (V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3746 (V16SF "TARGET_AVX512F")
3747 (V8DF "TARGET_AVX512F")])
3748
;; Scalar, 128-bit and 256-bit float modes with no per-mode conditions.  The
;; *fma_* insns that iterate over it supply their own condition
;; (TARGET_FMA || TARGET_FMA4).
3749 (define_mode_iterator FMAMODE
3750 [SF DF V4SF V2DF V8SF V4DF])
3751
;; Expander fma4i_fmadd_<mode> over FMAMODE_AVX512: op0 = fma (op1, op2, op3).
;; No condition string or preparation code; the iterator's per-mode conditions
;; decide availability.  Presumably the entry point used by the intrinsic
;; builtins (callers are not visible in this part of the file).
3752 (define_expand "fma4i_fmadd_<mode>"
3753 [(set (match_operand:FMAMODE_AVX512 0 "register_operand")
3754 (fma:FMAMODE_AVX512
3755 (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand")
3756 (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
3757 (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand")))])
3758
;; Expander fma4i_fmsub_<mode> over FMAMODE_AVX512:
;; op0 = fma (op1, op2, -op3).  No condition string or preparation code; the
;; iterator's per-mode conditions decide availability.
3759 (define_expand "fma4i_fmsub_<mode>"
3760 [(set (match_operand:FMAMODE_AVX512 0 "register_operand")
3761 (fma:FMAMODE_AVX512
3762 (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand")
3763 (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
3764 (neg:FMAMODE_AVX512
3765 (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand"))))])
3766
;; Expander fma4i_fnmadd_<mode> over FMAMODE_AVX512:
;; op0 = fma (-op1, op2, op3).  No condition string or preparation code; the
;; iterator's per-mode conditions decide availability.
3767 (define_expand "fma4i_fnmadd_<mode>"
3768 [(set (match_operand:FMAMODE_AVX512 0 "register_operand")
3769 (fma:FMAMODE_AVX512
3770 (neg:FMAMODE_AVX512
3771 (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand"))
3772 (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
3773 (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand")))])
3774
;; Expander fma4i_fnmsub_<mode> over FMAMODE_AVX512:
;; op0 = fma (-op1, op2, -op3).  No condition string or preparation code; the
;; iterator's per-mode conditions decide availability.
3775 (define_expand "fma4i_fnmsub_<mode>"
3776 [(set (match_operand:FMAMODE_AVX512 0 "register_operand")
3777 (fma:FMAMODE_AVX512
3778 (neg:FMAMODE_AVX512
3779 (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand"))
3780 (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
3781 (neg:FMAMODE_AVX512
3782 (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand"))))])
3783
;; Zero-masking fmadd expander.  Operands 1-3 are the multiply-add inputs and
;; operand 4 is the mask register.  It emits nothing of its own: it forwards
;; to gen_fma_fmadd_<mode>_maskz_1<round_expand_name>, inserting
;; CONST0_RTX (<MODE>mode) as the extra vector operand (presumably the value
;; given to masked-off elements) ahead of the mask, then finishes with DONE.
3784 (define_expand "<avx512>_fmadd_<mode>_maskz<round_expand_name>"
3785 [(match_operand:VF_AVX512VL 0 "register_operand")
3786 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
3787 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
3788 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
3789 (match_operand:<avx512fmaskmode> 4 "register_operand")]
3790 "TARGET_AVX512F && <round_mode512bit_condition>"
3791 {
3792 emit_insn (gen_fma_fmadd_<mode>_maskz_1<round_expand_name> (
3793 operands[0], operands[1], operands[2], operands[3],
3794 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
3795 DONE;
3796 })
3797
;; FMA3/FMA4 fused multiply-add on FMAMODE: op0 = fma (op1, op2, op3).
;; Alternatives 0-2 (isa "fma") are the three-operand 132/213/231 forms; each
;; ties one input to the destination (operand 1 in alternatives 0 and 1,
;; operand 3 in alternative 2).  Alternatives 3-4 (isa "fma4") are the
;; four-operand vfmadd form, allowing memory in operand 3 or in operand 2
;; respectively.
3798 (define_insn "*fma_fmadd_<mode>"
3799 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
3800 (fma:FMAMODE
3801 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x")
3802 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
3803 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x")))]
3804 "TARGET_FMA || TARGET_FMA4"
3805 "@
3806 vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3807 vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3808 vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3809 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3810 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3811 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3812 (set_attr "type" "ssemuladd")
3813 (set_attr "mode" "<MODE>")])
3814
;; Scalar and vector float modes for the AVX-512 FMA insns.  AVX-512F is
;; assumed as the baseline (the entries without a condition: SF, DF, V16SF,
;; V8DF); the 128- and 256-bit vector modes additionally need AVX512VL.
3816 (define_mode_iterator VF_SF_AVX512VL
3817 [SF V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
3818 DF V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
3819
;; AVX512F fused multiply-add over VF_SF_AVX512VL: op0 = fma (op1, op2, op3).
;; The name and templates carry the <sd_mask_*> and <round_*> attributes, so
;; masked and embedded-rounding variants are derived from this one pattern.
;; The three alternatives pick the 132, 213 or 231 form depending on which
;; input is tied to the destination (operand 1, operand 1, operand 3) and
;; which operand takes the <round_constraint>.
3820 (define_insn "<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name><round_name>"
3821 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3822 (fma:VF_SF_AVX512VL
3823 (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
3824 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3825 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")))]
3826 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3827 "@
3828 vfmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3829 vfmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3830 vfmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3831 [(set_attr "type" "ssemuladd")
3832 (set_attr "mode" "<MODE>")])
3833
;; fmadd with the addend (operand 3) broadcast from a scalar memory operand:
;; op0 = fma (op1, op2, vec_duplicate (op3)).
;; Only the 213 form can take the memory operand as the addend, and its
;; template names %2 and %0, so the destination must be tied to operand 1.
;; The former second alternative tied operand 2 to the destination instead,
;; which made %2 and %0 the same register and dropped operand 1 from the
;; emitted instruction.  A single alternative with operands 1 and 2 marked
;; commutative ("%") covers both register assignments correctly.
3834 (define_insn "*<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name>_bcst_1"
3835 [(set (match_operand:VF_AVX512 0 "register_operand" "=v")
3836 (fma:VF_AVX512
3837 (match_operand:VF_AVX512 1 "register_operand" "%0")
3838 (match_operand:VF_AVX512 2 "register_operand" "v")
3839 (vec_duplicate:VF_AVX512
3840 (match_operand:<ssescalarmode> 3 "memory_operand" "m"))))]
3841 "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
3842 "vfmadd213<ssemodesuffix>\t{%3<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<avx512bcst>}"
3843 [(set_attr "type" "ssemuladd")
3844 (set_attr "mode" "<MODE>")])
3845
;; fmadd with the first multiplicand (operand 1) broadcast from a scalar
;; memory operand: op0 = fma (vec_duplicate (op1), op2, op3).
;; Alternative 0 ties operand 2 to the destination and uses the 132 form;
;; alternative 1 ties operand 3 (the addend) and uses the 231 form.
3846 (define_insn "*<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name>_bcst_2"
3847 [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
3848 (fma:VF_AVX512
3849 (vec_duplicate:VF_AVX512
3850 (match_operand:<ssescalarmode> 1 "memory_operand" "m,m"))
3851 (match_operand:VF_AVX512 2 "register_operand" "0,v")
3852 (match_operand:VF_AVX512 3 "register_operand" "v,0")))]
3853 "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
3854 "@
3855 vfmadd132<ssemodesuffix>\t{%1<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %1<avx512bcst>}
3856 vfmadd231<ssemodesuffix>\t{%1<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %1<avx512bcst>}"
3857 [(set_attr "type" "ssemuladd")
3858 (set_attr "mode" "<MODE>")])
3859
;; fmadd with the second multiplicand (operand 2) broadcast from a scalar
;; memory operand: op0 = fma (op1, vec_duplicate (op2), op3).
;; Alternative 0 ties operand 1 to the destination and uses the 132 form;
;; alternative 1 ties operand 3 (the addend) and uses the 231 form.
3860 (define_insn "*<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name>_bcst_3"
3861 [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
3862 (fma:VF_AVX512
3863 (match_operand:VF_AVX512 1 "register_operand" "0,v")
3864 (vec_duplicate:VF_AVX512
3865 (match_operand:<ssescalarmode> 2 "memory_operand" "m,m"))
3866 (match_operand:VF_AVX512 3 "register_operand" "v,0")))]
3867 "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
3868 "@
3869 vfmadd132<ssemodesuffix>\t{%2<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<avx512bcst>}
3870 vfmadd231<ssemodesuffix>\t{%2<avx512bcst>, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<avx512bcst>}"
3871 [(set_attr "type" "ssemuladd")
3872 (set_attr "mode" "<MODE>")])
3873
;; Merge-masking fmadd: the vec_merge selects, under mask operand 4, between
;; fma (op1, op2, op3) and operand 1 itself (match_dup 1).  Operand 1 is
;; therefore tied to the destination in both alternatives; they differ in
;; whether operand 2 (132 form) or operand 3 (213 form) takes the
;; <round_constraint> operand.
3874 (define_insn "<avx512>_fmadd_<mode>_mask<round_name>"
3875 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3876 (vec_merge:VF_AVX512VL
3877 (fma:VF_AVX512VL
3878 (match_operand:VF_AVX512VL 1 "register_operand" "0,0")
3879 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3880 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>"))
3881 (match_dup 1)
3882 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3883 "TARGET_AVX512F && <round_mode512bit_condition>"
3884 "@
3885 vfmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3886 vfmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3887 [(set_attr "type" "ssemuladd")
3888 (set_attr "mode" "<MODE>")])
3889
;; Merge-masking fmadd where the pass-through value is the addend: the
;; vec_merge selects, under mask operand 4, between fma (op1, op2, op3) and
;; operand 3 (match_dup 3).  Operand 3 is tied to the destination, so only
;; the 231 form applies.
;; The pattern name carries <round_name>, so the condition also tests
;; <round_mode512bit_condition>, as the other _mask/_mask3 FMA patterns with
;; a rounding variant do; without it the rounding variant would not be
;; restricted the way its siblings are.
3890 (define_insn "<avx512>_fmadd_<mode>_mask3<round_name>"
3891 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3892 (vec_merge:VF_AVX512VL
3893 (fma:VF_AVX512VL
3894 (match_operand:VF_AVX512VL 1 "register_operand" "v")
3895 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3896 (match_operand:VF_AVX512VL 3 "register_operand" "0"))
3897 (match_dup 3)
3898 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3899 "TARGET_AVX512F && <round_mode512bit_condition>"
3900 "vfmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3901 [(set_attr "type" "ssemuladd")
3902 (set_attr "mode" "<MODE>")])
3903
;; FMA3/FMA4 fused multiply-subtract on FMAMODE: op0 = fma (op1, op2, -op3).
;; Alternatives 0-2 (isa "fma") are the three-operand 132/213/231 forms, each
;; tying one input to the destination; alternatives 3-4 (isa "fma4") are the
;; four-operand vfmsub form, allowing memory in operand 3 or in operand 2
;; respectively.
3904 (define_insn "*fma_fmsub_<mode>"
3905 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
3906 (fma:FMAMODE
3907 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x")
3908 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
3909 (neg:FMAMODE
3910 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x"))))]
3911 "TARGET_FMA || TARGET_FMA4"
3912 "@
3913 vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3914 vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3915 vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3916 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3917 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3918 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3919 (set_attr "type" "ssemuladd")
3920 (set_attr "mode" "<MODE>")])
3921
;; Zero-masking fmsub expander.  Operands 1-3 are the inputs and operand 4 is
;; the mask register.  It forwards to
;; gen_fma_fmsub_<mode>_maskz_1<round_expand_name>, inserting
;; CONST0_RTX (<MODE>mode) as the extra vector operand (presumably the value
;; given to masked-off elements) ahead of the mask, then finishes with DONE.
3922 (define_expand "<avx512>_fmsub_<mode>_maskz<round_expand_name>"
3923 [(match_operand:VF_AVX512VL 0 "register_operand")
3924 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
3925 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
3926 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
3927 (match_operand:<avx512fmaskmode> 4 "register_operand")]
3928 "TARGET_AVX512F && <round_mode512bit_condition>"
3929 {
3930 emit_insn (gen_fma_fmsub_<mode>_maskz_1<round_expand_name> (
3931 operands[0], operands[1], operands[2], operands[3],
3932 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
3933 DONE;
3934 })
3935
;; AVX512F fused multiply-subtract over VF_SF_AVX512VL:
;; op0 = fma (op1, op2, -op3).  The name and templates carry the <sd_mask_*>
;; and <round_*> attributes, so masked and embedded-rounding variants are
;; derived from this one pattern.  The three alternatives pick the 132, 213
;; or 231 form depending on which input is tied to the destination.
3936 (define_insn "<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name><round_name>"
3937 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3938 (fma:VF_SF_AVX512VL
3939 (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
3940 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3941 (neg:VF_SF_AVX512VL
3942 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))]
3943 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3944 "@
3945 vfmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3946 vfmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3947 vfmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3948 [(set_attr "type" "ssemuladd")
3949 (set_attr "mode" "<MODE>")])
3950
;; fmsub with the subtrahend (operand 3) broadcast from a scalar memory
;; operand: op0 = fma (op1, op2, -vec_duplicate (op3)).
;; The 213 template names %2 and %0, so the destination must be tied to
;; operand 1.  The former second alternative tied operand 2 to the
;; destination instead, which made %2 and %0 the same register and dropped
;; operand 1 from the emitted instruction.  A single alternative with
;; operands 1 and 2 marked commutative ("%") covers both register
;; assignments correctly.
3951 (define_insn "*<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name>_bcst_1"
3952 [(set (match_operand:VF_AVX512 0 "register_operand" "=v")
3953 (fma:VF_AVX512
3954 (match_operand:VF_AVX512 1 "register_operand" "%0")
3955 (match_operand:VF_AVX512 2 "register_operand" "v")
3956 (neg:VF_AVX512
3957 (vec_duplicate:VF_AVX512
3958 (match_operand:<ssescalarmode> 3 "memory_operand" "m")))))]
3959 "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
3960 "vfmsub213<ssemodesuffix>\t{%3<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<avx512bcst>}"
3961 [(set_attr "type" "ssemuladd")
3962 (set_attr "mode" "<MODE>")])
3963
;; fmsub with the first multiplicand (operand 1) broadcast from a scalar
;; memory operand: op0 = fma (vec_duplicate (op1), op2, -op3).
;; Alternative 0 ties operand 2 to the destination and uses the 132 form;
;; alternative 1 ties operand 3 and uses the 231 form.
3964 (define_insn "*<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name>_bcst_2"
3965 [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
3966 (fma:VF_AVX512
3967 (vec_duplicate:VF_AVX512
3968 (match_operand:<ssescalarmode> 1 "memory_operand" "m,m"))
3969 (match_operand:VF_AVX512 2 "register_operand" "0,v")
3970 (neg:VF_AVX512
3971 (match_operand:VF_AVX512 3 "register_operand" "v,0"))))]
3972 "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
3973 "@
3974 vfmsub132<ssemodesuffix>\t{%1<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %1<avx512bcst>}
3975 vfmsub231<ssemodesuffix>\t{%1<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %1<avx512bcst>}"
3976 [(set_attr "type" "ssemuladd")
3977 (set_attr "mode" "<MODE>")])
3978
;; fmsub with the second multiplicand (operand 2) broadcast from a scalar
;; memory operand: op0 = fma (op1, vec_duplicate (op2), -op3).
;; Alternative 0 ties operand 1 to the destination and uses the 132 form;
;; alternative 1 ties operand 3 and uses the 231 form.
;; Operand 3 uses register_operand: its constraints ("v,0") only ever allow a
;; register and operand 2 is already the memory operand, so accepting memory
;; in the predicate (as nonimmediate_operand did) could only force a reload.
;; This also matches the other _bcst_3 patterns.
3979 (define_insn "*<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name>_bcst_3"
3980 [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
3981 (fma:VF_AVX512
3982 (match_operand:VF_AVX512 1 "register_operand" "0,v")
3983 (vec_duplicate:VF_AVX512
3984 (match_operand:<ssescalarmode> 2 "memory_operand" "m,m"))
3985 (neg:VF_AVX512
3986 (match_operand:VF_AVX512 3 "register_operand" "v,0"))))]
3987 "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
3988 "@
3989 vfmsub132<ssemodesuffix>\t{%2<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<avx512bcst>}
3990 vfmsub231<ssemodesuffix>\t{%2<avx512bcst>, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<avx512bcst>}"
3991 [(set_attr "type" "ssemuladd")
3992 (set_attr "mode" "<MODE>")])
3993
;; Merge-masking fmsub: the vec_merge selects, under mask operand 4, between
;; fma (op1, op2, -op3) and operand 1 itself (match_dup 1).  Operand 1 is
;; tied to the destination in both alternatives; they differ in whether
;; operand 2 (132 form) or operand 3 (213 form) takes the <round_constraint>
;; operand.
;; The pattern name carries <round_name>, so the condition also tests
;; <round_mode512bit_condition>, matching the fmadd/fnmadd/fnmsub _mask
;; patterns; it was missing here.
3994 (define_insn "<avx512>_fmsub_<mode>_mask<round_name>"
3995 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3996 (vec_merge:VF_AVX512VL
3997 (fma:VF_AVX512VL
3998 (match_operand:VF_AVX512VL 1 "register_operand" "0,0")
3999 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
4000 (neg:VF_AVX512VL
4001 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>")))
4002 (match_dup 1)
4003 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
4004 "TARGET_AVX512F && <round_mode512bit_condition>"
4005 "@
4006 vfmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
4007 vfmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
4008 [(set_attr "type" "ssemuladd")
4009 (set_attr "mode" "<MODE>")])
4010
;; Merge-masking fmsub where the pass-through value is operand 3: the
;; vec_merge selects, under mask operand 4, between fma (op1, op2, -op3) and
;; operand 3 (match_dup 3).  Operand 3 is tied to the destination, so only
;; the 231 form applies.
4011 (define_insn "<avx512>_fmsub_<mode>_mask3<round_name>"
4012 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
4013 (vec_merge:VF_AVX512VL
4014 (fma:VF_AVX512VL
4015 (match_operand:VF_AVX512VL 1 "register_operand" "v")
4016 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
4017 (neg:VF_AVX512VL
4018 (match_operand:VF_AVX512VL 3 "register_operand" "0")))
4019 (match_dup 3)
4020 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4021 "TARGET_AVX512F && <round_mode512bit_condition>"
4022 "vfmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
4023 [(set_attr "type" "ssemuladd")
4024 (set_attr "mode" "<MODE>")])
4025
;; FMA3/FMA4 negated multiply-add on FMAMODE: op0 = fma (-op1, op2, op3).
;; Alternatives 0-2 (isa "fma") are the three-operand 132/213/231 forms, each
;; tying one input to the destination; alternatives 3-4 (isa "fma4") are the
;; four-operand vfnmadd form, allowing memory in operand 3 or in operand 2
;; respectively.
4026 (define_insn "*fma_fnmadd_<mode>"
4027 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
4028 (fma:FMAMODE
4029 (neg:FMAMODE
4030 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x"))
4031 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
4032 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x")))]
4033 "TARGET_FMA || TARGET_FMA4"
4034 "@
4035 vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
4036 vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
4037 vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
4038 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
4039 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4040 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
4041 (set_attr "type" "ssemuladd")
4042 (set_attr "mode" "<MODE>")])
4043
;; Zero-masking fnmadd expander.  Operands 1-3 are the inputs and operand 4
;; is the mask register.  It forwards to
;; gen_fma_fnmadd_<mode>_maskz_1<round_expand_name>, inserting
;; CONST0_RTX (<MODE>mode) as the extra vector operand (presumably the value
;; given to masked-off elements) ahead of the mask, then finishes with DONE.
4044 (define_expand "<avx512>_fnmadd_<mode>_maskz<round_expand_name>"
4045 [(match_operand:VF_AVX512VL 0 "register_operand")
4046 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
4047 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
4048 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
4049 (match_operand:<avx512fmaskmode> 4 "register_operand")]
4050 "TARGET_AVX512F && <round_mode512bit_condition>"
4051 {
4052 emit_insn (gen_fma_fnmadd_<mode>_maskz_1<round_expand_name> (
4053 operands[0], operands[1], operands[2], operands[3],
4054 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
4055 DONE;
4056 })
4057
;; AVX512F negated multiply-add over VF_SF_AVX512VL:
;; op0 = fma (-op1, op2, op3).  The name and templates carry the <sd_mask_*>
;; and <round_*> attributes, so masked and embedded-rounding variants are
;; derived from this one pattern.  The three alternatives pick the 132, 213
;; or 231 form depending on which input is tied to the destination.
4058 (define_insn "<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name><round_name>"
4059 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
4060 (fma:VF_SF_AVX512VL
4061 (neg:VF_SF_AVX512VL
4062 (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v"))
4063 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
4064 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")))]
4065 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
4066 "@
4067 vfnmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
4068 vfnmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
4069 vfnmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4070 [(set_attr "type" "ssemuladd")
4071 (set_attr "mode" "<MODE>")])
4072
;; fnmadd with the addend (operand 3) broadcast from a scalar memory operand:
;; op0 = fma (-op1, op2, vec_duplicate (op3)).
;; The 213 template names %2 and %0, so the destination must be tied to
;; operand 1.  The former second alternative tied operand 2 to the
;; destination instead, which made %2 and %0 the same register and dropped
;; operand 1 from the emitted instruction.  A single alternative with
;; operands 1 and 2 marked commutative ("%", as *fma_fnmadd_<mode> also does
;; inside the neg) covers both register assignments correctly.
4073 (define_insn "*<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name>_bcst_1"
4074 [(set (match_operand:VF_AVX512 0 "register_operand" "=v")
4075 (fma:VF_AVX512
4076 (neg:VF_AVX512
4077 (match_operand:VF_AVX512 1 "register_operand" "%0"))
4078 (match_operand:VF_AVX512 2 "register_operand" "v")
4079 (vec_duplicate:VF_AVX512
4080 (match_operand:<ssescalarmode> 3 "memory_operand" "m"))))]
4081 "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
4082 "vfnmadd213<ssemodesuffix>\t{%3<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<avx512bcst>}"
4083 [(set_attr "type" "ssemuladd")
4084 (set_attr "mode" "<MODE>")])
4085
;; fnmadd with the negated multiplicand (operand 1) broadcast from a scalar
;; memory operand: op0 = fma (-vec_duplicate (op1), op2, op3).
;; Alternative 0 ties operand 2 to the destination and uses the 132 form;
;; alternative 1 ties operand 3 (the addend) and uses the 231 form.
4086 (define_insn "*<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name>_bcst_2"
4087 [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
4088 (fma:VF_AVX512
4089 (neg:VF_AVX512
4090 (vec_duplicate:VF_AVX512
4091 (match_operand:<ssescalarmode> 1 "memory_operand" "m,m")))
4092 (match_operand:VF_AVX512 2 "register_operand" "0,v")
4093 (match_operand:VF_AVX512 3 "register_operand" "v,0")))]
4094 "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
4095 "@
4096 vfnmadd132<ssemodesuffix>\t{%1<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %1<avx512bcst>}
4097 vfnmadd231<ssemodesuffix>\t{%1<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %1<avx512bcst>}"
4098 [(set_attr "type" "ssemuladd")
4099 (set_attr "mode" "<MODE>")])
4100
;; fnmadd with the second multiplicand (operand 2) broadcast from a scalar
;; memory operand: op0 = fma (-op1, vec_duplicate (op2), op3).
;; Alternative 0 ties operand 1 to the destination and uses the 132 form;
;; alternative 1 ties operand 3 (the addend) and uses the 231 form.
4101 (define_insn "*<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name>_bcst_3"
4102 [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
4103 (fma:VF_AVX512
4104 (neg:VF_AVX512
4105 (match_operand:VF_AVX512 1 "register_operand" "0,v"))
4106 (vec_duplicate:VF_AVX512
4107 (match_operand:<ssescalarmode> 2 "memory_operand" "m,m"))
4108 (match_operand:VF_AVX512 3 "register_operand" "v,0")))]
4109 "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
4110 "@
4111 vfnmadd132<ssemodesuffix>\t{%2<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<avx512bcst>}
4112 vfnmadd231<ssemodesuffix>\t{%2<avx512bcst>, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<avx512bcst>}"
4113 [(set_attr "type" "ssemuladd")
4114 (set_attr "mode" "<MODE>")])
4115
;; Merge-masking fnmadd: the vec_merge selects, under mask operand 4, between
;; fma (-op1, op2, op3) and operand 1 itself (match_dup 1).  Operand 1 is
;; tied to the destination in both alternatives; they differ in whether
;; operand 2 (132 form) or operand 3 (213 form) takes the <round_constraint>
;; operand.
4116 (define_insn "<avx512>_fnmadd_<mode>_mask<round_name>"
4117 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
4118 (vec_merge:VF_AVX512VL
4119 (fma:VF_AVX512VL
4120 (neg:VF_AVX512VL
4121 (match_operand:VF_AVX512VL 1 "register_operand" "0,0"))
4122 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
4123 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>"))
4124 (match_dup 1)
4125 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
4126 "TARGET_AVX512F && <round_mode512bit_condition>"
4127 "@
4128 vfnmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
4129 vfnmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
4130 [(set_attr "type" "ssemuladd")
4131 (set_attr "mode" "<MODE>")])
4132
;; Merge-masking fnmadd where the pass-through value is the addend: the
;; vec_merge selects, under mask operand 4, between fma (-op1, op2, op3) and
;; operand 3 (match_dup 3).  Operand 3 is tied to the destination, so only
;; the 231 form applies.
4133 (define_insn "<avx512>_fnmadd_<mode>_mask3<round_name>"
4134 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
4135 (vec_merge:VF_AVX512VL
4136 (fma:VF_AVX512VL
4137 (neg:VF_AVX512VL
4138 (match_operand:VF_AVX512VL 1 "register_operand" "v"))
4139 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
4140 (match_operand:VF_AVX512VL 3 "register_operand" "0"))
4141 (match_dup 3)
4142 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4143 "TARGET_AVX512F && <round_mode512bit_condition>"
4144 "vfnmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
4145 [(set_attr "type" "ssemuladd")
4146 (set_attr "mode" "<MODE>")])
4147
;; FMA3/FMA4 negated multiply-subtract on FMAMODE:
;; op0 = fma (-op1, op2, -op3).
;; Alternatives 0-2 (isa "fma") are the three-operand 132/213/231 forms, each
;; tying one input to the destination; alternatives 3-4 (isa "fma4") are the
;; four-operand vfnmsub form, allowing memory in operand 3 or in operand 2
;; respectively.
;; The FMA3 templates are plain, like those of *fma_fmadd_<mode>,
;; *fma_fmsub_<mode> and *fma_fnmadd_<mode>.  They previously contained
;; <round_sd_mask_op4>/<sd_mask_op4>, which belong to the AVX512F pattern
;; below; this pattern's name has no subst name attributes and its condition
;; is only TARGET_FMA || TARGET_FMA4, so they had no place here.
4148 (define_insn "*fma_fnmsub_<mode>"
4149 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
4150 (fma:FMAMODE
4151 (neg:FMAMODE
4152 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x"))
4153 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
4154 (neg:FMAMODE
4155 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x"))))]
4156 "TARGET_FMA || TARGET_FMA4"
4157 "@
4158 vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
4159 vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
4160 vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
4161 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
4162 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4163 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
4164 (set_attr "type" "ssemuladd")
4165 (set_attr "mode" "<MODE>")])
4166
;; Zero-masking fnmsub expander.  Operands 1-3 are the inputs and operand 4
;; is the mask register.  It forwards to
;; gen_fma_fnmsub_<mode>_maskz_1<round_expand_name>, inserting
;; CONST0_RTX (<MODE>mode) as the extra vector operand (presumably the value
;; given to masked-off elements) ahead of the mask, then finishes with DONE.
4167 (define_expand "<avx512>_fnmsub_<mode>_maskz<round_expand_name>"
4168 [(match_operand:VF_AVX512VL 0 "register_operand")
4169 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
4170 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
4171 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
4172 (match_operand:<avx512fmaskmode> 4 "register_operand")]
4173 "TARGET_AVX512F && <round_mode512bit_condition>"
4174 {
4175 emit_insn (gen_fma_fnmsub_<mode>_maskz_1<round_expand_name> (
4176 operands[0], operands[1], operands[2], operands[3],
4177 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
4178 DONE;
4179 })
4180
;; AVX512F negated multiply-subtract over VF_SF_AVX512VL:
;; op0 = fma (-op1, op2, -op3).  The name and templates carry the
;; <sd_mask_*> and <round_*> attributes, so masked and embedded-rounding
;; variants are derived from this one pattern.  The three alternatives pick
;; the 132, 213 or 231 form depending on which input is tied to the
;; destination.
4181 (define_insn "<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name><round_name>"
4182 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
4183 (fma:VF_SF_AVX512VL
4184 (neg:VF_SF_AVX512VL
4185 (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v"))
4186 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
4187 (neg:VF_SF_AVX512VL
4188 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))]
4189 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
4190 "@
4191 vfnmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
4192 vfnmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
4193 vfnmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4194 [(set_attr "type" "ssemuladd")
4195 (set_attr "mode" "<MODE>")])
4196
;; fnmsub with the subtrahend (operand 3) broadcast from a scalar memory
;; operand: op0 = fma (-op1, op2, -vec_duplicate (op3)).
;; The 213 template names %2 and %0, so the destination must be tied to
;; operand 1.  The former second alternative tied operand 2 to the
;; destination instead, which made %2 and %0 the same register and dropped
;; operand 1 from the emitted instruction.  A single alternative with
;; operands 1 and 2 marked commutative ("%") covers both register
;; assignments correctly.
4197 (define_insn "*<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name>_bcst_1"
4198 [(set (match_operand:VF_AVX512 0 "register_operand" "=v")
4199 (fma:VF_AVX512
4200 (neg:VF_AVX512
4201 (match_operand:VF_AVX512 1 "register_operand" "%0"))
4202 (match_operand:VF_AVX512 2 "register_operand" "v")
4203 (neg:VF_AVX512
4204 (vec_duplicate:VF_AVX512
4205 (match_operand:<ssescalarmode> 3 "memory_operand" "m")))))]
4206 "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
4207 "vfnmsub213<ssemodesuffix>\t{%3<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<avx512bcst>}"
4208 [(set_attr "type" "ssemuladd")
4209 (set_attr "mode" "<MODE>")])
4210
;; fnmsub with the negated multiplicand (operand 1) broadcast from a scalar
;; memory operand: op0 = fma (-vec_duplicate (op1), op2, -op3).
;; Alternative 0 ties operand 2 to the destination and uses the 132 form;
;; alternative 1 ties operand 3 and uses the 231 form.
4211 (define_insn "*<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name>_bcst_2"
4212 [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
4213 (fma:VF_AVX512
4214 (neg:VF_AVX512
4215 (vec_duplicate:VF_AVX512
4216 (match_operand:<ssescalarmode> 1 "memory_operand" "m,m")))
4217 (match_operand:VF_AVX512 2 "register_operand" "0,v")
4218 (neg:VF_AVX512
4219 (match_operand:VF_AVX512 3 "register_operand" "v,0"))))]
4220 "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
4221 "@
4222 vfnmsub132<ssemodesuffix>\t{%1<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %1<avx512bcst>}
4223 vfnmsub231<ssemodesuffix>\t{%1<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %1<avx512bcst>}"
4224 [(set_attr "type" "ssemuladd")
4225 (set_attr "mode" "<MODE>")])
4226
;; fnmsub with the second multiplicand (operand 2) broadcast from a scalar
;; memory operand: op0 = fma (-op1, vec_duplicate (op2), -op3).
;; Alternative 0 ties operand 1 to the destination and uses the 132 form;
;; alternative 1 ties operand 3 and uses the 231 form.
4227 (define_insn "*<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name>_bcst_3"
4228 [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
4229 (fma:VF_AVX512
4230 (neg:VF_AVX512
4231 (match_operand:VF_AVX512 1 "register_operand" "0,v"))
4232 (vec_duplicate:VF_AVX512
4233 (match_operand:<ssescalarmode> 2 "memory_operand" "m,m"))
4234 (neg:VF_AVX512
4235 (match_operand:VF_AVX512 3 "register_operand" "v,0"))))]
4236 "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
4237 "@
4238 vfnmsub132<ssemodesuffix>\t{%2<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<avx512bcst>}
4239 vfnmsub231<ssemodesuffix>\t{%2<avx512bcst>, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<avx512bcst>}"
4240 [(set_attr "type" "ssemuladd")
4241 (set_attr "mode" "<MODE>")])
4242
;; Merge-masking fnmsub: the vec_merge selects, under mask operand 4, between
;; fma (-op1, op2, -op3) and operand 1 itself (match_dup 1).  Operand 1 is
;; tied to the destination in both alternatives; they differ in whether
;; operand 2 (132 form) or operand 3 (213 form) takes the <round_constraint>
;; operand.
4243 (define_insn "<avx512>_fnmsub_<mode>_mask<round_name>"
4244 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
4245 (vec_merge:VF_AVX512VL
4246 (fma:VF_AVX512VL
4247 (neg:VF_AVX512VL
4248 (match_operand:VF_AVX512VL 1 "register_operand" "0,0"))
4249 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
4250 (neg:VF_AVX512VL
4251 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>")))
4252 (match_dup 1)
4253 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
4254 "TARGET_AVX512F && <round_mode512bit_condition>"
4255 "@
4256 vfnmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
4257 vfnmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
4258 [(set_attr "type" "ssemuladd")
4259 (set_attr "mode" "<MODE>")])
4260
;; Merge-masking fnmsub where the pass-through value is operand 3: the
;; vec_merge selects, under mask operand 4, between fma (-op1, op2, -op3) and
;; operand 3 (match_dup 3).  Operand 3 is tied to the destination, so only
;; the 231 form applies.
;; The pattern name carries <round_name>, so the condition also tests
;; <round_mode512bit_condition>, matching the fmsub/fnmadd _mask3 patterns;
;; it was missing here.
4261 (define_insn "<avx512>_fnmsub_<mode>_mask3<round_name>"
4262 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
4263 (vec_merge:VF_AVX512VL
4264 (fma:VF_AVX512VL
4265 (neg:VF_AVX512VL
4266 (match_operand:VF_AVX512VL 1 "register_operand" "v"))
4267 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
4268 (neg:VF_AVX512VL
4269 (match_operand:VF_AVX512VL 3 "register_operand" "0")))
4270 (match_dup 3)
4271 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4272 "TARGET_AVX512F && <round_mode512bit_condition>"
4273 "vfnmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
4274 [(set_attr "type" "ssemuladd")
4275 (set_attr "mode" "<MODE>")])
4276
4277 ;; FMA parallel floating point multiply addsub and subadd operations.
4278
4279 ;; It would be possible to represent these without the UNSPEC as
4280 ;;
4281 ;; (vec_merge
4282 ;; (fma op1 op2 op3)
4283 ;; (fma op1 op2 (neg op3))
4284 ;; (merge-const))
4285 ;;
4286 ;; But this doesn't seem useful in practice.
4287
;; Expander for the fmaddsub operation over all VF modes, represented as an
;; UNSPEC_FMADDSUB of the three inputs rather than as RTL arithmetic.
;; Enabled for FMA, FMA4 or AVX512F; there is no preparation code.
4288 (define_expand "fmaddsub_<mode>"
4289 [(set (match_operand:VF 0 "register_operand")
4290 (unspec:VF
4291 [(match_operand:VF 1 "nonimmediate_operand")
4292 (match_operand:VF 2 "nonimmediate_operand")
4293 (match_operand:VF 3 "nonimmediate_operand")]
4294 UNSPEC_FMADDSUB))]
4295 "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
4296
;; Zero-masking expander for fmaddsub.  Operands: 0 = destination,
;; 1-3 = FMA inputs, 4 = mask register.  It forwards everything to the
;; ..._maskz_1 generator, supplying a zero vector (CONST0_RTX) as the
;; extra operand placed before the mask, and then finishes with DONE.
4297 (define_expand "<avx512>_fmaddsub_<mode>_maskz<round_expand_name>"
4298 [(match_operand:VF_AVX512VL 0 "register_operand")
4299 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
4300 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
4301 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
4302 (match_operand:<avx512fmaskmode> 4 "register_operand")]
4303 "TARGET_AVX512F"
4304 {
4305 emit_insn (gen_fma_fmaddsub_<mode>_maskz_1<round_expand_name> (
4306 operands[0], operands[1], operands[2], operands[3],
4307 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
4308 DONE;
4309 })
4310
;; 128/256-bit fmaddsub matcher.  Five alternatives: the first three are the
;; FMA3 132/213/231 encodings (isa "fma", one input tied to the destination),
;; the last two are the four-operand FMA4 encodings (isa "fma4").
4311 (define_insn "*fma_fmaddsub_<mode>"
4312 [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
4313 (unspec:VF_128_256
4314 [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,v,x,x")
4315 (match_operand:VF_128_256 2 "nonimmediate_operand" "vm,v,vm,x,m")
4316 (match_operand:VF_128_256 3 "nonimmediate_operand" "v,vm,0,xm,x")]
4317 UNSPEC_FMADDSUB))]
4318 "TARGET_FMA || TARGET_FMA4"
4319 "@
4320 vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
4321 vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
4322 vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
4323 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
4324 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4325 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
4326 (set_attr "type" "ssemuladd")
4327 (set_attr "mode" "<MODE>")])
4328
;; AVX512F fmaddsub, parameterised by the <sd_maskz_name> and <round_name>
;; substitutions.  Three alternatives tie a different input to the
;; destination and emit the 132, 213 and 231 forms respectively.
4329 (define_insn "<sd_mask_codefor>fma_fmaddsub_<mode><sd_maskz_name><round_name>"
4330 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
4331 (unspec:VF_SF_AVX512VL
4332 [(match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
4333 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
4334 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")]
4335 UNSPEC_FMADDSUB))]
4336 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
4337 "@
4338 vfmaddsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
4339 vfmaddsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
4340 vfmaddsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4341 [(set_attr "type" "ssemuladd")
4342 (set_attr "mode" "<MODE>")])
4343
;; Merge-masked fmaddsub: the UNSPEC_FMADDSUB result is vec_merged with
;; operand 1 (tied to the destination) under mask operand 4.  The two
;; alternatives emit the 132 and 213 forms with the {%4} mask decoration.
4344 (define_insn "<avx512>_fmaddsub_<mode>_mask<round_name>"
4345 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
4346 (vec_merge:VF_AVX512VL
4347 (unspec:VF_AVX512VL
4348 [(match_operand:VF_AVX512VL 1 "register_operand" "0,0")
4349 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
4350 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>")]
4351 UNSPEC_FMADDSUB)
4352 (match_dup 1)
4353 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
4354 "TARGET_AVX512F"
4355 "@
4356 vfmaddsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
4357 vfmaddsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
4358 [(set_attr "type" "ssemuladd")
4359 (set_attr "mode" "<MODE>")])
4360
;; "mask3" merge-masked fmaddsub: operand 3 is tied to the destination and
;; is the vec_merge fallback, so only the 231 form is emitted.
4361 (define_insn "<avx512>_fmaddsub_<mode>_mask3<round_name>"
4362 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
4363 (vec_merge:VF_AVX512VL
4364 (unspec:VF_AVX512VL
4365 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
4366 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
4367 (match_operand:VF_AVX512VL 3 "register_operand" "0")]
4368 UNSPEC_FMADDSUB)
4369 (match_dup 3)
4370 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4371 "TARGET_AVX512F"
4372 "vfmaddsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
4373 [(set_attr "type" "ssemuladd")
4374 (set_attr "mode" "<MODE>")])
4375
;; 128/256-bit fmsubadd matcher.  It reuses UNSPEC_FMADDSUB with operand 3
;; wrapped in neg.  Alternatives mirror *fma_fmaddsub_<mode>: three FMA3
;; encodings (132/213/231) followed by two FMA4 four-operand encodings.
4376 (define_insn "*fma_fmsubadd_<mode>"
4377 [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
4378 (unspec:VF_128_256
4379 [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,v,x,x")
4380 (match_operand:VF_128_256 2 "nonimmediate_operand" "vm,v,vm,x,m")
4381 (neg:VF_128_256
4382 (match_operand:VF_128_256 3 "nonimmediate_operand" "v,vm,0,xm,x"))]
4383 UNSPEC_FMADDSUB))]
4384 "TARGET_FMA || TARGET_FMA4"
4385 "@
4386 vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
4387 vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
4388 vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
4389 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
4390 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4391 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
4392 (set_attr "type" "ssemuladd")
4393 (set_attr "mode" "<MODE>")])
4394
;; AVX512F fmsubadd (UNSPEC_FMADDSUB with a negated operand 3),
;; parameterised by the <sd_maskz_name> and <round_name> substitutions.
;; Three alternatives emit the 132, 213 and 231 forms.
4395 (define_insn "<sd_mask_codefor>fma_fmsubadd_<mode><sd_maskz_name><round_name>"
4396 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
4397 (unspec:VF_SF_AVX512VL
4398 [(match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
4399 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
4400 (neg:VF_SF_AVX512VL
4401 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))]
4402 UNSPEC_FMADDSUB))]
4403 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
4404 "@
4405 vfmsubadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
4406 vfmsubadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
4407 vfmsubadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4408 [(set_attr "type" "ssemuladd")
4409 (set_attr "mode" "<MODE>")])
4410
;; Merge-masked fmsubadd: UNSPEC_FMADDSUB with a negated operand 3,
;; vec_merged with operand 1 (tied to the destination) under mask operand 4.
;; The two alternatives emit the 132 and 213 forms.
4411 (define_insn "<avx512>_fmsubadd_<mode>_mask<round_name>"
4412 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
4413 (vec_merge:VF_AVX512VL
4414 (unspec:VF_AVX512VL
4415 [(match_operand:VF_AVX512VL 1 "register_operand" "0,0")
4416 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
4417 (neg:VF_AVX512VL
4418 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>"))]
4419 UNSPEC_FMADDSUB)
4420 (match_dup 1)
4421 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
4422 "TARGET_AVX512F"
4423 "@
4424 vfmsubadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
4425 vfmsubadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
4426 [(set_attr "type" "ssemuladd")
4427 (set_attr "mode" "<MODE>")])
4428
;; "mask3" merge-masked fmsubadd: the negated operand 3 is tied to the
;; destination and is the vec_merge fallback; only the 231 form is emitted.
4429 (define_insn "<avx512>_fmsubadd_<mode>_mask3<round_name>"
4430 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
4431 (vec_merge:VF_AVX512VL
4432 (unspec:VF_AVX512VL
4433 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
4434 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
4435 (neg:VF_AVX512VL
4436 (match_operand:VF_AVX512VL 3 "register_operand" "0"))]
4437 UNSPEC_FMADDSUB)
4438 (match_dup 3)
4439 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4440 "TARGET_AVX512F"
4441 "vfmsubadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
4442 [(set_attr "type" "ssemuladd")
4443 (set_attr "mode" "<MODE>")])
4444
4445 ;; FMA3 floating point scalar intrinsics. These merge result with
4446 ;; high-order elements from the destination register.
4447
;; Scalar FMA3 intrinsic expander: fma (op1, op2, op3) vec_merged with
;; operand 1 using the constant mask 1.  No preparation code.
4448 (define_expand "fmai_vmfmadd_<mode><round_name>"
4449 [(set (match_operand:VF_128 0 "register_operand")
4450 (vec_merge:VF_128
4451 (fma:VF_128
4452 (match_operand:VF_128 1 "<round_nimm_predicate>")
4453 (match_operand:VF_128 2 "<round_nimm_predicate>")
4454 (match_operand:VF_128 3 "<round_nimm_predicate>"))
4455 (match_dup 1)
4456 (const_int 1)))]
4457 "TARGET_FMA")
4458
;; Scalar FMA3 intrinsic expander: fma (op1, op2, -op3) vec_merged with
;; operand 1 using the constant mask 1.  No preparation code.
4459 (define_expand "fmai_vmfmsub_<mode><round_name>"
4460 [(set (match_operand:VF_128 0 "register_operand")
4461 (vec_merge:VF_128
4462 (fma:VF_128
4463 (match_operand:VF_128 1 "<round_nimm_predicate>")
4464 (match_operand:VF_128 2 "<round_nimm_predicate>")
4465 (neg:VF_128
4466 (match_operand:VF_128 3 "<round_nimm_predicate>")))
4467 (match_dup 1)
4468 (const_int 1)))]
4469 "TARGET_FMA")
4470
;; Scalar FMA3 intrinsic expander: fma (-op2, op1, op3) vec_merged with
;; operand 1 using the constant mask 1.  Note that the negated multiplicand
;; is operand 2, while operand 1 stays the merge source.
4471 (define_expand "fmai_vmfnmadd_<mode><round_name>"
4472 [(set (match_operand:VF_128 0 "register_operand")
4473 (vec_merge:VF_128
4474 (fma:VF_128
4475 (neg:VF_128
4476 (match_operand:VF_128 2 "<round_nimm_predicate>"))
4477 (match_operand:VF_128 1 "<round_nimm_predicate>")
4478 (match_operand:VF_128 3 "<round_nimm_predicate>"))
4479 (match_dup 1)
4480 (const_int 1)))]
4481 "TARGET_FMA")
4482
;; Scalar FMA3 intrinsic expander: fma (-op2, op1, -op3) vec_merged with
;; operand 1 using the constant mask 1.  The negated multiplicand is
;; operand 2, while operand 1 stays the merge source.
4483 (define_expand "fmai_vmfnmsub_<mode><round_name>"
4484 [(set (match_operand:VF_128 0 "register_operand")
4485 (vec_merge:VF_128
4486 (fma:VF_128
4487 (neg:VF_128
4488 (match_operand:VF_128 2 "<round_nimm_predicate>"))
4489 (match_operand:VF_128 1 "<round_nimm_predicate>")
4490 (neg:VF_128
4491 (match_operand:VF_128 3 "<round_nimm_predicate>")))
4492 (match_dup 1)
4493 (const_int 1)))]
4494 "TARGET_FMA")
4495
;; Scalar fmadd matcher: fma (op1, op2, op3) merged with operand 1 (tied to
;; the destination) under constant mask 1.  Emits the 132 or 213 form
;; depending on which of operands 2/3 takes <round_constraint>.
;; NOTE(review): the pattern name has no <round_name> suffix although the
;; body uses <round_*> attributes, unlike the fnmadd/fnmsub siblings --
;; confirm this is intentional.
4496 (define_insn "*fmai_fmadd_<mode>"
4497 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4498 (vec_merge:VF_128
4499 (fma:VF_128
4500 (match_operand:VF_128 1 "<round_nimm_predicate>" " 0, 0")
4501 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>, v")
4502 (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>"))
4503 (match_dup 1)
4504 (const_int 1)))]
4505 "TARGET_FMA || TARGET_AVX512F"
4506 "@
4507 vfmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
4508 vfmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
4509 [(set_attr "type" "ssemuladd")
4510 (set_attr "mode" "<MODE>")])
4511
;; Scalar fmsub matcher: fma (op1, op2, -op3) merged with operand 1 (tied to
;; the destination) under constant mask 1.  Emits the 132 or 213 form.
;; NOTE(review): the pattern name has no <round_name> suffix although the
;; body uses <round_*> attributes, unlike the fnmadd/fnmsub siblings --
;; confirm this is intentional.
4512 (define_insn "*fmai_fmsub_<mode>"
4513 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4514 (vec_merge:VF_128
4515 (fma:VF_128
4516 (match_operand:VF_128 1 "<round_nimm_predicate>" "0,0")
4517 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v")
4518 (neg:VF_128
4519 (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>")))
4520 (match_dup 1)
4521 (const_int 1)))]
4522 "TARGET_FMA || TARGET_AVX512F"
4523 "@
4524 vfmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
4525 vfmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
4526 [(set_attr "type" "ssemuladd")
4527 (set_attr "mode" "<MODE>")])
4528
;; Scalar fnmadd matcher: fma (-op2, op1, op3) merged with operand 1 (tied
;; to the destination) under constant mask 1.  Emits the 132 or 213 form.
4529 (define_insn "*fmai_fnmadd_<mode><round_name>"
4530 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4531 (vec_merge:VF_128
4532 (fma:VF_128
4533 (neg:VF_128
4534 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v"))
4535 (match_operand:VF_128 1 "<round_nimm_predicate>" "0,0")
4536 (match_operand:VF_128 3 "<round_nimm_predicate>" "v,<round_constraint>"))
4537 (match_dup 1)
4538 (const_int 1)))]
4539 "TARGET_FMA || TARGET_AVX512F"
4540 "@
4541 vfnmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
4542 vfnmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
4543 [(set_attr "type" "ssemuladd")
4544 (set_attr "mode" "<MODE>")])
4545
;; Scalar fnmsub matcher: fma (-op2, op1, -op3) merged with operand 1 (tied
;; to the destination) under constant mask 1.  Emits the 132 or 213 form.
4546 (define_insn "*fmai_fnmsub_<mode><round_name>"
4547 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4548 (vec_merge:VF_128
4549 (fma:VF_128
4550 (neg:VF_128
4551 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>, v"))
4552 (match_operand:VF_128 1 "<round_nimm_predicate>" " 0, 0")
4553 (neg:VF_128
4554 (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>")))
4555 (match_dup 1)
4556 (const_int 1)))]
4557 "TARGET_FMA || TARGET_AVX512F"
4558 "@
4559 vfnmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
4560 vfnmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
4561 [(set_attr "type" "ssemuladd")
4562 (set_attr "mode" "<MODE>")])
4563
4564 ;; FMA4 floating point scalar intrinsics. These write the
4565 ;; entire destination register, with the high-order elements zeroed.
4566
;; FMA4 scalar fmadd expander.  The preparation statement sets operand 4 to
;; the zero vector, so the vec_merge (constant mask 1) combines the fma
;; result with zeros rather than with one of the inputs.
4567 (define_expand "fma4i_vmfmadd_<mode>"
4568 [(set (match_operand:VF_128 0 "register_operand")
4569 (vec_merge:VF_128
4570 (fma:VF_128
4571 (match_operand:VF_128 1 "nonimmediate_operand")
4572 (match_operand:VF_128 2 "nonimmediate_operand")
4573 (match_operand:VF_128 3 "nonimmediate_operand"))
4574 (match_dup 4)
4575 (const_int 1)))]
4576 "TARGET_FMA4"
4577 "operands[4] = CONST0_RTX (<MODE>mode);")
4578
;; FMA4 scalar fmadd matcher: fma (op1, op2, op3) merged with a zero vector
;; (operand 4 must satisfy const0_operand) under constant mask 1.  The two
;; alternatives differ in whether operand 2 or operand 3 may be memory.
4579 (define_insn "*fma4i_vmfmadd_<mode>"
4580 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
4581 (vec_merge:VF_128
4582 (fma:VF_128
4583 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
4584 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
4585 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
4586 (match_operand:VF_128 4 "const0_operand")
4587 (const_int 1)))]
4588 "TARGET_FMA4"
4589 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
4590 [(set_attr "type" "ssemuladd")
4591 (set_attr "mode" "<MODE>")])
4592
;; FMA4 scalar fmsub matcher: fma (op1, op2, -op3) merged with a zero vector
;; (operand 4, const0_operand) under constant mask 1.
4593 (define_insn "*fma4i_vmfmsub_<mode>"
4594 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
4595 (vec_merge:VF_128
4596 (fma:VF_128
4597 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
4598 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
4599 (neg:VF_128
4600 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
4601 (match_operand:VF_128 4 "const0_operand")
4602 (const_int 1)))]
4603 "TARGET_FMA4"
4604 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
4605 [(set_attr "type" "ssemuladd")
4606 (set_attr "mode" "<MODE>")])
4607
;; FMA4 scalar fnmadd matcher: fma (-op1, op2, op3) merged with a zero
;; vector (operand 4, const0_operand) under constant mask 1.
4608 (define_insn "*fma4i_vmfnmadd_<mode>"
4609 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
4610 (vec_merge:VF_128
4611 (fma:VF_128
4612 (neg:VF_128
4613 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
4614 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
4615 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
4616 (match_operand:VF_128 4 "const0_operand")
4617 (const_int 1)))]
4618 "TARGET_FMA4"
4619 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
4620 [(set_attr "type" "ssemuladd")
4621 (set_attr "mode" "<MODE>")])
4622
;; FMA4 scalar fnmsub matcher: fma (-op1, op2, -op3) merged with a zero
;; vector (operand 4, const0_operand) under constant mask 1.
4623 (define_insn "*fma4i_vmfnmsub_<mode>"
4624 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
4625 (vec_merge:VF_128
4626 (fma:VF_128
4627 (neg:VF_128
4628 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
4629 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
4630 (neg:VF_128
4631 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
4632 (match_operand:VF_128 4 "const0_operand")
4633 (const_int 1)))]
4634 "TARGET_FMA4"
4635 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
4636 [(set_attr "type" "ssemuladd")
4637 (set_attr "mode" "<MODE>")])
4638
4639 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4640 ;;
4641 ;; Parallel single-precision floating point conversion operations
4642 ;;
4643 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4644
;; cvtpi2ps: converts a V2SI operand (MMX register or memory, "ym") to V2SF
;; and merges it into operand 1, which is tied to the destination, using
;; the constant merge mask 3.
4645 (define_insn "sse_cvtpi2ps"
4646 [(set (match_operand:V4SF 0 "register_operand" "=x")
4647 (vec_merge:V4SF
4648 (vec_duplicate:V4SF
4649 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
4650 (match_operand:V4SF 1 "register_operand" "0")
4651 (const_int 3)))]
4652 "TARGET_SSE"
4653 "cvtpi2ps\t{%2, %0|%0, %2}"
4654 [(set_attr "type" "ssecvt")
4655 (set_attr "mode" "V4SF")])
4656
;; cvtps2pi: non-truncating (UNSPEC_FIX_NOTRUNC) conversion of a V4SF
;; operand, keeping elements 0 and 1 as a V2SI result in an MMX ("y")
;; register.
4657 (define_insn "sse_cvtps2pi"
4658 [(set (match_operand:V2SI 0 "register_operand" "=y")
4659 (vec_select:V2SI
4660 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
4661 UNSPEC_FIX_NOTRUNC)
4662 (parallel [(const_int 0) (const_int 1)])))]
4663 "TARGET_SSE"
4664 "cvtps2pi\t{%1, %0|%0, %q1}"
4665 [(set_attr "type" "ssecvt")
4666 (set_attr "unit" "mmx")
4667 (set_attr "mode" "DI")])
4668
;; cvttps2pi: truncating (fix) conversion of a V4SF operand, keeping
;; elements 0 and 1 as a V2SI result in an MMX ("y") register.
;; NOTE(review): "mode" is "SF" here but "DI" in sse_cvtps2pi -- confirm
;; the difference is intentional.
4669 (define_insn "sse_cvttps2pi"
4670 [(set (match_operand:V2SI 0 "register_operand" "=y")
4671 (vec_select:V2SI
4672 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
4673 (parallel [(const_int 0) (const_int 1)])))]
4674 "TARGET_SSE"
4675 "cvttps2pi\t{%1, %0|%0, %q1}"
4676 [(set_attr "type" "ssecvt")
4677 (set_attr "unit" "mmx")
4678 (set_attr "prefix_rep" "0")
4679 (set_attr "mode" "SF")])
4680
;; cvtsi2ss: converts an SI/DI integer (SWI48) to SF and merges it into
;; element 0 of operand 1 (constant mask 1).  Alternatives 0 and 1 are the
;; legacy SSE encodings (register / memory source, operand 1 tied to the
;; destination); alternative 2 is the three-operand AVX encoding.
;; length_vex is forced to "4" for the DImode AVX alternative and
;; prefix_rex to "1" for the DImode legacy alternatives.
4681 (define_insn "sse_cvtsi2ss<rex64namesuffix><round_name>"
4682 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
4683 (vec_merge:V4SF
4684 (vec_duplicate:V4SF
4685 (float:SF (match_operand:SWI48 2 "<round_nimm_scalar_predicate>" "r,m,<round_constraint3>")))
4686 (match_operand:V4SF 1 "register_operand" "0,0,v")
4687 (const_int 1)))]
4688 "TARGET_SSE"
4689 "@
4690 cvtsi2ss<rex64suffix>\t{%2, %0|%0, %2}
4691 cvtsi2ss<rex64suffix>\t{%2, %0|%0, %2}
4692 vcvtsi2ss<rex64suffix>\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
4693 [(set_attr "isa" "noavx,noavx,avx")
4694 (set_attr "type" "sseicvt")
4695 (set_attr "athlon_decode" "vector,double,*")
4696 (set_attr "amdfam10_decode" "vector,double,*")
4697 (set_attr "bdver1_decode" "double,direct,*")
4698 (set_attr "btver2_decode" "double,double,double")
4699 (set_attr "znver1_decode" "double,double,double")
4700 (set (attr "length_vex")
4701 (if_then_else
4702 (and (match_test "<MODE>mode == DImode")
4703 (eq_attr "alternative" "2"))
4704 (const_string "4")
4705 (const_string "*")))
4706 (set (attr "prefix_rex")
4707 (if_then_else
4708 (and (match_test "<MODE>mode == DImode")
4709 (eq_attr "alternative" "0,1"))
4710 (const_string "1")
4711 (const_string "*")))
4712 (set_attr "prefix" "orig,orig,maybe_evex")
4713 (set_attr "mode" "SF")])
4714
;; cvtss2si on a vector source: non-truncating (UNSPEC_FIX_NOTRUNC)
;; conversion of element 0 of a V4SF operand to an SI/DI integer register.
;; NOTE(review): no amdfam10_decode attribute here, unlike the _2 and
;; cvttss2si siblings -- confirm this is intentional.
4715 (define_insn "sse_cvtss2si<rex64namesuffix><round_name>"
4716 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
4717 (unspec:SWI48
4718 [(vec_select:SF
4719 (match_operand:V4SF 1 "<round_nimm_scalar_predicate>" "v,<round_constraint2>")
4720 (parallel [(const_int 0)]))]
4721 UNSPEC_FIX_NOTRUNC))]
4722 "TARGET_SSE"
4723 "%vcvtss2si<rex64suffix>\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
4724 [(set_attr "type" "sseicvt")
4725 (set_attr "athlon_decode" "double,vector")
4726 (set_attr "bdver1_decode" "double,double")
4727 (set_attr "prefix_rep" "1")
4728 (set_attr "prefix" "maybe_vex")
4729 (set_attr "mode" "<MODE>")])
4730
;; cvtss2si on a scalar source: non-truncating (UNSPEC_FIX_NOTRUNC)
;; conversion of an SF operand (register or memory) to an SI/DI integer
;; register.
4731 (define_insn "sse_cvtss2si<rex64namesuffix>_2"
4732 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
4733 (unspec:SWI48 [(match_operand:SF 1 "nonimmediate_operand" "v,m")]
4734 UNSPEC_FIX_NOTRUNC))]
4735 "TARGET_SSE"
4736 "%vcvtss2si<rex64suffix>\t{%1, %0|%0, %k1}"
4737 [(set_attr "type" "sseicvt")
4738 (set_attr "athlon_decode" "double,vector")
4739 (set_attr "amdfam10_decode" "double,double")
4740 (set_attr "bdver1_decode" "double,double")
4741 (set_attr "prefix_rep" "1")
4742 (set_attr "prefix" "maybe_vex")
4743 (set_attr "mode" "<MODE>")])
4744
;; cvttss2si: truncating (fix) conversion of element 0 of a V4SF operand to
;; an SI/DI integer register, with the <round_saeonly_name> substitution.
4745 (define_insn "sse_cvttss2si<rex64namesuffix><round_saeonly_name>"
4746 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
4747 (fix:SWI48
4748 (vec_select:SF
4749 (match_operand:V4SF 1 "<round_saeonly_nimm_scalar_predicate>" "v,<round_saeonly_constraint>")
4750 (parallel [(const_int 0)]))))]
4751 "TARGET_SSE"
4752 "%vcvttss2si<rex64suffix>\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
4753 [(set_attr "type" "sseicvt")
4754 (set_attr "athlon_decode" "double,vector")
4755 (set_attr "amdfam10_decode" "double,double")
4756 (set_attr "bdver1_decode" "double,double")
4757 (set_attr "prefix_rep" "1")
4758 (set_attr "prefix" "maybe_vex")
4759 (set_attr "mode" "<MODE>")])
4760
;; vcvtusi2ss/sd with a 32-bit source: unsigned_float of an SI operand,
;; merged into element 0 of operand 1 (constant mask 1).  EVEX-only.
4761 (define_insn "cvtusi2<ssescalarmodesuffix>32<round_name>"
4762 [(set (match_operand:VF_128 0 "register_operand" "=v")
4763 (vec_merge:VF_128
4764 (vec_duplicate:VF_128
4765 (unsigned_float:<ssescalarmode>
4766 (match_operand:SI 2 "<round_nimm_predicate>" "<round_constraint3>")))
4767 (match_operand:VF_128 1 "register_operand" "v")
4768 (const_int 1)))]
4769 "TARGET_AVX512F && <round_modev4sf_condition>"
4770 "vcvtusi2<ssescalarmodesuffix>\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
4771 [(set_attr "type" "sseicvt")
4772 (set_attr "prefix" "evex")
4773 (set_attr "mode" "<ssescalarmode>")])
4774
;; vcvtusi2ss/sd with a 64-bit source: unsigned_float of a DI operand,
;; merged into element 0 of operand 1 (constant mask 1).  Requires
;; TARGET_64BIT; the template carries an explicit {q} suffix.
4775 (define_insn "cvtusi2<ssescalarmodesuffix>64<round_name>"
4776 [(set (match_operand:VF_128 0 "register_operand" "=v")
4777 (vec_merge:VF_128
4778 (vec_duplicate:VF_128
4779 (unsigned_float:<ssescalarmode>
4780 (match_operand:DI 2 "<round_nimm_predicate>" "<round_constraint3>")))
4781 (match_operand:VF_128 1 "register_operand" "v")
4782 (const_int 1)))]
4783 "TARGET_AVX512F && TARGET_64BIT"
4784 "vcvtusi2<ssescalarmodesuffix>{q}\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
4785 [(set_attr "type" "sseicvt")
4786 (set_attr "prefix" "evex")
4787 (set_attr "mode" "<ssescalarmode>")])
4788
;; Signed integer vector to single-precision float conversion (cvtdq2ps).
;; Alternative 0 is the legacy SSE2 encoding (isa "noavx"); alternative 1 is
;; the AVX encoding, which also carries the mask/round decorations.
4789 (define_insn "float<sseintvecmodelower><mode>2<mask_name><round_name>"
4790 [(set (match_operand:VF1 0 "register_operand" "=x,v")
4791 (float:VF1
4792 (match_operand:<sseintvecmode> 1 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
4793 "TARGET_SSE2 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
4794 "@
4795 cvtdq2ps\t{%1, %0|%0, %1}
4796 vcvtdq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4797 [(set_attr "isa" "noavx,avx")
4798 (set_attr "type" "ssecvt")
4799 (set_attr "prefix" "maybe_vex")
4800 (set_attr "mode" "<sseinsnmode>")])
4801
;; Unsigned integer vector to single-precision float conversion
;; (vcvtudq2ps).  AVX512F only, EVEX-encoded.
4802 (define_insn "ufloat<sseintvecmodelower><mode>2<mask_name><round_name>"
4803 [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
4804 (unsigned_float:VF1_AVX512VL
4805 (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
4806 "TARGET_AVX512F"
4807 "vcvtudq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4808 [(set_attr "type" "ssecvt")
4809 (set_attr "prefix" "evex")
4810 (set_attr "mode" "<MODE>")])
4811
;; Expander for unsigned integer vector -> float vector conversion.
;; Operand 0 is the float destination, operand 1 the integer source.
;; Dispatch: V16SF always uses the ufloatv16siv16sf2 insn; with AVX512VL the
;; V4SF/V8SF modes use the matching ufloat insn; otherwise the conversion is
;; delegated to ix86_expand_vector_convert_uns_vsivsf.
4812 (define_expand "floatuns<sseintvecmodelower><mode>2"
4813 [(match_operand:VF1 0 "register_operand")
4814 (match_operand:<sseintvecmode> 1 "register_operand")]
4815 "TARGET_SSE2 && (<MODE>mode == V4SFmode || TARGET_AVX2)"
4816 {
4817 if (<MODE>mode == V16SFmode)
4818 emit_insn (gen_ufloatv16siv16sf2 (operands[0], operands[1]));
4819 else
4820 if (TARGET_AVX512VL)
4821 {
4822 if (<MODE>mode == V4SFmode)
4823 emit_insn (gen_ufloatv4siv4sf2 (operands[0], operands[1]));
4824 else
4825 emit_insn (gen_ufloatv8siv8sf2 (operands[0], operands[1]));
4826 }
4827 else
4828 ix86_expand_vector_convert_uns_vsivsf (operands[0], operands[1]);
4829
4830 DONE;
4831 })
4832
4833
4834 ;; For <sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode> insn pattern
;; Maps each SI vector mode to the lower-case name of the SF vector mode
;; with the same element count.
4835 (define_mode_attr sf2simodelower
4836 [(V16SI "v16sf") (V8SI "v8sf") (V4SI "v4sf")])
4837
;; Non-truncating (UNSPEC_FIX_NOTRUNC) float-to-signed-int vector conversion
;; (cvtps2dq / vcvtps2dq).  prefix_data16 is "1" only when TARGET_AVX is
;; off, i.e. for the legacy SSE2 encoding.
4838 (define_insn "<sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode><mask_name>"
4839 [(set (match_operand:VI4_AVX 0 "register_operand" "=v")
4840 (unspec:VI4_AVX
4841 [(match_operand:<ssePSmode> 1 "vector_operand" "vBm")]
4842 UNSPEC_FIX_NOTRUNC))]
4843 "TARGET_SSE2 && <mask_mode512bit_condition>"
4844 "%vcvtps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4845 [(set_attr "type" "ssecvt")
4846 (set (attr "prefix_data16")
4847 (if_then_else
4848 (match_test "TARGET_AVX")
4849 (const_string "*")
4850 (const_string "1")))
4851 (set_attr "prefix" "maybe_vex")
4852 (set_attr "mode" "<sseinsnmode>")])
4853
;; 512-bit non-truncating V16SF -> V16SI conversion (vcvtps2dq) with the
;; mask and rounding substitutions.  EVEX-encoded.
4854 (define_insn "avx512f_fix_notruncv16sfv16si<mask_name><round_name>"
4855 [(set (match_operand:V16SI 0 "register_operand" "=v")
4856 (unspec:V16SI
4857 [(match_operand:V16SF 1 "<round_nimm_predicate>" "<round_constraint>")]
4858 UNSPEC_FIX_NOTRUNC))]
4859 "TARGET_AVX512F"
4860 "vcvtps2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4861 [(set_attr "type" "ssecvt")
4862 (set_attr "prefix" "evex")
4863 (set_attr "mode" "XI")])
4864
;; Non-truncating float-to-unsigned-int vector conversion (vcvtps2udq),
;; represented with UNSPEC_UNSIGNED_FIX_NOTRUNC.  EVEX-encoded.
4865 (define_insn "<mask_codefor><avx512>_ufix_notrunc<sf2simodelower><mode><mask_name><round_name>"
4866 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
4867 (unspec:VI4_AVX512VL
4868 [(match_operand:<ssePSmode> 1 "nonimmediate_operand" "<round_constraint>")]
4869 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4870 "TARGET_AVX512F"
4871 "vcvtps2udq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4872 [(set_attr "type" "ssecvt")
4873 (set_attr "prefix" "evex")
4874 (set_attr "mode" "<sseinsnmode>")])
4875
;; vcvtps2qq for 256/512-bit DI vector results: non-truncating conversion
;; (UNSPEC_FIX_NOTRUNC) from the <ssePSmode2> source mode.  Needs AVX512DQ.
4876 (define_insn "<mask_codefor>avx512dq_cvtps2qq<mode><mask_name><round_name>"
4877 [(set (match_operand:VI8_256_512 0 "register_operand" "=v")
4878 (unspec:VI8_256_512 [(match_operand:<ssePSmode2> 1 "nonimmediate_operand" "<round_constraint>")]
4879 UNSPEC_FIX_NOTRUNC))]
4880 "TARGET_AVX512DQ && <round_mode512bit_condition>"
4881 "vcvtps2qq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4882 [(set_attr "type" "ssecvt")
4883 (set_attr "prefix" "evex")
4884 (set_attr "mode" "<sseinsnmode>")])
4885
;; 128-bit vcvtps2qq: only elements 0 and 1 of the V4SF source are selected
;; and converted (UNSPEC_FIX_NOTRUNC) to V2DI.  Needs AVX512DQ and AVX512VL.
4886 (define_insn "<mask_codefor>avx512dq_cvtps2qqv2di<mask_name>"
4887 [(set (match_operand:V2DI 0 "register_operand" "=v")
4888 (unspec:V2DI
4889 [(vec_select:V2SF
4890 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
4891 (parallel [(const_int 0) (const_int 1)]))]
4892 UNSPEC_FIX_NOTRUNC))]
4893 "TARGET_AVX512DQ && TARGET_AVX512VL"
4894 "vcvtps2qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
4895 [(set_attr "type" "ssecvt")
4896 (set_attr "prefix" "evex")
4897 (set_attr "mode" "TI")])
4898
;; vcvtps2uqq for 256/512-bit DI vector results: non-truncating unsigned
;; conversion (UNSPEC_UNSIGNED_FIX_NOTRUNC).  Needs AVX512DQ.
4899 (define_insn "<mask_codefor>avx512dq_cvtps2uqq<mode><mask_name><round_name>"
4900 [(set (match_operand:VI8_256_512 0 "register_operand" "=v")
4901 (unspec:VI8_256_512 [(match_operand:<ssePSmode2> 1 "nonimmediate_operand" "<round_constraint>")]
4902 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4903 "TARGET_AVX512DQ && <round_mode512bit_condition>"
4904 "vcvtps2uqq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4905 [(set_attr "type" "ssecvt")
4906 (set_attr "prefix" "evex")
4907 (set_attr "mode" "<sseinsnmode>")])
4908
;; 128-bit vcvtps2uqq: only elements 0 and 1 of the V4SF source are selected
;; and converted (UNSPEC_UNSIGNED_FIX_NOTRUNC) to V2DI.  Needs AVX512DQ and
;; AVX512VL.
4909 (define_insn "<mask_codefor>avx512dq_cvtps2uqqv2di<mask_name>"
4910 [(set (match_operand:V2DI 0 "register_operand" "=v")
4911 (unspec:V2DI
4912 [(vec_select:V2SF
4913 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
4914 (parallel [(const_int 0) (const_int 1)]))]
4915 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4916 "TARGET_AVX512DQ && TARGET_AVX512VL"
4917 "vcvtps2uqq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
4918 [(set_attr "type" "ssecvt")
4919 (set_attr "prefix" "evex")
4920 (set_attr "mode" "TI")])
4921
;; 512-bit truncating V16SF -> V16SI conversion, iterated over any_fix so
;; that <fixsuffix> selects between the signed and unsigned mnemonic.
4922 (define_insn "<fixsuffix>fix_truncv16sfv16si2<mask_name><round_saeonly_name>"
4923 [(set (match_operand:V16SI 0 "register_operand" "=v")
4924 (any_fix:V16SI
4925 (match_operand:V16SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
4926 "TARGET_AVX512F"
4927 "vcvttps2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
4928 [(set_attr "type" "ssecvt")
4929 (set_attr "prefix" "evex")
4930 (set_attr "mode" "XI")])
4931
;; 256-bit truncating V8SF -> V8SI conversion (vcvttps2dq).  Needs AVX; the
;; masked variant is additionally gated by <mask_avx512vl_condition>.
4932 (define_insn "fix_truncv8sfv8si2<mask_name>"
4933 [(set (match_operand:V8SI 0 "register_operand" "=v")
4934 (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "vm")))]
4935 "TARGET_AVX && <mask_avx512vl_condition>"
4936 "vcvttps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4937 [(set_attr "type" "ssecvt")
4938 (set_attr "prefix" "<mask_prefix>")
4939 (set_attr "mode" "OI")])
4940
;; 128-bit truncating V4SF -> V4SI conversion (cvttps2dq / vcvttps2dq).
;; Needs SSE2; the masked variant is additionally gated by
;; <mask_avx512vl_condition>.
;; prefix_rep is "1" only when TARGET_AVX is off (legacy encoding).
;; prefix_data16 is given exactly once, as "0": listing the same attribute
;; twice in one attribute vector leaves the effective value dependent on
;; generator lookup order, so only the unconditional setting is kept.
4941 (define_insn "fix_truncv4sfv4si2<mask_name>"
4942 [(set (match_operand:V4SI 0 "register_operand" "=v")
4943 (fix:V4SI (match_operand:V4SF 1 "vector_operand" "vBm")))]
4944 "TARGET_SSE2 && <mask_avx512vl_condition>"
4945 "%vcvttps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4946 [(set_attr "type" "ssecvt")
4947 (set (attr "prefix_rep")
4948 (if_then_else
4949 (match_test "TARGET_AVX")
4950 (const_string "*")
4951 (const_string "1")))
4957 (set_attr "prefix_data16" "0")
4958 (set_attr "prefix" "<mask_prefix2>")
4959 (set_attr "mode" "TI")])
4960
;; Expander for truncating float vector -> unsigned int vector conversion.
;; Operand 0 is the integer destination, operand 1 the float source.
;; V16SF uses the ufix_truncv16sfv16si2 insn directly.  Other modes go
;; through three steps: ix86_expand_adjust_ufix_to_sfix_si returns an
;; adjusted input (tmp[0]) and fills in tmp[2]; a signed fix_trunc of
;; tmp[0] yields tmp[1]; the result is tmp[1] XOR tmp[2].
4961 (define_expand "fixuns_trunc<mode><sseintvecmodelower>2"
4962 [(match_operand:<sseintvecmode> 0 "register_operand")
4963 (match_operand:VF1 1 "register_operand")]
4964 "TARGET_SSE2"
4965 {
4966 if (<MODE>mode == V16SFmode)
4967 emit_insn (gen_ufix_truncv16sfv16si2 (operands[0],
4968 operands[1]));
4969 else
4970 {
4971 rtx tmp[3];
4972 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
4973 tmp[1] = gen_reg_rtx (<sseintvecmode>mode);
4974 emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (tmp[1], tmp[0]));
4975 emit_insn (gen_xor<sseintvecmodelower>3 (operands[0], tmp[1], tmp[2]));
4976 }
4977 DONE;
4978 })
4979
4980 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4981 ;;
4982 ;; Parallel double-precision floating point conversion operations
4983 ;;
4984 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4985
;; cvtpi2pd: V2SI -> V2DF conversion.  Alternative 0 takes the source from
;; an MMX register (unit "mmx", prefix_data16 "1"); alternative 1 takes it
;; from memory.
4986 (define_insn "sse2_cvtpi2pd"
4987 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
4988 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
4989 "TARGET_SSE2"
4990 "cvtpi2pd\t{%1, %0|%0, %1}"
4991 [(set_attr "type" "ssecvt")
4992 (set_attr "unit" "mmx,*")
4993 (set_attr "prefix_data16" "1,*")
4994 (set_attr "mode" "V2DF")])
4995
;; cvtpd2pi: non-truncating (UNSPEC_FIX_NOTRUNC) V2DF -> V2SI conversion
;; into an MMX ("y") register.
4996 (define_insn "sse2_cvtpd2pi"
4997 [(set (match_operand:V2SI 0 "register_operand" "=y")
4998 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
4999 UNSPEC_FIX_NOTRUNC))]
5000 "TARGET_SSE2"
5001 "cvtpd2pi\t{%1, %0|%0, %1}"
5002 [(set_attr "type" "ssecvt")
5003 (set_attr "unit" "mmx")
5004 (set_attr "bdver1_decode" "double")
5005 (set_attr "btver2_decode" "direct")
5006 (set_attr "prefix_data16" "1")
5007 (set_attr "mode" "DI")])
5008
;; cvttpd2pi: truncating (fix) V2DF -> V2SI conversion into an MMX ("y")
;; register.
;; NOTE(review): "mode" is "TI" here but "DI" in sse2_cvtpd2pi -- confirm
;; the difference is intentional.
5009 (define_insn "sse2_cvttpd2pi"
5010 [(set (match_operand:V2SI 0 "register_operand" "=y")
5011 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
5012 "TARGET_SSE2"
5013 "cvttpd2pi\t{%1, %0|%0, %1}"
5014 [(set_attr "type" "ssecvt")
5015 (set_attr "unit" "mmx")
5016 (set_attr "bdver1_decode" "double")
5017 (set_attr "prefix_data16" "1")
5018 (set_attr "mode" "TI")])
5019
;; cvtsi2sd with a 32-bit source: converts an SI integer to DF and merges it
;; into element 0 of operand 1 (constant mask 1).  Alternatives 0 and 1 are
;; the legacy SSE2 encodings (operand 1 tied to the destination);
;; alternative 2 is the three-operand AVX encoding.
5020 (define_insn "sse2_cvtsi2sd"
5021 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
5022 (vec_merge:V2DF
5023 (vec_duplicate:V2DF
5024 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
5025 (match_operand:V2DF 1 "register_operand" "0,0,v")
5026 (const_int 1)))]
5027 "TARGET_SSE2"
5028 "@
5029 cvtsi2sd\t{%2, %0|%0, %2}
5030 cvtsi2sd\t{%2, %0|%0, %2}
5031 vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
5032 [(set_attr "isa" "noavx,noavx,avx")
5033 (set_attr "type" "sseicvt")
5034 (set_attr "athlon_decode" "double,direct,*")
5035 (set_attr "amdfam10_decode" "vector,double,*")
5036 (set_attr "bdver1_decode" "double,direct,*")
5037 (set_attr "btver2_decode" "double,double,double")
5038 (set_attr "znver1_decode" "double,double,double")
5039 (set_attr "prefix" "orig,orig,maybe_evex")
5040 (set_attr "mode" "DF")])
5041
;; cvtsi2sdq: 64-bit-source variant of cvtsi2sd (DI -> DF, merged into
;; element 0 of operand 1).  Requires TARGET_64BIT.  The legacy alternatives
;; set prefix_rex to "1"; the AVX alternative sets length_vex to "4".
5042 (define_insn "sse2_cvtsi2sdq<round_name>"
5043 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
5044 (vec_merge:V2DF
5045 (vec_duplicate:V2DF
5046 (float:DF (match_operand:DI 2 "<round_nimm_scalar_predicate>" "r,m,<round_constraint3>")))
5047 (match_operand:V2DF 1 "register_operand" "0,0,v")
5048 (const_int 1)))]
5049 "TARGET_SSE2 && TARGET_64BIT"
5050 "@
5051 cvtsi2sdq\t{%2, %0|%0, %2}
5052 cvtsi2sdq\t{%2, %0|%0, %2}
5053 vcvtsi2sdq\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
5054 [(set_attr "isa" "noavx,noavx,avx")
5055 (set_attr "type" "sseicvt")
5056 (set_attr "athlon_decode" "double,direct,*")
5057 (set_attr "amdfam10_decode" "vector,double,*")
5058 (set_attr "bdver1_decode" "double,direct,*")
5059 (set_attr "length_vex" "*,*,4")
5060 (set_attr "prefix_rex" "1,1,*")
5061 (set_attr "prefix" "orig,orig,maybe_evex")
5062 (set_attr "mode" "DF")])
5063
;; Unsigned, non-truncating conversion (UNSPEC_UNSIGNED_FIX_NOTRUNC) of
;; element 0 of V4SF operand 1 to an SI or DI general register; emits
;; vcvtss2usi, optionally with the <round_op2> rounding operand.
5064 (define_insn "avx512f_vcvtss2usi<rex64namesuffix><round_name>"
5065 [(set (match_operand:SWI48 0 "register_operand" "=r")
5066 (unspec:SWI48
5067 [(vec_select:SF
5068 (match_operand:V4SF 1 "<round_nimm_predicate>" "<round_constraint>")
5069 (parallel [(const_int 0)]))]
5070 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
5071 "TARGET_AVX512F"
5072 "vcvtss2usi\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
5073 [(set_attr "type" "sseicvt")
5074 (set_attr "prefix" "evex")
5075 (set_attr "mode" "<MODE>")])
5076
;; Unsigned truncating conversion (unsigned_fix) of element 0 of V4SF
;; operand 1 to an SI or DI general register; emits vcvttss2usi, optionally
;; with the SAE-only <round_saeonly_op2> operand.
5077 (define_insn "avx512f_vcvttss2usi<rex64namesuffix><round_saeonly_name>"
5078 [(set (match_operand:SWI48 0 "register_operand" "=r")
5079 (unsigned_fix:SWI48
5080 (vec_select:SF
5081 (match_operand:V4SF 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
5082 (parallel [(const_int 0)]))))]
5083 "TARGET_AVX512F"
5084 "vcvttss2usi\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
5085 [(set_attr "type" "sseicvt")
5086 (set_attr "prefix" "evex")
5087 (set_attr "mode" "<MODE>")])
5088
;; Unsigned, non-truncating conversion (UNSPEC_UNSIGNED_FIX_NOTRUNC) of
;; element 0 of V2DF operand 1 to an SI or DI general register; emits
;; vcvtsd2usi, optionally with the <round_op2> rounding operand.
5089 (define_insn "avx512f_vcvtsd2usi<rex64namesuffix><round_name>"
5090 [(set (match_operand:SWI48 0 "register_operand" "=r")
5091 (unspec:SWI48
5092 [(vec_select:DF
5093 (match_operand:V2DF 1 "<round_nimm_predicate>" "<round_constraint>")
5094 (parallel [(const_int 0)]))]
5095 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
5096 "TARGET_AVX512F"
5097 "vcvtsd2usi\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
5098 [(set_attr "type" "sseicvt")
5099 (set_attr "prefix" "evex")
5100 (set_attr "mode" "<MODE>")])
5101
;; Unsigned truncating conversion (unsigned_fix) of element 0 of V2DF
;; operand 1 to an SI or DI general register; emits vcvttsd2usi, optionally
;; with the SAE-only <round_saeonly_op2> operand.
5102 (define_insn "avx512f_vcvttsd2usi<rex64namesuffix><round_saeonly_name>"
5103 [(set (match_operand:SWI48 0 "register_operand" "=r")
5104 (unsigned_fix:SWI48
5105 (vec_select:DF
5106 (match_operand:V2DF 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
5107 (parallel [(const_int 0)]))))]
5108 "TARGET_AVX512F"
5109 "vcvttsd2usi\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
5110 [(set_attr "type" "sseicvt")
5111 (set_attr "prefix" "evex")
5112 (set_attr "mode" "<MODE>")])
5113
;; Signed, non-truncating conversion (UNSPEC_FIX_NOTRUNC) of element 0 of
;; V2DF operand 1 to an SI or DI general register; emits cvtsd2si with the
;; <rex64suffix> mnemonic suffix.  Alternative 0 takes a register source,
;; alternative 1 uses <round_constraint2>.
5114 (define_insn "sse2_cvtsd2si<rex64namesuffix><round_name>"
5115 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
5116 (unspec:SWI48
5117 [(vec_select:DF
5118 (match_operand:V2DF 1 "<round_nimm_scalar_predicate>" "v,<round_constraint2>")
5119 (parallel [(const_int 0)]))]
5120 UNSPEC_FIX_NOTRUNC))]
5121 "TARGET_SSE2"
5122 "%vcvtsd2si<rex64suffix>\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
5123 [(set_attr "type" "sseicvt")
5124 (set_attr "athlon_decode" "double,vector")
5125 (set_attr "bdver1_decode" "double,double")
5126 (set_attr "btver2_decode" "double,double")
5127 (set_attr "prefix_rep" "1")
5128 (set_attr "prefix" "maybe_vex")
5129 (set_attr "mode" "<MODE>")])
5130
;; Variant of sse2_cvtsd2si whose source is a plain DF scalar (register or
;; memory) rather than element 0 selected out of a V2DF vector.  Emits the
;; same cvtsd2si instruction; there is no rounding-operand substitution here.
5131 (define_insn "sse2_cvtsd2si<rex64namesuffix>_2"
5132 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
5133 (unspec:SWI48 [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
5134 UNSPEC_FIX_NOTRUNC))]
5135 "TARGET_SSE2"
5136 "%vcvtsd2si<rex64suffix>\t{%1, %0|%0, %q1}"
5137 [(set_attr "type" "sseicvt")
5138 (set_attr "athlon_decode" "double,vector")
5139 (set_attr "amdfam10_decode" "double,double")
5140 (set_attr "bdver1_decode" "double,double")
5141 (set_attr "prefix_rep" "1")
5142 (set_attr "prefix" "maybe_vex")
5143 (set_attr "mode" "<MODE>")])
5144
;; Signed truncating conversion (fix) of element 0 of V2DF operand 1 to an
;; SI or DI general register; emits cvttsd2si with the <rex64suffix>
;; mnemonic suffix and, when substituted, the SAE-only operand.
5145 (define_insn "sse2_cvttsd2si<rex64namesuffix><round_saeonly_name>"
5146 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
5147 (fix:SWI48
5148 (vec_select:DF
5149 (match_operand:V2DF 1 "<round_saeonly_nimm_scalar_predicate>" "v,<round_saeonly_constraint2>")
5150 (parallel [(const_int 0)]))))]
5151 "TARGET_SSE2"
5152 "%vcvttsd2si<rex64suffix>\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
5153 [(set_attr "type" "sseicvt")
5154 (set_attr "athlon_decode" "double,vector")
5155 (set_attr "amdfam10_decode" "double,double")
5156 (set_attr "bdver1_decode" "double,double")
5157 (set_attr "btver2_decode" "double,double")
5158 (set_attr "prefix_rep" "1")
5159 (set_attr "prefix" "maybe_vex")
5160 (set_attr "mode" "<MODE>")])
5161
5162 ;; For the float<si2dfmodelower><mode>2 and ufloat<si2dfmodelower><mode>2
;; insn patterns: map a V8DF/V4DF mode to the SImode vector with the same
;; number of elements, as a mode name (si2dfmode) and in lower case for
;; use inside pattern names (si2dfmodelower).
5163 (define_mode_attr si2dfmode
5164 [(V8DF "V8SI") (V4DF "V4SI")])
5165 (define_mode_attr si2dfmodelower
5166 [(V8DF "v8si") (V4DF "v4si")])
5167
;; Signed conversion of a V8SI/V4SI operand (<si2dfmode>) to V8DF/V4DF;
;; emits vcvtdq2pd.  The optional mask substitution is gated by
;; <mask_mode512bit_condition>.
5168 (define_insn "float<si2dfmodelower><mode>2<mask_name>"
5169 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
5170 (float:VF2_512_256 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
5171 "TARGET_AVX && <mask_mode512bit_condition>"
5172 "vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5173 [(set_attr "type" "ssecvt")
5174 (set_attr "prefix" "maybe_vex")
5175 (set_attr "mode" "<MODE>")])
5176
;; Signed or unsigned (any_float) conversion of an <sseintvecmode> integer
;; vector to the VF2_AVX512VL double vector mode; emits vcvtqq2pd or its
;; <floatsuffix> variant.  Requires AVX512DQ.
5177 (define_insn "float<floatunssuffix><sseintvecmodelower><mode>2<mask_name><round_name>"
5178 [(set (match_operand:VF2_AVX512VL 0 "register_operand" "=v")
5179 (any_float:VF2_AVX512VL
5180 (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
5181 "TARGET_AVX512DQ"
5182 "vcvt<floatsuffix>qq2pd\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5183 [(set_attr "type" "ssecvt")
5184 (set_attr "prefix" "evex")
5185 (set_attr "mode" "<MODE>")])
5186
5187 ;; For float<floatunssuffix><sselongvecmodelower><mode>2 insn patterns
;; qq2pssuff is the mnemonic suffix appended to vcvt[u]qq2ps: empty for a
;; V8SF result, "{y}" for a V4SF result.
5188 (define_mode_attr qq2pssuff
5189 [(V8SF "") (V4SF "{y}")])
5190
;; DImode vector with the same element count as the SF vector mode.
5191 (define_mode_attr sselongvecmode
5192 [(V8SF "V8DI") (V4SF "V4DI")])
5193
;; Lower-case spelling of sselongvecmode, for use inside pattern names.
5194 (define_mode_attr sselongvecmodelower
5195 [(V8SF "v8di") (V4SF "v4di")])
5196
;; Value used for the insn "mode" attribute by the
;; fix<fixunssuffix>_trunc<mode><sselongvecmodelower>2 pattern.
5197 (define_mode_attr sseintvecmode3
5198 [(V8SF "XI") (V4SF "OI")
5199 (V8DF "OI") (V4DF "TI")])
5200
;; Signed or unsigned (any_float) conversion of a V8DI/V4DI operand
;; (<sselongvecmode>) to V8SF/V4SF; emits vcvt[u]qq2ps with the
;; <qq2pssuff> size suffix.  Requires AVX512DQ; the rounding substitution
;; is further restricted by <round_modev8sf_condition>.
5201 (define_insn "float<floatunssuffix><sselongvecmodelower><mode>2<mask_name><round_name>"
5202 [(set (match_operand:VF1_128_256VL 0 "register_operand" "=v")
5203 (any_float:VF1_128_256VL
5204 (match_operand:<sselongvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
5205 "TARGET_AVX512DQ && <round_modev8sf_condition>"
5206 "vcvt<floatsuffix>qq2ps<qq2pssuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5207 [(set_attr "type" "ssecvt")
5208 (set_attr "prefix" "evex")
5209 (set_attr "mode" "<MODE>")])
5210
;; Signed or unsigned conversion of V2DI operand 1 to V2SF.  The V4SF
;; destination is the concatenation of that V2SF result (low half) with an
;; explicit V2SF zero vector (high half).  Emits vcvt[u]qq2ps{x}.
5211 (define_insn "float<floatunssuffix>v2div2sf2"
5212 [(set (match_operand:V4SF 0 "register_operand" "=v")
5213 (vec_concat:V4SF
5214 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
5215 (const_vector:V2SF [(const_int 0) (const_int 0)])))]
5216 "TARGET_AVX512DQ && TARGET_AVX512VL"
5217 "vcvt<floatsuffix>qq2ps{x}\t{%1, %0|%0, %1}"
5218 [(set_attr "type" "ssecvt")
5219 (set_attr "prefix" "evex")
5220 (set_attr "mode" "V4SF")])
5221
;; Helper attributes for the vec_pack<floatprefix>_float_<mode> expander,
;; keyed on the DImode source vector mode:
;;   vpckfloat_concat_mode - suffix of the gen_avx_vec_concat* generator used
;;                           to join the two converted halves;
;;   vpckfloat_temp_mode   - mode of the temporaries holding each half;
;;   vpckfloat_op_mode     - destination-mode part of the float*2 pattern name.
5222 (define_mode_attr vpckfloat_concat_mode
5223 [(V8DI "v16sf") (V4DI "v8sf") (V2DI "v8sf")])
5224 (define_mode_attr vpckfloat_temp_mode
5225 [(V8DI "V8SF") (V4DI "V4SF") (V2DI "V4SF")])
5226 (define_mode_attr vpckfloat_op_mode
5227 [(V8DI "v8sf") (V4DI "v4sf") (V2DI "v2sf")])
5228
;; Pack two DImode vectors (operands 1 and 2) into one SFmode vector
;; (operand 0): each input is converted separately into a temporary of mode
;; <vpckfloat_temp_mode>, then the two temporaries are joined.
5229 (define_expand "vec_pack<floatprefix>_float_<mode>"
5230 [(match_operand:<ssePSmode> 0 "register_operand")
5231 (any_float:<ssePSmode>
5232 (match_operand:VI8_AVX512VL 1 "register_operand"))
5233 (match_operand:VI8_AVX512VL 2 "register_operand")]
5234 "TARGET_AVX512DQ"
5235 {
/* Generator for the per-half integer -> float conversion.  */
5236 rtx (*cvt) (rtx, rtx) = gen_float<floatunssuffix><mode><vpckfloat_op_mode>2;
5237 rtx lo = gen_reg_rtx (<vpckfloat_temp_mode>mode);
5238 rtx hi = gen_reg_rtx (<vpckfloat_temp_mode>mode);
5239 emit_insn (cvt (lo, operands[1]));
5240 emit_insn (cvt (hi, operands[2]));
/* V2DI results are combined with movlhps; every other mode uses a
   vector concatenation of the two temporaries.  */
5241 if (<MODE>mode != V2DImode)
5242 emit_insn (gen_avx_vec_concat<vpckfloat_concat_mode> (operands[0],
5243 lo, hi));
5244 else
5245 emit_insn (gen_sse_movlhps (operands[0], lo, hi));
5246 DONE;
5247 })
5248
;; Masked form of float<floatunssuffix>v2div2sf2.  The converted V2SF value
;; is merged, under QImode mask operand 3, with the low two elements of V4SF
;; operand 2 (tied to the destination or zero, constraint "0C"); the high
;; half of the V4SF result is an explicit zero vector.
5249 (define_insn "float<floatunssuffix>v2div2sf2_mask"
5250 [(set (match_operand:V4SF 0 "register_operand" "=v")
5251 (vec_concat:V4SF
5252 (vec_merge:V2SF
5253 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
5254 (vec_select:V2SF
5255 (match_operand:V4SF 2 "nonimm_or_0_operand" "0C")
5256 (parallel [(const_int 0) (const_int 1)]))
5257 (match_operand:QI 3 "register_operand" "Yk"))
5258 (const_vector:V2SF [(const_int 0) (const_int 0)])))]
5259 "TARGET_AVX512DQ && TARGET_AVX512VL"
5260 "vcvt<floatsuffix>qq2ps{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
5261 [(set_attr "type" "ssecvt")
5262 (set_attr "prefix" "evex")
5263 (set_attr "mode" "V4SF")])
5264
;; Zero-masking form of float<floatunssuffix>v2div2sf2: the merge source is
;; a constant zero V2SF vector, so elements not selected by mask operand 2
;; become zero.  The template always prints the {z} decoration.
5265 (define_insn "*float<floatunssuffix>v2div2sf2_mask_1"
5266 [(set (match_operand:V4SF 0 "register_operand" "=v")
5267 (vec_concat:V4SF
5268 (vec_merge:V2SF
5269 (any_float:V2SF (match_operand:V2DI 1
5270 "nonimmediate_operand" "vm"))
5271 (const_vector:V2SF [(const_int 0) (const_int 0)])
5272 (match_operand:QI 2 "register_operand" "Yk"))
5273 (const_vector:V2SF [(const_int 0) (const_int 0)])))]
5274 "TARGET_AVX512DQ && TARGET_AVX512VL"
5275 "vcvt<floatsuffix>qq2ps{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
5276 [(set_attr "type" "ssecvt")
5277 (set_attr "prefix" "evex")
5278 (set_attr "mode" "V4SF")])
5279
;; Unsigned conversion of a V8SI/V4SI operand (<si2dfmode>) to V8DF/V4DF;
;; emits vcvtudq2pd.
5280 (define_insn "ufloat<si2dfmodelower><mode>2<mask_name>"
5281 [(set (match_operand:VF2_512_256VL 0 "register_operand" "=v")
5282 (unsigned_float:VF2_512_256VL
5283 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
5284 "TARGET_AVX512F"
5285 "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5286 [(set_attr "type" "ssecvt")
5287 (set_attr "prefix" "evex")
5288 (set_attr "mode" "<MODE>")])
5289
;; Unsigned conversion of elements 0 and 1 of V4SI operand 1 to V2DF;
;; emits vcvtudq2pd.  Requires AVX512VL.
5290 (define_insn "ufloatv2siv2df2<mask_name>"
5291 [(set (match_operand:V2DF 0 "register_operand" "=v")
5292 (unsigned_float:V2DF
5293 (vec_select:V2SI
5294 (match_operand:V4SI 1 "nonimmediate_operand" "vm")
5295 (parallel [(const_int 0) (const_int 1)]))))]
5296 "TARGET_AVX512VL"
5297 "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
5298 [(set_attr "type" "ssecvt")
5299 (set_attr "prefix" "evex")
5300 (set_attr "mode" "V2DF")])
5301
;; Signed conversion of elements 0..7 (the low half) of V16SI operand 1 to
;; V8DF; emits vcvtdq2pd with the source printed through the %t modifier.
5302 (define_insn "avx512f_cvtdq2pd512_2"
5303 [(set (match_operand:V8DF 0 "register_operand" "=v")
5304 (float:V8DF
5305 (vec_select:V8SI
5306 (match_operand:V16SI 1 "nonimmediate_operand" "vm")
5307 (parallel [(const_int 0) (const_int 1)
5308 (const_int 2) (const_int 3)
5309 (const_int 4) (const_int 5)
5310 (const_int 6) (const_int 7)]))))]
5311 "TARGET_AVX512F"
5312 "vcvtdq2pd\t{%t1, %0|%0, %t1}"
5313 [(set_attr "type" "ssecvt")
5314 (set_attr "prefix" "evex")
5315 (set_attr "mode" "V8DF")])
5316
;; Signed conversion of elements 0..3 (the low half) of V8SI operand 1 to
;; V4DF; emits vcvtdq2pd with the source printed through the %x modifier.
5317 (define_insn "avx_cvtdq2pd256_2"
5318 [(set (match_operand:V4DF 0 "register_operand" "=v")
5319 (float:V4DF
5320 (vec_select:V4SI
5321 (match_operand:V8SI 1 "nonimmediate_operand" "vm")
5322 (parallel [(const_int 0) (const_int 1)
5323 (const_int 2) (const_int 3)]))))]
5324 "TARGET_AVX"
5325 "vcvtdq2pd\t{%x1, %0|%0, %x1}"
5326 [(set_attr "type" "ssecvt")
5327 (set_attr "prefix" "maybe_evex")
5328 (set_attr "mode" "V4DF")])
5329
;; Signed conversion of elements 0 and 1 of V4SI operand 1 to V2DF; emits
;; cvtdq2pd.  The masked variant is gated by <mask_avx512vl_condition>.
5330 (define_insn "sse2_cvtdq2pd<mask_name>"
5331 [(set (match_operand:V2DF 0 "register_operand" "=v")
5332 (float:V2DF
5333 (vec_select:V2SI
5334 (match_operand:V4SI 1 "nonimmediate_operand" "vm")
5335 (parallel [(const_int 0) (const_int 1)]))))]
5336 "TARGET_SSE2 && <mask_avx512vl_condition>"
5337 "%vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
5338 [(set_attr "type" "ssecvt")
5339 (set_attr "prefix" "maybe_vex")
5340 (set_attr "mode" "V2DF")])
5341
;; Signed, non-truncating conversion (UNSPEC_FIX_NOTRUNC) of V8DF operand 1
;; to V8SI; emits vcvtpd2dq with optional mask and rounding operands.
5342 (define_insn "avx512f_cvtpd2dq512<mask_name><round_name>"
5343 [(set (match_operand:V8SI 0 "register_operand" "=v")
5344 (unspec:V8SI
5345 [(match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")]
5346 UNSPEC_FIX_NOTRUNC))]
5347 "TARGET_AVX512F"
5348 "vcvtpd2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5349 [(set_attr "type" "ssecvt")
5350 (set_attr "prefix" "evex")
5351 (set_attr "mode" "OI")])
5352
;; Signed, non-truncating conversion (UNSPEC_FIX_NOTRUNC) of V4DF operand 1
;; to V4SI; emits vcvtpd2dq{y}.  The masked variant is gated by
;; <mask_avx512vl_condition>.
5353 (define_insn "avx_cvtpd2dq256<mask_name>"
5354 [(set (match_operand:V4SI 0 "register_operand" "=v")
5355 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "vm")]
5356 UNSPEC_FIX_NOTRUNC))]
5357 "TARGET_AVX && <mask_avx512vl_condition>"
5358 "vcvtpd2dq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5359 [(set_attr "type" "ssecvt")
5360 (set_attr "prefix" "<mask_prefix>")
5361 (set_attr "mode" "OI")])
5362
;; Expander producing a V8SI value whose low half is the non-truncating
;; V4DF -> V4SI conversion of operand 1 and whose high half is operand 2,
;; which the preparation statement sets to the V4SI zero constant.
5363 (define_expand "avx_cvtpd2dq256_2"
5364 [(set (match_operand:V8SI 0 "register_operand")
5365 (vec_concat:V8SI
5366 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand")]
5367 UNSPEC_FIX_NOTRUNC)
5368 (match_dup 2)))]
5369 "TARGET_AVX"
5370 "operands[2] = CONST0_RTX (V4SImode);")
5371
;; Insn matching the RTL built by the avx_cvtpd2dq256_2 expander: the
;; conversion result concatenated with a zero V4SI operand.  The template
;; prints the destination through the %x modifier.
5372 (define_insn "*avx_cvtpd2dq256_2"
5373 [(set (match_operand:V8SI 0 "register_operand" "=v")
5374 (vec_concat:V8SI
5375 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "vm")]
5376 UNSPEC_FIX_NOTRUNC)
5377 (match_operand:V4SI 2 "const0_operand")))]
5378 "TARGET_AVX"
5379 "vcvtpd2dq{y}\t{%1, %x0|%x0, %1}"
5380 [(set_attr "type" "ssecvt")
5381 (set_attr "prefix" "vex")
5382 (set_attr "btver2_decode" "vector")
5383 (set_attr "mode" "OI")])
5384
;; Signed, non-truncating conversion (UNSPEC_FIX_NOTRUNC) of V2DF operand 1
;; to V2SI, placed in the low half of the V4SI destination; the high half
;; is an explicit zero vector.
5385 (define_insn "sse2_cvtpd2dq<mask_name>"
5386 [(set (match_operand:V4SI 0 "register_operand" "=v")
5387 (vec_concat:V4SI
5388 (unspec:V2SI [(match_operand:V2DF 1 "vector_operand" "vBm")]
5389 UNSPEC_FIX_NOTRUNC)
5390 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
5391 "TARGET_SSE2 && <mask_avx512vl_condition>"
5392 {
/* Only the AVX spelling takes the {x} suffix and the mask operand.  */
5393 if (!TARGET_AVX)
5394 return "cvtpd2dq\t{%1, %0|%0, %1}";
5395 else
5396 return "vcvtpd2dq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
5397 }
5398 [(set_attr "type" "ssecvt")
5399 (set_attr "prefix_rep" "1")
5400 (set_attr "prefix_data16" "0")
5401 (set_attr "prefix" "maybe_vex")
5402 (set_attr "mode" "TI")
5403 (set_attr "amdfam10_decode" "double")
5404 (set_attr "athlon_decode" "vector")
5405 (set_attr "bdver1_decode" "double")])
5406
5407 ;; For ufix_notrunc* insn patterns
;; Mnemonic suffix appended to vcvtpd2udq: empty for a V8DF source,
;; "{y}" for a V4DF source.
5408 (define_mode_attr pd2udqsuff
5409 [(V8DF "") (V4DF "{y}")])
5410
;; Unsigned, non-truncating conversion (UNSPEC_UNSIGNED_FIX_NOTRUNC) of a
;; V8DF/V4DF operand to V8SI/V4SI (<si2dfmode>); emits vcvtpd2udq with the
;; <pd2udqsuff> size suffix.
;; Operand 1 uses <round_nimm_predicate>, like the signed sibling
;; avx512f_cvtpd2dq512<mask_name><round_name>, so that the predicate and
;; <round_constraint> agree when the rounding substitution is applied.
5411 (define_insn "ufix_notrunc<mode><si2dfmodelower>2<mask_name><round_name>"
5412 [(set (match_operand:<si2dfmode> 0 "register_operand" "=v")
5413 (unspec:<si2dfmode>
5414 [(match_operand:VF2_512_256VL 1 "<round_nimm_predicate>" "<round_constraint>")]
5415 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
5416 "TARGET_AVX512F"
5417 "vcvtpd2udq<pd2udqsuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5418 [(set_attr "type" "ssecvt")
5419 (set_attr "prefix" "evex")
5420 (set_attr "mode" "<sseinsnmode>")])
5421
;; Unsigned, non-truncating conversion (UNSPEC_UNSIGNED_FIX_NOTRUNC) of V2DF
;; operand 1 to V2SI, placed in the low half of the V4SI destination; the
;; high half is an explicit zero vector.  Emits vcvtpd2udq{x}.
5422 (define_insn "ufix_notruncv2dfv2si2<mask_name>"
5423 [(set (match_operand:V4SI 0 "register_operand" "=v")
5424 (vec_concat:V4SI
5425 (unspec:V2SI
5426 [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
5427 UNSPEC_UNSIGNED_FIX_NOTRUNC)
5428 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
5429 "TARGET_AVX512VL"
5430 "vcvtpd2udq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5431 [(set_attr "type" "ssecvt")
5432 (set_attr "prefix" "evex")
5433 (set_attr "mode" "TI")])
5434
;; Signed or unsigned (any_fix) truncating conversion of V8DF operand 1 to
;; V8SI; emits vcvttpd2dq or its <fixsuffix> variant, with optional mask and
;; SAE-only operands.
5435 (define_insn "fix<fixunssuffix>_truncv8dfv8si2<mask_name><round_saeonly_name>"
5436 [(set (match_operand:V8SI 0 "register_operand" "=v")
5437 (any_fix:V8SI
5438 (match_operand:V8DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
5439 "TARGET_AVX512F"
5440 "vcvttpd2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
5441 [(set_attr "type" "ssecvt")
5442 (set_attr "prefix" "evex")
5443 (set_attr "mode" "OI")])
5444
;; Unsigned truncating conversion of V2DF operand 1 to V2SI, placed in the
;; low half of the V4SI destination; the high half is an explicit zero
;; vector.  Emits vcvttpd2udq{x}.
5445 (define_insn "ufix_truncv2dfv2si2<mask_name>"
5446 [(set (match_operand:V4SI 0 "register_operand" "=v")
5447 (vec_concat:V4SI
5448 (unsigned_fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
5449 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
5450 "TARGET_AVX512VL"
5451 "vcvttpd2udq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5452 [(set_attr "type" "ssecvt")
5453 (set_attr "prefix" "evex")
5454 (set_attr "mode" "TI")])
5455
;; Signed truncating conversion of V4DF operand 1 to V4SI; emits
;; vcvttpd2dq{y}.
;; The plain form only needs AVX.  The masked form produced by <mask_name>
;; additionally needs AVX512VL, which is expressed through
;; <mask_avx512vl_condition> in the same way as the sibling 256-bit patterns
;; avx_cvtpd2dq256<mask_name> and avx_cvtpd2ps256<mask_name>.
5456 (define_insn "fix_truncv4dfv4si2<mask_name>"
5457 [(set (match_operand:V4SI 0 "register_operand" "=v")
5458 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
5459 "TARGET_AVX && <mask_avx512vl_condition>"
5460 "vcvttpd2dq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5461 [(set_attr "type" "ssecvt")
5462 (set_attr "prefix" "maybe_evex")
5463 (set_attr "mode" "OI")])
5464
;; Unsigned truncating conversion of V4DF operand 1 to V4SI; emits
;; vcvttpd2udq{y}.  Requires both AVX512VL and AVX512F.
5465 (define_insn "ufix_truncv4dfv4si2<mask_name>"
5466 [(set (match_operand:V4SI 0 "register_operand" "=v")
5467 (unsigned_fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
5468 "TARGET_AVX512VL && TARGET_AVX512F"
5469 "vcvttpd2udq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5470 [(set_attr "type" "ssecvt")
5471 (set_attr "prefix" "maybe_evex")
5472 (set_attr "mode" "OI")])
5473
;; Signed or unsigned (any_fix) truncating conversion of a VF2_AVX512VL
;; double vector to the <sseintvecmode> integer vector; emits vcvttpd2qq or
;; its <fixsuffix> variant.  Requires AVX512DQ; the SAE-only substitution is
;; further restricted by <round_saeonly_mode512bit_condition>.
5474 (define_insn "fix<fixunssuffix>_trunc<mode><sseintvecmodelower>2<mask_name><round_saeonly_name>"
5475 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
5476 (any_fix:<sseintvecmode>
5477 (match_operand:VF2_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
5478 "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>"
5479 "vcvttpd2<fixsuffix>qq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
5480 [(set_attr "type" "ssecvt")
5481 (set_attr "prefix" "evex")
5482 (set_attr "mode" "<sseintvecmode2>")])
5483
;; Signed, non-truncating conversion (UNSPEC_FIX_NOTRUNC) of a VF2_AVX512VL
;; double vector to the <sseintvecmode> integer vector; emits vcvtpd2qq.
;; Requires AVX512DQ; the rounding substitution is further restricted by
;; <round_mode512bit_condition>.
5484 (define_insn "fix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>"
5485 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
5486 (unspec:<sseintvecmode>
5487 [(match_operand:VF2_AVX512VL 1 "<round_nimm_predicate>" "<round_constraint>")]
5488 UNSPEC_FIX_NOTRUNC))]
5489 "TARGET_AVX512DQ && <round_mode512bit_condition>"
5490 "vcvtpd2qq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5491 [(set_attr "type" "ssecvt")
5492 (set_attr "prefix" "evex")
5493 (set_attr "mode" "<sseintvecmode2>")])
5494
;; Unsigned, non-truncating conversion (UNSPEC_UNSIGNED_FIX_NOTRUNC) of a
;; VF2_AVX512VL double vector to the <sseintvecmode> integer vector; emits
;; vcvtpd2uqq.  Requires AVX512DQ; the rounding substitution is further
;; restricted by <round_mode512bit_condition>.
;; Operand 1 uses <round_nimm_predicate>, exactly like the signed sibling
;; fix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>, so that the
;; predicate and <round_constraint> agree when rounding is applied.
5495 (define_insn "ufix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>"
5496 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
5497 (unspec:<sseintvecmode>
5498 [(match_operand:VF2_AVX512VL 1 "<round_nimm_predicate>" "<round_constraint>")]
5499 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
5500 "TARGET_AVX512DQ && <round_mode512bit_condition>"
5501 "vcvtpd2uqq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5502 [(set_attr "type" "ssecvt")
5503 (set_attr "prefix" "evex")
5504 (set_attr "mode" "<sseintvecmode2>")])
5505
;; Signed or unsigned (any_fix) truncating conversion of a V8SF/V4SF operand
;; to V8DI/V4DI (<sselongvecmode>); emits vcvttps2qq or its <fixsuffix>
;; variant.  Requires AVX512DQ; the SAE-only substitution is further
;; restricted by <round_saeonly_modev8sf_condition>.
5506 (define_insn "fix<fixunssuffix>_trunc<mode><sselongvecmodelower>2<mask_name><round_saeonly_name>"
5507 [(set (match_operand:<sselongvecmode> 0 "register_operand" "=v")
5508 (any_fix:<sselongvecmode>
5509 (match_operand:VF1_128_256VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
5510 "TARGET_AVX512DQ && <round_saeonly_modev8sf_condition>"
5511 "vcvttps2<fixsuffix>qq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
5512 [(set_attr "type" "ssecvt")
5513 (set_attr "prefix" "evex")
5514 (set_attr "mode" "<sseintvecmode3>")])
5515
;; Signed or unsigned (any_fix) truncating conversion of elements 0 and 1
;; of V4SF operand 1 to V2DI; emits vcvttps2qq or its <fixsuffix> variant.
;; Requires AVX512DQ and AVX512VL.
5516 (define_insn "fix<fixunssuffix>_truncv2sfv2di2<mask_name>"
5517 [(set (match_operand:V2DI 0 "register_operand" "=v")
5518 (any_fix:V2DI
5519 (vec_select:V2SF
5520 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
5521 (parallel [(const_int 0) (const_int 1)]))))]
5522 "TARGET_AVX512DQ && TARGET_AVX512VL"
5523 "vcvttps2<fixsuffix>qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
5524 [(set_attr "type" "ssecvt")
5525 (set_attr "prefix" "evex")
5526 (set_attr "mode" "TI")])
5527
;; Helper attributes for the vec_unpack_<fixprefix>fix_trunc_{lo,hi}_<mode>
;; expanders, keyed on the SFmode source vector mode:
;;   vunpckfixt_mode         - DImode result vector mode;
;;   vunpckfixt_model        - the same in lower case, for generator names;
;;   vunpckfixt_extract_mode - suffix of the gen_vec_extract_{lo,hi}_*
;;                             generator used to take half of the source.
5528 (define_mode_attr vunpckfixt_mode
5529 [(V16SF "V8DI") (V8SF "V4DI") (V4SF "V2DI")])
5530 (define_mode_attr vunpckfixt_model
5531 [(V16SF "v8di") (V8SF "v4di") (V4SF "v2di")])
5532 (define_mode_attr vunpckfixt_extract_mode
5533 [(V16SF "v16sf") (V8SF "v8sf") (V4SF "v8sf")])
5534
;; Convert the low half of SFmode vector operand 1 to a DImode vector
;; (operand 0) with a truncating signed/unsigned conversion.
5535 (define_expand "vec_unpack_<fixprefix>fix_trunc_lo_<mode>"
5536 [(match_operand:<vunpckfixt_mode> 0 "register_operand")
5537 (any_fix:<vunpckfixt_mode>
5538 (match_operand:VF1_AVX512VL 1 "register_operand"))]
5539 "TARGET_AVX512DQ"
5540 {
/* V4SF is handed to the conversion unchanged; for wider modes the low
   half is first extracted into a fresh half-width register.  */
5541 rtx src = operands[1];
5542 if (<MODE>mode != V4SFmode)
5543 {
5544 src = gen_reg_rtx (<ssehalfvecmode>mode);
5545 emit_insn (gen_vec_extract_lo_<vunpckfixt_extract_mode> (src,
5546 operands[1]));
5547 }
5548 rtx (*cvt) (rtx, rtx)
5549 = gen_fix<fixunssuffix>_trunc<ssehalfvecmodelower><vunpckfixt_model>2;
5550 emit_insn (cvt (operands[0], src));
5551 DONE;
5552 })
5553
;; Convert the high half of SFmode vector operand 1 to a DImode vector
;; (operand 0) with a truncating signed/unsigned conversion.
5554 (define_expand "vec_unpack_<fixprefix>fix_trunc_hi_<mode>"
5555 [(match_operand:<vunpckfixt_mode> 0 "register_operand")
5556 (any_fix:<vunpckfixt_mode>
5557 (match_operand:VF1_AVX512VL 1 "register_operand"))]
5558 "TARGET_AVX512DQ"
5559 {
5560 rtx src;
5561 if (<MODE>mode == V4SFmode)
5562 {
/* Permute with selector 0x4e (elements 2,3,0,1) so the upper pair ends
   up in the low positions read by the conversion.  */
5563 src = gen_reg_rtx (V4SFmode);
5564 emit_insn (gen_avx_vpermilv4sf (src, operands[1], GEN_INT (0x4e)));
5565 }
5566 else
5567 {
/* Wider modes: extract the high half into a half-width register.  */
5568 src = gen_reg_rtx (<ssehalfvecmode>mode);
5569 emit_insn (gen_vec_extract_hi_<vunpckfixt_extract_mode> (src,
5570 operands[1]));
5571 }
5572 rtx (*cvt) (rtx, rtx)
5573 = gen_fix<fixunssuffix>_trunc<ssehalfvecmodelower><vunpckfixt_model>2;
5574 emit_insn (cvt (operands[0], src));
5575 DONE;
5576 })
5577
;; Unsigned truncating conversion of a VF1_128_256VL float vector to the
;; <sseintvecmode> integer vector; emits vcvttps2udq.  Requires AVX512VL.
5578 (define_insn "ufix_trunc<mode><sseintvecmodelower>2<mask_name>"
5579 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
5580 (unsigned_fix:<sseintvecmode>
5581 (match_operand:VF1_128_256VL 1 "nonimmediate_operand" "vm")))]
5582 "TARGET_AVX512VL"
5583 "vcvttps2udq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5584 [(set_attr "type" "ssecvt")
5585 (set_attr "prefix" "evex")
5586 (set_attr "mode" "<sseintvecmode2>")])
5587
;; Expander producing a V8SI value whose low half is the truncating
;; V4DF -> V4SI conversion of operand 1 and whose high half is operand 2,
;; which the preparation statement sets to the V4SI zero constant.
5588 (define_expand "avx_cvttpd2dq256_2"
5589 [(set (match_operand:V8SI 0 "register_operand")
5590 (vec_concat:V8SI
5591 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand"))
5592 (match_dup 2)))]
5593 "TARGET_AVX"
5594 "operands[2] = CONST0_RTX (V4SImode);")
5595
;; Signed truncating conversion of V2DF operand 1 to V2SI, placed in the
;; low half of the V4SI destination; the high half is an explicit zero
;; vector.
5596 (define_insn "sse2_cvttpd2dq<mask_name>"
5597 [(set (match_operand:V4SI 0 "register_operand" "=v")
5598 (vec_concat:V4SI
5599 (fix:V2SI (match_operand:V2DF 1 "vector_operand" "vBm"))
5600 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
5601 "TARGET_SSE2 && <mask_avx512vl_condition>"
5602 {
/* Only the AVX spelling takes the {x} suffix and the mask operand.  */
5603 if (!TARGET_AVX)
5604 return "cvttpd2dq\t{%1, %0|%0, %1}";
5605 else
5606 return "vcvttpd2dq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
5607 }
5608 [(set_attr "type" "ssecvt")
5609 (set_attr "amdfam10_decode" "double")
5610 (set_attr "athlon_decode" "vector")
5611 (set_attr "bdver1_decode" "double")
5612 (set_attr "prefix" "maybe_vex")
5613 (set_attr "mode" "TI")])
5614
;; Narrow V2DF operand 2 (float_truncate to V2SF, duplicated to V4SF) and
;; merge element 0 of the result into V4SF operand 1 (vec_merge selector
;; 1); the other elements come from operand 1.  Alternatives 0 and 1 are
;; the legacy two-operand forms, alternative 2 the three-operand form that
;; can carry the <round_op3> rounding operand.
5615 (define_insn "sse2_cvtsd2ss<round_name>"
5616 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
5617 (vec_merge:V4SF
5618 (vec_duplicate:V4SF
5619 (float_truncate:V2SF
5620 (match_operand:V2DF 2 "nonimmediate_operand" "x,m,<round_constraint>")))
5621 (match_operand:V4SF 1 "register_operand" "0,0,v")
5622 (const_int 1)))]
5623 "TARGET_SSE2"
5624 "@
5625 cvtsd2ss\t{%2, %0|%0, %2}
5626 cvtsd2ss\t{%2, %0|%0, %q2}
5627 vcvtsd2ss\t{<round_op3>%2, %1, %0|%0, %1, %q2<round_op3>}"
5628 [(set_attr "isa" "noavx,noavx,avx")
5629 (set_attr "type" "ssecvt")
5630 (set_attr "athlon_decode" "vector,double,*")
5631 (set_attr "amdfam10_decode" "vector,double,*")
5632 (set_attr "bdver1_decode" "direct,direct,*")
5633 (set_attr "btver2_decode" "double,double,double")
5634 (set_attr "prefix" "orig,orig,<round_prefix>")
5635 (set_attr "mode" "SF")])
5636
;; Variant of sse2_cvtsd2ss whose source is a scalar DF operand: it is
;; truncated to SF, duplicated across V4SF, and element 0 is merged into
;; V4SF operand 1.
5637 (define_insn "*sse2_vd_cvtsd2ss"
5638 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
5639 (vec_merge:V4SF
5640 (vec_duplicate:V4SF
5641 (float_truncate:SF (match_operand:DF 2 "nonimmediate_operand" "x,m,vm")))
5642 (match_operand:V4SF 1 "register_operand" "0,0,v")
5643 (const_int 1)))]
5644 "TARGET_SSE2"
5645 "@
5646 cvtsd2ss\t{%2, %0|%0, %2}
5647 cvtsd2ss\t{%2, %0|%0, %2}
5648 vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
5649 [(set_attr "isa" "noavx,noavx,avx")
5650 (set_attr "type" "ssecvt")
5651 (set_attr "athlon_decode" "vector,double,*")
5652 (set_attr "amdfam10_decode" "vector,double,*")
5653 (set_attr "bdver1_decode" "direct,direct,*")
5654 (set_attr "btver2_decode" "double,double,double")
5655 (set_attr "prefix" "orig,orig,vex")
5656 (set_attr "mode" "SF")])
5657
;; Widen elements 0 and 1 of V4SF operand 2 to V2DF (float_extend) and
;; merge element 0 of the result into V2DF operand 1 (vec_merge selector
;; 1); the other element comes from operand 1.  Alternatives 0 and 1 are
;; the legacy two-operand forms, alternative 2 the three-operand form that
;; can carry the SAE-only <round_saeonly_op3> operand.
5658 (define_insn "sse2_cvtss2sd<round_saeonly_name>"
5659 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
5660 (vec_merge:V2DF
5661 (float_extend:V2DF
5662 (vec_select:V2SF
5663 (match_operand:V4SF 2 "<round_saeonly_nimm_scalar_predicate>" "x,m,<round_saeonly_constraint>")
5664 (parallel [(const_int 0) (const_int 1)])))
5665 (match_operand:V2DF 1 "register_operand" "0,0,v")
5666 (const_int 1)))]
5667 "TARGET_SSE2"
5668 "@
5669 cvtss2sd\t{%2, %0|%0, %2}
5670 cvtss2sd\t{%2, %0|%0, %k2}
5671 vcvtss2sd\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %k2<round_saeonly_op3>}"
5672 [(set_attr "isa" "noavx,noavx,avx")
5673 (set_attr "type" "ssecvt")
5674 (set_attr "amdfam10_decode" "vector,double,*")
5675 (set_attr "athlon_decode" "direct,direct,*")
5676 (set_attr "bdver1_decode" "direct,direct,*")
5677 (set_attr "btver2_decode" "double,double,double")
5678 (set_attr "prefix" "orig,orig,<round_saeonly_prefix>")
5679 (set_attr "mode" "DF")])
5680
;; Variant of sse2_cvtss2sd whose source is a scalar SF operand: it is
;; extended to DF, duplicated across V2DF, and element 0 is merged into
;; V2DF operand 1.
5681 (define_insn "*sse2_vd_cvtss2sd"
5682 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
5683 (vec_merge:V2DF
5684 (vec_duplicate:V2DF
5685 (float_extend:DF (match_operand:SF 2 "nonimmediate_operand" "x,m,vm")))
5686 (match_operand:V2DF 1 "register_operand" "0,0,v")
5687 (const_int 1)))]
5688 "TARGET_SSE2"
5689 "@
5690 cvtss2sd\t{%2, %0|%0, %2}
5691 cvtss2sd\t{%2, %0|%0, %2}
5692 vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
5693 [(set_attr "isa" "noavx,noavx,avx")
5694 (set_attr "type" "ssecvt")
5695 (set_attr "amdfam10_decode" "vector,double,*")
5696 (set_attr "athlon_decode" "direct,direct,*")
5697 (set_attr "bdver1_decode" "direct,direct,*")
5698 (set_attr "btver2_decode" "double,double,double")
5699 (set_attr "prefix" "orig,orig,vex")
5700 (set_attr "mode" "DF")])
5701
;; Narrowing conversion (float_truncate) of V8DF operand 1 to V8SF; emits
;; vcvtpd2ps with optional mask and rounding operands.
5702 (define_insn "<mask_codefor>avx512f_cvtpd2ps512<mask_name><round_name>"
5703 [(set (match_operand:V8SF 0 "register_operand" "=v")
5704 (float_truncate:V8SF
5705 (match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")))]
5706 "TARGET_AVX512F"
5707 "vcvtpd2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5708 [(set_attr "type" "ssecvt")
5709 (set_attr "prefix" "evex")
5710 (set_attr "mode" "V8SF")])
5711
;; Narrowing conversion (float_truncate) of V4DF operand 1 to V4SF; emits
;; vcvtpd2ps{y}.  The masked variant is gated by <mask_avx512vl_condition>.
5712 (define_insn "avx_cvtpd2ps256<mask_name>"
5713 [(set (match_operand:V4SF 0 "register_operand" "=v")
5714 (float_truncate:V4SF
5715 (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
5716 "TARGET_AVX && <mask_avx512vl_condition>"
5717 "vcvtpd2ps{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5718 [(set_attr "type" "ssecvt")
5719 (set_attr "prefix" "maybe_evex")
5720 (set_attr "btver2_decode" "vector")
5721 (set_attr "mode" "V4SF")])
5722
;; Expander producing a V4SF value whose low half is V2DF operand 1
;; narrowed to V2SF and whose high half is operand 2, which the preparation
;; statement sets to the V2SF zero constant.
5723 (define_expand "sse2_cvtpd2ps"
5724 [(set (match_operand:V4SF 0 "register_operand")
5725 (vec_concat:V4SF
5726 (float_truncate:V2SF
5727 (match_operand:V2DF 1 "vector_operand"))
5728 (match_dup 2)))]
5729 "TARGET_SSE2"
5730 "operands[2] = CONST0_RTX (V2SFmode);")
5731
;; Masked expander for the V2DF -> V2SF narrowing conversion: the V4SF
;; value (conversion result concatenated with the V2SF zero constant in
;; operand 4) is merged with V4SF operand 2 under QImode mask operand 3.
;; NOTE(review): the condition here is plain TARGET_SSE2, whereas the masked
;; form of *sse2_cvtpd2ps<mask_name> also tests <mask_avx512vl_condition>
;; -- confirm that callers only reach this expander with AVX512VL enabled.
5732 (define_expand "sse2_cvtpd2ps_mask"
5733 [(set (match_operand:V4SF 0 "register_operand")
5734 (vec_merge:V4SF
5735 (vec_concat:V4SF
5736 (float_truncate:V2SF
5737 (match_operand:V2DF 1 "vector_operand"))
5738 (match_dup 4))
5739 (match_operand:V4SF 2 "register_operand")
5740 (match_operand:QI 3 "register_operand")))]
5741 "TARGET_SSE2"
5742 "operands[4] = CONST0_RTX (V2SFmode);")
5743
;; Insn for the V2DF -> V2SF narrowing conversion: the result occupies the
;; low half of the V4SF destination and operand 2 (a zero constant) the
;; high half.
5744 (define_insn "*sse2_cvtpd2ps<mask_name>"
5745 [(set (match_operand:V4SF 0 "register_operand" "=v")
5746 (vec_concat:V4SF
5747 (float_truncate:V2SF
5748 (match_operand:V2DF 1 "vector_operand" "vBm"))
5749 (match_operand:V2SF 2 "const0_operand")))]
5750 "TARGET_SSE2 && <mask_avx512vl_condition>"
5751 {
/* Only the AVX spelling takes the {x} suffix and the mask operand.  */
5752 if (!TARGET_AVX)
5753 return "cvtpd2ps\t{%1, %0|%0, %1}";
5754 else
5755 return "vcvtpd2ps{x}\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}";
5756 }
5757 [(set_attr "type" "ssecvt")
5758 (set_attr "amdfam10_decode" "double")
5759 (set_attr "athlon_decode" "vector")
5760 (set_attr "bdver1_decode" "double")
5761 (set_attr "prefix_data16" "1")
5762 (set_attr "prefix" "maybe_vex")
5763 (set_attr "mode" "V4SF")])
5764
5765 ;; For <sse2_avx_avx512f>_cvtps2pd<avxsizesuffix> insn pattern
;; Maps a V8DF/V4DF mode to the SFmode vector with the same number of
;; elements.
5766 (define_mode_attr sf2dfmode
5767 [(V8DF "V8SF") (V4DF "V4SF")])
5768
;; Widening conversion (float_extend) of a V8SF/V4SF operand (<sf2dfmode>)
;; to V8DF/V4DF; emits vcvtps2pd.  The mask and SAE-only substitutions are
;; each gated by their 512-bit mode conditions.
5769 (define_insn "<sse2_avx_avx512f>_cvtps2pd<avxsizesuffix><mask_name><round_saeonly_name>"
5770 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
5771 (float_extend:VF2_512_256
5772 (match_operand:<sf2dfmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
5773 "TARGET_AVX && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
5774 "vcvtps2pd\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
5775 [(set_attr "type" "ssecvt")
5776 (set_attr "prefix" "maybe_vex")
5777 (set_attr "mode" "<MODE>")])
5778
;; Widening conversion of elements 0..3 (the low half) of V8SF operand 1 to
;; V4DF; emits vcvtps2pd with the source printed through the %x modifier.
5779 (define_insn "*avx_cvtps2pd256_2"
5780 [(set (match_operand:V4DF 0 "register_operand" "=v")
5781 (float_extend:V4DF
5782 (vec_select:V4SF
5783 (match_operand:V8SF 1 "nonimmediate_operand" "vm")
5784 (parallel [(const_int 0) (const_int 1)
5785 (const_int 2) (const_int 3)]))))]
5786 "TARGET_AVX"
5787 "vcvtps2pd\t{%x1, %0|%0, %x1}"
5788 [(set_attr "type" "ssecvt")
5789 (set_attr "prefix" "vex")
5790 (set_attr "mode" "V4DF")])
5791
;; Widening conversion of elements 0..7 (the low half) of V16SF operand 1
;; to V8DF; emits vcvtps2pd with the source printed through the %t modifier.
5792 (define_insn "vec_unpacks_lo_v16sf"
5793 [(set (match_operand:V8DF 0 "register_operand" "=v")
5794 (float_extend:V8DF
5795 (vec_select:V8SF
5796 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
5797 (parallel [(const_int 0) (const_int 1)
5798 (const_int 2) (const_int 3)
5799 (const_int 4) (const_int 5)
5800 (const_int 6) (const_int 7)]))))]
5801 "TARGET_AVX512F"
5802 "vcvtps2pd\t{%t1, %0|%0, %t1}"
5803 [(set_attr "type" "ssecvt")
5804 (set_attr "prefix" "evex")
5805 (set_attr "mode" "V8DF")])
5806
;; Convert a VI12_AVX512VL integer vector register into a mask register
;; ("Yk"), modelled as UNSPEC_CVTINT2MASK; emits vpmov<ssemodesuffix>2m.
;; Requires AVX512BW.
5807 (define_insn "<avx512>_cvt<ssemodesuffix>2mask<mode>"
5808 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
5809 (unspec:<avx512fmaskmode>
5810 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")]
5811 UNSPEC_CVTINT2MASK))]
5812 "TARGET_AVX512BW"
5813 "vpmov<ssemodesuffix>2m\t{%1, %0|%0, %1}"
5814 [(set_attr "prefix" "evex")
5815 (set_attr "mode" "<sseinsnmode>")])
5816
;; Convert a VI48_AVX512VL integer vector register into a mask register
;; ("Yk"), modelled as UNSPEC_CVTINT2MASK; emits vpmov<ssemodesuffix>2m.
;; Requires AVX512DQ.
5817 (define_insn "<avx512>_cvt<ssemodesuffix>2mask<mode>"
5818 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
5819 (unspec:<avx512fmaskmode>
5820 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")]
5821 UNSPEC_CVTINT2MASK))]
5822 "TARGET_AVX512DQ"
5823 "vpmov<ssemodesuffix>2m\t{%1, %0|%0, %1}"
5824 [(set_attr "prefix" "evex")
5825 (set_attr "mode" "<sseinsnmode>")])
5826
;; Expand a mask register (operand 1) into a VI12_AVX512VL vector: a
;; vec_merge under the mask between an all-ones vector (operand 2) and a
;; zero vector (operand 3), both filled in by the preparation code.
5827 (define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>"
5828 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
5829 (vec_merge:VI12_AVX512VL
5830 (match_dup 2)
5831 (match_dup 3)
5832 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
5833 "TARGET_AVX512BW"
5834 {
5835 operands[3] = CONST0_RTX (<MODE>mode);
5836 operands[2] = CONSTM1_RTX (<MODE>mode);
5837 })
5838
;; Insn matching the RTL built by the VI12 cvtmask2 expander: a vec_merge
;; of an all-ones vector and a zero vector under mask operand 1; emits
;; vpmovm2<ssemodesuffix>.  Requires AVX512BW.
5839 (define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>"
5840 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
5841 (vec_merge:VI12_AVX512VL
5842 (match_operand:VI12_AVX512VL 2 "vector_all_ones_operand")
5843 (match_operand:VI12_AVX512VL 3 "const0_operand")
5844 (match_operand:<avx512fmaskmode> 1 "register_operand" "Yk")))]
5845 "TARGET_AVX512BW"
5846 "vpmovm2<ssemodesuffix>\t{%1, %0|%0, %1}"
5847 [(set_attr "prefix" "evex")
5848 (set_attr "mode" "<sseinsnmode>")])
5849
;; Expand a mask register (operand 1) into a VI48_AVX512VL vector: a
;; vec_merge under the mask between an all-ones vector (operand 2) and a
;; zero vector (operand 3), both filled in by the preparation code.
5850 (define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>"
5851 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
5852 (vec_merge:VI48_AVX512VL
5853 (match_dup 2)
5854 (match_dup 3)
5855 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
5856 "TARGET_AVX512DQ"
5857 {
5858 operands[3] = CONST0_RTX (<MODE>mode);
5859 operands[2] = CONSTM1_RTX (<MODE>mode);
5860 })
5861
;; Insn matching the RTL built by the VI48 cvtmask2 expander: a vec_merge
;; of an all-ones vector and a zero vector under mask operand 1; emits
;; vpmovm2<ssemodesuffix>.  Requires AVX512DQ.
5862 (define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>"
5863 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
5864 (vec_merge:VI48_AVX512VL
5865 (match_operand:VI48_AVX512VL 2 "vector_all_ones_operand")
5866 (match_operand:VI48_AVX512VL 3 "const0_operand")
5867 (match_operand:<avx512fmaskmode> 1 "register_operand" "Yk")))]
5868 "TARGET_AVX512DQ"
5869 "vpmovm2<ssemodesuffix>\t{%1, %0|%0, %1}"
5870 [(set_attr "prefix" "evex")
5871 (set_attr "mode" "<sseinsnmode>")])
5872
;; float_extend elements 0 and 1 of the V4SF operand 1 into a V2DF
;; result.  Emitted as (v)cvtps2pd; in the Intel-syntax half of the
;; template operand 1 is printed with the %q modifier.  The masked
;; variant is additionally gated by <mask_avx512vl_condition>.
5873 (define_insn "sse2_cvtps2pd<mask_name>"
5874 [(set (match_operand:V2DF 0 "register_operand" "=v")
5875 (float_extend:V2DF
5876 (vec_select:V2SF
5877 (match_operand:V4SF 1 "vector_operand" "vm")
5878 (parallel [(const_int 0) (const_int 1)]))))]
5879 "TARGET_SSE2 && <mask_avx512vl_condition>"
5880 "%vcvtps2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
5881 [(set_attr "type" "ssecvt")
5882 (set_attr "amdfam10_decode" "direct")
5883 (set_attr "athlon_decode" "double")
5884 (set_attr "bdver1_decode" "double")
5885 (set_attr "prefix_data16" "0")
5886 (set_attr "prefix" "maybe_vex")
5887 (set_attr "mode" "V2DF")])
5888
5888
;; Widen the high two floats of a V4SF (operand 1) to V2DF.
;; Step 1: fill the fresh V4SF temporary (operand 2) by selecting
;; elements 6,7,2,3 of (vec_concat tmp, operand 1), which places
;; operand 1's elements 2 and 3 in positions 0 and 1 of the temporary.
;; Step 2: float_extend elements 0 and 1 of the temporary.
5889 (define_expand "vec_unpacks_hi_v4sf"
5890 [(set (match_dup 2)
5891 (vec_select:V4SF
5892 (vec_concat:V8SF
5893 (match_dup 2)
5894 (match_operand:V4SF 1 "vector_operand"))
5895 (parallel [(const_int 6) (const_int 7)
5896 (const_int 2) (const_int 3)])))
5897 (set (match_operand:V2DF 0 "register_operand")
5898 (float_extend:V2DF
5899 (vec_select:V2SF
5900 (match_dup 2)
5901 (parallel [(const_int 0) (const_int 1)]))))]
5902 "TARGET_SSE2"
5903 "operands[2] = gen_reg_rtx (V4SFmode);")
5904
5904
;; Widen the high four floats of a V8SF (operand 1) to V4DF: extract
;; elements 4..7 into a fresh V4SF temporary (operand 2), then
;; float_extend the whole temporary.  Requires TARGET_AVX.
5905 (define_expand "vec_unpacks_hi_v8sf"
5906 [(set (match_dup 2)
5907 (vec_select:V4SF
5908 (match_operand:V8SF 1 "register_operand")
5909 (parallel [(const_int 4) (const_int 5)
5910 (const_int 6) (const_int 7)])))
5911 (set (match_operand:V4DF 0 "register_operand")
5912 (float_extend:V4DF
5913 (match_dup 2)))]
5914 "TARGET_AVX"
5915 "operands[2] = gen_reg_rtx (V4SFmode);")
5916
5916
;; Widen the high eight floats of a V16SF (operand 1) to V8DF: extract
;; elements 8..15 into a fresh V8SF temporary (operand 2), then
;; float_extend the whole temporary.  Requires TARGET_AVX512F.
5917 (define_expand "vec_unpacks_hi_v16sf"
5918 [(set (match_dup 2)
5919 (vec_select:V8SF
5920 (match_operand:V16SF 1 "register_operand")
5921 (parallel [(const_int 8) (const_int 9)
5922 (const_int 10) (const_int 11)
5923 (const_int 12) (const_int 13)
5924 (const_int 14) (const_int 15)])))
5925 (set (match_operand:V8DF 0 "register_operand")
5926 (float_extend:V8DF
5927 (match_dup 2)))]
5928 "TARGET_AVX512F"
5929 "operands[2] = gen_reg_rtx (V8SFmode);")
5930
5930
;; Widen the low two floats (elements 0 and 1) of a V4SF to V2DF with a
;; single float_extend of a vec_select.  No preparation code is needed.
5931 (define_expand "vec_unpacks_lo_v4sf"
5932 [(set (match_operand:V2DF 0 "register_operand")
5933 (float_extend:V2DF
5934 (vec_select:V2SF
5935 (match_operand:V4SF 1 "vector_operand")
5936 (parallel [(const_int 0) (const_int 1)]))))]
5937 "TARGET_SSE2")
5938
5938
;; Widen the low four floats (elements 0..3) of a V8SF to V4DF with a
;; single float_extend of a vec_select.  Requires TARGET_AVX.
5939 (define_expand "vec_unpacks_lo_v8sf"
5940 [(set (match_operand:V4DF 0 "register_operand")
5941 (float_extend:V4DF
5942 (vec_select:V4SF
5943 (match_operand:V8SF 1 "nonimmediate_operand")
5944 (parallel [(const_int 0) (const_int 1)
5945 (const_int 2) (const_int 3)]))))]
5946 "TARGET_AVX")
5947
5947
;; Maps an integer vector mode to a floating-point vector mode with
;; half as many elements, each twice as wide (HI -> SF, SI -> DF).
;; Used as the result mode of the vec_unpack*_float_* expanders.
5948 (define_mode_attr sseunpackfltmode
5949 [(V8HI "V4SF") (V4SI "V2DF") (V16HI "V8SF")
5950 (V8SI "V4DF") (V32HI "V16SF") (V16SI "V8DF")])
5951
5951
;; Unpack the high half of a VI2_AVX512F vector and convert it to
;; floating point: first widen via gen_vec_unpacks_hi_<mode> into a
;; <sseunpackmode> temporary, then emit a FLOAT of that temporary into
;; operand 0 (mode <sseunpackfltmode>).
5952 (define_expand "vec_unpacks_float_hi_<mode>"
5953 [(match_operand:<sseunpackfltmode> 0 "register_operand")
5954 (match_operand:VI2_AVX512F 1 "register_operand")]
5955 "TARGET_SSE2"
5956 {
5957 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
5958
5959 emit_insn (gen_vec_unpacks_hi_<mode> (tmp, operands[1]));
5960 emit_insn (gen_rtx_SET (operands[0],
5961 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
5962 DONE;
5963 })
5964
5964
;; Unpack the low half of a VI2_AVX512F vector and convert it to
;; floating point: first widen via gen_vec_unpacks_lo_<mode> into a
;; <sseunpackmode> temporary, then emit a FLOAT of that temporary into
;; operand 0 (mode <sseunpackfltmode>).
5965 (define_expand "vec_unpacks_float_lo_<mode>"
5966 [(match_operand:<sseunpackfltmode> 0 "register_operand")
5967 (match_operand:VI2_AVX512F 1 "register_operand")]
5968 "TARGET_SSE2"
5969 {
5970 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
5971
5972 emit_insn (gen_vec_unpacks_lo_<mode> (tmp, operands[1]));
5973 emit_insn (gen_rtx_SET (operands[0],
5974 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
5975 DONE;
5976 })
5977
5977
;; Unsigned counterpart for the high half of a VI2_AVX512F vector: widen
;; via gen_vec_unpacku_hi_<mode> into a <sseunpackmode> temporary, then
;; emit a FLOAT of the temporary into operand 0.
;; NOTE(review): the conversion itself is the signed FLOAT code; this
;; presumably relies on the unsigned unpack producing non-negative
;; widened values -- confirm against vec_unpacku_hi_<mode>.
5978 (define_expand "vec_unpacku_float_hi_<mode>"
5979 [(match_operand:<sseunpackfltmode> 0 "register_operand")
5980 (match_operand:VI2_AVX512F 1 "register_operand")]
5981 "TARGET_SSE2"
5982 {
5983 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
5984
5985 emit_insn (gen_vec_unpacku_hi_<mode> (tmp, operands[1]));
5986 emit_insn (gen_rtx_SET (operands[0],
5987 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
5988 DONE;
5989 })
5990
5990
;; Unsigned counterpart for the low half of a VI2_AVX512F vector: widen
;; via gen_vec_unpacku_lo_<mode> into a <sseunpackmode> temporary, then
;; emit a FLOAT of the temporary into operand 0.
;; NOTE(review): the conversion itself is the signed FLOAT code; this
;; presumably relies on the unsigned unpack producing non-negative
;; widened values -- confirm against vec_unpacku_lo_<mode>.
5991 (define_expand "vec_unpacku_float_lo_<mode>"
5992 [(match_operand:<sseunpackfltmode> 0 "register_operand")
5993 (match_operand:VI2_AVX512F 1 "register_operand")]
5994 "TARGET_SSE2"
5995 {
5996 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
5997
5998 emit_insn (gen_vec_unpacku_lo_<mode> (tmp, operands[1]));
5999 emit_insn (gen_rtx_SET (operands[0],
6000 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
6001 DONE;
6002 })
6003
6003
;; Convert the high two signed ints of a V4SI (operand 1) to V2DF.
;; Step 1: shuffle elements 2,3,2,3 of operand 1 into a fresh V4SI
;; temporary (operand 2) so the high pair sits in positions 0 and 1.
;; Step 2: float elements 0 and 1 of the temporary.
6004 (define_expand "vec_unpacks_float_hi_v4si"
6005 [(set (match_dup 2)
6006 (vec_select:V4SI
6007 (match_operand:V4SI 1 "vector_operand")
6008 (parallel [(const_int 2) (const_int 3)
6009 (const_int 2) (const_int 3)])))
6010 (set (match_operand:V2DF 0 "register_operand")
6011 (float:V2DF
6012 (vec_select:V2SI
6013 (match_dup 2)
6014 (parallel [(const_int 0) (const_int 1)]))))]
6015 "TARGET_SSE2"
6016 "operands[2] = gen_reg_rtx (V4SImode);")
6017
6017
;; Convert the low two signed ints (elements 0 and 1) of a V4SI to V2DF
;; with a single float of a vec_select.  No preparation code is needed.
6018 (define_expand "vec_unpacks_float_lo_v4si"
6019 [(set (match_operand:V2DF 0 "register_operand")
6020 (float:V2DF
6021 (vec_select:V2SI
6022 (match_operand:V4SI 1 "vector_operand")
6023 (parallel [(const_int 0) (const_int 1)]))))]
6024 "TARGET_SSE2")
6025
6025
;; Convert the high four signed ints of a V8SI (operand 1) to V4DF:
;; extract elements 4..7 into a fresh V4SI temporary (operand 2), then
;; float the whole temporary.  Requires TARGET_AVX.
6026 (define_expand "vec_unpacks_float_hi_v8si"
6027 [(set (match_dup 2)
6028 (vec_select:V4SI
6029 (match_operand:V8SI 1 "vector_operand")
6030 (parallel [(const_int 4) (const_int 5)
6031 (const_int 6) (const_int 7)])))
6032 (set (match_operand:V4DF 0 "register_operand")
6033 (float:V4DF
6034 (match_dup 2)))]
6035 "TARGET_AVX"
6036 "operands[2] = gen_reg_rtx (V4SImode);")
6037
6037
;; Convert the low four signed ints (elements 0..3) of a V8SI to V4DF
;; with a single float of a vec_select.  Requires TARGET_AVX.
6038 (define_expand "vec_unpacks_float_lo_v8si"
6039 [(set (match_operand:V4DF 0 "register_operand")
6040 (float:V4DF
6041 (vec_select:V4SI
6042 (match_operand:V8SI 1 "nonimmediate_operand")
6043 (parallel [(const_int 0) (const_int 1)
6044 (const_int 2) (const_int 3)]))))]
6045 "TARGET_AVX")
6046
6046
;; Convert the high eight signed ints of a V16SI (operand 1) to V8DF:
;; extract elements 8..15 into a fresh V8SI temporary (operand 2), then
;; float the whole temporary.  Requires TARGET_AVX512F.
6047 (define_expand "vec_unpacks_float_hi_v16si"
6048 [(set (match_dup 2)
6049 (vec_select:V8SI
6050 (match_operand:V16SI 1 "nonimmediate_operand")
6051 (parallel [(const_int 8) (const_int 9)
6052 (const_int 10) (const_int 11)
6053 (const_int 12) (const_int 13)
6054 (const_int 14) (const_int 15)])))
6055 (set (match_operand:V8DF 0 "register_operand")
6056 (float:V8DF
6057 (match_dup 2)))]
6058 "TARGET_AVX512F"
6059 "operands[2] = gen_reg_rtx (V8SImode);")
6060
6060
;; Convert the low eight signed ints (elements 0..7) of a V16SI to V8DF
;; with a single float of a vec_select.  Requires TARGET_AVX512F.
6061 (define_expand "vec_unpacks_float_lo_v16si"
6062 [(set (match_operand:V8DF 0 "register_operand")
6063 (float:V8DF
6064 (vec_select:V8SI
6065 (match_operand:V16SI 1 "nonimmediate_operand")
6066 (parallel [(const_int 0) (const_int 1)
6067 (const_int 2) (const_int 3)
6068 (const_int 4) (const_int 5)
6069 (const_int 6) (const_int 7)]))))]
6070 "TARGET_AVX512F")
6071
6071
;; Convert the high two unsigned ints of a V4SI (operand 1) to V2DF.
;; Sequence: shuffle elements 2,3,2,3 into a V4SI temporary (operand 5);
;; convert its elements 0,1 as signed ints (operand 6); compare the
;; result against zero with lt (operand 7); AND that comparison result
;; with a vector built from 2^32 (operand 8); add it back to the signed
;; conversion.  Lanes that converted negative thus get 2^32 added.
;; Operands set up by the preparation code: 3 = zero vector,
;; 4 = vector built from the DFmode constant 2^32 (real_ldexp of
;; dconst1 by 32), 5 = V4SI temporary, 6..8 = V2DF temporaries.
;; NOTE(review): the literal 1 passed to ix86_build_const_vector
;; presumably requests the value in every element -- confirm.
6072 (define_expand "vec_unpacku_float_hi_v4si"
6073 [(set (match_dup 5)
6074 (vec_select:V4SI
6075 (match_operand:V4SI 1 "vector_operand")
6076 (parallel [(const_int 2) (const_int 3)
6077 (const_int 2) (const_int 3)])))
6078 (set (match_dup 6)
6079 (float:V2DF
6080 (vec_select:V2SI
6081 (match_dup 5)
6082 (parallel [(const_int 0) (const_int 1)]))))
6083 (set (match_dup 7)
6084 (lt:V2DF (match_dup 6) (match_dup 3)))
6085 (set (match_dup 8)
6086 (and:V2DF (match_dup 7) (match_dup 4)))
6087 (set (match_operand:V2DF 0 "register_operand")
6088 (plus:V2DF (match_dup 6) (match_dup 8)))]
6089 "TARGET_SSE2"
6090 {
6091 REAL_VALUE_TYPE TWO32r;
6092 rtx x;
6093 int i;
6094
6095 real_ldexp (&TWO32r, &dconst1, 32);
6096 x = const_double_from_real_value (TWO32r, DFmode);
6097
6098 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
6099 operands[4] = force_reg (V2DFmode,
6100 ix86_build_const_vector (V2DFmode, 1, x));
6101
6102 operands[5] = gen_reg_rtx (V4SImode);
6103
6104 for (i = 6; i < 9; i++)
6105 operands[i] = gen_reg_rtx (V2DFmode);
6106 })
6107
6107
;; Convert the low two unsigned ints of a V4SI (operand 1) to V2DF.
;; Sequence: convert elements 0,1 as signed ints (operand 5); compare
;; the result against zero with lt (operand 6); AND that comparison
;; result with a vector built from 2^32 (operand 7); add it back to the
;; signed conversion.  Lanes that converted negative thus get 2^32 added.
;; Operands set up by the preparation code: 3 = zero vector,
;; 4 = vector built from the DFmode constant 2^32 (real_ldexp of
;; dconst1 by 32), 5..7 = V2DF temporaries.
;; NOTE(review): the literal 1 passed to ix86_build_const_vector
;; presumably requests the value in every element -- confirm.
6108 (define_expand "vec_unpacku_float_lo_v4si"
6109 [(set (match_dup 5)
6110 (float:V2DF
6111 (vec_select:V2SI
6112 (match_operand:V4SI 1 "vector_operand")
6113 (parallel [(const_int 0) (const_int 1)]))))
6114 (set (match_dup 6)
6115 (lt:V2DF (match_dup 5) (match_dup 3)))
6116 (set (match_dup 7)
6117 (and:V2DF (match_dup 6) (match_dup 4)))
6118 (set (match_operand:V2DF 0 "register_operand")
6119 (plus:V2DF (match_dup 5) (match_dup 7)))]
6120 "TARGET_SSE2"
6121 {
6122 REAL_VALUE_TYPE TWO32r;
6123 rtx x;
6124 int i;
6125
6126 real_ldexp (&TWO32r, &dconst1, 32);
6127 x = const_double_from_real_value (TWO32r, DFmode);
6128
6129 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
6130 operands[4] = force_reg (V2DFmode,
6131 ix86_build_const_vector (V2DFmode, 1, x));
6132
6133 for (i = 5; i < 8; i++)
6134 operands[i] = gen_reg_rtx (V2DFmode);
6135 })
6136
6136
;; Convert the high four unsigned ints of a V8SI (operand 1) to V4DF.
;; Emitted by hand in the preparation code:
;;   tmp[5] = high half of operand 1 (gen_vec_extract_hi_v8si)
;;   tmp[2] = signed int -> double conversion of tmp[5]
;;   tmp[3] = lt comparison of tmp[2] against the zero vector tmp[0]
;;   tmp[4] = tmp[3] AND tmp[1], where tmp[1] is built from 2^32
;;   operand 0 = tmp[2] + tmp[4]
;; so lanes that converted negative get 2^32 added.
;; NOTE(review): the literal 1 passed to ix86_build_const_vector
;; presumably requests the value in every element -- confirm.
6137 (define_expand "vec_unpacku_float_hi_v8si"
6138 [(match_operand:V4DF 0 "register_operand")
6139 (match_operand:V8SI 1 "register_operand")]
6140 "TARGET_AVX"
6141 {
6142 REAL_VALUE_TYPE TWO32r;
6143 rtx x, tmp[6];
6144 int i;
6145
6146 real_ldexp (&TWO32r, &dconst1, 32);
6147 x = const_double_from_real_value (TWO32r, DFmode);
6148
6149 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
6150 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
6151 tmp[5] = gen_reg_rtx (V4SImode);
6152
6153 for (i = 2; i < 5; i++)
6154 tmp[i] = gen_reg_rtx (V4DFmode);
6155 emit_insn (gen_vec_extract_hi_v8si (tmp[5], operands[1]));
6156 emit_insn (gen_floatv4siv4df2 (tmp[2], tmp[5]));
6157 emit_insn (gen_rtx_SET (tmp[3], gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
6158 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
6159 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
6160 DONE;
6161 })
6162
6162
;; Convert the high eight unsigned ints of a V16SI (operand 1) to V8DF.
;; Emitted by hand in the preparation code:
;;   tmp[3] = high half of operand 1 (gen_vec_extract_hi_v16si)
;;   tmp[2] = signed int -> double conversion of tmp[3]
;;   k      = QImode lt comparison of tmp[2] against the zero vector
;;   tmp[2] = masked add of tmp[1] (built from 2^32) under k
;;   operand 0 = tmp[2]
;; NOTE(review): tmp[2] is also passed as the fourth argument of
;; gen_addv8df3_mask, presumably the value kept in lanes where k is
;; clear -- confirm against the addv8df3_mask pattern.
6163 (define_expand "vec_unpacku_float_hi_v16si"
6164 [(match_operand:V8DF 0 "register_operand")
6165 (match_operand:V16SI 1 "register_operand")]
6166 "TARGET_AVX512F"
6167 {
6168 REAL_VALUE_TYPE TWO32r;
6169 rtx k, x, tmp[4];
6170
6171 real_ldexp (&TWO32r, &dconst1, 32);
6172 x = const_double_from_real_value (TWO32r, DFmode);
6173
6174 tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
6175 tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
6176 tmp[2] = gen_reg_rtx (V8DFmode);
6177 tmp[3] = gen_reg_rtx (V8SImode);
6178 k = gen_reg_rtx (QImode);
6179
6180 emit_insn (gen_vec_extract_hi_v16si (tmp[3], operands[1]));
6181 emit_insn (gen_floatv8siv8df2 (tmp[2], tmp[3]));
6182 emit_insn (gen_rtx_SET (k, gen_rtx_LT (QImode, tmp[2], tmp[0])));
6183 emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
6184 emit_move_insn (operands[0], tmp[2]);
6185 DONE;
6186 })
6187
6187
;; Convert the low four unsigned ints of a V8SI (operand 1) to V4DF.
;; Emitted by hand in the preparation code:
;;   tmp[2] = result of gen_avx_cvtdq2pd256_2 on operand 1
;;   tmp[3] = lt comparison of tmp[2] against the zero vector tmp[0]
;;   tmp[4] = tmp[3] AND tmp[1], where tmp[1] is built from 2^32
;;   operand 0 = tmp[2] + tmp[4]
;; NOTE(review): avx_cvtdq2pd256_2 is presumably the signed conversion
;; of the low four elements -- confirm against that pattern.
6188 (define_expand "vec_unpacku_float_lo_v8si"
6189 [(match_operand:V4DF 0 "register_operand")
6190 (match_operand:V8SI 1 "nonimmediate_operand")]
6191 "TARGET_AVX"
6192 {
6193 REAL_VALUE_TYPE TWO32r;
6194 rtx x, tmp[5];
6195 int i;
6196
6197 real_ldexp (&TWO32r, &dconst1, 32);
6198 x = const_double_from_real_value (TWO32r, DFmode);
6199
6200 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
6201 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
6202
6203 for (i = 2; i < 5; i++)
6204 tmp[i] = gen_reg_rtx (V4DFmode);
6205 emit_insn (gen_avx_cvtdq2pd256_2 (tmp[2], operands[1]));
6206 emit_insn (gen_rtx_SET (tmp[3], gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
6207 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
6208 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
6209 DONE;
6210 })
6211
6211
;; Convert the low eight unsigned ints of a V16SI (operand 1) to V8DF.
;; Emitted by hand in the preparation code:
;;   tmp[2] = result of gen_avx512f_cvtdq2pd512_2 on operand 1
;;   k      = QImode lt comparison of tmp[2] against the zero vector
;;   tmp[2] = masked add of tmp[1] (built from 2^32) under k
;;   operand 0 = tmp[2]
;; NOTE(review): avx512f_cvtdq2pd512_2 is presumably the signed
;; conversion of the low eight elements, and the fourth argument of
;; gen_addv8df3_mask presumably the merge source -- confirm both.
6212 (define_expand "vec_unpacku_float_lo_v16si"
6213 [(match_operand:V8DF 0 "register_operand")
6214 (match_operand:V16SI 1 "nonimmediate_operand")]
6215 "TARGET_AVX512F"
6216 {
6217 REAL_VALUE_TYPE TWO32r;
6218 rtx k, x, tmp[3];
6219
6220 real_ldexp (&TWO32r, &dconst1, 32);
6221 x = const_double_from_real_value (TWO32r, DFmode);
6222
6223 tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
6224 tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
6225 tmp[2] = gen_reg_rtx (V8DFmode);
6226 k = gen_reg_rtx (QImode);
6227
6228 emit_insn (gen_avx512f_cvtdq2pd512_2 (tmp[2], operands[1]));
6229 emit_insn (gen_rtx_SET (k, gen_rtx_LT (QImode, tmp[2], tmp[0])));
6230 emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
6231 emit_move_insn (operands[0], tmp[2]);
6232 DONE;
6233 })
6234
6234
;; Pack two VF2_512_256 vectors (operands 1 and 2) into one
;; <ssePSmode> vector: float_truncate each input into its own
;; <sf2dfmode> temporary (operands 3 and 4), then vec_concat the two
;; temporaries, operand 1's half first.  Requires TARGET_AVX.
6235 (define_expand "vec_pack_trunc_<mode>"
6236 [(set (match_dup 3)
6237 (float_truncate:<sf2dfmode>
6238 (match_operand:VF2_512_256 1 "nonimmediate_operand")))
6239 (set (match_dup 4)
6240 (float_truncate:<sf2dfmode>
6241 (match_operand:VF2_512_256 2 "nonimmediate_operand")))
6242 (set (match_operand:<ssePSmode> 0 "register_operand")
6243 (vec_concat:<ssePSmode>
6244 (match_dup 3)
6245 (match_dup 4)))]
6246 "TARGET_AVX"
6247 {
6248 operands[3] = gen_reg_rtx (<sf2dfmode>mode);
6249 operands[4] = gen_reg_rtx (<sf2dfmode>mode);
6250 })
6251
6251
;; Pack two V2DF vectors (operands 1 and 2) into one V4SF.
;; Two strategies, chosen in the preparation code:
;;  - with AVX, when 128-bit AVX is not preferred and the insn is
;;    optimized for speed: concatenate both inputs into a V4DF
;;    temporary (operand 1 is forced into a register first) and convert
;;    once with gen_avx_cvtpd2ps256;
;;  - otherwise: convert each input separately with gen_sse2_cvtpd2ps
;;    into V4SF temporaries and combine them with gen_sse_movlhps.
6252 (define_expand "vec_pack_trunc_v2df"
6253 [(match_operand:V4SF 0 "register_operand")
6254 (match_operand:V2DF 1 "vector_operand")
6255 (match_operand:V2DF 2 "vector_operand")]
6256 "TARGET_SSE2"
6257 {
6258 rtx tmp0, tmp1;
6259
6260 if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
6261 {
6262 tmp0 = gen_reg_rtx (V4DFmode);
6263 tmp1 = force_reg (V2DFmode, operands[1]);
6264
6265 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
6266 emit_insn (gen_avx_cvtpd2ps256 (operands[0], tmp0));
6267 }
6268 else
6269 {
6270 tmp0 = gen_reg_rtx (V4SFmode);
6271 tmp1 = gen_reg_rtx (V4SFmode);
6272
6273 emit_insn (gen_sse2_cvtpd2ps (tmp0, operands[1]));
6274 emit_insn (gen_sse2_cvtpd2ps (tmp1, operands[2]));
6275 emit_insn (gen_sse_movlhps (operands[0], tmp0, tmp1));
6276 }
6277 DONE;
6278 })
6279
6279
;; Pack two V8DF vectors into one V16SI: convert each input with
;; gen_fix_truncv8dfv8si2 into a V8SI temporary, then concatenate the
;; temporaries (operand 1's result first) with gen_avx_vec_concatv16si.
;; Requires TARGET_AVX512F.
6280 (define_expand "vec_pack_sfix_trunc_v8df"
6281 [(match_operand:V16SI 0 "register_operand")
6282 (match_operand:V8DF 1 "nonimmediate_operand")
6283 (match_operand:V8DF 2 "nonimmediate_operand")]
6284 "TARGET_AVX512F"
6285 {
6286 rtx r1, r2;
6287
6288 r1 = gen_reg_rtx (V8SImode);
6289 r2 = gen_reg_rtx (V8SImode);
6290
6291 emit_insn (gen_fix_truncv8dfv8si2 (r1, operands[1]));
6292 emit_insn (gen_fix_truncv8dfv8si2 (r2, operands[2]));
6293 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
6294 DONE;
6295 })
6296
6296
;; Pack two V4DF vectors into one V8SI: convert each input with
;; gen_fix_truncv4dfv4si2 into a V4SI temporary, then concatenate the
;; temporaries (operand 1's result first) with gen_avx_vec_concatv8si.
;; Requires TARGET_AVX.
6297 (define_expand "vec_pack_sfix_trunc_v4df"
6298 [(match_operand:V8SI 0 "register_operand")
6299 (match_operand:V4DF 1 "nonimmediate_operand")
6300 (match_operand:V4DF 2 "nonimmediate_operand")]
6301 "TARGET_AVX"
6302 {
6303 rtx r1, r2;
6304
6305 r1 = gen_reg_rtx (V4SImode);
6306 r2 = gen_reg_rtx (V4SImode);
6307
6308 emit_insn (gen_fix_truncv4dfv4si2 (r1, operands[1]));
6309 emit_insn (gen_fix_truncv4dfv4si2 (r2, operands[2]));
6310 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
6311 DONE;
6312 })
6313
6313
;; Pack two V2DF vectors into one V4SI using truncating conversion.
;; Two strategies, chosen in the preparation code:
;;  - with AVX, when 128-bit AVX is not preferred and the insn is
;;    optimized for speed: concatenate both inputs into a V4DF
;;    temporary and convert once with gen_fix_truncv4dfv4si2;
;;  - otherwise: convert each input with gen_sse2_cvttpd2dq into a V4SI
;;    temporary, interleave the low 64-bit halves of the two results
;;    (viewed as V2DI) and move the V4SI view of that into operand 0.
6314 (define_expand "vec_pack_sfix_trunc_v2df"
6315 [(match_operand:V4SI 0 "register_operand")
6316 (match_operand:V2DF 1 "vector_operand")
6317 (match_operand:V2DF 2 "vector_operand")]
6318 "TARGET_SSE2"
6319 {
6320 rtx tmp0, tmp1, tmp2;
6321
6322 if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
6323 {
6324 tmp0 = gen_reg_rtx (V4DFmode);
6325 tmp1 = force_reg (V2DFmode, operands[1]);
6326
6327 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
6328 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp0));
6329 }
6330 else
6331 {
6332 tmp0 = gen_reg_rtx (V4SImode);
6333 tmp1 = gen_reg_rtx (V4SImode);
6334 tmp2 = gen_reg_rtx (V2DImode);
6335
6336 emit_insn (gen_sse2_cvttpd2dq (tmp0, operands[1]));
6337 emit_insn (gen_sse2_cvttpd2dq (tmp1, operands[2]));
6338 emit_insn (gen_vec_interleave_lowv2di (tmp2,
6339 gen_lowpart (V2DImode, tmp0),
6340 gen_lowpart (V2DImode, tmp1)));
6341 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
6342 }
6343 DONE;
6344 })
6345
6345
;; Maps a double-precision vector mode to the SImode vector mode with
;; twice as many elements; used as the result mode when packing two
;; such vectors into one integer vector.
6346 (define_mode_attr ssepackfltmode
6347 [(V8DF "V16SI") (V4DF "V8SI") (V2DF "V4SI")])
6348
;; Pack two VF2 vectors into one <ssepackfltmode> vector using unsigned
;; truncating conversion.
;;  - V8DFmode: convert each input with gen_fixuns_truncv8dfv8si2 and
;;    concatenate the two V8SI results.
;;  - other modes: each input goes through
;;    ix86_expand_adjust_ufix_to_sfix_si, which returns an adjusted
;;    value (tmp[0], tmp[1]) and stores a second value through its
;;    pointer argument (tmp[2], tmp[3]).  The adjusted values are packed
;;    with the signed vec_pack_sfix_trunc_<mode> into tmp[4]; tmp[2] and
;;    tmp[3] are combined by ix86_expand_vec_extract_even_odd (last
;;    argument 0) into tmp[5]; the result is tmp[4] XOR tmp[5].
;;    When the pack mode is not V4SI and AVX2 is unavailable, the
;;    extraction is done on V8SF views of tmp[2]/tmp[3] and viewed back
;;    as V8SI.
;; NOTE(review): tmp[2]/tmp[3] are presumably per-element XOR
;; corrections undoing the helper's range adjustment -- confirm against
;; ix86_expand_adjust_ufix_to_sfix_si.
6349 (define_expand "vec_pack_ufix_trunc_<mode>"
6350 [(match_operand:<ssepackfltmode> 0 "register_operand")
6351 (match_operand:VF2 1 "register_operand")
6352 (match_operand:VF2 2 "register_operand")]
6353 "TARGET_SSE2"
6354 {
6355 if (<MODE>mode == V8DFmode)
6356 {
6357 rtx r1, r2;
6358
6359 r1 = gen_reg_rtx (V8SImode);
6360 r2 = gen_reg_rtx (V8SImode);
6361
6362 emit_insn (gen_fixuns_truncv8dfv8si2 (r1, operands[1]));
6363 emit_insn (gen_fixuns_truncv8dfv8si2 (r2, operands[2]));
6364 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
6365 }
6366 else
6367 {
6368 rtx tmp[7];
6369 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
6370 tmp[1] = ix86_expand_adjust_ufix_to_sfix_si (operands[2], &tmp[3]);
6371 tmp[4] = gen_reg_rtx (<ssepackfltmode>mode);
6372 emit_insn (gen_vec_pack_sfix_trunc_<mode> (tmp[4], tmp[0], tmp[1]));
6373 if (<ssepackfltmode>mode == V4SImode || TARGET_AVX2)
6374 {
6375 tmp[5] = gen_reg_rtx (<ssepackfltmode>mode);
6376 ix86_expand_vec_extract_even_odd (tmp[5], tmp[2], tmp[3], 0);
6377 }
6378 else
6379 {
6380 tmp[5] = gen_reg_rtx (V8SFmode);
6381 ix86_expand_vec_extract_even_odd (tmp[5],
6382 gen_lowpart (V8SFmode, tmp[2]),
6383 gen_lowpart (V8SFmode, tmp[3]), 0);
6384 tmp[5] = gen_lowpart (V8SImode, tmp[5]);
6385 }
6386 tmp[6] = expand_simple_binop (<ssepackfltmode>mode, XOR, tmp[4], tmp[5],
6387 operands[0], 0, OPTAB_DIRECT);
6388 if (tmp[6] != operands[0])
6389 emit_move_insn (operands[0], tmp[6]);
6390 }
6391
6392 DONE;
6393 })
6394
6394
;; Pack two V8DF vectors into one V16SI: convert each input with
;; gen_avx512f_cvtpd2dq512 into a V8SI temporary, then concatenate the
;; temporaries (operand 1's result first) with gen_avx_vec_concatv16si.
;; Requires TARGET_AVX512F.
6395 (define_expand "avx512f_vec_pack_sfix_v8df"
6396 [(match_operand:V16SI 0 "register_operand")
6397 (match_operand:V8DF 1 "nonimmediate_operand")
6398 (match_operand:V8DF 2 "nonimmediate_operand")]
6399 "TARGET_AVX512F"
6400 {
6401 rtx r1, r2;
6402
6403 r1 = gen_reg_rtx (V8SImode);
6404 r2 = gen_reg_rtx (V8SImode);
6405
6406 emit_insn (gen_avx512f_cvtpd2dq512 (r1, operands[1]));
6407 emit_insn (gen_avx512f_cvtpd2dq512 (r2, operands[2]));
6408 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
6409 DONE;
6410 })
6411
6411
;; Pack two V4DF vectors into one V8SI: convert each input with
;; gen_avx_cvtpd2dq256 into a V4SI temporary, then concatenate the
;; temporaries (operand 1's result first) with gen_avx_vec_concatv8si.
;; Requires TARGET_AVX.
6412 (define_expand "vec_pack_sfix_v4df"
6413 [(match_operand:V8SI 0 "register_operand")
6414 (match_operand:V4DF 1 "nonimmediate_operand")
6415 (match_operand:V4DF 2 "nonimmediate_operand")]
6416 "TARGET_AVX"
6417 {
6418 rtx r1, r2;
6419
6420 r1 = gen_reg_rtx (V4SImode);
6421 r2 = gen_reg_rtx (V4SImode);
6422
6423 emit_insn (gen_avx_cvtpd2dq256 (r1, operands[1]));
6424 emit_insn (gen_avx_cvtpd2dq256 (r2, operands[2]));
6425 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
6426 DONE;
6427 })
6428
6428
;; Pack two V2DF vectors into one V4SI using the cvtpd2dq conversions
;; (the non-truncating counterparts of vec_pack_sfix_trunc_v2df).
;; Two strategies, chosen in the preparation code:
;;  - with AVX, when 128-bit AVX is not preferred and the insn is
;;    optimized for speed: concatenate both inputs into a V4DF
;;    temporary and convert once with gen_avx_cvtpd2dq256;
;;  - otherwise: convert each input with gen_sse2_cvtpd2dq into a V4SI
;;    temporary, interleave the low 64-bit halves of the two results
;;    (viewed as V2DI) and move the V4SI view of that into operand 0.
6429 (define_expand "vec_pack_sfix_v2df"
6430 [(match_operand:V4SI 0 "register_operand")
6431 (match_operand:V2DF 1 "vector_operand")
6432 (match_operand:V2DF 2 "vector_operand")]
6433 "TARGET_SSE2"
6434 {
6435 rtx tmp0, tmp1, tmp2;
6436
6437 if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
6438 {
6439 tmp0 = gen_reg_rtx (V4DFmode);
6440 tmp1 = force_reg (V2DFmode, operands[1]);
6441
6442 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
6443 emit_insn (gen_avx_cvtpd2dq256 (operands[0], tmp0));
6444 }
6445 else
6446 {
6447 tmp0 = gen_reg_rtx (V4SImode);
6448 tmp1 = gen_reg_rtx (V4SImode);
6449 tmp2 = gen_reg_rtx (V2DImode);
6450
6451 emit_insn (gen_sse2_cvtpd2dq (tmp0, operands[1]));
6452 emit_insn (gen_sse2_cvtpd2dq (tmp1, operands[2]));
6453 emit_insn (gen_vec_interleave_lowv2di (tmp2,
6454 gen_lowpart (V2DImode, tmp0),
6455 gen_lowpart (V2DImode, tmp1)));
6456 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
6457 }
6458 DONE;
6459 })
6460
6460
6461 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6462 ;;
6463 ;; Parallel single-precision floating point element swizzling
6464 ;;
6465 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6466
;; Expander wrapper around sse_movhlps.  The RTL template selects
;; elements 6,7,2,3 of (vec_concat operand 1, operand 2), i.e. the high
;; pair of operand 2 followed by the high pair of operand 1.
;; The preparation code runs ix86_fixup_binary_operands to obtain a
;; destination, emits sse_movhlps on it, and copies the result to
;; operand 0 if the fixup returned a different rtx.
6467 (define_expand "sse_movhlps_exp"
6468 [(set (match_operand:V4SF 0 "nonimmediate_operand")
6469 (vec_select:V4SF
6470 (vec_concat:V8SF
6471 (match_operand:V4SF 1 "nonimmediate_operand")
6472 (match_operand:V4SF 2 "nonimmediate_operand"))
6473 (parallel [(const_int 6)
6474 (const_int 7)
6475 (const_int 2)
6476 (const_int 3)])))]
6477 "TARGET_SSE"
6478 {
6479 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
6480
6481 emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));
6482
6483 /* Fix up the destination if needed. */
6484 if (dst != operands[0])
6485 emit_move_insn (operands[0], dst);
6486
6487 DONE;
6488 })
6489
6489
;; Result = { op2[2], op2[3], op1[2], op1[3] } (indices 6,7,2,3 of the
;; concatenation).  Rejected when operands 1 and 2 are both memory.
;; Alternatives, in constraint-column order:
;;   0: movhlps   reg <- reg            (non-AVX, operand 1 tied to 0)
;;   1: vmovhlps  reg <- reg, reg       (AVX)
;;   2: movlps    reg <- %H2 of an offsettable memory operand 2 (non-AVX)
;;   3: vmovlps   reg <- reg, %H2 of an offsettable memory operand 2 (AVX)
;;   4: (v)movhps memory operand 0 <- register operand 2
6490 (define_insn "sse_movhlps"
6491 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,m")
6492 (vec_select:V4SF
6493 (vec_concat:V8SF
6494 (match_operand:V4SF 1 "nonimmediate_operand" " 0,v,0,v,0")
6495 (match_operand:V4SF 2 "nonimmediate_operand" " x,v,o,o,v"))
6496 (parallel [(const_int 6)
6497 (const_int 7)
6498 (const_int 2)
6499 (const_int 3)])))]
6500 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
6501 "@
6502 movhlps\t{%2, %0|%0, %2}
6503 vmovhlps\t{%2, %1, %0|%0, %1, %2}
6504 movlps\t{%H2, %0|%0, %H2}
6505 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
6506 %vmovhps\t{%2, %0|%q0, %2}"
6507 [(set_attr "isa" "noavx,avx,noavx,avx,*")
6508 (set_attr "type" "ssemov")
6509 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
6510 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
6511
6511
;; Expander wrapper around sse_movlhps.  The RTL template selects
;; elements 0,1,4,5 of (vec_concat operand 1, operand 2), i.e. the low
;; pair of operand 1 followed by the low pair of operand 2.
;; The preparation code runs ix86_fixup_binary_operands to obtain a
;; destination, emits sse_movlhps on it, and copies the result to
;; operand 0 if the fixup returned a different rtx.
6512 (define_expand "sse_movlhps_exp"
6513 [(set (match_operand:V4SF 0 "nonimmediate_operand")
6514 (vec_select:V4SF
6515 (vec_concat:V8SF
6516 (match_operand:V4SF 1 "nonimmediate_operand")
6517 (match_operand:V4SF 2 "nonimmediate_operand"))
6518 (parallel [(const_int 0)
6519 (const_int 1)
6520 (const_int 4)
6521 (const_int 5)])))]
6522 "TARGET_SSE"
6523 {
6524 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
6525
6526 emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));
6527
6528 /* Fix up the destination if needed. */
6529 if (dst != operands[0])
6530 emit_move_insn (operands[0], dst);
6531
6532 DONE;
6533 })
6534
6534
;; Result = { op1[0], op1[1], op2[0], op2[1] } (indices 0,1,4,5 of the
;; concatenation).  Operand validity is checked with
;; ix86_binary_operator_ok.
;; Alternatives, in constraint-column order:
;;   0: movlhps   reg <- reg            (non-AVX, operand 1 tied to 0)
;;   1: vmovlhps  reg <- reg, reg       (AVX)
;;   2: movhps    reg <- memory operand 2 (non-AVX)
;;   3: vmovhps   reg <- reg, memory operand 2 (AVX)
;;   4: (v)movlps %H0 of an offsettable memory operand 0 <- register
;;      operand 2
;; Alternative 3 must constrain operand 2 to memory ("m"), like its
;; non-AVX twin in alternative 2: vmovhps loads from a 64-bit memory
;; source (the template prints it with %q2), and a register operand 2
;; under AVX is already handled by alternative 1.
6535 (define_insn "sse_movlhps"
6536 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,o")
6537 (vec_select:V4SF
6538 (vec_concat:V8SF
6539 (match_operand:V4SF 1 "nonimmediate_operand" " 0,v,0,v,0")
6540 (match_operand:V4SF 2 "nonimmediate_operand" " x,v,m,m,v"))
6541 (parallel [(const_int 0)
6542 (const_int 1)
6543 (const_int 4)
6544 (const_int 5)])))]
6545 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
6546 "@
6547 movlhps\t{%2, %0|%0, %2}
6548 vmovlhps\t{%2, %1, %0|%0, %1, %2}
6549 movhps\t{%2, %0|%0, %q2}
6550 vmovhps\t{%2, %1, %0|%0, %1, %q2}
6551 %vmovlps\t{%2, %H0|%H0, %2}"
6552 [(set_attr "isa" "noavx,avx,noavx,avx,*")
6553 (set_attr "type" "ssemov")
6554 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
6555 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
6556
6556
;; 512-bit vunpckhps: within each group of four elements, interleave
;; elements 2 and 3 of operand 1 with elements 2 and 3 of operand 2
;; (indices 16..31 of the concatenation refer to operand 2).
;; Requires TARGET_AVX512F; the <mask_name> substitution adds the
;; masked form.
6557 (define_insn "<mask_codefor>avx512f_unpckhps512<mask_name>"
6558 [(set (match_operand:V16SF 0 "register_operand" "=v")
6559 (vec_select:V16SF
6560 (vec_concat:V32SF
6561 (match_operand:V16SF 1 "register_operand" "v")
6562 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
6563 (parallel [(const_int 2) (const_int 18)
6564 (const_int 3) (const_int 19)
6565 (const_int 6) (const_int 22)
6566 (const_int 7) (const_int 23)
6567 (const_int 10) (const_int 26)
6568 (const_int 11) (const_int 27)
6569 (const_int 14) (const_int 30)
6570 (const_int 15) (const_int 31)])))]
6571 "TARGET_AVX512F"
6572 "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
6573 [(set_attr "type" "sselog")
6574 (set_attr "prefix" "evex")
6575 (set_attr "mode" "V16SF")])
6576
6576
6577 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Per group of four elements, interleaves elements 2 and 3 of operand 1
;; with elements 2 and 3 of operand 2 (indices 8..15 of the
;; concatenation refer to operand 2).  The masked variant is
;; additionally gated by <mask_avx512vl_condition>.
6578 (define_insn "avx_unpckhps256<mask_name>"
6579 [(set (match_operand:V8SF 0 "register_operand" "=v")
6580 (vec_select:V8SF
6581 (vec_concat:V16SF
6582 (match_operand:V8SF 1 "register_operand" "v")
6583 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
6584 (parallel [(const_int 2) (const_int 10)
6585 (const_int 3) (const_int 11)
6586 (const_int 6) (const_int 14)
6587 (const_int 7) (const_int 15)])))]
6588 "TARGET_AVX && <mask_avx512vl_condition>"
6589 "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
6590 [(set_attr "type" "sselog")
6591 (set_attr "prefix" "vex")
6592 (set_attr "mode" "V8SF")])
6593
6593
;; Full-width interleave of the high halves of two V8SF vectors, built
;; from three steps:
;;   operand 3 = selection 0,8,1,9,4,12,5,13 of (op1, op2)
;;   operand 4 = selection 2,10,3,11,6,14,7,15 of (op1, op2)
;;   operand 0 = elements 4..7 of operand 3 followed by elements 4..7
;;               of operand 4 (indices 12..15 of the concatenation)
;; Operands 3 and 4 are fresh V8SF temporaries.  Requires TARGET_AVX.
6594 (define_expand "vec_interleave_highv8sf"
6595 [(set (match_dup 3)
6596 (vec_select:V8SF
6597 (vec_concat:V16SF
6598 (match_operand:V8SF 1 "register_operand")
6599 (match_operand:V8SF 2 "nonimmediate_operand"))
6600 (parallel [(const_int 0) (const_int 8)
6601 (const_int 1) (const_int 9)
6602 (const_int 4) (const_int 12)
6603 (const_int 5) (const_int 13)])))
6604 (set (match_dup 4)
6605 (vec_select:V8SF
6606 (vec_concat:V16SF
6607 (match_dup 1)
6608 (match_dup 2))
6609 (parallel [(const_int 2) (const_int 10)
6610 (const_int 3) (const_int 11)
6611 (const_int 6) (const_int 14)
6612 (const_int 7) (const_int 15)])))
6613 (set (match_operand:V8SF 0 "register_operand")
6614 (vec_select:V8SF
6615 (vec_concat:V16SF
6616 (match_dup 3)
6617 (match_dup 4))
6618 (parallel [(const_int 4) (const_int 5)
6619 (const_int 6) (const_int 7)
6620 (const_int 12) (const_int 13)
6621 (const_int 14) (const_int 15)])))]
6622 "TARGET_AVX"
6623 {
6624 operands[3] = gen_reg_rtx (V8SFmode);
6625 operands[4] = gen_reg_rtx (V8SFmode);
6626 })
6627
6627
;; Interleave the high pairs of two V4SF vectors:
;; result = { op1[2], op2[2], op1[3], op2[3] } (indices 2,6,3,7).
;; Alternative 0 is the two-operand unpckhps (non-AVX, operand 1 tied
;; to operand 0); alternative 1 is the three-operand vunpckhps (AVX).
;; The masked variant is additionally gated by
;; <mask_avx512vl_condition>.
6628 (define_insn "vec_interleave_highv4sf<mask_name>"
6629 [(set (match_operand:V4SF 0 "register_operand" "=x,v")
6630 (vec_select:V4SF
6631 (vec_concat:V8SF
6632 (match_operand:V4SF 1 "register_operand" "0,v")
6633 (match_operand:V4SF 2 "vector_operand" "xBm,vm"))
6634 (parallel [(const_int 2) (const_int 6)
6635 (const_int 3) (const_int 7)])))]
6636 "TARGET_SSE && <mask_avx512vl_condition>"
6637 "@
6638 unpckhps\t{%2, %0|%0, %2}
6639 vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
6640 [(set_attr "isa" "noavx,avx")
6641 (set_attr "type" "sselog")
6642 (set_attr "prefix" "orig,vex")
6643 (set_attr "mode" "V4SF")])
6644
6644
;; 512-bit vunpcklps: within each group of four elements, interleave
;; elements 0 and 1 of operand 1 with elements 0 and 1 of operand 2
;; (indices 16..31 of the concatenation refer to operand 2).
;; Requires TARGET_AVX512F; the <mask_name> substitution adds the
;; masked form.
6645 (define_insn "<mask_codefor>avx512f_unpcklps512<mask_name>"
6646 [(set (match_operand:V16SF 0 "register_operand" "=v")
6647 (vec_select:V16SF
6648 (vec_concat:V32SF
6649 (match_operand:V16SF 1 "register_operand" "v")
6650 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
6651 (parallel [(const_int 0) (const_int 16)
6652 (const_int 1) (const_int 17)
6653 (const_int 4) (const_int 20)
6654 (const_int 5) (const_int 21)
6655 (const_int 8) (const_int 24)
6656 (const_int 9) (const_int 25)
6657 (const_int 12) (const_int 28)
6658 (const_int 13) (const_int 29)])))]
6659 "TARGET_AVX512F"
6660 "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
6661 [(set_attr "type" "sselog")
6662 (set_attr "prefix" "evex")
6663 (set_attr "mode" "V16SF")])
6664
6664
6665 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Per group of four elements, interleaves elements 0 and 1 of operand 1
;; with elements 0 and 1 of operand 2 (indices 8..15 of the
;; concatenation refer to operand 2).  The masked variant is
;; additionally gated by <mask_avx512vl_condition>.
6666 (define_insn "avx_unpcklps256<mask_name>"
6667 [(set (match_operand:V8SF 0 "register_operand" "=v")
6668 (vec_select:V8SF
6669 (vec_concat:V16SF
6670 (match_operand:V8SF 1 "register_operand" "v")
6671 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
6672 (parallel [(const_int 0) (const_int 8)
6673 (const_int 1) (const_int 9)
6674 (const_int 4) (const_int 12)
6675 (const_int 5) (const_int 13)])))]
6676 "TARGET_AVX && <mask_avx512vl_condition>"
6677 "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
6678 [(set_attr "type" "sselog")
6679 (set_attr "prefix" "vex")
6680 (set_attr "mode" "V8SF")])
6681
6681
;; Explicitly masked 128-bit vunpcklps: the interleave
;; { op1[0], op2[0], op1[1], op2[1] } is vec_merged with operand 3
;; (either tied to the destination or a zero constant, constraint "0C")
;; under the QImode mask in operand 4.  Requires TARGET_AVX512VL.
6682 (define_insn "unpcklps128_mask"
6683 [(set (match_operand:V4SF 0 "register_operand" "=v")
6684 (vec_merge:V4SF
6685 (vec_select:V4SF
6686 (vec_concat:V8SF
6687 (match_operand:V4SF 1 "register_operand" "v")
6688 (match_operand:V4SF 2 "nonimmediate_operand" "vm"))
6689 (parallel [(const_int 0) (const_int 4)
6690 (const_int 1) (const_int 5)]))
6691 (match_operand:V4SF 3 "nonimm_or_0_operand" "0C")
6692 (match_operand:QI 4 "register_operand" "Yk")))]
6693 "TARGET_AVX512VL"
6694 "vunpcklps\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
6695 [(set_attr "type" "sselog")
6696 (set_attr "prefix" "evex")
6697 (set_attr "mode" "V4SF")])
6698
6698
;; Full-width interleave of the low halves of two V8SF vectors, built
;; from three steps:
;;   operand 3 = selection 0,8,1,9,4,12,5,13 of (op1, op2)
;;   operand 4 = selection 2,10,3,11,6,14,7,15 of (op1, op2)
;;   operand 0 = elements 0..3 of operand 3 followed by elements 0..3
;;               of operand 4 (indices 8..11 of the concatenation)
;; Operands 3 and 4 are fresh V8SF temporaries.  Requires TARGET_AVX.
6699 (define_expand "vec_interleave_lowv8sf"
6700 [(set (match_dup 3)
6701 (vec_select:V8SF
6702 (vec_concat:V16SF
6703 (match_operand:V8SF 1 "register_operand")
6704 (match_operand:V8SF 2 "nonimmediate_operand"))
6705 (parallel [(const_int 0) (const_int 8)
6706 (const_int 1) (const_int 9)
6707 (const_int 4) (const_int 12)
6708 (const_int 5) (const_int 13)])))
6709 (set (match_dup 4)
6710 (vec_select:V8SF
6711 (vec_concat:V16SF
6712 (match_dup 1)
6713 (match_dup 2))
6714 (parallel [(const_int 2) (const_int 10)
6715 (const_int 3) (const_int 11)
6716 (const_int 6) (const_int 14)
6717 (const_int 7) (const_int 15)])))
6718 (set (match_operand:V8SF 0 "register_operand")
6719 (vec_select:V8SF
6720 (vec_concat:V16SF
6721 (match_dup 3)
6722 (match_dup 4))
6723 (parallel [(const_int 0) (const_int 1)
6724 (const_int 2) (const_int 3)
6725 (const_int 8) (const_int 9)
6726 (const_int 10) (const_int 11)])))]
6727 "TARGET_AVX"
6728 {
6729 operands[3] = gen_reg_rtx (V8SFmode);
6730 operands[4] = gen_reg_rtx (V8SFmode);
6731 })
6732
6732
;; Interleave the low pairs of two V4SF vectors:
;; result = { op1[0], op2[0], op1[1], op2[1] } (indices 0,4,1,5).
;; Alternative 0 is the two-operand unpcklps (non-AVX, operand 1 tied
;; to operand 0); alternative 1 is the three-operand vunpcklps (AVX).
6733 (define_insn "vec_interleave_lowv4sf"
6734 [(set (match_operand:V4SF 0 "register_operand" "=x,v")
6735 (vec_select:V4SF
6736 (vec_concat:V8SF
6737 (match_operand:V4SF 1 "register_operand" "0,v")
6738 (match_operand:V4SF 2 "vector_operand" "xBm,vm"))
6739 (parallel [(const_int 0) (const_int 4)
6740 (const_int 1) (const_int 5)])))]
6741 "TARGET_SSE"
6742 "@
6743 unpcklps\t{%2, %0|%0, %2}
6744 vunpcklps\t{%2, %1, %0|%0, %1, %2}"
6745 [(set_attr "isa" "noavx,avx")
6746 (set_attr "type" "sselog")
6747 (set_attr "prefix" "orig,maybe_evex")
6748 (set_attr "mode" "V4SF")])
6749
6749
6750 ;; These are modeled with the same vec_concat as the others so that we
6751 ;; capture users of shufps that can use the new instructions
;; 256-bit vmovshdup: duplicates each odd-indexed element of operand 1
;; into the preceding even position (selection 1,1,3,3,5,5,7,7).
;; The masked variant is additionally gated by
;; <mask_avx512vl_condition>.
6752 (define_insn "avx_movshdup256<mask_name>"
6753 [(set (match_operand:V8SF 0 "register_operand" "=v")
6754 (vec_select:V8SF
6755 (vec_concat:V16SF
6756 (match_operand:V8SF 1 "nonimmediate_operand" "vm")
6757 (match_dup 1))
6758 (parallel [(const_int 1) (const_int 1)
6759 (const_int 3) (const_int 3)
6760 (const_int 5) (const_int 5)
6761 (const_int 7) (const_int 7)])))]
6762 "TARGET_AVX && <mask_avx512vl_condition>"
6763 "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6764 [(set_attr "type" "sse")
6765 (set_attr "prefix" "vex")
6766 (set_attr "mode" "V8SF")])
6767
6767
;; 128-bit (v)movshdup.  The selection 1,1,7,7 is taken from
;; (vec_concat op1, op1); index 7 is element 3 of the second copy, so
;; the result is { op1[1], op1[1], op1[3], op1[3] }.
;; Requires TARGET_SSE3; the masked variant is additionally gated by
;; <mask_avx512vl_condition>.
6768 (define_insn "sse3_movshdup<mask_name>"
6769 [(set (match_operand:V4SF 0 "register_operand" "=v")
6770 (vec_select:V4SF
6771 (vec_concat:V8SF
6772 (match_operand:V4SF 1 "vector_operand" "vBm")
6773 (match_dup 1))
6774 (parallel [(const_int 1)
6775 (const_int 1)
6776 (const_int 7)
6777 (const_int 7)])))]
6778 "TARGET_SSE3 && <mask_avx512vl_condition>"
6779 "%vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6780 [(set_attr "type" "sse")
6781 (set_attr "prefix_rep" "1")
6782 (set_attr "prefix" "maybe_vex")
6783 (set_attr "mode" "V4SF")])
6784
6784
;; 512-bit vmovshdup: duplicates each odd-indexed element of operand 1
;; into the preceding even position (selection 1,1,3,3,...,15,15).
;; Requires TARGET_AVX512F; the <mask_name> substitution adds the
;; masked form.
6785 (define_insn "<mask_codefor>avx512f_movshdup512<mask_name>"
6786 [(set (match_operand:V16SF 0 "register_operand" "=v")
6787 (vec_select:V16SF
6788 (vec_concat:V32SF
6789 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
6790 (match_dup 1))
6791 (parallel [(const_int 1) (const_int 1)
6792 (const_int 3) (const_int 3)
6793 (const_int 5) (const_int 5)
6794 (const_int 7) (const_int 7)
6795 (const_int 9) (const_int 9)
6796 (const_int 11) (const_int 11)
6797 (const_int 13) (const_int 13)
6798 (const_int 15) (const_int 15)])))]
6799 "TARGET_AVX512F"
6800 "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6801 [(set_attr "type" "sse")
6802 (set_attr "prefix" "evex")
6803 (set_attr "mode" "V16SF")])
6804
6804
;; 256-bit vmovsldup: duplicates each even-indexed element of operand 1
;; into the following odd position (selection 0,0,2,2,4,4,6,6).
;; The masked variant is additionally gated by
;; <mask_avx512vl_condition>.
6805 (define_insn "avx_movsldup256<mask_name>"
6806 [(set (match_operand:V8SF 0 "register_operand" "=v")
6807 (vec_select:V8SF
6808 (vec_concat:V16SF
6809 (match_operand:V8SF 1 "nonimmediate_operand" "vm")
6810 (match_dup 1))
6811 (parallel [(const_int 0) (const_int 0)
6812 (const_int 2) (const_int 2)
6813 (const_int 4) (const_int 4)
6814 (const_int 6) (const_int 6)])))]
6815 "TARGET_AVX && <mask_avx512vl_condition>"
6816 "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6817 [(set_attr "type" "sse")
6818 (set_attr "prefix" "vex")
6819 (set_attr "mode" "V8SF")])
6820
6820
;; Duplicate the even-numbered SF elements of a V4SF value.  Operand 1 is
;; concatenated with itself into a V8SF, so the selection 0, 0, 6, 6 reads
;; elements 0 and 2 of operand 1 (index 6 is element 2 of the second copy).
6821 (define_insn "sse3_movsldup<mask_name>"
6822 [(set (match_operand:V4SF 0 "register_operand" "=v")
6823 (vec_select:V4SF
6824 (vec_concat:V8SF
6825 (match_operand:V4SF 1 "vector_operand" "vBm")
6826 (match_dup 1))
6827 (parallel [(const_int 0)
6828 (const_int 0)
6829 (const_int 6)
6830 (const_int 6)])))]
6831 "TARGET_SSE3 && <mask_avx512vl_condition>"
6832 "%vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6833 [(set_attr "type" "sse")
6834 (set_attr "prefix_rep" "1")
6835 (set_attr "prefix" "maybe_vex")
6836 (set_attr "mode" "V4SF")])
6837
;; 512-bit form: duplicate each even-numbered SF element of V16SF operand 1
;; (selection 0,0,2,2,...,14,14 from operand 1 concatenated with itself).
;; Requires TARGET_AVX512F and always emits vmovsldup.
6838 (define_insn "<mask_codefor>avx512f_movsldup512<mask_name>"
6839 [(set (match_operand:V16SF 0 "register_operand" "=v")
6840 (vec_select:V16SF
6841 (vec_concat:V32SF
6842 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
6843 (match_dup 1))
6844 (parallel [(const_int 0) (const_int 0)
6845 (const_int 2) (const_int 2)
6846 (const_int 4) (const_int 4)
6847 (const_int 6) (const_int 6)
6848 (const_int 8) (const_int 8)
6849 (const_int 10) (const_int 10)
6850 (const_int 12) (const_int 12)
6851 (const_int 14) (const_int 14)])))]
6852 "TARGET_AVX512F"
6853 "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6854 [(set_attr "type" "sse")
6855 (set_attr "prefix" "evex")
6856 (set_attr "mode" "V16SF")])
6857
;; Expander for the 256-bit shufps builtin form.  The integer in operand 3
;; is split into four 2-bit fields and turned into the eight explicit
;; element selectors expected by avx_shufps256_1:
;;   - fields at bits 0 and 2 index operand 1 (selectors 0..3);
;;   - fields at bits 4 and 6 index operand 2 (offset 8 in the V16SF concat);
;;   - the same four fields are reused with an extra +4 for the upper
;;     four result elements.
6858 (define_expand "avx_shufps256<mask_expand4_name>"
6859 [(match_operand:V8SF 0 "register_operand")
6860 (match_operand:V8SF 1 "register_operand")
6861 (match_operand:V8SF 2 "nonimmediate_operand")
6862 (match_operand:SI 3 "const_int_operand")]
6863 "TARGET_AVX"
6864 {
6865 int mask = INTVAL (operands[3]);
6866 emit_insn (gen_avx_shufps256_1<mask_expand4_name> (operands[0],
6867 operands[1],
6868 operands[2],
6869 GEN_INT ((mask >> 0) & 3),
6870 GEN_INT ((mask >> 2) & 3),
6871 GEN_INT (((mask >> 4) & 3) + 8),
6872 GEN_INT (((mask >> 6) & 3) + 8),
6873 GEN_INT (((mask >> 0) & 3) + 4),
6874 GEN_INT (((mask >> 2) & 3) + 4),
6875 GEN_INT (((mask >> 4) & 3) + 12),
6876 GEN_INT (((mask >> 6) & 3) + 12)
6877 <mask_expand4_args>));
6878 DONE;
6879 })
6880
6881 ;; One bit in mask selects 2 elements.
;; 256-bit vshufps with explicit element selectors (operands 3..10) into
;; the V16SF concatenation of operands 1 and 2.  The insn condition only
;; accepts selections where each upper-half selector (operands 7..10)
;; equals the matching lower-half selector (operands 3..6) plus 4.  The
;; output code rebuilds the 8-bit immediate from operands 3..6 and stores
;; it back into operands[3] before printing.
6882 (define_insn "avx_shufps256_1<mask_name>"
6883 [(set (match_operand:V8SF 0 "register_operand" "=v")
6884 (vec_select:V8SF
6885 (vec_concat:V16SF
6886 (match_operand:V8SF 1 "register_operand" "v")
6887 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
6888 (parallel [(match_operand 3 "const_0_to_3_operand" )
6889 (match_operand 4 "const_0_to_3_operand" )
6890 (match_operand 5 "const_8_to_11_operand" )
6891 (match_operand 6 "const_8_to_11_operand" )
6892 (match_operand 7 "const_4_to_7_operand" )
6893 (match_operand 8 "const_4_to_7_operand" )
6894 (match_operand 9 "const_12_to_15_operand")
6895 (match_operand 10 "const_12_to_15_operand")])))]
6896 "TARGET_AVX
6897 && <mask_avx512vl_condition>
6898 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
6899 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
6900 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
6901 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
6902 {
6903 int mask;
6904 mask = INTVAL (operands[3]);
6905 mask |= INTVAL (operands[4]) << 2;
6906 mask |= (INTVAL (operands[5]) - 8) << 4;
6907 mask |= (INTVAL (operands[6]) - 8) << 6;
6908 operands[3] = GEN_INT (mask);
6909
6910 return "vshufps\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
6911 }
6912 [(set_attr "type" "sseshuf")
6913 (set_attr "length_immediate" "1")
6914 (set_attr "prefix" "<mask_prefix>")
6915 (set_attr "mode" "V8SF")])
6916
;; Expander for 128-bit shufps.  The integer in operand 3 is split into
;; four 2-bit fields; the two low fields index operand 1 and the two high
;; fields get +4 so that they index operand 2 inside the V8SF concatenation
;; used by sse_shufps_v4sf.
6917 (define_expand "sse_shufps<mask_expand4_name>"
6918 [(match_operand:V4SF 0 "register_operand")
6919 (match_operand:V4SF 1 "register_operand")
6920 (match_operand:V4SF 2 "vector_operand")
6921 (match_operand:SI 3 "const_int_operand")]
6922 "TARGET_SSE"
6923 {
6924 int mask = INTVAL (operands[3]);
6925 emit_insn (gen_sse_shufps_v4sf<mask_expand4_name> (operands[0],
6926 operands[1],
6927 operands[2],
6928 GEN_INT ((mask >> 0) & 3),
6929 GEN_INT ((mask >> 2) & 3),
6930 GEN_INT (((mask >> 4) & 3) + 4),
6931 GEN_INT (((mask >> 6) & 3) + 4)
6932 <mask_expand4_args>));
6933 DONE;
6934 })
6935
;; Masked 128-bit vshufps (TARGET_AVX512VL).  The shuffled value is merged
;; with operand 7 (tied to the destination, or constant zero per the "0C"
;; constraint) under the mask register in operand 8.  The output code packs
;; selectors 3..6 back into an 8-bit immediate stored in operands[3].
6936 (define_insn "sse_shufps_v4sf_mask"
6937 [(set (match_operand:V4SF 0 "register_operand" "=v")
6938 (vec_merge:V4SF
6939 (vec_select:V4SF
6940 (vec_concat:V8SF
6941 (match_operand:V4SF 1 "register_operand" "v")
6942 (match_operand:V4SF 2 "nonimmediate_operand" "vm"))
6943 (parallel [(match_operand 3 "const_0_to_3_operand")
6944 (match_operand 4 "const_0_to_3_operand")
6945 (match_operand 5 "const_4_to_7_operand")
6946 (match_operand 6 "const_4_to_7_operand")]))
6947 (match_operand:V4SF 7 "nonimm_or_0_operand" "0C")
6948 (match_operand:QI 8 "register_operand" "Yk")))]
6949 "TARGET_AVX512VL"
6950 {
6951 int mask = 0;
6952 mask |= INTVAL (operands[3]) << 0;
6953 mask |= INTVAL (operands[4]) << 2;
6954 mask |= (INTVAL (operands[5]) - 4) << 4;
6955 mask |= (INTVAL (operands[6]) - 4) << 6;
6956 operands[3] = GEN_INT (mask);
6957
6958 return "vshufps\t{%3, %2, %1, %0%{%8%}%N7|%0%{%8%}%N7, %1, %2, %3}";
6959 }
6960 [(set_attr "type" "sseshuf")
6961 (set_attr "length_immediate" "1")
6962 (set_attr "prefix" "evex")
6963 (set_attr "mode" "V4SF")])
6964
;; Unmasked 128-bit shufps for every mode in VI4F_128.  Selectors 3 and 4
;; pick from operand 1, selectors 5 and 6 (range 4..7) pick from operand 2.
;; The output code packs them into an 8-bit immediate in operands[3].
;; Alternative 0 is the two-operand non-AVX form (operand 1 tied to the
;; destination); alternative 1 is the three-operand AVX form.
6965 (define_insn "sse_shufps_<mode>"
6966 [(set (match_operand:VI4F_128 0 "register_operand" "=x,v")
6967 (vec_select:VI4F_128
6968 (vec_concat:<ssedoublevecmode>
6969 (match_operand:VI4F_128 1 "register_operand" "0,v")
6970 (match_operand:VI4F_128 2 "vector_operand" "xBm,vm"))
6971 (parallel [(match_operand 3 "const_0_to_3_operand")
6972 (match_operand 4 "const_0_to_3_operand")
6973 (match_operand 5 "const_4_to_7_operand")
6974 (match_operand 6 "const_4_to_7_operand")])))]
6975 "TARGET_SSE"
6976 {
6977 int mask = 0;
6978 mask |= INTVAL (operands[3]) << 0;
6979 mask |= INTVAL (operands[4]) << 2;
6980 mask |= (INTVAL (operands[5]) - 4) << 4;
6981 mask |= (INTVAL (operands[6]) - 4) << 6;
6982 operands[3] = GEN_INT (mask);
6983
6984 switch (which_alternative)
6985 {
6986 case 0:
6987 return "shufps\t{%3, %2, %0|%0, %2, %3}";
6988 case 1:
6989 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6990 default:
6991 gcc_unreachable ();
6992 }
6993 }
6994 [(set_attr "isa" "noavx,avx")
6995 (set_attr "type" "sseshuf")
6996 (set_attr "length_immediate" "1")
6997 (set_attr "prefix" "orig,maybe_evex")
6998 (set_attr "mode" "V4SF")])
6999
;; Extract the high half (elements 2 and 3) of V4SF operand 1 as a V2SF.
;; Alternatives: 0 stores from a register to memory (movhps), 1 is
;; register to register (movhlps), 2 loads from offsettable memory
;; (movlps on %H1; presumably the upper 8 bytes of the source).
;; The insn condition rejects the case where both operands are memory.
7000 (define_insn "sse_storehps"
7001 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,v,v")
7002 (vec_select:V2SF
7003 (match_operand:V4SF 1 "nonimmediate_operand" "v,v,o")
7004 (parallel [(const_int 2) (const_int 3)])))]
7005 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7006 "@
7007 %vmovhps\t{%1, %0|%q0, %1}
7008 %vmovhlps\t{%1, %d0|%d0, %1}
7009 %vmovlps\t{%H1, %d0|%d0, %H1}"
7010 [(set_attr "type" "ssemov")
7011 (set_attr "prefix" "maybe_vex")
7012 (set_attr "mode" "V2SF,V4SF,V2SF")])
7013
;; Expander wrapper around sse_loadhps.  It passes the operands through
;; ix86_fixup_binary_operands, emits sse_loadhps into the destination that
;; helper returns, and copies the result into operands[0] when the helper
;; substituted a different destination.
7014 (define_expand "sse_loadhps_exp"
7015 [(set (match_operand:V4SF 0 "nonimmediate_operand")
7016 (vec_concat:V4SF
7017 (vec_select:V2SF
7018 (match_operand:V4SF 1 "nonimmediate_operand")
7019 (parallel [(const_int 0) (const_int 1)]))
7020 (match_operand:V2SF 2 "nonimmediate_operand")))]
7021 "TARGET_SSE"
7022 {
7023 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
7024
7025 emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));
7026
7027 /* Fix up the destination if needed. */
7028 if (dst != operands[0])
7029 emit_move_insn (operands[0], dst);
7030
7031 DONE;
7032 })
7033
;; Replace the high half of a V4SF: the result is the low half of operand 1
;; (elements 0 and 1) concatenated with V2SF operand 2.
;; Alternatives 0/1 take operand 2 from memory (movhps / vmovhps),
;; alternatives 2/3 take it from a register (movlhps / vmovlhps), and
;; alternative 4 has an offsettable memory destination tied to operand 1
;; and stores operand 2 with movlps to %H0.
7034 (define_insn "sse_loadhps"
7035 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,o")
7036 (vec_concat:V4SF
7037 (vec_select:V2SF
7038 (match_operand:V4SF 1 "nonimmediate_operand" " 0,v,0,v,0")
7039 (parallel [(const_int 0) (const_int 1)]))
7040 (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,v,v")))]
7041 "TARGET_SSE"
7042 "@
7043 movhps\t{%2, %0|%0, %q2}
7044 vmovhps\t{%2, %1, %0|%0, %1, %q2}
7045 movlhps\t{%2, %0|%0, %2}
7046 vmovlhps\t{%2, %1, %0|%0, %1, %2}
7047 %vmovlps\t{%2, %H0|%H0, %2}"
7048 [(set_attr "isa" "noavx,avx,noavx,avx,*")
7049 (set_attr "type" "ssemov")
7050 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
7051 (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
7052
;; Extract the low half (elements 0 and 1) of V4SF operand 1 as a V2SF.
;; Alternatives: 0 stores a register to memory (movlps), 1 is a
;; register-to-register copy (movaps), 2 loads from memory (movlps).
;; The insn condition rejects the case where both operands are memory.
7053 (define_insn "sse_storelps"
7054 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,v,v")
7055 (vec_select:V2SF
7056 (match_operand:V4SF 1 "nonimmediate_operand" " v,v,m")
7057 (parallel [(const_int 0) (const_int 1)])))]
7058 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7059 "@
7060 %vmovlps\t{%1, %0|%q0, %1}
7061 %vmovaps\t{%1, %0|%0, %1}
7062 %vmovlps\t{%1, %d0|%d0, %q1}"
7063 [(set_attr "type" "ssemov")
7064 (set_attr "prefix" "maybe_vex")
7065 (set_attr "mode" "V2SF,V4SF,V2SF")])
7066
;; Expander wrapper around sse_loadlps.  It passes the operands through
;; ix86_fixup_binary_operands, emits sse_loadlps into the destination that
;; helper returns, and copies the result into operands[0] when the helper
;; substituted a different destination.
7067 (define_expand "sse_loadlps_exp"
7068 [(set (match_operand:V4SF 0 "nonimmediate_operand")
7069 (vec_concat:V4SF
7070 (match_operand:V2SF 2 "nonimmediate_operand")
7071 (vec_select:V2SF
7072 (match_operand:V4SF 1 "nonimmediate_operand")
7073 (parallel [(const_int 2) (const_int 3)]))))]
7074 "TARGET_SSE"
7075 {
7076 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
7077
7078 emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));
7079
7080 /* Fix up the destination if needed. */
7081 if (dst != operands[0])
7082 emit_move_insn (operands[0], dst);
7083
7084 DONE;
7085 })
7086
;; Replace the low half of a V4SF: the result is V2SF operand 2
;; concatenated with the high half (elements 2 and 3) of operand 1.
;; Alternatives 0/1 are register-only and use shufps with the fixed
;; immediate 0xe4, which is why only they get length_immediate 1.
;; Alternatives 2/3 load operand 2 from memory (movlps / vmovlps), and
;; alternative 4 stores operand 2 into a memory destination tied to
;; operand 1.
7087 (define_insn "sse_loadlps"
7088 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,m")
7089 (vec_concat:V4SF
7090 (match_operand:V2SF 2 "nonimmediate_operand" " 0,v,m,m,v")
7091 (vec_select:V2SF
7092 (match_operand:V4SF 1 "nonimmediate_operand" " x,v,0,v,0")
7093 (parallel [(const_int 2) (const_int 3)]))))]
7094 "TARGET_SSE"
7095 "@
7096 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
7097 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
7098 movlps\t{%2, %0|%0, %q2}
7099 vmovlps\t{%2, %1, %0|%0, %1, %q2}
7100 %vmovlps\t{%2, %0|%q0, %2}"
7101 [(set_attr "isa" "noavx,avx,noavx,avx,*")
7102 (set_attr "type" "sseshuf,sseshuf,ssemov,ssemov,ssemov")
7103 (set (attr "length_immediate")
7104 (if_then_else (eq_attr "alternative" "0,1")
7105 (const_string "1")
7106 (const_string "*")))
7107 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
7108 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
7109
;; Register-to-register movss.  With the vec_merge mask (const_int 1),
;; element 0 of the result comes from operand 2 and the remaining elements
;; come from operand 1.  Alternative 0 is the two-operand non-AVX form with
;; operand 1 tied to the destination; alternative 1 is the AVX form.
7110 (define_insn "sse_movss"
7111 [(set (match_operand:V4SF 0 "register_operand" "=x,v")
7112 (vec_merge:V4SF
7113 (match_operand:V4SF 2 "register_operand" " x,v")
7114 (match_operand:V4SF 1 "register_operand" " 0,v")
7115 (const_int 1)))]
7116 "TARGET_SSE"
7117 "@
7118 movss\t{%2, %0|%0, %2}
7119 vmovss\t{%2, %1, %0|%0, %1, %2}"
7120 [(set_attr "isa" "noavx,avx")
7121 (set_attr "type" "ssemov")
7122 (set_attr "prefix" "orig,maybe_evex")
7123 (set_attr "mode" "SF")])
7124
;; Broadcast element 0 of V4SF register operand 1 to every element of the
;; destination, for each mode in VF1_128_256.  Requires TARGET_AVX2 and
;; emits vbroadcastss with a register source.
7125 (define_insn "avx2_vec_dup<mode>"
7126 [(set (match_operand:VF1_128_256 0 "register_operand" "=v")
7127 (vec_duplicate:VF1_128_256
7128 (vec_select:SF
7129 (match_operand:V4SF 1 "register_operand" "v")
7130 (parallel [(const_int 0)]))))]
7131 "TARGET_AVX2"
7132 "vbroadcastss\t{%1, %0|%0, %1}"
7133 [(set_attr "type" "sselog1")
7134 (set_attr "prefix" "maybe_evex")
7135 (set_attr "mode" "<MODE>")])
7136
;; Broadcast element 0 of a V8SF register to all eight elements.  The
;; source is printed with the %x modifier (presumably its 128-bit register
;; name), since only element 0 is read.
7137 (define_insn "avx2_vec_dupv8sf_1"
7138 [(set (match_operand:V8SF 0 "register_operand" "=v")
7139 (vec_duplicate:V8SF
7140 (vec_select:SF
7141 (match_operand:V8SF 1 "register_operand" "v")
7142 (parallel [(const_int 0)]))))]
7143 "TARGET_AVX2"
7144 "vbroadcastss\t{%x1, %0|%0, %x1}"
7145 [(set_attr "type" "sselog1")
7146 (set_attr "prefix" "maybe_evex")
7147 (set_attr "mode" "V8SF")])
7148
;; Broadcast element 0 of a 512-bit float vector register (modes in VF_512)
;; to every element of the destination.  The mnemonic suffix comes from
;; <bcstscalarsuff>, and the source is printed with the %x modifier.
7149 (define_insn "avx512f_vec_dup<mode>_1"
7150 [(set (match_operand:VF_512 0 "register_operand" "=v")
7151 (vec_duplicate:VF_512
7152 (vec_select:<ssescalarmode>
7153 (match_operand:VF_512 1 "register_operand" "v")
7154 (parallel [(const_int 0)]))))]
7155 "TARGET_AVX512F"
7156 "vbroadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}"
7157 [(set_attr "type" "sselog1")
7158 (set_attr "prefix" "evex")
7159 (set_attr "mode" "<MODE>")])
7160
;; Build a V2SF from two SF operands when SSE4.1 is available.
;; Alternatives, in order:
;;   0-2  register operand 2: unpcklps / vunpcklps
;;   3-5  memory operand 2: insertps / vinsertps with immediate 0x10
;;   6    operand 1 in memory, operand 2 matching "C": movss load
;;   7-8  MMX registers (*y): punpckldq, or movd when operand 2 matches "C"
;; The insn condition rejects the case where both inputs are memory.
7161 ;; Although insertps takes register source, we prefer
7162 ;; unpcklps with register source since it is shorter.
7163 (define_insn "*vec_concatv2sf_sse4_1"
7164 [(set (match_operand:V2SF 0 "register_operand"
7165 "=Yr,*x, v,Yr,*x,v,v,*y ,*y")
7166 (vec_concat:V2SF
7167 (match_operand:SF 1 "nonimmediate_operand"
7168 " 0, 0,Yv, 0,0, v,m, 0 , m")
7169 (match_operand:SF 2 "nonimm_or_0_operand"
7170 " Yr,*x,Yv, m,m, m,C,*ym, C")))]
7171 "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
7172 "@
7173 unpcklps\t{%2, %0|%0, %2}
7174 unpcklps\t{%2, %0|%0, %2}
7175 vunpcklps\t{%2, %1, %0|%0, %1, %2}
7176 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
7177 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
7178 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
7179 %vmovss\t{%1, %0|%0, %1}
7180 punpckldq\t{%2, %0|%0, %2}
7181 movd\t{%1, %0|%0, %1}"
7182 [(set (attr "isa")
7183 (cond [(eq_attr "alternative" "0,1,3,4")
7184 (const_string "noavx")
7185 (eq_attr "alternative" "2,5")
7186 (const_string "avx")
7187 ]
7188 (const_string "*")))
7189 (set (attr "type")
7190 (cond [(eq_attr "alternative" "6")
7191 (const_string "ssemov")
7192 (eq_attr "alternative" "7")
7193 (const_string "mmxcvt")
7194 (eq_attr "alternative" "8")
7195 (const_string "mmxmov")
7196 ]
7197 (const_string "sselog")))
7198 (set (attr "prefix_data16")
7199 (if_then_else (eq_attr "alternative" "3,4")
7200 (const_string "1")
7201 (const_string "*")))
7202 (set (attr "prefix_extra")
7203 (if_then_else (eq_attr "alternative" "3,4,5")
7204 (const_string "1")
7205 (const_string "*")))
7206 (set (attr "length_immediate")
7207 (if_then_else (eq_attr "alternative" "3,4,5")
7208 (const_string "1")
7209 (const_string "*")))
7210 (set (attr "prefix")
7211 (cond [(eq_attr "alternative" "2,5")
7212 (const_string "maybe_evex")
7213 (eq_attr "alternative" "6")
7214 (const_string "maybe_vex")
7215 ]
7216 (const_string "orig")))
7217 (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
7218
;; Build a V2SF from two SF operands with plain SSE.  Operand 2 is a
;; register or constant zero (reg_or_0_operand).  Alternatives: unpcklps of
;; two SSE registers, movss load when operand 2 matches "C", and the two
;; MMX-register (*y) counterparts punpckldq and movd.
7219 ;; ??? In theory we can match memory for the MMX alternative, but allowing
7220 ;; vector_operand for operand 2 and *not* allowing memory for the SSE
7221 ;; alternatives pretty much forces the MMX alternative to be chosen.
7222 (define_insn "*vec_concatv2sf_sse"
7223 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
7224 (vec_concat:V2SF
7225 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
7226 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
7227 "TARGET_SSE"
7228 "@
7229 unpcklps\t{%2, %0|%0, %2}
7230 movss\t{%1, %0|%0, %1}
7231 punpckldq\t{%2, %0|%0, %2}
7232 movd\t{%1, %0|%0, %1}"
7233 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7234 (set_attr "mode" "V4SF,SF,DI,DI")])
7235
;; Build a V4SF from two V2SF halves.  Operand 1 (the low half) is always a
;; register.  Alternatives 0/1 take operand 2 from a register (movlhps /
;; vmovlhps) and alternatives 2/3 take it from memory (movhps / vmovhps).
7236 (define_insn "*vec_concatv4sf"
7237 [(set (match_operand:V4SF 0 "register_operand" "=x,v,x,v")
7238 (vec_concat:V4SF
7239 (match_operand:V2SF 1 "register_operand" " 0,v,0,v")
7240 (match_operand:V2SF 2 "nonimmediate_operand" " x,v,m,m")))]
7241 "TARGET_SSE"
7242 "@
7243 movlhps\t{%2, %0|%0, %2}
7244 vmovlhps\t{%2, %1, %0|%0, %1, %2}
7245 movhps\t{%2, %0|%0, %q2}
7246 vmovhps\t{%2, %1, %0|%0, %1, %q2}"
7247 [(set_attr "isa" "noavx,avx,noavx,avx")
7248 (set_attr "type" "ssemov")
7249 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex")
7250 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
7251
;; Build a V4SF whose low half is V2SF operand 1 and whose high half is
;; constant zero (operand 2 is const0_operand).  Emitted as a single movq,
;; so it requires TARGET_SSE2.
;; NOTE(review): the destination uses "v" but operand 1 uses "xm";
;; confirm whether the narrower source register class is intentional.
7252 (define_insn "*vec_concatv4sf_0"
7253 [(set (match_operand:V4SF 0 "register_operand" "=v")
7254 (vec_concat:V4SF
7255 (match_operand:V2SF 1 "nonimmediate_operand" "xm")
7256 (match_operand:V2SF 2 "const0_operand" " C")))]
7257 "TARGET_SSE2"
7258 "%vmovq\t{%1, %0|%0, %1}"
7259 [(set_attr "type" "ssemov")
7260 (set_attr "prefix" "maybe_vex")
7261 (set_attr "mode" "DF")])
7262
;; Set element 0 of a 128-bit 4-element vector (modes in VI4F_128) from
;; scalar operand 2; the other elements come from operand 1, which is
;; either constant zero ("C") or a vector.  Alternatives, in order:
;;   0-2    operand 1 zero, register source: insertps / vinsertps, imm 0xe
;;   3-4    operand 1 zero: scalar move from memory, or movd from a GPR
;;   5-7    movss / vmovss
;;   8-10   pinsrd / vpinsrd with index 0
;;   11-13  memory destination tied to operand 1: output is "#", so these
;;          must be split (the define_split later in this file that turns a
;;          memory-destination vec_merge into a scalar store appears to
;;          handle them).
;; Alternative 4 is preferred for speed only when
;; TARGET_INTER_UNIT_MOVES_TO_VEC holds.
7263 ;; Avoid combining registers from different units in a single alternative,
7264 ;; see comment above inline_secondary_memory_needed function in i386.c
7265 (define_insn "vec_set<mode>_0"
7266 [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
7267 "=Yr,*x,v,v,v,x,x,v,Yr ,*x ,x ,m ,m ,m")
7268 (vec_merge:VI4F_128
7269 (vec_duplicate:VI4F_128
7270 (match_operand:<ssescalarmode> 2 "general_operand"
7271 " Yr,*x,v,m,r ,m,x,v,*rm,*rm,*rm,!x,!*re,!*fF"))
7272 (match_operand:VI4F_128 1 "nonimm_or_0_operand"
7273 " C , C,C,C,C ,C,0,v,0 ,0 ,x ,0 ,0 ,0")
7274 (const_int 1)))]
7275 "TARGET_SSE"
7276 "@
7277 insertps\t{$0xe, %2, %0|%0, %2, 0xe}
7278 insertps\t{$0xe, %2, %0|%0, %2, 0xe}
7279 vinsertps\t{$0xe, %2, %2, %0|%0, %2, %2, 0xe}
7280 %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
7281 %vmovd\t{%2, %0|%0, %2}
7282 movss\t{%2, %0|%0, %2}
7283 movss\t{%2, %0|%0, %2}
7284 vmovss\t{%2, %1, %0|%0, %1, %2}
7285 pinsrd\t{$0, %2, %0|%0, %2, 0}
7286 pinsrd\t{$0, %2, %0|%0, %2, 0}
7287 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
7288 #
7289 #
7290 #"
7291 [(set (attr "isa")
7292 (cond [(eq_attr "alternative" "0,1,8,9")
7293 (const_string "sse4_noavx")
7294 (eq_attr "alternative" "2,7,10")
7295 (const_string "avx")
7296 (eq_attr "alternative" "3,4")
7297 (const_string "sse2")
7298 (eq_attr "alternative" "5,6")
7299 (const_string "noavx")
7300 ]
7301 (const_string "*")))
7302 (set (attr "type")
7303 (cond [(eq_attr "alternative" "0,1,2,8,9,10")
7304 (const_string "sselog")
7305 (eq_attr "alternative" "12")
7306 (const_string "imov")
7307 (eq_attr "alternative" "13")
7308 (const_string "fmov")
7309 ]
7310 (const_string "ssemov")))
7311 (set (attr "prefix_extra")
7312 (if_then_else (eq_attr "alternative" "8,9,10")
7313 (const_string "1")
7314 (const_string "*")))
7315 (set (attr "length_immediate")
7316 (if_then_else (eq_attr "alternative" "8,9,10")
7317 (const_string "1")
7318 (const_string "*")))
7319 (set (attr "prefix")
7320 (cond [(eq_attr "alternative" "0,1,5,6,8,9")
7321 (const_string "orig")
7322 (eq_attr "alternative" "2")
7323 (const_string "maybe_evex")
7324 (eq_attr "alternative" "3,4")
7325 (const_string "maybe_vex")
7326 (eq_attr "alternative" "7,10")
7327 (const_string "vex")
7328 ]
7329 (const_string "*")))
7330 (set_attr "mode" "SF,SF,SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,TI,*,*,*")
7331 (set (attr "preferred_for_speed")
7332 (cond [(eq_attr "alternative" "4")
7333 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
7334 ]
7335 (symbol_ref "true")))])
7336
;; Insert SF operand 2 into one element of V4SF operand 1 using insertps.
;; Operand 3 is the vec_merge mask.  The insn condition requires it to be a
;; single set bit whose index is below the number of V4SF elements
;; (exact_log2 returns a negative value otherwise, which the unsigned
;; comparison rejects).  The output code converts the mask into the
;; insertps immediate: element index shifted left by 4.
7337 ;; A subset is vec_setv4sf.
7338 (define_insn "*vec_setv4sf_sse4_1"
7339 [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,v")
7340 (vec_merge:V4SF
7341 (vec_duplicate:V4SF
7342 (match_operand:SF 2 "nonimmediate_operand" "Yrm,*xm,vm"))
7343 (match_operand:V4SF 1 "register_operand" "0,0,v")
7344 (match_operand:SI 3 "const_int_operand")))]
7345 "TARGET_SSE4_1
7346 && ((unsigned) exact_log2 (INTVAL (operands[3]))
7347 < GET_MODE_NUNITS (V4SFmode))"
7348 {
7349 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
7350 switch (which_alternative)
7351 {
7352 case 0:
7353 case 1:
7354 return "insertps\t{%3, %2, %0|%0, %2, %3}";
7355 case 2:
7356 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
7357 default:
7358 gcc_unreachable ();
7359 }
7360 }
7361 [(set_attr "isa" "noavx,noavx,avx")
7362 (set_attr "type" "sselog")
7363 (set_attr "prefix_data16" "1,1,*")
7364 (set_attr "prefix_extra" "1")
7365 (set_attr "length_immediate" "1")
7366 (set_attr "prefix" "orig,orig,maybe_evex")
7367 (set_attr "mode" "V4SF")])
7368
;; 256/512-bit counterpart of vec_set<mode>_0: set element 0 from scalar
;; operand 2 with every other element zero (operand 1 is const0_operand).
;; All three alternatives print the destination with %x0 and rely on the
;; property stated in the comment below.  Alternative 2 (GPR source) is
;; preferred for speed only when TARGET_INTER_UNIT_MOVES_TO_VEC holds.
7369 ;; All of vinsertps, vmovss, vmovd clear also the higher bits.
7370 (define_insn "vec_set<mode>_0"
7371 [(set (match_operand:VI4F_256_512 0 "register_operand" "=v,v,v")
7372 (vec_merge:VI4F_256_512
7373 (vec_duplicate:VI4F_256_512
7374 (match_operand:<ssescalarmode> 2 "general_operand" "v,m,r"))
7375 (match_operand:VI4F_256_512 1 "const0_operand" "C,C,C")
7376 (const_int 1)))]
7377 "TARGET_AVX"
7378 "@
7379 vinsertps\t{$0xe, %2, %2, %x0|%x0, %2, %2, 0xe}
7380 vmov<ssescalarmodesuffix>\t{%x2, %x0|%x0, %2}
7381 vmovd\t{%2, %x0|%x0, %2}"
7382 [(set (attr "type")
7383 (if_then_else (eq_attr "alternative" "0")
7384 (const_string "sselog")
7385 (const_string "ssemov")))
7386 (set_attr "prefix" "maybe_evex")
7387 (set_attr "mode" "SF,<ssescalarmode>,SI")
7388 (set (attr "preferred_for_speed")
7389 (cond [(eq_attr "alternative" "2")
7390 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
7391 ]
7392 (symbol_ref "true")))])
7393
;; insertps with a user-supplied 8-bit immediate (operand 3), modelled as
;; UNSPEC_INSERTPS.  When operand 2 is in memory, the output code takes
;; bits 7:6 of the immediate as count_s, advances the memory address by
;; count_s * 4 bytes in SFmode, and clears those bits from the printed
;; immediate when they were non-zero.
7394 (define_insn "sse4_1_insertps"
7395 [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,v")
7396 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "Yrm,*xm,vm")
7397 (match_operand:V4SF 1 "register_operand" "0,0,v")
7398 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
7399 UNSPEC_INSERTPS))]
7400 "TARGET_SSE4_1"
7401 {
7402 if (MEM_P (operands[2]))
7403 {
7404 unsigned count_s = INTVAL (operands[3]) >> 6;
7405 if (count_s)
7406 operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
7407 operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
7408 }
7409 switch (which_alternative)
7410 {
7411 case 0:
7412 case 1:
7413 return "insertps\t{%3, %2, %0|%0, %2, %3}";
7414 case 2:
7415 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
7416 default:
7417 gcc_unreachable ();
7418 }
7419 }
7420 [(set_attr "isa" "noavx,noavx,avx")
7421 (set_attr "type" "sselog")
7422 (set_attr "prefix_data16" "1,1,*")
7423 (set_attr "prefix_extra" "1")
7424 (set_attr "length_immediate" "1")
7425 (set_attr "prefix" "orig,orig,maybe_evex")
7426 (set_attr "mode" "V4SF")])
7427
;; After reload, setting element 0 of a vector that lives in memory (the
;; merge source is the destination itself) from a non-memory scalar becomes
;; a plain scalar store: the destination is re-addressed in the scalar mode
;; at offset 0.
7428 (define_split
7429 [(set (match_operand:VI4F_128 0 "memory_operand")
7430 (vec_merge:VI4F_128
7431 (vec_duplicate:VI4F_128
7432 (match_operand:<ssescalarmode> 1 "nonmemory_operand"))
7433 (match_dup 0)
7434 (const_int 1)))]
7435 "TARGET_SSE && reload_completed"
7436 [(set (match_dup 0) (match_dup 1))]
7437 "operands[0] = adjust_address (operands[0], <ssescalarmode>mode, 0);")
7438
;; Expander for vec_set<mode>: store scalar operand 1 into the element of
;; vector operand 0 selected by the constant index in operand 2.  All the
;; work is delegated to ix86_expand_vector_set, called with `false' as its
;; first argument.
7439 (define_expand "vec_set<mode>"
7440 [(match_operand:V 0 "register_operand")
7441 (match_operand:<ssescalarmode> 1 "register_operand")
7442 (match_operand 2 "const_int_operand")]
7443 "TARGET_SSE"
7444 {
7445 ix86_expand_vector_set (false, operands[0], operands[1],
7446 INTVAL (operands[2]));
7447 DONE;
7448 })
7449
;; Extract element 0 of a V4SF.  It never emits an instruction itself
;; (output is "#"); after reload it is split into a plain SF move whose
;; source is the SFmode low part of operand 1.  The insn condition rejects
;; the case where both operands are memory.
7450 (define_insn_and_split "*vec_extractv4sf_0"
7451 [(set (match_operand:SF 0 "nonimmediate_operand" "=v,m,f,r")
7452 (vec_select:SF
7453 (match_operand:V4SF 1 "nonimmediate_operand" "vm,v,m,m")
7454 (parallel [(const_int 0)])))]
7455 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7456 "#"
7457 "&& reload_completed"
7458 [(set (match_dup 0) (match_dup 1))]
7459 "operands[1] = gen_lowpart (SFmode, operands[1]);")
7460
;; Extract one SF element (index in operand 2) from a V4SF register.
;; Alternatives 0-2 have a GPR-or-memory destination and emit extractps /
;; vextractps directly.  Alternatives 3-4 have an SSE register destination,
;; output "#", and are split after reload:
;;   - index 1 or 3: shufps of operand 1 with itself, using the index for
;;     every selector;
;;   - index 2: vec_interleave_highv4sf of operand 1 with itself;
;;   - index 0 hits gcc_unreachable (see the comment in the default case).
;; The split writes to the V4SF view of the SF destination register.
7461 (define_insn_and_split "*sse4_1_extractps"
7462 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,rm,rm,Yv,Yv")
7463 (vec_select:SF
7464 (match_operand:V4SF 1 "register_operand" "Yr,*x,v,0,v")
7465 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n,n,n")])))]
7466 "TARGET_SSE4_1"
7467 "@
7468 extractps\t{%2, %1, %0|%0, %1, %2}
7469 extractps\t{%2, %1, %0|%0, %1, %2}
7470 vextractps\t{%2, %1, %0|%0, %1, %2}
7471 #
7472 #"
7473 "&& reload_completed && SSE_REG_P (operands[0])"
7474 [(const_int 0)]
7475 {
7476 rtx dest = lowpart_subreg (V4SFmode, operands[0], SFmode);
7477 switch (INTVAL (operands[2]))
7478 {
7479 case 1:
7480 case 3:
7481 emit_insn (gen_sse_shufps_v4sf (dest, operands[1], operands[1],
7482 operands[2], operands[2],
7483 GEN_INT (INTVAL (operands[2]) + 4),
7484 GEN_INT (INTVAL (operands[2]) + 4)));
7485 break;
7486 case 2:
7487 emit_insn (gen_vec_interleave_highv4sf (dest, operands[1], operands[1]));
7488 break;
7489 default:
7490 /* 0 should be handled by the *vec_extractv4sf_0 pattern above. */
7491 gcc_unreachable ();
7492 }
7493 DONE;
7494 }
7495 [(set_attr "isa" "noavx,noavx,avx,noavx,avx")
7496 (set_attr "type" "sselog,sselog,sselog,*,*")
7497 (set_attr "prefix_data16" "1,1,1,*,*")
7498 (set_attr "prefix_extra" "1,1,1,*,*")
7499 (set_attr "length_immediate" "1,1,1,*,*")
7500 (set_attr "prefix" "orig,orig,maybe_evex,*,*")
7501 (set_attr "mode" "V4SF,V4SF,V4SF,*,*")])
7502
;; Extract an SF element from a V4SF held in offsettable memory.  After
;; reload this is split into a scalar SF load from the source address plus
;; 4 * index bytes (operand 2 is the element index, 0..3).
7503 (define_insn_and_split "*vec_extractv4sf_mem"
7504 [(set (match_operand:SF 0 "register_operand" "=v,*r,f")
7505 (vec_select:SF
7506 (match_operand:V4SF 1 "memory_operand" "o,o,o")
7507 (parallel [(match_operand 2 "const_0_to_3_operand" "n,n,n")])))]
7508 "TARGET_SSE"
7509 "#"
7510 "&& reload_completed"
7511 [(set (match_dup 0) (match_dup 1))]
7512 {
7513 operands[1] = adjust_address (operands[1], SFmode, INTVAL (operands[2]) * 4);
7514 })
7515
;; Name prefix of the quarter-extract expander for each 512-bit mode:
;; 32-bit element modes map to avx512f, 64-bit element modes to avx512dq.
7516 (define_mode_attr extract_type
7517 [(V16SF "avx512f") (V16SI "avx512f") (V8DF "avx512dq") (V8DI "avx512dq")])
7518
;; Element-width x element-count suffix of the extracted 128-bit quarter.
7519 (define_mode_attr extract_suf
7520 [(V16SF "32x4") (V16SI "32x4") (V8DF "64x2") (V8DI "64x2")])
7521
;; 512-bit modes that support quarter extraction; the 64-bit element modes
;; are enabled only under TARGET_AVX512DQ.
7522 (define_mode_iterator AVX512_VEC
7523 [(V8DF "TARGET_AVX512DQ") (V8DI "TARGET_AVX512DQ") V16SF V16SI])
7524
;; Expander for the masked extraction of one 128-bit quarter of a 512-bit
;; vector.  Operand 2 (0..3) selects the quarter; it is expanded into
;; explicit consecutive element indices: four starting at mask * 4 for
;; V16SI/V16SF, otherwise two starting at mask * 2.  Operands 3 and 4 are
;; passed through as the trailing arguments of the masked insn.
;; If the destination is memory and is not the same rtx as operand 3, the
;; result is produced in a fresh register and then moved to operand 0.
7525 (define_expand "<extract_type>_vextract<shuffletype><extract_suf>_mask"
7526 [(match_operand:<ssequartermode> 0 "nonimmediate_operand")
7527 (match_operand:AVX512_VEC 1 "register_operand")
7528 (match_operand:SI 2 "const_0_to_3_operand")
7529 (match_operand:<ssequartermode> 3 "nonimmediate_operand")
7530 (match_operand:QI 4 "register_operand")]
7531 "TARGET_AVX512F"
7532 {
7533 int mask;
7534 mask = INTVAL (operands[2]);
7535 rtx dest = operands[0];
7536
7537 if (MEM_P (operands[0]) && !rtx_equal_p (operands[0], operands[3]))
7538 dest = gen_reg_rtx (<ssequartermode>mode);
7539
7540 if (<MODE>mode == V16SImode || <MODE>mode == V16SFmode)
7541 emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (dest,
7542 operands[1], GEN_INT (mask * 4), GEN_INT (mask * 4 + 1),
7543 GEN_INT (mask * 4 + 2), GEN_INT (mask * 4 + 3), operands[3],
7544 operands[4]));
7545 else
7546 emit_insn (gen_avx512dq_vextract<shuffletype>64x2_1_mask (dest,
7547 operands[1], GEN_INT (mask * 2), GEN_INT (mask * 2 + 1), operands[3],
7548 operands[4]));
7549 if (dest != operands[0])
7550 emit_move_insn (operands[0], dest);
7551 DONE;
7552 })
7553
;; Masked store of one 128-bit quarter (two 64-bit elements) of a V8FI
;; register to memory.  The insn condition requires an even first index,
;; a second index equal to first + 1, and a merge source (operand 4) that
;; is the same rtx as the destination.  The printed immediate is the first
;; index divided by 2.
7554 (define_insn "avx512dq_vextract<shuffletype>64x2_1_maskm"
7555 [(set (match_operand:<ssequartermode> 0 "memory_operand" "=m")
7556 (vec_merge:<ssequartermode>
7557 (vec_select:<ssequartermode>
7558 (match_operand:V8FI 1 "register_operand" "v")
7559 (parallel [(match_operand 2 "const_0_to_7_operand")
7560 (match_operand 3 "const_0_to_7_operand")]))
7561 (match_operand:<ssequartermode> 4 "memory_operand" "0")
7562 (match_operand:QI 5 "register_operand" "Yk")))]
7563 "TARGET_AVX512DQ
7564 && INTVAL (operands[2]) % 2 == 0
7565 && INTVAL (operands[2]) == INTVAL (operands[3]) - 1
7566 && rtx_equal_p (operands[4], operands[0])"
7567 {
7568 operands[2] = GEN_INT ((INTVAL (operands[2])) >> 1);
7569 return "vextract<shuffletype>64x2\t{%2, %1, %0%{%5%}|%0%{%5%}, %1, %2}";
7570 }
7571 [(set_attr "type" "sselog")
7572 (set_attr "prefix_extra" "1")
7573 (set_attr "length_immediate" "1")
7574 (set_attr "memory" "store")
7575 (set_attr "prefix" "evex")
7576 (set_attr "mode" "<sseinsnmode>")])
7577
;; Masked store of one 128-bit quarter (four 32-bit elements) of a V16FI
;; register to memory.  The insn condition requires the first index to be
;; a multiple of 4, the four indices to be consecutive, and the merge
;; source (operand 6) to be the same rtx as the destination.  The printed
;; immediate is the first index divided by 4.
7578 (define_insn "avx512f_vextract<shuffletype>32x4_1_maskm"
7579 [(set (match_operand:<ssequartermode> 0 "memory_operand" "=m")
7580 (vec_merge:<ssequartermode>
7581 (vec_select:<ssequartermode>
7582 (match_operand:V16FI 1 "register_operand" "v")
7583 (parallel [(match_operand 2 "const_0_to_15_operand")
7584 (match_operand 3 "const_0_to_15_operand")
7585 (match_operand 4 "const_0_to_15_operand")
7586 (match_operand 5 "const_0_to_15_operand")]))
7587 (match_operand:<ssequartermode> 6 "memory_operand" "0")
7588 (match_operand:QI 7 "register_operand" "Yk")))]
7589 "TARGET_AVX512F
7590 && INTVAL (operands[2]) % 4 == 0
7591 && INTVAL (operands[2]) == INTVAL (operands[3]) - 1
7592 && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
7593 && INTVAL (operands[4]) == INTVAL (operands[5]) - 1
7594 && rtx_equal_p (operands[6], operands[0])"
7595 {
7596 operands[2] = GEN_INT (INTVAL (operands[2]) >> 2);
7597 return "vextract<shuffletype>32x4\t{%2, %1, %0%{%7%}|%0%{%7%}, %1, %2}";
7598 }
7599 [(set_attr "type" "sselog")
7600 (set_attr "prefix_extra" "1")
7601 (set_attr "length_immediate" "1")
7602 (set_attr "memory" "store")
7603 (set_attr "prefix" "evex")
7604 (set_attr "mode" "<sseinsnmode>")])
7605
;; Extract one 128-bit quarter (two 64-bit elements) of a V8FI register.
;; The insn condition requires an even first index and a second index equal
;; to first + 1; the printed immediate is the first index divided by 2.
;; The destination predicate and constraint come from the
;; <store_mask_predicate> / <store_mask_constraint> substitution attributes.
7606 (define_insn "<mask_codefor>avx512dq_vextract<shuffletype>64x2_1<mask_name>"
7607 [(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
7608 (vec_select:<ssequartermode>
7609 (match_operand:V8FI 1 "register_operand" "v")
7610 (parallel [(match_operand 2 "const_0_to_7_operand")
7611 (match_operand 3 "const_0_to_7_operand")])))]
7612 "TARGET_AVX512DQ
7613 && INTVAL (operands[2]) % 2 == 0
7614 && INTVAL (operands[2]) == INTVAL (operands[3]) - 1"
7615 {
7616 operands[2] = GEN_INT (INTVAL (operands[2]) >> 1);
7617 return "vextract<shuffletype>64x2\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}";
7618 }
7619 [(set_attr "type" "sselog1")
7620 (set_attr "prefix_extra" "1")
7621 (set_attr "length_immediate" "1")
7622 (set_attr "prefix" "evex")
7623 (set_attr "mode" "<sseinsnmode>")])
7624
;; After reload, extracting the lowest quarter (elements 0 and 1) of a V8FI
;; register becomes a plain move.  Normally the source is narrowed with
;; gen_lowpart.  Without AVX512VL, when the destination is a register and
;; the source is an EXT_REX_SSE register, the destination is widened to the
;; full mode instead.  The split does not apply without AVX512VL when the
;; destination is not a register and the source is an EXT_REX_SSE register.
7625 (define_split
7626 [(set (match_operand:<ssequartermode> 0 "nonimmediate_operand")
7627 (vec_select:<ssequartermode>
7628 (match_operand:V8FI 1 "register_operand")
7629 (parallel [(const_int 0) (const_int 1)])))]
7630 "TARGET_AVX512DQ
7631 && reload_completed
7632 && (TARGET_AVX512VL
7633 || REG_P (operands[0])
7634 || !EXT_REX_SSE_REG_P (operands[1]))"
7635 [(set (match_dup 0) (match_dup 1))]
7636 {
7637 if (!TARGET_AVX512VL
7638 && REG_P (operands[0])
7639 && EXT_REX_SSE_REG_P (operands[1]))
7640 operands[0]
7641 = lowpart_subreg (<MODE>mode, operands[0], <ssequartermode>mode);
7642 else
7643 operands[1] = gen_lowpart (<ssequartermode>mode, operands[1]);
7644 })
7645
;; Extract one 128-bit quarter (four 32-bit elements) of a V16FI register.
;; The insn condition requires the first index to be a multiple of 4 and
;; the four indices to be consecutive; the printed immediate is the first
;; index divided by 4.
7646 (define_insn "<mask_codefor>avx512f_vextract<shuffletype>32x4_1<mask_name>"
7647 [(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
7648 (vec_select:<ssequartermode>
7649 (match_operand:V16FI 1 "register_operand" "v")
7650 (parallel [(match_operand 2 "const_0_to_15_operand")
7651 (match_operand 3 "const_0_to_15_operand")
7652 (match_operand 4 "const_0_to_15_operand")
7653 (match_operand 5 "const_0_to_15_operand")])))]
7654 "TARGET_AVX512F
7655 && INTVAL (operands[2]) % 4 == 0
7656 && INTVAL (operands[2]) == INTVAL (operands[3]) - 1
7657 && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
7658 && INTVAL (operands[4]) == INTVAL (operands[5]) - 1"
7659 {
7660 operands[2] = GEN_INT (INTVAL (operands[2]) >> 2);
7661 return "vextract<shuffletype>32x4\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
7662 }
7663 [(set_attr "type" "sselog1")
7664 (set_attr "prefix_extra" "1")
7665 (set_attr "length_immediate" "1")
7666 (set_attr "prefix" "evex")
7667 (set_attr "mode" "<sseinsnmode>")])
7668
;; After reload, extracting the lowest quarter (elements 0..3) of a V16FI
;; register becomes a plain move.  Normally the source is narrowed with
;; gen_lowpart.  Without AVX512VL, when the destination is a register and
;; the source is an EXT_REX_SSE register, the destination is widened to the
;; full mode instead.
7669 (define_split
7670 [(set (match_operand:<ssequartermode> 0 "nonimmediate_operand")
7671 (vec_select:<ssequartermode>
7672 (match_operand:V16FI 1 "register_operand")
7673 (parallel [(const_int 0) (const_int 1)
7674 (const_int 2) (const_int 3)])))]
7675 "TARGET_AVX512F
7676 && reload_completed
7677 && (TARGET_AVX512VL
7678 || REG_P (operands[0])
7679 || !EXT_REX_SSE_REG_P (operands[1]))"
7680 [(set (match_dup 0) (match_dup 1))]
7681 {
7682 if (!TARGET_AVX512VL
7683 && REG_P (operands[0])
7684 && EXT_REX_SSE_REG_P (operands[1]))
7685 operands[0]
7686 = lowpart_subreg (<MODE>mode, operands[0], <ssequartermode>mode);
7687 else
7688 operands[1] = gen_lowpart (<ssequartermode>mode, operands[1]);
7689 })
7690
;; Name prefix of the half-extract expander for each 512-bit mode:
;; 32-bit element modes map to avx512dq, 64-bit element modes to avx512f.
7691 (define_mode_attr extract_type_2
7692 [(V16SF "avx512dq") (V16SI "avx512dq") (V8DF "avx512f") (V8DI "avx512f")])
7693
;; Element-width x element-count suffix of the extracted 256-bit half.
7694 (define_mode_attr extract_suf_2
7695 [(V16SF "32x8") (V16SI "32x8") (V8DF "64x4") (V8DI "64x4")])
7696
;; 512-bit modes that support half extraction; the 32-bit element modes
;; are enabled only under TARGET_AVX512DQ.
7697 (define_mode_iterator AVX512_VEC_2
7698 [(V16SF "TARGET_AVX512DQ") (V16SI "TARGET_AVX512DQ") V8DF V8DI])
7699
;; Expander for the masked extraction of one 256-bit half of a 512-bit
;; vector.  Operand 2 selects the half: 0 dispatches to
;; vec_extract_lo_<mode>_mask and 1 to vec_extract_hi_<mode>_mask.
;; If the destination is memory and is not the same rtx as operand 3, the
;; result is produced in a fresh register and then moved to operand 0.
7700 (define_expand "<extract_type_2>_vextract<shuffletype><extract_suf_2>_mask"
7701 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
7702 (match_operand:AVX512_VEC_2 1 "register_operand")
7703 (match_operand:SI 2 "const_0_to_1_operand")
7704 (match_operand:<ssehalfvecmode> 3 "nonimmediate_operand")
7705 (match_operand:QI 4 "register_operand")]
7706 "TARGET_AVX512F"
7707 {
7708 rtx (*insn)(rtx, rtx, rtx, rtx);
7709 rtx dest = operands[0];
7710
7711 if (MEM_P (dest) && !rtx_equal_p (dest, operands[3]))
7712 dest = gen_reg_rtx (<ssehalfvecmode>mode);
7713
7714 switch (INTVAL (operands[2]))
7715 {
7716 case 0:
7717 insn = gen_vec_extract_lo_<mode>_mask;
7718 break;
7719 case 1:
7720 insn = gen_vec_extract_hi_<mode>_mask;
7721 break;
7722 default:
7723 gcc_unreachable ();
7724 }
7725
7726 emit_insn (insn (dest, operands[1], operands[3], operands[4]));
7727 if (dest != operands[0])
7728 emit_move_insn (operands[0], dest);
7729 DONE;
7730 })
7731
;; After reload, rewrite the extraction of elements 0..3 of a V8FI operand
;; as a move of the source's low part.  Not used when both operands are
;; memory.  Without AVX512VL it applies only when the destination is a
;; register and the source is not an EXT_REX SSE register.
7732 (define_split
7733 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
7734 (vec_select:<ssehalfvecmode>
7735 (match_operand:V8FI 1 "nonimmediate_operand")
7736 (parallel [(const_int 0) (const_int 1)
7737 (const_int 2) (const_int 3)])))]
7738 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
7739 && reload_completed
7740 && (TARGET_AVX512VL
7741 || (REG_P (operands[0]) && !EXT_REX_SSE_REG_P (operands[1])))"
7742 [(set (match_dup 0) (match_dup 1))]
7743 "operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
7744
;; Masked store of elements 0..3 of a V8FI register to memory: the
;; vec_merge's other arm (operand 2) must be the destination memory
;; itself (constraint "0" plus the rtx_equal_p check), and operand 3 is
;; the QImode mask register.  Emits vextract*64x4 with immediate 0.
7745 (define_insn "vec_extract_lo_<mode>_maskm"
7746 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
7747 (vec_merge:<ssehalfvecmode>
7748 (vec_select:<ssehalfvecmode>
7749 (match_operand:V8FI 1 "register_operand" "v")
7750 (parallel [(const_int 0) (const_int 1)
7751 (const_int 2) (const_int 3)]))
7752 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
7753 (match_operand:QI 3 "register_operand" "Yk")))]
7754 "TARGET_AVX512F
7755 && rtx_equal_p (operands[2], operands[0])"
7756 "vextract<shuffletype>64x4\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}"
7757 [(set_attr "type" "sselog1")
7758 (set_attr "prefix_extra" "1")
7759 (set_attr "length_immediate" "1")
7760 (set_attr "prefix" "evex")
7761 (set_attr "mode" "<sseinsnmode>")])
7762
;; Extraction of elements 0..3 of a V8FI operand (optionally masked via
;; <mask_name>).  A real vextract*64x4 with immediate 0 is emitted when a
;; mask is applied, or when AVX512VL is unavailable and the source is not
;; memory; in every other case "#" is returned so the insn gets split.
;; The unmasked form rejects memory-to-memory.
7763 (define_insn "vec_extract_lo_<mode><mask_name>"
7764 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=v,<store_mask_constraint>,v")
7765 (vec_select:<ssehalfvecmode>
7766 (match_operand:V8FI 1 "<store_mask_predicate>" "v,v,<store_mask_constraint>")
7767 (parallel [(const_int 0) (const_int 1)
7768 (const_int 2) (const_int 3)])))]
7769 "TARGET_AVX512F
7770 && (<mask_applied> || !(MEM_P (operands[0]) && MEM_P (operands[1])))"
7771 {
7772 if (<mask_applied> || (!TARGET_AVX512VL && !MEM_P (operands[1])))
7773 return "vextract<shuffletype>64x4\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
7774 else
7775 return "#";
7776 }
7777 [(set_attr "type" "sselog1")
7778 (set_attr "prefix_extra" "1")
7779 (set_attr "length_immediate" "1")
7780 (set_attr "memory" "none,store,load")
7781 (set_attr "prefix" "evex")
7782 (set_attr "mode" "<sseinsnmode>")])
7783
;; Masked store of elements 4..7 of a V8FI register to memory; operand 2
;; must be the destination memory itself and operand 3 is the QImode mask.
;; Emits vextract*64x4 with immediate 1.
;; NOTE(review): "type" is "sselog" here while the sibling extract
;; patterns use "sselog1" -- confirm whether that is intentional.
7784 (define_insn "vec_extract_hi_<mode>_maskm"
7785 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
7786 (vec_merge:<ssehalfvecmode>
7787 (vec_select:<ssehalfvecmode>
7788 (match_operand:V8FI 1 "register_operand" "v")
7789 (parallel [(const_int 4) (const_int 5)
7790 (const_int 6) (const_int 7)]))
7791 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
7792 (match_operand:QI 3 "register_operand" "Yk")))]
7793 "TARGET_AVX512F
7794 && rtx_equal_p (operands[2], operands[0])"
7795 "vextract<shuffletype>64x4\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
7796 [(set_attr "type" "sselog")
7797 (set_attr "prefix_extra" "1")
7798 (set_attr "length_immediate" "1")
7799 (set_attr "memory" "store")
7800 (set_attr "prefix" "evex")
7801 (set_attr "mode" "<sseinsnmode>")])
7802
;; Extraction of elements 4..7 of a V8FI register (optionally masked via
;; <mask_name>); always emits vextract*64x4 with immediate 1.
7803 (define_insn "vec_extract_hi_<mode><mask_name>"
7804 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
7805 (vec_select:<ssehalfvecmode>
7806 (match_operand:V8FI 1 "register_operand" "v")
7807 (parallel [(const_int 4) (const_int 5)
7808 (const_int 6) (const_int 7)])))]
7809 "TARGET_AVX512F"
7810 "vextract<shuffletype>64x4\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}"
7811 [(set_attr "type" "sselog1")
7812 (set_attr "prefix_extra" "1")
7813 (set_attr "length_immediate" "1")
7814 (set_attr "prefix" "evex")
7815 (set_attr "mode" "<sseinsnmode>")])
7816
;; Masked store of elements 8..15 of a V16FI register to memory; operand 2
;; must be the destination memory itself and operand 3 is the QImode mask.
;; Requires AVX512DQ and emits vextract*32x8 with immediate 1.
7817 (define_insn "vec_extract_hi_<mode>_maskm"
7818 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
7819 (vec_merge:<ssehalfvecmode>
7820 (vec_select:<ssehalfvecmode>
7821 (match_operand:V16FI 1 "register_operand" "v")
7822 (parallel [(const_int 8) (const_int 9)
7823 (const_int 10) (const_int 11)
7824 (const_int 12) (const_int 13)
7825 (const_int 14) (const_int 15)]))
7826 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
7827 (match_operand:QI 3 "register_operand" "Yk")))]
7828 "TARGET_AVX512DQ
7829 && rtx_equal_p (operands[2], operands[0])"
7830 "vextract<shuffletype>32x8\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
7831 [(set_attr "type" "sselog1")
7832 (set_attr "prefix_extra" "1")
7833 (set_attr "length_immediate" "1")
7834 (set_attr "prefix" "evex")
7835 (set_attr "mode" "<sseinsnmode>")])
7836
;; Extraction of elements 8..15 of a V16FI register (optionally masked
;; via <mask_name>).  Two alternatives selected by the "isa" attribute:
;; with AVX512DQ the 32x8 form (which carries the mask operand), otherwise
;; an unmasked vextracti64x4; both use immediate 1.
7837 (define_insn "vec_extract_hi_<mode><mask_name>"
7838 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>,vm")
7839 (vec_select:<ssehalfvecmode>
7840 (match_operand:V16FI 1 "register_operand" "v,v")
7841 (parallel [(const_int 8) (const_int 9)
7842 (const_int 10) (const_int 11)
7843 (const_int 12) (const_int 13)
7844 (const_int 14) (const_int 15)])))]
7845 "TARGET_AVX512F && <mask_avx512dq_condition>"
7846 "@
7847 vextract<shuffletype>32x8\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}
7848 vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
7849 [(set_attr "type" "sselog1")
7850 (set_attr "prefix_extra" "1")
7851 (set_attr "isa" "avx512dq,noavx512dq")
7852 (set_attr "length_immediate" "1")
7853 (set_attr "prefix" "evex")
7854 (set_attr "mode" "<sseinsnmode>")])
7855
;; Masked extraction of one 128-bit half of a VI48F_256 vector.
;;   operand 0: destination (register or memory)
;;   operand 1: source vector register
;;   operand 2: half selector, 0 (low) or 1 (high)
;;   operand 3, operand 4: forwarded unchanged to the selected
;;     gen_vec_extract_{lo,hi}_<mode>_mask generator.
;; A memory destination is replaced by a fresh pseudo (copied to operand 0
;; afterwards) in these cases:
;;   - 4-byte elements: whenever it differs from operand 3;
;;   - other element sizes: for the high half always, and for the low half
;;     when it differs from operand 3 and operand 3 is not a CONST_VECTOR.
7856 (define_expand "avx512vl_vextractf128<mode>"
7857 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
7858 (match_operand:VI48F_256 1 "register_operand")
7859 (match_operand:SI 2 "const_0_to_1_operand")
7860 (match_operand:<ssehalfvecmode> 3 "nonimm_or_0_operand")
7861 (match_operand:QI 4 "register_operand")]
7862 "TARGET_AVX512DQ && TARGET_AVX512VL"
7863 {
7864 rtx (*insn)(rtx, rtx, rtx, rtx);
7865 rtx dest = operands[0];
7866
7867 if (MEM_P (dest)
7868 && (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 4
7869 /* For V8S[IF]mode there are maskm insns with =m and 0
7870 constraints. */
7871 ? !rtx_equal_p (dest, operands[3])
7872 /* For V4D[IF]mode, hi insns don't allow memory, and
7873 lo insns have =m and 0C constraints. */
7874 : (operands[2] != const0_rtx
7875 || (!rtx_equal_p (dest, operands[3])
7876 && GET_CODE (operands[3]) != CONST_VECTOR))))
7877 dest = gen_reg_rtx (<ssehalfvecmode>mode);
7878 switch (INTVAL (operands[2]))
7879 {
7880 case 0:
7881 insn = gen_vec_extract_lo_<mode>_mask;
7882 break;
7883 case 1:
7884 insn = gen_vec_extract_hi_<mode>_mask;
7885 break;
7886 default:
7887 gcc_unreachable ();
7888 }
7889
7890 emit_insn (insn (dest, operands[1], operands[3], operands[4]));
7891 if (dest != operands[0])
7892 emit_move_insn (operands[0], dest);
7893 DONE;
7894 })
7895
;; Unmasked extraction of one half of a V_256 vector: operand 2 (0 or 1)
;; selects between gen_vec_extract_lo_<mode> and gen_vec_extract_hi_<mode>,
;; which is then emitted with operand 0 as destination and operand 1 as
;; source.
7896 (define_expand "avx_vextractf128<mode>"
7897 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
7898 (match_operand:V_256 1 "register_operand")
7899 (match_operand:SI 2 "const_0_to_1_operand")]
7900 "TARGET_AVX"
7901 {
7902 rtx (*insn)(rtx, rtx);
7903
7904 switch (INTVAL (operands[2]))
7905 {
7906 case 0:
7907 insn = gen_vec_extract_lo_<mode>;
7908 break;
7909 case 1:
7910 insn = gen_vec_extract_hi_<mode>;
7911 break;
7912 default:
7913 gcc_unreachable ();
7914 }
7915
7916 emit_insn (insn (operands[0], operands[1]));
7917 DONE;
7918 })
7919
;; Extraction of elements 0..7 of a V16FI operand (optionally masked via
;; <mask_name>).  A real vextract*32x8 with immediate 0 is emitted when a
;; mask is applied, or when AVX512VL is unavailable, the destination is
;; not a register and the source is an EXT_REX SSE register.  Otherwise
;; "#" is returned and the insn is left to a splitter.  The unmasked form
;; rejects memory-to-memory.
7920 (define_insn "vec_extract_lo_<mode><mask_name>"
7921 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,v,m")
7922 (vec_select:<ssehalfvecmode>
7923 (match_operand:V16FI 1 "<store_mask_predicate>"
7924 "v,<store_mask_constraint>,v")
7925 (parallel [(const_int 0) (const_int 1)
7926 (const_int 2) (const_int 3)
7927 (const_int 4) (const_int 5)
7928 (const_int 6) (const_int 7)])))]
7929 "TARGET_AVX512F
7930 && <mask_mode512bit_condition>
7931 && (<mask_applied> || !(MEM_P (operands[0]) && MEM_P (operands[1])))"
7932 {
7933 if (<mask_applied>
7934 || (!TARGET_AVX512VL
7935 && !REG_P (operands[0])
7936 && EXT_REX_SSE_REG_P (operands[1])))
7937 return "vextract<shuffletype>32x8\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
7938 else
7939 return "#";
7940 }
7941 [(set_attr "type" "sselog1")
7942 (set_attr "prefix_extra" "1")
7943 (set_attr "length_immediate" "1")
7944 (set_attr "memory" "none,load,store")
7945 (set_attr "prefix" "evex")
7946 (set_attr "mode" "<sseinsnmode>")])
7947
;; After reload, rewrite the extraction of elements 0..7 of a V16FI
;; operand as a plain move.  Not used for memory-to-memory.  Applies when
;; AVX512VL is enabled, the destination is a register, or the source is
;; not an EXT_REX SSE register.  If the destination is a register and the
;; source is an EXT_REX register without AVX512VL, the destination is
;; widened to the source mode; otherwise the source is narrowed.
7948 (define_split
7949 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
7950 (vec_select:<ssehalfvecmode>
7951 (match_operand:V16FI 1 "nonimmediate_operand")
7952 (parallel [(const_int 0) (const_int 1)
7953 (const_int 2) (const_int 3)
7954 (const_int 4) (const_int 5)
7955 (const_int 6) (const_int 7)])))]
7956 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
7957 && reload_completed
7958 && (TARGET_AVX512VL
7959 || REG_P (operands[0])
7960 || !EXT_REX_SSE_REG_P (operands[1]))"
7961 [(set (match_dup 0) (match_dup 1))]
7962 {
7963 if (!TARGET_AVX512VL
7964 && REG_P (operands[0])
7965 && EXT_REX_SSE_REG_P (operands[1]))
7966 operands[0]
7967 = lowpart_subreg (<MODE>mode, operands[0], <ssehalfvecmode>mode);
7968 else
7969 operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);
7970 })
7971
;; Extraction of elements 0..1 of a VI8F_256 operand (optionally masked
;; via <mask_name>).  The masked form emits vextract*64x2 with immediate
;; 0; the unmasked form returns "#" and is left to a splitter.  The
;; unmasked form rejects memory-to-memory.
;; The destination is printed with <mask_operand2>, like the other masked
;; extract patterns in this file, so that the zero-masking part of the
;; operand is emitted together with the mask register rather than only
;; the bare "%{%3%}" mask.
7972 (define_insn "vec_extract_lo_<mode><mask_name>"
7973 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=v,v,m")
7974 (vec_select:<ssehalfvecmode>
7975 (match_operand:VI8F_256 1 "<store_mask_predicate>"
7976 "v,<store_mask_constraint>,v")
7977 (parallel [(const_int 0) (const_int 1)])))]
7978 "TARGET_AVX
7979 && <mask_avx512vl_condition> && <mask_avx512dq_condition>
7980 && (<mask_applied> || !(MEM_P (operands[0]) && MEM_P (operands[1])))"
7981 {
7982 if (<mask_applied>)
7983 return "vextract<shuffletype>64x2\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
7984 else
7985 return "#";
7986 }
7987 [(set_attr "type" "sselog1")
7988 (set_attr "prefix_extra" "1")
7989 (set_attr "length_immediate" "1")
7990 (set_attr "memory" "none,load,store")
7991 (set_attr "prefix" "evex")
7992 (set_attr "mode" "XI")])
7993
;; After reload, rewrite the extraction of elements 0..1 of a VI8F_256
;; operand as a move of the source's low part (never memory-to-memory).
7994 (define_split
7995 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
7996 (vec_select:<ssehalfvecmode>
7997 (match_operand:VI8F_256 1 "nonimmediate_operand")
7998 (parallel [(const_int 0) (const_int 1)])))]
7999 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))
8000 && reload_completed"
8001 [(set (match_dup 0) (match_dup 1))]
8002 "operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
8003
;; Extraction of elements 2..3 of a VI8F_256 register (optionally masked
;; via <mask_name>), always with immediate 1.  Mnemonic selection:
;;   AVX512VL and AVX512DQ: vextract*64x2 (carries the mask operand);
;;   AVX512VL only:         vextract*32x4, unmasked;
;;   otherwise:             vextract<i128>, unmasked.
8004 (define_insn "vec_extract_hi_<mode><mask_name>"
8005 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=v,<store_mask_constraint>")
8006 (vec_select:<ssehalfvecmode>
8007 (match_operand:VI8F_256 1 "register_operand" "v,v")
8008 (parallel [(const_int 2) (const_int 3)])))]
8009 "TARGET_AVX && <mask_avx512vl_condition> && <mask_avx512dq_condition>"
8010 {
8011 if (TARGET_AVX512VL)
8012 {
8013 if (TARGET_AVX512DQ)
8014 return "vextract<shuffletype>64x2\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}";
8015 else
8016 return "vextract<shuffletype>32x4\t{$0x1, %1, %0|%0, %1, 0x1}";
8017 }
8018 else
8019 return "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}";
8020 }
8021 [(set_attr "type" "sselog1")
8022 (set_attr "prefix_extra" "1")
8023 (set_attr "length_immediate" "1")
8024 (set_attr "prefix" "vex")
8025 (set_attr "mode" "<sseinsnmode>")])
8026
;; After reload, rewrite the extraction of elements 0..3 of a VI4F_256
;; operand as a move of the source's low part (never memory-to-memory).
8027 (define_split
8028 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
8029 (vec_select:<ssehalfvecmode>
8030 (match_operand:VI4F_256 1 "nonimmediate_operand")
8031 (parallel [(const_int 0) (const_int 1)
8032 (const_int 2) (const_int 3)])))]
8033 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))
8034 && reload_completed"
8035 [(set (match_dup 0) (match_dup 1))]
8036 "operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
8037
;; Extraction of elements 0..3 of a VI4F_256 operand (optionally masked
;; via <mask_name>).  The masked form emits vextract*32x4 with immediate
;; 0; the unmasked form returns "#" and is left to a splitter.  The
;; unmasked form rejects memory-to-memory.
8038 (define_insn "vec_extract_lo_<mode><mask_name>"
8039 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>"
8040 "=<store_mask_constraint>,v")
8041 (vec_select:<ssehalfvecmode>
8042 (match_operand:VI4F_256 1 "<store_mask_predicate>"
8043 "v,<store_mask_constraint>")
8044 (parallel [(const_int 0) (const_int 1)
8045 (const_int 2) (const_int 3)])))]
8046 "TARGET_AVX
8047 && <mask_avx512vl_condition> && <mask_avx512dq_condition>
8048 && (<mask_applied> || !(MEM_P (operands[0]) && MEM_P (operands[1])))"
8049 {
8050 if (<mask_applied>)
8051 return "vextract<shuffletype>32x4\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
8052 else
8053 return "#";
8054 }
8055 [(set_attr "type" "sselog1")
8056 (set_attr "prefix_extra" "1")
8057 (set_attr "length_immediate" "1")
8058 (set_attr "prefix" "evex")
8059 (set_attr "mode" "<sseinsnmode>")])
8060
;; Masked store of elements 0..3 of a VI4F_256 register to memory;
;; operand 2 must be the destination memory itself and operand 3 is the
;; QImode mask.  Emits vextract*32x4 with immediate 0.
8061 (define_insn "vec_extract_lo_<mode>_maskm"
8062 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
8063 (vec_merge:<ssehalfvecmode>
8064 (vec_select:<ssehalfvecmode>
8065 (match_operand:VI4F_256 1 "register_operand" "v")
8066 (parallel [(const_int 0) (const_int 1)
8067 (const_int 2) (const_int 3)]))
8068 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
8069 (match_operand:QI 3 "register_operand" "Yk")))]
8070 "TARGET_AVX512VL && TARGET_AVX512F
8071 && rtx_equal_p (operands[2], operands[0])"
8072 "vextract<shuffletype>32x4\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}"
8073 [(set_attr "type" "sselog1")
8074 (set_attr "prefix_extra" "1")
8075 (set_attr "length_immediate" "1")
8076 (set_attr "prefix" "evex")
8077 (set_attr "mode" "<sseinsnmode>")])
8078
;; Masked store of elements 4..7 of a VI4F_256 register to memory;
;; operand 2 must be the destination memory itself.  Emits vextract*32x4
;; with immediate 1.
;; Operand 3 is the mask register (constraint "Yk") and is therefore
;; declared in QImode, matching vec_extract_lo_<mode>_maskm; declaring it
;; in the vector <ssehalfvecmode> would keep a QImode mask from ever
;; matching this pattern.
8079 (define_insn "vec_extract_hi_<mode>_maskm"
8080 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
8081 (vec_merge:<ssehalfvecmode>
8082 (vec_select:<ssehalfvecmode>
8083 (match_operand:VI4F_256 1 "register_operand" "v")
8084 (parallel [(const_int 4) (const_int 5)
8085 (const_int 6) (const_int 7)]))
8086 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
8087 (match_operand:QI 3 "register_operand" "Yk")))]
8088 "TARGET_AVX512F && TARGET_AVX512VL
8089 && rtx_equal_p (operands[2], operands[0])"
8090 "vextract<shuffletype>32x4\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
8091 [(set_attr "type" "sselog1")
8092 (set_attr "length_immediate" "1")
8093 (set_attr "prefix" "evex")
8094 (set_attr "mode" "<sseinsnmode>")])
8095
;; Masked extraction of elements 4..7 of a VI4F_256 register into a
;; register.  Operand 2 is the other arm of the vec_merge (tied to the
;; destination or matching constraint "C"), operand 3 the mask register.
;; Emits vextract*32x4 with immediate 1; "%N2" prints the part of the
;; destination operand that depends on operand 2.
8096 (define_insn "vec_extract_hi_<mode>_mask"
8097 [(set (match_operand:<ssehalfvecmode> 0 "register_operand" "=v")
8098 (vec_merge:<ssehalfvecmode>
8099 (vec_select:<ssehalfvecmode>
8100 (match_operand:VI4F_256 1 "register_operand" "v")
8101 (parallel [(const_int 4) (const_int 5)
8102 (const_int 6) (const_int 7)]))
8103 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C")
8104 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
8105 "TARGET_AVX512VL"
8106 "vextract<shuffletype>32x4\t{$0x1, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x1}"
8107 [(set_attr "type" "sselog1")
8108 (set_attr "length_immediate" "1")
8109 (set_attr "prefix" "evex")
8110 (set_attr "mode" "<sseinsnmode>")])
8111
;; Unmasked extraction of elements 4..7 of a VI4F_256 register.  The
;; first alternative ("x" registers, VEX prefix) emits vextract<i128>;
;; the second ("v" registers, EVEX prefix, isa avx512vl) emits
;; vextract*32x4.  Both use immediate 1.
8112 (define_insn "vec_extract_hi_<mode>"
8113 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=xm, vm")
8114 (vec_select:<ssehalfvecmode>
8115 (match_operand:VI4F_256 1 "register_operand" "x, v")
8116 (parallel [(const_int 4) (const_int 5)
8117 (const_int 6) (const_int 7)])))]
8118 "TARGET_AVX"
8119 "@
8120 vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}
8121 vextract<shuffletype>32x4\t{$0x1, %1, %0|%0, %1, 0x1}"
8122 [(set_attr "isa" "*, avx512vl")
8123 (set_attr "prefix" "vex, evex")
8124 (set_attr "type" "sselog1")
8125 (set_attr "length_immediate" "1")
8126 (set_attr "mode" "<sseinsnmode>")])
8127
;; Extraction of elements 0..15 of a V32HI operand into V16HI (never
;; memory-to-memory).  When AVX512VL is enabled, the destination is a
;; register, or the source is not an EXT_REX SSE register, the insn
;; outputs "#" and is split after reload into a plain move; otherwise
;; vextracti64x4 with immediate 0 is emitted.  In the split, a register
;; destination with an EXT_REX source and no AVX512VL is widened to
;; V32HImode; in all other cases the source is narrowed to V16HImode.
8128 (define_insn_and_split "vec_extract_lo_v32hi"
8129 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,v,m")
8130 (vec_select:V16HI
8131 (match_operand:V32HI 1 "nonimmediate_operand" "v,m,v")
8132 (parallel [(const_int 0) (const_int 1)
8133 (const_int 2) (const_int 3)
8134 (const_int 4) (const_int 5)
8135 (const_int 6) (const_int 7)
8136 (const_int 8) (const_int 9)
8137 (const_int 10) (const_int 11)
8138 (const_int 12) (const_int 13)
8139 (const_int 14) (const_int 15)])))]
8140 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8141 {
8142 if (TARGET_AVX512VL
8143 || REG_P (operands[0])
8144 || !EXT_REX_SSE_REG_P (operands[1]))
8145 return "#";
8146 else
8147 return "vextracti64x4\t{$0x0, %1, %0|%0, %1, 0x0}";
8148 }
8149 "&& reload_completed
8150 && (TARGET_AVX512VL
8151 || REG_P (operands[0])
8152 || !EXT_REX_SSE_REG_P (operands[1]))"
8153 [(set (match_dup 0) (match_dup 1))]
8154 {
8155 if (!TARGET_AVX512VL
8156 && REG_P (operands[0])
8157 && EXT_REX_SSE_REG_P (operands[1]))
8158 operands[0] = lowpart_subreg (V32HImode, operands[0], V16HImode);
8159 else
8160 operands[1] = gen_lowpart (V16HImode, operands[1]);
8161 }
8162 [(set_attr "type" "sselog1")
8163 (set_attr "prefix_extra" "1")
8164 (set_attr "length_immediate" "1")
8165 (set_attr "memory" "none,load,store")
8166 (set_attr "prefix" "evex")
8167 (set_attr "mode" "XI")])
8168
;; Extraction of elements 16..31 of a V32HI register into a V16HI
;; register or memory; emits vextracti64x4 with immediate 1.
8169 (define_insn "vec_extract_hi_v32hi"
8170 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=vm")
8171 (vec_select:V16HI
8172 (match_operand:V32HI 1 "register_operand" "v")
8173 (parallel [(const_int 16) (const_int 17)
8174 (const_int 18) (const_int 19)
8175 (const_int 20) (const_int 21)
8176 (const_int 22) (const_int 23)
8177 (const_int 24) (const_int 25)
8178 (const_int 26) (const_int 27)
8179 (const_int 28) (const_int 29)
8180 (const_int 30) (const_int 31)])))]
8181 "TARGET_AVX512F"
8182 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
8183 [(set_attr "type" "sselog1")
8184 (set_attr "prefix_extra" "1")
8185 (set_attr "length_immediate" "1")
8186 (set_attr "prefix" "evex")
8187 (set_attr "mode" "XI")])
8188
;; Extraction of elements 0..7 of a V16HI operand into V8HI (never
;; memory-to-memory).  Always outputs "#"; after reload it is split into
;; a move of the source's V8HImode low part.
8189 (define_insn_and_split "vec_extract_lo_v16hi"
8190 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=v,m")
8191 (vec_select:V8HI
8192 (match_operand:V16HI 1 "nonimmediate_operand" "vm,v")
8193 (parallel [(const_int 0) (const_int 1)
8194 (const_int 2) (const_int 3)
8195 (const_int 4) (const_int 5)
8196 (const_int 6) (const_int 7)])))]
8197 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8198 "#"
8199 "&& reload_completed"
8200 [(set (match_dup 0) (match_dup 1))]
8201 "operands[1] = gen_lowpart (V8HImode, operands[1]);")
8202
;; Extraction of elements 8..15 of a V16HI register into V8HI, immediate
;; 1 in every alternative:
;;   0: "x" registers, VEX-encoded vextract%~128;
;;   1: "v" registers, isa avx512dq, vextracti32x4;
;;   2: "v" registers, isa avx512f, vextracti32x4 with the source printed
;;      through the %g operand modifier.
8203 (define_insn "vec_extract_hi_v16hi"
8204 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm,vm,vm")
8205 (vec_select:V8HI
8206 (match_operand:V16HI 1 "register_operand" "x,v,v")
8207 (parallel [(const_int 8) (const_int 9)
8208 (const_int 10) (const_int 11)
8209 (const_int 12) (const_int 13)
8210 (const_int 14) (const_int 15)])))]
8211 "TARGET_AVX"
8212 "@
8213 vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}
8214 vextracti32x4\t{$0x1, %1, %0|%0, %1, 0x1}
8215 vextracti32x4\t{$0x1, %g1, %0|%0, %g1, 0x1}"
8216 [(set_attr "type" "sselog1")
8217 (set_attr "prefix_extra" "1")
8218 (set_attr "length_immediate" "1")
8219 (set_attr "isa" "*,avx512dq,avx512f")
8220 (set_attr "prefix" "vex,evex,evex")
8221 (set_attr "mode" "OI")])
8222
;; Extraction of elements 0..31 of a V64QI operand into V32QI (never
;; memory-to-memory).  When AVX512VL is enabled, the destination is a
;; register, or the source is not an EXT_REX SSE register, the insn
;; outputs "#" and is split after reload into a plain move; otherwise
;; vextracti64x4 with immediate 0 is emitted.  In the split, a register
;; destination with an EXT_REX source and no AVX512VL is widened to
;; V64QImode; in all other cases the source is narrowed to V32QImode.
8223 (define_insn_and_split "vec_extract_lo_v64qi"
8224 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,v,m")
8225 (vec_select:V32QI
8226 (match_operand:V64QI 1 "nonimmediate_operand" "v,m,v")
8227 (parallel [(const_int 0) (const_int 1)
8228 (const_int 2) (const_int 3)
8229 (const_int 4) (const_int 5)
8230 (const_int 6) (const_int 7)
8231 (const_int 8) (const_int 9)
8232 (const_int 10) (const_int 11)
8233 (const_int 12) (const_int 13)
8234 (const_int 14) (const_int 15)
8235 (const_int 16) (const_int 17)
8236 (const_int 18) (const_int 19)
8237 (const_int 20) (const_int 21)
8238 (const_int 22) (const_int 23)
8239 (const_int 24) (const_int 25)
8240 (const_int 26) (const_int 27)
8241 (const_int 28) (const_int 29)
8242 (const_int 30) (const_int 31)])))]
8243 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8244 {
8245 if (TARGET_AVX512VL
8246 || REG_P (operands[0])
8247 || !EXT_REX_SSE_REG_P (operands[1]))
8248 return "#";
8249 else
8250 return "vextracti64x4\t{$0x0, %1, %0|%0, %1, 0x0}";
8251 }
8252 "&& reload_completed
8253 && (TARGET_AVX512VL
8254 || REG_P (operands[0])
8255 || !EXT_REX_SSE_REG_P (operands[1]))"
8256 [(set (match_dup 0) (match_dup 1))]
8257 {
8258 if (!TARGET_AVX512VL
8259 && REG_P (operands[0])
8260 && EXT_REX_SSE_REG_P (operands[1]))
8261 operands[0] = lowpart_subreg (V64QImode, operands[0], V32QImode);
8262 else
8263 operands[1] = gen_lowpart (V32QImode, operands[1]);
8264 }
8265 [(set_attr "type" "sselog1")
8266 (set_attr "prefix_extra" "1")
8267 (set_attr "length_immediate" "1")
8268 (set_attr "memory" "none,load,store")
8269 (set_attr "prefix" "evex")
8270 (set_attr "mode" "XI")])
8271
;; Extraction of elements 32..63 of a V64QI register into a V32QI
;; register or memory; emits vextracti64x4 with immediate 1.
8272 (define_insn "vec_extract_hi_v64qi"
8273 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=vm")
8274 (vec_select:V32QI
8275 (match_operand:V64QI 1 "register_operand" "v")
8276 (parallel [(const_int 32) (const_int 33)
8277 (const_int 34) (const_int 35)
8278 (const_int 36) (const_int 37)
8279 (const_int 38) (const_int 39)
8280 (const_int 40) (const_int 41)
8281 (const_int 42) (const_int 43)
8282 (const_int 44) (const_int 45)
8283 (const_int 46) (const_int 47)
8284 (const_int 48) (const_int 49)
8285 (const_int 50) (const_int 51)
8286 (const_int 52) (const_int 53)
8287 (const_int 54) (const_int 55)
8288 (const_int 56) (const_int 57)
8289 (const_int 58) (const_int 59)
8290 (const_int 60) (const_int 61)
8291 (const_int 62) (const_int 63)])))]
8292 "TARGET_AVX512F"
8293 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
8294 [(set_attr "type" "sselog1")
8295 (set_attr "prefix_extra" "1")
8296 (set_attr "length_immediate" "1")
8297 (set_attr "prefix" "evex")
8298 (set_attr "mode" "XI")])
8299
;; Extraction of elements 0..15 of a V32QI operand into V16QI (never
;; memory-to-memory).  Always outputs "#"; after reload it is split into
;; a move of the source's V16QImode low part.
8300 (define_insn_and_split "vec_extract_lo_v32qi"
8301 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=v,m")
8302 (vec_select:V16QI
8303 (match_operand:V32QI 1 "nonimmediate_operand" "vm,v")
8304 (parallel [(const_int 0) (const_int 1)
8305 (const_int 2) (const_int 3)
8306 (const_int 4) (const_int 5)
8307 (const_int 6) (const_int 7)
8308 (const_int 8) (const_int 9)
8309 (const_int 10) (const_int 11)
8310 (const_int 12) (const_int 13)
8311 (const_int 14) (const_int 15)])))]
8312 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8313 "#"
8314 "&& reload_completed"
8315 [(set (match_dup 0) (match_dup 1))]
8316 "operands[1] = gen_lowpart (V16QImode, operands[1]);")
8317
;; Extraction of elements 16..31 of a V32QI register into V16QI,
;; immediate 1 in every alternative:
;;   0: "x" registers, VEX-encoded vextract%~128;
;;   1: "v" registers, isa avx512dq, vextracti32x4;
;;   2: "v" registers, isa avx512f, vextracti32x4 with the source printed
;;      through the %g operand modifier.
8318 (define_insn "vec_extract_hi_v32qi"
8319 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=xm,vm,vm")
8320 (vec_select:V16QI
8321 (match_operand:V32QI 1 "register_operand" "x,v,v")
8322 (parallel [(const_int 16) (const_int 17)
8323 (const_int 18) (const_int 19)
8324 (const_int 20) (const_int 21)
8325 (const_int 22) (const_int 23)
8326 (const_int 24) (const_int 25)
8327 (const_int 26) (const_int 27)
8328 (const_int 28) (const_int 29)
8329 (const_int 30) (const_int 31)])))]
8330 "TARGET_AVX"
8331 "@
8332 vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}
8333 vextracti32x4\t{$0x1, %1, %0|%0, %1, 0x1}
8334 vextracti32x4\t{$0x1, %g1, %0|%0, %g1, 0x1}"
8335 [(set_attr "type" "sselog1")
8336 (set_attr "prefix_extra" "1")
8337 (set_attr "length_immediate" "1")
8338 (set_attr "isa" "*,avx512dq,avx512f")
8339 (set_attr "prefix" "vex,evex,evex")
8340 (set_attr "mode" "OI")])
8341
8342 ;; Modes handled by vec_extract patterns.
;; The 128-bit modes are unconditional; 256-bit modes need TARGET_AVX,
;; 512-bit SI/DI/SF/DF/TI modes need TARGET_AVX512F, and the 512-bit
;; QI/HI modes need TARGET_AVX512BW.
8343 (define_mode_iterator VEC_EXTRACT_MODE
8344 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
8345 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
8346 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
8347 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
8348 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
8349 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF
8350 (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX")])
8351
;; Extract the scalar element with constant index operand 2 from vector
;; register operand 1 into register operand 0.  All work is delegated to
;; ix86_expand_vector_extract, called with `false' as its first argument.
8352 (define_expand "vec_extract<mode><ssescalarmodelower>"
8353 [(match_operand:<ssescalarmode> 0 "register_operand")
8354 (match_operand:VEC_EXTRACT_MODE 1 "register_operand")
8355 (match_operand 2 "const_int_operand")]
8356 "TARGET_SSE"
8357 {
8358 ix86_expand_vector_extract (false, operands[0], operands[1],
8359 INTVAL (operands[2]));
8360 DONE;
8361 })
8362
;; Extract one half of a V_512 vector register: a nonzero operand 2
;; selects gen_vec_extract_hi_<mode>, zero selects
;; gen_vec_extract_lo_<mode>.
8363 (define_expand "vec_extract<mode><ssehalfvecmodelower>"
8364 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
8365 (match_operand:V_512 1 "register_operand")
8366 (match_operand 2 "const_0_to_1_operand")]
8367 "TARGET_AVX512F"
8368 {
8369 if (INTVAL (operands[2]))
8370 emit_insn (gen_vec_extract_hi_<mode> (operands[0], operands[1]));
8371 else
8372 emit_insn (gen_vec_extract_lo_<mode> (operands[0], operands[1]));
8373 DONE;
8374 })
8375
8376 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8377 ;;
8378 ;; Parallel double-precision floating point element swizzling
8379 ;;
8380 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8381
;; 512-bit vunpckhpd (optionally masked via <mask_name>): from the
;; concatenation of operands 1 and 2 it selects the odd-numbered elements
;; pairwise -- 1 and 9, 3 and 11, 5 and 13, 7 and 15.
8382 (define_insn "<mask_codefor>avx512f_unpckhpd512<mask_name>"
8383 [(set (match_operand:V8DF 0 "register_operand" "=v")
8384 (vec_select:V8DF
8385 (vec_concat:V16DF
8386 (match_operand:V8DF 1 "register_operand" "v")
8387 (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
8388 (parallel [(const_int 1) (const_int 9)
8389 (const_int 3) (const_int 11)
8390 (const_int 5) (const_int 13)
8391 (const_int 7) (const_int 15)])))]
8392 "TARGET_AVX512F"
8393 "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8394 [(set_attr "type" "sselog")
8395 (set_attr "prefix" "evex")
8396 (set_attr "mode" "V8DF")])
8397
8398 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit vunpckhpd (optionally masked via <mask_name>): selects
;; elements 1, 5, 3, 7 of the concatenation of operands 1 and 2.
8399 (define_insn "avx_unpckhpd256<mask_name>"
8400 [(set (match_operand:V4DF 0 "register_operand" "=v")
8401 (vec_select:V4DF
8402 (vec_concat:V8DF
8403 (match_operand:V4DF 1 "register_operand" "v")
8404 (match_operand:V4DF 2 "nonimmediate_operand" "vm"))
8405 (parallel [(const_int 1) (const_int 5)
8406 (const_int 3) (const_int 7)])))]
8407 "TARGET_AVX && <mask_avx512vl_condition>"
8408 "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8409 [(set_attr "type" "sselog")
8410 (set_attr "prefix" "vex")
8411 (set_attr "mode" "V4DF")])
8412
;; High interleave of two V4DF vectors, expanded as three selects using
;; two fresh V4DF temporaries (operands 3 and 4):
;;   tmp3 = elements 0, 4, 2, 6 of (op1, op2);
;;   tmp4 = elements 1, 5, 3, 7 of (op1, op2);
;;   op0  = elements 2, 3, 6, 7 of (tmp3, tmp4).
8413 (define_expand "vec_interleave_highv4df"
8414 [(set (match_dup 3)
8415 (vec_select:V4DF
8416 (vec_concat:V8DF
8417 (match_operand:V4DF 1 "register_operand")
8418 (match_operand:V4DF 2 "nonimmediate_operand"))
8419 (parallel [(const_int 0) (const_int 4)
8420 (const_int 2) (const_int 6)])))
8421 (set (match_dup 4)
8422 (vec_select:V4DF
8423 (vec_concat:V8DF
8424 (match_dup 1)
8425 (match_dup 2))
8426 (parallel [(const_int 1) (const_int 5)
8427 (const_int 3) (const_int 7)])))
8428 (set (match_operand:V4DF 0 "register_operand")
8429 (vec_select:V4DF
8430 (vec_concat:V8DF
8431 (match_dup 3)
8432 (match_dup 4))
8433 (parallel [(const_int 2) (const_int 3)
8434 (const_int 6) (const_int 7)])))]
8435 "TARGET_AVX"
8436 {
8437 operands[3] = gen_reg_rtx (V4DFmode);
8438 operands[4] = gen_reg_rtx (V4DFmode);
8439 })
8440
8441
8440
8441
;; Masked 128-bit vunpckhpd: elements 1 and 3 of the concatenation of
;; operands 1 and 2 are merged with operand 3 (tied to the destination or
;; matching constraint "C") under the QImode mask in operand 4.
8442 (define_insn "avx512vl_unpckhpd128_mask"
8443 [(set (match_operand:V2DF 0 "register_operand" "=v")
8444 (vec_merge:V2DF
8445 (vec_select:V2DF
8446 (vec_concat:V4DF
8447 (match_operand:V2DF 1 "register_operand" "v")
8448 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
8449 (parallel [(const_int 1) (const_int 3)]))
8450 (match_operand:V2DF 3 "nonimm_or_0_operand" "0C")
8451 (match_operand:QI 4 "register_operand" "Yk")))]
8452 "TARGET_AVX512VL"
8453 "vunpckhpd\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
8454 [(set_attr "type" "sselog")
8455 (set_attr "prefix" "evex")
8456 (set_attr "mode" "V2DF")])
8457
;; High interleave of two V2DF operands (elements 1 and 3 of their
;; concatenation).  If ix86_vec_interleave_v2df_operator_ok rejects the
;; operand combination, operand 2 is forced into a register.
8458 (define_expand "vec_interleave_highv2df"
8459 [(set (match_operand:V2DF 0 "register_operand")
8460 (vec_select:V2DF
8461 (vec_concat:V4DF
8462 (match_operand:V2DF 1 "nonimmediate_operand")
8463 (match_operand:V2DF 2 "nonimmediate_operand"))
8464 (parallel [(const_int 1)
8465 (const_int 3)])))]
8466 "TARGET_SSE2"
8467 {
8468 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
8469 operands[2] = force_reg (V2DFmode, operands[2]);
8470 })
8471
;; Insn for the V2DF high interleave, valid only for operand combinations
;; accepted by ix86_vec_interleave_v2df_operator_ok.  Six alternatives,
;; in order: unpckhpd, vunpckhpd, movddup from the high half of memory
;; operand 1 (%H1), movlpd / vmovlpd loading %H1, and a movhpd store of
;; operand 1 to memory.  prefix_data16 is "1" for alternatives 3 and 5.
8472 (define_insn "*vec_interleave_highv2df"
8473 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,v,v,x,v,m")
8474 (vec_select:V2DF
8475 (vec_concat:V4DF
8476 (match_operand:V2DF 1 "nonimmediate_operand" " 0,v,o,o,o,v")
8477 (match_operand:V2DF 2 "nonimmediate_operand" " x,v,1,0,v,0"))
8478 (parallel [(const_int 1)
8479 (const_int 3)])))]
8480 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
8481 "@
8482 unpckhpd\t{%2, %0|%0, %2}
8483 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
8484 %vmovddup\t{%H1, %0|%0, %H1}
8485 movlpd\t{%H1, %0|%0, %H1}
8486 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
8487 %vmovhpd\t{%1, %0|%q0, %1}"
8488 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
8489 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
8490 (set (attr "prefix_data16")
8491 (if_then_else (eq_attr "alternative" "3,5")
8492 (const_string "1")
8493 (const_string "*")))
8494 (set_attr "prefix" "orig,maybe_evex,maybe_vex,orig,maybe_evex,maybe_vex")
8495 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
8496
;; 512-bit movddup expander (optionally masked via <mask_name>): operand 1
;; is concatenated with itself and the even-numbered elements are
;; selected pairwise (0 and 8, 2 and 10, 4 and 12, 6 and 14).
8497 (define_expand "avx512f_movddup512<mask_name>"
8498 [(set (match_operand:V8DF 0 "register_operand")
8499 (vec_select:V8DF
8500 (vec_concat:V16DF
8501 (match_operand:V8DF 1 "nonimmediate_operand")
8502 (match_dup 1))
8503 (parallel [(const_int 0) (const_int 8)
8504 (const_int 2) (const_int 10)
8505 (const_int 4) (const_int 12)
8506 (const_int 6) (const_int 14)])))]
8507 "TARGET_AVX512F")
8508
;; 512-bit unpcklpd expander (optionally masked via <mask_name>): selects
;; elements 0, 8, 2, 10, 4, 12, 6, 14 of the concatenation of operands 1
;; and 2.
8509 (define_expand "avx512f_unpcklpd512<mask_name>"
8510 [(set (match_operand:V8DF 0 "register_operand")
8511 (vec_select:V8DF
8512 (vec_concat:V16DF
8513 (match_operand:V8DF 1 "register_operand")
8514 (match_operand:V8DF 2 "nonimmediate_operand"))
8515 (parallel [(const_int 0) (const_int 8)
8516 (const_int 2) (const_int 10)
8517 (const_int 4) (const_int 12)
8518 (const_int 6) (const_int 14)])))]
8519 "TARGET_AVX512F")
8520
;; Insn matching both 512-bit expanders above.  Alternative 0 has operand
;; 2 tied to operand 1 (constraint "1") and emits vmovddup; alternative 1
;; takes two distinct operands and emits vunpcklpd.
8521 (define_insn "*avx512f_unpcklpd512<mask_name>"
8522 [(set (match_operand:V8DF 0 "register_operand" "=v,v")
8523 (vec_select:V8DF
8524 (vec_concat:V16DF
8525 (match_operand:V8DF 1 "nonimmediate_operand" "vm, v")
8526 (match_operand:V8DF 2 "nonimmediate_operand" "1 ,vm"))
8527 (parallel [(const_int 0) (const_int 8)
8528 (const_int 2) (const_int 10)
8529 (const_int 4) (const_int 12)
8530 (const_int 6) (const_int 14)])))]
8531 "TARGET_AVX512F"
8532 "@
8533 vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}
8534 vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8535 [(set_attr "type" "sselog")
8536 (set_attr "prefix" "evex")
8537 (set_attr "mode" "V8DF")])
8538
8539 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit movddup expander (optionally masked via <mask_name>): operand 1
;; is concatenated with itself and elements 0, 4, 2, 6 are selected.
8540 (define_expand "avx_movddup256<mask_name>"
8541 [(set (match_operand:V4DF 0 "register_operand")
8542 (vec_select:V4DF
8543 (vec_concat:V8DF
8544 (match_operand:V4DF 1 "nonimmediate_operand")
8545 (match_dup 1))
8546 (parallel [(const_int 0) (const_int 4)
8547 (const_int 2) (const_int 6)])))]
8548 "TARGET_AVX && <mask_avx512vl_condition>")
8549
;; 256-bit unpcklpd expander (optionally masked via <mask_name>): selects
;; elements 0, 4, 2, 6 of the concatenation of operands 1 and 2.
8550 (define_expand "avx_unpcklpd256<mask_name>"
8551 [(set (match_operand:V4DF 0 "register_operand")
8552 (vec_select:V4DF
8553 (vec_concat:V8DF
8554 (match_operand:V4DF 1 "register_operand")
8555 (match_operand:V4DF 2 "nonimmediate_operand"))
8556 (parallel [(const_int 0) (const_int 4)
8557 (const_int 2) (const_int 6)])))]
8558 "TARGET_AVX && <mask_avx512vl_condition>")
8559
;; Insn matching both 256-bit expanders above.  Alternative 0 takes two
;; distinct operands and emits vunpcklpd; alternative 1 has a memory
;; operand 1 with operand 2 tied to it (constraint "1") and emits
;; vmovddup.
8560 (define_insn "*avx_unpcklpd256<mask_name>"
8561 [(set (match_operand:V4DF 0 "register_operand" "=v,v")
8562 (vec_select:V4DF
8563 (vec_concat:V8DF
8564 (match_operand:V4DF 1 "nonimmediate_operand" " v,m")
8565 (match_operand:V4DF 2 "nonimmediate_operand" "vm,1"))
8566 (parallel [(const_int 0) (const_int 4)
8567 (const_int 2) (const_int 6)])))]
8568 "TARGET_AVX && <mask_avx512vl_condition>"
8569 "@
8570 vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
8571 vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}"
8572 [(set_attr "type" "sselog")
8573 (set_attr "prefix" "vex")
8574 (set_attr "mode" "V4DF")])
8575
;; Full-width low interleave of two V4DF vectors, built from three steps
;; because the first two selectors only pair elements lane by lane:
;;   tmp3 = { op1[0], op2[0], op1[2], op2[2] }   (selector 0,4,2,6)
;;   tmp4 = { op1[1], op2[1], op1[3], op2[3] }   (selector 1,5,3,7)
;;   op0  = { tmp3[0], tmp3[1], tmp4[0], tmp4[1] } (selector 0,1,4,5)
;; which gives { op1[0], op2[0], op1[1], op2[1] }.
;; The preparation code allocates the two V4DF temporaries (operands 3, 4).
8576 (define_expand "vec_interleave_lowv4df"
8577 [(set (match_dup 3)
8578 (vec_select:V4DF
8579 (vec_concat:V8DF
8580 (match_operand:V4DF 1 "register_operand")
8581 (match_operand:V4DF 2 "nonimmediate_operand"))
8582 (parallel [(const_int 0) (const_int 4)
8583 (const_int 2) (const_int 6)])))
8584 (set (match_dup 4)
8585 (vec_select:V4DF
8586 (vec_concat:V8DF
8587 (match_dup 1)
8588 (match_dup 2))
8589 (parallel [(const_int 1) (const_int 5)
8590 (const_int 3) (const_int 7)])))
8591 (set (match_operand:V4DF 0 "register_operand")
8592 (vec_select:V4DF
8593 (vec_concat:V8DF
8594 (match_dup 3)
8595 (match_dup 4))
8596 (parallel [(const_int 0) (const_int 1)
8597 (const_int 4) (const_int 5)])))]
8598 "TARGET_AVX"
8599 {
8600 operands[3] = gen_reg_rtx (V4DFmode);
8601 operands[4] = gen_reg_rtx (V4DFmode);
8602 })
8603
;; Masked V2DF low unpack.  The inner vec_select picks elements 0 and 2 of
;; (op1, op2), i.e. { op1[0], op2[0] }; the vec_merge then combines that
;; result with operand 3 under the QImode mask register in operand 4.
;; Operand 3 is either tied to the destination ("0") or matches the "C"
;; constraint.  NOTE(review): %N3 presumably prints the zero-masking
;; suffix when operand 3 is the zero constant -- confirm against the
;; operand-printing code.
8604 (define_insn "avx512vl_unpcklpd128_mask"
8605 [(set (match_operand:V2DF 0 "register_operand" "=v")
8606 (vec_merge:V2DF
8607 (vec_select:V2DF
8608 (vec_concat:V4DF
8609 (match_operand:V2DF 1 "register_operand" "v")
8610 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
8611 (parallel [(const_int 0) (const_int 2)]))
8612 (match_operand:V2DF 3 "nonimm_or_0_operand" "0C")
8613 (match_operand:QI 4 "register_operand" "Yk")))]
8614 "TARGET_AVX512VL"
8615 "vunpcklpd\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
8616 [(set_attr "type" "sselog")
8617 (set_attr "prefix" "evex")
8618 (set_attr "mode" "V2DF")])
8619
;; Expander for the V2DF low interleave { op1[0], op2[0] } (selector 0,2).
;; Both inputs are accepted as register or memory; when
;; ix86_vec_interleave_v2df_operator_ok rejects the operand combination,
;; operand 1 is copied into a register before the pattern is emitted.
8620 (define_expand "vec_interleave_lowv2df"
8621 [(set (match_operand:V2DF 0 "register_operand")
8622 (vec_select:V2DF
8623 (vec_concat:V4DF
8624 (match_operand:V2DF 1 "nonimmediate_operand")
8625 (match_operand:V2DF 2 "nonimmediate_operand"))
8626 (parallel [(const_int 0)
8627 (const_int 2)])))]
8628 "TARGET_SSE2"
8629 {
8630 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
8631 operands[1] = force_reg (V2DFmode, operands[1]);
8632 })
8633
;; Matcher for the V2DF low interleave { op1[0], op2[0] }.
;; Alternatives, in order (see the "isa" attribute for availability):
;;   0  unpcklpd   two-operand form, destination tied to operand 1
;;   1  vunpcklpd  three-operand form
;;   2  movddup    operand 1 in memory, operand 2 tied to it
;;   3  movhpd     operand 2 loaded from memory, destination tied to op 1
;;   4  vmovhpd    three-operand form of the above
;;   5  movlpd     memory destination tied to operand 1; operand 2 is
;;                 written to %H0 (presumably the upper 8 bytes -- confirm)
;; The insn condition re-checks the operand combination with
;; ix86_vec_interleave_v2df_operator_ok.
8634 (define_insn "*vec_interleave_lowv2df"
8635 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,v,v,x,v,o")
8636 (vec_select:V2DF
8637 (vec_concat:V4DF
8638 (match_operand:V2DF 1 "nonimmediate_operand" " 0,v,m,0,v,0")
8639 (match_operand:V2DF 2 "nonimmediate_operand" " x,v,1,m,m,v"))
8640 (parallel [(const_int 0)
8641 (const_int 2)])))]
8642 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
8643 "@
8644 unpcklpd\t{%2, %0|%0, %2}
8645 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
8646 %vmovddup\t{%1, %0|%0, %q1}
8647 movhpd\t{%2, %0|%0, %q2}
8648 vmovhpd\t{%2, %1, %0|%0, %1, %q2}
8649 %vmovlpd\t{%2, %H0|%H0, %2}"
8650 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
8651 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
8652 (set (attr "prefix_data16")
8653 (if_then_else (eq_attr "alternative" "3,5")
8654 (const_string "1")
8655 (const_string "*")))
8656 (set_attr "prefix" "orig,maybe_evex,maybe_vex,orig,maybe_evex,maybe_vex")
8657 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
8658
;; Split: store of a duplicated low double to memory.  When a register's
;; element 0 is to be written to both halves of a V2DF memory destination
;; (same register on both sides of the concat, selector 0,2), emit two
;; DFmode stores of the register's low part, at byte offsets 0 and 8 of
;; the destination.  Only applies after reload and with SSE3 enabled.
8659 (define_split
8660 [(set (match_operand:V2DF 0 "memory_operand")
8661 (vec_select:V2DF
8662 (vec_concat:V4DF
8663 (match_operand:V2DF 1 "register_operand")
8664 (match_dup 1))
8665 (parallel [(const_int 0)
8666 (const_int 2)])))]
8667 "TARGET_SSE3 && reload_completed"
8668 [(const_int 0)]
8669 {
8670 rtx low = gen_lowpart (DFmode, operands[1]);
8671
8672 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
8673 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
8674 DONE;
8675 })
8676
;; Split: a V2DF memory operand concatenated with itself and selected with
;; indices (i, i + 2), i being 0 or 1, reads the same double twice.
;; Rewrite it as a vec_duplicate of the single DFmode memory element at
;; byte offset i * 8 of the original address.
8677 (define_split
8678 [(set (match_operand:V2DF 0 "register_operand")
8679 (vec_select:V2DF
8680 (vec_concat:V4DF
8681 (match_operand:V2DF 1 "memory_operand")
8682 (match_dup 1))
8683 (parallel [(match_operand:SI 2 "const_0_to_1_operand")
8684 (match_operand:SI 3 "const_int_operand")])))]
8685 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
8686 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
8687 {
8688 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
8689 })
8690
;; Scalar vscalef on 128-bit vector modes (VF_128).  The operation itself
;; is opaque to the compiler (UNSPEC_SCALEF of operands 1 and 2); the
;; vec_merge with (const_int 1) takes only element 0 from that result and
;; the remaining elements from operand 1.
;; The <mask_scalar_*> and <round_scalar_*> attributes are defined outside
;; this chunk.
8691 (define_insn "avx512f_vmscalef<mode><mask_scalar_name><round_scalar_name>"
8692 [(set (match_operand:VF_128 0 "register_operand" "=v")
8693 (vec_merge:VF_128
8694 (unspec:VF_128
8695 [(match_operand:VF_128 1 "register_operand" "v")
8696 (match_operand:VF_128 2 "<round_scalar_nimm_predicate>" "<round_scalar_constraint>")]
8697 UNSPEC_SCALEF)
8698 (match_dup 1)
8699 (const_int 1)))]
8700 "TARGET_AVX512F"
8701 "vscalef<ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %2<round_scalar_mask_op3>}"
8702 [(set_attr "prefix" "evex")
8703 (set_attr "mode" "<ssescalarmode>")])
8704
;; Packed vscalef over the VF_AVX512VL modes: the whole result is the
;; opaque UNSPEC_SCALEF of operands 1 and 2 (no element merging).
;; The <mask_*> and <round_*> attributes are defined outside this chunk.
8705 (define_insn "<avx512>_scalef<mode><mask_name><round_name>"
8706 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
8707 (unspec:VF_AVX512VL
8708 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
8709 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")]
8710 UNSPEC_SCALEF))]
8711 "TARGET_AVX512F"
8712 "vscalef<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
8713 [(set_attr "prefix" "evex")
8714 (set_attr "mode" "<MODE>")])
8715
;; Zero-masking entry point for vpternlog.  Takes the three vector inputs
;; (operands 1-3), the 8-bit immediate (operand 4) and the mask register
;; (operand 5), and forwards them to the _maskz_1 generator with an
;; all-zero vector of the same mode inserted in front of the mask.
8716 (define_expand "<avx512>_vternlog<mode>_maskz"
8717 [(match_operand:VI48_AVX512VL 0 "register_operand")
8718 (match_operand:VI48_AVX512VL 1 "register_operand")
8719 (match_operand:VI48_AVX512VL 2 "register_operand")
8720 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand")
8721 (match_operand:SI 4 "const_0_to_255_operand")
8722 (match_operand:<avx512fmaskmode> 5 "register_operand")]
8723 "TARGET_AVX512F"
8724 {
8725 emit_insn (gen_<avx512>_vternlog<mode>_maskz_1 (
8726 operands[0], operands[1], operands[2], operands[3],
8727 operands[4], CONST0_RTX (<MODE>mode), operands[5]));
8728 DONE;
8729 })
8730
;; vpternlog: three vector inputs plus an 8-bit immediate (operand 4),
;; modelled as the opaque UNSPEC_VTERNLOG.  Operand 1 is tied to the
;; destination (constraint "0"), which is why the output template prints
;; only %0, %2, %3 and %4.
;; <sd_maskz_name> and <sd_mask_op5> are attributes defined outside this
;; chunk.
8731 (define_insn "<avx512>_vternlog<mode><sd_maskz_name>"
8732 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
8733 (unspec:VI48_AVX512VL
8734 [(match_operand:VI48_AVX512VL 1 "register_operand" "0")
8735 (match_operand:VI48_AVX512VL 2 "register_operand" "v")
8736 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand" "vm")
8737 (match_operand:SI 4 "const_0_to_255_operand")]
8738 UNSPEC_VTERNLOG))]
8739 "TARGET_AVX512F"
8740 "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3, %4}"
8741 [(set_attr "type" "sselog")
8742 (set_attr "prefix" "evex")
8743 (set_attr "mode" "<sseinsnmode>")])
8744
;; Merge-masked vpternlog.  The UNSPEC_VTERNLOG result is merged with
;; operand 1 (match_dup 1) under the mask register in operand 5; since
;; operand 1 is tied to the destination, unselected elements keep the
;; destination's previous contents.  The template prints the mask as
;; {%5} after %0.
8745 (define_insn "<avx512>_vternlog<mode>_mask"
8746 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
8747 (vec_merge:VI48_AVX512VL
8748 (unspec:VI48_AVX512VL
8749 [(match_operand:VI48_AVX512VL 1 "register_operand" "0")
8750 (match_operand:VI48_AVX512VL 2 "register_operand" "v")
8751 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand" "vm")
8752 (match_operand:SI 4 "const_0_to_255_operand")]
8753 UNSPEC_VTERNLOG)
8754 (match_dup 1)
8755 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
8756 "TARGET_AVX512F"
8757 "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0%{%5%}|%0%{%5%}, %2, %3, %4}"
8758 [(set_attr "type" "sselog")
8759 (set_attr "prefix" "evex")
8760 (set_attr "mode" "<MODE>")])
8761
;; Packed vgetexp over the VF_AVX512VL modes: a single-input operation
;; modelled as the opaque UNSPEC_GETEXP of operand 1.
;; The <mask_*> and <round_saeonly_*> attributes are defined outside this
;; chunk.
8762 (define_insn "<avx512>_getexp<mode><mask_name><round_saeonly_name>"
8763 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
8764 (unspec:VF_AVX512VL [(match_operand:VF_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
8765 UNSPEC_GETEXP))]
8766 "TARGET_AVX512F"
8767 "vgetexp<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
8768 [(set_attr "prefix" "evex")
8769 (set_attr "mode" "<MODE>")])
8770
;; Scalar vgetexp on 128-bit vector modes (VF_128).  UNSPEC_GETEXP takes
;; operands 1 and 2; the vec_merge with (const_int 1) takes only element 0
;; from that result and the remaining elements from operand 1.
;; The <mask_scalar_*>, <round_saeonly_scalar_*> and <iptr> attributes are
;; defined outside this chunk.
8771 (define_insn "avx512f_sgetexp<mode><mask_scalar_name><round_saeonly_scalar_name>"
8772 [(set (match_operand:VF_128 0 "register_operand" "=v")
8773 (vec_merge:VF_128
8774 (unspec:VF_128
8775 [(match_operand:VF_128 1 "register_operand" "v")
8776 (match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")]
8777 UNSPEC_GETEXP)
8778 (match_dup 1)
8779 (const_int 1)))]
8780 "TARGET_AVX512F"
8781 "vgetexp<ssescalarmodesuffix>\t{<round_saeonly_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_saeonly_scalar_mask_op3>}"
8782 [(set_attr "prefix" "evex")
8783 (set_attr "mode" "<ssescalarmode>")])
8784
;; valignd/valignq over the VI48_AVX512VL modes: two vector inputs and an
;; 8-bit immediate (operand 3), modelled as the opaque UNSPEC_ALIGN.
;; <mask_codefor>, <mask_name> and <mask_operand4> are attributes defined
;; outside this chunk.
8785 (define_insn "<mask_codefor><avx512>_align<mode><mask_name>"
8786 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
8787 (unspec:VI48_AVX512VL [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
8788 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
8789 (match_operand:SI 3 "const_0_to_255_operand")]
8790 UNSPEC_ALIGN))]
8791 "TARGET_AVX512F"
8792 "valign<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}"
8793 [(set_attr "prefix" "evex")
8794 (set_attr "mode" "<sseinsnmode>")])
8795
;; Masked 512-bit vshufps entry point taking the raw 8-bit immediate
;; (operand 3).  The immediate holds four 2-bit element selectors; they
;; are expanded into the 16 explicit element indices expected by
;; avx512f_shufps512_1_mask.  Within each group of four results the first
;; two indices address operand 1 (base 0) and the last two address
;; operand 2 (base 16); the same four selectors are repeated for every
;; group with an extra offset of 4, 8 and 12.
;; Operand 4 is the merge source and operand 5 the HImode mask.
8796 (define_expand "avx512f_shufps512_mask"
8797 [(match_operand:V16SF 0 "register_operand")
8798 (match_operand:V16SF 1 "register_operand")
8799 (match_operand:V16SF 2 "nonimmediate_operand")
8800 (match_operand:SI 3 "const_0_to_255_operand")
8801 (match_operand:V16SF 4 "register_operand")
8802 (match_operand:HI 5 "register_operand")]
8803 "TARGET_AVX512F"
8804 {
8805 int mask = INTVAL (operands[3]);
8806 emit_insn (gen_avx512f_shufps512_1_mask (operands[0], operands[1], operands[2],
8807 GEN_INT ((mask >> 0) & 3),
8808 GEN_INT ((mask >> 2) & 3),
8809 GEN_INT (((mask >> 4) & 3) + 16),
8810 GEN_INT (((mask >> 6) & 3) + 16),
8811 GEN_INT (((mask >> 0) & 3) + 4),
8812 GEN_INT (((mask >> 2) & 3) + 4),
8813 GEN_INT (((mask >> 4) & 3) + 20),
8814 GEN_INT (((mask >> 6) & 3) + 20),
8815 GEN_INT (((mask >> 0) & 3) + 8),
8816 GEN_INT (((mask >> 2) & 3) + 8),
8817 GEN_INT (((mask >> 4) & 3) + 24),
8818 GEN_INT (((mask >> 6) & 3) + 24),
8819 GEN_INT (((mask >> 0) & 3) + 12),
8820 GEN_INT (((mask >> 2) & 3) + 12),
8821 GEN_INT (((mask >> 4) & 3) + 28),
8822 GEN_INT (((mask >> 6) & 3) + 28),
8823 operands[4], operands[5]));
8824 DONE;
8825 })
8826
8827
;; Zero-masking entry point for packed vfixupimm.  Forwards operands 0-3
;; to the corresponding _mask generator, passing an all-zero vector of the
;; same mode as the merge source followed by the mask register
;; (operand 4).  <round_saeonly_expand_operand5> appends whatever extra
;; argument the rounding variant needs; it is defined outside this chunk.
8828 (define_expand "<avx512>_fixupimm<mode>_maskz<round_saeonly_expand_name>"
8829 [(match_operand:VF_AVX512VL 0 "register_operand")
8830 (match_operand:VF_AVX512VL 1 "register_operand")
8831 (match_operand:<sseintvecmode> 2 "<round_saeonly_expand_nimm_predicate>")
8832 (match_operand:SI 3 "const_0_to_255_operand")
8833 (match_operand:<avx512fmaskmode> 4 "register_operand")]
8834 "TARGET_AVX512F"
8835 {
8836 emit_insn (gen_<avx512>_fixupimm<mode>_mask<round_saeonly_expand_name> (
8837 operands[0], operands[1], operands[2], operands[3],
8838 CONST0_RTX (<MODE>mode), operands[4]
8839 <round_saeonly_expand_operand5>));
8840 DONE;
8841 })
8842
;; Packed vfixupimm over the VF_AVX512VL modes.  Inputs are a floating
;; point vector (operand 1), an integer vector of the matching
;; <sseintvecmode> (operand 2) and an 8-bit immediate (operand 3); the
;; operation is modelled as the opaque UNSPEC_FIXUPIMM.
;; The <mask_*> and <round_saeonly_*> attributes are defined outside this
;; chunk.
8843 (define_insn "<avx512>_fixupimm<mode><mask_name><round_saeonly_name>"
8844 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
8845 (unspec:VF_AVX512VL
8846 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
8847 (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "<round_saeonly_constraint>")
8848 (match_operand:SI 3 "const_0_to_255_operand")]
8849 UNSPEC_FIXUPIMM))]
8850 "TARGET_AVX512F"
8851 "vfixupimm<ssemodesuffix>\t{%3, <round_saeonly_mask_op4>%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2<round_saeonly_mask_op4>, %3}"
8852 [(set_attr "prefix" "evex")
8853 (set_attr "mode" "<MODE>")])
8854
;; Zero-masking entry point for scalar vfixupimm (VF_128 modes).  Forwards
;; operands 0-3 to the corresponding _mask generator, passing an all-zero
;; vector of the same mode as the merge source followed by the mask
;; register (operand 4).  <round_saeonly_expand_operand5> is defined
;; outside this chunk.
8855 (define_expand "avx512f_sfixupimm<mode>_maskz<round_saeonly_expand_name>"
8856 [(match_operand:VF_128 0 "register_operand")
8857 (match_operand:VF_128 1 "register_operand")
8858 (match_operand:<sseintvecmode> 2 "<round_saeonly_expand_nimm_predicate>")
8859 (match_operand:SI 3 "const_0_to_255_operand")
8860 (match_operand:<avx512fmaskmode> 4 "register_operand")]
8861 "TARGET_AVX512F"
8862 {
8863 emit_insn (gen_avx512f_sfixupimm<mode>_mask<round_saeonly_expand_name> (
8864 operands[0], operands[1], operands[2], operands[3],
8865 CONST0_RTX (<MODE>mode), operands[4]
8866 <round_saeonly_expand_operand5>));
8867 DONE;
8868 })
8869
;; Scalar vfixupimm on 128-bit vector modes (VF_128).  Inputs are a
;; floating point vector (operand 1), an integer vector of the matching
;; <sseintvecmode> (operand 2) and an 8-bit immediate (operand 3); the
;; operation is modelled as the opaque UNSPEC_SFIXUPIMM.
;; The <mask_*>, <round_saeonly_*> and <iptr> attributes are defined
;; outside this chunk.
8870 (define_insn "avx512f_sfixupimm<mode><mask_name><round_saeonly_name>"
8871 [(set (match_operand:VF_128 0 "register_operand" "=v")
8872 (unspec:VF_128
8873 [(match_operand:VF_128 1 "register_operand" "v")
8874 (match_operand:<sseintvecmode> 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
8875 (match_operand:SI 3 "const_0_to_255_operand")]
8876 UNSPEC_SFIXUPIMM))]
8877 "TARGET_AVX512F"
8878 "vfixupimm<ssescalarmodesuffix>\t{%3, <round_saeonly_mask_op4>%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %<iptr>2<round_saeonly_mask_op4>, %3}"
8879 [(set_attr "prefix" "evex")
8880 (set_attr "mode" "<ssescalarmode>")])
8881
;; Packed vrndscale over the VF_AVX512VL modes: one vector input
;; (operand 1) and an 8-bit immediate (operand 2), modelled as
;; UNSPEC_ROUND.  The "length_immediate" attribute accounts for the one
;; immediate byte.
;; The <mask_*> and <round_saeonly_*> attributes are defined outside this
;; chunk.
8882 (define_insn "<avx512>_rndscale<mode><mask_name><round_saeonly_name>"
8883 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
8884 (unspec:VF_AVX512VL
8885 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>")
8886 (match_operand:SI 2 "const_0_to_255_operand")]
8887 UNSPEC_ROUND))]
8888 "TARGET_AVX512F"
8889 "vrndscale<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}"
8890 [(set_attr "length_immediate" "1")
8891 (set_attr "prefix" "evex")
8892 (set_attr "mode" "<MODE>")])
8893
;; Scalar vrndscale on 128-bit vector modes (VF_128).  UNSPEC_ROUND takes
;; operands 1 and 2 plus the 8-bit immediate in operand 3; the vec_merge
;; with (const_int 1) takes only element 0 from that result and the
;; remaining elements from operand 1.
;; The <round_saeonly_*> and <iptr> attributes are defined outside this
;; chunk.
8894 (define_insn "avx512f_rndscale<mode><round_saeonly_name>"
8895 [(set (match_operand:VF_128 0 "register_operand" "=v")
8896 (vec_merge:VF_128
8897 (unspec:VF_128
8898 [(match_operand:VF_128 1 "register_operand" "v")
8899 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
8900 (match_operand:SI 3 "const_0_to_255_operand")]
8901 UNSPEC_ROUND)
8902 (match_dup 1)
8903 (const_int 1)))]
8904 "TARGET_AVX512F"
8905 "vrndscale<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %<iptr>2<round_saeonly_op4>, %3}"
8906 [(set_attr "length_immediate" "1")
8907 (set_attr "prefix" "evex")
8908 (set_attr "mode" "<MODE>")])
8909
8910 ;; One bit in mask selects 2 elements.
;; 512-bit vshufps expressed as an explicit 16-element selection from the
;; concatenation of operands 1 and 2.  Operands 3-18 are the individual
;; element indices; the predicates pin each one to the 4-element window it
;; may come from (operand 1 windows start at 0/4/8/12, operand 2 windows
;; at 16/20/24/28).
;; The insn condition additionally requires that the selectors for the
;; second, third and fourth group of four equal those of the first group
;; plus 4, 8 and 12 respectively, since the instruction has a single
;; immediate shared by all groups.
;; The output code rebuilds that immediate from operands 3-6 (two bits
;; each, with the base of 16 removed from operands 5 and 6) and stores it
;; back into operands[3] for printing.
8911 (define_insn "avx512f_shufps512_1<mask_name>"
8912 [(set (match_operand:V16SF 0 "register_operand" "=v")
8913 (vec_select:V16SF
8914 (vec_concat:V32SF
8915 (match_operand:V16SF 1 "register_operand" "v")
8916 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
8917 (parallel [(match_operand 3 "const_0_to_3_operand")
8918 (match_operand 4 "const_0_to_3_operand")
8919 (match_operand 5 "const_16_to_19_operand")
8920 (match_operand 6 "const_16_to_19_operand")
8921 (match_operand 7 "const_4_to_7_operand")
8922 (match_operand 8 "const_4_to_7_operand")
8923 (match_operand 9 "const_20_to_23_operand")
8924 (match_operand 10 "const_20_to_23_operand")
8925 (match_operand 11 "const_8_to_11_operand")
8926 (match_operand 12 "const_8_to_11_operand")
8927 (match_operand 13 "const_24_to_27_operand")
8928 (match_operand 14 "const_24_to_27_operand")
8929 (match_operand 15 "const_12_to_15_operand")
8930 (match_operand 16 "const_12_to_15_operand")
8931 (match_operand 17 "const_28_to_31_operand")
8932 (match_operand 18 "const_28_to_31_operand")])))]
8933 "TARGET_AVX512F
8934 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
8935 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
8936 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
8937 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4)
8938 && INTVAL (operands[3]) == (INTVAL (operands[11]) - 8)
8939 && INTVAL (operands[4]) == (INTVAL (operands[12]) - 8)
8940 && INTVAL (operands[5]) == (INTVAL (operands[13]) - 8)
8941 && INTVAL (operands[6]) == (INTVAL (operands[14]) - 8)
8942 && INTVAL (operands[3]) == (INTVAL (operands[15]) - 12)
8943 && INTVAL (operands[4]) == (INTVAL (operands[16]) - 12)
8944 && INTVAL (operands[5]) == (INTVAL (operands[17]) - 12)
8945 && INTVAL (operands[6]) == (INTVAL (operands[18]) - 12))"
8946 {
8947 int mask;
8948 mask = INTVAL (operands[3]);
8949 mask |= INTVAL (operands[4]) << 2;
8950 mask |= (INTVAL (operands[5]) - 16) << 4;
8951 mask |= (INTVAL (operands[6]) - 16) << 6;
8952 operands[3] = GEN_INT (mask);
8953
8954 return "vshufps\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
8955 }
8956 [(set_attr "type" "sselog")
8957 (set_attr "length_immediate" "1")
8958 (set_attr "prefix" "evex")
8959 (set_attr "mode" "V16SF")])
8960
;; Masked 512-bit vshufpd entry point taking the raw 8-bit immediate
;; (operand 3).  Each immediate bit chooses between the two doubles of one
;; pair: even bits pick from operand 1 (indices 0/1, 2/3, 4/5, 6/7), odd
;; bits pick from operand 2 (indices 8/9, 10/11, 12/13, 14/15).  The eight
;; resulting indices are passed to avx512f_shufpd512_1_mask together with
;; the merge source (operand 4) and the QImode mask (operand 5).
8961 (define_expand "avx512f_shufpd512_mask"
8962 [(match_operand:V8DF 0 "register_operand")
8963 (match_operand:V8DF 1 "register_operand")
8964 (match_operand:V8DF 2 "nonimmediate_operand")
8965 (match_operand:SI 3 "const_0_to_255_operand")
8966 (match_operand:V8DF 4 "register_operand")
8967 (match_operand:QI 5 "register_operand")]
8968 "TARGET_AVX512F"
8969 {
8970 int mask = INTVAL (operands[3]);
8971 emit_insn (gen_avx512f_shufpd512_1_mask (operands[0], operands[1], operands[2],
8972 GEN_INT (mask & 1),
8973 GEN_INT (mask & 2 ? 9 : 8),
8974 GEN_INT (mask & 4 ? 3 : 2),
8975 GEN_INT (mask & 8 ? 11 : 10),
8976 GEN_INT (mask & 16 ? 5 : 4),
8977 GEN_INT (mask & 32 ? 13 : 12),
8978 GEN_INT (mask & 64 ? 7 : 6),
8979 GEN_INT (mask & 128 ? 15 : 14),
8980 operands[4], operands[5]));
8981 DONE;
8982 })
8983
;; 512-bit vshufpd expressed as an explicit 8-element selection from the
;; concatenation of operands 1 and 2.  Each of operands 3-10 is confined
;; by its predicate to one two-element window, so it encodes exactly one
;; bit of choice.
;; The output code converts each index back into its immediate bit by
;; subtracting the window base (0, 8, 2, 10, 4, 12, 6, 14) and shifting it
;; to bit position 0..7, then stores the immediate in operands[3].
8984 (define_insn "avx512f_shufpd512_1<mask_name>"
8985 [(set (match_operand:V8DF 0 "register_operand" "=v")
8986 (vec_select:V8DF
8987 (vec_concat:V16DF
8988 (match_operand:V8DF 1 "register_operand" "v")
8989 (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
8990 (parallel [(match_operand 3 "const_0_to_1_operand")
8991 (match_operand 4 "const_8_to_9_operand")
8992 (match_operand 5 "const_2_to_3_operand")
8993 (match_operand 6 "const_10_to_11_operand")
8994 (match_operand 7 "const_4_to_5_operand")
8995 (match_operand 8 "const_12_to_13_operand")
8996 (match_operand 9 "const_6_to_7_operand")
8997 (match_operand 10 "const_14_to_15_operand")])))]
8998 "TARGET_AVX512F"
8999 {
9000 int mask;
9001 mask = INTVAL (operands[3]);
9002 mask |= (INTVAL (operands[4]) - 8) << 1;
9003 mask |= (INTVAL (operands[5]) - 2) << 2;
9004 mask |= (INTVAL (operands[6]) - 10) << 3;
9005 mask |= (INTVAL (operands[7]) - 4) << 4;
9006 mask |= (INTVAL (operands[8]) - 12) << 5;
9007 mask |= (INTVAL (operands[9]) - 6) << 6;
9008 mask |= (INTVAL (operands[10]) - 14) << 7;
9009 operands[3] = GEN_INT (mask);
9010
9011 return "vshufpd\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
9012 }
9013 [(set_attr "type" "sselog")
9014 (set_attr "length_immediate" "1")
9015 (set_attr "prefix" "evex")
9016 (set_attr "mode" "V8DF")])
9017
;; 256-bit vshufpd entry point taking the raw immediate (operand 3).
;; Only the low four bits are used: bits 0 and 2 pick from operand 1
;; (indices 0/1 and 2/3), bits 1 and 3 pick from operand 2 (indices 4/5
;; and 6/7).  The four explicit indices are passed to avx_shufpd256_1.
;; <mask_expand4_name> and <mask_expand4_args> are attributes defined
;; outside this chunk.
9018 (define_expand "avx_shufpd256<mask_expand4_name>"
9019 [(match_operand:V4DF 0 "register_operand")
9020 (match_operand:V4DF 1 "register_operand")
9021 (match_operand:V4DF 2 "nonimmediate_operand")
9022 (match_operand:SI 3 "const_int_operand")]
9023 "TARGET_AVX"
9024 {
9025 int mask = INTVAL (operands[3]);
9026 emit_insn (gen_avx_shufpd256_1<mask_expand4_name> (operands[0],
9027 operands[1],
9028 operands[2],
9029 GEN_INT (mask & 1),
9030 GEN_INT (mask & 2 ? 5 : 4),
9031 GEN_INT (mask & 4 ? 3 : 2),
9032 GEN_INT (mask & 8 ? 7 : 6)
9033 <mask_expand4_args>));
9034 DONE;
9035 })
9036
;; 256-bit vshufpd expressed as an explicit 4-element selection from the
;; concatenation of operands 1 and 2.  Operands 3-6 are each confined to a
;; two-element window (0-1, 4-5, 2-3, 6-7).
;; The output code turns each index back into one immediate bit by
;; subtracting the window base and shifting to bit 0..3, then stores the
;; immediate in operands[3].
9037 (define_insn "avx_shufpd256_1<mask_name>"
9038 [(set (match_operand:V4DF 0 "register_operand" "=v")
9039 (vec_select:V4DF
9040 (vec_concat:V8DF
9041 (match_operand:V4DF 1 "register_operand" "v")
9042 (match_operand:V4DF 2 "nonimmediate_operand" "vm"))
9043 (parallel [(match_operand 3 "const_0_to_1_operand")
9044 (match_operand 4 "const_4_to_5_operand")
9045 (match_operand 5 "const_2_to_3_operand")
9046 (match_operand 6 "const_6_to_7_operand")])))]
9047 "TARGET_AVX && <mask_avx512vl_condition>"
9048 {
9049 int mask;
9050 mask = INTVAL (operands[3]);
9051 mask |= (INTVAL (operands[4]) - 4) << 1;
9052 mask |= (INTVAL (operands[5]) - 2) << 2;
9053 mask |= (INTVAL (operands[6]) - 6) << 3;
9054 operands[3] = GEN_INT (mask);
9055
9056 return "vshufpd\t{%3, %2, %1, %0<mask_operand7>|%0<mask_operand7>, %1, %2, %3}";
9057 }
9058 [(set_attr "type" "sseshuf")
9059 (set_attr "length_immediate" "1")
9060 (set_attr "prefix" "vex")
9061 (set_attr "mode" "V4DF")])
9062
;; 128-bit shufpd entry point taking the raw immediate (operand 3).
;; Bit 0 picks element 0 or 1 of operand 1; bit 1 picks element 0 or 1 of
;; operand 2 (index 2 or 3 of the concatenation).  The two explicit
;; indices are passed to sse2_shufpd_v2df.
;; <mask_expand4_name> and <mask_expand4_args> are attributes defined
;; outside this chunk.
9063 (define_expand "sse2_shufpd<mask_expand4_name>"
9064 [(match_operand:V2DF 0 "register_operand")
9065 (match_operand:V2DF 1 "register_operand")
9066 (match_operand:V2DF 2 "vector_operand")
9067 (match_operand:SI 3 "const_int_operand")]
9068 "TARGET_SSE2"
9069 {
9070 int mask = INTVAL (operands[3]);
9071 emit_insn (gen_sse2_shufpd_v2df<mask_expand4_name> (operands[0], operands[1],
9072 operands[2], GEN_INT (mask & 1),
9073 GEN_INT (mask & 2 ? 3 : 2)
9074 <mask_expand4_args>));
9075 DONE;
9076 })
9077
;; Masked 128-bit vshufpd.  The inner vec_select picks one element from
;; operand 1 (index 0 or 1) and one from operand 2 (index 2 or 3); the
;; vec_merge combines that result with operand 5 under the QImode mask
;; register in operand 6.  Operand 5 is either tied to the destination
;; ("0") or matches the "C" constraint.
;; The output code packs the two indices into a 2-bit immediate stored in
;; operands[3].  NOTE(review): %N5 presumably prints the zero-masking
;; suffix when operand 5 is the zero constant -- confirm.
9078 (define_insn "sse2_shufpd_v2df_mask"
9079 [(set (match_operand:V2DF 0 "register_operand" "=v")
9080 (vec_merge:V2DF
9081 (vec_select:V2DF
9082 (vec_concat:V4DF
9083 (match_operand:V2DF 1 "register_operand" "v")
9084 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
9085 (parallel [(match_operand 3 "const_0_to_1_operand")
9086 (match_operand 4 "const_2_to_3_operand")]))
9087 (match_operand:V2DF 5 "nonimm_or_0_operand" "0C")
9088 (match_operand:QI 6 "register_operand" "Yk")))]
9089 "TARGET_AVX512VL"
9090 {
9091 int mask;
9092 mask = INTVAL (operands[3]);
9093 mask |= (INTVAL (operands[4]) - 2) << 1;
9094 operands[3] = GEN_INT (mask);
9095
9096 return "vshufpd\t{%3, %2, %1, %0%{%6%}%N5|%0%{%6%}%N5, %1, %2, %3}";
9097 }
9098 [(set_attr "type" "sseshuf")
9099 (set_attr "length_immediate" "1")
9100 (set_attr "prefix" "evex")
9101 (set_attr "mode" "V2DF")])
9102
9103 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; V4DI high unpack: selects elements 1,5,3,7 of the concatenation of
;; operands 1 and 2, i.e. { op1[1], op2[1], op1[3], op2[3] }, and emits
;; vpunpckhqdq.  <mask_name> and <mask_operand3> are attributes defined
;; outside this chunk.
9104 (define_insn "avx2_interleave_highv4di<mask_name>"
9105 [(set (match_operand:V4DI 0 "register_operand" "=v")
9106 (vec_select:V4DI
9107 (vec_concat:V8DI
9108 (match_operand:V4DI 1 "register_operand" "v")
9109 (match_operand:V4DI 2 "nonimmediate_operand" "vm"))
9110 (parallel [(const_int 1)
9111 (const_int 5)
9112 (const_int 3)
9113 (const_int 7)])))]
9114 "TARGET_AVX2 && <mask_avx512vl_condition>"
9115 "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9116 [(set_attr "type" "sselog")
9117 (set_attr "prefix" "vex")
9118 (set_attr "mode" "OI")])
9119
;; V8DI high unpack: selects elements 1,9,3,11,5,13,7,15 of the
;; concatenation of operands 1 and 2, i.e. the odd-indexed quadword of
;; each operand, pairwise, and emits vpunpckhqdq.
9120 (define_insn "<mask_codefor>avx512f_interleave_highv8di<mask_name>"
9121 [(set (match_operand:V8DI 0 "register_operand" "=v")
9122 (vec_select:V8DI
9123 (vec_concat:V16DI
9124 (match_operand:V8DI 1 "register_operand" "v")
9125 (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
9126 (parallel [(const_int 1) (const_int 9)
9127 (const_int 3) (const_int 11)
9128 (const_int 5) (const_int 13)
9129 (const_int 7) (const_int 15)])))]
9130 "TARGET_AVX512F"
9131 "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9132 [(set_attr "type" "sselog")
9133 (set_attr "prefix" "evex")
9134 (set_attr "mode" "XI")])
9135
;; V2DI high interleave: selects elements 1 and 3 of the concatenation of
;; operands 1 and 2, i.e. { op1[1], op2[1] }.
;; Alternative 0 (isa noavx) is the two-operand punpckhqdq with the
;; destination tied to operand 1; alternative 1 (isa avx) is the
;; three-operand vpunpckhqdq, which also carries the optional mask.
9136 (define_insn "vec_interleave_highv2di<mask_name>"
9137 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
9138 (vec_select:V2DI
9139 (vec_concat:V4DI
9140 (match_operand:V2DI 1 "register_operand" "0,v")
9141 (match_operand:V2DI 2 "vector_operand" "xBm,vm"))
9142 (parallel [(const_int 1)
9143 (const_int 3)])))]
9144 "TARGET_SSE2 && <mask_avx512vl_condition>"
9145 "@
9146 punpckhqdq\t{%2, %0|%0, %2}
9147 vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9148 [(set_attr "isa" "noavx,avx")
9149 (set_attr "type" "sselog")
9150 (set_attr "prefix_data16" "1,*")
9151 (set_attr "prefix" "orig,<mask_prefix>")
9152 (set_attr "mode" "TI")])
9153
;; V4DI low unpack: selects elements 0,4,2,6 of the concatenation of
;; operands 1 and 2, i.e. { op1[0], op2[0], op1[2], op2[2] }, and emits
;; vpunpcklqdq.  <mask_name> and <mask_operand3> are attributes defined
;; outside this chunk.
9154 (define_insn "avx2_interleave_lowv4di<mask_name>"
9155 [(set (match_operand:V4DI 0 "register_operand" "=v")
9156 (vec_select:V4DI
9157 (vec_concat:V8DI
9158 (match_operand:V4DI 1 "register_operand" "v")
9159 (match_operand:V4DI 2 "nonimmediate_operand" "vm"))
9160 (parallel [(const_int 0)
9161 (const_int 4)
9162 (const_int 2)
9163 (const_int 6)])))]
9164 "TARGET_AVX2 && <mask_avx512vl_condition>"
9165 "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9166 [(set_attr "type" "sselog")
9167 (set_attr "prefix" "vex")
9168 (set_attr "mode" "OI")])
9169
;; V8DI low unpack: selects elements 0,8,2,10,4,12,6,14 of the
;; concatenation of operands 1 and 2, i.e. the even-indexed quadword of
;; each operand, pairwise, and emits vpunpcklqdq.
9170 (define_insn "<mask_codefor>avx512f_interleave_lowv8di<mask_name>"
9171 [(set (match_operand:V8DI 0 "register_operand" "=v")
9172 (vec_select:V8DI
9173 (vec_concat:V16DI
9174 (match_operand:V8DI 1 "register_operand" "v")
9175 (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
9176 (parallel [(const_int 0) (const_int 8)
9177 (const_int 2) (const_int 10)
9178 (const_int 4) (const_int 12)
9179 (const_int 6) (const_int 14)])))]
9180 "TARGET_AVX512F"
9181 "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9182 [(set_attr "type" "sselog")
9183 (set_attr "prefix" "evex")
9184 (set_attr "mode" "XI")])
9185
;; V2DI low interleave: selects elements 0 and 2 of the concatenation of
;; operands 1 and 2, i.e. { op1[0], op2[0] }.
;; Alternative 0 (isa noavx) is the two-operand punpcklqdq with the
;; destination tied to operand 1; alternative 1 (isa avx) is the
;; three-operand vpunpcklqdq, which also carries the optional mask.
;; The "prefix" attribute of alternative 1 uses <mask_prefix>, matching
;; vec_interleave_highv2di, so that the masked variant of this pattern is
;; not described with a fixed "vex" prefix.
9186 (define_insn "vec_interleave_lowv2di<mask_name>"
9187 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
9188 (vec_select:V2DI
9189 (vec_concat:V4DI
9190 (match_operand:V2DI 1 "register_operand" "0,v")
9191 (match_operand:V2DI 2 "vector_operand" "xBm,vm"))
9192 (parallel [(const_int 0)
9193 (const_int 2)])))]
9194 "TARGET_SSE2 && <mask_avx512vl_condition>"
9195 "@
9196 punpcklqdq\t{%2, %0|%0, %2}
9197 vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9198 [(set_attr "isa" "noavx,avx")
9199 (set_attr "type" "sselog")
9200 (set_attr "prefix_data16" "1,*")
9201 (set_attr "prefix" "orig,<mask_prefix>")
9202 (set_attr "mode" "TI")])
9203
;; Unmasked 128-bit shufpd over the VI8F_128 mode iterator.  Selects one
;; element from operand 1 (index 0 or 1) and one from operand 2 (index 2
;; or 3 of the concatenation).
;; The output code packs the two indices into a 2-bit immediate stored in
;; operands[3], then picks the mnemonic by alternative: 0 is the
;; two-operand shufpd (destination tied to operand 1, isa noavx), 1 is the
;; three-operand vshufpd (isa avx).
9204 (define_insn "sse2_shufpd_<mode>"
9205 [(set (match_operand:VI8F_128 0 "register_operand" "=x,v")
9206 (vec_select:VI8F_128
9207 (vec_concat:<ssedoublevecmode>
9208 (match_operand:VI8F_128 1 "register_operand" "0,v")
9209 (match_operand:VI8F_128 2 "vector_operand" "xBm,vm"))
9210 (parallel [(match_operand 3 "const_0_to_1_operand")
9211 (match_operand 4 "const_2_to_3_operand")])))]
9212 "TARGET_SSE2"
9213 {
9214 int mask;
9215 mask = INTVAL (operands[3]);
9216 mask |= (INTVAL (operands[4]) - 2) << 1;
9217 operands[3] = GEN_INT (mask);
9218
9219 switch (which_alternative)
9220 {
9221 case 0:
9222 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
9223 case 1:
9224 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
9225 default:
9226 gcc_unreachable ();
9227 }
9228 }
9229 [(set_attr "isa" "noavx,avx")
9230 (set_attr "type" "sseshuf")
9231 (set_attr "length_immediate" "1")
9232 (set_attr "prefix" "orig,maybe_evex")
9233 (set_attr "mode" "V2DF")])
9234
9235 ;; Avoid combining registers from different units in a single alternative,
9236 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extract element 1 (the high double) of a V2DF into a DF destination.
;; Alternatives, in order:
;;   0  movhpd store to memory
;;   1  unpckhpd %0, %0 -- in-place, operand 1 tied to the destination
;;   2  vunpckhpd using operand 1 for both sources (%d1)
;;   3-5  operand 1 is offsettable memory ("o") and the destination is an
;;        SSE, x87 or general register; the "#" template means these are
;;        not output directly but split (the define_split right after this
;;        pattern turns them into a DF load from byte offset 8).
;; The insn condition rejects memory-to-memory.
9237 (define_insn "sse2_storehpd"
9238 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,Yv,x,*f,r")
9239 (vec_select:DF
9240 (match_operand:V2DF 1 "nonimmediate_operand" " v,0, v,o,o,o")
9241 (parallel [(const_int 1)])))]
9242 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
9243 "@
9244 %vmovhpd\t{%1, %0|%0, %1}
9245 unpckhpd\t%0, %0
9246 vunpckhpd\t{%d1, %0|%0, %d1}
9247 #
9248 #
9249 #"
9250 [(set_attr "isa" "*,noavx,avx,*,*,*")
9251 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov")
9252 (set (attr "prefix_data16")
9253 (if_then_else
9254 (and (eq_attr "alternative" "0")
9255 (not (match_test "TARGET_AVX")))
9256 (const_string "1")
9257 (const_string "*")))
9258 (set_attr "prefix" "maybe_vex,orig,maybe_evex,*,*,*")
9259 (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")])
9260
;; Split: extracting element 1 of a V2DF that lives in memory into a
;; register is just a DFmode load from byte offset 8 of that memory
;; operand.  Applied after reload.
9261 (define_split
9262 [(set (match_operand:DF 0 "register_operand")
9263 (vec_select:DF
9264 (match_operand:V2DF 1 "memory_operand")
9265 (parallel [(const_int 1)])))]
9266 "TARGET_SSE2 && reload_completed"
9267 [(set (match_dup 0) (match_dup 1))]
9268 "operands[1] = adjust_address (operands[1], DFmode, 8);")
9269
;; Extract element 1 of a V2DF when only SSE (not SSE2) is available, so
;; the single-precision move instructions are used to move the 64 bits.
;; Alternatives, in order:
;;   0  movhps store to memory
;;   1  movhlps register to register
;;   2  movlps load from %H1 (presumably the upper 8 bytes of the
;;      offsettable memory operand -- confirm)
;; The insn condition rejects memory-to-memory.
9270 (define_insn "*vec_extractv2df_1_sse"
9271 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
9272 (vec_select:DF
9273 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
9274 (parallel [(const_int 1)])))]
9275 "!TARGET_SSE2 && TARGET_SSE
9276 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
9277 "@
9278 movhps\t{%1, %0|%q0, %1}
9279 movhlps\t{%1, %0|%0, %1}
9280 movlps\t{%H1, %0|%0, %H1}"
9281 [(set_attr "type" "ssemov")
9282 (set_attr "mode" "V2SF,V4SF,V2SF")])
9283
9284 ;; Avoid combining registers from different units in a single alternative,
9285 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extract element 0 (the low double) of a V2DF into a DF destination.
;; Alternative 0 stores to memory with movlpd.  Alternatives 1-4 have a
;; register destination (SSE, SSE, x87, general) and use the "#"
;; template: they are not output directly but split (the define_split
;; right after this pattern turns them into a plain DFmode move of the
;; low part).  The insn condition rejects memory-to-memory.
9286 (define_insn "sse2_storelpd"
9287 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
9288 (vec_select:DF
9289 (match_operand:V2DF 1 "nonimmediate_operand" " v,x,m,m,m")
9290 (parallel [(const_int 0)])))]
9291 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
9292 "@
9293 %vmovlpd\t{%1, %0|%0, %1}
9294 #
9295 #
9296 #
9297 #"
9298 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
9299 (set (attr "prefix_data16")
9300 (if_then_else (eq_attr "alternative" "0")
9301 (const_string "1")
9302 (const_string "*")))
9303 (set_attr "prefix" "maybe_vex")
9304 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
9305
;; Split: extracting element 0 of a V2DF (register or memory) into a
;; register is a DFmode move of the operand's low part.  Applied after
;; reload.
9306 (define_split
9307 [(set (match_operand:DF 0 "register_operand")
9308 (vec_select:DF
9309 (match_operand:V2DF 1 "nonimmediate_operand")
9310 (parallel [(const_int 0)])))]
9311 "TARGET_SSE2 && reload_completed"
9312 [(set (match_dup 0) (match_dup 1))]
9313 "operands[1] = gen_lowpart (DFmode, operands[1]);")
9314
;; Extract element 0 of a V2DF when only SSE (not SSE2) is available.
;; Alternatives, in order:
;;   0  movlps store to memory
;;   1  movaps register to register (copies the whole register)
;;   2  movlps load from memory
;; The insn condition rejects memory-to-memory.
9315 (define_insn "*vec_extractv2df_0_sse"
9316 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
9317 (vec_select:DF
9318 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
9319 (parallel [(const_int 0)])))]
9320 "!TARGET_SSE2 && TARGET_SSE
9321 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
9322 "@
9323 movlps\t{%1, %0|%0, %1}
9324 movaps\t{%1, %0|%0, %1}
9325 movlps\t{%1, %0|%0, %q1}"
9326 [(set_attr "type" "ssemov")
9327 (set_attr "mode" "V2SF,V4SF,V2SF")])
9328
;; Expander for "replace the high double": the result is element 0 of
;; operand 1 concatenated with the DF value in operand 2.
;; ix86_fixup_binary_operands is asked to legitimize the operand
;; combination and returns the destination to use; the real work is done
;; by sse2_loadhpd, and the result is copied to operands[0] afterwards
;; when the helper chose a different destination.
9329 (define_expand "sse2_loadhpd_exp"
9330 [(set (match_operand:V2DF 0 "nonimmediate_operand")
9331 (vec_concat:V2DF
9332 (vec_select:DF
9333 (match_operand:V2DF 1 "nonimmediate_operand")
9334 (parallel [(const_int 0)]))
9335 (match_operand:DF 2 "nonimmediate_operand")))]
9336 "TARGET_SSE2"
9337 {
9338 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
9339
9340 emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2]));
9341
9342 /* Fix up the destination if needed. */
9343 if (dst != operands[0])
9344 emit_move_insn (operands[0], dst);
9345
9346 DONE;
9347 })
9348
9349 ;; Avoid combining registers from different units in a single alternative,
9350 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Replace the high double of a V2DF: result is { op1[0], op2 }.
;; Alternatives, in order:
;;   0  movhpd     operand 2 from memory, destination tied to operand 1
;;   1  vmovhpd    three-operand form of the above
;;   2  unpcklpd   operand 2 in an SSE register, destination tied to op 1
;;   3  vunpcklpd  three-operand form of the above
;;   4-6  destination is offsettable memory tied to operand 1 and operand
;;        2 is an SSE, x87 or general register; the "#" template means
;;        these are not output directly but split (the define_split right
;;        after this pattern turns them into a DF store at byte offset 8).
;; The insn condition rejects the case where operands 1 and 2 are both
;; memory.
9351 (define_insn "sse2_loadhpd"
9352 [(set (match_operand:V2DF 0 "nonimmediate_operand"
9353 "=x,v,x,v ,o,o ,o")
9354 (vec_concat:V2DF
9355 (vec_select:DF
9356 (match_operand:V2DF 1 "nonimmediate_operand"
9357 " 0,v,0,v ,0,0 ,0")
9358 (parallel [(const_int 0)]))
9359 (match_operand:DF 2 "nonimmediate_operand"
9360 " m,m,x,Yv,x,*f,r")))]
9361 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
9362 "@
9363 movhpd\t{%2, %0|%0, %2}
9364 vmovhpd\t{%2, %1, %0|%0, %1, %2}
9365 unpcklpd\t{%2, %0|%0, %2}
9366 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
9367 #
9368 #
9369 #"
9370 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
9371 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
9372 (set (attr "prefix_data16")
9373 (if_then_else (eq_attr "alternative" "0")
9374 (const_string "1")
9375 (const_string "*")))
9376 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,*,*,*")
9377 (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")])
9378
;; Split: replacing the high double of a V2DF that lives in memory (the
;; low half is the destination's own element 0) with a register value is
;; just a DFmode store to byte offset 8 of that memory.  Applied after
;; reload.
9379 (define_split
9380 [(set (match_operand:V2DF 0 "memory_operand")
9381 (vec_concat:V2DF
9382 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
9383 (match_operand:DF 1 "register_operand")))]
9384 "TARGET_SSE2 && reload_completed"
9385 [(set (match_dup 0) (match_dup 1))]
9386 "operands[0] = adjust_address (operands[0], DFmode, 8);")
9387
;; Expander for "replace the low double": the result is the DF value in
;; operand 2 concatenated with element 1 of operand 1.
;; ix86_fixup_binary_operands is asked to legitimize the operand
;; combination and returns the destination to use; the real work is done
;; by sse2_loadlpd, and the result is copied to operands[0] afterwards
;; when the helper chose a different destination.
9388 (define_expand "sse2_loadlpd_exp"
9389 [(set (match_operand:V2DF 0 "nonimmediate_operand")
9390 (vec_concat:V2DF
9391 (match_operand:DF 2 "nonimmediate_operand")
9392 (vec_select:DF
9393 (match_operand:V2DF 1 "nonimmediate_operand")
9394 (parallel [(const_int 1)]))))]
9395 "TARGET_SSE2"
9396 {
9397 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
9398
9399 emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2]));
9400
9401 /* Fix up the destination if needed. */
9402 if (dst != operands[0])
9403 emit_move_insn (operands[0], dst);
9404
9405 DONE;
9406 })
9407
;; sse2_loadlpd: form a V2DF whose low element is the DF operand 2 and
;; whose high element is element 1 of operand 1.  Alternatives 0-7 emit an
;; instruction; alternatives 8-10 have a memory destination and use "#".
;; Avoid combining registers from different units in a single alternative,
;; see comment above inline_secondary_memory_needed function in i386.c
(define_insn "sse2_loadlpd"
  [(set (match_operand:V2DF 0 "nonimmediate_operand"
          "=v,x,v,x,v,x,x,v,m,m ,m")
        (vec_concat:V2DF
          (match_operand:DF 2 "nonimmediate_operand"
            "vm,m,m,x,v,0,0,v,x,*f,r")
          (vec_select:DF
            (match_operand:V2DF 1 "nonimm_or_0_operand"
              " C,0,v,0,v,x,o,o,0,0 ,0")
            (parallel [(const_int 1)]))))]
  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   %vmovq\t{%2, %0|%0, %2}
   movlpd\t{%2, %0|%0, %2}
   vmovlpd\t{%2, %1, %0|%0, %1, %2}
   movsd\t{%2, %0|%0, %2}
   vmovsd\t{%2, %1, %0|%0, %1, %2}
   shufpd\t{$2, %1, %0|%0, %1, 2}
   movhpd\t{%H1, %0|%0, %H1}
   vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
   #
   #
   #"
  ;; Per-alternative attributes, one column per alternative 0..10.
  [(set_attr "isa" "*,noavx,avx,noavx,avx,noavx,noavx,avx,*,*,*")
   (set_attr "type"
     "ssemov,ssemov,ssemov,ssemov,ssemov,sselog,ssemov,ssemov,ssemov,fmov,imov")
   ;; Set for the legacy movlpd (1) and movhpd (6) alternatives.
   (set_attr "prefix_data16" "*,1,*,*,*,*,1,*,*,*,*")
   ;; shufpd (5) is the only alternative with an immediate.
   (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*,*,*")
   (set_attr "prefix"
     "maybe_vex,orig,maybe_evex,orig,maybe_evex,orig,orig,maybe_evex,*,*,*")
   (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")])
9461
;; After reload, replacing the low element of a V2DF in memory with a DF
;; register (high element kept via match_dup 0) is just a DF store at byte
;; offset 0 of that memory operand.
(define_split
  [(set (match_operand:V2DF 0 "memory_operand")
        (vec_concat:V2DF
          (match_operand:DF 1 "register_operand")
          (vec_select:DF
            (match_dup 0)
            (parallel [(const_int 1)]))))]
  "TARGET_SSE2 && reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  /* Narrow the destination to the lower DF slot.  */
  operands[0] = adjust_address (operands[0], DFmode, 0);
})
9470
;; sse2_movsd: vec_merge with selector 1, i.e. element 0 comes from
;; operand 2 and element 1 from operand 1.  Nine alternatives cover
;; register and memory sources as well as memory destinations.
(define_insn "sse2_movsd"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,v,x,v,m,x,x,v,o")
        (vec_merge:V2DF
          (match_operand:V2DF 2 "nonimmediate_operand" " x,v,m,m,v,0,0,v,0")
          (match_operand:V2DF 1 "nonimmediate_operand" " 0,v,0,v,0,x,o,o,v")
          (const_int 1)))]
  "TARGET_SSE2"
  "@
   movsd\t{%2, %0|%0, %2}
   vmovsd\t{%2, %1, %0|%0, %1, %2}
   movlpd\t{%2, %0|%0, %q2}
   vmovlpd\t{%2, %1, %0|%0, %1, %q2}
   %vmovlpd\t{%2, %0|%q0, %2}
   shufpd\t{$2, %1, %0|%0, %1, 2}
   movhps\t{%H1, %0|%0, %H1}
   vmovhps\t{%H1, %2, %0|%0, %2, %H1}
   %vmovhps\t{%1, %H0|%H0, %1}"
  ;; Per-alternative attributes, one column per alternative 0..8.
  [(set_attr "isa" "noavx,avx,noavx,avx,*,noavx,noavx,avx,*")
   (set_attr "type"
     "ssemov,ssemov,ssemov,ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
   ;; Depends on TARGET_AVX as well as the alternative, so this one stays
   ;; in conditional form.
   (set (attr "prefix_data16")
     (if_then_else
       (and (eq_attr "alternative" "2,4")
            (not (match_test "TARGET_AVX")))
       (const_string "1")
       (const_string "*")))
   ;; shufpd (5) is the only alternative with an immediate.
   (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*")
   (set_attr "prefix"
     "orig,maybe_evex,orig,maybe_evex,maybe_vex,orig,orig,maybe_evex,maybe_vex")
   (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
9512
;; Broadcast the DF operand 1 into both elements of a V2DF register.
;; Alternative 0 duplicates in place with unpcklpd; alternatives 1 and 2
;; use (v)movddup and accept a memory source.
(define_insn "vec_dupv2df<mask_name>"
  [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
        (vec_duplicate:V2DF
          (match_operand:DF 1 "nonimmediate_operand" " 0,xm,vm")))]
  "TARGET_SSE2 && <mask_avx512vl_condition>"
  "@
   unpcklpd\t%0, %0
   %vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}
   vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set (attr "isa")
     (cond [(eq_attr "alternative" "0")
              (const_string "noavx")
            (eq_attr "alternative" "1")
              (const_string "sse3")
           ]
           (const_string "avx512vl")))
   (set_attr "type" "sselog1")
   (set (attr "prefix")
     (cond [(eq_attr "alternative" "0")
              (const_string "orig")
            (eq_attr "alternative" "1")
              (const_string "maybe_vex")
           ]
           (const_string "evex")))
   (set_attr "mode" "V2DF,DF,DF")])
9526
;; Concatenate two DF values into a V2DF register (operand 1 low, operand 2
;; high).  Both operands may be memory only when TARGET_SSE3 is on and they
;; are the same rtx (the movddup alternatives, where operand 2 is tied to
;; operand 1 by the "1" constraint).
(define_insn "vec_concatv2df"
  [(set (match_operand:V2DF 0 "register_operand" "=x,x,v,x,v,x,x, v,x,x")
        (vec_concat:V2DF
          (match_operand:DF 1 "nonimmediate_operand" " 0,x,v,m,m,0,x,vm,0,0")
          (match_operand:DF 2 "nonimm_or_0_operand" " x,x,v,1,1,m,m, C,x,m")))]
  "TARGET_SSE
   && (!(MEM_P (operands[1]) && MEM_P (operands[2]))
       || (TARGET_SSE3 && rtx_equal_p (operands[1], operands[2])))"
  "@
   unpcklpd\t{%2, %0|%0, %2}
   vunpcklpd\t{%2, %1, %0|%0, %1, %2}
   vunpcklpd\t{%2, %1, %0|%0, %1, %2}
   %vmovddup\t{%1, %0|%0, %1}
   vmovddup\t{%1, %0|%0, %1}
   movhpd\t{%2, %0|%0, %2}
   vmovhpd\t{%2, %1, %0|%0, %1, %2}
   %vmovq\t{%1, %0|%0, %1}
   movlhps\t{%2, %0|%0, %2}
   movhps\t{%2, %0|%0, %2}"
  ;; Per-alternative attributes, one column per alternative 0..9.
  [(set_attr "isa"
     "sse2_noavx,avx,avx512vl,sse3,avx512vl,sse2_noavx,avx,sse2,noavx,noavx")
   (set_attr "type"
     "sselog,sselog,sselog,sselog,sselog,ssemov,ssemov,ssemov,ssemov,ssemov")
   ;; Only the legacy movhpd alternative (5) sets prefix_data16.
   (set_attr "prefix_data16" "*,*,*,*,*,1,*,*,*,*")
   (set_attr "prefix"
     "orig,vex,evex,maybe_vex,evex,orig,vex,maybe_vex,orig,orig")
   (set_attr "mode" "V2DF,V2DF,V2DF, DF, DF, V1DF,V1DF,DF,V4SF,V2SF")])
9578
;; Set element 0 of an otherwise all-zero 256/512-bit double vector from
;; the scalar operand 2 (operand 1 must satisfy const0_operand).
;; vmovq clears also the higher bits, so a single vmovq into the xmm part
;; of the destination (%x0) is sufficient.
(define_insn "vec_set<mode>_0"
  [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
        (vec_merge:VF2_512_256
          (vec_duplicate:VF2_512_256
            (match_operand:<ssescalarmode> 2 "general_operand" "xm"))
          (match_operand:VF2_512_256 1 "const0_operand" "C")
          (const_int 1)))]
  "TARGET_AVX"
  "vmovq\t{%2, %x0|%x0, %2}"
  [(set_attr "mode" "DF")
   (set_attr "prefix" "maybe_evex")
   (set_attr "type" "ssemov")])
9592
9593 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9594 ;;
9595 ;; Parallel integer down-conversion operations
9596 ;;
9597 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9598
;; Destination modes for the down-conversions from 512-bit sources, plus
;; per-destination attributes: the source mode in upper and lower case and
;; the mnemonic suffix appended to vpmov<trunsuffix>.
(define_mode_iterator PMOV_DST_MODE_1
  [V16QI V16HI V8SI V8HI])

(define_mode_attr pmov_src_mode
  [(V16QI "V16SI")
   (V16HI "V16SI")
   (V8SI "V8DI")
   (V8HI "V8DI")])

(define_mode_attr pmov_src_lower
  [(V16QI "v16si")
   (V16HI "v16si")
   (V8SI "v8di")
   (V8HI "v8di")])

(define_mode_attr pmov_suff_1
  [(V16QI "db")
   (V16HI "dw")
   (V8SI "qd")
   (V8HI "qw")])
9606
;; Truncate (any_truncate variant) a 512-bit source register to a
;; PMOV_DST_MODE_1 destination, either register (alt 0) or memory (alt 1).
(define_insn "*avx512f_<code><pmov_src_lower><mode>2"
  [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
        (any_truncate:PMOV_DST_MODE_1
          (match_operand:<pmov_src_mode> 1 "register_operand" "v,v")))]
  "TARGET_AVX512F"
  "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   ;; Alternative 1 writes memory, alternative 0 touches none.
   (set (attr "memory")
     (if_then_else (eq_attr "alternative" "1")
       (const_string "store")
       (const_string "none")))
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
9617
;; Masked truncation of a 512-bit source: elements selected by mask
;; operand 3 receive the truncated value, the others come from operand 2
;; (tied to the destination, or zero in the register alternative).
(define_insn "avx512f_<code><pmov_src_lower><mode>2_mask"
  [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
        (vec_merge:PMOV_DST_MODE_1
          (any_truncate:PMOV_DST_MODE_1
            (match_operand:<pmov_src_mode> 1 "register_operand" "v,v"))
          (match_operand:PMOV_DST_MODE_1 2 "nonimm_or_0_operand" "0C,0")
          (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
  "TARGET_AVX512F"
  "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
  [(set_attr "type" "ssemov")
   ;; Alternative 1 writes memory, alternative 0 touches none.
   (set (attr "memory")
     (if_then_else (eq_attr "alternative" "1")
       (const_string "store")
       (const_string "none")))
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
9631
;; Expander for the masked truncating store: the merge source is the
;; memory destination itself (match_dup 0), so unselected elements keep
;; their old memory contents.
(define_expand "avx512f_<code><pmov_src_lower><mode>2_mask_store"
  [(set
     (match_operand:PMOV_DST_MODE_1 0 "memory_operand")
     (vec_merge:PMOV_DST_MODE_1
       (any_truncate:PMOV_DST_MODE_1
         (match_operand:<pmov_src_mode> 1 "register_operand"))
       (match_dup 0)
       (match_operand:<avx512fmaskmode> 2 "register_operand")))]
  "TARGET_AVX512F")
9640
;; Truncate (any_truncate variant) V32HI to V32QI; destination is a
;; register (alt 0) or memory (alt 1).
(define_insn "avx512bw_<code>v32hiv32qi2"
  [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
        (any_truncate:V32QI
          (match_operand:V32HI 1 "register_operand" "v,v")))]
  "TARGET_AVX512BW"
  "vpmov<trunsuffix>wb\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   ;; Alternative 1 writes memory, alternative 0 touches none.
   (set (attr "memory")
     (if_then_else (eq_attr "alternative" "1")
       (const_string "store")
       (const_string "none")))
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
9651
;; Masked V32HI -> V32QI truncation: elements selected by the SImode mask
;; operand 3 receive the truncated value, the others come from operand 2.
(define_insn "avx512bw_<code>v32hiv32qi2_mask"
  [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
        (vec_merge:V32QI
          (any_truncate:V32QI
            (match_operand:V32HI 1 "register_operand" "v,v"))
          (match_operand:V32QI 2 "nonimm_or_0_operand" "0C,0")
          (match_operand:SI 3 "register_operand" "Yk,Yk")))]
  "TARGET_AVX512BW"
  "vpmov<trunsuffix>wb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
  [(set_attr "type" "ssemov")
   ;; Alternative 1 writes memory, alternative 0 touches none.
   (set (attr "memory")
     (if_then_else (eq_attr "alternative" "1")
       (const_string "store")
       (const_string "none")))
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
9665
;; Expander for the masked V32HI -> V32QI truncation whose merge source is
;; the destination itself (match_dup 0).
(define_expand "avx512bw_<code>v32hiv32qi2_mask_store"
  [(set
     (match_operand:V32QI 0 "nonimmediate_operand")
     (vec_merge:V32QI
       (any_truncate:V32QI
         (match_operand:V32HI 1 "register_operand"))
       (match_dup 0)
       (match_operand:SI 2 "register_operand")))]
  "TARGET_AVX512BW")
9674
;; 128-bit destination modes whose source is the double-width vector mode
;; (V16QI only with TARGET_AVX512BW), and the matching mnemonic suffix.
(define_mode_iterator PMOV_DST_MODE_2
  [V4SI
   V8HI
   (V16QI "TARGET_AVX512BW")])

(define_mode_attr pmov_suff_2
  [(V16QI "wb")
   (V8HI "dw")
   (V4SI "qd")])
9679
;; Truncate (any_truncate variant) a <ssedoublemode> register to a
;; PMOV_DST_MODE_2 destination, either register (alt 0) or memory (alt 1).
(define_insn "*avx512vl_<code><ssedoublemodelower><mode>2"
  [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m")
        (any_truncate:PMOV_DST_MODE_2
          (match_operand:<ssedoublemode> 1 "register_operand" "v,v")))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix><pmov_suff_2>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   ;; Alternative 1 writes memory, alternative 0 touches none.
   (set (attr "memory")
     (if_then_else (eq_attr "alternative" "1")
       (const_string "store")
       (const_string "none")))
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
9690
;; Masked <ssedoublemode> -> PMOV_DST_MODE_2 truncation: elements selected
;; by mask operand 3 receive the truncated value, the others come from
;; operand 2.
(define_insn "<avx512>_<code><ssedoublemodelower><mode>2_mask"
  [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m")
        (vec_merge:PMOV_DST_MODE_2
          (any_truncate:PMOV_DST_MODE_2
            (match_operand:<ssedoublemode> 1 "register_operand" "v,v"))
          (match_operand:PMOV_DST_MODE_2 2 "nonimm_or_0_operand" "0C,0")
          (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix><pmov_suff_2>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
  [(set_attr "type" "ssemov")
   ;; Alternative 1 writes memory, alternative 0 touches none.
   (set (attr "memory")
     (if_then_else (eq_attr "alternative" "1")
       (const_string "store")
       (const_string "none")))
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
9704
;; Expander for the masked <ssedoublemode> -> PMOV_DST_MODE_2 truncation
;; whose merge source is the destination itself (match_dup 0).
(define_expand "<avx512>_<code><ssedoublemodelower><mode>2_mask_store"
  [(set
     (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand")
     (vec_merge:PMOV_DST_MODE_2
       (any_truncate:PMOV_DST_MODE_2
         (match_operand:<ssedoublemode> 1 "register_operand"))
       (match_dup 0)
       (match_operand:<avx512fmaskmode> 2 "register_operand")))]
  "TARGET_AVX512VL")
9713
;; Source modes for truncations to QI elements that fill only part of a
;; V16QI result.  For each source: the truncated (partial) destination
;; mode, the mode of the zero-filled remainder, and the mnemonic suffix.
(define_mode_iterator PMOV_SRC_MODE_3
  [V4DI
   V2DI
   V8SI
   V4SI
   (V8HI "TARGET_AVX512BW")])

(define_mode_attr pmov_dst_3
  [(V4DI "V4QI")
   (V2DI "V2QI")
   (V8SI "V8QI")
   (V4SI "V4QI")
   (V8HI "V8QI")])

(define_mode_attr pmov_dst_zeroed_3
  [(V4DI "V12QI")
   (V2DI "V14QI")
   (V8SI "V8QI")
   (V4SI "V12QI")
   (V8HI "V8QI")])

(define_mode_attr pmov_suff_3
  [(V4DI "qb")
   (V2DI "qb")
   (V8SI "db")
   (V4SI "db")
   (V8HI "wb")])
9721
;; Register form: the truncated QI elements occupy the low part of the
;; V16QI result and operand 2 (const0_operand) supplies the zero upper part.
(define_insn "*avx512vl_<code><mode>v<ssescalarnum>qi2"
  [(set (match_operand:V16QI 0 "register_operand" "=v")
        (vec_concat:V16QI
          (any_truncate:<pmov_dst_3>
            (match_operand:PMOV_SRC_MODE_3 1 "register_operand" "v"))
          (match_operand:<pmov_dst_zeroed_3> 2 "const0_operand")))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
  [(set_attr "mode" "TI")
   (set_attr "prefix" "evex")
   (set_attr "type" "ssemov")])
9733
;; Store form of V2DI -> V2QI: bytes 0-1 of the V16QI memory operand get
;; the truncated values, bytes 2-15 are re-selected from the same memory
;; (match_dup 0) and so stay unchanged.  Intel syntax prints the
;; destination with %w0.
(define_insn "*avx512vl_<code>v2div2qi2_store"
  [(set (match_operand:V16QI 0 "memory_operand" "=m")
        (vec_concat:V16QI
          (any_truncate:V2QI
            (match_operand:V2DI 1 "register_operand" "v"))
          (vec_select:V14QI
            (match_dup 0)
            (parallel
              [(const_int 2) (const_int 3) (const_int 4) (const_int 5)
               (const_int 6) (const_int 7) (const_int 8) (const_int 9)
               (const_int 10) (const_int 11) (const_int 12) (const_int 13)
               (const_int 14) (const_int 15)]))))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix>qb\t{%1, %0|%w0, %1}"
  [(set_attr "mode" "TI")
   (set_attr "prefix" "evex")
   (set_attr "memory" "store")
   (set_attr "type" "ssemov")])
9754
;; Masked V2DI -> V2QI into a register: bytes 0-1 are merged under mask
;; operand 3 between the truncated values and bytes 0-1 of operand 2;
;; bytes 2-15 of the result are zero.
(define_insn "avx512vl_<code>v2div2qi2_mask"
  [(set (match_operand:V16QI 0 "register_operand" "=v")
        (vec_concat:V16QI
          (vec_merge:V2QI
            (any_truncate:V2QI
              (match_operand:V2DI 1 "register_operand" "v"))
            (vec_select:V2QI
              (match_operand:V16QI 2 "nonimm_or_0_operand" "0C")
              (parallel [(const_int 0) (const_int 1)]))
            (match_operand:QI 3 "register_operand" "Yk"))
          (const_vector:V14QI
            [(const_int 0) (const_int 0) (const_int 0) (const_int 0)
             (const_int 0) (const_int 0) (const_int 0) (const_int 0)
             (const_int 0) (const_int 0) (const_int 0) (const_int 0)
             (const_int 0) (const_int 0)])))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
  [(set_attr "mode" "TI")
   (set_attr "prefix" "evex")
   (set_attr "type" "ssemov")])
9777
;; Zero-masking V2DI -> V2QI into a register: the merge source is a
;; literal zero vector, so the template always prints {z} after the mask
;; operand 2.  Bytes 2-15 of the result are zero.
(define_insn "*avx512vl_<code>v2div2qi2_mask_1"
  [(set (match_operand:V16QI 0 "register_operand" "=v")
        (vec_concat:V16QI
          (vec_merge:V2QI
            (any_truncate:V2QI
              (match_operand:V2DI 1 "register_operand" "v"))
            (const_vector:V2QI [(const_int 0) (const_int 0)])
            (match_operand:QI 2 "register_operand" "Yk"))
          (const_vector:V14QI
            [(const_int 0) (const_int 0) (const_int 0) (const_int 0)
             (const_int 0) (const_int 0) (const_int 0) (const_int 0)
             (const_int 0) (const_int 0) (const_int 0) (const_int 0)
             (const_int 0) (const_int 0)])))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix>qb\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
  [(set_attr "mode" "TI")
   (set_attr "prefix" "evex")
   (set_attr "type" "ssemov")])
9798
;; Masked store form of V2DI -> V2QI: bytes 0-1 of memory are merged under
;; mask operand 2 with the truncated values; every byte not written keeps
;; its memory contents (all match_dup 0).  Intel syntax prints the
;; destination with %w0.
(define_insn "avx512vl_<code>v2div2qi2_mask_store"
  [(set (match_operand:V16QI 0 "memory_operand" "=m")
        (vec_concat:V16QI
          (vec_merge:V2QI
            (any_truncate:V2QI
              (match_operand:V2DI 1 "register_operand" "v"))
            (vec_select:V2QI
              (match_dup 0)
              (parallel [(const_int 0) (const_int 1)]))
            (match_operand:QI 2 "register_operand" "Yk"))
          (vec_select:V14QI
            (match_dup 0)
            (parallel
              [(const_int 2) (const_int 3) (const_int 4) (const_int 5)
               (const_int 6) (const_int 7) (const_int 8) (const_int 9)
               (const_int 10) (const_int 11) (const_int 12) (const_int 13)
               (const_int 14) (const_int 15)]))))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%w0%{%2%}, %1}"
  [(set_attr "mode" "TI")
   (set_attr "prefix" "evex")
   (set_attr "memory" "store")
   (set_attr "type" "ssemov")])
9824
;; Store form of a 4-element source -> V4QI: bytes 0-3 of the V16QI memory
;; operand get the truncated values, bytes 4-15 are re-selected from the
;; same memory and stay unchanged.  Intel syntax prints the destination
;; with %k0.
(define_insn "*avx512vl_<code><mode>v4qi2_store"
  [(set (match_operand:V16QI 0 "memory_operand" "=m")
        (vec_concat:V16QI
          (any_truncate:V4QI
            (match_operand:VI4_128_8_256 1 "register_operand" "v"))
          (vec_select:V12QI
            (match_dup 0)
            (parallel
              [(const_int 4) (const_int 5) (const_int 6) (const_int 7)
               (const_int 8) (const_int 9) (const_int 10) (const_int 11)
               (const_int 12) (const_int 13) (const_int 14)
               (const_int 15)]))))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%k0, %1}"
  [(set_attr "mode" "TI")
   (set_attr "prefix" "evex")
   (set_attr "memory" "store")
   (set_attr "type" "ssemov")])
9844
;; Masked 4-element source -> V4QI into a register: bytes 0-3 are merged
;; under mask operand 3 between the truncated values and bytes 0-3 of
;; operand 2; bytes 4-15 of the result are zero.
(define_insn "avx512vl_<code><mode>v4qi2_mask"
  [(set (match_operand:V16QI 0 "register_operand" "=v")
        (vec_concat:V16QI
          (vec_merge:V4QI
            (any_truncate:V4QI
              (match_operand:VI4_128_8_256 1 "register_operand" "v"))
            (vec_select:V4QI
              (match_operand:V16QI 2 "nonimm_or_0_operand" "0C")
              (parallel
                [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))
            (match_operand:QI 3 "register_operand" "Yk"))
          (const_vector:V12QI
            [(const_int 0) (const_int 0) (const_int 0) (const_int 0)
             (const_int 0) (const_int 0) (const_int 0) (const_int 0)
             (const_int 0) (const_int 0) (const_int 0) (const_int 0)])))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
  [(set_attr "mode" "TI")
   (set_attr "prefix" "evex")
   (set_attr "type" "ssemov")])
9867
;; Zero-masking 4-element source -> V4QI into a register: the merge source
;; is a literal zero vector, so the template always prints {z} after the
;; mask operand 2.  Bytes 4-15 of the result are zero.
(define_insn "*avx512vl_<code><mode>v4qi2_mask_1"
  [(set (match_operand:V16QI 0 "register_operand" "=v")
        (vec_concat:V16QI
          (vec_merge:V4QI
            (any_truncate:V4QI
              (match_operand:VI4_128_8_256 1 "register_operand" "v"))
            (const_vector:V4QI
              [(const_int 0) (const_int 0) (const_int 0) (const_int 0)])
            (match_operand:QI 2 "register_operand" "Yk"))
          (const_vector:V12QI
            [(const_int 0) (const_int 0) (const_int 0) (const_int 0)
             (const_int 0) (const_int 0) (const_int 0) (const_int 0)
             (const_int 0) (const_int 0) (const_int 0) (const_int 0)])))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
  [(set_attr "mode" "TI")
   (set_attr "prefix" "evex")
   (set_attr "type" "ssemov")])
9888
;; Masked store form of a 4-element source -> V4QI: bytes 0-3 of memory
;; are merged under mask operand 2 with the truncated values; every byte
;; not written keeps its memory contents (all match_dup 0).  Intel syntax
;; prints the destination with %k0.
(define_insn "avx512vl_<code><mode>v4qi2_mask_store"
  [(set (match_operand:V16QI 0 "memory_operand" "=m")
        (vec_concat:V16QI
          (vec_merge:V4QI
            (any_truncate:V4QI
              (match_operand:VI4_128_8_256 1 "register_operand" "v"))
            (vec_select:V4QI
              (match_dup 0)
              (parallel
                [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))
            (match_operand:QI 2 "register_operand" "Yk"))
          (vec_select:V12QI
            (match_dup 0)
            (parallel
              [(const_int 4) (const_int 5) (const_int 6) (const_int 7)
               (const_int 8) (const_int 9) (const_int 10) (const_int 11)
               (const_int 12) (const_int 13) (const_int 14)
               (const_int 15)]))))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%k0%{%2%}, %1}"
  [(set_attr "mode" "TI")
   (set_attr "prefix" "evex")
   (set_attr "memory" "store")
   (set_attr "type" "ssemov")])
9914
;; 8-element integer sources for the V8QI truncations: V8HI (only with
;; TARGET_AVX512BW) and V8SI.
(define_mode_iterator VI2_128_BW_4_256
  [(V8HI "TARGET_AVX512BW")
   V8SI])
9917
;; Store form of an 8-element source -> V8QI: bytes 0-7 of the V16QI
;; memory operand get the truncated values, bytes 8-15 are re-selected
;; from the same memory and stay unchanged.  Intel syntax prints the
;; destination with %q0.
(define_insn "*avx512vl_<code><mode>v8qi2_store"
  [(set (match_operand:V16QI 0 "memory_operand" "=m")
        (vec_concat:V16QI
          (any_truncate:V8QI
            (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
          (vec_select:V8QI
            (match_dup 0)
            (parallel
              [(const_int 8) (const_int 9) (const_int 10) (const_int 11)
               (const_int 12) (const_int 13) (const_int 14)
               (const_int 15)]))))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%q0, %1}"
  [(set_attr "mode" "TI")
   (set_attr "prefix" "evex")
   (set_attr "memory" "store")
   (set_attr "type" "ssemov")])
9935
;; Masked 8-element source -> V8QI into a register: bytes 0-7 are merged
;; under mask operand 3 between the truncated values and bytes 0-7 of
;; operand 2; bytes 8-15 of the result are zero.
(define_insn "avx512vl_<code><mode>v8qi2_mask"
  [(set (match_operand:V16QI 0 "register_operand" "=v")
        (vec_concat:V16QI
          (vec_merge:V8QI
            (any_truncate:V8QI
              (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
            (vec_select:V8QI
              (match_operand:V16QI 2 "nonimm_or_0_operand" "0C")
              (parallel
                [(const_int 0) (const_int 1) (const_int 2) (const_int 3)
                 (const_int 4) (const_int 5) (const_int 6) (const_int 7)]))
            (match_operand:QI 3 "register_operand" "Yk"))
          (const_vector:V8QI
            [(const_int 0) (const_int 0) (const_int 0) (const_int 0)
             (const_int 0) (const_int 0) (const_int 0) (const_int 0)])))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
  [(set_attr "mode" "TI")
   (set_attr "prefix" "evex")
   (set_attr "type" "ssemov")])
9958
;; Zero-masking 8-element source -> V8QI into a register: the merge source
;; is a literal zero vector, so the template always prints {z} after the
;; mask operand 2.  Bytes 8-15 of the result are zero.
(define_insn "*avx512vl_<code><mode>v8qi2_mask_1"
  [(set (match_operand:V16QI 0 "register_operand" "=v")
        (vec_concat:V16QI
          (vec_merge:V8QI
            (any_truncate:V8QI
              (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
            (const_vector:V8QI
              [(const_int 0) (const_int 0) (const_int 0) (const_int 0)
               (const_int 0) (const_int 0) (const_int 0) (const_int 0)])
            (match_operand:QI 2 "register_operand" "Yk"))
          (const_vector:V8QI
            [(const_int 0) (const_int 0) (const_int 0) (const_int 0)
             (const_int 0) (const_int 0) (const_int 0) (const_int 0)])))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
  [(set_attr "mode" "TI")
   (set_attr "prefix" "evex")
   (set_attr "type" "ssemov")])
9979
;; Masked store form of an 8-element source -> V8QI: bytes 0-7 of memory
;; are merged under mask operand 2 with the truncated values; every byte
;; not written keeps its memory contents (all match_dup 0).  Intel syntax
;; prints the destination with %q0.
(define_insn "avx512vl_<code><mode>v8qi2_mask_store"
  [(set (match_operand:V16QI 0 "memory_operand" "=m")
        (vec_concat:V16QI
          (vec_merge:V8QI
            (any_truncate:V8QI
              (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
            (vec_select:V8QI
              (match_dup 0)
              (parallel
                [(const_int 0) (const_int 1) (const_int 2) (const_int 3)
                 (const_int 4) (const_int 5) (const_int 6) (const_int 7)]))
            (match_operand:QI 2 "register_operand" "Yk"))
          (vec_select:V8QI
            (match_dup 0)
            (parallel
              [(const_int 8) (const_int 9) (const_int 10) (const_int 11)
               (const_int 12) (const_int 13) (const_int 14)
               (const_int 15)]))))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%q0%{%2%}, %1}"
  [(set_attr "mode" "TI")
   (set_attr "prefix" "evex")
   (set_attr "memory" "store")
   (set_attr "type" "ssemov")])
10005
;; Source modes for truncations to HI elements that fill only part of a
;; V8HI result.  For each source: the truncated (partial) destination
;; mode, the mode of the zero-filled remainder, and the mnemonic suffix.
(define_mode_iterator PMOV_SRC_MODE_4
  [V4DI V2DI V4SI])

(define_mode_attr pmov_dst_4
  [(V4DI "V4HI")
   (V2DI "V2HI")
   (V4SI "V4HI")])

(define_mode_attr pmov_dst_zeroed_4
  [(V4DI "V4HI")
   (V2DI "V6HI")
   (V4SI "V4HI")])

(define_mode_attr pmov_suff_4
  [(V4DI "qw")
   (V2DI "qw")
   (V4SI "dw")])
10013
;; Register form: the truncated HI elements occupy the low part of the
;; V8HI result and operand 2 (const0_operand) supplies the zero upper part.
(define_insn "*avx512vl_<code><mode>v<ssescalarnum>hi2"
  [(set (match_operand:V8HI 0 "register_operand" "=v")
        (vec_concat:V8HI
          (any_truncate:<pmov_dst_4>
            (match_operand:PMOV_SRC_MODE_4 1 "register_operand" "v"))
          (match_operand:<pmov_dst_zeroed_4> 2 "const0_operand")))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0|%0, %1}"
  [(set_attr "mode" "TI")
   (set_attr "prefix" "evex")
   (set_attr "type" "ssemov")])
10025
;; Store form of a 4-element source -> V4HI: words 0-3 of the V8HI memory
;; operand get the truncated values, words 4-7 are re-selected from the
;; same memory (match_dup 0) and stay unchanged, so only 8 bytes are
;; written.
;; The Intel-syntax half prints the destination with %q0 (QWORD PTR) to
;; match the 8-byte store; a bare %0 would print the size of the whole
;; V8HI operand.  This mirrors the %w0/%k0/%q0 overrides used by the QI
;; store patterns in this file.
(define_insn "*avx512vl_<code><mode>v4hi2_store"
  [(set (match_operand:V8HI 0 "memory_operand" "=m")
        (vec_concat:V8HI
          (any_truncate:V4HI
            (match_operand:VI4_128_8_256 1 "register_operand" "v"))
          (vec_select:V4HI
            (match_dup 0)
            (parallel [(const_int 4) (const_int 5)
                       (const_int 6) (const_int 7)]))))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0|%q0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "memory" "store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
10041
;; Masked 4-element source -> V4HI into a register: words 0-3 are merged
;; under mask operand 3 between the truncated values and words 0-3 of
;; operand 2; words 4-7 of the result are zero.
(define_insn "avx512vl_<code><mode>v4hi2_mask"
  [(set (match_operand:V8HI 0 "register_operand" "=v")
        (vec_concat:V8HI
          (vec_merge:V4HI
            (any_truncate:V4HI
              (match_operand:VI4_128_8_256 1 "register_operand" "v"))
            (vec_select:V4HI
              (match_operand:V8HI 2 "nonimm_or_0_operand" "0C")
              (parallel
                [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))
            (match_operand:QI 3 "register_operand" "Yk"))
          (const_vector:V4HI
            [(const_int 0) (const_int 0) (const_int 0) (const_int 0)])))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
  [(set_attr "mode" "TI")
   (set_attr "prefix" "evex")
   (set_attr "type" "ssemov")])
10060
;; Zero-masking 4-element source -> V4HI into a register: the merge source
;; is a literal zero vector, so the template always prints {z} after the
;; mask operand 2.  Words 4-7 of the result are zero.
(define_insn "*avx512vl_<code><mode>v4hi2_mask_1"
  [(set (match_operand:V8HI 0 "register_operand" "=v")
        (vec_concat:V8HI
          (vec_merge:V4HI
            (any_truncate:V4HI
              (match_operand:VI4_128_8_256 1 "register_operand" "v"))
            (const_vector:V4HI
              [(const_int 0) (const_int 0) (const_int 0) (const_int 0)])
            (match_operand:QI 2 "register_operand" "Yk"))
          (const_vector:V4HI
            [(const_int 0) (const_int 0) (const_int 0) (const_int 0)])))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
  [(set_attr "mode" "TI")
   (set_attr "prefix" "evex")
   (set_attr "type" "ssemov")])
10077
;; Masked store form of a 4-element source -> V4HI: words 0-3 of memory
;; are merged under mask operand 2 with the truncated values; every word
;; not written keeps its memory contents (all match_dup 0).
;; The output template is chosen by the source element size.
;; NOTE(review): the Intel-syntax half prints the source with %t1/%g1 and
;; leaves the memory operand without a size override, unlike the QI
;; masked stores in this file -- confirm this is intended.
(define_insn "avx512vl_<code><mode>v4hi2_mask_store"
  [(set (match_operand:V8HI 0 "memory_operand" "=m")
        (vec_concat:V8HI
          (vec_merge:V4HI
            (any_truncate:V4HI
              (match_operand:VI4_128_8_256 1 "register_operand" "v"))
            (vec_select:V4HI
              (match_dup 0)
              (parallel
                [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))
            (match_operand:QI 2 "register_operand" "Yk"))
          (vec_select:V4HI
            (match_dup 0)
            (parallel
              [(const_int 4) (const_int 5) (const_int 6) (const_int 7)]))))]
  "TARGET_AVX512VL"
{
  /* 4-byte source elements use the %t1 template, all others %g1.  */
  return (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 4
	  ? "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}|%0%{%2%}, %t1}"
	  : "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}|%0%{%2%}, %g1}");
}
  [(set_attr "mode" "TI")
   (set_attr "prefix" "evex")
   (set_attr "memory" "store")
   (set_attr "type" "ssemov")])
10103
;; Store form of V2DI -> V2HI: words 0-1 of the V8HI memory operand get
;; the truncated values, words 2-7 are re-selected from the same memory
;; (match_dup 0) and stay unchanged, so only 4 bytes are written.
;; The Intel-syntax half prints the destination with %k0 (DWORD PTR) to
;; match the 4-byte store; a bare %0 would print the size of the whole
;; V8HI operand.  This mirrors the %w0/%k0/%q0 overrides used by the QI
;; store patterns in this file.
(define_insn "*avx512vl_<code>v2div2hi2_store"
  [(set (match_operand:V8HI 0 "memory_operand" "=m")
        (vec_concat:V8HI
          (any_truncate:V2HI
            (match_operand:V2DI 1 "register_operand" "v"))
          (vec_select:V6HI
            (match_dup 0)
            (parallel [(const_int 2) (const_int 3)
                       (const_int 4) (const_int 5)
                       (const_int 6) (const_int 7)]))))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix>qw\t{%1, %0|%k0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "memory" "store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
10120
;; Masked V2DI -> V2HI into a register: words 0-1 are merged under mask
;; operand 3 between the truncated values and words 0-1 of operand 2;
;; words 2-7 of the result are zero.
(define_insn "avx512vl_<code>v2div2hi2_mask"
  [(set (match_operand:V8HI 0 "register_operand" "=v")
        (vec_concat:V8HI
          (vec_merge:V2HI
            (any_truncate:V2HI
              (match_operand:V2DI 1 "register_operand" "v"))
            (vec_select:V2HI
              (match_operand:V8HI 2 "nonimm_or_0_operand" "0C")
              (parallel [(const_int 0) (const_int 1)]))
            (match_operand:QI 3 "register_operand" "Yk"))
          (const_vector:V6HI
            [(const_int 0) (const_int 0) (const_int 0)
             (const_int 0) (const_int 0) (const_int 0)])))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix>qw\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
  [(set_attr "mode" "TI")
   (set_attr "prefix" "evex")
   (set_attr "type" "ssemov")])
10139
;; Zero-masking V2DI -> V2HI into a register: the merge source is a
;; literal zero vector, so the template always prints {z} after the mask
;; operand 2.  Words 2-7 of the result are zero.
(define_insn "*avx512vl_<code>v2div2hi2_mask_1"
  [(set (match_operand:V8HI 0 "register_operand" "=v")
        (vec_concat:V8HI
          (vec_merge:V2HI
            (any_truncate:V2HI
              (match_operand:V2DI 1 "register_operand" "v"))
            (const_vector:V2HI [(const_int 0) (const_int 0)])
            (match_operand:QI 2 "register_operand" "Yk"))
          (const_vector:V6HI
            [(const_int 0) (const_int 0) (const_int 0)
             (const_int 0) (const_int 0) (const_int 0)])))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix>qw\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
  [(set_attr "mode" "TI")
   (set_attr "prefix" "evex")
   (set_attr "type" "ssemov")])
10156
;; Masked store form of V2DI -> V2HI: words 0-1 of memory are merged under
;; mask operand 2 with the truncated values; every word not written keeps
;; its memory contents (all match_dup 0).
;; NOTE(review): the Intel-syntax half prints the source with %g1 and
;; leaves the memory operand without a size override, unlike the QI
;; masked stores in this file -- confirm this is intended.
(define_insn "avx512vl_<code>v2div2hi2_mask_store"
  [(set (match_operand:V8HI 0 "memory_operand" "=m")
        (vec_concat:V8HI
          (vec_merge:V2HI
            (any_truncate:V2HI
              (match_operand:V2DI 1 "register_operand" "v"))
            (vec_select:V2HI
              (match_dup 0)
              (parallel [(const_int 0) (const_int 1)]))
            (match_operand:QI 2 "register_operand" "Yk"))
          (vec_select:V6HI
            (match_dup 0)
            (parallel
              [(const_int 2) (const_int 3) (const_int 4)
               (const_int 5) (const_int 6) (const_int 7)]))))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix>qw\t{%1, %0%{%2%}|%0%{%2%}, %g1}"
  [(set_attr "mode" "TI")
   (set_attr "prefix" "evex")
   (set_attr "memory" "store")
   (set_attr "type" "ssemov")])
10178
;; Register form of V2DI -> V2SI: the two truncated SI elements occupy the
;; low half of the V4SI result and operand 2 (const0_operand) supplies the
;; zero upper half.
(define_insn "*avx512vl_<code>v2div2si2"
  [(set (match_operand:V4SI 0 "register_operand" "=v")
        (vec_concat:V4SI
          (any_truncate:V2SI
            (match_operand:V2DI 1 "register_operand" "v"))
          (match_operand:V2SI 2 "const0_operand")))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix>qd\t{%1, %0|%0, %1}"
  [(set_attr "mode" "TI")
   (set_attr "prefix" "evex")
   (set_attr "type" "ssemov")])
10190
;; Store form of V2DI -> V2SI: dwords 0-1 of the V4SI memory operand get
;; the truncated values, dwords 2-3 are re-selected from the same memory
;; (match_dup 0) and stay unchanged, so only 8 bytes are written.
;; The Intel-syntax half prints the destination with %q0 (QWORD PTR) to
;; match the 8-byte store; a bare %0 would print the size of the whole
;; V4SI operand.  This mirrors the %w0/%k0/%q0 overrides used by the QI
;; store patterns in this file.
(define_insn "*avx512vl_<code>v2div2si2_store"
  [(set (match_operand:V4SI 0 "memory_operand" "=m")
        (vec_concat:V4SI
          (any_truncate:V2SI
            (match_operand:V2DI 1 "register_operand" "v"))
          (vec_select:V2SI
            (match_dup 0)
            (parallel [(const_int 2) (const_int 3)]))))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix>qd\t{%1, %0|%q0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "memory" "store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
10205
;; Masked V2DI -> V2SI into a register: dwords 0-1 are merged under mask
;; operand 3 between the truncated values and dwords 0-1 of operand 2;
;; dwords 2-3 of the result are zero.
(define_insn "avx512vl_<code>v2div2si2_mask"
  [(set (match_operand:V4SI 0 "register_operand" "=v")
        (vec_concat:V4SI
          (vec_merge:V2SI
            (any_truncate:V2SI
              (match_operand:V2DI 1 "register_operand" "v"))
            (vec_select:V2SI
              (match_operand:V4SI 2 "nonimm_or_0_operand" "0C")
              (parallel [(const_int 0) (const_int 1)]))
            (match_operand:QI 3 "register_operand" "Yk"))
          (const_vector:V2SI [(const_int 0) (const_int 0)])))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix>qd\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
  [(set_attr "mode" "TI")
   (set_attr "prefix" "evex")
   (set_attr "type" "ssemov")])
10222
;; Zero-masking V2DI -> V2SI into a register: the merge source is a
;; literal zero vector, so the template always prints {z} after the mask
;; operand 2.  Dwords 2-3 of the result are zero.
(define_insn "*avx512vl_<code>v2div2si2_mask_1"
  [(set (match_operand:V4SI 0 "register_operand" "=v")
        (vec_concat:V4SI
          (vec_merge:V2SI
            (any_truncate:V2SI
              (match_operand:V2DI 1 "register_operand" "v"))
            (const_vector:V2SI [(const_int 0) (const_int 0)])
            (match_operand:QI 2 "register_operand" "Yk"))
          (const_vector:V2SI [(const_int 0) (const_int 0)])))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix>qd\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
  [(set_attr "mode" "TI")
   (set_attr "prefix" "evex")
   (set_attr "type" "ssemov")])
10237
;; Masked store form of V2DI -> V2SI: dwords 0-1 of memory are merged
;; under mask operand 2 with the truncated values; every dword not written
;; keeps its memory contents (all match_dup 0).
;; NOTE(review): the Intel-syntax half prints the source with %t1 and
;; leaves the memory operand without a size override, unlike the QI
;; masked stores in this file -- confirm this is intended.
(define_insn "avx512vl_<code>v2div2si2_mask_store"
  [(set (match_operand:V4SI 0 "memory_operand" "=m")
        (vec_concat:V4SI
          (vec_merge:V2SI
            (any_truncate:V2SI
              (match_operand:V2DI 1 "register_operand" "v"))
            (vec_select:V2SI
              (match_dup 0)
              (parallel [(const_int 0) (const_int 1)]))
            (match_operand:QI 2 "register_operand" "Yk"))
          (vec_select:V2SI
            (match_dup 0)
            (parallel [(const_int 2) (const_int 3)]))))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix>qd\t{%1, %0%{%2%}|%0%{%2%}, %t1}"
  [(set_attr "mode" "TI")
   (set_attr "prefix" "evex")
   (set_attr "memory" "store")
   (set_attr "type" "ssemov")])
10257
;; Register form of V8DI -> V8QI: the eight truncated bytes occupy the low
;; half of the V16QI result and the upper eight bytes are zero.
(define_insn "*avx512f_<code>v8div16qi2"
  [(set (match_operand:V16QI 0 "register_operand" "=v")
        (vec_concat:V16QI
          (any_truncate:V8QI
            (match_operand:V8DI 1 "register_operand" "v"))
          (const_vector:V8QI
            [(const_int 0) (const_int 0) (const_int 0) (const_int 0)
             (const_int 0) (const_int 0) (const_int 0) (const_int 0)])))]
  "TARGET_AVX512F"
  "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
  [(set_attr "mode" "TI")
   (set_attr "prefix" "evex")
   (set_attr "type" "ssemov")])
10272
;; Store of a V8DI -> V8QI truncation.  The destination is described as
;; a V16QI memory whose bytes 8..15 keep their old value (match_dup 0),
;; so only the low 8 bytes are written.
;;
;; In Intel syntax the memory operand is printed with %q0 so that its
;; size matches the 8 bytes actually stored rather than the V16QI mode
;; of the operand; this mirrors the masked store variant of this
;; pattern.
(define_insn "*avx512f_<code>v8div16qi2_store"
  [(set (match_operand:V16QI 0 "memory_operand" "=m")
        (vec_concat:V16QI
          (any_truncate:V8QI
            (match_operand:V8DI 1 "register_operand" "v"))
          (vec_select:V8QI
            (match_dup 0)
            (parallel [(const_int 8) (const_int 9)
                       (const_int 10) (const_int 11)
                       (const_int 12) (const_int 13)
                       (const_int 14) (const_int 15)]))))]
  "TARGET_AVX512F"
  "vpmov<trunsuffix>qb\t{%1, %0|%q0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "memory" "store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
10290
;; Masked truncation of V8DI to V8QI.  Where the bit of mask operand 3
;; is clear, the byte comes from the low eight bytes of operand 2, which
;; is either tied to the destination ("0") or the zero constant ("C").
;; The upper eight bytes of the V16QI result are zero.
(define_insn "avx512f_<code>v8div16qi2_mask"
  [(set (match_operand:V16QI 0 "register_operand" "=v")
        (vec_concat:V16QI
          (vec_merge:V8QI
            (any_truncate:V8QI
              (match_operand:V8DI 1 "register_operand" "v"))
            (vec_select:V8QI
              (match_operand:V16QI 2 "nonimm_or_0_operand" "0C")
              (parallel [(const_int 0) (const_int 1)
                         (const_int 2) (const_int 3)
                         (const_int 4) (const_int 5)
                         (const_int 6) (const_int 7)]))
            (match_operand:QI 3 "register_operand" "Yk"))
          (const_vector:V8QI
            [(const_int 0) (const_int 0) (const_int 0) (const_int 0)
             (const_int 0) (const_int 0) (const_int 0) (const_int 0)])))]
  "TARGET_AVX512F"
  "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
10313
;; Zero-masking truncation of V8DI to V8QI: bytes whose bit in mask
;; operand 2 is clear are zero, and so is the upper half of the V16QI
;; destination.
(define_insn "*avx512f_<code>v8div16qi2_mask_1"
  [(set (match_operand:V16QI 0 "register_operand" "=v")
        (vec_concat:V16QI
          (vec_merge:V8QI
            (any_truncate:V8QI
              (match_operand:V8DI 1 "register_operand" "v"))
            (const_vector:V8QI
              [(const_int 0) (const_int 0) (const_int 0) (const_int 0)
               (const_int 0) (const_int 0) (const_int 0) (const_int 0)])
            (match_operand:QI 2 "register_operand" "Yk"))
          (const_vector:V8QI
            [(const_int 0) (const_int 0) (const_int 0) (const_int 0)
             (const_int 0) (const_int 0) (const_int 0) (const_int 0)])))]
  "TARGET_AVX512F"
  "vpmov<trunsuffix>qb\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
10334
;; Masked store of a V8DI -> V8QI truncation.  Bytes 0..7 of the V16QI
;; memory destination are merged with the truncated value under mask
;; operand 2; bytes 8..15 keep their previous contents (match_dup 0).
;; The Intel-syntax memory operand is printed with qword size (%q0).
(define_insn "avx512f_<code>v8div16qi2_mask_store"
  [(set (match_operand:V16QI 0 "memory_operand" "=m")
        (vec_concat:V16QI
          (vec_merge:V8QI
            (any_truncate:V8QI
              (match_operand:V8DI 1 "register_operand" "v"))
            (vec_select:V8QI
              (match_dup 0)
              (parallel [(const_int 0) (const_int 1)
                         (const_int 2) (const_int 3)
                         (const_int 4) (const_int 5)
                         (const_int 6) (const_int 7)]))
            (match_operand:QI 2 "register_operand" "Yk"))
          (vec_select:V8QI
            (match_dup 0)
            (parallel [(const_int 8) (const_int 9)
                       (const_int 10) (const_int 11)
                       (const_int 12) (const_int 13)
                       (const_int 14) (const_int 15)]))))]
  "TARGET_AVX512F"
  "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%q0%{%2%}, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "memory" "store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
10360
10361 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10362 ;;
10363 ;; Parallel integral arithmetic
10364 ;;
10365 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10366
;; Vector integer negation, expanded as (0 - operand 1).  Operand 2 is
;; an internal zero vector forced into a register.
(define_expand "neg<mode>2"
  [(set (match_operand:VI_AVX2 0 "register_operand")
        (minus:VI_AVX2
          (match_dup 2)
          (match_operand:VI_AVX2 1 "vector_operand")))]
  "TARGET_SSE2"
{
  operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));
})
10374
;; Element-wise vector integer add/subtract.  The operands are passed
;; through ix86_fixup_binary_operands_no_copy before the pattern is
;; emitted.
(define_expand "<plusminus_insn><mode>3"
  [(set (match_operand:VI_AVX2 0 "register_operand")
        (plusminus:VI_AVX2
          (match_operand:VI_AVX2 1 "vector_operand")
          (match_operand:VI_AVX2 2 "vector_operand")))]
  "TARGET_SSE2"
{
  ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
})
10382
;; Masked add/subtract for the VI48_AVX512VL modes.  Operand 4 is the
;; mask; operand 3 supplies the elements used where the mask bit is
;; clear.
(define_expand "<plusminus_insn><mode>3_mask"
  [(set (match_operand:VI48_AVX512VL 0 "register_operand")
        (vec_merge:VI48_AVX512VL
          (plusminus:VI48_AVX512VL
            (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
            (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
          (match_operand:VI48_AVX512VL 3 "nonimm_or_0_operand")
          (match_operand:<avx512fmaskmode> 4 "register_operand")))]
  "TARGET_AVX512F"
{
  ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
})
10393
;; Masked add/subtract for the VI12_AVX512VL modes; requires AVX512BW.
;; Operand 4 is the mask; operand 3 supplies the elements used where
;; the mask bit is clear.
(define_expand "<plusminus_insn><mode>3_mask"
  [(set (match_operand:VI12_AVX512VL 0 "register_operand")
        (vec_merge:VI12_AVX512VL
          (plusminus:VI12_AVX512VL
            (match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
            (match_operand:VI12_AVX512VL 2 "nonimmediate_operand"))
          (match_operand:VI12_AVX512VL 3 "nonimm_or_0_operand")
          (match_operand:<avx512fmaskmode> 4 "register_operand")))]
  "TARGET_AVX512BW"
{
  ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
})
10404
;; Vector integer add/subtract instruction.  Alternative 0 is the
;; two-operand legacy SSE form (destination tied to operand 1);
;; alternative 1 is the three-operand AVX form.
(define_insn "*<plusminus_insn><mode>3"
  [(set (match_operand:VI_AVX2 0 "register_operand" "=x,v")
        (plusminus:VI_AVX2
          (match_operand:VI_AVX2 1 "vector_operand" "<comm>0,v")
          (match_operand:VI_AVX2 2 "vector_operand" "xBm,vm")))]
  "TARGET_SSE2
   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
  "@
   p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
   vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<sseinsnmode>")])
10419
;; Subtract a broadcast scalar memory operand (operand 2, duplicated
;; across the vector) from register operand 1.
(define_insn "*sub<mode>3_bcst"
  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
        (minus:VI48_AVX512VL
          (match_operand:VI48_AVX512VL 1 "register_operand" "v")
          (vec_duplicate:VI48_AVX512VL
            (match_operand:<ssescalarmode> 2 "memory_operand" "m"))))]
  "TARGET_AVX512F
   && ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
  "vpsub<ssemodesuffix>\t{%2<avx512bcst>, %1, %0|%0, %1, %2<avx512bcst>}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
10431
;; Add a broadcast scalar memory operand (operand 1, duplicated across
;; the vector) to register operand 2.
(define_insn "*add<mode>3_bcst"
  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
        (plus:VI48_AVX512VL
          (vec_duplicate:VI48_AVX512VL
            (match_operand:<ssescalarmode> 1 "memory_operand" "m"))
          (match_operand:VI48_AVX512VL 2 "register_operand" "v")))]
  "TARGET_AVX512F
   && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
  "vpadd<ssemodesuffix>\t{%1<avx512bcst>, %2, %0|%0, %2, %1<avx512bcst>}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
10443
;; Masked add/subtract instruction for the VI48_AVX512VL modes.
;; Operand 4 is the mask register; operand 3 is either tied to the
;; destination ("0") or the zero constant ("C").
(define_insn "*<plusminus_insn><mode>3_mask"
  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
        (vec_merge:VI48_AVX512VL
          (plusminus:VI48_AVX512VL
            (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "<comm>v")
            (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm"))
          (match_operand:VI48_AVX512VL 3 "nonimm_or_0_operand" "0C")
          (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512F
   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
  "vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
10457
;; Masked add/subtract instruction for the VI12_AVX512VL modes;
;; requires AVX512BW.  Operand 4 is the mask register; operand 3 is
;; either tied to the destination ("0") or the zero constant ("C").
(define_insn "*<plusminus_insn><mode>3_mask"
  [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
        (vec_merge:VI12_AVX512VL
          (plusminus:VI12_AVX512VL
            (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "<comm>v")
            (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm"))
          (match_operand:VI12_AVX512VL 3 "nonimm_or_0_operand" "0C")
          (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512BW
   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
  "vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
10471
;; Saturating add/subtract (sat_plusminus codes) on the VI12_AVX2
;; modes, with an optional masked variant generated through
;; <mask_name>.
(define_expand "<sse2_avx2>_<plusminus_insn><mode>3<mask_name>"
  [(set (match_operand:VI12_AVX2 0 "register_operand")
        (sat_plusminus:VI12_AVX2
          (match_operand:VI12_AVX2 1 "vector_operand")
          (match_operand:VI12_AVX2 2 "vector_operand")))]
  "TARGET_SSE2
   && <mask_mode512bit_condition>
   && <mask_avx512bw_condition>"
{
  ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
})
10479
;; Saturating add/subtract instruction.  Alternative 0 is the legacy
;; two-operand SSE form; alternative 1 is the three-operand AVX form,
;; which also carries the optional mask decoration.
(define_insn "*<sse2_avx2>_<plusminus_insn><mode>3<mask_name>"
  [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v")
        (sat_plusminus:VI12_AVX2
          (match_operand:VI12_AVX2 1 "vector_operand" "<comm>0,v")
          (match_operand:VI12_AVX2 2 "vector_operand" "xBm,vm")))]
  "TARGET_SSE2
   && <mask_mode512bit_condition>
   && <mask_avx512bw_condition>
   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
  "@
   p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
   vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,maybe_evex")
   (set_attr "mode" "TI")])
10495
;; Byte-element vector multiply.  There is no matching insn pattern
;; here; the whole expansion is delegated to ix86_expand_vecop_qihi.
(define_expand "mul<mode>3<mask_name>"
  [(set (match_operand:VI1_AVX512 0 "register_operand")
        (mult:VI1_AVX512
          (match_operand:VI1_AVX512 1 "register_operand")
          (match_operand:VI1_AVX512 2 "register_operand")))]
  "TARGET_SSE2
   && <mask_mode512bit_condition>
   && <mask_avx512bw_condition>"
{
  ix86_expand_vecop_qihi (MULT, operands[0], operands[1], operands[2]);
  DONE;
})
10505
;; Word-element vector multiply (low half of each product), with an
;; optional masked variant generated through <mask_name>.
(define_expand "mul<mode>3<mask_name>"
  [(set (match_operand:VI2_AVX2 0 "register_operand")
        (mult:VI2_AVX2
          (match_operand:VI2_AVX2 1 "vector_operand")
          (match_operand:VI2_AVX2 2 "vector_operand")))]
  "TARGET_SSE2
   && <mask_mode512bit_condition>
   && <mask_avx512bw_condition>"
{
  ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
})
10512
;; pmullw / vpmullw.  The condition rejects the case where both inputs
;; are memory operands.
(define_insn "*mul<mode>3<mask_name>"
  [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
        (mult:VI2_AVX2
          (match_operand:VI2_AVX2 1 "vector_operand" "%0,v")
          (match_operand:VI2_AVX2 2 "vector_operand" "xBm,vm")))]
  "TARGET_SSE2
   && !(MEM_P (operands[1]) && MEM_P (operands[2]))
   && <mask_mode512bit_condition>
   && <mask_avx512bw_condition>"
  "@
   pmullw\t{%2, %0|%0, %2}
   vpmullw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseimul")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<sseinsnmode>")])
10527
;; High half of a widening word multiply: both inputs are extended to
;; <ssedoublemode>, multiplied, shifted right by 16 and truncated back.
(define_expand "<s>mul<mode>3_highpart<mask_name>"
  [(set (match_operand:VI2_AVX2 0 "register_operand")
        (truncate:VI2_AVX2
          (lshiftrt:<ssedoublemode>
            (mult:<ssedoublemode>
              (any_extend:<ssedoublemode>
                (match_operand:VI2_AVX2 1 "vector_operand"))
              (any_extend:<ssedoublemode>
                (match_operand:VI2_AVX2 2 "vector_operand")))
            (const_int 16))))]
  "TARGET_SSE2
   && <mask_mode512bit_condition>
   && <mask_avx512bw_condition>"
{
  ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
})
10541
;; pmulh[u]w / vpmulh[u]w: high 16 bits of each widened word product.
;; The condition rejects the case where both inputs are memory
;; operands.
(define_insn "*<s>mul<mode>3_highpart<mask_name>"
  [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
        (truncate:VI2_AVX2
          (lshiftrt:<ssedoublemode>
            (mult:<ssedoublemode>
              (any_extend:<ssedoublemode>
                (match_operand:VI2_AVX2 1 "vector_operand" "%0,v"))
              (any_extend:<ssedoublemode>
                (match_operand:VI2_AVX2 2 "vector_operand" "xBm,vm")))
            (const_int 16))))]
  "TARGET_SSE2
   && !(MEM_P (operands[1]) && MEM_P (operands[2]))
   && <mask_mode512bit_condition>
   && <mask_avx512bw_condition>"
  "@
   pmulh<u>w\t{%2, %0|%0, %2}
   vpmulh<u>w\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseimul")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<sseinsnmode>")])
10562
;; Unsigned widening multiply of the even-indexed SI elements
;; (0, 2, ..., 14) of two V16SI vectors, giving a V8DI result.
(define_expand "vec_widen_umult_even_v16si<mask_name>"
  [(set (match_operand:V8DI 0 "register_operand")
        (mult:V8DI
          (zero_extend:V8DI
            (vec_select:V8SI
              (match_operand:V16SI 1 "nonimmediate_operand")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)
                         (const_int 8) (const_int 10)
                         (const_int 12) (const_int 14)])))
          (zero_extend:V8DI
            (vec_select:V8SI
              (match_operand:V16SI 2 "nonimmediate_operand")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)
                         (const_int 8) (const_int 10)
                         (const_int 12) (const_int 14)])))))]
  "TARGET_AVX512F"
{
  ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);
})
10582
;; 512-bit vpmuludq: unsigned product of the even-indexed SI elements.
;; At most one of the two inputs may be a memory operand.
(define_insn "*vec_widen_umult_even_v16si<mask_name>"
  [(set (match_operand:V8DI 0 "register_operand" "=v")
        (mult:V8DI
          (zero_extend:V8DI
            (vec_select:V8SI
              (match_operand:V16SI 1 "nonimmediate_operand" "%v")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)
                         (const_int 8) (const_int 10)
                         (const_int 12) (const_int 14)])))
          (zero_extend:V8DI
            (vec_select:V8SI
              (match_operand:V16SI 2 "nonimmediate_operand" "vm")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)
                         (const_int 8) (const_int 10)
                         (const_int 12) (const_int 14)])))))]
  "TARGET_AVX512F
   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
10606
;; Unsigned widening multiply of the even-indexed SI elements
;; (0, 2, 4, 6) of two V8SI vectors, giving a V4DI result.
(define_expand "vec_widen_umult_even_v8si<mask_name>"
  [(set (match_operand:V4DI 0 "register_operand")
        (mult:V4DI
          (zero_extend:V4DI
            (vec_select:V4SI
              (match_operand:V8SI 1 "nonimmediate_operand")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)])))
          (zero_extend:V4DI
            (vec_select:V4SI
              (match_operand:V8SI 2 "nonimmediate_operand")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)])))))]
  "TARGET_AVX2 && <mask_avx512vl_condition>"
{
  ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);
})
10622
;; 256-bit vpmuludq: unsigned product of the even-indexed SI elements.
;; At most one of the two inputs may be a memory operand.
(define_insn "*vec_widen_umult_even_v8si<mask_name>"
  [(set (match_operand:V4DI 0 "register_operand" "=v")
        (mult:V4DI
          (zero_extend:V4DI
            (vec_select:V4SI
              (match_operand:V8SI 1 "nonimmediate_operand" "%v")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)])))
          (zero_extend:V4DI
            (vec_select:V4SI
              (match_operand:V8SI 2 "nonimmediate_operand" "vm")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)])))))]
  "TARGET_AVX2
   && <mask_avx512vl_condition>
   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "OI")])
10642
;; Unsigned widening multiply of SI elements 0 and 2 of two V4SI
;; vectors, giving a V2DI result.
(define_expand "vec_widen_umult_even_v4si<mask_name>"
  [(set (match_operand:V2DI 0 "register_operand")
        (mult:V2DI
          (zero_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 1 "vector_operand")
              (parallel [(const_int 0) (const_int 2)])))
          (zero_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 2 "vector_operand")
              (parallel [(const_int 0) (const_int 2)])))))]
  "TARGET_SSE2 && <mask_avx512vl_condition>"
{
  ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
})
10656
;; pmuludq / vpmuludq on 128-bit vectors.  Alternative 0 is the legacy
;; two-operand SSE2 form; alternative 1 is the three-operand AVX form.
;; At most one of the two inputs may be a memory operand.
(define_insn "*vec_widen_umult_even_v4si<mask_name>"
  [(set (match_operand:V2DI 0 "register_operand" "=x,v")
        (mult:V2DI
          (zero_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 1 "vector_operand" "%0,v")
              (parallel [(const_int 0) (const_int 2)])))
          (zero_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 2 "vector_operand" "xBm,vm")
              (parallel [(const_int 0) (const_int 2)])))))]
  "TARGET_SSE2
   && <mask_avx512vl_condition>
   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   pmuludq\t{%2, %0|%0, %2}
   vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseimul")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,maybe_evex")
   (set_attr "mode" "TI")])
10678
;; Signed widening multiply of the even-indexed SI elements
;; (0, 2, ..., 14) of two V16SI vectors, giving a V8DI result.
(define_expand "vec_widen_smult_even_v16si<mask_name>"
  [(set (match_operand:V8DI 0 "register_operand")
        (mult:V8DI
          (sign_extend:V8DI
            (vec_select:V8SI
              (match_operand:V16SI 1 "nonimmediate_operand")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)
                         (const_int 8) (const_int 10)
                         (const_int 12) (const_int 14)])))
          (sign_extend:V8DI
            (vec_select:V8SI
              (match_operand:V16SI 2 "nonimmediate_operand")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)
                         (const_int 8) (const_int 10)
                         (const_int 12) (const_int 14)])))))]
  "TARGET_AVX512F"
{
  ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);
})
10698
;; 512-bit vpmuldq: signed product of the even-indexed SI elements.
;; At most one of the two inputs may be a memory operand.
(define_insn "*vec_widen_smult_even_v16si<mask_name>"
  [(set (match_operand:V8DI 0 "register_operand" "=v")
        (mult:V8DI
          (sign_extend:V8DI
            (vec_select:V8SI
              (match_operand:V16SI 1 "nonimmediate_operand" "%v")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)
                         (const_int 8) (const_int 10)
                         (const_int 12) (const_int 14)])))
          (sign_extend:V8DI
            (vec_select:V8SI
              (match_operand:V16SI 2 "nonimmediate_operand" "vm")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)
                         (const_int 8) (const_int 10)
                         (const_int 12) (const_int 14)])))))]
  "TARGET_AVX512F
   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
10722
;; Signed widening multiply of the even-indexed SI elements
;; (0, 2, 4, 6) of two V8SI vectors, giving a V4DI result.
(define_expand "vec_widen_smult_even_v8si<mask_name>"
  [(set (match_operand:V4DI 0 "register_operand")
        (mult:V4DI
          (sign_extend:V4DI
            (vec_select:V4SI
              (match_operand:V8SI 1 "nonimmediate_operand")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)])))
          (sign_extend:V4DI
            (vec_select:V4SI
              (match_operand:V8SI 2 "nonimmediate_operand")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)])))))]
  "TARGET_AVX2 && <mask_avx512vl_condition>"
{
  ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);
})
10738
;; 256-bit vpmuldq: signed product of the even-indexed SI elements.
;; At most one of the two inputs may be a memory operand.
;;
;; The condition includes <mask_avx512vl_condition>, matching both the
;; expander of the same name and the unsigned v8si insn, so that the
;; masked variant of this 256-bit pattern is only available under the
;; same condition as its expander.
(define_insn "*vec_widen_smult_even_v8si<mask_name>"
  [(set (match_operand:V4DI 0 "register_operand" "=v")
        (mult:V4DI
          (sign_extend:V4DI
            (vec_select:V4SI
              (match_operand:V8SI 1 "nonimmediate_operand" "%v")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)])))
          (sign_extend:V4DI
            (vec_select:V4SI
              (match_operand:V8SI 2 "nonimmediate_operand" "vm")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)])))))]
  "TARGET_AVX2
   && <mask_avx512vl_condition>
   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
10758
;; Signed widening multiply of SI elements 0 and 2 of two V4SI
;; vectors, giving a V2DI result (SSE4.1).
(define_expand "sse4_1_mulv2siv2di3<mask_name>"
  [(set (match_operand:V2DI 0 "register_operand")
        (mult:V2DI
          (sign_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 1 "vector_operand")
              (parallel [(const_int 0) (const_int 2)])))
          (sign_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 2 "vector_operand")
              (parallel [(const_int 0) (const_int 2)])))))]
  "TARGET_SSE4_1 && <mask_avx512vl_condition>"
{
  ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
})
10772
;; pmuldq / vpmuldq on 128-bit vectors.  Alternatives 0 and 1 are the
;; legacy two-operand forms (differing only in register constraints);
;; alternative 2 is the three-operand AVX form.  At most one of the
;; two inputs may be a memory operand.
(define_insn "*sse4_1_mulv2siv2di3<mask_name>"
  [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
        (mult:V2DI
          (sign_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 1 "vector_operand" "%0,0,v")
              (parallel [(const_int 0) (const_int 2)])))
          (sign_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 2 "vector_operand" "YrBm,*xBm,vm")
              (parallel [(const_int 0) (const_int 2)])))))]
  "TARGET_SSE4_1
   && <mask_avx512vl_condition>
   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   pmuldq\t{%2, %0|%0, %2}
   pmuldq\t{%2, %0|%0, %2}
   vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "sseimul")
   (set_attr "prefix_data16" "1,1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,orig,vex")
   (set_attr "mode" "TI")])
10796
;; vpmaddwd modelled as an unspec (UNSPEC_PMADDWD512) over two word
;; vectors, producing a result in <sseunpackmode>.  Operand 1 must be
;; a register; operand 2 may be a register or memory.
(define_insn "avx512bw_pmaddwd512<mode><mask_name>"
  [(set (match_operand:<sseunpackmode> 0 "register_operand" "=v")
        (unspec:<sseunpackmode>
          [(match_operand:VI2_AVX2 1 "register_operand" "v")
           (match_operand:VI2_AVX2 2 "nonimmediate_operand" "vm")]
          UNSPEC_PMADDWD512))]
  "TARGET_AVX512BW && <mask_mode512bit_condition>"
  "vpmaddwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
10808
;; 256-bit pmaddwd: for each pair of adjacent HI elements, sign-extend
;; to SI, multiply the even elements together and the odd elements
;; together, and add the two products.
(define_expand "avx2_pmaddwd"
  [(set (match_operand:V8SI 0 "register_operand")
        (plus:V8SI
          ;; Products of the even-indexed elements.
          (mult:V8SI
            (sign_extend:V8SI
              (vec_select:V8HI
                (match_operand:V16HI 1 "nonimmediate_operand")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)
                           (const_int 8) (const_int 10)
                           (const_int 12) (const_int 14)])))
            (sign_extend:V8SI
              (vec_select:V8HI
                (match_operand:V16HI 2 "nonimmediate_operand")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)
                           (const_int 8) (const_int 10)
                           (const_int 12) (const_int 14)]))))
          ;; Products of the odd-indexed elements.
          (mult:V8SI
            (sign_extend:V8SI
              (vec_select:V8HI (match_dup 1)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)
                           (const_int 9) (const_int 11)
                           (const_int 13) (const_int 15)])))
            (sign_extend:V8SI
              (vec_select:V8HI (match_dup 2)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)
                           (const_int 9) (const_int 11)
                           (const_int 13) (const_int 15)]))))))]
  "TARGET_AVX2"
{
  ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);
})
10842
;; 256-bit vpmaddwd instruction.  Alternative 0 uses "x" registers
;; (VEX encoding); alternative 1 uses "v" registers and is restricted
;; to the avx512bw ISA (EVEX encoding).  At most one input may be a
;; memory operand.
(define_insn "*avx2_pmaddwd"
  [(set (match_operand:V8SI 0 "register_operand" "=x,v")
        (plus:V8SI
          ;; Products of the even-indexed elements.
          (mult:V8SI
            (sign_extend:V8SI
              (vec_select:V8HI
                (match_operand:V16HI 1 "nonimmediate_operand" "%x,v")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)
                           (const_int 8) (const_int 10)
                           (const_int 12) (const_int 14)])))
            (sign_extend:V8SI
              (vec_select:V8HI
                (match_operand:V16HI 2 "nonimmediate_operand" "xm,vm")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)
                           (const_int 8) (const_int 10)
                           (const_int 12) (const_int 14)]))))
          ;; Products of the odd-indexed elements.
          (mult:V8SI
            (sign_extend:V8SI
              (vec_select:V8HI (match_dup 1)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)
                           (const_int 9) (const_int 11)
                           (const_int 13) (const_int 15)])))
            (sign_extend:V8SI
              (vec_select:V8HI (match_dup 2)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)
                           (const_int 9) (const_int 11)
                           (const_int 13) (const_int 15)]))))))]
  "TARGET_AVX2
   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "isa" "*,avx512bw")
   (set_attr "prefix" "vex,evex")
   (set_attr "mode" "OI")])
10880
;; 128-bit pmaddwd: for each pair of adjacent HI elements, sign-extend
;; to SI, multiply the even elements together and the odd elements
;; together, and add the two products.
(define_expand "sse2_pmaddwd"
  [(set (match_operand:V4SI 0 "register_operand")
        (plus:V4SI
          ;; Products of the even-indexed elements.
          (mult:V4SI
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_operand:V8HI 1 "vector_operand")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)])))
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_operand:V8HI 2 "vector_operand")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)]))))
          ;; Products of the odd-indexed elements.
          (mult:V4SI
            (sign_extend:V4SI
              (vec_select:V4HI (match_dup 1)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)])))
            (sign_extend:V4SI
              (vec_select:V4HI (match_dup 2)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)]))))))]
  "TARGET_SSE2"
{
  ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);
})
10906
;; 128-bit pmaddwd / vpmaddwd instruction.  Alternative 0 is the
;; legacy two-operand SSE2 form, alternative 1 the VEX form and
;; alternative 2 the EVEX form (avx512bw ISA).  At most one input may
;; be a memory operand.
(define_insn "*sse2_pmaddwd"
  [(set (match_operand:V4SI 0 "register_operand" "=x,x,v")
        (plus:V4SI
          ;; Products of the even-indexed elements.
          (mult:V4SI
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_operand:V8HI 1 "vector_operand" "%0,x,v")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)])))
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_operand:V8HI 2 "vector_operand" "xBm,xm,vm")
                (parallel [(const_int 0) (const_int 2)
                           (const_int 4) (const_int 6)]))))
          ;; Products of the odd-indexed elements.
          (mult:V4SI
            (sign_extend:V4SI
              (vec_select:V4HI (match_dup 1)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)])))
            (sign_extend:V4SI
              (vec_select:V4HI (match_dup 2)
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)]))))))]
  "TARGET_SSE2
   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   pmaddwd\t{%2, %0|%0, %2}
   vpmaddwd\t{%2, %1, %0|%0, %1, %2}
   vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx,avx512bw")
   (set_attr "type" "sseiadd")
   (set_attr "atom_unit" "simul")
   (set_attr "prefix_data16" "1,*,*")
   (set_attr "prefix" "orig,vex,evex")
   (set_attr "mode" "TI")])
10941
;; vpmullq: element-wise multiply of the VI8 modes (AVX512DQ).
;; Operand 1 must be a register; operand 2 may be a register or memory.
(define_insn "avx512dq_mul<mode>3<mask_name>"
  [(set (match_operand:VI8 0 "register_operand" "=v")
        (mult:VI8
          (match_operand:VI8 1 "register_operand" "v")
          (match_operand:VI8 2 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512DQ && <mask_mode512bit_condition>"
  "vpmullq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
10952
;; SI-element vector multiply.  Without SSE4.1 the operation is
;; expanded entirely by ix86_expand_sse2_mulv4si3; with SSE4.1 the
;; operands are legitimized and the mult pattern itself is emitted.
(define_expand "mul<mode>3<mask_name>"
  [(set (match_operand:VI4_AVX512F 0 "register_operand")
        (mult:VI4_AVX512F
          (match_operand:VI4_AVX512F 1 "general_vector_operand")
          (match_operand:VI4_AVX512F 2 "general_vector_operand")))]
  "TARGET_SSE2 && <mask_mode512bit_condition>"
{
  if (!TARGET_SSE4_1)
    {
      ix86_expand_sse2_mulv4si3 (operands[0], operands[1], operands[2]);
      DONE;
    }

  /* general_vector_operand is wider than what the insn accepts, so
     force anything that is not a vector_operand into a register.  */
  if (!vector_operand (operands[1], <MODE>mode))
    operands[1] = force_reg (<MODE>mode, operands[1]);
  if (!vector_operand (operands[2], <MODE>mode))
    operands[2] = force_reg (<MODE>mode, operands[2]);
  ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
})
10974
;; pmulld / vpmulld.  Alternatives 0 and 1 are the legacy two-operand
;; forms (differing only in register constraints); alternative 2 is
;; the three-operand AVX form.  At most one input may be a memory
;; operand.
(define_insn "*<sse4_1_avx2>_mul<mode>3<mask_name>"
  [(set (match_operand:VI4_AVX512F 0 "register_operand" "=Yr,*x,v")
        (mult:VI4_AVX512F
          (match_operand:VI4_AVX512F 1 "vector_operand" "%0,0,v")
          (match_operand:VI4_AVX512F 2 "vector_operand" "YrBm,*xBm,vm")))]
  "TARGET_SSE4_1
   && !(MEM_P (operands[1]) && MEM_P (operands[2]))
   && <mask_mode512bit_condition>"
  "@
   pmulld\t{%2, %0|%0, %2}
   pmulld\t{%2, %0|%0, %2}
   vpmulld\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "sseimul")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "<mask_prefix4>")
   (set_attr "btver2_decode" "vector,vector,vector")
   (set_attr "mode" "<sseinsnmode>")])
10992
;; DI-element vector multiply.  The whole expansion is delegated to
;; ix86_expand_sse2_mulvxdi3; the RTL template is never emitted.
(define_expand "mul<mode>3"
  [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand")
        (mult:VI8_AVX2_AVX512F
          (match_operand:VI8_AVX2_AVX512F 1 "register_operand")
          (match_operand:VI8_AVX2_AVX512F 2 "register_operand")))]
  "TARGET_SSE2"
{
  ix86_expand_sse2_mulvxdi3 (operands[0], operands[1], operands[2]);
  DONE;
})
11003
;; Widening multiply, "hi" variant.  Expanded by
;; ix86_expand_mul_widen_hilo with <u_bool> as the signedness flag and
;; true as the final argument.
(define_expand "vec_widen_<s>mult_hi_<mode>"
  [(match_operand:<sseunpackmode> 0 "register_operand")
   (any_extend:<sseunpackmode>
     (match_operand:VI124_AVX2 1 "register_operand"))
   (match_operand:VI124_AVX2 2 "register_operand")]
  "TARGET_SSE2"
{
  ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
                              <u_bool>, /* hi half */ true);
  DONE;
})
11015
;; Widening multiply, "lo" variant.  Expanded by
;; ix86_expand_mul_widen_hilo with <u_bool> as the signedness flag and
;; false as the final argument.
(define_expand "vec_widen_<s>mult_lo_<mode>"
  [(match_operand:<sseunpackmode> 0 "register_operand")
   (any_extend:<sseunpackmode>
     (match_operand:VI124_AVX2 1 "register_operand"))
   (match_operand:VI124_AVX2 2 "register_operand")]
  "TARGET_SSE2"
{
  ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
                              <u_bool>, /* lo half */ false);
  DONE;
})
11027
;; Most widen_<s>mult_even_<mode> can be handled directly from other
;; named patterns, but signed V4SI needs special help for plain SSE2.
;; The expansion is done by ix86_expand_mul_widen_evenodd with both
;; flag arguments false.
(define_expand "vec_widen_smult_even_v4si"
  [(match_operand:V2DI 0 "register_operand")
   (match_operand:V4SI 1 "vector_operand")
   (match_operand:V4SI 2 "vector_operand")]
  "TARGET_SSE2"
{
  ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
                                 false, false);
  DONE;
})
11040
;; Widening multiply of the odd-indexed elements.  Expanded by
;; ix86_expand_mul_widen_evenodd with <u_bool> as the signedness flag
;; and true as the final argument.
(define_expand "vec_widen_<s>mult_odd_<mode>"
  [(match_operand:<sseunpackmode> 0 "register_operand")
   (any_extend:<sseunpackmode>
     (match_operand:VI4_AVX512F 1 "general_vector_operand"))
   (match_operand:VI4_AVX512F 2 "general_vector_operand")]
  "TARGET_SSE2"
{
  ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
                                 <u_bool>, /* odd elements */ true);
  DONE;
})
11052
;; Per-mode suffix appended to "pmaddwd" when sdot_prod<mode> builds
;; the name of the generator function it calls.  Only V32HI needs a
;; non-empty suffix.
(define_mode_attr SDOT_PMADD_SUF
  [(V32HI "512v32hi")
   (V16HI "")
   (V8HI "")])
11055
;; Signed dot product: operand 0 = pmaddwd (operand 1, operand 2)
;; + operand 3, where operands 0 and 3 are in <sseunpackmode>.
(define_expand "sdot_prod<mode>"
  [(match_operand:<sseunpackmode> 0 "register_operand")
   (match_operand:VI2_AVX2 1 "register_operand")
   (match_operand:VI2_AVX2 2 "register_operand")
   (match_operand:<sseunpackmode> 3 "register_operand")]
  "TARGET_SSE2"
{
  /* Pairwise multiply-add of the two word vectors.  */
  rtx madd = gen_reg_rtx (<sseunpackmode>mode);
  emit_insn (gen_<sse2_avx2>_pmaddwd<SDOT_PMADD_SUF> (madd, operands[1], operands[2]));

  /* Accumulate into the incoming sum.  */
  rtx sum = gen_rtx_PLUS (<sseunpackmode>mode, operands[3], madd);
  emit_insn (gen_rtx_SET (operands[0], sum));
  DONE;
})
11070
;; Normally we use widen_mul_even/odd, but combine can't quite get it all
;; back together when madd is available.
;; With XOP this is emitted as two chained multiply-accumulate insns:
;; pmacsdqh accumulates onto operand 3, and pmacsdql accumulates onto
;; that intermediate result.
(define_expand "sdot_prodv4si"
  [(match_operand:V2DI 0 "register_operand")
   (match_operand:V4SI 1 "register_operand")
   (match_operand:V4SI 2 "register_operand")
   (match_operand:V2DI 3 "register_operand")]
  "TARGET_XOP"
{
  rtx partial = gen_reg_rtx (V2DImode);
  emit_insn (gen_xop_pmacsdqh (partial, operands[1], operands[2],
                               operands[3]));
  emit_insn (gen_xop_pmacsdql (operands[0], operands[1], operands[2],
                               partial));
  DONE;
})
11085
;; Unsigned average rounding up.  The template zero-extends operands 1
;; and 2 to <ssedoublemode>, adds them together with operand 3, shifts
;; the sum right by one and truncates back to the original mode.
11086 (define_expand "uavg<mode>3_ceil"
11087 [(set (match_operand:VI12_AVX2 0 "register_operand")
11088 (truncate:VI12_AVX2
11089 (lshiftrt:<ssedoublemode>
11090 (plus:<ssedoublemode>
11091 (plus:<ssedoublemode>
11092 (zero_extend:<ssedoublemode>
11093 (match_operand:VI12_AVX2 1 "vector_operand"))
11094 (zero_extend:<ssedoublemode>
11095 (match_operand:VI12_AVX2 2 "vector_operand")))
11096 (match_dup 3))
11097 (const_int 1))))]
11098 "TARGET_SSE2"
11099 {
/* Operand 3 (the match_dup in the template) is the constant-one vector.
   NOTE(review): it is built in <MODE>mode although it is used inside a
   plus:<ssedoublemode>; confirm this agrees with the matching insn.  */
11100 operands[3] = CONST1_RTX(<MODE>mode);
11101 ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);
11102 })
11103
;; Sum of absolute differences of unsigned bytes with accumulation,
;; 128-bit form: operand 0 (V4SI) = psadbw (operand 1, operand 2),
;; moved into a V4SI register, plus the V4SI accumulator in operand 3.
(define_expand "usadv16qi"
  [(match_operand:V4SI 0 "register_operand")
   (match_operand:V16QI 1 "register_operand")
   (match_operand:V16QI 2 "vector_operand")
   (match_operand:V4SI 3 "vector_operand")]
  "TARGET_SSE2"
{
  rtx sad_v2di = gen_reg_rtx (V2DImode);
  rtx sad_v4si = gen_reg_rtx (V4SImode);

  /* psadbw delivers its result in V2DImode.  */
  emit_insn (gen_sse2_psadbw (sad_v2di, operands[1], operands[2]));

  /* Bring it into the accumulator mode, then add the accumulator.  */
  convert_move (sad_v4si, sad_v2di, 0);
  emit_insn (gen_addv4si3 (operands[0], sad_v4si, operands[3]));
  DONE;
})
11118
;; Sum of absolute differences of unsigned bytes with accumulation,
;; 256-bit AVX2 form: operand 0 (V8SI) = psadbw (operand 1, operand 2),
;; moved into a V8SI register, plus the V8SI accumulator in operand 3.
(define_expand "usadv32qi"
  [(match_operand:V8SI 0 "register_operand")
   (match_operand:V32QI 1 "register_operand")
   (match_operand:V32QI 2 "nonimmediate_operand")
   (match_operand:V8SI 3 "nonimmediate_operand")]
  "TARGET_AVX2"
{
  rtx sad_v4di = gen_reg_rtx (V4DImode);
  rtx sad_v8si = gen_reg_rtx (V8SImode);

  /* psadbw delivers its result in V4DImode.  */
  emit_insn (gen_avx2_psadbw (sad_v4di, operands[1], operands[2]));

  /* Bring it into the accumulator mode, then add the accumulator.  */
  convert_move (sad_v8si, sad_v4di, 0);
  emit_insn (gen_addv8si3 (operands[0], sad_v8si, operands[3]));
  DONE;
})
11133
;; Sum of absolute differences of unsigned bytes with accumulation,
;; 512-bit AVX512BW form: operand 0 (V16SI) = psadbw (operand 1,
;; operand 2), moved into a V16SI register, plus the V16SI accumulator
;; in operand 3.
(define_expand "usadv64qi"
  [(match_operand:V16SI 0 "register_operand")
   (match_operand:V64QI 1 "register_operand")
   (match_operand:V64QI 2 "nonimmediate_operand")
   (match_operand:V16SI 3 "nonimmediate_operand")]
  "TARGET_AVX512BW"
{
  rtx sad_v8di = gen_reg_rtx (V8DImode);
  rtx sad_v16si = gen_reg_rtx (V16SImode);

  /* psadbw delivers its result in V8DImode.  */
  emit_insn (gen_avx512f_psadbw (sad_v8di, operands[1], operands[2]));

  /* Bring it into the accumulator mode, then add the accumulator.  */
  convert_move (sad_v16si, sad_v8di, 0);
  emit_insn (gen_addv16si3 (operands[0], sad_v16si, operands[3]));
  DONE;
})
11148
;; Arithmetic right shift (vpsra*) for the VI248_AVX512BW_1 modes, enabled
;; only with AVX512VL; <mask_name> also generates the masked variant.
;; Operand 2 is a DImode count: a vector register in alternative 0, or a
;; constant accepted by constraint N in alternative 1 (which also allows
;; operand 1 in memory).  length_immediate is 1 only for a const_int count.
11149 (define_insn "<mask_codefor>ashr<mode>3<mask_name>"
11150 [(set (match_operand:VI248_AVX512BW_1 0 "register_operand" "=v,v")
11151 (ashiftrt:VI248_AVX512BW_1
11152 (match_operand:VI248_AVX512BW_1 1 "nonimmediate_operand" "v,vm")
11153 (match_operand:DI 2 "nonmemory_operand" "v,N")))]
11154 "TARGET_AVX512VL"
11155 "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11156 [(set_attr "type" "sseishft")
11157 (set (attr "length_immediate")
11158 (if_then_else (match_operand 2 "const_int_operand")
11159 (const_string "1")
11160 (const_string "0")))
11161 (set_attr "mode" "<sseinsnmode>")])
11162
;; SSE2/AVX arithmetic right shift for the VI24_AVX2 modes.
;; Alternative 0 (isa noavx) is the two-operand legacy form with the
;; destination tied to operand 1; alternative 1 (isa avx) is the
;; three-operand VEX form.  In both, the DImode count is either an SSE
;; register or a constant accepted by constraint N; length_immediate is
;; 1 only for a const_int count.
11163 (define_insn "ashr<mode>3"
11164 [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
11165 (ashiftrt:VI24_AVX2
11166 (match_operand:VI24_AVX2 1 "register_operand" "0,x")
11167 (match_operand:DI 2 "nonmemory_operand" "xN,xN")))]
11168 "TARGET_SSE2"
11169 "@
11170 psra<ssemodesuffix>\t{%2, %0|%0, %2}
11171 vpsra<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11172 [(set_attr "isa" "noavx,avx")
11173 (set_attr "type" "sseishft")
11174 (set (attr "length_immediate")
11175 (if_then_else (match_operand 2 "const_int_operand")
11176 (const_string "1")
11177 (const_string "0")))
11178 (set_attr "prefix_data16" "1,*")
11179 (set_attr "prefix" "orig,vex")
11180 (set_attr "mode" "<sseinsnmode>")])
11181
;; AVX512F arithmetic right shift (vpsra*) for the
;; VI248_AVX512BW_AVX512VL modes, with the masked variant generated via
;; <mask_name>.  Alternative 0 takes a register source and a register
;; count; alternative 1 takes a register-or-memory source and a constant
;; count accepted by constraint N.
11182 (define_insn "ashr<mode>3<mask_name>"
11183 [(set (match_operand:VI248_AVX512BW_AVX512VL 0 "register_operand" "=v,v")
11184 (ashiftrt:VI248_AVX512BW_AVX512VL
11185 (match_operand:VI248_AVX512BW_AVX512VL 1 "nonimmediate_operand" "v,vm")
11186 (match_operand:DI 2 "nonmemory_operand" "v,N")))]
11187 "TARGET_AVX512F"
11188 "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11189 [(set_attr "type" "sseishft")
11190 (set (attr "length_immediate")
11191 (if_then_else (match_operand 2 "const_int_operand")
11192 (const_string "1")
11193 (const_string "0")))
11194 (set_attr "mode" "<sseinsnmode>")])
11195
;; Shifts selected by the any_lshift code iterator (mnemonic
;; vp<vshift>*) for the VI248_AVX512BW_2 modes, enabled only with
;; AVX512VL; <mask_name> also generates the masked variant.
;; Alternative 0: register source, register count.  Alternative 1:
;; register-or-memory source, constant count accepted by constraint N.
11196 (define_insn "<mask_codefor><shift_insn><mode>3<mask_name>"
11197 [(set (match_operand:VI248_AVX512BW_2 0 "register_operand" "=v,v")
11198 (any_lshift:VI248_AVX512BW_2
11199 (match_operand:VI248_AVX512BW_2 1 "nonimmediate_operand" "v,vm")
11200 (match_operand:DI 2 "nonmemory_operand" "v,N")))]
11201 "TARGET_AVX512VL"
11202 "vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11203 [(set_attr "type" "sseishft")
11204 (set (attr "length_immediate")
11205 (if_then_else (match_operand 2 "const_int_operand")
11206 (const_string "1")
11207 (const_string "0")))
11208 (set_attr "mode" "<sseinsnmode>")])
11209
;; SSE2/AVX shifts selected by the any_lshift code iterator for the
;; VI248_AVX2 modes.  Alternative 0 (isa noavx) is the two-operand
;; legacy form with the destination tied to operand 1; alternative 1
;; (isa avx) is the three-operand VEX form.  The DImode count is an SSE
;; register or a constant accepted by constraint N in both.
11210 (define_insn "<shift_insn><mode>3"
11211 [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,x")
11212 (any_lshift:VI248_AVX2
11213 (match_operand:VI248_AVX2 1 "register_operand" "0,x")
11214 (match_operand:DI 2 "nonmemory_operand" "xN,xN")))]
11215 "TARGET_SSE2"
11216 "@
11217 p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
11218 vp<vshift><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11219 [(set_attr "isa" "noavx,avx")
11220 (set_attr "type" "sseishft")
11221 (set (attr "length_immediate")
11222 (if_then_else (match_operand 2 "const_int_operand")
11223 (const_string "1")
11224 (const_string "0")))
11225 (set_attr "prefix_data16" "1,*")
11226 (set_attr "prefix" "orig,vex")
11227 (set_attr "mode" "<sseinsnmode>")])
11228
;; AVX512F shifts selected by the any_lshift code iterator for the
;; VI248_AVX512BW modes, with the masked variant generated via
;; <mask_name>.  Alternative 0: register source with a register or
;; N-constraint constant count.  Alternative 1: memory source, which is
;; only paired with a constant count.
11229 (define_insn "<shift_insn><mode>3<mask_name>"
11230 [(set (match_operand:VI248_AVX512BW 0 "register_operand" "=v,v")
11231 (any_lshift:VI248_AVX512BW
11232 (match_operand:VI248_AVX512BW 1 "nonimmediate_operand" "v,m")
11233 (match_operand:DI 2 "nonmemory_operand" "vN,N")))]
11234 "TARGET_AVX512F"
11235 "vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11236 [(set_attr "type" "sseishft")
11237 (set (attr "length_immediate")
11238 (if_then_else (match_operand 2 "const_int_operand")
11239 (const_string "1")
11240 (const_string "0")))
11241 (set_attr "mode" "<sseinsnmode>")])
11242
11243
;; Whole-vector logical right shift of a 128-bit integer vector by a
;; constant accepted by const_0_to_255_mul_8_operand.  The shift itself
;; is performed in V1TImode into a fresh pseudo (operand 3); operand 0 is
;; then set from that pseudo viewed in the original mode (operand 4).
11244 (define_expand "vec_shr_<mode>"
11245 [(set (match_dup 3)
11246 (lshiftrt:V1TI
11247 (match_operand:VI_128 1 "register_operand")
11248 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
11249 (set (match_operand:VI_128 0 "register_operand") (match_dup 4))]
11250 "TARGET_SSE2"
11251 {
/* View the source as V1TI so it fits the lshiftrt:V1TI in the template.  */
11252 operands[1] = gen_lowpart (V1TImode, operands[1]);
/* Operand 4 must be derived from operand 3, so keep this order.  */
11253 operands[3] = gen_reg_rtx (V1TImode);
11254 operands[4] = gen_lowpart (<MODE>mode, operands[3]);
11255 })
11256
;; AVX512BW whole-register shift (vp<vshift>dq) for the VIMAX_AVX512VL
;; modes.  The RTL count in operand 2 must satisfy
;; const_0_to_255_mul_8_operand; the immediate that is printed is that
;; value divided by 8.
(define_insn "avx512bw_<shift_insn><mode>3"
  [(set (match_operand:VIMAX_AVX512VL 0 "register_operand" "=v")
        (any_lshift:VIMAX_AVX512VL
          (match_operand:VIMAX_AVX512VL 1 "nonimmediate_operand" "vm")
          (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
  "TARGET_AVX512BW"
{
  /* Rescale the count before it is printed as the immediate.  */
  const HOST_WIDE_INT rtl_count = INTVAL (operands[2]);
  operands[2] = GEN_INT (rtl_count / 8);
  return "vp<vshift>dq\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "sseishft")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "<sseinsnmode>")])
11271
;; SSE2/AVX whole-register shift (p<vshift>dq) for the VIMAX_AVX2 modes.
;; Alternative 0 (isa noavx) is the two-operand legacy form with the
;; destination tied to operand 1; alternative 1 (isa avx) is the
;; three-operand form.  The RTL count in operand 2 must satisfy
;; const_0_to_255_mul_8_operand; the printed immediate is that value
;; divided by 8.
(define_insn "<sse2_avx2>_<shift_insn><mode>3"
  [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,v")
        (any_lshift:VIMAX_AVX2
          (match_operand:VIMAX_AVX2 1 "register_operand" "0,v")
          (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
  "TARGET_SSE2"
{
  /* Rescale the count before it is printed as the immediate.  */
  operands[2] = GEN_INT (INTVAL (operands[2]) / 8);

  if (which_alternative == 0)
    return "p<vshift>dq\t{%2, %0|%0, %2}";
  if (which_alternative == 1)
    return "vp<vshift>dq\t{%2, %1, %0|%0, %1, %2}";

  /* Only two alternatives exist.  */
  gcc_unreachable ();
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseishft")
   (set_attr "length_immediate" "1")
   (set_attr "atom_unit" "sishuf")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<sseinsnmode>")])
11298
;; AVX512F rotate with a vector count (vp<rotate>v*): operand 2 has the
;; same vector mode as the data and may be in memory.  The masked variant
;; is generated via <mask_name>.
11299 (define_insn "<avx512>_<rotate>v<mode><mask_name>"
11300 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
11301 (any_rotate:VI48_AVX512VL
11302 (match_operand:VI48_AVX512VL 1 "register_operand" "v")
11303 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")))]
11304 "TARGET_AVX512F"
11305 "vp<rotate>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11306 [(set_attr "prefix" "evex")
11307 (set_attr "mode" "<sseinsnmode>")])
11308
;; AVX512F rotate by a constant (vp<rotate>*): operand 2 is an SImode
;; constant accepted by const_0_to_255_operand and the source may be in
;; memory.  The masked variant is generated via <mask_name>.
11309 (define_insn "<avx512>_<rotate><mode><mask_name>"
11310 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
11311 (any_rotate:VI48_AVX512VL
11312 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")
11313 (match_operand:SI 2 "const_0_to_255_operand")))]
11314 "TARGET_AVX512F"
11315 "vp<rotate><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11316 [(set_attr "prefix" "evex")
11317 (set_attr "mode" "<sseinsnmode>")])
11318
;; Integer min/max expander (maxmin code iterator) for the
;; VI124_256_AVX512F_AVX512BW modes.  Both inputs may be in memory here;
;; the preparation statement hands the operands to
;; ix86_fixup_binary_operands_no_copy before the template is emitted
;; (presumably so they satisfy the insn conditions, which reject two
;; memory inputs -- confirm in the helper).
11319 (define_expand "<code><mode>3"
11320 [(set (match_operand:VI124_256_AVX512F_AVX512BW 0 "register_operand")
11321 (maxmin:VI124_256_AVX512F_AVX512BW
11322 (match_operand:VI124_256_AVX512F_AVX512BW 1 "nonimmediate_operand")
11323 (match_operand:VI124_256_AVX512F_AVX512BW 2 "nonimmediate_operand")))]
11324 "TARGET_AVX2"
11325 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
11326
;; AVX2 integer min/max (vp<maxmin_int>*) on the VI124_256 modes.
;; Operands 1 and 2 are marked commutative ("%"); the condition rejects
;; the case where both are in memory.
11327 (define_insn "*avx2_<code><mode>3"
11328 [(set (match_operand:VI124_256 0 "register_operand" "=v")
11329 (maxmin:VI124_256
11330 (match_operand:VI124_256 1 "nonimmediate_operand" "%v")
11331 (match_operand:VI124_256 2 "nonimmediate_operand" "vm")))]
11332 "TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11333 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11334 [(set_attr "type" "sseiadd")
11335 (set_attr "prefix_extra" "1")
11336 (set_attr "prefix" "vex")
11337 (set_attr "mode" "OI")])
11338
;; Masked integer min/max expander for the VI48_AVX512VL modes.  The
;; vec_merge selects between the min/max result and operand 3 (a vector
;; or zero, per nonimm_or_0_operand) under the mask register in
;; operand 4.  Operands are passed through
;; ix86_fixup_binary_operands_no_copy before the template is emitted.
11339 (define_expand "<code><mode>3_mask"
11340 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
11341 (vec_merge:VI48_AVX512VL
11342 (maxmin:VI48_AVX512VL
11343 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
11344 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
11345 (match_operand:VI48_AVX512VL 3 "nonimm_or_0_operand")
11346 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
11347 "TARGET_AVX512F"
11348 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
11349
;; AVX512F integer min/max (vp<maxmin_int>*) on the VI48_AVX512VL modes,
;; with the masked variant generated via <mask_name>.  Operands 1 and 2
;; are commutative ("%"); the condition rejects two memory inputs.
11350 (define_insn "*avx512f_<code><mode>3<mask_name>"
11351 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
11352 (maxmin:VI48_AVX512VL
11353 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "%v")
11354 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")))]
11355 "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11356 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11357 [(set_attr "type" "sseiadd")
11358 (set_attr "prefix_extra" "1")
11359 (set_attr "prefix" "maybe_evex")
11360 (set_attr "mode" "<sseinsnmode>")])
11361
;; AVX512BW integer min/max (vp<maxmin_int>*) on the VI12_AVX512VL modes,
;; with the masked variant generated via <mask_name>.  Unlike the
;; VI48 pattern, operand 1 must be a register and is not marked
;; commutative.
11362 (define_insn "<mask_codefor><code><mode>3<mask_name>"
11363 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
11364 (maxmin:VI12_AVX512VL
11365 (match_operand:VI12_AVX512VL 1 "register_operand" "v")
11366 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")))]
11367 "TARGET_AVX512BW"
11368 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11369 [(set_attr "type" "sseiadd")
11370 (set_attr "prefix" "evex")
11371 (set_attr "mode" "<sseinsnmode>")])
11372
;; Integer min/max expander for the 64-bit element modes in
;; VI8_AVX2_AVX512F.  With AVX512F (and, for anything other than V8DI,
;; AVX512VL) the template is emitted after operand fixup.  Otherwise the
;; operation is synthesized as a vector conditional select:
;;   dest = (op1 > op2) ? x : y
;; with x/y = op1/op2 for max and op2/op1 for min, and an unsigned
;; comparison for umax/umin.
(define_expand "<code><mode>3"
  [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand")
        (maxmin:VI8_AVX2_AVX512F
          (match_operand:VI8_AVX2_AVX512F 1 "register_operand")
          (match_operand:VI8_AVX2_AVX512F 2 "register_operand")))]
  "TARGET_SSE4_2"
{
  const bool emit_template
    = TARGET_AVX512F && (<MODE>mode == V8DImode || TARGET_AVX512VL);

  if (!emit_template)
    {
      const bool is_max = (<CODE> == SMAX || <CODE> == UMAX);
      const bool is_unsigned = (<CODE> == UMAX || <CODE> == UMIN);
      rtx vcond_ops[6];

      /* Slots follow the vcond operand layout: 0 = destination,
         1/2 = values selected when the comparison is true/false,
         3 = comparison, 4/5 = comparison operands.  */
      vcond_ops[0] = operands[0];
      vcond_ops[1] = is_max ? operands[1] : operands[2];
      vcond_ops[2] = is_max ? operands[2] : operands[1];
      vcond_ops[3] = gen_rtx_fmt_ee (is_unsigned ? GTU : GT, VOIDmode,
                                     operands[1], operands[2]);
      vcond_ops[4] = operands[1];
      vcond_ops[5] = operands[2];

      const bool expanded = ix86_expand_int_vcond (vcond_ops);
      gcc_assert (expanded);
      DONE;
    }

  ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
})
11414
;; Signed integer min/max expander for the 128-bit VI124_128 modes.
;; With SSE4.1, or for V8HI on any SSE2 target, the template is emitted
;; after operand fixup.  Otherwise the operation is synthesized as
;;   dest = (op1 > op2) ? x : y
;; with x/y = op1/op2 for smax and op2/op1 for smin.
(define_expand "<code><mode>3"
  [(set (match_operand:VI124_128 0 "register_operand")
        (smaxmin:VI124_128
          (match_operand:VI124_128 1 "vector_operand")
          (match_operand:VI124_128 2 "vector_operand")))]
  "TARGET_SSE2"
{
  if (!TARGET_SSE4_1 && <MODE>mode != V8HImode)
    {
      const bool is_max = (<CODE> == SMAX);
      rtx vcond_ops[6];

      /* Both inputs are used twice below, so get them into registers
         first (operand 1 before operand 2).  */
      operands[1] = force_reg (<MODE>mode, operands[1]);
      operands[2] = force_reg (<MODE>mode, operands[2]);

      /* Slots follow the vcond operand layout: 0 = destination,
         1/2 = values selected when the comparison is true/false,
         3 = comparison, 4/5 = comparison operands.  */
      vcond_ops[0] = operands[0];
      vcond_ops[1] = is_max ? operands[1] : operands[2];
      vcond_ops[2] = is_max ? operands[2] : operands[1];
      vcond_ops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
      vcond_ops[4] = operands[1];
      vcond_ops[5] = operands[2];

      const bool expanded = ix86_expand_int_vcond (vcond_ops);
      gcc_assert (expanded);
      DONE;
    }

  ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
})
11453
;; SSE4.1 signed integer min/max on the VI14_128 modes.  Alternatives 0
;; and 1 (isa noavx) are the two-operand legacy form with the destination
;; tied to operand 1; alternative 2 (isa avx) is the three-operand form,
;; which also carries the optional mask operand.  Operands 1 and 2 are
;; commutative ("%"); the condition rejects two memory inputs.
11454 (define_insn "*sse4_1_<code><mode>3<mask_name>"
11455 [(set (match_operand:VI14_128 0 "register_operand" "=Yr,*x,v")
11456 (smaxmin:VI14_128
11457 (match_operand:VI14_128 1 "vector_operand" "%0,0,v")
11458 (match_operand:VI14_128 2 "vector_operand" "YrBm,*xBm,vm")))]
11459 "TARGET_SSE4_1
11460 && <mask_mode512bit_condition>
11461 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11462 "@
11463 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
11464 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
11465 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11466 [(set_attr "isa" "noavx,noavx,avx")
11467 (set_attr "type" "sseiadd")
11468 (set_attr "prefix_extra" "1,1,*")
11469 (set_attr "prefix" "orig,orig,vex")
11470 (set_attr "mode" "TI")])
11471
;; Signed V8HI min/max (p<maxmin_int>w), available from SSE2.
;; Alternative 0 is the legacy two-operand form (isa noavx), alternative
;; 1 the VEX form (isa avx) and alternative 2 the form using the "v"
;; register class (isa avx512bw).  The condition rejects two memory
;; inputs.
11472 (define_insn "*<code>v8hi3"
11473 [(set (match_operand:V8HI 0 "register_operand" "=x,x,v")
11474 (smaxmin:V8HI
11475 (match_operand:V8HI 1 "vector_operand" "%0,x,v")
11476 (match_operand:V8HI 2 "vector_operand" "xBm,xm,vm")))]
11477 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11478 "@
11479 p<maxmin_int>w\t{%2, %0|%0, %2}
11480 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}
11481 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
11482 [(set_attr "isa" "noavx,avx,avx512bw")
11483 (set_attr "type" "sseiadd")
11484 (set_attr "prefix_data16" "1,*,*")
11485 (set_attr "prefix_extra" "*,1,1")
11486 (set_attr "prefix" "orig,vex,evex")
11487 (set_attr "mode" "TI")])
11488
;; Unsigned integer min/max expander for the 128-bit VI124_128 modes.
;; Three strategies, in priority order:
;;  1. SSE4.1, or V16QI on any SSE2 target: emit the template after
;;     operand fixup.
;;  2. umax on V8HI: emit ussub (op1, op2) followed by an add of op2.
;;  3. Everything else: synthesize as dest = (op1 >u op2) ? x : y with
;;     x/y = op1/op2 for umax and op2/op1 for umin.
(define_expand "<code><mode>3"
  [(set (match_operand:VI124_128 0 "register_operand")
        (umaxmin:VI124_128
          (match_operand:VI124_128 1 "vector_operand")
          (match_operand:VI124_128 2 "vector_operand")))]
  "TARGET_SSE2"
{
  const bool emit_template = TARGET_SSE4_1 || <MODE>mode == V16QImode;

  if (!emit_template)
    {
      if (<CODE> == UMAX && <MODE>mode == V8HImode)
        {
          rtx dest = operands[0];
          rtx rhs = operands[2];

          operands[1] = force_reg (<MODE>mode, operands[1]);

          /* The subtraction result normally lands in the destination,
             but rhs is read again by the add, so use a fresh pseudo
             when the destination is the same rtx as rhs.  */
          rtx diff = rtx_equal_p (dest, rhs) ? gen_reg_rtx (V8HImode) : dest;

          emit_insn (gen_sse2_ussubv8hi3 (diff, operands[1], rhs));
          emit_insn (gen_addv8hi3 (dest, diff, rhs));
          DONE;
        }

      const bool is_max = (<CODE> == UMAX);
      rtx vcond_ops[6];

      /* Both inputs are used twice below, so get them into registers
         first (operand 1 before operand 2).  */
      operands[1] = force_reg (<MODE>mode, operands[1]);
      operands[2] = force_reg (<MODE>mode, operands[2]);

      /* Slots follow the vcond operand layout: 0 = destination,
         1/2 = values selected when the comparison is true/false,
         3 = comparison, 4/5 = comparison operands.  */
      vcond_ops[0] = operands[0];
      vcond_ops[1] = is_max ? operands[1] : operands[2];
      vcond_ops[2] = is_max ? operands[2] : operands[1];
      vcond_ops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
      vcond_ops[4] = operands[1];
      vcond_ops[5] = operands[2];

      const bool expanded = ix86_expand_int_vcond (vcond_ops);
      gcc_assert (expanded);
      DONE;
    }

  ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
})
11538
;; SSE4.1 unsigned integer min/max on the VI24_128 modes.  Alternatives
;; 0 and 1 (isa noavx) are the two-operand legacy form with the
;; destination tied to operand 1; alternative 2 (isa avx) is the
;; three-operand form, which also carries the optional mask operand.
;; Operands 1 and 2 are commutative ("%"); the condition rejects two
;; memory inputs.
11539 (define_insn "*sse4_1_<code><mode>3<mask_name>"
11540 [(set (match_operand:VI24_128 0 "register_operand" "=Yr,*x,v")
11541 (umaxmin:VI24_128
11542 (match_operand:VI24_128 1 "vector_operand" "%0,0,v")
11543 (match_operand:VI24_128 2 "vector_operand" "YrBm,*xBm,vm")))]
11544 "TARGET_SSE4_1
11545 && <mask_mode512bit_condition>
11546 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11547 "@
11548 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
11549 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
11550 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11551 [(set_attr "isa" "noavx,noavx,avx")
11552 (set_attr "type" "sseiadd")
11553 (set_attr "prefix_extra" "1,1,*")
11554 (set_attr "prefix" "orig,orig,vex")
11555 (set_attr "mode" "TI")])
11556
;; Unsigned V16QI min/max (p<maxmin_int>b), available from SSE2.
;; Alternative 0 is the legacy two-operand form (isa noavx), alternative
;; 1 the VEX form (isa avx) and alternative 2 the form using the "v"
;; register class (isa avx512bw).  The condition rejects two memory
;; inputs.
11557 (define_insn "*<code>v16qi3"
11558 [(set (match_operand:V16QI 0 "register_operand" "=x,x,v")
11559 (umaxmin:V16QI
11560 (match_operand:V16QI 1 "vector_operand" "%0,x,v")
11561 (match_operand:V16QI 2 "vector_operand" "xBm,xm,vm")))]
11562 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11563 "@
11564 p<maxmin_int>b\t{%2, %0|%0, %2}
11565 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}
11566 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
11567 [(set_attr "isa" "noavx,avx,avx512bw")
11568 (set_attr "type" "sseiadd")
11569 (set_attr "prefix_data16" "1,*,*")
11570 (set_attr "prefix_extra" "*,1,1")
11571 (set_attr "prefix" "orig,vex,evex")
11572 (set_attr "mode" "TI")])
11573
11574 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11575 ;;
11576 ;; Parallel integral comparisons
11577 ;;
11578 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11579
;; Named expander for element-wise equality on the 256-bit VI_256 modes.
;; Both inputs may be in memory here; the operands are passed through
;; ix86_fixup_binary_operands_no_copy before the template is emitted.
11580 (define_expand "avx2_eq<mode>3"
11581 [(set (match_operand:VI_256 0 "register_operand")
11582 (eq:VI_256
11583 (match_operand:VI_256 1 "nonimmediate_operand")
11584 (match_operand:VI_256 2 "nonimmediate_operand")))]
11585 "TARGET_AVX2"
11586 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
11587
;; AVX2 element-wise equality (vpcmpeq*) on the VI_256 modes, producing a
;; vector result in the same mode.  Operands 1 and 2 are commutative
;; ("%"); the condition rejects two memory inputs.
11588 (define_insn "*avx2_eq<mode>3"
11589 [(set (match_operand:VI_256 0 "register_operand" "=x")
11590 (eq:VI_256
11591 (match_operand:VI_256 1 "nonimmediate_operand" "%x")
11592 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
11593 "TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11594 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11595 [(set_attr "type" "ssecmp")
11596 (set_attr "prefix_extra" "1")
11597 (set_attr "prefix" "vex")
11598 (set_attr "mode" "OI")])
11599
;; Named expander for AVX512BW equality on the VI12_AVX512VL modes.  The
;; result is a mask in <avx512fmaskmode>, expressed as an
;; UNSPEC_MASKED_EQ of the two inputs.  Operands are passed through
;; ix86_fixup_binary_operands_no_copy before the template is emitted.
11600 (define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>"
11601 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
11602 (unspec:<avx512fmaskmode>
11603 [(match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
11604 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand")]
11605 UNSPEC_MASKED_EQ))]
11606 "TARGET_AVX512BW"
11607 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
11608
;; Named expander for AVX512F equality on the VI48_AVX512VL modes.  The
;; result is a mask in <avx512fmaskmode>, expressed as an
;; UNSPEC_MASKED_EQ of the two inputs.  Operands are passed through
;; ix86_fixup_binary_operands_no_copy before the template is emitted.
11609 (define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>"
11610 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
11611 (unspec:<avx512fmaskmode>
11612 [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
11613 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand")]
11614 UNSPEC_MASKED_EQ))]
11615 "TARGET_AVX512F"
11616 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
11617
;; AVX512BW equality into a mask register (Yk) for the VI12_AVX512VL
;; modes.  Alternative 0 emits vpcmpeq* of operands 1 and 2.
;; Alternative 1 matches operand 2 against constraint C (paired with the
;; nonimm_or_0_operand predicate) and emits vptestnm* of operand 1 with
;; itself instead.  The condition rejects two memory inputs.
11618 (define_insn "<avx512>_eq<mode>3<mask_scalar_merge_name>_1"
11619 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk,Yk")
11620 (unspec:<avx512fmaskmode>
11621 [(match_operand:VI12_AVX512VL 1 "nonimm_or_0_operand" "%v,v")
11622 (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand" "vm,C")]
11623 UNSPEC_MASKED_EQ))]
11624 "TARGET_AVX512BW && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11625 "@
11626 vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}
11627 vptestnm<ssemodesuffix>\t{%1, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %1}"
11628 [(set_attr "type" "ssecmp")
11629 (set_attr "prefix_extra" "1")
11630 (set_attr "prefix" "evex")
11631 (set_attr "mode" "<sseinsnmode>")])
11632
;; AVX512F equality into a mask register (Yk) for the VI48_AVX512VL
;; modes.  Alternative 0 emits vpcmpeq* of operands 1 and 2.
;; Alternative 1 matches operand 2 against constraint C (paired with the
;; nonimm_or_0_operand predicate) and emits vptestnm* of operand 1 with
;; itself instead.  The condition rejects two memory inputs.
11633 (define_insn "<avx512>_eq<mode>3<mask_scalar_merge_name>_1"
11634 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk,Yk")
11635 (unspec:<avx512fmaskmode>
11636 [(match_operand:VI48_AVX512VL 1 "nonimm_or_0_operand" "%v,v")
11637 (match_operand:VI48_AVX512VL 2 "nonimm_or_0_operand" "vm,C")]
11638 UNSPEC_MASKED_EQ))]
11639 "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11640 "@
11641 vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}
11642 vptestnm<ssemodesuffix>\t{%1, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %1}"
11643 [(set_attr "type" "ssecmp")
11644 (set_attr "prefix_extra" "1")
11645 (set_attr "prefix" "evex")
11646 (set_attr "mode" "<sseinsnmode>")])
11647
;; SSE4.1 V2DI equality (pcmpeqq) with a vector result.  Alternatives 0
;; and 1 (isa noavx) are the two-operand legacy form with the destination
;; tied to operand 1; alternative 2 (isa avx) is the three-operand VEX
;; form.  Operands 1 and 2 are commutative ("%"); the condition rejects
;; two memory inputs.
11648 (define_insn "*sse4_1_eqv2di3"
11649 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,x")
11650 (eq:V2DI
11651 (match_operand:V2DI 1 "vector_operand" "%0,0,x")
11652 (match_operand:V2DI 2 "vector_operand" "YrBm,*xBm,xm")))]
11653 "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11654 "@
11655 pcmpeqq\t{%2, %0|%0, %2}
11656 pcmpeqq\t{%2, %0|%0, %2}
11657 vpcmpeqq\t{%2, %1, %0|%0, %1, %2}"
11658 [(set_attr "isa" "noavx,noavx,avx")
11659 (set_attr "type" "ssecmp")
11660 (set_attr "prefix_extra" "1")
11661 (set_attr "prefix" "orig,orig,vex")
11662 (set_attr "mode" "TI")])
11663
;; SSE2 element-wise equality (pcmpeq*) on the VI124_128 modes with a
;; vector result; disabled when XOP is enabled.  Alternative 0 (isa
;; noavx) is the two-operand legacy form, alternative 1 (isa avx) the
;; three-operand VEX form.  Operands 1 and 2 are commutative ("%"); the
;; condition rejects two memory inputs.
11664 (define_insn "*sse2_eq<mode>3"
11665 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
11666 (eq:VI124_128
11667 (match_operand:VI124_128 1 "vector_operand" "%0,x")
11668 (match_operand:VI124_128 2 "vector_operand" "xBm,xm")))]
11669 "TARGET_SSE2 && !TARGET_XOP
11670 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11671 "@
11672 pcmpeq<ssemodesuffix>\t{%2, %0|%0, %2}
11673 vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11674 [(set_attr "isa" "noavx,avx")
11675 (set_attr "type" "ssecmp")
11676 (set_attr "prefix_data16" "1,*")
11677 (set_attr "prefix" "orig,vex")
11678 (set_attr "mode" "TI")])
11679
;; Named expander for SSE2 element-wise equality on the VI124_128 modes;
;; disabled when XOP is enabled, like the matching insn.  Operands are
;; passed through ix86_fixup_binary_operands_no_copy before the template
;; is emitted.
11680 (define_expand "sse2_eq<mode>3"
11681 [(set (match_operand:VI124_128 0 "register_operand")
11682 (eq:VI124_128
11683 (match_operand:VI124_128 1 "vector_operand")
11684 (match_operand:VI124_128 2 "vector_operand")))]
11685 "TARGET_SSE2 && !TARGET_XOP "
11686 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
11687
;; Named expander for SSE4.1 V2DI equality.  Operands are passed through
;; ix86_fixup_binary_operands_no_copy before the template is emitted.
11688 (define_expand "sse4_1_eqv2di3"
11689 [(set (match_operand:V2DI 0 "register_operand")
11690 (eq:V2DI
11691 (match_operand:V2DI 1 "vector_operand")
11692 (match_operand:V2DI 2 "vector_operand")))]
11693 "TARGET_SSE4_1"
11694 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
11695
;; SSE4.2 signed V2DI greater-than (pcmpgtq) with a vector result.
;; Operand 1 must be a register and the operands are not commutative.
;; Alternatives 0 and 1 (isa noavx) are the two-operand legacy form;
;; alternative 2 (isa avx) is the three-operand VEX form.
11696 (define_insn "sse4_2_gtv2di3"
11697 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,x")
11698 (gt:V2DI
11699 (match_operand:V2DI 1 "register_operand" "0,0,x")
11700 (match_operand:V2DI 2 "vector_operand" "YrBm,*xBm,xm")))]
11701 "TARGET_SSE4_2"
11702 "@
11703 pcmpgtq\t{%2, %0|%0, %2}
11704 pcmpgtq\t{%2, %0|%0, %2}
11705 vpcmpgtq\t{%2, %1, %0|%0, %1, %2}"
11706 [(set_attr "isa" "noavx,noavx,avx")
11707 (set_attr "type" "ssecmp")
11708 (set_attr "prefix_extra" "1")
11709 (set_attr "prefix" "orig,orig,vex")
11710 (set_attr "mode" "TI")])
11711
;; AVX2 signed greater-than (vpcmpgt*) on the VI_256 modes with a vector
;; result.  Operand 1 must be a register; operand 2 may be in memory.
11712 (define_insn "avx2_gt<mode>3"
11713 [(set (match_operand:VI_256 0 "register_operand" "=x")
11714 (gt:VI_256
11715 (match_operand:VI_256 1 "register_operand" "x")
11716 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
11717 "TARGET_AVX2"
11718 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11719 [(set_attr "type" "ssecmp")
11720 (set_attr "prefix_extra" "1")
11721 (set_attr "prefix" "vex")
11722 (set_attr "mode" "OI")])
11723
;; AVX512F greater-than (vpcmpgt*) into a mask register (Yk) for the
;; VI48_AVX512VL modes, expressed as UNSPEC_MASKED_GT.  Operand 1 must be
;; a register; operand 2 may be in memory.
11724 (define_insn "<avx512>_gt<mode>3<mask_scalar_merge_name>"
11725 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
11726 (unspec:<avx512fmaskmode>
11727 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
11728 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))]
11729 "TARGET_AVX512F"
11730 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
11731 [(set_attr "type" "ssecmp")
11732 (set_attr "prefix_extra" "1")
11733 (set_attr "prefix" "evex")
11734 (set_attr "mode" "<sseinsnmode>")])
11735
;; AVX512BW greater-than (vpcmpgt*) into a mask register (Yk) for the
;; VI12_AVX512VL modes, expressed as UNSPEC_MASKED_GT.  Operand 1 must be
;; a register; operand 2 may be in memory.
11736 (define_insn "<avx512>_gt<mode>3<mask_scalar_merge_name>"
11737 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
11738 (unspec:<avx512fmaskmode>
11739 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
11740 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))]
11741 "TARGET_AVX512BW"
11742 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
11743 [(set_attr "type" "ssecmp")
11744 (set_attr "prefix_extra" "1")
11745 (set_attr "prefix" "evex")
11746 (set_attr "mode" "<sseinsnmode>")])
11747
;; SSE2 signed greater-than (pcmpgt*) on the VI124_128 modes with a
;; vector result; disabled when XOP is enabled.  Operand 1 must be a
;; register.  Alternative 0 (isa noavx) is the two-operand legacy form,
;; alternative 1 (isa avx) the three-operand VEX form.
11748 (define_insn "sse2_gt<mode>3"
11749 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
11750 (gt:VI124_128
11751 (match_operand:VI124_128 1 "register_operand" "0,x")
11752 (match_operand:VI124_128 2 "vector_operand" "xBm,xm")))]
11753 "TARGET_SSE2 && !TARGET_XOP"
11754 "@
11755 pcmpgt<ssemodesuffix>\t{%2, %0|%0, %2}
11756 vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11757 [(set_attr "isa" "noavx,avx")
11758 (set_attr "type" "ssecmp")
11759 (set_attr "prefix_data16" "1,*")
11760 (set_attr "prefix" "orig,vex")
11761 (set_attr "mode" "TI")])
11762
;; Vector conditional select with an integer comparison, 512-bit data:
;;   operand 0 = (operand 4 <operator 3> operand 5) ? operand 1 : operand 2
;; The data mode (V_512) and the comparison mode (VI_AVX512BW) are
;; iterated independently; the condition keeps only the pairs with the
;; same number of elements.  All the work is done by
;; ix86_expand_int_vcond, which must succeed.
(define_expand "vcond<V_512:mode><VI_AVX512BW:mode>"
  [(set (match_operand:V_512 0 "register_operand")
        (if_then_else:V_512
          (match_operator 3 ""
            [(match_operand:VI_AVX512BW 4 "nonimmediate_operand")
             (match_operand:VI_AVX512BW 5 "general_operand")])
          (match_operand:V_512 1)
          (match_operand:V_512 2)))]
  "TARGET_AVX512F
   && (GET_MODE_NUNITS (<V_512:MODE>mode)
       == GET_MODE_NUNITS (<VI_AVX512BW:MODE>mode))"
{
  /* The call is kept outside the assertion so that it is always made.  */
  const bool expanded = ix86_expand_int_vcond (operands);
  gcc_assert (expanded);
  DONE;
})
11779
;; Vector conditional select with an integer comparison, 256-bit data:
;;   operand 0 = (operand 4 <operator 3> operand 5) ? operand 1 : operand 2
;; The data mode (V_256) and the comparison mode (VI_256) are iterated
;; independently; the condition keeps only the pairs with the same
;; number of elements.  All the work is done by ix86_expand_int_vcond,
;; which must succeed.
(define_expand "vcond<V_256:mode><VI_256:mode>"
  [(set (match_operand:V_256 0 "register_operand")
        (if_then_else:V_256
          (match_operator 3 ""
            [(match_operand:VI_256 4 "nonimmediate_operand")
             (match_operand:VI_256 5 "general_operand")])
          (match_operand:V_256 1)
          (match_operand:V_256 2)))]
  "TARGET_AVX2
   && (GET_MODE_NUNITS (<V_256:MODE>mode)
       == GET_MODE_NUNITS (<VI_256:MODE>mode))"
{
  /* The call is kept outside the assertion so that it is always made.  */
  const bool expanded = ix86_expand_int_vcond (operands);
  gcc_assert (expanded);
  DONE;
})
11796
;; Vector conditional select with an integer comparison, 128-bit data:
;;   operand 0 = (operand 4 <operator 3> operand 5) ? operand 1 : operand 2
;; The data mode (V_128) and the comparison mode (VI124_128) are
;; iterated independently; the condition keeps only the pairs with the
;; same number of elements.  All the work is done by
;; ix86_expand_int_vcond, which must succeed.
(define_expand "vcond<V_128:mode><VI124_128:mode>"
  [(set (match_operand:V_128 0 "register_operand")
        (if_then_else:V_128
          (match_operator 3 ""
            [(match_operand:VI124_128 4 "vector_operand")
             (match_operand:VI124_128 5 "general_operand")])
          (match_operand:V_128 1)
          (match_operand:V_128 2)))]
  "TARGET_SSE2
   && (GET_MODE_NUNITS (<V_128:MODE>mode)
       == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
{
  /* The call is kept outside the assertion so that it is always made.  */
  const bool expanded = ix86_expand_int_vcond (operands);
  gcc_assert (expanded);
  DONE;
})
11813
;; Vector conditional select with a V2DI comparison and two-element
;; 128-bit data (VI8F_128):
;;   operand 0 = (operand 4 <operator 3> operand 5) ? operand 1 : operand 2
;; Requires SSE4.2.  All the work is done by ix86_expand_int_vcond,
;; which must succeed.
(define_expand "vcond<VI8F_128:mode>v2di"
  [(set (match_operand:VI8F_128 0 "register_operand")
        (if_then_else:VI8F_128
          (match_operator 3 ""
            [(match_operand:V2DI 4 "vector_operand")
             (match_operand:V2DI 5 "general_operand")])
          (match_operand:VI8F_128 1)
          (match_operand:VI8F_128 2)))]
  "TARGET_SSE4_2"
{
  /* The call is kept outside the assertion so that it is always made.  */
  const bool expanded = ix86_expand_int_vcond (operands);
  gcc_assert (expanded);
  DONE;
})
11828
;; vcondu form of the vector conditional select, 512-bit data:
;;   operand 0 = (operand 4 <operator 3> operand 5) ? operand 1 : operand 2
;; Compared with the plain vcond expander, both comparison inputs are
;; nonimmediate_operand and the selected values are general_operand.
;; The condition keeps only data/comparison mode pairs with the same
;; number of elements.  ix86_expand_int_vcond does the work and must
;; succeed.
(define_expand "vcondu<V_512:mode><VI_AVX512BW:mode>"
  [(set (match_operand:V_512 0 "register_operand")
        (if_then_else:V_512
          (match_operator 3 ""
            [(match_operand:VI_AVX512BW 4 "nonimmediate_operand")
             (match_operand:VI_AVX512BW 5 "nonimmediate_operand")])
          (match_operand:V_512 1 "general_operand")
          (match_operand:V_512 2 "general_operand")))]
  "TARGET_AVX512F
   && (GET_MODE_NUNITS (<V_512:MODE>mode)
       == GET_MODE_NUNITS (<VI_AVX512BW:MODE>mode))"
{
  /* The call is kept outside the assertion so that it is always made.  */
  const bool expanded = ix86_expand_int_vcond (operands);
  gcc_assert (expanded);
  DONE;
})
11845
;; vcondu form of the vector conditional select, 256-bit data:
;;   operand 0 = (operand 4 <operator 3> operand 5) ? operand 1 : operand 2
;; Compared with the plain vcond expander, both comparison inputs are
;; nonimmediate_operand and the selected values are general_operand.
;; The condition keeps only data/comparison mode pairs with the same
;; number of elements.  ix86_expand_int_vcond does the work and must
;; succeed.
(define_expand "vcondu<V_256:mode><VI_256:mode>"
  [(set (match_operand:V_256 0 "register_operand")
        (if_then_else:V_256
          (match_operator 3 ""
            [(match_operand:VI_256 4 "nonimmediate_operand")
             (match_operand:VI_256 5 "nonimmediate_operand")])
          (match_operand:V_256 1 "general_operand")
          (match_operand:V_256 2 "general_operand")))]
  "TARGET_AVX2
   && (GET_MODE_NUNITS (<V_256:MODE>mode)
       == GET_MODE_NUNITS (<VI_256:MODE>mode))"
{
  /* The call is kept outside the assertion so that it is always made.  */
  const bool expanded = ix86_expand_int_vcond (operands);
  gcc_assert (expanded);
  DONE;
})
11862
;; vcondu form of the vector conditional select, 128-bit data:
;;   operand 0 = (operand 4 <operator 3> operand 5) ? operand 1 : operand 2
;; Compared with the plain vcond expander, both comparison inputs are
;; vector_operand and the selected values are general_operand.  The
;; condition keeps only data/comparison mode pairs with the same number
;; of elements.  ix86_expand_int_vcond does the work and must succeed.
(define_expand "vcondu<V_128:mode><VI124_128:mode>"
  [(set (match_operand:V_128 0 "register_operand")
        (if_then_else:V_128
          (match_operator 3 ""
            [(match_operand:VI124_128 4 "vector_operand")
             (match_operand:VI124_128 5 "vector_operand")])
          (match_operand:V_128 1 "general_operand")
          (match_operand:V_128 2 "general_operand")))]
  "TARGET_SSE2
   && (GET_MODE_NUNITS (<V_128:MODE>mode)
       == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
{
  /* The call is kept outside the assertion so that it is always made.  */
  const bool expanded = ix86_expand_int_vcond (operands);
  gcc_assert (expanded);
  DONE;
})
11879
;; vcondu form of the vector conditional select with a V2DI comparison
;; and two-element 128-bit data (VI8F_128):
;;   operand 0 = (operand 4 <operator 3> operand 5) ? operand 1 : operand 2
;; Requires SSE4.2.  ix86_expand_int_vcond does the work and must
;; succeed.
(define_expand "vcondu<VI8F_128:mode>v2di"
  [(set (match_operand:VI8F_128 0 "register_operand")
        (if_then_else:VI8F_128
          (match_operator 3 ""
            [(match_operand:V2DI 4 "vector_operand")
             (match_operand:V2DI 5 "vector_operand")])
          (match_operand:VI8F_128 1 "general_operand")
          (match_operand:VI8F_128 2 "general_operand")))]
  "TARGET_SSE4_2"
{
  /* The call is kept outside the assertion so that it is always made.  */
  const bool expanded = ix86_expand_int_vcond (operands);
  gcc_assert (expanded);
  DONE;
})
11894
;; vcondeq form of the vector conditional select with a V2DI comparison
;; and two-element 128-bit data (VI8F_128):
;;   operand 0 = (operand 4 <operator 3> operand 5) ? operand 1 : operand 2
;; Unlike the vcond/vcondu V2DI expanders, which need SSE4.2, this one is
;; enabled from SSE4.1.  ix86_expand_int_vcond does the work and must
;; succeed.
(define_expand "vcondeq<VI8F_128:mode>v2di"
  [(set (match_operand:VI8F_128 0 "register_operand")
        (if_then_else:VI8F_128
          (match_operator 3 ""
            [(match_operand:V2DI 4 "vector_operand")
             (match_operand:V2DI 5 "general_operand")])
          (match_operand:VI8F_128 1)
          (match_operand:VI8F_128 2)))]
  "TARGET_SSE4_1"
{
  /* The call is kept outside the assertion so that it is always made.  */
  const bool expanded = ix86_expand_int_vcond (operands);
  gcc_assert (expanded);
  DONE;
})
11909
;; Modes handled by the vec_perm<mode> expander.  The six 128-bit modes
;; are unconditional; 256-bit modes require AVX2, 512-bit modes with
;; 32- or 64-bit elements require AVX512F, V32HI requires AVX512BW and
;; V64QI requires AVX512VBMI.
11910 (define_mode_iterator VEC_PERM_AVX2
11911 [V16QI V8HI V4SI V2DI V4SF V2DF
11912 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
11913 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
11914 (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")
11915 (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
11916 (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
11917 (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512VBMI")])
11918
;; Variable vector permutation: operand 0 is the result, operands 1 and
;; 2 the two input vectors and operand 3 the selector, an integer vector
;; of mode <sseintvecmode>.  Enabled with SSSE3, AVX or XOP; the whole
;; expansion is delegated to ix86_expand_vec_perm.
11919 (define_expand "vec_perm<mode>"
11920 [(match_operand:VEC_PERM_AVX2 0 "register_operand")
11921 (match_operand:VEC_PERM_AVX2 1 "register_operand")
11922 (match_operand:VEC_PERM_AVX2 2 "register_operand")
11923 (match_operand:<sseintvecmode> 3 "register_operand")]
11924 "TARGET_SSSE3 || TARGET_AVX || TARGET_XOP"
11925 {
11926 ix86_expand_vec_perm (operands);
11927 DONE;
11928 })
11929
11930 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11931 ;;
11932 ;; Parallel bitwise logical operations
11933 ;;
11934 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11935
;; One's complement of an integer vector, expressed as an XOR of operand 1
;; with operand 2.  Operand 2 is not supplied by the caller (match_dup); the
;; preparation code creates it as a register holding CONSTM1_RTX for the
;; mode, and the pattern above is then emitted as written (no DONE).
11936 (define_expand "one_cmpl<mode>2"
11937 [(set (match_operand:VI 0 "register_operand")
11938 (xor:VI (match_operand:VI 1 "vector_operand")
11939 (match_dup 2)))]
11940 "TARGET_SSE"
11941 {
11942 operands[2] = force_reg (<MODE>mode, CONSTM1_RTX (<MODE>mode));
11943 })
11944
;; Named expander for AND-NOT: operand 0 = (~operand 1) & operand 2 over the
;; VI_AVX2 modes.  It has no preparation code, so it only emits the RTL
;; shown.  Note that operand 1 (the complemented one) must be a register.
11945 (define_expand "<sse2_avx2>_andnot<mode>3"
11946 [(set (match_operand:VI_AVX2 0 "register_operand")
11947 (and:VI_AVX2
11948 (not:VI_AVX2 (match_operand:VI_AVX2 1 "register_operand"))
11949 (match_operand:VI_AVX2 2 "vector_operand")))]
11950 "TARGET_SSE2")
11951
;; Masked AND-NOT expander for the VI48_AVX512VL modes.  The result of
;; (~operand 1) & operand 2 is merged (vec_merge) with operand 3 under
;; control of mask register operand 4.  Requires AVX512F.
11952 (define_expand "<sse2_avx2>_andnot<mode>3_mask"
11953 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
11954 (vec_merge:VI48_AVX512VL
11955 (and:VI48_AVX512VL
11956 (not:VI48_AVX512VL
11957 (match_operand:VI48_AVX512VL 1 "register_operand"))
11958 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
11959 (match_operand:VI48_AVX512VL 3 "nonimm_or_0_operand")
11960 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
11961 "TARGET_AVX512F")
11962
;; Masked AND-NOT expander for the VI12_AVX512VL modes; same RTL shape as
;; the VI48_AVX512VL expander of the same name, but gated on AVX512BW.
11963 (define_expand "<sse2_avx2>_andnot<mode>3_mask"
11964 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
11965 (vec_merge:VI12_AVX512VL
11966 (and:VI12_AVX512VL
11967 (not:VI12_AVX512VL
11968 (match_operand:VI12_AVX512VL 1 "register_operand"))
11969 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand"))
11970 (match_operand:VI12_AVX512VL 3 "nonimm_or_0_operand")
11971 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
11972 "TARGET_AVX512BW")
11973
;; Insn for operand 0 = (~operand 1) & operand 2 over all VI modes.
;; Alternative 0 is the two-operand legacy form (isa "noavx", destination
;; tied to operand 1); alternatives 1 and 2 are three-operand "v"-prefixed
;; forms (isa "avx"), alternative 2 using the "v" register constraint.
;; The output code chooses the mnemonic stem and suffix from the insn's
;; "mode" attribute and formats the final template into a static buffer.
11974 (define_insn "*andnot<mode>3"
11975 [(set (match_operand:VI 0 "register_operand" "=x,x,v")
11976 (and:VI
11977 (not:VI (match_operand:VI 1 "register_operand" "0,x,v"))
11978 (match_operand:VI 2 "vector_operand" "xBm,xm,vm")))]
11979 "TARGET_SSE"
11980 {
/* buf is static because the returned pointer must outlive this code.  */
11981 static char buf[64];
11982 const char *ops;
11983 const char *tmp;
11984 const char *ssesuffix;
11985
/* Integer insn modes use the "pandn" stem, float insn modes "andn" + "ps".
   The cascading asserts check the ISA needed for each vector width.  */
11986 switch (get_attr_mode (insn))
11987 {
11988 case MODE_XI:
11989 gcc_assert (TARGET_AVX512F);
11990 /* FALLTHRU */
11991 case MODE_OI:
11992 gcc_assert (TARGET_AVX2);
11993 /* FALLTHRU */
11994 case MODE_TI:
11995 gcc_assert (TARGET_SSE2);
11996 tmp = "pandn";
11997 switch (<MODE>mode)
11998 {
11999 case E_V64QImode:
12000 case E_V32HImode:
12001 /* There is no vpandnb or vpandnw instruction, nor vpandn for
12002 512-bit vectors. Use vpandnq instead. */
12003 ssesuffix = "q";
12004 break;
12005 case E_V16SImode:
12006 case E_V8DImode:
12007 ssesuffix = "<ssemodesuffix>";
12008 break;
12009 case E_V8SImode:
12010 case E_V4DImode:
12011 case E_V4SImode:
12012 case E_V2DImode:
/* A size suffix is only emitted for alternative 2 under AVX512VL.  */
12013 ssesuffix = (TARGET_AVX512VL && which_alternative == 2
12014 ? "<ssemodesuffix>" : "");
12015 break;
12016 default:
/* Remaining (byte/word element) modes: "q" suffix for alternative 2
   under AVX512VL, otherwise no suffix.  */
12017 ssesuffix = TARGET_AVX512VL && which_alternative == 2 ? "q" : "";
12018 }
12019 break;
12020
12021 case MODE_V16SF:
12022 gcc_assert (TARGET_AVX512F);
12023 /* FALLTHRU */
12024 case MODE_V8SF:
12025 gcc_assert (TARGET_AVX);
12026 /* FALLTHRU */
12027 case MODE_V4SF:
12028 gcc_assert (TARGET_SSE);
12029 tmp = "andn";
12030 ssesuffix = "ps";
12031 break;
12032
12033 default:
12034 gcc_unreachable ();
12035 }
12036
/* Pick the operand layout: two operands for the legacy alternative,
   three operands with a "v" prefix for the others.  */
12037 switch (which_alternative)
12038 {
12039 case 0:
12040 ops = "%s%s\t{%%2, %%0|%%0, %%2}";
12041 break;
12042 case 1:
12043 case 2:
12044 ops = "v%s%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
12045 break;
12046 default:
12047 gcc_unreachable ();
12048 }
12049
12050 snprintf (buf, sizeof (buf), ops, tmp, ssesuffix);
12051 return buf;
12052 }
12053 [(set_attr "isa" "noavx,avx,avx")
12054 (set_attr "type" "sselog")
12055 (set (attr "prefix_data16")
12056 (if_then_else
12057 (and (eq_attr "alternative" "0")
12058 (eq_attr "mode" "TI"))
12059 (const_string "1")
12060 (const_string "*")))
12061 (set_attr "prefix" "orig,vex,evex")
;; The insn mode is <ssePSmode> for 16-byte vectors when packed-single
;; insns are preferred, the integer insn mode with AVX2, V8SF for wider
;; vectors with plain AVX, and V4SF without SSE2 or when optimizing for size.
12062 (set (attr "mode")
12063 (cond [(and (match_test "<MODE_SIZE> == 16")
12064 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
12065 (const_string "<ssePSmode>")
12066 (match_test "TARGET_AVX2")
12067 (const_string "<sseinsnmode>")
12068 (match_test "TARGET_AVX")
12069 (if_then_else
12070 (match_test "<MODE_SIZE> > 16")
12071 (const_string "V8SF")
12072 (const_string "<sseinsnmode>"))
12073 (ior (not (match_test "TARGET_SSE2"))
12074 (match_test "optimize_function_for_size_p (cfun)"))
12075 (const_string "V4SF")
12076 ]
12077 (const_string "<sseinsnmode>")))])
12078
;; AND-NOT whose second source is a scalar memory operand broadcast to all
;; elements (vec_duplicate): operand 0 = (~operand 1) & broadcast (operand 2).
;; Every vector in the pattern uses the single iterator VI48_AVX512VL so
;; that the destination, the outer AND and its inputs always agree on the
;; mode; mixing in a second iterator would instantiate mode-mismatched
;; combinations.
12079 (define_insn "*andnot<mode>3_bcst"
12080 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
12081 (and:VI48_AVX512VL
12082 (not:VI48_AVX512VL
12083 (match_operand:VI48_AVX512VL 1 "register_operand" "v"))
12084 (vec_duplicate:VI48_AVX512VL
12085 (match_operand:<ssescalarmode> 2 "memory_operand" "m"))))]
12086 "TARGET_AVX512F"
12087 "vpandn<ssemodesuffix>\t{%2<avx512bcst>, %1, %0|%0, %1, %2<avx512bcst>}"
12088 [(set_attr "type" "sselog")
12089 (set_attr "prefix" "evex")
12090 (set_attr "mode" "<sseinsnmode>")])
12091
;; Masked AND-NOT insn for the VI48_AVX512VL modes: the result of
;; (~operand 1) & operand 2 is merged with operand 3 (either tied to the
;; destination, constraint "0", or constraint "C") under mask register
;; operand 4.  The template prints the mask as {%4} followed by the %N3
;; modifier on operand 3.
12092 (define_insn "*andnot<mode>3_mask"
12093 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
12094 (vec_merge:VI48_AVX512VL
12095 (and:VI48_AVX512VL
12096 (not:VI48_AVX512VL
12097 (match_operand:VI48_AVX512VL 1 "register_operand" "v"))
12098 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm"))
12099 (match_operand:VI48_AVX512VL 3 "nonimm_or_0_operand" "0C")
12100 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
12101 "TARGET_AVX512F"
12102 "vpandn<ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
12103 [(set_attr "type" "sselog")
12104 (set_attr "prefix" "evex")
12105 (set_attr "mode" "<sseinsnmode>")])
12106
;; Expander for the any_logic codes over all VI modes.  Both inputs may be
;; memory or constant vectors at this point; ix86_expand_vector_logical_operator
;; is responsible for emitting the actual insn(s).
12107 (define_expand "<code><mode>3"
12108 [(set (match_operand:VI 0 "register_operand")
12109 (any_logic:VI
12110 (match_operand:VI 1 "nonimmediate_or_const_vector_operand")
12111 (match_operand:VI 2 "nonimmediate_or_const_vector_operand")))]
12112 "TARGET_SSE"
12113 {
12114 ix86_expand_vector_logical_operator (<CODE>, <MODE>mode, operands);
12115 DONE;
12116 })
12117
;; Logic insn (any_logic codes) for the VI48_AVX_AVX512F modes, with an
;; optional masked variant generated through the <mask_name> substitution.
;; The operands are commutative ("%"), and the condition rejects the case
;; where both inputs are in memory.  As in *andnot<mode>3, the output code
;; derives the mnemonic stem and suffix from the insn "mode" attribute and
;; formats the template into a static buffer.
12118 (define_insn "<mask_codefor><code><mode>3<mask_name>"
12119 [(set (match_operand:VI48_AVX_AVX512F 0 "register_operand" "=x,x,v")
12120 (any_logic:VI48_AVX_AVX512F
12121 (match_operand:VI48_AVX_AVX512F 1 "vector_operand" "%0,x,v")
12122 (match_operand:VI48_AVX_AVX512F 2 "vector_operand" "xBm,xm,vm")))]
12123 "TARGET_SSE && <mask_mode512bit_condition>
12124 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12125 {
/* buf is static because the returned pointer must outlive this code.  */
12126 static char buf[64];
12127 const char *ops;
12128 const char *tmp;
12129 const char *ssesuffix;
12130
/* Integer insn modes use the "p<logic>" stem, float insn modes use
   "<logic>" with a "ps" suffix.  */
12131 switch (get_attr_mode (insn))
12132 {
12133 case MODE_XI:
12134 gcc_assert (TARGET_AVX512F);
12135 /* FALLTHRU */
12136 case MODE_OI:
12137 gcc_assert (TARGET_AVX2);
12138 /* FALLTHRU */
12139 case MODE_TI:
12140 gcc_assert (TARGET_SSE2);
12141 tmp = "p<logic>";
12142 switch (<MODE>mode)
12143 {
12144 case E_V16SImode:
12145 case E_V8DImode:
12146 ssesuffix = "<ssemodesuffix>";
12147 break;
12148 case E_V8SImode:
12149 case E_V4DImode:
12150 case E_V4SImode:
12151 case E_V2DImode:
/* The element-size suffix is emitted under AVX512VL when the insn is
   masked or alternative 2 was chosen; otherwise no suffix.  */
12152 ssesuffix = (TARGET_AVX512VL
12153 && (<mask_applied> || which_alternative == 2)
12154 ? "<ssemodesuffix>" : "");
12155 break;
12156 default:
12157 gcc_unreachable ();
12158 }
12159 break;
12160
12161 case MODE_V8SF:
12162 gcc_assert (TARGET_AVX);
12163 /* FALLTHRU */
12164 case MODE_V4SF:
12165 gcc_assert (TARGET_SSE);
12166 tmp = "<logic>";
12167 ssesuffix = "ps";
12168 break;
12169
12170 default:
12171 gcc_unreachable ();
12172 }
12173
12174 switch (which_alternative)
12175 {
12176 case 0:
/* With masking, alternative 0 is printed in three-operand "v" form
   with %0 used as both destination and first source.  */
12177 if (<mask_applied>)
12178 ops = "v%s%s\t{%%2, %%0, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%0, %%2}";
12179 else
12180 ops = "%s%s\t{%%2, %%0|%%0, %%2}";
12181 break;
12182 case 1:
12183 case 2:
12184 ops = "v%s%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
12185 break;
12186 default:
12187 gcc_unreachable ();
12188 }
12189
12190 snprintf (buf, sizeof (buf), ops, tmp, ssesuffix);
12191 return buf;
12192 }
12193 [(set_attr "isa" "noavx,avx,avx")
12194 (set_attr "type" "sselog")
12195 (set (attr "prefix_data16")
12196 (if_then_else
12197 (and (eq_attr "alternative" "0")
12198 (eq_attr "mode" "TI"))
12199 (const_string "1")
12200 (const_string "*")))
12201 (set_attr "prefix" "<mask_prefix3>,evex")
;; Same insn-mode selection as in *andnot<mode>3.
12202 (set (attr "mode")
12203 (cond [(and (match_test "<MODE_SIZE> == 16")
12204 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
12205 (const_string "<ssePSmode>")
12206 (match_test "TARGET_AVX2")
12207 (const_string "<sseinsnmode>")
12208 (match_test "TARGET_AVX")
12209 (if_then_else
12210 (match_test "<MODE_SIZE> > 16")
12211 (const_string "V8SF")
12212 (const_string "<sseinsnmode>"))
12213 (ior (not (match_test "TARGET_SSE2"))
12214 (match_test "optimize_function_for_size_p (cfun)"))
12215 (const_string "V4SF")
12216 ]
12217 (const_string "<sseinsnmode>")))])
12218
;; Logic insn (any_logic codes) for the byte/word element modes in
;; VI12_AVX_AVX512F.  There is no masked variant of this pattern.  The
;; output code mirrors the VI48 pattern above, except that the only
;; suffix ever used for integer insn modes is "q".
12219 (define_insn "*<code><mode>3"
12220 [(set (match_operand:VI12_AVX_AVX512F 0 "register_operand" "=x,x,v")
12221 (any_logic:VI12_AVX_AVX512F
12222 (match_operand:VI12_AVX_AVX512F 1 "vector_operand" "%0,x,v")
12223 (match_operand:VI12_AVX_AVX512F 2 "vector_operand" "xBm,xm,vm")))]
12224 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12225 {
/* buf is static because the returned pointer must outlive this code.  */
12226 static char buf[64];
12227 const char *ops;
12228 const char *tmp;
12229 const char *ssesuffix;
12230
12231 switch (get_attr_mode (insn))
12232 {
12233 case MODE_XI:
12234 gcc_assert (TARGET_AVX512F);
12235 /* FALLTHRU */
12236 case MODE_OI:
12237 gcc_assert (TARGET_AVX2);
12238 /* FALLTHRU */
12239 case MODE_TI:
12240 gcc_assert (TARGET_SSE2);
12241 tmp = "p<logic>";
12242 switch (<MODE>mode)
12243 {
12244 case E_V64QImode:
12245 case E_V32HImode:
/* The 64-byte modes always get the "q" suffix.  */
12246 ssesuffix = "q";
12247 break;
12248 case E_V32QImode:
12249 case E_V16HImode:
12250 case E_V16QImode:
12251 case E_V8HImode:
/* Narrower modes get "q" only for alternative 2 under AVX512VL.  */
12252 ssesuffix = TARGET_AVX512VL && which_alternative == 2 ? "q" : "";
12253 break;
12254 default:
12255 gcc_unreachable ();
12256 }
12257 break;
12258
12259 case MODE_V8SF:
12260 gcc_assert (TARGET_AVX);
12261 /* FALLTHRU */
12262 case MODE_V4SF:
12263 gcc_assert (TARGET_SSE);
12264 tmp = "<logic>";
12265 ssesuffix = "ps";
12266 break;
12267
12268 default:
12269 gcc_unreachable ();
12270 }
12271
/* Two-operand legacy form for alternative 0, three-operand "v" form
   for alternatives 1 and 2.  */
12272 switch (which_alternative)
12273 {
12274 case 0:
12275 ops = "%s%s\t{%%2, %%0|%%0, %%2}";
12276 break;
12277 case 1:
12278 case 2:
12279 ops = "v%s%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
12280 break;
12281 default:
12282 gcc_unreachable ();
12283 }
12284
12285 snprintf (buf, sizeof (buf), ops, tmp, ssesuffix);
12286 return buf;
12287 }
12288 [(set_attr "isa" "noavx,avx,avx")
12289 (set_attr "type" "sselog")
12290 (set (attr "prefix_data16")
12291 (if_then_else
12292 (and (eq_attr "alternative" "0")
12293 (eq_attr "mode" "TI"))
12294 (const_string "1")
12295 (const_string "*")))
12296 (set_attr "prefix" "orig,vex,evex")
;; Same insn-mode selection as in *andnot<mode>3.
12297 (set (attr "mode")
12298 (cond [(and (match_test "<MODE_SIZE> == 16")
12299 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
12300 (const_string "<ssePSmode>")
12301 (match_test "TARGET_AVX2")
12302 (const_string "<sseinsnmode>")
12303 (match_test "TARGET_AVX")
12304 (if_then_else
12305 (match_test "<MODE_SIZE> > 16")
12306 (const_string "V8SF")
12307 (const_string "<sseinsnmode>"))
12308 (ior (not (match_test "TARGET_SSE2"))
12309 (match_test "optimize_function_for_size_p (cfun)"))
12310 (const_string "V4SF")
12311 ]
12312 (const_string "<sseinsnmode>")))])
12313
;; Logic insn (any_logic codes) whose first input is a scalar memory
;; operand broadcast to the whole vector (vec_duplicate) and whose second
;; input is a register.  The template prints the memory operand with the
;; <avx512bcst> decoration.
;; NOTE(review): the condition and template use mask substitution
;; attributes although the pattern name carries no <mask_name>; also the
;; "type" is "sseiadd" whereas the other logic insns here use "sselog".
;; Confirm both are intended.
12314 (define_insn "*<code><mode>3_bcst"
12315 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
12316 (any_logic:VI48_AVX512VL
12317 (vec_duplicate:VI48_AVX512VL
12318 (match_operand:<ssescalarmode> 1 "memory_operand" "m"))
12319 (match_operand:VI48_AVX512VL 2 "register_operand" "v")))]
12320 "TARGET_AVX512F && <mask_avx512vl_condition>"
12321 "vp<logic><ssemodesuffix>\t{%1<avx512bcst>, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1<avx512bcst>}"
12322 [(set_attr "type" "sseiadd")
12323 (set_attr "prefix" "evex")
12324 (set_attr "mode" "<sseinsnmode>")])
12325
;; Integer vector modes used by the vptestm/vptestnm patterns.  V16SI and
;; V8DI are unconditional; the narrower dword/qword modes need AVX512VL,
;; the 64-byte byte/word modes need AVX512BW, and the narrower byte/word
;; modes need both AVX512VL and AVX512BW.
12326 (define_mode_iterator VI1248_AVX512VLBW
12327 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX512VL && TARGET_AVX512BW")
12328 (V16QI "TARGET_AVX512VL && TARGET_AVX512BW")
12329 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512VL && TARGET_AVX512BW")
12330 (V8HI "TARGET_AVX512VL && TARGET_AVX512BW")
12331 V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
12332 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
12333
;; Scalar destination modes of the zero-extending (*_zext) test patterns.
;; HI is unconditional; SI and DI require AVX512BW.
12334 (define_mode_iterator AVX512ZEXTMASK
12335 [(DI "TARGET_AVX512BW") (SI "TARGET_AVX512BW") HI])
12336
;; vptestm: operand 0 (mode <avx512fmaskmode>, constraint "Yk") is set from
;; an UNSPEC_TESTM of vector operands 1 and 2.  The
;; <mask_scalar_merge_name> substitution also provides a variant with an
;; extra mask operand printed via <mask_scalar_merge_operand3>.
12337 (define_insn "<avx512>_testm<mode>3<mask_scalar_merge_name>"
12338 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
12339 (unspec:<avx512fmaskmode>
12340 [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
12341 (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
12342 UNSPEC_TESTM))]
12343 "TARGET_AVX512F"
12344 "vptestm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
12345 [(set_attr "prefix" "evex")
12346 (set_attr "mode" "<sseinsnmode>")])
12347
;; vptestnm: identical in shape to the vptestm pattern, but uses
;; UNSPEC_TESTNM and emits the "vptestnm" mnemonic.
12348 (define_insn "<avx512>_testnm<mode>3<mask_scalar_merge_name>"
12349 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
12350 (unspec:<avx512fmaskmode>
12351 [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
12352 (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
12353 UNSPEC_TESTNM))]
12354 "TARGET_AVX512F"
12355 "vptestnm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
12356 [(set_attr "prefix" "evex")
12357 (set_attr "mode" "<sseinsnmode>")])
12358
;; vptestm whose mask result is zero-extended into a wider AVX512ZEXTMASK
;; mode.  The condition only accepts destination modes strictly larger than
;; the natural mask mode of the vector; the emitted instruction is the
;; plain vptestm, with no separate extension instruction.
12359 (define_insn "*<avx512>_testm<VI1248_AVX512VLBW:mode>3_zext"
12360 [(set (match_operand:AVX512ZEXTMASK 0 "register_operand" "=Yk")
12361 (zero_extend:AVX512ZEXTMASK
12362 (unspec:<VI1248_AVX512VLBW:avx512fmaskmode>
12363 [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
12364 (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
12365 UNSPEC_TESTM)))]
12366 "TARGET_AVX512BW
12367 && (<AVX512ZEXTMASK:MODE_SIZE>
12368 > GET_MODE_SIZE (<VI1248_AVX512VLBW:avx512fmaskmode>mode))"
12369 "vptestm<VI1248_AVX512VLBW:ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12370 [(set_attr "prefix" "evex")
12371 (set_attr "mode" "<VI1248_AVX512VLBW:sseinsnmode>")])
12372
;; Zero-extended vptestm whose result is additionally ANDed with mask
;; register operand 3; the AND is printed as the {%3} write-mask on the
;; destination.  Same mode-size condition as the unmasked _zext pattern.
12373 (define_insn "*<avx512>_testm<VI1248_AVX512VLBW:mode>3_zext_mask"
12374 [(set (match_operand:AVX512ZEXTMASK 0 "register_operand" "=Yk")
12375 (zero_extend:AVX512ZEXTMASK
12376 (and:<VI1248_AVX512VLBW:avx512fmaskmode>
12377 (unspec:<VI1248_AVX512VLBW:avx512fmaskmode>
12378 [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
12379 (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
12380 UNSPEC_TESTM)
12381 (match_operand:<VI1248_AVX512VLBW:avx512fmaskmode> 3 "register_operand" "Yk"))))]
12382 "TARGET_AVX512BW
12383 && (<AVX512ZEXTMASK:MODE_SIZE>
12384 > GET_MODE_SIZE (<VI1248_AVX512VLBW:avx512fmaskmode>mode))"
12385 "vptestm<VI1248_AVX512VLBW:ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
12386 [(set_attr "prefix" "evex")
12387 (set_attr "mode" "<VI1248_AVX512VLBW:sseinsnmode>")])
12388
;; vptestnm whose mask result is zero-extended into a wider AVX512ZEXTMASK
;; mode.  The condition only accepts destination modes strictly larger than
;; the natural mask mode of the vector; only the vptestnm instruction is
;; emitted.
12389 (define_insn "*<avx512>_testnm<VI1248_AVX512VLBW:mode>3_zext"
12390 [(set (match_operand:AVX512ZEXTMASK 0 "register_operand" "=Yk")
12391 (zero_extend:AVX512ZEXTMASK
12392 (unspec:<VI1248_AVX512VLBW:avx512fmaskmode>
12393 [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
12394 (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
12395 UNSPEC_TESTNM)))]
12396 "TARGET_AVX512BW
12397 && (<AVX512ZEXTMASK:MODE_SIZE>
12398 > GET_MODE_SIZE (<VI1248_AVX512VLBW:avx512fmaskmode>mode))"
12399 "vptestnm<VI1248_AVX512VLBW:ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12400 [(set_attr "prefix" "evex")
12401 (set_attr "mode" "<VI1248_AVX512VLBW:sseinsnmode>")])
12402
;; Zero-extended vptestnm whose result is additionally ANDed with mask
;; register operand 3; the AND is printed as the {%3} write-mask on the
;; destination.  Same mode-size condition as the unmasked _zext pattern.
12403 (define_insn "*<avx512>_testnm<VI1248_AVX512VLBW:mode>3_zext_mask"
12404 [(set (match_operand:AVX512ZEXTMASK 0 "register_operand" "=Yk")
12405 (zero_extend:AVX512ZEXTMASK
12406 (and:<VI1248_AVX512VLBW:avx512fmaskmode>
12407 (unspec:<VI1248_AVX512VLBW:avx512fmaskmode>
12408 [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
12409 (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
12410 UNSPEC_TESTNM)
12411 (match_operand:<VI1248_AVX512VLBW:avx512fmaskmode> 3 "register_operand" "Yk"))))]
12412 "TARGET_AVX512BW
12413 && (<AVX512ZEXTMASK:MODE_SIZE>
12414 > GET_MODE_SIZE (<VI1248_AVX512VLBW:avx512fmaskmode>mode))"
12415 "vptestnm<VI1248_AVX512VLBW:ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
12416 [(set_attr "prefix" "evex")
12417 (set_attr "mode" "<VI1248_AVX512VLBW:sseinsnmode>")])
12418
12419 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12420 ;;
12421 ;; Parallel integral element swizzling
12422 ;;
12423 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12424
;; Vector pack-with-truncation expander.  Both inputs are reinterpreted in
;; the destination's <ssepackmode> with gen_lowpart and handed to
;; ix86_expand_vec_extract_even_odd with a final argument of 0.
;; NOTE(review): the 0 presumably selects the even-numbered elements, which
;; would be the low parts of the wider source elements -- confirm against
;; ix86_expand_vec_extract_even_odd.
12425 (define_expand "vec_pack_trunc_<mode>"
12426 [(match_operand:<ssepackmode> 0 "register_operand")
12427 (match_operand:VI248_AVX2_8_AVX512F_24_AVX512BW 1 "register_operand")
12428 (match_operand:VI248_AVX2_8_AVX512F_24_AVX512BW 2 "register_operand")]
12429 "TARGET_SSE2"
12430 {
12431 rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]);
12432 rtx op2 = gen_lowpart (<ssepackmode>mode, operands[2]);
12433 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
12434 DONE;
12435 })
12436
;; Pack two QImode values into one HImode value:
;; operand 0 = (zero_extend (operand 2) << 8) | zero_extend (operand 1),
;; i.e. operand 1 ends up in the low byte and operand 2 in the high byte.
;; Requires AVX512F; there is no preparation code.
12437 (define_expand "vec_pack_trunc_qi"
12438 [(set (match_operand:HI 0 "register_operand")
12439 (ior:HI (ashift:HI (zero_extend:HI (match_operand:QI 2 "register_operand"))
12440 (const_int 8))
12441 (zero_extend:HI (match_operand:QI 1 "register_operand"))))]
12442 "TARGET_AVX512F")
12443
;; Scalar (SWI24) counterpart of vec_pack_trunc_qi: concatenates operand 1
;; (low half) and operand 2 (high half) into a <DOUBLEMASKMODE> value.
;; The shift count, operand 3, is filled in by the preparation code as the
;; bit size of the input mode.  Requires AVX512BW.
12444 (define_expand "vec_pack_trunc_<mode>"
12445 [(set (match_operand:<DOUBLEMASKMODE> 0 "register_operand")
12446 (ior:<DOUBLEMASKMODE>
12447 (ashift:<DOUBLEMASKMODE>
12448 (zero_extend:<DOUBLEMASKMODE>
12449 (match_operand:SWI24 2 "register_operand"))
12450 (match_dup 3))
12451 (zero_extend:<DOUBLEMASKMODE>
12452 (match_operand:SWI24 1 "register_operand"))))]
12453 "TARGET_AVX512BW"
12454 {
12455 operands[3] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));
12456 })
12457
;; Pack two QImode masks (operands 1 and 2) into a single QImode mask.
;; Operand 3 is a constant giving the element count of the result; only 8
;; and 4 are supported, anything else makes the expander FAIL.  The low
;; nunits/2 bits of the result come from operand 1 and the bits above them
;; from operand 2 shifted left by nunits/2.
12458 (define_expand "vec_pack_sbool_trunc_qi"
12459 [(match_operand:QI 0 "register_operand")
12460 (match_operand:QI 1 "register_operand")
12461 (match_operand:QI 2 "register_operand")
12462 (match_operand:QI 3 "const_int_operand")]
12463 "TARGET_AVX512F"
12464 {
12465 HOST_WIDE_INT nunits = INTVAL (operands[3]);
12466 rtx mask, tem1, tem2;
12467 if (nunits != 8 && nunits != 4)
12468 FAIL;
/* tem1 = operand 1 restricted to its low nunits/2 bits.  */
12469 mask = gen_reg_rtx (QImode);
12470 emit_move_insn (mask, GEN_INT ((1 << (nunits / 2)) - 1));
12471 tem1 = gen_reg_rtx (QImode);
12472 emit_insn (gen_kandqi (tem1, operands[1], mask));
/* tem2 = operand 2 shifted left by nunits/2.  The QImode shift is only
   used with AVX512DQ; otherwise the shift is done in HImode on lowpart
   subregs and the result is viewed as QImode again.  */
12473 if (TARGET_AVX512DQ)
12474 {
12475 tem2 = gen_reg_rtx (QImode);
12476 emit_insn (gen_kashiftqi (tem2, operands[2],
12477 GEN_INT (nunits / 2)));
12478 }
12479 else
12480 {
12481 tem2 = gen_reg_rtx (HImode);
12482 emit_insn (gen_kashifthi (tem2, lowpart_subreg (HImode, operands[2],
12483 QImode),
12484 GEN_INT (nunits / 2)));
12485 tem2 = lowpart_subreg (QImode, tem2, HImode);
12486 }
/* Combine both halves into the destination.  */
12487 emit_insn (gen_kiorqi (operands[0], tem1, tem2));
12488 DONE;
12489 })
12490
;; packsswb/vpacksswb: the result is the concatenation of the
;; signed-saturating truncations (ss_truncate) of operands 1 and 2, each
;; contributing one <ssehalfvecmode> half.  Alternative 0 is the legacy
;; two-operand form; alternatives 1 and 2 are the "v" forms, alternative 2
;; being restricted to isa "avx512bw".  The masked variant additionally
;; requires <mask_avx512bw_condition>.
12491 (define_insn "<sse2_avx2>_packsswb<mask_name>"
12492 [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v")
12493 (vec_concat:VI1_AVX512
12494 (ss_truncate:<ssehalfvecmode>
12495 (match_operand:<sseunpackmode> 1 "register_operand" "0,x,v"))
12496 (ss_truncate:<ssehalfvecmode>
12497 (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,xm,vm"))))]
12498 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
12499 "@
12500 packsswb\t{%2, %0|%0, %2}
12501 vpacksswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
12502 vpacksswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12503 [(set_attr "isa" "noavx,avx,avx512bw")
12504 (set_attr "type" "sselog")
12505 (set_attr "prefix_data16" "1,*,*")
12506 (set_attr "prefix" "orig,<mask_prefix>,evex")
12507 (set_attr "mode" "<sseinsnmode>")])
12508
;; packssdw/vpackssdw: same structure as the packsswb pattern (two
;; signed-saturating truncations concatenated), but producing a VI2_AVX2
;; result.
12509 (define_insn "<sse2_avx2>_packssdw<mask_name>"
12510 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x,v")
12511 (vec_concat:VI2_AVX2
12512 (ss_truncate:<ssehalfvecmode>
12513 (match_operand:<sseunpackmode> 1 "register_operand" "0,x,v"))
12514 (ss_truncate:<ssehalfvecmode>
12515 (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,xm,vm"))))]
12516 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
12517 "@
12518 packssdw\t{%2, %0|%0, %2}
12519 vpackssdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
12520 vpackssdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12521 [(set_attr "isa" "noavx,avx,avx512bw")
12522 (set_attr "type" "sselog")
12523 (set_attr "prefix_data16" "1,*,*")
12524 (set_attr "prefix" "orig,<mask_prefix>,evex")
12525 (set_attr "mode" "<sseinsnmode>")])
12526
;; packuswb/vpackuswb: same structure as the packsswb pattern, but the two
;; halves are unsigned-saturating truncations (us_truncate).
12527 (define_insn "<sse2_avx2>_packuswb<mask_name>"
12528 [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v")
12529 (vec_concat:VI1_AVX512
12530 (us_truncate:<ssehalfvecmode>
12531 (match_operand:<sseunpackmode> 1 "register_operand" "0,x,v"))
12532 (us_truncate:<ssehalfvecmode>
12533 (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,xm,vm"))))]
12534 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
12535 "@
12536 packuswb\t{%2, %0|%0, %2}
12537 vpackuswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
12538 vpackuswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12539 [(set_attr "isa" "noavx,avx,avx512bw")
12540 (set_attr "type" "sselog")
12541 (set_attr "prefix_data16" "1,*,*")
12542 (set_attr "prefix" "orig,<mask_prefix>,evex")
12543 (set_attr "mode" "<sseinsnmode>")])
12544
;; vpunpckhbw on 64-byte vectors.  In the selection indices, 0-63 refer to
;; operand 1 and 64-127 to operand 2 (the vec_concat).  For each group of
;; 16 bytes, bytes 8-15 of operand 1 are interleaved with bytes 8-15 of the
;; same group of operand 2.
12545 (define_insn "avx512bw_interleave_highv64qi<mask_name>"
12546 [(set (match_operand:V64QI 0 "register_operand" "=v")
12547 (vec_select:V64QI
12548 (vec_concat:V128QI
12549 (match_operand:V64QI 1 "register_operand" "v")
12550 (match_operand:V64QI 2 "nonimmediate_operand" "vm"))
12551 (parallel [(const_int 8) (const_int 72)
12552 (const_int 9) (const_int 73)
12553 (const_int 10) (const_int 74)
12554 (const_int 11) (const_int 75)
12555 (const_int 12) (const_int 76)
12556 (const_int 13) (const_int 77)
12557 (const_int 14) (const_int 78)
12558 (const_int 15) (const_int 79)
12559 (const_int 24) (const_int 88)
12560 (const_int 25) (const_int 89)
12561 (const_int 26) (const_int 90)
12562 (const_int 27) (const_int 91)
12563 (const_int 28) (const_int 92)
12564 (const_int 29) (const_int 93)
12565 (const_int 30) (const_int 94)
12566 (const_int 31) (const_int 95)
12567 (const_int 40) (const_int 104)
12568 (const_int 41) (const_int 105)
12569 (const_int 42) (const_int 106)
12570 (const_int 43) (const_int 107)
12571 (const_int 44) (const_int 108)
12572 (const_int 45) (const_int 109)
12573 (const_int 46) (const_int 110)
12574 (const_int 47) (const_int 111)
12575 (const_int 56) (const_int 120)
12576 (const_int 57) (const_int 121)
12577 (const_int 58) (const_int 122)
12578 (const_int 59) (const_int 123)
12579 (const_int 60) (const_int 124)
12580 (const_int 61) (const_int 125)
12581 (const_int 62) (const_int 126)
12582 (const_int 63) (const_int 127)])))]
12583 "TARGET_AVX512BW"
12584 "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12585 [(set_attr "type" "sselog")
12586 (set_attr "prefix" "evex")
12587 (set_attr "mode" "XI")])
12588
;; vpunpckhbw on 32-byte vectors.  Indices 0-31 refer to operand 1 and
;; 32-63 to operand 2.  For each group of 16 bytes, bytes 8-15 of operand 1
;; are interleaved with bytes 8-15 of the same group of operand 2.
;; The masked variant works on byte elements, so it is gated on
;; <mask_avx512bw_condition> in addition to <mask_avx512vl_condition>,
;; exactly like avx2_interleave_lowv32qi.
12589 (define_insn "avx2_interleave_highv32qi<mask_name>"
12590 [(set (match_operand:V32QI 0 "register_operand" "=v")
12591 (vec_select:V32QI
12592 (vec_concat:V64QI
12593 (match_operand:V32QI 1 "register_operand" "v")
12594 (match_operand:V32QI 2 "nonimmediate_operand" "vm"))
12595 (parallel [(const_int 8) (const_int 40)
12596 (const_int 9) (const_int 41)
12597 (const_int 10) (const_int 42)
12598 (const_int 11) (const_int 43)
12599 (const_int 12) (const_int 44)
12600 (const_int 13) (const_int 45)
12601 (const_int 14) (const_int 46)
12602 (const_int 15) (const_int 47)
12603 (const_int 24) (const_int 56)
12604 (const_int 25) (const_int 57)
12605 (const_int 26) (const_int 58)
12606 (const_int 27) (const_int 59)
12607 (const_int 28) (const_int 60)
12608 (const_int 29) (const_int 61)
12609 (const_int 30) (const_int 62)
12610 (const_int 31) (const_int 63)])))]
12611 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
12612 "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12613 [(set_attr "type" "sselog")
12614 (set_attr "prefix" "<mask_prefix>")
12615 (set_attr "mode" "OI")])
12616
;; punpckhbw/vpunpckhbw on 16-byte vectors: bytes 8-15 of operand 1
;; (indices 8-15) interleaved with bytes 8-15 of operand 2 (indices 24-31).
;; Alternative 0 is the legacy two-operand form, alternative 1 the
;; three-operand "v" form.  The masked variant works on byte elements, so
;; it is gated on <mask_avx512bw_condition> in addition to
;; <mask_avx512vl_condition>, exactly like vec_interleave_lowv16qi.
12617 (define_insn "vec_interleave_highv16qi<mask_name>"
12618 [(set (match_operand:V16QI 0 "register_operand" "=x,v")
12619 (vec_select:V16QI
12620 (vec_concat:V32QI
12621 (match_operand:V16QI 1 "register_operand" "0,v")
12622 (match_operand:V16QI 2 "vector_operand" "xBm,vm"))
12623 (parallel [(const_int 8) (const_int 24)
12624 (const_int 9) (const_int 25)
12625 (const_int 10) (const_int 26)
12626 (const_int 11) (const_int 27)
12627 (const_int 12) (const_int 28)
12628 (const_int 13) (const_int 29)
12629 (const_int 14) (const_int 30)
12630 (const_int 15) (const_int 31)])))]
12631 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
12632 "@
12633 punpckhbw\t{%2, %0|%0, %2}
12634 vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12635 [(set_attr "isa" "noavx,avx")
12636 (set_attr "type" "sselog")
12637 (set_attr "prefix_data16" "1,*")
12638 (set_attr "prefix" "orig,<mask_prefix>")
12639 (set_attr "mode" "TI")])
12640
;; vpunpcklbw on 64-byte vectors.  Indices 0-63 refer to operand 1 and
;; 64-127 to operand 2.  For each group of 16 bytes, bytes 0-7 of operand 1
;; are interleaved with bytes 0-7 of the same group of operand 2.
12641 (define_insn "avx512bw_interleave_lowv64qi<mask_name>"
12642 [(set (match_operand:V64QI 0 "register_operand" "=v")
12643 (vec_select:V64QI
12644 (vec_concat:V128QI
12645 (match_operand:V64QI 1 "register_operand" "v")
12646 (match_operand:V64QI 2 "nonimmediate_operand" "vm"))
12647 (parallel [(const_int 0) (const_int 64)
12648 (const_int 1) (const_int 65)
12649 (const_int 2) (const_int 66)
12650 (const_int 3) (const_int 67)
12651 (const_int 4) (const_int 68)
12652 (const_int 5) (const_int 69)
12653 (const_int 6) (const_int 70)
12654 (const_int 7) (const_int 71)
12655 (const_int 16) (const_int 80)
12656 (const_int 17) (const_int 81)
12657 (const_int 18) (const_int 82)
12658 (const_int 19) (const_int 83)
12659 (const_int 20) (const_int 84)
12660 (const_int 21) (const_int 85)
12661 (const_int 22) (const_int 86)
12662 (const_int 23) (const_int 87)
12663 (const_int 32) (const_int 96)
12664 (const_int 33) (const_int 97)
12665 (const_int 34) (const_int 98)
12666 (const_int 35) (const_int 99)
12667 (const_int 36) (const_int 100)
12668 (const_int 37) (const_int 101)
12669 (const_int 38) (const_int 102)
12670 (const_int 39) (const_int 103)
12671 (const_int 48) (const_int 112)
12672 (const_int 49) (const_int 113)
12673 (const_int 50) (const_int 114)
12674 (const_int 51) (const_int 115)
12675 (const_int 52) (const_int 116)
12676 (const_int 53) (const_int 117)
12677 (const_int 54) (const_int 118)
12678 (const_int 55) (const_int 119)])))]
12679 "TARGET_AVX512BW"
12680 "vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12681 [(set_attr "type" "sselog")
12682 (set_attr "prefix" "evex")
12683 (set_attr "mode" "XI")])
12684
;; vpunpcklbw on 32-byte vectors.  Indices 0-31 refer to operand 1 and
;; 32-63 to operand 2.  For each group of 16 bytes, bytes 0-7 of operand 1
;; are interleaved with bytes 0-7 of the same group of operand 2.
;; The "prefix" attribute uses <mask_prefix> (as avx2_interleave_highv32qi
;; does) so that the masked variant is described as EVEX-encoded rather
;; than VEX-encoded.
12685 (define_insn "avx2_interleave_lowv32qi<mask_name>"
12686 [(set (match_operand:V32QI 0 "register_operand" "=v")
12687 (vec_select:V32QI
12688 (vec_concat:V64QI
12689 (match_operand:V32QI 1 "register_operand" "v")
12690 (match_operand:V32QI 2 "nonimmediate_operand" "vm"))
12691 (parallel [(const_int 0) (const_int 32)
12692 (const_int 1) (const_int 33)
12693 (const_int 2) (const_int 34)
12694 (const_int 3) (const_int 35)
12695 (const_int 4) (const_int 36)
12696 (const_int 5) (const_int 37)
12697 (const_int 6) (const_int 38)
12698 (const_int 7) (const_int 39)
12699 (const_int 16) (const_int 48)
12700 (const_int 17) (const_int 49)
12701 (const_int 18) (const_int 50)
12702 (const_int 19) (const_int 51)
12703 (const_int 20) (const_int 52)
12704 (const_int 21) (const_int 53)
12705 (const_int 22) (const_int 54)
12706 (const_int 23) (const_int 55)])))]
12707 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
12708 "vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12709 [(set_attr "type" "sselog")
12710 (set_attr "prefix" "<mask_prefix>")
12711 (set_attr "mode" "OI")])
12712
;; punpcklbw/vpunpcklbw on 16-byte vectors: bytes 0-7 of operand 1
;; (indices 0-7) interleaved with bytes 0-7 of operand 2 (indices 16-23).
;; Alternative 0 is the legacy two-operand form, alternative 1 the
;; three-operand "v" form.  The "prefix" attribute of alternative 1 uses
;; <mask_prefix> (as vec_interleave_highv16qi does) so that the masked
;; variant is described as EVEX-encoded rather than VEX-encoded.
12713 (define_insn "vec_interleave_lowv16qi<mask_name>"
12714 [(set (match_operand:V16QI 0 "register_operand" "=x,v")
12715 (vec_select:V16QI
12716 (vec_concat:V32QI
12717 (match_operand:V16QI 1 "register_operand" "0,v")
12718 (match_operand:V16QI 2 "vector_operand" "xBm,vm"))
12719 (parallel [(const_int 0) (const_int 16)
12720 (const_int 1) (const_int 17)
12721 (const_int 2) (const_int 18)
12722 (const_int 3) (const_int 19)
12723 (const_int 4) (const_int 20)
12724 (const_int 5) (const_int 21)
12725 (const_int 6) (const_int 22)
12726 (const_int 7) (const_int 23)])))]
12727 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
12728 "@
12729 punpcklbw\t{%2, %0|%0, %2}
12730 vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12731 [(set_attr "isa" "noavx,avx")
12732 (set_attr "type" "sselog")
12733 (set_attr "prefix_data16" "1,*")
12734 (set_attr "prefix" "orig,<mask_prefix>")
12735 (set_attr "mode" "TI")])
12736
;; vpunpckhwd on vectors of 32 words.  Indices 0-31 refer to operand 1 and
;; 32-63 to operand 2.  For each group of 8 words, words 4-7 of operand 1
;; are interleaved with words 4-7 of the same group of operand 2.
12737 (define_insn "avx512bw_interleave_highv32hi<mask_name>"
12738 [(set (match_operand:V32HI 0 "register_operand" "=v")
12739 (vec_select:V32HI
12740 (vec_concat:V64HI
12741 (match_operand:V32HI 1 "register_operand" "v")
12742 (match_operand:V32HI 2 "nonimmediate_operand" "vm"))
12743 (parallel [(const_int 4) (const_int 36)
12744 (const_int 5) (const_int 37)
12745 (const_int 6) (const_int 38)
12746 (const_int 7) (const_int 39)
12747 (const_int 12) (const_int 44)
12748 (const_int 13) (const_int 45)
12749 (const_int 14) (const_int 46)
12750 (const_int 15) (const_int 47)
12751 (const_int 20) (const_int 52)
12752 (const_int 21) (const_int 53)
12753 (const_int 22) (const_int 54)
12754 (const_int 23) (const_int 55)
12755 (const_int 28) (const_int 60)
12756 (const_int 29) (const_int 61)
12757 (const_int 30) (const_int 62)
12758 (const_int 31) (const_int 63)])))]
12759 "TARGET_AVX512BW"
12760 "vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12761 [(set_attr "type" "sselog")
12762 (set_attr "prefix" "evex")
12763 (set_attr "mode" "XI")])
12764
;; vpunpckhwd on vectors of 16 words.  Indices 0-15 refer to operand 1 and
;; 16-31 to operand 2.  For each group of 8 words, words 4-7 of operand 1
;; are interleaved with words 4-7 of the same group of operand 2.  The
;; masked variant requires both the AVX512VL and AVX512BW conditions.
12765 (define_insn "avx2_interleave_highv16hi<mask_name>"
12766 [(set (match_operand:V16HI 0 "register_operand" "=v")
12767 (vec_select:V16HI
12768 (vec_concat:V32HI
12769 (match_operand:V16HI 1 "register_operand" "v")
12770 (match_operand:V16HI 2 "nonimmediate_operand" "vm"))
12771 (parallel [(const_int 4) (const_int 20)
12772 (const_int 5) (const_int 21)
12773 (const_int 6) (const_int 22)
12774 (const_int 7) (const_int 23)
12775 (const_int 12) (const_int 28)
12776 (const_int 13) (const_int 29)
12777 (const_int 14) (const_int 30)
12778 (const_int 15) (const_int 31)])))]
12779 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
12780 "vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12781 [(set_attr "type" "sselog")
12782 (set_attr "prefix" "maybe_evex")
12783 (set_attr "mode" "OI")])
12784
;; punpckhwd/vpunpckhwd on vectors of 8 words: words 4-7 of operand 1
;; (indices 4-7) interleaved with words 4-7 of operand 2 (indices 12-15).
;; Alternative 0 is the legacy two-operand form, alternative 1 the
;; three-operand "v" form.
;; NOTE(review): the "prefix" of alternative 1 is "maybe_vex" even for the
;; masked variant -- confirm whether <mask_prefix> was intended.
12785 (define_insn "vec_interleave_highv8hi<mask_name>"
12786 [(set (match_operand:V8HI 0 "register_operand" "=x,v")
12787 (vec_select:V8HI
12788 (vec_concat:V16HI
12789 (match_operand:V8HI 1 "register_operand" "0,v")
12790 (match_operand:V8HI 2 "vector_operand" "xBm,vm"))
12791 (parallel [(const_int 4) (const_int 12)
12792 (const_int 5) (const_int 13)
12793 (const_int 6) (const_int 14)
12794 (const_int 7) (const_int 15)])))]
12795 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
12796 "@
12797 punpckhwd\t{%2, %0|%0, %2}
12798 vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12799 [(set_attr "isa" "noavx,avx")
12800 (set_attr "type" "sselog")
12801 (set_attr "prefix_data16" "1,*")
12802 (set_attr "prefix" "orig,maybe_vex")
12803 (set_attr "mode" "TI")])
12804
;; 512-bit vpunpcklwd (TARGET_AVX512BW).  From the V64HI concatenation of
;; operands 1 and 2 the selection takes elements 0-3, 8-11, 16-19 and
;; 24-27 of operand 1, each followed by the same-numbered element of
;; operand 2 (concatenation index = element + 32).
12805 (define_insn "<mask_codefor>avx512bw_interleave_lowv32hi<mask_name>"
12806 [(set (match_operand:V32HI 0 "register_operand" "=v")
12807 (vec_select:V32HI
12808 (vec_concat:V64HI
12809 (match_operand:V32HI 1 "register_operand" "v")
12810 (match_operand:V32HI 2 "nonimmediate_operand" "vm"))
12811 (parallel [(const_int 0) (const_int 32)
12812 (const_int 1) (const_int 33)
12813 (const_int 2) (const_int 34)
12814 (const_int 3) (const_int 35)
12815 (const_int 8) (const_int 40)
12816 (const_int 9) (const_int 41)
12817 (const_int 10) (const_int 42)
12818 (const_int 11) (const_int 43)
12819 (const_int 16) (const_int 48)
12820 (const_int 17) (const_int 49)
12821 (const_int 18) (const_int 50)
12822 (const_int 19) (const_int 51)
12823 (const_int 24) (const_int 56)
12824 (const_int 25) (const_int 57)
12825 (const_int 26) (const_int 58)
12826 (const_int 27) (const_int 59)])))]
12827 "TARGET_AVX512BW"
12828 "vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12829 [(set_attr "type" "sselog")
12830 (set_attr "prefix" "evex")
12831 (set_attr "mode" "XI")])
12832
;; 256-bit vpunpcklwd.  From the V32HI concatenation of operands 1 and 2
;; the selection takes elements 0-3 and 8-11 of operand 1, each followed
;; by the same-numbered element of operand 2 (concatenation indices 16-19
;; and 24-27).
12833 (define_insn "avx2_interleave_lowv16hi<mask_name>"
12834 [(set (match_operand:V16HI 0 "register_operand" "=v")
12835 (vec_select:V16HI
12836 (vec_concat:V32HI
12837 (match_operand:V16HI 1 "register_operand" "v")
12838 (match_operand:V16HI 2 "nonimmediate_operand" "vm"))
12839 (parallel [(const_int 0) (const_int 16)
12840 (const_int 1) (const_int 17)
12841 (const_int 2) (const_int 18)
12842 (const_int 3) (const_int 19)
12843 (const_int 8) (const_int 24)
12844 (const_int 9) (const_int 25)
12845 (const_int 10) (const_int 26)
12846 (const_int 11) (const_int 27)])))]
12847 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
12848 "vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12849 [(set_attr "type" "sselog")
12850 (set_attr "prefix" "maybe_evex")
12851 (set_attr "mode" "OI")])
12852
;; Interleave the lower four HImode elements of operands 1 and 2
;; (elements 0-3 of operand 1, each followed by the matching element of
;; operand 2 at indices 8-11 of the V16HI concatenation).
;; Alternative 0 is the two-operand punpcklwd with operand 1 tied to the
;; destination; alternative 1 is the three-operand vpunpcklwd, which may
;; carry a mask operand.
12853 (define_insn "vec_interleave_lowv8hi<mask_name>"
12854 [(set (match_operand:V8HI 0 "register_operand" "=x,v")
12855 (vec_select:V8HI
12856 (vec_concat:V16HI
12857 (match_operand:V8HI 1 "register_operand" "0,v")
12858 (match_operand:V8HI 2 "vector_operand" "xBm,vm"))
12859 (parallel [(const_int 0) (const_int 8)
12860 (const_int 1) (const_int 9)
12861 (const_int 2) (const_int 10)
12862 (const_int 3) (const_int 11)])))]
12863 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
12864 "@
12865 punpcklwd\t{%2, %0|%0, %2}
12866 vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12867 [(set_attr "isa" "noavx,avx")
12868 (set_attr "type" "sselog")
12869 (set_attr "prefix_data16" "1,*")
12870 (set_attr "prefix" "orig,maybe_evex")
12871 (set_attr "mode" "TI")])
12872
;; 256-bit vpunpckhdq.  From the V16SI concatenation of operands 1 and 2
;; the selection takes elements 2-3 and 6-7 of operand 1, each followed by
;; the same-numbered element of operand 2 (concatenation indices 10-11 and
;; 14-15).
12873 (define_insn "avx2_interleave_highv8si<mask_name>"
12874 [(set (match_operand:V8SI 0 "register_operand" "=v")
12875 (vec_select:V8SI
12876 (vec_concat:V16SI
12877 (match_operand:V8SI 1 "register_operand" "v")
12878 (match_operand:V8SI 2 "nonimmediate_operand" "vm"))
12879 (parallel [(const_int 2) (const_int 10)
12880 (const_int 3) (const_int 11)
12881 (const_int 6) (const_int 14)
12882 (const_int 7) (const_int 15)])))]
12883 "TARGET_AVX2 && <mask_avx512vl_condition>"
12884 "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12885 [(set_attr "type" "sselog")
12886 (set_attr "prefix" "maybe_evex")
12887 (set_attr "mode" "OI")])
12888
;; 512-bit vpunpckhdq (TARGET_AVX512F).  From the V32SI concatenation of
;; operands 1 and 2 the selection takes elements 2-3, 6-7, 10-11 and 14-15
;; of operand 1, each followed by the same-numbered element of operand 2
;; (concatenation index = element + 16).
12889 (define_insn "<mask_codefor>avx512f_interleave_highv16si<mask_name>"
12890 [(set (match_operand:V16SI 0 "register_operand" "=v")
12891 (vec_select:V16SI
12892 (vec_concat:V32SI
12893 (match_operand:V16SI 1 "register_operand" "v")
12894 (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
12895 (parallel [(const_int 2) (const_int 18)
12896 (const_int 3) (const_int 19)
12897 (const_int 6) (const_int 22)
12898 (const_int 7) (const_int 23)
12899 (const_int 10) (const_int 26)
12900 (const_int 11) (const_int 27)
12901 (const_int 14) (const_int 30)
12902 (const_int 15) (const_int 31)])))]
12903 "TARGET_AVX512F"
12904 "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12905 [(set_attr "type" "sselog")
12906 (set_attr "prefix" "evex")
12907 (set_attr "mode" "XI")])
12908
12909
;; Interleave the upper two SImode elements of operands 1 and 2
;; (elements 2-3 of operand 1, each followed by the matching element of
;; operand 2 at indices 6-7 of the V8SI concatenation).
;; Alternative 0 is the two-operand punpckhdq with operand 1 tied to the
;; destination; alternative 1 is the three-operand vpunpckhdq.
;; Alternative 1 accepts "v" registers and an optional mask operand, so
;; its prefix is "maybe_evex", as on the 256-bit avx2_interleave_highv8si.
12910 (define_insn "vec_interleave_highv4si<mask_name>"
12911 [(set (match_operand:V4SI 0 "register_operand" "=x,v")
12912 (vec_select:V4SI
12913 (vec_concat:V8SI
12914 (match_operand:V4SI 1 "register_operand" "0,v")
12915 (match_operand:V4SI 2 "vector_operand" "xBm,vm"))
12916 (parallel [(const_int 2) (const_int 6)
12917 (const_int 3) (const_int 7)])))]
12918 "TARGET_SSE2 && <mask_avx512vl_condition>"
12919 "@
12920 punpckhdq\t{%2, %0|%0, %2}
12921 vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12922 [(set_attr "isa" "noavx,avx")
12923 (set_attr "type" "sselog")
12924 (set_attr "prefix_data16" "1,*")
12925 (set_attr "prefix" "orig,maybe_evex")
12926 (set_attr "mode" "TI")])
12927
;; 256-bit vpunpckldq.  From the V16SI concatenation of operands 1 and 2
;; the selection takes elements 0-1 and 4-5 of operand 1, each followed by
;; the same-numbered element of operand 2 (concatenation indices 8-9 and
;; 12-13).
12928 (define_insn "avx2_interleave_lowv8si<mask_name>"
12929 [(set (match_operand:V8SI 0 "register_operand" "=v")
12930 (vec_select:V8SI
12931 (vec_concat:V16SI
12932 (match_operand:V8SI 1 "register_operand" "v")
12933 (match_operand:V8SI 2 "nonimmediate_operand" "vm"))
12934 (parallel [(const_int 0) (const_int 8)
12935 (const_int 1) (const_int 9)
12936 (const_int 4) (const_int 12)
12937 (const_int 5) (const_int 13)])))]
12938 "TARGET_AVX2 && <mask_avx512vl_condition>"
12939 "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12940 [(set_attr "type" "sselog")
12941 (set_attr "prefix" "maybe_evex")
12942 (set_attr "mode" "OI")])
12943
;; 512-bit vpunpckldq (TARGET_AVX512F).  From the V32SI concatenation of
;; operands 1 and 2 the selection takes elements 0-1, 4-5, 8-9 and 12-13
;; of operand 1, each followed by the same-numbered element of operand 2
;; (concatenation index = element + 16).
12944 (define_insn "<mask_codefor>avx512f_interleave_lowv16si<mask_name>"
12945 [(set (match_operand:V16SI 0 "register_operand" "=v")
12946 (vec_select:V16SI
12947 (vec_concat:V32SI
12948 (match_operand:V16SI 1 "register_operand" "v")
12949 (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
12950 (parallel [(const_int 0) (const_int 16)
12951 (const_int 1) (const_int 17)
12952 (const_int 4) (const_int 20)
12953 (const_int 5) (const_int 21)
12954 (const_int 8) (const_int 24)
12955 (const_int 9) (const_int 25)
12956 (const_int 12) (const_int 28)
12957 (const_int 13) (const_int 29)])))]
12958 "TARGET_AVX512F"
12959 "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12960 [(set_attr "type" "sselog")
12961 (set_attr "prefix" "evex")
12962 (set_attr "mode" "XI")])
12963
;; Interleave the lower two SImode elements of operands 1 and 2
;; (elements 0-1 of operand 1, each followed by the matching element of
;; operand 2 at indices 4-5 of the V8SI concatenation).
;; Alternative 0 is the two-operand punpckldq with operand 1 tied to the
;; destination; alternative 1 is the three-operand vpunpckldq.
;; Alternative 1 accepts "v" registers and an optional mask operand, so it
;; is not always VEX-encoded; its prefix is therefore "maybe_evex" rather
;; than a hard "vex", as on the 256-bit avx2_interleave_lowv8si.
12964 (define_insn "vec_interleave_lowv4si<mask_name>"
12965 [(set (match_operand:V4SI 0 "register_operand" "=x,v")
12966 (vec_select:V4SI
12967 (vec_concat:V8SI
12968 (match_operand:V4SI 1 "register_operand" "0,v")
12969 (match_operand:V4SI 2 "vector_operand" "xBm,vm"))
12970 (parallel [(const_int 0) (const_int 4)
12971 (const_int 1) (const_int 5)])))]
12972 "TARGET_SSE2 && <mask_avx512vl_condition>"
12973 "@
12974 punpckldq\t{%2, %0|%0, %2}
12975 vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12976 [(set_attr "isa" "noavx,avx")
12977 (set_attr "type" "sselog")
12978 (set_attr "prefix_data16" "1,*")
12979 (set_attr "prefix" "orig,maybe_evex")
12980 (set_attr "mode" "TI")])
12981
;; High interleave for 256-bit integer vectors (VI_256, TARGET_AVX2).
;; Runs both the avx2 low and high interleave insns on operands 1 and 2,
;; then merges the two intermediate results with avx2_permv2ti and moves
;; the outcome into operand 0.
12982 (define_expand "vec_interleave_high<mode>"
12983 [(match_operand:VI_256 0 "register_operand")
12984 (match_operand:VI_256 1 "register_operand")
12985 (match_operand:VI_256 2 "nonimmediate_operand")]
12986 "TARGET_AVX2"
12987 {
12988 rtx lane_lo = gen_reg_rtx (<MODE>mode);
12989 rtx lane_hi = gen_reg_rtx (<MODE>mode);
12990 rtx merged = gen_reg_rtx (V4DImode);
12991 emit_insn (gen_avx2_interleave_low<mode> (lane_lo, operands[1], operands[2]));
12992 emit_insn (gen_avx2_interleave_high<mode> (lane_hi, operands[1], operands[2]));
/* Selector 0x31 == 1 + (3 << 4).  NOTE(review): presumably picks the
   upper 128-bit lane of each intermediate -- confirm against
   avx2_permv2ti.  */
12993 emit_insn (gen_avx2_permv2ti (merged, gen_lowpart (V4DImode, lane_lo),
12994 gen_lowpart (V4DImode, lane_hi),
12995 GEN_INT (0x31)));
12996 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, merged));
12997 DONE;
12998 })
12999
;; Low interleave for 256-bit integer vectors (VI_256, TARGET_AVX2).
;; Runs both the avx2 low and high interleave insns on operands 1 and 2,
;; then merges the two intermediate results with avx2_permv2ti and moves
;; the outcome into operand 0.
13000 (define_expand "vec_interleave_low<mode>"
13001 [(match_operand:VI_256 0 "register_operand")
13002 (match_operand:VI_256 1 "register_operand")
13003 (match_operand:VI_256 2 "nonimmediate_operand")]
13004 "TARGET_AVX2"
13005 {
13006 rtx lane_lo = gen_reg_rtx (<MODE>mode);
13007 rtx lane_hi = gen_reg_rtx (<MODE>mode);
13008 rtx merged = gen_reg_rtx (V4DImode);
13009 emit_insn (gen_avx2_interleave_low<mode> (lane_lo, operands[1], operands[2]));
13010 emit_insn (gen_avx2_interleave_high<mode> (lane_hi, operands[1], operands[2]));
/* Selector 0x20 == 0 + (2 << 4).  NOTE(review): presumably picks the
   lower 128-bit lane of each intermediate -- confirm against
   avx2_permv2ti.  */
13011 emit_insn (gen_avx2_permv2ti (merged, gen_lowpart (V4DImode, lane_lo),
13012 gen_lowpart (V4DImode, lane_hi),
13013 GEN_INT (0x20)));
13014 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, merged));
13015 DONE;
13016 })
13017
13018 ;; Modes handled by pinsr patterns.
;; V8HI is listed unconditionally; V16QI and V4SI are enabled only with
;; TARGET_SSE4_1, and V2DI additionally requires TARGET_64BIT.
13019 (define_mode_iterator PINSR_MODE
13020 [(V16QI "TARGET_SSE4_1") V8HI
13021 (V4SI "TARGET_SSE4_1")
13022 (V2DI "TARGET_SSE4_1 && TARGET_64BIT")])
13023
;; Name prefix of the pinsr pattern for each PINSR_MODE mode: "sse2" for
;; V8HI, "sse4_1" for the others.
13024 (define_mode_attr sse2p4_1
13025 [(V16QI "sse4_1") (V8HI "sse2")
13026 (V4SI "sse4_1") (V2DI "sse4_1")])
13027
;; "isa" attribute value used for the "v"-register alternatives of the
;; pinsr pattern: avx512bw for V16QI/V8HI, avx512dq for V4SI/V2DI.
13028 (define_mode_attr pinsr_evex_isa
13029 [(V16QI "avx512bw") (V8HI "avx512bw")
13030 (V4SI "avx512dq") (V2DI "avx512dq")])
13031
13032 ;; sse4_1_pinsrd must come before sse2_loadld since it is preferred.
;;
;; Insert scalar operand 2 into one element of vector operand 1.
;; Operand 3 is the vec_merge mask; the insn condition requires
;; exact_log2 of it to be a valid element index for the mode, and the
;; output code replaces operand 3 with that index before printing.
;; Alternatives 0-1 are the two-operand pinsr forms (register / memory
;; source, operand 1 tied to the destination), 2-3 the "avx" forms and
;; 4-5 the forms whose isa is <pinsr_evex_isa>.  With a register source
;; whose scalar mode is narrower than SImode, operand 2 is printed with
;; the %k2 modifier; otherwise control falls through to the plain %2 form.
;; Without TARGET_AVX, prefix_rex is "1" for V2DI and prefix_data16 is "1"
;; for V8HI; prefix_extra is "1" in every case except non-AVX V8HI.
13033 (define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"
13034 [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x,v,v")
13035 (vec_merge:PINSR_MODE
13036 (vec_duplicate:PINSR_MODE
13037 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m,r,m"))
13038 (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x,v,v")
13039 (match_operand:SI 3 "const_int_operand")))]
13040 "TARGET_SSE2
13041 && ((unsigned) exact_log2 (INTVAL (operands[3]))
13042 < GET_MODE_NUNITS (<MODE>mode))"
13043 {
13044 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
13045
13046 switch (which_alternative)
13047 {
13048 case 0:
13049 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
13050 return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}";
13051 /* FALLTHRU */
13052 case 1:
13053 return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}";
13054 case 2:
13055 case 4:
13056 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
13057 return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
13058 /* FALLTHRU */
13059 case 3:
13060 case 5:
13061 return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
13062 default:
13063 gcc_unreachable ();
13064 }
13065 }
13066 [(set_attr "isa" "noavx,noavx,avx,avx,<pinsr_evex_isa>,<pinsr_evex_isa>")
13067 (set_attr "type" "sselog")
13068 (set (attr "prefix_rex")
13069 (if_then_else
13070 (and (not (match_test "TARGET_AVX"))
13071 (eq (const_string "<MODE>mode") (const_string "V2DImode")))
13072 (const_string "1")
13073 (const_string "*")))
13074 (set (attr "prefix_data16")
13075 (if_then_else
13076 (and (not (match_test "TARGET_AVX"))
13077 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
13078 (const_string "1")
13079 (const_string "*")))
13080 (set (attr "prefix_extra")
13081 (if_then_else
13082 (and (not (match_test "TARGET_AVX"))
13083 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
13084 (const_string "*")
13085 (const_string "1")))
13086 (set_attr "length_immediate" "1")
13087 (set_attr "prefix" "orig,orig,vex,vex,evex,evex")
13088 (set_attr "mode" "TI")])
13089
;; Masked insert of quarter-width operand 2 into quarter operand 3 (0-3)
;; of AVX512_VEC operand 1.  The quarter number is turned into the
;; vec_merge selector expected by the ..._1_mask insn: all element bits
;; set except those of the chosen quarter (4 bits per quarter for 4-byte
;; elements, 2 bits per quarter otherwise).
13090 (define_expand "<extract_type>_vinsert<shuffletype><extract_suf>_mask"
13091 [(match_operand:AVX512_VEC 0 "register_operand")
13092 (match_operand:AVX512_VEC 1 "register_operand")
13093 (match_operand:<ssequartermode> 2 "nonimmediate_operand")
13094 (match_operand:SI 3 "const_0_to_3_operand")
13095 (match_operand:AVX512_VEC 4 "register_operand")
13096 (match_operand:<avx512fmaskmode> 5 "register_operand")]
13097 "TARGET_AVX512F"
13098 {
13099 const int quarter = INTVAL (operands[3]);
13100 int keep_bits;
13101 if (GET_MODE_UNIT_SIZE (<MODE>mode) == 4)
13102 keep_bits = 0xFFFF ^ (0x000F << (quarter * 4));
13103 else
keep_bits = 0xFF ^ (0x03 << (quarter * 2));
13104 emit_insn (gen_<extract_type>_vinsert<shuffletype><extract_suf>_1_mask
13105 (operands[0], operands[1], operands[2], GEN_INT (keep_bits),
13106 operands[4], operands[5]));
13107 DONE;
13108 })
13109
;; Quarter-vector insert restricted to quarter 0: the insn condition
;; requires the vec_merge selector to be 0xFFF0 (4-byte elements) or 0xFC.
;; Alternative 0 emits vinsert with immediate 0.  In alternatives 1 and 2
;; operand 1 carries the "C" constraint and the insn is printed as a
;; vmovapd/vmovaps/vmovdqa move of operand 2 into %x0 (vmovdqa64 or
;; vmovdqa32 for the integer modes in alternative 2).
;; NOTE(review): alternatives 1-2 presumably rely on the move clearing
;; the remaining bits of the destination -- confirm.
13110 (define_insn "*<extract_type>_vinsert<shuffletype><extract_suf>_0"
13111 [(set (match_operand:AVX512_VEC 0 "register_operand" "=v,x,Yv")
13112 (vec_merge:AVX512_VEC
13113 (match_operand:AVX512_VEC 1 "reg_or_0_operand" "v,C,C")
13114 (vec_duplicate:AVX512_VEC
13115 (match_operand:<ssequartermode> 2 "nonimmediate_operand" "vm,xm,vm"))
13116 (match_operand:SI 3 "const_int_operand" "n,n,n")))]
13117 "TARGET_AVX512F
13118 && (INTVAL (operands[3])
13119 == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0xFFF0 : 0xFC))"
13120 {
13121 if (which_alternative == 0)
13122 return "vinsert<shuffletype><extract_suf>\t{$0, %2, %1, %0|%0, %1, %2, 0}";
13123 switch (<MODE>mode)
13124 {
13125 case E_V8DFmode:
13126 return "vmovapd\t{%2, %x0|%x0, %2}";
13127 case E_V16SFmode:
13128 return "vmovaps\t{%2, %x0|%x0, %2}";
13129 case E_V8DImode:
13130 return which_alternative == 2 ? "vmovdqa64\t{%2, %x0|%x0, %2}"
13131 : "vmovdqa\t{%2, %x0|%x0, %2}";
13132 case E_V16SImode:
13133 return which_alternative == 2 ? "vmovdqa32\t{%2, %x0|%x0, %2}"
13134 : "vmovdqa\t{%2, %x0|%x0, %2}";
13135 default:
13136 gcc_unreachable ();
13137 }
13138 }
13139 [(set_attr "type" "sselog,ssemov,ssemov")
13140 (set_attr "length_immediate" "1,0,0")
13141 (set_attr "prefix" "evex,vex,evex")
13142 (set_attr "mode" "<sseinsnmode>,<ssequarterinsnmode>,<ssequarterinsnmode>")])
13143
;; Quarter-vector insert into AVX512_VEC operand 1.  Operand 3 is the
;; vec_merge selector; the output code maps each of the four accepted
;; selector values back to the quarter number 0-3 used as the vinsert
;; immediate, and aborts on any other value.
13144 (define_insn "<mask_codefor><extract_type>_vinsert<shuffletype><extract_suf>_1<mask_name>"
13145 [(set (match_operand:AVX512_VEC 0 "register_operand" "=v")
13146 (vec_merge:AVX512_VEC
13147 (match_operand:AVX512_VEC 1 "register_operand" "v")
13148 (vec_duplicate:AVX512_VEC
13149 (match_operand:<ssequartermode> 2 "nonimmediate_operand" "vm"))
13150 (match_operand:SI 3 "const_int_operand" "n")))]
13151 "TARGET_AVX512F"
13152 {
13153 int quarter;
13154 const int merge_sel = INTVAL (operands[3]);
/* 4-byte elements use 4 selector bits per quarter, others use 2.  */
const bool elt32_p = GET_MODE_UNIT_SIZE (<MODE>mode) == 4;
13155
13156 if (merge_sel == (elt32_p ? 0xFFF0 : 0xFC))
13157 quarter = 0;
13158 else if (merge_sel == (elt32_p ? 0xFF0F : 0xF3))
13159 quarter = 1;
13160 else if (merge_sel == (elt32_p ? 0xF0FF : 0xCF))
13161 quarter = 2;
13162 else if (merge_sel == (elt32_p ? 0x0FFF : 0x3F))
13163 quarter = 3;
13164 else
13165 gcc_unreachable ();
13166
13167 operands[3] = GEN_INT (quarter);
13168
13169 return "vinsert<shuffletype><extract_suf>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
13170 }
13171 [(set_attr "type" "sselog")
13172 (set_attr "length_immediate" "1")
13173 (set_attr "prefix" "evex")
13174 (set_attr "mode" "<sseinsnmode>")])
13175
;; Masked insert of half-width operand 2 into half operand 3 (0 or 1) of
;; AVX512_VEC_2 operand 1.  Dispatches to the vec_set_lo / vec_set_hi
;; masked insn; operands 4 and 5 are passed through unchanged.
13176 (define_expand "<extract_type_2>_vinsert<shuffletype><extract_suf_2>_mask"
13177 [(match_operand:AVX512_VEC_2 0 "register_operand")
13178 (match_operand:AVX512_VEC_2 1 "register_operand")
13179 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
13180 (match_operand:SI 3 "const_0_to_1_operand")
13181 (match_operand:AVX512_VEC_2 4 "register_operand")
13182 (match_operand:<avx512fmaskmode> 5 "register_operand")]
13183 "TARGET_AVX512F"
13184 {
13185 const bool high_half_p = INTVAL (operands[3]) != 0;
13186 if (high_half_p)
13187 emit_insn (gen_vec_set_hi_<mode>_mask (operands[0], operands[1],
13188 operands[2], operands[4],
13189 operands[5]));
13190 else
13191 emit_insn (gen_vec_set_lo_<mode>_mask (operands[0], operands[1],
13192 operands[2], operands[4],
13193 operands[5]));
13194 DONE;
13195 })
13196
;; Replace the low half of V16FI operand 1: the result is operand 2
;; concatenated with elements 8-15 of operand 1.  Printed as
;; vinsert<shuffletype>32x8 with immediate 0; requires TARGET_AVX512DQ.
13197 (define_insn "vec_set_lo_<mode><mask_name>"
13198 [(set (match_operand:V16FI 0 "register_operand" "=v")
13199 (vec_concat:V16FI
13200 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
13201 (vec_select:<ssehalfvecmode>
13202 (match_operand:V16FI 1 "register_operand" "v")
13203 (parallel [(const_int 8) (const_int 9)
13204 (const_int 10) (const_int 11)
13205 (const_int 12) (const_int 13)
13206 (const_int 14) (const_int 15)]))))]
13207 "TARGET_AVX512DQ"
13208 "vinsert<shuffletype>32x8\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}"
13209 [(set_attr "type" "sselog")
13210 (set_attr "length_immediate" "1")
13211 (set_attr "prefix" "evex")
13212 (set_attr "mode" "<sseinsnmode>")])
13213
;; Replace the high half of V16FI operand 1: the result is elements 0-7
;; of operand 1 concatenated with operand 2.  Printed as
;; vinsert<shuffletype>32x8 with immediate 1; requires TARGET_AVX512DQ.
13214 (define_insn "vec_set_hi_<mode><mask_name>"
13215 [(set (match_operand:V16FI 0 "register_operand" "=v")
13216 (vec_concat:V16FI
13217 (vec_select:<ssehalfvecmode>
13218 (match_operand:V16FI 1 "register_operand" "v")
13219 (parallel [(const_int 0) (const_int 1)
13220 (const_int 2) (const_int 3)
13221 (const_int 4) (const_int 5)
13222 (const_int 6) (const_int 7)]))
13223 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
13224 "TARGET_AVX512DQ"
13225 "vinsert<shuffletype>32x8\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}"
13226 [(set_attr "type" "sselog")
13227 (set_attr "length_immediate" "1")
13228 (set_attr "prefix" "evex")
13229 (set_attr "mode" "<sseinsnmode>")])
13230
;; Replace the low half of V8FI operand 1: the result is operand 2
;; concatenated with elements 4-7 of operand 1.  Printed as
;; vinsert<shuffletype>64x4 with immediate 0; requires TARGET_AVX512F.
13231 (define_insn "vec_set_lo_<mode><mask_name>"
13232 [(set (match_operand:V8FI 0 "register_operand" "=v")
13233 (vec_concat:V8FI
13234 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
13235 (vec_select:<ssehalfvecmode>
13236 (match_operand:V8FI 1 "register_operand" "v")
13237 (parallel [(const_int 4) (const_int 5)
13238 (const_int 6) (const_int 7)]))))]
13239 "TARGET_AVX512F"
13240 "vinsert<shuffletype>64x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}"
13241 [(set_attr "type" "sselog")
13242 (set_attr "length_immediate" "1")
13243 (set_attr "prefix" "evex")
13244 (set_attr "mode" "XI")])
13245
;; Replace the high half of V8FI operand 1: the result is elements 0-3
;; of operand 1 concatenated with operand 2.  Printed as
;; vinsert<shuffletype>64x4 with immediate 1; requires TARGET_AVX512F.
13246 (define_insn "vec_set_hi_<mode><mask_name>"
13247 [(set (match_operand:V8FI 0 "register_operand" "=v")
13248 (vec_concat:V8FI
13249 (vec_select:<ssehalfvecmode>
13250 (match_operand:V8FI 1 "register_operand" "v")
13251 (parallel [(const_int 0) (const_int 1)
13252 (const_int 2) (const_int 3)]))
13253 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
13254 "TARGET_AVX512F"
13255 "vinsert<shuffletype>64x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}"
13256 [(set_attr "type" "sselog")
13257 (set_attr "length_immediate" "1")
13258 (set_attr "prefix" "evex")
13259 (set_attr "mode" "XI")])
13260
;; Masked 256-bit vshuf{f,i}64x2 builtin expander.  Immediate operand 3
;; (0-3) is expanded into the four explicit element indices taken by the
;; ..._1_mask insn: bit 0 chooses the element pair from operand 1
;; (indices 0-1 or 2-3), bit 1 the pair from operand 2 (indices 4-5 or
;; 6-7 of the concatenation).
13261 (define_expand "avx512dq_shuf_<shuffletype>64x2_mask"
13262 [(match_operand:VI8F_256 0 "register_operand")
13263 (match_operand:VI8F_256 1 "register_operand")
13264 (match_operand:VI8F_256 2 "nonimmediate_operand")
13265 (match_operand:SI 3 "const_0_to_3_operand")
13266 (match_operand:VI8F_256 4 "register_operand")
13267 (match_operand:QI 5 "register_operand")]
13268 "TARGET_AVX512DQ"
13269 {
13270 const int imm = INTVAL (operands[3]);
/* First concatenation index of the pair chosen from each source.  */
const int src1_first = (imm & 1) * 2;
const int src2_first = ((imm >> 1) & 1) * 2 + 4;
13271 emit_insn (gen_avx512dq_shuf_<shuffletype>64x2_1_mask
13272 (operands[0], operands[1], operands[2],
13273 GEN_INT (src1_first),
13274 GEN_INT (src1_first + 1),
13275 GEN_INT (src2_first),
13276 GEN_INT (src2_first + 1),
13277 operands[4], operands[5]));
13278 DONE;
13279 })
13280
;; 256-bit vshuf{f,i}64x2 expressed as a vec_select with explicit element
;; indices.  Operands 3/4 pick a pair of elements from operand 1 and
;; operands 5/6 a pair from operand 2 (indices 4-7 of the concatenation).
;; Each pair must be consecutive AND start on an even index: the output
;; code derives the immediate with "/ 2", so a pair such as (1, 2) would
;; otherwise match and be emitted as a shuffle selecting elements (0, 1).
13281 (define_insn "<mask_codefor>avx512dq_shuf_<shuffletype>64x2_1<mask_name>"
13282 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
13283 (vec_select:VI8F_256
13284 (vec_concat:<ssedoublemode>
13285 (match_operand:VI8F_256 1 "register_operand" "v")
13286 (match_operand:VI8F_256 2 "nonimmediate_operand" "vm"))
13287 (parallel [(match_operand 3 "const_0_to_3_operand")
13288 (match_operand 4 "const_0_to_3_operand")
13289 (match_operand 5 "const_4_to_7_operand")
13290 (match_operand 6 "const_4_to_7_operand")])))]
13291 "TARGET_AVX512VL
13292 && ((INTVAL (operands[3]) & 1) == 0
&& INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
&& (INTVAL (operands[5]) & 1) == 0
13293 && INTVAL (operands[5]) == (INTVAL (operands[6]) - 1))"
13294 {
13295 int mask;
/* Bit 0: which pair of operand 1; bit 1: which pair of operand 2.  */
13296 mask = INTVAL (operands[3]) / 2;
13297 mask |= (INTVAL (operands[5]) - 4) / 2 << 1;
13298 operands[3] = GEN_INT (mask);
13299 return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand7>|%0<mask_operand7>, %1, %2, %3}";
13300 }
13301 [(set_attr "type" "sselog")
13302 (set_attr "length_immediate" "1")
13303 (set_attr "prefix" "evex")
13304 (set_attr "mode" "XI")])
13305
;; Masked 512-bit vshuf{f,i}64x2 builtin expander.  The 8-bit immediate
;; (operand 3) holds four 2-bit fields; each is expanded into a pair of
;; consecutive element indices for the ..._1_mask insn.  The two low
;; fields index operand 1 (0-7), the two high fields operand 2 (8-15 of
;; the concatenation).
13306 (define_expand "avx512f_shuf_<shuffletype>64x2_mask"
13307 [(match_operand:V8FI 0 "register_operand")
13308 (match_operand:V8FI 1 "register_operand")
13309 (match_operand:V8FI 2 "nonimmediate_operand")
13310 (match_operand:SI 3 "const_0_to_255_operand")
13311 (match_operand:V8FI 4 "register_operand")
13312 (match_operand:QI 5 "register_operand")]
13313 "TARGET_AVX512F"
13314 {
13315 const int imm = INTVAL (operands[3]);
/* First concatenation index of the pair feeding each result pair.  */
const int first0 = ((imm >> 0) & 3) * 2;
const int first1 = ((imm >> 2) & 3) * 2;
const int first2 = ((imm >> 4) & 3) * 2 + 8;
const int first3 = ((imm >> 6) & 3) * 2 + 8;
13316 emit_insn (gen_avx512f_shuf_<shuffletype>64x2_1_mask
13317 (operands[0], operands[1], operands[2],
13318 GEN_INT (first0),
13319 GEN_INT (first0 + 1),
13320 GEN_INT (first1),
13321 GEN_INT (first1 + 1),
13322 GEN_INT (first2),
13323 GEN_INT (first2 + 1),
13324 GEN_INT (first3),
13325 GEN_INT (first3 + 1),
13326 operands[4], operands[5]));
13327 DONE;
13328 })
13329
;; 512-bit vshuf{f,i}64x2 expressed as a vec_select with explicit element
;; indices.  Operands 3/4 and 5/6 pick two element pairs from operand 1,
;; operands 7/8 and 9/10 two pairs from operand 2 (indices 8-15 of the
;; concatenation).  Each pair must be consecutive AND start on an even
;; index: the output code derives every 2-bit immediate field with "/ 2",
;; so a pair starting on an odd index would otherwise match and be
;; emitted as a shuffle selecting different elements.
13330 (define_insn "avx512f_shuf_<shuffletype>64x2_1<mask_name>"
13331 [(set (match_operand:V8FI 0 "register_operand" "=v")
13332 (vec_select:V8FI
13333 (vec_concat:<ssedoublemode>
13334 (match_operand:V8FI 1 "register_operand" "v")
13335 (match_operand:V8FI 2 "nonimmediate_operand" "vm"))
13336 (parallel [(match_operand 3 "const_0_to_7_operand")
13337 (match_operand 4 "const_0_to_7_operand")
13338 (match_operand 5 "const_0_to_7_operand")
13339 (match_operand 6 "const_0_to_7_operand")
13340 (match_operand 7 "const_8_to_15_operand")
13341 (match_operand 8 "const_8_to_15_operand")
13342 (match_operand 9 "const_8_to_15_operand")
13343 (match_operand 10 "const_8_to_15_operand")])))]
13344 "TARGET_AVX512F
13345 && ((INTVAL (operands[3]) & 1) == 0
&& INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
&& (INTVAL (operands[5]) & 1) == 0
13346 && INTVAL (operands[5]) == (INTVAL (operands[6]) - 1)
&& (INTVAL (operands[7]) & 1) == 0
13347 && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
&& (INTVAL (operands[9]) & 1) == 0
13348 && INTVAL (operands[9]) == (INTVAL (operands[10]) - 1))"
13349 {
13350 int mask;
/* Rebuild the imm8: one 2-bit field per selected pair.  */
13351 mask = INTVAL (operands[3]) / 2;
13352 mask |= INTVAL (operands[5]) / 2 << 2;
13353 mask |= (INTVAL (operands[7]) - 8) / 2 << 4;
13354 mask |= (INTVAL (operands[9]) - 8) / 2 << 6;
13355 operands[3] = GEN_INT (mask);
13356
13357 return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
13358 }
13359 [(set_attr "type" "sselog")
13360 (set_attr "length_immediate" "1")
13361 (set_attr "prefix" "evex")
13362 (set_attr "mode" "<sseinsnmode>")])
13363
;; Masked 256-bit vshuf{f,i}32x4 builtin expander.  Immediate operand 3
;; (0-3) is expanded into the eight explicit element indices taken by the
;; ..._1_mask insn: bit 0 chooses the group of four from operand 1
;; (indices 0-3 or 4-7), bit 1 the group from operand 2 (indices 8-11 or
;; 12-15 of the concatenation).
13364 (define_expand "avx512vl_shuf_<shuffletype>32x4_mask"
13365 [(match_operand:VI4F_256 0 "register_operand")
13366 (match_operand:VI4F_256 1 "register_operand")
13367 (match_operand:VI4F_256 2 "nonimmediate_operand")
13368 (match_operand:SI 3 "const_0_to_3_operand")
13369 (match_operand:VI4F_256 4 "register_operand")
13370 (match_operand:QI 5 "register_operand")]
13371 "TARGET_AVX512VL"
13372 {
13373 const int imm = INTVAL (operands[3]);
/* First concatenation index of the group chosen from each source.  */
const int src1_first = (imm & 1) * 4;
const int src2_first = ((imm >> 1) & 1) * 4 + 8;
13374 emit_insn (gen_avx512vl_shuf_<shuffletype>32x4_1_mask
13375 (operands[0], operands[1], operands[2],
13376 GEN_INT (src1_first),
13377 GEN_INT (src1_first + 1),
13378 GEN_INT (src1_first + 2),
13379 GEN_INT (src1_first + 3),
13380 GEN_INT (src2_first),
13381 GEN_INT (src2_first + 1),
13382 GEN_INT (src2_first + 2),
13383 GEN_INT (src2_first + 3),
13384 operands[4], operands[5]));
13385 DONE;
13386 })
13387
;; 256-bit vshuf{f,i}32x4 expressed as a vec_select with explicit element
;; indices.  Operands 3-6 pick four elements from operand 1 and operands
;; 7-10 four from operand 2 (indices 8-15 of the concatenation).  Each
;; group must be four consecutive indices AND start on a multiple of 4:
;; the output code derives the immediate with "/ 4", so a group such as
;; (1, 2, 3, 4) would otherwise match and be emitted as a shuffle
;; selecting elements (0, 1, 2, 3).
13388 (define_insn "avx512vl_shuf_<shuffletype>32x4_1<mask_name>"
13389 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
13390 (vec_select:VI4F_256
13391 (vec_concat:<ssedoublemode>
13392 (match_operand:VI4F_256 1 "register_operand" "v")
13393 (match_operand:VI4F_256 2 "nonimmediate_operand" "vm"))
13394 (parallel [(match_operand 3 "const_0_to_7_operand")
13395 (match_operand 4 "const_0_to_7_operand")
13396 (match_operand 5 "const_0_to_7_operand")
13397 (match_operand 6 "const_0_to_7_operand")
13398 (match_operand 7 "const_8_to_15_operand")
13399 (match_operand 8 "const_8_to_15_operand")
13400 (match_operand 9 "const_8_to_15_operand")
13401 (match_operand 10 "const_8_to_15_operand")])))]
13402 "TARGET_AVX512VL
13403 && ((INTVAL (operands[3]) & 3) == 0
&& INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
13404 && INTVAL (operands[3]) == (INTVAL (operands[5]) - 2)
13405 && INTVAL (operands[3]) == (INTVAL (operands[6]) - 3)
&& (INTVAL (operands[7]) & 3) == 0
13406 && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
13407 && INTVAL (operands[7]) == (INTVAL (operands[9]) - 2)
13408 && INTVAL (operands[7]) == (INTVAL (operands[10]) - 3))"
13409 {
13410 int mask;
/* Bit 0: which group of operand 1; bit 1: which group of operand 2.  */
13411 mask = INTVAL (operands[3]) / 4;
13412 mask |= (INTVAL (operands[7]) - 8) / 4 << 1;
13413 operands[3] = GEN_INT (mask);
13414
13415 return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
13416 }
13417 [(set_attr "type" "sselog")
13418 (set_attr "length_immediate" "1")
13419 (set_attr "prefix" "evex")
13420 (set_attr "mode" "<sseinsnmode>")])
13421
;; Masked 512-bit vshuf{f,i}32x4 builtin expander.  The 8-bit immediate
;; (operand 3) holds four 2-bit fields; each is expanded into four
;; consecutive element indices for the ..._1_mask insn.  The two low
;; fields index operand 1 (0-15), the two high fields operand 2 (16-31 of
;; the concatenation).
13422 (define_expand "avx512f_shuf_<shuffletype>32x4_mask"
13423 [(match_operand:V16FI 0 "register_operand")
13424 (match_operand:V16FI 1 "register_operand")
13425 (match_operand:V16FI 2 "nonimmediate_operand")
13426 (match_operand:SI 3 "const_0_to_255_operand")
13427 (match_operand:V16FI 4 "register_operand")
13428 (match_operand:HI 5 "register_operand")]
13429 "TARGET_AVX512F"
13430 {
13431 const int imm = INTVAL (operands[3]);
/* First concatenation index of the group feeding each result group.  */
const int first0 = ((imm >> 0) & 3) * 4;
const int first1 = ((imm >> 2) & 3) * 4;
const int first2 = ((imm >> 4) & 3) * 4 + 16;
const int first3 = ((imm >> 6) & 3) * 4 + 16;
13432 emit_insn (gen_avx512f_shuf_<shuffletype>32x4_1_mask
13433 (operands[0], operands[1], operands[2],
13434 GEN_INT (first0),
13435 GEN_INT (first0 + 1),
13436 GEN_INT (first0 + 2),
13437 GEN_INT (first0 + 3),
13438 GEN_INT (first1),
13439 GEN_INT (first1 + 1),
13440 GEN_INT (first1 + 2),
13441 GEN_INT (first1 + 3),
13442 GEN_INT (first2),
13443 GEN_INT (first2 + 1),
13444 GEN_INT (first2 + 2),
13445 GEN_INT (first2 + 3),
13446 GEN_INT (first3),
13447 GEN_INT (first3 + 1),
13448 GEN_INT (first3 + 2),
13449 GEN_INT (first3 + 3),
13450 operands[4], operands[5]));
13451 DONE;
13452 })
13453
;; 512-bit vshuf{f,i}32x4 expressed as a vec_select with explicit element
;; indices.  Operands 3-6 and 7-10 pick two groups of four elements from
;; operand 1, operands 11-14 and 15-18 two groups from operand 2 (indices
;; 16-31 of the concatenation).  Each group must be four consecutive
;; indices AND start on a multiple of 4: the output code derives every
;; 2-bit immediate field with "/ 4", so a group starting elsewhere would
;; otherwise match and be emitted as a shuffle selecting different
;; elements.
13454 (define_insn "avx512f_shuf_<shuffletype>32x4_1<mask_name>"
13455 [(set (match_operand:V16FI 0 "register_operand" "=v")
13456 (vec_select:V16FI
13457 (vec_concat:<ssedoublemode>
13458 (match_operand:V16FI 1 "register_operand" "v")
13459 (match_operand:V16FI 2 "nonimmediate_operand" "vm"))
13460 (parallel [(match_operand 3 "const_0_to_15_operand")
13461 (match_operand 4 "const_0_to_15_operand")
13462 (match_operand 5 "const_0_to_15_operand")
13463 (match_operand 6 "const_0_to_15_operand")
13464 (match_operand 7 "const_0_to_15_operand")
13465 (match_operand 8 "const_0_to_15_operand")
13466 (match_operand 9 "const_0_to_15_operand")
13467 (match_operand 10 "const_0_to_15_operand")
13468 (match_operand 11 "const_16_to_31_operand")
13469 (match_operand 12 "const_16_to_31_operand")
13470 (match_operand 13 "const_16_to_31_operand")
13471 (match_operand 14 "const_16_to_31_operand")
13472 (match_operand 15 "const_16_to_31_operand")
13473 (match_operand 16 "const_16_to_31_operand")
13474 (match_operand 17 "const_16_to_31_operand")
13475 (match_operand 18 "const_16_to_31_operand")])))]
13476 "TARGET_AVX512F
13477 && ((INTVAL (operands[3]) & 3) == 0
&& INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
13478 && INTVAL (operands[3]) == (INTVAL (operands[5]) - 2)
13479 && INTVAL (operands[3]) == (INTVAL (operands[6]) - 3)
&& (INTVAL (operands[7]) & 3) == 0
13480 && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
13481 && INTVAL (operands[7]) == (INTVAL (operands[9]) - 2)
13482 && INTVAL (operands[7]) == (INTVAL (operands[10]) - 3)
&& (INTVAL (operands[11]) & 3) == 0
13483 && INTVAL (operands[11]) == (INTVAL (operands[12]) - 1)
13484 && INTVAL (operands[11]) == (INTVAL (operands[13]) - 2)
13485 && INTVAL (operands[11]) == (INTVAL (operands[14]) - 3)
&& (INTVAL (operands[15]) & 3) == 0
13486 && INTVAL (operands[15]) == (INTVAL (operands[16]) - 1)
13487 && INTVAL (operands[15]) == (INTVAL (operands[17]) - 2)
13488 && INTVAL (operands[15]) == (INTVAL (operands[18]) - 3))"
13489 {
13490 int mask;
/* Rebuild the imm8: one 2-bit field per selected group.  */
13491 mask = INTVAL (operands[3]) / 4;
13492 mask |= INTVAL (operands[7]) / 4 << 2;
13493 mask |= (INTVAL (operands[11]) - 16) / 4 << 4;
13494 mask |= (INTVAL (operands[15]) - 16) / 4 << 6;
13495 operands[3] = GEN_INT (mask);
13496
13497 return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
13498 }
13499 [(set_attr "type" "sselog")
13500 (set_attr "length_immediate" "1")
13501 (set_attr "prefix" "evex")
13502 (set_attr "mode" "<sseinsnmode>")])
13503
;; Masked 512-bit vpshufd builtin expander.  The 8-bit immediate
;; (operand 2) holds four 2-bit element selectors; they are replicated
;; with offsets 0, 4, 8 and 12 to form the sixteen explicit indices taken
;; by avx512f_pshufd_1_mask.
13504 (define_expand "avx512f_pshufdv3_mask"
13505 [(match_operand:V16SI 0 "register_operand")
13506 (match_operand:V16SI 1 "nonimmediate_operand")
13507 (match_operand:SI 2 "const_0_to_255_operand")
13508 (match_operand:V16SI 3 "register_operand")
13509 (match_operand:HI 4 "register_operand")]
13510 "TARGET_AVX512F"
13511 {
13512 const int imm = INTVAL (operands[2]);
/* The four 2-bit selectors, lowest field first.  */
const int s0 = (imm >> 0) & 3;
const int s1 = (imm >> 2) & 3;
const int s2 = (imm >> 4) & 3;
const int s3 = (imm >> 6) & 3;
13513 emit_insn (gen_avx512f_pshufd_1_mask (operands[0], operands[1],
13514 GEN_INT (s0),
13515 GEN_INT (s1),
13516 GEN_INT (s2),
13517 GEN_INT (s3),
13518 GEN_INT (s0 + 4),
13519 GEN_INT (s1 + 4),
13520 GEN_INT (s2 + 4),
13521 GEN_INT (s3 + 4),
13522 GEN_INT (s0 + 8),
13523 GEN_INT (s1 + 8),
13524 GEN_INT (s2 + 8),
13525 GEN_INT (s3 + 8),
13526 GEN_INT (s0 + 12),
13527 GEN_INT (s1 + 12),
13528 GEN_INT (s2 + 12),
13529 GEN_INT (s3 + 12),
13530 operands[3], operands[4]));
13531 DONE;
13532 })
13533
;; 512-bit vpshufd expressed as a vec_select with sixteen explicit
;; indices.  Operands 2-5 (range 0-3) are the selectors of the first
;; group of four; the insn condition requires operands 6-9, 10-13 and
;; 14-17 to repeat them with offsets 4, 8 and 12.  The output code packs
;; operands 2-5 into the 8-bit immediate, two bits each.
13534 (define_insn "avx512f_pshufd_1<mask_name>"
13535 [(set (match_operand:V16SI 0 "register_operand" "=v")
13536 (vec_select:V16SI
13537 (match_operand:V16SI 1 "nonimmediate_operand" "vm")
13538 (parallel [(match_operand 2 "const_0_to_3_operand")
13539 (match_operand 3 "const_0_to_3_operand")
13540 (match_operand 4 "const_0_to_3_operand")
13541 (match_operand 5 "const_0_to_3_operand")
13542 (match_operand 6 "const_4_to_7_operand")
13543 (match_operand 7 "const_4_to_7_operand")
13544 (match_operand 8 "const_4_to_7_operand")
13545 (match_operand 9 "const_4_to_7_operand")
13546 (match_operand 10 "const_8_to_11_operand")
13547 (match_operand 11 "const_8_to_11_operand")
13548 (match_operand 12 "const_8_to_11_operand")
13549 (match_operand 13 "const_8_to_11_operand")
13550 (match_operand 14 "const_12_to_15_operand")
13551 (match_operand 15 "const_12_to_15_operand")
13552 (match_operand 16 "const_12_to_15_operand")
13553 (match_operand 17 "const_12_to_15_operand")])))]
13554 "TARGET_AVX512F
13555 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
13556 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
13557 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
13558 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])
13559 && INTVAL (operands[2]) + 8 == INTVAL (operands[10])
13560 && INTVAL (operands[3]) + 8 == INTVAL (operands[11])
13561 && INTVAL (operands[4]) + 8 == INTVAL (operands[12])
13562 && INTVAL (operands[5]) + 8 == INTVAL (operands[13])
13563 && INTVAL (operands[2]) + 12 == INTVAL (operands[14])
13564 && INTVAL (operands[3]) + 12 == INTVAL (operands[15])
13565 && INTVAL (operands[4]) + 12 == INTVAL (operands[16])
13566 && INTVAL (operands[5]) + 12 == INTVAL (operands[17])"
13567 {
13568 /* Pack the four selectors of the first group into the imm8.  */
13569 const int imm = ((INTVAL (operands[2]) << 0)
13570 | (INTVAL (operands[3]) << 2)
13571 | (INTVAL (operands[4]) << 4)
13572 | (INTVAL (operands[5]) << 6));
13573 operands[2] = GEN_INT (imm);
13574
13575 return "vpshufd\t{%2, %1, %0<mask_operand18>|%0<mask_operand18>, %1, %2}";
13576 }
13577 [(set_attr "type" "sselog1")
13578 (set_attr "prefix" "evex")
13579 (set_attr "length_immediate" "1")
13580 (set_attr "mode" "XI")])
13581
;; Expander for the masked 256-bit pshufd.  Operand 2 is an 8-bit immediate
;; made of four 2-bit element selectors.  They are unpacked into explicit
;; element indices for elements 0..3 and repeated, biased by 4, for
;; elements 4..7.  Operands 3 and 4 are forwarded unchanged as the two
;; trailing arguments of gen_avx2_pshufd_1_mask.
(define_expand "avx512vl_pshufdv3_mask"
  [(match_operand:V8SI 0 "register_operand")
   (match_operand:V8SI 1 "nonimmediate_operand")
   (match_operand:SI 2 "const_0_to_255_operand")
   (match_operand:V8SI 3 "register_operand")
   (match_operand:QI 4 "register_operand")]
  "TARGET_AVX512VL"
{
  const int imm = INTVAL (operands[2]);
  /* The four 2-bit selectors, lowest field first.  */
  const int sel0 = (imm >> 0) & 3;
  const int sel1 = (imm >> 2) & 3;
  const int sel2 = (imm >> 4) & 3;
  const int sel3 = (imm >> 6) & 3;

  emit_insn (gen_avx2_pshufd_1_mask (operands[0], operands[1],
                                     GEN_INT (sel0),
                                     GEN_INT (sel1),
                                     GEN_INT (sel2),
                                     GEN_INT (sel3),
                                     GEN_INT (sel0 + 4),
                                     GEN_INT (sel1 + 4),
                                     GEN_INT (sel2 + 4),
                                     GEN_INT (sel3 + 4),
                                     operands[3], operands[4]));
  DONE;
})
13603
;; Expander for the unmasked 256-bit pshufd.  The 8-bit immediate in
;; operand 2 is split into four 2-bit selectors; the same selectors, biased
;; by 4, are passed again for elements 4..7 so that the result matches the
;; avx2_pshufd_1 pattern.
(define_expand "avx2_pshufdv3"
  [(match_operand:V8SI 0 "register_operand")
   (match_operand:V8SI 1 "nonimmediate_operand")
   (match_operand:SI 2 "const_0_to_255_operand")]
  "TARGET_AVX2"
{
  const int imm = INTVAL (operands[2]);
  /* The four 2-bit selectors, lowest field first.  */
  const int sel0 = (imm >> 0) & 3;
  const int sel1 = (imm >> 2) & 3;
  const int sel2 = (imm >> 4) & 3;
  const int sel3 = (imm >> 6) & 3;

  emit_insn (gen_avx2_pshufd_1 (operands[0], operands[1],
                                GEN_INT (sel0),
                                GEN_INT (sel1),
                                GEN_INT (sel2),
                                GEN_INT (sel3),
                                GEN_INT (sel0 + 4),
                                GEN_INT (sel1 + 4),
                                GEN_INT (sel2 + 4),
                                GEN_INT (sel3 + 4)));
  DONE;
})
13622
;; 256-bit vpshufd written as a vec_select with explicit element indices.
;; Operands 2-5 are indices in 0..3 and operands 6-9 are indices in 4..7.
;; The insn condition only accepts the pattern when every one of operands
;; 6-9 equals the corresponding operand 2-5 plus 4, i.e. when elements 4..7
;; are permuted the same way as elements 0..3.
;; The output code packs operands 2-5 into a single immediate (two bits per
;; selector, operand 2 in the lowest bits), stores it back into operands[2]
;; and prints vpshufd with that immediate.
13623 (define_insn "avx2_pshufd_1<mask_name>"
13624 [(set (match_operand:V8SI 0 "register_operand" "=v")
13625 (vec_select:V8SI
13626 (match_operand:V8SI 1 "nonimmediate_operand" "vm")
13627 (parallel [(match_operand 2 "const_0_to_3_operand")
13628 (match_operand 3 "const_0_to_3_operand")
13629 (match_operand 4 "const_0_to_3_operand")
13630 (match_operand 5 "const_0_to_3_operand")
13631 (match_operand 6 "const_4_to_7_operand")
13632 (match_operand 7 "const_4_to_7_operand")
13633 (match_operand 8 "const_4_to_7_operand")
13634 (match_operand 9 "const_4_to_7_operand")])))]
13635 "TARGET_AVX2
13636 && <mask_avx512vl_condition>
13637 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
13638 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
13639 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
13640 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])"
13641 {
13642 int mask = 0;
13643 mask |= INTVAL (operands[2]) << 0;
13644 mask |= INTVAL (operands[3]) << 2;
13645 mask |= INTVAL (operands[4]) << 4;
13646 mask |= INTVAL (operands[5]) << 6;
13647 operands[2] = GEN_INT (mask);
13648
13649 return "vpshufd\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
13650 }
13651 [(set_attr "type" "sselog1")
13652 (set_attr "prefix" "maybe_evex")
13653 (set_attr "length_immediate" "1")
13654 (set_attr "mode" "OI")])
13655
;; Expander for the masked 128-bit pshufd.  The 8-bit immediate in
;; operand 2 is split into four 2-bit element selectors; operands 3 and 4
;; are forwarded unchanged as the two trailing arguments of
;; gen_sse2_pshufd_1_mask.
(define_expand "avx512vl_pshufd_mask"
  [(match_operand:V4SI 0 "register_operand")
   (match_operand:V4SI 1 "nonimmediate_operand")
   (match_operand:SI 2 "const_0_to_255_operand")
   (match_operand:V4SI 3 "register_operand")
   (match_operand:QI 4 "register_operand")]
  "TARGET_AVX512VL"
{
  const int imm = INTVAL (operands[2]);
  /* The four 2-bit selectors, lowest field first.  */
  rtx sel0 = GEN_INT ((imm >> 0) & 3);
  rtx sel1 = GEN_INT ((imm >> 2) & 3);
  rtx sel2 = GEN_INT ((imm >> 4) & 3);
  rtx sel3 = GEN_INT ((imm >> 6) & 3);

  emit_insn (gen_sse2_pshufd_1_mask (operands[0], operands[1],
                                     sel0, sel1, sel2, sel3,
                                     operands[3], operands[4]));
  DONE;
})
13673
;; Expander for the 128-bit pshufd.  Operand 2 is any const_int; only its
;; low eight bits are used, as four 2-bit element selectors that become the
;; explicit vec_select indices of sse2_pshufd_1.
(define_expand "sse2_pshufd"
  [(match_operand:V4SI 0 "register_operand")
   (match_operand:V4SI 1 "vector_operand")
   (match_operand:SI 2 "const_int_operand")]
  "TARGET_SSE2"
{
  const int imm = INTVAL (operands[2]);
  /* The four 2-bit selectors, lowest field first.  */
  rtx sel0 = GEN_INT ((imm >> 0) & 3);
  rtx sel1 = GEN_INT ((imm >> 2) & 3);
  rtx sel2 = GEN_INT ((imm >> 4) & 3);
  rtx sel3 = GEN_INT ((imm >> 6) & 3);

  emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
                                sel0, sel1, sel2, sel3));
  DONE;
})
13688
;; 128-bit pshufd written as a vec_select of four explicit element indices
;; (operands 2-5, each in 0..3).  The output code packs the four indices
;; into one immediate (two bits each, operand 2 in the lowest bits), stores
;; it back into operands[2] and prints pshufd (with the %v prefix
;; modifier) using that immediate.
13689 (define_insn "sse2_pshufd_1<mask_name>"
13690 [(set (match_operand:V4SI 0 "register_operand" "=v")
13691 (vec_select:V4SI
13692 (match_operand:V4SI 1 "vector_operand" "vBm")
13693 (parallel [(match_operand 2 "const_0_to_3_operand")
13694 (match_operand 3 "const_0_to_3_operand")
13695 (match_operand 4 "const_0_to_3_operand")
13696 (match_operand 5 "const_0_to_3_operand")])))]
13697 "TARGET_SSE2 && <mask_avx512vl_condition>"
13698 {
13699 int mask = 0;
13700 mask |= INTVAL (operands[2]) << 0;
13701 mask |= INTVAL (operands[3]) << 2;
13702 mask |= INTVAL (operands[4]) << 4;
13703 mask |= INTVAL (operands[5]) << 6;
13704 operands[2] = GEN_INT (mask);
13705
13706 return "%vpshufd\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
13707 }
13708 [(set_attr "type" "sselog1")
13709 (set_attr "prefix_data16" "1")
13710 (set_attr "prefix" "<mask_prefix2>")
13711 (set_attr "length_immediate" "1")
13712 (set_attr "mode" "TI")])
13713
;; 512-bit vpshuflw on V32HI.  Unlike the narrower pshuflw patterns in this
;; file, the shuffle is not spelled out as a vec_select: it is modelled as
;; UNSPEC_PSHUFLW of the source vector and the 8-bit immediate (operand 2),
;; which is printed as-is.
13714 (define_insn "<mask_codefor>avx512bw_pshuflwv32hi<mask_name>"
13715 [(set (match_operand:V32HI 0 "register_operand" "=v")
13716 (unspec:V32HI
13717 [(match_operand:V32HI 1 "nonimmediate_operand" "vm")
13718 (match_operand:SI 2 "const_0_to_255_operand" "n")]
13719 UNSPEC_PSHUFLW))]
13720 "TARGET_AVX512BW"
13721 "vpshuflw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13722 [(set_attr "type" "sselog")
13723 (set_attr "prefix" "evex")
13724 (set_attr "mode" "XI")])
13725
;; Expander for the masked 256-bit pshuflw.  The 8-bit immediate in
;; operand 2 is split into four 2-bit selectors, which are passed once
;; as-is (indices 0..3) and once biased by 8 (indices 8..11).  Operands 3
;; and 4 are forwarded unchanged as the two trailing arguments of
;; gen_avx2_pshuflw_1_mask.
(define_expand "avx512vl_pshuflwv3_mask"
  [(match_operand:V16HI 0 "register_operand")
   (match_operand:V16HI 1 "nonimmediate_operand")
   (match_operand:SI 2 "const_0_to_255_operand")
   (match_operand:V16HI 3 "register_operand")
   (match_operand:HI 4 "register_operand")]
  "TARGET_AVX512VL && TARGET_AVX512BW"
{
  const int imm = INTVAL (operands[2]);
  /* The four 2-bit selectors, lowest field first.  */
  const int sel0 = (imm >> 0) & 3;
  const int sel1 = (imm >> 2) & 3;
  const int sel2 = (imm >> 4) & 3;
  const int sel3 = (imm >> 6) & 3;

  emit_insn (gen_avx2_pshuflw_1_mask (operands[0], operands[1],
                                      GEN_INT (sel0),
                                      GEN_INT (sel1),
                                      GEN_INT (sel2),
                                      GEN_INT (sel3),
                                      GEN_INT (sel0 + 8),
                                      GEN_INT (sel1 + 8),
                                      GEN_INT (sel2 + 8),
                                      GEN_INT (sel3 + 8),
                                      operands[3], operands[4]));
  DONE;
})
13747
;; Expander for the unmasked 256-bit pshuflw.  The 8-bit immediate in
;; operand 2 is split into four 2-bit selectors, which are passed once
;; as-is (indices 0..3) and once biased by 8 (indices 8..11) to match the
;; avx2_pshuflw_1 pattern.
(define_expand "avx2_pshuflwv3"
  [(match_operand:V16HI 0 "register_operand")
   (match_operand:V16HI 1 "nonimmediate_operand")
   (match_operand:SI 2 "const_0_to_255_operand")]
  "TARGET_AVX2"
{
  const int imm = INTVAL (operands[2]);
  /* The four 2-bit selectors, lowest field first.  */
  const int sel0 = (imm >> 0) & 3;
  const int sel1 = (imm >> 2) & 3;
  const int sel2 = (imm >> 4) & 3;
  const int sel3 = (imm >> 6) & 3;

  emit_insn (gen_avx2_pshuflw_1 (operands[0], operands[1],
                                 GEN_INT (sel0),
                                 GEN_INT (sel1),
                                 GEN_INT (sel2),
                                 GEN_INT (sel3),
                                 GEN_INT (sel0 + 8),
                                 GEN_INT (sel1 + 8),
                                 GEN_INT (sel2 + 8),
                                 GEN_INT (sel3 + 8)));
  DONE;
})
13766
;; 256-bit vpshuflw written as a vec_select over sixteen HI elements.
;; Elements 4..7 and 12..15 keep their positions (fixed const_int indices).
;; Operands 2-5 are indices in 0..3 and operands 6-9 are indices in 8..11;
;; the insn condition requires each of operands 6-9 to equal the
;; corresponding operand 2-5 plus 8.
;; The output code packs operands 2-5 into one immediate (two bits each,
;; operand 2 in the lowest bits), stores it back into operands[2] and
;; prints vpshuflw with that immediate.
13767 (define_insn "avx2_pshuflw_1<mask_name>"
13768 [(set (match_operand:V16HI 0 "register_operand" "=v")
13769 (vec_select:V16HI
13770 (match_operand:V16HI 1 "nonimmediate_operand" "vm")
13771 (parallel [(match_operand 2 "const_0_to_3_operand")
13772 (match_operand 3 "const_0_to_3_operand")
13773 (match_operand 4 "const_0_to_3_operand")
13774 (match_operand 5 "const_0_to_3_operand")
13775 (const_int 4)
13776 (const_int 5)
13777 (const_int 6)
13778 (const_int 7)
13779 (match_operand 6 "const_8_to_11_operand")
13780 (match_operand 7 "const_8_to_11_operand")
13781 (match_operand 8 "const_8_to_11_operand")
13782 (match_operand 9 "const_8_to_11_operand")
13783 (const_int 12)
13784 (const_int 13)
13785 (const_int 14)
13786 (const_int 15)])))]
13787 "TARGET_AVX2
13788 && <mask_avx512bw_condition> && <mask_avx512vl_condition>
13789 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
13790 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
13791 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
13792 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
13793 {
13794 int mask = 0;
13795 mask |= INTVAL (operands[2]) << 0;
13796 mask |= INTVAL (operands[3]) << 2;
13797 mask |= INTVAL (operands[4]) << 4;
13798 mask |= INTVAL (operands[5]) << 6;
13799 operands[2] = GEN_INT (mask);
13800
13801 return "vpshuflw\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
13802 }
13803 [(set_attr "type" "sselog")
13804 (set_attr "prefix" "maybe_evex")
13805 (set_attr "length_immediate" "1")
13806 (set_attr "mode" "OI")])
13807
;; Expander for the masked 128-bit pshuflw.  The 8-bit immediate in
;; operand 2 is split into four 2-bit selectors; operands 3 and 4 are
;; forwarded unchanged as the two trailing arguments of
;; gen_sse2_pshuflw_1_mask.
(define_expand "avx512vl_pshuflw_mask"
  [(match_operand:V8HI 0 "register_operand")
   (match_operand:V8HI 1 "nonimmediate_operand")
   (match_operand:SI 2 "const_0_to_255_operand")
   (match_operand:V8HI 3 "register_operand")
   (match_operand:QI 4 "register_operand")]
  "TARGET_AVX512VL && TARGET_AVX512BW"
{
  const int imm = INTVAL (operands[2]);
  /* The four 2-bit selectors, lowest field first.  */
  rtx sel0 = GEN_INT ((imm >> 0) & 3);
  rtx sel1 = GEN_INT ((imm >> 2) & 3);
  rtx sel2 = GEN_INT ((imm >> 4) & 3);
  rtx sel3 = GEN_INT ((imm >> 6) & 3);

  emit_insn (gen_sse2_pshuflw_1_mask (operands[0], operands[1],
                                      sel0, sel1, sel2, sel3,
                                      operands[3], operands[4]));
  DONE;
})
13825
;; Expander for the 128-bit pshuflw.  Operand 2 is any const_int; only its
;; low eight bits are used, as four 2-bit selectors that become the
;; explicit indices passed to sse2_pshuflw_1.
(define_expand "sse2_pshuflw"
  [(match_operand:V8HI 0 "register_operand")
   (match_operand:V8HI 1 "vector_operand")
   (match_operand:SI 2 "const_int_operand")]
  "TARGET_SSE2"
{
  const int imm = INTVAL (operands[2]);
  /* The four 2-bit selectors, lowest field first.  */
  rtx sel0 = GEN_INT ((imm >> 0) & 3);
  rtx sel1 = GEN_INT ((imm >> 2) & 3);
  rtx sel2 = GEN_INT ((imm >> 4) & 3);
  rtx sel3 = GEN_INT ((imm >> 6) & 3);

  emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
                                 sel0, sel1, sel2, sel3));
  DONE;
})
13840
;; 128-bit pshuflw written as a vec_select over eight HI elements.
;; Operands 2-5 (each in 0..3) choose the first four result elements;
;; elements 4..7 keep their positions (fixed const_int indices).
;; The output code packs operands 2-5 into one immediate (two bits each,
;; operand 2 in the lowest bits), stores it back into operands[2] and
;; prints pshuflw (with the %v prefix modifier) using that immediate.
13841 (define_insn "sse2_pshuflw_1<mask_name>"
13842 [(set (match_operand:V8HI 0 "register_operand" "=v")
13843 (vec_select:V8HI
13844 (match_operand:V8HI 1 "vector_operand" "vBm")
13845 (parallel [(match_operand 2 "const_0_to_3_operand")
13846 (match_operand 3 "const_0_to_3_operand")
13847 (match_operand 4 "const_0_to_3_operand")
13848 (match_operand 5 "const_0_to_3_operand")
13849 (const_int 4)
13850 (const_int 5)
13851 (const_int 6)
13852 (const_int 7)])))]
13853 "TARGET_SSE2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
13854 {
13855 int mask = 0;
13856 mask |= INTVAL (operands[2]) << 0;
13857 mask |= INTVAL (operands[3]) << 2;
13858 mask |= INTVAL (operands[4]) << 4;
13859 mask |= INTVAL (operands[5]) << 6;
13860 operands[2] = GEN_INT (mask);
13861
13862 return "%vpshuflw\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
13863 }
13864 [(set_attr "type" "sselog")
13865 (set_attr "prefix_data16" "0")
13866 (set_attr "prefix_rep" "1")
13867 (set_attr "prefix" "maybe_vex")
13868 (set_attr "length_immediate" "1")
13869 (set_attr "mode" "TI")])
13870
;; Expander for the unmasked 256-bit pshufhw.  The 8-bit immediate in
;; operand 2 is split into four 2-bit selectors, which are passed once
;; biased by 4 (indices 4..7) and once biased by 12 (indices 12..15) to
;; match the avx2_pshufhw_1 pattern.
(define_expand "avx2_pshufhwv3"
  [(match_operand:V16HI 0 "register_operand")
   (match_operand:V16HI 1 "nonimmediate_operand")
   (match_operand:SI 2 "const_0_to_255_operand")]
  "TARGET_AVX2"
{
  const int imm = INTVAL (operands[2]);
  /* The four 2-bit selectors, lowest field first.  */
  const int sel0 = (imm >> 0) & 3;
  const int sel1 = (imm >> 2) & 3;
  const int sel2 = (imm >> 4) & 3;
  const int sel3 = (imm >> 6) & 3;

  emit_insn (gen_avx2_pshufhw_1 (operands[0], operands[1],
                                 GEN_INT (sel0 + 4),
                                 GEN_INT (sel1 + 4),
                                 GEN_INT (sel2 + 4),
                                 GEN_INT (sel3 + 4),
                                 GEN_INT (sel0 + 12),
                                 GEN_INT (sel1 + 12),
                                 GEN_INT (sel2 + 12),
                                 GEN_INT (sel3 + 12)));
  DONE;
})
13889
;; 512-bit vpshufhw on V32HI.  Unlike the narrower pshufhw patterns in this
;; file, the shuffle is not spelled out as a vec_select: it is modelled as
;; UNSPEC_PSHUFHW of the source vector and the 8-bit immediate (operand 2),
;; which is printed as-is.
13890 (define_insn "<mask_codefor>avx512bw_pshufhwv32hi<mask_name>"
13891 [(set (match_operand:V32HI 0 "register_operand" "=v")
13892 (unspec:V32HI
13893 [(match_operand:V32HI 1 "nonimmediate_operand" "vm")
13894 (match_operand:SI 2 "const_0_to_255_operand" "n")]
13895 UNSPEC_PSHUFHW))]
13896 "TARGET_AVX512BW"
13897 "vpshufhw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13898 [(set_attr "type" "sselog")
13899 (set_attr "prefix" "evex")
13900 (set_attr "mode" "XI")])
13901
;; Expander for the masked 256-bit pshufhw.  The 8-bit immediate in
;; operand 2 is split into four 2-bit selectors, which are passed once
;; biased by 4 (indices 4..7) and once biased by 12 (indices 12..15).
;; Operands 3 and 4 are forwarded unchanged as the two trailing arguments
;; of gen_avx2_pshufhw_1_mask.
(define_expand "avx512vl_pshufhwv3_mask"
  [(match_operand:V16HI 0 "register_operand")
   (match_operand:V16HI 1 "nonimmediate_operand")
   (match_operand:SI 2 "const_0_to_255_operand")
   (match_operand:V16HI 3 "register_operand")
   (match_operand:HI 4 "register_operand")]
  "TARGET_AVX512VL && TARGET_AVX512BW"
{
  const int imm = INTVAL (operands[2]);
  /* The four 2-bit selectors, lowest field first.  */
  const int sel0 = (imm >> 0) & 3;
  const int sel1 = (imm >> 2) & 3;
  const int sel2 = (imm >> 4) & 3;
  const int sel3 = (imm >> 6) & 3;

  emit_insn (gen_avx2_pshufhw_1_mask (operands[0], operands[1],
                                      GEN_INT (sel0 + 4),
                                      GEN_INT (sel1 + 4),
                                      GEN_INT (sel2 + 4),
                                      GEN_INT (sel3 + 4),
                                      GEN_INT (sel0 + 12),
                                      GEN_INT (sel1 + 12),
                                      GEN_INT (sel2 + 12),
                                      GEN_INT (sel3 + 12),
                                      operands[3], operands[4]));
  DONE;
})
13923
;; 256-bit vpshufhw written as a vec_select over sixteen HI elements.
;; Elements 0..3 and 8..11 keep their positions (fixed const_int indices).
;; Operands 2-5 are indices in 4..7 and operands 6-9 are indices in 12..15;
;; the insn condition requires each of operands 6-9 to equal the
;; corresponding operand 2-5 plus 8.
;; The output code subtracts 4 from each of operands 2-5 to get a 2-bit
;; selector, packs the four selectors into one immediate (operand 2 in the
;; lowest bits), stores it back into operands[2] and prints vpshufhw.
13924 (define_insn "avx2_pshufhw_1<mask_name>"
13925 [(set (match_operand:V16HI 0 "register_operand" "=v")
13926 (vec_select:V16HI
13927 (match_operand:V16HI 1 "nonimmediate_operand" "vm")
13928 (parallel [(const_int 0)
13929 (const_int 1)
13930 (const_int 2)
13931 (const_int 3)
13932 (match_operand 2 "const_4_to_7_operand")
13933 (match_operand 3 "const_4_to_7_operand")
13934 (match_operand 4 "const_4_to_7_operand")
13935 (match_operand 5 "const_4_to_7_operand")
13936 (const_int 8)
13937 (const_int 9)
13938 (const_int 10)
13939 (const_int 11)
13940 (match_operand 6 "const_12_to_15_operand")
13941 (match_operand 7 "const_12_to_15_operand")
13942 (match_operand 8 "const_12_to_15_operand")
13943 (match_operand 9 "const_12_to_15_operand")])))]
13944 "TARGET_AVX2
13945 && <mask_avx512bw_condition> && <mask_avx512vl_condition>
13946 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
13947 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
13948 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
13949 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
13950 {
13951 int mask = 0;
13952 mask |= (INTVAL (operands[2]) - 4) << 0;
13953 mask |= (INTVAL (operands[3]) - 4) << 2;
13954 mask |= (INTVAL (operands[4]) - 4) << 4;
13955 mask |= (INTVAL (operands[5]) - 4) << 6;
13956 operands[2] = GEN_INT (mask);
13957
13958 return "vpshufhw\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
13959 }
13960 [(set_attr "type" "sselog")
13961 (set_attr "prefix" "maybe_evex")
13962 (set_attr "length_immediate" "1")
13963 (set_attr "mode" "OI")])
13964
;; Expander for the masked 128-bit pshufhw.  The 8-bit immediate in
;; operand 2 is split into four 2-bit selectors, each biased by 4 so that
;; it indexes elements 4..7.  Operands 3 and 4 are forwarded unchanged as
;; the two trailing arguments of gen_sse2_pshufhw_1_mask.
(define_expand "avx512vl_pshufhw_mask"
  [(match_operand:V8HI 0 "register_operand")
   (match_operand:V8HI 1 "nonimmediate_operand")
   (match_operand:SI 2 "const_0_to_255_operand")
   (match_operand:V8HI 3 "register_operand")
   (match_operand:QI 4 "register_operand")]
  "TARGET_AVX512VL && TARGET_AVX512BW"
{
  const int imm = INTVAL (operands[2]);
  /* Element indices 4..7 derived from the four 2-bit selectors.  */
  rtx idx0 = GEN_INT (((imm >> 0) & 3) + 4);
  rtx idx1 = GEN_INT (((imm >> 2) & 3) + 4);
  rtx idx2 = GEN_INT (((imm >> 4) & 3) + 4);
  rtx idx3 = GEN_INT (((imm >> 6) & 3) + 4);

  emit_insn (gen_sse2_pshufhw_1_mask (operands[0], operands[1],
                                      idx0, idx1, idx2, idx3,
                                      operands[3], operands[4]));
  DONE;
})
13982
;; Expander for the 128-bit pshufhw.  Operand 2 is any const_int; only its
;; low eight bits are used, as four 2-bit selectors.  Each selector is
;; biased by 4 so that it indexes elements 4..7, as sse2_pshufhw_1 expects.
(define_expand "sse2_pshufhw"
  [(match_operand:V8HI 0 "register_operand")
   (match_operand:V8HI 1 "vector_operand")
   (match_operand:SI 2 "const_int_operand")]
  "TARGET_SSE2"
{
  const int imm = INTVAL (operands[2]);
  /* Element indices 4..7 derived from the four 2-bit selectors.  */
  rtx idx0 = GEN_INT (((imm >> 0) & 3) + 4);
  rtx idx1 = GEN_INT (((imm >> 2) & 3) + 4);
  rtx idx2 = GEN_INT (((imm >> 4) & 3) + 4);
  rtx idx3 = GEN_INT (((imm >> 6) & 3) + 4);

  emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
                                 idx0, idx1, idx2, idx3));
  DONE;
})
13997
;; 128-bit pshufhw written as a vec_select over eight HI elements.
;; Elements 0..3 keep their positions (fixed const_int indices); operands
;; 2-5 (each in 4..7) choose the last four result elements.
;; The output code subtracts 4 from each of operands 2-5 to get a 2-bit
;; selector, packs the four selectors into one immediate (operand 2 in the
;; lowest bits), stores it back into operands[2] and prints pshufhw (with
;; the %v prefix modifier).
13998 (define_insn "sse2_pshufhw_1<mask_name>"
13999 [(set (match_operand:V8HI 0 "register_operand" "=v")
14000 (vec_select:V8HI
14001 (match_operand:V8HI 1 "vector_operand" "vBm")
14002 (parallel [(const_int 0)
14003 (const_int 1)
14004 (const_int 2)
14005 (const_int 3)
14006 (match_operand 2 "const_4_to_7_operand")
14007 (match_operand 3 "const_4_to_7_operand")
14008 (match_operand 4 "const_4_to_7_operand")
14009 (match_operand 5 "const_4_to_7_operand")])))]
14010 "TARGET_SSE2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
14011 {
14012 int mask = 0;
14013 mask |= (INTVAL (operands[2]) - 4) << 0;
14014 mask |= (INTVAL (operands[3]) - 4) << 2;
14015 mask |= (INTVAL (operands[4]) - 4) << 4;
14016 mask |= (INTVAL (operands[5]) - 4) << 6;
14017 operands[2] = GEN_INT (mask);
14018
14019 return "%vpshufhw\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
14020 }
14021 [(set_attr "type" "sselog")
14022 (set_attr "prefix_rep" "1")
14023 (set_attr "prefix_data16" "0")
14024 (set_attr "prefix" "maybe_vex")
14025 (set_attr "length_immediate" "1")
14026 (set_attr "mode" "TI")])
14027
;; Expander that builds a V4SI from SI operand 1: the value is duplicated
;; across a vector and merged (vec_merge selector const_int 1) with
;; operand 2.  Operand 2 is not supplied by the caller; the preparation
;; statement sets it to the V4SI zero constant, so (match_dup 2) is that
;; zero vector.
14028 (define_expand "sse2_loadd"
14029 [(set (match_operand:V4SI 0 "register_operand")
14030 (vec_merge:V4SI
14031 (vec_duplicate:V4SI
14032 (match_operand:SI 1 "nonimmediate_operand"))
14033 (match_dup 2)
14034 (const_int 1)))]
14035 "TARGET_SSE"
14036 "operands[2] = CONST0_RTX (V4SImode);")
14037
;; Merge SI operand 2 (duplicated across a V4SI) with V4SI operand 1 using
;; vec_merge selector const_int 1.
;; Alternatives, in order:
;;   0  (v)movd from memory, operand 1 constrained to "C"   (isa sse2)
;;   1  (v)movd from a general register, operand 1 "C"      (isa sse2)
;;   2  movss from memory, operand 1 "C"                    (isa noavx)
;;   3  movss from an SSE register, operand 1 tied to dest  (isa noavx)
;;   4  three-operand vmovss, operand 1 a separate register (isa avx)
;; NOTE(review): given the reg_or_0_operand predicate, "C" is presumably
;; the all-zero vector constant -- confirm against constraints.md.
;; preferred_for_speed makes alternative 1 depend on
;; TARGET_INTER_UNIT_MOVES_TO_VEC; every other alternative is "true".
14038 (define_insn "sse2_loadld"
14039 [(set (match_operand:V4SI 0 "register_operand" "=v,v,x,x,v")
14040 (vec_merge:V4SI
14041 (vec_duplicate:V4SI
14042 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,v"))
14043 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0,v")
14044 (const_int 1)))]
14045 "TARGET_SSE"
14046 "@
14047 %vmovd\t{%2, %0|%0, %2}
14048 %vmovd\t{%2, %0|%0, %2}
14049 movss\t{%2, %0|%0, %2}
14050 movss\t{%2, %0|%0, %2}
14051 vmovss\t{%2, %1, %0|%0, %1, %2}"
14052 [(set_attr "isa" "sse2,sse2,noavx,noavx,avx")
14053 (set_attr "type" "ssemov")
14054 (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,maybe_evex")
14055 (set_attr "mode" "TI,TI,V4SF,SF,SF")
14056 (set (attr "preferred_for_speed")
14057 (cond [(eq_attr "alternative" "1")
14058 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
14059 ]
14060 (symbol_ref "true")))])
14061
14062 ;; QI and HI modes handled by pextr patterns.
;; V8HI is always part of the iterator; V16QI is only included when
;; TARGET_SSE4_1 holds.
14063 (define_mode_iterator PEXTR_MODE12
14064 [(V16QI "TARGET_SSE4_1") V8HI])
14065
;; Extract one element (index operand 2) of a PEXTR_MODE12 vector register
;; into a general register or memory using pextr<suffix>.
;; Alternatives, in order:
;;   0  register destination, printed with the %k modifier
;;   1  memory destination                           (isa sse4)
;;   2  as 0, with "v" source register               (isa avx512bw)
;;   3  as 1, with "v" source register               (isa avx512bw)
;; prefix_extra is "*" only for the register-destination alternatives
;; (0 and 2) in V8HImode, and "1" in every other case.
14066 (define_insn "*vec_extract<mode>"
14067 [(set (match_operand:<ssescalarmode> 0 "register_sse4nonimm_operand" "=r,m,r,m")
14068 (vec_select:<ssescalarmode>
14069 (match_operand:PEXTR_MODE12 1 "register_operand" "x,x,v,v")
14070 (parallel
14071 [(match_operand:SI 2 "const_0_to_<ssescalarnummask>_operand")])))]
14072 "TARGET_SSE2"
14073 "@
14074 %vpextr<ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
14075 %vpextr<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
14076 vpextr<ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
14077 vpextr<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
14078 [(set_attr "isa" "*,sse4,avx512bw,avx512bw")
14079 (set_attr "type" "sselog1")
14080 (set_attr "prefix_data16" "1")
14081 (set (attr "prefix_extra")
14082 (if_then_else
14083 (and (eq_attr "alternative" "0,2")
14084 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
14085 (const_string "*")
14086 (const_string "1")))
14087 (set_attr "length_immediate" "1")
14088 (set_attr "prefix" "maybe_vex,maybe_vex,evex,evex")
14089 (set_attr "mode" "TI")])
14090
;; Zero-extending variant of the pextr element extraction: the selected
;; QI/HI element of a PEXTR_MODE12 vector register is zero-extended into an
;; SWI48 general register.  Both alternatives print the destination with
;; the %k modifier; alternative 1 uses a "v" source register and is
;; restricted to isa avx512bw.
;; prefix_extra is "*" for V8HImode and "1" otherwise.
14091 (define_insn "*vec_extract<PEXTR_MODE12:mode>_zext"
14092 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
14093 (zero_extend:SWI48
14094 (vec_select:<PEXTR_MODE12:ssescalarmode>
14095 (match_operand:PEXTR_MODE12 1 "register_operand" "x,v")
14096 (parallel
14097 [(match_operand:SI 2
14098 "const_0_to_<PEXTR_MODE12:ssescalarnummask>_operand")]))))]
14099 "TARGET_SSE2"
14100 "@
14101 %vpextr<PEXTR_MODE12:ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
14102 vpextr<PEXTR_MODE12:ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}"
14103 [(set_attr "isa" "*,avx512bw")
14104 (set_attr "type" "sselog1")
14105 (set_attr "prefix_data16" "1")
14106 (set (attr "prefix_extra")
14107 (if_then_else
14108 (eq (const_string "<PEXTR_MODE12:MODE>mode") (const_string "V8HImode"))
14109 (const_string "*")
14110 (const_string "1")))
14111 (set_attr "length_immediate" "1")
14112 (set_attr "prefix" "maybe_vex")
14113 (set_attr "mode" "TI")])
14114
;; Element extraction from a VI12_128 vector held in offsettable memory
;; ("o") into a general register.  The output template is "#", so no
;; assembly is printed for this pattern directly; it has to be split into
;; other insns before final output.
14115 (define_insn "*vec_extract<mode>_mem"
14116 [(set (match_operand:<ssescalarmode> 0 "register_operand" "=r")
14117 (vec_select:<ssescalarmode>
14118 (match_operand:VI12_128 1 "memory_operand" "o")
14119 (parallel
14120 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
14121 "TARGET_SSE"
14122 "#")
14123
;; Extraction of element 0 of an <ssevecmode> vector into an SWI48
;; destination.  The output template is "#", so the insn must be split
;; before final output.  The condition rejects the memory-to-memory
;; combination.  Alternative 1 (vector register to general register) is
;; restricted to isa sse2 and is preferred for speed only when
;; TARGET_INTER_UNIT_MOVES_FROM_VEC holds.
14124 (define_insn "*vec_extract<ssevecmodelower>_0"
14125 [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r,r,v ,m")
14126 (vec_select:SWI48
14127 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "m ,v,vm,v")
14128 (parallel [(const_int 0)])))]
14129 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
14130 "#"
14131 [(set_attr "isa" "*,sse2,*,*")
14132 (set (attr "preferred_for_speed")
14133 (cond [(eq_attr "alternative" "1")
14134 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
14135 ]
14136 (symbol_ref "true")))])
14137
;; Extraction of element 0 of a V2DI vector into a DI vector register or
;; memory, for !TARGET_64BIT only.  The condition rejects the
;; memory-to-memory combination.  The output template is "#", so the insn
;; must be split before final output.
14138 (define_insn "*vec_extractv2di_0_sse"
14139 [(set (match_operand:DI 0 "nonimmediate_operand" "=v,m")
14140 (vec_select:DI
14141 (match_operand:V2DI 1 "nonimmediate_operand" "vm,v")
14142 (parallel [(const_int 0)])))]
14143 "TARGET_SSE && !TARGET_64BIT
14144 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
14145 "#")
14146
;; After reload, an extraction of element 0 from a vector register is
;; rewritten as a plain move: operand 1 is replaced by its <MODE>mode
;; lowpart and the vec_select disappears.
14147 (define_split
14148 [(set (match_operand:SWI48x 0 "nonimmediate_operand")
14149 (vec_select:SWI48x
14150 (match_operand:<ssevecmode> 1 "register_operand")
14151 (parallel [(const_int 0)])))]
14152 "TARGET_SSE && reload_completed"
14153 [(set (match_dup 0) (match_dup 1))]
14154 "operands[1] = gen_lowpart (<MODE>mode, operands[1]);")
14155
;; Zero-extension to DI of element 0 of a V4SI register, SSE4.1 variant.
;; The output template is "#", so the insn must be split before final
;; output.  Alternative 0 targets a general register (isa x64) and is
;; preferred for speed only when TARGET_INTER_UNIT_MOVES_FROM_VEC holds;
;; alternatives 1 and 2 target vector registers (alternative 2 is isa
;; avx512f).
14156 (define_insn "*vec_extractv4si_0_zext_sse4"
14157 [(set (match_operand:DI 0 "register_operand" "=r,x,v")
14158 (zero_extend:DI
14159 (vec_select:SI
14160 (match_operand:V4SI 1 "register_operand" "v,x,v")
14161 (parallel [(const_int 0)]))))]
14162 "TARGET_SSE4_1"
14163 "#"
14164 [(set_attr "isa" "x64,*,avx512f")
14165 (set (attr "preferred_for_speed")
14166 (cond [(eq_attr "alternative" "0")
14167 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
14168 ]
14169 (symbol_ref "true")))])
14170
;; Zero-extension to DI of element 0 of a V4SI register into a general
;; register.  Only available for 64-bit SSE2 targets with
;; TARGET_INTER_UNIT_MOVES_FROM_VEC.  The output template is "#", so the
;; insn must be split before final output.
14171 (define_insn "*vec_extractv4si_0_zext"
14172 [(set (match_operand:DI 0 "register_operand" "=r")
14173 (zero_extend:DI
14174 (vec_select:SI
14175 (match_operand:V4SI 1 "register_operand" "x")
14176 (parallel [(const_int 0)]))))]
14177 "TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_FROM_VEC"
14178 "#")
14179
;; After reload, a zero-extended extraction of element 0 from a V4SI
;; register is rewritten as a zero_extend of the SImode lowpart of that
;; register.
14180 (define_split
14181 [(set (match_operand:DI 0 "register_operand")
14182 (zero_extend:DI
14183 (vec_select:SI
14184 (match_operand:V4SI 1 "register_operand")
14185 (parallel [(const_int 0)]))))]
14186 "TARGET_SSE2 && reload_completed"
14187 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
14188 "operands[1] = gen_lowpart (SImode, operands[1]);")
14189
;; Extract SI element operand 2 (0..3) of a V4SI register.
;; Alternatives, in order:
;;   0,1  pextrd into a general register or memory (1 is isa avx512dq)
;;   2,3  psrldq in place: source tied to the destination (isa noavx)
;;   4,5  three-operand vpsrldq (4 is isa avx, 5 is isa avx512bw)
;; For the shift alternatives the element index is multiplied by 4 to get
;; the byte shift count, and operands[2] is overwritten with that count
;; before printing.
14190 (define_insn "*vec_extractv4si"
14191 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,rm,Yr,*x,x,Yv")
14192 (vec_select:SI
14193 (match_operand:V4SI 1 "register_operand" "x,v,0,0,x,v")
14194 (parallel [(match_operand:SI 2 "const_0_to_3_operand")])))]
14195 "TARGET_SSE4_1"
14196 {
14197 switch (which_alternative)
14198 {
14199 case 0:
14200 case 1:
14201 return "%vpextrd\t{%2, %1, %0|%0, %1, %2}";
14202
14203 case 2:
14204 case 3:
14205 operands[2] = GEN_INT (INTVAL (operands[2]) * 4);
14206 return "psrldq\t{%2, %0|%0, %2}";
14207
14208 case 4:
14209 case 5:
14210 operands[2] = GEN_INT (INTVAL (operands[2]) * 4);
14211 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
14212
14213 default:
14214 gcc_unreachable ();
14215 }
14216 }
14217 [(set_attr "isa" "*,avx512dq,noavx,noavx,avx,avx512bw")
14218 (set_attr "type" "sselog1,sselog1,sseishft1,sseishft1,sseishft1,sseishft1")
14219 (set (attr "prefix_extra")
14220 (if_then_else (eq_attr "alternative" "0,1")
14221 (const_string "1")
14222 (const_string "*")))
14223 (set_attr "length_immediate" "1")
14224 (set_attr "prefix" "maybe_vex,evex,orig,orig,vex,evex")
14225 (set_attr "mode" "TI")])
14226
;; Zero-extending extraction of SI element operand 2 (0..3) of a V4SI
;; register into a DI general register, for 64-bit SSE4.1 targets.  It is
;; emitted as pextrd with the destination printed through the %k modifier.
;; Alternative 1 uses a "v" source register and is isa avx512dq.
14227 (define_insn "*vec_extractv4si_zext"
14228 [(set (match_operand:DI 0 "register_operand" "=r,r")
14229 (zero_extend:DI
14230 (vec_select:SI
14231 (match_operand:V4SI 1 "register_operand" "x,v")
14232 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
14233 "TARGET_64BIT && TARGET_SSE4_1"
14234 "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
14235 [(set_attr "isa" "*,avx512dq")
14236 (set_attr "type" "sselog1")
14237 (set_attr "prefix_extra" "1")
14238 (set_attr "length_immediate" "1")
14239 (set_attr "prefix" "maybe_vex")
14240 (set_attr "mode" "TI")])
14241
;; Extraction of SI element operand 2 (0..3) from a V4SI in offsettable
;; memory ("o") into an SSE or general register.  The output template is
;; "#", so the insn must be split before final output.
14242 (define_insn "*vec_extractv4si_mem"
14243 [(set (match_operand:SI 0 "register_operand" "=x,r")
14244 (vec_select:SI
14245 (match_operand:V4SI 1 "memory_operand" "o,o")
14246 (parallel [(match_operand 2 "const_0_to_3_operand")])))]
14247 "TARGET_SSE"
14248 "#")
14249
;; Zero-extending extraction of SI element operand 2 (0..3) from a V4SI in
;; offsettable memory, for 64-bit targets.  Nothing is printed directly
;; ("#"); after reload the insn is split into a zero_extend of an SImode
;; memory reference at byte offset 4 * index from the original address.
14250 (define_insn_and_split "*vec_extractv4si_zext_mem"
14251 [(set (match_operand:DI 0 "register_operand" "=x,r")
14252 (zero_extend:DI
14253 (vec_select:SI
14254 (match_operand:V4SI 1 "memory_operand" "o,o")
14255 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
14256 "TARGET_64BIT && TARGET_SSE"
14257 "#"
14258 "&& reload_completed"
14259 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
14260 {
14261 operands[1] = adjust_address (operands[1], SImode, INTVAL (operands[2]) * 4);
14262 })
14263
;; Extract element 1 (the second DI) of a V2DI vector.  The condition
;; rejects the memory-to-memory combination.
;; Alternatives, in order (the isa attribute is given in parentheses):
;;   0  pextrq $1 into a general register or memory   (x64_sse4)
;;   1  vpextrq $1, "v" source                        (x64_avx512dq)
;;   2  movhps store to memory
;;   3  psrldq $8 in place, source tied to dest       (sse2_noavx)
;;   4  vpsrldq $8                                    (avx)
;;   5  vpsrldq $8, "Yv"/"v" registers                (avx512bw)
;;   6  movhlps                                       (noavx)
;;   7  "#": offsettable memory to a vector register, must be split
;;   8  "#": offsettable memory to a general register (x64), must be split
;; The remaining attributes are keyed on the same alternative numbers, so
;; any reordering of the constraint strings must be mirrored in them.
14264 (define_insn "*vec_extractv2di_1"
14265 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,rm,m,x,x,Yv,x,v,r")
14266 (vec_select:DI
14267 (match_operand:V2DI 1 "nonimmediate_operand" "x ,v ,v,0,x, v,x,o,o")
14268 (parallel [(const_int 1)])))]
14269 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
14270 "@
14271 %vpextrq\t{$1, %1, %0|%0, %1, 1}
14272 vpextrq\t{$1, %1, %0|%0, %1, 1}
14273 %vmovhps\t{%1, %0|%0, %1}
14274 psrldq\t{$8, %0|%0, 8}
14275 vpsrldq\t{$8, %1, %0|%0, %1, 8}
14276 vpsrldq\t{$8, %1, %0|%0, %1, 8}
14277 movhlps\t{%1, %0|%0, %1}
14278 #
14279 #"
14280 [(set (attr "isa")
14281 (cond [(eq_attr "alternative" "0")
14282 (const_string "x64_sse4")
14283 (eq_attr "alternative" "1")
14284 (const_string "x64_avx512dq")
14285 (eq_attr "alternative" "3")
14286 (const_string "sse2_noavx")
14287 (eq_attr "alternative" "4")
14288 (const_string "avx")
14289 (eq_attr "alternative" "5")
14290 (const_string "avx512bw")
14291 (eq_attr "alternative" "6")
14292 (const_string "noavx")
14293 (eq_attr "alternative" "8")
14294 (const_string "x64")
14295 ]
14296 (const_string "*")))
14297 (set (attr "type")
14298 (cond [(eq_attr "alternative" "2,6,7")
14299 (const_string "ssemov")
14300 (eq_attr "alternative" "3,4,5")
14301 (const_string "sseishft1")
14302 (eq_attr "alternative" "8")
14303 (const_string "imov")
14304 ]
14305 (const_string "sselog1")))
14306 (set (attr "length_immediate")
14307 (if_then_else (eq_attr "alternative" "0,1,3,4,5")
14308 (const_string "1")
14309 (const_string "*")))
14310 (set (attr "prefix_rex")
14311 (if_then_else (eq_attr "alternative" "0,1")
14312 (const_string "1")
14313 (const_string "*")))
14314 (set (attr "prefix_extra")
14315 (if_then_else (eq_attr "alternative" "0,1")
14316 (const_string "1")
14317 (const_string "*")))
14318 (set_attr "prefix" "maybe_vex,evex,maybe_vex,orig,vex,evex,orig,*,*")
14319 (set_attr "mode" "TI,TI,V2SF,TI,TI,TI,V4SF,DI,DI")])
14320
;; After reload, an element extraction from a VI_128 vector in memory into
;; a register is rewritten as a scalar load: the memory reference is
;; re-addressed in <ssescalarmode> at byte offset
;; index * GET_MODE_SIZE (<ssescalarmode>mode).
14321 (define_split
14322 [(set (match_operand:<ssescalarmode> 0 "register_operand")
14323 (vec_select:<ssescalarmode>
14324 (match_operand:VI_128 1 "memory_operand")
14325 (parallel
14326 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
14327 "TARGET_SSE && reload_completed"
14328 [(set (match_dup 0) (match_dup 1))]
14329 {
14330 int offs = INTVAL (operands[2]) * GET_MODE_SIZE (<ssescalarmode>mode);
14331
14332 operands[1] = adjust_address (operands[1], <ssescalarmode>mode, offs);
14333 })
14334
;; Extract TI element operand 2 (0 or 1) of a V2TI register into a
;; register or memory.  Alternative 0 prints vextract%~128 (prefix vex);
;; alternative 1 prints vextracti32x4 (prefix evex) with operand 1 printed
;; through the %g modifier.
;; NOTE(review): %g presumably prints the 512-bit name of the register --
;; confirm against the operand-modifier table in i386.c.
14335 (define_insn "*vec_extractv2ti"
14336 [(set (match_operand:TI 0 "nonimmediate_operand" "=xm,vm")
14337 (vec_select:TI
14338 (match_operand:V2TI 1 "register_operand" "x,v")
14339 (parallel
14340 [(match_operand:SI 2 "const_0_to_1_operand")])))]
14341 "TARGET_AVX"
14342 "@
14343 vextract%~128\t{%2, %1, %0|%0, %1, %2}
14344 vextracti32x4\t{%2, %g1, %0|%0, %g1, %2}"
14345 [(set_attr "type" "sselog")
14346 (set_attr "prefix_extra" "1")
14347 (set_attr "length_immediate" "1")
14348 (set_attr "prefix" "vex,evex")
14349 (set_attr "mode" "OI")])
14350
;; Extract TI element operand 2 (0..3) of a V4TI register into a register
;; or memory with vextracti32x4.  Requires TARGET_AVX512F.
14351 (define_insn "*vec_extractv4ti"
14352 [(set (match_operand:TI 0 "nonimmediate_operand" "=vm")
14353 (vec_select:TI
14354 (match_operand:V4TI 1 "register_operand" "v")
14355 (parallel
14356 [(match_operand:SI 2 "const_0_to_3_operand")])))]
14357 "TARGET_AVX512F"
14358 "vextracti32x4\t{%2, %1, %0|%0, %1, %2}"
14359 [(set_attr "type" "sselog")
14360 (set_attr "prefix_extra" "1")
14361 (set_attr "length_immediate" "1")
14362 (set_attr "prefix" "evex")
14363 (set_attr "mode" "XI")])
14364
;; Vector-of-TI modes: V2TI is always part of the iterator; V4TI is only
;; included when TARGET_AVX512F holds.
14365 (define_mode_iterator VEXTRACTI128_MODE
14366 [(V4TI "TARGET_AVX512F") V2TI])
14367
;; After reload, extraction of TI element 0 from a V2TI/V4TI register is
;; rewritten as a plain move of the TImode lowpart of that register.  The
;; split is only done when TARGET_AVX512VL holds or the source is not an
;; EXT_REX_SSE_REG_P register.
14368 (define_split
14369 [(set (match_operand:TI 0 "nonimmediate_operand")
14370 (vec_select:TI
14371 (match_operand:VEXTRACTI128_MODE 1 "register_operand")
14372 (parallel [(const_int 0)])))]
14373 "TARGET_AVX
14374 && reload_completed
14375 && (TARGET_AVX512VL || !EXT_REX_SSE_REG_P (operands[1]))"
14376 [(set (match_dup 0) (match_dup 1))]
14377 "operands[1] = gen_lowpart (TImode, operands[1]);")
14378
14379 ;; Turn SImode or DImode extraction from arbitrary SSE/AVX/AVX512F
14380 ;; vector modes into vec_extract*.
;; The split only applies while pseudos can still be created, to a REG in
;; a 16-, 32- or 64-byte vector mode supported by the enabled ISA, and, for
;; DImode, only on 64-bit targets or when the destination is memory.
;; The preparation code narrows the source to 16 bytes first: a 64-byte
;; source is halved with vec_extract_lo_v16si/v8di into a fresh pseudo and
;; then falls through to the 32-byte case, which halves again with
;; vec_extract_lo_v8si/v4di; a 16-byte source is simply viewed as
;; <ssevecmode> through gen_lowpart.  The replacement pattern then selects
;; element 0 of the result.
14381 (define_split
14382 [(set (match_operand:SWI48x 0 "nonimmediate_operand")
14383 (subreg:SWI48x (match_operand 1 "register_operand") 0))]
14384 "can_create_pseudo_p ()
14385 && REG_P (operands[1])
14386 && VECTOR_MODE_P (GET_MODE (operands[1]))
14387 && ((TARGET_SSE && GET_MODE_SIZE (GET_MODE (operands[1])) == 16)
14388 || (TARGET_AVX && GET_MODE_SIZE (GET_MODE (operands[1])) == 32)
14389 || (TARGET_AVX512F && GET_MODE_SIZE (GET_MODE (operands[1])) == 64))
14390 && (<MODE>mode == SImode || TARGET_64BIT || MEM_P (operands[0]))"
14391 [(set (match_dup 0) (vec_select:SWI48x (match_dup 1)
14392 (parallel [(const_int 0)])))]
14393 {
14394 rtx tmp;
14395
14396 switch (GET_MODE_SIZE (GET_MODE (operands[1])))
14397 {
14398 case 64:
14399 if (<MODE>mode == SImode)
14400 {
14401 tmp = gen_reg_rtx (V8SImode);
14402 emit_insn (gen_vec_extract_lo_v16si (tmp,
14403 gen_lowpart (V16SImode,
14404 operands[1])));
14405 }
14406 else
14407 {
14408 tmp = gen_reg_rtx (V4DImode);
14409 emit_insn (gen_vec_extract_lo_v8di (tmp,
14410 gen_lowpart (V8DImode,
14411 operands[1])));
14412 }
14413 operands[1] = tmp;
14414 /* FALLTHRU */
14415 case 32:
14416 tmp = gen_reg_rtx (<ssevecmode>mode);
14417 if (<MODE>mode == SImode)
14418 emit_insn (gen_vec_extract_lo_v8si (tmp, gen_lowpart (V8SImode,
14419 operands[1])));
14420 else
14421 emit_insn (gen_vec_extract_lo_v4di (tmp, gen_lowpart (V4DImode,
14422 operands[1])));
14423 operands[1] = tmp;
14424 break;
14425 case 16:
14426 operands[1] = gen_lowpart (<ssevecmode>mode, operands[1]);
14427 break;
14428 }
14429 })
14430
;; Build a V2SI from two SI values (operand 1 first, operand 2 second) on
;; SSE4.1 targets.  The condition rejects two memory inputs.
;; Alternatives, in order:
;;   0,1  pinsrd $1, operand 1 tied to the destination      (isa noavx)
;;   2,3  three-operand vpinsrd $1       (2 is isa avx, 3 is isa avx512dq)
;;   4,5  punpckldq, operand 1 tied to the destination      (isa noavx)
;;   6    vpunpckldq                                        (isa avx)
;;   7    (v)movd of operand 1, operand 2 constrained to "C"
;;   8    MMX punpckldq, operand 1 tied to the destination
;;   9    MMX movd of operand 1, operand 2 constrained to "C"
;; The type, prefix_extra, length_immediate, prefix and mode attributes
;; are keyed on these alternative numbers.
14431 (define_insn "*vec_concatv2si_sse4_1"
14432 [(set (match_operand:V2SI 0 "register_operand"
14433 "=Yr,*x, x, v,Yr,*x, v, v, *y,*y")
14434 (vec_concat:V2SI
14435 (match_operand:SI 1 "nonimmediate_operand"
14436 " 0, 0, x,Yv, 0, 0,Yv,rm, 0,rm")
14437 (match_operand:SI 2 "nonimm_or_0_operand"
14438 " rm,rm,rm,rm,Yr,*x,Yv, C,*ym, C")))]
14439 "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
14440 "@
14441 pinsrd\t{$1, %2, %0|%0, %2, 1}
14442 pinsrd\t{$1, %2, %0|%0, %2, 1}
14443 vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
14444 vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
14445 punpckldq\t{%2, %0|%0, %2}
14446 punpckldq\t{%2, %0|%0, %2}
14447 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
14448 %vmovd\t{%1, %0|%0, %1}
14449 punpckldq\t{%2, %0|%0, %2}
14450 movd\t{%1, %0|%0, %1}"
14451 [(set_attr "isa" "noavx,noavx,avx,avx512dq,noavx,noavx,avx,*,*,*")
14452 (set (attr "type")
14453 (cond [(eq_attr "alternative" "7")
14454 (const_string "ssemov")
14455 (eq_attr "alternative" "8")
14456 (const_string "mmxcvt")
14457 (eq_attr "alternative" "9")
14458 (const_string "mmxmov")
14459 ]
14460 (const_string "sselog")))
14461 (set (attr "prefix_extra")
14462 (if_then_else (eq_attr "alternative" "0,1,2,3")
14463 (const_string "1")
14464 (const_string "*")))
14465 (set (attr "length_immediate")
14466 (if_then_else (eq_attr "alternative" "0,1,2,3")
14467 (const_string "1")
14468 (const_string "*")))
14469 (set_attr "prefix" "orig,orig,vex,evex,orig,orig,maybe_evex,maybe_vex,orig,orig")
14470 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,TI,DI,DI")])
14471
14472 ;; ??? In theory we can match memory for the MMX alternative, but allowing
14473 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
14474 ;; alternatives pretty much forces the MMX alternative to be chosen.
;;
;; V2SI concatenation for targets with SSE but without SSE4.1 (see the insn
;; condition).  Seven alternatives, in order:
;;   0    punpckldq   (isa sse2)
;;   1-2  movd        (isa sse2; operand 2 is the "C" constant)
;;   3    unpcklps    (mode V4SF)
;;   4    movss       (mode SF, operand 1 in memory, operand 2 is "C")
;;   5    punpckldq   into a *y destination
;;   6    movd        into a *y destination, operand 2 is "C"
14475 (define_insn "*vec_concatv2si"
14476 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,x,x,*y,*y")
14477 (vec_concat:V2SI
14478 (match_operand:SI 1 "nonimmediate_operand" " 0,rm,rm,0,m, 0,*rm")
14479 (match_operand:SI 2 "reg_or_0_operand" " x,C ,C, x,C,*y,C")))]
14480 "TARGET_SSE && !TARGET_SSE4_1"
14481 "@
14482 punpckldq\t{%2, %0|%0, %2}
14483 movd\t{%1, %0|%0, %1}
14484 movd\t{%1, %0|%0, %1}
14485 unpcklps\t{%2, %0|%0, %2}
14486 movss\t{%1, %0|%0, %1}
14487 punpckldq\t{%2, %0|%0, %2}
14488 movd\t{%1, %0|%0, %1}"
14489 [(set_attr "isa" "sse2,sse2,sse2,*,*,*,*")
14490 (set_attr "type" "sselog,ssemov,mmxmov,sselog,ssemov,mmxcvt,mmxmov")
14491 (set_attr "mode" "TI,TI,DI,V4SF,SF,DI,DI")])
14492
;; Concatenate two V2SI values into a V4SI.  Operand 1 is always a
;; register; operand 2 may be a register or memory.  Alternatives:
;;   0  punpcklqdq   (isa sse2_noavx)
;;   1  vpunpcklqdq  (isa avx)
;;   2  movlhps      (isa noavx, register operand 2)
;;   3  movhps       (isa noavx, memory operand 2)
;;   4  vmovhps      (isa avx, memory operand 2)
;; The two movhps templates print operand 2 with the %q modifier in their
;; Intel-syntax half.
14493 (define_insn "*vec_concatv4si"
14494 [(set (match_operand:V4SI 0 "register_operand" "=x,v,x,x,v")
14495 (vec_concat:V4SI
14496 (match_operand:V2SI 1 "register_operand" " 0,v,0,0,v")
14497 (match_operand:V2SI 2 "nonimmediate_operand" " x,v,x,m,m")))]
14498 "TARGET_SSE"
14499 "@
14500 punpcklqdq\t{%2, %0|%0, %2}
14501 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
14502 movlhps\t{%2, %0|%0, %2}
14503 movhps\t{%2, %0|%0, %q2}
14504 vmovhps\t{%2, %1, %0|%0, %1, %q2}"
14505 [(set_attr "isa" "sse2_noavx,avx,noavx,noavx,avx")
14506 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
14507 (set_attr "prefix" "orig,maybe_evex,orig,orig,maybe_evex")
14508 (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")])
14509
;; V4SI formed from a V2SI operand 1 concatenated with a constant-zero
;; operand 2 (const0_operand).  Alternative 0 emits %vmovq from an SSE
;; register or memory; alternative 1 emits movq2dq from a *y register.
14510 (define_insn "*vec_concatv4si_0"
14511 [(set (match_operand:V4SI 0 "register_operand" "=v,x")
14512 (vec_concat:V4SI
14513 (match_operand:V2SI 1 "nonimmediate_operand" "xm,?!*y")
14514 (match_operand:V2SI 2 "const0_operand" " C,C")))]
14515 "TARGET_SSE2"
14516 "@
14517 %vmovq\t{%1, %0|%0, %1}
14518 movq2dq\t{%1, %0|%0, %1}"
14519 [(set_attr "type" "ssemov")
14520 (set_attr "prefix" "maybe_vex,orig")
14521 (set_attr "mode" "TI")
;; The movq2dq alternative is preferred for speed only when
;; TARGET_INTER_UNIT_MOVES_FROM_VEC is true.
14522 (set (attr "preferred_for_speed")
14523 (if_then_else (eq_attr "alternative" "1")
14524 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
14525 (symbol_ref "true")))])
14526
14527 ;; movd instead of movq is required to handle broken assemblers.
;;
;; Build a V2DI vector from two DImode values.  Twelve alternatives:
;;   0-1   pinsrq       (isa x64_sse4_noavx, operand 1 tied to destination)
;;   2     vpinsrq      (isa x64_avx)
;;   3     vpinsrq      (isa x64_avx512dq, prefix evex)
;;   4     move from a general register with operand 2 = "C"; the C
;;         fragment picks %vmovq or %vmovd depending on
;;         HAVE_AS_IX86_INTERUNIT_MOVQ (isa x64_sse2)
;;   5     %vmovq       from SSE register or memory, operand 2 = "C"
;;   6     movq2dq      from a *y register, operand 2 = "C"
;;   7     punpcklqdq   (isa sse2_noavx)
;;   8     vpunpcklqdq  (isa avx)
;;   9     movlhps      (isa noavx)
;;   10    movhps       from memory (isa noavx)
;;   11    vmovhps      from memory (isa avx)
14528 (define_insn "vec_concatv2di"
14529 [(set (match_operand:V2DI 0 "register_operand"
14530 "=Yr,*x,x ,v ,v,v ,x ,x,v ,x,x,v")
14531 (vec_concat:V2DI
14532 (match_operand:DI 1 "nonimmediate_operand"
14533 " 0, 0,x ,Yv,r,vm,?!*y,0,Yv,0,0,v")
14534 (match_operand:DI 2 "nonimm_or_0_operand"
14535 " rm,rm,rm,rm,C ,C ,C ,x,Yv,x,m,m")))]
14536 "TARGET_SSE"
14537 "@
14538 pinsrq\t{$1, %2, %0|%0, %2, 1}
14539 pinsrq\t{$1, %2, %0|%0, %2, 1}
14540 vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
14541 vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
14542 * return HAVE_AS_IX86_INTERUNIT_MOVQ ? \"%vmovq\t{%1, %0|%0, %1}\" : \"%vmovd\t{%1, %0|%0, %1}\";
14543 %vmovq\t{%1, %0|%0, %1}
14544 movq2dq\t{%1, %0|%0, %1}
14545 punpcklqdq\t{%2, %0|%0, %2}
14546 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
14547 movlhps\t{%2, %0|%0, %2}
14548 movhps\t{%2, %0|%0, %2}
14549 vmovhps\t{%2, %1, %0|%0, %1, %2}"
;; Per-alternative ISA requirement; alternatives not listed (9 and 10)
;; fall through to the default "noavx".
14550 [(set (attr "isa")
14551 (cond [(eq_attr "alternative" "0,1")
14552 (const_string "x64_sse4_noavx")
14553 (eq_attr "alternative" "2")
14554 (const_string "x64_avx")
14555 (eq_attr "alternative" "3")
14556 (const_string "x64_avx512dq")
14557 (eq_attr "alternative" "4")
14558 (const_string "x64_sse2")
14559 (eq_attr "alternative" "5,6")
14560 (const_string "sse2")
14561 (eq_attr "alternative" "7")
14562 (const_string "sse2_noavx")
14563 (eq_attr "alternative" "8,11")
14564 (const_string "avx")
14565 ]
14566 (const_string "noavx")))
;; The insert and unpack alternatives are sselog; all others are ssemov.
14567 (set (attr "type")
14568 (if_then_else
14569 (eq_attr "alternative" "0,1,2,3,7,8")
14570 (const_string "sselog")
14571 (const_string "ssemov")))
;; prefix_rex is set for the alternatives whose isa is x64_* (0-4).
14572 (set (attr "prefix_rex")
14573 (if_then_else (eq_attr "alternative" "0,1,2,3,4")
14574 (const_string "1")
14575 (const_string "*")))
;; prefix_extra and length_immediate are set only for the (v)pinsrq
;; alternatives, whose templates carry the immediate 1.
14576 (set (attr "prefix_extra")
14577 (if_then_else (eq_attr "alternative" "0,1,2,3")
14578 (const_string "1")
14579 (const_string "*")))
14580 (set (attr "length_immediate")
14581 (if_then_else (eq_attr "alternative" "0,1,2,3")
14582 (const_string "1")
14583 (const_string "*")))
14584 (set (attr "prefix")
14585 (cond [(eq_attr "alternative" "2")
14586 (const_string "vex")
14587 (eq_attr "alternative" "3")
14588 (const_string "evex")
14589 (eq_attr "alternative" "4,5")
14590 (const_string "maybe_vex")
14591 (eq_attr "alternative" "8,11")
14592 (const_string "maybe_evex")
14593 ]
14594 (const_string "orig")))
14595 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")
;; The general-register alternative (4) and the *y-register alternative (6)
;; are preferred for speed only when the corresponding inter-unit move
;; tuning flag is true.
14596 (set (attr "preferred_for_speed")
14597 (cond [(eq_attr "alternative" "4")
14598 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
14599 (eq_attr "alternative" "6")
14600 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
14601 ]
14602 (symbol_ref "true")))])
14603
14604 ;; vmovq clears also the higher bits.
;;
;; Set element 0 of an all-zero vector: the pattern is a vec_merge with
;; mask 1 of a duplicated scalar (operand 2) into a const0_operand
;; (operand 1).  Both alternatives emit vmovq and print the destination
;; with the %x modifier.  Alternative 0 takes the scalar from a general
;; register (isa x64, prefix_rex 1); alternative 1 takes it from an SSE
;; register or memory.
14605 (define_insn "vec_set<mode>_0"
14606 [(set (match_operand:VI8_AVX_AVX512F 0 "register_operand" "=v,v")
14607 (vec_merge:VI8_AVX_AVX512F
14608 (vec_duplicate:VI8_AVX_AVX512F
14609 (match_operand:<ssescalarmode> 2 "general_operand" "r,vm"))
14610 (match_operand:VI8_AVX_AVX512F 1 "const0_operand" "C,C")
14611 (const_int 1)))]
14612 "TARGET_AVX"
14613 "vmovq\t{%2, %x0|%x0, %2}"
14614 [(set_attr "isa" "x64,*")
14615 (set_attr "type" "ssemov")
14616 (set_attr "prefix_rex" "1,*")
14617 (set_attr "prefix" "maybe_evex")
14618 (set_attr "mode" "TI")
;; The general-register alternative is preferred for speed only when
;; TARGET_INTER_UNIT_MOVES_TO_VEC is true.
14619 (set (attr "preferred_for_speed")
14620 (cond [(eq_attr "alternative" "0")
14621 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
14622 ]
14623 (symbol_ref "true")))])
14624
;; Expander for the vec_unpacks_lo_<mode> named pattern: delegates to
;; ix86_expand_sse_unpack with both boolean arguments false.
;; NOTE(review): judging by the four sibling expanders, the third argument
;; presumably selects unsigned extension and the fourth the high half --
;; confirm against ix86_expand_sse_unpack.
14625 (define_expand "vec_unpacks_lo_<mode>"
14626 [(match_operand:<sseunpackmode> 0 "register_operand")
14627 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
14628 "TARGET_SSE2"
14629 "ix86_expand_sse_unpack (operands[0], operands[1], false, false); DONE;")
14630
;; Expander for the vec_unpacks_hi_<mode> named pattern: delegates to
;; ix86_expand_sse_unpack with arguments (false, true).
;; NOTE(review): presumably "signed, high half" -- confirm against
;; ix86_expand_sse_unpack.
14631 (define_expand "vec_unpacks_hi_<mode>"
14632 [(match_operand:<sseunpackmode> 0 "register_operand")
14633 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
14634 "TARGET_SSE2"
14635 "ix86_expand_sse_unpack (operands[0], operands[1], false, true); DONE;")
14636
;; Expander for the vec_unpacku_lo_<mode> named pattern: delegates to
;; ix86_expand_sse_unpack with arguments (true, false).
;; NOTE(review): presumably "unsigned, low half" -- confirm against
;; ix86_expand_sse_unpack.
14637 (define_expand "vec_unpacku_lo_<mode>"
14638 [(match_operand:<sseunpackmode> 0 "register_operand")
14639 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
14640 "TARGET_SSE2"
14641 "ix86_expand_sse_unpack (operands[0], operands[1], true, false); DONE;")
14642
;; Low-half unpack of a QImode boolean vector.  Operand 2 is a constant
;; element count; only 8 and 4 are accepted, anything else makes the
;; expander FAIL.  For the accepted counts the expansion is a plain move
;; of operand 1 into operand 0.
(define_expand "vec_unpacks_sbool_lo_qi"
  [(match_operand:QI 0 "register_operand")
   (match_operand:QI 1 "register_operand")
   (match_operand:QI 2 "const_int_operand")]
  "TARGET_AVX512F"
{
  /* Reject every element count other than 8 or 4.  */
  const HOST_WIDE_INT nelts = INTVAL (operands[2]);
  if (!(nelts == 8 || nelts == 4))
    FAIL;

  emit_move_insn (operands[0], operands[1]);
  DONE;
})
14654
;; Expands to a single set whose destination is the HImode subreg (byte 0)
;; of QImode operand 0 and whose source is HImode operand 1.  There is no
;; preparation code; the template is emitted as written.
14655 (define_expand "vec_unpacks_lo_hi"
14656 [(set (subreg:HI (match_operand:QI 0 "register_operand") 0)
14657 (match_operand:HI 1 "register_operand"))]
14658 "TARGET_AVX512F")
14659
;; Expands to a move of the HImode subreg (byte 0) of SImode operand 1
;; into HImode operand 0.
14660 (define_expand "vec_unpacks_lo_si"
14661 [(set (match_operand:HI 0 "register_operand")
14662 (subreg:HI (match_operand:SI 1 "register_operand") 0))]
14663 "TARGET_AVX512F")
14664
;; Expands to a move of the SImode subreg (byte 0) of DImode operand 1
;; into SImode operand 0.  Unlike the HI and SI variants this one is
;; conditional on TARGET_AVX512BW.
14665 (define_expand "vec_unpacks_lo_di"
14666 [(set (match_operand:SI 0 "register_operand")
14667 (subreg:SI (match_operand:DI 1 "register_operand") 0))]
14668 "TARGET_AVX512BW")
14669
;; Expander for the vec_unpacku_hi_<mode> named pattern: delegates to
;; ix86_expand_sse_unpack with arguments (true, true).
;; NOTE(review): presumably "unsigned, high half" -- confirm against
;; ix86_expand_sse_unpack.
14670 (define_expand "vec_unpacku_hi_<mode>"
14671 [(match_operand:<sseunpackmode> 0 "register_operand")
14672 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
14673 "TARGET_SSE2"
14674 "ix86_expand_sse_unpack (operands[0], operands[1], true, true); DONE;")
14675
;; High-half unpack of a QImode boolean vector.  Operand 2 is a constant
;; element count; only 8 and 4 are accepted, anything else makes the
;; expander FAIL.  The result is operand 1 shifted right by half the
;; element count.  With AVX512DQ the shift is done directly in QImode;
;; otherwise it is done in HImode on a lowpart subreg of operand 1 and the
;; QImode low part of the result is moved into operand 0.
(define_expand "vec_unpacks_sbool_hi_qi"
  [(match_operand:QI 0 "register_operand")
   (match_operand:QI 1 "register_operand")
   (match_operand:QI 2 "const_int_operand")]
  "TARGET_AVX512F"
{
  /* Reject every element count other than 8 or 4.  */
  const HOST_WIDE_INT nelts = INTVAL (operands[2]);
  if (!(nelts == 8 || nelts == 4))
    FAIL;

  /* The high half starts nelts / 2 bits up.  */
  rtx shift = GEN_INT (nelts / 2);

  if (!TARGET_AVX512DQ)
    {
      /* No QImode mask shift available here: go through HImode.  */
      rtx wide = gen_reg_rtx (HImode);
      rtx src = lowpart_subreg (HImode, operands[1], QImode);
      emit_insn (gen_klshiftrthi (wide, src, shift));
      emit_move_insn (operands[0], lowpart_subreg (QImode, wide, HImode));
    }
  else
    emit_insn (gen_klshiftrtqi (operands[0], operands[1], shift));

  DONE;
})
14698
;; Expands to a parallel of (a) a set of the HImode subreg of QImode
;; operand 0 to HImode operand 1 logically shifted right by 8, and (b) an
;; UNSPEC_MASKOP marker.  There is no preparation code.
14699 (define_expand "vec_unpacks_hi_hi"
14700 [(parallel
14701 [(set (subreg:HI (match_operand:QI 0 "register_operand") 0)
14702 (lshiftrt:HI (match_operand:HI 1 "register_operand")
14703 (const_int 8)))
14704 (unspec [(const_int 0)] UNSPEC_MASKOP)])]
14705 "TARGET_AVX512F")
14706
;; SWI48x variant of the high-half unpack: same parallel shape as
;; vec_unpacks_hi_hi, but the shift count is operand 2, which the
;; preparation statement sets to the bit size of <HALFMASKMODE>.
14707 (define_expand "vec_unpacks_hi_<mode>"
14708 [(parallel
14709 [(set (subreg:SWI48x
14710 (match_operand:<HALFMASKMODE> 0 "register_operand") 0)
14711 (lshiftrt:SWI48x (match_operand:SWI48x 1 "register_operand")
14712 (match_dup 2)))
14713 (unspec [(const_int 0)] UNSPEC_MASKOP)])]
14714 "TARGET_AVX512BW"
14715 "operands[2] = GEN_INT (GET_MODE_BITSIZE (<HALFMASKMODE>mode));")
14716
14717 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14718 ;;
14719 ;; Miscellaneous
14720 ;;
14721 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14722
;; Expander for the unsigned average.  The RTL computes, in the doubled
;; element mode, (zext (op1) + zext (op2) + 1) >> 1 and truncates back to
;; the original mode.  The "+ 1" term is the operand numbered
;; <mask_expand_op3>, which the preparation code fills with CONST1_RTX
;; before calling ix86_fixup_binary_operands_no_copy for PLUS.
14723 (define_expand "<sse2_avx2>_uavg<mode>3<mask_name>"
14724 [(set (match_operand:VI12_AVX2 0 "register_operand")
14725 (truncate:VI12_AVX2
14726 (lshiftrt:<ssedoublemode>
14727 (plus:<ssedoublemode>
14728 (plus:<ssedoublemode>
14729 (zero_extend:<ssedoublemode>
14730 (match_operand:VI12_AVX2 1 "vector_operand"))
14731 (zero_extend:<ssedoublemode>
14732 (match_operand:VI12_AVX2 2 "vector_operand")))
14733 (match_dup <mask_expand_op3>))
14734 (const_int 1))))]
14735 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
14736 {
14737 operands[<mask_expand_op3>] = CONST1_RTX(<MODE>mode);
14738 ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);
14739 })
14740
;; Matching insn for the unsigned-average expander: same RTL shape, with
;; the rounding term required to satisfy const1_operand.  Operands 1 and 2
;; are commutative ("%" in the constraint).  Alternative 0 emits the
;; two-operand pavg<suffix>; alternative 1 emits the three-operand
;; vpavg<suffix>, optionally with the <mask_operand3> decoration.
14741 (define_insn "*<sse2_avx2>_uavg<mode>3<mask_name>"
14742 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v")
14743 (truncate:VI12_AVX2
14744 (lshiftrt:<ssedoublemode>
14745 (plus:<ssedoublemode>
14746 (plus:<ssedoublemode>
14747 (zero_extend:<ssedoublemode>
14748 (match_operand:VI12_AVX2 1 "vector_operand" "%0,v"))
14749 (zero_extend:<ssedoublemode>
14750 (match_operand:VI12_AVX2 2 "vector_operand" "xBm,vm")))
14751 (match_operand:VI12_AVX2 <mask_expand_op3> "const1_operand"))
14752 (const_int 1))))]
;; In addition to the ISA checks, at most one of the two inputs may be
;; a memory operand.
14753 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
14754 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
14755 "@
14756 pavg<ssemodesuffix>\t{%2, %0|%0, %2}
14757 vpavg<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14758 [(set_attr "isa" "noavx,avx")
14759 (set_attr "type" "sseiadd")
14760 (set_attr "prefix_data16" "1,*")
14761 (set_attr "prefix" "orig,<mask_prefix>")
14762 (set_attr "mode" "<sseinsnmode>")])
14763
14764 ;; The correct representation for this is absolutely enormous, and
14765 ;; surely not generally useful.
;;
;; (v)psadbw is therefore modelled as an opaque UNSPEC_PSADBW of two
;; byte-vector inputs (<ssebytemode>) producing a VI8_AVX2_AVX512BW
;; result.  Alternative 0 is the two-operand legacy form with operand 1
;; tied to the destination; alternative 1 is the three-operand form.
14766 (define_insn "<sse2_avx2>_psadbw"
14767 [(set (match_operand:VI8_AVX2_AVX512BW 0 "register_operand" "=x,v")
14768 (unspec:VI8_AVX2_AVX512BW
14769 [(match_operand:<ssebytemode> 1 "register_operand" "0,v")
14770 (match_operand:<ssebytemode> 2 "vector_operand" "xBm,vm")]
14771 UNSPEC_PSADBW))]
14772 "TARGET_SSE2"
14773 "@
14774 psadbw\t{%2, %0|%0, %2}
14775 vpsadbw\t{%2, %1, %0|%0, %1, %2}"
14776 [(set_attr "isa" "noavx,avx")
14777 (set_attr "type" "sseiadd")
14778 (set_attr "atom_unit" "simul")
14779 (set_attr "prefix_data16" "1,*")
14780 (set_attr "prefix" "orig,maybe_evex")
14781 (set_attr "mode" "<sseinsnmode>")])
14782
;; (v)movmskps / (v)movmskpd: UNSPEC_MOVMSK of a 128- or 256-bit float
;; vector register, producing an SImode result in a general register.
14783 (define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>"
14784 [(set (match_operand:SI 0 "register_operand" "=r")
14785 (unspec:SI
14786 [(match_operand:VF_128_256 1 "register_operand" "x")]
14787 UNSPEC_MOVMSK))]
14788 "TARGET_SSE"
14789 "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
14790 [(set_attr "type" "ssemov")
14791 (set_attr "prefix" "maybe_vex")
14792 (set_attr "mode" "<MODE>")])
14793
;; 64-bit variant of the movmsk pattern: the SImode unspec result is
;; zero-extended to DImode.  The same instruction is emitted, with the
;; destination printed through the %k modifier.
14794 (define_insn "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext"
14795 [(set (match_operand:DI 0 "register_operand" "=r")
14796 (zero_extend:DI
14797 (unspec:SI
14798 [(match_operand:VF_128_256 1 "register_operand" "x")]
14799 UNSPEC_MOVMSK)))]
14800 "TARGET_64BIT && TARGET_SSE"
14801 "%vmovmsk<ssemodesuffix>\t{%1, %k0|%k0, %1}"
14802 [(set_attr "type" "ssemov")
14803 (set_attr "prefix" "maybe_vex")
14804 (set_attr "mode" "<MODE>")])
14805
;; Matches movmsk applied to a "less than zero" comparison of an integer
;; vector (operand 2 is const0_operand) and, after reload, splits it into
;; a plain movmsk of operand 1 reinterpreted in the float vector mode via
;; gen_lowpart; the comparison is dropped.
;; NOTE(review): presumably valid because movmsk only reads the sign bit
;; of each element, which x and (x < 0) share -- confirm.
14806 (define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_lt"
14807 [(set (match_operand:SI 0 "register_operand" "=r")
14808 (unspec:SI
14809 [(lt:VF_128_256
14810 (match_operand:<sseintvecmode> 1 "register_operand" "x")
14811 (match_operand:<sseintvecmode> 2 "const0_operand" "C"))]
14812 UNSPEC_MOVMSK))]
14813 "TARGET_SSE"
14814 "#"
14815 "&& reload_completed"
14816 [(set (match_dup 0)
14817 (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))]
14818 "operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
14819 [(set_attr "type" "ssemov")
14820 (set_attr "prefix" "maybe_vex")
14821 (set_attr "mode" "<MODE>")])
14822
;; Zero-extended (DImode result, 64-bit only) counterpart of the movmsk
;; "less than zero" pattern: after reload it splits into the zero-extended
;; plain movmsk of operand 1 reinterpreted in the float vector mode.
14823 (define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext_lt"
14824 [(set (match_operand:DI 0 "register_operand" "=r")
14825 (zero_extend:DI
14826 (unspec:SI
14827 [(lt:VF_128_256
14828 (match_operand:<sseintvecmode> 1 "register_operand" "x")
14829 (match_operand:<sseintvecmode> 2 "const0_operand" "C"))]
14830 UNSPEC_MOVMSK)))]
14831 "TARGET_64BIT && TARGET_SSE"
14832 "#"
14833 "&& reload_completed"
14834 [(set (match_dup 0)
14835 (zero_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
14836 "operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
14837 [(set_attr "type" "ssemov")
14838 (set_attr "prefix" "maybe_vex")
14839 (set_attr "mode" "<MODE>")])
14840
;; Matches movmsk applied to a float-mode subreg of an arithmetic right
;; shift (by a constant, operand 2) of an integer vector and, after
;; reload, splits it into a plain movmsk of the unshifted operand 1
;; reinterpreted in the float vector mode; the shift is dropped.
;; NOTE(review): presumably valid because an arithmetic right shift keeps
;; each element's sign bit, which is all movmsk reads -- confirm.
14841 (define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_shift"
14842 [(set (match_operand:SI 0 "register_operand" "=r")
14843 (unspec:SI
14844 [(subreg:VF_128_256
14845 (ashiftrt:<sseintvecmode>
14846 (match_operand:<sseintvecmode> 1 "register_operand" "x")
14847 (match_operand:QI 2 "const_int_operand" "n")) 0)]
14848 UNSPEC_MOVMSK))]
14849 "TARGET_SSE"
14850 "#"
14851 "&& reload_completed"
14852 [(set (match_dup 0)
14853 (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))]
14854 "operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
14855 [(set_attr "type" "ssemov")
14856 (set_attr "prefix" "maybe_vex")
14857 (set_attr "mode" "<MODE>")])
14858
;; Zero-extended (DImode result, 64-bit only) counterpart of the movmsk
;; arithmetic-shift pattern: after reload it splits into the zero-extended
;; plain movmsk of the unshifted operand 1 reinterpreted in the float
;; vector mode.
14859 (define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext_shift"
14860 [(set (match_operand:DI 0 "register_operand" "=r")
14861 (zero_extend:DI
14862 (unspec:SI
14863 [(subreg:VF_128_256
14864 (ashiftrt:<sseintvecmode>
14865 (match_operand:<sseintvecmode> 1 "register_operand" "x")
14866 (match_operand:QI 2 "const_int_operand" "n")) 0)]
14867 UNSPEC_MOVMSK)))]
14868 "TARGET_64BIT && TARGET_SSE"
14869 "#"
14870 "&& reload_completed"
14871 [(set (match_dup 0)
14872 (zero_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
14873 "operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
14874 [(set_attr "type" "ssemov")
14875 (set_attr "prefix" "maybe_vex")
14876 (set_attr "mode" "<MODE>")])
14877
;; (v)pmovmskb: UNSPEC_MOVMSK of a byte vector register (VI1_AVX2),
;; producing an SImode result in a general register.
14878 (define_insn "<sse2_avx2>_pmovmskb"
14879 [(set (match_operand:SI 0 "register_operand" "=r")
14880 (unspec:SI
14881 [(match_operand:VI1_AVX2 1 "register_operand" "x")]
14882 UNSPEC_MOVMSK))]
14883 "TARGET_SSE2"
14884 "%vpmovmskb\t{%1, %0|%0, %1}"
14885 [(set_attr "type" "ssemov")
;; prefix_data16 is 1 only when TARGET_AVX is off.
14886 (set (attr "prefix_data16")
14887 (if_then_else
14888 (match_test "TARGET_AVX")
14889 (const_string "*")
14890 (const_string "1")))
14891 (set_attr "prefix" "maybe_vex")
14892 (set_attr "mode" "SI")])
14893
;; 64-bit variant of the pmovmskb pattern: the SImode unspec result is
;; zero-extended to DImode.  The same instruction is emitted, with the
;; destination printed through the %k modifier.
14894 (define_insn "*<sse2_avx2>_pmovmskb_zext"
14895 [(set (match_operand:DI 0 "register_operand" "=r")
14896 (zero_extend:DI
14897 (unspec:SI
14898 [(match_operand:VI1_AVX2 1 "register_operand" "x")]
14899 UNSPEC_MOVMSK)))]
14900 "TARGET_64BIT && TARGET_SSE2"
14901 "%vpmovmskb\t{%1, %k0|%k0, %1}"
14902 [(set_attr "type" "ssemov")
;; prefix_data16 is 1 only when TARGET_AVX is off.
14903 (set (attr "prefix_data16")
14904 (if_then_else
14905 (match_test "TARGET_AVX")
14906 (const_string "*")
14907 (const_string "1")))
14908 (set_attr "prefix" "maybe_vex")
14909 (set_attr "mode" "SI")])
14910
;; Matches pmovmskb applied to a "less than zero" comparison of a byte
;; vector (operand 2 is const0_operand) and splits it into a plain
;; pmovmskb of operand 1; the comparison is dropped.  The split condition
;; string and the preparation string are both empty, unlike the movmsk
;; *_lt patterns in this file, which use "&& reload_completed" and
;; rewrite operand 1 with gen_lowpart.
14911 (define_insn_and_split "*<sse2_avx2>_pmovmskb_lt"
14912 [(set (match_operand:SI 0 "register_operand" "=r")
14913 (unspec:SI
14914 [(lt:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand" "x")
14915 (match_operand:VI1_AVX2 2 "const0_operand" "C"))]
14916 UNSPEC_MOVMSK))]
14917 "TARGET_SSE2"
14918 "#"
14919 ""
14920 [(set (match_dup 0)
14921 (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))]
14922 ""
14923 [(set_attr "type" "ssemov")
14924 (set (attr "prefix_data16")
14925 (if_then_else
14926 (match_test "TARGET_AVX")
14927 (const_string "*")
14928 (const_string "1")))
14929 (set_attr "prefix" "maybe_vex")
14930 (set_attr "mode" "SI")])
14931
;; Zero-extended (DImode result, 64-bit only) counterpart of the pmovmskb
;; "less than zero" pattern: splits into the zero-extended plain pmovmskb
;; of operand 1.  As in the SImode variant, the split condition string and
;; the preparation string are empty.
14932 (define_insn_and_split "*<sse2_avx2>_pmovmskb_zext_lt"
14933 [(set (match_operand:DI 0 "register_operand" "=r")
14934 (zero_extend:DI
14935 (unspec:SI
14936 [(lt:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand" "x")
14937 (match_operand:VI1_AVX2 2 "const0_operand" "C"))]
14938 UNSPEC_MOVMSK)))]
14939 "TARGET_64BIT && TARGET_SSE2"
14940 "#"
14941 ""
14942 [(set (match_dup 0)
14943 (zero_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
14944 ""
14945 [(set_attr "type" "ssemov")
14946 (set (attr "prefix_data16")
14947 (if_then_else
14948 (match_test "TARGET_AVX")
14949 (const_string "*")
14950 (const_string "1")))
14951 (set_attr "prefix" "maybe_vex")
14952 (set_attr "mode" "SI")])
14953
;; Expander for the masked byte store.  The V16QI memory destination
;; (operand 0) also appears as the third unspec input via match_dup, so
;; the RTL records that the old memory contents feed into the result.
;; There is no preparation code.
14954 (define_expand "sse2_maskmovdqu"
14955 [(set (match_operand:V16QI 0 "memory_operand")
14956 (unspec:V16QI [(match_operand:V16QI 1 "register_operand")
14957 (match_operand:V16QI 2 "register_operand")
14958 (match_dup 0)]
14959 UNSPEC_MASKMOV))]
14960 "TARGET_SSE2")
14961
;; Insn for (v)maskmovdqu.  The destination address is operand 0, a Pmode
;; register constrained to "D"; it does not appear in the assembler
;; template, which prints only operands 1 and 2.  When Pmode differs from
;; word_mode the output code writes an "addr32" prefix to asm_out_file
;; before returning the template, and length_address accounts for it.
14962 (define_insn "*sse2_maskmovdqu"
14963 [(set (mem:V16QI (match_operand:P 0 "register_operand" "D"))
14964 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
14965 (match_operand:V16QI 2 "register_operand" "x")
14966 (mem:V16QI (match_dup 0))]
14967 UNSPEC_MASKMOV))]
14968 "TARGET_SSE2"
14969 {
14970 /* We can't use %^ here due to ASM_OUTPUT_OPCODE processing
14971 that requires %v to be at the beginning of the opcode name. */
14972 if (Pmode != word_mode)
14973 fputs ("\taddr32", asm_out_file);
14974 return "%vmaskmovdqu\t{%2, %1|%1, %2}";
14975 }
14976 [(set_attr "type" "ssemov")
14977 (set_attr "prefix_data16" "1")
14978 (set (attr "length_address")
14979 (symbol_ref ("Pmode != word_mode")))
14980 ;; The implicit %rdi operand confuses default length_vex computation.
14981 (set (attr "length_vex")
14982 (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))")))
14983 (set_attr "prefix" "maybe_vex")
14984 (set_attr "znver1_decode" "vector")
14985 (set_attr "mode" "TI")])
14986
;; (v)ldmxcsr: volatile unspec (UNSPECV_LDMXCSR) taking an SImode memory
;; operand.  The "memory" attribute is "load".
14987 (define_insn "sse_ldmxcsr"
14988 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
14989 UNSPECV_LDMXCSR)]
14990 "TARGET_SSE"
14991 "%vldmxcsr\t%0"
14992 [(set_attr "type" "sse")
14993 (set_attr "atom_sse_attr" "mxcsr")
14994 (set_attr "prefix" "maybe_vex")
14995 (set_attr "memory" "load")])
14996
;; (v)stmxcsr: sets an SImode memory operand from a volatile unspec
;; (UNSPECV_STMXCSR).  The "memory" attribute is "store".
14997 (define_insn "sse_stmxcsr"
14998 [(set (match_operand:SI 0 "memory_operand" "=m")
14999 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
15000 "TARGET_SSE"
15001 "%vstmxcsr\t%0"
15002 [(set_attr "type" "sse")
15003 (set_attr "atom_sse_attr" "mxcsr")
15004 (set_attr "prefix" "maybe_vex")
15005 (set_attr "memory" "store")])
15006
;; clflush: volatile unspec (UNSPECV_CLFLUSH) taking an address operand
;; ("p" constraint), printed with the %a modifier.  The "memory"
;; attribute is "unknown".
15007 (define_insn "sse2_clflush"
15008 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
15009 UNSPECV_CLFLUSH)]
15010 "TARGET_SSE2"
15011 "clflush\t%a0"
15012 [(set_attr "type" "sse")
15013 (set_attr "atom_sse_attr" "fence")
15014 (set_attr "memory" "unknown")])
15015
15016 ;; As per AMD and Intel ISA manuals, the first operand is extensions
15017 ;; and it goes to %ecx. The second operand received is hints and it goes
15018 ;; to %eax.
;;
;; Both operands are SImode registers pinned by constraint ("c" and "a");
;; the template prints the bare mnemonic with no operands, and the insn
;; length is fixed at 3.
15019 (define_insn "sse3_mwait"
15020 [(unspec_volatile [(match_operand:SI 0 "register_operand" "c")
15021 (match_operand:SI 1 "register_operand" "a")]
15022 UNSPECV_MWAIT)]
15023 "TARGET_SSE3"
15024 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
15025 ;; Since 32bit register operands are implicitly zero extended to 64bit,
15026 ;; we only need to set up 32bit registers.
15027 "mwait"
15028 [(set_attr "length" "3")])
15029
;; monitor: volatile unspec (UNSPECV_MONITOR) of a Pmode register pinned
;; to "a" and two SImode registers pinned to "c" and "d".  The template
;; prints the mnemonic with the %^ prefix escape and no operands; the
;; length is 3, plus 1 when Pmode differs from word_mode.
15030 (define_insn "sse3_monitor_<mode>"
15031 [(unspec_volatile [(match_operand:P 0 "register_operand" "a")
15032 (match_operand:SI 1 "register_operand" "c")
15033 (match_operand:SI 2 "register_operand" "d")]
15034 UNSPECV_MONITOR)]
15035 "TARGET_SSE3"
15036 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
15037 ;; RCX and RDX are used. Since 32bit register operands are implicitly
15038 ;; zero extended to 64bit, we only need to set up 32bit registers.
15039 "%^monitor"
15040 [(set (attr "length")
15041 (symbol_ref ("(Pmode != word_mode) + 3")))])
15042
15043 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15044 ;;
15045 ;; SSSE3 instructions
15046 ;;
15047 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15048
;; RTL codes iterated over by the 16-bit horizontal add/subtract patterns
;; below: wrapping and signed-saturating plus and minus.
15049 (define_code_iterator ssse3_plusminus [plus ss_plus minus ss_minus])
15050
;; 256-bit horizontal add/subtract of adjacent 16-bit elements
;; (vphaddw, vphaddsw, vphsubw, vphsubsw, selected by ssse3_plusminus).
;;
;; The AVX2 instruction works on each 128-bit lane independently, so the
;; result is laid out as
;;   elements  0..3   pairs taken from operand 1, elements 0..7
;;   elements  4..7   pairs taken from operand 2, elements 0..7
;;   elements  8..11  pairs taken from operand 1, elements 8..15
;;   elements 12..15  pairs taken from operand 2, elements 8..15
;; and NOT as "all pairs of operand 1 followed by all pairs of operand 2".
;; The RTL below follows the lane-wise layout so that it describes what
;; the emitted instruction actually computes.
;;
;; The first reference to each input is a match_operand and every later
;; reference a match_dup; operand numbers, predicates and constraints are
;; unchanged, so the generated gen_* interface is the same.
(define_insn "avx2_ph<plusminus_mnemonic>wv16hi3"
  [(set (match_operand:V16HI 0 "register_operand" "=x")
        (vec_concat:V16HI
          ;; Result elements 0..7: low 128-bit lane.
          (vec_concat:V8HI
            ;; Low lane of operand 1.
            (vec_concat:V4HI
              (vec_concat:V2HI
                (ssse3_plusminus:HI
                  (vec_select:HI
                    (match_operand:V16HI 1 "register_operand" "x")
                    (parallel [(const_int 0)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
                (ssse3_plusminus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
              (vec_concat:V2HI
                (ssse3_plusminus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
                (ssse3_plusminus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
            ;; Low lane of operand 2.
            (vec_concat:V4HI
              (vec_concat:V2HI
                (ssse3_plusminus:HI
                  (vec_select:HI
                    (match_operand:V16HI 2 "nonimmediate_operand" "xm")
                    (parallel [(const_int 0)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
                (ssse3_plusminus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
              (vec_concat:V2HI
                (ssse3_plusminus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
                (ssse3_plusminus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 7)]))))))
          ;; Result elements 8..15: high 128-bit lane.
          (vec_concat:V8HI
            ;; High lane of operand 1.
            (vec_concat:V4HI
              (vec_concat:V2HI
                (ssse3_plusminus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
                (ssse3_plusminus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
              (vec_concat:V2HI
                (ssse3_plusminus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
                (ssse3_plusminus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 15)])))))
            ;; High lane of operand 2.
            (vec_concat:V4HI
              (vec_concat:V2HI
                (ssse3_plusminus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
                (ssse3_plusminus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
              (vec_concat:V2HI
                (ssse3_plusminus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
                (ssse3_plusminus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
  "TARGET_AVX2"
  "vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
15126
;; 128-bit horizontal add/subtract of adjacent 16-bit elements.  Result
;; elements 0..3 are the pairwise results over operand 1 (elements 0..7)
;; and result elements 4..7 the pairwise results over operand 2.
;; Alternative 0 is the two-operand legacy form with operand 1 tied to the
;; destination; alternative 1 is the three-operand VEX form.
15127 (define_insn "ssse3_ph<plusminus_mnemonic>wv8hi3"
15128 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
15129 (vec_concat:V8HI
15130 (vec_concat:V4HI
15131 (vec_concat:V2HI
15132 (ssse3_plusminus:HI
15133 (vec_select:HI
15134 (match_operand:V8HI 1 "register_operand" "0,x")
15135 (parallel [(const_int 0)]))
15136 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
15137 (ssse3_plusminus:HI
15138 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
15139 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
15140 (vec_concat:V2HI
15141 (ssse3_plusminus:HI
15142 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
15143 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
15144 (ssse3_plusminus:HI
15145 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
15146 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
15147 (vec_concat:V4HI
15148 (vec_concat:V2HI
15149 (ssse3_plusminus:HI
15150 (vec_select:HI
15151 (match_operand:V8HI 2 "vector_operand" "xBm,xm")
15152 (parallel [(const_int 0)]))
15153 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
15154 (ssse3_plusminus:HI
15155 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
15156 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
15157 (vec_concat:V2HI
15158 (ssse3_plusminus:HI
15159 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
15160 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
15161 (ssse3_plusminus:HI
15162 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
15163 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
15164 "TARGET_SSSE3"
15165 "@
15166 ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}
15167 vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
15168 [(set_attr "isa" "noavx,avx")
15169 (set_attr "type" "sseiadd")
15170 (set_attr "atom_unit" "complex")
15171 (set_attr "prefix_data16" "1,*")
15172 (set_attr "prefix_extra" "1")
15173 (set_attr "prefix" "orig,vex")
15174 (set_attr "mode" "TI")])
15175
;; 64-bit (V4HI, "y" register class) horizontal add/subtract of adjacent
;; 16-bit elements.  Result elements 0..1 come from the pairs of operand 1
;; and elements 2..3 from the pairs of operand 2.  Single two-operand
;; alternative with operand 1 tied to the destination.
15176 (define_insn "ssse3_ph<plusminus_mnemonic>wv4hi3"
15177 [(set (match_operand:V4HI 0 "register_operand" "=y")
15178 (vec_concat:V4HI
15179 (vec_concat:V2HI
15180 (ssse3_plusminus:HI
15181 (vec_select:HI
15182 (match_operand:V4HI 1 "register_operand" "0")
15183 (parallel [(const_int 0)]))
15184 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
15185 (ssse3_plusminus:HI
15186 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
15187 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
15188 (vec_concat:V2HI
15189 (ssse3_plusminus:HI
15190 (vec_select:HI
15191 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
15192 (parallel [(const_int 0)]))
15193 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
15194 (ssse3_plusminus:HI
15195 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
15196 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
15197 "TARGET_SSSE3"
15198 "ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}"
15199 [(set_attr "type" "sseiadd")
15200 (set_attr "atom_unit" "complex")
15201 (set_attr "prefix_extra" "1")
;; prefix_rex is computed from the insn by x86_extended_reg_mentioned_p.
15202 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
15203 (set_attr "mode" "DI")])
15204
;; 256-bit horizontal add/subtract of adjacent 32-bit elements
;; (vphaddd, vphsubd, selected by plusminus).
;;
;; The AVX2 instruction works on each 128-bit lane independently, so the
;; result is laid out as
;;   elements 0..1   pairs taken from operand 1, elements 0..3
;;   elements 2..3   pairs taken from operand 2, elements 0..3
;;   elements 4..5   pairs taken from operand 1, elements 4..7
;;   elements 6..7   pairs taken from operand 2, elements 4..7
;; and NOT as "all pairs of operand 1 followed by all pairs of operand 2".
;; The RTL below follows the lane-wise layout so that it describes what
;; the emitted instruction actually computes.
;;
;; The first reference to each input is a match_operand and every later
;; reference a match_dup; operand numbers, predicates and constraints are
;; unchanged, so the generated gen_* interface is the same.
(define_insn "avx2_ph<plusminus_mnemonic>dv8si3"
  [(set (match_operand:V8SI 0 "register_operand" "=x")
        (vec_concat:V8SI
          ;; Result elements 0..3: low 128-bit lane.
          (vec_concat:V4SI
            ;; Low lane of operand 1.
            (vec_concat:V2SI
              (plusminus:SI
                (vec_select:SI
                  (match_operand:V8SI 1 "register_operand" "x")
                  (parallel [(const_int 0)]))
                (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
              (plusminus:SI
                (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
                (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
            ;; Low lane of operand 2.
            (vec_concat:V2SI
              (plusminus:SI
                (vec_select:SI
                  (match_operand:V8SI 2 "nonimmediate_operand" "xm")
                  (parallel [(const_int 0)]))
                (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
              (plusminus:SI
                (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
                (vec_select:SI (match_dup 2) (parallel [(const_int 3)])))))
          ;; Result elements 4..7: high 128-bit lane.
          (vec_concat:V4SI
            ;; High lane of operand 1.
            (vec_concat:V2SI
              (plusminus:SI
                (vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
                (vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
              (plusminus:SI
                (vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
                (vec_select:SI (match_dup 1) (parallel [(const_int 7)]))))
            ;; High lane of operand 2.
            (vec_concat:V2SI
              (plusminus:SI
                (vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
                (vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
              (plusminus:SI
                (vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
                (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_AVX2"
  "vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
15248
;; 128-bit horizontal add/subtract of adjacent 32-bit elements.  Result
;; elements 0..1 are the pairwise results over operand 1 and elements 2..3
;; the pairwise results over operand 2.  Alternative 0 is the two-operand
;; legacy form with operand 1 tied to the destination; alternative 1 is
;; the three-operand VEX form.
15249 (define_insn "ssse3_ph<plusminus_mnemonic>dv4si3"
15250 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
15251 (vec_concat:V4SI
15252 (vec_concat:V2SI
15253 (plusminus:SI
15254 (vec_select:SI
15255 (match_operand:V4SI 1 "register_operand" "0,x")
15256 (parallel [(const_int 0)]))
15257 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
15258 (plusminus:SI
15259 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
15260 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
15261 (vec_concat:V2SI
15262 (plusminus:SI
15263 (vec_select:SI
15264 (match_operand:V4SI 2 "vector_operand" "xBm,xm")
15265 (parallel [(const_int 0)]))
15266 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
15267 (plusminus:SI
15268 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
15269 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
15270 "TARGET_SSSE3"
15271 "@
15272 ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}
15273 vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
15274 [(set_attr "isa" "noavx,avx")
15275 (set_attr "type" "sseiadd")
15276 (set_attr "atom_unit" "complex")
15277 (set_attr "prefix_data16" "1,*")
15278 (set_attr "prefix_extra" "1")
15279 (set_attr "prefix" "orig,vex")
15280 (set_attr "mode" "TI")])
15281
;; 64-bit (V2SI, "y" register class) horizontal add/subtract: result
;; element 0 combines the two elements of operand 1 and result element 1
;; the two elements of operand 2.  Single two-operand alternative with
;; operand 1 tied to the destination.
15282 (define_insn "ssse3_ph<plusminus_mnemonic>dv2si3"
15283 [(set (match_operand:V2SI 0 "register_operand" "=y")
15284 (vec_concat:V2SI
15285 (plusminus:SI
15286 (vec_select:SI
15287 (match_operand:V2SI 1 "register_operand" "0")
15288 (parallel [(const_int 0)]))
15289 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
15290 (plusminus:SI
15291 (vec_select:SI
15292 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
15293 (parallel [(const_int 0)]))
15294 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
15295 "TARGET_SSSE3"
15296 "ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}"
15297 [(set_attr "type" "sseiadd")
15298 (set_attr "atom_unit" "complex")
15299 (set_attr "prefix_extra" "1")
;; prefix_rex is computed from the insn by x86_extended_reg_mentioned_p.
15300 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
15301 (set_attr "mode" "DI")])
15302
;; 256-bit vpmaddubsw.  For each of the 16 result elements the RTL takes
;; the even-indexed and the odd-indexed byte of the corresponding pair,
;; zero-extends the byte from operand 1 and sign-extends the byte from
;; operand 2 to HImode, multiplies them, and combines the even and odd
;; products with a signed-saturating add (ss_plus).  Both alternatives
;; emit the same template; alternative 1 uses the "v" register class and
;; requires isa avx512bw (prefix evex).
15303 (define_insn "avx2_pmaddubsw256"
15304 [(set (match_operand:V16HI 0 "register_operand" "=x,v")
15305 (ss_plus:V16HI
;; Products of the even-indexed bytes (0, 2, ..., 30).
15306 (mult:V16HI
15307 (zero_extend:V16HI
15308 (vec_select:V16QI
15309 (match_operand:V32QI 1 "register_operand" "x,v")
15310 (parallel [(const_int 0) (const_int 2)
15311 (const_int 4) (const_int 6)
15312 (const_int 8) (const_int 10)
15313 (const_int 12) (const_int 14)
15314 (const_int 16) (const_int 18)
15315 (const_int 20) (const_int 22)
15316 (const_int 24) (const_int 26)
15317 (const_int 28) (const_int 30)])))
15318 (sign_extend:V16HI
15319 (vec_select:V16QI
15320 (match_operand:V32QI 2 "nonimmediate_operand" "xm,vm")
15321 (parallel [(const_int 0) (const_int 2)
15322 (const_int 4) (const_int 6)
15323 (const_int 8) (const_int 10)
15324 (const_int 12) (const_int 14)
15325 (const_int 16) (const_int 18)
15326 (const_int 20) (const_int 22)
15327 (const_int 24) (const_int 26)
15328 (const_int 28) (const_int 30)]))))
;; Products of the odd-indexed bytes (1, 3, ..., 31).
15329 (mult:V16HI
15330 (zero_extend:V16HI
15331 (vec_select:V16QI (match_dup 1)
15332 (parallel [(const_int 1) (const_int 3)
15333 (const_int 5) (const_int 7)
15334 (const_int 9) (const_int 11)
15335 (const_int 13) (const_int 15)
15336 (const_int 17) (const_int 19)
15337 (const_int 21) (const_int 23)
15338 (const_int 25) (const_int 27)
15339 (const_int 29) (const_int 31)])))
15340 (sign_extend:V16HI
15341 (vec_select:V16QI (match_dup 2)
15342 (parallel [(const_int 1) (const_int 3)
15343 (const_int 5) (const_int 7)
15344 (const_int 9) (const_int 11)
15345 (const_int 13) (const_int 15)
15346 (const_int 17) (const_int 19)
15347 (const_int 21) (const_int 23)
15348 (const_int 25) (const_int 27)
15349 (const_int 29) (const_int 31)]))))))]
15350 "TARGET_AVX2"
15351 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
15352 [(set_attr "isa" "*,avx512bw")
15353 (set_attr "type" "sseiadd")
15354 (set_attr "prefix_extra" "1")
15355 (set_attr "prefix" "vex,evex")
15356 (set_attr "mode" "OI")])
15357
;; AVX512BW vpmaddubsw, modelled as an unspec: spelling out the exact RTL
;; (as the narrower pmaddubsw patterns do) would be absolutely enormous
;; and surely not generally useful.
(define_insn "avx512bw_pmaddubsw512<mode><mask_name>"
  [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
        (unspec:VI2_AVX512VL
          [(match_operand:<dbpsadbwmode> 1 "register_operand" "v")
           (match_operand:<dbpsadbwmode> 2 "nonimmediate_operand" "vm")]
          UNSPEC_PMADDUBSW512))]
  "TARGET_AVX512BW"
  "vpmaddubsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
15371
;; 512-bit vpmulhrsw: sign-extend both V32HI inputs, multiply, shift right
;; by 14, add a vector of ones, shift right by 1 and truncate to V32HI.
(define_insn "avx512bw_umulhrswv32hi3<mask_name>"
  [(set (match_operand:V32HI 0 "register_operand" "=v")
        (truncate:V32HI
          (lshiftrt:V32SI
            (plus:V32SI
              (lshiftrt:V32SI
                (mult:V32SI
                  (sign_extend:V32SI
                    (match_operand:V32HI 1 "nonimmediate_operand" "%v"))
                  (sign_extend:V32SI
                    (match_operand:V32HI 2 "nonimmediate_operand" "vm")))
                (const_int 14))
              ;; Thirty-two ones, one per element.
              (const_vector:V32HI
                [(const_int 1) (const_int 1) (const_int 1) (const_int 1)
                 (const_int 1) (const_int 1) (const_int 1) (const_int 1)
                 (const_int 1) (const_int 1) (const_int 1) (const_int 1)
                 (const_int 1) (const_int 1) (const_int 1) (const_int 1)
                 (const_int 1) (const_int 1) (const_int 1) (const_int 1)
                 (const_int 1) (const_int 1) (const_int 1) (const_int 1)
                 (const_int 1) (const_int 1) (const_int 1) (const_int 1)
                 (const_int 1) (const_int 1) (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_AVX512BW"
  "vpmulhrsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
15406
;; 128-bit pmaddubsw.  Even bytes of operand 1 (zero-extended) times even
;; bytes of operand 2 (sign-extended), plus the same for the odd bytes,
;; summed with signed saturation.  Alternative 0 is the two-operand legacy
;; encoding; alternatives 1 and 2 are the three-operand "v" forms.
(define_insn "ssse3_pmaddubsw128"
  [(set (match_operand:V8HI 0 "register_operand" "=x,x,v")
        (ss_plus:V8HI
          ;; Products of the even-numbered bytes.
          (mult:V8HI
            (zero_extend:V8HI
              (vec_select:V8QI
                (match_operand:V16QI 1 "register_operand" "0,x,v")
                (parallel
                  [(const_int 0) (const_int 2) (const_int 4) (const_int 6)
                   (const_int 8) (const_int 10) (const_int 12) (const_int 14)])))
            (sign_extend:V8HI
              (vec_select:V8QI
                (match_operand:V16QI 2 "vector_operand" "xBm,xm,vm")
                (parallel
                  [(const_int 0) (const_int 2) (const_int 4) (const_int 6)
                   (const_int 8) (const_int 10) (const_int 12) (const_int 14)]))))
          ;; Products of the odd-numbered bytes.
          (mult:V8HI
            (zero_extend:V8HI
              (vec_select:V8QI
                (match_dup 1)
                (parallel
                  [(const_int 1) (const_int 3) (const_int 5) (const_int 7)
                   (const_int 9) (const_int 11) (const_int 13) (const_int 15)])))
            (sign_extend:V8HI
              (vec_select:V8QI
                (match_dup 2)
                (parallel
                  [(const_int 1) (const_int 3) (const_int 5) (const_int 7)
                   (const_int 9) (const_int 11) (const_int 13) (const_int 15)]))))))]
  "TARGET_SSSE3"
  "@
   pmaddubsw\t{%2, %0|%0, %2}
   vpmaddubsw\t{%2, %1, %0|%0, %1, %2}
   vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx,avx512bw")
   (set_attr "type" "sseiadd")
   (set_attr "atom_unit" "simul")
   (set_attr "prefix_data16" "1,*,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex,evex")
   (set_attr "mode" "TI")])
15450
;; 64-bit ("y" register class) pmaddubsw: same even/odd byte
;; multiply-and-saturating-add shape as the wider patterns, on V8QI inputs
;; producing V4HI.  Operand 1 is tied to the destination.
(define_insn "ssse3_pmaddubsw"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (ss_plus:V4HI
          ;; Products of the even-numbered bytes.
          (mult:V4HI
            (zero_extend:V4HI
              (vec_select:V4QI
                (match_operand:V8QI 1 "register_operand" "0")
                (parallel
                  [(const_int 0) (const_int 2) (const_int 4) (const_int 6)])))
            (sign_extend:V4HI
              (vec_select:V4QI
                (match_operand:V8QI 2 "nonimmediate_operand" "ym")
                (parallel
                  [(const_int 0) (const_int 2) (const_int 4) (const_int 6)]))))
          ;; Products of the odd-numbered bytes.
          (mult:V4HI
            (zero_extend:V4HI
              (vec_select:V4QI
                (match_dup 1)
                (parallel
                  [(const_int 1) (const_int 3) (const_int 5) (const_int 7)])))
            (sign_extend:V4HI
              (vec_select:V4QI
                (match_dup 2)
                (parallel
                  [(const_int 1) (const_int 3) (const_int 5) (const_int 7)]))))))]
  "TARGET_SSSE3"
  "pmaddubsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "simul")
   (set_attr "prefix_extra" "1")
   (set (attr "prefix_rex")
        (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
15481
;; Modes handled by the pmulhrsw expanders; V16HI only with AVX2.
(define_mode_iterator PMULHRSW
  [V4HI
   V8HI
   (V16HI "TARGET_AVX2")])
15484
;; Masked pmulhrsw expander.  Operand 3 is the merge source and operand 4
;; the mask register; operand 5 is created here as the constant-1 vector
;; added before the final shift.
(define_expand "<ssse3_avx2>_pmulhrsw<mode>3_mask"
  [(set (match_operand:PMULHRSW 0 "register_operand")
        (vec_merge:PMULHRSW
          (truncate:PMULHRSW
            (lshiftrt:<ssedoublemode>
              (plus:<ssedoublemode>
                (lshiftrt:<ssedoublemode>
                  (mult:<ssedoublemode>
                    (sign_extend:<ssedoublemode>
                      (match_operand:PMULHRSW 1 "nonimmediate_operand"))
                    (sign_extend:<ssedoublemode>
                      (match_operand:PMULHRSW 2 "nonimmediate_operand")))
                  (const_int 14))
                (match_dup 5))
              (const_int 1)))
          (match_operand:PMULHRSW 3 "register_operand")
          (match_operand:<avx512fmaskmode> 4 "register_operand")))]
  "TARGET_AVX512BW && TARGET_AVX512VL"
{
  /* Rounding constant referenced by (match_dup 5).  */
  operands[5] = CONST1_RTX(<MODE>mode);

  ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
})
15507
;; Unmasked pmulhrsw expander.  Builds
;;   truncate ((((sext (op1) * sext (op2)) >> 14) + 1) >> 1)
;; where operand 3, the constant-1 vector, is created in the preparation
;; statements.
;;
;; The condition is TARGET_SSSE3, not TARGET_AVX2: the PMULHRSW iterator
;; already gates V16HI on AVX2, and the V4HI/V8HI insns this expands to
;; ("*ssse3_pmulhrswv4hi3" and "*<ssse3_avx2>_pmulhrsw<mode>3") only
;; require SSSE3.  Demanding AVX2 here would mark the ssse3_* named
;; patterns unavailable on SSSE3-only targets.
(define_expand "<ssse3_avx2>_pmulhrsw<mode>3"
  [(set (match_operand:PMULHRSW 0 "register_operand")
        (truncate:PMULHRSW
          (lshiftrt:<ssedoublemode>
            (plus:<ssedoublemode>
              (lshiftrt:<ssedoublemode>
                (mult:<ssedoublemode>
                  (sign_extend:<ssedoublemode>
                    (match_operand:PMULHRSW 1 "nonimmediate_operand"))
                  (sign_extend:<ssedoublemode>
                    (match_operand:PMULHRSW 2 "nonimmediate_operand")))
                (const_int 14))
              (match_dup 3))
            (const_int 1))))]
  "TARGET_SSSE3"
{
  /* Rounding constant referenced by (match_dup 3).  */
  operands[3] = CONST1_RTX (<MODE>mode);
  ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
})
15527
;; Matcher for the RTL produced by the pmulhrsw expanders on the
;; VI2_AVX2 modes.  Operand 3 must be the constant-1 vector.  The
;; condition rejects the case where both multiplicands are in memory.
(define_insn "*<ssse3_avx2>_pmulhrsw<mode>3<mask_name>"
  [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x,v")
        (truncate:VI2_AVX2
          (lshiftrt:<ssedoublemode>
            (plus:<ssedoublemode>
              (lshiftrt:<ssedoublemode>
                (mult:<ssedoublemode>
                  (sign_extend:<ssedoublemode>
                    (match_operand:VI2_AVX2 1 "vector_operand" "%0,x,v"))
                  (sign_extend:<ssedoublemode>
                    (match_operand:VI2_AVX2 2 "vector_operand" "xBm,xm,vm")))
                (const_int 14))
              (match_operand:VI2_AVX2 3 "const1_operand"))
            (const_int 1))))]
  "TARGET_SSSE3 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   pmulhrsw\t{%2, %0|%0, %2}
   vpmulhrsw\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}
   vpmulhrsw\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"
  [(set_attr "isa" "noavx,avx,avx512bw")
   (set_attr "type" "sseimul")
   (set_attr "prefix_data16" "1,*,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,maybe_evex,evex")
   (set_attr "mode" "<sseinsnmode>")])
15554
;; V4HI ("y" register class) matcher for the pmulhrsw RTL shape.
;; Operand 3 must be the constant-1 vector; at most one of the two
;; multiplicands may be a memory reference.
(define_insn "*ssse3_pmulhrswv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (truncate:V4HI
          (lshiftrt:V4SI
            (plus:V4SI
              (lshiftrt:V4SI
                (mult:V4SI
                  (sign_extend:V4SI
                    (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
                  (sign_extend:V4SI
                    (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
                (const_int 14))
              (match_operand:V4HI 3 "const1_operand"))
            (const_int 1))))]
  "TARGET_SSSE3 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "pmulhrsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "prefix_extra" "1")
   (set (attr "prefix_rex")
        (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
15575
;; pshufb / vpshufb on the VI1_AVX512 modes, modelled as UNSPEC_PSHUFB of
;; operands 1 and 2.  Alternative 0 is the destructive two-operand form.
(define_insn "<ssse3_avx2>_pshufb<mode>3<mask_name>"
  [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v")
        (unspec:VI1_AVX512
          [(match_operand:VI1_AVX512 1 "register_operand" "0,x,v")
           (match_operand:VI1_AVX512 2 "vector_operand" "xBm,xm,vm")]
          UNSPEC_PSHUFB))]
  "TARGET_SSSE3 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
  "@
   pshufb\t{%2, %0|%0, %2}
   vpshufb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
   vpshufb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "noavx,avx,avx512bw")
   (set_attr "type" "sselog1")
   (set_attr "prefix_data16" "1,*,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,maybe_evex,evex")
   (set_attr "btver2_decode" "vector")
   (set_attr "mode" "<sseinsnmode>")])
15594
;; V8QI pshufb on "y"-class registers; operand 1 is tied to the
;; destination.
(define_insn "ssse3_pshufbv8qi3"
  [(set (match_operand:V8QI 0 "register_operand" "=y")
        (unspec:V8QI
          [(match_operand:V8QI 1 "register_operand" "0")
           (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
          UNSPEC_PSHUFB))]
  "TARGET_SSSE3"
  "pshufb\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set (attr "prefix_rex")
        (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
15606
;; psign{b,w,d} / vpsign{b,w,d} on the VI124_AVX2 modes, modelled as
;; UNSPEC_PSIGN of operands 1 and 2.
(define_insn "<ssse3_avx2>_psign<mode>3"
  [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
        (unspec:VI124_AVX2
          [(match_operand:VI124_AVX2 1 "register_operand" "0,x")
           (match_operand:VI124_AVX2 2 "vector_operand" "xBm,xm")]
          UNSPEC_PSIGN))]
  "TARGET_SSSE3"
  "@
   psign<ssemodesuffix>\t{%2, %0|%0, %2}
   vpsign<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog1")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<sseinsnmode>")])
15623
;; psign on the MMXMODEI modes using "y"-class registers; operand 1 is
;; tied to the destination.
(define_insn "ssse3_psign<mode>3"
  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
        (unspec:MMXMODEI
          [(match_operand:MMXMODEI 1 "register_operand" "0")
           (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
          UNSPEC_PSIGN))]
  "TARGET_SSSE3"
  "psign<mmxvecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set (attr "prefix_rex")
        (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
15636
;; Masked vpalignr.  Operand 3 is a multiple of 8 in the RTL and is
;; divided by 8 just before output.  Operand 4 is the merge source (or
;; zero) and operand 5 the mask register.
(define_insn "<ssse3_avx2>_palignr<mode>_mask"
  [(set (match_operand:VI1_AVX512 0 "register_operand" "=v")
        (vec_merge:VI1_AVX512
          (unspec:VI1_AVX512
            [(match_operand:VI1_AVX512 1 "register_operand" "v")
             (match_operand:VI1_AVX512 2 "nonimmediate_operand" "vm")
             (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
            UNSPEC_PALIGNR)
          (match_operand:VI1_AVX512 4 "nonimm_or_0_operand" "0C")
          (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
  "TARGET_AVX512BW && (<MODE_SIZE> == 64 || TARGET_AVX512VL)"
{
  /* Rescale the immediate by 1/8 for the emitted instruction.  */
  HOST_WIDE_INT scaled_imm = INTVAL (operands[3]) / 8;
  operands[3] = GEN_INT (scaled_imm);

  return "vpalignr\t{%3, %2, %1, %0%{%5%}%N4|%0%{%5%}%N4, %1, %2, %3}";
}
  [(set_attr "type" "sseishft")
   (set_attr "atom_unit" "sishuf")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
15658
;; Unmasked palignr / vpalignr on the SSESCALARMODE modes.  Operand 3 is
;; a multiple of 8 in the RTL and is divided by 8 just before output.
;; Alternative 0 emits the two-operand form, alternatives 1 and 2 the
;; three-operand "v" form.
(define_insn "<ssse3_avx2>_palignr<mode>"
  [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,x,v")
        (unspec:SSESCALARMODE
          [(match_operand:SSESCALARMODE 1 "register_operand" "0,x,v")
           (match_operand:SSESCALARMODE 2 "vector_operand" "xBm,xm,vm")
           (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n,n")]
          UNSPEC_PALIGNR))]
  "TARGET_SSSE3"
{
  /* Rescale the immediate by 1/8 for the emitted instruction.  */
  HOST_WIDE_INT scaled_imm = INTVAL (operands[3]) / 8;
  operands[3] = GEN_INT (scaled_imm);

  if (which_alternative == 0)
    return "palignr\t{%3, %2, %0|%0, %2, %3}";

  if (which_alternative == 1 || which_alternative == 2)
    return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";

  gcc_unreachable ();
}
  [(set_attr "isa" "noavx,avx,avx512bw")
   (set_attr "type" "sseishft")
   (set_attr "atom_unit" "sishuf")
   (set_attr "prefix_data16" "1,*,*")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "orig,vex,evex")
   (set_attr "mode" "<sseinsnmode>")])
15689
;; DImode palignr on "y"-class registers.  Operand 3 is a multiple of 8 in
;; the RTL and is divided by 8 just before output.
(define_insn "ssse3_palignrdi"
  [(set (match_operand:DI 0 "register_operand" "=y")
        (unspec:DI
          [(match_operand:DI 1 "register_operand" "0")
           (match_operand:DI 2 "nonimmediate_operand" "ym")
           (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
          UNSPEC_PALIGNR))]
  "TARGET_SSSE3"
{
  /* Rescale the immediate by 1/8 for the emitted instruction.  */
  HOST_WIDE_INT scaled_imm = INTVAL (operands[3]) / 8;
  operands[3] = GEN_INT (scaled_imm);

  return "palignr\t{%3, %2, %0|%0, %2, %3}";
}
  [(set_attr "type" "sseishft")
   (set_attr "atom_unit" "sishuf")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set (attr "prefix_rex")
        (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
15707
;; Mode iterator for the abs instruction.  It exists because the V2DI and
;; V4DI forms are not available on pre-AVX-512 targets, so those two modes
;; are gated on AVX512VL here.
(define_mode_iterator VI1248_AVX512VL_AVX512BW
  [(V64QI "TARGET_AVX512BW")
   (V32QI "TARGET_AVX2")
   V16QI
   (V32HI "TARGET_AVX512BW")
   (V16HI "TARGET_AVX2")
   V8HI
   (V16SI "TARGET_AVX512F")
   (V8SI "TARGET_AVX2")
   V4SI
   (V8DI "TARGET_AVX512F")
   (V4DI "TARGET_AVX512VL")
   (V2DI "TARGET_AVX512VL")])
15715
;; Vector integer absolute value (pabs* / vpabs*), unmasked.
(define_insn "*abs<mode>2"
  [(set (match_operand:VI1248_AVX512VL_AVX512BW 0 "register_operand" "=v")
        (abs:VI1248_AVX512VL_AVX512BW
          (match_operand:VI1248_AVX512VL_AVX512BW 1 "vector_operand" "vBm")))]
  "TARGET_SSSE3"
  "%vpabs<ssemodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<sseinsnmode>")])
15727
;; Masked vpabs for the VI48_AVX512VL modes.  Operand 2 is the merge
;; source (or zero), operand 3 the mask register.
(define_insn "abs<mode>2_mask"
  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
        (vec_merge:VI48_AVX512VL
          (abs:VI48_AVX512VL
            (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm"))
          (match_operand:VI48_AVX512VL 2 "nonimm_or_0_operand" "0C")
          (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
  "TARGET_AVX512F"
  "vpabs<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
15740
;; Masked vpabs for the VI12_AVX512VL modes (requires AVX512BW).
;; Operand 2 is the merge source (or zero), operand 3 the mask register.
(define_insn "abs<mode>2_mask"
  [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
        (vec_merge:VI12_AVX512VL
          (abs:VI12_AVX512VL
            (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "vm"))
          (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand" "0C")
          (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
  "TARGET_AVX512BW"
  "vpabs<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
15753
;; Vector abs expander.  Without SSSE3, or for the 64-bit-element modes
;; V2DI/V4DI without AVX512VL, the work is handed to
;; ix86_expand_sse2_abs; otherwise the plain abs RTL above is emitted.
(define_expand "abs<mode>2"
  [(set (match_operand:VI_AVX2 0 "register_operand")
        (abs:VI_AVX2
          (match_operand:VI_AVX2 1 "vector_operand")))]
  "TARGET_SSE2"
{
  const bool qword_elts = (<MODE>mode == V2DImode
                           || <MODE>mode == V4DImode);

  if (!TARGET_SSSE3 || (qword_elts && !TARGET_AVX512VL))
    {
      ix86_expand_sse2_abs (operands[0], operands[1]);
      DONE;
    }
})
15768
;; pabs on the MMXMODEI modes using "y"-class registers.
(define_insn "abs<mode>2"
  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
        (abs:MMXMODEI
          (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
  "TARGET_SSSE3"
  "pabs<mmxvecsize>\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_rep" "0")
   (set_attr "prefix_extra" "1")
   (set (attr "prefix_rex")
        (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
15780
15781 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15782 ;;
15783 ;; AMD SSE4A instructions
15784 ;;
15785 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15786
;; SSE4A movnt{ss,sd}: store a scalar MODEF register to memory through
;; UNSPEC_MOVNT.
(define_insn "sse4a_movnt<mode>"
  [(set (match_operand:MODEF 0 "memory_operand" "=m")
        (unspec:MODEF
          [(match_operand:MODEF 1 "register_operand" "x")]
          UNSPEC_MOVNT))]
  "TARGET_SSE4A"
  "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "<MODE>")])
15796
;; SSE4A movnt of element 0 of a VF_128 vector register to scalar memory.
(define_insn "sse4a_vmmovnt<mode>"
  [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
        (unspec:<ssescalarmode>
          [(vec_select:<ssescalarmode>
             (match_operand:VF_128 1 "register_operand" "x")
             (parallel [(const_int 0)]))]
          UNSPEC_MOVNT))]
  "TARGET_SSE4A"
  "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "<ssescalarmode>")])
15808
;; SSE4A extrq with two 8-bit immediates (operands 2 and 3); operand 1 is
;; tied to the destination.
(define_insn "sse4a_extrqi"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "register_operand" "0")
           (match_operand 2 "const_0_to_255_operand")
           (match_operand 3 "const_0_to_255_operand")]
          UNSPEC_EXTRQI))]
  "TARGET_SSE4A"
  "extrq\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "sse")
   (set_attr "prefix_data16" "1")
   (set_attr "length_immediate" "2")
   (set_attr "mode" "TI")])
15821
;; SSE4A extrq, register-controlled form: operand 2 is a V16QI register.
(define_insn "sse4a_extrq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "register_operand" "0")
           (match_operand:V16QI 2 "register_operand" "x")]
          UNSPEC_EXTRQ))]
  "TARGET_SSE4A"
  "extrq\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
15832
;; SSE4A insertq with two 8-bit immediates (operands 3 and 4); operand 1
;; is tied to the destination.
(define_insn "sse4a_insertqi"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "register_operand" "0")
           (match_operand:V2DI 2 "register_operand" "x")
           (match_operand 3 "const_0_to_255_operand")
           (match_operand 4 "const_0_to_255_operand")]
          UNSPEC_INSERTQI))]
  "TARGET_SSE4A"
  "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
  [(set_attr "type" "sseins")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix_rep" "1")
   (set_attr "length_immediate" "2")
   (set_attr "mode" "TI")])
15847
;; SSE4A insertq, register-only form; operand 1 is tied to the
;; destination.
(define_insn "sse4a_insertq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "register_operand" "0")
           (match_operand:V2DI 2 "register_operand" "x")]
          UNSPEC_INSERTQ))]
  "TARGET_SSE4A"
  "insertq\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseins")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix_rep" "1")
   (set_attr "mode" "TI")])
15859
15860 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15861 ;;
15862 ;; Intel SSE4.1 instructions
15863 ;;
15864 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15865
;; Largest valid blend immediate per mode, i.e. one mask bit per vector
;; element.  Used to pick the const_0_to_<blendbits>_operand predicate.
(define_mode_attr blendbits
  [(V8SF "255")
   (V4SF "15")
   (V4DF "15")
   (V2DF "3")])
15869
;; Immediate-controlled floating-point blend, expressed as a vec_merge of
;; operand 2 over operand 1 under the constant mask in operand 3.
(define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>"
  [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
        (vec_merge:VF_128_256
          (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
          (match_operand:VF_128_256 1 "register_operand" "0,0,x")
          (match_operand:SI 3 "const_0_to_<blendbits>_operand")))]
  "TARGET_SSE4_1"
  "@
   blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
   blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
   vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "ssemov")
   (set_attr "length_immediate" "1")
   (set_attr "prefix_data16" "1,1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,orig,vex")
   (set_attr "mode" "<MODE>")])
15888
;; Register-controlled floating-point blend (blendvps/blendvpd and their
;; "v" forms), modelled as UNSPEC_BLENDV.  In the two non-AVX alternatives
;; the selector (operand 3) is restricted to the "Yz" register class.
(define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>"
  [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
        (unspec:VF_128_256
          [(match_operand:VF_128_256 1 "register_operand" "0,0,x")
           (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
           (match_operand:VF_128_256 3 "register_operand" "Yz,Yz,x")]
          UNSPEC_BLENDV))]
  "TARGET_SSE4_1"
  "@
   blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
   blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
   vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "ssemov")
   (set_attr "length_immediate" "1")
   (set_attr "prefix_data16" "1,1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,orig,vex")
   (set_attr "btver2_decode" "vector,vector,vector")
   (set_attr "mode" "<MODE>")])
15909
;; Scalar (MODEF) variants of blendv, used for conditional moves.
;; Expressing these through subregs of vector modes causes register
;; allocation lossage, hence the dedicated pattern.  Memory operands are
;; not accepted because the native instructions read the full 128 bits.

(define_insn "sse4_1_blendv<ssemodesuffix>"
  [(set (match_operand:MODEF 0 "register_operand" "=Yr,*x,x")
        (unspec:MODEF
          [(match_operand:MODEF 1 "register_operand" "0,0,x")
           (match_operand:MODEF 2 "register_operand" "Yr,*x,x")
           (match_operand:MODEF 3 "register_operand" "Yz,Yz,x")]
          UNSPEC_BLENDV))]
  "TARGET_SSE4_1"
{
  /* Alternative 2 is the three-operand AVX encoding.  */
  const bool avx_alt = (which_alternative == 2);

  /* When the "mode" attribute says V4SF, always use the ps form.  */
  if (get_attr_mode (insn) == MODE_V4SF)
    return (avx_alt
            ? "vblendvps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
            : "blendvps\t{%3, %2, %0|%0, %2, %3}");

  return (avx_alt
          ? "vblendv<ssevecmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
          : "blendv<ssevecmodesuffix>\t{%3, %2, %0|%0, %2, %3}");
}
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "ssemov")
   (set_attr "length_immediate" "1")
   (set_attr "prefix_data16" "1,1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,orig,vex")
   (set_attr "btver2_decode" "vector,vector,vector")
   (set (attr "mode")
        (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
                 (const_string "V4SF")
               (match_test "TARGET_AVX")
                 (const_string "<ssevecmode>")
               (match_test "optimize_function_for_size_p (cfun)")
                 (const_string "V4SF")
               ]
               (const_string "<ssevecmode>")))])
15949
;; blendv whose selector is written as (subreg (lt x 0)).  After reload it
;; is split into a plain UNSPEC_BLENDV that uses x itself, viewed in the
;; floating-point vector mode, as the selector.
(define_insn_and_split "*<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>_lt"
  [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
        (unspec:VF_128_256
          [(match_operand:VF_128_256 1 "register_operand" "0,0,x")
           (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
           (subreg:VF_128_256
             (lt:<sseintvecmode>
               (match_operand:<sseintvecmode> 3 "register_operand" "Yz,Yz,x")
               (match_operand:<sseintvecmode> 4 "const0_operand" "C,C,C"))
             0)]
          UNSPEC_BLENDV))]
  "TARGET_SSE4_1"
  "#"
  "&& reload_completed"
  [(set (match_dup 0)
        (unspec:VF_128_256
          [(match_dup 1)
           (match_dup 2)
           (match_dup 3)]
          UNSPEC_BLENDV))]
{
  /* Reinterpret the integer selector in the blend's own vector mode.  */
  operands[3] = gen_lowpart (<MODE>mode, operands[3]);
}
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "ssemov")
   (set_attr "length_immediate" "1")
   (set_attr "prefix_data16" "1,1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,orig,vex")
   (set_attr "btver2_decode" "vector,vector,vector")
   (set_attr "mode" "<MODE>")])
15975
;; Floating-point mnemonic suffix with the same element width as the
;; given integer vector mode.
(define_mode_attr ssefltmodesuffix
  [(V2DI "pd")
   (V4DI "pd")
   (V4SI "ps")
   (V8SI "ps")])
15978
;; Floating-point vector mode with the same element count and width as
;; the given integer vector mode.
(define_mode_attr ssefltvecmode
  [(V2DI "V2DF")
   (V4DI "V4DF")
   (V4SI "V4SF")
   (V8SI "V8SF")])
15981
;; Byte-mode blend whose selector is (subreg (lt x 0)) with x a
;; VI48_AVX vector.  After reload it is split into a floating-point
;; UNSPEC_BLENDV of matching element width, with operands 0-3
;; reinterpreted in <ssefltvecmode>.
(define_insn_and_split "*<sse4_1>_blendv<ssefltmodesuffix><avxsizesuffix>_ltint"
  [(set (match_operand:<ssebytemode> 0 "register_operand" "=Yr,*x,x")
        (unspec:<ssebytemode>
          [(match_operand:<ssebytemode> 1 "register_operand" "0,0,x")
           (match_operand:<ssebytemode> 2 "vector_operand" "YrBm,*xBm,xm")
           (subreg:<ssebytemode>
             (lt:VI48_AVX
               (match_operand:VI48_AVX 3 "register_operand" "Yz,Yz,x")
               (match_operand:VI48_AVX 4 "const0_operand" "C,C,C"))
             0)]
          UNSPEC_BLENDV))]
  "TARGET_SSE4_1"
  "#"
  "&& reload_completed"
  [(set (match_dup 0)
        (unspec:<ssefltvecmode>
          [(match_dup 1)
           (match_dup 2)
           (match_dup 3)]
          UNSPEC_BLENDV))]
{
  /* View destination, both sources and the selector in the FP mode.  */
  for (int opno = 0; opno < 4; opno++)
    operands[opno] = gen_lowpart (<ssefltvecmode>mode, operands[opno]);
}
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "ssemov")
   (set_attr "length_immediate" "1")
   (set_attr "prefix_data16" "1,1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,orig,vex")
   (set_attr "btver2_decode" "vector,vector,vector")
   (set_attr "mode" "<ssefltvecmode>")])
16012
;; dpps/dppd and their "v" forms, modelled as UNSPEC_DP of operands 1 and
;; 2 under the 8-bit immediate in operand 3.  Operands 1 and 2 are marked
;; commutative.
(define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
  [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
        (unspec:VF_128_256
          [(match_operand:VF_128_256 1 "vector_operand" "%0,0,x")
           (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
           (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
          UNSPEC_DP))]
  "TARGET_SSE4_1"
  "@
   dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
   dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
   vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "ssemul")
   (set_attr "length_immediate" "1")
   (set_attr "prefix_data16" "1,1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,orig,vex")
   (set_attr "btver2_decode" "vector,vector,vector")
   (set_attr "znver1_decode" "vector,vector,vector")
   (set_attr "mode" "<MODE>")])
16034
;; Name prefix of the `vmovntdqa' pattern for each vector width.
(define_mode_attr vi8_sse4_1_avx2_avx512
  [(V2DI "sse4_1")
   (V4DI "avx2")
   (V8DI "avx512f")])
16038
;; movntdqa / vmovntdqa: load from memory into a vector register through
;; UNSPEC_MOVNTDQA.
(define_insn "<vi8_sse4_1_avx2_avx512>_movntdqa"
  [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand" "=Yr,*x,v")
        (unspec:VI8_AVX2_AVX512F
          [(match_operand:VI8_AVX2_AVX512F 1 "memory_operand" "m,m,m")]
          UNSPEC_MOVNTDQA))]
  "TARGET_SSE4_1"
  "%vmovntdqa\t{%1, %0|%0, %1}"
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1,1,*")
   (set_attr "prefix" "orig,orig,maybe_evex")
   (set_attr "mode" "<sseinsnmode>")])
16050
;; mpsadbw / vmpsadbw, modelled as UNSPEC_MPSADBW of operands 1 and 2
;; under the 8-bit immediate in operand 3.
(define_insn "<sse4_1_avx2>_mpsadbw"
  [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
        (unspec:VI1_AVX2
          [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x")
           (match_operand:VI1_AVX2 2 "vector_operand" "YrBm,*xBm,xm")
           (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
          UNSPEC_MPSADBW))]
  "TARGET_SSE4_1"
  "@
   mpsadbw\t{%3, %2, %0|%0, %2, %3}
   mpsadbw\t{%3, %2, %0|%0, %2, %3}
   vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "sselog1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,orig,vex")
   (set_attr "btver2_decode" "vector,vector,vector")
   (set_attr "znver1_decode" "vector,vector,vector")
   (set_attr "mode" "<sseinsnmode>")])
16071
;; packusdw / vpackusdw: unsigned-saturating truncation of operands 1 and
;; 2, concatenated into the result vector (operand 1 first).
(define_insn "<sse4_1_avx2>_packusdw<mask_name>"
  [(set (match_operand:VI2_AVX2 0 "register_operand" "=Yr,*x,x,v")
        (vec_concat:VI2_AVX2
          (us_truncate:<ssehalfvecmode>
            (match_operand:<sseunpackmode> 1 "register_operand" "0,0,x,v"))
          (us_truncate:<ssehalfvecmode>
            (match_operand:<sseunpackmode> 2 "vector_operand"
                                           "YrBm,*xBm,xm,vm"))))]
  "TARGET_SSE4_1 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
  "@
   packusdw\t{%2, %0|%0, %2}
   packusdw\t{%2, %0|%0, %2}
   vpackusdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
   vpackusdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "noavx,noavx,avx,avx512bw")
   (set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,orig,<mask_prefix>,evex")
   (set_attr "mode" "<sseinsnmode>")])
16090
;; pblendvb / vpblendvb, modelled as UNSPEC_BLENDV.  In the two non-AVX
;; alternatives the selector (operand 3) is restricted to the "Yz"
;; register class.
(define_insn "<sse4_1_avx2>_pblendvb"
  [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
        (unspec:VI1_AVX2
          [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x")
           (match_operand:VI1_AVX2 2 "vector_operand" "YrBm,*xBm,xm")
           (match_operand:VI1_AVX2 3 "register_operand" "Yz,Yz,x")]
          UNSPEC_BLENDV))]
  "TARGET_SSE4_1"
  "@
   pblendvb\t{%3, %2, %0|%0, %2, %3}
   pblendvb\t{%3, %2, %0|%0, %2, %3}
   vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "*,*,1")
   (set_attr "prefix" "orig,orig,vex")
   (set_attr "btver2_decode" "vector,vector,vector")
   (set_attr "mode" "<sseinsnmode>")])
16110
;; pblendvb whose selector is written as (lt x 0).  It is split, with an
;; empty split condition, into the plain UNSPEC_BLENDV form that uses x
;; itself as the selector.
(define_insn_and_split "*<sse4_1_avx2>_pblendvb_lt"
  [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
        (unspec:VI1_AVX2
          [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x")
           (match_operand:VI1_AVX2 2 "vector_operand" "YrBm,*xBm,xm")
           (lt:VI1_AVX2
             (match_operand:VI1_AVX2 3 "register_operand" "Yz,Yz,x")
             (match_operand:VI1_AVX2 4 "const0_operand" "C,C,C"))]
          UNSPEC_BLENDV))]
  "TARGET_SSE4_1"
  "#"
  ""
  [(set (match_dup 0)
        (unspec:VI1_AVX2
          [(match_dup 1)
           (match_dup 2)
           (match_dup 3)]
          UNSPEC_BLENDV))]
  ""
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "*,*,1")
   (set_attr "prefix" "orig,orig,vex")
   (set_attr "btver2_decode" "vector,vector,vector")
   (set_attr "mode" "<sseinsnmode>")])
16133
;; 128-bit word blend (pblendw / vpblendw): a vec_merge of operand 2 and
;; operand 1 on V8HI, controlled by the 0..255 immediate in operand 3.
;; Alternatives 0-1 are non-AVX with operand 1 tied to the destination;
;; alternative 2 is the three-source-operand AVX form.
16134 (define_insn "sse4_1_pblendw"
16135 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,x")
16136 (vec_merge:V8HI
16137 (match_operand:V8HI 2 "vector_operand" "YrBm,*xBm,xm")
16138 (match_operand:V8HI 1 "register_operand" "0,0,x")
16139 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")))]
16140 "TARGET_SSE4_1"
16141 "@
16142 pblendw\t{%3, %2, %0|%0, %2, %3}
16143 pblendw\t{%3, %2, %0|%0, %2, %3}
16144 vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16145 [(set_attr "isa" "noavx,noavx,avx")
16146 (set_attr "type" "ssemov")
16147 (set_attr "prefix_extra" "1")
16148 (set_attr "length_immediate" "1")
16149 (set_attr "prefix" "orig,orig,vex")
16150 (set_attr "mode" "TI")])
16151
;; Expander for the 256-bit word blend.  The builtin supplies an 8-bit
;; immediate, while the vec_merge on V16HI is described with a mask that
;; has the same byte in bits 0-7 and bits 8-15, so the immediate is
;; replicated here before the pattern is generated.
(define_expand "avx2_pblendw"
  [(set (match_operand:V16HI 0 "register_operand")
	(vec_merge:V16HI
	  (match_operand:V16HI 2 "nonimmediate_operand")
	  (match_operand:V16HI 1 "register_operand")
	  (match_operand:SI 3 "const_0_to_255_operand")))]
  "TARGET_AVX2"
{
  /* Keep only the low byte, then copy it into the next byte up.  */
  const HOST_WIDE_INT low_byte = INTVAL (operands[3]) & 0xff;
  const HOST_WIDE_INT replicated = (low_byte << 8) | low_byte;

  operands[3] = GEN_INT (replicated);
})
16164
;; Matching insn for the 256-bit word blend.  Operand 3 must satisfy
;; avx2_pblendw_operand; before printing, it is reduced to its low byte,
;; which is what the vpblendw template emits as the immediate.
(define_insn "*avx2_pblendw"
  [(set (match_operand:V16HI 0 "register_operand" "=x")
	(vec_merge:V16HI
	  (match_operand:V16HI 2 "nonimmediate_operand" "xm")
	  (match_operand:V16HI 1 "register_operand" "x")
	  (match_operand:SI 3 "avx2_pblendw_operand" "n")))]
  "TARGET_AVX2"
{
  /* Print only bits 0-7 of the mask.  */
  const HOST_WIDE_INT imm8 = INTVAL (operands[3]) & 0xff;

  operands[3] = GEN_INT (imm8);
  return "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
}
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
16181
;; Doubleword blend (vpblendd) for the VI4_AVX2 modes: a vec_merge of
;; operand 2 (register or memory) and register operand 1 under the 0..255
;; immediate in operand 3.  AVX2 only, VEX encoded.
16182 (define_insn "avx2_pblendd<mode>"
16183 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
16184 (vec_merge:VI4_AVX2
16185 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")
16186 (match_operand:VI4_AVX2 1 "register_operand" "x")
16187 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
16188 "TARGET_AVX2"
16189 "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16190 [(set_attr "type" "ssemov")
16191 (set_attr "prefix_extra" "1")
16192 (set_attr "length_immediate" "1")
16193 (set_attr "prefix" "vex")
16194 (set_attr "mode" "<sseinsnmode>")])
16195
;; phminposuw / vphminposuw on V8HI, kept opaque as UNSPEC_PHMINPOSUW of
;; operand 1.  A single template with the %v prefix serves both the non-AVX
;; alternatives (0-1) and the AVX alternative (2).
16196 (define_insn "sse4_1_phminposuw"
16197 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,x")
16198 (unspec:V8HI [(match_operand:V8HI 1 "vector_operand" "YrBm,*xBm,xm")]
16199 UNSPEC_PHMINPOSUW))]
16200 "TARGET_SSE4_1"
16201 "%vphminposuw\t{%1, %0|%0, %1}"
16202 [(set_attr "isa" "noavx,noavx,avx")
16203 (set_attr "type" "sselog1")
16204 (set_attr "prefix_extra" "1")
16205 (set_attr "prefix" "orig,orig,vex")
16206 (set_attr "mode" "TI")])
16207
;; Extend all 16 QImode elements of operand 1 (register or memory) to
;; HImode, producing V16HI, with vpmov<extsuffix>bw.  The any_extend code
;; iterator and <extsuffix> select the kind of extension.  The insn
;; condition also requires the AVX512BW and AVX512VL mask conditions.
16208 (define_insn "avx2_<code>v16qiv16hi2<mask_name>"
16209 [(set (match_operand:V16HI 0 "register_operand" "=v")
16210 (any_extend:V16HI
16211 (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
16212 "TARGET_AVX2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
16213 "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16214 [(set_attr "type" "ssemov")
16215 (set_attr "prefix_extra" "1")
16216 (set_attr "prefix" "maybe_evex")
16217 (set_attr "mode" "OI")])
16218
;; 512-bit counterpart: extend all 32 QImode elements of operand 1
;; (register or memory) to HImode, producing V32HI, with
;; vpmov<extsuffix>bw.  Requires AVX512BW; always EVEX encoded.
16219 (define_insn "avx512bw_<code>v32qiv32hi2<mask_name>"
16220 [(set (match_operand:V32HI 0 "register_operand" "=v")
16221 (any_extend:V32HI
16222 (match_operand:V32QI 1 "nonimmediate_operand" "vm")))]
16223 "TARGET_AVX512BW"
16224 "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16225 [(set_attr "type" "ssemov")
16226 (set_attr "prefix_extra" "1")
16227 (set_attr "prefix" "evex")
16228 (set_attr "mode" "XI")])
16229
;; Extend the low 8 QImode elements (vec_select indices 0-7) of V16QI
;; register operand 1 to HImode, producing V8HI.  Register source only;
;; the memory form is the separate "_1" pattern.  Alternatives 0-1 are
;; non-AVX, alternative 2 is AVX; the %v prefix lets one template serve both.
16230 (define_insn "sse4_1_<code>v8qiv8hi2<mask_name>"
16231 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,v")
16232 (any_extend:V8HI
16233 (vec_select:V8QI
16234 (match_operand:V16QI 1 "register_operand" "Yr,*x,v")
16235 (parallel [(const_int 0) (const_int 1)
16236 (const_int 2) (const_int 3)
16237 (const_int 4) (const_int 5)
16238 (const_int 6) (const_int 7)]))))]
16239 "TARGET_SSE4_1 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
16240 "%vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16241 [(set_attr "isa" "noavx,noavx,avx")
16242 (set_attr "type" "ssemov")
16243 (set_attr "prefix_extra" "1")
16244 (set_attr "prefix" "orig,orig,maybe_evex")
16245 (set_attr "mode" "TI")])
16246
;; Memory-source form of the V8QI -> V8HI extension: operand 1 is a V8QI
;; memory operand that is extended directly, with no vec_select.
16247 (define_insn "*sse4_1_<code>v8qiv8hi2<mask_name>_1"
16248 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,v")
16249 (any_extend:V8HI
16250 (match_operand:V8QI 1 "memory_operand" "m,m,m")))]
16251 "TARGET_SSE4_1 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
16252 "%vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16253 [(set_attr "isa" "noavx,noavx,avx")
16254 (set_attr "type" "ssemov")
16255 (set_attr "prefix_extra" "1")
16256 (set_attr "prefix" "orig,orig,maybe_evex")
16257 (set_attr "mode" "TI")])
16258
;; Recognizes the V8QI -> V8HI extension when its source is expressed as a
;; DImode memory load concatenated with zero (vec_concat:V2DI), viewed as
;; V16QI through a subreg, with the low 8 bytes selected.  Only matches
;; while pseudos can still be created, is never output ("#"), and is split
;; as soon as it matches ("&& 1") into an extension of the same memory
;; re-addressed in V8QImode.
16259 (define_insn_and_split "*sse4_1_<code>v8qiv8hi2<mask_name>_2"
16260 [(set (match_operand:V8HI 0 "register_operand")
16261 (any_extend:V8HI
16262 (vec_select:V8QI
16263 (subreg:V16QI
16264 (vec_concat:V2DI
16265 (match_operand:DI 1 "memory_operand")
16266 (const_int 0)) 0)
16267 (parallel [(const_int 0) (const_int 1)
16268 (const_int 2) (const_int 3)
16269 (const_int 4) (const_int 5)
16270 (const_int 6) (const_int 7)]))))]
16271 "TARGET_SSE4_1 && <mask_avx512bw_condition> && <mask_avx512vl_condition>
16272 && can_create_pseudo_p ()"
16273 "#"
16274 "&& 1"
16275 [(set (match_dup 0)
16276 (any_extend:V8HI (match_dup 1)))]
16277 "operands[1] = adjust_address_nv (operands[1], V8QImode, 0);")
16278
;; Extend all 16 QImode elements of operand 1 (register or memory) to
;; SImode, producing V16SI, with vpmov<extsuffix>bd.  Requires AVX512F.
;; NOTE(review): the Intel-syntax half prints operand 1 with the %q
;; modifier, whereas avx512f_<code>v16hiv16si2 and avx512f_<code>v8siv8di2
;; print plain %1 -- confirm the modifier is intended for a V16QI source.
16279 (define_insn "<mask_codefor>avx512f_<code>v16qiv16si2<mask_name>"
16280 [(set (match_operand:V16SI 0 "register_operand" "=v")
16281 (any_extend:V16SI
16282 (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
16283 "TARGET_AVX512F"
16284 "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
16285 [(set_attr "type" "ssemov")
16286 (set_attr "prefix" "evex")
16287 (set_attr "mode" "XI")])
16288
;; Extend the low 8 QImode elements (indices 0-7) of V16QI register
;; operand 1 to SImode, producing V8SI, with vpmov<extsuffix>bd.
;; Register source only; the memory form is the separate "_1" pattern.
16289 (define_insn "avx2_<code>v8qiv8si2<mask_name>"
16290 [(set (match_operand:V8SI 0 "register_operand" "=v")
16291 (any_extend:V8SI
16292 (vec_select:V8QI
16293 (match_operand:V16QI 1 "register_operand" "v")
16294 (parallel [(const_int 0) (const_int 1)
16295 (const_int 2) (const_int 3)
16296 (const_int 4) (const_int 5)
16297 (const_int 6) (const_int 7)]))))]
16298 "TARGET_AVX2 && <mask_avx512vl_condition>"
16299 "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16300 [(set_attr "type" "ssemov")
16301 (set_attr "prefix_extra" "1")
16302 (set_attr "prefix" "maybe_evex")
16303 (set_attr "mode" "OI")])
16304
;; Memory-source form of the V8QI -> V8SI extension: operand 1 is a V8QI
;; memory operand that is extended directly, with no vec_select.
;; The mnemonic is spelled "vpmov..." outright rather than with the "%v"
;; prefix: this pattern is AVX2-only and has no non-AVX alternative, so the
;; prefix could only ever expand to "v", and the other avx2_* extension
;; patterns already use the plain spelling.
(define_insn "*avx2_<code>v8qiv8si2<mask_name>_1"
  [(set (match_operand:V8SI 0 "register_operand" "=v")
	(any_extend:V8SI
	  (match_operand:V8QI 1 "memory_operand" "m")))]
  "TARGET_AVX2 && <mask_avx512vl_condition>"
  "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "OI")])
16315
;; Recognizes the V8QI -> V8SI extension when its source is a DImode
;; memory load concatenated with zero (vec_concat:V2DI), viewed as V16QI
;; through a subreg, with the low 8 bytes selected.  Matches only while
;; pseudos can be created and is split immediately ("&& 1") into an
;; extension of the same memory re-addressed in V8QImode.
16316 (define_insn_and_split "*avx2_<code>v8qiv8si2<mask_name>_2"
16317 [(set (match_operand:V8SI 0 "register_operand")
16318 (any_extend:V8SI
16319 (vec_select:V8QI
16320 (subreg:V16QI
16321 (vec_concat:V2DI
16322 (match_operand:DI 1 "memory_operand")
16323 (const_int 0)) 0)
16324 (parallel [(const_int 0) (const_int 1)
16325 (const_int 2) (const_int 3)
16326 (const_int 4) (const_int 5)
16327 (const_int 6) (const_int 7)]))))]
16328 "TARGET_AVX2 && <mask_avx512vl_condition>
16329 && can_create_pseudo_p ()"
16330 "#"
16331 "&& 1"
16332 [(set (match_dup 0)
16333 (any_extend:V8SI (match_dup 1)))]
16334 "operands[1] = adjust_address_nv (operands[1], V8QImode, 0);")
16335
;; Extend the low 4 QImode elements (indices 0-3) of V16QI register
;; operand 1 to SImode, producing V4SI, with [v]pmov<extsuffix>bd.
;; Alternatives 0-1 are non-AVX, alternative 2 is AVX.
16336 (define_insn "sse4_1_<code>v4qiv4si2<mask_name>"
16337 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
16338 (any_extend:V4SI
16339 (vec_select:V4QI
16340 (match_operand:V16QI 1 "register_operand" "Yr,*x,v")
16341 (parallel [(const_int 0) (const_int 1)
16342 (const_int 2) (const_int 3)]))))]
16343 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
16344 "%vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16345 [(set_attr "isa" "noavx,noavx,avx")
16346 (set_attr "type" "ssemov")
16347 (set_attr "prefix_extra" "1")
16348 (set_attr "prefix" "orig,orig,maybe_evex")
16349 (set_attr "mode" "TI")])
16350
;; Memory-source form of the V4QI -> V4SI extension: operand 1 is a V4QI
;; memory operand that is extended directly.
16351 (define_insn "*sse4_1_<code>v4qiv4si2<mask_name>_1"
16352 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
16353 (any_extend:V4SI
16354 (match_operand:V4QI 1 "memory_operand" "m,m,m")))]
16355 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
16356 "%vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16357 [(set_attr "isa" "noavx,noavx,avx")
16358 (set_attr "type" "ssemov")
16359 (set_attr "prefix_extra" "1")
16360 (set_attr "prefix" "orig,orig,maybe_evex")
16361 (set_attr "mode" "TI")])
16362
;; Recognizes the V4QI -> V4SI extension when its source is an SImode
;; memory load placed in element 0 of an otherwise zero V4SI (vec_merge of
;; a vec_duplicate with a zero const_vector under mask 1), viewed as V16QI
;; through a subreg, with the low 4 bytes selected.  Matches only while
;; pseudos can be created and is split immediately ("&& 1") into an
;; extension of the same memory re-addressed in V4QImode.
16363 (define_insn_and_split "*sse4_1_<code>v4qiv4si2<mask_name>_2"
16364 [(set (match_operand:V4SI 0 "register_operand")
16365 (any_extend:V4SI
16366 (vec_select:V4QI
16367 (subreg:V16QI
16368 (vec_merge:V4SI
16369 (vec_duplicate:V4SI
16370 (match_operand:SI 1 "memory_operand"))
16371 (const_vector:V4SI
16372 [(const_int 0) (const_int 0)
16373 (const_int 0) (const_int 0)])
16374 (const_int 1)) 0)
16375 (parallel [(const_int 0) (const_int 1)
16376 (const_int 2) (const_int 3)]))))]
16377 "TARGET_SSE4_1 && <mask_avx512vl_condition>
16378 && can_create_pseudo_p ()"
16379 "#"
16380 "&& 1"
16381 [(set (match_dup 0)
16382 (any_extend:V4SI (match_dup 1)))]
16383 "operands[1] = adjust_address_nv (operands[1], V4QImode, 0);")
16384
;; Extend all 16 HImode elements of operand 1 (register or memory) to
;; SImode, producing V16SI, with vpmov<extsuffix>wd.  Requires AVX512F.
16385 (define_insn "avx512f_<code>v16hiv16si2<mask_name>"
16386 [(set (match_operand:V16SI 0 "register_operand" "=v")
16387 (any_extend:V16SI
16388 (match_operand:V16HI 1 "nonimmediate_operand" "vm")))]
16389 "TARGET_AVX512F"
16390 "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16391 [(set_attr "type" "ssemov")
16392 (set_attr "prefix" "evex")
16393 (set_attr "mode" "XI")])
16394
;; Extend all 8 HImode elements of operand 1 (register or memory) to
;; SImode, producing V8SI, with vpmov<extsuffix>wd.  Requires AVX2.
16395 (define_insn "avx2_<code>v8hiv8si2<mask_name>"
16396 [(set (match_operand:V8SI 0 "register_operand" "=v")
16397 (any_extend:V8SI
16398 (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
16399 "TARGET_AVX2 && <mask_avx512vl_condition>"
16400 "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16401 [(set_attr "type" "ssemov")
16402 (set_attr "prefix_extra" "1")
16403 (set_attr "prefix" "maybe_evex")
16404 (set_attr "mode" "OI")])
16405
;; Extend the low 4 HImode elements (indices 0-3) of V8HI register
;; operand 1 to SImode, producing V4SI, with [v]pmov<extsuffix>wd.
;; Alternatives 0-1 are non-AVX, alternative 2 is AVX.
16406 (define_insn "sse4_1_<code>v4hiv4si2<mask_name>"
16407 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
16408 (any_extend:V4SI
16409 (vec_select:V4HI
16410 (match_operand:V8HI 1 "register_operand" "Yr,*x,v")
16411 (parallel [(const_int 0) (const_int 1)
16412 (const_int 2) (const_int 3)]))))]
16413 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
16414 "%vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16415 [(set_attr "isa" "noavx,noavx,avx")
16416 (set_attr "type" "ssemov")
16417 (set_attr "prefix_extra" "1")
16418 (set_attr "prefix" "orig,orig,maybe_evex")
16419 (set_attr "mode" "TI")])
16420
;; Memory-source form of the V4HI -> V4SI extension: operand 1 is a V4HI
;; memory operand that is extended directly.
16421 (define_insn "*sse4_1_<code>v4hiv4si2<mask_name>_1"
16422 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
16423 (any_extend:V4SI
16424 (match_operand:V4HI 1 "memory_operand" "m,m,m")))]
16425 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
16426 "%vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16427 [(set_attr "isa" "noavx,noavx,avx")
16428 (set_attr "type" "ssemov")
16429 (set_attr "prefix_extra" "1")
16430 (set_attr "prefix" "orig,orig,maybe_evex")
16431 (set_attr "mode" "TI")])
16432
;; Recognizes the V4HI -> V4SI extension when its source is a DImode
;; memory load concatenated with zero (vec_concat:V2DI), viewed as V8HI
;; through a subreg, with the low 4 elements selected.  Matches only while
;; pseudos can be created and is split immediately ("&& 1") into an
;; extension of the same memory re-addressed in V4HImode.
16433 (define_insn_and_split "*sse4_1_<code>v4hiv4si2<mask_name>_2"
16434 [(set (match_operand:V4SI 0 "register_operand")
16435 (any_extend:V4SI
16436 (vec_select:V4HI
16437 (subreg:V8HI
16438 (vec_concat:V2DI
16439 (match_operand:DI 1 "memory_operand")
16440 (const_int 0)) 0)
16441 (parallel [(const_int 0) (const_int 1)
16442 (const_int 2) (const_int 3)]))))]
16443 "TARGET_SSE4_1 && <mask_avx512vl_condition>
16444 && can_create_pseudo_p ()"
16445 "#"
16446 "&& 1"
16447 [(set (match_dup 0)
16448 (any_extend:V4SI (match_dup 1)))]
16449 "operands[1] = adjust_address_nv (operands[1], V4HImode, 0);")
16450
;; Extend the low 8 QImode elements (indices 0-7) of V16QI register
;; operand 1 to DImode, producing V8DI, with vpmov<extsuffix>bq.
;; Requires AVX512F; register source only.
16451 (define_insn "avx512f_<code>v8qiv8di2<mask_name>"
16452 [(set (match_operand:V8DI 0 "register_operand" "=v")
16453 (any_extend:V8DI
16454 (vec_select:V8QI
16455 (match_operand:V16QI 1 "register_operand" "v")
16456 (parallel [(const_int 0) (const_int 1)
16457 (const_int 2) (const_int 3)
16458 (const_int 4) (const_int 5)
16459 (const_int 6) (const_int 7)]))))]
16460 "TARGET_AVX512F"
16461 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16462 [(set_attr "type" "ssemov")
16463 (set_attr "prefix" "evex")
16464 (set_attr "mode" "XI")])
16465
;; Memory-source form of the V8QI -> V8DI extension: operand 1 is a V8QI
;; memory operand that is extended directly.
16466 (define_insn "*avx512f_<code>v8qiv8di2<mask_name>_1"
16467 [(set (match_operand:V8DI 0 "register_operand" "=v")
16468 (any_extend:V8DI
16469 (match_operand:V8QI 1 "memory_operand" "m")))]
16470 "TARGET_AVX512F"
16471 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16472 [(set_attr "type" "ssemov")
16473 (set_attr "prefix" "evex")
16474 (set_attr "mode" "XI")])
16475
;; Recognizes the V8QI -> V8DI extension when its source is a DImode
;; memory load concatenated with zero (vec_concat:V2DI), viewed as V16QI
;; through a subreg, with the low 8 bytes selected.  Matches only while
;; pseudos can be created and is split immediately ("&& 1") into an
;; extension of the same memory re-addressed in V8QImode.
16476 (define_insn_and_split "*avx512f_<code>v8qiv8di2<mask_name>_2"
16477 [(set (match_operand:V8DI 0 "register_operand")
16478 (any_extend:V8DI
16479 (vec_select:V8QI
16480 (subreg:V16QI
16481 (vec_concat:V2DI
16482 (match_operand:DI 1 "memory_operand")
16483 (const_int 0)) 0)
16484 (parallel [(const_int 0) (const_int 1)
16485 (const_int 2) (const_int 3)
16486 (const_int 4) (const_int 5)
16487 (const_int 6) (const_int 7)]))))]
16488 "TARGET_AVX512F && can_create_pseudo_p ()"
16489 "#"
16490 "&& 1"
16491 [(set (match_dup 0)
16492 (any_extend:V8DI (match_dup 1)))]
16493 "operands[1] = adjust_address_nv (operands[1], V8QImode, 0);")
16494
;; Extend the low 4 QImode elements (indices 0-3) of V16QI register
;; operand 1 to DImode, producing V4DI, with vpmov<extsuffix>bq.
;; Requires AVX2; register source only.
16495 (define_insn "avx2_<code>v4qiv4di2<mask_name>"
16496 [(set (match_operand:V4DI 0 "register_operand" "=v")
16497 (any_extend:V4DI
16498 (vec_select:V4QI
16499 (match_operand:V16QI 1 "register_operand" "v")
16500 (parallel [(const_int 0) (const_int 1)
16501 (const_int 2) (const_int 3)]))))]
16502 "TARGET_AVX2 && <mask_avx512vl_condition>"
16503 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16504 [(set_attr "type" "ssemov")
16505 (set_attr "prefix_extra" "1")
16506 (set_attr "prefix" "maybe_evex")
16507 (set_attr "mode" "OI")])
16508
;; Memory-source form of the V4QI -> V4DI extension: operand 1 is a V4QI
;; memory operand that is extended directly.
16509 (define_insn "*avx2_<code>v4qiv4di2<mask_name>_1"
16510 [(set (match_operand:V4DI 0 "register_operand" "=v")
16511 (any_extend:V4DI
16512 (match_operand:V4QI 1 "memory_operand" "m")))]
16513 "TARGET_AVX2 && <mask_avx512vl_condition>"
16514 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16515 [(set_attr "type" "ssemov")
16516 (set_attr "prefix_extra" "1")
16517 (set_attr "prefix" "maybe_evex")
16518 (set_attr "mode" "OI")])
16519
;; Recognizes the V4QI -> V4DI extension when its source is an SImode
;; memory load placed in element 0 of an otherwise zero V4SI (vec_merge of
;; a vec_duplicate with a zero const_vector under mask 1), viewed as V16QI
;; through a subreg, with the low 4 bytes selected.  Matches only while
;; pseudos can be created and is split immediately ("&& 1") into an
;; extension of the same memory re-addressed in V4QImode.
16520 (define_insn_and_split "*avx2_<code>v4qiv4di2<mask_name>_2"
16521 [(set (match_operand:V4DI 0 "register_operand")
16522 (any_extend:V4DI
16523 (vec_select:V4QI
16524 (subreg:V16QI
16525 (vec_merge:V4SI
16526 (vec_duplicate:V4SI
16527 (match_operand:SI 1 "memory_operand"))
16528 (const_vector:V4SI
16529 [(const_int 0) (const_int 0)
16530 (const_int 0) (const_int 0)])
16531 (const_int 1)) 0)
16532 (parallel [(const_int 0) (const_int 1)
16533 (const_int 2) (const_int 3)]))))]
16534 "TARGET_AVX2 && <mask_avx512vl_condition>
16535 && can_create_pseudo_p ()"
16536 "#"
16537 "&& 1"
16538 [(set (match_dup 0)
16539 (any_extend:V4DI (match_dup 1)))]
16540 "operands[1] = adjust_address_nv (operands[1], V4QImode, 0);")
16541
;; Extend the low 2 QImode elements (indices 0-1) of V16QI register
;; operand 1 to DImode, producing V2DI, with [v]pmov<extsuffix>bq.
;; Alternatives 0-1 are non-AVX, alternative 2 is AVX.  Register source only.
16542 (define_insn "sse4_1_<code>v2qiv2di2<mask_name>"
16543 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
16544 (any_extend:V2DI
16545 (vec_select:V2QI
16546 (match_operand:V16QI 1 "register_operand" "Yr,*x,v")
16547 (parallel [(const_int 0) (const_int 1)]))))]
16548 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
16549 "%vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16550 [(set_attr "isa" "noavx,noavx,avx")
16551 (set_attr "type" "ssemov")
16552 (set_attr "prefix_extra" "1")
16553 (set_attr "prefix" "orig,orig,maybe_evex")
16554 (set_attr "mode" "TI")])
16555
;; Extend all 8 HImode elements of operand 1 (register or memory) to
;; DImode, producing V8DI, with vpmov<extsuffix>wq.  Requires AVX512F.
;; NOTE(review): the Intel-syntax half prints operand 1 with the %q
;; modifier, whereas avx512f_<code>v16hiv16si2 and avx512f_<code>v8siv8di2
;; print plain %1 -- confirm the modifier is intended for a V8HI source.
16556 (define_insn "avx512f_<code>v8hiv8di2<mask_name>"
16557 [(set (match_operand:V8DI 0 "register_operand" "=v")
16558 (any_extend:V8DI
16559 (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
16560 "TARGET_AVX512F"
16561 "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
16562 [(set_attr "type" "ssemov")
16563 (set_attr "prefix" "evex")
16564 (set_attr "mode" "XI")])
16565
;; Extend the low 4 HImode elements (indices 0-3) of V8HI register
;; operand 1 to DImode, producing V4DI, with vpmov<extsuffix>wq.
;; Requires AVX2; register source only.
16566 (define_insn "avx2_<code>v4hiv4di2<mask_name>"
16567 [(set (match_operand:V4DI 0 "register_operand" "=v")
16568 (any_extend:V4DI
16569 (vec_select:V4HI
16570 (match_operand:V8HI 1 "register_operand" "v")
16571 (parallel [(const_int 0) (const_int 1)
16572 (const_int 2) (const_int 3)]))))]
16573 "TARGET_AVX2 && <mask_avx512vl_condition>"
16574 "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16575 [(set_attr "type" "ssemov")
16576 (set_attr "prefix_extra" "1")
16577 (set_attr "prefix" "maybe_evex")
16578 (set_attr "mode" "OI")])
16579
;; Memory-source form of the V4HI -> V4DI extension: operand 1 is a V4HI
;; memory operand that is extended directly.
16580 (define_insn "*avx2_<code>v4hiv4di2<mask_name>_1"
16581 [(set (match_operand:V4DI 0 "register_operand" "=v")
16582 (any_extend:V4DI
16583 (match_operand:V4HI 1 "memory_operand" "m")))]
16584 "TARGET_AVX2 && <mask_avx512vl_condition>"
16585 "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16586 [(set_attr "type" "ssemov")
16587 (set_attr "prefix_extra" "1")
16588 (set_attr "prefix" "maybe_evex")
16589 (set_attr "mode" "OI")])
16590
;; Recognizes the V4HI -> V4DI extension when its source is a DImode
;; memory load concatenated with zero (vec_concat:V2DI), viewed as V8HI
;; through a subreg, with the low 4 elements selected.  Matches only while
;; pseudos can be created and is split immediately ("&& 1") into an
;; extension of the same memory re-addressed in V4HImode.
16591 (define_insn_and_split "*avx2_<code>v4hiv4di2<mask_name>_2"
16592 [(set (match_operand:V4DI 0 "register_operand")
16593 (any_extend:V4DI
16594 (vec_select:V4HI
16595 (subreg:V8HI
16596 (vec_concat:V2DI
16597 (match_operand:DI 1 "memory_operand")
16598 (const_int 0)) 0)
16599 (parallel [(const_int 0) (const_int 1)
16600 (const_int 2) (const_int 3)]))))]
16601 "TARGET_AVX2 && <mask_avx512vl_condition>
16602 && can_create_pseudo_p ()"
16603 "#"
16604 "&& 1"
16605 [(set (match_dup 0)
16606 (any_extend:V4DI (match_dup 1)))]
16607 "operands[1] = adjust_address_nv (operands[1], V4HImode, 0);")
16608
;; Extend the low 2 HImode elements (indices 0-1) of V8HI register
;; operand 1 to DImode, producing V2DI, with [v]pmov<extsuffix>wq.
;; Alternatives 0-1 are non-AVX, alternative 2 is AVX.
16609 (define_insn "sse4_1_<code>v2hiv2di2<mask_name>"
16610 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
16611 (any_extend:V2DI
16612 (vec_select:V2HI
16613 (match_operand:V8HI 1 "register_operand" "Yr,*x,v")
16614 (parallel [(const_int 0) (const_int 1)]))))]
16615 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
16616 "%vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16617 [(set_attr "isa" "noavx,noavx,avx")
16618 (set_attr "type" "ssemov")
16619 (set_attr "prefix_extra" "1")
16620 (set_attr "prefix" "orig,orig,maybe_evex")
16621 (set_attr "mode" "TI")])
16622
;; Memory-source form of the V2HI -> V2DI extension: operand 1 is a V2HI
;; memory operand that is extended directly.
16623 (define_insn "*sse4_1_<code>v2hiv2di2<mask_name>_1"
16624 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
16625 (any_extend:V2DI
16626 (match_operand:V2HI 1 "memory_operand" "m,m,m")))]
16627 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
16628 "%vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16629 [(set_attr "isa" "noavx,noavx,avx")
16630 (set_attr "type" "ssemov")
16631 (set_attr "prefix_extra" "1")
16632 (set_attr "prefix" "orig,orig,maybe_evex")
16633 (set_attr "mode" "TI")])
16634
;; Recognizes the V2HI -> V2DI extension when its source is an SImode
;; memory load placed in element 0 of an otherwise zero V4SI (vec_merge of
;; a vec_duplicate with a zero const_vector under mask 1), viewed as V8HI
;; through a subreg, with the low 2 elements selected.  Matches only while
;; pseudos can be created and is split immediately ("&& 1") into an
;; extension of the same memory re-addressed in V2HImode.
16635 (define_insn_and_split "*sse4_1_<code>v2hiv2di2<mask_name>_2"
16636 [(set (match_operand:V2DI 0 "register_operand")
16637 (any_extend:V2DI
16638 (vec_select:V2HI
16639 (subreg:V8HI
16640 (vec_merge:V4SI
16641 (vec_duplicate:V4SI
16642 (match_operand:SI 1 "memory_operand"))
16643 (const_vector:V4SI
16644 [(const_int 0) (const_int 0)
16645 (const_int 0) (const_int 0)])
16646 (const_int 1)) 0)
16647 (parallel [(const_int 0) (const_int 1)]))))]
16648 "TARGET_SSE4_1 && <mask_avx512vl_condition>
16649 && can_create_pseudo_p ()"
16650 "#"
16651 "&& 1"
16652 [(set (match_dup 0)
16653 (any_extend:V2DI (match_dup 1)))]
16654 "operands[1] = adjust_address_nv (operands[1], V2HImode, 0);")
16655
;; Extend all 8 SImode elements of operand 1 (register or memory) to
;; DImode, producing V8DI, with vpmov<extsuffix>dq.  Requires AVX512F.
16656 (define_insn "avx512f_<code>v8siv8di2<mask_name>"
16657 [(set (match_operand:V8DI 0 "register_operand" "=v")
16658 (any_extend:V8DI
16659 (match_operand:V8SI 1 "nonimmediate_operand" "vm")))]
16660 "TARGET_AVX512F"
16661 "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16662 [(set_attr "type" "ssemov")
16663 (set_attr "prefix" "evex")
16664 (set_attr "mode" "XI")])
16665
;; Extend all 4 SImode elements of operand 1 (register or memory) to
;; DImode, producing V4DI, with vpmov<extsuffix>dq.  Requires AVX2.
16666 (define_insn "avx2_<code>v4siv4di2<mask_name>"
16667 [(set (match_operand:V4DI 0 "register_operand" "=v")
16668 (any_extend:V4DI
16669 (match_operand:V4SI 1 "nonimmediate_operand" "vm")))]
16670 "TARGET_AVX2 && <mask_avx512vl_condition>"
16671 "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16672 [(set_attr "type" "ssemov")
16673 (set_attr "prefix" "maybe_evex")
16674 (set_attr "prefix_extra" "1")
16675 (set_attr "mode" "OI")])
16676
;; Extend the low 2 SImode elements (indices 0-1) of V4SI register
;; operand 1 to DImode, producing V2DI, with [v]pmov<extsuffix>dq.
;; Alternatives 0-1 are non-AVX, alternative 2 is AVX.
16677 (define_insn "sse4_1_<code>v2siv2di2<mask_name>"
16678 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
16679 (any_extend:V2DI
16680 (vec_select:V2SI
16681 (match_operand:V4SI 1 "register_operand" "Yr,*x,v")
16682 (parallel [(const_int 0) (const_int 1)]))))]
16683 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
16684 "%vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16685 [(set_attr "isa" "noavx,noavx,avx")
16686 (set_attr "type" "ssemov")
16687 (set_attr "prefix_extra" "1")
16688 (set_attr "prefix" "orig,orig,maybe_evex")
16689 (set_attr "mode" "TI")])
16690
;; Memory-source form of the V2SI -> V2DI extension: operand 1 is a V2SI
;; memory operand that is extended directly.
16691 (define_insn "*sse4_1_<code>v2siv2di2<mask_name>_1"
16692 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
16693 (any_extend:V2DI
16694 (match_operand:V2SI 1 "memory_operand" "m,m,m")))]
16695 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
16696 "%vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16697 [(set_attr "isa" "noavx,noavx,avx")
16698 (set_attr "type" "ssemov")
16699 (set_attr "prefix_extra" "1")
16700 (set_attr "prefix" "orig,orig,maybe_evex")
16701 (set_attr "mode" "TI")])
16702
;; Recognizes the V2SI -> V2DI extension when its source is a DImode
;; memory load concatenated with zero (vec_concat:V2DI), viewed as V4SI
;; through a subreg, with the low 2 elements selected.  Matches only while
;; pseudos can be created and is split immediately ("&& 1") into an
;; extension of the same memory re-addressed in V2SImode.
16703 (define_insn_and_split "*sse4_1_<code>v2siv2di2<mask_name>_2"
16704 [(set (match_operand:V2DI 0 "register_operand")
16705 (any_extend:V2DI
16706 (vec_select:V2SI
16707 (subreg:V4SI
16708 (vec_concat:V2DI
16709 (match_operand:DI 1 "memory_operand")
16710 (const_int 0)) 0)
16711 (parallel [(const_int 0) (const_int 1)]))))]
16712 "TARGET_SSE4_1 && <mask_avx512vl_condition>
16713 && can_create_pseudo_p ()"
16714 "#"
16715 "&& 1"
16716 [(set (match_dup 0)
16717 (any_extend:V2DI (match_dup 1)))]
16718 "operands[1] = adjust_address_nv (operands[1], V2SImode, 0);")
16719
16720 ;; vtestps/vtestpd are very similar to comiss and ucomiss when
16721 ;; setting FLAGS_REG, but they are not really compare instructions.
;; The result is therefore modelled as an UNSPEC_VTESTP of the two
;; operands, written to FLAGS_REG in CCmode.  Operand 0 must be a
;; register; operand 1 may be a register or memory.
16722 (define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>"
16723 [(set (reg:CC FLAGS_REG)
16724 (unspec:CC [(match_operand:VF_128_256 0 "register_operand" "x")
16725 (match_operand:VF_128_256 1 "nonimmediate_operand" "xm")]
16726 UNSPEC_VTESTP))]
16727 "TARGET_AVX"
16728 "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
16729 [(set_attr "type" "ssecomi")
16730 (set_attr "prefix_extra" "1")
16731 (set_attr "prefix" "vex")
16732 (set_attr "mode" "<MODE>")])
16733
16734 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG,
16735 ;; but it is not really a compare instruction.
;; The result is therefore modelled as an UNSPEC_PTEST of the two
;; operands, written to FLAGS_REG in CCmode.  Alternatives 0-1 are
;; non-AVX, alternative 2 is AVX; the %v prefix picks the mnemonic.
16736 (define_insn "<sse4_1>_ptest<mode>"
16737 [(set (reg:CC FLAGS_REG)
16738 (unspec:CC [(match_operand:V_AVX 0 "register_operand" "Yr, *x, x")
16739 (match_operand:V_AVX 1 "vector_operand" "YrBm, *xBm, xm")]
16740 UNSPEC_PTEST))]
16741 "TARGET_SSE4_1"
16742 "%vptest\t{%1, %0|%0, %1}"
16743 [(set_attr "isa" "noavx,noavx,avx")
16744 (set_attr "type" "ssecomi")
16745 (set_attr "prefix_extra" "1")
16746 (set_attr "prefix" "orig,orig,vex")
;; btver2_decode is "vector" only when the insn mode is OImode.
16747 (set (attr "btver2_decode")
16748 (if_then_else
16749 (match_test "<sseinsnmode>mode==OImode")
16750 (const_string "vector")
16751 (const_string "*")))
16752 (set_attr "mode" "<sseinsnmode>")])
16753
;; ptest on TFmode operands: sets FLAGS_REG (CCmode) from an UNSPEC_PTEST
;; of register operand 0 and vector operand 1.  Same alternative layout and
;; template as the vector-mode ptest pattern, with a fixed "TI" insn mode.
16754 (define_insn "ptesttf2"
16755 [(set (reg:CC FLAGS_REG)
16756 (unspec:CC [(match_operand:TF 0 "register_operand" "Yr, *x, x")
16757 (match_operand:TF 1 "vector_operand" "YrBm, *xBm, xm")]
16758 UNSPEC_PTEST))]
16759 "TARGET_SSE4_1"
16760 "%vptest\t{%1, %0|%0, %1}"
16761 [(set_attr "isa" "noavx,noavx,avx")
16762 (set_attr "type" "ssecomi")
16763 (set_attr "prefix_extra" "1")
16764 (set_attr "prefix" "orig,orig,vex")
16765 (set_attr "mode" "TI")])
16766
;; Packed rounding ([v]round<ssemodesuffix>) for the VF_128_256 modes,
;; modelled as UNSPEC_ROUND of vector operand 1 and the 0..15 immediate in
;; operand 2.  Alternatives 0-1 are non-AVX, alternative 2 is AVX.
16767 (define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
16768 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
16769 (unspec:VF_128_256
16770 [(match_operand:VF_128_256 1 "vector_operand" "YrBm,*xBm,xm")
16771 (match_operand:SI 2 "const_0_to_15_operand" "n,n,n")]
16772 UNSPEC_ROUND))]
16773 "TARGET_SSE4_1"
16774 "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
16775 [(set_attr "isa" "noavx,noavx,avx")
16776 (set_attr "type" "ssecvt")
;; The data16 prefix is counted for the non-AVX alternatives only.
16777 (set_attr "prefix_data16" "1,1,*")
16778 (set_attr "prefix_extra" "1")
16779 (set_attr "length_immediate" "1")
16780 (set_attr "prefix" "orig,orig,vex")
16781 (set_attr "mode" "<MODE>")])
16782
;; Round operand 1 using the rounding immediate in operand 2, then convert
;; the rounded value to the integer vector mode <sseintvecmode> with the
;; fix_trunc expander, storing the result in operand 0.
(define_expand "<sse4_1>_round<ssemodesuffix>_sfix<avxsizesuffix>"
  [(match_operand:<sseintvecmode> 0 "register_operand")
   (match_operand:VF1_128_256 1 "vector_operand")
   (match_operand:SI 2 "const_0_to_15_operand")]
  "TARGET_SSE4_1"
{
  /* Scratch register holding the rounded floating-point vector.  */
  rtx rounded = gen_reg_rtx (<MODE>mode);

  /* Step 1: rounded = round (operands[1], operands[2]).  */
  rtx round_pat
    = gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (rounded, operands[1],
							 operands[2]);
  emit_insn (round_pat);

  /* Step 2: operands[0] = fix_trunc (rounded).  */
  rtx fix_pat
    = gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], rounded);
  emit_insn (fix_pat);
  DONE;
})
16798
;; 512-bit rounding entry point: forwards all three operands unchanged to
;; the avx512f_rndscale pattern for the same mode.
(define_expand "avx512f_round<castmode>512"
  [(match_operand:VF_512 0 "register_operand")
   (match_operand:VF_512 1 "nonimmediate_operand")
   (match_operand:SI 2 "const_0_to_15_operand")]
  "TARGET_AVX512F"
{
  rtx rndscale_pat
    = gen_avx512f_rndscale<mode> (operands[0], operands[1], operands[2]);

  emit_insn (rndscale_pat);
  DONE;
})
16808
;; Round the V16SF operand 1 with vrndscale using the immediate in
;; operand 2, then convert the rounded value to V16SI in operand 0.
(define_expand "avx512f_roundps512_sfix"
  [(match_operand:V16SI 0 "register_operand")
   (match_operand:V16SF 1 "nonimmediate_operand")
   (match_operand:SI 2 "const_0_to_15_operand")]
  "TARGET_AVX512F"
{
  /* Scratch register holding the rounded V16SF value.  */
  rtx rounded = gen_reg_rtx (V16SFmode);

  rtx rndscale_pat
    = gen_avx512f_rndscalev16sf (rounded, operands[1], operands[2]);
  emit_insn (rndscale_pat);

  rtx fix_pat = gen_fix_truncv16sfv16si2 (operands[0], rounded);
  emit_insn (fix_pat);
  DONE;
})
16820
;; Round the two VF2 inputs (operands 1 and 2) with the rounding immediate
;; in operand 3 and pack the truncated integer results into operand 0.
;; For V2DF on an AVX target that does not prefer 128-bit vectors, when
;; optimizing for speed, the two inputs are first concatenated into one
;; V4DF so that a single 256-bit round and a single conversion suffice.
(define_expand "<sse4_1>_round<ssemodesuffix>_vec_pack_sfix<avxsizesuffix>"
  [(match_operand:<ssepackfltmode> 0 "register_operand")
   (match_operand:VF2 1 "vector_operand")
   (match_operand:VF2 2 "vector_operand")
   (match_operand:SI 3 "const_0_to_15_operand")]
  "TARGET_SSE4_1"
{
  if (<MODE>mode == V2DFmode
      && TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
    {
      /* Wide path: concat -> one 256-bit round -> one conversion.  */
      rtx wide_rounded = gen_reg_rtx (V4DFmode);
      rtx wide_input = gen_reg_rtx (V4DFmode);
      rtx low_half = force_reg (V2DFmode, operands[1]);

      emit_insn (gen_avx_vec_concatv4df (wide_input, low_half, operands[2]));
      emit_insn (gen_avx_roundpd256 (wide_rounded, wide_input, operands[3]));
      emit_insn (gen_fix_truncv4dfv4si2 (operands[0], wide_rounded));
      DONE;
    }

  /* Generic path: round each input separately, then pack.  */
  rtx rounded_first = gen_reg_rtx (<MODE>mode);
  rtx rounded_second = gen_reg_rtx (<MODE>mode);

  emit_insn
    (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (rounded_first,
							operands[1],
							operands[3]));
  emit_insn
    (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (rounded_second,
							operands[2],
							operands[3]));
  emit_insn
    (gen_vec_pack_sfix_trunc_<mode> (operands[0], rounded_first,
				     rounded_second));
  DONE;
})
16858
;; Scalar rounding on a VF_128 register: the vec_merge with mask 1 takes
;; element 0 from UNSPEC_ROUND of operand 2 (rounding immediate in
;; operand 3) and every other element from operand 1.
;; Alternatives 0-1: non-AVX round*, with operand 1 tied to the destination.
;; Alternative 2: AVX vround*.  Alternative 3: AVX512F vrndscale*.
16859 (define_insn "sse4_1_round<ssescalarmodesuffix>"
16860 [(set (match_operand:VF_128 0 "register_operand" "=Yr,*x,x,v")
16861 (vec_merge:VF_128
16862 (unspec:VF_128
16863 [(match_operand:VF_128 2 "register_operand" "Yr,*x,x,v")
16864 (match_operand:SI 3 "const_0_to_15_operand" "n,n,n,n")]
16865 UNSPEC_ROUND)
16866 (match_operand:VF_128 1 "register_operand" "0,0,x,v")
16867 (const_int 1)))]
16868 "TARGET_SSE4_1"
16869 "@
16870 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
16871 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
16872 vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
16873 vrndscale<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16874 [(set_attr "isa" "noavx,noavx,avx,avx512f")
16875 (set_attr "type" "ssecvt")
16876 (set_attr "length_immediate" "1")
16877 (set_attr "prefix_data16" "1,1,*,*")
16878 (set_attr "prefix_extra" "1")
16879 (set_attr "prefix" "orig,orig,vex,evex")
16880 (set_attr "mode" "<MODE>")])
16881
;; Vector round<mode>2 expander, available only without trapping math.
;; It builds a vector of nextafter (0.5, 0.0), gives each element the sign
;; of the matching element of operand 1 (copysign), adds that to operand 1
;; (operand 3 = operand 1 + operand 2) and finally rounds the sum with
;; ROUND_TRUNC into operand 0.
(define_expand "round<mode>2"
  [(set (match_dup 3)
	(plus:VF
	  (match_operand:VF 1 "register_operand")
	  (match_dup 2)))
   (set (match_operand:VF 0 "register_operand")
	(unspec:VF
	  [(match_dup 3) (match_dup 4)]
	  UNSPEC_ROUND))]
  "TARGET_SSE4_1 && !flag_trapping_math"
{
  machine_mode elt_mode = GET_MODE_INNER (<MODE>mode);
  const struct real_format *elt_format = REAL_MODE_FORMAT (elt_mode);

  /* Build nextafter (0.5, 0.0) as 0.5 - 2**(-p - 1), where p is the
     precision of the element format.  */
  REAL_VALUE_TYPE gap_below_half, just_below_half;
  real_2expN (&gap_below_half, -(elt_format->p) - 1, elt_mode);
  real_arithmetic (&just_below_half, MINUS_EXPR, &dconsthalf,
		   &gap_below_half);
  rtx scalar_bias = const_double_from_real_value (just_below_half, elt_mode);

  /* Broadcast the constant to every element and load it into a register.  */
  rtx vector_bias
    = force_reg (<MODE>mode,
		 ix86_build_const_vector (<MODE>mode, true, scalar_bias));

  /* operands[2] = copysign (bias, operands[1]).  */
  operands[2] = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_copysign<mode>3 (operands[2], vector_bias, operands[1]));

  /* operands[3] receives the biased sum; operands[4] selects truncation.  */
  operands[3] = gen_reg_rtx (<MODE>mode);
  operands[4] = GEN_INT (ROUND_TRUNC);
})
16915
;; Round operand 1 with the round<mode>2 expander and convert the result
;; to the integer vector mode <sseintvecmode> in operand 0.  Available
;; only without trapping math.
(define_expand "round<mode>2_sfix"
  [(match_operand:<sseintvecmode> 0 "register_operand")
   (match_operand:VF1 1 "register_operand")]
  "TARGET_SSE4_1 && !flag_trapping_math"
{
  /* Scratch register holding the rounded floating-point vector.  */
  rtx rounded = gen_reg_rtx (<MODE>mode);

  rtx round_pat = gen_round<mode>2 (rounded, operands[1]);
  emit_insn (round_pat);

  rtx fix_pat
    = gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], rounded);
  emit_insn (fix_pat);
  DONE;
})
16929
;; Round the two VF2 inputs (operands 1 and 2) with round<mode>2 and pack
;; the truncated integer results into operand 0.  Available only without
;; trapping math.  For V2DF on an AVX target that does not prefer 128-bit
;; vectors, when optimizing for speed, the inputs are concatenated into one
;; V4DF so that a single 256-bit round and a single conversion suffice.
(define_expand "round<mode>2_vec_pack_sfix"
  [(match_operand:<ssepackfltmode> 0 "register_operand")
   (match_operand:VF2 1 "register_operand")
   (match_operand:VF2 2 "register_operand")]
  "TARGET_SSE4_1 && !flag_trapping_math"
{
  if (<MODE>mode == V2DFmode
      && TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
    {
      /* Wide path: concat -> one 256-bit round -> one conversion.  */
      rtx wide_rounded = gen_reg_rtx (V4DFmode);
      rtx wide_input = gen_reg_rtx (V4DFmode);
      rtx low_half = force_reg (V2DFmode, operands[1]);

      emit_insn (gen_avx_vec_concatv4df (wide_input, low_half, operands[2]));
      emit_insn (gen_roundv4df2 (wide_rounded, wide_input));
      emit_insn (gen_fix_truncv4dfv4si2 (operands[0], wide_rounded));
      DONE;
    }

  /* Generic path: round each input separately, then pack.  */
  rtx rounded_first = gen_reg_rtx (<MODE>mode);
  rtx rounded_second = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_round<mode>2 (rounded_first, operands[1]));
  emit_insn (gen_round<mode>2 (rounded_second, operands[2]));

  emit_insn
    (gen_vec_pack_sfix_trunc_<mode> (operands[0], rounded_first,
				     rounded_second));
  DONE;
})
16963
16964 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
16965 ;;
16966 ;; Intel SSE4.2 string/text processing instructions
16967 ;;
16968 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
16969
;; Combined PCMPESTR pattern.  One parallel sets the SImode result in
;; operand 0, the V16QImode result in operand 1 and FLAGS_REG, all from
;; the same UNSPEC_PCMPESTR inputs (operands 2-6).  It is never output
;; directly (template "#"); the splitter inspects the REG_UNUSED notes
;; on the insn and emits only the pcmpestri / pcmpestrm / flags-only
;; forms whose results are actually needed.
(define_insn_and_split "sse4_2_pcmpestr"
  [(set (match_operand:SI 0 "register_operand" "=c,c")
	(unspec:SI
	  [(match_operand:V16QI 2 "register_operand" "x,x")
	   (match_operand:SI 3 "register_operand" "a,a")
	   (match_operand:V16QI 4 "nonimmediate_operand" "x,m")
	   (match_operand:SI 5 "register_operand" "d,d")
	   (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
	  UNSPEC_PCMPESTR))
   (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
	(unspec:V16QI
	  [(match_dup 2)
	   (match_dup 3)
	   (match_dup 4)
	   (match_dup 5)
	   (match_dup 6)]
	  UNSPEC_PCMPESTR))
   (set (reg:CC FLAGS_REG)
	(unspec:CC
	  [(match_dup 2)
	   (match_dup 3)
	   (match_dup 4)
	   (match_dup 5)
	   (match_dup 6)]
	  UNSPEC_PCMPESTR))]
  "TARGET_SSE4_2
   && can_create_pseudo_p ()"
  "#"
  "&& 1"
  [(const_int 0)]
{
  /* A result counts as live unless the insn has a REG_UNUSED note
     for the register that holds it.  */
  const bool index_live
    = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
  const bool mask_live
    = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
  const bool flags_live
    = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);

  if (index_live)
    emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
				     operands[3], operands[4],
				     operands[5], operands[6]));
  if (mask_live)
    emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
				     operands[3], operands[4],
				     operands[5], operands[6]));

  if (!index_live && !mask_live)
    {
      /* Neither register result is wanted: keep a flags-only compare
	 if the flags are read, otherwise the insn is dead.  */
      if (flags_live)
	emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
					       operands[2], operands[3],
					       operands[4], operands[5],
					       operands[6]));
      else
	emit_note (NOTE_INSN_DELETED);
    }

  DONE;
}
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")
   (set_attr "memory" "none,load")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")])
17029
;; PCMPESTRI: sets the SImode result in operand 0 (constraint "c") and
;; FLAGS_REG from UNSPEC_PCMPESTR of operands 1-5.  Operand 3 may be a
;; register or memory; operand 5 is an immediate in the range 0..255.
(define_insn "sse4_2_pcmpestri"
  [(set (match_operand:SI 0 "register_operand" "=c,c")
	(unspec:SI
	  [(match_operand:V16QI 1 "register_operand" "x,x")
	   (match_operand:SI 2 "register_operand" "a,a")
	   (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
	   (match_operand:SI 4 "register_operand" "d,d")
	   (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
	  UNSPEC_PCMPESTR))
   (set (reg:CC FLAGS_REG)
	(unspec:CC
	  [(match_dup 1)
	   (match_dup 2)
	   (match_dup 3)
	   (match_dup 4)
	   (match_dup 5)]
	  UNSPEC_PCMPESTR))]
  "TARGET_SSE4_2"
  "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")
   (set_attr "memory" "none,load")
   (set_attr "prefix" "maybe_vex")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "btver2_decode" "vector")])
17057
;; PCMPESTRM: sets the V16QImode result in operand 0 (constraint "Yz")
;; and FLAGS_REG from UNSPEC_PCMPESTR of operands 1-5.  Operand 3 may be
;; a register or memory; operand 5 is an immediate in the range 0..255.
(define_insn "sse4_2_pcmpestrm"
  [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
	(unspec:V16QI
	  [(match_operand:V16QI 1 "register_operand" "x,x")
	   (match_operand:SI 2 "register_operand" "a,a")
	   (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
	   (match_operand:SI 4 "register_operand" "d,d")
	   (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
	  UNSPEC_PCMPESTR))
   (set (reg:CC FLAGS_REG)
	(unspec:CC
	  [(match_dup 1)
	   (match_dup 2)
	   (match_dup 3)
	   (match_dup 4)
	   (match_dup 5)]
	  UNSPEC_PCMPESTR))]
  "TARGET_SSE4_2"
  "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")
   (set_attr "memory" "none,load")
   (set_attr "prefix" "maybe_vex")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "btver2_decode" "vector")])
17085
;; Flags-only PCMPESTR: only FLAGS_REG is set; the register results are
;; clobbered scratches.  Alternatives 0-1 clobber a "Yz" vector scratch
;; and output vpcmpestrm; alternatives 2-3 clobber a "c" scalar scratch
;; and output vpcmpestri.  Within each pair the second alternative takes
;; operand 4 from memory.
(define_insn "sse4_2_pcmpestr_cconly"
  [(set (reg:CC FLAGS_REG)
	(unspec:CC
	  [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
	   (match_operand:SI 3 "register_operand" "a,a,a,a")
	   (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
	   (match_operand:SI 5 "register_operand" "d,d,d,d")
	   (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
	  UNSPEC_PCMPESTR))
   (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
   (clobber (match_scratch:SI 1 "= X, X,c,c"))]
  "TARGET_SSE4_2"
  "@
   %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
   %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
   %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
   %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")
   (set_attr "memory" "none,load,none,load")
   (set_attr "prefix" "maybe_vex")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "btver2_decode" "vector,vector,vector,vector")])
17111
;; Combined PCMPISTR pattern.  One parallel sets the SImode result in
;; operand 0, the V16QImode result in operand 1 and FLAGS_REG, all from
;; the same UNSPEC_PCMPISTR inputs (operands 2-4).  It is never output
;; directly (template "#"); the splitter inspects the REG_UNUSED notes
;; on the insn and emits only the pcmpistri / pcmpistrm / flags-only
;; forms whose results are actually needed.
(define_insn_and_split "sse4_2_pcmpistr"
  [(set (match_operand:SI 0 "register_operand" "=c,c")
	(unspec:SI
	  [(match_operand:V16QI 2 "register_operand" "x,x")
	   (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
	   (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
	  UNSPEC_PCMPISTR))
   (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
	(unspec:V16QI
	  [(match_dup 2)
	   (match_dup 3)
	   (match_dup 4)]
	  UNSPEC_PCMPISTR))
   (set (reg:CC FLAGS_REG)
	(unspec:CC
	  [(match_dup 2)
	   (match_dup 3)
	   (match_dup 4)]
	  UNSPEC_PCMPISTR))]
  "TARGET_SSE4_2
   && can_create_pseudo_p ()"
  "#"
  "&& 1"
  [(const_int 0)]
{
  /* A result counts as live unless the insn has a REG_UNUSED note
     for the register that holds it.  */
  const bool index_live
    = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
  const bool mask_live
    = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
  const bool flags_live
    = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);

  if (index_live)
    emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
				     operands[3], operands[4]));
  if (mask_live)
    emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
				     operands[3], operands[4]));

  if (!index_live && !mask_live)
    {
      /* Neither register result is wanted: keep a flags-only compare
	 if the flags are read, otherwise the insn is dead.  */
      if (flags_live)
	emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
					       operands[2], operands[3],
					       operands[4]));
      else
	emit_note (NOTE_INSN_DELETED);
    }

  DONE;
}
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")
   (set_attr "memory" "none,load")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")])
17162
;; PCMPISTRI: sets the SImode result in operand 0 (constraint "c") and
;; FLAGS_REG from UNSPEC_PCMPISTR of operands 1-3.  Operand 2 may be a
;; register or memory; operand 3 is an immediate in the range 0..255.
(define_insn "sse4_2_pcmpistri"
  [(set (match_operand:SI 0 "register_operand" "=c,c")
	(unspec:SI
	  [(match_operand:V16QI 1 "register_operand" "x,x")
	   (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
	   (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
	  UNSPEC_PCMPISTR))
   (set (reg:CC FLAGS_REG)
	(unspec:CC
	  [(match_dup 1)
	   (match_dup 2)
	   (match_dup 3)]
	  UNSPEC_PCMPISTR))]
  "TARGET_SSE4_2"
  "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")
   (set_attr "memory" "none,load")
   (set_attr "prefix" "maybe_vex")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "btver2_decode" "vector")])
17186
;; PCMPISTRM: sets the V16QImode result in operand 0 (constraint "Yz")
;; and FLAGS_REG from UNSPEC_PCMPISTR of operands 1-3.  Operand 2 may be
;; a register or memory; operand 3 is an immediate in the range 0..255.
(define_insn "sse4_2_pcmpistrm"
  [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
	(unspec:V16QI
	  [(match_operand:V16QI 1 "register_operand" "x,x")
	   (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
	   (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
	  UNSPEC_PCMPISTR))
   (set (reg:CC FLAGS_REG)
	(unspec:CC
	  [(match_dup 1)
	   (match_dup 2)
	   (match_dup 3)]
	  UNSPEC_PCMPISTR))]
  "TARGET_SSE4_2"
  "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")
   (set_attr "memory" "none,load")
   (set_attr "prefix" "maybe_vex")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "btver2_decode" "vector")])
17210
;; Flags-only PCMPISTR: only FLAGS_REG is set; the register results are
;; clobbered scratches.  Alternatives 0-1 clobber a "Yz" vector scratch
;; and output vpcmpistrm; alternatives 2-3 clobber a "c" scalar scratch
;; and output vpcmpistri.  Within each pair the second alternative takes
;; operand 3 from memory.
(define_insn "sse4_2_pcmpistr_cconly"
  [(set (reg:CC FLAGS_REG)
	(unspec:CC
	  [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
	   (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
	   (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
	  UNSPEC_PCMPISTR))
   (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
   (clobber (match_scratch:SI 1 "= X, X,c,c"))]
  "TARGET_SSE4_2"
  "@
   %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
   %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
   %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
   %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")
   (set_attr "memory" "none,load,none,load")
   (set_attr "prefix" "maybe_vex")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "btver2_decode" "vector,vector,vector,vector")])
17234
17235 ;; Packed float variants
;; Maps the index-vector mode of the single-precision gather/scatter
;; prefetch patterns to the float vector mode used for their memory
;; operand.
(define_mode_attr GATHER_SCATTER_SF_MEM_MODE
  [(V8DI "V8SF")
   (V16SI "V16SF")])
17238
;; Expander for the single-precision gather prefetch.  Operand 0 is the
;; mask register, operand 2 the base address, operand 1 the index
;; vector, operand 3 the scale (1, 2, 4 or 8) and operand 4 the hint
;; (2 or 3).  Operand 5 is built here as an UNSPEC_VSIBADDR address.
(define_expand "avx512pf_gatherpf<mode>sf"
  [(unspec
     [(match_operand:<avx512fmaskmode> 0 "register_operand")
      (mem:<GATHER_SCATTER_SF_MEM_MODE>
	(match_par_dup 5
	  [(match_operand 2 "vsib_address_operand")
	   (match_operand:VI48_512 1 "register_operand")
	   (match_operand:SI 3 "const1248_operand")]))
      (match_operand:SI 4 "const_2_to_3_operand")]
     UNSPEC_GATHER_PREFETCH)]
  "TARGET_AVX512PF"
{
  /* Address components in the order base, index vector, scale.  */
  rtvec vsib_parts = gen_rtvec (3, operands[2], operands[1], operands[3]);
  operands[5] = gen_rtx_UNSPEC (Pmode, vsib_parts, UNSPEC_VSIBADDR);
})
17255
;; Masked single-precision gather prefetch insn.  The hint in operand 4
;; picks the mnemonic: 3 gives vgatherpf0...ps, 2 gives vgatherpf1...ps.
(define_insn "*avx512pf_gatherpf<mode>sf_mask"
  [(unspec
     [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
      (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
	[(unspec:P
	   [(match_operand:P 2 "vsib_address_operand" "Tv")
	    (match_operand:VI48_512 1 "register_operand" "v")
	    (match_operand:SI 3 "const1248_operand" "n")]
	   UNSPEC_VSIBADDR)])
      (match_operand:SI 4 "const_2_to_3_operand" "n")]
     UNSPEC_GATHER_PREFETCH)]
  "TARGET_AVX512PF"
{
  const HOST_WIDE_INT hint = INTVAL (operands[4]);

  /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
     gas changed what it requires incompatibly.  */
  if (hint == 3)
    return "vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
  if (hint == 2)
    return "vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";

  gcc_unreachable ();
}
  [(set_attr "type" "sse")
   (set_attr "mode" "XI")
   (set_attr "prefix" "evex")])
17284
17285 ;; Packed double variants
;; Expander for the double-precision gather prefetch (V8DF memory).
;; Operand 0 is the mask register, operand 2 the base address,
;; operand 1 the index vector, operand 3 the scale (1, 2, 4 or 8) and
;; operand 4 the hint (2 or 3).  Operand 5 is built here as an
;; UNSPEC_VSIBADDR address.
(define_expand "avx512pf_gatherpf<mode>df"
  [(unspec
     [(match_operand:<avx512fmaskmode> 0 "register_operand")
      (mem:V8DF
	(match_par_dup 5
	  [(match_operand 2 "vsib_address_operand")
	   (match_operand:VI4_256_8_512 1 "register_operand")
	   (match_operand:SI 3 "const1248_operand")]))
      (match_operand:SI 4 "const_2_to_3_operand")]
     UNSPEC_GATHER_PREFETCH)]
  "TARGET_AVX512PF"
{
  /* Address components in the order base, index vector, scale.  */
  rtvec vsib_parts = gen_rtvec (3, operands[2], operands[1], operands[3]);
  operands[5] = gen_rtx_UNSPEC (Pmode, vsib_parts, UNSPEC_VSIBADDR);
})
17302
;; Masked double-precision gather prefetch insn.  The hint in operand 4
;; picks the mnemonic: 3 gives vgatherpf0...pd, 2 gives vgatherpf1...pd.
(define_insn "*avx512pf_gatherpf<mode>df_mask"
  [(unspec
     [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
      (match_operator:V8DF 5 "vsib_mem_operator"
	[(unspec:P
	   [(match_operand:P 2 "vsib_address_operand" "Tv")
	    (match_operand:VI4_256_8_512 1 "register_operand" "v")
	    (match_operand:SI 3 "const1248_operand" "n")]
	   UNSPEC_VSIBADDR)])
      (match_operand:SI 4 "const_2_to_3_operand" "n")]
     UNSPEC_GATHER_PREFETCH)]
  "TARGET_AVX512PF"
{
  const HOST_WIDE_INT hint = INTVAL (operands[4]);

  /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
     gas changed what it requires incompatibly.  */
  if (hint == 3)
    return "vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
  if (hint == 2)
    return "vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";

  gcc_unreachable ();
}
  [(set_attr "type" "sse")
   (set_attr "mode" "XI")
   (set_attr "prefix" "evex")])
17331
17332 ;; Packed float variants
;; Expander for the single-precision scatter prefetch.  Operand 0 is the
;; mask register, operand 2 the base address, operand 1 the index
;; vector, operand 3 the scale (1, 2, 4 or 8) and operand 4 the hint
;; (accepted by const2367_operand).  Operand 5 is built here as an
;; UNSPEC_VSIBADDR address.
(define_expand "avx512pf_scatterpf<mode>sf"
  [(unspec
     [(match_operand:<avx512fmaskmode> 0 "register_operand")
      (mem:<GATHER_SCATTER_SF_MEM_MODE>
	(match_par_dup 5
	  [(match_operand 2 "vsib_address_operand")
	   (match_operand:VI48_512 1 "register_operand")
	   (match_operand:SI 3 "const1248_operand")]))
      (match_operand:SI 4 "const2367_operand")]
     UNSPEC_SCATTER_PREFETCH)]
  "TARGET_AVX512PF"
{
  /* Address components in the order base, index vector, scale.  */
  rtvec vsib_parts = gen_rtvec (3, operands[2], operands[1], operands[3]);
  operands[5] = gen_rtx_UNSPEC (Pmode, vsib_parts, UNSPEC_VSIBADDR);
})
17349
;; Masked single-precision scatter prefetch insn.  The hint in operand 4
;; picks the mnemonic: 3 or 7 gives vscatterpf0...ps, 2 or 6 gives
;; vscatterpf1...ps.
(define_insn "*avx512pf_scatterpf<mode>sf_mask"
  [(unspec
     [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
      (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
	[(unspec:P
	   [(match_operand:P 2 "vsib_address_operand" "Tv")
	    (match_operand:VI48_512 1 "register_operand" "v")
	    (match_operand:SI 3 "const1248_operand" "n")]
	   UNSPEC_VSIBADDR)])
      (match_operand:SI 4 "const2367_operand" "n")]
     UNSPEC_SCATTER_PREFETCH)]
  "TARGET_AVX512PF"
{
  const HOST_WIDE_INT hint = INTVAL (operands[4]);

  /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
     gas changed what it requires incompatibly.  */
  if (hint == 3 || hint == 7)
    return "vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
  if (hint == 2 || hint == 6)
    return "vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";

  gcc_unreachable ();
}
  [(set_attr "type" "sse")
   (set_attr "mode" "XI")
   (set_attr "prefix" "evex")])
17380
17381 ;; Packed double variants
;; Expander for the double-precision scatter prefetch (V8DF memory).
;; Operand 0 is the mask register, operand 2 the base address,
;; operand 1 the index vector, operand 3 the scale (1, 2, 4 or 8) and
;; operand 4 the hint (accepted by const2367_operand).  Operand 5 is
;; built here as an UNSPEC_VSIBADDR address.
(define_expand "avx512pf_scatterpf<mode>df"
  [(unspec
     [(match_operand:<avx512fmaskmode> 0 "register_operand")
      (mem:V8DF
	(match_par_dup 5
	  [(match_operand 2 "vsib_address_operand")
	   (match_operand:VI4_256_8_512 1 "register_operand")
	   (match_operand:SI 3 "const1248_operand")]))
      (match_operand:SI 4 "const2367_operand")]
     UNSPEC_SCATTER_PREFETCH)]
  "TARGET_AVX512PF"
{
  /* Address components in the order base, index vector, scale.  */
  rtvec vsib_parts = gen_rtvec (3, operands[2], operands[1], operands[3]);
  operands[5] = gen_rtx_UNSPEC (Pmode, vsib_parts, UNSPEC_VSIBADDR);
})
17398
;; Masked double-precision scatter prefetch insn.  The hint in operand 4
;; picks the mnemonic: 3 or 7 gives vscatterpf0...pd, 2 or 6 gives
;; vscatterpf1...pd.
(define_insn "*avx512pf_scatterpf<mode>df_mask"
  [(unspec
     [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
      (match_operator:V8DF 5 "vsib_mem_operator"
	[(unspec:P
	   [(match_operand:P 2 "vsib_address_operand" "Tv")
	    (match_operand:VI4_256_8_512 1 "register_operand" "v")
	    (match_operand:SI 3 "const1248_operand" "n")]
	   UNSPEC_VSIBADDR)])
      (match_operand:SI 4 "const2367_operand" "n")]
     UNSPEC_SCATTER_PREFETCH)]
  "TARGET_AVX512PF"
{
  const HOST_WIDE_INT hint = INTVAL (operands[4]);

  /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
     gas changed what it requires incompatibly.  */
  if (hint == 3 || hint == 7)
    return "vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
  if (hint == 2 || hint == 6)
    return "vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";

  gcc_unreachable ();
}
  [(set_attr "type" "sse")
   (set_attr "mode" "XI")
   (set_attr "prefix" "evex")])
17429
;; AVX512ER vexp2: operand 0 receives UNSPEC_EXP2 of the 512-bit float
;; vector in operand 1.  The pattern name and template carry the mask
;; and SAE-only rounding substitutions.
(define_insn "avx512er_exp2<mode><mask_name><round_saeonly_name>"
  [(set (match_operand:VF_512 0 "register_operand" "=v")
	(unspec:VF_512
	  [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
	  UNSPEC_EXP2))]
  "TARGET_AVX512ER"
  "vexp2<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
  [(set_attr "type" "sse")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])
17440
;; AVX512ER vrcp28: operand 0 receives UNSPEC_RCP28 of the 512-bit float
;; vector in operand 1.  The pattern name and template carry the mask
;; and SAE-only rounding substitutions.
(define_insn "<mask_codefor>avx512er_rcp28<mode><mask_name><round_saeonly_name>"
  [(set (match_operand:VF_512 0 "register_operand" "=v")
	(unspec:VF_512
	  [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
	  UNSPEC_RCP28))]
  "TARGET_AVX512ER"
  "vrcp28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
  [(set_attr "type" "sse")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])
17451
;; Scalar AVX512ER vrcp28: a vec_merge with mask 1, so element 0 of the
;; result is taken from UNSPEC_RCP28 of operand 1 and the remaining
;; elements from operand 2.
;; NOTE(review): length_immediate is "1" although the output template
;; has no immediate operand -- confirm whether this is intentional.
(define_insn "avx512er_vmrcp28<mode><round_saeonly_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=v")
	(vec_merge:VF_128
	  (unspec:VF_128
	    [(match_operand:VF_128 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
	    UNSPEC_RCP28)
	  (match_operand:VF_128 2 "register_operand" "v")
	  (const_int 1)))]
  "TARGET_AVX512ER"
  "vrcp28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0, %2, %<iptr>1<round_saeonly_op3>}"
  [(set_attr "type" "sse")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")
   (set_attr "length_immediate" "1")])
17466
;; AVX512ER vrsqrt28: operand 0 receives UNSPEC_RSQRT28 of the 512-bit
;; float vector in operand 1.  The pattern name and template carry the
;; mask and SAE-only rounding substitutions.
(define_insn "<mask_codefor>avx512er_rsqrt28<mode><mask_name><round_saeonly_name>"
  [(set (match_operand:VF_512 0 "register_operand" "=v")
	(unspec:VF_512
	  [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
	  UNSPEC_RSQRT28))]
  "TARGET_AVX512ER"
  "vrsqrt28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
  [(set_attr "type" "sse")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])
17477
;; Scalar AVX512ER vrsqrt28: a vec_merge with mask 1, so element 0 of
;; the result is taken from UNSPEC_RSQRT28 of operand 1 and the
;; remaining elements from operand 2.
;; NOTE(review): length_immediate is "1" although the output template
;; has no immediate operand -- confirm whether this is intentional.
(define_insn "avx512er_vmrsqrt28<mode><round_saeonly_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=v")
	(vec_merge:VF_128
	  (unspec:VF_128
	    [(match_operand:VF_128 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
	    UNSPEC_RSQRT28)
	  (match_operand:VF_128 2 "register_operand" "v")
	  (const_int 1)))]
  "TARGET_AVX512ER"
  "vrsqrt28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0, %2, %<iptr>1<round_saeonly_op3>}"
  [(set_attr "type" "sse")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")
   (set_attr "length_immediate" "1")])
17492
17493 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
17494 ;;
17495 ;; XOP instructions
17496 ;;
17497 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
17498
;; The XOP multiply/accumulate patterns are written once and
;; instantiated for both plus and ss_plus.
(define_code_iterator xop_plus [plus ss_plus])

;; Mnemonic fragments selected by the xop_plus code.
(define_code_attr macs
  [(plus "macs")
   (ss_plus "macss")])
(define_code_attr madcs
  [(plus "madcs")
   (ss_plus "madcss")])
17503
17504 ;; XOP parallel integer multiply/add instructions.
17505
;; Element-wise multiply of operands 1 and 2, combined with operand 3 by
;; the xop_plus code, for the VI24_128 integer vector modes.
(define_insn "xop_p<macs><ssemodesuffix><ssemodesuffix>"
  [(set (match_operand:VI24_128 0 "register_operand" "=x")
	(xop_plus:VI24_128
	  (mult:VI24_128
	    (match_operand:VI24_128 1 "nonimmediate_operand" "%x")
	    (match_operand:VI24_128 2 "nonimmediate_operand" "xm"))
	  (match_operand:VI24_128 3 "register_operand" "x")))]
  "TARGET_XOP"
  "vp<macs><ssemodesuffix><ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "mode" "TI")
   (set_attr "type" "ssemuladd")])
17517
;; Multiplies the sign-extended elements 0 and 2 of the V4SI operands 1
;; and 2 as V2DI and combines the products with operand 3 by the
;; xop_plus code.
(define_insn "xop_p<macs>dql"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
	(xop_plus:V2DI
	  (mult:V2DI
	    (sign_extend:V2DI
	      (vec_select:V2SI
		(match_operand:V4SI 1 "nonimmediate_operand" "%x")
		(parallel [(const_int 0) (const_int 2)])))
	    (sign_extend:V2DI
	      (vec_select:V2SI
		(match_operand:V4SI 2 "nonimmediate_operand" "xm")
		(parallel [(const_int 0) (const_int 2)]))))
	  (match_operand:V2DI 3 "register_operand" "x")))]
  "TARGET_XOP"
  "vp<macs>dql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "mode" "TI")
   (set_attr "type" "ssemuladd")])
17535
;; Multiplies the sign-extended elements 1 and 3 of the V4SI operands 1
;; and 2 as V2DI and combines the products with operand 3 by the
;; xop_plus code.
(define_insn "xop_p<macs>dqh"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
	(xop_plus:V2DI
	  (mult:V2DI
	    (sign_extend:V2DI
	      (vec_select:V2SI
		(match_operand:V4SI 1 "nonimmediate_operand" "%x")
		(parallel [(const_int 1) (const_int 3)])))
	    (sign_extend:V2DI
	      (vec_select:V2SI
		(match_operand:V4SI 2 "nonimmediate_operand" "xm")
		(parallel [(const_int 1) (const_int 3)]))))
	  (match_operand:V2DI 3 "register_operand" "x")))]
  "TARGET_XOP"
  "vp<macs>dqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "mode" "TI")
   (set_attr "type" "ssemuladd")])
17553
;; XOP parallel integer multiply/add instructions for the intrinsics
;; Multiplies the sign-extended odd elements (1, 3, 5, 7) of the V8HI
;; operands 1 and 2 as V4SI and combines the products with operand 3 by
;; the xop_plus code.
(define_insn "xop_p<macs>wd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
	(xop_plus:V4SI
	  (mult:V4SI
	    (sign_extend:V4SI
	      (vec_select:V4HI
		(match_operand:V8HI 1 "nonimmediate_operand" "%x")
		(parallel [(const_int 1) (const_int 3)
			   (const_int 5) (const_int 7)])))
	    (sign_extend:V4SI
	      (vec_select:V4HI
		(match_operand:V8HI 2 "nonimmediate_operand" "xm")
		(parallel [(const_int 1) (const_int 3)
			   (const_int 5) (const_int 7)]))))
	  (match_operand:V4SI 3 "register_operand" "x")))]
  "TARGET_XOP"
  "vp<macs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "mode" "TI")
   (set_attr "type" "ssemuladd")])
17574
;; Adds the products of the sign-extended even elements (0, 2, 4, 6) of
;; the V8HI operands 1 and 2 to the products of their odd elements
;; (1, 3, 5, 7), then combines that V4SI sum with operand 3 by the
;; xop_plus code.
(define_insn "xop_p<madcs>wd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
	(xop_plus:V4SI
	  (plus:V4SI
	    (mult:V4SI
	      (sign_extend:V4SI
		(vec_select:V4HI
		  (match_operand:V8HI 1 "nonimmediate_operand" "%x")
		  (parallel [(const_int 0) (const_int 2)
			     (const_int 4) (const_int 6)])))
	      (sign_extend:V4SI
		(vec_select:V4HI
		  (match_operand:V8HI 2 "nonimmediate_operand" "xm")
		  (parallel [(const_int 0) (const_int 2)
			     (const_int 4) (const_int 6)]))))
	    (mult:V4SI
	      (sign_extend:V4SI
		(vec_select:V4HI
		  (match_dup 1)
		  (parallel [(const_int 1) (const_int 3)
			     (const_int 5) (const_int 7)])))
	      (sign_extend:V4SI
		(vec_select:V4HI
		  (match_dup 2)
		  (parallel [(const_int 1) (const_int 3)
			     (const_int 5) (const_int 7)])))))
	  (match_operand:V4SI 3 "register_operand" "x")))]
  "TARGET_XOP"
  "vp<madcs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "mode" "TI")
   (set_attr "type" "ssemuladd")])
17606
17607 ;; XOP parallel XMM conditional moves
;; vpcmov for 128- and 256-bit vector modes: an if_then_else whose
;; condition is operand 3 and whose two arms are operands 1 and 2.
;; Either operand 3 or operand 2 may come from memory, one per
;; alternative.
(define_insn "xop_pcmov_<mode><avxsizesuffix>"
  [(set (match_operand:V_128_256 0 "register_operand" "=x,x")
	(if_then_else:V_128_256
	  (match_operand:V_128_256 3 "nonimmediate_operand" "x,m")
	  (match_operand:V_128_256 1 "register_operand" "x,x")
	  (match_operand:V_128_256 2 "nonimmediate_operand" "xm,x")))]
  "TARGET_XOP"
  "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "sse4arg")])
17617
17618 ;; XOP horizontal add/subtract instructions
;; Horizontal add, bytes to words: each HImode result element is the sum
;; of an extended even-indexed byte of operand 1 and the extended
;; odd-indexed byte that follows it.  The extension code comes from the
;; any_extend iterator.
(define_insn "xop_phadd<u>bw"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
	(plus:V8HI
	  (any_extend:V8HI
	    (vec_select:V8QI
	      (match_operand:V16QI 1 "nonimmediate_operand" "xm")
	      (parallel [(const_int 0) (const_int 2)
			 (const_int 4) (const_int 6)
			 (const_int 8) (const_int 10)
			 (const_int 12) (const_int 14)])))
	  (any_extend:V8HI
	    (vec_select:V8QI
	      (match_dup 1)
	      (parallel [(const_int 1) (const_int 3)
			 (const_int 5) (const_int 7)
			 (const_int 9) (const_int 11)
			 (const_int 13) (const_int 15)])))))]
  "TARGET_XOP"
  "vphadd<u>bw\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
17639
;; Horizontal add, bytes to doublewords: each SImode result element is
;; the sum of four consecutive extended bytes of operand 1 (bytes 4k,
;; 4k+1, 4k+2 and 4k+3 for result element k).
(define_insn "xop_phadd<u>bd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
	(plus:V4SI
	  (plus:V4SI
	    (any_extend:V4SI
	      (vec_select:V4QI
		(match_operand:V16QI 1 "nonimmediate_operand" "xm")
		(parallel [(const_int 0) (const_int 4)
			   (const_int 8) (const_int 12)])))
	    (any_extend:V4SI
	      (vec_select:V4QI
		(match_dup 1)
		(parallel [(const_int 1) (const_int 5)
			   (const_int 9) (const_int 13)]))))
	  (plus:V4SI
	    (any_extend:V4SI
	      (vec_select:V4QI
		(match_dup 1)
		(parallel [(const_int 2) (const_int 6)
			   (const_int 10) (const_int 14)])))
	    (any_extend:V4SI
	      (vec_select:V4QI
		(match_dup 1)
		(parallel [(const_int 3) (const_int 7)
			   (const_int 11) (const_int 15)]))))))]
  "TARGET_XOP"
  "vphadd<u>bd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
17668
;; Horizontal add, bytes to quadwords: each DImode result element is the
;; sum of eight consecutive extended bytes of operand 1 (bytes 0-7 for
;; element 0, bytes 8-15 for element 1), written as a balanced tree of
;; additions.
(define_insn "xop_phadd<u>bq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
	(plus:V2DI
	  (plus:V2DI
	    (plus:V2DI
	      (any_extend:V2DI
		(vec_select:V2QI
		  (match_operand:V16QI 1 "nonimmediate_operand" "xm")
		  (parallel [(const_int 0) (const_int 8)])))
	      (any_extend:V2DI
		(vec_select:V2QI
		  (match_dup 1)
		  (parallel [(const_int 1) (const_int 9)]))))
	    (plus:V2DI
	      (any_extend:V2DI
		(vec_select:V2QI
		  (match_dup 1)
		  (parallel [(const_int 2) (const_int 10)])))
	      (any_extend:V2DI
		(vec_select:V2QI
		  (match_dup 1)
		  (parallel [(const_int 3) (const_int 11)])))))
	  (plus:V2DI
	    (plus:V2DI
	      (any_extend:V2DI
		(vec_select:V2QI
		  (match_dup 1)
		  (parallel [(const_int 4) (const_int 12)])))
	      (any_extend:V2DI
		(vec_select:V2QI
		  (match_dup 1)
		  (parallel [(const_int 5) (const_int 13)]))))
	    (plus:V2DI
	      (any_extend:V2DI
		(vec_select:V2QI
		  (match_dup 1)
		  (parallel [(const_int 6) (const_int 14)])))
	      (any_extend:V2DI
		(vec_select:V2QI
		  (match_dup 1)
		  (parallel [(const_int 7) (const_int 15)])))))))]
  "TARGET_XOP"
  "vphadd<u>bq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
17713
;; Horizontal add, words to doublewords: each SImode result element is
;; the sum of an extended even-indexed word of operand 1 and the
;; extended odd-indexed word that follows it.
(define_insn "xop_phadd<u>wd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
	(plus:V4SI
	  (any_extend:V4SI
	    (vec_select:V4HI
	      (match_operand:V8HI 1 "nonimmediate_operand" "xm")
	      (parallel [(const_int 0) (const_int 2)
			 (const_int 4) (const_int 6)])))
	  (any_extend:V4SI
	    (vec_select:V4HI
	      (match_dup 1)
	      (parallel [(const_int 1) (const_int 3)
			 (const_int 5) (const_int 7)])))))]
  "TARGET_XOP"
  "vphadd<u>wd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
17730
;; Horizontal add, words to quadwords: each DImode result element is the
;; sum of four consecutive extended words of operand 1 (words 0-3 for
;; element 0, words 4-7 for element 1).
(define_insn "xop_phadd<u>wq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
	(plus:V2DI
	  (plus:V2DI
	    (any_extend:V2DI
	      (vec_select:V2HI
		(match_operand:V8HI 1 "nonimmediate_operand" "xm")
		(parallel [(const_int 0) (const_int 4)])))
	    (any_extend:V2DI
	      (vec_select:V2HI
		(match_dup 1)
		(parallel [(const_int 1) (const_int 5)]))))
	  (plus:V2DI
	    (any_extend:V2DI
	      (vec_select:V2HI
		(match_dup 1)
		(parallel [(const_int 2) (const_int 6)])))
	    (any_extend:V2DI
	      (vec_select:V2HI
		(match_dup 1)
		(parallel [(const_int 3) (const_int 7)]))))))]
  "TARGET_XOP"
  "vphadd<u>wq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
17755
;; Horizontal add, doublewords to quadwords: each DImode result element
;; is the sum of an extended even-indexed doubleword of operand 1 and
;; the extended odd-indexed doubleword that follows it.
(define_insn "xop_phadd<u>dq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
	(plus:V2DI
	  (any_extend:V2DI
	    (vec_select:V2SI
	      (match_operand:V4SI 1 "nonimmediate_operand" "xm")
	      (parallel [(const_int 0) (const_int 2)])))
	  (any_extend:V2DI
	    (vec_select:V2SI
	      (match_dup 1)
	      (parallel [(const_int 1) (const_int 3)])))))]
  "TARGET_XOP"
  "vphadd<u>dq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
17770
;; Horizontal subtract, bytes to words: each HImode result element is a
;; sign-extended even-indexed byte of operand 1 minus the sign-extended
;; odd-indexed byte that follows it.
(define_insn "xop_phsubbw"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
	(minus:V8HI
	  (sign_extend:V8HI
	    (vec_select:V8QI
	      (match_operand:V16QI 1 "nonimmediate_operand" "xm")
	      (parallel [(const_int 0) (const_int 2)
			 (const_int 4) (const_int 6)
			 (const_int 8) (const_int 10)
			 (const_int 12) (const_int 14)])))
	  (sign_extend:V8HI
	    (vec_select:V8QI
	      (match_dup 1)
	      (parallel [(const_int 1) (const_int 3)
			 (const_int 5) (const_int 7)
			 (const_int 9) (const_int 11)
			 (const_int 13) (const_int 15)])))))]
  "TARGET_XOP"
  "vphsubbw\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
17791
;; Horizontal subtract, words to doublewords: each SImode result element
;; is a sign-extended even-indexed word of operand 1 minus the
;; sign-extended odd-indexed word that follows it.
(define_insn "xop_phsubwd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
	(minus:V4SI
	  (sign_extend:V4SI
	    (vec_select:V4HI
	      (match_operand:V8HI 1 "nonimmediate_operand" "xm")
	      (parallel [(const_int 0) (const_int 2)
			 (const_int 4) (const_int 6)])))
	  (sign_extend:V4SI
	    (vec_select:V4HI
	      (match_dup 1)
	      (parallel [(const_int 1) (const_int 3)
			 (const_int 5) (const_int 7)])))))]
  "TARGET_XOP"
  "vphsubwd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
17808
;; Horizontal subtract, doublewords to quadwords: each DImode result
;; element is a sign-extended even-indexed doubleword of operand 1 minus
;; the sign-extended odd-indexed doubleword that follows it.
(define_insn "xop_phsubdq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
	(minus:V2DI
	  (sign_extend:V2DI
	    (vec_select:V2SI
	      (match_operand:V4SI 1 "nonimmediate_operand" "xm")
	      (parallel [(const_int 0) (const_int 2)])))
	  (sign_extend:V2DI
	    (vec_select:V2SI
	      (match_dup 1)
	      (parallel [(const_int 1) (const_int 3)])))))]
  "TARGET_XOP"
  "vphsubdq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
17823
17824 ;; XOP permute instructions
;; vpperm as an opaque UNSPEC_XOP_PERMUTE over operands 1-3.  Operand 2
;; or operand 3 may be in memory, but the insn condition rejects the
;; case where both are.
(define_insn "xop_pperm"
  [(set (match_operand:V16QI 0 "register_operand" "=x,x")
	(unspec:V16QI
	  [(match_operand:V16QI 1 "register_operand" "x,x")
	   (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
	   (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
	  UNSPEC_XOP_PERMUTE))]
  "TARGET_XOP && (!MEM_P (operands[2]) || !MEM_P (operands[3]))"
  "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "mode" "TI")
   (set_attr "type" "sse4arg")])
17836
17837 ;; XOP pack instructions that combine two vectors into a smaller vector
;; Pack two V2DI vectors into one V4SI: the result is the concatenation
;; of the truncations of operands 1 and 2.  Operand 3 appears only in a
;; (use ...) and is passed to vpperm as its last source operand.
;; Operands 2 and 3 must not both be in memory.
(define_insn "xop_pperm_pack_v2di_v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=x,x")
	(vec_concat:V4SI
	  (truncate:V2SI
	    (match_operand:V2DI 1 "register_operand" "x,x"))
	  (truncate:V2SI
	    (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
   (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
  "TARGET_XOP && (!MEM_P (operands[2]) || !MEM_P (operands[3]))"
  "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "mode" "TI")
   (set_attr "type" "sse4arg")])
17850
;; Pack two V4SI vectors into one V8HI: the result is the concatenation
;; of the truncations of operands 1 and 2.  Operand 3 appears only in a
;; (use ...) and is passed to vpperm as its last source operand.
;; Operands 2 and 3 must not both be in memory.
(define_insn "xop_pperm_pack_v4si_v8hi"
  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
	(vec_concat:V8HI
	  (truncate:V4HI
	    (match_operand:V4SI 1 "register_operand" "x,x"))
	  (truncate:V4HI
	    (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
   (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
  "TARGET_XOP && (!MEM_P (operands[2]) || !MEM_P (operands[3]))"
  "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "mode" "TI")
   (set_attr "type" "sse4arg")])
17863
;; Pack two V8HI vectors into one V16QI: the result is the concatenation
;; of the truncations of operands 1 and 2.  Operand 3 appears only in a
;; (use ...) and is passed to vpperm as its last source operand.
;; Operands 2 and 3 must not both be in memory.
(define_insn "xop_pperm_pack_v8hi_v16qi"
  [(set (match_operand:V16QI 0 "register_operand" "=x,x")
	(vec_concat:V16QI
	  (truncate:V8QI
	    (match_operand:V8HI 1 "register_operand" "x,x"))
	  (truncate:V8QI
	    (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
   (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
  "TARGET_XOP && (!MEM_P (operands[2]) || !MEM_P (operands[3]))"
  "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "mode" "TI")
   (set_attr "type" "sse4arg")])
17876
17877 ;; XOP packed rotate instructions
;; Vector rotate-left expander for XOP.  A count accepted by
;; const_0_to_<sserotatemax>_operand is left to the pattern below.  Any
;; other count is converted to the element mode if needed, broadcast to
;; every lane of a fresh vector register and passed to xop_vrotl<mode>3.
(define_expand "rotl<mode>3"
  [(set (match_operand:VI_128 0 "register_operand")
	(rotate:VI_128
	  (match_operand:VI_128 1 "nonimmediate_operand")
	  (match_operand:SI 2 "general_operand")))]
  "TARGET_XOP"
{
  /* Only a count that is not an in-range constant needs expanding
     by hand; otherwise fall out and emit the pattern as written.  */
  if (!const_0_to_<sserotatemax>_operand (operands[2], SImode))
    {
      rtvec lanes = rtvec_alloc (<ssescalarnum>);
      rtx lane_init = gen_rtx_PARALLEL (<MODE>mode, lanes);
      rtx count_vec = gen_reg_rtx (<MODE>mode);
      rtx amount = operands[2];

      /* Bring the count into the element mode of the vector.  */
      if (GET_MODE (amount) != <ssescalarmode>mode)
	{
	  amount = gen_reg_rtx (<ssescalarmode>mode);
	  convert_move (amount, operands[2], false);
	}

      /* Every lane rotates by the same amount.  */
      for (int lane = 0; lane < <ssescalarnum>; ++lane)
	RTVEC_ELT (lanes, lane) = amount;

      emit_insn (gen_vec_init<mode><ssescalarmodelower> (count_vec, lane_init));
      emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], count_vec));
      DONE;
    }
})
17908
;; Rotate-right of a VI_128 vector by an SImode count, XOP only.  For a
;; count not accepted by const_0_to_<sserotatemax>_operand the C block
;; broadcasts the count into a vector exactly as rotl<mode>3 does, then
;; negates that vector and emits xop_vrotl; xop_vrotl's pattern maps a
;; negative per-element count to a right rotate by its negation.  When
;; the predicate accepts the count, the C block emits nothing itself.
17909 (define_expand "rotr<mode>3"
17910 [(set (match_operand:VI_128 0 "register_operand")
17911 (rotatert:VI_128
17912 (match_operand:VI_128 1 "nonimmediate_operand")
17913 (match_operand:SI 2 "general_operand")))]
17914 "TARGET_XOP"
17915 {
17916 /* If we were given a scalar, convert it to parallel */
17917 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
17918 {
17919 rtvec vs = rtvec_alloc (<ssescalarnum>);
17920 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
17921 rtx neg = gen_reg_rtx (<MODE>mode);
17922 rtx reg = gen_reg_rtx (<MODE>mode);
17923 rtx op2 = operands[2];
17924 int i;
17925
17926 if (GET_MODE (op2) != <ssescalarmode>mode)
17927 {
17928 op2 = gen_reg_rtx (<ssescalarmode>mode);
17929 convert_move (op2, operands[2], false);
17930 }
17931
17932 for (i = 0; i < <ssescalarnum>; i++)
17933 RTVEC_ELT (vs, i) = op2;
17934
17935 emit_insn (gen_vec_init<mode><ssescalarmodelower> (reg, par));
17936 emit_insn (gen_neg<mode>2 (neg, reg));
17937 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
17938 DONE;
17939 }
17940 })
17941
;; Rotate-left of a VI_128 vector by an immediate count restricted by
;; const_0_to_<sserotatemax>_operand; emitted as vprot with the
;; immediate as the count operand.
17942 (define_insn "xop_rotl<mode>3"
17943 [(set (match_operand:VI_128 0 "register_operand" "=x")
17944 (rotate:VI_128
17945 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
17946 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
17947 "TARGET_XOP"
17948 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
17949 [(set_attr "type" "sseishft")
17950 (set_attr "length_immediate" "1")
17951 (set_attr "mode" "TI")])
17952
;; Rotate-right of a VI_128 vector by an immediate count.  The output
;; code stores (element bit size - count) in operands[3] and prints
;; vprot with that value, i.e. the right rotate is emitted as the
;; complementary left rotate.
17953 (define_insn "xop_rotr<mode>3"
17954 [(set (match_operand:VI_128 0 "register_operand" "=x")
17955 (rotatert:VI_128
17956 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
17957 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
17958 "TARGET_XOP"
17959 {
17960 operands[3]
17961 = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
17962 return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\";
17963 }
17964 [(set_attr "type" "sseishft")
17965 (set_attr "length_immediate" "1")
17966 (set_attr "mode" "TI")])
17967
;; Rotate-right with a per-element vector count (operand 2), XOP only.
;; Negates the count vector into a fresh register and emits xop_vrotl,
;; whose pattern treats negative counts as right rotates.
17968 (define_expand "vrotr<mode>3"
17969 [(match_operand:VI_128 0 "register_operand")
17970 (match_operand:VI_128 1 "register_operand")
17971 (match_operand:VI_128 2 "register_operand")]
17972 "TARGET_XOP"
17973 {
17974 rtx reg = gen_reg_rtx (<MODE>mode);
17975 emit_insn (gen_neg<mode>2 (reg, operands[2]));
17976 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
17977 DONE;
17978 })
17979
;; Rotate-left with a per-element vector count (operand 2), XOP only.
;; Forwards the three operands unchanged to xop_vrotl.
17980 (define_expand "vrotl<mode>3"
17981 [(match_operand:VI_128 0 "register_operand")
17982 (match_operand:VI_128 1 "register_operand")
17983 (match_operand:VI_128 2 "register_operand")]
17984 "TARGET_XOP"
17985 {
17986 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
17987 DONE;
17988 })
17989
;; Variable-count vprot.  The pattern is an if_then_else on the count
;; vector (operand 2): where the count is >= 0 the result is operand 1
;; rotated left by the count, otherwise operand 1 rotated right by the
;; negated count.  Operands 1 and 2 may not both be memory.
17990 (define_insn "xop_vrotl<mode>3"
17991 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
17992 (if_then_else:VI_128
17993 (ge:VI_128
17994 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
17995 (const_int 0))
17996 (rotate:VI_128
17997 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
17998 (match_dup 2))
17999 (rotatert:VI_128
18000 (match_dup 1)
18001 (neg:VI_128 (match_dup 2)))))]
18002 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
18003 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
18004 [(set_attr "type" "sseishft")
18005 (set_attr "prefix_data16" "0")
18006 (set_attr "prefix_extra" "2")
18007 (set_attr "mode" "TI")])
18008
18009 ;; XOP packed shift instructions.
;; Per-element logical right shift for VI12_128 modes, XOP only.
;; Negates the count vector and emits xop_shl, whose pattern maps a
;; negative count to a logical right shift by its negation.
18010 (define_expand "vlshr<mode>3"
18011 [(set (match_operand:VI12_128 0 "register_operand")
18012 (lshiftrt:VI12_128
18013 (match_operand:VI12_128 1 "register_operand")
18014 (match_operand:VI12_128 2 "nonimmediate_operand")))]
18015 "TARGET_XOP"
18016 {
18017 rtx neg = gen_reg_rtx (<MODE>mode);
18018 emit_insn (gen_neg<mode>2 (neg, operands[2]));
18019 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
18020 DONE;
18021 })
18022
;; Per-element logical right shift for VI48_128 modes, enabled for AVX2
;; or XOP.  Without AVX2 the count vector is negated and xop_shl is
;; emitted; with AVX2 the C block emits nothing itself.
18023 (define_expand "vlshr<mode>3"
18024 [(set (match_operand:VI48_128 0 "register_operand")
18025 (lshiftrt:VI48_128
18026 (match_operand:VI48_128 1 "register_operand")
18027 (match_operand:VI48_128 2 "nonimmediate_operand")))]
18028 "TARGET_AVX2 || TARGET_XOP"
18029 {
18030 if (!TARGET_AVX2)
18031 {
18032 rtx neg = gen_reg_rtx (<MODE>mode);
18033 emit_insn (gen_neg<mode>2 (neg, operands[2]));
18034 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
18035 DONE;
18036 }
18037 })
18038
;; Per-element logical right shift for VI48_512 modes; requires AVX512F
;; and has no preparation code.
18039 (define_expand "vlshr<mode>3"
18040 [(set (match_operand:VI48_512 0 "register_operand")
18041 (lshiftrt:VI48_512
18042 (match_operand:VI48_512 1 "register_operand")
18043 (match_operand:VI48_512 2 "nonimmediate_operand")))]
18044 "TARGET_AVX512F")
18045
;; Per-element logical right shift for VI48_256 modes; requires AVX2
;; and has no preparation code.
18046 (define_expand "vlshr<mode>3"
18047 [(set (match_operand:VI48_256 0 "register_operand")
18048 (lshiftrt:VI48_256
18049 (match_operand:VI48_256 1 "register_operand")
18050 (match_operand:VI48_256 2 "nonimmediate_operand")))]
18051 "TARGET_AVX2")
18052
;; Per-element arithmetic right shift of V8HI, enabled for XOP or for
;; AVX512BW together with AVX512VL.  When XOP is available it is
;; preferred: the count vector is negated and xop_shav8hi3 is emitted.
;; Otherwise the C block emits nothing itself.
18053 (define_expand "vashrv8hi3<mask_name>"
18054 [(set (match_operand:V8HI 0 "register_operand")
18055 (ashiftrt:V8HI
18056 (match_operand:V8HI 1 "register_operand")
18057 (match_operand:V8HI 2 "nonimmediate_operand")))]
18058 "TARGET_XOP || (TARGET_AVX512BW && TARGET_AVX512VL)"
18059 {
18060 if (TARGET_XOP)
18061 {
18062 rtx neg = gen_reg_rtx (V8HImode);
18063 emit_insn (gen_negv8hi2 (neg, operands[2]));
18064 emit_insn (gen_xop_shav8hi3 (operands[0], operands[1], neg));
18065 DONE;
18066 }
18067 })
18068
;; Per-element arithmetic right shift of V16QI, XOP only: negates the
;; count vector and emits xop_shav16qi3.
18069 (define_expand "vashrv16qi3"
18070 [(set (match_operand:V16QI 0 "register_operand")
18071 (ashiftrt:V16QI
18072 (match_operand:V16QI 1 "register_operand")
18073 (match_operand:V16QI 2 "nonimmediate_operand")))]
18074 "TARGET_XOP"
18075 {
18076 rtx neg = gen_reg_rtx (V16QImode);
18077 emit_insn (gen_negv16qi2 (neg, operands[2]));
18078 emit_insn (gen_xop_shav16qi3 (operands[0], operands[1], neg));
18079 DONE;
18080 })
18081
;; Per-element arithmetic right shift of V2DI, enabled for XOP or
;; AVX512VL.  When XOP is available the count vector is negated and
;; xop_shav2di3 is emitted; otherwise the C block emits nothing itself.
18082 (define_expand "vashrv2di3<mask_name>"
18083 [(set (match_operand:V2DI 0 "register_operand")
18084 (ashiftrt:V2DI
18085 (match_operand:V2DI 1 "register_operand")
18086 (match_operand:V2DI 2 "nonimmediate_operand")))]
18087 "TARGET_XOP || TARGET_AVX512VL"
18088 {
18089 if (TARGET_XOP)
18090 {
18091 rtx neg = gen_reg_rtx (V2DImode);
18092 emit_insn (gen_negv2di2 (neg, operands[2]));
18093 emit_insn (gen_xop_shav2di3 (operands[0], operands[1], neg));
18094 DONE;
18095 }
18096 })
18097
;; Per-element arithmetic right shift of V4SI, enabled for AVX2 or XOP.
;; Unlike the V8HI/V2DI variants, AVX2 takes precedence here: only when
;; AVX2 is unavailable is the count negated and xop_shav4si3 emitted.
18098 (define_expand "vashrv4si3"
18099 [(set (match_operand:V4SI 0 "register_operand")
18100 (ashiftrt:V4SI (match_operand:V4SI 1 "register_operand")
18101 (match_operand:V4SI 2 "nonimmediate_operand")))]
18102 "TARGET_AVX2 || TARGET_XOP"
18103 {
18104 if (!TARGET_AVX2)
18105 {
18106 rtx neg = gen_reg_rtx (V4SImode);
18107 emit_insn (gen_negv4si2 (neg, operands[2]));
18108 emit_insn (gen_xop_shav4si3 (operands[0], operands[1], neg));
18109 DONE;
18110 }
18111 })
18112
;; Per-element arithmetic right shift of V16SI; requires AVX512F and
;; has no preparation code.
18113 (define_expand "vashrv16si3"
18114 [(set (match_operand:V16SI 0 "register_operand")
18115 (ashiftrt:V16SI (match_operand:V16SI 1 "register_operand")
18116 (match_operand:V16SI 2 "nonimmediate_operand")))]
18117 "TARGET_AVX512F")
18118
;; Per-element arithmetic right shift of V8SI; requires AVX2 and has no
;; preparation code.
18119 (define_expand "vashrv8si3"
18120 [(set (match_operand:V8SI 0 "register_operand")
18121 (ashiftrt:V8SI (match_operand:V8SI 1 "register_operand")
18122 (match_operand:V8SI 2 "nonimmediate_operand")))]
18123 "TARGET_AVX2")
18124
;; Per-element left shift for VI12_128 modes, XOP only.  Forwards the
;; operands unchanged to xop_sha (a non-negative count selects the
;; ashift arm of that pattern).
18125 (define_expand "vashl<mode>3"
18126 [(set (match_operand:VI12_128 0 "register_operand")
18127 (ashift:VI12_128
18128 (match_operand:VI12_128 1 "register_operand")
18129 (match_operand:VI12_128 2 "nonimmediate_operand")))]
18130 "TARGET_XOP"
18131 {
18132 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
18133 DONE;
18134 })
18135
;; Per-element left shift for VI48_128 modes, enabled for AVX2 or XOP.
;; Without AVX2 the count is forced into a register and xop_sha is
;; emitted; with AVX2 the C block emits nothing itself.
18136 (define_expand "vashl<mode>3"
18137 [(set (match_operand:VI48_128 0 "register_operand")
18138 (ashift:VI48_128
18139 (match_operand:VI48_128 1 "register_operand")
18140 (match_operand:VI48_128 2 "nonimmediate_operand")))]
18141 "TARGET_AVX2 || TARGET_XOP"
18142 {
18143 if (!TARGET_AVX2)
18144 {
18145 operands[2] = force_reg (<MODE>mode, operands[2]);
18146 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
18147 DONE;
18148 }
18149 })
18150
;; Per-element left shift for VI48_512 modes; requires AVX512F and has
;; no preparation code.
18151 (define_expand "vashl<mode>3"
18152 [(set (match_operand:VI48_512 0 "register_operand")
18153 (ashift:VI48_512
18154 (match_operand:VI48_512 1 "register_operand")
18155 (match_operand:VI48_512 2 "nonimmediate_operand")))]
18156 "TARGET_AVX512F")
18157
;; Per-element left shift for VI48_256 modes; requires AVX2 and has no
;; preparation code.
18158 (define_expand "vashl<mode>3"
18159 [(set (match_operand:VI48_256 0 "register_operand")
18160 (ashift:VI48_256
18161 (match_operand:VI48_256 1 "register_operand")
18162 (match_operand:VI48_256 2 "nonimmediate_operand")))]
18163 "TARGET_AVX2")
18164
;; Variable-count vpsha.  Where the count vector (operand 2) is >= 0 the
;; result is operand 1 shifted left by the count; otherwise operand 1
;; arithmetically shifted right by the negated count.  Operands 1 and 2
;; may not both be memory.
18165 (define_insn "xop_sha<mode>3"
18166 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
18167 (if_then_else:VI_128
18168 (ge:VI_128
18169 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
18170 (const_int 0))
18171 (ashift:VI_128
18172 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
18173 (match_dup 2))
18174 (ashiftrt:VI_128
18175 (match_dup 1)
18176 (neg:VI_128 (match_dup 2)))))]
18177 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
18178 "vpsha<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
18179 [(set_attr "type" "sseishft")
18180 (set_attr "prefix_data16" "0")
18181 (set_attr "prefix_extra" "2")
18182 (set_attr "mode" "TI")])
18183
;; Variable-count vpshl.  Identical in shape to xop_sha except that the
;; negative-count arm is a logical (lshiftrt) rather than arithmetic
;; right shift.  Operands 1 and 2 may not both be memory.
18184 (define_insn "xop_shl<mode>3"
18185 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
18186 (if_then_else:VI_128
18187 (ge:VI_128
18188 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
18189 (const_int 0))
18190 (ashift:VI_128
18191 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
18192 (match_dup 2))
18193 (lshiftrt:VI_128
18194 (match_dup 1)
18195 (neg:VI_128 (match_dup 2)))))]
18196 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
18197 "vpshl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
18198 [(set_attr "type" "sseishft")
18199 (set_attr "prefix_data16" "0")
18200 (set_attr "prefix_extra" "2")
18201 (set_attr "mode" "TI")])
18202
;; Shift of a byte-element vector (VI1_AVX512) by a scalar SImode count,
;; for every code in any_shift.  Always ends in DONE, so the C block
;; emits the whole expansion.
;;  * XOP and V16QImode: for the right-shift codes the count is negated,
;;    directly if it is a CONST_INT, otherwise by negating the broadcast
;;    vector afterwards.  The count is replicated into all 16 elements
;;    via vec_initv16qiqi, then xop_shl is used for LSHIFTRT and xop_sha
;;    for the other codes.
;;  * Otherwise: delegated to ix86_expand_vecop_qihi.
18203 (define_expand "<shift_insn><mode>3"
18204 [(set (match_operand:VI1_AVX512 0 "register_operand")
18205 (any_shift:VI1_AVX512
18206 (match_operand:VI1_AVX512 1 "register_operand")
18207 (match_operand:SI 2 "nonmemory_operand")))]
18208 "TARGET_SSE2"
18209 {
18210 if (TARGET_XOP && <MODE>mode == V16QImode)
18211 {
18212 bool negate = false;
18213 rtx (*gen) (rtx, rtx, rtx);
18214 rtx tmp, par;
18215 int i;
18216
18217 if (<CODE> != ASHIFT)
18218 {
18219 if (CONST_INT_P (operands[2]))
18220 operands[2] = GEN_INT (-INTVAL (operands[2]));
18221 else
18222 negate = true;
18223 }
18224 par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
18225 for (i = 0; i < 16; i++)
18226 XVECEXP (par, 0, i) = operands[2];
18227
18228 tmp = gen_reg_rtx (V16QImode);
18229 emit_insn (gen_vec_initv16qiqi (tmp, par));
18230
18231 if (negate)
18232 emit_insn (gen_negv16qi2 (tmp, tmp));
18233
18234 gen = (<CODE> == LSHIFTRT ? gen_xop_shlv16qi3 : gen_xop_shav16qi3);
18235 emit_insn (gen (operands[0], operands[1], tmp));
18236 }
18237 else
18238 ix86_expand_vecop_qihi (<CODE>, operands[0], operands[1], operands[2]);
18239 DONE;
18240 })
18241
;; Arithmetic right shift of V2DI by a scalar DImode count, enabled for
;; XOP or AVX512VL.  Without AVX512VL the count is negated (directly
;; for a CONST_INT, otherwise by negating the broadcast vector),
;; replicated into both elements via vec_initv2didi, and xop_shav2di3
;; is emitted.  With AVX512VL the C block emits nothing itself.
18242 (define_expand "ashrv2di3"
18243 [(set (match_operand:V2DI 0 "register_operand")
18244 (ashiftrt:V2DI
18245 (match_operand:V2DI 1 "register_operand")
18246 (match_operand:DI 2 "nonmemory_operand")))]
18247 "TARGET_XOP || TARGET_AVX512VL"
18248 {
18249 if (!TARGET_AVX512VL)
18250 {
18251 rtx reg = gen_reg_rtx (V2DImode);
18252 rtx par;
18253 bool negate = false;
18254 int i;
18255
18256 if (CONST_INT_P (operands[2]))
18257 operands[2] = GEN_INT (-INTVAL (operands[2]));
18258 else
18259 negate = true;
18260
18261 par = gen_rtx_PARALLEL (V2DImode, rtvec_alloc (2));
18262 for (i = 0; i < 2; i++)
18263 XVECEXP (par, 0, i) = operands[2];
18264
18265 emit_insn (gen_vec_initv2didi (reg, par));
18266
18267 if (negate)
18268 emit_insn (gen_negv2di2 (reg, reg));
18269
18270 emit_insn (gen_xop_shav2di3 (operands[0], operands[1], reg));
18271 DONE;
18272 }
18273 })
18274
18275 ;; XOP FRCZ support
;; vfrcz over a whole FMAMODE operand, modelled as UNSPEC_FRCZ of
;; operand 1 (register or memory).
18276 (define_insn "xop_frcz<mode>2"
18277 [(set (match_operand:FMAMODE 0 "register_operand" "=x")
18278 (unspec:FMAMODE
18279 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
18280 UNSPEC_FRCZ))]
18281 "TARGET_XOP"
18282 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
18283 [(set_attr "type" "ssecvt1")
18284 (set_attr "mode" "<MODE>")])
18285
;; Expander for the scalar vfrcz form: a vec_merge, with mask
;; (const_int 1), of UNSPEC_FRCZ applied to operand 1 and operand 2.
;; The preparation statement sets operand 2 to the zero constant of the
;; vector mode.
18286 (define_expand "xop_vmfrcz<mode>2"
18287 [(set (match_operand:VF_128 0 "register_operand")
18288 (vec_merge:VF_128
18289 (unspec:VF_128
18290 [(match_operand:VF_128 1 "nonimmediate_operand")]
18291 UNSPEC_FRCZ)
18292 (match_dup 2)
18293 (const_int 1)))]
18294 "TARGET_XOP"
18295 "operands[2] = CONST0_RTX (<MODE>mode);")
18296
;; Insn matching the RTL produced by the xop_vmfrcz expander: operand 2
;; must satisfy const0_operand, and the scalar-suffixed vfrcz is
;; printed with operands 1 and 0 only.
18297 (define_insn "*xop_vmfrcz<mode>2"
18298 [(set (match_operand:VF_128 0 "register_operand" "=x")
18299 (vec_merge:VF_128
18300 (unspec:VF_128
18301 [(match_operand:VF_128 1 "nonimmediate_operand" "xm")]
18302 UNSPEC_FRCZ)
18303 (match_operand:VF_128 2 "const0_operand")
18304 (const_int 1)))]
18305 "TARGET_XOP"
18306 "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}"
18307 [(set_attr "type" "ssecvt1")
18308 (set_attr "mode" "<MODE>")])
18309
;; XOP vpcom for comparison codes accepted by
;; ix86_comparison_int_operator.  The %Y1 modifier prints the condition
;; of operator 1 into the mnemonic.
;; NOTE(review): "type" is "sse4arg" here but "ssecmp" on the unsigned
;; variants -- confirm this difference is intentional.
18310 (define_insn "xop_maskcmp<mode>3"
18311 [(set (match_operand:VI_128 0 "register_operand" "=x")
18312 (match_operator:VI_128 1 "ix86_comparison_int_operator"
18313 [(match_operand:VI_128 2 "register_operand" "x")
18314 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
18315 "TARGET_XOP"
18316 "vpcom%Y1<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
18317 [(set_attr "type" "sse4arg")
18318 (set_attr "prefix_data16" "0")
18319 (set_attr "prefix_rep" "0")
18320 (set_attr "prefix_extra" "2")
18321 (set_attr "length_immediate" "1")
18322 (set_attr "mode" "TI")])
18323
;; XOP vpcom...u for comparison codes accepted by
;; ix86_comparison_uns_operator; same operand layout as
;; xop_maskcmp<mode>3 with a "u" inserted in the mnemonic.
18324 (define_insn "xop_maskcmp_uns<mode>3"
18325 [(set (match_operand:VI_128 0 "register_operand" "=x")
18326 (match_operator:VI_128 1 "ix86_comparison_uns_operator"
18327 [(match_operand:VI_128 2 "register_operand" "x")
18328 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
18329 "TARGET_XOP"
18330 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
18331 [(set_attr "type" "ssecmp")
18332 (set_attr "prefix_data16" "0")
18333 (set_attr "prefix_rep" "0")
18334 (set_attr "prefix_extra" "2")
18335 (set_attr "length_immediate" "1")
18336 (set_attr "mode" "TI")])
18337
18338 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
18339 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
18340 ;; the exact instruction generated for the intrinsic.
;; Same comparison and output template as xop_maskcmp_uns<mode>3, but
;; the comparison is wrapped in UNSPEC_XOP_UNSIGNED_CMP.
18341 (define_insn "xop_maskcmp_uns2<mode>3"
18342 [(set (match_operand:VI_128 0 "register_operand" "=x")
18343 (unspec:VI_128
18344 [(match_operator:VI_128 1 "ix86_comparison_uns_operator"
18345 [(match_operand:VI_128 2 "register_operand" "x")
18346 (match_operand:VI_128 3 "nonimmediate_operand" "xm")])]
18347 UNSPEC_XOP_UNSIGNED_CMP))]
18348 "TARGET_XOP"
18349 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
18350 [(set_attr "type" "ssecmp")
18351 (set_attr "prefix_data16" "0")
18352 (set_attr "prefix_extra" "2")
18353 (set_attr "length_immediate" "1")
18354 (set_attr "mode" "TI")])
18355
18356 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
18357 ;; being added here to be complete.
;; UNSPEC_XOP_TRUEFALSE over two vector operands and a constant
;; selector (operand 3): a non-zero selector prints vpcomtrue, zero
;; prints vpcomfalse.
18358 (define_insn "xop_pcom_tf<mode>3"
18359 [(set (match_operand:VI_128 0 "register_operand" "=x")
18360 (unspec:VI_128
18361 [(match_operand:VI_128 1 "register_operand" "x")
18362 (match_operand:VI_128 2 "nonimmediate_operand" "xm")
18363 (match_operand:SI 3 "const_int_operand" "n")]
18364 UNSPEC_XOP_TRUEFALSE))]
18365 "TARGET_XOP"
18366 {
18367 return ((INTVAL (operands[3]) != 0)
18368 ? "vpcomtrue<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
18369 : "vpcomfalse<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}");
18370 }
18371 [(set_attr "type" "ssecmp")
18372 (set_attr "prefix_data16" "0")
18373 (set_attr "prefix_extra" "2")
18374 (set_attr "length_immediate" "1")
18375 (set_attr "mode" "TI")])
18376
;; vpermil2 for 128/256-bit float vectors, modelled as UNSPEC_VPERMIL2
;; of two data operands, an integer-vector operand 3 and an immediate
;; operand 4 in the range 0..3.  In each alternative exactly one of
;; operands 2 and 3 is allowed to be memory.
18377 (define_insn "xop_vpermil2<mode>3"
18378 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
18379 (unspec:VF_128_256
18380 [(match_operand:VF_128_256 1 "register_operand" "x,x")
18381 (match_operand:VF_128_256 2 "nonimmediate_operand" "x,m")
18382 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "xm,x")
18383 (match_operand:SI 4 "const_0_to_3_operand" "n,n")]
18384 UNSPEC_VPERMIL2))]
18385 "TARGET_XOP"
18386 "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
18387 [(set_attr "type" "sse4arg")
18388 (set_attr "length_immediate" "1")
18389 (set_attr "mode" "<MODE>")])
18390
18391 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
18392
;; AES encrypt, modelled as UNSPEC_AESENC of two V2DI operands.
;; Alternative 0 (isa noavx) is the two-operand form with operand 1
;; tied to the destination; alternative 1 (isa avx) is the
;; three-operand VEX form.
18393 (define_insn "aesenc"
18394 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
18395 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
18396 (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
18397 UNSPEC_AESENC))]
18398 "TARGET_AES"
18399 "@
18400 aesenc\t{%2, %0|%0, %2}
18401 vaesenc\t{%2, %1, %0|%0, %1, %2}"
18402 [(set_attr "isa" "noavx,avx")
18403 (set_attr "type" "sselog1")
18404 (set_attr "prefix_extra" "1")
18405 (set_attr "prefix" "orig,vex")
18406 (set_attr "btver2_decode" "double,double")
18407 (set_attr "mode" "TI")])
18408
;; AES encrypt-last, modelled as UNSPEC_AESENCLAST; same two
;; alternatives (noavx tied-operand form, avx three-operand form) as
;; aesenc.
18409 (define_insn "aesenclast"
18410 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
18411 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
18412 (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
18413 UNSPEC_AESENCLAST))]
18414 "TARGET_AES"
18415 "@
18416 aesenclast\t{%2, %0|%0, %2}
18417 vaesenclast\t{%2, %1, %0|%0, %1, %2}"
18418 [(set_attr "isa" "noavx,avx")
18419 (set_attr "type" "sselog1")
18420 (set_attr "prefix_extra" "1")
18421 (set_attr "prefix" "orig,vex")
18422 (set_attr "btver2_decode" "double,double")
18423 (set_attr "mode" "TI")])
18424
;; AES decrypt, modelled as UNSPEC_AESDEC; same two alternatives
;; (noavx tied-operand form, avx three-operand form) as aesenc.
18425 (define_insn "aesdec"
18426 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
18427 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
18428 (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
18429 UNSPEC_AESDEC))]
18430 "TARGET_AES"
18431 "@
18432 aesdec\t{%2, %0|%0, %2}
18433 vaesdec\t{%2, %1, %0|%0, %1, %2}"
18434 [(set_attr "isa" "noavx,avx")
18435 (set_attr "type" "sselog1")
18436 (set_attr "prefix_extra" "1")
18437 (set_attr "prefix" "orig,vex")
18438 (set_attr "btver2_decode" "double,double")
18439 (set_attr "mode" "TI")])
18440
;; AES decrypt-last, modelled as UNSPEC_AESDECLAST; same two
;; alternatives (noavx tied-operand form, avx three-operand form) as
;; aesenc.
18441 (define_insn "aesdeclast"
18442 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
18443 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
18444 (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
18445 UNSPEC_AESDECLAST))]
18446 "TARGET_AES"
18447 "@
18448 aesdeclast\t{%2, %0|%0, %2}
18449 vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
18450 [(set_attr "isa" "noavx,avx")
18451 (set_attr "type" "sselog1")
18452 (set_attr "prefix_extra" "1")
18453 (set_attr "prefix" "orig,vex")
18454 (set_attr "btver2_decode" "double,double")
18455 (set_attr "mode" "TI")])
18456
;; aesimc, modelled as UNSPEC_AESIMC of a single V2DI source.  One
;; alternative serves both encodings: the template starts with %v and
;; "prefix" is maybe_vex.
18457 (define_insn "aesimc"
18458 [(set (match_operand:V2DI 0 "register_operand" "=x")
18459 (unspec:V2DI [(match_operand:V2DI 1 "vector_operand" "xBm")]
18460 UNSPEC_AESIMC))]
18461 "TARGET_AES"
18462 "%vaesimc\t{%1, %0|%0, %1}"
18463 [(set_attr "type" "sselog1")
18464 (set_attr "prefix_extra" "1")
18465 (set_attr "prefix" "maybe_vex")
18466 (set_attr "mode" "TI")])
18467
;; aeskeygenassist, modelled as UNSPEC_AESKEYGENASSIST of a V2DI source
;; and an 8-bit immediate (operand 2, range 0..255).  Single
;; alternative with a %v template and maybe_vex prefix.
18468 (define_insn "aeskeygenassist"
18469 [(set (match_operand:V2DI 0 "register_operand" "=x")
18470 (unspec:V2DI [(match_operand:V2DI 1 "vector_operand" "xBm")
18471 (match_operand:SI 2 "const_0_to_255_operand" "n")]
18472 UNSPEC_AESKEYGENASSIST))]
18473 "TARGET_AES"
18474 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
18475 [(set_attr "type" "sselog1")
18476 (set_attr "prefix_extra" "1")
18477 (set_attr "length_immediate" "1")
18478 (set_attr "prefix" "maybe_vex")
18479 (set_attr "mode" "TI")])
18480
;; pclmulqdq, modelled as UNSPEC_PCLMUL of two V2DI operands and an
;; 8-bit immediate (operand 3).  Alternative 0 (noavx) ties operand 1
;; to the destination; alternative 1 (avx) is the three-operand VEX
;; form.
18481 (define_insn "pclmulqdq"
18482 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
18483 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
18484 (match_operand:V2DI 2 "vector_operand" "xBm,xm")
18485 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
18486 UNSPEC_PCLMUL))]
18487 "TARGET_PCLMUL"
18488 "@
18489 pclmulqdq\t{%3, %2, %0|%0, %2, %3}
18490 vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18491 [(set_attr "isa" "noavx,avx")
18492 (set_attr "type" "sselog1")
18493 (set_attr "prefix_extra" "1")
18494 (set_attr "length_immediate" "1")
18495 (set_attr "prefix" "orig,vex")
18496 (set_attr "mode" "TI")])
18497
;; Builds the PARALLEL for vzeroall.  Element 0 is the
;; UNSPECV_VZEROALL unspec_volatile; it is followed by one SET per SSE
;; register (16 when TARGET_64BIT, otherwise 8) assigning the V8SImode
;; zero constant to that register viewed in V8SImode.
18498 (define_expand "avx_vzeroall"
18499 [(match_par_dup 0 [(const_int 0)])]
18500 "TARGET_AVX"
18501 {
18502 int nregs = TARGET_64BIT ? 16 : 8;
18503 int regno;
18504
18505 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
18506
18507 XVECEXP (operands[0], 0, 0)
18508 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
18509 UNSPECV_VZEROALL);
18510
18511 for (regno = 0; regno < nregs; regno++)
18512 XVECEXP (operands[0], 0, regno + 1)
18513 = gen_rtx_SET (gen_rtx_REG (V8SImode, GET_SSE_REGNO (regno)),
18514 CONST0_RTX (V8SImode));
18515 })
18516
;; Insn matching a PARALLEL accepted by the vzeroall_operation
;; predicate whose first element is the UNSPECV_VZEROALL
;; unspec_volatile; prints a bare vzeroall.
18517 (define_insn "*avx_vzeroall"
18518 [(match_parallel 0 "vzeroall_operation"
18519 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
18520 "TARGET_AVX"
18521 "vzeroall"
18522 [(set_attr "type" "sse")
18523 (set_attr "modrm" "0")
18524 (set_attr "memory" "none")
18525 (set_attr "prefix" "vex")
18526 (set_attr "btver2_decode" "vector")
18527 (set_attr "mode" "OI")])
18528
18529 ;; Clear the upper 128bits of AVX registers, equivalent to a NOP
18530 ;; if the upper 128bits are unused.
;; Modelled as a lone unspec_volatile (UNSPECV_VZEROUPPER) with no
;; operands; prints a bare vzeroupper.
18531 (define_insn "avx_vzeroupper"
18532 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)]
18533 "TARGET_AVX"
18534 "vzeroupper"
18535 [(set_attr "type" "sse")
18536 (set_attr "modrm" "0")
18537 (set_attr "memory" "none")
18538 (set_attr "prefix" "vex")
18539 (set_attr "btver2_decode" "vector")
18540 (set_attr "mode" "OI")])
18541
;; Maps each integer vector mode to the "isa" value used for the
;; "v"-constraint alternatives of the pbroadcast patterns: QI- and
;; HI-element modes map to "avx512bw", SI- and DI-element modes to
;; "avx512f".
18542 (define_mode_attr pbroadcast_evex_isa
18543 [(V64QI "avx512bw") (V32QI "avx512bw") (V16QI "avx512bw")
18544 (V32HI "avx512bw") (V16HI "avx512bw") (V8HI "avx512bw")
18545 (V16SI "avx512f") (V8SI "avx512f") (V4SI "avx512f")
18546 (V8DI "avx512f") (V4DI "avx512f") (V2DI "avx512f")])
18547
;; vpbroadcast of element 0 of an XMM-mode source (register or memory)
;; into every element of a VI-mode destination.  Alternative 0 uses "x"
;; registers with the VEX prefix; alternative 1 uses "v" registers,
;; is gated by <pbroadcast_evex_isa>, and is marked EVEX.
18548 (define_insn "avx2_pbroadcast<mode>"
18549 [(set (match_operand:VI 0 "register_operand" "=x,v")
18550 (vec_duplicate:VI
18551 (vec_select:<ssescalarmode>
18552 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "xm,vm")
18553 (parallel [(const_int 0)]))))]
18554 "TARGET_AVX2"
18555 "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}"
18556 [(set_attr "isa" "*,<pbroadcast_evex_isa>")
18557 (set_attr "type" "ssemov")
18558 (set_attr "prefix_extra" "1")
18559 (set_attr "prefix" "vex,evex")
18560 (set_attr "mode" "<sseinsnmode>")])
18561
;; vpbroadcast of element 0 of a 256-bit integer vector source into
;; every element of a same-mode destination.  Alternatives 0/1 use "x"
;; registers (memory / register source); alternatives 2/3 use "v"
;; registers and are gated by <pbroadcast_evex_isa>.  Register sources
;; are printed with %x1.  The "prefix" attribute is given per
;; alternative so that the "v" alternatives are marked evex, matching
;; avx2_pbroadcast<mode>.
18562 (define_insn "avx2_pbroadcast<mode>_1"
18563 [(set (match_operand:VI_256 0 "register_operand" "=x,x,v,v")
18564 (vec_duplicate:VI_256
18565 (vec_select:<ssescalarmode>
18566 (match_operand:VI_256 1 "nonimmediate_operand" "m,x,m,v")
18567 (parallel [(const_int 0)]))))]
18568 "TARGET_AVX2"
18569 "@
18570 vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}
18571 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}
18572 vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}
18573 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}"
18574 [(set_attr "isa" "*,*,<pbroadcast_evex_isa>,<pbroadcast_evex_isa>")
18575 (set_attr "type" "ssemov")
18576 (set_attr "prefix_extra" "1")
18577 (set_attr "prefix" "vex,vex,evex,evex")
18578 (set_attr "mode" "<sseinsnmode>")])
18579
;; Variable permute for 256/512-bit vectors with 4- or 8-byte elements,
;; modelled as UNSPEC_VPERMVAR of a data operand (operand 1, register
;; or memory) and an integer-vector operand 2 (register only).  Printed
;; as vperm<suffix> with optional masking operands.
18580 (define_insn "<avx2_avx512>_permvar<mode><mask_name>"
18581 [(set (match_operand:VI48F_256_512 0 "register_operand" "=v")
18582 (unspec:VI48F_256_512
18583 [(match_operand:VI48F_256_512 1 "nonimmediate_operand" "vm")
18584 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
18585 UNSPEC_VPERMVAR))]
18586 "TARGET_AVX2 && <mask_mode512bit_condition>"
18587 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
18588 [(set_attr "type" "sselog")
18589 (set_attr "prefix" "<mask_prefix2>")
18590 (set_attr "mode" "<sseinsnmode>")])
18591
;; Variable permute for byte-element vectors (VI1_AVX512VL); same
;; UNSPEC_VPERMVAR shape as the 4/8-byte variant, gated on
;; TARGET_AVX512VBMI.
18592 (define_insn "<avx512>_permvar<mode><mask_name>"
18593 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
18594 (unspec:VI1_AVX512VL
18595 [(match_operand:VI1_AVX512VL 1 "nonimmediate_operand" "vm")
18596 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
18597 UNSPEC_VPERMVAR))]
18598 "TARGET_AVX512VBMI && <mask_mode512bit_condition>"
18599 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
18600 [(set_attr "type" "sselog")
18601 (set_attr "prefix" "<mask_prefix2>")
18602 (set_attr "mode" "<sseinsnmode>")])
18603
;; Variable permute for word-element vectors (VI2_AVX512VL); same
;; UNSPEC_VPERMVAR shape as the other permvar patterns, gated on
;; TARGET_AVX512BW.
18604 (define_insn "<avx512>_permvar<mode><mask_name>"
18605 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
18606 (unspec:VI2_AVX512VL
18607 [(match_operand:VI2_AVX512VL 1 "nonimmediate_operand" "vm")
18608 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
18609 UNSPEC_VPERMVAR))]
18610 "TARGET_AVX512BW && <mask_mode512bit_condition>"
18611 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
18612 [(set_attr "type" "sselog")
18613 (set_attr "prefix" "<mask_prefix2>")
18614 (set_attr "mode" "<sseinsnmode>")])
18615
;; Immediate permute of a 4-element 256-bit vector.  Splits the 8-bit
;; immediate (operand 2) into four 2-bit selectors, lowest bits first,
;; and passes them as separate operands to avx2_perm<mode>_1.
18616 (define_expand "avx2_perm<mode>"
18617 [(match_operand:VI8F_256 0 "register_operand")
18618 (match_operand:VI8F_256 1 "nonimmediate_operand")
18619 (match_operand:SI 2 "const_0_to_255_operand")]
18620 "TARGET_AVX2"
18621 {
18622 int mask = INTVAL (operands[2]);
18623 emit_insn (gen_avx2_perm<mode>_1 (operands[0], operands[1],
18624 GEN_INT ((mask >> 0) & 3),
18625 GEN_INT ((mask >> 2) & 3),
18626 GEN_INT ((mask >> 4) & 3),
18627 GEN_INT ((mask >> 6) & 3)));
18628 DONE;
18629 })
18630
;; Masked form of avx2_perm<mode>: splits the immediate into four 2-bit
;; selectors and forwards them, together with operand 3 and the mask
;; register (operand 4), to the _1_mask insn.
18631 (define_expand "avx512vl_perm<mode>_mask"
18632 [(match_operand:VI8F_256 0 "register_operand")
18633 (match_operand:VI8F_256 1 "nonimmediate_operand")
18634 (match_operand:SI 2 "const_0_to_255_operand")
18635 (match_operand:VI8F_256 3 "nonimm_or_0_operand")
18636 (match_operand:<avx512fmaskmode> 4 "register_operand")]
18637 "TARGET_AVX512VL"
18638 {
18639 int mask = INTVAL (operands[2]);
18640 emit_insn (gen_<avx2_avx512>_perm<mode>_1_mask (operands[0], operands[1],
18641 GEN_INT ((mask >> 0) & 3),
18642 GEN_INT ((mask >> 2) & 3),
18643 GEN_INT ((mask >> 4) & 3),
18644 GEN_INT ((mask >> 6) & 3),
18645 operands[3], operands[4]));
18646 DONE;
18647 })
18648
;; vec_select of four elements of operand 1, each index restricted to
;; 0..3.  The output code packs the four indices back into one 8-bit
;; immediate (2 bits each, operand 2 in the low bits), overwrites
;; operands[2] with it and prints vperm<suffix>.
18649 (define_insn "avx2_perm<mode>_1<mask_name>"
18650 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
18651 (vec_select:VI8F_256
18652 (match_operand:VI8F_256 1 "nonimmediate_operand" "vm")
18653 (parallel [(match_operand 2 "const_0_to_3_operand")
18654 (match_operand 3 "const_0_to_3_operand")
18655 (match_operand 4 "const_0_to_3_operand")
18656 (match_operand 5 "const_0_to_3_operand")])))]
18657 "TARGET_AVX2 && <mask_mode512bit_condition>"
18658 {
18659 int mask = 0;
18660 mask |= INTVAL (operands[2]) << 0;
18661 mask |= INTVAL (operands[3]) << 2;
18662 mask |= INTVAL (operands[4]) << 4;
18663 mask |= INTVAL (operands[5]) << 6;
18664 operands[2] = GEN_INT (mask);
18665 return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
18666 }
18667 [(set_attr "type" "sselog")
18668 (set_attr "prefix" "<mask_prefix2>")
18669 (set_attr "mode" "<sseinsnmode>")])
18670
;; Immediate permute of an 8-element 512-bit vector.  Splits the 8-bit
;; immediate into four 2-bit selectors and passes eight indices to
;; avx512f_perm<mode>_1: the four selectors, then the same four
;; selectors plus 4 for the upper half.
18671 (define_expand "avx512f_perm<mode>"
18672 [(match_operand:V8FI 0 "register_operand")
18673 (match_operand:V8FI 1 "nonimmediate_operand")
18674 (match_operand:SI 2 "const_0_to_255_operand")]
18675 "TARGET_AVX512F"
18676 {
18677 int mask = INTVAL (operands[2]);
18678 emit_insn (gen_avx512f_perm<mode>_1 (operands[0], operands[1],
18679 GEN_INT ((mask >> 0) & 3),
18680 GEN_INT ((mask >> 2) & 3),
18681 GEN_INT ((mask >> 4) & 3),
18682 GEN_INT ((mask >> 6) & 3),
18683 GEN_INT (((mask >> 0) & 3) + 4),
18684 GEN_INT (((mask >> 2) & 3) + 4),
18685 GEN_INT (((mask >> 4) & 3) + 4),
18686 GEN_INT (((mask >> 6) & 3) + 4)));
18687 DONE;
18688 })
18689
;; Masked form of avx512f_perm<mode>: builds the same eight indices and
;; additionally forwards operand 3 and the mask register (operand 4)
;; to avx512f_perm<mode>_1_mask.
18690 (define_expand "avx512f_perm<mode>_mask"
18691 [(match_operand:V8FI 0 "register_operand")
18692 (match_operand:V8FI 1 "nonimmediate_operand")
18693 (match_operand:SI 2 "const_0_to_255_operand")
18694 (match_operand:V8FI 3 "nonimm_or_0_operand")
18695 (match_operand:<avx512fmaskmode> 4 "register_operand")]
18696 "TARGET_AVX512F"
18697 {
18698 int mask = INTVAL (operands[2]);
18699 emit_insn (gen_avx512f_perm<mode>_1_mask (operands[0], operands[1],
18700 GEN_INT ((mask >> 0) & 3),
18701 GEN_INT ((mask >> 2) & 3),
18702 GEN_INT ((mask >> 4) & 3),
18703 GEN_INT ((mask >> 6) & 3),
18704 GEN_INT (((mask >> 0) & 3) + 4),
18705 GEN_INT (((mask >> 2) & 3) + 4),
18706 GEN_INT (((mask >> 4) & 3) + 4),
18707 GEN_INT (((mask >> 6) & 3) + 4),
18708 operands[3], operands[4]));
18709 DONE;
18710 })
18711
;; vec_select of eight elements of operand 1: indices 2..5 are limited
;; to 0..3 and indices 6..9 to 4..7.  The insn condition additionally
;; requires each upper-half index to equal the matching lower-half
;; index plus 4, so only the four low indices are packed into the
;; 8-bit immediate printed with vperm<suffix>.
18712 (define_insn "avx512f_perm<mode>_1<mask_name>"
18713 [(set (match_operand:V8FI 0 "register_operand" "=v")
18714 (vec_select:V8FI
18715 (match_operand:V8FI 1 "nonimmediate_operand" "vm")
18716 (parallel [(match_operand 2 "const_0_to_3_operand")
18717 (match_operand 3 "const_0_to_3_operand")
18718 (match_operand 4 "const_0_to_3_operand")
18719 (match_operand 5 "const_0_to_3_operand")
18720 (match_operand 6 "const_4_to_7_operand")
18721 (match_operand 7 "const_4_to_7_operand")
18722 (match_operand 8 "const_4_to_7_operand")
18723 (match_operand 9 "const_4_to_7_operand")])))]
18724 "TARGET_AVX512F && <mask_mode512bit_condition>
18725 && (INTVAL (operands[2]) == (INTVAL (operands[6]) - 4)
18726 && INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
18727 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
18728 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4))"
18729 {
18730 int mask = 0;
18731 mask |= INTVAL (operands[2]) << 0;
18732 mask |= INTVAL (operands[3]) << 2;
18733 mask |= INTVAL (operands[4]) << 4;
18734 mask |= INTVAL (operands[5]) << 6;
18735 operands[2] = GEN_INT (mask);
18736 return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
18737 }
18738 [(set_attr "type" "sselog")
18739 (set_attr "prefix" "<mask_prefix2>")
18740 (set_attr "mode" "<sseinsnmode>")])
18741
;; vperm2i128, modelled as UNSPEC_VPERMTI of two V4DI operands and an
;; 8-bit immediate (operand 3).
18742 (define_insn "avx2_permv2ti"
18743 [(set (match_operand:V4DI 0 "register_operand" "=x")
18744 (unspec:V4DI
18745 [(match_operand:V4DI 1 "register_operand" "x")
18746 (match_operand:V4DI 2 "nonimmediate_operand" "xm")
18747 (match_operand:SI 3 "const_0_to_255_operand" "n")]
18748 UNSPEC_VPERMTI))]
18749 "TARGET_AVX2"
18750 "vperm2i128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18751 [(set_attr "type" "sselog")
18752 (set_attr "prefix" "vex")
18753 (set_attr "mode" "OI")])
18754
;; vbroadcastsd from a register: duplicates element 0 of a V2DF
;; register source into all four elements of a V4DF destination.
18755 (define_insn "avx2_vec_dupv4df"
18756 [(set (match_operand:V4DF 0 "register_operand" "=v")
18757 (vec_duplicate:V4DF
18758 (vec_select:DF
18759 (match_operand:V2DF 1 "register_operand" "v")
18760 (parallel [(const_int 0)]))))]
18761 "TARGET_AVX2"
18762 "vbroadcastsd\t{%1, %0|%0, %1}"
18763 [(set_attr "type" "sselog1")
18764 (set_attr "prefix" "maybe_evex")
18765 (set_attr "mode" "V4DF")])
18766
;; vpbroadcast of element 0 of a same-mode source vector (register in
;; alternative 0, memory in alternative 1) into every element of the
;; destination.
;; NOTE(review): alternative 1 prints %x1 on the AT&T side but
;; %<iptr>1 on the Intel side for the memory operand -- confirm the
;; asymmetry is intended.
18767 (define_insn "<avx512>_vec_dup<mode>_1"
18768 [(set (match_operand:VI_AVX512BW 0 "register_operand" "=v,v")
18769 (vec_duplicate:VI_AVX512BW
18770 (vec_select:<ssescalarmode>
18771 (match_operand:VI_AVX512BW 1 "nonimmediate_operand" "v,m")
18772 (parallel [(const_int 0)]))))]
18773 "TARGET_AVX512F"
18774 "@
18775 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}
18776 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %<iptr>1}"
18777 [(set_attr "type" "ssemov")
18778 (set_attr "prefix" "evex")
18779 (set_attr "mode" "<sseinsnmode>")])
18780
;; Broadcast element 0 of a <ssexmmmode> operand (register or memory) to
;; every element of a V48_AVX512VL destination.  <mask_name> makes this
;; template yield the masked variant as well.  The output code picks the
;; mnemonic; V2DFmode is handled separately (see the comment below).
18781 (define_insn "<avx512>_vec_dup<mode><mask_name>"
18782 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
18783 (vec_duplicate:V48_AVX512VL
18784 (vec_select:<ssescalarmode>
18785 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
18786 (parallel [(const_int 0)]))))]
18787 "TARGET_AVX512F"
18788 {
18789 /* There is no DF broadcast (in AVX-512*) to 128b register.
18790 Mimic it with integer variant. */
18791 if (<MODE>mode == V2DFmode)
18792 return "vpbroadcastq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}";
18793
18794 return "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %<iptr>1}";
18795 }
18796 [(set_attr "type" "ssemov")
18797 (set_attr "prefix" "evex")
18798 (set_attr "mode" "<sseinsnmode>")])
18799
;; Broadcast element 0 of a <ssexmmmode> operand (register or memory) to
;; every element of a VI12_AVX512VL destination.  Enabled under
;; TARGET_AVX512BW and always emitted as vpbroadcast<bcstscalarsuff>;
;; <mask_name> also yields the masked variant.
18800 (define_insn "<avx512>_vec_dup<mode><mask_name>"
18801 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
18802 (vec_duplicate:VI12_AVX512VL
18803 (vec_select:<ssescalarmode>
18804 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
18805 (parallel [(const_int 0)]))))]
18806 "TARGET_AVX512BW"
18807 "vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %<iptr>1}"
18808 [(set_attr "type" "ssemov")
18809 (set_attr "prefix" "evex")
18810 (set_attr "mode" "<sseinsnmode>")])
18811
;; Fill a V16FI vector with copies of a <ssexmmmode> operand.
;; Alternative 0 (register source) uses vshuf<shuffletype>32x4 with
;; immediate 0 and the source given for both inputs; alternative 1
;; (memory source) uses vbroadcast<shuffletype>32x4.
18812 (define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
18813 [(set (match_operand:V16FI 0 "register_operand" "=v,v")
18814 (vec_duplicate:V16FI
18815 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
18816 "TARGET_AVX512F"
18817 "@
18818 vshuf<shuffletype>32x4\t{$0x0, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x0}
18819 vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18820 [(set_attr "type" "ssemov")
18821 (set_attr "prefix" "evex")
18822 (set_attr "mode" "<sseinsnmode>")])
18823
;; Fill a V8FI vector with copies of a <ssehalfvecmode> operand.
;; Alternative 0 (register source) uses vshuf<shuffletype>64x2 with
;; immediate 0x44 and the source given for both inputs; alternative 1
;; (memory source) uses vbroadcast<shuffletype>64x4.
18824 (define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
18825 [(set (match_operand:V8FI 0 "register_operand" "=v,v")
18826 (vec_duplicate:V8FI
18827 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
18828 "TARGET_AVX512F"
18829 "@
18830 vshuf<shuffletype>64x2\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
18831 vbroadcast<shuffletype>64x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18832 [(set_attr "type" "ssemov")
18833 (set_attr "prefix" "evex")
18834 (set_attr "mode" "<sseinsnmode>")])
18835
;; Broadcast a scalar to every element of a VI12_AVX512VL vector.
;; Alternative 0 takes the scalar from a vector register or memory,
;; alternative 1 from a general register (printed with the %k modifier).
18836 (define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>"
18837 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v")
18838 (vec_duplicate:VI12_AVX512VL
18839 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))]
18840 "TARGET_AVX512BW"
18841 "@
18842 vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}
18843 vpbroadcast<bcstscalarsuff>\t{%k1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
18844 [(set_attr "type" "ssemov")
18845 (set_attr "prefix" "evex")
18846 (set_attr "mode" "<sseinsnmode>")])
18847
;; Broadcast a scalar to every element of a V48_AVX512VL vector.
;; Alternative 0 takes the scalar from a vector register or memory,
;; alternative 1 from a general register.  The "enabled" attribute
;; restricts alternative 1 to integer scalar modes, and for DImode
;; additionally to TARGET_64BIT.
18848 (define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>"
18849 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v")
18850 (vec_duplicate:V48_AVX512VL
18851 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))]
18852 "TARGET_AVX512F"
18853 "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18854 [(set_attr "type" "ssemov")
18855 (set_attr "prefix" "evex")
18856 (set_attr "mode" "<sseinsnmode>")
18857 (set (attr "enabled")
18858 (if_then_else (eq_attr "alternative" "1")
18859 (symbol_ref "GET_MODE_CLASS (<ssescalarmode>mode) == MODE_INT
18860 && (<ssescalarmode>mode != DImode || TARGET_64BIT)")
18861 (const_int 1)))])
18862
;; Broadcast an SF scalar to all four elements of a V4SF register.
;;   alt 0 (avx):   register source, vshufps with immediate 0
;;   alt 1 (avx):   memory source, vbroadcastss
;;   alt 2 (noavx): source tied to the destination, in-place shufps
18863 (define_insn "vec_dupv4sf"
18864 [(set (match_operand:V4SF 0 "register_operand" "=v,v,x")
18865 (vec_duplicate:V4SF
18866 (match_operand:SF 1 "nonimmediate_operand" "Yv,m,0")))]
18867 "TARGET_SSE"
18868 "@
18869 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
18870 vbroadcastss\t{%1, %0|%0, %1}
18871 shufps\t{$0, %0, %0|%0, %0, 0}"
18872 [(set_attr "isa" "avx,avx,noavx")
18873 (set_attr "type" "sseshuf1,ssemov,sseshuf1")
18874 (set_attr "length_immediate" "1,0,1")
18875 (set_attr "prefix_extra" "0,1,*")
18876 (set_attr "prefix" "maybe_evex,maybe_evex,orig")
18877 (set_attr "mode" "V4SF")])
18878
;; Broadcast an SI scalar to all four elements of a V4SI register.
;;   alt 0 (sse2):  register source, pshufd with immediate 0 (the %v
;;                  prefix in the template selects the VEX form when
;;                  applicable)
;;   alt 1 (avx):   memory source, vbroadcastss
;;   alt 2 (noavx): source tied to the destination, in-place shufps
18879 (define_insn "*vec_dupv4si"
18880 [(set (match_operand:V4SI 0 "register_operand" "=v,v,x")
18881 (vec_duplicate:V4SI
18882 (match_operand:SI 1 "nonimmediate_operand" "Yv,m,0")))]
18883 "TARGET_SSE"
18884 "@
18885 %vpshufd\t{$0, %1, %0|%0, %1, 0}
18886 vbroadcastss\t{%1, %0|%0, %1}
18887 shufps\t{$0, %0, %0|%0, %0, 0}"
18888 [(set_attr "isa" "sse2,avx,noavx")
18889 (set_attr "type" "sselog1,ssemov,sselog1")
18890 (set_attr "length_immediate" "1,0,1")
18891 (set_attr "prefix_extra" "0,1,*")
18892 (set_attr "prefix" "maybe_vex,maybe_evex,orig")
18893 (set_attr "mode" "TI,V4SF,V4SF")])
18894
;; Broadcast a DI scalar to both elements of a V2DI register.
;;   alt 0 (sse2_noavx): source tied to destination, in-place punpcklqdq
;;   alt 1 (avx):        register source, vpunpcklqdq
;;   alt 2 (sse3):       register or memory source, movddup (VEX form via
;;                       the %v prefix when applicable)
;;   alt 3 (noavx):      source tied to destination, in-place movlhps
18895 (define_insn "*vec_dupv2di"
18896 [(set (match_operand:V2DI 0 "register_operand" "=x,v,v,x")
18897 (vec_duplicate:V2DI
18898 (match_operand:DI 1 "nonimmediate_operand" " 0,Yv,vm,0")))]
18899 "TARGET_SSE"
18900 "@
18901 punpcklqdq\t%0, %0
18902 vpunpcklqdq\t{%d1, %0|%0, %d1}
18903 %vmovddup\t{%1, %0|%0, %1}
18904 movlhps\t%0, %0"
18905 [(set_attr "isa" "sse2_noavx,avx,sse3,noavx")
18906 (set_attr "type" "sselog1,sselog1,sselog1,ssemov")
18907 (set_attr "prefix" "orig,maybe_evex,maybe_vex,orig")
18908 (set_attr "mode" "TI,TI,DF,V4SF")])
18909
;; Load a <ssehalfvecmode> value from memory into both halves of a VI_256
;; register (vec_concat of the memory operand with itself).  The three
;; alternatives differ in the ISA they require ("isa" attribute), the
;; destination register class and the mnemonic used.
18910 (define_insn "avx2_vbroadcasti128_<mode>"
18911 [(set (match_operand:VI_256 0 "register_operand" "=x,v,v")
18912 (vec_concat:VI_256
18913 (match_operand:<ssehalfvecmode> 1 "memory_operand" "m,m,m")
18914 (match_dup 1)))]
18915 "TARGET_AVX2"
18916 "@
18917 vbroadcasti128\t{%1, %0|%0, %1}
18918 vbroadcast<i128vldq>\t{%1, %0|%0, %1}
18919 vbroadcast<shuffletype>32x4\t{%1, %0|%0, %1}"
18920 [(set_attr "isa" "*,avx512dq,avx512vl")
18921 (set_attr "type" "ssemov")
18922 (set_attr "prefix_extra" "1")
18923 (set_attr "prefix" "vex,evex,evex")
18924 (set_attr "mode" "OI")])
18925
18926 ;; Modes handled by AVX vec_dup patterns.
18927 (define_mode_iterator AVX_VEC_DUP_MODE
18928 [V8SI V8SF V4DI V4DF])
;; Per-mode mnemonic suffix ("ss" or "sd") substituted into the
;; vbroadcast<vecdupssescalarmodesuffix> template of vec_dup<mode>.
18929 (define_mode_attr vecdupssescalarmodesuffix
18930 [(V8SF "ss") (V4DF "sd") (V8SI "ss") (V4DI "sd")])
18931 ;; Modes handled by AVX2 vec_dup patterns.
18932 (define_mode_iterator AVX2_VEC_DUP_MODE
18933 [V32QI V16QI V16HI V8HI V8SI V4SI])
18934
;; Broadcast a scalar to every element of an AVX2_VEC_DUP_MODE vector.
;; Alternatives 0 (memory) and 1 (vector register) emit the broadcast
;; directly.  Alternative 2 (general register) has the template "#", so
;; it is never output as is and must be split; it is limited to
;; noavx512vl and is preferred for speed only when
;; TARGET_INTER_UNIT_MOVES_TO_VEC holds.
18935 (define_insn "*vec_dup<mode>"
18936 [(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand" "=x,x,v")
18937 (vec_duplicate:AVX2_VEC_DUP_MODE
18938 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,x,$r")))]
18939 "TARGET_AVX2"
18940 "@
18941 v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1}
18942 v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}
18943 #"
18944 [(set_attr "isa" "*,*,noavx512vl")
18945 (set_attr "type" "ssemov")
18946 (set_attr "prefix_extra" "1")
18947 (set_attr "prefix" "maybe_evex")
18948 (set_attr "mode" "<sseinsnmode>")
18949 (set (attr "preferred_for_speed")
18950 (cond [(eq_attr "alternative" "2")
18951 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
18952 ]
18953 (symbol_ref "true")))])
18954
;; Broadcast a scalar to every element of an AVX_VEC_DUP_MODE vector.
;; The "isa" attribute assigns the alternatives as follows:
;;   alt 0 (avx2):    memory source
;;   alt 1 (noavx2):  memory source, vbroadcastss/vbroadcastsd
;;   alt 2 (avx2):    register source
;;   alt 3 (avx512f): register source, destination printed with %g0
;;   alt 4 (noavx2):  register source, template "#" (must be split)
18955 (define_insn "vec_dup<mode>"
18956 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x,x,v,x")
18957 (vec_duplicate:AVX_VEC_DUP_MODE
18958 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,m,x,v,?x")))]
18959 "TARGET_AVX"
18960 "@
18961 v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1}
18962 vbroadcast<vecdupssescalarmodesuffix>\t{%1, %0|%0, %1}
18963 v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}
18964 v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %g0|%g0, %x1}
18965 #"
18966 [(set_attr "type" "ssemov")
18967 (set_attr "prefix_extra" "1")
18968 (set_attr "prefix" "maybe_evex")
18969 (set_attr "isa" "avx2,noavx2,avx2,avx512f,noavx2")
18970 (set_attr "mode" "<sseinsnmode>,V8SF,<sseinsnmode>,<sseinsnmode>,V8SF")])
18971
;; Post-reload split for an AVX2 broadcast whose scalar lives in a
;; general register.  Two insns are emitted: vec_setv4si_0 on the V4SI
;; lowpart of the destination (given a zero vector and the SImode lowpart
;; of the scalar), then avx2_pbroadcast<mode> reading the destination's
;; own <ssexmmmode> lowpart.
18972 (define_split
18973 [(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand")
18974 (vec_duplicate:AVX2_VEC_DUP_MODE
18975 (match_operand:<ssescalarmode> 1 "register_operand")))]
18976 "TARGET_AVX2
18977 /* Disable this splitter if avx512vl_vec_dup_gprv*[qhs]i insn is
18978 available, because then we can broadcast from GPRs directly.
18979 For V*[QH]I modes it requires both -mavx512vl and -mavx512bw,
18980 for V*SI mode it requires just -mavx512vl. */
18981 && !(TARGET_AVX512VL
18982 && (TARGET_AVX512BW || <ssescalarmode>mode == SImode))
18983 && reload_completed && GENERAL_REG_P (operands[1])"
18984 [(const_int 0)]
18985 {
18986 emit_insn (gen_vec_setv4si_0 (gen_lowpart (V4SImode, operands[0]),
18987 CONST0_RTX (V4SImode),
18988 gen_lowpart (SImode, operands[1])));
18989 emit_insn (gen_avx2_pbroadcast<mode> (operands[0],
18990 gen_lowpart (<ssexmmmode>mode,
18991 operands[0])));
18992 DONE;
18993 })
18994
;; Post-reload split used when AVX2 is not available: broadcast the
;; scalar into the <ssehalfvecmode> lowpart of the destination
;; (operand 2), then build the full vector by concatenating that half
;; with itself.
18995 (define_split
18996 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand")
18997 (vec_duplicate:AVX_VEC_DUP_MODE
18998 (match_operand:<ssescalarmode> 1 "register_operand")))]
18999 "TARGET_AVX && !TARGET_AVX2 && reload_completed"
19000 [(set (match_dup 2)
19001 (vec_duplicate:<ssehalfvecmode> (match_dup 1)))
19002 (set (match_dup 0)
19003 (vec_concat:AVX_VEC_DUP_MODE (match_dup 2) (match_dup 2)))]
19004 "operands[2] = gen_lowpart (<ssehalfvecmode>mode, operands[0]);")
19005
;; Duplicate a <ssehalfvecmode> operand into both halves of a V_256
;; register (vec_concat of the operand with itself).
;;   memory source ("m"):                 vbroadcast form
;;   source tied to destination ("0"):    vinsert with immediate 1
;;   other register source ("?x", alt 2): vperm2<i128> with immediate 0
;; Alternatives 3-4 require avx512dq and alternatives 5-6 avx512vl (see
;; the "isa" attribute); they use the corresponding EVEX mnemonics.
19006 (define_insn "avx_vbroadcastf128_<mode>"
19007 [(set (match_operand:V_256 0 "register_operand" "=x,x,x,v,v,v,v")
19008 (vec_concat:V_256
19009 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x,m,0,m,0")
19010 (match_dup 1)))]
19011 "TARGET_AVX"
19012 "@
19013 vbroadcast<i128>\t{%1, %0|%0, %1}
19014 vinsert<i128>\t{$1, %1, %0, %0|%0, %0, %1, 1}
19015 vperm2<i128>\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}
19016 vbroadcast<i128vldq>\t{%1, %0|%0, %1}
19017 vinsert<i128vldq>\t{$1, %1, %0, %0|%0, %0, %1, 1}
19018 vbroadcast<shuffletype>32x4\t{%1, %0|%0, %1}
19019 vinsert<shuffletype>32x4\t{$1, %1, %0, %0|%0, %0, %1, 1}"
19020 [(set_attr "isa" "*,*,*,avx512dq,avx512dq,avx512vl,avx512vl")
19021 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,sselog1,ssemov,sselog1")
19022 (set_attr "prefix_extra" "1")
19023 (set_attr "length_immediate" "0,1,1,0,1,0,1")
19024 (set_attr "prefix" "vex,vex,vex,evex,evex,evex,evex")
19025 (set_attr "mode" "<sseinsnmode>")])
19026
19027 ;; For broadcast[i|f]32x2. Yes there is no v4sf version, only v4si.
19028 (define_mode_iterator VI4F_BRCST32x2
19029 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
19030 V16SF (V8SF "TARGET_AVX512VL")])
19031
;; Two-element source mode for each destination mode of the
;; broadcast[i|f]64x2 pattern.
19032 (define_mode_attr 64x2mode
19033 [(V8DF "V2DF") (V8DI "V2DI") (V4DI "V2DI") (V4DF "V2DF")])
19034
;; Mode of the two-element group selected and replicated by the
;; broadcast[i|f]32x2 pattern.
19035 (define_mode_attr 32x2mode
19036 [(V16SF "V2SF") (V16SI "V2SI") (V8SI "V2SI")
19037 (V8SF "V2SF") (V4SI "V2SI")])
19038
;; Replicate the pair formed by elements 0 and 1 of a <ssexmmmode>
;; operand (register or memory) across a VI4F_BRCST32x2 destination;
;; emitted as vbroadcast<shuffletype>32x2.
19039 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>"
19040 [(set (match_operand:VI4F_BRCST32x2 0 "register_operand" "=v")
19041 (vec_duplicate:VI4F_BRCST32x2
19042 (vec_select:<32x2mode>
19043 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
19044 (parallel [(const_int 0) (const_int 1)]))))]
19045 "TARGET_AVX512DQ"
19046 "vbroadcast<shuffletype>32x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
19047 [(set_attr "type" "ssemov")
19048 (set_attr "prefix_extra" "1")
19049 (set_attr "prefix" "evex")
19050 (set_attr "mode" "<sseinsnmode>")])
19051
;; Fill a VI4F_256 vector with copies of a <ssexmmmode> operand.
;; Register source: vshuf<shuffletype>32x4 with immediate 0 and the
;; source given for both inputs; memory source:
;; vbroadcast<shuffletype>32x4.
19052 (define_insn "<mask_codefor>avx512vl_broadcast<mode><mask_name>_1"
19053 [(set (match_operand:VI4F_256 0 "register_operand" "=v,v")
19054 (vec_duplicate:VI4F_256
19055 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
19056 "TARGET_AVX512VL"
19057 "@
19058 vshuf<shuffletype>32x4\t{$0x0, %t1, %t1, %0<mask_operand2>|%0<mask_operand2>, %t1, %t1, 0x0}
19059 vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
19060 [(set_attr "type" "ssemov")
19061 (set_attr "prefix_extra" "1")
19062 (set_attr "prefix" "evex")
19063 (set_attr "mode" "<sseinsnmode>")])
19064
;; Fill a V16FI vector with copies of a <ssehalfvecmode> operand.
;; Register source: vshuf<shuffletype>32x4 with immediate 0x44 and the
;; source given for both inputs; memory source:
;; vbroadcast<shuffletype>32x8.
19065 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1"
19066 [(set (match_operand:V16FI 0 "register_operand" "=v,v")
19067 (vec_duplicate:V16FI
19068 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
19069 "TARGET_AVX512DQ"
19070 "@
19071 vshuf<shuffletype>32x4\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
19072 vbroadcast<shuffletype>32x8\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
19073 [(set_attr "type" "ssemov")
19074 (set_attr "prefix_extra" "1")
19075 (set_attr "prefix" "evex")
19076 (set_attr "mode" "<sseinsnmode>")])
19077
19078 ;; For broadcast[i|f]64x2
19079 (define_mode_iterator VI8F_BRCST64x2
19080 [V8DI V8DF (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")])
19081
;; Fill a VI8F_BRCST64x2 vector with copies of a <64x2mode> operand.
;; Register source: vshuf<shuffletype>64x2 with immediate 0 and the
;; source given for both inputs; memory source:
;; vbroadcast<shuffletype>64x2.
19082 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1"
19083 [(set (match_operand:VI8F_BRCST64x2 0 "register_operand" "=v,v")
19084 (vec_duplicate:VI8F_BRCST64x2
19085 (match_operand:<64x2mode> 1 "nonimmediate_operand" "v,m")))]
19086 "TARGET_AVX512DQ"
19087 "@
19088 vshuf<shuffletype>64x2\t{$0x0, %<xtg_mode>1, %<xtg_mode>1, %0<mask_operand2>|%0<mask_operand2>, %<xtg_mode>1, %<xtg_mode>1, 0x0}
19089 vbroadcast<shuffletype>64x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
19090 [(set_attr "type" "ssemov")
19091 (set_attr "prefix_extra" "1")
19092 (set_attr "prefix" "evex")
19093 (set_attr "mode" "<sseinsnmode>")])
19094
;; Broadcast a zero-extended QI value held in a mask register
;; (constraint "Yk") to every DI element of a VI8_AVX512VL vector;
;; emitted as vpbroadcastmb2q.
19095 (define_insn "avx512cd_maskb_vec_dup<mode>"
19096 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
19097 (vec_duplicate:VI8_AVX512VL
19098 (zero_extend:DI
19099 (match_operand:QI 1 "register_operand" "Yk"))))]
19100 "TARGET_AVX512CD"
19101 "vpbroadcastmb2q\t{%1, %0|%0, %1}"
19102 [(set_attr "type" "mskmov")
19103 (set_attr "prefix" "evex")
19104 (set_attr "mode" "XI")])
19105
;; Broadcast a zero-extended HI value held in a mask register
;; (constraint "Yk") to every SI element of a VI4_AVX512VL vector;
;; emitted as vpbroadcastmw2d.
19106 (define_insn "avx512cd_maskw_vec_dup<mode>"
19107 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
19108 (vec_duplicate:VI4_AVX512VL
19109 (zero_extend:SI
19110 (match_operand:HI 1 "register_operand" "Yk"))))]
19111 "TARGET_AVX512CD"
19112 "vpbroadcastmw2d\t{%1, %0|%0, %1}"
19113 [(set_attr "type" "mskmov")
19114 (set_attr "prefix" "evex")
19115 (set_attr "mode" "XI")])
19116
19117 ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
19118 ;; If it so happens that the input is in memory, use vbroadcast.
19119 ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
;; V4SF variant: operand 3 is the first index of the selection PARALLEL
;; (operand 2) and names the element to broadcast.  Alternatives 0 and 1
;; take operand 1 from memory, alternative 2 from a register.
19120 (define_insn "*avx_vperm_broadcast_v4sf"
19121 [(set (match_operand:V4SF 0 "register_operand" "=v,v,v")
19122 (vec_select:V4SF
19123 (match_operand:V4SF 1 "nonimmediate_operand" "m,o,v")
19124 (match_parallel 2 "avx_vbroadcast_operand"
19125 [(match_operand 3 "const_int_operand" "C,n,n")])))]
19126 "TARGET_AVX"
19127 {
19128 int elt = INTVAL (operands[3]);
19129 switch (which_alternative)
19130 {
19131 case 0:
19132 case 1:
  /* Memory source: re-address operand 1 as the selected SF element
     (elt * 4 bytes into the vector) and broadcast that scalar.  */
19133 operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
19134 return "vbroadcastss\t{%1, %0|%0, %k1}";
19135 case 2:
  /* Register source: multiplying by 0x55 copies the index into each of
     the four 2-bit fields of the immediate.  */
19136 operands[2] = GEN_INT (elt * 0x55);
19137 return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
19138 default:
19139 gcc_unreachable ();
19140 }
19141 }
19142 [(set_attr "type" "ssemov,ssemov,sselog1")
19143 (set_attr "prefix_extra" "1")
19144 (set_attr "length_immediate" "0,0,1")
19145 (set_attr "prefix" "maybe_evex")
19146 (set_attr "mode" "SF,SF,V4SF")])
19147
;; 256-bit (VF_256) broadcast expressed as a vec_select; operand 3 is the
;; first index of the selection PARALLEL and names the element to
;; broadcast.  The insn template is "#", so it is always split.  The
;; split runs after reload, except for V4DFmode when TARGET_AVX2 is set.
;;  - Memory source: operand 1 is re-addressed to the selected scalar and
;;    the replacement pattern (a plain vec_duplicate) is used.
;;  - Register source: the preparation code emits the insns itself and
;;    ends with DONE, so the replacement pattern is not used.
19148 (define_insn_and_split "*avx_vperm_broadcast_<mode>"
19149 [(set (match_operand:VF_256 0 "register_operand" "=v,v,v")
19150 (vec_select:VF_256
19151 (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?v")
19152 (match_parallel 2 "avx_vbroadcast_operand"
19153 [(match_operand 3 "const_int_operand" "C,n,n")])))]
19154 "TARGET_AVX"
19155 "#"
19156 "&& reload_completed && (<MODE>mode != V4DFmode || !TARGET_AVX2)"
19157 [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
19158 {
19159 rtx op0 = operands[0], op1 = operands[1];
19160 int elt = INTVAL (operands[3]);
19161
19162 if (REG_P (op1))
19163 {
19164 int mask;
19165
  /* With AVX2, element 0 of a register can be broadcast directly by
     the vec_dup<mode> pattern.  */
19166 if (TARGET_AVX2 && elt == 0)
19167 {
19168 emit_insn (gen_vec_dup<mode> (op0, gen_lowpart (<ssescalarmode>mode,
19169 op1)));
19170 DONE;
19171 }
19172
19173 /* Shuffle element we care about into all elements of the 128-bit lane.
19174 The other lane gets shuffled too, but we don't care. */
19175 if (<MODE>mode == V4DFmode)
19176 mask = (elt & 1 ? 15 : 0);
19177 else
19178 mask = (elt & 3) * 0x55;
19179 emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
19180
19181 /* Shuffle the lane we care about into both lanes of the dest. */
19182 mask = (elt / (<ssescalarnum> / 2)) * 0x11;
19183 if (EXT_REX_SSE_REG_P (op0))
19184 {
19185 /* There is no EVEX VPERM2F128, but we can use either VBROADCASTSS
19186 or VSHUFF128. */
19187 gcc_assert (<MODE>mode == V8SFmode);
19188 if ((mask & 1) == 0)
19189 emit_insn (gen_avx2_vec_dupv8sf (op0,
19190 gen_lowpart (V4SFmode, op0)));
19191 else
19192 emit_insn (gen_avx512vl_shuf_f32x4_1 (op0, op0, op0,
19193 GEN_INT (4), GEN_INT (5),
19194 GEN_INT (6), GEN_INT (7),
19195 GEN_INT (12), GEN_INT (13),
19196 GEN_INT (14), GEN_INT (15)));
19197 DONE;
19198 }
19199
19200 emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
19201 DONE;
19202 }
19203
  /* Memory source: address the selected scalar so that the replacement
     vec_duplicate pattern broadcasts it.  */
19204 operands[1] = adjust_address (op1, <ssescalarmode>mode,
19205 elt * GET_MODE_SIZE (<ssescalarmode>mode));
19206 })
19207
;; Expander for the immediate form of vpermil on VF2 modes.  Operand 2
;; arrives as an immediate (0-255) and is rewritten into a PARALLEL of
;; element indices for the vec_select: for each pair of elements starting
;; at index i, bit i of the immediate selects the source of element i and
;; bit i + 1 the source of element i + 1, both relative to i.
19208 (define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
19209 [(set (match_operand:VF2 0 "register_operand")
19210 (vec_select:VF2
19211 (match_operand:VF2 1 "nonimmediate_operand")
19212 (match_operand:SI 2 "const_0_to_255_operand")))]
19213 "TARGET_AVX && <mask_mode512bit_condition>"
19214 {
19215 int mask = INTVAL (operands[2]);
19216 rtx perm[<ssescalarnum>];
19217
19218 int i;
19219 for (i = 0; i < <ssescalarnum>; i = i + 2)
19220 {
19221 perm[i] = GEN_INT (((mask >> i) & 1) + i);
19222 perm[i + 1] = GEN_INT (((mask >> (i + 1)) & 1) + i);
19223 }
19224
19225 operands[2]
19226 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
19227 })
19228
;; Expander for the immediate form of vpermil on VF1 modes.  Operand 2
;; arrives as an immediate (0-255) and is rewritten into a PARALLEL of
;; element indices for the vec_select: for each group of four elements
;; starting at index i, the four 2-bit fields of the immediate select the
;; sources relative to i.  The same four fields are reused for every
;; group.
19229 (define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
19230 [(set (match_operand:VF1 0 "register_operand")
19231 (vec_select:VF1
19232 (match_operand:VF1 1 "nonimmediate_operand")
19233 (match_operand:SI 2 "const_0_to_255_operand")))]
19234 "TARGET_AVX && <mask_mode512bit_condition>"
19235 {
19236 int mask = INTVAL (operands[2]);
19237 rtx perm[<ssescalarnum>];
19238
19239 int i;
19240 for (i = 0; i < <ssescalarnum>; i = i + 4)
19241 {
19242 perm[i] = GEN_INT (((mask >> 0) & 3) + i);
19243 perm[i + 1] = GEN_INT (((mask >> 2) & 3) + i);
19244 perm[i + 2] = GEN_INT (((mask >> 4) & 3) + i);
19245 perm[i + 3] = GEN_INT (((mask >> 6) & 3) + i);
19246 }
19247
19248 operands[2]
19249 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
19250 })
19251
;; Match a vec_select whose index PARALLEL is accepted by
;; avx_vpermilp_parallel and emit vpermil<ssemodesuffix> with an
;; immediate.  The helper returns zero for "no match" (it is used as part
;; of the insn condition); otherwise its return value minus one is the
;; immediate.
19252 (define_insn "*<sse2_avx_avx512f>_vpermilp<mode><mask_name>"
19253 [(set (match_operand:VF 0 "register_operand" "=v")
19254 (vec_select:VF
19255 (match_operand:VF 1 "nonimmediate_operand" "vm")
19256 (match_parallel 2 ""
19257 [(match_operand 3 "const_int_operand")])))]
19258 "TARGET_AVX && <mask_mode512bit_condition>
19259 && avx_vpermilp_parallel (operands[2], <MODE>mode)"
19260 {
19261 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
19262 operands[2] = GEN_INT (mask);
19263 return "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}";
19264 }
19265 [(set_attr "type" "sselog")
19266 (set_attr "prefix_extra" "1")
19267 (set_attr "length_immediate" "1")
19268 (set_attr "prefix" "<mask_prefix>")
19269 (set_attr "mode" "<sseinsnmode>")])
19270
;; Variable-control form of vpermil: operand 1 is the data register and
;; operand 2 an integer vector (<sseintvecmode>, register or memory)
;; passed as the control operand.  Kept as an opaque unspec
;; (UNSPEC_VPERMIL).
19271 (define_insn "<sse2_avx_avx512f>_vpermilvar<mode>3<mask_name>"
19272 [(set (match_operand:VF 0 "register_operand" "=v")
19273 (unspec:VF
19274 [(match_operand:VF 1 "register_operand" "v")
19275 (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "vm")]
19276 UNSPEC_VPERMIL))]
19277 "TARGET_AVX && <mask_mode512bit_condition>"
19278 "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
19279 [(set_attr "type" "sselog")
19280 (set_attr "prefix_extra" "1")
19281 (set_attr "btver2_decode" "vector")
19282 (set_attr "prefix" "<mask_prefix>")
19283 (set_attr "mode" "<sseinsnmode>")])
19284
;; Modes handled by the vpermi2var/vpermt2var patterns, each with the
;; extra ISA condition it needs beyond the pattern's own condition.
19285 (define_mode_iterator VPERMI2
19286 [V16SI V16SF V8DI V8DF
19287 (V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL")
19288 (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
19289 (V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
19290 (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")
19291 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512BW && TARGET_AVX512VL")
19292 (V8HI "TARGET_AVX512BW && TARGET_AVX512VL")
19293 (V64QI "TARGET_AVX512VBMI") (V32QI "TARGET_AVX512VBMI && TARGET_AVX512VL")
19294 (V16QI "TARGET_AVX512VBMI && TARGET_AVX512VL")])
19295
;; The integer modes of VPERMI2, with the same per-mode conditions.
19296 (define_mode_iterator VPERMI2I
19297 [V16SI V8DI
19298 (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
19299 (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
19300 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512BW && TARGET_AVX512VL")
19301 (V8HI "TARGET_AVX512BW && TARGET_AVX512VL")
19302 (V64QI "TARGET_AVX512VBMI") (V32QI "TARGET_AVX512VBMI && TARGET_AVX512VL")
19303 (V16QI "TARGET_AVX512VBMI && TARGET_AVX512VL")])
19304
;; Expander for the merge-masked vpermi2var operation.  Operand 2 (the
;; <sseintvecmode> index vector) is forced into a register, and its
;; <MODE>mode lowpart becomes operand 5, the second input of the
;; vec_merge governed by mask operand 4.
19305 (define_expand "<avx512>_vpermi2var<mode>3_mask"
19306 [(set (match_operand:VPERMI2 0 "register_operand")
19307 (vec_merge:VPERMI2
19308 (unspec:VPERMI2
19309 [(match_operand:<sseintvecmode> 2 "register_operand")
19310 (match_operand:VPERMI2 1 "register_operand")
19311 (match_operand:VPERMI2 3 "nonimmediate_operand")]
19312 UNSPEC_VPERMT2)
19313 (match_dup 5)
19314 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
19315 "TARGET_AVX512F"
19316 {
19317 operands[2] = force_reg (<sseintvecmode>mode, operands[2]);
19318 operands[5] = gen_lowpart (<MODE>mode, operands[2]);
19319 })
19320
;; Merge-masked vpermi2 for the integer modes (VPERMI2I).  The index
;; vector (operand 2) is tied to the destination ("0") and, via
;; match_dup, is also the second input of the vec_merge.
19321 (define_insn "*<avx512>_vpermi2var<mode>3_mask"
19322 [(set (match_operand:VPERMI2I 0 "register_operand" "=v")
19323 (vec_merge:VPERMI2I
19324 (unspec:VPERMI2I
19325 [(match_operand:<sseintvecmode> 2 "register_operand" "0")
19326 (match_operand:VPERMI2I 1 "register_operand" "v")
19327 (match_operand:VPERMI2I 3 "nonimmediate_operand" "vm")]
19328 UNSPEC_VPERMT2)
19329 (match_dup 2)
19330 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
19331 "TARGET_AVX512F"
19332 "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
19333 [(set_attr "type" "sselog")
19334 (set_attr "prefix" "evex")
19335 (set_attr "mode" "<sseinsnmode>")])
19336
;; Merge-masked vpermi2 for the floating-point modes (VF_AVX512VL).  The
;; index vector (operand 2) has integer mode <sseintvecmode>, so the
;; second input of the vec_merge is written as a subreg of it in the
;; data mode.
19337 (define_insn "*<avx512>_vpermi2var<mode>3_mask"
19338 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
19339 (vec_merge:VF_AVX512VL
19340 (unspec:VF_AVX512VL
19341 [(match_operand:<sseintvecmode> 2 "register_operand" "0")
19342 (match_operand:VF_AVX512VL 1 "register_operand" "v")
19343 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "vm")]
19344 UNSPEC_VPERMT2)
19345 (subreg:VF_AVX512VL (match_dup 2) 0)
19346 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
19347 "TARGET_AVX512F"
19348 "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
19349 [(set_attr "type" "sselog")
19350 (set_attr "prefix" "evex")
19351 (set_attr "mode" "<sseinsnmode>")])
19352
;; Expander for the zero-masked vpermt2var operation: forwards operands
;; 0-3 to the _maskz_1 pattern, supplying an all-zero vector
;; (CONST0_RTX) ahead of the mask register (operand 4).
19353 (define_expand "<avx512>_vpermt2var<mode>3_maskz"
19354 [(match_operand:VPERMI2 0 "register_operand")
19355 (match_operand:<sseintvecmode> 1 "register_operand")
19356 (match_operand:VPERMI2 2 "register_operand")
19357 (match_operand:VPERMI2 3 "nonimmediate_operand")
19358 (match_operand:<avx512fmaskmode> 4 "register_operand")]
19359 "TARGET_AVX512F"
19360 {
19361 emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 (
19362 operands[0], operands[1], operands[2], operands[3],
19363 CONST0_RTX (<MODE>mode), operands[4]));
19364 DONE;
19365 })
19366
;; Two-source variable permute (UNSPEC_VPERMT2) with two alternatives
;; that differ in which input shares the destination register:
;;   alt 0: data operand 2 is tied to the destination  -> vpermt2
;;   alt 1: index operand 1 is tied to the destination -> vpermi2
19367 (define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>"
19368 [(set (match_operand:VPERMI2 0 "register_operand" "=v,v")
19369 (unspec:VPERMI2
19370 [(match_operand:<sseintvecmode> 1 "register_operand" "v,0")
19371 (match_operand:VPERMI2 2 "register_operand" "0,v")
19372 (match_operand:VPERMI2 3 "nonimmediate_operand" "vm,vm")]
19373 UNSPEC_VPERMT2))]
19374 "TARGET_AVX512F"
19375 "@
19376 vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}
19377 vpermi2<ssemodesuffix>\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3}"
19378 [(set_attr "type" "sselog")
19379 (set_attr "prefix" "evex")
19380 (set_attr "mode" "<sseinsnmode>")])
19381
;; Merge-masked vpermt2: data operand 2 is tied to the destination ("0")
;; and, via match_dup, is also the second input of the vec_merge
;; governed by mask operand 4.
19382 (define_insn "<avx512>_vpermt2var<mode>3_mask"
19383 [(set (match_operand:VPERMI2 0 "register_operand" "=v")
19384 (vec_merge:VPERMI2
19385 (unspec:VPERMI2
19386 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
19387 (match_operand:VPERMI2 2 "register_operand" "0")
19388 (match_operand:VPERMI2 3 "nonimmediate_operand" "vm")]
19389 UNSPEC_VPERMT2)
19390 (match_dup 2)
19391 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
19392 "TARGET_AVX512F"
19393 "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
19394 [(set_attr "type" "sselog")
19395 (set_attr "prefix" "evex")
19396 (set_attr "mode" "<sseinsnmode>")])
19397
;; Expander for vperm2f128 with an 8-bit immediate (operand 3).  When
;; neither bit 3 nor bit 7 of the immediate is set (mask & 0x88 == 0),
;; the operation is emitted as a vec_select from the vec_concat of
;; operands 1 and 2, with an explicit index PARALLEL.  Otherwise the
;; preparation code does nothing and the unspec pattern in the template
;; is emitted.
19398 (define_expand "avx_vperm2f128<mode>3"
19399 [(set (match_operand:AVX256MODE2P 0 "register_operand")
19400 (unspec:AVX256MODE2P
19401 [(match_operand:AVX256MODE2P 1 "register_operand")
19402 (match_operand:AVX256MODE2P 2 "nonimmediate_operand")
19403 (match_operand:SI 3 "const_0_to_255_operand")]
19404 UNSPEC_VPERMIL2F128))]
19405 "TARGET_AVX"
19406 {
19407 int mask = INTVAL (operands[3]);
19408 if ((mask & 0x88) == 0)
19409 {
19410 rtx perm[<ssescalarnum>], t1, t2;
19411 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
19412
  /* Bits 0-1 pick the half of the concatenation that fills the low
     half of the result.  */
19413 base = (mask & 3) * nelt2;
19414 for (i = 0; i < nelt2; ++i)
19415 perm[i] = GEN_INT (base + i);
19416
  /* Bits 4-5 pick the half that fills the high half of the result.  */
19417 base = ((mask >> 4) & 3) * nelt2;
19418 for (i = 0; i < nelt2; ++i)
19419 perm[i + nelt2] = GEN_INT (base + i);
19420
19421 t2 = gen_rtx_VEC_CONCAT (<ssedoublevecmode>mode,
19422 operands[1], operands[2]);
19423 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
19424 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
19425 t2 = gen_rtx_SET (operands[0], t2);
19426 emit_insn (t2);
19427 DONE;
19428 }
19429 })
19430
19431 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
19432 ;; means that in order to represent this properly in rtl we'd have to
19433 ;; nest *another* vec_concat with a zero operand and do the select from
19434 ;; a 4x wide vector. That doesn't seem very nice.
;; Hence this pattern keeps the operation as an opaque unspec
;; (UNSPEC_VPERMIL2F128) and passes operand 3 straight through as the
;; immediate of vperm2<i128>.
19435 (define_insn "*avx_vperm2f128<mode>_full"
19436 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
19437 (unspec:AVX256MODE2P
19438 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
19439 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
19440 (match_operand:SI 3 "const_0_to_255_operand" "n")]
19441 UNSPEC_VPERMIL2F128))]
19442 "TARGET_AVX"
19443 "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
19444 [(set_attr "type" "sselog")
19445 (set_attr "prefix_extra" "1")
19446 (set_attr "length_immediate" "1")
19447 (set_attr "prefix" "vex")
19448 (set_attr "mode" "<sseinsnmode>")])
19449
;; vperm2f128 expressed as a vec_select from the concatenation of
;; operands 1 and 2.  The index PARALLEL must be accepted by
;; avx_vperm2f128_parallel, whose return value minus one is the
;; immediate.  Immediates 0x12 and 0x20 are emitted as vinsert<i128>
;; (inserting the low part of operand 2 at position 0 or 1) instead of
;; vperm2<i128>.
19450 (define_insn "*avx_vperm2f128<mode>_nozero"
19451 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
19452 (vec_select:AVX256MODE2P
19453 (vec_concat:<ssedoublevecmode>
19454 (match_operand:AVX256MODE2P 1 "register_operand" "x")
19455 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
19456 (match_parallel 3 ""
19457 [(match_operand 4 "const_int_operand")])))]
19458 "TARGET_AVX
19459 && avx_vperm2f128_parallel (operands[3], <MODE>mode)"
19460 {
19461 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
19462 if (mask == 0x12)
19463 return "vinsert<i128>\t{$0, %x2, %1, %0|%0, %1, %x2, 0}";
19464 if (mask == 0x20)
19465 return "vinsert<i128>\t{$1, %x2, %1, %0|%0, %1, %x2, 1}";
19466 operands[3] = GEN_INT (mask);
19467 return "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
19468 }
19469 [(set_attr "type" "sselog")
19470 (set_attr "prefix_extra" "1")
19471 (set_attr "length_immediate" "1")
19472 (set_attr "prefix" "vex")
19473 (set_attr "mode" "<sseinsnmode>")])
19474
;; Match a single-input V_128 vec_select whose index PARALLEL is accepted
;; by palignr_operand and emit palignr/vpalignr with operand 1 used for
;; both sources.  Alternative 0 (noavx) ties operand 1 to the
;; destination; alternatives 1 (avx) and 2 (avx512bw) use the
;; three-operand VEX/EVEX form.
19475 (define_insn "*ssse3_palignr<mode>_perm"
19476 [(set (match_operand:V_128 0 "register_operand" "=x,x,v")
19477 (vec_select:V_128
19478 (match_operand:V_128 1 "register_operand" "0,x,v")
19479 (match_parallel 2 "palignr_operand"
19480 [(match_operand 3 "const_int_operand" "n,n,n")])))]
19481 "TARGET_SSSE3"
19482 {
  /* The immediate is operand 3 (the first index of the PARALLEL) scaled
     by the element size in bytes.  */
19483 operands[2] = (GEN_INT (INTVAL (operands[3])
19484 * GET_MODE_UNIT_SIZE (GET_MODE (operands[0]))));
19485
19486 switch (which_alternative)
19487 {
19488 case 0:
19489 return "palignr\t{%2, %1, %0|%0, %1, %2}";
19490 case 1:
19491 case 2:
19492 return "vpalignr\t{%2, %1, %1, %0|%0, %1, %1, %2}";
19493 default:
19494 gcc_unreachable ();
19495 }
19496 }
19497 [(set_attr "isa" "noavx,avx,avx512bw")
19498 (set_attr "type" "sseishft")
19499 (set_attr "atom_unit" "sishuf")
19500 (set_attr "prefix_data16" "1,*,*")
19501 (set_attr "prefix_extra" "1")
19502 (set_attr "length_immediate" "1")
19503 (set_attr "prefix" "orig,vex,evex")])
19504
;; Expander for the masked insertion of a <ssehalfvecmode> value
;; (operand 2) into a VI48F_256 vector (operand 1).  Operand 3 selects the
;; half: 0 dispatches to vec_set_lo_<mode>_mask, 1 to
;; vec_set_hi_<mode>_mask.  Operands 4 and 5 (a vector of the result mode
;; and the mask register) are passed on unchanged.
19505 (define_expand "avx512vl_vinsert<mode>"
19506 [(match_operand:VI48F_256 0 "register_operand")
19507 (match_operand:VI48F_256 1 "register_operand")
19508 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
19509 (match_operand:SI 3 "const_0_to_1_operand")
19510 (match_operand:VI48F_256 4 "register_operand")
19511 (match_operand:<avx512fmaskmode> 5 "register_operand")]
19512 "TARGET_AVX512VL"
19513 {
19514 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
19515
19516 switch (INTVAL (operands[3]))
19517 {
19518 case 0:
19519 insn = gen_vec_set_lo_<mode>_mask;
19520 break;
19521 case 1:
19522 insn = gen_vec_set_hi_<mode>_mask;
19523 break;
19524 default:
19525 gcc_unreachable ();
19526 }
19527
19528 emit_insn (insn (operands[0], operands[1], operands[2], operands[4],
19529 operands[5]));
19530 DONE;
19531 })
19532
;; Expander for the insertion of a <ssehalfvecmode> value (operand 2)
;; into a V_256 vector (operand 1).  Operand 3 selects the half: 0
;; dispatches to vec_set_lo_<mode>, 1 to vec_set_hi_<mode>.
19533 (define_expand "avx_vinsertf128<mode>"
19534 [(match_operand:V_256 0 "register_operand")
19535 (match_operand:V_256 1 "register_operand")
19536 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
19537 (match_operand:SI 3 "const_0_to_1_operand")]
19538 "TARGET_AVX"
19539 {
19540 rtx (*insn)(rtx, rtx, rtx);
19541
19542 switch (INTVAL (operands[3]))
19543 {
19544 case 0:
19545 insn = gen_vec_set_lo_<mode>;
19546 break;
19547 case 1:
19548 insn = gen_vec_set_hi_<mode>;
19549 break;
19550 default:
19551 gcc_unreachable ();
19552 }
19553
19554 emit_insn (insn (operands[0], operands[1], operands[2]));
19555 DONE;
19556 })
19557
;; Replace the low half of a VI8F_256 vector: the result is operand 2
;; concatenated with elements 2 and 3 of operand 1.  The masked variant
;; generated through <mask_name> is additionally gated by
;; <mask_avx512dq_condition>.  The mnemonic is chosen at output time from
;; the enabled ISA; only the two EVEX forms carry the mask operand.
19558 (define_insn "vec_set_lo_<mode><mask_name>"
19559 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
19560 (vec_concat:VI8F_256
19561 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
19562 (vec_select:<ssehalfvecmode>
19563 (match_operand:VI8F_256 1 "register_operand" "v")
19564 (parallel [(const_int 2) (const_int 3)]))))]
19565 "TARGET_AVX && <mask_avx512dq_condition>"
19566 {
19567 if (TARGET_AVX512DQ)
19568 return "vinsert<shuffletype>64x2\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
19569 else if (TARGET_AVX512VL)
19570 return "vinsert<shuffletype>32x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
19571 else
19572 return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}";
19573 }
19574 [(set_attr "type" "sselog")
19575 (set_attr "prefix_extra" "1")
19576 (set_attr "length_immediate" "1")
19577 (set_attr "prefix" "vex")
19578 (set_attr "mode" "<sseinsnmode>")])
19579
;; Replace the high half of a VI8F_256 vector: the result is elements 0
;; and 1 of operand 1 concatenated with operand 2.  The masked variant
;; generated through <mask_name> is additionally gated by
;; <mask_avx512dq_condition>.  The mnemonic is chosen at output time from
;; the enabled ISA; only the two EVEX forms carry the mask operand.
19580 (define_insn "vec_set_hi_<mode><mask_name>"
19581 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
19582 (vec_concat:VI8F_256
19583 (vec_select:<ssehalfvecmode>
19584 (match_operand:VI8F_256 1 "register_operand" "v")
19585 (parallel [(const_int 0) (const_int 1)]))
19586 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
19587 "TARGET_AVX && <mask_avx512dq_condition>"
19588 {
19589 if (TARGET_AVX512DQ)
19590 return "vinsert<shuffletype>64x2\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
19591 else if (TARGET_AVX512VL)
19592 return "vinsert<shuffletype>32x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
19593 else
19594 return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}";
19595 }
19596 [(set_attr "type" "sselog")
19597 (set_attr "prefix_extra" "1")
19598 (set_attr "length_immediate" "1")
19599 (set_attr "prefix" "vex")
19600 (set_attr "mode" "<sseinsnmode>")])
19601
;; Replace the low half of operand 1 (an 8 x 32-bit vector) with operand 2;
;; elements 4..7 of operand 1 are kept as the high half.  With AVX512VL
;; the 32x4 EVEX form is used, otherwise the VEX vinsert<i128>.
(define_insn "vec_set_lo_<mode><mask_name>"
  [(set (match_operand:VI4F_256 0 "register_operand" "=v")
        (vec_concat:VI4F_256
          (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
          (vec_select:<ssehalfvecmode>
            (match_operand:VI4F_256 1 "register_operand" "v")
            (parallel [(const_int 4) (const_int 5) (const_int 6) (const_int 7)]))))]
  "TARGET_AVX"
{
  if (!TARGET_AVX512VL)
    return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}";
  return "vinsert<shuffletype>32x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])
19622
;; Replace the high half of operand 1 (an 8 x 32-bit vector) with operand 2;
;; elements 0..3 of operand 1 are kept as the low half.  With AVX512VL
;; the 32x4 EVEX form is used, otherwise the VEX vinsert<i128>.
(define_insn "vec_set_hi_<mode><mask_name>"
  [(set (match_operand:VI4F_256 0 "register_operand" "=v")
        (vec_concat:VI4F_256
          (vec_select:<ssehalfvecmode>
            (match_operand:VI4F_256 1 "register_operand" "v")
            (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))
          (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
  "TARGET_AVX"
{
  if (!TARGET_AVX512VL)
    return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}";
  return "vinsert<shuffletype>32x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])
19643
;; Replace the low eight HImode elements of operand 1 with V8HI operand 2;
;; elements 8..15 of operand 1 are kept.  Alternative 0 is the VEX
;; encoding, alternative 1 the EVEX vinserti32x4 encoding.
(define_insn "vec_set_lo_v16hi"
  [(set (match_operand:V16HI 0 "register_operand" "=x,v")
        (vec_concat:V16HI
          (match_operand:V8HI 2 "nonimmediate_operand" "xm,vm")
          (vec_select:V8HI
            (match_operand:V16HI 1 "register_operand" "x,v")
            (parallel [(const_int 8) (const_int 9) (const_int 10) (const_int 11)
                       (const_int 12) (const_int 13) (const_int 14) (const_int 15)]))))]
  "TARGET_AVX"
  "@
vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}
vinserti32x4\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex,evex")
   (set_attr "mode" "OI")])
19663
;; Replace the high eight HImode elements of operand 1 with V8HI operand 2;
;; elements 0..7 of operand 1 are kept.  Alternative 0 is the VEX
;; encoding, alternative 1 the EVEX vinserti32x4 encoding.
(define_insn "vec_set_hi_v16hi"
  [(set (match_operand:V16HI 0 "register_operand" "=x,v")
        (vec_concat:V16HI
          (vec_select:V8HI
            (match_operand:V16HI 1 "register_operand" "x,v")
            (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)
                       (const_int 4) (const_int 5) (const_int 6) (const_int 7)]))
          (match_operand:V8HI 2 "nonimmediate_operand" "xm,vm")))]
  "TARGET_AVX"
  "@
vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
vinserti32x4\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex,evex")
   (set_attr "mode" "OI")])
19683
;; Replace the low sixteen QImode elements of operand 1 with V16QI
;; operand 2; elements 16..31 of operand 1 are kept.  Alternative 0 is
;; the VEX encoding, alternative 1 the EVEX vinserti32x4 encoding.
;; Operand 2 may be a register or memory in both alternatives, since
;; vinserti32x4 takes an xmm/m128 source just like the VEX form.
(define_insn "vec_set_lo_v32qi"
  [(set (match_operand:V32QI 0 "register_operand" "=x,v")
        (vec_concat:V32QI
          (match_operand:V16QI 2 "nonimmediate_operand" "xm,vm")
          (vec_select:V16QI
            (match_operand:V32QI 1 "register_operand" "x,v")
            (parallel [(const_int 16) (const_int 17)
                       (const_int 18) (const_int 19)
                       (const_int 20) (const_int 21)
                       (const_int 22) (const_int 23)
                       (const_int 24) (const_int 25)
                       (const_int 26) (const_int 27)
                       (const_int 28) (const_int 29)
                       (const_int 30) (const_int 31)]))))]
  "TARGET_AVX"
  "@
vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}
vinserti32x4\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex,evex")
   (set_attr "mode" "OI")])
19707
;; Replace the high sixteen QImode elements of operand 1 with V16QI
;; operand 2; elements 0..15 of operand 1 are kept.  Alternative 0 is
;; the VEX encoding, alternative 1 the EVEX vinserti32x4 encoding.
(define_insn "vec_set_hi_v32qi"
  [(set (match_operand:V32QI 0 "register_operand" "=x,v")
        (vec_concat:V32QI
          (vec_select:V16QI
            (match_operand:V32QI 1 "register_operand" "x,v")
            (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)
                       (const_int 4) (const_int 5) (const_int 6) (const_int 7)
                       (const_int 8) (const_int 9) (const_int 10) (const_int 11)
                       (const_int 12) (const_int 13) (const_int 14) (const_int 15)]))
          (match_operand:V16QI 2 "nonimmediate_operand" "xm,vm")))]
  "TARGET_AVX"
  "@
vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
vinserti32x4\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex,evex")
   (set_attr "mode" "OI")])
19731
;; Masked load insn (v*maskmov*).  Operand 0 receives an UNSPEC_MASKMOV
;; built from the integer-vector mask register (operand 2) and the memory
;; source (operand 1).
(define_insn "<avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>"
  [(set (match_operand:V48_AVX2 0 "register_operand" "=x")
        (unspec:V48_AVX2
          [(match_operand:<sseintvecmode> 2 "register_operand" "x")
           (match_operand:V48_AVX2 1 "memory_operand" "m")]
          UNSPEC_MASKMOV))]
  "TARGET_AVX"
  "v<sseintprefix>maskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "btver2_decode" "vector")
   (set_attr "mode" "<sseinsnmode>")])
19745
;; Masked store insn (v*maskmov*).  The memory destination (operand 0) is
;; read-write ("+m"): the UNSPEC_MASKMOV takes the mask register
;; (operand 1), the data register (operand 2) and the previous memory
;; contents (match_dup 0).
(define_insn "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>"
  [(set (match_operand:V48_AVX2 0 "memory_operand" "+m")
        (unspec:V48_AVX2
          [(match_operand:<sseintvecmode> 1 "register_operand" "x")
           (match_operand:V48_AVX2 2 "register_operand" "x")
           (match_dup 0)]
          UNSPEC_MASKMOV))]
  "TARGET_AVX"
  "v<sseintprefix>maskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "btver2_decode" "vector")
   (set_attr "mode" "<sseinsnmode>")])
19760
;; maskload expander for the AVX/AVX2 vector-mask form: operand 1 is the
;; memory source and operand 2 the integer-vector mask.
(define_expand "maskload<mode><sseintvecmodelower>"
  [(set (match_operand:V48_AVX2 0 "register_operand")
        (unspec:V48_AVX2
          [(match_operand:<sseintvecmode> 2 "register_operand")
           (match_operand:V48_AVX2 1 "memory_operand")]
          UNSPEC_MASKMOV))]
  "TARGET_AVX")
19768
;; maskload expander for AVX512F mask registers: a vec_merge of the memory
;; source (operand 1) with the destination itself, selected by the mask
;; in operand 2.
(define_expand "maskload<mode><avx512fmaskmodelower>"
  [(set (match_operand:V48_AVX512VL 0 "register_operand")
        (vec_merge:V48_AVX512VL
          (match_operand:V48_AVX512VL 1 "memory_operand")
          (match_dup 0)
          (match_operand:<avx512fmaskmode> 2 "register_operand")))]
  "TARGET_AVX512F")
19776
;; Same vec_merge-based maskload expander for the VI12_AVX512VL modes;
;; this one is gated on AVX512BW.
(define_expand "maskload<mode><avx512fmaskmodelower>"
  [(set (match_operand:VI12_AVX512VL 0 "register_operand")
        (vec_merge:VI12_AVX512VL
          (match_operand:VI12_AVX512VL 1 "memory_operand")
          (match_dup 0)
          (match_operand:<avx512fmaskmode> 2 "register_operand")))]
  "TARGET_AVX512BW")
19784
;; maskstore expander for the AVX/AVX2 vector-mask form: operand 0 is the
;; memory destination, operand 1 the data and operand 2 the
;; integer-vector mask; the old memory contents appear as match_dup 0.
(define_expand "maskstore<mode><sseintvecmodelower>"
  [(set (match_operand:V48_AVX2 0 "memory_operand")
        (unspec:V48_AVX2
          [(match_operand:<sseintvecmode> 2 "register_operand")
           (match_operand:V48_AVX2 1 "register_operand")
           (match_dup 0)]
          UNSPEC_MASKMOV))]
  "TARGET_AVX")
19793
;; maskstore expander for AVX512F mask registers: a vec_merge of the data
;; register (operand 1) with the existing memory contents, selected by
;; the mask in operand 2.
(define_expand "maskstore<mode><avx512fmaskmodelower>"
  [(set (match_operand:V48_AVX512VL 0 "memory_operand")
        (vec_merge:V48_AVX512VL
          (match_operand:V48_AVX512VL 1 "register_operand")
          (match_dup 0)
          (match_operand:<avx512fmaskmode> 2 "register_operand")))]
  "TARGET_AVX512F")
19801
;; Same vec_merge-based maskstore expander for the VI12_AVX512VL modes;
;; this one is gated on AVX512BW.
(define_expand "maskstore<mode><avx512fmaskmodelower>"
  [(set (match_operand:VI12_AVX512VL 0 "memory_operand")
        (vec_merge:VI12_AVX512VL
          (match_operand:VI12_AVX512VL 1 "register_operand")
          (match_dup 0)
          (match_operand:<avx512fmaskmode> 2 "register_operand")))]
  "TARGET_AVX512BW")
19809
;; Conditional branch on a comparison of two integer vectors.  The RTL
;; template is only a description; the real expansion is delegated to
;; ix86_expand_branch with the comparison code taken from operand 0,
;; the two vectors (operands 1 and 2) and the target label (operand 3).
(define_expand "cbranch<mode>4"
  [(set (reg:CC FLAGS_REG)
        (compare:CC
          (match_operand:VI48_AVX 1 "register_operand")
          (match_operand:VI48_AVX 2 "nonimmediate_operand")))
   (set (pc)
        (if_then_else
          (match_operator 0 "bt_comparison_operator"
            [(reg:CC FLAGS_REG) (const_int 0)])
          (label_ref (match_operand 3))
          (pc)))]
  "TARGET_SSE4_1"
{
  const enum rtx_code cmp_code = GET_CODE (operands[0]);

  ix86_expand_branch (cmp_code, operands[1], operands[2], operands[3]);
  DONE;
})
19825
19826
;; Cast of a half-width vector (operand 1) to a 256-bit vector
;; (UNSPEC_CAST).  Always emitted as "#" and split after reload into a
;; plain move: for a register destination the move is done in the half
;; mode on the low part of operand 0; for a memory destination the
;; source register is instead viewed in the full mode.
(define_insn_and_split "avx_<castmode><avxsizesuffix>_<castmode>"
  [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
        (unspec:AVX256MODE2P
          [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
          UNSPEC_CAST))]
  "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  if (!REG_P (operands[0]))
    operands[1] = lowpart_subreg (<MODE>mode, operands[1],
                                  <ssehalfvecmode>mode);
  else
    operands[0] = gen_lowpart (<ssehalfvecmode>mode, operands[0]);
})
19843
;; Vector modes accepted by the vec_init expanders.  The 512-bit modes
;; need AVX512F, the 256-bit modes need AVX, and V2DF needs SSE2; the
;; remaining 128-bit modes carry no extra condition here.
(define_mode_iterator VEC_INIT_MODE
  [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
   (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
   (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX")  V4SI
   (V8DI "TARGET_AVX512F")  (V4DI "TARGET_AVX")  V2DI
   (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX")  V4SF
   (V8DF "TARGET_AVX512F")  (V4DF "TARGET_AVX")  (V2DF "TARGET_SSE2")
   (V4TI "TARGET_AVX512F")  (V2TI "TARGET_AVX")])
19853
;; Vector modes that can be initialized from two half-sized vectors.
;; This is VEC_INIT_MODE without the two-element modes (V2DI, V2DF,
;; V2TI).
(define_mode_iterator VEC_INIT_HALF_MODE
  [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
   (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
   (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX")  V4SI
   (V8DI "TARGET_AVX512F")  (V4DI "TARGET_AVX")
   (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX")  V4SF
   (V8DF "TARGET_AVX512F")  (V4DF "TARGET_AVX")
   (V4TI "TARGET_AVX512F")])
19864
;; vec_init from scalar elements: operand 0 is the vector destination and
;; operand 1 the initializer; everything is handed to
;; ix86_expand_vector_init with its first argument false.
(define_expand "vec_init<mode><ssescalarmodelower>"
  [(match_operand:VEC_INIT_MODE 0 "register_operand")
   (match_operand 1)]
  "TARGET_SSE"
{
  rtx target = operands[0];
  rtx initializer = operands[1];

  ix86_expand_vector_init (false, target, initializer);
  DONE;
})
19873
;; vec_init from half-sized vectors: operand 0 is the vector destination
;; and operand 1 the initializer; everything is handed to
;; ix86_expand_vector_init with its first argument false.
(define_expand "vec_init<mode><ssehalfvecmodelower>"
  [(match_operand:VEC_INIT_HALF_MODE 0 "register_operand")
   (match_operand 1)]
  "TARGET_SSE"
{
  rtx target = operands[0];
  rtx initializer = operands[1];

  ix86_expand_vector_init (false, target, initializer);
  DONE;
})
19882
;; Arithmetic right shift of vector operand 1 by the vector of shift
;; counts in operand 2 (vpsrav<ssemodesuffix>), for 32- and 64-bit
;; element modes.
(define_insn "<avx2_avx512>_ashrv<mode><mask_name>"
  [(set (match_operand:VI48_AVX512F_AVX512VL 0 "register_operand" "=v")
        (ashiftrt:VI48_AVX512F_AVX512VL
          (match_operand:VI48_AVX512F_AVX512VL 1 "register_operand" "v")
          (match_operand:VI48_AVX512F_AVX512VL 2 "nonimmediate_operand" "vm")))]
  "TARGET_AVX2 && <mask_mode512bit_condition>"
  "vpsrav<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sseishft")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "<sseinsnmode>")])
19893
;; Arithmetic right shift of vector operand 1 by the vector of shift
;; counts in operand 2 for the VI2_AVX512VL modes (vpsravw); requires
;; AVX512BW.
(define_insn "<avx2_avx512>_ashrv<mode><mask_name>"
  [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
        (ashiftrt:VI2_AVX512VL
          (match_operand:VI2_AVX512VL 1 "register_operand" "v")
          (match_operand:VI2_AVX512VL 2 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512BW"
  "vpsravw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sseishft")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "<sseinsnmode>")])
19904
;; Shift (any_lshift code iterator) of vector operand 1 by the vector of
;; shift counts in operand 2, for 32- and 64-bit element modes.
(define_insn "<avx2_avx512>_<shift_insn>v<mode><mask_name>"
  [(set (match_operand:VI48_AVX512F 0 "register_operand" "=v")
        (any_lshift:VI48_AVX512F
          (match_operand:VI48_AVX512F 1 "register_operand" "v")
          (match_operand:VI48_AVX512F 2 "nonimmediate_operand" "vm")))]
  "TARGET_AVX2 && <mask_mode512bit_condition>"
  "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sseishft")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "<sseinsnmode>")])
19915
;; Shift (any_lshift code iterator) of vector operand 1 by the vector of
;; shift counts in operand 2 for the VI2_AVX512VL modes; requires
;; AVX512BW.
(define_insn "<avx2_avx512>_<shift_insn>v<mode><mask_name>"
  [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
        (any_lshift:VI2_AVX512VL
          (match_operand:VI2_AVX512VL 1 "register_operand" "v")
          (match_operand:VI2_AVX512VL 2 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512BW"
  "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sseishft")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "<sseinsnmode>")])
19926
;; Concatenate two half-width vectors (operands 1 and 2) into a 256- or
;; 512-bit vector.
;;   alt 0:    VEX vinsert<i128> of operand 2 into the upper half.
;;   alt 1:    EVEX vinsert; the element-width flavour depends on the
;;             destination size, AVX512DQ and the scalar element size.
;;   alt 2, 3: operand 2 is the zero constant ("C"); a move of operand 1
;;             into the half-width view of operand 0 is emitted instead.
(define_insn "avx_vec_concat<mode>"
  [(set (match_operand:V_256_512 0 "register_operand" "=x,v,x,Yv")
        (vec_concat:V_256_512
          (match_operand:<ssehalfvecmode> 1 "register_operand" "x,v,x,v")
          (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "xm,vm,C,C")))]
  "TARGET_AVX"
{
  switch (which_alternative)
    {
    case 0:
      return "vinsert<i128>\t{$0x1, %2, %<xtg_mode>1, %0|%0, %<xtg_mode>1, %2, 0x1}";

    case 1:
      if (<MODE_SIZE> != 64)
        return (TARGET_AVX512DQ && GET_MODE_SIZE (<ssescalarmode>mode) == 8
                ? "vinsert<shuffletype>64x2\t{$0x1, %2, %<xtg_mode>1, %0|%0, %<xtg_mode>1, %2, 0x1}"
                : "vinsert<shuffletype>32x4\t{$0x1, %2, %<xtg_mode>1, %0|%0, %<xtg_mode>1, %2, 0x1}");
      return (TARGET_AVX512DQ && GET_MODE_SIZE (<ssescalarmode>mode) == 4
              ? "vinsert<shuffletype>32x8\t{$0x1, %2, %<xtg_mode>1, %0|%0, %<xtg_mode>1, %2, 0x1}"
              : "vinsert<shuffletype>64x4\t{$0x1, %2, %<xtg_mode>1, %0|%0, %<xtg_mode>1, %2, 0x1}");

    case 2:
    case 3:
      /* Zero upper half: handled by the move selection below.  */
      break;

    default:
      gcc_unreachable ();
    }

  /* Only alternatives 2 and 3 reach this point.  Alternative 2 uses the
     plain vmovdqa for integer modes, alternative 3 the sized
     vmovdqa32/vmovdqa64 forms.  */
  switch (get_attr_mode (insn))
    {
    case MODE_V16SF:
      return "vmovaps\t{%1, %t0|%t0, %1}";
    case MODE_V8DF:
      return "vmovapd\t{%1, %t0|%t0, %1}";
    case MODE_V8SF:
      return "vmovaps\t{%1, %x0|%x0, %1}";
    case MODE_V4DF:
      return "vmovapd\t{%1, %x0|%x0, %1}";
    case MODE_XI:
      return (which_alternative == 2
              ? "vmovdqa\t{%1, %t0|%t0, %1}"
              : (GET_MODE_SIZE (<ssescalarmode>mode) == 8
                 ? "vmovdqa64\t{%1, %t0|%t0, %1}"
                 : "vmovdqa32\t{%1, %t0|%t0, %1}"));
    case MODE_OI:
      return (which_alternative == 2
              ? "vmovdqa\t{%1, %x0|%x0, %1}"
              : (GET_MODE_SIZE (<ssescalarmode>mode) == 8
                 ? "vmovdqa64\t{%1, %x0|%x0, %1}"
                 : "vmovdqa32\t{%1, %x0|%x0, %1}"));
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "sselog,sselog,ssemov,ssemov")
   (set_attr "prefix_extra" "1,1,*,*")
   (set_attr "length_immediate" "1,1,*,*")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "<sseinsnmode>")])
19991
;; Half-to-single conversion producing V4SF from a V8HI register: the
;; RTL takes the low four elements of a V8SF UNSPEC_VCVTPH2PS of
;; operand 1.
(define_insn "vcvtph2ps<mask_name>"
  [(set (match_operand:V4SF 0 "register_operand" "=v")
        (vec_select:V4SF
          (unspec:V8SF
            [(match_operand:V8HI 1 "register_operand" "v")]
            UNSPEC_VCVTPH2PS)
          (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)])))]
  "TARGET_F16C || TARGET_AVX512VL"
  "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "V4SF")])
20004
;; Half-to-single conversion producing V4SF directly from a V4HI memory
;; operand.
(define_insn "*vcvtph2ps_load<mask_name>"
  [(set (match_operand:V4SF 0 "register_operand" "=v")
        (unspec:V4SF
          [(match_operand:V4HI 1 "memory_operand" "m")]
          UNSPEC_VCVTPH2PS))]
  "TARGET_F16C || TARGET_AVX512VL"
  "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])
20014
;; Half-to-single conversion producing V8SF from a V8HI register or
;; memory operand.
(define_insn "vcvtph2ps256<mask_name>"
  [(set (match_operand:V8SF 0 "register_operand" "=v")
        (unspec:V8SF
          [(match_operand:V8HI 1 "nonimmediate_operand" "vm")]
          UNSPEC_VCVTPH2PS))]
  "TARGET_F16C || TARGET_AVX512VL"
  "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "btver2_decode" "double")
   (set_attr "mode" "V8SF")])
20025
;; Half-to-single conversion producing V16SF from V16HI (AVX512F); the
;; source predicate and constraint come from the round_saeonly subst
;; attributes.
(define_insn "<mask_codefor>avx512f_vcvtph2ps512<mask_name><round_saeonly_name>"
  [(set (match_operand:V16SF 0 "register_operand" "=v")
        (unspec:V16SF
          [(match_operand:V16HI 1 "<round_saeonly_nimm_predicate>"
                                  "<round_saeonly_constraint>")]
          UNSPEC_VCVTPH2PS))]
  "TARGET_AVX512F"
  "vcvtph2ps\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "V16SF")])
20036
;; Masked single-to-half conversion of V4SF operand 1 with immediate
;; operand 2.  The V4HI result is concatenated with a zero V4HI
;; (operand 5, created here) and merged with operand 3 under the QImode
;; mask in operand 4.
(define_expand "vcvtps2ph_mask"
  [(set (match_operand:V8HI 0 "register_operand")
        (vec_merge:V8HI
          (vec_concat:V8HI
            (unspec:V4HI
              [(match_operand:V4SF 1 "register_operand")
               (match_operand:SI 2 "const_0_to_255_operand")]
              UNSPEC_VCVTPS2PH)
            (match_dup 5))
          (match_operand:V8HI 3 "nonimm_or_0_operand")
          (match_operand:QI 4 "register_operand")))]
  "TARGET_AVX512VL"
{
  operands[5] = CONST0_RTX (V4HImode);
})
20049
;; Single-to-half conversion of V4SF operand 1 with immediate operand 2.
;; The V4HI result forms the low half of the V8HI destination; the high
;; half is a zero V4HI (operand 3, created here).
(define_expand "vcvtps2ph"
  [(set (match_operand:V8HI 0 "register_operand")
        (vec_concat:V8HI
          (unspec:V4HI
            [(match_operand:V4SF 1 "register_operand")
             (match_operand:SI 2 "const_0_to_255_operand")]
            UNSPEC_VCVTPS2PH)
          (match_dup 3)))]
  "TARGET_F16C"
{
  operands[3] = CONST0_RTX (V4HImode);
})
20059
;; Insn matching the expanders above: V4SF -> V4HI conversion
;; concatenated with a zero V4HI (operand 3 must satisfy const0_operand).
(define_insn "*vcvtps2ph<mask_name>"
  [(set (match_operand:V8HI 0 "register_operand" "=v")
        (vec_concat:V8HI
          (unspec:V4HI
            [(match_operand:V4SF 1 "register_operand" "v")
             (match_operand:SI 2 "const_0_to_255_operand" "N")]
            UNSPEC_VCVTPS2PH)
          (match_operand:V4HI 3 "const0_operand")))]
  "(TARGET_F16C || TARGET_AVX512VL) && <mask_avx512vl_condition>"
  "vcvtps2ph\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "V4SF")])
20072
;; V4SF -> V4HI conversion stored straight to a V4HI memory destination.
(define_insn "*vcvtps2ph_store<mask_name>"
  [(set (match_operand:V4HI 0 "memory_operand" "=m")
        (unspec:V4HI
          [(match_operand:V4SF 1 "register_operand" "v")
           (match_operand:SI 2 "const_0_to_255_operand" "N")]
          UNSPEC_VCVTPS2PH))]
  "TARGET_F16C || TARGET_AVX512VL"
  "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "V4SF")])
20083
;; V8SF -> V8HI conversion with immediate operand 2; the destination may
;; be a register or memory.
(define_insn "vcvtps2ph256<mask_name>"
  [(set (match_operand:V8HI 0 "nonimmediate_operand" "=vm")
        (unspec:V8HI
          [(match_operand:V8SF 1 "register_operand" "v")
           (match_operand:SI 2 "const_0_to_255_operand" "N")]
          UNSPEC_VCVTPS2PH))]
  "TARGET_F16C || TARGET_AVX512VL"
  "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "maybe_evex")
   (set_attr "btver2_decode" "vector")
   (set_attr "mode" "V8SF")])
20095
;; V16SF -> V16HI conversion with immediate operand 2 (AVX512F); the
;; destination may be a register or memory.
(define_insn "<mask_codefor>avx512f_vcvtps2ph512<mask_name>"
  [(set (match_operand:V16HI 0 "nonimmediate_operand" "=vm")
        (unspec:V16HI
          [(match_operand:V16SF 1 "register_operand" "v")
           (match_operand:SI 2 "const_0_to_255_operand" "N")]
          UNSPEC_VCVTPS2PH))]
  "TARGET_AVX512F"
  "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "V16SF")])
20107
;; Data modes iterated over by the AVX2 gather patterns (128- and
;; 256-bit vectors with 32- or 64-bit elements).
(define_mode_iterator VEC_GATHER_MODE
  [V2DI V2DF
   V4DI V4DF
   V4SI V4SF
   V8SI V8SF])
;; Mode of the index vector operand used by the *gathersi* patterns for
;; each data mode.
(define_mode_attr VEC_GATHER_IDXSI
  [(V2DI "V4SI") (V4DI "V4SI") (V8DI "V8SI")
   (V2DF "V4SI") (V4DF "V4SI") (V8DF "V8SI")
   (V4SI "V4SI") (V8SI "V8SI") (V16SI "V16SI")
   (V4SF "V4SI") (V8SF "V8SI") (V16SF "V16SI")])
20116
;; Mode of the index vector operand used by the *gatherdi* patterns for
;; each data mode.
(define_mode_attr VEC_GATHER_IDXDI
  [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
   (V2DF "V2DI") (V4DF "V4DI") (V8DF "V8DI")
   (V4SI "V2DI") (V8SI "V4DI") (V16SI "V8DI")
   (V4SF "V2DI") (V8SF "V4DI") (V16SF "V8DI")])
20122
;; Mode of the source (and, for AVX2, mask) vector operands used by the
;; *gatherdi* patterns.  For 64-bit element modes it equals the data
;; mode; for 256/512-bit modes with 32-bit elements it is the half-width
;; mode.
(define_mode_attr VEC_GATHER_SRCDI
  [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
   (V2DF "V2DF") (V4DF "V4DF") (V8DF "V8DF")
   (V4SI "V4SI") (V8SI "V4SI") (V16SI "V8SI")
   (V4SF "V4SF") (V8SF "V4SF") (V16SF "V8SF")])
20128
;; AVX2 gather with an SImode index vector.
;;   operand 0: destination
;;   operand 1: source vector in the data mode
;;   operand 2: base address (vsib_address_operand)
;;   operand 3: index vector in <VEC_GATHER_IDXSI>
;;   operand 4: mask vector in the data mode
;;   operand 5: scale, one of 1, 2, 4, 8 (const1248_operand)
;;   operand 6: UNSPEC_VSIBADDR address built below from operands 2, 3, 5
;;   operand 7: scratch clobbered by the insn
(define_expand "avx2_gathersi<mode>"
  [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
                   (unspec:VEC_GATHER_MODE
                     [(match_operand:VEC_GATHER_MODE 1 "register_operand")
                      (mem:<ssescalarmode>
                        (match_par_dup 6
                          [(match_operand 2 "vsib_address_operand")
                           (match_operand:<VEC_GATHER_IDXSI>
                             3 "register_operand")
                           (match_operand:SI 5 "const1248_operand")]))
                      (mem:BLK (scratch))
                      (match_operand:VEC_GATHER_MODE 4 "register_operand")]
                     UNSPEC_GATHER))
              (clobber (match_scratch:VEC_GATHER_MODE 7))])]
  "TARGET_AVX2"
{
  /* Wrap base, index and scale into the VSIB address unspec that the
     matching insn's vsib_mem_operator expects.  */
  operands[6]
    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
                                        operands[5]), UNSPEC_VSIBADDR);
})
20149
;; AVX2 gather insn, SImode indices.  The source (operand 2) is tied to
;; the destination and the mask (operand 5) is tied to the clobbered
;; scratch (operand 1), which the template prints as the mask register.
(define_insn "*avx2_gathersi<mode>"
  [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
        (unspec:VEC_GATHER_MODE
          [(match_operand:VEC_GATHER_MODE 2 "register_operand" "0")
           (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
             [(unspec:P
                [(match_operand:P 3 "vsib_address_operand" "Tv")
                 (match_operand:<VEC_GATHER_IDXSI> 4 "register_operand" "x")
                 (match_operand:SI 6 "const1248_operand" "n")]
                UNSPEC_VSIBADDR)])
           (mem:BLK (scratch))
           (match_operand:VEC_GATHER_MODE 5 "register_operand" "1")]
          UNSPEC_GATHER))
   (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
  "TARGET_AVX2"
  "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])
20169
;; Variant of the AVX2 SImode-index gather insn in which the source
;; vector slot of the unspec is (pc) instead of a register operand.
;; The mask (operand 4) is tied to the clobbered scratch (operand 1).
(define_insn "*avx2_gathersi<mode>_2"
  [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
        (unspec:VEC_GATHER_MODE
          [(pc)
           (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
             [(unspec:P
                [(match_operand:P 2 "vsib_address_operand" "Tv")
                 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "x")
                 (match_operand:SI 5 "const1248_operand" "n")]
                UNSPEC_VSIBADDR)])
           (mem:BLK (scratch))
           (match_operand:VEC_GATHER_MODE 4 "register_operand" "1")]
          UNSPEC_GATHER))
   (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
  "TARGET_AVX2"
  "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])
20189
;; AVX2 gather with a DImode index vector.
;;   operand 0: destination
;;   operand 1: source vector in <VEC_GATHER_SRCDI>
;;   operand 2: base address (vsib_address_operand)
;;   operand 3: index vector in <VEC_GATHER_IDXDI>
;;   operand 4: mask vector in <VEC_GATHER_SRCDI>
;;   operand 5: scale, one of 1, 2, 4, 8 (const1248_operand)
;;   operand 6: UNSPEC_VSIBADDR address built below from operands 2, 3, 5
;;   operand 7: scratch clobbered by the insn
(define_expand "avx2_gatherdi<mode>"
  [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
                   (unspec:VEC_GATHER_MODE
                     [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
                      (mem:<ssescalarmode>
                        (match_par_dup 6
                          [(match_operand 2 "vsib_address_operand")
                           (match_operand:<VEC_GATHER_IDXDI>
                             3 "register_operand")
                           (match_operand:SI 5 "const1248_operand")]))
                      (mem:BLK (scratch))
                      (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand")]
                     UNSPEC_GATHER))
              (clobber (match_scratch:VEC_GATHER_MODE 7))])]
  "TARGET_AVX2"
{
  /* Wrap base, index and scale into the VSIB address unspec that the
     matching insn's vsib_mem_operator expects.  */
  operands[6]
    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
                                        operands[5]), UNSPEC_VSIBADDR);
})
20210
;; AVX2 gather insn, DImode indices.  The source (operand 2) is tied to
;; the destination and the mask (operand 5) to the clobbered scratch
;; (operand 1).  The template prints operands 2 and 5, i.e. the registers
;; in their <VEC_GATHER_SRCDI> mode.
(define_insn "*avx2_gatherdi<mode>"
  [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
        (unspec:VEC_GATHER_MODE
          [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
           (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
             [(unspec:P
                [(match_operand:P 3 "vsib_address_operand" "Tv")
                 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
                 (match_operand:SI 6 "const1248_operand" "n")]
                UNSPEC_VSIBADDR)])
           (mem:BLK (scratch))
           (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
          UNSPEC_GATHER))
   (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
  "TARGET_AVX2"
  "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])
20230
;; Variant of the AVX2 DImode-index gather insn with (pc) in the source
;; slot.  When the data mode differs from <VEC_GATHER_SRCDI> the
;; destination is printed with the %x modifier; otherwise it is printed
;; as is.
(define_insn "*avx2_gatherdi<mode>_2"
  [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
        (unspec:VEC_GATHER_MODE
          [(pc)
           (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
             [(unspec:P
                [(match_operand:P 2 "vsib_address_operand" "Tv")
                 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
                 (match_operand:SI 5 "const1248_operand" "n")]
                UNSPEC_VSIBADDR)])
           (mem:BLK (scratch))
           (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
          UNSPEC_GATHER))
   (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
  "TARGET_AVX2"
{
  if (<MODE>mode == <VEC_GATHER_SRCDI>mode)
    return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";
  return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";
}
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])
20254
;; AVX2 DImode-index gather for the VI4F_256 modes where only the low
;; four elements of the gather result are selected into a
;; <VEC_GATHER_SRCDI> destination.  Source operand 2 is tied to the
;; destination, mask operand 5 to the scratch.
(define_insn "*avx2_gatherdi<mode>_3"
  [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
        (vec_select:<VEC_GATHER_SRCDI>
          (unspec:VI4F_256
            [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
             (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
               [(unspec:P
                  [(match_operand:P 3 "vsib_address_operand" "Tv")
                   (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
                   (match_operand:SI 6 "const1248_operand" "n")]
                  UNSPEC_VSIBADDR)])
             (mem:BLK (scratch))
             (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
            UNSPEC_GATHER)
          (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)])))
   (clobber (match_scratch:VI4F_256 1 "=&x"))]
  "TARGET_AVX2"
  "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])
20277
;; As the _3 pattern (low four elements of a VI4F_256 gather selected
;; into a <VEC_GATHER_SRCDI> destination), but with (pc) in the source
;; slot of the unspec.  Mask operand 4 is tied to the scratch.
(define_insn "*avx2_gatherdi<mode>_4"
  [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
        (vec_select:<VEC_GATHER_SRCDI>
          (unspec:VI4F_256
            [(pc)
             (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
               [(unspec:P
                  [(match_operand:P 2 "vsib_address_operand" "Tv")
                   (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
                   (match_operand:SI 5 "const1248_operand" "n")]
                  UNSPEC_VSIBADDR)])
             (mem:BLK (scratch))
             (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
            UNSPEC_GATHER)
          (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)])))
   (clobber (match_scratch:VI4F_256 1 "=&x"))]
  "TARGET_AVX2"
  "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])
20300
;; AVX512 gather with an SImode index vector.
;;   operand 0: destination
;;   operand 1: source vector in the data mode
;;   operand 2: base address
;;   operand 3: index vector in <VEC_GATHER_IDXSI>
;;   operand 4: mask register in <avx512fmaskmode>
;;   operand 5: scale (const1248_operand)
;;   operand 6: UNSPEC_VSIBADDR address built below
;;   operand 7: mask-mode scratch clobbered by the insn
(define_expand "<avx512>_gathersi<mode>"
  [(parallel [(set (match_operand:VI48F 0 "register_operand")
                   (unspec:VI48F
                     [(match_operand:VI48F 1 "register_operand")
                      (match_operand:<avx512fmaskmode> 4 "register_operand")
                      (mem:<ssescalarmode>
                        (match_par_dup 6
                          [(match_operand 2 "vsib_address_operand")
                           (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand")
                           (match_operand:SI 5 "const1248_operand")]))]
                     UNSPEC_GATHER))
              (clobber (match_scratch:<avx512fmaskmode> 7))])]
  "TARGET_AVX512F"
{
  rtvec vsib_parts = gen_rtvec (3, operands[2], operands[3], operands[5]);

  operands[6] = gen_rtx_UNSPEC (Pmode, vsib_parts, UNSPEC_VSIBADDR);
})
20319
;; AVX512 gather insn, SImode indices.  The source (operand 1) is tied to
;; the destination; the mask (operand 7) is tied to the clobbered mask
;; scratch (operand 2), which is printed as the {%k} mask.
(define_insn "*avx512f_gathersi<mode>"
  [(set (match_operand:VI48F 0 "register_operand" "=&v")
        (unspec:VI48F
          [(match_operand:VI48F 1 "register_operand" "0")
           (match_operand:<avx512fmaskmode> 7 "register_operand" "2")
           (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
             [(unspec:P
                [(match_operand:P 4 "vsib_address_operand" "Tv")
                 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "v")
                 (match_operand:SI 5 "const1248_operand" "n")]
                UNSPEC_VSIBADDR)])]
          UNSPEC_GATHER))
   (clobber (match_scratch:<avx512fmaskmode> 2 "=&Yk"))]
  "TARGET_AVX512F"
;; %X6 is used so that no *WORD PTR is emitted for -masm=intel, because
;; gas changed its requirement there incompatibly.
  "v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %X6}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
20340
;; Variant of the AVX512 SImode-index gather insn with (pc) in the source
;; slot of the unspec.  The mask (operand 6) is tied to the clobbered
;; mask scratch (operand 1).
(define_insn "*avx512f_gathersi<mode>_2"
  [(set (match_operand:VI48F 0 "register_operand" "=&v")
        (unspec:VI48F
          [(pc)
           (match_operand:<avx512fmaskmode> 6 "register_operand" "1")
           (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
             [(unspec:P
                [(match_operand:P 3 "vsib_address_operand" "Tv")
                 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
                 (match_operand:SI 4 "const1248_operand" "n")]
                UNSPEC_VSIBADDR)])]
          UNSPEC_GATHER))
   (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
  "TARGET_AVX512F"
;; %X5 is used so that no *WORD PTR is emitted for -masm=intel, because
;; gas changed its requirement there incompatibly.
  "v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
20361
20362
;; AVX512 gather with a DImode index vector.
;;   operand 0: destination
;;   operand 1: source vector in <VEC_GATHER_SRCDI>
;;   operand 2: base address
;;   operand 3: index vector in <VEC_GATHER_IDXDI>
;;   operand 4: QImode mask register
;;   operand 5: scale (const1248_operand)
;;   operand 6: UNSPEC_VSIBADDR address built below
;;   operand 7: QImode scratch clobbered by the insn
(define_expand "<avx512>_gatherdi<mode>"
  [(parallel [(set (match_operand:VI48F 0 "register_operand")
                   (unspec:VI48F
                     [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
                      (match_operand:QI 4 "register_operand")
                      (mem:<ssescalarmode>
                        (match_par_dup 6
                          [(match_operand 2 "vsib_address_operand")
                           (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand")
                           (match_operand:SI 5 "const1248_operand")]))]
                     UNSPEC_GATHER))
              (clobber (match_scratch:QI 7))])]
  "TARGET_AVX512F"
{
  rtvec vsib_parts = gen_rtvec (3, operands[2], operands[3], operands[5]);

  operands[6] = gen_rtx_UNSPEC (Pmode, vsib_parts, UNSPEC_VSIBADDR);
})
20381
;; AVX512 gather insn, DImode indices.  The source (operand 1, in
;; <VEC_GATHER_SRCDI>) is tied to the destination and is what the
;; template prints; the QImode mask (operand 7) is tied to the clobbered
;; scratch (operand 2).
(define_insn "*avx512f_gatherdi<mode>"
  [(set (match_operand:VI48F 0 "register_operand" "=&v")
        (unspec:VI48F
          [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand" "0")
           (match_operand:QI 7 "register_operand" "2")
           (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
             [(unspec:P
                [(match_operand:P 4 "vsib_address_operand" "Tv")
                 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "v")
                 (match_operand:SI 5 "const1248_operand" "n")]
                UNSPEC_VSIBADDR)])]
          UNSPEC_GATHER))
   (clobber (match_scratch:QI 2 "=&Yk"))]
  "TARGET_AVX512F"
;; %X6 is used so that no *WORD PTR is emitted for -masm=intel, because
;; gas changed its requirement there incompatibly.
  "v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %X6}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
20402
;; "_2" form of the <VEC_GATHER_IDXDI>-indexed gather: the first
;; UNSPEC_GATHER element is (pc) instead of a source register.  The mask
;; input (operand 6) is tied to the clobbered QImode scratch (operand 1).
;; When <MODE>mode differs from <VEC_GATHER_SRCDI>mode the destination is
;; printed with the %t modifier for 64-byte modes and %x otherwise.
(define_insn "*avx512f_gatherdi<mode>_2"
  [(set (match_operand:VI48F 0 "register_operand" "=&v")
        (unspec:VI48F
          [(pc)
           (match_operand:QI 6 "register_operand" "1")
           (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
             [(unspec:P
                [(match_operand:P 3 "vsib_address_operand" "Tv")
                 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
                 (match_operand:SI 4 "const1248_operand" "n")]
                UNSPEC_VSIBADDR)])]
          UNSPEC_GATHER))
   (clobber (match_scratch:QI 1 "=&Yk"))]
  "TARGET_AVX512F"
{
  /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
     gas changed what it requires incompatibly.  */

  /* Same mode on both sides: print the destination unmodified.  */
  if (<MODE>mode == <VEC_GATHER_SRCDI>mode)
    return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}";

  /* Modes differ: pick the register-name modifier from the mode size.  */
  if (<MODE_SIZE> == 64)
    return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %X5}";

  return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %x0%{%1%}|%x0%{%1%}, %X5}";
}
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
20432
;; Expander for scatters whose index vector has mode <VEC_GATHER_IDXSI>.
;; Operands 0, 2 and 4 (address, index vector, const1248 constant) are
;; wrapped into a single UNSPEC_VSIBADDR that becomes operand 5, the
;; address of the destination mem.  A mask-mode scratch (operand 6) is
;; clobbered.
(define_expand "<avx512>_scattersi<mode>"
  [(parallel [(set (mem:VI48F
                     (match_par_dup 5
                       [(match_operand 0 "vsib_address_operand")
                        (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand")
                        (match_operand:SI 4 "const1248_operand")]))
                   (unspec:VI48F
                     [(match_operand:<avx512fmaskmode> 1 "register_operand")
                      (match_operand:VI48F 3 "register_operand")]
                     UNSPEC_SCATTER))
              (clobber (match_scratch:<avx512fmaskmode> 6))])]
  "TARGET_AVX512F"
{
  /* Collect the three address components first, then build the VSIB
     address unspec in Pmode from them.  */
  rtvec vsib_parts = gen_rtvec (3, operands[0], operands[2], operands[4]);
  operands[5] = gen_rtx_UNSPEC (Pmode, vsib_parts, UNSPEC_VSIBADDR);
})
20450
;; Scatter insn ("scatterd" mnemonic): stores register operand 3 through
;; the VSIB memory operator (operand 5), indexed by a <VEC_GATHER_IDXSI>
;; vector.  The mask input (operand 6) is tied by constraint "1" to the
;; mask scratch (operand 1) that the insn clobbers.
20451 (define_insn "*avx512f_scattersi<mode>"
20452 [(set (match_operator:VI48F 5 "vsib_mem_operator"
20453 [(unspec:P
20454 [(match_operand:P 0 "vsib_address_operand" "Tv")
20455 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
20456 (match_operand:SI 4 "const1248_operand" "n")]
20457 UNSPEC_VSIBADDR)])
20458 (unspec:VI48F
20459 [(match_operand:<avx512fmaskmode> 6 "register_operand" "1")
20460 (match_operand:VI48F 3 "register_operand" "v")]
20461 UNSPEC_SCATTER))
20462 (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
20463 "TARGET_AVX512F"
20464 ;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
20465 ;; gas changed what it requires incompatibly.
20466 "v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"
20467 [(set_attr "type" "ssemov")
20468 (set_attr "prefix" "evex")
20469 (set_attr "mode" "<sseinsnmode>")])
20470
;; Expander for scatters whose index vector has mode <VEC_GATHER_IDXDI>.
;; The stored value (operand 3) has mode <VEC_GATHER_SRCDI> and the mask
;; (operand 1) is QImode.  Operands 0, 2 and 4 are wrapped into a single
;; UNSPEC_VSIBADDR that becomes operand 5; a QImode scratch (operand 6)
;; is clobbered.
(define_expand "<avx512>_scatterdi<mode>"
  [(parallel [(set (mem:VI48F
                     (match_par_dup 5
                       [(match_operand 0 "vsib_address_operand")
                        (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand")
                        (match_operand:SI 4 "const1248_operand")]))
                   (unspec:VI48F
                     [(match_operand:QI 1 "register_operand")
                      (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand")]
                     UNSPEC_SCATTER))
              (clobber (match_scratch:QI 6))])]
  "TARGET_AVX512F"
{
  /* Collect the three address components first, then build the VSIB
     address unspec in Pmode from them.  */
  rtvec vsib_parts = gen_rtvec (3, operands[0], operands[2], operands[4]);
  operands[5] = gen_rtx_UNSPEC (Pmode, vsib_parts, UNSPEC_VSIBADDR);
})
20488
;; Scatter insn ("scatterq" mnemonic): stores register operand 3 (mode
;; <VEC_GATHER_SRCDI>) through the VSIB memory operator (operand 5),
;; indexed by a <VEC_GATHER_IDXDI> vector.  The QImode mask input
;; (operand 6) is tied to the clobbered mask scratch (operand 1).
20489 (define_insn "*avx512f_scatterdi<mode>"
20490 [(set (match_operator:VI48F 5 "vsib_mem_operator"
20491 [(unspec:P
20492 [(match_operand:P 0 "vsib_address_operand" "Tv")
20493 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
20494 (match_operand:SI 4 "const1248_operand" "n")]
20495 UNSPEC_VSIBADDR)])
20496 (unspec:VI48F
20497 [(match_operand:QI 6 "register_operand" "1")
20498 (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand" "v")]
20499 UNSPEC_SCATTER))
20500 (clobber (match_scratch:QI 1 "=&Yk"))]
20501 "TARGET_AVX512F"
20502 ;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
20503 ;; gas changed what it requires incompatibly.
20504 "v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"
20505 [(set_attr "type" "ssemov")
20506 (set_attr "prefix" "evex")
20507 (set_attr "mode" "<sseinsnmode>")])
20508
;; Masked compress with a register destination (VI48F modes).  Operand 2
;; is either tied to the destination or the zero constant ("0C"); the
;; template passes it through %N2.
;; NOTE(review): %N2 presumably appends {z} when operand 2 is zero --
;; confirm against the print-operand code.
20509 (define_insn "<avx512>_compress<mode>_mask"
20510 [(set (match_operand:VI48F 0 "register_operand" "=v")
20511 (unspec:VI48F
20512 [(match_operand:VI48F 1 "register_operand" "v")
20513 (match_operand:VI48F 2 "nonimm_or_0_operand" "0C")
20514 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")]
20515 UNSPEC_COMPRESS))]
20516 "TARGET_AVX512F"
20517 "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
20518 [(set_attr "type" "ssemov")
20519 (set_attr "prefix" "evex")
20520 (set_attr "mode" "<sseinsnmode>")])
20521
;; Masked compress with a register destination for the
;; VI12_AVX512VLBW modes, enabled by TARGET_AVX512VBMI2 and emitted as
;; vpcompress<ssemodesuffix>.  Operand 2 is either tied to the destination
;; or the zero constant ("0C") and is passed through %N2.
20522 (define_insn "compress<mode>_mask"
20523 [(set (match_operand:VI12_AVX512VLBW 0 "register_operand" "=v")
20524 (unspec:VI12_AVX512VLBW
20525 [(match_operand:VI12_AVX512VLBW 1 "register_operand" "v")
20526 (match_operand:VI12_AVX512VLBW 2 "nonimm_or_0_operand" "0C")
20527 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")]
20528 UNSPEC_COMPRESS))]
20529 "TARGET_AVX512VBMI2"
20530 "vpcompress<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
20531 [(set_attr "type" "ssemov")
20532 (set_attr "prefix" "evex")
20533 (set_attr "mode" "<sseinsnmode>")])
20534
;; Masked compress-store (VI48F modes): compresses register operand 1
;; under mask operand 2 into memory operand 0.  The previous memory
;; contents take part in the unspec through (match_dup 0).
;; The source register uses the "v" constraint, matching the
;; register-destination compress pattern: the insn is EVEX-encoded
;; (prefix "evex"), so there is no reason to restrict it to the "x"
;; register class.
(define_insn "<avx512>_compressstore<mode>_mask"
  [(set (match_operand:VI48F 0 "memory_operand" "=m")
        (unspec:VI48F
          [(match_operand:VI48F 1 "register_operand" "v")
           (match_dup 0)
           (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")]
          UNSPEC_COMPRESS_STORE))]
  "TARGET_AVX512F"
  "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "memory" "store")
   (set_attr "mode" "<sseinsnmode>")])
20548
;; Masked compress-store for the VI12_AVX512VLBW modes (enabled by
;; TARGET_AVX512VBMI2): compresses register operand 1 under mask
;; operand 2 into memory operand 0.  The previous memory contents take
;; part in the unspec through (match_dup 0).
;; The source register uses the "v" constraint, matching the
;; register-destination compress pattern: the insn is EVEX-encoded
;; (prefix "evex"), so there is no reason to restrict it to the "x"
;; register class.
(define_insn "compressstore<mode>_mask"
  [(set (match_operand:VI12_AVX512VLBW 0 "memory_operand" "=m")
        (unspec:VI12_AVX512VLBW
          [(match_operand:VI12_AVX512VLBW 1 "register_operand" "v")
           (match_dup 0)
           (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")]
          UNSPEC_COMPRESS_STORE))]
  "TARGET_AVX512VBMI2"
  "vpcompress<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "memory" "store")
   (set_attr "mode" "<sseinsnmode>")])
20562
;; Zero-masking expander for UNSPEC_EXPAND on VI48F modes.  Whatever the
;; caller passes as operand 2 is overwritten with the zero vector of
;; <MODE>mode before the pattern is generated.
(define_expand "<avx512>_expand<mode>_maskz"
  [(set (match_operand:VI48F 0 "register_operand")
        (unspec:VI48F
          [(match_operand:VI48F 1 "nonimmediate_operand")
           (match_operand:VI48F 2 "nonimm_or_0_operand")
           (match_operand:<avx512fmaskmode> 3 "register_operand")]
          UNSPEC_EXPAND))]
  "TARGET_AVX512F"
{
  /* Force the merge source to all-zeros.  */
  operands[2] = CONST0_RTX (<MODE>mode);
})
20572
;; Masked expand (VI48F modes).  Two alternatives: the source operand 1
;; is a register ("v", memory attr "none") or memory ("m", memory attr
;; "load").  Operand 2 is either tied to the destination or the zero
;; constant ("0C") and is passed through %N2 in the template.
20573 (define_insn "<avx512>_expand<mode>_mask"
20574 [(set (match_operand:VI48F 0 "register_operand" "=v,v")
20575 (unspec:VI48F
20576 [(match_operand:VI48F 1 "nonimmediate_operand" "v,m")
20577 (match_operand:VI48F 2 "nonimm_or_0_operand" "0C,0C")
20578 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")]
20579 UNSPEC_EXPAND))]
20580 "TARGET_AVX512F"
20581 "v<sseintprefix>expand<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
20582 [(set_attr "type" "ssemov")
20583 (set_attr "prefix" "evex")
20584 (set_attr "memory" "none,load")
20585 (set_attr "mode" "<sseinsnmode>")])
20586
;; Masked expand for the VI12_AVX512VLBW modes, enabled by
;; TARGET_AVX512VBMI2.  Two alternatives: the source operand 1 is a
;; register ("v") or memory ("m").  Operand 2 is either tied to the
;; destination or the zero constant ("0C") and is passed through %N2.
20587 (define_insn "expand<mode>_mask"
20588 [(set (match_operand:VI12_AVX512VLBW 0 "register_operand" "=v,v")
20589 (unspec:VI12_AVX512VLBW
20590 [(match_operand:VI12_AVX512VLBW 1 "nonimmediate_operand" "v,m")
20591 (match_operand:VI12_AVX512VLBW 2 "nonimm_or_0_operand" "0C,0C")
20592 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")]
20593 UNSPEC_EXPAND))]
20594 "TARGET_AVX512VBMI2"
20595 "v<sseintprefix>expand<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
20596 [(set_attr "type" "ssemov")
20597 (set_attr "prefix" "evex")
20598 (set_attr "memory" "none,load")
20599 (set_attr "mode" "<sseinsnmode>")])
20600
;; Zero-masking expander for UNSPEC_EXPAND on the VI12_AVX512VLBW modes
;; (TARGET_AVX512VBMI2).  Whatever the caller passes as operand 2 is
;; overwritten with the zero vector of <MODE>mode before the pattern is
;; generated.
(define_expand "expand<mode>_maskz"
  [(set (match_operand:VI12_AVX512VLBW 0 "register_operand")
        (unspec:VI12_AVX512VLBW
          [(match_operand:VI12_AVX512VLBW 1 "nonimmediate_operand")
           (match_operand:VI12_AVX512VLBW 2 "nonimm_or_0_operand")
           (match_operand:<avx512fmaskmode> 3 "register_operand")]
          UNSPEC_EXPAND))]
  "TARGET_AVX512VBMI2"
{
  /* Force the merge source to all-zeros.  */
  operands[2] = CONST0_RTX (<MODE>mode);
})
20610
;; Packed vrange on VF_AVX512VL modes (TARGET_AVX512DQ), with the
;; <mask_name> and <round_saeonly_name> substitutions applied.  Operand 3
;; is an SImode constant accepted by const_0_to_15_operand and is printed
;; as the immediate.
20611 (define_insn "avx512dq_rangep<mode><mask_name><round_saeonly_name>"
20612 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
20613 (unspec:VF_AVX512VL
20614 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
20615 (match_operand:VF_AVX512VL 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
20616 (match_operand:SI 3 "const_0_to_15_operand")]
20617 UNSPEC_RANGE))]
20618 "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>"
20619 "vrange<ssemodesuffix>\t{%3, <round_saeonly_mask_op4>%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2<round_saeonly_mask_op4>, %3}"
20620 [(set_attr "type" "sse")
20621 (set_attr "prefix" "evex")
20622 (set_attr "mode" "<MODE>")])
20623
;; Scalar vrange on VF_128 modes (TARGET_AVX512DQ).  The vec_merge with
;; (const_int 1) takes element 0 from the UNSPEC_RANGE result and every
;; other element from operand 1 (match_dup 1).
20624 (define_insn "avx512dq_ranges<mode><mask_scalar_name><round_saeonly_scalar_name>"
20625 [(set (match_operand:VF_128 0 "register_operand" "=v")
20626 (vec_merge:VF_128
20627 (unspec:VF_128
20628 [(match_operand:VF_128 1 "register_operand" "v")
20629 (match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")
20630 (match_operand:SI 3 "const_0_to_15_operand")]
20631 UNSPEC_RANGE)
20632 (match_dup 1)
20633 (const_int 1)))]
20634 "TARGET_AVX512DQ"
20635 "vrange<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2<round_saeonly_scalar_mask_op4>, %3}"
20636 [(set_attr "type" "sse")
20637 (set_attr "prefix" "evex")
20638 (set_attr "mode" "<MODE>")])
20639
;; Packed vfpclass on VF_AVX512VL modes (TARGET_AVX512DQ).  The result is
;; a mask register ("=Yk") of mode <avx512fmaskmode>; operand 2 is a
;; QImode immediate accepted by const_0_to_255_operand, hence
;; length_immediate 1.  (A stray C-style ';' that used to follow the
;; output template has been dropped; the md reader only saw it as an
;; empty comment.)
(define_insn "avx512dq_fpclass<mode><mask_scalar_merge_name>"
  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
        (unspec:<avx512fmaskmode>
          [(match_operand:VF_AVX512VL 1 "register_operand" "v")
           (match_operand:QI 2 "const_0_to_255_operand" "n")]
          UNSPEC_FPCLASS))]
  "TARGET_AVX512DQ"
  "vfpclass<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
  [(set_attr "type" "sse")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])
20652
;; Scalar vfpclass on VF_128 modes (TARGET_AVX512DQ).  The UNSPEC_FPCLASS
;; mask result is ANDed with (const_int 1), so only bit 0 of the mask
;; destination can be set.  Operand 2 is a QImode immediate accepted by
;; const_0_to_255_operand.  (A stray C-style ';' that used to follow the
;; output template has been dropped; the md reader only saw it as an
;; empty comment.)
(define_insn "avx512dq_vmfpclass<mode>"
  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
        (and:<avx512fmaskmode>
          (unspec:<avx512fmaskmode>
            [(match_operand:VF_128 1 "register_operand" "v")
             (match_operand:QI 2 "const_0_to_255_operand" "n")]
            UNSPEC_FPCLASS)
          (const_int 1)))]
  "TARGET_AVX512DQ"
  "vfpclass<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sse")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])
20667
;; Packed vgetmant on VF_AVX512VL modes (TARGET_AVX512F), with the
;; <mask_name> and <round_saeonly_name> substitutions applied.  Operand 2
;; is an SImode constant accepted by const_0_to_15_operand and is printed
;; as the immediate.  (A stray C-style ';' that used to follow the output
;; template has been dropped; the md reader only saw it as an empty
;; comment.)
(define_insn "<avx512>_getmant<mode><mask_name><round_saeonly_name>"
  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
        (unspec:VF_AVX512VL
          [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>")
           (match_operand:SI 2 "const_0_to_15_operand")]
          UNSPEC_GETMANT))]
  "TARGET_AVX512F"
  "vgetmant<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])
20678
;; Scalar vgetmant on VF_128 modes (TARGET_AVX512F).  The vec_merge with
;; (const_int 1) takes element 0 from the UNSPEC_GETMANT result and every
;; other element from operand 1 (match_dup 1).  (A stray C-style ';'
;; that used to follow the output template has been dropped; the md
;; reader only saw it as an empty comment.)
(define_insn "avx512f_vgetmant<mode><mask_scalar_name><round_saeonly_scalar_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=v")
        (vec_merge:VF_128
          (unspec:VF_128
            [(match_operand:VF_128 1 "register_operand" "v")
             (match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")
             (match_operand:SI 3 "const_0_to_15_operand")]
            UNSPEC_GETMANT)
          (match_dup 1)
          (const_int 1)))]
  "TARGET_AVX512F"
  "vgetmant<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2<round_saeonly_scalar_mask_op4>, %3}"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<ssescalarmode>")])
20693
20694 ;; The correct representation for this is absolutely enormous, and
20695 ;; surely not generally useful.
;; Hence the operation is modelled as an opaque UNSPEC_DBPSADBW of two
;; <dbpsadbwmode> inputs plus an SImode constant accepted by
;; const_0_to_255_operand (printed as the immediate).
20696 (define_insn "<mask_codefor>avx512bw_dbpsadbw<mode><mask_name>"
20697 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
20698 (unspec:VI2_AVX512VL
20699 [(match_operand:<dbpsadbwmode> 1 "register_operand" "v")
20700 (match_operand:<dbpsadbwmode> 2 "nonimmediate_operand" "vm")
20701 (match_operand:SI 3 "const_0_to_255_operand")]
20702 UNSPEC_DBPSADBW))]
20703 "TARGET_AVX512BW"
20704 "vdbpsadbw\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}"
20705 [(set_attr "type" "sselog1")
20706 (set_attr "length_immediate" "1")
20707 (set_attr "prefix" "evex")
20708 (set_attr "mode" "<sseinsnmode>")])
20709
;; Vector count-leading-zeros (RTL code clz) on VI48_AVX512VL modes,
;; emitted as vplzcnt<ssemodesuffix>; requires TARGET_AVX512CD.  The
;; source may be a register or memory ("vm").
20710 (define_insn "clz<mode>2<mask_name>"
20711 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
20712 (clz:VI48_AVX512VL
20713 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")))]
20714 "TARGET_AVX512CD"
20715 "vplzcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
20716 [(set_attr "type" "sse")
20717 (set_attr "prefix" "evex")
20718 (set_attr "mode" "<sseinsnmode>")])
20719
;; vpconflict<ssemodesuffix> on VI48_AVX512VL modes, modelled as the
;; single-input UNSPEC_CONFLICT; requires TARGET_AVX512CD.  The source
;; may be a register or memory ("vm").
20720 (define_insn "<mask_codefor>conflict<mode><mask_name>"
20721 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
20722 (unspec:VI48_AVX512VL
20723 [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")]
20724 UNSPEC_CONFLICT))]
20725 "TARGET_AVX512CD"
20726 "vpconflict<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
20727 [(set_attr "type" "sse")
20728 (set_attr "prefix" "evex")
20729 (set_attr "mode" "<sseinsnmode>")])
20730
;; sha1msg1 (TARGET_SHA): two-operand form.  Operand 1 is tied to the
;; destination ("0"); operand 2 may be a register or memory ("xBm").
20731 (define_insn "sha1msg1"
20732 [(set (match_operand:V4SI 0 "register_operand" "=x")
20733 (unspec:V4SI
20734 [(match_operand:V4SI 1 "register_operand" "0")
20735 (match_operand:V4SI 2 "vector_operand" "xBm")]
20736 UNSPEC_SHA1MSG1))]
20737 "TARGET_SHA"
20738 "sha1msg1\t{%2, %0|%0, %2}"
20739 [(set_attr "type" "sselog1")
20740 (set_attr "mode" "TI")])
20741
;; sha1msg2 (TARGET_SHA): two-operand form.  Operand 1 is tied to the
;; destination ("0"); operand 2 may be a register or memory ("xBm").
20742 (define_insn "sha1msg2"
20743 [(set (match_operand:V4SI 0 "register_operand" "=x")
20744 (unspec:V4SI
20745 [(match_operand:V4SI 1 "register_operand" "0")
20746 (match_operand:V4SI 2 "vector_operand" "xBm")]
20747 UNSPEC_SHA1MSG2))]
20748 "TARGET_SHA"
20749 "sha1msg2\t{%2, %0|%0, %2}"
20750 [(set_attr "type" "sselog1")
20751 (set_attr "mode" "TI")])
20752
;; sha1nexte (TARGET_SHA): two-operand form.  Operand 1 is tied to the
;; destination ("0"); operand 2 may be a register or memory ("xBm").
20753 (define_insn "sha1nexte"
20754 [(set (match_operand:V4SI 0 "register_operand" "=x")
20755 (unspec:V4SI
20756 [(match_operand:V4SI 1 "register_operand" "0")
20757 (match_operand:V4SI 2 "vector_operand" "xBm")]
20758 UNSPEC_SHA1NEXTE))]
20759 "TARGET_SHA"
20760 "sha1nexte\t{%2, %0|%0, %2}"
20761 [(set_attr "type" "sselog1")
20762 (set_attr "mode" "TI")])
20763
;; sha1rnds4 (TARGET_SHA).  Operand 1 is tied to the destination ("0");
;; operand 2 may be a register or memory; operand 3 is an SImode constant
;; accepted by const_0_to_3_operand and printed as the immediate, hence
;; length_immediate 1.
20764 (define_insn "sha1rnds4"
20765 [(set (match_operand:V4SI 0 "register_operand" "=x")
20766 (unspec:V4SI
20767 [(match_operand:V4SI 1 "register_operand" "0")
20768 (match_operand:V4SI 2 "vector_operand" "xBm")
20769 (match_operand:SI 3 "const_0_to_3_operand" "n")]
20770 UNSPEC_SHA1RNDS4))]
20771 "TARGET_SHA"
20772 "sha1rnds4\t{%3, %2, %0|%0, %2, %3}"
20773 [(set_attr "type" "sselog1")
20774 (set_attr "length_immediate" "1")
20775 (set_attr "mode" "TI")])
20776
;; sha256msg1 (TARGET_SHA): two-operand form.  Operand 1 is tied to the
;; destination ("0"); operand 2 may be a register or memory ("xBm").
20777 (define_insn "sha256msg1"
20778 [(set (match_operand:V4SI 0 "register_operand" "=x")
20779 (unspec:V4SI
20780 [(match_operand:V4SI 1 "register_operand" "0")
20781 (match_operand:V4SI 2 "vector_operand" "xBm")]
20782 UNSPEC_SHA256MSG1))]
20783 "TARGET_SHA"
20784 "sha256msg1\t{%2, %0|%0, %2}"
20785 [(set_attr "type" "sselog1")
20786 (set_attr "mode" "TI")])
20787
;; sha256msg2 (TARGET_SHA): two-operand form.  Operand 1 is tied to the
;; destination ("0"); operand 2 may be a register or memory ("xBm").
20788 (define_insn "sha256msg2"
20789 [(set (match_operand:V4SI 0 "register_operand" "=x")
20790 (unspec:V4SI
20791 [(match_operand:V4SI 1 "register_operand" "0")
20792 (match_operand:V4SI 2 "vector_operand" "xBm")]
20793 UNSPEC_SHA256MSG2))]
20794 "TARGET_SHA"
20795 "sha256msg2\t{%2, %0|%0, %2}"
20796 [(set_attr "type" "sselog1")
20797 (set_attr "mode" "TI")])
20798
;; sha256rnds2 (TARGET_SHA).  Operand 1 is tied to the destination ("0");
;; operand 2 may be a register or memory; operand 3 is a V4SI register
;; restricted by the "Yz" constraint.  All three operands printed by the
;; template are registers or memory -- there is no immediate -- so,
;; unlike sha1rnds4, no length_immediate attribute is set here (the
;; former "1" over-counted the insn length by one byte).
(define_insn "sha256rnds2"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (unspec:V4SI
          [(match_operand:V4SI 1 "register_operand" "0")
           (match_operand:V4SI 2 "vector_operand" "xBm")
           (match_operand:V4SI 3 "register_operand" "Yz")]
          UNSPEC_SHA256RNDS2))]
  "TARGET_SHA"
  "sha256rnds2\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "TI")])
20811
;; Cast of a <ssequartermode> value to the AVX512MODE2P mode, kept as an
;; UNSPEC_CAST until reload has completed and then split into a plain
;; move.  At most one of the two operands may be memory.
(define_insn_and_split "avx512f_<castmode><avxsizesuffix>_<castmode>"
  [(set (match_operand:AVX512MODE2P 0 "nonimmediate_operand" "=x,m")
        (unspec:AVX512MODE2P
          [(match_operand:<ssequartermode> 1 "nonimmediate_operand" "xm,x")]
          UNSPEC_CAST))]
  "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  /* Make both sides of the move agree on a mode: a non-register
     destination keeps <MODE>mode and the source is re-expressed in that
     mode; a register destination is narrowed to the source's mode.  */
  if (!REG_P (operands[0]))
    operands[1] = lowpart_subreg (<MODE>mode, operands[1],
                                  <ssequartermode>mode);
  else
    operands[0] = gen_lowpart (<ssequartermode>mode, operands[0]);
})
20828
;; Cast of a <ssehalfvecmode> value to the AVX512MODE2P mode, kept as an
;; UNSPEC_CAST until reload has completed and then split into a plain
;; move.  At most one of the two operands may be memory.
(define_insn_and_split "avx512f_<castmode><avxsizesuffix>_256<castmode>"
  [(set (match_operand:AVX512MODE2P 0 "nonimmediate_operand" "=x,m")
        (unspec:AVX512MODE2P
          [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
          UNSPEC_CAST))]
  "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  /* Make both sides of the move agree on a mode: a non-register
     destination keeps <MODE>mode and the source is re-expressed in that
     mode; a register destination is narrowed to the source's mode.  */
  if (!REG_P (operands[0]))
    operands[1] = lowpart_subreg (<MODE>mode, operands[1],
                                  <ssehalfvecmode>mode);
  else
    operands[0] = gen_lowpart (<ssehalfvecmode>mode, operands[0]);
})
20845
;; Int iterator over the two VPMADD52 unspecs (low and high variants by
;; their names); used as the unspec code of the vpamdd52 insns.
20846 (define_int_iterator VPMADD52
20847 [UNSPEC_VPMADD52LUQ
20848 UNSPEC_VPMADD52HUQ])
20849
;; Maps each VPMADD52 unspec to the suffix ("luq"/"huq") spliced into
;; pattern names and mnemonics.
20850 (define_int_attr vpmadd52type
20851 [(UNSPEC_VPMADD52LUQ "luq") (UNSPEC_VPMADD52HUQ "huq")])
20852
;; Zero-masking entry point for the "huq" variant: forwards operands 0-3
;; to the generated *_maskz_1 pattern, inserting the zero vector of
;; <MODE>mode ahead of the mask (operand 4).
(define_expand "vpamdd52huq<mode>_maskz"
  [(match_operand:VI8_AVX512VL 0 "register_operand")
   (match_operand:VI8_AVX512VL 1 "register_operand")
   (match_operand:VI8_AVX512VL 2 "register_operand")
   (match_operand:VI8_AVX512VL 3 "nonimmediate_operand")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512IFMA"
{
  rtx zero_vec = CONST0_RTX (<MODE>mode);
  rtx insn = gen_vpamdd52huq<mode>_maskz_1 (operands[0], operands[1],
                                            operands[2], operands[3],
                                            zero_vec, operands[4]);
  emit_insn (insn);
  DONE;
})
20866
;; Zero-masking entry point for the "luq" variant: forwards operands 0-3
;; to the generated *_maskz_1 pattern, inserting the zero vector of
;; <MODE>mode ahead of the mask (operand 4).
(define_expand "vpamdd52luq<mode>_maskz"
  [(match_operand:VI8_AVX512VL 0 "register_operand")
   (match_operand:VI8_AVX512VL 1 "register_operand")
   (match_operand:VI8_AVX512VL 2 "register_operand")
   (match_operand:VI8_AVX512VL 3 "nonimmediate_operand")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512IFMA"
{
  rtx zero_vec = CONST0_RTX (<MODE>mode);
  rtx insn = gen_vpamdd52luq<mode>_maskz_1 (operands[0], operands[1],
                                            operands[2], operands[3],
                                            zero_vec, operands[4]);
  emit_insn (insn);
  DONE;
})
20880
;; vpmadd52luq / vpmadd52huq on VI8_AVX512VL modes (TARGET_AVX512IFMA),
;; with the <sd_maskz_name> substitution applied.  Operand 1 is tied to
;; the destination ("0") and is not printed separately in the template.
;; NOTE(review): the pattern name spells "vpamdd52" while the mnemonic is
;; "vpmadd52"; the expanders in this file call gen_vpamdd52*, so the name
;; is part of the interface and is left as is.
20881 (define_insn "vpamdd52<vpmadd52type><mode><sd_maskz_name>"
20882 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
20883 (unspec:VI8_AVX512VL
20884 [(match_operand:VI8_AVX512VL 1 "register_operand" "0")
20885 (match_operand:VI8_AVX512VL 2 "register_operand" "v")
20886 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand" "vm")]
20887 VPMADD52))]
20888 "TARGET_AVX512IFMA"
20889 "vpmadd52<vpmadd52type>\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3}"
20890 [(set_attr "type" "ssemuladd")
20891 (set_attr "prefix" "evex")
20892 (set_attr "mode" "<sseinsnmode>")])
20893
;; Merge-masking form of vpmadd52luq / vpmadd52huq: a vec_merge of the
;; unspec result with operand 1 (match_dup 1, which is also tied to the
;; destination) under mask operand 4.
20894 (define_insn "vpamdd52<vpmadd52type><mode>_mask"
20895 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
20896 (vec_merge:VI8_AVX512VL
20897 (unspec:VI8_AVX512VL
20898 [(match_operand:VI8_AVX512VL 1 "register_operand" "0")
20899 (match_operand:VI8_AVX512VL 2 "register_operand" "v")
20900 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand" "vm")]
20901 VPMADD52)
20902 (match_dup 1)
20903 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
20904 "TARGET_AVX512IFMA"
20905 "vpmadd52<vpmadd52type>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
20906 [(set_attr "type" "ssemuladd")
20907 (set_attr "prefix" "evex")
20908 (set_attr "mode" "<sseinsnmode>")])
20909
;; vpmultishiftqb on VI1_AVX512VL modes (TARGET_AVX512VBMI), modelled as
;; UNSPEC_VPMULTISHIFT of a register operand and a register-or-memory
;; operand, with the <mask_name> substitution applied.
20910 (define_insn "vpmultishiftqb<mode><mask_name>"
20911 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
20912 (unspec:VI1_AVX512VL
20913 [(match_operand:VI1_AVX512VL 1 "register_operand" "v")
20914 (match_operand:VI1_AVX512VL 2 "nonimmediate_operand" "vm")]
20915 UNSPEC_VPMULTISHIFT))]
20916 "TARGET_AVX512VBMI"
20917 "vpmultishiftqb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
20918 [(set_attr "type" "sselog")
20919 (set_attr "prefix" "evex")
20920 (set_attr "mode" "<sseinsnmode>")])
20921
;; The two 64-element vector modes used by the 4FMAPS / 4VNNIW patterns,
;; each enabled only under its own target flag.
20922 (define_mode_iterator IMOD4
20923 [(V64SF "TARGET_AVX5124FMAPS") (V64SI "TARGET_AVX5124VNNIW")])
20924
;; For each IMOD4 mode, the 16-element vector mode that the IMOD4 move
;; splitter uses for the individual pieces.
20925 (define_mode_attr imod4_narrow
20926 [(V64SF "V16SF") (V64SI "V16SI")])
20927
;; Move expander for the IMOD4 modes: hands the operands to
;; ix86_expand_vector_move and emits nothing from the pattern itself
;; (DONE).
20928 (define_expand "mov<mode>"
20929 [(set (match_operand:IMOD4 0 "nonimmediate_operand")
20930 (match_operand:IMOD4 1 "nonimm_or_0_operand"))]
20931 "TARGET_AVX512F"
20932 {
20933 ix86_expand_vector_move (<MODE>mode, operands);
20934 DONE;
20935 })
20936
;; Move insn for the IMOD4 modes.  It has no assembly of its own ("#"):
;; once reload has completed it is split into four <imod4_narrow> moves
;; taken at byte offsets 0, 64, 128 and 192 of each operand.  At least
;; one operand must be a register; the source may also be constant zero
;; (alternative "C").
(define_insn_and_split "*mov<mode>_internal"
  [(set (match_operand:IMOD4 0 "nonimmediate_operand" "=v,v ,m")
        (match_operand:IMOD4 1 "nonimm_or_0_operand"  " C,vm,v"))]
  "TARGET_AVX512F
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  /* Walk the value in 64-byte steps, moving one narrow piece per step.  */
  for (int byte = 0; byte < 4 * 64; byte += 64)
    {
      rtx dst_piece
        = simplify_subreg (<imod4_narrow>mode, operands[0], <MODE>mode, byte);
      rtx src_piece
        = simplify_subreg (<imod4_narrow>mode, operands[1], <MODE>mode, byte);
      emit_move_insn (dst_piece, src_piece);
    }
  DONE;
})
20960
;; Unmasked v4fmaddps (TARGET_AVX5124FMAPS).  Operand 1 is tied to the
;; V16SF destination; operand 2 is a V64SF register printed with the %g
;; modifier; operand 3 is a V4SF memory operand.
20961 (define_insn "avx5124fmaddps_4fmaddps"
20962 [(set (match_operand:V16SF 0 "register_operand" "=v")
20963 (unspec:V16SF
20964 [(match_operand:V16SF 1 "register_operand" "0")
20965 (match_operand:V64SF 2 "register_operand" "v")
20966 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD))]
20967 "TARGET_AVX5124FMAPS"
20968 "v4fmaddps\t{%3, %g2, %0|%0, %g2, %3}"
20969 [(set_attr ("type") ("ssemuladd"))
20970 (set_attr ("prefix") ("evex"))
20971 (set_attr ("mode") ("V16SF"))])
20972
;; Merge-masking v4fmaddps.  The unspec takes only the V64SF register
;; (operand 1) and the V4SF memory operand (operand 2); operand 3, tied
;; to the destination, is the other vec_merge input, selected by the
;; HImode mask in operand 4.
20973 (define_insn "avx5124fmaddps_4fmaddps_mask"
20974 [(set (match_operand:V16SF 0 "register_operand" "=v")
20975 (vec_merge:V16SF
20976 (unspec:V16SF
20977 [(match_operand:V64SF 1 "register_operand" "v")
20978 (match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FMADD)
20979 (match_operand:V16SF 3 "register_operand" "0")
20980 (match_operand:HI 4 "register_operand" "Yk")))]
20981 "TARGET_AVX5124FMAPS"
20982 "v4fmaddps\t{%2, %g1, %0%{%4%}|%0%{%4%}, %g1, %2}"
20983 [(set_attr ("type") ("ssemuladd"))
20984 (set_attr ("prefix") ("evex"))
20985 (set_attr ("mode") ("V16SF"))])
20986
;; Zero-masking v4fmaddps: the unspec result is vec_merged with the
;; constant-zero operand 4 under the HImode mask in operand 5, and the
;; template appends {z} after the mask.
20987 (define_insn "avx5124fmaddps_4fmaddps_maskz"
20988 [(set (match_operand:V16SF 0 "register_operand" "=v")
20989 (vec_merge:V16SF
20990 (unspec:V16SF
20991 [(match_operand:V16SF 1 "register_operand" "0")
20992 (match_operand:V64SF 2 "register_operand" "v")
20993 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD)
20994 (match_operand:V16SF 4 "const0_operand" "C")
20995 (match_operand:HI 5 "register_operand" "Yk")))]
20996 "TARGET_AVX5124FMAPS"
20997 "v4fmaddps\t{%3, %g2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %g2, %3}"
20998 [(set_attr ("type") ("ssemuladd"))
20999 (set_attr ("prefix") ("evex"))
21000 (set_attr ("mode") ("V16SF"))])
21001
;; Unmasked v4fmaddss (TARGET_AVX5124FMAPS).  Operand 1 is tied to the
;; V4SF destination; operand 2 is a V64SF register printed with the %x
;; modifier; operand 3 is a V4SF memory operand.
21002 (define_insn "avx5124fmaddps_4fmaddss"
21003 [(set (match_operand:V4SF 0 "register_operand" "=v")
21004 (unspec:V4SF
21005 [(match_operand:V4SF 1 "register_operand" "0")
21006 (match_operand:V64SF 2 "register_operand" "v")
21007 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD))]
21008 "TARGET_AVX5124FMAPS"
21009 "v4fmaddss\t{%3, %x2, %0|%0, %x2, %3}"
21010 [(set_attr ("type") ("ssemuladd"))
21011 (set_attr ("prefix") ("evex"))
21012 (set_attr ("mode") ("SF"))])
21013
;; Merge-masking v4fmaddss.  The unspec takes only the V64SF register
;; (operand 1) and the V4SF memory operand (operand 2); operand 3, tied
;; to the destination, is the other vec_merge input, selected by the
;; QImode mask in operand 4.
21014 (define_insn "avx5124fmaddps_4fmaddss_mask"
21015 [(set (match_operand:V4SF 0 "register_operand" "=v")
21016 (vec_merge:V4SF
21017 (unspec:V4SF
21018 [(match_operand:V64SF 1 "register_operand" "v")
21019 (match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FMADD)
21020 (match_operand:V4SF 3 "register_operand" "0")
21021 (match_operand:QI 4 "register_operand" "Yk")))]
21022 "TARGET_AVX5124FMAPS"
21023 "v4fmaddss\t{%2, %x1, %0%{%4%}|%0%{%4%}, %x1, %2}"
21024 [(set_attr ("type") ("ssemuladd"))
21025 (set_attr ("prefix") ("evex"))
21026 (set_attr ("mode") ("SF"))])
21027
;; Zero-masking v4fmaddss: the unspec result is vec_merged with the
;; constant-zero operand 4 under the QImode mask in operand 5, and the
;; template appends {z} after the mask.
21028 (define_insn "avx5124fmaddps_4fmaddss_maskz"
21029 [(set (match_operand:V4SF 0 "register_operand" "=v")
21030 (vec_merge:V4SF
21031 (unspec:V4SF
21032 [(match_operand:V4SF 1 "register_operand" "0")
21033 (match_operand:V64SF 2 "register_operand" "v")
21034 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD)
21035 (match_operand:V4SF 4 "const0_operand" "C")
21036 (match_operand:QI 5 "register_operand" "Yk")))]
21037 "TARGET_AVX5124FMAPS"
21038 "v4fmaddss\t{%3, %x2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %x2, %3}"
21039 [(set_attr ("type") ("ssemuladd"))
21040 (set_attr ("prefix") ("evex"))
21041 (set_attr ("mode") ("SF"))])
21042
;; Unmasked v4fnmaddps (TARGET_AVX5124FMAPS), using UNSPEC_VP4FNMADD.
;; Operand 1 is tied to the V16SF destination; operand 2 is a V64SF
;; register printed with the %g modifier; operand 3 is a V4SF memory
;; operand.
21043 (define_insn "avx5124fmaddps_4fnmaddps"
21044 [(set (match_operand:V16SF 0 "register_operand" "=v")
21045 (unspec:V16SF
21046 [(match_operand:V16SF 1 "register_operand" "0")
21047 (match_operand:V64SF 2 "register_operand" "v")
21048 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD))]
21049 "TARGET_AVX5124FMAPS"
21050 "v4fnmaddps\t{%3, %g2, %0|%0, %g2, %3}"
21051 [(set_attr ("type") ("ssemuladd"))
21052 (set_attr ("prefix") ("evex"))
21053 (set_attr ("mode") ("V16SF"))])
21054
;; Merge-masking v4fnmaddps.  The unspec takes only the V64SF register
;; (operand 1) and the V4SF memory operand (operand 2); operand 3, tied
;; to the destination, is the other vec_merge input, selected by the
;; HImode mask in operand 4.
21055 (define_insn "avx5124fmaddps_4fnmaddps_mask"
21056 [(set (match_operand:V16SF 0 "register_operand" "=v")
21057 (vec_merge:V16SF
21058 (unspec:V16SF
21059 [(match_operand:V64SF 1 "register_operand" "v")
21060 (match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FNMADD)
21061 (match_operand:V16SF 3 "register_operand" "0")
21062 (match_operand:HI 4 "register_operand" "Yk")))]
21063 "TARGET_AVX5124FMAPS"
21064 "v4fnmaddps\t{%2, %g1, %0%{%4%}|%0%{%4%}, %g1, %2}"
21065 [(set_attr ("type") ("ssemuladd"))
21066 (set_attr ("prefix") ("evex"))
21067 (set_attr ("mode") ("V16SF"))])
21068
;; Zero-masking v4fnmaddps: the unspec result is vec_merged with the
;; constant-zero operand 4 under the HImode mask in operand 5, and the
;; template appends {z} after the mask.
21069 (define_insn "avx5124fmaddps_4fnmaddps_maskz"
21070 [(set (match_operand:V16SF 0 "register_operand" "=v")
21071 (vec_merge:V16SF
21072 (unspec:V16SF
21073 [(match_operand:V16SF 1 "register_operand" "0")
21074 (match_operand:V64SF 2 "register_operand" "v")
21075 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD)
21076 (match_operand:V16SF 4 "const0_operand" "C")
21077 (match_operand:HI 5 "register_operand" "Yk")))]
21078 "TARGET_AVX5124FMAPS"
21079 "v4fnmaddps\t{%3, %g2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %g2, %3}"
21080 [(set_attr ("type") ("ssemuladd"))
21081 (set_attr ("prefix") ("evex"))
21082 (set_attr ("mode") ("V16SF"))])
21083
;; Unmasked v4fnmaddss (TARGET_AVX5124FMAPS), using UNSPEC_VP4FNMADD.
;; Operand 1 is tied to the V4SF destination; operand 2 is a V64SF
;; register printed with the %x modifier; operand 3 is a V4SF memory
;; operand.
21084 (define_insn "avx5124fmaddps_4fnmaddss"
21085 [(set (match_operand:V4SF 0 "register_operand" "=v")
21086 (unspec:V4SF
21087 [(match_operand:V4SF 1 "register_operand" "0")
21088 (match_operand:V64SF 2 "register_operand" "v")
21089 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD))]
21090 "TARGET_AVX5124FMAPS"
21091 "v4fnmaddss\t{%3, %x2, %0|%0, %x2, %3}"
21092 [(set_attr ("type") ("ssemuladd"))
21093 (set_attr ("prefix") ("evex"))
21094 (set_attr ("mode") ("SF"))])
21095
;; Merge-masking v4fnmaddss.  The unspec takes only the V64SF register
;; (operand 1) and the V4SF memory operand (operand 2); operand 3, tied
;; to the destination, is the other vec_merge input, selected by the
;; QImode mask in operand 4.
21096 (define_insn "avx5124fmaddps_4fnmaddss_mask"
21097 [(set (match_operand:V4SF 0 "register_operand" "=v")
21098 (vec_merge:V4SF
21099 (unspec:V4SF
21100 [(match_operand:V64SF 1 "register_operand" "v")
21101 (match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FNMADD)
21102 (match_operand:V4SF 3 "register_operand" "0")
21103 (match_operand:QI 4 "register_operand" "Yk")))]
21104 "TARGET_AVX5124FMAPS"
21105 "v4fnmaddss\t{%2, %x1, %0%{%4%}|%0%{%4%}, %x1, %2}"
21106 [(set_attr ("type") ("ssemuladd"))
21107 (set_attr ("prefix") ("evex"))
21108 (set_attr ("mode") ("SF"))])
21109
;; Zero-masking v4fnmaddss: the unspec result is vec_merged with the
;; constant-zero operand 4 under the QImode mask in operand 5, and the
;; template appends {z} after the mask.
21110 (define_insn "avx5124fmaddps_4fnmaddss_maskz"
21111 [(set (match_operand:V4SF 0 "register_operand" "=v")
21112 (vec_merge:V4SF
21113 (unspec:V4SF
21114 [(match_operand:V4SF 1 "register_operand" "0")
21115 (match_operand:V64SF 2 "register_operand" "v")
21116 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD)
21117 (match_operand:V4SF 4 "const0_operand" "C")
21118 (match_operand:QI 5 "register_operand" "Yk")))]
21119 "TARGET_AVX5124FMAPS"
21120 "v4fnmaddss\t{%3, %x2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %x2, %3}"
21121 [(set_attr ("type") ("ssemuladd"))
21122 (set_attr ("prefix") ("evex"))
21123 (set_attr ("mode") ("SF"))])
21124
;; Unmasked vp4dpwssd (TARGET_AVX5124VNNIW).  Operand 1 is tied to the
;; V16SI destination; operand 2 is a V64SI register printed with the %g
;; modifier; operand 3 is a V4SI memory operand.
;; NOTE(review): the "mode" attribute is TI although the operands are
;; V16SI -- confirm whether XI was intended.
21125 (define_insn "avx5124vnniw_vp4dpwssd"
21126 [(set (match_operand:V16SI 0 "register_operand" "=v")
21127 (unspec:V16SI
21128 [(match_operand:V16SI 1 "register_operand" "0")
21129 (match_operand:V64SI 2 "register_operand" "v")
21130 (match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSD))]
21131 "TARGET_AVX5124VNNIW"
21132 "vp4dpwssd\t{%3, %g2, %0|%0, %g2, %3}"
21133 [(set_attr ("type") ("ssemuladd"))
21134 (set_attr ("prefix") ("evex"))
21135 (set_attr ("mode") ("TI"))])
21136
;; Merge-masking vp4dpwssd.  The unspec takes only the V64SI register
;; (operand 1) and the V4SI memory operand (operand 2); operand 3, tied
;; to the destination, is the other vec_merge input, selected by the
;; HImode mask in operand 4.
;; NOTE(review): the "mode" attribute is TI although the operands are
;; V16SI -- confirm whether XI was intended.
21137 (define_insn "avx5124vnniw_vp4dpwssd_mask"
21138 [(set (match_operand:V16SI 0 "register_operand" "=v")
21139 (vec_merge:V16SI
21140 (unspec:V16SI
21141 [(match_operand:V64SI 1 "register_operand" "v")
21142 (match_operand:V4SI 2 "memory_operand" "m")] UNSPEC_VP4DPWSSD)
21143 (match_operand:V16SI 3 "register_operand" "0")
21144 (match_operand:HI 4 "register_operand" "Yk")))]
21145 "TARGET_AVX5124VNNIW"
21146 "vp4dpwssd\t{%2, %g1, %0%{%4%}|%0%{%4%}, %g1, %2}"
21147 [(set_attr ("type") ("ssemuladd"))
21148 (set_attr ("prefix") ("evex"))
21149 (set_attr ("mode") ("TI"))])
21150
;; Zero-masking vp4dpwssd: the unspec result is vec_merged with the
;; constant-zero operand 4 under the HImode mask in operand 5, and the
;; template appends {z} after the mask.
;; NOTE(review): the "mode" attribute is TI although the operands are
;; V16SI -- confirm whether XI was intended.
21151 (define_insn "avx5124vnniw_vp4dpwssd_maskz"
21152 [(set (match_operand:V16SI 0 "register_operand" "=v")
21153 (vec_merge:V16SI
21154 (unspec:V16SI
21155 [(match_operand:V16SI 1 "register_operand" "0")
21156 (match_operand:V64SI 2 "register_operand" "v")
21157 (match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSD)
21158 (match_operand:V16SI 4 "const0_operand" "C")
21159 (match_operand:HI 5 "register_operand" "Yk")))]
21160 "TARGET_AVX5124VNNIW"
21161 "vp4dpwssd\t{%3, %g2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %g2, %3}"
21162 [(set_attr ("type") ("ssemuladd"))
21163 (set_attr ("prefix") ("evex"))
21164 (set_attr ("mode") ("TI"))])
21165
;; Unmasked vp4dpwssds (TARGET_AVX5124VNNIW), using UNSPEC_VP4DPWSSDS.
;; Operand 1 is tied to the V16SI destination; operand 2 is a V64SI
;; register printed with the %g modifier; operand 3 is a V4SI memory
;; operand.
;; NOTE(review): the "mode" attribute is TI although the operands are
;; V16SI -- confirm whether XI was intended.
21166 (define_insn "avx5124vnniw_vp4dpwssds"
21167 [(set (match_operand:V16SI 0 "register_operand" "=v")
21168 (unspec:V16SI
21169 [(match_operand:V16SI 1 "register_operand" "0")
21170 (match_operand:V64SI 2 "register_operand" "v")
21171 (match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSDS))]
21172 "TARGET_AVX5124VNNIW"
21173 "vp4dpwssds\t{%3, %g2, %0|%0, %g2, %3}"
21174 [(set_attr ("type") ("ssemuladd"))
21175 (set_attr ("prefix") ("evex"))
21176 (set_attr ("mode") ("TI"))])
21177
;; VP4DPWSSDS with merge masking.  The unspec takes the V64SI register
;; operand (1) and the V4SI memory operand (2); its result is merged with
;; operand 3 (tied to the destination) under the HImode mask in operand 4.
(define_insn "avx5124vnniw_vp4dpwssds_mask"
  [(set (match_operand:V16SI 0 "register_operand" "=v")
        (vec_merge:V16SI
          (unspec:V16SI
            [(match_operand:V64SI 1 "register_operand" "v")
             (match_operand:V4SI 2 "memory_operand" "m")]
            UNSPEC_VP4DPWSSDS)
          (match_operand:V16SI 3 "register_operand" "0")
          (match_operand:HI 4 "register_operand" "Yk")))]
  "TARGET_AVX5124VNNIW"
  "vp4dpwssds\t{%2, %g1, %0%{%4%}|%0%{%4%}, %g1, %2}"
  [(set_attr "type" "ssemuladd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
21191
;; VP4DPWSSDS with zero masking.  The unspec result is merged with the
;; constant-zero vector (operand 4) under the HImode mask in operand 5;
;; operand 1 stays tied to the destination.
(define_insn "avx5124vnniw_vp4dpwssds_maskz"
  [(set (match_operand:V16SI 0 "register_operand" "=v")
        (vec_merge:V16SI
          (unspec:V16SI
            [(match_operand:V16SI 1 "register_operand" "0")
             (match_operand:V64SI 2 "register_operand" "v")
             (match_operand:V4SI 3 "memory_operand" "m")]
            UNSPEC_VP4DPWSSDS)
          (match_operand:V16SI 4 "const0_operand" "C")
          (match_operand:HI 5 "register_operand" "Yk")))]
  "TARGET_AVX5124VNNIW"
  "vp4dpwssds\t{%3, %g2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %g2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
21206
;; Vector population count over the VI48_AVX512VL modes, expressed with the
;; generic popcount RTL code.  Enabled by TARGET_AVX512VPOPCNTDQ.  The
;; <mask_name> / <mask_operand2> placeholders are filled in by definitions
;; that live outside this part of the file.
(define_insn "vpopcount<mode><mask_name>"
  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
        (popcount:VI48_AVX512VL
          (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512VPOPCNTDQ"
  "vpopcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}")
21213
;; Out-of-line save of multiple registers.  Matches a parallel accepted by
;; the "save_multiple" predicate and emits a call to the symbol held in
;; operand 1.
(define_insn "save_multiple<mode>"
  [(match_parallel 0 "save_multiple"
     [(use (match_operand:P 1 "symbol_operand"))])]
  "TARGET_SSE && TARGET_64BIT"
  "call\t%P1")
21220
;; Out-of-line restore of multiple registers.  Matches a parallel accepted
;; by the "restore_multiple" predicate and emits a call to the symbol held
;; in operand 1.
(define_insn "restore_multiple<mode>"
  [(match_parallel 0 "restore_multiple"
     [(use (match_operand:P 1 "symbol_operand"))])]
  "TARGET_SSE && TARGET_64BIT"
  "call\t%P1")
21227
;; Out-of-line restore of multiple registers combined with the function
;; return.  The parallel contains a (return), the use of the symbol in
;; operand 1, and a set of SP_REG from R10_REG; control is transferred with
;; a jmp rather than a call.
(define_insn "restore_multiple_and_return<mode>"
  [(match_parallel 0 "restore_multiple"
     [(return)
      (use (match_operand:P 1 "symbol_operand"))
      (set (reg:DI SP_REG) (reg:DI R10_REG))])]
  "TARGET_SSE && TARGET_64BIT"
  "jmp\t%P1")
21237
;; Out-of-line restore of multiple registers for functions that use the
;; hard frame pointer: the leave operation is folded in ahead of the return.
;; The parallel models that as SP_REG = BP_REG + 8 and BP_REG = mem[BP_REG],
;; plus a blockage-style clobber of (mem:BLK (scratch)).
(define_insn "restore_multiple_leave_return<mode>"
  [(match_parallel 0 "restore_multiple"
     [(return)
      (use (match_operand:P 1 "symbol_operand"))
      (set (reg:DI SP_REG)
           (plus:DI (reg:DI BP_REG) (const_int 8)))
      (set (reg:DI BP_REG)
           (mem:DI (reg:DI BP_REG)))
      (clobber (mem:BLK (scratch)))])]
  "TARGET_SSE && TARGET_64BIT"
  "jmp\t%P1")
21250
;; Vector population count over the VI12_AVX512VL modes, expressed with the
;; generic popcount RTL code.  Enabled by TARGET_AVX512BITALG.  Shares its
;; name template with the VI48_AVX512VL pattern; the mode iterator is what
;; distinguishes the two.
(define_insn "vpopcount<mode><mask_name>"
  [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
        (popcount:VI12_AVX512VL
          (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512BITALG"
  "vpopcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}")
21257
;; GF2P8AFFINEINVQB.  Three alternatives: legacy SSE encoding (destination
;; tied to operand 1), VEX/EVEX encoding on "x" registers, and EVEX encoding
;; on "v" registers.  Operand 3 is an 8-bit immediate.
;;
;; Operand 1 deliberately carries no '%' commutative modifier.  The affine
;; transform treats its two vector sources differently (one supplies the
;; data bytes, the other the bit matrix), so allowing the register allocator
;; to exchange operands 1 and 2 would silently change the computed value.
(define_insn "vgf2p8affineinvqb_<mode><mask_name>"
  [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,x,v")
        (unspec:VI1_AVX512F
          [(match_operand:VI1_AVX512F 1 "register_operand" "0,x,v")
           (match_operand:VI1_AVX512F 2 "nonimmediate_operand" "xBm,xm,vm")
           (match_operand:QI 3 "const_0_to_255_operand" "n,n,n")]
          UNSPEC_GF2P8AFFINEINV))]
  "TARGET_GFNI"
  "@
   gf2p8affineinvqb\t{%3, %2, %0| %0, %2, %3}
   vgf2p8affineinvqb\t{%3, %2, %1, %0<mask_operand4>| %0<mask_operand4>, %1, %2, %3}
   vgf2p8affineinvqb\t{%3, %2, %1, %0<mask_operand4>| %0<mask_operand4>, %1, %2, %3}"
  [(set_attr "isa" "noavx,avx,avx512f")
   (set_attr "prefix_data16" "1,*,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,maybe_evex,evex")
   (set_attr "mode" "<sseinsnmode>")])
21275
;; GF2P8AFFINEQB.  Three alternatives: legacy SSE encoding (destination tied
;; to operand 1), VEX/EVEX encoding on "x" registers, and EVEX encoding on
;; "v" registers.  Operand 3 is an 8-bit immediate.
;;
;; Operand 1 deliberately carries no '%' commutative modifier.  The affine
;; transform treats its two vector sources differently (one supplies the
;; data bytes, the other the bit matrix), so allowing the register allocator
;; to exchange operands 1 and 2 would silently change the computed value.
(define_insn "vgf2p8affineqb_<mode><mask_name>"
  [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,x,v")
        (unspec:VI1_AVX512F
          [(match_operand:VI1_AVX512F 1 "register_operand" "0,x,v")
           (match_operand:VI1_AVX512F 2 "nonimmediate_operand" "xBm,xm,vm")
           (match_operand:QI 3 "const_0_to_255_operand" "n,n,n")]
          UNSPEC_GF2P8AFFINE))]
  "TARGET_GFNI"
  "@
   gf2p8affineqb\t{%3, %2, %0| %0, %2, %3}
   vgf2p8affineqb\t{%3, %2, %1, %0<mask_operand4>| %0<mask_operand4>, %1, %2, %3}
   vgf2p8affineqb\t{%3, %2, %1, %0<mask_operand4>| %0<mask_operand4>, %1, %2, %3}"
  [(set_attr "isa" "noavx,avx,avx512f")
   (set_attr "prefix_data16" "1,*,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,maybe_evex,evex")
   (set_attr "mode" "<sseinsnmode>")])
21293
;; GF2P8MULB.  Three alternatives: legacy SSE encoding (destination tied to
;; operand 1), VEX/EVEX encoding on "x" registers, and EVEX encoding on "v"
;; registers.  Operand 1 is marked commutative with operand 2 via '%'.
(define_insn "vgf2p8mulb_<mode><mask_name>"
  [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,x,v")
        (unspec:VI1_AVX512F
          [(match_operand:VI1_AVX512F 1 "register_operand" "%0,x,v")
           (match_operand:VI1_AVX512F 2 "nonimmediate_operand" "xBm,xm,vm")]
          UNSPEC_GF2P8MUL))]
  "TARGET_GFNI"
  "@
   gf2p8mulb\t{%2, %0| %0, %2}
   vgf2p8mulb\t{%2, %1, %0<mask_operand3>| %0<mask_operand3>, %1, %2}
   vgf2p8mulb\t{%2, %1, %0<mask_operand3>| %0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "noavx,avx,avx512f")
   (set_attr "prefix_data16" "1,*,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,maybe_evex,evex")
   (set_attr "mode" "<sseinsnmode>")])
21310
;; VPSHRD with an immediate count: two vector sources (operand 2 may be in
;; memory) plus a 0..255 immediate in operand 3, over the VI248_AVX512VL
;; modes.  Masked variants come from the <mask_name> placeholder.
(define_insn "vpshrd_<mode><mask_name>"
  [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
        (unspec:VI248_AVX512VL
          [(match_operand:VI248_AVX512VL 1 "register_operand" "v")
           (match_operand:VI248_AVX512VL 2 "nonimmediate_operand" "vm")
           (match_operand:SI 3 "const_0_to_255_operand" "n")]
          UNSPEC_VPSHRD))]
  "TARGET_AVX512VBMI2"
  "vpshrd<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3 }"
  [(set_attr "prefix" "evex")])
21321
;; VPSHLD with an immediate count: two vector sources (operand 2 may be in
;; memory) plus a 0..255 immediate in operand 3, over the VI248_AVX512VL
;; modes.  Masked variants come from the <mask_name> placeholder.
(define_insn "vpshld_<mode><mask_name>"
  [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
        (unspec:VI248_AVX512VL
          [(match_operand:VI248_AVX512VL 1 "register_operand" "v")
           (match_operand:VI248_AVX512VL 2 "nonimmediate_operand" "vm")
           (match_operand:SI 3 "const_0_to_255_operand" "n")]
          UNSPEC_VPSHLD))]
  "TARGET_AVX512VBMI2"
  "vpshld<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3 }"
  [(set_attr "prefix" "evex")])
21332
;; VPSHRDV, unmasked.  Three vector inputs; operand 1 is tied to the
;; destination, operand 2 is a register and operand 3 may be in memory.
(define_insn "vpshrdv_<mode>"
  [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
        (unspec:VI248_AVX512VL
          [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
           (match_operand:VI248_AVX512VL 2 "register_operand" "v")
           (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
          UNSPEC_VPSHRDV))]
  "TARGET_AVX512VBMI2"
  "vpshrdv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3 }"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
21344
;; VPSHRDV with merge masking.  The unspec result is merged with operand 1
;; itself (match_dup 1, which is also tied to the destination) under the
;; mask register in operand 4.
(define_insn "vpshrdv_<mode>_mask"
  [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
        (vec_merge:VI248_AVX512VL
          (unspec:VI248_AVX512VL
            [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
             (match_operand:VI248_AVX512VL 2 "register_operand" "v")
             (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
            UNSPEC_VPSHRDV)
          (match_dup 1)
          (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512VBMI2"
  "vpshrdv<ssemodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3 }"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
21359
;; Expander for zero-masked VPSHRDV.  Takes destination, three vector
;; sources and the mask, and forwards them to vpshrdv_<mode>_maskz_1 with an
;; explicit zero vector inserted ahead of the mask.
(define_expand "vpshrdv_<mode>_maskz"
  [(match_operand:VI248_AVX512VL 0 "register_operand")
   (match_operand:VI248_AVX512VL 1 "register_operand")
   (match_operand:VI248_AVX512VL 2 "register_operand")
   (match_operand:VI248_AVX512VL 3 "nonimmediate_operand")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512VBMI2"
{
  /* The _maskz_1 insn wants the all-zero vector as its operand 4 and the
     mask as its operand 5.  */
  rtx zero = CONST0_RTX (<MODE>mode);
  emit_insn (gen_vpshrdv_<mode>_maskz_1 (operands[0], operands[1],
                                         operands[2], operands[3],
                                         zero, operands[4]));
  DONE;
})
21374
;; VPSHRDV with zero masking.  The unspec result is merged with the
;; constant-zero vector in operand 4 under the mask register in operand 5;
;; the template appends {z} after the mask.
(define_insn "vpshrdv_<mode>_maskz_1"
  [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
        (vec_merge:VI248_AVX512VL
          (unspec:VI248_AVX512VL
            [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
             (match_operand:VI248_AVX512VL 2 "register_operand" "v")
             (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
            UNSPEC_VPSHRDV)
          (match_operand:VI248_AVX512VL 4 "const0_operand" "C")
          (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
  "TARGET_AVX512VBMI2"
  "vpshrdv<ssemodesuffix>\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3 }"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
21389
;; VPSHLDV, unmasked.  Three vector inputs; operand 1 is tied to the
;; destination, operand 2 is a register and operand 3 may be in memory.
(define_insn "vpshldv_<mode>"
  [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
        (unspec:VI248_AVX512VL
          [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
           (match_operand:VI248_AVX512VL 2 "register_operand" "v")
           (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
          UNSPEC_VPSHLDV))]
  "TARGET_AVX512VBMI2"
  "vpshldv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3 }"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
21401
;; VPSHLDV with merge masking.  The unspec result is merged with operand 1
;; itself (match_dup 1, which is also tied to the destination) under the
;; mask register in operand 4.
(define_insn "vpshldv_<mode>_mask"
  [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
        (vec_merge:VI248_AVX512VL
          (unspec:VI248_AVX512VL
            [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
             (match_operand:VI248_AVX512VL 2 "register_operand" "v")
             (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
            UNSPEC_VPSHLDV)
          (match_dup 1)
          (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512VBMI2"
  "vpshldv<ssemodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3 }"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
21416
;; Expander for zero-masked VPSHLDV.  Takes destination, three vector
;; sources and the mask, and forwards them to vpshldv_<mode>_maskz_1 with an
;; explicit zero vector inserted ahead of the mask.
(define_expand "vpshldv_<mode>_maskz"
  [(match_operand:VI248_AVX512VL 0 "register_operand")
   (match_operand:VI248_AVX512VL 1 "register_operand")
   (match_operand:VI248_AVX512VL 2 "register_operand")
   (match_operand:VI248_AVX512VL 3 "nonimmediate_operand")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512VBMI2"
{
  /* The _maskz_1 insn wants the all-zero vector as its operand 4 and the
     mask as its operand 5.  */
  rtx zero = CONST0_RTX (<MODE>mode);
  emit_insn (gen_vpshldv_<mode>_maskz_1 (operands[0], operands[1],
                                         operands[2], operands[3],
                                         zero, operands[4]));
  DONE;
})
21431
;; VPSHLDV with zero masking.  The unspec result is merged with the
;; constant-zero vector in operand 4 under the mask register in operand 5;
;; the template appends {z} after the mask.
(define_insn "vpshldv_<mode>_maskz_1"
  [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
        (vec_merge:VI248_AVX512VL
          (unspec:VI248_AVX512VL
            [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
             (match_operand:VI248_AVX512VL 2 "register_operand" "v")
             (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
            UNSPEC_VPSHLDV)
          (match_operand:VI248_AVX512VL 4 "const0_operand" "C")
          (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
  "TARGET_AVX512VBMI2"
  "vpshldv<ssemodesuffix>\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3 }"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
21446
;; VPDPBUSD, unmasked.  Operand 1 is tied to the destination; operand 2 is
;; a register and operand 3 may be in memory.  Represented as
;; UNSPEC_VPMADDUBSWACCD over the VI4_AVX512VL modes.
(define_insn "vpdpbusd_<mode>"
  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
        (unspec:VI4_AVX512VL
          [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
           (match_operand:VI4_AVX512VL 2 "register_operand" "v")
           (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
          UNSPEC_VPMADDUBSWACCD))]
  "TARGET_AVX512VNNI"
  "vpdpbusd\t{%3, %2, %0|%0, %2, %3 }"
  [(set_attr "prefix" "evex")])
21457
;; VPDPBUSD with merge masking.  The unspec result is merged with operand 1
;; (match_dup 1, also tied to the destination) under the mask register in
;; operand 4.
(define_insn "vpdpbusd_<mode>_mask"
  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
        (vec_merge:VI4_AVX512VL
          (unspec:VI4_AVX512VL
            [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
             (match_operand:VI4_AVX512VL 2 "register_operand" "v")
             (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
            UNSPEC_VPMADDUBSWACCD)
          (match_dup 1)
          (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512VNNI"
  "vpdpbusd\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3 }"
  [(set_attr "prefix" "evex")])
21471
;; Expander for zero-masked VPDPBUSD.  Forwards its operands to
;; vpdpbusd_<mode>_maskz_1, inserting an explicit zero vector ahead of the
;; mask.
(define_expand "vpdpbusd_<mode>_maskz"
  [(match_operand:VI4_AVX512VL 0 "register_operand")
   (match_operand:VI4_AVX512VL 1 "register_operand")
   (match_operand:VI4_AVX512VL 2 "register_operand")
   (match_operand:VI4_AVX512VL 3 "nonimmediate_operand")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512VNNI"
{
  /* The _maskz_1 insn wants the all-zero vector as its operand 4 and the
     mask as its operand 5.  */
  rtx zero = CONST0_RTX (<MODE>mode);
  emit_insn (gen_vpdpbusd_<mode>_maskz_1 (operands[0], operands[1],
                                          operands[2], operands[3],
                                          zero, operands[4]));
  DONE;
})
21486
;; VPDPBUSD with zero masking.  The unspec result is merged with the
;; constant-zero vector in operand 4 under the mask register in operand 5;
;; the template appends {z} after the mask.
(define_insn "vpdpbusd_<mode>_maskz_1"
  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
        (vec_merge:VI4_AVX512VL
          (unspec:VI4_AVX512VL
            [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
             (match_operand:VI4_AVX512VL 2 "register_operand" "v")
             (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
            UNSPEC_VPMADDUBSWACCD)
          (match_operand:VI4_AVX512VL 4 "const0_operand" "C")
          (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
  "TARGET_AVX512VNNI"
  "vpdpbusd\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3 }"
  [(set_attr "prefix" "evex")])
21500
21501
;; VPDPBUSDS, unmasked.  Operand 1 is tied to the destination; operand 2 is
;; a register and operand 3 may be in memory.  Represented as
;; UNSPEC_VPMADDUBSWACCSSD over the VI4_AVX512VL modes.
(define_insn "vpdpbusds_<mode>"
  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
        (unspec:VI4_AVX512VL
          [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
           (match_operand:VI4_AVX512VL 2 "register_operand" "v")
           (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
          UNSPEC_VPMADDUBSWACCSSD))]
  "TARGET_AVX512VNNI"
  "vpdpbusds\t{%3, %2, %0|%0, %2, %3 }"
  [(set_attr "prefix" "evex")])
21512
;; VPDPBUSDS with merge masking.  The unspec result is merged with operand 1
;; (match_dup 1, also tied to the destination) under the mask register in
;; operand 4.
(define_insn "vpdpbusds_<mode>_mask"
  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
        (vec_merge:VI4_AVX512VL
          (unspec:VI4_AVX512VL
            [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
             (match_operand:VI4_AVX512VL 2 "register_operand" "v")
             (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
            UNSPEC_VPMADDUBSWACCSSD)
          (match_dup 1)
          (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512VNNI"
  "vpdpbusds\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3 }"
  [(set_attr "prefix" "evex")])
21526
;; Expander for zero-masked VPDPBUSDS.  Forwards its operands to
;; vpdpbusds_<mode>_maskz_1, inserting an explicit zero vector ahead of the
;; mask.
(define_expand "vpdpbusds_<mode>_maskz"
  [(match_operand:VI4_AVX512VL 0 "register_operand")
   (match_operand:VI4_AVX512VL 1 "register_operand")
   (match_operand:VI4_AVX512VL 2 "register_operand")
   (match_operand:VI4_AVX512VL 3 "nonimmediate_operand")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512VNNI"
{
  /* The _maskz_1 insn wants the all-zero vector as its operand 4 and the
     mask as its operand 5.  */
  rtx zero = CONST0_RTX (<MODE>mode);
  emit_insn (gen_vpdpbusds_<mode>_maskz_1 (operands[0], operands[1],
                                           operands[2], operands[3],
                                           zero, operands[4]));
  DONE;
})
21541
;; VPDPBUSDS with zero masking.  The unspec result is merged with the
;; constant-zero vector in operand 4 under the mask register in operand 5;
;; the template appends {z} after the mask.
(define_insn "vpdpbusds_<mode>_maskz_1"
  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
        (vec_merge:VI4_AVX512VL
          (unspec:VI4_AVX512VL
            [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
             (match_operand:VI4_AVX512VL 2 "register_operand" "v")
             (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
            UNSPEC_VPMADDUBSWACCSSD)
          (match_operand:VI4_AVX512VL 4 "const0_operand" "C")
          (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
  "TARGET_AVX512VNNI"
  "vpdpbusds\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3 }"
  [(set_attr "prefix" "evex")])
21555
21556
;; VPDPWSSD, unmasked.  Operand 1 is tied to the destination; operand 2 is
;; a register and operand 3 may be in memory.  Represented as
;; UNSPEC_VPMADDWDACCD over the VI4_AVX512VL modes.
(define_insn "vpdpwssd_<mode>"
  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
        (unspec:VI4_AVX512VL
          [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
           (match_operand:VI4_AVX512VL 2 "register_operand" "v")
           (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
          UNSPEC_VPMADDWDACCD))]
  "TARGET_AVX512VNNI"
  "vpdpwssd\t{%3, %2, %0|%0, %2, %3 }"
  [(set_attr "prefix" "evex")])
21567
;; VPDPWSSD with merge masking.  The unspec result is merged with operand 1
;; (match_dup 1, also tied to the destination) under the mask register in
;; operand 4.
(define_insn "vpdpwssd_<mode>_mask"
  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
        (vec_merge:VI4_AVX512VL
          (unspec:VI4_AVX512VL
            [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
             (match_operand:VI4_AVX512VL 2 "register_operand" "v")
             (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
            UNSPEC_VPMADDWDACCD)
          (match_dup 1)
          (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512VNNI"
  "vpdpwssd\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3 }"
  [(set_attr "prefix" "evex")])
21581
;; Expander for zero-masked VPDPWSSD.  Forwards its operands to
;; vpdpwssd_<mode>_maskz_1, inserting an explicit zero vector ahead of the
;; mask.
(define_expand "vpdpwssd_<mode>_maskz"
  [(match_operand:VI4_AVX512VL 0 "register_operand")
   (match_operand:VI4_AVX512VL 1 "register_operand")
   (match_operand:VI4_AVX512VL 2 "register_operand")
   (match_operand:VI4_AVX512VL 3 "nonimmediate_operand")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512VNNI"
{
  /* The _maskz_1 insn wants the all-zero vector as its operand 4 and the
     mask as its operand 5.  */
  rtx zero = CONST0_RTX (<MODE>mode);
  emit_insn (gen_vpdpwssd_<mode>_maskz_1 (operands[0], operands[1],
                                          operands[2], operands[3],
                                          zero, operands[4]));
  DONE;
})
21596
;; VPDPWSSD with zero masking.  The unspec result is merged with the
;; constant-zero vector in operand 4 under the mask register in operand 5;
;; the template appends {z} after the mask.
(define_insn "vpdpwssd_<mode>_maskz_1"
  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
        (vec_merge:VI4_AVX512VL
          (unspec:VI4_AVX512VL
            [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
             (match_operand:VI4_AVX512VL 2 "register_operand" "v")
             (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
            UNSPEC_VPMADDWDACCD)
          (match_operand:VI4_AVX512VL 4 "const0_operand" "C")
          (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
  "TARGET_AVX512VNNI"
  "vpdpwssd\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3 }"
  [(set_attr "prefix" "evex")])
21610
21611
;; VPDPWSSDS, unmasked.  Operand 1 is tied to the destination; operand 2 is
;; a register and operand 3 may be in memory.  Represented as
;; UNSPEC_VPMADDWDACCSSD over the VI4_AVX512VL modes.
(define_insn "vpdpwssds_<mode>"
  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
        (unspec:VI4_AVX512VL
          [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
           (match_operand:VI4_AVX512VL 2 "register_operand" "v")
           (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
          UNSPEC_VPMADDWDACCSSD))]
  "TARGET_AVX512VNNI"
  "vpdpwssds\t{%3, %2, %0|%0, %2, %3 }"
  [(set_attr "prefix" "evex")])
21622
;; VPDPWSSDS with merge masking.  The unspec result is merged with operand 1
;; (match_dup 1, also tied to the destination) under the mask register in
;; operand 4.
(define_insn "vpdpwssds_<mode>_mask"
  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
        (vec_merge:VI4_AVX512VL
          (unspec:VI4_AVX512VL
            [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
             (match_operand:VI4_AVX512VL 2 "register_operand" "v")
             (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
            UNSPEC_VPMADDWDACCSSD)
          (match_dup 1)
          (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512VNNI"
  "vpdpwssds\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3 }"
  [(set_attr "prefix" "evex")])
21636
;; Expander for zero-masked VPDPWSSDS.  Forwards its operands to
;; vpdpwssds_<mode>_maskz_1, inserting an explicit zero vector ahead of the
;; mask.
(define_expand "vpdpwssds_<mode>_maskz"
  [(match_operand:VI4_AVX512VL 0 "register_operand")
   (match_operand:VI4_AVX512VL 1 "register_operand")
   (match_operand:VI4_AVX512VL 2 "register_operand")
   (match_operand:VI4_AVX512VL 3 "nonimmediate_operand")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512VNNI"
{
  /* The _maskz_1 insn wants the all-zero vector as its operand 4 and the
     mask as its operand 5.  */
  rtx zero = CONST0_RTX (<MODE>mode);
  emit_insn (gen_vpdpwssds_<mode>_maskz_1 (operands[0], operands[1],
                                           operands[2], operands[3],
                                           zero, operands[4]));
  DONE;
})
21651
;; VPDPWSSDS with zero masking.  The unspec result is merged with the
;; constant-zero vector in operand 4 under the mask register in operand 5;
;; the template appends {z} after the mask.
(define_insn "vpdpwssds_<mode>_maskz_1"
  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
        (vec_merge:VI4_AVX512VL
          (unspec:VI4_AVX512VL
            [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
             (match_operand:VI4_AVX512VL 2 "register_operand" "v")
             (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
            UNSPEC_VPMADDWDACCSSD)
          (match_operand:VI4_AVX512VL 4 "const0_operand" "C")
          (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
  "TARGET_AVX512VNNI"
  "vpdpwssds\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3 }"
  [(set_attr "prefix" "evex")])
21665
;; VAESDEC over the VI1_AVX512VL_F modes: register source in operand 1,
;; register-or-memory source in operand 2.  No insn attributes are set.
(define_insn "vaesdec_<mode>"
  [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v")
        (unspec:VI1_AVX512VL_F
          [(match_operand:VI1_AVX512VL_F 1 "register_operand" "v")
           (match_operand:VI1_AVX512VL_F 2 "vector_operand" "vm")]
          UNSPEC_VAESDEC))]
  "TARGET_VAES"
  "vaesdec\t{%2, %1, %0|%0, %1, %2}")
21675
;; VAESDECLAST over the VI1_AVX512VL_F modes: register source in operand 1,
;; register-or-memory source in operand 2.  No insn attributes are set.
(define_insn "vaesdeclast_<mode>"
  [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v")
        (unspec:VI1_AVX512VL_F
          [(match_operand:VI1_AVX512VL_F 1 "register_operand" "v")
           (match_operand:VI1_AVX512VL_F 2 "vector_operand" "vm")]
          UNSPEC_VAESDECLAST))]
  "TARGET_VAES"
  "vaesdeclast\t{%2, %1, %0|%0, %1, %2}")
21685
;; VAESENC over the VI1_AVX512VL_F modes: register source in operand 1,
;; register-or-memory source in operand 2.  No insn attributes are set.
(define_insn "vaesenc_<mode>"
  [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v")
        (unspec:VI1_AVX512VL_F
          [(match_operand:VI1_AVX512VL_F 1 "register_operand" "v")
           (match_operand:VI1_AVX512VL_F 2 "vector_operand" "vm")]
          UNSPEC_VAESENC))]
  "TARGET_VAES"
  "vaesenc\t{%2, %1, %0|%0, %1, %2}")
21695
;; VAESENCLAST over the VI1_AVX512VL_F modes: register source in operand 1,
;; register-or-memory source in operand 2.  No insn attributes are set.
(define_insn "vaesenclast_<mode>"
  [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v")
        (unspec:VI1_AVX512VL_F
          [(match_operand:VI1_AVX512VL_F 1 "register_operand" "v")
           (match_operand:VI1_AVX512VL_F 2 "vector_operand" "vm")]
          UNSPEC_VAESENCLAST))]
  "TARGET_VAES"
  "vaesenclast\t{%2, %1, %0|%0, %1, %2}")
21705
;; VPCLMULQDQ over the VI8_FVL modes: register source in operand 1,
;; register-or-memory source in operand 2 and a 0..255 immediate selector in
;; operand 3.
(define_insn "vpclmulqdq_<mode>"
  [(set (match_operand:VI8_FVL 0 "register_operand" "=v")
        (unspec:VI8_FVL
          [(match_operand:VI8_FVL 1 "register_operand" "v")
           (match_operand:VI8_FVL 2 "vector_operand" "vm")
           (match_operand:SI 3 "const_0_to_255_operand" "n")]
          UNSPEC_VPCLMULQDQ))]
  "TARGET_VPCLMULQDQ"
  "vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "mode" "DI")])
21715
;; VPSHUFBITQMB: takes two VI1_AVX512VLBW vector inputs (operand 2 may be in
;; memory) and writes a mask-mode result into a "Yk" mask register.  The
;; <mask_scalar_merge_name> / <mask_scalar_merge_operand3> placeholders are
;; filled in by definitions outside this part of the file.
(define_insn "avx512vl_vpshufbitqmb<mode><mask_scalar_merge_name>"
  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
        (unspec:<avx512fmaskmode>
          [(match_operand:VI1_AVX512VLBW 1 "register_operand" "v")
           (match_operand:VI1_AVX512VLBW 2 "nonimmediate_operand" "vm")]
          UNSPEC_VPSHUFBIT))]
  "TARGET_AVX512BITALG"
  "vpshufbitqmb\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])