1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005-2020 Free Software Foundation, Inc.
3 ;;
4 ;; This file is part of GCC.
5 ;;
6 ;; GCC is free software; you can redistribute it and/or modify
7 ;; it under the terms of the GNU General Public License as published by
8 ;; the Free Software Foundation; either version 3, or (at your option)
9 ;; any later version.
10 ;;
11 ;; GCC is distributed in the hope that it will be useful,
12 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
13 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 ;; GNU General Public License for more details.
15 ;;
16 ;; You should have received a copy of the GNU General Public License
17 ;; along with GCC; see the file COPYING3. If not see
18 ;; <http://www.gnu.org/licenses/>.
19
20 (define_c_enum "unspec" [
21 ;; SSE
22 UNSPEC_MOVNT
23
24 ;; SSE2
25 UNSPEC_MOVDI_TO_SSE
26
27 ;; SSE3
28 UNSPEC_LDDQU
29
30 ;; SSSE3
31 UNSPEC_PSHUFB
32 UNSPEC_PSIGN
33 UNSPEC_PALIGNR
34
35 ;; For SSE4A support
36 UNSPEC_EXTRQI
37 UNSPEC_EXTRQ
38 UNSPEC_INSERTQI
39 UNSPEC_INSERTQ
40
41 ;; For SSE4.1 support
42 UNSPEC_BLENDV
43 UNSPEC_INSERTPS
44 UNSPEC_DP
45 UNSPEC_MOVNTDQA
46 UNSPEC_MPSADBW
47 UNSPEC_PHMINPOSUW
48 UNSPEC_PTEST
49
50 ;; For SSE4.2 support
51 UNSPEC_PCMPESTR
52 UNSPEC_PCMPISTR
53
54 ;; For FMA4 support
55 UNSPEC_FMADDSUB
56 UNSPEC_XOP_UNSIGNED_CMP
57 UNSPEC_XOP_TRUEFALSE
58 UNSPEC_XOP_PERMUTE
59 UNSPEC_FRCZ
60
61 ;; For AES support
62 UNSPEC_AESENC
63 UNSPEC_AESENCLAST
64 UNSPEC_AESDEC
65 UNSPEC_AESDECLAST
66 UNSPEC_AESIMC
67 UNSPEC_AESKEYGENASSIST
68
69 ;; For PCLMUL support
70 UNSPEC_PCLMUL
71
72 ;; For AVX support
73 UNSPEC_PCMP
74 UNSPEC_VPERMIL
75 UNSPEC_VPERMIL2
76 UNSPEC_VPERMIL2F128
77 UNSPEC_CAST
78 UNSPEC_VTESTP
79 UNSPEC_VCVTPH2PS
80 UNSPEC_VCVTPS2PH
81
82 ;; For AVX2 support
83 UNSPEC_VPERMVAR
84 UNSPEC_VPERMTI
85 UNSPEC_GATHER
86 UNSPEC_VSIBADDR
87
88 ;; For AVX512F support
89 UNSPEC_VPERMT2
90 UNSPEC_UNSIGNED_FIX_NOTRUNC
91 UNSPEC_UNSIGNED_PCMP
92 UNSPEC_TESTM
93 UNSPEC_TESTNM
94 UNSPEC_SCATTER
95 UNSPEC_RCP14
96 UNSPEC_RSQRT14
97 UNSPEC_FIXUPIMM
98 UNSPEC_SCALEF
99 UNSPEC_VTERNLOG
100 UNSPEC_GETEXP
101 UNSPEC_GETMANT
102 UNSPEC_ALIGN
103 UNSPEC_CONFLICT
104 UNSPEC_COMPRESS
105 UNSPEC_COMPRESS_STORE
106 UNSPEC_EXPAND
107 UNSPEC_MASKED_EQ
108 UNSPEC_MASKED_GT
109
110 ;; Mask operations
111 UNSPEC_MASKOP
112 UNSPEC_KORTEST
113 UNSPEC_KTEST
114
115 ;; For embed. rounding feature
116 UNSPEC_EMBEDDED_ROUNDING
117
118 ;; For AVX512PF support
119 UNSPEC_GATHER_PREFETCH
120 UNSPEC_SCATTER_PREFETCH
121
122 ;; For AVX512ER support
123 UNSPEC_EXP2
124 UNSPEC_RCP28
125 UNSPEC_RSQRT28
126
127 ;; For SHA support
128 UNSPEC_SHA1MSG1
129 UNSPEC_SHA1MSG2
130 UNSPEC_SHA1NEXTE
131 UNSPEC_SHA1RNDS4
132 UNSPEC_SHA256MSG1
133 UNSPEC_SHA256MSG2
134 UNSPEC_SHA256RNDS2
135
136 ;; For AVX512BW support
137 UNSPEC_DBPSADBW
138 UNSPEC_PMADDUBSW512
139 UNSPEC_PMADDWD512
140 UNSPEC_PSHUFHW
141 UNSPEC_PSHUFLW
142 UNSPEC_CVTINT2MASK
143
144 ;; For AVX512DQ support
145 UNSPEC_REDUCE
146 UNSPEC_FPCLASS
147 UNSPEC_RANGE
148
149 ;; For AVX512IFMA support
150 UNSPEC_VPMADD52LUQ
151 UNSPEC_VPMADD52HUQ
152
153 ;; For AVX512VBMI support
154 UNSPEC_VPMULTISHIFT
155
156 ;; For AVX5124FMAPS/AVX5124VNNIW support
157 UNSPEC_VP4FMADD
158 UNSPEC_VP4FNMADD
159 UNSPEC_VP4DPWSSD
160 UNSPEC_VP4DPWSSDS
161
162 ;; For GFNI support
163 UNSPEC_GF2P8AFFINEINV
164 UNSPEC_GF2P8AFFINE
165 UNSPEC_GF2P8MUL
166
167 ;; For AVX512VBMI2 support
168 UNSPEC_VPSHLD
169 UNSPEC_VPSHRD
170 UNSPEC_VPSHRDV
171 UNSPEC_VPSHLDV
172
173 ;; For AVX512VNNI support
174 UNSPEC_VPMADDUBSWACCD
175 UNSPEC_VPMADDUBSWACCSSD
176 UNSPEC_VPMADDWDACCD
177 UNSPEC_VPMADDWDACCSSD
178
179 ;; For VAES support
180 UNSPEC_VAESDEC
181 UNSPEC_VAESDECLAST
182 UNSPEC_VAESENC
183 UNSPEC_VAESENCLAST
184
185 ;; For VPCLMULQDQ support
186 UNSPEC_VPCLMULQDQ
187
188 ;; For AVX512BITALG support
189 UNSPEC_VPSHUFBIT
190
191 ;; For VP2INTERSECT support
192 UNSPEC_VP2INTERSECT
193
194 ;; For AVX512BF16 support
195 UNSPEC_VCVTNE2PS2BF16
196 UNSPEC_VCVTNEPS2BF16
197 UNSPEC_VDPBF16PS
198 ])
199
200 (define_c_enum "unspecv" [
201 UNSPECV_LDMXCSR
202 UNSPECV_STMXCSR
203 UNSPECV_CLFLUSH
204 UNSPECV_MONITOR
205 UNSPECV_MWAIT
206 UNSPECV_VZEROALL
207 UNSPECV_VZEROUPPER
208 ])
209
210 ;; All vector modes including V?TImode, used in move patterns.
211 (define_mode_iterator VMOVE
212 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
213 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
214 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
215 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
216 (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX") V1TI
217 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
218 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
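;; As an example of how these iterators are used: each pattern written with
;; VMOVE below is instantiated once per mode listed here, with <MODE>/<mode>
;; substituted, and a condition string such as "TARGET_AVX512F" is ANDed into
;; that instance's enable condition.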
219
220 ;; All AVX-512{F,VL} vector modes. Assumes TARGET_AVX512F as the baseline.
221 (define_mode_iterator V48_AVX512VL
222 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
223 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
224 V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
225 V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
226
227 ;; 1,2 byte AVX-512{BW,VL} vector modes. Assumes TARGET_AVX512BW as the baseline.
228 (define_mode_iterator VI12_AVX512VL
229 [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")
230 V32HI (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
231
232 ;; Same iterator, but without the assumed TARGET_AVX512BW baseline
233 (define_mode_iterator VI12_AVX512VLBW
234 [(V64QI "TARGET_AVX512BW") (V16QI "TARGET_AVX512VL")
235 (V32QI "TARGET_AVX512VL && TARGET_AVX512BW") (V32HI "TARGET_AVX512BW")
236 (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
237
238 (define_mode_iterator VI1_AVX512VL
239 [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")])
240
241 ;; All vector modes
242 (define_mode_iterator V
243 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
244 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
245 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
246 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
247 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
248 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
249
250 ;; All 128bit vector modes
251 (define_mode_iterator V_128
252 [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])
253
254 ;; All 256bit vector modes
255 (define_mode_iterator V_256
256 [V32QI V16HI V8SI V4DI V8SF V4DF])
257
258 ;; All 128bit and 256bit vector modes
259 (define_mode_iterator V_128_256
260 [V32QI V16QI V16HI V8HI V8SI V4SI V4DI V2DI V8SF V4SF V4DF V2DF])
261
262 ;; All 512bit vector modes
263 (define_mode_iterator V_512 [V64QI V32HI V16SI V8DI V16SF V8DF])
264
265 ;; All 256bit and 512bit vector modes
266 (define_mode_iterator V_256_512
267 [V32QI V16HI V8SI V4DI V8SF V4DF
268 (V64QI "TARGET_AVX512F") (V32HI "TARGET_AVX512F") (V16SI "TARGET_AVX512F")
269 (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
270
271 ;; All vector float modes
272 (define_mode_iterator VF
273 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
274 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
275
276 ;; 128- and 256-bit float vector modes
277 (define_mode_iterator VF_128_256
278 [(V8SF "TARGET_AVX") V4SF
279 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
280
281 ;; All SFmode vector float modes
282 (define_mode_iterator VF1
283 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF])
284
285 (define_mode_iterator VF1_AVX2
286 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX2") V4SF])
287
288 ;; 128- and 256-bit SF vector modes
289 (define_mode_iterator VF1_128_256
290 [(V8SF "TARGET_AVX") V4SF])
291
292 (define_mode_iterator VF1_128_256VL
293 [V8SF (V4SF "TARGET_AVX512VL")])
294
295 ;; All DFmode vector float modes
296 (define_mode_iterator VF2
297 [(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
298
299 ;; 128- and 256-bit DF vector modes
300 (define_mode_iterator VF2_128_256
301 [(V4DF "TARGET_AVX") V2DF])
302
303 (define_mode_iterator VF2_512_256
304 [(V8DF "TARGET_AVX512F") V4DF])
305
306 (define_mode_iterator VF2_512_256VL
307 [V8DF (V4DF "TARGET_AVX512VL")])
308
309 ;; All 128bit vector float modes
310 (define_mode_iterator VF_128
311 [V4SF (V2DF "TARGET_SSE2")])
312
313 ;; All 256bit vector float modes
314 (define_mode_iterator VF_256
315 [V8SF V4DF])
316
317 ;; All 512bit vector float modes
318 (define_mode_iterator VF_512
319 [V16SF V8DF])
320
321 (define_mode_iterator VI48_AVX512VL
322 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
323 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
324
325 (define_mode_iterator VF_AVX512VL
326 [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
327 V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
328
329 (define_mode_iterator VF2_AVX512VL
330 [V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
331
332 (define_mode_iterator VF1_AVX512VL
333 [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")])
334
335 ;; All vector integer modes
336 (define_mode_iterator VI
337 [(V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
338 (V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
339 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
340 (V8SI "TARGET_AVX") V4SI
341 (V4DI "TARGET_AVX") V2DI])
342
343 (define_mode_iterator VI_AVX2
344 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
345 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
346 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
347 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
348
349 ;; All QImode vector integer modes
350 (define_mode_iterator VI1
351 [(V32QI "TARGET_AVX") V16QI])
352
353 ;; All 128bit vector modes, plus their 256bit counterparts for TARGET_AVX
354 (define_mode_iterator V_AVX
355 [V16QI V8HI V4SI V2DI V4SF V2DF
356 (V32QI "TARGET_AVX") (V16HI "TARGET_AVX")
357 (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
358 (V8SF "TARGET_AVX") (V4DF"TARGET_AVX")])
359
360 (define_mode_iterator VI48_AVX
361 [V4SI V2DI
362 (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")])
363
364 (define_mode_iterator VI8
365 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
366
367 (define_mode_iterator VI8_FVL
368 [(V8DI "TARGET_AVX512F") V4DI (V2DI "TARGET_AVX512VL")])
369
370 (define_mode_iterator VI8_AVX512VL
371 [V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
372
373 (define_mode_iterator VI8_256_512
374 [V8DI (V4DI "TARGET_AVX512VL")])
375
376 (define_mode_iterator VI1_AVX2
377 [(V32QI "TARGET_AVX2") V16QI])
378
379 (define_mode_iterator VI1_AVX512
380 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI])
381
382 (define_mode_iterator VI1_AVX512F
383 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI])
384
385 (define_mode_iterator VI2_AVX2
386 [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
387
388 (define_mode_iterator VI2_AVX512F
389 [(V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI])
390
391 (define_mode_iterator VI4_AVX
392 [(V8SI "TARGET_AVX") V4SI])
393
394 (define_mode_iterator VI4_AVX2
395 [(V8SI "TARGET_AVX2") V4SI])
396
397 (define_mode_iterator VI4_AVX512F
398 [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
399
400 (define_mode_iterator VI4_AVX512VL
401 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")])
402
403 (define_mode_iterator VI48_AVX512F_AVX512VL
404 [V4SI V8SI (V16SI "TARGET_AVX512F")
405 (V2DI "TARGET_AVX512VL") (V4DI "TARGET_AVX512VL") (V8DI "TARGET_AVX512F")])
406
407 (define_mode_iterator VI2_AVX512VL
408 [(V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL") V32HI])
409
410 (define_mode_iterator VI1_AVX512VL_F
411 [V32QI (V16QI "TARGET_AVX512VL") (V64QI "TARGET_AVX512F")])
412
413 (define_mode_iterator VI8_AVX2_AVX512BW
414 [(V8DI "TARGET_AVX512BW") (V4DI "TARGET_AVX2") V2DI])
415
416 (define_mode_iterator VI8_AVX2
417 [(V4DI "TARGET_AVX2") V2DI])
418
419 (define_mode_iterator VI8_AVX2_AVX512F
420 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
421
422 (define_mode_iterator VI8_AVX_AVX512F
423 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX")])
424
425 (define_mode_iterator VI4_128_8_256
426 [V4SI V4DI])
427
428 ;; All V8D* modes
429 (define_mode_iterator V8FI
430 [V8DF V8DI])
431
432 ;; All V16S* modes
433 (define_mode_iterator V16FI
434 [V16SF V16SI])
435
436 ;; ??? We should probably use TImode instead.
437 (define_mode_iterator VIMAX_AVX2_AVX512BW
438 [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") V1TI])
439
440 ;; Assumes TARGET_AVX512BW as the baseline
441 (define_mode_iterator VIMAX_AVX512VL
442 [V4TI (V2TI "TARGET_AVX512VL") (V1TI "TARGET_AVX512VL")])
443
444 (define_mode_iterator VIMAX_AVX2
445 [(V2TI "TARGET_AVX2") V1TI])
446
447 ;; ??? This should probably be dropped in favor of VIMAX_AVX2_AVX512BW.
448 (define_mode_iterator SSESCALARMODE
449 [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") TI])
450
451 (define_mode_iterator VI12_AVX2
452 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
453 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
454
455 (define_mode_iterator VI24_AVX2
456 [(V16HI "TARGET_AVX2") V8HI
457 (V8SI "TARGET_AVX2") V4SI])
458
459 (define_mode_iterator VI124_AVX2_24_AVX512F_1_AVX512BW
460 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
461 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI
462 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
463
464 (define_mode_iterator VI124_AVX2
465 [(V32QI "TARGET_AVX2") V16QI
466 (V16HI "TARGET_AVX2") V8HI
467 (V8SI "TARGET_AVX2") V4SI])
468
469 (define_mode_iterator VI2_AVX2_AVX512BW
470 [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
471
472 (define_mode_iterator VI248_AVX512VL
473 [V32HI V16SI V8DI
474 (V16HI "TARGET_AVX512VL") (V8SI "TARGET_AVX512VL")
475 (V4DI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")
476 (V4SI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
477
478 (define_mode_iterator VI48_AVX2
479 [(V8SI "TARGET_AVX2") V4SI
480 (V4DI "TARGET_AVX2") V2DI])
481
482 (define_mode_iterator VI248_AVX2
483 [(V16HI "TARGET_AVX2") V8HI
484 (V8SI "TARGET_AVX2") V4SI
485 (V4DI "TARGET_AVX2") V2DI])
486
487 (define_mode_iterator VI248_AVX2_8_AVX512F_24_AVX512BW
488 [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
489 (V16SI "TARGET_AVX512BW") (V8SI "TARGET_AVX2") V4SI
490 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
491
492 (define_mode_iterator VI248_AVX512BW
493 [(V32HI "TARGET_AVX512BW") V16SI V8DI])
494
495 (define_mode_iterator VI248_AVX512BW_AVX512VL
496 [(V32HI "TARGET_AVX512BW")
497 (V4DI "TARGET_AVX512VL") V16SI V8DI])
498
499 ;; Assumes TARGET_AVX512VL as the baseline
500 (define_mode_iterator VI248_AVX512BW_1
501 [(V16HI "TARGET_AVX512BW") (V8HI "TARGET_AVX512BW")
502 V8SI V4SI
503 V2DI])
504
505 (define_mode_iterator VI248_AVX512BW_2
506 [(V16HI "TARGET_AVX512BW") (V8HI "TARGET_AVX512BW")
507 V8SI V4SI
508 V4DI V2DI])
509
510 (define_mode_iterator VI48_AVX512F
511 [(V16SI "TARGET_AVX512F") V8SI V4SI
512 (V8DI "TARGET_AVX512F") V4DI V2DI])
513
514 (define_mode_iterator VI48_AVX_AVX512F
515 [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
516 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
517
518 (define_mode_iterator VI12_AVX_AVX512F
519 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
520 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI])
521
522 (define_mode_iterator V48_AVX2
523 [V4SF V2DF
524 V8SF V4DF
525 (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
526 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
527
528 (define_mode_iterator VI1_AVX512VLBW
529 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX512VL")
530 (V16QI "TARGET_AVX512VL")])
531
532 (define_mode_attr avx512
533 [(V16QI "avx512vl") (V32QI "avx512vl") (V64QI "avx512bw")
534 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")
535 (V4SI "avx512vl") (V8SI "avx512vl") (V16SI "avx512f")
536 (V2DI "avx512vl") (V4DI "avx512vl") (V8DI "avx512f")
537 (V4SF "avx512vl") (V8SF "avx512vl") (V16SF "avx512f")
538 (V2DF "avx512vl") (V4DF "avx512vl") (V8DF "avx512f")])
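;; For instance, a pattern named "<avx512>_load<mode>_mask" becomes
;; "avx512vl_loadv4si_mask" for V4SI and "avx512f_loadv16si_mask" for V16SI;
;; this attribute only affects the generated names, not the insn conditions.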
539
540 (define_mode_attr sse2_avx_avx512f
541 [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
542 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")
543 (V4SI "sse2") (V8SI "avx") (V16SI "avx512f")
544 (V2DI "avx512vl") (V4DI "avx512vl") (V8DI "avx512f")
545 (V16SF "avx512f") (V8SF "avx") (V4SF "avx")
546 (V8DF "avx512f") (V4DF "avx") (V2DF "avx")])
547
548 (define_mode_attr sse2_avx2
549 [(V16QI "sse2") (V32QI "avx2") (V64QI "avx512bw")
550 (V8HI "sse2") (V16HI "avx2") (V32HI "avx512bw")
551 (V4SI "sse2") (V8SI "avx2") (V16SI "avx512f")
552 (V2DI "sse2") (V4DI "avx2") (V8DI "avx512f")
553 (V1TI "sse2") (V2TI "avx2") (V4TI "avx512bw")])
554
555 (define_mode_attr ssse3_avx2
556 [(V16QI "ssse3") (V32QI "avx2") (V64QI "avx512bw")
557 (V4HI "ssse3") (V8HI "ssse3") (V16HI "avx2") (V32HI "avx512bw")
558 (V4SI "ssse3") (V8SI "avx2")
559 (V2DI "ssse3") (V4DI "avx2")
560 (TI "ssse3") (V2TI "avx2") (V4TI "avx512bw")])
561
562 (define_mode_attr sse4_1_avx2
563 [(V16QI "sse4_1") (V32QI "avx2") (V64QI "avx512bw")
564 (V8HI "sse4_1") (V16HI "avx2") (V32HI "avx512bw")
565 (V4SI "sse4_1") (V8SI "avx2") (V16SI "avx512f")
566 (V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512dq")])
567
568 (define_mode_attr avx_avx2
569 [(V4SF "avx") (V2DF "avx")
570 (V8SF "avx") (V4DF "avx")
571 (V4SI "avx2") (V2DI "avx2")
572 (V8SI "avx2") (V4DI "avx2")])
573
574 (define_mode_attr vec_avx2
575 [(V16QI "vec") (V32QI "avx2")
576 (V8HI "vec") (V16HI "avx2")
577 (V4SI "vec") (V8SI "avx2")
578 (V2DI "vec") (V4DI "avx2")])
579
580 (define_mode_attr avx2_avx512
581 [(V4SI "avx2") (V8SI "avx2") (V16SI "avx512f")
582 (V2DI "avx2") (V4DI "avx2") (V8DI "avx512f")
583 (V4SF "avx2") (V8SF "avx2") (V16SF "avx512f")
584 (V2DF "avx2") (V4DF "avx2") (V8DF "avx512f")
585 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")])
586
587 (define_mode_attr shuffletype
588 [(V16SF "f") (V16SI "i") (V8DF "f") (V8DI "i")
589 (V8SF "f") (V8SI "i") (V4DF "f") (V4DI "i")
590 (V4SF "f") (V4SI "i") (V2DF "f") (V2DI "i")
591 (V32HI "i") (V16HI "i") (V8HI "i")
592 (V64QI "i") (V32QI "i") (V16QI "i")
593 (V4TI "i") (V2TI "i") (V1TI "i")])
594
595 (define_mode_attr ssequartermode
596 [(V16SF "V4SF") (V8DF "V2DF") (V16SI "V4SI") (V8DI "V2DI")])
597
598 (define_mode_attr ssequarterinsnmode
599 [(V16SF "V4SF") (V8DF "V2DF") (V16SI "TI") (V8DI "TI")])
600
601 (define_mode_attr vecmemsuffix
602 [(V16SF "{z}") (V8SF "{y}") (V4SF "{x}")
603 (V8DF "{z}") (V4DF "{y}") (V2DF "{x}")])
604
605 (define_mode_attr ssedoublemodelower
606 [(V16QI "v16hi") (V32QI "v32hi") (V64QI "v64hi")
607 (V8HI "v8si") (V16HI "v16si") (V32HI "v32si")
608 (V4SI "v4di") (V8SI "v8di") (V16SI "v16di")])
609
610 (define_mode_attr ssedoublemode
611 [(V4SF "V8SF") (V8SF "V16SF") (V16SF "V32SF")
612 (V2DF "V4DF") (V4DF "V8DF") (V8DF "V16DF")
613 (V16QI "V16HI") (V32QI "V32HI") (V64QI "V64HI")
614 (V8HI "V8SI") (V16HI "V16SI") (V32HI "V32SI")
615 (V4SI "V4DI") (V8SI "V16SI") (V16SI "V32SI")
616 (V4DI "V8DI") (V8DI "V16DI")])
617
618 (define_mode_attr ssebytemode
619 [(V8DI "V64QI") (V4DI "V32QI") (V2DI "V16QI")
620 (V16SI "V64QI") (V8SI "V32QI") (V4SI "V16QI")])
621
622 ;; All 128bit vector integer modes
623 (define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
624
625 ;; All 256bit vector integer modes
626 (define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI])
627
628 ;; Various 128bit vector integer mode combinations
629 (define_mode_iterator VI12_128 [V16QI V8HI])
630 (define_mode_iterator VI14_128 [V16QI V4SI])
631 (define_mode_iterator VI124_128 [V16QI V8HI V4SI])
632 (define_mode_iterator VI24_128 [V8HI V4SI])
633 (define_mode_iterator VI248_128 [V8HI V4SI V2DI])
634 (define_mode_iterator VI48_128 [V4SI V2DI])
635
636 ;; Various 256bit and 512bit vector integer mode combinations
637 (define_mode_iterator VI124_256 [V32QI V16HI V8SI])
638 (define_mode_iterator VI124_256_AVX512F_AVX512BW
639 [V32QI V16HI V8SI
640 (V64QI "TARGET_AVX512BW")
641 (V32HI "TARGET_AVX512BW")
642 (V16SI "TARGET_AVX512F")])
643 (define_mode_iterator VI48_256 [V8SI V4DI])
644 (define_mode_iterator VI48_512 [V16SI V8DI])
645 (define_mode_iterator VI4_256_8_512 [V8SI V8DI])
646 (define_mode_iterator VI_AVX512BW
647 [V16SI V8DI (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")])
648
649 ;; Int-float size matches
650 (define_mode_iterator VI4F_128 [V4SI V4SF])
651 (define_mode_iterator VI8F_128 [V2DI V2DF])
652 (define_mode_iterator VI4F_256 [V8SI V8SF])
653 (define_mode_iterator VI8F_256 [V4DI V4DF])
654 (define_mode_iterator VI4F_256_512
655 [V8SI V8SF
656 (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")])
657 (define_mode_iterator VI48F_256_512
658 [V8SI V8SF
659 (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
660 (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
661 (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")])
662 (define_mode_iterator VF48_I1248
663 [V16SI V16SF V8DI V8DF V32HI V64QI])
664 (define_mode_iterator VI48F
665 [V16SI V16SF V8DI V8DF
666 (V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL")
667 (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
668 (V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
669 (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
670 (define_mode_iterator VI48F_256 [V8SI V8SF V4DI V4DF])
671
672 (define_mode_iterator VF_AVX512
673 [(V4SF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")
674 (V8SF "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
675 V16SF V8DF])
676
677 (define_mode_attr avx512bcst
678 [(V4SI "%{1to4%}") (V2DI "%{1to2%}")
679 (V8SI "%{1to8%}") (V4DI "%{1to4%}")
680 (V16SI "%{1to16%}") (V8DI "%{1to8%}")
681 (V4SF "%{1to4%}") (V2DF "%{1to2%}")
682 (V8SF "%{1to8%}") (V4DF "%{1to4%}")
683 (V16SF "%{1to16%}") (V8DF "%{1to8%}")])
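;; E.g. writing "%2<avx512bcst>" in an output template appends the EVEX
;; embedded-broadcast qualifier to the memory operand, printing something like
;; "(%rax){1to8}" for a V8DF broadcast source in AT&T syntax.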
684
685 ;; Mapping from float mode to required SSE level
686 (define_mode_attr sse
687 [(SF "sse") (DF "sse2")
688 (V4SF "sse") (V2DF "sse2")
689 (V16SF "avx512f") (V8SF "avx")
690 (V8DF "avx512f") (V4DF "avx")])
691
692 (define_mode_attr sse2
693 [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
694 (V2DI "sse2") (V4DI "avx") (V8DI "avx512f")])
695
696 (define_mode_attr sse3
697 [(V16QI "sse3") (V32QI "avx")])
698
699 (define_mode_attr sse4_1
700 [(V4SF "sse4_1") (V2DF "sse4_1")
701 (V8SF "avx") (V4DF "avx")
702 (V8DF "avx512f")
703 (V4DI "avx") (V2DI "sse4_1")
704 (V8SI "avx") (V4SI "sse4_1")
705 (V16QI "sse4_1") (V32QI "avx")
706 (V8HI "sse4_1") (V16HI "avx")])
707
708 (define_mode_attr avxsizesuffix
709 [(V64QI "512") (V32HI "512") (V16SI "512") (V8DI "512")
710 (V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
711 (V16QI "") (V8HI "") (V4SI "") (V2DI "")
712 (V16SF "512") (V8DF "512")
713 (V8SF "256") (V4DF "256")
714 (V4SF "") (V2DF "")])
715
716 ;; SSE instruction mode
717 (define_mode_attr sseinsnmode
718 [(V64QI "XI") (V32HI "XI") (V16SI "XI") (V8DI "XI") (V4TI "XI")
719 (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI")
720 (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
721 (V16SF "V16SF") (V8DF "V8DF")
722 (V8SF "V8SF") (V4DF "V4DF")
723 (V4SF "V4SF") (V2DF "V2DF")
724 (TI "TI")])
725
726 ;; Mapping of vector modes to corresponding mask size
727 (define_mode_attr avx512fmaskmode
728 [(V64QI "DI") (V32QI "SI") (V16QI "HI")
729 (V32HI "SI") (V16HI "HI") (V8HI "QI") (V4HI "QI")
730 (V16SI "HI") (V8SI "QI") (V4SI "QI")
731 (V8DI "QI") (V4DI "QI") (V2DI "QI")
732 (V16SF "HI") (V8SF "QI") (V4SF "QI")
733 (V8DF "QI") (V4DF "QI") (V2DF "QI")])
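;; E.g. 16-element vectors (V16SF, V16SI, V16HI, V16QI) take an HImode mask
;; with one bit per element, 8-element and smaller vectors a QImode mask, and
;; the 32- and 64-element vectors SImode and DImode masks respectively.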
734
735 ;; Mapping of vector modes to corresponding mask size
736 (define_mode_attr avx512fmaskmodelower
737 [(V64QI "di") (V32QI "si") (V16QI "hi")
738 (V32HI "si") (V16HI "hi") (V8HI "qi") (V4HI "qi")
739 (V16SI "hi") (V8SI "qi") (V4SI "qi")
740 (V8DI "qi") (V4DI "qi") (V2DI "qi")
741 (V16SF "hi") (V8SF "qi") (V4SF "qi")
742 (V8DF "qi") (V4DF "qi") (V2DF "qi")])
743
744 ;; Mapping of vector modes to corresponding mask half size
745 (define_mode_attr avx512fmaskhalfmode
746 [(V64QI "SI") (V32QI "HI") (V16QI "QI")
747 (V32HI "HI") (V16HI "QI") (V8HI "QI") (V4HI "QI")
748 (V16SI "QI") (V8SI "QI") (V4SI "QI")
749 (V8DI "QI") (V4DI "QI") (V2DI "QI")
750 (V16SF "QI") (V8SF "QI") (V4SF "QI")
751 (V8DF "QI") (V4DF "QI") (V2DF "QI")])
752
753 ;; Mapping of vector float modes to an integer mode of the same size
754 (define_mode_attr sseintvecmode
755 [(V16SF "V16SI") (V8DF "V8DI")
756 (V8SF "V8SI") (V4DF "V4DI")
757 (V4SF "V4SI") (V2DF "V2DI")
758 (V16SI "V16SI") (V8DI "V8DI")
759 (V8SI "V8SI") (V4DI "V4DI")
760 (V4SI "V4SI") (V2DI "V2DI")
761 (V16HI "V16HI") (V8HI "V8HI")
762 (V32HI "V32HI") (V64QI "V64QI")
763 (V32QI "V32QI") (V16QI "V16QI")])
764
765 (define_mode_attr sseintvecmode2
766 [(V8DF "XI") (V4DF "OI") (V2DF "TI")
767 (V8SF "OI") (V4SF "TI")])
768
769 (define_mode_attr sseintvecmodelower
770 [(V16SF "v16si") (V8DF "v8di")
771 (V8SF "v8si") (V4DF "v4di")
772 (V4SF "v4si") (V2DF "v2di")
773 (V8SI "v8si") (V4DI "v4di")
774 (V4SI "v4si") (V2DI "v2di")
775 (V16HI "v16hi") (V8HI "v8hi")
776 (V32QI "v32qi") (V16QI "v16qi")])
777
778 ;; Mapping of vector modes to a vector mode of double size
779 (define_mode_attr ssedoublevecmode
780 [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
781 (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
782 (V8SF "V16SF") (V4DF "V8DF")
783 (V4SF "V8SF") (V2DF "V4DF")])
784
785 ;; Mapping of vector modes to a vector mode of half size
786 (define_mode_attr ssehalfvecmode
787 [(V64QI "V32QI") (V32HI "V16HI") (V16SI "V8SI") (V8DI "V4DI") (V4TI "V2TI")
788 (V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
789 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI")
790 (V16SF "V8SF") (V8DF "V4DF")
791 (V8SF "V4SF") (V4DF "V2DF")
792 (V4SF "V2SF")])
793
794 (define_mode_attr ssehalfvecmodelower
795 [(V64QI "v32qi") (V32HI "v16hi") (V16SI "v8si") (V8DI "v4di") (V4TI "v2ti")
796 (V32QI "v16qi") (V16HI "v8hi") (V8SI "v4si") (V4DI "v2di")
797 (V16QI "v8qi") (V8HI "v4hi") (V4SI "v2si")
798 (V16SF "v8sf") (V8DF "v4df")
799 (V8SF "v4sf") (V4DF "v2df")
800 (V4SF "v2sf")])
801
802 ;; Mapping of vector modes to the packed single mode of the same size
803 (define_mode_attr ssePSmode
804 [(V16SI "V16SF") (V8DF "V16SF")
805 (V16SF "V16SF") (V8DI "V16SF")
806 (V64QI "V16SF") (V32QI "V8SF") (V16QI "V4SF")
807 (V32HI "V16SF") (V16HI "V8SF") (V8HI "V4SF")
808 (V8SI "V8SF") (V4SI "V4SF")
809 (V4DI "V8SF") (V2DI "V4SF")
810 (V4TI "V16SF") (V2TI "V8SF") (V1TI "V4SF")
811 (V8SF "V8SF") (V4SF "V4SF")
812 (V4DF "V8SF") (V2DF "V4SF")])
813
814 (define_mode_attr ssePSmode2
815 [(V8DI "V8SF") (V4DI "V4SF")])
816
817 ;; Mapping of vector modes back to the scalar modes
818 (define_mode_attr ssescalarmode
819 [(V64QI "QI") (V32QI "QI") (V16QI "QI")
820 (V32HI "HI") (V16HI "HI") (V8HI "HI")
821 (V16SI "SI") (V8SI "SI") (V4SI "SI")
822 (V8DI "DI") (V4DI "DI") (V2DI "DI")
823 (V16SF "SF") (V8SF "SF") (V4SF "SF")
824 (V8DF "DF") (V4DF "DF") (V2DF "DF")
825 (V4TI "TI") (V2TI "TI")])
826
827 ;; Mapping of vector modes back to the scalar modes
828 (define_mode_attr ssescalarmodelower
829 [(V64QI "qi") (V32QI "qi") (V16QI "qi")
830 (V32HI "hi") (V16HI "hi") (V8HI "hi")
831 (V16SI "si") (V8SI "si") (V4SI "si")
832 (V8DI "di") (V4DI "di") (V2DI "di")
833 (V16SF "sf") (V8SF "sf") (V4SF "sf")
834 (V8DF "df") (V4DF "df") (V2DF "df")
835 (V4TI "ti") (V2TI "ti")])
836
837 ;; Mapping of vector modes to the 128bit modes
838 (define_mode_attr ssexmmmode
839 [(V64QI "V16QI") (V32QI "V16QI") (V16QI "V16QI")
840 (V32HI "V8HI") (V16HI "V8HI") (V8HI "V8HI")
841 (V16SI "V4SI") (V8SI "V4SI") (V4SI "V4SI")
842 (V8DI "V2DI") (V4DI "V2DI") (V2DI "V2DI")
843 (V16SF "V4SF") (V8SF "V4SF") (V4SF "V4SF")
844 (V8DF "V2DF") (V4DF "V2DF") (V2DF "V2DF")])
845
846 ;; Pointer size override for scalar modes (Intel asm dialect)
847 (define_mode_attr iptr
848 [(V64QI "b") (V32HI "w") (V16SI "k") (V8DI "q")
849 (V32QI "b") (V16HI "w") (V8SI "k") (V4DI "q")
850 (V16QI "b") (V8HI "w") (V4SI "k") (V2DI "q")
851 (V16SF "k") (V8DF "q")
852 (V8SF "k") (V4DF "q")
853 (V4SF "k") (V2DF "q")
854 (SF "k") (DF "q")])
855
856 ;; Mapping of vector modes to VPTERNLOG suffix
857 (define_mode_attr ternlogsuffix
858 [(V8DI "q") (V4DI "q") (V2DI "q")
859 (V16SI "d") (V8SI "d") (V4SI "d")
860 (V32HI "d") (V16HI "d") (V8HI "d")
861 (V64QI "d") (V32QI "d") (V16QI "d")])
862
863 ;; Number of scalar elements in each vector type
864 (define_mode_attr ssescalarnum
865 [(V64QI "64") (V16SI "16") (V8DI "8")
866 (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
867 (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
868 (V16SF "16") (V8DF "8")
869 (V8SF "8") (V4DF "4")
870 (V4SF "4") (V2DF "2")])
871
872 ;; Element index mask (number of scalar elements minus one) in each vector type
873 (define_mode_attr ssescalarnummask
874 [(V32QI "31") (V16HI "15") (V8SI "7") (V4DI "3")
875 (V16QI "15") (V8HI "7") (V4SI "3") (V2DI "1")
876 (V8SF "7") (V4DF "3")
877 (V4SF "3") (V2DF "1")])
878
879 (define_mode_attr ssescalarsize
880 [(V4TI "64") (V2TI "64") (V1TI "64")
881 (V8DI "64") (V4DI "64") (V2DI "64")
882 (V64QI "8") (V32QI "8") (V16QI "8")
883 (V32HI "16") (V16HI "16") (V8HI "16")
884 (V16SI "32") (V8SI "32") (V4SI "32")
885 (V16SF "32") (V8SF "32") (V4SF "32")
886 (V8DF "64") (V4DF "64") (V2DF "64")])
887
888 ;; SSE prefix for integer vector modes
889 (define_mode_attr sseintprefix
890 [(V2DI "p") (V2DF "")
891 (V4DI "p") (V4DF "")
892 (V8DI "p") (V8DF "")
893 (V4SI "p") (V4SF "")
894 (V8SI "p") (V8SF "")
895 (V16SI "p") (V16SF "")
896 (V16QI "p") (V8HI "p")
897 (V32QI "p") (V16HI "p")
898 (V64QI "p") (V32HI "p")])
899
900 ;; SSE scalar suffix for vector modes
901 (define_mode_attr ssescalarmodesuffix
902 [(SF "ss") (DF "sd")
903 (V16SF "ss") (V8DF "sd")
904 (V8SF "ss") (V4DF "sd")
905 (V4SF "ss") (V2DF "sd")
906 (V16SI "d") (V8DI "q")
907 (V8SI "d") (V4DI "q")
908 (V4SI "d") (V2DI "q")])
909
910 ;; Pack/unpack vector modes
911 (define_mode_attr sseunpackmode
912 [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")
913 (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")
914 (V32HI "V16SI") (V64QI "V32HI") (V16SI "V8DI")])
915
916 (define_mode_attr ssepackmode
917 [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")
918 (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")
919 (V32HI "V64QI") (V16SI "V32HI") (V8DI "V16SI")])
920
921 ;; Mapping of the max integer size for xop rotate immediate constraint
922 (define_mode_attr sserotatemax
923 [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
924
925 ;; Mapping of mode to cast intrinsic name
926 (define_mode_attr castmode
927 [(V8SI "si") (V8SF "ps") (V4DF "pd")
928 (V16SI "si") (V16SF "ps") (V8DF "pd")])
929
930 ;; Instruction suffix for sign and zero extensions.
931 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
932
933 ;; i128 for integer vectors and TARGET_AVX2, f128 otherwise.
934 ;; i64x4 or f64x4 for 512bit modes.
935 (define_mode_attr i128
936 [(V16SF "f64x4") (V8SF "f128") (V8DF "f64x4") (V4DF "f128")
937 (V64QI "i64x4") (V32QI "%~128") (V32HI "i64x4") (V16HI "%~128")
938 (V16SI "i64x4") (V8SI "%~128") (V8DI "i64x4") (V4DI "%~128")])
939
940 ;; For 256-bit modes for TARGET_AVX512VL && TARGET_AVX512DQ
941 ;; i32x4, f32x4, i64x2 or f64x2 suffixes.
942 (define_mode_attr i128vldq
943 [(V8SF "f32x4") (V4DF "f64x2")
944 (V32QI "i32x4") (V16HI "i32x4") (V8SI "i32x4") (V4DI "i64x2")])
945
946 ;; Mix-n-match
947 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
948 (define_mode_iterator AVX512MODE2P [V16SI V16SF V8DF])
949
950 ;; Mapping for dbpsadbw modes
951 (define_mode_attr dbpsadbwmode
952 [(V32HI "V64QI") (V16HI "V32QI") (V8HI "V16QI")])
953
954 ;; Mapping suffixes for broadcast
955 (define_mode_attr bcstscalarsuff
956 [(V64QI "b") (V32QI "b") (V16QI "b")
957 (V32HI "w") (V16HI "w") (V8HI "w")
958 (V16SI "d") (V8SI "d") (V4SI "d")
959 (V8DI "q") (V4DI "q") (V2DI "q")
960 (V16SF "ss") (V8SF "ss") (V4SF "ss")
961 (V8DF "sd") (V4DF "sd") (V2DF "sd")])
962
963 ;; Tie mode of assembler operand to mode iterator
964 (define_mode_attr xtg_mode
965 [(V16QI "x") (V8HI "x") (V4SI "x") (V2DI "x") (V4SF "x") (V2DF "x")
966 (V32QI "t") (V16HI "t") (V8SI "t") (V4DI "t") (V8SF "t") (V4DF "t")
967 (V64QI "g") (V32HI "g") (V16SI "g") (V8DI "g") (V16SF "g") (V8DF "g")])
968
969 ;; Half mask mode for unpacks
970 (define_mode_attr HALFMASKMODE
971 [(DI "SI") (SI "HI")])
972
973 ;; Double mask mode for packs
974 (define_mode_attr DOUBLEMASKMODE
975 [(HI "SI") (SI "DI")])
976
977
978 ;; Include define_subst patterns for instructions with mask
979 (include "subst.md")
980
981 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
982
983 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
984 ;;
985 ;; Move patterns
986 ;;
987 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
988
989 ;; All of these patterns are enabled for SSE1 as well as SSE2.
990 ;; This is essential for maintaining stable calling conventions.
991
992 (define_expand "mov<mode>"
993 [(set (match_operand:VMOVE 0 "nonimmediate_operand")
994 (match_operand:VMOVE 1 "nonimmediate_operand"))]
995 "TARGET_SSE"
996 {
997 ix86_expand_vector_move (<MODE>mode, operands);
998 DONE;
999 })
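;; ix86_expand_vector_move legitimizes the operands first (for instance,
;; forcing a second memory operand or a non-trivial constant into a register),
;; so mov<mode>_internal below only needs to handle the remaining
;; register/memory/standard-constant forms.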
1000
1001 (define_insn "mov<mode>_internal"
1002 [(set (match_operand:VMOVE 0 "nonimmediate_operand"
1003 "=v,v ,v ,m")
1004 (match_operand:VMOVE 1 "nonimmediate_or_sse_const_operand"
1005 " C,BC,vm,v"))]
1006 "TARGET_SSE
1007 && (register_operand (operands[0], <MODE>mode)
1008 || register_operand (operands[1], <MODE>mode))"
1009 {
1010 switch (get_attr_type (insn))
1011 {
1012 case TYPE_SSELOG1:
1013 return standard_sse_constant_opcode (insn, operands);
1014
1015 case TYPE_SSEMOV:
1016 return ix86_output_ssemov (insn, operands);
1017
1018 default:
1019 gcc_unreachable ();
1020 }
1021 }
1022 [(set_attr "type" "sselog1,sselog1,ssemov,ssemov")
1023 (set_attr "prefix" "maybe_vex")
1024 (set (attr "mode")
1025 (cond [(match_test "TARGET_AVX")
1026 (const_string "<sseinsnmode>")
1027 (ior (not (match_test "TARGET_SSE2"))
1028 (match_test "optimize_function_for_size_p (cfun)"))
1029 (const_string "V4SF")
1030 (and (match_test "<MODE>mode == V2DFmode")
1031 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
1032 (const_string "V4SF")
1033 (and (eq_attr "alternative" "3")
1034 (match_test "TARGET_SSE_TYPELESS_STORES"))
1035 (const_string "V4SF")
1036 (and (eq_attr "alternative" "0")
1037 (match_test "TARGET_SSE_LOAD0_BY_PXOR"))
1038 (const_string "TI")
1039 ]
1040 (const_string "<sseinsnmode>")))
1041 (set (attr "enabled")
1042 (cond [(and (match_test "<MODE_SIZE> == 16")
1043 (eq_attr "alternative" "1"))
1044 (symbol_ref "TARGET_SSE2")
1045 (and (match_test "<MODE_SIZE> == 32")
1046 (eq_attr "alternative" "1"))
1047 (symbol_ref "TARGET_AVX2")
1048 ]
1049 (symbol_ref "true")))])
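;; Roughly, alternative 0 (a zero constant) is emitted by
;; standard_sse_constant_opcode as an xorps/pxor/vpxor of the destination with
;; itself, alternative 1 (other standard SSE constants, e.g. all-ones) as a
;; pcmpeqd-style idiom, and the register/memory alternatives as plain vector
;; moves via ix86_output_ssemov.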
1050
1051 (define_insn "<avx512>_load<mode>_mask"
1052 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v")
1053 (vec_merge:V48_AVX512VL
1054 (match_operand:V48_AVX512VL 1 "nonimmediate_operand" "v,m")
1055 (match_operand:V48_AVX512VL 2 "nonimm_or_0_operand" "0C,0C")
1056 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
1057 "TARGET_AVX512F"
1058 {
1059 if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
1060 {
1061 if (misaligned_operand (operands[1], <MODE>mode))
1062 return "vmovu<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
1063 else
1064 return "vmova<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
1065 }
1066 else
1067 {
1068 if (misaligned_operand (operands[1], <MODE>mode))
1069 return "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
1070 else
1071 return "vmovdqa<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
1072 }
1073 }
1074 [(set_attr "type" "ssemov")
1075 (set_attr "prefix" "evex")
1076 (set_attr "memory" "none,load")
1077 (set_attr "mode" "<sseinsnmode>")])
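;; For example, a V16SF load with the mask in %k1 and a zero operand 2
;; assembles to something like "vmovups (%rax), %zmm0{%k1}{z}"; the %N2
;; directive adds the {z} zero-masking qualifier only when operand 2 is a
;; zero constant, otherwise the load merges into the destination.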
1078
1079 (define_insn "<avx512>_load<mode>_mask"
1080 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v")
1081 (vec_merge:VI12_AVX512VL
1082 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "v,m")
1083 (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand" "0C,0C")
1084 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
1085 "TARGET_AVX512BW"
1086 "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
1087 [(set_attr "type" "ssemov")
1088 (set_attr "prefix" "evex")
1089 (set_attr "memory" "none,load")
1090 (set_attr "mode" "<sseinsnmode>")])
1091
1092 (define_insn "avx512f_mov<ssescalarmodelower>_mask"
1093 [(set (match_operand:VF_128 0 "register_operand" "=v")
1094 (vec_merge:VF_128
1095 (vec_merge:VF_128
1096 (match_operand:VF_128 2 "register_operand" "v")
1097 (match_operand:VF_128 3 "nonimm_or_0_operand" "0C")
1098 (match_operand:QI 4 "register_operand" "Yk"))
1099 (match_operand:VF_128 1 "register_operand" "v")
1100 (const_int 1)))]
1101 "TARGET_AVX512F"
1102 "vmov<ssescalarmodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
1103 [(set_attr "type" "ssemov")
1104 (set_attr "prefix" "evex")
1105 (set_attr "mode" "<ssescalarmode>")])
1106
1107 (define_expand "avx512f_load<mode>_mask"
1108 [(set (match_operand:<ssevecmode> 0 "register_operand")
1109 (vec_merge:<ssevecmode>
1110 (vec_merge:<ssevecmode>
1111 (vec_duplicate:<ssevecmode>
1112 (match_operand:MODEF 1 "memory_operand"))
1113 (match_operand:<ssevecmode> 2 "nonimm_or_0_operand")
1114 (match_operand:QI 3 "register_operand"))
1115 (match_dup 4)
1116 (const_int 1)))]
1117 "TARGET_AVX512F"
1118 "operands[4] = CONST0_RTX (<ssevecmode>mode);")
1119
1120 (define_insn "*avx512f_load<mode>_mask"
1121 [(set (match_operand:<ssevecmode> 0 "register_operand" "=v")
1122 (vec_merge:<ssevecmode>
1123 (vec_merge:<ssevecmode>
1124 (vec_duplicate:<ssevecmode>
1125 (match_operand:MODEF 1 "memory_operand" "m"))
1126 (match_operand:<ssevecmode> 2 "nonimm_or_0_operand" "0C")
1127 (match_operand:QI 3 "register_operand" "Yk"))
1128 (match_operand:<ssevecmode> 4 "const0_operand" "C")
1129 (const_int 1)))]
1130 "TARGET_AVX512F"
1131 "vmov<ssescalarmodesuffix>\t{%1, %0%{%3%}%N2|%0%{3%}%N2, %1}"
1132 [(set_attr "type" "ssemov")
1133 (set_attr "prefix" "evex")
1134 (set_attr "memory" "load")
1135 (set_attr "mode" "<MODE>")])
1136
1137 (define_insn "avx512f_store<mode>_mask"
1138 [(set (match_operand:MODEF 0 "memory_operand" "=m")
1139 (if_then_else:MODEF
1140 (and:QI (match_operand:QI 2 "register_operand" "Yk")
1141 (const_int 1))
1142 (vec_select:MODEF
1143 (match_operand:<ssevecmode> 1 "register_operand" "v")
1144 (parallel [(const_int 0)]))
1145 (match_dup 0)))]
1146 "TARGET_AVX512F"
1147 "vmov<ssescalarmodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
1148 [(set_attr "type" "ssemov")
1149 (set_attr "prefix" "evex")
1150 (set_attr "memory" "store")
1151 (set_attr "mode" "<MODE>")])
1152
1153 (define_insn "<avx512>_blendm<mode>"
1154 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
1155 (vec_merge:V48_AVX512VL
1156 (match_operand:V48_AVX512VL 2 "nonimmediate_operand" "vm")
1157 (match_operand:V48_AVX512VL 1 "register_operand" "v")
1158 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
1159 "TARGET_AVX512F"
1160 "v<sseintprefix>blendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
1161 [(set_attr "type" "ssemov")
1162 (set_attr "prefix" "evex")
1163 (set_attr "mode" "<sseinsnmode>")])
1164
1165 (define_insn "<avx512>_blendm<mode>"
1166 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
1167 (vec_merge:VI12_AVX512VL
1168 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
1169 (match_operand:VI12_AVX512VL 1 "register_operand" "v")
1170 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
1171 "TARGET_AVX512BW"
1172 "vpblendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
1173 [(set_attr "type" "ssemov")
1174 (set_attr "prefix" "evex")
1175 (set_attr "mode" "<sseinsnmode>")])
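;; The blendm forms are used when the merge source (operand 1) is a register
;; other than the destination, which a merge-masked vmovaps/vmovdqa cannot
;; express; e.g. "vpblendmb %zmm2, %zmm1, %zmm0{%k1}".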
1176
1177 (define_insn "<avx512>_store<mode>_mask"
1178 [(set (match_operand:V48_AVX512VL 0 "memory_operand" "=m")
1179 (vec_merge:V48_AVX512VL
1180 (match_operand:V48_AVX512VL 1 "register_operand" "v")
1181 (match_dup 0)
1182 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1183 "TARGET_AVX512F"
1184 {
1185 if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
1186 {
1187 if (misaligned_operand (operands[0], <MODE>mode))
1188 return "vmovu<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1189 else
1190 return "vmova<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1191 }
1192 else
1193 {
1194 if (misaligned_operand (operands[0], <MODE>mode))
1195 return "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1196 else
1197 return "vmovdqa<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1198 }
1199 }
1200 [(set_attr "type" "ssemov")
1201 (set_attr "prefix" "evex")
1202 (set_attr "memory" "store")
1203 (set_attr "mode" "<sseinsnmode>")])
1204
1205 (define_insn "<avx512>_store<mode>_mask"
1206 [(set (match_operand:VI12_AVX512VL 0 "memory_operand" "=m")
1207 (vec_merge:VI12_AVX512VL
1208 (match_operand:VI12_AVX512VL 1 "register_operand" "v")
1209 (match_dup 0)
1210 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1211 "TARGET_AVX512BW"
1212 "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
1213 [(set_attr "type" "ssemov")
1214 (set_attr "prefix" "evex")
1215 (set_attr "memory" "store")
1216 (set_attr "mode" "<sseinsnmode>")])
1217
1218 (define_insn "sse2_movq128"
1219 [(set (match_operand:V2DI 0 "register_operand" "=v")
1220 (vec_concat:V2DI
1221 (vec_select:DI
1222 (match_operand:V2DI 1 "nonimmediate_operand" "vm")
1223 (parallel [(const_int 0)]))
1224 (const_int 0)))]
1225 "TARGET_SSE2"
1226 "%vmovq\t{%1, %0|%0, %q1}"
1227 [(set_attr "type" "ssemov")
1228 (set_attr "prefix" "maybe_vex")
1229 (set_attr "mode" "TI")])
1230
1231 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
1232 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
1233 ;; from memory, we'd prefer to load the memory directly into the %xmm
1234 ;; register. To facilitate this happy circumstance, this pattern won't
1235 ;; split until after register allocation. If the 64-bit value didn't
1236 ;; come from memory, this is the best we can do. This is much better
1237 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
1238 ;; from there.
1239
1240 (define_insn_and_split "movdi_to_sse"
1241 [(set (match_operand:V4SI 0 "register_operand" "=x,x,?x")
1242 (unspec:V4SI [(match_operand:DI 1 "nonimmediate_operand" "r,m,r")]
1243 UNSPEC_MOVDI_TO_SSE))
1244 (clobber (match_scratch:V4SI 2 "=X,X,&x"))]
1245 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC"
1246 "#"
1247 "&& reload_completed"
1248 [(const_int 0)]
1249 {
1250 if (register_operand (operands[1], DImode))
1251 {
1252 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
1253 Assemble the 64-bit DImode value in an xmm register. */
1254 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
1255 gen_lowpart (SImode, operands[1])));
1256 if (TARGET_SSE4_1)
1257 emit_insn (gen_sse4_1_pinsrd (operands[0], operands[0],
1258 gen_highpart (SImode, operands[1]),
1259 GEN_INT (2)));
1260 else
1261 {
1262 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
1263 gen_highpart (SImode, operands[1])));
1264 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
1265 operands[2]));
1266 }
1267 }
1268 else if (memory_operand (operands[1], DImode))
1269 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
1270 operands[1], const0_rtx));
1271 else
1272 gcc_unreachable ();
1273 DONE;
1274 }
1275 [(set_attr "isa" "sse4,*,*")])
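;; Roughly, the SSE4.1 path above ends up as "movd %eax, %xmm0" followed by
;; "pinsrd $1, %edx, %xmm0", while the fallback loads each half with movd and
;; merges them with punpckldq through the scratch register.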
1276
1277 (define_split
1278 [(set (match_operand:V4SF 0 "register_operand")
1279 (match_operand:V4SF 1 "zero_extended_scalar_load_operand"))]
1280 "TARGET_SSE && reload_completed"
1281 [(set (match_dup 0)
1282 (vec_merge:V4SF
1283 (vec_duplicate:V4SF (match_dup 1))
1284 (match_dup 2)
1285 (const_int 1)))]
1286 {
1287 operands[1] = gen_lowpart (SFmode, operands[1]);
1288 operands[2] = CONST0_RTX (V4SFmode);
1289 })
1290
1291 (define_split
1292 [(set (match_operand:V2DF 0 "register_operand")
1293 (match_operand:V2DF 1 "zero_extended_scalar_load_operand"))]
1294 "TARGET_SSE2 && reload_completed"
1295 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
1296 {
1297 operands[1] = gen_lowpart (DFmode, operands[1]);
1298 operands[2] = CONST0_RTX (DFmode);
1299 })
1300
1301 (define_expand "movmisalign<mode>"
1302 [(set (match_operand:VMOVE 0 "nonimmediate_operand")
1303 (match_operand:VMOVE 1 "nonimmediate_operand"))]
1304 "TARGET_SSE"
1305 {
1306 ix86_expand_vector_move_misalign (<MODE>mode, operands);
1307 DONE;
1308 })
1309
1310 ;; Merge movsd/movhpd to movupd for TARGET_SSE_UNALIGNED_LOAD_OPTIMAL targets.
1311 (define_peephole2
1312 [(set (match_operand:V2DF 0 "sse_reg_operand")
1313 (vec_concat:V2DF (match_operand:DF 1 "memory_operand")
1314 (match_operand:DF 4 "const0_operand")))
1315 (set (match_operand:V2DF 2 "sse_reg_operand")
1316 (vec_concat:V2DF (vec_select:DF (match_dup 2)
1317 (parallel [(const_int 0)]))
1318 (match_operand:DF 3 "memory_operand")))]
1319 "TARGET_SSE2 && TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
1320 && ix86_operands_ok_for_move_multiple (operands, true, DFmode)"
1321 [(set (match_dup 2) (match_dup 5))]
1322 "operands[5] = adjust_address (operands[1], V2DFmode, 0);")
1323
1324 (define_peephole2
1325 [(set (match_operand:DF 0 "sse_reg_operand")
1326 (match_operand:DF 1 "memory_operand"))
1327 (set (match_operand:V2DF 2 "sse_reg_operand")
1328 (vec_concat:V2DF (match_operand:DF 4 "sse_reg_operand")
1329 (match_operand:DF 3 "memory_operand")))]
1330 "TARGET_SSE2 && TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
1331 && REGNO (operands[4]) == REGNO (operands[2])
1332 && ix86_operands_ok_for_move_multiple (operands, true, DFmode)"
1333 [(set (match_dup 2) (match_dup 5))]
1334 "operands[5] = adjust_address (operands[1], V2DFmode, 0);")
1335
1336 ;; Merge movlpd/movhpd to movupd for TARGET_SSE_UNALIGNED_STORE_OPTIMAL targets.
1337 (define_peephole2
1338 [(set (match_operand:DF 0 "memory_operand")
1339 (vec_select:DF (match_operand:V2DF 1 "sse_reg_operand")
1340 (parallel [(const_int 0)])))
1341 (set (match_operand:DF 2 "memory_operand")
1342 (vec_select:DF (match_operand:V2DF 3 "sse_reg_operand")
1343 (parallel [(const_int 1)])))]
1344 "TARGET_SSE2 && TARGET_SSE_UNALIGNED_STORE_OPTIMAL
1345 && ix86_operands_ok_for_move_multiple (operands, false, DFmode)"
1346 [(set (match_dup 4) (match_dup 1))]
1347 "operands[4] = adjust_address (operands[0], V2DFmode, 0);")
1348
1349 (define_insn "<sse3>_lddqu<avxsizesuffix>"
1350 [(set (match_operand:VI1 0 "register_operand" "=x")
1351 (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
1352 UNSPEC_LDDQU))]
1353 "TARGET_SSE3"
1354 "%vlddqu\t{%1, %0|%0, %1}"
1355 [(set_attr "type" "ssemov")
1356 (set_attr "movu" "1")
1357 (set (attr "prefix_data16")
1358 (if_then_else
1359 (match_test "TARGET_AVX")
1360 (const_string "*")
1361 (const_string "0")))
1362 (set (attr "prefix_rep")
1363 (if_then_else
1364 (match_test "TARGET_AVX")
1365 (const_string "*")
1366 (const_string "1")))
1367 (set_attr "prefix" "maybe_vex")
1368 (set_attr "mode" "<sseinsnmode>")])
1369
1370 (define_insn "sse2_movnti<mode>"
1371 [(set (match_operand:SWI48 0 "memory_operand" "=m")
1372 (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")]
1373 UNSPEC_MOVNT))]
1374 "TARGET_SSE2"
1375 "movnti\t{%1, %0|%0, %1}"
1376 [(set_attr "type" "ssemov")
1377 (set_attr "prefix_data16" "0")
1378 (set_attr "mode" "<MODE>")])
1379
1380 (define_insn "<sse>_movnt<mode>"
1381 [(set (match_operand:VF 0 "memory_operand" "=m")
1382 (unspec:VF
1383 [(match_operand:VF 1 "register_operand" "v")]
1384 UNSPEC_MOVNT))]
1385 "TARGET_SSE"
1386 "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
1387 [(set_attr "type" "ssemov")
1388 (set_attr "prefix" "maybe_vex")
1389 (set_attr "mode" "<MODE>")])
1390
1391 (define_insn "<sse2>_movnt<mode>"
1392 [(set (match_operand:VI8 0 "memory_operand" "=m")
1393 (unspec:VI8 [(match_operand:VI8 1 "register_operand" "v")]
1394 UNSPEC_MOVNT))]
1395 "TARGET_SSE2"
1396 "%vmovntdq\t{%1, %0|%0, %1}"
1397 [(set_attr "type" "ssecvt")
1398 (set (attr "prefix_data16")
1399 (if_then_else
1400 (match_test "TARGET_AVX")
1401 (const_string "*")
1402 (const_string "1")))
1403 (set_attr "prefix" "maybe_vex")
1404 (set_attr "mode" "<sseinsnmode>")])
1405
1406 ; Expand patterns for non-temporal stores. At the moment, only those
1407 ; that directly map to insns are defined; it would be possible to
1408 ; define patterns for other modes that would expand to several insns.
1409
1410 ;; Modes handled by storent patterns.
1411 (define_mode_iterator STORENT_MODE
1412 [(DI "TARGET_SSE2 && TARGET_64BIT") (SI "TARGET_SSE2")
1413 (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
1414 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2")
1415 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
1416 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
1417
1418 (define_expand "storent<mode>"
1419 [(set (match_operand:STORENT_MODE 0 "memory_operand")
1420 (unspec:STORENT_MODE
1421 [(match_operand:STORENT_MODE 1 "register_operand")]
1422 UNSPEC_MOVNT))]
1423 "TARGET_SSE")
1424
1425 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1426 ;;
1427 ;; Mask operations
1428 ;;
1429 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1430
1431 ;; All integer modes with AVX512BW/DQ.
1432 (define_mode_iterator SWI1248_AVX512BWDQ
1433 [(QI "TARGET_AVX512DQ") HI (SI "TARGET_AVX512BW") (DI "TARGET_AVX512BW")])
1434
1435 ;; All integer modes with AVX512BW, where HImode operation
1436 ;; can be used instead of QImode.
1437 (define_mode_iterator SWI1248_AVX512BW
1438 [QI HI (SI "TARGET_AVX512BW") (DI "TARGET_AVX512BW")])
1439
1440 ;; All integer modes with AVX512BW/DQ, where even HImode requires DQ.
1441 (define_mode_iterator SWI1248_AVX512BWDQ2
1442 [(QI "TARGET_AVX512DQ") (HI "TARGET_AVX512DQ")
1443 (SI "TARGET_AVX512BW") (DI "TARGET_AVX512BW")])
1444
1445 (define_expand "kmov<mskmodesuffix>"
1446 [(set (match_operand:SWI1248_AVX512BWDQ 0 "nonimmediate_operand")
1447 (match_operand:SWI1248_AVX512BWDQ 1 "nonimmediate_operand"))]
1448 "TARGET_AVX512F
1449 && !(MEM_P (operands[0]) && MEM_P (operands[1]))")
1450
1451 (define_insn "k<code><mode>"
1452 [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
1453 (any_logic:SWI1248_AVX512BW
1454 (match_operand:SWI1248_AVX512BW 1 "register_operand" "k")
1455 (match_operand:SWI1248_AVX512BW 2 "register_operand" "k")))
1456 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1457 "TARGET_AVX512F"
1458 {
1459 if (get_attr_mode (insn) == MODE_HI)
1460 return "k<logic>w\t{%2, %1, %0|%0, %1, %2}";
1461 else
1462 return "k<logic><mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
1463 }
1464 [(set_attr "type" "msklog")
1465 (set_attr "prefix" "vex")
1466 (set (attr "mode")
1467 (cond [(and (match_test "<MODE>mode == QImode")
1468 (not (match_test "TARGET_AVX512DQ")))
1469 (const_string "HI")
1470 ]
1471 (const_string "<MODE>")))])
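;; E.g. this emits kandw/korw/kxorw for the baseline HImode masks; for QImode
;; masks the byte forms (kandb etc.) are only available with AVX512DQ, so
;; without it the word-sized form is used instead, as the "mode" attribute
;; cond above encodes.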
1472
1473 (define_insn "kandn<mode>"
1474 [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
1475 (and:SWI1248_AVX512BW
1476 (not:SWI1248_AVX512BW
1477 (match_operand:SWI1248_AVX512BW 1 "register_operand" "k"))
1478 (match_operand:SWI1248_AVX512BW 2 "register_operand" "k")))
1479 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1480 "TARGET_AVX512F"
1481 {
1482 if (get_attr_mode (insn) == MODE_HI)
1483 return "kandnw\t{%2, %1, %0|%0, %1, %2}";
1484 else
1485 return "kandn<mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
1486 }
1487 [(set_attr "type" "msklog")
1488 (set_attr "prefix" "vex")
1489 (set (attr "mode")
1490 (cond [(and (match_test "<MODE>mode == QImode")
1491 (not (match_test "TARGET_AVX512DQ")))
1492 (const_string "HI")
1493 ]
1494 (const_string "<MODE>")))])
1495
1496 (define_insn "kxnor<mode>"
1497 [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
1498 (not:SWI1248_AVX512BW
1499 (xor:SWI1248_AVX512BW
1500 (match_operand:SWI1248_AVX512BW 1 "register_operand" "k")
1501 (match_operand:SWI1248_AVX512BW 2 "register_operand" "k"))))
1502 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1503 "TARGET_AVX512F"
1504 {
1505 if (get_attr_mode (insn) == MODE_HI)
1506 return "kxnorw\t{%2, %1, %0|%0, %1, %2}";
1507 else
1508 return "kxnor<mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
1509 }
1510 [(set_attr "type" "msklog")
1511 (set_attr "prefix" "vex")
1512 (set (attr "mode")
1513 (cond [(and (match_test "<MODE>mode == QImode")
1514 (not (match_test "TARGET_AVX512DQ")))
1515 (const_string "HI")
1516 ]
1517 (const_string "<MODE>")))])
1518
1519 (define_insn "knot<mode>"
1520 [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
1521 (not:SWI1248_AVX512BW
1522 (match_operand:SWI1248_AVX512BW 1 "register_operand" "k")))
1523 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1524 "TARGET_AVX512F"
1525 {
1526 if (get_attr_mode (insn) == MODE_HI)
1527 return "knotw\t{%1, %0|%0, %1}";
1528 else
1529 return "knot<mskmodesuffix>\t{%1, %0|%0, %1}";
1530 }
1531 [(set_attr "type" "msklog")
1532 (set_attr "prefix" "vex")
1533 (set (attr "mode")
1534 (cond [(and (match_test "<MODE>mode == QImode")
1535 (not (match_test "TARGET_AVX512DQ")))
1536 (const_string "HI")
1537 ]
1538 (const_string "<MODE>")))])
1539
1540 (define_insn "kadd<mode>"
1541 [(set (match_operand:SWI1248_AVX512BWDQ2 0 "register_operand" "=k")
1542 (plus:SWI1248_AVX512BWDQ2
1543 (match_operand:SWI1248_AVX512BWDQ2 1 "register_operand" "k")
1544 (match_operand:SWI1248_AVX512BWDQ2 2 "register_operand" "k")))
1545 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1546 "TARGET_AVX512F"
1547 "kadd<mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1548 [(set_attr "type" "msklog")
1549 (set_attr "prefix" "vex")
1550 (set_attr "mode" "<MODE>")])
1551
1552 ;; Mask variant shift mnemonics
1553 (define_code_attr mshift [(ashift "shiftl") (lshiftrt "shiftr")])
1554
1555 (define_insn "k<code><mode>"
1556 [(set (match_operand:SWI1248_AVX512BWDQ 0 "register_operand" "=k")
1557 (any_lshift:SWI1248_AVX512BWDQ
1558 (match_operand:SWI1248_AVX512BWDQ 1 "register_operand" "k")
1559 (match_operand 2 "const_0_to_255_operand" "n")))
1560 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1561 "TARGET_AVX512F"
1562 "k<mshift><mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1563 [(set_attr "type" "msklog")
1564 (set_attr "prefix" "vex")
1565 (set_attr "mode" "<MODE>")])
1566
1567 (define_insn "ktest<mode>"
1568 [(set (reg:CC FLAGS_REG)
1569 (unspec:CC
1570 [(match_operand:SWI1248_AVX512BWDQ2 0 "register_operand" "k")
1571 (match_operand:SWI1248_AVX512BWDQ2 1 "register_operand" "k")]
1572 UNSPEC_KTEST))]
1573 "TARGET_AVX512F"
1574 "ktest<mskmodesuffix>\t{%1, %0|%0, %1}"
1575 [(set_attr "mode" "<MODE>")
1576 (set_attr "type" "msklog")
1577 (set_attr "prefix" "vex")])
1578
1579 (define_insn "kortest<mode>"
1580 [(set (reg:CC FLAGS_REG)
1581 (unspec:CC
1582 [(match_operand:SWI1248_AVX512BWDQ 0 "register_operand" "k")
1583 (match_operand:SWI1248_AVX512BWDQ 1 "register_operand" "k")]
1584 UNSPEC_KORTEST))]
1585 "TARGET_AVX512F"
1586 "kortest<mskmodesuffix>\t{%1, %0|%0, %1}"
1587 [(set_attr "mode" "<MODE>")
1588 (set_attr "type" "msklog")
1589 (set_attr "prefix" "vex")])
1590
1591 (define_insn "kunpckhi"
1592 [(set (match_operand:HI 0 "register_operand" "=k")
1593 (ior:HI
1594 (ashift:HI
1595 (zero_extend:HI (match_operand:QI 1 "register_operand" "k"))
1596 (const_int 8))
1597 (zero_extend:HI (match_operand:QI 2 "register_operand" "k"))))]
1598 "TARGET_AVX512F"
1599 "kunpckbw\t{%2, %1, %0|%0, %1, %2}"
1600 [(set_attr "mode" "HI")
1601 (set_attr "type" "msklog")
1602 (set_attr "prefix" "vex")])
1603
1604 (define_insn "kunpcksi"
1605 [(set (match_operand:SI 0 "register_operand" "=k")
1606 (ior:SI
1607 (ashift:SI
1608 (zero_extend:SI (match_operand:HI 1 "register_operand" "k"))
1609 (const_int 16))
1610 (zero_extend:SI (match_operand:HI 2 "register_operand" "k"))))]
1611 "TARGET_AVX512BW"
1612 "kunpckwd\t{%2, %1, %0|%0, %1, %2}"
1613 [(set_attr "mode" "SI")])
1614
1615 (define_insn "kunpckdi"
1616 [(set (match_operand:DI 0 "register_operand" "=k")
1617 (ior:DI
1618 (ashift:DI
1619 (zero_extend:DI (match_operand:SI 1 "register_operand" "k"))
1620 (const_int 32))
1621 (zero_extend:DI (match_operand:SI 2 "register_operand" "k"))))]
1622 "TARGET_AVX512BW"
1623 "kunpckdq\t{%2, %1, %0|%0, %1, %2}"
1624 [(set_attr "mode" "DI")])
1625
1626
1627 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1628 ;;
1629 ;; Parallel floating point arithmetic
1630 ;;
1631 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1632
1633 (define_expand "<code><mode>2"
1634 [(set (match_operand:VF 0 "register_operand")
1635 (absneg:VF
1636 (match_operand:VF 1 "register_operand")))]
1637 "TARGET_SSE"
1638 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
1639
1640 (define_insn_and_split "*<code><mode>2"
1641 [(set (match_operand:VF 0 "register_operand" "=x,v")
1642 (absneg:VF
1643 (match_operand:VF 1 "vector_operand" "%0,v")))
1644 (use (match_operand:VF 2 "vector_operand" "xBm,vm"))]
1645 "TARGET_SSE"
1646 "#"
1647 "&& reload_completed"
1648 [(set (match_dup 0)
1649 (<absneg_op>:VF (match_dup 1) (match_dup 2)))]
1650 ""
1651 [(set_attr "isa" "noavx,avx")])
1652
1653 (define_insn_and_split "*nabs<mode>2"
1654 [(set (match_operand:VF 0 "register_operand" "=x,v")
1655 (neg:VF
1656 (abs:VF
1657 (match_operand:VF 1 "vector_operand" "%0,v"))))
1658 (use (match_operand:VF 2 "vector_operand" "xBm,vm"))]
1659 "TARGET_SSE"
1660 "#"
1661 "&& reload_completed"
1662 [(set (match_dup 0)
1663 (ior:VF (match_dup 1) (match_dup 2)))]
1664 ""
1665 [(set_attr "isa" "noavx,avx")])
1666
1667 (define_expand "<plusminus_insn><mode>3<mask_name><round_name>"
1668 [(set (match_operand:VF 0 "register_operand")
1669 (plusminus:VF
1670 (match_operand:VF 1 "<round_nimm_predicate>")
1671 (match_operand:VF 2 "<round_nimm_predicate>")))]
1672 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1673 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
1674
1675 (define_insn "*<plusminus_insn><mode>3<mask_name><round_name>"
1676 [(set (match_operand:VF 0 "register_operand" "=x,v")
1677 (plusminus:VF
1678 (match_operand:VF 1 "<round_nimm_predicate>" "<comm>0,v")
1679 (match_operand:VF 2 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
1680 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
1681 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1682 "@
1683 <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
1684 v<plusminus_mnemonic><ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1685 [(set_attr "isa" "noavx,avx")
1686 (set_attr "type" "sseadd")
1687 (set_attr "prefix" "<mask_prefix3>")
1688 (set_attr "mode" "<MODE>")])
1689
1690 (define_insn "*sub<mode>3<mask_name>_bcst"
1691 [(set (match_operand:VF_AVX512 0 "register_operand" "=v")
1692 (minus:VF_AVX512
1693 (match_operand:VF_AVX512 1 "register_operand" "v")
1694 (vec_duplicate:VF_AVX512
1695 (match_operand:<ssescalarmode> 2 "memory_operand" "m"))))]
1696 "TARGET_AVX512F
1697 && ix86_binary_operator_ok (MINUS, <MODE>mode, operands)
1698 && <mask_mode512bit_condition>"
1699 "vsub<ssemodesuffix>\t{%2<avx512bcst>, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<avx512bcst>}"
1700 [(set_attr "prefix" "evex")
1701 (set_attr "type" "sseadd")
1702 (set_attr "mode" "<MODE>")])
1703
1704 (define_insn "*add<mode>3<mask_name>_bcst"
1705 [(set (match_operand:VF_AVX512 0 "register_operand" "=v")
1706 (plus:VF_AVX512
1707 (vec_duplicate:VF_AVX512
1708 (match_operand:<ssescalarmode> 1 "memory_operand" "m"))
1709 (match_operand:VF_AVX512 2 "register_operand" "v")))]
1710 "TARGET_AVX512F
1711 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)
1712 && <mask_mode512bit_condition>"
1713 "vadd<ssemodesuffix>\t{%1<avx512bcst>, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1<avx512bcst>}"
1714 [(set_attr "prefix" "evex")
1715 (set_attr "type" "sseadd")
1716 (set_attr "mode" "<MODE>")])
1717
1718 ;; Standard scalar operation patterns which preserve the rest of the
1719 ;; vector for combiner.
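;; For example, for V4SF the vec_merge with mask (const_int 1) below
;; produces dest = { op1[0] <op> op2, op1[1], op1[2], op1[3] }, i.e.
;; only element 0 is replaced and the remaining elements are taken
;; unchanged from operand 1.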
1720 (define_insn "*<sse>_vm<plusminus_insn><mode>3"
1721 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1722 (vec_merge:VF_128
1723 (vec_duplicate:VF_128
1724 (plusminus:<ssescalarmode>
1725 (vec_select:<ssescalarmode>
1726 (match_operand:VF_128 1 "register_operand" "0,v")
1727 (parallel [(const_int 0)]))
1728 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "xm,vm")))
1729 (match_dup 1)
1730 (const_int 1)))]
1731 "TARGET_SSE"
1732 "@
1733 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %2}
1734 v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1735 [(set_attr "isa" "noavx,avx")
1736 (set_attr "type" "sseadd")
1737 (set_attr "prefix" "orig,vex")
1738 (set_attr "mode" "<ssescalarmode>")])
1739
1740 (define_insn "<sse>_vm<plusminus_insn><mode>3<mask_scalar_name><round_scalar_name>"
1741 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1742 (vec_merge:VF_128
1743 (plusminus:VF_128
1744 (match_operand:VF_128 1 "register_operand" "0,v")
1745 (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_scalar_constraint>"))
1746 (match_dup 1)
1747 (const_int 1)))]
1748 "TARGET_SSE"
1749 "@
1750 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1751 v<plusminus_mnemonic><ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_scalar_mask_op3>}"
1752 [(set_attr "isa" "noavx,avx")
1753 (set_attr "type" "sseadd")
1754 (set_attr "prefix" "<round_scalar_prefix>")
1755 (set_attr "mode" "<ssescalarmode>")])
1756
1757 (define_expand "mul<mode>3<mask_name><round_name>"
1758 [(set (match_operand:VF 0 "register_operand")
1759 (mult:VF
1760 (match_operand:VF 1 "<round_nimm_predicate>")
1761 (match_operand:VF 2 "<round_nimm_predicate>")))]
1762 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1763 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
1764
1765 (define_insn "*mul<mode>3<mask_name><round_name>"
1766 [(set (match_operand:VF 0 "register_operand" "=x,v")
1767 (mult:VF
1768 (match_operand:VF 1 "<round_nimm_predicate>" "%0,v")
1769 (match_operand:VF 2 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
1770 "TARGET_SSE
1771 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
1772 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1773 "@
1774 mul<ssemodesuffix>\t{%2, %0|%0, %2}
1775 vmul<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1776 [(set_attr "isa" "noavx,avx")
1777 (set_attr "type" "ssemul")
1778 (set_attr "prefix" "<mask_prefix3>")
1779 (set_attr "btver2_decode" "direct,double")
1780 (set_attr "mode" "<MODE>")])
1781
1782 (define_insn "*mul<mode>3<mask_name>_bcst"
1783 [(set (match_operand:VF_AVX512 0 "register_operand" "=v")
1784 (mult:VF_AVX512
1785 (vec_duplicate:VF_AVX512
1786 (match_operand:<ssescalarmode> 1 "memory_operand" "m"))
1787 (match_operand:VF_AVX512 2 "register_operand" "v")))]
1788 "TARGET_AVX512F && <mask_mode512bit_condition>"
1789 "vmul<ssemodesuffix>\t{%1<avx512bcst>, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1<<avx512bcst>>}"
1790 [(set_attr "prefix" "evex")
1791 (set_attr "type" "ssemul")
1792 (set_attr "mode" "<MODE>")])
1793
1794 ;; Standard scalar operation patterns which preserve the rest of the
1795 ;; vector for combiner.
1796 (define_insn "*<sse>_vm<multdiv_mnemonic><mode>3"
1797 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1798 (vec_merge:VF_128
1799 (vec_duplicate:VF_128
1800 (multdiv:<ssescalarmode>
1801 (vec_select:<ssescalarmode>
1802 (match_operand:VF_128 1 "register_operand" "0,v")
1803 (parallel [(const_int 0)]))
1804 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "xm,vm")))
1805 (match_dup 1)
1806 (const_int 1)))]
1807 "TARGET_SSE"
1808 "@
1809 <multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %2}
1810 v<multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1811 [(set_attr "isa" "noavx,avx")
1812 (set_attr "type" "sse<multdiv_mnemonic>")
1813 (set_attr "prefix" "orig,vex")
1814 (set_attr "btver2_decode" "direct,double")
1815 (set_attr "mode" "<ssescalarmode>")])
1816
1817 (define_insn "<sse>_vm<multdiv_mnemonic><mode>3<mask_scalar_name><round_scalar_name>"
1818 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1819 (vec_merge:VF_128
1820 (multdiv:VF_128
1821 (match_operand:VF_128 1 "register_operand" "0,v")
1822 (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_scalar_constraint>"))
1823 (match_dup 1)
1824 (const_int 1)))]
1825 "TARGET_SSE"
1826 "@
1827 <multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1828 v<multdiv_mnemonic><ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_scalar_mask_op3>}"
1829 [(set_attr "isa" "noavx,avx")
1830 (set_attr "type" "sse<multdiv_mnemonic>")
1831 (set_attr "prefix" "<round_scalar_prefix>")
1832 (set_attr "btver2_decode" "direct,double")
1833 (set_attr "mode" "<ssescalarmode>")])
1834
1835 (define_expand "div<mode>3"
1836 [(set (match_operand:VF2 0 "register_operand")
1837 (div:VF2 (match_operand:VF2 1 "register_operand")
1838 (match_operand:VF2 2 "vector_operand")))]
1839 "TARGET_SSE2"
1840 "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")
1841
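;; For single-precision vectors, when TARGET_RECIP_VEC_DIV and the
;; unsafe-math conditions below hold, the division is expanded by
;; ix86_emit_swdivsf into a reciprocal-based software sequence instead
;; of a hardware divide.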
1842 (define_expand "div<mode>3"
1843 [(set (match_operand:VF1 0 "register_operand")
1844 (div:VF1 (match_operand:VF1 1 "register_operand")
1845 (match_operand:VF1 2 "vector_operand")))]
1846 "TARGET_SSE"
1847 {
1848 ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);
1849
1850 if (TARGET_SSE_MATH
1851 && TARGET_RECIP_VEC_DIV
1852 && !optimize_insn_for_size_p ()
1853 && flag_finite_math_only && !flag_trapping_math
1854 && flag_unsafe_math_optimizations)
1855 {
1856 ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
1857 DONE;
1858 }
1859 })
1860
1861 (define_insn "<sse>_div<mode>3<mask_name><round_name>"
1862 [(set (match_operand:VF 0 "register_operand" "=x,v")
1863 (div:VF
1864 (match_operand:VF 1 "register_operand" "0,v")
1865 (match_operand:VF 2 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
1866 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1867 "@
1868 div<ssemodesuffix>\t{%2, %0|%0, %2}
1869 vdiv<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1870 [(set_attr "isa" "noavx,avx")
1871 (set_attr "type" "ssediv")
1872 (set_attr "prefix" "<mask_prefix3>")
1873 (set_attr "mode" "<MODE>")])
1874
1875 (define_insn "*<avx512>_div<mode>3<mask_name>_bcst"
1876 [(set (match_operand:VF_AVX512 0 "register_operand" "=v")
1877 (div:VF_AVX512
1878 (match_operand:VF_AVX512 1 "register_operand" "v")
1879 (vec_duplicate:VF_AVX512
1880 (match_operand:<ssescalarmode> 2 "memory_operand" "m"))))]
1881 "TARGET_AVX512F && <mask_mode512bit_condition>"
1882 "vdiv<ssemodesuffix>\t{%2<avx512bcst>, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<<avx512bcst>>}"
1883 [(set_attr "prefix" "evex")
1884 (set_attr "type" "ssediv")
1885 (set_attr "mode" "<MODE>")])
1886
1887 (define_insn "<sse>_rcp<mode>2"
1888 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
1889 (unspec:VF1_128_256
1890 [(match_operand:VF1_128_256 1 "vector_operand" "xBm")] UNSPEC_RCP))]
1891 "TARGET_SSE"
1892 "%vrcpps\t{%1, %0|%0, %1}"
1893 [(set_attr "type" "sse")
1894 (set_attr "atom_sse_attr" "rcp")
1895 (set_attr "btver2_sse_attr" "rcp")
1896 (set_attr "prefix" "maybe_vex")
1897 (set_attr "mode" "<MODE>")])
1898
1899 (define_insn "sse_vmrcpv4sf2"
1900 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1901 (vec_merge:V4SF
1902 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
1903 UNSPEC_RCP)
1904 (match_operand:V4SF 2 "register_operand" "0,x")
1905 (const_int 1)))]
1906 "TARGET_SSE"
1907 "@
1908 rcpss\t{%1, %0|%0, %k1}
1909 vrcpss\t{%1, %2, %0|%0, %2, %k1}"
1910 [(set_attr "isa" "noavx,avx")
1911 (set_attr "type" "sse")
1912 (set_attr "atom_sse_attr" "rcp")
1913 (set_attr "btver2_sse_attr" "rcp")
1914 (set_attr "prefix" "orig,vex")
1915 (set_attr "mode" "SF")])
1916
1917 (define_insn "*sse_vmrcpv4sf2"
1918 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1919 (vec_merge:V4SF
1920 (vec_duplicate:V4SF
1921 (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "xm,xm")]
1922 UNSPEC_RCP))
1923 (match_operand:V4SF 2 "register_operand" "0,x")
1924 (const_int 1)))]
1925 "TARGET_SSE"
1926 "@
1927 rcpss\t{%1, %0|%0, %1}
1928 vrcpss\t{%1, %2, %0|%0, %2, %1}"
1929 [(set_attr "isa" "noavx,avx")
1930 (set_attr "type" "sse")
1931 (set_attr "atom_sse_attr" "rcp")
1932 (set_attr "btver2_sse_attr" "rcp")
1933 (set_attr "prefix" "orig,vex")
1934 (set_attr "mode" "SF")])
1935
1936 (define_insn "<mask_codefor>rcp14<mode><mask_name>"
1937 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
1938 (unspec:VF_AVX512VL
1939 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")]
1940 UNSPEC_RCP14))]
1941 "TARGET_AVX512F"
1942 "vrcp14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
1943 [(set_attr "type" "sse")
1944 (set_attr "prefix" "evex")
1945 (set_attr "mode" "<MODE>")])
1946
1947 (define_insn "srcp14<mode>"
1948 [(set (match_operand:VF_128 0 "register_operand" "=v")
1949 (vec_merge:VF_128
1950 (unspec:VF_128
1951 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
1952 UNSPEC_RCP14)
1953 (match_operand:VF_128 2 "register_operand" "v")
1954 (const_int 1)))]
1955 "TARGET_AVX512F"
1956 "vrcp14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %<iptr>1}"
1957 [(set_attr "type" "sse")
1958 (set_attr "prefix" "evex")
1959 (set_attr "mode" "<MODE>")])
1960
1961 (define_insn "srcp14<mode>_mask"
1962 [(set (match_operand:VF_128 0 "register_operand" "=v")
1963 (vec_merge:VF_128
1964 (vec_merge:VF_128
1965 (unspec:VF_128
1966 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
1967 UNSPEC_RCP14)
1968 (match_operand:VF_128 3 "nonimm_or_0_operand" "0C")
1969 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk"))
1970 (match_operand:VF_128 2 "register_operand" "v")
1971 (const_int 1)))]
1972 "TARGET_AVX512F"
1973 "vrcp14<ssescalarmodesuffix>\t{%1, %2, %0%{%4%}%N3|%0%{%4%}%N3, %2, %<iptr>1}"
1974 [(set_attr "type" "sse")
1975 (set_attr "prefix" "evex")
1976 (set_attr "mode" "<MODE>")])
1977
1978 (define_expand "sqrt<mode>2"
1979 [(set (match_operand:VF2 0 "register_operand")
1980 (sqrt:VF2 (match_operand:VF2 1 "vector_operand")))]
1981 "TARGET_SSE2")
1982
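;; Likewise for single-precision square root: under the analogous
;; TARGET_RECIP_VEC_SQRT conditions below, ix86_emit_swsqrtsf expands
;; this via a software approximation sequence instead of a hardware
;; sqrt.  The rsqrt<mode>2 expanders further down use the same helper
;; with its last argument set for reciprocal square root.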
1983 (define_expand "sqrt<mode>2"
1984 [(set (match_operand:VF1 0 "register_operand")
1985 (sqrt:VF1 (match_operand:VF1 1 "vector_operand")))]
1986 "TARGET_SSE"
1987 {
1988 if (TARGET_SSE_MATH
1989 && TARGET_RECIP_VEC_SQRT
1990 && !optimize_insn_for_size_p ()
1991 && flag_finite_math_only && !flag_trapping_math
1992 && flag_unsafe_math_optimizations)
1993 {
1994 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
1995 DONE;
1996 }
1997 })
1998
1999 (define_insn "<sse>_sqrt<mode>2<mask_name><round_name>"
2000 [(set (match_operand:VF 0 "register_operand" "=x,v")
2001 (sqrt:VF (match_operand:VF 1 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
2002 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
2003 "@
2004 sqrt<ssemodesuffix>\t{%1, %0|%0, %1}
2005 vsqrt<ssemodesuffix>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
2006 [(set_attr "isa" "noavx,avx")
2007 (set_attr "type" "sse")
2008 (set_attr "atom_sse_attr" "sqrt")
2009 (set_attr "btver2_sse_attr" "sqrt")
2010 (set_attr "prefix" "maybe_vex")
2011 (set_attr "mode" "<MODE>")])
2012
2013 (define_insn "<sse>_vmsqrt<mode>2<mask_scalar_name><round_scalar_name>"
2014 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
2015 (vec_merge:VF_128
2016 (sqrt:VF_128
2017 (match_operand:VF_128 1 "nonimmediate_operand" "xm,<round_scalar_constraint>"))
2018 (match_operand:VF_128 2 "register_operand" "0,v")
2019 (const_int 1)))]
2020 "TARGET_SSE"
2021 "@
2022 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}
2023 vsqrt<ssescalarmodesuffix>\t{<round_scalar_mask_op3>%1, %2, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %2, %<iptr>1<round_scalar_mask_op3>}"
2024 [(set_attr "isa" "noavx,avx")
2025 (set_attr "type" "sse")
2026 (set_attr "atom_sse_attr" "sqrt")
2027 (set_attr "prefix" "<round_scalar_prefix>")
2028 (set_attr "btver2_sse_attr" "sqrt")
2029 (set_attr "mode" "<ssescalarmode>")])
2030
2031 (define_insn "*<sse>_vmsqrt<mode>2<mask_scalar_name><round_scalar_name>"
2032 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
2033 (vec_merge:VF_128
2034 (vec_duplicate:VF_128
2035 (sqrt:<ssescalarmode>
2036 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "xm,<round_scalar_constraint>")))
2037 (match_operand:VF_128 2 "register_operand" "0,v")
2038 (const_int 1)))]
2039 "TARGET_SSE"
2040 "@
2041 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %1}
2042 vsqrt<ssescalarmodesuffix>\t{<round_scalar_mask_op3>%1, %2, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %2, %1<round_scalar_mask_op3>}"
2043 [(set_attr "isa" "noavx,avx")
2044 (set_attr "type" "sse")
2045 (set_attr "atom_sse_attr" "sqrt")
2046 (set_attr "prefix" "<round_scalar_prefix>")
2047 (set_attr "btver2_sse_attr" "sqrt")
2048 (set_attr "mode" "<ssescalarmode>")])
2049
2050 (define_expand "rsqrt<mode>2"
2051 [(set (match_operand:VF1_128_256 0 "register_operand")
2052 (unspec:VF1_128_256
2053 [(match_operand:VF1_128_256 1 "vector_operand")] UNSPEC_RSQRT))]
2054 "TARGET_SSE && TARGET_SSE_MATH"
2055 {
2056 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
2057 DONE;
2058 })
2059
2060 (define_expand "rsqrtv16sf2"
2061 [(set (match_operand:V16SF 0 "register_operand")
2062 (unspec:V16SF
2063 [(match_operand:V16SF 1 "vector_operand")]
2064 UNSPEC_RSQRT28))]
2065 "TARGET_AVX512ER && TARGET_SSE_MATH"
2066 {
2067 ix86_emit_swsqrtsf (operands[0], operands[1], V16SFmode, true);
2068 DONE;
2069 })
2070
2071 (define_insn "<sse>_rsqrt<mode>2"
2072 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
2073 (unspec:VF1_128_256
2074 [(match_operand:VF1_128_256 1 "vector_operand" "xBm")] UNSPEC_RSQRT))]
2075 "TARGET_SSE"
2076 "%vrsqrtps\t{%1, %0|%0, %1}"
2077 [(set_attr "type" "sse")
2078 (set_attr "prefix" "maybe_vex")
2079 (set_attr "mode" "<MODE>")])
2080
2081 (define_insn "<mask_codefor>rsqrt14<mode><mask_name>"
2082 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
2083 (unspec:VF_AVX512VL
2084 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")]
2085 UNSPEC_RSQRT14))]
2086 "TARGET_AVX512F"
2087 "vrsqrt14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
2088 [(set_attr "type" "sse")
2089 (set_attr "prefix" "evex")
2090 (set_attr "mode" "<MODE>")])
2091
2092 (define_insn "rsqrt14<mode>"
2093 [(set (match_operand:VF_128 0 "register_operand" "=v")
2094 (vec_merge:VF_128
2095 (unspec:VF_128
2096 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
2097 UNSPEC_RSQRT14)
2098 (match_operand:VF_128 2 "register_operand" "v")
2099 (const_int 1)))]
2100 "TARGET_AVX512F"
2101 "vrsqrt14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %<iptr>1}"
2102 [(set_attr "type" "sse")
2103 (set_attr "prefix" "evex")
2104 (set_attr "mode" "<MODE>")])
2105
2106 (define_insn "rsqrt14_<mode>_mask"
2107 [(set (match_operand:VF_128 0 "register_operand" "=v")
2108 (vec_merge:VF_128
2109 (vec_merge:VF_128
2110 (unspec:VF_128
2111 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
2112 UNSPEC_RSQRT14)
2113 (match_operand:VF_128 3 "nonimm_or_0_operand" "0C")
2114 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk"))
2115 (match_operand:VF_128 2 "register_operand" "v")
2116 (const_int 1)))]
2117 "TARGET_AVX512F"
2118 "vrsqrt14<ssescalarmodesuffix>\t{%1, %2, %0%{%4%}%N3|%0%{%4%}%N3, %2, %<iptr>1}"
2119 [(set_attr "type" "sse")
2120 (set_attr "prefix" "evex")
2121 (set_attr "mode" "<MODE>")])
2122
2123 (define_insn "sse_vmrsqrtv4sf2"
2124 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2125 (vec_merge:V4SF
2126 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
2127 UNSPEC_RSQRT)
2128 (match_operand:V4SF 2 "register_operand" "0,x")
2129 (const_int 1)))]
2130 "TARGET_SSE"
2131 "@
2132 rsqrtss\t{%1, %0|%0, %k1}
2133 vrsqrtss\t{%1, %2, %0|%0, %2, %k1}"
2134 [(set_attr "isa" "noavx,avx")
2135 (set_attr "type" "sse")
2136 (set_attr "prefix" "orig,vex")
2137 (set_attr "mode" "SF")])
2138
2139 (define_insn "*sse_vmrsqrtv4sf2"
2140 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2141 (vec_merge:V4SF
2142 (vec_duplicate:V4SF
2143 (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "xm,xm")]
2144 UNSPEC_RSQRT))
2145 (match_operand:V4SF 2 "register_operand" "0,x")
2146 (const_int 1)))]
2147 "TARGET_SSE"
2148 "@
2149 rsqrtss\t{%1, %0|%0, %1}
2150 vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
2151 [(set_attr "isa" "noavx,avx")
2152 (set_attr "type" "sse")
2153 (set_attr "prefix" "orig,vex")
2154 (set_attr "mode" "SF")])
2155
2156 (define_expand "<code><mode>3<mask_name><round_saeonly_name>"
2157 [(set (match_operand:VF 0 "register_operand")
2158 (smaxmin:VF
2159 (match_operand:VF 1 "<round_saeonly_nimm_predicate>")
2160 (match_operand:VF 2 "<round_saeonly_nimm_predicate>")))]
2161 "TARGET_SSE && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
2162 {
2163 if (!flag_finite_math_only || flag_signed_zeros)
2164 {
2165 operands[1] = force_reg (<MODE>mode, operands[1]);
2166 emit_insn (gen_ieee_<maxmin_float><mode>3<mask_name><round_saeonly_name>
2167 (operands[0], operands[1], operands[2]
2168 <mask_operand_arg34>
2169 <round_saeonly_mask_arg3>));
2170 DONE;
2171 }
2172 else
2173 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
2174 })
2175
2176 ;; These versions of the min/max patterns are intentionally ignorant of
2177 ;; their behavior wrt -0.0 and NaN (via the commutative operand mark).
2178 ;; Since both the tree-level MAX_EXPR and the rtl-level SMAX operator
2179 ;; are undefined in this condition, we're certain this is correct.
2180
2181 (define_insn "*<code><mode>3<mask_name><round_saeonly_name>"
2182 [(set (match_operand:VF 0 "register_operand" "=x,v")
2183 (smaxmin:VF
2184 (match_operand:VF 1 "<round_saeonly_nimm_predicate>" "%0,v")
2185 (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xBm,<round_saeonly_constraint>")))]
2186 "TARGET_SSE
2187 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
2188 && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
2189 "@
2190 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
2191 v<maxmin_float><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
2192 [(set_attr "isa" "noavx,avx")
2193 (set_attr "type" "sseadd")
2194 (set_attr "btver2_sse_attr" "maxmin")
2195 (set_attr "prefix" "<mask_prefix3>")
2196 (set_attr "mode" "<MODE>")])
2197
2198 ;; These versions of the min/max patterns implement exactly the operations
2199 ;; min = (op1 < op2 ? op1 : op2)
2200 ;; max = (!(op1 < op2) ? op1 : op2)
2201 ;; Their operands are not commutative, and thus they may be used in the
2202 ;; presence of -0.0 and NaN.
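;; For illustration (derived from the formulas above): with
;;   min = (op1 < op2 ? op1 : op2)
;; the comparison against a NaN is false, so min (NaN, 1.0) is 1.0
;; while min (1.0, NaN) is NaN; likewise min (-0.0, 0.0) is 0.0 but
;; min (0.0, -0.0) is -0.0.  Swapping the operands therefore changes
;; the result, which is why operand 1 is not marked commutative here.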
2203
2204 (define_insn "ieee_<ieee_maxmin><mode>3<mask_name><round_saeonly_name>"
2205 [(set (match_operand:VF 0 "register_operand" "=x,v")
2206 (unspec:VF
2207 [(match_operand:VF 1 "register_operand" "0,v")
2208 (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xBm,<round_saeonly_constraint>")]
2209 IEEE_MAXMIN))]
2210 "TARGET_SSE
2211 && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
2212 "@
2213 <ieee_maxmin><ssemodesuffix>\t{%2, %0|%0, %2}
2214 v<ieee_maxmin><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
2215 [(set_attr "isa" "noavx,avx")
2216 (set_attr "type" "sseadd")
2217 (set_attr "btver2_sse_attr" "maxmin")
2218 (set_attr "prefix" "<mask_prefix3>")
2219 (set_attr "mode" "<MODE>")])
2220
2221 ;; Standard scalar operation patterns which preserve the rest of the
2222 ;; vector for combiner.
2223 (define_insn "*ieee_<ieee_maxmin><mode>3"
2224 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
2225 (vec_merge:VF_128
2226 (vec_duplicate:VF_128
2227 (unspec:<ssescalarmode>
2228 [(vec_select:<ssescalarmode>
2229 (match_operand:VF_128 1 "register_operand" "0,v")
2230 (parallel [(const_int 0)]))
2231 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "xm,vm")]
2232 IEEE_MAXMIN))
2233 (match_dup 1)
2234 (const_int 1)))]
2235 "TARGET_SSE"
2236 "@
2237 <ieee_maxmin><ssescalarmodesuffix>\t{%2, %0|%0, %2}
2238 v<ieee_maxmin><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2239 [(set_attr "isa" "noavx,avx")
2240 (set_attr "type" "sseadd")
2241 (set_attr "btver2_sse_attr" "maxmin")
2242 (set_attr "prefix" "orig,vex")
2243 (set_attr "mode" "<ssescalarmode>")])
2244
2245 (define_insn "<sse>_vm<code><mode>3<mask_scalar_name><round_saeonly_scalar_name>"
2246 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
2247 (vec_merge:VF_128
2248 (smaxmin:VF_128
2249 (match_operand:VF_128 1 "register_operand" "0,v")
2250 (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_saeonly_scalar_constraint>"))
2251 (match_dup 1)
2252 (const_int 1)))]
2253 "TARGET_SSE"
2254 "@
2255 <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
2256 v<maxmin_float><ssescalarmodesuffix>\t{<round_saeonly_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_saeonly_scalar_mask_op3>}"
2257 [(set_attr "isa" "noavx,avx")
2258 (set_attr "type" "sse")
2259 (set_attr "btver2_sse_attr" "maxmin")
2260 (set_attr "prefix" "<round_saeonly_scalar_prefix>")
2261 (set_attr "mode" "<ssescalarmode>")])
2262
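;; ADDSUB patterns: the vec_merge mask has a set bit for every
;; even-numbered element, selecting MINUS there and PLUS for the
;; odd-numbered elements (1 for V2DF, 5 for V4DF/V4SF, 85 for V8SF).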
2263 (define_insn "avx_addsubv4df3"
2264 [(set (match_operand:V4DF 0 "register_operand" "=x")
2265 (vec_merge:V4DF
2266 (minus:V4DF
2267 (match_operand:V4DF 1 "register_operand" "x")
2268 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
2269 (plus:V4DF (match_dup 1) (match_dup 2))
2270 (const_int 5)))]
2271 "TARGET_AVX"
2272 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
2273 [(set_attr "type" "sseadd")
2274 (set_attr "prefix" "vex")
2275 (set_attr "mode" "V4DF")])
2276
2277 (define_insn "sse3_addsubv2df3"
2278 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2279 (vec_merge:V2DF
2280 (minus:V2DF
2281 (match_operand:V2DF 1 "register_operand" "0,x")
2282 (match_operand:V2DF 2 "vector_operand" "xBm,xm"))
2283 (plus:V2DF (match_dup 1) (match_dup 2))
2284 (const_int 1)))]
2285 "TARGET_SSE3"
2286 "@
2287 addsubpd\t{%2, %0|%0, %2}
2288 vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
2289 [(set_attr "isa" "noavx,avx")
2290 (set_attr "type" "sseadd")
2291 (set_attr "atom_unit" "complex")
2292 (set_attr "prefix" "orig,vex")
2293 (set_attr "mode" "V2DF")])
2294
2295 (define_insn "avx_addsubv8sf3"
2296 [(set (match_operand:V8SF 0 "register_operand" "=x")
2297 (vec_merge:V8SF
2298 (minus:V8SF
2299 (match_operand:V8SF 1 "register_operand" "x")
2300 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
2301 (plus:V8SF (match_dup 1) (match_dup 2))
2302 (const_int 85)))]
2303 "TARGET_AVX"
2304 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
2305 [(set_attr "type" "sseadd")
2306 (set_attr "prefix" "vex")
2307 (set_attr "mode" "V8SF")])
2308
2309 (define_insn "sse3_addsubv4sf3"
2310 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2311 (vec_merge:V4SF
2312 (minus:V4SF
2313 (match_operand:V4SF 1 "register_operand" "0,x")
2314 (match_operand:V4SF 2 "vector_operand" "xBm,xm"))
2315 (plus:V4SF (match_dup 1) (match_dup 2))
2316 (const_int 5)))]
2317 "TARGET_SSE3"
2318 "@
2319 addsubps\t{%2, %0|%0, %2}
2320 vaddsubps\t{%2, %1, %0|%0, %1, %2}"
2321 [(set_attr "isa" "noavx,avx")
2322 (set_attr "type" "sseadd")
2323 (set_attr "prefix" "orig,vex")
2324 (set_attr "prefix_rep" "1,*")
2325 (set_attr "mode" "V4SF")])
2326
2327 (define_split
2328 [(set (match_operand:VF_128_256 0 "register_operand")
2329 (match_operator:VF_128_256 6 "addsub_vm_operator"
2330 [(minus:VF_128_256
2331 (match_operand:VF_128_256 1 "register_operand")
2332 (match_operand:VF_128_256 2 "vector_operand"))
2333 (plus:VF_128_256
2334 (match_operand:VF_128_256 3 "vector_operand")
2335 (match_operand:VF_128_256 4 "vector_operand"))
2336 (match_operand 5 "const_int_operand")]))]
2337 "TARGET_SSE3
2338 && can_create_pseudo_p ()
2339 && ((rtx_equal_p (operands[1], operands[3])
2340 && rtx_equal_p (operands[2], operands[4]))
2341 || (rtx_equal_p (operands[1], operands[4])
2342 && rtx_equal_p (operands[2], operands[3])))"
2343 [(set (match_dup 0)
2344 (vec_merge:VF_128_256
2345 (minus:VF_128_256 (match_dup 1) (match_dup 2))
2346 (plus:VF_128_256 (match_dup 1) (match_dup 2))
2347 (match_dup 5)))])
2348
2349 (define_split
2350 [(set (match_operand:VF_128_256 0 "register_operand")
2351 (match_operator:VF_128_256 6 "addsub_vm_operator"
2352 [(plus:VF_128_256
2353 (match_operand:VF_128_256 1 "vector_operand")
2354 (match_operand:VF_128_256 2 "vector_operand"))
2355 (minus:VF_128_256
2356 (match_operand:VF_128_256 3 "register_operand")
2357 (match_operand:VF_128_256 4 "vector_operand"))
2358 (match_operand 5 "const_int_operand")]))]
2359 "TARGET_SSE3
2360 && can_create_pseudo_p ()
2361 && ((rtx_equal_p (operands[1], operands[3])
2362 && rtx_equal_p (operands[2], operands[4]))
2363 || (rtx_equal_p (operands[1], operands[4])
2364 && rtx_equal_p (operands[2], operands[3])))"
2365 [(set (match_dup 0)
2366 (vec_merge:VF_128_256
2367 (minus:VF_128_256 (match_dup 3) (match_dup 4))
2368 (plus:VF_128_256 (match_dup 3) (match_dup 4))
2369 (match_dup 5)))]
2370 {
2371 /* Negate mask bits to compensate for swapped PLUS and MINUS RTXes. */
2372 operands[5]
2373 = GEN_INT (~INTVAL (operands[5])
2374 & ((HOST_WIDE_INT_1U << GET_MODE_NUNITS (<MODE>mode)) - 1));
2375 })
2376
2377 (define_split
2378 [(set (match_operand:VF_128_256 0 "register_operand")
2379 (match_operator:VF_128_256 7 "addsub_vs_operator"
2380 [(vec_concat:<ssedoublemode>
2381 (minus:VF_128_256
2382 (match_operand:VF_128_256 1 "register_operand")
2383 (match_operand:VF_128_256 2 "vector_operand"))
2384 (plus:VF_128_256
2385 (match_operand:VF_128_256 3 "vector_operand")
2386 (match_operand:VF_128_256 4 "vector_operand")))
2387 (match_parallel 5 "addsub_vs_parallel"
2388 [(match_operand 6 "const_int_operand")])]))]
2389 "TARGET_SSE3
2390 && can_create_pseudo_p ()
2391 && ((rtx_equal_p (operands[1], operands[3])
2392 && rtx_equal_p (operands[2], operands[4]))
2393 || (rtx_equal_p (operands[1], operands[4])
2394 && rtx_equal_p (operands[2], operands[3])))"
2395 [(set (match_dup 0)
2396 (vec_merge:VF_128_256
2397 (minus:VF_128_256 (match_dup 1) (match_dup 2))
2398 (plus:VF_128_256 (match_dup 1) (match_dup 2))
2399 (match_dup 5)))]
2400 {
2401 int i, nelt = XVECLEN (operands[5], 0);
2402 HOST_WIDE_INT ival = 0;
2403
2404 for (i = 0; i < nelt; i++)
2405 if (INTVAL (XVECEXP (operands[5], 0, i)) < GET_MODE_NUNITS (<MODE>mode))
2406 ival |= HOST_WIDE_INT_1 << i;
2407
2408 operands[5] = GEN_INT (ival);
2409 })
2410
2411 (define_split
2412 [(set (match_operand:VF_128_256 0 "register_operand")
2413 (match_operator:VF_128_256 7 "addsub_vs_operator"
2414 [(vec_concat:<ssedoublemode>
2415 (plus:VF_128_256
2416 (match_operand:VF_128_256 1 "vector_operand")
2417 (match_operand:VF_128_256 2 "vector_operand"))
2418 (minus:VF_128_256
2419 (match_operand:VF_128_256 3 "register_operand")
2420 (match_operand:VF_128_256 4 "vector_operand")))
2421 (match_parallel 5 "addsub_vs_parallel"
2422 [(match_operand 6 "const_int_operand")])]))]
2423 "TARGET_SSE3
2424 && can_create_pseudo_p ()
2425 && ((rtx_equal_p (operands[1], operands[3])
2426 && rtx_equal_p (operands[2], operands[4]))
2427 || (rtx_equal_p (operands[1], operands[4])
2428 && rtx_equal_p (operands[2], operands[3])))"
2429 [(set (match_dup 0)
2430 (vec_merge:VF_128_256
2431 (minus:VF_128_256 (match_dup 3) (match_dup 4))
2432 (plus:VF_128_256 (match_dup 3) (match_dup 4))
2433 (match_dup 5)))]
2434 {
2435 int i, nelt = XVECLEN (operands[5], 0);
2436 HOST_WIDE_INT ival = 0;
2437
2438 for (i = 0; i < nelt; i++)
2439 if (INTVAL (XVECEXP (operands[5], 0, i)) >= GET_MODE_NUNITS (<MODE>mode))
2440 ival |= HOST_WIDE_INT_1 << i;
2441
2442 operands[5] = GEN_INT (ival);
2443 })
2444
2445 (define_insn "avx_h<plusminus_insn>v4df3"
2446 [(set (match_operand:V4DF 0 "register_operand" "=x")
2447 (vec_concat:V4DF
2448 (vec_concat:V2DF
2449 (plusminus:DF
2450 (vec_select:DF
2451 (match_operand:V4DF 1 "register_operand" "x")
2452 (parallel [(const_int 0)]))
2453 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2454 (plusminus:DF
2455 (vec_select:DF
2456 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
2457 (parallel [(const_int 0)]))
2458 (vec_select:DF (match_dup 2) (parallel [(const_int 1)]))))
2459 (vec_concat:V2DF
2460 (plusminus:DF
2461 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
2462 (vec_select:DF (match_dup 1) (parallel [(const_int 3)])))
2463 (plusminus:DF
2464 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
2465 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
2466 "TARGET_AVX"
2467 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
2468 [(set_attr "type" "sseadd")
2469 (set_attr "prefix" "vex")
2470 (set_attr "mode" "V4DF")])
2471
2472 (define_expand "sse3_haddv2df3"
2473 [(set (match_operand:V2DF 0 "register_operand")
2474 (vec_concat:V2DF
2475 (plus:DF
2476 (vec_select:DF
2477 (match_operand:V2DF 1 "register_operand")
2478 (parallel [(const_int 0)]))
2479 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2480 (plus:DF
2481 (vec_select:DF
2482 (match_operand:V2DF 2 "vector_operand")
2483 (parallel [(const_int 0)]))
2484 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
2485 "TARGET_SSE3")
2486
2487 (define_insn "*sse3_haddv2df3"
2488 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2489 (vec_concat:V2DF
2490 (plus:DF
2491 (vec_select:DF
2492 (match_operand:V2DF 1 "register_operand" "0,x")
2493 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))
2494 (vec_select:DF
2495 (match_dup 1)
2496 (parallel [(match_operand:SI 4 "const_0_to_1_operand")])))
2497 (plus:DF
2498 (vec_select:DF
2499 (match_operand:V2DF 2 "vector_operand" "xBm,xm")
2500 (parallel [(match_operand:SI 5 "const_0_to_1_operand")]))
2501 (vec_select:DF
2502 (match_dup 2)
2503 (parallel [(match_operand:SI 6 "const_0_to_1_operand")])))))]
2504 "TARGET_SSE3
2505 && INTVAL (operands[3]) != INTVAL (operands[4])
2506 && INTVAL (operands[5]) != INTVAL (operands[6])"
2507 "@
2508 haddpd\t{%2, %0|%0, %2}
2509 vhaddpd\t{%2, %1, %0|%0, %1, %2}"
2510 [(set_attr "isa" "noavx,avx")
2511 (set_attr "type" "sseadd")
2512 (set_attr "prefix" "orig,vex")
2513 (set_attr "mode" "V2DF")])
2514
2515 (define_insn "sse3_hsubv2df3"
2516 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2517 (vec_concat:V2DF
2518 (minus:DF
2519 (vec_select:DF
2520 (match_operand:V2DF 1 "register_operand" "0,x")
2521 (parallel [(const_int 0)]))
2522 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2523 (minus:DF
2524 (vec_select:DF
2525 (match_operand:V2DF 2 "vector_operand" "xBm,xm")
2526 (parallel [(const_int 0)]))
2527 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
2528 "TARGET_SSE3"
2529 "@
2530 hsubpd\t{%2, %0|%0, %2}
2531 vhsubpd\t{%2, %1, %0|%0, %1, %2}"
2532 [(set_attr "isa" "noavx,avx")
2533 (set_attr "type" "sseadd")
2534 (set_attr "prefix" "orig,vex")
2535 (set_attr "mode" "V2DF")])
2536
2537 (define_insn "*sse3_haddv2df3_low"
2538 [(set (match_operand:DF 0 "register_operand" "=x,x")
2539 (plus:DF
2540 (vec_select:DF
2541 (match_operand:V2DF 1 "register_operand" "0,x")
2542 (parallel [(match_operand:SI 2 "const_0_to_1_operand")]))
2543 (vec_select:DF
2544 (match_dup 1)
2545 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))))]
2546 "TARGET_SSE3
2547 && INTVAL (operands[2]) != INTVAL (operands[3])"
2548 "@
2549 haddpd\t{%0, %0|%0, %0}
2550 vhaddpd\t{%1, %1, %0|%0, %1, %1}"
2551 [(set_attr "isa" "noavx,avx")
2552 (set_attr "type" "sseadd1")
2553 (set_attr "prefix" "orig,vex")
2554 (set_attr "mode" "V2DF")])
2555
2556 (define_insn "*sse3_hsubv2df3_low"
2557 [(set (match_operand:DF 0 "register_operand" "=x,x")
2558 (minus:DF
2559 (vec_select:DF
2560 (match_operand:V2DF 1 "register_operand" "0,x")
2561 (parallel [(const_int 0)]))
2562 (vec_select:DF
2563 (match_dup 1)
2564 (parallel [(const_int 1)]))))]
2565 "TARGET_SSE3"
2566 "@
2567 hsubpd\t{%0, %0|%0, %0}
2568 vhsubpd\t{%1, %1, %0|%0, %1, %1}"
2569 [(set_attr "isa" "noavx,avx")
2570 (set_attr "type" "sseadd1")
2571 (set_attr "prefix" "orig,vex")
2572 (set_attr "mode" "V2DF")])
2573
2574 (define_insn "avx_h<plusminus_insn>v8sf3"
2575 [(set (match_operand:V8SF 0 "register_operand" "=x")
2576 (vec_concat:V8SF
2577 (vec_concat:V4SF
2578 (vec_concat:V2SF
2579 (plusminus:SF
2580 (vec_select:SF
2581 (match_operand:V8SF 1 "register_operand" "x")
2582 (parallel [(const_int 0)]))
2583 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
2584 (plusminus:SF
2585 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
2586 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
2587 (vec_concat:V2SF
2588 (plusminus:SF
2589 (vec_select:SF
2590 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
2591 (parallel [(const_int 0)]))
2592 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
2593 (plusminus:SF
2594 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
2595 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
2596 (vec_concat:V4SF
2597 (vec_concat:V2SF
2598 (plusminus:SF
2599 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
2600 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
2601 (plusminus:SF
2602 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
2603 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
2604 (vec_concat:V2SF
2605 (plusminus:SF
2606 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
2607 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
2608 (plusminus:SF
2609 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
2610 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
2611 "TARGET_AVX"
2612 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
2613 [(set_attr "type" "sseadd")
2614 (set_attr "prefix" "vex")
2615 (set_attr "mode" "V8SF")])
2616
2617 (define_insn "sse3_h<plusminus_insn>v4sf3"
2618 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2619 (vec_concat:V4SF
2620 (vec_concat:V2SF
2621 (plusminus:SF
2622 (vec_select:SF
2623 (match_operand:V4SF 1 "register_operand" "0,x")
2624 (parallel [(const_int 0)]))
2625 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
2626 (plusminus:SF
2627 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
2628 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
2629 (vec_concat:V2SF
2630 (plusminus:SF
2631 (vec_select:SF
2632 (match_operand:V4SF 2 "vector_operand" "xBm,xm")
2633 (parallel [(const_int 0)]))
2634 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
2635 (plusminus:SF
2636 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
2637 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
2638 "TARGET_SSE3"
2639 "@
2640 h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
2641 vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
2642 [(set_attr "isa" "noavx,avx")
2643 (set_attr "type" "sseadd")
2644 (set_attr "atom_unit" "complex")
2645 (set_attr "prefix" "orig,vex")
2646 (set_attr "prefix_rep" "1,*")
2647 (set_attr "mode" "V4SF")])
2648
2649 (define_mode_iterator REDUC_SSE_PLUS_MODE
2650 [(V2DF "TARGET_SSE") (V4SF "TARGET_SSE")])
2651
2652 (define_expand "reduc_plus_scal_<mode>"
2653 [(plus:REDUC_SSE_PLUS_MODE
2654 (match_operand:<ssescalarmode> 0 "register_operand")
2655 (match_operand:REDUC_SSE_PLUS_MODE 1 "register_operand"))]
2656 ""
2657 {
2658 rtx tmp = gen_reg_rtx (<MODE>mode);
2659 ix86_expand_reduc (gen_add<mode>3, tmp, operands[1]);
2660 emit_insn (gen_vec_extract<mode><ssescalarmodelower> (operands[0], tmp,
2661 const0_rtx));
2662 DONE;
2663 })
2664
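;; Sum all 16 bytes: fold the high 64-bit half onto the low one with a
;; V1TI shift and a byte add, then use PSADBW against zero to sum the
;; bytes of each 64-bit half into a single value, and extract the low
;; byte of the result.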
2665 (define_expand "reduc_plus_scal_v16qi"
2666 [(plus:V16QI
2667 (match_operand:QI 0 "register_operand")
2668 (match_operand:V16QI 1 "register_operand"))]
2669 "TARGET_SSE2"
2670 {
2671 rtx tmp = gen_reg_rtx (V1TImode);
2672 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, operands[1]),
2673 GEN_INT (64)));
2674 rtx tmp2 = gen_reg_rtx (V16QImode);
2675 emit_insn (gen_addv16qi3 (tmp2, operands[1], gen_lowpart (V16QImode, tmp)));
2676 rtx tmp3 = gen_reg_rtx (V16QImode);
2677 emit_move_insn (tmp3, CONST0_RTX (V16QImode));
2678 rtx tmp4 = gen_reg_rtx (V2DImode);
2679 emit_insn (gen_sse2_psadbw (tmp4, tmp2, tmp3));
2680 tmp4 = gen_lowpart (V16QImode, tmp4);
2681 emit_insn (gen_vec_extractv16qiqi (operands[0], tmp4, const0_rtx));
2682 DONE;
2683 })
2684
2685 (define_mode_iterator REDUC_PLUS_MODE
2686 [(V4DF "TARGET_AVX") (V8SF "TARGET_AVX")
2687 (V8DF "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
2688 (V32QI "TARGET_AVX") (V64QI "TARGET_AVX512F")])
2689
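;; Wider vectors are reduced by adding their high and low halves and
;; recursing on the half-width result.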
2690 (define_expand "reduc_plus_scal_<mode>"
2691 [(plus:REDUC_PLUS_MODE
2692 (match_operand:<ssescalarmode> 0 "register_operand")
2693 (match_operand:REDUC_PLUS_MODE 1 "register_operand"))]
2694 ""
2695 {
2696 rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode);
2697 emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1]));
2698 rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode);
2699 rtx tmp3 = gen_lowpart (<ssehalfvecmode>mode, operands[1]);
2700 emit_insn (gen_add<ssehalfvecmodelower>3 (tmp2, tmp, tmp3));
2701 emit_insn (gen_reduc_plus_scal_<ssehalfvecmodelower> (operands[0], tmp2));
2702 DONE;
2703 })
2704
2705 ;; Modes handled by reduc_sm{in,ax}* patterns.
2706 (define_mode_iterator REDUC_SSE_SMINMAX_MODE
2707 [(V4SF "TARGET_SSE") (V2DF "TARGET_SSE")
2708 (V4SI "TARGET_SSE2") (V8HI "TARGET_SSE2") (V16QI "TARGET_SSE2")
2709 (V2DI "TARGET_SSE4_2")])
2710
2711 (define_expand "reduc_<code>_scal_<mode>"
2712 [(smaxmin:REDUC_SSE_SMINMAX_MODE
2713 (match_operand:<ssescalarmode> 0 "register_operand")
2714 (match_operand:REDUC_SSE_SMINMAX_MODE 1 "register_operand"))]
2715 ""
2716 {
2717 rtx tmp = gen_reg_rtx (<MODE>mode);
2718 ix86_expand_reduc (gen_<code><mode>3, tmp, operands[1]);
2719 emit_insn (gen_vec_extract<mode><ssescalarmodelower> (operands[0], tmp,
2720 const0_rtx));
2721 DONE;
2722 })
2723
2724 (define_mode_iterator REDUC_SMINMAX_MODE
2725 [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
2726 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
2727 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
2728 (V64QI "TARGET_AVX512BW")
2729 (V32HI "TARGET_AVX512BW") (V16SI "TARGET_AVX512F")
2730 (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
2731 (V8DF "TARGET_AVX512F")])
2732
2733 (define_expand "reduc_<code>_scal_<mode>"
2734 [(smaxmin:REDUC_SMINMAX_MODE
2735 (match_operand:<ssescalarmode> 0 "register_operand")
2736 (match_operand:REDUC_SMINMAX_MODE 1 "register_operand"))]
2737 ""
2738 {
2739 rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode);
2740 emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1]));
2741 rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode);
2742 emit_insn (gen_<code><ssehalfvecmodelower>3
2743 (tmp2, tmp, gen_lowpart (<ssehalfvecmode>mode, operands[1])));
2744 emit_insn (gen_reduc_<code>_scal_<ssehalfvecmodelower> (operands[0], tmp2));
2745 DONE;
2746 })
2747
2748 (define_expand "reduc_<code>_scal_<mode>"
2749 [(umaxmin:VI_AVX512BW
2750 (match_operand:<ssescalarmode> 0 "register_operand")
2751 (match_operand:VI_AVX512BW 1 "register_operand"))]
2752 "TARGET_AVX512F"
2753 {
2754 rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode);
2755 emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1]));
2756 rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode);
2757 emit_insn (gen_<code><ssehalfvecmodelower>3
2758 (tmp2, tmp, gen_lowpart (<ssehalfvecmode>mode, operands[1])));
2759 emit_insn (gen_reduc_<code>_scal_<ssehalfvecmodelower> (operands[0], tmp2));
2760 DONE;
2761 })
2762
2763 (define_expand "reduc_<code>_scal_<mode>"
2764 [(umaxmin:VI_256
2765 (match_operand:<ssescalarmode> 0 "register_operand")
2766 (match_operand:VI_256 1 "register_operand"))]
2767 "TARGET_AVX2"
2768 {
2769 rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode);
2770 emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1]));
2771 rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode);
2772 emit_insn (gen_<code><ssehalfvecmodelower>3
2773 (tmp2, tmp, gen_lowpart (<ssehalfvecmode>mode, operands[1])));
2774 rtx tmp3 = gen_reg_rtx (<ssehalfvecmode>mode);
2775 ix86_expand_reduc (gen_<code><ssehalfvecmodelower>3, tmp3, tmp2);
2776 emit_insn (gen_vec_extract<ssehalfvecmodelower><ssescalarmodelower>
2777 (operands[0], tmp3, const0_rtx));
2778 DONE;
2779 })
2780
2781 (define_expand "reduc_umin_scal_v8hi"
2782 [(umin:V8HI
2783 (match_operand:HI 0 "register_operand")
2784 (match_operand:V8HI 1 "register_operand"))]
2785 "TARGET_SSE4_1"
2786 {
2787 rtx tmp = gen_reg_rtx (V8HImode);
2788 ix86_expand_reduc (gen_uminv8hi3, tmp, operands[1]);
2789 emit_insn (gen_vec_extractv8hihi (operands[0], tmp, const0_rtx));
2790 DONE;
2791 })
2792
2793 (define_insn "<mask_codefor>reducep<mode><mask_name>"
2794 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
2795 (unspec:VF_AVX512VL
2796 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")
2797 (match_operand:SI 2 "const_0_to_255_operand")]
2798 UNSPEC_REDUCE))]
2799 "TARGET_AVX512DQ"
2800 "vreduce<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
2801 [(set_attr "type" "sse")
2802 (set_attr "prefix" "evex")
2803 (set_attr "mode" "<MODE>")])
2804
2805 (define_insn "reduces<mode><mask_scalar_name>"
2806 [(set (match_operand:VF_128 0 "register_operand" "=v")
2807 (vec_merge:VF_128
2808 (unspec:VF_128
2809 [(match_operand:VF_128 1 "register_operand" "v")
2810 (match_operand:VF_128 2 "nonimmediate_operand" "vm")
2811 (match_operand:SI 3 "const_0_to_255_operand")]
2812 UNSPEC_REDUCE)
2813 (match_dup 1)
2814 (const_int 1)))]
2815 "TARGET_AVX512DQ"
2816 "vreduce<ssescalarmodesuffix>\t{%3, %2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2, %3}"
2817 [(set_attr "type" "sse")
2818 (set_attr "prefix" "evex")
2819 (set_attr "mode" "<MODE>")])
2820
2821 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2822 ;;
2823 ;; Parallel floating point comparisons
2824 ;;
2825 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2826
2827 (define_insn "avx_cmp<mode>3"
2828 [(set (match_operand:VF_128_256 0 "register_operand" "=x")
2829 (unspec:VF_128_256
2830 [(match_operand:VF_128_256 1 "register_operand" "x")
2831 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm")
2832 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2833 UNSPEC_PCMP))]
2834 "TARGET_AVX"
2835 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2836 [(set_attr "type" "ssecmp")
2837 (set_attr "length_immediate" "1")
2838 (set_attr "prefix" "vex")
2839 (set_attr "mode" "<MODE>")])
2840
2841 (define_insn "avx_vmcmp<mode>3"
2842 [(set (match_operand:VF_128 0 "register_operand" "=x")
2843 (vec_merge:VF_128
2844 (unspec:VF_128
2845 [(match_operand:VF_128 1 "register_operand" "x")
2846 (match_operand:VF_128 2 "nonimmediate_operand" "xm")
2847 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2848 UNSPEC_PCMP)
2849 (match_dup 1)
2850 (const_int 1)))]
2851 "TARGET_AVX"
2852 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}"
2853 [(set_attr "type" "ssecmp")
2854 (set_attr "length_immediate" "1")
2855 (set_attr "prefix" "vex")
2856 (set_attr "mode" "<ssescalarmode>")])
2857
2858 (define_insn "*<sse>_maskcmp<mode>3_comm"
2859 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
2860 (match_operator:VF_128_256 3 "sse_comparison_operator"
2861 [(match_operand:VF_128_256 1 "register_operand" "%0,x")
2862 (match_operand:VF_128_256 2 "vector_operand" "xBm,xm")]))]
2863 "TARGET_SSE
2864 && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
2865 "@
2866 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
2867 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2868 [(set_attr "isa" "noavx,avx")
2869 (set_attr "type" "ssecmp")
2870 (set_attr "length_immediate" "1")
2871 (set_attr "prefix" "orig,vex")
2872 (set_attr "mode" "<MODE>")])
2873
2874 (define_insn "<sse>_maskcmp<mode>3"
2875 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
2876 (match_operator:VF_128_256 3 "sse_comparison_operator"
2877 [(match_operand:VF_128_256 1 "register_operand" "0,x")
2878 (match_operand:VF_128_256 2 "vector_operand" "xBm,xm")]))]
2879 "TARGET_SSE"
2880 "@
2881 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
2882 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2883 [(set_attr "isa" "noavx,avx")
2884 (set_attr "type" "ssecmp")
2885 (set_attr "length_immediate" "1")
2886 (set_attr "prefix" "orig,vex")
2887 (set_attr "mode" "<MODE>")])
2888
2889 (define_insn "<sse>_vmmaskcmp<mode>3"
2890 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2891 (vec_merge:VF_128
2892 (match_operator:VF_128 3 "sse_comparison_operator"
2893 [(match_operand:VF_128 1 "register_operand" "0,x")
2894 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")])
2895 (match_dup 1)
2896 (const_int 1)))]
2897 "TARGET_SSE"
2898 "@
2899 cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
2900 vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
2901 [(set_attr "isa" "noavx,avx")
2902 (set_attr "type" "ssecmp")
2903 (set_attr "length_immediate" "1,*")
2904 (set_attr "prefix" "orig,vex")
2905 (set_attr "mode" "<ssescalarmode>")])
2906
2907 (define_mode_attr cmp_imm_predicate
2908 [(V16SF "const_0_to_31_operand") (V8DF "const_0_to_31_operand")
2909 (V16SI "const_0_to_7_operand") (V8DI "const_0_to_7_operand")
2910 (V8SF "const_0_to_31_operand") (V4DF "const_0_to_31_operand")
2911 (V8SI "const_0_to_7_operand") (V4DI "const_0_to_7_operand")
2912 (V4SF "const_0_to_31_operand") (V2DF "const_0_to_31_operand")
2913 (V4SI "const_0_to_7_operand") (V2DI "const_0_to_7_operand")
2914 (V32HI "const_0_to_7_operand") (V64QI "const_0_to_7_operand")
2915 (V16HI "const_0_to_7_operand") (V32QI "const_0_to_7_operand")
2916 (V8HI "const_0_to_7_operand") (V16QI "const_0_to_7_operand")])
2917
2918 (define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name><round_saeonly_name>"
2919 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
2920 (unspec:<avx512fmaskmode>
2921 [(match_operand:V48_AVX512VL 1 "register_operand" "v")
2922 (match_operand:V48_AVX512VL 2 "nonimmediate_operand" "<round_saeonly_constraint>")
2923 (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
2924 UNSPEC_PCMP))]
2925 "TARGET_AVX512F && <round_saeonly_mode512bit_condition>"
2926 "v<sseintprefix>cmp<ssemodesuffix>\t{%3, <round_saeonly_mask_scalar_merge_op4>%2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2<round_saeonly_mask_scalar_merge_op4>, %3}"
2927 [(set_attr "type" "ssecmp")
2928 (set_attr "length_immediate" "1")
2929 (set_attr "prefix" "evex")
2930 (set_attr "mode" "<sseinsnmode>")])
2931
2932 (define_insn "*<avx512>_cmp<mode>3<mask_scalar_merge_name><round_saeonly_name>"
2933 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
2934 (match_operator:<avx512fmaskmode> 3 "ix86_comparison_int_operator"
2935 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
2936 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "<round_saeonly_constraint>")]))]
2937 "TARGET_AVX512F && <round_saeonly_mode512bit_condition>"
2938 "vpcmp<ssemodesuffix>\t{%I3, <round_saeonly_mask_scalar_merge_op4>%2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2<round_saeonly_mask_scalar_merge_op4>, %I3}"
2939 [(set_attr "type" "ssecmp")
2940 (set_attr "length_immediate" "1")
2941 (set_attr "prefix" "evex")
2942 (set_attr "mode" "<sseinsnmode>")])
2943
2944 (define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name>"
2945 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
2946 (unspec:<avx512fmaskmode>
2947 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
2948 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
2949 (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
2950 UNSPEC_PCMP))]
2951 "TARGET_AVX512BW"
2952 "vpcmp<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
2953 [(set_attr "type" "ssecmp")
2954 (set_attr "length_immediate" "1")
2955 (set_attr "prefix" "evex")
2956 (set_attr "mode" "<sseinsnmode>")])
2957
2958 (define_insn "*<avx512>_cmp<mode>3<mask_scalar_merge_name>"
2959 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
2960 (match_operator:<avx512fmaskmode> 3 "ix86_comparison_int_operator"
2961 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
2962 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")]))]
2963 "TARGET_AVX512BW"
2964 "vpcmp<ssemodesuffix>\t{%I3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %I3}"
2965 [(set_attr "type" "ssecmp")
2966 (set_attr "length_immediate" "1")
2967 (set_attr "prefix" "evex")
2968 (set_attr "mode" "<sseinsnmode>")])
2969
2970 (define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
2971 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
2972 (unspec:<avx512fmaskmode>
2973 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
2974 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
2975 (match_operand:SI 3 "const_0_to_7_operand" "n")]
2976 UNSPEC_UNSIGNED_PCMP))]
2977 "TARGET_AVX512BW"
2978 "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
2979 [(set_attr "type" "ssecmp")
2980 (set_attr "length_immediate" "1")
2981 (set_attr "prefix" "evex")
2982 (set_attr "mode" "<sseinsnmode>")])
2983
2984 (define_insn "*<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
2985 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
2986 (match_operator:<avx512fmaskmode> 3 "ix86_comparison_uns_operator"
2987 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
2988 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")]))]
2989 "TARGET_AVX512BW"
2990 "vpcmpu<ssemodesuffix>\t{%I3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %I3}"
2991 [(set_attr "type" "ssecmp")
2992 (set_attr "length_immediate" "1")
2993 (set_attr "prefix" "evex")
2994 (set_attr "mode" "<sseinsnmode>")])
2995
2996 (define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
2997 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
2998 (unspec:<avx512fmaskmode>
2999 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
3000 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
3001 (match_operand:SI 3 "const_0_to_7_operand" "n")]
3002 UNSPEC_UNSIGNED_PCMP))]
3003 "TARGET_AVX512F"
3004 "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
3005 [(set_attr "type" "ssecmp")
3006 (set_attr "length_immediate" "1")
3007 (set_attr "prefix" "evex")
3008 (set_attr "mode" "<sseinsnmode>")])
3009
3010 (define_insn "*<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
3011 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
3012 (match_operator:<avx512fmaskmode> 3 "ix86_comparison_uns_operator"
3013 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
3014 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")]))]
3015 "TARGET_AVX512F"
3016 "vpcmpu<ssemodesuffix>\t{%I3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %I3}"
3017 [(set_attr "type" "ssecmp")
3018 (set_attr "length_immediate" "1")
3019 (set_attr "prefix" "evex")
3020 (set_attr "mode" "<sseinsnmode>")])
3021
3022 (define_insn "avx512f_vmcmp<mode>3<round_saeonly_name>"
3023 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
3024 (and:<avx512fmaskmode>
3025 (unspec:<avx512fmaskmode>
3026 [(match_operand:VF_128 1 "register_operand" "v")
3027 (match_operand:VF_128 2 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
3028 (match_operand:SI 3 "const_0_to_31_operand" "n")]
3029 UNSPEC_PCMP)
3030 (const_int 1)))]
3031 "TARGET_AVX512F"
3032 "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %<iptr>2<round_saeonly_op4>, %3}"
3033 [(set_attr "type" "ssecmp")
3034 (set_attr "length_immediate" "1")
3035 (set_attr "prefix" "evex")
3036 (set_attr "mode" "<ssescalarmode>")])
3037
3038 (define_insn "avx512f_vmcmp<mode>3_mask<round_saeonly_name>"
3039 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
3040 (and:<avx512fmaskmode>
3041 (unspec:<avx512fmaskmode>
3042 [(match_operand:VF_128 1 "register_operand" "v")
3043 (match_operand:VF_128 2 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
3044 (match_operand:SI 3 "const_0_to_31_operand" "n")]
3045 UNSPEC_PCMP)
3046 (and:<avx512fmaskmode>
3047 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")
3048 (const_int 1))))]
3049 "TARGET_AVX512F"
3050 "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %<iptr>2<round_saeonly_op5>, %3}"
3051 [(set_attr "type" "ssecmp")
3052 (set_attr "length_immediate" "1")
3053 (set_attr "prefix" "evex")
3054 (set_attr "mode" "<ssescalarmode>")])
3055
3056 (define_insn "avx512f_maskcmp<mode>3"
3057 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
3058 (match_operator:<avx512fmaskmode> 3 "sse_comparison_operator"
3059 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
3060 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "vm")]))]
3061 "TARGET_AVX512F"
3062 "vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
3063 [(set_attr "type" "ssecmp")
3064 (set_attr "length_immediate" "1")
3065 (set_attr "prefix" "evex")
3066 (set_attr "mode" "<sseinsnmode>")])
3067
3068 (define_insn "<sse>_<unord>comi<round_saeonly_name>"
3069 [(set (reg:CCFP FLAGS_REG)
3070 (compare:CCFP
3071 (vec_select:MODEF
3072 (match_operand:<ssevecmode> 0 "register_operand" "v")
3073 (parallel [(const_int 0)]))
3074 (vec_select:MODEF
3075 (match_operand:<ssevecmode> 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
3076 (parallel [(const_int 0)]))))]
3077 "SSE_FLOAT_MODE_P (<MODE>mode)"
3078 "%v<unord>comi<ssemodesuffix>\t{<round_saeonly_op2>%1, %0|%0, %<iptr>1<round_saeonly_op2>}"
3079 [(set_attr "type" "ssecomi")
3080 (set_attr "prefix" "maybe_vex")
3081 (set_attr "prefix_rep" "0")
3082 (set (attr "prefix_data16")
3083 (if_then_else (eq_attr "mode" "DF")
3084 (const_string "1")
3085 (const_string "0")))
3086 (set_attr "mode" "<MODE>")])
3087
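;; Standard vec_cmp/vec_cmpu expanders.  The AVX-512 variants produce a
;; mask register result; the pre-AVX-512 variants below produce a
;; full-width vector of -1/0 elements instead.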
3088 (define_expand "vec_cmp<mode><avx512fmaskmodelower>"
3089 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
3090 (match_operator:<avx512fmaskmode> 1 ""
3091 [(match_operand:V48_AVX512VL 2 "register_operand")
3092 (match_operand:V48_AVX512VL 3 "nonimmediate_operand")]))]
3093 "TARGET_AVX512F"
3094 {
3095 bool ok = ix86_expand_mask_vec_cmp (operands);
3096 gcc_assert (ok);
3097 DONE;
3098 })
3099
3100 (define_expand "vec_cmp<mode><avx512fmaskmodelower>"
3101 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
3102 (match_operator:<avx512fmaskmode> 1 ""
3103 [(match_operand:VI12_AVX512VL 2 "register_operand")
3104 (match_operand:VI12_AVX512VL 3 "nonimmediate_operand")]))]
3105 "TARGET_AVX512BW"
3106 {
3107 bool ok = ix86_expand_mask_vec_cmp (operands);
3108 gcc_assert (ok);
3109 DONE;
3110 })
3111
3112 (define_expand "vec_cmp<mode><sseintvecmodelower>"
3113 [(set (match_operand:<sseintvecmode> 0 "register_operand")
3114 (match_operator:<sseintvecmode> 1 ""
3115 [(match_operand:VI_256 2 "register_operand")
3116 (match_operand:VI_256 3 "nonimmediate_operand")]))]
3117 "TARGET_AVX2"
3118 {
3119 bool ok = ix86_expand_int_vec_cmp (operands);
3120 gcc_assert (ok);
3121 DONE;
3122 })
3123
3124 (define_expand "vec_cmp<mode><sseintvecmodelower>"
3125 [(set (match_operand:<sseintvecmode> 0 "register_operand")
3126 (match_operator:<sseintvecmode> 1 ""
3127 [(match_operand:VI124_128 2 "register_operand")
3128 (match_operand:VI124_128 3 "vector_operand")]))]
3129 "TARGET_SSE2"
3130 {
3131 bool ok = ix86_expand_int_vec_cmp (operands);
3132 gcc_assert (ok);
3133 DONE;
3134 })
3135
3136 (define_expand "vec_cmpv2div2di"
3137 [(set (match_operand:V2DI 0 "register_operand")
3138 (match_operator:V2DI 1 ""
3139 [(match_operand:V2DI 2 "register_operand")
3140 (match_operand:V2DI 3 "vector_operand")]))]
3141 "TARGET_SSE4_2"
3142 {
3143 bool ok = ix86_expand_int_vec_cmp (operands);
3144 gcc_assert (ok);
3145 DONE;
3146 })
3147
3148 (define_expand "vec_cmp<mode><sseintvecmodelower>"
3149 [(set (match_operand:<sseintvecmode> 0 "register_operand")
3150 (match_operator:<sseintvecmode> 1 ""
3151 [(match_operand:VF_256 2 "register_operand")
3152 (match_operand:VF_256 3 "nonimmediate_operand")]))]
3153 "TARGET_AVX"
3154 {
3155 bool ok = ix86_expand_fp_vec_cmp (operands);
3156 gcc_assert (ok);
3157 DONE;
3158 })
3159
3160 (define_expand "vec_cmp<mode><sseintvecmodelower>"
3161 [(set (match_operand:<sseintvecmode> 0 "register_operand")
3162 (match_operator:<sseintvecmode> 1 ""
3163 [(match_operand:VF_128 2 "register_operand")
3164 (match_operand:VF_128 3 "vector_operand")]))]
3165 "TARGET_SSE"
3166 {
3167 bool ok = ix86_expand_fp_vec_cmp (operands);
3168 gcc_assert (ok);
3169 DONE;
3170 })
3171
3172 (define_expand "vec_cmpu<mode><avx512fmaskmodelower>"
3173 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
3174 (match_operator:<avx512fmaskmode> 1 ""
3175 [(match_operand:VI48_AVX512VL 2 "register_operand")
3176 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand")]))]
3177 "TARGET_AVX512F"
3178 {
3179 bool ok = ix86_expand_mask_vec_cmp (operands);
3180 gcc_assert (ok);
3181 DONE;
3182 })
3183
3184 (define_expand "vec_cmpu<mode><avx512fmaskmodelower>"
3185 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
3186 (match_operator:<avx512fmaskmode> 1 ""
3187 [(match_operand:VI12_AVX512VL 2 "register_operand")
3188 (match_operand:VI12_AVX512VL 3 "nonimmediate_operand")]))]
3189 "TARGET_AVX512BW"
3190 {
3191 bool ok = ix86_expand_mask_vec_cmp (operands);
3192 gcc_assert (ok);
3193 DONE;
3194 })
3195
3196 (define_expand "vec_cmpu<mode><sseintvecmodelower>"
3197 [(set (match_operand:<sseintvecmode> 0 "register_operand")
3198 (match_operator:<sseintvecmode> 1 ""
3199 [(match_operand:VI_256 2 "register_operand")
3200 (match_operand:VI_256 3 "nonimmediate_operand")]))]
3201 "TARGET_AVX2"
3202 {
3203 bool ok = ix86_expand_int_vec_cmp (operands);
3204 gcc_assert (ok);
3205 DONE;
3206 })
3207
3208 (define_expand "vec_cmpu<mode><sseintvecmodelower>"
3209 [(set (match_operand:<sseintvecmode> 0 "register_operand")
3210 (match_operator:<sseintvecmode> 1 ""
3211 [(match_operand:VI124_128 2 "register_operand")
3212 (match_operand:VI124_128 3 "vector_operand")]))]
3213 "TARGET_SSE2"
3214 {
3215 bool ok = ix86_expand_int_vec_cmp (operands);
3216 gcc_assert (ok);
3217 DONE;
3218 })
3219
3220 (define_expand "vec_cmpuv2div2di"
3221 [(set (match_operand:V2DI 0 "register_operand")
3222 (match_operator:V2DI 1 ""
3223 [(match_operand:V2DI 2 "register_operand")
3224 (match_operand:V2DI 3 "vector_operand")]))]
3225 "TARGET_SSE4_2"
3226 {
3227 bool ok = ix86_expand_int_vec_cmp (operands);
3228 gcc_assert (ok);
3229 DONE;
3230 })
3231
3232 (define_expand "vec_cmpeqv2div2di"
3233 [(set (match_operand:V2DI 0 "register_operand")
3234 (match_operator:V2DI 1 ""
3235 [(match_operand:V2DI 2 "register_operand")
3236 (match_operand:V2DI 3 "vector_operand")]))]
3237 "TARGET_SSE4_1"
3238 {
3239 bool ok = ix86_expand_int_vec_cmp (operands);
3240 gcc_assert (ok);
3241 DONE;
3242 })
3243
3244 (define_expand "vcond<V_512:mode><VF_512:mode>"
3245 [(set (match_operand:V_512 0 "register_operand")
3246 (if_then_else:V_512
3247 (match_operator 3 ""
3248 [(match_operand:VF_512 4 "nonimmediate_operand")
3249 (match_operand:VF_512 5 "nonimmediate_operand")])
3250 (match_operand:V_512 1 "general_operand")
3251 (match_operand:V_512 2 "general_operand")))]
3252 "TARGET_AVX512F
3253 && (GET_MODE_NUNITS (<V_512:MODE>mode)
3254 == GET_MODE_NUNITS (<VF_512:MODE>mode))"
3255 {
3256 bool ok = ix86_expand_fp_vcond (operands);
3257 gcc_assert (ok);
3258 DONE;
3259 })
3260
3261 (define_expand "vcond<V_256:mode><VF_256:mode>"
3262 [(set (match_operand:V_256 0 "register_operand")
3263 (if_then_else:V_256
3264 (match_operator 3 ""
3265 [(match_operand:VF_256 4 "nonimmediate_operand")
3266 (match_operand:VF_256 5 "nonimmediate_operand")])
3267 (match_operand:V_256 1 "general_operand")
3268 (match_operand:V_256 2 "general_operand")))]
3269 "TARGET_AVX
3270 && (GET_MODE_NUNITS (<V_256:MODE>mode)
3271 == GET_MODE_NUNITS (<VF_256:MODE>mode))"
3272 {
3273 bool ok = ix86_expand_fp_vcond (operands);
3274 gcc_assert (ok);
3275 DONE;
3276 })
3277
3278 (define_expand "vcond<V_128:mode><VF_128:mode>"
3279 [(set (match_operand:V_128 0 "register_operand")
3280 (if_then_else:V_128
3281 (match_operator 3 ""
3282 [(match_operand:VF_128 4 "vector_operand")
3283 (match_operand:VF_128 5 "vector_operand")])
3284 (match_operand:V_128 1 "general_operand")
3285 (match_operand:V_128 2 "general_operand")))]
3286 "TARGET_SSE
3287 && (GET_MODE_NUNITS (<V_128:MODE>mode)
3288 == GET_MODE_NUNITS (<VF_128:MODE>mode))"
3289 {
3290 bool ok = ix86_expand_fp_vcond (operands);
3291 gcc_assert (ok);
3292 DONE;
3293 })
3294
3295 (define_expand "vcond_mask_<mode><avx512fmaskmodelower>"
3296 [(set (match_operand:V48_AVX512VL 0 "register_operand")
3297 (vec_merge:V48_AVX512VL
3298 (match_operand:V48_AVX512VL 1 "nonimmediate_operand")
3299 (match_operand:V48_AVX512VL 2 "nonimm_or_0_operand")
3300 (match_operand:<avx512fmaskmode> 3 "register_operand")))]
3301 "TARGET_AVX512F")
3302
3303 (define_expand "vcond_mask_<mode><avx512fmaskmodelower>"
3304 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
3305 (vec_merge:VI12_AVX512VL
3306 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
3307 (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand")
3308 (match_operand:<avx512fmaskmode> 3 "register_operand")))]
3309 "TARGET_AVX512BW")
3310
3311 ;; As vcondv4div4df and vcondv8siv8sf are enabled already with TARGET_AVX,
3312 ;; and their condition can be folded late into a constant, we need to
3313 ;; support vcond_mask_v4div4di and vcond_mask_v8siv8si for TARGET_AVX.
3314 (define_mode_iterator VI_256_AVX2 [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
3315 V8SI V4DI])
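
;; A hedged illustration of the situation described above (the function and
;; the option choice are examples only): built with -mavx but not -mavx2,
;; the double comparison below may be folded to a constant after unrolling
;; and constant propagation, leaving only a V4DI blend that has to be
;; matched by vcond_mask_v4div4di even though no 256-bit integer compare
;; instruction is available.
;;
;;   void
;;   sel (long long *r, const long long *a, const long long *b,
;;        const double *x)
;;   {
;;     for (int i = 0; i < 4; i++)
;;       r[i] = x[i] < 1.0 ? a[i] : b[i];
;;   }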
3316
3317 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
3318 [(set (match_operand:VI_256_AVX2 0 "register_operand")
3319 (vec_merge:VI_256_AVX2
3320 (match_operand:VI_256_AVX2 1 "nonimmediate_operand")
3321 (match_operand:VI_256_AVX2 2 "nonimm_or_0_operand")
3322 (match_operand:<sseintvecmode> 3 "register_operand")))]
3323 "TARGET_AVX"
3324 {
3325 ix86_expand_sse_movcc (operands[0], operands[3],
3326 operands[1], operands[2]);
3327 DONE;
3328 })
3329
3330 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
3331 [(set (match_operand:VI124_128 0 "register_operand")
3332 (vec_merge:VI124_128
3333 (match_operand:VI124_128 1 "vector_operand")
3334 (match_operand:VI124_128 2 "nonimm_or_0_operand")
3335 (match_operand:<sseintvecmode> 3 "register_operand")))]
3336 "TARGET_SSE2"
3337 {
3338 ix86_expand_sse_movcc (operands[0], operands[3],
3339 operands[1], operands[2]);
3340 DONE;
3341 })
3342
3343 (define_expand "vcond_mask_v2div2di"
3344 [(set (match_operand:V2DI 0 "register_operand")
3345 (vec_merge:V2DI
3346 (match_operand:V2DI 1 "vector_operand")
3347 (match_operand:V2DI 2 "nonimm_or_0_operand")
3348 (match_operand:V2DI 3 "register_operand")))]
3349 "TARGET_SSE4_2"
3350 {
3351 ix86_expand_sse_movcc (operands[0], operands[3],
3352 operands[1], operands[2]);
3353 DONE;
3354 })
3355
3356 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
3357 [(set (match_operand:VF_256 0 "register_operand")
3358 (vec_merge:VF_256
3359 (match_operand:VF_256 1 "nonimmediate_operand")
3360 (match_operand:VF_256 2 "nonimm_or_0_operand")
3361 (match_operand:<sseintvecmode> 3 "register_operand")))]
3362 "TARGET_AVX"
3363 {
3364 ix86_expand_sse_movcc (operands[0], operands[3],
3365 operands[1], operands[2]);
3366 DONE;
3367 })
3368
3369 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
3370 [(set (match_operand:VF_128 0 "register_operand")
3371 (vec_merge:VF_128
3372 (match_operand:VF_128 1 "vector_operand")
3373 (match_operand:VF_128 2 "nonimm_or_0_operand")
3374 (match_operand:<sseintvecmode> 3 "register_operand")))]
3375 "TARGET_SSE"
3376 {
3377 ix86_expand_sse_movcc (operands[0], operands[3],
3378 operands[1], operands[2]);
3379 DONE;
3380 })
3381
3382 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3383 ;;
3384 ;; Parallel floating point logical operations
3385 ;;
3386 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3387
3388 (define_insn "<sse>_andnot<mode>3<mask_name>"
3389 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x,v,v")
3390 (and:VF_128_256
3391 (not:VF_128_256
3392 (match_operand:VF_128_256 1 "register_operand" "0,x,v,v"))
3393 (match_operand:VF_128_256 2 "vector_operand" "xBm,xm,vm,vm")))]
3394 "TARGET_SSE && <mask_avx512vl_condition>"
3395 {
3396 char buf[128];
3397 const char *ops;
3398 const char *suffix;
3399
3400 switch (which_alternative)
3401 {
3402 case 0:
3403 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
3404 break;
3405 case 1:
3406 case 2:
3407 case 3:
3408 ops = "vandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
3409 break;
3410 default:
3411 gcc_unreachable ();
3412 }
3413
3414 switch (get_attr_mode (insn))
3415 {
3416 case MODE_V8SF:
3417 case MODE_V4SF:
3418 suffix = "ps";
3419 break;
3420 case MODE_OI:
3421 case MODE_TI:
3422 /* There is no vandnp[sd] in avx512f. Use vpandn[qd]. */
3423 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
3424 ops = "vpandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
3425 break;
3426 default:
3427 suffix = "<ssemodesuffix>";
3428 }
3429
3430 snprintf (buf, sizeof (buf), ops, suffix);
3431 output_asm_insn (buf, operands);
3432 return "";
3433 }
3434 [(set_attr "isa" "noavx,avx,avx512dq,avx512f")
3435 (set_attr "type" "sselog")
3436 (set_attr "prefix" "orig,maybe_vex,evex,evex")
3437 (set (attr "mode")
3438 (cond [(and (match_test "<mask_applied>")
3439 (and (eq_attr "alternative" "1")
3440 (match_test "!TARGET_AVX512DQ")))
3441 (const_string "<sseintvecmode2>")
3442 (eq_attr "alternative" "3")
3443 (const_string "<sseintvecmode2>")
3444 (match_test "TARGET_AVX")
3445 (const_string "<MODE>")
3446 (match_test "optimize_function_for_size_p (cfun)")
3447 (const_string "V4SF")
3448 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
3449 (const_string "V4SF")
3450 ]
3451 (const_string "<MODE>")))])
3452
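;; 512-bit andnot.  Plain AVX512F has no vandnps/vandnpd on ZMM registers,
;; so without AVX512DQ the integer vpandnd/vpandnq form is emitted instead.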
3453 (define_insn "<sse>_andnot<mode>3<mask_name>"
3454 [(set (match_operand:VF_512 0 "register_operand" "=v")
3455 (and:VF_512
3456 (not:VF_512
3457 (match_operand:VF_512 1 "register_operand" "v"))
3458 (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
3459 "TARGET_AVX512F"
3460 {
3461 char buf[128];
3462 const char *ops;
3463 const char *suffix;
3464
3465 suffix = "<ssemodesuffix>";
3466 ops = "";
3467
3468 /* There is no vandnp[sd] in avx512f. Use vpandn[qd]. */
3469 if (!TARGET_AVX512DQ)
3470 {
3471 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
3472 ops = "p";
3473 }
3474
3475 snprintf (buf, sizeof (buf),
3476 "v%sandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}",
3477 ops, suffix);
3478 output_asm_insn (buf, operands);
3479 return "";
3480 }
3481 [(set_attr "type" "sselog")
3482 (set_attr "prefix" "evex")
3483 (set (attr "mode")
3484 (if_then_else (match_test "TARGET_AVX512DQ")
3485 (const_string "<sseinsnmode>")
3486 (const_string "XI")))])
3487
3488 (define_expand "<code><mode>3<mask_name>"
3489 [(set (match_operand:VF_128_256 0 "register_operand")
3490 (any_logic:VF_128_256
3491 (match_operand:VF_128_256 1 "vector_operand")
3492 (match_operand:VF_128_256 2 "vector_operand")))]
3493 "TARGET_SSE && <mask_avx512vl_condition>"
3494 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
3495
3496 (define_expand "<code><mode>3<mask_name>"
3497 [(set (match_operand:VF_512 0 "register_operand")
3498 (any_logic:VF_512
3499 (match_operand:VF_512 1 "nonimmediate_operand")
3500 (match_operand:VF_512 2 "nonimmediate_operand")))]
3501 "TARGET_AVX512F"
3502 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
3503
3504 (define_insn "*<code><mode>3<mask_name>"
3505 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x,v,v")
3506 (any_logic:VF_128_256
3507 (match_operand:VF_128_256 1 "vector_operand" "%0,x,v,v")
3508 (match_operand:VF_128_256 2 "vector_operand" "xBm,xm,vm,vm")))]
3509 "TARGET_SSE && <mask_avx512vl_condition>
3510 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3511 {
3512 char buf[128];
3513 const char *ops;
3514 const char *suffix;
3515
3516 switch (which_alternative)
3517 {
3518 case 0:
3519 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
3520 break;
3521 case 1:
3522 case 2:
3523 case 3:
3524 ops = "v<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
3525 break;
3526 default:
3527 gcc_unreachable ();
3528 }
3529
3530 switch (get_attr_mode (insn))
3531 {
3532 case MODE_V8SF:
3533 case MODE_V4SF:
3534 suffix = "ps";
3535 break;
3536 case MODE_OI:
3537 case MODE_TI:
3538 /* There is no v<logic>p[sd] in avx512f. Use vp<logic>[qd]. */
3539 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
3540 ops = "vp<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
3541 break;
3542 default:
3543 suffix = "<ssemodesuffix>";
3544 }
3545
3546 snprintf (buf, sizeof (buf), ops, suffix);
3547 output_asm_insn (buf, operands);
3548 return "";
3549 }
3550 [(set_attr "isa" "noavx,avx,avx512dq,avx512f")
3551 (set_attr "type" "sselog")
3552 (set_attr "prefix" "orig,maybe_evex,evex,evex")
3553 (set (attr "mode")
3554 (cond [(and (match_test "<mask_applied>")
3555 (and (eq_attr "alternative" "1")
3556 (match_test "!TARGET_AVX512DQ")))
3557 (const_string "<sseintvecmode2>")
3558 (eq_attr "alternative" "3")
3559 (const_string "<sseintvecmode2>")
3560 (match_test "TARGET_AVX")
3561 (const_string "<MODE>")
3562 (match_test "optimize_function_for_size_p (cfun)")
3563 (const_string "V4SF")
3564 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
3565 (const_string "V4SF")
3566 ]
3567 (const_string "<MODE>")))])
3568
3569 (define_insn "*<code><mode>3<mask_name>"
3570 [(set (match_operand:VF_512 0 "register_operand" "=v")
3571 (any_logic:VF_512
3572 (match_operand:VF_512 1 "nonimmediate_operand" "%v")
3573 (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
3574 "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3575 {
3576 char buf[128];
3577 const char *ops;
3578 const char *suffix;
3579
3580 suffix = "<ssemodesuffix>";
3581 ops = "";
3582
3583 /* There is no v<logic>p[sd] in avx512f. Use vp<logic>[dq]. */
3584 if (!TARGET_AVX512DQ)
3585 {
3586 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
3587 ops = "p";
3588 }
3589
3590 snprintf (buf, sizeof (buf),
3591 "v%s<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}",
3592 ops, suffix);
3593 output_asm_insn (buf, operands);
3594 return "";
3595 }
3596 [(set_attr "type" "sselog")
3597 (set_attr "prefix" "evex")
3598 (set (attr "mode")
3599 (if_then_else (match_test "TARGET_AVX512DQ")
3600 (const_string "<sseinsnmode>")
3601 (const_string "XI")))])
3602
3603 (define_expand "copysign<mode>3"
3604 [(set (match_dup 4)
3605 (and:VF
3606 (not:VF (match_dup 3))
3607 (match_operand:VF 1 "vector_operand")))
3608 (set (match_dup 5)
3609 (and:VF (match_dup 3)
3610 (match_operand:VF 2 "vector_operand")))
3611 (set (match_operand:VF 0 "register_operand")
3612 (ior:VF (match_dup 4) (match_dup 5)))]
3613 "TARGET_SSE"
3614 {
3615 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
3616
3617 operands[4] = gen_reg_rtx (<MODE>mode);
3618 operands[5] = gen_reg_rtx (<MODE>mode);
3619 })
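
;; The expansion above computes copysign element-wise as
;; (~signmask & op1) | (signmask & op2).  A minimal scalar C reference of
;; the same bit manipulation (illustration only, not code used by the
;; compiler):
;;
;;   double
;;   copysign_ref (double x, double y)
;;   {
;;     unsigned long long bx, by, m = 1ULL << 63;
;;     __builtin_memcpy (&bx, &x, sizeof bx);
;;     __builtin_memcpy (&by, &y, sizeof by);
;;     bx = (bx & ~m) | (by & m);
;;     __builtin_memcpy (&x, &bx, sizeof bx);
;;     return x;
;;   }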
3620
3621 (define_expand "xorsign<mode>3"
3622 [(set (match_dup 4)
3623 (and:VF (match_dup 3)
3624 (match_operand:VF 2 "vector_operand")))
3625 (set (match_operand:VF 0 "register_operand")
3626 (xor:VF (match_dup 4)
3627 (match_operand:VF 1 "vector_operand")))]
3628 "TARGET_SSE"
3629 {
3630 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
3631
3632 operands[4] = gen_reg_rtx (<MODE>mode);
3633 })
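
;; xorsign (op1, op2) above is op1 with its sign bit XORed with the sign
;; bit of op2, i.e. the value of op1 * copysign (1.0, op2) without the
;; multiply.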
3634
3635 (define_expand "signbit<mode>2"
3636 [(set (match_operand:<sseintvecmode> 0 "register_operand")
3637 (lshiftrt:<sseintvecmode>
3638 (subreg:<sseintvecmode>
3639 (match_operand:VF1_AVX2 1 "register_operand") 0)
3640 (match_dup 2)))]
3641 "TARGET_SSE2"
3642 "operands[2] = GEN_INT (GET_MODE_UNIT_BITSIZE (<MODE>mode)-1);")
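
;; signbit is a logical right shift of the float bits by one less than the
;; element width, leaving 1 in each lane whose sign bit was set and 0
;; otherwise.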
3643
3644 ;; Also define scalar versions. These are used for abs, neg, and
3645 ;; conditional move. Using subregs into vector modes causes register
3646 ;; allocation lossage. These patterns do not allow memory operands
3647 ;; because the native instructions read the full 128 bits.
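;;
;; For instance (a hedged example; the function name is arbitrary), with
;; SSE2 math in effect
;;
;;   double
;;   my_fabs (double x)
;;   {
;;     return __builtin_fabs (x);
;;   }
;;
;; clears the sign bit with a full-width 128-bit mask constant, which is
;; the kind of operation the scalar patterns below implement.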
3648
3649 (define_insn "*andnot<mode>3"
3650 [(set (match_operand:MODEF 0 "register_operand" "=x,x,v,v")
3651 (and:MODEF
3652 (not:MODEF
3653 (match_operand:MODEF 1 "register_operand" "0,x,v,v"))
3654 (match_operand:MODEF 2 "register_operand" "x,x,v,v")))]
3655 "SSE_FLOAT_MODE_P (<MODE>mode)"
3656 {
3657 char buf[128];
3658 const char *ops;
3659 const char *suffix
3660 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
3661
3662 switch (which_alternative)
3663 {
3664 case 0:
3665 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
3666 break;
3667 case 1:
3668 ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3669 break;
3670 case 2:
3671 if (TARGET_AVX512DQ)
3672 ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3673 else
3674 {
3675 suffix = <MODE>mode == DFmode ? "q" : "d";
3676 ops = "vpandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3677 }
3678 break;
3679 case 3:
3680 if (TARGET_AVX512DQ)
3681 ops = "vandn%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3682 else
3683 {
3684 suffix = <MODE>mode == DFmode ? "q" : "d";
3685 ops = "vpandn%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3686 }
3687 break;
3688 default:
3689 gcc_unreachable ();
3690 }
3691
3692 snprintf (buf, sizeof (buf), ops, suffix);
3693 output_asm_insn (buf, operands);
3694 return "";
3695 }
3696 [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
3697 (set_attr "type" "sselog")
3698 (set_attr "prefix" "orig,vex,evex,evex")
3699 (set (attr "mode")
3700 (cond [(eq_attr "alternative" "2")
3701 (if_then_else (match_test "TARGET_AVX512DQ")
3702 (const_string "<ssevecmode>")
3703 (const_string "TI"))
3704 (eq_attr "alternative" "3")
3705 (if_then_else (match_test "TARGET_AVX512DQ")
3706 (const_string "<avx512fvecmode>")
3707 (const_string "XI"))
3708 (match_test "TARGET_AVX")
3709 (const_string "<ssevecmode>")
3710 (match_test "optimize_function_for_size_p (cfun)")
3711 (const_string "V4SF")
3712 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
3713 (const_string "V4SF")
3714 ]
3715 (const_string "<ssevecmode>")))])
3716
3717 (define_insn "*andnottf3"
3718 [(set (match_operand:TF 0 "register_operand" "=x,x,v,v")
3719 (and:TF
3720 (not:TF (match_operand:TF 1 "register_operand" "0,x,v,v"))
3721 (match_operand:TF 2 "vector_operand" "xBm,xm,vm,v")))]
3722 "TARGET_SSE"
3723 {
3724 char buf[128];
3725 const char *ops;
3726 const char *tmp
3727 = (which_alternative >= 2 ? "pandnq"
3728 : get_attr_mode (insn) == MODE_V4SF ? "andnps" : "pandn");
3729
3730 switch (which_alternative)
3731 {
3732 case 0:
3733 ops = "%s\t{%%2, %%0|%%0, %%2}";
3734 break;
3735 case 1:
3736 case 2:
3737 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3738 break;
3739 case 3:
3740 ops = "v%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3741 break;
3742 default:
3743 gcc_unreachable ();
3744 }
3745
3746 snprintf (buf, sizeof (buf), ops, tmp);
3747 output_asm_insn (buf, operands);
3748 return "";
3749 }
3750 [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
3751 (set_attr "type" "sselog")
3752 (set (attr "prefix_data16")
3753 (if_then_else
3754 (and (eq_attr "alternative" "0")
3755 (eq_attr "mode" "TI"))
3756 (const_string "1")
3757 (const_string "*")))
3758 (set_attr "prefix" "orig,vex,evex,evex")
3759 (set (attr "mode")
3760 (cond [(eq_attr "alternative" "2")
3761 (const_string "TI")
3762 (eq_attr "alternative" "3")
3763 (const_string "XI")
3764 (match_test "TARGET_AVX")
3765 (const_string "TI")
3766 (ior (not (match_test "TARGET_SSE2"))
3767 (match_test "optimize_function_for_size_p (cfun)"))
3768 (const_string "V4SF")
3769 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
3770 (const_string "V4SF")
3771 ]
3772 (const_string "TI")))])
3773
3774 (define_insn "*<code><mode>3"
3775 [(set (match_operand:MODEF 0 "register_operand" "=x,x,v,v")
3776 (any_logic:MODEF
3777 (match_operand:MODEF 1 "register_operand" "%0,x,v,v")
3778 (match_operand:MODEF 2 "register_operand" "x,x,v,v")))]
3779 "SSE_FLOAT_MODE_P (<MODE>mode)"
3780 {
3781 char buf[128];
3782 const char *ops;
3783 const char *suffix
3784 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
3785
3786 switch (which_alternative)
3787 {
3788 case 0:
3789 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
3790 break;
3791 case 2:
3792 if (!TARGET_AVX512DQ)
3793 {
3794 suffix = <MODE>mode == DFmode ? "q" : "d";
3795 ops = "vp<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3796 break;
3797 }
3798 /* FALLTHRU */
3799 case 1:
3800 ops = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3801 break;
3802 case 3:
3803 if (TARGET_AVX512DQ)
3804 ops = "v<logic>%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3805 else
3806 {
3807 suffix = <MODE>mode == DFmode ? "q" : "d";
3808 ops = "vp<logic>%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3809 }
3810 break;
3811 default:
3812 gcc_unreachable ();
3813 }
3814
3815 snprintf (buf, sizeof (buf), ops, suffix);
3816 output_asm_insn (buf, operands);
3817 return "";
3818 }
3819 [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
3820 (set_attr "type" "sselog")
3821 (set_attr "prefix" "orig,vex,evex,evex")
3822 (set (attr "mode")
3823 (cond [(eq_attr "alternative" "2")
3824 (if_then_else (match_test "TARGET_AVX512DQ")
3825 (const_string "<ssevecmode>")
3826 (const_string "TI"))
3827 (eq_attr "alternative" "3")
3828 (if_then_else (match_test "TARGET_AVX512DQ")
3829 (const_string "<avx512fvecmode>")
3830 (const_string "XI"))
3831 (match_test "TARGET_AVX")
3832 (const_string "<ssevecmode>")
3833 (match_test "optimize_function_for_size_p (cfun)")
3834 (const_string "V4SF")
3835 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
3836 (const_string "V4SF")
3837 ]
3838 (const_string "<ssevecmode>")))])
3839
3840 (define_expand "<code>tf3"
3841 [(set (match_operand:TF 0 "register_operand")
3842 (any_logic:TF
3843 (match_operand:TF 1 "vector_operand")
3844 (match_operand:TF 2 "vector_operand")))]
3845 "TARGET_SSE"
3846 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
3847
3848 (define_insn "*<code>tf3"
3849 [(set (match_operand:TF 0 "register_operand" "=x,x,v,v")
3850 (any_logic:TF
3851 (match_operand:TF 1 "vector_operand" "%0,x,v,v")
3852 (match_operand:TF 2 "vector_operand" "xBm,xm,vm,v")))]
3853 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3854 {
3855 char buf[128];
3856 const char *ops;
3857 const char *tmp
3858 = (which_alternative >= 2 ? "p<logic>q"
3859 : get_attr_mode (insn) == MODE_V4SF ? "<logic>ps" : "p<logic>");
3860
3861 switch (which_alternative)
3862 {
3863 case 0:
3864 ops = "%s\t{%%2, %%0|%%0, %%2}";
3865 break;
3866 case 1:
3867 case 2:
3868 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3869 break;
3870 case 3:
3871 ops = "v%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3872 break;
3873 default:
3874 gcc_unreachable ();
3875 }
3876
3877 snprintf (buf, sizeof (buf), ops, tmp);
3878 output_asm_insn (buf, operands);
3879 return "";
3880 }
3881 [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
3882 (set_attr "type" "sselog")
3883 (set (attr "prefix_data16")
3884 (if_then_else
3885 (and (eq_attr "alternative" "0")
3886 (eq_attr "mode" "TI"))
3887 (const_string "1")
3888 (const_string "*")))
3889 (set_attr "prefix" "orig,vex,evex,evex")
3890 (set (attr "mode")
3891 (cond [(eq_attr "alternative" "2")
3892 (const_string "TI")
3893 (eq_attr "alternative" "3")
3894 (const_string "XI")
3895 (match_test "TARGET_AVX")
3896 (const_string "TI")
3897 (ior (not (match_test "TARGET_SSE2"))
3898 (match_test "optimize_function_for_size_p (cfun)"))
3899 (const_string "V4SF")
3900 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
3901 (const_string "V4SF")
3902 ]
3903 (const_string "TI")))])
3904
3905 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3906 ;;
3907 ;; FMA floating point multiply/accumulate instructions. These include
3908 ;; scalar versions of the instructions as well as vector versions.
3909 ;;
3910 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3911
3912 ;; The standard names for scalar FMA are only available with SSE math enabled.
3913 ;; The AVX512F CPUID bit enables EVEX-encoded scalar and 512-bit FMA; it does
3914 ;; not depend on the FMA CPUID bit, so we enable fma for TARGET_AVX512F even
3915 ;; when TARGET_FMA and TARGET_FMA4 are both false.
3916 ;; TODO: In theory AVX512F does not automatically imply FMA, and without FMA
3917 ;; one must force the EVEX encoding of the fma insns. Ideally we'd improve
3918 ;; GAS to allow proper prefix selection. However, for the moment all hardware
3919 ;; that supports AVX512F also supports FMA so we can ignore this for now.
3920 (define_mode_iterator FMAMODEM
3921 [(SF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
3922 (DF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
3923 (V4SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3924 (V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3925 (V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3926 (V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3927 (V16SF "TARGET_AVX512F")
3928 (V8DF "TARGET_AVX512F")])
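
;; A hedged example of the standard-name path (not a testcase from the
;; tree): with -mfma (or -mavx512f) and SSE math,
;;
;;   double
;;   fused (double a, double b, double c)
;;   {
;;     return __builtin_fma (a, b, c);
;;   }
;;
;; expands through fma<mode>4 to a single vfmadd instruction instead of a
;; call to the fma libm routine.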
3929
3930 (define_expand "fma<mode>4"
3931 [(set (match_operand:FMAMODEM 0 "register_operand")
3932 (fma:FMAMODEM
3933 (match_operand:FMAMODEM 1 "nonimmediate_operand")
3934 (match_operand:FMAMODEM 2 "nonimmediate_operand")
3935 (match_operand:FMAMODEM 3 "nonimmediate_operand")))])
3936
3937 (define_expand "fms<mode>4"
3938 [(set (match_operand:FMAMODEM 0 "register_operand")
3939 (fma:FMAMODEM
3940 (match_operand:FMAMODEM 1 "nonimmediate_operand")
3941 (match_operand:FMAMODEM 2 "nonimmediate_operand")
3942 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
3943
3944 (define_expand "fnma<mode>4"
3945 [(set (match_operand:FMAMODEM 0 "register_operand")
3946 (fma:FMAMODEM
3947 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
3948 (match_operand:FMAMODEM 2 "nonimmediate_operand")
3949 (match_operand:FMAMODEM 3 "nonimmediate_operand")))])
3950
3951 (define_expand "fnms<mode>4"
3952 [(set (match_operand:FMAMODEM 0 "register_operand")
3953 (fma:FMAMODEM
3954 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
3955 (match_operand:FMAMODEM 2 "nonimmediate_operand")
3956 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
3957
3958 ;; The builtins for intrinsics are not constrained by SSE math enabled.
3959 (define_mode_iterator FMAMODE_AVX512
3960 [(SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
3961 (DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
3962 (V4SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3963 (V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3964 (V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3965 (V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3966 (V16SF "TARGET_AVX512F")
3967 (V8DF "TARGET_AVX512F")])
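
;; For instance (illustration only), the FMA intrinsics remain usable even
;; when SSE math is not enabled, because the builtins behind them are keyed
;; off FMAMODE_AVX512, which carries no TARGET_SSE_MATH condition:
;;
;;   #include <immintrin.h>
;;   __m256d
;;   f (__m256d a, __m256d b, __m256d c)
;;   {
;;     return _mm256_fmadd_pd (a, b, c);
;;   }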
3968
3969 (define_mode_iterator FMAMODE
3970 [SF DF V4SF V2DF V8SF V4DF])
3971
3972 (define_expand "fma4i_fmadd_<mode>"
3973 [(set (match_operand:FMAMODE_AVX512 0 "register_operand")
3974 (fma:FMAMODE_AVX512
3975 (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand")
3976 (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
3977 (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand")))])
3978
3979 (define_expand "fma4i_fmsub_<mode>"
3980 [(set (match_operand:FMAMODE_AVX512 0 "register_operand")
3981 (fma:FMAMODE_AVX512
3982 (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand")
3983 (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
3984 (neg:FMAMODE_AVX512
3985 (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand"))))])
3986
3987 (define_expand "fma4i_fnmadd_<mode>"
3988 [(set (match_operand:FMAMODE_AVX512 0 "register_operand")
3989 (fma:FMAMODE_AVX512
3990 (neg:FMAMODE_AVX512
3991 (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand"))
3992 (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
3993 (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand")))])
3994
3995 (define_expand "fma4i_fnmsub_<mode>"
3996 [(set (match_operand:FMAMODE_AVX512 0 "register_operand")
3997 (fma:FMAMODE_AVX512
3998 (neg:FMAMODE_AVX512
3999 (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand"))
4000 (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
4001 (neg:FMAMODE_AVX512
4002 (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand"))))])
4003
4004 (define_expand "<avx512>_fmadd_<mode>_maskz<round_expand_name>"
4005 [(match_operand:VF_AVX512VL 0 "register_operand")
4006 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
4007 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
4008 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
4009 (match_operand:<avx512fmaskmode> 4 "register_operand")]
4010 "TARGET_AVX512F && <round_mode512bit_condition>"
4011 {
4012 emit_insn (gen_fma_fmadd_<mode>_maskz_1<round_expand_name> (
4013 operands[0], operands[1], operands[2], operands[3],
4014 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
4015 DONE;
4016 })
4017
4018 (define_insn "*fma_fmadd_<mode>"
4019 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
4020 (fma:FMAMODE
4021 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x")
4022 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
4023 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x")))]
4024 "TARGET_FMA || TARGET_FMA4"
4025 "@
4026 vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
4027 vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
4028 vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
4029 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
4030 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4031 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
4032 (set_attr "type" "ssemuladd")
4033 (set_attr "mode" "<MODE>")])
4034
4035 ;; Assume AVX-512F as the baseline; 128-bit and 256-bit vector modes additionally require AVX512VL.
4036 (define_mode_iterator VF_SF_AVX512VL
4037 [SF V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
4038 DF V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
4039
4040 (define_insn "<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name><round_name>"
4041 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
4042 (fma:VF_SF_AVX512VL
4043 (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
4044 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
4045 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")))]
4046 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
4047 "@
4048 vfmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
4049 vfmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
4050 vfmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4051 [(set_attr "type" "ssemuladd")
4052 (set_attr "mode" "<MODE>")])
4053
4054 (define_insn "*<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name>_bcst_1"
4055 [(set (match_operand:VF_AVX512 0 "register_operand" "=v")
4056 (fma:VF_AVX512
4057 (match_operand:VF_AVX512 1 "register_operand" "%0")
4058 (match_operand:VF_AVX512 2 "register_operand" "v")
4059 (vec_duplicate:VF_AVX512
4060 (match_operand:<ssescalarmode> 3 "memory_operand" "m"))))]
4061 "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
4062 "vfmadd213<ssemodesuffix>\t{%3<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<avx512bcst>}"
4063 [(set_attr "type" "ssemuladd")
4064 (set_attr "mode" "<MODE>")])
4065
4066 (define_insn "*<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name>_bcst_2"
4067 [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
4068 (fma:VF_AVX512
4069 (vec_duplicate:VF_AVX512
4070 (match_operand:<ssescalarmode> 1 "memory_operand" "m,m"))
4071 (match_operand:VF_AVX512 2 "register_operand" "0,v")
4072 (match_operand:VF_AVX512 3 "register_operand" "v,0")))]
4073 "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
4074 "@
4075 vfmadd132<ssemodesuffix>\t{%1<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %1<avx512bcst>}
4076 vfmadd231<ssemodesuffix>\t{%1<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %1<avx512bcst>}"
4077 [(set_attr "type" "ssemuladd")
4078 (set_attr "mode" "<MODE>")])
4079
4080 (define_insn "*<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name>_bcst_3"
4081 [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
4082 (fma:VF_AVX512
4083 (match_operand:VF_AVX512 1 "register_operand" "0,v")
4084 (vec_duplicate:VF_AVX512
4085 (match_operand:<ssescalarmode> 2 "memory_operand" "m,m"))
4086 (match_operand:VF_AVX512 3 "register_operand" "v,0")))]
4087 "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
4088 "@
4089 vfmadd132<ssemodesuffix>\t{%2<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<avx512bcst>}
4090 vfmadd231<ssemodesuffix>\t{%2<avx512bcst>, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<avx512bcst>}"
4091 [(set_attr "type" "ssemuladd")
4092 (set_attr "mode" "<MODE>")])
4093
4094 (define_insn "<avx512>_fmadd_<mode>_mask<round_name>"
4095 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
4096 (vec_merge:VF_AVX512VL
4097 (fma:VF_AVX512VL
4098 (match_operand:VF_AVX512VL 1 "register_operand" "0,0")
4099 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
4100 (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>"))
4101 (match_dup 1)
4102 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
4103 "TARGET_AVX512F && <round_mode512bit_condition>"
4104 "@
4105 vfmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
4106 vfmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
4107 [(set_attr "type" "ssemuladd")
4108 (set_attr "mode" "<MODE>")])
4109
4110 (define_insn "<avx512>_fmadd_<mode>_mask3<round_name>"
4111 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
4112 (vec_merge:VF_AVX512VL
4113 (fma:VF_AVX512VL
4114 (match_operand:VF_AVX512VL 1 "<round_nimm_predicate>" "%v")
4115 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
4116 (match_operand:VF_AVX512VL 3 "register_operand" "0"))
4117 (match_dup 3)
4118 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4119 "TARGET_AVX512F"
4120 "vfmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
4121 [(set_attr "type" "ssemuladd")
4122 (set_attr "mode" "<MODE>")])
4123
4124 (define_insn "*fma_fmsub_<mode>"
4125 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
4126 (fma:FMAMODE
4127 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x")
4128 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
4129 (neg:FMAMODE
4130 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x"))))]
4131 "TARGET_FMA || TARGET_FMA4"
4132 "@
4133 vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
4134 vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
4135 vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
4136 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
4137 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4138 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
4139 (set_attr "type" "ssemuladd")
4140 (set_attr "mode" "<MODE>")])
4141
4142 (define_expand "<avx512>_fmsub_<mode>_maskz<round_expand_name>"
4143 [(match_operand:VF_AVX512VL 0 "register_operand")
4144 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
4145 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
4146 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
4147 (match_operand:<avx512fmaskmode> 4 "register_operand")]
4148 "TARGET_AVX512F && <round_mode512bit_condition>"
4149 {
4150 emit_insn (gen_fma_fmsub_<mode>_maskz_1<round_expand_name> (
4151 operands[0], operands[1], operands[2], operands[3],
4152 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
4153 DONE;
4154 })
4155
4156 (define_insn "<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name><round_name>"
4157 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
4158 (fma:VF_SF_AVX512VL
4159 (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
4160 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
4161 (neg:VF_SF_AVX512VL
4162 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))]
4163 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
4164 "@
4165 vfmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
4166 vfmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
4167 vfmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4168 [(set_attr "type" "ssemuladd")
4169 (set_attr "mode" "<MODE>")])
4170
4171 (define_insn "*<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name>_bcst_1"
4172 [(set (match_operand:VF_AVX512 0 "register_operand" "=v")
4173 (fma:VF_AVX512
4174 (match_operand:VF_AVX512 1 "register_operand" "%0")
4175 (match_operand:VF_AVX512 2 "register_operand" "v")
4176 (neg:VF_AVX512
4177 (vec_duplicate:VF_AVX512
4178 (match_operand:<ssescalarmode> 3 "memory_operand" "m")))))]
4179 "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
4180 "vfmsub213<ssemodesuffix>\t{%3<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<avx512bcst>}"
4181 [(set_attr "type" "ssemuladd")
4182 (set_attr "mode" "<MODE>")])
4183
4184 (define_insn "*<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name>_bcst_2"
4185 [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
4186 (fma:VF_AVX512
4187 (vec_duplicate:VF_AVX512
4188 (match_operand:<ssescalarmode> 1 "memory_operand" "m,m"))
4189 (match_operand:VF_AVX512 2 "register_operand" "0,v")
4190 (neg:VF_AVX512
4191 (match_operand:VF_AVX512 3 "register_operand" "v,0"))))]
4192 "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
4193 "@
4194 vfmsub132<ssemodesuffix>\t{%1<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %1<avx512bcst>}
4195 vfmsub231<ssemodesuffix>\t{%1<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %1<avx512bcst>}"
4196 [(set_attr "type" "ssemuladd")
4197 (set_attr "mode" "<MODE>")])
4198
4199 (define_insn "*<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name>_bcst_3"
4200 [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
4201 (fma:VF_AVX512
4202 (match_operand:VF_AVX512 1 "register_operand" "0,v")
4203 (vec_duplicate:VF_AVX512
4204 (match_operand:<ssescalarmode> 2 "memory_operand" "m,m"))
4205 (neg:VF_AVX512
4206 (match_operand:VF_AVX512 3 "register_operand" "v,0"))))]
4207 "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
4208 "@
4209 vfmsub132<ssemodesuffix>\t{%2<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<avx512bcst>}
4210 vfmsub231<ssemodesuffix>\t{%2<avx512bcst>, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<avx512bcst>}"
4211 [(set_attr "type" "ssemuladd")
4212 (set_attr "mode" "<MODE>")])
4213
4214 (define_insn "<avx512>_fmsub_<mode>_mask<round_name>"
4215 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
4216 (vec_merge:VF_AVX512VL
4217 (fma:VF_AVX512VL
4218 (match_operand:VF_AVX512VL 1 "register_operand" "0,0")
4219 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
4220 (neg:VF_AVX512VL
4221 (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>")))
4222 (match_dup 1)
4223 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
4224 "TARGET_AVX512F"
4225 "@
4226 vfmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
4227 vfmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
4228 [(set_attr "type" "ssemuladd")
4229 (set_attr "mode" "<MODE>")])
4230
4231 (define_insn "<avx512>_fmsub_<mode>_mask3<round_name>"
4232 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
4233 (vec_merge:VF_AVX512VL
4234 (fma:VF_AVX512VL
4235 (match_operand:VF_AVX512VL 1 "<round_nimm_predicate>" "%v")
4236 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
4237 (neg:VF_AVX512VL
4238 (match_operand:VF_AVX512VL 3 "register_operand" "0")))
4239 (match_dup 3)
4240 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4241 "TARGET_AVX512F && <round_mode512bit_condition>"
4242 "vfmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
4243 [(set_attr "type" "ssemuladd")
4244 (set_attr "mode" "<MODE>")])
4245
4246 (define_insn "*fma_fnmadd_<mode>"
4247 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
4248 (fma:FMAMODE
4249 (neg:FMAMODE
4250 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x"))
4251 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
4252 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x")))]
4253 "TARGET_FMA || TARGET_FMA4"
4254 "@
4255 vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
4256 vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
4257 vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
4258 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
4259 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4260 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
4261 (set_attr "type" "ssemuladd")
4262 (set_attr "mode" "<MODE>")])
4263
4264 (define_expand "<avx512>_fnmadd_<mode>_maskz<round_expand_name>"
4265 [(match_operand:VF_AVX512VL 0 "register_operand")
4266 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
4267 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
4268 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
4269 (match_operand:<avx512fmaskmode> 4 "register_operand")]
4270 "TARGET_AVX512F && <round_mode512bit_condition>"
4271 {
4272 emit_insn (gen_fma_fnmadd_<mode>_maskz_1<round_expand_name> (
4273 operands[0], operands[1], operands[2], operands[3],
4274 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
4275 DONE;
4276 })
4277
4278 (define_insn "<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name><round_name>"
4279 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
4280 (fma:VF_SF_AVX512VL
4281 (neg:VF_SF_AVX512VL
4282 (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v"))
4283 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
4284 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")))]
4285 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
4286 "@
4287 vfnmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
4288 vfnmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
4289 vfnmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4290 [(set_attr "type" "ssemuladd")
4291 (set_attr "mode" "<MODE>")])
4292
4293 (define_insn "*<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name>_bcst_1"
4294 [(set (match_operand:VF_AVX512 0 "register_operand" "=v")
4295 (fma:VF_AVX512
4296 (neg:VF_AVX512
4297 (match_operand:VF_AVX512 1 "register_operand" "%0"))
4298 (match_operand:VF_AVX512 2 "register_operand" "v")
4299 (vec_duplicate:VF_AVX512
4300 (match_operand:<ssescalarmode> 3 "memory_operand" "m"))))]
4301 "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
4302 "vfnmadd213<ssemodesuffix>\t{%3<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<avx512bcst>}"
4303 [(set_attr "type" "ssemuladd")
4304 (set_attr "mode" "<MODE>")])
4305
4306 (define_insn "*<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name>_bcst_2"
4307 [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
4308 (fma:VF_AVX512
4309 (neg:VF_AVX512
4310 (vec_duplicate:VF_AVX512
4311 (match_operand:<ssescalarmode> 1 "memory_operand" "m,m")))
4312 (match_operand:VF_AVX512 2 "register_operand" "0,v")
4313 (match_operand:VF_AVX512 3 "register_operand" "v,0")))]
4314 "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
4315 "@
4316 vfnmadd132<ssemodesuffix>\t{%1<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %1<avx512bcst>}
4317 vfnmadd231<ssemodesuffix>\t{%1<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %1<avx512bcst>}"
4318 [(set_attr "type" "ssemuladd")
4319 (set_attr "mode" "<MODE>")])
4320
4321 (define_insn "*<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name>_bcst_3"
4322 [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
4323 (fma:VF_AVX512
4324 (neg:VF_AVX512
4325 (match_operand:VF_AVX512 1 "register_operand" "0,v"))
4326 (vec_duplicate:VF_AVX512
4327 (match_operand:<ssescalarmode> 2 "memory_operand" "m,m"))
4328 (match_operand:VF_AVX512 3 "register_operand" "v,0")))]
4329 "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
4330 "@
4331 vfnmadd132<ssemodesuffix>\t{%2<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<avx512bcst>}
4332 vfnmadd231<ssemodesuffix>\t{%2<avx512bcst>, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<avx512bcst>}"
4333 [(set_attr "type" "ssemuladd")
4334 (set_attr "mode" "<MODE>")])
4335
4336 (define_insn "<avx512>_fnmadd_<mode>_mask<round_name>"
4337 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
4338 (vec_merge:VF_AVX512VL
4339 (fma:VF_AVX512VL
4340 (neg:VF_AVX512VL
4341 (match_operand:VF_AVX512VL 1 "register_operand" "0,0"))
4342 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
4343 (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>"))
4344 (match_dup 1)
4345 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
4346 "TARGET_AVX512F && <round_mode512bit_condition>"
4347 "@
4348 vfnmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
4349 vfnmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
4350 [(set_attr "type" "ssemuladd")
4351 (set_attr "mode" "<MODE>")])
4352
4353 (define_insn "<avx512>_fnmadd_<mode>_mask3<round_name>"
4354 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
4355 (vec_merge:VF_AVX512VL
4356 (fma:VF_AVX512VL
4357 (neg:VF_AVX512VL
4358 (match_operand:VF_AVX512VL 1 "<round_nimm_predicate>" "%v"))
4359 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
4360 (match_operand:VF_AVX512VL 3 "register_operand" "0"))
4361 (match_dup 3)
4362 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4363 "TARGET_AVX512F && <round_mode512bit_condition>"
4364 "vfnmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
4365 [(set_attr "type" "ssemuladd")
4366 (set_attr "mode" "<MODE>")])
4367
4368 (define_insn "*fma_fnmsub_<mode>"
4369 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
4370 (fma:FMAMODE
4371 (neg:FMAMODE
4372 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x"))
4373 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
4374 (neg:FMAMODE
4375 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x"))))]
4376 "TARGET_FMA || TARGET_FMA4"
4377 "@
4378 vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
4379 vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
4380 vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
4381 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
4382 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4383 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
4384 (set_attr "type" "ssemuladd")
4385 (set_attr "mode" "<MODE>")])
4386
4387 (define_expand "<avx512>_fnmsub_<mode>_maskz<round_expand_name>"
4388 [(match_operand:VF_AVX512VL 0 "register_operand")
4389 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
4390 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
4391 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
4392 (match_operand:<avx512fmaskmode> 4 "register_operand")]
4393 "TARGET_AVX512F && <round_mode512bit_condition>"
4394 {
4395 emit_insn (gen_fma_fnmsub_<mode>_maskz_1<round_expand_name> (
4396 operands[0], operands[1], operands[2], operands[3],
4397 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
4398 DONE;
4399 })
4400
4401 (define_insn "<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name><round_name>"
4402 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
4403 (fma:VF_SF_AVX512VL
4404 (neg:VF_SF_AVX512VL
4405 (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v"))
4406 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
4407 (neg:VF_SF_AVX512VL
4408 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))]
4409 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
4410 "@
4411 vfnmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
4412 vfnmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
4413 vfnmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4414 [(set_attr "type" "ssemuladd")
4415 (set_attr "mode" "<MODE>")])
4416
4417 (define_insn "*<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name>_bcst_1"
4418 [(set (match_operand:VF_AVX512 0 "register_operand" "=v")
4419 (fma:VF_AVX512
4420 (neg:VF_AVX512
4421 (match_operand:VF_AVX512 1 "register_operand" "%0"))
4422 (match_operand:VF_AVX512 2 "register_operand" "v")
4423 (neg:VF_AVX512
4424 (vec_duplicate:VF_AVX512
4425 (match_operand:<ssescalarmode> 3 "memory_operand" "m")))))]
4426 "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
4427 "vfnmsub213<ssemodesuffix>\t{%3<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<avx512bcst>}"
4428 [(set_attr "type" "ssemuladd")
4429 (set_attr "mode" "<MODE>")])
4430
4431 (define_insn "*<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name>_bcst_2"
4432 [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
4433 (fma:VF_AVX512
4434 (neg:VF_AVX512
4435 (vec_duplicate:VF_AVX512
4436 (match_operand:<ssescalarmode> 1 "memory_operand" "m,m")))
4437 (match_operand:VF_AVX512 2 "register_operand" "0,v")
4438 (neg:VF_AVX512
4439 (match_operand:VF_AVX512 3 "register_operand" "v,0"))))]
4440 "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
4441 "@
4442 vfnmsub132<ssemodesuffix>\t{%1<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %1<avx512bcst>}
4443 vfnmsub231<ssemodesuffix>\t{%1<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %1<avx512bcst>}"
4444 [(set_attr "type" "ssemuladd")
4445 (set_attr "mode" "<MODE>")])
4446
4447 (define_insn "*<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name>_bcst_3"
4448 [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
4449 (fma:VF_AVX512
4450 (neg:VF_AVX512
4451 (match_operand:VF_AVX512 1 "register_operand" "0,v"))
4452 (vec_duplicate:VF_AVX512
4453 (match_operand:<ssescalarmode> 2 "memory_operand" "m,m"))
4454 (neg:VF_AVX512
4455 (match_operand:VF_AVX512 3 "register_operand" "v,0"))))]
4456 "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
4457 "@
4458 vfnmsub132<ssemodesuffix>\t{%2<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<avx512bcst>}
4459 vfnmsub231<ssemodesuffix>\t{%2<avx512bcst>, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<avx512bcst>}"
4460 [(set_attr "type" "ssemuladd")
4461 (set_attr "mode" "<MODE>")])
4462
4463 (define_insn "<avx512>_fnmsub_<mode>_mask<round_name>"
4464 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
4465 (vec_merge:VF_AVX512VL
4466 (fma:VF_AVX512VL
4467 (neg:VF_AVX512VL
4468 (match_operand:VF_AVX512VL 1 "register_operand" "0,0"))
4469 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
4470 (neg:VF_AVX512VL
4471 (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>")))
4472 (match_dup 1)
4473 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
4474 "TARGET_AVX512F && <round_mode512bit_condition>"
4475 "@
4476 vfnmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
4477 vfnmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
4478 [(set_attr "type" "ssemuladd")
4479 (set_attr "mode" "<MODE>")])
4480
4481 (define_insn "<avx512>_fnmsub_<mode>_mask3<round_name>"
4482 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
4483 (vec_merge:VF_AVX512VL
4484 (fma:VF_AVX512VL
4485 (neg:VF_AVX512VL
4486 (match_operand:VF_AVX512VL 1 "<round_nimm_predicate>" "%v"))
4487 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
4488 (neg:VF_AVX512VL
4489 (match_operand:VF_AVX512VL 3 "register_operand" "0")))
4490 (match_dup 3)
4491 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4492 "TARGET_AVX512F"
4493 "vfnmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
4494 [(set_attr "type" "ssemuladd")
4495 (set_attr "mode" "<MODE>")])
4496
4497 ;; FMA parallel floating point multiply addsub and subadd operations.
4498
4499 ;; It would be possible to represent these without the UNSPEC as
4500 ;;
4501 ;; (vec_merge
4502 ;; (fma op1 op2 op3)
4503 ;; (fma op1 op2 (neg op3))
4504 ;; (merge-const))
4505 ;;
4506 ;; But this doesn't seem useful in practice.
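
;; Reference semantics (a hedged C sketch; fmaddsub_ref is a hypothetical
;; helper, not part of GCC): even-numbered elements are multiply-subtract,
;; odd-numbered elements are multiply-add.
;;
;;   void
;;   fmaddsub_ref (float *r, const float *a, const float *b,
;;                 const float *c, int n)
;;   {
;;     for (int i = 0; i < n; i++)
;;       r[i] = (i & 1) ? a[i] * b[i] + c[i] : a[i] * b[i] - c[i];
;;   }
;;
;; fmsubadd swaps the roles of the even and odd elements.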
4507
4508 (define_expand "fmaddsub_<mode>"
4509 [(set (match_operand:VF 0 "register_operand")
4510 (unspec:VF
4511 [(match_operand:VF 1 "nonimmediate_operand")
4512 (match_operand:VF 2 "nonimmediate_operand")
4513 (match_operand:VF 3 "nonimmediate_operand")]
4514 UNSPEC_FMADDSUB))]
4515 "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
4516
4517 (define_expand "<avx512>_fmaddsub_<mode>_maskz<round_expand_name>"
4518 [(match_operand:VF_AVX512VL 0 "register_operand")
4519 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
4520 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
4521 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
4522 (match_operand:<avx512fmaskmode> 4 "register_operand")]
4523 "TARGET_AVX512F"
4524 {
4525 emit_insn (gen_fma_fmaddsub_<mode>_maskz_1<round_expand_name> (
4526 operands[0], operands[1], operands[2], operands[3],
4527 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
4528 DONE;
4529 })
4530
4531 (define_insn "*fma_fmaddsub_<mode>"
4532 [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
4533 (unspec:VF_128_256
4534 [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,v,x,x")
4535 (match_operand:VF_128_256 2 "nonimmediate_operand" "vm,v,vm,x,m")
4536 (match_operand:VF_128_256 3 "nonimmediate_operand" "v,vm,0,xm,x")]
4537 UNSPEC_FMADDSUB))]
4538 "TARGET_FMA || TARGET_FMA4"
4539 "@
4540 vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
4541 vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
4542 vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
4543 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
4544 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4545 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
4546 (set_attr "type" "ssemuladd")
4547 (set_attr "mode" "<MODE>")])
4548
4549 (define_insn "<sd_mask_codefor>fma_fmaddsub_<mode><sd_maskz_name><round_name>"
4550 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
4551 (unspec:VF_SF_AVX512VL
4552 [(match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
4553 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
4554 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")]
4555 UNSPEC_FMADDSUB))]
4556 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
4557 "@
4558 vfmaddsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
4559 vfmaddsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
4560 vfmaddsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4561 [(set_attr "type" "ssemuladd")
4562 (set_attr "mode" "<MODE>")])
4563
4564 (define_insn "<avx512>_fmaddsub_<mode>_mask<round_name>"
4565 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
4566 (vec_merge:VF_AVX512VL
4567 (unspec:VF_AVX512VL
4568 [(match_operand:VF_AVX512VL 1 "register_operand" "0,0")
4569 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
4570 (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>")]
4571 UNSPEC_FMADDSUB)
4572 (match_dup 1)
4573 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
4574 "TARGET_AVX512F"
4575 "@
4576 vfmaddsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
4577 vfmaddsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
4578 [(set_attr "type" "ssemuladd")
4579 (set_attr "mode" "<MODE>")])
4580
4581 (define_insn "<avx512>_fmaddsub_<mode>_mask3<round_name>"
4582 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
4583 (vec_merge:VF_AVX512VL
4584 (unspec:VF_AVX512VL
4585 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
4586 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
4587 (match_operand:VF_AVX512VL 3 "register_operand" "0")]
4588 UNSPEC_FMADDSUB)
4589 (match_dup 3)
4590 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4591 "TARGET_AVX512F"
4592 "vfmaddsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
4593 [(set_attr "type" "ssemuladd")
4594 (set_attr "mode" "<MODE>")])
4595
4596 (define_insn "*fma_fmsubadd_<mode>"
4597 [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
4598 (unspec:VF_128_256
4599 [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,v,x,x")
4600 (match_operand:VF_128_256 2 "nonimmediate_operand" "vm,v,vm,x,m")
4601 (neg:VF_128_256
4602 (match_operand:VF_128_256 3 "nonimmediate_operand" "v,vm,0,xm,x"))]
4603 UNSPEC_FMADDSUB))]
4604 "TARGET_FMA || TARGET_FMA4"
4605 "@
4606 vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
4607 vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
4608 vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
4609 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
4610 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4611 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
4612 (set_attr "type" "ssemuladd")
4613 (set_attr "mode" "<MODE>")])
4614
4615 (define_insn "<sd_mask_codefor>fma_fmsubadd_<mode><sd_maskz_name><round_name>"
4616 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
4617 (unspec:VF_SF_AVX512VL
4618 [(match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
4619 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
4620 (neg:VF_SF_AVX512VL
4621 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))]
4622 UNSPEC_FMADDSUB))]
4623 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
4624 "@
4625 vfmsubadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
4626 vfmsubadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
4627 vfmsubadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4628 [(set_attr "type" "ssemuladd")
4629 (set_attr "mode" "<MODE>")])
4630
4631 (define_insn "<avx512>_fmsubadd_<mode>_mask<round_name>"
4632 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
4633 (vec_merge:VF_AVX512VL
4634 (unspec:VF_AVX512VL
4635 [(match_operand:VF_AVX512VL 1 "register_operand" "0,0")
4636 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
4637 (neg:VF_AVX512VL
4638 (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>"))]
4639 UNSPEC_FMADDSUB)
4640 (match_dup 1)
4641 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
4642 "TARGET_AVX512F"
4643 "@
4644 vfmsubadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
4645 vfmsubadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
4646 [(set_attr "type" "ssemuladd")
4647 (set_attr "mode" "<MODE>")])
4648
4649 (define_insn "<avx512>_fmsubadd_<mode>_mask3<round_name>"
4650 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
4651 (vec_merge:VF_AVX512VL
4652 (unspec:VF_AVX512VL
4653 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
4654 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
4655 (neg:VF_AVX512VL
4656 (match_operand:VF_AVX512VL 3 "register_operand" "0"))]
4657 UNSPEC_FMADDSUB)
4658 (match_dup 3)
4659 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4660 "TARGET_AVX512F"
4661 "vfmsubadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
4662 [(set_attr "type" "ssemuladd")
4663 (set_attr "mode" "<MODE>")])
4664
4665 ;; FMA3 floating point scalar intrinsics. These merge the result with the
4666 ;; high-order elements of the destination register.
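;;
;; As an illustration (a sketch, not taken from the patterns themselves):
;; for the V4SF variant the vmfmadd insns below compute
;;   d[0] = op1[0] * op2[0] + op3[0]      d[1..3] = op1[1..3]
;; which is the behaviour expected by e.g. the _mm_fmadd_ss intrinsic.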
4667
4668 (define_expand "fmai_vmfmadd_<mode><round_name>"
4669 [(set (match_operand:VF_128 0 "register_operand")
4670 (vec_merge:VF_128
4671 (fma:VF_128
4672 (match_operand:VF_128 1 "register_operand")
4673 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>")
4674 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>"))
4675 (match_dup 1)
4676 (const_int 1)))]
4677 "TARGET_FMA")
4678
4679 (define_expand "fmai_vmfmsub_<mode><round_name>"
4680 [(set (match_operand:VF_128 0 "register_operand")
4681 (vec_merge:VF_128
4682 (fma:VF_128
4683 (match_operand:VF_128 1 "register_operand")
4684 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>")
4685 (neg:VF_128
4686 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>")))
4687 (match_dup 1)
4688 (const_int 1)))]
4689 "TARGET_FMA")
4690
4691 (define_expand "fmai_vmfnmadd_<mode><round_name>"
4692 [(set (match_operand:VF_128 0 "register_operand")
4693 (vec_merge:VF_128
4694 (fma:VF_128
4695 (neg:VF_128
4696 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>"))
4697 (match_operand:VF_128 1 "register_operand")
4698 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>"))
4699 (match_dup 1)
4700 (const_int 1)))]
4701 "TARGET_FMA")
4702
4703 (define_expand "fmai_vmfnmsub_<mode><round_name>"
4704 [(set (match_operand:VF_128 0 "register_operand")
4705 (vec_merge:VF_128
4706 (fma:VF_128
4707 (neg:VF_128
4708 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>"))
4709 (match_operand:VF_128 1 "register_operand")
4710 (neg:VF_128
4711 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>")))
4712 (match_dup 1)
4713 (const_int 1)))]
4714 "TARGET_FMA")
4715
4716 (define_insn "*fmai_fmadd_<mode>"
4717 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4718 (vec_merge:VF_128
4719 (fma:VF_128
4720 (match_operand:VF_128 1 "register_operand" "0,0")
4721 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v")
4722 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>"))
4723 (match_dup 1)
4724 (const_int 1)))]
4725 "TARGET_FMA || TARGET_AVX512F"
4726 "@
4727 vfmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
4728 vfmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
4729 [(set_attr "type" "ssemuladd")
4730 (set_attr "mode" "<MODE>")])
4731
4732 (define_insn "*fmai_fmsub_<mode>"
4733 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4734 (vec_merge:VF_128
4735 (fma:VF_128
4736 (match_operand:VF_128 1 "register_operand" "0,0")
4737 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v")
4738 (neg:VF_128
4739 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>")))
4740 (match_dup 1)
4741 (const_int 1)))]
4742 "TARGET_FMA || TARGET_AVX512F"
4743 "@
4744 vfmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
4745 vfmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
4746 [(set_attr "type" "ssemuladd")
4747 (set_attr "mode" "<MODE>")])
4748
4749 (define_insn "*fmai_fnmadd_<mode><round_name>"
4750 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4751 (vec_merge:VF_128
4752 (fma:VF_128
4753 (neg:VF_128
4754 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v"))
4755 (match_operand:VF_128 1 "register_operand" "0,0")
4756 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>"))
4757 (match_dup 1)
4758 (const_int 1)))]
4759 "TARGET_FMA || TARGET_AVX512F"
4760 "@
4761 vfnmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
4762 vfnmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
4763 [(set_attr "type" "ssemuladd")
4764 (set_attr "mode" "<MODE>")])
4765
4766 (define_insn "*fmai_fnmsub_<mode><round_name>"
4767 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4768 (vec_merge:VF_128
4769 (fma:VF_128
4770 (neg:VF_128
4771 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v"))
4772 (match_operand:VF_128 1 "register_operand" "0,0")
4773 (neg:VF_128
4774 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>")))
4775 (match_dup 1)
4776 (const_int 1)))]
4777 "TARGET_FMA || TARGET_AVX512F"
4778 "@
4779 vfnmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
4780 vfnmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
4781 [(set_attr "type" "ssemuladd")
4782 (set_attr "mode" "<MODE>")])
4783
4784 (define_insn "avx512f_vmfmadd_<mode>_mask<round_name>"
4785 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4786 (vec_merge:VF_128
4787 (vec_merge:VF_128
4788 (fma:VF_128
4789 (match_operand:VF_128 1 "register_operand" "0,0")
4790 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v")
4791 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>"))
4792 (match_dup 1)
4793 (match_operand:QI 4 "register_operand" "Yk,Yk"))
4794 (match_dup 1)
4795 (const_int 1)))]
4796 "TARGET_AVX512F"
4797 "@
4798 vfmadd132<ssescalarmodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}
4799 vfmadd213<ssescalarmodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %<iptr>2, %<iptr>3<round_op5>}"
4800 [(set_attr "type" "ssemuladd")
4801 (set_attr "mode" "<MODE>")])
4802
4803 (define_insn "avx512f_vmfmadd_<mode>_mask3<round_name>"
4804 [(set (match_operand:VF_128 0 "register_operand" "=v")
4805 (vec_merge:VF_128
4806 (vec_merge:VF_128
4807 (fma:VF_128
4808 (match_operand:VF_128 1 "<round_nimm_scalar_predicate>" "%v")
4809 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>")
4810 (match_operand:VF_128 3 "register_operand" "0"))
4811 (match_dup 3)
4812 (match_operand:QI 4 "register_operand" "Yk"))
4813 (match_dup 3)
4814 (const_int 1)))]
4815 "TARGET_AVX512F"
4816 "vfmadd231<ssescalarmodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}"
4817 [(set_attr "type" "ssemuladd")
4818 (set_attr "mode" "<MODE>")])
4819
4820 (define_expand "avx512f_vmfmadd_<mode>_maskz<round_expand_name>"
4821 [(match_operand:VF_128 0 "register_operand")
4822 (match_operand:VF_128 1 "<round_expand_nimm_predicate>")
4823 (match_operand:VF_128 2 "<round_expand_nimm_predicate>")
4824 (match_operand:VF_128 3 "<round_expand_nimm_predicate>")
4825 (match_operand:QI 4 "register_operand")]
4826 "TARGET_AVX512F"
4827 {
4828 emit_insn (gen_avx512f_vmfmadd_<mode>_maskz_1<round_expand_name> (
4829 operands[0], operands[1], operands[2], operands[3],
4830 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
4831 DONE;
4832 })
4833
4834 (define_insn "avx512f_vmfmadd_<mode>_maskz_1<round_name>"
4835 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4836 (vec_merge:VF_128
4837 (vec_merge:VF_128
4838 (fma:VF_128
4839 (match_operand:VF_128 1 "register_operand" "0,0")
4840 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v")
4841 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>"))
4842 (match_operand:VF_128 4 "const0_operand" "C,C")
4843 (match_operand:QI 5 "register_operand" "Yk,Yk"))
4844 (match_dup 1)
4845 (const_int 1)))]
4846 "TARGET_AVX512F"
4847 "@
4848 vfmadd132<ssescalarmodesuffix>\t{<round_op6>%2, %3, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>3, %<iptr>2<round_op6>}
4849 vfmadd213<ssescalarmodesuffix>\t{<round_op6>%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>2, %<iptr>3<round_op6>}"
4850 [(set_attr "type" "ssemuladd")
4851 (set_attr "mode" "<MODE>")])
4852
4853 (define_insn "*avx512f_vmfmsub_<mode>_mask<round_name>"
4854 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4855 (vec_merge:VF_128
4856 (vec_merge:VF_128
4857 (fma:VF_128
4858 (match_operand:VF_128 1 "register_operand" "0,0")
4859 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v")
4860 (neg:VF_128
4861 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>")))
4862 (match_dup 1)
4863 (match_operand:QI 4 "register_operand" "Yk,Yk"))
4864 (match_dup 1)
4865 (const_int 1)))]
4866 "TARGET_AVX512F"
4867 "@
4868 vfmsub132<ssescalarmodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}
4869 vfmsub213<ssescalarmodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %<iptr>2, %<iptr>3<round_op5>}"
4870 [(set_attr "type" "ssemuladd")
4871 (set_attr "mode" "<MODE>")])
4872
4873 (define_insn "avx512f_vmfmsub_<mode>_mask3<round_name>"
4874 [(set (match_operand:VF_128 0 "register_operand" "=v")
4875 (vec_merge:VF_128
4876 (vec_merge:VF_128
4877 (fma:VF_128
4878 (match_operand:VF_128 1 "<round_nimm_scalar_predicate>" "%v")
4879 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>")
4880 (neg:VF_128
4881 (match_operand:VF_128 3 "register_operand" "0")))
4882 (match_dup 3)
4883 (match_operand:QI 4 "register_operand" "Yk"))
4884 (match_dup 3)
4885 (const_int 1)))]
4886 "TARGET_AVX512F"
4887 "vfmsub231<ssescalarmodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}"
4888 [(set_attr "type" "ssemuladd")
4889 (set_attr "mode" "<MODE>")])
4890
4891 (define_insn "*avx512f_vmfmsub_<mode>_maskz_1<round_name>"
4892 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4893 (vec_merge:VF_128
4894 (vec_merge:VF_128
4895 (fma:VF_128
4896 (match_operand:VF_128 1 "register_operand" "0,0")
4897 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v")
4898 (neg:VF_128
4899 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>")))
4900 (match_operand:VF_128 4 "const0_operand" "C,C")
4901 (match_operand:QI 5 "register_operand" "Yk,Yk"))
4902 (match_dup 1)
4903 (const_int 1)))]
4904 "TARGET_AVX512F"
4905 "@
4906 vfmsub132<ssescalarmodesuffix>\t{<round_op6>%2, %3, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>3, %<iptr>2<round_op6>}
4907 vfmsub213<ssescalarmodesuffix>\t{<round_op6>%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>2, %<iptr>3<round_op6>}"
4908 [(set_attr "type" "ssemuladd")
4909 (set_attr "mode" "<MODE>")])
4910
4911 (define_insn "*avx512f_vmfnmadd_<mode>_mask<round_name>"
4912 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4913 (vec_merge:VF_128
4914 (vec_merge:VF_128
4915 (fma:VF_128
4916 (neg:VF_128
4917 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v"))
4918 (match_operand:VF_128 1 "register_operand" "0,0")
4919 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>"))
4920 (match_dup 1)
4921 (match_operand:QI 4 "register_operand" "Yk,Yk"))
4922 (match_dup 1)
4923 (const_int 1)))]
4924 "TARGET_AVX512F"
4925 "@
4926 vfnmadd132<ssescalarmodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}
4927 vfnmadd213<ssescalarmodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %<iptr>2, %<iptr>3<round_op5>}"
4928 [(set_attr "type" "ssemuladd")
4929 (set_attr "mode" "<MODE>")])
4930
4931 (define_insn "*avx512f_vmfnmadd_<mode>_mask3<round_name>"
4932 [(set (match_operand:VF_128 0 "register_operand" "=v")
4933 (vec_merge:VF_128
4934 (vec_merge:VF_128
4935 (fma:VF_128
4936 (neg:VF_128
4937 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>"))
4938 (match_operand:VF_128 1 "<round_nimm_scalar_predicate>" "%v")
4939 (match_operand:VF_128 3 "register_operand" "0"))
4940 (match_dup 3)
4941 (match_operand:QI 4 "register_operand" "Yk"))
4942 (match_dup 3)
4943 (const_int 1)))]
4944 "TARGET_AVX512F"
4945 "vfnmadd231<ssescalarmodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}"
4946 [(set_attr "type" "ssemuladd")
4947 (set_attr "mode" "<MODE>")])
4948
4949 (define_insn "*avx512f_vmfnmadd_<mode>_maskz_1<round_name>"
4950 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4951 (vec_merge:VF_128
4952 (vec_merge:VF_128
4953 (fma:VF_128
4954 (neg:VF_128
4955 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v"))
4956 (match_operand:VF_128 1 "register_operand" "0,0")
4957 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>"))
4958 (match_operand:VF_128 4 "const0_operand" "C,C")
4959 (match_operand:QI 5 "register_operand" "Yk,Yk"))
4960 (match_dup 1)
4961 (const_int 1)))]
4962 "TARGET_AVX512F"
4963 "@
4964 vfnmadd132<ssescalarmodesuffix>\t{<round_op6>%2, %3, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>3, %<iptr>2<round_op6>}
4965 vfnmadd213<ssescalarmodesuffix>\t{<round_op6>%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>2, %<iptr>3<round_op6>}"
4966 [(set_attr "type" "ssemuladd")
4967 (set_attr "mode" "<MODE>")])
4968
4969 (define_insn "*avx512f_vmfnmsub_<mode>_mask<round_name>"
4970 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4971 (vec_merge:VF_128
4972 (vec_merge:VF_128
4973 (fma:VF_128
4974 (neg:VF_128
4975 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v"))
4976 (match_operand:VF_128 1 "register_operand" "0,0")
4977 (neg:VF_128
4978 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>")))
4979 (match_dup 1)
4980 (match_operand:QI 4 "register_operand" "Yk,Yk"))
4981 (match_dup 1)
4982 (const_int 1)))]
4983 "TARGET_AVX512F"
4984 "@
4985 vfnmsub132<ssescalarmodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}
4986 vfnmsub213<ssescalarmodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %<iptr>2, %<iptr>3<round_op5>}"
4987 [(set_attr "type" "ssemuladd")
4988 (set_attr "mode" "<MODE>")])
4989
4990 (define_insn "*avx512f_vmfnmsub_<mode>_mask3<round_name>"
4991 [(set (match_operand:VF_128 0 "register_operand" "=v")
4992 (vec_merge:VF_128
4993 (vec_merge:VF_128
4994 (fma:VF_128
4995 (neg:VF_128
4996 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>"))
4997 (match_operand:VF_128 1 "<round_nimm_scalar_predicate>" "%v")
4998 (neg:VF_128
4999 (match_operand:VF_128 3 "register_operand" "0")))
5000 (match_dup 3)
5001 (match_operand:QI 4 "register_operand" "Yk"))
5002 (match_dup 3)
5003 (const_int 1)))]
5004 "TARGET_AVX512F"
5005 "vfnmsub231<ssescalarmodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}"
5006 [(set_attr "type" "ssemuladd")
5007 (set_attr "mode" "<MODE>")])
5008
5009 (define_insn "*avx512f_vmfnmsub_<mode>_maskz_1<round_name>"
5010 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
5011 (vec_merge:VF_128
5012 (vec_merge:VF_128
5013 (fma:VF_128
5014 (neg:VF_128
5015 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v"))
5016 (match_operand:VF_128 1 "register_operand" "0,0")
5017 (neg:VF_128
5018 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>")))
5019 (match_operand:VF_128 4 "const0_operand" "C,C")
5020 (match_operand:QI 5 "register_operand" "Yk,Yk"))
5021 (match_dup 1)
5022 (const_int 1)))]
5023 "TARGET_AVX512F"
5024 "@
5025 vfnmsub132<ssescalarmodesuffix>\t{<round_op6>%2, %3, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>3, %<iptr>2<round_op6>}
5026 vfnmsub213<ssescalarmodesuffix>\t{<round_op6>%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>2, %<iptr>3<round_op6>}"
5027 [(set_attr "type" "ssemuladd")
5028 (set_attr "mode" "<MODE>")])
5029
5030 ;; FMA4 floating point scalar intrinsics. These write the
5031 ;; entire destination register, with the high-order elements zeroed.
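;;
;; For comparison with the FMA3 patterns above (illustration only): the
;; V4SF variant of the vmfmadd insn below computes
;;   d[0] = op1[0] * op2[0] + op3[0]      d[1..3] = 0.0
;; since the merge operand is forced to the zero vector.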
5032
5033 (define_expand "fma4i_vmfmadd_<mode>"
5034 [(set (match_operand:VF_128 0 "register_operand")
5035 (vec_merge:VF_128
5036 (fma:VF_128
5037 (match_operand:VF_128 1 "nonimmediate_operand")
5038 (match_operand:VF_128 2 "nonimmediate_operand")
5039 (match_operand:VF_128 3 "nonimmediate_operand"))
5040 (match_dup 4)
5041 (const_int 1)))]
5042 "TARGET_FMA4"
5043 "operands[4] = CONST0_RTX (<MODE>mode);")
5044
5045 (define_insn "*fma4i_vmfmadd_<mode>"
5046 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
5047 (vec_merge:VF_128
5048 (fma:VF_128
5049 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
5050 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
5051 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
5052 (match_operand:VF_128 4 "const0_operand")
5053 (const_int 1)))]
5054 "TARGET_FMA4"
5055 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
5056 [(set_attr "type" "ssemuladd")
5057 (set_attr "mode" "<MODE>")])
5058
5059 (define_insn "*fma4i_vmfmsub_<mode>"
5060 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
5061 (vec_merge:VF_128
5062 (fma:VF_128
5063 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
5064 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
5065 (neg:VF_128
5066 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
5067 (match_operand:VF_128 4 "const0_operand")
5068 (const_int 1)))]
5069 "TARGET_FMA4"
5070 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
5071 [(set_attr "type" "ssemuladd")
5072 (set_attr "mode" "<MODE>")])
5073
5074 (define_insn "*fma4i_vmfnmadd_<mode>"
5075 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
5076 (vec_merge:VF_128
5077 (fma:VF_128
5078 (neg:VF_128
5079 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
5080 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
5081 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
5082 (match_operand:VF_128 4 "const0_operand")
5083 (const_int 1)))]
5084 "TARGET_FMA4"
5085 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
5086 [(set_attr "type" "ssemuladd")
5087 (set_attr "mode" "<MODE>")])
5088
5089 (define_insn "*fma4i_vmfnmsub_<mode>"
5090 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
5091 (vec_merge:VF_128
5092 (fma:VF_128
5093 (neg:VF_128
5094 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
5095 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
5096 (neg:VF_128
5097 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
5098 (match_operand:VF_128 4 "const0_operand")
5099 (const_int 1)))]
5100 "TARGET_FMA4"
5101 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
5102 [(set_attr "type" "ssemuladd")
5103 (set_attr "mode" "<MODE>")])
5104
5105 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5106 ;;
5107 ;; Parallel single-precision floating point conversion operations
5108 ;;
5109 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5110
5111 (define_insn_and_split "sse_cvtpi2ps"
5112 [(set (match_operand:V4SF 0 "register_operand" "=x,x,Yv")
5113 (vec_merge:V4SF
5114 (vec_duplicate:V4SF
5115 (float:V2SF (match_operand:V2SI 2 "register_mmxmem_operand" "ym,x,Yv")))
5116 (match_operand:V4SF 1 "register_operand" "0,0,Yv")
5117 (const_int 3)))
5118 (clobber (match_scratch:V4SF 3 "=X,x,Yv"))]
5119 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE"
5120 "@
5121 cvtpi2ps\t{%2, %0|%0, %2}
5122 #
5123 #"
5124 "TARGET_SSE2 && reload_completed
5125 && SSE_REG_P (operands[2])"
5126 [(const_int 0)]
5127 {
5128 rtx op2 = lowpart_subreg (V4SImode, operands[2],
5129 GET_MODE (operands[2]));
5130 /* Generate SSE2 cvtdq2ps. */
5131 emit_insn (gen_floatv4siv4sf2 (operands[3], op2));
5132
5133 /* Merge operands[3] with operands[0]. */
5134 rtx mask, op1;
5135 if (TARGET_AVX)
5136 {
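/* Select elements 0, 1, 6 and 7 of the V8SF concatenation, i.e. the two
   converted values from operands[3] followed by the two high elements
   of operands[1].  */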
5137 mask = gen_rtx_PARALLEL (VOIDmode,
5138 gen_rtvec (4, GEN_INT (0), GEN_INT (1),
5139 GEN_INT (6), GEN_INT (7)));
5140 op1 = gen_rtx_VEC_CONCAT (V8SFmode, operands[3], operands[1]);
5141 op2 = gen_rtx_VEC_SELECT (V4SFmode, op1, mask);
5142 emit_insn (gen_rtx_SET (operands[0], op2));
5143 }
5144 else
5145 {
5146 /* NB: SSE can only concatenate OP0 and OP3 to OP0. */
5147 mask = gen_rtx_PARALLEL (VOIDmode,
5148 gen_rtvec (4, GEN_INT (2), GEN_INT (3),
5149 GEN_INT (4), GEN_INT (5)));
5150 op1 = gen_rtx_VEC_CONCAT (V8SFmode, operands[0], operands[3]);
5151 op2 = gen_rtx_VEC_SELECT (V4SFmode, op1, mask);
5152 emit_insn (gen_rtx_SET (operands[0], op2));
5153
5154 /* Swap bits 0:63 with bits 64:127. */
5155 mask = gen_rtx_PARALLEL (VOIDmode,
5156 gen_rtvec (4, GEN_INT (2), GEN_INT (3),
5157 GEN_INT (0), GEN_INT (1)));
5158 rtx dest = lowpart_subreg (V4SImode, operands[0],
5159 GET_MODE (operands[0]));
5160 op1 = gen_rtx_VEC_SELECT (V4SImode, dest, mask);
5161 emit_insn (gen_rtx_SET (dest, op1));
5162 }
5163 DONE;
5164 }
5165 [(set_attr "mmx_isa" "native,sse_noavx,avx")
5166 (set_attr "type" "ssecvt")
5167 (set_attr "mode" "V4SF")])
5168
5169 (define_insn "sse_cvtps2pi"
5170 [(set (match_operand:V2SI 0 "register_operand" "=y,Yv")
5171 (vec_select:V2SI
5172 (unspec:V4SI [(match_operand:V4SF 1 "register_mmxmem_operand" "xm,YvBm")]
5173 UNSPEC_FIX_NOTRUNC)
5174 (parallel [(const_int 0) (const_int 1)])))]
5175 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE"
5176 "@
5177 cvtps2pi\t{%1, %0|%0, %q1}
5178 %vcvtps2dq\t{%1, %0|%0, %1}"
5179 [(set_attr "isa" "*,sse2")
5180 (set_attr "mmx_isa" "native,*")
5181 (set_attr "type" "ssecvt")
5182 (set_attr "unit" "mmx,*")
5183 (set_attr "mode" "DI")])
5184
5185 (define_insn "sse_cvttps2pi"
5186 [(set (match_operand:V2SI 0 "register_operand" "=y,Yv")
5187 (vec_select:V2SI
5188 (fix:V4SI (match_operand:V4SF 1 "register_mmxmem_operand" "xm,YvBm"))
5189 (parallel [(const_int 0) (const_int 1)])))]
5190 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE"
5191 "@
5192 cvttps2pi\t{%1, %0|%0, %q1}
5193 %vcvttps2dq\t{%1, %0|%0, %1}"
5194 [(set_attr "isa" "*,sse2")
5195 (set_attr "mmx_isa" "native,*")
5196 (set_attr "type" "ssecvt")
5197 (set_attr "unit" "mmx,*")
5198 (set_attr "prefix_rep" "0")
5199 (set_attr "mode" "SF")])
5200
5201 (define_insn "sse_cvtsi2ss<rex64namesuffix><round_name>"
5202 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
5203 (vec_merge:V4SF
5204 (vec_duplicate:V4SF
5205 (float:SF (match_operand:SWI48 2 "<round_nimm_scalar_predicate>" "r,m,<round_constraint3>")))
5206 (match_operand:V4SF 1 "register_operand" "0,0,v")
5207 (const_int 1)))]
5208 "TARGET_SSE"
5209 "@
5210 cvtsi2ss<rex64suffix>\t{%2, %0|%0, %2}
5211 cvtsi2ss<rex64suffix>\t{%2, %0|%0, %2}
5212 vcvtsi2ss<rex64suffix>\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
5213 [(set_attr "isa" "noavx,noavx,avx")
5214 (set_attr "type" "sseicvt")
5215 (set_attr "athlon_decode" "vector,double,*")
5216 (set_attr "amdfam10_decode" "vector,double,*")
5217 (set_attr "bdver1_decode" "double,direct,*")
5218 (set_attr "btver2_decode" "double,double,double")
5219 (set_attr "znver1_decode" "double,double,double")
5220 (set (attr "length_vex")
5221 (if_then_else
5222 (and (match_test "<MODE>mode == DImode")
5223 (eq_attr "alternative" "2"))
5224 (const_string "4")
5225 (const_string "*")))
5226 (set (attr "prefix_rex")
5227 (if_then_else
5228 (and (match_test "<MODE>mode == DImode")
5229 (eq_attr "alternative" "0,1"))
5230 (const_string "1")
5231 (const_string "*")))
5232 (set_attr "prefix" "orig,orig,maybe_evex")
5233 (set_attr "mode" "SF")])
5234
5235 (define_insn "sse_cvtss2si<rex64namesuffix><round_name>"
5236 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
5237 (unspec:SWI48
5238 [(vec_select:SF
5239 (match_operand:V4SF 1 "<round_nimm_scalar_predicate>" "v,<round_constraint2>")
5240 (parallel [(const_int 0)]))]
5241 UNSPEC_FIX_NOTRUNC))]
5242 "TARGET_SSE"
5243 "%vcvtss2si<rex64suffix>\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
5244 [(set_attr "type" "sseicvt")
5245 (set_attr "athlon_decode" "double,vector")
5246 (set_attr "bdver1_decode" "double,double")
5247 (set_attr "prefix_rep" "1")
5248 (set_attr "prefix" "maybe_vex")
5249 (set_attr "mode" "<MODE>")])
5250
5251 (define_insn "sse_cvtss2si<rex64namesuffix>_2"
5252 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
5253 (unspec:SWI48 [(match_operand:SF 1 "nonimmediate_operand" "v,m")]
5254 UNSPEC_FIX_NOTRUNC))]
5255 "TARGET_SSE"
5256 "%vcvtss2si<rex64suffix>\t{%1, %0|%0, %1}"
5257 [(set_attr "type" "sseicvt")
5258 (set_attr "athlon_decode" "double,vector")
5259 (set_attr "amdfam10_decode" "double,double")
5260 (set_attr "bdver1_decode" "double,double")
5261 (set_attr "prefix_rep" "1")
5262 (set_attr "prefix" "maybe_vex")
5263 (set_attr "mode" "<MODE>")])
5264
5265 (define_insn "sse_cvttss2si<rex64namesuffix><round_saeonly_name>"
5266 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
5267 (fix:SWI48
5268 (vec_select:SF
5269 (match_operand:V4SF 1 "<round_saeonly_nimm_scalar_predicate>" "v,<round_saeonly_constraint>")
5270 (parallel [(const_int 0)]))))]
5271 "TARGET_SSE"
5272 "%vcvttss2si<rex64suffix>\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
5273 [(set_attr "type" "sseicvt")
5274 (set_attr "athlon_decode" "double,vector")
5275 (set_attr "amdfam10_decode" "double,double")
5276 (set_attr "bdver1_decode" "double,double")
5277 (set_attr "prefix_rep" "1")
5278 (set_attr "prefix" "maybe_vex")
5279 (set_attr "mode" "<MODE>")])
5280
5281 (define_insn "cvtusi2<ssescalarmodesuffix>32<round_name>"
5282 [(set (match_operand:VF_128 0 "register_operand" "=v")
5283 (vec_merge:VF_128
5284 (vec_duplicate:VF_128
5285 (unsigned_float:<ssescalarmode>
5286 (match_operand:SI 2 "<round_nimm_scalar_predicate>" "<round_constraint3>")))
5287 (match_operand:VF_128 1 "register_operand" "v")
5288 (const_int 1)))]
5289 "TARGET_AVX512F && <round_modev4sf_condition>"
5290 "vcvtusi2<ssescalarmodesuffix>{l}\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
5291 [(set_attr "type" "sseicvt")
5292 (set_attr "prefix" "evex")
5293 (set_attr "mode" "<ssescalarmode>")])
5294
5295 (define_insn "cvtusi2<ssescalarmodesuffix>64<round_name>"
5296 [(set (match_operand:VF_128 0 "register_operand" "=v")
5297 (vec_merge:VF_128
5298 (vec_duplicate:VF_128
5299 (unsigned_float:<ssescalarmode>
5300 (match_operand:DI 2 "<round_nimm_scalar_predicate>" "<round_constraint3>")))
5301 (match_operand:VF_128 1 "register_operand" "v")
5302 (const_int 1)))]
5303 "TARGET_AVX512F && TARGET_64BIT"
5304 "vcvtusi2<ssescalarmodesuffix>{q}\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
5305 [(set_attr "type" "sseicvt")
5306 (set_attr "prefix" "evex")
5307 (set_attr "mode" "<ssescalarmode>")])
5308
5309 (define_insn "float<sseintvecmodelower><mode>2<mask_name><round_name>"
5310 [(set (match_operand:VF1 0 "register_operand" "=x,v")
5311 (float:VF1
5312 (match_operand:<sseintvecmode> 1 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
5313 "TARGET_SSE2 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
5314 "@
5315 cvtdq2ps\t{%1, %0|%0, %1}
5316 vcvtdq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5317 [(set_attr "isa" "noavx,avx")
5318 (set_attr "type" "ssecvt")
5319 (set_attr "prefix" "maybe_vex")
5320 (set_attr "mode" "<sseinsnmode>")])
5321
5322 (define_insn "ufloat<sseintvecmodelower><mode>2<mask_name><round_name>"
5323 [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
5324 (unsigned_float:VF1_AVX512VL
5325 (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
5326 "TARGET_AVX512F"
5327 "vcvtudq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5328 [(set_attr "type" "ssecvt")
5329 (set_attr "prefix" "evex")
5330 (set_attr "mode" "<MODE>")])
5331
5332 (define_expand "floatuns<sseintvecmodelower><mode>2"
5333 [(match_operand:VF1 0 "register_operand")
5334 (match_operand:<sseintvecmode> 1 "register_operand")]
5335 "TARGET_SSE2 && (<MODE>mode == V4SFmode || TARGET_AVX2)"
5336 {
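/* AVX512F provides a native unsigned conversion for V16SF, and AVX512VL
   for the 128/256-bit modes; otherwise expand the conversion manually.  */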
5337 if (<MODE>mode == V16SFmode)
5338 emit_insn (gen_ufloatv16siv16sf2 (operands[0], operands[1]));
5339 else
5340 if (TARGET_AVX512VL)
5341 {
5342 if (<MODE>mode == V4SFmode)
5343 emit_insn (gen_ufloatv4siv4sf2 (operands[0], operands[1]));
5344 else
5345 emit_insn (gen_ufloatv8siv8sf2 (operands[0], operands[1]));
5346 }
5347 else
5348 ix86_expand_vector_convert_uns_vsivsf (operands[0], operands[1]);
5349
5350 DONE;
5351 })
5352
5353
5354 ;; For <sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode> insn pattern
5355 (define_mode_attr sf2simodelower
5356 [(V16SI "v16sf") (V8SI "v8sf") (V4SI "v4sf")])
5357
5358 (define_insn "<sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode><mask_name>"
5359 [(set (match_operand:VI4_AVX 0 "register_operand" "=v")
5360 (unspec:VI4_AVX
5361 [(match_operand:<ssePSmode> 1 "vector_operand" "vBm")]
5362 UNSPEC_FIX_NOTRUNC))]
5363 "TARGET_SSE2 && <mask_mode512bit_condition>"
5364 "%vcvtps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5365 [(set_attr "type" "ssecvt")
5366 (set (attr "prefix_data16")
5367 (if_then_else
5368 (match_test "TARGET_AVX")
5369 (const_string "*")
5370 (const_string "1")))
5371 (set_attr "prefix" "maybe_vex")
5372 (set_attr "mode" "<sseinsnmode>")])
5373
5374 (define_insn "avx512f_fix_notruncv16sfv16si<mask_name><round_name>"
5375 [(set (match_operand:V16SI 0 "register_operand" "=v")
5376 (unspec:V16SI
5377 [(match_operand:V16SF 1 "<round_nimm_predicate>" "<round_constraint>")]
5378 UNSPEC_FIX_NOTRUNC))]
5379 "TARGET_AVX512F"
5380 "vcvtps2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5381 [(set_attr "type" "ssecvt")
5382 (set_attr "prefix" "evex")
5383 (set_attr "mode" "XI")])
5384
5385 (define_insn "<mask_codefor><avx512>_ufix_notrunc<sf2simodelower><mode><mask_name><round_name>"
5386 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
5387 (unspec:VI4_AVX512VL
5388 [(match_operand:<ssePSmode> 1 "nonimmediate_operand" "<round_constraint>")]
5389 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
5390 "TARGET_AVX512F"
5391 "vcvtps2udq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5392 [(set_attr "type" "ssecvt")
5393 (set_attr "prefix" "evex")
5394 (set_attr "mode" "<sseinsnmode>")])
5395
5396 (define_insn "<mask_codefor>avx512dq_cvtps2qq<mode><mask_name><round_name>"
5397 [(set (match_operand:VI8_256_512 0 "register_operand" "=v")
5398 (unspec:VI8_256_512 [(match_operand:<ssePSmode2> 1 "nonimmediate_operand" "<round_constraint>")]
5399 UNSPEC_FIX_NOTRUNC))]
5400 "TARGET_AVX512DQ && <round_mode512bit_condition>"
5401 "vcvtps2qq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5402 [(set_attr "type" "ssecvt")
5403 (set_attr "prefix" "evex")
5404 (set_attr "mode" "<sseinsnmode>")])
5405
5406 (define_insn "<mask_codefor>avx512dq_cvtps2qqv2di<mask_name>"
5407 [(set (match_operand:V2DI 0 "register_operand" "=v")
5408 (unspec:V2DI
5409 [(vec_select:V2SF
5410 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
5411 (parallel [(const_int 0) (const_int 1)]))]
5412 UNSPEC_FIX_NOTRUNC))]
5413 "TARGET_AVX512DQ && TARGET_AVX512VL"
5414 "vcvtps2qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
5415 [(set_attr "type" "ssecvt")
5416 (set_attr "prefix" "evex")
5417 (set_attr "mode" "TI")])
5418
5419 (define_insn "<mask_codefor>avx512dq_cvtps2uqq<mode><mask_name><round_name>"
5420 [(set (match_operand:VI8_256_512 0 "register_operand" "=v")
5421 (unspec:VI8_256_512 [(match_operand:<ssePSmode2> 1 "nonimmediate_operand" "<round_constraint>")]
5422 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
5423 "TARGET_AVX512DQ && <round_mode512bit_condition>"
5424 "vcvtps2uqq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5425 [(set_attr "type" "ssecvt")
5426 (set_attr "prefix" "evex")
5427 (set_attr "mode" "<sseinsnmode>")])
5428
5429 (define_insn "<mask_codefor>avx512dq_cvtps2uqqv2di<mask_name>"
5430 [(set (match_operand:V2DI 0 "register_operand" "=v")
5431 (unspec:V2DI
5432 [(vec_select:V2SF
5433 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
5434 (parallel [(const_int 0) (const_int 1)]))]
5435 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
5436 "TARGET_AVX512DQ && TARGET_AVX512VL"
5437 "vcvtps2uqq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
5438 [(set_attr "type" "ssecvt")
5439 (set_attr "prefix" "evex")
5440 (set_attr "mode" "TI")])
5441
5442 (define_insn "<fixsuffix>fix_truncv16sfv16si2<mask_name><round_saeonly_name>"
5443 [(set (match_operand:V16SI 0 "register_operand" "=v")
5444 (any_fix:V16SI
5445 (match_operand:V16SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
5446 "TARGET_AVX512F"
5447 "vcvttps2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
5448 [(set_attr "type" "ssecvt")
5449 (set_attr "prefix" "evex")
5450 (set_attr "mode" "XI")])
5451
5452 (define_insn "fix_truncv8sfv8si2<mask_name>"
5453 [(set (match_operand:V8SI 0 "register_operand" "=v")
5454 (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "vm")))]
5455 "TARGET_AVX && <mask_avx512vl_condition>"
5456 "vcvttps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5457 [(set_attr "type" "ssecvt")
5458 (set_attr "prefix" "<mask_prefix>")
5459 (set_attr "mode" "OI")])
5460
5461 (define_insn "fix_truncv4sfv4si2<mask_name>"
5462 [(set (match_operand:V4SI 0 "register_operand" "=v")
5463 (fix:V4SI (match_operand:V4SF 1 "vector_operand" "vBm")))]
5464 "TARGET_SSE2 && <mask_avx512vl_condition>"
5465 "%vcvttps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5466 [(set_attr "type" "ssecvt")
5467 (set (attr "prefix_rep")
5468 (if_then_else
5469 (match_test "TARGET_AVX")
5470 (const_string "*")
5471 (const_string "1")))
5472 (set (attr "prefix_data16")
5473 (if_then_else
5474 (match_test "TARGET_AVX")
5475 (const_string "*")
5476 (const_string "0")))
5477 (set_attr "prefix_data16" "0")
5478 (set_attr "prefix" "<mask_prefix2>")
5479 (set_attr "mode" "TI")])
5480
5481 (define_expand "fixuns_trunc<mode><sseintvecmodelower>2"
5482 [(match_operand:<sseintvecmode> 0 "register_operand")
5483 (match_operand:VF1 1 "register_operand")]
5484 "TARGET_SSE2"
5485 {
5486 if (<MODE>mode == V16SFmode)
5487 emit_insn (gen_ufix_truncv16sfv16si2 (operands[0],
5488 operands[1]));
5489 else
5490 {
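/* No native unsigned conversion: tmp[0] is the source adjusted so that
   the signed fix_trunc below is safe, and tmp[2] is the correction that
   is XORed back into the truncated result.  */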
5491 rtx tmp[3];
5492 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
5493 tmp[1] = gen_reg_rtx (<sseintvecmode>mode);
5494 emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (tmp[1], tmp[0]));
5495 emit_insn (gen_xor<sseintvecmodelower>3 (operands[0], tmp[1], tmp[2]));
5496 }
5497 DONE;
5498 })
5499
5500 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5501 ;;
5502 ;; Parallel double-precision floating point conversion operations
5503 ;;
5504 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5505
5506 (define_insn "sse2_cvtpi2pd"
5507 [(set (match_operand:V2DF 0 "register_operand" "=v,?!x")
5508 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "vBm,yBm")))]
5509 "TARGET_SSE2"
5510 "@
5511 %vcvtdq2pd\t{%1, %0|%0, %1}
5512 cvtpi2pd\t{%1, %0|%0, %1}"
5513 [(set_attr "mmx_isa" "*,native")
5514 (set_attr "type" "ssecvt")
5515 (set_attr "unit" "*,mmx")
5516 (set_attr "prefix_data16" "*,1")
5517 (set_attr "prefix" "maybe_vex,*")
5518 (set_attr "mode" "V2DF")])
5519
5520 (define_expand "floatv2siv2df2"
5521 [(set (match_operand:V2DF 0 "register_operand")
5522 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand")))]
5523 "TARGET_MMX_WITH_SSE")
5524
5525 (define_insn "floatunsv2siv2df2"
5526 [(set (match_operand:V2DF 0 "register_operand" "=v")
5527 (unsigned_float:V2DF
5528 (match_operand:V2SI 1 "nonimmediate_operand" "vm")))]
5529 "TARGET_MMX_WITH_SSE && TARGET_AVX512VL"
5530 "vcvtudq2pd\t{%1, %0|%0, %1}"
5531 [(set_attr "type" "ssecvt")
5532 (set_attr "prefix" "evex")
5533 (set_attr "mode" "V2DF")])
5534
5535 (define_insn "sse2_cvtpd2pi"
5536 [(set (match_operand:V2SI 0 "register_operand" "=v,?!y")
5537 (unspec:V2SI [(match_operand:V2DF 1 "vector_operand" "vBm,xBm")]
5538 UNSPEC_FIX_NOTRUNC))]
5539 "TARGET_SSE2"
5540 "@
5541 * return TARGET_AVX ? \"vcvtpd2dq{x}\t{%1, %0|%0, %1}\" : \"cvtpd2dq\t{%1, %0|%0, %1}\";
5542 cvtpd2pi\t{%1, %0|%0, %1}"
5543 [(set_attr "mmx_isa" "*,native")
5544 (set_attr "type" "ssecvt")
5545 (set_attr "unit" "*,mmx")
5546 (set_attr "amdfam10_decode" "double")
5547 (set_attr "athlon_decode" "vector")
5548 (set_attr "bdver1_decode" "double")
5549 (set_attr "prefix_data16" "*,1")
5550 (set_attr "prefix" "maybe_vex,*")
5551 (set_attr "mode" "TI")])
5552
5553 (define_insn "sse2_cvttpd2pi"
5554 [(set (match_operand:V2SI 0 "register_operand" "=v,?!y")
5555 (fix:V2SI (match_operand:V2DF 1 "vector_operand" "vBm,xBm")))]
5556 "TARGET_SSE2"
5557 "@
5558 * return TARGET_AVX ? \"vcvttpd2dq{x}\t{%1, %0|%0, %1}\" : \"cvttpd2dq\t{%1, %0|%0, %1}\";
5559 cvttpd2pi\t{%1, %0|%0, %1}"
5560 [(set_attr "mmx_isa" "*,native")
5561 (set_attr "type" "ssecvt")
5562 (set_attr "unit" "*,mmx")
5563 (set_attr "amdfam10_decode" "double")
5564 (set_attr "athlon_decode" "vector")
5565 (set_attr "bdver1_decode" "double")
5566 (set_attr "prefix_data16" "*,1")
5567 (set_attr "prefix" "maybe_vex,*")
5568 (set_attr "mode" "TI")])
5569
5570 (define_expand "fix_truncv2dfv2si2"
5571 [(set (match_operand:V2SI 0 "register_operand")
5572 (fix:V2SI (match_operand:V2DF 1 "vector_operand")))]
5573 "TARGET_MMX_WITH_SSE")
5574
5575 (define_insn "fixuns_truncv2dfv2si2"
5576 [(set (match_operand:V2SI 0 "register_operand" "=v")
5577 (unsigned_fix:V2SI
5578 (match_operand:V2DF 1 "nonimmediate_operand" "vm")))]
5579 "TARGET_MMX_WITH_SSE && TARGET_AVX512VL"
5580 "vcvttpd2udq{x}\t{%1, %0|%0, %1}"
5581 [(set_attr "type" "ssecvt")
5582 (set_attr "prefix" "evex")
5583 (set_attr "mode" "TI")])
5584
5585 (define_insn "sse2_cvtsi2sd"
5586 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
5587 (vec_merge:V2DF
5588 (vec_duplicate:V2DF
5589 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
5590 (match_operand:V2DF 1 "register_operand" "0,0,v")
5591 (const_int 1)))]
5592 "TARGET_SSE2"
5593 "@
5594 cvtsi2sd{l}\t{%2, %0|%0, %2}
5595 cvtsi2sd{l}\t{%2, %0|%0, %2}
5596 vcvtsi2sd{l}\t{%2, %1, %0|%0, %1, %2}"
5597 [(set_attr "isa" "noavx,noavx,avx")
5598 (set_attr "type" "sseicvt")
5599 (set_attr "athlon_decode" "double,direct,*")
5600 (set_attr "amdfam10_decode" "vector,double,*")
5601 (set_attr "bdver1_decode" "double,direct,*")
5602 (set_attr "btver2_decode" "double,double,double")
5603 (set_attr "znver1_decode" "double,double,double")
5604 (set_attr "prefix" "orig,orig,maybe_evex")
5605 (set_attr "mode" "DF")])
5606
5607 (define_insn "sse2_cvtsi2sdq<round_name>"
5608 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
5609 (vec_merge:V2DF
5610 (vec_duplicate:V2DF
5611 (float:DF (match_operand:DI 2 "<round_nimm_scalar_predicate>" "r,m,<round_constraint3>")))
5612 (match_operand:V2DF 1 "register_operand" "0,0,v")
5613 (const_int 1)))]
5614 "TARGET_SSE2 && TARGET_64BIT"
5615 "@
5616 cvtsi2sd{q}\t{%2, %0|%0, %2}
5617 cvtsi2sd{q}\t{%2, %0|%0, %2}
5618 vcvtsi2sd{q}\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
5619 [(set_attr "isa" "noavx,noavx,avx")
5620 (set_attr "type" "sseicvt")
5621 (set_attr "athlon_decode" "double,direct,*")
5622 (set_attr "amdfam10_decode" "vector,double,*")
5623 (set_attr "bdver1_decode" "double,direct,*")
5624 (set_attr "length_vex" "*,*,4")
5625 (set_attr "prefix_rex" "1,1,*")
5626 (set_attr "prefix" "orig,orig,maybe_evex")
5627 (set_attr "mode" "DF")])
5628
5629 (define_insn "avx512f_vcvtss2usi<rex64namesuffix><round_name>"
5630 [(set (match_operand:SWI48 0 "register_operand" "=r")
5631 (unspec:SWI48
5632 [(vec_select:SF
5633 (match_operand:V4SF 1 "<round_nimm_predicate>" "<round_constraint>")
5634 (parallel [(const_int 0)]))]
5635 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
5636 "TARGET_AVX512F"
5637 "vcvtss2usi\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
5638 [(set_attr "type" "sseicvt")
5639 (set_attr "prefix" "evex")
5640 (set_attr "mode" "<MODE>")])
5641
5642 (define_insn "avx512f_vcvttss2usi<rex64namesuffix><round_saeonly_name>"
5643 [(set (match_operand:SWI48 0 "register_operand" "=r")
5644 (unsigned_fix:SWI48
5645 (vec_select:SF
5646 (match_operand:V4SF 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
5647 (parallel [(const_int 0)]))))]
5648 "TARGET_AVX512F"
5649 "vcvttss2usi\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
5650 [(set_attr "type" "sseicvt")
5651 (set_attr "prefix" "evex")
5652 (set_attr "mode" "<MODE>")])
5653
5654 (define_insn "avx512f_vcvtsd2usi<rex64namesuffix><round_name>"
5655 [(set (match_operand:SWI48 0 "register_operand" "=r")
5656 (unspec:SWI48
5657 [(vec_select:DF
5658 (match_operand:V2DF 1 "<round_nimm_predicate>" "<round_constraint>")
5659 (parallel [(const_int 0)]))]
5660 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
5661 "TARGET_AVX512F"
5662 "vcvtsd2usi\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
5663 [(set_attr "type" "sseicvt")
5664 (set_attr "prefix" "evex")
5665 (set_attr "mode" "<MODE>")])
5666
5667 (define_insn "avx512f_vcvttsd2usi<rex64namesuffix><round_saeonly_name>"
5668 [(set (match_operand:SWI48 0 "register_operand" "=r")
5669 (unsigned_fix:SWI48
5670 (vec_select:DF
5671 (match_operand:V2DF 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
5672 (parallel [(const_int 0)]))))]
5673 "TARGET_AVX512F"
5674 "vcvttsd2usi\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
5675 [(set_attr "type" "sseicvt")
5676 (set_attr "prefix" "evex")
5677 (set_attr "mode" "<MODE>")])
5678
5679 (define_insn "sse2_cvtsd2si<rex64namesuffix><round_name>"
5680 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
5681 (unspec:SWI48
5682 [(vec_select:DF
5683 (match_operand:V2DF 1 "<round_nimm_scalar_predicate>" "v,<round_constraint2>")
5684 (parallel [(const_int 0)]))]
5685 UNSPEC_FIX_NOTRUNC))]
5686 "TARGET_SSE2"
5687 "%vcvtsd2si<rex64suffix>\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
5688 [(set_attr "type" "sseicvt")
5689 (set_attr "athlon_decode" "double,vector")
5690 (set_attr "bdver1_decode" "double,double")
5691 (set_attr "btver2_decode" "double,double")
5692 (set_attr "prefix_rep" "1")
5693 (set_attr "prefix" "maybe_vex")
5694 (set_attr "mode" "<MODE>")])
5695
5696 (define_insn "sse2_cvtsd2si<rex64namesuffix>_2"
5697 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
5698 (unspec:SWI48 [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
5699 UNSPEC_FIX_NOTRUNC))]
5700 "TARGET_SSE2"
5701 "%vcvtsd2si<rex64suffix>\t{%1, %0|%0, %q1}"
5702 [(set_attr "type" "sseicvt")
5703 (set_attr "athlon_decode" "double,vector")
5704 (set_attr "amdfam10_decode" "double,double")
5705 (set_attr "bdver1_decode" "double,double")
5706 (set_attr "prefix_rep" "1")
5707 (set_attr "prefix" "maybe_vex")
5708 (set_attr "mode" "<MODE>")])
5709
5710 (define_insn "sse2_cvttsd2si<rex64namesuffix><round_saeonly_name>"
5711 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
5712 (fix:SWI48
5713 (vec_select:DF
5714 (match_operand:V2DF 1 "<round_saeonly_nimm_scalar_predicate>" "v,<round_saeonly_constraint2>")
5715 (parallel [(const_int 0)]))))]
5716 "TARGET_SSE2"
5717 "%vcvttsd2si<rex64suffix>\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
5718 [(set_attr "type" "sseicvt")
5719 (set_attr "athlon_decode" "double,vector")
5720 (set_attr "amdfam10_decode" "double,double")
5721 (set_attr "bdver1_decode" "double,double")
5722 (set_attr "btver2_decode" "double,double")
5723 (set_attr "prefix_rep" "1")
5724 (set_attr "prefix" "maybe_vex")
5725 (set_attr "mode" "<MODE>")])
5726
5727 ;; For float<si2dfmode><mode>2 insn pattern
5728 (define_mode_attr si2dfmode
5729 [(V8DF "V8SI") (V4DF "V4SI")])
5730 (define_mode_attr si2dfmodelower
5731 [(V8DF "v8si") (V4DF "v4si")])
5732
5733 (define_insn "float<si2dfmodelower><mode>2<mask_name>"
5734 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
5735 (float:VF2_512_256 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
5736 "TARGET_AVX && <mask_mode512bit_condition>"
5737 "vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5738 [(set_attr "type" "ssecvt")
5739 (set_attr "prefix" "maybe_vex")
5740 (set_attr "mode" "<MODE>")])
5741
5742 (define_insn "float<floatunssuffix><sseintvecmodelower><mode>2<mask_name><round_name>"
5743 [(set (match_operand:VF2_AVX512VL 0 "register_operand" "=v")
5744 (any_float:VF2_AVX512VL
5745 (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
5746 "TARGET_AVX512DQ"
5747 "vcvt<floatsuffix>qq2pd\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5748 [(set_attr "type" "ssecvt")
5749 (set_attr "prefix" "evex")
5750 (set_attr "mode" "<MODE>")])
5751
5752 ;; For float<floatunssuffix><sselongvecmodelower><mode>2 insn patterns
5753 (define_mode_attr qq2pssuff
5754 [(V8SF "") (V4SF "{y}")])
5755
5756 (define_mode_attr sselongvecmode
5757 [(V8SF "V8DI") (V4SF "V4DI")])
5758
5759 (define_mode_attr sselongvecmodelower
5760 [(V8SF "v8di") (V4SF "v4di")])
5761
5762 (define_mode_attr sseintvecmode3
5763 [(V8SF "XI") (V4SF "OI")
5764 (V8DF "OI") (V4DF "TI")])
5765
5766 (define_insn "float<floatunssuffix><sselongvecmodelower><mode>2<mask_name><round_name>"
5767 [(set (match_operand:VF1_128_256VL 0 "register_operand" "=v")
5768 (any_float:VF1_128_256VL
5769 (match_operand:<sselongvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
5770 "TARGET_AVX512DQ && <round_modev8sf_condition>"
5771 "vcvt<floatsuffix>qq2ps<qq2pssuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5772 [(set_attr "type" "ssecvt")
5773 (set_attr "prefix" "evex")
5774 (set_attr "mode" "<MODE>")])
5775
5776 (define_expand "float<floatunssuffix>v2div2sf2"
5777 [(set (match_operand:V4SF 0 "register_operand" "=v")
5778 (vec_concat:V4SF
5779 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
5780 (match_dup 2)))]
5781 "TARGET_AVX512DQ && TARGET_AVX512VL"
5782 "operands[2] = CONST0_RTX (V2SFmode);")
5783
5784 (define_insn "*float<floatunssuffix>v2div2sf2"
5785 [(set (match_operand:V4SF 0 "register_operand" "=v")
5786 (vec_concat:V4SF
5787 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
5788 (match_operand:V2SF 2 "const0_operand" "C")))]
5789 "TARGET_AVX512DQ && TARGET_AVX512VL"
5790 "vcvt<floatsuffix>qq2ps{x}\t{%1, %0|%0, %1}"
5791 [(set_attr "type" "ssecvt")
5792 (set_attr "prefix" "evex")
5793 (set_attr "mode" "V4SF")])
5794
5795 (define_mode_attr vpckfloat_concat_mode
5796 [(V8DI "v16sf") (V4DI "v8sf") (V2DI "v8sf")])
5797 (define_mode_attr vpckfloat_temp_mode
5798 [(V8DI "V8SF") (V4DI "V4SF") (V2DI "V4SF")])
5799 (define_mode_attr vpckfloat_op_mode
5800 [(V8DI "v8sf") (V4DI "v4sf") (V2DI "v2sf")])
5801
5802 (define_expand "vec_pack<floatprefix>_float_<mode>"
5803 [(match_operand:<ssePSmode> 0 "register_operand")
5804 (any_float:<ssePSmode>
5805 (match_operand:VI8_AVX512VL 1 "register_operand"))
5806 (match_operand:VI8_AVX512VL 2 "register_operand")]
5807 "TARGET_AVX512DQ"
5808 {
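/* Convert each 64-bit-integer operand to a float vector of half the
   width, then concatenate the two halves; for V2DI each half is only
   two floats, so use movlhps rather than a wider vec_concat.  */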
5809 rtx r1 = gen_reg_rtx (<vpckfloat_temp_mode>mode);
5810 rtx r2 = gen_reg_rtx (<vpckfloat_temp_mode>mode);
5811 rtx (*gen) (rtx, rtx) = gen_float<floatunssuffix><mode><vpckfloat_op_mode>2;
5812 emit_insn (gen (r1, operands[1]));
5813 emit_insn (gen (r2, operands[2]));
5814 if (<MODE>mode == V2DImode)
5815 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
5816 else
5817 emit_insn (gen_avx_vec_concat<vpckfloat_concat_mode> (operands[0],
5818 r1, r2));
5819 DONE;
5820 })
5821
5822 (define_expand "float<floatunssuffix>v2div2sf2_mask"
5823 [(set (match_operand:V4SF 0 "register_operand" "=v")
5824 (vec_concat:V4SF
5825 (vec_merge:V2SF
5826 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
5827 (vec_select:V2SF
5828 (match_operand:V4SF 2 "nonimm_or_0_operand" "0C")
5829 (parallel [(const_int 0) (const_int 1)]))
5830 (match_operand:QI 3 "register_operand" "Yk"))
5831 (match_dup 4)))]
5832 "TARGET_AVX512DQ && TARGET_AVX512VL"
5833 "operands[4] = CONST0_RTX (V2SFmode);")
5834
5835 (define_insn "*float<floatunssuffix>v2div2sf2_mask"
5836 [(set (match_operand:V4SF 0 "register_operand" "=v")
5837 (vec_concat:V4SF
5838 (vec_merge:V2SF
5839 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
5840 (vec_select:V2SF
5841 (match_operand:V4SF 2 "nonimm_or_0_operand" "0C")
5842 (parallel [(const_int 0) (const_int 1)]))
5843 (match_operand:QI 3 "register_operand" "Yk"))
5844 (match_operand:V2SF 4 "const0_operand" "C")))]
5845 "TARGET_AVX512DQ && TARGET_AVX512VL"
5846 "vcvt<floatsuffix>qq2ps{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
5847 [(set_attr "type" "ssecvt")
5848 (set_attr "prefix" "evex")
5849 (set_attr "mode" "V4SF")])
5850
5851 (define_insn "*float<floatunssuffix>v2div2sf2_mask_1"
5852 [(set (match_operand:V4SF 0 "register_operand" "=v")
5853 (vec_concat:V4SF
5854 (vec_merge:V2SF
5855 (any_float:V2SF (match_operand:V2DI 1
5856 "nonimmediate_operand" "vm"))
5857 (match_operand:V2SF 3 "const0_operand" "C")
5858 (match_operand:QI 2 "register_operand" "Yk"))
5859 (match_operand:V2SF 4 "const0_operand" "C")))]
5860 "TARGET_AVX512DQ && TARGET_AVX512VL"
5861 "vcvt<floatsuffix>qq2ps{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
5862 [(set_attr "type" "ssecvt")
5863 (set_attr "prefix" "evex")
5864 (set_attr "mode" "V4SF")])
5865
5866 (define_insn "ufloat<si2dfmodelower><mode>2<mask_name>"
5867 [(set (match_operand:VF2_512_256VL 0 "register_operand" "=v")
5868 (unsigned_float:VF2_512_256VL
5869 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
5870 "TARGET_AVX512F"
5871 "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5872 [(set_attr "type" "ssecvt")
5873 (set_attr "prefix" "evex")
5874 (set_attr "mode" "<MODE>")])
5875
5876 (define_insn "ufloatv2siv2df2<mask_name>"
5877 [(set (match_operand:V2DF 0 "register_operand" "=v")
5878 (unsigned_float:V2DF
5879 (vec_select:V2SI
5880 (match_operand:V4SI 1 "nonimmediate_operand" "vm")
5881 (parallel [(const_int 0) (const_int 1)]))))]
5882 "TARGET_AVX512VL"
5883 "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
5884 [(set_attr "type" "ssecvt")
5885 (set_attr "prefix" "evex")
5886 (set_attr "mode" "V2DF")])
5887
5888 (define_insn "avx512f_cvtdq2pd512_2"
5889 [(set (match_operand:V8DF 0 "register_operand" "=v")
5890 (float:V8DF
5891 (vec_select:V8SI
5892 (match_operand:V16SI 1 "nonimmediate_operand" "vm")
5893 (parallel [(const_int 0) (const_int 1)
5894 (const_int 2) (const_int 3)
5895 (const_int 4) (const_int 5)
5896 (const_int 6) (const_int 7)]))))]
5897 "TARGET_AVX512F"
5898 "vcvtdq2pd\t{%t1, %0|%0, %t1}"
5899 [(set_attr "type" "ssecvt")
5900 (set_attr "prefix" "evex")
5901 (set_attr "mode" "V8DF")])
5902
5903 (define_insn "avx_cvtdq2pd256_2"
5904 [(set (match_operand:V4DF 0 "register_operand" "=v")
5905 (float:V4DF
5906 (vec_select:V4SI
5907 (match_operand:V8SI 1 "nonimmediate_operand" "vm")
5908 (parallel [(const_int 0) (const_int 1)
5909 (const_int 2) (const_int 3)]))))]
5910 "TARGET_AVX"
5911 "vcvtdq2pd\t{%x1, %0|%0, %x1}"
5912 [(set_attr "type" "ssecvt")
5913 (set_attr "prefix" "maybe_evex")
5914 (set_attr "mode" "V4DF")])
5915
5916 (define_insn "sse2_cvtdq2pd<mask_name>"
5917 [(set (match_operand:V2DF 0 "register_operand" "=v")
5918 (float:V2DF
5919 (vec_select:V2SI
5920 (match_operand:V4SI 1 "nonimmediate_operand" "vm")
5921 (parallel [(const_int 0) (const_int 1)]))))]
5922 "TARGET_SSE2 && <mask_avx512vl_condition>"
5923 "%vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
5924 [(set_attr "type" "ssecvt")
5925 (set_attr "prefix" "maybe_vex")
5926 (set_attr "mode" "V2DF")])
5927
5928 (define_insn "avx512f_cvtpd2dq512<mask_name><round_name>"
5929 [(set (match_operand:V8SI 0 "register_operand" "=v")
5930 (unspec:V8SI
5931 [(match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")]
5932 UNSPEC_FIX_NOTRUNC))]
5933 "TARGET_AVX512F"
5934 "vcvtpd2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5935 [(set_attr "type" "ssecvt")
5936 (set_attr "prefix" "evex")
5937 (set_attr "mode" "OI")])
5938
5939 (define_insn "avx_cvtpd2dq256<mask_name>"
5940 [(set (match_operand:V4SI 0 "register_operand" "=v")
5941 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "vm")]
5942 UNSPEC_FIX_NOTRUNC))]
5943 "TARGET_AVX && <mask_avx512vl_condition>"
5944 "vcvtpd2dq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5945 [(set_attr "type" "ssecvt")
5946 (set_attr "prefix" "<mask_prefix>")
5947 (set_attr "mode" "OI")])
5948
5949 (define_expand "avx_cvtpd2dq256_2"
5950 [(set (match_operand:V8SI 0 "register_operand")
5951 (vec_concat:V8SI
5952 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand")]
5953 UNSPEC_FIX_NOTRUNC)
5954 (match_dup 2)))]
5955 "TARGET_AVX"
5956 "operands[2] = CONST0_RTX (V4SImode);")
5957
5958 (define_insn "*avx_cvtpd2dq256_2"
5959 [(set (match_operand:V8SI 0 "register_operand" "=v")
5960 (vec_concat:V8SI
5961 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "vm")]
5962 UNSPEC_FIX_NOTRUNC)
5963 (match_operand:V4SI 2 "const0_operand")))]
5964 "TARGET_AVX"
5965 "vcvtpd2dq{y}\t{%1, %x0|%x0, %1}"
5966 [(set_attr "type" "ssecvt")
5967 (set_attr "prefix" "vex")
5968 (set_attr "btver2_decode" "vector")
5969 (set_attr "mode" "OI")])
5970
5971 (define_insn "sse2_cvtpd2dq"
5972 [(set (match_operand:V4SI 0 "register_operand" "=v")
5973 (vec_concat:V4SI
5974 (unspec:V2SI [(match_operand:V2DF 1 "vector_operand" "vBm")]
5975 UNSPEC_FIX_NOTRUNC)
5976 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
5977 "TARGET_SSE2"
5978 {
5979 if (TARGET_AVX)
5980 return "vcvtpd2dq{x}\t{%1, %0|%0, %1}";
5981 else
5982 return "cvtpd2dq\t{%1, %0|%0, %1}";
5983 }
5984 [(set_attr "type" "ssecvt")
5985 (set_attr "prefix_rep" "1")
5986 (set_attr "prefix_data16" "0")
5987 (set_attr "prefix" "maybe_vex")
5988 (set_attr "mode" "TI")
5989 (set_attr "amdfam10_decode" "double")
5990 (set_attr "athlon_decode" "vector")
5991 (set_attr "bdver1_decode" "double")])
5992
5993 (define_insn "sse2_cvtpd2dq_mask"
5994 [(set (match_operand:V4SI 0 "register_operand" "=v")
5995 (vec_concat:V4SI
5996 (vec_merge:V2SI
5997 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
5998 UNSPEC_FIX_NOTRUNC)
5999 (vec_select:V2SI
6000 (match_operand:V4SI 2 "nonimm_or_0_operand" "0C")
6001 (parallel [(const_int 0) (const_int 1)]))
6002 (match_operand:QI 3 "register_operand" "Yk"))
6003 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6004 "TARGET_AVX512VL"
6005 "vcvtpd2dq{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
6006 [(set_attr "type" "ssecvt")
6007 (set_attr "prefix" "evex")
6008 (set_attr "mode" "TI")])
6009
6010 (define_insn "*sse2_cvtpd2dq_mask_1"
6011 [(set (match_operand:V4SI 0 "register_operand" "=v")
6012 (vec_concat:V4SI
6013 (vec_merge:V2SI
6014 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
6015 UNSPEC_FIX_NOTRUNC)
6016 (const_vector:V2SI [(const_int 0) (const_int 0)])
6017 (match_operand:QI 2 "register_operand" "Yk"))
6018 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6019 "TARGET_AVX512VL"
6020 "vcvtpd2dq{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
6021 [(set_attr "type" "ssecvt")
6022 (set_attr "prefix" "evex")
6023 (set_attr "mode" "TI")])
6024
6025 ;; For ufix_notrunc* insn patterns
6026 (define_mode_attr pd2udqsuff
6027 [(V8DF "") (V4DF "{y}")])
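;; The V4DF form carries a {y} suffix (cf. the {x} on the 128-bit patterns
;; below), which disambiguates the source width when it is a memory operand;
;; the 512-bit form needs no suffix.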
6028
6029 (define_insn "ufix_notrunc<mode><si2dfmodelower>2<mask_name><round_name>"
6030 [(set (match_operand:<si2dfmode> 0 "register_operand" "=v")
6031 (unspec:<si2dfmode>
6032 [(match_operand:VF2_512_256VL 1 "nonimmediate_operand" "<round_constraint>")]
6033 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
6034 "TARGET_AVX512F"
6035 "vcvtpd2udq<pd2udqsuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
6036 [(set_attr "type" "ssecvt")
6037 (set_attr "prefix" "evex")
6038 (set_attr "mode" "<sseinsnmode>")])
6039
6040 (define_insn "ufix_notruncv2dfv2si2"
6041 [(set (match_operand:V4SI 0 "register_operand" "=v")
6042 (vec_concat:V4SI
6043 (unspec:V2SI
6044 [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
6045 UNSPEC_UNSIGNED_FIX_NOTRUNC)
6046 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6047 "TARGET_AVX512VL"
6048 "vcvtpd2udq{x}\t{%1, %0|%0, %1}"
6049 [(set_attr "type" "ssecvt")
6050 (set_attr "prefix" "evex")
6051 (set_attr "mode" "TI")])
6052
6053 (define_insn "ufix_notruncv2dfv2si2_mask"
6054 [(set (match_operand:V4SI 0 "register_operand" "=v")
6055 (vec_concat:V4SI
6056 (vec_merge:V2SI
6057 (unspec:V2SI
6058 [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
6059 UNSPEC_UNSIGNED_FIX_NOTRUNC)
6060 (vec_select:V2SI
6061 (match_operand:V4SI 2 "nonimm_or_0_operand" "0C")
6062 (parallel [(const_int 0) (const_int 1)]))
6063 (match_operand:QI 3 "register_operand" "Yk"))
6064 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6065 "TARGET_AVX512VL"
6066 "vcvtpd2udq{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
6067 [(set_attr "type" "ssecvt")
6068 (set_attr "prefix" "evex")
6069 (set_attr "mode" "TI")])
6070
6071 (define_insn "*ufix_notruncv2dfv2si2_mask_1"
6072 [(set (match_operand:V4SI 0 "register_operand" "=v")
6073 (vec_concat:V4SI
6074 (vec_merge:V2SI
6075 (unspec:V2SI
6076 [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
6077 UNSPEC_UNSIGNED_FIX_NOTRUNC)
6078 (const_vector:V2SI [(const_int 0) (const_int 0)])
6079 (match_operand:QI 2 "register_operand" "Yk"))
6080 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6081 "TARGET_AVX512VL"
6082 "vcvtpd2udq{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
6083 [(set_attr "type" "ssecvt")
6084 (set_attr "prefix" "evex")
6085 (set_attr "mode" "TI")])
6086
6087 (define_insn "fix<fixunssuffix>_truncv8dfv8si2<mask_name><round_saeonly_name>"
6088 [(set (match_operand:V8SI 0 "register_operand" "=v")
6089 (any_fix:V8SI
6090 (match_operand:V8DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
6091 "TARGET_AVX512F"
6092 "vcvttpd2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
6093 [(set_attr "type" "ssecvt")
6094 (set_attr "prefix" "evex")
6095 (set_attr "mode" "OI")])
6096
6097 (define_insn "ufix_truncv2dfv2si2"
6098 [(set (match_operand:V4SI 0 "register_operand" "=v")
6099 (vec_concat:V4SI
6100 (unsigned_fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
6101 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6102 "TARGET_AVX512VL"
6103 "vcvttpd2udq{x}\t{%1, %0|%0, %1}"
6104 [(set_attr "type" "ssecvt")
6105 (set_attr "prefix" "evex")
6106 (set_attr "mode" "TI")])
6107
6108 (define_insn "ufix_truncv2dfv2si2_mask"
6109 [(set (match_operand:V4SI 0 "register_operand" "=v")
6110 (vec_concat:V4SI
6111 (vec_merge:V2SI
6112 (unsigned_fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
6113 (vec_select:V2SI
6114 (match_operand:V4SI 2 "nonimm_or_0_operand" "0C")
6115 (parallel [(const_int 0) (const_int 1)]))
6116 (match_operand:QI 3 "register_operand" "Yk"))
6117 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6118 "TARGET_AVX512VL"
6119 "vcvttpd2udq{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
6120 [(set_attr "type" "ssecvt")
6121 (set_attr "prefix" "evex")
6122 (set_attr "mode" "TI")])
6123
6124 (define_insn "*ufix_truncv2dfv2si2_mask_1"
6125 [(set (match_operand:V4SI 0 "register_operand" "=v")
6126 (vec_concat:V4SI
6127 (vec_merge:V2SI
6128 (unsigned_fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
6129 (const_vector:V2SI [(const_int 0) (const_int 0)])
6130 (match_operand:QI 2 "register_operand" "Yk"))
6131 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6132 "TARGET_AVX512VL"
6133 "vcvttpd2udq{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
6134 [(set_attr "type" "ssecvt")
6135 (set_attr "prefix" "evex")
6136 (set_attr "mode" "TI")])
6137
6138 (define_insn "fix_truncv4dfv4si2<mask_name>"
6139 [(set (match_operand:V4SI 0 "register_operand" "=v")
6140 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
6141 "TARGET_AVX || (TARGET_AVX512VL && TARGET_AVX512F)"
6142 "vcvttpd2dq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6143 [(set_attr "type" "ssecvt")
6144 (set_attr "prefix" "maybe_evex")
6145 (set_attr "mode" "OI")])
6146
6147 (define_insn "ufix_truncv4dfv4si2<mask_name>"
6148 [(set (match_operand:V4SI 0 "register_operand" "=v")
6149 (unsigned_fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
6150 "TARGET_AVX512VL && TARGET_AVX512F"
6151 "vcvttpd2udq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6152 [(set_attr "type" "ssecvt")
6153 (set_attr "prefix" "maybe_evex")
6154 (set_attr "mode" "OI")])
6155
6156 (define_insn "fix<fixunssuffix>_trunc<mode><sseintvecmodelower>2<mask_name><round_saeonly_name>"
6157 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
6158 (any_fix:<sseintvecmode>
6159 (match_operand:VF2_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
6160 "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>"
6161 "vcvttpd2<fixsuffix>qq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
6162 [(set_attr "type" "ssecvt")
6163 (set_attr "prefix" "evex")
6164 (set_attr "mode" "<sseintvecmode2>")])
6165
6166 (define_insn "fix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>"
6167 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
6168 (unspec:<sseintvecmode>
6169 [(match_operand:VF2_AVX512VL 1 "<round_nimm_predicate>" "<round_constraint>")]
6170 UNSPEC_FIX_NOTRUNC))]
6171 "TARGET_AVX512DQ && <round_mode512bit_condition>"
6172 "vcvtpd2qq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
6173 [(set_attr "type" "ssecvt")
6174 (set_attr "prefix" "evex")
6175 (set_attr "mode" "<sseintvecmode2>")])
6176
6177 (define_insn "ufix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>"
6178 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
6179 (unspec:<sseintvecmode>
6180 [(match_operand:VF2_AVX512VL 1 "nonimmediate_operand" "<round_constraint>")]
6181 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
6182 "TARGET_AVX512DQ && <round_mode512bit_condition>"
6183 "vcvtpd2uqq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
6184 [(set_attr "type" "ssecvt")
6185 (set_attr "prefix" "evex")
6186 (set_attr "mode" "<sseintvecmode2>")])
6187
6188 (define_insn "fix<fixunssuffix>_trunc<mode><sselongvecmodelower>2<mask_name><round_saeonly_name>"
6189 [(set (match_operand:<sselongvecmode> 0 "register_operand" "=v")
6190 (any_fix:<sselongvecmode>
6191 (match_operand:VF1_128_256VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
6192 "TARGET_AVX512DQ && <round_saeonly_modev8sf_condition>"
6193 "vcvttps2<fixsuffix>qq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
6194 [(set_attr "type" "ssecvt")
6195 (set_attr "prefix" "evex")
6196 (set_attr "mode" "<sseintvecmode3>")])
6197
6198 (define_insn "fix<fixunssuffix>_truncv2sfv2di2<mask_name>"
6199 [(set (match_operand:V2DI 0 "register_operand" "=v")
6200 (any_fix:V2DI
6201 (vec_select:V2SF
6202 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
6203 (parallel [(const_int 0) (const_int 1)]))))]
6204 "TARGET_AVX512DQ && TARGET_AVX512VL"
6205 "vcvttps2<fixsuffix>qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
6206 [(set_attr "type" "ssecvt")
6207 (set_attr "prefix" "evex")
6208 (set_attr "mode" "TI")])
6209
6210 (define_mode_attr vunpckfixt_mode
6211 [(V16SF "V8DI") (V8SF "V4DI") (V4SF "V2DI")])
6212 (define_mode_attr vunpckfixt_model
6213 [(V16SF "v8di") (V8SF "v4di") (V4SF "v2di")])
6214 (define_mode_attr vunpckfixt_extract_mode
6215 [(V16SF "v16sf") (V8SF "v8sf") (V4SF "v8sf")])
6216
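;; vec_unpack_<fixprefix>fix_trunc_{lo,hi}: take the requested half of the
;; single-precision source (for V4SF the high half is moved down with
;; vpermilps, selector 0x4e) and convert it with vcvttps2<fixsuffix>qq.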
6217 (define_expand "vec_unpack_<fixprefix>fix_trunc_lo_<mode>"
6218 [(match_operand:<vunpckfixt_mode> 0 "register_operand")
6219 (any_fix:<vunpckfixt_mode>
6220 (match_operand:VF1_AVX512VL 1 "register_operand"))]
6221 "TARGET_AVX512DQ"
6222 {
6223 rtx tem = operands[1];
6224 if (<MODE>mode != V4SFmode)
6225 {
6226 tem = gen_reg_rtx (<ssehalfvecmode>mode);
6227 emit_insn (gen_vec_extract_lo_<vunpckfixt_extract_mode> (tem,
6228 operands[1]));
6229 }
6230 rtx (*gen) (rtx, rtx)
6231 = gen_fix<fixunssuffix>_trunc<ssehalfvecmodelower><vunpckfixt_model>2;
6232 emit_insn (gen (operands[0], tem));
6233 DONE;
6234 })
6235
6236 (define_expand "vec_unpack_<fixprefix>fix_trunc_hi_<mode>"
6237 [(match_operand:<vunpckfixt_mode> 0 "register_operand")
6238 (any_fix:<vunpckfixt_mode>
6239 (match_operand:VF1_AVX512VL 1 "register_operand"))]
6240 "TARGET_AVX512DQ"
6241 {
6242 rtx tem;
6243 if (<MODE>mode != V4SFmode)
6244 {
6245 tem = gen_reg_rtx (<ssehalfvecmode>mode);
6246 emit_insn (gen_vec_extract_hi_<vunpckfixt_extract_mode> (tem,
6247 operands[1]));
6248 }
6249 else
6250 {
6251 tem = gen_reg_rtx (V4SFmode);
6252 emit_insn (gen_avx_vpermilv4sf (tem, operands[1], GEN_INT (0x4e)));
6253 }
6254 rtx (*gen) (rtx, rtx)
6255 = gen_fix<fixunssuffix>_trunc<ssehalfvecmodelower><vunpckfixt_model>2;
6256 emit_insn (gen (operands[0], tem));
6257 DONE;
6258 })
6259
6260 (define_insn "ufix_trunc<mode><sseintvecmodelower>2<mask_name>"
6261 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
6262 (unsigned_fix:<sseintvecmode>
6263 (match_operand:VF1_128_256VL 1 "nonimmediate_operand" "vm")))]
6264 "TARGET_AVX512VL"
6265 "vcvttps2udq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6266 [(set_attr "type" "ssecvt")
6267 (set_attr "prefix" "evex")
6268 (set_attr "mode" "<sseintvecmode2>")])
6269
6270 (define_expand "avx_cvttpd2dq256_2"
6271 [(set (match_operand:V8SI 0 "register_operand")
6272 (vec_concat:V8SI
6273 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand"))
6274 (match_dup 2)))]
6275 "TARGET_AVX"
6276 "operands[2] = CONST0_RTX (V4SImode);")
6277
6278 (define_insn "sse2_cvttpd2dq"
6279 [(set (match_operand:V4SI 0 "register_operand" "=v")
6280 (vec_concat:V4SI
6281 (fix:V2SI (match_operand:V2DF 1 "vector_operand" "vBm"))
6282 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6283 "TARGET_SSE2"
6284 {
6285 if (TARGET_AVX)
6286 return "vcvttpd2dq{x}\t{%1, %0|%0, %1}";
6287 else
6288 return "cvttpd2dq\t{%1, %0|%0, %1}";
6289 }
6290 [(set_attr "type" "ssecvt")
6291 (set_attr "amdfam10_decode" "double")
6292 (set_attr "athlon_decode" "vector")
6293 (set_attr "bdver1_decode" "double")
6294 (set_attr "prefix" "maybe_vex")
6295 (set_attr "mode" "TI")])
6296
6297 (define_insn "sse2_cvttpd2dq_mask"
6298 [(set (match_operand:V4SI 0 "register_operand" "=v")
6299 (vec_concat:V4SI
6300 (vec_merge:V2SI
6301 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
6302 (vec_select:V2SI
6303 (match_operand:V4SI 2 "nonimm_or_0_operand" "0C")
6304 (parallel [(const_int 0) (const_int 1)]))
6305 (match_operand:QI 3 "register_operand" "Yk"))
6306 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6307 "TARGET_AVX512VL"
6308 "vcvttpd2dq{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
6309 [(set_attr "type" "ssecvt")
6310 (set_attr "prefix" "evex")
6311 (set_attr "mode" "TI")])
6312
6313 (define_insn "*sse2_cvttpd2dq_mask_1"
6314 [(set (match_operand:V4SI 0 "register_operand" "=v")
6315 (vec_concat:V4SI
6316 (vec_merge:V2SI
6317 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
6318 (const_vector:V2SI [(const_int 0) (const_int 0)])
6319 (match_operand:QI 2 "register_operand" "Yk"))
6320 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6321 "TARGET_AVX512VL"
6322 "vcvttpd2dq{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
6323 [(set_attr "type" "ssecvt")
6324 (set_attr "prefix" "evex")
6325 (set_attr "mode" "TI")])
6326
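;; The scalar conversions below merge the converted value into element 0 of
;; operand 1; the remaining elements are passed through unchanged.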
6327 (define_insn "sse2_cvtsd2ss<round_name>"
6328 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
6329 (vec_merge:V4SF
6330 (vec_duplicate:V4SF
6331 (float_truncate:V2SF
6332 (match_operand:V2DF 2 "nonimmediate_operand" "x,m,<round_constraint>")))
6333 (match_operand:V4SF 1 "register_operand" "0,0,v")
6334 (const_int 1)))]
6335 "TARGET_SSE2"
6336 "@
6337 cvtsd2ss\t{%2, %0|%0, %2}
6338 cvtsd2ss\t{%2, %0|%0, %q2}
6339 vcvtsd2ss\t{<round_op3>%2, %1, %0|%0, %1, %q2<round_op3>}"
6340 [(set_attr "isa" "noavx,noavx,avx")
6341 (set_attr "type" "ssecvt")
6342 (set_attr "athlon_decode" "vector,double,*")
6343 (set_attr "amdfam10_decode" "vector,double,*")
6344 (set_attr "bdver1_decode" "direct,direct,*")
6345 (set_attr "btver2_decode" "double,double,double")
6346 (set_attr "prefix" "orig,orig,<round_prefix>")
6347 (set_attr "mode" "SF")])
6348
6349 (define_insn "*sse2_vd_cvtsd2ss"
6350 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
6351 (vec_merge:V4SF
6352 (vec_duplicate:V4SF
6353 (float_truncate:SF (match_operand:DF 2 "nonimmediate_operand" "x,m,vm")))
6354 (match_operand:V4SF 1 "register_operand" "0,0,v")
6355 (const_int 1)))]
6356 "TARGET_SSE2"
6357 "@
6358 cvtsd2ss\t{%2, %0|%0, %2}
6359 cvtsd2ss\t{%2, %0|%0, %2}
6360 vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
6361 [(set_attr "isa" "noavx,noavx,avx")
6362 (set_attr "type" "ssecvt")
6363 (set_attr "athlon_decode" "vector,double,*")
6364 (set_attr "amdfam10_decode" "vector,double,*")
6365 (set_attr "bdver1_decode" "direct,direct,*")
6366 (set_attr "btver2_decode" "double,double,double")
6367 (set_attr "prefix" "orig,orig,vex")
6368 (set_attr "mode" "SF")])
6369
6370 (define_insn "sse2_cvtss2sd<round_saeonly_name>"
6371 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
6372 (vec_merge:V2DF
6373 (float_extend:V2DF
6374 (vec_select:V2SF
6375 (match_operand:V4SF 2 "<round_saeonly_nimm_scalar_predicate>" "x,m,<round_saeonly_constraint>")
6376 (parallel [(const_int 0) (const_int 1)])))
6377 (match_operand:V2DF 1 "register_operand" "0,0,v")
6378 (const_int 1)))]
6379 "TARGET_SSE2"
6380 "@
6381 cvtss2sd\t{%2, %0|%0, %2}
6382 cvtss2sd\t{%2, %0|%0, %k2}
6383 vcvtss2sd\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %k2<round_saeonly_op3>}"
6384 [(set_attr "isa" "noavx,noavx,avx")
6385 (set_attr "type" "ssecvt")
6386 (set_attr "amdfam10_decode" "vector,double,*")
6387 (set_attr "athlon_decode" "direct,direct,*")
6388 (set_attr "bdver1_decode" "direct,direct,*")
6389 (set_attr "btver2_decode" "double,double,double")
6390 (set_attr "prefix" "orig,orig,<round_saeonly_prefix>")
6391 (set_attr "mode" "DF")])
6392
6393 (define_insn "*sse2_vd_cvtss2sd"
6394 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
6395 (vec_merge:V2DF
6396 (vec_duplicate:V2DF
6397 (float_extend:DF (match_operand:SF 2 "nonimmediate_operand" "x,m,vm")))
6398 (match_operand:V2DF 1 "register_operand" "0,0,v")
6399 (const_int 1)))]
6400 "TARGET_SSE2"
6401 "@
6402 cvtss2sd\t{%2, %0|%0, %2}
6403 cvtss2sd\t{%2, %0|%0, %2}
6404 vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
6405 [(set_attr "isa" "noavx,noavx,avx")
6406 (set_attr "type" "ssecvt")
6407 (set_attr "amdfam10_decode" "vector,double,*")
6408 (set_attr "athlon_decode" "direct,direct,*")
6409 (set_attr "bdver1_decode" "direct,direct,*")
6410 (set_attr "btver2_decode" "double,double,double")
6411 (set_attr "prefix" "orig,orig,vex")
6412 (set_attr "mode" "DF")])
6413
6414 (define_insn "<mask_codefor>avx512f_cvtpd2ps512<mask_name><round_name>"
6415 [(set (match_operand:V8SF 0 "register_operand" "=v")
6416 (float_truncate:V8SF
6417 (match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")))]
6418 "TARGET_AVX512F"
6419 "vcvtpd2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
6420 [(set_attr "type" "ssecvt")
6421 (set_attr "prefix" "evex")
6422 (set_attr "mode" "V8SF")])
6423
6424 (define_insn "avx_cvtpd2ps256<mask_name>"
6425 [(set (match_operand:V4SF 0 "register_operand" "=v")
6426 (float_truncate:V4SF
6427 (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
6428 "TARGET_AVX && <mask_avx512vl_condition>"
6429 "vcvtpd2ps{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6430 [(set_attr "type" "ssecvt")
6431 (set_attr "prefix" "maybe_evex")
6432 (set_attr "btver2_decode" "vector")
6433 (set_attr "mode" "V4SF")])
6434
6435 (define_expand "sse2_cvtpd2ps"
6436 [(set (match_operand:V4SF 0 "register_operand")
6437 (vec_concat:V4SF
6438 (float_truncate:V2SF
6439 (match_operand:V2DF 1 "vector_operand"))
6440 (match_dup 2)))]
6441 "TARGET_SSE2"
6442 "operands[2] = CONST0_RTX (V2SFmode);")
6443
6444 (define_expand "sse2_cvtpd2ps_mask"
6445 [(set (match_operand:V4SF 0 "register_operand")
6446 (vec_concat:V4SF
6447 (vec_merge:V2SF
6448 (float_truncate:V2SF
6449 (match_operand:V2DF 1 "vector_operand"))
6450 (vec_select:V2SF
6451 (match_operand:V4SF 2 "nonimm_or_0_operand")
6452 (parallel [(const_int 0) (const_int 1)]))
6453 (match_operand:QI 3 "register_operand"))
6454 (match_dup 4)))]
6455 "TARGET_SSE2"
6456 "operands[4] = CONST0_RTX (V2SFmode);")
6457
6458 (define_insn "*sse2_cvtpd2ps"
6459 [(set (match_operand:V4SF 0 "register_operand" "=v")
6460 (vec_concat:V4SF
6461 (float_truncate:V2SF
6462 (match_operand:V2DF 1 "vector_operand" "vBm"))
6463 (match_operand:V2SF 2 "const0_operand" "C")))]
6464 "TARGET_SSE2"
6465 {
6466 if (TARGET_AVX)
6467 return "vcvtpd2ps{x}\t{%1, %0|%0, %1}";
6468 else
6469 return "cvtpd2ps\t{%1, %0|%0, %1}";
6470 }
6471 [(set_attr "type" "ssecvt")
6472 (set_attr "amdfam10_decode" "double")
6473 (set_attr "athlon_decode" "vector")
6474 (set_attr "bdver1_decode" "double")
6475 (set_attr "prefix_data16" "1")
6476 (set_attr "prefix" "maybe_vex")
6477 (set_attr "mode" "V4SF")])
6478
6479 (define_insn "truncv2dfv2sf2"
6480 [(set (match_operand:V2SF 0 "register_operand" "=v")
6481 (float_truncate:V2SF
6482 (match_operand:V2DF 1 "vector_operand" "vBm")))]
6483 "TARGET_MMX_WITH_SSE"
6484 {
6485 if (TARGET_AVX)
6486 return "vcvtpd2ps{x}\t{%1, %0|%0, %1}";
6487 else
6488 return "cvtpd2ps\t{%1, %0|%0, %1}";
6489 }
6490 [(set_attr "type" "ssecvt")
6491 (set_attr "amdfam10_decode" "double")
6492 (set_attr "athlon_decode" "vector")
6493 (set_attr "bdver1_decode" "double")
6494 (set_attr "prefix_data16" "1")
6495 (set_attr "prefix" "maybe_vex")
6496 (set_attr "mode" "V4SF")])
6497
6498 (define_insn "*sse2_cvtpd2ps_mask"
6499 [(set (match_operand:V4SF 0 "register_operand" "=v")
6500 (vec_concat:V4SF
6501 (vec_merge:V2SF
6502 (float_truncate:V2SF
6503 (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
6504 (vec_select:V2SF
6505 (match_operand:V4SF 2 "nonimm_or_0_operand" "0C")
6506 (parallel [(const_int 0) (const_int 1)]))
6507 (match_operand:QI 3 "register_operand" "Yk"))
6508 (match_operand:V2SF 4 "const0_operand" "C")))]
6509 "TARGET_AVX512VL"
6510 "vcvtpd2ps{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
6511 [(set_attr "type" "ssecvt")
6512 (set_attr "prefix" "evex")
6513 (set_attr "mode" "V4SF")])
6514
6515 (define_insn "*sse2_cvtpd2ps_mask_1"
6516 [(set (match_operand:V4SF 0 "register_operand" "=v")
6517 (vec_concat:V4SF
6518 (vec_merge:V2SF
6519 (float_truncate:V2SF
6520 (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
6521 (match_operand:V2SF 3 "const0_operand" "C")
6522 (match_operand:QI 2 "register_operand" "Yk"))
6523 (match_operand:V2SF 4 "const0_operand" "C")))]
6524 "TARGET_AVX512VL"
6525 "vcvtpd2ps{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
6526 [(set_attr "type" "ssecvt")
6527 (set_attr "prefix" "evex")
6528 (set_attr "mode" "V4SF")])
6529
6530 ;; For <sse2_avx_avx512f>_cvtps2pd<avxsizesuffix> insn pattern
6531 (define_mode_attr sf2dfmode
6532 [(V8DF "V8SF") (V4DF "V4SF")])
6533
6534 (define_insn "<sse2_avx_avx512f>_cvtps2pd<avxsizesuffix><mask_name><round_saeonly_name>"
6535 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
6536 (float_extend:VF2_512_256
6537 (match_operand:<sf2dfmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
6538 "TARGET_AVX && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
6539 "vcvtps2pd\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
6540 [(set_attr "type" "ssecvt")
6541 (set_attr "prefix" "maybe_vex")
6542 (set_attr "mode" "<MODE>")])
6543
6544 (define_insn "*avx_cvtps2pd256_2"
6545 [(set (match_operand:V4DF 0 "register_operand" "=v")
6546 (float_extend:V4DF
6547 (vec_select:V4SF
6548 (match_operand:V8SF 1 "nonimmediate_operand" "vm")
6549 (parallel [(const_int 0) (const_int 1)
6550 (const_int 2) (const_int 3)]))))]
6551 "TARGET_AVX"
6552 "vcvtps2pd\t{%x1, %0|%0, %x1}"
6553 [(set_attr "type" "ssecvt")
6554 (set_attr "prefix" "vex")
6555 (set_attr "mode" "V4DF")])
6556
6557 (define_insn "vec_unpacks_lo_v16sf"
6558 [(set (match_operand:V8DF 0 "register_operand" "=v")
6559 (float_extend:V8DF
6560 (vec_select:V8SF
6561 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
6562 (parallel [(const_int 0) (const_int 1)
6563 (const_int 2) (const_int 3)
6564 (const_int 4) (const_int 5)
6565 (const_int 6) (const_int 7)]))))]
6566 "TARGET_AVX512F"
6567 "vcvtps2pd\t{%t1, %0|%0, %t1}"
6568 [(set_attr "type" "ssecvt")
6569 (set_attr "prefix" "evex")
6570 (set_attr "mode" "V8DF")])
6571
6572 (define_insn "<avx512>_cvt<ssemodesuffix>2mask<mode>"
6573 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
6574 (unspec:<avx512fmaskmode>
6575 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")]
6576 UNSPEC_CVTINT2MASK))]
6577 "TARGET_AVX512BW"
6578 "vpmov<ssemodesuffix>2m\t{%1, %0|%0, %1}"
6579 [(set_attr "prefix" "evex")
6580 (set_attr "mode" "<sseinsnmode>")])
6581
6582 (define_insn "<avx512>_cvt<ssemodesuffix>2mask<mode>"
6583 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
6584 (unspec:<avx512fmaskmode>
6585 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")]
6586 UNSPEC_CVTINT2MASK))]
6587 "TARGET_AVX512DQ"
6588 "vpmov<ssemodesuffix>2m\t{%1, %0|%0, %1}"
6589 [(set_attr "prefix" "evex")
6590 (set_attr "mode" "<sseinsnmode>")])
6591
6592 (define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>"
6593 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
6594 (vec_merge:VI12_AVX512VL
6595 (match_dup 2)
6596 (match_dup 3)
6597 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
6598 "TARGET_AVX512BW"
6599 {
6600 operands[2] = CONSTM1_RTX (<MODE>mode);
6601 operands[3] = CONST0_RTX (<MODE>mode);
6602 })
6603
6604 (define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>"
6605 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
6606 (vec_merge:VI12_AVX512VL
6607 (match_operand:VI12_AVX512VL 2 "vector_all_ones_operand")
6608 (match_operand:VI12_AVX512VL 3 "const0_operand")
6609 (match_operand:<avx512fmaskmode> 1 "register_operand" "k")))]
6610 "TARGET_AVX512BW"
6611 "vpmovm2<ssemodesuffix>\t{%1, %0|%0, %1}"
6612 [(set_attr "prefix" "evex")
6613 (set_attr "mode" "<sseinsnmode>")])
6614
6615 (define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>"
6616 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
6617 (vec_merge:VI48_AVX512VL
6618 (match_dup 2)
6619 (match_dup 3)
6620 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
6621 "TARGET_AVX512F"
6622 "{
6623 operands[2] = CONSTM1_RTX (<MODE>mode);
6624 operands[3] = CONST0_RTX (<MODE>mode);
6625 }")
6626
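;; The second alternative below needs no AVX512DQ: vpternlog with immediate
;; 0x81 and all three sources equal to %0 yields all-ones in every element
;; (bits 0 and 7 of the immediate are both set, so the result is 1 whatever
;; the input bit is), and zero-masking then clears the elements not selected
;; by the mask.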
6627 (define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>"
6628 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v,v")
6629 (vec_merge:VI48_AVX512VL
6630 (match_operand:VI48_AVX512VL 2 "vector_all_ones_operand")
6631 (match_operand:VI48_AVX512VL 3 "const0_operand")
6632 (match_operand:<avx512fmaskmode> 1 "register_operand" "k,Yk")))]
6633 "TARGET_AVX512F"
6634 "@
6635 vpmovm2<ssemodesuffix>\t{%1, %0|%0, %1}
6636 vpternlog<ssemodesuffix>\t{$0x81, %0, %0, %0%{%1%}%{z%}|%0%{%1%}%{z%}, %0, %0, 0x81}"
6637 [(set_attr "isa" "avx512dq,*")
6638 (set_attr "length_immediate" "0,1")
6639 (set_attr "prefix" "evex")
6640 (set_attr "mode" "<sseinsnmode>")])
6641
6642 (define_insn "sse2_cvtps2pd<mask_name>"
6643 [(set (match_operand:V2DF 0 "register_operand" "=v")
6644 (float_extend:V2DF
6645 (vec_select:V2SF
6646 (match_operand:V4SF 1 "vector_operand" "vm")
6647 (parallel [(const_int 0) (const_int 1)]))))]
6648 "TARGET_SSE2 && <mask_avx512vl_condition>"
6649 "%vcvtps2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
6650 [(set_attr "type" "ssecvt")
6651 (set_attr "amdfam10_decode" "direct")
6652 (set_attr "athlon_decode" "double")
6653 (set_attr "bdver1_decode" "double")
6654 (set_attr "prefix_data16" "0")
6655 (set_attr "prefix" "maybe_vex")
6656 (set_attr "mode" "V2DF")])
6657
6658 (define_insn "extendv2sfv2df2"
6659 [(set (match_operand:V2DF 0 "register_operand" "=v")
6660 (float_extend:V2DF
6661 (match_operand:V2SF 1 "register_operand" "v")))]
6662 "TARGET_MMX_WITH_SSE"
6663 "%vcvtps2pd\t{%1, %0|%0, %1}"
6664 [(set_attr "type" "ssecvt")
6665 (set_attr "amdfam10_decode" "direct")
6666 (set_attr "athlon_decode" "double")
6667 (set_attr "bdver1_decode" "double")
6668 (set_attr "prefix_data16" "0")
6669 (set_attr "prefix" "maybe_vex")
6670 (set_attr "mode" "V2DF")])
6671
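;; vec_unpacks_hi_v4sf: a movhlps-style shuffle brings elements 2-3 of the
;; source into the low half of a scratch, which is then extended to V2DF.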
6672 (define_expand "vec_unpacks_hi_v4sf"
6673 [(set (match_dup 2)
6674 (vec_select:V4SF
6675 (vec_concat:V8SF
6676 (match_dup 2)
6677 (match_operand:V4SF 1 "vector_operand"))
6678 (parallel [(const_int 6) (const_int 7)
6679 (const_int 2) (const_int 3)])))
6680 (set (match_operand:V2DF 0 "register_operand")
6681 (float_extend:V2DF
6682 (vec_select:V2SF
6683 (match_dup 2)
6684 (parallel [(const_int 0) (const_int 1)]))))]
6685 "TARGET_SSE2"
6686 "operands[2] = gen_reg_rtx (V4SFmode);")
6687
6688 (define_expand "vec_unpacks_hi_v8sf"
6689 [(set (match_dup 2)
6690 (vec_select:V4SF
6691 (match_operand:V8SF 1 "register_operand")
6692 (parallel [(const_int 4) (const_int 5)
6693 (const_int 6) (const_int 7)])))
6694 (set (match_operand:V4DF 0 "register_operand")
6695 (float_extend:V4DF
6696 (match_dup 2)))]
6697 "TARGET_AVX"
6698 "operands[2] = gen_reg_rtx (V4SFmode);")
6699
6700 (define_expand "vec_unpacks_hi_v16sf"
6701 [(set (match_dup 2)
6702 (vec_select:V8SF
6703 (match_operand:V16SF 1 "register_operand")
6704 (parallel [(const_int 8) (const_int 9)
6705 (const_int 10) (const_int 11)
6706 (const_int 12) (const_int 13)
6707 (const_int 14) (const_int 15)])))
6708 (set (match_operand:V8DF 0 "register_operand")
6709 (float_extend:V8DF
6710 (match_dup 2)))]
6711 "TARGET_AVX512F"
6712 "operands[2] = gen_reg_rtx (V8SFmode);")
6713
6714 (define_expand "vec_unpacks_lo_v4sf"
6715 [(set (match_operand:V2DF 0 "register_operand")
6716 (float_extend:V2DF
6717 (vec_select:V2SF
6718 (match_operand:V4SF 1 "vector_operand")
6719 (parallel [(const_int 0) (const_int 1)]))))]
6720 "TARGET_SSE2")
6721
6722 (define_expand "vec_unpacks_lo_v8sf"
6723 [(set (match_operand:V4DF 0 "register_operand")
6724 (float_extend:V4DF
6725 (vec_select:V4SF
6726 (match_operand:V8SF 1 "nonimmediate_operand")
6727 (parallel [(const_int 0) (const_int 1)
6728 (const_int 2) (const_int 3)]))))]
6729 "TARGET_AVX")
6730
6731 (define_mode_attr sseunpackfltmode
6732 [(V8HI "V4SF") (V4SI "V2DF") (V16HI "V8SF")
6733 (V8SI "V4DF") (V32HI "V16SF") (V16SI "V8DF")])
6734
6735 (define_expand "vec_unpacks_float_hi_<mode>"
6736 [(match_operand:<sseunpackfltmode> 0 "register_operand")
6737 (match_operand:VI2_AVX512F 1 "register_operand")]
6738 "TARGET_SSE2"
6739 {
6740 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
6741
6742 emit_insn (gen_vec_unpacks_hi_<mode> (tmp, operands[1]));
6743 emit_insn (gen_rtx_SET (operands[0],
6744 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
6745 DONE;
6746 })
6747
6748 (define_expand "vec_unpacks_float_lo_<mode>"
6749 [(match_operand:<sseunpackfltmode> 0 "register_operand")
6750 (match_operand:VI2_AVX512F 1 "register_operand")]
6751 "TARGET_SSE2"
6752 {
6753 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
6754
6755 emit_insn (gen_vec_unpacks_lo_<mode> (tmp, operands[1]));
6756 emit_insn (gen_rtx_SET (operands[0],
6757 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
6758 DONE;
6759 })
6760
6761 (define_expand "vec_unpacku_float_hi_<mode>"
6762 [(match_operand:<sseunpackfltmode> 0 "register_operand")
6763 (match_operand:VI2_AVX512F 1 "register_operand")]
6764 "TARGET_SSE2"
6765 {
6766 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
6767
6768 emit_insn (gen_vec_unpacku_hi_<mode> (tmp, operands[1]));
6769 emit_insn (gen_rtx_SET (operands[0],
6770 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
6771 DONE;
6772 })
6773
6774 (define_expand "vec_unpacku_float_lo_<mode>"
6775 [(match_operand:<sseunpackfltmode> 0 "register_operand")
6776 (match_operand:VI2_AVX512F 1 "register_operand")]
6777 "TARGET_SSE2"
6778 {
6779 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
6780
6781 emit_insn (gen_vec_unpacku_lo_<mode> (tmp, operands[1]));
6782 emit_insn (gen_rtx_SET (operands[0],
6783 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
6784 DONE;
6785 })
6786
6787 (define_expand "vec_unpacks_float_hi_v4si"
6788 [(set (match_dup 2)
6789 (vec_select:V4SI
6790 (match_operand:V4SI 1 "vector_operand")
6791 (parallel [(const_int 2) (const_int 3)
6792 (const_int 2) (const_int 3)])))
6793 (set (match_operand:V2DF 0 "register_operand")
6794 (float:V2DF
6795 (vec_select:V2SI
6796 (match_dup 2)
6797 (parallel [(const_int 0) (const_int 1)]))))]
6798 "TARGET_SSE2"
6799 "operands[2] = gen_reg_rtx (V4SImode);")
6800
6801 (define_expand "vec_unpacks_float_lo_v4si"
6802 [(set (match_operand:V2DF 0 "register_operand")
6803 (float:V2DF
6804 (vec_select:V2SI
6805 (match_operand:V4SI 1 "vector_operand")
6806 (parallel [(const_int 0) (const_int 1)]))))]
6807 "TARGET_SSE2")
6808
6809 (define_expand "vec_unpacks_float_hi_v8si"
6810 [(set (match_dup 2)
6811 (vec_select:V4SI
6812 (match_operand:V8SI 1 "vector_operand")
6813 (parallel [(const_int 4) (const_int 5)
6814 (const_int 6) (const_int 7)])))
6815 (set (match_operand:V4DF 0 "register_operand")
6816 (float:V4DF
6817 (match_dup 2)))]
6818 "TARGET_AVX"
6819 "operands[2] = gen_reg_rtx (V4SImode);")
6820
6821 (define_expand "vec_unpacks_float_lo_v8si"
6822 [(set (match_operand:V4DF 0 "register_operand")
6823 (float:V4DF
6824 (vec_select:V4SI
6825 (match_operand:V8SI 1 "nonimmediate_operand")
6826 (parallel [(const_int 0) (const_int 1)
6827 (const_int 2) (const_int 3)]))))]
6828 "TARGET_AVX")
6829
6830 (define_expand "vec_unpacks_float_hi_v16si"
6831 [(set (match_dup 2)
6832 (vec_select:V8SI
6833 (match_operand:V16SI 1 "nonimmediate_operand")
6834 (parallel [(const_int 8) (const_int 9)
6835 (const_int 10) (const_int 11)
6836 (const_int 12) (const_int 13)
6837 (const_int 14) (const_int 15)])))
6838 (set (match_operand:V8DF 0 "register_operand")
6839 (float:V8DF
6840 (match_dup 2)))]
6841 "TARGET_AVX512F"
6842 "operands[2] = gen_reg_rtx (V8SImode);")
6843
6844 (define_expand "vec_unpacks_float_lo_v16si"
6845 [(set (match_operand:V8DF 0 "register_operand")
6846 (float:V8DF
6847 (vec_select:V8SI
6848 (match_operand:V16SI 1 "nonimmediate_operand")
6849 (parallel [(const_int 0) (const_int 1)
6850 (const_int 2) (const_int 3)
6851 (const_int 4) (const_int 5)
6852 (const_int 6) (const_int 7)]))))]
6853 "TARGET_AVX512F")
6854
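;; Unsigned SImode -> DFmode conversion without a native instruction:
;; convert as signed, then add 2^32 to the lanes whose signed result came
;; out negative.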
6855 (define_expand "vec_unpacku_float_hi_v4si"
6856 [(set (match_dup 5)
6857 (vec_select:V4SI
6858 (match_operand:V4SI 1 "vector_operand")
6859 (parallel [(const_int 2) (const_int 3)
6860 (const_int 2) (const_int 3)])))
6861 (set (match_dup 6)
6862 (float:V2DF
6863 (vec_select:V2SI
6864 (match_dup 5)
6865 (parallel [(const_int 0) (const_int 1)]))))
6866 (set (match_dup 7)
6867 (lt:V2DF (match_dup 6) (match_dup 3)))
6868 (set (match_dup 8)
6869 (and:V2DF (match_dup 7) (match_dup 4)))
6870 (set (match_operand:V2DF 0 "register_operand")
6871 (plus:V2DF (match_dup 6) (match_dup 8)))]
6872 "TARGET_SSE2"
6873 {
6874 REAL_VALUE_TYPE TWO32r;
6875 rtx x;
6876 int i;
6877
6878 real_ldexp (&TWO32r, &dconst1, 32);
6879 x = const_double_from_real_value (TWO32r, DFmode);
6880
6881 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
6882 operands[4] = force_reg (V2DFmode,
6883 ix86_build_const_vector (V2DFmode, 1, x));
6884
6885 operands[5] = gen_reg_rtx (V4SImode);
6886
6887 for (i = 6; i < 9; i++)
6888 operands[i] = gen_reg_rtx (V2DFmode);
6889 })
6890
6891 (define_expand "vec_unpacku_float_lo_v4si"
6892 [(set (match_dup 5)
6893 (float:V2DF
6894 (vec_select:V2SI
6895 (match_operand:V4SI 1 "vector_operand")
6896 (parallel [(const_int 0) (const_int 1)]))))
6897 (set (match_dup 6)
6898 (lt:V2DF (match_dup 5) (match_dup 3)))
6899 (set (match_dup 7)
6900 (and:V2DF (match_dup 6) (match_dup 4)))
6901 (set (match_operand:V2DF 0 "register_operand")
6902 (plus:V2DF (match_dup 5) (match_dup 7)))]
6903 "TARGET_SSE2"
6904 {
6905 REAL_VALUE_TYPE TWO32r;
6906 rtx x;
6907 int i;
6908
6909 real_ldexp (&TWO32r, &dconst1, 32);
6910 x = const_double_from_real_value (TWO32r, DFmode);
6911
6912 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
6913 operands[4] = force_reg (V2DFmode,
6914 ix86_build_const_vector (V2DFmode, 1, x));
6915
6916 for (i = 5; i < 8; i++)
6917 operands[i] = gen_reg_rtx (V2DFmode);
6918 })
6919
6920 (define_expand "vec_unpacku_float_hi_v8si"
6921 [(match_operand:V4DF 0 "register_operand")
6922 (match_operand:V8SI 1 "register_operand")]
6923 "TARGET_AVX"
6924 {
6925 REAL_VALUE_TYPE TWO32r;
6926 rtx x, tmp[6];
6927 int i;
6928
6929 real_ldexp (&TWO32r, &dconst1, 32);
6930 x = const_double_from_real_value (TWO32r, DFmode);
6931
6932 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
6933 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
6934 tmp[5] = gen_reg_rtx (V4SImode);
6935
6936 for (i = 2; i < 5; i++)
6937 tmp[i] = gen_reg_rtx (V4DFmode);
6938 emit_insn (gen_vec_extract_hi_v8si (tmp[5], operands[1]));
6939 emit_insn (gen_floatv4siv4df2 (tmp[2], tmp[5]));
6940 emit_insn (gen_rtx_SET (tmp[3], gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
6941 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
6942 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
6943 DONE;
6944 })
6945
6946 (define_expand "vec_unpacku_float_hi_v16si"
6947 [(match_operand:V8DF 0 "register_operand")
6948 (match_operand:V16SI 1 "register_operand")]
6949 "TARGET_AVX512F"
6950 {
6951 REAL_VALUE_TYPE TWO32r;
6952 rtx k, x, tmp[4];
6953
6954 real_ldexp (&TWO32r, &dconst1, 32);
6955 x = const_double_from_real_value (TWO32r, DFmode);
6956
6957 tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
6958 tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
6959 tmp[2] = gen_reg_rtx (V8DFmode);
6960 tmp[3] = gen_reg_rtx (V8SImode);
6961 k = gen_reg_rtx (QImode);
6962
6963 emit_insn (gen_vec_extract_hi_v16si (tmp[3], operands[1]));
6964 emit_insn (gen_floatv8siv8df2 (tmp[2], tmp[3]));
6965 emit_insn (gen_rtx_SET (k, gen_rtx_LT (QImode, tmp[2], tmp[0])));
6966 emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
6967 emit_move_insn (operands[0], tmp[2]);
6968 DONE;
6969 })
6970
6971 (define_expand "vec_unpacku_float_lo_v8si"
6972 [(match_operand:V4DF 0 "register_operand")
6973 (match_operand:V8SI 1 "nonimmediate_operand")]
6974 "TARGET_AVX"
6975 {
6976 REAL_VALUE_TYPE TWO32r;
6977 rtx x, tmp[5];
6978 int i;
6979
6980 real_ldexp (&TWO32r, &dconst1, 32);
6981 x = const_double_from_real_value (TWO32r, DFmode);
6982
6983 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
6984 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
6985
6986 for (i = 2; i < 5; i++)
6987 tmp[i] = gen_reg_rtx (V4DFmode);
6988 emit_insn (gen_avx_cvtdq2pd256_2 (tmp[2], operands[1]));
6989 emit_insn (gen_rtx_SET (tmp[3], gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
6990 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
6991 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
6992 DONE;
6993 })
6994
6995 (define_expand "vec_unpacku_float_lo_v16si"
6996 [(match_operand:V8DF 0 "register_operand")
6997 (match_operand:V16SI 1 "nonimmediate_operand")]
6998 "TARGET_AVX512F"
6999 {
7000 REAL_VALUE_TYPE TWO32r;
7001 rtx k, x, tmp[3];
7002
7003 real_ldexp (&TWO32r, &dconst1, 32);
7004 x = const_double_from_real_value (TWO32r, DFmode);
7005
7006 tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
7007 tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
7008 tmp[2] = gen_reg_rtx (V8DFmode);
7009 k = gen_reg_rtx (QImode);
7010
7011 emit_insn (gen_avx512f_cvtdq2pd512_2 (tmp[2], operands[1]));
7012 emit_insn (gen_rtx_SET (k, gen_rtx_LT (QImode, tmp[2], tmp[0])));
7013 emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
7014 emit_move_insn (operands[0], tmp[2]);
7015 DONE;
7016 })
7017
7018 (define_expand "vec_pack_trunc_<mode>"
7019 [(set (match_dup 3)
7020 (float_truncate:<sf2dfmode>
7021 (match_operand:VF2_512_256 1 "nonimmediate_operand")))
7022 (set (match_dup 4)
7023 (float_truncate:<sf2dfmode>
7024 (match_operand:VF2_512_256 2 "nonimmediate_operand")))
7025 (set (match_operand:<ssePSmode> 0 "register_operand")
7026 (vec_concat:<ssePSmode>
7027 (match_dup 3)
7028 (match_dup 4)))]
7029 "TARGET_AVX"
7030 {
7031 operands[3] = gen_reg_rtx (<sf2dfmode>mode);
7032 operands[4] = gen_reg_rtx (<sf2dfmode>mode);
7033 })
7034
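;; Pack two V2DF into one V4SF: with 256-bit AVX the inputs are concatenated
;; and converted by a single vcvtpd2ps; otherwise each half is converted
;; separately and the results are combined with movlhps.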
7035 (define_expand "vec_pack_trunc_v2df"
7036 [(match_operand:V4SF 0 "register_operand")
7037 (match_operand:V2DF 1 "vector_operand")
7038 (match_operand:V2DF 2 "vector_operand")]
7039 "TARGET_SSE2"
7040 {
7041 rtx tmp0, tmp1;
7042
7043 if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
7044 {
7045 tmp0 = gen_reg_rtx (V4DFmode);
7046 tmp1 = force_reg (V2DFmode, operands[1]);
7047
7048 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
7049 emit_insn (gen_avx_cvtpd2ps256 (operands[0], tmp0));
7050 }
7051 else
7052 {
7053 tmp0 = gen_reg_rtx (V4SFmode);
7054 tmp1 = gen_reg_rtx (V4SFmode);
7055
7056 emit_insn (gen_sse2_cvtpd2ps (tmp0, operands[1]));
7057 emit_insn (gen_sse2_cvtpd2ps (tmp1, operands[2]));
7058 emit_insn (gen_sse_movlhps (operands[0], tmp0, tmp1));
7059 }
7060 DONE;
7061 })
7062
7063 (define_expand "vec_pack_sfix_trunc_v8df"
7064 [(match_operand:V16SI 0 "register_operand")
7065 (match_operand:V8DF 1 "nonimmediate_operand")
7066 (match_operand:V8DF 2 "nonimmediate_operand")]
7067 "TARGET_AVX512F"
7068 {
7069 rtx r1, r2;
7070
7071 r1 = gen_reg_rtx (V8SImode);
7072 r2 = gen_reg_rtx (V8SImode);
7073
7074 emit_insn (gen_fix_truncv8dfv8si2 (r1, operands[1]));
7075 emit_insn (gen_fix_truncv8dfv8si2 (r2, operands[2]));
7076 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
7077 DONE;
7078 })
7079
7080 (define_expand "vec_pack_sfix_trunc_v4df"
7081 [(match_operand:V8SI 0 "register_operand")
7082 (match_operand:V4DF 1 "nonimmediate_operand")
7083 (match_operand:V4DF 2 "nonimmediate_operand")]
7084 "TARGET_AVX"
7085 {
7086 rtx r1, r2;
7087
7088 r1 = gen_reg_rtx (V4SImode);
7089 r2 = gen_reg_rtx (V4SImode);
7090
7091 emit_insn (gen_fix_truncv4dfv4si2 (r1, operands[1]));
7092 emit_insn (gen_fix_truncv4dfv4si2 (r2, operands[2]));
7093 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
7094 DONE;
7095 })
7096
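;; The SSE2 path converts each V2DF with cvttpd2dq, which leaves the result
;; in the low two elements, and then joins the two halves with punpcklqdq
;; (vec_interleave_lowv2di).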
7097 (define_expand "vec_pack_sfix_trunc_v2df"
7098 [(match_operand:V4SI 0 "register_operand")
7099 (match_operand:V2DF 1 "vector_operand")
7100 (match_operand:V2DF 2 "vector_operand")]
7101 "TARGET_SSE2"
7102 {
7103 rtx tmp0, tmp1, tmp2;
7104
7105 if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
7106 {
7107 tmp0 = gen_reg_rtx (V4DFmode);
7108 tmp1 = force_reg (V2DFmode, operands[1]);
7109
7110 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
7111 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp0));
7112 }
7113 else
7114 {
7115 tmp0 = gen_reg_rtx (V4SImode);
7116 tmp1 = gen_reg_rtx (V4SImode);
7117 tmp2 = gen_reg_rtx (V2DImode);
7118
7119 emit_insn (gen_sse2_cvttpd2dq (tmp0, operands[1]));
7120 emit_insn (gen_sse2_cvttpd2dq (tmp1, operands[2]));
7121 emit_insn (gen_vec_interleave_lowv2di (tmp2,
7122 gen_lowpart (V2DImode, tmp0),
7123 gen_lowpart (V2DImode, tmp1)));
7124 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
7125 }
7126 DONE;
7127 })
7128
7129 (define_mode_attr ssepackfltmode
7130 [(V8DF "V16SI") (V4DF "V8SI") (V2DF "V4SI")])
7131
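;; For the 128- and 256-bit modes there is no unsigned conversion here:
;; ix86_expand_adjust_ufix_to_sfix_si rewrites each input so the signed pack
;; can be used and returns a per-element correction; the corrections are
;; combined with ix86_expand_vec_extract_even_odd and XORed into the packed
;; result.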
7132 (define_expand "vec_pack_ufix_trunc_<mode>"
7133 [(match_operand:<ssepackfltmode> 0 "register_operand")
7134 (match_operand:VF2 1 "register_operand")
7135 (match_operand:VF2 2 "register_operand")]
7136 "TARGET_SSE2"
7137 {
7138 if (<MODE>mode == V8DFmode)
7139 {
7140 rtx r1, r2;
7141
7142 r1 = gen_reg_rtx (V8SImode);
7143 r2 = gen_reg_rtx (V8SImode);
7144
7145 emit_insn (gen_fixuns_truncv8dfv8si2 (r1, operands[1]));
7146 emit_insn (gen_fixuns_truncv8dfv8si2 (r2, operands[2]));
7147 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
7148 }
7149 else
7150 {
7151 rtx tmp[7];
7152 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
7153 tmp[1] = ix86_expand_adjust_ufix_to_sfix_si (operands[2], &tmp[3]);
7154 tmp[4] = gen_reg_rtx (<ssepackfltmode>mode);
7155 emit_insn (gen_vec_pack_sfix_trunc_<mode> (tmp[4], tmp[0], tmp[1]));
7156 if (<ssepackfltmode>mode == V4SImode || TARGET_AVX2)
7157 {
7158 tmp[5] = gen_reg_rtx (<ssepackfltmode>mode);
7159 ix86_expand_vec_extract_even_odd (tmp[5], tmp[2], tmp[3], 0);
7160 }
7161 else
7162 {
7163 tmp[5] = gen_reg_rtx (V8SFmode);
7164 ix86_expand_vec_extract_even_odd (tmp[5],
7165 gen_lowpart (V8SFmode, tmp[2]),
7166 gen_lowpart (V8SFmode, tmp[3]), 0);
7167 tmp[5] = gen_lowpart (V8SImode, tmp[5]);
7168 }
7169 tmp[6] = expand_simple_binop (<ssepackfltmode>mode, XOR, tmp[4], tmp[5],
7170 operands[0], 0, OPTAB_DIRECT);
7171 if (tmp[6] != operands[0])
7172 emit_move_insn (operands[0], tmp[6]);
7173 }
7174
7175 DONE;
7176 })
7177
7178 (define_expand "avx512f_vec_pack_sfix_v8df"
7179 [(match_operand:V16SI 0 "register_operand")
7180 (match_operand:V8DF 1 "nonimmediate_operand")
7181 (match_operand:V8DF 2 "nonimmediate_operand")]
7182 "TARGET_AVX512F"
7183 {
7184 rtx r1, r2;
7185
7186 r1 = gen_reg_rtx (V8SImode);
7187 r2 = gen_reg_rtx (V8SImode);
7188
7189 emit_insn (gen_avx512f_cvtpd2dq512 (r1, operands[1]));
7190 emit_insn (gen_avx512f_cvtpd2dq512 (r2, operands[2]));
7191 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
7192 DONE;
7193 })
7194
7195 (define_expand "vec_pack_sfix_v4df"
7196 [(match_operand:V8SI 0 "register_operand")
7197 (match_operand:V4DF 1 "nonimmediate_operand")
7198 (match_operand:V4DF 2 "nonimmediate_operand")]
7199 "TARGET_AVX"
7200 {
7201 rtx r1, r2;
7202
7203 r1 = gen_reg_rtx (V4SImode);
7204 r2 = gen_reg_rtx (V4SImode);
7205
7206 emit_insn (gen_avx_cvtpd2dq256 (r1, operands[1]));
7207 emit_insn (gen_avx_cvtpd2dq256 (r2, operands[2]));
7208 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
7209 DONE;
7210 })
7211
7212 (define_expand "vec_pack_sfix_v2df"
7213 [(match_operand:V4SI 0 "register_operand")
7214 (match_operand:V2DF 1 "vector_operand")
7215 (match_operand:V2DF 2 "vector_operand")]
7216 "TARGET_SSE2"
7217 {
7218 rtx tmp0, tmp1, tmp2;
7219
7220 if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
7221 {
7222 tmp0 = gen_reg_rtx (V4DFmode);
7223 tmp1 = force_reg (V2DFmode, operands[1]);
7224
7225 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
7226 emit_insn (gen_avx_cvtpd2dq256 (operands[0], tmp0));
7227 }
7228 else
7229 {
7230 tmp0 = gen_reg_rtx (V4SImode);
7231 tmp1 = gen_reg_rtx (V4SImode);
7232 tmp2 = gen_reg_rtx (V2DImode);
7233
7234 emit_insn (gen_sse2_cvtpd2dq (tmp0, operands[1]));
7235 emit_insn (gen_sse2_cvtpd2dq (tmp1, operands[2]));
7236 emit_insn (gen_vec_interleave_lowv2di (tmp2,
7237 gen_lowpart (V2DImode, tmp0),
7238 gen_lowpart (V2DImode, tmp1)));
7239 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
7240 }
7241 DONE;
7242 })
7243
7244 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7245 ;;
7246 ;; Parallel single-precision floating point element swizzling
7247 ;;
7248 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7249
7250 (define_expand "sse_movhlps_exp"
7251 [(set (match_operand:V4SF 0 "nonimmediate_operand")
7252 (vec_select:V4SF
7253 (vec_concat:V8SF
7254 (match_operand:V4SF 1 "nonimmediate_operand")
7255 (match_operand:V4SF 2 "nonimmediate_operand"))
7256 (parallel [(const_int 6)
7257 (const_int 7)
7258 (const_int 2)
7259 (const_int 3)])))]
7260 "TARGET_SSE"
7261 {
7262 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
7263
7264 emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));
7265
7266 /* Fix up the destination if needed. */
7267 if (dst != operands[0])
7268 emit_move_insn (operands[0], dst);
7269
7270 DONE;
7271 })
7272
7273 (define_insn "sse_movhlps"
7274 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,m")
7275 (vec_select:V4SF
7276 (vec_concat:V8SF
7277 (match_operand:V4SF 1 "nonimmediate_operand" " 0,v,0,v,0")
7278 (match_operand:V4SF 2 "nonimmediate_operand" " x,v,o,o,v"))
7279 (parallel [(const_int 6)
7280 (const_int 7)
7281 (const_int 2)
7282 (const_int 3)])))]
7283 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
7284 "@
7285 movhlps\t{%2, %0|%0, %2}
7286 vmovhlps\t{%2, %1, %0|%0, %1, %2}
7287 movlps\t{%H2, %0|%0, %H2}
7288 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
7289 %vmovhps\t{%2, %0|%q0, %2}"
7290 [(set_attr "isa" "noavx,avx,noavx,avx,*")
7291 (set_attr "type" "ssemov")
7292 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
7293 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
7294
7295 (define_expand "sse_movlhps_exp"
7296 [(set (match_operand:V4SF 0 "nonimmediate_operand")
7297 (vec_select:V4SF
7298 (vec_concat:V8SF
7299 (match_operand:V4SF 1 "nonimmediate_operand")
7300 (match_operand:V4SF 2 "nonimmediate_operand"))
7301 (parallel [(const_int 0)
7302 (const_int 1)
7303 (const_int 4)
7304 (const_int 5)])))]
7305 "TARGET_SSE"
7306 {
7307 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
7308
7309 emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));
7310
7311 /* Fix up the destination if needed. */
7312 if (dst != operands[0])
7313 emit_move_insn (operands[0], dst);
7314
7315 DONE;
7316 })
7317
7318 (define_insn "sse_movlhps"
7319 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,o")
7320 (vec_select:V4SF
7321 (vec_concat:V8SF
7322 (match_operand:V4SF 1 "nonimmediate_operand" " 0,v,0,v,0")
7323 (match_operand:V4SF 2 "nonimmediate_operand" " x,v,m,v,v"))
7324 (parallel [(const_int 0)
7325 (const_int 1)
7326 (const_int 4)
7327 (const_int 5)])))]
7328 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
7329 "@
7330 movlhps\t{%2, %0|%0, %2}
7331 vmovlhps\t{%2, %1, %0|%0, %1, %2}
7332 movhps\t{%2, %0|%0, %q2}
7333 vmovhps\t{%2, %1, %0|%0, %1, %q2}
7334 %vmovlps\t{%2, %H0|%H0, %2}"
7335 [(set_attr "isa" "noavx,avx,noavx,avx,*")
7336 (set_attr "type" "ssemov")
7337 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
7338 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
7339
7340 (define_insn "<mask_codefor>avx512f_unpckhps512<mask_name>"
7341 [(set (match_operand:V16SF 0 "register_operand" "=v")
7342 (vec_select:V16SF
7343 (vec_concat:V32SF
7344 (match_operand:V16SF 1 "register_operand" "v")
7345 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
7346 (parallel [(const_int 2) (const_int 18)
7347 (const_int 3) (const_int 19)
7348 (const_int 6) (const_int 22)
7349 (const_int 7) (const_int 23)
7350 (const_int 10) (const_int 26)
7351 (const_int 11) (const_int 27)
7352 (const_int 14) (const_int 30)
7353 (const_int 15) (const_int 31)])))]
7354 "TARGET_AVX512F"
7355 "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7356 [(set_attr "type" "sselog")
7357 (set_attr "prefix" "evex")
7358 (set_attr "mode" "V16SF")])
7359
7360 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
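;; For example, vunpckhps on ymm operands a and b gives {a2,b2,a3,b3} in the
;; low lane and {a6,b6,a7,b7} in the high lane.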
7361 (define_insn "avx_unpckhps256<mask_name>"
7362 [(set (match_operand:V8SF 0 "register_operand" "=v")
7363 (vec_select:V8SF
7364 (vec_concat:V16SF
7365 (match_operand:V8SF 1 "register_operand" "v")
7366 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
7367 (parallel [(const_int 2) (const_int 10)
7368 (const_int 3) (const_int 11)
7369 (const_int 6) (const_int 14)
7370 (const_int 7) (const_int 15)])))]
7371 "TARGET_AVX && <mask_avx512vl_condition>"
7372 "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7373 [(set_attr "type" "sselog")
7374 (set_attr "prefix" "vex")
7375 (set_attr "mode" "V8SF")])
7376
7377 (define_expand "vec_interleave_highv8sf"
7378 [(set (match_dup 3)
7379 (vec_select:V8SF
7380 (vec_concat:V16SF
7381 (match_operand:V8SF 1 "register_operand")
7382 (match_operand:V8SF 2 "nonimmediate_operand"))
7383 (parallel [(const_int 0) (const_int 8)
7384 (const_int 1) (const_int 9)
7385 (const_int 4) (const_int 12)
7386 (const_int 5) (const_int 13)])))
7387 (set (match_dup 4)
7388 (vec_select:V8SF
7389 (vec_concat:V16SF
7390 (match_dup 1)
7391 (match_dup 2))
7392 (parallel [(const_int 2) (const_int 10)
7393 (const_int 3) (const_int 11)
7394 (const_int 6) (const_int 14)
7395 (const_int 7) (const_int 15)])))
7396 (set (match_operand:V8SF 0 "register_operand")
7397 (vec_select:V8SF
7398 (vec_concat:V16SF
7399 (match_dup 3)
7400 (match_dup 4))
7401 (parallel [(const_int 4) (const_int 5)
7402 (const_int 6) (const_int 7)
7403 (const_int 12) (const_int 13)
7404 (const_int 14) (const_int 15)])))]
7405 "TARGET_AVX"
7406 {
7407 operands[3] = gen_reg_rtx (V8SFmode);
7408 operands[4] = gen_reg_rtx (V8SFmode);
7409 })
7410
7411 (define_insn "vec_interleave_highv4sf<mask_name>"
7412 [(set (match_operand:V4SF 0 "register_operand" "=x,v")
7413 (vec_select:V4SF
7414 (vec_concat:V8SF
7415 (match_operand:V4SF 1 "register_operand" "0,v")
7416 (match_operand:V4SF 2 "vector_operand" "xBm,vm"))
7417 (parallel [(const_int 2) (const_int 6)
7418 (const_int 3) (const_int 7)])))]
7419 "TARGET_SSE && <mask_avx512vl_condition>"
7420 "@
7421 unpckhps\t{%2, %0|%0, %2}
7422 vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7423 [(set_attr "isa" "noavx,avx")
7424 (set_attr "type" "sselog")
7425 (set_attr "prefix" "orig,vex")
7426 (set_attr "mode" "V4SF")])
7427
7428 (define_insn "<mask_codefor>avx512f_unpcklps512<mask_name>"
7429 [(set (match_operand:V16SF 0 "register_operand" "=v")
7430 (vec_select:V16SF
7431 (vec_concat:V32SF
7432 (match_operand:V16SF 1 "register_operand" "v")
7433 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
7434 (parallel [(const_int 0) (const_int 16)
7435 (const_int 1) (const_int 17)
7436 (const_int 4) (const_int 20)
7437 (const_int 5) (const_int 21)
7438 (const_int 8) (const_int 24)
7439 (const_int 9) (const_int 25)
7440 (const_int 12) (const_int 28)
7441 (const_int 13) (const_int 29)])))]
7442 "TARGET_AVX512F"
7443 "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7444 [(set_attr "type" "sselog")
7445 (set_attr "prefix" "evex")
7446 (set_attr "mode" "V16SF")])
7447
7448 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
7449 (define_insn "avx_unpcklps256<mask_name>"
7450 [(set (match_operand:V8SF 0 "register_operand" "=v")
7451 (vec_select:V8SF
7452 (vec_concat:V16SF
7453 (match_operand:V8SF 1 "register_operand" "v")
7454 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
7455 (parallel [(const_int 0) (const_int 8)
7456 (const_int 1) (const_int 9)
7457 (const_int 4) (const_int 12)
7458 (const_int 5) (const_int 13)])))]
7459 "TARGET_AVX && <mask_avx512vl_condition>"
7460 "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7461 [(set_attr "type" "sselog")
7462 (set_attr "prefix" "vex")
7463 (set_attr "mode" "V8SF")])
7464
7465 (define_insn "unpcklps128_mask"
7466 [(set (match_operand:V4SF 0 "register_operand" "=v")
7467 (vec_merge:V4SF
7468 (vec_select:V4SF
7469 (vec_concat:V8SF
7470 (match_operand:V4SF 1 "register_operand" "v")
7471 (match_operand:V4SF 2 "nonimmediate_operand" "vm"))
7472 (parallel [(const_int 0) (const_int 4)
7473 (const_int 1) (const_int 5)]))
7474 (match_operand:V4SF 3 "nonimm_or_0_operand" "0C")
7475 (match_operand:QI 4 "register_operand" "Yk")))]
7476 "TARGET_AVX512VL"
7477 "vunpcklps\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
7478 [(set_attr "type" "sselog")
7479 (set_attr "prefix" "evex")
7480 (set_attr "mode" "V4SF")])
7481
7482 (define_expand "vec_interleave_lowv8sf"
7483 [(set (match_dup 3)
7484 (vec_select:V8SF
7485 (vec_concat:V16SF
7486 (match_operand:V8SF 1 "register_operand")
7487 (match_operand:V8SF 2 "nonimmediate_operand"))
7488 (parallel [(const_int 0) (const_int 8)
7489 (const_int 1) (const_int 9)
7490 (const_int 4) (const_int 12)
7491 (const_int 5) (const_int 13)])))
7492 (set (match_dup 4)
7493 (vec_select:V8SF
7494 (vec_concat:V16SF
7495 (match_dup 1)
7496 (match_dup 2))
7497 (parallel [(const_int 2) (const_int 10)
7498 (const_int 3) (const_int 11)
7499 (const_int 6) (const_int 14)
7500 (const_int 7) (const_int 15)])))
7501 (set (match_operand:V8SF 0 "register_operand")
7502 (vec_select:V8SF
7503 (vec_concat:V16SF
7504 (match_dup 3)
7505 (match_dup 4))
7506 (parallel [(const_int 0) (const_int 1)
7507 (const_int 2) (const_int 3)
7508 (const_int 8) (const_int 9)
7509 (const_int 10) (const_int 11)])))]
7510 "TARGET_AVX"
7511 {
7512 operands[3] = gen_reg_rtx (V8SFmode);
7513 operands[4] = gen_reg_rtx (V8SFmode);
7514 })
7515
7516 (define_insn "vec_interleave_lowv4sf"
7517 [(set (match_operand:V4SF 0 "register_operand" "=x,v")
7518 (vec_select:V4SF
7519 (vec_concat:V8SF
7520 (match_operand:V4SF 1 "register_operand" "0,v")
7521 (match_operand:V4SF 2 "vector_operand" "xBm,vm"))
7522 (parallel [(const_int 0) (const_int 4)
7523 (const_int 1) (const_int 5)])))]
7524 "TARGET_SSE"
7525 "@
7526 unpcklps\t{%2, %0|%0, %2}
7527 vunpcklps\t{%2, %1, %0|%0, %1, %2}"
7528 [(set_attr "isa" "noavx,avx")
7529 (set_attr "type" "sselog")
7530 (set_attr "prefix" "orig,maybe_evex")
7531 (set_attr "mode" "V4SF")])
7532
7533 ;; These are modeled with the same vec_concat as the others so that we
7534 ;; capture users of shufps that can use the new instructions
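;; E.g. a vec_select over a self vec_concat that duplicates the odd
;; elements, { a1, a1, a3, a3, ... }, is recognized by the patterns
;; below and emitted as movshdup rather than as a shufps-style shuffle.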
7535 (define_insn "avx_movshdup256<mask_name>"
7536 [(set (match_operand:V8SF 0 "register_operand" "=v")
7537 (vec_select:V8SF
7538 (vec_concat:V16SF
7539 (match_operand:V8SF 1 "nonimmediate_operand" "vm")
7540 (match_dup 1))
7541 (parallel [(const_int 1) (const_int 1)
7542 (const_int 3) (const_int 3)
7543 (const_int 5) (const_int 5)
7544 (const_int 7) (const_int 7)])))]
7545 "TARGET_AVX && <mask_avx512vl_condition>"
7546 "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
7547 [(set_attr "type" "sse")
7548 (set_attr "prefix" "vex")
7549 (set_attr "mode" "V8SF")])
7550
7551 (define_insn "sse3_movshdup<mask_name>"
7552 [(set (match_operand:V4SF 0 "register_operand" "=v")
7553 (vec_select:V4SF
7554 (vec_concat:V8SF
7555 (match_operand:V4SF 1 "vector_operand" "vBm")
7556 (match_dup 1))
7557 (parallel [(const_int 1)
7558 (const_int 1)
7559 (const_int 7)
7560 (const_int 7)])))]
7561 "TARGET_SSE3 && <mask_avx512vl_condition>"
7562 "%vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
7563 [(set_attr "type" "sse")
7564 (set_attr "prefix_rep" "1")
7565 (set_attr "prefix" "maybe_vex")
7566 (set_attr "mode" "V4SF")])
7567
7568 (define_insn "<mask_codefor>avx512f_movshdup512<mask_name>"
7569 [(set (match_operand:V16SF 0 "register_operand" "=v")
7570 (vec_select:V16SF
7571 (vec_concat:V32SF
7572 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
7573 (match_dup 1))
7574 (parallel [(const_int 1) (const_int 1)
7575 (const_int 3) (const_int 3)
7576 (const_int 5) (const_int 5)
7577 (const_int 7) (const_int 7)
7578 (const_int 9) (const_int 9)
7579 (const_int 11) (const_int 11)
7580 (const_int 13) (const_int 13)
7581 (const_int 15) (const_int 15)])))]
7582 "TARGET_AVX512F"
7583 "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
7584 [(set_attr "type" "sse")
7585 (set_attr "prefix" "evex")
7586 (set_attr "mode" "V16SF")])
7587
7588 (define_insn "avx_movsldup256<mask_name>"
7589 [(set (match_operand:V8SF 0 "register_operand" "=v")
7590 (vec_select:V8SF
7591 (vec_concat:V16SF
7592 (match_operand:V8SF 1 "nonimmediate_operand" "vm")
7593 (match_dup 1))
7594 (parallel [(const_int 0) (const_int 0)
7595 (const_int 2) (const_int 2)
7596 (const_int 4) (const_int 4)
7597 (const_int 6) (const_int 6)])))]
7598 "TARGET_AVX && <mask_avx512vl_condition>"
7599 "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
7600 [(set_attr "type" "sse")
7601 (set_attr "prefix" "vex")
7602 (set_attr "mode" "V8SF")])
7603
7604 (define_insn "sse3_movsldup<mask_name>"
7605 [(set (match_operand:V4SF 0 "register_operand" "=v")
7606 (vec_select:V4SF
7607 (vec_concat:V8SF
7608 (match_operand:V4SF 1 "vector_operand" "vBm")
7609 (match_dup 1))
7610 (parallel [(const_int 0)
7611 (const_int 0)
7612 (const_int 6)
7613 (const_int 6)])))]
7614 "TARGET_SSE3 && <mask_avx512vl_condition>"
7615 "%vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
7616 [(set_attr "type" "sse")
7617 (set_attr "prefix_rep" "1")
7618 (set_attr "prefix" "maybe_vex")
7619 (set_attr "mode" "V4SF")])
7620
7621 (define_insn "<mask_codefor>avx512f_movsldup512<mask_name>"
7622 [(set (match_operand:V16SF 0 "register_operand" "=v")
7623 (vec_select:V16SF
7624 (vec_concat:V32SF
7625 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
7626 (match_dup 1))
7627 (parallel [(const_int 0) (const_int 0)
7628 (const_int 2) (const_int 2)
7629 (const_int 4) (const_int 4)
7630 (const_int 6) (const_int 6)
7631 (const_int 8) (const_int 8)
7632 (const_int 10) (const_int 10)
7633 (const_int 12) (const_int 12)
7634 (const_int 14) (const_int 14)])))]
7635 "TARGET_AVX512F"
7636 "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
7637 [(set_attr "type" "sse")
7638 (set_attr "prefix" "evex")
7639 (set_attr "mode" "V16SF")])
7640
7641 (define_expand "avx_shufps256<mask_expand4_name>"
7642 [(match_operand:V8SF 0 "register_operand")
7643 (match_operand:V8SF 1 "register_operand")
7644 (match_operand:V8SF 2 "nonimmediate_operand")
7645 (match_operand:SI 3 "const_int_operand")]
7646 "TARGET_AVX"
7647 {
7648 int mask = INTVAL (operands[3]);
7649 emit_insn (gen_avx_shufps256_1<mask_expand4_name> (operands[0],
7650 operands[1],
7651 operands[2],
7652 GEN_INT ((mask >> 0) & 3),
7653 GEN_INT ((mask >> 2) & 3),
7654 GEN_INT (((mask >> 4) & 3) + 8),
7655 GEN_INT (((mask >> 6) & 3) + 8),
7656 GEN_INT (((mask >> 0) & 3) + 4),
7657 GEN_INT (((mask >> 2) & 3) + 4),
7658 GEN_INT (((mask >> 4) & 3) + 12),
7659 GEN_INT (((mask >> 6) & 3) + 12)
7660 <mask_expand4_args>));
7661 DONE;
7662 })
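;; A worked example of the decomposition above: for operands[3] == 0x1b
;; (e.g. from the _mm256_shuffle_ps intrinsic with that immediate) the
;; eight selectors become { 3, 2, 9, 8, 7, 6, 13, 12 }, i.e. the result
;; is { a3, a2, b1, b0 | a7, a6, b5, b4 } -- the same two-bit immediate
;; field is applied to both 128-bit lanes.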
7663
7664 ;; Each two-bit field in the mask selects two elements, one in each 128-bit lane.
7665 (define_insn "avx_shufps256_1<mask_name>"
7666 [(set (match_operand:V8SF 0 "register_operand" "=v")
7667 (vec_select:V8SF
7668 (vec_concat:V16SF
7669 (match_operand:V8SF 1 "register_operand" "v")
7670 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
7671 (parallel [(match_operand 3 "const_0_to_3_operand" )
7672 (match_operand 4 "const_0_to_3_operand" )
7673 (match_operand 5 "const_8_to_11_operand" )
7674 (match_operand 6 "const_8_to_11_operand" )
7675 (match_operand 7 "const_4_to_7_operand" )
7676 (match_operand 8 "const_4_to_7_operand" )
7677 (match_operand 9 "const_12_to_15_operand")
7678 (match_operand 10 "const_12_to_15_operand")])))]
7679 "TARGET_AVX
7680 && <mask_avx512vl_condition>
7681 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
7682 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
7683 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
7684 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
7685 {
7686 int mask;
7687 mask = INTVAL (operands[3]);
7688 mask |= INTVAL (operands[4]) << 2;
7689 mask |= (INTVAL (operands[5]) - 8) << 4;
7690 mask |= (INTVAL (operands[6]) - 8) << 6;
7691 operands[3] = GEN_INT (mask);
7692
7693 return "vshufps\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
7694 }
7695 [(set_attr "type" "sseshuf")
7696 (set_attr "length_immediate" "1")
7697 (set_attr "prefix" "<mask_prefix>")
7698 (set_attr "mode" "V8SF")])
7699
7700 (define_expand "sse_shufps<mask_expand4_name>"
7701 [(match_operand:V4SF 0 "register_operand")
7702 (match_operand:V4SF 1 "register_operand")
7703 (match_operand:V4SF 2 "vector_operand")
7704 (match_operand:SI 3 "const_int_operand")]
7705 "TARGET_SSE"
7706 {
7707 int mask = INTVAL (operands[3]);
7708 emit_insn (gen_sse_shufps_v4sf<mask_expand4_name> (operands[0],
7709 operands[1],
7710 operands[2],
7711 GEN_INT ((mask >> 0) & 3),
7712 GEN_INT ((mask >> 2) & 3),
7713 GEN_INT (((mask >> 4) & 3) + 4),
7714 GEN_INT (((mask >> 6) & 3) + 4)
7715 <mask_expand4_args>));
7716 DONE;
7717 })
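;; Likewise for the 128-bit case: operands[3] == 0x1b (e.g. from
;; _mm_shuffle_ps with that immediate) becomes the selectors
;; { 3, 2, 5, 4 }, i.e. { a3, a2, b1, b0 } -- the low two result
;; elements come from operand 1, the high two from operand 2.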
7718
7719 (define_insn "sse_shufps_v4sf_mask"
7720 [(set (match_operand:V4SF 0 "register_operand" "=v")
7721 (vec_merge:V4SF
7722 (vec_select:V4SF
7723 (vec_concat:V8SF
7724 (match_operand:V4SF 1 "register_operand" "v")
7725 (match_operand:V4SF 2 "nonimmediate_operand" "vm"))
7726 (parallel [(match_operand 3 "const_0_to_3_operand")
7727 (match_operand 4 "const_0_to_3_operand")
7728 (match_operand 5 "const_4_to_7_operand")
7729 (match_operand 6 "const_4_to_7_operand")]))
7730 (match_operand:V4SF 7 "nonimm_or_0_operand" "0C")
7731 (match_operand:QI 8 "register_operand" "Yk")))]
7732 "TARGET_AVX512VL"
7733 {
7734 int mask = 0;
7735 mask |= INTVAL (operands[3]) << 0;
7736 mask |= INTVAL (operands[4]) << 2;
7737 mask |= (INTVAL (operands[5]) - 4) << 4;
7738 mask |= (INTVAL (operands[6]) - 4) << 6;
7739 operands[3] = GEN_INT (mask);
7740
7741 return "vshufps\t{%3, %2, %1, %0%{%8%}%N7|%0%{%8%}%N7, %1, %2, %3}";
7742 }
7743 [(set_attr "type" "sseshuf")
7744 (set_attr "length_immediate" "1")
7745 (set_attr "prefix" "evex")
7746 (set_attr "mode" "V4SF")])
7747
7748 (define_insn "sse_shufps_<mode>"
7749 [(set (match_operand:VI4F_128 0 "register_operand" "=x,v")
7750 (vec_select:VI4F_128
7751 (vec_concat:<ssedoublevecmode>
7752 (match_operand:VI4F_128 1 "register_operand" "0,v")
7753 (match_operand:VI4F_128 2 "vector_operand" "xBm,vm"))
7754 (parallel [(match_operand 3 "const_0_to_3_operand")
7755 (match_operand 4 "const_0_to_3_operand")
7756 (match_operand 5 "const_4_to_7_operand")
7757 (match_operand 6 "const_4_to_7_operand")])))]
7758 "TARGET_SSE"
7759 {
7760 int mask = 0;
7761 mask |= INTVAL (operands[3]) << 0;
7762 mask |= INTVAL (operands[4]) << 2;
7763 mask |= (INTVAL (operands[5]) - 4) << 4;
7764 mask |= (INTVAL (operands[6]) - 4) << 6;
7765 operands[3] = GEN_INT (mask);
7766
7767 switch (which_alternative)
7768 {
7769 case 0:
7770 return "shufps\t{%3, %2, %0|%0, %2, %3}";
7771 case 1:
7772 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
7773 default:
7774 gcc_unreachable ();
7775 }
7776 }
7777 [(set_attr "isa" "noavx,avx")
7778 (set_attr "type" "sseshuf")
7779 (set_attr "length_immediate" "1")
7780 (set_attr "prefix" "orig,maybe_evex")
7781 (set_attr "mode" "V4SF")])
7782
7783 (define_insn "sse_storehps"
7784 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,v,v")
7785 (vec_select:V2SF
7786 (match_operand:V4SF 1 "nonimmediate_operand" "v,v,o")
7787 (parallel [(const_int 2) (const_int 3)])))]
7788 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7789 "@
7790 %vmovhps\t{%1, %0|%q0, %1}
7791 %vmovhlps\t{%1, %d0|%d0, %1}
7792 %vmovlps\t{%H1, %d0|%d0, %H1}"
7793 [(set_attr "type" "ssemov")
7794 (set_attr "prefix" "maybe_vex")
7795 (set_attr "mode" "V2SF,V4SF,V2SF")])
7796
7797 (define_expand "sse_loadhps_exp"
7798 [(set (match_operand:V4SF 0 "nonimmediate_operand")
7799 (vec_concat:V4SF
7800 (vec_select:V2SF
7801 (match_operand:V4SF 1 "nonimmediate_operand")
7802 (parallel [(const_int 0) (const_int 1)]))
7803 (match_operand:V2SF 2 "nonimmediate_operand")))]
7804 "TARGET_SSE"
7805 {
7806 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
7807
7808 emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));
7809
7810 /* Fix up the destination if needed. */
7811 if (dst != operands[0])
7812 emit_move_insn (operands[0], dst);
7813
7814 DONE;
7815 })
7816
7817 (define_insn "sse_loadhps"
7818 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,o")
7819 (vec_concat:V4SF
7820 (vec_select:V2SF
7821 (match_operand:V4SF 1 "nonimmediate_operand" " 0,v,0,v,0")
7822 (parallel [(const_int 0) (const_int 1)]))
7823 (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,v,v")))]
7824 "TARGET_SSE"
7825 "@
7826 movhps\t{%2, %0|%0, %q2}
7827 vmovhps\t{%2, %1, %0|%0, %1, %q2}
7828 movlhps\t{%2, %0|%0, %2}
7829 vmovlhps\t{%2, %1, %0|%0, %1, %2}
7830 %vmovlps\t{%2, %H0|%H0, %2}"
7831 [(set_attr "isa" "noavx,avx,noavx,avx,*")
7832 (set_attr "type" "ssemov")
7833 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
7834 (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
7835
7836 (define_insn "sse_storelps"
7837 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,v,v")
7838 (vec_select:V2SF
7839 (match_operand:V4SF 1 "nonimmediate_operand" " v,v,m")
7840 (parallel [(const_int 0) (const_int 1)])))]
7841 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7842 "@
7843 %vmovlps\t{%1, %0|%q0, %1}
7844 %vmovaps\t{%1, %0|%0, %1}
7845 %vmovlps\t{%1, %d0|%d0, %q1}"
7846 [(set_attr "type" "ssemov")
7847 (set_attr "prefix" "maybe_vex")
7848 (set_attr "mode" "V2SF,V4SF,V2SF")])
7849
7850 (define_expand "sse_loadlps_exp"
7851 [(set (match_operand:V4SF 0 "nonimmediate_operand")
7852 (vec_concat:V4SF
7853 (match_operand:V2SF 2 "nonimmediate_operand")
7854 (vec_select:V2SF
7855 (match_operand:V4SF 1 "nonimmediate_operand")
7856 (parallel [(const_int 2) (const_int 3)]))))]
7857 "TARGET_SSE"
7858 {
7859 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
7860
7861 emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));
7862
7863 /* Fix up the destination if needed. */
7864 if (dst != operands[0])
7865 emit_move_insn (operands[0], dst);
7866
7867 DONE;
7868 })
7869
7870 (define_insn "sse_loadlps"
7871 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,m")
7872 (vec_concat:V4SF
7873 (match_operand:V2SF 2 "nonimmediate_operand" " 0,v,m,m,v")
7874 (vec_select:V2SF
7875 (match_operand:V4SF 1 "nonimmediate_operand" " x,v,0,v,0")
7876 (parallel [(const_int 2) (const_int 3)]))))]
7877 "TARGET_SSE"
7878 "@
7879 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
7880 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
7881 movlps\t{%2, %0|%0, %q2}
7882 vmovlps\t{%2, %1, %0|%0, %1, %q2}
7883 %vmovlps\t{%2, %0|%q0, %2}"
7884 [(set_attr "isa" "noavx,avx,noavx,avx,*")
7885 (set_attr "type" "sseshuf,sseshuf,ssemov,ssemov,ssemov")
7886 (set (attr "length_immediate")
7887 (if_then_else (eq_attr "alternative" "0,1")
7888 (const_string "1")
7889 (const_string "*")))
7890 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
7891 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
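;; In the two shufps alternatives above, immediate 0xe4 selects the
;; fields { 0, 1, 2, 3 }: the low two result elements come from
;; operand 2 and the high two from operand 1, giving
;; { op2_0, op2_1, op1_2, op1_3 } exactly as the vec_concat describes.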
7892
7893 (define_insn "sse_movss"
7894 [(set (match_operand:V4SF 0 "register_operand" "=x,v")
7895 (vec_merge:V4SF
7896 (match_operand:V4SF 2 "register_operand" " x,v")
7897 (match_operand:V4SF 1 "register_operand" " 0,v")
7898 (const_int 1)))]
7899 "TARGET_SSE"
7900 "@
7901 movss\t{%2, %0|%0, %2}
7902 vmovss\t{%2, %1, %0|%0, %1, %2}"
7903 [(set_attr "isa" "noavx,avx")
7904 (set_attr "type" "ssemov")
7905 (set_attr "prefix" "orig,maybe_evex")
7906 (set_attr "mode" "SF")])
7907
7908 (define_insn "avx2_vec_dup<mode>"
7909 [(set (match_operand:VF1_128_256 0 "register_operand" "=v")
7910 (vec_duplicate:VF1_128_256
7911 (vec_select:SF
7912 (match_operand:V4SF 1 "register_operand" "v")
7913 (parallel [(const_int 0)]))))]
7914 "TARGET_AVX2"
7915 "vbroadcastss\t{%1, %0|%0, %1}"
7916 [(set_attr "type" "sselog1")
7917 (set_attr "prefix" "maybe_evex")
7918 (set_attr "mode" "<MODE>")])
7919
7920 (define_insn "avx2_vec_dupv8sf_1"
7921 [(set (match_operand:V8SF 0 "register_operand" "=v")
7922 (vec_duplicate:V8SF
7923 (vec_select:SF
7924 (match_operand:V8SF 1 "register_operand" "v")
7925 (parallel [(const_int 0)]))))]
7926 "TARGET_AVX2"
7927 "vbroadcastss\t{%x1, %0|%0, %x1}"
7928 [(set_attr "type" "sselog1")
7929 (set_attr "prefix" "maybe_evex")
7930 (set_attr "mode" "V8SF")])
7931
7932 (define_insn "avx512f_vec_dup<mode>_1"
7933 [(set (match_operand:VF_512 0 "register_operand" "=v")
7934 (vec_duplicate:VF_512
7935 (vec_select:<ssescalarmode>
7936 (match_operand:VF_512 1 "register_operand" "v")
7937 (parallel [(const_int 0)]))))]
7938 "TARGET_AVX512F"
7939 "vbroadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}"
7940 [(set_attr "type" "sselog1")
7941 (set_attr "prefix" "evex")
7942 (set_attr "mode" "<MODE>")])
7943
7944 ;; Although insertps also accepts a register source, we prefer
7945 ;; unpcklps for the register case since its encoding is shorter.
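;; (A non-VEX unpcklps is typically 3 bytes, 0F 14 /r, while insertps
;; needs the 66 0F 3A 21 /r ib encoding plus an immediate, at least
;; 6 bytes.)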
7946 (define_insn "*vec_concatv2sf_sse4_1"
7947 [(set (match_operand:V2SF 0 "register_operand"
7948 "=Yr,*x, v,Yr,*x,v,v,*y ,*y")
7949 (vec_concat:V2SF
7950 (match_operand:SF 1 "nonimmediate_operand"
7951 " 0, 0,Yv, 0,0, v,m, 0 , m")
7952 (match_operand:SF 2 "nonimm_or_0_operand"
7953 " Yr,*x,Yv, m,m, m,C,*ym, C")))]
7954 "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
7955 "@
7956 unpcklps\t{%2, %0|%0, %2}
7957 unpcklps\t{%2, %0|%0, %2}
7958 vunpcklps\t{%2, %1, %0|%0, %1, %2}
7959 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
7960 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
7961 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
7962 %vmovss\t{%1, %0|%0, %1}
7963 punpckldq\t{%2, %0|%0, %2}
7964 movd\t{%1, %0|%0, %1}"
7965 [(set (attr "isa")
7966 (cond [(eq_attr "alternative" "0,1,3,4")
7967 (const_string "noavx")
7968 (eq_attr "alternative" "2,5")
7969 (const_string "avx")
7970 ]
7971 (const_string "*")))
7972 (set (attr "type")
7973 (cond [(eq_attr "alternative" "6")
7974 (const_string "ssemov")
7975 (eq_attr "alternative" "7")
7976 (const_string "mmxcvt")
7977 (eq_attr "alternative" "8")
7978 (const_string "mmxmov")
7979 ]
7980 (const_string "sselog")))
7981 (set (attr "mmx_isa")
7982 (if_then_else (eq_attr "alternative" "7,8")
7983 (const_string "native")
7984 (const_string "*")))
7985 (set (attr "prefix_data16")
7986 (if_then_else (eq_attr "alternative" "3,4")
7987 (const_string "1")
7988 (const_string "*")))
7989 (set (attr "prefix_extra")
7990 (if_then_else (eq_attr "alternative" "3,4,5")
7991 (const_string "1")
7992 (const_string "*")))
7993 (set (attr "length_immediate")
7994 (if_then_else (eq_attr "alternative" "3,4,5")
7995 (const_string "1")
7996 (const_string "*")))
7997 (set (attr "prefix")
7998 (cond [(eq_attr "alternative" "2,5")
7999 (const_string "maybe_evex")
8000 (eq_attr "alternative" "6")
8001 (const_string "maybe_vex")
8002 ]
8003 (const_string "orig")))
8004 (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
8005
8006 ;; ??? In theory we can match memory for the MMX alternative, but allowing
8007 ;; vector_operand for operand 2 and *not* allowing memory for the SSE
8008 ;; alternatives pretty much forces the MMX alternative to be chosen.
8009 (define_insn "*vec_concatv2sf_sse"
8010 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
8011 (vec_concat:V2SF
8012 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
8013 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
8014 "TARGET_SSE"
8015 "@
8016 unpcklps\t{%2, %0|%0, %2}
8017 movss\t{%1, %0|%0, %1}
8018 punpckldq\t{%2, %0|%0, %2}
8019 movd\t{%1, %0|%0, %1}"
8020 [(set_attr "mmx_isa" "*,*,native,native")
8021 (set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
8022 (set_attr "mode" "V4SF,SF,DI,DI")])
8023
8024 (define_insn "*vec_concatv4sf"
8025 [(set (match_operand:V4SF 0 "register_operand" "=x,v,x,v")
8026 (vec_concat:V4SF
8027 (match_operand:V2SF 1 "register_operand" " 0,v,0,v")
8028 (match_operand:V2SF 2 "nonimmediate_operand" " x,v,m,m")))]
8029 "TARGET_SSE"
8030 "@
8031 movlhps\t{%2, %0|%0, %2}
8032 vmovlhps\t{%2, %1, %0|%0, %1, %2}
8033 movhps\t{%2, %0|%0, %q2}
8034 vmovhps\t{%2, %1, %0|%0, %1, %q2}"
8035 [(set_attr "isa" "noavx,avx,noavx,avx")
8036 (set_attr "type" "ssemov")
8037 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex")
8038 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
8039
8040 (define_insn "*vec_concatv4sf_0"
8041 [(set (match_operand:V4SF 0 "register_operand" "=v")
8042 (vec_concat:V4SF
8043 (match_operand:V2SF 1 "nonimmediate_operand" "xm")
8044 (match_operand:V2SF 2 "const0_operand" " C")))]
8045 "TARGET_SSE2"
8046 "%vmovq\t{%1, %0|%0, %1}"
8047 [(set_attr "type" "ssemov")
8048 (set_attr "prefix" "maybe_vex")
8049 (set_attr "mode" "DF")])
8050
8051 ;; Avoid combining registers from different units in a single alternative,
8052 ;; see comment above inline_secondary_memory_needed function in i386.c
8053 (define_insn "vec_set<mode>_0"
8054 [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
8055 "=Yr,*x,v,v,v,x,x,v,Yr ,*x ,x ,m ,m ,m")
8056 (vec_merge:VI4F_128
8057 (vec_duplicate:VI4F_128
8058 (match_operand:<ssescalarmode> 2 "general_operand"
8059 " Yr,*x,v,m,r ,m,x,v,*rm,*rm,*rm,!x,!*re,!*fF"))
8060 (match_operand:VI4F_128 1 "nonimm_or_0_operand"
8061 " C , C,C,C,C ,C,0,v,0 ,0 ,x ,0 ,0 ,0")
8062 (const_int 1)))]
8063 "TARGET_SSE"
8064 "@
8065 insertps\t{$0xe, %2, %0|%0, %2, 0xe}
8066 insertps\t{$0xe, %2, %0|%0, %2, 0xe}
8067 vinsertps\t{$0xe, %2, %2, %0|%0, %2, %2, 0xe}
8068 %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
8069 %vmovd\t{%2, %0|%0, %2}
8070 movss\t{%2, %0|%0, %2}
8071 movss\t{%2, %0|%0, %2}
8072 vmovss\t{%2, %1, %0|%0, %1, %2}
8073 pinsrd\t{$0, %2, %0|%0, %2, 0}
8074 pinsrd\t{$0, %2, %0|%0, %2, 0}
8075 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
8076 #
8077 #
8078 #"
8079 [(set (attr "isa")
8080 (cond [(eq_attr "alternative" "0,1,8,9")
8081 (const_string "sse4_noavx")
8082 (eq_attr "alternative" "2,7,10")
8083 (const_string "avx")
8084 (eq_attr "alternative" "3,4")
8085 (const_string "sse2")
8086 (eq_attr "alternative" "5,6")
8087 (const_string "noavx")
8088 ]
8089 (const_string "*")))
8090 (set (attr "type")
8091 (cond [(eq_attr "alternative" "0,1,2,8,9,10")
8092 (const_string "sselog")
8093 (eq_attr "alternative" "12")
8094 (const_string "imov")
8095 (eq_attr "alternative" "13")
8096 (const_string "fmov")
8097 ]
8098 (const_string "ssemov")))
8099 (set (attr "prefix_extra")
8100 (if_then_else (eq_attr "alternative" "8,9,10")
8101 (const_string "1")
8102 (const_string "*")))
8103 (set (attr "length_immediate")
8104 (if_then_else (eq_attr "alternative" "8,9,10")
8105 (const_string "1")
8106 (const_string "*")))
8107 (set (attr "prefix")
8108 (cond [(eq_attr "alternative" "0,1,5,6,8,9")
8109 (const_string "orig")
8110 (eq_attr "alternative" "2")
8111 (const_string "maybe_evex")
8112 (eq_attr "alternative" "3,4")
8113 (const_string "maybe_vex")
8114 (eq_attr "alternative" "7,10")
8115 (const_string "vex")
8116 ]
8117 (const_string "*")))
8118 (set_attr "mode" "SF,SF,SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,TI,*,*,*")
8119 (set (attr "preferred_for_speed")
8120 (cond [(eq_attr "alternative" "4")
8121 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
8122 ]
8123 (symbol_ref "true")))])
8124
8125 ;; The vec_setv4sf expander generates a subset of these merges.
8126 (define_insn "*vec_setv4sf_sse4_1"
8127 [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,v")
8128 (vec_merge:V4SF
8129 (vec_duplicate:V4SF
8130 (match_operand:SF 2 "nonimmediate_operand" "Yrm,*xm,vm"))
8131 (match_operand:V4SF 1 "register_operand" "0,0,v")
8132 (match_operand:SI 3 "const_int_operand")))]
8133 "TARGET_SSE4_1
8134 && ((unsigned) exact_log2 (INTVAL (operands[3]))
8135 < GET_MODE_NUNITS (V4SFmode))"
8136 {
8137 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
8138 switch (which_alternative)
8139 {
8140 case 0:
8141 case 1:
8142 return "insertps\t{%3, %2, %0|%0, %2, %3}";
8143 case 2:
8144 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
8145 default:
8146 gcc_unreachable ();
8147 }
8148 }
8149 [(set_attr "isa" "noavx,noavx,avx")
8150 (set_attr "type" "sselog")
8151 (set_attr "prefix_data16" "1,1,*")
8152 (set_attr "prefix_extra" "1")
8153 (set_attr "length_immediate" "1")
8154 (set_attr "prefix" "orig,orig,maybe_evex")
8155 (set_attr "mode" "V4SF")])
8156
8157 ;; vinsertps, vmovss and vmovd all clear the higher bits of the destination as well.
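;; (With VEX/EVEX encoding they zero everything above bit 127 of the
;; destination, so a 128-bit instruction is enough to build a 256- or
;; 512-bit vector whose remaining elements are zero.)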
8158 (define_insn "vec_set<mode>_0"
8159 [(set (match_operand:VI4F_256_512 0 "register_operand" "=v,v,v")
8160 (vec_merge:VI4F_256_512
8161 (vec_duplicate:VI4F_256_512
8162 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "v,m,r"))
8163 (match_operand:VI4F_256_512 1 "const0_operand" "C,C,C")
8164 (const_int 1)))]
8165 "TARGET_AVX"
8166 "@
8167 vinsertps\t{$0xe, %2, %2, %x0|%x0, %2, %2, 0xe}
8168 vmov<ssescalarmodesuffix>\t{%x2, %x0|%x0, %2}
8169 vmovd\t{%2, %x0|%x0, %2}"
8170 [(set (attr "type")
8171 (if_then_else (eq_attr "alternative" "0")
8172 (const_string "sselog")
8173 (const_string "ssemov")))
8174 (set_attr "prefix" "maybe_evex")
8175 (set_attr "mode" "SF,<ssescalarmode>,SI")
8176 (set (attr "preferred_for_speed")
8177 (cond [(eq_attr "alternative" "2")
8178 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
8179 ]
8180 (symbol_ref "true")))])
8181
8182 (define_insn "sse4_1_insertps"
8183 [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,v")
8184 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "Yrm,*xm,vm")
8185 (match_operand:V4SF 1 "register_operand" "0,0,v")
8186 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
8187 UNSPEC_INSERTPS))]
8188 "TARGET_SSE4_1"
8189 {
8190 if (MEM_P (operands[2]))
8191 {
8192 unsigned count_s = INTVAL (operands[3]) >> 6;
8193 if (count_s)
8194 operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
8195 operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
8196 }
8197 switch (which_alternative)
8198 {
8199 case 0:
8200 case 1:
8201 return "insertps\t{%3, %2, %0|%0, %2, %3}";
8202 case 2:
8203 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
8204 default:
8205 gcc_unreachable ();
8206 }
8207 }
8208 [(set_attr "isa" "noavx,noavx,avx")
8209 (set_attr "type" "sselog")
8210 (set_attr "prefix_data16" "1,1,*")
8211 (set_attr "prefix_extra" "1")
8212 (set_attr "length_immediate" "1")
8213 (set_attr "prefix" "orig,orig,maybe_evex")
8214 (set_attr "mode" "V4SF")])
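;; For a memory source the hardware form of insertps loads a single
;; 32-bit element, so the COUNT_S field of the immediate is folded into
;; the address above: e.g. an immediate of 0x58 (COUNT_S = 1) becomes a
;; load from mem + 4 with immediate 0x18.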
8215
8216 (define_split
8217 [(set (match_operand:VI4F_128 0 "memory_operand")
8218 (vec_merge:VI4F_128
8219 (vec_duplicate:VI4F_128
8220 (match_operand:<ssescalarmode> 1 "nonmemory_operand"))
8221 (match_dup 0)
8222 (const_int 1)))]
8223 "TARGET_SSE && reload_completed"
8224 [(set (match_dup 0) (match_dup 1))]
8225 "operands[0] = adjust_address (operands[0], <ssescalarmode>mode, 0);")
8226
8227 ;; Standard scalar operation patterns which preserve the rest of the
8228 ;; vector for combiner.
8229 (define_insn "vec_setv2df_0"
8230 [(set (match_operand:V2DF 0 "register_operand" "=x,v,x,v")
8231 (vec_merge:V2DF
8232 (vec_duplicate:V2DF
8233 (match_operand:DF 2 "nonimmediate_operand" " x,v,m,m"))
8234 (match_operand:V2DF 1 "register_operand" " 0,v,0,v")
8235 (const_int 1)))]
8236 "TARGET_SSE2"
8237 "@
8238 movsd\t{%2, %0|%0, %2}
8239 vmovsd\t{%2, %1, %0|%0, %1, %2}
8240 movlpd\t{%2, %0|%0, %2}
8241 vmovlpd\t{%2, %1, %0|%0, %1, %2}"
8242 [(set_attr "isa" "noavx,avx,noavx,avx")
8243 (set_attr "type" "ssemov")
8244 (set_attr "mode" "DF")])
8245
8246 (define_expand "vec_set<mode>"
8247 [(match_operand:V 0 "register_operand")
8248 (match_operand:<ssescalarmode> 1 "register_operand")
8249 (match_operand 2 "const_int_operand")]
8250 "TARGET_SSE"
8251 {
8252 ix86_expand_vector_set (false, operands[0], operands[1],
8253 INTVAL (operands[2]));
8254 DONE;
8255 })
8256
8257 (define_insn_and_split "*vec_extractv4sf_0"
8258 [(set (match_operand:SF 0 "nonimmediate_operand" "=v,m,f,r")
8259 (vec_select:SF
8260 (match_operand:V4SF 1 "nonimmediate_operand" "vm,v,m,m")
8261 (parallel [(const_int 0)])))]
8262 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8263 "#"
8264 "&& reload_completed"
8265 [(set (match_dup 0) (match_dup 1))]
8266 "operands[1] = gen_lowpart (SFmode, operands[1]);")
8267
8268 (define_insn_and_split "*sse4_1_extractps"
8269 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,rm,rm,Yv,Yv")
8270 (vec_select:SF
8271 (match_operand:V4SF 1 "register_operand" "Yr,*x,v,0,v")
8272 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n,n,n")])))]
8273 "TARGET_SSE4_1"
8274 "@
8275 extractps\t{%2, %1, %0|%0, %1, %2}
8276 extractps\t{%2, %1, %0|%0, %1, %2}
8277 vextractps\t{%2, %1, %0|%0, %1, %2}
8278 #
8279 #"
8280 "&& reload_completed && SSE_REG_P (operands[0])"
8281 [(const_int 0)]
8282 {
8283 rtx dest = lowpart_subreg (V4SFmode, operands[0], SFmode);
8284 switch (INTVAL (operands[2]))
8285 {
8286 case 1:
8287 case 3:
8288 emit_insn (gen_sse_shufps_v4sf (dest, operands[1], operands[1],
8289 operands[2], operands[2],
8290 GEN_INT (INTVAL (operands[2]) + 4),
8291 GEN_INT (INTVAL (operands[2]) + 4)));
8292 break;
8293 case 2:
8294 emit_insn (gen_vec_interleave_highv4sf (dest, operands[1], operands[1]));
8295 break;
8296 default:
8297 /* 0 should be handled by the *vec_extractv4sf_0 pattern above. */
8298 gcc_unreachable ();
8299 }
8300 DONE;
8301 }
8302 [(set_attr "isa" "noavx,noavx,avx,noavx,avx")
8303 (set_attr "type" "sselog,sselog,sselog,*,*")
8304 (set_attr "prefix_data16" "1,1,1,*,*")
8305 (set_attr "prefix_extra" "1,1,1,*,*")
8306 (set_attr "length_immediate" "1,1,1,*,*")
8307 (set_attr "prefix" "orig,orig,maybe_evex,*,*")
8308 (set_attr "mode" "V4SF,V4SF,V4SF,*,*")])
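;; When the destination ends up in an SSE register, the split above
;; just brings the requested element to position 0 of a shuffled copy:
;; index 2 uses unpckhps of the source with itself, { a2, a2, a3, a3 },
;; while indices 1 and 3 use a shufps that replicates the element.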
8309
8310 (define_insn_and_split "*vec_extractv4sf_mem"
8311 [(set (match_operand:SF 0 "register_operand" "=v,*r,f")
8312 (vec_select:SF
8313 (match_operand:V4SF 1 "memory_operand" "o,o,o")
8314 (parallel [(match_operand 2 "const_0_to_3_operand" "n,n,n")])))]
8315 "TARGET_SSE"
8316 "#"
8317 "&& reload_completed"
8318 [(set (match_dup 0) (match_dup 1))]
8319 {
8320 operands[1] = adjust_address (operands[1], SFmode, INTVAL (operands[2]) * 4);
8321 })
8322
8323 (define_mode_attr extract_type
8324 [(V16SF "avx512f") (V16SI "avx512f") (V8DF "avx512dq") (V8DI "avx512dq")])
8325
8326 (define_mode_attr extract_suf
8327 [(V16SF "32x4") (V16SI "32x4") (V8DF "64x2") (V8DI "64x2")])
8328
8329 (define_mode_iterator AVX512_VEC
8330 [(V8DF "TARGET_AVX512DQ") (V8DI "TARGET_AVX512DQ") V16SF V16SI])
8331
8332 (define_expand "<extract_type>_vextract<shuffletype><extract_suf>_mask"
8333 [(match_operand:<ssequartermode> 0 "nonimmediate_operand")
8334 (match_operand:AVX512_VEC 1 "register_operand")
8335 (match_operand:SI 2 "const_0_to_3_operand")
8336 (match_operand:<ssequartermode> 3 "nonimmediate_operand")
8337 (match_operand:QI 4 "register_operand")]
8338 "TARGET_AVX512F"
8339 {
8340 int mask;
8341 mask = INTVAL (operands[2]);
8342 rtx dest = operands[0];
8343
8344 if (MEM_P (operands[0]) && !rtx_equal_p (operands[0], operands[3]))
8345 dest = gen_reg_rtx (<ssequartermode>mode);
8346
8347 if (<MODE>mode == V16SImode || <MODE>mode == V16SFmode)
8348 emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (dest,
8349 operands[1], GEN_INT (mask * 4), GEN_INT (mask * 4 + 1),
8350 GEN_INT (mask * 4 + 2), GEN_INT (mask * 4 + 3), operands[3],
8351 operands[4]));
8352 else
8353 emit_insn (gen_avx512dq_vextract<shuffletype>64x2_1_mask (dest,
8354 operands[1], GEN_INT (mask * 2), GEN_INT (mask * 2 + 1), operands[3],
8355 operands[4]));
8356 if (dest != operands[0])
8357 emit_move_insn (operands[0], dest);
8358 DONE;
8359 })
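;; The quarter number in operands[2] is expanded into explicit element
;; indices here: e.g. extracting quarter 2 of a V16SF passes the
;; parallel { 8, 9, 10, 11 }, which the *_1_mask insn below shifts back
;; down (INTVAL >> 2) to recover the immediate 2 for the vextract*32x4
;; instruction.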
8360
8361 (define_insn "avx512dq_vextract<shuffletype>64x2_1_mask"
8362 [(set (match_operand:<ssequartermode> 0 "nonimmediate_operand" "=v,m")
8363 (vec_merge:<ssequartermode>
8364 (vec_select:<ssequartermode>
8365 (match_operand:V8FI 1 "register_operand" "v,v")
8366 (parallel [(match_operand 2 "const_0_to_7_operand")
8367 (match_operand 3 "const_0_to_7_operand")]))
8368 (match_operand:<ssequartermode> 4 "nonimm_or_0_operand" "0C,0")
8369 (match_operand:QI 5 "register_operand" "Yk,Yk")))]
8370 "TARGET_AVX512DQ
8371 && INTVAL (operands[2]) % 2 == 0
8372 && INTVAL (operands[2]) == INTVAL (operands[3]) - 1
8373 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[4]))"
8374 {
8375 operands[2] = GEN_INT (INTVAL (operands[2]) >> 1);
8376 return "vextract<shuffletype>64x2\t{%2, %1, %0%{%5%}%N4|%0%{%5%}%N4, %1, %2}";
8377 }
8378 [(set_attr "type" "sselog1")
8379 (set_attr "prefix_extra" "1")
8380 (set_attr "length_immediate" "1")
8381 (set_attr "prefix" "evex")
8382 (set_attr "mode" "<sseinsnmode>")])
8383
8384 (define_insn "*avx512dq_vextract<shuffletype>64x2_1"
8385 [(set (match_operand:<ssequartermode> 0 "nonimmediate_operand" "=vm")
8386 (vec_select:<ssequartermode>
8387 (match_operand:V8FI 1 "register_operand" "v")
8388 (parallel [(match_operand 2 "const_0_to_7_operand")
8389 (match_operand 3 "const_0_to_7_operand")])))]
8390 "TARGET_AVX512DQ
8391 && INTVAL (operands[2]) % 2 == 0
8392 && INTVAL (operands[2]) == INTVAL (operands[3]) - 1"
8393 {
8394 operands[2] = GEN_INT (INTVAL (operands[2]) >> 1);
8395 return "vextract<shuffletype>64x2\t{%2, %1, %0|%0, %1, %2}";
8396 }
8397 [(set_attr "type" "sselog1")
8398 (set_attr "prefix_extra" "1")
8399 (set_attr "length_immediate" "1")
8400 (set_attr "prefix" "evex")
8401 (set_attr "mode" "<sseinsnmode>")])
8402
8403 (define_split
8404 [(set (match_operand:<ssequartermode> 0 "nonimmediate_operand")
8405 (vec_select:<ssequartermode>
8406 (match_operand:V8FI 1 "register_operand")
8407 (parallel [(const_int 0) (const_int 1)])))]
8408 "TARGET_AVX512DQ
8409 && reload_completed
8410 && (TARGET_AVX512VL
8411 || REG_P (operands[0])
8412 || !EXT_REX_SSE_REG_P (operands[1]))"
8413 [(set (match_dup 0) (match_dup 1))]
8414 {
8415 if (!TARGET_AVX512VL
8416 && REG_P (operands[0])
8417 && EXT_REX_SSE_REG_P (operands[1]))
8418 operands[0]
8419 = lowpart_subreg (<MODE>mode, operands[0], <ssequartermode>mode);
8420 else
8421 operands[1] = gen_lowpart (<ssequartermode>mode, operands[1]);
8422 })
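;; Without AVX512VL the 128-bit low part of a %zmm16+ register cannot
;; be accessed directly, so the split above instead rewrites the
;; destination as a full-width subreg; the resulting full-register move
;; leaves the wanted low quarter in the low bits.  Otherwise the source
;; is simply narrowed with gen_lowpart.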
8423
8424 (define_insn "avx512f_vextract<shuffletype>32x4_1_mask"
8425 [(set (match_operand:<ssequartermode> 0 "nonimmediate_operand" "=v,m")
8426 (vec_merge:<ssequartermode>
8427 (vec_select:<ssequartermode>
8428 (match_operand:V16FI 1 "register_operand" "v,v")
8429 (parallel [(match_operand 2 "const_0_to_15_operand")
8430 (match_operand 3 "const_0_to_15_operand")
8431 (match_operand 4 "const_0_to_15_operand")
8432 (match_operand 5 "const_0_to_15_operand")]))
8433 (match_operand:<ssequartermode> 6 "nonimm_or_0_operand" "0C,0")
8434 (match_operand:QI 7 "register_operand" "Yk,Yk")))]
8435 "TARGET_AVX512F
8436 && INTVAL (operands[2]) % 4 == 0
8437 && INTVAL (operands[2]) == INTVAL (operands[3]) - 1
8438 && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
8439 && INTVAL (operands[4]) == INTVAL (operands[5]) - 1
8440 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[6]))"
8441 {
8442 operands[2] = GEN_INT (INTVAL (operands[2]) >> 2);
8443 return "vextract<shuffletype>32x4\t{%2, %1, %0%{%7%}%N6|%0%{%7%}%N6, %1, %2}";
8444 }
8445 [(set_attr "type" "sselog1")
8446 (set_attr "prefix_extra" "1")
8447 (set_attr "length_immediate" "1")
8448 (set_attr "prefix" "evex")
8449 (set_attr "mode" "<sseinsnmode>")])
8450
8451 (define_insn "*avx512f_vextract<shuffletype>32x4_1"
8452 [(set (match_operand:<ssequartermode> 0 "nonimmediate_operand" "=vm")
8453 (vec_select:<ssequartermode>
8454 (match_operand:V16FI 1 "register_operand" "v")
8455 (parallel [(match_operand 2 "const_0_to_15_operand")
8456 (match_operand 3 "const_0_to_15_operand")
8457 (match_operand 4 "const_0_to_15_operand")
8458 (match_operand 5 "const_0_to_15_operand")])))]
8459 "TARGET_AVX512F
8460 && INTVAL (operands[2]) % 4 == 0
8461 && INTVAL (operands[2]) == INTVAL (operands[3]) - 1
8462 && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
8463 && INTVAL (operands[4]) == INTVAL (operands[5]) - 1"
8464 {
8465 operands[2] = GEN_INT (INTVAL (operands[2]) >> 2);
8466 return "vextract<shuffletype>32x4\t{%2, %1, %0|%0, %1, %2}";
8467 }
8468 [(set_attr "type" "sselog1")
8469 (set_attr "prefix_extra" "1")
8470 (set_attr "length_immediate" "1")
8471 (set_attr "prefix" "evex")
8472 (set_attr "mode" "<sseinsnmode>")])
8473
8474 (define_split
8475 [(set (match_operand:<ssequartermode> 0 "nonimmediate_operand")
8476 (vec_select:<ssequartermode>
8477 (match_operand:V16FI 1 "register_operand")
8478 (parallel [(const_int 0) (const_int 1)
8479 (const_int 2) (const_int 3)])))]
8480 "TARGET_AVX512F
8481 && reload_completed
8482 && (TARGET_AVX512VL
8483 || REG_P (operands[0])
8484 || !EXT_REX_SSE_REG_P (operands[1]))"
8485 [(set (match_dup 0) (match_dup 1))]
8486 {
8487 if (!TARGET_AVX512VL
8488 && REG_P (operands[0])
8489 && EXT_REX_SSE_REG_P (operands[1]))
8490 operands[0]
8491 = lowpart_subreg (<MODE>mode, operands[0], <ssequartermode>mode);
8492 else
8493 operands[1] = gen_lowpart (<ssequartermode>mode, operands[1]);
8494 })
8495
8496 (define_mode_attr extract_type_2
8497 [(V16SF "avx512dq") (V16SI "avx512dq") (V8DF "avx512f") (V8DI "avx512f")])
8498
8499 (define_mode_attr extract_suf_2
8500 [(V16SF "32x8") (V16SI "32x8") (V8DF "64x4") (V8DI "64x4")])
8501
8502 (define_mode_iterator AVX512_VEC_2
8503 [(V16SF "TARGET_AVX512DQ") (V16SI "TARGET_AVX512DQ") V8DF V8DI])
8504
8505 (define_expand "<extract_type_2>_vextract<shuffletype><extract_suf_2>_mask"
8506 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
8507 (match_operand:AVX512_VEC_2 1 "register_operand")
8508 (match_operand:SI 2 "const_0_to_1_operand")
8509 (match_operand:<ssehalfvecmode> 3 "nonimmediate_operand")
8510 (match_operand:QI 4 "register_operand")]
8511 "TARGET_AVX512F"
8512 {
8513 rtx (*insn)(rtx, rtx, rtx, rtx);
8514 rtx dest = operands[0];
8515
8516 if (MEM_P (dest) && !rtx_equal_p (dest, operands[3]))
8517 dest = gen_reg_rtx (<ssehalfvecmode>mode);
8518
8519 switch (INTVAL (operands[2]))
8520 {
8521 case 0:
8522 insn = gen_vec_extract_lo_<mode>_mask;
8523 break;
8524 case 1:
8525 insn = gen_vec_extract_hi_<mode>_mask;
8526 break;
8527 default:
8528 gcc_unreachable ();
8529 }
8530
8531 emit_insn (insn (dest, operands[1], operands[3], operands[4]));
8532 if (dest != operands[0])
8533 emit_move_insn (operands[0], dest);
8534 DONE;
8535 })
8536
8537 (define_split
8538 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
8539 (vec_select:<ssehalfvecmode>
8540 (match_operand:V8FI 1 "nonimmediate_operand")
8541 (parallel [(const_int 0) (const_int 1)
8542 (const_int 2) (const_int 3)])))]
8543 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
8544 && reload_completed
8545 && (TARGET_AVX512VL
8546 || (REG_P (operands[0]) && !EXT_REX_SSE_REG_P (operands[1])))"
8547 [(set (match_dup 0) (match_dup 1))]
8548 "operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
8549
8550 (define_insn "vec_extract_lo_<mode>_mask"
8551 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
8552 (vec_merge:<ssehalfvecmode>
8553 (vec_select:<ssehalfvecmode>
8554 (match_operand:V8FI 1 "register_operand" "v,v")
8555 (parallel [(const_int 0) (const_int 1)
8556 (const_int 2) (const_int 3)]))
8557 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C,0")
8558 (match_operand:QI 3 "register_operand" "Yk,Yk")))]
8559 "TARGET_AVX512F
8560 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[2]))"
8561 "vextract<shuffletype>64x4\t{$0x0, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x0}"
8562 [(set_attr "type" "sselog1")
8563 (set_attr "prefix_extra" "1")
8564 (set_attr "length_immediate" "1")
8565 (set_attr "memory" "none,store")
8566 (set_attr "prefix" "evex")
8567 (set_attr "mode" "<sseinsnmode>")])
8568
8569 (define_insn "vec_extract_lo_<mode>"
8570 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,vm,v")
8571 (vec_select:<ssehalfvecmode>
8572 (match_operand:V8FI 1 "nonimmediate_operand" "v,v,vm")
8573 (parallel [(const_int 0) (const_int 1)
8574 (const_int 2) (const_int 3)])))]
8575 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8576 {
8577 if (!TARGET_AVX512VL && !MEM_P (operands[1]))
8578 return "vextract<shuffletype>64x4\t{$0x0, %1, %0|%0, %1, 0x0}";
8579 else
8580 return "#";
8581 }
8582 [(set_attr "type" "sselog1")
8583 (set_attr "prefix_extra" "1")
8584 (set_attr "length_immediate" "1")
8585 (set_attr "memory" "none,store,load")
8586 (set_attr "prefix" "evex")
8587 (set_attr "mode" "<sseinsnmode>")])
8588
8589 (define_insn "vec_extract_hi_<mode>_mask"
8590 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
8591 (vec_merge:<ssehalfvecmode>
8592 (vec_select:<ssehalfvecmode>
8593 (match_operand:V8FI 1 "register_operand" "v,v")
8594 (parallel [(const_int 4) (const_int 5)
8595 (const_int 6) (const_int 7)]))
8596 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C,0")
8597 (match_operand:QI 3 "register_operand" "Yk,Yk")))]
8598 "TARGET_AVX512F
8599 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[2]))"
8600 "vextract<shuffletype>64x4\t{$0x1, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x1}"
8601 [(set_attr "type" "sselog1")
8602 (set_attr "prefix_extra" "1")
8603 (set_attr "length_immediate" "1")
8604 (set_attr "prefix" "evex")
8605 (set_attr "mode" "<sseinsnmode>")])
8606
8607 (define_insn "vec_extract_hi_<mode>"
8608 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=vm")
8609 (vec_select:<ssehalfvecmode>
8610 (match_operand:V8FI 1 "register_operand" "v")
8611 (parallel [(const_int 4) (const_int 5)
8612 (const_int 6) (const_int 7)])))]
8613 "TARGET_AVX512F"
8614 "vextract<shuffletype>64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
8615 [(set_attr "type" "sselog1")
8616 (set_attr "prefix_extra" "1")
8617 (set_attr "length_immediate" "1")
8618 (set_attr "prefix" "evex")
8619 (set_attr "mode" "<sseinsnmode>")])
8620
8621 (define_insn "vec_extract_hi_<mode>_mask"
8622 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
8623 (vec_merge:<ssehalfvecmode>
8624 (vec_select:<ssehalfvecmode>
8625 (match_operand:V16FI 1 "register_operand" "v,v")
8626 (parallel [(const_int 8) (const_int 9)
8627 (const_int 10) (const_int 11)
8628 (const_int 12) (const_int 13)
8629 (const_int 14) (const_int 15)]))
8630 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C,0")
8631 (match_operand:QI 3 "register_operand" "Yk,Yk")))]
8632 "TARGET_AVX512DQ
8633 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[2]))"
8634 "vextract<shuffletype>32x8\t{$0x1, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x1}"
8635 [(set_attr "type" "sselog1")
8636 (set_attr "prefix_extra" "1")
8637 (set_attr "length_immediate" "1")
8638 (set_attr "prefix" "evex")
8639 (set_attr "mode" "<sseinsnmode>")])
8640
8641 (define_insn "vec_extract_hi_<mode>"
8642 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=vm,vm")
8643 (vec_select:<ssehalfvecmode>
8644 (match_operand:V16FI 1 "register_operand" "v,v")
8645 (parallel [(const_int 8) (const_int 9)
8646 (const_int 10) (const_int 11)
8647 (const_int 12) (const_int 13)
8648 (const_int 14) (const_int 15)])))]
8649 "TARGET_AVX512F"
8650 "@
8651 vextract<shuffletype>32x8\t{$0x1, %1, %0|%0, %1, 0x1}
8652 vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
8653 [(set_attr "type" "sselog1")
8654 (set_attr "prefix_extra" "1")
8655 (set_attr "isa" "avx512dq,noavx512dq")
8656 (set_attr "length_immediate" "1")
8657 (set_attr "prefix" "evex")
8658 (set_attr "mode" "<sseinsnmode>")])
8659
8660 (define_mode_iterator VI48F_256_DQ
8661 [V8SI V8SF (V4DI "TARGET_AVX512DQ") (V4DF "TARGET_AVX512DQ")])
8662
8663 (define_expand "avx512vl_vextractf128<mode>"
8664 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
8665 (match_operand:VI48F_256_DQ 1 "register_operand")
8666 (match_operand:SI 2 "const_0_to_1_operand")
8667 (match_operand:<ssehalfvecmode> 3 "nonimm_or_0_operand")
8668 (match_operand:QI 4 "register_operand")]
8669 "TARGET_AVX512VL"
8670 {
8671 rtx (*insn)(rtx, rtx, rtx, rtx);
8672 rtx dest = operands[0];
8673
8674 if (MEM_P (dest)
8675 && (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 4
8676 /* For V8S[IF]mode there are maskm insns with =m and 0
8677 constraints. */
8678 ? !rtx_equal_p (dest, operands[3])
8679 /* For V4D[IF]mode, hi insns don't allow memory, and
8680 lo insns have =m and 0C constraints. */
8681 : (operands[2] != const0_rtx
8682 || (!rtx_equal_p (dest, operands[3])
8683 && GET_CODE (operands[3]) != CONST_VECTOR))))
8684 dest = gen_reg_rtx (<ssehalfvecmode>mode);
8685 switch (INTVAL (operands[2]))
8686 {
8687 case 0:
8688 insn = gen_vec_extract_lo_<mode>_mask;
8689 break;
8690 case 1:
8691 insn = gen_vec_extract_hi_<mode>_mask;
8692 break;
8693 default:
8694 gcc_unreachable ();
8695 }
8696
8697 emit_insn (insn (dest, operands[1], operands[3], operands[4]));
8698 if (dest != operands[0])
8699 emit_move_insn (operands[0], dest);
8700 DONE;
8701 })
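;; If the memory destination cannot be used by the masked extract insn
;; directly (see the conditions in the code above), the extract is done
;; into a fresh register first and then stored to memory as a whole.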
8702
8703 (define_expand "avx_vextractf128<mode>"
8704 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
8705 (match_operand:V_256 1 "register_operand")
8706 (match_operand:SI 2 "const_0_to_1_operand")]
8707 "TARGET_AVX"
8708 {
8709 rtx (*insn)(rtx, rtx);
8710
8711 switch (INTVAL (operands[2]))
8712 {
8713 case 0:
8714 insn = gen_vec_extract_lo_<mode>;
8715 break;
8716 case 1:
8717 insn = gen_vec_extract_hi_<mode>;
8718 break;
8719 default:
8720 gcc_unreachable ();
8721 }
8722
8723 emit_insn (insn (operands[0], operands[1]));
8724 DONE;
8725 })
8726
8727 (define_insn "vec_extract_lo_<mode>_mask"
8728 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
8729 (vec_merge:<ssehalfvecmode>
8730 (vec_select:<ssehalfvecmode>
8731 (match_operand:V16FI 1 "register_operand" "v,v")
8732 (parallel [(const_int 0) (const_int 1)
8733 (const_int 2) (const_int 3)
8734 (const_int 4) (const_int 5)
8735 (const_int 6) (const_int 7)]))
8736 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C,0")
8737 (match_operand:QI 3 "register_operand" "Yk,Yk")))]
8738 "TARGET_AVX512DQ
8739 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[2]))"
8740 "vextract<shuffletype>32x8\t{$0x0, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x0}"
8741 [(set_attr "type" "sselog1")
8742 (set_attr "prefix_extra" "1")
8743 (set_attr "length_immediate" "1")
8744 (set_attr "memory" "none,store")
8745 (set_attr "prefix" "evex")
8746 (set_attr "mode" "<sseinsnmode>")])
8747
8748 (define_insn "vec_extract_lo_<mode>"
8749 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,v,m")
8750 (vec_select:<ssehalfvecmode>
8751 (match_operand:V16FI 1 "nonimmediate_operand" "v,m,v")
8752 (parallel [(const_int 0) (const_int 1)
8753 (const_int 2) (const_int 3)
8754 (const_int 4) (const_int 5)
8755 (const_int 6) (const_int 7)])))]
8756 "TARGET_AVX512F
8757 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8758 {
8759 if (!TARGET_AVX512VL
8760 && !REG_P (operands[0])
8761 && EXT_REX_SSE_REG_P (operands[1]))
8762 {
8763 if (TARGET_AVX512DQ)
8764 return "vextract<shuffletype>32x8\t{$0x0, %1, %0|%0, %1, 0x0}";
8765 else
8766 return "vextract<shuffletype>64x4\t{$0x0, %1, %0|%0, %1, 0x0}";
8767 }
8768 else
8769 return "#";
8770 }
8771 [(set_attr "type" "sselog1")
8772 (set_attr "prefix_extra" "1")
8773 (set_attr "length_immediate" "1")
8774 (set_attr "memory" "none,load,store")
8775 (set_attr "prefix" "evex")
8776 (set_attr "mode" "<sseinsnmode>")])
8777
8778 (define_split
8779 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
8780 (vec_select:<ssehalfvecmode>
8781 (match_operand:V16FI 1 "nonimmediate_operand")
8782 (parallel [(const_int 0) (const_int 1)
8783 (const_int 2) (const_int 3)
8784 (const_int 4) (const_int 5)
8785 (const_int 6) (const_int 7)])))]
8786 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
8787 && reload_completed
8788 && (TARGET_AVX512VL
8789 || REG_P (operands[0])
8790 || !EXT_REX_SSE_REG_P (operands[1]))"
8791 [(set (match_dup 0) (match_dup 1))]
8792 {
8793 if (!TARGET_AVX512VL
8794 && REG_P (operands[0])
8795 && EXT_REX_SSE_REG_P (operands[1]))
8796 operands[0]
8797 = lowpart_subreg (<MODE>mode, operands[0], <ssehalfvecmode>mode);
8798 else
8799 operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);
8800 })
8801
8802 (define_insn "vec_extract_lo_<mode>_mask"
8803 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
8804 (vec_merge:<ssehalfvecmode>
8805 (vec_select:<ssehalfvecmode>
8806 (match_operand:VI8F_256 1 "register_operand" "v,v")
8807 (parallel [(const_int 0) (const_int 1)]))
8808 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C,0")
8809 (match_operand:QI 3 "register_operand" "Yk,Yk")))]
8810 "TARGET_AVX512DQ
8811 && TARGET_AVX512VL
8812 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[2]))"
8813 "vextract<shuffletype>64x2\t{$0x0, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x0}"
8814 [(set_attr "type" "sselog1")
8815 (set_attr "prefix_extra" "1")
8816 (set_attr "length_immediate" "1")
8817 (set_attr "memory" "none,store")
8818 (set_attr "prefix" "evex")
8819 (set_attr "mode" "XI")])
8820
8821 (define_insn "vec_extract_lo_<mode>"
8822 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=vm,v")
8823 (vec_select:<ssehalfvecmode>
8824 (match_operand:VI8F_256 1 "nonimmediate_operand" "v,vm")
8825 (parallel [(const_int 0) (const_int 1)])))]
8826 "TARGET_AVX
8827 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8828 "#")
8829
8830 (define_split
8831 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
8832 (vec_select:<ssehalfvecmode>
8833 (match_operand:VI8F_256 1 "nonimmediate_operand")
8834 (parallel [(const_int 0) (const_int 1)])))]
8835 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))
8836 && reload_completed"
8837 [(set (match_dup 0) (match_dup 1))]
8838 "operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
8839
8840 (define_insn "vec_extract_hi_<mode>_mask"
8841 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
8842 (vec_merge:<ssehalfvecmode>
8843 (vec_select:<ssehalfvecmode>
8844 (match_operand:VI8F_256 1 "register_operand" "v,v")
8845 (parallel [(const_int 2) (const_int 3)]))
8846 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C,0")
8847 (match_operand:QI 3 "register_operand" "Yk,Yk")))]
8848 "TARGET_AVX512DQ
8849 && TARGET_AVX512VL
8850 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[2]))"
8851 "vextract<shuffletype>64x2\t{$0x1, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x1}"
8852 [(set_attr "type" "sselog1")
8853 (set_attr "prefix_extra" "1")
8854 (set_attr "length_immediate" "1")
8855 (set_attr "prefix" "vex")
8856 (set_attr "mode" "<sseinsnmode>")])
8857
8858 (define_insn "vec_extract_hi_<mode>"
8859 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=vm")
8860 (vec_select:<ssehalfvecmode>
8861 (match_operand:VI8F_256 1 "register_operand" "v")
8862 (parallel [(const_int 2) (const_int 3)])))]
8863 "TARGET_AVX"
8864 {
8865 if (TARGET_AVX512VL)
8866 {
8867 if (TARGET_AVX512DQ)
8868 return "vextract<shuffletype>64x2\t{$0x1, %1, %0|%0, %1, 0x1}";
8869 else
8870 return "vextract<shuffletype>32x4\t{$0x1, %1, %0|%0, %1, 0x1}";
8871 }
8872 else
8873 return "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}";
8874 }
8875 [(set_attr "type" "sselog1")
8876 (set_attr "prefix_extra" "1")
8877 (set_attr "length_immediate" "1")
8878 (set_attr "prefix" "vex")
8879 (set_attr "mode" "<sseinsnmode>")])
8880
8881 (define_split
8882 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
8883 (vec_select:<ssehalfvecmode>
8884 (match_operand:VI4F_256 1 "nonimmediate_operand")
8885 (parallel [(const_int 0) (const_int 1)
8886 (const_int 2) (const_int 3)])))]
8887 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))
8888 && reload_completed"
8889 [(set (match_dup 0) (match_dup 1))]
8890 "operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
8891
8892 (define_insn "vec_extract_lo_<mode>_mask"
8893 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
8894 (vec_merge:<ssehalfvecmode>
8895 (vec_select:<ssehalfvecmode>
8896 (match_operand:VI4F_256 1 "register_operand" "v,v")
8897 (parallel [(const_int 0) (const_int 1)
8898 (const_int 2) (const_int 3)]))
8899 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C,0")
8900 (match_operand:QI 3 "register_operand" "Yk,Yk")))]
8901 "TARGET_AVX512VL
8902 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[2]))"
8903 "vextract<shuffletype>32x4\t{$0x0, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x0}"
8904 [(set_attr "type" "sselog1")
8905 (set_attr "prefix_extra" "1")
8906 (set_attr "length_immediate" "1")
8907 (set_attr "prefix" "evex")
8908 (set_attr "mode" "<sseinsnmode>")])
8909
8910 (define_insn "vec_extract_lo_<mode>"
8911 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=vm,v")
8912 (vec_select:<ssehalfvecmode>
8913 (match_operand:VI4F_256 1 "nonimmediate_operand" "v,vm")
8914 (parallel [(const_int 0) (const_int 1)
8915 (const_int 2) (const_int 3)])))]
8916 "TARGET_AVX
8917 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8918 "#"
8919 [(set_attr "type" "sselog1")
8920 (set_attr "prefix_extra" "1")
8921 (set_attr "length_immediate" "1")
8922 (set_attr "prefix" "evex")
8923 (set_attr "mode" "<sseinsnmode>")])
8924
8925 (define_insn "vec_extract_hi_<mode>_mask"
8926 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
8927 (vec_merge:<ssehalfvecmode>
8928 (vec_select:<ssehalfvecmode>
8929 (match_operand:VI4F_256 1 "register_operand" "v,v")
8930 (parallel [(const_int 4) (const_int 5)
8931 (const_int 6) (const_int 7)]))
8932 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C,0")
8933 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
8934 "TARGET_AVX512VL
8935 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[2]))"
8936 "vextract<shuffletype>32x4\t{$0x1, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x1}"
8937 [(set_attr "type" "sselog1")
8938 (set_attr "length_immediate" "1")
8939 (set_attr "prefix" "evex")
8940 (set_attr "mode" "<sseinsnmode>")])
8941
8942 (define_insn "vec_extract_hi_<mode>"
8943 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=xm, vm")
8944 (vec_select:<ssehalfvecmode>
8945 (match_operand:VI4F_256 1 "register_operand" "x, v")
8946 (parallel [(const_int 4) (const_int 5)
8947 (const_int 6) (const_int 7)])))]
8948 "TARGET_AVX"
8949 "@
8950 vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}
8951 vextract<shuffletype>32x4\t{$0x1, %1, %0|%0, %1, 0x1}"
8952 [(set_attr "isa" "*, avx512vl")
8953 (set_attr "prefix" "vex, evex")
8954 (set_attr "type" "sselog1")
8955 (set_attr "length_immediate" "1")
8956 (set_attr "mode" "<sseinsnmode>")])
8957
8958 (define_insn_and_split "vec_extract_lo_v32hi"
8959 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,v,m")
8960 (vec_select:V16HI
8961 (match_operand:V32HI 1 "nonimmediate_operand" "v,m,v")
8962 (parallel [(const_int 0) (const_int 1)
8963 (const_int 2) (const_int 3)
8964 (const_int 4) (const_int 5)
8965 (const_int 6) (const_int 7)
8966 (const_int 8) (const_int 9)
8967 (const_int 10) (const_int 11)
8968 (const_int 12) (const_int 13)
8969 (const_int 14) (const_int 15)])))]
8970 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8971 {
8972 if (TARGET_AVX512VL
8973 || REG_P (operands[0])
8974 || !EXT_REX_SSE_REG_P (operands[1]))
8975 return "#";
8976 else
8977 return "vextracti64x4\t{$0x0, %1, %0|%0, %1, 0x0}";
8978 }
8979 "&& reload_completed
8980 && (TARGET_AVX512VL
8981 || REG_P (operands[0])
8982 || !EXT_REX_SSE_REG_P (operands[1]))"
8983 [(set (match_dup 0) (match_dup 1))]
8984 {
8985 if (!TARGET_AVX512VL
8986 && REG_P (operands[0])
8987 && EXT_REX_SSE_REG_P (operands[1]))
8988 operands[0] = lowpart_subreg (V32HImode, operands[0], V16HImode);
8989 else
8990 operands[1] = gen_lowpart (V16HImode, operands[1]);
8991 }
8992 [(set_attr "type" "sselog1")
8993 (set_attr "prefix_extra" "1")
8994 (set_attr "length_immediate" "1")
8995 (set_attr "memory" "none,load,store")
8996 (set_attr "prefix" "evex")
8997 (set_attr "mode" "XI")])
8998
8999 (define_insn "vec_extract_hi_v32hi"
9000 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=vm")
9001 (vec_select:V16HI
9002 (match_operand:V32HI 1 "register_operand" "v")
9003 (parallel [(const_int 16) (const_int 17)
9004 (const_int 18) (const_int 19)
9005 (const_int 20) (const_int 21)
9006 (const_int 22) (const_int 23)
9007 (const_int 24) (const_int 25)
9008 (const_int 26) (const_int 27)
9009 (const_int 28) (const_int 29)
9010 (const_int 30) (const_int 31)])))]
9011 "TARGET_AVX512F"
9012 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
9013 [(set_attr "type" "sselog1")
9014 (set_attr "prefix_extra" "1")
9015 (set_attr "length_immediate" "1")
9016 (set_attr "prefix" "evex")
9017 (set_attr "mode" "XI")])
9018
9019 (define_insn_and_split "vec_extract_lo_v16hi"
9020 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=v,m")
9021 (vec_select:V8HI
9022 (match_operand:V16HI 1 "nonimmediate_operand" "vm,v")
9023 (parallel [(const_int 0) (const_int 1)
9024 (const_int 2) (const_int 3)
9025 (const_int 4) (const_int 5)
9026 (const_int 6) (const_int 7)])))]
9027 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
9028 "#"
9029 "&& reload_completed"
9030 [(set (match_dup 0) (match_dup 1))]
9031 "operands[1] = gen_lowpart (V8HImode, operands[1]);")
9032
9033 (define_insn "vec_extract_hi_v16hi"
9034 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm,vm,vm")
9035 (vec_select:V8HI
9036 (match_operand:V16HI 1 "register_operand" "x,v,v")
9037 (parallel [(const_int 8) (const_int 9)
9038 (const_int 10) (const_int 11)
9039 (const_int 12) (const_int 13)
9040 (const_int 14) (const_int 15)])))]
9041 "TARGET_AVX"
9042 "@
9043 vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}
9044 vextracti32x4\t{$0x1, %1, %0|%0, %1, 0x1}
9045 vextracti32x4\t{$0x1, %g1, %0|%0, %g1, 0x1}"
9046 [(set_attr "type" "sselog1")
9047 (set_attr "prefix_extra" "1")
9048 (set_attr "length_immediate" "1")
9049 (set_attr "isa" "*,avx512dq,avx512f")
9050 (set_attr "prefix" "vex,evex,evex")
9051 (set_attr "mode" "OI")])
9052
9053 (define_insn_and_split "vec_extract_lo_v64qi"
9054 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,v,m")
9055 (vec_select:V32QI
9056 (match_operand:V64QI 1 "nonimmediate_operand" "v,m,v")
9057 (parallel [(const_int 0) (const_int 1)
9058 (const_int 2) (const_int 3)
9059 (const_int 4) (const_int 5)
9060 (const_int 6) (const_int 7)
9061 (const_int 8) (const_int 9)
9062 (const_int 10) (const_int 11)
9063 (const_int 12) (const_int 13)
9064 (const_int 14) (const_int 15)
9065 (const_int 16) (const_int 17)
9066 (const_int 18) (const_int 19)
9067 (const_int 20) (const_int 21)
9068 (const_int 22) (const_int 23)
9069 (const_int 24) (const_int 25)
9070 (const_int 26) (const_int 27)
9071 (const_int 28) (const_int 29)
9072 (const_int 30) (const_int 31)])))]
9073 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
9074 {
9075 if (TARGET_AVX512VL
9076 || REG_P (operands[0])
9077 || !EXT_REX_SSE_REG_P (operands[1]))
9078 return "#";
9079 else
9080 return "vextracti64x4\t{$0x0, %1, %0|%0, %1, 0x0}";
9081 }
9082 "&& reload_completed
9083 && (TARGET_AVX512VL
9084 || REG_P (operands[0])
9085 || !EXT_REX_SSE_REG_P (operands[1]))"
9086 [(set (match_dup 0) (match_dup 1))]
9087 {
9088 if (!TARGET_AVX512VL
9089 && REG_P (operands[0])
9090 && EXT_REX_SSE_REG_P (operands[1]))
9091 operands[0] = lowpart_subreg (V64QImode, operands[0], V32QImode);
9092 else
9093 operands[1] = gen_lowpart (V32QImode, operands[1]);
9094 }
9095 [(set_attr "type" "sselog1")
9096 (set_attr "prefix_extra" "1")
9097 (set_attr "length_immediate" "1")
9098 (set_attr "memory" "none,load,store")
9099 (set_attr "prefix" "evex")
9100 (set_attr "mode" "XI")])
9101
9102 (define_insn "vec_extract_hi_v64qi"
9103 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=vm")
9104 (vec_select:V32QI
9105 (match_operand:V64QI 1 "register_operand" "v")
9106 (parallel [(const_int 32) (const_int 33)
9107 (const_int 34) (const_int 35)
9108 (const_int 36) (const_int 37)
9109 (const_int 38) (const_int 39)
9110 (const_int 40) (const_int 41)
9111 (const_int 42) (const_int 43)
9112 (const_int 44) (const_int 45)
9113 (const_int 46) (const_int 47)
9114 (const_int 48) (const_int 49)
9115 (const_int 50) (const_int 51)
9116 (const_int 52) (const_int 53)
9117 (const_int 54) (const_int 55)
9118 (const_int 56) (const_int 57)
9119 (const_int 58) (const_int 59)
9120 (const_int 60) (const_int 61)
9121 (const_int 62) (const_int 63)])))]
9122 "TARGET_AVX512F"
9123 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
9124 [(set_attr "type" "sselog1")
9125 (set_attr "prefix_extra" "1")
9126 (set_attr "length_immediate" "1")
9127 (set_attr "prefix" "evex")
9128 (set_attr "mode" "XI")])
9129
9130 (define_insn_and_split "vec_extract_lo_v32qi"
9131 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=v,m")
9132 (vec_select:V16QI
9133 (match_operand:V32QI 1 "nonimmediate_operand" "vm,v")
9134 (parallel [(const_int 0) (const_int 1)
9135 (const_int 2) (const_int 3)
9136 (const_int 4) (const_int 5)
9137 (const_int 6) (const_int 7)
9138 (const_int 8) (const_int 9)
9139 (const_int 10) (const_int 11)
9140 (const_int 12) (const_int 13)
9141 (const_int 14) (const_int 15)])))]
9142 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
9143 "#"
9144 "&& reload_completed"
9145 [(set (match_dup 0) (match_dup 1))]
9146 "operands[1] = gen_lowpart (V16QImode, operands[1]);")
9147
9148 (define_insn "vec_extract_hi_v32qi"
9149 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=xm,vm,vm")
9150 (vec_select:V16QI
9151 (match_operand:V32QI 1 "register_operand" "x,v,v")
9152 (parallel [(const_int 16) (const_int 17)
9153 (const_int 18) (const_int 19)
9154 (const_int 20) (const_int 21)
9155 (const_int 22) (const_int 23)
9156 (const_int 24) (const_int 25)
9157 (const_int 26) (const_int 27)
9158 (const_int 28) (const_int 29)
9159 (const_int 30) (const_int 31)])))]
9160 "TARGET_AVX"
9161 "@
9162 vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}
9163 vextracti32x4\t{$0x1, %1, %0|%0, %1, 0x1}
9164 vextracti32x4\t{$0x1, %g1, %0|%0, %g1, 0x1}"
9165 [(set_attr "type" "sselog1")
9166 (set_attr "prefix_extra" "1")
9167 (set_attr "length_immediate" "1")
9168 (set_attr "isa" "*,avx512dq,avx512f")
9169 (set_attr "prefix" "vex,evex,evex")
9170 (set_attr "mode" "OI")])
9171
9172 ;; Modes handled by vec_extract patterns.
9173 (define_mode_iterator VEC_EXTRACT_MODE
9174 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
9175 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
9176 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
9177 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
9178 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
9179 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF
9180 (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX")])
9181
9182 (define_expand "vec_extract<mode><ssescalarmodelower>"
9183 [(match_operand:<ssescalarmode> 0 "register_operand")
9184 (match_operand:VEC_EXTRACT_MODE 1 "register_operand")
9185 (match_operand 2 "const_int_operand")]
9186 "TARGET_SSE"
9187 {
9188 ix86_expand_vector_extract (false, operands[0], operands[1],
9189 INTVAL (operands[2]));
9190 DONE;
9191 })
9192
9193 (define_expand "vec_extract<mode><ssehalfvecmodelower>"
9194 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
9195 (match_operand:V_256_512 1 "register_operand")
9196 (match_operand 2 "const_0_to_1_operand")]
9197 "TARGET_AVX"
9198 {
9199 if (INTVAL (operands[2]))
9200 emit_insn (gen_vec_extract_hi_<mode> (operands[0], operands[1]));
9201 else
9202 emit_insn (gen_vec_extract_lo_<mode> (operands[0], operands[1]));
9203 DONE;
9204 })
9205
9206 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9207 ;;
9208 ;; Parallel double-precision floating point element swizzling
9209 ;;
9210 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9211
9212 (define_insn "<mask_codefor>avx512f_unpckhpd512<mask_name>"
9213 [(set (match_operand:V8DF 0 "register_operand" "=v")
9214 (vec_select:V8DF
9215 (vec_concat:V16DF
9216 (match_operand:V8DF 1 "register_operand" "v")
9217 (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
9218 (parallel [(const_int 1) (const_int 9)
9219 (const_int 3) (const_int 11)
9220 (const_int 5) (const_int 13)
9221 (const_int 7) (const_int 15)])))]
9222 "TARGET_AVX512F"
9223 "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9224 [(set_attr "type" "sselog")
9225 (set_attr "prefix" "evex")
9226 (set_attr "mode" "V8DF")])
9227
9228 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
9229 (define_insn "avx_unpckhpd256<mask_name>"
9230 [(set (match_operand:V4DF 0 "register_operand" "=v")
9231 (vec_select:V4DF
9232 (vec_concat:V8DF
9233 (match_operand:V4DF 1 "register_operand" "v")
9234 (match_operand:V4DF 2 "nonimmediate_operand" "vm"))
9235 (parallel [(const_int 1) (const_int 5)
9236 (const_int 3) (const_int 7)])))]
9237 "TARGET_AVX && <mask_avx512vl_condition>"
9238 "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9239 [(set_attr "type" "sselog")
9240 (set_attr "prefix" "vex")
9241 (set_attr "mode" "V4DF")])
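;; As a concrete mapping: for ymm operands a = { a0, a1, a2, a3 } and
;; b = { b0, b1, b2, b3 }, vunpckhpd produces { a1, b1, a3, b3 }, i.e. the
;; high element of each 128-bit lane of both inputs, not a full cross-lane
;; interleave.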
9242
9243 (define_expand "vec_interleave_highv4df"
9244 [(set (match_dup 3)
9245 (vec_select:V4DF
9246 (vec_concat:V8DF
9247 (match_operand:V4DF 1 "register_operand")
9248 (match_operand:V4DF 2 "nonimmediate_operand"))
9249 (parallel [(const_int 0) (const_int 4)
9250 (const_int 2) (const_int 6)])))
9251 (set (match_dup 4)
9252 (vec_select:V4DF
9253 (vec_concat:V8DF
9254 (match_dup 1)
9255 (match_dup 2))
9256 (parallel [(const_int 1) (const_int 5)
9257 (const_int 3) (const_int 7)])))
9258 (set (match_operand:V4DF 0 "register_operand")
9259 (vec_select:V4DF
9260 (vec_concat:V8DF
9261 (match_dup 3)
9262 (match_dup 4))
9263 (parallel [(const_int 2) (const_int 3)
9264 (const_int 6) (const_int 7)])))]
9265 "TARGET_AVX"
9266 {
9267 operands[3] = gen_reg_rtx (V4DFmode);
9268 operands[4] = gen_reg_rtx (V4DFmode);
9269 })
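;; The expander above emulates a true cross-lane interleave-high in three
;; steps: the first unpck yields { a0, b0, a2, b2 }, the second
;; { a1, b1, a3, b3 }, and the final select of elements 2 3 6 7 (normally
;; matched as a vperm2f128-style lane permute) takes the high 128-bit lane
;; of each temporary, giving { a2, b2, a3, b3 }.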
9270
9271
9272 (define_insn "avx512vl_unpckhpd128_mask"
9273 [(set (match_operand:V2DF 0 "register_operand" "=v")
9274 (vec_merge:V2DF
9275 (vec_select:V2DF
9276 (vec_concat:V4DF
9277 (match_operand:V2DF 1 "register_operand" "v")
9278 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
9279 (parallel [(const_int 1) (const_int 3)]))
9280 (match_operand:V2DF 3 "nonimm_or_0_operand" "0C")
9281 (match_operand:QI 4 "register_operand" "Yk")))]
9282 "TARGET_AVX512VL"
9283 "vunpckhpd\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
9284 [(set_attr "type" "sselog")
9285 (set_attr "prefix" "evex")
9286 (set_attr "mode" "V2DF")])
9287
9288 (define_expand "vec_interleave_highv2df"
9289 [(set (match_operand:V2DF 0 "register_operand")
9290 (vec_select:V2DF
9291 (vec_concat:V4DF
9292 (match_operand:V2DF 1 "nonimmediate_operand")
9293 (match_operand:V2DF 2 "nonimmediate_operand"))
9294 (parallel [(const_int 1)
9295 (const_int 3)])))]
9296 "TARGET_SSE2"
9297 {
9298 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
9299 operands[2] = force_reg (V2DFmode, operands[2]);
9300 })
9301
9302 (define_insn "*vec_interleave_highv2df"
9303 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,v,v,x,v,m")
9304 (vec_select:V2DF
9305 (vec_concat:V4DF
9306 (match_operand:V2DF 1 "nonimmediate_operand" " 0,v,o,o,o,v")
9307 (match_operand:V2DF 2 "nonimmediate_operand" " x,v,1,0,v,0"))
9308 (parallel [(const_int 1)
9309 (const_int 3)])))]
9310 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
9311 "@
9312 unpckhpd\t{%2, %0|%0, %2}
9313 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
9314 %vmovddup\t{%H1, %0|%0, %H1}
9315 movlpd\t{%H1, %0|%0, %H1}
9316 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
9317 %vmovhpd\t{%1, %0|%q0, %1}"
9318 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
9319 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
9320 (set (attr "prefix_data16")
9321 (if_then_else (eq_attr "alternative" "3,5")
9322 (const_string "1")
9323 (const_string "*")))
9324 (set_attr "prefix" "orig,maybe_evex,maybe_vex,orig,maybe_evex,maybe_vex")
9325 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
9326
9327 (define_expand "avx512f_movddup512<mask_name>"
9328 [(set (match_operand:V8DF 0 "register_operand")
9329 (vec_select:V8DF
9330 (vec_concat:V16DF
9331 (match_operand:V8DF 1 "nonimmediate_operand")
9332 (match_dup 1))
9333 (parallel [(const_int 0) (const_int 8)
9334 (const_int 2) (const_int 10)
9335 (const_int 4) (const_int 12)
9336 (const_int 6) (const_int 14)])))]
9337 "TARGET_AVX512F")
9338
9339 (define_expand "avx512f_unpcklpd512<mask_name>"
9340 [(set (match_operand:V8DF 0 "register_operand")
9341 (vec_select:V8DF
9342 (vec_concat:V16DF
9343 (match_operand:V8DF 1 "register_operand")
9344 (match_operand:V8DF 2 "nonimmediate_operand"))
9345 (parallel [(const_int 0) (const_int 8)
9346 (const_int 2) (const_int 10)
9347 (const_int 4) (const_int 12)
9348 (const_int 6) (const_int 14)])))]
9349 "TARGET_AVX512F")
9350
9351 (define_insn "*avx512f_unpcklpd512<mask_name>"
9352 [(set (match_operand:V8DF 0 "register_operand" "=v,v")
9353 (vec_select:V8DF
9354 (vec_concat:V16DF
9355 (match_operand:V8DF 1 "nonimmediate_operand" "vm, v")
9356 (match_operand:V8DF 2 "nonimmediate_operand" "1 ,vm"))
9357 (parallel [(const_int 0) (const_int 8)
9358 (const_int 2) (const_int 10)
9359 (const_int 4) (const_int 12)
9360 (const_int 6) (const_int 14)])))]
9361 "TARGET_AVX512F"
9362 "@
9363 vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}
9364 vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9365 [(set_attr "type" "sselog")
9366 (set_attr "prefix" "evex")
9367 (set_attr "mode" "V8DF")])
9368
9369 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
9370 (define_expand "avx_movddup256<mask_name>"
9371 [(set (match_operand:V4DF 0 "register_operand")
9372 (vec_select:V4DF
9373 (vec_concat:V8DF
9374 (match_operand:V4DF 1 "nonimmediate_operand")
9375 (match_dup 1))
9376 (parallel [(const_int 0) (const_int 4)
9377 (const_int 2) (const_int 6)])))]
9378 "TARGET_AVX && <mask_avx512vl_condition>")
9379
9380 (define_expand "avx_unpcklpd256<mask_name>"
9381 [(set (match_operand:V4DF 0 "register_operand")
9382 (vec_select:V4DF
9383 (vec_concat:V8DF
9384 (match_operand:V4DF 1 "register_operand")
9385 (match_operand:V4DF 2 "nonimmediate_operand"))
9386 (parallel [(const_int 0) (const_int 4)
9387 (const_int 2) (const_int 6)])))]
9388 "TARGET_AVX && <mask_avx512vl_condition>")
9389
9390 (define_insn "*avx_unpcklpd256<mask_name>"
9391 [(set (match_operand:V4DF 0 "register_operand" "=v,v")
9392 (vec_select:V4DF
9393 (vec_concat:V8DF
9394 (match_operand:V4DF 1 "nonimmediate_operand" " v,m")
9395 (match_operand:V4DF 2 "nonimmediate_operand" "vm,1"))
9396 (parallel [(const_int 0) (const_int 4)
9397 (const_int 2) (const_int 6)])))]
9398 "TARGET_AVX && <mask_avx512vl_condition>"
9399 "@
9400 vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
9401 vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}"
9402 [(set_attr "type" "sselog")
9403 (set_attr "prefix" "vex")
9404 (set_attr "mode" "V4DF")])
9405
9406 (define_expand "vec_interleave_lowv4df"
9407 [(set (match_dup 3)
9408 (vec_select:V4DF
9409 (vec_concat:V8DF
9410 (match_operand:V4DF 1 "register_operand")
9411 (match_operand:V4DF 2 "nonimmediate_operand"))
9412 (parallel [(const_int 0) (const_int 4)
9413 (const_int 2) (const_int 6)])))
9414 (set (match_dup 4)
9415 (vec_select:V4DF
9416 (vec_concat:V8DF
9417 (match_dup 1)
9418 (match_dup 2))
9419 (parallel [(const_int 1) (const_int 5)
9420 (const_int 3) (const_int 7)])))
9421 (set (match_operand:V4DF 0 "register_operand")
9422 (vec_select:V4DF
9423 (vec_concat:V8DF
9424 (match_dup 3)
9425 (match_dup 4))
9426 (parallel [(const_int 0) (const_int 1)
9427 (const_int 4) (const_int 5)])))]
9428 "TARGET_AVX"
9429 {
9430 operands[3] = gen_reg_rtx (V4DFmode);
9431 operands[4] = gen_reg_rtx (V4DFmode);
9432 })
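;; Mirror image of vec_interleave_highv4df above: the final select of
;; elements 0 1 4 5 takes the low 128-bit lane of each temporary, giving
;; { a0, b0, a1, b1 }.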
9433
9434 (define_insn "avx512vl_unpcklpd128_mask"
9435 [(set (match_operand:V2DF 0 "register_operand" "=v")
9436 (vec_merge:V2DF
9437 (vec_select:V2DF
9438 (vec_concat:V4DF
9439 (match_operand:V2DF 1 "register_operand" "v")
9440 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
9441 (parallel [(const_int 0) (const_int 2)]))
9442 (match_operand:V2DF 3 "nonimm_or_0_operand" "0C")
9443 (match_operand:QI 4 "register_operand" "Yk")))]
9444 "TARGET_AVX512VL"
9445 "vunpcklpd\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
9446 [(set_attr "type" "sselog")
9447 (set_attr "prefix" "evex")
9448 (set_attr "mode" "V2DF")])
9449
9450 (define_expand "vec_interleave_lowv2df"
9451 [(set (match_operand:V2DF 0 "register_operand")
9452 (vec_select:V2DF
9453 (vec_concat:V4DF
9454 (match_operand:V2DF 1 "nonimmediate_operand")
9455 (match_operand:V2DF 2 "nonimmediate_operand"))
9456 (parallel [(const_int 0)
9457 (const_int 2)])))]
9458 "TARGET_SSE2"
9459 {
9460 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
9461 operands[1] = force_reg (V2DFmode, operands[1]);
9462 })
9463
9464 (define_insn "*vec_interleave_lowv2df"
9465 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,v,v,x,v,o")
9466 (vec_select:V2DF
9467 (vec_concat:V4DF
9468 (match_operand:V2DF 1 "nonimmediate_operand" " 0,v,m,0,v,0")
9469 (match_operand:V2DF 2 "nonimmediate_operand" " x,v,1,m,m,v"))
9470 (parallel [(const_int 0)
9471 (const_int 2)])))]
9472 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
9473 "@
9474 unpcklpd\t{%2, %0|%0, %2}
9475 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
9476 %vmovddup\t{%1, %0|%0, %q1}
9477 movhpd\t{%2, %0|%0, %q2}
9478 vmovhpd\t{%2, %1, %0|%0, %1, %q2}
9479 %vmovlpd\t{%2, %H0|%H0, %2}"
9480 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
9481 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
9482 (set (attr "prefix_data16")
9483 (if_then_else (eq_attr "alternative" "3,5")
9484 (const_string "1")
9485 (const_string "*")))
9486 (set_attr "prefix" "orig,maybe_evex,maybe_vex,orig,maybe_evex,maybe_vex")
9487 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
9488
9489 (define_split
9490 [(set (match_operand:V2DF 0 "memory_operand")
9491 (vec_select:V2DF
9492 (vec_concat:V4DF
9493 (match_operand:V2DF 1 "register_operand")
9494 (match_dup 1))
9495 (parallel [(const_int 0)
9496 (const_int 2)])))]
9497 "TARGET_SSE3 && reload_completed"
9498 [(const_int 0)]
9499 {
9500 rtx low = gen_lowpart (DFmode, operands[1]);
9501
9502 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
9503 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
9504 DONE;
9505 })
9506
9507 (define_split
9508 [(set (match_operand:V2DF 0 "register_operand")
9509 (vec_select:V2DF
9510 (vec_concat:V4DF
9511 (match_operand:V2DF 1 "memory_operand")
9512 (match_dup 1))
9513 (parallel [(match_operand:SI 2 "const_0_to_1_operand")
9514 (match_operand:SI 3 "const_int_operand")])))]
9515 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
9516 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
9517 {
9518 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
9519 })
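;; The two splits above handle movddup-shaped RTL around memory: the first
;; turns a store of a duplicated low element into two scalar DF stores, and
;; the second rewrites a duplicate of either element of a memory operand as
;; a vec_duplicate of the selected DF word (offset 0 or 8), which the
;; movddup patterns then match.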
9520
9521 (define_insn "avx512f_vmscalef<mode><mask_scalar_name><round_scalar_name>"
9522 [(set (match_operand:VF_128 0 "register_operand" "=v")
9523 (vec_merge:VF_128
9524 (unspec:VF_128
9525 [(match_operand:VF_128 1 "register_operand" "v")
9526 (match_operand:VF_128 2 "<round_scalar_nimm_predicate>" "<round_scalar_constraint>")]
9527 UNSPEC_SCALEF)
9528 (match_dup 1)
9529 (const_int 1)))]
9530 "TARGET_AVX512F"
9531 "vscalef<ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %2<round_scalar_mask_op3>}"
9532 [(set_attr "prefix" "evex")
9533 (set_attr "mode" "<ssescalarmode>")])
9534
9535 (define_insn "<avx512>_scalef<mode><mask_name><round_name>"
9536 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
9537 (unspec:VF_AVX512VL
9538 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
9539 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")]
9540 UNSPEC_SCALEF))]
9541 "TARGET_AVX512F"
9542 "vscalef<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
9543 [(set_attr "prefix" "evex")
9544 (set_attr "mode" "<MODE>")])
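;; VSCALEF computes operand 1 * 2^floor(operand 2) element-wise (roughly
;; the _mm512_scalef_* intrinsic family); the avx512f_vmscalef pattern
;; above is the scalar form, which merges the result into the low element
;; of operand 1.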
9545
9546 (define_expand "<avx512>_vternlog<mode>_maskz"
9547 [(match_operand:VI48_AVX512VL 0 "register_operand")
9548 (match_operand:VI48_AVX512VL 1 "register_operand")
9549 (match_operand:VI48_AVX512VL 2 "register_operand")
9550 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand")
9551 (match_operand:SI 4 "const_0_to_255_operand")
9552 (match_operand:<avx512fmaskmode> 5 "register_operand")]
9553 "TARGET_AVX512F"
9554 {
9555 emit_insn (gen_<avx512>_vternlog<mode>_maskz_1 (
9556 operands[0], operands[1], operands[2], operands[3],
9557 operands[4], CONST0_RTX (<MODE>mode), operands[5]));
9558 DONE;
9559 })
9560
9561 (define_insn "<avx512>_vternlog<mode><sd_maskz_name>"
9562 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
9563 (unspec:VI48_AVX512VL
9564 [(match_operand:VI48_AVX512VL 1 "register_operand" "0")
9565 (match_operand:VI48_AVX512VL 2 "register_operand" "v")
9566 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand" "vm")
9567 (match_operand:SI 4 "const_0_to_255_operand")]
9568 UNSPEC_VTERNLOG))]
9569 "TARGET_AVX512F"
9570 "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3, %4}"
9571 [(set_attr "type" "sselog")
9572 (set_attr "prefix" "evex")
9573 (set_attr "mode" "<sseinsnmode>")])
9574
9575 (define_insn "<avx512>_vternlog<mode>_mask"
9576 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
9577 (vec_merge:VI48_AVX512VL
9578 (unspec:VI48_AVX512VL
9579 [(match_operand:VI48_AVX512VL 1 "register_operand" "0")
9580 (match_operand:VI48_AVX512VL 2 "register_operand" "v")
9581 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand" "vm")
9582 (match_operand:SI 4 "const_0_to_255_operand")]
9583 UNSPEC_VTERNLOG)
9584 (match_dup 1)
9585 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
9586 "TARGET_AVX512F"
9587 "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0%{%5%}|%0%{%5%}, %2, %3, %4}"
9588 [(set_attr "type" "sselog")
9589 (set_attr "prefix" "evex")
9590 (set_attr "mode" "<sseinsnmode>")])
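;; For VPTERNLOG the immediate is an 8-entry truth table indexed by the bit
;; triple taken from operands 1, 2 and 3: for example 0x96 implements a
;; three-way XOR and 0xe8 the majority function, so an arbitrary three-input
;; bitwise expression collapses into a single instruction.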
9591
9592 (define_insn "<avx512>_getexp<mode><mask_name><round_saeonly_name>"
9593 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
9594 (unspec:VF_AVX512VL [(match_operand:VF_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
9595 UNSPEC_GETEXP))]
9596 "TARGET_AVX512F"
9597 "vgetexp<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}";
9598 [(set_attr "prefix" "evex")
9599 (set_attr "mode" "<MODE>")])
9600
9601 (define_insn "avx512f_sgetexp<mode><mask_scalar_name><round_saeonly_scalar_name>"
9602 [(set (match_operand:VF_128 0 "register_operand" "=v")
9603 (vec_merge:VF_128
9604 (unspec:VF_128
9605 [(match_operand:VF_128 1 "register_operand" "v")
9606 (match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")]
9607 UNSPEC_GETEXP)
9608 (match_dup 1)
9609 (const_int 1)))]
9610 "TARGET_AVX512F"
9611 "vgetexp<ssescalarmodesuffix>\t{<round_saeonly_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_saeonly_scalar_mask_op3>}";
9612 [(set_attr "prefix" "evex")
9613 (set_attr "mode" "<ssescalarmode>")])
9614
9615 (define_insn "<mask_codefor><avx512>_align<mode><mask_name>"
9616 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
9617 (unspec:VI48_AVX512VL [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
9618 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
9619 (match_operand:SI 3 "const_0_to_255_operand")]
9620 UNSPEC_ALIGN))]
9621 "TARGET_AVX512F"
9622 "valign<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
9623 [(set_attr "prefix" "evex")
9624 (set_attr "mode" "<sseinsnmode>")])
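;; VALIGND/VALIGNQ concatenate operand 1 (high half) with operand 2 (low
;; half), shift the whole right by the immediate number of elements and
;; keep the low part; e.g. valignd with an immediate of 3 on V16SI yields
;; { op2[3..15], op1[0..2] }.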
9625
9626 (define_expand "avx512f_shufps512_mask"
9627 [(match_operand:V16SF 0 "register_operand")
9628 (match_operand:V16SF 1 "register_operand")
9629 (match_operand:V16SF 2 "nonimmediate_operand")
9630 (match_operand:SI 3 "const_0_to_255_operand")
9631 (match_operand:V16SF 4 "register_operand")
9632 (match_operand:HI 5 "register_operand")]
9633 "TARGET_AVX512F"
9634 {
9635 int mask = INTVAL (operands[3]);
9636 emit_insn (gen_avx512f_shufps512_1_mask (operands[0], operands[1], operands[2],
9637 GEN_INT ((mask >> 0) & 3),
9638 GEN_INT ((mask >> 2) & 3),
9639 GEN_INT (((mask >> 4) & 3) + 16),
9640 GEN_INT (((mask >> 6) & 3) + 16),
9641 GEN_INT (((mask >> 0) & 3) + 4),
9642 GEN_INT (((mask >> 2) & 3) + 4),
9643 GEN_INT (((mask >> 4) & 3) + 20),
9644 GEN_INT (((mask >> 6) & 3) + 20),
9645 GEN_INT (((mask >> 0) & 3) + 8),
9646 GEN_INT (((mask >> 2) & 3) + 8),
9647 GEN_INT (((mask >> 4) & 3) + 24),
9648 GEN_INT (((mask >> 6) & 3) + 24),
9649 GEN_INT (((mask >> 0) & 3) + 12),
9650 GEN_INT (((mask >> 2) & 3) + 12),
9651 GEN_INT (((mask >> 4) & 3) + 28),
9652 GEN_INT (((mask >> 6) & 3) + 28),
9653 operands[4], operands[5]));
9654 DONE;
9655 })
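;; The expander above decodes the 8-bit vshufps immediate into explicit
;; element indices: the same four 2-bit selectors are reused in every
;; 128-bit lane, with elements 0-1 of each lane taken from operand 1 and
;; elements 2-3 from operand 2 (hence the +16/+20/+24/+28 offsets into the
;; 32-element concatenation).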
9656
9657
9658 (define_expand "<avx512>_fixupimm<mode>_maskz<round_saeonly_expand_name>"
9659 [(match_operand:VF_AVX512VL 0 "register_operand")
9660 (match_operand:VF_AVX512VL 1 "register_operand")
9661 (match_operand:VF_AVX512VL 2 "register_operand")
9662 (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>")
9663 (match_operand:SI 4 "const_0_to_255_operand")
9664 (match_operand:<avx512fmaskmode> 5 "register_operand")]
9665 "TARGET_AVX512F"
9666 {
9667 emit_insn (gen_<avx512>_fixupimm<mode>_maskz_1<round_saeonly_expand_name> (
9668 operands[0], operands[1], operands[2], operands[3],
9669 operands[4], CONST0_RTX (<MODE>mode), operands[5]
9670 <round_saeonly_expand_operand6>));
9671 DONE;
9672 })
9673
9674 (define_insn "<avx512>_fixupimm<mode><sd_maskz_name><round_saeonly_name>"
9675 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
9676 (unspec:VF_AVX512VL
9677 [(match_operand:VF_AVX512VL 1 "register_operand" "0")
9678 (match_operand:VF_AVX512VL 2 "register_operand" "v")
9679 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>")
9680 (match_operand:SI 4 "const_0_to_255_operand")]
9681 UNSPEC_FIXUPIMM))]
9682 "TARGET_AVX512F"
9683 "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3<round_saeonly_sd_mask_op5>, %4}";
9684 [(set_attr "prefix" "evex")
9685 (set_attr "mode" "<MODE>")])
9686
9687 (define_insn "<avx512>_fixupimm<mode>_mask<round_saeonly_name>"
9688 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
9689 (vec_merge:VF_AVX512VL
9690 (unspec:VF_AVX512VL
9691 [(match_operand:VF_AVX512VL 1 "register_operand" "0")
9692 (match_operand:VF_AVX512VL 2 "register_operand" "v")
9693 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>")
9694 (match_operand:SI 4 "const_0_to_255_operand")]
9695 UNSPEC_FIXUPIMM)
9696 (match_dup 1)
9697 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
9698 "TARGET_AVX512F"
9699 "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %3<round_saeonly_op6>, %4}";
9700 [(set_attr "prefix" "evex")
9701 (set_attr "mode" "<MODE>")])
9702
9703 (define_expand "avx512f_sfixupimm<mode>_maskz<round_saeonly_expand_name>"
9704 [(match_operand:VF_128 0 "register_operand")
9705 (match_operand:VF_128 1 "register_operand")
9706 (match_operand:VF_128 2 "register_operand")
9707 (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>")
9708 (match_operand:SI 4 "const_0_to_255_operand")
9709 (match_operand:<avx512fmaskmode> 5 "register_operand")]
9710 "TARGET_AVX512F"
9711 {
9712 emit_insn (gen_avx512f_sfixupimm<mode>_maskz_1<round_saeonly_expand_name> (
9713 operands[0], operands[1], operands[2], operands[3],
9714 operands[4], CONST0_RTX (<MODE>mode), operands[5]
9715 <round_saeonly_expand_operand6>));
9716 DONE;
9717 })
9718
9719 (define_insn "avx512f_sfixupimm<mode><sd_maskz_name><round_saeonly_name>"
9720 [(set (match_operand:VF_128 0 "register_operand" "=v")
9721 (vec_merge:VF_128
9722 (unspec:VF_128
9723 [(match_operand:VF_128 1 "register_operand" "0")
9724 (match_operand:VF_128 2 "register_operand" "v")
9725 (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
9726 (match_operand:SI 4 "const_0_to_255_operand")]
9727 UNSPEC_FIXUPIMM)
9728 (match_dup 1)
9729 (const_int 1)))]
9730 "TARGET_AVX512F"
9731 "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %<iptr>3<round_saeonly_sd_mask_op5>, %4}";
9732 [(set_attr "prefix" "evex")
9733 (set_attr "mode" "<ssescalarmode>")])
9734
9735 (define_insn "avx512f_sfixupimm<mode>_mask<round_saeonly_name>"
9736 [(set (match_operand:VF_128 0 "register_operand" "=v")
9737 (vec_merge:VF_128
9738 (vec_merge:VF_128
9739 (unspec:VF_128
9740 [(match_operand:VF_128 1 "register_operand" "0")
9741 (match_operand:VF_128 2 "register_operand" "v")
9742 (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
9743 (match_operand:SI 4 "const_0_to_255_operand")]
9744 UNSPEC_FIXUPIMM)
9745 (match_dup 1)
9746 (const_int 1))
9747 (match_dup 1)
9748 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
9749 "TARGET_AVX512F"
9750 "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %<iptr>3<round_saeonly_op6>, %4}";
9751 [(set_attr "prefix" "evex")
9752 (set_attr "mode" "<ssescalarmode>")])
9753
9754 (define_insn "<avx512>_rndscale<mode><mask_name><round_saeonly_name>"
9755 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
9756 (unspec:VF_AVX512VL
9757 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>")
9758 (match_operand:SI 2 "const_0_to_255_operand")]
9759 UNSPEC_ROUND))]
9760 "TARGET_AVX512F"
9761 "vrndscale<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}"
9762 [(set_attr "length_immediate" "1")
9763 (set_attr "prefix" "evex")
9764 (set_attr "mode" "<MODE>")])
9765
9766 (define_insn "avx512f_rndscale<mode><mask_scalar_name><round_saeonly_scalar_name>"
9767 [(set (match_operand:VF_128 0 "register_operand" "=v")
9768 (vec_merge:VF_128
9769 (unspec:VF_128
9770 [(match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")
9771 (match_operand:SI 3 "const_0_to_255_operand")]
9772 UNSPEC_ROUND)
9773 (match_operand:VF_128 1 "register_operand" "v")
9774 (const_int 1)))]
9775 "TARGET_AVX512F"
9776 "vrndscale<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2<round_saeonly_scalar_mask_op4>, %3}"
9777 [(set_attr "length_immediate" "1")
9778 (set_attr "prefix" "evex")
9779 (set_attr "mode" "<MODE>")])
9780
9781 (define_insn "*avx512f_rndscale<mode><round_saeonly_name>"
9782 [(set (match_operand:VF_128 0 "register_operand" "=v")
9783 (vec_merge:VF_128
9784 (vec_duplicate:VF_128
9785 (unspec:<ssescalarmode>
9786 [(match_operand:<ssescalarmode> 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
9787 (match_operand:SI 3 "const_0_to_255_operand")]
9788 UNSPEC_ROUND))
9789 (match_operand:VF_128 1 "register_operand" "v")
9790 (const_int 1)))]
9791 "TARGET_AVX512F"
9792 "vrndscale<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}"
9793 [(set_attr "length_immediate" "1")
9794 (set_attr "prefix" "evex")
9795 (set_attr "mode" "<MODE>")])
9796
9797 ;; One bit in mask selects 2 elements.
9798 (define_insn "avx512f_shufps512_1<mask_name>"
9799 [(set (match_operand:V16SF 0 "register_operand" "=v")
9800 (vec_select:V16SF
9801 (vec_concat:V32SF
9802 (match_operand:V16SF 1 "register_operand" "v")
9803 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
9804 (parallel [(match_operand 3 "const_0_to_3_operand")
9805 (match_operand 4 "const_0_to_3_operand")
9806 (match_operand 5 "const_16_to_19_operand")
9807 (match_operand 6 "const_16_to_19_operand")
9808 (match_operand 7 "const_4_to_7_operand")
9809 (match_operand 8 "const_4_to_7_operand")
9810 (match_operand 9 "const_20_to_23_operand")
9811 (match_operand 10 "const_20_to_23_operand")
9812 (match_operand 11 "const_8_to_11_operand")
9813 (match_operand 12 "const_8_to_11_operand")
9814 (match_operand 13 "const_24_to_27_operand")
9815 (match_operand 14 "const_24_to_27_operand")
9816 (match_operand 15 "const_12_to_15_operand")
9817 (match_operand 16 "const_12_to_15_operand")
9818 (match_operand 17 "const_28_to_31_operand")
9819 (match_operand 18 "const_28_to_31_operand")])))]
9820 "TARGET_AVX512F
9821 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
9822 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
9823 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
9824 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4)
9825 && INTVAL (operands[3]) == (INTVAL (operands[11]) - 8)
9826 && INTVAL (operands[4]) == (INTVAL (operands[12]) - 8)
9827 && INTVAL (operands[5]) == (INTVAL (operands[13]) - 8)
9828 && INTVAL (operands[6]) == (INTVAL (operands[14]) - 8)
9829 && INTVAL (operands[3]) == (INTVAL (operands[15]) - 12)
9830 && INTVAL (operands[4]) == (INTVAL (operands[16]) - 12)
9831 && INTVAL (operands[5]) == (INTVAL (operands[17]) - 12)
9832 && INTVAL (operands[6]) == (INTVAL (operands[18]) - 12))"
9833 {
9834 int mask;
9835 mask = INTVAL (operands[3]);
9836 mask |= INTVAL (operands[4]) << 2;
9837 mask |= (INTVAL (operands[5]) - 16) << 4;
9838 mask |= (INTVAL (operands[6]) - 16) << 6;
9839 operands[3] = GEN_INT (mask);
9840
9841 return "vshufps\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
9842 }
9843 [(set_attr "type" "sselog")
9844 (set_attr "length_immediate" "1")
9845 (set_attr "prefix" "evex")
9846 (set_attr "mode" "V16SF")])
9847
9848 (define_expand "avx512f_shufpd512_mask"
9849 [(match_operand:V8DF 0 "register_operand")
9850 (match_operand:V8DF 1 "register_operand")
9851 (match_operand:V8DF 2 "nonimmediate_operand")
9852 (match_operand:SI 3 "const_0_to_255_operand")
9853 (match_operand:V8DF 4 "register_operand")
9854 (match_operand:QI 5 "register_operand")]
9855 "TARGET_AVX512F"
9856 {
9857 int mask = INTVAL (operands[3]);
9858 emit_insn (gen_avx512f_shufpd512_1_mask (operands[0], operands[1], operands[2],
9859 GEN_INT (mask & 1),
9860 GEN_INT (mask & 2 ? 9 : 8),
9861 GEN_INT (mask & 4 ? 3 : 2),
9862 GEN_INT (mask & 8 ? 11 : 10),
9863 GEN_INT (mask & 16 ? 5 : 4),
9864 GEN_INT (mask & 32 ? 13 : 12),
9865 GEN_INT (mask & 64 ? 7 : 6),
9866 GEN_INT (mask & 128 ? 15 : 14),
9867 operands[4], operands[5]));
9868 DONE;
9869 })
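;; The expander above decodes the 8-bit vshufpd immediate: result element
;; 2*k comes from operand 1 and element 2*k+1 from operand 2, both taken
;; from 128-bit lane k, and bit i of the immediate selects the low or high
;; double of that lane (hence the 8..15 indices used for operand 2 in the
;; 16-element concatenation).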
9870
9871 (define_insn "avx512f_shufpd512_1<mask_name>"
9872 [(set (match_operand:V8DF 0 "register_operand" "=v")
9873 (vec_select:V8DF
9874 (vec_concat:V16DF
9875 (match_operand:V8DF 1 "register_operand" "v")
9876 (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
9877 (parallel [(match_operand 3 "const_0_to_1_operand")
9878 (match_operand 4 "const_8_to_9_operand")
9879 (match_operand 5 "const_2_to_3_operand")
9880 (match_operand 6 "const_10_to_11_operand")
9881 (match_operand 7 "const_4_to_5_operand")
9882 (match_operand 8 "const_12_to_13_operand")
9883 (match_operand 9 "const_6_to_7_operand")
9884 (match_operand 10 "const_14_to_15_operand")])))]
9885 "TARGET_AVX512F"
9886 {
9887 int mask;
9888 mask = INTVAL (operands[3]);
9889 mask |= (INTVAL (operands[4]) - 8) << 1;
9890 mask |= (INTVAL (operands[5]) - 2) << 2;
9891 mask |= (INTVAL (operands[6]) - 10) << 3;
9892 mask |= (INTVAL (operands[7]) - 4) << 4;
9893 mask |= (INTVAL (operands[8]) - 12) << 5;
9894 mask |= (INTVAL (operands[9]) - 6) << 6;
9895 mask |= (INTVAL (operands[10]) - 14) << 7;
9896 operands[3] = GEN_INT (mask);
9897
9898 return "vshufpd\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
9899 }
9900 [(set_attr "type" "sselog")
9901 (set_attr "length_immediate" "1")
9902 (set_attr "prefix" "evex")
9903 (set_attr "mode" "V8DF")])
9904
9905 (define_expand "avx_shufpd256<mask_expand4_name>"
9906 [(match_operand:V4DF 0 "register_operand")
9907 (match_operand:V4DF 1 "register_operand")
9908 (match_operand:V4DF 2 "nonimmediate_operand")
9909 (match_operand:SI 3 "const_int_operand")]
9910 "TARGET_AVX"
9911 {
9912 int mask = INTVAL (operands[3]);
9913 emit_insn (gen_avx_shufpd256_1<mask_expand4_name> (operands[0],
9914 operands[1],
9915 operands[2],
9916 GEN_INT (mask & 1),
9917 GEN_INT (mask & 2 ? 5 : 4),
9918 GEN_INT (mask & 4 ? 3 : 2),
9919 GEN_INT (mask & 8 ? 7 : 6)
9920 <mask_expand4_args>));
9921 DONE;
9922 })
9923
9924 (define_insn "avx_shufpd256_1<mask_name>"
9925 [(set (match_operand:V4DF 0 "register_operand" "=v")
9926 (vec_select:V4DF
9927 (vec_concat:V8DF
9928 (match_operand:V4DF 1 "register_operand" "v")
9929 (match_operand:V4DF 2 "nonimmediate_operand" "vm"))
9930 (parallel [(match_operand 3 "const_0_to_1_operand")
9931 (match_operand 4 "const_4_to_5_operand")
9932 (match_operand 5 "const_2_to_3_operand")
9933 (match_operand 6 "const_6_to_7_operand")])))]
9934 "TARGET_AVX && <mask_avx512vl_condition>"
9935 {
9936 int mask;
9937 mask = INTVAL (operands[3]);
9938 mask |= (INTVAL (operands[4]) - 4) << 1;
9939 mask |= (INTVAL (operands[5]) - 2) << 2;
9940 mask |= (INTVAL (operands[6]) - 6) << 3;
9941 operands[3] = GEN_INT (mask);
9942
9943 return "vshufpd\t{%3, %2, %1, %0<mask_operand7>|%0<mask_operand7>, %1, %2, %3}";
9944 }
9945 [(set_attr "type" "sseshuf")
9946 (set_attr "length_immediate" "1")
9947 (set_attr "prefix" "vex")
9948 (set_attr "mode" "V4DF")])
9949
9950 (define_expand "sse2_shufpd<mask_expand4_name>"
9951 [(match_operand:V2DF 0 "register_operand")
9952 (match_operand:V2DF 1 "register_operand")
9953 (match_operand:V2DF 2 "vector_operand")
9954 (match_operand:SI 3 "const_int_operand")]
9955 "TARGET_SSE2"
9956 {
9957 int mask = INTVAL (operands[3]);
9958 emit_insn (gen_sse2_shufpd_v2df<mask_expand4_name> (operands[0], operands[1],
9959 operands[2], GEN_INT (mask & 1),
9960 GEN_INT (mask & 2 ? 3 : 2)
9961 <mask_expand4_args>));
9962 DONE;
9963 })
9964
9965 (define_insn "sse2_shufpd_v2df_mask"
9966 [(set (match_operand:V2DF 0 "register_operand" "=v")
9967 (vec_merge:V2DF
9968 (vec_select:V2DF
9969 (vec_concat:V4DF
9970 (match_operand:V2DF 1 "register_operand" "v")
9971 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
9972 (parallel [(match_operand 3 "const_0_to_1_operand")
9973 (match_operand 4 "const_2_to_3_operand")]))
9974 (match_operand:V2DF 5 "nonimm_or_0_operand" "0C")
9975 (match_operand:QI 6 "register_operand" "Yk")))]
9976 "TARGET_AVX512VL"
9977 {
9978 int mask;
9979 mask = INTVAL (operands[3]);
9980 mask |= (INTVAL (operands[4]) - 2) << 1;
9981 operands[3] = GEN_INT (mask);
9982
9983 return "vshufpd\t{%3, %2, %1, %0%{%6%}%N5|%0%{%6%}%N5, %1, %2, %3}";
9984 }
9985 [(set_attr "type" "sseshuf")
9986 (set_attr "length_immediate" "1")
9987 (set_attr "prefix" "evex")
9988 (set_attr "mode" "V2DF")])
9989
9990 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
9991 (define_insn "avx2_interleave_highv4di<mask_name>"
9992 [(set (match_operand:V4DI 0 "register_operand" "=v")
9993 (vec_select:V4DI
9994 (vec_concat:V8DI
9995 (match_operand:V4DI 1 "register_operand" "v")
9996 (match_operand:V4DI 2 "nonimmediate_operand" "vm"))
9997 (parallel [(const_int 1)
9998 (const_int 5)
9999 (const_int 3)
10000 (const_int 7)])))]
10001 "TARGET_AVX2 && <mask_avx512vl_condition>"
10002 "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10003 [(set_attr "type" "sselog")
10004 (set_attr "prefix" "vex")
10005 (set_attr "mode" "OI")])
10006
10007 (define_insn "<mask_codefor>avx512f_interleave_highv8di<mask_name>"
10008 [(set (match_operand:V8DI 0 "register_operand" "=v")
10009 (vec_select:V8DI
10010 (vec_concat:V16DI
10011 (match_operand:V8DI 1 "register_operand" "v")
10012 (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
10013 (parallel [(const_int 1) (const_int 9)
10014 (const_int 3) (const_int 11)
10015 (const_int 5) (const_int 13)
10016 (const_int 7) (const_int 15)])))]
10017 "TARGET_AVX512F"
10018 "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10019 [(set_attr "type" "sselog")
10020 (set_attr "prefix" "evex")
10021 (set_attr "mode" "XI")])
10022
10023 (define_insn "vec_interleave_highv2di<mask_name>"
10024 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
10025 (vec_select:V2DI
10026 (vec_concat:V4DI
10027 (match_operand:V2DI 1 "register_operand" "0,v")
10028 (match_operand:V2DI 2 "vector_operand" "xBm,vm"))
10029 (parallel [(const_int 1)
10030 (const_int 3)])))]
10031 "TARGET_SSE2 && <mask_avx512vl_condition>"
10032 "@
10033 punpckhqdq\t{%2, %0|%0, %2}
10034 vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10035 [(set_attr "isa" "noavx,avx")
10036 (set_attr "type" "sselog")
10037 (set_attr "prefix_data16" "1,*")
10038 (set_attr "prefix" "orig,<mask_prefix>")
10039 (set_attr "mode" "TI")])
10040
10041 (define_insn "avx2_interleave_lowv4di<mask_name>"
10042 [(set (match_operand:V4DI 0 "register_operand" "=v")
10043 (vec_select:V4DI
10044 (vec_concat:V8DI
10045 (match_operand:V4DI 1 "register_operand" "v")
10046 (match_operand:V4DI 2 "nonimmediate_operand" "vm"))
10047 (parallel [(const_int 0)
10048 (const_int 4)
10049 (const_int 2)
10050 (const_int 6)])))]
10051 "TARGET_AVX2 && <mask_avx512vl_condition>"
10052 "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10053 [(set_attr "type" "sselog")
10054 (set_attr "prefix" "vex")
10055 (set_attr "mode" "OI")])
10056
10057 (define_insn "<mask_codefor>avx512f_interleave_lowv8di<mask_name>"
10058 [(set (match_operand:V8DI 0 "register_operand" "=v")
10059 (vec_select:V8DI
10060 (vec_concat:V16DI
10061 (match_operand:V8DI 1 "register_operand" "v")
10062 (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
10063 (parallel [(const_int 0) (const_int 8)
10064 (const_int 2) (const_int 10)
10065 (const_int 4) (const_int 12)
10066 (const_int 6) (const_int 14)])))]
10067 "TARGET_AVX512F"
10068 "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10069 [(set_attr "type" "sselog")
10070 (set_attr "prefix" "evex")
10071 (set_attr "mode" "XI")])
10072
10073 (define_insn "vec_interleave_lowv2di<mask_name>"
10074 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
10075 (vec_select:V2DI
10076 (vec_concat:V4DI
10077 (match_operand:V2DI 1 "register_operand" "0,v")
10078 (match_operand:V2DI 2 "vector_operand" "xBm,vm"))
10079 (parallel [(const_int 0)
10080 (const_int 2)])))]
10081 "TARGET_SSE2 && <mask_avx512vl_condition>"
10082 "@
10083 punpcklqdq\t{%2, %0|%0, %2}
10084 vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10085 [(set_attr "isa" "noavx,avx")
10086 (set_attr "type" "sselog")
10087 (set_attr "prefix_data16" "1,*")
10088 (set_attr "prefix" "orig,vex")
10089 (set_attr "mode" "TI")])
10090
10091 (define_insn "sse2_shufpd_<mode>"
10092 [(set (match_operand:VI8F_128 0 "register_operand" "=x,v")
10093 (vec_select:VI8F_128
10094 (vec_concat:<ssedoublevecmode>
10095 (match_operand:VI8F_128 1 "register_operand" "0,v")
10096 (match_operand:VI8F_128 2 "vector_operand" "xBm,vm"))
10097 (parallel [(match_operand 3 "const_0_to_1_operand")
10098 (match_operand 4 "const_2_to_3_operand")])))]
10099 "TARGET_SSE2"
10100 {
10101 int mask;
10102 mask = INTVAL (operands[3]);
10103 mask |= (INTVAL (operands[4]) - 2) << 1;
10104 operands[3] = GEN_INT (mask);
10105
10106 switch (which_alternative)
10107 {
10108 case 0:
10109 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
10110 case 1:
10111 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
10112 default:
10113 gcc_unreachable ();
10114 }
10115 }
10116 [(set_attr "isa" "noavx,avx")
10117 (set_attr "type" "sseshuf")
10118 (set_attr "length_immediate" "1")
10119 (set_attr "prefix" "orig,maybe_evex")
10120 (set_attr "mode" "V2DF")])
10121
10122 ;; Avoid combining registers from different units in a single alternative,
10123 ;; see comment above inline_secondary_memory_needed function in i386.c
10124 (define_insn "sse2_storehpd"
10125 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,Yv,x,*f,r")
10126 (vec_select:DF
10127 (match_operand:V2DF 1 "nonimmediate_operand" " v,0, v,o,o,o")
10128 (parallel [(const_int 1)])))]
10129 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
10130 "@
10131 %vmovhpd\t{%1, %0|%0, %1}
10132 unpckhpd\t%0, %0
10133 vunpckhpd\t{%d1, %0|%0, %d1}
10134 #
10135 #
10136 #"
10137 [(set_attr "isa" "*,noavx,avx,*,*,*")
10138 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov")
10139 (set (attr "prefix_data16")
10140 (if_then_else
10141 (and (eq_attr "alternative" "0")
10142 (not (match_test "TARGET_AVX")))
10143 (const_string "1")
10144 (const_string "*")))
10145 (set_attr "prefix" "maybe_vex,orig,maybe_evex,*,*,*")
10146 (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")])
10147
10148 (define_split
10149 [(set (match_operand:DF 0 "register_operand")
10150 (vec_select:DF
10151 (match_operand:V2DF 1 "memory_operand")
10152 (parallel [(const_int 1)])))]
10153 "TARGET_SSE2 && reload_completed"
10154 [(set (match_dup 0) (match_dup 1))]
10155 "operands[1] = adjust_address (operands[1], DFmode, 8);")
10156
10157 (define_insn "*vec_extractv2df_1_sse"
10158 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
10159 (vec_select:DF
10160 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
10161 (parallel [(const_int 1)])))]
10162 "!TARGET_SSE2 && TARGET_SSE
10163 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
10164 "@
10165 movhps\t{%1, %0|%0, %1}
10166 movhlps\t{%1, %0|%0, %1}
10167 movlps\t{%H1, %0|%0, %H1}"
10168 [(set_attr "type" "ssemov")
10169 (set_attr "mode" "V2SF,V4SF,V2SF")])
10170
10171 ;; Avoid combining registers from different units in a single alternative,
10172 ;; see comment above inline_secondary_memory_needed function in i386.c
10173 (define_insn "sse2_storelpd"
10174 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
10175 (vec_select:DF
10176 (match_operand:V2DF 1 "nonimmediate_operand" " v,x,m,m,m")
10177 (parallel [(const_int 0)])))]
10178 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
10179 "@
10180 %vmovlpd\t{%1, %0|%0, %1}
10181 #
10182 #
10183 #
10184 #"
10185 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
10186 (set (attr "prefix_data16")
10187 (if_then_else (eq_attr "alternative" "0")
10188 (const_string "1")
10189 (const_string "*")))
10190 (set_attr "prefix" "maybe_vex")
10191 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
10192
10193 (define_split
10194 [(set (match_operand:DF 0 "register_operand")
10195 (vec_select:DF
10196 (match_operand:V2DF 1 "nonimmediate_operand")
10197 (parallel [(const_int 0)])))]
10198 "TARGET_SSE2 && reload_completed"
10199 [(set (match_dup 0) (match_dup 1))]
10200 "operands[1] = gen_lowpart (DFmode, operands[1]);")
10201
10202 (define_insn "*vec_extractv2df_0_sse"
10203 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
10204 (vec_select:DF
10205 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
10206 (parallel [(const_int 0)])))]
10207 "!TARGET_SSE2 && TARGET_SSE
10208 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
10209 "@
10210 movlps\t{%1, %0|%0, %1}
10211 movaps\t{%1, %0|%0, %1}
10212 movlps\t{%1, %0|%0, %q1}"
10213 [(set_attr "type" "ssemov")
10214 (set_attr "mode" "V2SF,V4SF,V2SF")])
10215
10216 (define_expand "sse2_loadhpd_exp"
10217 [(set (match_operand:V2DF 0 "nonimmediate_operand")
10218 (vec_concat:V2DF
10219 (vec_select:DF
10220 (match_operand:V2DF 1 "nonimmediate_operand")
10221 (parallel [(const_int 0)]))
10222 (match_operand:DF 2 "nonimmediate_operand")))]
10223 "TARGET_SSE2"
10224 {
10225 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
10226
10227 emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2]));
10228
10229 /* Fix up the destination if needed. */
10230 if (dst != operands[0])
10231 emit_move_insn (operands[0], dst);
10232
10233 DONE;
10234 })
10235
10236 ;; Avoid combining registers from different units in a single alternative,
10237 ;; see comment above inline_secondary_memory_needed function in i386.c
10238 (define_insn "sse2_loadhpd"
10239 [(set (match_operand:V2DF 0 "nonimmediate_operand"
10240 "=x,v,x,v ,o,o ,o")
10241 (vec_concat:V2DF
10242 (vec_select:DF
10243 (match_operand:V2DF 1 "nonimmediate_operand"
10244 " 0,v,0,v ,0,0 ,0")
10245 (parallel [(const_int 0)]))
10246 (match_operand:DF 2 "nonimmediate_operand"
10247 " m,m,x,Yv,x,*f,r")))]
10248 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10249 "@
10250 movhpd\t{%2, %0|%0, %2}
10251 vmovhpd\t{%2, %1, %0|%0, %1, %2}
10252 unpcklpd\t{%2, %0|%0, %2}
10253 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
10254 #
10255 #
10256 #"
10257 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
10258 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
10259 (set (attr "prefix_data16")
10260 (if_then_else (eq_attr "alternative" "0")
10261 (const_string "1")
10262 (const_string "*")))
10263 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,*,*,*")
10264 (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")])
10265
10266 (define_split
10267 [(set (match_operand:V2DF 0 "memory_operand")
10268 (vec_concat:V2DF
10269 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
10270 (match_operand:DF 1 "register_operand")))]
10271 "TARGET_SSE2 && reload_completed"
10272 [(set (match_dup 0) (match_dup 1))]
10273 "operands[0] = adjust_address (operands[0], DFmode, 8);")
10274
10275 (define_expand "sse2_loadlpd_exp"
10276 [(set (match_operand:V2DF 0 "nonimmediate_operand")
10277 (vec_concat:V2DF
10278 (match_operand:DF 2 "nonimmediate_operand")
10279 (vec_select:DF
10280 (match_operand:V2DF 1 "nonimmediate_operand")
10281 (parallel [(const_int 1)]))))]
10282 "TARGET_SSE2"
10283 {
10284 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
10285
10286 emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2]));
10287
10288 /* Fix up the destination if needed. */
10289 if (dst != operands[0])
10290 emit_move_insn (operands[0], dst);
10291
10292 DONE;
10293 })
10294
10295 ;; Avoid combining registers from different units in a single alternative,
10296 ;; see comment above inline_secondary_memory_needed function in i386.c
10297 (define_insn "sse2_loadlpd"
10298 [(set (match_operand:V2DF 0 "nonimmediate_operand"
10299 "=v,x,v,x,v,x,x,v,m,m ,m")
10300 (vec_concat:V2DF
10301 (match_operand:DF 2 "nonimmediate_operand"
10302 "vm,m,m,x,v,0,0,v,x,*f,r")
10303 (vec_select:DF
10304 (match_operand:V2DF 1 "nonimm_or_0_operand"
10305 " C,0,v,0,v,x,o,o,0,0 ,0")
10306 (parallel [(const_int 1)]))))]
10307 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10308 "@
10309 %vmovq\t{%2, %0|%0, %2}
10310 movlpd\t{%2, %0|%0, %2}
10311 vmovlpd\t{%2, %1, %0|%0, %1, %2}
10312 movsd\t{%2, %0|%0, %2}
10313 vmovsd\t{%2, %1, %0|%0, %1, %2}
10314 shufpd\t{$2, %1, %0|%0, %1, 2}
10315 movhpd\t{%H1, %0|%0, %H1}
10316 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
10317 #
10318 #
10319 #"
10320 [(set_attr "isa" "*,noavx,avx,noavx,avx,noavx,noavx,avx,*,*,*")
10321 (set (attr "type")
10322 (cond [(eq_attr "alternative" "5")
10323 (const_string "sselog")
10324 (eq_attr "alternative" "9")
10325 (const_string "fmov")
10326 (eq_attr "alternative" "10")
10327 (const_string "imov")
10328 ]
10329 (const_string "ssemov")))
10330 (set (attr "prefix_data16")
10331 (if_then_else (eq_attr "alternative" "1,6")
10332 (const_string "1")
10333 (const_string "*")))
10334 (set (attr "length_immediate")
10335 (if_then_else (eq_attr "alternative" "5")
10336 (const_string "1")
10337 (const_string "*")))
10338 (set (attr "prefix")
10339 (cond [(eq_attr "alternative" "0")
10340 (const_string "maybe_vex")
10341 (eq_attr "alternative" "1,3,5,6")
10342 (const_string "orig")
10343 (eq_attr "alternative" "2,4,7")
10344 (const_string "maybe_evex")
10345 ]
10346 (const_string "*")))
10347 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")])
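;; In alternative 0 above, operand 1 is constrained to zero ("C") so that a
;; plain %vmovq suffices: it loads the low double and clears the upper half
;; of the destination, which is exactly the required concatenation with
;; zero.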
10348
10349 (define_split
10350 [(set (match_operand:V2DF 0 "memory_operand")
10351 (vec_concat:V2DF
10352 (match_operand:DF 1 "register_operand")
10353 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
10354 "TARGET_SSE2 && reload_completed"
10355 [(set (match_dup 0) (match_dup 1))]
10356 "operands[0] = adjust_address (operands[0], DFmode, 0);")
10357
10358 (define_insn "sse2_movsd"
10359 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,v,x,v,m,x,x,v,o")
10360 (vec_merge:V2DF
10361 (match_operand:V2DF 2 "nonimmediate_operand" " x,v,m,m,v,0,0,v,0")
10362 (match_operand:V2DF 1 "nonimmediate_operand" " 0,v,0,v,0,x,o,o,v")
10363 (const_int 1)))]
10364 "TARGET_SSE2"
10365 "@
10366 movsd\t{%2, %0|%0, %2}
10367 vmovsd\t{%2, %1, %0|%0, %1, %2}
10368 movlpd\t{%2, %0|%0, %q2}
10369 vmovlpd\t{%2, %1, %0|%0, %1, %q2}
10370 %vmovlpd\t{%2, %0|%q0, %2}
10371 shufpd\t{$2, %1, %0|%0, %1, 2}
10372 movhps\t{%H1, %0|%0, %H1}
10373 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
10374 %vmovhps\t{%1, %H0|%H0, %1}"
10375 [(set_attr "isa" "noavx,avx,noavx,avx,*,noavx,noavx,avx,*")
10376 (set (attr "type")
10377 (if_then_else
10378 (eq_attr "alternative" "5")
10379 (const_string "sselog")
10380 (const_string "ssemov")))
10381 (set (attr "prefix_data16")
10382 (if_then_else
10383 (and (eq_attr "alternative" "2,4")
10384 (not (match_test "TARGET_AVX")))
10385 (const_string "1")
10386 (const_string "*")))
10387 (set (attr "length_immediate")
10388 (if_then_else (eq_attr "alternative" "5")
10389 (const_string "1")
10390 (const_string "*")))
10391 (set (attr "prefix")
10392 (cond [(eq_attr "alternative" "1,3,7")
10393 (const_string "maybe_evex")
10394 (eq_attr "alternative" "4,8")
10395 (const_string "maybe_vex")
10396 ]
10397 (const_string "orig")))
10398 (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
10399
10400 (define_insn "vec_dupv2df<mask_name>"
10401 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
10402 (vec_duplicate:V2DF
10403 (match_operand:DF 1 "nonimmediate_operand" " 0,xm,vm")))]
10404 "TARGET_SSE2 && <mask_avx512vl_condition>"
10405 "@
10406 unpcklpd\t%0, %0
10407 %vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}
10408 vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
10409 [(set_attr "isa" "noavx,sse3,avx512vl")
10410 (set_attr "type" "sselog1")
10411 (set_attr "prefix" "orig,maybe_vex,evex")
10412 (set_attr "mode" "V2DF,DF,DF")])
10413
10414 (define_insn "vec_concatv2df"
10415 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v,x,v,x,x, v,x,x")
10416 (vec_concat:V2DF
10417 (match_operand:DF 1 "nonimmediate_operand" " 0,x,v,m,m,0,x,vm,0,0")
10418 (match_operand:DF 2 "nonimm_or_0_operand" " x,x,v,1,1,m,m, C,x,m")))]
10419 "TARGET_SSE
10420 && (!(MEM_P (operands[1]) && MEM_P (operands[2]))
10421 || (TARGET_SSE3 && rtx_equal_p (operands[1], operands[2])))"
10422 "@
10423 unpcklpd\t{%2, %0|%0, %2}
10424 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
10425 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
10426 %vmovddup\t{%1, %0|%0, %1}
10427 vmovddup\t{%1, %0|%0, %1}
10428 movhpd\t{%2, %0|%0, %2}
10429 vmovhpd\t{%2, %1, %0|%0, %1, %2}
10430 %vmovq\t{%1, %0|%0, %1}
10431 movlhps\t{%2, %0|%0, %2}
10432 movhps\t{%2, %0|%0, %2}"
10433 [(set (attr "isa")
10434 (cond [(eq_attr "alternative" "0,5")
10435 (const_string "sse2_noavx")
10436 (eq_attr "alternative" "1,6")
10437 (const_string "avx")
10438 (eq_attr "alternative" "2,4")
10439 (const_string "avx512vl")
10440 (eq_attr "alternative" "3")
10441 (const_string "sse3")
10442 (eq_attr "alternative" "7")
10443 (const_string "sse2")
10444 ]
10445 (const_string "noavx")))
10446 (set (attr "type")
10447 (if_then_else
10448 (eq_attr "alternative" "0,1,2,3,4")
10449 (const_string "sselog")
10450 (const_string "ssemov")))
10451 (set (attr "prefix_data16")
10452 (if_then_else (eq_attr "alternative" "5")
10453 (const_string "1")
10454 (const_string "*")))
10455 (set (attr "prefix")
10456 (cond [(eq_attr "alternative" "1,6")
10457 (const_string "vex")
10458 (eq_attr "alternative" "2,4")
10459 (const_string "evex")
10460 (eq_attr "alternative" "3,7")
10461 (const_string "maybe_vex")
10462 ]
10463 (const_string "orig")))
10464 (set_attr "mode" "V2DF,V2DF,V2DF, DF, DF, V1DF,V1DF,DF,V4SF,V2SF")])
10465
10466 ;; vmovq also clears the upper bits of the destination register.
10467 (define_insn "vec_set<mode>_0"
10468 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
10469 (vec_merge:VF2_512_256
10470 (vec_duplicate:VF2_512_256
10471 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "xm"))
10472 (match_operand:VF2_512_256 1 "const0_operand" "C")
10473 (const_int 1)))]
10474 "TARGET_AVX"
10475 "vmovq\t{%2, %x0|%x0, %2}"
10476 [(set_attr "type" "ssemov")
10477 (set_attr "prefix" "maybe_evex")
10478 (set_attr "mode" "DF")])
10479
10480 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10481 ;;
10482 ;; Parallel integer down-conversion operations
10483 ;;
10484 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10485
10486 (define_mode_iterator PMOV_DST_MODE_1 [V16QI V16HI V8SI V8HI])
10487 (define_mode_attr pmov_src_mode
10488 [(V16QI "V16SI") (V16HI "V16SI") (V8SI "V8DI") (V8HI "V8DI")])
10489 (define_mode_attr pmov_src_lower
10490 [(V16QI "v16si") (V16HI "v16si") (V8SI "v8di") (V8HI "v8di")])
10491 (define_mode_attr pmov_suff_1
10492 [(V16QI "db") (V16HI "dw") (V8SI "qd") (V8HI "qw")])
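;; <any_truncate> iterates over plain, signed-saturating and
;; unsigned-saturating truncation, and <trunsuffix> expands to "", "s" or
;; "us", so a single pattern covers e.g. vpmovdb, vpmovsdb and vpmovusdb.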
10493
10494 (define_insn "*avx512f_<code><pmov_src_lower><mode>2"
10495 [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
10496 (any_truncate:PMOV_DST_MODE_1
10497 (match_operand:<pmov_src_mode> 1 "register_operand" "v,v")))]
10498 "TARGET_AVX512F"
10499 "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0|%0, %1}"
10500 [(set_attr "type" "ssemov")
10501 (set_attr "memory" "none,store")
10502 (set_attr "prefix" "evex")
10503 (set_attr "mode" "<sseinsnmode>")])
10504
10505 (define_insn "avx512f_<code><pmov_src_lower><mode>2_mask"
10506 [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
10507 (vec_merge:PMOV_DST_MODE_1
10508 (any_truncate:PMOV_DST_MODE_1
10509 (match_operand:<pmov_src_mode> 1 "register_operand" "v,v"))
10510 (match_operand:PMOV_DST_MODE_1 2 "nonimm_or_0_operand" "0C,0")
10511 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
10512 "TARGET_AVX512F"
10513 "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
10514 [(set_attr "type" "ssemov")
10515 (set_attr "memory" "none,store")
10516 (set_attr "prefix" "evex")
10517 (set_attr "mode" "<sseinsnmode>")])
10518
10519 (define_expand "avx512f_<code><pmov_src_lower><mode>2_mask_store"
10520 [(set (match_operand:PMOV_DST_MODE_1 0 "memory_operand")
10521 (vec_merge:PMOV_DST_MODE_1
10522 (any_truncate:PMOV_DST_MODE_1
10523 (match_operand:<pmov_src_mode> 1 "register_operand"))
10524 (match_dup 0)
10525 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
10526 "TARGET_AVX512F")
10527
10528 (define_insn "avx512bw_<code>v32hiv32qi2"
10529 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
10530 (any_truncate:V32QI
10531 (match_operand:V32HI 1 "register_operand" "v,v")))]
10532 "TARGET_AVX512BW"
10533 "vpmov<trunsuffix>wb\t{%1, %0|%0, %1}"
10534 [(set_attr "type" "ssemov")
10535 (set_attr "memory" "none,store")
10536 (set_attr "prefix" "evex")
10537 (set_attr "mode" "XI")])
10538
10539 (define_insn "avx512bw_<code>v32hiv32qi2_mask"
10540 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
10541 (vec_merge:V32QI
10542 (any_truncate:V32QI
10543 (match_operand:V32HI 1 "register_operand" "v,v"))
10544 (match_operand:V32QI 2 "nonimm_or_0_operand" "0C,0")
10545 (match_operand:SI 3 "register_operand" "Yk,Yk")))]
10546 "TARGET_AVX512BW"
10547 "vpmov<trunsuffix>wb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
10548 [(set_attr "type" "ssemov")
10549 (set_attr "memory" "none,store")
10550 (set_attr "prefix" "evex")
10551 (set_attr "mode" "XI")])
10552
10553 (define_expand "avx512bw_<code>v32hiv32qi2_mask_store"
10554 [(set (match_operand:V32QI 0 "nonimmediate_operand")
10555 (vec_merge:V32QI
10556 (any_truncate:V32QI
10557 (match_operand:V32HI 1 "register_operand"))
10558 (match_dup 0)
10559 (match_operand:SI 2 "register_operand")))]
10560 "TARGET_AVX512BW")
10561
10562 (define_mode_iterator PMOV_DST_MODE_2
10563 [V4SI V8HI (V16QI "TARGET_AVX512BW")])
10564 (define_mode_attr pmov_suff_2
10565 [(V16QI "wb") (V8HI "dw") (V4SI "qd")])
10566
10567 (define_insn "*avx512vl_<code><ssedoublemodelower><mode>2"
10568 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m")
10569 (any_truncate:PMOV_DST_MODE_2
10570 (match_operand:<ssedoublemode> 1 "register_operand" "v,v")))]
10571 "TARGET_AVX512VL"
10572 "vpmov<trunsuffix><pmov_suff_2>\t{%1, %0|%0, %1}"
10573 [(set_attr "type" "ssemov")
10574 (set_attr "memory" "none,store")
10575 (set_attr "prefix" "evex")
10576 (set_attr "mode" "<sseinsnmode>")])
10577
10578 (define_insn "<avx512>_<code><ssedoublemodelower><mode>2_mask"
10579 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m")
10580 (vec_merge:PMOV_DST_MODE_2
10581 (any_truncate:PMOV_DST_MODE_2
10582 (match_operand:<ssedoublemode> 1 "register_operand" "v,v"))
10583 (match_operand:PMOV_DST_MODE_2 2 "nonimm_or_0_operand" "0C,0")
10584 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
10585 "TARGET_AVX512VL"
10586 "vpmov<trunsuffix><pmov_suff_2>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
10587 [(set_attr "type" "ssemov")
10588 (set_attr "memory" "none,store")
10589 (set_attr "prefix" "evex")
10590 (set_attr "mode" "<sseinsnmode>")])
10591
10592 (define_expand "<avx512>_<code><ssedoublemodelower><mode>2_mask_store"
10593 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand")
10594 (vec_merge:PMOV_DST_MODE_2
10595 (any_truncate:PMOV_DST_MODE_2
10596 (match_operand:<ssedoublemode> 1 "register_operand"))
10597 (match_dup 0)
10598 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
10599 "TARGET_AVX512VL")
10600
10601 (define_mode_iterator PMOV_SRC_MODE_3 [V4DI V2DI V8SI V4SI (V8HI "TARGET_AVX512BW")])
10602 (define_mode_attr pmov_dst_3
10603 [(V4DI "V4QI") (V2DI "V2QI") (V8SI "V8QI") (V4SI "V4QI") (V8HI "V8QI")])
10604 (define_mode_attr pmov_dst_zeroed_3
10605 [(V4DI "V12QI") (V2DI "V14QI") (V8SI "V8QI") (V4SI "V12QI") (V8HI "V8QI")])
10606 (define_mode_attr pmov_suff_3
10607 [(V4DI "qb") (V2DI "qb") (V8SI "db") (V4SI "db") (V8HI "wb")])
10608
10609 (define_insn "*avx512vl_<code><mode>v<ssescalarnum>qi2"
10610 [(set (match_operand:V16QI 0 "register_operand" "=v")
10611 (vec_concat:V16QI
10612 (any_truncate:<pmov_dst_3>
10613 (match_operand:PMOV_SRC_MODE_3 1 "register_operand" "v"))
10614 (match_operand:<pmov_dst_zeroed_3> 2 "const0_operand")))]
10615 "TARGET_AVX512VL"
10616 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
10617 [(set_attr "type" "ssemov")
10618 (set_attr "prefix" "evex")
10619 (set_attr "mode" "TI")])
10620
10621 (define_insn "*avx512vl_<code>v2div2qi2_store"
10622 [(set (match_operand:V16QI 0 "memory_operand" "=m")
10623 (vec_concat:V16QI
10624 (any_truncate:V2QI
10625 (match_operand:V2DI 1 "register_operand" "v"))
10626 (vec_select:V14QI
10627 (match_dup 0)
10628 (parallel [(const_int 2) (const_int 3)
10629 (const_int 4) (const_int 5)
10630 (const_int 6) (const_int 7)
10631 (const_int 8) (const_int 9)
10632 (const_int 10) (const_int 11)
10633 (const_int 12) (const_int 13)
10634 (const_int 14) (const_int 15)]))))]
10635 "TARGET_AVX512VL"
10636 "vpmov<trunsuffix>qb\t{%1, %0|%w0, %1}"
10637 [(set_attr "type" "ssemov")
10638 (set_attr "memory" "store")
10639 (set_attr "prefix" "evex")
10640 (set_attr "mode" "TI")])
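
;; Illustrative only: although the RTL above is written as a 16-byte
;; vec_concat with the old memory contents, only the low two bytes are
;; actually written (note the %w0 operand modifier).  A hedged C sketch of
;; the reference semantics of the non-saturating form:
;;
;;   #include <stdint.h>
;;   static void v2div2qi_store (uint8_t *mem, const int64_t src[2])
;;   {
;;     mem[0] = (uint8_t) src[0];   /* plain truncation            */
;;     mem[1] = (uint8_t) src[1];   /* bytes 2..15 stay untouched  */
;;   }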
10641
10642 (define_insn "avx512vl_<code>v2div2qi2_mask"
10643 [(set (match_operand:V16QI 0 "register_operand" "=v")
10644 (vec_concat:V16QI
10645 (vec_merge:V2QI
10646 (any_truncate:V2QI
10647 (match_operand:V2DI 1 "register_operand" "v"))
10648 (vec_select:V2QI
10649 (match_operand:V16QI 2 "nonimm_or_0_operand" "0C")
10650 (parallel [(const_int 0) (const_int 1)]))
10651 (match_operand:QI 3 "register_operand" "Yk"))
10652 (const_vector:V14QI [(const_int 0) (const_int 0)
10653 (const_int 0) (const_int 0)
10654 (const_int 0) (const_int 0)
10655 (const_int 0) (const_int 0)
10656 (const_int 0) (const_int 0)
10657 (const_int 0) (const_int 0)
10658 (const_int 0) (const_int 0)])))]
10659 "TARGET_AVX512VL"
10660 "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
10661 [(set_attr "type" "ssemov")
10662 (set_attr "prefix" "evex")
10663 (set_attr "mode" "TI")])
10664
10665 (define_insn "*avx512vl_<code>v2div2qi2_mask_1"
10666 [(set (match_operand:V16QI 0 "register_operand" "=v")
10667 (vec_concat:V16QI
10668 (vec_merge:V2QI
10669 (any_truncate:V2QI
10670 (match_operand:V2DI 1 "register_operand" "v"))
10671 (const_vector:V2QI [(const_int 0) (const_int 0)])
10672 (match_operand:QI 2 "register_operand" "Yk"))
10673 (const_vector:V14QI [(const_int 0) (const_int 0)
10674 (const_int 0) (const_int 0)
10675 (const_int 0) (const_int 0)
10676 (const_int 0) (const_int 0)
10677 (const_int 0) (const_int 0)
10678 (const_int 0) (const_int 0)
10679 (const_int 0) (const_int 0)])))]
10680 "TARGET_AVX512VL"
10681 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
10682 [(set_attr "type" "ssemov")
10683 (set_attr "prefix" "evex")
10684 (set_attr "mode" "TI")])
10685
10686 (define_insn "avx512vl_<code>v2div2qi2_mask_store"
10687 [(set (match_operand:V16QI 0 "memory_operand" "=m")
10688 (vec_concat:V16QI
10689 (vec_merge:V2QI
10690 (any_truncate:V2QI
10691 (match_operand:V2DI 1 "register_operand" "v"))
10692 (vec_select:V2QI
10693 (match_dup 0)
10694 (parallel [(const_int 0) (const_int 1)]))
10695 (match_operand:QI 2 "register_operand" "Yk"))
10696 (vec_select:V14QI
10697 (match_dup 0)
10698 (parallel [(const_int 2) (const_int 3)
10699 (const_int 4) (const_int 5)
10700 (const_int 6) (const_int 7)
10701 (const_int 8) (const_int 9)
10702 (const_int 10) (const_int 11)
10703 (const_int 12) (const_int 13)
10704 (const_int 14) (const_int 15)]))))]
10705 "TARGET_AVX512VL"
10706 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%w0%{%2%}, %1}"
10707 [(set_attr "type" "ssemov")
10708 (set_attr "memory" "store")
10709 (set_attr "prefix" "evex")
10710 (set_attr "mode" "TI")])
10711
10712 (define_insn "*avx512vl_<code><mode>v4qi2_store"
10713 [(set (match_operand:V16QI 0 "memory_operand" "=m")
10714 (vec_concat:V16QI
10715 (any_truncate:V4QI
10716 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
10717 (vec_select:V12QI
10718 (match_dup 0)
10719 (parallel [(const_int 4) (const_int 5)
10720 (const_int 6) (const_int 7)
10721 (const_int 8) (const_int 9)
10722 (const_int 10) (const_int 11)
10723 (const_int 12) (const_int 13)
10724 (const_int 14) (const_int 15)]))))]
10725 "TARGET_AVX512VL"
10726 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%k0, %1}"
10727 [(set_attr "type" "ssemov")
10728 (set_attr "memory" "store")
10729 (set_attr "prefix" "evex")
10730 (set_attr "mode" "TI")])
10731
10732 (define_insn "avx512vl_<code><mode>v4qi2_mask"
10733 [(set (match_operand:V16QI 0 "register_operand" "=v")
10734 (vec_concat:V16QI
10735 (vec_merge:V4QI
10736 (any_truncate:V4QI
10737 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
10738 (vec_select:V4QI
10739 (match_operand:V16QI 2 "nonimm_or_0_operand" "0C")
10740 (parallel [(const_int 0) (const_int 1)
10741 (const_int 2) (const_int 3)]))
10742 (match_operand:QI 3 "register_operand" "Yk"))
10743 (const_vector:V12QI [(const_int 0) (const_int 0)
10744 (const_int 0) (const_int 0)
10745 (const_int 0) (const_int 0)
10746 (const_int 0) (const_int 0)
10747 (const_int 0) (const_int 0)
10748 (const_int 0) (const_int 0)])))]
10749 "TARGET_AVX512VL"
10750 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
10751 [(set_attr "type" "ssemov")
10752 (set_attr "prefix" "evex")
10753 (set_attr "mode" "TI")])
10754
10755 (define_insn "*avx512vl_<code><mode>v4qi2_mask_1"
10756 [(set (match_operand:V16QI 0 "register_operand" "=v")
10757 (vec_concat:V16QI
10758 (vec_merge:V4QI
10759 (any_truncate:V4QI
10760 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
10761 (const_vector:V4QI [(const_int 0) (const_int 0)
10762 (const_int 0) (const_int 0)])
10763 (match_operand:QI 2 "register_operand" "Yk"))
10764 (const_vector:V12QI [(const_int 0) (const_int 0)
10765 (const_int 0) (const_int 0)
10766 (const_int 0) (const_int 0)
10767 (const_int 0) (const_int 0)
10768 (const_int 0) (const_int 0)
10769 (const_int 0) (const_int 0)])))]
10770 "TARGET_AVX512VL"
10771 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
10772 [(set_attr "type" "ssemov")
10773 (set_attr "prefix" "evex")
10774 (set_attr "mode" "TI")])
10775
10776 (define_insn "avx512vl_<code><mode>v4qi2_mask_store"
10777 [(set (match_operand:V16QI 0 "memory_operand" "=m")
10778 (vec_concat:V16QI
10779 (vec_merge:V4QI
10780 (any_truncate:V4QI
10781 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
10782 (vec_select:V4QI
10783 (match_dup 0)
10784 (parallel [(const_int 0) (const_int 1)
10785 (const_int 2) (const_int 3)]))
10786 (match_operand:QI 2 "register_operand" "Yk"))
10787 (vec_select:V12QI
10788 (match_dup 0)
10789 (parallel [(const_int 4) (const_int 5)
10790 (const_int 6) (const_int 7)
10791 (const_int 8) (const_int 9)
10792 (const_int 10) (const_int 11)
10793 (const_int 12) (const_int 13)
10794 (const_int 14) (const_int 15)]))))]
10795 "TARGET_AVX512VL"
10796 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%k0%{%2%}, %1}"
10797 [(set_attr "type" "ssemov")
10798 (set_attr "memory" "store")
10799 (set_attr "prefix" "evex")
10800 (set_attr "mode" "TI")])
10801
10802 (define_mode_iterator VI2_128_BW_4_256
10803 [(V8HI "TARGET_AVX512BW") V8SI])
10804
10805 (define_insn "*avx512vl_<code><mode>v8qi2_store"
10806 [(set (match_operand:V16QI 0 "memory_operand" "=m")
10807 (vec_concat:V16QI
10808 (any_truncate:V8QI
10809 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
10810 (vec_select:V8QI
10811 (match_dup 0)
10812 (parallel [(const_int 8) (const_int 9)
10813 (const_int 10) (const_int 11)
10814 (const_int 12) (const_int 13)
10815 (const_int 14) (const_int 15)]))))]
10816 "TARGET_AVX512VL"
10817 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%q0, %1}"
10818 [(set_attr "type" "ssemov")
10819 (set_attr "memory" "store")
10820 (set_attr "prefix" "evex")
10821 (set_attr "mode" "TI")])
10822
10823 (define_insn "avx512vl_<code><mode>v8qi2_mask"
10824 [(set (match_operand:V16QI 0 "register_operand" "=v")
10825 (vec_concat:V16QI
10826 (vec_merge:V8QI
10827 (any_truncate:V8QI
10828 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
10829 (vec_select:V8QI
10830 (match_operand:V16QI 2 "nonimm_or_0_operand" "0C")
10831 (parallel [(const_int 0) (const_int 1)
10832 (const_int 2) (const_int 3)
10833 (const_int 4) (const_int 5)
10834 (const_int 6) (const_int 7)]))
10835 (match_operand:QI 3 "register_operand" "Yk"))
10836 (const_vector:V8QI [(const_int 0) (const_int 0)
10837 (const_int 0) (const_int 0)
10838 (const_int 0) (const_int 0)
10839 (const_int 0) (const_int 0)])))]
10840 "TARGET_AVX512VL"
10841 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
10842 [(set_attr "type" "ssemov")
10843 (set_attr "prefix" "evex")
10844 (set_attr "mode" "TI")])
10845
10846 (define_insn "*avx512vl_<code><mode>v8qi2_mask_1"
10847 [(set (match_operand:V16QI 0 "register_operand" "=v")
10848 (vec_concat:V16QI
10849 (vec_merge:V8QI
10850 (any_truncate:V8QI
10851 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
10852 (const_vector:V8QI [(const_int 0) (const_int 0)
10853 (const_int 0) (const_int 0)
10854 (const_int 0) (const_int 0)
10855 (const_int 0) (const_int 0)])
10856 (match_operand:QI 2 "register_operand" "Yk"))
10857 (const_vector:V8QI [(const_int 0) (const_int 0)
10858 (const_int 0) (const_int 0)
10859 (const_int 0) (const_int 0)
10860 (const_int 0) (const_int 0)])))]
10861 "TARGET_AVX512VL"
10862 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
10863 [(set_attr "type" "ssemov")
10864 (set_attr "prefix" "evex")
10865 (set_attr "mode" "TI")])
10866
10867 (define_insn "avx512vl_<code><mode>v8qi2_mask_store"
10868 [(set (match_operand:V16QI 0 "memory_operand" "=m")
10869 (vec_concat:V16QI
10870 (vec_merge:V8QI
10871 (any_truncate:V8QI
10872 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
10873 (vec_select:V8QI
10874 (match_dup 0)
10875 (parallel [(const_int 0) (const_int 1)
10876 (const_int 2) (const_int 3)
10877 (const_int 4) (const_int 5)
10878 (const_int 6) (const_int 7)]))
10879 (match_operand:QI 2 "register_operand" "Yk"))
10880 (vec_select:V8QI
10881 (match_dup 0)
10882 (parallel [(const_int 8) (const_int 9)
10883 (const_int 10) (const_int 11)
10884 (const_int 12) (const_int 13)
10885 (const_int 14) (const_int 15)]))))]
10886 "TARGET_AVX512VL"
10887 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%q0%{%2%}, %1}"
10888 [(set_attr "type" "ssemov")
10889 (set_attr "memory" "store")
10890 (set_attr "prefix" "evex")
10891 (set_attr "mode" "TI")])
10892
10893 (define_mode_iterator PMOV_SRC_MODE_4 [V4DI V2DI V4SI])
10894 (define_mode_attr pmov_dst_4
10895 [(V4DI "V4HI") (V2DI "V2HI") (V4SI "V4HI")])
10896 (define_mode_attr pmov_dst_zeroed_4
10897 [(V4DI "V4HI") (V2DI "V6HI") (V4SI "V4HI")])
10898 (define_mode_attr pmov_suff_4
10899 [(V4DI "qw") (V2DI "qw") (V4SI "dw")])
10900
10901 (define_insn "*avx512vl_<code><mode>v<ssescalarnum>hi2"
10902 [(set (match_operand:V8HI 0 "register_operand" "=v")
10903 (vec_concat:V8HI
10904 (any_truncate:<pmov_dst_4>
10905 (match_operand:PMOV_SRC_MODE_4 1 "register_operand" "v"))
10906 (match_operand:<pmov_dst_zeroed_4> 2 "const0_operand")))]
10907 "TARGET_AVX512VL"
10908 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0|%0, %1}"
10909 [(set_attr "type" "ssemov")
10910 (set_attr "prefix" "evex")
10911 (set_attr "mode" "TI")])
10912
10913 (define_insn "*avx512vl_<code><mode>v4hi2_store"
10914 [(set (match_operand:V8HI 0 "memory_operand" "=m")
10915 (vec_concat:V8HI
10916 (any_truncate:V4HI
10917 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
10918 (vec_select:V4HI
10919 (match_dup 0)
10920 (parallel [(const_int 4) (const_int 5)
10921 (const_int 6) (const_int 7)]))))]
10922 "TARGET_AVX512VL"
10923 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0|%0, %1}"
10924 [(set_attr "type" "ssemov")
10925 (set_attr "memory" "store")
10926 (set_attr "prefix" "evex")
10927 (set_attr "mode" "TI")])
10928
10929 (define_insn "avx512vl_<code><mode>v4hi2_mask"
10930 [(set (match_operand:V8HI 0 "register_operand" "=v")
10931 (vec_concat:V8HI
10932 (vec_merge:V4HI
10933 (any_truncate:V4HI
10934 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
10935 (vec_select:V4HI
10936 (match_operand:V8HI 2 "nonimm_or_0_operand" "0C")
10937 (parallel [(const_int 0) (const_int 1)
10938 (const_int 2) (const_int 3)]))
10939 (match_operand:QI 3 "register_operand" "Yk"))
10940 (const_vector:V4HI [(const_int 0) (const_int 0)
10941 (const_int 0) (const_int 0)])))]
10942 "TARGET_AVX512VL"
10943 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
10944 [(set_attr "type" "ssemov")
10945 (set_attr "prefix" "evex")
10946 (set_attr "mode" "TI")])
10947
10948 (define_insn "*avx512vl_<code><mode>v4hi2_mask_1"
10949 [(set (match_operand:V8HI 0 "register_operand" "=v")
10950 (vec_concat:V8HI
10951 (vec_merge:V4HI
10952 (any_truncate:V4HI
10953 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
10954 (const_vector:V4HI [(const_int 0) (const_int 0)
10955 (const_int 0) (const_int 0)])
10956 (match_operand:QI 2 "register_operand" "Yk"))
10957 (const_vector:V4HI [(const_int 0) (const_int 0)
10958 (const_int 0) (const_int 0)])))]
10959 "TARGET_AVX512VL"
10960 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
10961 [(set_attr "type" "ssemov")
10962 (set_attr "prefix" "evex")
10963 (set_attr "mode" "TI")])
10964
10965 (define_insn "avx512vl_<code><mode>v4hi2_mask_store"
10966 [(set (match_operand:V8HI 0 "memory_operand" "=m")
10967 (vec_concat:V8HI
10968 (vec_merge:V4HI
10969 (any_truncate:V4HI
10970 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
10971 (vec_select:V4HI
10972 (match_dup 0)
10973 (parallel [(const_int 0) (const_int 1)
10974 (const_int 2) (const_int 3)]))
10975 (match_operand:QI 2 "register_operand" "Yk"))
10976 (vec_select:V4HI
10977 (match_dup 0)
10978 (parallel [(const_int 4) (const_int 5)
10979 (const_int 6) (const_int 7)]))))]
10980 "TARGET_AVX512VL"
10981 {
10982 if (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 4)
10983 return "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}|%0%{%2%}, %t1}";
10984 return "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}|%0%{%2%}, %g1}";
10985 }
10986 [(set_attr "type" "ssemov")
10987 (set_attr "memory" "store")
10988 (set_attr "prefix" "evex")
10989 (set_attr "mode" "TI")])
10990
10991 (define_insn "*avx512vl_<code>v2div2hi2_store"
10992 [(set (match_operand:V8HI 0 "memory_operand" "=m")
10993 (vec_concat:V8HI
10994 (any_truncate:V2HI
10995 (match_operand:V2DI 1 "register_operand" "v"))
10996 (vec_select:V6HI
10997 (match_dup 0)
10998 (parallel [(const_int 2) (const_int 3)
10999 (const_int 4) (const_int 5)
11000 (const_int 6) (const_int 7)]))))]
11001 "TARGET_AVX512VL"
11002 "vpmov<trunsuffix>qw\t{%1, %0|%0, %1}"
11003 [(set_attr "type" "ssemov")
11004 (set_attr "memory" "store")
11005 (set_attr "prefix" "evex")
11006 (set_attr "mode" "TI")])
11007
11008 (define_insn "avx512vl_<code>v2div2hi2_mask"
11009 [(set (match_operand:V8HI 0 "register_operand" "=v")
11010 (vec_concat:V8HI
11011 (vec_merge:V2HI
11012 (any_truncate:V2HI
11013 (match_operand:V2DI 1 "register_operand" "v"))
11014 (vec_select:V2HI
11015 (match_operand:V8HI 2 "nonimm_or_0_operand" "0C")
11016 (parallel [(const_int 0) (const_int 1)]))
11017 (match_operand:QI 3 "register_operand" "Yk"))
11018 (const_vector:V6HI [(const_int 0) (const_int 0)
11019 (const_int 0) (const_int 0)
11020 (const_int 0) (const_int 0)])))]
11021 "TARGET_AVX512VL"
11022 "vpmov<trunsuffix>qw\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
11023 [(set_attr "type" "ssemov")
11024 (set_attr "prefix" "evex")
11025 (set_attr "mode" "TI")])
11026
11027 (define_insn "*avx512vl_<code>v2div2hi2_mask_1"
11028 [(set (match_operand:V8HI 0 "register_operand" "=v")
11029 (vec_concat:V8HI
11030 (vec_merge:V2HI
11031 (any_truncate:V2HI
11032 (match_operand:V2DI 1 "register_operand" "v"))
11033 (const_vector:V2HI [(const_int 0) (const_int 0)])
11034 (match_operand:QI 2 "register_operand" "Yk"))
11035 (const_vector:V6HI [(const_int 0) (const_int 0)
11036 (const_int 0) (const_int 0)
11037 (const_int 0) (const_int 0)])))]
11038 "TARGET_AVX512VL"
11039 "vpmov<trunsuffix>qw\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
11040 [(set_attr "type" "ssemov")
11041 (set_attr "prefix" "evex")
11042 (set_attr "mode" "TI")])
11043
11044 (define_insn "avx512vl_<code>v2div2hi2_mask_store"
11045 [(set (match_operand:V8HI 0 "memory_operand" "=m")
11046 (vec_concat:V8HI
11047 (vec_merge:V2HI
11048 (any_truncate:V2HI
11049 (match_operand:V2DI 1 "register_operand" "v"))
11050 (vec_select:V2HI
11051 (match_dup 0)
11052 (parallel [(const_int 0) (const_int 1)]))
11053 (match_operand:QI 2 "register_operand" "Yk"))
11054 (vec_select:V6HI
11055 (match_dup 0)
11056 (parallel [(const_int 2) (const_int 3)
11057 (const_int 4) (const_int 5)
11058 (const_int 6) (const_int 7)]))))]
11059 "TARGET_AVX512VL"
11060 "vpmov<trunsuffix>qw\t{%1, %0%{%2%}|%0%{%2%}, %g1}"
11061 [(set_attr "type" "ssemov")
11062 (set_attr "memory" "store")
11063 (set_attr "prefix" "evex")
11064 (set_attr "mode" "TI")])
11065
11066 (define_insn "*avx512vl_<code>v2div2si2"
11067 [(set (match_operand:V4SI 0 "register_operand" "=v")
11068 (vec_concat:V4SI
11069 (any_truncate:V2SI
11070 (match_operand:V2DI 1 "register_operand" "v"))
11071 (match_operand:V2SI 2 "const0_operand")))]
11072 "TARGET_AVX512VL"
11073 "vpmov<trunsuffix>qd\t{%1, %0|%0, %1}"
11074 [(set_attr "type" "ssemov")
11075 (set_attr "prefix" "evex")
11076 (set_attr "mode" "TI")])
11077
11078 (define_insn "*avx512vl_<code>v2div2si2_store"
11079 [(set (match_operand:V4SI 0 "memory_operand" "=m")
11080 (vec_concat:V4SI
11081 (any_truncate:V2SI
11082 (match_operand:V2DI 1 "register_operand" "v"))
11083 (vec_select:V2SI
11084 (match_dup 0)
11085 (parallel [(const_int 2) (const_int 3)]))))]
11086 "TARGET_AVX512VL"
11087 "vpmov<trunsuffix>qd\t{%1, %0|%0, %1}"
11088 [(set_attr "type" "ssemov")
11089 (set_attr "memory" "store")
11090 (set_attr "prefix" "evex")
11091 (set_attr "mode" "TI")])
11092
11093 (define_insn "avx512vl_<code>v2div2si2_mask"
11094 [(set (match_operand:V4SI 0 "register_operand" "=v")
11095 (vec_concat:V4SI
11096 (vec_merge:V2SI
11097 (any_truncate:V2SI
11098 (match_operand:V2DI 1 "register_operand" "v"))
11099 (vec_select:V2SI
11100 (match_operand:V4SI 2 "nonimm_or_0_operand" "0C")
11101 (parallel [(const_int 0) (const_int 1)]))
11102 (match_operand:QI 3 "register_operand" "Yk"))
11103 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
11104 "TARGET_AVX512VL"
11105 "vpmov<trunsuffix>qd\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
11106 [(set_attr "type" "ssemov")
11107 (set_attr "prefix" "evex")
11108 (set_attr "mode" "TI")])
11109
11110 (define_insn "*avx512vl_<code>v2div2si2_mask_1"
11111 [(set (match_operand:V4SI 0 "register_operand" "=v")
11112 (vec_concat:V4SI
11113 (vec_merge:V2SI
11114 (any_truncate:V2SI
11115 (match_operand:V2DI 1 "register_operand" "v"))
11116 (const_vector:V2SI [(const_int 0) (const_int 0)])
11117 (match_operand:QI 2 "register_operand" "Yk"))
11118 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
11119 "TARGET_AVX512VL"
11120 "vpmov<trunsuffix>qd\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
11121 [(set_attr "type" "ssemov")
11122 (set_attr "prefix" "evex")
11123 (set_attr "mode" "TI")])
11124
11125 (define_insn "avx512vl_<code>v2div2si2_mask_store"
11126 [(set (match_operand:V4SI 0 "memory_operand" "=m")
11127 (vec_concat:V4SI
11128 (vec_merge:V2SI
11129 (any_truncate:V2SI
11130 (match_operand:V2DI 1 "register_operand" "v"))
11131 (vec_select:V2SI
11132 (match_dup 0)
11133 (parallel [(const_int 0) (const_int 1)]))
11134 (match_operand:QI 2 "register_operand" "Yk"))
11135 (vec_select:V2SI
11136 (match_dup 0)
11137 (parallel [(const_int 2) (const_int 3)]))))]
11138 "TARGET_AVX512VL"
11139 "vpmov<trunsuffix>qd\t{%1, %0%{%2%}|%0%{%2%}, %t1}"
11140 [(set_attr "type" "ssemov")
11141 (set_attr "memory" "store")
11142 (set_attr "prefix" "evex")
11143 (set_attr "mode" "TI")])
11144
11145 (define_insn "*avx512f_<code>v8div16qi2"
11146 [(set (match_operand:V16QI 0 "register_operand" "=v")
11147 (vec_concat:V16QI
11148 (any_truncate:V8QI
11149 (match_operand:V8DI 1 "register_operand" "v"))
11150 (const_vector:V8QI [(const_int 0) (const_int 0)
11151 (const_int 0) (const_int 0)
11152 (const_int 0) (const_int 0)
11153 (const_int 0) (const_int 0)])))]
11154 "TARGET_AVX512F"
11155 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
11156 [(set_attr "type" "ssemov")
11157 (set_attr "prefix" "evex")
11158 (set_attr "mode" "TI")])
11159
11160 (define_insn "*avx512f_<code>v8div16qi2_store"
11161 [(set (match_operand:V16QI 0 "memory_operand" "=m")
11162 (vec_concat:V16QI
11163 (any_truncate:V8QI
11164 (match_operand:V8DI 1 "register_operand" "v"))
11165 (vec_select:V8QI
11166 (match_dup 0)
11167 (parallel [(const_int 8) (const_int 9)
11168 (const_int 10) (const_int 11)
11169 (const_int 12) (const_int 13)
11170 (const_int 14) (const_int 15)]))))]
11171 "TARGET_AVX512F"
11172 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
11173 [(set_attr "type" "ssemov")
11174 (set_attr "memory" "store")
11175 (set_attr "prefix" "evex")
11176 (set_attr "mode" "TI")])
11177
11178 (define_insn "avx512f_<code>v8div16qi2_mask"
11179 [(set (match_operand:V16QI 0 "register_operand" "=v")
11180 (vec_concat:V16QI
11181 (vec_merge:V8QI
11182 (any_truncate:V8QI
11183 (match_operand:V8DI 1 "register_operand" "v"))
11184 (vec_select:V8QI
11185 (match_operand:V16QI 2 "nonimm_or_0_operand" "0C")
11186 (parallel [(const_int 0) (const_int 1)
11187 (const_int 2) (const_int 3)
11188 (const_int 4) (const_int 5)
11189 (const_int 6) (const_int 7)]))
11190 (match_operand:QI 3 "register_operand" "Yk"))
11191 (const_vector:V8QI [(const_int 0) (const_int 0)
11192 (const_int 0) (const_int 0)
11193 (const_int 0) (const_int 0)
11194 (const_int 0) (const_int 0)])))]
11195 "TARGET_AVX512F"
11196 "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
11197 [(set_attr "type" "ssemov")
11198 (set_attr "prefix" "evex")
11199 (set_attr "mode" "TI")])
11200
11201 (define_insn "*avx512f_<code>v8div16qi2_mask_1"
11202 [(set (match_operand:V16QI 0 "register_operand" "=v")
11203 (vec_concat:V16QI
11204 (vec_merge:V8QI
11205 (any_truncate:V8QI
11206 (match_operand:V8DI 1 "register_operand" "v"))
11207 (const_vector:V8QI [(const_int 0) (const_int 0)
11208 (const_int 0) (const_int 0)
11209 (const_int 0) (const_int 0)
11210 (const_int 0) (const_int 0)])
11211 (match_operand:QI 2 "register_operand" "Yk"))
11212 (const_vector:V8QI [(const_int 0) (const_int 0)
11213 (const_int 0) (const_int 0)
11214 (const_int 0) (const_int 0)
11215 (const_int 0) (const_int 0)])))]
11216 "TARGET_AVX512F"
11217 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
11218 [(set_attr "type" "ssemov")
11219 (set_attr "prefix" "evex")
11220 (set_attr "mode" "TI")])
11221
11222 (define_insn "avx512f_<code>v8div16qi2_mask_store"
11223 [(set (match_operand:V16QI 0 "memory_operand" "=m")
11224 (vec_concat:V16QI
11225 (vec_merge:V8QI
11226 (any_truncate:V8QI
11227 (match_operand:V8DI 1 "register_operand" "v"))
11228 (vec_select:V8QI
11229 (match_dup 0)
11230 (parallel [(const_int 0) (const_int 1)
11231 (const_int 2) (const_int 3)
11232 (const_int 4) (const_int 5)
11233 (const_int 6) (const_int 7)]))
11234 (match_operand:QI 2 "register_operand" "Yk"))
11235 (vec_select:V8QI
11236 (match_dup 0)
11237 (parallel [(const_int 8) (const_int 9)
11238 (const_int 10) (const_int 11)
11239 (const_int 12) (const_int 13)
11240 (const_int 14) (const_int 15)]))))]
11241 "TARGET_AVX512F"
11242 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%q0%{%2%}, %1}"
11243 [(set_attr "type" "ssemov")
11244 (set_attr "memory" "store")
11245 (set_attr "prefix" "evex")
11246 (set_attr "mode" "TI")])
11247
11248 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11249 ;;
11250 ;; Parallel integral arithmetic
11251 ;;
11252 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11253
11254 (define_expand "neg<mode>2"
11255 [(set (match_operand:VI_AVX2 0 "register_operand")
11256 (minus:VI_AVX2
11257 (match_dup 2)
11258 (match_operand:VI_AVX2 1 "vector_operand")))]
11259 "TARGET_SSE2"
11260 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
11261
11262 (define_expand "<plusminus_insn><mode>3"
11263 [(set (match_operand:VI_AVX2 0 "register_operand")
11264 (plusminus:VI_AVX2
11265 (match_operand:VI_AVX2 1 "vector_operand")
11266 (match_operand:VI_AVX2 2 "vector_operand")))]
11267 "TARGET_SSE2"
11268 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
11269
11270 (define_expand "<plusminus_insn><mode>3_mask"
11271 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
11272 (vec_merge:VI48_AVX512VL
11273 (plusminus:VI48_AVX512VL
11274 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
11275 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
11276 (match_operand:VI48_AVX512VL 3 "nonimm_or_0_operand")
11277 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
11278 "TARGET_AVX512F"
11279 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
11280
11281 (define_expand "<plusminus_insn><mode>3_mask"
11282 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
11283 (vec_merge:VI12_AVX512VL
11284 (plusminus:VI12_AVX512VL
11285 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
11286 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand"))
11287 (match_operand:VI12_AVX512VL 3 "nonimm_or_0_operand")
11288 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
11289 "TARGET_AVX512BW"
11290 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
11291
11292 (define_insn "*<plusminus_insn><mode>3"
11293 [(set (match_operand:VI_AVX2 0 "register_operand" "=x,v")
11294 (plusminus:VI_AVX2
11295 (match_operand:VI_AVX2 1 "vector_operand" "<comm>0,v")
11296 (match_operand:VI_AVX2 2 "vector_operand" "xBm,vm")))]
11297 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
11298 "@
11299 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
11300 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11301 [(set_attr "isa" "noavx,avx")
11302 (set_attr "type" "sseiadd")
11303 (set_attr "prefix_data16" "1,*")
11304 (set_attr "prefix" "orig,vex")
11305 (set_attr "mode" "<sseinsnmode>")])
11306
11307 (define_insn "*sub<mode>3_bcst"
11308 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
11309 (minus:VI48_AVX512VL
11310 (match_operand:VI48_AVX512VL 1 "register_operand" "v")
11311 (vec_duplicate:VI48_AVX512VL
11312 (match_operand:<ssescalarmode> 2 "memory_operand" "m"))))]
11313 "TARGET_AVX512F && ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
11314 "vpsub<ssemodesuffix>\t{%2<avx512bcst>, %1, %0|%0, %1, %2<avx512bcst>}"
11315 [(set_attr "type" "sseiadd")
11316 (set_attr "prefix" "evex")
11317 (set_attr "mode" "<sseinsnmode>")])
11318
11319 (define_insn "*add<mode>3_bcst"
11320 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
11321 (plus:VI48_AVX512VL
11322 (vec_duplicate:VI48_AVX512VL
11323 (match_operand:<ssescalarmode> 1 "memory_operand" "m"))
11324 (match_operand:VI48_AVX512VL 2 "register_operand" "v")))]
11325 "TARGET_AVX512F && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
11326 "vpadd<ssemodesuffix>\t{%1<avx512bcst>, %2, %0|%0, %2, %1<avx512bcst>}"
11327 [(set_attr "type" "sseiadd")
11328 (set_attr "prefix" "evex")
11329 (set_attr "mode" "<sseinsnmode>")])
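
;; Illustrative only: the *_bcst patterns let a scalar load that feeds every
;; lane be folded into an EVEX embedded-broadcast memory operand.  A hedged
;; C sketch of source code that can match them (assuming <immintrin.h> and
;; -mavx512f):
;;
;;   #include <immintrin.h>
;;   __m512i add_bcst (__m512i x, const int *p)
;;   {
;;     /* May assemble to vpaddd with a {1to16} broadcast operand.  */
;;     return _mm512_add_epi32 (x, _mm512_set1_epi32 (*p));
;;   }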
11330
11331 (define_insn "*<plusminus_insn><mode>3_mask"
11332 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
11333 (vec_merge:VI48_AVX512VL
11334 (plusminus:VI48_AVX512VL
11335 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "<comm>v")
11336 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm"))
11337 (match_operand:VI48_AVX512VL 3 "nonimm_or_0_operand" "0C")
11338 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
11339 "TARGET_AVX512F && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
11340 "vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
11341 [(set_attr "type" "sseiadd")
11342 (set_attr "prefix" "evex")
11343 (set_attr "mode" "<sseinsnmode>")])
11344
11345 (define_insn "*<plusminus_insn><mode>3_mask"
11346 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
11347 (vec_merge:VI12_AVX512VL
11348 (plusminus:VI12_AVX512VL
11349 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "<comm>v")
11350 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm"))
11351 (match_operand:VI12_AVX512VL 3 "nonimm_or_0_operand" "0C")
11352 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
11353 "TARGET_AVX512BW && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
11354 "vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
11355 [(set_attr "type" "sseiadd")
11356 (set_attr "prefix" "evex")
11357 (set_attr "mode" "<sseinsnmode>")])
11358
11359 (define_expand "<sse2_avx2>_<plusminus_insn><mode>3<mask_name>"
11360 [(set (match_operand:VI12_AVX2 0 "register_operand")
11361 (sat_plusminus:VI12_AVX2
11362 (match_operand:VI12_AVX2 1 "vector_operand")
11363 (match_operand:VI12_AVX2 2 "vector_operand")))]
11364 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
11365 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
11366
11367 (define_insn "*<sse2_avx2>_<plusminus_insn><mode>3<mask_name>"
11368 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v")
11369 (sat_plusminus:VI12_AVX2
11370 (match_operand:VI12_AVX2 1 "vector_operand" "<comm>0,v")
11371 (match_operand:VI12_AVX2 2 "vector_operand" "xBm,vm")))]
11372 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
11373 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
11374 "@
11375 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
11376 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11377 [(set_attr "isa" "noavx,avx")
11378 (set_attr "type" "sseiadd")
11379 (set_attr "prefix_data16" "1,*")
11380 (set_attr "prefix" "orig,maybe_evex")
11381 (set_attr "mode" "TI")])
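
;; Illustrative only: the sat_plusminus patterns implement the saturating
;; byte/word add and subtract.  A hedged C sketch of the per-lane reference
;; semantics of the unsigned byte add (paddusb):
;;
;;   #include <stdint.h>
;;   static uint8_t addus_qi (uint8_t a, uint8_t b)
;;   {
;;     unsigned sum = (unsigned) a + b;
;;     return sum > 255 ? 255 : (uint8_t) sum;
;;   }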
11382
11383 (define_expand "mul<mode>3<mask_name>"
11384 [(set (match_operand:VI1_AVX512 0 "register_operand")
11385 (mult:VI1_AVX512 (match_operand:VI1_AVX512 1 "register_operand")
11386 (match_operand:VI1_AVX512 2 "register_operand")))]
11387 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
11388 {
11389 ix86_expand_vecop_qihi (MULT, operands[0], operands[1], operands[2]);
11390 DONE;
11391 })
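
;; Illustrative only: there is no vector byte multiply instruction, so
;; ix86_expand_vecop_qihi synthesizes the V*QI multiply from word-sized
;; multiplies.  A hedged C sketch of the per-lane semantics the expansion
;; must preserve:
;;
;;   #include <stdint.h>
;;   static uint8_t mulqi (uint8_t a, uint8_t b)
;;   {
;;     return (uint8_t) (a * b);   /* low 8 bits of the 16-bit product */
;;   }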
11392
11393 (define_expand "mul<mode>3<mask_name>"
11394 [(set (match_operand:VI2_AVX2 0 "register_operand")
11395 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "vector_operand")
11396 (match_operand:VI2_AVX2 2 "vector_operand")))]
11397 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
11398 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
11399
11400 (define_insn "*mul<mode>3<mask_name>"
11401 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
11402 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "vector_operand" "%0,v")
11403 (match_operand:VI2_AVX2 2 "vector_operand" "xBm,vm")))]
11404 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
11405 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
11406 "@
11407 pmullw\t{%2, %0|%0, %2}
11408 vpmullw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11409 [(set_attr "isa" "noavx,avx")
11410 (set_attr "type" "sseimul")
11411 (set_attr "prefix_data16" "1,*")
11412 (set_attr "prefix" "orig,vex")
11413 (set_attr "mode" "<sseinsnmode>")])
11414
11415 (define_expand "<s>mul<mode>3_highpart<mask_name>"
11416 [(set (match_operand:VI2_AVX2 0 "register_operand")
11417 (truncate:VI2_AVX2
11418 (lshiftrt:<ssedoublemode>
11419 (mult:<ssedoublemode>
11420 (any_extend:<ssedoublemode>
11421 (match_operand:VI2_AVX2 1 "vector_operand"))
11422 (any_extend:<ssedoublemode>
11423 (match_operand:VI2_AVX2 2 "vector_operand")))
11424 (const_int 16))))]
11425 "TARGET_SSE2
11426 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
11427 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
11428
11429 (define_insn "*<s>mul<mode>3_highpart<mask_name>"
11430 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
11431 (truncate:VI2_AVX2
11432 (lshiftrt:<ssedoublemode>
11433 (mult:<ssedoublemode>
11434 (any_extend:<ssedoublemode>
11435 (match_operand:VI2_AVX2 1 "vector_operand" "%0,v"))
11436 (any_extend:<ssedoublemode>
11437 (match_operand:VI2_AVX2 2 "vector_operand" "xBm,vm")))
11438 (const_int 16))))]
11439 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
11440 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
11441 "@
11442 pmulh<u>w\t{%2, %0|%0, %2}
11443 vpmulh<u>w\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11444 [(set_attr "isa" "noavx,avx")
11445 (set_attr "type" "sseimul")
11446 (set_attr "prefix_data16" "1,*")
11447 (set_attr "prefix" "orig,vex")
11448 (set_attr "mode" "<sseinsnmode>")])
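
;; Illustrative only: the highpart patterns keep bits 16..31 of the widened
;; product.  A hedged C sketch of the signed reference semantics (pmulhw);
;; the unsigned variant (pmulhuw) zero-extends instead:
;;
;;   #include <stdint.h>
;;   static int16_t mulhi (int16_t a, int16_t b)
;;   {
;;     return (int16_t) (((int32_t) a * (int32_t) b) >> 16);
;;   }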
11449
11450 (define_expand "vec_widen_umult_even_v16si<mask_name>"
11451 [(set (match_operand:V8DI 0 "register_operand")
11452 (mult:V8DI
11453 (zero_extend:V8DI
11454 (vec_select:V8SI
11455 (match_operand:V16SI 1 "nonimmediate_operand")
11456 (parallel [(const_int 0) (const_int 2)
11457 (const_int 4) (const_int 6)
11458 (const_int 8) (const_int 10)
11459 (const_int 12) (const_int 14)])))
11460 (zero_extend:V8DI
11461 (vec_select:V8SI
11462 (match_operand:V16SI 2 "nonimmediate_operand")
11463 (parallel [(const_int 0) (const_int 2)
11464 (const_int 4) (const_int 6)
11465 (const_int 8) (const_int 10)
11466 (const_int 12) (const_int 14)])))))]
11467 "TARGET_AVX512F"
11468 "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
11469
11470 (define_insn "*vec_widen_umult_even_v16si<mask_name>"
11471 [(set (match_operand:V8DI 0 "register_operand" "=v")
11472 (mult:V8DI
11473 (zero_extend:V8DI
11474 (vec_select:V8SI
11475 (match_operand:V16SI 1 "nonimmediate_operand" "%v")
11476 (parallel [(const_int 0) (const_int 2)
11477 (const_int 4) (const_int 6)
11478 (const_int 8) (const_int 10)
11479 (const_int 12) (const_int 14)])))
11480 (zero_extend:V8DI
11481 (vec_select:V8SI
11482 (match_operand:V16SI 2 "nonimmediate_operand" "vm")
11483 (parallel [(const_int 0) (const_int 2)
11484 (const_int 4) (const_int 6)
11485 (const_int 8) (const_int 10)
11486 (const_int 12) (const_int 14)])))))]
11487 "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11488 "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11489 [(set_attr "type" "sseimul")
11490 (set_attr "prefix_extra" "1")
11491 (set_attr "prefix" "evex")
11492 (set_attr "mode" "XI")])
11493
11494 (define_expand "vec_widen_umult_even_v8si<mask_name>"
11495 [(set (match_operand:V4DI 0 "register_operand")
11496 (mult:V4DI
11497 (zero_extend:V4DI
11498 (vec_select:V4SI
11499 (match_operand:V8SI 1 "nonimmediate_operand")
11500 (parallel [(const_int 0) (const_int 2)
11501 (const_int 4) (const_int 6)])))
11502 (zero_extend:V4DI
11503 (vec_select:V4SI
11504 (match_operand:V8SI 2 "nonimmediate_operand")
11505 (parallel [(const_int 0) (const_int 2)
11506 (const_int 4) (const_int 6)])))))]
11507 "TARGET_AVX2 && <mask_avx512vl_condition>"
11508 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
11509
11510 (define_insn "*vec_widen_umult_even_v8si<mask_name>"
11511 [(set (match_operand:V4DI 0 "register_operand" "=v")
11512 (mult:V4DI
11513 (zero_extend:V4DI
11514 (vec_select:V4SI
11515 (match_operand:V8SI 1 "nonimmediate_operand" "%v")
11516 (parallel [(const_int 0) (const_int 2)
11517 (const_int 4) (const_int 6)])))
11518 (zero_extend:V4DI
11519 (vec_select:V4SI
11520 (match_operand:V8SI 2 "nonimmediate_operand" "vm")
11521 (parallel [(const_int 0) (const_int 2)
11522 (const_int 4) (const_int 6)])))))]
11523 "TARGET_AVX2 && <mask_avx512vl_condition>
11524 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11525 "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11526 [(set_attr "type" "sseimul")
11527 (set_attr "prefix" "maybe_evex")
11528 (set_attr "mode" "OI")])
11529
11530 (define_expand "vec_widen_umult_even_v4si<mask_name>"
11531 [(set (match_operand:V2DI 0 "register_operand")
11532 (mult:V2DI
11533 (zero_extend:V2DI
11534 (vec_select:V2SI
11535 (match_operand:V4SI 1 "vector_operand")
11536 (parallel [(const_int 0) (const_int 2)])))
11537 (zero_extend:V2DI
11538 (vec_select:V2SI
11539 (match_operand:V4SI 2 "vector_operand")
11540 (parallel [(const_int 0) (const_int 2)])))))]
11541 "TARGET_SSE2 && <mask_avx512vl_condition>"
11542 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
11543
11544 (define_insn "*vec_widen_umult_even_v4si<mask_name>"
11545 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
11546 (mult:V2DI
11547 (zero_extend:V2DI
11548 (vec_select:V2SI
11549 (match_operand:V4SI 1 "vector_operand" "%0,v")
11550 (parallel [(const_int 0) (const_int 2)])))
11551 (zero_extend:V2DI
11552 (vec_select:V2SI
11553 (match_operand:V4SI 2 "vector_operand" "xBm,vm")
11554 (parallel [(const_int 0) (const_int 2)])))))]
11555 "TARGET_SSE2 && <mask_avx512vl_condition>
11556 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11557 "@
11558 pmuludq\t{%2, %0|%0, %2}
11559 vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11560 [(set_attr "isa" "noavx,avx")
11561 (set_attr "type" "sseimul")
11562 (set_attr "prefix_data16" "1,*")
11563 (set_attr "prefix" "orig,maybe_evex")
11564 (set_attr "mode" "TI")])
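
;; Illustrative only: pmuludq multiplies only the even 32-bit lanes and
;; widens the products to 64 bits.  A hedged C sketch (assuming
;; <immintrin.h>):
;;
;;   #include <immintrin.h>
;;   /* Lane 0 of the result is (uint64_t) a[0] * b[0],
;;      lane 1 is (uint64_t) a[2] * b[2].  */
;;   __m128i widen_umult_even (__m128i a, __m128i b)
;;   {
;;     return _mm_mul_epu32 (a, b);
;;   }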
11565
11566 (define_expand "vec_widen_smult_even_v16si<mask_name>"
11567 [(set (match_operand:V8DI 0 "register_operand")
11568 (mult:V8DI
11569 (sign_extend:V8DI
11570 (vec_select:V8SI
11571 (match_operand:V16SI 1 "nonimmediate_operand")
11572 (parallel [(const_int 0) (const_int 2)
11573 (const_int 4) (const_int 6)
11574 (const_int 8) (const_int 10)
11575 (const_int 12) (const_int 14)])))
11576 (sign_extend:V8DI
11577 (vec_select:V8SI
11578 (match_operand:V16SI 2 "nonimmediate_operand")
11579 (parallel [(const_int 0) (const_int 2)
11580 (const_int 4) (const_int 6)
11581 (const_int 8) (const_int 10)
11582 (const_int 12) (const_int 14)])))))]
11583 "TARGET_AVX512F"
11584 "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
11585
11586 (define_insn "*vec_widen_smult_even_v16si<mask_name>"
11587 [(set (match_operand:V8DI 0 "register_operand" "=v")
11588 (mult:V8DI
11589 (sign_extend:V8DI
11590 (vec_select:V8SI
11591 (match_operand:V16SI 1 "nonimmediate_operand" "%v")
11592 (parallel [(const_int 0) (const_int 2)
11593 (const_int 4) (const_int 6)
11594 (const_int 8) (const_int 10)
11595 (const_int 12) (const_int 14)])))
11596 (sign_extend:V8DI
11597 (vec_select:V8SI
11598 (match_operand:V16SI 2 "nonimmediate_operand" "vm")
11599 (parallel [(const_int 0) (const_int 2)
11600 (const_int 4) (const_int 6)
11601 (const_int 8) (const_int 10)
11602 (const_int 12) (const_int 14)])))))]
11603 "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11604 "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11605 [(set_attr "type" "sseimul")
11606 (set_attr "prefix_extra" "1")
11607 (set_attr "prefix" "evex")
11608 (set_attr "mode" "XI")])
11609
11610 (define_expand "vec_widen_smult_even_v8si<mask_name>"
11611 [(set (match_operand:V4DI 0 "register_operand")
11612 (mult:V4DI
11613 (sign_extend:V4DI
11614 (vec_select:V4SI
11615 (match_operand:V8SI 1 "nonimmediate_operand")
11616 (parallel [(const_int 0) (const_int 2)
11617 (const_int 4) (const_int 6)])))
11618 (sign_extend:V4DI
11619 (vec_select:V4SI
11620 (match_operand:V8SI 2 "nonimmediate_operand")
11621 (parallel [(const_int 0) (const_int 2)
11622 (const_int 4) (const_int 6)])))))]
11623 "TARGET_AVX2 && <mask_avx512vl_condition>"
11624 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
11625
11626 (define_insn "*vec_widen_smult_even_v8si<mask_name>"
11627 [(set (match_operand:V4DI 0 "register_operand" "=v")
11628 (mult:V4DI
11629 (sign_extend:V4DI
11630 (vec_select:V4SI
11631 (match_operand:V8SI 1 "nonimmediate_operand" "%v")
11632 (parallel [(const_int 0) (const_int 2)
11633 (const_int 4) (const_int 6)])))
11634 (sign_extend:V4DI
11635 (vec_select:V4SI
11636 (match_operand:V8SI 2 "nonimmediate_operand" "vm")
11637 (parallel [(const_int 0) (const_int 2)
11638 (const_int 4) (const_int 6)])))))]
11639 "TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11640 "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11641 [(set_attr "type" "sseimul")
11642 (set_attr "prefix_extra" "1")
11643 (set_attr "prefix" "vex")
11644 (set_attr "mode" "OI")])
11645
11646 (define_expand "sse4_1_mulv2siv2di3<mask_name>"
11647 [(set (match_operand:V2DI 0 "register_operand")
11648 (mult:V2DI
11649 (sign_extend:V2DI
11650 (vec_select:V2SI
11651 (match_operand:V4SI 1 "vector_operand")
11652 (parallel [(const_int 0) (const_int 2)])))
11653 (sign_extend:V2DI
11654 (vec_select:V2SI
11655 (match_operand:V4SI 2 "vector_operand")
11656 (parallel [(const_int 0) (const_int 2)])))))]
11657 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
11658 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
11659
11660 (define_insn "*sse4_1_mulv2siv2di3<mask_name>"
11661 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
11662 (mult:V2DI
11663 (sign_extend:V2DI
11664 (vec_select:V2SI
11665 (match_operand:V4SI 1 "vector_operand" "%0,0,v")
11666 (parallel [(const_int 0) (const_int 2)])))
11667 (sign_extend:V2DI
11668 (vec_select:V2SI
11669 (match_operand:V4SI 2 "vector_operand" "YrBm,*xBm,vm")
11670 (parallel [(const_int 0) (const_int 2)])))))]
11671 "TARGET_SSE4_1 && <mask_avx512vl_condition>
11672 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11673 "@
11674 pmuldq\t{%2, %0|%0, %2}
11675 pmuldq\t{%2, %0|%0, %2}
11676 vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11677 [(set_attr "isa" "noavx,noavx,avx")
11678 (set_attr "type" "sseimul")
11679 (set_attr "prefix_data16" "1,1,*")
11680 (set_attr "prefix_extra" "1")
11681 (set_attr "prefix" "orig,orig,vex")
11682 (set_attr "mode" "TI")])
11683
11684 (define_insn "avx512bw_pmaddwd512<mode><mask_name>"
11685 [(set (match_operand:<sseunpackmode> 0 "register_operand" "=v")
11686 (unspec:<sseunpackmode>
11687 [(match_operand:VI2_AVX2 1 "register_operand" "v")
11688 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "vm")]
11689 UNSPEC_PMADDWD512))]
11690 "TARGET_AVX512BW && <mask_mode512bit_condition>"
11691 "vpmaddwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}";
11692 [(set_attr "type" "sseiadd")
11693 (set_attr "prefix" "evex")
11694 (set_attr "mode" "XI")])
11695
11696 (define_expand "avx2_pmaddwd"
11697 [(set (match_operand:V8SI 0 "register_operand")
11698 (plus:V8SI
11699 (mult:V8SI
11700 (sign_extend:V8SI
11701 (vec_select:V8HI
11702 (match_operand:V16HI 1 "nonimmediate_operand")
11703 (parallel [(const_int 0) (const_int 2)
11704 (const_int 4) (const_int 6)
11705 (const_int 8) (const_int 10)
11706 (const_int 12) (const_int 14)])))
11707 (sign_extend:V8SI
11708 (vec_select:V8HI
11709 (match_operand:V16HI 2 "nonimmediate_operand")
11710 (parallel [(const_int 0) (const_int 2)
11711 (const_int 4) (const_int 6)
11712 (const_int 8) (const_int 10)
11713 (const_int 12) (const_int 14)]))))
11714 (mult:V8SI
11715 (sign_extend:V8SI
11716 (vec_select:V8HI (match_dup 1)
11717 (parallel [(const_int 1) (const_int 3)
11718 (const_int 5) (const_int 7)
11719 (const_int 9) (const_int 11)
11720 (const_int 13) (const_int 15)])))
11721 (sign_extend:V8SI
11722 (vec_select:V8HI (match_dup 2)
11723 (parallel [(const_int 1) (const_int 3)
11724 (const_int 5) (const_int 7)
11725 (const_int 9) (const_int 11)
11726 (const_int 13) (const_int 15)]))))))]
11727 "TARGET_AVX2"
11728 "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
11729
11730 (define_insn "*avx2_pmaddwd"
11731 [(set (match_operand:V8SI 0 "register_operand" "=x,v")
11732 (plus:V8SI
11733 (mult:V8SI
11734 (sign_extend:V8SI
11735 (vec_select:V8HI
11736 (match_operand:V16HI 1 "nonimmediate_operand" "%x,v")
11737 (parallel [(const_int 0) (const_int 2)
11738 (const_int 4) (const_int 6)
11739 (const_int 8) (const_int 10)
11740 (const_int 12) (const_int 14)])))
11741 (sign_extend:V8SI
11742 (vec_select:V8HI
11743 (match_operand:V16HI 2 "nonimmediate_operand" "xm,vm")
11744 (parallel [(const_int 0) (const_int 2)
11745 (const_int 4) (const_int 6)
11746 (const_int 8) (const_int 10)
11747 (const_int 12) (const_int 14)]))))
11748 (mult:V8SI
11749 (sign_extend:V8SI
11750 (vec_select:V8HI (match_dup 1)
11751 (parallel [(const_int 1) (const_int 3)
11752 (const_int 5) (const_int 7)
11753 (const_int 9) (const_int 11)
11754 (const_int 13) (const_int 15)])))
11755 (sign_extend:V8SI
11756 (vec_select:V8HI (match_dup 2)
11757 (parallel [(const_int 1) (const_int 3)
11758 (const_int 5) (const_int 7)
11759 (const_int 9) (const_int 11)
11760 (const_int 13) (const_int 15)]))))))]
11761 "TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11762 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
11763 [(set_attr "type" "sseiadd")
11764 (set_attr "isa" "*,avx512bw")
11765 (set_attr "prefix" "vex,evex")
11766 (set_attr "mode" "OI")])
11767
11768 (define_expand "sse2_pmaddwd"
11769 [(set (match_operand:V4SI 0 "register_operand")
11770 (plus:V4SI
11771 (mult:V4SI
11772 (sign_extend:V4SI
11773 (vec_select:V4HI
11774 (match_operand:V8HI 1 "vector_operand")
11775 (parallel [(const_int 0) (const_int 2)
11776 (const_int 4) (const_int 6)])))
11777 (sign_extend:V4SI
11778 (vec_select:V4HI
11779 (match_operand:V8HI 2 "vector_operand")
11780 (parallel [(const_int 0) (const_int 2)
11781 (const_int 4) (const_int 6)]))))
11782 (mult:V4SI
11783 (sign_extend:V4SI
11784 (vec_select:V4HI (match_dup 1)
11785 (parallel [(const_int 1) (const_int 3)
11786 (const_int 5) (const_int 7)])))
11787 (sign_extend:V4SI
11788 (vec_select:V4HI (match_dup 2)
11789 (parallel [(const_int 1) (const_int 3)
11790 (const_int 5) (const_int 7)]))))))]
11791 "TARGET_SSE2"
11792 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
11793
11794 (define_insn "*sse2_pmaddwd"
11795 [(set (match_operand:V4SI 0 "register_operand" "=x,x,v")
11796 (plus:V4SI
11797 (mult:V4SI
11798 (sign_extend:V4SI
11799 (vec_select:V4HI
11800 (match_operand:V8HI 1 "vector_operand" "%0,x,v")
11801 (parallel [(const_int 0) (const_int 2)
11802 (const_int 4) (const_int 6)])))
11803 (sign_extend:V4SI
11804 (vec_select:V4HI
11805 (match_operand:V8HI 2 "vector_operand" "xBm,xm,vm")
11806 (parallel [(const_int 0) (const_int 2)
11807 (const_int 4) (const_int 6)]))))
11808 (mult:V4SI
11809 (sign_extend:V4SI
11810 (vec_select:V4HI (match_dup 1)
11811 (parallel [(const_int 1) (const_int 3)
11812 (const_int 5) (const_int 7)])))
11813 (sign_extend:V4SI
11814 (vec_select:V4HI (match_dup 2)
11815 (parallel [(const_int 1) (const_int 3)
11816 (const_int 5) (const_int 7)]))))))]
11817 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11818 "@
11819 pmaddwd\t{%2, %0|%0, %2}
11820 vpmaddwd\t{%2, %1, %0|%0, %1, %2}
11821 vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
11822 [(set_attr "isa" "noavx,avx,avx512bw")
11823 (set_attr "type" "sseiadd")
11824 (set_attr "atom_unit" "simul")
11825 (set_attr "prefix_data16" "1,*,*")
11826 (set_attr "prefix" "orig,vex,evex")
11827 (set_attr "mode" "TI")])
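
;; Illustrative only: pmaddwd multiplies adjacent signed 16-bit lanes and
;; adds each pair into one 32-bit lane.  A hedged C sketch of the reference
;; semantics for a single output lane:
;;
;;   #include <stdint.h>
;;   static int32_t pmaddwd_lane (const int16_t a[2], const int16_t b[2])
;;   {
;;     return (int32_t) a[0] * b[0] + (int32_t) a[1] * b[1];
;;   }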
11828
11829 (define_insn "avx512dq_mul<mode>3<mask_name>"
11830 [(set (match_operand:VI8 0 "register_operand" "=v")
11831 (mult:VI8
11832 (match_operand:VI8 1 "register_operand" "v")
11833 (match_operand:VI8 2 "nonimmediate_operand" "vm")))]
11834 "TARGET_AVX512DQ && <mask_mode512bit_condition>"
11835 "vpmullq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11836 [(set_attr "type" "sseimul")
11837 (set_attr "prefix" "evex")
11838 (set_attr "mode" "<sseinsnmode>")])
11839
11840 (define_expand "mul<mode>3<mask_name>"
11841 [(set (match_operand:VI4_AVX512F 0 "register_operand")
11842 (mult:VI4_AVX512F
11843 (match_operand:VI4_AVX512F 1 "general_vector_operand")
11844 (match_operand:VI4_AVX512F 2 "general_vector_operand")))]
11845 "TARGET_SSE2 && <mask_mode512bit_condition>"
11846 {
11847 if (TARGET_SSE4_1)
11848 {
11849 if (!vector_operand (operands[1], <MODE>mode))
11850 operands[1] = force_reg (<MODE>mode, operands[1]);
11851 if (!vector_operand (operands[2], <MODE>mode))
11852 operands[2] = force_reg (<MODE>mode, operands[2]);
11853 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
11854 }
11855 else
11856 {
11857 ix86_expand_sse2_mulv4si3 (operands[0], operands[1], operands[2]);
11858 DONE;
11859 }
11860 })
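
;; Illustrative only: without SSE4.1's pmulld, ix86_expand_sse2_mulv4si3
;; builds the V4SI multiply out of pmuludq operations on the even and the
;; odd (shifted-down) lanes and interleaves the low halves back together.
;; A hedged C sketch of the per-lane result that construction must produce:
;;
;;   #include <stdint.h>
;;   static void mulv4si (uint32_t d[4], const uint32_t a[4], const uint32_t b[4])
;;   {
;;     for (int i = 0; i < 4; i++)
;;       d[i] = a[i] * b[i];   /* low 32 bits of the 64-bit product */
;;   }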
11861
11862 (define_insn "*<sse4_1_avx2>_mul<mode>3<mask_name>"
11863 [(set (match_operand:VI4_AVX512F 0 "register_operand" "=Yr,*x,v")
11864 (mult:VI4_AVX512F
11865 (match_operand:VI4_AVX512F 1 "vector_operand" "%0,0,v")
11866 (match_operand:VI4_AVX512F 2 "vector_operand" "YrBm,*xBm,vm")))]
11867 "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
11868 && <mask_mode512bit_condition>"
11869 "@
11870 pmulld\t{%2, %0|%0, %2}
11871 pmulld\t{%2, %0|%0, %2}
11872 vpmulld\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11873 [(set_attr "isa" "noavx,noavx,avx")
11874 (set_attr "type" "sseimul")
11875 (set_attr "prefix_extra" "1")
11876 (set_attr "prefix" "<mask_prefix4>")
11877 (set_attr "btver2_decode" "vector,vector,vector")
11878 (set_attr "mode" "<sseinsnmode>")])
11879
11880 (define_expand "mul<mode>3"
11881 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand")
11882 (mult:VI8_AVX2_AVX512F
11883 (match_operand:VI8_AVX2_AVX512F 1 "register_operand")
11884 (match_operand:VI8_AVX2_AVX512F 2 "register_operand")))]
11885 "TARGET_SSE2"
11886 {
11887 ix86_expand_sse2_mulvxdi3 (operands[0], operands[1], operands[2]);
11888 DONE;
11889 })
11890
11891 (define_expand "vec_widen_<s>mult_hi_<mode>"
11892 [(match_operand:<sseunpackmode> 0 "register_operand")
11893 (any_extend:<sseunpackmode>
11894 (match_operand:VI124_AVX2 1 "register_operand"))
11895 (match_operand:VI124_AVX2 2 "register_operand")]
11896 "TARGET_SSE2"
11897 {
11898 ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
11899 <u_bool>, true);
11900 DONE;
11901 })
11902
11903 (define_expand "vec_widen_<s>mult_lo_<mode>"
11904 [(match_operand:<sseunpackmode> 0 "register_operand")
11905 (any_extend:<sseunpackmode>
11906 (match_operand:VI124_AVX2 1 "register_operand"))
11907 (match_operand:VI124_AVX2 2 "register_operand")]
11908 "TARGET_SSE2"
11909 {
11910 ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
11911 <u_bool>, false);
11912 DONE;
11913 })
11914
11915 ;; Most widen_<s>mult_even_<mode> can be handled directly from other
11916 ;; named patterns, but signed V4SI needs special help for plain SSE2.
11917 (define_expand "vec_widen_smult_even_v4si"
11918 [(match_operand:V2DI 0 "register_operand")
11919 (match_operand:V4SI 1 "vector_operand")
11920 (match_operand:V4SI 2 "vector_operand")]
11921 "TARGET_SSE2"
11922 {
11923 ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
11924 false, false);
11925 DONE;
11926 })
11927
11928 (define_expand "vec_widen_<s>mult_odd_<mode>"
11929 [(match_operand:<sseunpackmode> 0 "register_operand")
11930 (any_extend:<sseunpackmode>
11931 (match_operand:VI4_AVX512F 1 "general_vector_operand"))
11932 (match_operand:VI4_AVX512F 2 "general_vector_operand")]
11933 "TARGET_SSE2"
11934 {
11935 ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
11936 <u_bool>, true);
11937 DONE;
11938 })
11939
11940 (define_mode_attr SDOT_PMADD_SUF
11941 [(V32HI "512v32hi") (V16HI "") (V8HI "")])
11942
11943 (define_expand "sdot_prod<mode>"
11944 [(match_operand:<sseunpackmode> 0 "register_operand")
11945 (match_operand:VI2_AVX2 1 "register_operand")
11946 (match_operand:VI2_AVX2 2 "register_operand")
11947 (match_operand:<sseunpackmode> 3 "register_operand")]
11948 "TARGET_SSE2"
11949 {
11950 rtx t = gen_reg_rtx (<sseunpackmode>mode);
11951 emit_insn (gen_<sse2_avx2>_pmaddwd<SDOT_PMADD_SUF> (t, operands[1], operands[2]));
11952 emit_insn (gen_rtx_SET (operands[0],
11953 gen_rtx_PLUS (<sseunpackmode>mode,
11954 operands[3], t)));
11955 DONE;
11956 })
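
;; Illustrative only: sdot_prod<mode> is pmaddwd followed by an ordinary
;; vector add of the accumulator.  A hedged C sketch using the SSE2
;; intrinsics (assuming <immintrin.h>):
;;
;;   #include <immintrin.h>
;;   __m128i sdot_prodv8hi (__m128i acc, __m128i a, __m128i b)
;;   {
;;     return _mm_add_epi32 (acc, _mm_madd_epi16 (a, b));
;;   }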
11957
11958 ;; Normally we use widen_mul_even/odd, but combine can't quite get it all
11959 ;; back together when madd is available.
11960 (define_expand "sdot_prodv4si"
11961 [(match_operand:V2DI 0 "register_operand")
11962 (match_operand:V4SI 1 "register_operand")
11963 (match_operand:V4SI 2 "register_operand")
11964 (match_operand:V2DI 3 "register_operand")]
11965 "TARGET_XOP"
11966 {
11967 rtx t = gen_reg_rtx (V2DImode);
11968 emit_insn (gen_xop_pmacsdqh (t, operands[1], operands[2], operands[3]));
11969 emit_insn (gen_xop_pmacsdql (operands[0], operands[1], operands[2], t));
11970 DONE;
11971 })
11972
11973 (define_expand "uavg<mode>3_ceil"
11974 [(set (match_operand:VI12_AVX2 0 "register_operand")
11975 (truncate:VI12_AVX2
11976 (lshiftrt:<ssedoublemode>
11977 (plus:<ssedoublemode>
11978 (plus:<ssedoublemode>
11979 (zero_extend:<ssedoublemode>
11980 (match_operand:VI12_AVX2 1 "vector_operand"))
11981 (zero_extend:<ssedoublemode>
11982 (match_operand:VI12_AVX2 2 "vector_operand")))
11983 (match_dup 3))
11984 (const_int 1))))]
11985 "TARGET_SSE2"
11986 {
11987 operands[3] = CONST1_RTX(<ssedoublemode>mode);
11988 ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);
11989 })
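
;; Illustrative only: pavgb/pavgw compute the rounding (ceiling) average,
;; which is what the +1 in the pattern above models.  A hedged C sketch of
;; the per-lane semantics for bytes:
;;
;;   #include <stdint.h>
;;   static uint8_t avg_ceil_qi (uint8_t a, uint8_t b)
;;   {
;;     return (uint8_t) (((unsigned) a + b + 1) >> 1);
;;   }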
11990
11991 (define_expand "usadv16qi"
11992 [(match_operand:V4SI 0 "register_operand")
11993 (match_operand:V16QI 1 "register_operand")
11994 (match_operand:V16QI 2 "vector_operand")
11995 (match_operand:V4SI 3 "vector_operand")]
11996 "TARGET_SSE2"
11997 {
11998 rtx t1 = gen_reg_rtx (V2DImode);
11999 rtx t2 = gen_reg_rtx (V4SImode);
12000 emit_insn (gen_sse2_psadbw (t1, operands[1], operands[2]));
12001 convert_move (t2, t1, 0);
12002 emit_insn (gen_addv4si3 (operands[0], t2, operands[3]));
12003 DONE;
12004 })
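
;; psadbw yields one 64-bit sum of absolute byte differences per
;; 64-bit half of the vector; the V2DI result is then viewed as V4SI
;; and added to the V4SI accumulator in operand 3.  The 256-bit and
;; 512-bit expanders below follow the same scheme.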
12005
12006 (define_expand "usadv32qi"
12007 [(match_operand:V8SI 0 "register_operand")
12008 (match_operand:V32QI 1 "register_operand")
12009 (match_operand:V32QI 2 "nonimmediate_operand")
12010 (match_operand:V8SI 3 "nonimmediate_operand")]
12011 "TARGET_AVX2"
12012 {
12013 rtx t1 = gen_reg_rtx (V4DImode);
12014 rtx t2 = gen_reg_rtx (V8SImode);
12015 emit_insn (gen_avx2_psadbw (t1, operands[1], operands[2]));
12016 convert_move (t2, t1, 0);
12017 emit_insn (gen_addv8si3 (operands[0], t2, operands[3]));
12018 DONE;
12019 })
12020
12021 (define_expand "usadv64qi"
12022 [(match_operand:V16SI 0 "register_operand")
12023 (match_operand:V64QI 1 "register_operand")
12024 (match_operand:V64QI 2 "nonimmediate_operand")
12025 (match_operand:V16SI 3 "nonimmediate_operand")]
12026 "TARGET_AVX512BW"
12027 {
12028 rtx t1 = gen_reg_rtx (V8DImode);
12029 rtx t2 = gen_reg_rtx (V16SImode);
12030 emit_insn (gen_avx512f_psadbw (t1, operands[1], operands[2]));
12031 convert_move (t2, t1, 0);
12032 emit_insn (gen_addv16si3 (operands[0], t2, operands[3]));
12033 DONE;
12034 })
12035
12036 (define_insn "<mask_codefor>ashr<mode>3<mask_name>"
12037 [(set (match_operand:VI248_AVX512BW_1 0 "register_operand" "=v,v")
12038 (ashiftrt:VI248_AVX512BW_1
12039 (match_operand:VI248_AVX512BW_1 1 "nonimmediate_operand" "v,vm")
12040 (match_operand:DI 2 "nonmemory_operand" "v,N")))]
12041 "TARGET_AVX512VL"
12042 "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12043 [(set_attr "type" "sseishft")
12044 (set (attr "length_immediate")
12045 (if_then_else (match_operand 2 "const_int_operand")
12046 (const_string "1")
12047 (const_string "0")))
12048 (set_attr "mode" "<sseinsnmode>")])
12049
12050 (define_insn "ashr<mode>3"
12051 [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
12052 (ashiftrt:VI24_AVX2
12053 (match_operand:VI24_AVX2 1 "register_operand" "0,x")
12054 (match_operand:DI 2 "nonmemory_operand" "xN,xN")))]
12055 "TARGET_SSE2"
12056 "@
12057 psra<ssemodesuffix>\t{%2, %0|%0, %2}
12058 vpsra<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12059 [(set_attr "isa" "noavx,avx")
12060 (set_attr "type" "sseishft")
12061 (set (attr "length_immediate")
12062 (if_then_else (match_operand 2 "const_int_operand")
12063 (const_string "1")
12064 (const_string "0")))
12065 (set_attr "prefix_data16" "1,*")
12066 (set_attr "prefix" "orig,vex")
12067 (set_attr "mode" "<sseinsnmode>")])
12068
12069 (define_insn "ashr<mode>3<mask_name>"
12070 [(set (match_operand:VI248_AVX512BW_AVX512VL 0 "register_operand" "=v,v")
12071 (ashiftrt:VI248_AVX512BW_AVX512VL
12072 (match_operand:VI248_AVX512BW_AVX512VL 1 "nonimmediate_operand" "v,vm")
12073 (match_operand:DI 2 "nonmemory_operand" "v,N")))]
12074 "TARGET_AVX512F"
12075 "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12076 [(set_attr "type" "sseishft")
12077 (set (attr "length_immediate")
12078 (if_then_else (match_operand 2 "const_int_operand")
12079 (const_string "1")
12080 (const_string "0")))
12081 (set_attr "mode" "<sseinsnmode>")])
12082
12083 (define_insn "<mask_codefor><shift_insn><mode>3<mask_name>"
12084 [(set (match_operand:VI248_AVX512BW_2 0 "register_operand" "=v,v")
12085 (any_lshift:VI248_AVX512BW_2
12086 (match_operand:VI248_AVX512BW_2 1 "nonimmediate_operand" "v,vm")
12087 (match_operand:DI 2 "nonmemory_operand" "v,N")))]
12088 "TARGET_AVX512VL"
12089 "vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12090 [(set_attr "type" "sseishft")
12091 (set (attr "length_immediate")
12092 (if_then_else (match_operand 2 "const_int_operand")
12093 (const_string "1")
12094 (const_string "0")))
12095 (set_attr "mode" "<sseinsnmode>")])
12096
12097 (define_insn "<shift_insn><mode>3"
12098 [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,x")
12099 (any_lshift:VI248_AVX2
12100 (match_operand:VI248_AVX2 1 "register_operand" "0,x")
12101 (match_operand:DI 2 "nonmemory_operand" "xN,xN")))]
12102 "TARGET_SSE2"
12103 "@
12104 p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
12105 vp<vshift><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12106 [(set_attr "isa" "noavx,avx")
12107 (set_attr "type" "sseishft")
12108 (set (attr "length_immediate")
12109 (if_then_else (match_operand 2 "const_int_operand")
12110 (const_string "1")
12111 (const_string "0")))
12112 (set_attr "prefix_data16" "1,*")
12113 (set_attr "prefix" "orig,vex")
12114 (set_attr "mode" "<sseinsnmode>")])
12115
12116 (define_insn "<shift_insn><mode>3<mask_name>"
12117 [(set (match_operand:VI248_AVX512BW 0 "register_operand" "=v,v")
12118 (any_lshift:VI248_AVX512BW
12119 (match_operand:VI248_AVX512BW 1 "nonimmediate_operand" "v,m")
12120 (match_operand:DI 2 "nonmemory_operand" "vN,N")))]
12121 "TARGET_AVX512F"
12122 "vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12123 [(set_attr "type" "sseishft")
12124 (set (attr "length_immediate")
12125 (if_then_else (match_operand 2 "const_int_operand")
12126 (const_string "1")
12127 (const_string "0")))
12128 (set_attr "mode" "<sseinsnmode>")])
12129
12130
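;; Whole-vector shifts.  Operand 2 is a shift count in bits that must
;; be a multiple of 8; the expanders reinterpret the 128-bit vector as
;; V1TI, and the insns below divide the count by 8 to form the byte
;; count that pslldq/psrldq expect.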
12131 (define_expand "vec_shl_<mode>"
12132 [(set (match_dup 3)
12133 (ashift:V1TI
12134 (match_operand:V_128 1 "register_operand")
12135 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
12136 (set (match_operand:V_128 0 "register_operand") (match_dup 4))]
12137 "TARGET_SSE2"
12138 {
12139 operands[1] = gen_lowpart (V1TImode, operands[1]);
12140 operands[3] = gen_reg_rtx (V1TImode);
12141 operands[4] = gen_lowpart (<MODE>mode, operands[3]);
12142 })
12143
12144 (define_expand "vec_shr_<mode>"
12145 [(set (match_dup 3)
12146 (lshiftrt:V1TI
12147 (match_operand:V_128 1 "register_operand")
12148 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
12149 (set (match_operand:V_128 0 "register_operand") (match_dup 4))]
12150 "TARGET_SSE2"
12151 {
12152 operands[1] = gen_lowpart (V1TImode, operands[1]);
12153 operands[3] = gen_reg_rtx (V1TImode);
12154 operands[4] = gen_lowpart (<MODE>mode, operands[3]);
12155 })
12156
12157 (define_insn "avx512bw_<shift_insn><mode>3"
12158 [(set (match_operand:VIMAX_AVX512VL 0 "register_operand" "=v")
12159 (any_lshift:VIMAX_AVX512VL
12160 (match_operand:VIMAX_AVX512VL 1 "nonimmediate_operand" "vm")
12161 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
12162 "TARGET_AVX512BW"
12163 {
12164 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
12165 return "vp<vshift>dq\t{%2, %1, %0|%0, %1, %2}";
12166 }
12167 [(set_attr "type" "sseishft")
12168 (set_attr "length_immediate" "1")
12169 (set_attr "prefix" "maybe_evex")
12170 (set_attr "mode" "<sseinsnmode>")])
12171
12172 (define_insn "<sse2_avx2>_<shift_insn><mode>3"
12173 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,v")
12174 (any_lshift:VIMAX_AVX2
12175 (match_operand:VIMAX_AVX2 1 "register_operand" "0,v")
12176 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
12177 "TARGET_SSE2"
12178 {
12179 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
12180
12181 switch (which_alternative)
12182 {
12183 case 0:
12184 return "p<vshift>dq\t{%2, %0|%0, %2}";
12185 case 1:
12186 return "vp<vshift>dq\t{%2, %1, %0|%0, %1, %2}";
12187 default:
12188 gcc_unreachable ();
12189 }
12190 }
12191 [(set_attr "isa" "noavx,avx")
12192 (set_attr "type" "sseishft")
12193 (set_attr "length_immediate" "1")
12194 (set_attr "atom_unit" "sishuf")
12195 (set_attr "prefix_data16" "1,*")
12196 (set_attr "prefix" "orig,vex")
12197 (set_attr "mode" "<sseinsnmode>")])
12198
12199 (define_insn "<avx512>_<rotate>v<mode><mask_name>"
12200 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
12201 (any_rotate:VI48_AVX512VL
12202 (match_operand:VI48_AVX512VL 1 "register_operand" "v")
12203 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")))]
12204 "TARGET_AVX512F"
12205 "vp<rotate>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12206 [(set_attr "prefix" "evex")
12207 (set_attr "mode" "<sseinsnmode>")])
12208
12209 (define_insn "<avx512>_<rotate><mode><mask_name>"
12210 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
12211 (any_rotate:VI48_AVX512VL
12212 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")
12213 (match_operand:SI 2 "const_0_to_255_operand")))]
12214 "TARGET_AVX512F"
12215 "vp<rotate><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12216 [(set_attr "prefix" "evex")
12217 (set_attr "mode" "<sseinsnmode>")])
12218
12219 (define_expand "<code><mode>3"
12220 [(set (match_operand:VI124_256_AVX512F_AVX512BW 0 "register_operand")
12221 (maxmin:VI124_256_AVX512F_AVX512BW
12222 (match_operand:VI124_256_AVX512F_AVX512BW 1 "nonimmediate_operand")
12223 (match_operand:VI124_256_AVX512F_AVX512BW 2 "nonimmediate_operand")))]
12224 "TARGET_AVX2"
12225 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
12226
12227 (define_insn "*avx2_<code><mode>3"
12228 [(set (match_operand:VI124_256 0 "register_operand" "=v")
12229 (maxmin:VI124_256
12230 (match_operand:VI124_256 1 "nonimmediate_operand" "%v")
12231 (match_operand:VI124_256 2 "nonimmediate_operand" "vm")))]
12232 "TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12233 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12234 [(set_attr "type" "sseiadd")
12235 (set_attr "prefix_extra" "1")
12236 (set_attr "prefix" "vex")
12237 (set_attr "mode" "OI")])
12238
12239 (define_expand "<code><mode>3_mask"
12240 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
12241 (vec_merge:VI48_AVX512VL
12242 (maxmin:VI48_AVX512VL
12243 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
12244 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
12245 (match_operand:VI48_AVX512VL 3 "nonimm_or_0_operand")
12246 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
12247 "TARGET_AVX512F"
12248 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
12249
12250 (define_insn "*avx512f_<code><mode>3<mask_name>"
12251 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
12252 (maxmin:VI48_AVX512VL
12253 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "%v")
12254 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")))]
12255 "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12256 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12257 [(set_attr "type" "sseiadd")
12258 (set_attr "prefix_extra" "1")
12259 (set_attr "prefix" "maybe_evex")
12260 (set_attr "mode" "<sseinsnmode>")])
12261
12262 (define_insn "<mask_codefor><code><mode>3<mask_name>"
12263 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
12264 (maxmin:VI12_AVX512VL
12265 (match_operand:VI12_AVX512VL 1 "register_operand" "v")
12266 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")))]
12267 "TARGET_AVX512BW"
12268 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12269 [(set_attr "type" "sseiadd")
12270 (set_attr "prefix" "evex")
12271 (set_attr "mode" "<sseinsnmode>")])
12272
12273 (define_expand "<code><mode>3"
12274 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand")
12275 (maxmin:VI8_AVX2_AVX512F
12276 (match_operand:VI8_AVX2_AVX512F 1 "register_operand")
12277 (match_operand:VI8_AVX2_AVX512F 2 "register_operand")))]
12278 "TARGET_SSE4_2"
12279 {
12280 if (TARGET_AVX512F
12281 && (<MODE>mode == V8DImode || TARGET_AVX512VL))
12282 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
12283 else
12284 {
12285 enum rtx_code code;
12286 rtx xops[6];
12287 bool ok;
12288
12289
12290 xops[0] = operands[0];
12291
12292 if (<CODE> == SMAX || <CODE> == UMAX)
12293 {
12294 xops[1] = operands[1];
12295 xops[2] = operands[2];
12296 }
12297 else
12298 {
12299 xops[1] = operands[2];
12300 xops[2] = operands[1];
12301 }
12302
12303 code = (<CODE> == UMAX || <CODE> == UMIN) ? GTU : GT;
12304
12305 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
12306 xops[4] = operands[1];
12307 xops[5] = operands[2];
12308
12309 ok = ix86_expand_int_vcond (xops);
12310 gcc_assert (ok);
12311 DONE;
12312 }
12313 })
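
;; Without AVX512F (plus AVX512VL for the narrower modes) there is no
;; direct vpmaxsq/vpminsq-style instruction for 64-bit elements, so the
;; expander above falls back to ix86_expand_int_vcond, essentially a
;; pcmpgtq-class compare followed by a blend:
;;   op0 = (op1 > op2) ? op1 : op2   /* for the smax case */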
12314
12315 (define_expand "<code><mode>3"
12316 [(set (match_operand:VI124_128 0 "register_operand")
12317 (smaxmin:VI124_128
12318 (match_operand:VI124_128 1 "vector_operand")
12319 (match_operand:VI124_128 2 "vector_operand")))]
12320 "TARGET_SSE2"
12321 {
12322 if (TARGET_SSE4_1 || <MODE>mode == V8HImode)
12323 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
12324 else
12325 {
12326 rtx xops[6];
12327 bool ok;
12328
12329 xops[0] = operands[0];
12330 operands[1] = force_reg (<MODE>mode, operands[1]);
12331 operands[2] = force_reg (<MODE>mode, operands[2]);
12332
12333 if (<CODE> == SMAX)
12334 {
12335 xops[1] = operands[1];
12336 xops[2] = operands[2];
12337 }
12338 else
12339 {
12340 xops[1] = operands[2];
12341 xops[2] = operands[1];
12342 }
12343
12344 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
12345 xops[4] = operands[1];
12346 xops[5] = operands[2];
12347
12348 ok = ix86_expand_int_vcond (xops);
12349 gcc_assert (ok);
12350 DONE;
12351 }
12352 })
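
;; SSE2 only provides signed max/min directly for V8HI (pmaxsw/pminsw);
;; SSE4.1 adds the byte and dword forms.  The compare-and-blend
;; fallback above handles the remaining cases, again through
;; ix86_expand_int_vcond.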
12353
12354 (define_insn "*sse4_1_<code><mode>3<mask_name>"
12355 [(set (match_operand:VI14_128 0 "register_operand" "=Yr,*x,v")
12356 (smaxmin:VI14_128
12357 (match_operand:VI14_128 1 "vector_operand" "%0,0,v")
12358 (match_operand:VI14_128 2 "vector_operand" "YrBm,*xBm,vm")))]
12359 "TARGET_SSE4_1
12360 && <mask_mode512bit_condition>
12361 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12362 "@
12363 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
12364 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
12365 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12366 [(set_attr "isa" "noavx,noavx,avx")
12367 (set_attr "type" "sseiadd")
12368 (set_attr "prefix_extra" "1,1,*")
12369 (set_attr "prefix" "orig,orig,vex")
12370 (set_attr "mode" "TI")])
12371
12372 (define_insn "*<code>v8hi3"
12373 [(set (match_operand:V8HI 0 "register_operand" "=x,x,v")
12374 (smaxmin:V8HI
12375 (match_operand:V8HI 1 "vector_operand" "%0,x,v")
12376 (match_operand:V8HI 2 "vector_operand" "xBm,xm,vm")))]
12377 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12378 "@
12379 p<maxmin_int>w\t{%2, %0|%0, %2}
12380 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}
12381 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
12382 [(set_attr "isa" "noavx,avx,avx512bw")
12383 (set_attr "type" "sseiadd")
12384 (set_attr "prefix_data16" "1,*,*")
12385 (set_attr "prefix_extra" "*,1,1")
12386 (set_attr "prefix" "orig,vex,evex")
12387 (set_attr "mode" "TI")])
12388
12389 (define_expand "<code><mode>3"
12390 [(set (match_operand:VI124_128 0 "register_operand")
12391 (umaxmin:VI124_128
12392 (match_operand:VI124_128 1 "vector_operand")
12393 (match_operand:VI124_128 2 "vector_operand")))]
12394 "TARGET_SSE2"
12395 {
12396 if (TARGET_SSE4_1 || <MODE>mode == V16QImode)
12397 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
12398 else if (<CODE> == UMAX && <MODE>mode == V8HImode)
12399 {
12400 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
12401 operands[1] = force_reg (<MODE>mode, operands[1]);
12402 if (rtx_equal_p (op3, op2))
12403 op3 = gen_reg_rtx (V8HImode);
12404 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
12405 emit_insn (gen_addv8hi3 (op0, op3, op2));
12406 DONE;
12407 }
12408 else
12409 {
12410 rtx xops[6];
12411 bool ok;
12412
12413 operands[1] = force_reg (<MODE>mode, operands[1]);
12414 operands[2] = force_reg (<MODE>mode, operands[2]);
12415
12416 xops[0] = operands[0];
12417
12418 if (<CODE> == UMAX)
12419 {
12420 xops[1] = operands[1];
12421 xops[2] = operands[2];
12422 }
12423 else
12424 {
12425 xops[1] = operands[2];
12426 xops[2] = operands[1];
12427 }
12428
12429 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
12430 xops[4] = operands[1];
12431 xops[5] = operands[2];
12432
12433 ok = ix86_expand_int_vcond (xops);
12434 gcc_assert (ok);
12435 DONE;
12436 }
12437 })
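
;; The V8HI umax trick above relies on unsigned saturating subtraction:
;; psubusw gives max (op1 - op2, 0), so adding op2 back yields
;;   op0[i] = max (op1[i] - op2[i], 0) + op2[i] = max (op1[i], op2[i])
;; without needing an unsigned word compare, which SSE2 lacks.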
12438
12439 (define_insn "*sse4_1_<code><mode>3<mask_name>"
12440 [(set (match_operand:VI24_128 0 "register_operand" "=Yr,*x,v")
12441 (umaxmin:VI24_128
12442 (match_operand:VI24_128 1 "vector_operand" "%0,0,v")
12443 (match_operand:VI24_128 2 "vector_operand" "YrBm,*xBm,vm")))]
12444 "TARGET_SSE4_1
12445 && <mask_mode512bit_condition>
12446 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12447 "@
12448 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
12449 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
12450 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12451 [(set_attr "isa" "noavx,noavx,avx")
12452 (set_attr "type" "sseiadd")
12453 (set_attr "prefix_extra" "1,1,*")
12454 (set_attr "prefix" "orig,orig,vex")
12455 (set_attr "mode" "TI")])
12456
12457 (define_insn "*<code>v16qi3"
12458 [(set (match_operand:V16QI 0 "register_operand" "=x,x,v")
12459 (umaxmin:V16QI
12460 (match_operand:V16QI 1 "vector_operand" "%0,x,v")
12461 (match_operand:V16QI 2 "vector_operand" "xBm,xm,vm")))]
12462 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12463 "@
12464 p<maxmin_int>b\t{%2, %0|%0, %2}
12465 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}
12466 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
12467 [(set_attr "isa" "noavx,avx,avx512bw")
12468 (set_attr "type" "sseiadd")
12469 (set_attr "prefix_data16" "1,*,*")
12470 (set_attr "prefix_extra" "*,1,1")
12471 (set_attr "prefix" "orig,vex,evex")
12472 (set_attr "mode" "TI")])
12473
12474 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12475 ;;
12476 ;; Parallel integral comparisons
12477 ;;
12478 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12479
12480 (define_expand "avx2_eq<mode>3"
12481 [(set (match_operand:VI_256 0 "register_operand")
12482 (eq:VI_256
12483 (match_operand:VI_256 1 "nonimmediate_operand")
12484 (match_operand:VI_256 2 "nonimmediate_operand")))]
12485 "TARGET_AVX2"
12486 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
12487
12488 (define_insn "*avx2_eq<mode>3"
12489 [(set (match_operand:VI_256 0 "register_operand" "=x")
12490 (eq:VI_256
12491 (match_operand:VI_256 1 "nonimmediate_operand" "%x")
12492 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
12493 "TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12494 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12495 [(set_attr "type" "ssecmp")
12496 (set_attr "prefix_extra" "1")
12497 (set_attr "prefix" "vex")
12498 (set_attr "mode" "OI")])
12499
12500 (define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>"
12501 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
12502 (unspec:<avx512fmaskmode>
12503 [(match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
12504 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand")]
12505 UNSPEC_MASKED_EQ))]
12506 "TARGET_AVX512BW"
12507 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
12508
12509 (define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>"
12510 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
12511 (unspec:<avx512fmaskmode>
12512 [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
12513 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand")]
12514 UNSPEC_MASKED_EQ))]
12515 "TARGET_AVX512F"
12516 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
12517
12518 (define_insn "<avx512>_eq<mode>3<mask_scalar_merge_name>_1"
12519 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k,k")
12520 (unspec:<avx512fmaskmode>
12521 [(match_operand:VI12_AVX512VL 1 "nonimm_or_0_operand" "%v,v")
12522 (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand" "vm,C")]
12523 UNSPEC_MASKED_EQ))]
12524 "TARGET_AVX512BW && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12525 "@
12526 vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}
12527 vptestnm<ssemodesuffix>\t{%1, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %1}"
12528 [(set_attr "type" "ssecmp")
12529 (set_attr "prefix_extra" "1")
12530 (set_attr "prefix" "evex")
12531 (set_attr "mode" "<sseinsnmode>")])
12532
12533 (define_insn "<avx512>_eq<mode>3<mask_scalar_merge_name>_1"
12534 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k,k")
12535 (unspec:<avx512fmaskmode>
12536 [(match_operand:VI48_AVX512VL 1 "nonimm_or_0_operand" "%v,v")
12537 (match_operand:VI48_AVX512VL 2 "nonimm_or_0_operand" "vm,C")]
12538 UNSPEC_MASKED_EQ))]
12539 "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12540 "@
12541 vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}
12542 vptestnm<ssemodesuffix>\t{%1, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %1}"
12543 [(set_attr "type" "ssecmp")
12544 (set_attr "prefix_extra" "1")
12545 (set_attr "prefix" "evex")
12546 (set_attr "mode" "<sseinsnmode>")])
12547
12548 (define_insn "*sse4_1_eqv2di3"
12549 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,x")
12550 (eq:V2DI
12551 (match_operand:V2DI 1 "vector_operand" "%0,0,x")
12552 (match_operand:V2DI 2 "vector_operand" "YrBm,*xBm,xm")))]
12553 "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12554 "@
12555 pcmpeqq\t{%2, %0|%0, %2}
12556 pcmpeqq\t{%2, %0|%0, %2}
12557 vpcmpeqq\t{%2, %1, %0|%0, %1, %2}"
12558 [(set_attr "isa" "noavx,noavx,avx")
12559 (set_attr "type" "ssecmp")
12560 (set_attr "prefix_extra" "1")
12561 (set_attr "prefix" "orig,orig,vex")
12562 (set_attr "mode" "TI")])
12563
12564 (define_insn "*sse2_eq<mode>3"
12565 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
12566 (eq:VI124_128
12567 (match_operand:VI124_128 1 "vector_operand" "%0,x")
12568 (match_operand:VI124_128 2 "vector_operand" "xBm,xm")))]
12569 "TARGET_SSE2 && !TARGET_XOP
12570 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12571 "@
12572 pcmpeq<ssemodesuffix>\t{%2, %0|%0, %2}
12573 vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12574 [(set_attr "isa" "noavx,avx")
12575 (set_attr "type" "ssecmp")
12576 (set_attr "prefix_data16" "1,*")
12577 (set_attr "prefix" "orig,vex")
12578 (set_attr "mode" "TI")])
12579
12580 (define_expand "sse2_eq<mode>3"
12581 [(set (match_operand:VI124_128 0 "register_operand")
12582 (eq:VI124_128
12583 (match_operand:VI124_128 1 "vector_operand")
12584 (match_operand:VI124_128 2 "vector_operand")))]
12585 "TARGET_SSE2 && !TARGET_XOP "
12586 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
12587
12588 (define_expand "sse4_1_eqv2di3"
12589 [(set (match_operand:V2DI 0 "register_operand")
12590 (eq:V2DI
12591 (match_operand:V2DI 1 "vector_operand")
12592 (match_operand:V2DI 2 "vector_operand")))]
12593 "TARGET_SSE4_1"
12594 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
12595
12596 (define_insn "sse4_2_gtv2di3"
12597 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,x")
12598 (gt:V2DI
12599 (match_operand:V2DI 1 "register_operand" "0,0,x")
12600 (match_operand:V2DI 2 "vector_operand" "YrBm,*xBm,xm")))]
12601 "TARGET_SSE4_2"
12602 "@
12603 pcmpgtq\t{%2, %0|%0, %2}
12604 pcmpgtq\t{%2, %0|%0, %2}
12605 vpcmpgtq\t{%2, %1, %0|%0, %1, %2}"
12606 [(set_attr "isa" "noavx,noavx,avx")
12607 (set_attr "type" "ssecmp")
12608 (set_attr "prefix_extra" "1")
12609 (set_attr "prefix" "orig,orig,vex")
12610 (set_attr "mode" "TI")])
12611
12612 (define_insn "avx2_gt<mode>3"
12613 [(set (match_operand:VI_256 0 "register_operand" "=x")
12614 (gt:VI_256
12615 (match_operand:VI_256 1 "register_operand" "x")
12616 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
12617 "TARGET_AVX2"
12618 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12619 [(set_attr "type" "ssecmp")
12620 (set_attr "prefix_extra" "1")
12621 (set_attr "prefix" "vex")
12622 (set_attr "mode" "OI")])
12623
12624 (define_insn "<avx512>_gt<mode>3<mask_scalar_merge_name>"
12625 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
12626 (unspec:<avx512fmaskmode>
12627 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
12628 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))]
12629 "TARGET_AVX512F"
12630 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
12631 [(set_attr "type" "ssecmp")
12632 (set_attr "prefix_extra" "1")
12633 (set_attr "prefix" "evex")
12634 (set_attr "mode" "<sseinsnmode>")])
12635
12636 (define_insn "<avx512>_gt<mode>3<mask_scalar_merge_name>"
12637 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
12638 (unspec:<avx512fmaskmode>
12639 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
12640 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))]
12641 "TARGET_AVX512BW"
12642 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
12643 [(set_attr "type" "ssecmp")
12644 (set_attr "prefix_extra" "1")
12645 (set_attr "prefix" "evex")
12646 (set_attr "mode" "<sseinsnmode>")])
12647
12648 (define_insn "sse2_gt<mode>3"
12649 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
12650 (gt:VI124_128
12651 (match_operand:VI124_128 1 "register_operand" "0,x")
12652 (match_operand:VI124_128 2 "vector_operand" "xBm,xm")))]
12653 "TARGET_SSE2 && !TARGET_XOP"
12654 "@
12655 pcmpgt<ssemodesuffix>\t{%2, %0|%0, %2}
12656 vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12657 [(set_attr "isa" "noavx,avx")
12658 (set_attr "type" "ssecmp")
12659 (set_attr "prefix_data16" "1,*")
12660 (set_attr "prefix" "orig,vex")
12661 (set_attr "mode" "TI")])
12662
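;; Integer vcond/vcondu expanders.  They all funnel into
;; ix86_expand_int_vcond, which canonicalizes the comparison to the
;; compares the ISA actually has (GT/GTU/EQ) and then emits the compare
;; followed by a blend of operands 1 and 2.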
12663 (define_expand "vcond<V_512:mode><VI_AVX512BW:mode>"
12664 [(set (match_operand:V_512 0 "register_operand")
12665 (if_then_else:V_512
12666 (match_operator 3 ""
12667 [(match_operand:VI_AVX512BW 4 "nonimmediate_operand")
12668 (match_operand:VI_AVX512BW 5 "general_operand")])
12669 (match_operand:V_512 1)
12670 (match_operand:V_512 2)))]
12671 "TARGET_AVX512F
12672 && (GET_MODE_NUNITS (<V_512:MODE>mode)
12673 == GET_MODE_NUNITS (<VI_AVX512BW:MODE>mode))"
12674 {
12675 bool ok = ix86_expand_int_vcond (operands);
12676 gcc_assert (ok);
12677 DONE;
12678 })
12679
12680 (define_expand "vcond<V_256:mode><VI_256:mode>"
12681 [(set (match_operand:V_256 0 "register_operand")
12682 (if_then_else:V_256
12683 (match_operator 3 ""
12684 [(match_operand:VI_256 4 "nonimmediate_operand")
12685 (match_operand:VI_256 5 "general_operand")])
12686 (match_operand:V_256 1)
12687 (match_operand:V_256 2)))]
12688 "TARGET_AVX2
12689 && (GET_MODE_NUNITS (<V_256:MODE>mode)
12690 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
12691 {
12692 bool ok = ix86_expand_int_vcond (operands);
12693 gcc_assert (ok);
12694 DONE;
12695 })
12696
12697 (define_expand "vcond<V_128:mode><VI124_128:mode>"
12698 [(set (match_operand:V_128 0 "register_operand")
12699 (if_then_else:V_128
12700 (match_operator 3 ""
12701 [(match_operand:VI124_128 4 "vector_operand")
12702 (match_operand:VI124_128 5 "general_operand")])
12703 (match_operand:V_128 1)
12704 (match_operand:V_128 2)))]
12705 "TARGET_SSE2
12706 && (GET_MODE_NUNITS (<V_128:MODE>mode)
12707 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
12708 {
12709 bool ok = ix86_expand_int_vcond (operands);
12710 gcc_assert (ok);
12711 DONE;
12712 })
12713
12714 (define_expand "vcond<VI8F_128:mode>v2di"
12715 [(set (match_operand:VI8F_128 0 "register_operand")
12716 (if_then_else:VI8F_128
12717 (match_operator 3 ""
12718 [(match_operand:V2DI 4 "vector_operand")
12719 (match_operand:V2DI 5 "general_operand")])
12720 (match_operand:VI8F_128 1)
12721 (match_operand:VI8F_128 2)))]
12722 "TARGET_SSE4_2"
12723 {
12724 bool ok = ix86_expand_int_vcond (operands);
12725 gcc_assert (ok);
12726 DONE;
12727 })
12728
12729 (define_expand "vcondu<V_512:mode><VI_AVX512BW:mode>"
12730 [(set (match_operand:V_512 0 "register_operand")
12731 (if_then_else:V_512
12732 (match_operator 3 ""
12733 [(match_operand:VI_AVX512BW 4 "nonimmediate_operand")
12734 (match_operand:VI_AVX512BW 5 "nonimmediate_operand")])
12735 (match_operand:V_512 1 "general_operand")
12736 (match_operand:V_512 2 "general_operand")))]
12737 "TARGET_AVX512F
12738 && (GET_MODE_NUNITS (<V_512:MODE>mode)
12739 == GET_MODE_NUNITS (<VI_AVX512BW:MODE>mode))"
12740 {
12741 bool ok = ix86_expand_int_vcond (operands);
12742 gcc_assert (ok);
12743 DONE;
12744 })
12745
12746 (define_expand "vcondu<V_256:mode><VI_256:mode>"
12747 [(set (match_operand:V_256 0 "register_operand")
12748 (if_then_else:V_256
12749 (match_operator 3 ""
12750 [(match_operand:VI_256 4 "nonimmediate_operand")
12751 (match_operand:VI_256 5 "nonimmediate_operand")])
12752 (match_operand:V_256 1 "general_operand")
12753 (match_operand:V_256 2 "general_operand")))]
12754 "TARGET_AVX2
12755 && (GET_MODE_NUNITS (<V_256:MODE>mode)
12756 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
12757 {
12758 bool ok = ix86_expand_int_vcond (operands);
12759 gcc_assert (ok);
12760 DONE;
12761 })
12762
12763 (define_expand "vcondu<V_128:mode><VI124_128:mode>"
12764 [(set (match_operand:V_128 0 "register_operand")
12765 (if_then_else:V_128
12766 (match_operator 3 ""
12767 [(match_operand:VI124_128 4 "vector_operand")
12768 (match_operand:VI124_128 5 "vector_operand")])
12769 (match_operand:V_128 1 "general_operand")
12770 (match_operand:V_128 2 "general_operand")))]
12771 "TARGET_SSE2
12772 && (GET_MODE_NUNITS (<V_128:MODE>mode)
12773 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
12774 {
12775 bool ok = ix86_expand_int_vcond (operands);
12776 gcc_assert (ok);
12777 DONE;
12778 })
12779
12780 (define_expand "vcondu<VI8F_128:mode>v2di"
12781 [(set (match_operand:VI8F_128 0 "register_operand")
12782 (if_then_else:VI8F_128
12783 (match_operator 3 ""
12784 [(match_operand:V2DI 4 "vector_operand")
12785 (match_operand:V2DI 5 "vector_operand")])
12786 (match_operand:VI8F_128 1 "general_operand")
12787 (match_operand:VI8F_128 2 "general_operand")))]
12788 "TARGET_SSE4_2"
12789 {
12790 bool ok = ix86_expand_int_vcond (operands);
12791 gcc_assert (ok);
12792 DONE;
12793 })
12794
12795 (define_expand "vcondeq<VI8F_128:mode>v2di"
12796 [(set (match_operand:VI8F_128 0 "register_operand")
12797 (if_then_else:VI8F_128
12798 (match_operator 3 ""
12799 [(match_operand:V2DI 4 "vector_operand")
12800 (match_operand:V2DI 5 "general_operand")])
12801 (match_operand:VI8F_128 1)
12802 (match_operand:VI8F_128 2)))]
12803 "TARGET_SSE4_1"
12804 {
12805 bool ok = ix86_expand_int_vcond (operands);
12806 gcc_assert (ok);
12807 DONE;
12808 })
12809
12810 (define_mode_iterator VEC_PERM_AVX2
12811 [V16QI V8HI V4SI V2DI V4SF V2DF
12812 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
12813 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
12814 (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")
12815 (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
12816 (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
12817 (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512VBMI")])
12818
12819 (define_expand "vec_perm<mode>"
12820 [(match_operand:VEC_PERM_AVX2 0 "register_operand")
12821 (match_operand:VEC_PERM_AVX2 1 "register_operand")
12822 (match_operand:VEC_PERM_AVX2 2 "register_operand")
12823 (match_operand:<sseintvecmode> 3 "register_operand")]
12824 "TARGET_SSSE3 || TARGET_AVX || TARGET_XOP"
12825 {
12826 ix86_expand_vec_perm (operands);
12827 DONE;
12828 })
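
;; ix86_expand_vec_perm lowers these variable two-operand permutes to
;; whatever the target provides, e.g. pshufb-based selects for the
;; 128-bit modes and the vperm* family on AVX2/AVX-512.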
12829
12830 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12831 ;;
12832 ;; Parallel bitwise logical operations
12833 ;;
12834 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12835
12836 (define_expand "one_cmpl<mode>2"
12837 [(set (match_operand:VI 0 "register_operand")
12838 (xor:VI (match_operand:VI 1 "vector_operand")
12839 (match_dup 2)))]
12840 "TARGET_SSE"
12841 {
12842 if (!TARGET_AVX512F)
12843 operands[2] = force_reg (<MODE>mode, CONSTM1_RTX (<MODE>mode));
12844 else
12845 operands[2] = CONSTM1_RTX (<MODE>mode);
12846 })
12847
12848 (define_insn "<mask_codefor>one_cmpl<mode>2<mask_name>"
12849 [(set (match_operand:VI 0 "register_operand" "=v,v")
12850 (xor:VI (match_operand:VI 1 "nonimmediate_operand" "v,m")
12851 (match_operand:VI 2 "vector_all_ones_operand" "BC,BC")))]
12852 "TARGET_AVX512F
12853 && (!<mask_applied>
12854 || <ssescalarmode>mode == SImode
12855 || <ssescalarmode>mode == DImode)"
12856 {
12857 if (TARGET_AVX512VL)
12858 return "vpternlog<ternlogsuffix>\t{$0x55, %1, %0, %0<mask_operand3>|%0<mask_operand3>, %0, %1, 0x55}";
12859 else
12860 return "vpternlog<ternlogsuffix>\t{$0x55, %g1, %g0, %g0<mask_operand3>|%g0<mask_operand3>, %g0, %g1, 0x55}";
12861 }
12862 [(set_attr "type" "sselog")
12863 (set_attr "prefix" "evex")
12864 (set (attr "mode")
12865 (if_then_else (match_test "TARGET_AVX512VL")
12866 (const_string "<sseinsnmode>")
12867 (const_string "XI")))
12868 (set (attr "enabled")
12869 (if_then_else (eq_attr "alternative" "1")
12870 (symbol_ref "<MODE_SIZE> == 64 || TARGET_AVX512VL")
12871 (const_int 1)))])
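
;; The vpternlog immediate 0x55 is the truth table "NOT of the third
;; source", so a single instruction computes the bitwise complement of
;; operand 1.  Without AVX512VL the %g modifiers promote the operands
;; to their 512-bit names so the EVEX encoding remains valid.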
12872
12873 (define_expand "<sse2_avx2>_andnot<mode>3"
12874 [(set (match_operand:VI_AVX2 0 "register_operand")
12875 (and:VI_AVX2
12876 (not:VI_AVX2 (match_operand:VI_AVX2 1 "register_operand"))
12877 (match_operand:VI_AVX2 2 "vector_operand")))]
12878 "TARGET_SSE2")
12879
12880 (define_expand "<sse2_avx2>_andnot<mode>3_mask"
12881 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
12882 (vec_merge:VI48_AVX512VL
12883 (and:VI48_AVX512VL
12884 (not:VI48_AVX512VL
12885 (match_operand:VI48_AVX512VL 1 "register_operand"))
12886 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
12887 (match_operand:VI48_AVX512VL 3 "nonimm_or_0_operand")
12888 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
12889 "TARGET_AVX512F")
12890
12891 (define_expand "<sse2_avx2>_andnot<mode>3_mask"
12892 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
12893 (vec_merge:VI12_AVX512VL
12894 (and:VI12_AVX512VL
12895 (not:VI12_AVX512VL
12896 (match_operand:VI12_AVX512VL 1 "register_operand"))
12897 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand"))
12898 (match_operand:VI12_AVX512VL 3 "nonimm_or_0_operand")
12899 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
12900 "TARGET_AVX512BW")
12901
12902 (define_insn "*andnot<mode>3"
12903 [(set (match_operand:VI 0 "register_operand" "=x,x,v")
12904 (and:VI
12905 (not:VI (match_operand:VI 1 "register_operand" "0,x,v"))
12906 (match_operand:VI 2 "vector_operand" "xBm,xm,vm")))]
12907 "TARGET_SSE"
12908 {
12909 char buf[64];
12910 const char *ops;
12911 const char *tmp;
12912 const char *ssesuffix;
12913
12914 switch (get_attr_mode (insn))
12915 {
12916 case MODE_XI:
12917 gcc_assert (TARGET_AVX512F);
12918 /* FALLTHRU */
12919 case MODE_OI:
12920 gcc_assert (TARGET_AVX2);
12921 /* FALLTHRU */
12922 case MODE_TI:
12923 gcc_assert (TARGET_SSE2);
12924 tmp = "pandn";
12925 switch (<MODE>mode)
12926 {
12927 case E_V64QImode:
12928 case E_V32HImode:
12929 /* There is no vpandnb or vpandnw instruction, nor vpandn for
12930 512-bit vectors. Use vpandnq instead. */
12931 ssesuffix = "q";
12932 break;
12933 case E_V16SImode:
12934 case E_V8DImode:
12935 ssesuffix = "<ssemodesuffix>";
12936 break;
12937 case E_V8SImode:
12938 case E_V4DImode:
12939 case E_V4SImode:
12940 case E_V2DImode:
12941 ssesuffix = (TARGET_AVX512VL && which_alternative == 2
12942 ? "<ssemodesuffix>" : "");
12943 break;
12944 default:
12945 ssesuffix = TARGET_AVX512VL && which_alternative == 2 ? "q" : "";
12946 }
12947 break;
12948
12949 case MODE_V16SF:
12950 gcc_assert (TARGET_AVX512F);
12951 /* FALLTHRU */
12952 case MODE_V8SF:
12953 gcc_assert (TARGET_AVX);
12954 /* FALLTHRU */
12955 case MODE_V4SF:
12956 gcc_assert (TARGET_SSE);
12957 tmp = "andn";
12958 ssesuffix = "ps";
12959 break;
12960
12961 default:
12962 gcc_unreachable ();
12963 }
12964
12965 switch (which_alternative)
12966 {
12967 case 0:
12968 ops = "%s%s\t{%%2, %%0|%%0, %%2}";
12969 break;
12970 case 1:
12971 case 2:
12972 ops = "v%s%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
12973 break;
12974 default:
12975 gcc_unreachable ();
12976 }
12977
12978 snprintf (buf, sizeof (buf), ops, tmp, ssesuffix);
12979 output_asm_insn (buf, operands);
12980 return "";
12981 }
12982 [(set_attr "isa" "noavx,avx,avx")
12983 (set_attr "type" "sselog")
12984 (set (attr "prefix_data16")
12985 (if_then_else
12986 (and (eq_attr "alternative" "0")
12987 (eq_attr "mode" "TI"))
12988 (const_string "1")
12989 (const_string "*")))
12990 (set_attr "prefix" "orig,vex,evex")
12991 (set (attr "mode")
12992 (cond [(match_test "TARGET_AVX2")
12993 (const_string "<sseinsnmode>")
12994 (match_test "TARGET_AVX")
12995 (if_then_else
12996 (match_test "<MODE_SIZE> > 16")
12997 (const_string "V8SF")
12998 (const_string "<sseinsnmode>"))
12999 (ior (not (match_test "TARGET_SSE2"))
13000 (match_test "optimize_function_for_size_p (cfun)"))
13001 (const_string "V4SF")
13002 ]
13003 (const_string "<sseinsnmode>")))])
13004
13005 (define_insn "*andnot<mode>3_bcst"
13006 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
13007 (and:VI48_AVX512VL
13008 (not:VI48_AVX512VL
13009 (match_operand:VI48_AVX512VL 1 "register_operand" "v"))
13010 (vec_duplicate:VI48_AVX512VL
13011 (match_operand:<ssescalarmode> 2 "memory_operand" "m"))))]
13012 "TARGET_AVX512F"
13013 "vpandn<ssemodesuffix>\t{%2<avx512bcst>, %1, %0|%0, %1, %2<avx512bcst>}"
13014 [(set_attr "type" "sselog")
13015 (set_attr "prefix" "evex")
13016 (set_attr "mode" "<sseinsnmode>")])
13017
13018 (define_insn "*andnot<mode>3_mask"
13019 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
13020 (vec_merge:VI48_AVX512VL
13021 (and:VI48_AVX512VL
13022 (not:VI48_AVX512VL
13023 (match_operand:VI48_AVX512VL 1 "register_operand" "v"))
13024 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm"))
13025 (match_operand:VI48_AVX512VL 3 "nonimm_or_0_operand" "0C")
13026 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
13027 "TARGET_AVX512F"
13028 "vpandn<ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}";
13029 [(set_attr "type" "sselog")
13030 (set_attr "prefix" "evex")
13031 (set_attr "mode" "<sseinsnmode>")])
13032
13033 (define_expand "<code><mode>3"
13034 [(set (match_operand:VI 0 "register_operand")
13035 (any_logic:VI
13036 (match_operand:VI 1 "nonimmediate_or_const_vector_operand")
13037 (match_operand:VI 2 "nonimmediate_or_const_vector_operand")))]
13038 "TARGET_SSE"
13039 {
13040 ix86_expand_vector_logical_operator (<CODE>, <MODE>mode, operands);
13041 DONE;
13042 })
13043
13044 (define_insn "<mask_codefor><code><mode>3<mask_name>"
13045 [(set (match_operand:VI48_AVX_AVX512F 0 "register_operand" "=x,x,v")
13046 (any_logic:VI48_AVX_AVX512F
13047 (match_operand:VI48_AVX_AVX512F 1 "vector_operand" "%0,x,v")
13048 (match_operand:VI48_AVX_AVX512F 2 "vector_operand" "xBm,xm,vm")))]
13049 "TARGET_SSE && <mask_mode512bit_condition>
13050 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
13051 {
13052 char buf[64];
13053 const char *ops;
13054 const char *tmp;
13055 const char *ssesuffix;
13056
13057 switch (get_attr_mode (insn))
13058 {
13059 case MODE_XI:
13060 gcc_assert (TARGET_AVX512F);
13061 /* FALLTHRU */
13062 case MODE_OI:
13063 gcc_assert (TARGET_AVX2);
13064 /* FALLTHRU */
13065 case MODE_TI:
13066 gcc_assert (TARGET_SSE2);
13067 tmp = "p<logic>";
13068 switch (<MODE>mode)
13069 {
13070 case E_V16SImode:
13071 case E_V8DImode:
13072 ssesuffix = "<ssemodesuffix>";
13073 break;
13074 case E_V8SImode:
13075 case E_V4DImode:
13076 case E_V4SImode:
13077 case E_V2DImode:
13078 ssesuffix = (TARGET_AVX512VL
13079 && (<mask_applied> || which_alternative == 2)
13080 ? "<ssemodesuffix>" : "");
13081 break;
13082 default:
13083 gcc_unreachable ();
13084 }
13085 break;
13086
13087 case MODE_V8SF:
13088 gcc_assert (TARGET_AVX);
13089 /* FALLTHRU */
13090 case MODE_V4SF:
13091 gcc_assert (TARGET_SSE);
13092 tmp = "<logic>";
13093 ssesuffix = "ps";
13094 break;
13095
13096 default:
13097 gcc_unreachable ();
13098 }
13099
13100 switch (which_alternative)
13101 {
13102 case 0:
13103 if (<mask_applied>)
13104 ops = "v%s%s\t{%%2, %%0, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%0, %%2}";
13105 else
13106 ops = "%s%s\t{%%2, %%0|%%0, %%2}";
13107 break;
13108 case 1:
13109 case 2:
13110 ops = "v%s%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
13111 break;
13112 default:
13113 gcc_unreachable ();
13114 }
13115
13116 snprintf (buf, sizeof (buf), ops, tmp, ssesuffix);
13117 output_asm_insn (buf, operands);
13118 return "";
13119 }
13120 [(set_attr "isa" "noavx,avx,avx")
13121 (set_attr "type" "sselog")
13122 (set (attr "prefix_data16")
13123 (if_then_else
13124 (and (eq_attr "alternative" "0")
13125 (eq_attr "mode" "TI"))
13126 (const_string "1")
13127 (const_string "*")))
13128 (set_attr "prefix" "<mask_prefix3>,evex")
13129 (set (attr "mode")
13130 (cond [(match_test "TARGET_AVX2")
13131 (const_string "<sseinsnmode>")
13132 (match_test "TARGET_AVX")
13133 (if_then_else
13134 (match_test "<MODE_SIZE> > 16")
13135 (const_string "V8SF")
13136 (const_string "<sseinsnmode>"))
13137 (ior (not (match_test "TARGET_SSE2"))
13138 (match_test "optimize_function_for_size_p (cfun)"))
13139 (const_string "V4SF")
13140 ]
13141 (const_string "<sseinsnmode>")))])
13142
13143 (define_insn "*<code><mode>3"
13144 [(set (match_operand:VI12_AVX_AVX512F 0 "register_operand" "=x,x,v")
13145 (any_logic:VI12_AVX_AVX512F
13146 (match_operand:VI12_AVX_AVX512F 1 "vector_operand" "%0,x,v")
13147 (match_operand:VI12_AVX_AVX512F 2 "vector_operand" "xBm,xm,vm")))]
13148 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
13149 {
13150 char buf[64];
13151 const char *ops;
13152 const char *tmp;
13153 const char *ssesuffix;
13154
13155 switch (get_attr_mode (insn))
13156 {
13157 case MODE_XI:
13158 gcc_assert (TARGET_AVX512F);
13159 /* FALLTHRU */
13160 case MODE_OI:
13161 gcc_assert (TARGET_AVX2);
13162 /* FALLTHRU */
13163 case MODE_TI:
13164 gcc_assert (TARGET_SSE2);
13165 tmp = "p<logic>";
13166 switch (<MODE>mode)
13167 {
13168 case E_V64QImode:
13169 case E_V32HImode:
13170 ssesuffix = "q";
13171 break;
13172 case E_V32QImode:
13173 case E_V16HImode:
13174 case E_V16QImode:
13175 case E_V8HImode:
13176 ssesuffix = TARGET_AVX512VL && which_alternative == 2 ? "q" : "";
13177 break;
13178 default:
13179 gcc_unreachable ();
13180 }
13181 break;
13182
13183 case MODE_V8SF:
13184 gcc_assert (TARGET_AVX);
13185 /* FALLTHRU */
13186 case MODE_V4SF:
13187 gcc_assert (TARGET_SSE);
13188 tmp = "<logic>";
13189 ssesuffix = "ps";
13190 break;
13191
13192 default:
13193 gcc_unreachable ();
13194 }
13195
13196 switch (which_alternative)
13197 {
13198 case 0:
13199 ops = "%s%s\t{%%2, %%0|%%0, %%2}";
13200 break;
13201 case 1:
13202 case 2:
13203 ops = "v%s%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
13204 break;
13205 default:
13206 gcc_unreachable ();
13207 }
13208
13209 snprintf (buf, sizeof (buf), ops, tmp, ssesuffix);
13210 output_asm_insn (buf, operands);
13211 return "";
13212 }
13213 [(set_attr "isa" "noavx,avx,avx")
13214 (set_attr "type" "sselog")
13215 (set (attr "prefix_data16")
13216 (if_then_else
13217 (and (eq_attr "alternative" "0")
13218 (eq_attr "mode" "TI"))
13219 (const_string "1")
13220 (const_string "*")))
13221 (set_attr "prefix" "orig,vex,evex")
13222 (set (attr "mode")
13223 (cond [(match_test "TARGET_AVX2")
13224 (const_string "<sseinsnmode>")
13225 (match_test "TARGET_AVX")
13226 (if_then_else
13227 (match_test "<MODE_SIZE> > 16")
13228 (const_string "V8SF")
13229 (const_string "<sseinsnmode>"))
13230 (ior (not (match_test "TARGET_SSE2"))
13231 (match_test "optimize_function_for_size_p (cfun)"))
13232 (const_string "V4SF")
13233 ]
13234 (const_string "<sseinsnmode>")))])
13235
13236 (define_insn "*<code><mode>3_bcst"
13237 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
13238 (any_logic:VI48_AVX512VL
13239 (vec_duplicate:VI48_AVX512VL
13240 (match_operand:<ssescalarmode> 1 "memory_operand" "m"))
13241 (match_operand:VI48_AVX512VL 2 "register_operand" "v")))]
13242 "TARGET_AVX512F && <mask_avx512vl_condition>"
13243 "vp<logic><ssemodesuffix>\t{%1<avx512bcst>, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1<avx512bcst>}"
13244 [(set_attr "type" "sseiadd")
13245 (set_attr "prefix" "evex")
13246 (set_attr "mode" "<sseinsnmode>")])
13247
13248 (define_mode_iterator VI1248_AVX512VLBW
13249 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX512VL && TARGET_AVX512BW")
13250 (V16QI "TARGET_AVX512VL && TARGET_AVX512BW")
13251 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512VL && TARGET_AVX512BW")
13252 (V8HI "TARGET_AVX512VL && TARGET_AVX512BW")
13253 V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
13254 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
13255
13256 (define_mode_iterator AVX512ZEXTMASK
13257 [(DI "TARGET_AVX512BW") (SI "TARGET_AVX512BW") HI])
13258
13259 (define_insn "<avx512>_testm<mode>3<mask_scalar_merge_name>"
13260 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
13261 (unspec:<avx512fmaskmode>
13262 [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
13263 (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
13264 UNSPEC_TESTM))]
13265 "TARGET_AVX512F"
13266 "vptestm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
13267 [(set_attr "prefix" "evex")
13268 (set_attr "mode" "<sseinsnmode>")])
13269
13270 (define_insn "<avx512>_testnm<mode>3<mask_scalar_merge_name>"
13271 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
13272 (unspec:<avx512fmaskmode>
13273 [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
13274 (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
13275 UNSPEC_TESTNM))]
13276 "TARGET_AVX512F"
13277 "vptestnm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
13278 [(set_attr "prefix" "evex")
13279 (set_attr "mode" "<sseinsnmode>")])
13280
13281 (define_insn "*<avx512>_testm<VI1248_AVX512VLBW:mode>3_zext"
13282 [(set (match_operand:AVX512ZEXTMASK 0 "register_operand" "=k")
13283 (zero_extend:AVX512ZEXTMASK
13284 (unspec:<VI1248_AVX512VLBW:avx512fmaskmode>
13285 [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
13286 (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
13287 UNSPEC_TESTM)))]
13288 "TARGET_AVX512BW
13289 && (<AVX512ZEXTMASK:MODE_SIZE>
13290 > GET_MODE_SIZE (<VI1248_AVX512VLBW:avx512fmaskmode>mode))"
13291 "vptestm<VI1248_AVX512VLBW:ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
13292 [(set_attr "prefix" "evex")
13293 (set_attr "mode" "<VI1248_AVX512VLBW:sseinsnmode>")])
13294
13295 (define_insn "*<avx512>_testm<VI1248_AVX512VLBW:mode>3_zext_mask"
13296 [(set (match_operand:AVX512ZEXTMASK 0 "register_operand" "=k")
13297 (zero_extend:AVX512ZEXTMASK
13298 (and:<VI1248_AVX512VLBW:avx512fmaskmode>
13299 (unspec:<VI1248_AVX512VLBW:avx512fmaskmode>
13300 [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
13301 (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
13302 UNSPEC_TESTM)
13303 (match_operand:<VI1248_AVX512VLBW:avx512fmaskmode> 3 "register_operand" "Yk"))))]
13304 "TARGET_AVX512BW
13305 && (<AVX512ZEXTMASK:MODE_SIZE>
13306 > GET_MODE_SIZE (<VI1248_AVX512VLBW:avx512fmaskmode>mode))"
13307 "vptestm<VI1248_AVX512VLBW:ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
13308 [(set_attr "prefix" "evex")
13309 (set_attr "mode" "<VI1248_AVX512VLBW:sseinsnmode>")])
13310
13311 (define_insn "*<avx512>_testnm<VI1248_AVX512VLBW:mode>3_zext"
13312 [(set (match_operand:AVX512ZEXTMASK 0 "register_operand" "=k")
13313 (zero_extend:AVX512ZEXTMASK
13314 (unspec:<VI1248_AVX512VLBW:avx512fmaskmode>
13315 [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
13316 (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
13317 UNSPEC_TESTNM)))]
13318 "TARGET_AVX512BW
13319 && (<AVX512ZEXTMASK:MODE_SIZE>
13320 > GET_MODE_SIZE (<VI1248_AVX512VLBW:avx512fmaskmode>mode))"
13321 "vptestnm<VI1248_AVX512VLBW:ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
13322 [(set_attr "prefix" "evex")
13323 (set_attr "mode" "<VI1248_AVX512VLBW:sseinsnmode>")])
13324
13325 (define_insn "*<avx512>_testnm<VI1248_AVX512VLBW:mode>3_zext_mask"
13326 [(set (match_operand:AVX512ZEXTMASK 0 "register_operand" "=k")
13327 (zero_extend:AVX512ZEXTMASK
13328 (and:<VI1248_AVX512VLBW:avx512fmaskmode>
13329 (unspec:<VI1248_AVX512VLBW:avx512fmaskmode>
13330 [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
13331 (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
13332 UNSPEC_TESTNM)
13333 (match_operand:<VI1248_AVX512VLBW:avx512fmaskmode> 3 "register_operand" "Yk"))))]
13334 "TARGET_AVX512BW
13335 && (<AVX512ZEXTMASK:MODE_SIZE>
13336 > GET_MODE_SIZE (<VI1248_AVX512VLBW:avx512fmaskmode>mode))"
13337 "vptestnm<VI1248_AVX512VLBW:ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
13338 [(set_attr "prefix" "evex")
13339 (set_attr "mode" "<VI1248_AVX512VLBW:sseinsnmode>")])
13340
13341 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13342 ;;
13343 ;; Parallel integral element swizzling
13344 ;;
13345 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13346
13347 (define_expand "vec_pack_trunc_<mode>"
13348 [(match_operand:<ssepackmode> 0 "register_operand")
13349 (match_operand:VI248_AVX2_8_AVX512F_24_AVX512BW 1 "register_operand")
13350 (match_operand:VI248_AVX2_8_AVX512F_24_AVX512BW 2 "register_operand")]
13351 "TARGET_SSE2"
13352 {
13353 rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]);
13354 rtx op2 = gen_lowpart (<ssepackmode>mode, operands[2]);
13355 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
13356 DONE;
13357 })
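
;; Truncating packs are done by viewing each input in the narrow mode
;; and keeping only the even-numbered elements
;; (ix86_expand_vec_extract_even_odd with odd == 0), which is exactly
;; the modular truncation of every element.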
13358
13359 (define_expand "vec_pack_trunc_qi"
13360 [(set (match_operand:HI 0 "register_operand")
13361 (ior:HI (ashift:HI (zero_extend:HI (match_operand:QI 2 "register_operand"))
13362 (const_int 8))
13363 (zero_extend:HI (match_operand:QI 1 "register_operand"))))]
13364 "TARGET_AVX512F")
13365
13366 (define_expand "vec_pack_trunc_<mode>"
13367 [(set (match_operand:<DOUBLEMASKMODE> 0 "register_operand")
13368 (ior:<DOUBLEMASKMODE>
13369 (ashift:<DOUBLEMASKMODE>
13370 (zero_extend:<DOUBLEMASKMODE>
13371 (match_operand:SWI24 2 "register_operand"))
13372 (match_dup 3))
13373 (zero_extend:<DOUBLEMASKMODE>
13374 (match_operand:SWI24 1 "register_operand"))))]
13375 "TARGET_AVX512BW"
13376 {
13377 operands[3] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));
13378 })
13379
13380 (define_expand "vec_pack_sbool_trunc_qi"
13381 [(match_operand:QI 0 "register_operand")
13382 (match_operand:QI 1 "register_operand")
13383 (match_operand:QI 2 "register_operand")
13384 (match_operand:QI 3 "const_int_operand")]
13385 "TARGET_AVX512F"
13386 {
13387 HOST_WIDE_INT nunits = INTVAL (operands[3]);
13388 rtx mask, tem1, tem2;
13389 if (nunits != 8 && nunits != 4)
13390 FAIL;
13391 mask = gen_reg_rtx (QImode);
13392 emit_move_insn (mask, GEN_INT ((1 << (nunits / 2)) - 1));
13393 tem1 = gen_reg_rtx (QImode);
13394 emit_insn (gen_kandqi (tem1, operands[1], mask));
13395 if (TARGET_AVX512DQ)
13396 {
13397 tem2 = gen_reg_rtx (QImode);
13398 emit_insn (gen_kashiftqi (tem2, operands[2],
13399 GEN_INT (nunits / 2)));
13400 }
13401 else
13402 {
13403 tem2 = gen_reg_rtx (HImode);
13404 emit_insn (gen_kashifthi (tem2, lowpart_subreg (HImode, operands[2],
13405 QImode),
13406 GEN_INT (nunits / 2)));
13407 tem2 = lowpart_subreg (QImode, tem2, HImode);
13408 }
13409 emit_insn (gen_kiorqi (operands[0], tem1, tem2));
13410 DONE;
13411 })
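
;; vec_pack_sbool_trunc_qi concatenates two mask values that each carry
;; nunits/2 significant bits:
;;   op0 = (op2 << nunits/2) | (op1 & ((1 << nunits/2) - 1))
;; The detour through HImode in the non-DQ path is needed because the
;; byte variant of the mask shift (kshiftlb) is an AVX512DQ instruction.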
13412
13413 (define_insn "<sse2_avx2>_packsswb<mask_name>"
13414 [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v")
13415 (vec_concat:VI1_AVX512
13416 (ss_truncate:<ssehalfvecmode>
13417 (match_operand:<sseunpackmode> 1 "register_operand" "0,x,v"))
13418 (ss_truncate:<ssehalfvecmode>
13419 (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,xm,vm"))))]
13420 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
13421 "@
13422 packsswb\t{%2, %0|%0, %2}
13423 vpacksswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
13424 vpacksswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13425 [(set_attr "isa" "noavx,avx,avx512bw")
13426 (set_attr "type" "sselog")
13427 (set_attr "prefix_data16" "1,*,*")
13428 (set_attr "prefix" "orig,<mask_prefix>,evex")
13429 (set_attr "mode" "<sseinsnmode>")])
13430
13431 (define_insn "<sse2_avx2>_packssdw<mask_name>"
13432 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x,v")
13433 (vec_concat:VI2_AVX2
13434 (ss_truncate:<ssehalfvecmode>
13435 (match_operand:<sseunpackmode> 1 "register_operand" "0,x,v"))
13436 (ss_truncate:<ssehalfvecmode>
13437 (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,xm,vm"))))]
13438 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
13439 "@
13440 packssdw\t{%2, %0|%0, %2}
13441 vpackssdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
13442 vpackssdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13443 [(set_attr "isa" "noavx,avx,avx512bw")
13444 (set_attr "type" "sselog")
13445 (set_attr "prefix_data16" "1,*,*")
13446 (set_attr "prefix" "orig,<mask_prefix>,evex")
13447 (set_attr "mode" "<sseinsnmode>")])
13448
13449 (define_insn "<sse2_avx2>_packuswb<mask_name>"
13450 [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v")
13451 (vec_concat:VI1_AVX512
13452 (us_truncate:<ssehalfvecmode>
13453 (match_operand:<sseunpackmode> 1 "register_operand" "0,x,v"))
13454 (us_truncate:<ssehalfvecmode>
13455 (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,xm,vm"))))]
13456 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
13457 "@
13458 packuswb\t{%2, %0|%0, %2}
13459 vpackuswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
13460 vpackuswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13461 [(set_attr "isa" "noavx,avx,avx512bw")
13462 (set_attr "type" "sselog")
13463 (set_attr "prefix_data16" "1,*,*")
13464 (set_attr "prefix" "orig,<mask_prefix>,evex")
13465 (set_attr "mode" "<sseinsnmode>")])
13466
13467 (define_insn "avx512bw_interleave_highv64qi<mask_name>"
13468 [(set (match_operand:V64QI 0 "register_operand" "=v")
13469 (vec_select:V64QI
13470 (vec_concat:V128QI
13471 (match_operand:V64QI 1 "register_operand" "v")
13472 (match_operand:V64QI 2 "nonimmediate_operand" "vm"))
13473 (parallel [(const_int 8) (const_int 72)
13474 (const_int 9) (const_int 73)
13475 (const_int 10) (const_int 74)
13476 (const_int 11) (const_int 75)
13477 (const_int 12) (const_int 76)
13478 (const_int 13) (const_int 77)
13479 (const_int 14) (const_int 78)
13480 (const_int 15) (const_int 79)
13481 (const_int 24) (const_int 88)
13482 (const_int 25) (const_int 89)
13483 (const_int 26) (const_int 90)
13484 (const_int 27) (const_int 91)
13485 (const_int 28) (const_int 92)
13486 (const_int 29) (const_int 93)
13487 (const_int 30) (const_int 94)
13488 (const_int 31) (const_int 95)
13489 (const_int 40) (const_int 104)
13490 (const_int 41) (const_int 105)
13491 (const_int 42) (const_int 106)
13492 (const_int 43) (const_int 107)
13493 (const_int 44) (const_int 108)
13494 (const_int 45) (const_int 109)
13495 (const_int 46) (const_int 110)
13496 (const_int 47) (const_int 111)
13497 (const_int 56) (const_int 120)
13498 (const_int 57) (const_int 121)
13499 (const_int 58) (const_int 122)
13500 (const_int 59) (const_int 123)
13501 (const_int 60) (const_int 124)
13502 (const_int 61) (const_int 125)
13503 (const_int 62) (const_int 126)
13504 (const_int 63) (const_int 127)])))]
13505 "TARGET_AVX512BW"
13506 "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13507 [(set_attr "type" "sselog")
13508 (set_attr "prefix" "evex")
13509 (set_attr "mode" "XI")])
13510
13511 (define_insn "avx2_interleave_highv32qi<mask_name>"
13512 [(set (match_operand:V32QI 0 "register_operand" "=v")
13513 (vec_select:V32QI
13514 (vec_concat:V64QI
13515 (match_operand:V32QI 1 "register_operand" "v")
13516 (match_operand:V32QI 2 "nonimmediate_operand" "vm"))
13517 (parallel [(const_int 8) (const_int 40)
13518 (const_int 9) (const_int 41)
13519 (const_int 10) (const_int 42)
13520 (const_int 11) (const_int 43)
13521 (const_int 12) (const_int 44)
13522 (const_int 13) (const_int 45)
13523 (const_int 14) (const_int 46)
13524 (const_int 15) (const_int 47)
13525 (const_int 24) (const_int 56)
13526 (const_int 25) (const_int 57)
13527 (const_int 26) (const_int 58)
13528 (const_int 27) (const_int 59)
13529 (const_int 28) (const_int 60)
13530 (const_int 29) (const_int 61)
13531 (const_int 30) (const_int 62)
13532 (const_int 31) (const_int 63)])))]
13533 "TARGET_AVX2 && <mask_avx512vl_condition>"
13534 "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13535 [(set_attr "type" "sselog")
13536 (set_attr "prefix" "<mask_prefix>")
13537 (set_attr "mode" "OI")])
13538
13539 (define_insn "vec_interleave_highv16qi<mask_name>"
13540 [(set (match_operand:V16QI 0 "register_operand" "=x,v")
13541 (vec_select:V16QI
13542 (vec_concat:V32QI
13543 (match_operand:V16QI 1 "register_operand" "0,v")
13544 (match_operand:V16QI 2 "vector_operand" "xBm,vm"))
13545 (parallel [(const_int 8) (const_int 24)
13546 (const_int 9) (const_int 25)
13547 (const_int 10) (const_int 26)
13548 (const_int 11) (const_int 27)
13549 (const_int 12) (const_int 28)
13550 (const_int 13) (const_int 29)
13551 (const_int 14) (const_int 30)
13552 (const_int 15) (const_int 31)])))]
13553 "TARGET_SSE2 && <mask_avx512vl_condition>"
13554 "@
13555 punpckhbw\t{%2, %0|%0, %2}
13556 vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13557 [(set_attr "isa" "noavx,avx")
13558 (set_attr "type" "sselog")
13559 (set_attr "prefix_data16" "1,*")
13560 (set_attr "prefix" "orig,<mask_prefix>")
13561 (set_attr "mode" "TI")])
13562
13563 (define_insn "avx512bw_interleave_lowv64qi<mask_name>"
13564 [(set (match_operand:V64QI 0 "register_operand" "=v")
13565 (vec_select:V64QI
13566 (vec_concat:V128QI
13567 (match_operand:V64QI 1 "register_operand" "v")
13568 (match_operand:V64QI 2 "nonimmediate_operand" "vm"))
13569 (parallel [(const_int 0) (const_int 64)
13570 (const_int 1) (const_int 65)
13571 (const_int 2) (const_int 66)
13572 (const_int 3) (const_int 67)
13573 (const_int 4) (const_int 68)
13574 (const_int 5) (const_int 69)
13575 (const_int 6) (const_int 70)
13576 (const_int 7) (const_int 71)
13577 (const_int 16) (const_int 80)
13578 (const_int 17) (const_int 81)
13579 (const_int 18) (const_int 82)
13580 (const_int 19) (const_int 83)
13581 (const_int 20) (const_int 84)
13582 (const_int 21) (const_int 85)
13583 (const_int 22) (const_int 86)
13584 (const_int 23) (const_int 87)
13585 (const_int 32) (const_int 96)
13586 (const_int 33) (const_int 97)
13587 (const_int 34) (const_int 98)
13588 (const_int 35) (const_int 99)
13589 (const_int 36) (const_int 100)
13590 (const_int 37) (const_int 101)
13591 (const_int 38) (const_int 102)
13592 (const_int 39) (const_int 103)
13593 (const_int 48) (const_int 112)
13594 (const_int 49) (const_int 113)
13595 (const_int 50) (const_int 114)
13596 (const_int 51) (const_int 115)
13597 (const_int 52) (const_int 116)
13598 (const_int 53) (const_int 117)
13599 (const_int 54) (const_int 118)
13600 (const_int 55) (const_int 119)])))]
13601 "TARGET_AVX512BW"
13602 "vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13603 [(set_attr "type" "sselog")
13604 (set_attr "prefix" "evex")
13605 (set_attr "mode" "XI")])
13606
13607 (define_insn "avx2_interleave_lowv32qi<mask_name>"
13608 [(set (match_operand:V32QI 0 "register_operand" "=v")
13609 (vec_select:V32QI
13610 (vec_concat:V64QI
13611 (match_operand:V32QI 1 "register_operand" "v")
13612 (match_operand:V32QI 2 "nonimmediate_operand" "vm"))
13613 (parallel [(const_int 0) (const_int 32)
13614 (const_int 1) (const_int 33)
13615 (const_int 2) (const_int 34)
13616 (const_int 3) (const_int 35)
13617 (const_int 4) (const_int 36)
13618 (const_int 5) (const_int 37)
13619 (const_int 6) (const_int 38)
13620 (const_int 7) (const_int 39)
13621 (const_int 16) (const_int 48)
13622 (const_int 17) (const_int 49)
13623 (const_int 18) (const_int 50)
13624 (const_int 19) (const_int 51)
13625 (const_int 20) (const_int 52)
13626 (const_int 21) (const_int 53)
13627 (const_int 22) (const_int 54)
13628 (const_int 23) (const_int 55)])))]
13629 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
13630 "vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13631 [(set_attr "type" "sselog")
13632 (set_attr "prefix" "maybe_vex")
13633 (set_attr "mode" "OI")])
13634
13635 (define_insn "vec_interleave_lowv16qi<mask_name>"
13636 [(set (match_operand:V16QI 0 "register_operand" "=x,v")
13637 (vec_select:V16QI
13638 (vec_concat:V32QI
13639 (match_operand:V16QI 1 "register_operand" "0,v")
13640 (match_operand:V16QI 2 "vector_operand" "xBm,vm"))
13641 (parallel [(const_int 0) (const_int 16)
13642 (const_int 1) (const_int 17)
13643 (const_int 2) (const_int 18)
13644 (const_int 3) (const_int 19)
13645 (const_int 4) (const_int 20)
13646 (const_int 5) (const_int 21)
13647 (const_int 6) (const_int 22)
13648 (const_int 7) (const_int 23)])))]
13649 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
13650 "@
13651 punpcklbw\t{%2, %0|%0, %2}
13652 vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13653 [(set_attr "isa" "noavx,avx")
13654 (set_attr "type" "sselog")
13655 (set_attr "prefix_data16" "1,*")
13656 (set_attr "prefix" "orig,vex")
13657 (set_attr "mode" "TI")])
13658
13659 (define_insn "avx512bw_interleave_highv32hi<mask_name>"
13660 [(set (match_operand:V32HI 0 "register_operand" "=v")
13661 (vec_select:V32HI
13662 (vec_concat:V64HI
13663 (match_operand:V32HI 1 "register_operand" "v")
13664 (match_operand:V32HI 2 "nonimmediate_operand" "vm"))
13665 (parallel [(const_int 4) (const_int 36)
13666 (const_int 5) (const_int 37)
13667 (const_int 6) (const_int 38)
13668 (const_int 7) (const_int 39)
13669 (const_int 12) (const_int 44)
13670 (const_int 13) (const_int 45)
13671 (const_int 14) (const_int 46)
13672 (const_int 15) (const_int 47)
13673 (const_int 20) (const_int 52)
13674 (const_int 21) (const_int 53)
13675 (const_int 22) (const_int 54)
13676 (const_int 23) (const_int 55)
13677 (const_int 28) (const_int 60)
13678 (const_int 29) (const_int 61)
13679 (const_int 30) (const_int 62)
13680 (const_int 31) (const_int 63)])))]
13681 "TARGET_AVX512BW"
13682 "vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13683 [(set_attr "type" "sselog")
13684 (set_attr "prefix" "evex")
13685 (set_attr "mode" "XI")])
13686
13687 (define_insn "avx2_interleave_highv16hi<mask_name>"
13688 [(set (match_operand:V16HI 0 "register_operand" "=v")
13689 (vec_select:V16HI
13690 (vec_concat:V32HI
13691 (match_operand:V16HI 1 "register_operand" "v")
13692 (match_operand:V16HI 2 "nonimmediate_operand" "vm"))
13693 (parallel [(const_int 4) (const_int 20)
13694 (const_int 5) (const_int 21)
13695 (const_int 6) (const_int 22)
13696 (const_int 7) (const_int 23)
13697 (const_int 12) (const_int 28)
13698 (const_int 13) (const_int 29)
13699 (const_int 14) (const_int 30)
13700 (const_int 15) (const_int 31)])))]
13701 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
13702 "vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13703 [(set_attr "type" "sselog")
13704 (set_attr "prefix" "maybe_evex")
13705 (set_attr "mode" "OI")])
13706
13707 (define_insn "vec_interleave_highv8hi<mask_name>"
13708 [(set (match_operand:V8HI 0 "register_operand" "=x,v")
13709 (vec_select:V8HI
13710 (vec_concat:V16HI
13711 (match_operand:V8HI 1 "register_operand" "0,v")
13712 (match_operand:V8HI 2 "vector_operand" "xBm,vm"))
13713 (parallel [(const_int 4) (const_int 12)
13714 (const_int 5) (const_int 13)
13715 (const_int 6) (const_int 14)
13716 (const_int 7) (const_int 15)])))]
13717 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
13718 "@
13719 punpckhwd\t{%2, %0|%0, %2}
13720 vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13721 [(set_attr "isa" "noavx,avx")
13722 (set_attr "type" "sselog")
13723 (set_attr "prefix_data16" "1,*")
13724 (set_attr "prefix" "orig,maybe_vex")
13725 (set_attr "mode" "TI")])
13726
13727 (define_insn "<mask_codefor>avx512bw_interleave_lowv32hi<mask_name>"
13728 [(set (match_operand:V32HI 0 "register_operand" "=v")
13729 (vec_select:V32HI
13730 (vec_concat:V64HI
13731 (match_operand:V32HI 1 "register_operand" "v")
13732 (match_operand:V32HI 2 "nonimmediate_operand" "vm"))
13733 (parallel [(const_int 0) (const_int 32)
13734 (const_int 1) (const_int 33)
13735 (const_int 2) (const_int 34)
13736 (const_int 3) (const_int 35)
13737 (const_int 8) (const_int 40)
13738 (const_int 9) (const_int 41)
13739 (const_int 10) (const_int 42)
13740 (const_int 11) (const_int 43)
13741 (const_int 16) (const_int 48)
13742 (const_int 17) (const_int 49)
13743 (const_int 18) (const_int 50)
13744 (const_int 19) (const_int 51)
13745 (const_int 24) (const_int 56)
13746 (const_int 25) (const_int 57)
13747 (const_int 26) (const_int 58)
13748 (const_int 27) (const_int 59)])))]
13749 "TARGET_AVX512BW"
13750 "vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13751 [(set_attr "type" "sselog")
13752 (set_attr "prefix" "evex")
13753 (set_attr "mode" "XI")])
13754
13755 (define_insn "avx2_interleave_lowv16hi<mask_name>"
13756 [(set (match_operand:V16HI 0 "register_operand" "=v")
13757 (vec_select:V16HI
13758 (vec_concat:V32HI
13759 (match_operand:V16HI 1 "register_operand" "v")
13760 (match_operand:V16HI 2 "nonimmediate_operand" "vm"))
13761 (parallel [(const_int 0) (const_int 16)
13762 (const_int 1) (const_int 17)
13763 (const_int 2) (const_int 18)
13764 (const_int 3) (const_int 19)
13765 (const_int 8) (const_int 24)
13766 (const_int 9) (const_int 25)
13767 (const_int 10) (const_int 26)
13768 (const_int 11) (const_int 27)])))]
13769 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
13770 "vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13771 [(set_attr "type" "sselog")
13772 (set_attr "prefix" "maybe_evex")
13773 (set_attr "mode" "OI")])
13774
13775 (define_insn "vec_interleave_lowv8hi<mask_name>"
13776 [(set (match_operand:V8HI 0 "register_operand" "=x,v")
13777 (vec_select:V8HI
13778 (vec_concat:V16HI
13779 (match_operand:V8HI 1 "register_operand" "0,v")
13780 (match_operand:V8HI 2 "vector_operand" "xBm,vm"))
13781 (parallel [(const_int 0) (const_int 8)
13782 (const_int 1) (const_int 9)
13783 (const_int 2) (const_int 10)
13784 (const_int 3) (const_int 11)])))]
13785 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
13786 "@
13787 punpcklwd\t{%2, %0|%0, %2}
13788 vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13789 [(set_attr "isa" "noavx,avx")
13790 (set_attr "type" "sselog")
13791 (set_attr "prefix_data16" "1,*")
13792 (set_attr "prefix" "orig,maybe_evex")
13793 (set_attr "mode" "TI")])
13794
13795 (define_insn "avx2_interleave_highv8si<mask_name>"
13796 [(set (match_operand:V8SI 0 "register_operand" "=v")
13797 (vec_select:V8SI
13798 (vec_concat:V16SI
13799 (match_operand:V8SI 1 "register_operand" "v")
13800 (match_operand:V8SI 2 "nonimmediate_operand" "vm"))
13801 (parallel [(const_int 2) (const_int 10)
13802 (const_int 3) (const_int 11)
13803 (const_int 6) (const_int 14)
13804 (const_int 7) (const_int 15)])))]
13805 "TARGET_AVX2 && <mask_avx512vl_condition>"
13806 "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13807 [(set_attr "type" "sselog")
13808 (set_attr "prefix" "maybe_evex")
13809 (set_attr "mode" "OI")])
13810
13811 (define_insn "<mask_codefor>avx512f_interleave_highv16si<mask_name>"
13812 [(set (match_operand:V16SI 0 "register_operand" "=v")
13813 (vec_select:V16SI
13814 (vec_concat:V32SI
13815 (match_operand:V16SI 1 "register_operand" "v")
13816 (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
13817 (parallel [(const_int 2) (const_int 18)
13818 (const_int 3) (const_int 19)
13819 (const_int 6) (const_int 22)
13820 (const_int 7) (const_int 23)
13821 (const_int 10) (const_int 26)
13822 (const_int 11) (const_int 27)
13823 (const_int 14) (const_int 30)
13824 (const_int 15) (const_int 31)])))]
13825 "TARGET_AVX512F"
13826 "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13827 [(set_attr "type" "sselog")
13828 (set_attr "prefix" "evex")
13829 (set_attr "mode" "XI")])
13830
13831
13832 (define_insn "vec_interleave_highv4si<mask_name>"
13833 [(set (match_operand:V4SI 0 "register_operand" "=x,v")
13834 (vec_select:V4SI
13835 (vec_concat:V8SI
13836 (match_operand:V4SI 1 "register_operand" "0,v")
13837 (match_operand:V4SI 2 "vector_operand" "xBm,vm"))
13838 (parallel [(const_int 2) (const_int 6)
13839 (const_int 3) (const_int 7)])))]
13840 "TARGET_SSE2 && <mask_avx512vl_condition>"
13841 "@
13842 punpckhdq\t{%2, %0|%0, %2}
13843 vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13844 [(set_attr "isa" "noavx,avx")
13845 (set_attr "type" "sselog")
13846 (set_attr "prefix_data16" "1,*")
13847 (set_attr "prefix" "orig,maybe_vex")
13848 (set_attr "mode" "TI")])
13849
13850 (define_insn "avx2_interleave_lowv8si<mask_name>"
13851 [(set (match_operand:V8SI 0 "register_operand" "=v")
13852 (vec_select:V8SI
13853 (vec_concat:V16SI
13854 (match_operand:V8SI 1 "register_operand" "v")
13855 (match_operand:V8SI 2 "nonimmediate_operand" "vm"))
13856 (parallel [(const_int 0) (const_int 8)
13857 (const_int 1) (const_int 9)
13858 (const_int 4) (const_int 12)
13859 (const_int 5) (const_int 13)])))]
13860 "TARGET_AVX2 && <mask_avx512vl_condition>"
13861 "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13862 [(set_attr "type" "sselog")
13863 (set_attr "prefix" "maybe_evex")
13864 (set_attr "mode" "OI")])
13865
13866 (define_insn "<mask_codefor>avx512f_interleave_lowv16si<mask_name>"
13867 [(set (match_operand:V16SI 0 "register_operand" "=v")
13868 (vec_select:V16SI
13869 (vec_concat:V32SI
13870 (match_operand:V16SI 1 "register_operand" "v")
13871 (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
13872 (parallel [(const_int 0) (const_int 16)
13873 (const_int 1) (const_int 17)
13874 (const_int 4) (const_int 20)
13875 (const_int 5) (const_int 21)
13876 (const_int 8) (const_int 24)
13877 (const_int 9) (const_int 25)
13878 (const_int 12) (const_int 28)
13879 (const_int 13) (const_int 29)])))]
13880 "TARGET_AVX512F"
13881 "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13882 [(set_attr "type" "sselog")
13883 (set_attr "prefix" "evex")
13884 (set_attr "mode" "XI")])
13885
13886 (define_insn "vec_interleave_lowv4si<mask_name>"
13887 [(set (match_operand:V4SI 0 "register_operand" "=x,v")
13888 (vec_select:V4SI
13889 (vec_concat:V8SI
13890 (match_operand:V4SI 1 "register_operand" "0,v")
13891 (match_operand:V4SI 2 "vector_operand" "xBm,vm"))
13892 (parallel [(const_int 0) (const_int 4)
13893 (const_int 1) (const_int 5)])))]
13894 "TARGET_SSE2 && <mask_avx512vl_condition>"
13895 "@
13896 punpckldq\t{%2, %0|%0, %2}
13897 vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13898 [(set_attr "isa" "noavx,avx")
13899 (set_attr "type" "sselog")
13900 (set_attr "prefix_data16" "1,*")
13901 (set_attr "prefix" "orig,vex")
13902 (set_attr "mode" "TI")])
13903
13904 (define_expand "vec_interleave_high<mode>"
13905 [(match_operand:VI_256 0 "register_operand")
13906 (match_operand:VI_256 1 "register_operand")
13907 (match_operand:VI_256 2 "nonimmediate_operand")]
13908 "TARGET_AVX2"
13909 {
13910 rtx t1 = gen_reg_rtx (<MODE>mode);
13911 rtx t2 = gen_reg_rtx (<MODE>mode);
13912 rtx t3 = gen_reg_rtx (V4DImode);
13913 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
13914 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
13915 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
13916 gen_lowpart (V4DImode, t2),
13917 GEN_INT (1 + (3 << 4))));
13918 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
13919 DONE;
13920 })
13921
13922 (define_expand "vec_interleave_low<mode>"
13923 [(match_operand:VI_256 0 "register_operand")
13924 (match_operand:VI_256 1 "register_operand")
13925 (match_operand:VI_256 2 "nonimmediate_operand")]
13926 "TARGET_AVX2"
13927 {
13928 rtx t1 = gen_reg_rtx (<MODE>mode);
13929 rtx t2 = gen_reg_rtx (<MODE>mode);
13930 rtx t3 = gen_reg_rtx (V4DImode);
13931 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
13932 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
13933 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
13934 gen_lowpart (V4DImode, t2),
13935 GEN_INT (0 + (2 << 4))));
13936 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
13937 DONE;
13938 })
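;; The two expanders above synthesize a true 256-bit interleave from the
;; lane-wise AVX2 punpck patterns: avx2_interleave_low/_high each work
;; within 128-bit lanes, so a final avx2_permv2ti (vperm2i128) picks the
;; lanes that belong together.  In the immediate, the low nibble selects
;; the source of the result's low lane and the high nibble the source of
;; its high lane, with values 0..1 naming the lanes of the first input
;; and 2..3 the lanes of the second.  Hence 1 + (3 << 4) = 0x31 pairs the
;; two high lanes (giving vec_interleave_high) and 0 + (2 << 4) = 0x20
;; pairs the two low lanes (giving vec_interleave_low).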
13939
13940 ;; Modes handled by pinsr patterns.
13941 (define_mode_iterator PINSR_MODE
13942 [(V16QI "TARGET_SSE4_1") V8HI
13943 (V4SI "TARGET_SSE4_1")
13944 (V2DI "TARGET_SSE4_1 && TARGET_64BIT")])
13945
13946 (define_mode_attr sse2p4_1
13947 [(V16QI "sse4_1") (V8HI "sse2")
13948 (V4SI "sse4_1") (V2DI "sse4_1")])
13949
13950 (define_mode_attr pinsr_evex_isa
13951 [(V16QI "avx512bw") (V8HI "avx512bw")
13952 (V4SI "avx512dq") (V2DI "avx512dq")])
13953
13954 ;; sse4_1_pinsrd must come before sse2_loadld, since the pinsrd pattern is
;; the one preferred when both could match.
13955 (define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"
13956 [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x,v,v")
13957 (vec_merge:PINSR_MODE
13958 (vec_duplicate:PINSR_MODE
13959 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m,r,m"))
13960 (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x,v,v")
13961 (match_operand:SI 3 "const_int_operand")))]
13962 "TARGET_SSE2
13963 && ((unsigned) exact_log2 (INTVAL (operands[3]))
13964 < GET_MODE_NUNITS (<MODE>mode))"
13965 {
13966 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
13967
13968 switch (which_alternative)
13969 {
13970 case 0:
13971 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
13972 return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}";
13973 /* FALLTHRU */
13974 case 1:
13975 return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}";
13976 case 2:
13977 case 4:
13978 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
13979 return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
13980 /* FALLTHRU */
13981 case 3:
13982 case 5:
13983 return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
13984 default:
13985 gcc_unreachable ();
13986 }
13987 }
13988 [(set_attr "isa" "noavx,noavx,avx,avx,<pinsr_evex_isa>,<pinsr_evex_isa>")
13989 (set_attr "type" "sselog")
13990 (set (attr "prefix_rex")
13991 (if_then_else
13992 (and (not (match_test "TARGET_AVX"))
13993 (eq (const_string "<MODE>mode") (const_string "V2DImode")))
13994 (const_string "1")
13995 (const_string "*")))
13996 (set (attr "prefix_data16")
13997 (if_then_else
13998 (and (not (match_test "TARGET_AVX"))
13999 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
14000 (const_string "1")
14001 (const_string "*")))
14002 (set (attr "prefix_extra")
14003 (if_then_else
14004 (and (not (match_test "TARGET_AVX"))
14005 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
14006 (const_string "*")
14007 (const_string "1")))
14008 (set_attr "length_immediate" "1")
14009 (set_attr "prefix" "orig,orig,vex,vex,evex,evex")
14010 (set_attr "mode" "TI")])
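;; In the vec_merge above, operand 3 is a one-hot mask selecting the
;; element to replace, while the pinsr instructions encode a plain
;; element index; the exact_log2 calls convert between the two.  A small
;; usage sketch in C of the kind of insertion this pattern matches (the
;; function name is illustrative only):
;;
;;   #include <immintrin.h>
;;
;;   /* Replace word 3 of X; as RTL this is a vec_merge with mask
;;      (const_int 8), i.e. 1 << 3, which the pattern above matches and
;;      emits as pinsrw/vpinsrw with immediate 3.  */
;;   static __m128i
;;   set_word3 (__m128i x, int s)
;;   {
;;     return _mm_insert_epi16 (x, s, 3);
;;   }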
14011
14012 (define_expand "<extract_type>_vinsert<shuffletype><extract_suf>_mask"
14013 [(match_operand:AVX512_VEC 0 "register_operand")
14014 (match_operand:AVX512_VEC 1 "register_operand")
14015 (match_operand:<ssequartermode> 2 "nonimmediate_operand")
14016 (match_operand:SI 3 "const_0_to_3_operand")
14017 (match_operand:AVX512_VEC 4 "register_operand")
14018 (match_operand:<avx512fmaskmode> 5 "register_operand")]
14019 "TARGET_AVX512F"
14020 {
14021 int mask, selector;
14022 mask = INTVAL (operands[3]);
14023 selector = (GET_MODE_UNIT_SIZE (<MODE>mode) == 4
14024 ? 0xFFFF ^ (0x000F << mask * 4)
14025 : 0xFF ^ (0x03 << mask * 2));
14026 emit_insn (gen_<extract_type>_vinsert<shuffletype><extract_suf>_1_mask
14027 (operands[0], operands[1], operands[2], GEN_INT (selector),
14028 operands[4], operands[5]));
14029 DONE;
14030 })
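;; The expander above turns the 2-bit chunk index in operand 3 into the
;; vec_merge selector used by the *_1 pattern further down: the bits
;; that are cleared mark the quarter being replaced.  For example, with
;; a 16-element mode and chunk index 1, the selector is
;; 0xFFFF ^ (0x000F << 4) = 0xFF0F, which the *_1 pattern decodes back
;; to the immediate 1 for vinsert<shuffletype><extract_suf>.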
14031
14032 (define_insn "*<extract_type>_vinsert<shuffletype><extract_suf>_0"
14033 [(set (match_operand:AVX512_VEC 0 "register_operand" "=v,x,Yv")
14034 (vec_merge:AVX512_VEC
14035 (match_operand:AVX512_VEC 1 "reg_or_0_operand" "v,C,C")
14036 (vec_duplicate:AVX512_VEC
14037 (match_operand:<ssequartermode> 2 "nonimmediate_operand" "vm,xm,vm"))
14038 (match_operand:SI 3 "const_int_operand" "n,n,n")))]
14039 "TARGET_AVX512F
14040 && (INTVAL (operands[3])
14041 == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0xFFF0 : 0xFC))"
14042 {
14043 if (which_alternative == 0)
14044 return "vinsert<shuffletype><extract_suf>\t{$0, %2, %1, %0|%0, %1, %2, 0}";
14045 switch (<MODE>mode)
14046 {
14047 case E_V8DFmode:
14048 if (misaligned_operand (operands[2], <ssequartermode>mode))
14049 return "vmovupd\t{%2, %x0|%x0, %2}";
14050 else
14051 return "vmovapd\t{%2, %x0|%x0, %2}";
14052 case E_V16SFmode:
14053 if (misaligned_operand (operands[2], <ssequartermode>mode))
14054 return "vmovups\t{%2, %x0|%x0, %2}";
14055 else
14056 return "vmovaps\t{%2, %x0|%x0, %2}";
14057 case E_V8DImode:
14058 if (misaligned_operand (operands[2], <ssequartermode>mode))
14059 return which_alternative == 2 ? "vmovdqu64\t{%2, %x0|%x0, %2}"
14060 : "vmovdqu\t{%2, %x0|%x0, %2}";
14061 else
14062 return which_alternative == 2 ? "vmovdqa64\t{%2, %x0|%x0, %2}"
14063 : "vmovdqa\t{%2, %x0|%x0, %2}";
14064 case E_V16SImode:
14065 if (misaligned_operand (operands[2], <ssequartermode>mode))
14066 return which_alternative == 2 ? "vmovdqu32\t{%2, %x0|%x0, %2}"
14067 : "vmovdqu\t{%2, %x0|%x0, %2}";
14068 else
14069 return which_alternative == 2 ? "vmovdqa32\t{%2, %x0|%x0, %2}"
14070 : "vmovdqa\t{%2, %x0|%x0, %2}";
14071 default:
14072 gcc_unreachable ();
14073 }
14074 }
14075 [(set_attr "type" "sselog,ssemov,ssemov")
14076 (set_attr "length_immediate" "1,0,0")
14077 (set_attr "prefix" "evex,vex,evex")
14078 (set_attr "mode" "<sseinsnmode>,<ssequarterinsnmode>,<ssequarterinsnmode>")])
14079
14080 (define_insn "<mask_codefor><extract_type>_vinsert<shuffletype><extract_suf>_1<mask_name>"
14081 [(set (match_operand:AVX512_VEC 0 "register_operand" "=v")
14082 (vec_merge:AVX512_VEC
14083 (match_operand:AVX512_VEC 1 "register_operand" "v")
14084 (vec_duplicate:AVX512_VEC
14085 (match_operand:<ssequartermode> 2 "nonimmediate_operand" "vm"))
14086 (match_operand:SI 3 "const_int_operand" "n")))]
14087 "TARGET_AVX512F"
14088 {
14089 int mask;
14090 int selector = INTVAL (operands[3]);
14091
14092 if (selector == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0xFFF0 : 0xFC))
14093 mask = 0;
14094 else if (selector == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0xFF0F : 0xF3))
14095 mask = 1;
14096 else if (selector == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0xF0FF : 0xCF))
14097 mask = 2;
14098 else if (selector == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0x0FFF : 0x3F))
14099 mask = 3;
14100 else
14101 gcc_unreachable ();
14102
14103 operands[3] = GEN_INT (mask);
14104
14105 return "vinsert<shuffletype><extract_suf>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
14106 }
14107 [(set_attr "type" "sselog")
14108 (set_attr "length_immediate" "1")
14109 (set_attr "prefix" "evex")
14110 (set_attr "mode" "<sseinsnmode>")])
14111
14112 (define_expand "<extract_type_2>_vinsert<shuffletype><extract_suf_2>_mask"
14113 [(match_operand:AVX512_VEC_2 0 "register_operand")
14114 (match_operand:AVX512_VEC_2 1 "register_operand")
14115 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
14116 (match_operand:SI 3 "const_0_to_1_operand")
14117 (match_operand:AVX512_VEC_2 4 "register_operand")
14118 (match_operand:<avx512fmaskmode> 5 "register_operand")]
14119 "TARGET_AVX512F"
14120 {
14121 int mask = INTVAL (operands[3]);
14122 if (mask == 0)
14123 emit_insn (gen_vec_set_lo_<mode>_mask (operands[0], operands[1],
14124 operands[2], operands[4],
14125 operands[5]));
14126 else
14127 emit_insn (gen_vec_set_hi_<mode>_mask (operands[0], operands[1],
14128 operands[2], operands[4],
14129 operands[5]));
14130 DONE;
14131 })
14132
14133 (define_insn "vec_set_lo_<mode><mask_name>"
14134 [(set (match_operand:V16FI 0 "register_operand" "=v")
14135 (vec_concat:V16FI
14136 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
14137 (vec_select:<ssehalfvecmode>
14138 (match_operand:V16FI 1 "register_operand" "v")
14139 (parallel [(const_int 8) (const_int 9)
14140 (const_int 10) (const_int 11)
14141 (const_int 12) (const_int 13)
14142 (const_int 14) (const_int 15)]))))]
14143 "TARGET_AVX512DQ"
14144 "vinsert<shuffletype>32x8\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}"
14145 [(set_attr "type" "sselog")
14146 (set_attr "length_immediate" "1")
14147 (set_attr "prefix" "evex")
14148 (set_attr "mode" "<sseinsnmode>")])
14149
14150 (define_insn "vec_set_hi_<mode><mask_name>"
14151 [(set (match_operand:V16FI 0 "register_operand" "=v")
14152 (vec_concat:V16FI
14153 (vec_select:<ssehalfvecmode>
14154 (match_operand:V16FI 1 "register_operand" "v")
14155 (parallel [(const_int 0) (const_int 1)
14156 (const_int 2) (const_int 3)
14157 (const_int 4) (const_int 5)
14158 (const_int 6) (const_int 7)]))
14159 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
14160 "TARGET_AVX512DQ"
14161 "vinsert<shuffletype>32x8\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}"
14162 [(set_attr "type" "sselog")
14163 (set_attr "length_immediate" "1")
14164 (set_attr "prefix" "evex")
14165 (set_attr "mode" "<sseinsnmode>")])
14166
14167 (define_insn "vec_set_lo_<mode><mask_name>"
14168 [(set (match_operand:V8FI 0 "register_operand" "=v")
14169 (vec_concat:V8FI
14170 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
14171 (vec_select:<ssehalfvecmode>
14172 (match_operand:V8FI 1 "register_operand" "v")
14173 (parallel [(const_int 4) (const_int 5)
14174 (const_int 6) (const_int 7)]))))]
14175 "TARGET_AVX512F"
14176 "vinsert<shuffletype>64x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}"
14177 [(set_attr "type" "sselog")
14178 (set_attr "length_immediate" "1")
14179 (set_attr "prefix" "evex")
14180 (set_attr "mode" "XI")])
14181
14182 (define_insn "vec_set_hi_<mode><mask_name>"
14183 [(set (match_operand:V8FI 0 "register_operand" "=v")
14184 (vec_concat:V8FI
14185 (vec_select:<ssehalfvecmode>
14186 (match_operand:V8FI 1 "register_operand" "v")
14187 (parallel [(const_int 0) (const_int 1)
14188 (const_int 2) (const_int 3)]))
14189 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
14190 "TARGET_AVX512F"
14191 "vinsert<shuffletype>64x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}"
14192 [(set_attr "type" "sselog")
14193 (set_attr "length_immediate" "1")
14194 (set_attr "prefix" "evex")
14195 (set_attr "mode" "XI")])
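;; The vec_set_lo/vec_set_hi patterns above overwrite the low or high
;; 256-bit half of a 512-bit vector with operand 2, using
;; vinsert{f,i}32x8 (AVX512DQ) or vinsert{f,i}64x4 (AVX512F).  A small
;; usage sketch in C via the corresponding intrinsic (the function name
;; is illustrative only):
;;
;;   #include <immintrin.h>
;;
;;   /* Replace the high 256 bits of X with Y; with -mavx512f this is
;;      vinserti64x4 with immediate 1, i.e. vec_set_hi above.  */
;;   static __m512i
;;   set_high_half (__m512i x, __m256i y)
;;   {
;;     return _mm512_inserti64x4 (x, y, 1);
;;   }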
14196
14197 (define_expand "avx512dq_shuf_<shuffletype>64x2_mask"
14198 [(match_operand:VI8F_256 0 "register_operand")
14199 (match_operand:VI8F_256 1 "register_operand")
14200 (match_operand:VI8F_256 2 "nonimmediate_operand")
14201 (match_operand:SI 3 "const_0_to_3_operand")
14202 (match_operand:VI8F_256 4 "register_operand")
14203 (match_operand:QI 5 "register_operand")]
14204 "TARGET_AVX512DQ"
14205 {
14206 int mask = INTVAL (operands[3]);
14207 emit_insn (gen_avx512dq_shuf_<shuffletype>64x2_1_mask
14208 (operands[0], operands[1], operands[2],
14209 GEN_INT (((mask >> 0) & 1) * 2 + 0),
14210 GEN_INT (((mask >> 0) & 1) * 2 + 1),
14211 GEN_INT (((mask >> 1) & 1) * 2 + 4),
14212 GEN_INT (((mask >> 1) & 1) * 2 + 5),
14213 operands[4], operands[5]));
14214 DONE;
14215 })
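;; Worked example for the expander above: bit 0 of the immediate picks
;; which 128-bit chunk of operand 1 becomes the result's low half, and
;; bit 1 picks which chunk of operand 2 becomes its high half.  With
;; mask = 2 the generated selection is {0, 1, 6, 7}: elements 0-1 are
;; chunk 0 of operand 1, and elements 6-7 of the concatenation are
;; chunk 1 of operand 2.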
14216
14217 (define_insn "<mask_codefor>avx512dq_shuf_<shuffletype>64x2_1<mask_name>"
14218 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
14219 (vec_select:VI8F_256
14220 (vec_concat:<ssedoublemode>
14221 (match_operand:VI8F_256 1 "register_operand" "v")
14222 (match_operand:VI8F_256 2 "nonimmediate_operand" "vm"))
14223 (parallel [(match_operand 3 "const_0_to_3_operand")
14224 (match_operand 4 "const_0_to_3_operand")
14225 (match_operand 5 "const_4_to_7_operand")
14226 (match_operand 6 "const_4_to_7_operand")])))]
14227 "TARGET_AVX512VL
14228 && (INTVAL (operands[3]) & 1) == 0
14229 && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
14230 && (INTVAL (operands[5]) & 1) == 0
14231 && INTVAL (operands[5]) == INTVAL (operands[6]) - 1"
14232 {
14233 int mask;
14234 mask = INTVAL (operands[3]) / 2;
14235 mask |= (INTVAL (operands[5]) - 4) / 2 << 1;
14236 operands[3] = GEN_INT (mask);
14237 return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand7>|%0<mask_operand7>, %1, %2, %3}";
14238 }
14239 [(set_attr "type" "sselog")
14240 (set_attr "length_immediate" "1")
14241 (set_attr "prefix" "evex")
14242 (set_attr "mode" "XI")])
14243
14244 (define_expand "avx512f_shuf_<shuffletype>64x2_mask"
14245 [(match_operand:V8FI 0 "register_operand")
14246 (match_operand:V8FI 1 "register_operand")
14247 (match_operand:V8FI 2 "nonimmediate_operand")
14248 (match_operand:SI 3 "const_0_to_255_operand")
14249 (match_operand:V8FI 4 "register_operand")
14250 (match_operand:QI 5 "register_operand")]
14251 "TARGET_AVX512F"
14252 {
14253 int mask = INTVAL (operands[3]);
14254 emit_insn (gen_avx512f_shuf_<shuffletype>64x2_1_mask
14255 (operands[0], operands[1], operands[2],
14256 GEN_INT (((mask >> 0) & 3) * 2),
14257 GEN_INT (((mask >> 0) & 3) * 2 + 1),
14258 GEN_INT (((mask >> 2) & 3) * 2),
14259 GEN_INT (((mask >> 2) & 3) * 2 + 1),
14260 GEN_INT (((mask >> 4) & 3) * 2 + 8),
14261 GEN_INT (((mask >> 4) & 3) * 2 + 9),
14262 GEN_INT (((mask >> 6) & 3) * 2 + 8),
14263 GEN_INT (((mask >> 6) & 3) * 2 + 9),
14264 operands[4], operands[5]));
14265 DONE;
14266 })
14267
14268 (define_insn "avx512f_shuf_<shuffletype>64x2_1<mask_name>"
14269 [(set (match_operand:V8FI 0 "register_operand" "=v")
14270 (vec_select:V8FI
14271 (vec_concat:<ssedoublemode>
14272 (match_operand:V8FI 1 "register_operand" "v")
14273 (match_operand:V8FI 2 "nonimmediate_operand" "vm"))
14274 (parallel [(match_operand 3 "const_0_to_7_operand")
14275 (match_operand 4 "const_0_to_7_operand")
14276 (match_operand 5 "const_0_to_7_operand")
14277 (match_operand 6 "const_0_to_7_operand")
14278 (match_operand 7 "const_8_to_15_operand")
14279 (match_operand 8 "const_8_to_15_operand")
14280 (match_operand 9 "const_8_to_15_operand")
14281 (match_operand 10 "const_8_to_15_operand")])))]
14282 "TARGET_AVX512F
14283 && (INTVAL (operands[3]) & 1) == 0
14284 && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
14285 && (INTVAL (operands[5]) & 1) == 0
14286 && INTVAL (operands[5]) == INTVAL (operands[6]) - 1
14287 && (INTVAL (operands[7]) & 1) == 0
14288 && INTVAL (operands[7]) == INTVAL (operands[8]) - 1
14289 && (INTVAL (operands[9]) & 1) == 0
14290 && INTVAL (operands[9]) == INTVAL (operands[10]) - 1"
14291 {
14292 int mask;
14293 mask = INTVAL (operands[3]) / 2;
14294 mask |= INTVAL (operands[5]) / 2 << 2;
14295 mask |= (INTVAL (operands[7]) - 8) / 2 << 4;
14296 mask |= (INTVAL (operands[9]) - 8) / 2 << 6;
14297 operands[3] = GEN_INT (mask);
14298
14299 return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
14300 }
14301 [(set_attr "type" "sselog")
14302 (set_attr "length_immediate" "1")
14303 (set_attr "prefix" "evex")
14304 (set_attr "mode" "<sseinsnmode>")])
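;; A minimal scalar model in C of the 512-bit vshuf{f,i}64x2 selection
;; encoded by the pattern above: result chunks 0-1 come from operand 1
;; and chunks 2-3 from operand 2, each chosen by a 2-bit field of the
;; immediate (a chunk being two 64-bit elements).  The helper name is
;; illustrative only:
;;
;;   #include <stdint.h>
;;
;;   static void
;;   shuf64x2_512 (uint64_t dst[8], const uint64_t src1[8],
;;                 const uint64_t src2[8], unsigned imm)
;;   {
;;     for (int c = 0; c < 4; c++)
;;       {
;;         const uint64_t *src = c < 2 ? src1 : src2;
;;         unsigned sel = (imm >> (2 * c)) & 3;
;;         dst[2 * c]     = src[2 * sel];
;;         dst[2 * c + 1] = src[2 * sel + 1];
;;       }
;;   }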
14305
14306 (define_expand "avx512vl_shuf_<shuffletype>32x4_mask"
14307 [(match_operand:VI4F_256 0 "register_operand")
14308 (match_operand:VI4F_256 1 "register_operand")
14309 (match_operand:VI4F_256 2 "nonimmediate_operand")
14310 (match_operand:SI 3 "const_0_to_3_operand")
14311 (match_operand:VI4F_256 4 "register_operand")
14312 (match_operand:QI 5 "register_operand")]
14313 "TARGET_AVX512VL"
14314 {
14315 int mask = INTVAL (operands[3]);
14316 emit_insn (gen_avx512vl_shuf_<shuffletype>32x4_1_mask
14317 (operands[0], operands[1], operands[2],
14318 GEN_INT (((mask >> 0) & 1) * 4 + 0),
14319 GEN_INT (((mask >> 0) & 1) * 4 + 1),
14320 GEN_INT (((mask >> 0) & 1) * 4 + 2),
14321 GEN_INT (((mask >> 0) & 1) * 4 + 3),
14322 GEN_INT (((mask >> 1) & 1) * 4 + 8),
14323 GEN_INT (((mask >> 1) & 1) * 4 + 9),
14324 GEN_INT (((mask >> 1) & 1) * 4 + 10),
14325 GEN_INT (((mask >> 1) & 1) * 4 + 11),
14326 operands[4], operands[5]));
14327 DONE;
14328 })
14329
14330 (define_insn "avx512vl_shuf_<shuffletype>32x4_1<mask_name>"
14331 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
14332 (vec_select:VI4F_256
14333 (vec_concat:<ssedoublemode>
14334 (match_operand:VI4F_256 1 "register_operand" "v")
14335 (match_operand:VI4F_256 2 "nonimmediate_operand" "vm"))
14336 (parallel [(match_operand 3 "const_0_to_7_operand")
14337 (match_operand 4 "const_0_to_7_operand")
14338 (match_operand 5 "const_0_to_7_operand")
14339 (match_operand 6 "const_0_to_7_operand")
14340 (match_operand 7 "const_8_to_15_operand")
14341 (match_operand 8 "const_8_to_15_operand")
14342 (match_operand 9 "const_8_to_15_operand")
14343 (match_operand 10 "const_8_to_15_operand")])))]
14344 "TARGET_AVX512VL
14345 && (INTVAL (operands[3]) & 3) == 0
14346 && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
14347 && INTVAL (operands[3]) == INTVAL (operands[5]) - 2
14348 && INTVAL (operands[3]) == INTVAL (operands[6]) - 3
14349 && (INTVAL (operands[7]) & 3) == 0
14350 && INTVAL (operands[7]) == INTVAL (operands[8]) - 1
14351 && INTVAL (operands[7]) == INTVAL (operands[9]) - 2
14352 && INTVAL (operands[7]) == INTVAL (operands[10]) - 3"
14353 {
14354 int mask;
14355 mask = INTVAL (operands[3]) / 4;
14356 mask |= (INTVAL (operands[7]) - 8) / 4 << 1;
14357 operands[3] = GEN_INT (mask);
14358
14359 return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
14360 }
14361 [(set_attr "type" "sselog")
14362 (set_attr "length_immediate" "1")
14363 (set_attr "prefix" "evex")
14364 (set_attr "mode" "<sseinsnmode>")])
14365
14366 (define_expand "avx512f_shuf_<shuffletype>32x4_mask"
14367 [(match_operand:V16FI 0 "register_operand")
14368 (match_operand:V16FI 1 "register_operand")
14369 (match_operand:V16FI 2 "nonimmediate_operand")
14370 (match_operand:SI 3 "const_0_to_255_operand")
14371 (match_operand:V16FI 4 "register_operand")
14372 (match_operand:HI 5 "register_operand")]
14373 "TARGET_AVX512F"
14374 {
14375 int mask = INTVAL (operands[3]);
14376 emit_insn (gen_avx512f_shuf_<shuffletype>32x4_1_mask
14377 (operands[0], operands[1], operands[2],
14378 GEN_INT (((mask >> 0) & 3) * 4),
14379 GEN_INT (((mask >> 0) & 3) * 4 + 1),
14380 GEN_INT (((mask >> 0) & 3) * 4 + 2),
14381 GEN_INT (((mask >> 0) & 3) * 4 + 3),
14382 GEN_INT (((mask >> 2) & 3) * 4),
14383 GEN_INT (((mask >> 2) & 3) * 4 + 1),
14384 GEN_INT (((mask >> 2) & 3) * 4 + 2),
14385 GEN_INT (((mask >> 2) & 3) * 4 + 3),
14386 GEN_INT (((mask >> 4) & 3) * 4 + 16),
14387 GEN_INT (((mask >> 4) & 3) * 4 + 17),
14388 GEN_INT (((mask >> 4) & 3) * 4 + 18),
14389 GEN_INT (((mask >> 4) & 3) * 4 + 19),
14390 GEN_INT (((mask >> 6) & 3) * 4 + 16),
14391 GEN_INT (((mask >> 6) & 3) * 4 + 17),
14392 GEN_INT (((mask >> 6) & 3) * 4 + 18),
14393 GEN_INT (((mask >> 6) & 3) * 4 + 19),
14394 operands[4], operands[5]));
14395 DONE;
14396 })
14397
14398 (define_insn "avx512f_shuf_<shuffletype>32x4_1<mask_name>"
14399 [(set (match_operand:V16FI 0 "register_operand" "=v")
14400 (vec_select:V16FI
14401 (vec_concat:<ssedoublemode>
14402 (match_operand:V16FI 1 "register_operand" "v")
14403 (match_operand:V16FI 2 "nonimmediate_operand" "vm"))
14404 (parallel [(match_operand 3 "const_0_to_15_operand")
14405 (match_operand 4 "const_0_to_15_operand")
14406 (match_operand 5 "const_0_to_15_operand")
14407 (match_operand 6 "const_0_to_15_operand")
14408 (match_operand 7 "const_0_to_15_operand")
14409 (match_operand 8 "const_0_to_15_operand")
14410 (match_operand 9 "const_0_to_15_operand")
14411 (match_operand 10 "const_0_to_15_operand")
14412 (match_operand 11 "const_16_to_31_operand")
14413 (match_operand 12 "const_16_to_31_operand")
14414 (match_operand 13 "const_16_to_31_operand")
14415 (match_operand 14 "const_16_to_31_operand")
14416 (match_operand 15 "const_16_to_31_operand")
14417 (match_operand 16 "const_16_to_31_operand")
14418 (match_operand 17 "const_16_to_31_operand")
14419 (match_operand 18 "const_16_to_31_operand")])))]
14420 "TARGET_AVX512F
14421 && (INTVAL (operands[3]) & 3) == 0
14422 && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
14423 && INTVAL (operands[3]) == INTVAL (operands[5]) - 2
14424 && INTVAL (operands[3]) == INTVAL (operands[6]) - 3
14425 && (INTVAL (operands[7]) & 3) == 0
14426 && INTVAL (operands[7]) == INTVAL (operands[8]) - 1
14427 && INTVAL (operands[7]) == INTVAL (operands[9]) - 2
14428 && INTVAL (operands[7]) == INTVAL (operands[10]) - 3
14429 && (INTVAL (operands[11]) & 3) == 0
14430 && INTVAL (operands[11]) == INTVAL (operands[12]) - 1
14431 && INTVAL (operands[11]) == INTVAL (operands[13]) - 2
14432 && INTVAL (operands[11]) == INTVAL (operands[14]) - 3
14433 && (INTVAL (operands[15]) & 3) == 0
14434 && INTVAL (operands[15]) == INTVAL (operands[16]) - 1
14435 && INTVAL (operands[15]) == INTVAL (operands[17]) - 2
14436 && INTVAL (operands[15]) == INTVAL (operands[18]) - 3"
14437 {
14438 int mask;
14439 mask = INTVAL (operands[3]) / 4;
14440 mask |= INTVAL (operands[7]) / 4 << 2;
14441 mask |= (INTVAL (operands[11]) - 16) / 4 << 4;
14442 mask |= (INTVAL (operands[15]) - 16) / 4 << 6;
14443 operands[3] = GEN_INT (mask);
14444
14445 return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
14446 }
14447 [(set_attr "type" "sselog")
14448 (set_attr "length_immediate" "1")
14449 (set_attr "prefix" "evex")
14450 (set_attr "mode" "<sseinsnmode>")])
14451
14452 (define_expand "avx512f_pshufdv3_mask"
14453 [(match_operand:V16SI 0 "register_operand")
14454 (match_operand:V16SI 1 "nonimmediate_operand")
14455 (match_operand:SI 2 "const_0_to_255_operand")
14456 (match_operand:V16SI 3 "register_operand")
14457 (match_operand:HI 4 "register_operand")]
14458 "TARGET_AVX512F"
14459 {
14460 int mask = INTVAL (operands[2]);
14461 emit_insn (gen_avx512f_pshufd_1_mask (operands[0], operands[1],
14462 GEN_INT ((mask >> 0) & 3),
14463 GEN_INT ((mask >> 2) & 3),
14464 GEN_INT ((mask >> 4) & 3),
14465 GEN_INT ((mask >> 6) & 3),
14466 GEN_INT (((mask >> 0) & 3) + 4),
14467 GEN_INT (((mask >> 2) & 3) + 4),
14468 GEN_INT (((mask >> 4) & 3) + 4),
14469 GEN_INT (((mask >> 6) & 3) + 4),
14470 GEN_INT (((mask >> 0) & 3) + 8),
14471 GEN_INT (((mask >> 2) & 3) + 8),
14472 GEN_INT (((mask >> 4) & 3) + 8),
14473 GEN_INT (((mask >> 6) & 3) + 8),
14474 GEN_INT (((mask >> 0) & 3) + 12),
14475 GEN_INT (((mask >> 2) & 3) + 12),
14476 GEN_INT (((mask >> 4) & 3) + 12),
14477 GEN_INT (((mask >> 6) & 3) + 12),
14478 operands[3], operands[4]));
14479 DONE;
14480 })
14481
14482 (define_insn "avx512f_pshufd_1<mask_name>"
14483 [(set (match_operand:V16SI 0 "register_operand" "=v")
14484 (vec_select:V16SI
14485 (match_operand:V16SI 1 "nonimmediate_operand" "vm")
14486 (parallel [(match_operand 2 "const_0_to_3_operand")
14487 (match_operand 3 "const_0_to_3_operand")
14488 (match_operand 4 "const_0_to_3_operand")
14489 (match_operand 5 "const_0_to_3_operand")
14490 (match_operand 6 "const_4_to_7_operand")
14491 (match_operand 7 "const_4_to_7_operand")
14492 (match_operand 8 "const_4_to_7_operand")
14493 (match_operand 9 "const_4_to_7_operand")
14494 (match_operand 10 "const_8_to_11_operand")
14495 (match_operand 11 "const_8_to_11_operand")
14496 (match_operand 12 "const_8_to_11_operand")
14497 (match_operand 13 "const_8_to_11_operand")
14498 (match_operand 14 "const_12_to_15_operand")
14499 (match_operand 15 "const_12_to_15_operand")
14500 (match_operand 16 "const_12_to_15_operand")
14501 (match_operand 17 "const_12_to_15_operand")])))]
14502 "TARGET_AVX512F
14503 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
14504 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
14505 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
14506 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])
14507 && INTVAL (operands[2]) + 8 == INTVAL (operands[10])
14508 && INTVAL (operands[3]) + 8 == INTVAL (operands[11])
14509 && INTVAL (operands[4]) + 8 == INTVAL (operands[12])
14510 && INTVAL (operands[5]) + 8 == INTVAL (operands[13])
14511 && INTVAL (operands[2]) + 12 == INTVAL (operands[14])
14512 && INTVAL (operands[3]) + 12 == INTVAL (operands[15])
14513 && INTVAL (operands[4]) + 12 == INTVAL (operands[16])
14514 && INTVAL (operands[5]) + 12 == INTVAL (operands[17])"
14515 {
14516 int mask = 0;
14517 mask |= INTVAL (operands[2]) << 0;
14518 mask |= INTVAL (operands[3]) << 2;
14519 mask |= INTVAL (operands[4]) << 4;
14520 mask |= INTVAL (operands[5]) << 6;
14521 operands[2] = GEN_INT (mask);
14522
14523 return "vpshufd\t{%2, %1, %0<mask_operand18>|%0<mask_operand18>, %1, %2}";
14524 }
14525 [(set_attr "type" "sselog1")
14526 (set_attr "prefix" "evex")
14527 (set_attr "length_immediate" "1")
14528 (set_attr "mode" "XI")])
14529
14530 (define_expand "avx512vl_pshufdv3_mask"
14531 [(match_operand:V8SI 0 "register_operand")
14532 (match_operand:V8SI 1 "nonimmediate_operand")
14533 (match_operand:SI 2 "const_0_to_255_operand")
14534 (match_operand:V8SI 3 "register_operand")
14535 (match_operand:QI 4 "register_operand")]
14536 "TARGET_AVX512VL"
14537 {
14538 int mask = INTVAL (operands[2]);
14539 emit_insn (gen_avx2_pshufd_1_mask (operands[0], operands[1],
14540 GEN_INT ((mask >> 0) & 3),
14541 GEN_INT ((mask >> 2) & 3),
14542 GEN_INT ((mask >> 4) & 3),
14543 GEN_INT ((mask >> 6) & 3),
14544 GEN_INT (((mask >> 0) & 3) + 4),
14545 GEN_INT (((mask >> 2) & 3) + 4),
14546 GEN_INT (((mask >> 4) & 3) + 4),
14547 GEN_INT (((mask >> 6) & 3) + 4),
14548 operands[3], operands[4]));
14549 DONE;
14550 })
14551
14552 (define_expand "avx2_pshufdv3"
14553 [(match_operand:V8SI 0 "register_operand")
14554 (match_operand:V8SI 1 "nonimmediate_operand")
14555 (match_operand:SI 2 "const_0_to_255_operand")]
14556 "TARGET_AVX2"
14557 {
14558 int mask = INTVAL (operands[2]);
14559 emit_insn (gen_avx2_pshufd_1 (operands[0], operands[1],
14560 GEN_INT ((mask >> 0) & 3),
14561 GEN_INT ((mask >> 2) & 3),
14562 GEN_INT ((mask >> 4) & 3),
14563 GEN_INT ((mask >> 6) & 3),
14564 GEN_INT (((mask >> 0) & 3) + 4),
14565 GEN_INT (((mask >> 2) & 3) + 4),
14566 GEN_INT (((mask >> 4) & 3) + 4),
14567 GEN_INT (((mask >> 6) & 3) + 4)));
14568 DONE;
14569 })
14570
14571 (define_insn "avx2_pshufd_1<mask_name>"
14572 [(set (match_operand:V8SI 0 "register_operand" "=v")
14573 (vec_select:V8SI
14574 (match_operand:V8SI 1 "nonimmediate_operand" "vm")
14575 (parallel [(match_operand 2 "const_0_to_3_operand")
14576 (match_operand 3 "const_0_to_3_operand")
14577 (match_operand 4 "const_0_to_3_operand")
14578 (match_operand 5 "const_0_to_3_operand")
14579 (match_operand 6 "const_4_to_7_operand")
14580 (match_operand 7 "const_4_to_7_operand")
14581 (match_operand 8 "const_4_to_7_operand")
14582 (match_operand 9 "const_4_to_7_operand")])))]
14583 "TARGET_AVX2
14584 && <mask_avx512vl_condition>
14585 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
14586 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
14587 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
14588 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])"
14589 {
14590 int mask = 0;
14591 mask |= INTVAL (operands[2]) << 0;
14592 mask |= INTVAL (operands[3]) << 2;
14593 mask |= INTVAL (operands[4]) << 4;
14594 mask |= INTVAL (operands[5]) << 6;
14595 operands[2] = GEN_INT (mask);
14596
14597 return "vpshufd\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
14598 }
14599 [(set_attr "type" "sselog1")
14600 (set_attr "prefix" "maybe_evex")
14601 (set_attr "length_immediate" "1")
14602 (set_attr "mode" "OI")])
14603
14604 (define_expand "avx512vl_pshufd_mask"
14605 [(match_operand:V4SI 0 "register_operand")
14606 (match_operand:V4SI 1 "nonimmediate_operand")
14607 (match_operand:SI 2 "const_0_to_255_operand")
14608 (match_operand:V4SI 3 "register_operand")
14609 (match_operand:QI 4 "register_operand")]
14610 "TARGET_AVX512VL"
14611 {
14612 int mask = INTVAL (operands[2]);
14613 emit_insn (gen_sse2_pshufd_1_mask (operands[0], operands[1],
14614 GEN_INT ((mask >> 0) & 3),
14615 GEN_INT ((mask >> 2) & 3),
14616 GEN_INT ((mask >> 4) & 3),
14617 GEN_INT ((mask >> 6) & 3),
14618 operands[3], operands[4]));
14619 DONE;
14620 })
14621
14622 (define_expand "sse2_pshufd"
14623 [(match_operand:V4SI 0 "register_operand")
14624 (match_operand:V4SI 1 "vector_operand")
14625 (match_operand:SI 2 "const_int_operand")]
14626 "TARGET_SSE2"
14627 {
14628 int mask = INTVAL (operands[2]);
14629 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
14630 GEN_INT ((mask >> 0) & 3),
14631 GEN_INT ((mask >> 2) & 3),
14632 GEN_INT ((mask >> 4) & 3),
14633 GEN_INT ((mask >> 6) & 3)));
14634 DONE;
14635 })
14636
14637 (define_insn "sse2_pshufd_1<mask_name>"
14638 [(set (match_operand:V4SI 0 "register_operand" "=v")
14639 (vec_select:V4SI
14640 (match_operand:V4SI 1 "vector_operand" "vBm")
14641 (parallel [(match_operand 2 "const_0_to_3_operand")
14642 (match_operand 3 "const_0_to_3_operand")
14643 (match_operand 4 "const_0_to_3_operand")
14644 (match_operand 5 "const_0_to_3_operand")])))]
14645 "TARGET_SSE2 && <mask_avx512vl_condition>"
14646 {
14647 int mask = 0;
14648 mask |= INTVAL (operands[2]) << 0;
14649 mask |= INTVAL (operands[3]) << 2;
14650 mask |= INTVAL (operands[4]) << 4;
14651 mask |= INTVAL (operands[5]) << 6;
14652 operands[2] = GEN_INT (mask);
14653
14654 return "%vpshufd\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
14655 }
14656 [(set_attr "type" "sselog1")
14657 (set_attr "prefix_data16" "1")
14658 (set_attr "prefix" "<mask_prefix2>")
14659 (set_attr "length_immediate" "1")
14660 (set_attr "mode" "TI")])
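;; A minimal scalar model in C of pshufd on one 128-bit lane: result
;; dword I is source dword ((imm >> (2 * I)) & 3).  The 256- and 512-bit
;; patterns above apply the same four selectors to every 128-bit lane,
;; which is why their expanders add 4, 8 and 12 to the indices.  The
;; helper name is illustrative only:
;;
;;   #include <stdint.h>
;;
;;   static void
;;   pshufd_lane (uint32_t dst[4], const uint32_t src[4], unsigned imm)
;;   {
;;     for (int i = 0; i < 4; i++)
;;       dst[i] = src[(imm >> (2 * i)) & 3];
;;   }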
14661
14662 (define_insn "<mask_codefor>avx512bw_pshuflwv32hi<mask_name>"
14663 [(set (match_operand:V32HI 0 "register_operand" "=v")
14664 (unspec:V32HI
14665 [(match_operand:V32HI 1 "nonimmediate_operand" "vm")
14666 (match_operand:SI 2 "const_0_to_255_operand" "n")]
14667 UNSPEC_PSHUFLW))]
14668 "TARGET_AVX512BW"
14669 "vpshuflw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14670 [(set_attr "type" "sselog")
14671 (set_attr "prefix" "evex")
14672 (set_attr "mode" "XI")])
14673
14674 (define_expand "avx512vl_pshuflwv3_mask"
14675 [(match_operand:V16HI 0 "register_operand")
14676 (match_operand:V16HI 1 "nonimmediate_operand")
14677 (match_operand:SI 2 "const_0_to_255_operand")
14678 (match_operand:V16HI 3 "register_operand")
14679 (match_operand:HI 4 "register_operand")]
14680 "TARGET_AVX512VL && TARGET_AVX512BW"
14681 {
14682 int mask = INTVAL (operands[2]);
14683 emit_insn (gen_avx2_pshuflw_1_mask (operands[0], operands[1],
14684 GEN_INT ((mask >> 0) & 3),
14685 GEN_INT ((mask >> 2) & 3),
14686 GEN_INT ((mask >> 4) & 3),
14687 GEN_INT ((mask >> 6) & 3),
14688 GEN_INT (((mask >> 0) & 3) + 8),
14689 GEN_INT (((mask >> 2) & 3) + 8),
14690 GEN_INT (((mask >> 4) & 3) + 8),
14691 GEN_INT (((mask >> 6) & 3) + 8),
14692 operands[3], operands[4]));
14693 DONE;
14694 })
14695
14696 (define_expand "avx2_pshuflwv3"
14697 [(match_operand:V16HI 0 "register_operand")
14698 (match_operand:V16HI 1 "nonimmediate_operand")
14699 (match_operand:SI 2 "const_0_to_255_operand")]
14700 "TARGET_AVX2"
14701 {
14702 int mask = INTVAL (operands[2]);
14703 emit_insn (gen_avx2_pshuflw_1 (operands[0], operands[1],
14704 GEN_INT ((mask >> 0) & 3),
14705 GEN_INT ((mask >> 2) & 3),
14706 GEN_INT ((mask >> 4) & 3),
14707 GEN_INT ((mask >> 6) & 3),
14708 GEN_INT (((mask >> 0) & 3) + 8),
14709 GEN_INT (((mask >> 2) & 3) + 8),
14710 GEN_INT (((mask >> 4) & 3) + 8),
14711 GEN_INT (((mask >> 6) & 3) + 8)));
14712 DONE;
14713 })
14714
14715 (define_insn "avx2_pshuflw_1<mask_name>"
14716 [(set (match_operand:V16HI 0 "register_operand" "=v")
14717 (vec_select:V16HI
14718 (match_operand:V16HI 1 "nonimmediate_operand" "vm")
14719 (parallel [(match_operand 2 "const_0_to_3_operand")
14720 (match_operand 3 "const_0_to_3_operand")
14721 (match_operand 4 "const_0_to_3_operand")
14722 (match_operand 5 "const_0_to_3_operand")
14723 (const_int 4)
14724 (const_int 5)
14725 (const_int 6)
14726 (const_int 7)
14727 (match_operand 6 "const_8_to_11_operand")
14728 (match_operand 7 "const_8_to_11_operand")
14729 (match_operand 8 "const_8_to_11_operand")
14730 (match_operand 9 "const_8_to_11_operand")
14731 (const_int 12)
14732 (const_int 13)
14733 (const_int 14)
14734 (const_int 15)])))]
14735 "TARGET_AVX2
14736 && <mask_avx512bw_condition> && <mask_avx512vl_condition>
14737 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
14738 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
14739 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
14740 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
14741 {
14742 int mask = 0;
14743 mask |= INTVAL (operands[2]) << 0;
14744 mask |= INTVAL (operands[3]) << 2;
14745 mask |= INTVAL (operands[4]) << 4;
14746 mask |= INTVAL (operands[5]) << 6;
14747 operands[2] = GEN_INT (mask);
14748
14749 return "vpshuflw\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
14750 }
14751 [(set_attr "type" "sselog")
14752 (set_attr "prefix" "maybe_evex")
14753 (set_attr "length_immediate" "1")
14754 (set_attr "mode" "OI")])
14755
14756 (define_expand "avx512vl_pshuflw_mask"
14757 [(match_operand:V8HI 0 "register_operand")
14758 (match_operand:V8HI 1 "nonimmediate_operand")
14759 (match_operand:SI 2 "const_0_to_255_operand")
14760 (match_operand:V8HI 3 "register_operand")
14761 (match_operand:QI 4 "register_operand")]
14762 "TARGET_AVX512VL && TARGET_AVX512BW"
14763 {
14764 int mask = INTVAL (operands[2]);
14765 emit_insn (gen_sse2_pshuflw_1_mask (operands[0], operands[1],
14766 GEN_INT ((mask >> 0) & 3),
14767 GEN_INT ((mask >> 2) & 3),
14768 GEN_INT ((mask >> 4) & 3),
14769 GEN_INT ((mask >> 6) & 3),
14770 operands[3], operands[4]));
14771 DONE;
14772 })
14773
14774 (define_expand "sse2_pshuflw"
14775 [(match_operand:V8HI 0 "register_operand")
14776 (match_operand:V8HI 1 "vector_operand")
14777 (match_operand:SI 2 "const_int_operand")]
14778 "TARGET_SSE2"
14779 {
14780 int mask = INTVAL (operands[2]);
14781 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
14782 GEN_INT ((mask >> 0) & 3),
14783 GEN_INT ((mask >> 2) & 3),
14784 GEN_INT ((mask >> 4) & 3),
14785 GEN_INT ((mask >> 6) & 3)));
14786 DONE;
14787 })
14788
14789 (define_insn "sse2_pshuflw_1<mask_name>"
14790 [(set (match_operand:V8HI 0 "register_operand" "=v")
14791 (vec_select:V8HI
14792 (match_operand:V8HI 1 "vector_operand" "vBm")
14793 (parallel [(match_operand 2 "const_0_to_3_operand")
14794 (match_operand 3 "const_0_to_3_operand")
14795 (match_operand 4 "const_0_to_3_operand")
14796 (match_operand 5 "const_0_to_3_operand")
14797 (const_int 4)
14798 (const_int 5)
14799 (const_int 6)
14800 (const_int 7)])))]
14801 "TARGET_SSE2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
14802 {
14803 int mask = 0;
14804 mask |= INTVAL (operands[2]) << 0;
14805 mask |= INTVAL (operands[3]) << 2;
14806 mask |= INTVAL (operands[4]) << 4;
14807 mask |= INTVAL (operands[5]) << 6;
14808 operands[2] = GEN_INT (mask);
14809
14810 return "%vpshuflw\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
14811 }
14812 [(set_attr "type" "sselog")
14813 (set_attr "prefix_data16" "0")
14814 (set_attr "prefix_rep" "1")
14815 (set_attr "prefix" "maybe_vex")
14816 (set_attr "length_immediate" "1")
14817 (set_attr "mode" "TI")])
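;; pshuflw applies the pshufd-style 2-bit selectors only to the low four
;; words and copies words 4-7 through unchanged, which is why the
;; parallel above fixes elements 4-7.  pshufhw below is the mirror image
;; for the high four words; its patterns subtract 4 from the selectors
;; when rebuilding the immediate, because the matched indices there run
;; from 4 to 7.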
14818
14819 (define_expand "avx2_pshufhwv3"
14820 [(match_operand:V16HI 0 "register_operand")
14821 (match_operand:V16HI 1 "nonimmediate_operand")
14822 (match_operand:SI 2 "const_0_to_255_operand")]
14823 "TARGET_AVX2"
14824 {
14825 int mask = INTVAL (operands[2]);
14826 emit_insn (gen_avx2_pshufhw_1 (operands[0], operands[1],
14827 GEN_INT (((mask >> 0) & 3) + 4),
14828 GEN_INT (((mask >> 2) & 3) + 4),
14829 GEN_INT (((mask >> 4) & 3) + 4),
14830 GEN_INT (((mask >> 6) & 3) + 4),
14831 GEN_INT (((mask >> 0) & 3) + 12),
14832 GEN_INT (((mask >> 2) & 3) + 12),
14833 GEN_INT (((mask >> 4) & 3) + 12),
14834 GEN_INT (((mask >> 6) & 3) + 12)));
14835 DONE;
14836 })
14837
14838 (define_insn "<mask_codefor>avx512bw_pshufhwv32hi<mask_name>"
14839 [(set (match_operand:V32HI 0 "register_operand" "=v")
14840 (unspec:V32HI
14841 [(match_operand:V32HI 1 "nonimmediate_operand" "vm")
14842 (match_operand:SI 2 "const_0_to_255_operand" "n")]
14843 UNSPEC_PSHUFHW))]
14844 "TARGET_AVX512BW"
14845 "vpshufhw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14846 [(set_attr "type" "sselog")
14847 (set_attr "prefix" "evex")
14848 (set_attr "mode" "XI")])
14849
14850 (define_expand "avx512vl_pshufhwv3_mask"
14851 [(match_operand:V16HI 0 "register_operand")
14852 (match_operand:V16HI 1 "nonimmediate_operand")
14853 (match_operand:SI 2 "const_0_to_255_operand")
14854 (match_operand:V16HI 3 "register_operand")
14855 (match_operand:HI 4 "register_operand")]
14856 "TARGET_AVX512VL && TARGET_AVX512BW"
14857 {
14858 int mask = INTVAL (operands[2]);
14859 emit_insn (gen_avx2_pshufhw_1_mask (operands[0], operands[1],
14860 GEN_INT (((mask >> 0) & 3) + 4),
14861 GEN_INT (((mask >> 2) & 3) + 4),
14862 GEN_INT (((mask >> 4) & 3) + 4),
14863 GEN_INT (((mask >> 6) & 3) + 4),
14864 GEN_INT (((mask >> 0) & 3) + 12),
14865 GEN_INT (((mask >> 2) & 3) + 12),
14866 GEN_INT (((mask >> 4) & 3) + 12),
14867 GEN_INT (((mask >> 6) & 3) + 12),
14868 operands[3], operands[4]));
14869 DONE;
14870 })
14871
14872 (define_insn "avx2_pshufhw_1<mask_name>"
14873 [(set (match_operand:V16HI 0 "register_operand" "=v")
14874 (vec_select:V16HI
14875 (match_operand:V16HI 1 "nonimmediate_operand" "vm")
14876 (parallel [(const_int 0)
14877 (const_int 1)
14878 (const_int 2)
14879 (const_int 3)
14880 (match_operand 2 "const_4_to_7_operand")
14881 (match_operand 3 "const_4_to_7_operand")
14882 (match_operand 4 "const_4_to_7_operand")
14883 (match_operand 5 "const_4_to_7_operand")
14884 (const_int 8)
14885 (const_int 9)
14886 (const_int 10)
14887 (const_int 11)
14888 (match_operand 6 "const_12_to_15_operand")
14889 (match_operand 7 "const_12_to_15_operand")
14890 (match_operand 8 "const_12_to_15_operand")
14891 (match_operand 9 "const_12_to_15_operand")])))]
14892 "TARGET_AVX2
14893 && <mask_avx512bw_condition> && <mask_avx512vl_condition>
14894 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
14895 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
14896 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
14897 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
14898 {
14899 int mask = 0;
14900 mask |= (INTVAL (operands[2]) - 4) << 0;
14901 mask |= (INTVAL (operands[3]) - 4) << 2;
14902 mask |= (INTVAL (operands[4]) - 4) << 4;
14903 mask |= (INTVAL (operands[5]) - 4) << 6;
14904 operands[2] = GEN_INT (mask);
14905
14906 return "vpshufhw\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
14907 }
14908 [(set_attr "type" "sselog")
14909 (set_attr "prefix" "maybe_evex")
14910 (set_attr "length_immediate" "1")
14911 (set_attr "mode" "OI")])
14912
14913 (define_expand "avx512vl_pshufhw_mask"
14914 [(match_operand:V8HI 0 "register_operand")
14915 (match_operand:V8HI 1 "nonimmediate_operand")
14916 (match_operand:SI 2 "const_0_to_255_operand")
14917 (match_operand:V8HI 3 "register_operand")
14918 (match_operand:QI 4 "register_operand")]
14919 "TARGET_AVX512VL && TARGET_AVX512BW"
14920 {
14921 int mask = INTVAL (operands[2]);
14922 emit_insn (gen_sse2_pshufhw_1_mask (operands[0], operands[1],
14923 GEN_INT (((mask >> 0) & 3) + 4),
14924 GEN_INT (((mask >> 2) & 3) + 4),
14925 GEN_INT (((mask >> 4) & 3) + 4),
14926 GEN_INT (((mask >> 6) & 3) + 4),
14927 operands[3], operands[4]));
14928 DONE;
14929 })
14930
14931 (define_expand "sse2_pshufhw"
14932 [(match_operand:V8HI 0 "register_operand")
14933 (match_operand:V8HI 1 "vector_operand")
14934 (match_operand:SI 2 "const_int_operand")]
14935 "TARGET_SSE2"
14936 {
14937 int mask = INTVAL (operands[2]);
14938 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
14939 GEN_INT (((mask >> 0) & 3) + 4),
14940 GEN_INT (((mask >> 2) & 3) + 4),
14941 GEN_INT (((mask >> 4) & 3) + 4),
14942 GEN_INT (((mask >> 6) & 3) + 4)));
14943 DONE;
14944 })
14945
14946 (define_insn "sse2_pshufhw_1<mask_name>"
14947 [(set (match_operand:V8HI 0 "register_operand" "=v")
14948 (vec_select:V8HI
14949 (match_operand:V8HI 1 "vector_operand" "vBm")
14950 (parallel [(const_int 0)
14951 (const_int 1)
14952 (const_int 2)
14953 (const_int 3)
14954 (match_operand 2 "const_4_to_7_operand")
14955 (match_operand 3 "const_4_to_7_operand")
14956 (match_operand 4 "const_4_to_7_operand")
14957 (match_operand 5 "const_4_to_7_operand")])))]
14958 "TARGET_SSE2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
14959 {
14960 int mask = 0;
14961 mask |= (INTVAL (operands[2]) - 4) << 0;
14962 mask |= (INTVAL (operands[3]) - 4) << 2;
14963 mask |= (INTVAL (operands[4]) - 4) << 4;
14964 mask |= (INTVAL (operands[5]) - 4) << 6;
14965 operands[2] = GEN_INT (mask);
14966
14967 return "%vpshufhw\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
14968 }
14969 [(set_attr "type" "sselog")
14970 (set_attr "prefix_rep" "1")
14971 (set_attr "prefix_data16" "0")
14972 (set_attr "prefix" "maybe_vex")
14973 (set_attr "length_immediate" "1")
14974 (set_attr "mode" "TI")])
14975
14976 (define_expand "sse2_loadd"
14977 [(set (match_operand:V4SI 0 "register_operand")
14978 (vec_merge:V4SI
14979 (vec_duplicate:V4SI
14980 (match_operand:SI 1 "nonimmediate_operand"))
14981 (match_dup 2)
14982 (const_int 1)))]
14983 "TARGET_SSE"
14984 "operands[2] = CONST0_RTX (V4SImode);")
14985
14986 (define_insn "sse2_loadld"
14987 [(set (match_operand:V4SI 0 "register_operand" "=v,v,x,x,v")
14988 (vec_merge:V4SI
14989 (vec_duplicate:V4SI
14990 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,v"))
14991 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0,v")
14992 (const_int 1)))]
14993 "TARGET_SSE"
14994 "@
14995 %vmovd\t{%2, %0|%0, %2}
14996 %vmovd\t{%2, %0|%0, %2}
14997 movss\t{%2, %0|%0, %2}
14998 movss\t{%2, %0|%0, %2}
14999 vmovss\t{%2, %1, %0|%0, %1, %2}"
15000 [(set_attr "isa" "sse2,sse2,noavx,noavx,avx")
15001 (set_attr "type" "ssemov")
15002 (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,maybe_evex")
15003 (set_attr "mode" "TI,TI,V4SF,SF,SF")
15004 (set (attr "preferred_for_speed")
15005 (cond [(eq_attr "alternative" "1")
15006 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
15007 ]
15008 (symbol_ref "true")))])
15009
15010 ;; QI and HI modes handled by pextr patterns.
15011 (define_mode_iterator PEXTR_MODE12
15012 [(V16QI "TARGET_SSE4_1") V8HI])
15013
15014 (define_insn "*vec_extract<mode>"
15015 [(set (match_operand:<ssescalarmode> 0 "register_sse4nonimm_operand" "=r,m,r,m")
15016 (vec_select:<ssescalarmode>
15017 (match_operand:PEXTR_MODE12 1 "register_operand" "x,x,v,v")
15018 (parallel
15019 [(match_operand:SI 2 "const_0_to_<ssescalarnummask>_operand")])))]
15020 "TARGET_SSE2"
15021 "@
15022 %vpextr<ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
15023 %vpextr<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
15024 vpextr<ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
15025 vpextr<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
15026 [(set_attr "isa" "*,sse4,avx512bw,avx512bw")
15027 (set_attr "type" "sselog1")
15028 (set_attr "prefix_data16" "1")
15029 (set (attr "prefix_extra")
15030 (if_then_else
15031 (and (eq_attr "alternative" "0,2")
15032 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
15033 (const_string "*")
15034 (const_string "1")))
15035 (set_attr "length_immediate" "1")
15036 (set_attr "prefix" "maybe_vex,maybe_vex,evex,evex")
15037 (set_attr "mode" "TI")])
15038
15039 (define_insn "*vec_extract<PEXTR_MODE12:mode>_zext"
15040 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
15041 (zero_extend:SWI48
15042 (vec_select:<PEXTR_MODE12:ssescalarmode>
15043 (match_operand:PEXTR_MODE12 1 "register_operand" "x,v")
15044 (parallel
15045 [(match_operand:SI 2
15046 "const_0_to_<PEXTR_MODE12:ssescalarnummask>_operand")]))))]
15047 "TARGET_SSE2"
15048 "@
15049 %vpextr<PEXTR_MODE12:ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
15050 vpextr<PEXTR_MODE12:ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}"
15051 [(set_attr "isa" "*,avx512bw")
15052 (set_attr "type" "sselog1")
15053 (set_attr "prefix_data16" "1")
15054 (set (attr "prefix_extra")
15055 (if_then_else
15056 (eq (const_string "<PEXTR_MODE12:MODE>mode") (const_string "V8HImode"))
15057 (const_string "*")
15058 (const_string "1")))
15059 (set_attr "length_immediate" "1")
15060 (set_attr "prefix" "maybe_vex")
15061 (set_attr "mode" "TI")])
15062
15063 (define_insn "*vec_extractv16qi_zext"
15064 [(set (match_operand:HI 0 "register_operand" "=r,r")
15065 (zero_extend:HI
15066 (vec_select:QI
15067 (match_operand:V16QI 1 "register_operand" "x,v")
15068 (parallel
15069 [(match_operand:SI 2 "const_0_to_15_operand")]))))]
15070 "TARGET_SSE4_1"
15071 "@
15072 %vpextrb\t{%2, %1, %k0|%k0, %1, %2}
15073 vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
15074 [(set_attr "isa" "*,avx512bw")
15075 (set_attr "type" "sselog1")
15076 (set_attr "prefix_data16" "1")
15077 (set_attr "prefix_extra" "1")
15078 (set_attr "length_immediate" "1")
15079 (set_attr "prefix" "maybe_vex")
15080 (set_attr "mode" "TI")])
15081
15082 (define_insn "*vec_extract<mode>_mem"
15083 [(set (match_operand:<ssescalarmode> 0 "register_operand" "=r")
15084 (vec_select:<ssescalarmode>
15085 (match_operand:VI12_128 1 "memory_operand" "o")
15086 (parallel
15087 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
15088 "TARGET_SSE"
15089 "#")
15090
15091 (define_insn "*vec_extract<ssevecmodelower>_0"
15092 [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r,r,v ,m")
15093 (vec_select:SWI48
15094 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "m ,v,vm,v")
15095 (parallel [(const_int 0)])))]
15096 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
15097 "#"
15098 [(set_attr "isa" "*,sse2,*,*")
15099 (set (attr "preferred_for_speed")
15100 (cond [(eq_attr "alternative" "1")
15101 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
15102 ]
15103 (symbol_ref "true")))])
15104
15105 (define_insn "*vec_extractv2di_0_sse"
15106 [(set (match_operand:DI 0 "nonimmediate_operand" "=r,x ,m")
15107 (vec_select:DI
15108 (match_operand:V2DI 1 "nonimmediate_operand" " x,xm,x")
15109 (parallel [(const_int 0)])))]
15110 "TARGET_SSE && !TARGET_64BIT
15111 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
15112 "#"
15113 [(set_attr "isa" "sse4,*,*")
15114 (set (attr "preferred_for_speed")
15115 (cond [(eq_attr "alternative" "0")
15116 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
15117 ]
15118 (symbol_ref "true")))])
15119
15120 (define_split
15121 [(set (match_operand:DI 0 "general_reg_operand")
15122 (vec_select:DI
15123 (match_operand:V2DI 1 "register_operand")
15124 (parallel [(const_int 0)])))]
15125 "TARGET_SSE4_1 && !TARGET_64BIT
15126 && reload_completed"
15127 [(set (match_dup 2) (match_dup 4))
15128 (set (match_dup 3)
15129 (vec_select:SI
15130 (match_dup 5)
15131 (parallel [(const_int 1)])))]
15132 {
15133 operands[4] = gen_lowpart (SImode, operands[1]);
15134 operands[5] = gen_lowpart (V4SImode, operands[1]);
15135 split_double_mode (DImode, &operands[0], 1, &operands[2], &operands[3]);
15136 })
15137
15138 (define_split
15139 [(set (match_operand:SWI48x 0 "nonimmediate_operand")
15140 (vec_select:SWI48x
15141 (match_operand:<ssevecmode> 1 "register_operand")
15142 (parallel [(const_int 0)])))]
15143 "TARGET_SSE && reload_completed"
15144 [(set (match_dup 0) (match_dup 1))]
15145 "operands[1] = gen_lowpart (<MODE>mode, operands[1]);")
15146
15147 (define_insn "*vec_extractv4si_0_zext_sse4"
15148 [(set (match_operand:DI 0 "register_operand" "=r,x,v")
15149 (zero_extend:DI
15150 (vec_select:SI
15151 (match_operand:V4SI 1 "register_operand" "v,x,v")
15152 (parallel [(const_int 0)]))))]
15153 "TARGET_SSE4_1"
15154 "#"
15155 [(set_attr "isa" "x64,*,avx512f")
15156 (set (attr "preferred_for_speed")
15157 (cond [(eq_attr "alternative" "0")
15158 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
15159 ]
15160 (symbol_ref "true")))])
15161
15162 (define_insn "*vec_extractv4si_0_zext"
15163 [(set (match_operand:DI 0 "register_operand" "=r")
15164 (zero_extend:DI
15165 (vec_select:SI
15166 (match_operand:V4SI 1 "register_operand" "x")
15167 (parallel [(const_int 0)]))))]
15168 "TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_FROM_VEC"
15169 "#")
15170
15171 (define_split
15172 [(set (match_operand:DI 0 "register_operand")
15173 (zero_extend:DI
15174 (vec_select:SI
15175 (match_operand:V4SI 1 "register_operand")
15176 (parallel [(const_int 0)]))))]
15177 "TARGET_SSE2 && reload_completed"
15178 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
15179 "operands[1] = gen_lowpart (SImode, operands[1]);")
15180
15181 (define_insn "*vec_extractv4si"
15182 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,rm,Yr,*x,x,Yv")
15183 (vec_select:SI
15184 (match_operand:V4SI 1 "register_operand" "x,v,0,0,x,v")
15185 (parallel [(match_operand:SI 2 "const_0_to_3_operand")])))]
15186 "TARGET_SSE4_1"
15187 {
15188 switch (which_alternative)
15189 {
15190 case 0:
15191 case 1:
15192 return "%vpextrd\t{%2, %1, %0|%0, %1, %2}";
15193
15194 case 2:
15195 case 3:
15196 operands[2] = GEN_INT (INTVAL (operands[2]) * 4);
15197 return "psrldq\t{%2, %0|%0, %2}";
15198
15199 case 4:
15200 case 5:
15201 operands[2] = GEN_INT (INTVAL (operands[2]) * 4);
15202 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
15203
15204 default:
15205 gcc_unreachable ();
15206 }
15207 }
15208 [(set_attr "isa" "*,avx512dq,noavx,noavx,avx,avx512bw")
15209 (set_attr "type" "sselog1,sselog1,sseishft1,sseishft1,sseishft1,sseishft1")
15210 (set (attr "prefix_extra")
15211 (if_then_else (eq_attr "alternative" "0,1")
15212 (const_string "1")
15213 (const_string "*")))
15214 (set_attr "length_immediate" "1")
15215 (set_attr "prefix" "maybe_vex,evex,orig,orig,vex,evex")
15216 (set_attr "mode" "TI")])
15217
15218 (define_insn "*vec_extractv4si_zext"
15219 [(set (match_operand:DI 0 "register_operand" "=r,r")
15220 (zero_extend:DI
15221 (vec_select:SI
15222 (match_operand:V4SI 1 "register_operand" "x,v")
15223 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
15224 "TARGET_64BIT && TARGET_SSE4_1"
15225 "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
15226 [(set_attr "isa" "*,avx512dq")
15227 (set_attr "type" "sselog1")
15228 (set_attr "prefix_extra" "1")
15229 (set_attr "length_immediate" "1")
15230 (set_attr "prefix" "maybe_vex")
15231 (set_attr "mode" "TI")])
15232
15233 (define_insn "*vec_extractv4si_mem"
15234 [(set (match_operand:SI 0 "register_operand" "=x,r")
15235 (vec_select:SI
15236 (match_operand:V4SI 1 "memory_operand" "o,o")
15237 (parallel [(match_operand 2 "const_0_to_3_operand")])))]
15238 "TARGET_SSE"
15239 "#")
15240
15241 (define_insn_and_split "*vec_extractv4si_zext_mem"
15242 [(set (match_operand:DI 0 "register_operand" "=x,r")
15243 (zero_extend:DI
15244 (vec_select:SI
15245 (match_operand:V4SI 1 "memory_operand" "o,o")
15246 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
15247 "TARGET_64BIT && TARGET_SSE"
15248 "#"
15249 "&& reload_completed"
15250 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
15251 {
15252 operands[1] = adjust_address (operands[1], SImode, INTVAL (operands[2]) * 4);
15253 })
15254
15255 (define_insn "*vec_extractv2di_1"
15256 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,rm,m,x,x,Yv,x,v,r")
15257 (vec_select:DI
15258 (match_operand:V2DI 1 "nonimmediate_operand" "x ,v ,v,0,x, v,x,o,o")
15259 (parallel [(const_int 1)])))]
15260 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
15261 "@
15262 %vpextrq\t{$1, %1, %0|%0, %1, 1}
15263 vpextrq\t{$1, %1, %0|%0, %1, 1}
15264 %vmovhps\t{%1, %0|%0, %1}
15265 psrldq\t{$8, %0|%0, 8}
15266 vpsrldq\t{$8, %1, %0|%0, %1, 8}
15267 vpsrldq\t{$8, %1, %0|%0, %1, 8}
15268 movhlps\t{%1, %0|%0, %1}
15269 #
15270 #"
15271 [(set (attr "isa")
15272 (cond [(eq_attr "alternative" "0")
15273 (const_string "x64_sse4")
15274 (eq_attr "alternative" "1")
15275 (const_string "x64_avx512dq")
15276 (eq_attr "alternative" "3")
15277 (const_string "sse2_noavx")
15278 (eq_attr "alternative" "4")
15279 (const_string "avx")
15280 (eq_attr "alternative" "5")
15281 (const_string "avx512bw")
15282 (eq_attr "alternative" "6")
15283 (const_string "noavx")
15284 (eq_attr "alternative" "8")
15285 (const_string "x64")
15286 ]
15287 (const_string "*")))
15288 (set (attr "type")
15289 (cond [(eq_attr "alternative" "2,6,7")
15290 (const_string "ssemov")
15291 (eq_attr "alternative" "3,4,5")
15292 (const_string "sseishft1")
15293 (eq_attr "alternative" "8")
15294 (const_string "imov")
15295 ]
15296 (const_string "sselog1")))
15297 (set (attr "length_immediate")
15298 (if_then_else (eq_attr "alternative" "0,1,3,4,5")
15299 (const_string "1")
15300 (const_string "*")))
15301 (set (attr "prefix_rex")
15302 (if_then_else (eq_attr "alternative" "0,1")
15303 (const_string "1")
15304 (const_string "*")))
15305 (set (attr "prefix_extra")
15306 (if_then_else (eq_attr "alternative" "0,1")
15307 (const_string "1")
15308 (const_string "*")))
15309 (set_attr "prefix" "maybe_vex,evex,maybe_vex,orig,vex,evex,orig,*,*")
15310 (set_attr "mode" "TI,TI,V2SF,TI,TI,TI,V4SF,DI,DI")])
15311
15312 (define_split
15313 [(set (match_operand:<ssescalarmode> 0 "register_operand")
15314 (vec_select:<ssescalarmode>
15315 (match_operand:VI_128 1 "memory_operand")
15316 (parallel
15317 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
15318 "TARGET_SSE && reload_completed"
15319 [(set (match_dup 0) (match_dup 1))]
15320 {
15321 int offs = INTVAL (operands[2]) * GET_MODE_SIZE (<ssescalarmode>mode);
15322
15323 operands[1] = adjust_address (operands[1], <ssescalarmode>mode, offs);
15324 })
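
;; Illustrative sketch, not part of the compiler: the split above turns a
;; constant-index extract from memory into a scalar load from byte offset
;; index * element size, which is exactly what adjust_address computes.
;; The helper name is hypothetical.
;;
;;   #include <stdint.h>
;;   #include <string.h>
;;
;;   static uint16_t
;;   extract_v8hi_mem (const void *vec, int i)
;;   {
;;     uint16_t elt;
;;     memcpy (&elt, (const char *) vec + i * sizeof elt, sizeof elt);
;;     return elt;
;;   }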
15325
15326 (define_insn "*vec_extractv2ti"
15327 [(set (match_operand:TI 0 "nonimmediate_operand" "=xm,vm")
15328 (vec_select:TI
15329 (match_operand:V2TI 1 "register_operand" "x,v")
15330 (parallel
15331 [(match_operand:SI 2 "const_0_to_1_operand")])))]
15332 "TARGET_AVX"
15333 "@
15334 vextract%~128\t{%2, %1, %0|%0, %1, %2}
15335 vextracti32x4\t{%2, %g1, %0|%0, %g1, %2}"
15336 [(set_attr "type" "sselog")
15337 (set_attr "prefix_extra" "1")
15338 (set_attr "length_immediate" "1")
15339 (set_attr "prefix" "vex,evex")
15340 (set_attr "mode" "OI")])
15341
15342 (define_insn "*vec_extractv4ti"
15343 [(set (match_operand:TI 0 "nonimmediate_operand" "=vm")
15344 (vec_select:TI
15345 (match_operand:V4TI 1 "register_operand" "v")
15346 (parallel
15347 [(match_operand:SI 2 "const_0_to_3_operand")])))]
15348 "TARGET_AVX512F"
15349 "vextracti32x4\t{%2, %1, %0|%0, %1, %2}"
15350 [(set_attr "type" "sselog")
15351 (set_attr "prefix_extra" "1")
15352 (set_attr "length_immediate" "1")
15353 (set_attr "prefix" "evex")
15354 (set_attr "mode" "XI")])
15355
15356 (define_mode_iterator VEXTRACTI128_MODE
15357 [(V4TI "TARGET_AVX512F") V2TI])
15358
15359 (define_split
15360 [(set (match_operand:TI 0 "nonimmediate_operand")
15361 (vec_select:TI
15362 (match_operand:VEXTRACTI128_MODE 1 "register_operand")
15363 (parallel [(const_int 0)])))]
15364 "TARGET_AVX
15365 && reload_completed
15366 && (TARGET_AVX512VL || !EXT_REX_SSE_REG_P (operands[1]))"
15367 [(set (match_dup 0) (match_dup 1))]
15368 "operands[1] = gen_lowpart (TImode, operands[1]);")
15369
15370 ;; Turn SImode or DImode extraction from arbitrary SSE/AVX/AVX512F
15371 ;; vector modes into vec_extract*.
15372 (define_split
15373 [(set (match_operand:SWI48x 0 "nonimmediate_operand")
15374 (subreg:SWI48x (match_operand 1 "register_operand") 0))]
15375 "can_create_pseudo_p ()
15376 && REG_P (operands[1])
15377 && VECTOR_MODE_P (GET_MODE (operands[1]))
15378 && ((TARGET_SSE && GET_MODE_SIZE (GET_MODE (operands[1])) == 16)
15379 || (TARGET_AVX && GET_MODE_SIZE (GET_MODE (operands[1])) == 32)
15380 || (TARGET_AVX512F && GET_MODE_SIZE (GET_MODE (operands[1])) == 64))
15381 && (<MODE>mode == SImode || TARGET_64BIT || MEM_P (operands[0]))"
15382 [(set (match_dup 0) (vec_select:SWI48x (match_dup 1)
15383 (parallel [(const_int 0)])))]
15384 {
15385 rtx tmp;
15386
15387 switch (GET_MODE_SIZE (GET_MODE (operands[1])))
15388 {
15389 case 64:
15390 if (<MODE>mode == SImode)
15391 {
15392 tmp = gen_reg_rtx (V8SImode);
15393 emit_insn (gen_vec_extract_lo_v16si (tmp,
15394 gen_lowpart (V16SImode,
15395 operands[1])));
15396 }
15397 else
15398 {
15399 tmp = gen_reg_rtx (V4DImode);
15400 emit_insn (gen_vec_extract_lo_v8di (tmp,
15401 gen_lowpart (V8DImode,
15402 operands[1])));
15403 }
15404 operands[1] = tmp;
15405 /* FALLTHRU */
15406 case 32:
15407 tmp = gen_reg_rtx (<ssevecmode>mode);
15408 if (<MODE>mode == SImode)
15409 emit_insn (gen_vec_extract_lo_v8si (tmp, gen_lowpart (V8SImode,
15410 operands[1])));
15411 else
15412 emit_insn (gen_vec_extract_lo_v4di (tmp, gen_lowpart (V4DImode,
15413 operands[1])));
15414 operands[1] = tmp;
15415 break;
15416 case 16:
15417 operands[1] = gen_lowpart (<ssevecmode>mode, operands[1]);
15418 break;
15419 }
15420 })
15421
15422 (define_insn "*vec_concatv2si_sse4_1"
15423 [(set (match_operand:V2SI 0 "register_operand"
15424 "=Yr,*x, x, v,Yr,*x, v, v, *y,*y")
15425 (vec_concat:V2SI
15426 (match_operand:SI 1 "nonimmediate_operand"
15427 " 0, 0, x,Yv, 0, 0,Yv,rm, 0,rm")
15428 (match_operand:SI 2 "nonimm_or_0_operand"
15429 " rm,rm,rm,rm,Yr,*x,Yv, C,*ym, C")))]
15430 "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
15431 "@
15432 pinsrd\t{$1, %2, %0|%0, %2, 1}
15433 pinsrd\t{$1, %2, %0|%0, %2, 1}
15434 vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
15435 vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
15436 punpckldq\t{%2, %0|%0, %2}
15437 punpckldq\t{%2, %0|%0, %2}
15438 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
15439 %vmovd\t{%1, %0|%0, %1}
15440 punpckldq\t{%2, %0|%0, %2}
15441 movd\t{%1, %0|%0, %1}"
15442 [(set_attr "isa" "noavx,noavx,avx,avx512dq,noavx,noavx,avx,*,*,*")
15443 (set (attr "mmx_isa")
15444 (if_then_else (eq_attr "alternative" "8,9")
15445 (const_string "native")
15446 (const_string "*")))
15447 (set (attr "type")
15448 (cond [(eq_attr "alternative" "7")
15449 (const_string "ssemov")
15450 (eq_attr "alternative" "8")
15451 (const_string "mmxcvt")
15452 (eq_attr "alternative" "9")
15453 (const_string "mmxmov")
15454 ]
15455 (const_string "sselog")))
15456 (set (attr "prefix_extra")
15457 (if_then_else (eq_attr "alternative" "0,1,2,3")
15458 (const_string "1")
15459 (const_string "*")))
15460 (set (attr "length_immediate")
15461 (if_then_else (eq_attr "alternative" "0,1,2,3")
15462 (const_string "1")
15463 (const_string "*")))
15464 (set_attr "prefix" "orig,orig,vex,evex,orig,orig,maybe_evex,maybe_vex,orig,orig")
15465 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,TI,DI,DI")])
15466
15467 ;; ??? In theory we can match memory for the MMX alternative, but allowing
15468 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
15469 ;; alternatives pretty much forces the MMX alternative to be chosen.
15470 (define_insn "*vec_concatv2si"
15471 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,x,x,*y,*y")
15472 (vec_concat:V2SI
15473 (match_operand:SI 1 "nonimmediate_operand" " 0,rm,0,m, 0,rm")
15474 (match_operand:SI 2 "reg_or_0_operand" " x,C ,x,C,*y,C")))]
15475 "TARGET_SSE && !TARGET_SSE4_1"
15476 "@
15477 punpckldq\t{%2, %0|%0, %2}
15478 movd\t{%1, %0|%0, %1}
15479 unpcklps\t{%2, %0|%0, %2}
15480 movss\t{%1, %0|%0, %1}
15481 punpckldq\t{%2, %0|%0, %2}
15482 movd\t{%1, %0|%0, %1}"
15483 [(set_attr "isa" "sse2,sse2,*,*,*,*")
15484 (set_attr "mmx_isa" "*,*,*,*,native,native")
15485 (set_attr "type" "sselog,ssemov,sselog,ssemov,mmxcvt,mmxmov")
15486 (set_attr "mode" "TI,TI,V4SF,SF,DI,DI")])
15487
15488 (define_insn "*vec_concatv4si"
15489 [(set (match_operand:V4SI 0 "register_operand" "=x,v,x,x,v")
15490 (vec_concat:V4SI
15491 (match_operand:V2SI 1 "register_operand" " 0,v,0,0,v")
15492 (match_operand:V2SI 2 "nonimmediate_operand" " x,v,x,m,m")))]
15493 "TARGET_SSE"
15494 "@
15495 punpcklqdq\t{%2, %0|%0, %2}
15496 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
15497 movlhps\t{%2, %0|%0, %2}
15498 movhps\t{%2, %0|%0, %q2}
15499 vmovhps\t{%2, %1, %0|%0, %1, %q2}"
15500 [(set_attr "isa" "sse2_noavx,avx,noavx,noavx,avx")
15501 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
15502 (set_attr "prefix" "orig,maybe_evex,orig,orig,maybe_evex")
15503 (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")])
15504
15505 (define_insn "*vec_concatv4si_0"
15506 [(set (match_operand:V4SI 0 "register_operand" "=v,x")
15507 (vec_concat:V4SI
15508 (match_operand:V2SI 1 "nonimmediate_operand" "vm,?!*y")
15509 (match_operand:V2SI 2 "const0_operand" " C,C")))]
15510 "TARGET_SSE2"
15511 "@
15512 %vmovq\t{%1, %0|%0, %1}
15513 movq2dq\t{%1, %0|%0, %1}"
15514 [(set_attr "mmx_isa" "*,native")
15515 (set_attr "type" "ssemov")
15516 (set_attr "prefix" "maybe_vex,orig")
15517 (set_attr "mode" "TI")])
15518
15519 (define_insn "vec_concatv2di"
15520 [(set (match_operand:V2DI 0 "register_operand"
15521 "=Yr,*x,x ,v ,x,v ,x,x,v")
15522 (vec_concat:V2DI
15523 (match_operand:DI 1 "register_operand"
15524 " 0, 0,x ,Yv,0,Yv,0,0,v")
15525 (match_operand:DI 2 "nonimmediate_operand"
15526 " rm,rm,rm,rm,x,Yv,x,m,m")))]
15527 "TARGET_SSE"
15528 "@
15529 pinsrq\t{$1, %2, %0|%0, %2, 1}
15530 pinsrq\t{$1, %2, %0|%0, %2, 1}
15531 vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
15532 vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
15533 punpcklqdq\t{%2, %0|%0, %2}
15534 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
15535 movlhps\t{%2, %0|%0, %2}
15536 movhps\t{%2, %0|%0, %2}
15537 vmovhps\t{%2, %1, %0|%0, %1, %2}"
15538 [(set (attr "isa")
15539 (cond [(eq_attr "alternative" "0,1")
15540 (const_string "x64_sse4_noavx")
15541 (eq_attr "alternative" "2")
15542 (const_string "x64_avx")
15543 (eq_attr "alternative" "3")
15544 (const_string "x64_avx512dq")
15545 (eq_attr "alternative" "4")
15546 (const_string "sse2_noavx")
15547 (eq_attr "alternative" "5,8")
15548 (const_string "avx")
15549 ]
15550 (const_string "noavx")))
15551 (set (attr "type")
15552 (if_then_else
15553 (eq_attr "alternative" "0,1,2,3,4,5")
15554 (const_string "sselog")
15555 (const_string "ssemov")))
15556 (set (attr "prefix_rex")
15557 (if_then_else (eq_attr "alternative" "0,1,2,3")
15558 (const_string "1")
15559 (const_string "*")))
15560 (set (attr "prefix_extra")
15561 (if_then_else (eq_attr "alternative" "0,1,2,3")
15562 (const_string "1")
15563 (const_string "*")))
15564 (set (attr "length_immediate")
15565 (if_then_else (eq_attr "alternative" "0,1,2,3")
15566 (const_string "1")
15567 (const_string "*")))
15568 (set (attr "prefix")
15569 (cond [(eq_attr "alternative" "2")
15570 (const_string "vex")
15571 (eq_attr "alternative" "3")
15572 (const_string "evex")
15573 (eq_attr "alternative" "5,8")
15574 (const_string "maybe_evex")
15575 ]
15576 (const_string "orig")))
15577 (set_attr "mode" "TI,TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")])
15578
15579 (define_insn "*vec_concatv2di_0"
15580 [(set (match_operand:V2DI 0 "register_operand" "=v,v ,x")
15581 (vec_concat:V2DI
15582 (match_operand:DI 1 "nonimmediate_operand" " r,vm,?!*y")
15583 (match_operand:DI 2 "const0_operand" " C,C ,C")))]
15584 "TARGET_SSE2"
15585 "@
15586 * return HAVE_AS_IX86_INTERUNIT_MOVQ ? \"%vmovq\t{%1, %0|%0, %1}\" : \"%vmovd\t{%1, %0|%0, %1}\";
15587 %vmovq\t{%1, %0|%0, %1}
15588 movq2dq\t{%1, %0|%0, %1}"
15589 [(set_attr "isa" "x64,*,*")
15590 (set_attr "mmx_isa" "*,*,native")
15591 (set_attr "type" "ssemov")
15592 (set_attr "prefix_rex" "1,*,*")
15593 (set_attr "prefix" "maybe_vex,maybe_vex,orig")
15594 (set_attr "mode" "TI")
15595 (set (attr "preferred_for_speed")
15596 (cond [(eq_attr "alternative" "0")
15597 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
15598 ]
15599 (symbol_ref "true")))])
15600
15601 ;; vmovq also clears the higher bits.
15602 (define_insn "vec_set<mode>_0"
15603 [(set (match_operand:VI8_AVX_AVX512F 0 "register_operand" "=v,v")
15604 (vec_merge:VI8_AVX_AVX512F
15605 (vec_duplicate:VI8_AVX_AVX512F
15606 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,vm"))
15607 (match_operand:VI8_AVX_AVX512F 1 "const0_operand" "C,C")
15608 (const_int 1)))]
15609 "TARGET_AVX"
15610 "vmovq\t{%2, %x0|%x0, %2}"
15611 [(set_attr "isa" "x64,*")
15612 (set_attr "type" "ssemov")
15613 (set_attr "prefix_rex" "1,*")
15614 (set_attr "prefix" "maybe_evex")
15615 (set_attr "mode" "TI")
15616 (set (attr "preferred_for_speed")
15617 (cond [(eq_attr "alternative" "0")
15618 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
15619 ]
15620 (symbol_ref "true")))])
15621
15622 (define_expand "vec_unpacks_lo_<mode>"
15623 [(match_operand:<sseunpackmode> 0 "register_operand")
15624 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
15625 "TARGET_SSE2"
15626 "ix86_expand_sse_unpack (operands[0], operands[1], false, false); DONE;")
15627
15628 (define_expand "vec_unpacks_hi_<mode>"
15629 [(match_operand:<sseunpackmode> 0 "register_operand")
15630 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
15631 "TARGET_SSE2"
15632 "ix86_expand_sse_unpack (operands[0], operands[1], false, true); DONE;")
15633
15634 (define_expand "vec_unpacku_lo_<mode>"
15635 [(match_operand:<sseunpackmode> 0 "register_operand")
15636 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
15637 "TARGET_SSE2"
15638 "ix86_expand_sse_unpack (operands[0], operands[1], true, false); DONE;")
15639
15640 (define_expand "vec_unpacks_sbool_lo_qi"
15641 [(match_operand:QI 0 "register_operand")
15642 (match_operand:QI 1 "register_operand")
15643 (match_operand:QI 2 "const_int_operand")]
15644 "TARGET_AVX512F"
15645 {
15646 if (INTVAL (operands[2]) != 8 && INTVAL (operands[2]) != 4)
15647 FAIL;
15648 emit_move_insn (operands[0], operands[1]);
15649 DONE;
15650 })
15651
15652 (define_expand "vec_unpacks_lo_hi"
15653 [(set (subreg:HI (match_operand:QI 0 "register_operand") 0)
15654 (match_operand:HI 1 "register_operand"))]
15655 "TARGET_AVX512F")
15656
15657 (define_expand "vec_unpacks_lo_si"
15658 [(set (match_operand:HI 0 "register_operand")
15659 (subreg:HI (match_operand:SI 1 "register_operand") 0))]
15660 "TARGET_AVX512F")
15661
15662 (define_expand "vec_unpacks_lo_di"
15663 [(set (match_operand:SI 0 "register_operand")
15664 (subreg:SI (match_operand:DI 1 "register_operand") 0))]
15665 "TARGET_AVX512BW")
15666
15667 (define_expand "vec_unpacku_hi_<mode>"
15668 [(match_operand:<sseunpackmode> 0 "register_operand")
15669 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
15670 "TARGET_SSE2"
15671 "ix86_expand_sse_unpack (operands[0], operands[1], true, true); DONE;")
15672
15673 (define_expand "vec_unpacks_sbool_hi_qi"
15674 [(match_operand:QI 0 "register_operand")
15675 (match_operand:QI 1 "register_operand")
15676 (match_operand:QI 2 "const_int_operand")]
15677 "TARGET_AVX512F"
15678 {
15679 HOST_WIDE_INT nunits = INTVAL (operands[2]);
15680 if (nunits != 8 && nunits != 4)
15681 FAIL;
15682 if (TARGET_AVX512DQ)
15683 emit_insn (gen_klshiftrtqi (operands[0], operands[1],
15684 GEN_INT (nunits / 2)));
15685 else
15686 {
15687 rtx tem = gen_reg_rtx (HImode);
15688 emit_insn (gen_klshiftrthi (tem, lowpart_subreg (HImode, operands[1],
15689 QImode),
15690 GEN_INT (nunits / 2)));
15691 emit_move_insn (operands[0], lowpart_subreg (QImode, tem, HImode));
15692 }
15693 DONE;
15694 })
15695
15696 (define_expand "vec_unpacks_hi_hi"
15697 [(parallel
15698 [(set (subreg:HI (match_operand:QI 0 "register_operand") 0)
15699 (lshiftrt:HI (match_operand:HI 1 "register_operand")
15700 (const_int 8)))
15701 (unspec [(const_int 0)] UNSPEC_MASKOP)])]
15702 "TARGET_AVX512F")
15703
15704 (define_expand "vec_unpacks_hi_<mode>"
15705 [(parallel
15706 [(set (subreg:SWI48x
15707 (match_operand:<HALFMASKMODE> 0 "register_operand") 0)
15708 (lshiftrt:SWI48x (match_operand:SWI48x 1 "register_operand")
15709 (match_dup 2)))
15710 (unspec [(const_int 0)] UNSPEC_MASKOP)])]
15711 "TARGET_AVX512BW"
15712 "operands[2] = GEN_INT (GET_MODE_BITSIZE (<HALFMASKMODE>mode));")
15713
15714 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15715 ;;
15716 ;; Miscellaneous
15717 ;;
15718 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15719
15720 (define_expand "<sse2_avx2>_uavg<mode>3<mask_name>"
15721 [(set (match_operand:VI12_AVX2 0 "register_operand")
15722 (truncate:VI12_AVX2
15723 (lshiftrt:<ssedoublemode>
15724 (plus:<ssedoublemode>
15725 (plus:<ssedoublemode>
15726 (zero_extend:<ssedoublemode>
15727 (match_operand:VI12_AVX2 1 "vector_operand"))
15728 (zero_extend:<ssedoublemode>
15729 (match_operand:VI12_AVX2 2 "vector_operand")))
15730 (match_dup <mask_expand_op3>))
15731 (const_int 1))))]
15732 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
15733 {
15734 operands[<mask_expand_op3>] = CONST1_RTX(<ssedoublemode>mode);
15735 ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);
15736 })
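
;; Minimal scalar sketch of the rounding average the expander encodes:
;; widen, add the two elements plus one, shift right by one, truncate.
;; The helper name is invented for illustration.
;;
;;   #include <stdint.h>
;;
;;   static uint8_t
;;   uavg_byte (uint8_t a, uint8_t b)
;;   {
;;     return (uint8_t) (((unsigned) a + b + 1) >> 1);   /* cannot overflow */
;;   }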
15737
15738 (define_insn "*<sse2_avx2>_uavg<mode>3<mask_name>"
15739 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v")
15740 (truncate:VI12_AVX2
15741 (lshiftrt:<ssedoublemode>
15742 (plus:<ssedoublemode>
15743 (plus:<ssedoublemode>
15744 (zero_extend:<ssedoublemode>
15745 (match_operand:VI12_AVX2 1 "vector_operand" "%0,v"))
15746 (zero_extend:<ssedoublemode>
15747 (match_operand:VI12_AVX2 2 "vector_operand" "xBm,vm")))
15748 (match_operand:<ssedoublemode> <mask_expand_op3> "const1_operand"))
15749 (const_int 1))))]
15750 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
15751 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
15752 "@
15753 pavg<ssemodesuffix>\t{%2, %0|%0, %2}
15754 vpavg<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
15755 [(set_attr "isa" "noavx,avx")
15756 (set_attr "type" "sseiadd")
15757 (set_attr "prefix_data16" "1,*")
15758 (set_attr "prefix" "orig,<mask_prefix>")
15759 (set_attr "mode" "<sseinsnmode>")])
15760
15761 ;; The correct representation for this is absolutely enormous, and
15762 ;; surely not generally useful.
15763 (define_insn "<sse2_avx2>_psadbw"
15764 [(set (match_operand:VI8_AVX2_AVX512BW 0 "register_operand" "=x,v")
15765 (unspec:VI8_AVX2_AVX512BW
15766 [(match_operand:<ssebytemode> 1 "register_operand" "0,v")
15767 (match_operand:<ssebytemode> 2 "vector_operand" "xBm,vm")]
15768 UNSPEC_PSADBW))]
15769 "TARGET_SSE2"
15770 "@
15771 psadbw\t{%2, %0|%0, %2}
15772 vpsadbw\t{%2, %1, %0|%0, %1, %2}"
15773 [(set_attr "isa" "noavx,avx")
15774 (set_attr "type" "sseiadd")
15775 (set_attr "atom_unit" "simul")
15776 (set_attr "prefix_data16" "1,*")
15777 (set_attr "prefix" "orig,maybe_evex")
15778 (set_attr "mode" "<sseinsnmode>")])
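
;; Illustrative scalar model of what PSADBW computes per 64-bit lane; the
;; unspec above deliberately avoids spelling this out in RTL.
;;
;;   #include <stdint.h>
;;
;;   static uint64_t
;;   psadbw_lane (const uint8_t a[8], const uint8_t b[8])
;;   {
;;     uint64_t sum = 0;
;;     for (int i = 0; i < 8; i++)
;;       sum += a[i] > b[i] ? a[i] - b[i] : b[i] - a[i];  /* |a[i] - b[i]| */
;;     return sum;                                        /* zero-extended */
;;   }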
15779
15780 (define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>"
15781 [(set (match_operand:SI 0 "register_operand" "=r")
15782 (unspec:SI
15783 [(match_operand:VF_128_256 1 "register_operand" "x")]
15784 UNSPEC_MOVMSK))]
15785 "TARGET_SSE"
15786 "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
15787 [(set_attr "type" "ssemov")
15788 (set_attr "prefix" "maybe_vex")
15789 (set_attr "mode" "<MODE>")])
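
;; Hypothetical scalar model of MOVMSKPS/MOVMSKPD: bit I of the result is
;; the sign bit of element I.  This is also why the *_lt and *_shift
;; splitters below can drop the comparison or arithmetic shift and apply
;; the unspec to operand 1 directly: only the sign bits matter.
;;
;;   #include <stdint.h>
;;
;;   static unsigned
;;   movmskps_model (const uint32_t elt_bits[4])   /* raw IEEE bit patterns */
;;   {
;;     unsigned mask = 0;
;;     for (int i = 0; i < 4; i++)
;;       mask |= (elt_bits[i] >> 31) << i;
;;     return mask;
;;   }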
15790
15791 (define_insn "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext"
15792 [(set (match_operand:DI 0 "register_operand" "=r")
15793 (any_extend:DI
15794 (unspec:SI
15795 [(match_operand:VF_128_256 1 "register_operand" "x")]
15796 UNSPEC_MOVMSK)))]
15797 "TARGET_64BIT && TARGET_SSE"
15798 "%vmovmsk<ssemodesuffix>\t{%1, %k0|%k0, %1}"
15799 [(set_attr "type" "ssemov")
15800 (set_attr "prefix" "maybe_vex")
15801 (set_attr "mode" "<MODE>")])
15802
15803 (define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_lt"
15804 [(set (match_operand:SI 0 "register_operand" "=r")
15805 (unspec:SI
15806 [(lt:VF_128_256
15807 (match_operand:<sseintvecmode> 1 "register_operand" "x")
15808 (match_operand:<sseintvecmode> 2 "const0_operand" "C"))]
15809 UNSPEC_MOVMSK))]
15810 "TARGET_SSE"
15811 "#"
15812 "&& reload_completed"
15813 [(set (match_dup 0)
15814 (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))]
15815 "operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
15816 [(set_attr "type" "ssemov")
15817 (set_attr "prefix" "maybe_vex")
15818 (set_attr "mode" "<MODE>")])
15819
15820 (define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext_lt"
15821 [(set (match_operand:DI 0 "register_operand" "=r")
15822 (any_extend:DI
15823 (unspec:SI
15824 [(lt:VF_128_256
15825 (match_operand:<sseintvecmode> 1 "register_operand" "x")
15826 (match_operand:<sseintvecmode> 2 "const0_operand" "C"))]
15827 UNSPEC_MOVMSK)))]
15828 "TARGET_64BIT && TARGET_SSE"
15829 "#"
15830 "&& reload_completed"
15831 [(set (match_dup 0)
15832 (any_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
15833 "operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
15834 [(set_attr "type" "ssemov")
15835 (set_attr "prefix" "maybe_vex")
15836 (set_attr "mode" "<MODE>")])
15837
15838 (define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_shift"
15839 [(set (match_operand:SI 0 "register_operand" "=r")
15840 (unspec:SI
15841 [(subreg:VF_128_256
15842 (ashiftrt:<sseintvecmode>
15843 (match_operand:<sseintvecmode> 1 "register_operand" "x")
15844 (match_operand:QI 2 "const_int_operand" "n")) 0)]
15845 UNSPEC_MOVMSK))]
15846 "TARGET_SSE"
15847 "#"
15848 "&& reload_completed"
15849 [(set (match_dup 0)
15850 (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))]
15851 "operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
15852 [(set_attr "type" "ssemov")
15853 (set_attr "prefix" "maybe_vex")
15854 (set_attr "mode" "<MODE>")])
15855
15856 (define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext_shift"
15857 [(set (match_operand:DI 0 "register_operand" "=r")
15858 (any_extend:DI
15859 (unspec:SI
15860 [(subreg:VF_128_256
15861 (ashiftrt:<sseintvecmode>
15862 (match_operand:<sseintvecmode> 1 "register_operand" "x")
15863 (match_operand:QI 2 "const_int_operand" "n")) 0)]
15864 UNSPEC_MOVMSK)))]
15865 "TARGET_64BIT && TARGET_SSE"
15866 "#"
15867 "&& reload_completed"
15868 [(set (match_dup 0)
15869 (any_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
15870 "operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
15871 [(set_attr "type" "ssemov")
15872 (set_attr "prefix" "maybe_vex")
15873 (set_attr "mode" "<MODE>")])
15874
15875 (define_insn "<sse2_avx2>_pmovmskb"
15876 [(set (match_operand:SI 0 "register_operand" "=r")
15877 (unspec:SI
15878 [(match_operand:VI1_AVX2 1 "register_operand" "x")]
15879 UNSPEC_MOVMSK))]
15880 "TARGET_SSE2"
15881 "%vpmovmskb\t{%1, %0|%0, %1}"
15882 [(set_attr "type" "ssemov")
15883 (set (attr "prefix_data16")
15884 (if_then_else
15885 (match_test "TARGET_AVX")
15886 (const_string "*")
15887 (const_string "1")))
15888 (set_attr "prefix" "maybe_vex")
15889 (set_attr "mode" "SI")])
15890
15891 (define_insn "*<sse2_avx2>_pmovmskb_zext"
15892 [(set (match_operand:DI 0 "register_operand" "=r")
15893 (zero_extend:DI
15894 (unspec:SI
15895 [(match_operand:VI1_AVX2 1 "register_operand" "x")]
15896 UNSPEC_MOVMSK)))]
15897 "TARGET_64BIT && TARGET_SSE2"
15898 "%vpmovmskb\t{%1, %k0|%k0, %1}"
15899 [(set_attr "type" "ssemov")
15900 (set (attr "prefix_data16")
15901 (if_then_else
15902 (match_test "TARGET_AVX")
15903 (const_string "*")
15904 (const_string "1")))
15905 (set_attr "prefix" "maybe_vex")
15906 (set_attr "mode" "SI")])
15907
15908 (define_insn "*sse2_pmovmskb_ext"
15909 [(set (match_operand:DI 0 "register_operand" "=r")
15910 (sign_extend:DI
15911 (unspec:SI
15912 [(match_operand:V16QI 1 "register_operand" "x")]
15913 UNSPEC_MOVMSK)))]
15914 "TARGET_64BIT && TARGET_SSE2"
15915 "%vpmovmskb\t{%1, %k0|%k0, %1}"
15916 [(set_attr "type" "ssemov")
15917 (set (attr "prefix_data16")
15918 (if_then_else
15919 (match_test "TARGET_AVX")
15920 (const_string "*")
15921 (const_string "1")))
15922 (set_attr "prefix" "maybe_vex")
15923 (set_attr "mode" "SI")])
15924
15925 (define_insn_and_split "*<sse2_avx2>_pmovmskb_lt"
15926 [(set (match_operand:SI 0 "register_operand" "=r")
15927 (unspec:SI
15928 [(lt:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand" "x")
15929 (match_operand:VI1_AVX2 2 "const0_operand" "C"))]
15930 UNSPEC_MOVMSK))]
15931 "TARGET_SSE2"
15932 "#"
15933 ""
15934 [(set (match_dup 0)
15935 (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))]
15936 ""
15937 [(set_attr "type" "ssemov")
15938 (set (attr "prefix_data16")
15939 (if_then_else
15940 (match_test "TARGET_AVX")
15941 (const_string "*")
15942 (const_string "1")))
15943 (set_attr "prefix" "maybe_vex")
15944 (set_attr "mode" "SI")])
15945
15946 (define_insn_and_split "*<sse2_avx2>_pmovmskb_zext_lt"
15947 [(set (match_operand:DI 0 "register_operand" "=r")
15948 (zero_extend:DI
15949 (unspec:SI
15950 [(lt:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand" "x")
15951 (match_operand:VI1_AVX2 2 "const0_operand" "C"))]
15952 UNSPEC_MOVMSK)))]
15953 "TARGET_64BIT && TARGET_SSE2"
15954 "#"
15955 ""
15956 [(set (match_dup 0)
15957 (zero_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
15958 ""
15959 [(set_attr "type" "ssemov")
15960 (set (attr "prefix_data16")
15961 (if_then_else
15962 (match_test "TARGET_AVX")
15963 (const_string "*")
15964 (const_string "1")))
15965 (set_attr "prefix" "maybe_vex")
15966 (set_attr "mode" "SI")])
15967
15968 (define_insn_and_split "*sse2_pmovmskb_ext_lt"
15969 [(set (match_operand:DI 0 "register_operand" "=r")
15970 (sign_extend:DI
15971 (unspec:SI
15972 [(lt:V16QI (match_operand:V16QI 1 "register_operand" "x")
15973 (match_operand:V16QI 2 "const0_operand" "C"))]
15974 UNSPEC_MOVMSK)))]
15975 "TARGET_64BIT && TARGET_SSE2"
15976 "#"
15977 ""
15978 [(set (match_dup 0)
15979 (sign_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
15980 ""
15981 [(set_attr "type" "ssemov")
15982 (set (attr "prefix_data16")
15983 (if_then_else
15984 (match_test "TARGET_AVX")
15985 (const_string "*")
15986 (const_string "1")))
15987 (set_attr "prefix" "maybe_vex")
15988 (set_attr "mode" "SI")])
15989
15990 (define_expand "sse2_maskmovdqu"
15991 [(set (match_operand:V16QI 0 "memory_operand")
15992 (unspec:V16QI [(match_operand:V16QI 1 "register_operand")
15993 (match_operand:V16QI 2 "register_operand")
15994 (match_dup 0)]
15995 UNSPEC_MASKMOV))]
15996 "TARGET_SSE2")
15997
15998 (define_insn "*sse2_maskmovdqu"
15999 [(set (mem:V16QI (match_operand:P 0 "register_operand" "D"))
16000 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
16001 (match_operand:V16QI 2 "register_operand" "x")
16002 (mem:V16QI (match_dup 0))]
16003 UNSPEC_MASKMOV))]
16004 "TARGET_SSE2"
16005 {
16006 /* We can't use %^ here due to ASM_OUTPUT_OPCODE processing
16007 that requires %v to be at the beginning of the opcode name. */
16008 if (Pmode != word_mode)
16009 fputs ("\taddr32", asm_out_file);
16010 return "%vmaskmovdqu\t{%2, %1|%1, %2}";
16011 }
16012 [(set_attr "type" "ssemov")
16013 (set_attr "prefix_data16" "1")
16014 (set (attr "length_address")
16015 (symbol_ref ("Pmode != word_mode")))
16016 ;; The implicit %rdi operand confuses default length_vex computation.
16017 (set (attr "length_vex")
16018 (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))")))
16019 (set_attr "prefix" "maybe_vex")
16020 (set_attr "znver1_decode" "vector")
16021 (set_attr "mode" "TI")])
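
;; Rough behavioural sketch of MASKMOVDQU, ignoring the implicit %rdi
;; addressing and non-temporal hint handled above: a byte is stored only
;; when the top bit of the corresponding mask byte is set.
;;
;;   #include <stdint.h>
;;
;;   static void
;;   maskmovdqu_model (uint8_t *dst, const uint8_t src[16],
;;                     const uint8_t mask[16])
;;   {
;;     for (int i = 0; i < 16; i++)
;;       if (mask[i] & 0x80)
;;         dst[i] = src[i];
;;   }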
16022
16023 (define_insn "sse_ldmxcsr"
16024 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
16025 UNSPECV_LDMXCSR)]
16026 "TARGET_SSE"
16027 "%vldmxcsr\t%0"
16028 [(set_attr "type" "sse")
16029 (set_attr "atom_sse_attr" "mxcsr")
16030 (set_attr "prefix" "maybe_vex")
16031 (set_attr "memory" "load")])
16032
16033 (define_insn "sse_stmxcsr"
16034 [(set (match_operand:SI 0 "memory_operand" "=m")
16035 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
16036 "TARGET_SSE"
16037 "%vstmxcsr\t%0"
16038 [(set_attr "type" "sse")
16039 (set_attr "atom_sse_attr" "mxcsr")
16040 (set_attr "prefix" "maybe_vex")
16041 (set_attr "memory" "store")])
16042
16043 (define_insn "sse2_clflush"
16044 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
16045 UNSPECV_CLFLUSH)]
16046 "TARGET_SSE2"
16047 "clflush\t%a0"
16048 [(set_attr "type" "sse")
16049 (set_attr "atom_sse_attr" "fence")
16050 (set_attr "memory" "unknown")])
16051
16052 ;; As per the AMD and Intel ISA manuals, the first operand is the extensions
16053 ;; value and goes in %ecx; the second operand is the hints value and goes
16054 ;; in %eax.
16055 (define_insn "sse3_mwait"
16056 [(unspec_volatile [(match_operand:SI 0 "register_operand" "c")
16057 (match_operand:SI 1 "register_operand" "a")]
16058 UNSPECV_MWAIT)]
16059 "TARGET_SSE3"
16060 ;; The 64-bit form is "mwait %rax,%rcx", but only the lower 32 bits are used.
16061 ;; Since 32-bit register operands are implicitly zero-extended to 64 bits,
16062 ;; we only need to set up the 32-bit registers.
16063 "mwait"
16064 [(set_attr "length" "3")])
16065
16066 (define_insn "@sse3_monitor_<mode>"
16067 [(unspec_volatile [(match_operand:P 0 "register_operand" "a")
16068 (match_operand:SI 1 "register_operand" "c")
16069 (match_operand:SI 2 "register_operand" "d")]
16070 UNSPECV_MONITOR)]
16071 "TARGET_SSE3"
16072 ;; The 64-bit form is "monitor %rax,%rcx,%rdx", but only the lower 32 bits
16073 ;; of RCX and RDX are used.  Since 32-bit register operands are implicitly
16074 ;; zero-extended to 64 bits, we only need to set up the 32-bit registers.
16075 "%^monitor"
16076 [(set (attr "length")
16077 (symbol_ref ("(Pmode != word_mode) + 3")))])
16078
16079 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
16080 ;;
16081 ;; SSSE3 instructions
16082 ;;
16083 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
16084
16085 (define_code_iterator ssse3_plusminus [plus ss_plus minus ss_minus])
16086
16087 (define_insn "avx2_ph<plusminus_mnemonic>wv16hi3"
16088 [(set (match_operand:V16HI 0 "register_operand" "=x")
16089 (ssse3_plusminus:V16HI
16090 (vec_select:V16HI
16091 (vec_concat:V32HI
16092 (match_operand:V16HI 1 "register_operand" "x")
16093 (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
16094 (parallel
16095 [(const_int 0) (const_int 2) (const_int 4) (const_int 6)
16096 (const_int 16) (const_int 18) (const_int 20) (const_int 22)
16097 (const_int 8) (const_int 10) (const_int 12) (const_int 14)
16098 (const_int 24) (const_int 26) (const_int 28) (const_int 30)]))
16099 (vec_select:V16HI
16100 (vec_concat:V32HI (match_dup 1) (match_dup 2))
16101 (parallel
16102 [(const_int 1) (const_int 3) (const_int 5) (const_int 7)
16103 (const_int 17) (const_int 19) (const_int 21) (const_int 23)
16104 (const_int 9) (const_int 11) (const_int 13) (const_int 15)
16105 (const_int 25) (const_int 27) (const_int 29) (const_int 31)]))))]
16106 "TARGET_AVX2"
16107 "vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
16108 [(set_attr "type" "sseiadd")
16109 (set_attr "prefix_extra" "1")
16110 (set_attr "prefix" "vex")
16111 (set_attr "mode" "OI")])
16112
16113 (define_insn "ssse3_ph<plusminus_mnemonic>wv8hi3"
16114 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
16115 (ssse3_plusminus:V8HI
16116 (vec_select:V8HI
16117 (vec_concat:V16HI
16118 (match_operand:V8HI 1 "register_operand" "0,x")
16119 (match_operand:V8HI 2 "vector_operand" "xBm,xm"))
16120 (parallel
16121 [(const_int 0) (const_int 2) (const_int 4) (const_int 6)
16122 (const_int 8) (const_int 10) (const_int 12) (const_int 14)]))
16123 (vec_select:V8HI
16124 (vec_concat:V16HI (match_dup 1) (match_dup 2))
16125 (parallel
16126 [(const_int 1) (const_int 3) (const_int 5) (const_int 7)
16127 (const_int 9) (const_int 11) (const_int 13) (const_int 15)]))))]
16128 "TARGET_SSSE3"
16129 "@
16130 ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}
16131 vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
16132 [(set_attr "isa" "noavx,avx")
16133 (set_attr "type" "sseiadd")
16134 (set_attr "atom_unit" "complex")
16135 (set_attr "prefix_data16" "1,*")
16136 (set_attr "prefix_extra" "1")
16137 (set_attr "prefix" "orig,vex")
16138 (set_attr "mode" "TI")])
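
;; Sketch of the horizontal operation described by the vec_select/vec_concat
;; pattern above, shown for PHADDW; the subtracting and saturating variants
;; differ only in the inner operation.  dst must not overlap the inputs.
;;
;;   #include <stdint.h>
;;
;;   static void
;;   phaddw_model (int16_t dst[8], const int16_t a[8], const int16_t b[8])
;;   {
;;     for (int i = 0; i < 4; i++)
;;       {
;;         dst[i]     = (int16_t) (a[2 * i] + a[2 * i + 1]);
;;         dst[i + 4] = (int16_t) (b[2 * i] + b[2 * i + 1]);
;;       }
;;   }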
16139
16140 (define_insn_and_split "ssse3_ph<plusminus_mnemonic>wv4hi3"
16141 [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
16142 (ssse3_plusminus:V4HI
16143 (vec_select:V4HI
16144 (vec_concat:V8HI
16145 (match_operand:V4HI 1 "register_operand" "0,0,Yv")
16146 (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv"))
16147 (parallel
16148 [(const_int 0) (const_int 2) (const_int 4) (const_int 6)]))
16149 (vec_select:V4HI
16150 (vec_concat:V8HI (match_dup 1) (match_dup 2))
16151 (parallel
16152 [(const_int 1) (const_int 3) (const_int 5) (const_int 7)]))))]
16153 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
16154 "@
16155 ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}
16156 #
16157 #"
16158 "TARGET_SSSE3 && reload_completed
16159 && SSE_REGNO_P (REGNO (operands[0]))"
16160 [(const_int 0)]
16161 {
16162 /* Generate SSE version of the operation. */
16163 rtx op0 = lowpart_subreg (V8HImode, operands[0],
16164 GET_MODE (operands[0]));
16165 rtx op1 = lowpart_subreg (V8HImode, operands[1],
16166 GET_MODE (operands[1]));
16167 rtx op2 = lowpart_subreg (V8HImode, operands[2],
16168 GET_MODE (operands[2]));
16169 emit_insn (gen_ssse3_ph<plusminus_mnemonic>wv8hi3 (op0, op1, op2));
16170 ix86_move_vector_high_sse_to_mmx (op0);
16171 DONE;
16172 }
16173 [(set_attr "mmx_isa" "native,sse_noavx,avx")
16174 (set_attr "type" "sseiadd")
16175 (set_attr "atom_unit" "complex")
16176 (set_attr "prefix_extra" "1")
16177 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
16178 (set_attr "mode" "DI,TI,TI")])
16179
16180 (define_insn "avx2_ph<plusminus_mnemonic>dv8si3"
16181 [(set (match_operand:V8SI 0 "register_operand" "=x")
16182 (plusminus:V8SI
16183 (vec_select:V8SI
16184 (vec_concat:V16SI
16185 (match_operand:V8SI 1 "register_operand" "x")
16186 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
16187 (parallel
16188 [(const_int 0) (const_int 2) (const_int 8) (const_int 10)
16189 (const_int 4) (const_int 6) (const_int 12) (const_int 14)]))
16190 (vec_select:V8SI
16191 (vec_concat:V16SI (match_dup 1) (match_dup 2))
16192 (parallel
16193 [(const_int 1) (const_int 3) (const_int 9) (const_int 11)
16194 (const_int 5) (const_int 7) (const_int 13) (const_int 15)]))))]
16195 "TARGET_AVX2"
16196 "vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
16197 [(set_attr "type" "sseiadd")
16198 (set_attr "prefix_extra" "1")
16199 (set_attr "prefix" "vex")
16200 (set_attr "mode" "OI")])
16201
16202 (define_insn "ssse3_ph<plusminus_mnemonic>dv4si3"
16203 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
16204 (plusminus:V4SI
16205 (vec_select:V4SI
16206 (vec_concat:V8SI
16207 (match_operand:V4SI 1 "register_operand" "0,x")
16208 (match_operand:V4SI 2 "vector_operand" "xBm,xm"))
16209 (parallel
16210 [(const_int 0) (const_int 2) (const_int 4) (const_int 6)]))
16211 (vec_select:V4SI
16212 (vec_concat:V8SI (match_dup 1) (match_dup 2))
16213 (parallel
16214 [(const_int 1) (const_int 3) (const_int 5) (const_int 7)]))))]
16215 "TARGET_SSSE3"
16216 "@
16217 ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}
16218 vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
16219 [(set_attr "isa" "noavx,avx")
16220 (set_attr "type" "sseiadd")
16221 (set_attr "atom_unit" "complex")
16222 (set_attr "prefix_data16" "1,*")
16223 (set_attr "prefix_extra" "1")
16224 (set_attr "prefix" "orig,vex")
16225 (set_attr "mode" "TI")])
16226
16227 (define_insn_and_split "ssse3_ph<plusminus_mnemonic>dv2si3"
16228 [(set (match_operand:V2SI 0 "register_operand" "=y,x,Yv")
16229 (plusminus:V2SI
16230 (vec_select:V2SI
16231 (vec_concat:V4SI
16232 (match_operand:V2SI 1 "register_operand" "0,0,Yv")
16233 (match_operand:V2SI 2 "register_mmxmem_operand" "ym,x,Yv"))
16234 (parallel [(const_int 0) (const_int 2)]))
16235 (vec_select:V2SI
16236 (vec_concat:V4SI (match_dup 1) (match_dup 2))
16237 (parallel [(const_int 1) (const_int 3)]))))]
16238 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
16239 "@
16240 ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}
16241 #
16242 #"
16243 "TARGET_SSSE3 && reload_completed
16244 && SSE_REGNO_P (REGNO (operands[0]))"
16245 [(const_int 0)]
16246 {
16247 /* Generate SSE version of the operation. */
16248 rtx op0 = lowpart_subreg (V4SImode, operands[0],
16249 GET_MODE (operands[0]));
16250 rtx op1 = lowpart_subreg (V4SImode, operands[1],
16251 GET_MODE (operands[1]));
16252 rtx op2 = lowpart_subreg (V4SImode, operands[2],
16253 GET_MODE (operands[2]));
16254 emit_insn (gen_ssse3_ph<plusminus_mnemonic>dv4si3 (op0, op1, op2));
16255 ix86_move_vector_high_sse_to_mmx (op0);
16256 DONE;
16257 }
16258 [(set_attr "mmx_isa" "native,sse_noavx,avx")
16259 (set_attr "type" "sseiadd")
16260 (set_attr "atom_unit" "complex")
16261 (set_attr "prefix_extra" "1")
16262 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
16263 (set_attr "mode" "DI,TI,TI")])
16264
16265 (define_insn "avx2_pmaddubsw256"
16266 [(set (match_operand:V16HI 0 "register_operand" "=x,v")
16267 (ss_plus:V16HI
16268 (mult:V16HI
16269 (zero_extend:V16HI
16270 (vec_select:V16QI
16271 (match_operand:V32QI 1 "register_operand" "x,v")
16272 (parallel [(const_int 0) (const_int 2)
16273 (const_int 4) (const_int 6)
16274 (const_int 8) (const_int 10)
16275 (const_int 12) (const_int 14)
16276 (const_int 16) (const_int 18)
16277 (const_int 20) (const_int 22)
16278 (const_int 24) (const_int 26)
16279 (const_int 28) (const_int 30)])))
16280 (sign_extend:V16HI
16281 (vec_select:V16QI
16282 (match_operand:V32QI 2 "nonimmediate_operand" "xm,vm")
16283 (parallel [(const_int 0) (const_int 2)
16284 (const_int 4) (const_int 6)
16285 (const_int 8) (const_int 10)
16286 (const_int 12) (const_int 14)
16287 (const_int 16) (const_int 18)
16288 (const_int 20) (const_int 22)
16289 (const_int 24) (const_int 26)
16290 (const_int 28) (const_int 30)]))))
16291 (mult:V16HI
16292 (zero_extend:V16HI
16293 (vec_select:V16QI (match_dup 1)
16294 (parallel [(const_int 1) (const_int 3)
16295 (const_int 5) (const_int 7)
16296 (const_int 9) (const_int 11)
16297 (const_int 13) (const_int 15)
16298 (const_int 17) (const_int 19)
16299 (const_int 21) (const_int 23)
16300 (const_int 25) (const_int 27)
16301 (const_int 29) (const_int 31)])))
16302 (sign_extend:V16HI
16303 (vec_select:V16QI (match_dup 2)
16304 (parallel [(const_int 1) (const_int 3)
16305 (const_int 5) (const_int 7)
16306 (const_int 9) (const_int 11)
16307 (const_int 13) (const_int 15)
16308 (const_int 17) (const_int 19)
16309 (const_int 21) (const_int 23)
16310 (const_int 25) (const_int 27)
16311 (const_int 29) (const_int 31)]))))))]
16312 "TARGET_AVX2"
16313 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
16314 [(set_attr "isa" "*,avx512bw")
16315 (set_attr "type" "sseiadd")
16316 (set_attr "prefix_extra" "1")
16317 (set_attr "prefix" "vex,evex")
16318 (set_attr "mode" "OI")])
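
;; Scalar sketch of one 16-bit result lane of VPMADDUBSW: unsigned bytes
;; from operand 1 times signed bytes from operand 2, adjacent products
;; added with signed saturation.  The 512-bit pattern below uses an unspec
;; because writing this out, as done above, is enormous.
;;
;;   #include <stdint.h>
;;
;;   static int16_t
;;   pmaddubsw_lane (const uint8_t a[2], const int8_t b[2])
;;   {
;;     int32_t sum = (int32_t) a[0] * b[0] + (int32_t) a[1] * b[1];
;;     if (sum > 32767)  sum = 32767;
;;     if (sum < -32768) sum = -32768;
;;     return (int16_t) sum;
;;   }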
16319
16320 ;; The correct representation for this is absolutely enormous, and
16321 ;; surely not generally useful.
16322 (define_insn "avx512bw_pmaddubsw512<mode><mask_name>"
16323 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
16324 (unspec:VI2_AVX512VL
16325 [(match_operand:<dbpsadbwmode> 1 "register_operand" "v")
16326 (match_operand:<dbpsadbwmode> 2 "nonimmediate_operand" "vm")]
16327 UNSPEC_PMADDUBSW512))]
16328 "TARGET_AVX512BW"
16329 "vpmaddubsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}";
16330 [(set_attr "type" "sseiadd")
16331 (set_attr "prefix" "evex")
16332 (set_attr "mode" "XI")])
16333
16334 (define_insn "avx512bw_umulhrswv32hi3<mask_name>"
16335 [(set (match_operand:V32HI 0 "register_operand" "=v")
16336 (truncate:V32HI
16337 (lshiftrt:V32SI
16338 (plus:V32SI
16339 (lshiftrt:V32SI
16340 (mult:V32SI
16341 (sign_extend:V32SI
16342 (match_operand:V32HI 1 "nonimmediate_operand" "%v"))
16343 (sign_extend:V32SI
16344 (match_operand:V32HI 2 "nonimmediate_operand" "vm")))
16345 (const_int 14))
16346 (const_vector:V32HI [(const_int 1) (const_int 1)
16347 (const_int 1) (const_int 1)
16348 (const_int 1) (const_int 1)
16349 (const_int 1) (const_int 1)
16350 (const_int 1) (const_int 1)
16351 (const_int 1) (const_int 1)
16352 (const_int 1) (const_int 1)
16353 (const_int 1) (const_int 1)
16354 (const_int 1) (const_int 1)
16355 (const_int 1) (const_int 1)
16356 (const_int 1) (const_int 1)
16357 (const_int 1) (const_int 1)
16358 (const_int 1) (const_int 1)
16359 (const_int 1) (const_int 1)
16360 (const_int 1) (const_int 1)
16361 (const_int 1) (const_int 1)]))
16362 (const_int 1))))]
16363 "TARGET_AVX512BW"
16364 "vpmulhrsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
16365 [(set_attr "type" "sseimul")
16366 (set_attr "prefix" "evex")
16367 (set_attr "mode" "XI")])
16368
16369 (define_insn "ssse3_pmaddubsw128"
16370 [(set (match_operand:V8HI 0 "register_operand" "=x,x,v")
16371 (ss_plus:V8HI
16372 (mult:V8HI
16373 (zero_extend:V8HI
16374 (vec_select:V8QI
16375 (match_operand:V16QI 1 "register_operand" "0,x,v")
16376 (parallel [(const_int 0) (const_int 2)
16377 (const_int 4) (const_int 6)
16378 (const_int 8) (const_int 10)
16379 (const_int 12) (const_int 14)])))
16380 (sign_extend:V8HI
16381 (vec_select:V8QI
16382 (match_operand:V16QI 2 "vector_operand" "xBm,xm,vm")
16383 (parallel [(const_int 0) (const_int 2)
16384 (const_int 4) (const_int 6)
16385 (const_int 8) (const_int 10)
16386 (const_int 12) (const_int 14)]))))
16387 (mult:V8HI
16388 (zero_extend:V8HI
16389 (vec_select:V8QI (match_dup 1)
16390 (parallel [(const_int 1) (const_int 3)
16391 (const_int 5) (const_int 7)
16392 (const_int 9) (const_int 11)
16393 (const_int 13) (const_int 15)])))
16394 (sign_extend:V8HI
16395 (vec_select:V8QI (match_dup 2)
16396 (parallel [(const_int 1) (const_int 3)
16397 (const_int 5) (const_int 7)
16398 (const_int 9) (const_int 11)
16399 (const_int 13) (const_int 15)]))))))]
16400 "TARGET_SSSE3"
16401 "@
16402 pmaddubsw\t{%2, %0|%0, %2}
16403 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}
16404 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
16405 [(set_attr "isa" "noavx,avx,avx512bw")
16406 (set_attr "type" "sseiadd")
16407 (set_attr "atom_unit" "simul")
16408 (set_attr "prefix_data16" "1,*,*")
16409 (set_attr "prefix_extra" "1")
16410 (set_attr "prefix" "orig,vex,evex")
16411 (set_attr "mode" "TI")])
16412
16413 (define_insn "ssse3_pmaddubsw"
16414 [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
16415 (ss_plus:V4HI
16416 (mult:V4HI
16417 (zero_extend:V4HI
16418 (vec_select:V4QI
16419 (match_operand:V8QI 1 "register_operand" "0,0,Yv")
16420 (parallel [(const_int 0) (const_int 2)
16421 (const_int 4) (const_int 6)])))
16422 (sign_extend:V4HI
16423 (vec_select:V4QI
16424 (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")
16425 (parallel [(const_int 0) (const_int 2)
16426 (const_int 4) (const_int 6)]))))
16427 (mult:V4HI
16428 (zero_extend:V4HI
16429 (vec_select:V4QI (match_dup 1)
16430 (parallel [(const_int 1) (const_int 3)
16431 (const_int 5) (const_int 7)])))
16432 (sign_extend:V4HI
16433 (vec_select:V4QI (match_dup 2)
16434 (parallel [(const_int 1) (const_int 3)
16435 (const_int 5) (const_int 7)]))))))]
16436 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
16437 "@
16438 pmaddubsw\t{%2, %0|%0, %2}
16439 pmaddubsw\t{%2, %0|%0, %2}
16440 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
16441 [(set_attr "isa" "*,noavx,avx")
16442 (set_attr "mmx_isa" "native,*,*")
16443 (set_attr "type" "sseiadd")
16444 (set_attr "atom_unit" "simul")
16445 (set_attr "prefix_extra" "1")
16446 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
16447 (set_attr "mode" "DI,TI,TI")])
16448
16449 (define_mode_iterator PMULHRSW
16450 [V8HI (V16HI "TARGET_AVX2")])
16451
16452 (define_expand "<ssse3_avx2>_pmulhrsw<mode>3_mask"
16453 [(set (match_operand:PMULHRSW 0 "register_operand")
16454 (vec_merge:PMULHRSW
16455 (truncate:PMULHRSW
16456 (lshiftrt:<ssedoublemode>
16457 (plus:<ssedoublemode>
16458 (lshiftrt:<ssedoublemode>
16459 (mult:<ssedoublemode>
16460 (sign_extend:<ssedoublemode>
16461 (match_operand:PMULHRSW 1 "nonimmediate_operand"))
16462 (sign_extend:<ssedoublemode>
16463 (match_operand:PMULHRSW 2 "nonimmediate_operand")))
16464 (const_int 14))
16465 (match_dup 5))
16466 (const_int 1)))
16467 (match_operand:PMULHRSW 3 "register_operand")
16468 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
16469 "TARGET_AVX512BW && TARGET_AVX512VL"
16470 {
16471 operands[5] = CONST1_RTX(<MODE>mode);
16472 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
16473 })
16474
16475 (define_expand "<ssse3_avx2>_pmulhrsw<mode>3"
16476 [(set (match_operand:PMULHRSW 0 "register_operand")
16477 (truncate:PMULHRSW
16478 (lshiftrt:<ssedoublemode>
16479 (plus:<ssedoublemode>
16480 (lshiftrt:<ssedoublemode>
16481 (mult:<ssedoublemode>
16482 (sign_extend:<ssedoublemode>
16483 (match_operand:PMULHRSW 1 "nonimmediate_operand"))
16484 (sign_extend:<ssedoublemode>
16485 (match_operand:PMULHRSW 2 "nonimmediate_operand")))
16486 (const_int 14))
16487 (match_dup 3))
16488 (const_int 1))))]
16489 "TARGET_SSSE3"
16490 {
16491 operands[3] = CONST1_RTX(<MODE>mode);
16492 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
16493 })
16494
16495 (define_expand "smulhrs<mode>3"
16496 [(set (match_operand:VI2_AVX2 0 "register_operand")
16497 (truncate:VI2_AVX2
16498 (lshiftrt:<ssedoublemode>
16499 (plus:<ssedoublemode>
16500 (lshiftrt:<ssedoublemode>
16501 (mult:<ssedoublemode>
16502 (sign_extend:<ssedoublemode>
16503 (match_operand:VI2_AVX2 1 "nonimmediate_operand"))
16504 (sign_extend:<ssedoublemode>
16505 (match_operand:VI2_AVX2 2 "nonimmediate_operand")))
16506 (const_int 14))
16507 (match_dup 3))
16508 (const_int 1))))]
16509 "TARGET_SSSE3"
16510 {
16511 operands[3] = CONST1_RTX(<MODE>mode);
16512 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
16513 })
16514
16515 (define_insn "*<ssse3_avx2>_pmulhrsw<mode>3<mask_name>"
16516 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x,v")
16517 (truncate:VI2_AVX2
16518 (lshiftrt:<ssedoublemode>
16519 (plus:<ssedoublemode>
16520 (lshiftrt:<ssedoublemode>
16521 (mult:<ssedoublemode>
16522 (sign_extend:<ssedoublemode>
16523 (match_operand:VI2_AVX2 1 "vector_operand" "%0,x,v"))
16524 (sign_extend:<ssedoublemode>
16525 (match_operand:VI2_AVX2 2 "vector_operand" "xBm,xm,vm")))
16526 (const_int 14))
16527 (match_operand:VI2_AVX2 3 "const1_operand"))
16528 (const_int 1))))]
16529 "TARGET_SSSE3 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
16530 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
16531 "@
16532 pmulhrsw\t{%2, %0|%0, %2}
16533 vpmulhrsw\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}
16534 vpmulhrsw\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"
16535 [(set_attr "isa" "noavx,avx,avx512bw")
16536 (set_attr "type" "sseimul")
16537 (set_attr "prefix_data16" "1,*,*")
16538 (set_attr "prefix_extra" "1")
16539 (set_attr "prefix" "orig,maybe_evex,evex")
16540 (set_attr "mode" "<sseinsnmode>")])
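
;; Scalar sketch of the PMULHRSW computation encoded above: form the signed
;; 32-bit product, drop 14 bits, round, drop one more bit and keep the low
;; 16 bits (the -32768 * -32768 corner wraps to -32768, as the insn does).
;;
;;   #include <stdint.h>
;;
;;   static int16_t
;;   pmulhrsw_model (int16_t a, int16_t b)
;;   {
;;     int32_t t = ((int32_t) a * b) >> 14;
;;     return (int16_t) ((t + 1) >> 1);
;;   }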
16541
16542 (define_expand "smulhrsv4hi3"
16543 [(set (match_operand:V4HI 0 "register_operand")
16544 (truncate:V4HI
16545 (lshiftrt:V4SI
16546 (plus:V4SI
16547 (lshiftrt:V4SI
16548 (mult:V4SI
16549 (sign_extend:V4SI
16550 (match_operand:V4HI 1 "register_operand"))
16551 (sign_extend:V4SI
16552 (match_operand:V4HI 2 "register_operand")))
16553 (const_int 14))
16554 (match_dup 3))
16555 (const_int 1))))]
16556 "TARGET_MMX_WITH_SSE && TARGET_SSSE3"
16557 {
16558 operands[3] = CONST1_RTX(V4HImode);
16559 ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);
16560 })
16561
16562 (define_expand "ssse3_pmulhrswv4hi3"
16563 [(set (match_operand:V4HI 0 "register_operand")
16564 (truncate:V4HI
16565 (lshiftrt:V4SI
16566 (plus:V4SI
16567 (lshiftrt:V4SI
16568 (mult:V4SI
16569 (sign_extend:V4SI
16570 (match_operand:V4HI 1 "register_mmxmem_operand"))
16571 (sign_extend:V4SI
16572 (match_operand:V4HI 2 "register_mmxmem_operand")))
16573 (const_int 14))
16574 (match_dup 3))
16575 (const_int 1))))]
16576 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
16577 {
16578 operands[3] = CONST1_RTX(V4HImode);
16579 ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);
16580 })
16581
16582 (define_insn "*ssse3_pmulhrswv4hi3"
16583 [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
16584 (truncate:V4HI
16585 (lshiftrt:V4SI
16586 (plus:V4SI
16587 (lshiftrt:V4SI
16588 (mult:V4SI
16589 (sign_extend:V4SI
16590 (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv"))
16591 (sign_extend:V4SI
16592 (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")))
16593 (const_int 14))
16594 (match_operand:V4HI 3 "const1_operand"))
16595 (const_int 1))))]
16596 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
16597 && TARGET_SSSE3
16598 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
16599 "@
16600 pmulhrsw\t{%2, %0|%0, %2}
16601 pmulhrsw\t{%2, %0|%0, %2}
16602 vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
16603 [(set_attr "isa" "*,noavx,avx")
16604 (set_attr "mmx_isa" "native,*,*")
16605 (set_attr "type" "sseimul")
16606 (set_attr "prefix_extra" "1")
16607 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
16608 (set_attr "mode" "DI,TI,TI")])
16609
16610 (define_insn "<ssse3_avx2>_pshufb<mode>3<mask_name>"
16611 [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v")
16612 (unspec:VI1_AVX512
16613 [(match_operand:VI1_AVX512 1 "register_operand" "0,x,v")
16614 (match_operand:VI1_AVX512 2 "vector_operand" "xBm,xm,vm")]
16615 UNSPEC_PSHUFB))]
16616 "TARGET_SSSE3 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
16617 "@
16618 pshufb\t{%2, %0|%0, %2}
16619 vpshufb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
16620 vpshufb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
16621 [(set_attr "isa" "noavx,avx,avx512bw")
16622 (set_attr "type" "sselog1")
16623 (set_attr "prefix_data16" "1,*,*")
16624 (set_attr "prefix_extra" "1")
16625 (set_attr "prefix" "orig,maybe_evex,evex")
16626 (set_attr "btver2_decode" "vector")
16627 (set_attr "mode" "<sseinsnmode>")])
16628
16629 (define_insn_and_split "ssse3_pshufbv8qi3"
16630 [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
16631 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0,0,Yv")
16632 (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")]
16633 UNSPEC_PSHUFB))
16634 (clobber (match_scratch:V4SI 3 "=X,&x,&Yv"))]
16635 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
16636 "@
16637 pshufb\t{%2, %0|%0, %2}
16638 #
16639 #"
16640 "TARGET_SSSE3 && reload_completed
16641 && SSE_REGNO_P (REGNO (operands[0]))"
16642 [(set (match_dup 3) (match_dup 5))
16643 (set (match_dup 3)
16644 (and:V4SI (match_dup 3) (match_dup 2)))
16645 (set (match_dup 0)
16646 (unspec:V16QI [(match_dup 1) (match_dup 4)] UNSPEC_PSHUFB))]
16647 {
16648 /* Emulate the MMX version of pshufb with the SSE version by masking out
16649 bit 3 of the shuffle control byte. */
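  /* Note: in the 128-bit pshufb the low four bits of each control byte
     index one of 16 source bytes, while the MMX form only has 8 source
     bytes; clearing bit 3 with the 0xf7 mask built below keeps every
     index inside the low qword, and bit 7, which requests a zeroed
     destination byte, is preserved.  */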
16650 operands[0] = lowpart_subreg (V16QImode, operands[0],
16651 GET_MODE (operands[0]));
16652 operands[1] = lowpart_subreg (V16QImode, operands[1],
16653 GET_MODE (operands[1]));
16654 operands[2] = lowpart_subreg (V4SImode, operands[2],
16655 GET_MODE (operands[2]));
16656 operands[4] = lowpart_subreg (V16QImode, operands[3],
16657 GET_MODE (operands[3]));
16658 rtvec par = gen_rtvec (4, GEN_INT (0xf7f7f7f7),
16659 GEN_INT (0xf7f7f7f7),
16660 GEN_INT (0xf7f7f7f7),
16661 GEN_INT (0xf7f7f7f7));
16662 rtx vec_const = gen_rtx_CONST_VECTOR (V4SImode, par);
16663 operands[5] = force_const_mem (V4SImode, vec_const);
16664 }
16665 [(set_attr "mmx_isa" "native,sse_noavx,avx")
16666 (set_attr "prefix_extra" "1")
16667 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
16668 (set_attr "mode" "DI,TI,TI")])
16669
16670 (define_insn "<ssse3_avx2>_psign<mode>3"
16671 [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
16672 (unspec:VI124_AVX2
16673 [(match_operand:VI124_AVX2 1 "register_operand" "0,x")
16674 (match_operand:VI124_AVX2 2 "vector_operand" "xBm,xm")]
16675 UNSPEC_PSIGN))]
16676 "TARGET_SSSE3"
16677 "@
16678 psign<ssemodesuffix>\t{%2, %0|%0, %2}
16679 vpsign<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
16680 [(set_attr "isa" "noavx,avx")
16681 (set_attr "type" "sselog1")
16682 (set_attr "prefix_data16" "1,*")
16683 (set_attr "prefix_extra" "1")
16684 (set_attr "prefix" "orig,vex")
16685 (set_attr "mode" "<sseinsnmode>")])
16686
16687 (define_insn "ssse3_psign<mode>3"
16688 [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,Yv")
16689 (unspec:MMXMODEI
16690 [(match_operand:MMXMODEI 1 "register_operand" "0,0,Yv")
16691 (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,Yv")]
16692 UNSPEC_PSIGN))]
16693 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
16694 "@
16695 psign<mmxvecsize>\t{%2, %0|%0, %2}
16696 psign<mmxvecsize>\t{%2, %0|%0, %2}
16697 vpsign<mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
16698 [(set_attr "isa" "*,noavx,avx")
16699 (set_attr "mmx_isa" "native,*,*")
16700 (set_attr "type" "sselog1")
16701 (set_attr "prefix_extra" "1")
16702 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
16703 (set_attr "mode" "DI,TI,TI")])
16704
16705 (define_insn "<ssse3_avx2>_palignr<mode>_mask"
16706 [(set (match_operand:VI1_AVX512 0 "register_operand" "=v")
16707 (vec_merge:VI1_AVX512
16708 (unspec:VI1_AVX512
16709 [(match_operand:VI1_AVX512 1 "register_operand" "v")
16710 (match_operand:VI1_AVX512 2 "nonimmediate_operand" "vm")
16711 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
16712 UNSPEC_PALIGNR)
16713 (match_operand:VI1_AVX512 4 "nonimm_or_0_operand" "0C")
16714 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
16715 "TARGET_AVX512BW && (<MODE_SIZE> == 64 || TARGET_AVX512VL)"
16716 {
16717 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
16718 return "vpalignr\t{%3, %2, %1, %0%{%5%}%N4|%0%{%5%}%N4, %1, %2, %3}";
16719 }
16720 [(set_attr "type" "sseishft")
16721 (set_attr "atom_unit" "sishuf")
16722 (set_attr "prefix_extra" "1")
16723 (set_attr "length_immediate" "1")
16724 (set_attr "prefix" "evex")
16725 (set_attr "mode" "<sseinsnmode>")])
16726
16727 (define_insn "<ssse3_avx2>_palignr<mode>"
16728 [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,x,v")
16729 (unspec:SSESCALARMODE
16730 [(match_operand:SSESCALARMODE 1 "register_operand" "0,x,v")
16731 (match_operand:SSESCALARMODE 2 "vector_operand" "xBm,xm,vm")
16732 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n,n")]
16733 UNSPEC_PALIGNR))]
16734 "TARGET_SSSE3"
16735 {
16736 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
16737
16738 switch (which_alternative)
16739 {
16740 case 0:
16741 return "palignr\t{%3, %2, %0|%0, %2, %3}";
16742 case 1:
16743 case 2:
16744 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
16745 default:
16746 gcc_unreachable ();
16747 }
16748 }
16749 [(set_attr "isa" "noavx,avx,avx512bw")
16750 (set_attr "type" "sseishft")
16751 (set_attr "atom_unit" "sishuf")
16752 (set_attr "prefix_data16" "1,*,*")
16753 (set_attr "prefix_extra" "1")
16754 (set_attr "length_immediate" "1")
16755 (set_attr "prefix" "orig,vex,evex")
16756 (set_attr "mode" "<sseinsnmode>")])
16757
16758 (define_insn_and_split "ssse3_palignrdi"
16759 [(set (match_operand:DI 0 "register_operand" "=y,x,Yv")
16760 (unspec:DI [(match_operand:DI 1 "register_operand" "0,0,Yv")
16761 (match_operand:DI 2 "register_mmxmem_operand" "ym,x,Yv")
16762 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n,n")]
16763 UNSPEC_PALIGNR))]
16764 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
16765 {
16766 switch (which_alternative)
16767 {
16768 case 0:
16769 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
16770 return "palignr\t{%3, %2, %0|%0, %2, %3}";
16771 case 1:
16772 case 2:
16773 return "#";
16774 default:
16775 gcc_unreachable ();
16776 }
16777 }
16778 "TARGET_SSSE3 && reload_completed
16779 && SSE_REGNO_P (REGNO (operands[0]))"
16780 [(set (match_dup 0)
16781 (lshiftrt:V1TI (match_dup 0) (match_dup 3)))]
16782 {
16783 /* Emulate MMX palignrdi with SSE psrldq. */
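  /* Note: palignr concatenates op1 (upper qword) and op2 (lower qword)
     into a 128-bit value, shifts it right by the immediate number of
     bytes and keeps the low qword.  The split above models this as a
     V2DI vec_concat followed by a V1TI logical right shift; without AVX
     the concat must be built the other way around and the two qwords
     swapped afterwards, see the NB comment below.  */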
16784 rtx op0 = lowpart_subreg (V2DImode, operands[0],
16785 GET_MODE (operands[0]));
16786 if (TARGET_AVX)
16787 emit_insn (gen_vec_concatv2di (op0, operands[2], operands[1]));
16788 else
16789 {
16790 /* NB: SSE can only concatenate OP0 and OP1 to OP0. */
16791 emit_insn (gen_vec_concatv2di (op0, operands[1], operands[2]));
16792 /* Swap bits 0:63 with bits 64:127. */
16793 rtx mask = gen_rtx_PARALLEL (VOIDmode,
16794 gen_rtvec (4, GEN_INT (2),
16795 GEN_INT (3),
16796 GEN_INT (0),
16797 GEN_INT (1)));
16798 rtx op1 = lowpart_subreg (V4SImode, op0, GET_MODE (op0));
16799 rtx op2 = gen_rtx_VEC_SELECT (V4SImode, op1, mask);
16800 emit_insn (gen_rtx_SET (op1, op2));
16801 }
16802 operands[0] = lowpart_subreg (V1TImode, op0, GET_MODE (op0));
16803 }
16804 [(set_attr "mmx_isa" "native,sse_noavx,avx")
16805 (set_attr "type" "sseishft")
16806 (set_attr "atom_unit" "sishuf")
16807 (set_attr "prefix_extra" "1")
16808 (set_attr "length_immediate" "1")
16809 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
16810 (set_attr "mode" "DI,TI,TI")])
16811
16812 ;; Mode iterator to handle the absence of V2DI and V4DI abs
16813 ;; instruction patterns on pre-AVX512 targets.
16814 (define_mode_iterator VI1248_AVX512VL_AVX512BW
16815 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
16816 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
16817 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
16818 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
16819
16820 (define_insn "*abs<mode>2"
16821 [(set (match_operand:VI1248_AVX512VL_AVX512BW 0 "register_operand" "=v")
16822 (abs:VI1248_AVX512VL_AVX512BW
16823 (match_operand:VI1248_AVX512VL_AVX512BW 1 "vector_operand" "vBm")))]
16824 "TARGET_SSSE3"
16825 "%vpabs<ssemodesuffix>\t{%1, %0|%0, %1}"
16826 [(set_attr "type" "sselog1")
16827 (set_attr "prefix_data16" "1")
16828 (set_attr "prefix_extra" "1")
16829 (set_attr "prefix" "maybe_vex")
16830 (set_attr "mode" "<sseinsnmode>")])
16831
16832 (define_insn "abs<mode>2_mask"
16833 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
16834 (vec_merge:VI48_AVX512VL
16835 (abs:VI48_AVX512VL
16836 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm"))
16837 (match_operand:VI48_AVX512VL 2 "nonimm_or_0_operand" "0C")
16838 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
16839 "TARGET_AVX512F"
16840 "vpabs<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
16841 [(set_attr "type" "sselog1")
16842 (set_attr "prefix" "evex")
16843 (set_attr "mode" "<sseinsnmode>")])
16844
16845 (define_insn "abs<mode>2_mask"
16846 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
16847 (vec_merge:VI12_AVX512VL
16848 (abs:VI12_AVX512VL
16849 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "vm"))
16850 (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand" "0C")
16851 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
16852 "TARGET_AVX512BW"
16853 "vpabs<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
16854 [(set_attr "type" "sselog1")
16855 (set_attr "prefix" "evex")
16856 (set_attr "mode" "<sseinsnmode>")])
16857
16858 (define_expand "abs<mode>2"
16859 [(set (match_operand:VI_AVX2 0 "register_operand")
16860 (abs:VI_AVX2
16861 (match_operand:VI_AVX2 1 "vector_operand")))]
16862 "TARGET_SSE2"
16863 {
16864 if (!TARGET_SSSE3
16865 || ((<MODE>mode == V2DImode || <MODE>mode == V4DImode)
16866 && !TARGET_AVX512VL))
16867 {
16868 ix86_expand_sse2_abs (operands[0], operands[1]);
16869 DONE;
16870 }
16871 })
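;; Note: when pabs cannot be used (no SSSE3, or V2DI/V4DI without
;; AVX512VL), ix86_expand_sse2_abs synthesizes the absolute value from
;; plain SSE2 operations instead; the exact sequence (sign-mask
;; xor/subtract, unsigned min, etc.) depends on the element width and is
;; a detail of the i386 back end.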
16872
16873 (define_insn "ssse3_abs<mode>2"
16874 [(set (match_operand:MMXMODEI 0 "register_operand" "=y,Yv")
16875 (abs:MMXMODEI
16876 (match_operand:MMXMODEI 1 "register_mmxmem_operand" "ym,Yv")))]
16877 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
16878 "@
16879 pabs<mmxvecsize>\t{%1, %0|%0, %1}
16880 %vpabs<mmxvecsize>\t{%1, %0|%0, %1}"
16881 [(set_attr "mmx_isa" "native,*")
16882 (set_attr "type" "sselog1")
16883 (set_attr "prefix_rep" "0")
16884 (set_attr "prefix_extra" "1")
16885 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
16886 (set_attr "mode" "DI,TI")])
16887
16888 (define_insn "abs<mode>2"
16889 [(set (match_operand:MMXMODEI 0 "register_operand")
16890 (abs:MMXMODEI
16891 (match_operand:MMXMODEI 1 "register_operand")))]
16892 "TARGET_MMX_WITH_SSE && TARGET_SSSE3")
16893
16894 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
16895 ;;
16896 ;; AMD SSE4A instructions
16897 ;;
16898 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
16899
16900 (define_insn "sse4a_movnt<mode>"
16901 [(set (match_operand:MODEF 0 "memory_operand" "=m")
16902 (unspec:MODEF
16903 [(match_operand:MODEF 1 "register_operand" "x")]
16904 UNSPEC_MOVNT))]
16905 "TARGET_SSE4A"
16906 "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
16907 [(set_attr "type" "ssemov")
16908 (set_attr "mode" "<MODE>")])
16909
16910 (define_insn "sse4a_vmmovnt<mode>"
16911 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
16912 (unspec:<ssescalarmode>
16913 [(vec_select:<ssescalarmode>
16914 (match_operand:VF_128 1 "register_operand" "x")
16915 (parallel [(const_int 0)]))]
16916 UNSPEC_MOVNT))]
16917 "TARGET_SSE4A"
16918 "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
16919 [(set_attr "type" "ssemov")
16920 (set_attr "mode" "<ssescalarmode>")])
16921
16922 (define_insn "sse4a_extrqi"
16923 [(set (match_operand:V2DI 0 "register_operand" "=x")
16924 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
16925 (match_operand 2 "const_0_to_255_operand")
16926 (match_operand 3 "const_0_to_255_operand")]
16927 UNSPEC_EXTRQI))]
16928 "TARGET_SSE4A"
16929 "extrq\t{%3, %2, %0|%0, %2, %3}"
16930 [(set_attr "type" "sse")
16931 (set_attr "prefix_data16" "1")
16932 (set_attr "length_immediate" "2")
16933 (set_attr "mode" "TI")])
16934
16935 (define_insn "sse4a_extrq"
16936 [(set (match_operand:V2DI 0 "register_operand" "=x")
16937 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
16938 (match_operand:V16QI 2 "register_operand" "x")]
16939 UNSPEC_EXTRQ))]
16940 "TARGET_SSE4A"
16941 "extrq\t{%2, %0|%0, %2}"
16942 [(set_attr "type" "sse")
16943 (set_attr "prefix_data16" "1")
16944 (set_attr "mode" "TI")])
16945
16946 (define_insn "sse4a_insertqi"
16947 [(set (match_operand:V2DI 0 "register_operand" "=x")
16948 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
16949 (match_operand:V2DI 2 "register_operand" "x")
16950 (match_operand 3 "const_0_to_255_operand")
16951 (match_operand 4 "const_0_to_255_operand")]
16952 UNSPEC_INSERTQI))]
16953 "TARGET_SSE4A"
16954 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
16955 [(set_attr "type" "sseins")
16956 (set_attr "prefix_data16" "0")
16957 (set_attr "prefix_rep" "1")
16958 (set_attr "length_immediate" "2")
16959 (set_attr "mode" "TI")])
16960
16961 (define_insn "sse4a_insertq"
16962 [(set (match_operand:V2DI 0 "register_operand" "=x")
16963 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
16964 (match_operand:V2DI 2 "register_operand" "x")]
16965 UNSPEC_INSERTQ))]
16966 "TARGET_SSE4A"
16967 "insertq\t{%2, %0|%0, %2}"
16968 [(set_attr "type" "sseins")
16969 (set_attr "prefix_data16" "0")
16970 (set_attr "prefix_rep" "1")
16971 (set_attr "mode" "TI")])
16972
16973 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
16974 ;;
16975 ;; Intel SSE4.1 instructions
16976 ;;
16977 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
16978
16979 ;; Mapping of immediate bits for blend instructions
16980 (define_mode_attr blendbits
16981 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
16982
16983 (define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>"
16984 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
16985 (vec_merge:VF_128_256
16986 (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
16987 (match_operand:VF_128_256 1 "register_operand" "0,0,x")
16988 (match_operand:SI 3 "const_0_to_<blendbits>_operand")))]
16989 "TARGET_SSE4_1"
16990 "@
16991 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
16992 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
16993 vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16994 [(set_attr "isa" "noavx,noavx,avx")
16995 (set_attr "type" "ssemov")
16996 (set_attr "length_immediate" "1")
16997 (set_attr "prefix_data16" "1,1,*")
16998 (set_attr "prefix_extra" "1")
16999 (set_attr "prefix" "orig,orig,vex")
17000 (set_attr "mode" "<MODE>")])
17001
17002 (define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>"
17003 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
17004 (unspec:VF_128_256
17005 [(match_operand:VF_128_256 1 "register_operand" "0,0,x")
17006 (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
17007 (match_operand:VF_128_256 3 "register_operand" "Yz,Yz,x")]
17008 UNSPEC_BLENDV))]
17009 "TARGET_SSE4_1"
17010 "@
17011 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
17012 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
17013 vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17014 [(set_attr "isa" "noavx,noavx,avx")
17015 (set_attr "type" "ssemov")
17016 (set_attr "length_immediate" "1")
17017 (set_attr "prefix_data16" "1,1,*")
17018 (set_attr "prefix_extra" "1")
17019 (set_attr "prefix" "orig,orig,vex")
17020 (set_attr "btver2_decode" "vector,vector,vector")
17021 (set_attr "mode" "<MODE>")])
17022
17023 ;; Also define scalar versions. These are used for conditional move.
17024 ;; Using subregs into vector modes causes register allocation lossage.
17025 ;; These patterns do not allow memory operands because the native
17026 ;; instructions read the full 128 bits.
17027
17028 (define_insn "sse4_1_blendv<ssemodesuffix>"
17029 [(set (match_operand:MODEF 0 "register_operand" "=Yr,*x,x")
17030 (unspec:MODEF
17031 [(match_operand:MODEF 1 "register_operand" "0,0,x")
17032 (match_operand:MODEF 2 "register_operand" "Yr,*x,x")
17033 (match_operand:MODEF 3 "register_operand" "Yz,Yz,x")]
17034 UNSPEC_BLENDV))]
17035 "TARGET_SSE4_1"
17036 {
17037 if (get_attr_mode (insn) == MODE_V4SF)
17038 return (which_alternative == 2
17039 ? "vblendvps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17040 : "blendvps\t{%3, %2, %0|%0, %2, %3}");
17041 else
17042 return (which_alternative == 2
17043 ? "vblendv<ssevecmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17044 : "blendv<ssevecmodesuffix>\t{%3, %2, %0|%0, %2, %3}");
17045 }
17046 [(set_attr "isa" "noavx,noavx,avx")
17047 (set_attr "type" "ssemov")
17048 (set_attr "length_immediate" "1")
17049 (set_attr "prefix_data16" "1,1,*")
17050 (set_attr "prefix_extra" "1")
17051 (set_attr "prefix" "orig,orig,vex")
17052 (set_attr "btver2_decode" "vector,vector,vector")
17053 (set (attr "mode")
17054 (cond [(match_test "TARGET_AVX")
17055 (const_string "<ssevecmode>")
17056 (match_test "optimize_function_for_size_p (cfun)")
17057 (const_string "V4SF")
17058 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
17059 (const_string "V4SF")
17060 ]
17061 (const_string "<ssevecmode>")))])
17062
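;; The blendv instructions select on the sign bit of each mask element
;; only, so a mask computed as (lt X 0) is interchangeable with X itself;
;; the *_lt splitters below rely on that to drop the explicit comparison.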
17063 (define_insn_and_split "*<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>_lt"
17064 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
17065 (unspec:VF_128_256
17066 [(match_operand:VF_128_256 1 "register_operand" "0,0,x")
17067 (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
17068 (lt:VF_128_256
17069 (match_operand:<sseintvecmode> 3 "register_operand" "Yz,Yz,x")
17070 (match_operand:<sseintvecmode> 4 "const0_operand" "C,C,C"))]
17071 UNSPEC_BLENDV))]
17072 "TARGET_SSE4_1"
17073 "#"
17074 "&& reload_completed"
17075 [(set (match_dup 0)
17076 (unspec:VF_128_256
17077 [(match_dup 1) (match_dup 2) (match_dup 3)] UNSPEC_BLENDV))]
17078 "operands[3] = gen_lowpart (<MODE>mode, operands[3]);"
17079 [(set_attr "isa" "noavx,noavx,avx")
17080 (set_attr "type" "ssemov")
17081 (set_attr "length_immediate" "1")
17082 (set_attr "prefix_data16" "1,1,*")
17083 (set_attr "prefix_extra" "1")
17084 (set_attr "prefix" "orig,orig,vex")
17085 (set_attr "btver2_decode" "vector,vector,vector")
17086 (set_attr "mode" "<MODE>")])
17087
17088 (define_mode_attr ssefltmodesuffix
17089 [(V2DI "pd") (V4DI "pd") (V4SI "ps") (V8SI "ps")])
17090
17091 (define_mode_attr ssefltvecmode
17092 [(V2DI "V2DF") (V4DI "V4DF") (V4SI "V4SF") (V8SI "V8SF")])
17093
17094 (define_insn_and_split "*<sse4_1>_blendv<ssefltmodesuffix><avxsizesuffix>_ltint"
17095 [(set (match_operand:<ssebytemode> 0 "register_operand" "=Yr,*x,x")
17096 (unspec:<ssebytemode>
17097 [(match_operand:<ssebytemode> 1 "register_operand" "0,0,x")
17098 (match_operand:<ssebytemode> 2 "vector_operand" "YrBm,*xBm,xm")
17099 (subreg:<ssebytemode>
17100 (lt:VI48_AVX
17101 (match_operand:VI48_AVX 3 "register_operand" "Yz,Yz,x")
17102 (match_operand:VI48_AVX 4 "const0_operand" "C,C,C")) 0)]
17103 UNSPEC_BLENDV))]
17104 "TARGET_SSE4_1"
17105 "#"
17106 "&& reload_completed"
17107 [(set (match_dup 0)
17108 (unspec:<ssefltvecmode>
17109 [(match_dup 1) (match_dup 2) (match_dup 3)] UNSPEC_BLENDV))]
17110 {
17111 operands[0] = gen_lowpart (<ssefltvecmode>mode, operands[0]);
17112 operands[1] = gen_lowpart (<ssefltvecmode>mode, operands[1]);
17113 operands[2] = gen_lowpart (<ssefltvecmode>mode, operands[2]);
17114 operands[3] = gen_lowpart (<ssefltvecmode>mode, operands[3]);
17115 }
17116 [(set_attr "isa" "noavx,noavx,avx")
17117 (set_attr "type" "ssemov")
17118 (set_attr "length_immediate" "1")
17119 (set_attr "prefix_data16" "1,1,*")
17120 (set_attr "prefix_extra" "1")
17121 (set_attr "prefix" "orig,orig,vex")
17122 (set_attr "btver2_decode" "vector,vector,vector")
17123 (set_attr "mode" "<ssefltvecmode>")])
17124
17125 (define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
17126 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
17127 (unspec:VF_128_256
17128 [(match_operand:VF_128_256 1 "vector_operand" "%0,0,x")
17129 (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
17130 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
17131 UNSPEC_DP))]
17132 "TARGET_SSE4_1"
17133 "@
17134 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
17135 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
17136 vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17137 [(set_attr "isa" "noavx,noavx,avx")
17138 (set_attr "type" "ssemul")
17139 (set_attr "length_immediate" "1")
17140 (set_attr "prefix_data16" "1,1,*")
17141 (set_attr "prefix_extra" "1")
17142 (set_attr "prefix" "orig,orig,vex")
17143 (set_attr "btver2_decode" "vector,vector,vector")
17144 (set_attr "znver1_decode" "vector,vector,vector")
17145 (set_attr "mode" "<MODE>")])
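;; Note: for dpps/dppd the upper half of the immediate selects which
;; element pairs are multiplied and summed, and the lower half selects
;; which destination elements receive the sum (the remaining elements are
;; zeroed).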
17146
17147 ;; Mode attribute used by `vmovntdqa' pattern
17148 (define_mode_attr vi8_sse4_1_avx2_avx512
17149 [(V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512f")])
17150
17151 (define_insn "<vi8_sse4_1_avx2_avx512>_movntdqa"
17152 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand" "=Yr,*x,v")
17153 (unspec:VI8_AVX2_AVX512F [(match_operand:VI8_AVX2_AVX512F 1 "memory_operand" "m,m,m")]
17154 UNSPEC_MOVNTDQA))]
17155 "TARGET_SSE4_1"
17156 "%vmovntdqa\t{%1, %0|%0, %1}"
17157 [(set_attr "isa" "noavx,noavx,avx")
17158 (set_attr "type" "ssemov")
17159 (set_attr "prefix_extra" "1,1,*")
17160 (set_attr "prefix" "orig,orig,maybe_evex")
17161 (set_attr "mode" "<sseinsnmode>")])
17162
17163 (define_insn "<sse4_1_avx2>_mpsadbw"
17164 [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
17165 (unspec:VI1_AVX2
17166 [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x")
17167 (match_operand:VI1_AVX2 2 "vector_operand" "YrBm,*xBm,xm")
17168 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
17169 UNSPEC_MPSADBW))]
17170 "TARGET_SSE4_1"
17171 "@
17172 mpsadbw\t{%3, %2, %0|%0, %2, %3}
17173 mpsadbw\t{%3, %2, %0|%0, %2, %3}
17174 vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17175 [(set_attr "isa" "noavx,noavx,avx")
17176 (set_attr "type" "sselog1")
17177 (set_attr "length_immediate" "1")
17178 (set_attr "prefix_extra" "1")
17179 (set_attr "prefix" "orig,orig,vex")
17180 (set_attr "btver2_decode" "vector,vector,vector")
17181 (set_attr "znver1_decode" "vector,vector,vector")
17182 (set_attr "mode" "<sseinsnmode>")])
17183
17184 (define_insn "<sse4_1_avx2>_packusdw<mask_name>"
17185 [(set (match_operand:VI2_AVX2 0 "register_operand" "=Yr,*x,x,v")
17186 (vec_concat:VI2_AVX2
17187 (us_truncate:<ssehalfvecmode>
17188 (match_operand:<sseunpackmode> 1 "register_operand" "0,0,x,v"))
17189 (us_truncate:<ssehalfvecmode>
17190 (match_operand:<sseunpackmode> 2 "vector_operand" "YrBm,*xBm,xm,vm"))))]
17191 "TARGET_SSE4_1 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
17192 "@
17193 packusdw\t{%2, %0|%0, %2}
17194 packusdw\t{%2, %0|%0, %2}
17195 vpackusdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
17196 vpackusdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17197 [(set_attr "isa" "noavx,noavx,avx,avx512bw")
17198 (set_attr "type" "sselog")
17199 (set_attr "prefix_extra" "1")
17200 (set_attr "prefix" "orig,orig,<mask_prefix>,evex")
17201 (set_attr "mode" "<sseinsnmode>")])
17202
17203 (define_insn "<sse4_1_avx2>_pblendvb"
17204 [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
17205 (unspec:VI1_AVX2
17206 [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x")
17207 (match_operand:VI1_AVX2 2 "vector_operand" "YrBm,*xBm,xm")
17208 (match_operand:VI1_AVX2 3 "register_operand" "Yz,Yz,x")]
17209 UNSPEC_BLENDV))]
17210 "TARGET_SSE4_1"
17211 "@
17212 pblendvb\t{%3, %2, %0|%0, %2, %3}
17213 pblendvb\t{%3, %2, %0|%0, %2, %3}
17214 vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17215 [(set_attr "isa" "noavx,noavx,avx")
17216 (set_attr "type" "ssemov")
17217 (set_attr "prefix_extra" "1")
17218 (set_attr "length_immediate" "*,*,1")
17219 (set_attr "prefix" "orig,orig,vex")
17220 (set_attr "btver2_decode" "vector,vector,vector")
17221 (set_attr "mode" "<sseinsnmode>")])
17222
17223 (define_insn_and_split "*<sse4_1_avx2>_pblendvb_lt"
17224 [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
17225 (unspec:VI1_AVX2
17226 [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x")
17227 (match_operand:VI1_AVX2 2 "vector_operand" "YrBm,*xBm,xm")
17228 (lt:VI1_AVX2 (match_operand:VI1_AVX2 3 "register_operand" "Yz,Yz,x")
17229 (match_operand:VI1_AVX2 4 "const0_operand" "C,C,C"))]
17230 UNSPEC_BLENDV))]
17231 "TARGET_SSE4_1"
17232 "#"
17233 ""
17234 [(set (match_dup 0)
17235 (unspec:VI1_AVX2
17236 [(match_dup 1) (match_dup 2) (match_dup 3)] UNSPEC_BLENDV))]
17237 ""
17238 [(set_attr "isa" "noavx,noavx,avx")
17239 (set_attr "type" "ssemov")
17240 (set_attr "prefix_extra" "1")
17241 (set_attr "length_immediate" "*,*,1")
17242 (set_attr "prefix" "orig,orig,vex")
17243 (set_attr "btver2_decode" "vector,vector,vector")
17244 (set_attr "mode" "<sseinsnmode>")])
17245
17246 (define_insn "sse4_1_pblendw"
17247 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,x")
17248 (vec_merge:V8HI
17249 (match_operand:V8HI 2 "vector_operand" "YrBm,*xBm,xm")
17250 (match_operand:V8HI 1 "register_operand" "0,0,x")
17251 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")))]
17252 "TARGET_SSE4_1"
17253 "@
17254 pblendw\t{%3, %2, %0|%0, %2, %3}
17255 pblendw\t{%3, %2, %0|%0, %2, %3}
17256 vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17257 [(set_attr "isa" "noavx,noavx,avx")
17258 (set_attr "type" "ssemov")
17259 (set_attr "prefix_extra" "1")
17260 (set_attr "length_immediate" "1")
17261 (set_attr "prefix" "orig,orig,vex")
17262 (set_attr "mode" "TI")])
17263
17264 ;; The builtin uses an 8-bit immediate. Expand that.
17265 (define_expand "avx2_pblendw"
17266 [(set (match_operand:V16HI 0 "register_operand")
17267 (vec_merge:V16HI
17268 (match_operand:V16HI 2 "nonimmediate_operand")
17269 (match_operand:V16HI 1 "register_operand")
17270 (match_operand:SI 3 "const_0_to_255_operand")))]
17271 "TARGET_AVX2"
17272 {
17273 HOST_WIDE_INT val = INTVAL (operands[3]) & 0xff;
17274 operands[3] = GEN_INT (val << 8 | val);
17275 })
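;; Note: vpblendw applies the same 8-bit selector to each 128-bit lane,
;; so the expander above duplicates the builtin's 8-bit immediate into
;; the 16-bit vec_merge mask (val << 8 | val); the *avx2_pblendw insn
;; below truncates it back to 8 bits when printing the instruction.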
17276
17277 (define_insn "*avx2_pblendw"
17278 [(set (match_operand:V16HI 0 "register_operand" "=x")
17279 (vec_merge:V16HI
17280 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
17281 (match_operand:V16HI 1 "register_operand" "x")
17282 (match_operand:SI 3 "avx2_pblendw_operand" "n")))]
17283 "TARGET_AVX2"
17284 {
17285 operands[3] = GEN_INT (INTVAL (operands[3]) & 0xff);
17286 return "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
17287 }
17288 [(set_attr "type" "ssemov")
17289 (set_attr "prefix_extra" "1")
17290 (set_attr "length_immediate" "1")
17291 (set_attr "prefix" "vex")
17292 (set_attr "mode" "OI")])
17293
17294 (define_insn "avx2_pblendd<mode>"
17295 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
17296 (vec_merge:VI4_AVX2
17297 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")
17298 (match_operand:VI4_AVX2 1 "register_operand" "x")
17299 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
17300 "TARGET_AVX2"
17301 "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17302 [(set_attr "type" "ssemov")
17303 (set_attr "prefix_extra" "1")
17304 (set_attr "length_immediate" "1")
17305 (set_attr "prefix" "vex")
17306 (set_attr "mode" "<sseinsnmode>")])
17307
17308 (define_insn "sse4_1_phminposuw"
17309 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,x")
17310 (unspec:V8HI [(match_operand:V8HI 1 "vector_operand" "YrBm,*xBm,xm")]
17311 UNSPEC_PHMINPOSUW))]
17312 "TARGET_SSE4_1"
17313 "%vphminposuw\t{%1, %0|%0, %1}"
17314 [(set_attr "isa" "noavx,noavx,avx")
17315 (set_attr "type" "sselog1")
17316 (set_attr "prefix_extra" "1")
17317 (set_attr "prefix" "orig,orig,vex")
17318 (set_attr "mode" "TI")])
17319
17320 (define_insn "avx2_<code>v16qiv16hi2<mask_name>"
17321 [(set (match_operand:V16HI 0 "register_operand" "=v")
17322 (any_extend:V16HI
17323 (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
17324 "TARGET_AVX2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
17325 "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17326 [(set_attr "type" "ssemov")
17327 (set_attr "prefix_extra" "1")
17328 (set_attr "prefix" "maybe_evex")
17329 (set_attr "mode" "OI")])
17330
17331 (define_expand "<code>v16qiv16hi2"
17332 [(set (match_operand:V16HI 0 "register_operand")
17333 (any_extend:V16HI
17334 (match_operand:V16QI 1 "nonimmediate_operand")))]
17335 "TARGET_AVX2")
17336
17337 (define_insn "avx512bw_<code>v32qiv32hi2<mask_name>"
17338 [(set (match_operand:V32HI 0 "register_operand" "=v")
17339 (any_extend:V32HI
17340 (match_operand:V32QI 1 "nonimmediate_operand" "vm")))]
17341 "TARGET_AVX512BW"
17342 "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17343 [(set_attr "type" "ssemov")
17344 (set_attr "prefix_extra" "1")
17345 (set_attr "prefix" "evex")
17346 (set_attr "mode" "XI")])
17347
17348 (define_expand "<code>v32qiv32hi2"
17349 [(set (match_operand:V32HI 0 "register_operand")
17350 (any_extend:V32HI
17351 (match_operand:V32QI 1 "nonimmediate_operand")))]
17352 "TARGET_AVX512BW")
17353
17354 (define_insn "sse4_1_<code>v8qiv8hi2<mask_name>"
17355 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,v")
17356 (any_extend:V8HI
17357 (vec_select:V8QI
17358 (match_operand:V16QI 1 "register_operand" "Yr,*x,v")
17359 (parallel [(const_int 0) (const_int 1)
17360 (const_int 2) (const_int 3)
17361 (const_int 4) (const_int 5)
17362 (const_int 6) (const_int 7)]))))]
17363 "TARGET_SSE4_1 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
17364 "%vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17365 [(set_attr "isa" "noavx,noavx,avx")
17366 (set_attr "type" "ssemov")
17367 (set_attr "prefix_extra" "1")
17368 (set_attr "prefix" "orig,orig,maybe_evex")
17369 (set_attr "mode" "TI")])
17370
17371 (define_insn "*sse4_1_<code>v8qiv8hi2<mask_name>_1"
17372 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,v")
17373 (any_extend:V8HI
17374 (match_operand:V8QI 1 "memory_operand" "m,m,m")))]
17375 "TARGET_SSE4_1 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
17376 "%vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17377 [(set_attr "isa" "noavx,noavx,avx")
17378 (set_attr "type" "ssemov")
17379 (set_attr "prefix_extra" "1")
17380 (set_attr "prefix" "orig,orig,maybe_evex")
17381 (set_attr "mode" "TI")])
17382
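;; Note: the *_2 splitters in this block match the case where the narrow
;; vector input was itself built by loading a scalar from memory and
;; zero-padding it (a vec_concat or vec_merge with zero); they rewrite
;; the RTL before reload so that pmov<extsuffix> reads the element group
;; directly from memory.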
17383 (define_insn_and_split "*sse4_1_<code>v8qiv8hi2<mask_name>_2"
17384 [(set (match_operand:V8HI 0 "register_operand")
17385 (any_extend:V8HI
17386 (vec_select:V8QI
17387 (subreg:V16QI
17388 (vec_concat:V2DI
17389 (match_operand:DI 1 "memory_operand")
17390 (const_int 0)) 0)
17391 (parallel [(const_int 0) (const_int 1)
17392 (const_int 2) (const_int 3)
17393 (const_int 4) (const_int 5)
17394 (const_int 6) (const_int 7)]))))]
17395 "TARGET_SSE4_1 && <mask_avx512bw_condition> && <mask_avx512vl_condition>
17396 && ix86_pre_reload_split ()"
17397 "#"
17398 "&& 1"
17399 [(set (match_dup 0)
17400 (any_extend:V8HI (match_dup 1)))]
17401 "operands[1] = adjust_address_nv (operands[1], V8QImode, 0);")
17402
17403 (define_expand "<code>v8qiv8hi2"
17404 [(set (match_operand:V8HI 0 "register_operand")
17405 (any_extend:V8HI
17406 (match_operand:V8QI 1 "nonimmediate_operand")))]
17407 "TARGET_SSE4_1"
17408 {
17409 if (!MEM_P (operands[1]))
17410 {
17411 operands[1] = simplify_gen_subreg (V16QImode, operands[1], V8QImode, 0);
17412 emit_insn (gen_sse4_1_<code>v8qiv8hi2 (operands[0], operands[1]));
17413 DONE;
17414 }
17415 })
17416
17417 (define_insn "<mask_codefor>avx512f_<code>v16qiv16si2<mask_name>"
17418 [(set (match_operand:V16SI 0 "register_operand" "=v")
17419 (any_extend:V16SI
17420 (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
17421 "TARGET_AVX512F"
17422 "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
17423 [(set_attr "type" "ssemov")
17424 (set_attr "prefix" "evex")
17425 (set_attr "mode" "XI")])
17426
17427 (define_expand "<code>v16qiv16si2"
17428 [(set (match_operand:V16SI 0 "register_operand")
17429 (any_extend:V16SI
17430 (match_operand:V16QI 1 "nonimmediate_operand")))]
17431 "TARGET_AVX512F")
17432
17433 (define_insn "avx2_<code>v8qiv8si2<mask_name>"
17434 [(set (match_operand:V8SI 0 "register_operand" "=v")
17435 (any_extend:V8SI
17436 (vec_select:V8QI
17437 (match_operand:V16QI 1 "register_operand" "v")
17438 (parallel [(const_int 0) (const_int 1)
17439 (const_int 2) (const_int 3)
17440 (const_int 4) (const_int 5)
17441 (const_int 6) (const_int 7)]))))]
17442 "TARGET_AVX2 && <mask_avx512vl_condition>"
17443 "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17444 [(set_attr "type" "ssemov")
17445 (set_attr "prefix_extra" "1")
17446 (set_attr "prefix" "maybe_evex")
17447 (set_attr "mode" "OI")])
17448
17449 (define_insn "*avx2_<code>v8qiv8si2<mask_name>_1"
17450 [(set (match_operand:V8SI 0 "register_operand" "=v")
17451 (any_extend:V8SI
17452 (match_operand:V8QI 1 "memory_operand" "m")))]
17453 "TARGET_AVX2 && <mask_avx512vl_condition>"
17454 "%vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17455 [(set_attr "type" "ssemov")
17456 (set_attr "prefix_extra" "1")
17457 (set_attr "prefix" "maybe_evex")
17458 (set_attr "mode" "OI")])
17459
17460 (define_insn_and_split "*avx2_<code>v8qiv8si2<mask_name>_2"
17461 [(set (match_operand:V8SI 0 "register_operand")
17462 (any_extend:V8SI
17463 (vec_select:V8QI
17464 (subreg:V16QI
17465 (vec_concat:V2DI
17466 (match_operand:DI 1 "memory_operand")
17467 (const_int 0)) 0)
17468 (parallel [(const_int 0) (const_int 1)
17469 (const_int 2) (const_int 3)
17470 (const_int 4) (const_int 5)
17471 (const_int 6) (const_int 7)]))))]
17472 "TARGET_AVX2 && <mask_avx512vl_condition>
17473 && ix86_pre_reload_split ()"
17474 "#"
17475 "&& 1"
17476 [(set (match_dup 0)
17477 (any_extend:V8SI (match_dup 1)))]
17478 "operands[1] = adjust_address_nv (operands[1], V8QImode, 0);")
17479
17480 (define_expand "<code>v8qiv8si2"
17481 [(set (match_operand:V8SI 0 "register_operand")
17482 (any_extend:V8SI
17483 (match_operand:V8QI 1 "nonimmediate_operand")))]
17484 "TARGET_AVX2"
17485 {
17486 if (!MEM_P (operands[1]))
17487 {
17488 operands[1] = simplify_gen_subreg (V16QImode, operands[1], V8QImode, 0);
17489 emit_insn (gen_avx2_<code>v8qiv8si2 (operands[0], operands[1]));
17490 DONE;
17491 }
17492 })
17493
17494 (define_insn "sse4_1_<code>v4qiv4si2<mask_name>"
17495 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
17496 (any_extend:V4SI
17497 (vec_select:V4QI
17498 (match_operand:V16QI 1 "register_operand" "Yr,*x,v")
17499 (parallel [(const_int 0) (const_int 1)
17500 (const_int 2) (const_int 3)]))))]
17501 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
17502 "%vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17503 [(set_attr "isa" "noavx,noavx,avx")
17504 (set_attr "type" "ssemov")
17505 (set_attr "prefix_extra" "1")
17506 (set_attr "prefix" "orig,orig,maybe_evex")
17507 (set_attr "mode" "TI")])
17508
17509 (define_insn "*sse4_1_<code>v4qiv4si2<mask_name>_1"
17510 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
17511 (any_extend:V4SI
17512 (match_operand:V4QI 1 "memory_operand" "m,m,m")))]
17513 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
17514 "%vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17515 [(set_attr "isa" "noavx,noavx,avx")
17516 (set_attr "type" "ssemov")
17517 (set_attr "prefix_extra" "1")
17518 (set_attr "prefix" "orig,orig,maybe_evex")
17519 (set_attr "mode" "TI")])
17520
17521 (define_insn_and_split "*sse4_1_<code>v4qiv4si2<mask_name>_2"
17522 [(set (match_operand:V4SI 0 "register_operand")
17523 (any_extend:V4SI
17524 (vec_select:V4QI
17525 (subreg:V16QI
17526 (vec_merge:V4SI
17527 (vec_duplicate:V4SI
17528 (match_operand:SI 1 "memory_operand"))
17529 (const_vector:V4SI
17530 [(const_int 0) (const_int 0)
17531 (const_int 0) (const_int 0)])
17532 (const_int 1)) 0)
17533 (parallel [(const_int 0) (const_int 1)
17534 (const_int 2) (const_int 3)]))))]
17535 "TARGET_SSE4_1 && <mask_avx512vl_condition>
17536 && ix86_pre_reload_split ()"
17537 "#"
17538 "&& 1"
17539 [(set (match_dup 0)
17540 (any_extend:V4SI (match_dup 1)))]
17541 "operands[1] = adjust_address_nv (operands[1], V4QImode, 0);")
17542
17543 (define_expand "<code>v4qiv4si2"
17544 [(set (match_operand:V4SI 0 "register_operand")
17545 (any_extend:V4SI
17546 (match_operand:V4QI 1 "nonimmediate_operand")))]
17547 "TARGET_SSE4_1"
17548 {
17549 if (!MEM_P (operands[1]))
17550 {
17551 operands[1] = simplify_gen_subreg (V16QImode, operands[1], V4QImode, 0);
17552 emit_insn (gen_sse4_1_<code>v4qiv4si2 (operands[0], operands[1]));
17553 DONE;
17554 }
17555 })
17556
17557 (define_insn "avx512f_<code>v16hiv16si2<mask_name>"
17558 [(set (match_operand:V16SI 0 "register_operand" "=v")
17559 (any_extend:V16SI
17560 (match_operand:V16HI 1 "nonimmediate_operand" "vm")))]
17561 "TARGET_AVX512F"
17562 "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17563 [(set_attr "type" "ssemov")
17564 (set_attr "prefix" "evex")
17565 (set_attr "mode" "XI")])
17566
17567 (define_expand "<code>v16hiv16si2"
17568 [(set (match_operand:V16SI 0 "register_operand")
17569 (any_extend:V16SI
17570 (match_operand:V16HI 1 "nonimmediate_operand")))]
17571 "TARGET_AVX512F")
17572
17573 (define_insn "avx2_<code>v8hiv8si2<mask_name>"
17574 [(set (match_operand:V8SI 0 "register_operand" "=v")
17575 (any_extend:V8SI
17576 (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
17577 "TARGET_AVX2 && <mask_avx512vl_condition>"
17578 "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17579 [(set_attr "type" "ssemov")
17580 (set_attr "prefix_extra" "1")
17581 (set_attr "prefix" "maybe_evex")
17582 (set_attr "mode" "OI")])
17583
17584 (define_expand "<code>v8hiv8si2"
17585 [(set (match_operand:V8SI 0 "register_operand")
17586 (any_extend:V8SI
17587 (match_operand:V8HI 1 "nonimmediate_operand")))]
17588 "TARGET_AVX2")
17589
17590 (define_insn "sse4_1_<code>v4hiv4si2<mask_name>"
17591 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
17592 (any_extend:V4SI
17593 (vec_select:V4HI
17594 (match_operand:V8HI 1 "register_operand" "Yr,*x,v")
17595 (parallel [(const_int 0) (const_int 1)
17596 (const_int 2) (const_int 3)]))))]
17597 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
17598 "%vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17599 [(set_attr "isa" "noavx,noavx,avx")
17600 (set_attr "type" "ssemov")
17601 (set_attr "prefix_extra" "1")
17602 (set_attr "prefix" "orig,orig,maybe_evex")
17603 (set_attr "mode" "TI")])
17604
17605 (define_insn "*sse4_1_<code>v4hiv4si2<mask_name>_1"
17606 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
17607 (any_extend:V4SI
17608 (match_operand:V4HI 1 "memory_operand" "m,m,m")))]
17609 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
17610 "%vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17611 [(set_attr "isa" "noavx,noavx,avx")
17612 (set_attr "type" "ssemov")
17613 (set_attr "prefix_extra" "1")
17614 (set_attr "prefix" "orig,orig,maybe_evex")
17615 (set_attr "mode" "TI")])
17616
17617 (define_insn_and_split "*sse4_1_<code>v4hiv4si2<mask_name>_2"
17618 [(set (match_operand:V4SI 0 "register_operand")
17619 (any_extend:V4SI
17620 (vec_select:V4HI
17621 (subreg:V8HI
17622 (vec_concat:V2DI
17623 (match_operand:DI 1 "memory_operand")
17624 (const_int 0)) 0)
17625 (parallel [(const_int 0) (const_int 1)
17626 (const_int 2) (const_int 3)]))))]
17627 "TARGET_SSE4_1 && <mask_avx512vl_condition>
17628 && ix86_pre_reload_split ()"
17629 "#"
17630 "&& 1"
17631 [(set (match_dup 0)
17632 (any_extend:V4SI (match_dup 1)))]
17633 "operands[1] = adjust_address_nv (operands[1], V4HImode, 0);")
17634
17635 (define_expand "<code>v4hiv4si2"
17636 [(set (match_operand:V4SI 0 "register_operand")
17637 (any_extend:V4SI
17638 (match_operand:V4HI 1 "nonimmediate_operand")))]
17639 "TARGET_SSE4_1"
17640 {
17641 if (!MEM_P (operands[1]))
17642 {
17643 operands[1] = simplify_gen_subreg (V8HImode, operands[1], V4HImode, 0);
17644 emit_insn (gen_sse4_1_<code>v4hiv4si2 (operands[0], operands[1]));
17645 DONE;
17646 }
17647 })
17648
17649 (define_insn "avx512f_<code>v8qiv8di2<mask_name>"
17650 [(set (match_operand:V8DI 0 "register_operand" "=v")
17651 (any_extend:V8DI
17652 (vec_select:V8QI
17653 (match_operand:V16QI 1 "register_operand" "v")
17654 (parallel [(const_int 0) (const_int 1)
17655 (const_int 2) (const_int 3)
17656 (const_int 4) (const_int 5)
17657 (const_int 6) (const_int 7)]))))]
17658 "TARGET_AVX512F"
17659 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17660 [(set_attr "type" "ssemov")
17661 (set_attr "prefix" "evex")
17662 (set_attr "mode" "XI")])
17663
17664 (define_insn "*avx512f_<code>v8qiv8di2<mask_name>_1"
17665 [(set (match_operand:V8DI 0 "register_operand" "=v")
17666 (any_extend:V8DI
17667 (match_operand:V8QI 1 "memory_operand" "m")))]
17668 "TARGET_AVX512F"
17669 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17670 [(set_attr "type" "ssemov")
17671 (set_attr "prefix" "evex")
17672 (set_attr "mode" "XI")])
17673
17674 (define_insn_and_split "*avx512f_<code>v8qiv8di2<mask_name>_2"
17675 [(set (match_operand:V8DI 0 "register_operand")
17676 (any_extend:V8DI
17677 (vec_select:V8QI
17678 (subreg:V16QI
17679 (vec_concat:V2DI
17680 (match_operand:DI 1 "memory_operand")
17681 (const_int 0)) 0)
17682 (parallel [(const_int 0) (const_int 1)
17683 (const_int 2) (const_int 3)
17684 (const_int 4) (const_int 5)
17685 (const_int 6) (const_int 7)]))))]
17686 "TARGET_AVX512F && ix86_pre_reload_split ()"
17687 "#"
17688 "&& 1"
17689 [(set (match_dup 0)
17690 (any_extend:V8DI (match_dup 1)))]
17691 "operands[1] = adjust_address_nv (operands[1], V8QImode, 0);")
17692
17693 (define_expand "<code>v8qiv8di2"
17694 [(set (match_operand:V8DI 0 "register_operand")
17695 (any_extend:V8DI
17696 (match_operand:V8QI 1 "nonimmediate_operand")))]
17697 "TARGET_AVX512F"
17698 {
17699 if (!MEM_P (operands[1]))
17700 {
17701 operands[1] = simplify_gen_subreg (V16QImode, operands[1], V8QImode, 0);
17702 emit_insn (gen_avx512f_<code>v8qiv8di2 (operands[0], operands[1]));
17703 DONE;
17704 }
17705 })
17706
17707 (define_insn "avx2_<code>v4qiv4di2<mask_name>"
17708 [(set (match_operand:V4DI 0 "register_operand" "=v")
17709 (any_extend:V4DI
17710 (vec_select:V4QI
17711 (match_operand:V16QI 1 "register_operand" "v")
17712 (parallel [(const_int 0) (const_int 1)
17713 (const_int 2) (const_int 3)]))))]
17714 "TARGET_AVX2 && <mask_avx512vl_condition>"
17715 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17716 [(set_attr "type" "ssemov")
17717 (set_attr "prefix_extra" "1")
17718 (set_attr "prefix" "maybe_evex")
17719 (set_attr "mode" "OI")])
17720
17721 (define_insn "*avx2_<code>v4qiv4di2<mask_name>_1"
17722 [(set (match_operand:V4DI 0 "register_operand" "=v")
17723 (any_extend:V4DI
17724 (match_operand:V4QI 1 "memory_operand" "m")))]
17725 "TARGET_AVX2 && <mask_avx512vl_condition>"
17726 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17727 [(set_attr "type" "ssemov")
17728 (set_attr "prefix_extra" "1")
17729 (set_attr "prefix" "maybe_evex")
17730 (set_attr "mode" "OI")])
17731
17732 (define_insn_and_split "*avx2_<code>v4qiv4di2<mask_name>_2"
17733 [(set (match_operand:V4DI 0 "register_operand")
17734 (any_extend:V4DI
17735 (vec_select:V4QI
17736 (subreg:V16QI
17737 (vec_merge:V4SI
17738 (vec_duplicate:V4SI
17739 (match_operand:SI 1 "memory_operand"))
17740 (const_vector:V4SI
17741 [(const_int 0) (const_int 0)
17742 (const_int 0) (const_int 0)])
17743 (const_int 1)) 0)
17744 (parallel [(const_int 0) (const_int 1)
17745 (const_int 2) (const_int 3)]))))]
17746 "TARGET_AVX2 && <mask_avx512vl_condition>
17747 && ix86_pre_reload_split ()"
17748 "#"
17749 "&& 1"
17750 [(set (match_dup 0)
17751 (any_extend:V4DI (match_dup 1)))]
17752 "operands[1] = adjust_address_nv (operands[1], V4QImode, 0);")
17753
17754 (define_expand "<code>v4qiv4di2"
17755 [(set (match_operand:V4DI 0 "register_operand")
17756 (any_extend:V4DI
17757 (match_operand:V4QI 1 "nonimmediate_operand")))]
17758 "TARGET_AVX2"
17759 {
17760 if (!MEM_P (operands[1]))
17761 {
17762 operands[1] = simplify_gen_subreg (V16QImode, operands[1], V4QImode, 0);
17763 emit_insn (gen_avx2_<code>v4qiv4di2 (operands[0], operands[1]));
17764 DONE;
17765 }
17766 })
17767
17768 (define_insn "sse4_1_<code>v2qiv2di2<mask_name>"
17769 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
17770 (any_extend:V2DI
17771 (vec_select:V2QI
17772 (match_operand:V16QI 1 "register_operand" "Yr,*x,v")
17773 (parallel [(const_int 0) (const_int 1)]))))]
17774 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
17775 "%vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17776 [(set_attr "isa" "noavx,noavx,avx")
17777 (set_attr "type" "ssemov")
17778 (set_attr "prefix_extra" "1")
17779 (set_attr "prefix" "orig,orig,maybe_evex")
17780 (set_attr "mode" "TI")])
17781
17782 (define_expand "<code>v2qiv2di2"
17783 [(set (match_operand:V2DI 0 "register_operand")
17784 (any_extend:V2DI
17785 (match_operand:V2QI 1 "register_operand")))]
17786 "TARGET_SSE4_1"
17787 {
17788 operands[1] = simplify_gen_subreg (V16QImode, operands[1], V2QImode, 0);
17789 emit_insn (gen_sse4_1_<code>v2qiv2di2 (operands[0], operands[1]));
17790 DONE;
17791 })
17792
17793 (define_insn "avx512f_<code>v8hiv8di2<mask_name>"
17794 [(set (match_operand:V8DI 0 "register_operand" "=v")
17795 (any_extend:V8DI
17796 (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
17797 "TARGET_AVX512F"
17798 "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
17799 [(set_attr "type" "ssemov")
17800 (set_attr "prefix" "evex")
17801 (set_attr "mode" "XI")])
17802
17803 (define_expand "<code>v8hiv8di2"
17804 [(set (match_operand:V8DI 0 "register_operand")
17805 (any_extend:V8DI
17806 (match_operand:V8HI 1 "nonimmediate_operand")))]
17807 "TARGET_AVX512F")
17808
17809 (define_insn "avx2_<code>v4hiv4di2<mask_name>"
17810 [(set (match_operand:V4DI 0 "register_operand" "=v")
17811 (any_extend:V4DI
17812 (vec_select:V4HI
17813 (match_operand:V8HI 1 "register_operand" "v")
17814 (parallel [(const_int 0) (const_int 1)
17815 (const_int 2) (const_int 3)]))))]
17816 "TARGET_AVX2 && <mask_avx512vl_condition>"
17817 "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17818 [(set_attr "type" "ssemov")
17819 (set_attr "prefix_extra" "1")
17820 (set_attr "prefix" "maybe_evex")
17821 (set_attr "mode" "OI")])
17822
17823 (define_insn "*avx2_<code>v4hiv4di2<mask_name>_1"
17824 [(set (match_operand:V4DI 0 "register_operand" "=v")
17825 (any_extend:V4DI
17826 (match_operand:V4HI 1 "memory_operand" "m")))]
17827 "TARGET_AVX2 && <mask_avx512vl_condition>"
17828 "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17829 [(set_attr "type" "ssemov")
17830 (set_attr "prefix_extra" "1")
17831 (set_attr "prefix" "maybe_evex")
17832 (set_attr "mode" "OI")])
17833
17834 (define_insn_and_split "*avx2_<code>v4hiv4di2<mask_name>_2"
17835 [(set (match_operand:V4DI 0 "register_operand")
17836 (any_extend:V4DI
17837 (vec_select:V4HI
17838 (subreg:V8HI
17839 (vec_concat:V2DI
17840 (match_operand:DI 1 "memory_operand")
17841 (const_int 0)) 0)
17842 (parallel [(const_int 0) (const_int 1)
17843 (const_int 2) (const_int 3)]))))]
17844 "TARGET_AVX2 && <mask_avx512vl_condition>
17845 && ix86_pre_reload_split ()"
17846 "#"
17847 "&& 1"
17848 [(set (match_dup 0)
17849 (any_extend:V4DI (match_dup 1)))]
17850 "operands[1] = adjust_address_nv (operands[1], V4HImode, 0);")
17851
17852 (define_expand "<code>v4hiv4di2"
17853 [(set (match_operand:V4DI 0 "register_operand")
17854 (any_extend:V4DI
17855 (match_operand:V4HI 1 "nonimmediate_operand")))]
17856 "TARGET_AVX2"
17857 {
17858 if (!MEM_P (operands[1]))
17859 {
17860 operands[1] = simplify_gen_subreg (V8HImode, operands[1], V4HImode, 0);
17861 emit_insn (gen_avx2_<code>v4hiv4di2 (operands[0], operands[1]));
17862 DONE;
17863 }
17864 })
17865
17866 (define_insn "sse4_1_<code>v2hiv2di2<mask_name>"
17867 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
17868 (any_extend:V2DI
17869 (vec_select:V2HI
17870 (match_operand:V8HI 1 "register_operand" "Yr,*x,v")
17871 (parallel [(const_int 0) (const_int 1)]))))]
17872 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
17873 "%vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17874 [(set_attr "isa" "noavx,noavx,avx")
17875 (set_attr "type" "ssemov")
17876 (set_attr "prefix_extra" "1")
17877 (set_attr "prefix" "orig,orig,maybe_evex")
17878 (set_attr "mode" "TI")])
17879
17880 (define_insn "*sse4_1_<code>v2hiv2di2<mask_name>_1"
17881 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
17882 (any_extend:V2DI
17883 (match_operand:V2HI 1 "memory_operand" "m,m,m")))]
17884 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
17885 "%vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17886 [(set_attr "isa" "noavx,noavx,avx")
17887 (set_attr "type" "ssemov")
17888 (set_attr "prefix_extra" "1")
17889 (set_attr "prefix" "orig,orig,maybe_evex")
17890 (set_attr "mode" "TI")])
17891
17892 (define_insn_and_split "*sse4_1_<code>v2hiv2di2<mask_name>_2"
17893 [(set (match_operand:V2DI 0 "register_operand")
17894 (any_extend:V2DI
17895 (vec_select:V2HI
17896 (subreg:V8HI
17897 (vec_merge:V4SI
17898 (vec_duplicate:V4SI
17899 (match_operand:SI 1 "memory_operand"))
17900 (const_vector:V4SI
17901 [(const_int 0) (const_int 0)
17902 (const_int 0) (const_int 0)])
17903 (const_int 1)) 0)
17904 (parallel [(const_int 0) (const_int 1)]))))]
17905 "TARGET_SSE4_1 && <mask_avx512vl_condition>
17906 && ix86_pre_reload_split ()"
17907 "#"
17908 "&& 1"
17909 [(set (match_dup 0)
17910 (any_extend:V2DI (match_dup 1)))]
17911 "operands[1] = adjust_address_nv (operands[1], V2HImode, 0);")
17912
17913 (define_expand "<code>v2hiv2di2"
17914 [(set (match_operand:V2DI 0 "register_operand")
17915 (any_extend:V2DI
17916 (match_operand:V2HI 1 "nonimmediate_operand")))]
17917 "TARGET_SSE4_1"
17918 {
17919 if (!MEM_P (operands[1]))
17920 {
17921 operands[1] = simplify_gen_subreg (V8HImode, operands[1], V2HImode, 0);
17922 emit_insn (gen_sse4_1_<code>v2hiv2di2 (operands[0], operands[1]));
17923 DONE;
17924 }
17925 })
17926
17927 (define_insn "avx512f_<code>v8siv8di2<mask_name>"
17928 [(set (match_operand:V8DI 0 "register_operand" "=v")
17929 (any_extend:V8DI
17930 (match_operand:V8SI 1 "nonimmediate_operand" "vm")))]
17931 "TARGET_AVX512F"
17932 "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17933 [(set_attr "type" "ssemov")
17934 (set_attr "prefix" "evex")
17935 (set_attr "mode" "XI")])
17936
17937 (define_expand "<code>v8siv8di2"
17938 [(set (match_operand:V8DI 0 "register_operand" "=v")
17939 (any_extend:V8DI
17940 (match_operand:V8SI 1 "nonimmediate_operand" "vm")))]
17941 "TARGET_AVX512F")
17942
17943 (define_insn "avx2_<code>v4siv4di2<mask_name>"
17944 [(set (match_operand:V4DI 0 "register_operand" "=v")
17945 (any_extend:V4DI
17946 (match_operand:V4SI 1 "nonimmediate_operand" "vm")))]
17947 "TARGET_AVX2 && <mask_avx512vl_condition>"
17948 "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17949 [(set_attr "type" "ssemov")
17950 (set_attr "prefix" "maybe_evex")
17951 (set_attr "prefix_extra" "1")
17952 (set_attr "mode" "OI")])
17953
17954 (define_expand "<code>v4siv4di2"
17955 [(set (match_operand:V4DI 0 "register_operand" "=v")
17956 (any_extend:V4DI
17957 (match_operand:V4SI 1 "nonimmediate_operand" "vm")))]
17958 "TARGET_AVX2")
17959
17960 (define_insn "sse4_1_<code>v2siv2di2<mask_name>"
17961 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
17962 (any_extend:V2DI
17963 (vec_select:V2SI
17964 (match_operand:V4SI 1 "register_operand" "Yr,*x,v")
17965 (parallel [(const_int 0) (const_int 1)]))))]
17966 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
17967 "%vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17968 [(set_attr "isa" "noavx,noavx,avx")
17969 (set_attr "type" "ssemov")
17970 (set_attr "prefix_extra" "1")
17971 (set_attr "prefix" "orig,orig,maybe_evex")
17972 (set_attr "mode" "TI")])
17973
17974 (define_insn "*sse4_1_<code>v2siv2di2<mask_name>_1"
17975 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
17976 (any_extend:V2DI
17977 (match_operand:V2SI 1 "memory_operand" "m,m,m")))]
17978 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
17979 "%vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17980 [(set_attr "isa" "noavx,noavx,avx")
17981 (set_attr "type" "ssemov")
17982 (set_attr "prefix_extra" "1")
17983 (set_attr "prefix" "orig,orig,maybe_evex")
17984 (set_attr "mode" "TI")])
17985
17986 (define_insn_and_split "*sse4_1_<code>v2siv2di2<mask_name>_2"
17987 [(set (match_operand:V2DI 0 "register_operand")
17988 (any_extend:V2DI
17989 (vec_select:V2SI
17990 (subreg:V4SI
17991 (vec_concat:V2DI
17992 (match_operand:DI 1 "memory_operand")
17993 (const_int 0)) 0)
17994 (parallel [(const_int 0) (const_int 1)]))))]
17995 "TARGET_SSE4_1 && <mask_avx512vl_condition>
17996 && ix86_pre_reload_split ()"
17997 "#"
17998 "&& 1"
17999 [(set (match_dup 0)
18000 (any_extend:V2DI (match_dup 1)))]
18001 "operands[1] = adjust_address_nv (operands[1], V2SImode, 0);")
18002
18003 (define_expand "<code>v2siv2di2"
18004 [(set (match_operand:V2DI 0 "register_operand")
18005 (any_extend:V2DI
18006 (match_operand:V2SI 1 "nonimmediate_operand")))]
18007 "TARGET_SSE4_1"
18008 {
18009 if (!MEM_P (operands[1]))
18010 {
18011 operands[1] = simplify_gen_subreg (V4SImode, operands[1], V2SImode, 0);
18012 emit_insn (gen_sse4_1_<code>v2siv2di2 (operands[0], operands[1]));
18013 DONE;
18014 }
18015 })
18016
18017 ;; vtestps/vtestpd are very similar to comiss and ucomiss when
18018 ;; setting FLAGS_REG. But they are not really compare instructions.
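;; Per the ISA reference, vtest only examines the sign bits: ZF is set
;; when the sign bits of (%0 & %1) are all clear, CF when the sign bits
;; of (~%0 & %1) are all clear.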
18019 (define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>"
18020 [(set (reg:CC FLAGS_REG)
18021 (unspec:CC [(match_operand:VF_128_256 0 "register_operand" "x")
18022 (match_operand:VF_128_256 1 "nonimmediate_operand" "xm")]
18023 UNSPEC_VTESTP))]
18024 "TARGET_AVX"
18025 "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
18026 [(set_attr "type" "ssecomi")
18027 (set_attr "prefix_extra" "1")
18028 (set_attr "prefix" "vex")
18029 (set_attr "mode" "<MODE>")])
18030
18031 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
18032 ;; But it is not really a compare instruction.
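;; Per the ISA reference, ptest sets ZF when (%0 & %1) is all zeros and
;; CF when (~%0 & %1) is all zeros; only FLAGS_REG is written.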
18033 (define_insn "<sse4_1>_ptest<mode>"
18034 [(set (reg:CC FLAGS_REG)
18035 (unspec:CC [(match_operand:V_AVX 0 "register_operand" "Yr, *x, x")
18036 (match_operand:V_AVX 1 "vector_operand" "YrBm, *xBm, xm")]
18037 UNSPEC_PTEST))]
18038 "TARGET_SSE4_1"
18039 "%vptest\t{%1, %0|%0, %1}"
18040 [(set_attr "isa" "noavx,noavx,avx")
18041 (set_attr "type" "ssecomi")
18042 (set_attr "prefix_extra" "1")
18043 (set_attr "prefix" "orig,orig,vex")
18044 (set (attr "btver2_decode")
18045 (if_then_else
18046 (match_test "<sseinsnmode>mode==OImode")
18047 (const_string "vector")
18048 (const_string "*")))
18049 (set_attr "mode" "<sseinsnmode>")])
18050
18051 (define_insn "ptesttf2"
18052 [(set (reg:CC FLAGS_REG)
18053 (unspec:CC [(match_operand:TF 0 "register_operand" "Yr, *x, x")
18054 (match_operand:TF 1 "vector_operand" "YrBm, *xBm, xm")]
18055 UNSPEC_PTEST))]
18056 "TARGET_SSE4_1"
18057 "%vptest\t{%1, %0|%0, %1}"
18058 [(set_attr "isa" "noavx,noavx,avx")
18059 (set_attr "type" "ssecomi")
18060 (set_attr "prefix_extra" "1")
18061 (set_attr "prefix" "orig,orig,vex")
18062 (set_attr "mode" "TI")])
18063
18064 (define_expand "nearbyint<mode>2"
18065 [(set (match_operand:VF 0 "register_operand")
18066 (unspec:VF
18067 [(match_operand:VF 1 "vector_operand")
18068 (match_dup 2)]
18069 UNSPEC_ROUND))]
18070 "TARGET_SSE4_1"
18071 "operands[2] = GEN_INT (ROUND_MXCSR | ROUND_NO_EXC);")
18072
18073 (define_expand "rint<mode>2"
18074 [(set (match_operand:VF 0 "register_operand")
18075 (unspec:VF
18076 [(match_operand:VF 1 "vector_operand")
18077 (match_dup 2)]
18078 UNSPEC_ROUND))]
18079 "TARGET_SSE4_1"
18080 "operands[2] = GEN_INT (ROUND_MXCSR);")
18081
18082 (define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
18083 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
18084 (unspec:VF_128_256
18085 [(match_operand:VF_128_256 1 "vector_operand" "YrBm,*xBm,xm")
18086 (match_operand:SI 2 "const_0_to_15_operand" "n,n,n")]
18087 UNSPEC_ROUND))]
18088 "TARGET_SSE4_1"
18089 "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
18090 [(set_attr "isa" "noavx,noavx,avx")
18091 (set_attr "type" "ssecvt")
18092 (set_attr "prefix_data16" "1,1,*")
18093 (set_attr "prefix_extra" "1")
18094 (set_attr "length_immediate" "1")
18095 (set_attr "prefix" "orig,orig,vex")
18096 (set_attr "mode" "<MODE>")])
18097
18098 (define_expand "<sse4_1>_round<ssemodesuffix>_sfix<avxsizesuffix>"
18099 [(match_operand:<sseintvecmode> 0 "register_operand")
18100 (match_operand:VF1_128_256 1 "vector_operand")
18101 (match_operand:SI 2 "const_0_to_15_operand")]
18102 "TARGET_SSE4_1"
18103 {
18104 rtx tmp = gen_reg_rtx (<MODE>mode);
18105
18106 emit_insn
18107 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp, operands[1],
18108 operands[2]));
18109 emit_insn
18110 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
18111 DONE;
18112 })
18113
18114 (define_expand "avx512f_round<castmode>512"
18115 [(match_operand:VF_512 0 "register_operand")
18116 (match_operand:VF_512 1 "nonimmediate_operand")
18117 (match_operand:SI 2 "const_0_to_15_operand")]
18118 "TARGET_AVX512F"
18119 {
18120 emit_insn (gen_avx512f_rndscale<mode> (operands[0], operands[1], operands[2]));
18121 DONE;
18122 })
18123
18124 (define_expand "avx512f_roundps512_sfix"
18125 [(match_operand:V16SI 0 "register_operand")
18126 (match_operand:V16SF 1 "nonimmediate_operand")
18127 (match_operand:SI 2 "const_0_to_15_operand")]
18128 "TARGET_AVX512F"
18129 {
18130 rtx tmp = gen_reg_rtx (V16SFmode);
18131 emit_insn (gen_avx512f_rndscalev16sf (tmp, operands[1], operands[2]));
18132 emit_insn (gen_fix_truncv16sfv16si2 (operands[0], tmp));
18133 DONE;
18134 })
18135
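;; A note on the expander below: for V2DF, when AVX is enabled, 256-bit
;; vectors are not penalized and we are optimizing for speed, the two
;; 128-bit inputs are concatenated into a single V4DF value, rounded
;; once with vroundpd256 and converted directly to V4SI, instead of
;; doing two 128-bit rounds followed by a pack.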
18136 (define_expand "<sse4_1>_round<ssemodesuffix>_vec_pack_sfix<avxsizesuffix>"
18137 [(match_operand:<ssepackfltmode> 0 "register_operand")
18138 (match_operand:VF2 1 "vector_operand")
18139 (match_operand:VF2 2 "vector_operand")
18140 (match_operand:SI 3 "const_0_to_15_operand")]
18141 "TARGET_SSE4_1"
18142 {
18143 rtx tmp0, tmp1;
18144
18145 if (<MODE>mode == V2DFmode
18146 && TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
18147 {
18148 rtx tmp2 = gen_reg_rtx (V4DFmode);
18149
18150 tmp0 = gen_reg_rtx (V4DFmode);
18151 tmp1 = force_reg (V2DFmode, operands[1]);
18152
18153 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
18154 emit_insn (gen_avx_roundpd256 (tmp2, tmp0, operands[3]));
18155 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
18156 }
18157 else
18158 {
18159 tmp0 = gen_reg_rtx (<MODE>mode);
18160 tmp1 = gen_reg_rtx (<MODE>mode);
18161
18162 emit_insn
18163 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp0, operands[1],
18164 operands[3]));
18165 emit_insn
18166 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp1, operands[2],
18167 operands[3]));
18168 emit_insn
18169 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
18170 }
18171 DONE;
18172 })
18173
18174 (define_insn "sse4_1_round<ssescalarmodesuffix>"
18175 [(set (match_operand:VF_128 0 "register_operand" "=Yr,*x,x,v")
18176 (vec_merge:VF_128
18177 (unspec:VF_128
18178 [(match_operand:VF_128 2 "nonimmediate_operand" "Yrm,*xm,xm,vm")
18179 (match_operand:SI 3 "const_0_to_15_operand" "n,n,n,n")]
18180 UNSPEC_ROUND)
18181 (match_operand:VF_128 1 "register_operand" "0,0,x,v")
18182 (const_int 1)))]
18183 "TARGET_SSE4_1"
18184 "@
18185 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %3}
18186 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %3}
18187 vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}
18188 vrndscale<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}"
18189 [(set_attr "isa" "noavx,noavx,avx,avx512f")
18190 (set_attr "type" "ssecvt")
18191 (set_attr "length_immediate" "1")
18192 (set_attr "prefix_data16" "1,1,*,*")
18193 (set_attr "prefix_extra" "1")
18194 (set_attr "prefix" "orig,orig,vex,evex")
18195 (set_attr "mode" "<MODE>")])
18196
18197 (define_insn "*sse4_1_round<ssescalarmodesuffix>"
18198 [(set (match_operand:VF_128 0 "register_operand" "=Yr,*x,x,v")
18199 (vec_merge:VF_128
18200 (vec_duplicate:VF_128
18201 (unspec:<ssescalarmode>
18202 [(match_operand:<ssescalarmode> 2 "nonimmediate_operand" "Yrm,*xm,xm,vm")
18203 (match_operand:SI 3 "const_0_to_15_operand" "n,n,n,n")]
18204 UNSPEC_ROUND))
18205 (match_operand:VF_128 1 "register_operand" "0,0,x,v")
18206 (const_int 1)))]
18207 "TARGET_SSE4_1"
18208 "@
18209 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
18210 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
18211 vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
18212 vrndscale<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18213 [(set_attr "isa" "noavx,noavx,avx,avx512f")
18214 (set_attr "type" "ssecvt")
18215 (set_attr "length_immediate" "1")
18216 (set_attr "prefix_data16" "1,1,*,*")
18217 (set_attr "prefix_extra" "1")
18218 (set_attr "prefix" "orig,orig,vex,evex")
18219 (set_attr "mode" "<MODE>")])
18220
18221 (define_expand "round<mode>2"
18222 [(set (match_dup 3)
18223 (plus:VF
18224 (match_operand:VF 1 "register_operand")
18225 (match_dup 2)))
18226 (set (match_operand:VF 0 "register_operand")
18227 (unspec:VF
18228 [(match_dup 3) (match_dup 4)]
18229 UNSPEC_ROUND))]
18230 "TARGET_SSE4_1 && !flag_trapping_math"
18231 {
18232 machine_mode scalar_mode;
18233 const struct real_format *fmt;
18234 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
18235 rtx half, vec_half;
18236
18237 scalar_mode = GET_MODE_INNER (<MODE>mode);
18238
18239 /* Load nextafter (0.5, 0.0); see the note after this expander.  */
18240 fmt = REAL_MODE_FORMAT (scalar_mode);
18241 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, scalar_mode);
18242 real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);
18243 half = const_double_from_real_value (pred_half, scalar_mode);
18244
18245 vec_half = ix86_build_const_vector (<MODE>mode, true, half);
18246 vec_half = force_reg (<MODE>mode, vec_half);
18247
18248 operands[2] = gen_reg_rtx (<MODE>mode);
18249 emit_insn (gen_copysign<mode>3 (operands[2], vec_half, operands[1]));
18250
18251 operands[3] = gen_reg_rtx (<MODE>mode);
18252 operands[4] = GEN_INT (ROUND_TRUNC);
18253 })
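
;; The round<mode>2 expander above implements rounding to nearest with
;; halfway cases away from zero, which has no direct SSE4.1 rounding
;; immediate: it adds a copysign'ed "almost 0.5" (nextafter (0.5, 0.0),
;; i.e. 0.5 - 2**(-p-1) for a p-bit significand) and then truncates.
;; Using 0.5 itself would be wrong for the largest value below 0.5,
;; where x + 0.5 rounds up to 1.0 under round-to-nearest.  A minimal
;; scalar sketch in C, assuming float and the made-up helper name
;; round_half_away:
;;
;;   #include <math.h>
;;
;;   static float
;;   round_half_away (float x)
;;   {
;;     /* Largest float strictly below 0.5, i.e. 0.5 - 2**-25.  */
;;     float pred_half = nextafterf (0.5f, 0.0f);
;;     return truncf (x + copysignf (pred_half, x));
;;   }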
18254
18255 (define_expand "round<mode>2_sfix"
18256 [(match_operand:<sseintvecmode> 0 "register_operand")
18257 (match_operand:VF1 1 "register_operand")]
18258 "TARGET_SSE4_1 && !flag_trapping_math"
18259 {
18260 rtx tmp = gen_reg_rtx (<MODE>mode);
18261
18262 emit_insn (gen_round<mode>2 (tmp, operands[1]));
18263
18264 emit_insn
18265 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
18266 DONE;
18267 })
18268
18269 (define_expand "round<mode>2_vec_pack_sfix"
18270 [(match_operand:<ssepackfltmode> 0 "register_operand")
18271 (match_operand:VF2 1 "register_operand")
18272 (match_operand:VF2 2 "register_operand")]
18273 "TARGET_SSE4_1 && !flag_trapping_math"
18274 {
18275 rtx tmp0, tmp1;
18276
18277 if (<MODE>mode == V2DFmode
18278 && TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
18279 {
18280 rtx tmp2 = gen_reg_rtx (V4DFmode);
18281
18282 tmp0 = gen_reg_rtx (V4DFmode);
18283 tmp1 = force_reg (V2DFmode, operands[1]);
18284
18285 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
18286 emit_insn (gen_roundv4df2 (tmp2, tmp0));
18287 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
18288 }
18289 else
18290 {
18291 tmp0 = gen_reg_rtx (<MODE>mode);
18292 tmp1 = gen_reg_rtx (<MODE>mode);
18293
18294 emit_insn (gen_round<mode>2 (tmp0, operands[1]));
18295 emit_insn (gen_round<mode>2 (tmp1, operands[2]));
18296
18297 emit_insn
18298 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
18299 }
18300 DONE;
18301 })
18302
18303 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
18304 ;;
18305 ;; Intel SSE4.2 string/text processing instructions
18306 ;;
18307 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
18308
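;; The combined sse4_2_pcmpestr pattern below exposes all three results
;; of pcmpestr (%ecx, %xmm0 and the flags).  At split time it inspects
;; REG_UNUSED notes to see which results are actually live and emits
;; only the variants that are needed: pcmpestri, pcmpestrm, or the
;; flags-only form when just FLAGS_REG is used.  If nothing is live the
;; insn is simply deleted.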
18309 (define_insn_and_split "sse4_2_pcmpestr"
18310 [(set (match_operand:SI 0 "register_operand" "=c,c")
18311 (unspec:SI
18312 [(match_operand:V16QI 2 "register_operand" "x,x")
18313 (match_operand:SI 3 "register_operand" "a,a")
18314 (match_operand:V16QI 4 "nonimmediate_operand" "x,m")
18315 (match_operand:SI 5 "register_operand" "d,d")
18316 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
18317 UNSPEC_PCMPESTR))
18318 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
18319 (unspec:V16QI
18320 [(match_dup 2)
18321 (match_dup 3)
18322 (match_dup 4)
18323 (match_dup 5)
18324 (match_dup 6)]
18325 UNSPEC_PCMPESTR))
18326 (set (reg:CC FLAGS_REG)
18327 (unspec:CC
18328 [(match_dup 2)
18329 (match_dup 3)
18330 (match_dup 4)
18331 (match_dup 5)
18332 (match_dup 6)]
18333 UNSPEC_PCMPESTR))]
18334 "TARGET_SSE4_2
18335 && ix86_pre_reload_split ()"
18336 "#"
18337 "&& 1"
18338 [(const_int 0)]
18339 {
18340 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
18341 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
18342 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
18343
18344 if (ecx)
18345 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
18346 operands[3], operands[4],
18347 operands[5], operands[6]));
18348 if (xmm0)
18349 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
18350 operands[3], operands[4],
18351 operands[5], operands[6]));
18352 if (flags && !(ecx || xmm0))
18353 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
18354 operands[2], operands[3],
18355 operands[4], operands[5],
18356 operands[6]));
18357 if (!(flags || ecx || xmm0))
18358 emit_note (NOTE_INSN_DELETED);
18359
18360 DONE;
18361 }
18362 [(set_attr "type" "sselog")
18363 (set_attr "prefix_data16" "1")
18364 (set_attr "prefix_extra" "1")
18365 (set_attr "length_immediate" "1")
18366 (set_attr "memory" "none,load")
18367 (set_attr "mode" "TI")])
18368
18369 (define_insn "sse4_2_pcmpestri"
18370 [(set (match_operand:SI 0 "register_operand" "=c,c")
18371 (unspec:SI
18372 [(match_operand:V16QI 1 "register_operand" "x,x")
18373 (match_operand:SI 2 "register_operand" "a,a")
18374 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
18375 (match_operand:SI 4 "register_operand" "d,d")
18376 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
18377 UNSPEC_PCMPESTR))
18378 (set (reg:CC FLAGS_REG)
18379 (unspec:CC
18380 [(match_dup 1)
18381 (match_dup 2)
18382 (match_dup 3)
18383 (match_dup 4)
18384 (match_dup 5)]
18385 UNSPEC_PCMPESTR))]
18386 "TARGET_SSE4_2"
18387 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
18388 [(set_attr "type" "sselog")
18389 (set_attr "prefix_data16" "1")
18390 (set_attr "prefix_extra" "1")
18391 (set_attr "prefix" "maybe_vex")
18392 (set_attr "length_immediate" "1")
18393 (set_attr "btver2_decode" "vector")
18394 (set_attr "memory" "none,load")
18395 (set_attr "mode" "TI")])
18396
18397 (define_insn "sse4_2_pcmpestrm"
18398 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
18399 (unspec:V16QI
18400 [(match_operand:V16QI 1 "register_operand" "x,x")
18401 (match_operand:SI 2 "register_operand" "a,a")
18402 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
18403 (match_operand:SI 4 "register_operand" "d,d")
18404 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
18405 UNSPEC_PCMPESTR))
18406 (set (reg:CC FLAGS_REG)
18407 (unspec:CC
18408 [(match_dup 1)
18409 (match_dup 2)
18410 (match_dup 3)
18411 (match_dup 4)
18412 (match_dup 5)]
18413 UNSPEC_PCMPESTR))]
18414 "TARGET_SSE4_2"
18415 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
18416 [(set_attr "type" "sselog")
18417 (set_attr "prefix_data16" "1")
18418 (set_attr "prefix_extra" "1")
18419 (set_attr "length_immediate" "1")
18420 (set_attr "prefix" "maybe_vex")
18421 (set_attr "btver2_decode" "vector")
18422 (set_attr "memory" "none,load")
18423 (set_attr "mode" "TI")])
18424
18425 (define_insn "sse4_2_pcmpestr_cconly"
18426 [(set (reg:CC FLAGS_REG)
18427 (unspec:CC
18428 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
18429 (match_operand:SI 3 "register_operand" "a,a,a,a")
18430 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
18431 (match_operand:SI 5 "register_operand" "d,d,d,d")
18432 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
18433 UNSPEC_PCMPESTR))
18434 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
18435 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
18436 "TARGET_SSE4_2"
18437 "@
18438 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
18439 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
18440 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
18441 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
18442 [(set_attr "type" "sselog")
18443 (set_attr "prefix_data16" "1")
18444 (set_attr "prefix_extra" "1")
18445 (set_attr "length_immediate" "1")
18446 (set_attr "memory" "none,load,none,load")
18447 (set_attr "btver2_decode" "vector,vector,vector,vector")
18448 (set_attr "prefix" "maybe_vex")
18449 (set_attr "mode" "TI")])
18450
18451 (define_insn_and_split "sse4_2_pcmpistr"
18452 [(set (match_operand:SI 0 "register_operand" "=c,c")
18453 (unspec:SI
18454 [(match_operand:V16QI 2 "register_operand" "x,x")
18455 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
18456 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
18457 UNSPEC_PCMPISTR))
18458 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
18459 (unspec:V16QI
18460 [(match_dup 2)
18461 (match_dup 3)
18462 (match_dup 4)]
18463 UNSPEC_PCMPISTR))
18464 (set (reg:CC FLAGS_REG)
18465 (unspec:CC
18466 [(match_dup 2)
18467 (match_dup 3)
18468 (match_dup 4)]
18469 UNSPEC_PCMPISTR))]
18470 "TARGET_SSE4_2
18471 && ix86_pre_reload_split ()"
18472 "#"
18473 "&& 1"
18474 [(const_int 0)]
18475 {
18476 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
18477 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
18478 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
18479
18480 if (ecx)
18481 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
18482 operands[3], operands[4]));
18483 if (xmm0)
18484 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
18485 operands[3], operands[4]));
18486 if (flags && !(ecx || xmm0))
18487 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
18488 operands[2], operands[3],
18489 operands[4]));
18490 if (!(flags || ecx || xmm0))
18491 emit_note (NOTE_INSN_DELETED);
18492
18493 DONE;
18494 }
18495 [(set_attr "type" "sselog")
18496 (set_attr "prefix_data16" "1")
18497 (set_attr "prefix_extra" "1")
18498 (set_attr "length_immediate" "1")
18499 (set_attr "memory" "none,load")
18500 (set_attr "mode" "TI")])
18501
18502 (define_insn "sse4_2_pcmpistri"
18503 [(set (match_operand:SI 0 "register_operand" "=c,c")
18504 (unspec:SI
18505 [(match_operand:V16QI 1 "register_operand" "x,x")
18506 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
18507 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
18508 UNSPEC_PCMPISTR))
18509 (set (reg:CC FLAGS_REG)
18510 (unspec:CC
18511 [(match_dup 1)
18512 (match_dup 2)
18513 (match_dup 3)]
18514 UNSPEC_PCMPISTR))]
18515 "TARGET_SSE4_2"
18516 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
18517 [(set_attr "type" "sselog")
18518 (set_attr "prefix_data16" "1")
18519 (set_attr "prefix_extra" "1")
18520 (set_attr "length_immediate" "1")
18521 (set_attr "prefix" "maybe_vex")
18522 (set_attr "memory" "none,load")
18523 (set_attr "btver2_decode" "vector")
18524 (set_attr "mode" "TI")])
18525
18526 (define_insn "sse4_2_pcmpistrm"
18527 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
18528 (unspec:V16QI
18529 [(match_operand:V16QI 1 "register_operand" "x,x")
18530 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
18531 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
18532 UNSPEC_PCMPISTR))
18533 (set (reg:CC FLAGS_REG)
18534 (unspec:CC
18535 [(match_dup 1)
18536 (match_dup 2)
18537 (match_dup 3)]
18538 UNSPEC_PCMPISTR))]
18539 "TARGET_SSE4_2"
18540 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
18541 [(set_attr "type" "sselog")
18542 (set_attr "prefix_data16" "1")
18543 (set_attr "prefix_extra" "1")
18544 (set_attr "length_immediate" "1")
18545 (set_attr "prefix" "maybe_vex")
18546 (set_attr "memory" "none,load")
18547 (set_attr "btver2_decode" "vector")
18548 (set_attr "mode" "TI")])
18549
18550 (define_insn "sse4_2_pcmpistr_cconly"
18551 [(set (reg:CC FLAGS_REG)
18552 (unspec:CC
18553 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
18554 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
18555 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
18556 UNSPEC_PCMPISTR))
18557 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
18558 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
18559 "TARGET_SSE4_2"
18560 "@
18561 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
18562 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
18563 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
18564 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
18565 [(set_attr "type" "sselog")
18566 (set_attr "prefix_data16" "1")
18567 (set_attr "prefix_extra" "1")
18568 (set_attr "length_immediate" "1")
18569 (set_attr "memory" "none,load,none,load")
18570 (set_attr "prefix" "maybe_vex")
18571 (set_attr "btver2_decode" "vector,vector,vector,vector")
18572 (set_attr "mode" "TI")])
18573
18574 ;; Packed float variants
18575 (define_mode_attr GATHER_SCATTER_SF_MEM_MODE
18576 [(V8DI "V8SF") (V16SI "V16SF")])
18577
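;; In the prefetch patterns below, operand 4 is the locality hint: the
;; value 3 (or 7 for the scatter/write forms) selects the
;; vgatherpf0*/vscatterpf0* variant and 2 (or 6) selects the
;; vgatherpf1*/vscatterpf1* variant, as the switches in the output
;; templates show.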
18578 (define_expand "avx512pf_gatherpf<mode>sf"
18579 [(unspec
18580 [(match_operand:<avx512fmaskmode> 0 "register_operand")
18581 (mem:<GATHER_SCATTER_SF_MEM_MODE>
18582 (match_par_dup 5
18583 [(match_operand 2 "vsib_address_operand")
18584 (match_operand:VI48_512 1 "register_operand")
18585 (match_operand:SI 3 "const1248_operand")]))
18586 (match_operand:SI 4 "const_2_to_3_operand")]
18587 UNSPEC_GATHER_PREFETCH)]
18588 "TARGET_AVX512PF"
18589 {
18590 operands[5]
18591 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
18592 operands[3]), UNSPEC_VSIBADDR);
18593 })
18594
18595 (define_insn "*avx512pf_gatherpf<VI48_512:mode>sf_mask"
18596 [(unspec
18597 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
18598 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
18599 [(unspec:P
18600 [(match_operand:P 2 "vsib_address_operand" "Tv")
18601 (match_operand:VI48_512 1 "register_operand" "v")
18602 (match_operand:SI 3 "const1248_operand" "n")]
18603 UNSPEC_VSIBADDR)])
18604 (match_operand:SI 4 "const_2_to_3_operand" "n")]
18605 UNSPEC_GATHER_PREFETCH)]
18606 "TARGET_AVX512PF"
18607 {
18608 switch (INTVAL (operands[4]))
18609 {
18610 case 3:
18611 /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
18612 gas changed its requirements in an incompatible way.  */
18613 return "%M2vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
18614 case 2:
18615 return "%M2vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
18616 default:
18617 gcc_unreachable ();
18618 }
18619 }
18620 [(set_attr "type" "sse")
18621 (set_attr "prefix" "evex")
18622 (set_attr "mode" "XI")])
18623
18624 ;; Packed double variants
18625 (define_expand "avx512pf_gatherpf<mode>df"
18626 [(unspec
18627 [(match_operand:<avx512fmaskmode> 0 "register_operand")
18628 (mem:V8DF
18629 (match_par_dup 5
18630 [(match_operand 2 "vsib_address_operand")
18631 (match_operand:VI4_256_8_512 1 "register_operand")
18632 (match_operand:SI 3 "const1248_operand")]))
18633 (match_operand:SI 4 "const_2_to_3_operand")]
18634 UNSPEC_GATHER_PREFETCH)]
18635 "TARGET_AVX512PF"
18636 {
18637 operands[5]
18638 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
18639 operands[3]), UNSPEC_VSIBADDR);
18640 })
18641
18642 (define_insn "*avx512pf_gatherpf<VI4_256_8_512:mode>df_mask"
18643 [(unspec
18644 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
18645 (match_operator:V8DF 5 "vsib_mem_operator"
18646 [(unspec:P
18647 [(match_operand:P 2 "vsib_address_operand" "Tv")
18648 (match_operand:VI4_256_8_512 1 "register_operand" "v")
18649 (match_operand:SI 3 "const1248_operand" "n")]
18650 UNSPEC_VSIBADDR)])
18651 (match_operand:SI 4 "const_2_to_3_operand" "n")]
18652 UNSPEC_GATHER_PREFETCH)]
18653 "TARGET_AVX512PF"
18654 {
18655 switch (INTVAL (operands[4]))
18656 {
18657 case 3:
18658 /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
18659 gas changed its requirements in an incompatible way.  */
18660 return "%M2vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
18661 case 2:
18662 return "%M2vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
18663 default:
18664 gcc_unreachable ();
18665 }
18666 }
18667 [(set_attr "type" "sse")
18668 (set_attr "prefix" "evex")
18669 (set_attr "mode" "XI")])
18670
18671 ;; Packed float variants
18672 (define_expand "avx512pf_scatterpf<mode>sf"
18673 [(unspec
18674 [(match_operand:<avx512fmaskmode> 0 "register_operand")
18675 (mem:<GATHER_SCATTER_SF_MEM_MODE>
18676 (match_par_dup 5
18677 [(match_operand 2 "vsib_address_operand")
18678 (match_operand:VI48_512 1 "register_operand")
18679 (match_operand:SI 3 "const1248_operand")]))
18680 (match_operand:SI 4 "const2367_operand")]
18681 UNSPEC_SCATTER_PREFETCH)]
18682 "TARGET_AVX512PF"
18683 {
18684 operands[5]
18685 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
18686 operands[3]), UNSPEC_VSIBADDR);
18687 })
18688
18689 (define_insn "*avx512pf_scatterpf<VI48_512:mode>sf_mask"
18690 [(unspec
18691 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
18692 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
18693 [(unspec:P
18694 [(match_operand:P 2 "vsib_address_operand" "Tv")
18695 (match_operand:VI48_512 1 "register_operand" "v")
18696 (match_operand:SI 3 "const1248_operand" "n")]
18697 UNSPEC_VSIBADDR)])
18698 (match_operand:SI 4 "const2367_operand" "n")]
18699 UNSPEC_SCATTER_PREFETCH)]
18700 "TARGET_AVX512PF"
18701 {
18702 switch (INTVAL (operands[4]))
18703 {
18704 case 3:
18705 case 7:
18706 /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
18707 gas changed its requirements in an incompatible way.  */
18708 return "%M2vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
18709 case 2:
18710 case 6:
18711 return "%M2vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
18712 default:
18713 gcc_unreachable ();
18714 }
18715 }
18716 [(set_attr "type" "sse")
18717 (set_attr "prefix" "evex")
18718 (set_attr "mode" "XI")])
18719
18720 ;; Packed double variants
18721 (define_expand "avx512pf_scatterpf<mode>df"
18722 [(unspec
18723 [(match_operand:<avx512fmaskmode> 0 "register_operand")
18724 (mem:V8DF
18725 (match_par_dup 5
18726 [(match_operand 2 "vsib_address_operand")
18727 (match_operand:VI4_256_8_512 1 "register_operand")
18728 (match_operand:SI 3 "const1248_operand")]))
18729 (match_operand:SI 4 "const2367_operand")]
18730 UNSPEC_SCATTER_PREFETCH)]
18731 "TARGET_AVX512PF"
18732 {
18733 operands[5]
18734 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
18735 operands[3]), UNSPEC_VSIBADDR);
18736 })
18737
18738 (define_insn "*avx512pf_scatterpf<VI4_256_8_512:mode>df_mask"
18739 [(unspec
18740 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
18741 (match_operator:V8DF 5 "vsib_mem_operator"
18742 [(unspec:P
18743 [(match_operand:P 2 "vsib_address_operand" "Tv")
18744 (match_operand:VI4_256_8_512 1 "register_operand" "v")
18745 (match_operand:SI 3 "const1248_operand" "n")]
18746 UNSPEC_VSIBADDR)])
18747 (match_operand:SI 4 "const2367_operand" "n")]
18748 UNSPEC_SCATTER_PREFETCH)]
18749 "TARGET_AVX512PF"
18750 {
18751 switch (INTVAL (operands[4]))
18752 {
18753 case 3:
18754 case 7:
18755 /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
18756 gas changed its requirements in an incompatible way.  */
18757 return "%M2vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
18758 case 2:
18759 case 6:
18760 return "%M2vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
18761 default:
18762 gcc_unreachable ();
18763 }
18764 }
18765 [(set_attr "type" "sse")
18766 (set_attr "prefix" "evex")
18767 (set_attr "mode" "XI")])
18768
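;; The AVX512ER patterns below model vexp2ps/pd, vrcp28ps/pd and
;; vrsqrt28ps/pd, which compute approximations of 2**x, 1/x and
;; 1/sqrt(x); the rcp28/rsqrt28 forms are documented with a relative
;; error bounded by 2**-28.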
18769 (define_insn "avx512er_exp2<mode><mask_name><round_saeonly_name>"
18770 [(set (match_operand:VF_512 0 "register_operand" "=v")
18771 (unspec:VF_512
18772 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
18773 UNSPEC_EXP2))]
18774 "TARGET_AVX512ER"
18775 "vexp2<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
18776 [(set_attr "prefix" "evex")
18777 (set_attr "type" "sse")
18778 (set_attr "mode" "<MODE>")])
18779
18780 (define_insn "<mask_codefor>avx512er_rcp28<mode><mask_name><round_saeonly_name>"
18781 [(set (match_operand:VF_512 0 "register_operand" "=v")
18782 (unspec:VF_512
18783 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
18784 UNSPEC_RCP28))]
18785 "TARGET_AVX512ER"
18786 "vrcp28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
18787 [(set_attr "prefix" "evex")
18788 (set_attr "type" "sse")
18789 (set_attr "mode" "<MODE>")])
18790
18791 (define_insn "avx512er_vmrcp28<mode><round_saeonly_name>"
18792 [(set (match_operand:VF_128 0 "register_operand" "=v")
18793 (vec_merge:VF_128
18794 (unspec:VF_128
18795 [(match_operand:VF_128 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")]
18796 UNSPEC_RCP28)
18797 (match_operand:VF_128 2 "register_operand" "v")
18798 (const_int 1)))]
18799 "TARGET_AVX512ER"
18800 "vrcp28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0, %2, %<iptr>1<round_saeonly_op3>}"
18801 [(set_attr "length_immediate" "1")
18802 (set_attr "prefix" "evex")
18803 (set_attr "type" "sse")
18804 (set_attr "mode" "<MODE>")])
18805
18806 (define_insn "<mask_codefor>avx512er_rsqrt28<mode><mask_name><round_saeonly_name>"
18807 [(set (match_operand:VF_512 0 "register_operand" "=v")
18808 (unspec:VF_512
18809 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
18810 UNSPEC_RSQRT28))]
18811 "TARGET_AVX512ER"
18812 "vrsqrt28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
18813 [(set_attr "prefix" "evex")
18814 (set_attr "type" "sse")
18815 (set_attr "mode" "<MODE>")])
18816
18817 (define_insn "avx512er_vmrsqrt28<mode><round_saeonly_name>"
18818 [(set (match_operand:VF_128 0 "register_operand" "=v")
18819 (vec_merge:VF_128
18820 (unspec:VF_128
18821 [(match_operand:VF_128 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")]
18822 UNSPEC_RSQRT28)
18823 (match_operand:VF_128 2 "register_operand" "v")
18824 (const_int 1)))]
18825 "TARGET_AVX512ER"
18826 "vrsqrt28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0, %2, %<iptr>1<round_saeonly_op3>}"
18827 [(set_attr "length_immediate" "1")
18828 (set_attr "type" "sse")
18829 (set_attr "prefix" "evex")
18830 (set_attr "mode" "<MODE>")])
18831
18832 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
18833 ;;
18834 ;; XOP instructions
18835 ;;
18836 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
18837
18838 (define_code_iterator xop_plus [plus ss_plus])
18839
18840 (define_code_attr macs [(plus "macs") (ss_plus "macss")])
18841 (define_code_attr madcs [(plus "madcs") (ss_plus "madcss")])
18842
18843 ;; XOP parallel integer multiply/add instructions.
18844
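;; As a rough scalar sketch of what the widening forms below compute per
;; 64-bit lane (the helper name pmacsdql_lane is made up; vpmacsdql uses
;; the low dword of each 64-bit lane, vpmacsdqh the high one, and the
;; vpmacss* forms saturate the final addition instead of wrapping):
;;
;;   #include <stdint.h>
;;
;;   static int64_t
;;   pmacsdql_lane (int32_t a_lo, int32_t b_lo, int64_t acc)
;;   {
;;     return (int64_t) a_lo * (int64_t) b_lo + acc;
;;   }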
18845 (define_insn "xop_p<macs><ssemodesuffix><ssemodesuffix>"
18846 [(set (match_operand:VI24_128 0 "register_operand" "=x")
18847 (xop_plus:VI24_128
18848 (mult:VI24_128
18849 (match_operand:VI24_128 1 "nonimmediate_operand" "%x")
18850 (match_operand:VI24_128 2 "nonimmediate_operand" "xm"))
18851 (match_operand:VI24_128 3 "register_operand" "x")))]
18852 "TARGET_XOP"
18853 "vp<macs><ssemodesuffix><ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18854 [(set_attr "type" "ssemuladd")
18855 (set_attr "mode" "TI")])
18856
18857 (define_insn "xop_p<macs>dql"
18858 [(set (match_operand:V2DI 0 "register_operand" "=x")
18859 (xop_plus:V2DI
18860 (mult:V2DI
18861 (sign_extend:V2DI
18862 (vec_select:V2SI
18863 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
18864 (parallel [(const_int 0) (const_int 2)])))
18865 (sign_extend:V2DI
18866 (vec_select:V2SI
18867 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
18868 (parallel [(const_int 0) (const_int 2)]))))
18869 (match_operand:V2DI 3 "register_operand" "x")))]
18870 "TARGET_XOP"
18871 "vp<macs>dql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18872 [(set_attr "type" "ssemuladd")
18873 (set_attr "mode" "TI")])
18874
18875 (define_insn "xop_p<macs>dqh"
18876 [(set (match_operand:V2DI 0 "register_operand" "=x")
18877 (xop_plus:V2DI
18878 (mult:V2DI
18879 (sign_extend:V2DI
18880 (vec_select:V2SI
18881 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
18882 (parallel [(const_int 1) (const_int 3)])))
18883 (sign_extend:V2DI
18884 (vec_select:V2SI
18885 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
18886 (parallel [(const_int 1) (const_int 3)]))))
18887 (match_operand:V2DI 3 "register_operand" "x")))]
18888 "TARGET_XOP"
18889 "vp<macs>dqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18890 [(set_attr "type" "ssemuladd")
18891 (set_attr "mode" "TI")])
18892
18893 ;; XOP parallel integer multiply/add instructions for the intrinsics.
18894 (define_insn "xop_p<macs>wd"
18895 [(set (match_operand:V4SI 0 "register_operand" "=x")
18896 (xop_plus:V4SI
18897 (mult:V4SI
18898 (sign_extend:V4SI
18899 (vec_select:V4HI
18900 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
18901 (parallel [(const_int 1) (const_int 3)
18902 (const_int 5) (const_int 7)])))
18903 (sign_extend:V4SI
18904 (vec_select:V4HI
18905 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
18906 (parallel [(const_int 1) (const_int 3)
18907 (const_int 5) (const_int 7)]))))
18908 (match_operand:V4SI 3 "register_operand" "x")))]
18909 "TARGET_XOP"
18910 "vp<macs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18911 [(set_attr "type" "ssemuladd")
18912 (set_attr "mode" "TI")])
18913
18914 (define_insn "xop_p<madcs>wd"
18915 [(set (match_operand:V4SI 0 "register_operand" "=x")
18916 (xop_plus:V4SI
18917 (plus:V4SI
18918 (mult:V4SI
18919 (sign_extend:V4SI
18920 (vec_select:V4HI
18921 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
18922 (parallel [(const_int 0) (const_int 2)
18923 (const_int 4) (const_int 6)])))
18924 (sign_extend:V4SI
18925 (vec_select:V4HI
18926 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
18927 (parallel [(const_int 0) (const_int 2)
18928 (const_int 4) (const_int 6)]))))
18929 (mult:V4SI
18930 (sign_extend:V4SI
18931 (vec_select:V4HI
18932 (match_dup 1)
18933 (parallel [(const_int 1) (const_int 3)
18934 (const_int 5) (const_int 7)])))
18935 (sign_extend:V4SI
18936 (vec_select:V4HI
18937 (match_dup 2)
18938 (parallel [(const_int 1) (const_int 3)
18939 (const_int 5) (const_int 7)])))))
18940 (match_operand:V4SI 3 "register_operand" "x")))]
18941 "TARGET_XOP"
18942 "vp<madcs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18943 [(set_attr "type" "ssemuladd")
18944 (set_attr "mode" "TI")])
18945
18946 ;; XOP parallel XMM conditional moves
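;; vpcmov is a bit-wise select: each destination bit comes from
;; operand 1 where the corresponding bit of operand 3 is set and from
;; operand 2 where it is clear, i.e. roughly (op1 & op3) | (op2 & ~op3).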
18947 (define_insn "xop_pcmov_<mode><avxsizesuffix>"
18948 [(set (match_operand:V_128_256 0 "register_operand" "=x,x")
18949 (if_then_else:V_128_256
18950 (match_operand:V_128_256 3 "nonimmediate_operand" "x,m")
18951 (match_operand:V_128_256 1 "register_operand" "x,x")
18952 (match_operand:V_128_256 2 "nonimmediate_operand" "xm,x")))]
18953 "TARGET_XOP"
18954 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18955 [(set_attr "type" "sse4arg")])
18956
18957 ;; XOP horizontal add/subtract instructions
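;; For example, vphaddbw below adds adjacent byte pairs of its single
;; input into word results; per 16-bit lane, roughly
;; dst[i] = ext (src[2*i]) + ext (src[2*i + 1]), with ext being a sign
;; or zero extension depending on the <u> variant.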
18958 (define_insn "xop_phadd<u>bw"
18959 [(set (match_operand:V8HI 0 "register_operand" "=x")
18960 (plus:V8HI
18961 (any_extend:V8HI
18962 (vec_select:V8QI
18963 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
18964 (parallel [(const_int 0) (const_int 2)
18965 (const_int 4) (const_int 6)
18966 (const_int 8) (const_int 10)
18967 (const_int 12) (const_int 14)])))
18968 (any_extend:V8HI
18969 (vec_select:V8QI
18970 (match_dup 1)
18971 (parallel [(const_int 1) (const_int 3)
18972 (const_int 5) (const_int 7)
18973 (const_int 9) (const_int 11)
18974 (const_int 13) (const_int 15)])))))]
18975 "TARGET_XOP"
18976 "vphadd<u>bw\t{%1, %0|%0, %1}"
18977 [(set_attr "type" "sseiadd1")])
18978
18979 (define_insn "xop_phadd<u>bd"
18980 [(set (match_operand:V4SI 0 "register_operand" "=x")
18981 (plus:V4SI
18982 (plus:V4SI
18983 (any_extend:V4SI
18984 (vec_select:V4QI
18985 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
18986 (parallel [(const_int 0) (const_int 4)
18987 (const_int 8) (const_int 12)])))
18988 (any_extend:V4SI
18989 (vec_select:V4QI
18990 (match_dup 1)
18991 (parallel [(const_int 1) (const_int 5)
18992 (const_int 9) (const_int 13)]))))
18993 (plus:V4SI
18994 (any_extend:V4SI
18995 (vec_select:V4QI
18996 (match_dup 1)
18997 (parallel [(const_int 2) (const_int 6)
18998 (const_int 10) (const_int 14)])))
18999 (any_extend:V4SI
19000 (vec_select:V4QI
19001 (match_dup 1)
19002 (parallel [(const_int 3) (const_int 7)
19003 (const_int 11) (const_int 15)]))))))]
19004 "TARGET_XOP"
19005 "vphadd<u>bd\t{%1, %0|%0, %1}"
19006 [(set_attr "type" "sseiadd1")])
19007
19008 (define_insn "xop_phadd<u>bq"
19009 [(set (match_operand:V2DI 0 "register_operand" "=x")
19010 (plus:V2DI
19011 (plus:V2DI
19012 (plus:V2DI
19013 (any_extend:V2DI
19014 (vec_select:V2QI
19015 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
19016 (parallel [(const_int 0) (const_int 8)])))
19017 (any_extend:V2DI
19018 (vec_select:V2QI
19019 (match_dup 1)
19020 (parallel [(const_int 1) (const_int 9)]))))
19021 (plus:V2DI
19022 (any_extend:V2DI
19023 (vec_select:V2QI
19024 (match_dup 1)
19025 (parallel [(const_int 2) (const_int 10)])))
19026 (any_extend:V2DI
19027 (vec_select:V2QI
19028 (match_dup 1)
19029 (parallel [(const_int 3) (const_int 11)])))))
19030 (plus:V2DI
19031 (plus:V2DI
19032 (any_extend:V2DI
19033 (vec_select:V2QI
19034 (match_dup 1)
19035 (parallel [(const_int 4) (const_int 12)])))
19036 (any_extend:V2DI
19037 (vec_select:V2QI
19038 (match_dup 1)
19039 (parallel [(const_int 5) (const_int 13)]))))
19040 (plus:V2DI
19041 (any_extend:V2DI
19042 (vec_select:V2QI
19043 (match_dup 1)
19044 (parallel [(const_int 6) (const_int 14)])))
19045 (any_extend:V2DI
19046 (vec_select:V2QI
19047 (match_dup 1)
19048 (parallel [(const_int 7) (const_int 15)])))))))]
19049 "TARGET_XOP"
19050 "vphadd<u>bq\t{%1, %0|%0, %1}"
19051 [(set_attr "type" "sseiadd1")])
19052
19053 (define_insn "xop_phadd<u>wd"
19054 [(set (match_operand:V4SI 0 "register_operand" "=x")
19055 (plus:V4SI
19056 (any_extend:V4SI
19057 (vec_select:V4HI
19058 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
19059 (parallel [(const_int 0) (const_int 2)
19060 (const_int 4) (const_int 6)])))
19061 (any_extend:V4SI
19062 (vec_select:V4HI
19063 (match_dup 1)
19064 (parallel [(const_int 1) (const_int 3)
19065 (const_int 5) (const_int 7)])))))]
19066 "TARGET_XOP"
19067 "vphadd<u>wd\t{%1, %0|%0, %1}"
19068 [(set_attr "type" "sseiadd1")])
19069
19070 (define_insn "xop_phadd<u>wq"
19071 [(set (match_operand:V2DI 0 "register_operand" "=x")
19072 (plus:V2DI
19073 (plus:V2DI
19074 (any_extend:V2DI
19075 (vec_select:V2HI
19076 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
19077 (parallel [(const_int 0) (const_int 4)])))
19078 (any_extend:V2DI
19079 (vec_select:V2HI
19080 (match_dup 1)
19081 (parallel [(const_int 1) (const_int 5)]))))
19082 (plus:V2DI
19083 (any_extend:V2DI
19084 (vec_select:V2HI
19085 (match_dup 1)
19086 (parallel [(const_int 2) (const_int 6)])))
19087 (any_extend:V2DI
19088 (vec_select:V2HI
19089 (match_dup 1)
19090 (parallel [(const_int 3) (const_int 7)]))))))]
19091 "TARGET_XOP"
19092 "vphadd<u>wq\t{%1, %0|%0, %1}"
19093 [(set_attr "type" "sseiadd1")])
19094
19095 (define_insn "xop_phadd<u>dq"
19096 [(set (match_operand:V2DI 0 "register_operand" "=x")
19097 (plus:V2DI
19098 (any_extend:V2DI
19099 (vec_select:V2SI
19100 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
19101 (parallel [(const_int 0) (const_int 2)])))
19102 (any_extend:V2DI
19103 (vec_select:V2SI
19104 (match_dup 1)
19105 (parallel [(const_int 1) (const_int 3)])))))]
19106 "TARGET_XOP"
19107 "vphadd<u>dq\t{%1, %0|%0, %1}"
19108 [(set_attr "type" "sseiadd1")])
19109
19110 (define_insn "xop_phsubbw"
19111 [(set (match_operand:V8HI 0 "register_operand" "=x")
19112 (minus:V8HI
19113 (sign_extend:V8HI
19114 (vec_select:V8QI
19115 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
19116 (parallel [(const_int 0) (const_int 2)
19117 (const_int 4) (const_int 6)
19118 (const_int 8) (const_int 10)
19119 (const_int 12) (const_int 14)])))
19120 (sign_extend:V8HI
19121 (vec_select:V8QI
19122 (match_dup 1)
19123 (parallel [(const_int 1) (const_int 3)
19124 (const_int 5) (const_int 7)
19125 (const_int 9) (const_int 11)
19126 (const_int 13) (const_int 15)])))))]
19127 "TARGET_XOP"
19128 "vphsubbw\t{%1, %0|%0, %1}"
19129 [(set_attr "type" "sseiadd1")])
19130
19131 (define_insn "xop_phsubwd"
19132 [(set (match_operand:V4SI 0 "register_operand" "=x")
19133 (minus:V4SI
19134 (sign_extend:V4SI
19135 (vec_select:V4HI
19136 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
19137 (parallel [(const_int 0) (const_int 2)
19138 (const_int 4) (const_int 6)])))
19139 (sign_extend:V4SI
19140 (vec_select:V4HI
19141 (match_dup 1)
19142 (parallel [(const_int 1) (const_int 3)
19143 (const_int 5) (const_int 7)])))))]
19144 "TARGET_XOP"
19145 "vphsubwd\t{%1, %0|%0, %1}"
19146 [(set_attr "type" "sseiadd1")])
19147
19148 (define_insn "xop_phsubdq"
19149 [(set (match_operand:V2DI 0 "register_operand" "=x")
19150 (minus:V2DI
19151 (sign_extend:V2DI
19152 (vec_select:V2SI
19153 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
19154 (parallel [(const_int 0) (const_int 2)])))
19155 (sign_extend:V2DI
19156 (vec_select:V2SI
19157 (match_dup 1)
19158 (parallel [(const_int 1) (const_int 3)])))))]
19159 "TARGET_XOP"
19160 "vphsubdq\t{%1, %0|%0, %1}"
19161 [(set_attr "type" "sseiadd1")])
19162
19163 ;; XOP permute instructions
19164 (define_insn "xop_pperm"
19165 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
19166 (unspec:V16QI
19167 [(match_operand:V16QI 1 "register_operand" "x,x")
19168 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
19169 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
19170 UNSPEC_XOP_PERMUTE))]
19171 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
19172 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
19173 [(set_attr "type" "sse4arg")
19174 (set_attr "mode" "TI")])
19175
19176 ;; XOP pack instructions that combine two vectors into a smaller vector
19177 (define_insn "xop_pperm_pack_v2di_v4si"
19178 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
19179 (vec_concat:V4SI
19180 (truncate:V2SI
19181 (match_operand:V2DI 1 "register_operand" "x,x"))
19182 (truncate:V2SI
19183 (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
19184 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
19185 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
19186 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
19187 [(set_attr "type" "sse4arg")
19188 (set_attr "mode" "TI")])
19189
19190 (define_insn "xop_pperm_pack_v4si_v8hi"
19191 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
19192 (vec_concat:V8HI
19193 (truncate:V4HI
19194 (match_operand:V4SI 1 "register_operand" "x,x"))
19195 (truncate:V4HI
19196 (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
19197 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
19198 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
19199 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
19200 [(set_attr "type" "sse4arg")
19201 (set_attr "mode" "TI")])
19202
19203 (define_insn "xop_pperm_pack_v8hi_v16qi"
19204 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
19205 (vec_concat:V16QI
19206 (truncate:V8QI
19207 (match_operand:V8HI 1 "register_operand" "x,x"))
19208 (truncate:V8QI
19209 (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
19210 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
19211 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
19212 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
19213 [(set_attr "type" "sse4arg")
19214 (set_attr "mode" "TI")])
19215
19216 ;; XOP packed rotate instructions
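;; The vprot* instructions take a per-element, signed rotate count:
;; positive counts rotate left, negative counts rotate right (see
;; xop_vrotl<mode>3 below).  That is why the expanders broadcast a
;; scalar count into a vector, and why rotate right is implemented by
;; negating the count.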
19217 (define_expand "rotl<mode>3"
19218 [(set (match_operand:VI_128 0 "register_operand")
19219 (rotate:VI_128
19220 (match_operand:VI_128 1 "nonimmediate_operand")
19221 (match_operand:SI 2 "general_operand")))]
19222 "TARGET_XOP"
19223 {
19224 /* If we were given a scalar, convert it to a parallel.  */
19225 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
19226 {
19227 rtvec vs = rtvec_alloc (<ssescalarnum>);
19228 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
19229 rtx reg = gen_reg_rtx (<MODE>mode);
19230 rtx op2 = operands[2];
19231 int i;
19232
19233 if (GET_MODE (op2) != <ssescalarmode>mode)
19234 {
19235 op2 = gen_reg_rtx (<ssescalarmode>mode);
19236 convert_move (op2, operands[2], false);
19237 }
19238
19239 for (i = 0; i < <ssescalarnum>; i++)
19240 RTVEC_ELT (vs, i) = op2;
19241
19242 emit_insn (gen_vec_init<mode><ssescalarmodelower> (reg, par));
19243 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
19244 DONE;
19245 }
19246 })
19247
19248 (define_expand "rotr<mode>3"
19249 [(set (match_operand:VI_128 0 "register_operand")
19250 (rotatert:VI_128
19251 (match_operand:VI_128 1 "nonimmediate_operand")
19252 (match_operand:SI 2 "general_operand")))]
19253 "TARGET_XOP"
19254 {
19255 /* If we were given a scalar, convert it to a parallel.  */
19256 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
19257 {
19258 rtvec vs = rtvec_alloc (<ssescalarnum>);
19259 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
19260 rtx neg = gen_reg_rtx (<MODE>mode);
19261 rtx reg = gen_reg_rtx (<MODE>mode);
19262 rtx op2 = operands[2];
19263 int i;
19264
19265 if (GET_MODE (op2) != <ssescalarmode>mode)
19266 {
19267 op2 = gen_reg_rtx (<ssescalarmode>mode);
19268 convert_move (op2, operands[2], false);
19269 }
19270
19271 for (i = 0; i < <ssescalarnum>; i++)
19272 RTVEC_ELT (vs, i) = op2;
19273
19274 emit_insn (gen_vec_init<mode><ssescalarmodelower> (reg, par));
19275 emit_insn (gen_neg<mode>2 (neg, reg));
19276 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
19277 DONE;
19278 }
19279 })
19280
19281 (define_insn "xop_rotl<mode>3"
19282 [(set (match_operand:VI_128 0 "register_operand" "=x")
19283 (rotate:VI_128
19284 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
19285 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
19286 "TARGET_XOP"
19287 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
19288 [(set_attr "type" "sseishft")
19289 (set_attr "length_immediate" "1")
19290 (set_attr "mode" "TI")])
19291
19292 (define_insn "xop_rotr<mode>3"
19293 [(set (match_operand:VI_128 0 "register_operand" "=x")
19294 (rotatert:VI_128
19295 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
19296 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
19297 "TARGET_XOP"
19298 {
19299 operands[3]
19300 = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
19301 return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\";
19302 }
19303 [(set_attr "type" "sseishft")
19304 (set_attr "length_immediate" "1")
19305 (set_attr "mode" "TI")])
19306
19307 (define_expand "vrotr<mode>3"
19308 [(match_operand:VI_128 0 "register_operand")
19309 (match_operand:VI_128 1 "register_operand")
19310 (match_operand:VI_128 2 "register_operand")]
19311 "TARGET_XOP"
19312 {
19313 rtx reg = gen_reg_rtx (<MODE>mode);
19314 emit_insn (gen_neg<mode>2 (reg, operands[2]));
19315 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
19316 DONE;
19317 })
19318
19319 (define_expand "vrotl<mode>3"
19320 [(match_operand:VI_128 0 "register_operand")
19321 (match_operand:VI_128 1 "register_operand")
19322 (match_operand:VI_128 2 "register_operand")]
19323 "TARGET_XOP"
19324 {
19325 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
19326 DONE;
19327 })
19328
19329 (define_insn "xop_vrotl<mode>3"
19330 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
19331 (if_then_else:VI_128
19332 (ge:VI_128
19333 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
19334 (const_int 0))
19335 (rotate:VI_128
19336 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
19337 (match_dup 2))
19338 (rotatert:VI_128
19339 (match_dup 1)
19340 (neg:VI_128 (match_dup 2)))))]
19341 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
19342 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
19343 [(set_attr "type" "sseishft")
19344 (set_attr "prefix_data16" "0")
19345 (set_attr "prefix_extra" "2")
19346 (set_attr "mode" "TI")])
19347
19348 ;; XOP packed shift instructions.
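;; Likewise, vpsha* and vpshl* take per-element signed shift counts:
;; positive counts shift left and negative counts shift right,
;; arithmetically for vpsha* and logically for vpshl*.  The vlshr and
;; vashr expanders below therefore negate the count vector before using
;; the xop_shl/xop_sha patterns.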
19349 (define_expand "vlshr<mode>3"
19350 [(set (match_operand:VI12_128 0 "register_operand")
19351 (lshiftrt:VI12_128
19352 (match_operand:VI12_128 1 "register_operand")
19353 (match_operand:VI12_128 2 "nonimmediate_operand")))]
19354 "TARGET_XOP"
19355 {
19356 rtx neg = gen_reg_rtx (<MODE>mode);
19357 emit_insn (gen_neg<mode>2 (neg, operands[2]));
19358 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
19359 DONE;
19360 })
19361
19362 (define_expand "vlshr<mode>3"
19363 [(set (match_operand:VI48_128 0 "register_operand")
19364 (lshiftrt:VI48_128
19365 (match_operand:VI48_128 1 "register_operand")
19366 (match_operand:VI48_128 2 "nonimmediate_operand")))]
19367 "TARGET_AVX2 || TARGET_XOP"
19368 {
19369 if (!TARGET_AVX2)
19370 {
19371 rtx neg = gen_reg_rtx (<MODE>mode);
19372 emit_insn (gen_neg<mode>2 (neg, operands[2]));
19373 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
19374 DONE;
19375 }
19376 })
19377
19378 (define_expand "vlshr<mode>3"
19379 [(set (match_operand:VI48_512 0 "register_operand")
19380 (lshiftrt:VI48_512
19381 (match_operand:VI48_512 1 "register_operand")
19382 (match_operand:VI48_512 2 "nonimmediate_operand")))]
19383 "TARGET_AVX512F")
19384
19385 (define_expand "vlshr<mode>3"
19386 [(set (match_operand:VI48_256 0 "register_operand")
19387 (lshiftrt:VI48_256
19388 (match_operand:VI48_256 1 "register_operand")
19389 (match_operand:VI48_256 2 "nonimmediate_operand")))]
19390 "TARGET_AVX2")
19391
19392 (define_expand "vashrv8hi3<mask_name>"
19393 [(set (match_operand:V8HI 0 "register_operand")
19394 (ashiftrt:V8HI
19395 (match_operand:V8HI 1 "register_operand")
19396 (match_operand:V8HI 2 "nonimmediate_operand")))]
19397 "TARGET_XOP || (TARGET_AVX512BW && TARGET_AVX512VL)"
19398 {
19399 if (TARGET_XOP)
19400 {
19401 rtx neg = gen_reg_rtx (V8HImode);
19402 emit_insn (gen_negv8hi2 (neg, operands[2]));
19403 emit_insn (gen_xop_shav8hi3 (operands[0], operands[1], neg));
19404 DONE;
19405 }
19406 })
19407
19408 (define_expand "vashrv16qi3"
19409 [(set (match_operand:V16QI 0 "register_operand")
19410 (ashiftrt:V16QI
19411 (match_operand:V16QI 1 "register_operand")
19412 (match_operand:V16QI 2 "nonimmediate_operand")))]
19413 "TARGET_XOP"
19414 {
19415 rtx neg = gen_reg_rtx (V16QImode);
19416 emit_insn (gen_negv16qi2 (neg, operands[2]));
19417 emit_insn (gen_xop_shav16qi3 (operands[0], operands[1], neg));
19418 DONE;
19419 })
19420
19421 (define_expand "vashrv2di3<mask_name>"
19422 [(set (match_operand:V2DI 0 "register_operand")
19423 (ashiftrt:V2DI
19424 (match_operand:V2DI 1 "register_operand")
19425 (match_operand:V2DI 2 "nonimmediate_operand")))]
19426 "TARGET_XOP || TARGET_AVX512VL"
19427 {
19428 if (TARGET_XOP)
19429 {
19430 rtx neg = gen_reg_rtx (V2DImode);
19431 emit_insn (gen_negv2di2 (neg, operands[2]));
19432 emit_insn (gen_xop_shav2di3 (operands[0], operands[1], neg));
19433 DONE;
19434 }
19435 })
19436
19437 (define_expand "vashrv4si3"
19438 [(set (match_operand:V4SI 0 "register_operand")
19439 (ashiftrt:V4SI (match_operand:V4SI 1 "register_operand")
19440 (match_operand:V4SI 2 "nonimmediate_operand")))]
19441 "TARGET_AVX2 || TARGET_XOP"
19442 {
19443 if (!TARGET_AVX2)
19444 {
19445 rtx neg = gen_reg_rtx (V4SImode);
19446 emit_insn (gen_negv4si2 (neg, operands[2]));
19447 emit_insn (gen_xop_shav4si3 (operands[0], operands[1], neg));
19448 DONE;
19449 }
19450 })
19451
19452 (define_expand "vashrv16si3"
19453 [(set (match_operand:V16SI 0 "register_operand")
19454 (ashiftrt:V16SI (match_operand:V16SI 1 "register_operand")
19455 (match_operand:V16SI 2 "nonimmediate_operand")))]
19456 "TARGET_AVX512F")
19457
19458 (define_expand "vashrv8si3"
19459 [(set (match_operand:V8SI 0 "register_operand")
19460 (ashiftrt:V8SI (match_operand:V8SI 1 "register_operand")
19461 (match_operand:V8SI 2 "nonimmediate_operand")))]
19462 "TARGET_AVX2")
19463
19464 (define_expand "vashl<mode>3"
19465 [(set (match_operand:VI12_128 0 "register_operand")
19466 (ashift:VI12_128
19467 (match_operand:VI12_128 1 "register_operand")
19468 (match_operand:VI12_128 2 "nonimmediate_operand")))]
19469 "TARGET_XOP"
19470 {
19471 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
19472 DONE;
19473 })
19474
19475 (define_expand "vashl<mode>3"
19476 [(set (match_operand:VI48_128 0 "register_operand")
19477 (ashift:VI48_128
19478 (match_operand:VI48_128 1 "register_operand")
19479 (match_operand:VI48_128 2 "nonimmediate_operand")))]
19480 "TARGET_AVX2 || TARGET_XOP"
19481 {
19482 if (!TARGET_AVX2)
19483 {
19484 operands[2] = force_reg (<MODE>mode, operands[2]);
19485 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
19486 DONE;
19487 }
19488 })
19489
19490 (define_expand "vashl<mode>3"
19491 [(set (match_operand:VI48_512 0 "register_operand")
19492 (ashift:VI48_512
19493 (match_operand:VI48_512 1 "register_operand")
19494 (match_operand:VI48_512 2 "nonimmediate_operand")))]
19495 "TARGET_AVX512F")
19496
19497 (define_expand "vashl<mode>3"
19498 [(set (match_operand:VI48_256 0 "register_operand")
19499 (ashift:VI48_256
19500 (match_operand:VI48_256 1 "register_operand")
19501 (match_operand:VI48_256 2 "nonimmediate_operand")))]
19502 "TARGET_AVX2")
19503
19504 (define_insn "xop_sha<mode>3"
19505 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
19506 (if_then_else:VI_128
19507 (ge:VI_128
19508 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
19509 (const_int 0))
19510 (ashift:VI_128
19511 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
19512 (match_dup 2))
19513 (ashiftrt:VI_128
19514 (match_dup 1)
19515 (neg:VI_128 (match_dup 2)))))]
19516 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
19517 "vpsha<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
19518 [(set_attr "type" "sseishft")
19519 (set_attr "prefix_data16" "0")
19520 (set_attr "prefix_extra" "2")
19521 (set_attr "mode" "TI")])
19522
19523 (define_insn "xop_shl<mode>3"
19524 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
19525 (if_then_else:VI_128
19526 (ge:VI_128
19527 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
19528 (const_int 0))
19529 (ashift:VI_128
19530 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
19531 (match_dup 2))
19532 (lshiftrt:VI_128
19533 (match_dup 1)
19534 (neg:VI_128 (match_dup 2)))))]
19535 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
19536 "vpshl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
19537 [(set_attr "type" "sseishft")
19538 (set_attr "prefix_data16" "0")
19539 (set_attr "prefix_extra" "2")
19540 (set_attr "mode" "TI")])
19541
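;; There is no direct SSE/AVX shift instruction for QImode elements, so
;; the expander below either uses the XOP per-element byte shifts
;; (broadcasting the scalar count and negating it for right shifts) or
;; falls back to ix86_expand_vecop_qihi, which synthesizes the QImode
;; operation from HImode shifts.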
19542 (define_expand "<shift_insn><mode>3"
19543 [(set (match_operand:VI1_AVX512 0 "register_operand")
19544 (any_shift:VI1_AVX512
19545 (match_operand:VI1_AVX512 1 "register_operand")
19546 (match_operand:SI 2 "nonmemory_operand")))]
19547 "TARGET_SSE2"
19548 {
19549 if (TARGET_XOP && <MODE>mode == V16QImode)
19550 {
19551 bool negate = false;
19552 rtx (*gen) (rtx, rtx, rtx);
19553 rtx tmp, par;
19554 int i;
19555
19556 if (<CODE> != ASHIFT)
19557 {
19558 if (CONST_INT_P (operands[2]))
19559 operands[2] = GEN_INT (-INTVAL (operands[2]));
19560 else
19561 negate = true;
19562 }
19563 par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
19564 for (i = 0; i < 16; i++)
19565 XVECEXP (par, 0, i) = operands[2];
19566
19567 tmp = gen_reg_rtx (V16QImode);
19568 emit_insn (gen_vec_initv16qiqi (tmp, par));
19569
19570 if (negate)
19571 emit_insn (gen_negv16qi2 (tmp, tmp));
19572
19573 gen = (<CODE> == LSHIFTRT ? gen_xop_shlv16qi3 : gen_xop_shav16qi3);
19574 emit_insn (gen (operands[0], operands[1], tmp));
19575 }
19576 else
19577 ix86_expand_vecop_qihi (<CODE>, operands[0], operands[1], operands[2]);
19578 DONE;
19579 })
19580
19581 (define_expand "ashrv2di3"
19582 [(set (match_operand:V2DI 0 "register_operand")
19583 (ashiftrt:V2DI
19584 (match_operand:V2DI 1 "register_operand")
19585 (match_operand:DI 2 "nonmemory_operand")))]
19586 "TARGET_XOP || TARGET_AVX512VL"
19587 {
19588 if (!TARGET_AVX512VL)
19589 {
19590 rtx reg = gen_reg_rtx (V2DImode);
19591 rtx par;
19592 bool negate = false;
19593 int i;
19594
19595 if (CONST_INT_P (operands[2]))
19596 operands[2] = GEN_INT (-INTVAL (operands[2]));
19597 else
19598 negate = true;
19599
19600 par = gen_rtx_PARALLEL (V2DImode, rtvec_alloc (2));
19601 for (i = 0; i < 2; i++)
19602 XVECEXP (par, 0, i) = operands[2];
19603
19604 emit_insn (gen_vec_initv2didi (reg, par));
19605
19606 if (negate)
19607 emit_insn (gen_negv2di2 (reg, reg));
19608
19609 emit_insn (gen_xop_shav2di3 (operands[0], operands[1], reg));
19610 DONE;
19611 }
19612 })
19613
19614 ;; XOP FRCZ support
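;; The vfrcz* instructions extract the fractional part of their input,
;; i.e. roughly x - trunc (x).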
19615 (define_insn "xop_frcz<mode>2"
19616 [(set (match_operand:FMAMODE 0 "register_operand" "=x")
19617 (unspec:FMAMODE
19618 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
19619 UNSPEC_FRCZ))]
19620 "TARGET_XOP"
19621 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
19622 [(set_attr "type" "ssecvt1")
19623 (set_attr "mode" "<MODE>")])
19624
19625 (define_expand "xop_vmfrcz<mode>2"
19626 [(set (match_operand:VF_128 0 "register_operand")
19627 (vec_merge:VF_128
19628 (unspec:VF_128
19629 [(match_operand:VF_128 1 "nonimmediate_operand")]
19630 UNSPEC_FRCZ)
19631 (match_dup 2)
19632 (const_int 1)))]
19633 "TARGET_XOP"
19634 "operands[2] = CONST0_RTX (<MODE>mode);")
19635
19636 (define_insn "*xop_vmfrcz<mode>2"
19637 [(set (match_operand:VF_128 0 "register_operand" "=x")
19638 (vec_merge:VF_128
19639 (unspec:VF_128
19640 [(match_operand:VF_128 1 "nonimmediate_operand" "xm")]
19641 UNSPEC_FRCZ)
19642 (match_operand:VF_128 2 "const0_operand")
19643 (const_int 1)))]
19644 "TARGET_XOP"
19645 "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}"
19646 [(set_attr "type" "ssecvt1")
19647 (set_attr "mode" "<MODE>")])
19648
19649 (define_insn "xop_maskcmp<mode>3"
19650 [(set (match_operand:VI_128 0 "register_operand" "=x")
19651 (match_operator:VI_128 1 "ix86_comparison_int_operator"
19652 [(match_operand:VI_128 2 "register_operand" "x")
19653 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
19654 "TARGET_XOP"
19655 "vpcom%Y1<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
19656 [(set_attr "type" "sse4arg")
19657 (set_attr "prefix_data16" "0")
19658 (set_attr "prefix_rep" "0")
19659 (set_attr "prefix_extra" "2")
19660 (set_attr "length_immediate" "1")
19661 (set_attr "mode" "TI")])
19662
19663 (define_insn "xop_maskcmp_uns<mode>3"
19664 [(set (match_operand:VI_128 0 "register_operand" "=x")
19665 (match_operator:VI_128 1 "ix86_comparison_uns_operator"
19666 [(match_operand:VI_128 2 "register_operand" "x")
19667 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
19668 "TARGET_XOP"
19669 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
19670 [(set_attr "type" "ssecmp")
19671 (set_attr "prefix_data16" "0")
19672 (set_attr "prefix_rep" "0")
19673 (set_attr "prefix_extra" "2")
19674 (set_attr "length_immediate" "1")
19675 (set_attr "mode" "TI")])
19676
19677 ;; Version of pcom*u* that is used by the intrinsics; it keeps pcomequ*
19678 ;; and pcomneu* from being converted to the signed forms in case somebody
19679 ;; needs the exact instruction generated for the intrinsic.
19680 (define_insn "xop_maskcmp_uns2<mode>3"
19681 [(set (match_operand:VI_128 0 "register_operand" "=x")
19682 (unspec:VI_128
19683 [(match_operator:VI_128 1 "ix86_comparison_uns_operator"
19684 [(match_operand:VI_128 2 "register_operand" "x")
19685 (match_operand:VI_128 3 "nonimmediate_operand" "xm")])]
19686 UNSPEC_XOP_UNSIGNED_CMP))]
19687 "TARGET_XOP"
19688 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
19689 [(set_attr "type" "ssecmp")
19690 (set_attr "prefix_data16" "0")
19691 (set_attr "prefix_extra" "2")
19692 (set_attr "length_immediate" "1")
19693 (set_attr "mode" "TI")])
19694
19695 ;; Pcomtrue and pcomfalse support.  These are useless instructions, but they
19696 ;; are included here for completeness.
19697 (define_insn "xop_pcom_tf<mode>3"
19698 [(set (match_operand:VI_128 0 "register_operand" "=x")
19699 (unspec:VI_128
19700 [(match_operand:VI_128 1 "register_operand" "x")
19701 (match_operand:VI_128 2 "nonimmediate_operand" "xm")
19702 (match_operand:SI 3 "const_int_operand" "n")]
19703 UNSPEC_XOP_TRUEFALSE))]
19704 "TARGET_XOP"
19705 {
19706 return ((INTVAL (operands[3]) != 0)
19707 ? "vpcomtrue<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
19708 : "vpcomfalse<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}");
19709 }
19710 [(set_attr "type" "ssecmp")
19711 (set_attr "prefix_data16" "0")
19712 (set_attr "prefix_extra" "2")
19713 (set_attr "length_immediate" "1")
19714 (set_attr "mode" "TI")])
19715
19716 (define_insn "xop_vpermil2<mode>3"
19717 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
19718 (unspec:VF_128_256
19719 [(match_operand:VF_128_256 1 "register_operand" "x,x")
19720 (match_operand:VF_128_256 2 "nonimmediate_operand" "x,m")
19721 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "xm,x")
19722 (match_operand:SI 4 "const_0_to_3_operand" "n,n")]
19723 UNSPEC_VPERMIL2))]
19724 "TARGET_XOP"
19725 "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
19726 [(set_attr "type" "sse4arg")
19727 (set_attr "length_immediate" "1")
19728 (set_attr "mode" "<MODE>")])
19729
19730 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
19731
19732 (define_insn "aesenc"
19733 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
19734 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
19735 (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
19736 UNSPEC_AESENC))]
19737 "TARGET_AES"
19738 "@
19739 aesenc\t{%2, %0|%0, %2}
19740 vaesenc\t{%2, %1, %0|%0, %1, %2}"
19741 [(set_attr "isa" "noavx,avx")
19742 (set_attr "type" "sselog1")
19743 (set_attr "prefix_extra" "1")
19744 (set_attr "prefix" "orig,vex")
19745 (set_attr "btver2_decode" "double,double")
19746 (set_attr "mode" "TI")])
19747
19748 (define_insn "aesenclast"
19749 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
19750 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
19751 (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
19752 UNSPEC_AESENCLAST))]
19753 "TARGET_AES"
19754 "@
19755 aesenclast\t{%2, %0|%0, %2}
19756 vaesenclast\t{%2, %1, %0|%0, %1, %2}"
19757 [(set_attr "isa" "noavx,avx")
19758 (set_attr "type" "sselog1")
19759 (set_attr "prefix_extra" "1")
19760 (set_attr "prefix" "orig,vex")
19761 (set_attr "btver2_decode" "double,double")
19762 (set_attr "mode" "TI")])
19763
19764 (define_insn "aesdec"
19765 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
19766 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
19767 (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
19768 UNSPEC_AESDEC))]
19769 "TARGET_AES"
19770 "@
19771 aesdec\t{%2, %0|%0, %2}
19772 vaesdec\t{%2, %1, %0|%0, %1, %2}"
19773 [(set_attr "isa" "noavx,avx")
19774 (set_attr "type" "sselog1")
19775 (set_attr "prefix_extra" "1")
19776 (set_attr "prefix" "orig,vex")
19777 (set_attr "btver2_decode" "double,double")
19778 (set_attr "mode" "TI")])
19779
19780 (define_insn "aesdeclast"
19781 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
19782 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
19783 (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
19784 UNSPEC_AESDECLAST))]
19785 "TARGET_AES"
19786 "@
19787 aesdeclast\t{%2, %0|%0, %2}
19788 vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
19789 [(set_attr "isa" "noavx,avx")
19790 (set_attr "type" "sselog1")
19791 (set_attr "prefix_extra" "1")
19792 (set_attr "prefix" "orig,vex")
19793 (set_attr "btver2_decode" "double,double")
19794 (set_attr "mode" "TI")])
19795
19796 (define_insn "aesimc"
19797 [(set (match_operand:V2DI 0 "register_operand" "=x")
19798 (unspec:V2DI [(match_operand:V2DI 1 "vector_operand" "xBm")]
19799 UNSPEC_AESIMC))]
19800 "TARGET_AES"
19801 "%vaesimc\t{%1, %0|%0, %1}"
19802 [(set_attr "type" "sselog1")
19803 (set_attr "prefix_extra" "1")
19804 (set_attr "prefix" "maybe_vex")
19805 (set_attr "mode" "TI")])
19806
19807 (define_insn "aeskeygenassist"
19808 [(set (match_operand:V2DI 0 "register_operand" "=x")
19809 (unspec:V2DI [(match_operand:V2DI 1 "vector_operand" "xBm")
19810 (match_operand:SI 2 "const_0_to_255_operand" "n")]
19811 UNSPEC_AESKEYGENASSIST))]
19812 "TARGET_AES"
19813 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
19814 [(set_attr "type" "sselog1")
19815 (set_attr "prefix_extra" "1")
19816 (set_attr "length_immediate" "1")
19817 (set_attr "prefix" "maybe_vex")
19818 (set_attr "mode" "TI")])
19819
19820 (define_insn "pclmulqdq"
19821 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
19822 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
19823 (match_operand:V2DI 2 "vector_operand" "xBm,xm")
19824 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
19825 UNSPEC_PCLMUL))]
19826 "TARGET_PCLMUL"
19827 "@
19828 pclmulqdq\t{%3, %2, %0|%0, %2, %3}
19829 vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
19830 [(set_attr "isa" "noavx,avx")
19831 (set_attr "type" "sselog1")
19832 (set_attr "prefix_extra" "1")
19833 (set_attr "length_immediate" "1")
19834 (set_attr "prefix" "orig,vex")
19835 (set_attr "mode" "TI")])
19836
19837 (define_expand "avx_vzeroall"
19838 [(match_par_dup 0 [(const_int 0)])]
19839 "TARGET_AVX"
19840 {
19841 int nregs = TARGET_64BIT ? 16 : 8;
19842 int regno;
19843
19844 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
19845
19846 XVECEXP (operands[0], 0, 0)
19847 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
19848 UNSPECV_VZEROALL);
19849
19850 for (regno = 0; regno < nregs; regno++)
19851 XVECEXP (operands[0], 0, regno + 1)
19852 = gen_rtx_SET (gen_rtx_REG (V8SImode, GET_SSE_REGNO (regno)),
19853 CONST0_RTX (V8SImode));
19854 })
19855
19856 (define_insn "*avx_vzeroall"
19857 [(match_parallel 0 "vzeroall_operation"
19858 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
19859 "TARGET_AVX"
19860 "vzeroall"
19861 [(set_attr "type" "sse")
19862 (set_attr "modrm" "0")
19863 (set_attr "memory" "none")
19864 (set_attr "prefix" "vex")
19865 (set_attr "btver2_decode" "vector")
19866 (set_attr "mode" "OI")])
19867
19868 ;; Clear the upper 128 bits of AVX registers, equivalent to a NOP
19869 ;; if the upper 128 bits are unused.  Initially we expand the instructions
19870 ;; as though they had no effect on the SSE registers, but later add SETs and
19871 ;; CLOBBERs to the PARALLEL to model the real effect.
19872 (define_expand "avx_vzeroupper"
19873 [(parallel [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)])]
19874 "TARGET_AVX")
19875
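;; Illustrative only (not part of the machine description): a minimal C
;; sketch of where this expansion shows up, assuming the _mm256_zeroupper
;; spelling from <immintrin.h>.  GCC also inserts vzeroupper automatically
;; around AVX/SSE transitions; the explicit intrinsic simply expands to the
;; pattern above.
;;
;;   #include <immintrin.h>
;;
;;   void
;;   flush_upper_halves (void)
;;   {
;;     /* Mark the upper 128 bits of all YMM registers as unused.  */
;;     _mm256_zeroupper ();
;;   }
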
19876 (define_insn "*avx_vzeroupper"
19877 [(match_parallel 0 "vzeroupper_pattern"
19878 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)])]
19879 "TARGET_AVX && XVECLEN (operands[0], 0) == (TARGET_64BIT ? 16 : 8) + 1"
19880 "vzeroupper"
19881 [(set_attr "type" "sse")
19882 (set_attr "modrm" "0")
19883 (set_attr "memory" "none")
19884 (set_attr "prefix" "vex")
19885 (set_attr "btver2_decode" "vector")
19886 (set_attr "mode" "OI")])
19887
19888 (define_insn_and_split "*avx_vzeroupper_1"
19889 [(match_parallel 0 "vzeroupper_pattern"
19890 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)])]
19891 "TARGET_AVX && XVECLEN (operands[0], 0) != (TARGET_64BIT ? 16 : 8) + 1"
19892 "#"
19893 "&& epilogue_completed"
19894 [(match_dup 0)]
19895 {
19896 /* For IPA-RA purposes, make it clear the instruction clobbers
19897 even XMM registers not mentioned explicitly in the pattern. */
19898 unsigned int nregs = TARGET_64BIT ? 16 : 8;
19899 unsigned int npats = XVECLEN (operands[0], 0);
19900 rtvec vec = rtvec_alloc (nregs + 1);
19901 RTVEC_ELT (vec, 0) = XVECEXP (operands[0], 0, 0);
19902 for (unsigned int i = 0, j = 1; i < nregs; ++i)
19903 {
19904 unsigned int regno = GET_SSE_REGNO (i);
19905 if (j < npats
19906 && REGNO (SET_DEST (XVECEXP (operands[0], 0, j))) == regno)
19907 {
19908 RTVEC_ELT (vec, i + 1) = XVECEXP (operands[0], 0, j);
19909 j++;
19910 }
19911 else
19912 {
19913 rtx reg = gen_rtx_REG (V2DImode, regno);
19914 RTVEC_ELT (vec, i + 1) = gen_rtx_CLOBBER (VOIDmode, reg);
19915 }
19916 }
19917 operands[0] = gen_rtx_PARALLEL (VOIDmode, vec);
19918 }
19919 [(set_attr "type" "sse")
19920 (set_attr "modrm" "0")
19921 (set_attr "memory" "none")
19922 (set_attr "prefix" "vex")
19923 (set_attr "btver2_decode" "vector")
19924 (set_attr "mode" "OI")])
19925
19926 (define_mode_attr pbroadcast_evex_isa
19927 [(V64QI "avx512bw") (V32QI "avx512bw") (V16QI "avx512bw")
19928 (V32HI "avx512bw") (V16HI "avx512bw") (V8HI "avx512bw")
19929 (V16SI "avx512f") (V8SI "avx512f") (V4SI "avx512f")
19930 (V8DI "avx512f") (V4DI "avx512f") (V2DI "avx512f")])
19931
19932 (define_insn "avx2_pbroadcast<mode>"
19933 [(set (match_operand:VI 0 "register_operand" "=x,v")
19934 (vec_duplicate:VI
19935 (vec_select:<ssescalarmode>
19936 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "xm,vm")
19937 (parallel [(const_int 0)]))))]
19938 "TARGET_AVX2"
19939 "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}"
19940 [(set_attr "isa" "*,<pbroadcast_evex_isa>")
19941 (set_attr "type" "ssemov")
19942 (set_attr "prefix_extra" "1")
19943 (set_attr "prefix" "vex,evex")
19944 (set_attr "mode" "<sseinsnmode>")])
19945
19946 (define_insn "avx2_pbroadcast<mode>_1"
19947 [(set (match_operand:VI_256 0 "register_operand" "=x,x,v,v")
19948 (vec_duplicate:VI_256
19949 (vec_select:<ssescalarmode>
19950 (match_operand:VI_256 1 "nonimmediate_operand" "m,x,m,v")
19951 (parallel [(const_int 0)]))))]
19952 "TARGET_AVX2"
19953 "@
19954 vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}
19955 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}
19956 vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}
19957 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}"
19958 [(set_attr "isa" "*,*,<pbroadcast_evex_isa>,<pbroadcast_evex_isa>")
19959 (set_attr "type" "ssemov")
19960 (set_attr "prefix_extra" "1")
19961 (set_attr "prefix" "vex")
19962 (set_attr "mode" "<sseinsnmode>")])
19963
19964 (define_insn "<avx2_avx512>_permvar<mode><mask_name>"
19965 [(set (match_operand:VI48F_256_512 0 "register_operand" "=v")
19966 (unspec:VI48F_256_512
19967 [(match_operand:VI48F_256_512 1 "nonimmediate_operand" "vm")
19968 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
19969 UNSPEC_VPERMVAR))]
19970 "TARGET_AVX2 && <mask_mode512bit_condition>"
19971 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
19972 [(set_attr "type" "sselog")
19973 (set_attr "prefix" "<mask_prefix2>")
19974 (set_attr "mode" "<sseinsnmode>")])
19975
19976 (define_insn "<avx512>_permvar<mode><mask_name>"
19977 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
19978 (unspec:VI1_AVX512VL
19979 [(match_operand:VI1_AVX512VL 1 "nonimmediate_operand" "vm")
19980 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
19981 UNSPEC_VPERMVAR))]
19982 "TARGET_AVX512VBMI && <mask_mode512bit_condition>"
19983 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
19984 [(set_attr "type" "sselog")
19985 (set_attr "prefix" "<mask_prefix2>")
19986 (set_attr "mode" "<sseinsnmode>")])
19987
19988 (define_insn "<avx512>_permvar<mode><mask_name>"
19989 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
19990 (unspec:VI2_AVX512VL
19991 [(match_operand:VI2_AVX512VL 1 "nonimmediate_operand" "vm")
19992 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
19993 UNSPEC_VPERMVAR))]
19994 "TARGET_AVX512BW && <mask_mode512bit_condition>"
19995 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
19996 [(set_attr "type" "sselog")
19997 (set_attr "prefix" "<mask_prefix2>")
19998 (set_attr "mode" "<sseinsnmode>")])
19999
20000 ;; Recognize a broadcast expressed as a vec_select, as produced by
20001 ;; builtin_vec_perm.  If the input happens to be in memory, use vbroadcast;
20002 ;; otherwise use vpermilp (and, in the case of 256-bit modes, vperm2f128).
20003 (define_insn "*avx_vperm_broadcast_v4sf"
20004 [(set (match_operand:V4SF 0 "register_operand" "=v,v,v")
20005 (vec_select:V4SF
20006 (match_operand:V4SF 1 "nonimmediate_operand" "m,o,v")
20007 (match_parallel 2 "avx_vbroadcast_operand"
20008 [(match_operand 3 "const_int_operand" "C,n,n")])))]
20009 "TARGET_AVX"
20010 {
20011 int elt = INTVAL (operands[3]);
20012 switch (which_alternative)
20013 {
20014 case 0:
20015 case 1:
20016 operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
20017 return "vbroadcastss\t{%1, %0|%0, %k1}";
20018 case 2:
20019 operands[2] = GEN_INT (elt * 0x55);
20020 return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
20021 default:
20022 gcc_unreachable ();
20023 }
20024 }
20025 [(set_attr "type" "ssemov,ssemov,sselog1")
20026 (set_attr "prefix_extra" "1")
20027 (set_attr "length_immediate" "0,0,1")
20028 (set_attr "prefix" "maybe_evex")
20029 (set_attr "mode" "SF,SF,V4SF")])
20030
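;; Illustrative only (not part of the machine description): a broadcast in
;; the vec_select form matched above can come from generic vector code,
;; e.g. GCC's __builtin_shuffle with a constant all-equal mask (a minimal
;; sketch; the typedefs and function name are ours):
;;
;;   typedef float v4sf __attribute__ ((vector_size (16)));
;;   typedef int   v4si __attribute__ ((vector_size (16)));
;;
;;   v4sf
;;   broadcast_elt2 (v4sf x)
;;   {
;;     /* With a register input this is the vpermilps alternative above;
;;        with a memory input it becomes vbroadcastss.  */
;;     return __builtin_shuffle (x, (v4si) { 2, 2, 2, 2 });
;;   }
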
20031 (define_insn_and_split "*avx_vperm_broadcast_<mode>"
20032 [(set (match_operand:VF_256 0 "register_operand" "=v,v,v")
20033 (vec_select:VF_256
20034 (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?v")
20035 (match_parallel 2 "avx_vbroadcast_operand"
20036 [(match_operand 3 "const_int_operand" "C,n,n")])))]
20037 "TARGET_AVX
20038 && (<MODE>mode != V4DFmode || !TARGET_AVX2 || operands[3] == const0_rtx)"
20039 "#"
20040 "&& reload_completed"
20041 [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
20042 {
20043 rtx op0 = operands[0], op1 = operands[1];
20044 int elt = INTVAL (operands[3]);
20045
20046 if (REG_P (op1))
20047 {
20048 int mask;
20049
20050 if (TARGET_AVX2 && elt == 0)
20051 {
20052 emit_insn (gen_vec_dup<mode> (op0, gen_lowpart (<ssescalarmode>mode,
20053 op1)));
20054 DONE;
20055 }
20056
20057 /* Shuffle the element we care about into all elements of its 128-bit lane.
20058 The other lane gets shuffled too, but we don't care.  */
20059 if (<MODE>mode == V4DFmode)
20060 mask = (elt & 1 ? 15 : 0);
20061 else
20062 mask = (elt & 3) * 0x55;
20063 emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
20064
20065 /* Shuffle the lane we care about into both lanes of the dest. */
20066 mask = (elt / (<ssescalarnum> / 2)) * 0x11;
20067 if (EXT_REX_SSE_REG_P (op0))
20068 {
20069 /* There is no EVEX VPERM2F128, but we can use either VBROADCASTSS
20070 or VSHUFF128. */
20071 gcc_assert (<MODE>mode == V8SFmode);
20072 if ((mask & 1) == 0)
20073 emit_insn (gen_avx2_vec_dupv8sf (op0,
20074 gen_lowpart (V4SFmode, op0)));
20075 else
20076 emit_insn (gen_avx512vl_shuf_f32x4_1 (op0, op0, op0,
20077 GEN_INT (4), GEN_INT (5),
20078 GEN_INT (6), GEN_INT (7),
20079 GEN_INT (12), GEN_INT (13),
20080 GEN_INT (14), GEN_INT (15)));
20081 DONE;
20082 }
20083
20084 emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
20085 DONE;
20086 }
20087
20088 operands[1] = adjust_address (op1, <ssescalarmode>mode,
20089 elt * GET_MODE_SIZE (<ssescalarmode>mode));
20090 })
20091
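;; Worked example (our own illustration) for the register path in the split
;; above: broadcasting element 3 of a V4DF register first emits vpermilpd
;; with mask (3 & 1 ? 15 : 0) = 15, duplicating the odd element within each
;; 128-bit lane, then vperm2f128 with mask (3 / 2) * 0x11 = 0x11, copying
;; the upper lane into both lanes of the destination.
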
20092 (define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
20093 [(set (match_operand:VF2 0 "register_operand")
20094 (vec_select:VF2
20095 (match_operand:VF2 1 "nonimmediate_operand")
20096 (match_operand:SI 2 "const_0_to_255_operand")))]
20097 "TARGET_AVX && <mask_mode512bit_condition>"
20098 {
20099 int mask = INTVAL (operands[2]);
20100 rtx perm[<ssescalarnum>];
20101
20102 int i;
20103 for (i = 0; i < <ssescalarnum>; i = i + 2)
20104 {
20105 perm[i] = GEN_INT (((mask >> i) & 1) + i);
20106 perm[i + 1] = GEN_INT (((mask >> (i + 1)) & 1) + i);
20107 }
20108
20109 operands[2]
20110 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
20111 })
20112
20113 (define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
20114 [(set (match_operand:VF1 0 "register_operand")
20115 (vec_select:VF1
20116 (match_operand:VF1 1 "nonimmediate_operand")
20117 (match_operand:SI 2 "const_0_to_255_operand")))]
20118 "TARGET_AVX && <mask_mode512bit_condition>"
20119 {
20120 int mask = INTVAL (operands[2]);
20121 rtx perm[<ssescalarnum>];
20122
20123 int i;
20124 for (i = 0; i < <ssescalarnum>; i = i + 4)
20125 {
20126 perm[i] = GEN_INT (((mask >> 0) & 3) + i);
20127 perm[i + 1] = GEN_INT (((mask >> 2) & 3) + i);
20128 perm[i + 2] = GEN_INT (((mask >> 4) & 3) + i);
20129 perm[i + 3] = GEN_INT (((mask >> 6) & 3) + i);
20130 }
20131
20132 operands[2]
20133 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
20134 })
20135
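;; Illustrative only (not part of the machine description): for the
;; single-precision expander above, an immediate of 0xB1 = 0b10110001
;; decodes to the in-lane permutation {1,0,3,2, 5,4,7,6}, i.e. it swaps
;; adjacent element pairs.  A minimal C sketch with the matching intrinsic
;; (the function name is ours):
;;
;;   #include <immintrin.h>
;;
;;   __m256
;;   swap_pairs (__m256 x)
;;   {
;;     /* vpermilps applies the 2-bit fields 1,0,3,2 to each 128-bit lane.  */
;;     return _mm256_permute_ps (x, 0xB1);
;;   }
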
20136 ;; This pattern needs to come before the avx2_perm*/avx512f_perm*
20137 ;; patterns, as they have the same RTL representation (vpermilp*
20138 ;; being a subset of what vpermp* can do), but vpermilp* has shorter
20139 ;; latency as it never crosses lanes.
20140 (define_insn "*<sse2_avx_avx512f>_vpermilp<mode><mask_name>"
20141 [(set (match_operand:VF 0 "register_operand" "=v")
20142 (vec_select:VF
20143 (match_operand:VF 1 "nonimmediate_operand" "vm")
20144 (match_parallel 2 ""
20145 [(match_operand 3 "const_int_operand")])))]
20146 "TARGET_AVX && <mask_mode512bit_condition>
20147 && avx_vpermilp_parallel (operands[2], <MODE>mode)"
20148 {
20149 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
20150 operands[2] = GEN_INT (mask);
20151 return "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}";
20152 }
20153 [(set_attr "type" "sselog")
20154 (set_attr "prefix_extra" "1")
20155 (set_attr "length_immediate" "1")
20156 (set_attr "prefix" "<mask_prefix>")
20157 (set_attr "mode" "<sseinsnmode>")])
20158
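;; Illustrative only (not part of the machine description): a constant
;; permutation that stays within 128-bit lanes, such as the one below, can
;; be matched both by the pattern above and by avx2_perm<mode>_1 further
;; down; the ordering noted above makes the lane-local vpermilpd form win
;; (the typedefs and function name are ours):
;;
;;   typedef double    v4df __attribute__ ((vector_size (32)));
;;   typedef long long v4di __attribute__ ((vector_size (32)));
;;
;;   v4df
;;   swap_within_lanes (v4df x)
;;   {
;;     /* Selector {1,0,3,2} never crosses a lane boundary.  */
;;     return __builtin_shuffle (x, (v4di) { 1, 0, 3, 2 });
;;   }
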
20159 (define_expand "avx2_perm<mode>"
20160 [(match_operand:VI8F_256 0 "register_operand")
20161 (match_operand:VI8F_256 1 "nonimmediate_operand")
20162 (match_operand:SI 2 "const_0_to_255_operand")]
20163 "TARGET_AVX2"
20164 {
20165 int mask = INTVAL (operands[2]);
20166 emit_insn (gen_avx2_perm<mode>_1 (operands[0], operands[1],
20167 GEN_INT ((mask >> 0) & 3),
20168 GEN_INT ((mask >> 2) & 3),
20169 GEN_INT ((mask >> 4) & 3),
20170 GEN_INT ((mask >> 6) & 3)));
20171 DONE;
20172 })
20173
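;; Illustrative only (not part of the machine description): the expander
;; above splits the imm8 into four 2-bit element selectors; for instance
;; 0x4E = 0b01001110 selects elements {2,3,0,1}, i.e. it swaps the two
;; 128-bit halves.  A minimal C sketch with the matching intrinsic (the
;; function name is ours):
;;
;;   #include <immintrin.h>
;;
;;   __m256d
;;   swap_halves (__m256d x)
;;   {
;;     return _mm256_permute4x64_pd (x, 0x4E);
;;   }
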
20174 (define_expand "avx512vl_perm<mode>_mask"
20175 [(match_operand:VI8F_256 0 "register_operand")
20176 (match_operand:VI8F_256 1 "nonimmediate_operand")
20177 (match_operand:SI 2 "const_0_to_255_operand")
20178 (match_operand:VI8F_256 3 "nonimm_or_0_operand")
20179 (match_operand:<avx512fmaskmode> 4 "register_operand")]
20180 "TARGET_AVX512VL"
20181 {
20182 int mask = INTVAL (operands[2]);
20183 emit_insn (gen_<avx2_avx512>_perm<mode>_1_mask (operands[0], operands[1],
20184 GEN_INT ((mask >> 0) & 3),
20185 GEN_INT ((mask >> 2) & 3),
20186 GEN_INT ((mask >> 4) & 3),
20187 GEN_INT ((mask >> 6) & 3),
20188 operands[3], operands[4]));
20189 DONE;
20190 })
20191
20192 (define_insn "avx2_perm<mode>_1<mask_name>"
20193 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
20194 (vec_select:VI8F_256
20195 (match_operand:VI8F_256 1 "nonimmediate_operand" "vm")
20196 (parallel [(match_operand 2 "const_0_to_3_operand")
20197 (match_operand 3 "const_0_to_3_operand")
20198 (match_operand 4 "const_0_to_3_operand")
20199 (match_operand 5 "const_0_to_3_operand")])))]
20200 "TARGET_AVX2 && <mask_mode512bit_condition>"
20201 {
20202 int mask = 0;
20203 mask |= INTVAL (operands[2]) << 0;
20204 mask |= INTVAL (operands[3]) << 2;
20205 mask |= INTVAL (operands[4]) << 4;
20206 mask |= INTVAL (operands[5]) << 6;
20207 operands[2] = GEN_INT (mask);
20208 return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
20209 }
20210 [(set_attr "type" "sselog")
20211 (set_attr "prefix" "<mask_prefix2>")
20212 (set_attr "mode" "<sseinsnmode>")])
20213
20214 (define_expand "avx512f_perm<mode>"
20215 [(match_operand:V8FI 0 "register_operand")
20216 (match_operand:V8FI 1 "nonimmediate_operand")
20217 (match_operand:SI 2 "const_0_to_255_operand")]
20218 "TARGET_AVX512F"
20219 {
20220 int mask = INTVAL (operands[2]);
20221 emit_insn (gen_avx512f_perm<mode>_1 (operands[0], operands[1],
20222 GEN_INT ((mask >> 0) & 3),
20223 GEN_INT ((mask >> 2) & 3),
20224 GEN_INT ((mask >> 4) & 3),
20225 GEN_INT ((mask >> 6) & 3),
20226 GEN_INT (((mask >> 0) & 3) + 4),
20227 GEN_INT (((mask >> 2) & 3) + 4),
20228 GEN_INT (((mask >> 4) & 3) + 4),
20229 GEN_INT (((mask >> 6) & 3) + 4)));
20230 DONE;
20231 })
20232
20233 (define_expand "avx512f_perm<mode>_mask"
20234 [(match_operand:V8FI 0 "register_operand")
20235 (match_operand:V8FI 1 "nonimmediate_operand")
20236 (match_operand:SI 2 "const_0_to_255_operand")
20237 (match_operand:V8FI 3 "nonimm_or_0_operand")
20238 (match_operand:<avx512fmaskmode> 4 "register_operand")]
20239 "TARGET_AVX512F"
20240 {
20241 int mask = INTVAL (operands[2]);
20242 emit_insn (gen_avx512f_perm<mode>_1_mask (operands[0], operands[1],
20243 GEN_INT ((mask >> 0) & 3),
20244 GEN_INT ((mask >> 2) & 3),
20245 GEN_INT ((mask >> 4) & 3),
20246 GEN_INT ((mask >> 6) & 3),
20247 GEN_INT (((mask >> 0) & 3) + 4),
20248 GEN_INT (((mask >> 2) & 3) + 4),
20249 GEN_INT (((mask >> 4) & 3) + 4),
20250 GEN_INT (((mask >> 6) & 3) + 4),
20251 operands[3], operands[4]));
20252 DONE;
20253 })
20254
20255 (define_insn "avx512f_perm<mode>_1<mask_name>"
20256 [(set (match_operand:V8FI 0 "register_operand" "=v")
20257 (vec_select:V8FI
20258 (match_operand:V8FI 1 "nonimmediate_operand" "vm")
20259 (parallel [(match_operand 2 "const_0_to_3_operand")
20260 (match_operand 3 "const_0_to_3_operand")
20261 (match_operand 4 "const_0_to_3_operand")
20262 (match_operand 5 "const_0_to_3_operand")
20263 (match_operand 6 "const_4_to_7_operand")
20264 (match_operand 7 "const_4_to_7_operand")
20265 (match_operand 8 "const_4_to_7_operand")
20266 (match_operand 9 "const_4_to_7_operand")])))]
20267 "TARGET_AVX512F && <mask_mode512bit_condition>
20268 && (INTVAL (operands[2]) == (INTVAL (operands[6]) - 4)
20269 && INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
20270 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
20271 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4))"
20272 {
20273 int mask = 0;
20274 mask |= INTVAL (operands[2]) << 0;
20275 mask |= INTVAL (operands[3]) << 2;
20276 mask |= INTVAL (operands[4]) << 4;
20277 mask |= INTVAL (operands[5]) << 6;
20278 operands[2] = GEN_INT (mask);
20279 return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
20280 }
20281 [(set_attr "type" "sselog")
20282 (set_attr "prefix" "<mask_prefix2>")
20283 (set_attr "mode" "<sseinsnmode>")])
20284
20285 (define_insn "avx2_permv2ti"
20286 [(set (match_operand:V4DI 0 "register_operand" "=x")
20287 (unspec:V4DI
20288 [(match_operand:V4DI 1 "register_operand" "x")
20289 (match_operand:V4DI 2 "nonimmediate_operand" "xm")
20290 (match_operand:SI 3 "const_0_to_255_operand" "n")]
20291 UNSPEC_VPERMTI))]
20292 "TARGET_AVX2"
20293 "vperm2i128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
20294 [(set_attr "type" "sselog")
20295 (set_attr "prefix" "vex")
20296 (set_attr "mode" "OI")])
20297
20298 (define_insn "avx2_vec_dupv4df"
20299 [(set (match_operand:V4DF 0 "register_operand" "=v")
20300 (vec_duplicate:V4DF
20301 (vec_select:DF
20302 (match_operand:V2DF 1 "register_operand" "v")
20303 (parallel [(const_int 0)]))))]
20304 "TARGET_AVX2"
20305 "vbroadcastsd\t{%1, %0|%0, %1}"
20306 [(set_attr "type" "sselog1")
20307 (set_attr "prefix" "maybe_evex")
20308 (set_attr "mode" "V4DF")])
20309
20310 (define_insn "<avx512>_vec_dup<mode>_1"
20311 [(set (match_operand:VI_AVX512BW 0 "register_operand" "=v,v")
20312 (vec_duplicate:VI_AVX512BW
20313 (vec_select:<ssescalarmode>
20314 (match_operand:VI_AVX512BW 1 "nonimmediate_operand" "v,m")
20315 (parallel [(const_int 0)]))))]
20316 "TARGET_AVX512F"
20317 "@
20318 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}
20319 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %<iptr>1}"
20320 [(set_attr "type" "ssemov")
20321 (set_attr "prefix" "evex")
20322 (set_attr "mode" "<sseinsnmode>")])
20323
20324 (define_insn "<avx512>_vec_dup<mode><mask_name>"
20325 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
20326 (vec_duplicate:V48_AVX512VL
20327 (vec_select:<ssescalarmode>
20328 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
20329 (parallel [(const_int 0)]))))]
20330 "TARGET_AVX512F"
20331 {
20332 /* There is no DF broadcast (in AVX-512*) to a 128-bit register.
20333 Mimic it with the integer variant.  */
20334 if (<MODE>mode == V2DFmode)
20335 return "vpbroadcastq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}";
20336
20337 return "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %<iptr>1}";
20338 }
20339 [(set_attr "type" "ssemov")
20340 (set_attr "prefix" "evex")
20341 (set_attr "mode" "<sseinsnmode>")])
20342
20343 (define_insn "<avx512>_vec_dup<mode><mask_name>"
20344 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
20345 (vec_duplicate:VI12_AVX512VL
20346 (vec_select:<ssescalarmode>
20347 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
20348 (parallel [(const_int 0)]))))]
20349 "TARGET_AVX512BW"
20350 "vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %<iptr>1}"
20351 [(set_attr "type" "ssemov")
20352 (set_attr "prefix" "evex")
20353 (set_attr "mode" "<sseinsnmode>")])
20354
20355 (define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
20356 [(set (match_operand:V16FI 0 "register_operand" "=v,v")
20357 (vec_duplicate:V16FI
20358 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
20359 "TARGET_AVX512F"
20360 "@
20361 vshuf<shuffletype>32x4\t{$0x0, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x0}
20362 vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
20363 [(set_attr "type" "ssemov")
20364 (set_attr "prefix" "evex")
20365 (set_attr "mode" "<sseinsnmode>")])
20366
20367 (define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
20368 [(set (match_operand:V8FI 0 "register_operand" "=v,v")
20369 (vec_duplicate:V8FI
20370 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
20371 "TARGET_AVX512F"
20372 "@
20373 vshuf<shuffletype>64x2\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
20374 vbroadcast<shuffletype>64x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
20375 [(set_attr "type" "ssemov")
20376 (set_attr "prefix" "evex")
20377 (set_attr "mode" "<sseinsnmode>")])
20378
20379 (define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>"
20380 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v")
20381 (vec_duplicate:VI12_AVX512VL
20382 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))]
20383 "TARGET_AVX512BW"
20384 "@
20385 vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}
20386 vpbroadcast<bcstscalarsuff>\t{%k1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
20387 [(set_attr "type" "ssemov")
20388 (set_attr "prefix" "evex")
20389 (set_attr "mode" "<sseinsnmode>")])
20390
20391 (define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>"
20392 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v")
20393 (vec_duplicate:V48_AVX512VL
20394 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))]
20395 "TARGET_AVX512F"
20396 "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
20397 [(set_attr "type" "ssemov")
20398 (set_attr "prefix" "evex")
20399 (set_attr "mode" "<sseinsnmode>")
20400 (set (attr "enabled")
20401 (if_then_else (eq_attr "alternative" "1")
20402 (symbol_ref "GET_MODE_CLASS (<ssescalarmode>mode) == MODE_INT
20403 && (<ssescalarmode>mode != DImode || TARGET_64BIT)")
20404 (const_int 1)))])
20405
20406 (define_insn "vec_dupv4sf"
20407 [(set (match_operand:V4SF 0 "register_operand" "=v,v,x")
20408 (vec_duplicate:V4SF
20409 (match_operand:SF 1 "nonimmediate_operand" "Yv,m,0")))]
20410 "TARGET_SSE"
20411 "@
20412 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
20413 vbroadcastss\t{%1, %0|%0, %1}
20414 shufps\t{$0, %0, %0|%0, %0, 0}"
20415 [(set_attr "isa" "avx,avx,noavx")
20416 (set_attr "type" "sseshuf1,ssemov,sseshuf1")
20417 (set_attr "length_immediate" "1,0,1")
20418 (set_attr "prefix_extra" "0,1,*")
20419 (set_attr "prefix" "maybe_evex,maybe_evex,orig")
20420 (set_attr "mode" "V4SF")])
20421
20422 (define_insn "*vec_dupv4si"
20423 [(set (match_operand:V4SI 0 "register_operand" "=v,v,x")
20424 (vec_duplicate:V4SI
20425 (match_operand:SI 1 "nonimmediate_operand" "Yv,m,0")))]
20426 "TARGET_SSE"
20427 "@
20428 %vpshufd\t{$0, %1, %0|%0, %1, 0}
20429 vbroadcastss\t{%1, %0|%0, %1}
20430 shufps\t{$0, %0, %0|%0, %0, 0}"
20431 [(set_attr "isa" "sse2,avx,noavx")
20432 (set_attr "type" "sselog1,ssemov,sselog1")
20433 (set_attr "length_immediate" "1,0,1")
20434 (set_attr "prefix_extra" "0,1,*")
20435 (set_attr "prefix" "maybe_vex,maybe_evex,orig")
20436 (set_attr "mode" "TI,V4SF,V4SF")])
20437
20438 (define_insn "*vec_dupv2di"
20439 [(set (match_operand:V2DI 0 "register_operand" "=x,v,v,x")
20440 (vec_duplicate:V2DI
20441 (match_operand:DI 1 "nonimmediate_operand" " 0,Yv,vm,0")))]
20442 "TARGET_SSE"
20443 "@
20444 punpcklqdq\t%0, %0
20445 vpunpcklqdq\t{%d1, %0|%0, %d1}
20446 %vmovddup\t{%1, %0|%0, %1}
20447 movlhps\t%0, %0"
20448 [(set_attr "isa" "sse2_noavx,avx,sse3,noavx")
20449 (set_attr "type" "sselog1,sselog1,sselog1,ssemov")
20450 (set_attr "prefix" "orig,maybe_evex,maybe_vex,orig")
20451 (set_attr "mode" "TI,TI,DF,V4SF")])
20452
20453 (define_insn "avx2_vbroadcasti128_<mode>"
20454 [(set (match_operand:VI_256 0 "register_operand" "=x,v,v")
20455 (vec_concat:VI_256
20456 (match_operand:<ssehalfvecmode> 1 "memory_operand" "m,m,m")
20457 (match_dup 1)))]
20458 "TARGET_AVX2"
20459 "@
20460 vbroadcasti128\t{%1, %0|%0, %1}
20461 vbroadcast<i128vldq>\t{%1, %0|%0, %1}
20462 vbroadcast<shuffletype>32x4\t{%1, %0|%0, %1}"
20463 [(set_attr "isa" "*,avx512dq,avx512vl")
20464 (set_attr "type" "ssemov")
20465 (set_attr "prefix_extra" "1")
20466 (set_attr "prefix" "vex,evex,evex")
20467 (set_attr "mode" "OI")])
20468
20469 ;; Modes handled by AVX vec_dup patterns.
20470 (define_mode_iterator AVX_VEC_DUP_MODE
20471 [V8SI V8SF V4DI V4DF])
20472 (define_mode_attr vecdupssescalarmodesuffix
20473 [(V8SF "ss") (V4DF "sd") (V8SI "ss") (V4DI "sd")])
20474 ;; Modes handled by AVX2 vec_dup patterns.
20475 (define_mode_iterator AVX2_VEC_DUP_MODE
20476 [V32QI V16QI V16HI V8HI V8SI V4SI])
20477
20478 (define_insn "*vec_dup<mode>"
20479 [(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand" "=x,x,v")
20480 (vec_duplicate:AVX2_VEC_DUP_MODE
20481 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,x,$r")))]
20482 "TARGET_AVX2"
20483 "@
20484 v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1}
20485 v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}
20486 #"
20487 [(set_attr "isa" "*,*,noavx512vl")
20488 (set_attr "type" "ssemov")
20489 (set_attr "prefix_extra" "1")
20490 (set_attr "prefix" "maybe_evex")
20491 (set_attr "mode" "<sseinsnmode>")
20492 (set (attr "preferred_for_speed")
20493 (cond [(eq_attr "alternative" "2")
20494 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
20495 ]
20496 (symbol_ref "true")))])
20497
20498 (define_insn "vec_dup<mode>"
20499 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x,x,v,x")
20500 (vec_duplicate:AVX_VEC_DUP_MODE
20501 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,m,x,v,?x")))]
20502 "TARGET_AVX"
20503 "@
20504 v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1}
20505 vbroadcast<vecdupssescalarmodesuffix>\t{%1, %0|%0, %1}
20506 v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}
20507 v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %g0|%g0, %x1}
20508 #"
20509 [(set_attr "type" "ssemov")
20510 (set_attr "prefix_extra" "1")
20511 (set_attr "prefix" "maybe_evex")
20512 (set_attr "isa" "avx2,noavx2,avx2,avx512f,noavx2")
20513 (set_attr "mode" "<sseinsnmode>,V8SF,<sseinsnmode>,<sseinsnmode>,V8SF")])
20514
20515 (define_split
20516 [(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand")
20517 (vec_duplicate:AVX2_VEC_DUP_MODE
20518 (match_operand:<ssescalarmode> 1 "register_operand")))]
20519 "TARGET_AVX2
20520 /* Disable this splitter if avx512vl_vec_dup_gprv*[qhs]i insn is
20521 available, because then we can broadcast from GPRs directly.
20522 For V*[QH]I modes it requires both -mavx512vl and -mavx512bw,
20523 for V*SI mode it requires just -mavx512vl. */
20524 && !(TARGET_AVX512VL
20525 && (TARGET_AVX512BW || <ssescalarmode>mode == SImode))
20526 && reload_completed && GENERAL_REG_P (operands[1])"
20527 [(const_int 0)]
20528 {
20529 emit_insn (gen_vec_setv4si_0 (gen_lowpart (V4SImode, operands[0]),
20530 CONST0_RTX (V4SImode),
20531 gen_lowpart (SImode, operands[1])));
20532 emit_insn (gen_avx2_pbroadcast<mode> (operands[0],
20533 gen_lowpart (<ssexmmmode>mode,
20534 operands[0])));
20535 DONE;
20536 })
20537
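;; Illustrative only (not part of the machine description): the splitter
;; above handles broadcasting a value that lives in a general-purpose
;; register when no direct GPR broadcast is available, e.g. for C code such
;; as the sketch below (function name is ours).  With plain AVX2 this is a
;; vmovd into %xmm followed by vpbroadcastd; with -mavx512vl the splitter
;; is disabled and vpbroadcastd can read the GPR directly.
;;
;;   #include <immintrin.h>
;;
;;   __m256i
;;   splat_int (int x)
;;   {
;;     return _mm256_set1_epi32 (x);
;;   }
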
20538 (define_split
20539 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand")
20540 (vec_duplicate:AVX_VEC_DUP_MODE
20541 (match_operand:<ssescalarmode> 1 "register_operand")))]
20542 "TARGET_AVX && !TARGET_AVX2 && reload_completed"
20543 [(set (match_dup 2)
20544 (vec_duplicate:<ssehalfvecmode> (match_dup 1)))
20545 (set (match_dup 0)
20546 (vec_concat:AVX_VEC_DUP_MODE (match_dup 2) (match_dup 2)))]
20547 "operands[2] = gen_lowpart (<ssehalfvecmode>mode, operands[0]);")
20548
20549 (define_insn "avx_vbroadcastf128_<mode>"
20550 [(set (match_operand:V_256 0 "register_operand" "=x,x,x,v,v,v,v")
20551 (vec_concat:V_256
20552 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x,m,0,m,0")
20553 (match_dup 1)))]
20554 "TARGET_AVX"
20555 "@
20556 vbroadcast<i128>\t{%1, %0|%0, %1}
20557 vinsert<i128>\t{$1, %1, %0, %0|%0, %0, %1, 1}
20558 vperm2<i128>\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}
20559 vbroadcast<i128vldq>\t{%1, %0|%0, %1}
20560 vinsert<i128vldq>\t{$1, %1, %0, %0|%0, %0, %1, 1}
20561 vbroadcast<shuffletype>32x4\t{%1, %0|%0, %1}
20562 vinsert<shuffletype>32x4\t{$1, %1, %0, %0|%0, %0, %1, 1}"
20563 [(set_attr "isa" "*,*,*,avx512dq,avx512dq,avx512vl,avx512vl")
20564 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,sselog1,ssemov,sselog1")
20565 (set_attr "prefix_extra" "1")
20566 (set_attr "length_immediate" "0,1,1,0,1,0,1")
20567 (set_attr "prefix" "vex,vex,vex,evex,evex,evex,evex")
20568 (set_attr "mode" "<sseinsnmode>")])
20569
20570 ;; For broadcast[i|f]32x2.  Note that there is no v4sf version, only v4si.
20571 (define_mode_iterator VI4F_BRCST32x2
20572 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
20573 V16SF (V8SF "TARGET_AVX512VL")])
20574
20575 (define_mode_attr 64x2mode
20576 [(V8DF "V2DF") (V8DI "V2DI") (V4DI "V2DI") (V4DF "V2DF")])
20577
20578 (define_mode_attr 32x2mode
20579 [(V16SF "V2SF") (V16SI "V2SI") (V8SI "V2SI")
20580 (V8SF "V2SF") (V4SI "V2SI")])
20581
20582 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>"
20583 [(set (match_operand:VI4F_BRCST32x2 0 "register_operand" "=v")
20584 (vec_duplicate:VI4F_BRCST32x2
20585 (vec_select:<32x2mode>
20586 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
20587 (parallel [(const_int 0) (const_int 1)]))))]
20588 "TARGET_AVX512DQ"
20589 "vbroadcast<shuffletype>32x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
20590 [(set_attr "type" "ssemov")
20591 (set_attr "prefix_extra" "1")
20592 (set_attr "prefix" "evex")
20593 (set_attr "mode" "<sseinsnmode>")])
20594
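;; Illustrative only (not part of the machine description): a minimal C
;; sketch of the 512-bit form, assuming the _mm512_broadcast_i32x2 spelling
;; reachable via <immintrin.h> (the function name is ours):
;;
;;   #include <immintrin.h>
;;
;;   __m512i
;;   splat_low_two_dwords (__m128i x)
;;   {
;;     /* vbroadcasti32x2 repeats the low two 32-bit elements of %xmm
;;        across the whole destination.  */
;;     return _mm512_broadcast_i32x2 (x);
;;   }
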
20595 (define_insn "<mask_codefor>avx512vl_broadcast<mode><mask_name>_1"
20596 [(set (match_operand:VI4F_256 0 "register_operand" "=v,v")
20597 (vec_duplicate:VI4F_256
20598 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
20599 "TARGET_AVX512VL"
20600 "@
20601 vshuf<shuffletype>32x4\t{$0x0, %t1, %t1, %0<mask_operand2>|%0<mask_operand2>, %t1, %t1, 0x0}
20602 vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
20603 [(set_attr "type" "ssemov")
20604 (set_attr "prefix_extra" "1")
20605 (set_attr "prefix" "evex")
20606 (set_attr "mode" "<sseinsnmode>")])
20607
20608 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1"
20609 [(set (match_operand:V16FI 0 "register_operand" "=v,v")
20610 (vec_duplicate:V16FI
20611 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
20612 "TARGET_AVX512DQ"
20613 "@
20614 vshuf<shuffletype>32x4\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
20615 vbroadcast<shuffletype>32x8\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
20616 [(set_attr "type" "ssemov")
20617 (set_attr "prefix_extra" "1")
20618 (set_attr "prefix" "evex")
20619 (set_attr "mode" "<sseinsnmode>")])
20620
20621 ;; For broadcast[i|f]64x2
20622 (define_mode_iterator VI8F_BRCST64x2
20623 [V8DI V8DF (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")])
20624
20625 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1"
20626 [(set (match_operand:VI8F_BRCST64x2 0 "register_operand" "=v,v")
20627 (vec_duplicate:VI8F_BRCST64x2
20628 (match_operand:<64x2mode> 1 "nonimmediate_operand" "v,m")))]
20629 "TARGET_AVX512DQ"
20630 "@
20631 vshuf<shuffletype>64x2\t{$0x0, %<xtg_mode>1, %<xtg_mode>1, %0<mask_operand2>|%0<mask_operand2>, %<xtg_mode>1, %<xtg_mode>1, 0x0}
20632 vbroadcast<shuffletype>64x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
20633 [(set_attr "type" "ssemov")
20634 (set_attr "prefix_extra" "1")
20635 (set_attr "prefix" "evex")
20636 (set_attr "mode" "<sseinsnmode>")])
20637
20638 (define_insn "avx512cd_maskb_vec_dup<mode>"
20639 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
20640 (vec_duplicate:VI8_AVX512VL
20641 (zero_extend:DI
20642 (match_operand:QI 1 "register_operand" "k"))))]
20643 "TARGET_AVX512CD"
20644 "vpbroadcastmb2q\t{%1, %0|%0, %1}"
20645 [(set_attr "type" "mskmov")
20646 (set_attr "prefix" "evex")
20647 (set_attr "mode" "XI")])
20648
20649 (define_insn "avx512cd_maskw_vec_dup<mode>"
20650 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
20651 (vec_duplicate:VI4_AVX512VL
20652 (zero_extend:SI
20653 (match_operand:HI 1 "register_operand" "k"))))]
20654 "TARGET_AVX512CD"
20655 "vpbroadcastmw2d\t{%1, %0|%0, %1}"
20656 [(set_attr "type" "mskmov")
20657 (set_attr "prefix" "evex")
20658 (set_attr "mode" "XI")])
20659
20660 (define_insn "<sse2_avx_avx512f>_vpermilvar<mode>3<mask_name>"
20661 [(set (match_operand:VF 0 "register_operand" "=v")
20662 (unspec:VF
20663 [(match_operand:VF 1 "register_operand" "v")
20664 (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "vm")]
20665 UNSPEC_VPERMIL))]
20666 "TARGET_AVX && <mask_mode512bit_condition>"
20667 "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
20668 [(set_attr "type" "sselog")
20669 (set_attr "prefix_extra" "1")
20670 (set_attr "btver2_decode" "vector")
20671 (set_attr "prefix" "<mask_prefix>")
20672 (set_attr "mode" "<sseinsnmode>")])
20673
20674 (define_mode_iterator VPERMI2
20675 [V16SI V16SF V8DI V8DF
20676 (V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL")
20677 (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
20678 (V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
20679 (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")
20680 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512BW && TARGET_AVX512VL")
20681 (V8HI "TARGET_AVX512BW && TARGET_AVX512VL")
20682 (V64QI "TARGET_AVX512VBMI") (V32QI "TARGET_AVX512VBMI && TARGET_AVX512VL")
20683 (V16QI "TARGET_AVX512VBMI && TARGET_AVX512VL")])
20684
20685 (define_mode_iterator VPERMI2I
20686 [V16SI V8DI
20687 (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
20688 (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
20689 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512BW && TARGET_AVX512VL")
20690 (V8HI "TARGET_AVX512BW && TARGET_AVX512VL")
20691 (V64QI "TARGET_AVX512VBMI") (V32QI "TARGET_AVX512VBMI && TARGET_AVX512VL")
20692 (V16QI "TARGET_AVX512VBMI && TARGET_AVX512VL")])
20693
20694 (define_expand "<avx512>_vpermi2var<mode>3_mask"
20695 [(set (match_operand:VPERMI2 0 "register_operand")
20696 (vec_merge:VPERMI2
20697 (unspec:VPERMI2
20698 [(match_operand:<sseintvecmode> 2 "register_operand")
20699 (match_operand:VPERMI2 1 "register_operand")
20700 (match_operand:VPERMI2 3 "nonimmediate_operand")]
20701 UNSPEC_VPERMT2)
20702 (match_dup 5)
20703 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
20704 "TARGET_AVX512F"
20705 {
20706 operands[2] = force_reg (<sseintvecmode>mode, operands[2]);
20707 operands[5] = gen_lowpart (<MODE>mode, operands[2]);
20708 })
20709
20710 (define_insn "*<avx512>_vpermi2var<mode>3_mask"
20711 [(set (match_operand:VPERMI2I 0 "register_operand" "=v")
20712 (vec_merge:VPERMI2I
20713 (unspec:VPERMI2I
20714 [(match_operand:<sseintvecmode> 2 "register_operand" "0")
20715 (match_operand:VPERMI2I 1 "register_operand" "v")
20716 (match_operand:VPERMI2I 3 "nonimmediate_operand" "vm")]
20717 UNSPEC_VPERMT2)
20718 (match_dup 2)
20719 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
20720 "TARGET_AVX512F"
20721 "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
20722 [(set_attr "type" "sselog")
20723 (set_attr "prefix" "evex")
20724 (set_attr "mode" "<sseinsnmode>")])
20725
20726 (define_insn "*<avx512>_vpermi2var<mode>3_mask"
20727 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
20728 (vec_merge:VF_AVX512VL
20729 (unspec:VF_AVX512VL
20730 [(match_operand:<sseintvecmode> 2 "register_operand" "0")
20731 (match_operand:VF_AVX512VL 1 "register_operand" "v")
20732 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "vm")]
20733 UNSPEC_VPERMT2)
20734 (subreg:VF_AVX512VL (match_dup 2) 0)
20735 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
20736 "TARGET_AVX512F"
20737 "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
20738 [(set_attr "type" "sselog")
20739 (set_attr "prefix" "evex")
20740 (set_attr "mode" "<sseinsnmode>")])
20741
20742 (define_expand "<avx512>_vpermt2var<mode>3_maskz"
20743 [(match_operand:VPERMI2 0 "register_operand")
20744 (match_operand:<sseintvecmode> 1 "register_operand")
20745 (match_operand:VPERMI2 2 "register_operand")
20746 (match_operand:VPERMI2 3 "nonimmediate_operand")
20747 (match_operand:<avx512fmaskmode> 4 "register_operand")]
20748 "TARGET_AVX512F"
20749 {
20750 emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 (
20751 operands[0], operands[1], operands[2], operands[3],
20752 CONST0_RTX (<MODE>mode), operands[4]));
20753 DONE;
20754 })
20755
20756 (define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>"
20757 [(set (match_operand:VPERMI2 0 "register_operand" "=v,v")
20758 (unspec:VPERMI2
20759 [(match_operand:<sseintvecmode> 1 "register_operand" "v,0")
20760 (match_operand:VPERMI2 2 "register_operand" "0,v")
20761 (match_operand:VPERMI2 3 "nonimmediate_operand" "vm,vm")]
20762 UNSPEC_VPERMT2))]
20763 "TARGET_AVX512F"
20764 "@
20765 vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}
20766 vpermi2<ssemodesuffix>\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3}"
20767 [(set_attr "type" "sselog")
20768 (set_attr "prefix" "evex")
20769 (set_attr "mode" "<sseinsnmode>")])
20770
20771 (define_insn "<avx512>_vpermt2var<mode>3_mask"
20772 [(set (match_operand:VPERMI2 0 "register_operand" "=v")
20773 (vec_merge:VPERMI2
20774 (unspec:VPERMI2
20775 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
20776 (match_operand:VPERMI2 2 "register_operand" "0")
20777 (match_operand:VPERMI2 3 "nonimmediate_operand" "vm")]
20778 UNSPEC_VPERMT2)
20779 (match_dup 2)
20780 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
20781 "TARGET_AVX512F"
20782 "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
20783 [(set_attr "type" "sselog")
20784 (set_attr "prefix" "evex")
20785 (set_attr "mode" "<sseinsnmode>")])
20786
20787 (define_expand "avx_vperm2f128<mode>3"
20788 [(set (match_operand:AVX256MODE2P 0 "register_operand")
20789 (unspec:AVX256MODE2P
20790 [(match_operand:AVX256MODE2P 1 "register_operand")
20791 (match_operand:AVX256MODE2P 2 "nonimmediate_operand")
20792 (match_operand:SI 3 "const_0_to_255_operand")]
20793 UNSPEC_VPERMIL2F128))]
20794 "TARGET_AVX"
20795 {
20796 int mask = INTVAL (operands[3]);
20797 if ((mask & 0x88) == 0)
20798 {
20799 rtx perm[<ssescalarnum>], t1, t2;
20800 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
20801
20802 base = (mask & 3) * nelt2;
20803 for (i = 0; i < nelt2; ++i)
20804 perm[i] = GEN_INT (base + i);
20805
20806 base = ((mask >> 4) & 3) * nelt2;
20807 for (i = 0; i < nelt2; ++i)
20808 perm[i + nelt2] = GEN_INT (base + i);
20809
20810 t2 = gen_rtx_VEC_CONCAT (<ssedoublevecmode>mode,
20811 operands[1], operands[2]);
20812 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
20813 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
20814 t2 = gen_rtx_SET (operands[0], t2);
20815 emit_insn (t2);
20816 DONE;
20817 }
20818 })
20819
20820 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
20821 ;; means that in order to represent this properly in rtl we'd have to
20822 ;; nest *another* vec_concat with a zero operand and do the select from
20823 ;; a 4x wide vector. That doesn't seem very nice.
20824 (define_insn "*avx_vperm2f128<mode>_full"
20825 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
20826 (unspec:AVX256MODE2P
20827 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
20828 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
20829 (match_operand:SI 3 "const_0_to_255_operand" "n")]
20830 UNSPEC_VPERMIL2F128))]
20831 "TARGET_AVX"
20832 "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
20833 [(set_attr "type" "sselog")
20834 (set_attr "prefix_extra" "1")
20835 (set_attr "length_immediate" "1")
20836 (set_attr "prefix" "vex")
20837 (set_attr "mode" "<sseinsnmode>")])
20838
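;; Illustrative only (not part of the machine description): bits 1:0 and
;; 5:4 of the imm8 select the source lane for the low and high destination
;; lane out of {src1.lo, src1.hi, src2.lo, src2.hi}, while bits 3 and 7
;; zero the corresponding lane; the zeroing cases are the ones only the
;; unspec form above can represent.  A minimal C sketch (function name is
;; ours):
;;
;;   #include <immintrin.h>
;;
;;   __m256
;;   zero_low_take_b_low_as_high (__m256 a, __m256 b)
;;   {
;;     /* imm8 0x28 = 0b00101000: bit 3 zeroes the low lane, field 5:4 = 2
;;        selects the low lane of b for the high lane.  */
;;     return _mm256_permute2f128_ps (a, b, 0x28);
;;   }
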
20839 (define_insn "*avx_vperm2f128<mode>_nozero"
20840 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
20841 (vec_select:AVX256MODE2P
20842 (vec_concat:<ssedoublevecmode>
20843 (match_operand:AVX256MODE2P 1 "register_operand" "x")
20844 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
20845 (match_parallel 3 ""
20846 [(match_operand 4 "const_int_operand")])))]
20847 "TARGET_AVX
20848 && avx_vperm2f128_parallel (operands[3], <MODE>mode)"
20849 {
20850 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
20851 if (mask == 0x12)
20852 return "vinsert<i128>\t{$0, %x2, %1, %0|%0, %1, %x2, 0}";
20853 if (mask == 0x20)
20854 return "vinsert<i128>\t{$1, %x2, %1, %0|%0, %1, %x2, 1}";
20855 operands[3] = GEN_INT (mask);
20856 return "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
20857 }
20858 [(set_attr "type" "sselog")
20859 (set_attr "prefix_extra" "1")
20860 (set_attr "length_immediate" "1")
20861 (set_attr "prefix" "vex")
20862 (set_attr "mode" "<sseinsnmode>")])
20863
20864 (define_insn "*ssse3_palignr<mode>_perm"
20865 [(set (match_operand:V_128 0 "register_operand" "=x,x,v")
20866 (vec_select:V_128
20867 (match_operand:V_128 1 "register_operand" "0,x,v")
20868 (match_parallel 2 "palignr_operand"
20869 [(match_operand 3 "const_int_operand" "n,n,n")])))]
20870 "TARGET_SSSE3"
20871 {
20872 operands[2] = (GEN_INT (INTVAL (operands[3])
20873 * GET_MODE_UNIT_SIZE (GET_MODE (operands[0]))));
20874
20875 switch (which_alternative)
20876 {
20877 case 0:
20878 return "palignr\t{%2, %1, %0|%0, %1, %2}";
20879 case 1:
20880 case 2:
20881 return "vpalignr\t{%2, %1, %1, %0|%0, %1, %1, %2}";
20882 default:
20883 gcc_unreachable ();
20884 }
20885 }
20886 [(set_attr "isa" "noavx,avx,avx512bw")
20887 (set_attr "type" "sseishft")
20888 (set_attr "atom_unit" "sishuf")
20889 (set_attr "prefix_data16" "1,*,*")
20890 (set_attr "prefix_extra" "1")
20891 (set_attr "length_immediate" "1")
20892 (set_attr "prefix" "orig,vex,evex")])
20893
20894 (define_expand "avx512vl_vinsert<mode>"
20895 [(match_operand:VI48F_256 0 "register_operand")
20896 (match_operand:VI48F_256 1 "register_operand")
20897 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
20898 (match_operand:SI 3 "const_0_to_1_operand")
20899 (match_operand:VI48F_256 4 "register_operand")
20900 (match_operand:<avx512fmaskmode> 5 "register_operand")]
20901 "TARGET_AVX512VL"
20902 {
20903 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
20904
20905 switch (INTVAL (operands[3]))
20906 {
20907 case 0:
20908 insn = gen_vec_set_lo_<mode>_mask;
20909 break;
20910 case 1:
20911 insn = gen_vec_set_hi_<mode>_mask;
20912 break;
20913 default:
20914 gcc_unreachable ();
20915 }
20916
20917 emit_insn (insn (operands[0], operands[1], operands[2], operands[4],
20918 operands[5]));
20919 DONE;
20920 })
20921
20922 (define_expand "avx_vinsertf128<mode>"
20923 [(match_operand:V_256 0 "register_operand")
20924 (match_operand:V_256 1 "register_operand")
20925 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
20926 (match_operand:SI 3 "const_0_to_1_operand")]
20927 "TARGET_AVX"
20928 {
20929 rtx (*insn)(rtx, rtx, rtx);
20930
20931 switch (INTVAL (operands[3]))
20932 {
20933 case 0:
20934 insn = gen_vec_set_lo_<mode>;
20935 break;
20936 case 1:
20937 insn = gen_vec_set_hi_<mode>;
20938 break;
20939 default:
20940 gcc_unreachable ();
20941 }
20942
20943 emit_insn (insn (operands[0], operands[1], operands[2]));
20944 DONE;
20945 })
20946
20947 (define_insn "vec_set_lo_<mode><mask_name>"
20948 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
20949 (vec_concat:VI8F_256
20950 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
20951 (vec_select:<ssehalfvecmode>
20952 (match_operand:VI8F_256 1 "register_operand" "v")
20953 (parallel [(const_int 2) (const_int 3)]))))]
20954 "TARGET_AVX && <mask_avx512dq_condition>"
20955 {
20956 if (TARGET_AVX512DQ)
20957 return "vinsert<shuffletype>64x2\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
20958 else if (TARGET_AVX512VL)
20959 return "vinsert<shuffletype>32x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
20960 else
20961 return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}";
20962 }
20963 [(set_attr "type" "sselog")
20964 (set_attr "prefix_extra" "1")
20965 (set_attr "length_immediate" "1")
20966 (set_attr "prefix" "vex")
20967 (set_attr "mode" "<sseinsnmode>")])
20968
20969 (define_insn "vec_set_hi_<mode><mask_name>"
20970 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
20971 (vec_concat:VI8F_256
20972 (vec_select:<ssehalfvecmode>
20973 (match_operand:VI8F_256 1 "register_operand" "v")
20974 (parallel [(const_int 0) (const_int 1)]))
20975 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
20976 "TARGET_AVX && <mask_avx512dq_condition>"
20977 {
20978 if (TARGET_AVX512DQ)
20979 return "vinsert<shuffletype>64x2\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
20980 else if (TARGET_AVX512VL)
20981 return "vinsert<shuffletype>32x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
20982 else
20983 return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}";
20984 }
20985 [(set_attr "type" "sselog")
20986 (set_attr "prefix_extra" "1")
20987 (set_attr "length_immediate" "1")
20988 (set_attr "prefix" "vex")
20989 (set_attr "mode" "<sseinsnmode>")])
20990
20991 (define_insn "vec_set_lo_<mode><mask_name>"
20992 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
20993 (vec_concat:VI4F_256
20994 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
20995 (vec_select:<ssehalfvecmode>
20996 (match_operand:VI4F_256 1 "register_operand" "v")
20997 (parallel [(const_int 4) (const_int 5)
20998 (const_int 6) (const_int 7)]))))]
20999 "TARGET_AVX"
21000 {
21001 if (TARGET_AVX512VL)
21002 return "vinsert<shuffletype>32x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
21003 else
21004 return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}";
21005 }
21006 [(set_attr "type" "sselog")
21007 (set_attr "prefix_extra" "1")
21008 (set_attr "length_immediate" "1")
21009 (set_attr "prefix" "vex")
21010 (set_attr "mode" "<sseinsnmode>")])
21011
21012 (define_insn "vec_set_hi_<mode><mask_name>"
21013 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
21014 (vec_concat:VI4F_256
21015 (vec_select:<ssehalfvecmode>
21016 (match_operand:VI4F_256 1 "register_operand" "v")
21017 (parallel [(const_int 0) (const_int 1)
21018 (const_int 2) (const_int 3)]))
21019 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
21020 "TARGET_AVX"
21021 {
21022 if (TARGET_AVX512VL)
21023 return "vinsert<shuffletype>32x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
21024 else
21025 return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}";
21026 }
21027 [(set_attr "type" "sselog")
21028 (set_attr "prefix_extra" "1")
21029 (set_attr "length_immediate" "1")
21030 (set_attr "prefix" "vex")
21031 (set_attr "mode" "<sseinsnmode>")])
21032
21033 (define_insn "vec_set_lo_v16hi"
21034 [(set (match_operand:V16HI 0 "register_operand" "=x,v")
21035 (vec_concat:V16HI
21036 (match_operand:V8HI 2 "nonimmediate_operand" "xm,vm")
21037 (vec_select:V8HI
21038 (match_operand:V16HI 1 "register_operand" "x,v")
21039 (parallel [(const_int 8) (const_int 9)
21040 (const_int 10) (const_int 11)
21041 (const_int 12) (const_int 13)
21042 (const_int 14) (const_int 15)]))))]
21043 "TARGET_AVX"
21044 "@
21045 vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}
21046 vinserti32x4\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
21047 [(set_attr "type" "sselog")
21048 (set_attr "prefix_extra" "1")
21049 (set_attr "length_immediate" "1")
21050 (set_attr "prefix" "vex,evex")
21051 (set_attr "mode" "OI")])
21052
21053 (define_insn "vec_set_hi_v16hi"
21054 [(set (match_operand:V16HI 0 "register_operand" "=x,v")
21055 (vec_concat:V16HI
21056 (vec_select:V8HI
21057 (match_operand:V16HI 1 "register_operand" "x,v")
21058 (parallel [(const_int 0) (const_int 1)
21059 (const_int 2) (const_int 3)
21060 (const_int 4) (const_int 5)
21061 (const_int 6) (const_int 7)]))
21062 (match_operand:V8HI 2 "nonimmediate_operand" "xm,vm")))]
21063 "TARGET_AVX"
21064 "@
21065 vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
21066 vinserti32x4\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
21067 [(set_attr "type" "sselog")
21068 (set_attr "prefix_extra" "1")
21069 (set_attr "length_immediate" "1")
21070 (set_attr "prefix" "vex,evex")
21071 (set_attr "mode" "OI")])
21072
21073 (define_insn "vec_set_lo_v32qi"
21074 [(set (match_operand:V32QI 0 "register_operand" "=x,v")
21075 (vec_concat:V32QI
21076 (match_operand:V16QI 2 "nonimmediate_operand" "xm,v")
21077 (vec_select:V16QI
21078 (match_operand:V32QI 1 "register_operand" "x,v")
21079 (parallel [(const_int 16) (const_int 17)
21080 (const_int 18) (const_int 19)
21081 (const_int 20) (const_int 21)
21082 (const_int 22) (const_int 23)
21083 (const_int 24) (const_int 25)
21084 (const_int 26) (const_int 27)
21085 (const_int 28) (const_int 29)
21086 (const_int 30) (const_int 31)]))))]
21087 "TARGET_AVX"
21088 "@
21089 vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}
21090 vinserti32x4\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
21091 [(set_attr "type" "sselog")
21092 (set_attr "prefix_extra" "1")
21093 (set_attr "length_immediate" "1")
21094 (set_attr "prefix" "vex,evex")
21095 (set_attr "mode" "OI")])
21096
21097 (define_insn "vec_set_hi_v32qi"
21098 [(set (match_operand:V32QI 0 "register_operand" "=x,v")
21099 (vec_concat:V32QI
21100 (vec_select:V16QI
21101 (match_operand:V32QI 1 "register_operand" "x,v")
21102 (parallel [(const_int 0) (const_int 1)
21103 (const_int 2) (const_int 3)
21104 (const_int 4) (const_int 5)
21105 (const_int 6) (const_int 7)
21106 (const_int 8) (const_int 9)
21107 (const_int 10) (const_int 11)
21108 (const_int 12) (const_int 13)
21109 (const_int 14) (const_int 15)]))
21110 (match_operand:V16QI 2 "nonimmediate_operand" "xm,vm")))]
21111 "TARGET_AVX"
21112 "@
21113 vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
21114 vinserti32x4\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
21115 [(set_attr "type" "sselog")
21116 (set_attr "prefix_extra" "1")
21117 (set_attr "length_immediate" "1")
21118 (set_attr "prefix" "vex,evex")
21119 (set_attr "mode" "OI")])
21120
21121 (define_insn "<avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>"
21122 [(set (match_operand:V48_AVX2 0 "register_operand" "=x")
21123 (unspec:V48_AVX2
21124 [(match_operand:<sseintvecmode> 2 "register_operand" "x")
21125 (match_operand:V48_AVX2 1 "memory_operand" "m")]
21126 UNSPEC_MASKMOV))]
21127 "TARGET_AVX"
21128 "v<sseintprefix>maskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
21129 [(set_attr "type" "sselog1")
21130 (set_attr "prefix_extra" "1")
21131 (set_attr "prefix" "vex")
21132 (set_attr "btver2_decode" "vector")
21133 (set_attr "mode" "<sseinsnmode>")])
21134
21135 (define_insn "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>"
21136 [(set (match_operand:V48_AVX2 0 "memory_operand" "+m")
21137 (unspec:V48_AVX2
21138 [(match_operand:<sseintvecmode> 1 "register_operand" "x")
21139 (match_operand:V48_AVX2 2 "register_operand" "x")
21140 (match_dup 0)]
21141 UNSPEC_MASKMOV))]
21142 "TARGET_AVX"
21143 "v<sseintprefix>maskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
21144 [(set_attr "type" "sselog1")
21145 (set_attr "prefix_extra" "1")
21146 (set_attr "prefix" "vex")
21147 (set_attr "btver2_decode" "vector")
21148 (set_attr "mode" "<sseinsnmode>")])
21149
21150 (define_expand "maskload<mode><sseintvecmodelower>"
21151 [(set (match_operand:V48_AVX2 0 "register_operand")
21152 (unspec:V48_AVX2
21153 [(match_operand:<sseintvecmode> 2 "register_operand")
21154 (match_operand:V48_AVX2 1 "memory_operand")]
21155 UNSPEC_MASKMOV))]
21156 "TARGET_AVX")
21157
21158 (define_expand "maskload<mode><avx512fmaskmodelower>"
21159 [(set (match_operand:V48_AVX512VL 0 "register_operand")
21160 (vec_merge:V48_AVX512VL
21161 (match_operand:V48_AVX512VL 1 "memory_operand")
21162 (match_dup 0)
21163 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
21164 "TARGET_AVX512F")
21165
21166 (define_expand "maskload<mode><avx512fmaskmodelower>"
21167 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
21168 (vec_merge:VI12_AVX512VL
21169 (match_operand:VI12_AVX512VL 1 "memory_operand")
21170 (match_dup 0)
21171 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
21172 "TARGET_AVX512BW")
21173
21174 (define_expand "maskstore<mode><sseintvecmodelower>"
21175 [(set (match_operand:V48_AVX2 0 "memory_operand")
21176 (unspec:V48_AVX2
21177 [(match_operand:<sseintvecmode> 2 "register_operand")
21178 (match_operand:V48_AVX2 1 "register_operand")
21179 (match_dup 0)]
21180 UNSPEC_MASKMOV))]
21181 "TARGET_AVX")
21182
21183 (define_expand "maskstore<mode><avx512fmaskmodelower>"
21184 [(set (match_operand:V48_AVX512VL 0 "memory_operand")
21185 (vec_merge:V48_AVX512VL
21186 (match_operand:V48_AVX512VL 1 "register_operand")
21187 (match_dup 0)
21188 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
21189 "TARGET_AVX512F")
21190
21191 (define_expand "maskstore<mode><avx512fmaskmodelower>"
21192 [(set (match_operand:VI12_AVX512VL 0 "memory_operand")
21193 (vec_merge:VI12_AVX512VL
21194 (match_operand:VI12_AVX512VL 1 "register_operand")
21195 (match_dup 0)
21196 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
21197 "TARGET_AVX512BW")
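
;; A minimal, illustrative C sketch (helper names invented here) of the
;; masked-move semantics these maskload/maskstore patterns provide,
;; assuming the usual AVX intrinsics from <immintrin.h>:
;;
;;   #include <immintrin.h>
;;
;;   /* Load only the lanes whose mask MSB is set; other lanes become 0.  */
;;   __m256 load_masked (const float *p, __m256i mask)
;;   {
;;     return _mm256_maskload_ps (p, mask);      /* vmaskmovps  */
;;   }
;;
;;   /* Store only the lanes whose mask MSB is set; others are untouched.  */
;;   void store_masked (float *p, __m256i mask, __m256 v)
;;   {
;;     _mm256_maskstore_ps (p, mask, v);         /* vmaskmovps  */
;;   }
;;
;; The AVX512 expanders above take a mask register operand instead and are
;; used for the vectorizer's masked loads and stores.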
21198
21199 (define_expand "cbranch<mode>4"
21200 [(set (reg:CC FLAGS_REG)
21201 (compare:CC (match_operand:VI48_AVX 1 "register_operand")
21202 (match_operand:VI48_AVX 2 "nonimmediate_operand")))
21203 (set (pc) (if_then_else
21204 (match_operator 0 "bt_comparison_operator"
21205 [(reg:CC FLAGS_REG) (const_int 0)])
21206 (label_ref (match_operand 3))
21207 (pc)))]
21208 "TARGET_SSE4_1"
21209 {
21210 ix86_expand_branch (GET_CODE (operands[0]),
21211 operands[1], operands[2], operands[3]);
21212 DONE;
21213 })
21214
21215
21216 (define_insn_and_split "avx_<castmode><avxsizesuffix>_<castmode>"
21217 [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
21218 (vec_concat:AVX256MODE2P
21219 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")
21220 (unspec:<ssehalfvecmode> [(const_int 0)] UNSPEC_CAST)))]
21221 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
21222 "#"
21223 "&& reload_completed"
21224 [(set (match_dup 0) (match_dup 1))]
21225 {
21226 if (REG_P (operands[0]))
21227 operands[0] = gen_lowpart (<ssehalfvecmode>mode, operands[0]);
21228 else
21229 operands[1] = lowpart_subreg (<MODE>mode, operands[1],
21230 <ssehalfvecmode>mode);
21231 })
21232
21233 ;; Modes handled by vec_init expanders.
21234 (define_mode_iterator VEC_INIT_MODE
21235 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
21236 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
21237 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
21238 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
21239 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
21240 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")
21241 (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX")])
21242
;; Likewise, but for initialization from half sized vectors.
;; Thus, these are all VEC_INIT_MODE modes except the V2?? ones
;; (V2DI, V2DF and V2TI), whose halves would be scalar rather than
;; vector modes.
21245 (define_mode_iterator VEC_INIT_HALF_MODE
21246 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
21247 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
21248 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
21249 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX")
21250 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
21251 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX")
21252 (V4TI "TARGET_AVX512F")])
21253
21254 (define_expand "vec_init<mode><ssescalarmodelower>"
21255 [(match_operand:VEC_INIT_MODE 0 "register_operand")
21256 (match_operand 1)]
21257 "TARGET_SSE"
21258 {
21259 ix86_expand_vector_init (false, operands[0], operands[1]);
21260 DONE;
21261 })
21262
21263 (define_expand "vec_init<mode><ssehalfvecmodelower>"
21264 [(match_operand:VEC_INIT_HALF_MODE 0 "register_operand")
21265 (match_operand 1)]
21266 "TARGET_SSE"
21267 {
21268 ix86_expand_vector_init (false, operands[0], operands[1]);
21269 DONE;
21270 })
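
;; A minimal, illustrative C sketch (helper name invented here) of code
;; that reaches these vec_init expanders, assuming <immintrin.h>:
;;
;;   #include <immintrin.h>
;;
;;   __m128i make_v4si (int a, int b, int c, int d)
;;   {
;;     /* Element-wise vector construction; expanded through
;;        ix86_expand_vector_init.  */
;;     return _mm_set_epi32 (d, c, b, a);
;;   }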
21271
21272 (define_insn "<avx2_avx512>_ashrv<mode><mask_name>"
21273 [(set (match_operand:VI48_AVX512F_AVX512VL 0 "register_operand" "=v")
21274 (ashiftrt:VI48_AVX512F_AVX512VL
21275 (match_operand:VI48_AVX512F_AVX512VL 1 "register_operand" "v")
21276 (match_operand:VI48_AVX512F_AVX512VL 2 "nonimmediate_operand" "vm")))]
21277 "TARGET_AVX2 && <mask_mode512bit_condition>"
21278 "vpsrav<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
21279 [(set_attr "type" "sseishft")
21280 (set_attr "prefix" "maybe_evex")
21281 (set_attr "mode" "<sseinsnmode>")])
21282
21283 (define_insn "<avx2_avx512>_ashrv<mode><mask_name>"
21284 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
21285 (ashiftrt:VI2_AVX512VL
21286 (match_operand:VI2_AVX512VL 1 "register_operand" "v")
21287 (match_operand:VI2_AVX512VL 2 "nonimmediate_operand" "vm")))]
21288 "TARGET_AVX512BW"
21289 "vpsravw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
21290 [(set_attr "type" "sseishft")
21291 (set_attr "prefix" "maybe_evex")
21292 (set_attr "mode" "<sseinsnmode>")])
21293
21294 (define_insn "<avx2_avx512>_<shift_insn>v<mode><mask_name>"
21295 [(set (match_operand:VI48_AVX512F 0 "register_operand" "=v")
21296 (any_lshift:VI48_AVX512F
21297 (match_operand:VI48_AVX512F 1 "register_operand" "v")
21298 (match_operand:VI48_AVX512F 2 "nonimmediate_operand" "vm")))]
21299 "TARGET_AVX2 && <mask_mode512bit_condition>"
21300 "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
21301 [(set_attr "type" "sseishft")
21302 (set_attr "prefix" "maybe_evex")
21303 (set_attr "mode" "<sseinsnmode>")])
21304
21305 (define_insn "<avx2_avx512>_<shift_insn>v<mode><mask_name>"
21306 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
21307 (any_lshift:VI2_AVX512VL
21308 (match_operand:VI2_AVX512VL 1 "register_operand" "v")
21309 (match_operand:VI2_AVX512VL 2 "nonimmediate_operand" "vm")))]
21310 "TARGET_AVX512BW"
21311 "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
21312 [(set_attr "type" "sseishft")
21313 (set_attr "prefix" "maybe_evex")
21314 (set_attr "mode" "<sseinsnmode>")])
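
;; A minimal, illustrative C sketch (helper name invented here) of the
;; per-element variable shifts above, assuming the AVX2 intrinsics from
;; <immintrin.h>:
;;
;;   #include <immintrin.h>
;;
;;   __m256i shift_each (__m256i vals, __m256i counts)
;;   {
;;     /* Arithmetic right shift, each lane by its own count (vpsravd).  */
;;     return _mm256_srav_epi32 (vals, counts);
;;   }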
21315
21316 (define_insn "avx_vec_concat<mode>"
21317 [(set (match_operand:V_256_512 0 "register_operand" "=x,v,x,Yv")
21318 (vec_concat:V_256_512
21319 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "x,v,xm,vm")
21320 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "xm,vm,C,C")))]
21321 "TARGET_AVX
21322 && (operands[2] == CONST0_RTX (<ssehalfvecmode>mode)
21323 || !MEM_P (operands[1]))"
21324 {
21325 switch (which_alternative)
21326 {
21327 case 0:
21328 return "vinsert<i128>\t{$0x1, %2, %<xtg_mode>1, %0|%0, %<xtg_mode>1, %2, 0x1}";
21329 case 1:
21330 if (<MODE_SIZE> == 64)
21331 {
21332 if (TARGET_AVX512DQ && GET_MODE_SIZE (<ssescalarmode>mode) == 4)
21333 return "vinsert<shuffletype>32x8\t{$0x1, %2, %<xtg_mode>1, %0|%0, %<xtg_mode>1, %2, 0x1}";
21334 else
21335 return "vinsert<shuffletype>64x4\t{$0x1, %2, %<xtg_mode>1, %0|%0, %<xtg_mode>1, %2, 0x1}";
21336 }
21337 else
21338 {
21339 if (TARGET_AVX512DQ && GET_MODE_SIZE (<ssescalarmode>mode) == 8)
21340 return "vinsert<shuffletype>64x2\t{$0x1, %2, %<xtg_mode>1, %0|%0, %<xtg_mode>1, %2, 0x1}";
21341 else
21342 return "vinsert<shuffletype>32x4\t{$0x1, %2, %<xtg_mode>1, %0|%0, %<xtg_mode>1, %2, 0x1}";
21343 }
21344 case 2:
21345 case 3:
21346 switch (get_attr_mode (insn))
21347 {
21348 case MODE_V16SF:
21349 if (misaligned_operand (operands[1], <ssehalfvecmode>mode))
21350 return "vmovups\t{%1, %t0|%t0, %1}";
21351 else
21352 return "vmovaps\t{%1, %t0|%t0, %1}";
21353 case MODE_V8DF:
21354 if (misaligned_operand (operands[1], <ssehalfvecmode>mode))
21355 return "vmovupd\t{%1, %t0|%t0, %1}";
21356 else
21357 return "vmovapd\t{%1, %t0|%t0, %1}";
21358 case MODE_V8SF:
21359 if (misaligned_operand (operands[1], <ssehalfvecmode>mode))
21360 return "vmovups\t{%1, %x0|%x0, %1}";
21361 else
21362 return "vmovaps\t{%1, %x0|%x0, %1}";
21363 case MODE_V4DF:
21364 if (misaligned_operand (operands[1], <ssehalfvecmode>mode))
21365 return "vmovupd\t{%1, %x0|%x0, %1}";
21366 else
21367 return "vmovapd\t{%1, %x0|%x0, %1}";
21368 case MODE_XI:
21369 if (misaligned_operand (operands[1], <ssehalfvecmode>mode))
21370 {
21371 if (which_alternative == 2)
21372 return "vmovdqu\t{%1, %t0|%t0, %1}";
21373 else if (GET_MODE_SIZE (<ssescalarmode>mode) == 8)
21374 return "vmovdqu64\t{%1, %t0|%t0, %1}";
21375 else
21376 return "vmovdqu32\t{%1, %t0|%t0, %1}";
21377 }
21378 else
21379 {
21380 if (which_alternative == 2)
21381 return "vmovdqa\t{%1, %t0|%t0, %1}";
21382 else if (GET_MODE_SIZE (<ssescalarmode>mode) == 8)
21383 return "vmovdqa64\t{%1, %t0|%t0, %1}";
21384 else
21385 return "vmovdqa32\t{%1, %t0|%t0, %1}";
21386 }
21387 case MODE_OI:
21388 if (misaligned_operand (operands[1], <ssehalfvecmode>mode))
21389 {
21390 if (which_alternative == 2)
21391 return "vmovdqu\t{%1, %x0|%x0, %1}";
21392 else if (GET_MODE_SIZE (<ssescalarmode>mode) == 8)
21393 return "vmovdqu64\t{%1, %x0|%x0, %1}";
21394 else
21395 return "vmovdqu32\t{%1, %x0|%x0, %1}";
21396 }
21397 else
21398 {
21399 if (which_alternative == 2)
21400 return "vmovdqa\t{%1, %x0|%x0, %1}";
21401 else if (GET_MODE_SIZE (<ssescalarmode>mode) == 8)
21402 return "vmovdqa64\t{%1, %x0|%x0, %1}";
21403 else
21404 return "vmovdqa32\t{%1, %x0|%x0, %1}";
21405 }
21406 default:
21407 gcc_unreachable ();
21408 }
21409 default:
21410 gcc_unreachable ();
21411 }
21412 }
21413 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
21414 (set_attr "prefix_extra" "1,1,*,*")
21415 (set_attr "length_immediate" "1,1,*,*")
21416 (set_attr "prefix" "maybe_evex")
21417 (set_attr "mode" "<sseinsnmode>")])
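
;; A minimal, illustrative C sketch (helper names invented here) of the two
;; kinds of concatenation handled above, assuming the AVX intrinsics from
;; <immintrin.h>:
;;
;;   #include <immintrin.h>
;;
;;   /* Concatenate two 128-bit halves (alternatives 0/1, vinsertf128).  */
;;   __m256 concat_halves (__m128 lo, __m128 hi)
;;   {
;;     return _mm256_set_m128 (hi, lo);
;;   }
;;
;;   /* Concatenate with a zero upper half (alternatives 2/3, a plain
;;      128-bit move that implicitly clears the upper bits).  */
;;   __m256 zero_extend_low (__m128 lo)
;;   {
;;     return _mm256_zextps128_ps256 (lo);
;;   }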
21418
21419 (define_insn "vcvtph2ps<mask_name>"
21420 [(set (match_operand:V4SF 0 "register_operand" "=v")
21421 (vec_select:V4SF
21422 (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "v")]
21423 UNSPEC_VCVTPH2PS)
21424 (parallel [(const_int 0) (const_int 1)
21425 (const_int 2) (const_int 3)])))]
21426 "TARGET_F16C || TARGET_AVX512VL"
21427 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
21428 [(set_attr "type" "ssecvt")
21429 (set_attr "prefix" "maybe_evex")
21430 (set_attr "mode" "V4SF")])
21431
21432 (define_insn "*vcvtph2ps_load<mask_name>"
21433 [(set (match_operand:V4SF 0 "register_operand" "=v")
21434 (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
21435 UNSPEC_VCVTPH2PS))]
21436 "TARGET_F16C || TARGET_AVX512VL"
21437 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
21438 [(set_attr "type" "ssecvt")
21439 (set_attr "prefix" "vex")
21440 (set_attr "mode" "V8SF")])
21441
21442 (define_insn "vcvtph2ps256<mask_name>"
21443 [(set (match_operand:V8SF 0 "register_operand" "=v")
21444 (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "vm")]
21445 UNSPEC_VCVTPH2PS))]
21446 "TARGET_F16C || TARGET_AVX512VL"
21447 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
21448 [(set_attr "type" "ssecvt")
21449 (set_attr "prefix" "vex")
21450 (set_attr "btver2_decode" "double")
21451 (set_attr "mode" "V8SF")])
21452
21453 (define_insn "<mask_codefor>avx512f_vcvtph2ps512<mask_name><round_saeonly_name>"
21454 [(set (match_operand:V16SF 0 "register_operand" "=v")
21455 (unspec:V16SF
21456 [(match_operand:V16HI 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
21457 UNSPEC_VCVTPH2PS))]
21458 "TARGET_AVX512F"
21459 "vcvtph2ps\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
21460 [(set_attr "type" "ssecvt")
21461 (set_attr "prefix" "evex")
21462 (set_attr "mode" "V16SF")])
21463
21464 (define_expand "vcvtps2ph_mask"
21465 [(set (match_operand:V8HI 0 "register_operand")
21466 (vec_merge:V8HI
21467 (vec_concat:V8HI
21468 (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
21469 (match_operand:SI 2 "const_0_to_255_operand")]
21470 UNSPEC_VCVTPS2PH)
21471 (match_dup 5))
21472 (match_operand:V8HI 3 "nonimm_or_0_operand")
21473 (match_operand:QI 4 "register_operand")))]
21474 "TARGET_AVX512VL"
21475 "operands[5] = CONST0_RTX (V4HImode);")
21476
21477 (define_expand "vcvtps2ph"
21478 [(set (match_operand:V8HI 0 "register_operand")
21479 (vec_concat:V8HI
21480 (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
21481 (match_operand:SI 2 "const_0_to_255_operand")]
21482 UNSPEC_VCVTPS2PH)
21483 (match_dup 3)))]
21484 "TARGET_F16C"
21485 "operands[3] = CONST0_RTX (V4HImode);")
21486
21487 (define_insn "*vcvtps2ph<mask_name>"
21488 [(set (match_operand:V8HI 0 "register_operand" "=v")
21489 (vec_concat:V8HI
21490 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "v")
21491 (match_operand:SI 2 "const_0_to_255_operand" "N")]
21492 UNSPEC_VCVTPS2PH)
21493 (match_operand:V4HI 3 "const0_operand")))]
21494 "(TARGET_F16C || TARGET_AVX512VL) && <mask_avx512vl_condition>"
21495 "vcvtps2ph\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"
21496 [(set_attr "type" "ssecvt")
21497 (set_attr "prefix" "maybe_evex")
21498 (set_attr "mode" "V4SF")])
21499
21500 (define_insn "*vcvtps2ph_store<mask_name>"
21501 [(set (match_operand:V4HI 0 "memory_operand" "=m")
21502 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "v")
21503 (match_operand:SI 2 "const_0_to_255_operand" "N")]
21504 UNSPEC_VCVTPS2PH))]
21505 "TARGET_F16C || TARGET_AVX512VL"
21506 "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
21507 [(set_attr "type" "ssecvt")
21508 (set_attr "prefix" "maybe_evex")
21509 (set_attr "mode" "V4SF")])
21510
21511 (define_insn "vcvtps2ph256<mask_name>"
21512 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=vm")
21513 (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "v")
21514 (match_operand:SI 2 "const_0_to_255_operand" "N")]
21515 UNSPEC_VCVTPS2PH))]
21516 "TARGET_F16C || TARGET_AVX512VL"
21517 "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
21518 [(set_attr "type" "ssecvt")
21519 (set_attr "prefix" "maybe_evex")
21520 (set_attr "btver2_decode" "vector")
21521 (set_attr "mode" "V8SF")])
21522
21523 (define_insn "<mask_codefor>avx512f_vcvtps2ph512<mask_name>"
21524 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=vm")
21525 (unspec:V16HI
21526 [(match_operand:V16SF 1 "register_operand" "v")
21527 (match_operand:SI 2 "const_0_to_255_operand" "N")]
21528 UNSPEC_VCVTPS2PH))]
21529 "TARGET_AVX512F"
21530 "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
21531 [(set_attr "type" "ssecvt")
21532 (set_attr "prefix" "evex")
21533 (set_attr "mode" "V16SF")])
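
;; A minimal, illustrative C sketch (helper names invented here) of the
;; half <-> single precision conversions above, assuming the F16C
;; intrinsics from <immintrin.h>:
;;
;;   #include <immintrin.h>
;;
;;   __m128 widen4 (__m128i packed_halves)
;;   {
;;     /* Low four 16-bit halves -> four floats (vcvtph2ps).  */
;;     return _mm_cvtph_ps (packed_halves);
;;   }
;;
;;   __m128i narrow4 (__m128 floats)
;;   {
;;     /* Four floats -> four 16-bit halves (vcvtps2ph), rounding to
;;        nearest without raising exceptions.  */
;;     return _mm_cvtps_ph (floats, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
;;   }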
21534
21535 ;; For gather* insn patterns
21536 (define_mode_iterator VEC_GATHER_MODE
21537 [V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF])
21538 (define_mode_attr VEC_GATHER_IDXSI
21539 [(V2DI "V4SI") (V4DI "V4SI") (V8DI "V8SI")
21540 (V2DF "V4SI") (V4DF "V4SI") (V8DF "V8SI")
21541 (V4SI "V4SI") (V8SI "V8SI") (V16SI "V16SI")
21542 (V4SF "V4SI") (V8SF "V8SI") (V16SF "V16SI")])
21543
21544 (define_mode_attr VEC_GATHER_IDXDI
21545 [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
21546 (V2DF "V2DI") (V4DF "V4DI") (V8DF "V8DI")
21547 (V4SI "V2DI") (V8SI "V4DI") (V16SI "V8DI")
21548 (V4SF "V2DI") (V8SF "V4DI") (V16SF "V8DI")])
21549
21550 (define_mode_attr VEC_GATHER_SRCDI
21551 [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
21552 (V2DF "V2DF") (V4DF "V4DF") (V8DF "V8DF")
21553 (V4SI "V4SI") (V8SI "V4SI") (V16SI "V8SI")
21554 (V4SF "V4SF") (V8SF "V4SF") (V16SF "V8SF")])
21555
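;; A minimal, illustrative C sketch (helper name invented here) of a
;; gather, assuming the AVX2 intrinsics from <immintrin.h>:
;;
;;   #include <immintrin.h>
;;
;;   __m256 gather8 (const float *base, __m256i idx)
;;   {
;;     /* Load base[idx[i]] for each lane; scale 4 == sizeof (float)
;;        (vgatherdps).  */
;;     return _mm256_i32gather_ps (base, idx, 4);
;;   }
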
21556 (define_expand "avx2_gathersi<mode>"
21557 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
21558 (unspec:VEC_GATHER_MODE
21559 [(match_operand:VEC_GATHER_MODE 1 "register_operand")
21560 (mem:<ssescalarmode>
21561 (match_par_dup 6
21562 [(match_operand 2 "vsib_address_operand")
21563 (match_operand:<VEC_GATHER_IDXSI>
21564 3 "register_operand")
(match_operand:SI 5 "const1248_operand")]))
21566 (mem:BLK (scratch))
21567 (match_operand:VEC_GATHER_MODE 4 "register_operand")]
21568 UNSPEC_GATHER))
21569 (clobber (match_scratch:VEC_GATHER_MODE 7))])]
21570 "TARGET_AVX2"
21571 {
21572 operands[6]
21573 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
21574 operands[5]), UNSPEC_VSIBADDR);
21575 })
21576
21577 (define_insn "*avx2_gathersi<VEC_GATHER_MODE:mode>"
21578 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
21579 (unspec:VEC_GATHER_MODE
21580 [(match_operand:VEC_GATHER_MODE 2 "register_operand" "0")
21581 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
21582 [(unspec:P
21583 [(match_operand:P 3 "vsib_address_operand" "Tv")
21584 (match_operand:<VEC_GATHER_IDXSI> 4 "register_operand" "x")
21585 (match_operand:SI 6 "const1248_operand" "n")]
21586 UNSPEC_VSIBADDR)])
21587 (mem:BLK (scratch))
21588 (match_operand:VEC_GATHER_MODE 5 "register_operand" "1")]
21589 UNSPEC_GATHER))
21590 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
21591 "TARGET_AVX2"
21592 "%M3v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
21593 [(set_attr "type" "ssemov")
21594 (set_attr "prefix" "vex")
21595 (set_attr "mode" "<sseinsnmode>")])
21596
21597 (define_insn "*avx2_gathersi<VEC_GATHER_MODE:mode>_2"
21598 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
21599 (unspec:VEC_GATHER_MODE
21600 [(pc)
21601 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
21602 [(unspec:P
21603 [(match_operand:P 2 "vsib_address_operand" "Tv")
21604 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "x")
21605 (match_operand:SI 5 "const1248_operand" "n")]
21606 UNSPEC_VSIBADDR)])
21607 (mem:BLK (scratch))
21608 (match_operand:VEC_GATHER_MODE 4 "register_operand" "1")]
21609 UNSPEC_GATHER))
21610 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
21611 "TARGET_AVX2"
21612 "%M2v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
21613 [(set_attr "type" "ssemov")
21614 (set_attr "prefix" "vex")
21615 (set_attr "mode" "<sseinsnmode>")])
21616
21617 (define_expand "avx2_gatherdi<mode>"
21618 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
21619 (unspec:VEC_GATHER_MODE
21620 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
21621 (mem:<ssescalarmode>
21622 (match_par_dup 6
21623 [(match_operand 2 "vsib_address_operand")
21624 (match_operand:<VEC_GATHER_IDXDI>
21625 3 "register_operand")
(match_operand:SI 5 "const1248_operand")]))
21627 (mem:BLK (scratch))
21628 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand")]
21629 UNSPEC_GATHER))
21630 (clobber (match_scratch:VEC_GATHER_MODE 7))])]
21631 "TARGET_AVX2"
21632 {
21633 operands[6]
21634 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
21635 operands[5]), UNSPEC_VSIBADDR);
21636 })
21637
21638 (define_insn "*avx2_gatherdi<VEC_GATHER_MODE:mode>"
21639 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
21640 (unspec:VEC_GATHER_MODE
21641 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
21642 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
21643 [(unspec:P
21644 [(match_operand:P 3 "vsib_address_operand" "Tv")
21645 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
21646 (match_operand:SI 6 "const1248_operand" "n")]
21647 UNSPEC_VSIBADDR)])
21648 (mem:BLK (scratch))
21649 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
21650 UNSPEC_GATHER))
21651 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
21652 "TARGET_AVX2"
21653 "%M3v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"
21654 [(set_attr "type" "ssemov")
21655 (set_attr "prefix" "vex")
21656 (set_attr "mode" "<sseinsnmode>")])
21657
21658 (define_insn "*avx2_gatherdi<VEC_GATHER_MODE:mode>_2"
21659 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
21660 (unspec:VEC_GATHER_MODE
21661 [(pc)
21662 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
21663 [(unspec:P
21664 [(match_operand:P 2 "vsib_address_operand" "Tv")
21665 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
21666 (match_operand:SI 5 "const1248_operand" "n")]
21667 UNSPEC_VSIBADDR)])
21668 (mem:BLK (scratch))
21669 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
21670 UNSPEC_GATHER))
21671 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
21672 "TARGET_AVX2"
21673 {
21674 if (<VEC_GATHER_MODE:MODE>mode != <VEC_GATHER_SRCDI>mode)
21675 return "%M2v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";
21676 return "%M2v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";
21677 }
21678 [(set_attr "type" "ssemov")
21679 (set_attr "prefix" "vex")
21680 (set_attr "mode" "<sseinsnmode>")])
21681
21682 (define_insn "*avx2_gatherdi<VI4F_256:mode>_3"
21683 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
21684 (vec_select:<VEC_GATHER_SRCDI>
21685 (unspec:VI4F_256
21686 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
21687 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
21688 [(unspec:P
21689 [(match_operand:P 3 "vsib_address_operand" "Tv")
21690 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
21691 (match_operand:SI 6 "const1248_operand" "n")]
21692 UNSPEC_VSIBADDR)])
21693 (mem:BLK (scratch))
21694 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
21695 UNSPEC_GATHER)
21696 (parallel [(const_int 0) (const_int 1)
21697 (const_int 2) (const_int 3)])))
21698 (clobber (match_scratch:VI4F_256 1 "=&x"))]
21699 "TARGET_AVX2"
21700 "%M3v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"
21701 [(set_attr "type" "ssemov")
21702 (set_attr "prefix" "vex")
21703 (set_attr "mode" "<sseinsnmode>")])
21704
21705 (define_insn "*avx2_gatherdi<VI4F_256:mode>_4"
21706 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
21707 (vec_select:<VEC_GATHER_SRCDI>
21708 (unspec:VI4F_256
21709 [(pc)
21710 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
21711 [(unspec:P
21712 [(match_operand:P 2 "vsib_address_operand" "Tv")
21713 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
21714 (match_operand:SI 5 "const1248_operand" "n")]
21715 UNSPEC_VSIBADDR)])
21716 (mem:BLK (scratch))
21717 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
21718 UNSPEC_GATHER)
21719 (parallel [(const_int 0) (const_int 1)
21720 (const_int 2) (const_int 3)])))
21721 (clobber (match_scratch:VI4F_256 1 "=&x"))]
21722 "TARGET_AVX2"
21723 "%M2v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"
21724 [(set_attr "type" "ssemov")
21725 (set_attr "prefix" "vex")
21726 (set_attr "mode" "<sseinsnmode>")])
21727
21728 (define_expand "<avx512>_gathersi<mode>"
21729 [(parallel [(set (match_operand:VI48F 0 "register_operand")
21730 (unspec:VI48F
21731 [(match_operand:VI48F 1 "register_operand")
21732 (match_operand:<avx512fmaskmode> 4 "register_operand")
21733 (mem:<ssescalarmode>
21734 (match_par_dup 6
21735 [(match_operand 2 "vsib_address_operand")
21736 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand")
21737 (match_operand:SI 5 "const1248_operand")]))]
21738 UNSPEC_GATHER))
21739 (clobber (match_scratch:<avx512fmaskmode> 7))])]
21740 "TARGET_AVX512F"
21741 {
21742 operands[6]
21743 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
21744 operands[5]), UNSPEC_VSIBADDR);
21745 })
21746
21747 (define_insn "*avx512f_gathersi<VI48F:mode>"
21748 [(set (match_operand:VI48F 0 "register_operand" "=&v")
21749 (unspec:VI48F
21750 [(match_operand:VI48F 1 "register_operand" "0")
21751 (match_operand:<avx512fmaskmode> 7 "register_operand" "2")
21752 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
21753 [(unspec:P
21754 [(match_operand:P 4 "vsib_address_operand" "Tv")
21755 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "v")
21756 (match_operand:SI 5 "const1248_operand" "n")]
21757 UNSPEC_VSIBADDR)])]
21758 UNSPEC_GATHER))
21759 (clobber (match_scratch:<avx512fmaskmode> 2 "=&Yk"))]
21760 "TARGET_AVX512F"
21761 ;; %X6 so that we don't emit any *WORD PTR for -masm=intel, as
21762 ;; gas changed what it requires incompatibly.
21763 "%M4v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %X6}"
21764 [(set_attr "type" "ssemov")
21765 (set_attr "prefix" "evex")
21766 (set_attr "mode" "<sseinsnmode>")])
21767
21768 (define_insn "*avx512f_gathersi<VI48F:mode>_2"
21769 [(set (match_operand:VI48F 0 "register_operand" "=&v")
21770 (unspec:VI48F
21771 [(pc)
21772 (match_operand:<avx512fmaskmode> 6 "register_operand" "1")
21773 (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
21774 [(unspec:P
21775 [(match_operand:P 3 "vsib_address_operand" "Tv")
21776 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
21777 (match_operand:SI 4 "const1248_operand" "n")]
21778 UNSPEC_VSIBADDR)])]
21779 UNSPEC_GATHER))
21780 (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
21781 "TARGET_AVX512F"
21782 ;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
21783 ;; gas changed what it requires incompatibly.
21784 "%M3v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}"
21785 [(set_attr "type" "ssemov")
21786 (set_attr "prefix" "evex")
21787 (set_attr "mode" "<sseinsnmode>")])
21788
21789
21790 (define_expand "<avx512>_gatherdi<mode>"
21791 [(parallel [(set (match_operand:VI48F 0 "register_operand")
21792 (unspec:VI48F
21793 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
21794 (match_operand:QI 4 "register_operand")
21795 (mem:<ssescalarmode>
21796 (match_par_dup 6
21797 [(match_operand 2 "vsib_address_operand")
21798 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand")
21799 (match_operand:SI 5 "const1248_operand")]))]
21800 UNSPEC_GATHER))
21801 (clobber (match_scratch:QI 7))])]
21802 "TARGET_AVX512F"
21803 {
21804 operands[6]
21805 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
21806 operands[5]), UNSPEC_VSIBADDR);
21807 })
21808
21809 (define_insn "*avx512f_gatherdi<VI48F:mode>"
21810 [(set (match_operand:VI48F 0 "register_operand" "=&v")
21811 (unspec:VI48F
21812 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand" "0")
21813 (match_operand:QI 7 "register_operand" "2")
21814 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
21815 [(unspec:P
21816 [(match_operand:P 4 "vsib_address_operand" "Tv")
21817 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "v")
21818 (match_operand:SI 5 "const1248_operand" "n")]
21819 UNSPEC_VSIBADDR)])]
21820 UNSPEC_GATHER))
21821 (clobber (match_scratch:QI 2 "=&Yk"))]
21822 "TARGET_AVX512F"
21823 ;; %X6 so that we don't emit any *WORD PTR for -masm=intel, as
21824 ;; gas changed what it requires incompatibly.
21825 "%M4v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %X6}"
21826 [(set_attr "type" "ssemov")
21827 (set_attr "prefix" "evex")
21828 (set_attr "mode" "<sseinsnmode>")])
21829
21830 (define_insn "*avx512f_gatherdi<VI48F:mode>_2"
21831 [(set (match_operand:VI48F 0 "register_operand" "=&v")
21832 (unspec:VI48F
21833 [(pc)
21834 (match_operand:QI 6 "register_operand" "1")
21835 (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
21836 [(unspec:P
21837 [(match_operand:P 3 "vsib_address_operand" "Tv")
21838 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
21839 (match_operand:SI 4 "const1248_operand" "n")]
21840 UNSPEC_VSIBADDR)])]
21841 UNSPEC_GATHER))
21842 (clobber (match_scratch:QI 1 "=&Yk"))]
21843 "TARGET_AVX512F"
21844 {
21845 /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
21846 gas changed what it requires incompatibly. */
21847 if (<VI48F:MODE>mode != <VEC_GATHER_SRCDI>mode)
21848 {
21849 if (<VI48F:MODE_SIZE> != 64)
21850 return "%M3v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %x0%{%1%}|%x0%{%1%}, %X5}";
21851 else
21852 return "%M3v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %X5}";
21853 }
21854 return "%M3v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}";
21855 }
21856 [(set_attr "type" "ssemov")
21857 (set_attr "prefix" "evex")
21858 (set_attr "mode" "<sseinsnmode>")])
21859
21860 (define_expand "<avx512>_scattersi<mode>"
21861 [(parallel [(set (mem:VI48F
21862 (match_par_dup 5
21863 [(match_operand 0 "vsib_address_operand")
21864 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand")
21865 (match_operand:SI 4 "const1248_operand")]))
21866 (unspec:VI48F
21867 [(match_operand:<avx512fmaskmode> 1 "register_operand")
21868 (match_operand:VI48F 3 "register_operand")]
21869 UNSPEC_SCATTER))
21870 (clobber (match_scratch:<avx512fmaskmode> 6))])]
21871 "TARGET_AVX512F"
21872 {
21873 operands[5]
21874 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
21875 operands[4]), UNSPEC_VSIBADDR);
21876 })
21877
21878 (define_insn "*avx512f_scattersi<VI48F:mode>"
21879 [(set (match_operator:VI48F 5 "vsib_mem_operator"
21880 [(unspec:P
21881 [(match_operand:P 0 "vsib_address_operand" "Tv")
21882 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
21883 (match_operand:SI 4 "const1248_operand" "n")]
21884 UNSPEC_VSIBADDR)])
21885 (unspec:VI48F
21886 [(match_operand:<avx512fmaskmode> 6 "register_operand" "1")
21887 (match_operand:VI48F 3 "register_operand" "v")]
21888 UNSPEC_SCATTER))
21889 (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
21890 "TARGET_AVX512F"
21891 ;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
21892 ;; gas changed what it requires incompatibly.
21893 "%M0v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"
21894 [(set_attr "type" "ssemov")
21895 (set_attr "prefix" "evex")
21896 (set_attr "mode" "<sseinsnmode>")])
21897
21898 (define_expand "<avx512>_scatterdi<mode>"
21899 [(parallel [(set (mem:VI48F
21900 (match_par_dup 5
21901 [(match_operand 0 "vsib_address_operand")
21902 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand")
21903 (match_operand:SI 4 "const1248_operand")]))
21904 (unspec:VI48F
21905 [(match_operand:QI 1 "register_operand")
21906 (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand")]
21907 UNSPEC_SCATTER))
21908 (clobber (match_scratch:QI 6))])]
21909 "TARGET_AVX512F"
21910 {
21911 operands[5]
21912 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
21913 operands[4]), UNSPEC_VSIBADDR);
21914 })
21915
21916 (define_insn "*avx512f_scatterdi<VI48F:mode>"
21917 [(set (match_operator:VI48F 5 "vsib_mem_operator"
21918 [(unspec:P
21919 [(match_operand:P 0 "vsib_address_operand" "Tv")
21920 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
21921 (match_operand:SI 4 "const1248_operand" "n")]
21922 UNSPEC_VSIBADDR)])
21923 (unspec:VI48F
21924 [(match_operand:QI 6 "register_operand" "1")
21925 (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand" "v")]
21926 UNSPEC_SCATTER))
21927 (clobber (match_scratch:QI 1 "=&Yk"))]
21928 "TARGET_AVX512F"
21929 ;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
21930 ;; gas changed what it requires incompatibly.
21931 "%M0v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"
21932 [(set_attr "type" "ssemov")
21933 (set_attr "prefix" "evex")
21934 (set_attr "mode" "<sseinsnmode>")])
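
;; A minimal, illustrative C sketch (helper name invented here) of a
;; scatter, assuming the AVX512F intrinsics from <immintrin.h>:
;;
;;   #include <immintrin.h>
;;
;;   void scatter16 (float *base, __m512i idx, __m512 vals)
;;   {
;;     /* Store vals[i] to base[idx[i]]; scale 4 == sizeof (float)
;;        (vscatterdps).  */
;;     _mm512_i32scatter_ps (base, idx, vals, 4);
;;   }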
21935
21936 (define_insn "<avx512>_compress<mode>_mask"
21937 [(set (match_operand:VI48F 0 "register_operand" "=v")
21938 (unspec:VI48F
21939 [(match_operand:VI48F 1 "register_operand" "v")
21940 (match_operand:VI48F 2 "nonimm_or_0_operand" "0C")
21941 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")]
21942 UNSPEC_COMPRESS))]
21943 "TARGET_AVX512F"
21944 "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
21945 [(set_attr "type" "ssemov")
21946 (set_attr "prefix" "evex")
21947 (set_attr "mode" "<sseinsnmode>")])
21948
21949 (define_insn "compress<mode>_mask"
21950 [(set (match_operand:VI12_AVX512VLBW 0 "register_operand" "=v")
21951 (unspec:VI12_AVX512VLBW
21952 [(match_operand:VI12_AVX512VLBW 1 "register_operand" "v")
21953 (match_operand:VI12_AVX512VLBW 2 "nonimm_or_0_operand" "0C")
21954 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")]
21955 UNSPEC_COMPRESS))]
21956 "TARGET_AVX512VBMI2"
21957 "vpcompress<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
21958 [(set_attr "type" "ssemov")
21959 (set_attr "prefix" "evex")
21960 (set_attr "mode" "<sseinsnmode>")])
21961
21962 (define_insn "<avx512>_compressstore<mode>_mask"
21963 [(set (match_operand:VI48F 0 "memory_operand" "=m")
21964 (unspec:VI48F
21965 [(match_operand:VI48F 1 "register_operand" "x")
21966 (match_dup 0)
21967 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")]
21968 UNSPEC_COMPRESS_STORE))]
21969 "TARGET_AVX512F"
21970 "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
21971 [(set_attr "type" "ssemov")
21972 (set_attr "prefix" "evex")
21973 (set_attr "memory" "store")
21974 (set_attr "mode" "<sseinsnmode>")])
21975
21976 (define_insn "compressstore<mode>_mask"
21977 [(set (match_operand:VI12_AVX512VLBW 0 "memory_operand" "=m")
21978 (unspec:VI12_AVX512VLBW
21979 [(match_operand:VI12_AVX512VLBW 1 "register_operand" "x")
21980 (match_dup 0)
21981 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")]
21982 UNSPEC_COMPRESS_STORE))]
21983 "TARGET_AVX512VBMI2"
21984 "vpcompress<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
21985 [(set_attr "type" "ssemov")
21986 (set_attr "prefix" "evex")
21987 (set_attr "memory" "store")
21988 (set_attr "mode" "<sseinsnmode>")])
21989
21990 (define_expand "<avx512>_expand<mode>_maskz"
21991 [(set (match_operand:VI48F 0 "register_operand")
21992 (unspec:VI48F
21993 [(match_operand:VI48F 1 "nonimmediate_operand")
21994 (match_operand:VI48F 2 "nonimm_or_0_operand")
21995 (match_operand:<avx512fmaskmode> 3 "register_operand")]
21996 UNSPEC_EXPAND))]
21997 "TARGET_AVX512F"
21998 "operands[2] = CONST0_RTX (<MODE>mode);")
21999
22000 (define_insn "<avx512>_expand<mode>_mask"
22001 [(set (match_operand:VI48F 0 "register_operand" "=v,v")
22002 (unspec:VI48F
22003 [(match_operand:VI48F 1 "nonimmediate_operand" "v,m")
22004 (match_operand:VI48F 2 "nonimm_or_0_operand" "0C,0C")
22005 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")]
22006 UNSPEC_EXPAND))]
22007 "TARGET_AVX512F"
22008 "v<sseintprefix>expand<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
22009 [(set_attr "type" "ssemov")
22010 (set_attr "prefix" "evex")
22011 (set_attr "memory" "none,load")
22012 (set_attr "mode" "<sseinsnmode>")])
22013
22014 (define_insn "expand<mode>_mask"
22015 [(set (match_operand:VI12_AVX512VLBW 0 "register_operand" "=v,v")
22016 (unspec:VI12_AVX512VLBW
22017 [(match_operand:VI12_AVX512VLBW 1 "nonimmediate_operand" "v,m")
22018 (match_operand:VI12_AVX512VLBW 2 "nonimm_or_0_operand" "0C,0C")
22019 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")]
22020 UNSPEC_EXPAND))]
22021 "TARGET_AVX512VBMI2"
22022 "v<sseintprefix>expand<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
22023 [(set_attr "type" "ssemov")
22024 (set_attr "prefix" "evex")
22025 (set_attr "memory" "none,load")
22026 (set_attr "mode" "<sseinsnmode>")])
22027
22028 (define_expand "expand<mode>_maskz"
22029 [(set (match_operand:VI12_AVX512VLBW 0 "register_operand")
22030 (unspec:VI12_AVX512VLBW
22031 [(match_operand:VI12_AVX512VLBW 1 "nonimmediate_operand")
22032 (match_operand:VI12_AVX512VLBW 2 "nonimm_or_0_operand")
22033 (match_operand:<avx512fmaskmode> 3 "register_operand")]
22034 UNSPEC_EXPAND))]
22035 "TARGET_AVX512VBMI2"
22036 "operands[2] = CONST0_RTX (<MODE>mode);")
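
;; A minimal, illustrative C sketch (helper names invented here) of
;; compress and expand, assuming the AVX512F intrinsics from
;; <immintrin.h>:
;;
;;   #include <immintrin.h>
;;
;;   /* Pack the mask-selected dwords to the low end, zeroing the rest
;;      (vpcompressd).  */
;;   __m512i compress_dwords (__mmask16 k, __m512i v)
;;   {
;;     return _mm512_maskz_compress_epi32 (k, v);
;;   }
;;
;;   /* Read consecutive dwords from memory into the mask-selected lanes,
;;      zeroing the rest (vpexpandd).  */
;;   __m512i expand_dwords (__mmask16 k, const void *p)
;;   {
;;     return _mm512_maskz_expandloadu_epi32 (k, p);
;;   }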
22037
22038 (define_insn "avx512dq_rangep<mode><mask_name><round_saeonly_name>"
22039 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
22040 (unspec:VF_AVX512VL
22041 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
22042 (match_operand:VF_AVX512VL 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
22043 (match_operand:SI 3 "const_0_to_15_operand")]
22044 UNSPEC_RANGE))]
22045 "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>"
22046 "vrange<ssemodesuffix>\t{%3, <round_saeonly_mask_op4>%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2<round_saeonly_mask_op4>, %3}"
22047 [(set_attr "type" "sse")
22048 (set_attr "prefix" "evex")
22049 (set_attr "mode" "<MODE>")])
22050
22051 (define_insn "avx512dq_ranges<mode><mask_scalar_name><round_saeonly_scalar_name>"
22052 [(set (match_operand:VF_128 0 "register_operand" "=v")
22053 (vec_merge:VF_128
22054 (unspec:VF_128
22055 [(match_operand:VF_128 1 "register_operand" "v")
22056 (match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")
22057 (match_operand:SI 3 "const_0_to_15_operand")]
22058 UNSPEC_RANGE)
22059 (match_dup 1)
22060 (const_int 1)))]
22061 "TARGET_AVX512DQ"
22062 "vrange<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2<round_saeonly_scalar_mask_op4>, %3}"
22063 [(set_attr "type" "sse")
22064 (set_attr "prefix" "evex")
22065 (set_attr "mode" "<MODE>")])
22066
22067 (define_insn "avx512dq_fpclass<mode><mask_scalar_merge_name>"
22068 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
22069 (unspec:<avx512fmaskmode>
22070 [(match_operand:VF_AVX512VL 1 "vector_operand" "vm")
22071 (match_operand 2 "const_0_to_255_operand" "n")]
22072 UNSPEC_FPCLASS))]
22073 "TARGET_AVX512DQ"
"vfpclass<ssemodesuffix><vecmemsuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
22075 [(set_attr "type" "sse")
22076 (set_attr "length_immediate" "1")
22077 (set_attr "prefix" "evex")
22078 (set_attr "mode" "<MODE>")])
22079
22080 (define_insn "avx512dq_vmfpclass<mode><mask_scalar_merge_name>"
22081 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
22082 (and:<avx512fmaskmode>
22083 (unspec:<avx512fmaskmode>
22084 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")
22085 (match_operand 2 "const_0_to_255_operand" "n")]
22086 UNSPEC_FPCLASS)
22087 (const_int 1)))]
22088 "TARGET_AVX512DQ"
"vfpclass<ssescalarmodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
22090 [(set_attr "type" "sse")
22091 (set_attr "length_immediate" "1")
22092 (set_attr "prefix" "evex")
22093 (set_attr "mode" "<MODE>")])
22094
22095 (define_insn "<avx512>_getmant<mode><mask_name><round_saeonly_name>"
22096 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
22097 (unspec:VF_AVX512VL
22098 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>")
22099 (match_operand:SI 2 "const_0_to_15_operand")]
22100 UNSPEC_GETMANT))]
22101 "TARGET_AVX512F"
"vgetmant<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}"
22103 [(set_attr "prefix" "evex")
22104 (set_attr "mode" "<MODE>")])
22105
22106 (define_insn "avx512f_vgetmant<mode><mask_scalar_name><round_saeonly_scalar_name>"
22107 [(set (match_operand:VF_128 0 "register_operand" "=v")
22108 (vec_merge:VF_128
22109 (unspec:VF_128
22110 [(match_operand:VF_128 1 "register_operand" "v")
22111 (match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")
22112 (match_operand:SI 3 "const_0_to_15_operand")]
22113 UNSPEC_GETMANT)
22114 (match_dup 1)
22115 (const_int 1)))]
22116 "TARGET_AVX512F"
"vgetmant<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2<round_saeonly_scalar_mask_op4>, %3}"
22118 [(set_attr "prefix" "evex")
22119 (set_attr "mode" "<ssescalarmode>")])
22120
;; The correct RTL representation of vdbpsadbw (the double block packed
;; sum of absolute differences) would be absolutely enormous, and surely
;; not generally useful, so an unspec is used instead.
22123 (define_insn "<mask_codefor>avx512bw_dbpsadbw<mode><mask_name>"
22124 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
22125 (unspec:VI2_AVX512VL
22126 [(match_operand:<dbpsadbwmode> 1 "register_operand" "v")
22127 (match_operand:<dbpsadbwmode> 2 "nonimmediate_operand" "vm")
22128 (match_operand:SI 3 "const_0_to_255_operand")]
22129 UNSPEC_DBPSADBW))]
22130 "TARGET_AVX512BW"
22131 "vdbpsadbw\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}"
22132 [(set_attr "type" "sselog1")
22133 (set_attr "length_immediate" "1")
22134 (set_attr "prefix" "evex")
22135 (set_attr "mode" "<sseinsnmode>")])
22136
22137 (define_insn "clz<mode>2<mask_name>"
22138 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
22139 (clz:VI48_AVX512VL
22140 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")))]
22141 "TARGET_AVX512CD"
22142 "vplzcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
22143 [(set_attr "type" "sse")
22144 (set_attr "prefix" "evex")
22145 (set_attr "mode" "<sseinsnmode>")])
22146
22147 (define_insn "<mask_codefor>conflict<mode><mask_name>"
22148 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
22149 (unspec:VI48_AVX512VL
22150 [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")]
22151 UNSPEC_CONFLICT))]
22152 "TARGET_AVX512CD"
22153 "vpconflict<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
22154 [(set_attr "type" "sse")
22155 (set_attr "prefix" "evex")
22156 (set_attr "mode" "<sseinsnmode>")])
22157
22158 (define_insn "sha1msg1"
22159 [(set (match_operand:V4SI 0 "register_operand" "=x")
22160 (unspec:V4SI
22161 [(match_operand:V4SI 1 "register_operand" "0")
22162 (match_operand:V4SI 2 "vector_operand" "xBm")]
22163 UNSPEC_SHA1MSG1))]
22164 "TARGET_SHA"
22165 "sha1msg1\t{%2, %0|%0, %2}"
22166 [(set_attr "type" "sselog1")
22167 (set_attr "mode" "TI")])
22168
22169 (define_insn "sha1msg2"
22170 [(set (match_operand:V4SI 0 "register_operand" "=x")
22171 (unspec:V4SI
22172 [(match_operand:V4SI 1 "register_operand" "0")
22173 (match_operand:V4SI 2 "vector_operand" "xBm")]
22174 UNSPEC_SHA1MSG2))]
22175 "TARGET_SHA"
22176 "sha1msg2\t{%2, %0|%0, %2}"
22177 [(set_attr "type" "sselog1")
22178 (set_attr "mode" "TI")])
22179
22180 (define_insn "sha1nexte"
22181 [(set (match_operand:V4SI 0 "register_operand" "=x")
22182 (unspec:V4SI
22183 [(match_operand:V4SI 1 "register_operand" "0")
22184 (match_operand:V4SI 2 "vector_operand" "xBm")]
22185 UNSPEC_SHA1NEXTE))]
22186 "TARGET_SHA"
22187 "sha1nexte\t{%2, %0|%0, %2}"
22188 [(set_attr "type" "sselog1")
22189 (set_attr "mode" "TI")])
22190
22191 (define_insn "sha1rnds4"
22192 [(set (match_operand:V4SI 0 "register_operand" "=x")
22193 (unspec:V4SI
22194 [(match_operand:V4SI 1 "register_operand" "0")
22195 (match_operand:V4SI 2 "vector_operand" "xBm")
22196 (match_operand:SI 3 "const_0_to_3_operand" "n")]
22197 UNSPEC_SHA1RNDS4))]
22198 "TARGET_SHA"
22199 "sha1rnds4\t{%3, %2, %0|%0, %2, %3}"
22200 [(set_attr "type" "sselog1")
22201 (set_attr "length_immediate" "1")
22202 (set_attr "mode" "TI")])
22203
22204 (define_insn "sha256msg1"
22205 [(set (match_operand:V4SI 0 "register_operand" "=x")
22206 (unspec:V4SI
22207 [(match_operand:V4SI 1 "register_operand" "0")
22208 (match_operand:V4SI 2 "vector_operand" "xBm")]
22209 UNSPEC_SHA256MSG1))]
22210 "TARGET_SHA"
22211 "sha256msg1\t{%2, %0|%0, %2}"
22212 [(set_attr "type" "sselog1")
22213 (set_attr "mode" "TI")])
22214
22215 (define_insn "sha256msg2"
22216 [(set (match_operand:V4SI 0 "register_operand" "=x")
22217 (unspec:V4SI
22218 [(match_operand:V4SI 1 "register_operand" "0")
22219 (match_operand:V4SI 2 "vector_operand" "xBm")]
22220 UNSPEC_SHA256MSG2))]
22221 "TARGET_SHA"
22222 "sha256msg2\t{%2, %0|%0, %2}"
22223 [(set_attr "type" "sselog1")
22224 (set_attr "mode" "TI")])
22225
22226 (define_insn "sha256rnds2"
22227 [(set (match_operand:V4SI 0 "register_operand" "=x")
22228 (unspec:V4SI
22229 [(match_operand:V4SI 1 "register_operand" "0")
22230 (match_operand:V4SI 2 "vector_operand" "xBm")
22231 (match_operand:V4SI 3 "register_operand" "Yz")]
22232 UNSPEC_SHA256RNDS2))]
22233 "TARGET_SHA"
22234 "sha256rnds2\t{%3, %2, %0|%0, %2, %3}"
22235 [(set_attr "type" "sselog1")
22236 (set_attr "length_immediate" "1")
22237 (set_attr "mode" "TI")])
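
;; A minimal, illustrative C sketch (helper name invented here) of the
;; SHA round primitives above, assuming the SHA-NI intrinsics from
;; <immintrin.h>:
;;
;;   #include <immintrin.h>
;;
;;   __m128i sha256_two_rounds (__m128i state0, __m128i state1, __m128i msg_wk)
;;   {
;;     /* Two SHA-256 rounds; the message/round-constant operand is the
;;        one pinned to %xmm0 by the "Yz" constraint above.  */
;;     return _mm_sha256rnds2_epu32 (state0, state1, msg_wk);
;;   }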
22238
22239 (define_insn_and_split "avx512f_<castmode><avxsizesuffix>_<castmode>"
22240 [(set (match_operand:AVX512MODE2P 0 "nonimmediate_operand" "=x,m")
22241 (vec_concat:AVX512MODE2P
22242 (vec_concat:<ssehalfvecmode>
22243 (match_operand:<ssequartermode> 1 "nonimmediate_operand" "xm,x")
22244 (unspec:<ssequartermode> [(const_int 0)] UNSPEC_CAST))
22245 (unspec:<ssehalfvecmode> [(const_int 0)] UNSPEC_CAST)))]
22246 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
22247 "#"
22248 "&& reload_completed"
22249 [(set (match_dup 0) (match_dup 1))]
22250 {
22251 if (REG_P (operands[0]))
22252 operands[0] = gen_lowpart (<ssequartermode>mode, operands[0]);
22253 else
22254 operands[1] = lowpart_subreg (<MODE>mode, operands[1],
22255 <ssequartermode>mode);
22256 })
22257
22258 (define_insn_and_split "avx512f_<castmode><avxsizesuffix>_256<castmode>"
22259 [(set (match_operand:AVX512MODE2P 0 "nonimmediate_operand" "=x,m")
22260 (vec_concat:AVX512MODE2P
22261 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")
22262 (unspec:<ssehalfvecmode> [(const_int 0)] UNSPEC_CAST)))]
22263 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
22264 "#"
22265 "&& reload_completed"
22266 [(set (match_dup 0) (match_dup 1))]
22267 {
22268 if (REG_P (operands[0]))
22269 operands[0] = gen_lowpart (<ssehalfvecmode>mode, operands[0]);
22270 else
22271 operands[1] = lowpart_subreg (<MODE>mode, operands[1],
22272 <ssehalfvecmode>mode);
22273 })
22274
22275 (define_int_iterator VPMADD52
22276 [UNSPEC_VPMADD52LUQ
22277 UNSPEC_VPMADD52HUQ])
22278
22279 (define_int_attr vpmadd52type
22280 [(UNSPEC_VPMADD52LUQ "luq") (UNSPEC_VPMADD52HUQ "huq")])
22281
22282 (define_expand "vpamdd52huq<mode>_maskz"
22283 [(match_operand:VI8_AVX512VL 0 "register_operand")
22284 (match_operand:VI8_AVX512VL 1 "register_operand")
22285 (match_operand:VI8_AVX512VL 2 "register_operand")
22286 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand")
22287 (match_operand:<avx512fmaskmode> 4 "register_operand")]
22288 "TARGET_AVX512IFMA"
22289 {
22290 emit_insn (gen_vpamdd52huq<mode>_maskz_1 (
22291 operands[0], operands[1], operands[2], operands[3],
22292 CONST0_RTX (<MODE>mode), operands[4]));
22293 DONE;
22294 })
22295
22296 (define_expand "vpamdd52luq<mode>_maskz"
22297 [(match_operand:VI8_AVX512VL 0 "register_operand")
22298 (match_operand:VI8_AVX512VL 1 "register_operand")
22299 (match_operand:VI8_AVX512VL 2 "register_operand")
22300 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand")
22301 (match_operand:<avx512fmaskmode> 4 "register_operand")]
22302 "TARGET_AVX512IFMA"
22303 {
22304 emit_insn (gen_vpamdd52luq<mode>_maskz_1 (
22305 operands[0], operands[1], operands[2], operands[3],
22306 CONST0_RTX (<MODE>mode), operands[4]));
22307 DONE;
22308 })
22309
22310 (define_insn "vpamdd52<vpmadd52type><mode><sd_maskz_name>"
22311 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
22312 (unspec:VI8_AVX512VL
22313 [(match_operand:VI8_AVX512VL 1 "register_operand" "0")
22314 (match_operand:VI8_AVX512VL 2 "register_operand" "v")
22315 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand" "vm")]
22316 VPMADD52))]
22317 "TARGET_AVX512IFMA"
22318 "vpmadd52<vpmadd52type>\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3}"
22319 [(set_attr "type" "ssemuladd")
22320 (set_attr "prefix" "evex")
22321 (set_attr "mode" "<sseinsnmode>")])
22322
22323 (define_insn "vpamdd52<vpmadd52type><mode>_mask"
22324 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
22325 (vec_merge:VI8_AVX512VL
22326 (unspec:VI8_AVX512VL
22327 [(match_operand:VI8_AVX512VL 1 "register_operand" "0")
22328 (match_operand:VI8_AVX512VL 2 "register_operand" "v")
22329 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand" "vm")]
22330 VPMADD52)
22331 (match_dup 1)
22332 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
22333 "TARGET_AVX512IFMA"
22334 "vpmadd52<vpmadd52type>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
22335 [(set_attr "type" "ssemuladd")
22336 (set_attr "prefix" "evex")
22337 (set_attr "mode" "<sseinsnmode>")])
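
;; A minimal, illustrative C sketch (helper name invented here) of the
;; 52-bit multiply-accumulate above, assuming the AVX512IFMA intrinsics
;; from <immintrin.h>:
;;
;;   #include <immintrin.h>
;;
;;   __m512i madd52lo (__m512i acc, __m512i a, __m512i b)
;;   {
;;     /* Per 64-bit lane: acc + low 52 bits of (a * b)  (vpmadd52luq).  */
;;     return _mm512_madd52lo_epu64 (acc, a, b);
;;   }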
22338
22339 (define_insn "vpmultishiftqb<mode><mask_name>"
22340 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
22341 (unspec:VI1_AVX512VL
22342 [(match_operand:VI1_AVX512VL 1 "register_operand" "v")
22343 (match_operand:VI1_AVX512VL 2 "nonimmediate_operand" "vm")]
22344 UNSPEC_VPMULTISHIFT))]
22345 "TARGET_AVX512VBMI"
22346 "vpmultishiftqb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
22347 [(set_attr "type" "sselog")
22348 (set_attr "prefix" "evex")
22349 (set_attr "mode" "<sseinsnmode>")])
22350
22351 (define_mode_iterator IMOD4
22352 [(V64SF "TARGET_AVX5124FMAPS") (V64SI "TARGET_AVX5124VNNIW")])
22353
22354 (define_mode_attr imod4_narrow
22355 [(V64SF "V16SF") (V64SI "V16SI")])
22356
22357 (define_expand "mov<mode>"
22358 [(set (match_operand:IMOD4 0 "nonimmediate_operand")
22359 (match_operand:IMOD4 1 "nonimm_or_0_operand"))]
22360 "TARGET_AVX512F"
22361 {
22362 ix86_expand_vector_move (<MODE>mode, operands);
22363 DONE;
22364 })
22365
22366 (define_insn_and_split "*mov<mode>_internal"
22367 [(set (match_operand:IMOD4 0 "nonimmediate_operand" "=v,v ,m")
22368 (match_operand:IMOD4 1 "nonimm_or_0_operand" " C,vm,v"))]
22369 "TARGET_AVX512F
22370 && (register_operand (operands[0], <MODE>mode)
22371 || register_operand (operands[1], <MODE>mode))"
22372 "#"
22373 "&& reload_completed"
22374 [(const_int 0)]
22375 {
22376 rtx op0, op1;
22377 int i;
22378
22379 for (i = 0; i < 4; i++)
22380 {
22381 op0 = simplify_subreg
22382 (<imod4_narrow>mode, operands[0], <MODE>mode, i * 64);
22383 op1 = simplify_subreg
22384 (<imod4_narrow>mode, operands[1], <MODE>mode, i * 64);
22385 emit_move_insn (op0, op1);
22386 }
22387 DONE;
22388 })
22389
22390 (define_insn "avx5124fmaddps_4fmaddps"
22391 [(set (match_operand:V16SF 0 "register_operand" "=v")
22392 (unspec:V16SF
22393 [(match_operand:V16SF 1 "register_operand" "0")
22394 (match_operand:V64SF 2 "register_operand" "v")
22395 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD))]
22396 "TARGET_AVX5124FMAPS"
22397 "v4fmaddps\t{%3, %g2, %0|%0, %g2, %3}"
22398 [(set_attr ("type") ("ssemuladd"))
22399 (set_attr ("prefix") ("evex"))
22400 (set_attr ("mode") ("V16SF"))])
22401
22402 (define_insn "avx5124fmaddps_4fmaddps_mask"
22403 [(set (match_operand:V16SF 0 "register_operand" "=v")
22404 (vec_merge:V16SF
22405 (unspec:V16SF
22406 [(match_operand:V64SF 1 "register_operand" "v")
22407 (match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FMADD)
22408 (match_operand:V16SF 3 "register_operand" "0")
22409 (match_operand:HI 4 "register_operand" "Yk")))]
22410 "TARGET_AVX5124FMAPS"
22411 "v4fmaddps\t{%2, %g1, %0%{%4%}|%0%{%4%}, %g1, %2}"
22412 [(set_attr ("type") ("ssemuladd"))
22413 (set_attr ("prefix") ("evex"))
22414 (set_attr ("mode") ("V16SF"))])
22415
22416 (define_insn "avx5124fmaddps_4fmaddps_maskz"
22417 [(set (match_operand:V16SF 0 "register_operand" "=v")
22418 (vec_merge:V16SF
22419 (unspec:V16SF
22420 [(match_operand:V16SF 1 "register_operand" "0")
22421 (match_operand:V64SF 2 "register_operand" "v")
22422 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD)
22423 (match_operand:V16SF 4 "const0_operand" "C")
22424 (match_operand:HI 5 "register_operand" "Yk")))]
22425 "TARGET_AVX5124FMAPS"
22426 "v4fmaddps\t{%3, %g2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %g2, %3}"
22427 [(set_attr ("type") ("ssemuladd"))
22428 (set_attr ("prefix") ("evex"))
22429 (set_attr ("mode") ("V16SF"))])
22430
22431 (define_insn "avx5124fmaddps_4fmaddss"
22432 [(set (match_operand:V4SF 0 "register_operand" "=v")
22433 (unspec:V4SF
22434 [(match_operand:V4SF 1 "register_operand" "0")
22435 (match_operand:V64SF 2 "register_operand" "v")
22436 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD))]
22437 "TARGET_AVX5124FMAPS"
22438 "v4fmaddss\t{%3, %x2, %0|%0, %x2, %3}"
22439 [(set_attr ("type") ("ssemuladd"))
22440 (set_attr ("prefix") ("evex"))
22441 (set_attr ("mode") ("SF"))])
22442
22443 (define_insn "avx5124fmaddps_4fmaddss_mask"
22444 [(set (match_operand:V4SF 0 "register_operand" "=v")
22445 (vec_merge:V4SF
22446 (unspec:V4SF
22447 [(match_operand:V64SF 1 "register_operand" "v")
22448 (match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FMADD)
22449 (match_operand:V4SF 3 "register_operand" "0")
22450 (match_operand:QI 4 "register_operand" "Yk")))]
22451 "TARGET_AVX5124FMAPS"
22452 "v4fmaddss\t{%2, %x1, %0%{%4%}|%0%{%4%}, %x1, %2}"
22453 [(set_attr ("type") ("ssemuladd"))
22454 (set_attr ("prefix") ("evex"))
22455 (set_attr ("mode") ("SF"))])
22456
22457 (define_insn "avx5124fmaddps_4fmaddss_maskz"
22458 [(set (match_operand:V4SF 0 "register_operand" "=v")
22459 (vec_merge:V4SF
22460 (unspec:V4SF
22461 [(match_operand:V4SF 1 "register_operand" "0")
22462 (match_operand:V64SF 2 "register_operand" "v")
22463 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD)
22464 (match_operand:V4SF 4 "const0_operand" "C")
22465 (match_operand:QI 5 "register_operand" "Yk")))]
22466 "TARGET_AVX5124FMAPS"
22467 "v4fmaddss\t{%3, %x2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %x2, %3}"
22468 [(set_attr ("type") ("ssemuladd"))
22469 (set_attr ("prefix") ("evex"))
22470 (set_attr ("mode") ("SF"))])
22471
(define_insn "avx5124fmaddps_4fnmaddps"
  [(set (match_operand:V16SF 0 "register_operand" "=v")
        (unspec:V16SF
          [(match_operand:V16SF 1 "register_operand" "0")
           (match_operand:V64SF 2 "register_operand" "v")
           (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD))]
  "TARGET_AVX5124FMAPS"
  "v4fnmaddps\t{%3, %g2, %0|%0, %g2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "V16SF")])

(define_insn "avx5124fmaddps_4fnmaddps_mask"
  [(set (match_operand:V16SF 0 "register_operand" "=v")
        (vec_merge:V16SF
          (unspec:V16SF
            [(match_operand:V64SF 1 "register_operand" "v")
             (match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FNMADD)
          (match_operand:V16SF 3 "register_operand" "0")
          (match_operand:HI 4 "register_operand" "Yk")))]
  "TARGET_AVX5124FMAPS"
  "v4fnmaddps\t{%2, %g1, %0%{%4%}|%0%{%4%}, %g1, %2}"
  [(set_attr "type" "ssemuladd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "V16SF")])

(define_insn "avx5124fmaddps_4fnmaddps_maskz"
  [(set (match_operand:V16SF 0 "register_operand" "=v")
        (vec_merge:V16SF
          (unspec:V16SF
            [(match_operand:V16SF 1 "register_operand" "0")
             (match_operand:V64SF 2 "register_operand" "v")
             (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD)
          (match_operand:V16SF 4 "const0_operand" "C")
          (match_operand:HI 5 "register_operand" "Yk")))]
  "TARGET_AVX5124FMAPS"
  "v4fnmaddps\t{%3, %g2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %g2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "V16SF")])

(define_insn "avx5124fmaddps_4fnmaddss"
  [(set (match_operand:V4SF 0 "register_operand" "=v")
        (unspec:V4SF
          [(match_operand:V4SF 1 "register_operand" "0")
           (match_operand:V64SF 2 "register_operand" "v")
           (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD))]
  "TARGET_AVX5124FMAPS"
  "v4fnmaddss\t{%3, %x2, %0|%0, %x2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "SF")])

(define_insn "avx5124fmaddps_4fnmaddss_mask"
  [(set (match_operand:V4SF 0 "register_operand" "=v")
        (vec_merge:V4SF
          (unspec:V4SF
            [(match_operand:V64SF 1 "register_operand" "v")
             (match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FNMADD)
          (match_operand:V4SF 3 "register_operand" "0")
          (match_operand:QI 4 "register_operand" "Yk")))]
  "TARGET_AVX5124FMAPS"
  "v4fnmaddss\t{%2, %x1, %0%{%4%}|%0%{%4%}, %x1, %2}"
  [(set_attr "type" "ssemuladd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "SF")])

(define_insn "avx5124fmaddps_4fnmaddss_maskz"
  [(set (match_operand:V4SF 0 "register_operand" "=v")
        (vec_merge:V4SF
          (unspec:V4SF
            [(match_operand:V4SF 1 "register_operand" "0")
             (match_operand:V64SF 2 "register_operand" "v")
             (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD)
          (match_operand:V4SF 4 "const0_operand" "C")
          (match_operand:QI 5 "register_operand" "Yk")))]
  "TARGET_AVX5124FMAPS"
  "v4fnmaddss\t{%3, %x2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %x2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "SF")])

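;; AVX512_4VNNIW: vp4dpwssd{,s} perform four iterations of a dot product of
;; signed word pairs with doubleword accumulation, taking one source from a
;; block of four registers (the V64SI operand) and the other from a 128-bit
;; memory operand; the 's' form saturates the accumulation.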
(define_insn "avx5124vnniw_vp4dpwssd"
  [(set (match_operand:V16SI 0 "register_operand" "=v")
        (unspec:V16SI
          [(match_operand:V16SI 1 "register_operand" "0")
           (match_operand:V64SI 2 "register_operand" "v")
           (match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSD))]
  "TARGET_AVX5124VNNIW"
  "vp4dpwssd\t{%3, %g2, %0|%0, %g2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])

(define_insn "avx5124vnniw_vp4dpwssd_mask"
  [(set (match_operand:V16SI 0 "register_operand" "=v")
        (vec_merge:V16SI
          (unspec:V16SI
            [(match_operand:V64SI 1 "register_operand" "v")
             (match_operand:V4SI 2 "memory_operand" "m")] UNSPEC_VP4DPWSSD)
          (match_operand:V16SI 3 "register_operand" "0")
          (match_operand:HI 4 "register_operand" "Yk")))]
  "TARGET_AVX5124VNNIW"
  "vp4dpwssd\t{%2, %g1, %0%{%4%}|%0%{%4%}, %g1, %2}"
  [(set_attr "type" "ssemuladd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])

(define_insn "avx5124vnniw_vp4dpwssd_maskz"
  [(set (match_operand:V16SI 0 "register_operand" "=v")
        (vec_merge:V16SI
          (unspec:V16SI
            [(match_operand:V16SI 1 "register_operand" "0")
             (match_operand:V64SI 2 "register_operand" "v")
             (match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSD)
          (match_operand:V16SI 4 "const0_operand" "C")
          (match_operand:HI 5 "register_operand" "Yk")))]
  "TARGET_AVX5124VNNIW"
  "vp4dpwssd\t{%3, %g2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %g2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])

(define_insn "avx5124vnniw_vp4dpwssds"
  [(set (match_operand:V16SI 0 "register_operand" "=v")
        (unspec:V16SI
          [(match_operand:V16SI 1 "register_operand" "0")
           (match_operand:V64SI 2 "register_operand" "v")
           (match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSDS))]
  "TARGET_AVX5124VNNIW"
  "vp4dpwssds\t{%3, %g2, %0|%0, %g2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])

(define_insn "avx5124vnniw_vp4dpwssds_mask"
  [(set (match_operand:V16SI 0 "register_operand" "=v")
        (vec_merge:V16SI
          (unspec:V16SI
            [(match_operand:V64SI 1 "register_operand" "v")
             (match_operand:V4SI 2 "memory_operand" "m")] UNSPEC_VP4DPWSSDS)
          (match_operand:V16SI 3 "register_operand" "0")
          (match_operand:HI 4 "register_operand" "Yk")))]
  "TARGET_AVX5124VNNIW"
  "vp4dpwssds\t{%2, %g1, %0%{%4%}|%0%{%4%}, %g1, %2}"
  [(set_attr "type" "ssemuladd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])

(define_insn "avx5124vnniw_vp4dpwssds_maskz"
  [(set (match_operand:V16SI 0 "register_operand" "=v")
        (vec_merge:V16SI
          (unspec:V16SI
            [(match_operand:V16SI 1 "register_operand" "0")
             (match_operand:V64SI 2 "register_operand" "v")
             (match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSDS)
          (match_operand:V16SI 4 "const0_operand" "C")
          (match_operand:HI 5 "register_operand" "Yk")))]
  "TARGET_AVX5124VNNIW"
  "vp4dpwssds\t{%3, %g2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %g2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])

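;; AVX512VPOPCNTDQ: population count of each doubleword/quadword element.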
(define_insn "vpopcount<mode><mask_name>"
  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
        (popcount:VI48_AVX512VL
          (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512VPOPCNTDQ"
  "vpopcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}")

;; Save multiple registers out-of-line.
(define_insn "*save_multiple<mode>"
  [(match_parallel 0 "save_multiple"
    [(use (match_operand:P 1 "symbol_operand"))])]
  "TARGET_SSE && TARGET_64BIT"
  "call\t%P1")

;; Restore multiple registers out-of-line.
(define_insn "*restore_multiple<mode>"
  [(match_parallel 0 "restore_multiple"
    [(use (match_operand:P 1 "symbol_operand"))])]
  "TARGET_SSE && TARGET_64BIT"
  "call\t%P1")

;; Restore multiple registers out-of-line and return.
(define_insn "*restore_multiple_and_return<mode>"
  [(match_parallel 0 "restore_multiple"
    [(return)
     (use (match_operand:P 1 "symbol_operand"))
     (set (reg:DI SP_REG) (reg:DI R10_REG))
    ])]
  "TARGET_SSE && TARGET_64BIT"
  "jmp\t%P1")

;; Restore multiple registers out-of-line when the hard frame pointer is used;
;; perform the leave operation prior to returning from the function.
(define_insn "*restore_multiple_leave_return<mode>"
  [(match_parallel 0 "restore_multiple"
    [(return)
     (use (match_operand:P 1 "symbol_operand"))
     (set (reg:DI SP_REG) (plus:DI (reg:DI BP_REG) (const_int 8)))
     (set (reg:DI BP_REG) (mem:DI (reg:DI BP_REG)))
     (clobber (mem:BLK (scratch)))
    ])]
  "TARGET_SSE && TARGET_64BIT"
  "jmp\t%P1")

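;; AVX512BITALG: population count of each byte/word element.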
(define_insn "vpopcount<mode><mask_name>"
  [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
        (popcount:VI12_AVX512VL
          (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512BITALG"
  "vpopcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}")

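;; GFNI instructions treat each byte as an element of GF(2^8):
;; gf2p8affineqb applies an affine transform (bit-matrix multiply plus a
;; constant), gf2p8affineinvqb applies the same transform to the
;; multiplicative inverse of each byte, and gf2p8mulb multiplies bytes
;; modulo the polynomial x^8 + x^4 + x^3 + x + 1.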
(define_insn "vgf2p8affineinvqb_<mode><mask_name>"
  [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,v")
        (unspec:VI1_AVX512F
          [(match_operand:VI1_AVX512F 1 "register_operand" "0,v")
           (match_operand:VI1_AVX512F 2 "vector_operand" "xBm,vm")
           (match_operand 3 "const_0_to_255_operand" "n,n")]
          UNSPEC_GF2P8AFFINEINV))]
  "TARGET_GFNI"
  "@
   gf2p8affineinvqb\t{%3, %2, %0|%0, %2, %3}
   vgf2p8affineinvqb\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,maybe_evex")
   (set_attr "mode" "<sseinsnmode>")])

(define_insn "vgf2p8affineqb_<mode><mask_name>"
  [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,v")
        (unspec:VI1_AVX512F
          [(match_operand:VI1_AVX512F 1 "register_operand" "0,v")
           (match_operand:VI1_AVX512F 2 "vector_operand" "xBm,vm")
           (match_operand 3 "const_0_to_255_operand" "n,n")]
          UNSPEC_GF2P8AFFINE))]
  "TARGET_GFNI"
  "@
   gf2p8affineqb\t{%3, %2, %0|%0, %2, %3}
   vgf2p8affineqb\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,maybe_evex")
   (set_attr "mode" "<sseinsnmode>")])

(define_insn "vgf2p8mulb_<mode><mask_name>"
  [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,v")
        (unspec:VI1_AVX512F
          [(match_operand:VI1_AVX512F 1 "register_operand" "%0,v")
           (match_operand:VI1_AVX512F 2 "vector_operand" "xBm,vm")]
          UNSPEC_GF2P8MUL))]
  "TARGET_GFNI"
  "@
   gf2p8mulb\t{%2, %0|%0, %2}
   vgf2p8mulb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,maybe_evex")
   (set_attr "mode" "<sseinsnmode>")])

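;; AVX512VBMI2 double-shift instructions: vpshld/vpshrd concatenate pairs of
;; elements from the two sources and shift the double-width value left/right
;; by an immediate count; the vpshldv/vpshrdv forms take per-element variable
;; counts from a third operand.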
(define_insn "vpshrd_<mode><mask_name>"
  [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
        (unspec:VI248_AVX512VL
          [(match_operand:VI248_AVX512VL 1 "register_operand" "v")
           (match_operand:VI248_AVX512VL 2 "nonimmediate_operand" "vm")
           (match_operand:SI 3 "const_0_to_255_operand" "n")]
          UNSPEC_VPSHRD))]
  "TARGET_AVX512VBMI2"
  "vpshrd<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}"
  [(set_attr "prefix" "evex")])

(define_insn "vpshld_<mode><mask_name>"
  [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
        (unspec:VI248_AVX512VL
          [(match_operand:VI248_AVX512VL 1 "register_operand" "v")
           (match_operand:VI248_AVX512VL 2 "nonimmediate_operand" "vm")
           (match_operand:SI 3 "const_0_to_255_operand" "n")]
          UNSPEC_VPSHLD))]
  "TARGET_AVX512VBMI2"
  "vpshld<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}"
  [(set_attr "prefix" "evex")])

(define_insn "vpshrdv_<mode>"
  [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
        (unspec:VI248_AVX512VL
          [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
           (match_operand:VI248_AVX512VL 2 "register_operand" "v")
           (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
          UNSPEC_VPSHRDV))]
  "TARGET_AVX512VBMI2"
  "vpshrdv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])

(define_insn "vpshrdv_<mode>_mask"
  [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
        (vec_merge:VI248_AVX512VL
          (unspec:VI248_AVX512VL
            [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
             (match_operand:VI248_AVX512VL 2 "register_operand" "v")
             (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
            UNSPEC_VPSHRDV)
          (match_dup 1)
          (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512VBMI2"
  "vpshrdv<ssemodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])

(define_expand "vpshrdv_<mode>_maskz"
  [(match_operand:VI248_AVX512VL 0 "register_operand")
   (match_operand:VI248_AVX512VL 1 "register_operand")
   (match_operand:VI248_AVX512VL 2 "register_operand")
   (match_operand:VI248_AVX512VL 3 "nonimmediate_operand")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512VBMI2"
{
  emit_insn (gen_vpshrdv_<mode>_maskz_1 (operands[0], operands[1],
                                         operands[2], operands[3],
                                         CONST0_RTX (<MODE>mode),
                                         operands[4]));
  DONE;
})

(define_insn "vpshrdv_<mode>_maskz_1"
  [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
        (vec_merge:VI248_AVX512VL
          (unspec:VI248_AVX512VL
            [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
             (match_operand:VI248_AVX512VL 2 "register_operand" "v")
             (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
            UNSPEC_VPSHRDV)
          (match_operand:VI248_AVX512VL 4 "const0_operand" "C")
          (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
  "TARGET_AVX512VBMI2"
  "vpshrdv<ssemodesuffix>\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3}"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])

(define_insn "vpshldv_<mode>"
  [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
        (unspec:VI248_AVX512VL
          [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
           (match_operand:VI248_AVX512VL 2 "register_operand" "v")
           (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
          UNSPEC_VPSHLDV))]
  "TARGET_AVX512VBMI2"
  "vpshldv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])

(define_insn "vpshldv_<mode>_mask"
  [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
        (vec_merge:VI248_AVX512VL
          (unspec:VI248_AVX512VL
            [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
             (match_operand:VI248_AVX512VL 2 "register_operand" "v")
             (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
            UNSPEC_VPSHLDV)
          (match_dup 1)
          (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512VBMI2"
  "vpshldv<ssemodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])

(define_expand "vpshldv_<mode>_maskz"
  [(match_operand:VI248_AVX512VL 0 "register_operand")
   (match_operand:VI248_AVX512VL 1 "register_operand")
   (match_operand:VI248_AVX512VL 2 "register_operand")
   (match_operand:VI248_AVX512VL 3 "nonimmediate_operand")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512VBMI2"
{
  emit_insn (gen_vpshldv_<mode>_maskz_1 (operands[0], operands[1],
                                         operands[2], operands[3],
                                         CONST0_RTX (<MODE>mode),
                                         operands[4]));
  DONE;
})

(define_insn "vpshldv_<mode>_maskz_1"
  [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
        (vec_merge:VI248_AVX512VL
          (unspec:VI248_AVX512VL
            [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
             (match_operand:VI248_AVX512VL 2 "register_operand" "v")
             (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
            UNSPEC_VPSHLDV)
          (match_operand:VI248_AVX512VL 4 "const0_operand" "C")
          (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
  "TARGET_AVX512VBMI2"
  "vpshldv<ssemodesuffix>\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3}"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])

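;; AVX512VNNI dot-product instructions: vpdpbusd{,s} multiply groups of four
;; unsigned bytes from operand 2 with the corresponding signed bytes from
;; operand 3 and accumulate the sums into doublewords; vpdpwssd{,s} do the
;; same for pairs of signed words.  The 's' forms saturate the accumulation.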
(define_insn "vpdpbusd_<mode>"
  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
        (unspec:VI4_AVX512VL
          [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
           (match_operand:VI4_AVX512VL 2 "register_operand" "v")
           (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
          UNSPEC_VPMADDUBSWACCD))]
  "TARGET_AVX512VNNI"
  "vpdpbusd\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "prefix" "evex")])

(define_insn "vpdpbusd_<mode>_mask"
  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
        (vec_merge:VI4_AVX512VL
          (unspec:VI4_AVX512VL
            [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
             (match_operand:VI4_AVX512VL 2 "register_operand" "v")
             (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
            UNSPEC_VPMADDUBSWACCD)
          (match_dup 1)
          (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512VNNI"
  "vpdpbusd\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
  [(set_attr "prefix" "evex")])

(define_expand "vpdpbusd_<mode>_maskz"
  [(match_operand:VI4_AVX512VL 0 "register_operand")
   (match_operand:VI4_AVX512VL 1 "register_operand")
   (match_operand:VI4_AVX512VL 2 "register_operand")
   (match_operand:VI4_AVX512VL 3 "nonimmediate_operand")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512VNNI"
{
  emit_insn (gen_vpdpbusd_<mode>_maskz_1 (operands[0], operands[1],
                                          operands[2], operands[3],
                                          CONST0_RTX (<MODE>mode),
                                          operands[4]));
  DONE;
})

(define_insn "vpdpbusd_<mode>_maskz_1"
  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
        (vec_merge:VI4_AVX512VL
          (unspec:VI4_AVX512VL
            [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
             (match_operand:VI4_AVX512VL 2 "register_operand" "v")
             (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
            UNSPEC_VPMADDUBSWACCD)
          (match_operand:VI4_AVX512VL 4 "const0_operand" "C")
          (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
  "TARGET_AVX512VNNI"
  "vpdpbusd\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3}"
  [(set_attr "prefix" "evex")])


(define_insn "vpdpbusds_<mode>"
  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
        (unspec:VI4_AVX512VL
          [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
           (match_operand:VI4_AVX512VL 2 "register_operand" "v")
           (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
          UNSPEC_VPMADDUBSWACCSSD))]
  "TARGET_AVX512VNNI"
  "vpdpbusds\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "prefix" "evex")])

(define_insn "vpdpbusds_<mode>_mask"
  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
        (vec_merge:VI4_AVX512VL
          (unspec:VI4_AVX512VL
            [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
             (match_operand:VI4_AVX512VL 2 "register_operand" "v")
             (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
            UNSPEC_VPMADDUBSWACCSSD)
          (match_dup 1)
          (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512VNNI"
  "vpdpbusds\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
  [(set_attr "prefix" "evex")])

(define_expand "vpdpbusds_<mode>_maskz"
  [(match_operand:VI4_AVX512VL 0 "register_operand")
   (match_operand:VI4_AVX512VL 1 "register_operand")
   (match_operand:VI4_AVX512VL 2 "register_operand")
   (match_operand:VI4_AVX512VL 3 "nonimmediate_operand")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512VNNI"
{
  emit_insn (gen_vpdpbusds_<mode>_maskz_1 (operands[0], operands[1],
                                           operands[2], operands[3],
                                           CONST0_RTX (<MODE>mode),
                                           operands[4]));
  DONE;
})

(define_insn "vpdpbusds_<mode>_maskz_1"
  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
        (vec_merge:VI4_AVX512VL
          (unspec:VI4_AVX512VL
            [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
             (match_operand:VI4_AVX512VL 2 "register_operand" "v")
             (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
            UNSPEC_VPMADDUBSWACCSSD)
          (match_operand:VI4_AVX512VL 4 "const0_operand" "C")
          (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
  "TARGET_AVX512VNNI"
  "vpdpbusds\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3}"
  [(set_attr "prefix" "evex")])


(define_insn "vpdpwssd_<mode>"
  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
        (unspec:VI4_AVX512VL
          [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
           (match_operand:VI4_AVX512VL 2 "register_operand" "v")
           (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
          UNSPEC_VPMADDWDACCD))]
  "TARGET_AVX512VNNI"
  "vpdpwssd\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "prefix" "evex")])

(define_insn "vpdpwssd_<mode>_mask"
  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
        (vec_merge:VI4_AVX512VL
          (unspec:VI4_AVX512VL
            [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
             (match_operand:VI4_AVX512VL 2 "register_operand" "v")
             (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
            UNSPEC_VPMADDWDACCD)
          (match_dup 1)
          (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512VNNI"
  "vpdpwssd\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
  [(set_attr "prefix" "evex")])

(define_expand "vpdpwssd_<mode>_maskz"
  [(match_operand:VI4_AVX512VL 0 "register_operand")
   (match_operand:VI4_AVX512VL 1 "register_operand")
   (match_operand:VI4_AVX512VL 2 "register_operand")
   (match_operand:VI4_AVX512VL 3 "nonimmediate_operand")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512VNNI"
{
  emit_insn (gen_vpdpwssd_<mode>_maskz_1 (operands[0], operands[1],
                                          operands[2], operands[3],
                                          CONST0_RTX (<MODE>mode),
                                          operands[4]));
  DONE;
})

(define_insn "vpdpwssd_<mode>_maskz_1"
  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
        (vec_merge:VI4_AVX512VL
          (unspec:VI4_AVX512VL
            [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
             (match_operand:VI4_AVX512VL 2 "register_operand" "v")
             (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
            UNSPEC_VPMADDWDACCD)
          (match_operand:VI4_AVX512VL 4 "const0_operand" "C")
          (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
  "TARGET_AVX512VNNI"
  "vpdpwssd\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3}"
  [(set_attr "prefix" "evex")])


(define_insn "vpdpwssds_<mode>"
  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
        (unspec:VI4_AVX512VL
          [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
           (match_operand:VI4_AVX512VL 2 "register_operand" "v")
           (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
          UNSPEC_VPMADDWDACCSSD))]
  "TARGET_AVX512VNNI"
  "vpdpwssds\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "prefix" "evex")])

(define_insn "vpdpwssds_<mode>_mask"
  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
        (vec_merge:VI4_AVX512VL
          (unspec:VI4_AVX512VL
            [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
             (match_operand:VI4_AVX512VL 2 "register_operand" "v")
             (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
            UNSPEC_VPMADDWDACCSSD)
          (match_dup 1)
          (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512VNNI"
  "vpdpwssds\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
  [(set_attr "prefix" "evex")])

(define_expand "vpdpwssds_<mode>_maskz"
  [(match_operand:VI4_AVX512VL 0 "register_operand")
   (match_operand:VI4_AVX512VL 1 "register_operand")
   (match_operand:VI4_AVX512VL 2 "register_operand")
   (match_operand:VI4_AVX512VL 3 "nonimmediate_operand")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512VNNI"
{
  emit_insn (gen_vpdpwssds_<mode>_maskz_1 (operands[0], operands[1],
                                           operands[2], operands[3],
                                           CONST0_RTX (<MODE>mode),
                                           operands[4]));
  DONE;
})

(define_insn "vpdpwssds_<mode>_maskz_1"
  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
        (vec_merge:VI4_AVX512VL
          (unspec:VI4_AVX512VL
            [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
             (match_operand:VI4_AVX512VL 2 "register_operand" "v")
             (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
            UNSPEC_VPMADDWDACCSSD)
          (match_operand:VI4_AVX512VL 4 "const0_operand" "C")
          (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
  "TARGET_AVX512VNNI"
  "vpdpwssds\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3}"
  [(set_attr "prefix" "evex")])

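;; VAES: one AES encryption or decryption round applied independently to
;; each 128-bit lane of the vector operands.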
(define_insn "vaesdec_<mode>"
  [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v")
        (unspec:VI1_AVX512VL_F
          [(match_operand:VI1_AVX512VL_F 1 "register_operand" "v")
           (match_operand:VI1_AVX512VL_F 2 "vector_operand" "vm")]
          UNSPEC_VAESDEC))]
  "TARGET_VAES"
  "vaesdec\t{%2, %1, %0|%0, %1, %2}")

(define_insn "vaesdeclast_<mode>"
  [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v")
        (unspec:VI1_AVX512VL_F
          [(match_operand:VI1_AVX512VL_F 1 "register_operand" "v")
           (match_operand:VI1_AVX512VL_F 2 "vector_operand" "vm")]
          UNSPEC_VAESDECLAST))]
  "TARGET_VAES"
  "vaesdeclast\t{%2, %1, %0|%0, %1, %2}")

(define_insn "vaesenc_<mode>"
  [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v")
        (unspec:VI1_AVX512VL_F
          [(match_operand:VI1_AVX512VL_F 1 "register_operand" "v")
           (match_operand:VI1_AVX512VL_F 2 "vector_operand" "vm")]
          UNSPEC_VAESENC))]
  "TARGET_VAES"
  "vaesenc\t{%2, %1, %0|%0, %1, %2}")

(define_insn "vaesenclast_<mode>"
  [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v")
        (unspec:VI1_AVX512VL_F
          [(match_operand:VI1_AVX512VL_F 1 "register_operand" "v")
           (match_operand:VI1_AVX512VL_F 2 "vector_operand" "vm")]
          UNSPEC_VAESENCLAST))]
  "TARGET_VAES"
  "vaesenclast\t{%2, %1, %0|%0, %1, %2}")

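;; VPCLMULQDQ: carry-less (polynomial) multiplication of the quadwords
;; selected by the immediate, performed within each 128-bit lane.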
(define_insn "vpclmulqdq_<mode>"
  [(set (match_operand:VI8_FVL 0 "register_operand" "=v")
        (unspec:VI8_FVL [(match_operand:VI8_FVL 1 "register_operand" "v")
                         (match_operand:VI8_FVL 2 "vector_operand" "vm")
                         (match_operand:SI 3 "const_0_to_255_operand" "n")]
                        UNSPEC_VPCLMULQDQ))]
  "TARGET_VPCLMULQDQ"
  "vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "mode" "DI")])

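;; AVX512BITALG vpshufbitqmb: each byte of operand 2 selects, by its low six
;; bits, one bit of the corresponding 64-bit lane of operand 1; the selected
;; bits are collected into a mask register.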
(define_insn "avx512vl_vpshufbitqmb<mode><mask_scalar_merge_name>"
  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
        (unspec:<avx512fmaskmode>
          [(match_operand:VI1_AVX512VLBW 1 "register_operand" "v")
           (match_operand:VI1_AVX512VLBW 2 "nonimmediate_operand" "vm")]
          UNSPEC_VPSHUFBIT))]
  "TARGET_AVX512BITALG"
  "vpshufbitqmb\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])

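;; AVX512VP2INTERSECT: vp2intersect{d,q} compare the elements of the two
;; sources against each other and produce a pair of mask registers marking,
;; for each source, the elements that also occur in the other source.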
(define_mode_iterator VI48_AVX512VP2VL
  [V8DI
   (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
   (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")])

(define_insn "avx512vp2intersect_2intersect<mode>"
  [(set (match_operand:P2QI 0 "register_operand" "=k")
        (unspec:P2QI
          [(match_operand:VI48_AVX512VP2VL 1 "register_operand" "v")
           (match_operand:VI48_AVX512VP2VL 2 "vector_operand" "vm")]
          UNSPEC_VP2INTERSECT))]
  "TARGET_AVX512VP2INTERSECT"
  "vp2intersect<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "prefix" "evex")])

(define_insn "avx512vp2intersect_2intersectv16si"
  [(set (match_operand:P2HI 0 "register_operand" "=k")
        (unspec:P2HI [(match_operand:V16SI 1 "register_operand" "v")
                      (match_operand:V16SI 2 "vector_operand" "vm")]
                     UNSPEC_VP2INTERSECT))]
  "TARGET_AVX512VP2INTERSECT"
  "vp2intersectd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "prefix" "evex")])

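;; AVX512BF16: conversions between single precision and bfloat16, plus a
;; bfloat16 dot product that accumulates into single precision.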
(define_mode_iterator BF16 [V32HI (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
;; Converting from BF to SF
(define_mode_attr bf16_cvt_2sf
  [(V32HI "V16SF") (V16HI "V8SF") (V8HI "V4SF")])
;; Converting from SF to BF
(define_mode_attr sf_cvt_bf16
  [(V4SF "V8HI") (V8SF "V8HI") (V16SF "V16HI")])
;; Map an SF vector mode to the BF16 vector mode of the same total size
;; (twice as many elements).
(define_mode_attr sf_bf16
  [(V4SF "V8HI") (V8SF "V16HI") (V16SF "V32HI")])

(define_expand "avx512f_cvtne2ps2bf16_<mode>_maskz"
  [(match_operand:BF16 0 "register_operand")
   (match_operand:<bf16_cvt_2sf> 1 "register_operand")
   (match_operand:<bf16_cvt_2sf> 2 "register_operand")
   (match_operand:<avx512fmaskmode> 3 "register_operand")]
  "TARGET_AVX512BF16"
{
  emit_insn (gen_avx512f_cvtne2ps2bf16_<mode>_mask (operands[0], operands[1],
                                                    operands[2],
                                                    CONST0_RTX (<MODE>mode),
                                                    operands[3]));
  DONE;
})

(define_insn "avx512f_cvtne2ps2bf16_<mode><mask_name>"
  [(set (match_operand:BF16 0 "register_operand" "=v")
        (unspec:BF16
          [(match_operand:<bf16_cvt_2sf> 1 "register_operand" "v")
           (match_operand:<bf16_cvt_2sf> 2 "register_operand" "v")]
          UNSPEC_VCVTNE2PS2BF16))]
  "TARGET_AVX512BF16"
  "vcvtne2ps2bf16\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}")

(define_expand "avx512f_cvtneps2bf16_<mode>_maskz"
  [(match_operand:<sf_cvt_bf16> 0 "register_operand")
   (match_operand:VF1_AVX512VL 1 "register_operand")
   (match_operand:<avx512fmaskmode> 2 "register_operand")]
  "TARGET_AVX512BF16"
{
  emit_insn (gen_avx512f_cvtneps2bf16_<mode>_mask (operands[0], operands[1],
                                                   CONST0_RTX (<sf_cvt_bf16>mode),
                                                   operands[2]));
  DONE;
})

(define_insn "avx512f_cvtneps2bf16_<mode><mask_name>"
  [(set (match_operand:<sf_cvt_bf16> 0 "register_operand" "=v")
        (unspec:<sf_cvt_bf16>
          [(match_operand:VF1_AVX512VL 1 "register_operand" "v")]
          UNSPEC_VCVTNEPS2BF16))]
  "TARGET_AVX512BF16"
  "vcvtneps2bf16\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}")

(define_expand "avx512f_dpbf16ps_<mode>_maskz"
  [(match_operand:VF1_AVX512VL 0 "register_operand")
   (match_operand:VF1_AVX512VL 1 "register_operand")
   (match_operand:<sf_bf16> 2 "register_operand")
   (match_operand:<sf_bf16> 3 "register_operand")
   (match_operand:<avx512fmaskhalfmode> 4 "register_operand")]
  "TARGET_AVX512BF16"
{
  emit_insn (gen_avx512f_dpbf16ps_<mode>_maskz_1 (operands[0], operands[1],
                                                  operands[2], operands[3],
                                                  CONST0_RTX (<MODE>mode),
                                                  operands[4]));
  DONE;
})

(define_insn "avx512f_dpbf16ps_<mode><maskz_half_name>"
  [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
        (unspec:VF1_AVX512VL
          [(match_operand:VF1_AVX512VL 1 "register_operand" "0")
           (match_operand:<sf_bf16> 2 "register_operand" "v")
           (match_operand:<sf_bf16> 3 "register_operand" "v")]
          UNSPEC_VDPBF16PS))]
  "TARGET_AVX512BF16"
  "vdpbf16ps\t{%3, %2, %0<maskz_half_operand4>|%0<maskz_half_operand4>, %2, %3}")

(define_insn "avx512f_dpbf16ps_<mode>_mask"
  [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
        (vec_merge:VF1_AVX512VL
          (unspec:VF1_AVX512VL
            [(match_operand:VF1_AVX512VL 1 "register_operand" "0")
             (match_operand:<sf_bf16> 2 "register_operand" "v")
             (match_operand:<sf_bf16> 3 "register_operand" "v")]
            UNSPEC_VDPBF16PS)
          (match_dup 1)
          (match_operand:<avx512fmaskhalfmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512BF16"
  "vdpbf16ps\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}")