]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/config/i386/sse.md
Update copyright years.
[thirdparty/gcc.git] / gcc / config / i386 / sse.md
1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005-2016 Free Software Foundation, Inc.
3 ;;
4 ;; This file is part of GCC.
5 ;;
6 ;; GCC is free software; you can redistribute it and/or modify
7 ;; it under the terms of the GNU General Public License as published by
8 ;; the Free Software Foundation; either version 3, or (at your option)
9 ;; any later version.
10 ;;
11 ;; GCC is distributed in the hope that it will be useful,
12 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
13 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 ;; GNU General Public License for more details.
15 ;;
16 ;; You should have received a copy of the GNU General Public License
17 ;; along with GCC; see the file COPYING3. If not see
18 ;; <http://www.gnu.org/licenses/>.
19
20 (define_c_enum "unspec" [
21 ;; SSE
22 UNSPEC_MOVNT
23 UNSPEC_LOADU
24 UNSPEC_STOREU
25
26 ;; SSE3
27 UNSPEC_LDDQU
28
29 ;; SSSE3
30 UNSPEC_PSHUFB
31 UNSPEC_PSIGN
32 UNSPEC_PALIGNR
33
34 ;; For SSE4A support
35 UNSPEC_EXTRQI
36 UNSPEC_EXTRQ
37 UNSPEC_INSERTQI
38 UNSPEC_INSERTQ
39
40 ;; For SSE4.1 support
41 UNSPEC_BLENDV
42 UNSPEC_INSERTPS
43 UNSPEC_DP
44 UNSPEC_MOVNTDQA
45 UNSPEC_MPSADBW
46 UNSPEC_PHMINPOSUW
47 UNSPEC_PTEST
48
49 ;; For SSE4.2 support
50 UNSPEC_PCMPESTR
51 UNSPEC_PCMPISTR
52
53 ;; For FMA4 and XOP support
54 UNSPEC_FMADDSUB
55 UNSPEC_XOP_UNSIGNED_CMP
56 UNSPEC_XOP_TRUEFALSE
57 UNSPEC_XOP_PERMUTE
58 UNSPEC_FRCZ
59
60 ;; For AES support
61 UNSPEC_AESENC
62 UNSPEC_AESENCLAST
63 UNSPEC_AESDEC
64 UNSPEC_AESDECLAST
65 UNSPEC_AESIMC
66 UNSPEC_AESKEYGENASSIST
67
68 ;; For PCLMUL support
69 UNSPEC_PCLMUL
70
71 ;; For AVX support
72 UNSPEC_PCMP
73 UNSPEC_VPERMIL
74 UNSPEC_VPERMIL2
75 UNSPEC_VPERMIL2F128
76 UNSPEC_CAST
77 UNSPEC_VTESTP
78 UNSPEC_VCVTPH2PS
79 UNSPEC_VCVTPS2PH
80
81 ;; For AVX2 support
82 UNSPEC_VPERMVAR
83 UNSPEC_VPERMTI
84 UNSPEC_GATHER
85 UNSPEC_VSIBADDR
86
87 ;; For AVX512F support
88 UNSPEC_VPERMI2
89 UNSPEC_VPERMT2
90 UNSPEC_VPERMI2_MASK
91 UNSPEC_UNSIGNED_FIX_NOTRUNC
92 UNSPEC_UNSIGNED_PCMP
93 UNSPEC_TESTM
94 UNSPEC_TESTNM
95 UNSPEC_SCATTER
96 UNSPEC_RCP14
97 UNSPEC_RSQRT14
98 UNSPEC_FIXUPIMM
99 UNSPEC_SCALEF
100 UNSPEC_VTERNLOG
101 UNSPEC_GETEXP
102 UNSPEC_GETMANT
103 UNSPEC_ALIGN
104 UNSPEC_CONFLICT
105 UNSPEC_COMPRESS
106 UNSPEC_COMPRESS_STORE
107 UNSPEC_EXPAND
108 UNSPEC_MASKED_EQ
109 UNSPEC_MASKED_GT
110
111 ;; For embed. rounding feature
112 UNSPEC_EMBEDDED_ROUNDING
113
114 ;; For AVX512PF support
115 UNSPEC_GATHER_PREFETCH
116 UNSPEC_SCATTER_PREFETCH
117
118 ;; For AVX512ER support
119 UNSPEC_EXP2
120 UNSPEC_RCP28
121 UNSPEC_RSQRT28
122
123 ;; For SHA support
124 UNSPEC_SHA1MSG1
125 UNSPEC_SHA1MSG2
126 UNSPEC_SHA1NEXTE
127 UNSPEC_SHA1RNDS4
128 UNSPEC_SHA256MSG1
129 UNSPEC_SHA256MSG2
130 UNSPEC_SHA256RNDS2
131
132 ;; For AVX512BW support
133 UNSPEC_DBPSADBW
134 UNSPEC_PMADDUBSW512
135 UNSPEC_PMADDWD512
136 UNSPEC_PSHUFHW
137 UNSPEC_PSHUFLW
138 UNSPEC_CVTINT2MASK
139
140 ;; For AVX512DQ support
141 UNSPEC_REDUCE
142 UNSPEC_FPCLASS
143 UNSPEC_RANGE
144
145 ;; For AVX512IFMA support
146 UNSPEC_VPMADD52LUQ
147 UNSPEC_VPMADD52HUQ
148
149 ;; For AVX512VBMI support
150 UNSPEC_VPMULTISHIFT
151 ])
152
;; Codes of the "unspecv" enumeration (UNSPECV_* constants), one per
;; operation named below: MXCSR load/store, CLFLUSH, MONITOR/MWAIT and
;; VZEROALL/VZEROUPPER.
;; NOTE(review): presumably these are the codes used with unspec_volatile
;; by patterns later in the file -- confirm against their uses.
153 (define_c_enum "unspecv" [
154 UNSPECV_LDMXCSR
155 UNSPECV_STMXCSR
156 UNSPECV_CLFLUSH
157 UNSPECV_MONITOR
158 UNSPECV_MWAIT
159 UNSPECV_VZEROALL
160 UNSPECV_VZEROUPPER
161 ])
162
163 ;; All vector modes including V?TImode, used in move patterns.
164 (define_mode_iterator VMOVE
165 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
166 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
167 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
168 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
169 (V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX") V1TI
170 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
171 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
172
173 ;; All AVX-512{F,VL} vector modes. TARGET_AVX512F is assumed as the baseline.
174 (define_mode_iterator V48_AVX512VL
175 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
176 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
177 V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
178 V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
179
180 ;; 1- and 2-byte element AVX-512{BW,VL} vector modes. TARGET_AVX512BW is assumed as the baseline.
181 (define_mode_iterator VI12_AVX512VL
182 [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")
183 V32HI (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
184
;; QImode-element vector integer modes: V64QI unconditionally, V16QI and
;; V32QI only with TARGET_AVX512VL.
185 (define_mode_iterator VI1_AVX512VL
186 [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")])
187
188 ;; All vector modes
189 (define_mode_iterator V
190 [(V32QI "TARGET_AVX") V16QI
191 (V16HI "TARGET_AVX") V8HI
192 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
193 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
194 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
195 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
196
197 ;; All 128bit vector modes
198 (define_mode_iterator V_128
199 [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])
200
201 ;; All 256bit vector modes
202 (define_mode_iterator V_256
203 [V32QI V16HI V8SI V4DI V8SF V4DF])
204
205 ;; All 512bit vector modes
206 (define_mode_iterator V_512 [V64QI V32HI V16SI V8DI V16SF V8DF])
207
208 ;; All 256bit and 512bit vector modes
209 (define_mode_iterator V_256_512
210 [V32QI V16HI V8SI V4DI V8SF V4DF
211 (V64QI "TARGET_AVX512F") (V32HI "TARGET_AVX512F") (V16SI "TARGET_AVX512F")
212 (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
213
214 ;; All vector float modes
215 (define_mode_iterator VF
216 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
217 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
218
219 ;; 128- and 256-bit float vector modes
220 (define_mode_iterator VF_128_256
221 [(V8SF "TARGET_AVX") V4SF
222 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
223
224 ;; All SFmode vector float modes
225 (define_mode_iterator VF1
226 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF])
227
228 ;; 128- and 256-bit SF vector modes
229 (define_mode_iterator VF1_128_256
230 [(V8SF "TARGET_AVX") V4SF])
231
;; 128- and 256-bit SF vector modes: V8SF unconditionally, V4SF only with
;; TARGET_AVX512VL.
232 (define_mode_iterator VF1_128_256VL
233 [V8SF (V4SF "TARGET_AVX512VL")])
234
235 ;; All DFmode vector float modes
236 (define_mode_iterator VF2
237 [(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
238
239 ;; 128- and 256-bit DF vector modes
240 (define_mode_iterator VF2_128_256
241 [(V4DF "TARGET_AVX") V2DF])
242
;; 512- and 256-bit DF vector modes: V4DF unconditionally, V8DF only with
;; TARGET_AVX512F.
243 (define_mode_iterator VF2_512_256
244 [(V8DF "TARGET_AVX512F") V4DF])
245
;; 512- and 256-bit DF vector modes: V8DF unconditionally, V4DF only with
;; TARGET_AVX512VL.
246 (define_mode_iterator VF2_512_256VL
247 [V8DF (V4DF "TARGET_AVX512VL")])
248
249 ;; All 128bit vector float modes
250 (define_mode_iterator VF_128
251 [V4SF (V2DF "TARGET_SSE2")])
252
253 ;; All 256bit vector float modes
254 (define_mode_iterator VF_256
255 [V8SF V4DF])
256
257 ;; All 512bit vector float modes
258 (define_mode_iterator VF_512
259 [V16SF V8DF])
260
;; SImode- and DImode-element vector integer modes: the 512bit modes
;; unconditionally, the 256bit and 128bit ones only with TARGET_AVX512VL.
261 (define_mode_iterator VI48_AVX512VL
262 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
263 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
264
265 (define_mode_iterator VF_AVX512VL
266 [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
267 V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
268
269 (define_mode_iterator VF2_AVX512VL
270 [V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
271
272 (define_mode_iterator VF1_AVX512VL
273 [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")])
274
275 ;; All vector integer modes
276 (define_mode_iterator VI
277 [(V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
278 (V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
279 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
280 (V8SI "TARGET_AVX") V4SI
281 (V4DI "TARGET_AVX") V2DI])
282
;; All vector integer modes.  The 128bit modes are unconditional, the
;; 256bit modes need TARGET_AVX2, and the 512bit modes need
;; TARGET_AVX512BW (QI/HI elements) or TARGET_AVX512F (SI/DI elements).
283 (define_mode_iterator VI_AVX2
284 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
285 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
286 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
287 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
288
289 ;; All QImode vector integer modes
290 (define_mode_iterator VI1
291 [(V32QI "TARGET_AVX") V16QI])
292
293 (define_mode_iterator VI_ULOADSTORE_BW_AVX512VL
294 [V64QI
295 V32HI (V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL")])
296
297 (define_mode_iterator VI_ULOADSTORE_F_AVX512VL
298 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
299 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
300
301 ;; All 128bit vector modes, plus all 256bit vector modes when TARGET_AVX.
302 (define_mode_iterator V_AVX
303 [V16QI V8HI V4SI V2DI V4SF V2DF
304 (V32QI "TARGET_AVX") (V16HI "TARGET_AVX")
305 (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
306 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")])
307
;; All DImode-element vector integer modes: V2DI unconditionally, V4DI with
;; TARGET_AVX, V8DI with TARGET_AVX512F.
308 (define_mode_iterator VI8
309 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
310
311 (define_mode_iterator VI8_AVX512VL
312 [V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
313
314 (define_mode_iterator VI8_256_512
315 [V8DI (V4DI "TARGET_AVX512VL")])
316
317 (define_mode_iterator VI1_AVX2
318 [(V32QI "TARGET_AVX2") V16QI])
319
320 (define_mode_iterator VI1_AVX512
321 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI])
322
323 (define_mode_iterator VI2_AVX2
324 [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
325
326 (define_mode_iterator VI2_AVX512F
327 [(V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI])
328
329 (define_mode_iterator VI4_AVX
330 [(V8SI "TARGET_AVX") V4SI])
331
332 (define_mode_iterator VI4_AVX2
333 [(V8SI "TARGET_AVX2") V4SI])
334
335 (define_mode_iterator VI4_AVX512F
336 [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
337
338 (define_mode_iterator VI4_AVX512VL
339 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")])
340
341 (define_mode_iterator VI48_AVX512F_AVX512VL
342 [V4SI V8SI (V16SI "TARGET_AVX512F")
343 (V2DI "TARGET_AVX512VL") (V4DI "TARGET_AVX512VL") (V8DI "TARGET_AVX512F")])
344
345 (define_mode_iterator VI2_AVX512VL
346 [(V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL") V32HI])
347
348 (define_mode_iterator VI8_AVX2_AVX512BW
349 [(V8DI "TARGET_AVX512BW") (V4DI "TARGET_AVX2") V2DI])
350
351 (define_mode_iterator VI8_AVX2
352 [(V4DI "TARGET_AVX2") V2DI])
353
354 (define_mode_iterator VI8_AVX2_AVX512F
355 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
356
357 (define_mode_iterator VI4_128_8_256
358 [V4SI V4DI])
359
360 ;; All V8D* modes
361 (define_mode_iterator V8FI
362 [V8DF V8DI])
363
364 ;; All V16S* modes
365 (define_mode_iterator V16FI
366 [V16SF V16SI])
367
368 ;; ??? We should probably use TImode instead.
369 (define_mode_iterator VIMAX_AVX2
370 [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") V1TI])
371
372 ;; ??? This should probably be dropped in favor of VIMAX_AVX2.
373 (define_mode_iterator SSESCALARMODE
374 [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") TI])
375
376 (define_mode_iterator VI12_AVX2
377 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
378 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
379
380 (define_mode_iterator VI24_AVX2
381 [(V16HI "TARGET_AVX2") V8HI
382 (V8SI "TARGET_AVX2") V4SI])
383
384 (define_mode_iterator VI124_AVX2_24_AVX512F_1_AVX512BW
385 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
386 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI
387 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
388
389 (define_mode_iterator VI124_AVX2
390 [(V32QI "TARGET_AVX2") V16QI
391 (V16HI "TARGET_AVX2") V8HI
392 (V8SI "TARGET_AVX2") V4SI])
393
394 (define_mode_iterator VI2_AVX2_AVX512BW
395 [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
396
397 (define_mode_iterator VI48_AVX2
398 [(V8SI "TARGET_AVX2") V4SI
399 (V4DI "TARGET_AVX2") V2DI])
400
401 (define_mode_iterator VI248_AVX2_8_AVX512F_24_AVX512BW
402 [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
403 (V16SI "TARGET_AVX512BW") (V8SI "TARGET_AVX2") V4SI
404 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
405
406 (define_mode_iterator VI248_AVX512BW_AVX512VL
407 [(V32HI "TARGET_AVX512BW")
408 (V4DI "TARGET_AVX512VL") V16SI V8DI])
409
410 ;; TARGET_AVX512VL is assumed as the baseline.
411 (define_mode_iterator VI24_AVX512BW_1
412 [(V16HI "TARGET_AVX512BW") (V8HI "TARGET_AVX512BW")
413 V8SI V4SI])
414
415 (define_mode_iterator VI48_AVX512F
416 [(V16SI "TARGET_AVX512F") V8SI V4SI
417 (V8DI "TARGET_AVX512F") V4DI V2DI])
418
419 (define_mode_iterator VI48_AVX_AVX512F
420 [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
421 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
422
423 (define_mode_iterator VI12_AVX_AVX512F
424 [ (V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
425 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI])
426
427 (define_mode_iterator V48_AVX2
428 [V4SF V2DF
429 V8SF V4DF
430 (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
431 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
432
;; ISA-name prefix per mode, used to build pattern names such as
;; "<avx512>_load<mode>_mask": "avx512vl" for the 128bit and 256bit modes,
;; "avx512bw" for the 512bit QI/HI-element modes and "avx512f" for the
;; remaining 512bit modes.
433 (define_mode_attr avx512
434 [(V16QI "avx512vl") (V32QI "avx512vl") (V64QI "avx512bw")
435 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")
436 (V4SI "avx512vl") (V8SI "avx512vl") (V16SI "avx512f")
437 (V2DI "avx512vl") (V4DI "avx512vl") (V8DI "avx512f")
438 (V4SF "avx512vl") (V8SF "avx512vl") (V16SF "avx512f")
439 (V2DF "avx512vl") (V4DF "avx512vl") (V8DF "avx512f")])
440
441 (define_mode_attr sse2_avx_avx512f
442 [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
443 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")
444 (V4SI "sse2") (V8SI "avx") (V16SI "avx512f")
445 (V2DI "avx512vl") (V4DI "avx512vl") (V8DI "avx512f")
446 (V16SF "avx512f") (V8SF "avx") (V4SF "avx")
447 (V8DF "avx512f") (V4DF "avx") (V2DF "avx")])
448
449 (define_mode_attr sse2_avx2
450 [(V16QI "sse2") (V32QI "avx2") (V64QI "avx512bw")
451 (V8HI "sse2") (V16HI "avx2") (V32HI "avx512bw")
452 (V4SI "sse2") (V8SI "avx2") (V16SI "avx512f")
453 (V2DI "sse2") (V4DI "avx2") (V8DI "avx512f")
454 (V1TI "sse2") (V2TI "avx2") (V4TI "avx512bw")])
455
456 (define_mode_attr ssse3_avx2
457 [(V16QI "ssse3") (V32QI "avx2") (V64QI "avx512bw")
458 (V4HI "ssse3") (V8HI "ssse3") (V16HI "avx2") (V32HI "avx512bw")
459 (V4SI "ssse3") (V8SI "avx2")
460 (V2DI "ssse3") (V4DI "avx2")
461 (TI "ssse3") (V2TI "avx2") (V4TI "avx512bw")])
462
463 (define_mode_attr sse4_1_avx2
464 [(V16QI "sse4_1") (V32QI "avx2") (V64QI "avx512bw")
465 (V8HI "sse4_1") (V16HI "avx2") (V32HI "avx512bw")
466 (V4SI "sse4_1") (V8SI "avx2") (V16SI "avx512f")
467 (V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512dq")])
468
469 (define_mode_attr avx_avx2
470 [(V4SF "avx") (V2DF "avx")
471 (V8SF "avx") (V4DF "avx")
472 (V4SI "avx2") (V2DI "avx2")
473 (V8SI "avx2") (V4DI "avx2")])
474
475 (define_mode_attr vec_avx2
476 [(V16QI "vec") (V32QI "avx2")
477 (V8HI "vec") (V16HI "avx2")
478 (V4SI "vec") (V8SI "avx2")
479 (V2DI "vec") (V4DI "avx2")])
480
481 (define_mode_attr avx2_avx512
482 [(V4SI "avx2") (V8SI "avx2") (V16SI "avx512f")
483 (V2DI "avx2") (V4DI "avx2") (V8DI "avx512f")
484 (V4SF "avx2") (V8SF "avx2") (V16SF "avx512f")
485 (V2DF "avx2") (V4DF "avx2") (V8DF "avx512f")
486 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")])
487
;; Selects the "f" (floating-point) or "i" (integer) spelling of the
;; vextract<shuffletype>NNxM / vbroadcast<shuffletype>NNxM mnemonics.
;; Only "f" and "i" name real instructions, so every integer mode maps
;; to "i".  The table covers every mode of the VMOVE iterator, whose move
;; pattern pastes this attribute into its output templates.
488 (define_mode_attr shuffletype
489 [(V16SF "f") (V16SI "i") (V8DF "f") (V8DI "i")
490 (V8SF "f") (V8SI "i") (V4DF "f") (V4DI "i")
491 (V4SF "f") (V4SI "i") (V2DF "f") (V2DI "i")
492 (V32QI "i") (V16HI "i") (V16QI "i") (V8HI "i")
493 (V64QI "i") (V32HI "i") (V1TI "i") (V2TI "i") (V4TI "i")])
494
495 (define_mode_attr ssequartermode
496 [(V16SF "V4SF") (V8DF "V2DF") (V16SI "V4SI") (V8DI "V2DI")])
497
498 (define_mode_attr ssedoublemodelower
499 [(V16QI "v16hi") (V32QI "v32hi") (V64QI "v64hi")
500 (V8HI "v8si") (V16HI "v16si") (V32HI "v32si")
501 (V4SI "v4di") (V8SI "v8di") (V16SI "v16di")])
502
503 (define_mode_attr ssedoublemode
504 [(V4SF "V8SF") (V8SF "V16SF") (V16SF "V32SF")
505 (V2DF "V4DF") (V4DF "V8DF") (V8DF "V16DF")
506 (V16QI "V16HI") (V32QI "V32HI") (V64QI "V64HI")
507 (V4HI "V4SI") (V8HI "V8SI") (V16HI "V16SI") (V32HI "V32SI")
508 (V4SI "V4DI") (V8SI "V16SI") (V16SI "V32SI")
509 (V4DI "V8DI") (V8DI "V16DI")])
510
511 (define_mode_attr ssebytemode
512 [(V8DI "V64QI") (V4DI "V32QI") (V2DI "V16QI")])
513
514 ;; All 128bit vector integer modes
515 (define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
516
517 ;; All 256bit vector integer modes
518 (define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI])
519
520 ;; All 512bit vector integer modes
521 (define_mode_iterator VI_512 [V64QI V32HI V16SI V8DI])
522
523 ;; Various 128bit vector integer mode combinations
524 (define_mode_iterator VI12_128 [V16QI V8HI])
525 (define_mode_iterator VI14_128 [V16QI V4SI])
526 (define_mode_iterator VI124_128 [V16QI V8HI V4SI])
527 (define_mode_iterator VI24_128 [V8HI V4SI])
528 (define_mode_iterator VI248_128 [V8HI V4SI V2DI])
529 (define_mode_iterator VI48_128 [V4SI V2DI])
530
531 ;; Various 256bit and 512 vector integer mode combinations
532 (define_mode_iterator VI124_256 [V32QI V16HI V8SI])
533 (define_mode_iterator VI124_256_AVX512F_AVX512BW
534 [V32QI V16HI V8SI
535 (V64QI "TARGET_AVX512BW")
536 (V32HI "TARGET_AVX512BW")
537 (V16SI "TARGET_AVX512F")])
538 (define_mode_iterator VI48_256 [V8SI V4DI])
539 (define_mode_iterator VI48_512 [V16SI V8DI])
540 (define_mode_iterator VI4_256_8_512 [V8SI V8DI])
541 (define_mode_iterator VI_AVX512BW
542 [V16SI V8DI (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")])
543
544 ;; Int-float size matches
545 (define_mode_iterator VI4F_128 [V4SI V4SF])
546 (define_mode_iterator VI8F_128 [V2DI V2DF])
547 (define_mode_iterator VI4F_256 [V8SI V8SF])
548 (define_mode_iterator VI8F_256 [V4DI V4DF])
549 (define_mode_iterator VI8F_256_512
550 [V4DI V4DF (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
551 (define_mode_iterator VI48F_256_512
552 [V8SI V8SF
553 (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
554 (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
555 (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")])
556 (define_mode_iterator VF48_I1248
557 [V16SI V16SF V8DI V8DF V32HI V64QI])
558 (define_mode_iterator VI48F
559 [V16SI V16SF V8DI V8DF
560 (V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL")
561 (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
562 (V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
563 (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
564 (define_mode_iterator VI48F_256 [V8SI V8SF V4DI V4DF])
565
566 ;; Mapping from float mode to required SSE level
567 (define_mode_attr sse
568 [(SF "sse") (DF "sse2")
569 (V4SF "sse") (V2DF "sse2")
570 (V16SF "avx512f") (V8SF "avx")
571 (V8DF "avx512f") (V4DF "avx")])
572
573 (define_mode_attr sse2
574 [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
575 (V2DI "sse2") (V4DI "avx") (V8DI "avx512f")])
576
577 (define_mode_attr sse3
578 [(V16QI "sse3") (V32QI "avx")])
579
580 (define_mode_attr sse4_1
581 [(V4SF "sse4_1") (V2DF "sse4_1")
582 (V8SF "avx") (V4DF "avx")
583 (V8DF "avx512f")
584 (V4DI "avx") (V2DI "sse4_1")
585 (V8SI "avx") (V4SI "sse4_1")
586 (V16QI "sse4_1") (V32QI "avx")
587 (V8HI "sse4_1") (V16HI "avx")])
588
589 (define_mode_attr avxsizesuffix
590 [(V64QI "512") (V32HI "512") (V16SI "512") (V8DI "512")
591 (V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
592 (V16QI "") (V8HI "") (V4SI "") (V2DI "")
593 (V16SF "512") (V8DF "512")
594 (V8SF "256") (V4DF "256")
595 (V4SF "") (V2DF "")])
596
597 ;; SSE instruction mode
598 (define_mode_attr sseinsnmode
599 [(V64QI "XI") (V32HI "XI") (V16SI "XI") (V8DI "XI") (V4TI "XI")
600 (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI")
601 (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
602 (V16SF "V16SF") (V8DF "V8DF")
603 (V8SF "V8SF") (V4DF "V4DF")
604 (V4SF "V4SF") (V2DF "V2DF")
605 (TI "TI")])
606
607 ;; Mapping of vector modes to corresponding mask size
608 (define_mode_attr avx512fmaskmode
609 [(V64QI "DI") (V32QI "SI") (V16QI "HI")
610 (V32HI "SI") (V16HI "HI") (V8HI "QI") (V4HI "QI")
611 (V16SI "HI") (V8SI "QI") (V4SI "QI")
612 (V8DI "QI") (V4DI "QI") (V2DI "QI")
613 (V16SF "HI") (V8SF "QI") (V4SF "QI")
614 (V8DF "QI") (V4DF "QI") (V2DF "QI")])
615
616 ;; Mapping of vector modes to corresponding mask size, in lower case
617 (define_mode_attr avx512fmaskmodelower
618 [(V64QI "di") (V32QI "si") (V16QI "hi")
619 (V32HI "si") (V16HI "hi") (V8HI "qi") (V4HI "qi")
620 (V16SI "hi") (V8SI "qi") (V4SI "qi")
621 (V8DI "qi") (V4DI "qi") (V2DI "qi")
622 (V16SF "hi") (V8SF "qi") (V4SF "qi")
623 (V8DF "qi") (V4DF "qi") (V2DF "qi")])
624
625 ;; Mapping of vector float modes to an integer mode of the same size
626 (define_mode_attr sseintvecmode
627 [(V16SF "V16SI") (V8DF "V8DI")
628 (V8SF "V8SI") (V4DF "V4DI")
629 (V4SF "V4SI") (V2DF "V2DI")
630 (V16SI "V16SI") (V8DI "V8DI")
631 (V8SI "V8SI") (V4DI "V4DI")
632 (V4SI "V4SI") (V2DI "V2DI")
633 (V16HI "V16HI") (V8HI "V8HI")
634 (V32HI "V32HI") (V64QI "V64QI")
635 (V32QI "V32QI") (V16QI "V16QI")])
636
637 (define_mode_attr sseintvecmode2
638 [(V8DF "XI") (V4DF "OI") (V2DF "TI")
639 (V8SF "OI") (V4SF "TI")])
640
641 (define_mode_attr sseintvecmodelower
642 [(V16SF "v16si") (V8DF "v8di")
643 (V8SF "v8si") (V4DF "v4di")
644 (V4SF "v4si") (V2DF "v2di")
645 (V8SI "v8si") (V4DI "v4di")
646 (V4SI "v4si") (V2DI "v2di")
647 (V16HI "v16hi") (V8HI "v8hi")
648 (V32QI "v32qi") (V16QI "v16qi")])
649
650 ;; Mapping of vector modes to a vector mode of double size
651 (define_mode_attr ssedoublevecmode
652 [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
653 (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
654 (V8SF "V16SF") (V4DF "V8DF")
655 (V4SF "V8SF") (V2DF "V4DF")])
656
657 ;; Mapping of vector modes to a vector mode of half size
658 (define_mode_attr ssehalfvecmode
659 [(V64QI "V32QI") (V32HI "V16HI") (V16SI "V8SI") (V8DI "V4DI")
660 (V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
661 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI")
662 (V16SF "V8SF") (V8DF "V4DF")
663 (V8SF "V4SF") (V4DF "V2DF")
664 (V4SF "V2SF")])
665
666 ;; Mapping of vector modes to packed single mode of the same size
667 (define_mode_attr ssePSmode
668 [(V16SI "V16SF") (V8DF "V16SF")
669 (V16SF "V16SF") (V8DI "V16SF")
670 (V64QI "V16SF") (V32QI "V8SF") (V16QI "V4SF")
671 (V32HI "V16SF") (V16HI "V8SF") (V8HI "V4SF")
672 (V8SI "V8SF") (V4SI "V4SF")
673 (V4DI "V8SF") (V2DI "V4SF")
674 (V4TI "V16SF") (V2TI "V8SF") (V1TI "V4SF")
675 (V8SF "V8SF") (V4SF "V4SF")
676 (V4DF "V8SF") (V2DF "V4SF")])
677
678 (define_mode_attr ssePSmode2
679 [(V8DI "V8SF") (V4DI "V4SF")])
680
681 ;; Mapping of vector modes back to the scalar modes
682 (define_mode_attr ssescalarmode
683 [(V64QI "QI") (V32QI "QI") (V16QI "QI")
684 (V32HI "HI") (V16HI "HI") (V8HI "HI")
685 (V16SI "SI") (V8SI "SI") (V4SI "SI")
686 (V8DI "DI") (V4DI "DI") (V2DI "DI")
687 (V16SF "SF") (V8SF "SF") (V4SF "SF")
688 (V8DF "DF") (V4DF "DF") (V2DF "DF")])
689
690 ;; Mapping of vector modes to the 128bit modes
691 (define_mode_attr ssexmmmode
692 [(V64QI "V16QI") (V32QI "V16QI") (V16QI "V16QI")
693 (V32HI "V8HI") (V16HI "V8HI") (V8HI "V8HI")
694 (V16SI "V4SI") (V8SI "V4SI") (V4SI "V4SI")
695 (V8DI "V2DI") (V4DI "V2DI") (V2DI "V2DI")
696 (V16SF "V4SF") (V8SF "V4SF") (V4SF "V4SF")
697 (V8DF "V2DF") (V4DF "V2DF") (V2DF "V2DF")])
698
699 ;; Pointer size override for scalar modes (Intel asm dialect)
700 (define_mode_attr iptr
701 [(V32QI "b") (V16HI "w") (V8SI "k") (V4DI "q")
702 (V16QI "b") (V8HI "w") (V4SI "k") (V2DI "q")
703 (V8SF "k") (V4DF "q")
704 (V4SF "k") (V2DF "q")
705 (SF "k") (DF "q")])
706
707 ;; Number of scalar elements in each vector type
708 (define_mode_attr ssescalarnum
709 [(V64QI "64") (V16SI "16") (V8DI "8")
710 (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
711 (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
712 (V16SF "16") (V8DF "8")
713 (V8SF "8") (V4DF "4")
714 (V4SF "4") (V2DF "2")])
715
716 ;; Mask of scalar elements in each vector type
717 (define_mode_attr ssescalarnummask
718 [(V32QI "31") (V16HI "15") (V8SI "7") (V4DI "3")
719 (V16QI "15") (V8HI "7") (V4SI "3") (V2DI "1")
720 (V8SF "7") (V4DF "3")
721 (V4SF "3") (V2DF "1")])
722
723 (define_mode_attr ssescalarsize
724 [(V8DI "64") (V4DI "64") (V2DI "64")
725 (V64QI "8") (V32QI "8") (V16QI "8")
726 (V32HI "16") (V16HI "16") (V8HI "16")
727 (V16SI "32") (V8SI "32") (V4SI "32")
728 (V16SF "32") (V8DF "64")])
729
730 ;; SSE prefix for integer vector modes
731 (define_mode_attr sseintprefix
732 [(V2DI "p") (V2DF "")
733 (V4DI "p") (V4DF "")
734 (V8DI "p") (V8DF "")
735 (V4SI "p") (V4SF "")
736 (V8SI "p") (V8SF "")
737 (V16SI "p") (V16SF "")
738 (V16QI "p") (V8HI "p")
739 (V32QI "p") (V16HI "p")
740 (V64QI "p") (V32HI "p")])
741
742 ;; SSE scalar suffix for vector modes
743 (define_mode_attr ssescalarmodesuffix
744 [(SF "ss") (DF "sd")
745 (V8SF "ss") (V4DF "sd")
746 (V4SF "ss") (V2DF "sd")
747 (V8SI "ss") (V4DI "sd")
748 (V4SI "d")])
749
750 ;; Pack/unpack vector modes
751 (define_mode_attr sseunpackmode
752 [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")
753 (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")
754 (V32HI "V16SI") (V64QI "V32HI") (V16SI "V8DI")])
755
756 (define_mode_attr ssepackmode
757 [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")
758 (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")
759 (V32HI "V64QI") (V16SI "V32HI") (V8DI "V16SI")])
760
761 ;; Mapping of the max integer size for xop rotate immediate constraint
762 (define_mode_attr sserotatemax
763 [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
764
765 ;; Mapping of mode to cast intrinsic name
766 (define_mode_attr castmode
767 [(V8SI "si") (V8SF "ps") (V4DF "pd")
768 (V16SI "si") (V16SF "ps") (V8DF "pd")])
769
770 ;; Instruction suffix for sign and zero extensions.
771 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
772
773 ;; i128 for integer vectors and TARGET_AVX2, f128 otherwise.
774 ;; i64x4 or f64x4 for 512bit modes.
775 (define_mode_attr i128
776 [(V16SF "f64x4") (V8SF "f128") (V8DF "f64x4") (V4DF "f128")
777 (V64QI "i64x4") (V32QI "%~128") (V32HI "i64x4") (V16HI "%~128")
778 (V16SI "i64x4") (V8SI "%~128") (V8DI "i64x4") (V4DI "%~128")])
779
780 ;; Mix-n-match
781 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
782 (define_mode_iterator AVX512MODE2P [V16SI V16SF V8DF])
783
784 ;; Mapping for dbpsadbw modes
785 (define_mode_attr dbpsadbwmode
786 [(V32HI "V64QI") (V16HI "V32QI") (V8HI "V16QI")])
787
788 ;; Mapping suffixes for broadcast
789 (define_mode_attr bcstscalarsuff
790 [(V64QI "b") (V32QI "b") (V16QI "b")
791 (V32HI "w") (V16HI "w") (V8HI "w")
792 (V16SI "d") (V8SI "d") (V4SI "d")
793 (V8DI "q") (V4DI "q") (V2DI "q")
794 (V16SF "ss") (V8SF "ss") (V4SF "ss")
795 (V8DF "sd") (V4DF "sd") (V2DF "sd")])
796
797 ;; Tie mode of assembler operand to mode iterator
798 (define_mode_attr concat_tg_mode
799 [(V32QI "t") (V16HI "t") (V8SI "t") (V4DI "t") (V8SF "t") (V4DF "t")
800 (V64QI "g") (V32HI "g") (V16SI "g") (V8DI "g") (V16SF "g") (V8DF "g")])
801
802 ;; Half mask mode for unpacks
803 (define_mode_attr HALFMASKMODE
804 [(DI "SI") (SI "HI")])
805
806 ;; Double mask mode for packs
807 (define_mode_attr DOUBLEMASKMODE
808 [(HI "SI") (SI "DI")])
809
810
811 ;; Include define_subst patterns for instructions with mask
812 (include "subst.md")
813
814 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
815
816 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
817 ;;
818 ;; Move patterns
819 ;;
820 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
821
822 ;; All of these patterns are enabled for SSE1 as well as SSE2.
823 ;; This is essential for maintaining stable calling conventions.
824
;; Move expander for every mode of the VMOVE iterator, enabled under
;; TARGET_SSE.  The C body hands the mode and both operands to
;; ix86_expand_vector_move (defined outside this file) and then finishes
;; with DONE.
825 (define_expand "mov<mode>"
826 [(set (match_operand:VMOVE 0 "nonimmediate_operand")
827 (match_operand:VMOVE 1 "nonimmediate_operand"))]
828 "TARGET_SSE"
829 {
830 ix86_expand_vector_move (<MODE>mode, operands);
831 DONE;
832 })
833
;; Vector move insn for every mode of the VMOVE iterator.  The insn
;; condition requires TARGET_SSE and that at least one of the two operands
;; is a register.  Alternatives:
;;   0: constant (constraint "C") -> register; the opcode comes from
;;      standard_sse_constant_opcode.
;;   1: register or memory -> register.
;;   2: register -> memory.
;; For alternatives 1 and 2 the output code first handles the AVX512F
;; without AVX512VL case where a sub-64-byte move touches one of the
;; EVEX-only registers (EXT_REX_SSE_REGNO_P): stores use
;; vextract<shuffletype>64x4/32x4, loads use vbroadcast<shuffletype>64x4/32x4,
;; and register-to-register moves use the %g (wider) operand form.
;; Otherwise the mnemonic is selected from the insn's "mode" attribute,
;; using the unaligned form (vmovups, vmovupd, vmovdqu, vmovdqu64) where the
;; code below tests misaligned_operand.
;; The "mode" attribute itself is a cond: 16-byte modes with
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL (or alternative 2 with
;; TARGET_SSE_TYPELESS_STORES) give <ssePSmode>; TARGET_AVX gives
;; <sseinsnmode>; no SSE2 or optimizing for size gives V4SF; alternative 0
;; with TARGET_SSE_LOAD0_BY_PXOR gives TI; the default is <sseinsnmode>.
834 (define_insn "*mov<mode>_internal"
835 [(set (match_operand:VMOVE 0 "nonimmediate_operand" "=v,v ,m")
836 (match_operand:VMOVE 1 "nonimmediate_or_sse_const_operand" "C ,vm,v"))]
837 "TARGET_SSE
838 && (register_operand (operands[0], <MODE>mode)
839 || register_operand (operands[1], <MODE>mode))"
840 {
841 int mode = get_attr_mode (insn);
842 switch (which_alternative)
843 {
844 case 0:
845 return standard_sse_constant_opcode (insn, operands[1]);
846 case 1:
847 case 2:
848 /* There is no evex-encoded vmov* for sizes smaller than 64-bytes
849 in avx512f, so we need to use workarounds, to access sse registers
850 16-31, which are evex-only. In avx512vl we don't need workarounds. */
851 if (TARGET_AVX512F && <MODE_SIZE> < 64 && !TARGET_AVX512VL
852 && ((REG_P (operands[0]) && EXT_REX_SSE_REGNO_P (REGNO (operands[0])))
853 || (REG_P (operands[1]) && EXT_REX_SSE_REGNO_P (REGNO (operands[1])))))
854 {
855 if (memory_operand (operands[0], <MODE>mode))
856 {
857 if (<MODE_SIZE> == 32)
858 return "vextract<shuffletype>64x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
859 else if (<MODE_SIZE> == 16)
860 return "vextract<shuffletype>32x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
861 else
862 gcc_unreachable ();
863 }
864 else if (memory_operand (operands[1], <MODE>mode))
865 {
866 if (<MODE_SIZE> == 32)
867 return "vbroadcast<shuffletype>64x4\t{%1, %g0|%g0, %1}";
868 else if (<MODE_SIZE> == 16)
869 return "vbroadcast<shuffletype>32x4\t{%1, %g0|%g0, %1}";
870 else
871 gcc_unreachable ();
872 }
873 else
874 /* Reg -> reg move is always aligned. Just use wider move. */
875 switch (mode)
876 {
877 case MODE_V8SF:
878 case MODE_V4SF:
879 return "vmovaps\t{%g1, %g0|%g0, %g1}";
880 case MODE_V4DF:
881 case MODE_V2DF:
882 return "vmovapd\t{%g1, %g0|%g0, %g1}";
883 case MODE_OI:
884 case MODE_TI:
885 return "vmovdqa64\t{%g1, %g0|%g0, %g1}";
886 default:
887 gcc_unreachable ();
888 }
889 }
890 switch (mode)
891 {
892 case MODE_V16SF:
893 case MODE_V8SF:
894 case MODE_V4SF:
895 if ((TARGET_AVX || TARGET_IAMCU)
896 && (misaligned_operand (operands[0], <MODE>mode)
897 || misaligned_operand (operands[1], <MODE>mode)))
898 return "%vmovups\t{%1, %0|%0, %1}";
899 else
900 return "%vmovaps\t{%1, %0|%0, %1}";
901
902 case MODE_V8DF:
903 case MODE_V4DF:
904 case MODE_V2DF:
905 if ((TARGET_AVX || TARGET_IAMCU)
906 && (misaligned_operand (operands[0], <MODE>mode)
907 || misaligned_operand (operands[1], <MODE>mode)))
908 return "%vmovupd\t{%1, %0|%0, %1}";
909 else
910 return "%vmovapd\t{%1, %0|%0, %1}";
911
912 case MODE_OI:
913 case MODE_TI:
914 if ((TARGET_AVX || TARGET_IAMCU)
915 && (misaligned_operand (operands[0], <MODE>mode)
916 || misaligned_operand (operands[1], <MODE>mode)))
917 return TARGET_AVX512VL ? "vmovdqu64\t{%1, %0|%0, %1}"
918 : "%vmovdqu\t{%1, %0|%0, %1}";
919 else
920 return TARGET_AVX512VL ? "vmovdqa64\t{%1, %0|%0, %1}"
921 : "%vmovdqa\t{%1, %0|%0, %1}";
922 case MODE_XI:
923 if (misaligned_operand (operands[0], <MODE>mode)
924 || misaligned_operand (operands[1], <MODE>mode))
925 return "vmovdqu64\t{%1, %0|%0, %1}";
926 else
927 return "vmovdqa64\t{%1, %0|%0, %1}";
928
929 default:
930 gcc_unreachable ();
931 }
932 default:
933 gcc_unreachable ();
934 }
935 }
936 [(set_attr "type" "sselog1,ssemov,ssemov")
937 (set_attr "prefix" "maybe_vex")
938 (set (attr "mode")
939 (cond [(and (match_test "<MODE_SIZE> == 16")
940 (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
941 (and (eq_attr "alternative" "2")
942 (match_test "TARGET_SSE_TYPELESS_STORES"))))
943 (const_string "<ssePSmode>")
944 (match_test "TARGET_AVX")
945 (const_string "<sseinsnmode>")
946 (ior (not (match_test "TARGET_SSE2"))
947 (match_test "optimize_function_for_size_p (cfun)"))
948 (const_string "V4SF")
949 (and (eq_attr "alternative" "0")
950 (match_test "TARGET_SSE_LOAD0_BY_PXOR"))
951 (const_string "TI")
952 ]
953 (const_string "<sseinsnmode>")))])
954
;; Masked load/move for the modes of V48_AVX512VL, enabled under
;; TARGET_AVX512F: a vec_merge of operand 1 (register or memory) with
;; operand 2 (constraint "0C") under the mask register operand 3 ("Yk").
;; The mnemonic is assembled at output time into a static buffer:
;;   vmov<a|u><ssemodesuffix>    when the element mode is a float mode,
;;   vmovdq<a|u><ssescalarsize>  otherwise,
;; where "u" is chosen when operand 1 is a misaligned_operand and "a"
;; otherwise.
;; NOTE(review): returning a pointer to the static buffer assumes the
;; caller consumes the string before this code runs again -- confirm.
955 (define_insn "<avx512>_load<mode>_mask"
956 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v")
957 (vec_merge:V48_AVX512VL
958 (match_operand:V48_AVX512VL 1 "nonimmediate_operand" "v,m")
959 (match_operand:V48_AVX512VL 2 "vector_move_operand" "0C,0C")
960 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
961 "TARGET_AVX512F"
962 {
963 static char buf [64];
964
965 const char *insn_op;
966 const char *sse_suffix;
967 const char *align;
968 if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
969 {
970 insn_op = "vmov";
971 sse_suffix = "<ssemodesuffix>";
972 }
973 else
974 {
975 insn_op = "vmovdq";
976 sse_suffix = "<ssescalarsize>";
977 }
978
979 if (misaligned_operand (operands[1], <MODE>mode))
980 align = "u";
981 else
982 align = "a";
983
984 snprintf (buf, sizeof (buf), "%s%s%s\t{%%1, %%0%%{%%3%%}%%N2|%%0%%{%%3%%}%%N2, %%1}",
985 insn_op, align, sse_suffix);
986 return buf;
987 }
988 [(set_attr "type" "ssemov")
989 (set_attr "prefix" "evex")
990 (set_attr "memory" "none,load")
991 (set_attr "mode" "<sseinsnmode>")])
992
;; Masked load / register move for the VI12_AVX512VL modes; enabled only
;; under TARGET_AVX512BW.  Same RTL shape as the V48 pattern of the same
;; name (vec_merge of operand 1 and operand 2 under mask operand 3), but the
;; output is a fixed template: always vmovdqu<ssescalarsize>, with no
;; alignment-dependent choice.
993 (define_insn "<avx512>_load<mode>_mask"
994 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v")
995 (vec_merge:VI12_AVX512VL
996 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "v,m")
997 (match_operand:VI12_AVX512VL 2 "vector_move_operand" "0C,0C")
998 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
999 "TARGET_AVX512BW"
1000 "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
1001 [(set_attr "type" "ssemov")
1002 (set_attr "prefix" "evex")
1003 (set_attr "memory" "none,load")
1004 (set_attr "mode" "<sseinsnmode>")])
1005
;; Mask blend for the V48_AVX512VL modes:
;; operand 0 = vec_merge (operand 2, operand 1, mask operand 3), with
;; operand 2 a register or memory and operand 1 a register.
;; V48_AVX512VL is treated here as containing both floating-point and
;; integer element modes (the masked load/store patterns on the same
;; iterator make the same FLOAT_MODE_P distinction).  The floating-point
;; blends are spelled vblendm<ssemodesuffix>, while the integer blends need
;; the "p" form vpblendm<ssemodesuffix>, as used by the VI12 pattern of the
;; same name.  A single "vblendm<ssemodesuffix>" template would produce a
;; mnemonic without the "p" for integer modes, so the mnemonic is selected
;; at output time.
1006 (define_insn "<avx512>_blendm<mode>"
1007 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
1008 (vec_merge:V48_AVX512VL
1009 (match_operand:V48_AVX512VL 2 "nonimmediate_operand" "vm")
1010 (match_operand:V48_AVX512VL 1 "register_operand" "v")
1011 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
1012 "TARGET_AVX512F"
{
if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
return "vblendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}";
else
return "vpblendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}";
}
1014 [(set_attr "type" "ssemov")
1015 (set_attr "prefix" "evex")
1016 (set_attr "mode" "<sseinsnmode>")])
1017
;; Mask blend for the VI12_AVX512VL modes; enabled only under
;; TARGET_AVX512BW.  Operand 0 = vec_merge (operand 2, operand 1, mask
;; operand 3), emitted as vpblendm<ssemodesuffix>.
1018 (define_insn "<avx512>_blendm<mode>"
1019 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
1020 (vec_merge:VI12_AVX512VL
1021 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
1022 (match_operand:VI12_AVX512VL 1 "register_operand" "v")
1023 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
1024 "TARGET_AVX512BW"
1025 "vpblendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
1026 [(set_attr "type" "ssemov")
1027 (set_attr "prefix" "evex")
1028 (set_attr "mode" "<sseinsnmode>")])
1029
;; Masked store for the V48_AVX512VL modes.  The memory destination
;; (operand 0) becomes vec_merge (register operand 1, old contents of
;; operand 0, mask operand 2); the (match_dup 0) is what keeps unselected
;; elements unchanged.
;; The mnemonic is built at output time as <insn_op><align><suffix>:
;;   insn_op/suffix = "vmov"/<ssemodesuffix> for floating-point elements,
;;                    "vmovdq"/<ssescalarsize> otherwise;
;;   align          = "u" when operand 0 is a misaligned_operand, else "a".
;; The result lives in a function-static buffer and is overwritten by the
;; next invocation.
1030 (define_insn "<avx512>_store<mode>_mask"
1031 [(set (match_operand:V48_AVX512VL 0 "memory_operand" "=m")
1032 (vec_merge:V48_AVX512VL
1033 (match_operand:V48_AVX512VL 1 "register_operand" "v")
1034 (match_dup 0)
1035 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1036 "TARGET_AVX512F"
1037 {
1038 static char buf [64];
1039
1040 const char *insn_op;
1041 const char *sse_suffix;
1042 const char *align;
1043 if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
1044 {
1045 insn_op = "vmov";
1046 sse_suffix = "<ssemodesuffix>";
1047 }
1048 else
1049 {
1050 insn_op = "vmovdq";
1051 sse_suffix = "<ssescalarsize>";
1052 }
1053
1054 if (misaligned_operand (operands[0], <MODE>mode))
1055 align = "u";
1056 else
1057 align = "a";
1058
1059 snprintf (buf, sizeof (buf), "%s%s%s\t{%%1, %%0%%{%%2%%}|%%0%%{%%2%%}, %%1}",
1060 insn_op, align, sse_suffix);
1061 return buf;
1062 }
1063 [(set_attr "type" "ssemov")
1064 (set_attr "prefix" "evex")
1065 (set_attr "memory" "store")
1066 (set_attr "mode" "<sseinsnmode>")])
1067
;; Masked store for the VI12_AVX512VL modes; enabled only under
;; TARGET_AVX512BW.  Memory operand 0 becomes vec_merge (register operand 1,
;; old contents of operand 0, mask operand 2).  Always emitted as
;; vmovdqu<ssescalarsize>; there is no alignment-dependent variant.
1068 (define_insn "<avx512>_store<mode>_mask"
1069 [(set (match_operand:VI12_AVX512VL 0 "memory_operand" "=m")
1070 (vec_merge:VI12_AVX512VL
1071 (match_operand:VI12_AVX512VL 1 "register_operand" "v")
1072 (match_dup 0)
1073 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1074 "TARGET_AVX512BW"
1075 "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
1076 [(set_attr "type" "ssemov")
1077 (set_attr "prefix" "evex")
1078 (set_attr "memory" "store")
1079 (set_attr "mode" "<sseinsnmode>")])
1080
;; Build a V2DI value whose low element is element 0 of operand 1 (register
;; or memory) and whose high element is constant zero.  Emitted through the
;; %vmovq template; in the Intel-syntax half the source is printed with the
;; %q operand modifier.
1081 (define_insn "sse2_movq128"
1082 [(set (match_operand:V2DI 0 "register_operand" "=x")
1083 (vec_concat:V2DI
1084 (vec_select:DI
1085 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
1086 (parallel [(const_int 0)]))
1087 (const_int 0)))]
1088 "TARGET_SSE2"
1089 "%vmovq\t{%1, %0|%0, %q1}"
1090 [(set_attr "type" "ssemov")
1091 (set_attr "prefix" "maybe_vex")
1092 (set_attr "mode" "TI")])
1093
1094 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
1095 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
1096 ;; from memory, we'd prefer to load the memory directly into the %xmm
1097 ;; register. To facilitate this happy circumstance, this pattern won't
1098 ;; split until after register allocation. If the 64-bit value didn't
1099 ;; come from memory, this is the best we can do. This is much better
1100 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
1101 ;; from there.
1102
;; Operand 0 (V4SI register) is set to a V4SI subreg of DImode operand 1,
;; which is either an integer register pair (alternative 0) or memory
;; (alternative 1).  Operand 2 is a V4SI scratch, needed only for the
;; register alternative.  The insn is always output as "#" and is split
;; once reload_completed holds.
;;
;; Split strategy:
;;   register source: load the low and high SImode halves into operand 0
;;                    and the scratch with sse2_loadld, then interleave them;
;;   memory source:   one vec_concatv2di of the memory DI value with zero,
;;                    written directly to operand 0 viewed as V2DI.
;;
;; Because the split runs only after reload, it must not create new pseudo
;; registers, so the memory case writes to a V2DImode lowpart of operand 0
;; rather than to a gen_reg_rtx temporary.  The replacement pattern is the
;; (const_int 0) placeholder, so the preparation code must finish with DONE
;; to stop that placeholder from being emitted as an insn.
1103 (define_insn_and_split "movdi_to_sse"
1104 [(parallel
1105 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
1106 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
1107 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
1108 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC"
1109 "#"
1110 "&& reload_completed"
1111 [(const_int 0)]
1112 {
1113 if (register_operand (operands[1], DImode))
1114 {
1115 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
1116 Assemble the 64-bit DImode value in an xmm register. */
1117 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
1118 gen_lowpart (SImode, operands[1])));
1119 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
1120 gen_highpart (SImode, operands[1])));
1121 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
1122 operands[2]));
1123 }
1124 else if (memory_operand (operands[1], DImode))
/* No new pseudos may be created after reload: load straight into the
destination register, viewed in V2DImode. */
emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
operands[1], const0_rtx));
1130 else
1131 gcc_unreachable ();
DONE;
1132 })
1133
;; After reload, rewrite a V4SF register load whose source satisfies
;; zero_extended_scalar_load_operand into
;;   vec_merge (vec_duplicate (SF value), zero vector, 1).
;; The preparation code narrows operand 1 to SFmode with simplify_gen_subreg
;; (offset 0) and sets operand 2 to the V4SF zero constant.
1134 (define_split
1135 [(set (match_operand:V4SF 0 "register_operand")
1136 (match_operand:V4SF 1 "zero_extended_scalar_load_operand"))]
1137 "TARGET_SSE && reload_completed"
1138 [(set (match_dup 0)
1139 (vec_merge:V4SF
1140 (vec_duplicate:V4SF (match_dup 1))
1141 (match_dup 2)
1142 (const_int 1)))]
1143 {
1144 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
1145 operands[2] = CONST0_RTX (V4SFmode);
1146 })
1147
;; After reload, rewrite a V2DF register load whose source satisfies
;; zero_extended_scalar_load_operand into vec_concat (DF value, DF zero).
;; Operand 1 is narrowed to DFmode with simplify_gen_subreg (offset 0);
;; operand 2 is the DFmode zero constant.
1148 (define_split
1149 [(set (match_operand:V2DF 0 "register_operand")
1150 (match_operand:V2DF 1 "zero_extended_scalar_load_operand"))]
1151 "TARGET_SSE2 && reload_completed"
1152 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
1153 {
1154 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
1155 operands[2] = CONST0_RTX (DFmode);
1156 })
1157
;; Expander for misaligned vector moves over the VMOVE modes.  All work is
;; delegated to ix86_expand_vector_move_misalign; the RTL template below is
;; never emitted because the preparation code ends with DONE.
1158 (define_expand "movmisalign<mode>"
1159 [(set (match_operand:VMOVE 0 "nonimmediate_operand")
1160 (match_operand:VMOVE 1 "nonimmediate_operand"))]
1161 "TARGET_SSE"
1162 {
1163 ix86_expand_vector_move_misalign (<MODE>mode, operands);
1164 DONE;
1165 })
1166
;; Expander for the UNSPEC_LOADU unaligned load of the VF modes (optionally
;; masked through <mask_name>).
;; When TARGET_AVX holds and operand 1 is a misaligned_operand, a plain SET
;; is emitted instead of the UNSPEC and the expander finishes with DONE; if
;; <mask_applied> is nonzero the source is first wrapped in a VEC_MERGE with
;; operands 2 and 3.  In every other case control falls out of the
;; preparation code and the UNSPEC_LOADU template is emitted.
1167 (define_expand "<sse>_loadu<ssemodesuffix><avxsizesuffix><mask_name>"
1168 [(set (match_operand:VF 0 "register_operand")
1169 (unspec:VF [(match_operand:VF 1 "nonimmediate_operand")]
1170 UNSPEC_LOADU))]
1171 "TARGET_SSE && <mask_mode512bit_condition>"
1172 {
1173 /* For AVX, normal *mov<mode>_internal pattern will handle unaligned loads
1174 just fine if misaligned_operand is true, and without the UNSPEC it can
1175 be combined with arithmetic instructions. If misaligned_operand is
1176 false, still emit UNSPEC_LOADU insn to honor user's request for
1177 misaligned load. */
1178 if (TARGET_AVX
1179 && misaligned_operand (operands[1], <MODE>mode))
1180 {
1181 rtx src = operands[1];
1182 if (<mask_applied>)
1183 src = gen_rtx_VEC_MERGE (<MODE>mode, operands[1],
1184 operands[2 * <mask_applied>],
1185 operands[3 * <mask_applied>]);
1186 emit_insn (gen_rtx_SET (operands[0], src));
1187 DONE;
1188 }
1189 })
1190
;; Insn for the UNSPEC_LOADU load of the VF modes.  The mnemonic follows the
;; computed "mode" attribute: V16SF/V8SF/V4SF print %vmovups, anything else
;; prints %vmovu<ssemodesuffix>.
;; The "mode" attribute is chosen by the first matching rule:
;;   1. 16-byte mode and TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL -> <ssePSmode>
;;   2. TARGET_AVX                                             -> <MODE>
;;   3. optimize_function_for_size_p (cfun)                    -> V4SF
;;   4. otherwise                                              -> <MODE>
1191 (define_insn "*<sse>_loadu<ssemodesuffix><avxsizesuffix><mask_name>"
1192 [(set (match_operand:VF 0 "register_operand" "=v")
1193 (unspec:VF
1194 [(match_operand:VF 1 "nonimmediate_operand" "vm")]
1195 UNSPEC_LOADU))]
1196 "TARGET_SSE && <mask_mode512bit_condition>"
1197 {
1198 switch (get_attr_mode (insn))
1199 {
1200 case MODE_V16SF:
1201 case MODE_V8SF:
1202 case MODE_V4SF:
1203 return "%vmovups\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
1204 default:
1205 return "%vmovu<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
1206 }
1207 }
1208 [(set_attr "type" "ssemov")
1209 (set_attr "movu" "1")
1210 (set_attr "ssememalign" "8")
1211 (set_attr "prefix" "maybe_vex")
1212 (set (attr "mode")
1213 (cond [(and (match_test "<MODE_SIZE> == 16")
1214 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
1215 (const_string "<ssePSmode>")
1216 (match_test "TARGET_AVX")
1217 (const_string "<MODE>")
1218 (match_test "optimize_function_for_size_p (cfun)")
1219 (const_string "V4SF")
1220 ]
1221 (const_string "<MODE>")))])
1222
1223 ;; Merge movsd/movhpd to movupd for TARGET_SSE_UNALIGNED_LOAD_OPTIMAL targets.
;; Matches two consecutive insns: (1) register operand 0 = vec_concat of DF
;; memory operand 1 with a zero constant, and (2) register operand 2 =
;; vec_concat of its own element 0 with DF memory operand 3.  When
;; ix86_operands_ok_for_move_multiple accepts the operands (called with
;; load = true), both insns are replaced by one UNSPEC_LOADU V2DF load into
;; operand 2.  Operand 4, which matched the zero constant, is overwritten
;; in the preparation statement with operand 1's address re-typed to
;; V2DFmode at offset 0.
1224 (define_peephole2
1225 [(set (match_operand:V2DF 0 "register_operand")
1226 (vec_concat:V2DF (match_operand:DF 1 "memory_operand")
1227 (match_operand:DF 4 "const0_operand")))
1228 (set (match_operand:V2DF 2 "register_operand")
1229 (vec_concat:V2DF (vec_select:DF (match_dup 2)
1230 (parallel [(const_int 0)]))
1231 (match_operand:DF 3 "memory_operand")))]
1232 "TARGET_SSE2 && TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
1233 && ix86_operands_ok_for_move_multiple (operands, true, DFmode)"
1234 [(set (match_dup 2)
1235 (unspec:V2DF [(match_dup 4)] UNSPEC_LOADU))]
1236 "operands[4] = adjust_address (operands[1], V2DFmode, 0);")
1237
;; UNSPEC_STOREU store of a VF register (operand 1) to memory (operand 0).
;; The mnemonic follows the computed "mode" attribute: V16SF/V8SF/V4SF print
;; %vmovups, anything else prints %vmovu<ssemodesuffix>.
;; The "mode" attribute is chosen by the first matching rule:
;;   1. 16-byte mode and (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
;;      or TARGET_SSE_TYPELESS_STORES)             -> <ssePSmode>
;;   2. TARGET_AVX                                 -> <MODE>
;;   3. optimize_function_for_size_p (cfun)        -> V4SF
;;   4. otherwise                                  -> <MODE>
1238 (define_insn "<sse>_storeu<ssemodesuffix><avxsizesuffix>"
1239 [(set (match_operand:VF 0 "memory_operand" "=m")
1240 (unspec:VF
1241 [(match_operand:VF 1 "register_operand" "v")]
1242 UNSPEC_STOREU))]
1243 "TARGET_SSE"
1244 {
1245 switch (get_attr_mode (insn))
1246 {
1247 case MODE_V16SF:
1248 case MODE_V8SF:
1249 case MODE_V4SF:
1250 return "%vmovups\t{%1, %0|%0, %1}";
1251 default:
1252 return "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}";
1253 }
1254 }
1255 [(set_attr "type" "ssemov")
1256 (set_attr "movu" "1")
1257 (set_attr "ssememalign" "8")
1258 (set_attr "prefix" "maybe_vex")
1259 (set (attr "mode")
1260 (cond [(and (match_test "<MODE_SIZE> == 16")
1261 (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
1262 (match_test "TARGET_SSE_TYPELESS_STORES")))
1263 (const_string "<ssePSmode>")
1264 (match_test "TARGET_AVX")
1265 (const_string "<MODE>")
1266 (match_test "optimize_function_for_size_p (cfun)")
1267 (const_string "V4SF")
1268 ]
1269 (const_string "<MODE>")))])
1270
;; Masked UNSPEC_STOREU store for the VF_AVX512VL modes.  Memory operand 0
;; becomes vec_merge (unspec of register operand 1, old contents of
;; operand 0, mask operand 2).  The "mode" attribute is fixed to
;; <sseinsnmode>; the switch prints vmovups for the V16SF/V8SF/V4SF
;; attribute values and vmovu<ssemodesuffix> otherwise, always with the
;; {%2} mask decoration on the destination.
1271 (define_insn "<avx512>_storeu<ssemodesuffix><avxsizesuffix>_mask"
1272 [(set (match_operand:VF_AVX512VL 0 "memory_operand" "=m")
1273 (vec_merge:VF_AVX512VL
1274 (unspec:VF_AVX512VL
1275 [(match_operand:VF_AVX512VL 1 "register_operand" "v")]
1276 UNSPEC_STOREU)
1277 (match_dup 0)
1278 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1279 "TARGET_AVX512F"
1280 {
1281 switch (get_attr_mode (insn))
1282 {
1283 case MODE_V16SF:
1284 case MODE_V8SF:
1285 case MODE_V4SF:
1286 return "vmovups\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1287 default:
1288 return "vmovu<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1289 }
1290 }
1291 [(set_attr "type" "ssemov")
1292 (set_attr "movu" "1")
1293 (set_attr "memory" "store")
1294 (set_attr "prefix" "evex")
1295 (set_attr "mode" "<sseinsnmode>")])
1296
1297 ;; Merge movlpd/movhpd to movupd for TARGET_SSE_UNALIGNED_STORE_OPTIMAL targets.
;; Matches two consecutive DF stores: element 0 of V2DF register operand 1
;; to memory operand 0, then element 1 of V2DF register operand 3 to memory
;; operand 2.  When ix86_operands_ok_for_move_multiple accepts the operands
;; (called with load = false), both are replaced by one UNSPEC_STOREU V2DF
;; store of operand 1.  Operand 4 is created in the preparation statement
;; as operand 0's address re-typed to V2DFmode at offset 0.
1298 (define_peephole2
1299 [(set (match_operand:DF 0 "memory_operand")
1300 (vec_select:DF (match_operand:V2DF 1 "register_operand")
1301 (parallel [(const_int 0)])))
1302 (set (match_operand:DF 2 "memory_operand")
1303 (vec_select:DF (match_operand:V2DF 3 "register_operand")
1304 (parallel [(const_int 1)])))]
1305 "TARGET_SSE2 && TARGET_SSE_UNALIGNED_STORE_OPTIMAL
1306 && ix86_operands_ok_for_move_multiple (operands, false, DFmode)"
1307 [(set (match_dup 4)
1308 (unspec:V2DF [(match_dup 1)] UNSPEC_STOREU))]
1309 "operands[4] = adjust_address (operands[0], V2DFmode, 0);")
1310
1311 /* For AVX, normal *mov<mode>_internal pattern will handle unaligned loads
1312 just fine if misaligned_operand is true, and without the UNSPEC it can
1313 be combined with arithmetic instructions. If misaligned_operand is
1314 false, still emit UNSPEC_LOADU insn to honor user's request for
1315 misaligned load. */
;; Expander for the UNSPEC_LOADU integer load over the VI1 modes.
;; With TARGET_AVX and a misaligned_operand source, a plain SET is emitted
;; (wrapped in a VEC_MERGE with operands 2 and 3 when <mask_applied> is
;; nonzero) and the expander finishes with DONE; otherwise the UNSPEC_LOADU
;; template is emitted.
1316 (define_expand "<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
1317 [(set (match_operand:VI1 0 "register_operand")
1318 (unspec:VI1
1319 [(match_operand:VI1 1 "nonimmediate_operand")]
1320 UNSPEC_LOADU))]
1321 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
1322 {
1323 if (TARGET_AVX
1324 && misaligned_operand (operands[1], <MODE>mode))
1325 {
1326 rtx src = operands[1];
1327 if (<mask_applied>)
1328 src = gen_rtx_VEC_MERGE (<MODE>mode, operands[1],
1329 operands[2 * <mask_applied>],
1330 operands[3 * <mask_applied>]);
1331 emit_insn (gen_rtx_SET (operands[0], src));
1332 DONE;
1333 }
1334 })
1335
;; Expander for the UNSPEC_LOADU integer load over the
;; VI_ULOADSTORE_BW_AVX512VL modes (TARGET_AVX512BW).  If operand 1 is a
;; misaligned_operand, a plain SET is emitted (wrapped in a VEC_MERGE with
;; operands 2 and 3 when <mask_applied> is nonzero) and the expander
;; finishes with DONE; otherwise the UNSPEC_LOADU template is emitted.
;; Unlike the VI1 expander there is no separate TARGET_AVX test.
1336 (define_expand "<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
1337 [(set (match_operand:VI_ULOADSTORE_BW_AVX512VL 0 "register_operand")
1338 (unspec:VI_ULOADSTORE_BW_AVX512VL
1339 [(match_operand:VI_ULOADSTORE_BW_AVX512VL 1 "nonimmediate_operand")]
1340 UNSPEC_LOADU))]
1341 "TARGET_AVX512BW"
1342 {
1343 if (misaligned_operand (operands[1], <MODE>mode))
1344 {
1345 rtx src = operands[1];
1346 if (<mask_applied>)
1347 src = gen_rtx_VEC_MERGE (<MODE>mode, operands[1],
1348 operands[2 * <mask_applied>],
1349 operands[3 * <mask_applied>]);
1350 emit_insn (gen_rtx_SET (operands[0], src));
1351 DONE;
1352 }
1353 })
1354
;; Expander for the UNSPEC_LOADU integer load over the
;; VI_ULOADSTORE_F_AVX512VL modes (TARGET_AVX512F).  If operand 1 is a
;; misaligned_operand, a plain SET is emitted (wrapped in a VEC_MERGE with
;; operands 2 and 3 when <mask_applied> is nonzero) and the expander
;; finishes with DONE; otherwise the UNSPEC_LOADU template is emitted.
1355 (define_expand "<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
1356 [(set (match_operand:VI_ULOADSTORE_F_AVX512VL 0 "register_operand")
1357 (unspec:VI_ULOADSTORE_F_AVX512VL
1358 [(match_operand:VI_ULOADSTORE_F_AVX512VL 1 "nonimmediate_operand")]
1359 UNSPEC_LOADU))]
1360 "TARGET_AVX512F"
1361 {
1362 if (misaligned_operand (operands[1], <MODE>mode))
1363 {
1364 rtx src = operands[1];
1365 if (<mask_applied>)
1366 src = gen_rtx_VEC_MERGE (<MODE>mode, operands[1],
1367 operands[2 * <mask_applied>],
1368 operands[3 * <mask_applied>]);
1369 emit_insn (gen_rtx_SET (operands[0], src));
1370 DONE;
1371 }
1372 })
1373
;; Insn for the UNSPEC_LOADU integer load over the VI1 modes.
;; Output selection:
;;   mode attribute V8SF/V4SF              -> %vmovups
;;   otherwise, unless both TARGET_AVX512VL
;;   and TARGET_AVX512BW are enabled       -> %vmovdqu
;;   otherwise                             -> vmovdqu<ssescalarsize>, the
;;                                            only template that prints
;;                                            <mask_operand2>
;; NOTE(review): the %vmovups and %vmovdqu templates omit <mask_operand2>.
;; Whether a masked instance can ever reach them (e.g. through the
;; <ssePSmode> rule of the "mode" attribute) is not visible here; confirm.
;; The "mode" attribute is chosen by the first matching rule:
;;   1. 16-byte mode and TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL -> <ssePSmode>
;;   2. TARGET_AVX                                      -> <sseinsnmode>
;;   3. optimize_function_for_size_p (cfun)             -> V4SF
;;   4. otherwise                                       -> <sseinsnmode>
1374 (define_insn "*<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
1375 [(set (match_operand:VI1 0 "register_operand" "=v")
1376 (unspec:VI1
1377 [(match_operand:VI1 1 "nonimmediate_operand" "vm")]
1378 UNSPEC_LOADU))]
1379 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
1380 {
1381 switch (get_attr_mode (insn))
1382 {
1383 case MODE_V8SF:
1384 case MODE_V4SF:
1385 return "%vmovups\t{%1, %0|%0, %1}";
1386 default:
1387 if (!(TARGET_AVX512VL && TARGET_AVX512BW))
1388 return "%vmovdqu\t{%1, %0|%0, %1}";
1389 else
1390 return "vmovdqu<ssescalarsize>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
1391 }
1392 }
1393 [(set_attr "type" "ssemov")
1394 (set_attr "movu" "1")
1395 (set_attr "ssememalign" "8")
1396 (set (attr "prefix_data16")
1397 (if_then_else
1398 (match_test "TARGET_AVX")
1399 (const_string "*")
1400 (const_string "1")))
1401 (set_attr "prefix" "maybe_vex")
1402 (set (attr "mode")
1403 (cond [(and (match_test "<MODE_SIZE> == 16")
1404 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
1405 (const_string "<ssePSmode>")
1406 (match_test "TARGET_AVX")
1407 (const_string "<sseinsnmode>")
1408 (match_test "optimize_function_for_size_p (cfun)")
1409 (const_string "V4SF")
1410 ]
1411 (const_string "<sseinsnmode>")))])
1412
;; Insn for the UNSPEC_LOADU integer load over the
;; VI_ULOADSTORE_BW_AVX512VL modes (TARGET_AVX512BW).  Always printed as
;; vmovdqu<ssescalarsize> with <mask_operand2> on the destination.
;; No "mode" attribute is set here.  The ";" after the template string
;; starts an (empty) machine-description comment.
1413 (define_insn "*<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
1414 [(set (match_operand:VI_ULOADSTORE_BW_AVX512VL 0 "register_operand" "=v")
1415 (unspec:VI_ULOADSTORE_BW_AVX512VL
1416 [(match_operand:VI_ULOADSTORE_BW_AVX512VL 1 "nonimmediate_operand" "vm")]
1417 UNSPEC_LOADU))]
1418 "TARGET_AVX512BW"
1419 "vmovdqu<ssescalarsize>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
1420 [(set_attr "type" "ssemov")
1421 (set_attr "movu" "1")
1422 (set_attr "ssememalign" "8")
1423 (set_attr "prefix" "maybe_evex")])
1424
;; Insn for the UNSPEC_LOADU integer load over the
;; VI_ULOADSTORE_F_AVX512VL modes (TARGET_AVX512F).  Always printed as
;; vmovdqu<ssescalarsize> with <mask_operand2> on the destination.
;; No "mode" attribute is set here.  The ";" after the template string
;; starts an (empty) machine-description comment.
1425 (define_insn "*<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
1426 [(set (match_operand:VI_ULOADSTORE_F_AVX512VL 0 "register_operand" "=v")
1427 (unspec:VI_ULOADSTORE_F_AVX512VL
1428 [(match_operand:VI_ULOADSTORE_F_AVX512VL 1 "nonimmediate_operand" "vm")]
1429 UNSPEC_LOADU))]
1430 "TARGET_AVX512F"
1431 "vmovdqu<ssescalarsize>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
1432 [(set_attr "type" "ssemov")
1433 (set_attr "movu" "1")
1434 (set_attr "ssememalign" "8")
1435 (set_attr "prefix" "maybe_evex")])
1436
;; UNSPEC_STOREU store of a VI1 register (operand 1) to memory (operand 0).
;; Output selection:
;;   mode attribute V16SF/V8SF/V4SF -> %vmovups
;;   V32QImode / V16QImode          -> %vmovdqu unless both TARGET_AVX512VL
;;                                     and TARGET_AVX512BW are enabled; when
;;                                     both are enabled these cases fall
;;                                     through to the default label (there
;;                                     is no break or return after the if)
;;   default                        -> vmovdqu<ssescalarsize>
;; The "mode" attribute is chosen by the first matching rule:
;;   1. 16-byte mode and (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
;;      or TARGET_SSE_TYPELESS_STORES)             -> <ssePSmode>
;;   2. TARGET_AVX                                 -> <sseinsnmode>
;;   3. optimize_function_for_size_p (cfun)        -> V4SF
;;   4. otherwise                                  -> <sseinsnmode>
1437 (define_insn "<sse2_avx_avx512f>_storedqu<mode>"
1438 [(set (match_operand:VI1 0 "memory_operand" "=m")
1439 (unspec:VI1
1440 [(match_operand:VI1 1 "register_operand" "v")]
1441 UNSPEC_STOREU))]
1442 "TARGET_SSE2"
1443 {
1444 switch (get_attr_mode (insn))
1445 {
1446 case MODE_V16SF:
1447 case MODE_V8SF:
1448 case MODE_V4SF:
1449 return "%vmovups\t{%1, %0|%0, %1}";
1450 default:
1451 switch (<MODE>mode)
1452 {
1453 case V32QImode:
1454 case V16QImode:
1455 if (!(TARGET_AVX512VL && TARGET_AVX512BW))
1456 return "%vmovdqu\t{%1, %0|%0, %1}";
1457 default:
1458 return "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}";
1459 }
1460 }
1461 }
1462 [(set_attr "type" "ssemov")
1463 (set_attr "movu" "1")
1464 (set_attr "ssememalign" "8")
1465 (set (attr "prefix_data16")
1466 (if_then_else
1467 (match_test "TARGET_AVX")
1468 (const_string "*")
1469 (const_string "1")))
1470 (set_attr "prefix" "maybe_vex")
1471 (set (attr "mode")
1472 (cond [(and (match_test "<MODE_SIZE> == 16")
1473 (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
1474 (match_test "TARGET_SSE_TYPELESS_STORES")))
1475 (const_string "<ssePSmode>")
1476 (match_test "TARGET_AVX")
1477 (const_string "<sseinsnmode>")
1478 (match_test "optimize_function_for_size_p (cfun)")
1479 (const_string "V4SF")
1480 ]
1481 (const_string "<sseinsnmode>")))])
1482
;; UNSPEC_STOREU store over the VI_ULOADSTORE_BW_AVX512VL modes
;; (TARGET_AVX512BW): register operand 1 to memory operand 0, always
;; printed as vmovdqu<ssescalarsize>.  No "mode" attribute is set here.
1483 (define_insn "<sse2_avx_avx512f>_storedqu<mode>"
1484 [(set (match_operand:VI_ULOADSTORE_BW_AVX512VL 0 "memory_operand" "=m")
1485 (unspec:VI_ULOADSTORE_BW_AVX512VL
1486 [(match_operand:VI_ULOADSTORE_BW_AVX512VL 1 "register_operand" "v")]
1487 UNSPEC_STOREU))]
1488 "TARGET_AVX512BW"
1489 "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}"
1490 [(set_attr "type" "ssemov")
1491 (set_attr "movu" "1")
1492 (set_attr "ssememalign" "8")
1493 (set_attr "prefix" "maybe_evex")])
1494
;; UNSPEC_STOREU store over the VI_ULOADSTORE_F_AVX512VL modes
;; (TARGET_AVX512F): register operand 1 to memory operand 0, always printed
;; as vmovdqu<ssescalarsize>.  No "mode" attribute is set here.
;; The "prefix" attribute is "maybe_evex", matching the BW store pattern of
;; the same name and both *loaddqu patterns that print the same
;; vmovdqu<ssescalarsize> mnemonic; "maybe_vex" would describe a VEX
;; encoding for a mnemonic that only has an EVEX form.
1495 (define_insn "<sse2_avx_avx512f>_storedqu<mode>"
1496 [(set (match_operand:VI_ULOADSTORE_F_AVX512VL 0 "memory_operand" "=m")
1497 (unspec:VI_ULOADSTORE_F_AVX512VL
1498 [(match_operand:VI_ULOADSTORE_F_AVX512VL 1 "register_operand" "v")]
1499 UNSPEC_STOREU))]
1500 "TARGET_AVX512F"
1501 "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}"
1502 [(set_attr "type" "ssemov")
1503 (set_attr "movu" "1")
1504 (set_attr "ssememalign" "8")
1505 (set_attr "prefix" "maybe_evex")])
1506
;; Masked UNSPEC_STOREU store for the VI48_AVX512VL modes
;; (TARGET_AVX512F).  Memory operand 0 becomes vec_merge (unspec of register
;; operand 1, old contents of operand 0, mask operand 2); printed as
;; vmovdqu<ssescalarsize> with the {%2} mask decoration.
1507 (define_insn "<avx512>_storedqu<mode>_mask"
1508 [(set (match_operand:VI48_AVX512VL 0 "memory_operand" "=m")
1509 (vec_merge:VI48_AVX512VL
1510 (unspec:VI48_AVX512VL
1511 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")]
1512 UNSPEC_STOREU)
1513 (match_dup 0)
1514 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1515 "TARGET_AVX512F"
1516 "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
1517 [(set_attr "type" "ssemov")
1518 (set_attr "movu" "1")
1519 (set_attr "memory" "store")
1520 (set_attr "prefix" "evex")
1521 (set_attr "mode" "<sseinsnmode>")])
1522
;; Masked UNSPEC_STOREU store for the VI12_AVX512VL modes
;; (TARGET_AVX512BW).  Memory operand 0 becomes vec_merge (unspec of
;; register operand 1, old contents of operand 0, mask operand 2); printed
;; as vmovdqu<ssescalarsize> with the {%2} mask decoration.
1523 (define_insn "<avx512>_storedqu<mode>_mask"
1524 [(set (match_operand:VI12_AVX512VL 0 "memory_operand" "=m")
1525 (vec_merge:VI12_AVX512VL
1526 (unspec:VI12_AVX512VL
1527 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")]
1528 UNSPEC_STOREU)
1529 (match_dup 0)
1530 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1531 "TARGET_AVX512BW"
1532 "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
1533 [(set_attr "type" "ssemov")
1534 (set_attr "movu" "1")
1535 (set_attr "memory" "store")
1536 (set_attr "prefix" "evex")
1537 (set_attr "mode" "<sseinsnmode>")])
1538
;; UNSPEC_LDDQU load of a VI1 vector from memory (operand 1, memory only)
;; into an "x" register, printed through the %vlddqu template
;; (TARGET_SSE3).  Under TARGET_AVX both prefix_data16 and prefix_rep are
;; "*"; otherwise prefix_data16 is "0" and prefix_rep is "1".
1539 (define_insn "<sse3>_lddqu<avxsizesuffix>"
1540 [(set (match_operand:VI1 0 "register_operand" "=x")
1541 (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
1542 UNSPEC_LDDQU))]
1543 "TARGET_SSE3"
1544 "%vlddqu\t{%1, %0|%0, %1}"
1545 [(set_attr "type" "ssemov")
1546 (set_attr "movu" "1")
1547 (set_attr "ssememalign" "8")
1548 (set (attr "prefix_data16")
1549 (if_then_else
1550 (match_test "TARGET_AVX")
1551 (const_string "*")
1552 (const_string "0")))
1553 (set (attr "prefix_rep")
1554 (if_then_else
1555 (match_test "TARGET_AVX")
1556 (const_string "*")
1557 (const_string "1")))
1558 (set_attr "prefix" "maybe_vex")
1559 (set_attr "mode" "<sseinsnmode>")])
1560
;; UNSPEC_MOVNT store of a general register (operand 1, constraint "r") to
;; memory for the SWI48 modes, printed as movnti (TARGET_SSE2).
1561 (define_insn "sse2_movnti<mode>"
1562 [(set (match_operand:SWI48 0 "memory_operand" "=m")
1563 (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")]
1564 UNSPEC_MOVNT))]
1565 "TARGET_SSE2"
1566 "movnti\t{%1, %0|%0, %1}"
1567 [(set_attr "type" "ssemov")
1568 (set_attr "prefix_data16" "0")
1569 (set_attr "mode" "<MODE>")])
1570
;; UNSPEC_MOVNT store of a VF vector register (operand 1) to memory,
;; printed as %vmovnt<ssemodesuffix> (TARGET_SSE).
1571 (define_insn "<sse>_movnt<mode>"
1572 [(set (match_operand:VF 0 "memory_operand" "=m")
1573 (unspec:VF
1574 [(match_operand:VF 1 "register_operand" "v")]
1575 UNSPEC_MOVNT))]
1576 "TARGET_SSE"
1577 "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
1578 [(set_attr "type" "ssemov")
1579 (set_attr "prefix" "maybe_vex")
1580 (set_attr "mode" "<MODE>")])
1581
;; UNSPEC_MOVNT store of a VI8 vector register (operand 1) to memory,
;; printed as %vmovntdq (TARGET_SSE2).  Note that the "type" attribute is
;; "ssecvt" here, whereas the other movnt patterns use "ssemov".
;; prefix_data16 is "*" under TARGET_AVX and "1" otherwise.
1582 (define_insn "<sse2>_movnt<mode>"
1583 [(set (match_operand:VI8 0 "memory_operand" "=m")
1584 (unspec:VI8 [(match_operand:VI8 1 "register_operand" "v")]
1585 UNSPEC_MOVNT))]
1586 "TARGET_SSE2"
1587 "%vmovntdq\t{%1, %0|%0, %1}"
1588 [(set_attr "type" "ssecvt")
1589 (set (attr "prefix_data16")
1590 (if_then_else
1591 (match_test "TARGET_AVX")
1592 (const_string "*")
1593 (const_string "1")))
1594 (set_attr "prefix" "maybe_vex")
1595 (set_attr "mode" "<sseinsnmode>")])
1596
1597 ; Expand patterns for non-temporal stores. At the moment, only those
1598 ; that directly map to insns are defined; it would be possible to
1599 ; define patterns for other modes that would expand to several insns.
1600
1601 ;; Modes handled by storent patterns.
;; Each entry pairs a mode with the target condition under which the
;; iterator includes it; V4SF is listed without a condition of its own.
;; Scalar DI additionally requires TARGET_64BIT, SF/DF require
;; TARGET_SSE4A, 256-bit vectors TARGET_AVX and 512-bit vectors
;; TARGET_AVX512F.
1602 (define_mode_iterator STORENT_MODE
1603 [(DI "TARGET_SSE2 && TARGET_64BIT") (SI "TARGET_SSE2")
1604 (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
1605 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2")
1606 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
1607 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
1608
;; Expander for non-temporal stores over STORENT_MODE.  It has no
;; preparation code, so it simply emits the UNSPEC_MOVNT SET below for
;; whichever insn pattern matches that mode.
1609 (define_expand "storent<mode>"
1610 [(set (match_operand:STORENT_MODE 0 "memory_operand")
1611 (unspec:STORENT_MODE
1612 [(match_operand:STORENT_MODE 1 "register_operand")]
1613 UNSPEC_MOVNT))]
1614 "TARGET_SSE")
1615
1616 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1617 ;;
1618 ;; Parallel floating point arithmetic
1619 ;;
1620 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1621
;; Expander for the absneg code iterator over the VF modes.  All work is
;; delegated to ix86_expand_fp_absneg_operator; the template is not emitted
;; because the preparation code ends with DONE.
1622 (define_expand "<code><mode>2"
1623 [(set (match_operand:VF 0 "register_operand")
1624 (absneg:VF
1625 (match_operand:VF 1 "register_operand")))]
1626 "TARGET_SSE"
1627 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
1628
;; Vector abs/neg over the VF modes, kept as "#" until reload_completed and
;; then split into one logical operation:
;;   operand 0 = op1 XOR op2 when operator 3 is NEG, op1 AND op2 otherwise.
;; Operand 2 appears only in a (use ...); presumably it is the bit mask
;; supplied by ix86_expand_fp_absneg_operator -- confirm there.
;; Operand selection for the emitted operation:
;;   TARGET_AVX: op1/op2 are operands 1/2, swapped when operand 1 is a MEM
;;               so that the memory operand comes second;
;;   otherwise:  op1 is operand 0, and op2 is operand 2 if operand 0 equals
;;               operand 1, else operand 1.
;; Alternatives 0-1 are "noavx" and 2-3 are "avx" (see the isa attribute).
1629 (define_insn_and_split "*absneg<mode>2"
1630 [(set (match_operand:VF 0 "register_operand" "=x,x,v,v")
1631 (match_operator:VF 3 "absneg_operator"
1632 [(match_operand:VF 1 "nonimmediate_operand" "0, xm, v, m")]))
1633 (use (match_operand:VF 2 "nonimmediate_operand" "xm, 0, vm,v"))]
1634 "TARGET_SSE"
1635 "#"
1636 "&& reload_completed"
1637 [(const_int 0)]
1638 {
1639 enum rtx_code absneg_op;
1640 rtx op1, op2;
1641 rtx t;
1642
1643 if (TARGET_AVX)
1644 {
1645 if (MEM_P (operands[1]))
1646 op1 = operands[2], op2 = operands[1];
1647 else
1648 op1 = operands[1], op2 = operands[2];
1649 }
1650 else
1651 {
1652 op1 = operands[0];
1653 if (rtx_equal_p (operands[0], operands[1]))
1654 op2 = operands[2];
1655 else
1656 op2 = operands[1];
1657 }
1658
1659 absneg_op = GET_CODE (operands[3]) == NEG ? XOR : AND;
1660 t = gen_rtx_fmt_ee (absneg_op, <MODE>mode, op1, op2);
1661 t = gen_rtx_SET (operands[0], t);
1662 emit_insn (t);
1663 DONE;
1664 }
1665 [(set_attr "isa" "noavx,noavx,avx,avx")])
1666
;; Expander for vector add/sub (plusminus iterator) over the VF modes, with
;; optional mask and rounding variants.  The preparation statement calls
;; ix86_fixup_binary_operands_no_copy and then lets the template be emitted.
1667 (define_expand "<plusminus_insn><mode>3<mask_name><round_name>"
1668 [(set (match_operand:VF 0 "register_operand")
1669 (plusminus:VF
1670 (match_operand:VF 1 "<round_nimm_predicate>")
1671 (match_operand:VF 2 "<round_nimm_predicate>")))]
1672 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1673 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
1674
;; Insn for vector add/sub over the VF modes.
;;   alternative 0 (noavx): two-operand form; operand 1 is tied to the
;;                          destination (the tie is prefixed with <comm>);
;;   alternative 1 (avx):   three-operand "v" form, with optional rounding
;;                          and mask operands.
;; The insn condition also requires ix86_binary_operator_ok.
1675 (define_insn "*<plusminus_insn><mode>3<mask_name><round_name>"
1676 [(set (match_operand:VF 0 "register_operand" "=x,v")
1677 (plusminus:VF
1678 (match_operand:VF 1 "<round_nimm_predicate>" "<comm>0,v")
1679 (match_operand:VF 2 "<round_nimm_predicate>" "xm,<round_constraint>")))]
1680 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1681 "@
1682 <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
1683 v<plusminus_mnemonic><ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1684 [(set_attr "isa" "noavx,avx")
1685 (set_attr "type" "sseadd")
1686 (set_attr "prefix" "<mask_prefix3>")
1687 (set_attr "mode" "<MODE>")])
1688
;; Scalar-in-vector add/sub over the VF_128 modes: the plusminus result is
;; merged with operand 1 under the constant merge mask 1.  Printed with the
;; scalar mnemonic suffix <ssescalarmodesuffix>; alternative 0 is the
;; two-operand non-AVX form, alternative 1 the three-operand "v" form with
;; an optional rounding operand.
1689 (define_insn "<sse>_vm<plusminus_insn><mode>3<round_name>"
1690 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1691 (vec_merge:VF_128
1692 (plusminus:VF_128
1693 (match_operand:VF_128 1 "register_operand" "0,v")
1694 (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_constraint>"))
1695 (match_dup 1)
1696 (const_int 1)))]
1697 "TARGET_SSE"
1698 "@
1699 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1700 v<plusminus_mnemonic><ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %<iptr>2<round_op3>}"
1701 [(set_attr "isa" "noavx,avx")
1702 (set_attr "type" "sseadd")
1703 (set_attr "prefix" "<round_prefix>")
1704 (set_attr "mode" "<ssescalarmode>")])
1705
;; Expander for vector multiply over the VF modes, with optional mask and
;; rounding variants.  The preparation statement calls
;; ix86_fixup_binary_operands_no_copy for MULT and then lets the template be
;; emitted.
1706 (define_expand "mul<mode>3<mask_name><round_name>"
1707 [(set (match_operand:VF 0 "register_operand")
1708 (mult:VF
1709 (match_operand:VF 1 "<round_nimm_predicate>")
1710 (match_operand:VF 2 "<round_nimm_predicate>")))]
1711 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1712 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
1713
;; Insn for vector multiply over the VF modes.
;;   alternative 0 (noavx): two-operand mul<ssemodesuffix>; operand 1 is
;;                          tied to the destination and marked commutative
;;                          with "%";
;;   alternative 1 (avx):   three-operand vmul<ssemodesuffix>, with optional
;;                          rounding and mask operands.
;; The insn condition also requires ix86_binary_operator_ok for MULT.
1714 (define_insn "*mul<mode>3<mask_name><round_name>"
1715 [(set (match_operand:VF 0 "register_operand" "=x,v")
1716 (mult:VF
1717 (match_operand:VF 1 "<round_nimm_predicate>" "%0,v")
1718 (match_operand:VF 2 "<round_nimm_predicate>" "xm,<round_constraint>")))]
1719 "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands) && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1720 "@
1721 mul<ssemodesuffix>\t{%2, %0|%0, %2}
1722 vmul<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1723 [(set_attr "isa" "noavx,avx")
1724 (set_attr "type" "ssemul")
1725 (set_attr "prefix" "<mask_prefix3>")
1726 (set_attr "btver2_decode" "direct,double")
1727 (set_attr "mode" "<MODE>")])
1728
;; Scalar-in-vector multiply/divide (multdiv iterator) over the VF_128
;; modes: the result is merged with operand 1 under the constant merge
;; mask 1.  The "type" attribute is derived from the mnemonic
;; ("sse<multdiv_mnemonic>").  Alternative 0 is the two-operand non-AVX
;; form, alternative 1 the three-operand "v" form with an optional rounding
;; operand.
1729 (define_insn "<sse>_vm<multdiv_mnemonic><mode>3<round_name>"
1730 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1731 (vec_merge:VF_128
1732 (multdiv:VF_128
1733 (match_operand:VF_128 1 "register_operand" "0,v")
1734 (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_constraint>"))
1735 (match_dup 1)
1736 (const_int 1)))]
1737 "TARGET_SSE"
1738 "@
1739 <multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1740 v<multdiv_mnemonic><ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %<iptr>2<round_op3>}"
1741 [(set_attr "isa" "noavx,avx")
1742 (set_attr "type" "sse<multdiv_mnemonic>")
1743 (set_attr "prefix" "<round_prefix>")
1744 (set_attr "btver2_decode" "direct,double")
1745 (set_attr "mode" "<ssescalarmode>")])
1746
;; Expander for vector divide over the VF2 modes (TARGET_SSE2).  It only
;; calls ix86_fixup_binary_operands_no_copy for DIV and then emits the
;; template.
1747 (define_expand "div<mode>3"
1748 [(set (match_operand:VF2 0 "register_operand")
1749 (div:VF2 (match_operand:VF2 1 "register_operand")
1750 (match_operand:VF2 2 "nonimmediate_operand")))]
1751 "TARGET_SSE2"
1752 "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")
1753
;; Expander for vector divide over the VF1 modes (TARGET_SSE).  After
;; ix86_fixup_binary_operands_no_copy, the division is expanded through
;; ix86_emit_swdivsf instead of the div template when all of these hold:
;; TARGET_SSE_MATH, TARGET_RECIP_VEC_DIV, not optimizing the insn for size,
;; flag_finite_math_only, !flag_trapping_math and
;; flag_unsafe_math_optimizations.  (ix86_emit_swdivsf is presumably a
;; reciprocal-based software division -- confirm in i386.c.)
1754 (define_expand "div<mode>3"
1755 [(set (match_operand:VF1 0 "register_operand")
1756 (div:VF1 (match_operand:VF1 1 "register_operand")
1757 (match_operand:VF1 2 "nonimmediate_operand")))]
1758 "TARGET_SSE"
1759 {
1760 ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);
1761
1762 if (TARGET_SSE_MATH
1763 && TARGET_RECIP_VEC_DIV
1764 && !optimize_insn_for_size_p ()
1765 && flag_finite_math_only && !flag_trapping_math
1766 && flag_unsafe_math_optimizations)
1767 {
1768 ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
1769 DONE;
1770 }
1771 })
1772
;; Insn for vector divide over the VF modes.  Operand 1 must be a register
;; (tied to the destination in the non-AVX alternative); operand 2 may be
;; memory.  Alternative 0 prints div<ssemodesuffix>, alternative 1 the
;; three-operand vdiv<ssemodesuffix> with optional rounding and mask
;; operands.
1773 (define_insn "<sse>_div<mode>3<mask_name><round_name>"
1774 [(set (match_operand:VF 0 "register_operand" "=x,v")
1775 (div:VF
1776 (match_operand:VF 1 "register_operand" "0,v")
1777 (match_operand:VF 2 "<round_nimm_predicate>" "xm,<round_constraint>")))]
1778 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1779 "@
1780 div<ssemodesuffix>\t{%2, %0|%0, %2}
1781 vdiv<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1782 [(set_attr "isa" "noavx,avx")
1783 (set_attr "type" "ssediv")
1784 (set_attr "prefix" "<mask_prefix3>")
1785 (set_attr "mode" "<MODE>")])
1786
;; UNSPEC_RCP of operand 1 over the VF1_128_256 modes, printed through the
;; %vrcpps template (TARGET_SSE).
1787 (define_insn "<sse>_rcp<mode>2"
1788 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
1789 (unspec:VF1_128_256
1790 [(match_operand:VF1_128_256 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
1791 "TARGET_SSE"
1792 "%vrcpps\t{%1, %0|%0, %1}"
1793 [(set_attr "type" "sse")
1794 (set_attr "atom_sse_attr" "rcp")
1795 (set_attr "btver2_sse_attr" "rcp")
1796 (set_attr "prefix" "maybe_vex")
1797 (set_attr "mode" "<MODE>")])
1798
;; Scalar-in-vector UNSPEC_RCP on V4SF: the unspec of operand 1 is merged
;; with register operand 2 under the constant merge mask 1.  Alternative 0
;; (noavx) ties operand 2 to the destination and prints rcpss; alternative 1
;; (avx) prints the three-operand vrcpss.  In the Intel-syntax half the
;; source is printed with the %k operand modifier.
1799 (define_insn "sse_vmrcpv4sf2"
1800 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1801 (vec_merge:V4SF
1802 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
1803 UNSPEC_RCP)
1804 (match_operand:V4SF 2 "register_operand" "0,x")
1805 (const_int 1)))]
1806 "TARGET_SSE"
1807 "@
1808 rcpss\t{%1, %0|%0, %k1}
1809 vrcpss\t{%1, %2, %0|%0, %2, %k1}"
1810 [(set_attr "isa" "noavx,avx")
1811 (set_attr "type" "sse")
1812 (set_attr "ssememalign" "32")
1813 (set_attr "atom_sse_attr" "rcp")
1814 (set_attr "btver2_sse_attr" "rcp")
1815 (set_attr "prefix" "orig,vex")
1816 (set_attr "mode" "SF")])
1817
;; UNSPEC_RCP14 of operand 1 over the VF_AVX512VL modes, printed as
;; vrcp14<ssemodesuffix> with an optional mask on the destination
;; (TARGET_AVX512F).
1818 (define_insn "<mask_codefor>rcp14<mode><mask_name>"
1819 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
1820 (unspec:VF_AVX512VL
1821 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")]
1822 UNSPEC_RCP14))]
1823 "TARGET_AVX512F"
1824 "vrcp14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
1825 [(set_attr "type" "sse")
1826 (set_attr "prefix" "evex")
1827 (set_attr "mode" "<MODE>")])
1828
;; Scalar-in-vector UNSPEC_RCP14 over the VF_128 modes: the unspec of
;; operand 1 is merged with register operand 2 under the constant merge
;; mask 1, printed as the three-operand vrcp14<ssescalarmodesuffix>
;; (TARGET_AVX512F).  The "mode" attribute is the full vector <MODE>.
1829 (define_insn "srcp14<mode>"
1830 [(set (match_operand:VF_128 0 "register_operand" "=v")
1831 (vec_merge:VF_128
1832 (unspec:VF_128
1833 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
1834 UNSPEC_RCP14)
1835 (match_operand:VF_128 2 "register_operand" "v")
1836 (const_int 1)))]
1837 "TARGET_AVX512F"
1838 "vrcp14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
1839 [(set_attr "type" "sse")
1840 (set_attr "prefix" "evex")
1841 (set_attr "mode" "<MODE>")])
1842
;; Expander for vector sqrt over the VF2 modes (TARGET_SSE2).  It has no
;; preparation code and simply emits the sqrt SET below.
1843 (define_expand "sqrt<mode>2"
1844 [(set (match_operand:VF2 0 "register_operand")
1845 (sqrt:VF2 (match_operand:VF2 1 "nonimmediate_operand")))]
1846 "TARGET_SSE2")
1847
;; Expander for vector sqrt over the VF1 modes (TARGET_SSE).  The operation
;; is expanded through ix86_emit_swsqrtsf (last argument false) instead of
;; the sqrt template when all of these hold: TARGET_SSE_MATH,
;; TARGET_RECIP_VEC_SQRT, not optimizing the insn for size,
;; flag_finite_math_only, !flag_trapping_math and
;; flag_unsafe_math_optimizations.
1848 (define_expand "sqrt<mode>2"
1849 [(set (match_operand:VF1 0 "register_operand")
1850 (sqrt:VF1 (match_operand:VF1 1 "nonimmediate_operand")))]
1851 "TARGET_SSE"
1852 {
1853 if (TARGET_SSE_MATH
1854 && TARGET_RECIP_VEC_SQRT
1855 && !optimize_insn_for_size_p ()
1856 && flag_finite_math_only && !flag_trapping_math
1857 && flag_unsafe_math_optimizations)
1858 {
1859 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
1860 DONE;
1861 }
1862 })
1863
;; Packed square root over all VF modes.  The %v in the template together
;; with prefix "maybe_vex" lets one pattern serve both the legacy and the
;; VEX-prefixed mnemonic.  The mask and rounding substitutions
;; (<mask_name>, <round_name> and friends) are defined outside this
;; section.
1864 (define_insn "<sse>_sqrt<mode>2<mask_name><round_name>"
1865 [(set (match_operand:VF 0 "register_operand" "=v")
1866 (sqrt:VF (match_operand:VF 1 "<round_nimm_predicate>" "<round_constraint>")))]
1867 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1868 "%vsqrt<ssemodesuffix>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
1869 [(set_attr "type" "sse")
1870 (set_attr "atom_sse_attr" "sqrt")
1871 (set_attr "btver2_sse_attr" "sqrt")
1872 (set_attr "prefix" "maybe_vex")
1873 (set_attr "mode" "<MODE>")])
1874
;; Scalar square root over the VF_128 modes.  The vec_merge mask of 1
;; takes element 0 from sqrt of operand 1 and the remaining elements from
;; operand 2.  Alternative 0 (noavx) ties operand 2 to the destination;
;; alternative 1 (avx) is the three-operand form.  The Intel-syntax
;; operand 1 is printed through %<iptr>.
1875 (define_insn "<sse>_vmsqrt<mode>2<round_name>"
1876 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1877 (vec_merge:VF_128
1878 (sqrt:VF_128
1879 (match_operand:VF_128 1 "nonimmediate_operand" "xm,<round_constraint>"))
1880 (match_operand:VF_128 2 "register_operand" "0,v")
1881 (const_int 1)))]
1882 "TARGET_SSE"
1883 "@
1884 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}
1885 vsqrt<ssescalarmodesuffix>\t{<round_op3>%1, %2, %0|%0, %2, %<iptr>1<round_op3>}"
1886 [(set_attr "isa" "noavx,avx")
1887 (set_attr "type" "sse")
1888 (set_attr "atom_sse_attr" "sqrt")
1889 (set_attr "prefix" "<round_prefix>")
1890 (set_attr "btver2_sse_attr" "sqrt")
1891 (set_attr "mode" "<ssescalarmode>")])
1892
;; Named expander for the reciprocal square root of the VF1_128_256
;; modes, enabled by TARGET_SSE_MATH.  The RTL template is never emitted:
;; expansion is always handed to ix86_emit_swsqrtsf with its last
;; argument true, followed by DONE.
1893 (define_expand "rsqrt<mode>2"
1894 [(set (match_operand:VF1_128_256 0 "register_operand")
1895 (unspec:VF1_128_256
1896 [(match_operand:VF1_128_256 1 "nonimmediate_operand")] UNSPEC_RSQRT))]
1897 "TARGET_SSE_MATH"
1898 {
1899 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
1900 DONE;
1901 })
1902
;; Packed rsqrtps / vrsqrtps over the VF1_128_256 modes, modelled as
;; UNSPEC_RSQRT of operand 1.  %v plus prefix "maybe_vex" covers both the
;; legacy and the VEX encoding.
1903 (define_insn "<sse>_rsqrt<mode>2"
1904 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
1905 (unspec:VF1_128_256
1906 [(match_operand:VF1_128_256 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
1907 "TARGET_SSE"
1908 "%vrsqrtps\t{%1, %0|%0, %1}"
1909 [(set_attr "type" "sse")
1910 (set_attr "prefix" "maybe_vex")
1911 (set_attr "mode" "<MODE>")])
1912
;; Packed vrsqrt14 over the VF_AVX512VL modes, modelled as UNSPEC_RSQRT14
;; of operand 1.  Requires TARGET_AVX512F and is always EVEX-encoded.
;; The mask substitutions are defined outside this section.
1913 (define_insn "<mask_codefor>rsqrt14<mode><mask_name>"
1914 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
1915 (unspec:VF_AVX512VL
1916 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")]
1917 UNSPEC_RSQRT14))]
1918 "TARGET_AVX512F"
1919 "vrsqrt14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
1920 [(set_attr "type" "sse")
1921 (set_attr "prefix" "evex")
1922 (set_attr "mode" "<MODE>")])
1923
;; Scalar vrsqrt14 over the VF_128 modes.  The vec_merge mask of 1 takes
;; element 0 from UNSPEC_RSQRT14 of operand 1 and the remaining elements
;; from operand 2.  Operand 1 may be memory, so the Intel-syntax half of
;; the template prints it through %<iptr>, as the other scalar VF_128
;; patterns in this file do (e.g. <sse>_vmsqrt<mode>2); this gives a
;; memory operand the scalar access size instead of the full vector size.
1924 (define_insn "rsqrt14<mode>"
1925 [(set (match_operand:VF_128 0 "register_operand" "=v")
1926 (vec_merge:VF_128
1927 (unspec:VF_128
1928 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
1929 UNSPEC_RSQRT14)
1930 (match_operand:VF_128 2 "register_operand" "v")
1931 (const_int 1)))]
1932 "TARGET_AVX512F"
1933 "vrsqrt14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %<iptr>1}"
1934 [(set_attr "type" "sse")
1935 (set_attr "prefix" "evex")
1936 (set_attr "mode" "<MODE>")])
1937
;; Scalar single-precision reciprocal square root approximation
;; (rsqrtss / vrsqrtss).  The vec_merge mask of 1 takes element 0 from
;; UNSPEC_RSQRT of operand 1 and the remaining elements from operand 2.
;; Alternative 0 (noavx) ties operand 2 to the destination; alternative 1
;; (avx) is the VEX three-operand form.
1938 (define_insn "sse_vmrsqrtv4sf2"
1939 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1940 (vec_merge:V4SF
1941 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
1942 UNSPEC_RSQRT)
1943 (match_operand:V4SF 2 "register_operand" "0,x")
1944 (const_int 1)))]
1945 "TARGET_SSE"
1946 "@
1947 rsqrtss\t{%1, %0|%0, %k1}
1948 vrsqrtss\t{%1, %2, %0|%0, %2, %k1}"
1949 [(set_attr "isa" "noavx,avx")
1950 (set_attr "type" "sse")
1951 (set_attr "ssememalign" "32")
1952 (set_attr "prefix" "orig,vex")
1953 (set_attr "mode" "SF")])
1954
1955 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
1956 ;; isn't really correct, as those rtl operators aren't defined when
1957 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
1958
;; Expander for vector floating-point max/min (smaxmin code iterator)
;; over all VF modes.  Without -ffinite-math-only operand 1 is first
;; forced into a register; the non-finite insn pattern *<code><mode>3
;; in this file requires operand 1 to be a register_operand.  The operands
;; are then passed to ix86_fixup_binary_operands_no_copy.
1959 (define_expand "<code><mode>3<mask_name><round_saeonly_name>"
1960 [(set (match_operand:VF 0 "register_operand")
1961 (smaxmin:VF
1962 (match_operand:VF 1 "<round_saeonly_nimm_predicate>")
1963 (match_operand:VF 2 "<round_saeonly_nimm_predicate>")))]
1964 "TARGET_SSE && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
1965 {
1966 if (!flag_finite_math_only)
1967 operands[1] = force_reg (<MODE>mode, operands[1]);
1968 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
1969 })
1970
;; Packed max/min used only under -ffinite-math-only.  Operand 1 carries
;; the "%" commutativity marker, so the two inputs may be swapped during
;; operand matching.  The pattern additionally requires
;; ix86_binary_operator_ok to accept the operands.
1971 (define_insn "*<code><mode>3_finite<mask_name><round_saeonly_name>"
1972 [(set (match_operand:VF 0 "register_operand" "=x,v")
1973 (smaxmin:VF
1974 (match_operand:VF 1 "<round_saeonly_nimm_predicate>" "%0,v")
1975 (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xm,<round_saeonly_constraint>")))]
1976 "TARGET_SSE && flag_finite_math_only
1977 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
1978 && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
1979 "@
1980 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
1981 v<maxmin_float><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
1982 [(set_attr "isa" "noavx,avx")
1983 (set_attr "type" "sseadd")
1984 (set_attr "btver2_sse_attr" "maxmin")
1985 (set_attr "prefix" "<mask_prefix3>")
1986 (set_attr "mode" "<MODE>")])
1987
;; Packed max/min used when -ffinite-math-only is off.  Unlike the
;; _finite variant, operand 1 must be a register and has no "%" marker,
;; so the operand order is kept as given.
1988 (define_insn "*<code><mode>3<mask_name><round_saeonly_name>"
1989 [(set (match_operand:VF 0 "register_operand" "=x,v")
1990 (smaxmin:VF
1991 (match_operand:VF 1 "register_operand" "0,v")
1992 (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xm,<round_saeonly_constraint>")))]
1993 "TARGET_SSE && !flag_finite_math_only
1994 && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
1995 "@
1996 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
1997 v<maxmin_float><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
1998 [(set_attr "isa" "noavx,avx")
1999 (set_attr "type" "sseadd")
2000 (set_attr "btver2_sse_attr" "maxmin")
2001 (set_attr "prefix" "<mask_prefix3>")
2002 (set_attr "mode" "<MODE>")])
2003
;; Scalar max/min over the VF_128 modes.  The vec_merge mask of 1 takes
;; element 0 from the max/min of operands 1 and 2 and the remaining
;; elements from operand 1 (match_dup 1).  The Intel-syntax operand 2 is
;; printed through %<iptr>.
2004 (define_insn "<sse>_vm<code><mode>3<round_saeonly_name>"
2005 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
2006 (vec_merge:VF_128
2007 (smaxmin:VF_128
2008 (match_operand:VF_128 1 "register_operand" "0,v")
2009 (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_saeonly_constraint>"))
2010 (match_dup 1)
2011 (const_int 1)))]
2012 "TARGET_SSE"
2013 "@
2014 <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
2015 v<maxmin_float><ssescalarmodesuffix>\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %<iptr>2<round_saeonly_op3>}"
2016 [(set_attr "isa" "noavx,avx")
2017 (set_attr "type" "sse")
2018 (set_attr "btver2_sse_attr" "maxmin")
2019 (set_attr "prefix" "<round_saeonly_prefix>")
2020 (set_attr "mode" "<ssescalarmode>")])
2021
2022 ;; These versions of the min/max patterns implement exactly the operations
2023 ;; min = (op1 < op2 ? op1 : op2)
2024 ;; max = (!(op1 < op2) ? op1 : op2)
2025 ;; Their operands are not commutative, and thus they may be used in the
2026 ;; presence of -0.0 and NaN.
2027
;; Packed min expressed as UNSPEC_IEEE_MIN rather than the smin rtx code.
;; Operand 1 must be a register and has no "%" marker, so operand order
;; is preserved.  Emits min<suffix> (noavx) or vmin<suffix> (avx).
2028 (define_insn "*ieee_smin<mode>3"
2029 [(set (match_operand:VF 0 "register_operand" "=v,v")
2030 (unspec:VF
2031 [(match_operand:VF 1 "register_operand" "0,v")
2032 (match_operand:VF 2 "nonimmediate_operand" "vm,vm")]
2033 UNSPEC_IEEE_MIN))]
2034 "TARGET_SSE"
2035 "@
2036 min<ssemodesuffix>\t{%2, %0|%0, %2}
2037 vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2038 [(set_attr "isa" "noavx,avx")
2039 (set_attr "type" "sseadd")
2040 (set_attr "prefix" "orig,vex")
2041 (set_attr "mode" "<MODE>")])
2042
;; Packed max expressed as UNSPEC_IEEE_MAX rather than the smax rtx code.
;; Operand 1 must be a register and has no "%" marker, so operand order
;; is preserved.  Emits max<suffix> (noavx) or vmax<suffix> (avx).
2043 (define_insn "*ieee_smax<mode>3"
2044 [(set (match_operand:VF 0 "register_operand" "=v,v")
2045 (unspec:VF
2046 [(match_operand:VF 1 "register_operand" "0,v")
2047 (match_operand:VF 2 "nonimmediate_operand" "vm,vm")]
2048 UNSPEC_IEEE_MAX))]
2049 "TARGET_SSE"
2050 "@
2051 max<ssemodesuffix>\t{%2, %0|%0, %2}
2052 vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2053 [(set_attr "isa" "noavx,avx")
2054 (set_attr "type" "sseadd")
2055 (set_attr "prefix" "orig,vex")
2056 (set_attr "mode" "<MODE>")])
2057
;; 256-bit vaddsubpd.  The result is a vec_merge of (op1 - op2) and
;; (op1 + op2); mask 5 (binary 0101) takes elements 0 and 2 from the
;; difference and elements 1 and 3 from the sum.
2058 (define_insn "avx_addsubv4df3"
2059 [(set (match_operand:V4DF 0 "register_operand" "=x")
2060 (vec_merge:V4DF
2061 (minus:V4DF
2062 (match_operand:V4DF 1 "register_operand" "x")
2063 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
2064 (plus:V4DF (match_dup 1) (match_dup 2))
2065 (const_int 5)))]
2066 "TARGET_AVX"
2067 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
2068 [(set_attr "type" "sseadd")
2069 (set_attr "prefix" "vex")
2070 (set_attr "mode" "V4DF")])
2071
;; 128-bit addsubpd / vaddsubpd.  The result is a vec_merge of
;; (op1 - op2) and (op1 + op2); mask 1 takes element 0 from the
;; difference and element 1 from the sum.
2072 (define_insn "sse3_addsubv2df3"
2073 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2074 (vec_merge:V2DF
2075 (minus:V2DF
2076 (match_operand:V2DF 1 "register_operand" "0,x")
2077 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm"))
2078 (plus:V2DF (match_dup 1) (match_dup 2))
2079 (const_int 1)))]
2080 "TARGET_SSE3"
2081 "@
2082 addsubpd\t{%2, %0|%0, %2}
2083 vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
2084 [(set_attr "isa" "noavx,avx")
2085 (set_attr "type" "sseadd")
2086 (set_attr "atom_unit" "complex")
2087 (set_attr "prefix" "orig,vex")
2088 (set_attr "mode" "V2DF")])
2089
;; 256-bit vaddsubps.  The result is a vec_merge of (op1 - op2) and
;; (op1 + op2); mask 85 (binary 01010101) takes the even-numbered
;; elements from the difference and the odd-numbered ones from the sum.
2090 (define_insn "avx_addsubv8sf3"
2091 [(set (match_operand:V8SF 0 "register_operand" "=x")
2092 (vec_merge:V8SF
2093 (minus:V8SF
2094 (match_operand:V8SF 1 "register_operand" "x")
2095 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
2096 (plus:V8SF (match_dup 1) (match_dup 2))
2097 (const_int 85)))]
2098 "TARGET_AVX"
2099 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
2100 [(set_attr "type" "sseadd")
2101 (set_attr "prefix" "vex")
2102 (set_attr "mode" "V8SF")])
2103
;; 128-bit addsubps / vaddsubps.  The result is a vec_merge of
;; (op1 - op2) and (op1 + op2); mask 5 (binary 0101) takes elements 0
;; and 2 from the difference and elements 1 and 3 from the sum.
2104 (define_insn "sse3_addsubv4sf3"
2105 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2106 (vec_merge:V4SF
2107 (minus:V4SF
2108 (match_operand:V4SF 1 "register_operand" "0,x")
2109 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
2110 (plus:V4SF (match_dup 1) (match_dup 2))
2111 (const_int 5)))]
2112 "TARGET_SSE3"
2113 "@
2114 addsubps\t{%2, %0|%0, %2}
2115 vaddsubps\t{%2, %1, %0|%0, %1, %2}"
2116 [(set_attr "isa" "noavx,avx")
2117 (set_attr "type" "sseadd")
2118 (set_attr "prefix" "orig,vex")
2119 (set_attr "prefix_rep" "1,*")
2120 (set_attr "mode" "V4SF")])
2121
;; Canonicalize an addsub_vm_operator expression (predicate defined
;; outside this section) whose first arm is a MINUS and second arm a PLUS.
;; The PLUS operands must equal the MINUS operands in either order.
;; The replacement is a vec_merge of minus/plus over operands 1 and 2
;; with the original mask (operand 5) unchanged.  Only applies while
;; can_create_pseudo_p () holds.
2122 (define_split
2123 [(set (match_operand:VF_128_256 0 "register_operand")
2124 (match_operator:VF_128_256 6 "addsub_vm_operator"
2125 [(minus:VF_128_256
2126 (match_operand:VF_128_256 1 "register_operand")
2127 (match_operand:VF_128_256 2 "nonimmediate_operand"))
2128 (plus:VF_128_256
2129 (match_operand:VF_128_256 3 "nonimmediate_operand")
2130 (match_operand:VF_128_256 4 "nonimmediate_operand"))
2131 (match_operand 5 "const_int_operand")]))]
2132 "TARGET_SSE3
2133 && can_create_pseudo_p ()
2134 && ((rtx_equal_p (operands[1], operands[3])
2135 && rtx_equal_p (operands[2], operands[4]))
2136 || (rtx_equal_p (operands[1], operands[4])
2137 && rtx_equal_p (operands[2], operands[3])))"
2138 [(set (match_dup 0)
2139 (vec_merge:VF_128_256
2140 (minus:VF_128_256 (match_dup 1) (match_dup 2))
2141 (plus:VF_128_256 (match_dup 1) (match_dup 2))
2142 (match_dup 5)))])
2143
;; Same canonicalization as the split with MINUS first, but for an
;; addsub_vm_operator expression whose first arm is the PLUS and second
;; arm the MINUS.  The replacement puts the MINUS (operands 3 and 4)
;; first, so the mask is complemented and truncated to one bit per vector
;; element.
2144 (define_split
2145 [(set (match_operand:VF_128_256 0 "register_operand")
2146 (match_operator:VF_128_256 6 "addsub_vm_operator"
2147 [(plus:VF_128_256
2148 (match_operand:VF_128_256 1 "nonimmediate_operand")
2149 (match_operand:VF_128_256 2 "nonimmediate_operand"))
2150 (minus:VF_128_256
2151 (match_operand:VF_128_256 3 "register_operand")
2152 (match_operand:VF_128_256 4 "nonimmediate_operand"))
2153 (match_operand 5 "const_int_operand")]))]
2154 "TARGET_SSE3
2155 && can_create_pseudo_p ()
2156 && ((rtx_equal_p (operands[1], operands[3])
2157 && rtx_equal_p (operands[2], operands[4]))
2158 || (rtx_equal_p (operands[1], operands[4])
2159 && rtx_equal_p (operands[2], operands[3])))"
2160 [(set (match_dup 0)
2161 (vec_merge:VF_128_256
2162 (minus:VF_128_256 (match_dup 3) (match_dup 4))
2163 (plus:VF_128_256 (match_dup 3) (match_dup 4))
2164 (match_dup 5)))]
2165 {
2166 /* Negate mask bits to compensate for swapped PLUS and MINUS RTXes. */
2167 operands[5]
2168 = GEN_INT (~INTVAL (operands[5])
2169 & ((HOST_WIDE_INT_1U << GET_MODE_NUNITS (<MODE>mode)) - 1));
2170 })
2171
;; Canonicalize an addsub_vs_operator expression (predicate defined
;; outside this section) applied to a vec_concat of MINUS (first half) and
;; PLUS (second half) with selector parallel operand 5.  The preparation
;; code turns the selector into a vec_merge mask: bit i is set when
;; selector entry i is below the element count of <MODE>, i.e. when it
;; indexes the MINUS half of the concatenation.
2172 (define_split
2173 [(set (match_operand:VF_128_256 0 "register_operand")
2174 (match_operator:VF_128_256 7 "addsub_vs_operator"
2175 [(vec_concat:<ssedoublemode>
2176 (minus:VF_128_256
2177 (match_operand:VF_128_256 1 "register_operand")
2178 (match_operand:VF_128_256 2 "nonimmediate_operand"))
2179 (plus:VF_128_256
2180 (match_operand:VF_128_256 3 "nonimmediate_operand")
2181 (match_operand:VF_128_256 4 "nonimmediate_operand")))
2182 (match_parallel 5 "addsub_vs_parallel"
2183 [(match_operand 6 "const_int_operand")])]))]
2184 "TARGET_SSE3
2185 && can_create_pseudo_p ()
2186 && ((rtx_equal_p (operands[1], operands[3])
2187 && rtx_equal_p (operands[2], operands[4]))
2188 || (rtx_equal_p (operands[1], operands[4])
2189 && rtx_equal_p (operands[2], operands[3])))"
2190 [(set (match_dup 0)
2191 (vec_merge:VF_128_256
2192 (minus:VF_128_256 (match_dup 1) (match_dup 2))
2193 (plus:VF_128_256 (match_dup 1) (match_dup 2))
2194 (match_dup 5)))]
2195 {
2196 int i, nelt = XVECLEN (operands[5], 0);
2197 HOST_WIDE_INT ival = 0;
2198
2199 for (i = 0; i < nelt; i++)
2200 if (INTVAL (XVECEXP (operands[5], 0, i)) < GET_MODE_NUNITS (<MODE>mode))
2201 ival |= HOST_WIDE_INT_1 << i;
2202
2203 operands[5] = GEN_INT (ival);
2204 })
2205
;; Same canonicalization as the addsub_vs_operator split with MINUS
;; first, but for a vec_concat of PLUS (first half) and MINUS (second
;; half).  Here mask bit i is set when selector entry i is at or above the
;; element count of <MODE>, i.e. when it indexes the MINUS half.  The
;; replacement uses the MINUS operands 3 and 4.
2206 (define_split
2207 [(set (match_operand:VF_128_256 0 "register_operand")
2208 (match_operator:VF_128_256 7 "addsub_vs_operator"
2209 [(vec_concat:<ssedoublemode>
2210 (plus:VF_128_256
2211 (match_operand:VF_128_256 1 "nonimmediate_operand")
2212 (match_operand:VF_128_256 2 "nonimmediate_operand"))
2213 (minus:VF_128_256
2214 (match_operand:VF_128_256 3 "register_operand")
2215 (match_operand:VF_128_256 4 "nonimmediate_operand")))
2216 (match_parallel 5 "addsub_vs_parallel"
2217 [(match_operand 6 "const_int_operand")])]))]
2218 "TARGET_SSE3
2219 && can_create_pseudo_p ()
2220 && ((rtx_equal_p (operands[1], operands[3])
2221 && rtx_equal_p (operands[2], operands[4]))
2222 || (rtx_equal_p (operands[1], operands[4])
2223 && rtx_equal_p (operands[2], operands[3])))"
2224 [(set (match_dup 0)
2225 (vec_merge:VF_128_256
2226 (minus:VF_128_256 (match_dup 3) (match_dup 4))
2227 (plus:VF_128_256 (match_dup 3) (match_dup 4))
2228 (match_dup 5)))]
2229 {
2230 int i, nelt = XVECLEN (operands[5], 0);
2231 HOST_WIDE_INT ival = 0;
2232
2233 for (i = 0; i < nelt; i++)
2234 if (INTVAL (XVECEXP (operands[5], 0, i)) >= GET_MODE_NUNITS (<MODE>mode))
2235 ival |= HOST_WIDE_INT_1 << i;
2236
2237 operands[5] = GEN_INT (ival);
2238 })
2239
;; 256-bit horizontal add/subtract of doubles (plusminus iterator).
;; With a = operand 1 and b = operand 2 the result elements are, in order:
;;   a[0] op a[1], b[0] op b[1], a[2] op a[3], b[2] op b[3].
2240 (define_insn "avx_h<plusminus_insn>v4df3"
2241 [(set (match_operand:V4DF 0 "register_operand" "=x")
2242 (vec_concat:V4DF
2243 (vec_concat:V2DF
2244 (plusminus:DF
2245 (vec_select:DF
2246 (match_operand:V4DF 1 "register_operand" "x")
2247 (parallel [(const_int 0)]))
2248 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2249 (plusminus:DF
2250 (vec_select:DF
2251 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
2252 (parallel [(const_int 0)]))
2253 (vec_select:DF (match_dup 2) (parallel [(const_int 1)]))))
2254 (vec_concat:V2DF
2255 (plusminus:DF
2256 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
2257 (vec_select:DF (match_dup 1) (parallel [(const_int 3)])))
2258 (plusminus:DF
2259 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
2260 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
2261 "TARGET_AVX"
2262 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
2263 [(set_attr "type" "sseadd")
2264 (set_attr "prefix" "vex")
2265 (set_attr "mode" "V4DF")])
2266
;; Named expander for the 128-bit horizontal add of doubles.  It emits
;; the canonical form { op1[0] + op1[1], op2[0] + op2[1] } and has no
;; preparation code.
2267 (define_expand "sse3_haddv2df3"
2268 [(set (match_operand:V2DF 0 "register_operand")
2269 (vec_concat:V2DF
2270 (plus:DF
2271 (vec_select:DF
2272 (match_operand:V2DF 1 "register_operand")
2273 (parallel [(const_int 0)]))
2274 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2275 (plus:DF
2276 (vec_select:DF
2277 (match_operand:V2DF 2 "nonimmediate_operand")
2278 (parallel [(const_int 0)]))
2279 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
2280 "TARGET_SSE3")
2281
;; Insn pattern for haddpd / vhaddpd.  The lane indices are matched as
;; operands 3..6 (each 0 or 1) instead of fixed constants; the condition
;; only requires the two indices of each sum to differ, so either lane
;; order within a sum is accepted.
2282 (define_insn "*sse3_haddv2df3"
2283 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2284 (vec_concat:V2DF
2285 (plus:DF
2286 (vec_select:DF
2287 (match_operand:V2DF 1 "register_operand" "0,x")
2288 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))
2289 (vec_select:DF
2290 (match_dup 1)
2291 (parallel [(match_operand:SI 4 "const_0_to_1_operand")])))
2292 (plus:DF
2293 (vec_select:DF
2294 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
2295 (parallel [(match_operand:SI 5 "const_0_to_1_operand")]))
2296 (vec_select:DF
2297 (match_dup 2)
2298 (parallel [(match_operand:SI 6 "const_0_to_1_operand")])))))]
2299 "TARGET_SSE3
2300 && INTVAL (operands[3]) != INTVAL (operands[4])
2301 && INTVAL (operands[5]) != INTVAL (operands[6])"
2302 "@
2303 haddpd\t{%2, %0|%0, %2}
2304 vhaddpd\t{%2, %1, %0|%0, %1, %2}"
2305 [(set_attr "isa" "noavx,avx")
2306 (set_attr "type" "sseadd")
2307 (set_attr "prefix" "orig,vex")
2308 (set_attr "mode" "V2DF")])
2309
;; 128-bit horizontal subtract of doubles (hsubpd / vhsubpd).  The result
;; is { op1[0] - op1[1], op2[0] - op2[1] }; the lane indices are fixed
;; constants here.
2310 (define_insn "sse3_hsubv2df3"
2311 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2312 (vec_concat:V2DF
2313 (minus:DF
2314 (vec_select:DF
2315 (match_operand:V2DF 1 "register_operand" "0,x")
2316 (parallel [(const_int 0)]))
2317 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2318 (minus:DF
2319 (vec_select:DF
2320 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
2321 (parallel [(const_int 0)]))
2322 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
2323 "TARGET_SSE3"
2324 "@
2325 hsubpd\t{%2, %0|%0, %2}
2326 vhsubpd\t{%2, %1, %0|%0, %1, %2}"
2327 [(set_attr "isa" "noavx,avx")
2328 (set_attr "type" "sseadd")
2329 (set_attr "prefix" "orig,vex")
2330 (set_attr "mode" "V2DF")])
2331
;; Scalar DF sum of the two lanes of a V2DF register, implemented with
;; haddpd / vhaddpd using the same register for both sources.  The lane
;; indices are operands 2 and 3 (each 0 or 1) and must differ, so either
;; lane order is accepted.
2332 (define_insn "*sse3_haddv2df3_low"
2333 [(set (match_operand:DF 0 "register_operand" "=x,x")
2334 (plus:DF
2335 (vec_select:DF
2336 (match_operand:V2DF 1 "register_operand" "0,x")
2337 (parallel [(match_operand:SI 2 "const_0_to_1_operand")]))
2338 (vec_select:DF
2339 (match_dup 1)
2340 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))))]
2341 "TARGET_SSE3
2342 && INTVAL (operands[2]) != INTVAL (operands[3])"
2343 "@
2344 haddpd\t{%0, %0|%0, %0}
2345 vhaddpd\t{%1, %1, %0|%0, %1, %1}"
2346 [(set_attr "isa" "noavx,avx")
2347 (set_attr "type" "sseadd1")
2348 (set_attr "prefix" "orig,vex")
2349 (set_attr "mode" "V2DF")])
2350
;; Scalar DF difference op1[0] - op1[1] of a V2DF register, implemented
;; with hsubpd / vhsubpd using the same register for both sources.  The
;; lane indices are fixed constants here.
2351 (define_insn "*sse3_hsubv2df3_low"
2352 [(set (match_operand:DF 0 "register_operand" "=x,x")
2353 (minus:DF
2354 (vec_select:DF
2355 (match_operand:V2DF 1 "register_operand" "0,x")
2356 (parallel [(const_int 0)]))
2357 (vec_select:DF
2358 (match_dup 1)
2359 (parallel [(const_int 1)]))))]
2360 "TARGET_SSE3"
2361 "@
2362 hsubpd\t{%0, %0|%0, %0}
2363 vhsubpd\t{%1, %1, %0|%0, %1, %1}"
2364 [(set_attr "isa" "noavx,avx")
2365 (set_attr "type" "sseadd1")
2366 (set_attr "prefix" "orig,vex")
2367 (set_attr "mode" "V2DF")])
2368
;; 256-bit horizontal add/subtract of floats (plusminus iterator).
;; With a = operand 1 and b = operand 2 the result elements are, in order:
;;   a[0] op a[1], a[2] op a[3], b[0] op b[1], b[2] op b[3],
;;   a[4] op a[5], a[6] op a[7], b[4] op b[5], b[6] op b[7].
2369 (define_insn "avx_h<plusminus_insn>v8sf3"
2370 [(set (match_operand:V8SF 0 "register_operand" "=x")
2371 (vec_concat:V8SF
2372 (vec_concat:V4SF
2373 (vec_concat:V2SF
2374 (plusminus:SF
2375 (vec_select:SF
2376 (match_operand:V8SF 1 "register_operand" "x")
2377 (parallel [(const_int 0)]))
2378 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
2379 (plusminus:SF
2380 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
2381 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
2382 (vec_concat:V2SF
2383 (plusminus:SF
2384 (vec_select:SF
2385 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
2386 (parallel [(const_int 0)]))
2387 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
2388 (plusminus:SF
2389 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
2390 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
2391 (vec_concat:V4SF
2392 (vec_concat:V2SF
2393 (plusminus:SF
2394 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
2395 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
2396 (plusminus:SF
2397 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
2398 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
2399 (vec_concat:V2SF
2400 (plusminus:SF
2401 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
2402 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
2403 (plusminus:SF
2404 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
2405 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
2406 "TARGET_AVX"
2407 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
2408 [(set_attr "type" "sseadd")
2409 (set_attr "prefix" "vex")
2410 (set_attr "mode" "V8SF")])
2411
;; 128-bit horizontal add/subtract of floats (plusminus iterator).
;; With a = operand 1 and b = operand 2 the result elements are, in order:
;;   a[0] op a[1], a[2] op a[3], b[0] op b[1], b[2] op b[3].
2412 (define_insn "sse3_h<plusminus_insn>v4sf3"
2413 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2414 (vec_concat:V4SF
2415 (vec_concat:V2SF
2416 (plusminus:SF
2417 (vec_select:SF
2418 (match_operand:V4SF 1 "register_operand" "0,x")
2419 (parallel [(const_int 0)]))
2420 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
2421 (plusminus:SF
2422 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
2423 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
2424 (vec_concat:V2SF
2425 (plusminus:SF
2426 (vec_select:SF
2427 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
2428 (parallel [(const_int 0)]))
2429 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
2430 (plusminus:SF
2431 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
2432 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
2433 "TARGET_SSE3"
2434 "@
2435 h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
2436 vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
2437 [(set_attr "isa" "noavx,avx")
2438 (set_attr "type" "sseadd")
2439 (set_attr "atom_unit" "complex")
2440 (set_attr "prefix" "orig,vex")
2441 (set_attr "prefix_rep" "1,*")
2442 (set_attr "mode" "V4SF")])
2443
;; Sum-reduce a V8DF vector (operand 1) into a DF scalar (operand 0).
;; ix86_expand_reduc performs the reduction with gen_addv8df3 into a
;; temporary vector, and element 0 of that temporary is extracted as the
;; result.
2444 (define_expand "reduc_plus_scal_v8df"
2445 [(match_operand:DF 0 "register_operand")
2446 (match_operand:V8DF 1 "register_operand")]
2447 "TARGET_AVX512F"
2448 {
2449 rtx tmp = gen_reg_rtx (V8DFmode);
2450 ix86_expand_reduc (gen_addv8df3, tmp, operands[1]);
2451 emit_insn (gen_vec_extractv8df (operands[0], tmp, const0_rtx));
2452 DONE;
2453 })
2454
;; Sum-reduce a V4DF vector (operand 1) into a DF scalar (operand 0).
;; Sequence: tmp = hadd (op1, op1); tmp2 = vperm2f128 (tmp, tmp, 1);
;; vec_res = tmp + tmp2; result = element 0 of vec_res.
;; NOTE(review): immediate 1 presumably brings the other 128-bit half
;; into the low lane -- confirm against the vperm2f128 pattern.
2455 (define_expand "reduc_plus_scal_v4df"
2456 [(match_operand:DF 0 "register_operand")
2457 (match_operand:V4DF 1 "register_operand")]
2458 "TARGET_AVX"
2459 {
2460 rtx tmp = gen_reg_rtx (V4DFmode);
2461 rtx tmp2 = gen_reg_rtx (V4DFmode);
2462 rtx vec_res = gen_reg_rtx (V4DFmode);
2463 emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
2464 emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1)));
2465 emit_insn (gen_addv4df3 (vec_res, tmp, tmp2));
2466 emit_insn (gen_vec_extractv4df (operands[0], vec_res, const0_rtx));
2467 DONE;
2468 })
2469
;; Sum-reduce a V2DF vector (operand 1) into a DF scalar (operand 0):
;; one horizontal add of the vector with itself, then extraction of
;; element 0.
2470 (define_expand "reduc_plus_scal_v2df"
2471 [(match_operand:DF 0 "register_operand")
2472 (match_operand:V2DF 1 "register_operand")]
2473 "TARGET_SSE3"
2474 {
2475 rtx tmp = gen_reg_rtx (V2DFmode);
2476 emit_insn (gen_sse3_haddv2df3 (tmp, operands[1], operands[1]));
2477 emit_insn (gen_vec_extractv2df (operands[0], tmp, const0_rtx));
2478 DONE;
2479 })
2480
;; Sum-reduce a V16SF vector (operand 1) into an SF scalar (operand 0).
;; ix86_expand_reduc performs the reduction with gen_addv16sf3 into a
;; temporary vector, and element 0 of that temporary is extracted as the
;; result.
2481 (define_expand "reduc_plus_scal_v16sf"
2482 [(match_operand:SF 0 "register_operand")
2483 (match_operand:V16SF 1 "register_operand")]
2484 "TARGET_AVX512F"
2485 {
2486 rtx tmp = gen_reg_rtx (V16SFmode);
2487 ix86_expand_reduc (gen_addv16sf3, tmp, operands[1]);
2488 emit_insn (gen_vec_extractv16sf (operands[0], tmp, const0_rtx));
2489 DONE;
2490 })
2491
;; Sum-reduce a V8SF vector (operand 1) into an SF scalar (operand 0).
;; Sequence: tmp = hadd (op1, op1); tmp2 = hadd (tmp, tmp);
;; tmp = vperm2f128 (tmp2, tmp2, 1) (tmp is reused); vec_res = tmp + tmp2;
;; result = element 0 of vec_res.
;; NOTE(review): immediate 1 presumably brings the other 128-bit half
;; into the low lane -- confirm against the vperm2f128 pattern.
2492 (define_expand "reduc_plus_scal_v8sf"
2493 [(match_operand:SF 0 "register_operand")
2494 (match_operand:V8SF 1 "register_operand")]
2495 "TARGET_AVX"
2496 {
2497 rtx tmp = gen_reg_rtx (V8SFmode);
2498 rtx tmp2 = gen_reg_rtx (V8SFmode);
2499 rtx vec_res = gen_reg_rtx (V8SFmode);
2500 emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
2501 emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp));
2502 emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1)));
2503 emit_insn (gen_addv8sf3 (vec_res, tmp, tmp2));
2504 emit_insn (gen_vec_extractv8sf (operands[0], vec_res, const0_rtx));
2505 DONE;
2506 })
2507
;; Sum-reduce a V4SF vector (operand 1) into an SF scalar (operand 0).
;; With SSE3 two successive horizontal adds are used; without it the
;; reduction is delegated to ix86_expand_reduc with gen_addv4sf3.
;; Element 0 of the resulting vector is extracted in both cases.
2508 (define_expand "reduc_plus_scal_v4sf"
2509 [(match_operand:SF 0 "register_operand")
2510 (match_operand:V4SF 1 "register_operand")]
2511 "TARGET_SSE"
2512 {
2513 rtx vec_res = gen_reg_rtx (V4SFmode);
2514 if (TARGET_SSE3)
2515 {
2516 rtx tmp = gen_reg_rtx (V4SFmode);
2517 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
2518 emit_insn (gen_sse3_haddv4sf3 (vec_res, tmp, tmp));
2519 }
2520 else
2521 ix86_expand_reduc (gen_addv4sf3, vec_res, operands[1]);
2522 emit_insn (gen_vec_extractv4sf (operands[0], vec_res, const0_rtx));
2523 DONE;
2524 })
2525
2526 ;; Modes handled by reduc_sm{in,ax}* patterns.
;; Each mode carries its own enabling condition: 256-bit integer modes
;; need AVX2, 256-bit float modes AVX, V4SF plain SSE, 512-bit byte/word
;; modes AVX512BW and the remaining 512-bit modes AVX512F.
2527 (define_mode_iterator REDUC_SMINMAX_MODE
2528 [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
2529 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
2530 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
2531 (V4SF "TARGET_SSE") (V64QI "TARGET_AVX512BW")
2532 (V32HI "TARGET_AVX512BW") (V16SI "TARGET_AVX512F")
2533 (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
2534 (V8DF "TARGET_AVX512F")])
2535
;; Signed max/min reduction of a vector (operand 1) into a scalar
;; (operand 0) for every mode in REDUC_SMINMAX_MODE.  The expander's own
;; condition is empty, so availability comes from the per-mode conditions
;; of the iterator.  ix86_expand_reduc reduces with gen_<code><mode>3
;; into a temporary and element 0 of it is extracted.
2536 (define_expand "reduc_<code>_scal_<mode>"
2537 [(smaxmin:REDUC_SMINMAX_MODE
2538 (match_operand:<ssescalarmode> 0 "register_operand")
2539 (match_operand:REDUC_SMINMAX_MODE 1 "register_operand"))]
2540 ""
2541 {
2542 rtx tmp = gen_reg_rtx (<MODE>mode);
2543 ix86_expand_reduc (gen_<code><mode>3, tmp, operands[1]);
2544 emit_insn (gen_vec_extract<mode> (operands[0], tmp, const0_rtx));
2545 DONE;
2546 })
2547
;; Unsigned max/min reduction (umaxmin iterator) of a VI_AVX512BW vector
;; (operand 1) into a scalar (operand 0), enabled by TARGET_AVX512F.
;; ix86_expand_reduc reduces with gen_<code><mode>3 into a temporary and
;; element 0 of it is extracted.
2548 (define_expand "reduc_<code>_scal_<mode>"
2549 [(umaxmin:VI_AVX512BW
2550 (match_operand:<ssescalarmode> 0 "register_operand")
2551 (match_operand:VI_AVX512BW 1 "register_operand"))]
2552 "TARGET_AVX512F"
2553 {
2554 rtx tmp = gen_reg_rtx (<MODE>mode);
2555 ix86_expand_reduc (gen_<code><mode>3, tmp, operands[1]);
2556 emit_insn (gen_vec_extract<mode> (operands[0], tmp, const0_rtx));
2557 DONE;
2558 })
2559
;; Unsigned max/min reduction (umaxmin iterator) of a VI_256 vector
;; (operand 1) into a scalar (operand 0), enabled by TARGET_AVX2.
;; ix86_expand_reduc reduces with gen_<code><mode>3 into a temporary and
;; element 0 of it is extracted.
2560 (define_expand "reduc_<code>_scal_<mode>"
2561 [(umaxmin:VI_256
2562 (match_operand:<ssescalarmode> 0 "register_operand")
2563 (match_operand:VI_256 1 "register_operand"))]
2564 "TARGET_AVX2"
2565 {
2566 rtx tmp = gen_reg_rtx (<MODE>mode);
2567 ix86_expand_reduc (gen_<code><mode>3, tmp, operands[1]);
2568 emit_insn (gen_vec_extract<mode> (operands[0], tmp, const0_rtx));
2569 DONE;
2570 })
2571
;; Unsigned minimum reduction of a V8HI vector (operand 1) into an HI
;; scalar (operand 0), enabled by TARGET_SSE4_1.  ix86_expand_reduc
;; reduces with gen_uminv8hi3 into a temporary and element 0 of it is
;; extracted.
2572 (define_expand "reduc_umin_scal_v8hi"
2573 [(umin:V8HI
2574 (match_operand:HI 0 "register_operand")
2575 (match_operand:V8HI 1 "register_operand"))]
2576 "TARGET_SSE4_1"
2577 {
2578 rtx tmp = gen_reg_rtx (V8HImode);
2579 ix86_expand_reduc (gen_uminv8hi3, tmp, operands[1]);
2580 emit_insn (gen_vec_extractv8hi (operands[0], tmp, const0_rtx));
2581 DONE;
2582 })
2583
;; Packed vreduce over the VF_AVX512VL modes, modelled as UNSPEC_REDUCE
;; of operand 1 and an immediate operand 2 in the range 0..255.
;; Requires TARGET_AVX512DQ and is always EVEX-encoded.  The mask
;; substitutions are defined outside this section.
2584 (define_insn "<mask_codefor>reducep<mode><mask_name>"
2585 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
2586 (unspec:VF_AVX512VL
2587 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")
2588 (match_operand:SI 2 "const_0_to_255_operand")]
2589 UNSPEC_REDUCE))]
2590 "TARGET_AVX512DQ"
2591 "vreduce<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
2592 [(set_attr "type" "sse")
2593 (set_attr "prefix" "evex")
2594 (set_attr "mode" "<MODE>")])
2595
;; Scalar vreduce over the VF_128 modes.  The vec_merge mask of 1 takes
;; element 0 from UNSPEC_REDUCE of operands 1, 2 and the 0..255 immediate
;; operand 3, and the remaining elements from operand 1 (match_dup 1).
;; Operand 2 may be memory, so the Intel-syntax half of the template
;; prints it through %<iptr>, as the other scalar VF_128 patterns in this
;; file do (e.g. avx_vmcmp<mode>3); this gives a memory operand the scalar
;; access size instead of the full vector size.
2596 (define_insn "reduces<mode>"
2597 [(set (match_operand:VF_128 0 "register_operand" "=v")
2598 (vec_merge:VF_128
2599 (unspec:VF_128
2600 [(match_operand:VF_128 1 "register_operand" "v")
2601 (match_operand:VF_128 2 "nonimmediate_operand" "vm")
2602 (match_operand:SI 3 "const_0_to_255_operand")]
2603 UNSPEC_REDUCE)
2604 (match_dup 1)
2605 (const_int 1)))]
2606 "TARGET_AVX512DQ"
2607 "vreduce<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}"
2608 [(set_attr "type" "sse")
2609 (set_attr "prefix" "evex")
2610 (set_attr "mode" "<MODE>")])
2611
2612 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2613 ;;
2614 ;; Parallel floating point comparisons
2615 ;;
2616 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2617
;; Packed AVX floating-point compare with an immediate predicate.
;; Modelled as UNSPEC_PCMP of operands 1 and 2 and an immediate
;; operand 3 restricted to 0..31; the result is a vector of the same mode.
2618 (define_insn "avx_cmp<mode>3"
2619 [(set (match_operand:VF_128_256 0 "register_operand" "=x")
2620 (unspec:VF_128_256
2621 [(match_operand:VF_128_256 1 "register_operand" "x")
2622 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm")
2623 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2624 UNSPEC_PCMP))]
2625 "TARGET_AVX"
2626 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2627 [(set_attr "type" "ssecmp")
2628 (set_attr "length_immediate" "1")
2629 (set_attr "prefix" "vex")
2630 (set_attr "mode" "<MODE>")])
2631
;; Scalar AVX floating-point compare with an immediate predicate (0..31).
;; The vec_merge mask of 1 takes element 0 from the UNSPEC_PCMP result
;; and the remaining elements from operand 1 (match_dup 1).  The
;; Intel-syntax operand 2 is printed through %<iptr>.
2632 (define_insn "avx_vmcmp<mode>3"
2633 [(set (match_operand:VF_128 0 "register_operand" "=x")
2634 (vec_merge:VF_128
2635 (unspec:VF_128
2636 [(match_operand:VF_128 1 "register_operand" "x")
2637 (match_operand:VF_128 2 "nonimmediate_operand" "xm")
2638 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2639 UNSPEC_PCMP)
2640 (match_dup 1)
2641 (const_int 1)))]
2642 "TARGET_AVX"
2643 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}"
2644 [(set_attr "type" "ssecmp")
2645 (set_attr "length_immediate" "1")
2646 (set_attr "prefix" "vex")
2647 (set_attr "mode" "<ssescalarmode>")])
2648
;; Packed mask compare for commutative comparison codes only: the
;; condition requires the matched operator (operand 3) to be of class
;; RTX_COMM_COMPARE, and operand 1 carries the "%" commutativity marker.
;; The comparison mnemonic is produced from operand 3 via %D3.
2649 (define_insn "*<sse>_maskcmp<mode>3_comm"
2650 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
2651 (match_operator:VF_128_256 3 "sse_comparison_operator"
2652 [(match_operand:VF_128_256 1 "register_operand" "%0,x")
2653 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")]))]
2654 "TARGET_SSE
2655 && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
2656 "@
2657 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
2658 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2659 [(set_attr "isa" "noavx,avx")
2660 (set_attr "type" "ssecmp")
2661 (set_attr "length_immediate" "1")
2662 (set_attr "prefix" "orig,vex")
2663 (set_attr "mode" "<MODE>")])
2664
;; Packed mask compare for any sse_comparison_operator.  Operand order is
;; fixed (no "%" marker on operand 1).  The comparison mnemonic is
;; produced from operand 3 via %D3.
2665 (define_insn "<sse>_maskcmp<mode>3"
2666 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
2667 (match_operator:VF_128_256 3 "sse_comparison_operator"
2668 [(match_operand:VF_128_256 1 "register_operand" "0,x")
2669 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")]))]
2670 "TARGET_SSE"
2671 "@
2672 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
2673 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2674 [(set_attr "isa" "noavx,avx")
2675 (set_attr "type" "ssecmp")
2676 (set_attr "length_immediate" "1")
2677 (set_attr "prefix" "orig,vex")
2678 (set_attr "mode" "<MODE>")])
2679
;; Scalar mask compare over the VF_128 modes.  The vec_merge mask of 1
;; takes element 0 from the comparison of operands 1 and 2 and the
;; remaining elements from operand 1 (match_dup 1).  The Intel-syntax
;; operand 2 is printed through %<iptr>.
2680 (define_insn "<sse>_vmmaskcmp<mode>3"
2681 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2682 (vec_merge:VF_128
2683 (match_operator:VF_128 3 "sse_comparison_operator"
2684 [(match_operand:VF_128 1 "register_operand" "0,x")
2685 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")])
2686 (match_dup 1)
2687 (const_int 1)))]
2688 "TARGET_SSE"
2689 "@
2690 cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
2691 vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
2692 [(set_attr "isa" "noavx,avx")
2693 (set_attr "type" "ssecmp")
2694 (set_attr "length_immediate" "1,*")
2695 (set_attr "prefix" "orig,vex")
2696 (set_attr "mode" "<ssescalarmode>")])
2697
;; Predicate for the comparison immediate, chosen by vector mode:
;; floating-point modes accept 0..31, integer modes accept 0..7.
2698 (define_mode_attr cmp_imm_predicate
2699 [(V16SF "const_0_to_31_operand") (V8DF "const_0_to_31_operand")
2700 (V16SI "const_0_to_7_operand") (V8DI "const_0_to_7_operand")
2701 (V8SF "const_0_to_31_operand") (V4DF "const_0_to_31_operand")
2702 (V8SI "const_0_to_7_operand") (V4DI "const_0_to_7_operand")
2703 (V4SF "const_0_to_31_operand") (V2DF "const_0_to_31_operand")
2704 (V4SI "const_0_to_7_operand") (V2DI "const_0_to_7_operand")
2705 (V32HI "const_0_to_7_operand") (V64QI "const_0_to_7_operand")
2706 (V16HI "const_0_to_7_operand") (V32QI "const_0_to_7_operand")
2707 (V8HI "const_0_to_7_operand") (V16QI "const_0_to_7_operand")])
2708
;; AVX-512 compare over the V48_AVX512VL modes that writes a mask
;; register (constraint Yk).  Modelled as UNSPEC_PCMP of operands 1 and 2
;; and an immediate whose allowed range comes from <cmp_imm_predicate>.
;; The mask-merge and SAE substitutions are defined outside this section.
2709 (define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name><round_saeonly_name>"
2710 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2711 (unspec:<avx512fmaskmode>
2712 [(match_operand:V48_AVX512VL 1 "register_operand" "v")
2713 (match_operand:V48_AVX512VL 2 "nonimmediate_operand" "<round_saeonly_constraint>")
2714 (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
2715 UNSPEC_PCMP))]
2716 "TARGET_AVX512F && <round_saeonly_mode512bit_condition>"
2717 "v<sseintprefix>cmp<ssemodesuffix>\t{%3, <round_saeonly_mask_scalar_merge_op4>%2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2<round_saeonly_mask_scalar_merge_op4>, %3}"
2718 [(set_attr "type" "ssecmp")
2719 (set_attr "length_immediate" "1")
2720 (set_attr "prefix" "evex")
2721 (set_attr "mode" "<sseinsnmode>")])
2722
;; Same UNSPEC_PCMP compare-into-mask shape for the VI12_AVX512VL modes,
;; enabled under TARGET_AVX512BW and emitted as vpcmp<suffix>.  Operand 3
;; is the comparison immediate checked by <cmp_imm_predicate>.
2723 (define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name>"
2724 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2725 (unspec:<avx512fmaskmode>
2726 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
2727 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
2728 (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
2729 UNSPEC_PCMP))]
2730 "TARGET_AVX512BW"
2731 "vpcmp<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
2732 [(set_attr "type" "ssecmp")
2733 (set_attr "length_immediate" "1")
2734 (set_attr "prefix" "evex")
2735 (set_attr "mode" "<sseinsnmode>")])
2736
;; Compare-into-mask for the VI12_AVX512VL modes, represented as
;; UNSPEC_UNSIGNED_PCMP and emitted as vpcmpu<suffix>.  Operand 3 is a
;; 0..7 immediate.  Enabled under TARGET_AVX512BW.
2737 (define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
2738 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2739 (unspec:<avx512fmaskmode>
2740 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
2741 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
2742 (match_operand:SI 3 "const_0_to_7_operand" "n")]
2743 UNSPEC_UNSIGNED_PCMP))]
2744 "TARGET_AVX512BW"
2745 "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
2746 [(set_attr "type" "ssecmp")
2747 (set_attr "length_immediate" "1")
2748 (set_attr "prefix" "evex")
2749 (set_attr "mode" "<sseinsnmode>")])
2750
;; Compare-into-mask for the VI48_AVX512VL modes, represented as
;; UNSPEC_UNSIGNED_PCMP and emitted as vpcmpu<suffix>.  Operand 3 is a
;; 0..7 immediate.  Enabled under TARGET_AVX512F.
2751 (define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
2752 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2753 (unspec:<avx512fmaskmode>
2754 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
2755 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
2756 (match_operand:SI 3 "const_0_to_7_operand" "n")]
2757 UNSPEC_UNSIGNED_PCMP))]
2758 "TARGET_AVX512F"
2759 "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
2760 [(set_attr "type" "ssecmp")
2761 (set_attr "length_immediate" "1")
2762 (set_attr "prefix" "evex")
2763 (set_attr "mode" "<sseinsnmode>")])
2764
;; Scalar compare on VF_128 modes into a mask register.  The UNSPEC_PCMP
;; result is ANDed with (const_int 1), so only bit 0 of the mask can be
;; set.  Operand 3 is a 0..31 comparison immediate.  Emitted with the
;; scalar mode suffix (vcmp<ssescalarmodesuffix>).
2765 (define_insn "avx512f_vmcmp<mode>3<round_saeonly_name>"
2766 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2767 (and:<avx512fmaskmode>
2768 (unspec:<avx512fmaskmode>
2769 [(match_operand:VF_128 1 "register_operand" "v")
2770 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2771 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2772 UNSPEC_PCMP)
2773 (const_int 1)))]
2774 "TARGET_AVX512F"
2775 "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}"
2776 [(set_attr "type" "ssecmp")
2777 (set_attr "length_immediate" "1")
2778 (set_attr "prefix" "evex")
2779 (set_attr "mode" "<ssescalarmode>")])
2780
;; Masked form of the scalar compare-into-mask pattern: the UNSPEC_PCMP
;; result is ANDed with (operand 4 AND 1), where operand 4 is a mask
;; register.  The output template prints operand 4 as the {%4} write-mask
;; on the destination.
2781 (define_insn "avx512f_vmcmp<mode>3_mask<round_saeonly_name>"
2782 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2783 (and:<avx512fmaskmode>
2784 (unspec:<avx512fmaskmode>
2785 [(match_operand:VF_128 1 "register_operand" "v")
2786 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2787 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2788 UNSPEC_PCMP)
2789 (and:<avx512fmaskmode>
2790 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")
2791 (const_int 1))))]
2792 "TARGET_AVX512F"
2793 "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_saeonly_op5>, %3}"
2794 [(set_attr "type" "ssecmp")
2795 (set_attr "length_immediate" "1")
2796 (set_attr "prefix" "evex")
2797 (set_attr "mode" "<ssescalarmode>")])
2798
;; Packed floating-point compare whose result is written to a mask
;; register (constraint "Yk"); the comparison is given by operator 3 and
;; printed through %D3.  The destination is a mask register and the
;; "prefix" attribute is "evex", so the pattern must not be enabled by
;; plain SSE: it is gated on TARGET_AVX512F, and VF_AVX512VL additionally
;; restricts the 128/256-bit modes to TARGET_AVX512VL.
2799 (define_insn "avx512f_maskcmp<mode>3"
2800 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2801 (match_operator:<avx512fmaskmode> 3 "sse_comparison_operator"
2802 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
2803 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "vm")]))]
2804 "TARGET_AVX512F"
2805 "vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2806 [(set_attr "type" "ssecmp")
2807 (set_attr "length_immediate" "1")
2808 (set_attr "prefix" "evex")
2809 (set_attr "mode" "<sseinsnmode>")])
2810
;; Scalar compare of element 0 of operand 0 against element 0 of operand 1
;; (both selected with vec_select ... (const_int 0)), setting FLAGS_REG in
;; CCFP mode.  Emitted as [v]comi<suffix>.  The prefix_data16 attribute is
;; 1 when the insn mode is DF and 0 otherwise; prefix_rep is forced to 0.
2811 (define_insn "<sse>_comi<round_saeonly_name>"
2812 [(set (reg:CCFP FLAGS_REG)
2813 (compare:CCFP
2814 (vec_select:MODEF
2815 (match_operand:<ssevecmode> 0 "register_operand" "v")
2816 (parallel [(const_int 0)]))
2817 (vec_select:MODEF
2818 (match_operand:<ssevecmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2819 (parallel [(const_int 0)]))))]
2820 "SSE_FLOAT_MODE_P (<MODE>mode)"
2821 "%vcomi<ssemodesuffix>\t{<round_saeonly_op2>%1, %0|%0, %<iptr>1<round_saeonly_op2>}"
2822 [(set_attr "type" "ssecomi")
2823 (set_attr "prefix" "maybe_vex")
2824 (set_attr "prefix_rep" "0")
2825 (set (attr "prefix_data16")
2826 (if_then_else (eq_attr "mode" "DF")
2827 (const_string "1")
2828 (const_string "0")))
2829 (set_attr "mode" "<MODE>")])
2830
;; Scalar compare of element 0 of operand 0 against element 0 of operand 1,
;; setting FLAGS_REG in CCFPU mode.  Emitted as [v]ucomi<suffix>.  The
;; prefix_data16 attribute is 1 when the insn mode is DF and 0 otherwise;
;; prefix_rep is forced to 0.
2831 (define_insn "<sse>_ucomi<round_saeonly_name>"
2832 [(set (reg:CCFPU FLAGS_REG)
2833 (compare:CCFPU
2834 (vec_select:MODEF
2835 (match_operand:<ssevecmode> 0 "register_operand" "v")
2836 (parallel [(const_int 0)]))
2837 (vec_select:MODEF
2838 (match_operand:<ssevecmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2839 (parallel [(const_int 0)]))))]
2840 "SSE_FLOAT_MODE_P (<MODE>mode)"
2841 "%vucomi<ssemodesuffix>\t{<round_saeonly_op2>%1, %0|%0, %<iptr>1<round_saeonly_op2>}"
2842 [(set_attr "type" "ssecomi")
2843 (set_attr "prefix" "maybe_vex")
2844 (set_attr "prefix_rep" "0")
2845 (set (attr "prefix_data16")
2846 (if_then_else (eq_attr "mode" "DF")
2847 (const_string "1")
2848 (const_string "0")))
2849 (set_attr "mode" "<MODE>")])
2850
;; vec_cmp expander for V48_AVX512VL operands with an <avx512fmaskmode>
;; result.  Operator 1 has an empty predicate, so any comparison code is
;; accepted.  All work is delegated to ix86_expand_mask_vec_cmp, which is
;; asserted to succeed.  Enabled under TARGET_AVX512F.
2851 (define_expand "vec_cmp<mode><avx512fmaskmodelower>"
2852 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
2853 (match_operator:<avx512fmaskmode> 1 ""
2854 [(match_operand:V48_AVX512VL 2 "register_operand")
2855 (match_operand:V48_AVX512VL 3 "nonimmediate_operand")]))]
2856 "TARGET_AVX512F"
2857 {
2858 bool ok = ix86_expand_mask_vec_cmp (operands);
2859 gcc_assert (ok);
2860 DONE;
2861 })
2862
;; vec_cmp expander for VI12_AVX512VL operands with an <avx512fmaskmode>
;; result.  Delegates to ix86_expand_mask_vec_cmp, which is asserted to
;; succeed.  Enabled under TARGET_AVX512BW.
2863 (define_expand "vec_cmp<mode><avx512fmaskmodelower>"
2864 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
2865 (match_operator:<avx512fmaskmode> 1 ""
2866 [(match_operand:VI12_AVX512VL 2 "register_operand")
2867 (match_operand:VI12_AVX512VL 3 "nonimmediate_operand")]))]
2868 "TARGET_AVX512BW"
2869 {
2870 bool ok = ix86_expand_mask_vec_cmp (operands);
2871 gcc_assert (ok);
2872 DONE;
2873 })
2874
;; vec_cmp expander for VI_256 operands with a vector result of mode
;; <sseintvecmode>.  Delegates to ix86_expand_int_vec_cmp, which is
;; asserted to succeed.  Enabled under TARGET_AVX2.
2875 (define_expand "vec_cmp<mode><sseintvecmodelower>"
2876 [(set (match_operand:<sseintvecmode> 0 "register_operand")
2877 (match_operator:<sseintvecmode> 1 ""
2878 [(match_operand:VI_256 2 "register_operand")
2879 (match_operand:VI_256 3 "nonimmediate_operand")]))]
2880 "TARGET_AVX2"
2881 {
2882 bool ok = ix86_expand_int_vec_cmp (operands);
2883 gcc_assert (ok);
2884 DONE;
2885 })
2886
;; vec_cmp expander for VI124_128 operands with a vector result of mode
;; <sseintvecmode>.  Delegates to ix86_expand_int_vec_cmp, which is
;; asserted to succeed.  Enabled under TARGET_SSE2.
2887 (define_expand "vec_cmp<mode><sseintvecmodelower>"
2888 [(set (match_operand:<sseintvecmode> 0 "register_operand")
2889 (match_operator:<sseintvecmode> 1 ""
2890 [(match_operand:VI124_128 2 "register_operand")
2891 (match_operand:VI124_128 3 "nonimmediate_operand")]))]
2892 "TARGET_SSE2"
2893 {
2894 bool ok = ix86_expand_int_vec_cmp (operands);
2895 gcc_assert (ok);
2896 DONE;
2897 })
2898
;; vec_cmp expander for V2DI operands with a V2DI result, kept separate
;; from the iterator-based expanders because it has its own enabling
;; condition (TARGET_SSE4_2).  Delegates to ix86_expand_int_vec_cmp,
;; which is asserted to succeed.
2899 (define_expand "vec_cmpv2div2di"
2900 [(set (match_operand:V2DI 0 "register_operand")
2901 (match_operator:V2DI 1 ""
2902 [(match_operand:V2DI 2 "register_operand")
2903 (match_operand:V2DI 3 "nonimmediate_operand")]))]
2904 "TARGET_SSE4_2"
2905 {
2906 bool ok = ix86_expand_int_vec_cmp (operands);
2907 gcc_assert (ok);
2908 DONE;
2909 })
2910
;; vec_cmp expander for VF_256 operands with a vector result of mode
;; <sseintvecmode>.  Delegates to ix86_expand_fp_vec_cmp, which is
;; asserted to succeed.  Enabled under TARGET_AVX.
2911 (define_expand "vec_cmp<mode><sseintvecmodelower>"
2912 [(set (match_operand:<sseintvecmode> 0 "register_operand")
2913 (match_operator:<sseintvecmode> 1 ""
2914 [(match_operand:VF_256 2 "register_operand")
2915 (match_operand:VF_256 3 "nonimmediate_operand")]))]
2916 "TARGET_AVX"
2917 {
2918 bool ok = ix86_expand_fp_vec_cmp (operands);
2919 gcc_assert (ok);
2920 DONE;
2921 })
2922
;; vec_cmp expander for VF_128 operands with a vector result of mode
;; <sseintvecmode>.  Delegates to ix86_expand_fp_vec_cmp, which is
;; asserted to succeed.  Enabled under TARGET_SSE.
2923 (define_expand "vec_cmp<mode><sseintvecmodelower>"
2924 [(set (match_operand:<sseintvecmode> 0 "register_operand")
2925 (match_operator:<sseintvecmode> 1 ""
2926 [(match_operand:VF_128 2 "register_operand")
2927 (match_operand:VF_128 3 "nonimmediate_operand")]))]
2928 "TARGET_SSE"
2929 {
2930 bool ok = ix86_expand_fp_vec_cmp (operands);
2931 gcc_assert (ok);
2932 DONE;
2933 })
2934
;; vec_cmpu expander for VI48_AVX512VL operands with an <avx512fmaskmode>
;; result.  Delegates to ix86_expand_mask_vec_cmp, which is asserted to
;; succeed.  Enabled under TARGET_AVX512F.
2935 (define_expand "vec_cmpu<mode><avx512fmaskmodelower>"
2936 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
2937 (match_operator:<avx512fmaskmode> 1 ""
2938 [(match_operand:VI48_AVX512VL 2 "register_operand")
2939 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand")]))]
2940 "TARGET_AVX512F"
2941 {
2942 bool ok = ix86_expand_mask_vec_cmp (operands);
2943 gcc_assert (ok);
2944 DONE;
2945 })
2946
;; vec_cmpu expander for VI12_AVX512VL operands with an <avx512fmaskmode>
;; result.  Delegates to ix86_expand_mask_vec_cmp, which is asserted to
;; succeed.  Enabled under TARGET_AVX512BW.
2947 (define_expand "vec_cmpu<mode><avx512fmaskmodelower>"
2948 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
2949 (match_operator:<avx512fmaskmode> 1 ""
2950 [(match_operand:VI12_AVX512VL 2 "register_operand")
2951 (match_operand:VI12_AVX512VL 3 "nonimmediate_operand")]))]
2952 "TARGET_AVX512BW"
2953 {
2954 bool ok = ix86_expand_mask_vec_cmp (operands);
2955 gcc_assert (ok);
2956 DONE;
2957 })
2958
;; vec_cmpu expander for VI_256 operands with a vector result of mode
;; <sseintvecmode>.  Delegates to ix86_expand_int_vec_cmp, which is
;; asserted to succeed.  Enabled under TARGET_AVX2.
2959 (define_expand "vec_cmpu<mode><sseintvecmodelower>"
2960 [(set (match_operand:<sseintvecmode> 0 "register_operand")
2961 (match_operator:<sseintvecmode> 1 ""
2962 [(match_operand:VI_256 2 "register_operand")
2963 (match_operand:VI_256 3 "nonimmediate_operand")]))]
2964 "TARGET_AVX2"
2965 {
2966 bool ok = ix86_expand_int_vec_cmp (operands);
2967 gcc_assert (ok);
2968 DONE;
2969 })
2970
;; vec_cmpu expander for VI124_128 operands with a vector result of mode
;; <sseintvecmode>.  Delegates to ix86_expand_int_vec_cmp, which is
;; asserted to succeed.  Enabled under TARGET_SSE2.
2971 (define_expand "vec_cmpu<mode><sseintvecmodelower>"
2972 [(set (match_operand:<sseintvecmode> 0 "register_operand")
2973 (match_operator:<sseintvecmode> 1 ""
2974 [(match_operand:VI124_128 2 "register_operand")
2975 (match_operand:VI124_128 3 "nonimmediate_operand")]))]
2976 "TARGET_SSE2"
2977 {
2978 bool ok = ix86_expand_int_vec_cmp (operands);
2979 gcc_assert (ok);
2980 DONE;
2981 })
2982
;; vec_cmpu expander for V2DI operands with a V2DI result, enabled under
;; TARGET_SSE4_2.  Delegates to ix86_expand_int_vec_cmp, which is asserted
;; to succeed.
2983 (define_expand "vec_cmpuv2div2di"
2984 [(set (match_operand:V2DI 0 "register_operand")
2985 (match_operator:V2DI 1 ""
2986 [(match_operand:V2DI 2 "register_operand")
2987 (match_operand:V2DI 3 "nonimmediate_operand")]))]
2988 "TARGET_SSE4_2"
2989 {
2990 bool ok = ix86_expand_int_vec_cmp (operands);
2991 gcc_assert (ok);
2992 DONE;
2993 })
2994
;; vcond expander: select between operands 1 and 2 (V_512 data modes)
;; according to comparison 3 of operands 4 and 5 (VF_512 modes).  The
;; condition requires the data and comparison modes to have the same
;; number of elements.  Delegates to ix86_expand_fp_vcond, which is
;; asserted to succeed.
2995 (define_expand "vcond<V_512:mode><VF_512:mode>"
2996 [(set (match_operand:V_512 0 "register_operand")
2997 (if_then_else:V_512
2998 (match_operator 3 ""
2999 [(match_operand:VF_512 4 "nonimmediate_operand")
3000 (match_operand:VF_512 5 "nonimmediate_operand")])
3001 (match_operand:V_512 1 "general_operand")
3002 (match_operand:V_512 2 "general_operand")))]
3003 "TARGET_AVX512F
3004 && (GET_MODE_NUNITS (<V_512:MODE>mode)
3005 == GET_MODE_NUNITS (<VF_512:MODE>mode))"
3006 {
3007 bool ok = ix86_expand_fp_vcond (operands);
3008 gcc_assert (ok);
3009 DONE;
3010 })
3011
;; vcond expander for V_256 data modes with a VF_256 comparison.  The
;; condition requires TARGET_AVX and equal element counts in the data and
;; comparison modes.  Delegates to ix86_expand_fp_vcond, which is asserted
;; to succeed.
3012 (define_expand "vcond<V_256:mode><VF_256:mode>"
3013 [(set (match_operand:V_256 0 "register_operand")
3014 (if_then_else:V_256
3015 (match_operator 3 ""
3016 [(match_operand:VF_256 4 "nonimmediate_operand")
3017 (match_operand:VF_256 5 "nonimmediate_operand")])
3018 (match_operand:V_256 1 "general_operand")
3019 (match_operand:V_256 2 "general_operand")))]
3020 "TARGET_AVX
3021 && (GET_MODE_NUNITS (<V_256:MODE>mode)
3022 == GET_MODE_NUNITS (<VF_256:MODE>mode))"
3023 {
3024 bool ok = ix86_expand_fp_vcond (operands);
3025 gcc_assert (ok);
3026 DONE;
3027 })
3028
;; vcond expander for V_128 data modes with a VF_128 comparison.  The
;; condition requires TARGET_SSE and equal element counts in the data and
;; comparison modes.  Delegates to ix86_expand_fp_vcond, which is asserted
;; to succeed.
3029 (define_expand "vcond<V_128:mode><VF_128:mode>"
3030 [(set (match_operand:V_128 0 "register_operand")
3031 (if_then_else:V_128
3032 (match_operator 3 ""
3033 [(match_operand:VF_128 4 "nonimmediate_operand")
3034 (match_operand:VF_128 5 "nonimmediate_operand")])
3035 (match_operand:V_128 1 "general_operand")
3036 (match_operand:V_128 2 "general_operand")))]
3037 "TARGET_SSE
3038 && (GET_MODE_NUNITS (<V_128:MODE>mode)
3039 == GET_MODE_NUNITS (<VF_128:MODE>mode))"
3040 {
3041 bool ok = ix86_expand_fp_vcond (operands);
3042 gcc_assert (ok);
3043 DONE;
3044 })
3045
;; vcond_mask expander for V48_AVX512VL modes with an <avx512fmaskmode>
;; mask in operand 3.  There is no preparation code, so the vec_merge
;; template is emitted as written.  Enabled under TARGET_AVX512F.
3046 (define_expand "vcond_mask_<mode><avx512fmaskmodelower>"
3047 [(set (match_operand:V48_AVX512VL 0 "register_operand")
3048 (vec_merge:V48_AVX512VL
3049 (match_operand:V48_AVX512VL 1 "nonimmediate_operand")
3050 (match_operand:V48_AVX512VL 2 "vector_move_operand")
3051 (match_operand:<avx512fmaskmode> 3 "register_operand")))]
3052 "TARGET_AVX512F")
3053
;; vcond_mask expander for VI12_AVX512VL modes with an <avx512fmaskmode>
;; mask in operand 3.  There is no preparation code, so the vec_merge
;; template is emitted as written.  Enabled under TARGET_AVX512BW.
3054 (define_expand "vcond_mask_<mode><avx512fmaskmodelower>"
3055 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
3056 (vec_merge:VI12_AVX512VL
3057 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
3058 (match_operand:VI12_AVX512VL 2 "vector_move_operand")
3059 (match_operand:<avx512fmaskmode> 3 "register_operand")))]
3060 "TARGET_AVX512BW")
3061
;; vcond_mask expander for VI_256 modes with a vector mask of mode
;; <sseintvecmode> in operand 3.  The template is not emitted; instead
;; ix86_expand_sse_movcc is called with (dest, mask, op1, op2).  Enabled
;; under TARGET_AVX2.
3062 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
3063 [(set (match_operand:VI_256 0 "register_operand")
3064 (vec_merge:VI_256
3065 (match_operand:VI_256 1 "nonimmediate_operand")
3066 (match_operand:VI_256 2 "vector_move_operand")
3067 (match_operand:<sseintvecmode> 3 "register_operand")))]
3068 "TARGET_AVX2"
3069 {
3070 ix86_expand_sse_movcc (operands[0], operands[3],
3071 operands[1], operands[2]);
3072 DONE;
3073 })
3074
;; vcond_mask expander for VI124_128 modes with a vector mask of mode
;; <sseintvecmode> in operand 3.  Expands through ix86_expand_sse_movcc
;; (dest, mask, op1, op2).  Enabled under TARGET_SSE2.
3075 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
3076 [(set (match_operand:VI124_128 0 "register_operand")
3077 (vec_merge:VI124_128
3078 (match_operand:VI124_128 1 "nonimmediate_operand")
3079 (match_operand:VI124_128 2 "vector_move_operand")
3080 (match_operand:<sseintvecmode> 3 "register_operand")))]
3081 "TARGET_SSE2"
3082 {
3083 ix86_expand_sse_movcc (operands[0], operands[3],
3084 operands[1], operands[2]);
3085 DONE;
3086 })
3087
;; vcond_mask expander for V2DI data with a V2DI mask in operand 3.
;; Expands through ix86_expand_sse_movcc (dest, mask, op1, op2).  Enabled
;; under TARGET_SSE4_2.
3088 (define_expand "vcond_mask_v2div2di"
3089 [(set (match_operand:V2DI 0 "register_operand")
3090 (vec_merge:V2DI
3091 (match_operand:V2DI 1 "nonimmediate_operand")
3092 (match_operand:V2DI 2 "vector_move_operand")
3093 (match_operand:V2DI 3 "register_operand")))]
3094 "TARGET_SSE4_2"
3095 {
3096 ix86_expand_sse_movcc (operands[0], operands[3],
3097 operands[1], operands[2]);
3098 DONE;
3099 })
3100
;; vcond_mask expander for VF_256 modes with a vector mask of mode
;; <sseintvecmode> in operand 3.  Expands through ix86_expand_sse_movcc
;; (dest, mask, op1, op2).  Enabled under TARGET_AVX.
3101 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
3102 [(set (match_operand:VF_256 0 "register_operand")
3103 (vec_merge:VF_256
3104 (match_operand:VF_256 1 "nonimmediate_operand")
3105 (match_operand:VF_256 2 "vector_move_operand")
3106 (match_operand:<sseintvecmode> 3 "register_operand")))]
3107 "TARGET_AVX"
3108 {
3109 ix86_expand_sse_movcc (operands[0], operands[3],
3110 operands[1], operands[2]);
3111 DONE;
3112 })
3113
;; vcond_mask expander for VF_128 modes with a vector mask of mode
;; <sseintvecmode> in operand 3.  Expands through ix86_expand_sse_movcc
;; (dest, mask, op1, op2).  Enabled under TARGET_SSE.
3114 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
3115 [(set (match_operand:VF_128 0 "register_operand")
3116 (vec_merge:VF_128
3117 (match_operand:VF_128 1 "nonimmediate_operand")
3118 (match_operand:VF_128 2 "vector_move_operand")
3119 (match_operand:<sseintvecmode> 3 "register_operand")))]
3120 "TARGET_SSE"
3121 {
3122 ix86_expand_sse_movcc (operands[0], operands[3],
3123 operands[1], operands[2]);
3124 DONE;
3125 })
3126
3127 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3128 ;;
3129 ;; Parallel floating point logical operations
3130 ;;
3131 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3132
;; (~op1 & op2) on the VF_128_256 modes.  The output code builds the
;; mnemonic at run time in a static buffer:
;;  - the suffix is "ps" when the insn's mode attribute is V8SF or V4SF,
;;    otherwise the mode's own <ssemodesuffix>;
;;  - alternative 0 prints the two-operand andn form, alternative 1 the
;;    three-operand vandn form;
;;  - when <mask_applied> holds and AVX512DQ is not available, the integer
;;    vpandn[dq] mnemonic is used instead.
;; The "mode" attribute is <ssePSmode> for 16-byte modes under
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL, <MODE> under AVX, V4SF when
;; optimizing for size, and <MODE> otherwise.
3133 (define_insn "<sse>_andnot<mode>3<mask_name>"
3134 [(set (match_operand:VF_128_256 0 "register_operand" "=x,v")
3135 (and:VF_128_256
3136 (not:VF_128_256
3137 (match_operand:VF_128_256 1 "register_operand" "0,v"))
3138 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,vm")))]
3139 "TARGET_SSE && <mask_avx512vl_condition>"
3140 {
3141 static char buf[128];
3142 const char *ops;
3143 const char *suffix;
3144
3145 switch (get_attr_mode (insn))
3146 {
3147 case MODE_V8SF:
3148 case MODE_V4SF:
3149 suffix = "ps";
3150 break;
3151 default:
3152 suffix = "<ssemodesuffix>";
3153 }
3154
3155 switch (which_alternative)
3156 {
3157 case 0:
3158 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
3159 break;
3160 case 1:
3161 ops = "vandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
3162 break;
3163 default:
3164 gcc_unreachable ();
3165 }
3166
3167 /* There is no vandnp[sd] in avx512f. Use vpandn[qd]. */
3168 if (<mask_applied> && !TARGET_AVX512DQ)
3169 {
3170 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
3171 ops = "vpandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
3172 }
3173
3174 snprintf (buf, sizeof (buf), ops, suffix);
3175 return buf;
3176 }
3177 [(set_attr "isa" "noavx,avx")
3178 (set_attr "type" "sselog")
3179 (set_attr "prefix" "orig,maybe_evex")
3180 (set (attr "mode")
3181 (cond [(and (match_test "<MODE_SIZE> == 16")
3182 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
3183 (const_string "<ssePSmode>")
3184 (match_test "TARGET_AVX")
3185 (const_string "<MODE>")
3186 (match_test "optimize_function_for_size_p (cfun)")
3187 (const_string "V4SF")
3188 ]
3189 (const_string "<MODE>")))])
3190
3191
;; (~op1 & op2) on the VF_512 modes, enabled under TARGET_AVX512F.  The
;; mnemonic is assembled as "v" + ops + "andn" + suffix: by default ops is
;; empty and suffix is <ssemodesuffix>; without AVX512DQ, ops becomes "p"
;; and the suffix "q" (DFmode elements) or "d", giving vpandn[qd].
3192 (define_insn "<sse>_andnot<mode>3<mask_name>"
3193 [(set (match_operand:VF_512 0 "register_operand" "=v")
3194 (and:VF_512
3195 (not:VF_512
3196 (match_operand:VF_512 1 "register_operand" "v"))
3197 (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
3198 "TARGET_AVX512F"
3199 {
3200 static char buf[128];
3201 const char *ops;
3202 const char *suffix;
3203
3204 suffix = "<ssemodesuffix>";
3205 ops = "";
3206
3207 /* There is no vandnp[sd] in avx512f. Use vpandn[qd]. */
3208 if (!TARGET_AVX512DQ)
3209 {
3210 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
3211 ops = "p";
3212 }
3213
3214 snprintf (buf, sizeof (buf),
3215 "v%sandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}",
3216 ops, suffix);
3217 return buf;
3218 }
3219 [(set_attr "type" "sselog")
3220 (set_attr "prefix" "evex")
3221 (set_attr "mode" "<sseinsnmode>")])
3222
;; Expander for the any_logic codes on VF_128_256 modes.  The only
;; preparation step is ix86_fixup_binary_operands_no_copy; the template is
;; then emitted.
3223 (define_expand "<code><mode>3<mask_name>"
3224 [(set (match_operand:VF_128_256 0 "register_operand")
3225 (any_logic:VF_128_256
3226 (match_operand:VF_128_256 1 "nonimmediate_operand")
3227 (match_operand:VF_128_256 2 "nonimmediate_operand")))]
3228 "TARGET_SSE && <mask_avx512vl_condition>"
3229 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
3230
;; Expander for the any_logic codes on VF_512 modes, enabled under
;; TARGET_AVX512F.  The only preparation step is
;; ix86_fixup_binary_operands_no_copy.
3231 (define_expand "<code><mode>3<mask_name>"
3232 [(set (match_operand:VF_512 0 "register_operand")
3233 (any_logic:VF_512
3234 (match_operand:VF_512 1 "nonimmediate_operand")
3235 (match_operand:VF_512 2 "nonimmediate_operand")))]
3236 "TARGET_AVX512F"
3237 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
3238
;; any_logic operation on the VF_128_256 modes; operands 1 and 2 are
;; marked commutative ("%") and must satisfy ix86_binary_operator_ok.
;; The output code builds the mnemonic in a static buffer:
;;  - the suffix is "ps" when the insn's mode attribute is V8SF or V4SF,
;;    otherwise <ssemodesuffix>;
;;  - alternative 0 is the two-operand form, alternative 1 the
;;    three-operand v-prefixed form;
;;  - when <mask_applied> holds and AVX512DQ is not available, the integer
;;    vp<logic>[dq] mnemonic is used instead.
;; The "mode" attribute follows the same cond as the andnot pattern for
;; these modes (<ssePSmode> / <MODE> / V4SF / <MODE>).
3239 (define_insn "*<code><mode>3<mask_name>"
3240 [(set (match_operand:VF_128_256 0 "register_operand" "=x,v")
3241 (any_logic:VF_128_256
3242 (match_operand:VF_128_256 1 "nonimmediate_operand" "%0,v")
3243 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,vm")))]
3244 "TARGET_SSE && <mask_avx512vl_condition>
3245 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
3246 {
3247 static char buf[128];
3248 const char *ops;
3249 const char *suffix;
3250
3251 switch (get_attr_mode (insn))
3252 {
3253 case MODE_V8SF:
3254 case MODE_V4SF:
3255 suffix = "ps";
3256 break;
3257 default:
3258 suffix = "<ssemodesuffix>";
3259 }
3260
3261 switch (which_alternative)
3262 {
3263 case 0:
3264 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
3265 break;
3266 case 1:
3267 ops = "v<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
3268 break;
3269 default:
3270 gcc_unreachable ();
3271 }
3272
3273 /* There is no v<logic>p[sd] in avx512f. Use vp<logic>[dq]. */
3274 if (<mask_applied> && !TARGET_AVX512DQ)
3275 {
3276 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
3277 ops = "vp<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
3278 }
3279
3280 snprintf (buf, sizeof (buf), ops, suffix);
3281 return buf;
3282 }
3283 [(set_attr "isa" "noavx,avx")
3284 (set_attr "type" "sselog")
3285 (set_attr "prefix" "orig,maybe_evex")
3286 (set (attr "mode")
3287 (cond [(and (match_test "<MODE_SIZE> == 16")
3288 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
3289 (const_string "<ssePSmode>")
3290 (match_test "TARGET_AVX")
3291 (const_string "<MODE>")
3292 (match_test "optimize_function_for_size_p (cfun)")
3293 (const_string "V4SF")
3294 ]
3295 (const_string "<MODE>")))])
3296
;; any_logic operation on the VF_512 modes; requires TARGET_AVX512F and
;; ix86_binary_operator_ok.  The mnemonic is assembled as
;; "v" + ops + <logic> + suffix: by default ops is empty and suffix is
;; <ssemodesuffix>; when (mode size is 64 or <mask_applied>) and AVX512DQ
;; is not available, ops becomes "p" and the suffix "q" (DFmode elements)
;; or "d", giving vp<logic>[dq].
3297 (define_insn "*<code><mode>3<mask_name>"
3298 [(set (match_operand:VF_512 0 "register_operand" "=v")
3299 (any_logic:VF_512
3300 (match_operand:VF_512 1 "nonimmediate_operand" "%v")
3301 (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
3302 "TARGET_AVX512F && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
3303 {
3304 static char buf[128];
3305 const char *ops;
3306 const char *suffix;
3307
3308 suffix = "<ssemodesuffix>";
3309 ops = "";
3310
3311 /* There is no v<logic>p[sd] in avx512f. Use vp<logic>[dq]. */
3312 if ((<MODE_SIZE> == 64 || <mask_applied>) && !TARGET_AVX512DQ)
3313 {
3314 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
3315 ops = "p";
3316 }
3317
3318 snprintf (buf, sizeof (buf),
3319 "v%s<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}",
3320 ops, suffix);
3321 return buf;
3322 }
3323 [(set_attr "type" "sselog")
3324 (set_attr "prefix" "evex")
3325 (set_attr "mode" "<sseinsnmode>")])
3326
;; copysign on the VF modes, expanded into three logical operations:
;;   tmp4 = ~mask & op1
;;   tmp5 =  mask & op2
;;   op0  =  tmp4 | tmp5
;; where mask (operand 3) is built by ix86_build_signbit_mask
;; (<MODE>mode, 1, 0) and tmp4/tmp5 are fresh pseudo registers.
3327 (define_expand "copysign<mode>3"
3328 [(set (match_dup 4)
3329 (and:VF
3330 (not:VF (match_dup 3))
3331 (match_operand:VF 1 "nonimmediate_operand")))
3332 (set (match_dup 5)
3333 (and:VF (match_dup 3)
3334 (match_operand:VF 2 "nonimmediate_operand")))
3335 (set (match_operand:VF 0 "register_operand")
3336 (ior:VF (match_dup 4) (match_dup 5)))]
3337 "TARGET_SSE"
3338 {
3339 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
3340
3341 operands[4] = gen_reg_rtx (<MODE>mode);
3342 operands[5] = gen_reg_rtx (<MODE>mode);
3343 })
3344
3345 ;; Also define scalar versions. These are used for abs, neg, and
3346 ;; conditional move. Using subregs into vector modes causes register
3347 ;; allocation lossage. These patterns do not allow memory operands
3348 ;; because the native instructions read the full 128 bits.
3349
;; Scalar (~op1 & op2) on the MODEF modes; all operands are registers.
;; The suffix is "ps" when the insn's mode attribute is V4SF, otherwise
;; <ssevecmodesuffix>.  Alternative 0 prints the two-operand andn form,
;; alternative 1 the three-operand vandn form.  The mnemonic is formatted
;; into a 32-byte static buffer.
3350 (define_insn "*andnot<mode>3"
3351 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
3352 (and:MODEF
3353 (not:MODEF
3354 (match_operand:MODEF 1 "register_operand" "0,x"))
3355 (match_operand:MODEF 2 "register_operand" "x,x")))]
3356 "SSE_FLOAT_MODE_P (<MODE>mode)"
3357 {
3358 static char buf[32];
3359 const char *ops;
3360 const char *suffix
3361 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
3362
3363 switch (which_alternative)
3364 {
3365 case 0:
3366 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
3367 break;
3368 case 1:
3369 ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3370 break;
3371 default:
3372 gcc_unreachable ();
3373 }
3374
3375 snprintf (buf, sizeof (buf), ops, suffix);
3376 return buf;
3377 }
3378 [(set_attr "isa" "noavx,avx")
3379 (set_attr "type" "sselog")
3380 (set_attr "prefix" "orig,vex")
3381 (set (attr "mode")
3382 (cond [(and (match_test "<MODE_SIZE> == 16")
3383 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
3384 (const_string "V4SF")
3385 (match_test "TARGET_AVX")
3386 (const_string "<ssevecmode>")
3387 (match_test "optimize_function_for_size_p (cfun)")
3388 (const_string "V4SF")
3389 ]
3390 (const_string "<ssevecmode>")))])
3391
;; (~op1 & op2) in TFmode.  The mnemonic is "andnps" when the insn's mode
;; attribute is V4SF and "pandn" otherwise; alternative 1 adds the "v"
;; prefix and the third operand.  prefix_data16 is 1 only for alternative
;; 0 in TI mode.  The "mode" attribute is V4SF under
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL, TI under AVX, V4SF without SSE2
;; or when optimizing for size, and TI otherwise.
3392 (define_insn "*andnottf3"
3393 [(set (match_operand:TF 0 "register_operand" "=x,x")
3394 (and:TF
3395 (not:TF (match_operand:TF 1 "register_operand" "0,x"))
3396 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
3397 "TARGET_SSE"
3398 {
3399 static char buf[32];
3400 const char *ops;
3401 const char *tmp
3402 = (get_attr_mode (insn) == MODE_V4SF) ? "andnps" : "pandn";
3403
3404 switch (which_alternative)
3405 {
3406 case 0:
3407 ops = "%s\t{%%2, %%0|%%0, %%2}";
3408 break;
3409 case 1:
3410 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3411 break;
3412 default:
3413 gcc_unreachable ();
3414 }
3415
3416 snprintf (buf, sizeof (buf), ops, tmp);
3417 return buf;
3418 }
3419 [(set_attr "isa" "noavx,avx")
3420 (set_attr "type" "sselog")
3421 (set (attr "prefix_data16")
3422 (if_then_else
3423 (and (eq_attr "alternative" "0")
3424 (eq_attr "mode" "TI"))
3425 (const_string "1")
3426 (const_string "*")))
3427 (set_attr "prefix" "orig,vex")
3428 (set (attr "mode")
3429 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
3430 (const_string "V4SF")
3431 (match_test "TARGET_AVX")
3432 (const_string "TI")
3433 (ior (not (match_test "TARGET_SSE2"))
3434 (match_test "optimize_function_for_size_p (cfun)"))
3435 (const_string "V4SF")
3436 ]
3437 (const_string "TI")))])
3438
;; Scalar any_logic operation on the MODEF modes; all operands are
;; registers and operands 1/2 are marked commutative.  The suffix is "ps"
;; when the insn's mode attribute is V4SF, otherwise <ssevecmodesuffix>.
;; Alternative 0 prints the two-operand form, alternative 1 the
;; three-operand v-prefixed form.
3439 (define_insn "*<code><mode>3"
3440 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
3441 (any_logic:MODEF
3442 (match_operand:MODEF 1 "register_operand" "%0,x")
3443 (match_operand:MODEF 2 "register_operand" "x,x")))]
3444 "SSE_FLOAT_MODE_P (<MODE>mode)"
3445 {
3446 static char buf[32];
3447 const char *ops;
3448 const char *suffix
3449 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
3450
3451 switch (which_alternative)
3452 {
3453 case 0:
3454 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
3455 break;
3456 case 1:
3457 ops = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3458 break;
3459 default:
3460 gcc_unreachable ();
3461 }
3462
3463 snprintf (buf, sizeof (buf), ops, suffix);
3464 return buf;
3465 }
3466 [(set_attr "isa" "noavx,avx")
3467 (set_attr "type" "sselog")
3468 (set_attr "prefix" "orig,vex")
3469 (set (attr "mode")
3470 (cond [(and (match_test "<MODE_SIZE> == 16")
3471 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
3472 (const_string "V4SF")
3473 (match_test "TARGET_AVX")
3474 (const_string "<ssevecmode>")
3475 (match_test "optimize_function_for_size_p (cfun)")
3476 (const_string "V4SF")
3477 ]
3478 (const_string "<ssevecmode>")))])
3479
;; Expander for the any_logic codes in TFmode, enabled under TARGET_SSE.
;; The only preparation step is ix86_fixup_binary_operands_no_copy.
3480 (define_expand "<code>tf3"
3481 [(set (match_operand:TF 0 "register_operand")
3482 (any_logic:TF
3483 (match_operand:TF 1 "nonimmediate_operand")
3484 (match_operand:TF 2 "nonimmediate_operand")))]
3485 "TARGET_SSE"
3486 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
3487
;; any_logic operation in TFmode; requires ix86_binary_operator_ok.  The
;; mnemonic is "<logic>ps" when the insn's mode attribute is V4SF and
;; "p<logic>" otherwise; alternative 1 adds the "v" prefix and the third
;; operand.  prefix_data16 is 1 only for alternative 0 in TI mode.  The
;; "mode" attribute is V4SF under TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL,
;; TI under AVX, V4SF without SSE2 or when optimizing for size, and TI
;; otherwise.
3488 (define_insn "*<code>tf3"
3489 [(set (match_operand:TF 0 "register_operand" "=x,x")
3490 (any_logic:TF
3491 (match_operand:TF 1 "nonimmediate_operand" "%0,x")
3492 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
3493 "TARGET_SSE
3494 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
3495 {
3496 static char buf[32];
3497 const char *ops;
3498 const char *tmp
3499 = (get_attr_mode (insn) == MODE_V4SF) ? "<logic>ps" : "p<logic>";
3500
3501 switch (which_alternative)
3502 {
3503 case 0:
3504 ops = "%s\t{%%2, %%0|%%0, %%2}";
3505 break;
3506 case 1:
3507 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3508 break;
3509 default:
3510 gcc_unreachable ();
3511 }
3512
3513 snprintf (buf, sizeof (buf), ops, tmp);
3514 return buf;
3515 }
3516 [(set_attr "isa" "noavx,avx")
3517 (set_attr "type" "sselog")
3518 (set (attr "prefix_data16")
3519 (if_then_else
3520 (and (eq_attr "alternative" "0")
3521 (eq_attr "mode" "TI"))
3522 (const_string "1")
3523 (const_string "*")))
3524 (set_attr "prefix" "orig,vex")
3525 (set (attr "mode")
3526 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
3527 (const_string "V4SF")
3528 (match_test "TARGET_AVX")
3529 (const_string "TI")
3530 (ior (not (match_test "TARGET_SSE2"))
3531 (match_test "optimize_function_for_size_p (cfun)"))
3532 (const_string "V4SF")
3533 ]
3534 (const_string "TI")))])
3535
3536 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3537 ;;
3538 ;; FMA floating point multiply/accumulate instructions. These include
3539 ;; scalar versions of the instructions as well as vector versions.
3540 ;;
3541 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3542
3543 ;; The standard names for scalar FMA are only available with SSE math enabled.
3544 ;; CPUID bit AVX512F enables evex encoded scalar and 512-bit fma. It doesn't
3545 ;; care about FMA bit, so we enable fma for TARGET_AVX512F even when TARGET_FMA
3546 ;; and TARGET_FMA4 are both false.
3547 ;; TODO: In theory AVX512F does not automatically imply FMA, and without FMA
3548 ;; one must force the EVEX encoding of the fma insns. Ideally we'd improve
3549 ;; GAS to allow proper prefix selection. However, for the moment all hardware
3550 ;; that supports AVX512F also supports FMA so we can ignore this for now.
;; Modes for the standard-name fma/fms/fnma/fnms expanders, each with its
;; own enabling condition: the scalar modes additionally require
;; TARGET_SSE_MATH; the 128/256-bit vector modes accept FMA, FMA4 or
;; AVX512VL; the 512-bit modes require AVX512F.
3551 (define_mode_iterator FMAMODEM
3552 [(SF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
3553 (DF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
3554 (V4SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3555 (V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3556 (V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3557 (V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3558 (V16SF "TARGET_AVX512F")
3559 (V8DF "TARGET_AVX512F")])
3560
;; op0 = fma (op1, op2, op3).  No condition string is given here; the
;; per-mode conditions come from the FMAMODEM iterator.
3561 (define_expand "fma<mode>4"
3562 [(set (match_operand:FMAMODEM 0 "register_operand")
3563 (fma:FMAMODEM
3564 (match_operand:FMAMODEM 1 "nonimmediate_operand")
3565 (match_operand:FMAMODEM 2 "nonimmediate_operand")
3566 (match_operand:FMAMODEM 3 "nonimmediate_operand")))])
3567
;; op0 = fma (op1, op2, -op3): the addend (operand 3) is negated.
3568 (define_expand "fms<mode>4"
3569 [(set (match_operand:FMAMODEM 0 "register_operand")
3570 (fma:FMAMODEM
3571 (match_operand:FMAMODEM 1 "nonimmediate_operand")
3572 (match_operand:FMAMODEM 2 "nonimmediate_operand")
3573 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
3574
;; op0 = fma (-op1, op2, op3): the first multiplicand (operand 1) is
;; negated.
3575 (define_expand "fnma<mode>4"
3576 [(set (match_operand:FMAMODEM 0 "register_operand")
3577 (fma:FMAMODEM
3578 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
3579 (match_operand:FMAMODEM 2 "nonimmediate_operand")
3580 (match_operand:FMAMODEM 3 "nonimmediate_operand")))])
3581
;; op0 = fma (-op1, op2, -op3): both the first multiplicand and the
;; addend are negated.
3582 (define_expand "fnms<mode>4"
3583 [(set (match_operand:FMAMODEM 0 "register_operand")
3584 (fma:FMAMODEM
3585 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
3586 (match_operand:FMAMODEM 2 "nonimmediate_operand")
3587 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
3588
3589 ;; The builtins for intrinsics are not constrained by SSE math enabled.
;; Same mode list as FMAMODEM but without the TARGET_SSE_MATH requirement
;; on the scalar modes.
3590 (define_mode_iterator FMAMODE_AVX512
3591 [(SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
3592 (DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
3593 (V4SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3594 (V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3595 (V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3596 (V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3597 (V16SF "TARGET_AVX512F")
3598 (V8DF "TARGET_AVX512F")])
3599
;; Scalar, 128-bit and 256-bit floating-point modes, with no per-mode
;; conditions; the 512-bit modes are not included.
3600 (define_mode_iterator FMAMODE
3601 [SF DF V4SF V2DF V8SF V4DF])
3602
;; op0 = fma (op1, op2, op3) over the FMAMODE_AVX512 modes.  No condition
;; string is given; the per-mode conditions come from the iterator.
3603 (define_expand "fma4i_fmadd_<mode>"
3604 [(set (match_operand:FMAMODE_AVX512 0 "register_operand")
3605 (fma:FMAMODE_AVX512
3606 (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand")
3607 (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
3608 (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand")))])
3609
;; Expander taking destination, three sources and a mask (operand 4).  It
;; forwards to gen_fma_fmadd_<mode>_maskz_1<round_expand_name>, inserting
;; CONST0_RTX (<MODE>mode) as an extra argument ahead of the mask operand.
3610 (define_expand "<avx512>_fmadd_<mode>_maskz<round_expand_name>"
3611 [(match_operand:VF_AVX512VL 0 "register_operand")
3612 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
3613 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
3614 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
3615 (match_operand:<avx512fmaskmode> 4 "register_operand")]
3616 "TARGET_AVX512F && <round_mode512bit_condition>"
3617 {
3618 emit_insn (gen_fma_fmadd_<mode>_maskz_1<round_expand_name> (
3619 operands[0], operands[1], operands[2], operands[3],
3620 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
3621 DONE;
3622 })
3623
;; Unmasked insn matching (fma op1 op2 op3) over FMAMODE, enabled by
;; TARGET_FMA || TARGET_FMA4.  Alternatives 0-2 (isa "fma") print the
;; three-operand vfmadd132/213/231 forms; the destination is tied to
;; operand 1 in alternatives 0 and 1 and to operand 3 in alternative 2.
;; Alternatives 3-4 (isa "fma4") print the four-operand vfmadd form and
;; differ only in which of operands 2/3 may be memory.
3624 (define_insn "*fma_fmadd_<mode>"
3625 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
3626 (fma:FMAMODE
3627 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x")
3628 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
3629 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x")))]
3630 "TARGET_FMA || TARGET_FMA4"
3631 "@
3632 vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3633 vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3634 vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3635 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3636 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3637 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3638 (set_attr "type" "ssemuladd")
3639 (set_attr "mode" "<MODE>")])
3640
3641 ;; Assume AVX-512F as the baseline: SF, DF, V16SF and V8DF are listed
;; unconditionally, while the V8SF, V4SF, V4DF and V2DF entries
;; additionally require TARGET_AVX512VL.
3642 (define_mode_iterator VF_SF_AVX512VL
3643 [SF V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
3644 DF V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
3645
;; fmadd insn over VF_SF_AVX512VL whose name carries the
;; <sd_mask_codefor>, <sd_maskz_name> and <round_name> substitution
;; attributes.  It matches (fma op1 op2 op3) and prints vfmadd132/213/231
;; with the <sd_mask_op4> and <round_sd_mask_op4> template fragments
;; attached.  Enabled by TARGET_AVX512F together with the two
;; substitution-supplied conditions.  The destination is tied to operand 1
;; in alternatives 0 and 1 and to operand 3 in alternative 2.
3646 (define_insn "<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name><round_name>"
3647 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3648 (fma:VF_SF_AVX512VL
3649 (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
3650 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3651 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")))]
3652 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3653 "@
3654 vfmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3655 vfmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3656 vfmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3657 [(set_attr "type" "ssemuladd")
3658 (set_attr "mode" "<MODE>")])
3659
;; Merge-masked fmadd over VF_AVX512VL: a vec_merge of (fma op1 op2 op3)
;; with operand 1 itself (match_dup 1) under mask register operand 4
;; (constraint "Yk").  Operand 1 is tied to the destination in both
;; alternatives, which print the 132 and 213 forms with the %{%4%} mask
;; annotation and the <round_op5> fragment.
3660 (define_insn "<avx512>_fmadd_<mode>_mask<round_name>"
3661 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3662 (vec_merge:VF_AVX512VL
3663 (fma:VF_AVX512VL
3664 (match_operand:VF_AVX512VL 1 "register_operand" "0,0")
3665 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3666 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>"))
3667 (match_dup 1)
3668 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3669 "TARGET_AVX512F && <round_mode512bit_condition>"
3670 "@
3671 vfmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3672 vfmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3673 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3674 (set_attr "type" "ssemuladd")
3675 (set_attr "mode" "<MODE>")])
3676
;; Merge-masked fmadd over VF_AVX512VL where the merge source is operand 3
;; (match_dup 3), which is tied to the destination; prints the 231 form
;; with the %{%4%} mask annotation and the <round_op5> fragment.
;; The condition includes <round_mode512bit_condition>, as the fmsub and
;; fnmadd _mask3 patterns do, so the <round_name> variant is restricted
;; the same way.  Operands 0 and 1 use the "v" register constraint like
;; every other _mask3 pattern in this family.
3677 (define_insn "<avx512>_fmadd_<mode>_mask3<round_name>"
3678 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3679 (vec_merge:VF_AVX512VL
3680 (fma:VF_AVX512VL
3681 (match_operand:VF_AVX512VL 1 "register_operand" "v")
3682 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3683 (match_operand:VF_AVX512VL 3 "register_operand" "0"))
3684 (match_dup 3)
3685 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3686 "TARGET_AVX512F && <round_mode512bit_condition>"
3687 "vfmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3688 [(set_attr "isa" "fma_avx512f")
3689 (set_attr "type" "ssemuladd")
3690 (set_attr "mode" "<MODE>")])
3691
;; Unmasked insn matching (fma op1 op2 (neg op3)) over FMAMODE, enabled by
;; TARGET_FMA || TARGET_FMA4.  Alternatives 0-2 (isa "fma") print the
;; three-operand vfmsub132/213/231 forms; the destination is tied to
;; operand 1 in alternatives 0 and 1 and to operand 3 in alternative 2.
;; Alternatives 3-4 (isa "fma4") print the four-operand vfmsub form.
3692 (define_insn "*fma_fmsub_<mode>"
3693 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
3694 (fma:FMAMODE
3695 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x")
3696 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
3697 (neg:FMAMODE
3698 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x"))))]
3699 "TARGET_FMA || TARGET_FMA4"
3700 "@
3701 vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3702 vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3703 vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3704 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3705 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3706 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3707 (set_attr "type" "ssemuladd")
3708 (set_attr "mode" "<MODE>")])
3709
;; fmsub insn over VF_SF_AVX512VL whose name carries the
;; <sd_mask_codefor>, <sd_maskz_name> and <round_name> substitution
;; attributes.  It matches (fma op1 op2 (neg op3)) and prints
;; vfmsub132/213/231 with the <sd_mask_op4> and <round_sd_mask_op4>
;; template fragments attached.  Enabled by TARGET_AVX512F together with
;; the two substitution-supplied conditions.
3710 (define_insn "<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name><round_name>"
3711 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3712 (fma:VF_SF_AVX512VL
3713 (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
3714 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3715 (neg:VF_SF_AVX512VL
3716 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))]
3717 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3718 "@
3719 vfmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3720 vfmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3721 vfmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3722 [(set_attr "type" "ssemuladd")
3723 (set_attr "mode" "<MODE>")])
3724
;; Merge-masked fmsub over VF_AVX512VL: a vec_merge of
;; (fma op1 op2 (neg op3)) with operand 1 itself (match_dup 1) under mask
;; register operand 4 (constraint "Yk").  Operand 1 is tied to the
;; destination in both alternatives, which print the 132 and 213 forms.
;; The condition includes <round_mode512bit_condition>, matching the
;; fmadd, fnmadd and fnmsub _mask patterns, so the <round_name> variant is
;; restricted the same way.
3725 (define_insn "<avx512>_fmsub_<mode>_mask<round_name>"
3726 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3727 (vec_merge:VF_AVX512VL
3728 (fma:VF_AVX512VL
3729 (match_operand:VF_AVX512VL 1 "register_operand" "0,0")
3730 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3731 (neg:VF_AVX512VL
3732 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>")))
3733 (match_dup 1)
3734 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3735 "TARGET_AVX512F && <round_mode512bit_condition>"
3736 "@
3737 vfmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3738 vfmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3739 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3740 (set_attr "type" "ssemuladd")
3741 (set_attr "mode" "<MODE>")])
3742
;; Merge-masked fmsub over VF_AVX512VL where the merge source is operand 3
;; (match_dup 3), which is tied to the destination.  Matches
;; (fma op1 op2 (neg op3)) and prints the 231 form with the %{%4%} mask
;; annotation and the <round_op5> fragment.
3743 (define_insn "<avx512>_fmsub_<mode>_mask3<round_name>"
3744 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3745 (vec_merge:VF_AVX512VL
3746 (fma:VF_AVX512VL
3747 (match_operand:VF_AVX512VL 1 "register_operand" "v")
3748 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3749 (neg:VF_AVX512VL
3750 (match_operand:VF_AVX512VL 3 "register_operand" "0")))
3751 (match_dup 3)
3752 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3753 "TARGET_AVX512F && <round_mode512bit_condition>"
3754 "vfmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3755 [(set_attr "isa" "fma_avx512f")
3756 (set_attr "type" "ssemuladd")
3757 (set_attr "mode" "<MODE>")])
3758
;; Unmasked insn matching (fma (neg op1) op2 op3) over FMAMODE, enabled by
;; TARGET_FMA || TARGET_FMA4.  Alternatives 0-2 (isa "fma") print the
;; three-operand vfnmadd132/213/231 forms; the destination is tied to
;; operand 1 in alternatives 0 and 1 and to operand 3 in alternative 2.
;; Alternatives 3-4 (isa "fma4") print the four-operand vfnmadd form.
3759 (define_insn "*fma_fnmadd_<mode>"
3760 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
3761 (fma:FMAMODE
3762 (neg:FMAMODE
3763 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x"))
3764 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
3765 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x")))]
3766 "TARGET_FMA || TARGET_FMA4"
3767 "@
3768 vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3769 vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3770 vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3771 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3772 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3773 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3774 (set_attr "type" "ssemuladd")
3775 (set_attr "mode" "<MODE>")])
3776
;; fnmadd insn over VF_SF_AVX512VL whose name carries the
;; <sd_mask_codefor>, <sd_maskz_name> and <round_name> substitution
;; attributes.  It matches (fma (neg op1) op2 op3) and prints
;; vfnmadd132/213/231 with the <sd_mask_op4> and <round_sd_mask_op4>
;; template fragments attached.  Enabled by TARGET_AVX512F together with
;; the two substitution-supplied conditions.
3777 (define_insn "<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name><round_name>"
3778 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3779 (fma:VF_SF_AVX512VL
3780 (neg:VF_SF_AVX512VL
3781 (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v"))
3782 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3783 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")))]
3784 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3785 "@
3786 vfnmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3787 vfnmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3788 vfnmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3789 [(set_attr "type" "ssemuladd")
3790 (set_attr "mode" "<MODE>")])
3791
;; Merge-masked fnmadd over VF_AVX512VL: a vec_merge of
;; (fma (neg op1) op2 op3) with operand 1 itself (match_dup 1) under mask
;; register operand 4 (constraint "Yk").  Operand 1 is tied to the
;; destination in both alternatives, which print the 132 and 213 forms
;; with the %{%4%} mask annotation and the <round_op5> fragment.
3792 (define_insn "<avx512>_fnmadd_<mode>_mask<round_name>"
3793 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3794 (vec_merge:VF_AVX512VL
3795 (fma:VF_AVX512VL
3796 (neg:VF_AVX512VL
3797 (match_operand:VF_AVX512VL 1 "register_operand" "0,0"))
3798 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3799 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>"))
3800 (match_dup 1)
3801 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3802 "TARGET_AVX512F && <round_mode512bit_condition>"
3803 "@
3804 vfnmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3805 vfnmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3806 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3807 (set_attr "type" "ssemuladd")
3808 (set_attr "mode" "<MODE>")])
3809
;; Merge-masked fnmadd over VF_AVX512VL where the merge source is
;; operand 3 (match_dup 3), which is tied to the destination.  Matches
;; (fma (neg op1) op2 op3) and prints the 231 form with the %{%4%} mask
;; annotation and the <round_op5> fragment.
3810 (define_insn "<avx512>_fnmadd_<mode>_mask3<round_name>"
3811 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3812 (vec_merge:VF_AVX512VL
3813 (fma:VF_AVX512VL
3814 (neg:VF_AVX512VL
3815 (match_operand:VF_AVX512VL 1 "register_operand" "v"))
3816 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3817 (match_operand:VF_AVX512VL 3 "register_operand" "0"))
3818 (match_dup 3)
3819 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3820 "TARGET_AVX512F && <round_mode512bit_condition>"
3821 "vfnmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3822 [(set_attr "isa" "fma_avx512f")
3823 (set_attr "type" "ssemuladd")
3824 (set_attr "mode" "<MODE>")])
3825
;; Unmasked insn matching (fma (neg op1) op2 (neg op3)) over FMAMODE,
;; enabled by TARGET_FMA || TARGET_FMA4.  Alternatives 0-2 (isa "fma")
;; print the three-operand vfnmsub132/213/231 forms; the destination is
;; tied to operand 1 in alternatives 0 and 1 and to operand 3 in
;; alternative 2.  Alternatives 3-4 (isa "fma4") print the four-operand
;; vfnmsub form.
;; The FMA3 templates are plain, exactly like the *fma_fmadd, *fma_fmsub
;; and *fma_fnmadd patterns: this pattern's name carries no mask or
;; rounding substitution attribute, so its templates must not reference
;; the <sd_mask_op4>/<round_sd_mask_op4> fragments, which belong to the
;; <sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name><round_name> variant.
3826 (define_insn "*fma_fnmsub_<mode>"
3827 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
3828 (fma:FMAMODE
3829 (neg:FMAMODE
3830 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x"))
3831 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
3832 (neg:FMAMODE
3833 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x"))))]
3834 "TARGET_FMA || TARGET_FMA4"
3835 "@
3836 vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3837 vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3838 vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3839 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3840 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3841 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3842 (set_attr "type" "ssemuladd")
3843 (set_attr "mode" "<MODE>")])
3844
;; fnmsub insn over VF_SF_AVX512VL whose name carries the
;; <sd_mask_codefor>, <sd_maskz_name> and <round_name> substitution
;; attributes.  It matches (fma (neg op1) op2 (neg op3)) and prints
;; vfnmsub132/213/231 with the <sd_mask_op4> and <round_sd_mask_op4>
;; template fragments attached.  Enabled by TARGET_AVX512F together with
;; the two substitution-supplied conditions.
3845 (define_insn "<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name><round_name>"
3846 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3847 (fma:VF_SF_AVX512VL
3848 (neg:VF_SF_AVX512VL
3849 (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v"))
3850 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3851 (neg:VF_SF_AVX512VL
3852 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))]
3853 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3854 "@
3855 vfnmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3856 vfnmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3857 vfnmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3858 [(set_attr "type" "ssemuladd")
3859 (set_attr "mode" "<MODE>")])
3860
;; Merge-masked fnmsub over VF_AVX512VL: a vec_merge of
;; (fma (neg op1) op2 (neg op3)) with operand 1 itself (match_dup 1)
;; under mask register operand 4 (constraint "Yk").  Operand 1 is tied to
;; the destination in both alternatives, which print the 132 and 213 forms
;; with the %{%4%} mask annotation and the <round_op5> fragment.
3861 (define_insn "<avx512>_fnmsub_<mode>_mask<round_name>"
3862 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3863 (vec_merge:VF_AVX512VL
3864 (fma:VF_AVX512VL
3865 (neg:VF_AVX512VL
3866 (match_operand:VF_AVX512VL 1 "register_operand" "0,0"))
3867 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3868 (neg:VF_AVX512VL
3869 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>")))
3870 (match_dup 1)
3871 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3872 "TARGET_AVX512F && <round_mode512bit_condition>"
3873 "@
3874 vfnmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3875 vfnmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3876 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3877 (set_attr "type" "ssemuladd")
3878 (set_attr "mode" "<MODE>")])
3879
;; Merge-masked fnmsub over VF_AVX512VL where the merge source is
;; operand 3 (match_dup 3), which is tied to the destination.  Matches
;; (fma (neg op1) op2 (neg op3)) and prints the 231 form with the %{%4%}
;; mask annotation and the <round_op5> fragment.
;; The condition includes <round_mode512bit_condition>, matching the
;; fmsub and fnmadd _mask3 patterns, so the <round_name> variant is
;; restricted the same way.
3880 (define_insn "<avx512>_fnmsub_<mode>_mask3<round_name>"
3881 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3882 (vec_merge:VF_AVX512VL
3883 (fma:VF_AVX512VL
3884 (neg:VF_AVX512VL
3885 (match_operand:VF_AVX512VL 1 "register_operand" "v"))
3886 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3887 (neg:VF_AVX512VL
3888 (match_operand:VF_AVX512VL 3 "register_operand" "0")))
3889 (match_dup 3)
3890 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3891 "TARGET_AVX512F && <round_mode512bit_condition>"
3892 "vfnmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3893 [(set_attr "isa" "fma_avx512f")
3894 (set_attr "type" "ssemuladd")
3895 (set_attr "mode" "<MODE>")])
3896
3897 ;; FMA parallel floating point multiply addsub and subadd operations.
3898
3899 ;; It would be possible to represent these without the UNSPEC as
3900 ;;
3901 ;; (vec_merge
3902 ;; (fma op1 op2 op3)
3903 ;; (fma op1 op2 (neg op3))
3904 ;; (merge-const))
3905 ;;
3906 ;; But this doesn't seem useful in practice.
3907
;; Expander "fmaddsub_<mode>": sets register operand 0 to an
;; UNSPEC_FMADDSUB of operands 1-3 for each mode in VF.  Enabled when any
;; of TARGET_FMA, TARGET_FMA4 or TARGET_AVX512F holds; there is no
;; preparation code.
3908 (define_expand "fmaddsub_<mode>"
3909 [(set (match_operand:VF 0 "register_operand")
3910 (unspec:VF
3911 [(match_operand:VF 1 "nonimmediate_operand")
3912 (match_operand:VF 2 "nonimmediate_operand")
3913 (match_operand:VF 3 "nonimmediate_operand")]
3914 UNSPEC_FMADDSUB))]
3915 "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
3916
;; Expander for the "maskz" form of fmaddsub over VF_AVX512VL.  Operands
;; are the destination (0), the three inputs (1-3) and a mask register in
;; <avx512fmaskmode> (4).  The body forwards everything to
;; gen_fma_fmaddsub_<mode>_maskz_1<round_expand_name>, inserting an
;; all-zero vector of the mode (CONST0_RTX) ahead of the mask operand, and
;; then finishes with DONE.
;; NOTE(review): the corresponding fmadd maskz expander also tests
;; <round_mode512bit_condition>; confirm whether its absence here is
;; intentional.
3917 (define_expand "<avx512>_fmaddsub_<mode>_maskz<round_expand_name>"
3918 [(match_operand:VF_AVX512VL 0 "register_operand")
3919 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
3920 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
3921 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
3922 (match_operand:<avx512fmaskmode> 4 "register_operand")]
3923 "TARGET_AVX512F"
3924 {
3925 emit_insn (gen_fma_fmaddsub_<mode>_maskz_1<round_expand_name> (
3926 operands[0], operands[1], operands[2], operands[3],
3927 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
3928 DONE;
3929 })
3930
;; Unmasked insn matching UNSPEC_FMADDSUB of operands 1-3 over VF_128_256,
;; enabled by TARGET_FMA || TARGET_FMA4.  Alternatives 0-2 (isa "fma")
;; print the three-operand vfmaddsub132/213/231 forms; the destination is
;; tied to operand 1 in alternatives 0 and 1 and to operand 3 in
;; alternative 2.  Alternatives 3-4 (isa "fma4") print the four-operand
;; vfmaddsub form.
3931 (define_insn "*fma_fmaddsub_<mode>"
3932 [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
3933 (unspec:VF_128_256
3934 [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,v,x,x")
3935 (match_operand:VF_128_256 2 "nonimmediate_operand" "vm,v,vm,x,m")
3936 (match_operand:VF_128_256 3 "nonimmediate_operand" "v,vm,0,xm,x")]
3937 UNSPEC_FMADDSUB))]
3938 "TARGET_FMA || TARGET_FMA4"
3939 "@
3940 vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3941 vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3942 vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3943 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3944 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3945 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3946 (set_attr "type" "ssemuladd")
3947 (set_attr "mode" "<MODE>")])
3948
;; fmaddsub insn over VF_SF_AVX512VL whose name carries the
;; <sd_mask_codefor>, <sd_maskz_name> and <round_name> substitution
;; attributes.  It matches UNSPEC_FMADDSUB of operands 1-3 and prints
;; vfmaddsub132/213/231 with the <sd_mask_op4> and <round_sd_mask_op4>
;; template fragments attached.  Enabled by TARGET_AVX512F together with
;; the two substitution-supplied conditions.
3949 (define_insn "<sd_mask_codefor>fma_fmaddsub_<mode><sd_maskz_name><round_name>"
3950 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3951 (unspec:VF_SF_AVX512VL
3952 [(match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
3953 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3954 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")]
3955 UNSPEC_FMADDSUB))]
3956 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3957 "@
3958 vfmaddsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3959 vfmaddsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3960 vfmaddsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3961 [(set_attr "type" "ssemuladd")
3962 (set_attr "mode" "<MODE>")])
3963
;; Merge-masked fmaddsub over VF_AVX512VL: a vec_merge of
;; UNSPEC_FMADDSUB (operands 1-3) with operand 1 itself (match_dup 1)
;; under mask register operand 4 (constraint "Yk").  Operand 1 is tied to
;; the destination in both alternatives, which print the 132 and 213
;; forms with the %{%4%} mask annotation and the <round_op5> fragment.
;; The name carries <round_name>, so the condition includes
;; <round_mode512bit_condition> exactly as the fmadd/fnmadd/fnmsub _mask
;; patterns do.
3964 (define_insn "<avx512>_fmaddsub_<mode>_mask<round_name>"
3965 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3966 (vec_merge:VF_AVX512VL
3967 (unspec:VF_AVX512VL
3968 [(match_operand:VF_AVX512VL 1 "register_operand" "0,0")
3969 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3970 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>")]
3971 UNSPEC_FMADDSUB)
3972 (match_dup 1)
3973 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3974 "TARGET_AVX512F && <round_mode512bit_condition>"
3975 "@
3976 vfmaddsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3977 vfmaddsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3978 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3979 (set_attr "type" "ssemuladd")
3980 (set_attr "mode" "<MODE>")])
3981
;; Merge-masked fmaddsub over VF_AVX512VL where the merge source is
;; operand 3 (match_dup 3), which is tied to the destination.  Matches
;; UNSPEC_FMADDSUB of operands 1-3 and prints the 231 form with the
;; %{%4%} mask annotation and the <round_op5> fragment.
;; The name carries <round_name>, so the condition includes
;; <round_mode512bit_condition> exactly as the fmsub/fnmadd _mask3
;; patterns do.
3982 (define_insn "<avx512>_fmaddsub_<mode>_mask3<round_name>"
3983 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3984 (vec_merge:VF_AVX512VL
3985 (unspec:VF_AVX512VL
3986 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
3987 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3988 (match_operand:VF_AVX512VL 3 "register_operand" "0")]
3989 UNSPEC_FMADDSUB)
3990 (match_dup 3)
3991 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3992 "TARGET_AVX512F && <round_mode512bit_condition>"
3993 "vfmaddsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3994 [(set_attr "isa" "fma_avx512f")
3995 (set_attr "type" "ssemuladd")
3996 (set_attr "mode" "<MODE>")])
3997
;; Unmasked insn matching UNSPEC_FMADDSUB of op1, op2 and (neg op3) over
;; VF_128_256, enabled by TARGET_FMA || TARGET_FMA4.  Alternatives 0-2
;; (isa "fma") print the three-operand vfmsubadd132/213/231 forms; the
;; destination is tied to operand 1 in alternatives 0 and 1 and to
;; operand 3 in alternative 2.  Alternatives 3-4 (isa "fma4") print the
;; four-operand vfmsubadd form.
3998 (define_insn "*fma_fmsubadd_<mode>"
3999 [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
4000 (unspec:VF_128_256
4001 [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,v,x,x")
4002 (match_operand:VF_128_256 2 "nonimmediate_operand" "vm,v,vm,x,m")
4003 (neg:VF_128_256
4004 (match_operand:VF_128_256 3 "nonimmediate_operand" "v,vm,0,xm,x"))]
4005 UNSPEC_FMADDSUB))]
4006 "TARGET_FMA || TARGET_FMA4"
4007 "@
4008 vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
4009 vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
4010 vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
4011 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
4012 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4013 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
4014 (set_attr "type" "ssemuladd")
4015 (set_attr "mode" "<MODE>")])
4016
;; fmsubadd insn over VF_SF_AVX512VL whose name carries the
;; <sd_mask_codefor>, <sd_maskz_name> and <round_name> substitution
;; attributes.  It matches UNSPEC_FMADDSUB of op1, op2 and (neg op3) and
;; prints vfmsubadd132/213/231 with the <sd_mask_op4> and
;; <round_sd_mask_op4> template fragments attached.  Enabled by
;; TARGET_AVX512F together with the two substitution-supplied conditions.
4017 (define_insn "<sd_mask_codefor>fma_fmsubadd_<mode><sd_maskz_name><round_name>"
4018 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
4019 (unspec:VF_SF_AVX512VL
4020 [(match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
4021 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
4022 (neg:VF_SF_AVX512VL
4023 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))]
4024 UNSPEC_FMADDSUB))]
4025 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
4026 "@
4027 vfmsubadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
4028 vfmsubadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
4029 vfmsubadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4030 [(set_attr "type" "ssemuladd")
4031 (set_attr "mode" "<MODE>")])
4032
;; Merge-masked fmsubadd over VF_AVX512VL: a vec_merge of
;; UNSPEC_FMADDSUB (op1, op2, (neg op3)) with operand 1 itself
;; (match_dup 1) under mask register operand 4 (constraint "Yk").
;; Operand 1 is tied to the destination in both alternatives, which print
;; the 132 and 213 forms with the %{%4%} mask annotation and the
;; <round_op5> fragment.
;; The name carries <round_name>, so the condition includes
;; <round_mode512bit_condition> exactly as the fmadd/fnmadd/fnmsub _mask
;; patterns do.
4033 (define_insn "<avx512>_fmsubadd_<mode>_mask<round_name>"
4034 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
4035 (vec_merge:VF_AVX512VL
4036 (unspec:VF_AVX512VL
4037 [(match_operand:VF_AVX512VL 1 "register_operand" "0,0")
4038 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
4039 (neg:VF_AVX512VL
4040 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>"))]
4041 UNSPEC_FMADDSUB)
4042 (match_dup 1)
4043 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
4044 "TARGET_AVX512F && <round_mode512bit_condition>"
4045 "@
4046 vfmsubadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
4047 vfmsubadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
4048 [(set_attr "isa" "fma_avx512f,fma_avx512f")
4049 (set_attr "type" "ssemuladd")
4050 (set_attr "mode" "<MODE>")])
4051
;; Merge-masked fmsubadd over VF_AVX512VL where the merge source is
;; operand 3 (match_dup 3), which is tied to the destination.  Matches
;; UNSPEC_FMADDSUB of op1, op2 and (neg op3) and prints the 231 form with
;; the %{%4%} mask annotation and the <round_op5> fragment.
;; The name carries <round_name>, so the condition includes
;; <round_mode512bit_condition> exactly as the fmsub/fnmadd _mask3
;; patterns do.
4052 (define_insn "<avx512>_fmsubadd_<mode>_mask3<round_name>"
4053 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
4054 (vec_merge:VF_AVX512VL
4055 (unspec:VF_AVX512VL
4056 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
4057 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
4058 (neg:VF_AVX512VL
4059 (match_operand:VF_AVX512VL 3 "register_operand" "0"))]
4060 UNSPEC_FMADDSUB)
4061 (match_dup 3)
4062 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4063 "TARGET_AVX512F && <round_mode512bit_condition>"
4064 "vfmsubadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
4065 [(set_attr "isa" "fma_avx512f")
4066 (set_attr "type" "ssemuladd")
4067 (set_attr "mode" "<MODE>")])
4068
4069 ;; FMA3 floating point scalar intrinsics. These merge result with
4070 ;; high-order elements from the destination register.
4071
;; Expander "fmai_vmfmadd_<mode><round_name>" over VF_128: sets operand 0
;; to a vec_merge, with mask (const_int 1), of (fma op1 op2 op3) and
;; operand 1 itself (match_dup 1).  Enabled by TARGET_FMA; there is no
;; preparation code.
4072 (define_expand "fmai_vmfmadd_<mode><round_name>"
4073 [(set (match_operand:VF_128 0 "register_operand")
4074 (vec_merge:VF_128
4075 (fma:VF_128
4076 (match_operand:VF_128 1 "<round_nimm_predicate>")
4077 (match_operand:VF_128 2 "<round_nimm_predicate>")
4078 (match_operand:VF_128 3 "<round_nimm_predicate>"))
4079 (match_dup 1)
4080 (const_int 1)))]
4081 "TARGET_FMA")
4082
;; Insn over VF_128 matching a vec_merge, with mask (const_int 1), of
;; (fma op1 op2 op3) and operand 1 itself (match_dup 1).  Operand 1 is
;; tied to the destination in both alternatives, which print the
;; vfmadd132/213 forms with <ssescalarmodesuffix>; the Intel-syntax half
;; of each template applies %<iptr> to the source operands.  Enabled by
;; TARGET_FMA || TARGET_AVX512F.
;; NOTE(review): the template and predicates use round attributes but the
;; name has no <round_name>, unlike the fnmadd/fnmsub patterns of this
;; group -- confirm whether that is intended.
4083 (define_insn "*fmai_fmadd_<mode>"
4084 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4085 (vec_merge:VF_128
4086 (fma:VF_128
4087 (match_operand:VF_128 1 "<round_nimm_predicate>" " 0, 0")
4088 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>, v")
4089 (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>"))
4090 (match_dup 1)
4091 (const_int 1)))]
4092 "TARGET_FMA || TARGET_AVX512F"
4093 "@
4094 vfmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
4095 vfmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
4096 [(set_attr "type" "ssemuladd")
4097 (set_attr "mode" "<MODE>")])
4098
;; Insn over VF_128 matching a vec_merge, with mask (const_int 1), of
;; (fma op1 op2 (neg op3)) and operand 1 itself (match_dup 1).  Operand 1
;; is tied to the destination in both alternatives, which print the
;; vfmsub132/213 forms with <ssescalarmodesuffix>.  Enabled by
;; TARGET_FMA || TARGET_AVX512F.
;; NOTE(review): the template and predicates use round attributes but the
;; name has no <round_name>, unlike the fnmadd/fnmsub patterns of this
;; group -- confirm whether that is intended.
4099 (define_insn "*fmai_fmsub_<mode>"
4100 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4101 (vec_merge:VF_128
4102 (fma:VF_128
4103 (match_operand:VF_128 1 "<round_nimm_predicate>" "0,0")
4104 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v")
4105 (neg:VF_128
4106 (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>")))
4107 (match_dup 1)
4108 (const_int 1)))]
4109 "TARGET_FMA || TARGET_AVX512F"
4110 "@
4111 vfmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
4112 vfmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
4113 [(set_attr "type" "ssemuladd")
4114 (set_attr "mode" "<MODE>")])
4115
;; Insn over VF_128 matching a vec_merge, with mask (const_int 1), of
;; (fma (neg op2) op1 op3) and operand 1 (match_dup 1).  Note that the
;; negated multiplicand is operand 2 and the plain one is operand 1, which
;; is tied to the destination in both alternatives.  Prints the
;; vfnmadd132/213 forms with <ssescalarmodesuffix>.  Enabled by
;; TARGET_FMA || TARGET_AVX512F.
4116 (define_insn "*fmai_fnmadd_<mode><round_name>"
4117 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4118 (vec_merge:VF_128
4119 (fma:VF_128
4120 (neg:VF_128
4121 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v"))
4122 (match_operand:VF_128 1 "<round_nimm_predicate>" "0,0")
4123 (match_operand:VF_128 3 "<round_nimm_predicate>" "v,<round_constraint>"))
4124 (match_dup 1)
4125 (const_int 1)))]
4126 "TARGET_FMA || TARGET_AVX512F"
4127 "@
4128 vfnmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
4129 vfnmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
4130 [(set_attr "type" "ssemuladd")
4131 (set_attr "mode" "<MODE>")])
4132
;; Insn over VF_128 matching a vec_merge, with mask (const_int 1), of
;; (fma (neg op2) op1 (neg op3)) and operand 1 (match_dup 1).  The
;; negated multiplicand is operand 2; operand 1 is tied to the destination
;; in both alternatives.  Prints the vfnmsub132/213 forms with
;; <ssescalarmodesuffix>.  Enabled by TARGET_FMA || TARGET_AVX512F.
4133 (define_insn "*fmai_fnmsub_<mode><round_name>"
4134 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4135 (vec_merge:VF_128
4136 (fma:VF_128
4137 (neg:VF_128
4138 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>, v"))
4139 (match_operand:VF_128 1 "<round_nimm_predicate>" " 0, 0")
4140 (neg:VF_128
4141 (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>")))
4142 (match_dup 1)
4143 (const_int 1)))]
4144 "TARGET_FMA || TARGET_AVX512F"
4145 "@
4146 vfnmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
4147 vfnmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
4148 [(set_attr "type" "ssemuladd")
4149 (set_attr "mode" "<MODE>")])
4150
4151 ;; FMA4 floating point scalar intrinsics. These write the
4152 ;; entire destination register, with the high-order elements zeroed.
4153
;; Expander "fma4i_vmfmadd_<mode>" over VF_128: sets operand 0 to a
;; vec_merge, with mask (const_int 1), of (fma op1 op2 op3) and operand 4.
;; Operand 4 is not supplied by the caller: the preparation statement
;; sets it to the all-zero vector of the mode (CONST0_RTX) and the pattern
;; refers to it through (match_dup 4).  Enabled by TARGET_FMA4.
4154 (define_expand "fma4i_vmfmadd_<mode>"
4155 [(set (match_operand:VF_128 0 "register_operand")
4156 (vec_merge:VF_128
4157 (fma:VF_128
4158 (match_operand:VF_128 1 "nonimmediate_operand")
4159 (match_operand:VF_128 2 "nonimmediate_operand")
4160 (match_operand:VF_128 3 "nonimmediate_operand"))
4161 (match_dup 4)
4162 (const_int 1)))]
4163 "TARGET_FMA4"
4164 "operands[4] = CONST0_RTX (<MODE>mode);")
4165
;; FMA4 insn over VF_128 matching a vec_merge, with mask (const_int 1),
;; of (fma op1 op2 op3) and a zero constant (operand 4, const0_operand).
;; The two alternatives differ only in which of operands 2/3 may be
;; memory; a single four-operand vfmadd template with
;; <ssescalarmodesuffix> serves both.  Enabled by TARGET_FMA4.
4166 (define_insn "*fma4i_vmfmadd_<mode>"
4167 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
4168 (vec_merge:VF_128
4169 (fma:VF_128
4170 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
4171 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
4172 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
4173 (match_operand:VF_128 4 "const0_operand")
4174 (const_int 1)))]
4175 "TARGET_FMA4"
4176 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
4177 [(set_attr "type" "ssemuladd")
4178 (set_attr "mode" "<MODE>")])
4179
;; FMA4 insn over VF_128 matching a vec_merge, with mask (const_int 1),
;; of (fma op1 op2 (neg op3)) and a zero constant (operand 4,
;; const0_operand).  The two alternatives differ only in which of
;; operands 2/3 may be memory; a single four-operand vfmsub template
;; serves both.  Enabled by TARGET_FMA4.
4180 (define_insn "*fma4i_vmfmsub_<mode>"
4181 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
4182 (vec_merge:VF_128
4183 (fma:VF_128
4184 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
4185 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
4186 (neg:VF_128
4187 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
4188 (match_operand:VF_128 4 "const0_operand")
4189 (const_int 1)))]
4190 "TARGET_FMA4"
4191 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
4192 [(set_attr "type" "ssemuladd")
4193 (set_attr "mode" "<MODE>")])
4194
;; FMA4 insn over VF_128 matching a vec_merge, with mask (const_int 1),
;; of (fma (neg op1) op2 op3) and a zero constant (operand 4,
;; const0_operand).  The two alternatives differ only in which of
;; operands 2/3 may be memory; a single four-operand vfnmadd template
;; serves both.  Enabled by TARGET_FMA4.
4195 (define_insn "*fma4i_vmfnmadd_<mode>"
4196 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
4197 (vec_merge:VF_128
4198 (fma:VF_128
4199 (neg:VF_128
4200 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
4201 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
4202 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
4203 (match_operand:VF_128 4 "const0_operand")
4204 (const_int 1)))]
4205 "TARGET_FMA4"
4206 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
4207 [(set_attr "type" "ssemuladd")
4208 (set_attr "mode" "<MODE>")])
4209
;; FMA4 insn over VF_128 matching a vec_merge, with mask (const_int 1),
;; of (fma (neg op1) op2 (neg op3)) and a zero constant (operand 4,
;; const0_operand).  The two alternatives differ only in which of
;; operands 2/3 may be memory; a single four-operand vfnmsub template
;; serves both.  Enabled by TARGET_FMA4.
4210 (define_insn "*fma4i_vmfnmsub_<mode>"
4211 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
4212 (vec_merge:VF_128
4213 (fma:VF_128
4214 (neg:VF_128
4215 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
4216 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
4217 (neg:VF_128
4218 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
4219 (match_operand:VF_128 4 "const0_operand")
4220 (const_int 1)))]
4221 "TARGET_FMA4"
4222 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
4223 [(set_attr "type" "ssemuladd")
4224 (set_attr "mode" "<MODE>")])
4225
4226 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4227 ;;
4228 ;; Parallel single-precision floating point conversion operations
4229 ;;
4230 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4231
;; "sse_cvtpi2ps" (TARGET_SSE): converts V2SI operand 2 (constraint "ym")
;; to V2SF with float, duplicates it to V4SF and merges it with V4SF
;; operand 1 under mask (const_int 3).  Operand 1 is tied to the
;; destination, so the two-operand cvtpi2ps template names only operands
;; 0 and 2.
4232 (define_insn "sse_cvtpi2ps"
4233 [(set (match_operand:V4SF 0 "register_operand" "=x")
4234 (vec_merge:V4SF
4235 (vec_duplicate:V4SF
4236 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
4237 (match_operand:V4SF 1 "register_operand" "0")
4238 (const_int 3)))]
4239 "TARGET_SSE"
4240 "cvtpi2ps\t{%2, %0|%0, %2}"
4241 [(set_attr "type" "ssecvt")
4242 (set_attr "mode" "V4SF")])
4243
;; "sse_cvtps2pi" (TARGET_SSE): applies UNSPEC_FIX_NOTRUNC to V4SF
;; operand 1 (register or memory) and selects elements 0 and 1 of the
;; V4SI result into V2SI operand 0 (constraint "=y").  Prints cvtps2pi;
;; the Intel-syntax half uses the %q1 operand modifier.
4244 (define_insn "sse_cvtps2pi"
4245 [(set (match_operand:V2SI 0 "register_operand" "=y")
4246 (vec_select:V2SI
4247 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
4248 UNSPEC_FIX_NOTRUNC)
4249 (parallel [(const_int 0) (const_int 1)])))]
4250 "TARGET_SSE"
4251 "cvtps2pi\t{%1, %0|%0, %q1}"
4252 [(set_attr "type" "ssecvt")
4253 (set_attr "unit" "mmx")
4254 (set_attr "mode" "DI")])
4255
;; "sse_cvttps2pi" (TARGET_SSE): applies fix to V4SF operand 1 (register
;; or memory) and selects elements 0 and 1 of the V4SI result into V2SI
;; operand 0 (constraint "=y").  Prints cvttps2pi; the Intel-syntax half
;; uses the %q1 operand modifier.  The prefix_rep attribute is set to "0"
;; explicitly.
4256 (define_insn "sse_cvttps2pi"
4257 [(set (match_operand:V2SI 0 "register_operand" "=y")
4258 (vec_select:V2SI
4259 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
4260 (parallel [(const_int 0) (const_int 1)])))]
4261 "TARGET_SSE"
4262 "cvttps2pi\t{%1, %0|%0, %q1}"
4263 [(set_attr "type" "ssecvt")
4264 (set_attr "unit" "mmx")
4265 (set_attr "prefix_rep" "0")
4266 (set_attr "mode" "SF")])
4267
;; "sse_cvtsi2ss<round_name>" (TARGET_SSE): converts SI operand 2 to SF
;; with float, duplicates it to V4SF and merges it with V4SF operand 1
;; under mask (const_int 1).  Alternatives 0 and 1 (isa "noavx") take the
;; source from a general register or memory, tie operand 1 to the
;; destination and print the two-operand cvtsi2ss form.  Alternative 2
;; (isa "avx") prints the three-operand vcvtsi2ss form with the
;; <round_op3> fragment.
4268 (define_insn "sse_cvtsi2ss<round_name>"
4269 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
4270 (vec_merge:V4SF
4271 (vec_duplicate:V4SF
4272 (float:SF (match_operand:SI 2 "<round_nimm_predicate>" "r,m,<round_constraint3>")))
4273 (match_operand:V4SF 1 "register_operand" "0,0,v")
4274 (const_int 1)))]
4275 "TARGET_SSE"
4276 "@
4277 cvtsi2ss\t{%2, %0|%0, %2}
4278 cvtsi2ss\t{%2, %0|%0, %2}
4279 vcvtsi2ss\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
4280 [(set_attr "isa" "noavx,noavx,avx")
4281 (set_attr "type" "sseicvt")
4282 (set_attr "athlon_decode" "vector,double,*")
4283 (set_attr "amdfam10_decode" "vector,double,*")
4284 (set_attr "bdver1_decode" "double,direct,*")
4285 (set_attr "btver2_decode" "double,double,double")
4286 (set_attr "znver1_decode" "double,double,double")
4287 (set_attr "prefix" "orig,orig,maybe_evex")
4288 (set_attr "mode" "SF")])
4289
;; "sse_cvtsi2ssq<round_name>" (TARGET_SSE && TARGET_64BIT): same shape
;; as sse_cvtsi2ss but with a DI source operand 2.  Alternatives 0 and 1
;; (isa "noavx") print the two-operand cvtsi2ssq form with operand 1 tied
;; to the destination; alternative 2 (isa "avx") prints the three-operand
;; vcvtsi2ssq form with the <round_op3> fragment.  prefix_rex is "1" for
;; the two noavx alternatives and length_vex is "4" for the avx one.
4290 (define_insn "sse_cvtsi2ssq<round_name>"
4291 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
4292 (vec_merge:V4SF
4293 (vec_duplicate:V4SF
4294 (float:SF (match_operand:DI 2 "<round_nimm_predicate>" "r,m,<round_constraint3>")))
4295 (match_operand:V4SF 1 "register_operand" "0,0,v")
4296 (const_int 1)))]
4297 "TARGET_SSE && TARGET_64BIT"
4298 "@
4299 cvtsi2ssq\t{%2, %0|%0, %2}
4300 cvtsi2ssq\t{%2, %0|%0, %2}
4301 vcvtsi2ssq\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
4302 [(set_attr "isa" "noavx,noavx,avx")
4303 (set_attr "type" "sseicvt")
4304 (set_attr "athlon_decode" "vector,double,*")
4305 (set_attr "amdfam10_decode" "vector,double,*")
4306 (set_attr "bdver1_decode" "double,direct,*")
4307 (set_attr "btver2_decode" "double,double,double")
4308 (set_attr "length_vex" "*,*,4")
4309 (set_attr "prefix_rex" "1,1,*")
4310 (set_attr "prefix" "orig,orig,maybe_evex")
4311 (set_attr "mode" "SF")])
4312
;; "sse_cvtss2si<round_name>" (TARGET_SSE): selects element 0 of V4SF
;; operand 1 as an SF value and applies UNSPEC_FIX_NOTRUNC to produce SI
;; operand 0 in a general register.  One template, %vcvtss2si with the
;; <round_op2> fragment, serves both alternatives; the Intel-syntax half
;; uses the %k1 operand modifier.
4313 (define_insn "sse_cvtss2si<round_name>"
4314 [(set (match_operand:SI 0 "register_operand" "=r,r")
4315 (unspec:SI
4316 [(vec_select:SF
4317 (match_operand:V4SF 1 "<round_nimm_predicate>" "v,<round_constraint2>")
4318 (parallel [(const_int 0)]))]
4319 UNSPEC_FIX_NOTRUNC))]
4320 "TARGET_SSE"
4321 "%vcvtss2si\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
4322 [(set_attr "type" "sseicvt")
4323 (set_attr "athlon_decode" "double,vector")
4324 (set_attr "bdver1_decode" "double,double")
4325 (set_attr "prefix_rep" "1")
4326 (set_attr "prefix" "maybe_vex")
4327 (set_attr "mode" "SI")])
4328
;; Variant of sse_cvtss2si whose source is a plain SFmode operand
;; (SSE register or memory) rather than element 0 of a V4SF vector.
;; It has no <round_name> substitution.
4329 (define_insn "sse_cvtss2si_2"
4330 [(set (match_operand:SI 0 "register_operand" "=r,r")
4331 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "v,m")]
4332 UNSPEC_FIX_NOTRUNC))]
4333 "TARGET_SSE"
4334 "%vcvtss2si\t{%1, %0|%0, %k1}"
4335 [(set_attr "type" "sseicvt")
4336 (set_attr "athlon_decode" "double,vector")
4337 (set_attr "amdfam10_decode" "double,double")
4338 (set_attr "bdver1_decode" "double,double")
4339 (set_attr "prefix_rep" "1")
4340 (set_attr "prefix" "maybe_vex")
4341 (set_attr "mode" "SI")])
4342
;; DImode-result counterpart of sse_cvtss2si: SFmode element 0 of
;; operand 1 -> 64-bit general register via UNSPEC_FIX_NOTRUNC.
;; Requires TARGET_64BIT; the AT&T mnemonic gets the {q} suffix.
4343 (define_insn "sse_cvtss2siq<round_name>"
4344 [(set (match_operand:DI 0 "register_operand" "=r,r")
4345 (unspec:DI
4346 [(vec_select:SF
4347 (match_operand:V4SF 1 "<round_nimm_predicate>" "v,<round_constraint2>")
4348 (parallel [(const_int 0)]))]
4349 UNSPEC_FIX_NOTRUNC))]
4350 "TARGET_SSE && TARGET_64BIT"
4351 "%vcvtss2si{q}\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
4352 [(set_attr "type" "sseicvt")
4353 (set_attr "athlon_decode" "double,vector")
4354 (set_attr "bdver1_decode" "double,double")
4355 (set_attr "prefix_rep" "1")
4356 (set_attr "prefix" "maybe_vex")
4357 (set_attr "mode" "DI")])
4358
;; DImode-result variant of sse_cvtss2si_2: the source is a scalar
;; SFmode register or memory operand.  Requires TARGET_64BIT.
4359 (define_insn "sse_cvtss2siq_2"
4360 [(set (match_operand:DI 0 "register_operand" "=r,r")
4361 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "v,m")]
4362 UNSPEC_FIX_NOTRUNC))]
4363 "TARGET_SSE && TARGET_64BIT"
4364 "%vcvtss2si{q}\t{%1, %0|%0, %k1}"
4365 [(set_attr "type" "sseicvt")
4366 (set_attr "athlon_decode" "double,vector")
4367 (set_attr "amdfam10_decode" "double,double")
4368 (set_attr "bdver1_decode" "double,double")
4369 (set_attr "prefix_rep" "1")
4370 (set_attr "prefix" "maybe_vex")
4371 (set_attr "mode" "DI")])
4372
;; Truncating conversion (`fix' rtx) of SFmode element 0 of operand 1
;; to an SImode general register.  Uses the <round_saeonly_*>
;; substitution attributes (defined outside this part of the file).
4373 (define_insn "sse_cvttss2si<round_saeonly_name>"
4374 [(set (match_operand:SI 0 "register_operand" "=r,r")
4375 (fix:SI
4376 (vec_select:SF
4377 (match_operand:V4SF 1 "<round_saeonly_nimm_predicate>" "v,<round_saeonly_constraint2>")
4378 (parallel [(const_int 0)]))))]
4379 "TARGET_SSE"
4380 "%vcvttss2si\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
4381 [(set_attr "type" "sseicvt")
4382 (set_attr "athlon_decode" "double,vector")
4383 (set_attr "amdfam10_decode" "double,double")
4384 (set_attr "bdver1_decode" "double,double")
4385 (set_attr "prefix_rep" "1")
4386 (set_attr "prefix" "maybe_vex")
4387 (set_attr "mode" "SI")])
4388
;; Truncating conversion (`fix' rtx) of SFmode element 0 of operand 1
;; to a DImode general register.  Requires TARGET_64BIT.
;; Alternative 1 uses <round_saeonly_constraint2>, matching the SImode
;; twin sse_cvttss2si and the sse2_cvttsd2si{,q} patterns, so that the
;; per-alternative decode attributes below keep lining up with the
;; register (alt. 0) / non-register (alt. 1) split.
4389 (define_insn "sse_cvttss2siq<round_saeonly_name>"
4390 [(set (match_operand:DI 0 "register_operand" "=r,r")
4391 (fix:DI
4392 (vec_select:SF
4393 (match_operand:V4SF 1 "<round_saeonly_nimm_predicate>" "v,<round_saeonly_constraint2>")
4394 (parallel [(const_int 0)]))))]
4395 "TARGET_SSE && TARGET_64BIT"
4396 "%vcvttss2si{q}\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
4397 [(set_attr "type" "sseicvt")
4398 (set_attr "athlon_decode" "double,vector")
4399 (set_attr "amdfam10_decode" "double,double")
4400 (set_attr "bdver1_decode" "double,double")
4401 (set_attr "prefix_rep" "1")
4402 (set_attr "prefix" "maybe_vex")
4403 (set_attr "mode" "DI")])
4404
;; Unsigned SImode integer (operand 2) -> scalar float of the VF_128
;; element mode, merged into element 0 of operand 1 (vec_merge mask 1).
;; AVX512F only, always EVEX-encoded; the extra
;; <round_modev4sf_condition> restricts the <round_name> variant.
4405 (define_insn "cvtusi2<ssescalarmodesuffix>32<round_name>"
4406 [(set (match_operand:VF_128 0 "register_operand" "=v")
4407 (vec_merge:VF_128
4408 (vec_duplicate:VF_128
4409 (unsigned_float:<ssescalarmode>
4410 (match_operand:SI 2 "<round_nimm_predicate>" "<round_constraint3>")))
4411 (match_operand:VF_128 1 "register_operand" "v")
4412 (const_int 1)))]
4413 "TARGET_AVX512F && <round_modev4sf_condition>"
4414 "vcvtusi2<ssescalarmodesuffix>\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
4415 [(set_attr "type" "sseicvt")
4416 (set_attr "prefix" "evex")
4417 (set_attr "mode" "<ssescalarmode>")])
4418
;; Unsigned DImode integer (operand 2) -> scalar float of the VF_128
;; element mode, merged into element 0 of operand 1.
;; AVX512F and TARGET_64BIT only.
4419 (define_insn "cvtusi2<ssescalarmodesuffix>64<round_name>"
4420 [(set (match_operand:VF_128 0 "register_operand" "=v")
4421 (vec_merge:VF_128
4422 (vec_duplicate:VF_128
4423 (unsigned_float:<ssescalarmode>
4424 (match_operand:DI 2 "<round_nimm_predicate>" "<round_constraint3>")))
4425 (match_operand:VF_128 1 "register_operand" "v")
4426 (const_int 1)))]
4427 "TARGET_AVX512F && TARGET_64BIT"
4428 "vcvtusi2<ssescalarmodesuffix>\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
4429 [(set_attr "type" "sseicvt")
4430 (set_attr "prefix" "evex")
4431 (set_attr "mode" "<ssescalarmode>")])
4432
;; Packed signed integer -> packed single-precision conversion for every
;; mode in the VF1 iterator; operand 1 has the matching <sseintvecmode>.
;; The <mask_*>/<round_*> 512-bit conditions gate the masked and
;; rounding variants generated by the substitutions.
4433 (define_insn "float<sseintvecmodelower><mode>2<mask_name><round_name>"
4434 [(set (match_operand:VF1 0 "register_operand" "=v")
4435 (float:VF1
4436 (match_operand:<sseintvecmode> 1 "<round_nimm_predicate>" "<round_constraint>")))]
4437 "TARGET_SSE2 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
4438 "%vcvtdq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4439 [(set_attr "type" "ssecvt")
4440 (set_attr "prefix" "maybe_vex")
4441 (set_attr "mode" "<sseinsnmode>")])
4442
;; Packed unsigned integer -> packed single-precision conversion
;; (unsigned_float) over the VF1_AVX512VL iterator.  AVX512F, EVEX only.
4443 (define_insn "ufloat<sseintvecmodelower><mode>2<mask_name><round_name>"
4444 [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
4445 (unsigned_float:VF1_AVX512VL
4446 (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
4447 "TARGET_AVX512F"
4448 "vcvtudq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4449 [(set_attr "type" "ssecvt")
4450 (set_attr "prefix" "evex")
4451 (set_attr "mode" "<MODE>")])
4452
;; Expander for unsigned packed int -> packed float (VF1 modes).
;; Dispatch, in order:
;;   - V16SF: emit the ufloatv16siv16sf2 insn directly;
;;   - TARGET_AVX512VL: emit the V4SF or V8SF ufloat insn;
;;   - otherwise: call ix86_expand_vector_convert_uns_vsivsf (defined
;;     outside this file chunk) to synthesize the conversion.
;; Always finishes with DONE, so the (empty) RTL template is never used.
4453 (define_expand "floatuns<sseintvecmodelower><mode>2"
4454 [(match_operand:VF1 0 "register_operand")
4455 (match_operand:<sseintvecmode> 1 "register_operand")]
4456 "TARGET_SSE2 && (<MODE>mode == V4SFmode || TARGET_AVX2)"
4457 {
4458 if (<MODE>mode == V16SFmode)
4459 emit_insn (gen_ufloatv16siv16sf2 (operands[0], operands[1]));
4460 else
4461 if (TARGET_AVX512VL)
4462 {
4463 if (<MODE>mode == V4SFmode)
4464 emit_insn (gen_ufloatv4siv4sf2 (operands[0], operands[1]));
4465 else
4466 emit_insn (gen_ufloatv8siv8sf2 (operands[0], operands[1]));
4467 }
4468 else
4469 ix86_expand_vector_convert_uns_vsivsf (operands[0], operands[1]);
4470
4471 DONE;
4472 })
4473
4474
4475 ;; For <sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode> insn pattern
;; Maps each integer vector mode to the lower-case name of the
;; single-precision vector mode with the same number of elements.
4476 (define_mode_attr sf2simodelower
4477 [(V16SI "v16sf") (V8SI "v8sf") (V4SI "v4sf")])
4478
;; Packed single-precision -> packed 32-bit integer conversion through
;; UNSPEC_FIX_NOTRUNC, for the VI4_AVX modes.  prefix_data16 is forced
;; to 1 only for the non-AVX encoding; under TARGET_AVX the attribute's
;; default ("*") is used.
4479 (define_insn "<sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode><mask_name>"
4480 [(set (match_operand:VI4_AVX 0 "register_operand" "=v")
4481 (unspec:VI4_AVX
4482 [(match_operand:<ssePSmode> 1 "nonimmediate_operand" "vm")]
4483 UNSPEC_FIX_NOTRUNC))]
4484 "TARGET_SSE2 && <mask_mode512bit_condition>"
4485 "%vcvtps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4486 [(set_attr "type" "ssecvt")
4487 (set (attr "prefix_data16")
4488 (if_then_else
4489 (match_test "TARGET_AVX")
4490 (const_string "*")
4491 (const_string "1")))
4492 (set_attr "prefix" "maybe_vex")
4493 (set_attr "mode" "<sseinsnmode>")])
4494
;; 512-bit V16SF -> V16SI conversion through UNSPEC_FIX_NOTRUNC, with
;; the mask and rounding substitutions applied.  AVX512F, EVEX only.
4495 (define_insn "<mask_codefor>avx512f_fix_notruncv16sfv16si<mask_name><round_name>"
4496 [(set (match_operand:V16SI 0 "register_operand" "=v")
4497 (unspec:V16SI
4498 [(match_operand:V16SF 1 "<round_nimm_predicate>" "<round_constraint>")]
4499 UNSPEC_FIX_NOTRUNC))]
4500 "TARGET_AVX512F"
4501 "vcvtps2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4502 [(set_attr "type" "ssecvt")
4503 (set_attr "prefix" "evex")
4504 (set_attr "mode" "XI")])
4505
;; Packed single-precision -> packed unsigned 32-bit integer conversion
;; through UNSPEC_UNSIGNED_FIX_NOTRUNC, over VI4_AVX512VL.  EVEX only.
4506 (define_insn "<mask_codefor><avx512>_ufix_notrunc<sf2simodelower><mode><mask_name><round_name>"
4507 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
4508 (unspec:VI4_AVX512VL
4509 [(match_operand:<ssePSmode> 1 "nonimmediate_operand" "<round_constraint>")]
4510 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4511 "TARGET_AVX512F"
4512 "vcvtps2udq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4513 [(set_attr "type" "ssecvt")
4514 (set_attr "prefix" "evex")
4515 (set_attr "mode" "<sseinsnmode>")])
4516
;; Packed single-precision -> packed signed 64-bit integer conversion
;; (UNSPEC_FIX_NOTRUNC) for the 256- and 512-bit VI8 modes.  AVX512DQ;
;; <round_mode512bit_condition> gates the rounding variant.
4517 (define_insn "<mask_codefor>avx512dq_cvtps2qq<mode><mask_name><round_name>"
4518 [(set (match_operand:VI8_256_512 0 "register_operand" "=v")
4519 (unspec:VI8_256_512 [(match_operand:<ssePSmode2> 1 "nonimmediate_operand" "<round_constraint>")]
4520 UNSPEC_FIX_NOTRUNC))]
4521 "TARGET_AVX512DQ && <round_mode512bit_condition>"
4522 "vcvtps2qq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4523 [(set_attr "type" "ssecvt")
4524 (set_attr "prefix" "evex")
4525 (set_attr "mode" "<sseinsnmode>")])
4526
;; 128-bit form of cvtps2qq: only elements 0 and 1 of the V4SF operand
;; are selected and converted to V2DI (UNSPEC_FIX_NOTRUNC).
;; Needs both AVX512DQ and AVX512VL.
4527 (define_insn "<mask_codefor>avx512dq_cvtps2qqv2di<mask_name>"
4528 [(set (match_operand:V2DI 0 "register_operand" "=v")
4529 (unspec:V2DI
4530 [(vec_select:V2SF
4531 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
4532 (parallel [(const_int 0) (const_int 1)]))]
4533 UNSPEC_FIX_NOTRUNC))]
4534 "TARGET_AVX512DQ && TARGET_AVX512VL"
4535 "vcvtps2qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4536 [(set_attr "type" "ssecvt")
4537 (set_attr "prefix" "evex")
4538 (set_attr "mode" "TI")])
4539
;; Unsigned counterpart of avx512dq_cvtps2qq<mode>: packed
;; single-precision -> packed unsigned 64-bit integers
;; (UNSPEC_UNSIGNED_FIX_NOTRUNC) for the 256- and 512-bit VI8 modes.
4540 (define_insn "<mask_codefor>avx512dq_cvtps2uqq<mode><mask_name><round_name>"
4541 [(set (match_operand:VI8_256_512 0 "register_operand" "=v")
4542 (unspec:VI8_256_512 [(match_operand:<ssePSmode2> 1 "nonimmediate_operand" "<round_constraint>")]
4543 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4544 "TARGET_AVX512DQ && <round_mode512bit_condition>"
4545 "vcvtps2uqq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4546 [(set_attr "type" "ssecvt")
4547 (set_attr "prefix" "evex")
4548 (set_attr "mode" "<sseinsnmode>")])
4549
;; 128-bit form of cvtps2uqq: elements 0 and 1 of the V4SF operand are
;; converted to unsigned V2DI (UNSPEC_UNSIGNED_FIX_NOTRUNC).
;; Needs both AVX512DQ and AVX512VL.
4550 (define_insn "<mask_codefor>avx512dq_cvtps2uqqv2di<mask_name>"
4551 [(set (match_operand:V2DI 0 "register_operand" "=v")
4552 (unspec:V2DI
4553 [(vec_select:V2SF
4554 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
4555 (parallel [(const_int 0) (const_int 1)]))]
4556 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4557 "TARGET_AVX512DQ && TARGET_AVX512VL"
4558 "vcvtps2uqq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4559 [(set_attr "type" "ssecvt")
4560 (set_attr "prefix" "evex")
4561 (set_attr "mode" "TI")])
4562
;; Truncating V16SF -> V16SI conversion.  The any_fix code iterator
;; yields both the signed and unsigned patterns; <fixsuffix> supplies
;; the matching pattern-name prefix and mnemonic infix.  AVX512F only.
4563 (define_insn "<fixsuffix>fix_truncv16sfv16si2<mask_name><round_saeonly_name>"
4564 [(set (match_operand:V16SI 0 "register_operand" "=v")
4565 (any_fix:V16SI
4566 (match_operand:V16SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
4567 "TARGET_AVX512F"
4568 "vcvttps2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
4569 [(set_attr "type" "ssecvt")
4570 (set_attr "prefix" "evex")
4571 (set_attr "mode" "XI")])
4572
;; Truncating signed V8SF -> V8SI conversion.  The unmasked form needs
;; TARGET_AVX; <mask_avx512vl_condition> additionally gates the masked
;; variant produced by the <mask_name> substitution.
4573 (define_insn "fix_truncv8sfv8si2<mask_name>"
4574 [(set (match_operand:V8SI 0 "register_operand" "=v")
4575 (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "vm")))]
4576 "TARGET_AVX && <mask_avx512vl_condition>"
4577 "vcvttps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4578 [(set_attr "type" "ssecvt")
4579 (set_attr "prefix" "<mask_prefix>")
4580 (set_attr "mode" "OI")])
4581
;; Truncating signed V4SF -> V4SI conversion.  The unmasked form needs
;; TARGET_SSE2; <mask_avx512vl_condition> additionally gates the masked
;; variant produced by the <mask_name> substitution.
;; prefix_rep and prefix_data16 are each set exactly once: under
;; TARGET_AVX they fall back to the attribute default ("*"), otherwise
;; the legacy encoding is described as rep prefix = 1, data16 prefix = 0.
;; (A second, unconditional prefix_data16 "0" that duplicated the
;; conditional setting has been dropped.)
4582 (define_insn "fix_truncv4sfv4si2<mask_name>"
4583 [(set (match_operand:V4SI 0 "register_operand" "=v")
4584 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "vm")))]
4585 "TARGET_SSE2 && <mask_avx512vl_condition>"
4586 "%vcvttps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4587 [(set_attr "type" "ssecvt")
4588 (set (attr "prefix_rep")
4589 (if_then_else
4590 (match_test "TARGET_AVX")
4591 (const_string "*")
4592 (const_string "1")))
4593 (set (attr "prefix_data16")
4594 (if_then_else
4595 (match_test "TARGET_AVX")
4596 (const_string "*")
4597 (const_string "0")))
4599 (set_attr "prefix" "<mask_prefix2>")
4600 (set_attr "mode" "TI")])
4601
;; Expander for truncating float -> unsigned int conversion (VF1 modes).
;; V16SF maps directly onto the ufix_truncv16sfv16si2 insn.  For the
;; other modes the input is first adjusted by
;; ix86_expand_adjust_ufix_to_sfix_si (defined outside this chunk;
;; it returns the adjusted value and stores a second rtx in tmp[2]),
;; converted with the signed fix_trunc pattern, and the signed result
;; is then XORed with tmp[2] to form operand 0.
4602 (define_expand "fixuns_trunc<mode><sseintvecmodelower>2"
4603 [(match_operand:<sseintvecmode> 0 "register_operand")
4604 (match_operand:VF1 1 "register_operand")]
4605 "TARGET_SSE2"
4606 {
4607 if (<MODE>mode == V16SFmode)
4608 emit_insn (gen_ufix_truncv16sfv16si2 (operands[0],
4609 operands[1]));
4610 else
4611 {
4612 rtx tmp[3];
4613 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
4614 tmp[1] = gen_reg_rtx (<sseintvecmode>mode);
4615 emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (tmp[1], tmp[0]));
4616 emit_insn (gen_xor<sseintvecmodelower>3 (operands[0], tmp[1], tmp[2]));
4617 }
4618 DONE;
4619 })
4620
4621 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4622 ;;
4623 ;; Parallel double-precision floating point conversion operations
4624 ;;
4625 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4626
;; V2SI -> V2DF signed conversion.  Alternative 0 takes the source from
;; an MMX register ("y"), alternative 1 from memory; only the MMX
;; alternative is tagged unit "mmx" and prefix_data16 "1".
4627 (define_insn "sse2_cvtpi2pd"
4628 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
4629 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
4630 "TARGET_SSE2"
4631 "cvtpi2pd\t{%1, %0|%0, %1}"
4632 [(set_attr "type" "ssecvt")
4633 (set_attr "unit" "mmx,*")
4634 (set_attr "prefix_data16" "1,*")
4635 (set_attr "mode" "V2DF")])
4636
;; V2DF -> V2SI conversion through UNSPEC_FIX_NOTRUNC; the result goes
;; to an MMX register ("y"), the source is an SSE register or memory.
4637 (define_insn "sse2_cvtpd2pi"
4638 [(set (match_operand:V2SI 0 "register_operand" "=y")
4639 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
4640 UNSPEC_FIX_NOTRUNC))]
4641 "TARGET_SSE2"
4642 "cvtpd2pi\t{%1, %0|%0, %1}"
4643 [(set_attr "type" "ssecvt")
4644 (set_attr "unit" "mmx")
4645 (set_attr "bdver1_decode" "double")
4646 (set_attr "btver2_decode" "direct")
4647 (set_attr "prefix_data16" "1")
4648 (set_attr "mode" "DI")])
4649
;; Truncating V2DF -> V2SI conversion (`fix' rtx); the result goes to an
;; MMX register ("y"), the source is an SSE register or memory.
4650 (define_insn "sse2_cvttpd2pi"
4651 [(set (match_operand:V2SI 0 "register_operand" "=y")
4652 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
4653 "TARGET_SSE2"
4654 "cvttpd2pi\t{%1, %0|%0, %1}"
4655 [(set_attr "type" "ssecvt")
4656 (set_attr "unit" "mmx")
4657 (set_attr "bdver1_decode" "double")
4658 (set_attr "prefix_data16" "1")
4659 (set_attr "mode" "TI")])
4660
;; Convert the SImode integer in operand 2 to DFmode and merge it into
;; element 0 of the V2DF operand 1 (vec_merge mask 1).
;; Alternatives 0 and 1 are the non-AVX two-operand forms; alternative 2
;; is the three-operand AVX form.  Unlike sse2_cvtsi2sdq, this pattern
;; has no <round_name> substitution.
4661 (define_insn "sse2_cvtsi2sd"
4662 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
4663 (vec_merge:V2DF
4664 (vec_duplicate:V2DF
4665 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
4666 (match_operand:V2DF 1 "register_operand" "0,0,v")
4667 (const_int 1)))]
4668 "TARGET_SSE2"
4669 "@
4670 cvtsi2sd\t{%2, %0|%0, %2}
4671 cvtsi2sd\t{%2, %0|%0, %2}
4672 vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
4673 [(set_attr "isa" "noavx,noavx,avx")
4674 (set_attr "type" "sseicvt")
4675 (set_attr "athlon_decode" "double,direct,*")
4676 (set_attr "amdfam10_decode" "vector,double,*")
4677 (set_attr "bdver1_decode" "double,direct,*")
4678 (set_attr "btver2_decode" "double,double,double")
4679 (set_attr "znver1_decode" "double,double,double")
4680 (set_attr "prefix" "orig,orig,maybe_evex")
4681 (set_attr "mode" "DF")])
4682
;; Convert the DImode integer in operand 2 to DFmode and merge it into
;; element 0 of the V2DF operand 1.  Requires TARGET_64BIT.
;; Alternative 2 (AVX) also emits <round_op3>; the non-AVX alternatives
;; set prefix_rex to 1, and the AVX alternative sets length_vex to 4.
4683 (define_insn "sse2_cvtsi2sdq<round_name>"
4684 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
4685 (vec_merge:V2DF
4686 (vec_duplicate:V2DF
4687 (float:DF (match_operand:DI 2 "<round_nimm_predicate>" "r,m,<round_constraint3>")))
4688 (match_operand:V2DF 1 "register_operand" "0,0,v")
4689 (const_int 1)))]
4690 "TARGET_SSE2 && TARGET_64BIT"
4691 "@
4692 cvtsi2sdq\t{%2, %0|%0, %2}
4693 cvtsi2sdq\t{%2, %0|%0, %2}
4694 vcvtsi2sdq\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
4695 [(set_attr "isa" "noavx,noavx,avx")
4696 (set_attr "type" "sseicvt")
4697 (set_attr "athlon_decode" "double,direct,*")
4698 (set_attr "amdfam10_decode" "vector,double,*")
4699 (set_attr "bdver1_decode" "double,direct,*")
4700 (set_attr "length_vex" "*,*,4")
4701 (set_attr "prefix_rex" "1,1,*")
4702 (set_attr "prefix" "orig,orig,maybe_evex")
4703 (set_attr "mode" "DF")])
4704
;; SFmode element 0 of operand 1 -> unsigned SImode general register,
;; through UNSPEC_UNSIGNED_FIX_NOTRUNC.  AVX512F, EVEX only.
4705 (define_insn "avx512f_vcvtss2usi<round_name>"
4706 [(set (match_operand:SI 0 "register_operand" "=r")
4707 (unspec:SI
4708 [(vec_select:SF
4709 (match_operand:V4SF 1 "<round_nimm_predicate>" "<round_constraint>")
4710 (parallel [(const_int 0)]))]
4711 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4712 "TARGET_AVX512F"
4713 "vcvtss2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
4714 [(set_attr "type" "sseicvt")
4715 (set_attr "prefix" "evex")
4716 (set_attr "mode" "SI")])
4717
;; SFmode element 0 of operand 1 -> unsigned DImode general register,
;; through UNSPEC_UNSIGNED_FIX_NOTRUNC.  AVX512F and TARGET_64BIT.
4718 (define_insn "avx512f_vcvtss2usiq<round_name>"
4719 [(set (match_operand:DI 0 "register_operand" "=r")
4720 (unspec:DI
4721 [(vec_select:SF
4722 (match_operand:V4SF 1 "<round_nimm_predicate>" "<round_constraint>")
4723 (parallel [(const_int 0)]))]
4724 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4725 "TARGET_AVX512F && TARGET_64BIT"
4726 "vcvtss2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
4727 [(set_attr "type" "sseicvt")
4728 (set_attr "prefix" "evex")
4729 (set_attr "mode" "DI")])
4730
;; Truncating conversion (unsigned_fix) of SFmode element 0 of
;; operand 1 to an unsigned SImode general register.  AVX512F only.
4731 (define_insn "avx512f_vcvttss2usi<round_saeonly_name>"
4732 [(set (match_operand:SI 0 "register_operand" "=r")
4733 (unsigned_fix:SI
4734 (vec_select:SF
4735 (match_operand:V4SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
4736 (parallel [(const_int 0)]))))]
4737 "TARGET_AVX512F"
4738 "vcvttss2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
4739 [(set_attr "type" "sseicvt")
4740 (set_attr "prefix" "evex")
4741 (set_attr "mode" "SI")])
4742
;; Truncating conversion (unsigned_fix) of SFmode element 0 of
;; operand 1 to an unsigned DImode general register.
;; AVX512F and TARGET_64BIT.
4743 (define_insn "avx512f_vcvttss2usiq<round_saeonly_name>"
4744 [(set (match_operand:DI 0 "register_operand" "=r")
4745 (unsigned_fix:DI
4746 (vec_select:SF
4747 (match_operand:V4SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
4748 (parallel [(const_int 0)]))))]
4749 "TARGET_AVX512F && TARGET_64BIT"
4750 "vcvttss2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
4751 [(set_attr "type" "sseicvt")
4752 (set_attr "prefix" "evex")
4753 (set_attr "mode" "DI")])
4754
;; DFmode element 0 of operand 1 -> unsigned SImode general register,
;; through UNSPEC_UNSIGNED_FIX_NOTRUNC.  AVX512F, EVEX only.
4755 (define_insn "avx512f_vcvtsd2usi<round_name>"
4756 [(set (match_operand:SI 0 "register_operand" "=r")
4757 (unspec:SI
4758 [(vec_select:DF
4759 (match_operand:V2DF 1 "<round_nimm_predicate>" "<round_constraint>")
4760 (parallel [(const_int 0)]))]
4761 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4762 "TARGET_AVX512F"
4763 "vcvtsd2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
4764 [(set_attr "type" "sseicvt")
4765 (set_attr "prefix" "evex")
4766 (set_attr "mode" "SI")])
4767
;; DFmode element 0 of operand 1 -> unsigned DImode general register,
;; through UNSPEC_UNSIGNED_FIX_NOTRUNC.  AVX512F and TARGET_64BIT.
4768 (define_insn "avx512f_vcvtsd2usiq<round_name>"
4769 [(set (match_operand:DI 0 "register_operand" "=r")
4770 (unspec:DI
4771 [(vec_select:DF
4772 (match_operand:V2DF 1 "<round_nimm_predicate>" "<round_constraint>")
4773 (parallel [(const_int 0)]))]
4774 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4775 "TARGET_AVX512F && TARGET_64BIT"
4776 "vcvtsd2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
4777 [(set_attr "type" "sseicvt")
4778 (set_attr "prefix" "evex")
4779 (set_attr "mode" "DI")])
4780
;; Truncating conversion (unsigned_fix) of DFmode element 0 of
;; operand 1 to an unsigned SImode general register.  AVX512F only.
4781 (define_insn "avx512f_vcvttsd2usi<round_saeonly_name>"
4782 [(set (match_operand:SI 0 "register_operand" "=r")
4783 (unsigned_fix:SI
4784 (vec_select:DF
4785 (match_operand:V2DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
4786 (parallel [(const_int 0)]))))]
4787 "TARGET_AVX512F"
4788 "vcvttsd2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
4789 [(set_attr "type" "sseicvt")
4790 (set_attr "prefix" "evex")
4791 (set_attr "mode" "SI")])
4792
;; Truncating conversion (unsigned_fix) of DFmode element 0 of
;; operand 1 to an unsigned DImode general register.
;; AVX512F and TARGET_64BIT.
4793 (define_insn "avx512f_vcvttsd2usiq<round_saeonly_name>"
4794 [(set (match_operand:DI 0 "register_operand" "=r")
4795 (unsigned_fix:DI
4796 (vec_select:DF
4797 (match_operand:V2DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
4798 (parallel [(const_int 0)]))))]
4799 "TARGET_AVX512F && TARGET_64BIT"
4800 "vcvttsd2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
4801 [(set_attr "type" "sseicvt")
4802 (set_attr "prefix" "evex")
4803 (set_attr "mode" "DI")])
4804
;; DFmode element 0 of the V2DF operand 1 -> SImode general register,
;; through UNSPEC_FIX_NOTRUNC.  Alternative 0 takes a register source,
;; alternative 1 whatever <round_constraint2> expands to.
4805 (define_insn "sse2_cvtsd2si<round_name>"
4806 [(set (match_operand:SI 0 "register_operand" "=r,r")
4807 (unspec:SI
4808 [(vec_select:DF
4809 (match_operand:V2DF 1 "<round_nimm_predicate>" "v,<round_constraint2>")
4810 (parallel [(const_int 0)]))]
4811 UNSPEC_FIX_NOTRUNC))]
4812 "TARGET_SSE2"
4813 "%vcvtsd2si\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
4814 [(set_attr "type" "sseicvt")
4815 (set_attr "athlon_decode" "double,vector")
4816 (set_attr "bdver1_decode" "double,double")
4817 (set_attr "btver2_decode" "double,double")
4818 (set_attr "prefix_rep" "1")
4819 (set_attr "prefix" "maybe_vex")
4820 (set_attr "mode" "SI")])
4821
;; Variant of sse2_cvtsd2si whose source is a plain DFmode operand
;; (SSE register or memory) rather than element 0 of a V2DF vector.
4822 (define_insn "sse2_cvtsd2si_2"
4823 [(set (match_operand:SI 0 "register_operand" "=r,r")
4824 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
4825 UNSPEC_FIX_NOTRUNC))]
4826 "TARGET_SSE2"
4827 "%vcvtsd2si\t{%1, %0|%0, %q1}"
4828 [(set_attr "type" "sseicvt")
4829 (set_attr "athlon_decode" "double,vector")
4830 (set_attr "amdfam10_decode" "double,double")
4831 (set_attr "bdver1_decode" "double,double")
4832 (set_attr "prefix_rep" "1")
4833 (set_attr "prefix" "maybe_vex")
4834 (set_attr "mode" "SI")])
4835
;; DImode-result counterpart of sse2_cvtsd2si: DFmode element 0 of
;; operand 1 -> 64-bit general register via UNSPEC_FIX_NOTRUNC.
;; Requires TARGET_64BIT; the AT&T mnemonic gets the {q} suffix.
4836 (define_insn "sse2_cvtsd2siq<round_name>"
4837 [(set (match_operand:DI 0 "register_operand" "=r,r")
4838 (unspec:DI
4839 [(vec_select:DF
4840 (match_operand:V2DF 1 "<round_nimm_predicate>" "v,<round_constraint2>")
4841 (parallel [(const_int 0)]))]
4842 UNSPEC_FIX_NOTRUNC))]
4843 "TARGET_SSE2 && TARGET_64BIT"
4844 "%vcvtsd2si{q}\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
4845 [(set_attr "type" "sseicvt")
4846 (set_attr "athlon_decode" "double,vector")
4847 (set_attr "bdver1_decode" "double,double")
4848 (set_attr "prefix_rep" "1")
4849 (set_attr "prefix" "maybe_vex")
4850 (set_attr "mode" "DI")])
4851
;; DImode-result variant of sse2_cvtsd2si_2: the source is a scalar
;; DFmode register or memory operand.  Requires TARGET_64BIT.
4852 (define_insn "sse2_cvtsd2siq_2"
4853 [(set (match_operand:DI 0 "register_operand" "=r,r")
4854 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
4855 UNSPEC_FIX_NOTRUNC))]
4856 "TARGET_SSE2 && TARGET_64BIT"
4857 "%vcvtsd2si{q}\t{%1, %0|%0, %q1}"
4858 [(set_attr "type" "sseicvt")
4859 (set_attr "athlon_decode" "double,vector")
4860 (set_attr "amdfam10_decode" "double,double")
4861 (set_attr "bdver1_decode" "double,double")
4862 (set_attr "prefix_rep" "1")
4863 (set_attr "prefix" "maybe_vex")
4864 (set_attr "mode" "DI")])
4865
;; Truncating conversion (`fix' rtx) of DFmode element 0 of operand 1
;; to an SImode general register, with the <round_saeonly_*>
;; substitution attributes applied.
4866 (define_insn "sse2_cvttsd2si<round_saeonly_name>"
4867 [(set (match_operand:SI 0 "register_operand" "=r,r")
4868 (fix:SI
4869 (vec_select:DF
4870 (match_operand:V2DF 1 "<round_saeonly_nimm_predicate>" "v,<round_saeonly_constraint2>")
4871 (parallel [(const_int 0)]))))]
4872 "TARGET_SSE2"
4873 "%vcvttsd2si\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
4874 [(set_attr "type" "sseicvt")
4875 (set_attr "athlon_decode" "double,vector")
4876 (set_attr "amdfam10_decode" "double,double")
4877 (set_attr "bdver1_decode" "double,double")
4878 (set_attr "btver2_decode" "double,double")
4879 (set_attr "prefix_rep" "1")
4880 (set_attr "prefix" "maybe_vex")
4881 (set_attr "mode" "SI")])
4882
;; Truncating conversion (`fix' rtx) of DFmode element 0 of operand 1
;; to a DImode general register.  Requires TARGET_64BIT.
4883 (define_insn "sse2_cvttsd2siq<round_saeonly_name>"
4884 [(set (match_operand:DI 0 "register_operand" "=r,r")
4885 (fix:DI
4886 (vec_select:DF
4887 (match_operand:V2DF 1 "<round_saeonly_nimm_predicate>" "v,<round_saeonly_constraint2>")
4888 (parallel [(const_int 0)]))))]
4889 "TARGET_SSE2 && TARGET_64BIT"
4890 "%vcvttsd2si{q}\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
4891 [(set_attr "type" "sseicvt")
4892 (set_attr "athlon_decode" "double,vector")
4893 (set_attr "amdfam10_decode" "double,double")
4894 (set_attr "bdver1_decode" "double,double")
4895 (set_attr "prefix_rep" "1")
4896 (set_attr "prefix" "maybe_vex")
4897 (set_attr "mode" "DI")])
4898
4899 ;; For float<si2dfmode><mode>2 insn pattern
;; Maps a double-precision vector mode to the SImode vector mode with
;; the same number of elements (upper-case mode name).
4900 (define_mode_attr si2dfmode
4901 [(V8DF "V8SI") (V4DF "V4SI")])
;; Lower-case spelling of si2dfmode, for use inside pattern names.
4902 (define_mode_attr si2dfmodelower
4903 [(V8DF "v8si") (V4DF "v4si")])
4904
;; Packed signed 32-bit integer -> packed double conversion for the
;; 256- and 512-bit double modes; the source has half the vector width
;; (<si2dfmode>).  <mask_mode512bit_condition> gates the masked variant.
4905 (define_insn "float<si2dfmodelower><mode>2<mask_name>"
4906 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
4907 (float:VF2_512_256 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
4908 "TARGET_AVX && <mask_mode512bit_condition>"
4909 "vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4910 [(set_attr "type" "ssecvt")
4911 (set_attr "prefix" "maybe_vex")
4912 (set_attr "mode" "<MODE>")])
4913
;; Packed 64-bit integer -> packed double conversion over
;; VF2_AVX512VL.  The any_float code iterator yields both the signed
;; and unsigned patterns; <floatsuffix> supplies the pattern-name
;; prefix and the mnemonic infix.  AVX512DQ, EVEX only.
;; Operand 1 uses <round_constraint> (not a fixed "vm") so that the
;; variant generated by the <round_name> substitution gets the
;; constraint the substitution prescribes, as in the sibling
;; <floatsuffix>float<sselongvecmodelower><mode>2 and ufloat patterns.
4914 (define_insn "<floatsuffix>float<sseintvecmodelower><mode>2<mask_name><round_name>"
4915 [(set (match_operand:VF2_AVX512VL 0 "register_operand" "=v")
4916 (any_float:VF2_AVX512VL
4917 (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
4918 "TARGET_AVX512DQ"
4919 "vcvt<floatsuffix>qq2pd\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4920 [(set_attr "type" "ssecvt")
4921 (set_attr "prefix" "evex")
4922 (set_attr "mode" "<MODE>")])
4923
4924 ;; For <floatsuffix>float<sselongvecmodelower><mode>2 insn patterns
;; AT&T-only mnemonic suffix for vcvt[u]qq2ps: none for a V8SF result,
;; {y} for a V4SF result.
4925 (define_mode_attr qq2pssuff
4926 [(V8SF "") (V4SF "{y}")])
4927
;; Maps a single-precision vector mode to the DImode vector mode with
;; the same number of elements (upper-case mode name).
4928 (define_mode_attr sselongvecmode
4929 [(V8SF "V8DI") (V4SF "V4DI")])
4930
;; Lower-case spelling of sselongvecmode, for use inside pattern names.
4931 (define_mode_attr sselongvecmodelower
4932 [(V8SF "v8di") (V4SF "v4di")])
4933
;; Maps float vector modes to an integer insn mode (XI/OI/TI).
;; NOTE(review): no pattern in this part of the file references this
;; attribute; confirm its users before changing the mapping.
4934 (define_mode_attr sseintvecmode3
4935 [(V8SF "XI") (V4SF "OI")
4936 (V8DF "OI") (V4DF "TI")])
4937
;; Packed 64-bit integer -> packed single-precision conversion; the
;; source (<sselongvecmode>) has the same element count as the result.
;; any_float yields the signed and unsigned patterns; <qq2pssuff> adds
;; the AT&T size suffix.  <round_modev8sf_condition> gates the
;; rounding variant.
4938 (define_insn "<floatsuffix>float<sselongvecmodelower><mode>2<mask_name><round_name>"
4939 [(set (match_operand:VF1_128_256VL 0 "register_operand" "=v")
4940 (any_float:VF1_128_256VL
4941 (match_operand:<sselongvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
4942 "TARGET_AVX512DQ && <round_modev8sf_condition>"
4943 "vcvt<floatsuffix>qq2ps<qq2pssuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4944 [(set_attr "type" "ssecvt")
4945 (set_attr "prefix" "evex")
4946 (set_attr "mode" "<MODE>")])
4947
;; V2DI -> V2SF conversion (signed or unsigned via any_float) whose
;; result is written to the low half of a V4SF register; the high half
;; is a constant zero vector (vec_concat).  AVX512DQ + AVX512VL.
4948 (define_insn "*<floatsuffix>floatv2div2sf2"
4949 [(set (match_operand:V4SF 0 "register_operand" "=v")
4950 (vec_concat:V4SF
4951 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
4952 (const_vector:V2SF [(const_int 0) (const_int 0)])))]
4953 "TARGET_AVX512DQ && TARGET_AVX512VL"
4954 "vcvt<floatsuffix>qq2ps{x}\t{%1, %0|%0, %1}"
4955 [(set_attr "type" "ssecvt")
4956 (set_attr "prefix" "evex")
4957 (set_attr "mode" "V4SF")])
4958
;; Explicitly masked form of *<floatsuffix>floatv2div2sf2.  The
;; converted V2SF value is vec_merged, under the QImode mask register
;; operand 3 ("Yk"), with elements 0-1 of operand 2; operand 2 is
;; either tied to the destination ("0") or matches constraint "C".
;; The high half of the V4SF result is zero.
4959 (define_insn "<floatsuffix>floatv2div2sf2_mask"
4960 [(set (match_operand:V4SF 0 "register_operand" "=v")
4961 (vec_concat:V4SF
4962 (vec_merge:V2SF
4963 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
4964 (vec_select:V2SF
4965 (match_operand:V4SF 2 "vector_move_operand" "0C")
4966 (parallel [(const_int 0) (const_int 1)]))
4967 (match_operand:QI 3 "register_operand" "Yk"))
4968 (const_vector:V2SF [(const_int 0) (const_int 0)])))]
4969 "TARGET_AVX512DQ && TARGET_AVX512VL"
4970 "vcvt<floatsuffix>qq2ps{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
4971 [(set_attr "type" "ssecvt")
4972 (set_attr "prefix" "evex")
4973 (set_attr "mode" "V4SF")])
4974
;; Packed unsigned 32-bit integer -> packed double conversion over
;; VF2_512_256VL; the source mode is <si2dfmode>.  AVX512F, EVEX only.
4975 (define_insn "ufloat<si2dfmodelower><mode>2<mask_name>"
4976 [(set (match_operand:VF2_512_256VL 0 "register_operand" "=v")
4977 (unsigned_float:VF2_512_256VL
4978 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
4979 "TARGET_AVX512F"
4980 "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4981 [(set_attr "type" "ssecvt")
4982 (set_attr "prefix" "evex")
4983 (set_attr "mode" "<MODE>")])
4984
;; 128-bit unsigned int -> double conversion: elements 0 and 1 of the
;; V4SI operand are selected and converted to V2DF.  AVX512VL.
4985 (define_insn "ufloatv2siv2df2<mask_name>"
4986 [(set (match_operand:V2DF 0 "register_operand" "=v")
4987 (unsigned_float:V2DF
4988 (vec_select:V2SI
4989 (match_operand:V4SI 1 "nonimmediate_operand" "vm")
4990 (parallel [(const_int 0) (const_int 1)]))))]
4991 "TARGET_AVX512VL"
4992 "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4993 [(set_attr "type" "ssecvt")
4994 (set_attr "prefix" "evex")
4995 (set_attr "mode" "V2DF")])
4996
;; Convert the low eight elements (indices 0-7) of a V16SI operand to
;; V8DF.  The source is printed with the %t operand modifier.
4997 (define_insn "avx512f_cvtdq2pd512_2"
4998 [(set (match_operand:V8DF 0 "register_operand" "=v")
4999 (float:V8DF
5000 (vec_select:V8SI
5001 (match_operand:V16SI 1 "nonimmediate_operand" "vm")
5002 (parallel [(const_int 0) (const_int 1)
5003 (const_int 2) (const_int 3)
5004 (const_int 4) (const_int 5)
5005 (const_int 6) (const_int 7)]))))]
5006 "TARGET_AVX512F"
5007 "vcvtdq2pd\t{%t1, %0|%0, %t1}"
5008 [(set_attr "type" "ssecvt")
5009 (set_attr "prefix" "evex")
5010 (set_attr "mode" "V8DF")])
5011
;; Convert the low four elements (indices 0-3) of a V8SI operand to
;; V4DF.  The source is printed with the %x operand modifier.
5012 (define_insn "avx_cvtdq2pd256_2"
5013 [(set (match_operand:V4DF 0 "register_operand" "=v")
5014 (float:V4DF
5015 (vec_select:V4SI
5016 (match_operand:V8SI 1 "nonimmediate_operand" "vm")
5017 (parallel [(const_int 0) (const_int 1)
5018 (const_int 2) (const_int 3)]))))]
5019 "TARGET_AVX"
5020 "vcvtdq2pd\t{%x1, %0|%0, %x1}"
5021 [(set_attr "type" "ssecvt")
5022 (set_attr "prefix" "maybe_evex")
5023 (set_attr "mode" "V4DF")])
5024
;; Convert elements 0 and 1 of a V4SI operand to V2DF.  The unmasked
;; form needs TARGET_SSE2; <mask_avx512vl_condition> gates the masked
;; variant.  The Intel-syntax source is printed with %q1, and
;; ssememalign is set to 64.
5025 (define_insn "sse2_cvtdq2pd<mask_name>"
5026 [(set (match_operand:V2DF 0 "register_operand" "=v")
5027 (float:V2DF
5028 (vec_select:V2SI
5029 (match_operand:V4SI 1 "nonimmediate_operand" "vm")
5030 (parallel [(const_int 0) (const_int 1)]))))]
5031 "TARGET_SSE2 && <mask_avx512vl_condition>"
5032 "%vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
5033 [(set_attr "type" "ssecvt")
5034 (set_attr "prefix" "maybe_vex")
5035 (set_attr "ssememalign" "64")
5036 (set_attr "mode" "V2DF")])
5037
;; V8DF -> V8SI conversion through UNSPEC_FIX_NOTRUNC, with the mask and
;; rounding substitutions applied.  AVX512F, EVEX only.
5038 (define_insn "<mask_codefor>avx512f_cvtpd2dq512<mask_name><round_name>"
5039 [(set (match_operand:V8SI 0 "register_operand" "=v")
5040 (unspec:V8SI
5041 [(match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")]
5042 UNSPEC_FIX_NOTRUNC))]
5043 "TARGET_AVX512F"
5044 "vcvtpd2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5045 [(set_attr "type" "ssecvt")
5046 (set_attr "prefix" "evex")
5047 (set_attr "mode" "OI")])
5048
;; V4DF -> V4SI conversion through UNSPEC_FIX_NOTRUNC.  The unmasked
;; form needs TARGET_AVX; <mask_avx512vl_condition> gates the masked
;; variant.  The AT&T mnemonic carries the {y} suffix.
5049 (define_insn "avx_cvtpd2dq256<mask_name>"
5050 [(set (match_operand:V4SI 0 "register_operand" "=v")
5051 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "vm")]
5052 UNSPEC_FIX_NOTRUNC))]
5053 "TARGET_AVX && <mask_avx512vl_condition>"
5054 "vcvtpd2dq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5055 [(set_attr "type" "ssecvt")
5056 (set_attr "prefix" "<mask_prefix>")
5057 (set_attr "mode" "OI")])
5058
;; Expander producing a V8SI whose low half is the V4DF -> V4SI
;; conversion (UNSPEC_FIX_NOTRUNC) and whose high half is operand 2,
;; which the preparation statement sets to the V4SI zero constant.
5059 (define_expand "avx_cvtpd2dq256_2"
5060 [(set (match_operand:V8SI 0 "register_operand")
5061 (vec_concat:V8SI
5062 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand")]
5063 UNSPEC_FIX_NOTRUNC)
5064 (match_dup 2)))]
5065 "TARGET_AVX"
5066 "operands[2] = CONST0_RTX (V4SImode);")
5067
;; Insn matched by the avx_cvtpd2dq256_2 expander: V4DF -> V4SI
;; conversion concatenated with a zero V4SI (const0_operand) to form a
;; V8SI result.  The destination is printed with the %x modifier.
5068 (define_insn "*avx_cvtpd2dq256_2"
5069 [(set (match_operand:V8SI 0 "register_operand" "=x")
5070 (vec_concat:V8SI
5071 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
5072 UNSPEC_FIX_NOTRUNC)
5073 (match_operand:V4SI 2 "const0_operand")))]
5074 "TARGET_AVX"
5075 "vcvtpd2dq{y}\t{%1, %x0|%x0, %1}"
5076 [(set_attr "type" "ssecvt")
5077 (set_attr "prefix" "vex")
5078 (set_attr "btver2_decode" "vector")
5079 (set_attr "mode" "OI")])
5080
;; V2DF -> V2SI conversion (UNSPEC_FIX_NOTRUNC) written to the low half
;; of a V4SI register; the high half is a constant zero vector.
;; The output is chosen in C: under TARGET_AVX the VEX/EVEX mnemonic
;; with the AT&T {x} suffix and the mask operand, otherwise the plain
;; SSE2 mnemonic without a mask operand.
5081 (define_insn "sse2_cvtpd2dq<mask_name>"
5082 [(set (match_operand:V4SI 0 "register_operand" "=v")
5083 (vec_concat:V4SI
5084 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
5085 UNSPEC_FIX_NOTRUNC)
5086 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
5087 "TARGET_SSE2 && <mask_avx512vl_condition>"
5088 {
5089 if (TARGET_AVX)
5090 return "vcvtpd2dq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
5091 else
5092 return "cvtpd2dq\t{%1, %0|%0, %1}";
5093 }
5094 [(set_attr "type" "ssecvt")
5095 (set_attr "prefix_rep" "1")
5096 (set_attr "prefix_data16" "0")
5097 (set_attr "prefix" "maybe_vex")
5098 (set_attr "mode" "TI")
5099 (set_attr "amdfam10_decode" "double")
5100 (set_attr "athlon_decode" "vector")
5101 (set_attr "bdver1_decode" "double")])
5102
5103 ;; For ufix_notrunc* insn patterns
;; AT&T-only mnemonic suffix for vcvtpd2udq: none for a V8DF source,
;; {y} for a V4DF source.
5104 (define_mode_attr pd2udqsuff
5105 [(V8DF "") (V4DF "{y}")])
5106
;; Packed double -> packed unsigned 32-bit integer conversion through
;; UNSPEC_UNSIGNED_FIX_NOTRUNC, over VF2_512_256VL; the result mode is
;; <si2dfmode>.  AVX512F, EVEX only.
5107 (define_insn "ufix_notrunc<mode><si2dfmodelower>2<mask_name><round_name>"
5108 [(set (match_operand:<si2dfmode> 0 "register_operand" "=v")
5109 (unspec:<si2dfmode>
5110 [(match_operand:VF2_512_256VL 1 "nonimmediate_operand" "<round_constraint>")]
5111 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
5112 "TARGET_AVX512F"
5113 "vcvtpd2udq<pd2udqsuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5114 [(set_attr "type" "ssecvt")
5115 (set_attr "prefix" "evex")
5116 (set_attr "mode" "<sseinsnmode>")])
5117
;; 128-bit form: V2DF -> unsigned V2SI (UNSPEC_UNSIGNED_FIX_NOTRUNC)
;; written to the low half of a V4SI register, with the high half zero.
;; AVX512VL; the AT&T mnemonic carries the {x} suffix.
5118 (define_insn "ufix_notruncv2dfv2si2<mask_name>"
5119 [(set (match_operand:V4SI 0 "register_operand" "=v")
5120 (vec_concat:V4SI
5121 (unspec:V2SI
5122 [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
5123 UNSPEC_UNSIGNED_FIX_NOTRUNC)
5124 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
5125 "TARGET_AVX512VL"
5126 "vcvtpd2udq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5127 [(set_attr "type" "ssecvt")
5128 (set_attr "prefix" "evex")
5129 (set_attr "mode" "TI")])
5130
;; any_fix (the code iterator supplies the operation and <fixsuffix>)
;; of a V8DF operand to V8SI under TARGET_AVX512F, emitted as
;; vcvttpd2<fixsuffix>dq.
5131 (define_insn "<fixsuffix>fix_truncv8dfv8si2<mask_name><round_saeonly_name>"
5132 [(set (match_operand:V8SI 0 "register_operand" "=v")
5133 (any_fix:V8SI
5134 (match_operand:V8DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
5135 "TARGET_AVX512F"
5136 "vcvttpd2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
5137 [(set_attr "type" "ssecvt")
5138 (set_attr "prefix" "evex")
5139 (set_attr "mode" "OI")])
5140
;; unsigned_fix of a V2DF operand to V2SI, vec_concat'ed with a zero V2SI
;; into the V4SI destination.  Requires TARGET_AVX512VL; emits
;; vcvttpd2udq{x}.
5141 (define_insn "ufix_truncv2dfv2si2<mask_name>"
5142 [(set (match_operand:V4SI 0 "register_operand" "=v")
5143 (vec_concat:V4SI
5144 (unsigned_fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
5145 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
5146 "TARGET_AVX512VL"
5147 "vcvttpd2udq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5148 [(set_attr "type" "ssecvt")
5149 (set_attr "prefix" "evex")
5150 (set_attr "mode" "TI")])
5151
;; fix (signed truncating conversion) of a V4DF operand to V4SI, emitted as
;; vcvttpd2dq{y}.  The condition is written with <mask_avx512vl_condition>,
;; in the same form avx_cvtpd2ps256<mask_name> uses, so the <mask_name>
;; variant of this 256-bit pattern is gated by that attribute instead of
;; being accepted through a bare TARGET_AVX alternative.
;; NOTE(review): assumes <mask_avx512vl_condition> is "1" for the plain
;; pattern and TARGET_AVX512VL for the masked one -- confirm in subst.md.
5152 (define_insn "fix_truncv4dfv4si2<mask_name>"
5153 [(set (match_operand:V4SI 0 "register_operand" "=v")
5154 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
5155 "TARGET_AVX && <mask_avx512vl_condition>"
5156 "vcvttpd2dq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5157 [(set_attr "type" "ssecvt")
5158 (set_attr "prefix" "maybe_evex")
5159 (set_attr "mode" "OI")])
5160
;; unsigned_fix of a V4DF operand to V4SI under
;; TARGET_AVX512VL && TARGET_AVX512F, emitted as vcvttpd2udq{y}.
;; NOTE(review): "prefix" is maybe_evex here while the other unsigned
;; AVX512VL-only conversions nearby use evex -- confirm this is intended.
5161 (define_insn "ufix_truncv4dfv4si2<mask_name>"
5162 [(set (match_operand:V4SI 0 "register_operand" "=v")
5163 (unsigned_fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
5164 "TARGET_AVX512VL && TARGET_AVX512F"
5165 "vcvttpd2udq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5166 [(set_attr "type" "ssecvt")
5167 (set_attr "prefix" "maybe_evex")
5168 (set_attr "mode" "OI")])
5169
;; any_fix of a VF2_AVX512VL operand to <sseintvecmode>, emitted as
;; vcvttpd2<fixsuffix>qq.  Requires TARGET_AVX512DQ together with
;; <round_saeonly_mode512bit_condition>.
5170 (define_insn "<fixsuffix>fix_trunc<mode><sseintvecmodelower>2<mask_name><round_saeonly_name>"
5171 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
5172 (any_fix:<sseintvecmode>
5173 (match_operand:VF2_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
5174 "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>"
5175 "vcvttpd2<fixsuffix>qq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
5176 [(set_attr "type" "ssecvt")
5177 (set_attr "prefix" "evex")
5178 (set_attr "mode" "<sseintvecmode2>")])
5179
;; UNSPEC_FIX_NOTRUNC conversion of a VF2_AVX512VL operand to
;; <sseintvecmode>, emitted as vcvtpd2qq.  Requires TARGET_AVX512DQ
;; together with <round_mode512bit_condition>.
5180 (define_insn "fix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>"
5181 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
5182 (unspec:<sseintvecmode>
5183 [(match_operand:VF2_AVX512VL 1 "<round_nimm_predicate>" "<round_constraint>")]
5184 UNSPEC_FIX_NOTRUNC))]
5185 "TARGET_AVX512DQ && <round_mode512bit_condition>"
5186 "vcvtpd2qq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5187 [(set_attr "type" "ssecvt")
5188 (set_attr "prefix" "evex")
5189 (set_attr "mode" "<sseintvecmode2>")])
5190
;; UNSPEC_UNSIGNED_FIX_NOTRUNC conversion of a VF2_AVX512VL operand to
;; <sseintvecmode>, emitted as vcvtpd2uqq.  Requires TARGET_AVX512DQ
;; together with <round_mode512bit_condition>.
;; Operand 1 uses <round_nimm_predicate>, exactly as the signed
;; fix_notrunc<mode><sseintvecmodelower>2 pattern does, so predicate and
;; <round_constraint> are substituted together in the <round_name> variant.
;; NOTE(review): assumes <round_nimm_predicate> expands to
;; nonimmediate_operand for the unsubstituted pattern -- confirm in subst.md.
5191 (define_insn "ufix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>"
5192 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
5193 (unspec:<sseintvecmode>
5194 [(match_operand:VF2_AVX512VL 1 "<round_nimm_predicate>" "<round_constraint>")]
5195 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
5196 "TARGET_AVX512DQ && <round_mode512bit_condition>"
5197 "vcvtpd2uqq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5198 [(set_attr "type" "ssecvt")
5199 (set_attr "prefix" "evex")
5200 (set_attr "mode" "<sseintvecmode2>")])
5201
;; any_fix of a VF1_128_256VL operand to <sselongvecmode>, emitted as
;; vcvttps2<fixsuffix>qq.  Requires TARGET_AVX512DQ together with
;; <round_saeonly_modev8sf_condition>.
5202 (define_insn "<fixsuffix>fix_trunc<mode><sselongvecmodelower>2<mask_name><round_saeonly_name>"
5203 [(set (match_operand:<sselongvecmode> 0 "register_operand" "=v")
5204 (any_fix:<sselongvecmode>
5205 (match_operand:VF1_128_256VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
5206 "TARGET_AVX512DQ && <round_saeonly_modev8sf_condition>"
5207 "vcvttps2<fixsuffix>qq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
5208 [(set_attr "type" "ssecvt")
5209 (set_attr "prefix" "evex")
5210 (set_attr "mode" "<sseintvecmode3>")])
5211
;; any_fix of elements 0 and 1 of a V4SF operand (selected as V2SF) to
;; V2DI, emitted as vcvttps2<fixsuffix>qq.  Requires
;; TARGET_AVX512DQ && TARGET_AVX512VL.
5212 (define_insn "<fixsuffix>fix_truncv2sfv2di2<mask_name>"
5213 [(set (match_operand:V2DI 0 "register_operand" "=v")
5214 (any_fix:V2DI
5215 (vec_select:V2SF
5216 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
5217 (parallel [(const_int 0) (const_int 1)]))))]
5218 "TARGET_AVX512DQ && TARGET_AVX512VL"
5219 "vcvttps2<fixsuffix>qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5220 [(set_attr "type" "ssecvt")
5221 (set_attr "prefix" "evex")
5222 (set_attr "mode" "TI")])
5223
;; unsigned_fix of a VF1_128_256VL operand to <sseintvecmode>, emitted as
;; vcvttps2udq.  Requires TARGET_AVX512VL.
5224 (define_insn "ufix_trunc<mode><sseintvecmodelower>2<mask_name>"
5225 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
5226 (unsigned_fix:<sseintvecmode>
5227 (match_operand:VF1_128_256VL 1 "nonimmediate_operand" "vm")))]
5228 "TARGET_AVX512VL"
5229 "vcvttps2udq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5230 [(set_attr "type" "ssecvt")
5231 (set_attr "prefix" "evex")
5232 (set_attr "mode" "<sseintvecmode2>")])
5233
;; Expander: fix of a V4DF operand to V4SI, vec_concat'ed with operand 2
;; into a V8SI destination.  The preparation statement sets operand 2 to
;; CONST0_RTX (V4SImode).  Requires TARGET_AVX.
5234 (define_expand "avx_cvttpd2dq256_2"
5235 [(set (match_operand:V8SI 0 "register_operand")
5236 (vec_concat:V8SI
5237 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand"))
5238 (match_dup 2)))]
5239 "TARGET_AVX"
5240 "operands[2] = CONST0_RTX (V4SImode);")
5241
;; fix of a V2DF operand to V2SI, vec_concat'ed with a zero V2SI into the
;; V4SI destination.  The output code emits vcvttpd2dq{x} (with
;; <mask_operand2>) when TARGET_AVX is set and cvttpd2dq otherwise.
5242 (define_insn "sse2_cvttpd2dq<mask_name>"
5243 [(set (match_operand:V4SI 0 "register_operand" "=v")
5244 (vec_concat:V4SI
5245 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
5246 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
5247 "TARGET_SSE2 && <mask_avx512vl_condition>"
5248 {
5249 if (TARGET_AVX)
5250 return "vcvttpd2dq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
5251 else
5252 return "cvttpd2dq\t{%1, %0|%0, %1}";
5253 }
5254 [(set_attr "type" "ssecvt")
5255 (set_attr "amdfam10_decode" "double")
5256 (set_attr "athlon_decode" "vector")
5257 (set_attr "bdver1_decode" "double")
5258 (set_attr "prefix" "maybe_vex")
5259 (set_attr "mode" "TI")])
5260
;; float_truncate of V2DF operand 2 to V2SF, vec_duplicate'd to V4SF and
;; vec_merge'd into V4SF operand 1 with selector (const_int 1).
;; Alternatives 0 and 1 are the "noavx" register and memory forms
;; (cvtsd2ss); alternative 2 is the "avx" form (vcvtsd2ss), which also
;; carries <round_op3>.
5261 (define_insn "sse2_cvtsd2ss<round_name>"
5262 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
5263 (vec_merge:V4SF
5264 (vec_duplicate:V4SF
5265 (float_truncate:V2SF
5266 (match_operand:V2DF 2 "nonimmediate_operand" "x,m,<round_constraint>")))
5267 (match_operand:V4SF 1 "register_operand" "0,0,v")
5268 (const_int 1)))]
5269 "TARGET_SSE2"
5270 "@
5271 cvtsd2ss\t{%2, %0|%0, %2}
5272 cvtsd2ss\t{%2, %0|%0, %q2}
5273 vcvtsd2ss\t{<round_op3>%2, %1, %0|%0, %1, %q2<round_op3>}"
5274 [(set_attr "isa" "noavx,noavx,avx")
5275 (set_attr "type" "ssecvt")
5276 (set_attr "athlon_decode" "vector,double,*")
5277 (set_attr "amdfam10_decode" "vector,double,*")
5278 (set_attr "bdver1_decode" "direct,direct,*")
5279 (set_attr "btver2_decode" "double,double,double")
5280 (set_attr "prefix" "orig,orig,<round_prefix>")
5281 (set_attr "mode" "SF")])
5282
;; float_extend of elements 0 and 1 of V4SF operand 2 to V2DF, vec_merge'd
;; into V2DF operand 1 with selector (const_int 1).  Alternatives 0 and 1
;; are the "noavx" register and memory forms (cvtss2sd); alternative 2 is
;; the "avx" form (vcvtss2sd), which also carries <round_saeonly_op3>.
5283 (define_insn "sse2_cvtss2sd<round_saeonly_name>"
5284 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
5285 (vec_merge:V2DF
5286 (float_extend:V2DF
5287 (vec_select:V2SF
5288 (match_operand:V4SF 2 "<round_saeonly_nimm_predicate>" "x,m,<round_saeonly_constraint>")
5289 (parallel [(const_int 0) (const_int 1)])))
5290 (match_operand:V2DF 1 "register_operand" "0,0,v")
5291 (const_int 1)))]
5292 "TARGET_SSE2"
5293 "@
5294 cvtss2sd\t{%2, %0|%0, %2}
5295 cvtss2sd\t{%2, %0|%0, %k2}
5296 vcvtss2sd\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %k2<round_saeonly_op3>}"
5297 [(set_attr "isa" "noavx,noavx,avx")
5298 (set_attr "type" "ssecvt")
5299 (set_attr "amdfam10_decode" "vector,double,*")
5300 (set_attr "athlon_decode" "direct,direct,*")
5301 (set_attr "bdver1_decode" "direct,direct,*")
5302 (set_attr "btver2_decode" "double,double,double")
5303 (set_attr "prefix" "orig,orig,<round_saeonly_prefix>")
5304 (set_attr "mode" "DF")])
5305
;; float_truncate of a V8DF operand to V8SF under TARGET_AVX512F, emitted
;; as vcvtpd2ps.
5306 (define_insn "<mask_codefor>avx512f_cvtpd2ps512<mask_name><round_name>"
5307 [(set (match_operand:V8SF 0 "register_operand" "=v")
5308 (float_truncate:V8SF
5309 (match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")))]
5310 "TARGET_AVX512F"
5311 "vcvtpd2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5312 [(set_attr "type" "ssecvt")
5313 (set_attr "prefix" "evex")
5314 (set_attr "mode" "V8SF")])
5315
;; float_truncate of a V4DF operand to V4SF, emitted as vcvtpd2ps{y}.
;; Requires TARGET_AVX together with <mask_avx512vl_condition>.
5316 (define_insn "avx_cvtpd2ps256<mask_name>"
5317 [(set (match_operand:V4SF 0 "register_operand" "=v")
5318 (float_truncate:V4SF
5319 (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
5320 "TARGET_AVX && <mask_avx512vl_condition>"
5321 "vcvtpd2ps{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5322 [(set_attr "type" "ssecvt")
5323 (set_attr "prefix" "maybe_evex")
5324 (set_attr "btver2_decode" "vector")
5325 (set_attr "mode" "V4SF")])
5326
;; Expander: float_truncate of a V2DF operand to V2SF, vec_concat'ed with
;; operand 2 into a V4SF destination.  The preparation statement sets
;; operand 2 to CONST0_RTX (V2SFmode).  Requires TARGET_SSE2.
5327 (define_expand "sse2_cvtpd2ps"
5328 [(set (match_operand:V4SF 0 "register_operand")
5329 (vec_concat:V4SF
5330 (float_truncate:V2SF
5331 (match_operand:V2DF 1 "nonimmediate_operand"))
5332 (match_dup 2)))]
5333 "TARGET_SSE2"
5334 "operands[2] = CONST0_RTX (V2SFmode);")
5335
;; Expander for the merged form of the V2DF -> V2SF truncation: the
;; vec_concat of the truncated value and operand 4 is vec_merge'd with
;; V4SF operand 2 under QI operand 3.  The preparation statement sets
;; operand 4 to CONST0_RTX (V2SFmode).
5336 (define_expand "sse2_cvtpd2ps_mask"
5337 [(set (match_operand:V4SF 0 "register_operand")
5338 (vec_merge:V4SF
5339 (vec_concat:V4SF
5340 (float_truncate:V2SF
5341 (match_operand:V2DF 1 "nonimmediate_operand"))
5342 (match_dup 4))
5343 (match_operand:V4SF 2 "register_operand")
5344 (match_operand:QI 3 "register_operand")))]
5345 "TARGET_SSE2"
5346 "operands[4] = CONST0_RTX (V2SFmode);")
5347
;; Insn for float_truncate of a V2DF operand to V2SF, vec_concat'ed with a
;; const0 V2SF (operand 2).  The output code emits vcvtpd2ps{x} (with
;; <mask_operand3>, since operand 2 is already taken by the zero vector)
;; when TARGET_AVX is set and cvtpd2ps otherwise.
5348 (define_insn "*sse2_cvtpd2ps<mask_name>"
5349 [(set (match_operand:V4SF 0 "register_operand" "=v")
5350 (vec_concat:V4SF
5351 (float_truncate:V2SF
5352 (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
5353 (match_operand:V2SF 2 "const0_operand")))]
5354 "TARGET_SSE2 && <mask_avx512vl_condition>"
5355 {
5356 if (TARGET_AVX)
5357 return "vcvtpd2ps{x}\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}";
5358 else
5359 return "cvtpd2ps\t{%1, %0|%0, %1}";
5360 }
5361 [(set_attr "type" "ssecvt")
5362 (set_attr "amdfam10_decode" "double")
5363 (set_attr "athlon_decode" "vector")
5364 (set_attr "bdver1_decode" "double")
5365 (set_attr "prefix_data16" "1")
5366 (set_attr "prefix" "maybe_vex")
5367 (set_attr "mode" "V4SF")])
5368
5369 ;; For <sse2_avx_avx512f>_cvtps2pd<avxsizesuffix> insn pattern
;; Maps a double-precision vector mode to the single-precision vector
;; mode with the same element count: V8DF -> V8SF, V4DF -> V4SF.
5370 (define_mode_attr sf2dfmode
5371 [(V8DF "V8SF") (V4DF "V4SF")])
5372
;; float_extend of a <sf2dfmode> operand to a VF2_512_256 destination,
;; emitted as vcvtps2pd.  Requires TARGET_AVX together with
;; <mask_mode512bit_condition> and <round_saeonly_mode512bit_condition>.
5373 (define_insn "<sse2_avx_avx512f>_cvtps2pd<avxsizesuffix><mask_name><round_saeonly_name>"
5374 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
5375 (float_extend:VF2_512_256
5376 (match_operand:<sf2dfmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
5377 "TARGET_AVX && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
5378 "vcvtps2pd\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
5379 [(set_attr "type" "ssecvt")
5380 (set_attr "prefix" "maybe_vex")
5381 (set_attr "mode" "<MODE>")])
5382
;; float_extend of elements 0-3 of a V8SF operand to V4DF under
;; TARGET_AVX.  The source operand is printed through the %x1 modifier.
5383 (define_insn "*avx_cvtps2pd256_2"
5384 [(set (match_operand:V4DF 0 "register_operand" "=x")
5385 (float_extend:V4DF
5386 (vec_select:V4SF
5387 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
5388 (parallel [(const_int 0) (const_int 1)
5389 (const_int 2) (const_int 3)]))))]
5390 "TARGET_AVX"
5391 "vcvtps2pd\t{%x1, %0|%0, %x1}"
5392 [(set_attr "type" "ssecvt")
5393 (set_attr "prefix" "vex")
5394 (set_attr "mode" "V4DF")])
5395
;; float_extend of elements 0-7 of a V16SF operand to V8DF under
;; TARGET_AVX512F.  The source operand is printed through the %t1 modifier.
5396 (define_insn "vec_unpacks_lo_v16sf"
5397 [(set (match_operand:V8DF 0 "register_operand" "=v")
5398 (float_extend:V8DF
5399 (vec_select:V8SF
5400 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
5401 (parallel [(const_int 0) (const_int 1)
5402 (const_int 2) (const_int 3)
5403 (const_int 4) (const_int 5)
5404 (const_int 6) (const_int 7)]))))]
5405 "TARGET_AVX512F"
5406 "vcvtps2pd\t{%t1, %0|%0, %t1}"
5407 [(set_attr "type" "ssecvt")
5408 (set_attr "prefix" "evex")
5409 (set_attr "mode" "V8DF")])
5410
;; UNSPEC_CVTINT2MASK of a VI12_AVX512VL register into an
;; <avx512fmaskmode> destination (constraint Yk); emits
;; vpmov<ssemodesuffix>2m.  Requires TARGET_AVX512BW.
5411 (define_insn "<avx512>_cvt<ssemodesuffix>2mask<mode>"
5412 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
5413 (unspec:<avx512fmaskmode>
5414 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")]
5415 UNSPEC_CVTINT2MASK))]
5416 "TARGET_AVX512BW"
5417 "vpmov<ssemodesuffix>2m\t{%1, %0|%0, %1}"
5418 [(set_attr "prefix" "evex")
5419 (set_attr "mode" "<sseinsnmode>")])
5420
;; UNSPEC_CVTINT2MASK of a VI48_AVX512VL register into an
;; <avx512fmaskmode> destination (constraint Yk); emits
;; vpmov<ssemodesuffix>2m.  Requires TARGET_AVX512DQ.
5421 (define_insn "<avx512>_cvt<ssemodesuffix>2mask<mode>"
5422 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
5423 (unspec:<avx512fmaskmode>
5424 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")]
5425 UNSPEC_CVTINT2MASK))]
5426 "TARGET_AVX512DQ"
5427 "vpmov<ssemodesuffix>2m\t{%1, %0|%0, %1}"
5428 [(set_attr "prefix" "evex")
5429 (set_attr "mode" "<sseinsnmode>")])
5430
;; Expander: vec_merge of operands 2 and 3 under mask operand 1 for
;; VI12_AVX512VL modes.  The preparation code sets operand 2 to
;; CONSTM1_RTX (<MODE>mode) and operand 3 to CONST0_RTX (<MODE>mode).
;; Requires TARGET_AVX512BW.
5431 (define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>"
5432 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
5433 (vec_merge:VI12_AVX512VL
5434 (match_dup 2)
5435 (match_dup 3)
5436 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
5437 "TARGET_AVX512BW"
5438 {
5439 operands[2] = CONSTM1_RTX (<MODE>mode);
5440 operands[3] = CONST0_RTX (<MODE>mode);
5441 })
5442
;; Insn matching the vec_merge of a constm1 vector and a const0 vector
;; under a Yk mask register for VI12_AVX512VL modes; emits
;; vpmovm2<ssemodesuffix>.  Requires TARGET_AVX512BW.
5443 (define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>"
5444 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
5445 (vec_merge:VI12_AVX512VL
5446 (match_operand:VI12_AVX512VL 2 "constm1_operand")
5447 (match_operand:VI12_AVX512VL 3 "const0_operand")
5448 (match_operand:<avx512fmaskmode> 1 "register_operand" "Yk")))]
5449 "TARGET_AVX512BW"
5450 "vpmovm2<ssemodesuffix>\t{%1, %0|%0, %1}"
5451 [(set_attr "prefix" "evex")
5452 (set_attr "mode" "<sseinsnmode>")])
5453
;; Expander: vec_merge of operands 2 and 3 under mask operand 1 for
;; VI48_AVX512VL modes.  The preparation code sets operand 2 to
;; CONSTM1_RTX (<MODE>mode) and operand 3 to CONST0_RTX (<MODE>mode).
;; Requires TARGET_AVX512DQ.  The preparation code is written as a brace
;; block, matching the VI12_AVX512VL expander of the same name.
5454 (define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>"
5455 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
5456 (vec_merge:VI48_AVX512VL
5457 (match_dup 2)
5458 (match_dup 3)
5459 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
5460 "TARGET_AVX512DQ"
5461 {
5462 operands[2] = CONSTM1_RTX (<MODE>mode);
5463 operands[3] = CONST0_RTX (<MODE>mode);
5464 })
5465
;; Insn matching the vec_merge of a constm1 vector and a const0 vector
;; under a Yk mask register for VI48_AVX512VL modes; emits
;; vpmovm2<ssemodesuffix>.  Requires TARGET_AVX512DQ.
5466 (define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>"
5467 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
5468 (vec_merge:VI48_AVX512VL
5469 (match_operand:VI48_AVX512VL 2 "constm1_operand")
5470 (match_operand:VI48_AVX512VL 3 "const0_operand")
5471 (match_operand:<avx512fmaskmode> 1 "register_operand" "Yk")))]
5472 "TARGET_AVX512DQ"
5473 "vpmovm2<ssemodesuffix>\t{%1, %0|%0, %1}"
5474 [(set_attr "prefix" "evex")
5475 (set_attr "mode" "<sseinsnmode>")])
5476
;; float_extend of elements 0 and 1 of a V4SF operand to V2DF; emits
;; %vcvtps2pd.  In the second assembler-dialect alternative the source is
;; printed through the %q1 modifier.
5477 (define_insn "sse2_cvtps2pd<mask_name>"
5478 [(set (match_operand:V2DF 0 "register_operand" "=v")
5479 (float_extend:V2DF
5480 (vec_select:V2SF
5481 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
5482 (parallel [(const_int 0) (const_int 1)]))))]
5483 "TARGET_SSE2 && <mask_avx512vl_condition>"
5484 "%vcvtps2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
5485 [(set_attr "type" "ssecvt")
5486 (set_attr "amdfam10_decode" "direct")
5487 (set_attr "athlon_decode" "double")
5488 (set_attr "bdver1_decode" "double")
5489 (set_attr "prefix_data16" "0")
5490 (set_attr "prefix" "maybe_vex")
5491 (set_attr "mode" "V2DF")])
5492
;; Expander with two sets.  First a V4SF temporary (operand 2, a fresh
;; pseudo from gen_reg_rtx) receives elements 6, 7, 2, 3 of the V8SF
;; vec_concat of itself and operand 1, which puts the two high elements of
;; operand 1 into its positions 0 and 1.  Then elements 0 and 1 of that
;; temporary are float_extend'ed to the V2DF destination.
5493 (define_expand "vec_unpacks_hi_v4sf"
5494 [(set (match_dup 2)
5495 (vec_select:V4SF
5496 (vec_concat:V8SF
5497 (match_dup 2)
5498 (match_operand:V4SF 1 "nonimmediate_operand"))
5499 (parallel [(const_int 6) (const_int 7)
5500 (const_int 2) (const_int 3)])))
5501 (set (match_operand:V2DF 0 "register_operand")
5502 (float_extend:V2DF
5503 (vec_select:V2SF
5504 (match_dup 2)
5505 (parallel [(const_int 0) (const_int 1)]))))]
5506 "TARGET_SSE2"
5507 "operands[2] = gen_reg_rtx (V4SFmode);")
5508
;; Expander: select elements 4-7 of V8SF operand 1 into a fresh V4SF
;; pseudo (operand 2), then float_extend that pseudo to the V4DF
;; destination.  Requires TARGET_AVX.
5509 (define_expand "vec_unpacks_hi_v8sf"
5510 [(set (match_dup 2)
5511 (vec_select:V4SF
5512 (match_operand:V8SF 1 "register_operand")
5513 (parallel [(const_int 4) (const_int 5)
5514 (const_int 6) (const_int 7)])))
5515 (set (match_operand:V4DF 0 "register_operand")
5516 (float_extend:V4DF
5517 (match_dup 2)))]
5518 "TARGET_AVX"
5519 "operands[2] = gen_reg_rtx (V4SFmode);")
5520
;; Expander: select elements 8-15 of V16SF operand 1 into a fresh V8SF
;; pseudo (operand 2), then float_extend that pseudo to the V8DF
;; destination.  Requires TARGET_AVX512F.
5521 (define_expand "vec_unpacks_hi_v16sf"
5522 [(set (match_dup 2)
5523 (vec_select:V8SF
5524 (match_operand:V16SF 1 "register_operand")
5525 (parallel [(const_int 8) (const_int 9)
5526 (const_int 10) (const_int 11)
5527 (const_int 12) (const_int 13)
5528 (const_int 14) (const_int 15)])))
5529 (set (match_operand:V8DF 0 "register_operand")
5530 (float_extend:V8DF
5531 (match_dup 2)))]
5532 "TARGET_AVX512F"
5533 "operands[2] = gen_reg_rtx (V8SFmode);")
5534
;; Expander: float_extend of elements 0 and 1 of V4SF operand 1 to the
;; V2DF destination.  No preparation code; requires TARGET_SSE2.
5535 (define_expand "vec_unpacks_lo_v4sf"
5536 [(set (match_operand:V2DF 0 "register_operand")
5537 (float_extend:V2DF
5538 (vec_select:V2SF
5539 (match_operand:V4SF 1 "nonimmediate_operand")
5540 (parallel [(const_int 0) (const_int 1)]))))]
5541 "TARGET_SSE2")
5542
;; Expander: float_extend of elements 0-3 of V8SF operand 1 to the V4DF
;; destination.  No preparation code; requires TARGET_AVX.
5543 (define_expand "vec_unpacks_lo_v8sf"
5544 [(set (match_operand:V4DF 0 "register_operand")
5545 (float_extend:V4DF
5546 (vec_select:V4SF
5547 (match_operand:V8SF 1 "nonimmediate_operand")
5548 (parallel [(const_int 0) (const_int 1)
5549 (const_int 2) (const_int 3)]))))]
5550 "TARGET_AVX")
5551
;; Maps an integer vector mode to the floating-point vector mode with
;; half as many elements (e.g. V8HI -> V4SF, V4SI -> V2DF); used as the
;; destination mode of the vec_unpack[su]_float_{hi,lo}_<mode> expanders.
5552 (define_mode_attr sseunpackfltmode
5553 [(V8HI "V4SF") (V4SI "V2DF") (V16HI "V8SF")
5554 (V8SI "V4DF") (V32HI "V16SF") (V16SI "V8DF")])
5555
;; Expander for VI2_AVX512F modes: emit gen_vec_unpacks_hi_<mode> from
;; operand 1 into a fresh <sseunpackmode> pseudo, then set operand 0 to
;; the FLOAT of that pseudo in <sseunpackfltmode>.
5556 (define_expand "vec_unpacks_float_hi_<mode>"
5557 [(match_operand:<sseunpackfltmode> 0 "register_operand")
5558 (match_operand:VI2_AVX512F 1 "register_operand")]
5559 "TARGET_SSE2"
5560 {
5561 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
5562
5563 emit_insn (gen_vec_unpacks_hi_<mode> (tmp, operands[1]));
5564 emit_insn (gen_rtx_SET (operands[0],
5565 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
5566 DONE;
5567 })
5568
;; Expander for VI2_AVX512F modes: emit gen_vec_unpacks_lo_<mode> from
;; operand 1 into a fresh <sseunpackmode> pseudo, then set operand 0 to
;; the FLOAT of that pseudo in <sseunpackfltmode>.
5569 (define_expand "vec_unpacks_float_lo_<mode>"
5570 [(match_operand:<sseunpackfltmode> 0 "register_operand")
5571 (match_operand:VI2_AVX512F 1 "register_operand")]
5572 "TARGET_SSE2"
5573 {
5574 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
5575
5576 emit_insn (gen_vec_unpacks_lo_<mode> (tmp, operands[1]));
5577 emit_insn (gen_rtx_SET (operands[0],
5578 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
5579 DONE;
5580 })
5581
;; Expander for VI2_AVX512F modes: emit gen_vec_unpacku_hi_<mode> from
;; operand 1 into a fresh <sseunpackmode> pseudo, then set operand 0 to
;; the FLOAT of that pseudo in <sseunpackfltmode>.  A signed FLOAT is
;; used after the unsigned unpack.
5582 (define_expand "vec_unpacku_float_hi_<mode>"
5583 [(match_operand:<sseunpackfltmode> 0 "register_operand")
5584 (match_operand:VI2_AVX512F 1 "register_operand")]
5585 "TARGET_SSE2"
5586 {
5587 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
5588
5589 emit_insn (gen_vec_unpacku_hi_<mode> (tmp, operands[1]));
5590 emit_insn (gen_rtx_SET (operands[0],
5591 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
5592 DONE;
5593 })
5594
;; Expander for VI2_AVX512F modes: emit gen_vec_unpacku_lo_<mode> from
;; operand 1 into a fresh <sseunpackmode> pseudo, then set operand 0 to
;; the FLOAT of that pseudo in <sseunpackfltmode>.  A signed FLOAT is
;; used after the unsigned unpack.
5595 (define_expand "vec_unpacku_float_lo_<mode>"
5596 [(match_operand:<sseunpackfltmode> 0 "register_operand")
5597 (match_operand:VI2_AVX512F 1 "register_operand")]
5598 "TARGET_SSE2"
5599 {
5600 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
5601
5602 emit_insn (gen_vec_unpacku_lo_<mode> (tmp, operands[1]));
5603 emit_insn (gen_rtx_SET (operands[0],
5604 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
5605 DONE;
5606 })
5607
;; Expander with two sets.  First a fresh V4SI pseudo (operand 2) receives
;; elements 2, 3, 2, 3 of V4SI operand 1; then elements 0 and 1 of that
;; pseudo are converted with float to the V2DF destination.
5608 (define_expand "vec_unpacks_float_hi_v4si"
5609 [(set (match_dup 2)
5610 (vec_select:V4SI
5611 (match_operand:V4SI 1 "nonimmediate_operand")
5612 (parallel [(const_int 2) (const_int 3)
5613 (const_int 2) (const_int 3)])))
5614 (set (match_operand:V2DF 0 "register_operand")
5615 (float:V2DF
5616 (vec_select:V2SI
5617 (match_dup 2)
5618 (parallel [(const_int 0) (const_int 1)]))))]
5619 "TARGET_SSE2"
5620 "operands[2] = gen_reg_rtx (V4SImode);")
5621
;; Expander: float of elements 0 and 1 of V4SI operand 1 to the V2DF
;; destination.  No preparation code; requires TARGET_SSE2.
5622 (define_expand "vec_unpacks_float_lo_v4si"
5623 [(set (match_operand:V2DF 0 "register_operand")
5624 (float:V2DF
5625 (vec_select:V2SI
5626 (match_operand:V4SI 1 "nonimmediate_operand")
5627 (parallel [(const_int 0) (const_int 1)]))))]
5628 "TARGET_SSE2")
5629
;; Expander: select elements 4-7 of V8SI operand 1 into a fresh V4SI
;; pseudo (operand 2), then convert that pseudo with float to the V4DF
;; destination.  Requires TARGET_AVX.
5630 (define_expand "vec_unpacks_float_hi_v8si"
5631 [(set (match_dup 2)
5632 (vec_select:V4SI
5633 (match_operand:V8SI 1 "nonimmediate_operand")
5634 (parallel [(const_int 4) (const_int 5)
5635 (const_int 6) (const_int 7)])))
5636 (set (match_operand:V4DF 0 "register_operand")
5637 (float:V4DF
5638 (match_dup 2)))]
5639 "TARGET_AVX"
5640 "operands[2] = gen_reg_rtx (V4SImode);")
5641
;; Expander: float of elements 0-3 of V8SI operand 1 to the V4DF
;; destination.  No preparation code; requires TARGET_AVX.
5642 (define_expand "vec_unpacks_float_lo_v8si"
5643 [(set (match_operand:V4DF 0 "register_operand")
5644 (float:V4DF
5645 (vec_select:V4SI
5646 (match_operand:V8SI 1 "nonimmediate_operand")
5647 (parallel [(const_int 0) (const_int 1)
5648 (const_int 2) (const_int 3)]))))]
5649 "TARGET_AVX")
5650
;; Expander: select elements 8-15 of V16SI operand 1 into a fresh V8SI
;; pseudo (operand 2), then convert that pseudo with float to the V8DF
;; destination.  Requires TARGET_AVX512F.
5651 (define_expand "vec_unpacks_float_hi_v16si"
5652 [(set (match_dup 2)
5653 (vec_select:V8SI
5654 (match_operand:V16SI 1 "nonimmediate_operand")
5655 (parallel [(const_int 8) (const_int 9)
5656 (const_int 10) (const_int 11)
5657 (const_int 12) (const_int 13)
5658 (const_int 14) (const_int 15)])))
5659 (set (match_operand:V8DF 0 "register_operand")
5660 (float:V8DF
5661 (match_dup 2)))]
5662 "TARGET_AVX512F"
5663 "operands[2] = gen_reg_rtx (V8SImode);")
5664
;; Expander: float of elements 0-7 of V16SI operand 1 to the V8DF
;; destination.  No preparation code; requires TARGET_AVX512F.
5665 (define_expand "vec_unpacks_float_lo_v16si"
5666 [(set (match_operand:V8DF 0 "register_operand")
5667 (float:V8DF
5668 (vec_select:V8SI
5669 (match_operand:V16SI 1 "nonimmediate_operand")
5670 (parallel [(const_int 0) (const_int 1)
5671 (const_int 2) (const_int 3)
5672 (const_int 4) (const_int 5)
5673 (const_int 6) (const_int 7)]))))]
5674 "TARGET_AVX512F")
5675
;; Expander converting the high two elements of V4SI operand 1 to V2DF as
;; unsigned values.  Emitted sequence:
;;   operand 5 = elements 2, 3, 2, 3 of operand 1
;;   operand 6 = signed float of elements 0, 1 of operand 5
;;   operand 7 = (operand 6 < operand 3)
;;   operand 8 = operand 7 AND operand 4
;;   operand 0 = operand 6 + operand 8
;; The preparation code makes operand 3 a register holding
;; CONST0_RTX (V2DFmode) and operand 4 a register holding the vector that
;; ix86_build_const_vector builds from real_ldexp (dconst1, 32), i.e. the
;; value 1 scaled by 2**32.  Lanes whose signed conversion came out
;; negative therefore get that constant added back.  Operands 5-8 are
;; fresh pseudos.
5676 (define_expand "vec_unpacku_float_hi_v4si"
5677 [(set (match_dup 5)
5678 (vec_select:V4SI
5679 (match_operand:V4SI 1 "nonimmediate_operand")
5680 (parallel [(const_int 2) (const_int 3)
5681 (const_int 2) (const_int 3)])))
5682 (set (match_dup 6)
5683 (float:V2DF
5684 (vec_select:V2SI
5685 (match_dup 5)
5686 (parallel [(const_int 0) (const_int 1)]))))
5687 (set (match_dup 7)
5688 (lt:V2DF (match_dup 6) (match_dup 3)))
5689 (set (match_dup 8)
5690 (and:V2DF (match_dup 7) (match_dup 4)))
5691 (set (match_operand:V2DF 0 "register_operand")
5692 (plus:V2DF (match_dup 6) (match_dup 8)))]
5693 "TARGET_SSE2"
5694 {
5695 REAL_VALUE_TYPE TWO32r;
5696 rtx x;
5697 int i;
5698
5699 real_ldexp (&TWO32r, &dconst1, 32);
5700 x = const_double_from_real_value (TWO32r, DFmode);
5701
5702 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
5703 operands[4] = force_reg (V2DFmode,
5704 ix86_build_const_vector (V2DFmode, 1, x));
5705
5706 operands[5] = gen_reg_rtx (V4SImode);
5707
5708 for (i = 6; i < 9; i++)
5709 operands[i] = gen_reg_rtx (V2DFmode);
5710 })
5711
;; Expander converting the low two elements of V4SI operand 1 to V2DF as
;; unsigned values.  Emitted sequence:
;;   operand 5 = signed float of elements 0, 1 of operand 1
;;   operand 6 = (operand 5 < operand 3)
;;   operand 7 = operand 6 AND operand 4
;;   operand 0 = operand 5 + operand 7
;; The preparation code makes operand 3 a register holding
;; CONST0_RTX (V2DFmode) and operand 4 a register holding the vector that
;; ix86_build_const_vector builds from real_ldexp (dconst1, 32), i.e. the
;; value 1 scaled by 2**32.  Operands 5-7 are fresh V2DF pseudos.
5712 (define_expand "vec_unpacku_float_lo_v4si"
5713 [(set (match_dup 5)
5714 (float:V2DF
5715 (vec_select:V2SI
5716 (match_operand:V4SI 1 "nonimmediate_operand")
5717 (parallel [(const_int 0) (const_int 1)]))))
5718 (set (match_dup 6)
5719 (lt:V2DF (match_dup 5) (match_dup 3)))
5720 (set (match_dup 7)
5721 (and:V2DF (match_dup 6) (match_dup 4)))
5722 (set (match_operand:V2DF 0 "register_operand")
5723 (plus:V2DF (match_dup 5) (match_dup 7)))]
5724 "TARGET_SSE2"
5725 {
5726 REAL_VALUE_TYPE TWO32r;
5727 rtx x;
5728 int i;
5729
5730 real_ldexp (&TWO32r, &dconst1, 32);
5731 x = const_double_from_real_value (TWO32r, DFmode);
5732
5733 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
5734 operands[4] = force_reg (V2DFmode,
5735 ix86_build_const_vector (V2DFmode, 1, x));
5736
5737 for (i = 5; i < 8; i++)
5738 operands[i] = gen_reg_rtx (V2DFmode);
5739 })
5740
;; Expander converting the high half of V8SI operand 1 to V4DF as
;; unsigned values; all RTL is emitted from the C body.  Steps: extract
;; the high V4SI (gen_vec_extract_hi_v8si), convert it with
;; gen_floatv4siv4df2, compute an LT mask against a zero vector, AND the
;; mask with a vector built from real_ldexp (dconst1, 32) (1 scaled by
;; 2**32), and add the masked constant to the converted value.
;; tmp[0] = zero, tmp[1] = the 2**32 vector, tmp[2..4] = V4DF scratch,
;; tmp[5] = V4SI scratch.
5741 (define_expand "vec_unpacku_float_hi_v8si"
5742 [(match_operand:V4DF 0 "register_operand")
5743 (match_operand:V8SI 1 "register_operand")]
5744 "TARGET_AVX"
5745 {
5746 REAL_VALUE_TYPE TWO32r;
5747 rtx x, tmp[6];
5748 int i;
5749
5750 real_ldexp (&TWO32r, &dconst1, 32);
5751 x = const_double_from_real_value (TWO32r, DFmode);
5752
5753 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
5754 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
5755 tmp[5] = gen_reg_rtx (V4SImode);
5756
5757 for (i = 2; i < 5; i++)
5758 tmp[i] = gen_reg_rtx (V4DFmode);
5759 emit_insn (gen_vec_extract_hi_v8si (tmp[5], operands[1]));
5760 emit_insn (gen_floatv4siv4df2 (tmp[2], tmp[5]));
5761 emit_insn (gen_rtx_SET (tmp[3], gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
5762 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
5763 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
5764 DONE;
5765 })
5766
;; Expander converting the high half of V16SI operand 1 to V8DF as
;; unsigned values; all RTL is emitted from the C body.  Steps: extract
;; the high V8SI (gen_vec_extract_hi_v16si), convert it with
;; gen_floatv8siv8df2, compute a QImode LT result k against a zero vector,
;; then call gen_addv8df3_mask to add the vector built from
;; real_ldexp (dconst1, 32) (1 scaled by 2**32) with tmp[2] as the merge
;; source and k as the mask, and finally move tmp[2] to operand 0.
5767 (define_expand "vec_unpacku_float_hi_v16si"
5768 [(match_operand:V8DF 0 "register_operand")
5769 (match_operand:V16SI 1 "register_operand")]
5770 "TARGET_AVX512F"
5771 {
5772 REAL_VALUE_TYPE TWO32r;
5773 rtx k, x, tmp[4];
5774
5775 real_ldexp (&TWO32r, &dconst1, 32);
5776 x = const_double_from_real_value (TWO32r, DFmode);
5777
5778 tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
5779 tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
5780 tmp[2] = gen_reg_rtx (V8DFmode);
5781 tmp[3] = gen_reg_rtx (V8SImode);
5782 k = gen_reg_rtx (QImode);
5783
5784 emit_insn (gen_vec_extract_hi_v16si (tmp[3], operands[1]));
5785 emit_insn (gen_floatv8siv8df2 (tmp[2], tmp[3]));
5786 emit_insn (gen_rtx_SET (k, gen_rtx_LT (QImode, tmp[2], tmp[0])));
5787 emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
5788 emit_move_insn (operands[0], tmp[2]);
5789 DONE;
5790 })
5791
;; Expander converting the low half of V8SI operand 1 to V4DF as unsigned
;; values; all RTL is emitted from the C body.  Steps: convert with
;; gen_avx_cvtdq2pd256_2, compute an LT mask against a zero vector, AND
;; the mask with a vector built from real_ldexp (dconst1, 32) (1 scaled
;; by 2**32), and add the masked constant to the converted value.
;; tmp[0] = zero, tmp[1] = the 2**32 vector, tmp[2..4] = V4DF scratch.
5792 (define_expand "vec_unpacku_float_lo_v8si"
5793 [(match_operand:V4DF 0 "register_operand")
5794 (match_operand:V8SI 1 "nonimmediate_operand")]
5795 "TARGET_AVX"
5796 {
5797 REAL_VALUE_TYPE TWO32r;
5798 rtx x, tmp[5];
5799 int i;
5800
5801 real_ldexp (&TWO32r, &dconst1, 32);
5802 x = const_double_from_real_value (TWO32r, DFmode);
5803
5804 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
5805 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
5806
5807 for (i = 2; i < 5; i++)
5808 tmp[i] = gen_reg_rtx (V4DFmode);
5809 emit_insn (gen_avx_cvtdq2pd256_2 (tmp[2], operands[1]));
5810 emit_insn (gen_rtx_SET (tmp[3], gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
5811 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
5812 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
5813 DONE;
5814 })
5815
;; Expander converting the low half of V16SI operand 1 to V8DF as
;; unsigned values; all RTL is emitted from the C body.  Steps: convert
;; with gen_avx512f_cvtdq2pd512_2, compute a QImode LT result k against a
;; zero vector, then call gen_addv8df3_mask to add the vector built from
;; real_ldexp (dconst1, 32) (1 scaled by 2**32) with tmp[2] as the merge
;; source and k as the mask, and finally move tmp[2] to operand 0.
5816 (define_expand "vec_unpacku_float_lo_v16si"
5817 [(match_operand:V8DF 0 "register_operand")
5818 (match_operand:V16SI 1 "nonimmediate_operand")]
5819 "TARGET_AVX512F"
5820 {
5821 REAL_VALUE_TYPE TWO32r;
5822 rtx k, x, tmp[3];
5823
5824 real_ldexp (&TWO32r, &dconst1, 32);
5825 x = const_double_from_real_value (TWO32r, DFmode);
5826
5827 tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
5828 tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
5829 tmp[2] = gen_reg_rtx (V8DFmode);
5830 k = gen_reg_rtx (QImode);
5831
5832 emit_insn (gen_avx512f_cvtdq2pd512_2 (tmp[2], operands[1]));
5833 emit_insn (gen_rtx_SET (k, gen_rtx_LT (QImode, tmp[2], tmp[0])));
5834 emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
5835 emit_move_insn (operands[0], tmp[2]);
5836 DONE;
5837 })
5838
;; Expander for VF2_512_256 modes: float_truncate operands 1 and 2
;; separately into two fresh <sf2dfmode> pseudos (operands 3 and 4), then
;; vec_concat them, operand 3 first, into the <ssePSmode> destination.
5839 (define_expand "vec_pack_trunc_<mode>"
5840 [(set (match_dup 3)
5841 (float_truncate:<sf2dfmode>
5842 (match_operand:VF2_512_256 1 "nonimmediate_operand")))
5843 (set (match_dup 4)
5844 (float_truncate:<sf2dfmode>
5845 (match_operand:VF2_512_256 2 "nonimmediate_operand")))
5846 (set (match_operand:<ssePSmode> 0 "register_operand")
5847 (vec_concat:<ssePSmode>
5848 (match_dup 3)
5849 (match_dup 4)))]
5850 "TARGET_AVX"
5851 {
5852 operands[3] = gen_reg_rtx (<sf2dfmode>mode);
5853 operands[4] = gen_reg_rtx (<sf2dfmode>mode);
5854 })
5855
;; Expander packing two V2DF operands into one V4SF.  Two strategies:
;;  - when TARGET_AVX && !TARGET_PREFER_AVX128 and optimizing the insn for
;;    speed: concatenate the inputs into a V4DF (operand 1 forced into a
;;    register first) and convert once with gen_avx_cvtpd2ps256;
;;  - otherwise: convert each input with gen_sse2_cvtpd2ps into its own
;;    V4SF temporary and combine the two with gen_sse_movlhps.
5856 (define_expand "vec_pack_trunc_v2df"
5857 [(match_operand:V4SF 0 "register_operand")
5858 (match_operand:V2DF 1 "nonimmediate_operand")
5859 (match_operand:V2DF 2 "nonimmediate_operand")]
5860 "TARGET_SSE2"
5861 {
5862 rtx tmp0, tmp1;
5863
5864 if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
5865 {
5866 tmp0 = gen_reg_rtx (V4DFmode);
5867 tmp1 = force_reg (V2DFmode, operands[1]);
5868
5869 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
5870 emit_insn (gen_avx_cvtpd2ps256 (operands[0], tmp0));
5871 }
5872 else
5873 {
5874 tmp0 = gen_reg_rtx (V4SFmode);
5875 tmp1 = gen_reg_rtx (V4SFmode);
5876
5877 emit_insn (gen_sse2_cvtpd2ps (tmp0, operands[1]));
5878 emit_insn (gen_sse2_cvtpd2ps (tmp1, operands[2]));
5879 emit_insn (gen_sse_movlhps (operands[0], tmp0, tmp1));
5880 }
5881 DONE;
5882 })
5883
;; Expander packing two V8DF operands into one V16SI: each input is
;; converted with gen_fix_truncv8dfv8si2 into its own V8SI pseudo and the
;; two halves are joined with gen_avx_vec_concatv16si (operand 1's result
;; first).  Requires TARGET_AVX512F.
5884 (define_expand "vec_pack_sfix_trunc_v8df"
5885 [(match_operand:V16SI 0 "register_operand")
5886 (match_operand:V8DF 1 "nonimmediate_operand")
5887 (match_operand:V8DF 2 "nonimmediate_operand")]
5888 "TARGET_AVX512F"
5889 {
5890 rtx r1, r2;
5891
5892 r1 = gen_reg_rtx (V8SImode);
5893 r2 = gen_reg_rtx (V8SImode);
5894
5895 emit_insn (gen_fix_truncv8dfv8si2 (r1, operands[1]));
5896 emit_insn (gen_fix_truncv8dfv8si2 (r2, operands[2]));
5897 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
5898 DONE;
5899 })
5900
;; Expander packing two V4DF operands into one V8SI: each input is
;; converted with gen_fix_truncv4dfv4si2 into its own V4SI pseudo and the
;; two halves are joined with gen_avx_vec_concatv8si (operand 1's result
;; first).  Requires TARGET_AVX.
5901 (define_expand "vec_pack_sfix_trunc_v4df"
5902 [(match_operand:V8SI 0 "register_operand")
5903 (match_operand:V4DF 1 "nonimmediate_operand")
5904 (match_operand:V4DF 2 "nonimmediate_operand")]
5905 "TARGET_AVX"
5906 {
5907 rtx r1, r2;
5908
5909 r1 = gen_reg_rtx (V4SImode);
5910 r2 = gen_reg_rtx (V4SImode);
5911
5912 emit_insn (gen_fix_truncv4dfv4si2 (r1, operands[1]));
5913 emit_insn (gen_fix_truncv4dfv4si2 (r2, operands[2]));
5914 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
5915 DONE;
5916 })
5917
;; Expander packing two V2DF operands into one V4SI with truncating
;; signed conversion.  Two strategies:
;;  - when TARGET_AVX && !TARGET_PREFER_AVX128 and optimizing the insn for
;;    speed: concatenate the inputs into a V4DF (operand 1 forced into a
;;    register first) and convert once with gen_fix_truncv4dfv4si2;
;;  - otherwise: convert each input with gen_sse2_cvttpd2dq into its own
;;    V4SI temporary, interleave the low V2DI elements of the two
;;    temporaries with gen_vec_interleave_lowv2di, and move the V4SI
;;    lowpart of that result to operand 0.
5918 (define_expand "vec_pack_sfix_trunc_v2df"
5919 [(match_operand:V4SI 0 "register_operand")
5920 (match_operand:V2DF 1 "nonimmediate_operand")
5921 (match_operand:V2DF 2 "nonimmediate_operand")]
5922 "TARGET_SSE2"
5923 {
5924 rtx tmp0, tmp1, tmp2;
5925
5926 if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
5927 {
5928 tmp0 = gen_reg_rtx (V4DFmode);
5929 tmp1 = force_reg (V2DFmode, operands[1]);
5930
5931 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
5932 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp0));
5933 }
5934 else
5935 {
5936 tmp0 = gen_reg_rtx (V4SImode);
5937 tmp1 = gen_reg_rtx (V4SImode);
5938 tmp2 = gen_reg_rtx (V2DImode);
5939
5940 emit_insn (gen_sse2_cvttpd2dq (tmp0, operands[1]));
5941 emit_insn (gen_sse2_cvttpd2dq (tmp1, operands[2]));
5942 emit_insn (gen_vec_interleave_lowv2di (tmp2,
5943 gen_lowpart (V2DImode, tmp0),
5944 gen_lowpart (V2DImode, tmp1)));
5945 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
5946 }
5947 DONE;
5948 })
5949
;; Maps a double-precision vector mode to the SImode vector mode with
;; twice as many elements: V8DF -> V16SI, V4DF -> V8SI, V2DF -> V4SI.
5950 (define_mode_attr ssepackfltmode
5951 [(V8DF "V16SI") (V4DF "V8SI") (V2DF "V4SI")])
5952
;; Expander packing two VF2 operands into one <ssepackfltmode> vector with
;; truncating unsigned conversion.
;;  - V8DF: convert each input with gen_ufix_truncv8dfv8si2 and join the
;;    two V8SI results with gen_avx_vec_concatv16si.
;;  - other modes: ix86_expand_adjust_ufix_to_sfix_si produces an adjusted
;;    input (tmp[0], tmp[1]) plus a companion value (tmp[2], tmp[3]) for
;;    each operand.  The adjusted inputs are packed with the signed
;;    gen_vec_pack_sfix_trunc_<mode> into tmp[4]; the even elements of the
;;    two companion values are gathered into tmp[5] with
;;    ix86_expand_vec_extract_even_odd; and the result is tmp[4] XOR
;;    tmp[5].  When the packed mode is not V4SI and TARGET_AVX2 is off,
;;    the even/odd extraction is done in V8SFmode on lowparts and the
;;    result is viewed back as V8SI.
;; NOTE(review): the exact contents of the companion values come from
;; ix86_expand_adjust_ufix_to_sfix_si, which is not visible here --
;; presumably a sign-bit correction mask; confirm in i386.c.
5953 (define_expand "vec_pack_ufix_trunc_<mode>"
5954 [(match_operand:<ssepackfltmode> 0 "register_operand")
5955 (match_operand:VF2 1 "register_operand")
5956 (match_operand:VF2 2 "register_operand")]
5957 "TARGET_SSE2"
5958 {
5959 if (<MODE>mode == V8DFmode)
5960 {
5961 rtx r1, r2;
5962
5963 r1 = gen_reg_rtx (V8SImode);
5964 r2 = gen_reg_rtx (V8SImode);
5965
5966 emit_insn (gen_ufix_truncv8dfv8si2 (r1, operands[1]));
5967 emit_insn (gen_ufix_truncv8dfv8si2 (r2, operands[2]));
5968 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
5969 }
5970 else
5971 {
5972 rtx tmp[7];
5973 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
5974 tmp[1] = ix86_expand_adjust_ufix_to_sfix_si (operands[2], &tmp[3]);
5975 tmp[4] = gen_reg_rtx (<ssepackfltmode>mode);
5976 emit_insn (gen_vec_pack_sfix_trunc_<mode> (tmp[4], tmp[0], tmp[1]));
5977 if (<ssepackfltmode>mode == V4SImode || TARGET_AVX2)
5978 {
5979 tmp[5] = gen_reg_rtx (<ssepackfltmode>mode);
5980 ix86_expand_vec_extract_even_odd (tmp[5], tmp[2], tmp[3], 0);
5981 }
5982 else
5983 {
5984 tmp[5] = gen_reg_rtx (V8SFmode);
5985 ix86_expand_vec_extract_even_odd (tmp[5], gen_lowpart (V8SFmode, tmp[2]),
5986 gen_lowpart (V8SFmode, tmp[3]), 0);
5987 tmp[5] = gen_lowpart (V8SImode, tmp[5]);
5988 }
5989 tmp[6] = expand_simple_binop (<ssepackfltmode>mode, XOR, tmp[4], tmp[5],
5990 operands[0], 0, OPTAB_DIRECT);
5991 if (tmp[6] != operands[0])
5992 emit_move_insn (operands[0], tmp[6]);
5993 }
5994
5995 DONE;
5996 })
5997
;; Expander packing two V4DF operands into one V8SI: each input is
;; converted with gen_avx_cvtpd2dq256 into its own V4SI pseudo and the
;; two halves are joined with gen_avx_vec_concatv8si (operand 1's result
;; first).  Requires TARGET_AVX.
5998 (define_expand "vec_pack_sfix_v4df"
5999 [(match_operand:V8SI 0 "register_operand")
6000 (match_operand:V4DF 1 "nonimmediate_operand")
6001 (match_operand:V4DF 2 "nonimmediate_operand")]
6002 "TARGET_AVX"
6003 {
6004 rtx r1, r2;
6005
6006 r1 = gen_reg_rtx (V4SImode);
6007 r2 = gen_reg_rtx (V4SImode);
6008
6009 emit_insn (gen_avx_cvtpd2dq256 (r1, operands[1]));
6010 emit_insn (gen_avx_cvtpd2dq256 (r2, operands[2]));
6011 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
6012 DONE;
6013 })
6014
;; Convert two V2DF vectors to integers and pack them into the V4SI
;; operand 0.  Either joins the inputs into one V4DF and converts once, or
;; converts each input separately and interleaves the low quadwords.
(define_expand "vec_pack_sfix_v2df"
  [(match_operand:V4SI 0 "register_operand")
   (match_operand:V2DF 1 "nonimmediate_operand")
   (match_operand:V2DF 2 "nonimmediate_operand")]
  "TARGET_SSE2"
{
  if (!TARGET_AVX || TARGET_PREFER_AVX128 || !optimize_insn_for_speed_p ())
    {
      /* Two 128-bit conversions, then glue the low 64 bits of each
         result together through V2DI views.  */
      rtx cvt_lo = gen_reg_rtx (V4SImode);
      rtx cvt_hi = gen_reg_rtx (V4SImode);
      rtx joined = gen_reg_rtx (V2DImode);

      emit_insn (gen_sse2_cvtpd2dq (cvt_lo, operands[1]));
      emit_insn (gen_sse2_cvtpd2dq (cvt_hi, operands[2]));
      emit_insn (gen_vec_interleave_lowv2di (joined,
                                             gen_lowpart (V2DImode, cvt_lo),
                                             gen_lowpart (V2DImode, cvt_hi)));
      emit_move_insn (operands[0], gen_lowpart (V4SImode, joined));
    }
  else
    {
      /* Build one V4DF from both inputs and convert it in a single
         256-bit conversion.  */
      rtx wide = gen_reg_rtx (V4DFmode);
      rtx first = force_reg (V2DFmode, operands[1]);

      emit_insn (gen_avx_vec_concatv4df (wide, first, operands[2]));
      emit_insn (gen_avx_cvtpd2dq256 (operands[0], wide));
    }
  DONE;
})
6046
6047 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6048 ;;
6049 ;; Parallel single-precision floating point element swizzling
6050 ;;
6051 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6052
;; Expander in front of sse_movhlps.  It lets ix86_fixup_binary_operands
;; adjust the operands and choose a destination, emits the insn, and
;; copies the result to operand 0 when a different destination was used.
(define_expand "sse_movhlps_exp"
  [(set (match_operand:V4SF 0 "nonimmediate_operand")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "nonimmediate_operand")
            (match_operand:V4SF 2 "nonimmediate_operand"))
          (parallel [(const_int 6)
                     (const_int 7)
                     (const_int 2)
                     (const_int 3)])))]
  "TARGET_SSE"
{
  rtx target = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);

  emit_insn (gen_sse_movhlps (target, operands[1], operands[2]));

  /* Copy back if the fixup handed us a destination other than operand 0.  */
  if (operands[0] != target)
    emit_move_insn (operands[0], target);

  DONE;
})
6075
;; Result is { op2[2], op2[3], op1[2], op1[3] }: indices 6,7 and then 2,3
;; of the eight-element concatenation (op1, op2).  The insn condition
;; rejects the case where operands 1 and 2 are both in memory.
;; Alternatives 0-1 are register forms (movhlps), alternatives 2-3 load
;; from a memory operand 2 through %H2 (movlps), and alternative 4 stores
;; into a memory destination (movhps).  The "isa" attribute pairs the
;; legacy and VEX encodings.
6076 (define_insn "sse_movhlps"
6077 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
6078 (vec_select:V4SF
6079 (vec_concat:V8SF
6080 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
6081 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,o,o,x"))
6082 (parallel [(const_int 6)
6083 (const_int 7)
6084 (const_int 2)
6085 (const_int 3)])))]
6086 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
6087 "@
6088 movhlps\t{%2, %0|%0, %2}
6089 vmovhlps\t{%2, %1, %0|%0, %1, %2}
6090 movlps\t{%H2, %0|%0, %H2}
6091 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
6092 %vmovhps\t{%2, %0|%q0, %2}"
6093 [(set_attr "isa" "noavx,avx,noavx,avx,*")
6094 (set_attr "type" "ssemov")
6095 (set_attr "ssememalign" "64")
6096 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
6097 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
6098
;; Expander in front of sse_movlhps.  It lets ix86_fixup_binary_operands
;; adjust the operands and choose a destination, emits the insn, and
;; copies the result to operand 0 when a different destination was used.
(define_expand "sse_movlhps_exp"
  [(set (match_operand:V4SF 0 "nonimmediate_operand")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "nonimmediate_operand")
            (match_operand:V4SF 2 "nonimmediate_operand"))
          (parallel [(const_int 0)
                     (const_int 1)
                     (const_int 4)
                     (const_int 5)])))]
  "TARGET_SSE"
{
  rtx target = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);

  emit_insn (gen_sse_movlhps (target, operands[1], operands[2]));

  /* Copy back if the fixup handed us a destination other than operand 0.  */
  if (operands[0] != target)
    emit_move_insn (operands[0], target);

  DONE;
})
6121
;; Result is { op1[0], op1[1], op2[0], op2[1] }: indices 0,1 and then 4,5
;; of the eight-element concatenation (op1, op2).  Operand validity is
;; delegated to ix86_binary_operator_ok.  Alternatives 0-1 are register
;; forms (movlhps), alternatives 2-3 take operand 2 from memory (movhps),
;; and alternative 4 stores operand 2 to the %H0 part of a memory
;; destination (movlps).
6122 (define_insn "sse_movlhps"
6123 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
6124 (vec_select:V4SF
6125 (vec_concat:V8SF
6126 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
6127 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,m,m,x"))
6128 (parallel [(const_int 0)
6129 (const_int 1)
6130 (const_int 4)
6131 (const_int 5)])))]
6132 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
6133 "@
6134 movlhps\t{%2, %0|%0, %2}
6135 vmovlhps\t{%2, %1, %0|%0, %1, %2}
6136 movhps\t{%2, %0|%0, %q2}
6137 vmovhps\t{%2, %1, %0|%0, %1, %q2}
6138 %vmovlps\t{%2, %H0|%H0, %2}"
6139 [(set_attr "isa" "noavx,avx,noavx,avx,*")
6140 (set_attr "type" "ssemov")
6141 (set_attr "ssememalign" "64")
6142 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
6143 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
6144
;; 512-bit vunpckhps.  Within each group of four elements, elements 2 and
;; 3 of operand 1 are interleaved with elements 2 and 3 of operand 2
;; (indices 16 and above address operand 2 in the concatenation).
;; <mask_name> / <mask_operand3> come from a substitution defined
;; elsewhere in the file.
6145 (define_insn "<mask_codefor>avx512f_unpckhps512<mask_name>"
6146 [(set (match_operand:V16SF 0 "register_operand" "=v")
6147 (vec_select:V16SF
6148 (vec_concat:V32SF
6149 (match_operand:V16SF 1 "register_operand" "v")
6150 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
6151 (parallel [(const_int 2) (const_int 18)
6152 (const_int 3) (const_int 19)
6153 (const_int 6) (const_int 22)
6154 (const_int 7) (const_int 23)
6155 (const_int 10) (const_int 26)
6156 (const_int 11) (const_int 27)
6157 (const_int 14) (const_int 30)
6158 (const_int 15) (const_int 31)])))]
6159 "TARGET_AVX512F"
6160 "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
6161 [(set_attr "type" "sselog")
6162 (set_attr "prefix" "evex")
6163 (set_attr "mode" "V16SF")])
6164
6165 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Accordingly the selection is { op1[2], op2[2], op1[3], op2[3] } for the
;; first four elements and { op1[6], op2[6], op1[7], op2[7] } for the last
;; four (indices 8 and above address operand 2).
6166 (define_insn "avx_unpckhps256<mask_name>"
6167 [(set (match_operand:V8SF 0 "register_operand" "=v")
6168 (vec_select:V8SF
6169 (vec_concat:V16SF
6170 (match_operand:V8SF 1 "register_operand" "v")
6171 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
6172 (parallel [(const_int 2) (const_int 10)
6173 (const_int 3) (const_int 11)
6174 (const_int 6) (const_int 14)
6175 (const_int 7) (const_int 15)])))]
6176 "TARGET_AVX && <mask_avx512vl_condition>"
6177 "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
6178 [(set_attr "type" "sselog")
6179 (set_attr "prefix" "vex")
6180 (set_attr "mode" "V8SF")])
6181
;; Full-width interleave of the high halves of two V8SF vectors, done in
;; three steps.  Operand 3 receives the selection 0,8,1,9,4,12,5,13,
;; operand 4 receives 2,10,3,11,6,14,7,15, and the final set takes elements
;; 4-7 of each temporary (indices 4-7 and 12-15 of their concatenation).
;; Operands 3 and 4 are fresh V8SF pseudos created in the preparation
;; statements.
6182 (define_expand "vec_interleave_highv8sf"
6183 [(set (match_dup 3)
6184 (vec_select:V8SF
6185 (vec_concat:V16SF
6186 (match_operand:V8SF 1 "register_operand" "x")
6187 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
6188 (parallel [(const_int 0) (const_int 8)
6189 (const_int 1) (const_int 9)
6190 (const_int 4) (const_int 12)
6191 (const_int 5) (const_int 13)])))
6192 (set (match_dup 4)
6193 (vec_select:V8SF
6194 (vec_concat:V16SF
6195 (match_dup 1)
6196 (match_dup 2))
6197 (parallel [(const_int 2) (const_int 10)
6198 (const_int 3) (const_int 11)
6199 (const_int 6) (const_int 14)
6200 (const_int 7) (const_int 15)])))
6201 (set (match_operand:V8SF 0 "register_operand")
6202 (vec_select:V8SF
6203 (vec_concat:V16SF
6204 (match_dup 3)
6205 (match_dup 4))
6206 (parallel [(const_int 4) (const_int 5)
6207 (const_int 6) (const_int 7)
6208 (const_int 12) (const_int 13)
6209 (const_int 14) (const_int 15)])))]
6210 "TARGET_AVX"
6211 {
6212 operands[3] = gen_reg_rtx (V8SFmode);
6213 operands[4] = gen_reg_rtx (V8SFmode);
6214 })
6215
;; unpckhps on V4SF: result is { op1[2], op2[2], op1[3], op2[3] }.
;; Alternative 0 is the two-operand legacy form (operand 1 tied to the
;; destination); alternative 1 is the three-operand VEX form, which also
;; carries the optional <mask_operand3> text.
6216 (define_insn "vec_interleave_highv4sf<mask_name>"
6217 [(set (match_operand:V4SF 0 "register_operand" "=x,v")
6218 (vec_select:V4SF
6219 (vec_concat:V8SF
6220 (match_operand:V4SF 1 "register_operand" "0,v")
6221 (match_operand:V4SF 2 "nonimmediate_operand" "xm,vm"))
6222 (parallel [(const_int 2) (const_int 6)
6223 (const_int 3) (const_int 7)])))]
6224 "TARGET_SSE && <mask_avx512vl_condition>"
6225 "@
6226 unpckhps\t{%2, %0|%0, %2}
6227 vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
6228 [(set_attr "isa" "noavx,avx")
6229 (set_attr "type" "sselog")
6230 (set_attr "prefix" "orig,vex")
6231 (set_attr "mode" "V4SF")])
6232
;; 512-bit vunpcklps.  Within each group of four elements, elements 0 and
;; 1 of operand 1 are interleaved with elements 0 and 1 of operand 2
;; (indices 16 and above address operand 2 in the concatenation).
6233 (define_insn "<mask_codefor>avx512f_unpcklps512<mask_name>"
6234 [(set (match_operand:V16SF 0 "register_operand" "=v")
6235 (vec_select:V16SF
6236 (vec_concat:V32SF
6237 (match_operand:V16SF 1 "register_operand" "v")
6238 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
6239 (parallel [(const_int 0) (const_int 16)
6240 (const_int 1) (const_int 17)
6241 (const_int 4) (const_int 20)
6242 (const_int 5) (const_int 21)
6243 (const_int 8) (const_int 24)
6244 (const_int 9) (const_int 25)
6245 (const_int 12) (const_int 28)
6246 (const_int 13) (const_int 29)])))]
6247 "TARGET_AVX512F"
6248 "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
6249 [(set_attr "type" "sselog")
6250 (set_attr "prefix" "evex")
6251 (set_attr "mode" "V16SF")])
6252
6253 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Accordingly the selection is { op1[0], op2[0], op1[1], op2[1] } for the
;; first four elements and { op1[4], op2[4], op1[5], op2[5] } for the last
;; four (indices 8 and above address operand 2).
6254 (define_insn "avx_unpcklps256<mask_name>"
6255 [(set (match_operand:V8SF 0 "register_operand" "=v")
6256 (vec_select:V8SF
6257 (vec_concat:V16SF
6258 (match_operand:V8SF 1 "register_operand" "v")
6259 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
6260 (parallel [(const_int 0) (const_int 8)
6261 (const_int 1) (const_int 9)
6262 (const_int 4) (const_int 12)
6263 (const_int 5) (const_int 13)])))]
6264 "TARGET_AVX && <mask_avx512vl_condition>"
6265 "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
6266 [(set_attr "type" "sselog")
6267 (set_attr "prefix" "vex")
6268 (set_attr "mode" "V8SF")])
6269
;; Explicitly written masked form of 128-bit vunpcklps.  The interleave
;; { op1[0], op2[0], op1[1], op2[1] } is merged with operand 3 under the
;; QI mask register in operand 4.  Operand 3 is either tied to the
;; destination or matches constraint "C"; the %N3 modifier in the template
;; prints the corresponding suffix.
6270 (define_insn "unpcklps128_mask"
6271 [(set (match_operand:V4SF 0 "register_operand" "=v")
6272 (vec_merge:V4SF
6273 (vec_select:V4SF
6274 (vec_concat:V8SF
6275 (match_operand:V4SF 1 "register_operand" "v")
6276 (match_operand:V4SF 2 "nonimmediate_operand" "vm"))
6277 (parallel [(const_int 0) (const_int 4)
6278 (const_int 1) (const_int 5)]))
6279 (match_operand:V4SF 3 "vector_move_operand" "0C")
6280 (match_operand:QI 4 "register_operand" "Yk")))]
6281 "TARGET_AVX512VL"
6282 "vunpcklps\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
6283 [(set_attr "type" "sselog")
6284 (set_attr "prefix" "evex")
6285 (set_attr "mode" "V4SF")])
6286
;; Full-width interleave of the low halves of two V8SF vectors, done in
;; three steps.  Operand 3 receives the selection 0,8,1,9,4,12,5,13,
;; operand 4 receives 2,10,3,11,6,14,7,15, and the final set takes elements
;; 0-3 of each temporary (indices 0-3 and 8-11 of their concatenation).
;; Operands 3 and 4 are fresh V8SF pseudos created in the preparation
;; statements.
6287 (define_expand "vec_interleave_lowv8sf"
6288 [(set (match_dup 3)
6289 (vec_select:V8SF
6290 (vec_concat:V16SF
6291 (match_operand:V8SF 1 "register_operand" "x")
6292 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
6293 (parallel [(const_int 0) (const_int 8)
6294 (const_int 1) (const_int 9)
6295 (const_int 4) (const_int 12)
6296 (const_int 5) (const_int 13)])))
6297 (set (match_dup 4)
6298 (vec_select:V8SF
6299 (vec_concat:V16SF
6300 (match_dup 1)
6301 (match_dup 2))
6302 (parallel [(const_int 2) (const_int 10)
6303 (const_int 3) (const_int 11)
6304 (const_int 6) (const_int 14)
6305 (const_int 7) (const_int 15)])))
6306 (set (match_operand:V8SF 0 "register_operand")
6307 (vec_select:V8SF
6308 (vec_concat:V16SF
6309 (match_dup 3)
6310 (match_dup 4))
6311 (parallel [(const_int 0) (const_int 1)
6312 (const_int 2) (const_int 3)
6313 (const_int 8) (const_int 9)
6314 (const_int 10) (const_int 11)])))]
6315 "TARGET_AVX"
6316 {
6317 operands[3] = gen_reg_rtx (V8SFmode);
6318 operands[4] = gen_reg_rtx (V8SFmode);
6319 })
6320
;; unpcklps on V4SF: result is { op1[0], op2[0], op1[1], op2[1] }.
;; Alternative 0 is the two-operand legacy form (operand 1 tied to the
;; destination); alternative 1 is the three-operand VEX form.
6321 (define_insn "vec_interleave_lowv4sf"
6322 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
6323 (vec_select:V4SF
6324 (vec_concat:V8SF
6325 (match_operand:V4SF 1 "register_operand" "0,x")
6326 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
6327 (parallel [(const_int 0) (const_int 4)
6328 (const_int 1) (const_int 5)])))]
6329 "TARGET_SSE"
6330 "@
6331 unpcklps\t{%2, %0|%0, %2}
6332 vunpcklps\t{%2, %1, %0|%0, %1, %2}"
6333 [(set_attr "isa" "noavx,avx")
6334 (set_attr "type" "sselog")
6335 (set_attr "prefix" "orig,vex")
6336 (set_attr "mode" "V4SF")])
6337
6338 ;; These are modeled with the same vec_concat as the others so that we
6339 ;; capture users of shufps that can use the new instructions
;; vmovshdup on V8SF: each odd-indexed element of operand 1 is written to
;; its own position and to the even position below it.  The second half
;; of the concatenation is operand 1 again (match_dup 1).
6340 (define_insn "avx_movshdup256<mask_name>"
6341 [(set (match_operand:V8SF 0 "register_operand" "=v")
6342 (vec_select:V8SF
6343 (vec_concat:V16SF
6344 (match_operand:V8SF 1 "nonimmediate_operand" "vm")
6345 (match_dup 1))
6346 (parallel [(const_int 1) (const_int 1)
6347 (const_int 3) (const_int 3)
6348 (const_int 5) (const_int 5)
6349 (const_int 7) (const_int 7)])))]
6350 "TARGET_AVX && <mask_avx512vl_condition>"
6351 "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6352 [(set_attr "type" "sse")
6353 (set_attr "prefix" "vex")
6354 (set_attr "mode" "V8SF")])
6355
;; movshdup on V4SF: result is { op1[1], op1[1], op1[3], op1[3] }.
;; Because the concatenation is (op1, op1), index 7 denotes element 3 of
;; operand 1; the selector is written 1,1,7,7 to match the shape of a
;; two-input shuffle.
6356 (define_insn "sse3_movshdup<mask_name>"
6357 [(set (match_operand:V4SF 0 "register_operand" "=v")
6358 (vec_select:V4SF
6359 (vec_concat:V8SF
6360 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
6361 (match_dup 1))
6362 (parallel [(const_int 1)
6363 (const_int 1)
6364 (const_int 7)
6365 (const_int 7)])))]
6366 "TARGET_SSE3 && <mask_avx512vl_condition>"
6367 "%vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6368 [(set_attr "type" "sse")
6369 (set_attr "prefix_rep" "1")
6370 (set_attr "prefix" "maybe_vex")
6371 (set_attr "mode" "V4SF")])
6372
;; 512-bit vmovshdup: each odd-indexed element of operand 1 (1, 3, ..., 15)
;; is written to its own position and to the even position below it.
6373 (define_insn "<mask_codefor>avx512f_movshdup512<mask_name>"
6374 [(set (match_operand:V16SF 0 "register_operand" "=v")
6375 (vec_select:V16SF
6376 (vec_concat:V32SF
6377 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
6378 (match_dup 1))
6379 (parallel [(const_int 1) (const_int 1)
6380 (const_int 3) (const_int 3)
6381 (const_int 5) (const_int 5)
6382 (const_int 7) (const_int 7)
6383 (const_int 9) (const_int 9)
6384 (const_int 11) (const_int 11)
6385 (const_int 13) (const_int 13)
6386 (const_int 15) (const_int 15)])))]
6387 "TARGET_AVX512F"
6388 "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6389 [(set_attr "type" "sse")
6390 (set_attr "prefix" "evex")
6391 (set_attr "mode" "V16SF")])
6392
;; vmovsldup on V8SF: each even-indexed element of operand 1 (0, 2, 4, 6)
;; is written to its own position and to the odd position above it.
6393 (define_insn "avx_movsldup256<mask_name>"
6394 [(set (match_operand:V8SF 0 "register_operand" "=v")
6395 (vec_select:V8SF
6396 (vec_concat:V16SF
6397 (match_operand:V8SF 1 "nonimmediate_operand" "vm")
6398 (match_dup 1))
6399 (parallel [(const_int 0) (const_int 0)
6400 (const_int 2) (const_int 2)
6401 (const_int 4) (const_int 4)
6402 (const_int 6) (const_int 6)])))]
6403 "TARGET_AVX && <mask_avx512vl_condition>"
6404 "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6405 [(set_attr "type" "sse")
6406 (set_attr "prefix" "vex")
6407 (set_attr "mode" "V8SF")])
6408
;; movsldup on V4SF: result is { op1[0], op1[0], op1[2], op1[2] }.
;; Because the concatenation is (op1, op1), index 6 denotes element 2 of
;; operand 1; the selector is written 0,0,6,6 to match the shape of a
;; two-input shuffle.
6409 (define_insn "sse3_movsldup<mask_name>"
6410 [(set (match_operand:V4SF 0 "register_operand" "=v")
6411 (vec_select:V4SF
6412 (vec_concat:V8SF
6413 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
6414 (match_dup 1))
6415 (parallel [(const_int 0)
6416 (const_int 0)
6417 (const_int 6)
6418 (const_int 6)])))]
6419 "TARGET_SSE3 && <mask_avx512vl_condition>"
6420 "%vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6421 [(set_attr "type" "sse")
6422 (set_attr "prefix_rep" "1")
6423 (set_attr "prefix" "maybe_vex")
6424 (set_attr "mode" "V4SF")])
6425
;; 512-bit vmovsldup: each even-indexed element of operand 1 (0, 2, ..., 14)
;; is written to its own position and to the odd position above it.
6426 (define_insn "<mask_codefor>avx512f_movsldup512<mask_name>"
6427 [(set (match_operand:V16SF 0 "register_operand" "=v")
6428 (vec_select:V16SF
6429 (vec_concat:V32SF
6430 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
6431 (match_dup 1))
6432 (parallel [(const_int 0) (const_int 0)
6433 (const_int 2) (const_int 2)
6434 (const_int 4) (const_int 4)
6435 (const_int 6) (const_int 6)
6436 (const_int 8) (const_int 8)
6437 (const_int 10) (const_int 10)
6438 (const_int 12) (const_int 12)
6439 (const_int 14) (const_int 14)])))]
6440 "TARGET_AVX512F"
6441 "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6442 [(set_attr "type" "sse")
6443 (set_attr "prefix" "evex")
6444 (set_attr "mode" "V16SF")])
6445
;; Expand a 256-bit shufps given as an 8-bit immediate (operand 3) into
;; avx_shufps256_1, which takes one explicit element index per result
;; element.
(define_expand "avx_shufps256<mask_expand4_name>"
  [(match_operand:V8SF 0 "register_operand")
   (match_operand:V8SF 1 "register_operand")
   (match_operand:V8SF 2 "nonimmediate_operand")
   (match_operand:SI 3 "const_int_operand")]
  "TARGET_AVX"
{
  int mask = INTVAL (operands[3]);

  /* The four 2-bit selector fields of the immediate.  */
  int sel0 = (mask >> 0) & 3;
  int sel1 = (mask >> 2) & 3;
  int sel2 = (mask >> 4) & 3;
  int sel3 = (mask >> 6) & 3;

  /* Selectors 0 and 1 index operand 1 (bases 0 and 4), selectors 2 and 3
     index operand 2 (bases 8 and 12).  Each selector is used twice, once
     for elements 0-3 and once for elements 4-7 of the result.  */
  emit_insn (gen_avx_shufps256_1<mask_expand4_name> (operands[0],
                                                     operands[1],
                                                     operands[2],
                                                     GEN_INT (sel0),
                                                     GEN_INT (sel1),
                                                     GEN_INT (sel2 + 8),
                                                     GEN_INT (sel3 + 8),
                                                     GEN_INT (sel0 + 4),
                                                     GEN_INT (sel1 + 4),
                                                     GEN_INT (sel2 + 12),
                                                     GEN_INT (sel3 + 12)
                                                     <mask_expand4_args>));
  DONE;
})
6468
;; 256-bit vshufps with explicit element indices.  The insn condition
;; requires operands 7-10 to equal operands 3-6 plus 4, i.e. the same
;; selector is applied to elements 0-3 and 4-7, so one 2-bit field of the
;; immediate selects two result elements.
(define_insn "avx_shufps256_1<mask_name>"
  [(set (match_operand:V8SF 0 "register_operand" "=v")
        (vec_select:V8SF
          (vec_concat:V16SF
            (match_operand:V8SF 1 "register_operand" "v")
            (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
          (parallel [(match_operand 3 "const_0_to_3_operand" )
                     (match_operand 4 "const_0_to_3_operand" )
                     (match_operand 5 "const_8_to_11_operand" )
                     (match_operand 6 "const_8_to_11_operand" )
                     (match_operand 7 "const_4_to_7_operand" )
                     (match_operand 8 "const_4_to_7_operand" )
                     (match_operand 9 "const_12_to_15_operand")
                     (match_operand 10 "const_12_to_15_operand")])))]
  "TARGET_AVX
   && <mask_avx512vl_condition>
   && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
       && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
       && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
       && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
{
  /* Rebuild the 8-bit immediate from the first four indices; operands 5
     and 6 are stored with a bias of 8 because they index operand 2.  */
  int imm = (INTVAL (operands[3])
             | (INTVAL (operands[4]) << 2)
             | ((INTVAL (operands[5]) - 8) << 4)
             | ((INTVAL (operands[6]) - 8) << 6));

  /* The template prints the immediate as %3.  */
  operands[3] = GEN_INT (imm);

  return "vshufps\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
}
  [(set_attr "type" "sseshuf")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "<mask_prefix>")
   (set_attr "mode" "V8SF")])
6504
;; Expand a 128-bit shufps given as an 8-bit immediate (operand 3) into
;; sse_shufps_v4sf, which takes one explicit element index per result
;; element.
(define_expand "sse_shufps<mask_expand4_name>"
  [(match_operand:V4SF 0 "register_operand")
   (match_operand:V4SF 1 "register_operand")
   (match_operand:V4SF 2 "nonimmediate_operand")
   (match_operand:SI 3 "const_int_operand")]
  "TARGET_SSE"
{
  int mask = INTVAL (operands[3]);

  /* The four 2-bit selector fields of the immediate.  The low two index
     operand 1 (0-3); the high two index operand 2 (4-7 in the
     concatenation).  */
  int sel0 = (mask >> 0) & 3;
  int sel1 = (mask >> 2) & 3;
  int sel2 = (mask >> 4) & 3;
  int sel3 = (mask >> 6) & 3;

  emit_insn (gen_sse_shufps_v4sf<mask_expand4_name> (operands[0],
                                                     operands[1],
                                                     operands[2],
                                                     GEN_INT (sel0),
                                                     GEN_INT (sel1),
                                                     GEN_INT (sel2 + 4),
                                                     GEN_INT (sel3 + 4)
                                                     <mask_expand4_args>));
  DONE;
})
6523
;; Masked 128-bit vshufps.  The shuffle result is merged with operand 7
;; under the QI mask register in operand 8.
(define_insn "sse_shufps_v4sf_mask"
  [(set (match_operand:V4SF 0 "register_operand" "=v")
        (vec_merge:V4SF
          (vec_select:V4SF
            (vec_concat:V8SF
              (match_operand:V4SF 1 "register_operand" "v")
              (match_operand:V4SF 2 "nonimmediate_operand" "vm"))
            (parallel [(match_operand 3 "const_0_to_3_operand")
                       (match_operand 4 "const_0_to_3_operand")
                       (match_operand 5 "const_4_to_7_operand")
                       (match_operand 6 "const_4_to_7_operand")]))
          (match_operand:V4SF 7 "vector_move_operand" "0C")
          (match_operand:QI 8 "register_operand" "Yk")))]
  "TARGET_AVX512VL"
{
  /* Rebuild the 8-bit immediate; operands 5 and 6 carry a bias of 4
     because they index operand 2.  */
  int imm = ((INTVAL (operands[3]) << 0)
             | (INTVAL (operands[4]) << 2)
             | ((INTVAL (operands[5]) - 4) << 4)
             | ((INTVAL (operands[6]) - 4) << 6));

  /* The template prints the immediate as %3.  */
  operands[3] = GEN_INT (imm);

  return "vshufps\t{%3, %2, %1, %0%{%8%}%N7|%0%{%8%}%N7, %1, %2, %3}";
}
  [(set_attr "type" "sseshuf")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "V4SF")])
6552
;; 128-bit shufps on any VI4F_128 mode, with explicit element indices.
;; Alternative 0 is the legacy two-operand form, alternative 1 the VEX
;; three-operand form.
(define_insn "sse_shufps_<mode>"
  [(set (match_operand:VI4F_128 0 "register_operand" "=x,x")
        (vec_select:VI4F_128
          (vec_concat:<ssedoublevecmode>
            (match_operand:VI4F_128 1 "register_operand" "0,x")
            (match_operand:VI4F_128 2 "nonimmediate_operand" "xm,xm"))
          (parallel [(match_operand 3 "const_0_to_3_operand")
                     (match_operand 4 "const_0_to_3_operand")
                     (match_operand 5 "const_4_to_7_operand")
                     (match_operand 6 "const_4_to_7_operand")])))]
  "TARGET_SSE"
{
  /* Rebuild the 8-bit immediate; operands 5 and 6 carry a bias of 4
     because they index operand 2.  */
  int imm = ((INTVAL (operands[3]) << 0)
             | (INTVAL (operands[4]) << 2)
             | ((INTVAL (operands[5]) - 4) << 4)
             | ((INTVAL (operands[6]) - 4) << 6));

  /* The templates print the immediate as %3.  */
  operands[3] = GEN_INT (imm);

  if (which_alternative == 0)
    return "shufps\t{%3, %2, %0|%0, %2, %3}";
  if (which_alternative == 1)
    return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
  gcc_unreachable ();
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseshuf")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "V4SF")])
6587
;; Extracts elements 2 and 3 of the V4SF operand 1 into the V2SF operand 0.
;; Alternative 0 stores to memory (movhps), alternative 1 is
;; register-to-register (movhlps), and alternative 2 loads from the %H1
;; part of a memory source (movlps).
6588 (define_insn "sse_storehps"
6589 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
6590 (vec_select:V2SF
6591 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
6592 (parallel [(const_int 2) (const_int 3)])))]
6593 "TARGET_SSE"
6594 "@
6595 %vmovhps\t{%1, %0|%q0, %1}
6596 %vmovhlps\t{%1, %d0|%d0, %1}
6597 %vmovlps\t{%H1, %d0|%d0, %H1}"
6598 [(set_attr "type" "ssemov")
6599 (set_attr "ssememalign" "64")
6600 (set_attr "prefix" "maybe_vex")
6601 (set_attr "mode" "V2SF,V4SF,V2SF")])
6602
;; Expander in front of sse_loadhps.  It lets ix86_fixup_binary_operands
;; adjust the operands and choose a destination, emits the insn, and
;; copies the result to operand 0 when a different destination was used.
(define_expand "sse_loadhps_exp"
  [(set (match_operand:V4SF 0 "nonimmediate_operand")
        (vec_concat:V4SF
          (vec_select:V2SF
            (match_operand:V4SF 1 "nonimmediate_operand")
            (parallel [(const_int 0) (const_int 1)]))
          (match_operand:V2SF 2 "nonimmediate_operand")))]
  "TARGET_SSE"
{
  rtx target = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);

  emit_insn (gen_sse_loadhps (target, operands[1], operands[2]));

  /* Copy back if the fixup handed us a destination other than operand 0.  */
  if (operands[0] != target)
    emit_move_insn (operands[0], target);

  DONE;
})
6622
;; Result keeps elements 0 and 1 of operand 1 and takes the V2SF operand 2
;; as elements 2 and 3.  Alternatives 0-1 take operand 2 from memory
;; (movhps), alternatives 2-3 take it from a register (movlhps), and
;; alternative 4 stores operand 2 to the %H0 part of a memory destination
;; (movlps).
6623 (define_insn "sse_loadhps"
6624 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
6625 (vec_concat:V4SF
6626 (vec_select:V2SF
6627 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
6628 (parallel [(const_int 0) (const_int 1)]))
6629 (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,x,x")))]
6630 "TARGET_SSE"
6631 "@
6632 movhps\t{%2, %0|%0, %q2}
6633 vmovhps\t{%2, %1, %0|%0, %1, %q2}
6634 movlhps\t{%2, %0|%0, %2}
6635 vmovlhps\t{%2, %1, %0|%0, %1, %2}
6636 %vmovlps\t{%2, %H0|%H0, %2}"
6637 [(set_attr "isa" "noavx,avx,noavx,avx,*")
6638 (set_attr "type" "ssemov")
6639 (set_attr "ssememalign" "64")
6640 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
6641 (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
6642
;; Extracts elements 0 and 1 of the V4SF operand 1 into the V2SF operand 0.
;; Alternative 0 stores to memory (movlps), alternative 1 is a full
;; register copy (movaps), and alternative 2 loads from memory (movlps).
6643 (define_insn "sse_storelps"
6644 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
6645 (vec_select:V2SF
6646 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,m")
6647 (parallel [(const_int 0) (const_int 1)])))]
6648 "TARGET_SSE"
6649 "@
6650 %vmovlps\t{%1, %0|%q0, %1}
6651 %vmovaps\t{%1, %0|%0, %1}
6652 %vmovlps\t{%1, %d0|%d0, %q1}"
6653 [(set_attr "type" "ssemov")
6654 (set_attr "prefix" "maybe_vex")
6655 (set_attr "mode" "V2SF,V4SF,V2SF")])
6656
;; Expander in front of sse_loadlps.  It lets ix86_fixup_binary_operands
;; adjust the operands and choose a destination, emits the insn, and
;; copies the result to operand 0 when a different destination was used.
;; Note that operand 2 appears before operand 1 in the pattern.
(define_expand "sse_loadlps_exp"
  [(set (match_operand:V4SF 0 "nonimmediate_operand")
        (vec_concat:V4SF
          (match_operand:V2SF 2 "nonimmediate_operand")
          (vec_select:V2SF
            (match_operand:V4SF 1 "nonimmediate_operand")
            (parallel [(const_int 2) (const_int 3)]))))]
  "TARGET_SSE"
{
  rtx target = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);

  emit_insn (gen_sse_loadlps (target, operands[1], operands[2]));

  /* Copy back if the fixup handed us a destination other than operand 0.  */
  if (operands[0] != target)
    emit_move_insn (operands[0], target);

  DONE;
})
6676
;; Result takes the V2SF operand 2 as elements 0 and 1 and keeps elements 2
;; and 3 of operand 1.  Alternatives 0-1 are register forms implemented
;; with shufps and the fixed immediate 0xe4, alternatives 2-3 load operand
;; 2 from memory (movlps), and alternative 4 stores operand 2 into a
;; memory destination (movlps).
6677 (define_insn "sse_loadlps"
6678 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
6679 (vec_concat:V4SF
6680 (match_operand:V2SF 2 "nonimmediate_operand" " 0,x,m,m,x")
6681 (vec_select:V2SF
6682 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0,x,0")
6683 (parallel [(const_int 2) (const_int 3)]))))]
6684 "TARGET_SSE"
6685 "@
6686 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
6687 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
6688 movlps\t{%2, %0|%0, %q2}
6689 vmovlps\t{%2, %1, %0|%0, %1, %q2}
6690 %vmovlps\t{%2, %0|%q0, %2}"
6691 [(set_attr "isa" "noavx,avx,noavx,avx,*")
6692 (set_attr "type" "sseshuf,sseshuf,ssemov,ssemov,ssemov")
6693 (set_attr "ssememalign" "64")
6694 (set_attr "length_immediate" "1,1,*,*,*")
6695 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
6696 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
6697
;; Register-only movss: a vec_merge with mask 1, so element 0 of the result
;; comes from operand 2 and the remaining elements from operand 1.
;; Alternative 0 is the legacy form (operand 1 tied to the destination);
;; alternative 1 is the three-operand VEX form.
6698 (define_insn "sse_movss"
6699 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
6700 (vec_merge:V4SF
6701 (match_operand:V4SF 2 "register_operand" " x,x")
6702 (match_operand:V4SF 1 "register_operand" " 0,x")
6703 (const_int 1)))]
6704 "TARGET_SSE"
6705 "@
6706 movss\t{%2, %0|%0, %2}
6707 vmovss\t{%2, %1, %0|%0, %1, %2}"
6708 [(set_attr "isa" "noavx,avx")
6709 (set_attr "type" "ssemov")
6710 (set_attr "prefix" "orig,vex")
6711 (set_attr "mode" "SF")])
6712
;; Broadcasts element 0 of the V4SF register operand 1 to every element of
;; the VF1_128_256 result, using register-source vbroadcastss.
6713 (define_insn "avx2_vec_dup<mode>"
6714 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
6715 (vec_duplicate:VF1_128_256
6716 (vec_select:SF
6717 (match_operand:V4SF 1 "register_operand" "x")
6718 (parallel [(const_int 0)]))))]
6719 "TARGET_AVX2"
6720 "vbroadcastss\t{%1, %0|%0, %1}"
6721 [(set_attr "type" "sselog1")
6722 (set_attr "prefix" "vex")
6723 (set_attr "mode" "<MODE>")])
6724
;; Same broadcast, but the source is a V8SF register; the template prints
;; operand 1 with the %x modifier.
6725 (define_insn "avx2_vec_dupv8sf_1"
6726 [(set (match_operand:V8SF 0 "register_operand" "=x")
6727 (vec_duplicate:V8SF
6728 (vec_select:SF
6729 (match_operand:V8SF 1 "register_operand" "x")
6730 (parallel [(const_int 0)]))))]
6731 "TARGET_AVX2"
6732 "vbroadcastss\t{%x1, %0|%0, %x1}"
6733 [(set_attr "type" "sselog1")
6734 (set_attr "prefix" "vex")
6735 (set_attr "mode" "V8SF")])
6736
;; Broadcasts element 0 of a 512-bit float vector register to every element
;; of a result of the same mode.  <bcstscalarsuff> supplies the mnemonic
;; suffix; operand 1 is printed with the %x modifier.
6737 (define_insn "avx512f_vec_dup<mode>_1"
6738 [(set (match_operand:VF_512 0 "register_operand" "=v")
6739 (vec_duplicate:VF_512
6740 (vec_select:<ssescalarmode>
6741 (match_operand:VF_512 1 "register_operand" "v")
6742 (parallel [(const_int 0)]))))]
6743 "TARGET_AVX512F"
6744 "vbroadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}"
6745 [(set_attr "type" "sselog1")
6746 (set_attr "prefix" "evex")
6747 (set_attr "mode" "<MODE>")])
6748
6749 ;; Although insertps takes register source, we prefer
6750 ;; unpcklps with register source since it is shorter.
;; Builds a V2SF from the two SF operands 1 and 2.  The nine alternatives
;; are, in order: three register forms using unpcklps, three forms with
;; operand 2 in memory using insertps with immediate 0x10, a movss load of
;; operand 1 when operand 2 matches constraint "C", and two MMX forms
;; (punpckldq and movd).  Every per-alternative attribute list below is
;; positional and must stay aligned with the constraint columns.
6751 (define_insn "*vec_concatv2sf_sse4_1"
6752 [(set (match_operand:V2SF 0 "register_operand"
6753 "=Yr,*x,x,Yr,*x,x,x,*y ,*y")
6754 (vec_concat:V2SF
6755 (match_operand:SF 1 "nonimmediate_operand"
6756 " 0, 0,x, 0,0, x,m, 0 , m")
6757 (match_operand:SF 2 "vector_move_operand"
6758 " Yr,*x,x, m,m, m,C,*ym, C")))]
6759 "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
6760 "@
6761 unpcklps\t{%2, %0|%0, %2}
6762 unpcklps\t{%2, %0|%0, %2}
6763 vunpcklps\t{%2, %1, %0|%0, %1, %2}
6764 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
6765 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
6766 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
6767 %vmovss\t{%1, %0|%0, %1}
6768 punpckldq\t{%2, %0|%0, %2}
6769 movd\t{%1, %0|%0, %1}"
6770 [(set_attr "isa" "noavx,noavx,avx,noavx,noavx,avx,*,*,*")
6771 (set_attr "type" "sselog,sselog,sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
6772 (set_attr "prefix_data16" "*,*,*,1,1,*,*,*,*")
6773 (set_attr "prefix_extra" "*,*,*,1,1,1,*,*,*")
6774 (set_attr "length_immediate" "*,*,*,1,1,1,*,*,*")
6775 (set_attr "prefix" "orig,orig,vex,orig,orig,vex,maybe_vex,orig,orig")
6776 (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
6777
6778 ;; ??? In theory we can match memory for the MMX alternative, but allowing
6779 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
6780 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Plain-SSE variant of the V2SF concatenation.  Alternatives, in order:
;; unpcklps of two registers, a movss load of operand 1 when operand 2
;; matches "C", then the MMX punpckldq and movd forms.
6781 (define_insn "*vec_concatv2sf_sse"
6782 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
6783 (vec_concat:V2SF
6784 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
6785 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
6786 "TARGET_SSE"
6787 "@
6788 unpcklps\t{%2, %0|%0, %2}
6789 movss\t{%1, %0|%0, %1}
6790 punpckldq\t{%2, %0|%0, %2}
6791 movd\t{%1, %0|%0, %1}"
6792 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
6793 (set_attr "mode" "V4SF,SF,DI,DI")])
6794
;; Builds a V4SF from two V2SF halves: operand 1 is the low half, operand 2
;; the high half.  Alternatives 0-1 take operand 2 from a register
;; (movlhps), alternatives 2-3 from memory (movhps).
6795 (define_insn "*vec_concatv4sf"
6796 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x,x")
6797 (vec_concat:V4SF
6798 (match_operand:V2SF 1 "register_operand" " 0,x,0,x")
6799 (match_operand:V2SF 2 "nonimmediate_operand" " x,x,m,m")))]
6800 "TARGET_SSE"
6801 "@
6802 movlhps\t{%2, %0|%0, %2}
6803 vmovlhps\t{%2, %1, %0|%0, %1, %2}
6804 movhps\t{%2, %0|%0, %q2}
6805 vmovhps\t{%2, %1, %0|%0, %1, %q2}"
6806 [(set_attr "isa" "noavx,avx,noavx,avx")
6807 (set_attr "type" "ssemov")
6808 (set_attr "prefix" "orig,vex,orig,vex")
6809 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
6810
;; Vector initialization for every V_128 mode.  All of the work is handed
;; to ix86_expand_vector_init, called with `false' as its first argument.
(define_expand "vec_init<mode>"
  [(match_operand:V_128 0 "register_operand")
   (match_operand 1)]
  "TARGET_SSE"
{
  rtx target = operands[0];
  rtx values = operands[1];

  ix86_expand_vector_init (false, target, values);
  DONE;
})
6819
6820 ;; Avoid combining registers from different units in a single alternative,
6821 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Sets element 0 of a VI4F_128 vector to the scalar operand 2; the other
;; elements come from operand 1 (vec_merge with mask 1).  There are
;; thirteen alternatives, and every per-alternative attribute list below is
;; positional and must stay aligned with the constraint columns.  The last
;; three alternatives have a memory destination and output "#", i.e. they
;; must be split (presumably by the memory-destination define_split later
;; in this file; confirm).
6822 (define_insn "vec_set<mode>_0"
6823 [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
6824 "=Yr,*v,v,Yi,x,x,v,Yr ,*x ,x ,m ,m ,m")
6825 (vec_merge:VI4F_128
6826 (vec_duplicate:VI4F_128
6827 (match_operand:<ssescalarmode> 2 "general_operand"
6828 " Yr,*v,m,r ,m,x,v,*rm,*rm,*rm,!x,!*re,!*fF"))
6829 (match_operand:VI4F_128 1 "vector_move_operand"
6830 " C , C,C,C ,C,0,v,0 ,0 ,x ,0 ,0 ,0")
6831 (const_int 1)))]
6832 "TARGET_SSE"
6833 "@
6834 %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe}
6835 %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe}
6836 %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
6837 %vmovd\t{%2, %0|%0, %2}
6838 movss\t{%2, %0|%0, %2}
6839 movss\t{%2, %0|%0, %2}
6840 vmovss\t{%2, %1, %0|%0, %1, %2}
6841 pinsrd\t{$0, %2, %0|%0, %2, 0}
6842 pinsrd\t{$0, %2, %0|%0, %2, 0}
6843 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
6844 #
6845 #
6846 #"
6847 [(set_attr "isa" "sse4,sse4,sse2,sse2,noavx,noavx,avx,sse4_noavx,sse4_noavx,avx,*,*,*")
6848 (set (attr "type")
6849 (cond [(eq_attr "alternative" "0,1,7,8,9")
6850 (const_string "sselog")
6851 (eq_attr "alternative" "11")
6852 (const_string "imov")
6853 (eq_attr "alternative" "12")
6854 (const_string "fmov")
6855 ]
6856 (const_string "ssemov")))
6857 (set_attr "prefix_extra" "*,*,*,*,*,*,*,1,1,1,*,*,*")
6858 (set_attr "length_immediate" "*,*,*,*,*,*,*,1,1,1,*,*,*")
6859 (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,maybe_vex,orig,orig,vex,orig,orig,vex,*,*,*")
6860 (set_attr "mode" "SF,SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,TI,*,*,*")])
6861
;; A subset is vec_setv4sf.
;; Inserts the SF operand 2 into one element of the V4SF operand 1 with
;; insertps.  Operand 3 is the vec_merge mask; the insn condition accepts
;; it only when exact_log2 of the mask is a valid V4SF element index.
(define_insn "*vec_setv4sf_sse4_1"
  [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,x")
        (vec_merge:V4SF
          (vec_duplicate:V4SF
            (match_operand:SF 2 "nonimmediate_operand" "Yrm,*xm,xm"))
          (match_operand:V4SF 1 "register_operand" "0,0,x")
          (match_operand:SI 3 "const_int_operand")))]
  "TARGET_SSE4_1
   && ((unsigned) exact_log2 (INTVAL (operands[3]))
       < GET_MODE_NUNITS (V4SFmode))"
{
  /* Turn the merge mask into an element index and place that index at
     bit 4 of the insertps immediate.  */
  int elt = exact_log2 (INTVAL (operands[3]));
  operands[3] = GEN_INT (elt << 4);

  if (which_alternative == 0 || which_alternative == 1)
    return "insertps\t{%3, %2, %0|%0, %2, %3}";
  if (which_alternative == 2)
    return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
  gcc_unreachable ();
}
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix_data16" "1,1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "orig,orig,vex")
   (set_attr "mode" "V4SF")])
6893
;; insertps exposed as an unspec with the raw 8-bit immediate in operand 3.
;; When the source (operand 2) is in memory, the top two bits of the
;; immediate are folded into the memory address instead.
(define_insn "sse4_1_insertps"
  [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,x")
        (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "Yrm,*xm,xm")
                      (match_operand:V4SF 1 "register_operand" "0,0,x")
                      (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
                     UNSPEC_INSERTPS))]
  "TARGET_SSE4_1"
{
  if (MEM_P (operands[2]))
    {
      /* Bits 7:6 of the immediate pick the source element.  For a memory
         source, clear them from the immediate (only needed when nonzero)
         and address that SF element directly.  */
      unsigned src_elt = INTVAL (operands[3]) >> 6;
      if (src_elt != 0)
        operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
      operands[2] = adjust_address_nv (operands[2], SFmode, src_elt * 4);
    }

  if (which_alternative == 0 || which_alternative == 1)
    return "insertps\t{%3, %2, %0|%0, %2, %3}";
  if (which_alternative == 2)
    return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
  gcc_unreachable ();
}
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix_data16" "1,1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "orig,orig,vex")
   (set_attr "mode" "V4SF")])
6927
;; After reload, merging a scalar into element 0 of a 128-bit vector
;; that lives in memory becomes a plain scalar store to the start of
;; that memory.
(define_split
  [(set (match_operand:VI4F_128 0 "memory_operand")
        (vec_merge:VI4F_128
          (vec_duplicate:VI4F_128
            (match_operand:<ssescalarmode> 1 "nonmemory_operand"))
          (match_dup 0)
          (const_int 1)))]
  "TARGET_SSE && reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  /* Re-view the vector memory as its first scalar element.  */
  operands[0] = adjust_address (operands[0], <ssescalarmode>mode, 0);
})
6938
;; Hands the vector (operand 0), the scalar (operand 1) and the constant
;; element index (operand 2) to ix86_expand_vector_set.
(define_expand "vec_set<mode>"
  [(match_operand:V 0 "register_operand")
   (match_operand:<ssescalarmode> 1 "register_operand")
   (match_operand 2 "const_int_operand")]
  "TARGET_SSE"
{
  HOST_WIDE_INT elt = INTVAL (operands[2]);

  ix86_expand_vector_set (false, operands[0], operands[1], elt);
  DONE;
})
6949
;; Extracting element 0 of a V4SF.  Always emitted as "#" and split
;; after reload into a scalar SF move from the low part of the source.
(define_insn_and_split "*vec_extractv4sf_0"
  [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
        (vec_select:SF
          (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
          (parallel [(const_int 0)])))]
  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  /* Same hard register or same address, viewed as SFmode.  */
  operands[1] = (REG_P (operands[1])
                 ? gen_rtx_REG (SFmode, REGNO (operands[1]))
                 : adjust_address (operands[1], SFmode, 0));
})
6965
;; Extract element 1..3 of a V4SF.  The first two alternatives emit
;; extractps to a general register or memory.  The last two target an
;; SSE register and are split after reload into a shuffle that brings
;; the selected element down to element 0 of the destination.
(define_insn_and_split "*sse4_1_extractps"
  [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,rm,x,x")
        (vec_select:SF
          (match_operand:V4SF 1 "register_operand" "Yr,*x,0,x")
          (parallel
            [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n,n")])))]
  "TARGET_SSE4_1"
  "@
   %vextractps\t{%2, %1, %0|%0, %1, %2}
   %vextractps\t{%2, %1, %0|%0, %1, %2}
   #
   #"
  "&& reload_completed && SSE_REG_P (operands[0])"
  [(const_int 0)]
{
  rtx dest = gen_rtx_REG (V4SFmode, REGNO (operands[0]));
  HOST_WIDE_INT elt = INTVAL (operands[2]);

  if (elt == 1 || elt == 3)
    {
      /* shufps selecting the wanted element for both low result
         slots; the upper selectors index the concatenated second
         copy, hence the + 4.  */
      rtx upper_sel = GEN_INT (elt + 4);

      emit_insn (gen_sse_shufps_v4sf (dest, operands[1], operands[1],
                                      operands[2], operands[2],
                                      upper_sel, upper_sel));
    }
  else if (elt == 2)
    emit_insn (gen_vec_interleave_highv4sf (dest, operands[1],
                                            operands[1]));
  else
    /* 0 should be handled by the *vec_extractv4sf_0 pattern above.  */
    gcc_unreachable ();

  DONE;
}
  [(set_attr "isa" "*,*,noavx,avx")
   (set_attr "type" "sselog,sselog,*,*")
   (set_attr "prefix_data16" "1,1,*,*")
   (set_attr "prefix_extra" "1,1,*,*")
   (set_attr "length_immediate" "1,1,*,*")
   (set_attr "prefix" "maybe_vex,maybe_vex,*,*")
   (set_attr "mode" "V4SF,V4SF,*,*")])
7006
;; Extract a constant element from a V4SF in offsettable memory: after
;; reload this is a scalar SF load from the element's address.
(define_insn_and_split "*vec_extractv4sf_mem"
  [(set (match_operand:SF 0 "register_operand" "=x,*r,f")
        (vec_select:SF
          (match_operand:V4SF 1 "memory_operand" "o,o,o")
          (parallel
            [(match_operand 2 "const_0_to_3_operand" "n,n,n")])))]
  "TARGET_SSE"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  /* Each SF element occupies 4 bytes.  */
  HOST_WIDE_INT byte_off = INTVAL (operands[2]) * 4;

  operands[1] = adjust_address (operands[1], SFmode, byte_off);
})
7019
;; ISA prefix used in the names of the quarter-extraction patterns,
;; per 512-bit source mode.
(define_mode_attr extract_type
  [(V16SF "avx512f")
   (V16SI "avx512f")
   (V8DF "avx512dq")
   (V8DI "avx512dq")])

;; Element-layout suffix of the matching vextract mnemonic, per
;; 512-bit source mode.
(define_mode_attr extract_suf
  [(V16SF "32x4")
   (V16SI "32x4")
   (V8DF "64x2")
   (V8DI "64x2")])

;; 512-bit modes for quarter extraction; the 64-bit element modes are
;; only enabled with AVX512DQ.
(define_mode_iterator AVX512_VEC
  [(V8DF "TARGET_AVX512DQ")
   (V8DI "TARGET_AVX512DQ")
   V16SF
   V16SI])
7028
;; Masked extraction of one quarter of a 512-bit vector.
;;   operand 0: destination (register or memory)
;;   operand 1: 512-bit source register
;;   operand 2: quarter index, 0..3
;;   operand 3: merge source for masked-off elements
;;   operand 4: write mask
(define_expand "<extract_type>_vextract<shuffletype><extract_suf>_mask"
  [(match_operand:<ssequartermode> 0 "nonimmediate_operand")
   (match_operand:AVX512_VEC 1 "register_operand")
   (match_operand:SI 2 "const_0_to_3_operand")
   (match_operand:<ssequartermode> 3 "nonimmediate_operand")
   (match_operand:QI 4 "register_operand")]
  "TARGET_AVX512F"
{
  int mask;
  mask = INTVAL (operands[2]);
  rtx dest = operands[0];

  /* The *_maskm insns accept a memory destination only when the merge
     source is that very same memory.  In every other case extract into
     a fresh pseudo and store it afterwards.  Loading the destination
     with force_reg would leave the result in a pseudo that is never
     written back to memory.  */
  if (MEM_P (operands[0]) && !rtx_equal_p (operands[0], operands[3]))
    dest = gen_reg_rtx (<ssequartermode>mode);

  /* The insn patterns take the individual element indices of the
     selected quarter rather than the quarter number.  */
  if (<MODE>mode == V16SImode || <MODE>mode == V16SFmode)
    emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (dest,
        operands[1], GEN_INT (mask * 4), GEN_INT (mask * 4 + 1),
        GEN_INT (mask * 4 + 2), GEN_INT (mask * 4 + 3), operands[3],
        operands[4]));
  else
    emit_insn (gen_avx512dq_vextract<shuffletype>64x2_1_mask (dest,
        operands[1], GEN_INT (mask * 2), GEN_INT (mask * 2 + 1), operands[3],
        operands[4]));

  if (dest != operands[0])
    emit_move_insn (operands[0], dest);
  DONE;
})
7054
;; Masked vextract{f,i}64x2 storing to memory, with the same memory as
;; merge source (operand 4 must equal operand 0).  Operands 2 and 3 are
;; the two consecutive element indices of an aligned 128-bit chunk.
(define_insn "avx512dq_vextract<shuffletype>64x2_1_maskm"
  [(set (match_operand:<ssequartermode> 0 "memory_operand" "=m")
        (vec_merge:<ssequartermode>
          (vec_select:<ssequartermode>
            (match_operand:V8FI 1 "register_operand" "v")
            (parallel [(match_operand 2 "const_0_to_7_operand")
                       (match_operand 3 "const_0_to_7_operand")]))
          (match_operand:<ssequartermode> 4 "memory_operand" "0")
          ;; Write mask: "Yk", as in the other masked patterns of this
          ;; file, so that k0 cannot be allocated.
          (match_operand:QI 5 "register_operand" "Yk")))]
  "TARGET_AVX512DQ
   && (INTVAL (operands[2]) % 2 == 0)
   && (INTVAL (operands[2]) == INTVAL (operands[3]) - 1)
   && rtx_equal_p (operands[4], operands[0])"
{
  /* Convert the first element index into the 128-bit chunk number.  */
  operands[2] = GEN_INT ((INTVAL (operands[2])) >> 1);
  return "vextract<shuffletype>64x2\t{%2, %1, %0%{%5%}|%0%{%5%}, %1, %2}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "memory" "store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
7078
;; Masked vextract{f,i}32x4 storing to memory, with the same memory as
;; merge source (operand 6 must equal operand 0).  Operands 2..5 are
;; four consecutive element indices starting at a multiple of 4.
(define_insn "avx512f_vextract<shuffletype>32x4_1_maskm"
  [(set (match_operand:<ssequartermode> 0 "memory_operand" "=m")
        (vec_merge:<ssequartermode>
          (vec_select:<ssequartermode>
            (match_operand:V16FI 1 "register_operand" "v")
            (parallel [(match_operand 2 "const_0_to_15_operand")
                       (match_operand 3 "const_0_to_15_operand")
                       (match_operand 4 "const_0_to_15_operand")
                       (match_operand 5 "const_0_to_15_operand")]))
          (match_operand:<ssequartermode> 6 "memory_operand" "0")
          (match_operand:QI 7 "register_operand" "Yk")))]
  "TARGET_AVX512F
   && ((INTVAL (operands[2]) % 4 == 0)
       && INTVAL (operands[2]) == (INTVAL (operands[3]) - 1)
       && INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
       && INTVAL (operands[4]) == (INTVAL (operands[5]) - 1))
   && rtx_equal_p (operands[6], operands[0])"
{
  /* First element index -> 128-bit chunk number.  */
  HOST_WIDE_INT first_elt = INTVAL (operands[2]);

  operands[2] = GEN_INT (first_elt >> 2);
  return "vextract<shuffletype>32x4\t{%2, %1, %0%{%7%}|%0%{%7%}, %1, %2}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "memory" "store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
7106
;; vextract{f,i}64x2, optionally masked.  Operands 2 and 3 are the two
;; element indices of the selected 128-bit chunk.  They must be
;; consecutive AND start on an even index: the output code derives the
;; immediate with ">> 1", so an unaligned pair such as (1, 2) would
;; otherwise be emitted as chunk 0 and extract the wrong elements.
(define_insn "<mask_codefor>avx512dq_vextract<shuffletype>64x2_1<mask_name>"
  [(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
        (vec_select:<ssequartermode>
          (match_operand:V8FI 1 "register_operand" "v")
          (parallel [(match_operand 2 "const_0_to_7_operand")
                     (match_operand 3 "const_0_to_7_operand")])))]
  "TARGET_AVX512DQ
   && INTVAL (operands[2]) % 2 == 0
   && INTVAL (operands[2]) == INTVAL (operands[3]) - 1"
{
  /* First element index -> 128-bit chunk number.  */
  operands[2] = GEN_INT ((INTVAL (operands[2])) >> 1);
  return "vextract<shuffletype>64x2\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}";
}
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
7123
;; vextract{f,i}32x4, optionally masked.  Operands 2..5 are the four
;; element indices of the selected 128-bit chunk.  They must be
;; consecutive AND start on a multiple of 4: the output code derives
;; the immediate with ">> 2", so an unaligned run such as (1, 2, 3, 4)
;; would otherwise be emitted as chunk 0 and extract the wrong
;; elements.
(define_insn "<mask_codefor>avx512f_vextract<shuffletype>32x4_1<mask_name>"
  [(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
        (vec_select:<ssequartermode>
          (match_operand:V16FI 1 "register_operand" "v")
          (parallel [(match_operand 2 "const_0_to_15_operand")
                     (match_operand 3 "const_0_to_15_operand")
                     (match_operand 4 "const_0_to_15_operand")
                     (match_operand 5 "const_0_to_15_operand")])))]
  "TARGET_AVX512F
   && INTVAL (operands[2]) % 4 == 0
   && INTVAL (operands[2]) == (INTVAL (operands[3]) - 1)
   && INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
   && INTVAL (operands[4]) == (INTVAL (operands[5]) - 1)"
{
  /* First element index -> 128-bit chunk number.  */
  operands[2] = GEN_INT ((INTVAL (operands[2])) >> 2);
  return "vextract<shuffletype>32x4\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
}
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
7145
;; ISA prefix used in the names of the half-extraction patterns, per
;; 512-bit source mode.
(define_mode_attr extract_type_2
  [(V16SF "avx512dq")
   (V16SI "avx512dq")
   (V8DF "avx512f")
   (V8DI "avx512f")])

;; Element-layout suffix of the matching vextract mnemonic, per
;; 512-bit source mode.
(define_mode_attr extract_suf_2
  [(V16SF "32x8")
   (V16SI "32x8")
   (V8DF "64x4")
   (V8DI "64x4")])

;; 512-bit modes for half extraction; the 32-bit element modes are
;; only enabled with AVX512DQ.
(define_mode_iterator AVX512_VEC_2
  [(V16SF "TARGET_AVX512DQ")
   (V16SI "TARGET_AVX512DQ")
   V8DF
   V8DI])
7154
;; Masked extraction of one half of a 512-bit vector.
;;   operand 0: destination (register or memory)
;;   operand 1: 512-bit source register
;;   operand 2: half index, 0 (low) or 1 (high)
;;   operand 3: merge source for masked-off elements
;;   operand 4: write mask
(define_expand "<extract_type_2>_vextract<shuffletype><extract_suf_2>_mask"
  [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
   (match_operand:AVX512_VEC_2 1 "register_operand")
   (match_operand:SI 2 "const_0_to_1_operand")
   (match_operand:<ssehalfvecmode> 3 "nonimmediate_operand")
   (match_operand:QI 4 "register_operand")]
  "TARGET_AVX512F"
{
  rtx (*insn)(rtx, rtx, rtx, rtx);
  rtx dest = operands[0];

  /* The *_maskm insns accept a memory destination only when the merge
     source is that very same memory.  In every other case extract into
     a fresh pseudo and store it afterwards.  The temporary must have
     operand 0's mode, <ssehalfvecmode>, not the quarter mode.  */
  if (MEM_P (dest) && !rtx_equal_p (dest, operands[3]))
    dest = gen_reg_rtx (<ssehalfvecmode>mode);

  switch (INTVAL (operands[2]))
    {
    case 0:
      insn = gen_vec_extract_lo_<mode>_mask;
      break;
    case 1:
      insn = gen_vec_extract_hi_<mode>_mask;
      break;
    default:
      gcc_unreachable ();
    }

  emit_insn (insn (dest, operands[1], operands[3], operands[4]));
  if (dest != operands[0])
    emit_move_insn (operands[0], dest);
  DONE;
})
7183
;; After reload, taking the low half of an 8-element 512-bit vector is
;; a move of the low part of the source.  Only done with AVX512VL, or
;; when the destination is a register and the source is not an
;; extended (EXT_REX) SSE register.
(define_split
  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
        (vec_select:<ssehalfvecmode>
          (match_operand:V8FI 1 "nonimmediate_operand")
          (parallel [(const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))]
  "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
   && reload_completed
   && (TARGET_AVX512VL || (REG_P (operands[0]) && !EXT_REX_SSE_REG_P (operands[1])))"
  [(const_int 0)]
{
  rtx src = operands[1];

  /* Same hard register, or the low part of the memory, in the half
     mode.  */
  src = (REG_P (src)
         ? gen_rtx_REG (<ssehalfvecmode>mode, REGNO (src))
         : gen_lowpart (<ssehalfvecmode>mode, src));
  emit_move_insn (operands[0], src);
  DONE;
})
7203
;; Masked store of the low half (elements 0..3) of an 8-element 512-bit
;; vector to memory; the merge source (operand 2) must be the same
;; memory as the destination.
(define_insn "vec_extract_lo_<mode>_maskm"
  [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
        (vec_merge:<ssehalfvecmode>
          (vec_select:<ssehalfvecmode>
            (match_operand:V8FI 1 "register_operand" "v")
            (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))
          (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
          (match_operand:QI 3 "register_operand" "Yk")))]
  "TARGET_AVX512F
   && rtx_equal_p (operands[2], operands[0])"
  "vextract<shuffletype>64x4\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
7221
;; Low half (elements 0..3) of an 8-element 512-bit vector, optionally
;; masked.  Emits vextract{f,i}64x4 when a mask is applied or AVX512VL
;; is unavailable; otherwise outputs "#" so the insn gets split.
(define_insn "vec_extract_lo_<mode><mask_name>"
  [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>,v")
        (vec_select:<ssehalfvecmode>
          (match_operand:V8FI 1 "nonimmediate_operand" "v,m")
          (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)])))]
  "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
  if (!<mask_applied> && TARGET_AVX512VL)
    return "#";

  return "vextract<shuffletype>64x4\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
}
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
7240
;; Masked store of the high half (elements 4..7) of an 8-element
;; 512-bit vector to memory; the merge source (operand 2) must be the
;; same memory as the destination.
(define_insn "vec_extract_hi_<mode>_maskm"
  [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
        (vec_merge:<ssehalfvecmode>
          (vec_select:<ssehalfvecmode>
            (match_operand:V8FI 1 "register_operand" "v")
            (parallel [(const_int 4) (const_int 5) (const_int 6) (const_int 7)]))
          (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
          (match_operand:QI 3 "register_operand" "Yk")))]
  "TARGET_AVX512F
   && rtx_equal_p (operands[2], operands[0])"
  "vextract<shuffletype>64x4\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "memory" "store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
7259
;; High half (elements 4..7) of an 8-element 512-bit vector, optionally
;; masked, via vextract{f,i}64x4 with immediate 1.
(define_insn "vec_extract_hi_<mode><mask_name>"
  [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
        (vec_select:<ssehalfvecmode>
          (match_operand:V8FI 1 "register_operand" "v")
          (parallel [(const_int 4) (const_int 5) (const_int 6) (const_int 7)])))]
  "TARGET_AVX512F"
  "vextract<shuffletype>64x4\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
7273
;; Masked store of the high half (elements 8..15) of a 16-element
;; 512-bit vector to memory; the merge source (operand 2) must be the
;; same memory as the destination.
(define_insn "vec_extract_hi_<mode>_maskm"
  [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
        (vec_merge:<ssehalfvecmode>
          (vec_select:<ssehalfvecmode>
            (match_operand:V16FI 1 "register_operand" "v")
            (parallel [(const_int 8) (const_int 9)
                       (const_int 10) (const_int 11)
                       (const_int 12) (const_int 13)
                       (const_int 14) (const_int 15)]))
          (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
          ;; Write mask: "Yk", as in the V8FI variants of this pattern,
          ;; so that k0 cannot be allocated.
          (match_operand:QI 3 "register_operand" "Yk")))]
  "TARGET_AVX512DQ
   && rtx_equal_p (operands[2], operands[0])"
  "vextract<shuffletype>32x8\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
7293
;; High half (elements 8..15) of a 16-element 512-bit vector,
;; optionally masked.  The first alternative (AVX512DQ) uses the 32x8
;; form and can carry a mask; the second (no AVX512DQ) uses
;; vextracti64x4 without one.
(define_insn "vec_extract_hi_<mode><mask_name>"
  [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>,vm")
        (vec_select:<ssehalfvecmode>
          (match_operand:V16FI 1 "register_operand" "v,v")
          (parallel [(const_int 8) (const_int 9) (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13) (const_int 14) (const_int 15)])))]
  "TARGET_AVX512F && <mask_avx512dq_condition>"
  "@
   vextract<shuffletype>32x8\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}
   vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "isa" "avx512dq,noavx512dq")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
7312
;; Masked extraction of one 128-bit half of a 256-bit vector.
;;   operand 0: destination (register or memory)
;;   operand 1: 256-bit source register
;;   operand 2: half index, 0 (low) or 1 (high)
;;   operand 3: merge source (vector or constant-zero vector)
;;   operand 4: write mask
(define_expand "avx512vl_vextractf128<mode>"
  [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
   (match_operand:VI48F_256 1 "register_operand")
   (match_operand:SI 2 "const_0_to_1_operand")
   (match_operand:<ssehalfvecmode> 3 "vector_move_operand")
   (match_operand:QI 4 "register_operand")]
  "TARGET_AVX512DQ && TARGET_AVX512VL"
{
  rtx (*insn)(rtx, rtx, rtx, rtx);
  rtx dest = operands[0];

  /* A memory destination is only usable directly when the merge
     source is that very same memory.  Otherwise extract into a fresh
     pseudo and store it afterwards.  Loading the destination with
     force_reg would leave the result in a pseudo that is never written
     back to memory.  */
  if (MEM_P (dest) && !rtx_equal_p (dest, operands[3]))
    dest = gen_reg_rtx (<ssehalfvecmode>mode);

  switch (INTVAL (operands[2]))
    {
    case 0:
      insn = gen_vec_extract_lo_<mode>_mask;
      break;
    case 1:
      insn = gen_vec_extract_hi_<mode>_mask;
      break;
    default:
      gcc_unreachable ();
    }

  emit_insn (insn (dest, operands[1], operands[3], operands[4]));
  if (dest != operands[0])
    emit_move_insn (operands[0], dest);
  DONE;
})
7341
;; Unmasked extraction of one 128-bit half of a 256-bit vector:
;; dispatches on operand 2 to the vec_extract_lo / vec_extract_hi
;; pattern of the source mode.
(define_expand "avx_vextractf128<mode>"
  [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
   (match_operand:V_256 1 "register_operand")
   (match_operand:SI 2 "const_0_to_1_operand")]
  "TARGET_AVX"
{
  rtx (*gen_half) (rtx, rtx);
  HOST_WIDE_INT half = INTVAL (operands[2]);

  if (half == 0)
    gen_half = gen_vec_extract_lo_<mode>;
  else if (half == 1)
    gen_half = gen_vec_extract_hi_<mode>;
  else
    gcc_unreachable ();

  emit_insn (gen_half (operands[0], operands[1]));
  DONE;
})
7365
;; Low half (elements 0..7) of a 16-element 512-bit vector, optionally
;; masked.  With a mask this emits vextract{f,i}32x8; without one it
;; outputs "#" so the insn gets split.
(define_insn "vec_extract_lo_<mode><mask_name>"
  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
        (vec_select:<ssehalfvecmode>
          (match_operand:V16FI 1 "nonimmediate_operand" "vm,v")
          (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5) (const_int 6) (const_int 7)])))]
  "TARGET_AVX512F
   && <mask_mode512bit_condition>
   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
  if (!<mask_applied>)
    return "#";

  return "vextract<shuffletype>32x8\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
})
7383
;; After reload, taking the low half of a 16-element 512-bit vector is
;; a move of the low part of the source.
(define_split
  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
        (vec_select:<ssehalfvecmode>
          (match_operand:V16FI 1 "nonimmediate_operand")
          (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5) (const_int 6) (const_int 7)])))]
  "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
   && reload_completed"
  [(const_int 0)]
{
  rtx src = operands[1];

  /* Same hard register, or the low part of the memory, in the half
     mode.  */
  src = (REG_P (src)
         ? gen_rtx_REG (<ssehalfvecmode>mode, REGNO (src))
         : gen_lowpart (<ssehalfvecmode>mode, src));
  emit_move_insn (operands[0], src);
  DONE;
})
7404
;; Low half (elements 0..1) of a 4-element 256-bit vector, optionally
;; masked.  With a mask this emits vextract{f,i}64x2; without one it
;; outputs "#" so the insn gets split.
(define_insn "vec_extract_lo_<mode><mask_name>"
  [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=v,m")
        (vec_select:<ssehalfvecmode>
          (match_operand:VI8F_256 1 "nonimmediate_operand" "vm,v")
          (parallel [(const_int 0) (const_int 1)])))]
  "TARGET_AVX
   && <mask_avx512vl_condition> && <mask_avx512dq_condition>
   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
  /* Use <mask_operand2> like the sibling patterns do, rather than a
     hand-written %{%3%}: the latter omits the %N2 part and so loses
     the zero-masking marker when the merge source is a zero vector.  */
  if (<mask_applied>)
    return "vextract<shuffletype>64x2\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
  else
    return "#";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "memory" "none,store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
7425
;; After reload, taking the low half of a 4-element 256-bit vector is
;; a move of the low part of the source.
(define_split
  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
        (vec_select:<ssehalfvecmode>
          (match_operand:VI8F_256 1 "nonimmediate_operand")
          (parallel [(const_int 0) (const_int 1)])))]
  "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))
   && reload_completed"
  [(const_int 0)]
{
  rtx src = operands[1];

  /* Same hard register, or the low part of the memory, in the half
     mode.  */
  src = (REG_P (src)
         ? gen_rtx_REG (<ssehalfvecmode>mode, REGNO (src))
         : gen_lowpart (<ssehalfvecmode>mode, src));
  emit_move_insn (operands[0], src);
  DONE;
})
7443
;; High half (elements 2..3) of a 4-element 256-bit vector, optionally
;; masked.  The mnemonic depends on the enabled ISA: the VEX 128-bit
;; extract without AVX512VL, the 64x2 form with AVX512VL+AVX512DQ, and
;; the 32x4 form with AVX512VL alone.
(define_insn "vec_extract_hi_<mode><mask_name>"
  [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=v,<store_mask_constraint>")
        (vec_select:<ssehalfvecmode>
          (match_operand:VI8F_256 1 "register_operand" "v,v")
          (parallel [(const_int 2) (const_int 3)])))]
  "TARGET_AVX && <mask_avx512vl_condition> && <mask_avx512dq_condition>"
{
  if (!TARGET_AVX512VL)
    return "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}";

  if (TARGET_AVX512DQ)
    return "vextract<shuffletype>64x2\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}";

  return "vextract<shuffletype>32x4\t{$0x1, %1, %0|%0, %1, 0x1}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "memory" "none,store")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])
7467
;; After reload, taking the low half of an 8-element 256-bit vector is
;; a move of the low part of the source.
(define_split
  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
        (vec_select:<ssehalfvecmode>
          (match_operand:VI4F_256 1 "nonimmediate_operand")
          (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)])))]
  "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1])) && reload_completed"
  [(const_int 0)]
{
  rtx src = operands[1];

  /* Same hard register, or the low part of the memory, in the half
     mode.  */
  src = (REG_P (src)
         ? gen_rtx_REG (<ssehalfvecmode>mode, REGNO (src))
         : gen_lowpart (<ssehalfvecmode>mode, src));
  emit_move_insn (operands[0], src);
  DONE;
})
7485
7486
;; Low half (elements 0..3) of an 8-element 256-bit vector, optionally
;; masked.  With a mask this emits vextract{f,i}32x4; without one it
;; outputs "#" so the insn gets split.
(define_insn "vec_extract_lo_<mode><mask_name>"
  [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
        (vec_select:<ssehalfvecmode>
          (match_operand:VI4F_256 1 "register_operand" "v")
          (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)])))]
  "TARGET_AVX && <mask_avx512vl_condition> && <mask_avx512dq_condition>"
{
  if (!<mask_applied>)
    return "#";

  return "vextract<shuffletype>32x4\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
}
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
7505
;; Masked store of the low half (elements 0..3) of an 8-element 256-bit
;; vector to memory; the merge source (operand 2) must be the same
;; memory as the destination.
(define_insn "vec_extract_lo_<mode>_maskm"
  [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
        (vec_merge:<ssehalfvecmode>
          (vec_select:<ssehalfvecmode>
            (match_operand:VI4F_256 1 "register_operand" "v")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)]))
          (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
          ;; Write mask: "Yk", as in the other masked patterns of this
          ;; file, so that k0 cannot be allocated.
          (match_operand:QI 3 "register_operand" "Yk")))]
  "TARGET_AVX512VL && TARGET_AVX512F
   && rtx_equal_p (operands[2], operands[0])"
  "vextract<shuffletype>32x4\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
7523
;; Masked store of the high half (elements 4..7) of an 8-element
;; 256-bit vector to memory; the merge source (operand 2) must be the
;; same memory as the destination.
(define_insn "vec_extract_hi_<mode>_maskm"
  [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
        (vec_merge:<ssehalfvecmode>
          (vec_select:<ssehalfvecmode>
            (match_operand:VI4F_256 1 "register_operand" "v")
            (parallel [(const_int 4) (const_int 5)
                       (const_int 6) (const_int 7)]))
          (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
          ;; The vec_merge selector is a mask register, so it is a QI
          ;; scalar (as in vec_extract_lo_<mode>_maskm), not a value of
          ;; the vector mode.  "Yk" keeps k0 from being allocated.
          (match_operand:QI 3 "register_operand" "Yk")))]
  "TARGET_AVX512F && TARGET_AVX512VL
   && rtx_equal_p (operands[2], operands[0])"
  "vextract<shuffletype>32x4\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
  [(set_attr "type" "sselog1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
7540
;; Masked extraction of the high half (elements 4..7) of an 8-element
;; 256-bit vector into a register.  Operand 2 is the merge source: the
;; destination itself or a constant vector ("0C"); %N2 adds the
;; zero-masking marker when needed.
(define_insn "vec_extract_hi_<mode>_mask"
  [(set (match_operand:<ssehalfvecmode> 0 "register_operand" "=v")
        (vec_merge:<ssehalfvecmode>
          (vec_select:<ssehalfvecmode>
            (match_operand:VI4F_256 1 "register_operand" "v")
            (parallel [(const_int 4) (const_int 5) (const_int 6) (const_int 7)]))
          (match_operand:<ssehalfvecmode> 2 "vector_move_operand" "0C")
          (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
  "TARGET_AVX512VL"
  "vextract<shuffletype>32x4\t{$0x1, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x1}"
  [(set_attr "type" "sselog1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
7556
;; Unmasked extraction of the high half (elements 4..7) of an 8-element
;; 256-bit vector.  Alternative 0 is the VEX 128-bit extract;
;; alternative 1 (AVX512VL) is the EVEX 32x4 form.
(define_insn "vec_extract_hi_<mode>"
  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=xm, vm")
        (vec_select:<ssehalfvecmode>
          (match_operand:VI4F_256 1 "register_operand" "x, v")
          (parallel [(const_int 4) (const_int 5) (const_int 6) (const_int 7)])))]
  "TARGET_AVX"
  "@
   vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}
   vextract<shuffletype>32x4\t{$0x1, %1, %0|%0, %1, 0x1}"
  [(set_attr "isa" "*, avx512vl")
   (set_attr "prefix" "vex, evex")
   (set_attr "type" "sselog1")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "<sseinsnmode>")])
7572
;; Low half (elements 0..15) of a V32HI.  Always emitted as "#" and
;; split after reload into a V16HI move from the low part of the
;; source.
(define_insn_and_split "vec_extract_lo_v32hi"
  [(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,m")
        (vec_select:V16HI
          (match_operand:V32HI 1 "nonimmediate_operand" "vm,v")
          (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5) (const_int 6) (const_int 7)
                     (const_int 8) (const_int 9) (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13) (const_int 14) (const_int 15)])))]
  "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  /* Same hard register or same address, viewed as V16HImode.  */
  operands[1] = (REG_P (operands[1])
                 ? gen_rtx_REG (V16HImode, REGNO (operands[1]))
                 : adjust_address (operands[1], V16HImode, 0));
})
7595
;; High half (elements 16..31) of a V32HI, via vextracti64x4 with
;; immediate 1.
(define_insn "vec_extract_hi_v32hi"
  [(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,m")
        (vec_select:V16HI
          (match_operand:V32HI 1 "register_operand" "v,v")
          (parallel [(const_int 16) (const_int 17) (const_int 18) (const_int 19)
                     (const_int 20) (const_int 21) (const_int 22) (const_int 23)
                     (const_int 24) (const_int 25) (const_int 26) (const_int 27)
                     (const_int 28) (const_int 29) (const_int 30) (const_int 31)])))]
  "TARGET_AVX512F"
  "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "memory" "none,store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
7616
;; Low half (elements 0..7) of a V16HI.  Always emitted as "#" and
;; split after reload into a V8HI move from the low part of the source.
(define_insn_and_split "vec_extract_lo_v16hi"
  [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
        (vec_select:V8HI
          (match_operand:V16HI 1 "nonimmediate_operand" "xm,x")
          (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5) (const_int 6) (const_int 7)])))]
  "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  /* Same hard register or same address, viewed as V8HImode.  */
  operands[1] = (REG_P (operands[1])
                 ? gen_rtx_REG (V8HImode, REGNO (operands[1]))
                 : adjust_address (operands[1], V8HImode, 0));
})
7635
;; High half (elements 8..15) of a V16HI, via the VEX 128-bit extract
;; with immediate 1.
(define_insn "vec_extract_hi_v16hi"
  [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
        (vec_select:V8HI
          (match_operand:V16HI 1 "register_operand" "x,x")
          (parallel [(const_int 8) (const_int 9) (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13) (const_int 14) (const_int 15)])))]
  "TARGET_AVX"
  "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "memory" "none,store")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
7652
;; Low half (elements 0..31) of a V64QI.  Always emitted as "#" and
;; split after reload into a V32QI move from the low part of the
;; source.
(define_insn_and_split "vec_extract_lo_v64qi"
  [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
        (vec_select:V32QI
          (match_operand:V64QI 1 "nonimmediate_operand" "vm,v")
          (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5) (const_int 6) (const_int 7)
                     (const_int 8) (const_int 9) (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13) (const_int 14) (const_int 15)
                     (const_int 16) (const_int 17) (const_int 18) (const_int 19)
                     (const_int 20) (const_int 21) (const_int 22) (const_int 23)
                     (const_int 24) (const_int 25) (const_int 26) (const_int 27)
                     (const_int 28) (const_int 29) (const_int 30) (const_int 31)])))]
  "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  /* Same hard register or same address, viewed as V32QImode.  */
  operands[1] = (REG_P (operands[1])
                 ? gen_rtx_REG (V32QImode, REGNO (operands[1]))
                 : adjust_address (operands[1], V32QImode, 0));
})
7683
;; High half (elements 32..63) of a V64QI, via vextracti64x4 with
;; immediate 1.
(define_insn "vec_extract_hi_v64qi"
  [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
        (vec_select:V32QI
          (match_operand:V64QI 1 "register_operand" "v,v")
          (parallel [(const_int 32) (const_int 33) (const_int 34) (const_int 35)
                     (const_int 36) (const_int 37) (const_int 38) (const_int 39)
                     (const_int 40) (const_int 41) (const_int 42) (const_int 43)
                     (const_int 44) (const_int 45) (const_int 46) (const_int 47)
                     (const_int 48) (const_int 49) (const_int 50) (const_int 51)
                     (const_int 52) (const_int 53) (const_int 54) (const_int 55)
                     (const_int 56) (const_int 57) (const_int 58) (const_int 59)
                     (const_int 60) (const_int 61) (const_int 62) (const_int 63)])))]
  "TARGET_AVX512F"
  "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "memory" "none,store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
7712
;; Low half (elements 0..15) of a V32QI.  Always emitted as "#" and
;; split after reload into a V16QI move from the low part of the
;; source.
(define_insn_and_split "vec_extract_lo_v32qi"
  [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
        (vec_select:V16QI
          (match_operand:V32QI 1 "nonimmediate_operand" "xm,x")
          (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5) (const_int 6) (const_int 7)
                     (const_int 8) (const_int 9) (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13) (const_int 14) (const_int 15)])))]
  "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  /* Same hard register or same address, viewed as V16QImode.  */
  operands[1] = (REG_P (operands[1])
                 ? gen_rtx_REG (V16QImode, REGNO (operands[1]))
                 : adjust_address (operands[1], V16QImode, 0));
})
7735
;; High half (elements 16..31) of a V32QI, via the VEX 128-bit extract
;; with immediate 1.
(define_insn "vec_extract_hi_v32qi"
  [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
        (vec_select:V16QI
          (match_operand:V32QI 1 "register_operand" "x,x")
          (parallel [(const_int 16) (const_int 17) (const_int 18) (const_int 19)
                     (const_int 20) (const_int 21) (const_int 22) (const_int 23)
                     (const_int 24) (const_int 25) (const_int 26) (const_int 27)
                     (const_int 28) (const_int 29) (const_int 30) (const_int 31)])))]
  "TARGET_AVX"
  "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "memory" "none,store")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
7756
;; Modes handled by vec_extract patterns.
;; One row per element type.  The 512-bit entry needs AVX512BW for
;; byte/word elements and AVX512F otherwise, the 256-bit entry needs
;; AVX, and the 128-bit entry is unconditional.
(define_mode_iterator VEC_EXTRACT_MODE
  [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
   (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
   (V16SI "TARGET_AVX512F")  (V8SI "TARGET_AVX")  V4SI
   (V8DI "TARGET_AVX512F")   (V4DI "TARGET_AVX")  V2DI
   (V16SF "TARGET_AVX512F")  (V8SF "TARGET_AVX")  V4SF
   (V8DF "TARGET_AVX512F")   (V4DF "TARGET_AVX")  V2DF])
7765
;; Hands the scalar destination (operand 0), the vector (operand 1) and
;; the constant element index (operand 2) to
;; ix86_expand_vector_extract.
(define_expand "vec_extract<mode>"
  [(match_operand:<ssescalarmode> 0 "register_operand")
   (match_operand:VEC_EXTRACT_MODE 1 "register_operand")
   (match_operand 2 "const_int_operand")]
  "TARGET_SSE"
{
  HOST_WIDE_INT elt = INTVAL (operands[2]);

  ix86_expand_vector_extract (false, operands[0], operands[1], elt);
  DONE;
})
7776
7777 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7778 ;;
7779 ;; Parallel double-precision floating point element swizzling
7780 ;;
7781 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7782
;; 512-bit vunpckhpd.  With a = operand 1 and b = operand 2 the selector
;; picks {a[1], b[1], a[3], b[3], a[5], b[5], a[7], b[7]} from the
;; concatenation a:b (indexes 8..15 address b).  Operand 2 may be memory.
;; <mask_codefor>, <mask_name> and <mask_operand3> are substitution
;; attributes defined elsewhere in the file; they provide the masked variant.
7783 (define_insn "<mask_codefor>avx512f_unpckhpd512<mask_name>"
7784 [(set (match_operand:V8DF 0 "register_operand" "=v")
7785 (vec_select:V8DF
7786 (vec_concat:V16DF
7787 (match_operand:V8DF 1 "register_operand" "v")
7788 (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
7789 (parallel [(const_int 1) (const_int 9)
7790 (const_int 3) (const_int 11)
7791 (const_int 5) (const_int 13)
7792 (const_int 7) (const_int 15)])))]
7793 "TARGET_AVX512F"
7794 "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7795 [(set_attr "type" "sselog")
7796 (set_attr "prefix" "evex")
7797 (set_attr "mode" "V8DF")])
7798
7799 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit vunpckhpd: result is {a[1], b[1], a[3], b[3]} for a = operand 1,
;; b = operand 2 (indexes 4..7 of the concatenation address b).  The masked
;; variant generated through <mask_name> is additionally gated by
;; <mask_avx512vl_condition>.
7800 (define_insn "avx_unpckhpd256<mask_name>"
7801 [(set (match_operand:V4DF 0 "register_operand" "=v")
7802 (vec_select:V4DF
7803 (vec_concat:V8DF
7804 (match_operand:V4DF 1 "register_operand" "v")
7805 (match_operand:V4DF 2 "nonimmediate_operand" "vm"))
7806 (parallel [(const_int 1) (const_int 5)
7807 (const_int 3) (const_int 7)])))]
7808 "TARGET_AVX && <mask_avx512vl_condition>"
7809 "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7810 [(set_attr "type" "sselog")
7811 (set_attr "prefix" "vex")
7812 (set_attr "mode" "V4DF")])
7813
;; Standard-name expander for a full-width high interleave of two V4DF
;; vectors a (operand 1) and b (operand 2), built from three sets:
;;   operand 3 (fresh pseudo) = {a[0], b[0], a[2], b[2]}   (selector 0,4,2,6)
;;   operand 4 (fresh pseudo) = {a[1], b[1], a[3], b[3]}   (selector 1,5,3,7)
;;   operand 0 = elements 2,3 of operand 3 followed by elements 2,3 of
;;               operand 4, i.e. {a[2], b[2], a[3], b[3]}.
;; The preparation code only allocates the two temporaries.
7814 (define_expand "vec_interleave_highv4df"
7815 [(set (match_dup 3)
7816 (vec_select:V4DF
7817 (vec_concat:V8DF
7818 (match_operand:V4DF 1 "register_operand" "x")
7819 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
7820 (parallel [(const_int 0) (const_int 4)
7821 (const_int 2) (const_int 6)])))
7822 (set (match_dup 4)
7823 (vec_select:V4DF
7824 (vec_concat:V8DF
7825 (match_dup 1)
7826 (match_dup 2))
7827 (parallel [(const_int 1) (const_int 5)
7828 (const_int 3) (const_int 7)])))
7829 (set (match_operand:V4DF 0 "register_operand")
7830 (vec_select:V4DF
7831 (vec_concat:V8DF
7832 (match_dup 3)
7833 (match_dup 4))
7834 (parallel [(const_int 2) (const_int 3)
7835 (const_int 6) (const_int 7)])))]
7836 "TARGET_AVX"
7837 {
7838 operands[3] = gen_reg_rtx (V4DFmode);
7839 operands[4] = gen_reg_rtx (V4DFmode);
7840 })
7841
7842
;; Masked 128-bit vunpckhpd.  The unmasked value is {a[1], b[1]} for
;; a = operand 1, b = operand 2; it is merged with operand 3 under the QImode
;; mask in operand 4.  Operand 3 is either tied to the destination ("0") or
;; satisfies constraint "C".  NOTE(review): "C" is presumably the all-zero
;; vector constant and "%N3" presumably prints the zero-masking suffix in that
;; case -- both are defined outside this chunk; confirm.
7843 (define_insn "avx512vl_unpckhpd128_mask"
7844 [(set (match_operand:V2DF 0 "register_operand" "=v")
7845 (vec_merge:V2DF
7846 (vec_select:V2DF
7847 (vec_concat:V4DF
7848 (match_operand:V2DF 1 "register_operand" "v")
7849 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
7850 (parallel [(const_int 1) (const_int 3)]))
7851 (match_operand:V2DF 3 "vector_move_operand" "0C")
7852 (match_operand:QI 4 "register_operand" "Yk")))]
7853 "TARGET_AVX512VL"
7854 "vunpckhpd\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
7855 [(set_attr "type" "sselog")
7856 (set_attr "prefix" "evex")
7857 (set_attr "mode" "V2DF")])
7858
;; Standard-name expander: operand 0 = {a[1], b[1]} for a = operand 1 and
;; b = operand 2, both of which may initially be memory.  If the operand
;; combination is rejected by ix86_vec_interleave_v2df_operator_ok (called
;; with second argument 1; the helper is defined outside this chunk), operand
;; 2 is forced into a register before the pattern is emitted.
7859 (define_expand "vec_interleave_highv2df"
7860 [(set (match_operand:V2DF 0 "register_operand")
7861 (vec_select:V2DF
7862 (vec_concat:V4DF
7863 (match_operand:V2DF 1 "nonimmediate_operand")
7864 (match_operand:V2DF 2 "nonimmediate_operand"))
7865 (parallel [(const_int 1)
7866 (const_int 3)])))]
7867 "TARGET_SSE2"
7868 {
7869 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
7870 operands[2] = force_reg (V2DFmode, operands[2]);
7871 })
7872
;; Matcher for the V2DF high interleave {a[1], b[1]}.  Six alternatives, in
;; the order of the constraint strings, output templates and attribute lists:
;;   0: a tied to dest, b in register       -> unpckhpd  (non-AVX)
;;   1: a, b in registers                   -> vunpckhpd (AVX)
;;   2: a in offsettable memory, b same as a -> movddup from %H1 (SSE3)
;;   3: a in offsettable memory, b tied to dest -> movlpd from %H1 (non-AVX)
;;   4: a in offsettable memory, b in register  -> vmovlpd from %H1 (AVX)
;;   5: dest in memory, a in register, b tied to dest -> movhpd store
;; NOTE(review): the "%H" operand modifier is implemented by the i386 operand
;; printer outside this chunk; presumably it addresses the upper 8 bytes of
;; the memory operand -- confirm.
7873 (define_insn "*vec_interleave_highv2df"
7874 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
7875 (vec_select:V2DF
7876 (vec_concat:V4DF
7877 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,o,o,o,x")
7878 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,0,x,0"))
7879 (parallel [(const_int 1)
7880 (const_int 3)])))]
7881 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
7882 "@
7883 unpckhpd\t{%2, %0|%0, %2}
7884 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
7885 %vmovddup\t{%H1, %0|%0, %H1}
7886 movlpd\t{%H1, %0|%0, %H1}
7887 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
7888 %vmovhpd\t{%1, %0|%q0, %1}"
7889 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
7890 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
7891 (set_attr "ssememalign" "64")
7892 (set_attr "prefix_data16" "*,*,*,1,*,1")
7893 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
7894 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
7895
;; Expander for the 512-bit "duplicate even doubles" operation: operand 1 is
;; concatenated with itself (match_dup 1) and the even-index selector yields
;; {a[0], a[0], a[2], a[2], a[4], a[4], a[6], a[6]}.  There is no preparation
;; code; the RTL is emitted as written.
7896 (define_expand "avx512f_movddup512<mask_name>"
7897 [(set (match_operand:V8DF 0 "register_operand")
7898 (vec_select:V8DF
7899 (vec_concat:V16DF
7900 (match_operand:V8DF 1 "nonimmediate_operand")
7901 (match_dup 1))
7902 (parallel [(const_int 0) (const_int 8)
7903 (const_int 2) (const_int 10)
7904 (const_int 4) (const_int 12)
7905 (const_int 6) (const_int 14)])))]
7906 "TARGET_AVX512F")
7907
;; Expander for 512-bit unpcklpd: for a = operand 1 (register) and
;; b = operand 2 (register or memory) the result is
;; {a[0], b[0], a[2], b[2], a[4], b[4], a[6], b[6]}.  No preparation code.
7908 (define_expand "avx512f_unpcklpd512<mask_name>"
7909 [(set (match_operand:V8DF 0 "register_operand")
7910 (vec_select:V8DF
7911 (vec_concat:V16DF
7912 (match_operand:V8DF 1 "register_operand")
7913 (match_operand:V8DF 2 "nonimmediate_operand"))
7914 (parallel [(const_int 0) (const_int 8)
7915 (const_int 2) (const_int 10)
7916 (const_int 4) (const_int 12)
7917 (const_int 6) (const_int 14)])))]
7918 "TARGET_AVX512F")
7919
;; Matcher shared by the two 512-bit expanders that use the even-index
;; selector (0,8,2,10,4,12,6,14).  Two alternatives:
;;   0: operand 2 is tied to operand 1 ("1"), which may be memory -> vmovddup
;;   1: operand 1 in a register, operand 2 register or memory -> vunpcklpd
7920 (define_insn "*avx512f_unpcklpd512<mask_name>"
7921 [(set (match_operand:V8DF 0 "register_operand" "=v,v")
7922 (vec_select:V8DF
7923 (vec_concat:V16DF
7924 (match_operand:V8DF 1 "nonimmediate_operand" "vm, v")
7925 (match_operand:V8DF 2 "nonimmediate_operand" "1 ,vm"))
7926 (parallel [(const_int 0) (const_int 8)
7927 (const_int 2) (const_int 10)
7928 (const_int 4) (const_int 12)
7929 (const_int 6) (const_int 14)])))]
7930 "TARGET_AVX512F"
7931 "@
7932 vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}
7933 vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7934 [(set_attr "type" "sselog")
7935 (set_attr "prefix" "evex")
7936 (set_attr "mode" "V8DF")])
7937
7938 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Expander for the 256-bit "duplicate even doubles" operation: operand 1 is
;; concatenated with itself and selector (0,4,2,6) yields
;; {a[0], a[0], a[2], a[2]}.  No preparation code.
7939 (define_expand "avx_movddup256<mask_name>"
7940 [(set (match_operand:V4DF 0 "register_operand")
7941 (vec_select:V4DF
7942 (vec_concat:V8DF
7943 (match_operand:V4DF 1 "nonimmediate_operand")
7944 (match_dup 1))
7945 (parallel [(const_int 0) (const_int 4)
7946 (const_int 2) (const_int 6)])))]
7947 "TARGET_AVX && <mask_avx512vl_condition>")
7948
;; Expander for 256-bit unpcklpd: for a = operand 1 (register) and
;; b = operand 2 (register or memory) the result is {a[0], b[0], a[2], b[2]}.
;; No preparation code.
7949 (define_expand "avx_unpcklpd256<mask_name>"
7950 [(set (match_operand:V4DF 0 "register_operand")
7951 (vec_select:V4DF
7952 (vec_concat:V8DF
7953 (match_operand:V4DF 1 "register_operand")
7954 (match_operand:V4DF 2 "nonimmediate_operand"))
7955 (parallel [(const_int 0) (const_int 4)
7956 (const_int 2) (const_int 6)])))]
7957 "TARGET_AVX && <mask_avx512vl_condition>")
7958
;; Matcher shared by the two 256-bit expanders that use selector (0,4,2,6).
;; Two alternatives (note the order differs from the 512-bit matcher):
;;   0: operand 1 in a register, operand 2 register or memory -> vunpcklpd
;;   1: operand 1 in memory, operand 2 tied to operand 1 ("1") -> vmovddup
7959 (define_insn "*avx_unpcklpd256<mask_name>"
7960 [(set (match_operand:V4DF 0 "register_operand" "=v,v")
7961 (vec_select:V4DF
7962 (vec_concat:V8DF
7963 (match_operand:V4DF 1 "nonimmediate_operand" " v,m")
7964 (match_operand:V4DF 2 "nonimmediate_operand" "vm,1"))
7965 (parallel [(const_int 0) (const_int 4)
7966 (const_int 2) (const_int 6)])))]
7967 "TARGET_AVX && <mask_avx512vl_condition>"
7968 "@
7969 vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
7970 vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}"
7971 [(set_attr "type" "sselog")
7972 (set_attr "prefix" "vex")
7973 (set_attr "mode" "V4DF")])
7974
;; Standard-name expander for a full-width low interleave of two V4DF
;; vectors a (operand 1) and b (operand 2), built from three sets:
;;   operand 3 (fresh pseudo) = {a[0], b[0], a[2], b[2]}   (selector 0,4,2,6)
;;   operand 4 (fresh pseudo) = {a[1], b[1], a[3], b[3]}   (selector 1,5,3,7)
;;   operand 0 = elements 0,1 of operand 3 followed by elements 0,1 of
;;               operand 4, i.e. {a[0], b[0], a[1], b[1]}.
;; The preparation code only allocates the two temporaries.
7975 (define_expand "vec_interleave_lowv4df"
7976 [(set (match_dup 3)
7977 (vec_select:V4DF
7978 (vec_concat:V8DF
7979 (match_operand:V4DF 1 "register_operand" "x")
7980 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
7981 (parallel [(const_int 0) (const_int 4)
7982 (const_int 2) (const_int 6)])))
7983 (set (match_dup 4)
7984 (vec_select:V4DF
7985 (vec_concat:V8DF
7986 (match_dup 1)
7987 (match_dup 2))
7988 (parallel [(const_int 1) (const_int 5)
7989 (const_int 3) (const_int 7)])))
7990 (set (match_operand:V4DF 0 "register_operand")
7991 (vec_select:V4DF
7992 (vec_concat:V8DF
7993 (match_dup 3)
7994 (match_dup 4))
7995 (parallel [(const_int 0) (const_int 1)
7996 (const_int 4) (const_int 5)])))]
7997 "TARGET_AVX"
7998 {
7999 operands[3] = gen_reg_rtx (V4DFmode);
8000 operands[4] = gen_reg_rtx (V4DFmode);
8001 })
8002
;; Masked 128-bit vunpcklpd.  The unmasked value is {a[0], b[0]} for
;; a = operand 1, b = operand 2; it is merged with operand 3 under the QImode
;; mask in operand 4.  Operand 3 is either tied to the destination ("0") or
;; satisfies constraint "C".  NOTE(review): "C" is presumably the all-zero
;; vector constant and "%N3" presumably prints the zero-masking suffix in that
;; case -- both are defined outside this chunk; confirm.
8003 (define_insn "avx512vl_unpcklpd128_mask"
8004 [(set (match_operand:V2DF 0 "register_operand" "=v")
8005 (vec_merge:V2DF
8006 (vec_select:V2DF
8007 (vec_concat:V4DF
8008 (match_operand:V2DF 1 "register_operand" "v")
8009 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
8010 (parallel [(const_int 0) (const_int 2)]))
8011 (match_operand:V2DF 3 "vector_move_operand" "0C")
8012 (match_operand:QI 4 "register_operand" "Yk")))]
8013 "TARGET_AVX512VL"
8014 "vunpcklpd\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
8015 [(set_attr "type" "sselog")
8016 (set_attr "prefix" "evex")
8017 (set_attr "mode" "V2DF")])
8018
;; Standard-name expander: operand 0 = {a[0], b[0]} for a = operand 1 and
;; b = operand 2, both of which may initially be memory.  If the operand
;; combination is rejected by ix86_vec_interleave_v2df_operator_ok (called
;; with second argument 0; the helper is defined outside this chunk), operand
;; 1 is forced into a register before the pattern is emitted.
8019 (define_expand "vec_interleave_lowv2df"
8020 [(set (match_operand:V2DF 0 "register_operand")
8021 (vec_select:V2DF
8022 (vec_concat:V4DF
8023 (match_operand:V2DF 1 "nonimmediate_operand")
8024 (match_operand:V2DF 2 "nonimmediate_operand"))
8025 (parallel [(const_int 0)
8026 (const_int 2)])))]
8027 "TARGET_SSE2"
8028 {
8029 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
8030 operands[1] = force_reg (V2DFmode, operands[1]);
8031 })
8032
;; Matcher for the V2DF low interleave {a[0], b[0]}.  Six alternatives, in
;; the order of the constraint strings, output templates and attribute lists:
;;   0: a tied to dest, b in register          -> unpcklpd  (non-AVX)
;;   1: a, b in registers                      -> vunpcklpd (AVX)
;;   2: a in memory, b same as a               -> movddup   (SSE3)
;;   3: a tied to dest, b in memory            -> movhpd    (non-AVX)
;;   4: a in register, b in memory             -> vmovhpd   (AVX)
;;   5: dest in offsettable memory, a tied to dest, b in register
;;                                             -> movlpd store to %H0
;; NOTE(review): the "%H" and "%q" operand modifiers are implemented by the
;; i386 operand printer outside this chunk -- confirm their exact meaning.
8033 (define_insn "*vec_interleave_lowv2df"
8034 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,o")
8035 (vec_select:V2DF
8036 (vec_concat:V4DF
8037 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,m,0,x,0")
8038 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,m,m,x"))
8039 (parallel [(const_int 0)
8040 (const_int 2)])))]
8041 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
8042 "@
8043 unpcklpd\t{%2, %0|%0, %2}
8044 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
8045 %vmovddup\t{%1, %0|%0, %q1}
8046 movhpd\t{%2, %0|%0, %q2}
8047 vmovhpd\t{%2, %1, %0|%0, %1, %q2}
8048 %vmovlpd\t{%2, %H0|%H0, %2}"
8049 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
8050 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
8051 (set_attr "ssememalign" "64")
8052 (set_attr "prefix_data16" "*,*,*,1,*,1")
8053 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
8054 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
8055
;; Split, after reload, a store to memory of {r[0], r[0]} where r is a V2DF
;; register concatenated with itself.  The split emits two DFmode moves of
;; the same DFmode view of the register (built from its hard register
;; number) to byte offsets 0 and 8 of the destination.
8056 (define_split
8057 [(set (match_operand:V2DF 0 "memory_operand")
8058 (vec_select:V2DF
8059 (vec_concat:V4DF
8060 (match_operand:V2DF 1 "register_operand")
8061 (match_dup 1))
8062 (parallel [(const_int 0)
8063 (const_int 2)])))]
8064 "TARGET_SSE3 && reload_completed"
8065 [(const_int 0)]
8066 {
8067 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
8068 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
8069 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
8070 DONE;
8071 })
8072
;; Split a V2DF load that selects the same element twice from a memory
;; operand concatenated with itself.  Operand 2 is 0 or 1 and the condition
;; requires operand 3 == operand 2 + 2, so both selectors name element
;; operand 2 of the memory vector.  The replacement is a vec_duplicate of the
;; DFmode memory word at byte offset operand 2 * 8.
8073 (define_split
8074 [(set (match_operand:V2DF 0 "register_operand")
8075 (vec_select:V2DF
8076 (vec_concat:V4DF
8077 (match_operand:V2DF 1 "memory_operand")
8078 (match_dup 1))
8079 (parallel [(match_operand:SI 2 "const_0_to_1_operand")
8080 (match_operand:SI 3 "const_int_operand")])))]
8081 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
8082 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
8083 {
8084 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
8085 })
8086
;; Scalar vscalef on 128-bit float vectors.  The UNSPEC_SCALEF result of
;; operands 1 and 2 is merged with operand 1 under the constant mask 1, so
;; only element 0 comes from the unspec and the remaining elements are
;; copied from operand 1.  The <round_*> attributes come from the embedded
;; rounding substitution defined elsewhere in the file.
8087 (define_insn "avx512f_vmscalef<mode><round_name>"
8088 [(set (match_operand:VF_128 0 "register_operand" "=v")
8089 (vec_merge:VF_128
8090 (unspec:VF_128
8091 [(match_operand:VF_128 1 "register_operand" "v")
8092 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>")]
8093 UNSPEC_SCALEF)
8094 (match_dup 1)
8095 (const_int 1)))]
8096 "TARGET_AVX512F"
8097 "vscalef<ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
8098 [(set_attr "prefix" "evex")
8099 (set_attr "mode" "<ssescalarmode>")])
8100
;; Packed vscalef over every mode in VF_AVX512VL: operand 0 receives the
;; UNSPEC_SCALEF of register operand 1 and operand 2.  Masking and embedded
;; rounding variants are produced through the <mask_name>/<round_name>
;; substitutions defined elsewhere in the file.
8101 (define_insn "<avx512>_scalef<mode><mask_name><round_name>"
8102 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
8103 (unspec:VF_AVX512VL
8104 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
8105 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")]
8106 UNSPEC_SCALEF))]
8107 "TARGET_AVX512F"
8108 "vscalef<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
8109 [(set_attr "prefix" "evex")
8110 (set_attr "mode" "<MODE>")])
8111
;; Zero-masking front end for vpternlog.  Takes destination, three sources,
;; the 8-bit immediate (operand 4) and the mask register (operand 5), and
;; forwards them to the generated *_maskz_1 pattern with an all-zero vector
;; of the same mode (CONST0_RTX) inserted before the mask.
8112 (define_expand "<avx512>_vternlog<mode>_maskz"
8113 [(match_operand:VI48_AVX512VL 0 "register_operand")
8114 (match_operand:VI48_AVX512VL 1 "register_operand")
8115 (match_operand:VI48_AVX512VL 2 "register_operand")
8116 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand")
8117 (match_operand:SI 4 "const_0_to_255_operand")
8118 (match_operand:<avx512fmaskmode> 5 "register_operand")]
8119 "TARGET_AVX512F"
8120 {
8121 emit_insn (gen_<avx512>_vternlog<mode>_maskz_1 (
8122 operands[0], operands[1], operands[2], operands[3],
8123 operands[4], CONST0_RTX (<MODE>mode), operands[5]));
8124 DONE;
8125 })
8126
;; vpternlog: UNSPEC_VTERNLOG of three vector operands and an 8-bit
;; immediate (operand 4).  Operand 1 is tied to the destination ("0"), which
;; is why the output template prints only %0, %2, %3 and %4.  The
;; <sd_maskz_name>/<sd_mask_op5> substitution (defined elsewhere) supplies
;; the zero-masked variant.
8127 (define_insn "<avx512>_vternlog<mode><sd_maskz_name>"
8128 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
8129 (unspec:VI48_AVX512VL
8130 [(match_operand:VI48_AVX512VL 1 "register_operand" "0")
8131 (match_operand:VI48_AVX512VL 2 "register_operand" "v")
8132 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand" "vm")
8133 (match_operand:SI 4 "const_0_to_255_operand")]
8134 UNSPEC_VTERNLOG))]
8135 "TARGET_AVX512F"
8136 "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3, %4}"
8137 [(set_attr "type" "sselog")
8138 (set_attr "prefix" "evex")
8139 (set_attr "mode" "<sseinsnmode>")])
8140
;; Merge-masked vpternlog: the UNSPEC_VTERNLOG result is merged with operand
;; 1 (match_dup 1, which is also tied to the destination) under the mask
;; register in operand 5, printed as {%5} after the destination.
8141 (define_insn "<avx512>_vternlog<mode>_mask"
8142 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
8143 (vec_merge:VI48_AVX512VL
8144 (unspec:VI48_AVX512VL
8145 [(match_operand:VI48_AVX512VL 1 "register_operand" "0")
8146 (match_operand:VI48_AVX512VL 2 "register_operand" "v")
8147 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand" "vm")
8148 (match_operand:SI 4 "const_0_to_255_operand")]
8149 UNSPEC_VTERNLOG)
8150 (match_dup 1)
8151 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
8152 "TARGET_AVX512F"
8153 "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0%{%5%}|%0%{%5%}, %2, %3, %4}"
8154 [(set_attr "type" "sselog")
8155 (set_attr "prefix" "evex")
8156 (set_attr "mode" "<sseinsnmode>")])
8157
;; Packed vgetexp: operand 0 receives UNSPEC_GETEXP of operand 1, for every
;; mode in VF_AVX512VL.  Masked and SAE-only variants come from the
;; <mask_name>/<round_saeonly_name> substitutions defined elsewhere.
;; The ';' that follows the output template string starts an (empty)
;; machine-description comment and has no effect.
8158 (define_insn "<avx512>_getexp<mode><mask_name><round_saeonly_name>"
8159 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
8160 (unspec:VF_AVX512VL [(match_operand:VF_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
8161 UNSPEC_GETEXP))]
8162 "TARGET_AVX512F"
8163 "vgetexp<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}";
8164 [(set_attr "prefix" "evex")
8165 (set_attr "mode" "<MODE>")])
8166
;; Scalar vgetexp on 128-bit float vectors: the UNSPEC_GETEXP of operands 1
;; and 2 is merged with operand 1 under the constant mask 1, so only element
;; 0 comes from the unspec and the other elements are copied from operand 1.
;; The ';' after the template string is an empty comment and has no effect.
8167 (define_insn "avx512f_sgetexp<mode><round_saeonly_name>"
8168 [(set (match_operand:VF_128 0 "register_operand" "=v")
8169 (vec_merge:VF_128
8170 (unspec:VF_128
8171 [(match_operand:VF_128 1 "register_operand" "v")
8172 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
8173 UNSPEC_GETEXP)
8174 (match_dup 1)
8175 (const_int 1)))]
8176 "TARGET_AVX512F"
8177 "vgetexp<ssescalarmodesuffix>\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %2<round_saeonly_op3>}";
8178 [(set_attr "prefix" "evex")
8179 (set_attr "mode" "<ssescalarmode>")])
8180
;; valign{d,q}: UNSPEC_ALIGN of register operand 1, register-or-memory
;; operand 2 and an 8-bit immediate (operand 3), for every mode in
;; VI48_AVX512VL.  The masked variant comes from the <mask_name>
;; substitution.  The ';' after the template string is an empty comment.
8181 (define_insn "<mask_codefor><avx512>_align<mode><mask_name>"
8182 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
8183 (unspec:VI48_AVX512VL [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
8184 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
8185 (match_operand:SI 3 "const_0_to_255_operand")]
8186 UNSPEC_ALIGN))]
8187 "TARGET_AVX512F"
8188 "valign<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
8189 [(set_attr "prefix" "evex")
8190 (set_attr "mode" "<sseinsnmode>")])
8191
;; Masked 512-bit vshufps front end.  The 8-bit immediate in operand 3 is
;; decoded into the sixteen explicit element selectors expected by
;; avx512f_shufps512_1_mask.  For each of the four 128-bit lanes (element
;; base 0, 4, 8, 12) the two-bit fields at bits 0-1 and 2-3 select from
;; operand 1 (index = field + base) and the fields at bits 4-5 and 6-7
;; select from operand 2 (index = field + 16 + base).  Operand 4 is the
;; merge source and operand 5 the HImode mask; both are passed through.
8192 (define_expand "avx512f_shufps512_mask"
8193 [(match_operand:V16SF 0 "register_operand")
8194 (match_operand:V16SF 1 "register_operand")
8195 (match_operand:V16SF 2 "nonimmediate_operand")
8196 (match_operand:SI 3 "const_0_to_255_operand")
8197 (match_operand:V16SF 4 "register_operand")
8198 (match_operand:HI 5 "register_operand")]
8199 "TARGET_AVX512F"
8200 {
8201 int mask = INTVAL (operands[3]);
8202 emit_insn (gen_avx512f_shufps512_1_mask (operands[0], operands[1], operands[2],
8203 GEN_INT ((mask >> 0) & 3),
8204 GEN_INT ((mask >> 2) & 3),
8205 GEN_INT (((mask >> 4) & 3) + 16),
8206 GEN_INT (((mask >> 6) & 3) + 16),
8207 GEN_INT (((mask >> 0) & 3) + 4),
8208 GEN_INT (((mask >> 2) & 3) + 4),
8209 GEN_INT (((mask >> 4) & 3) + 20),
8210 GEN_INT (((mask >> 6) & 3) + 20),
8211 GEN_INT (((mask >> 0) & 3) + 8),
8212 GEN_INT (((mask >> 2) & 3) + 8),
8213 GEN_INT (((mask >> 4) & 3) + 24),
8214 GEN_INT (((mask >> 6) & 3) + 24),
8215 GEN_INT (((mask >> 0) & 3) + 12),
8216 GEN_INT (((mask >> 2) & 3) + 12),
8217 GEN_INT (((mask >> 4) & 3) + 28),
8218 GEN_INT (((mask >> 6) & 3) + 28),
8219 operands[4], operands[5]));
8220 DONE;
8221 })
8222
8223
;; Zero-masking front end for packed vfixupimm.  Forwards destination, two
;; float vector sources, the integer-vector table operand (operand 3), the
;; 8-bit immediate and the mask to the generated *_maskz_1 pattern, inserting
;; an all-zero vector (CONST0_RTX) before the mask.  The trailing
;; <round_saeonly_expand_operand6> is filled in by the SAE expand
;; substitution defined elsewhere in the file.
8224 (define_expand "<avx512>_fixupimm<mode>_maskz<round_saeonly_expand_name>"
8225 [(match_operand:VF_AVX512VL 0 "register_operand")
8226 (match_operand:VF_AVX512VL 1 "register_operand")
8227 (match_operand:VF_AVX512VL 2 "register_operand")
8228 (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>")
8229 (match_operand:SI 4 "const_0_to_255_operand")
8230 (match_operand:<avx512fmaskmode> 5 "register_operand")]
8231 "TARGET_AVX512F"
8232 {
8233 emit_insn (gen_<avx512>_fixupimm<mode>_maskz_1<round_saeonly_expand_name> (
8234 operands[0], operands[1], operands[2], operands[3],
8235 operands[4], CONST0_RTX (<MODE>mode), operands[5]
8236 <round_saeonly_expand_operand6>));
8237 DONE;
8238 })
8239
;; Packed vfixupimm: UNSPEC_FIXUPIMM of operand 1 (tied to the destination,
;; so not printed), register operand 2, the integer-vector operand 3 and the
;; 8-bit immediate operand 4.  Zero-masking and SAE variants come from the
;; <sd_maskz_name>/<round_saeonly_name> substitutions.  The ';' after the
;; template string is an empty comment and has no effect.
8240 (define_insn "<avx512>_fixupimm<mode><sd_maskz_name><round_saeonly_name>"
8241 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
8242 (unspec:VF_AVX512VL
8243 [(match_operand:VF_AVX512VL 1 "register_operand" "0")
8244 (match_operand:VF_AVX512VL 2 "register_operand" "v")
8245 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>")
8246 (match_operand:SI 4 "const_0_to_255_operand")]
8247 UNSPEC_FIXUPIMM))]
8248 "TARGET_AVX512F"
8249 "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3<round_saeonly_sd_mask_op5>, %4}";
8250 [(set_attr "prefix" "evex")
8251 (set_attr "mode" "<MODE>")])
8252
;; Merge-masked packed vfixupimm: the UNSPEC_FIXUPIMM result is merged with
;; operand 1 (match_dup 1, also tied to the destination) under the mask
;; register in operand 5.  The ';' after the template string is an empty
;; comment and has no effect.
8253 (define_insn "<avx512>_fixupimm<mode>_mask<round_saeonly_name>"
8254 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
8255 (vec_merge:VF_AVX512VL
8256 (unspec:VF_AVX512VL
8257 [(match_operand:VF_AVX512VL 1 "register_operand" "0")
8258 (match_operand:VF_AVX512VL 2 "register_operand" "v")
8259 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>")
8260 (match_operand:SI 4 "const_0_to_255_operand")]
8261 UNSPEC_FIXUPIMM)
8262 (match_dup 1)
8263 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
8264 "TARGET_AVX512F"
8265 "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %3<round_saeonly_op6>, %4}";
8266 [(set_attr "prefix" "evex")
8267 (set_attr "mode" "<MODE>")])
8268
;; Zero-masking front end for scalar vfixupimm (128-bit float modes).  Same
;; shape as the packed expander: the operands are forwarded to the generated
;; avx512f_sfixupimm<mode>_maskz_1 pattern with an all-zero vector
;; (CONST0_RTX) inserted before the mask operand.
8269 (define_expand "avx512f_sfixupimm<mode>_maskz<round_saeonly_expand_name>"
8270 [(match_operand:VF_128 0 "register_operand")
8271 (match_operand:VF_128 1 "register_operand")
8272 (match_operand:VF_128 2 "register_operand")
8273 (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>")
8274 (match_operand:SI 4 "const_0_to_255_operand")
8275 (match_operand:<avx512fmaskmode> 5 "register_operand")]
8276 "TARGET_AVX512F"
8277 {
8278 emit_insn (gen_avx512f_sfixupimm<mode>_maskz_1<round_saeonly_expand_name> (
8279 operands[0], operands[1], operands[2], operands[3],
8280 operands[4], CONST0_RTX (<MODE>mode), operands[5]
8281 <round_saeonly_expand_operand6>));
8282 DONE;
8283 })
8284
;; Scalar vfixupimm: the UNSPEC_FIXUPIMM result is merged with operand 1
;; under the constant mask 1, so only element 0 comes from the unspec and
;; the other elements are copied from operand 1 (which is tied to the
;; destination).  The ';' after the template string is an empty comment.
8285 (define_insn "avx512f_sfixupimm<mode><sd_maskz_name><round_saeonly_name>"
8286 [(set (match_operand:VF_128 0 "register_operand" "=v")
8287 (vec_merge:VF_128
8288 (unspec:VF_128
8289 [(match_operand:VF_128 1 "register_operand" "0")
8290 (match_operand:VF_128 2 "register_operand" "v")
8291 (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
8292 (match_operand:SI 4 "const_0_to_255_operand")]
8293 UNSPEC_FIXUPIMM)
8294 (match_dup 1)
8295 (const_int 1)))]
8296 "TARGET_AVX512F"
8297 "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3<round_saeonly_sd_mask_op5>, %4}";
8298 [(set_attr "prefix" "evex")
8299 (set_attr "mode" "<ssescalarmode>")])
8300
;; Merge-masked scalar vfixupimm.  Two nested vec_merges: the inner one
;; keeps only element 0 of the UNSPEC_FIXUPIMM result (constant mask 1,
;; remaining elements from operand 1); the outer one merges that value with
;; operand 1 again under the mask register in operand 5.  The ';' after the
;; template string is an empty comment and has no effect.
8301 (define_insn "avx512f_sfixupimm<mode>_mask<round_saeonly_name>"
8302 [(set (match_operand:VF_128 0 "register_operand" "=v")
8303 (vec_merge:VF_128
8304 (vec_merge:VF_128
8305 (unspec:VF_128
8306 [(match_operand:VF_128 1 "register_operand" "0")
8307 (match_operand:VF_128 2 "register_operand" "v")
8308 (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
8309 (match_operand:SI 4 "const_0_to_255_operand")]
8310 UNSPEC_FIXUPIMM)
8311 (match_dup 1)
8312 (const_int 1))
8313 (match_dup 1)
8314 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
8315 "TARGET_AVX512F"
8316 "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %3<round_saeonly_op6>, %4}";
8317 [(set_attr "prefix" "evex")
8318 (set_attr "mode" "<ssescalarmode>")])
8319
;; Packed vrndscale: UNSPEC_ROUND of operand 1 and the 8-bit immediate in
;; operand 2, for every mode in VF_AVX512VL.  Masked and SAE variants come
;; from the <mask_name>/<round_saeonly_name> substitutions.
8320 (define_insn "<avx512>_rndscale<mode><mask_name><round_saeonly_name>"
8321 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
8322 (unspec:VF_AVX512VL
8323 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>")
8324 (match_operand:SI 2 "const_0_to_255_operand")]
8325 UNSPEC_ROUND))]
8326 "TARGET_AVX512F"
8327 "vrndscale<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}"
8328 [(set_attr "length_immediate" "1")
8329 (set_attr "prefix" "evex")
8330 (set_attr "mode" "<MODE>")])
8331
;; Scalar vrndscale on 128-bit float vectors: UNSPEC_ROUND of operands 1, 2
;; and the 8-bit immediate in operand 3 is merged with operand 1 under the
;; constant mask 1, so only element 0 comes from the unspec and the other
;; elements are copied from operand 1.
8332 (define_insn "avx512f_rndscale<mode><round_saeonly_name>"
8333 [(set (match_operand:VF_128 0 "register_operand" "=v")
8334 (vec_merge:VF_128
8335 (unspec:VF_128
8336 [(match_operand:VF_128 1 "register_operand" "v")
8337 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
8338 (match_operand:SI 3 "const_0_to_255_operand")]
8339 UNSPEC_ROUND)
8340 (match_dup 1)
8341 (const_int 1)))]
8342 "TARGET_AVX512F"
8343 "vrndscale<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}"
8344 [(set_attr "length_immediate" "1")
8345 (set_attr "prefix" "evex")
8346 (set_attr "mode" "<MODE>")])
8347
8348 ;; One bit in mask selects 2 elements.
;; 512-bit vshufps with the shuffle spelled out as sixteen element
;; selectors (operands 3..18) into the concatenation of operands 1 and 2.
;; The insn condition requires the selectors for the second, third and
;; fourth 128-bit lane (operands 7..10, 11..14, 15..18) to equal operands
;; 3..6 plus 4, 8 and 12 respectively, i.e. every lane performs the same
;; shuffle.  The output code rebuilds the 8-bit immediate from operands
;; 3..6 (two bits each; operands 5 and 6 are rebased by 16 because they
;; index operand 2) and stores it back into operands[3] for printing.
8349 (define_insn "avx512f_shufps512_1<mask_name>"
8350 [(set (match_operand:V16SF 0 "register_operand" "=v")
8351 (vec_select:V16SF
8352 (vec_concat:V32SF
8353 (match_operand:V16SF 1 "register_operand" "v")
8354 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
8355 (parallel [(match_operand 3 "const_0_to_3_operand")
8356 (match_operand 4 "const_0_to_3_operand")
8357 (match_operand 5 "const_16_to_19_operand")
8358 (match_operand 6 "const_16_to_19_operand")
8359 (match_operand 7 "const_4_to_7_operand")
8360 (match_operand 8 "const_4_to_7_operand")
8361 (match_operand 9 "const_20_to_23_operand")
8362 (match_operand 10 "const_20_to_23_operand")
8363 (match_operand 11 "const_8_to_11_operand")
8364 (match_operand 12 "const_8_to_11_operand")
8365 (match_operand 13 "const_24_to_27_operand")
8366 (match_operand 14 "const_24_to_27_operand")
8367 (match_operand 15 "const_12_to_15_operand")
8368 (match_operand 16 "const_12_to_15_operand")
8369 (match_operand 17 "const_28_to_31_operand")
8370 (match_operand 18 "const_28_to_31_operand")])))]
8371 "TARGET_AVX512F
8372 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
8373 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
8374 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
8375 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4)
8376 && INTVAL (operands[3]) == (INTVAL (operands[11]) - 8)
8377 && INTVAL (operands[4]) == (INTVAL (operands[12]) - 8)
8378 && INTVAL (operands[5]) == (INTVAL (operands[13]) - 8)
8379 && INTVAL (operands[6]) == (INTVAL (operands[14]) - 8)
8380 && INTVAL (operands[3]) == (INTVAL (operands[15]) - 12)
8381 && INTVAL (operands[4]) == (INTVAL (operands[16]) - 12)
8382 && INTVAL (operands[5]) == (INTVAL (operands[17]) - 12)
8383 && INTVAL (operands[6]) == (INTVAL (operands[18]) - 12))"
8384 {
8385 int mask;
8386 mask = INTVAL (operands[3]);
8387 mask |= INTVAL (operands[4]) << 2;
8388 mask |= (INTVAL (operands[5]) - 16) << 4;
8389 mask |= (INTVAL (operands[6]) - 16) << 6;
8390 operands[3] = GEN_INT (mask);
8391
8392 return "vshufps\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
8393 }
8394 [(set_attr "type" "sselog")
8395 (set_attr "length_immediate" "1")
8396 (set_attr "prefix" "evex")
8397 (set_attr "mode" "V16SF")])
8398
;; Masked 512-bit vshufpd front end.  Each bit of the 8-bit immediate in
;; operand 3 is decoded into one explicit element selector for
;; avx512f_shufpd512_1_mask: even-numbered bits choose between the two
;; doubles of a pair in operand 1 (indexes 0..7), odd-numbered bits between
;; the two doubles of the same pair in operand 2 (indexes 8..15).  Operand 4
;; is the merge source and operand 5 the QImode mask; both pass through.
8399 (define_expand "avx512f_shufpd512_mask"
8400 [(match_operand:V8DF 0 "register_operand")
8401 (match_operand:V8DF 1 "register_operand")
8402 (match_operand:V8DF 2 "nonimmediate_operand")
8403 (match_operand:SI 3 "const_0_to_255_operand")
8404 (match_operand:V8DF 4 "register_operand")
8405 (match_operand:QI 5 "register_operand")]
8406 "TARGET_AVX512F"
8407 {
8408 int mask = INTVAL (operands[3]);
8409 emit_insn (gen_avx512f_shufpd512_1_mask (operands[0], operands[1], operands[2],
8410 GEN_INT (mask & 1),
8411 GEN_INT (mask & 2 ? 9 : 8),
8412 GEN_INT (mask & 4 ? 3 : 2),
8413 GEN_INT (mask & 8 ? 11 : 10),
8414 GEN_INT (mask & 16 ? 5 : 4),
8415 GEN_INT (mask & 32 ? 13 : 12),
8416 GEN_INT (mask & 64 ? 7 : 6),
8417 GEN_INT (mask & 128 ? 15 : 14),
8418 operands[4], operands[5]));
8419 DONE;
8420 })
8421
;; 512-bit vshufpd with the shuffle spelled out as eight element selectors
;; (operands 3..10), each restricted by its predicate to one pair of
;; adjacent indexes.  The output code subtracts the lower bound of each
;; pair to recover one bit per selector, assembles the 8-bit immediate
;; (selector k contributes bit k-3) and stores it into operands[3] for
;; printing.
8422 (define_insn "avx512f_shufpd512_1<mask_name>"
8423 [(set (match_operand:V8DF 0 "register_operand" "=v")
8424 (vec_select:V8DF
8425 (vec_concat:V16DF
8426 (match_operand:V8DF 1 "register_operand" "v")
8427 (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
8428 (parallel [(match_operand 3 "const_0_to_1_operand")
8429 (match_operand 4 "const_8_to_9_operand")
8430 (match_operand 5 "const_2_to_3_operand")
8431 (match_operand 6 "const_10_to_11_operand")
8432 (match_operand 7 "const_4_to_5_operand")
8433 (match_operand 8 "const_12_to_13_operand")
8434 (match_operand 9 "const_6_to_7_operand")
8435 (match_operand 10 "const_14_to_15_operand")])))]
8436 "TARGET_AVX512F"
8437 {
8438 int mask;
8439 mask = INTVAL (operands[3]);
8440 mask |= (INTVAL (operands[4]) - 8) << 1;
8441 mask |= (INTVAL (operands[5]) - 2) << 2;
8442 mask |= (INTVAL (operands[6]) - 10) << 3;
8443 mask |= (INTVAL (operands[7]) - 4) << 4;
8444 mask |= (INTVAL (operands[8]) - 12) << 5;
8445 mask |= (INTVAL (operands[9]) - 6) << 6;
8446 mask |= (INTVAL (operands[10]) - 14) << 7;
8447 operands[3] = GEN_INT (mask);
8448
8449 return "vshufpd\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
8450 }
8451 [(set_attr "type" "sselog")
8452 (set_attr "length_immediate" "1")
8453 (set_attr "prefix" "evex")
8454 (set_attr "mode" "V8DF")])
8455
;; 256-bit vshufpd front end.  The low four bits of the integer constant in
;; operand 3 are decoded into the four explicit selectors of
;; avx_shufpd256_1: bits 0 and 2 choose within operand 1 (indexes 0/1 and
;; 2/3), bits 1 and 3 within operand 2 (indexes 4/5 and 6/7).  Higher bits
;; of the constant are ignored.  <mask_expand4_name>/<mask_expand4_args>
;; come from a substitution defined elsewhere and add the masked variant.
8456 (define_expand "avx_shufpd256<mask_expand4_name>"
8457 [(match_operand:V4DF 0 "register_operand")
8458 (match_operand:V4DF 1 "register_operand")
8459 (match_operand:V4DF 2 "nonimmediate_operand")
8460 (match_operand:SI 3 "const_int_operand")]
8461 "TARGET_AVX"
8462 {
8463 int mask = INTVAL (operands[3]);
8464 emit_insn (gen_avx_shufpd256_1<mask_expand4_name> (operands[0],
8465 operands[1],
8466 operands[2],
8467 GEN_INT (mask & 1),
8468 GEN_INT (mask & 2 ? 5 : 4),
8469 GEN_INT (mask & 4 ? 3 : 2),
8470 GEN_INT (mask & 8 ? 7 : 6)
8471 <mask_expand4_args>));
8472 DONE;
8473 })
8474
;; 256-bit vshufpd with four explicit element selectors (operands 3..6),
;; each restricted by its predicate to one pair of adjacent indexes.  The
;; output code subtracts the lower bound of each pair, packs the resulting
;; bits into a 4-bit immediate and stores it into operands[3] for printing.
8475 (define_insn "avx_shufpd256_1<mask_name>"
8476 [(set (match_operand:V4DF 0 "register_operand" "=v")
8477 (vec_select:V4DF
8478 (vec_concat:V8DF
8479 (match_operand:V4DF 1 "register_operand" "v")
8480 (match_operand:V4DF 2 "nonimmediate_operand" "vm"))
8481 (parallel [(match_operand 3 "const_0_to_1_operand")
8482 (match_operand 4 "const_4_to_5_operand")
8483 (match_operand 5 "const_2_to_3_operand")
8484 (match_operand 6 "const_6_to_7_operand")])))]
8485 "TARGET_AVX && <mask_avx512vl_condition>"
8486 {
8487 int mask;
8488 mask = INTVAL (operands[3]);
8489 mask |= (INTVAL (operands[4]) - 4) << 1;
8490 mask |= (INTVAL (operands[5]) - 2) << 2;
8491 mask |= (INTVAL (operands[6]) - 6) << 3;
8492 operands[3] = GEN_INT (mask);
8493
8494 return "vshufpd\t{%3, %2, %1, %0<mask_operand7>|%0<mask_operand7>, %1, %2, %3}";
8495 }
8496 [(set_attr "type" "sseshuf")
8497 (set_attr "length_immediate" "1")
8498 (set_attr "prefix" "vex")
8499 (set_attr "mode" "V4DF")])
8500
;; 128-bit shufpd front end.  The low two bits of the integer constant in
;; operand 3 are decoded into the two explicit selectors of
;; sse2_shufpd_v2df: bit 0 chooses index 0 or 1 (operand 1), bit 1 chooses
;; index 2 or 3 (operand 2).  Higher bits of the constant are ignored.
;; <mask_expand4_name>/<mask_expand4_args> come from a substitution defined
;; elsewhere and add the masked variant.
8501 (define_expand "sse2_shufpd<mask_expand4_name>"
8502 [(match_operand:V2DF 0 "register_operand")
8503 (match_operand:V2DF 1 "register_operand")
8504 (match_operand:V2DF 2 "nonimmediate_operand")
8505 (match_operand:SI 3 "const_int_operand")]
8506 "TARGET_SSE2"
8507 {
8508 int mask = INTVAL (operands[3]);
8509 emit_insn (gen_sse2_shufpd_v2df<mask_expand4_name> (operands[0], operands[1],
8510 operands[2], GEN_INT (mask & 1),
8511 GEN_INT (mask & 2 ? 3 : 2)
8512 <mask_expand4_args>));
8513 DONE;
8514 })
8515
;; Masked 128-bit vshufpd with two explicit element selectors: operand 3 is
;; index 0 or 1 (into operand 1) and operand 4 is index 2 or 3 (into operand
;; 2).  The shuffled value is merged with operand 5 (tied to the destination
;; or matching constraint "C") under the QImode mask register in operand 6.
;; The output code rebuilds the 2-bit immediate from the two selectors and
;; stores it into operands[3] for printing.
;; Both assembler dialects must print the mask as "%{%6%}" ("{" operand-6
;; "}"); writing "%{6%}" would emit the literal text "{6}" instead of the
;; mask register.
8516 (define_insn "sse2_shufpd_v2df_mask"
8517 [(set (match_operand:V2DF 0 "register_operand" "=v")
8518 (vec_merge:V2DF
8519 (vec_select:V2DF
8520 (vec_concat:V4DF
8521 (match_operand:V2DF 1 "register_operand" "v")
8522 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
8523 (parallel [(match_operand 3 "const_0_to_1_operand")
8524 (match_operand 4 "const_2_to_3_operand")]))
8525 (match_operand:V2DF 5 "vector_move_operand" "0C")
8526 (match_operand:QI 6 "register_operand" "Yk")))]
8527 "TARGET_AVX512VL"
8528 {
8529 int mask;
8530 mask = INTVAL (operands[3]);
8531 mask |= (INTVAL (operands[4]) - 2) << 1;
8532 operands[3] = GEN_INT (mask);
8533
8534 return "vshufpd\t{%3, %2, %1, %0%{%6%}%N5|%0%{%6%}%N5, %1, %2, %3}";
8535 }
8536 [(set_attr "type" "sseshuf")
8537 (set_attr "length_immediate" "1")
8538 (set_attr "prefix" "evex")
8539 (set_attr "mode" "V2DF")])
8540
8541 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; 256-bit vpunpckhqdq: for a = operand 1 and b = operand 2 the result is
;; {a[1], b[1], a[3], b[3]} (indexes 4..7 of the concatenation address b).
;; The masked variant generated through <mask_name> is additionally gated
;; by <mask_avx512vl_condition>.
8542 (define_insn "avx2_interleave_highv4di<mask_name>"
8543 [(set (match_operand:V4DI 0 "register_operand" "=v")
8544 (vec_select:V4DI
8545 (vec_concat:V8DI
8546 (match_operand:V4DI 1 "register_operand" "v")
8547 (match_operand:V4DI 2 "nonimmediate_operand" "vm"))
8548 (parallel [(const_int 1)
8549 (const_int 5)
8550 (const_int 3)
8551 (const_int 7)])))]
8552 "TARGET_AVX2 && <mask_avx512vl_condition>"
8553 "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8554 [(set_attr "type" "sselog")
8555 (set_attr "prefix" "vex")
8556 (set_attr "mode" "OI")])
8557
;; 512-bit vpunpckhqdq: for a = operand 1 and b = operand 2 the result is
;; {a[1], b[1], a[3], b[3], a[5], b[5], a[7], b[7]} (indexes 8..15 of the
;; concatenation address b).  Operand 2 may be memory.
8558 (define_insn "<mask_codefor>avx512f_interleave_highv8di<mask_name>"
8559 [(set (match_operand:V8DI 0 "register_operand" "=v")
8560 (vec_select:V8DI
8561 (vec_concat:V16DI
8562 (match_operand:V8DI 1 "register_operand" "v")
8563 (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
8564 (parallel [(const_int 1) (const_int 9)
8565 (const_int 3) (const_int 11)
8566 (const_int 5) (const_int 13)
8567 (const_int 7) (const_int 15)])))]
8568 "TARGET_AVX512F"
8569 "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8570 [(set_attr "type" "sselog")
8571 (set_attr "prefix" "evex")
8572 (set_attr "mode" "XI")])
8573
;; 128-bit high quadword interleave: result is {a[1], b[1]} for a = operand
;; 1 and b = operand 2.  Two alternatives, selected by the "isa" attribute:
;;   0 (noavx): operand 1 tied to the destination -> two-operand punpckhqdq
;;   1 (avx):   three-operand vpunpckhqdq, optionally masked via
;;              <mask_operand3>
8574 (define_insn "vec_interleave_highv2di<mask_name>"
8575 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
8576 (vec_select:V2DI
8577 (vec_concat:V4DI
8578 (match_operand:V2DI 1 "register_operand" "0,v")
8579 (match_operand:V2DI 2 "nonimmediate_operand" "xm,vm"))
8580 (parallel [(const_int 1)
8581 (const_int 3)])))]
8582 "TARGET_SSE2 && <mask_avx512vl_condition>"
8583 "@
8584 punpckhqdq\t{%2, %0|%0, %2}
8585 vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8586 [(set_attr "isa" "noavx,avx")
8587 (set_attr "type" "sselog")
8588 (set_attr "prefix_data16" "1,*")
8589 (set_attr "prefix" "orig,<mask_prefix>")
8590 (set_attr "mode" "TI")])
8591
;; 256-bit low-quadword interleave.  From the V8DI concatenation of
;; operands 1 and 2 this selects elements 0, 4, 2, 6, i.e. op1[0], op2[0],
;; op1[2], op2[2], and is emitted as vpunpcklqdq.
;; NOTE(review): "prefix" is a fixed "vex" here while vec_interleave_highv2di
;; uses <mask_prefix>; confirm whether the masked variant should be "evex".
8592 (define_insn "avx2_interleave_lowv4di<mask_name>"
8593 [(set (match_operand:V4DI 0 "register_operand" "=v")
8594 (vec_select:V4DI
8595 (vec_concat:V8DI
8596 (match_operand:V4DI 1 "register_operand" "v")
8597 (match_operand:V4DI 2 "nonimmediate_operand" "vm"))
8598 (parallel [(const_int 0)
8599 (const_int 4)
8600 (const_int 2)
8601 (const_int 6)])))]
8602 "TARGET_AVX2 && <mask_avx512vl_condition>"
8603 "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8604 [(set_attr "type" "sselog")
8605 (set_attr "prefix" "vex")
8606 (set_attr "mode" "OI")])
8607
;; 512-bit low-quadword interleave.  From the V16DI concatenation of
;; operands 1 and 2 this selects the even-indexed element of operand 1
;; followed by the same-indexed element of operand 2 (0,8, 2,10, 4,12,
;; 6,14).  Requires AVX512F and is always EVEX encoded.
8608 (define_insn "<mask_codefor>avx512f_interleave_lowv8di<mask_name>"
8609 [(set (match_operand:V8DI 0 "register_operand" "=v")
8610 (vec_select:V8DI
8611 (vec_concat:V16DI
8612 (match_operand:V8DI 1 "register_operand" "v")
8613 (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
8614 (parallel [(const_int 0) (const_int 8)
8615 (const_int 2) (const_int 10)
8616 (const_int 4) (const_int 12)
8617 (const_int 6) (const_int 14)])))]
8618 "TARGET_AVX512F"
8619 "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8620 [(set_attr "type" "sselog")
8621 (set_attr "prefix" "evex")
8622 (set_attr "mode" "XI")])
8623
;; 128-bit low-quadword interleave: result is { op1[0], op2[0] }
;; (elements 0 and 2 of the V4DI concatenation).
;; Alternative 0 is the two-operand SSE2 form (destination tied to
;; operand 1); alternative 1 is the three-operand AVX form, which also
;; carries the optional <mask_operand3> suffix.
;; The AVX alternative uses <mask_prefix>, like vec_interleave_highv2di,
;; so that the masked variant is not described as VEX encoded.
8624 (define_insn "vec_interleave_lowv2di<mask_name>"
8625 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
8626 (vec_select:V2DI
8627 (vec_concat:V4DI
8628 (match_operand:V2DI 1 "register_operand" "0,v")
8629 (match_operand:V2DI 2 "nonimmediate_operand" "xm,vm"))
8630 (parallel [(const_int 0)
8631 (const_int 2)])))]
8632 "TARGET_SSE2 && <mask_avx512vl_condition>"
8633 "@
8634 punpcklqdq\t{%2, %0|%0, %2}
8635 vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8636 [(set_attr "isa" "noavx,avx")
8637 (set_attr "type" "sselog")
8638 (set_attr "prefix_data16" "1,*")
8639 (set_attr "prefix" "orig,<mask_prefix>")
8640 (set_attr "mode" "TI")])
8641
;; Two-element shuffle for the VI8F_128 modes: element 0 of the result is
;; element operands[3] (0 or 1) of operand 1, element 1 is element
;; operands[4] (2 or 3 in the concatenation) of operand 2.
;; The output code rebuilds the shufpd immediate from the two selectors:
;; bit 0 = operands[3], bit 1 = operands[4] - 2, and stores it back in
;; operands[3] for printing.
8642 (define_insn "sse2_shufpd_<mode>"
8643 [(set (match_operand:VI8F_128 0 "register_operand" "=x,x")
8644 (vec_select:VI8F_128
8645 (vec_concat:<ssedoublevecmode>
8646 (match_operand:VI8F_128 1 "register_operand" "0,x")
8647 (match_operand:VI8F_128 2 "nonimmediate_operand" "xm,xm"))
8648 (parallel [(match_operand 3 "const_0_to_1_operand")
8649 (match_operand 4 "const_2_to_3_operand")])))]
8650 "TARGET_SSE2"
8651 {
8652 int mask;
8653 mask = INTVAL (operands[3]);
8654 mask |= (INTVAL (operands[4]) - 2) << 1;
8655 operands[3] = GEN_INT (mask);
8656
8657 switch (which_alternative)
8658 {
8659 case 0:
8660 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
8661 case 1:
8662 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
8663 default:
8664 gcc_unreachable ();
8665 }
8666 }
8667 [(set_attr "isa" "noavx,avx")
8668 (set_attr "type" "sseshuf")
8669 (set_attr "length_immediate" "1")
8670 (set_attr "prefix" "orig,vex")
8671 (set_attr "mode" "V2DF")])
8672
8673 ;; Avoid combining registers from different units in a single alternative,
8674 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extract element 1 (the high double) of a V2DF into a DF destination.
;; Alternative 0 stores it to memory with movhpd; alternatives 1 and 2
;; move it into the low half of an SSE register with (v)unpckhpd.
;; Alternatives 3-5 take an offsettable memory source and emit "#",
;; i.e. they must be split; presumably by the define_split that follows.
;; The condition rejects memory-to-memory operand pairs.
8675 (define_insn "sse2_storehpd"
8676 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,x,*f,r")
8677 (vec_select:DF
8678 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,x,o,o,o")
8679 (parallel [(const_int 1)])))]
8680 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8681 "@
8682 %vmovhpd\t{%1, %0|%0, %1}
8683 unpckhpd\t%0, %0
8684 vunpckhpd\t{%d1, %0|%0, %d1}
8685 #
8686 #
8687 #"
8688 [(set_attr "isa" "*,noavx,avx,*,*,*")
8689 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov")
8690 (set (attr "prefix_data16")
8691 (if_then_else
8692 (and (eq_attr "alternative" "0")
8693 (not (match_test "TARGET_AVX")))
8694 (const_string "1")
8695 (const_string "*")))
8696 (set_attr "prefix" "maybe_vex,orig,vex,*,*,*")
8697 (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")])
8698
;; After reload, extracting element 1 of a V2DF that lives in memory into
;; a register becomes a plain DFmode load from byte offset 8 of that
;; memory operand.
8699 (define_split
8700 [(set (match_operand:DF 0 "register_operand")
8701 (vec_select:DF
8702 (match_operand:V2DF 1 "memory_operand")
8703 (parallel [(const_int 1)])))]
8704 "TARGET_SSE2 && reload_completed"
8705 [(set (match_dup 0) (match_dup 1))]
8706 "operands[1] = adjust_address (operands[1], DFmode, 8);")
8707
;; SSE1-only (no SSE2) extraction of element 1 of a V2DF.
;; Alternative 0 stores the high half to memory with movhps, alternative 1
;; moves it between registers with movhlps, and alternative 2 loads it
;; from the upper half of an offsettable memory operand (%H1) with movlps.
;; The condition rejects memory-to-memory operand pairs.
8708 (define_insn "*vec_extractv2df_1_sse"
8709 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
8710 (vec_select:DF
8711 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
8712 (parallel [(const_int 1)])))]
8713 "!TARGET_SSE2 && TARGET_SSE
8714 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8715 "@
8716 movhps\t{%1, %0|%q0, %1}
8717 movhlps\t{%1, %0|%0, %1}
8718 movlps\t{%H1, %0|%0, %H1}"
8719 [(set_attr "type" "ssemov")
8720 (set_attr "ssememalign" "64")
8721 (set_attr "mode" "V2SF,V4SF,V2SF")])
8722
8723 ;; Avoid combining registers from different units in a single alternative,
8724 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extract element 0 (the low double) of a V2DF into a DF destination.
;; Only alternative 0 (register to memory, movlpd) emits an instruction
;; directly; the remaining alternatives emit "#" and must be split,
;; presumably by the define_split that follows.
;; The condition rejects memory-to-memory operand pairs.
8725 (define_insn "sse2_storelpd"
8726 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
8727 (vec_select:DF
8728 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
8729 (parallel [(const_int 0)])))]
8730 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8731 "@
8732 %vmovlpd\t{%1, %0|%0, %1}
8733 #
8734 #
8735 #
8736 #"
8737 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
8738 (set_attr "prefix_data16" "1,*,*,*,*")
8739 (set_attr "prefix" "maybe_vex")
8740 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
8741
;; After reload, extracting element 0 of a V2DF into a register becomes a
;; plain DFmode move: a register source is re-expressed as the same hard
;; register in DFmode, a memory source as a DFmode reference at offset 0.
8742 (define_split
8743 [(set (match_operand:DF 0 "register_operand")
8744 (vec_select:DF
8745 (match_operand:V2DF 1 "nonimmediate_operand")
8746 (parallel [(const_int 0)])))]
8747 "TARGET_SSE2 && reload_completed"
8748 [(set (match_dup 0) (match_dup 1))]
8749 {
8750 if (REG_P (operands[1]))
8751 operands[1] = gen_rtx_REG (DFmode, REGNO (operands[1]));
8752 else
8753 operands[1] = adjust_address (operands[1], DFmode, 0);
8754 })
8755
;; SSE1-only (no SSE2) extraction of element 0 of a V2DF.
;; Alternative 0 stores the low half to memory with movlps, alternative 1
;; copies the whole register with movaps, and alternative 2 loads the low
;; half from memory with movlps.
;; The condition rejects memory-to-memory operand pairs.
8756 (define_insn "*vec_extractv2df_0_sse"
8757 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
8758 (vec_select:DF
8759 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
8760 (parallel [(const_int 0)])))]
8761 "!TARGET_SSE2 && TARGET_SSE
8762 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8763 "@
8764 movlps\t{%1, %0|%0, %1}
8765 movaps\t{%1, %0|%0, %1}
8766 movlps\t{%1, %0|%0, %q1}"
8767 [(set_attr "type" "ssemov")
8768 (set_attr "mode" "V2SF,V4SF,V2SF")])
8769
;; Expander wrapper around sse2_loadhpd: builds a V2DF whose element 0 is
;; element 0 of operand 1 and whose element 1 is the DF operand 2.
;; ix86_fixup_binary_operands may return a destination other than
;; operands[0]; in that case the result is copied to operands[0] afterwards.
8770 (define_expand "sse2_loadhpd_exp"
8771 [(set (match_operand:V2DF 0 "nonimmediate_operand")
8772 (vec_concat:V2DF
8773 (vec_select:DF
8774 (match_operand:V2DF 1 "nonimmediate_operand")
8775 (parallel [(const_int 0)]))
8776 (match_operand:DF 2 "nonimmediate_operand")))]
8777 "TARGET_SSE2"
8778 {
8779 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
8780
8781 emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2]));
8782
8783 /* Fix up the destination if needed. */
8784 if (dst != operands[0])
8785 emit_move_insn (operands[0], dst);
8786
8787 DONE;
8788 })
8789
8790 ;; Avoid combining registers from different units in a single alternative,
8791 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Replace the high element of a V2DF: result is { op1[0], op2 }.
;; Alternatives 0/1 load operand 2 from memory with (v)movhpd,
;; alternatives 2/3 take it from an SSE register with (v)unpcklpd.
;; Alternatives 4-6 have a memory destination tied to operand 1 and emit
;; "#"; presumably the define_split that follows turns them into a store.
;; The condition rejects operands 1 and 2 both being memory.
8792 (define_insn "sse2_loadhpd"
8793 [(set (match_operand:V2DF 0 "nonimmediate_operand"
8794 "=x,x,x,x,o,o ,o")
8795 (vec_concat:V2DF
8796 (vec_select:DF
8797 (match_operand:V2DF 1 "nonimmediate_operand"
8798 " 0,x,0,x,0,0 ,0")
8799 (parallel [(const_int 0)]))
8800 (match_operand:DF 2 "nonimmediate_operand"
8801 " m,m,x,x,x,*f,r")))]
8802 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
8803 "@
8804 movhpd\t{%2, %0|%0, %2}
8805 vmovhpd\t{%2, %1, %0|%0, %1, %2}
8806 unpcklpd\t{%2, %0|%0, %2}
8807 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
8808 #
8809 #
8810 #"
8811 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
8812 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
8813 (set_attr "ssememalign" "64")
8814 (set_attr "prefix_data16" "1,*,*,*,*,*,*")
8815 (set_attr "prefix" "orig,vex,orig,vex,*,*,*")
8816 (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")])
8817
;; After reload, replacing the high element of a V2DF that lives in memory
;; (the low element is re-read from the same memory via match_dup 0)
;; becomes a plain DFmode store of operand 1 at byte offset 8.
8818 (define_split
8819 [(set (match_operand:V2DF 0 "memory_operand")
8820 (vec_concat:V2DF
8821 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
8822 (match_operand:DF 1 "register_operand")))]
8823 "TARGET_SSE2 && reload_completed"
8824 [(set (match_dup 0) (match_dup 1))]
8825 "operands[0] = adjust_address (operands[0], DFmode, 8);")
8826
;; Expander wrapper around sse2_loadlpd: builds a V2DF whose element 0 is
;; the DF operand 2 and whose element 1 is element 1 of operand 1.
;; ix86_fixup_binary_operands may return a destination other than
;; operands[0]; in that case the result is copied to operands[0] afterwards.
8827 (define_expand "sse2_loadlpd_exp"
8828 [(set (match_operand:V2DF 0 "nonimmediate_operand")
8829 (vec_concat:V2DF
8830 (match_operand:DF 2 "nonimmediate_operand")
8831 (vec_select:DF
8832 (match_operand:V2DF 1 "nonimmediate_operand")
8833 (parallel [(const_int 1)]))))]
8834 "TARGET_SSE2"
8835 {
8836 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
8837
8838 emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2]));
8839
8840 /* Fix up the destination if needed. */
8841 if (dst != operands[0])
8842 emit_move_insn (operands[0], dst);
8843
8844 DONE;
8845 })
8846
8847 ;; Avoid combining registers from different units in a single alternative,
8848 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Replace the low element of a V2DF: result is { op2, op1[1] }.
;; Alternatives, in order: movsd load with operand 1 matching "C";
;; (v)movlpd load of operand 2 from memory; (v)movsd from a register;
;; shufpd with immediate 2 when operand 2 is tied to the destination;
;; (v)movhpd loading the high half of a memory operand 1 (%H1).
;; Alternatives 8-10 have a memory destination tied to operand 1 and emit
;; "#"; presumably the define_split that follows turns them into a store.
;; The condition rejects operands 1 and 2 both being memory.
8849 (define_insn "sse2_loadlpd"
8850 [(set (match_operand:V2DF 0 "nonimmediate_operand"
8851 "=x,x,x,x,x,x,x,x,m,m ,m")
8852 (vec_concat:V2DF
8853 (match_operand:DF 2 "nonimmediate_operand"
8854 " m,m,m,x,x,0,0,x,x,*f,r")
8855 (vec_select:DF
8856 (match_operand:V2DF 1 "vector_move_operand"
8857 " C,0,x,0,x,x,o,o,0,0 ,0")
8858 (parallel [(const_int 1)]))))]
8859 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
8860 "@
8861 %vmovsd\t{%2, %0|%0, %2}
8862 movlpd\t{%2, %0|%0, %2}
8863 vmovlpd\t{%2, %1, %0|%0, %1, %2}
8864 movsd\t{%2, %0|%0, %2}
8865 vmovsd\t{%2, %1, %0|%0, %1, %2}
8866 shufpd\t{$2, %1, %0|%0, %1, 2}
8867 movhpd\t{%H1, %0|%0, %H1}
8868 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
8869 #
8870 #
8871 #"
8872 [(set_attr "isa" "*,noavx,avx,noavx,avx,noavx,noavx,avx,*,*,*")
8873 (set (attr "type")
8874 (cond [(eq_attr "alternative" "5")
8875 (const_string "sselog")
8876 (eq_attr "alternative" "9")
8877 (const_string "fmov")
8878 (eq_attr "alternative" "10")
8879 (const_string "imov")
8880 ]
8881 (const_string "ssemov")))
8882 (set_attr "ssememalign" "64")
8883 (set_attr "prefix_data16" "*,1,*,*,*,*,1,*,*,*,*")
8884 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*,*,*")
8885 (set_attr "prefix" "maybe_vex,orig,vex,orig,vex,orig,orig,vex,*,*,*")
8886 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")])
8887
;; After reload, replacing the low element of a V2DF that lives in memory
;; (the high element is re-read from the same memory via match_dup 0)
;; becomes a plain DFmode store of operand 1 at byte offset 0.
8888 (define_split
8889 [(set (match_operand:V2DF 0 "memory_operand")
8890 (vec_concat:V2DF
8891 (match_operand:DF 1 "register_operand")
8892 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
8893 "TARGET_SSE2 && reload_completed"
8894 [(set (match_dup 0) (match_dup 1))]
8895 "operands[0] = adjust_address (operands[0], DFmode, 0);")
8896
;; vec_merge with mask 1: element 0 of the result comes from operand 2,
;; element 1 from operand 1.
;; Alternatives, in order: (v)movsd register form; (v)movlpd loading
;; operand 2 from memory; movlpd storing operand 2 to memory;
;; shufpd with immediate 2 when operand 2 is tied to the destination;
;; (v)movhps loading the high half of a memory operand 1 (%H1);
;; movhps storing operand 1 into the high half of the destination (%H0).
8897 (define_insn "sse2_movsd"
8898 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,x,x,x,o")
8899 (vec_merge:V2DF
8900 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,m,m,x,0,0,x,0")
8901 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,0,x,0,x,o,o,x")
8902 (const_int 1)))]
8903 "TARGET_SSE2"
8904 "@
8905 movsd\t{%2, %0|%0, %2}
8906 vmovsd\t{%2, %1, %0|%0, %1, %2}
8907 movlpd\t{%2, %0|%0, %q2}
8908 vmovlpd\t{%2, %1, %0|%0, %1, %q2}
8909 %vmovlpd\t{%2, %0|%q0, %2}
8910 shufpd\t{$2, %1, %0|%0, %1, 2}
8911 movhps\t{%H1, %0|%0, %H1}
8912 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
8913 %vmovhps\t{%1, %H0|%H0, %1}"
8914 [(set_attr "isa" "noavx,avx,noavx,avx,*,noavx,noavx,avx,*")
8915 (set (attr "type")
8916 (if_then_else
8917 (eq_attr "alternative" "5")
8918 (const_string "sselog")
8919 (const_string "ssemov")))
8920 (set (attr "prefix_data16")
8921 (if_then_else
8922 (and (eq_attr "alternative" "2,4")
8923 (not (match_test "TARGET_AVX")))
8924 (const_string "1")
8925 (const_string "*")))
8926 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*")
8927 (set_attr "ssememalign" "64")
8928 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig,vex,maybe_vex")
8929 (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
8930
;; Broadcast a DF value into both elements of a V2DF.
;; Alternative 0 (no AVX) duplicates the low element in place with
;; unpcklpd; alternative 1 (SSE3) and alternative 2 (AVX512VL) use
;; (v)movddup from a register or memory source.
8931 (define_insn "vec_dupv2df<mask_name>"
8932 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
8933 (vec_duplicate:V2DF
8934 (match_operand:DF 1 "nonimmediate_operand" " 0,xm,vm")))]
8935 "TARGET_SSE2 && <mask_avx512vl_condition>"
8936 "@
8937 unpcklpd\t%0, %0
8938 %vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}
8939 vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
8940 [(set_attr "isa" "noavx,sse3,avx512vl")
8941 (set_attr "type" "sselog1")
8942 (set_attr "prefix" "orig,maybe_vex,evex")
8943 (set_attr "mode" "V2DF,DF,DF")])
8944
;; Build a V2DF from two DF values: result is { op1, op2 }.
;; Alternatives, in order: (v)unpcklpd of two registers (three encodings);
;; (v)movddup when operand 2 is the same memory as operand 1 (constraint
;; "1"); (v)movhpd loading operand 2 from memory; movsd loading operand 1
;; with operand 2 matching "C"; and the SSE1 movlhps/movhps fallbacks.
;; The condition allows both operands in memory only when SSE3 is
;; available and the two operands are identical (the movddup case).
8945 (define_insn "*vec_concatv2df"
8946 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v,x,v,x,x,v,x,x")
8947 (vec_concat:V2DF
8948 (match_operand:DF 1 "nonimmediate_operand" " 0,x,v,m,m,0,x,m,0,0")
8949 (match_operand:DF 2 "vector_move_operand" " x,x,v,1,1,m,m,C,x,m")))]
8950 "TARGET_SSE
8951 && (!(MEM_P (operands[1]) && MEM_P (operands[2]))
8952 || (TARGET_SSE3 && rtx_equal_p (operands[1], operands[2])))"
8953 "@
8954 unpcklpd\t{%2, %0|%0, %2}
8955 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
8956 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
8957 %vmovddup\t{%1, %0|%0, %1}
8958 vmovddup\t{%1, %0|%0, %1}
8959 movhpd\t{%2, %0|%0, %2}
8960 vmovhpd\t{%2, %1, %0|%0, %1, %2}
8961 %vmovsd\t{%1, %0|%0, %1}
8962 movlhps\t{%2, %0|%0, %2}
8963 movhps\t{%2, %0|%0, %2}"
8964 [(set_attr "isa" "sse2_noavx,avx,avx512vl,sse3,avx512vl,sse2_noavx,avx,sse2,noavx,noavx")
8965 (set (attr "type")
8966 (if_then_else
8967 (eq_attr "alternative" "0,1,2,3,4")
8968 (const_string "sselog")
8969 (const_string "ssemov")))
8970 (set (attr "prefix_data16")
8971 (if_then_else (eq_attr "alternative" "5")
8972 (const_string "1")
8973 (const_string "*")))
8974 (set_attr "prefix" "orig,vex,evex,maybe_vex,evex,orig,vex,maybe_vex,orig,orig")
8975 (set_attr "mode" "V2DF,V2DF,V2DF, DF, DF, V1DF,V1DF,DF,V4SF,V2SF")])
8976
8977 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8978 ;;
8979 ;; Parallel integer down-conversion operations
8980 ;;
8981 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8982
;; Destination modes of the 512-bit-source truncations, together with the
;; matching source mode (upper and lower case spelling) and the vpmov*
;; mnemonic suffix (source element size letter, destination element size
;; letter) for each destination mode.
8983 (define_mode_iterator PMOV_DST_MODE_1 [V16QI V16HI V8SI V8HI])
8984 (define_mode_attr pmov_src_mode
8985 [(V16QI "V16SI") (V16HI "V16SI") (V8SI "V8DI") (V8HI "V8DI")])
8986 (define_mode_attr pmov_src_lower
8987 [(V16QI "v16si") (V16HI "v16si") (V8SI "v8di") (V8HI "v8di")])
8988 (define_mode_attr pmov_suff_1
8989 [(V16QI "db") (V16HI "dw") (V8SI "qd") (V8HI "qw")])
8990
;; Unmasked AVX512F truncation (any_truncate code iterator) of a
;; <pmov_src_mode> register into a PMOV_DST_MODE_1 register (alternative 0)
;; or directly into memory (alternative 1).
8991 (define_insn "*avx512f_<code><pmov_src_lower><mode>2"
8992 [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
8993 (any_truncate:PMOV_DST_MODE_1
8994 (match_operand:<pmov_src_mode> 1 "register_operand" "v,v")))]
8995 "TARGET_AVX512F"
8996 "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0|%0, %1}"
8997 [(set_attr "type" "ssemov")
8998 (set_attr "memory" "none,store")
8999 (set_attr "prefix" "evex")
9000 (set_attr "mode" "<sseinsnmode>")])
9001
;; Masked AVX512F truncation: elements selected by mask operand 3 receive
;; the truncated value of operand 1, the others come from operand 2.
;; For a register destination operand 2 is either tied to it or matches
;; "C"; for a memory destination it must be tied to the destination.
;; %N2 presumably prints the zeroing suffix when operand 2 is zero --
;; confirm in the i386 operand printer.
9002 (define_insn "avx512f_<code><pmov_src_lower><mode>2_mask"
9003 [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
9004 (vec_merge:PMOV_DST_MODE_1
9005 (any_truncate:PMOV_DST_MODE_1
9006 (match_operand:<pmov_src_mode> 1 "register_operand" "v,v"))
9007 (match_operand:PMOV_DST_MODE_1 2 "vector_move_operand" "0C,0")
9008 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
9009 "TARGET_AVX512F"
9010 "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9011 [(set_attr "type" "ssemov")
9012 (set_attr "memory" "none,store")
9013 (set_attr "prefix" "evex")
9014 (set_attr "mode" "<sseinsnmode>")])
9015
;; Expander for the masked truncating store: the merge source is the
;; destination memory itself (match_dup 0), so elements not selected by
;; mask operand 2 keep their previous memory contents.
9016 (define_expand "avx512f_<code><pmov_src_lower><mode>2_mask_store"
9017 [(set (match_operand:PMOV_DST_MODE_1 0 "memory_operand")
9018 (vec_merge:PMOV_DST_MODE_1
9019 (any_truncate:PMOV_DST_MODE_1
9020 (match_operand:<pmov_src_mode> 1 "register_operand"))
9021 (match_dup 0)
9022 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
9023 "TARGET_AVX512F")
9024
;; Unmasked AVX512BW truncation of V32HI to V32QI (vpmov*wb), to a
;; register (alternative 0) or directly to memory (alternative 1).
9025 (define_insn "avx512bw_<code>v32hiv32qi2"
9026 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
9027 (any_truncate:V32QI
9028 (match_operand:V32HI 1 "register_operand" "v,v")))]
9029 "TARGET_AVX512BW"
9030 "vpmov<trunsuffix>wb\t{%1, %0|%0, %1}"
9031 [(set_attr "type" "ssemov")
9032 (set_attr "memory" "none,store")
9033 (set_attr "prefix" "evex")
9034 (set_attr "mode" "XI")])
9035
;; Masked AVX512BW truncation of V32HI to V32QI.  Elements selected by the
;; SImode mask operand 3 receive the truncated value, the others come from
;; operand 2 (tied to the destination, or matching "C" for the register
;; alternative).
9036 (define_insn "avx512bw_<code>v32hiv32qi2_mask"
9037 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
9038 (vec_merge:V32QI
9039 (any_truncate:V32QI
9040 (match_operand:V32HI 1 "register_operand" "v,v"))
9041 (match_operand:V32QI 2 "vector_move_operand" "0C,0")
9042 (match_operand:SI 3 "register_operand" "Yk,Yk")))]
9043 "TARGET_AVX512BW"
9044 "vpmov<trunsuffix>wb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9045 [(set_attr "type" "ssemov")
9046 (set_attr "memory" "none,store")
9047 (set_attr "prefix" "evex")
9048 (set_attr "mode" "XI")])
9049
;; Expander for the masked V32HI -> V32QI truncating store: the merge
;; source is the destination itself (match_dup 0), so unselected elements
;; keep their previous contents.
;; NOTE(review): operand 0 uses "nonimmediate_operand" whereas the
;; avx512f ..._mask_store expander uses "memory_operand" -- confirm
;; whether the looser predicate is intentional.
9050 (define_expand "avx512bw_<code>v32hiv32qi2_mask_store"
9051 [(set (match_operand:V32QI 0 "nonimmediate_operand")
9052 (vec_merge:V32QI
9053 (any_truncate:V32QI
9054 (match_operand:V32HI 1 "register_operand"))
9055 (match_dup 0)
9056 (match_operand:SI 2 "register_operand")))]
9057 "TARGET_AVX512BW")
9058
;; Destination modes of the truncations that halve the element size
;; (source mode is <ssedoublemode>), with the vpmov* mnemonic suffix for
;; each.  The V16QI entry is only enabled with AVX512BW.
9059 (define_mode_iterator PMOV_DST_MODE_2
9060 [V4SI V8HI (V16QI "TARGET_AVX512BW")])
9061 (define_mode_attr pmov_suff_2
9062 [(V16QI "wb") (V8HI "dw") (V4SI "qd")])
9063
;; Unmasked AVX512VL truncation of an <ssedoublemode> register into a
;; PMOV_DST_MODE_2 register (alternative 0) or memory (alternative 1).
9064 (define_insn "*avx512vl_<code><ssedoublemodelower><mode>2"
9065 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m")
9066 (any_truncate:PMOV_DST_MODE_2
9067 (match_operand:<ssedoublemode> 1 "register_operand" "v,v")))]
9068 "TARGET_AVX512VL"
9069 "vpmov<trunsuffix><pmov_suff_2>\t{%1, %0|%0, %1}"
9070 [(set_attr "type" "ssemov")
9071 (set_attr "memory" "none,store")
9072 (set_attr "prefix" "evex")
9073 (set_attr "mode" "<sseinsnmode>")])
9074
;; Masked AVX512VL truncation of an <ssedoublemode> register.  Elements
;; selected by mask operand 3 receive the truncated value, the others come
;; from operand 2 (tied to the destination, or matching "C" for the
;; register alternative).
9075 (define_insn "<avx512>_<code><ssedoublemodelower><mode>2_mask"
9076 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m")
9077 (vec_merge:PMOV_DST_MODE_2
9078 (any_truncate:PMOV_DST_MODE_2
9079 (match_operand:<ssedoublemode> 1 "register_operand" "v,v"))
9080 (match_operand:PMOV_DST_MODE_2 2 "vector_move_operand" "0C,0")
9081 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
9082 "TARGET_AVX512VL"
9083 "vpmov<trunsuffix><pmov_suff_2>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9084 [(set_attr "type" "ssemov")
9085 (set_attr "memory" "none,store")
9086 (set_attr "prefix" "evex")
9087 (set_attr "mode" "<sseinsnmode>")])
9088
;; Expander for the masked AVX512VL truncating store: the merge source is
;; the destination itself (match_dup 0), so elements not selected by mask
;; operand 2 keep their previous contents.
;; NOTE(review): operand 0 uses "nonimmediate_operand" whereas the
;; avx512f ..._mask_store expander uses "memory_operand" -- confirm
;; whether the looser predicate is intentional.
9089 (define_expand "<avx512>_<code><ssedoublemodelower><mode>2_mask_store"
9090 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand")
9091 (vec_merge:PMOV_DST_MODE_2
9092 (any_truncate:PMOV_DST_MODE_2
9093 (match_operand:<ssedoublemode> 1 "register_operand"))
9094 (match_dup 0)
9095 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
9096 "TARGET_AVX512VL")
9097
;; Source modes of the truncations to QImode elements whose result fills
;; only part of a V16QI.  For each source mode: the mode of the truncated
;; part, the mode of the zero padding that completes the 16 bytes, and the
;; vpmov* mnemonic suffix.  The V8HI entry is only enabled with AVX512BW.
9098 (define_mode_iterator PMOV_SRC_MODE_3 [V4DI V2DI V8SI V4SI (V8HI "TARGET_AVX512BW")])
9099 (define_mode_attr pmov_dst_3
9100 [(V4DI "V4QI") (V2DI "V2QI") (V8SI "V8QI") (V4SI "V4QI") (V8HI "V8QI")])
9101 (define_mode_attr pmov_dst_zeroed_3
9102 [(V4DI "V12QI") (V2DI "V14QI") (V8SI "V8QI") (V4SI "V12QI") (V8HI "V8QI")])
9103 (define_mode_attr pmov_suff_3
9104 [(V4DI "qb") (V2DI "qb") (V8SI "db") (V4SI "db") (V8HI "wb")])
9105
;; Register form of the truncation to QImode elements: the low part of the
;; V16QI result is the truncated operand 1 and the remaining bytes are the
;; constant-zero operand 2.
9106 (define_insn "*avx512vl_<code><mode>v<ssescalarnum>qi2"
9107 [(set (match_operand:V16QI 0 "register_operand" "=v")
9108 (vec_concat:V16QI
9109 (any_truncate:<pmov_dst_3>
9110 (match_operand:PMOV_SRC_MODE_3 1 "register_operand" "v"))
9111 (match_operand:<pmov_dst_zeroed_3> 2 "const0_operand")))]
9112 "TARGET_AVX512VL"
9113 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
9114 [(set_attr "type" "ssemov")
9115 (set_attr "prefix" "evex")
9116 (set_attr "mode" "TI")])
9117
;; Store form of the V2DI -> V2QI truncation.  The memory operand is
;; modelled as V16QI: bytes 0-1 receive the truncated elements and bytes
;; 2-15 are re-read from the same memory (match_dup 0), i.e. unchanged.
;; Only two bytes are actually written, so the Intel-syntax template
;; overrides the operand size with %w0 (WORD PTR) instead of printing the
;; V16QI size.
9118 (define_insn "*avx512vl_<code>v2div2qi2_store"
9119 [(set (match_operand:V16QI 0 "memory_operand" "=m")
9120 (vec_concat:V16QI
9121 (any_truncate:V2QI
9122 (match_operand:V2DI 1 "register_operand" "v"))
9123 (vec_select:V14QI
9124 (match_dup 0)
9125 (parallel [(const_int 2) (const_int 3)
9126 (const_int 4) (const_int 5)
9127 (const_int 6) (const_int 7)
9128 (const_int 8) (const_int 9)
9129 (const_int 10) (const_int 11)
9130 (const_int 12) (const_int 13)
9131 (const_int 14) (const_int 15)]))))]
9132 "TARGET_AVX512VL"
9133 "vpmov<trunsuffix>qb\t{%1, %0|%w0, %1}"
9134 [(set_attr "type" "ssemov")
9135 (set_attr "memory" "store")
9136 (set_attr "prefix" "evex")
9137 (set_attr "mode" "TI")])
9138
;; Masked register form of the V2DI -> V2QI truncation.  The low two bytes
;; are a merge, under mask operand 3, of the truncated operand 1 and the
;; low two bytes of operand 2 (tied to the destination or matching "C");
;; the upper 14 bytes of the result are zero.
9139 (define_insn "avx512vl_<code>v2div2qi2_mask"
9140 [(set (match_operand:V16QI 0 "register_operand" "=v")
9141 (vec_concat:V16QI
9142 (vec_merge:V2QI
9143 (any_truncate:V2QI
9144 (match_operand:V2DI 1 "register_operand" "v"))
9145 (vec_select:V2QI
9146 (match_operand:V16QI 2 "vector_move_operand" "0C")
9147 (parallel [(const_int 0) (const_int 1)]))
9148 (match_operand:QI 3 "register_operand" "Yk"))
9149 (const_vector:V14QI [(const_int 0) (const_int 0)
9150 (const_int 0) (const_int 0)
9151 (const_int 0) (const_int 0)
9152 (const_int 0) (const_int 0)
9153 (const_int 0) (const_int 0)
9154 (const_int 0) (const_int 0)
9155 (const_int 0) (const_int 0)])))]
9156 "TARGET_AVX512VL"
9157 "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9158 [(set_attr "type" "ssemov")
9159 (set_attr "prefix" "evex")
9160 (set_attr "mode" "TI")])
9161
;; Masked store form of the V2DI -> V2QI truncation.  Bytes 0-1 of the
;; V16QI memory operand are a merge, under mask operand 2, of the
;; truncated operand 1 and the previous memory contents; bytes 2-15 are
;; re-read from the same memory (match_dup 0), i.e. unchanged.
;; At most two bytes are written, so the Intel-syntax template overrides
;; the operand size with %w0 (WORD PTR) instead of printing the V16QI size.
9162 (define_insn "avx512vl_<code>v2div2qi2_mask_store"
9163 [(set (match_operand:V16QI 0 "memory_operand" "=m")
9164 (vec_concat:V16QI
9165 (vec_merge:V2QI
9166 (any_truncate:V2QI
9167 (match_operand:V2DI 1 "register_operand" "v"))
9168 (vec_select:V2QI
9169 (match_dup 0)
9170 (parallel [(const_int 0) (const_int 1)]))
9171 (match_operand:QI 2 "register_operand" "Yk"))
9172 (vec_select:V14QI
9173 (match_dup 0)
9174 (parallel [(const_int 2) (const_int 3)
9175 (const_int 4) (const_int 5)
9176 (const_int 6) (const_int 7)
9177 (const_int 8) (const_int 9)
9178 (const_int 10) (const_int 11)
9179 (const_int 12) (const_int 13)
9180 (const_int 14) (const_int 15)]))))]
9181 "TARGET_AVX512VL"
9182 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%w0%{%2%}, %1}"
9183 [(set_attr "type" "ssemov")
9184 (set_attr "memory" "store")
9185 (set_attr "prefix" "evex")
9186 (set_attr "mode" "TI")])
9187
;; Store form of the four-element truncation to V4QI (source modes from
;; VI4_128_8_256).  The memory operand is modelled as V16QI: bytes 0-3
;; receive the truncated elements and bytes 4-15 are re-read from the same
;; memory (match_dup 0), i.e. unchanged.
;; Only four bytes are actually written, so the Intel-syntax template
;; overrides the operand size with %k0 (DWORD PTR) instead of printing the
;; V16QI size.
9188 (define_insn "*avx512vl_<code><mode>v4qi2_store"
9189 [(set (match_operand:V16QI 0 "memory_operand" "=m")
9190 (vec_concat:V16QI
9191 (any_truncate:V4QI
9192 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
9193 (vec_select:V12QI
9194 (match_dup 0)
9195 (parallel [(const_int 4) (const_int 5)
9196 (const_int 6) (const_int 7)
9197 (const_int 8) (const_int 9)
9198 (const_int 10) (const_int 11)
9199 (const_int 12) (const_int 13)
9200 (const_int 14) (const_int 15)]))))]
9201 "TARGET_AVX512VL"
9202 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%k0, %1}"
9203 [(set_attr "type" "ssemov")
9204 (set_attr "memory" "store")
9205 (set_attr "prefix" "evex")
9206 (set_attr "mode" "TI")])
9207
;; Masked register form of the four-element truncation to V4QI.  The low
;; four bytes are a merge, under mask operand 3, of the truncated operand 1
;; and the low four bytes of operand 2 (tied to the destination or
;; matching "C"); the upper 12 bytes of the result are zero.
9208 (define_insn "avx512vl_<code><mode>v4qi2_mask"
9209 [(set (match_operand:V16QI 0 "register_operand" "=v")
9210 (vec_concat:V16QI
9211 (vec_merge:V4QI
9212 (any_truncate:V4QI
9213 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
9214 (vec_select:V4QI
9215 (match_operand:V16QI 2 "vector_move_operand" "0C")
9216 (parallel [(const_int 0) (const_int 1)
9217 (const_int 2) (const_int 3)]))
9218 (match_operand:QI 3 "register_operand" "Yk"))
9219 (const_vector:V12QI [(const_int 0) (const_int 0)
9220 (const_int 0) (const_int 0)
9221 (const_int 0) (const_int 0)
9222 (const_int 0) (const_int 0)
9223 (const_int 0) (const_int 0)
9224 (const_int 0) (const_int 0)])))]
9225 "TARGET_AVX512VL"
9226 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9227 [(set_attr "type" "ssemov")
9228 (set_attr "prefix" "evex")
9229 (set_attr "mode" "TI")])
9230
;; Masked store form of the four-element truncation to V4QI.  Bytes 0-3 of
;; the V16QI memory operand are a merge, under mask operand 2, of the
;; truncated operand 1 and the previous memory contents; bytes 4-15 are
;; re-read from the same memory (match_dup 0), i.e. unchanged.
;; At most four bytes are written, so the Intel-syntax template overrides
;; the operand size with %k0 (DWORD PTR) instead of printing the V16QI
;; size.
9231 (define_insn "avx512vl_<code><mode>v4qi2_mask_store"
9232 [(set (match_operand:V16QI 0 "memory_operand" "=m")
9233 (vec_concat:V16QI
9234 (vec_merge:V4QI
9235 (any_truncate:V4QI
9236 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
9237 (vec_select:V4QI
9238 (match_dup 0)
9239 (parallel [(const_int 0) (const_int 1)
9240 (const_int 2) (const_int 3)]))
9241 (match_operand:QI 2 "register_operand" "Yk"))
9242 (vec_select:V12QI
9243 (match_dup 0)
9244 (parallel [(const_int 4) (const_int 5)
9245 (const_int 6) (const_int 7)
9246 (const_int 8) (const_int 9)
9247 (const_int 10) (const_int 11)
9248 (const_int 12) (const_int 13)
9249 (const_int 14) (const_int 15)]))))]
9250 "TARGET_AVX512VL"
9251 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%k0%{%2%}, %1}"
9252 [(set_attr "type" "ssemov")
9253 (set_attr "memory" "store")
9254 (set_attr "prefix" "evex")
9255 (set_attr "mode" "TI")])
9256
;; Eight-element source modes for the truncations to V8QI; the V8HI entry
;; is only enabled with AVX512BW.
9257 (define_mode_iterator VI2_128_BW_4_256
9258 [(V8HI "TARGET_AVX512BW") V8SI])
9259
;; Store form of the eight-element truncation to V8QI (source modes from
;; VI2_128_BW_4_256).  The memory operand is modelled as V16QI: bytes 0-7
;; receive the truncated elements and bytes 8-15 are re-read from the same
;; memory (match_dup 0), i.e. unchanged.
;; Only eight bytes are actually written, so the Intel-syntax template
;; overrides the operand size with %q0 (QWORD PTR) instead of printing the
;; V16QI size.
9260 (define_insn "*avx512vl_<code><mode>v8qi2_store"
9261 [(set (match_operand:V16QI 0 "memory_operand" "=m")
9262 (vec_concat:V16QI
9263 (any_truncate:V8QI
9264 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
9265 (vec_select:V8QI
9266 (match_dup 0)
9267 (parallel [(const_int 8) (const_int 9)
9268 (const_int 10) (const_int 11)
9269 (const_int 12) (const_int 13)
9270 (const_int 14) (const_int 15)]))))]
9271 "TARGET_AVX512VL"
9272 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%q0, %1}"
9273 [(set_attr "type" "ssemov")
9274 (set_attr "memory" "store")
9275 (set_attr "prefix" "evex")
9276 (set_attr "mode" "TI")])
9277
;; Masked register form of the eight-element truncation to V8QI.  The low
;; eight bytes are a merge, under mask operand 3, of the truncated operand
;; 1 and the low eight bytes of operand 2 (tied to the destination or
;; matching "C"); the upper eight bytes of the result are zero.
9278 (define_insn "avx512vl_<code><mode>v8qi2_mask"
9279 [(set (match_operand:V16QI 0 "register_operand" "=v")
9280 (vec_concat:V16QI
9281 (vec_merge:V8QI
9282 (any_truncate:V8QI
9283 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
9284 (vec_select:V8QI
9285 (match_operand:V16QI 2 "vector_move_operand" "0C")
9286 (parallel [(const_int 0) (const_int 1)
9287 (const_int 2) (const_int 3)
9288 (const_int 4) (const_int 5)
9289 (const_int 6) (const_int 7)]))
9290 (match_operand:QI 3 "register_operand" "Yk"))
9291 (const_vector:V8QI [(const_int 0) (const_int 0)
9292 (const_int 0) (const_int 0)
9293 (const_int 0) (const_int 0)
9294 (const_int 0) (const_int 0)])))]
9295 "TARGET_AVX512VL"
9296 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9297 [(set_attr "type" "ssemov")
9298 (set_attr "prefix" "evex")
9299 (set_attr "mode" "TI")])
9300
;; Masked store form of the eight-element truncation to V8QI.  Bytes 0-7
;; of the V16QI memory operand are a merge, under mask operand 2, of the
;; truncated operand 1 and the previous memory contents; bytes 8-15 are
;; re-read from the same memory (match_dup 0), i.e. unchanged.
;; At most eight bytes are written, so the Intel-syntax template overrides
;; the operand size with %q0 (QWORD PTR) instead of printing the V16QI
;; size.
9301 (define_insn "avx512vl_<code><mode>v8qi2_mask_store"
9302 [(set (match_operand:V16QI 0 "memory_operand" "=m")
9303 (vec_concat:V16QI
9304 (vec_merge:V8QI
9305 (any_truncate:V8QI
9306 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
9307 (vec_select:V8QI
9308 (match_dup 0)
9309 (parallel [(const_int 0) (const_int 1)
9310 (const_int 2) (const_int 3)
9311 (const_int 4) (const_int 5)
9312 (const_int 6) (const_int 7)]))
9313 (match_operand:QI 2 "register_operand" "Yk"))
9314 (vec_select:V8QI
9315 (match_dup 0)
9316 (parallel [(const_int 8) (const_int 9)
9317 (const_int 10) (const_int 11)
9318 (const_int 12) (const_int 13)
9319 (const_int 14) (const_int 15)]))))]
9320 "TARGET_AVX512VL"
9321 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%q0%{%2%}, %1}"
9322 [(set_attr "type" "ssemov")
9323 (set_attr "memory" "store")
9324 (set_attr "prefix" "evex")
9325 (set_attr "mode" "TI")])
9326
;; Source modes of the truncations to HImode elements whose result fills
;; only part of a V8HI.  For each source mode: the mode of the truncated
;; part, the mode of the zero padding that completes the eight elements,
;; and the vpmov* mnemonic suffix.
9327 (define_mode_iterator PMOV_SRC_MODE_4 [V4DI V2DI V4SI])
9328 (define_mode_attr pmov_dst_4
9329 [(V4DI "V4HI") (V2DI "V2HI") (V4SI "V4HI")])
9330 (define_mode_attr pmov_dst_zeroed_4
9331 [(V4DI "V4HI") (V2DI "V6HI") (V4SI "V4HI")])
9332 (define_mode_attr pmov_suff_4
9333 [(V4DI "qw") (V2DI "qw") (V4SI "dw")])
9334
;; Register form of the truncation to HImode elements: the low part of the
;; V8HI result is the truncated operand 1 and the remaining elements are
;; the constant-zero operand 2.
9335 (define_insn "*avx512vl_<code><mode>v<ssescalarnum>hi2"
9336 [(set (match_operand:V8HI 0 "register_operand" "=v")
9337 (vec_concat:V8HI
9338 (any_truncate:<pmov_dst_4>
9339 (match_operand:PMOV_SRC_MODE_4 1 "register_operand" "v"))
9340 (match_operand:<pmov_dst_zeroed_4> 2 "const0_operand")))]
9341 "TARGET_AVX512VL"
9342 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0|%0, %1}"
9343 [(set_attr "type" "ssemov")
9344 (set_attr "prefix" "evex")
9345 (set_attr "mode" "TI")])
9346
;; Store form of the four-element truncation to V4HI.  The memory operand
;; is modelled as V8HI: elements 0-3 receive the truncated values and
;; elements 4-7 are re-read from the same memory (match_dup 0).
;; NOTE(review): in Intel syntax %0 is printed with the size of the V8HI
;; operand although only the low four elements are replaced -- confirm
;; whether an explicit size override is needed.
9347 (define_insn "*avx512vl_<code><mode>v4hi2_store"
9348 [(set (match_operand:V8HI 0 "memory_operand" "=m")
9349 (vec_concat:V8HI
9350 (any_truncate:V4HI
9351 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
9352 (vec_select:V4HI
9353 (match_dup 0)
9354 (parallel [(const_int 4) (const_int 5)
9355 (const_int 6) (const_int 7)]))))]
9356 "TARGET_AVX512VL"
9357 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0|%0, %1}"
9358 [(set_attr "type" "ssemov")
9359 (set_attr "memory" "store")
9360 (set_attr "prefix" "evex")
9361 (set_attr "mode" "TI")])
9362
;; Masked register form of the four-element truncation to V4HI.  The low
;; four elements are a merge, under mask operand 3, of the truncated
;; operand 1 and the low four elements of operand 2 (tied to the
;; destination or matching "C"); the upper four elements are zero.
9363 (define_insn "avx512vl_<code><mode>v4hi2_mask"
9364 [(set (match_operand:V8HI 0 "register_operand" "=v")
9365 (vec_concat:V8HI
9366 (vec_merge:V4HI
9367 (any_truncate:V4HI
9368 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
9369 (vec_select:V4HI
9370 (match_operand:V8HI 2 "vector_move_operand" "0C")
9371 (parallel [(const_int 0) (const_int 1)
9372 (const_int 2) (const_int 3)]))
9373 (match_operand:QI 3 "register_operand" "Yk"))
9374 (const_vector:V4HI [(const_int 0) (const_int 0)
9375 (const_int 0) (const_int 0)])))]
9376 "TARGET_AVX512VL"
9377 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9378 [(set_attr "type" "ssemov")
9379 (set_attr "prefix" "evex")
9380 (set_attr "mode" "TI")])
9381
;; Masked store form of the four-element truncation to V4HI.  Elements 0-3
;; of the V8HI memory operand are a merge, under mask operand 2, of the
;; truncated operand 1 and the previous memory contents; elements 4-7 are
;; re-read from the same memory (match_dup 0).
;; NOTE(review): in Intel syntax %0 is printed with the size of the V8HI
;; operand although only the low four elements are replaced -- confirm
;; whether an explicit size override is needed.
9382 (define_insn "avx512vl_<code><mode>v4hi2_mask_store"
9383 [(set (match_operand:V8HI 0 "memory_operand" "=m")
9384 (vec_concat:V8HI
9385 (vec_merge:V4HI
9386 (any_truncate:V4HI
9387 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
9388 (vec_select:V4HI
9389 (match_dup 0)
9390 (parallel [(const_int 0) (const_int 1)
9391 (const_int 2) (const_int 3)]))
9392 (match_operand:QI 2 "register_operand" "Yk"))
9393 (vec_select:V4HI
9394 (match_dup 0)
9395 (parallel [(const_int 4) (const_int 5)
9396 (const_int 6) (const_int 7)]))))]
9397 "TARGET_AVX512VL"
9398 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
9399 [(set_attr "type" "ssemov")
9400 (set_attr "memory" "store")
9401 (set_attr "prefix" "evex")
9402 (set_attr "mode" "TI")])
9403
;; Store form of the V2DI -> V2HI truncation.  The memory operand is
;; modelled as V8HI: elements 0-1 receive the truncated values and
;; elements 2-7 are re-read from the same memory (match_dup 0).
;; NOTE(review): in Intel syntax %0 is printed with the size of the V8HI
;; operand although only the low two elements are replaced -- confirm
;; whether an explicit size override is needed.
9404 (define_insn "*avx512vl_<code>v2div2hi2_store"
9405 [(set (match_operand:V8HI 0 "memory_operand" "=m")
9406 (vec_concat:V8HI
9407 (any_truncate:V2HI
9408 (match_operand:V2DI 1 "register_operand" "v"))
9409 (vec_select:V6HI
9410 (match_dup 0)
9411 (parallel [(const_int 2) (const_int 3)
9412 (const_int 4) (const_int 5)
9413 (const_int 6) (const_int 7)]))))]
9414 "TARGET_AVX512VL"
9415 "vpmov<trunsuffix>qw\t{%1, %0|%0, %1}"
9416 [(set_attr "type" "ssemov")
9417 (set_attr "memory" "store")
9418 (set_attr "prefix" "evex")
9419 (set_attr "mode" "TI")])
9420
;; Masked register form of the V2DI -> V2HI truncation.  The low two
;; elements are a merge, under mask operand 3, of the truncated operand 1
;; and the low two elements of operand 2 (tied to the destination or
;; matching "C"); the upper six elements are zero.
9421 (define_insn "avx512vl_<code>v2div2hi2_mask"
9422 [(set (match_operand:V8HI 0 "register_operand" "=v")
9423 (vec_concat:V8HI
9424 (vec_merge:V2HI
9425 (any_truncate:V2HI
9426 (match_operand:V2DI 1 "register_operand" "v"))
9427 (vec_select:V2HI
9428 (match_operand:V8HI 2 "vector_move_operand" "0C")
9429 (parallel [(const_int 0) (const_int 1)]))
9430 (match_operand:QI 3 "register_operand" "Yk"))
9431 (const_vector:V6HI [(const_int 0) (const_int 0)
9432 (const_int 0) (const_int 0)
9433 (const_int 0) (const_int 0)])))]
9434 "TARGET_AVX512VL"
9435 "vpmov<trunsuffix>qw\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9436 [(set_attr "type" "ssemov")
9437 (set_attr "prefix" "evex")
9438 (set_attr "mode" "TI")])
9439
;; Masked store form of the V2DI -> V2HI truncation.  Elements 0-1 of the
;; V8HI memory operand are a merge, under mask operand 2, of the truncated
;; operand 1 and the previous memory contents; elements 2-7 are re-read
;; from the same memory (match_dup 0).
;; NOTE(review): in Intel syntax %0 is printed with the size of the V8HI
;; operand although only the low two elements are replaced -- confirm
;; whether an explicit size override is needed.
9440 (define_insn "avx512vl_<code>v2div2hi2_mask_store"
9441 [(set (match_operand:V8HI 0 "memory_operand" "=m")
9442 (vec_concat:V8HI
9443 (vec_merge:V2HI
9444 (any_truncate:V2HI
9445 (match_operand:V2DI 1 "register_operand" "v"))
9446 (vec_select:V2HI
9447 (match_dup 0)
9448 (parallel [(const_int 0) (const_int 1)]))
9449 (match_operand:QI 2 "register_operand" "Yk"))
9450 (vec_select:V6HI
9451 (match_dup 0)
9452 (parallel [(const_int 2) (const_int 3)
9453 (const_int 4) (const_int 5)
9454 (const_int 6) (const_int 7)]))))]
9455 "TARGET_AVX512VL"
9456 "vpmov<trunsuffix>qw\t{%1, %0%{%2%}|%0%{%2%}, %1}"
9457 [(set_attr "type" "ssemov")
9458 (set_attr "memory" "store")
9459 (set_attr "prefix" "evex")
9460 (set_attr "mode" "TI")])
9461
;; Unmasked register form of vpmov<trunsuffix>qd.  The V2DI source is
;; truncated to V2SI and placed in the low half of the V4SI destination; the
;; high half is operand 2, which must satisfy const0_operand.  Operand 2 has
;; no constraint and is not printed by the template.
9462 (define_insn "*avx512vl_<code>v2div2si2"
9463 [(set (match_operand:V4SI 0 "register_operand" "=v")
9464 (vec_concat:V4SI
9465 (any_truncate:V2SI
9466 (match_operand:V2DI 1 "register_operand" "v"))
9467 (match_operand:V2SI 2 "const0_operand")))]
9468 "TARGET_AVX512VL"
9469 "vpmov<trunsuffix>qd\t{%1, %0|%0, %1}"
9470 [(set_attr "type" "ssemov")
9471 (set_attr "prefix" "evex")
9472 (set_attr "mode" "TI")])
9473
;; Unmasked store form of vpmov<trunsuffix>qd.  The truncated V2DI source
;; becomes elements 0-1 of the V4SI memory destination; elements 2-3 are a
;; vec_select of operand 0 itself (match_dup 0), i.e. modelled as unchanged.
9474 (define_insn "*avx512vl_<code>v2div2si2_store"
9475 [(set (match_operand:V4SI 0 "memory_operand" "=m")
9476 (vec_concat:V4SI
9477 (any_truncate:V2SI
9478 (match_operand:V2DI 1 "register_operand" "v"))
9479 (vec_select:V2SI
9480 (match_dup 0)
9481 (parallel [(const_int 2) (const_int 3)]))))]
9482 "TARGET_AVX512VL"
9483 "vpmov<trunsuffix>qd\t{%1, %0|%0, %1}"
9484 [(set_attr "type" "ssemov")
9485 (set_attr "memory" "store")
9486 (set_attr "prefix" "evex")
9487 (set_attr "mode" "TI")])
9488
;; Masked register form of vpmov<trunsuffix>qd.  Under mask operand 3 the
;; truncated V2DI source is vec_merged with elements 0-1 of operand 2
;; (constraint "0C": tied to the destination, or the "C" alternative that
;; the template renders through %N2).  The high V2SI half is constant zero.
9489 (define_insn "avx512vl_<code>v2div2si2_mask"
9490 [(set (match_operand:V4SI 0 "register_operand" "=v")
9491 (vec_concat:V4SI
9492 (vec_merge:V2SI
9493 (any_truncate:V2SI
9494 (match_operand:V2DI 1 "register_operand" "v"))
9495 (vec_select:V2SI
9496 (match_operand:V4SI 2 "vector_move_operand" "0C")
9497 (parallel [(const_int 0) (const_int 1)]))
9498 (match_operand:QI 3 "register_operand" "Yk"))
9499 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
9500 "TARGET_AVX512VL"
9501 "vpmov<trunsuffix>qd\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9502 [(set_attr "type" "ssemov")
9503 (set_attr "prefix" "evex")
9504 (set_attr "mode" "TI")])
9505
;; Masked store form of vpmov<trunsuffix>qd.  Elements 0-1 of the V4SI memory
;; destination are merged, under mask operand 2, from the truncated V2DI
;; source and the existing memory contents (match_dup 0); elements 2-3 are
;; re-selected from operand 0 and therefore modelled as unchanged.
9506 (define_insn "avx512vl_<code>v2div2si2_mask_store"
9507 [(set (match_operand:V4SI 0 "memory_operand" "=m")
9508 (vec_concat:V4SI
9509 (vec_merge:V2SI
9510 (any_truncate:V2SI
9511 (match_operand:V2DI 1 "register_operand" "v"))
9512 (vec_select:V2SI
9513 (match_dup 0)
9514 (parallel [(const_int 0) (const_int 1)]))
9515 (match_operand:QI 2 "register_operand" "Yk"))
9516 (vec_select:V2SI
9517 (match_dup 0)
9518 (parallel [(const_int 2) (const_int 3)]))))]
9519 "TARGET_AVX512VL"
9520 "vpmov<trunsuffix>qd\t{%1, %0%{%2%}|%0%{%2%}, %1}"
9521 [(set_attr "type" "ssemov")
9522 (set_attr "memory" "store")
9523 (set_attr "prefix" "evex")
9524 (set_attr "mode" "TI")])
9525
;; Unmasked register form of vpmov<trunsuffix>qb for a 512-bit source.  The
;; eight DImode elements of operand 1 are truncated to QImode and occupy the
;; low eight bytes of the V16QI destination; the high eight bytes are
;; constant zero.  Only TARGET_AVX512F is required.
9526 (define_insn "*avx512f_<code>v8div16qi2"
9527 [(set (match_operand:V16QI 0 "register_operand" "=v")
9528 (vec_concat:V16QI
9529 (any_truncate:V8QI
9530 (match_operand:V8DI 1 "register_operand" "v"))
9531 (const_vector:V8QI [(const_int 0) (const_int 0)
9532 (const_int 0) (const_int 0)
9533 (const_int 0) (const_int 0)
9534 (const_int 0) (const_int 0)])))]
9535 "TARGET_AVX512F"
9536 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
9537 [(set_attr "type" "ssemov")
9538 (set_attr "prefix" "evex")
9539 (set_attr "mode" "TI")])
9540
;; Unmasked store form of vpmov<trunsuffix>qb.  The truncated V8DI source
;; becomes bytes 0-7 of the V16QI memory destination; bytes 8-15 are a
;; vec_select of operand 0 itself (match_dup 0), i.e. modelled as unchanged.
9541 (define_insn "*avx512f_<code>v8div16qi2_store"
9542 [(set (match_operand:V16QI 0 "memory_operand" "=m")
9543 (vec_concat:V16QI
9544 (any_truncate:V8QI
9545 (match_operand:V8DI 1 "register_operand" "v"))
9546 (vec_select:V8QI
9547 (match_dup 0)
9548 (parallel [(const_int 8) (const_int 9)
9549 (const_int 10) (const_int 11)
9550 (const_int 12) (const_int 13)
9551 (const_int 14) (const_int 15)]))))]
9552 "TARGET_AVX512F"
9553 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
9554 [(set_attr "type" "ssemov")
9555 (set_attr "memory" "store")
9556 (set_attr "prefix" "evex")
9557 (set_attr "mode" "TI")])
9558
;; Masked register form of vpmov<trunsuffix>qb.  Under mask operand 3 the
;; truncated V8DI source is vec_merged with bytes 0-7 of operand 2
;; (constraint "0C": tied to the destination, or the "C" alternative that
;; the template renders through %N2).  Bytes 8-15 of the result are
;; constant zero.
9559 (define_insn "avx512f_<code>v8div16qi2_mask"
9560 [(set (match_operand:V16QI 0 "register_operand" "=v")
9561 (vec_concat:V16QI
9562 (vec_merge:V8QI
9563 (any_truncate:V8QI
9564 (match_operand:V8DI 1 "register_operand" "v"))
9565 (vec_select:V8QI
9566 (match_operand:V16QI 2 "vector_move_operand" "0C")
9567 (parallel [(const_int 0) (const_int 1)
9568 (const_int 2) (const_int 3)
9569 (const_int 4) (const_int 5)
9570 (const_int 6) (const_int 7)]))
9571 (match_operand:QI 3 "register_operand" "Yk"))
9572 (const_vector:V8QI [(const_int 0) (const_int 0)
9573 (const_int 0) (const_int 0)
9574 (const_int 0) (const_int 0)
9575 (const_int 0) (const_int 0)])))]
9576 "TARGET_AVX512F"
9577 "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9578 [(set_attr "type" "ssemov")
9579 (set_attr "prefix" "evex")
9580 (set_attr "mode" "TI")])
9581
;; Masked store form of vpmov<trunsuffix>qb.  Bytes 0-7 of the V16QI memory
;; destination are merged, under mask operand 2, from the truncated V8DI
;; source and the existing memory contents (match_dup 0); bytes 8-15 are
;; re-selected from operand 0 and therefore modelled as unchanged.
9582 (define_insn "avx512f_<code>v8div16qi2_mask_store"
9583 [(set (match_operand:V16QI 0 "memory_operand" "=m")
9584 (vec_concat:V16QI
9585 (vec_merge:V8QI
9586 (any_truncate:V8QI
9587 (match_operand:V8DI 1 "register_operand" "v"))
9588 (vec_select:V8QI
9589 (match_dup 0)
9590 (parallel [(const_int 0) (const_int 1)
9591 (const_int 2) (const_int 3)
9592 (const_int 4) (const_int 5)
9593 (const_int 6) (const_int 7)]))
9594 (match_operand:QI 2 "register_operand" "Yk"))
9595 (vec_select:V8QI
9596 (match_dup 0)
9597 (parallel [(const_int 8) (const_int 9)
9598 (const_int 10) (const_int 11)
9599 (const_int 12) (const_int 13)
9600 (const_int 14) (const_int 15)]))))]
9601 "TARGET_AVX512F"
9602 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%0%{%2%}, %1}"
9603 [(set_attr "type" "ssemov")
9604 (set_attr "memory" "store")
9605 (set_attr "prefix" "evex")
9606 (set_attr "mode" "TI")])
9607
9608 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9609 ;;
9610 ;; Parallel integral arithmetic
9611 ;;
9612 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9613
;; Vector integer negation, expanded as (minus 0 operand1).  Operand 2 is
;; not supplied by the caller: the preparation statement creates it as a
;; register holding CONST0_RTX of the vector mode, and the template refers
;; to it through match_dup.
9614 (define_expand "neg<mode>2"
9615 [(set (match_operand:VI_AVX2 0 "register_operand")
9616 (minus:VI_AVX2
9617 (match_dup 2)
9618 (match_operand:VI_AVX2 1 "nonimmediate_operand")))]
9619 "TARGET_SSE2"
9620 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
9621
;; Named expander for vector integer add/sub over the VI_AVX2 modes.  Both
;; inputs may be registers or memory; the preparation statement hands the
;; operand array to ix86_fixup_binary_operands_no_copy before the
;; (plusminus ...) SET is emitted.
9622 (define_expand "<plusminus_insn><mode>3"
9623 [(set (match_operand:VI_AVX2 0 "register_operand")
9624 (plusminus:VI_AVX2
9625 (match_operand:VI_AVX2 1 "nonimmediate_operand")
9626 (match_operand:VI_AVX2 2 "nonimmediate_operand")))]
9627 "TARGET_SSE2"
9628 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
9629
;; Masked add/sub expander for the VI48_AVX512VL modes.  The arithmetic
;; result is vec_merged with operand 3 (the value for lanes whose mask bit
;; is clear) under mask register operand 4.  Operands are first passed to
;; ix86_fixup_binary_operands_no_copy.
9630 (define_expand "<plusminus_insn><mode>3_mask"
9631 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
9632 (vec_merge:VI48_AVX512VL
9633 (plusminus:VI48_AVX512VL
9634 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
9635 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
9636 (match_operand:VI48_AVX512VL 3 "vector_move_operand")
9637 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
9638 "TARGET_AVX512F"
9639 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
9640
;; Masked add/sub expander for the VI12_AVX512VL modes.  Same shape as the
;; VI48_AVX512VL expander (vec_merge of the arithmetic result with operand 3
;; under mask operand 4) but gated on TARGET_AVX512BW.
9641 (define_expand "<plusminus_insn><mode>3_mask"
9642 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
9643 (vec_merge:VI12_AVX512VL
9644 (plusminus:VI12_AVX512VL
9645 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
9646 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand"))
9647 (match_operand:VI12_AVX512VL 3 "vector_move_operand")
9648 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
9649 "TARGET_AVX512BW"
9650 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
9651
;; Unmasked vector integer add/sub for every VI_AVX2 mode.
;; Alternative 0 is the two-operand legacy SSE encoding (destination tied to
;; operand 1, "isa" noavx); alternative 1 is the three-operand AVX form.
;; The pattern name carries no <mask_name>, so no mask subst attributes are
;; used in the template or in the "prefix" attribute: masked add/sub is
;; provided by the dedicated *<plusminus_insn><mode>3_mask insns, which
;; carry their own TARGET_AVX512F / TARGET_AVX512BW conditions.
9652 (define_insn "*<plusminus_insn><mode>3"
9653 [(set (match_operand:VI_AVX2 0 "register_operand" "=x,v")
9654 (plusminus:VI_AVX2
9655 (match_operand:VI_AVX2 1 "nonimmediate_operand" "<comm>0,v")
9656 (match_operand:VI_AVX2 2 "nonimmediate_operand" "xm,vm")))]
9657 "TARGET_SSE2
9658 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
9659 "@
9660 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
9661 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9662 [(set_attr "isa" "noavx,avx")
9663 (set_attr "type" "sseiadd")
9664 (set_attr "prefix_data16" "1,*")
9665 (set_attr "prefix" "orig,vex")
9666 (set_attr "mode" "<sseinsnmode>")])
9667
;; Masked add/sub insn for the VI48_AVX512VL modes.  The result is
;; vec_merged with operand 3 ("0C": tied to the destination or the "C"
;; alternative, printed through %N3) under mask operand 4 ("Yk").  Only the
;; three-operand EVEX form is emitted.
9668 (define_insn "*<plusminus_insn><mode>3_mask"
9669 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
9670 (vec_merge:VI48_AVX512VL
9671 (plusminus:VI48_AVX512VL
9672 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "<comm>v")
9673 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm"))
9674 (match_operand:VI48_AVX512VL 3 "vector_move_operand" "0C")
9675 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
9676 "TARGET_AVX512F
9677 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
9678 "vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
9679 [(set_attr "type" "sseiadd")
9680 (set_attr "prefix" "evex")
9681 (set_attr "mode" "<sseinsnmode>")])
9682
;; Masked add/sub insn for the VI12_AVX512VL modes.  Identical in shape to
;; the VI48_AVX512VL insn (vec_merge with operand 3 under mask operand 4)
;; but requires TARGET_AVX512BW.
9683 (define_insn "*<plusminus_insn><mode>3_mask"
9684 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
9685 (vec_merge:VI12_AVX512VL
9686 (plusminus:VI12_AVX512VL
9687 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "<comm>v")
9688 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm"))
9689 (match_operand:VI12_AVX512VL 3 "vector_move_operand" "0C")
9690 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
9691 "TARGET_AVX512BW && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
9692 "vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
9693 [(set_attr "type" "sseiadd")
9694 (set_attr "prefix" "evex")
9695 (set_attr "mode" "<sseinsnmode>")])
9696
;; Expander for saturating add/sub (sat_plusminus codes) on the VI12_AVX2
;; modes.  The name ends in <mask_name> and the condition includes the
;; <mask_mode512bit_condition> and <mask_avx512bw_condition> subst
;; attributes.  Operands are passed to ix86_fixup_binary_operands_no_copy.
9697 (define_expand "<sse2_avx2>_<plusminus_insn><mode>3<mask_name>"
9698 [(set (match_operand:VI12_AVX2 0 "register_operand")
9699 (sat_plusminus:VI12_AVX2
9700 (match_operand:VI12_AVX2 1 "nonimmediate_operand")
9701 (match_operand:VI12_AVX2 2 "nonimmediate_operand")))]
9702 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
9703 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
9704
;; Insn for saturating add/sub on the VI12_AVX2 modes.  Alternative 0 is the
;; two-operand legacy SSE form (destination tied to operand 1); alternative
;; 1 is the three-operand AVX form, with <mask_operand3> appended to the
;; destination in the template.
9705 (define_insn "*<sse2_avx2>_<plusminus_insn><mode>3<mask_name>"
9706 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v")
9707 (sat_plusminus:VI12_AVX2
9708 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "<comm>0,v")
9709 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,vm")))]
9710 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
9711 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
9712 "@
9713 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
9714 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9715 [(set_attr "isa" "noavx,avx")
9716 (set_attr "type" "sseiadd")
9717 (set_attr "prefix_data16" "1,*")
9718 (set_attr "prefix" "orig,maybe_evex")
9719 (set_attr "mode" "TI")])
9720
;; Multiplication expander for the byte-element VI1_AVX512 modes.  The RTL
;; template is never emitted: the body hands operands 0-2 to
;; ix86_expand_vecop_qihi with code MULT and finishes with DONE.
;; NOTE(review): the body only references operands[0..2]; any extra mask
;; operands contributed by <mask_name> are not used here -- confirm that the
;; masked variant of this expander is intended.
9721 (define_expand "mul<mode>3<mask_name>"
9722 [(set (match_operand:VI1_AVX512 0 "register_operand")
9723 (mult:VI1_AVX512 (match_operand:VI1_AVX512 1 "register_operand")
9724 (match_operand:VI1_AVX512 2 "register_operand")))]
9725 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
9726 {
9727 ix86_expand_vecop_qihi (MULT, operands[0], operands[1], operands[2]);
9728 DONE;
9729 })
9730
;; Multiplication expander for the VI2_AVX2 modes.  Emits the (mult ...) SET
;; after passing the operands to ix86_fixup_binary_operands_no_copy.
9731 (define_expand "mul<mode>3<mask_name>"
9732 [(set (match_operand:VI2_AVX2 0 "register_operand")
9733 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand")
9734 (match_operand:VI2_AVX2 2 "nonimmediate_operand")))]
9735 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
9736 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
9737
;; pmullw / vpmullw: element-wise multiply for the VI2_AVX2 modes.  The "%"
;; on operand 1 marks operands 1 and 2 as commutative.  Alternative 0 is the
;; two-operand SSE form, alternative 1 the three-operand AVX form with
;; <mask_operand3> appended to the destination.
9738 (define_insn "*mul<mode>3<mask_name>"
9739 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
9740 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,v")
9741 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,vm")))]
9742 "TARGET_SSE2
9743 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)
9744 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
9745 "@
9746 pmullw\t{%2, %0|%0, %2}
9747 vpmullw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9748 [(set_attr "isa" "noavx,avx")
9749 (set_attr "type" "sseimul")
9750 (set_attr "prefix_data16" "1,*")
9751 (set_attr "prefix" "orig,vex")
9752 (set_attr "mode" "<sseinsnmode>")])
9753
;; High-part multiply expander for the VI2_AVX2 modes.  Both inputs are
;; extended (any_extend: signed or unsigned) to <ssedoublemode>, multiplied,
;; shifted right logically by 16 and truncated back to the element mode,
;; i.e. the upper 16 bits of each 32-bit product are kept.
9754 (define_expand "<s>mul<mode>3_highpart<mask_name>"
9755 [(set (match_operand:VI2_AVX2 0 "register_operand")
9756 (truncate:VI2_AVX2
9757 (lshiftrt:<ssedoublemode>
9758 (mult:<ssedoublemode>
9759 (any_extend:<ssedoublemode>
9760 (match_operand:VI2_AVX2 1 "nonimmediate_operand"))
9761 (any_extend:<ssedoublemode>
9762 (match_operand:VI2_AVX2 2 "nonimmediate_operand")))
9763 (const_int 16))))]
9764 "TARGET_SSE2
9765 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
9766 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
9767
;; pmulh<u>w / vpmulh<u>w: matches the extend-multiply-shift-truncate RTL
;; produced by the highpart expander.  Operands 1 and 2 are commutative
;; ("%").  Alternative 0 is the two-operand SSE form, alternative 1 the
;; three-operand AVX form with <mask_operand3> on the destination.
9768 (define_insn "*<s>mul<mode>3_highpart<mask_name>"
9769 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
9770 (truncate:VI2_AVX2
9771 (lshiftrt:<ssedoublemode>
9772 (mult:<ssedoublemode>
9773 (any_extend:<ssedoublemode>
9774 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,v"))
9775 (any_extend:<ssedoublemode>
9776 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,vm")))
9777 (const_int 16))))]
9778 "TARGET_SSE2
9779 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)
9780 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
9781 "@
9782 pmulh<u>w\t{%2, %0|%0, %2}
9783 vpmulh<u>w\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9784 [(set_attr "isa" "noavx,avx")
9785 (set_attr "type" "sseimul")
9786 (set_attr "prefix_data16" "1,*")
9787 (set_attr "prefix" "orig,vex")
9788 (set_attr "mode" "<sseinsnmode>")])
9789
;; Unsigned widening multiply of the even-indexed SImode elements
;; (0, 2, ..., 14) of two V16SI inputs, producing V8DI.  Each input is
;; vec_selected, zero-extended and then multiplied.  Operands are passed to
;; ix86_fixup_binary_operands_no_copy.
9790 (define_expand "vec_widen_umult_even_v16si<mask_name>"
9791 [(set (match_operand:V8DI 0 "register_operand")
9792 (mult:V8DI
9793 (zero_extend:V8DI
9794 (vec_select:V8SI
9795 (match_operand:V16SI 1 "nonimmediate_operand")
9796 (parallel [(const_int 0) (const_int 2)
9797 (const_int 4) (const_int 6)
9798 (const_int 8) (const_int 10)
9799 (const_int 12) (const_int 14)])))
9800 (zero_extend:V8DI
9801 (vec_select:V8SI
9802 (match_operand:V16SI 2 "nonimmediate_operand")
9803 (parallel [(const_int 0) (const_int 2)
9804 (const_int 4) (const_int 6)
9805 (const_int 8) (const_int 10)
9806 (const_int 12) (const_int 14)])))))]
9807 "TARGET_AVX512F"
9808 "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
9809
;; 512-bit vpmuludq: matches the zero-extended even-element product of two
;; V16SI operands (operand 1 commutative with operand 2 via "%") and emits
;; the three-operand form with <mask_operand3> on the destination.
9810 (define_insn "*vec_widen_umult_even_v16si<mask_name>"
9811 [(set (match_operand:V8DI 0 "register_operand" "=v")
9812 (mult:V8DI
9813 (zero_extend:V8DI
9814 (vec_select:V8SI
9815 (match_operand:V16SI 1 "nonimmediate_operand" "%v")
9816 (parallel [(const_int 0) (const_int 2)
9817 (const_int 4) (const_int 6)
9818 (const_int 8) (const_int 10)
9819 (const_int 12) (const_int 14)])))
9820 (zero_extend:V8DI
9821 (vec_select:V8SI
9822 (match_operand:V16SI 2 "nonimmediate_operand" "vm")
9823 (parallel [(const_int 0) (const_int 2)
9824 (const_int 4) (const_int 6)
9825 (const_int 8) (const_int 10)
9826 (const_int 12) (const_int 14)])))))]
9827 "TARGET_AVX512F && ix86_binary_operator_ok (MULT, V16SImode, operands)"
9828 "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9829 [(set_attr "isa" "avx512f")
9830 (set_attr "type" "sseimul")
9831 (set_attr "prefix_extra" "1")
9832 (set_attr "prefix" "evex")
9833 (set_attr "mode" "XI")])
9834
;; Unsigned widening multiply of the even-indexed elements (0, 2, 4, 6) of
;; two V8SI inputs, producing V4DI.  Requires TARGET_AVX2; the condition
;; additionally includes <mask_avx512vl_condition>.
9835 (define_expand "vec_widen_umult_even_v8si<mask_name>"
9836 [(set (match_operand:V4DI 0 "register_operand")
9837 (mult:V4DI
9838 (zero_extend:V4DI
9839 (vec_select:V4SI
9840 (match_operand:V8SI 1 "nonimmediate_operand")
9841 (parallel [(const_int 0) (const_int 2)
9842 (const_int 4) (const_int 6)])))
9843 (zero_extend:V4DI
9844 (vec_select:V4SI
9845 (match_operand:V8SI 2 "nonimmediate_operand")
9846 (parallel [(const_int 0) (const_int 2)
9847 (const_int 4) (const_int 6)])))))]
9848 "TARGET_AVX2 && <mask_avx512vl_condition>"
9849 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
9850
;; 256-bit vpmuludq: matches the zero-extended even-element product of two
;; V8SI operands.  The condition includes <mask_avx512vl_condition> and the
;; "prefix" attribute is maybe_evex.
9851 (define_insn "*vec_widen_umult_even_v8si<mask_name>"
9852 [(set (match_operand:V4DI 0 "register_operand" "=v")
9853 (mult:V4DI
9854 (zero_extend:V4DI
9855 (vec_select:V4SI
9856 (match_operand:V8SI 1 "nonimmediate_operand" "%v")
9857 (parallel [(const_int 0) (const_int 2)
9858 (const_int 4) (const_int 6)])))
9859 (zero_extend:V4DI
9860 (vec_select:V4SI
9861 (match_operand:V8SI 2 "nonimmediate_operand" "vm")
9862 (parallel [(const_int 0) (const_int 2)
9863 (const_int 4) (const_int 6)])))))]
9864 "TARGET_AVX2 && <mask_avx512vl_condition>
9865 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
9866 "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9867 [(set_attr "type" "sseimul")
9868 (set_attr "prefix" "maybe_evex")
9869 (set_attr "mode" "OI")])
9870
;; Unsigned widening multiply of elements 0 and 2 of two V4SI inputs,
;; producing V2DI.  Available from TARGET_SSE2; the condition additionally
;; includes <mask_avx512vl_condition>.
9871 (define_expand "vec_widen_umult_even_v4si<mask_name>"
9872 [(set (match_operand:V2DI 0 "register_operand")
9873 (mult:V2DI
9874 (zero_extend:V2DI
9875 (vec_select:V2SI
9876 (match_operand:V4SI 1 "nonimmediate_operand")
9877 (parallel [(const_int 0) (const_int 2)])))
9878 (zero_extend:V2DI
9879 (vec_select:V2SI
9880 (match_operand:V4SI 2 "nonimmediate_operand")
9881 (parallel [(const_int 0) (const_int 2)])))))]
9882 "TARGET_SSE2 && <mask_avx512vl_condition>"
9883 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
9884
;; 128-bit pmuludq / vpmuludq.  Alternative 0 is the two-operand SSE2 form
;; (destination tied to operand 1); alternative 1 is the three-operand AVX
;; form with <mask_operand3> on the destination.
9885 (define_insn "*vec_widen_umult_even_v4si<mask_name>"
9886 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
9887 (mult:V2DI
9888 (zero_extend:V2DI
9889 (vec_select:V2SI
9890 (match_operand:V4SI 1 "nonimmediate_operand" "%0,v")
9891 (parallel [(const_int 0) (const_int 2)])))
9892 (zero_extend:V2DI
9893 (vec_select:V2SI
9894 (match_operand:V4SI 2 "nonimmediate_operand" "xm,vm")
9895 (parallel [(const_int 0) (const_int 2)])))))]
9896 "TARGET_SSE2 && <mask_avx512vl_condition>
9897 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
9898 "@
9899 pmuludq\t{%2, %0|%0, %2}
9900 vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9901 [(set_attr "isa" "noavx,avx")
9902 (set_attr "type" "sseimul")
9903 (set_attr "prefix_data16" "1,*")
9904 (set_attr "prefix" "orig,maybe_evex")
9905 (set_attr "mode" "TI")])
9906
;; Signed widening multiply of the even-indexed SImode elements
;; (0, 2, ..., 14) of two V16SI inputs, producing V8DI.  Same shape as the
;; unsigned v16si expander but with sign_extend.
9907 (define_expand "vec_widen_smult_even_v16si<mask_name>"
9908 [(set (match_operand:V8DI 0 "register_operand")
9909 (mult:V8DI
9910 (sign_extend:V8DI
9911 (vec_select:V8SI
9912 (match_operand:V16SI 1 "nonimmediate_operand")
9913 (parallel [(const_int 0) (const_int 2)
9914 (const_int 4) (const_int 6)
9915 (const_int 8) (const_int 10)
9916 (const_int 12) (const_int 14)])))
9917 (sign_extend:V8DI
9918 (vec_select:V8SI
9919 (match_operand:V16SI 2 "nonimmediate_operand")
9920 (parallel [(const_int 0) (const_int 2)
9921 (const_int 4) (const_int 6)
9922 (const_int 8) (const_int 10)
9923 (const_int 12) (const_int 14)])))))]
9924 "TARGET_AVX512F"
9925 "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
9926
;; 512-bit vpmuldq: matches the sign-extended even-element product of two
;; V16SI operands and emits the three-operand form with <mask_operand3> on
;; the destination.
9927 (define_insn "*vec_widen_smult_even_v16si<mask_name>"
9928 [(set (match_operand:V8DI 0 "register_operand" "=v")
9929 (mult:V8DI
9930 (sign_extend:V8DI
9931 (vec_select:V8SI
9932 (match_operand:V16SI 1 "nonimmediate_operand" "%v")
9933 (parallel [(const_int 0) (const_int 2)
9934 (const_int 4) (const_int 6)
9935 (const_int 8) (const_int 10)
9936 (const_int 12) (const_int 14)])))
9937 (sign_extend:V8DI
9938 (vec_select:V8SI
9939 (match_operand:V16SI 2 "nonimmediate_operand" "vm")
9940 (parallel [(const_int 0) (const_int 2)
9941 (const_int 4) (const_int 6)
9942 (const_int 8) (const_int 10)
9943 (const_int 12) (const_int 14)])))))]
9944 "TARGET_AVX512F && ix86_binary_operator_ok (MULT, V16SImode, operands)"
9945 "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9946 [(set_attr "isa" "avx512f")
9947 (set_attr "type" "sseimul")
9948 (set_attr "prefix_extra" "1")
9949 (set_attr "prefix" "evex")
9950 (set_attr "mode" "XI")])
9951
;; Signed widening multiply of the even-indexed elements (0, 2, 4, 6) of two
;; V8SI inputs, producing V4DI.  Requires TARGET_AVX2; the condition
;; additionally includes <mask_avx512vl_condition>.
9952 (define_expand "vec_widen_smult_even_v8si<mask_name>"
9953 [(set (match_operand:V4DI 0 "register_operand")
9954 (mult:V4DI
9955 (sign_extend:V4DI
9956 (vec_select:V4SI
9957 (match_operand:V8SI 1 "nonimmediate_operand")
9958 (parallel [(const_int 0) (const_int 2)
9959 (const_int 4) (const_int 6)])))
9960 (sign_extend:V4DI
9961 (vec_select:V4SI
9962 (match_operand:V8SI 2 "nonimmediate_operand")
9963 (parallel [(const_int 0) (const_int 2)
9964 (const_int 4) (const_int 6)])))))]
9965 "TARGET_AVX2 && <mask_avx512vl_condition>"
9966 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
9967
;; 256-bit vpmuldq: matches the sign-extended even-element (0, 2, 4, 6)
;; product of two V8SI operands, producing V4DI.  Operands 1 and 2 are
;; commutative ("%").
;; The condition carries <mask_avx512vl_condition>, matching both the
;; vec_widen_smult_even_v8si<mask_name> expander and the unsigned
;; *vec_widen_umult_even_v8si<mask_name> insn, so that the masked variant
;; generated through <mask_name> is gated the same way as its siblings.
;; "prefix" is maybe_evex (as in the unsigned insn) because the template can
;; emit a write mask via <mask_operand3>.
9968 (define_insn "*vec_widen_smult_even_v8si<mask_name>"
9969 [(set (match_operand:V4DI 0 "register_operand" "=v")
9970 (mult:V4DI
9971 (sign_extend:V4DI
9972 (vec_select:V4SI
9973 (match_operand:V8SI 1 "nonimmediate_operand" "%v")
9974 (parallel [(const_int 0) (const_int 2)
9975 (const_int 4) (const_int 6)])))
9976 (sign_extend:V4DI
9977 (vec_select:V4SI
9978 (match_operand:V8SI 2 "nonimmediate_operand" "vm")
9979 (parallel [(const_int 0) (const_int 2)
9980 (const_int 4) (const_int 6)])))))]
9981 "TARGET_AVX2 && <mask_avx512vl_condition>
9982 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
9983 "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9984 [(set_attr "type" "sseimul")
9985 (set_attr "prefix_extra" "1")
9986 (set_attr "prefix" "maybe_evex")
9987 (set_attr "mode" "OI")])
9988
;; Signed widening multiply of elements 0 and 2 of two V4SI inputs,
;; producing V2DI.  Requires TARGET_SSE4_1; the condition additionally
;; includes <mask_avx512vl_condition>.
9989 (define_expand "sse4_1_mulv2siv2di3<mask_name>"
9990 [(set (match_operand:V2DI 0 "register_operand")
9991 (mult:V2DI
9992 (sign_extend:V2DI
9993 (vec_select:V2SI
9994 (match_operand:V4SI 1 "nonimmediate_operand")
9995 (parallel [(const_int 0) (const_int 2)])))
9996 (sign_extend:V2DI
9997 (vec_select:V2SI
9998 (match_operand:V4SI 2 "nonimmediate_operand")
9999 (parallel [(const_int 0) (const_int 2)])))))]
10000 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
10001 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
10002
;; 128-bit pmuldq / vpmuldq.  Alternatives 0 and 1 are both the two-operand
;; SSE4.1 form (destination tied to operand 1) and differ only in register
;; class ("Yr" versus "*x"); alternative 2 is the three-operand AVX form
;; with <mask_operand3> on the destination.
10003 (define_insn "*sse4_1_mulv2siv2di3<mask_name>"
10004 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
10005 (mult:V2DI
10006 (sign_extend:V2DI
10007 (vec_select:V2SI
10008 (match_operand:V4SI 1 "nonimmediate_operand" "%0,0,v")
10009 (parallel [(const_int 0) (const_int 2)])))
10010 (sign_extend:V2DI
10011 (vec_select:V2SI
10012 (match_operand:V4SI 2 "nonimmediate_operand" "Yrm,*xm,vm")
10013 (parallel [(const_int 0) (const_int 2)])))))]
10014 "TARGET_SSE4_1 && <mask_avx512vl_condition>
10015 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
10016 "@
10017 pmuldq\t{%2, %0|%0, %2}
10018 pmuldq\t{%2, %0|%0, %2}
10019 vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10020 [(set_attr "isa" "noavx,noavx,avx")
10021 (set_attr "type" "sseimul")
10022 (set_attr "prefix_data16" "1,1,*")
10023 (set_attr "prefix_extra" "1")
10024 (set_attr "prefix" "orig,orig,vex")
10025 (set_attr "mode" "TI")])
10026
;; AVX512BW vpmaddwd, modelled as an opaque UNSPEC_PMADDWD512 of the two
;; VI2_AVX2 inputs rather than as explicit multiply-add RTL.  The result
;; mode is <sseunpackmode> of the input mode.
;; The "mode" attribute follows the iterator through <sseinsnmode>, as the
;; other iterator-based multiply patterns in this file do, instead of being
;; fixed to the 512-bit value for every VI2_AVX2 mode.
10027 (define_insn "avx512bw_pmaddwd512<mode><mask_name>"
10028 [(set (match_operand:<sseunpackmode> 0 "register_operand" "=v")
10029 (unspec:<sseunpackmode>
10030 [(match_operand:VI2_AVX2 1 "register_operand" "v")
10031 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "vm")]
10032 UNSPEC_PMADDWD512))]
10033 "TARGET_AVX512BW && <mask_mode512bit_condition>"
10034 "vpmaddwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10035 [(set_attr "type" "sseiadd")
10036 (set_attr "prefix" "evex")
10037 (set_attr "mode" "<sseinsnmode>")])
10038
;; 256-bit multiply-add expander.  For two V16HI inputs the result element i
;; (V8SI) is the sum of two sign-extended products: the even-indexed pair
;; (elements 0, 2, ..., 14) and the odd-indexed pair (elements 1, 3, ..., 15,
;; taken from the same operands through match_dup).
10039 (define_expand "avx2_pmaddwd"
10040 [(set (match_operand:V8SI 0 "register_operand")
10041 (plus:V8SI
10042 (mult:V8SI
10043 (sign_extend:V8SI
10044 (vec_select:V8HI
10045 (match_operand:V16HI 1 "nonimmediate_operand")
10046 (parallel [(const_int 0) (const_int 2)
10047 (const_int 4) (const_int 6)
10048 (const_int 8) (const_int 10)
10049 (const_int 12) (const_int 14)])))
10050 (sign_extend:V8SI
10051 (vec_select:V8HI
10052 (match_operand:V16HI 2 "nonimmediate_operand")
10053 (parallel [(const_int 0) (const_int 2)
10054 (const_int 4) (const_int 6)
10055 (const_int 8) (const_int 10)
10056 (const_int 12) (const_int 14)]))))
10057 (mult:V8SI
10058 (sign_extend:V8SI
10059 (vec_select:V8HI (match_dup 1)
10060 (parallel [(const_int 1) (const_int 3)
10061 (const_int 5) (const_int 7)
10062 (const_int 9) (const_int 11)
10063 (const_int 13) (const_int 15)])))
10064 (sign_extend:V8SI
10065 (vec_select:V8HI (match_dup 2)
10066 (parallel [(const_int 1) (const_int 3)
10067 (const_int 5) (const_int 7)
10068 (const_int 9) (const_int 11)
10069 (const_int 13) (const_int 15)]))))))]
10070 "TARGET_AVX2"
10071 "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
10072
;; 256-bit vpmaddwd insn matching the even-product plus odd-product RTL of
;; the avx2_pmaddwd expander.  Operands 1 and 2 are commutative ("%"); only
;; the three-operand VEX form is emitted.
10073 (define_insn "*avx2_pmaddwd"
10074 [(set (match_operand:V8SI 0 "register_operand" "=x")
10075 (plus:V8SI
10076 (mult:V8SI
10077 (sign_extend:V8SI
10078 (vec_select:V8HI
10079 (match_operand:V16HI 1 "nonimmediate_operand" "%x")
10080 (parallel [(const_int 0) (const_int 2)
10081 (const_int 4) (const_int 6)
10082 (const_int 8) (const_int 10)
10083 (const_int 12) (const_int 14)])))
10084 (sign_extend:V8SI
10085 (vec_select:V8HI
10086 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
10087 (parallel [(const_int 0) (const_int 2)
10088 (const_int 4) (const_int 6)
10089 (const_int 8) (const_int 10)
10090 (const_int 12) (const_int 14)]))))
10091 (mult:V8SI
10092 (sign_extend:V8SI
10093 (vec_select:V8HI (match_dup 1)
10094 (parallel [(const_int 1) (const_int 3)
10095 (const_int 5) (const_int 7)
10096 (const_int 9) (const_int 11)
10097 (const_int 13) (const_int 15)])))
10098 (sign_extend:V8SI
10099 (vec_select:V8HI (match_dup 2)
10100 (parallel [(const_int 1) (const_int 3)
10101 (const_int 5) (const_int 7)
10102 (const_int 9) (const_int 11)
10103 (const_int 13) (const_int 15)]))))))]
10104 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)"
10105 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
10106 [(set_attr "type" "sseiadd")
10107 (set_attr "prefix" "vex")
10108 (set_attr "mode" "OI")])
10109
;; 128-bit multiply-add expander.  For two V8HI inputs each V4SI result
;; element is the sum of the sign-extended product of the even-indexed pair
;; (elements 0, 2, 4, 6) and that of the odd-indexed pair (elements 1, 3, 5,
;; 7, taken from the same operands through match_dup).
10110 (define_expand "sse2_pmaddwd"
10111 [(set (match_operand:V4SI 0 "register_operand")
10112 (plus:V4SI
10113 (mult:V4SI
10114 (sign_extend:V4SI
10115 (vec_select:V4HI
10116 (match_operand:V8HI 1 "nonimmediate_operand")
10117 (parallel [(const_int 0) (const_int 2)
10118 (const_int 4) (const_int 6)])))
10119 (sign_extend:V4SI
10120 (vec_select:V4HI
10121 (match_operand:V8HI 2 "nonimmediate_operand")
10122 (parallel [(const_int 0) (const_int 2)
10123 (const_int 4) (const_int 6)]))))
10124 (mult:V4SI
10125 (sign_extend:V4SI
10126 (vec_select:V4HI (match_dup 1)
10127 (parallel [(const_int 1) (const_int 3)
10128 (const_int 5) (const_int 7)])))
10129 (sign_extend:V4SI
10130 (vec_select:V4HI (match_dup 2)
10131 (parallel [(const_int 1) (const_int 3)
10132 (const_int 5) (const_int 7)]))))))]
10133 "TARGET_SSE2"
10134 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
10135
;; 128-bit pmaddwd / vpmaddwd insn matching the RTL of the sse2_pmaddwd
;; expander.  Alternative 0 is the two-operand SSE2 form (destination tied
;; to operand 1); alternative 1 is the three-operand VEX form.
10136 (define_insn "*sse2_pmaddwd"
10137 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
10138 (plus:V4SI
10139 (mult:V4SI
10140 (sign_extend:V4SI
10141 (vec_select:V4HI
10142 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
10143 (parallel [(const_int 0) (const_int 2)
10144 (const_int 4) (const_int 6)])))
10145 (sign_extend:V4SI
10146 (vec_select:V4HI
10147 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
10148 (parallel [(const_int 0) (const_int 2)
10149 (const_int 4) (const_int 6)]))))
10150 (mult:V4SI
10151 (sign_extend:V4SI
10152 (vec_select:V4HI (match_dup 1)
10153 (parallel [(const_int 1) (const_int 3)
10154 (const_int 5) (const_int 7)])))
10155 (sign_extend:V4SI
10156 (vec_select:V4HI (match_dup 2)
10157 (parallel [(const_int 1) (const_int 3)
10158 (const_int 5) (const_int 7)]))))))]
10159 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
10160 "@
10161 pmaddwd\t{%2, %0|%0, %2}
10162 vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
10163 [(set_attr "isa" "noavx,avx")
10164 (set_attr "type" "sseiadd")
10165 (set_attr "atom_unit" "simul")
10166 (set_attr "prefix_data16" "1,*")
10167 (set_attr "prefix" "orig,vex")
10168 (set_attr "mode" "TI")])
10169
;; vpmullq: element-wise multiply for the VI8 (DImode element) vector modes.
;; Requires TARGET_AVX512DQ together with <mask_mode512bit_condition>.
;; Operand 1 must be a register; operand 2 may be a register or memory.
10170 (define_insn "avx512dq_mul<mode>3<mask_name>"
10171 [(set (match_operand:VI8 0 "register_operand" "=v")
10172 (mult:VI8
10173 (match_operand:VI8 1 "register_operand" "v")
10174 (match_operand:VI8 2 "nonimmediate_operand" "vm")))]
10175 "TARGET_AVX512DQ && <mask_mode512bit_condition>"
10176 "vpmullq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10177 [(set_attr "type" "sseimul")
10178 (set_attr "prefix" "evex")
10179 (set_attr "mode" "<sseinsnmode>")])
10180
;; Multiplication expander for the VI4_AVX512F modes.
;; With TARGET_SSE4_1 the (mult ...) template is emitted: operands that are
;; not already nonimmediate_operand are forced into registers first, then
;; the array is passed to ix86_fixup_binary_operands_no_copy.
;; Without SSE4.1 the work is delegated to ix86_expand_sse2_mulv4si3 and the
;; template is skipped (DONE).
10181 (define_expand "mul<mode>3<mask_name>"
10182 [(set (match_operand:VI4_AVX512F 0 "register_operand")
10183 (mult:VI4_AVX512F
10184 (match_operand:VI4_AVX512F 1 "general_vector_operand")
10185 (match_operand:VI4_AVX512F 2 "general_vector_operand")))]
10186 "TARGET_SSE2 && <mask_mode512bit_condition>"
10187 {
10188 if (TARGET_SSE4_1)
10189 {
10190 if (!nonimmediate_operand (operands[1], <MODE>mode))
10191 operands[1] = force_reg (<MODE>mode, operands[1]);
10192 if (!nonimmediate_operand (operands[2], <MODE>mode))
10193 operands[2] = force_reg (<MODE>mode, operands[2]);
10194 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
10195 }
10196 else
10197 {
10198 ix86_expand_sse2_mulv4si3 (operands[0], operands[1], operands[2]);
10199 DONE;
10200 }
10201 })
10202
;; pmulld / vpmulld: element-wise multiply for the VI4_AVX512F modes.
;; Alternatives 0 and 1 are the two-operand SSE4.1 form and differ only in
;; register class ("Yr" versus "*x"); alternative 2 is the three-operand AVX
;; form with <mask_operand3> on the destination.
10203 (define_insn "*<sse4_1_avx2>_mul<mode>3<mask_name>"
10204 [(set (match_operand:VI4_AVX512F 0 "register_operand" "=Yr,*x,v")
10205 (mult:VI4_AVX512F
10206 (match_operand:VI4_AVX512F 1 "nonimmediate_operand" "%0,0,v")
10207 (match_operand:VI4_AVX512F 2 "nonimmediate_operand" "Yrm,*xm,vm")))]
10208 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, <MODE>mode, operands) && <mask_mode512bit_condition>"
10209 "@
10210 pmulld\t{%2, %0|%0, %2}
10211 pmulld\t{%2, %0|%0, %2}
10212 vpmulld\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10213 [(set_attr "isa" "noavx,noavx,avx")
10214 (set_attr "type" "sseimul")
10215 (set_attr "prefix_extra" "1")
10216 (set_attr "prefix" "<mask_prefix4>")
10217 (set_attr "btver2_decode" "vector,vector,vector")
10218 (set_attr "mode" "<sseinsnmode>")])
10219
;; Multiplication expander for the VI8_AVX2_AVX512F modes.  The template is
;; never emitted: the body always delegates to ix86_expand_sse2_mulvxdi3
;; with the three register operands and finishes with DONE.
10220 (define_expand "mul<mode>3"
10221 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand")
10222 (mult:VI8_AVX2_AVX512F
10223 (match_operand:VI8_AVX2_AVX512F 1 "register_operand")
10224 (match_operand:VI8_AVX2_AVX512F 2 "register_operand")))]
10225 "TARGET_SSE2"
10226 {
10227 ix86_expand_sse2_mulvxdi3 (operands[0], operands[1], operands[2]);
10228 DONE;
10229 })
10230
;; Widening multiply expander (signed and unsigned via any_extend) for the
;; VI124_AVX2 modes.  Delegates to ix86_expand_mul_widen_hilo, passing
;; <u_bool> and a final argument of true; the lo expander passes false
;; there, so the flag presumably selects the high half -- confirm against
;; the helper's definition.
10231 (define_expand "vec_widen_<s>mult_hi_<mode>"
10232 [(match_operand:<sseunpackmode> 0 "register_operand")
10233 (any_extend:<sseunpackmode>
10234 (match_operand:VI124_AVX2 1 "register_operand"))
10235 (match_operand:VI124_AVX2 2 "register_operand")]
10236 "TARGET_SSE2"
10237 {
10238 ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
10239 <u_bool>, true);
10240 DONE;
10241 })
10242
;; Counterpart of vec_widen_<s>mult_hi_<mode>: same operands and the same
;; helper, ix86_expand_mul_widen_hilo, but with a final argument of false
;; (presumably selecting the low half -- confirm against the helper).
10243 (define_expand "vec_widen_<s>mult_lo_<mode>"
10244 [(match_operand:<sseunpackmode> 0 "register_operand")
10245 (any_extend:<sseunpackmode>
10246 (match_operand:VI124_AVX2 1 "register_operand"))
10247 (match_operand:VI124_AVX2 2 "register_operand")]
10248 "TARGET_SSE2"
10249 {
10250 ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
10251 <u_bool>, false);
10252 DONE;
10253 })
10254
10255 ;; Most widen_<s>mult_even_<mode> can be handled directly from other
10256 ;; named patterns, but signed V4SI needs special help for plain SSE2.
;; The body delegates to ix86_expand_mul_widen_evenodd with both trailing
;; flags false and then finishes with DONE.  The odd expander passes
;; <u_bool> and true in those positions, so they presumably mean
;; "unsigned" and "odd" -- confirm against the helper's definition.
10257 (define_expand "vec_widen_smult_even_v4si"
10258 [(match_operand:V2DI 0 "register_operand")
10259 (match_operand:V4SI 1 "nonimmediate_operand")
10260 (match_operand:V4SI 2 "nonimmediate_operand")]
10261 "TARGET_SSE2"
10262 {
10263 ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
10264 false, false);
10265 DONE;
10266 })
10267
;; Widening multiply of the odd-indexed elements for the VI4_AVX512F modes
;; (signed and unsigned via any_extend).  Delegates to
;; ix86_expand_mul_widen_evenodd with <u_bool> and a final argument of true.
10268 (define_expand "vec_widen_<s>mult_odd_<mode>"
10269 [(match_operand:<sseunpackmode> 0 "register_operand")
10270 (any_extend:<sseunpackmode>
10271 (match_operand:VI4_AVX512F 1 "general_vector_operand"))
10272 (match_operand:VI4_AVX512F 2 "general_vector_operand")]
10273 "TARGET_SSE2"
10274 {
10275 ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
10276 <u_bool>, true);
10277 DONE;
10278 })
10279
;; Suffix appended to "gen_<sse2_avx2>_pmaddwd" by sdot_prod<mode> to name
;; the generator function: "512v32hi" for V32HI, empty for V16HI and V8HI.
10280 (define_mode_attr SDOT_PMADD_SUF
10281 [(V32HI "512v32hi") (V16HI "") (V8HI "")])
10282
;; Signed dot-product expander for the VI2_AVX2 modes.  A fresh
;; <sseunpackmode> temporary receives the pmaddwd of operands 1 and 2 (the
;; generator name is built from <sse2_avx2> and <SDOT_PMADD_SUF>); operand 0
;; is then set to operand 3 plus that temporary.
10283 (define_expand "sdot_prod<mode>"
10284 [(match_operand:<sseunpackmode> 0 "register_operand")
10285 (match_operand:VI2_AVX2 1 "register_operand")
10286 (match_operand:VI2_AVX2 2 "register_operand")
10287 (match_operand:<sseunpackmode> 3 "register_operand")]
10288 "TARGET_SSE2"
10289 {
10290 rtx t = gen_reg_rtx (<sseunpackmode>mode);
10291 emit_insn (gen_<sse2_avx2>_pmaddwd<SDOT_PMADD_SUF> (t, operands[1], operands[2]));
10292 emit_insn (gen_rtx_SET (operands[0],
10293 gen_rtx_PLUS (<sseunpackmode>mode,
10294 operands[3], t)));
10295 DONE;
10296 })
10297
10298 ;; Normally we use widen_mul_even/odd, but combine can't quite get it all
10299 ;; back together when madd is available.
;; XOP-only dot product of two V4SI vectors accumulated into V2DI.
;; Two XOP multiply-accumulate insns are chained through a V2DI
;; temporary: xop_pmacsdqh takes operand 3 as its accumulator input and
;; writes t, then xop_pmacsdql takes t as its accumulator input and
;; writes operand 0.
10300 (define_expand "sdot_prodv4si"
10301 [(match_operand:V2DI 0 "register_operand")
10302 (match_operand:V4SI 1 "register_operand")
10303 (match_operand:V4SI 2 "register_operand")
10304 (match_operand:V2DI 3 "register_operand")]
10305 "TARGET_XOP"
10306 {
10307 rtx t = gen_reg_rtx (V2DImode);
10308 emit_insn (gen_xop_pmacsdqh (t, operands[1], operands[2], operands[3]));
10309 emit_insn (gen_xop_pmacsdql (operands[0], operands[1], operands[2], t));
10310 DONE;
10311 })
10312
;; usad expander for V16QI inputs with a V4SI accumulator (TARGET_SSE2).
;; Steps: sse2_psadbw of operands 1 and 2 into a V2DI temporary,
;; convert_move of that temporary into a V4SI temporary, then addv4si3
;; of the V4SI temporary and operand 3 into operand 0.
10313 (define_expand "usadv16qi"
10314 [(match_operand:V4SI 0 "register_operand")
10315 (match_operand:V16QI 1 "register_operand")
10316 (match_operand:V16QI 2 "nonimmediate_operand")
10317 (match_operand:V4SI 3 "nonimmediate_operand")]
10318 "TARGET_SSE2"
10319 {
10320 rtx t1 = gen_reg_rtx (V2DImode);
10321 rtx t2 = gen_reg_rtx (V4SImode);
10322 emit_insn (gen_sse2_psadbw (t1, operands[1], operands[2]));
10323 convert_move (t2, t1, 0);
10324 emit_insn (gen_addv4si3 (operands[0], t2, operands[3]));
10325 DONE;
10326 })
10327
;; usad expander for V32QI inputs with a V8SI accumulator (TARGET_AVX2).
;; Steps: avx2_psadbw of operands 1 and 2 into a V4DI temporary,
;; convert_move of that temporary into a V8SI temporary, then addv8si3
;; of the V8SI temporary and operand 3 into operand 0.
10328 (define_expand "usadv32qi"
10329 [(match_operand:V8SI 0 "register_operand")
10330 (match_operand:V32QI 1 "register_operand")
10331 (match_operand:V32QI 2 "nonimmediate_operand")
10332 (match_operand:V8SI 3 "nonimmediate_operand")]
10333 "TARGET_AVX2"
10334 {
10335 rtx t1 = gen_reg_rtx (V4DImode);
10336 rtx t2 = gen_reg_rtx (V8SImode);
10337 emit_insn (gen_avx2_psadbw (t1, operands[1], operands[2]));
10338 convert_move (t2, t1, 0);
10339 emit_insn (gen_addv8si3 (operands[0], t2, operands[3]));
10340 DONE;
10341 })
10342
;; Arithmetic right shift of a VI24_AVX2 vector by one SI count
;; (operand 2, constraint "xN": a register or an N constant).
;; Alternative 0 (isa noavx) is the two-operand psra form with the
;; destination tied to operand 1; alternative 1 (isa avx) is the
;; three-operand vpsra form.  length_immediate is 1 only when operand 2
;; is a const_int.
10343 (define_insn "ashr<mode>3"
10344 [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
10345 (ashiftrt:VI24_AVX2
10346 (match_operand:VI24_AVX2 1 "register_operand" "0,x")
10347 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
10348 "TARGET_SSE2"
10349 "@
10350 psra<ssemodesuffix>\t{%2, %0|%0, %2}
10351 vpsra<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10352 [(set_attr "isa" "noavx,avx")
10353 (set_attr "type" "sseishft")
10354 (set (attr "length_immediate")
10355 (if_then_else (match_operand 2 "const_int_operand")
10356 (const_string "1")
10357 (const_string "0")))
10358 (set_attr "prefix_data16" "1,*")
10359 (set_attr "prefix" "orig,vex")
10360 (set_attr "mode" "<sseinsnmode>")])
10361
;; Arithmetic right shift for VI24_AVX512BW_1 modes, enabled by
;; TARGET_AVX512VL.  Always emits the three-operand vpsra form, with
;; <mask_operand3> appended to the destination.  Alternative 0 takes the
;; count in a "v" register; alternative 1 takes an N constant and also
;; lets operand 1 come from memory.
10362 (define_insn "<mask_codefor>ashr<mode>3<mask_name>"
10363 [(set (match_operand:VI24_AVX512BW_1 0 "register_operand" "=v,v")
10364 (ashiftrt:VI24_AVX512BW_1
10365 (match_operand:VI24_AVX512BW_1 1 "nonimmediate_operand" "v,vm")
10366 (match_operand:SI 2 "nonmemory_operand" "v,N")))]
10367 "TARGET_AVX512VL"
10368 "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10369 [(set_attr "type" "sseishft")
10370 (set (attr "length_immediate")
10371 (if_then_else (match_operand 2 "const_int_operand")
10372 (const_string "1")
10373 (const_string "0")))
10374 (set_attr "mode" "<sseinsnmode>")])
10375
;; V2DI arithmetic right shift (vpsraq), enabled by TARGET_AVX512VL.
;; Unlike the neighbouring ashr patterns the count (operand 2) is DImode.
;; Alternative 0 takes the count in a "v" register; alternative 1 takes
;; an N constant and also lets operand 1 come from memory.
10376 (define_insn "<mask_codefor>ashrv2di3<mask_name>"
10377 [(set (match_operand:V2DI 0 "register_operand" "=v,v")
10378 (ashiftrt:V2DI
10379 (match_operand:V2DI 1 "nonimmediate_operand" "v,vm")
10380 (match_operand:DI 2 "nonmemory_operand" "v,N")))]
10381 "TARGET_AVX512VL"
10382 "vpsraq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10383 [(set_attr "type" "sseishft")
10384 (set (attr "length_immediate")
10385 (if_then_else (match_operand 2 "const_int_operand")
10386 (const_string "1")
10387 (const_string "0")))
10388 (set_attr "mode" "TI")])
10389
;; Arithmetic right shift for VI248_AVX512BW_AVX512VL modes under
;; TARGET_AVX512F; emits vpsra<ssemodesuffix> with <mask_operand3>
;; appended to the destination.  The SI count is either a "v" register
;; (alternative 0) or an N constant (alternative 1, which also accepts
;; operand 1 in memory).
10390 (define_insn "ashr<mode>3<mask_name>"
10391 [(set (match_operand:VI248_AVX512BW_AVX512VL 0 "register_operand" "=v,v")
10392 (ashiftrt:VI248_AVX512BW_AVX512VL
10393 (match_operand:VI248_AVX512BW_AVX512VL 1 "nonimmediate_operand" "v,vm")
10394 (match_operand:SI 2 "nonmemory_operand" "v,N")))]
10395 "TARGET_AVX512F"
10396 "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10397 [(set_attr "type" "sseishft")
10398 (set (attr "length_immediate")
10399 (if_then_else (match_operand 2 "const_int_operand")
10400 (const_string "1")
10401 (const_string "0")))
10402 (set_attr "mode" "<sseinsnmode>")])
10403
;; Logical shifts (any_lshift) of VI2_AVX2_AVX512BW vectors by an SI
;; count held in a register or given as an N constant.  Alternative 0 is
;; the two-operand legacy form (destination tied to operand 1);
;; alternative 1 is the three-operand v-prefixed form and is the only one
;; that prints <mask_operand3>.  The condition additionally requires
;; <mask_mode512bit_condition> and <mask_avx512bw_condition>.
10404 (define_insn "<shift_insn><mode>3<mask_name>"
10405 [(set (match_operand:VI2_AVX2_AVX512BW 0 "register_operand" "=x,v")
10406 (any_lshift:VI2_AVX2_AVX512BW
10407 (match_operand:VI2_AVX2_AVX512BW 1 "register_operand" "0,v")
10408 (match_operand:SI 2 "nonmemory_operand" "xN,vN")))]
10409 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
10410 "@
10411 p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
10412 vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10413 [(set_attr "isa" "noavx,avx")
10414 (set_attr "type" "sseishft")
10415 (set (attr "length_immediate")
10416 (if_then_else (match_operand 2 "const_int_operand")
10417 (const_string "1")
10418 (const_string "0")))
10419 (set_attr "prefix_data16" "1,*")
10420 (set_attr "prefix" "orig,vex")
10421 (set_attr "mode" "<sseinsnmode>")])
10422
;; Logical shifts (any_lshift) of VI48_AVX2 vectors by an SI count held
;; in a register or given as an N constant.  Alternative 0 is the
;; two-operand legacy form (destination tied to operand 1); alternative 1
;; is the three-operand v-prefixed form and is the only one that prints
;; <mask_operand3>.  Gated by TARGET_SSE2 && <mask_mode512bit_condition>.
10423 (define_insn "<shift_insn><mode>3<mask_name>"
10424 [(set (match_operand:VI48_AVX2 0 "register_operand" "=x,v")
10425 (any_lshift:VI48_AVX2
10426 (match_operand:VI48_AVX2 1 "register_operand" "0,v")
10427 (match_operand:SI 2 "nonmemory_operand" "xN,vN")))]
10428 "TARGET_SSE2 && <mask_mode512bit_condition>"
10429 "@
10430 p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
10431 vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10432 [(set_attr "isa" "noavx,avx")
10433 (set_attr "type" "sseishft")
10434 (set (attr "length_immediate")
10435 (if_then_else (match_operand 2 "const_int_operand")
10436 (const_string "1")
10437 (const_string "0")))
10438 (set_attr "prefix_data16" "1,*")
10439 (set_attr "prefix" "orig,vex")
10440 (set_attr "mode" "<sseinsnmode>")])
10441
;; Logical shifts (any_lshift) for the VI48_512 modes, gated by
;; TARGET_AVX512F && <mask_mode512bit_condition>.  Alternative 0 has the
;; source in a register with a register-or-N-constant count; alternative
;; 1 has the source in memory and therefore only accepts an N constant
;; count.  Both alternatives emit the same evex-prefixed template.
10442 (define_insn "<shift_insn><mode>3<mask_name>"
10443 [(set (match_operand:VI48_512 0 "register_operand" "=v,v")
10444 (any_lshift:VI48_512
10445 (match_operand:VI48_512 1 "nonimmediate_operand" "v,m")
10446 (match_operand:SI 2 "nonmemory_operand" "vN,N")))]
10447 "TARGET_AVX512F && <mask_mode512bit_condition>"
10448 "vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10449 [(set_attr "isa" "avx512f")
10450 (set_attr "type" "sseishft")
10451 (set (attr "length_immediate")
10452 (if_then_else (match_operand 2 "const_int_operand")
10453 (const_string "1")
10454 (const_string "0")))
10455 (set_attr "prefix" "evex")
10456 (set_attr "mode" "<sseinsnmode>")])
10457
10458
;; Whole-register left shift of a VI_128 vector (TARGET_SSE2).  The count
;; (operand 2) must satisfy const_0_to_255_mul_8_operand.  The
;; preparation code re-views operand 1 as V1TI, allocates a V1TI
;; temporary (operand 3) to receive the ashift, and sets operand 0 from
;; that temporary viewed in <MODE>mode (operand 4).
10459 (define_expand "vec_shl_<mode>"
10460 [(set (match_dup 3)
10461 (ashift:V1TI
10462 (match_operand:VI_128 1 "register_operand")
10463 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
10464 (set (match_operand:VI_128 0 "register_operand") (match_dup 4))]
10465 "TARGET_SSE2"
10466 {
10467 operands[1] = gen_lowpart (V1TImode, operands[1]);
10468 operands[3] = gen_reg_rtx (V1TImode);
10469 operands[4] = gen_lowpart (<MODE>mode, operands[3]);
10470 })
10471
;; Left shift of a VIMAX_AVX2 value by a constant that satisfies
;; const_0_to_255_mul_8_operand.  The output code divides the RTL count
;; by 8 before printing it as the pslldq/vpslldq immediate.  Alternative
;; 0 is the two-operand legacy form (destination tied to operand 1);
;; alternative 1 is the three-operand v-prefixed form.
10472 (define_insn "<sse2_avx2>_ashl<mode>3"
10473 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,v")
10474 (ashift:VIMAX_AVX2
10475 (match_operand:VIMAX_AVX2 1 "register_operand" "0,v")
10476 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
10477 "TARGET_SSE2"
10478 {
10479 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
10480
10481 switch (which_alternative)
10482 {
10483 case 0:
10484 return "pslldq\t{%2, %0|%0, %2}";
10485 case 1:
10486 return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
10487 default:
10488 gcc_unreachable ();
10489 }
10490 }
10491 [(set_attr "isa" "noavx,avx")
10492 (set_attr "type" "sseishft")
10493 (set_attr "length_immediate" "1")
10494 (set_attr "prefix_data16" "1,*")
10495 (set_attr "prefix" "orig,vex")
10496 (set_attr "mode" "<sseinsnmode>")])
10497
;; Whole-register logical right shift of a VI_128 vector (TARGET_SSE2).
;; The count (operand 2) must satisfy const_0_to_255_mul_8_operand.  The
;; preparation code re-views operand 1 as V1TI, allocates a V1TI
;; temporary (operand 3) to receive the lshiftrt, and sets operand 0
;; from that temporary viewed in <MODE>mode (operand 4).
10498 (define_expand "vec_shr_<mode>"
10499 [(set (match_dup 3)
10500 (lshiftrt:V1TI
10501 (match_operand:VI_128 1 "register_operand")
10502 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
10503 (set (match_operand:VI_128 0 "register_operand") (match_dup 4))]
10504 "TARGET_SSE2"
10505 {
10506 operands[1] = gen_lowpart (V1TImode, operands[1]);
10507 operands[3] = gen_reg_rtx (V1TImode);
10508 operands[4] = gen_lowpart (<MODE>mode, operands[3]);
10509 })
10510
;; Logical right shift of a VIMAX_AVX2 value by a constant that
;; satisfies const_0_to_255_mul_8_operand.  The output code divides the
;; RTL count by 8 before printing it as the psrldq/vpsrldq immediate.
;; Alternative 0 is the two-operand legacy form (destination tied to
;; operand 1); alternative 1 is the three-operand v-prefixed form.
;; Unlike the matching ashl pattern this one also sets atom_unit to
;; "sishuf".
10511 (define_insn "<sse2_avx2>_lshr<mode>3"
10512 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,v")
10513 (lshiftrt:VIMAX_AVX2
10514 (match_operand:VIMAX_AVX2 1 "register_operand" "0,v")
10515 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
10516 "TARGET_SSE2"
10517 {
10518 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
10519
10520 switch (which_alternative)
10521 {
10522 case 0:
10523 return "psrldq\t{%2, %0|%0, %2}";
10524 case 1:
10525 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
10526 default:
10527 gcc_unreachable ();
10528 }
10529 }
10530 [(set_attr "isa" "noavx,avx")
10531 (set_attr "type" "sseishft")
10532 (set_attr "length_immediate" "1")
10533 (set_attr "atom_unit" "sishuf")
10534 (set_attr "prefix_data16" "1,*")
10535 (set_attr "prefix" "orig,vex")
10536 (set_attr "mode" "<sseinsnmode>")])
10537
;; Variable rotate (any_rotate) for VI48_AVX512VL modes under
;; TARGET_AVX512F.  The rotate amount (operand 2) is itself a vector of
;; the same mode, taken from a register or memory.  Emits
;; vp<rotate>v<ssemodesuffix> with <mask_operand3> appended to the
;; destination.
10538 (define_insn "<avx512>_<rotate>v<mode><mask_name>"
10539 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
10540 (any_rotate:VI48_AVX512VL
10541 (match_operand:VI48_AVX512VL 1 "register_operand" "v")
10542 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")))]
10543 "TARGET_AVX512F"
10544 "vp<rotate>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10545 [(set_attr "prefix" "evex")
10546 (set_attr "mode" "<sseinsnmode>")])
10547
;; Rotate-by-constant (any_rotate) for VI48_AVX512VL modes under
;; TARGET_AVX512F.  The source may be a register or memory; the rotate
;; amount (operand 2) is an SI value matching const_0_to_255_operand and
;; carries no constraint string.
10548 (define_insn "<avx512>_<rotate><mode><mask_name>"
10549 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
10550 (any_rotate:VI48_AVX512VL
10551 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")
10552 (match_operand:SI 2 "const_0_to_255_operand")))]
10553 "TARGET_AVX512F"
10554 "vp<rotate><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10555 [(set_attr "prefix" "evex")
10556 (set_attr "mode" "<sseinsnmode>")])
10557
;; Max/min (maxmin iterator) expander for the
;; VI124_256_AVX512F_AVX512BW modes, enabled by TARGET_AVX2.  The
;; preparation statement only runs ix86_fixup_binary_operands_no_copy on
;; the operands; the RTL template itself is then emitted.
10558 (define_expand "<code><mode>3"
10559 [(set (match_operand:VI124_256_AVX512F_AVX512BW 0 "register_operand")
10560 (maxmin:VI124_256_AVX512F_AVX512BW
10561 (match_operand:VI124_256_AVX512F_AVX512BW 1 "nonimmediate_operand")
10562 (match_operand:VI124_256_AVX512F_AVX512BW 2 "nonimmediate_operand")))]
10563 "TARGET_AVX2"
10564 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
10565
;; AVX2 max/min insn for the VI124_256 modes.  Operand 1 is marked
;; commutative ("%"); operand 2 may be memory.  Matching additionally
;; requires ix86_binary_operator_ok.  Emits the three-operand
;; vp<maxmin_int><ssemodesuffix> form.
10566 (define_insn "*avx2_<code><mode>3"
10567 [(set (match_operand:VI124_256 0 "register_operand" "=v")
10568 (maxmin:VI124_256
10569 (match_operand:VI124_256 1 "nonimmediate_operand" "%v")
10570 (match_operand:VI124_256 2 "nonimmediate_operand" "vm")))]
10571 "TARGET_AVX2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10572 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10573 [(set_attr "type" "sseiadd")
10574 (set_attr "prefix_extra" "1")
10575 (set_attr "prefix" "vex")
10576 (set_attr "mode" "OI")])
10577
;; Masked max/min expander for VI48_AVX512VL modes (TARGET_AVX512F).
;; The result is a vec_merge of the max/min of operands 1 and 2 with
;; operand 3, selected by the <avx512fmaskmode> mask in operand 4.  The
;; preparation statement only runs ix86_fixup_binary_operands_no_copy.
10578 (define_expand "<code><mode>3_mask"
10579 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
10580 (vec_merge:VI48_AVX512VL
10581 (maxmin:VI48_AVX512VL
10582 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
10583 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
10584 (match_operand:VI48_AVX512VL 3 "vector_move_operand")
10585 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
10586 "TARGET_AVX512F"
10587 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
10588
;; Max/min insn for the VI48_AVX512VL modes.  Operand 1 is marked
;; commutative ("%"); operand 2 may be memory; matching additionally
;; requires ix86_binary_operator_ok.
;; NOTE(review): despite the "avx512bw" in the pattern name, the
;; condition only tests TARGET_AVX512F.
10589 (define_insn "*avx512bw_<code><mode>3<mask_name>"
10590 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
10591 (maxmin:VI48_AVX512VL
10592 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "%v")
10593 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")))]
10594 "TARGET_AVX512F && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10595 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10596 [(set_attr "type" "sseiadd")
10597 (set_attr "prefix_extra" "1")
10598 (set_attr "prefix" "maybe_evex")
10599 (set_attr "mode" "<sseinsnmode>")])
10600
;; Max/min insn for the VI12_AVX512VL modes, enabled by TARGET_AVX512BW.
;; Operand 1 must be a register (no commutative marker); operand 2 may
;; be a register or memory.  Emits vp<maxmin_int><ssemodesuffix> with
;; <mask_operand3> appended to the destination.
10601 (define_insn "<mask_codefor><code><mode>3<mask_name>"
10602 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
10603 (maxmin:VI12_AVX512VL
10604 (match_operand:VI12_AVX512VL 1 "register_operand" "v")
10605 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")))]
10606 "TARGET_AVX512BW"
10607 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10608 [(set_attr "type" "sseiadd")
10609 (set_attr "prefix" "evex")
10610 (set_attr "mode" "<sseinsnmode>")])
10611
;; Max/min expander for the VI8_AVX2_AVX512BW modes (TARGET_SSE4_2).
;; When TARGET_AVX512F holds and the mode is V8DI or TARGET_AVX512VL is
;; set, only the operands are fixed up and the RTL template is emitted.
;; Otherwise the operation is synthesised as a compare-and-select via
;; ix86_expand_int_vcond, with xops laid out as:
;;   xops[0]    destination
;;   xops[1..2] operands 1 and 2, in that order for SMAX/UMAX and
;;              swapped for the min codes
;;   xops[3]    the comparison operands[1] > operands[2] (GTU for the
;;              unsigned codes, GT otherwise)
;;   xops[4..5] the comparison inputs, operands 1 and 2
;; The expansion is asserted to succeed.
10612 (define_expand "<code><mode>3"
10613 [(set (match_operand:VI8_AVX2_AVX512BW 0 "register_operand")
10614 (maxmin:VI8_AVX2_AVX512BW
10615 (match_operand:VI8_AVX2_AVX512BW 1 "register_operand")
10616 (match_operand:VI8_AVX2_AVX512BW 2 "register_operand")))]
10617 "TARGET_SSE4_2"
10618 {
10619 if (TARGET_AVX512F
10620 && (<MODE>mode == V8DImode || TARGET_AVX512VL))
10621 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
10622 else
10623 {
10624 enum rtx_code code;
10625 rtx xops[6];
10626 bool ok;
10627
10628
10629 xops[0] = operands[0];
10630
10631 if (<CODE> == SMAX || <CODE> == UMAX)
10632 {
10633 xops[1] = operands[1];
10634 xops[2] = operands[2];
10635 }
10636 else
10637 {
10638 xops[1] = operands[2];
10639 xops[2] = operands[1];
10640 }
10641
10642 code = (<CODE> == UMAX || <CODE> == UMIN) ? GTU : GT;
10643
10644 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
10645 xops[4] = operands[1];
10646 xops[5] = operands[2];
10647
10648 ok = ix86_expand_int_vcond (xops);
10649 gcc_assert (ok);
10650 DONE;
10651 }
10652 })
10653
;; Signed max/min (smaxmin) expander for the VI124_128 modes under
;; TARGET_SSE2.  With TARGET_SSE4_1, or for V8HI on any target, only the
;; operands are fixed up and the RTL template is emitted.  Otherwise both
;; inputs are forced into registers and the operation is synthesised via
;; ix86_expand_int_vcond using the comparison operands[1] > operands[2]:
;; xops[1..2] are operands 1 and 2 in that order for SMAX and swapped
;; for SMIN.  The expansion is asserted to succeed.
10654 (define_expand "<code><mode>3"
10655 [(set (match_operand:VI124_128 0 "register_operand")
10656 (smaxmin:VI124_128
10657 (match_operand:VI124_128 1 "nonimmediate_operand")
10658 (match_operand:VI124_128 2 "nonimmediate_operand")))]
10659 "TARGET_SSE2"
10660 {
10661 if (TARGET_SSE4_1 || <MODE>mode == V8HImode)
10662 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
10663 else
10664 {
10665 rtx xops[6];
10666 bool ok;
10667
10668 xops[0] = operands[0];
10669 operands[1] = force_reg (<MODE>mode, operands[1]);
10670 operands[2] = force_reg (<MODE>mode, operands[2]);
10671
10672 if (<CODE> == SMAX)
10673 {
10674 xops[1] = operands[1];
10675 xops[2] = operands[2];
10676 }
10677 else
10678 {
10679 xops[1] = operands[2];
10680 xops[2] = operands[1];
10681 }
10682
10683 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
10684 xops[4] = operands[1];
10685 xops[5] = operands[2];
10686
10687 ok = ix86_expand_int_vcond (xops);
10688 gcc_assert (ok);
10689 DONE;
10690 }
10691 })
10692
;; Signed max/min insn for the VI14_128 modes, gated by TARGET_SSE4_1,
;; <mask_mode512bit_condition> and ix86_binary_operator_ok.  Alternatives
;; 0 and 1 (isa noavx) are two-operand forms with the destination tied to
;; operand 1; they differ only in register class (Yr vs. *x).
;; Alternative 2 (isa avx) is the three-operand v-prefixed form and is
;; the only one that prints <mask_operand3>.
10693 (define_insn "*sse4_1_<code><mode>3<mask_name>"
10694 [(set (match_operand:VI14_128 0 "register_operand" "=Yr,*x,v")
10695 (smaxmin:VI14_128
10696 (match_operand:VI14_128 1 "nonimmediate_operand" "%0,0,v")
10697 (match_operand:VI14_128 2 "nonimmediate_operand" "Yrm,*xm,vm")))]
10698 "TARGET_SSE4_1
10699 && <mask_mode512bit_condition>
10700 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10701 "@
10702 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
10703 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
10704 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10705 [(set_attr "isa" "noavx,noavx,avx")
10706 (set_attr "type" "sseiadd")
10707 (set_attr "prefix_extra" "1,1,*")
10708 (set_attr "prefix" "orig,orig,vex")
10709 (set_attr "mode" "TI")])
10710
;; Signed max/min insn for V8HI, available from TARGET_SSE2 and gated by
;; ix86_binary_operator_ok.  Alternative 0 (isa noavx) is the two-operand
;; p<maxmin_int>w form with the destination tied to operand 1;
;; alternative 1 (isa avx) is the three-operand vp<maxmin_int>w form.
10711 (define_insn "*<code>v8hi3"
10712 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
10713 (smaxmin:V8HI
10714 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
10715 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))]
10716 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
10717 "@
10718 p<maxmin_int>w\t{%2, %0|%0, %2}
10719 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
10720 [(set_attr "isa" "noavx,avx")
10721 (set_attr "type" "sseiadd")
10722 (set_attr "prefix_data16" "1,*")
10723 (set_attr "prefix_extra" "*,1")
10724 (set_attr "prefix" "orig,vex")
10725 (set_attr "mode" "TI")])
10726
;; Unsigned max/min (umaxmin) expander for the VI124_128 modes under
;; TARGET_SSE2.  Three strategies:
;;  - TARGET_SSE4_1, or V16QI on any target: fix up the operands and emit
;;    the RTL template.
;;  - UMAX on V8HI: emit sse2_ussubv8hi3 (op3 = op1 - op2) followed by
;;    addv8hi3 (op0 = op3 + op2).  A fresh temporary is used for op3 when
;;    the destination is rtx_equal_p to operand 2, so operand 2 is still
;;    intact for the add.
;;    NOTE(review): presumably ussub is an unsigned saturating subtract,
;;    making the sum equal max (op1, op2) -- confirm.
;;  - Everything else: force both inputs into registers and synthesise
;;    through ix86_expand_int_vcond with the comparison
;;    operands[1] >u operands[2]; xops[1..2] are operands 1 and 2 in that
;;    order for UMAX and swapped for UMIN.  The expansion is asserted to
;;    succeed.
10727 (define_expand "<code><mode>3"
10728 [(set (match_operand:VI124_128 0 "register_operand")
10729 (umaxmin:VI124_128
10730 (match_operand:VI124_128 1 "nonimmediate_operand")
10731 (match_operand:VI124_128 2 "nonimmediate_operand")))]
10732 "TARGET_SSE2"
10733 {
10734 if (TARGET_SSE4_1 || <MODE>mode == V16QImode)
10735 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
10736 else if (<CODE> == UMAX && <MODE>mode == V8HImode)
10737 {
10738 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
10739 operands[1] = force_reg (<MODE>mode, operands[1]);
10740 if (rtx_equal_p (op3, op2))
10741 op3 = gen_reg_rtx (V8HImode);
10742 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
10743 emit_insn (gen_addv8hi3 (op0, op3, op2));
10744 DONE;
10745 }
10746 else
10747 {
10748 rtx xops[6];
10749 bool ok;
10750
10751 operands[1] = force_reg (<MODE>mode, operands[1]);
10752 operands[2] = force_reg (<MODE>mode, operands[2]);
10753
10754 xops[0] = operands[0];
10755
10756 if (<CODE> == UMAX)
10757 {
10758 xops[1] = operands[1];
10759 xops[2] = operands[2];
10760 }
10761 else
10762 {
10763 xops[1] = operands[2];
10764 xops[2] = operands[1];
10765 }
10766
10767 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
10768 xops[4] = operands[1];
10769 xops[5] = operands[2];
10770
10771 ok = ix86_expand_int_vcond (xops);
10772 gcc_assert (ok);
10773 DONE;
10774 }
10775 })
10776
;; Unsigned max/min insn for the VI24_128 modes, gated by TARGET_SSE4_1,
;; <mask_mode512bit_condition> and ix86_binary_operator_ok.  Alternatives
;; 0 and 1 (isa noavx) are two-operand forms with the destination tied to
;; operand 1; they differ only in register class (Yr vs. *x).
;; Alternative 2 (isa avx) is the three-operand v-prefixed form and is
;; the only one that prints <mask_operand3>.
10777 (define_insn "*sse4_1_<code><mode>3<mask_name>"
10778 [(set (match_operand:VI24_128 0 "register_operand" "=Yr,*x,v")
10779 (umaxmin:VI24_128
10780 (match_operand:VI24_128 1 "nonimmediate_operand" "%0,0,v")
10781 (match_operand:VI24_128 2 "nonimmediate_operand" "Yrm,*xm,vm")))]
10782 "TARGET_SSE4_1
10783 && <mask_mode512bit_condition>
10784 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10785 "@
10786 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
10787 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
10788 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10789 [(set_attr "isa" "noavx,noavx,avx")
10790 (set_attr "type" "sseiadd")
10791 (set_attr "prefix_extra" "1,1,*")
10792 (set_attr "prefix" "orig,orig,vex")
10793 (set_attr "mode" "TI")])
10794
;; Unsigned max/min insn for V16QI, available from TARGET_SSE2 and gated
;; by ix86_binary_operator_ok.  Alternative 0 (isa noavx) is the
;; two-operand p<maxmin_int>b form with the destination tied to operand
;; 1; alternative 1 (isa avx) is the three-operand vp<maxmin_int>b form.
10795 (define_insn "*<code>v16qi3"
10796 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
10797 (umaxmin:V16QI
10798 (match_operand:V16QI 1 "nonimmediate_operand" "%0,x")
10799 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))]
10800 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
10801 "@
10802 p<maxmin_int>b\t{%2, %0|%0, %2}
10803 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
10804 [(set_attr "isa" "noavx,avx")
10805 (set_attr "type" "sseiadd")
10806 (set_attr "prefix_data16" "1,*")
10807 (set_attr "prefix_extra" "*,1")
10808 (set_attr "prefix" "orig,vex")
10809 (set_attr "mode" "TI")])
10810
10811 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10812 ;;
10813 ;; Parallel integral comparisons
10814 ;;
10815 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10816
;; Named expander for element-wise equality on VI_256 vectors
;; (TARGET_AVX2).  The preparation statement only runs
;; ix86_fixup_binary_operands_no_copy; the eq template is then emitted.
10817 (define_expand "avx2_eq<mode>3"
10818 [(set (match_operand:VI_256 0 "register_operand")
10819 (eq:VI_256
10820 (match_operand:VI_256 1 "nonimmediate_operand")
10821 (match_operand:VI_256 2 "nonimmediate_operand")))]
10822 "TARGET_AVX2"
10823 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
10824
;; AVX2 equality-compare insn for VI_256 vectors; emits the three-operand
;; vpcmpeq<ssemodesuffix>.  Operand 1 is marked commutative ("%");
;; operand 2 may be memory; matching additionally requires
;; ix86_binary_operator_ok.
10825 (define_insn "*avx2_eq<mode>3"
10826 [(set (match_operand:VI_256 0 "register_operand" "=x")
10827 (eq:VI_256
10828 (match_operand:VI_256 1 "nonimmediate_operand" "%x")
10829 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
10830 "TARGET_AVX2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
10831 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10832 [(set_attr "type" "ssecmp")
10833 (set_attr "prefix_extra" "1")
10834 (set_attr "prefix" "vex")
10835 (set_attr "mode" "OI")])
10836
;; Named expander for the mask-producing equality compare on
;; VI12_AVX512VL vectors (TARGET_AVX512BW).  The result is an
;; <avx512fmaskmode> value expressed as UNSPEC_MASKED_EQ of the two
;; inputs.  The preparation statement only runs
;; ix86_fixup_binary_operands_no_copy.
10837 (define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>"
10838 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
10839 (unspec:<avx512fmaskmode>
10840 [(match_operand:VI12_AVX512VL 1 "register_operand")
10841 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand")]
10842 UNSPEC_MASKED_EQ))]
10843 "TARGET_AVX512BW"
10844 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
10845
;; Named expander for the mask-producing equality compare on
;; VI48_AVX512VL vectors (TARGET_AVX512F).  The result is an
;; <avx512fmaskmode> value expressed as UNSPEC_MASKED_EQ of the two
;; inputs.  The preparation statement only runs
;; ix86_fixup_binary_operands_no_copy.
10846 (define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>"
10847 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
10848 (unspec:<avx512fmaskmode>
10849 [(match_operand:VI48_AVX512VL 1 "register_operand")
10850 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand")]
10851 UNSPEC_MASKED_EQ))]
10852 "TARGET_AVX512F"
10853 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
10854
;; Mask-producing equality compare for the VI12_AVX512VL modes: emits
;; vpcmpeq<ssemodesuffix> into a mask register (constraint Yk), with
;; <mask_scalar_merge_operand3> appended to the destination.  Operand 1
;; is marked commutative ("%"); operand 2 may be memory.
;; The condition requires TARGET_AVX512BW, in agreement with the
;; expander of the same base name for VI12_AVX512VL and with the
;; VI12_AVX512VL greater-than insn; TARGET_AVX512F alone would let this
;; pattern be recognised on targets lacking the byte/word compare forms.
10855 (define_insn "<avx512>_eq<mode>3<mask_scalar_merge_name>_1"
10856 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
10857 (unspec:<avx512fmaskmode>
10858 [(match_operand:VI12_AVX512VL 1 "register_operand" "%v")
10859 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")]
10860 UNSPEC_MASKED_EQ))]
10861 "TARGET_AVX512BW && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
10862 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
10863 [(set_attr "type" "ssecmp")
10864 (set_attr "prefix_extra" "1")
10865 (set_attr "prefix" "evex")
10866 (set_attr "mode" "<sseinsnmode>")])
10867
;; Mask-producing equality compare for the VI48_AVX512VL modes
;; (TARGET_AVX512F, gated by ix86_binary_operator_ok): emits
;; vpcmpeq<ssemodesuffix> into a mask register (constraint Yk), with
;; <mask_scalar_merge_operand3> appended to the destination.  Operand 1
;; is marked commutative ("%"); operand 2 may be memory.
10868 (define_insn "<avx512>_eq<mode>3<mask_scalar_merge_name>_1"
10869 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
10870 (unspec:<avx512fmaskmode>
10871 [(match_operand:VI48_AVX512VL 1 "register_operand" "%v")
10872 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")]
10873 UNSPEC_MASKED_EQ))]
10874 "TARGET_AVX512F && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
10875 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
10876 [(set_attr "type" "ssecmp")
10877 (set_attr "prefix_extra" "1")
10878 (set_attr "prefix" "evex")
10879 (set_attr "mode" "<sseinsnmode>")])
10880
;; V2DI equality compare (pcmpeqq), gated by TARGET_SSE4_1 and
;; ix86_binary_operator_ok.  Alternatives 0 and 1 (isa noavx) are
;; two-operand forms with the destination tied to operand 1, differing
;; only in register class (Yr vs. *x); alternative 2 (isa avx) is the
;; three-operand vpcmpeqq form.
10881 (define_insn "*sse4_1_eqv2di3"
10882 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,x")
10883 (eq:V2DI
10884 (match_operand:V2DI 1 "nonimmediate_operand" "%0,0,x")
10885 (match_operand:V2DI 2 "nonimmediate_operand" "Yrm,*xm,xm")))]
10886 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
10887 "@
10888 pcmpeqq\t{%2, %0|%0, %2}
10889 pcmpeqq\t{%2, %0|%0, %2}
10890 vpcmpeqq\t{%2, %1, %0|%0, %1, %2}"
10891 [(set_attr "isa" "noavx,noavx,avx")
10892 (set_attr "type" "ssecmp")
10893 (set_attr "prefix_extra" "1")
10894 (set_attr "prefix" "orig,orig,vex")
10895 (set_attr "mode" "TI")])
10896
;; Equality compare for the VI124_128 modes.  Enabled for TARGET_SSE2
;; only when TARGET_XOP is off, and gated by ix86_binary_operator_ok.
;; Alternative 0 (isa noavx) is the two-operand pcmpeq form with the
;; destination tied to operand 1; alternative 1 (isa avx) is the
;; three-operand vpcmpeq form.
10897 (define_insn "*sse2_eq<mode>3"
10898 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
10899 (eq:VI124_128
10900 (match_operand:VI124_128 1 "nonimmediate_operand" "%0,x")
10901 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
10902 "TARGET_SSE2 && !TARGET_XOP
10903 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
10904 "@
10905 pcmpeq<ssemodesuffix>\t{%2, %0|%0, %2}
10906 vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10907 [(set_attr "isa" "noavx,avx")
10908 (set_attr "type" "ssecmp")
10909 (set_attr "prefix_data16" "1,*")
10910 (set_attr "prefix" "orig,vex")
10911 (set_attr "mode" "TI")])
10912
;; Named expander for equality on the VI124_128 modes, enabled for
;; TARGET_SSE2 only when TARGET_XOP is off.  The preparation statement
;; only runs ix86_fixup_binary_operands_no_copy; the eq template is then
;; emitted.
10913 (define_expand "sse2_eq<mode>3"
10914 [(set (match_operand:VI124_128 0 "register_operand")
10915 (eq:VI124_128
10916 (match_operand:VI124_128 1 "nonimmediate_operand")
10917 (match_operand:VI124_128 2 "nonimmediate_operand")))]
10918 "TARGET_SSE2 && !TARGET_XOP "
10919 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
10920
;; Named expander for V2DI equality (TARGET_SSE4_1).  The preparation
;; statement only runs ix86_fixup_binary_operands_no_copy; the eq
;; template is then emitted.
10921 (define_expand "sse4_1_eqv2di3"
10922 [(set (match_operand:V2DI 0 "register_operand")
10923 (eq:V2DI
10924 (match_operand:V2DI 1 "nonimmediate_operand")
10925 (match_operand:V2DI 2 "nonimmediate_operand")))]
10926 "TARGET_SSE4_1"
10927 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
10928
;; V2DI signed greater-than compare (pcmpgtq), enabled by TARGET_SSE4_2.
;; Operand 1 must be a register and carries no commutative marker.
;; Alternatives 0 and 1 (isa noavx) are two-operand forms with the
;; destination tied to operand 1, differing only in register class;
;; alternative 2 (isa avx) is the three-operand vpcmpgtq form.
10929 (define_insn "sse4_2_gtv2di3"
10930 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,x")
10931 (gt:V2DI
10932 (match_operand:V2DI 1 "register_operand" "0,0,x")
10933 (match_operand:V2DI 2 "nonimmediate_operand" "Yrm,*xm,xm")))]
10934 "TARGET_SSE4_2"
10935 "@
10936 pcmpgtq\t{%2, %0|%0, %2}
10937 pcmpgtq\t{%2, %0|%0, %2}
10938 vpcmpgtq\t{%2, %1, %0|%0, %1, %2}"
10939 [(set_attr "isa" "noavx,noavx,avx")
10940 (set_attr "type" "ssecmp")
10941 (set_attr "prefix_extra" "1")
10942 (set_attr "prefix" "orig,orig,vex")
10943 (set_attr "mode" "TI")])
10944
;; AVX2 greater-than compare for VI_256 vectors; emits the three-operand
;; vpcmpgt<ssemodesuffix>.  Operand 1 must be a register (no commutative
;; marker); operand 2 may be a register or memory.
10945 (define_insn "avx2_gt<mode>3"
10946 [(set (match_operand:VI_256 0 "register_operand" "=x")
10947 (gt:VI_256
10948 (match_operand:VI_256 1 "register_operand" "x")
10949 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
10950 "TARGET_AVX2"
10951 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10952 [(set_attr "type" "ssecmp")
10953 (set_attr "prefix_extra" "1")
10954 (set_attr "prefix" "vex")
10955 (set_attr "mode" "OI")])
10956
;; Mask-producing greater-than compare for the VI48_AVX512VL modes
;; (TARGET_AVX512F): emits vpcmpgt<ssemodesuffix> into a mask register
;; (constraint Yk), with <mask_scalar_merge_operand3> appended to the
;; destination.  The comparison is represented as UNSPEC_MASKED_GT.
10957 (define_insn "<avx512>_gt<mode>3<mask_scalar_merge_name>"
10958 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
10959 (unspec:<avx512fmaskmode>
10960 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
10961 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))]
10962 "TARGET_AVX512F"
10963 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
10964 [(set_attr "type" "ssecmp")
10965 (set_attr "prefix_extra" "1")
10966 (set_attr "prefix" "evex")
10967 (set_attr "mode" "<sseinsnmode>")])
10968
;; Mask-producing greater-than compare for the VI12_AVX512VL modes
;; (TARGET_AVX512BW): emits vpcmpgt<ssemodesuffix> into a mask register
;; (constraint Yk), with <mask_scalar_merge_operand3> appended to the
;; destination.  The comparison is represented as UNSPEC_MASKED_GT.
10969 (define_insn "<avx512>_gt<mode>3<mask_scalar_merge_name>"
10970 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
10971 (unspec:<avx512fmaskmode>
10972 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
10973 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))]
10974 "TARGET_AVX512BW"
10975 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
10976 [(set_attr "type" "ssecmp")
10977 (set_attr "prefix_extra" "1")
10978 (set_attr "prefix" "evex")
10979 (set_attr "mode" "<sseinsnmode>")])
10980
;; Greater-than compare for the VI124_128 modes, enabled for TARGET_SSE2
;; only when TARGET_XOP is off.  Operand 1 must be a register (no
;; commutative marker).  Alternative 0 (isa noavx) is the two-operand
;; pcmpgt form with the destination tied to operand 1; alternative 1
;; (isa avx) is the three-operand vpcmpgt form.
10981 (define_insn "sse2_gt<mode>3"
10982 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
10983 (gt:VI124_128
10984 (match_operand:VI124_128 1 "register_operand" "0,x")
10985 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
10986 "TARGET_SSE2 && !TARGET_XOP"
10987 "@
10988 pcmpgt<ssemodesuffix>\t{%2, %0|%0, %2}
10989 vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10990 [(set_attr "isa" "noavx,avx")
10991 (set_attr "type" "ssecmp")
10992 (set_attr "prefix_data16" "1,*")
10993 (set_attr "prefix" "orig,vex")
10994 (set_attr "mode" "TI")])
10995
;; Vector conditional select with an integer comparison, 512-bit modes.
;; Operand 3 is the comparison operator applied to operands 4 and 5
;; (VI_512); operands 1 and 2 are the V_512 values chosen when it is true
;; and false respectively.  Enabled for TARGET_AVX512F when the data and
;; comparison modes have the same number of elements.  Expansion is done
;; by ix86_expand_int_vcond and asserted to succeed.
10996 (define_expand "vcond<V_512:mode><VI_512:mode>"
10997 [(set (match_operand:V_512 0 "register_operand")
10998 (if_then_else:V_512
10999 (match_operator 3 ""
11000 [(match_operand:VI_512 4 "nonimmediate_operand")
11001 (match_operand:VI_512 5 "general_operand")])
11002 (match_operand:V_512 1)
11003 (match_operand:V_512 2)))]
11004 "TARGET_AVX512F
11005 && (GET_MODE_NUNITS (<V_512:MODE>mode)
11006 == GET_MODE_NUNITS (<VI_512:MODE>mode))"
11007 {
11008 bool ok = ix86_expand_int_vcond (operands);
11009 gcc_assert (ok);
11010 DONE;
11011 })
11012
;; Vector conditional select with an integer comparison, 256-bit modes.
;; Operand 3 is the comparison operator applied to operands 4 and 5
;; (VI_256); operands 1 and 2 are the V_256 values chosen when it is true
;; and false respectively.  Enabled for TARGET_AVX2 when the data and
;; comparison modes have the same number of elements.  Expansion is done
;; by ix86_expand_int_vcond and asserted to succeed.
11013 (define_expand "vcond<V_256:mode><VI_256:mode>"
11014 [(set (match_operand:V_256 0 "register_operand")
11015 (if_then_else:V_256
11016 (match_operator 3 ""
11017 [(match_operand:VI_256 4 "nonimmediate_operand")
11018 (match_operand:VI_256 5 "general_operand")])
11019 (match_operand:V_256 1)
11020 (match_operand:V_256 2)))]
11021 "TARGET_AVX2
11022 && (GET_MODE_NUNITS (<V_256:MODE>mode)
11023 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
11024 {
11025 bool ok = ix86_expand_int_vcond (operands);
11026 gcc_assert (ok);
11027 DONE;
11028 })
11029
;; Vector conditional select with an integer comparison, 128-bit modes.
;; Operand 3 is the comparison operator applied to operands 4 and 5
;; (VI124_128); operands 1 and 2 are the V_128 values chosen when it is
;; true and false respectively.  Enabled for TARGET_SSE2 when the data
;; and comparison modes have the same number of elements.  Expansion is
;; done by ix86_expand_int_vcond and asserted to succeed.
11030 (define_expand "vcond<V_128:mode><VI124_128:mode>"
11031 [(set (match_operand:V_128 0 "register_operand")
11032 (if_then_else:V_128
11033 (match_operator 3 ""
11034 [(match_operand:VI124_128 4 "nonimmediate_operand")
11035 (match_operand:VI124_128 5 "general_operand")])
11036 (match_operand:V_128 1)
11037 (match_operand:V_128 2)))]
11038 "TARGET_SSE2
11039 && (GET_MODE_NUNITS (<V_128:MODE>mode)
11040 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
11041 {
11042 bool ok = ix86_expand_int_vcond (operands);
11043 gcc_assert (ok);
11044 DONE;
11045 })
11046
;; Vector conditional select driven by a V2DI comparison, producing a
;; VI8F_128 result.  Operand 3 is the comparison operator applied to
;; operands 4 and 5; operands 1 and 2 are the values chosen when it is
;; true and false respectively.  Requires TARGET_SSE4_2; there is no
;; element-count check in the condition.  Expansion is done by
;; ix86_expand_int_vcond and asserted to succeed.
11047 (define_expand "vcond<VI8F_128:mode>v2di"
11048 [(set (match_operand:VI8F_128 0 "register_operand")
11049 (if_then_else:VI8F_128
11050 (match_operator 3 ""
11051 [(match_operand:V2DI 4 "nonimmediate_operand")
11052 (match_operand:V2DI 5 "general_operand")])
11053 (match_operand:VI8F_128 1)
11054 (match_operand:VI8F_128 2)))]
11055 "TARGET_SSE4_2"
11056 {
11057 bool ok = ix86_expand_int_vcond (operands);
11058 gcc_assert (ok);
11059 DONE;
11060 })
11061
;; vcondu counterpart of the 512-bit vector conditional select.  Operand
;; 3 is the comparison operator applied to operands 4 and 5 (VI_512, both
;; nonimmediate_operand); operands 1 and 2 are general_operand V_512
;; values chosen when it is true and false respectively.  Enabled for
;; TARGET_AVX512F when the data and comparison modes have the same number
;; of elements.  Expansion is done by ix86_expand_int_vcond and asserted
;; to succeed.
11062 (define_expand "vcondu<V_512:mode><VI_512:mode>"
11063 [(set (match_operand:V_512 0 "register_operand")
11064 (if_then_else:V_512
11065 (match_operator 3 ""
11066 [(match_operand:VI_512 4 "nonimmediate_operand")
11067 (match_operand:VI_512 5 "nonimmediate_operand")])
11068 (match_operand:V_512 1 "general_operand")
11069 (match_operand:V_512 2 "general_operand")))]
11070 "TARGET_AVX512F
11071 && (GET_MODE_NUNITS (<V_512:MODE>mode)
11072 == GET_MODE_NUNITS (<VI_512:MODE>mode))"
11073 {
11074 bool ok = ix86_expand_int_vcond (operands);
11075 gcc_assert (ok);
11076 DONE;
11077 })
11078
;; vcondu counterpart of the 256-bit vector conditional select.  Operand
;; 3 is the comparison operator applied to operands 4 and 5 (VI_256, both
;; nonimmediate_operand); operands 1 and 2 are general_operand V_256
;; values chosen when it is true and false respectively.  Enabled for
;; TARGET_AVX2 when the data and comparison modes have the same number of
;; elements.  Expansion is done by ix86_expand_int_vcond and asserted to
;; succeed.
11079 (define_expand "vcondu<V_256:mode><VI_256:mode>"
11080 [(set (match_operand:V_256 0 "register_operand")
11081 (if_then_else:V_256
11082 (match_operator 3 ""
11083 [(match_operand:VI_256 4 "nonimmediate_operand")
11084 (match_operand:VI_256 5 "nonimmediate_operand")])
11085 (match_operand:V_256 1 "general_operand")
11086 (match_operand:V_256 2 "general_operand")))]
11087 "TARGET_AVX2
11088 && (GET_MODE_NUNITS (<V_256:MODE>mode)
11089 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
11090 {
11091 bool ok = ix86_expand_int_vcond (operands);
11092 gcc_assert (ok);
11093 DONE;
11094 })
11095
;; vcondu counterpart of the 128-bit vector conditional select.  Operand
;; 3 is the comparison operator applied to operands 4 and 5 (VI124_128,
;; both nonimmediate_operand); operands 1 and 2 are general_operand V_128
;; values chosen when it is true and false respectively.  Enabled for
;; TARGET_SSE2 when the data and comparison modes have the same number of
;; elements.  Expansion is done by ix86_expand_int_vcond and asserted to
;; succeed.
11096 (define_expand "vcondu<V_128:mode><VI124_128:mode>"
11097 [(set (match_operand:V_128 0 "register_operand")
11098 (if_then_else:V_128
11099 (match_operator 3 ""
11100 [(match_operand:VI124_128 4 "nonimmediate_operand")
11101 (match_operand:VI124_128 5 "nonimmediate_operand")])
11102 (match_operand:V_128 1 "general_operand")
11103 (match_operand:V_128 2 "general_operand")))]
11104 "TARGET_SSE2
11105 && (GET_MODE_NUNITS (<V_128:MODE>mode)
11106 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
11107 {
11108 bool ok = ix86_expand_int_vcond (operands);
11109 gcc_assert (ok);
11110 DONE;
11111 })
11112
;; vcondu counterpart of the V2DI-compare conditional select, producing
;; a VI8F_128 result.  Operand 3 is the comparison operator applied to
;; operands 4 and 5 (both nonimmediate_operand); operands 1 and 2 are
;; general_operand values chosen when it is true and false respectively.
;; Requires TARGET_SSE4_2.  Expansion is done by ix86_expand_int_vcond
;; and asserted to succeed.
11113 (define_expand "vcondu<VI8F_128:mode>v2di"
11114 [(set (match_operand:VI8F_128 0 "register_operand")
11115 (if_then_else:VI8F_128
11116 (match_operator 3 ""
11117 [(match_operand:V2DI 4 "nonimmediate_operand")
11118 (match_operand:V2DI 5 "nonimmediate_operand")])
11119 (match_operand:VI8F_128 1 "general_operand")
11120 (match_operand:VI8F_128 2 "general_operand")))]
11121 "TARGET_SSE4_2"
11122 {
11123 bool ok = ix86_expand_int_vcond (operands);
11124 gcc_assert (ok);
11125 DONE;
11126 })
11127
;; Modes handled by the variable-selector vec_perm<mode> expander.
;; V16QI/V8HI/V4SI/V2DI/V4SF/V2DF are listed unconditionally; the
;; V32QI/V16HI/V8SI/V4DI/V8SF/V4DF entries require TARGET_AVX2; the
;; V16SF/V8DF/V16SI/V8DI entries require TARGET_AVX512F; V32HI requires
;; TARGET_AVX512BW and V64QI requires TARGET_AVX512VBMI.
11128 (define_mode_iterator VEC_PERM_AVX2
11129 [V16QI V8HI V4SI V2DI V4SF V2DF
11130 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
11131 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
11132 (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")
11133 (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
11134 (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
11135 (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512VBMI")])
11136
;; Variable permutation: operand 0 is built from operands 1 and 2 under
;; control of operand 3, which is in the integer vector mode matching
;; the data mode.  Expansion is done entirely by ix86_expand_vec_perm.
(define_expand "vec_perm<mode>"
  [(match_operand:VEC_PERM_AVX2 0 "register_operand")
   (match_operand:VEC_PERM_AVX2 1 "register_operand")
   (match_operand:VEC_PERM_AVX2 2 "register_operand")
   (match_operand:<sseintvecmode> 3 "register_operand")]
  "TARGET_SSSE3 || TARGET_AVX || TARGET_XOP"
{
  ix86_expand_vec_perm (operands);
  DONE;
})
11147
;; Modes handled by the constant vec_perm_const expander, each gated on
;; the ISA extension named next to it.
(define_mode_iterator VEC_PERM_CONST
  [(V4SF "TARGET_SSE")
   (V4SI "TARGET_SSE")
   (V2DF "TARGET_SSE")
   (V2DI "TARGET_SSE")
   (V16QI "TARGET_SSE2")
   (V8HI "TARGET_SSE2")
   (V8SF "TARGET_AVX")
   (V4DF "TARGET_AVX")
   (V8SI "TARGET_AVX")
   (V4DI "TARGET_AVX")
   (V32QI "TARGET_AVX2")
   (V16HI "TARGET_AVX2")
   (V16SI "TARGET_AVX512F")
   (V8DI "TARGET_AVX512F")
   (V16SF "TARGET_AVX512F")
   (V8DF "TARGET_AVX512F")
   (V32HI "TARGET_AVX512BW")
   (V64QI "TARGET_AVX512BW")])
11158
;; Constant permutation: operand 3 is the selector (no predicate is
;; applied to it).  The expander FAILs when the helper cannot handle
;; the requested permutation.
(define_expand "vec_perm_const<mode>"
  [(match_operand:VEC_PERM_CONST 0 "register_operand")
   (match_operand:VEC_PERM_CONST 1 "register_operand")
   (match_operand:VEC_PERM_CONST 2 "register_operand")
   (match_operand:<sseintvecmode> 3)]
  ""
{
  if (!ix86_expand_vec_perm_const (operands))
    FAIL;
  DONE;
})
11171
11172 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11173 ;;
11174 ;; Parallel bitwise logical operations
11175 ;;
11176 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11177
;; Bitwise complement of an integer vector, expressed as an XOR of
;; operand 1 with an all-ones vector that the preparation code creates
;; as operand 2.
(define_expand "one_cmpl<mode>2"
  [(set (match_operand:VI 0 "register_operand")
	(xor:VI (match_operand:VI 1 "nonimmediate_operand")
		(match_dup 2)))]
  "TARGET_SSE"
{
  /* Fill a vector with -1 in every element and force it into a
     register so that it can serve as the second XOR input.  */
  int nunits = GET_MODE_NUNITS (<MODE>mode);
  rtvec ones = rtvec_alloc (nunits);
  int idx;

  for (idx = 0; idx < nunits; idx++)
    RTVEC_ELT (ones, idx) = constm1_rtx;

  operands[2]
    = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, ones));
})
11192
;; Named expander for integer vector and-not:
;; operand 0 = ~operand 1 & operand 2.  No preparation code is needed;
;; the pattern is emitted as written.
(define_expand "<sse2_avx2>_andnot<mode>3"
  [(set (match_operand:VI_AVX2 0 "register_operand")
	(and:VI_AVX2
	  (not:VI_AVX2
	    (match_operand:VI_AVX2 1 "register_operand"))
	  (match_operand:VI_AVX2 2 "nonimmediate_operand")))]
  "TARGET_SSE2")
11199
;; Masked and-not expander for vectors with 32- and 64-bit elements.
;; The and-not of operands 1 and 2 is merged with operand 3 under the
;; mask register in operand 4.
(define_expand "<sse2_avx2>_andnot<mode>3_mask"
  [(set (match_operand:VI48_AVX512VL 0 "register_operand")
	(vec_merge:VI48_AVX512VL
	  (and:VI48_AVX512VL
	    (not:VI48_AVX512VL
	      (match_operand:VI48_AVX512VL 1 "register_operand"))
	    (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
	  (match_operand:VI48_AVX512VL 3 "vector_move_operand")
	  (match_operand:<avx512fmaskmode> 4 "register_operand")))]
  "TARGET_AVX512F")
11210
;; Masked and-not expander for vectors with 8- and 16-bit elements.
;; The and-not of operands 1 and 2 is merged with operand 3 under the
;; mask register in operand 4.  Requires AVX512BW.
(define_expand "<sse2_avx2>_andnot<mode>3_mask"
  [(set (match_operand:VI12_AVX512VL 0 "register_operand")
	(vec_merge:VI12_AVX512VL
	  (and:VI12_AVX512VL
	    (not:VI12_AVX512VL
	      (match_operand:VI12_AVX512VL 1 "register_operand"))
	    (match_operand:VI12_AVX512VL 2 "nonimmediate_operand"))
	  (match_operand:VI12_AVX512VL 3 "vector_move_operand")
	  (match_operand:<avx512fmaskmode> 4 "register_operand")))]
  "TARGET_AVX512BW")
11221
;; Unmasked integer vector and-not: operand 0 = ~operand 1 & operand 2.
;; Alternative 0 is the two-operand legacy SSE form (destination tied to
;; operand 1); alternative 1 is the three-operand "v"-prefixed form.
;;
;; The masked forms are matched by the separate *andnot<mode>3_mask
;; patterns.  This pattern therefore must not mention any mask
;; substitution attribute (such as <mask_operand3_1>) in its template:
;; the pattern is not a mask_name pattern, and referencing the attribute
;; would still pull the mask substitution in.
(define_insn "*andnot<mode>3"
  [(set (match_operand:VI 0 "register_operand" "=x,v")
	(and:VI
	  (not:VI (match_operand:VI 1 "register_operand" "0,v"))
	  (match_operand:VI 2 "nonimmediate_operand" "xm,vm")))]
  "TARGET_SSE"
{
  static char buf[64];
  const char *ops;
  const char *tmp;

  /* First pick the mnemonic (without the v prefix) from the insn mode
     attribute and the vector mode.  */
  switch (get_attr_mode (insn))
    {
    case MODE_XI:
      gcc_assert (TARGET_AVX512F);
      /* FALLTHRU */
    case MODE_OI:
      gcc_assert (TARGET_AVX2 || TARGET_AVX512VL);
      /* FALLTHRU */
    case MODE_TI:
      gcc_assert (TARGET_SSE2 || TARGET_AVX512VL);
      switch (<MODE>mode)
	{
	case V16SImode:
	case V8DImode:
	  if (TARGET_AVX512F)
	    {
	      tmp = "pandn<ssemodesuffix>";
	      break;
	    }
	  /* FALLTHRU */
	case V8SImode:
	case V4DImode:
	case V4SImode:
	case V2DImode:
	  if (TARGET_AVX512VL)
	    {
	      tmp = "pandn<ssemodesuffix>";
	      break;
	    }
	  /* FALLTHRU */
	default:
	  /* There is no byte or word flavour of this instruction, and
	     512-bit vectors have no suffix-less encoding at all, so
	     512-bit byte and word vectors must use the q form even
	     when AVX512VL is not enabled.  */
	  tmp = (TARGET_AVX512VL || <MODE_SIZE> == 64) ? "pandnq" : "pandn";
	}
      break;

    case MODE_V16SF:
      gcc_assert (TARGET_AVX512F);
      /* FALLTHRU */
    case MODE_V8SF:
      gcc_assert (TARGET_AVX);
      /* FALLTHRU */
    case MODE_V4SF:
      gcc_assert (TARGET_SSE);

      tmp = "andnps";
      break;

    default:
      gcc_unreachable ();
    }

  /* Then pick the operand layout from the chosen alternative.  */
  switch (which_alternative)
    {
    case 0:
      ops = "%s\t{%%2, %%0|%%0, %%2}";
      break;
    case 1:
      ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
      break;
    default:
      gcc_unreachable ();
    }

  snprintf (buf, sizeof (buf), ops, tmp);
  return buf;
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set (attr "prefix_data16")
     (if_then_else
       (and (eq_attr "alternative" "0")
	    (eq_attr "mode" "TI"))
       (const_string "1")
       (const_string "*")))
   (set_attr "prefix" "orig,vex")
   (set (attr "mode")
	(cond [(and (match_test "<MODE_SIZE> == 16")
		    (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
		 (const_string "<ssePSmode>")
	       (match_test "TARGET_AVX2")
		 (const_string "<sseinsnmode>")
	       (match_test "TARGET_AVX")
		 (if_then_else
		   (match_test "<MODE_SIZE> > 16")
		   (const_string "V8SF")
		   (const_string "<sseinsnmode>"))
	       (ior (not (match_test "TARGET_SSE2"))
		    (match_test "optimize_function_for_size_p (cfun)"))
		 (const_string "V4SF")
	      ]
	      (const_string "<sseinsnmode>")))])
11318
;; Masked and-not for vectors with 32- and 64-bit elements.  Operand 3
;; is either tied to the destination or a constant accepted by the "C"
;; constraint; operand 4 is the mask register.
(define_insn "*andnot<mode>3_mask"
  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
	(vec_merge:VI48_AVX512VL
	  (and:VI48_AVX512VL
	    (not:VI48_AVX512VL
	      (match_operand:VI48_AVX512VL 1 "register_operand" "v"))
	    (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm"))
	  (match_operand:VI48_AVX512VL 3 "vector_move_operand" "0C")
	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512F"
  "vpandn<ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
11333
;; Masked and-not for vectors with 8- and 16-bit elements.  Operand 3
;; is either tied to the destination or a constant accepted by the "C"
;; constraint; operand 4 is the mask register.
;; NOTE(review): the mnemonic suffix comes from ssemodesuffix for a
;; byte/word element mode - confirm that the assembler accepts the
;; resulting instruction names.
(define_insn "*andnot<mode>3_mask"
  [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
	(vec_merge:VI12_AVX512VL
	  (and:VI12_AVX512VL
	    (not:VI12_AVX512VL
	      (match_operand:VI12_AVX512VL 1 "register_operand" "v"))
	    (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm"))
	  (match_operand:VI12_AVX512VL 3 "vector_move_operand" "0C")
	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512BW"
  "vpandn<ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
11348
;; Expander for the any_logic operations on integer vectors.  Both
;; inputs may be registers, memory or constant vectors; the helper
;; emits the final instruction(s).
(define_expand "<code><mode>3"
  [(set (match_operand:VI 0 "register_operand")
	(any_logic:VI
	  (match_operand:VI 1 "nonimmediate_or_const_vector_operand")
	  (match_operand:VI 2 "nonimmediate_or_const_vector_operand")))]
  "TARGET_SSE"
{
  ix86_expand_vector_logical_operator (<CODE>, <MODE>mode, operands);
  DONE;
})
11359
;; any_logic operations on vectors with 32- and 64-bit integer elements.
;; The mask_name suffix additionally produces the masked variant.
;; Alternative 0 is the two-operand legacy form, alternative 1 the
;; three-operand "v"-prefixed form.
(define_insn "<mask_codefor><code><mode>3<mask_name>"
  [(set (match_operand:VI48_AVX_AVX512F 0 "register_operand" "=x,v")
	(any_logic:VI48_AVX_AVX512F
	  (match_operand:VI48_AVX_AVX512F 1 "nonimmediate_operand" "%0,v")
	  (match_operand:VI48_AVX_AVX512F 2 "nonimmediate_operand" "xm,vm")))]
  "TARGET_SSE && <mask_mode512bit_condition>
   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
{
  static char insn_text[64];
  const char *layout;
  const char *mnemonic;

  /* Choose the mnemonic, without the v prefix, from the insn mode
     attribute and the vector mode.  */
  switch (get_attr_mode (insn))
    {
    case MODE_XI:
      gcc_assert (TARGET_AVX512F);
      /* FALLTHRU */
    case MODE_OI:
      gcc_assert (TARGET_AVX2 || TARGET_AVX512VL);
      /* FALLTHRU */
    case MODE_TI:
      gcc_assert (TARGET_SSE2 || TARGET_AVX512VL);
      switch (<MODE>mode)
	{
	case V16SImode:
	case V8DImode:
	  if (TARGET_AVX512F)
	    {
	      mnemonic = "p<logic><ssemodesuffix>";
	      break;
	    }
	  /* FALLTHRU */
	case V8SImode:
	case V4DImode:
	case V4SImode:
	case V2DImode:
	  mnemonic = TARGET_AVX512VL ? "p<logic><ssemodesuffix>" : "p<logic>";
	  break;
	default:
	  gcc_unreachable ();
	}
      break;

    case MODE_V8SF:
      gcc_assert (TARGET_AVX);
      /* FALLTHRU */
    case MODE_V4SF:
      gcc_assert (TARGET_SSE);
      /* The packed-single form is never used for the masked variant.  */
      gcc_assert (!<mask_applied>);
      mnemonic = "<logic>ps";
      break;

    default:
      gcc_unreachable ();
    }

  /* Choose the operand layout from the alternative.  */
  switch (which_alternative)
    {
    case 0:
      if (<mask_applied>)
	layout = "v%s\t{%%2, %%0, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%0, %%2}";
      else
	layout = "%s\t{%%2, %%0|%%0, %%2}";
      break;
    case 1:
      layout = "v%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
      break;
    default:
      gcc_unreachable ();
    }

  snprintf (insn_text, sizeof (insn_text), layout, mnemonic);
  return insn_text;
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set (attr "prefix_data16")
     (if_then_else
       (and (eq_attr "alternative" "0")
	    (eq_attr "mode" "TI"))
       (const_string "1")
       (const_string "*")))
   (set_attr "prefix" "<mask_prefix3>")
   (set (attr "mode")
	(cond [(and (match_test "<MODE_SIZE> == 16")
		    (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
		 (const_string "<ssePSmode>")
	       (match_test "TARGET_AVX2")
		 (const_string "<sseinsnmode>")
	       (match_test "TARGET_AVX")
		 (if_then_else
		   (match_test "<MODE_SIZE> > 16")
		   (const_string "V8SF")
		   (const_string "<sseinsnmode>"))
	       (ior (not (match_test "TARGET_SSE2"))
		    (match_test "optimize_function_for_size_p (cfun)"))
		 (const_string "V4SF")
	      ]
	      (const_string "<sseinsnmode>")))])
11455
;; Unmasked any_logic operations on vectors with 8- and 16-bit integer
;; elements.  Alternative 0 is the two-operand legacy form, alternative
;; 1 the three-operand "v"-prefixed form, which gets a "q" suffix when
;; an EVEX-only encoding is selected.
;;
;; The template never prints a mask operand, so this pattern must not
;; reference a mask substitution attribute anywhere; the prefix
;; attribute is therefore spelled out as "orig,vex" (the same value the
;; unmasked *andnot<mode>3 pattern uses) instead of <mask_prefix3>.
(define_insn "*<code><mode>3"
  [(set (match_operand:VI12_AVX_AVX512F 0 "register_operand" "=x,v")
	(any_logic:VI12_AVX_AVX512F
	  (match_operand:VI12_AVX_AVX512F 1 "nonimmediate_operand" "%0,v")
	  (match_operand:VI12_AVX_AVX512F 2 "nonimmediate_operand" "xm,vm")))]
  "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
{
  static char buf[64];
  const char *ops;
  const char *tmp;
  const char *ssesuffix;

  /* Choose the mnemonic, without the v prefix, and the element-size
     suffix that is appended only in the three-operand form.  */
  switch (get_attr_mode (insn))
    {
    case MODE_XI:
      gcc_assert (TARGET_AVX512F);
      /* FALLTHRU */
    case MODE_OI:
      gcc_assert (TARGET_AVX2 || TARGET_AVX512VL);
      /* FALLTHRU */
    case MODE_TI:
      gcc_assert (TARGET_SSE2 || TARGET_AVX512VL);
      switch (<MODE>mode)
	{
	case V64QImode:
	case V32HImode:
	  if (TARGET_AVX512F)
	    {
	      tmp = "p<logic>";
	      ssesuffix = "q";
	      break;
	    }
	  /* FALLTHRU */
	case V32QImode:
	case V16HImode:
	case V16QImode:
	case V8HImode:
	  if (TARGET_AVX512VL || TARGET_AVX2 || TARGET_SSE2)
	    {
	      tmp = "p<logic>";
	      ssesuffix = TARGET_AVX512VL ? "q" : "";
	      break;
	    }
	  /* FALLTHRU */
	default:
	  gcc_unreachable ();
	}
      break;

    case MODE_V8SF:
      gcc_assert (TARGET_AVX);
      /* FALLTHRU */
    case MODE_V4SF:
      gcc_assert (TARGET_SSE);
      tmp = "<logic>ps";
      ssesuffix = "";
      break;

    default:
      gcc_unreachable ();
    }

  /* Choose the operand layout from the alternative.  */
  switch (which_alternative)
    {
    case 0:
      ops = "%s\t{%%2, %%0|%%0, %%2}";
      snprintf (buf, sizeof (buf), ops, tmp);
      break;
    case 1:
      ops = "v%s%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
      snprintf (buf, sizeof (buf), ops, tmp, ssesuffix);
      break;
    default:
      gcc_unreachable ();
    }

  return buf;
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set (attr "prefix_data16")
     (if_then_else
       (and (eq_attr "alternative" "0")
	    (eq_attr "mode" "TI"))
       (const_string "1")
       (const_string "*")))
   (set_attr "prefix" "orig,vex")
   (set (attr "mode")
	(cond [(and (match_test "<MODE_SIZE> == 16")
		    (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
		 (const_string "<ssePSmode>")
	       (match_test "TARGET_AVX2")
		 (const_string "<sseinsnmode>")
	       (match_test "TARGET_AVX")
		 (if_then_else
		   (match_test "<MODE_SIZE> > 16")
		   (const_string "V8SF")
		   (const_string "<sseinsnmode>"))
	       (ior (not (match_test "TARGET_SSE2"))
		    (match_test "optimize_function_for_size_p (cfun)"))
		 (const_string "V4SF")
	      ]
	      (const_string "<sseinsnmode>")))])
11554
;; vptestm for vectors with 8- and 16-bit elements: sets mask register
;; operand 0 from vector operands 1 and 2 (UNSPEC_TESTM).
(define_insn "<avx512>_testm<mode>3<mask_scalar_merge_name>"
  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
	(unspec:<avx512fmaskmode>
	  [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
	   (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")]
	  UNSPEC_TESTM))]
  "TARGET_AVX512BW"
  "vptestm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
11565
;; vptestm for vectors with 32- and 64-bit elements: sets mask register
;; operand 0 from vector operands 1 and 2 (UNSPEC_TESTM).
(define_insn "<avx512>_testm<mode>3<mask_scalar_merge_name>"
  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
	(unspec:<avx512fmaskmode>
	  [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
	   (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")]
	  UNSPEC_TESTM))]
  "TARGET_AVX512F"
  "vptestm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
11576
;; vptestnm for vectors with 8- and 16-bit elements: sets mask register
;; operand 0 from vector operands 1 and 2 (UNSPEC_TESTNM).
(define_insn "<avx512>_testnm<mode>3<mask_scalar_merge_name>"
  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
	(unspec:<avx512fmaskmode>
	  [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
	   (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")]
	  UNSPEC_TESTNM))]
  "TARGET_AVX512BW"
  "vptestnm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
11587
;; vptestnm for vectors with 32- and 64-bit elements: sets mask register
;; operand 0 from vector operands 1 and 2 (UNSPEC_TESTNM).
(define_insn "<avx512>_testnm<mode>3<mask_scalar_merge_name>"
  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
	(unspec:<avx512fmaskmode>
	  [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
	   (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")]
	  UNSPEC_TESTNM))]
  "TARGET_AVX512F"
  "vptestnm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
11598
11599 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11600 ;;
11601 ;; Parallel integral element swizzling
11602 ;;
11603 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11604
;; Truncating pack of two integer vectors into one vector of the pack
;; mode.  Both inputs are reinterpreted in the pack mode and handed to
;; the even/odd extraction helper.
(define_expand "vec_pack_trunc_<mode>"
  [(match_operand:<ssepackmode> 0 "register_operand")
   (match_operand:VI248_AVX2_8_AVX512F_24_AVX512BW 1 "register_operand")
   (match_operand:VI248_AVX2_8_AVX512F_24_AVX512BW 2 "register_operand")]
  "TARGET_SSE2"
{
  rtx first = gen_lowpart (<ssepackmode>mode, operands[1]);
  rtx second = gen_lowpart (<ssepackmode>mode, operands[2]);

  /* NOTE(review): the trailing 0 presumably selects the even elements;
     confirm against the helper definition.  */
  ix86_expand_vec_extract_even_odd (operands[0], first, second, 0);
  DONE;
})
11616
;; Combine two QImode values into one HImode value: operand 1 is
;; zero-extended and shifted left by 8, then ORed with the zero-extended
;; operand 2.
(define_expand "vec_pack_trunc_qi"
  [(set (match_operand:HI 0 "register_operand")
	(ior:HI
	  (ashift:HI
	    (zero_extend:HI (match_operand:QI 1 "register_operand"))
	    (const_int 8))
	  (zero_extend:HI (match_operand:QI 2 "register_operand"))))]
  "TARGET_AVX512F")
11623
;; Combine two HImode or SImode values into one value of the
;; double-width mask mode: operand 1 is zero-extended and shifted left
;; by the bit size of the input mode, then ORed with the zero-extended
;; operand 2.
(define_expand "vec_pack_trunc_<mode>"
  [(set (match_operand:<DOUBLEMASKMODE> 0 "register_operand")
	(ior:<DOUBLEMASKMODE>
	  (ashift:<DOUBLEMASKMODE>
	    (zero_extend:<DOUBLEMASKMODE>
	      (match_operand:SWI24 1 "register_operand"))
	    (match_dup 3))
	  (zero_extend:<DOUBLEMASKMODE>
	    (match_operand:SWI24 2 "register_operand"))))]
  "TARGET_AVX512BW"
{
  /* The shift count is the width in bits of one input.  */
  operands[3] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));
})
11633
;; Pack with signed saturation into byte elements: each input is
;; truncated with signed saturation to half of the result vector and
;; the two halves are concatenated.
(define_insn "<sse2_avx2>_packsswb<mask_name>"
  [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x")
	(vec_concat:VI1_AVX512
	  (ss_truncate:<ssehalfvecmode>
	    (match_operand:<sseunpackmode> 1 "register_operand" "0,v"))
	  (ss_truncate:<ssehalfvecmode>
	    (match_operand:<sseunpackmode> 2 "nonimmediate_operand"
					     "xm,vm"))))]
  "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
  "@
   packsswb\t{%2, %0|%0, %2}
   vpacksswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,maybe_evex")
   (set_attr "mode" "<sseinsnmode>")])
11650
;; Pack with signed saturation into word elements: each input is
;; truncated with signed saturation to half of the result vector and
;; the two halves are concatenated.
(define_insn "<sse2_avx2>_packssdw<mask_name>"
  [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
	(vec_concat:VI2_AVX2
	  (ss_truncate:<ssehalfvecmode>
	    (match_operand:<sseunpackmode> 1 "register_operand" "0,v"))
	  (ss_truncate:<ssehalfvecmode>
	    (match_operand:<sseunpackmode> 2 "nonimmediate_operand"
					     "xm,vm"))))]
  "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
  "@
   packssdw\t{%2, %0|%0, %2}
   vpackssdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<sseinsnmode>")])
11667
;; Pack with unsigned saturation into byte elements: each input is
;; truncated with unsigned saturation to half of the result vector and
;; the two halves are concatenated.
(define_insn "<sse2_avx2>_packuswb<mask_name>"
  [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x")
	(vec_concat:VI1_AVX512
	  (us_truncate:<ssehalfvecmode>
	    (match_operand:<sseunpackmode> 1 "register_operand" "0,v"))
	  (us_truncate:<ssehalfvecmode>
	    (match_operand:<sseunpackmode> 2 "nonimmediate_operand"
					     "xm,vm"))))]
  "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
  "@
   packuswb\t{%2, %0|%0, %2}
   vpackuswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<sseinsnmode>")])
11684
;; vpunpckhbw on 512-bit vectors.  Within every group of 16 bytes,
;; bytes 8-15 of operand 1 are interleaved with bytes 8-15 of operand 2
;; (operand 2 elements are numbered from 64 in the concatenation).
(define_insn "avx512bw_interleave_highv64qi<mask_name>"
  [(set (match_operand:V64QI 0 "register_operand" "=v")
	(vec_select:V64QI
	  (vec_concat:V128QI
	    (match_operand:V64QI 1 "register_operand" "v")
	    (match_operand:V64QI 2 "nonimmediate_operand" "vm"))
	  (parallel [;; first 16-byte group
		     (const_int 8) (const_int 72)
		     (const_int 9) (const_int 73)
		     (const_int 10) (const_int 74)
		     (const_int 11) (const_int 75)
		     (const_int 12) (const_int 76)
		     (const_int 13) (const_int 77)
		     (const_int 14) (const_int 78)
		     (const_int 15) (const_int 79)
		     ;; second 16-byte group
		     (const_int 24) (const_int 88)
		     (const_int 25) (const_int 89)
		     (const_int 26) (const_int 90)
		     (const_int 27) (const_int 91)
		     (const_int 28) (const_int 92)
		     (const_int 29) (const_int 93)
		     (const_int 30) (const_int 94)
		     (const_int 31) (const_int 95)
		     ;; third 16-byte group
		     (const_int 40) (const_int 104)
		     (const_int 41) (const_int 105)
		     (const_int 42) (const_int 106)
		     (const_int 43) (const_int 107)
		     (const_int 44) (const_int 108)
		     (const_int 45) (const_int 109)
		     (const_int 46) (const_int 110)
		     (const_int 47) (const_int 111)
		     ;; fourth 16-byte group
		     (const_int 56) (const_int 120)
		     (const_int 57) (const_int 121)
		     (const_int 58) (const_int 122)
		     (const_int 59) (const_int 123)
		     (const_int 60) (const_int 124)
		     (const_int 61) (const_int 125)
		     (const_int 62) (const_int 126)
		     (const_int 63) (const_int 127)])))]
  "TARGET_AVX512BW"
  "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
11728
;; vpunpckhbw on 256-bit vectors.  Within every group of 16 bytes,
;; bytes 8-15 of operand 1 are interleaved with bytes 8-15 of operand 2
;; (operand 2 elements are numbered from 32 in the concatenation).
;;
;; The masked variant operates on byte elements, so besides AVX512VL it
;; is also gated on <mask_avx512bw_condition>, exactly like the
;; low-half and word-element interleave patterns.
(define_insn "avx2_interleave_highv32qi<mask_name>"
  [(set (match_operand:V32QI 0 "register_operand" "=v")
	(vec_select:V32QI
	  (vec_concat:V64QI
	    (match_operand:V32QI 1 "register_operand" "v")
	    (match_operand:V32QI 2 "nonimmediate_operand" "vm"))
	  (parallel [;; first 16-byte group
		     (const_int 8) (const_int 40)
		     (const_int 9) (const_int 41)
		     (const_int 10) (const_int 42)
		     (const_int 11) (const_int 43)
		     (const_int 12) (const_int 44)
		     (const_int 13) (const_int 45)
		     (const_int 14) (const_int 46)
		     (const_int 15) (const_int 47)
		     ;; second 16-byte group
		     (const_int 24) (const_int 56)
		     (const_int 25) (const_int 57)
		     (const_int 26) (const_int 58)
		     (const_int 27) (const_int 59)
		     (const_int 28) (const_int 60)
		     (const_int 29) (const_int 61)
		     (const_int 30) (const_int 62)
		     (const_int 31) (const_int 63)])))]
  "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
  "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "<mask_prefix>")
   (set_attr "mode" "OI")])
11756
;; punpckhbw / vpunpckhbw on 128-bit vectors: bytes 8-15 of operand 1
;; are interleaved with bytes 8-15 of operand 2 (operand 2 elements are
;; numbered from 16 in the concatenation).  Alternative 0 is the
;; two-operand legacy form, alternative 1 the three-operand form.
;;
;; The masked variant operates on byte elements, so besides AVX512VL it
;; is also gated on <mask_avx512bw_condition>, exactly like the
;; low-half and word-element interleave patterns.
(define_insn "vec_interleave_highv16qi<mask_name>"
  [(set (match_operand:V16QI 0 "register_operand" "=x,v")
	(vec_select:V16QI
	  (vec_concat:V32QI
	    (match_operand:V16QI 1 "register_operand" "0,v")
	    (match_operand:V16QI 2 "nonimmediate_operand" "xm,vm"))
	  (parallel [(const_int 8) (const_int 24)
		     (const_int 9) (const_int 25)
		     (const_int 10) (const_int 26)
		     (const_int 11) (const_int 27)
		     (const_int 12) (const_int 28)
		     (const_int 13) (const_int 29)
		     (const_int 14) (const_int 30)
		     (const_int 15) (const_int 31)])))]
  "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
  "@
   punpckhbw\t{%2, %0|%0, %2}
   vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,<mask_prefix>")
   (set_attr "mode" "TI")])
11780
;; vpunpcklbw on 512-bit vectors.  Within every group of 16 bytes,
;; bytes 0-7 of operand 1 are interleaved with bytes 0-7 of operand 2
;; (operand 2 elements are numbered from 64 in the concatenation).
(define_insn "avx512bw_interleave_lowv64qi<mask_name>"
  [(set (match_operand:V64QI 0 "register_operand" "=v")
	(vec_select:V64QI
	  (vec_concat:V128QI
	    (match_operand:V64QI 1 "register_operand" "v")
	    (match_operand:V64QI 2 "nonimmediate_operand" "vm"))
	  (parallel [;; first 16-byte group
		     (const_int 0) (const_int 64)
		     (const_int 1) (const_int 65)
		     (const_int 2) (const_int 66)
		     (const_int 3) (const_int 67)
		     (const_int 4) (const_int 68)
		     (const_int 5) (const_int 69)
		     (const_int 6) (const_int 70)
		     (const_int 7) (const_int 71)
		     ;; second 16-byte group
		     (const_int 16) (const_int 80)
		     (const_int 17) (const_int 81)
		     (const_int 18) (const_int 82)
		     (const_int 19) (const_int 83)
		     (const_int 20) (const_int 84)
		     (const_int 21) (const_int 85)
		     (const_int 22) (const_int 86)
		     (const_int 23) (const_int 87)
		     ;; third 16-byte group
		     (const_int 32) (const_int 96)
		     (const_int 33) (const_int 97)
		     (const_int 34) (const_int 98)
		     (const_int 35) (const_int 99)
		     (const_int 36) (const_int 100)
		     (const_int 37) (const_int 101)
		     (const_int 38) (const_int 102)
		     (const_int 39) (const_int 103)
		     ;; fourth 16-byte group
		     (const_int 48) (const_int 112)
		     (const_int 49) (const_int 113)
		     (const_int 50) (const_int 114)
		     (const_int 51) (const_int 115)
		     (const_int 52) (const_int 116)
		     (const_int 53) (const_int 117)
		     (const_int 54) (const_int 118)
		     (const_int 55) (const_int 119)])))]
  "TARGET_AVX512BW"
  "vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
11824
;; vpunpcklbw on 256-bit vectors.  Within every group of 16 bytes,
;; bytes 0-7 of operand 1 are interleaved with bytes 0-7 of operand 2
;; (operand 2 elements are numbered from 32 in the concatenation).
(define_insn "avx2_interleave_lowv32qi<mask_name>"
  [(set (match_operand:V32QI 0 "register_operand" "=v")
	(vec_select:V32QI
	  (vec_concat:V64QI
	    (match_operand:V32QI 1 "register_operand" "v")
	    (match_operand:V32QI 2 "nonimmediate_operand" "vm"))
	  (parallel [;; first 16-byte group
		     (const_int 0) (const_int 32)
		     (const_int 1) (const_int 33)
		     (const_int 2) (const_int 34)
		     (const_int 3) (const_int 35)
		     (const_int 4) (const_int 36)
		     (const_int 5) (const_int 37)
		     (const_int 6) (const_int 38)
		     (const_int 7) (const_int 39)
		     ;; second 16-byte group
		     (const_int 16) (const_int 48)
		     (const_int 17) (const_int 49)
		     (const_int 18) (const_int 50)
		     (const_int 19) (const_int 51)
		     (const_int 20) (const_int 52)
		     (const_int 21) (const_int 53)
		     (const_int 22) (const_int 54)
		     (const_int 23) (const_int 55)])))]
  "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
  "vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "OI")])
11852
;; punpcklbw / vpunpcklbw on 128-bit vectors: bytes 0-7 of operand 1 are
;; interleaved with bytes 0-7 of operand 2 (operand 2 elements are
;; numbered from 16 in the concatenation).  Alternative 0 is the
;; two-operand legacy form, alternative 1 the three-operand form.
(define_insn "vec_interleave_lowv16qi<mask_name>"
  [(set (match_operand:V16QI 0 "register_operand" "=x,v")
	(vec_select:V16QI
	  (vec_concat:V32QI
	    (match_operand:V16QI 1 "register_operand" "0,v")
	    (match_operand:V16QI 2 "nonimmediate_operand" "xm,vm"))
	  (parallel [(const_int 0) (const_int 16)
		     (const_int 1) (const_int 17)
		     (const_int 2) (const_int 18)
		     (const_int 3) (const_int 19)
		     (const_int 4) (const_int 20)
		     (const_int 5) (const_int 21)
		     (const_int 6) (const_int 22)
		     (const_int 7) (const_int 23)])))]
  "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
  "@
   punpcklbw\t{%2, %0|%0, %2}
   vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])
11876
;; vpunpckhwd on 512-bit vectors.  Within every group of 8 words,
;; words 4-7 of operand 1 are interleaved with words 4-7 of operand 2
;; (operand 2 elements are numbered from 32 in the concatenation).
(define_insn "avx512bw_interleave_highv32hi<mask_name>"
  [(set (match_operand:V32HI 0 "register_operand" "=v")
	(vec_select:V32HI
	  (vec_concat:V64HI
	    (match_operand:V32HI 1 "register_operand" "v")
	    (match_operand:V32HI 2 "nonimmediate_operand" "vm"))
	  (parallel [;; first 8-word group
		     (const_int 4) (const_int 36)
		     (const_int 5) (const_int 37)
		     (const_int 6) (const_int 38)
		     (const_int 7) (const_int 39)
		     ;; second 8-word group
		     (const_int 12) (const_int 44)
		     (const_int 13) (const_int 45)
		     (const_int 14) (const_int 46)
		     (const_int 15) (const_int 47)
		     ;; third 8-word group
		     (const_int 20) (const_int 52)
		     (const_int 21) (const_int 53)
		     (const_int 22) (const_int 54)
		     (const_int 23) (const_int 55)
		     ;; fourth 8-word group
		     (const_int 28) (const_int 60)
		     (const_int 29) (const_int 61)
		     (const_int 30) (const_int 62)
		     (const_int 31) (const_int 63)])))]
  "TARGET_AVX512BW"
  "vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
11904
;; vpunpckhwd on 256-bit vectors.  Within every group of 8 words,
;; words 4-7 of operand 1 are interleaved with words 4-7 of operand 2
;; (operand 2 elements are numbered from 16 in the concatenation).
(define_insn "avx2_interleave_highv16hi<mask_name>"
  [(set (match_operand:V16HI 0 "register_operand" "=v")
	(vec_select:V16HI
	  (vec_concat:V32HI
	    (match_operand:V16HI 1 "register_operand" "v")
	    (match_operand:V16HI 2 "nonimmediate_operand" "vm"))
	  (parallel [;; first 8-word group
		     (const_int 4) (const_int 20)
		     (const_int 5) (const_int 21)
		     (const_int 6) (const_int 22)
		     (const_int 7) (const_int 23)
		     ;; second 8-word group
		     (const_int 12) (const_int 28)
		     (const_int 13) (const_int 29)
		     (const_int 14) (const_int 30)
		     (const_int 15) (const_int 31)])))]
  "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
  "vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "OI")])
11924
;; punpckhwd / vpunpckhwd on 128-bit vectors: words 4-7 of operand 1 are
;; interleaved with words 4-7 of operand 2 (operand 2 elements are
;; numbered from 8 in the concatenation).  Alternative 0 is the
;; two-operand legacy form, alternative 1 the three-operand form.
(define_insn "vec_interleave_highv8hi<mask_name>"
  [(set (match_operand:V8HI 0 "register_operand" "=x,v")
	(vec_select:V8HI
	  (vec_concat:V16HI
	    (match_operand:V8HI 1 "register_operand" "0,v")
	    (match_operand:V8HI 2 "nonimmediate_operand" "xm,vm"))
	  (parallel [(const_int 4) (const_int 12)
		     (const_int 5) (const_int 13)
		     (const_int 6) (const_int 14)
		     (const_int 7) (const_int 15)])))]
  "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
  "@
   punpckhwd\t{%2, %0|%0, %2}
   vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,maybe_vex")
   (set_attr "mode" "TI")])
11944
;; vpunpcklwd on 512-bit vectors.  Within every group of 8 words,
;; words 0-3 of operand 1 are interleaved with words 0-3 of operand 2
;; (operand 2 elements are numbered from 32 in the concatenation).
(define_insn "<mask_codefor>avx512bw_interleave_lowv32hi<mask_name>"
  [(set (match_operand:V32HI 0 "register_operand" "=v")
	(vec_select:V32HI
	  (vec_concat:V64HI
	    (match_operand:V32HI 1 "register_operand" "v")
	    (match_operand:V32HI 2 "nonimmediate_operand" "vm"))
	  (parallel [;; first 8-word group
		     (const_int 0) (const_int 32)
		     (const_int 1) (const_int 33)
		     (const_int 2) (const_int 34)
		     (const_int 3) (const_int 35)
		     ;; second 8-word group
		     (const_int 8) (const_int 40)
		     (const_int 9) (const_int 41)
		     (const_int 10) (const_int 42)
		     (const_int 11) (const_int 43)
		     ;; third 8-word group
		     (const_int 16) (const_int 48)
		     (const_int 17) (const_int 49)
		     (const_int 18) (const_int 50)
		     (const_int 19) (const_int 51)
		     ;; fourth 8-word group
		     (const_int 24) (const_int 56)
		     (const_int 25) (const_int 57)
		     (const_int 26) (const_int 58)
		     (const_int 27) (const_int 59)])))]
  "TARGET_AVX512BW"
  "vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
11972
;; vpunpcklwd on 256-bit vectors.  Within every group of 8 words,
;; words 0-3 of operand 1 are interleaved with words 0-3 of operand 2
;; (operand 2 elements are numbered from 16 in the concatenation).
(define_insn "avx2_interleave_lowv16hi<mask_name>"
  [(set (match_operand:V16HI 0 "register_operand" "=v")
	(vec_select:V16HI
	  (vec_concat:V32HI
	    (match_operand:V16HI 1 "register_operand" "v")
	    (match_operand:V16HI 2 "nonimmediate_operand" "vm"))
	  (parallel [;; first 8-word group
		     (const_int 0) (const_int 16)
		     (const_int 1) (const_int 17)
		     (const_int 2) (const_int 18)
		     (const_int 3) (const_int 19)
		     ;; second 8-word group
		     (const_int 8) (const_int 24)
		     (const_int 9) (const_int 25)
		     (const_int 10) (const_int 26)
		     (const_int 11) (const_int 27)])))]
  "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
  "vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "OI")])
11992
;; punpcklwd / vpunpcklwd on 128-bit vectors: words 0-3 of operand 1 are
;; interleaved with words 0-3 of operand 2 (operand 2 elements are
;; numbered from 8 in the concatenation).  Alternative 0 is the
;; two-operand legacy form, alternative 1 the three-operand form.
(define_insn "vec_interleave_lowv8hi<mask_name>"
  [(set (match_operand:V8HI 0 "register_operand" "=x,v")
	(vec_select:V8HI
	  (vec_concat:V16HI
	    (match_operand:V8HI 1 "register_operand" "0,v")
	    (match_operand:V8HI 2 "nonimmediate_operand" "xm,vm"))
	  (parallel [(const_int 0) (const_int 8)
		     (const_int 1) (const_int 9)
		     (const_int 2) (const_int 10)
		     (const_int 3) (const_int 11)])))]
  "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
  "@
   punpcklwd\t{%2, %0|%0, %2}
   vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,maybe_evex")
   (set_attr "mode" "TI")])
12012
;; vpunpckhdq on 256-bit vectors.  Within every group of 4 doublewords,
;; elements 2-3 of operand 1 are interleaved with elements 2-3 of
;; operand 2 (operand 2 elements are numbered from 8 in the
;; concatenation).
(define_insn "avx2_interleave_highv8si<mask_name>"
  [(set (match_operand:V8SI 0 "register_operand" "=v")
	(vec_select:V8SI
	  (vec_concat:V16SI
	    (match_operand:V8SI 1 "register_operand" "v")
	    (match_operand:V8SI 2 "nonimmediate_operand" "vm"))
	  (parallel [;; first 4-element group
		     (const_int 2) (const_int 10)
		     (const_int 3) (const_int 11)
		     ;; second 4-element group
		     (const_int 6) (const_int 14)
		     (const_int 7) (const_int 15)])))]
  "TARGET_AVX2 && <mask_avx512vl_condition>"
  "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "OI")])
12028
;; vpunpckhdq on V16SI: in every group of four elements, elements 2 and 3 of
;; operand 1 are interleaved with elements 2 and 3 of operand 2 (operand 2
;; occupies indices 16-31 of the V32SI concatenation).
(define_insn "<mask_codefor>avx512f_interleave_highv16si<mask_name>"
  [(set (match_operand:V16SI 0 "register_operand" "=v")
        (vec_select:V16SI
          (vec_concat:V32SI
            (match_operand:V16SI 1 "register_operand" "v")
            (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
          (parallel
            [(const_int 2) (const_int 18) (const_int 3) (const_int 19)
             (const_int 6) (const_int 22) (const_int 7) (const_int 23)
             (const_int 10) (const_int 26) (const_int 11) (const_int 27)
             (const_int 14) (const_int 30) (const_int 15) (const_int 31)])))]
  "TARGET_AVX512F"
  "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
12048
12049
;; punpckhdq / vpunpckhdq on V4SI: interleave elements 2-3 of operand 1 with
;; elements 2-3 of operand 2 (indices 6-7 of the V8SI concatenation).
;; Alternative 0 is the two-operand non-AVX form, alternative 1 the
;; three-operand AVX form.
;;
;; The AVX alternative accepts "v" registers and, through <mask_name>, a
;; write mask, so it can need an EVEX encoding.  Its prefix is therefore
;; "maybe_evex", matching vec_interleave_lowv8hi<mask_name>.
(define_insn "vec_interleave_highv4si<mask_name>"
  [(set (match_operand:V4SI 0 "register_operand" "=x,v")
        (vec_select:V4SI
          (vec_concat:V8SI
            (match_operand:V4SI 1 "register_operand" "0,v")
            (match_operand:V4SI 2 "nonimmediate_operand" "xm,vm"))
          (parallel
            [(const_int 2) (const_int 6) (const_int 3) (const_int 7)])))]
  "TARGET_SSE2 && <mask_avx512vl_condition>"
  "@
   punpckhdq\t{%2, %0|%0, %2}
   vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,maybe_evex")
   (set_attr "mode" "TI")])
12067
;; vpunpckldq on V8SI: interleave elements 0-1 and 4-5 of operand 1 with the
;; same-numbered elements of operand 2 (indices 8-9 and 12-13 of the V16SI
;; concatenation).
(define_insn "avx2_interleave_lowv8si<mask_name>"
  [(set (match_operand:V8SI 0 "register_operand" "=v")
        (vec_select:V8SI
          (vec_concat:V16SI
            (match_operand:V8SI 1 "register_operand" "v")
            (match_operand:V8SI 2 "nonimmediate_operand" "vm"))
          (parallel
            [(const_int 0) (const_int 8) (const_int 1) (const_int 9)
             (const_int 4) (const_int 12) (const_int 5) (const_int 13)])))]
  "TARGET_AVX2 && <mask_avx512vl_condition>"
  "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "OI")])
12083
;; vpunpckldq on V16SI: in every group of four elements, elements 0 and 1 of
;; operand 1 are interleaved with elements 0 and 1 of operand 2 (operand 2
;; occupies indices 16-31 of the V32SI concatenation).
(define_insn "<mask_codefor>avx512f_interleave_lowv16si<mask_name>"
  [(set (match_operand:V16SI 0 "register_operand" "=v")
        (vec_select:V16SI
          (vec_concat:V32SI
            (match_operand:V16SI 1 "register_operand" "v")
            (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
          (parallel
            [(const_int 0) (const_int 16) (const_int 1) (const_int 17)
             (const_int 4) (const_int 20) (const_int 5) (const_int 21)
             (const_int 8) (const_int 24) (const_int 9) (const_int 25)
             (const_int 12) (const_int 28) (const_int 13) (const_int 29)])))]
  "TARGET_AVX512F"
  "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
12103
;; punpckldq / vpunpckldq on V4SI: interleave elements 0-1 of operand 1 with
;; elements 0-1 of operand 2 (indices 4-5 of the V8SI concatenation).
;; Alternative 0 is the two-operand non-AVX form, alternative 1 the
;; three-operand AVX form.
;;
;; The AVX alternative accepts "v" registers and, through <mask_name>, a
;; write mask, so it is not always VEX-encoded.  A fixed "vex" prefix
;; attribute would describe the EVEX-encoded cases incorrectly; use
;; "maybe_evex" as vec_interleave_lowv8hi<mask_name> does.
(define_insn "vec_interleave_lowv4si<mask_name>"
  [(set (match_operand:V4SI 0 "register_operand" "=x,v")
        (vec_select:V4SI
          (vec_concat:V8SI
            (match_operand:V4SI 1 "register_operand" "0,v")
            (match_operand:V4SI 2 "nonimmediate_operand" "xm,vm"))
          (parallel
            [(const_int 0) (const_int 4) (const_int 1) (const_int 5)])))]
  "TARGET_SSE2 && <mask_avx512vl_condition>"
  "@
   punpckldq\t{%2, %0|%0, %2}
   vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,maybe_evex")
   (set_attr "mode" "TI")])
12121
;; Interleave-high expander for the VI_256 modes.  It emits both
;; avx2_interleave_low<mode> and avx2_interleave_high<mode> on the inputs
;; and then combines the two results with avx2_permv2ti using the
;; immediate 1 + (3 << 4).
(define_expand "vec_interleave_high<mode>"
  [(match_operand:VI_256 0 "register_operand" "=x")
   (match_operand:VI_256 1 "register_operand" "x")
   (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
  "TARGET_AVX2"
{
  rtx lo_part = gen_reg_rtx (<MODE>mode);
  rtx hi_part = gen_reg_rtx (<MODE>mode);
  rtx combined = gen_reg_rtx (V4DImode);
  rtx perm_imm = GEN_INT (1 + (3 << 4));

  emit_insn (gen_avx2_interleave_low<mode> (lo_part, operands[1],
                                            operands[2]));
  emit_insn (gen_avx2_interleave_high<mode> (hi_part, operands[1],
                                             operands[2]));
  /* The permute works on V4DImode views of the two intermediates.  */
  emit_insn (gen_avx2_permv2ti (combined,
                                gen_lowpart (V4DImode, lo_part),
                                gen_lowpart (V4DImode, hi_part),
                                perm_imm));
  emit_move_insn (operands[0], gen_lowpart (<MODE>mode, combined));
  DONE;
})
12139
;; Interleave-low expander for the VI_256 modes.  It emits both
;; avx2_interleave_low<mode> and avx2_interleave_high<mode> on the inputs
;; and then combines the two results with avx2_permv2ti using the
;; immediate 0 + (2 << 4).
(define_expand "vec_interleave_low<mode>"
  [(match_operand:VI_256 0 "register_operand" "=x")
   (match_operand:VI_256 1 "register_operand" "x")
   (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
  "TARGET_AVX2"
{
  rtx lo_part = gen_reg_rtx (<MODE>mode);
  rtx hi_part = gen_reg_rtx (<MODE>mode);
  rtx combined = gen_reg_rtx (V4DImode);
  rtx perm_imm = GEN_INT (0 + (2 << 4));

  emit_insn (gen_avx2_interleave_low<mode> (lo_part, operands[1],
                                            operands[2]));
  emit_insn (gen_avx2_interleave_high<mode> (hi_part, operands[1],
                                             operands[2]));
  /* The permute works on V4DImode views of the two intermediates.  */
  emit_insn (gen_avx2_permv2ti (combined,
                                gen_lowpart (V4DImode, lo_part),
                                gen_lowpart (V4DImode, hi_part),
                                perm_imm));
  emit_move_insn (operands[0], gen_lowpart (<MODE>mode, combined));
  DONE;
})
12157
;; Modes handled by pinsr patterns.  V8HI carries no extra condition here;
;; V16QI and V4SI additionally need SSE4.1, and V2DI needs SSE4.1 on a
;; 64-bit target.
(define_mode_iterator PINSR_MODE
  [(V16QI "TARGET_SSE4_1")
   V8HI
   (V4SI "TARGET_SSE4_1")
   (V2DI "TARGET_SSE4_1 && TARGET_64BIT")])
12163
;; Name prefix of the pinsr pattern for each PINSR_MODE mode: "sse2" for
;; V8HI and "sse4_1" for the other three.
(define_mode_attr sse2p4_1
  [(V16QI "sse4_1")
   (V8HI "sse2")
   (V4SI "sse4_1")
   (V2DI "sse4_1")])
12167
;; sse4_1_pinsrd must come before sse2_loadld since it is preferred.
;;
;; Insert scalar operand 2 into one element of vector operand 1.  Operand 3
;; arrives as the vec_merge mask; the insn condition only accepts values
;; whose exact_log2 is a valid element number, and the output code rewrites
;; operand 3 to that element number before printing it as the immediate.
;; Alternatives 0/1 are the non-AVX register/memory forms, 2/3 the AVX ones.
(define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"
  [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x")
        (vec_merge:PINSR_MODE
          (vec_duplicate:PINSR_MODE
            (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m"))
          (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x")
          (match_operand:SI 3 "const_int_operand")))]
  "TARGET_SSE2
   && ((unsigned) exact_log2 (INTVAL (operands[3]))
       < GET_MODE_NUNITS (<MODE>mode))"
{
  /* Register sources narrower than SImode are printed through %k2.  */
  const bool sub_si_elt
    = GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode);

  operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));

  switch (which_alternative)
    {
    case 0:
      return (sub_si_elt
              ? "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}"
              : "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}");
    case 1:
      return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}";
    case 2:
      return (sub_si_elt
              ? "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}"
              : "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}");
    case 3:
      return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "isa" "noavx,noavx,avx,avx")
   (set_attr "type" "sselog")
   (set (attr "prefix_rex")
     (if_then_else
       (and (not (match_test "TARGET_AVX"))
            (eq (const_string "<MODE>mode") (const_string "V2DImode")))
       (const_string "1")
       (const_string "*")))
   (set (attr "prefix_data16")
     (if_then_else
       (and (not (match_test "TARGET_AVX"))
            (eq (const_string "<MODE>mode") (const_string "V8HImode")))
       (const_string "1")
       (const_string "*")))
   (set (attr "prefix_extra")
     (if_then_else
       (and (not (match_test "TARGET_AVX"))
            (eq (const_string "<MODE>mode") (const_string "V8HImode")))
       (const_string "*")
       (const_string "1")))
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "orig,orig,vex,vex")
   (set_attr "mode" "TI")])
12223
;; Masked insertion of a quarter-width vector (operand 2) into chunk
;; number operand 3 (0-3) of operand 1.  Operand 4 and operand 5 are
;; forwarded unchanged to the _1_mask pattern.
;;
;; The expander encodes the chunk number as a vec_merge selector for the
;; insn below.  In a vec_merge a set selector bit takes the element from
;; the first vector (operand 1) and a clear bit takes it from the second
;; (the replicated operand 2), with bit 0 describing element 0.  Chunk 0
;; is the lowest-numbered elements, so the cleared bits must start at the
;; low end of the selector: 0xFFF0/0xFF0F/0xF0FF/0x0FFF for 16-element
;; modes and 0xFC/0xF3/0xCF/0x3F for 8-element modes.  The previous
;; encoding was mirrored (chunk 0 cleared the top bits), so the RTL
;; described a different operation than the instruction performs.
(define_expand "<extract_type>_vinsert<shuffletype><extract_suf>_mask"
  [(match_operand:AVX512_VEC 0 "register_operand")
   (match_operand:AVX512_VEC 1 "register_operand")
   (match_operand:<ssequartermode> 2 "nonimmediate_operand")
   (match_operand:SI 3 "const_0_to_3_operand")
   (match_operand:AVX512_VEC 4 "register_operand")
   (match_operand:<avx512fmaskmode> 5 "register_operand")]
  "TARGET_AVX512F"
{
  int chunk = INTVAL (operands[3]);
  /* Four selector bits per chunk for 4-byte elements, two per chunk
     otherwise.  */
  int selector = (GET_MODE_UNIT_SIZE (<MODE>mode) == 4
                  ? 0xFFFF ^ (0x000F << chunk * 4)
                  : 0xFF ^ (0x03 << chunk * 2));
  emit_insn (gen_<extract_type>_vinsert<shuffletype><extract_suf>_1_mask
    (operands[0], operands[1], operands[2], GEN_INT (selector),
     operands[4], operands[5]));
  DONE;
})

;; The insn proper.  Operand 3 is the vec_merge selector produced by the
;; expander above; the output code maps it back to the chunk number that
;; the instruction takes as its immediate.  Any other selector value is
;; a bug and hits gcc_unreachable.
(define_insn "<mask_codefor><extract_type>_vinsert<shuffletype><extract_suf>_1<mask_name>"
  [(set (match_operand:AVX512_VEC 0 "register_operand" "=v")
        (vec_merge:AVX512_VEC
          (match_operand:AVX512_VEC 1 "register_operand" "v")
          (vec_duplicate:AVX512_VEC
            (match_operand:<ssequartermode> 2 "nonimmediate_operand" "vm"))
          (match_operand:SI 3 "const_int_operand" "n")))]
  "TARGET_AVX512F"
{
  int chunk;
  int selector = INTVAL (operands[3]);

  /* First value of each pair: 16-element modes; second: 8-element.  */
  if (selector == 0xFFF0 || selector == 0xFC)
    chunk = 0;
  else if (selector == 0xFF0F || selector == 0xF3)
    chunk = 1;
  else if (selector == 0xF0FF || selector == 0xCF)
    chunk = 2;
  else if (selector == 0x0FFF || selector == 0x3F)
    chunk = 3;
  else
    gcc_unreachable ();

  operands[3] = GEN_INT (chunk);

  return "vinsert<shuffletype><extract_suf>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
12275
;; Masked insertion of a half-width vector (operand 2) into operand 1.
;; Operand 3 selects the half: 0 dispatches to vec_set_lo_<mode>_mask,
;; 1 to vec_set_hi_<mode>_mask.  Operands 4 and 5 are forwarded unchanged.
(define_expand "<extract_type_2>_vinsert<shuffletype><extract_suf_2>_mask"
  [(match_operand:AVX512_VEC_2 0 "register_operand")
   (match_operand:AVX512_VEC_2 1 "register_operand")
   (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
   (match_operand:SI 3 "const_0_to_1_operand")
   (match_operand:AVX512_VEC_2 4 "register_operand")
   (match_operand:<avx512fmaskmode> 5 "register_operand")]
  "TARGET_AVX512F"
{
  if (INTVAL (operands[3]) != 0)
    emit_insn (gen_vec_set_hi_<mode>_mask (operands[0], operands[1],
                                           operands[2], operands[4],
                                           operands[5]));
  else
    emit_insn (gen_vec_set_lo_<mode>_mask (operands[0], operands[1],
                                           operands[2], operands[4],
                                           operands[5]));
  DONE;
})
12296
;; vinsert<shuffletype>32x8 with immediate 0 on the V16FI modes: the result
;; is operand 2 followed by elements 8-15 of operand 1.
(define_insn "vec_set_lo_<mode><mask_name>"
  [(set (match_operand:V16FI 0 "register_operand" "=v")
        (vec_concat:V16FI
          (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
          (vec_select:<ssehalfvecmode>
            (match_operand:V16FI 1 "register_operand" "v")
            (parallel
              [(const_int 8) (const_int 9) (const_int 10) (const_int 11)
               (const_int 12) (const_int 13) (const_int 14) (const_int 15)]))))]
  "TARGET_AVX512DQ"
  "vinsert<shuffletype>32x8\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, $0x0}"
  [(set_attr "type" "sselog")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
12313
;; vinsert<shuffletype>32x8 with immediate 1 on the V16FI modes: operand 2
;; replaces the high half of operand 1, so the result is elements 0-7 of
;; operand 1 followed by operand 2.
;;
;; vec_concat places its first operand in the low-numbered elements, so
;; the retained low half of operand 1 must come first and the inserted
;; operand 2 second.  The previous operand order described operand 2
;; landing in the low half, which is not what the $0x1 form does.
;; Operand numbers are unchanged, so the generated gen_ function keeps its
;; argument order.
(define_insn "vec_set_hi_<mode><mask_name>"
  [(set (match_operand:V16FI 0 "register_operand" "=v")
        (vec_concat:V16FI
          (vec_select:<ssehalfvecmode>
            (match_operand:V16FI 1 "register_operand" "v")
            (parallel
              [(const_int 0) (const_int 1) (const_int 2) (const_int 3)
               (const_int 4) (const_int 5) (const_int 6) (const_int 7)]))
          (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512DQ"
  "vinsert<shuffletype>32x8\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, $0x1}"
  [(set_attr "type" "sselog")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
12330
;; vinsert<shuffletype>64x4 with immediate 0 on the V8FI modes: the result
;; is operand 2 followed by elements 4-7 of operand 1.
(define_insn "vec_set_lo_<mode><mask_name>"
  [(set (match_operand:V8FI 0 "register_operand" "=v")
        (vec_concat:V8FI
          (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
          (vec_select:<ssehalfvecmode>
            (match_operand:V8FI 1 "register_operand" "v")
            (parallel
              [(const_int 4) (const_int 5) (const_int 6) (const_int 7)]))))]
  "TARGET_AVX512F"
  "vinsert<shuffletype>64x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, $0x0}"
  [(set_attr "type" "sselog")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
12345
;; vinsert<shuffletype>64x4 with immediate 1 on the V8FI modes: operand 2
;; replaces the high half of operand 1, so the result is elements 0-3 of
;; operand 1 followed by operand 2.
;;
;; vec_concat places its first operand in the low-numbered elements, so
;; the retained low half of operand 1 must come first and the inserted
;; operand 2 second.  The previous operand order described operand 2
;; landing in the low half, which is not what the $0x1 form does.
;; Operand numbers are unchanged, so the generated gen_ function keeps its
;; argument order.
(define_insn "vec_set_hi_<mode><mask_name>"
  [(set (match_operand:V8FI 0 "register_operand" "=v")
        (vec_concat:V8FI
          (vec_select:<ssehalfvecmode>
            (match_operand:V8FI 1 "register_operand" "v")
            (parallel
              [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))
          (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512F"
  "vinsert<shuffletype>64x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, $0x1}"
  [(set_attr "type" "sselog")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
12360
;; Masked 256-bit vshuf<shuffletype>64x2 expander.  The two bits of the
;; immediate (operand 3) are expanded into explicit element indices for
;; the _1_mask pattern: bit 0 picks elements 0-1 or 2-3 for the first
;; pair, bit 1 picks indices 4-5 or 6-7 for the second pair.
(define_expand "avx512dq_shuf_<shuffletype>64x2_mask"
  [(match_operand:VI8F_256 0 "register_operand")
   (match_operand:VI8F_256 1 "register_operand")
   (match_operand:VI8F_256 2 "nonimmediate_operand")
   (match_operand:SI 3 "const_0_to_3_operand")
   (match_operand:VI8F_256 4 "register_operand")
   (match_operand:QI 5 "register_operand")]
  "TARGET_AVX512DQ"
{
  int imm = INTVAL (operands[3]);
  int first_pair = ((imm >> 0) & 1) * 2;
  int second_pair = ((imm >> 1) & 1) * 2 + 4;

  emit_insn (gen_avx512dq_shuf_<shuffletype>64x2_1_mask
    (operands[0], operands[1], operands[2],
     GEN_INT (first_pair + 0), GEN_INT (first_pair + 1),
     GEN_INT (second_pair + 0), GEN_INT (second_pair + 1),
     operands[4], operands[5]));
  DONE;
})
12380
;; 256-bit vshuf<shuffletype>64x2 with explicit element indices.  The insn
;; condition requires operands 3/4 and operands 5/6 to be consecutive
;; pairs; the output code folds the two pair selections back into the
;; 2-bit immediate.
(define_insn "<mask_codefor>avx512dq_shuf_<shuffletype>64x2_1<mask_name>"
  [(set (match_operand:VI8F_256 0 "register_operand" "=v")
        (vec_select:VI8F_256
          (vec_concat:<ssedoublemode>
            (match_operand:VI8F_256 1 "register_operand" "v")
            (match_operand:VI8F_256 2 "nonimmediate_operand" "vm"))
          (parallel [(match_operand 3 "const_0_to_3_operand")
                     (match_operand 4 "const_0_to_3_operand")
                     (match_operand 5 "const_4_to_7_operand")
                     (match_operand 6 "const_4_to_7_operand")])))]
  "TARGET_AVX512VL
   && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
       && INTVAL (operands[5]) == (INTVAL (operands[6]) - 1))"
{
  const int first_sel = INTVAL (operands[3]) / 2;
  const int second_sel = (INTVAL (operands[5]) - 4) / 2;

  operands[3] = GEN_INT (first_sel | (second_sel << 1));
  return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand7>|%0<mask_operand7>, %1, %2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
12405
;; Masked 512-bit vshuf<shuffletype>64x2 expander.  Each 2-bit field of the
;; immediate (operand 3) becomes a pair of consecutive element indices for
;; the _1_mask pattern; the first two pairs index 0-7, the last two index
;; 8-15.
(define_expand "avx512f_shuf_<shuffletype>64x2_mask"
  [(match_operand:V8FI 0 "register_operand")
   (match_operand:V8FI 1 "register_operand")
   (match_operand:V8FI 2 "nonimmediate_operand")
   (match_operand:SI 3 "const_0_to_255_operand")
   (match_operand:V8FI 4 "register_operand")
   (match_operand:QI 5 "register_operand")]
  "TARGET_AVX512F"
{
  int imm = INTVAL (operands[3]);
  rtx sel[8];

  for (int pair = 0; pair < 4; pair++)
    {
      /* Pairs 2 and 3 are offset by 8.  */
      int first = ((imm >> (pair * 2)) & 3) * 2 + (pair < 2 ? 0 : 8);
      sel[pair * 2] = GEN_INT (first);
      sel[pair * 2 + 1] = GEN_INT (first + 1);
    }

  emit_insn (gen_avx512f_shuf_<shuffletype>64x2_1_mask
    (operands[0], operands[1], operands[2],
     sel[0], sel[1], sel[2], sel[3],
     sel[4], sel[5], sel[6], sel[7],
     operands[4], operands[5]));
  DONE;
})
12429
;; 512-bit vshuf<shuffletype>64x2 with explicit element indices.  The insn
;; condition requires operands 3/4, 5/6, 7/8 and 9/10 to be consecutive
;; pairs; the output code folds the four pair selections back into the
;; 8-bit immediate.
(define_insn "avx512f_shuf_<shuffletype>64x2_1<mask_name>"
  [(set (match_operand:V8FI 0 "register_operand" "=v")
        (vec_select:V8FI
          (vec_concat:<ssedoublemode>
            (match_operand:V8FI 1 "register_operand" "v")
            (match_operand:V8FI 2 "nonimmediate_operand" "vm"))
          (parallel [(match_operand 3 "const_0_to_7_operand")
                     (match_operand 4 "const_0_to_7_operand")
                     (match_operand 5 "const_0_to_7_operand")
                     (match_operand 6 "const_0_to_7_operand")
                     (match_operand 7 "const_8_to_15_operand")
                     (match_operand 8 "const_8_to_15_operand")
                     (match_operand 9 "const_8_to_15_operand")
                     (match_operand 10 "const_8_to_15_operand")])))]
  "TARGET_AVX512F
   && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
       && INTVAL (operands[5]) == (INTVAL (operands[6]) - 1)
       && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
       && INTVAL (operands[9]) == (INTVAL (operands[10]) - 1))"
{
  const int fld0 = INTVAL (operands[3]) / 2;
  const int fld1 = INTVAL (operands[5]) / 2;
  const int fld2 = (INTVAL (operands[7]) - 8) / 2;
  const int fld3 = (INTVAL (operands[9]) - 8) / 2;

  operands[3] = GEN_INT (fld0 | (fld1 << 2) | (fld2 << 4) | (fld3 << 6));

  return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
12463
;; Masked 256-bit vshuf<shuffletype>32x4 expander.  Bit 0 of the immediate
;; (operand 3) selects indices 0-3 or 4-7 for the first four result
;; elements; bit 1 selects indices 8-11 or 12-15 for the last four.
(define_expand "avx512vl_shuf_<shuffletype>32x4_mask"
  [(match_operand:VI4F_256 0 "register_operand")
   (match_operand:VI4F_256 1 "register_operand")
   (match_operand:VI4F_256 2 "nonimmediate_operand")
   (match_operand:SI 3 "const_0_to_3_operand")
   (match_operand:VI4F_256 4 "register_operand")
   (match_operand:QI 5 "register_operand")]
  "TARGET_AVX512VL"
{
  int imm = INTVAL (operands[3]);
  int first_base = ((imm >> 0) & 1) * 4;
  int second_base = ((imm >> 1) & 1) * 4 + 8;
  rtx sel[8];

  for (int k = 0; k < 4; k++)
    {
      sel[k] = GEN_INT (first_base + k);
      sel[4 + k] = GEN_INT (second_base + k);
    }

  emit_insn (gen_avx512vl_shuf_<shuffletype>32x4_1_mask
    (operands[0], operands[1], operands[2],
     sel[0], sel[1], sel[2], sel[3],
     sel[4], sel[5], sel[6], sel[7],
     operands[4], operands[5]));
  DONE;
})
12487
;; 256-bit vshuf<shuffletype>32x4 with explicit element indices.  The insn
;; condition requires operands 3-6 and operands 7-10 each to be four
;; consecutive indices; the output code folds the two group selections
;; back into the 2-bit immediate.
(define_insn "<mask_codefor>avx512vl_shuf_<shuffletype>32x4_1<mask_name>"
  [(set (match_operand:VI4F_256 0 "register_operand" "=v")
        (vec_select:VI4F_256
          (vec_concat:<ssedoublemode>
            (match_operand:VI4F_256 1 "register_operand" "v")
            (match_operand:VI4F_256 2 "nonimmediate_operand" "vm"))
          (parallel [(match_operand 3 "const_0_to_7_operand")
                     (match_operand 4 "const_0_to_7_operand")
                     (match_operand 5 "const_0_to_7_operand")
                     (match_operand 6 "const_0_to_7_operand")
                     (match_operand 7 "const_8_to_15_operand")
                     (match_operand 8 "const_8_to_15_operand")
                     (match_operand 9 "const_8_to_15_operand")
                     (match_operand 10 "const_8_to_15_operand")])))]
  "TARGET_AVX512VL
   && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
       && INTVAL (operands[3]) == (INTVAL (operands[5]) - 2)
       && INTVAL (operands[3]) == (INTVAL (operands[6]) - 3)
       && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
       && INTVAL (operands[7]) == (INTVAL (operands[9]) - 2)
       && INTVAL (operands[7]) == (INTVAL (operands[10]) - 3))"
{
  const int first_sel = INTVAL (operands[3]) / 4;
  const int second_sel = (INTVAL (operands[7]) - 8) / 4;

  operands[3] = GEN_INT (first_sel | (second_sel << 1));

  return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
12521
;; Masked 512-bit vshuf<shuffletype>32x4 expander.  Each 2-bit field of the
;; immediate (operand 3) becomes four consecutive element indices for the
;; _1_mask pattern; the first two groups index 0-15, the last two index
;; 16-31.
(define_expand "avx512f_shuf_<shuffletype>32x4_mask"
  [(match_operand:V16FI 0 "register_operand")
   (match_operand:V16FI 1 "register_operand")
   (match_operand:V16FI 2 "nonimmediate_operand")
   (match_operand:SI 3 "const_0_to_255_operand")
   (match_operand:V16FI 4 "register_operand")
   (match_operand:HI 5 "register_operand")]
  "TARGET_AVX512F"
{
  int imm = INTVAL (operands[3]);
  rtx sel[16];

  for (int grp = 0; grp < 4; grp++)
    {
      /* Groups 2 and 3 are offset by 16.  */
      int base = ((imm >> (grp * 2)) & 3) * 4 + (grp < 2 ? 0 : 16);
      for (int k = 0; k < 4; k++)
        sel[grp * 4 + k] = GEN_INT (base + k);
    }

  emit_insn (gen_avx512f_shuf_<shuffletype>32x4_1_mask
    (operands[0], operands[1], operands[2],
     sel[0], sel[1], sel[2], sel[3],
     sel[4], sel[5], sel[6], sel[7],
     sel[8], sel[9], sel[10], sel[11],
     sel[12], sel[13], sel[14], sel[15],
     operands[4], operands[5]));
  DONE;
})
12553
;; 512-bit vshuf<shuffletype>32x4 with explicit element indices.  The insn
;; condition requires operands 3-6, 7-10, 11-14 and 15-18 each to be four
;; consecutive indices; the output code folds the four group selections
;; back into the 8-bit immediate.
(define_insn "avx512f_shuf_<shuffletype>32x4_1<mask_name>"
  [(set (match_operand:V16FI 0 "register_operand" "=v")
        (vec_select:V16FI
          (vec_concat:<ssedoublemode>
            (match_operand:V16FI 1 "register_operand" "v")
            (match_operand:V16FI 2 "nonimmediate_operand" "vm"))
          (parallel [(match_operand 3 "const_0_to_15_operand")
                     (match_operand 4 "const_0_to_15_operand")
                     (match_operand 5 "const_0_to_15_operand")
                     (match_operand 6 "const_0_to_15_operand")
                     (match_operand 7 "const_0_to_15_operand")
                     (match_operand 8 "const_0_to_15_operand")
                     (match_operand 9 "const_0_to_15_operand")
                     (match_operand 10 "const_0_to_15_operand")
                     (match_operand 11 "const_16_to_31_operand")
                     (match_operand 12 "const_16_to_31_operand")
                     (match_operand 13 "const_16_to_31_operand")
                     (match_operand 14 "const_16_to_31_operand")
                     (match_operand 15 "const_16_to_31_operand")
                     (match_operand 16 "const_16_to_31_operand")
                     (match_operand 17 "const_16_to_31_operand")
                     (match_operand 18 "const_16_to_31_operand")])))]
  "TARGET_AVX512F
   && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
       && INTVAL (operands[3]) == (INTVAL (operands[5]) - 2)
       && INTVAL (operands[3]) == (INTVAL (operands[6]) - 3)
       && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
       && INTVAL (operands[7]) == (INTVAL (operands[9]) - 2)
       && INTVAL (operands[7]) == (INTVAL (operands[10]) - 3)
       && INTVAL (operands[11]) == (INTVAL (operands[12]) - 1)
       && INTVAL (operands[11]) == (INTVAL (operands[13]) - 2)
       && INTVAL (operands[11]) == (INTVAL (operands[14]) - 3)
       && INTVAL (operands[15]) == (INTVAL (operands[16]) - 1)
       && INTVAL (operands[15]) == (INTVAL (operands[17]) - 2)
       && INTVAL (operands[15]) == (INTVAL (operands[18]) - 3))"
{
  const int fld0 = INTVAL (operands[3]) / 4;
  const int fld1 = INTVAL (operands[7]) / 4;
  const int fld2 = (INTVAL (operands[11]) - 16) / 4;
  const int fld3 = (INTVAL (operands[15]) - 16) / 4;

  operands[3] = GEN_INT (fld0 | (fld1 << 2) | (fld2 << 4) | (fld3 << 6));

  return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
12603
;; Masked 512-bit vpshufd expander.  The four 2-bit fields of the immediate
;; (operand 2) are turned into explicit element indices for
;; avx512f_pshufd_1_mask; the same four choices are repeated with offsets
;; 0, 4, 8 and 12.
(define_expand "avx512f_pshufdv3_mask"
  [(match_operand:V16SI 0 "register_operand")
   (match_operand:V16SI 1 "nonimmediate_operand")
   (match_operand:SI 2 "const_0_to_255_operand")
   (match_operand:V16SI 3 "register_operand")
   (match_operand:HI 4 "register_operand")]
  "TARGET_AVX512F"
{
  int imm = INTVAL (operands[2]);
  rtx sel[16];

  for (int grp = 0; grp < 4; grp++)
    for (int fld = 0; fld < 4; fld++)
      sel[grp * 4 + fld] = GEN_INT (((imm >> (fld * 2)) & 3) + grp * 4);

  emit_insn (gen_avx512f_pshufd_1_mask (operands[0], operands[1],
                                        sel[0], sel[1], sel[2], sel[3],
                                        sel[4], sel[5], sel[6], sel[7],
                                        sel[8], sel[9], sel[10], sel[11],
                                        sel[12], sel[13], sel[14], sel[15],
                                        operands[3], operands[4]));
  DONE;
})
12633
;; 512-bit vpshufd with explicit element indices.  The insn condition
;; requires operands 6-9, 10-13 and 14-17 to repeat operands 2-5 with
;; offsets 4, 8 and 12; the output code packs operands 2-5 into the 8-bit
;; immediate.
(define_insn "avx512f_pshufd_1<mask_name>"
  [(set (match_operand:V16SI 0 "register_operand" "=v")
        (vec_select:V16SI
          (match_operand:V16SI 1 "nonimmediate_operand" "vm")
          (parallel [(match_operand 2 "const_0_to_3_operand")
                     (match_operand 3 "const_0_to_3_operand")
                     (match_operand 4 "const_0_to_3_operand")
                     (match_operand 5 "const_0_to_3_operand")
                     (match_operand 6 "const_4_to_7_operand")
                     (match_operand 7 "const_4_to_7_operand")
                     (match_operand 8 "const_4_to_7_operand")
                     (match_operand 9 "const_4_to_7_operand")
                     (match_operand 10 "const_8_to_11_operand")
                     (match_operand 11 "const_8_to_11_operand")
                     (match_operand 12 "const_8_to_11_operand")
                     (match_operand 13 "const_8_to_11_operand")
                     (match_operand 14 "const_12_to_15_operand")
                     (match_operand 15 "const_12_to_15_operand")
                     (match_operand 16 "const_12_to_15_operand")
                     (match_operand 17 "const_12_to_15_operand")])))]
  "TARGET_AVX512F
   && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
   && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
   && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
   && INTVAL (operands[5]) + 4 == INTVAL (operands[9])
   && INTVAL (operands[2]) + 8 == INTVAL (operands[10])
   && INTVAL (operands[3]) + 8 == INTVAL (operands[11])
   && INTVAL (operands[4]) + 8 == INTVAL (operands[12])
   && INTVAL (operands[5]) + 8 == INTVAL (operands[13])
   && INTVAL (operands[2]) + 12 == INTVAL (operands[14])
   && INTVAL (operands[3]) + 12 == INTVAL (operands[15])
   && INTVAL (operands[4]) + 12 == INTVAL (operands[16])
   && INTVAL (operands[5]) + 12 == INTVAL (operands[17])"
{
  int imm = 0;

  /* Two immediate bits per selector, operand 2 in the lowest bits.  */
  for (int fld = 0; fld < 4; fld++)
    imm |= INTVAL (operands[2 + fld]) << (fld * 2);
  operands[2] = GEN_INT (imm);

  return "vpshufd\t{%2, %1, %0<mask_operand18>|%0<mask_operand18>, %1, %2}";
}
  [(set_attr "type" "sselog1")
   (set_attr "prefix" "evex")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "XI")])
12681
;; Masked 256-bit vpshufd expander.  The four 2-bit fields of the immediate
;; (operand 2) are turned into explicit element indices for
;; avx2_pshufd_1_mask; the same four choices are repeated with offset 4.
(define_expand "avx512vl_pshufdv3_mask"
  [(match_operand:V8SI 0 "register_operand")
   (match_operand:V8SI 1 "nonimmediate_operand")
   (match_operand:SI 2 "const_0_to_255_operand")
   (match_operand:V8SI 3 "register_operand")
   (match_operand:QI 4 "register_operand")]
  "TARGET_AVX512VL"
{
  int imm = INTVAL (operands[2]);
  rtx sel[8];

  for (int fld = 0; fld < 4; fld++)
    {
      int pick = (imm >> (fld * 2)) & 3;
      sel[fld] = GEN_INT (pick);
      sel[4 + fld] = GEN_INT (pick + 4);
    }

  emit_insn (gen_avx2_pshufd_1_mask (operands[0], operands[1],
                                     sel[0], sel[1], sel[2], sel[3],
                                     sel[4], sel[5], sel[6], sel[7],
                                     operands[3], operands[4]));
  DONE;
})
12703
;; 256-bit vpshufd expander.  The four 2-bit fields of the immediate
;; (operand 2) are turned into explicit element indices for
;; avx2_pshufd_1; the same four choices are repeated with offset 4.
(define_expand "avx2_pshufdv3"
  [(match_operand:V8SI 0 "register_operand")
   (match_operand:V8SI 1 "nonimmediate_operand")
   (match_operand:SI 2 "const_0_to_255_operand")]
  "TARGET_AVX2"
{
  int imm = INTVAL (operands[2]);
  rtx sel[8];

  for (int fld = 0; fld < 4; fld++)
    {
      int pick = (imm >> (fld * 2)) & 3;
      sel[fld] = GEN_INT (pick);
      sel[4 + fld] = GEN_INT (pick + 4);
    }

  emit_insn (gen_avx2_pshufd_1 (operands[0], operands[1],
                                sel[0], sel[1], sel[2], sel[3],
                                sel[4], sel[5], sel[6], sel[7]));
  DONE;
})
12722
;; 256-bit vpshufd with explicit element indices.  The insn condition
;; requires operands 6-9 to repeat operands 2-5 with offset 4; the output
;; code packs operands 2-5 into the 8-bit immediate.
(define_insn "avx2_pshufd_1<mask_name>"
  [(set (match_operand:V8SI 0 "register_operand" "=v")
        (vec_select:V8SI
          (match_operand:V8SI 1 "nonimmediate_operand" "vm")
          (parallel [(match_operand 2 "const_0_to_3_operand")
                     (match_operand 3 "const_0_to_3_operand")
                     (match_operand 4 "const_0_to_3_operand")
                     (match_operand 5 "const_0_to_3_operand")
                     (match_operand 6 "const_4_to_7_operand")
                     (match_operand 7 "const_4_to_7_operand")
                     (match_operand 8 "const_4_to_7_operand")
                     (match_operand 9 "const_4_to_7_operand")])))]
  "TARGET_AVX2
   && <mask_avx512vl_condition>
   && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
   && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
   && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
   && INTVAL (operands[5]) + 4 == INTVAL (operands[9])"
{
  int imm = 0;

  /* Two immediate bits per selector, operand 2 in the lowest bits.  */
  for (int fld = 0; fld < 4; fld++)
    imm |= INTVAL (operands[2 + fld]) << (fld * 2);
  operands[2] = GEN_INT (imm);

  return "vpshufd\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
}
  [(set_attr "type" "sselog1")
   (set_attr "prefix" "maybe_evex")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "OI")])
12755
;; Masked 128-bit pshufd expander.  The four 2-bit fields of the immediate
;; (operand 2) become the four explicit element indices passed to
;; sse2_pshufd_1_mask.
(define_expand "avx512vl_pshufd_mask"
  [(match_operand:V4SI 0 "register_operand")
   (match_operand:V4SI 1 "nonimmediate_operand")
   (match_operand:SI 2 "const_0_to_255_operand")
   (match_operand:V4SI 3 "register_operand")
   (match_operand:QI 4 "register_operand")]
  "TARGET_AVX512VL"
{
  int imm = INTVAL (operands[2]);
  rtx sel[4];

  for (int fld = 0; fld < 4; fld++)
    sel[fld] = GEN_INT ((imm >> (fld * 2)) & 3);

  emit_insn (gen_sse2_pshufd_1_mask (operands[0], operands[1],
                                     sel[0], sel[1], sel[2], sel[3],
                                     operands[3], operands[4]));
  DONE;
})
12773
;; 128-bit pshufd expander.  The four 2-bit fields in the low eight bits of
;; operand 2 become the four explicit element indices passed to
;; sse2_pshufd_1.
(define_expand "sse2_pshufd"
  [(match_operand:V4SI 0 "register_operand")
   (match_operand:V4SI 1 "nonimmediate_operand")
   (match_operand:SI 2 "const_int_operand")]
  "TARGET_SSE2"
{
  int imm = INTVAL (operands[2]);
  rtx sel[4];

  for (int fld = 0; fld < 4; fld++)
    sel[fld] = GEN_INT ((imm >> (fld * 2)) & 3);

  emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
                                sel[0], sel[1], sel[2], sel[3]));
  DONE;
})
12788
;; 128-bit pshufd with explicit element indices.  The output code packs
;; operands 2-5 into the 8-bit immediate, two bits each with operand 2 in
;; the lowest bits.
(define_insn "sse2_pshufd_1<mask_name>"
  [(set (match_operand:V4SI 0 "register_operand" "=v")
        (vec_select:V4SI
          (match_operand:V4SI 1 "nonimmediate_operand" "vm")
          (parallel [(match_operand 2 "const_0_to_3_operand")
                     (match_operand 3 "const_0_to_3_operand")
                     (match_operand 4 "const_0_to_3_operand")
                     (match_operand 5 "const_0_to_3_operand")])))]
  "TARGET_SSE2 && <mask_avx512vl_condition>"
{
  int imm = 0;

  for (int fld = 0; fld < 4; fld++)
    imm |= INTVAL (operands[2 + fld]) << (fld * 2);
  operands[2] = GEN_INT (imm);

  return "%vpshufd\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
}
  [(set_attr "type" "sselog1")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix" "<mask_prefix2>")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
12813
;; VPSHUFLW on a V32HI operand.  The shuffle is kept opaque as
;; UNSPEC_PSHUFLW of the source (operand 1) and an immediate in 0..255
;; (operand 2).  Enabled by TARGET_AVX512BW.  <mask_operand3> is spliced
;; after the destination in both assembler dialects; the <mask_*> subst
;; attributes themselves are defined outside this chunk.
12814 (define_insn "<mask_codefor>avx512bw_pshuflwv32hi<mask_name>"
12815 [(set (match_operand:V32HI 0 "register_operand" "=v")
12816 (unspec:V32HI
12817 [(match_operand:V32HI 1 "nonimmediate_operand" "vm")
12818 (match_operand:SI 2 "const_0_to_255_operand" "n")]
12819 UNSPEC_PSHUFLW))]
12820 "TARGET_AVX512BW"
12821 "vpshuflw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12822 [(set_attr "type" "sselog")
12823 (set_attr "prefix" "evex")
12824 (set_attr "mode" "XI")])
12825
;; Masked 256-bit PSHUFLW expander.  The 8-bit immediate in operand 2 is
;; decoded into four 2-bit selectors.  Each selector is passed to
;; avx2_pshuflw_1_mask twice: once as-is and once offset by 8.
;; Operands 3 and 4 are forwarded unchanged as the trailing arguments.
(define_expand "avx512vl_pshuflwv3_mask"
  [(match_operand:V16HI 0 "register_operand")
   (match_operand:V16HI 1 "nonimmediate_operand")
   (match_operand:SI 2 "const_0_to_255_operand")
   (match_operand:V16HI 3 "register_operand")
   (match_operand:HI 4 "register_operand")]
  "TARGET_AVX512VL && TARGET_AVX512BW"
{
  /* 2-bit fields of the immediate, lowest bits first.  */
  const int imm = INTVAL (operands[2]);
  const int f0 = imm & 3;
  const int f1 = (imm >> 2) & 3;
  const int f2 = (imm >> 4) & 3;
  const int f3 = (imm >> 6) & 3;

  emit_insn (gen_avx2_pshuflw_1_mask (operands[0], operands[1],
                                      GEN_INT (f0), GEN_INT (f1),
                                      GEN_INT (f2), GEN_INT (f3),
                                      GEN_INT (f0 + 8), GEN_INT (f1 + 8),
                                      GEN_INT (f2 + 8), GEN_INT (f3 + 8),
                                      operands[3], operands[4]));
  DONE;
})
12847
;; 256-bit PSHUFLW expander.  The 8-bit immediate in operand 2 is
;; decoded into four 2-bit selectors; avx2_pshuflw_1 receives each one
;; as-is and again offset by 8.
(define_expand "avx2_pshuflwv3"
  [(match_operand:V16HI 0 "register_operand")
   (match_operand:V16HI 1 "nonimmediate_operand")
   (match_operand:SI 2 "const_0_to_255_operand")]
  "TARGET_AVX2"
{
  /* 2-bit fields of the immediate, lowest bits first.  */
  const int imm = INTVAL (operands[2]);
  const int f0 = imm & 3;
  const int f1 = (imm >> 2) & 3;
  const int f2 = (imm >> 4) & 3;
  const int f3 = (imm >> 6) & 3;

  emit_insn (gen_avx2_pshuflw_1 (operands[0], operands[1],
                                 GEN_INT (f0), GEN_INT (f1),
                                 GEN_INT (f2), GEN_INT (f3),
                                 GEN_INT (f0 + 8), GEN_INT (f1 + 8),
                                 GEN_INT (f2 + 8), GEN_INT (f3 + 8)));
  DONE;
})
12866
;; 256-bit PSHUFLW as an explicit vec_select over V16HI.  Result
;; elements 0..3 are chosen by operands 2..5 (each 0..3) and elements
;; 8..11 by operands 6..9 (each 8..11); elements 4..7 and 12..15 are
;; copied in place.  The insn condition requires operands 6..9 to equal
;; operands 2..5 plus 8, so one immediate describes both groups.
(define_insn "avx2_pshuflw_1<mask_name>"
  [(set (match_operand:V16HI 0 "register_operand" "=v")
        (vec_select:V16HI
          (match_operand:V16HI 1 "nonimmediate_operand" "vm")
          (parallel [(match_operand 2 "const_0_to_3_operand")
                     (match_operand 3 "const_0_to_3_operand")
                     (match_operand 4 "const_0_to_3_operand")
                     (match_operand 5 "const_0_to_3_operand")
                     (const_int 4)
                     (const_int 5)
                     (const_int 6)
                     (const_int 7)
                     (match_operand 6 "const_8_to_11_operand")
                     (match_operand 7 "const_8_to_11_operand")
                     (match_operand 8 "const_8_to_11_operand")
                     (match_operand 9 "const_8_to_11_operand")
                     (const_int 12)
                     (const_int 13)
                     (const_int 14)
                     (const_int 15)])))]
  "TARGET_AVX2
   && <mask_avx512bw_condition> && <mask_avx512vl_condition>
   && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
   && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
   && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
   && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
{
  /* Only operands 2..5 are needed; 6..9 are tied to them by the
     insn condition.  */
  int imm = (INTVAL (operands[2])
             | (INTVAL (operands[3]) << 2)
             | (INTVAL (operands[4]) << 4)
             | (INTVAL (operands[5]) << 6));
  operands[2] = GEN_INT (imm);

  return "vpshuflw\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix" "maybe_evex")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "OI")])
12907
;; Masked 128-bit PSHUFLW expander.  The 8-bit immediate in operand 2 is
;; decoded into four 2-bit selectors for sse2_pshuflw_1_mask; operands 3
;; and 4 are forwarded unchanged as the trailing arguments.
(define_expand "avx512vl_pshuflw_mask"
  [(match_operand:V8HI 0 "register_operand")
   (match_operand:V8HI 1 "nonimmediate_operand")
   (match_operand:SI 2 "const_0_to_255_operand")
   (match_operand:V8HI 3 "register_operand")
   (match_operand:QI 4 "register_operand")]
  "TARGET_AVX512VL && TARGET_AVX512BW"
{
  /* One 2-bit selector per shuffled element, lowest bits first.  */
  const int imm = INTVAL (operands[2]);
  rtx sel0 = GEN_INT (imm & 3);
  rtx sel1 = GEN_INT ((imm >> 2) & 3);
  rtx sel2 = GEN_INT ((imm >> 4) & 3);
  rtx sel3 = GEN_INT ((imm >> 6) & 3);

  emit_insn (gen_sse2_pshuflw_1_mask (operands[0], operands[1],
                                      sel0, sel1, sel2, sel3,
                                      operands[3], operands[4]));
  DONE;
})
12925
;; PSHUFLW expander.  The integer immediate in operand 2 is decoded into
;; four 2-bit selectors (bits 1:0, 3:2, 5:4 and 7:6) and passed to
;; sse2_pshuflw_1.
(define_expand "sse2_pshuflw"
  [(match_operand:V8HI 0 "register_operand")
   (match_operand:V8HI 1 "nonimmediate_operand")
   (match_operand:SI 2 "const_int_operand")]
  "TARGET_SSE2"
{
  /* One 2-bit selector per shuffled element, lowest bits first.  */
  const int imm = INTVAL (operands[2]);
  rtx sel0 = GEN_INT (imm & 3);
  rtx sel1 = GEN_INT ((imm >> 2) & 3);
  rtx sel2 = GEN_INT ((imm >> 4) & 3);
  rtx sel3 = GEN_INT ((imm >> 6) & 3);

  emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
                                 sel0, sel1, sel2, sel3));
  DONE;
})
12940
;; 128-bit PSHUFLW as an explicit vec_select over V8HI.  Result elements
;; 0..3 are chosen by operands 2..5 (each 0..3); elements 4..7 are
;; copied in place.  The output code packs the four selectors into the
;; immediate printed as %2.
(define_insn "sse2_pshuflw_1<mask_name>"
  [(set (match_operand:V8HI 0 "register_operand" "=v")
        (vec_select:V8HI
          (match_operand:V8HI 1 "nonimmediate_operand" "vm")
          (parallel [(match_operand 2 "const_0_to_3_operand")
                     (match_operand 3 "const_0_to_3_operand")
                     (match_operand 4 "const_0_to_3_operand")
                     (match_operand 5 "const_0_to_3_operand")
                     (const_int 4)
                     (const_int 5)
                     (const_int 6)
                     (const_int 7)])))]
  "TARGET_SSE2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
{
  /* The selector for result element I lands in bits 2*I+1..2*I.  */
  int imm = (INTVAL (operands[2])
             | (INTVAL (operands[3]) << 2)
             | (INTVAL (operands[4]) << 4)
             | (INTVAL (operands[5]) << 6));
  operands[2] = GEN_INT (imm);

  return "%vpshuflw\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
12970
;; 256-bit PSHUFHW expander.  The 8-bit immediate in operand 2 is
;; decoded into four 2-bit selectors; avx2_pshufhw_1 receives each one
;; offset by 4 and again offset by 12.
(define_expand "avx2_pshufhwv3"
  [(match_operand:V16HI 0 "register_operand")
   (match_operand:V16HI 1 "nonimmediate_operand")
   (match_operand:SI 2 "const_0_to_255_operand")]
  "TARGET_AVX2"
{
  /* 2-bit fields of the immediate, lowest bits first.  */
  const int imm = INTVAL (operands[2]);
  const int f0 = imm & 3;
  const int f1 = (imm >> 2) & 3;
  const int f2 = (imm >> 4) & 3;
  const int f3 = (imm >> 6) & 3;

  emit_insn (gen_avx2_pshufhw_1 (operands[0], operands[1],
                                 GEN_INT (f0 + 4), GEN_INT (f1 + 4),
                                 GEN_INT (f2 + 4), GEN_INT (f3 + 4),
                                 GEN_INT (f0 + 12), GEN_INT (f1 + 12),
                                 GEN_INT (f2 + 12), GEN_INT (f3 + 12)));
  DONE;
})
12989
;; VPSHUFHW on a V32HI operand.  The shuffle is kept opaque as
;; UNSPEC_PSHUFHW of the source (operand 1) and an immediate in 0..255
;; (operand 2).  Enabled by TARGET_AVX512BW.  <mask_operand3> is spliced
;; after the destination in both assembler dialects; the <mask_*> subst
;; attributes themselves are defined outside this chunk.
12990 (define_insn "<mask_codefor>avx512bw_pshufhwv32hi<mask_name>"
12991 [(set (match_operand:V32HI 0 "register_operand" "=v")
12992 (unspec:V32HI
12993 [(match_operand:V32HI 1 "nonimmediate_operand" "vm")
12994 (match_operand:SI 2 "const_0_to_255_operand" "n")]
12995 UNSPEC_PSHUFHW))]
12996 "TARGET_AVX512BW"
12997 "vpshufhw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12998 [(set_attr "type" "sselog")
12999 (set_attr "prefix" "evex")
13000 (set_attr "mode" "XI")])
13001
;; Masked 256-bit PSHUFHW expander.  The 8-bit immediate in operand 2 is
;; decoded into four 2-bit selectors.  Each selector is passed to
;; avx2_pshufhw_1_mask twice: offset by 4 and offset by 12.
;; Operands 3 and 4 are forwarded unchanged as the trailing arguments.
(define_expand "avx512vl_pshufhwv3_mask"
  [(match_operand:V16HI 0 "register_operand")
   (match_operand:V16HI 1 "nonimmediate_operand")
   (match_operand:SI 2 "const_0_to_255_operand")
   (match_operand:V16HI 3 "register_operand")
   (match_operand:HI 4 "register_operand")]
  "TARGET_AVX512VL && TARGET_AVX512BW"
{
  /* 2-bit fields of the immediate, lowest bits first.  */
  const int imm = INTVAL (operands[2]);
  const int f0 = imm & 3;
  const int f1 = (imm >> 2) & 3;
  const int f2 = (imm >> 4) & 3;
  const int f3 = (imm >> 6) & 3;

  emit_insn (gen_avx2_pshufhw_1_mask (operands[0], operands[1],
                                      GEN_INT (f0 + 4), GEN_INT (f1 + 4),
                                      GEN_INT (f2 + 4), GEN_INT (f3 + 4),
                                      GEN_INT (f0 + 12), GEN_INT (f1 + 12),
                                      GEN_INT (f2 + 12), GEN_INT (f3 + 12),
                                      operands[3], operands[4]));
  DONE;
})
13023
;; 256-bit PSHUFHW as an explicit vec_select over V16HI.  Result
;; elements 4..7 are chosen by operands 2..5 (each 4..7) and elements
;; 12..15 by operands 6..9 (each 12..15); elements 0..3 and 8..11 are
;; copied in place.  The insn condition requires operands 6..9 to equal
;; operands 2..5 plus 8, so one immediate describes both groups.
(define_insn "avx2_pshufhw_1<mask_name>"
  [(set (match_operand:V16HI 0 "register_operand" "=v")
        (vec_select:V16HI
          (match_operand:V16HI 1 "nonimmediate_operand" "vm")
          (parallel [(const_int 0)
                     (const_int 1)
                     (const_int 2)
                     (const_int 3)
                     (match_operand 2 "const_4_to_7_operand")
                     (match_operand 3 "const_4_to_7_operand")
                     (match_operand 4 "const_4_to_7_operand")
                     (match_operand 5 "const_4_to_7_operand")
                     (const_int 8)
                     (const_int 9)
                     (const_int 10)
                     (const_int 11)
                     (match_operand 6 "const_12_to_15_operand")
                     (match_operand 7 "const_12_to_15_operand")
                     (match_operand 8 "const_12_to_15_operand")
                     (match_operand 9 "const_12_to_15_operand")])))]
  "TARGET_AVX2
   && <mask_avx512bw_condition> && <mask_avx512vl_condition>
   && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
   && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
   && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
   && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
{
  /* Element indices are 4..7; subtract the bias of 4 to recover each
     2-bit immediate field.  */
  int imm = ((INTVAL (operands[2]) - 4)
             | ((INTVAL (operands[3]) - 4) << 2)
             | ((INTVAL (operands[4]) - 4) << 4)
             | ((INTVAL (operands[5]) - 4) << 6));
  operands[2] = GEN_INT (imm);

  return "vpshufhw\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix" "maybe_evex")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "OI")])
13064
;; Masked 128-bit PSHUFHW expander.  The 8-bit immediate in operand 2 is
;; decoded into four 2-bit selectors, each offset by 4, for
;; sse2_pshufhw_1_mask; operands 3 and 4 are forwarded unchanged as the
;; trailing arguments.
(define_expand "avx512vl_pshufhw_mask"
  [(match_operand:V8HI 0 "register_operand")
   (match_operand:V8HI 1 "nonimmediate_operand")
   (match_operand:SI 2 "const_0_to_255_operand")
   (match_operand:V8HI 3 "register_operand")
   (match_operand:QI 4 "register_operand")]
  "TARGET_AVX512VL && TARGET_AVX512BW"
{
  /* Each 2-bit field is biased by 4 to form an element index 4..7.  */
  const int imm = INTVAL (operands[2]);
  rtx sel0 = GEN_INT ((imm & 3) + 4);
  rtx sel1 = GEN_INT (((imm >> 2) & 3) + 4);
  rtx sel2 = GEN_INT (((imm >> 4) & 3) + 4);
  rtx sel3 = GEN_INT (((imm >> 6) & 3) + 4);

  emit_insn (gen_sse2_pshufhw_1_mask (operands[0], operands[1],
                                      sel0, sel1, sel2, sel3,
                                      operands[3], operands[4]));
  DONE;
})
13082
;; PSHUFHW expander.  The integer immediate in operand 2 is decoded into
;; four 2-bit selectors, each offset by 4, and passed to sse2_pshufhw_1.
(define_expand "sse2_pshufhw"
  [(match_operand:V8HI 0 "register_operand")
   (match_operand:V8HI 1 "nonimmediate_operand")
   (match_operand:SI 2 "const_int_operand")]
  "TARGET_SSE2"
{
  /* Each 2-bit field is biased by 4 to form an element index 4..7.  */
  const int imm = INTVAL (operands[2]);
  rtx sel0 = GEN_INT ((imm & 3) + 4);
  rtx sel1 = GEN_INT (((imm >> 2) & 3) + 4);
  rtx sel2 = GEN_INT (((imm >> 4) & 3) + 4);
  rtx sel3 = GEN_INT (((imm >> 6) & 3) + 4);

  emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
                                 sel0, sel1, sel2, sel3));
  DONE;
})
13097
;; 128-bit PSHUFHW as an explicit vec_select over V8HI.  Result elements
;; 4..7 are chosen by operands 2..5 (each 4..7); elements 0..3 are
;; copied in place.  The output code removes the bias of 4 and packs the
;; four selectors into the immediate printed as %2.
(define_insn "sse2_pshufhw_1<mask_name>"
  [(set (match_operand:V8HI 0 "register_operand" "=v")
        (vec_select:V8HI
          (match_operand:V8HI 1 "nonimmediate_operand" "vm")
          (parallel [(const_int 0)
                     (const_int 1)
                     (const_int 2)
                     (const_int 3)
                     (match_operand 2 "const_4_to_7_operand")
                     (match_operand 3 "const_4_to_7_operand")
                     (match_operand 4 "const_4_to_7_operand")
                     (match_operand 5 "const_4_to_7_operand")])))]
  "TARGET_SSE2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
{
  /* Element indices are 4..7; subtract 4 to get each 2-bit field.  */
  int imm = ((INTVAL (operands[2]) - 4)
             | ((INTVAL (operands[3]) - 4) << 2)
             | ((INTVAL (operands[4]) - 4) << 4)
             | ((INTVAL (operands[5]) - 4) << 6));
  operands[2] = GEN_INT (imm);

  return "%vpshufhw\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix" "maybe_vex")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
13127
;; Build a V4SI by merging a broadcast of SImode operand 1 with operand 2
;; under vec_merge mask 1.  Operand 2 is not supplied by the caller: the
;; preparation statement sets it to the V4SImode zero vector.
13128 (define_expand "sse2_loadd"
13129 [(set (match_operand:V4SI 0 "register_operand")
13130 (vec_merge:V4SI
13131 (vec_duplicate:V4SI
13132 (match_operand:SI 1 "nonimmediate_operand"))
13133 (match_dup 2)
13134 (const_int 1)))]
13135 "TARGET_SSE"
13136 "operands[2] = CONST0_RTX (V4SImode);")
13137
;; Merge a broadcast of SImode operand 2 into V4SI operand 1 (a register
;; or zero, per reg_or_0_operand) under vec_merge mask 1.
;; Alternatives 0 and 1 emit (v)movd and need isa sse2; alternatives 2
;; and 3 emit movss (noavx); alternative 4 emits the three-operand
;; vmovss (avx).
13138 (define_insn "sse2_loadld"
13139 [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x,x,x")
13140 (vec_merge:V4SI
13141 (vec_duplicate:V4SI
13142 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,x"))
13143 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0,x")
13144 (const_int 1)))]
13145 "TARGET_SSE"
13146 "@
13147 %vmovd\t{%2, %0|%0, %2}
13148 %vmovd\t{%2, %0|%0, %2}
13149 movss\t{%2, %0|%0, %2}
13150 movss\t{%2, %0|%0, %2}
13151 vmovss\t{%2, %1, %0|%0, %1, %2}"
13152 [(set_attr "isa" "sse2,sse2,noavx,noavx,avx")
13153 (set_attr "type" "ssemov")
13154 (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,vex")
13155 (set_attr "mode" "TI,TI,V4SF,SF,SF")])
13156
;; Mode iterator over V16QI and V8HI.  The V16QI entry is enabled only
;; when TARGET_SSE4_1 holds; V8HI carries no extra condition.
13157 ;; QI and HI modes handled by pextr patterns.
13158 (define_mode_iterator PEXTR_MODE12
13159 [(V16QI "TARGET_SSE4_1") V8HI])
13160
;; Extract the element indexed by operand 2 from a PEXTR_MODE12 vector
;; register using pextr<ssemodesuffix>.  Alternative 0 writes a general
;; register; alternative 1 writes memory and is limited to isa sse4.
;; prefix_extra is "*" only for alternative 0 in V8HImode and "1" in
;; every other case.
13161 (define_insn "*vec_extract<mode>"
13162 [(set (match_operand:<ssescalarmode> 0 "register_sse4nonimm_operand" "=r,m")
13163 (vec_select:<ssescalarmode>
13164 (match_operand:PEXTR_MODE12 1 "register_operand" "x,x")
13165 (parallel
13166 [(match_operand:SI 2 "const_0_to_<ssescalarnummask>_operand")])))]
13167 "TARGET_SSE2"
13168 "%vpextr<ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}"
13169 [(set_attr "isa" "*,sse4")
13170 (set_attr "type" "sselog1")
13171 (set_attr "prefix_data16" "1")
13172 (set (attr "prefix_extra")
13173 (if_then_else
13174 (and (eq_attr "alternative" "0")
13175 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
13176 (const_string "*")
13177 (const_string "1")))
13178 (set_attr "length_immediate" "1")
13179 (set_attr "prefix" "maybe_vex")
13180 (set_attr "mode" "TI")])
13181
;; Zero-extending form of the pextr element extraction: the selected
;; element of a PEXTR_MODE12 register is zero-extended into an SWI48
;; general register.  The template prints the destination with %k0.
;; prefix_extra is "*" for V8HImode and "1" otherwise.
13182 (define_insn "*vec_extract<PEXTR_MODE12:mode>_zext"
13183 [(set (match_operand:SWI48 0 "register_operand" "=r")
13184 (zero_extend:SWI48
13185 (vec_select:<PEXTR_MODE12:ssescalarmode>
13186 (match_operand:PEXTR_MODE12 1 "register_operand" "x")
13187 (parallel
13188 [(match_operand:SI 2
13189 "const_0_to_<PEXTR_MODE12:ssescalarnummask>_operand")]))))]
13190 "TARGET_SSE2"
13191 "%vpextr<PEXTR_MODE12:ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}"
13192 [(set_attr "type" "sselog1")
13193 (set_attr "prefix_data16" "1")
13194 (set (attr "prefix_extra")
13195 (if_then_else
13196 (eq (const_string "<PEXTR_MODE12:MODE>mode") (const_string "V8HImode"))
13197 (const_string "*")
13198 (const_string "1")))
13199 (set_attr "length_immediate" "1")
13200 (set_attr "prefix" "maybe_vex")
13201 (set_attr "mode" "TI")])
13202
;; Element extraction from a VI12_128 vector held in offsettable memory
;; ("o") into a general register.  The template is "#", so this insn is
;; never output directly and must be split.
;; NOTE(review): presumably the VI_128 memory define_split later in this
;; file performs that split -- confirm.
13203 (define_insn "*vec_extract<mode>_mem"
13204 [(set (match_operand:<ssescalarmode> 0 "register_operand" "=r")
13205 (vec_select:<ssescalarmode>
13206 (match_operand:VI12_128 1 "memory_operand" "o")
13207 (parallel
13208 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
13209 "TARGET_SSE"
13210 "#")
13211
;; Extract element 0 of an <ssevecmode> vector into an SWI48 destination.
;; The condition rejects the memory-to-memory case.  The template is
;; "#", so the insn must be split rather than output; alternative 1 is
;; limited to isa sse4.
13212 (define_insn "*vec_extract<ssevecmodelower>_0"
13213 [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r ,r,x ,m")
13214 (vec_select:SWI48
13215 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "mYj,x,xm,x")
13216 (parallel [(const_int 0)])))]
13217 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
13218 "#"
13219 [(set_attr "isa" "*,sse4,*,*")])
13220
;; Zero-extend element 0 of a V4SI register into a DImode general
;; register.  Output as "#"; once reload has completed it is split into
;; a zero_extend:DI of operand 1, which the preparation statement
;; rewrites as an SImode REG with the same register number.
13221 (define_insn_and_split "*vec_extractv4si_0_zext"
13222 [(set (match_operand:DI 0 "register_operand" "=r")
13223 (zero_extend:DI
13224 (vec_select:SI
13225 (match_operand:V4SI 1 "register_operand" "x")
13226 (parallel [(const_int 0)]))))]
13227 "TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_FROM_VEC"
13228 "#"
13229 "&& reload_completed"
13230 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
13231 "operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));")
13232
;; Extract element 0 of a V2DI into a DImode SSE register or memory.
;; Only enabled when !TARGET_64BIT, and never for memory-to-memory.
;; The template is "#", so the insn must be split rather than output.
13233 (define_insn "*vec_extractv2di_0_sse"
13234 [(set (match_operand:DI 0 "nonimmediate_operand" "=x,m")
13235 (vec_select:DI
13236 (match_operand:V2DI 1 "nonimmediate_operand" "xm,x")
13237 (parallel [(const_int 0)])))]
13238 "TARGET_SSE && !TARGET_64BIT
13239 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
13240 "#")
13241
;; After reload, an element-0 extraction from a vector register becomes
;; a plain move: operand 1 is rewritten as a <MODE>mode REG with the
;; same register number as the vector register.
13242 (define_split
13243 [(set (match_operand:SWI48x 0 "nonimmediate_operand")
13244 (vec_select:SWI48x
13245 (match_operand:<ssevecmode> 1 "register_operand")
13246 (parallel [(const_int 0)])))]
13247 "TARGET_SSE && reload_completed"
13248 [(set (match_dup 0) (match_dup 1))]
13249 "operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1]));")
13250
;; Extract the SImode element indexed by operand 2 from a V4SI register.
;; Alternative 0 uses (v)pextrd.  Alternatives 1..3 instead shift the
;; whole register right by 4 * index bytes with psrldq (noavx, in place)
;; or vpsrldq (avx, three-operand).
(define_insn "*vec_extractv4si"
  [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,Yr,*x,x")
        (vec_select:SI
          (match_operand:V4SI 1 "register_operand" "x,0,0,x")
          (parallel [(match_operand:SI 2 "const_0_to_3_operand")])))]
  "TARGET_SSE4_1"
{
  switch (which_alternative)
    {
    case 0:
      return "%vpextrd\t{%2, %1, %0|%0, %1, %2}";

    case 1:
    case 2:
    case 3:
      break;

    default:
      gcc_unreachable ();
    }

  /* The byte-shift forms take a byte count rather than an element
     index; each element is 4 bytes wide.  */
  operands[2] = GEN_INT (INTVAL (operands[2]) * 4);
  if (which_alternative == 3)
    return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
  return "psrldq\t{%2, %0|%0, %2}";
}
  [(set_attr "isa" "*,noavx,noavx,avx")
   (set_attr "type" "sselog1,sseishft1,sseishft1,sseishft1")
   (set_attr "prefix_extra" "1,*,*,*")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "maybe_vex,orig,orig,vex")
   (set_attr "mode" "TI")])
13282
;; Zero-extend the SImode element indexed by operand 2 of a V4SI
;; register into a DImode general register, using (v)pextrd with the
;; destination printed as %k0.  Requires TARGET_64BIT and TARGET_SSE4_1.
13283 (define_insn "*vec_extractv4si_zext"
13284 [(set (match_operand:DI 0 "register_operand" "=r")
13285 (zero_extend:DI
13286 (vec_select:SI
13287 (match_operand:V4SI 1 "register_operand" "x")
13288 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
13289 "TARGET_64BIT && TARGET_SSE4_1"
13290 "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
13291 [(set_attr "type" "sselog1")
13292 (set_attr "prefix_extra" "1")
13293 (set_attr "length_immediate" "1")
13294 (set_attr "prefix" "maybe_vex")
13295 (set_attr "mode" "TI")])
13296
;; Extract an SImode element from a V4SI held in offsettable memory into
;; an SSE ("x") or general ("r") register.  The template is "#", so the
;; insn must be split rather than output.
;; NOTE(review): presumably the VI_128 memory define_split later in this
;; file performs that split -- confirm.
13297 (define_insn "*vec_extractv4si_mem"
13298 [(set (match_operand:SI 0 "register_operand" "=x,r")
13299 (vec_select:SI
13300 (match_operand:V4SI 1 "memory_operand" "o,o")
13301 (parallel [(match_operand 2 "const_0_to_3_operand")])))]
13302 "TARGET_SSE"
13303 "#")
13304
;; Zero-extending SImode extraction from a V4SI in offsettable memory.
;; Kept as "#" until reload has completed, then replaced by a
;; zero_extend:DI of the SImode memory word located 4 * operand 2 bytes
;; into operand 1.
(define_insn_and_split "*vec_extractv4si_zext_mem"
  [(set (match_operand:DI 0 "register_operand" "=x,r")
        (zero_extend:DI
          (vec_select:SI
            (match_operand:V4SI 1 "memory_operand" "o,o")
            (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
  "TARGET_64BIT && TARGET_SSE"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
{
  /* Each V4SI element is 4 bytes wide.  */
  const HOST_WIDE_INT byte_off = INTVAL (operands[2]) * 4;
  operands[1] = adjust_address (operands[1], SImode, byte_off);
})
13318
;; Extract element 1 of a V2DI; memory-to-memory is rejected by the
;; condition.  Alternatives in order: (v)pextrq (x64_sse4), (v)movhps,
;; psrldq by 8 bytes (sse2_noavx), vpsrldq by 8 bytes (avx), movhlps
;; (noavx), and two "#" alternatives that read offsettable memory and
;; must be split (the last one limited to x64).
13319 (define_insn "*vec_extractv2di_1"
13320 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,m,x,x,x,x,r")
13321 (vec_select:DI
13322 (match_operand:V2DI 1 "nonimmediate_operand" "x ,x,0,x,x,o,o")
13323 (parallel [(const_int 1)])))]
13324 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
13325 "@
13326 %vpextrq\t{$1, %1, %0|%0, %1, 1}
13327 %vmovhps\t{%1, %0|%0, %1}
13328 psrldq\t{$8, %0|%0, 8}
13329 vpsrldq\t{$8, %1, %0|%0, %1, 8}
13330 movhlps\t{%1, %0|%0, %1}
13331 #
13332 #"
13333 [(set_attr "isa" "x64_sse4,*,sse2_noavx,avx,noavx,*,x64")
13334 (set_attr "type" "sselog1,ssemov,sseishft1,sseishft1,ssemov,ssemov,imov")
13335 (set_attr "length_immediate" "1,*,1,1,*,*,*")
13336 (set_attr "prefix_rex" "1,*,*,*,*,*,*")
13337 (set_attr "prefix_extra" "1,*,*,*,*,*,*")
13338 (set_attr "prefix" "maybe_vex,maybe_vex,orig,vex,orig,*,*")
13339 (set_attr "mode" "TI,V2SF,TI,TI,V4SF,DI,DI")])
13340
;; After reload, extracting element N of a 128-bit integer vector that
;; lives in memory becomes a scalar load: operand 1 is re-addressed in
;; the element mode at byte offset N * (element size).
(define_split
  [(set (match_operand:<ssescalarmode> 0 "register_operand")
        (vec_select:<ssescalarmode>
          (match_operand:VI_128 1 "memory_operand")
          (parallel
            [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
  "TARGET_SSE && reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  /* Byte displacement of the selected element within the vector.  */
  const int byte_off
    = INTVAL (operands[2]) * GET_MODE_SIZE (<ssescalarmode>mode);

  operands[1] = adjust_address (operands[1], <ssescalarmode>mode, byte_off);
})
13354
;; Matches a move whose source is a byte-offset-0 SUBREG of a vector
;; pseudo (integer or float vector, 16 bytes, or 32 with TARGET_AVX, or
;; 64 with TARGET_AVX512F) and rewrites it as a vec_select of element 0.
;; Runs only while pseudos can still be created.  DImode is accepted
;; only when TARGET_64BIT holds or the destination is memory.
;; The preparation code narrows the source step by step: a 64-byte
;; vector is first reduced to its low 32-byte half (falling through),
;; a 32-byte vector to its low 16-byte half, and a 16-byte vector is
;; simply viewed in <ssevecmode> via gen_lowpart.
13355 ;; Turn SImode or DImode extraction from arbitrary SSE/AVX/AVX512F
13356 ;; vector modes into vec_extract*.
13357 (define_split
13358 [(set (match_operand:SWI48x 0 "nonimmediate_operand")
13359 (match_operand:SWI48x 1 "register_operand"))]
13360 "can_create_pseudo_p ()
13361 && SUBREG_P (operands[1])
13362 && REG_P (SUBREG_REG (operands[1]))
13363 && (GET_MODE_CLASS (GET_MODE (SUBREG_REG (operands[1]))) == MODE_VECTOR_INT
13364 || (GET_MODE_CLASS (GET_MODE (SUBREG_REG (operands[1])))
13365 == MODE_VECTOR_FLOAT))
13366 && SUBREG_BYTE (operands[1]) == 0
13367 && TARGET_SSE
13368 && (GET_MODE_SIZE (GET_MODE (SUBREG_REG (operands[1]))) == 16
13369 || (GET_MODE_SIZE (GET_MODE (SUBREG_REG (operands[1]))) == 32
13370 && TARGET_AVX)
13371 || (GET_MODE_SIZE (GET_MODE (SUBREG_REG (operands[1]))) == 64
13372 && TARGET_AVX512F))
13373 && (<MODE>mode == SImode || TARGET_64BIT || MEM_P (operands[0]))"
13374 [(set (match_dup 0) (vec_select:SWI48x (match_dup 1)
13375 (parallel [(const_int 0)])))]
13376 {
13377 rtx tmp;
13378 operands[1] = SUBREG_REG (operands[1]);
13379 switch (GET_MODE_SIZE (GET_MODE (operands[1])))
13380 {
13381 case 64:
13382 if (<MODE>mode == SImode)
13383 {
13384 tmp = gen_reg_rtx (V8SImode);
13385 emit_insn (gen_vec_extract_lo_v16si (tmp,
13386 gen_lowpart (V16SImode,
13387 operands[1])));
13388 }
13389 else
13390 {
13391 tmp = gen_reg_rtx (V4DImode);
13392 emit_insn (gen_vec_extract_lo_v8di (tmp,
13393 gen_lowpart (V8DImode,
13394 operands[1])));
13395 }
13396 operands[1] = tmp;
13397 /* FALLTHRU */
13398 case 32:
13399 tmp = gen_reg_rtx (<ssevecmode>mode);
13400 if (<MODE>mode == SImode)
13401 emit_insn (gen_vec_extract_lo_v8si (tmp, gen_lowpart (V8SImode,
13402 operands[1])));
13403 else
13404 emit_insn (gen_vec_extract_lo_v4di (tmp, gen_lowpart (V4DImode,
13405 operands[1])));
13406 operands[1] = tmp;
13407 break;
13408 case 16:
13409 operands[1] = gen_lowpart (<ssevecmode>mode, operands[1]);
13410 break;
13411 }
13412 })
13413
;; Concatenate two SImode values into a V2SI when TARGET_SSE4_1 holds;
;; the case where both inputs are memory is rejected.  Alternatives in
;; order: pinsrd (two noavx forms), vpinsrd (avx), punpckldq (two noavx
;; forms), vpunpckldq (avx), (v)movd when operand 2 matches "C", and two
;; alternatives with "*y" destinations using punpckldq and movd
;; (types mmxcvt and mmxmov).
13414 (define_insn "*vec_concatv2si_sse4_1"
13415 [(set (match_operand:V2SI 0 "register_operand"
13416 "=Yr,*x,x, Yr,*x,x, x, *y,*y")
13417 (vec_concat:V2SI
13418 (match_operand:SI 1 "nonimmediate_operand"
13419 " 0, 0,x, 0,0, x,rm, 0,rm")
13420 (match_operand:SI 2 "vector_move_operand"
13421 " rm,rm,rm,Yr,*x,x, C,*ym, C")))]
13422 "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
13423 "@
13424 pinsrd\t{$1, %2, %0|%0, %2, 1}
13425 pinsrd\t{$1, %2, %0|%0, %2, 1}
13426 vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
13427 punpckldq\t{%2, %0|%0, %2}
13428 punpckldq\t{%2, %0|%0, %2}
13429 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
13430 %vmovd\t{%1, %0|%0, %1}
13431 punpckldq\t{%2, %0|%0, %2}
13432 movd\t{%1, %0|%0, %1}"
13433 [(set_attr "isa" "noavx,noavx,avx,noavx,noavx,avx,*,*,*")
13434 (set_attr "type" "sselog,sselog,sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
13435 (set_attr "prefix_extra" "1,1,1,*,*,*,*,*,*")
13436 (set_attr "length_immediate" "1,1,1,*,*,*,*,*,*")
13437 (set_attr "prefix" "orig,orig,vex,orig,orig,vex,maybe_vex,orig,orig")
13438 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,DI,DI")])
13439
;; Concatenate two SImode values into a V2SI when TARGET_SSE holds but
;; TARGET_SSE4_1 does not.  Operand 2 is a register or zero
;; (reg_or_0_operand).  The first three alternatives need isa sse2; the
;; unpcklps/movss alternatives and the "*y" alternatives carry no isa
;; restriction.
13440 ;; ??? In theory we can match memory for the MMX alternative, but allowing
13441 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
13442 ;; alternatives pretty much forces the MMX alternative to be chosen.
13443 (define_insn "*vec_concatv2si"
13444 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,x,x,*y,*y")
13445 (vec_concat:V2SI
13446 (match_operand:SI 1 "nonimmediate_operand" " 0,rm,rm,0,m, 0,*rm")
13447 (match_operand:SI 2 "reg_or_0_operand" " x,C ,C, x,C,*y,C")))]
13448 "TARGET_SSE && !TARGET_SSE4_1"
13449 "@
13450 punpckldq\t{%2, %0|%0, %2}
13451 movd\t{%1, %0|%0, %1}
13452 movd\t{%1, %0|%0, %1}
13453 unpcklps\t{%2, %0|%0, %2}
13454 movss\t{%1, %0|%0, %1}
13455 punpckldq\t{%2, %0|%0, %2}
13456 movd\t{%1, %0|%0, %1}"
13457 [(set_attr "isa" "sse2,sse2,sse2,*,*,*,*")
13458 (set_attr "type" "sselog,ssemov,mmxmov,sselog,ssemov,mmxcvt,mmxmov")
13459 (set_attr "mode" "TI,TI,DI,V4SF,SF,DI,DI")])
13460
;; Concatenate two V2SI values into a V4SI.  Register-register forms use
;; punpcklqdq (sse2_noavx), vpunpcklqdq (avx) or movlhps (noavx); when
;; operand 2 is in memory, movhps (noavx) or vmovhps (avx) is used and
;; the Intel-syntax operand is printed with %q2.
13461 (define_insn "*vec_concatv4si"
13462 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x,x,x")
13463 (vec_concat:V4SI
13464 (match_operand:V2SI 1 "register_operand" " 0,x,0,0,x")
13465 (match_operand:V2SI 2 "nonimmediate_operand" " x,x,x,m,m")))]
13466 "TARGET_SSE"
13467 "@
13468 punpcklqdq\t{%2, %0|%0, %2}
13469 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
13470 movlhps\t{%2, %0|%0, %2}
13471 movhps\t{%2, %0|%0, %q2}
13472 vmovhps\t{%2, %1, %0|%0, %1, %q2}"
13473 [(set_attr "isa" "sse2_noavx,avx,noavx,noavx,avx")
13474 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
13475 (set_attr "prefix" "orig,vex,orig,orig,vex")
13476 (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")])
13477
;; Concatenate two DImode values into a V2DI.  Alternatives 0..2 insert
;; operand 2 with (v)pinsrq.  Alternatives 3..5 handle an operand 2 that
;; matches "C": alternative 3 picks %vmovq or %vmovd depending on
;; HAVE_AS_IX86_INTERUNIT_MOVQ, alternative 4 uses %vmovq and alternative
;; 5 movq2dq.  Alternatives 6..10 combine two values with (v)punpcklqdq,
;; movlhps or (v)movhps.  The "type" attribute is sselog for
;; alternatives 0, 1, 2, 6 and 7, and ssemov for the rest.
13478 ;; movd instead of movq is required to handle broken assemblers.
13479 (define_insn "vec_concatv2di"
13480 [(set (match_operand:V2DI 0 "register_operand"
13481 "=Yr,*x,x ,Yi,x ,!x,x,x,x,x,x")
13482 (vec_concat:V2DI
13483 (match_operand:DI 1 "nonimmediate_operand"
13484 " 0, 0,x ,r ,xm,*y,0,x,0,0,x")
13485 (match_operand:DI 2 "vector_move_operand"
13486 "*rm,rm,rm,C ,C ,C ,x,x,x,m,m")))]
13487 "TARGET_SSE"
13488 "@
13489 pinsrq\t{$1, %2, %0|%0, %2, 1}
13490 pinsrq\t{$1, %2, %0|%0, %2, 1}
13491 vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
13492 * return HAVE_AS_IX86_INTERUNIT_MOVQ ? \"%vmovq\t{%1, %0|%0, %1}\" : \"%vmovd\t{%1, %0|%0, %1}\";
13493 %vmovq\t{%1, %0|%0, %1}
13494 movq2dq\t{%1, %0|%0, %1}
13495 punpcklqdq\t{%2, %0|%0, %2}
13496 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
13497 movlhps\t{%2, %0|%0, %2}
13498 movhps\t{%2, %0|%0, %2}
13499 vmovhps\t{%2, %1, %0|%0, %1, %2}"
13500 [(set_attr "isa" "x64_sse4_noavx,x64_sse4_noavx,x64_avx,x64,sse2,sse2,sse2_noavx,avx,noavx,noavx,avx")
13501 (set (attr "type")
13502 (if_then_else
13503 (eq_attr "alternative" "0,1,2,6,7")
13504 (const_string "sselog")
13505 (const_string "ssemov")))
13506 (set_attr "prefix_rex" "1,1,1,1,*,*,*,*,*,*,*")
13507 (set_attr "prefix_extra" "1,1,1,*,*,*,*,*,*,*,*")
13508 (set_attr "length_immediate" "1,1,1,*,*,*,*,*,*,*,*")
13509 (set_attr "prefix" "orig,orig,vex,maybe_vex,maybe_vex,orig,orig,vex,orig,orig,vex")
13510 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")])
13511
;; Expander that delegates entirely to ix86_expand_sse_unpack, called
;; with the flag pair (false, false).
;; NOTE(review): from the pattern names, the two flags appear to mean
;; (unsigned_p, high_p) -- confirm against the helper's definition.
13512 (define_expand "vec_unpacks_lo_<mode>"
13513 [(match_operand:<sseunpackmode> 0 "register_operand")
13514 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
13515 "TARGET_SSE2"
13516 "ix86_expand_sse_unpack (operands[0], operands[1], false, false); DONE;")
13517
;; Expander that delegates entirely to ix86_expand_sse_unpack, called
;; with the flag pair (false, true).
;; NOTE(review): from the pattern names, the two flags appear to mean
;; (unsigned_p, high_p) -- confirm against the helper's definition.
13518 (define_expand "vec_unpacks_hi_<mode>"
13519 [(match_operand:<sseunpackmode> 0 "register_operand")
13520 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
13521 "TARGET_SSE2"
13522 "ix86_expand_sse_unpack (operands[0], operands[1], false, true); DONE;")
13523
;; Expander that delegates entirely to ix86_expand_sse_unpack, called
;; with the flag pair (true, false).
;; NOTE(review): from the pattern names, the two flags appear to mean
;; (unsigned_p, high_p) -- confirm against the helper's definition.
13524 (define_expand "vec_unpacku_lo_<mode>"
13525 [(match_operand:<sseunpackmode> 0 "register_operand")
13526 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
13527 "TARGET_SSE2"
13528 "ix86_expand_sse_unpack (operands[0], operands[1], true, false); DONE;")
13529
;; Low-half unpack of an HImode register: the QImode result is the
;; byte-0 subreg of operand 1.  Enabled by TARGET_AVX512DQ.
13530 (define_expand "vec_unpacks_lo_hi"
13531 [(set (match_operand:QI 0 "register_operand")
13532 (subreg:QI (match_operand:HI 1 "register_operand") 0))]
13533 "TARGET_AVX512DQ")
13534
;; Low-half unpack of an SImode register: the HImode result is the
;; byte-0 subreg of operand 1.  Enabled by TARGET_AVX512F.
13535 (define_expand "vec_unpacks_lo_si"
13536 [(set (match_operand:HI 0 "register_operand")
13537 (subreg:HI (match_operand:SI 1 "register_operand") 0))]
13538 "TARGET_AVX512F")
13539
;; Low-half unpack of a DImode register: the SImode result is the
;; byte-0 subreg of operand 1.  Enabled by TARGET_AVX512BW.
13540 (define_expand "vec_unpacks_lo_di"
13541 [(set (match_operand:SI 0 "register_operand")
13542 (subreg:SI (match_operand:DI 1 "register_operand") 0))]
13543 "TARGET_AVX512BW")
13544
;; Expander that delegates entirely to ix86_expand_sse_unpack, called
;; with the flag pair (true, true).
;; NOTE(review): from the pattern names, the two flags appear to mean
;; (unsigned_p, high_p) -- confirm against the helper's definition.
13545 (define_expand "vec_unpacku_hi_<mode>"
13546 [(match_operand:<sseunpackmode> 0 "register_operand")
13547 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
13548 "TARGET_SSE2"
13549 "ix86_expand_sse_unpack (operands[0], operands[1], true, true); DONE;")
13550
;; High-half unpack of an HImode register: operand 1 is logically
;; shifted right by 8 in HImode and the result is stored through an
;; HImode subreg of the QImode destination.  Enabled by TARGET_AVX512F.
13551 (define_expand "vec_unpacks_hi_hi"
13552 [(set (subreg:HI (match_operand:QI 0 "register_operand") 0)
13553 (lshiftrt:HI (match_operand:HI 1 "register_operand")
13554 (const_int 8)))]
13555 "TARGET_AVX512F")
13556
;; High-half unpack for SWI48x registers: operand 1 is logically shifted
;; right by the bit size of <HALFMASKMODE> (computed into operand 2 by
;; the preparation code) and stored through a full-width subreg of the
;; half-width destination.  Enabled by TARGET_AVX512BW.
13557 (define_expand "vec_unpacks_hi_<mode>"
13558 [(set (subreg:SWI48x (match_operand:<HALFMASKMODE> 0 "register_operand") 0)
13559 (lshiftrt:SWI48x (match_operand:SWI48x 1 "register_operand")
13560 (match_dup 2)))]
13561 "TARGET_AVX512BW"
13562 {
13563 operands[2] = GEN_INT (GET_MODE_BITSIZE (<HALFMASKMODE>mode));
13564 })
13565
13566 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13567 ;;
13568 ;; Miscellaneous
13569 ;;
13570 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13571
;; Unsigned rounding average: both inputs are zero-extended to the
;; double-width mode, summed together with a constant, shifted right by
;; one and truncated back.  The preparation code places the all-ones
;; constant (CONST1_RTX) in operand 3 and runs the binary-operand fixup.
;; When <mask_applied> is true, the caller's original operand 3 is saved
;; first; afterwards the constant is moved to operand 5 and operand 3
;; is restored.
(define_expand "<sse2_avx2>_uavg<mode>3<mask_name>"
  [(set (match_operand:VI12_AVX2 0 "register_operand")
        (truncate:VI12_AVX2
          (lshiftrt:<ssedoublemode>
            (plus:<ssedoublemode>
              (plus:<ssedoublemode>
                (zero_extend:<ssedoublemode>
                  (match_operand:VI12_AVX2 1 "nonimmediate_operand"))
                (zero_extend:<ssedoublemode>
                  (match_operand:VI12_AVX2 2 "nonimmediate_operand")))
              (match_dup <mask_expand_op3>))
            (const_int 1))))]
  "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
{
  /* Operand 3 is borrowed for the constant while the fixup runs.  */
  rtx saved_op3 = <mask_applied> ? operands[3] : NULL_RTX;

  operands[3] = CONST1_RTX (<MODE>mode);
  ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);

  if (<mask_applied>)
    {
      operands[5] = operands[3];
      operands[3] = saved_op3;
    }
})
13598
;; Matcher for the unsigned rounding average: (zext(op1) + zext(op2) +
;; constant-one vector) >> 1, truncated back to the element width.
;; Operand 1 is marked commutative ("%").  Alternative 0 is the
;; two-operand pavg (noavx); alternative 1 is the three-operand vpavg
;; (avx) with <mask_operand3> spliced after the destination.
13599 (define_insn "*<sse2_avx2>_uavg<mode>3<mask_name>"
13600 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v")
13601 (truncate:VI12_AVX2
13602 (lshiftrt:<ssedoublemode>
13603 (plus:<ssedoublemode>
13604 (plus:<ssedoublemode>
13605 (zero_extend:<ssedoublemode>
13606 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "%0,v"))
13607 (zero_extend:<ssedoublemode>
13608 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,vm")))
13609 (match_operand:VI12_AVX2 <mask_expand_op3> "const1_operand"))
13610 (const_int 1))))]
13611 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
13612 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
13613 "@
13614 pavg<ssemodesuffix>\t{%2, %0|%0, %2}
13615 vpavg<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13616 [(set_attr "isa" "noavx,avx")
13617 (set_attr "type" "sseiadd")
13618 (set_attr "prefix_data16" "1,*")
13619 (set_attr "prefix" "orig,<mask_prefix>")
13620 (set_attr "mode" "<sseinsnmode>")])
13621
;; PSADBW kept opaque as UNSPEC_PSADBW of two <ssebytemode> inputs, with
;; the result in a VI8_AVX2_AVX512BW mode.  Alternative 0 is the
;; two-operand psadbw (noavx, destination tied to operand 1);
;; alternative 1 is the three-operand vpsadbw (avx).
13622 ;; The correct representation for this is absolutely enormous, and
13623 ;; surely not generally useful.
13624 (define_insn "<sse2_avx2>_psadbw"
13625 [(set (match_operand:VI8_AVX2_AVX512BW 0 "register_operand" "=x,v")
13626 (unspec:VI8_AVX2_AVX512BW
13627 [(match_operand:<ssebytemode> 1 "register_operand" "0,v")
13628 (match_operand:<ssebytemode> 2 "nonimmediate_operand" "xm,vm")]
13629 UNSPEC_PSADBW))]
13630 "TARGET_SSE2"
13631 "@
13632 psadbw\t{%2, %0|%0, %2}
13633 vpsadbw\t{%2, %1, %0|%0, %1, %2}"
13634 [(set_attr "isa" "noavx,avx")
13635 (set_attr "type" "sseiadd")
13636 (set_attr "atom_unit" "simul")
13637 (set_attr "prefix_data16" "1,*")
13638 (set_attr "prefix" "orig,maybe_evex")
13639 (set_attr "mode" "<sseinsnmode>")])
13640
;; (v)movmskps / (v)movmskpd: an SImode general register receives
;; UNSPEC_MOVMSK of a VF_128_256 vector register.
13641 (define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>"
13642 [(set (match_operand:SI 0 "register_operand" "=r")
13643 (unspec:SI
13644 [(match_operand:VF_128_256 1 "register_operand" "x")]
13645 UNSPEC_MOVMSK))]
13646 "TARGET_SSE"
13647 "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
13648 [(set_attr "type" "ssemov")
13649 (set_attr "prefix" "maybe_vex")
13650 (set_attr "mode" "<MODE>")])
13651
;; 64-bit variant of the movmsk pattern: the SImode UNSPEC_MOVMSK result
;; is zero-extended into a DImode register.  The same instruction is
;; emitted, with the destination printed as %k0.
13652 (define_insn "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext"
13653 [(set (match_operand:DI 0 "register_operand" "=r")
13654 (zero_extend:DI
13655 (unspec:SI
13656 [(match_operand:VF_128_256 1 "register_operand" "x")]
13657 UNSPEC_MOVMSK)))]
13658 "TARGET_64BIT && TARGET_SSE"
13659 "%vmovmsk<ssemodesuffix>\t{%1, %k0|%k0, %1}"
13660 [(set_attr "type" "ssemov")
13661 (set_attr "prefix" "maybe_vex")
13662 (set_attr "mode" "<MODE>")])
13663
;; PMOVMSKB: UNSPEC_MOVMSK of a byte vector (VI1_AVX2) register, with the
;; SImode result placed in a general register.
;; prefix_data16 is computed rather than constant: "*" when TARGET_AVX is
;; enabled, otherwise "1".
13664 (define_insn "<sse2_avx2>_pmovmskb"
13665 [(set (match_operand:SI 0 "register_operand" "=r")
13666 (unspec:SI
13667 [(match_operand:VI1_AVX2 1 "register_operand" "x")]
13668 UNSPEC_MOVMSK))]
13669 "TARGET_SSE2"
13670 "%vpmovmskb\t{%1, %0|%0, %1}"
13671 [(set_attr "type" "ssemov")
13672 (set (attr "prefix_data16")
13673 (if_then_else
13674 (match_test "TARGET_AVX")
13675 (const_string "*")
13676 (const_string "1")))
13677 (set_attr "prefix" "maybe_vex")
13678 (set_attr "mode" "SI")])
13679
;; Anonymous variant of the pmovmskb pattern whose SImode unspec result
;; is wrapped in a zero_extend to DImode.  Only enabled for TARGET_64BIT.
;; The destination is printed with %k0 although operand 0 is DImode.
;; prefix_data16 is "*" when TARGET_AVX is enabled, otherwise "1".
13680 (define_insn "*<sse2_avx2>_pmovmskb_zext"
13681 [(set (match_operand:DI 0 "register_operand" "=r")
13682 (zero_extend:DI
13683 (unspec:SI
13684 [(match_operand:VI1_AVX2 1 "register_operand" "x")]
13685 UNSPEC_MOVMSK)))]
13686 "TARGET_64BIT && TARGET_SSE2"
13687 "%vpmovmskb\t{%1, %k0|%k0, %1}"
13688 [(set_attr "type" "ssemov")
13689 (set (attr "prefix_data16")
13690 (if_then_else
13691 (match_test "TARGET_AVX")
13692 (const_string "*")
13693 (const_string "1")))
13694 (set_attr "prefix" "maybe_vex")
13695 (set_attr "mode" "SI")])
13696
;; Expander for MASKMOVDQU.  Operand 0 is the V16QI memory destination;
;; operands 1 and 2 are V16QI registers.  The previous memory contents
;; (match_dup 0) are listed as a third input of UNSPEC_MASKMOV.
;; There is no preparation code, so the RTL template is emitted as is.
13697 (define_expand "sse2_maskmovdqu"
13698 [(set (match_operand:V16QI 0 "memory_operand")
13699 (unspec:V16QI [(match_operand:V16QI 1 "register_operand")
13700 (match_operand:V16QI 2 "register_operand")
13701 (match_dup 0)]
13702 UNSPEC_MASKMOV))]
13703 "TARGET_SSE2")
13704
;; Insn form of MASKMOVDQU.  The destination address is matched as a
;; Pmode register with constraint "D" and is never printed in the
;; template; only the two vector registers appear as assembler operands.
;; When Pmode != word_mode the output code writes an "addr32" prefix
;; straight to asm_out_file, and length_address evaluates to 1 to
;; account for it.
13705 (define_insn "*sse2_maskmovdqu"
13706 [(set (mem:V16QI (match_operand:P 0 "register_operand" "D"))
13707 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
13708 (match_operand:V16QI 2 "register_operand" "x")
13709 (mem:V16QI (match_dup 0))]
13710 UNSPEC_MASKMOV))]
13711 "TARGET_SSE2"
13712 {
13713 /* We can't use %^ here due to ASM_OUTPUT_OPCODE processing
13714 that requires %v to be at the beginning of the opcode name. */
13715 if (Pmode != word_mode)
13716 fputs ("\taddr32", asm_out_file);
13717 return "%vmaskmovdqu\t{%2, %1|%1, %2}";
13718 }
13719 [(set_attr "type" "ssemov")
13720 (set_attr "prefix_data16" "1")
13721 (set (attr "length_address")
13722 (symbol_ref ("Pmode != word_mode")))
13723 ;; The implicit %rdi operand confuses default length_vex computation.
13724 (set (attr "length_vex")
13725 (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))")))
13726 (set_attr "prefix" "maybe_vex")
13727 (set_attr "znver1_decode" "vector")
13728 (set_attr "mode" "TI")])
13729
;; LDMXCSR with an SImode memory operand.  The pattern is a bare
;; unspec_volatile (UNSPECV_LDMXCSR) with no destination in the RTL.
;; The "memory" attribute is set to "load" explicitly.
13730 (define_insn "sse_ldmxcsr"
13731 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
13732 UNSPECV_LDMXCSR)]
13733 "TARGET_SSE"
13734 "%vldmxcsr\t%0"
13735 [(set_attr "type" "sse")
13736 (set_attr "atom_sse_attr" "mxcsr")
13737 (set_attr "prefix" "maybe_vex")
13738 (set_attr "memory" "load")])
13739
;; STMXCSR: sets the SImode memory operand from an unspec_volatile
;; (UNSPECV_STMXCSR) whose only input is a dummy (const_int 0).
;; The "memory" attribute is set to "store" explicitly.
13740 (define_insn "sse_stmxcsr"
13741 [(set (match_operand:SI 0 "memory_operand" "=m")
13742 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
13743 "TARGET_SSE"
13744 "%vstmxcsr\t%0"
13745 [(set_attr "type" "sse")
13746 (set_attr "atom_sse_attr" "mxcsr")
13747 (set_attr "prefix" "maybe_vex")
13748 (set_attr "memory" "store")])
13749
;; CLFLUSH.  Operand 0 is an address (address_operand, constraint "p"),
;; not a memory reference, and it is printed with %a0.  No mode is given
;; on the operand, and the "memory" attribute is set to "unknown".
13750 (define_insn "sse2_clflush"
13751 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
13752 UNSPECV_CLFLUSH)]
13753 "TARGET_SSE2"
13754 "clflush\t%a0"
13755 [(set_attr "type" "sse")
13756 (set_attr "atom_sse_attr" "fence")
13757 (set_attr "memory" "unknown")])
13758
13759 ;; As per AMD and Intel ISA manuals, the first operand is extensions
13760 ;; and it goes to %ecx. The second operand received is hints and it goes
13761 ;; to %eax.
;; MWAIT.  The template prints no operands; the "c" and "a" constraints
;; alone place operand 0 and operand 1 in the registers the instruction
;; reads implicitly.  The length is given as a constant 3 bytes.
13762 (define_insn "sse3_mwait"
13763 [(unspec_volatile [(match_operand:SI 0 "register_operand" "c")
13764 (match_operand:SI 1 "register_operand" "a")]
13765 UNSPECV_MWAIT)]
13766 "TARGET_SSE3"
13767 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
13768 ;; Since 32bit register operands are implicitly zero extended to 64bit,
13769 ;; we only need to set up 32bit registers.
13770 "mwait"
13771 [(set_attr "length" "3")])
13772
;; MONITOR, instantiated once per pointer mode (iterator P).  Operand 0
;; is the Pmode address register ("a"); operands 1 and 2 are SImode
;; values pinned by the "c" and "d" constraints.  No operand is printed.
;; The length is 3 bytes plus 1 when Pmode != word_mode.
;; NOTE(review): the extra byte presumably covers an addr32 prefix
;; emitted by "%^" -- confirm against the operand printer.
13773 (define_insn "sse3_monitor_<mode>"
13774 [(unspec_volatile [(match_operand:P 0 "register_operand" "a")
13775 (match_operand:SI 1 "register_operand" "c")
13776 (match_operand:SI 2 "register_operand" "d")]
13777 UNSPECV_MONITOR)]
13778 "TARGET_SSE3"
13779 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
13780 ;; RCX and RDX are used. Since 32bit register operands are implicitly
13781 ;; zero extended to 64bit, we only need to set up 32bit registers.
13782 "%^monitor"
13783 [(set (attr "length")
13784 (symbol_ref ("(Pmode != word_mode) + 3")))])
13785
13786 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13787 ;;
13788 ;; SSSE3 instructions
13789 ;;
13790 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13791
;; RTL codes iterated over by the horizontal word add/subtract patterns
;; below: plain and signed-saturating addition and subtraction.
13792 (define_code_iterator ssse3_plusminus [plus ss_plus minus ss_minus])
13793
;; 256-bit horizontal word add/subtract, one instance per code in
;; ssse3_plusminus.  Each result element combines two adjacent HImode
;; elements (2k, 2k+1) of a single source.  As written in this RTL, the
;; first eight result elements come from the eight pairs of operand 1
;; and the last eight from the eight pairs of operand 2.
;; VEX-only: three-operand template, no tied operand.
13794 (define_insn "avx2_ph<plusminus_mnemonic>wv16hi3"
13795 [(set (match_operand:V16HI 0 "register_operand" "=x")
13796 (vec_concat:V16HI
13797 (vec_concat:V8HI
13798 (vec_concat:V4HI
13799 (vec_concat:V2HI
13800 (ssse3_plusminus:HI
13801 (vec_select:HI
13802 (match_operand:V16HI 1 "register_operand" "x")
13803 (parallel [(const_int 0)]))
13804 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
13805 (ssse3_plusminus:HI
13806 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
13807 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
13808 (vec_concat:V2HI
13809 (ssse3_plusminus:HI
13810 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
13811 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
13812 (ssse3_plusminus:HI
13813 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
13814 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
13815 (vec_concat:V4HI
13816 (vec_concat:V2HI
13817 (ssse3_plusminus:HI
13818 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
13819 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
13820 (ssse3_plusminus:HI
13821 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
13822 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
13823 (vec_concat:V2HI
13824 (ssse3_plusminus:HI
13825 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
13826 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
13827 (ssse3_plusminus:HI
13828 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
13829 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
13830 (vec_concat:V8HI
13831 (vec_concat:V4HI
13832 (vec_concat:V2HI
13833 (ssse3_plusminus:HI
13834 (vec_select:HI
13835 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
13836 (parallel [(const_int 0)]))
13837 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
13838 (ssse3_plusminus:HI
13839 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
13840 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
13841 (vec_concat:V2HI
13842 (ssse3_plusminus:HI
13843 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
13844 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
13845 (ssse3_plusminus:HI
13846 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
13847 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
13848 (vec_concat:V4HI
13849 (vec_concat:V2HI
13850 (ssse3_plusminus:HI
13851 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
13852 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
13853 (ssse3_plusminus:HI
13854 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
13855 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
13856 (vec_concat:V2HI
13857 (ssse3_plusminus:HI
13858 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
13859 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
13860 (ssse3_plusminus:HI
13861 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
13862 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
13863 "TARGET_AVX2"
13864 "vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
13865 [(set_attr "type" "sseiadd")
13866 (set_attr "prefix_extra" "1")
13867 (set_attr "prefix" "vex")
13868 (set_attr "mode" "OI")])
13869
;; 128-bit horizontal word add/subtract, one instance per code in
;; ssse3_plusminus.  The first four result elements combine the adjacent
;; HImode pairs of operand 1, the last four those of operand 2.
;; Alternative 0 is the legacy two-operand form (operand 1 tied to the
;; destination); alternative 1 is the three-operand VEX form.
13870 (define_insn "ssse3_ph<plusminus_mnemonic>wv8hi3"
13871 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
13872 (vec_concat:V8HI
13873 (vec_concat:V4HI
13874 (vec_concat:V2HI
13875 (ssse3_plusminus:HI
13876 (vec_select:HI
13877 (match_operand:V8HI 1 "register_operand" "0,x")
13878 (parallel [(const_int 0)]))
13879 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
13880 (ssse3_plusminus:HI
13881 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
13882 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
13883 (vec_concat:V2HI
13884 (ssse3_plusminus:HI
13885 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
13886 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
13887 (ssse3_plusminus:HI
13888 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
13889 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
13890 (vec_concat:V4HI
13891 (vec_concat:V2HI
13892 (ssse3_plusminus:HI
13893 (vec_select:HI
13894 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
13895 (parallel [(const_int 0)]))
13896 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
13897 (ssse3_plusminus:HI
13898 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
13899 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
13900 (vec_concat:V2HI
13901 (ssse3_plusminus:HI
13902 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
13903 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
13904 (ssse3_plusminus:HI
13905 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
13906 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
13907 "TARGET_SSSE3"
13908 "@
13909 ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}
13910 vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
13911 [(set_attr "isa" "noavx,avx")
13912 (set_attr "type" "sseiadd")
13913 (set_attr "atom_unit" "complex")
13914 (set_attr "prefix_data16" "1,*")
13915 (set_attr "prefix_extra" "1")
13916 (set_attr "prefix" "orig,vex")
13917 (set_attr "mode" "TI")])
13918
;; 64-bit (MMX register, constraint "y") horizontal word add/subtract,
;; one instance per code in ssse3_plusminus.  The first two result
;; elements combine the adjacent HImode pairs of operand 1, the last two
;; those of operand 2.  Two-operand form only: operand 1 is tied to the
;; destination.  prefix_rex is computed per insn from whether an
;; extended register is mentioned.
13919 (define_insn "ssse3_ph<plusminus_mnemonic>wv4hi3"
13920 [(set (match_operand:V4HI 0 "register_operand" "=y")
13921 (vec_concat:V4HI
13922 (vec_concat:V2HI
13923 (ssse3_plusminus:HI
13924 (vec_select:HI
13925 (match_operand:V4HI 1 "register_operand" "0")
13926 (parallel [(const_int 0)]))
13927 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
13928 (ssse3_plusminus:HI
13929 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
13930 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
13931 (vec_concat:V2HI
13932 (ssse3_plusminus:HI
13933 (vec_select:HI
13934 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
13935 (parallel [(const_int 0)]))
13936 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
13937 (ssse3_plusminus:HI
13938 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
13939 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
13940 "TARGET_SSSE3"
13941 "ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}"
13942 [(set_attr "type" "sseiadd")
13943 (set_attr "atom_unit" "complex")
13944 (set_attr "prefix_extra" "1")
13945 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
13946 (set_attr "mode" "DI")])
13947
;; 256-bit horizontal doubleword add/subtract.  This pattern iterates
;; over the plusminus code iterator rather than ssse3_plusminus.
;; As written in this RTL, the first four result elements combine the
;; adjacent SImode pairs of operand 1 and the last four those of
;; operand 2.  VEX-only: three-operand template, no tied operand.
13948 (define_insn "avx2_ph<plusminus_mnemonic>dv8si3"
13949 [(set (match_operand:V8SI 0 "register_operand" "=x")
13950 (vec_concat:V8SI
13951 (vec_concat:V4SI
13952 (vec_concat:V2SI
13953 (plusminus:SI
13954 (vec_select:SI
13955 (match_operand:V8SI 1 "register_operand" "x")
13956 (parallel [(const_int 0)]))
13957 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
13958 (plusminus:SI
13959 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
13960 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
13961 (vec_concat:V2SI
13962 (plusminus:SI
13963 (vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
13964 (vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
13965 (plusminus:SI
13966 (vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
13967 (vec_select:SI (match_dup 1) (parallel [(const_int 7)])))))
13968 (vec_concat:V4SI
13969 (vec_concat:V2SI
13970 (plusminus:SI
13971 (vec_select:SI
13972 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
13973 (parallel [(const_int 0)]))
13974 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
13975 (plusminus:SI
13976 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
13977 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))
13978 (vec_concat:V2SI
13979 (plusminus:SI
13980 (vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
13981 (vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
13982 (plusminus:SI
13983 (vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
13984 (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
13985 "TARGET_AVX2"
13986 "vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
13987 [(set_attr "type" "sseiadd")
13988 (set_attr "prefix_extra" "1")
13989 (set_attr "prefix" "vex")
13990 (set_attr "mode" "OI")])
13991
;; 128-bit horizontal doubleword add/subtract (plusminus iterator).
;; The first two result elements combine the adjacent SImode pairs of
;; operand 1, the last two those of operand 2.
;; Alternative 0 is the legacy two-operand form (operand 1 tied to the
;; destination); alternative 1 is the three-operand VEX form.
13992 (define_insn "ssse3_ph<plusminus_mnemonic>dv4si3"
13993 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
13994 (vec_concat:V4SI
13995 (vec_concat:V2SI
13996 (plusminus:SI
13997 (vec_select:SI
13998 (match_operand:V4SI 1 "register_operand" "0,x")
13999 (parallel [(const_int 0)]))
14000 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
14001 (plusminus:SI
14002 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
14003 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
14004 (vec_concat:V2SI
14005 (plusminus:SI
14006 (vec_select:SI
14007 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
14008 (parallel [(const_int 0)]))
14009 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
14010 (plusminus:SI
14011 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
14012 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
14013 "TARGET_SSSE3"
14014 "@
14015 ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}
14016 vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
14017 [(set_attr "isa" "noavx,avx")
14018 (set_attr "type" "sseiadd")
14019 (set_attr "atom_unit" "complex")
14020 (set_attr "prefix_data16" "1,*")
14021 (set_attr "prefix_extra" "1")
14022 (set_attr "prefix" "orig,vex")
14023 (set_attr "mode" "TI")])
14024
;; 64-bit (MMX register, constraint "y") horizontal doubleword
;; add/subtract (plusminus iterator).  Result element 0 combines the two
;; SImode elements of operand 1, element 1 those of operand 2.
;; Two-operand form only: operand 1 is tied to the destination.
14025 (define_insn "ssse3_ph<plusminus_mnemonic>dv2si3"
14026 [(set (match_operand:V2SI 0 "register_operand" "=y")
14027 (vec_concat:V2SI
14028 (plusminus:SI
14029 (vec_select:SI
14030 (match_operand:V2SI 1 "register_operand" "0")
14031 (parallel [(const_int 0)]))
14032 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
14033 (plusminus:SI
14034 (vec_select:SI
14035 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
14036 (parallel [(const_int 0)]))
14037 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
14038 "TARGET_SSSE3"
14039 "ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}"
14040 [(set_attr "type" "sseiadd")
14041 (set_attr "atom_unit" "complex")
14042 (set_attr "prefix_extra" "1")
14043 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
14044 (set_attr "mode" "DI")])
14045
;; 256-bit PMADDUBSW.  The even-indexed bytes of operand 1 are
;; zero-extended and multiplied by the sign-extended even-indexed bytes
;; of operand 2; the same is done for the odd-indexed bytes.  The two
;; V16HI products are then combined with ss_plus (signed saturating add).
;; VEX-only: three-operand template, no tied operand.
14046 (define_insn "avx2_pmaddubsw256"
14047 [(set (match_operand:V16HI 0 "register_operand" "=x")
14048 (ss_plus:V16HI
14049 (mult:V16HI
14050 (zero_extend:V16HI
14051 (vec_select:V16QI
14052 (match_operand:V32QI 1 "register_operand" "x")
14053 (parallel [(const_int 0) (const_int 2)
14054 (const_int 4) (const_int 6)
14055 (const_int 8) (const_int 10)
14056 (const_int 12) (const_int 14)
14057 (const_int 16) (const_int 18)
14058 (const_int 20) (const_int 22)
14059 (const_int 24) (const_int 26)
14060 (const_int 28) (const_int 30)])))
14061 (sign_extend:V16HI
14062 (vec_select:V16QI
14063 (match_operand:V32QI 2 "nonimmediate_operand" "xm")
14064 (parallel [(const_int 0) (const_int 2)
14065 (const_int 4) (const_int 6)
14066 (const_int 8) (const_int 10)
14067 (const_int 12) (const_int 14)
14068 (const_int 16) (const_int 18)
14069 (const_int 20) (const_int 22)
14070 (const_int 24) (const_int 26)
14071 (const_int 28) (const_int 30)]))))
14072 (mult:V16HI
14073 (zero_extend:V16HI
14074 (vec_select:V16QI (match_dup 1)
14075 (parallel [(const_int 1) (const_int 3)
14076 (const_int 5) (const_int 7)
14077 (const_int 9) (const_int 11)
14078 (const_int 13) (const_int 15)
14079 (const_int 17) (const_int 19)
14080 (const_int 21) (const_int 23)
14081 (const_int 25) (const_int 27)
14082 (const_int 29) (const_int 31)])))
14083 (sign_extend:V16HI
14084 (vec_select:V16QI (match_dup 2)
14085 (parallel [(const_int 1) (const_int 3)
14086 (const_int 5) (const_int 7)
14087 (const_int 9) (const_int 11)
14088 (const_int 13) (const_int 15)
14089 (const_int 17) (const_int 19)
14090 (const_int 21) (const_int 23)
14091 (const_int 25) (const_int 27)
14092 (const_int 29) (const_int 31)]))))))]
14093 "TARGET_AVX2"
14094 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
14095 [(set_attr "type" "sseiadd")
14096 (set_attr "prefix_extra" "1")
14097 (set_attr "prefix" "vex")
14098 (set_attr "mode" "OI")])
14099
14100 ;; The correct representation for this is absolutely enormous, and
14101 ;; surely not generally useful.
;; EVEX VPMADDUBSW kept as an opaque UNSPEC_PMADDUBSW512: the result is
;; a word vector (VI2_AVX512VL) and both inputs use the mode given by
;; <dbpsadbwmode>.  <mask_name>/<mask_operand3> add the masked variant.
;; The ';' after the template string merely starts an empty comment.
14102 (define_insn "avx512bw_pmaddubsw512<mode><mask_name>"
14103 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
14104 (unspec:VI2_AVX512VL
14105 [(match_operand:<dbpsadbwmode> 1 "register_operand" "v")
14106 (match_operand:<dbpsadbwmode> 2 "nonimmediate_operand" "vm")]
14107 UNSPEC_PMADDUBSW512))]
14108 "TARGET_AVX512BW"
14109 "vpmaddubsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}";
14110 [(set_attr "type" "sseiadd")
14111 (set_attr "prefix" "evex")
14112 (set_attr "mode" "XI")])
14113
;; 512-bit VPMULHRSW, described as
;;   truncate ((((sext (op1) * sext (op2)) >> 14) + 1) >> 1)
;; with both shifts logical (lshiftrt) and the "+ 1" supplied by an
;; explicit vector of 32 ones.  Operand 1 is marked commutative ("%").
;; NOTE(review): the constant is written as const_vector:V32HI although
;; it is an operand of plus:V32SI -- confirm the intended mode.
14114 (define_insn "avx512bw_umulhrswv32hi3<mask_name>"
14115 [(set (match_operand:V32HI 0 "register_operand" "=v")
14116 (truncate:V32HI
14117 (lshiftrt:V32SI
14118 (plus:V32SI
14119 (lshiftrt:V32SI
14120 (mult:V32SI
14121 (sign_extend:V32SI
14122 (match_operand:V32HI 1 "nonimmediate_operand" "%v"))
14123 (sign_extend:V32SI
14124 (match_operand:V32HI 2 "nonimmediate_operand" "vm")))
14125 (const_int 14))
14126 (const_vector:V32HI [(const_int 1) (const_int 1)
14127 (const_int 1) (const_int 1)
14128 (const_int 1) (const_int 1)
14129 (const_int 1) (const_int 1)
14130 (const_int 1) (const_int 1)
14131 (const_int 1) (const_int 1)
14132 (const_int 1) (const_int 1)
14133 (const_int 1) (const_int 1)
14134 (const_int 1) (const_int 1)
14135 (const_int 1) (const_int 1)
14136 (const_int 1) (const_int 1)
14137 (const_int 1) (const_int 1)
14138 (const_int 1) (const_int 1)
14139 (const_int 1) (const_int 1)
14140 (const_int 1) (const_int 1)
14141 (const_int 1) (const_int 1)]))
14142 (const_int 1))))]
14143 "TARGET_AVX512BW"
14144 "vpmulhrsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14145 [(set_attr "type" "sseimul")
14146 (set_attr "prefix" "evex")
14147 (set_attr "mode" "XI")])
14148
;; 128-bit PMADDUBSW.  The even-indexed bytes of operand 1 are
;; zero-extended and multiplied by the sign-extended even-indexed bytes
;; of operand 2; the same is done for the odd-indexed bytes.  The two
;; V8HI products are then combined with ss_plus (signed saturating add).
;; Alternative 0 is the legacy two-operand form (operand 1 tied to the
;; destination); alternative 1 is the three-operand VEX form.
14149 (define_insn "ssse3_pmaddubsw128"
14150 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
14151 (ss_plus:V8HI
14152 (mult:V8HI
14153 (zero_extend:V8HI
14154 (vec_select:V8QI
14155 (match_operand:V16QI 1 "register_operand" "0,x")
14156 (parallel [(const_int 0) (const_int 2)
14157 (const_int 4) (const_int 6)
14158 (const_int 8) (const_int 10)
14159 (const_int 12) (const_int 14)])))
14160 (sign_extend:V8HI
14161 (vec_select:V8QI
14162 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")
14163 (parallel [(const_int 0) (const_int 2)
14164 (const_int 4) (const_int 6)
14165 (const_int 8) (const_int 10)
14166 (const_int 12) (const_int 14)]))))
14167 (mult:V8HI
14168 (zero_extend:V8HI
14169 (vec_select:V8QI (match_dup 1)
14170 (parallel [(const_int 1) (const_int 3)
14171 (const_int 5) (const_int 7)
14172 (const_int 9) (const_int 11)
14173 (const_int 13) (const_int 15)])))
14174 (sign_extend:V8HI
14175 (vec_select:V8QI (match_dup 2)
14176 (parallel [(const_int 1) (const_int 3)
14177 (const_int 5) (const_int 7)
14178 (const_int 9) (const_int 11)
14179 (const_int 13) (const_int 15)]))))))]
14180 "TARGET_SSSE3"
14181 "@
14182 pmaddubsw\t{%2, %0|%0, %2}
14183 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
14184 [(set_attr "isa" "noavx,avx")
14185 (set_attr "type" "sseiadd")
14186 (set_attr "atom_unit" "simul")
14187 (set_attr "prefix_data16" "1,*")
14188 (set_attr "prefix_extra" "1")
14189 (set_attr "prefix" "orig,vex")
14190 (set_attr "mode" "TI")])
14191
;; 64-bit (MMX register, constraint "y") PMADDUBSW.  Same structure as
;; the wider forms: zero-extended bytes of operand 1 times sign-extended
;; bytes of operand 2, separately for even and odd indices, with the two
;; V4HI products combined by ss_plus.  Two-operand form only: operand 1
;; is tied to the destination.
14192 (define_insn "ssse3_pmaddubsw"
14193 [(set (match_operand:V4HI 0 "register_operand" "=y")
14194 (ss_plus:V4HI
14195 (mult:V4HI
14196 (zero_extend:V4HI
14197 (vec_select:V4QI
14198 (match_operand:V8QI 1 "register_operand" "0")
14199 (parallel [(const_int 0) (const_int 2)
14200 (const_int 4) (const_int 6)])))
14201 (sign_extend:V4HI
14202 (vec_select:V4QI
14203 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
14204 (parallel [(const_int 0) (const_int 2)
14205 (const_int 4) (const_int 6)]))))
14206 (mult:V4HI
14207 (zero_extend:V4HI
14208 (vec_select:V4QI (match_dup 1)
14209 (parallel [(const_int 1) (const_int 3)
14210 (const_int 5) (const_int 7)])))
14211 (sign_extend:V4HI
14212 (vec_select:V4QI (match_dup 2)
14213 (parallel [(const_int 1) (const_int 3)
14214 (const_int 5) (const_int 7)]))))))]
14215 "TARGET_SSSE3"
14216 "pmaddubsw\t{%2, %0|%0, %2}"
14217 [(set_attr "type" "sseiadd")
14218 (set_attr "atom_unit" "simul")
14219 (set_attr "prefix_extra" "1")
14220 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
14221 (set_attr "mode" "DI")])
14222
;; Word-vector modes handled by the pmulhrsw expanders.  V4HI and V8HI
;; are unconditional; V16HI is only instantiated when AVX2 is enabled.
14223 (define_mode_iterator PMULHRSW
14224 [V4HI V8HI (V16HI "TARGET_AVX2")])
14225
;; Masked expander for pmulhrsw.  The arithmetic is
;;   truncate ((((sext (op1) * sext (op2)) >> 14) + 1) >> 1)
;; and the result is vec_merge'd with operand 3 under mask operand 4.
;; The preparation code fills the "+ 1" slot (operand 5) with
;; CONST1_RTX of the vector mode, then calls
;; ix86_fixup_binary_operands_no_copy for MULT on the operands.
14226 (define_expand "<ssse3_avx2>_pmulhrsw<mode>3_mask"
14227 [(set (match_operand:PMULHRSW 0 "register_operand")
14228 (vec_merge:PMULHRSW
14229 (truncate:PMULHRSW
14230 (lshiftrt:<ssedoublemode>
14231 (plus:<ssedoublemode>
14232 (lshiftrt:<ssedoublemode>
14233 (mult:<ssedoublemode>
14234 (sign_extend:<ssedoublemode>
14235 (match_operand:PMULHRSW 1 "nonimmediate_operand"))
14236 (sign_extend:<ssedoublemode>
14237 (match_operand:PMULHRSW 2 "nonimmediate_operand")))
14238 (const_int 14))
14239 (match_dup 5))
14240 (const_int 1)))
14241 (match_operand:PMULHRSW 3 "register_operand")
14242 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
14243 "TARGET_AVX512BW && TARGET_AVX512VL"
14244 {
14245 operands[5] = CONST1_RTX(<MODE>mode);
14246 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
14247 })
14248
;; Unmasked expander for pmulhrsw:
;;   truncate ((((sext (op1) * sext (op2)) >> 14) + 1) >> 1)
;; The preparation code fills the "+ 1" slot (operand 3) with
;; CONST1_RTX of the vector mode, then calls
;; ix86_fixup_binary_operands_no_copy for MULT on the operands.
;; The condition is TARGET_SSSE3 so that it agrees with the insns that
;; match this RTL; the PMULHRSW iterator already restricts V16HI to
;; TARGET_AVX2, so V4HI and V8HI must not additionally require AVX2.
14249 (define_expand "<ssse3_avx2>_pmulhrsw<mode>3"
14250 [(set (match_operand:PMULHRSW 0 "register_operand")
14251 (truncate:PMULHRSW
14252 (lshiftrt:<ssedoublemode>
14253 (plus:<ssedoublemode>
14254 (lshiftrt:<ssedoublemode>
14255 (mult:<ssedoublemode>
14256 (sign_extend:<ssedoublemode>
14257 (match_operand:PMULHRSW 1 "nonimmediate_operand"))
14258 (sign_extend:<ssedoublemode>
14259 (match_operand:PMULHRSW 2 "nonimmediate_operand")))
14260 (const_int 14))
14261 (match_dup 3))
14262 (const_int 1))))]
14263 "TARGET_SSSE3"
14264 {
14265 operands[3] = CONST1_RTX(<MODE>mode);
14266 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
14267 })
14268
;; Insn matching the pmulhrsw RTL for the VI2_AVX2 word-vector modes:
;;   truncate ((((sext (op1) * sext (op2)) >> 14) + 1) >> 1)
;; where the "+ 1" must satisfy const1_operand (operand 3).  Operand 1
;; is commutative ("%").  Alternative 0 is the legacy two-operand form;
;; alternative 1 is the three-operand AVX form, which also carries the
;; optional mask via <mask_operand4>.
14269 (define_insn "*<ssse3_avx2>_pmulhrsw<mode>3<mask_name>"
14270 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
14271 (truncate:VI2_AVX2
14272 (lshiftrt:<ssedoublemode>
14273 (plus:<ssedoublemode>
14274 (lshiftrt:<ssedoublemode>
14275 (mult:<ssedoublemode>
14276 (sign_extend:<ssedoublemode>
14277 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,v"))
14278 (sign_extend:<ssedoublemode>
14279 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,vm")))
14280 (const_int 14))
14281 (match_operand:VI2_AVX2 3 "const1_operand"))
14282 (const_int 1))))]
14283 "TARGET_SSSE3 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
14284 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
14285 "@
14286 pmulhrsw\t{%2, %0|%0, %2}
14287 vpmulhrsw\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"
14288 [(set_attr "isa" "noavx,avx")
14289 (set_attr "type" "sseimul")
14290 (set_attr "prefix_data16" "1,*")
14291 (set_attr "prefix_extra" "1")
14292 (set_attr "prefix" "orig,maybe_evex")
14293 (set_attr "mode" "<sseinsnmode>")])
14294
;; 64-bit (MMX register, constraint "y") insn matching the pmulhrsw RTL
;; for V4HI:
;;   truncate ((((sext (op1) * sext (op2)) >> 14) + 1) >> 1)
;; where the "+ 1" must satisfy const1_operand (operand 3).  Operand 1
;; is commutative ("%") and tied to the destination.
14295 (define_insn "*ssse3_pmulhrswv4hi3"
14296 [(set (match_operand:V4HI 0 "register_operand" "=y")
14297 (truncate:V4HI
14298 (lshiftrt:V4SI
14299 (plus:V4SI
14300 (lshiftrt:V4SI
14301 (mult:V4SI
14302 (sign_extend:V4SI
14303 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
14304 (sign_extend:V4SI
14305 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
14306 (const_int 14))
14307 (match_operand:V4HI 3 "const1_operand"))
14308 (const_int 1))))]
14309 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
14310 "pmulhrsw\t{%2, %0|%0, %2}"
14311 [(set_attr "type" "sseimul")
14312 (set_attr "prefix_extra" "1")
14313 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
14314 (set_attr "mode" "DI")])
14315
;; PSHUFB for the VI1_AVX512 byte-vector modes, kept as UNSPEC_PSHUFB of
;; operands 1 and 2.  Alternative 0 is the legacy two-operand form
;; (operand 1 tied to the destination); alternative 1 is the
;; three-operand AVX form, which also carries the optional mask via
;; <mask_operand3>.
14316 (define_insn "<ssse3_avx2>_pshufb<mode>3<mask_name>"
14317 [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,v")
14318 (unspec:VI1_AVX512
14319 [(match_operand:VI1_AVX512 1 "register_operand" "0,v")
14320 (match_operand:VI1_AVX512 2 "nonimmediate_operand" "xm,vm")]
14321 UNSPEC_PSHUFB))]
14322 "TARGET_SSSE3 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
14323 "@
14324 pshufb\t{%2, %0|%0, %2}
14325 vpshufb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14326 [(set_attr "isa" "noavx,avx")
14327 (set_attr "type" "sselog1")
14328 (set_attr "prefix_data16" "1,*")
14329 (set_attr "prefix_extra" "1")
14330 (set_attr "prefix" "orig,maybe_evex")
14331 (set_attr "btver2_decode" "vector,vector")
14332 (set_attr "mode" "<sseinsnmode>")])
14333
;; 64-bit (MMX register, constraint "y") PSHUFB on V8QI, kept as
;; UNSPEC_PSHUFB.  Two-operand form: operand 1 is tied to the
;; destination.  The ';' after the template string merely starts an
;; empty comment.
14334 (define_insn "ssse3_pshufbv8qi3"
14335 [(set (match_operand:V8QI 0 "register_operand" "=y")
14336 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
14337 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
14338 UNSPEC_PSHUFB))]
14339 "TARGET_SSSE3"
14340 "pshufb\t{%2, %0|%0, %2}";
14341 [(set_attr "type" "sselog1")
14342 (set_attr "prefix_extra" "1")
14343 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
14344 (set_attr "mode" "DI")])
14345
;; PSIGNB/W/D for the VI124_AVX2 integer-vector modes, kept as
;; UNSPEC_PSIGN of operands 1 and 2; the element-size suffix comes from
;; <ssemodesuffix>.  Alternative 0 is the legacy two-operand form
;; (operand 1 tied to the destination); alternative 1 is the
;; three-operand VEX form.
14346 (define_insn "<ssse3_avx2>_psign<mode>3"
14347 [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
14348 (unspec:VI124_AVX2
14349 [(match_operand:VI124_AVX2 1 "register_operand" "0,x")
14350 (match_operand:VI124_AVX2 2 "nonimmediate_operand" "xm,xm")]
14351 UNSPEC_PSIGN))]
14352 "TARGET_SSSE3"
14353 "@
14354 psign<ssemodesuffix>\t{%2, %0|%0, %2}
14355 vpsign<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
14356 [(set_attr "isa" "noavx,avx")
14357 (set_attr "type" "sselog1")
14358 (set_attr "prefix_data16" "1,*")
14359 (set_attr "prefix_extra" "1")
14360 (set_attr "prefix" "orig,vex")
14361 (set_attr "mode" "<sseinsnmode>")])
14362
;; 64-bit (MMX register, constraint "y") PSIGN for the MMXMODEI modes,
;; kept as UNSPEC_PSIGN; the element-size suffix comes from
;; <mmxvecsize>.  Two-operand form: operand 1 is tied to the
;; destination.  The ';' after the template string merely starts an
;; empty comment.
14363 (define_insn "ssse3_psign<mode>3"
14364 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
14365 (unspec:MMXMODEI
14366 [(match_operand:MMXMODEI 1 "register_operand" "0")
14367 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
14368 UNSPEC_PSIGN))]
14369 "TARGET_SSSE3"
14370 "psign<mmxvecsize>\t{%2, %0|%0, %2}";
14371 [(set_attr "type" "sselog1")
14372 (set_attr "prefix_extra" "1")
14373 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
14374 (set_attr "mode" "DI")])
14375
;; Masked VPALIGNR for the VI1_AVX512 byte-vector modes.  The
;; UNSPEC_PALIGNR result is vec_merge'd with operand 4 (tied to the
;; destination, or matching constraint "C") under mask register
;; operand 5.
;; Operand 3 must satisfy const_0_to_255_mul_8_operand; the output code
;; divides it by 8 before it is printed as the immediate.
;; Requires AVX512BW, plus AVX512VL for modes that are not 64 bytes wide.
14376 (define_insn "<ssse3_avx2>_palignr<mode>_mask"
14377 [(set (match_operand:VI1_AVX512 0 "register_operand" "=v")
14378 (vec_merge:VI1_AVX512
14379 (unspec:VI1_AVX512
14380 [(match_operand:VI1_AVX512 1 "register_operand" "v")
14381 (match_operand:VI1_AVX512 2 "nonimmediate_operand" "vm")
14382 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
14383 UNSPEC_PALIGNR)
14384 (match_operand:VI1_AVX512 4 "vector_move_operand" "0C")
14385 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
14386 "TARGET_AVX512BW && (<MODE_SIZE> == 64 || TARGET_AVX512VL)"
14387 {
14388 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
14389 return "vpalignr\t{%3, %2, %1, %0%{%5%}%N4|%0%{%5%}%N4, %1, %2, %3}";
14390 }
14391 [(set_attr "type" "sseishft")
14392 (set_attr "atom_unit" "sishuf")
14393 (set_attr "prefix_extra" "1")
14394 (set_attr "length_immediate" "1")
14395 (set_attr "prefix" "evex")
14396 (set_attr "mode" "<sseinsnmode>")])
14397
;; Unmasked PALIGNR/VPALIGNR for the SSESCALARMODE modes, kept as
;; UNSPEC_PALIGNR.  Operand 3 must satisfy const_0_to_255_mul_8_operand;
;; the output code divides it by 8 before it is printed as the
;; immediate.  Alternative 0 emits the legacy two-operand form (operand
;; 1 tied to the destination); alternative 1 emits the three-operand
;; form.
14398 (define_insn "<ssse3_avx2>_palignr<mode>"
14399 [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,v")
14400 (unspec:SSESCALARMODE
14401 [(match_operand:SSESCALARMODE 1 "register_operand" "0,v")
14402 (match_operand:SSESCALARMODE 2 "nonimmediate_operand" "xm,vm")
14403 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")]
14404 UNSPEC_PALIGNR))]
14405 "TARGET_SSSE3"
14406 {
14407 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
14408
14409 switch (which_alternative)
14410 {
14411 case 0:
14412 return "palignr\t{%3, %2, %0|%0, %2, %3}";
14413 case 1:
14414 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
14415 default:
14416 gcc_unreachable ();
14417 }
14418 }
14419 [(set_attr "isa" "noavx,avx")
14420 (set_attr "type" "sseishft")
14421 (set_attr "atom_unit" "sishuf")
14422 (set_attr "prefix_data16" "1,*")
14423 (set_attr "prefix_extra" "1")
14424 (set_attr "length_immediate" "1")
14425 (set_attr "prefix" "orig,vex")
14426 (set_attr "mode" "<sseinsnmode>")])
14427
;; 64-bit (MMX register, constraint "y") PALIGNR on DImode, kept as
;; UNSPEC_PALIGNR.  Operand 3 must satisfy const_0_to_255_mul_8_operand;
;; the output code divides it by 8 before it is printed as the
;; immediate.  Two-operand form: operand 1 is tied to the destination.
14428 (define_insn "ssse3_palignrdi"
14429 [(set (match_operand:DI 0 "register_operand" "=y")
14430 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
14431 (match_operand:DI 2 "nonimmediate_operand" "ym")
14432 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
14433 UNSPEC_PALIGNR))]
14434 "TARGET_SSSE3"
14435 {
14436 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
14437 return "palignr\t{%3, %2, %0|%0, %2, %3}";
14438 }
14439 [(set_attr "type" "sseishft")
14440 (set_attr "atom_unit" "sishuf")
14441 (set_attr "prefix_extra" "1")
14442 (set_attr "length_immediate" "1")
14443 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
14444 (set_attr "mode" "DI")])
14445
14446 ;; Mode iterator to handle singularity w/ absence of V2DI and V4DI
14447 ;; modes for abs instruction on pre AVX-512 targets.
;; Per-mode enabling conditions: the 128-bit byte/word/doubleword modes
;; are unconditional, their 256-bit counterparts need AVX2, 512-bit
;; byte/word modes need AVX512BW, 512-bit doubleword/quadword modes need
;; AVX512F, and the 128/256-bit quadword modes need AVX512VL.
14448 (define_mode_iterator VI1248_AVX512VL_AVX512BW
14449 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
14450 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
14451 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
14452 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
14453
;; Anonymous insn for vector integer absolute value (RTL code abs) over
;; every mode of VI1248_AVX512VL_AVX512BW.  The source may be a register
;; or memory.  The template starts with %v and the prefix attribute is
;; "maybe_vex", so one pattern serves both the legacy and VEX encodings.
14454 (define_insn "*abs<mode>2"
14455 [(set (match_operand:VI1248_AVX512VL_AVX512BW 0 "register_operand" "=v")
14456 (abs:VI1248_AVX512VL_AVX512BW
14457 (match_operand:VI1248_AVX512VL_AVX512BW 1 "nonimmediate_operand" "vm")))]
14458 "TARGET_SSSE3"
14459 "%vpabs<ssemodesuffix>\t{%1, %0|%0, %1}"
14460 [(set_attr "type" "sselog1")
14461 (set_attr "prefix_data16" "1")
14462 (set_attr "prefix_extra" "1")
14463 (set_attr "prefix" "maybe_vex")
14464 (set_attr "mode" "<sseinsnmode>")])
14465
;; Masked absolute value for the VI48_AVX512VL modes.  The abs result is
;; vec_merge'd with operand 2 (tied to the destination, or matching
;; constraint "C") under mask register operand 3.  Requires AVX512F.
14466 (define_insn "abs<mode>2_mask"
14467 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
14468 (vec_merge:VI48_AVX512VL
14469 (abs:VI48_AVX512VL
14470 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm"))
14471 (match_operand:VI48_AVX512VL 2 "vector_move_operand" "0C")
14472 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
14473 "TARGET_AVX512F"
14474 "vpabs<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
14475 [(set_attr "type" "sselog1")
14476 (set_attr "prefix" "evex")
14477 (set_attr "mode" "<sseinsnmode>")])
14478
;; Masked absolute value for the VI12_AVX512VL modes; shares the
;; "abs<mode>2_mask" name template with the VI48 pattern but iterates
;; over a different mode set.  The abs result is vec_merge'd with
;; operand 2 (tied to the destination, or matching constraint "C") under
;; mask register operand 3.  Requires AVX512BW.
14479 (define_insn "abs<mode>2_mask"
14480 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
14481 (vec_merge:VI12_AVX512VL
14482 (abs:VI12_AVX512VL
14483 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "vm"))
14484 (match_operand:VI12_AVX512VL 2 "vector_move_operand" "0C")
14485 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
14486 "TARGET_AVX512BW"
14487 "vpabs<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
14488 [(set_attr "type" "sselog1")
14489 (set_attr "prefix" "evex")
14490 (set_attr "mode" "<sseinsnmode>")])
14491
;; Named expander for vector integer abs, available from TARGET_SSE2 on.
;; Without SSSE3 the expansion is delegated to ix86_expand_sse2_abs and the
;; expander finishes with DONE; with SSSE3 the preparation code does nothing
;; and the (abs ...) template itself is emitted.
14492 (define_expand "abs<mode>2"
14493 [(set (match_operand:VI1248_AVX512VL_AVX512BW 0 "register_operand")
14494 (abs:VI1248_AVX512VL_AVX512BW
14495 (match_operand:VI1248_AVX512VL_AVX512BW 1 "nonimmediate_operand")))]
14496 "TARGET_SSE2"
14497 {
14498 if (!TARGET_SSSE3)
14499 {
14500 ix86_expand_sse2_abs (operands[0], operands[1]);
14501 DONE;
14502 }
14503 })
14504
;; Integer absolute value on the MMXMODEI modes, using the "y" register
;; class for the destination and a "y" register or memory for the source.
;; Emits pabs<mmxvecsize>.  prefix_rex is computed per insn by
;; x86_extended_reg_mentioned_p.
;; (A stray ';' that followed the output template has been dropped; it only
;; opened an empty md comment.)
14505 (define_insn "abs<mode>2"
14506 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
14507 (abs:MMXMODEI
14508 (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
14509 "TARGET_SSSE3"
14510 "pabs<mmxvecsize>\t{%1, %0|%0, %1}"
14511 [(set_attr "type" "sselog1")
14512 (set_attr "prefix_rep" "0")
14513 (set_attr "prefix_extra" "1")
14514 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
14515 (set_attr "mode" "DI")])
14516
14517 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14518 ;;
14519 ;; AMD SSE4A instructions
14520 ;;
14521 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14522
;; SSE4A MOVNT store of a scalar float (MODEF): register operand 1 is
;; written to memory operand 0 through UNSPEC_MOVNT.  Emits
;; movnt<ssemodesuffix>.
14523 (define_insn "sse4a_movnt<mode>"
14524 [(set (match_operand:MODEF 0 "memory_operand" "=m")
14525 (unspec:MODEF
14526 [(match_operand:MODEF 1 "register_operand" "x")]
14527 UNSPEC_MOVNT))]
14528 "TARGET_SSE4A"
14529 "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
14530 [(set_attr "type" "ssemov")
14531 (set_attr "mode" "<MODE>")])
14532
;; Vector-operand form of the SSE4A MOVNT store: element 0 of the VF_128
;; register operand 1 is selected and stored to the scalar memory operand 0
;; through UNSPEC_MOVNT.  Emits movnt<ssescalarmodesuffix>.
14533 (define_insn "sse4a_vmmovnt<mode>"
14534 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
14535 (unspec:<ssescalarmode>
14536 [(vec_select:<ssescalarmode>
14537 (match_operand:VF_128 1 "register_operand" "x")
14538 (parallel [(const_int 0)]))]
14539 UNSPEC_MOVNT))]
14540 "TARGET_SSE4A"
14541 "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
14542 [(set_attr "type" "ssemov")
14543 (set_attr "mode" "<ssescalarmode>")])
14544
;; SSE4A extrq, immediate form.  Operand 1 is tied to the destination;
;; operands 2 and 3 are two immediates in the range 0..255, which is why
;; length_immediate is 2.  Modelled as UNSPEC_EXTRQI.
14545 (define_insn "sse4a_extrqi"
14546 [(set (match_operand:V2DI 0 "register_operand" "=x")
14547 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
14548 (match_operand 2 "const_0_to_255_operand")
14549 (match_operand 3 "const_0_to_255_operand")]
14550 UNSPEC_EXTRQI))]
14551 "TARGET_SSE4A"
14552 "extrq\t{%3, %2, %0|%0, %2, %3}"
14553 [(set_attr "type" "sse")
14554 (set_attr "prefix_data16" "1")
14555 (set_attr "length_immediate" "2")
14556 (set_attr "mode" "TI")])
14557
;; SSE4A extrq, register form.  Operand 1 is tied to the destination and
;; operand 2 is a V16QI register supplying the control value.  Modelled as
;; UNSPEC_EXTRQ.
14558 (define_insn "sse4a_extrq"
14559 [(set (match_operand:V2DI 0 "register_operand" "=x")
14560 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
14561 (match_operand:V16QI 2 "register_operand" "x")]
14562 UNSPEC_EXTRQ))]
14563 "TARGET_SSE4A"
14564 "extrq\t{%2, %0|%0, %2}"
14565 [(set_attr "type" "sse")
14566 (set_attr "prefix_data16" "1")
14567 (set_attr "mode" "TI")])
14568
;; SSE4A insertq, immediate form.  Operand 1 is tied to the destination,
;; operand 2 is the second V2DI register, and operands 3 and 4 are two
;; immediates in the range 0..255 (length_immediate 2).  Modelled as
;; UNSPEC_INSERTQI.
14569 (define_insn "sse4a_insertqi"
14570 [(set (match_operand:V2DI 0 "register_operand" "=x")
14571 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
14572 (match_operand:V2DI 2 "register_operand" "x")
14573 (match_operand 3 "const_0_to_255_operand")
14574 (match_operand 4 "const_0_to_255_operand")]
14575 UNSPEC_INSERTQI))]
14576 "TARGET_SSE4A"
14577 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
14578 [(set_attr "type" "sseins")
14579 (set_attr "prefix_data16" "0")
14580 (set_attr "prefix_rep" "1")
14581 (set_attr "length_immediate" "2")
14582 (set_attr "mode" "TI")])
14583
;; SSE4A insertq, register form.  Operand 1 is tied to the destination and
;; operand 2 is the second V2DI register.  Modelled as UNSPEC_INSERTQ.
14584 (define_insn "sse4a_insertq"
14585 [(set (match_operand:V2DI 0 "register_operand" "=x")
14586 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
14587 (match_operand:V2DI 2 "register_operand" "x")]
14588 UNSPEC_INSERTQ))]
14589 "TARGET_SSE4A"
14590 "insertq\t{%2, %0|%0, %2}"
14591 [(set_attr "type" "sseins")
14592 (set_attr "prefix_data16" "0")
14593 (set_attr "prefix_rep" "1")
14594 (set_attr "mode" "TI")])
14595
14596 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14597 ;;
14598 ;; Intel SSE4.1 instructions
14599 ;;
14600 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14601
14602 ;; Mapping of immediate bits for blend instructions
;; Each value is the largest blend immediate for the mode, i.e. one bit per
;; vector element (8 elements -> 255, 4 -> 15, 2 -> 3).  It is spliced into
;; a predicate name of the form const_0_to_<blendbits>_operand.
14603 (define_mode_attr blendbits
14604 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
14605
;; Immediate-controlled float blend: vec_merge of operand 2 and operand 1
;; under the constant mask in operand 3.  The first two alternatives are the
;; non-AVX two-operand form (operand 1 tied to the destination); the third
;; is the AVX three-operand form.
14606 (define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>"
14607 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
14608 (vec_merge:VF_128_256
14609 (match_operand:VF_128_256 2 "nonimmediate_operand" "Yrm,*xm,xm")
14610 (match_operand:VF_128_256 1 "register_operand" "0,0,x")
14611 (match_operand:SI 3 "const_0_to_<blendbits>_operand")))]
14612 "TARGET_SSE4_1"
14613 "@
14614 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
14615 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
14616 vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14617 [(set_attr "isa" "noavx,noavx,avx")
14618 (set_attr "type" "ssemov")
14619 (set_attr "length_immediate" "1")
14620 (set_attr "prefix_data16" "1,1,*")
14621 (set_attr "prefix_extra" "1")
14622 (set_attr "prefix" "orig,orig,vex")
14623 (set_attr "mode" "<MODE>")])
14624
;; Variable float blend, modelled as UNSPEC_BLENDV over operands 1, 2 and
;; the selector register operand 3.  In the two non-AVX alternatives
;; operand 1 is tied to the destination and operand 3 is restricted to the
;; Yz constraint (presumably the implicit xmm0 operand -- confirm against
;; the constraint definitions); the AVX alternative accepts any "x"
;; register for the selector.
14625 (define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>"
14626 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
14627 (unspec:VF_128_256
14628 [(match_operand:VF_128_256 1 "register_operand" "0,0,x")
14629 (match_operand:VF_128_256 2 "nonimmediate_operand" "Yrm,*xm,xm")
14630 (match_operand:VF_128_256 3 "register_operand" "Yz,Yz,x")]
14631 UNSPEC_BLENDV))]
14632 "TARGET_SSE4_1"
14633 "@
14634 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
14635 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
14636 vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14637 [(set_attr "isa" "noavx,noavx,avx")
14638 (set_attr "type" "ssemov")
14639 (set_attr "length_immediate" "1")
14640 (set_attr "prefix_data16" "1,1,*")
14641 (set_attr "prefix_extra" "1")
14642 (set_attr "prefix" "orig,orig,vex")
14643 (set_attr "btver2_decode" "vector,vector,vector")
14644 (set_attr "mode" "<MODE>")])
14645
;; dp<ssemodesuffix> (dot product), modelled as UNSPEC_DP over operands 1
;; and 2 plus an 8-bit immediate in operand 3.  The "%" in the first
;; alternative of operand 1 marks operands 1 and 2 as commutative.
;; Alternatives 1-2 are the non-AVX destructive form, alternative 3 the AVX
;; three-operand form.
14646 (define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
14647 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
14648 (unspec:VF_128_256
14649 [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,x")
14650 (match_operand:VF_128_256 2 "nonimmediate_operand" "Yrm,*xm,xm")
14651 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
14652 UNSPEC_DP))]
14653 "TARGET_SSE4_1"
14654 "@
14655 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
14656 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
14657 vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14658 [(set_attr "isa" "noavx,noavx,avx")
14659 (set_attr "type" "ssemul")
14660 (set_attr "length_immediate" "1")
14661 (set_attr "prefix_data16" "1,1,*")
14662 (set_attr "prefix_extra" "1")
14663 (set_attr "prefix" "orig,orig,vex")
14664 (set_attr "btver2_decode" "vector,vector,vector")
14665 (set_attr "znver1_decode" "vector,vector,vector")
14666 (set_attr "mode" "<MODE>")])
14667
14668 ;; Mode attribute used by `vmovntdqa' pattern
;; Supplies the ISA prefix of the pattern name for each vector width:
;; sse4_1 for V2DI, avx2 for V4DI, avx512f for V8DI.
14669 (define_mode_attr vi8_sse4_1_avx2_avx512
14670 [(V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512f")])
14671
;; movntdqa load: memory operand 1 is loaded into register operand 0
;; through UNSPEC_MOVNTDQA.  The pattern name prefix comes from
;; vi8_sse4_1_avx2_avx512.  The third alternative uses the "v" register
;; class and is tagged with the evex prefix.
14672 (define_insn "<vi8_sse4_1_avx2_avx512>_movntdqa"
14673 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand" "=Yr,*x, v")
14674 (unspec:VI8_AVX2_AVX512F [(match_operand:VI8_AVX2_AVX512F 1 "memory_operand" "m, m, m")]
14675 UNSPEC_MOVNTDQA))]
14676 "TARGET_SSE4_1"
14677 "%vmovntdqa\t{%1, %0|%0, %1}"
14678 [(set_attr "type" "ssemov")
14679 (set_attr "prefix_extra" "1,1,*")
14680 (set_attr "prefix" "maybe_vex,maybe_vex,evex")
14681 (set_attr "mode" "<sseinsnmode>")])
14682
;; mpsadbw, modelled as UNSPEC_MPSADBW over two byte vectors and an 8-bit
;; immediate.  Alternatives 1-2 are the non-AVX destructive form (operand 1
;; tied to the destination), alternative 3 the AVX three-operand form.
14683 (define_insn "<sse4_1_avx2>_mpsadbw"
14684 [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
14685 (unspec:VI1_AVX2
14686 [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x")
14687 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "Yrm,*xm,xm")
14688 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
14689 UNSPEC_MPSADBW))]
14690 "TARGET_SSE4_1"
14691 "@
14692 mpsadbw\t{%3, %2, %0|%0, %2, %3}
14693 mpsadbw\t{%3, %2, %0|%0, %2, %3}
14694 vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14695 [(set_attr "isa" "noavx,noavx,avx")
14696 (set_attr "type" "sselog1")
14697 (set_attr "length_immediate" "1")
14698 (set_attr "prefix_extra" "1")
14699 (set_attr "prefix" "orig,orig,vex")
14700 (set_attr "btver2_decode" "vector,vector,vector")
14701 (set_attr "znver1_decode" "vector,vector,vector")
14702 (set_attr "mode" "<sseinsnmode>")])
14703
;; packusdw: the result is the concatenation of the unsigned-saturating
;; truncations (us_truncate) of operands 1 and 2, each narrowed from
;; <sseunpackmode> to <ssehalfvecmode>.  <mask_name> and the <mask_*>
;; conditions come from a substitution defined outside this chunk; the AVX
;; alternative prints the optional mask via <mask_operand3>.
14704 (define_insn "<sse4_1_avx2>_packusdw<mask_name>"
14705 [(set (match_operand:VI2_AVX2 0 "register_operand" "=Yr,*x,v")
14706 (vec_concat:VI2_AVX2
14707 (us_truncate:<ssehalfvecmode>
14708 (match_operand:<sseunpackmode> 1 "register_operand" "0,0,v"))
14709 (us_truncate:<ssehalfvecmode>
14710 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "Yrm,*xm,vm"))))]
14711 "TARGET_SSE4_1 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
14712 "@
14713 packusdw\t{%2, %0|%0, %2}
14714 packusdw\t{%2, %0|%0, %2}
14715 vpackusdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14716 [(set_attr "isa" "noavx,noavx,avx")
14717 (set_attr "type" "sselog")
14718 (set_attr "prefix_extra" "1")
14719 (set_attr "prefix" "orig,orig,maybe_evex")
14720 (set_attr "mode" "<sseinsnmode>")])
14721
;; Variable byte blend, modelled as UNSPEC_BLENDV with selector operand 3.
;; As in the float blendv pattern, the non-AVX alternatives tie operand 1 to
;; the destination and restrict operand 3 to the Yz constraint.  Only the
;; AVX alternative is given length_immediate 1.
14722 (define_insn "<sse4_1_avx2>_pblendvb"
14723 [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
14724 (unspec:VI1_AVX2
14725 [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x")
14726 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "Yrm,*xm,xm")
14727 (match_operand:VI1_AVX2 3 "register_operand" "Yz,Yz,x")]
14728 UNSPEC_BLENDV))]
14729 "TARGET_SSE4_1"
14730 "@
14731 pblendvb\t{%3, %2, %0|%0, %2, %3}
14732 pblendvb\t{%3, %2, %0|%0, %2, %3}
14733 vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14734 [(set_attr "isa" "noavx,noavx,avx")
14735 (set_attr "type" "ssemov")
14736 (set_attr "prefix_extra" "1")
14737 (set_attr "length_immediate" "*,*,1")
14738 (set_attr "prefix" "orig,orig,vex")
14739 (set_attr "btver2_decode" "vector,vector,vector")
14740 (set_attr "mode" "<sseinsnmode>")])
14741
;; 128-bit word blend: vec_merge of V8HI operands 2 and 1 under the 8-bit
;; immediate in operand 3 (one bit per element).  Alternatives 1-2 are the
;; non-AVX destructive form, alternative 3 the AVX three-operand form.
14742 (define_insn "sse4_1_pblendw"
14743 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,x")
14744 (vec_merge:V8HI
14745 (match_operand:V8HI 2 "nonimmediate_operand" "Yrm,*xm,xm")
14746 (match_operand:V8HI 1 "register_operand" "0,0,x")
14747 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")))]
14748 "TARGET_SSE4_1"
14749 "@
14750 pblendw\t{%3, %2, %0|%0, %2, %3}
14751 pblendw\t{%3, %2, %0|%0, %2, %3}
14752 vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14753 [(set_attr "isa" "noavx,noavx,avx")
14754 (set_attr "type" "ssemov")
14755 (set_attr "prefix_extra" "1")
14756 (set_attr "length_immediate" "1")
14757 (set_attr "prefix" "orig,orig,vex")
14758 (set_attr "mode" "TI")])
14759
;; Expander for the 256-bit word blend.  The builtin hands in an 8-bit
;; immediate, while the V16HI vec_merge needs one selector bit per element,
;; so the low byte is replicated into bits 8..15 before the template is
;; emitted.
(define_expand "avx2_pblendw"
  [(set (match_operand:V16HI 0 "register_operand")
	(vec_merge:V16HI
	  (match_operand:V16HI 2 "nonimmediate_operand")
	  (match_operand:V16HI 1 "register_operand")
	  (match_operand:SI 3 "const_0_to_255_operand")))]
  "TARGET_AVX2"
{
  /* Keep only the 8 bits supplied by the builtin.  */
  HOST_WIDE_INT imm8 = INTVAL (operands[3]) & 0xff;
  /* Same selector for the low and the high eight elements.  */
  HOST_WIDE_INT imm16 = (imm8 << 8) | imm8;

  operands[3] = GEN_INT (imm16);
})
14772
;; Insn matched by the 256-bit word blend.  Operand 3 must satisfy
;; avx2_pblendw_operand; the output code reduces it to its low 8 bits
;; before printing, since vpblendw is emitted with a one-byte immediate
;; (length_immediate 1).
14773 (define_insn "*avx2_pblendw"
14774 [(set (match_operand:V16HI 0 "register_operand" "=x")
14775 (vec_merge:V16HI
14776 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
14777 (match_operand:V16HI 1 "register_operand" "x")
14778 (match_operand:SI 3 "avx2_pblendw_operand" "n")))]
14779 "TARGET_AVX2"
14780 {
14781 operands[3] = GEN_INT (INTVAL (operands[3]) & 0xff);
14782 return "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
14783 }
14784 [(set_attr "type" "ssemov")
14785 (set_attr "prefix_extra" "1")
14786 (set_attr "length_immediate" "1")
14787 (set_attr "prefix" "vex")
14788 (set_attr "mode" "OI")])
14789
;; AVX2 dword blend: vec_merge of operands 2 and 1 (VI4_AVX2 modes) under
;; the 8-bit immediate in operand 3.  Emits vpblendd.
14790 (define_insn "avx2_pblendd<mode>"
14791 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
14792 (vec_merge:VI4_AVX2
14793 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")
14794 (match_operand:VI4_AVX2 1 "register_operand" "x")
14795 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
14796 "TARGET_AVX2"
14797 "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14798 [(set_attr "type" "ssemov")
14799 (set_attr "prefix_extra" "1")
14800 (set_attr "length_immediate" "1")
14801 (set_attr "prefix" "vex")
14802 (set_attr "mode" "<sseinsnmode>")])
14803
;; phminposuw on a V8HI register or memory source, modelled as
;; UNSPEC_PHMINPOSUW.  A single template with the %v marker serves both
;; encodings (prefix attribute "maybe_vex").
14804 (define_insn "sse4_1_phminposuw"
14805 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x")
14806 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "Yrm,*xm")]
14807 UNSPEC_PHMINPOSUW))]
14808 "TARGET_SSE4_1"
14809 "%vphminposuw\t{%1, %0|%0, %1}"
14810 [(set_attr "type" "sselog1")
14811 (set_attr "prefix_extra" "1")
14812 (set_attr "prefix" "maybe_vex")
14813 (set_attr "mode" "TI")])
14814
;; Extend all sixteen QI elements of operand 1 to V16HI.  any_extend is a
;; code iterator defined outside this chunk; <code> and <extsuffix> are
;; substituted per extension kind.  The whole source vector is read, so the
;; Intel-syntax operand is printed with plain %1.
14815 (define_insn "avx2_<code>v16qiv16hi2<mask_name>"
14816 [(set (match_operand:V16HI 0 "register_operand" "=v")
14817 (any_extend:V16HI
14818 (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
14819 "TARGET_AVX2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
14820 "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14821 [(set_attr "type" "ssemov")
14822 (set_attr "prefix_extra" "1")
14823 (set_attr "prefix" "maybe_evex")
14824 (set_attr "mode" "OI")])
14825
;; Extend all thirty-two QI elements of operand 1 to V32HI
;; (vpmov<extsuffix>bw).  Requires TARGET_AVX512BW and always uses the evex
;; prefix.
14826 (define_insn "avx512bw_<code>v32qiv32hi2<mask_name>"
14827 [(set (match_operand:V32HI 0 "register_operand" "=v")
14828 (any_extend:V32HI
14829 (match_operand:V32QI 1 "nonimmediate_operand" "vm")))]
14830 "TARGET_AVX512BW"
14831 "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14832 [(set_attr "type" "ssemov")
14833 (set_attr "prefix_extra" "1")
14834 (set_attr "prefix" "evex")
14835 (set_attr "mode" "XI")])
14836
;; Extend the low eight QI elements (indices 0-7) of V16QI operand 1 to
;; V8HI.  Only 64 bits of the source are used, hence the %q1 size modifier
;; on the Intel-syntax operand and ssememalign 64.
14837 (define_insn "sse4_1_<code>v8qiv8hi2<mask_name>"
14838 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*v")
14839 (any_extend:V8HI
14840 (vec_select:V8QI
14841 (match_operand:V16QI 1 "nonimmediate_operand" "Yrm,*vm")
14842 (parallel [(const_int 0) (const_int 1)
14843 (const_int 2) (const_int 3)
14844 (const_int 4) (const_int 5)
14845 (const_int 6) (const_int 7)]))))]
14846 "TARGET_SSE4_1 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
14847 "%vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
14848 [(set_attr "type" "ssemov")
14849 (set_attr "ssememalign" "64")
14850 (set_attr "prefix_extra" "1")
14851 (set_attr "prefix" "maybe_vex")
14852 (set_attr "mode" "TI")])
14853
;; Extend all sixteen QI elements of operand 1 to V16SI
;; (vpmov<extsuffix>bd).  The whole 128-bit V16QI operand is read, so the
;; Intel-syntax operand is printed with plain %1, as the other full-vector
;; extension patterns in this file do.  A %q1 size modifier would describe a
;; memory source as only 64 bits wide.
14854 (define_insn "<mask_codefor>avx512f_<code>v16qiv16si2<mask_name>"
14855 [(set (match_operand:V16SI 0 "register_operand" "=v")
14856 (any_extend:V16SI
14857 (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
14858 "TARGET_AVX512F"
14859 "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14860 [(set_attr "type" "ssemov")
14861 (set_attr "prefix" "evex")
14862 (set_attr "mode" "XI")])
14863
;; Extend the low eight QI elements (indices 0-7) of V16QI operand 1 to
;; V8SI (vpmov<extsuffix>bd).  64 bits of the source are used, hence %q1 on
;; the Intel-syntax operand.
14864 (define_insn "avx2_<code>v8qiv8si2<mask_name>"
14865 [(set (match_operand:V8SI 0 "register_operand" "=v")
14866 (any_extend:V8SI
14867 (vec_select:V8QI
14868 (match_operand:V16QI 1 "nonimmediate_operand" "vm")
14869 (parallel [(const_int 0) (const_int 1)
14870 (const_int 2) (const_int 3)
14871 (const_int 4) (const_int 5)
14872 (const_int 6) (const_int 7)]))))]
14873 "TARGET_AVX2 && <mask_avx512vl_condition>"
14874 "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
14875 [(set_attr "type" "ssemov")
14876 (set_attr "prefix_extra" "1")
14877 (set_attr "prefix" "maybe_evex")
14878 (set_attr "mode" "OI")])
14879
;; Extend the low four QI elements (indices 0-3) of V16QI operand 1 to
;; V4SI (vpmov<extsuffix>bd).  32 bits of the source are used, hence %k1 on
;; the Intel-syntax operand and ssememalign 32.
14880 (define_insn "sse4_1_<code>v4qiv4si2<mask_name>"
14881 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*v")
14882 (any_extend:V4SI
14883 (vec_select:V4QI
14884 (match_operand:V16QI 1 "nonimmediate_operand" "Yrm,*vm")
14885 (parallel [(const_int 0) (const_int 1)
14886 (const_int 2) (const_int 3)]))))]
14887 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
14888 "%vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
14889 [(set_attr "type" "ssemov")
14890 (set_attr "ssememalign" "32")
14891 (set_attr "prefix_extra" "1")
14892 (set_attr "prefix" "maybe_vex")
14893 (set_attr "mode" "TI")])
14894
;; Extend all sixteen HI elements of operand 1 to V16SI
;; (vpmov<extsuffix>wd).  The whole source vector is read, so the operand is
;; printed with plain %1.
14895 (define_insn "avx512f_<code>v16hiv16si2<mask_name>"
14896 [(set (match_operand:V16SI 0 "register_operand" "=v")
14897 (any_extend:V16SI
14898 (match_operand:V16HI 1 "nonimmediate_operand" "vm")))]
14899 "TARGET_AVX512F"
14900 "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14901 [(set_attr "type" "ssemov")
14902 (set_attr "prefix" "evex")
14903 (set_attr "mode" "XI")])
14904
;; Extend all eight HI elements of operand 1 to V8SI (vpmov<extsuffix>wd).
;; The whole V8HI source is read, so the operand is printed with plain %1.
14905 (define_insn "avx2_<code>v8hiv8si2<mask_name>"
14906 [(set (match_operand:V8SI 0 "register_operand" "=v")
14907 (any_extend:V8SI
14908 (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
14909 "TARGET_AVX2 && <mask_avx512vl_condition>"
14910 "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14911 [(set_attr "type" "ssemov")
14912 (set_attr "prefix_extra" "1")
14913 (set_attr "prefix" "maybe_evex")
14914 (set_attr "mode" "OI")])
14915
;; Extend the low four HI elements (indices 0-3) of V8HI operand 1 to V4SI
;; (vpmov<extsuffix>wd).  64 bits of the source are used, hence %q1 on the
;; Intel-syntax operand and ssememalign 64.
14916 (define_insn "sse4_1_<code>v4hiv4si2<mask_name>"
14917 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*v")
14918 (any_extend:V4SI
14919 (vec_select:V4HI
14920 (match_operand:V8HI 1 "nonimmediate_operand" "Yrm,*vm")
14921 (parallel [(const_int 0) (const_int 1)
14922 (const_int 2) (const_int 3)]))))]
14923 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
14924 "%vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
14925 [(set_attr "type" "ssemov")
14926 (set_attr "ssememalign" "64")
14927 (set_attr "prefix_extra" "1")
14928 (set_attr "prefix" "maybe_vex")
14929 (set_attr "mode" "TI")])
14930
;; Extend the low eight QI elements (indices 0-7) of V16QI operand 1 to
;; V8DI (vpmov<extsuffix>bq).  Eight bytes of the source are read, so the
;; Intel-syntax operand uses the 64-bit %q1 modifier, as the other patterns
;; in this file that select V8QI out of V16QI do; %k1 would describe a
;; memory source as only 32 bits wide.
14931 (define_insn "avx512f_<code>v8qiv8di2<mask_name>"
14932 [(set (match_operand:V8DI 0 "register_operand" "=v")
14933 (any_extend:V8DI
14934 (vec_select:V8QI
14935 (match_operand:V16QI 1 "nonimmediate_operand" "vm")
14936 (parallel [(const_int 0) (const_int 1)
14937 (const_int 2) (const_int 3)
14938 (const_int 4) (const_int 5)
14939 (const_int 6) (const_int 7)]))))]
14940 "TARGET_AVX512F"
14941 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
14942 [(set_attr "type" "ssemov")
14943 (set_attr "prefix" "evex")
14944 (set_attr "mode" "XI")])
14945
;; Extend the low four QI elements (indices 0-3) of V16QI operand 1 to
;; V4DI (vpmov<extsuffix>bq).  32 bits of the source are used, hence %k1 on
;; the Intel-syntax operand.
14946 (define_insn "avx2_<code>v4qiv4di2<mask_name>"
14947 [(set (match_operand:V4DI 0 "register_operand" "=v")
14948 (any_extend:V4DI
14949 (vec_select:V4QI
14950 (match_operand:V16QI 1 "nonimmediate_operand" "vm")
14951 (parallel [(const_int 0) (const_int 1)
14952 (const_int 2) (const_int 3)]))))]
14953 "TARGET_AVX2 && <mask_avx512vl_condition>"
14954 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
14955 [(set_attr "type" "ssemov")
14956 (set_attr "prefix_extra" "1")
14957 (set_attr "prefix" "maybe_evex")
14958 (set_attr "mode" "OI")])
14959
;; Extend the low two QI elements (indices 0-1) of V16QI operand 1 to V2DI
;; (vpmov<extsuffix>bq).  16 bits of the source are used, hence %w1 on the
;; Intel-syntax operand and ssememalign 16.
14960 (define_insn "sse4_1_<code>v2qiv2di2<mask_name>"
14961 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*v")
14962 (any_extend:V2DI
14963 (vec_select:V2QI
14964 (match_operand:V16QI 1 "nonimmediate_operand" "Yrm,*vm")
14965 (parallel [(const_int 0) (const_int 1)]))))]
14966 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
14967 "%vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %w1}"
14968 [(set_attr "type" "ssemov")
14969 (set_attr "ssememalign" "16")
14970 (set_attr "prefix_extra" "1")
14971 (set_attr "prefix" "maybe_vex")
14972 (set_attr "mode" "TI")])
14973
;; Extend all eight HI elements of operand 1 to V8DI (vpmov<extsuffix>wq).
;; The whole 128-bit V8HI operand is read, so the Intel-syntax operand is
;; printed with plain %1, as in the other full-vector extension patterns in
;; this file.  A %q1 size modifier would describe a memory source as only
;; 64 bits wide.
14974 (define_insn "avx512f_<code>v8hiv8di2<mask_name>"
14975 [(set (match_operand:V8DI 0 "register_operand" "=v")
14976 (any_extend:V8DI
14977 (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
14978 "TARGET_AVX512F"
14979 "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14980 [(set_attr "type" "ssemov")
14981 (set_attr "prefix" "evex")
14982 (set_attr "mode" "XI")])
14983
;; Extend the low four HI elements (indices 0-3) of V8HI operand 1 to V4DI
;; (vpmov<extsuffix>wq).  64 bits of the source are used, hence %q1 on the
;; Intel-syntax operand.
14984 (define_insn "avx2_<code>v4hiv4di2<mask_name>"
14985 [(set (match_operand:V4DI 0 "register_operand" "=v")
14986 (any_extend:V4DI
14987 (vec_select:V4HI
14988 (match_operand:V8HI 1 "nonimmediate_operand" "vm")
14989 (parallel [(const_int 0) (const_int 1)
14990 (const_int 2) (const_int 3)]))))]
14991 "TARGET_AVX2 && <mask_avx512vl_condition>"
14992 "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
14993 [(set_attr "type" "ssemov")
14994 (set_attr "prefix_extra" "1")
14995 (set_attr "prefix" "maybe_evex")
14996 (set_attr "mode" "OI")])
14997
;; Extend the low two HI elements (indices 0-1) of V8HI operand 1 to V2DI
;; (vpmov<extsuffix>wq).  32 bits of the source are used, hence %k1 on the
;; Intel-syntax operand and ssememalign 32.
14998 (define_insn "sse4_1_<code>v2hiv2di2<mask_name>"
14999 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*v")
15000 (any_extend:V2DI
15001 (vec_select:V2HI
15002 (match_operand:V8HI 1 "nonimmediate_operand" "Yrm,*vm")
15003 (parallel [(const_int 0) (const_int 1)]))))]
15004 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
15005 "%vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
15006 [(set_attr "type" "ssemov")
15007 (set_attr "ssememalign" "32")
15008 (set_attr "prefix_extra" "1")
15009 (set_attr "prefix" "maybe_vex")
15010 (set_attr "mode" "TI")])
15011
;; Extend all eight SI elements of operand 1 to V8DI (vpmov<extsuffix>dq).
;; The whole V8SI source is read, so the operand is printed with plain %1.
15012 (define_insn "avx512f_<code>v8siv8di2<mask_name>"
15013 [(set (match_operand:V8DI 0 "register_operand" "=v")
15014 (any_extend:V8DI
15015 (match_operand:V8SI 1 "nonimmediate_operand" "vm")))]
15016 "TARGET_AVX512F"
15017 "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
15018 [(set_attr "type" "ssemov")
15019 (set_attr "prefix" "evex")
15020 (set_attr "mode" "XI")])
15021
;; Extend all four SI elements of operand 1 to V4DI (vpmov<extsuffix>dq).
;; The whole V4SI source is read, so the operand is printed with plain %1.
15022 (define_insn "avx2_<code>v4siv4di2<mask_name>"
15023 [(set (match_operand:V4DI 0 "register_operand" "=v")
15024 (any_extend:V4DI
15025 (match_operand:V4SI 1 "nonimmediate_operand" "vm")))]
15026 "TARGET_AVX2 && <mask_avx512vl_condition>"
15027 "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
15028 [(set_attr "type" "ssemov")
15029 (set_attr "prefix" "maybe_evex")
15030 (set_attr "prefix_extra" "1")
15031 (set_attr "mode" "OI")])
15032
;; Extend the low two SI elements (indices 0-1) of V4SI operand 1 to V2DI
;; (vpmov<extsuffix>dq).  64 bits of the source are used, hence %q1 on the
;; Intel-syntax operand and ssememalign 64.
15033 (define_insn "sse4_1_<code>v2siv2di2<mask_name>"
15034 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*v")
15035 (any_extend:V2DI
15036 (vec_select:V2SI
15037 (match_operand:V4SI 1 "nonimmediate_operand" "Yrm,*vm")
15038 (parallel [(const_int 0) (const_int 1)]))))]
15039 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
15040 "%vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
15041 [(set_attr "type" "ssemov")
15042 (set_attr "ssememalign" "64")
15043 (set_attr "prefix_extra" "1")
15044 (set_attr "prefix" "maybe_vex")
15045 (set_attr "mode" "TI")])
15046
15047 ;; vtestps/vtestpd are very similar to comiss and ucomiss when
15048 ;; setting FLAGS_REG, but they are not really compare instructions.
;; The pattern therefore sets (reg:CC FLAGS_REG) from an UNSPEC_VTESTP of
;; the two operands rather than from a compare rtx.
15049 (define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>"
15050 [(set (reg:CC FLAGS_REG)
15051 (unspec:CC [(match_operand:VF_128_256 0 "register_operand" "x")
15052 (match_operand:VF_128_256 1 "nonimmediate_operand" "xm")]
15053 UNSPEC_VTESTP))]
15054 "TARGET_AVX"
15055 "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
15056 [(set_attr "type" "ssecomi")
15057 (set_attr "prefix_extra" "1")
15058 (set_attr "prefix" "vex")
15059 (set_attr "mode" "<MODE>")])
15060
15061 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
15062 ;; But it is not really a compare instruction.
;; FLAGS_REG is set from an UNSPEC_PTEST of the two V_AVX operands.  The
;; third alternative is restricted to AVX through the "isa" attribute, and
;; btver2_decode is "vector" only when the insn mode is OImode.
15063 (define_insn "<sse4_1>_ptest<mode>"
15064 [(set (reg:CC FLAGS_REG)
15065 (unspec:CC [(match_operand:V_AVX 0 "register_operand" "Yr, *x, x")
15066 (match_operand:V_AVX 1 "nonimmediate_operand" "Yrm, *xm, xm")]
15067 UNSPEC_PTEST))]
15068 "TARGET_SSE4_1"
15069 "%vptest\t{%1, %0|%0, %1}"
15070 [(set_attr "isa" "*,*,avx")
15071 (set_attr "type" "ssecomi")
15072 (set_attr "prefix_extra" "1")
15073 (set_attr "prefix" "maybe_vex")
15074 (set (attr "btver2_decode")
15075 (if_then_else
15076 (match_test "<sseinsnmode>mode==OImode")
15077 (const_string "vector")
15078 (const_string "*")))
15079 (set_attr "mode" "<sseinsnmode>")])
15080
;; Packed round: UNSPEC_ROUND of operand 1 with the rounding-control
;; immediate (0..15) in operand 2.  Enabled by TARGET_ROUND.  The
;; prefix_data16 attribute is "*" when TARGET_AVX holds and "1" otherwise.
15081 (define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
15082 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x")
15083 (unspec:VF_128_256
15084 [(match_operand:VF_128_256 1 "nonimmediate_operand" "Yrm,*xm")
15085 (match_operand:SI 2 "const_0_to_15_operand" "n,n")]
15086 UNSPEC_ROUND))]
15087 "TARGET_ROUND"
15088 "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
15089 [(set_attr "type" "ssecvt")
15090 (set (attr "prefix_data16")
15091 (if_then_else
15092 (match_test "TARGET_AVX")
15093 (const_string "*")
15094 (const_string "1")))
15095 (set_attr "prefix_extra" "1")
15096 (set_attr "length_immediate" "1")
15097 (set_attr "prefix" "maybe_vex")
15098 (set_attr "mode" "<MODE>")])
15099
;; Round-then-convert expander: operand 1 is rounded with the
;; rounding-control immediate in operand 2 into a fresh pseudo, and that
;; pseudo is then converted to the integer vector operand 0 with fix_trunc.
(define_expand "<sse4_1>_round<ssemodesuffix>_sfix<avxsizesuffix>"
  [(match_operand:<sseintvecmode> 0 "register_operand")
   (match_operand:VF1_128_256 1 "nonimmediate_operand")
   (match_operand:SI 2 "const_0_to_15_operand")]
  "TARGET_ROUND"
{
  /* Holds the rounded, still floating-point, value.  */
  rtx rounded = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix>
	       (rounded, operands[1], operands[2]));
  emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2
	       (operands[0], rounded));
  DONE;
})
15115
;; 512-bit roundpd expander: forwards its three operands unchanged to
;; gen_avx512f_rndscalev8df and finishes with DONE, so no template of its
;; own is emitted.
15116 (define_expand "avx512f_roundpd512"
15117 [(match_operand:V8DF 0 "register_operand")
15118 (match_operand:V8DF 1 "nonimmediate_operand")
15119 (match_operand:SI 2 "const_0_to_15_operand")]
15120 "TARGET_AVX512F"
15121 {
15122 emit_insn (gen_avx512f_rndscalev8df (operands[0], operands[1], operands[2]));
15123 DONE;
15124 })
15125
;; Round two VF2 vectors (operands 1 and 2) with the rounding control in
;; operand 3 and pack the results, converted to integers, into operand 0.
;; For V2DF with AVX available, 128-bit AVX not preferred and the insn
;; optimized for speed, the two inputs are first concatenated into one V4DF
;; so that a single 256-bit round and a single conversion suffice;
;; otherwise each input is rounded separately and the pair is packed by
;; vec_pack_sfix_trunc.
(define_expand "<sse4_1>_round<ssemodesuffix>_vec_pack_sfix<avxsizesuffix>"
  [(match_operand:<ssepackfltmode> 0 "register_operand")
   (match_operand:VF2 1 "nonimmediate_operand")
   (match_operand:VF2 2 "nonimmediate_operand")
   (match_operand:SI 3 "const_0_to_15_operand")]
  "TARGET_ROUND"
{
  if (<MODE>mode == V2DFmode
      && TARGET_AVX
      && !TARGET_PREFER_AVX128
      && optimize_insn_for_speed_p ())
    {
      /* Wide path: concat -> 256-bit round -> convert.  */
      rtx rounded = gen_reg_rtx (V4DFmode);
      rtx joined = gen_reg_rtx (V4DFmode);
      rtx low_half = force_reg (V2DFmode, operands[1]);

      emit_insn (gen_avx_vec_concatv4df (joined, low_half, operands[2]));
      emit_insn (gen_avx_roundpd256 (rounded, joined, operands[3]));
      emit_insn (gen_fix_truncv4dfv4si2 (operands[0], rounded));
    }
  else
    {
      /* Narrow path: round each input on its own, then pack.  */
      rtx rounded_lo = gen_reg_rtx (<MODE>mode);
      rtx rounded_hi = gen_reg_rtx (<MODE>mode);

      emit_insn (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix>
		   (rounded_lo, operands[1], operands[3]));
      emit_insn (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix>
		   (rounded_hi, operands[2], operands[3]));
      emit_insn (gen_vec_pack_sfix_trunc_<mode>
		   (operands[0], rounded_lo, rounded_hi));
    }
  DONE;
})
15163
;; Scalar round: the UNSPEC_ROUND of register operand 2 (rounding control
;; in operand 3) is vec_merge'd with operand 1 under the constant mask 1,
;; i.e. only element 0 takes the rounded value and the remaining elements
;; come from operand 1.  Alternatives 1-2 are the non-AVX form with
;; operand 1 tied to the destination; alternative 3 is the AVX
;; three-operand form.
15164 (define_insn "sse4_1_round<ssescalarmodesuffix>"
15165 [(set (match_operand:VF_128 0 "register_operand" "=Yr,*x,x")
15166 (vec_merge:VF_128
15167 (unspec:VF_128
15168 [(match_operand:VF_128 2 "register_operand" "Yr,*x,x")
15169 (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")]
15170 UNSPEC_ROUND)
15171 (match_operand:VF_128 1 "register_operand" "0,0,x")
15172 (const_int 1)))]
15173 "TARGET_ROUND"
15174 "@
15175 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
15176 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
15177 vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15178 [(set_attr "isa" "noavx,noavx,avx")
15179 (set_attr "type" "ssecvt")
15180 (set_attr "length_immediate" "1")
15181 (set_attr "prefix_data16" "1,1,*")
15182 (set_attr "prefix_extra" "1")
15183 (set_attr "prefix" "orig,orig,vex")
15184 (set_attr "mode" "<MODE>")])
15185
;; Vector round expander, enabled for TARGET_ROUND when trapping math is
;; off.  The template computes
;;   operands[4] = operands[1] + operands[3]
;;   operands[0] = UNSPEC_ROUND (operands[4], operands[5])
;; and the preparation code fills in the three internal operands:
;;   operands[3] = copysign (pred_half, operands[1]), where pred_half is
;;		   0.5 - 2**(-p-1) for the element format's precision p
;;		   (the "nextafter (0.5, 0.0)" value named in the body),
;;		   broadcast to a vector and forced into a register;
;;   operands[4] = a fresh pseudo for the sum;
;;   operands[5] = the ROUND_TRUNC rounding-control constant.
15186 (define_expand "round<mode>2"
15187 [(set (match_dup 4)
15188 (plus:VF
15189 (match_operand:VF 1 "register_operand")
15190 (match_dup 3)))
15191 (set (match_operand:VF 0 "register_operand")
15192 (unspec:VF
15193 [(match_dup 4) (match_dup 5)]
15194 UNSPEC_ROUND))]
15195 "TARGET_ROUND && !flag_trapping_math"
15196 {
15197 machine_mode scalar_mode;
15198 const struct real_format *fmt;
15199 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
15200 rtx half, vec_half;
15201
15202 scalar_mode = GET_MODE_INNER (<MODE>mode);
15203
15204 /* load nextafter (0.5, 0.0) */
15205 fmt = REAL_MODE_FORMAT (scalar_mode);
15206 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, scalar_mode);
15207 real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);
15208 half = const_double_from_real_value (pred_half, scalar_mode);
15209
15210 vec_half = ix86_build_const_vector (<MODE>mode, true, half);
15211 vec_half = force_reg (<MODE>mode, vec_half);
15212
15213 operands[3] = gen_reg_rtx (<MODE>mode);
15214 emit_insn (gen_copysign<mode>3 (operands[3], vec_half, operands[1]));
15215
15216 operands[4] = gen_reg_rtx (<MODE>mode);
15217 operands[5] = GEN_INT (ROUND_TRUNC);
15218 })
15219
;; Round operand 1 with the round<mode>2 expander into a fresh pseudo and
;; convert that pseudo to the integer vector operand 0 with fix_trunc.
;; Shares the condition of round<mode>2 (TARGET_ROUND, no trapping math).
(define_expand "round<mode>2_sfix"
  [(match_operand:<sseintvecmode> 0 "register_operand")
   (match_operand:VF1_128_256 1 "register_operand")]
  "TARGET_ROUND && !flag_trapping_math"
{
  /* Rounded value, still in the floating-point vector mode.  */
  rtx rounded = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_round<mode>2 (rounded, operands[1]));
  emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2
	       (operands[0], rounded));
  DONE;
})
15233
;; Round two VF2 vectors (operands 1 and 2) with the round<mode>2 expander
;; and pack the results, converted to integers, into operand 0.  For V2DF
;; with AVX available, 128-bit AVX not preferred and the insn optimized for
;; speed, the inputs are concatenated into one V4DF so that a single
;; roundv4df2 and a single conversion suffice; otherwise each input is
;; rounded separately and the pair is packed by vec_pack_sfix_trunc.
(define_expand "round<mode>2_vec_pack_sfix"
  [(match_operand:<ssepackfltmode> 0 "register_operand")
   (match_operand:VF2 1 "register_operand")
   (match_operand:VF2 2 "register_operand")]
  "TARGET_ROUND && !flag_trapping_math"
{
  if (<MODE>mode == V2DFmode
      && TARGET_AVX
      && !TARGET_PREFER_AVX128
      && optimize_insn_for_speed_p ())
    {
      /* Wide path: concat -> 256-bit round -> convert.  */
      rtx rounded = gen_reg_rtx (V4DFmode);
      rtx joined = gen_reg_rtx (V4DFmode);
      rtx low_half = force_reg (V2DFmode, operands[1]);

      emit_insn (gen_avx_vec_concatv4df (joined, low_half, operands[2]));
      emit_insn (gen_roundv4df2 (rounded, joined));
      emit_insn (gen_fix_truncv4dfv4si2 (operands[0], rounded));
    }
  else
    {
      /* Narrow path: round each input on its own, then pack.  */
      rtx rounded_lo = gen_reg_rtx (<MODE>mode);
      rtx rounded_hi = gen_reg_rtx (<MODE>mode);

      emit_insn (gen_round<mode>2 (rounded_lo, operands[1]));
      emit_insn (gen_round<mode>2 (rounded_hi, operands[2]));
      emit_insn (gen_vec_pack_sfix_trunc_<mode>
		   (operands[0], rounded_lo, rounded_hi));
    }
  DONE;
})
15267
15268 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15269 ;;
15270 ;; Intel SSE4.2 string/text processing instructions
15271 ;;
15272 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15273
;; Combined pcmpestr pattern.  One UNSPEC_PCMPESTR over operands 2-6
;; produces three results at once: the SI result in operand 0 (constraint
;; "c"), the V16QI result in operand 1 (constraint "Yz") and FLAGS_REG.
;; It is only valid while pseudos can still be created and is never output
;; directly ("#"); the splitter (condition "&& 1") replaces it according to
;; which results are live, judged by the absence of a REG_UNUSED note on
;; curr_insn:
;;   - operand 0 live           -> gen_sse4_2_pcmpestri
;;   - operand 1 live           -> gen_sse4_2_pcmpestrm
;;   - only the flags live      -> gen_sse4_2_pcmpestr_cconly
;;   - nothing live             -> a NOTE_INSN_DELETED note
;; The two alternatives differ only in operand 4 being a register or memory
;; (attribute "memory" is none,load).
15274 (define_insn_and_split "sse4_2_pcmpestr"
15275 [(set (match_operand:SI 0 "register_operand" "=c,c")
15276 (unspec:SI
15277 [(match_operand:V16QI 2 "register_operand" "x,x")
15278 (match_operand:SI 3 "register_operand" "a,a")
15279 (match_operand:V16QI 4 "nonimmediate_operand" "x,m")
15280 (match_operand:SI 5 "register_operand" "d,d")
15281 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
15282 UNSPEC_PCMPESTR))
15283 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
15284 (unspec:V16QI
15285 [(match_dup 2)
15286 (match_dup 3)
15287 (match_dup 4)
15288 (match_dup 5)
15289 (match_dup 6)]
15290 UNSPEC_PCMPESTR))
15291 (set (reg:CC FLAGS_REG)
15292 (unspec:CC
15293 [(match_dup 2)
15294 (match_dup 3)
15295 (match_dup 4)
15296 (match_dup 5)
15297 (match_dup 6)]
15298 UNSPEC_PCMPESTR))]
15299 "TARGET_SSE4_2
15300 && can_create_pseudo_p ()"
15301 "#"
15302 "&& 1"
15303 [(const_int 0)]
15304 {
15305 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
15306 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
15307 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
15308
15309 if (ecx)
15310 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
15311 operands[3], operands[4],
15312 operands[5], operands[6]));
15313 if (xmm0)
15314 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
15315 operands[3], operands[4],
15316 operands[5], operands[6]));
15317 if (flags && !(ecx || xmm0))
15318 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
15319 operands[2], operands[3],
15320 operands[4], operands[5],
15321 operands[6]));
15322 if (!(flags || ecx || xmm0))
15323 emit_note (NOTE_INSN_DELETED);
15324
15325 DONE;
15326 }
15327 [(set_attr "type" "sselog")
15328 (set_attr "prefix_data16" "1")
15329 (set_attr "prefix_extra" "1")
15330 (set_attr "ssememalign" "8")
15331 (set_attr "length_immediate" "1")
15332 (set_attr "memory" "none,load")
15333 (set_attr "mode" "TI")])
15334
;; Variant of the combined UNSPEC_PCMPESTR pattern whose operand 4 is a
;; memory operand wrapped in (unspec [...] UNSPEC_LOADU).  It has a
;; single alternative and is split the same way as sse4_2_pcmpestr:
;; the splitter passes the bare memory operand (operands[4]) to
;; sse4_2_pcmpestri / sse4_2_pcmpestrm / sse4_2_pcmpestr_cconly,
;; depending on which results lack a REG_UNUSED note, and emits a
;; NOTE_INSN_DELETED note when no result is used.
15335 (define_insn_and_split "*sse4_2_pcmpestr_unaligned"
15336 [(set (match_operand:SI 0 "register_operand" "=c")
15337 (unspec:SI
15338 [(match_operand:V16QI 2 "register_operand" "x")
15339 (match_operand:SI 3 "register_operand" "a")
15340 (unspec:V16QI
15341 [(match_operand:V16QI 4 "memory_operand" "m")]
15342 UNSPEC_LOADU)
15343 (match_operand:SI 5 "register_operand" "d")
15344 (match_operand:SI 6 "const_0_to_255_operand" "n")]
15345 UNSPEC_PCMPESTR))
15346 (set (match_operand:V16QI 1 "register_operand" "=Yz")
15347 (unspec:V16QI
15348 [(match_dup 2)
15349 (match_dup 3)
15350 (unspec:V16QI [(match_dup 4)] UNSPEC_LOADU)
15351 (match_dup 5)
15352 (match_dup 6)]
15353 UNSPEC_PCMPESTR))
15354 (set (reg:CC FLAGS_REG)
15355 (unspec:CC
15356 [(match_dup 2)
15357 (match_dup 3)
15358 (unspec:V16QI [(match_dup 4)] UNSPEC_LOADU)
15359 (match_dup 5)
15360 (match_dup 6)]
15361 UNSPEC_PCMPESTR))]
15362 "TARGET_SSE4_2
15363 && can_create_pseudo_p ()"
15364 "#"
15365 "&& 1"
15366 [(const_int 0)]
15367 {
15368 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
15369 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
15370 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
15371
15372 if (ecx)
15373 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
15374 operands[3], operands[4],
15375 operands[5], operands[6]));
15376 if (xmm0)
15377 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
15378 operands[3], operands[4],
15379 operands[5], operands[6]));
15380 if (flags && !(ecx || xmm0))
15381 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
15382 operands[2], operands[3],
15383 operands[4], operands[5],
15384 operands[6]));
15385 if (!(flags || ecx || xmm0))
15386 emit_note (NOTE_INSN_DELETED);
15387
15388 DONE;
15389 }
15390 [(set_attr "type" "sselog")
15391 (set_attr "prefix_data16" "1")
15392 (set_attr "prefix_extra" "1")
15393 (set_attr "ssememalign" "8")
15394 (set_attr "length_immediate" "1")
15395 (set_attr "memory" "load")
15396 (set_attr "mode" "TI")])
15397
;; Index form of the explicit-length string compare: sets the SImode
;; operand 0 (constraint "c") and the flags register from the same
;; UNSPEC_PCMPESTR inputs, and outputs "%vpcmpestri" with the immediate
;; (operand 5), the second vector (operand 3, register or memory) and
;; the first vector (operand 1).  Operands 2 and 4 (constraints "a" and
;; "d") are inputs that do not appear in the output template.
15398 (define_insn "sse4_2_pcmpestri"
15399 [(set (match_operand:SI 0 "register_operand" "=c,c")
15400 (unspec:SI
15401 [(match_operand:V16QI 1 "register_operand" "x,x")
15402 (match_operand:SI 2 "register_operand" "a,a")
15403 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
15404 (match_operand:SI 4 "register_operand" "d,d")
15405 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
15406 UNSPEC_PCMPESTR))
15407 (set (reg:CC FLAGS_REG)
15408 (unspec:CC
15409 [(match_dup 1)
15410 (match_dup 2)
15411 (match_dup 3)
15412 (match_dup 4)
15413 (match_dup 5)]
15414 UNSPEC_PCMPESTR))]
15415 "TARGET_SSE4_2"
15416 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
15417 [(set_attr "type" "sselog")
15418 (set_attr "prefix_data16" "1")
15419 (set_attr "prefix_extra" "1")
15420 (set_attr "prefix" "maybe_vex")
15421 (set_attr "ssememalign" "8")
15422 (set_attr "length_immediate" "1")
15423 (set_attr "btver2_decode" "vector")
15424 (set_attr "memory" "none,load")
15425 (set_attr "mode" "TI")])
15426
;; Mask form of the explicit-length string compare: sets the V16QImode
;; operand 0 (constraint "Yz") and the flags register from the same
;; UNSPEC_PCMPESTR inputs, and outputs "%vpcmpestrm".  The operand
;; layout of the inputs matches sse4_2_pcmpestri.
15427 (define_insn "sse4_2_pcmpestrm"
15428 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
15429 (unspec:V16QI
15430 [(match_operand:V16QI 1 "register_operand" "x,x")
15431 (match_operand:SI 2 "register_operand" "a,a")
15432 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
15433 (match_operand:SI 4 "register_operand" "d,d")
15434 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
15435 UNSPEC_PCMPESTR))
15436 (set (reg:CC FLAGS_REG)
15437 (unspec:CC
15438 [(match_dup 1)
15439 (match_dup 2)
15440 (match_dup 3)
15441 (match_dup 4)
15442 (match_dup 5)]
15443 UNSPEC_PCMPESTR))]
15444 "TARGET_SSE4_2"
15445 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
15446 [(set_attr "type" "sselog")
15447 (set_attr "prefix_data16" "1")
15448 (set_attr "prefix_extra" "1")
15449 (set_attr "ssememalign" "8")
15450 (set_attr "length_immediate" "1")
15451 (set_attr "prefix" "maybe_vex")
15452 (set_attr "btver2_decode" "vector")
15453 (set_attr "memory" "none,load")
15454 (set_attr "mode" "TI")])
15455
;; Flags-only form of the explicit-length string compare.  Only the
;; flags register is set; operands 0 (V16QI) and 1 (SI) are scratch
;; clobbers.  Alternatives 0 and 1 use a "Yz" vector scratch and output
;; "%vpcmpestrm"; alternatives 2 and 3 use a "c" integer scratch and
;; output "%vpcmpestri".  Within each pair the second alternative takes
;; operand 4 from memory.
15456 (define_insn "sse4_2_pcmpestr_cconly"
15457 [(set (reg:CC FLAGS_REG)
15458 (unspec:CC
15459 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
15460 (match_operand:SI 3 "register_operand" "a,a,a,a")
15461 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
15462 (match_operand:SI 5 "register_operand" "d,d,d,d")
15463 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
15464 UNSPEC_PCMPESTR))
15465 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
15466 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
15467 "TARGET_SSE4_2"
15468 "@
15469 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
15470 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
15471 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
15472 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
15473 [(set_attr "type" "sselog")
15474 (set_attr "prefix_data16" "1")
15475 (set_attr "prefix_extra" "1")
15476 (set_attr "ssememalign" "8")
15477 (set_attr "length_immediate" "1")
15478 (set_attr "memory" "none,load,none,load")
15479 (set_attr "btver2_decode" "vector,vector,vector,vector")
15480 (set_attr "prefix" "maybe_vex")
15481 (set_attr "mode" "TI")])
15482
;; Combined implicit-length string compare (UNSPEC_PCMPISTR).
;; Same structure as sse4_2_pcmpestr but with only two vector inputs
;; (operands 2 and 3) and an immediate (operand 4).  The pattern sets an
;; SImode result (operand 0, "c"), a V16QImode result (operand 1, "Yz")
;; and the flags register.  It is always split: the splitter checks the
;; REG_UNUSED notes on curr_insn and emits sse4_2_pcmpistri and/or
;; sse4_2_pcmpistrm for used results, sse4_2_pcmpistr_cconly when only
;; the flags are used, or a NOTE_INSN_DELETED note when nothing is used.
15483 (define_insn_and_split "sse4_2_pcmpistr"
15484 [(set (match_operand:SI 0 "register_operand" "=c,c")
15485 (unspec:SI
15486 [(match_operand:V16QI 2 "register_operand" "x,x")
15487 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
15488 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
15489 UNSPEC_PCMPISTR))
15490 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
15491 (unspec:V16QI
15492 [(match_dup 2)
15493 (match_dup 3)
15494 (match_dup 4)]
15495 UNSPEC_PCMPISTR))
15496 (set (reg:CC FLAGS_REG)
15497 (unspec:CC
15498 [(match_dup 2)
15499 (match_dup 3)
15500 (match_dup 4)]
15501 UNSPEC_PCMPISTR))]
15502 "TARGET_SSE4_2
15503 && can_create_pseudo_p ()"
15504 "#"
15505 "&& 1"
15506 [(const_int 0)]
15507 {
15508 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
15509 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
15510 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
15511
15512 if (ecx)
15513 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
15514 operands[3], operands[4]));
15515 if (xmm0)
15516 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
15517 operands[3], operands[4]));
15518 if (flags && !(ecx || xmm0))
15519 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
15520 operands[2], operands[3],
15521 operands[4]));
15522 if (!(flags || ecx || xmm0))
15523 emit_note (NOTE_INSN_DELETED);
15524
15525 DONE;
15526 }
15527 [(set_attr "type" "sselog")
15528 (set_attr "prefix_data16" "1")
15529 (set_attr "prefix_extra" "1")
15530 (set_attr "ssememalign" "8")
15531 (set_attr "length_immediate" "1")
15532 (set_attr "memory" "none,load")
15533 (set_attr "mode" "TI")])
15534
;; Variant of the combined UNSPEC_PCMPISTR pattern whose operand 3 is a
;; memory operand wrapped in (unspec [...] UNSPEC_LOADU).  It is split
;; exactly like sse4_2_pcmpistr; the splitter hands the bare memory
;; operand (operands[3]) to sse4_2_pcmpistri / sse4_2_pcmpistrm /
;; sse4_2_pcmpistr_cconly, or emits a NOTE_INSN_DELETED note when no
;; result is used.
15535 (define_insn_and_split "*sse4_2_pcmpistr_unaligned"
15536 [(set (match_operand:SI 0 "register_operand" "=c")
15537 (unspec:SI
15538 [(match_operand:V16QI 2 "register_operand" "x")
15539 (unspec:V16QI
15540 [(match_operand:V16QI 3 "memory_operand" "m")]
15541 UNSPEC_LOADU)
15542 (match_operand:SI 4 "const_0_to_255_operand" "n")]
15543 UNSPEC_PCMPISTR))
15544 (set (match_operand:V16QI 1 "register_operand" "=Yz")
15545 (unspec:V16QI
15546 [(match_dup 2)
15547 (unspec:V16QI [(match_dup 3)] UNSPEC_LOADU)
15548 (match_dup 4)]
15549 UNSPEC_PCMPISTR))
15550 (set (reg:CC FLAGS_REG)
15551 (unspec:CC
15552 [(match_dup 2)
15553 (unspec:V16QI [(match_dup 3)] UNSPEC_LOADU)
15554 (match_dup 4)]
15555 UNSPEC_PCMPISTR))]
15556 "TARGET_SSE4_2
15557 && can_create_pseudo_p ()"
15558 "#"
15559 "&& 1"
15560 [(const_int 0)]
15561 {
15562 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
15563 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
15564 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
15565
15566 if (ecx)
15567 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
15568 operands[3], operands[4]));
15569 if (xmm0)
15570 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
15571 operands[3], operands[4]));
15572 if (flags && !(ecx || xmm0))
15573 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
15574 operands[2], operands[3],
15575 operands[4]));
15576 if (!(flags || ecx || xmm0))
15577 emit_note (NOTE_INSN_DELETED);
15578
15579 DONE;
15580 }
15581 [(set_attr "type" "sselog")
15582 (set_attr "prefix_data16" "1")
15583 (set_attr "prefix_extra" "1")
15584 (set_attr "ssememalign" "8")
15585 (set_attr "length_immediate" "1")
15586 (set_attr "memory" "load")
15587 (set_attr "mode" "TI")])
15588
;; Index form of the implicit-length string compare: sets the SImode
;; operand 0 (constraint "c") and the flags register from
;; UNSPEC_PCMPISTR of operands 1-3, and outputs "%vpcmpistri".
;; Operand 2 may be a register or memory (second alternative).
15589 (define_insn "sse4_2_pcmpistri"
15590 [(set (match_operand:SI 0 "register_operand" "=c,c")
15591 (unspec:SI
15592 [(match_operand:V16QI 1 "register_operand" "x,x")
15593 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
15594 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
15595 UNSPEC_PCMPISTR))
15596 (set (reg:CC FLAGS_REG)
15597 (unspec:CC
15598 [(match_dup 1)
15599 (match_dup 2)
15600 (match_dup 3)]
15601 UNSPEC_PCMPISTR))]
15602 "TARGET_SSE4_2"
15603 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
15604 [(set_attr "type" "sselog")
15605 (set_attr "prefix_data16" "1")
15606 (set_attr "prefix_extra" "1")
15607 (set_attr "ssememalign" "8")
15608 (set_attr "length_immediate" "1")
15609 (set_attr "prefix" "maybe_vex")
15610 (set_attr "memory" "none,load")
15611 (set_attr "btver2_decode" "vector")
15612 (set_attr "mode" "TI")])
15613
;; Mask form of the implicit-length string compare: sets the V16QImode
;; operand 0 (constraint "Yz") and the flags register from
;; UNSPEC_PCMPISTR of operands 1-3, and outputs "%vpcmpistrm".
15614 (define_insn "sse4_2_pcmpistrm"
15615 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
15616 (unspec:V16QI
15617 [(match_operand:V16QI 1 "register_operand" "x,x")
15618 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
15619 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
15620 UNSPEC_PCMPISTR))
15621 (set (reg:CC FLAGS_REG)
15622 (unspec:CC
15623 [(match_dup 1)
15624 (match_dup 2)
15625 (match_dup 3)]
15626 UNSPEC_PCMPISTR))]
15627 "TARGET_SSE4_2"
15628 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
15629 [(set_attr "type" "sselog")
15630 (set_attr "prefix_data16" "1")
15631 (set_attr "prefix_extra" "1")
15632 (set_attr "ssememalign" "8")
15633 (set_attr "length_immediate" "1")
15634 (set_attr "prefix" "maybe_vex")
15635 (set_attr "memory" "none,load")
15636 (set_attr "btver2_decode" "vector")
15637 (set_attr "mode" "TI")])
15638
;; Flags-only form of the implicit-length string compare.  Only the
;; flags register is set; operands 0 (V16QI) and 1 (SI) are scratch
;; clobbers.  Alternatives 0 and 1 use a "Yz" vector scratch and output
;; "%vpcmpistrm"; alternatives 2 and 3 use a "c" integer scratch and
;; output "%vpcmpistri".  Within each pair the second alternative takes
;; operand 3 from memory.
15639 (define_insn "sse4_2_pcmpistr_cconly"
15640 [(set (reg:CC FLAGS_REG)
15641 (unspec:CC
15642 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
15643 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
15644 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
15645 UNSPEC_PCMPISTR))
15646 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
15647 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
15648 "TARGET_SSE4_2"
15649 "@
15650 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
15651 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
15652 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
15653 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
15654 [(set_attr "type" "sselog")
15655 (set_attr "prefix_data16" "1")
15656 (set_attr "prefix_extra" "1")
15657 (set_attr "ssememalign" "8")
15658 (set_attr "length_immediate" "1")
15659 (set_attr "memory" "none,load,none,load")
15660 (set_attr "prefix" "maybe_vex")
15661 (set_attr "btver2_decode" "vector,vector,vector,vector")
15662 (set_attr "mode" "TI")])
15663
15664 ;; Packed float variants
;; Maps the index-vector mode of the "sf" gather/scatter prefetch
;; patterns to the mode used for their memory reference:
;; V8DI -> V8SF and V16SI -> V16SF.
15665 (define_mode_attr GATHER_SCATTER_SF_MEM_MODE
15666 [(V8DI "V8SF") (V16SI "V16SF")])
15667
;; Expander for the packed-float gather prefetch
;; (UNSPEC_GATHER_PREFETCH).  Operand 0 is a mask register or constant
;; -1, operand 2 the base address, operand 1 the index vector, operand
;; 3 the scale (const1248_operand) and operand 4 a hint restricted to
;; 2 or 3.  The preparation statement builds operands[5] as
;; (unspec:Pmode [operands[2] operands[1] operands[3]]
;; UNSPEC_VSIBADDR), which becomes the address of the prefetched mem.
15668 (define_expand "avx512pf_gatherpf<mode>sf"
15669 [(unspec
15670 [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
15671 (mem:<GATHER_SCATTER_SF_MEM_MODE>
15672 (match_par_dup 5
15673 [(match_operand 2 "vsib_address_operand")
15674 (match_operand:VI48_512 1 "register_operand")
15675 (match_operand:SI 3 "const1248_operand")]))
15676 (match_operand:SI 4 "const_2_to_3_operand")]
15677 UNSPEC_GATHER_PREFETCH)]
15678 "TARGET_AVX512PF"
15679 {
15680 operands[5]
15681 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
15682 operands[3]), UNSPEC_VSIBADDR);
15683 })
15684
;; Masked packed-float gather prefetch.  Operand 0 is the mask register
;; (constraint "Yk"); operand 5 is the VSIB memory reference built from
;; base (operand 2), index vector (operand 1) and scale (operand 3).
;; The hint in operand 4 selects the mnemonic: 3 -> vgatherpf0...ps,
;; 2 -> vgatherpf1...ps; any other value is unreachable.  The mask is
;; printed in braces after the memory operand.
15685 (define_insn "*avx512pf_gatherpf<mode>sf_mask"
15686 [(unspec
15687 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
15688 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
15689 [(unspec:P
15690 [(match_operand:P 2 "vsib_address_operand" "Tv")
15691 (match_operand:VI48_512 1 "register_operand" "v")
15692 (match_operand:SI 3 "const1248_operand" "n")]
15693 UNSPEC_VSIBADDR)])
15694 (match_operand:SI 4 "const_2_to_3_operand" "n")]
15695 UNSPEC_GATHER_PREFETCH)]
15696 "TARGET_AVX512PF"
15697 {
15698 switch (INTVAL (operands[4]))
15699 {
15700 case 3:
15701 return "vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
15702 case 2:
15703 return "vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
15704 default:
15705 gcc_unreachable ();
15706 }
15707 }
15708 [(set_attr "type" "sse")
15709 (set_attr "prefix" "evex")
15710 (set_attr "mode" "XI")])
15711
;; Packed-float gather prefetch matched when the mask position holds
;; (const_int -1).  Operand 4 is the VSIB memory reference built from
;; base (operand 1), index vector (operand 0) and scale (operand 2).
;; The hint in operand 3 selects the mnemonic: 3 -> vgatherpf0...ps,
;; 2 -> vgatherpf1...ps; any other value is unreachable.  Only the
;; memory operand is printed (no mask).
15712 (define_insn "*avx512pf_gatherpf<mode>sf"
15713 [(unspec
15714 [(const_int -1)
15715 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 4 "vsib_mem_operator"
15716 [(unspec:P
15717 [(match_operand:P 1 "vsib_address_operand" "Tv")
15718 (match_operand:VI48_512 0 "register_operand" "v")
15719 (match_operand:SI 2 "const1248_operand" "n")]
15720 UNSPEC_VSIBADDR)])
15721 (match_operand:SI 3 "const_2_to_3_operand" "n")]
15722 UNSPEC_GATHER_PREFETCH)]
15723 "TARGET_AVX512PF"
15724 {
15725 switch (INTVAL (operands[3]))
15726 {
15727 case 3:
15728 return "vgatherpf0<ssemodesuffix>ps\t{%4|%4}";
15729 case 2:
15730 return "vgatherpf1<ssemodesuffix>ps\t{%4|%4}";
15731 default:
15732 gcc_unreachable ();
15733 }
15734 }
15735 [(set_attr "type" "sse")
15736 (set_attr "prefix" "evex")
15737 (set_attr "mode" "XI")])
15738
15739 ;; Packed double variants
;; Expander for the packed-double gather prefetch
;; (UNSPEC_GATHER_PREFETCH).  Same operand layout as the "sf" expander,
;; but the memory reference is always V8DF and the index vector uses
;; the VI4_256_8_512 iterator.  The preparation statement builds
;; operands[5] as (unspec:Pmode [operands[2] operands[1] operands[3]]
;; UNSPEC_VSIBADDR).
15740 (define_expand "avx512pf_gatherpf<mode>df"
15741 [(unspec
15742 [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
15743 (mem:V8DF
15744 (match_par_dup 5
15745 [(match_operand 2 "vsib_address_operand")
15746 (match_operand:VI4_256_8_512 1 "register_operand")
15747 (match_operand:SI 3 "const1248_operand")]))
15748 (match_operand:SI 4 "const_2_to_3_operand")]
15749 UNSPEC_GATHER_PREFETCH)]
15750 "TARGET_AVX512PF"
15751 {
15752 operands[5]
15753 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
15754 operands[3]), UNSPEC_VSIBADDR);
15755 })
15756
;; Masked packed-double gather prefetch.  Operand 0 is the mask
;; register (constraint "Yk"); operand 5 is the V8DF VSIB memory
;; reference built from base (operand 2), index vector (operand 1) and
;; scale (operand 3).  The hint in operand 4 selects the mnemonic:
;; 3 -> vgatherpf0...pd, 2 -> vgatherpf1...pd; any other value is
;; unreachable.  The mask is printed in braces after the memory operand.
15757 (define_insn "*avx512pf_gatherpf<mode>df_mask"
15758 [(unspec
15759 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
15760 (match_operator:V8DF 5 "vsib_mem_operator"
15761 [(unspec:P
15762 [(match_operand:P 2 "vsib_address_operand" "Tv")
15763 (match_operand:VI4_256_8_512 1 "register_operand" "v")
15764 (match_operand:SI 3 "const1248_operand" "n")]
15765 UNSPEC_VSIBADDR)])
15766 (match_operand:SI 4 "const_2_to_3_operand" "n")]
15767 UNSPEC_GATHER_PREFETCH)]
15768 "TARGET_AVX512PF"
15769 {
15770 switch (INTVAL (operands[4]))
15771 {
15772 case 3:
15773 return "vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
15774 case 2:
15775 return "vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
15776 default:
15777 gcc_unreachable ();
15778 }
15779 }
15780 [(set_attr "type" "sse")
15781 (set_attr "prefix" "evex")
15782 (set_attr "mode" "XI")])
15783
;; Packed-double gather prefetch matched when the mask position holds
;; (const_int -1).  Operand 4 is the V8DF VSIB memory reference built
;; from base (operand 1), index vector (operand 0) and scale
;; (operand 2).  The hint in operand 3 selects the mnemonic:
;; 3 -> vgatherpf0...pd, 2 -> vgatherpf1...pd; any other value is
;; unreachable.  Only the memory operand is printed (no mask).
15784 (define_insn "*avx512pf_gatherpf<mode>df"
15785 [(unspec
15786 [(const_int -1)
15787 (match_operator:V8DF 4 "vsib_mem_operator"
15788 [(unspec:P
15789 [(match_operand:P 1 "vsib_address_operand" "Tv")
15790 (match_operand:VI4_256_8_512 0 "register_operand" "v")
15791 (match_operand:SI 2 "const1248_operand" "n")]
15792 UNSPEC_VSIBADDR)])
15793 (match_operand:SI 3 "const_2_to_3_operand" "n")]
15794 UNSPEC_GATHER_PREFETCH)]
15795 "TARGET_AVX512PF"
15796 {
15797 switch (INTVAL (operands[3]))
15798 {
15799 case 3:
15800 return "vgatherpf0<ssemodesuffix>pd\t{%4|%4}";
15801 case 2:
15802 return "vgatherpf1<ssemodesuffix>pd\t{%4|%4}";
15803 default:
15804 gcc_unreachable ();
15805 }
15806 }
15807 [(set_attr "type" "sse")
15808 (set_attr "prefix" "evex")
15809 (set_attr "mode" "XI")])
15810
15811 ;; Packed float variants
;; Expander for the packed-float scatter prefetch
;; (UNSPEC_SCATTER_PREFETCH).  Operand 0 is a mask register or constant
;; -1, operand 2 the base address, operand 1 the index vector, operand
;; 3 the scale (const1248_operand) and operand 4 a hint accepted by
;; const2367_operand.  The preparation statement builds operands[5] as
;; (unspec:Pmode [operands[2] operands[1] operands[3]]
;; UNSPEC_VSIBADDR).
15812 (define_expand "avx512pf_scatterpf<mode>sf"
15813 [(unspec
15814 [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
15815 (mem:<GATHER_SCATTER_SF_MEM_MODE>
15816 (match_par_dup 5
15817 [(match_operand 2 "vsib_address_operand")
15818 (match_operand:VI48_512 1 "register_operand")
15819 (match_operand:SI 3 "const1248_operand")]))
15820 (match_operand:SI 4 "const2367_operand")]
15821 UNSPEC_SCATTER_PREFETCH)]
15822 "TARGET_AVX512PF"
15823 {
15824 operands[5]
15825 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
15826 operands[3]), UNSPEC_VSIBADDR);
15827 })
15828
;; Masked packed-float scatter prefetch.  Operand 0 is the mask
;; register (constraint "Yk"); operand 5 is the VSIB memory reference
;; built from base (operand 2), index vector (operand 1) and scale
;; (operand 3).  The hint in operand 4 selects the mnemonic: 3 or 7 ->
;; vscatterpf0...ps, 2 or 6 -> vscatterpf1...ps; any other value is
;; unreachable.  The mask is printed in braces after the memory operand.
15829 (define_insn "*avx512pf_scatterpf<mode>sf_mask"
15830 [(unspec
15831 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
15832 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
15833 [(unspec:P
15834 [(match_operand:P 2 "vsib_address_operand" "Tv")
15835 (match_operand:VI48_512 1 "register_operand" "v")
15836 (match_operand:SI 3 "const1248_operand" "n")]
15837 UNSPEC_VSIBADDR)])
15838 (match_operand:SI 4 "const2367_operand" "n")]
15839 UNSPEC_SCATTER_PREFETCH)]
15840 "TARGET_AVX512PF"
15841 {
15842 switch (INTVAL (operands[4]))
15843 {
15844 case 3:
15845 case 7:
15846 return "vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
15847 case 2:
15848 case 6:
15849 return "vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
15850 default:
15851 gcc_unreachable ();
15852 }
15853 }
15854 [(set_attr "type" "sse")
15855 (set_attr "prefix" "evex")
15856 (set_attr "mode" "XI")])
15857
;; Packed-float scatter prefetch matched when the mask position holds
;; (const_int -1).  Operand 4 is the VSIB memory reference built from
;; base (operand 1), index vector (operand 0) and scale (operand 2).
;; The hint in operand 3 selects the mnemonic: 3 or 7 ->
;; vscatterpf0...ps, 2 or 6 -> vscatterpf1...ps; any other value is
;; unreachable.  Only the memory operand is printed (no mask).
15858 (define_insn "*avx512pf_scatterpf<mode>sf"
15859 [(unspec
15860 [(const_int -1)
15861 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 4 "vsib_mem_operator"
15862 [(unspec:P
15863 [(match_operand:P 1 "vsib_address_operand" "Tv")
15864 (match_operand:VI48_512 0 "register_operand" "v")
15865 (match_operand:SI 2 "const1248_operand" "n")]
15866 UNSPEC_VSIBADDR)])
15867 (match_operand:SI 3 "const2367_operand" "n")]
15868 UNSPEC_SCATTER_PREFETCH)]
15869 "TARGET_AVX512PF"
15870 {
15871 switch (INTVAL (operands[3]))
15872 {
15873 case 3:
15874 case 7:
15875 return "vscatterpf0<ssemodesuffix>ps\t{%4|%4}";
15876 case 2:
15877 case 6:
15878 return "vscatterpf1<ssemodesuffix>ps\t{%4|%4}";
15879 default:
15880 gcc_unreachable ();
15881 }
15882 }
15883 [(set_attr "type" "sse")
15884 (set_attr "prefix" "evex")
15885 (set_attr "mode" "XI")])
15886
15887 ;; Packed double variants
;; Expander for the packed-double scatter prefetch
;; (UNSPEC_SCATTER_PREFETCH).  Same operand layout as the "sf" scatter
;; expander, but the memory reference is always V8DF and the index
;; vector uses the VI4_256_8_512 iterator.  The preparation statement
;; builds operands[5] as (unspec:Pmode [operands[2] operands[1]
;; operands[3]] UNSPEC_VSIBADDR).
15888 (define_expand "avx512pf_scatterpf<mode>df"
15889 [(unspec
15890 [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
15891 (mem:V8DF
15892 (match_par_dup 5
15893 [(match_operand 2 "vsib_address_operand")
15894 (match_operand:VI4_256_8_512 1 "register_operand")
15895 (match_operand:SI 3 "const1248_operand")]))
15896 (match_operand:SI 4 "const2367_operand")]
15897 UNSPEC_SCATTER_PREFETCH)]
15898 "TARGET_AVX512PF"
15899 {
15900 operands[5]
15901 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
15902 operands[3]), UNSPEC_VSIBADDR);
15903 })
15904
;; Masked packed-double scatter prefetch.  Operand 0 is the mask
;; register (constraint "Yk"); operand 5 is the V8DF VSIB memory
;; reference built from base (operand 2), index vector (operand 1) and
;; scale (operand 3).  The hint in operand 4 selects the mnemonic:
;; 3 or 7 -> vscatterpf0...pd, 2 or 6 -> vscatterpf1...pd; any other
;; value is unreachable.  The mask is printed in braces after the
;; memory operand.
15905 (define_insn "*avx512pf_scatterpf<mode>df_mask"
15906 [(unspec
15907 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
15908 (match_operator:V8DF 5 "vsib_mem_operator"
15909 [(unspec:P
15910 [(match_operand:P 2 "vsib_address_operand" "Tv")
15911 (match_operand:VI4_256_8_512 1 "register_operand" "v")
15912 (match_operand:SI 3 "const1248_operand" "n")]
15913 UNSPEC_VSIBADDR)])
15914 (match_operand:SI 4 "const2367_operand" "n")]
15915 UNSPEC_SCATTER_PREFETCH)]
15916 "TARGET_AVX512PF"
15917 {
15918 switch (INTVAL (operands[4]))
15919 {
15920 case 3:
15921 case 7:
15922 return "vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
15923 case 2:
15924 case 6:
15925 return "vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
15926 default:
15927 gcc_unreachable ();
15928 }
15929 }
15930 [(set_attr "type" "sse")
15931 (set_attr "prefix" "evex")
15932 (set_attr "mode" "XI")])
15933
;; Packed-double scatter prefetch matched when the mask position holds
;; (const_int -1).  Operand 4 is the V8DF VSIB memory reference built
;; from base (operand 1), index vector (operand 0) and scale
;; (operand 2).  The hint in operand 3 selects the mnemonic: 3 or 7 ->
;; vscatterpf0...pd, 2 or 6 -> vscatterpf1...pd; any other value is
;; unreachable.  Only the memory operand is printed (no mask).
15934 (define_insn "*avx512pf_scatterpf<mode>df"
15935 [(unspec
15936 [(const_int -1)
15937 (match_operator:V8DF 4 "vsib_mem_operator"
15938 [(unspec:P
15939 [(match_operand:P 1 "vsib_address_operand" "Tv")
15940 (match_operand:VI4_256_8_512 0 "register_operand" "v")
15941 (match_operand:SI 2 "const1248_operand" "n")]
15942 UNSPEC_VSIBADDR)])
15943 (match_operand:SI 3 "const2367_operand" "n")]
15944 UNSPEC_SCATTER_PREFETCH)]
15945 "TARGET_AVX512PF"
15946 {
15947 switch (INTVAL (operands[3]))
15948 {
15949 case 3:
15950 case 7:
15951 return "vscatterpf0<ssemodesuffix>pd\t{%4|%4}";
15952 case 2:
15953 case 6:
15954 return "vscatterpf1<ssemodesuffix>pd\t{%4|%4}";
15955 default:
15956 gcc_unreachable ();
15957 }
15958 }
15959 [(set_attr "type" "sse")
15960 (set_attr "prefix" "evex")
15961 (set_attr "mode" "XI")])
15962
;; AVX512ER vexp2: operand 0 = UNSPEC_EXP2 of operand 1, over the
;; VF_512 modes.  The <mask_name> and <round_saeonly_name> substitutions
;; in the name and template are defined outside this chunk.
15963 (define_insn "avx512er_exp2<mode><mask_name><round_saeonly_name>"
15964 [(set (match_operand:VF_512 0 "register_operand" "=v")
15965 (unspec:VF_512
15966 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
15967 UNSPEC_EXP2))]
15968 "TARGET_AVX512ER"
15969 "vexp2<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
15970 [(set_attr "prefix" "evex")
15971 (set_attr "type" "sse")
15972 (set_attr "mode" "<MODE>")])
15973
;; AVX512ER vrcp28 (packed): operand 0 = UNSPEC_RCP28 of operand 1,
;; over the VF_512 modes.  The <mask_codefor>, <mask_name> and
;; <round_saeonly_name> substitutions are defined outside this chunk.
15974 (define_insn "<mask_codefor>avx512er_rcp28<mode><mask_name><round_saeonly_name>"
15975 [(set (match_operand:VF_512 0 "register_operand" "=v")
15976 (unspec:VF_512
15977 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
15978 UNSPEC_RCP28))]
15979 "TARGET_AVX512ER"
15980 "vrcp28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
15981 [(set_attr "prefix" "evex")
15982 (set_attr "type" "sse")
15983 (set_attr "mode" "<MODE>")])
15984
;; AVX512ER vrcp28 (scalar form, VF_128 modes): the UNSPEC_RCP28 of
;; operand 1 is vec_merged with operand 2 under the constant mask
;; (const_int 1).  The output template uses <ssescalarmodesuffix> and
;; prints operands 1, 2 and 0.
15985 (define_insn "avx512er_vmrcp28<mode><round_saeonly_name>"
15986 [(set (match_operand:VF_128 0 "register_operand" "=v")
15987 (vec_merge:VF_128
15988 (unspec:VF_128
15989 [(match_operand:VF_128 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
15990 UNSPEC_RCP28)
15991 (match_operand:VF_128 2 "register_operand" "v")
15992 (const_int 1)))]
15993 "TARGET_AVX512ER"
15994 "vrcp28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0, %2, %1<round_saeonly_op3>}"
15995 [(set_attr "length_immediate" "1")
15996 (set_attr "prefix" "evex")
15997 (set_attr "type" "sse")
15998 (set_attr "mode" "<MODE>")])
15999
;; AVX512ER vrsqrt28 (packed): operand 0 = UNSPEC_RSQRT28 of operand 1,
;; over the VF_512 modes.  The <mask_codefor>, <mask_name> and
;; <round_saeonly_name> substitutions are defined outside this chunk.
16000 (define_insn "<mask_codefor>avx512er_rsqrt28<mode><mask_name><round_saeonly_name>"
16001 [(set (match_operand:VF_512 0 "register_operand" "=v")
16002 (unspec:VF_512
16003 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
16004 UNSPEC_RSQRT28))]
16005 "TARGET_AVX512ER"
16006 "vrsqrt28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
16007 [(set_attr "prefix" "evex")
16008 (set_attr "type" "sse")
16009 (set_attr "mode" "<MODE>")])
16010
;; AVX512ER vrsqrt28 (scalar form, VF_128 modes): the UNSPEC_RSQRT28 of
;; operand 1 is vec_merged with operand 2 under the constant mask
;; (const_int 1).  The output template uses <ssescalarmodesuffix> and
;; prints operands 1, 2 and 0.
16011 (define_insn "avx512er_vmrsqrt28<mode><round_saeonly_name>"
16012 [(set (match_operand:VF_128 0 "register_operand" "=v")
16013 (vec_merge:VF_128
16014 (unspec:VF_128
16015 [(match_operand:VF_128 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
16016 UNSPEC_RSQRT28)
16017 (match_operand:VF_128 2 "register_operand" "v")
16018 (const_int 1)))]
16019 "TARGET_AVX512ER"
16020 "vrsqrt28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0, %2, %1<round_saeonly_op3>}"
16021 [(set_attr "length_immediate" "1")
16022 (set_attr "type" "sse")
16023 (set_attr "prefix" "evex")
16024 (set_attr "mode" "<MODE>")])
16025
16026 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
16027 ;;
16028 ;; XOP instructions
16029 ;;
16030 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
16031
;; Code iterator over the two addition codes used by the XOP
;; multiply/accumulate patterns: plain plus and ss_plus.
16032 (define_code_iterator xop_plus [plus ss_plus])
16033
;; Mnemonic fragments selected by the addition code: plus gives
;; "macs"/"madcs", ss_plus gives "macss"/"madcss".
16034 (define_code_attr macs [(plus "macs") (ss_plus "macss")])
16035 (define_code_attr madcs [(plus "madcs") (ss_plus "madcss")])
16036
16037 ;; XOP parallel integer multiply/add instructions.
16038
;; Element-wise multiply/accumulate over the VI24_128 modes:
;; operand 0 = xop_plus (operand 1 * operand 2, operand 3).
;; Operands 1 and 2 are marked commutative ("%"); operand 2 may be in
;; memory.
16039 (define_insn "xop_p<macs><ssemodesuffix><ssemodesuffix>"
16040 [(set (match_operand:VI24_128 0 "register_operand" "=x")
16041 (xop_plus:VI24_128
16042 (mult:VI24_128
16043 (match_operand:VI24_128 1 "nonimmediate_operand" "%x")
16044 (match_operand:VI24_128 2 "nonimmediate_operand" "xm"))
16045 (match_operand:VI24_128 3 "register_operand" "x")))]
16046 "TARGET_XOP"
16047 "vp<macs><ssemodesuffix><ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16048 [(set_attr "type" "ssemuladd")
16049 (set_attr "mode" "TI")])
16050
;; Multiply/accumulate on elements 0 and 2: those elements of the V4SI
;; operands 1 and 2 are selected, sign-extended to V2DI and multiplied,
;; then combined with the V2DI accumulator (operand 3) using xop_plus.
16051 (define_insn "xop_p<macs>dql"
16052 [(set (match_operand:V2DI 0 "register_operand" "=x")
16053 (xop_plus:V2DI
16054 (mult:V2DI
16055 (sign_extend:V2DI
16056 (vec_select:V2SI
16057 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
16058 (parallel [(const_int 0) (const_int 2)])))
16059 (sign_extend:V2DI
16060 (vec_select:V2SI
16061 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
16062 (parallel [(const_int 0) (const_int 2)]))))
16063 (match_operand:V2DI 3 "register_operand" "x")))]
16064 "TARGET_XOP"
16065 "vp<macs>dql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16066 [(set_attr "type" "ssemuladd")
16067 (set_attr "mode" "TI")])
16068
;; Multiply/accumulate on elements 1 and 3: those elements of the V4SI
;; operands 1 and 2 are selected, sign-extended to V2DI and multiplied,
;; then combined with the V2DI accumulator (operand 3) using xop_plus.
16069 (define_insn "xop_p<macs>dqh"
16070 [(set (match_operand:V2DI 0 "register_operand" "=x")
16071 (xop_plus:V2DI
16072 (mult:V2DI
16073 (sign_extend:V2DI
16074 (vec_select:V2SI
16075 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
16076 (parallel [(const_int 1) (const_int 3)])))
16077 (sign_extend:V2DI
16078 (vec_select:V2SI
16079 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
16080 (parallel [(const_int 1) (const_int 3)]))))
16081 (match_operand:V2DI 3 "register_operand" "x")))]
16082 "TARGET_XOP"
16083 "vp<macs>dqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16084 [(set_attr "type" "ssemuladd")
16085 (set_attr "mode" "TI")])
16086
16087 ;; XOP parallel integer multiply/add instructions for the intrinsics
;; Word-to-doubleword multiply/accumulate: elements 1, 3, 5 and 7 of
;; the V8HI operands 1 and 2 are selected, sign-extended to V4SI and
;; multiplied, then combined with the V4SI accumulator (operand 3)
;; using xop_plus.
16088 (define_insn "xop_p<macs>wd"
16089 [(set (match_operand:V4SI 0 "register_operand" "=x")
16090 (xop_plus:V4SI
16091 (mult:V4SI
16092 (sign_extend:V4SI
16093 (vec_select:V4HI
16094 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
16095 (parallel [(const_int 1) (const_int 3)
16096 (const_int 5) (const_int 7)])))
16097 (sign_extend:V4SI
16098 (vec_select:V4HI
16099 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
16100 (parallel [(const_int 1) (const_int 3)
16101 (const_int 5) (const_int 7)]))))
16102 (match_operand:V4SI 3 "register_operand" "x")))]
16103 "TARGET_XOP"
16104 "vp<macs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16105 [(set_attr "type" "ssemuladd")
16106 (set_attr "mode" "TI")])
16107
;; Multiply, add adjacent products and accumulate: the sign-extended
;; products of elements 0/2/4/6 of the V8HI operands 1 and 2 are added
;; (plain plus) to the sign-extended products of elements 1/3/5/7, and
;; that V4SI sum is combined with the accumulator (operand 3) using
;; xop_plus.
16108 (define_insn "xop_p<madcs>wd"
16109 [(set (match_operand:V4SI 0 "register_operand" "=x")
16110 (xop_plus:V4SI
16111 (plus:V4SI
16112 (mult:V4SI
16113 (sign_extend:V4SI
16114 (vec_select:V4HI
16115 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
16116 (parallel [(const_int 0) (const_int 2)
16117 (const_int 4) (const_int 6)])))
16118 (sign_extend:V4SI
16119 (vec_select:V4HI
16120 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
16121 (parallel [(const_int 0) (const_int 2)
16122 (const_int 4) (const_int 6)]))))
16123 (mult:V4SI
16124 (sign_extend:V4SI
16125 (vec_select:V4HI
16126 (match_dup 1)
16127 (parallel [(const_int 1) (const_int 3)
16128 (const_int 5) (const_int 7)])))
16129 (sign_extend:V4SI
16130 (vec_select:V4HI
16131 (match_dup 2)
16132 (parallel [(const_int 1) (const_int 3)
16133 (const_int 5) (const_int 7)])))))
16134 (match_operand:V4SI 3 "register_operand" "x")))]
16135 "TARGET_XOP"
16136 "vp<madcs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16137 [(set_attr "type" "ssemuladd")
16138 (set_attr "mode" "TI")])
16139
16140 ;; XOP parallel XMM conditional moves
;; vpcmov over the V mode iterator: operand 0 is an if_then_else with
;; operand 3 as the selector, operand 1 as the "then" value and
;; operand 2 as the "else" value.  The two alternatives allow either
;; operand 3 or operand 2 to come from memory.
16141 (define_insn "xop_pcmov_<mode><avxsizesuffix>"
16142 [(set (match_operand:V 0 "register_operand" "=x,x")
16143 (if_then_else:V
16144 (match_operand:V 3 "nonimmediate_operand" "x,m")
16145 (match_operand:V 1 "register_operand" "x,x")
16146 (match_operand:V 2 "nonimmediate_operand" "xm,x")))]
16147 "TARGET_XOP"
16148 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16149 [(set_attr "type" "sse4arg")])
16150
16151 ;; XOP horizontal add/subtract instructions
;; Horizontal add, bytes to words: the even-indexed bytes (0,2,...,14)
;; and the odd-indexed bytes (1,3,...,15) of the V16QI operand 1 are
;; each extended to V8HI with any_extend and added.  The any_extend
;; iterator and the matching <u> attribute are defined outside this
;; chunk.
16152 (define_insn "xop_phadd<u>bw"
16153 [(set (match_operand:V8HI 0 "register_operand" "=x")
16154 (plus:V8HI
16155 (any_extend:V8HI
16156 (vec_select:V8QI
16157 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
16158 (parallel [(const_int 0) (const_int 2)
16159 (const_int 4) (const_int 6)
16160 (const_int 8) (const_int 10)
16161 (const_int 12) (const_int 14)])))
16162 (any_extend:V8HI
16163 (vec_select:V8QI
16164 (match_dup 1)
16165 (parallel [(const_int 1) (const_int 3)
16166 (const_int 5) (const_int 7)
16167 (const_int 9) (const_int 11)
16168 (const_int 13) (const_int 15)])))))]
16169 "TARGET_XOP"
16170 "vphadd<u>bw\t{%1, %0|%0, %1}"
16171 [(set_attr "type" "sseiadd1")])
16172
;; Horizontal add, bytes to doublewords: four V4QI selections from the
;; V16QI operand 1 (element sets {0,4,8,12}, {1,5,9,13}, {2,6,10,14}
;; and {3,7,11,15}) are each extended to V4SI with any_extend and
;; summed, so result element i is the sum of bytes 4i..4i+3.
16173 (define_insn "xop_phadd<u>bd"
16174 [(set (match_operand:V4SI 0 "register_operand" "=x")
16175 (plus:V4SI
16176 (plus:V4SI
16177 (any_extend:V4SI
16178 (vec_select:V4QI
16179 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
16180 (parallel [(const_int 0) (const_int 4)
16181 (const_int 8) (const_int 12)])))
16182 (any_extend:V4SI
16183 (vec_select:V4QI
16184 (match_dup 1)
16185 (parallel [(const_int 1) (const_int 5)
16186 (const_int 9) (const_int 13)]))))
16187 (plus:V4SI
16188 (any_extend:V4SI
16189 (vec_select:V4QI
16190 (match_dup 1)
16191 (parallel [(const_int 2) (const_int 6)
16192 (const_int 10) (const_int 14)])))
16193 (any_extend:V4SI
16194 (vec_select:V4QI
16195 (match_dup 1)
16196 (parallel [(const_int 3) (const_int 7)
16197 (const_int 11) (const_int 15)]))))))]
16198 "TARGET_XOP"
16199 "vphadd<u>bd\t{%1, %0|%0, %1}"
16200 [(set_attr "type" "sseiadd1")])
16201
;; Horizontal add, bytes to quadwords: eight V2QI selections from the
;; V16QI operand 1 (element pairs {k, k+8} for k = 0..7) are each
;; extended to V2DI with any_extend and summed, so result element 0 is
;; the sum of bytes 0..7 and result element 1 the sum of bytes 8..15.
16202 (define_insn "xop_phadd<u>bq"
16203 [(set (match_operand:V2DI 0 "register_operand" "=x")
16204 (plus:V2DI
16205 (plus:V2DI
16206 (plus:V2DI
16207 (any_extend:V2DI
16208 (vec_select:V2QI
16209 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
16210 (parallel [(const_int 0) (const_int 8)])))
16211 (any_extend:V2DI
16212 (vec_select:V2QI
16213 (match_dup 1)
16214 (parallel [(const_int 1) (const_int 9)]))))
16215 (plus:V2DI
16216 (any_extend:V2DI
16217 (vec_select:V2QI
16218 (match_dup 1)
16219 (parallel [(const_int 2) (const_int 10)])))
16220 (any_extend:V2DI
16221 (vec_select:V2QI
16222 (match_dup 1)
16223 (parallel [(const_int 3) (const_int 11)])))))
16224 (plus:V2DI
16225 (plus:V2DI
16226 (any_extend:V2DI
16227 (vec_select:V2QI
16228 (match_dup 1)
16229 (parallel [(const_int 4) (const_int 12)])))
16230 (any_extend:V2DI
16231 (vec_select:V2QI
16232 (match_dup 1)
16233 (parallel [(const_int 5) (const_int 13)]))))
16234 (plus:V2DI
16235 (any_extend:V2DI
16236 (vec_select:V2QI
16237 (match_dup 1)
16238 (parallel [(const_int 6) (const_int 14)])))
16239 (any_extend:V2DI
16240 (vec_select:V2QI
16241 (match_dup 1)
16242 (parallel [(const_int 7) (const_int 15)])))))))]
16243 "TARGET_XOP"
16244 "vphadd<u>bq\t{%1, %0|%0, %1}"
16245 [(set_attr "type" "sseiadd1")])
16246
;; Horizontal add, words to doublewords: the even-indexed words
;; (0,2,4,6) and the odd-indexed words (1,3,5,7) of the V8HI operand 1
;; are each extended to V4SI with any_extend and added.
16247 (define_insn "xop_phadd<u>wd"
16248 [(set (match_operand:V4SI 0 "register_operand" "=x")
16249 (plus:V4SI
16250 (any_extend:V4SI
16251 (vec_select:V4HI
16252 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
16253 (parallel [(const_int 0) (const_int 2)
16254 (const_int 4) (const_int 6)])))
16255 (any_extend:V4SI
16256 (vec_select:V4HI
16257 (match_dup 1)
16258 (parallel [(const_int 1) (const_int 3)
16259 (const_int 5) (const_int 7)])))))]
16260 "TARGET_XOP"
16261 "vphadd<u>wd\t{%1, %0|%0, %1}"
16262 [(set_attr "type" "sseiadd1")])
16263
;; Horizontal add, words to quadwords: four V2HI selections from the
;; V8HI operand 1 (element pairs {0,4}, {1,5}, {2,6} and {3,7}) are
;; each extended to V2DI with any_extend and summed, so result element
;; 0 is the sum of words 0..3 and result element 1 the sum of words
;; 4..7.
16264 (define_insn "xop_phadd<u>wq"
16265 [(set (match_operand:V2DI 0 "register_operand" "=x")
16266 (plus:V2DI
16267 (plus:V2DI
16268 (any_extend:V2DI
16269 (vec_select:V2HI
16270 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
16271 (parallel [(const_int 0) (const_int 4)])))
16272 (any_extend:V2DI
16273 (vec_select:V2HI
16274 (match_dup 1)
16275 (parallel [(const_int 1) (const_int 5)]))))
16276 (plus:V2DI
16277 (any_extend:V2DI
16278 (vec_select:V2HI
16279 (match_dup 1)
16280 (parallel [(const_int 2) (const_int 6)])))
16281 (any_extend:V2DI
16282 (vec_select:V2HI
16283 (match_dup 1)
16284 (parallel [(const_int 3) (const_int 7)]))))))]
16285 "TARGET_XOP"
16286 "vphadd<u>wq\t{%1, %0|%0, %1}"
16287 [(set_attr "type" "sseiadd1")])
16288
;; XOP horizontal add, doublewords to quadwords (vphadd<u>dq).
;; Result element i is extend (op1[2i]) + extend (op1[2i+1]), i.e. the
;; even doublewords (0,2) plus the odd doublewords (1,3), each widened to
;; DImode by any_extend.
16289 (define_insn "xop_phadd<u>dq"
16290 [(set (match_operand:V2DI 0 "register_operand" "=x")
16291 (plus:V2DI
16292 (any_extend:V2DI
16293 (vec_select:V2SI
16294 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
16295 (parallel [(const_int 0) (const_int 2)])))
16296 (any_extend:V2DI
16297 (vec_select:V2SI
16298 (match_dup 1)
16299 (parallel [(const_int 1) (const_int 3)])))))]
16300 "TARGET_XOP"
16301 "vphadd<u>dq\t{%1, %0|%0, %1}"
16302 [(set_attr "type" "sseiadd1")])
16303
;; XOP horizontal subtract, bytes to words (vphsubbw).
;; Result element i is sign_extend (op1[2i]) - sign_extend (op1[2i+1]):
;; the even bytes are the minuends and the odd bytes the subtrahends.
;; Only a signed form exists here (sign_extend is hard-coded).
16304 (define_insn "xop_phsubbw"
16305 [(set (match_operand:V8HI 0 "register_operand" "=x")
16306 (minus:V8HI
16307 (sign_extend:V8HI
16308 (vec_select:V8QI
16309 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
16310 (parallel [(const_int 0) (const_int 2)
16311 (const_int 4) (const_int 6)
16312 (const_int 8) (const_int 10)
16313 (const_int 12) (const_int 14)])))
16314 (sign_extend:V8HI
16315 (vec_select:V8QI
16316 (match_dup 1)
16317 (parallel [(const_int 1) (const_int 3)
16318 (const_int 5) (const_int 7)
16319 (const_int 9) (const_int 11)
16320 (const_int 13) (const_int 15)])))))]
16321 "TARGET_XOP"
16322 "vphsubbw\t{%1, %0|%0, %1}"
16323 [(set_attr "type" "sseiadd1")])
16324
;; XOP horizontal subtract, words to doublewords (vphsubwd).
;; Result element i is sign_extend (op1[2i]) - sign_extend (op1[2i+1]).
16325 (define_insn "xop_phsubwd"
16326 [(set (match_operand:V4SI 0 "register_operand" "=x")
16327 (minus:V4SI
16328 (sign_extend:V4SI
16329 (vec_select:V4HI
16330 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
16331 (parallel [(const_int 0) (const_int 2)
16332 (const_int 4) (const_int 6)])))
16333 (sign_extend:V4SI
16334 (vec_select:V4HI
16335 (match_dup 1)
16336 (parallel [(const_int 1) (const_int 3)
16337 (const_int 5) (const_int 7)])))))]
16338 "TARGET_XOP"
16339 "vphsubwd\t{%1, %0|%0, %1}"
16340 [(set_attr "type" "sseiadd1")])
16341
;; XOP horizontal subtract, doublewords to quadwords (vphsubdq).
;; Result element i is sign_extend (op1[2i]) - sign_extend (op1[2i+1]).
16342 (define_insn "xop_phsubdq"
16343 [(set (match_operand:V2DI 0 "register_operand" "=x")
16344 (minus:V2DI
16345 (sign_extend:V2DI
16346 (vec_select:V2SI
16347 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
16348 (parallel [(const_int 0) (const_int 2)])))
16349 (sign_extend:V2DI
16350 (vec_select:V2SI
16351 (match_dup 1)
16352 (parallel [(const_int 1) (const_int 3)])))))]
16353 "TARGET_XOP"
16354 "vphsubdq\t{%1, %0|%0, %1}"
16355 [(set_attr "type" "sseiadd1")])
16356
16357 ;; XOP permute instructions
;; Generic vpperm: the operation itself is opaque to the RTL optimizers
;; (UNSPEC_XOP_PERMUTE over three V16QI inputs).  The two constraint
;; alternatives allow either operand 2 or operand 3 to be in memory; the
;; insn condition additionally rejects the case where both are memory.
16358 (define_insn "xop_pperm"
16359 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
16360 (unspec:V16QI
16361 [(match_operand:V16QI 1 "register_operand" "x,x")
16362 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
16363 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
16364 UNSPEC_XOP_PERMUTE))]
16365 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
16366 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16367 [(set_attr "type" "sse4arg")
16368 (set_attr "mode" "TI")])
16369
16370 ;; XOP pack instructions that combine two vectors into a smaller vector
;; Pack two V2DI vectors into one V4SI: the result is the concatenation of
;; the element-wise truncations of operands 1 and 2.  Operand 3 appears
;; only in a (use) and is printed as the last vpperm source operand --
;; presumably the byte selector that the caller must set up so that vpperm
;; actually performs this truncation (NOTE(review): confirm against the
;; expander that builds operand 3).
16371 (define_insn "xop_pperm_pack_v2di_v4si"
16372 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
16373 (vec_concat:V4SI
16374 (truncate:V2SI
16375 (match_operand:V2DI 1 "register_operand" "x,x"))
16376 (truncate:V2SI
16377 (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
16378 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
16379 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
16380 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16381 [(set_attr "type" "sse4arg")
16382 (set_attr "mode" "TI")])
16383
;; Pack two V4SI vectors into one V8HI: concatenation of the element-wise
;; truncations of operands 1 and 2.  Operand 3 is only (use)d and is
;; emitted as the last vpperm source -- presumably the selector vector
;; (NOTE(review): confirm).  At most one of operands 2 and 3 may be memory.
16384 (define_insn "xop_pperm_pack_v4si_v8hi"
16385 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
16386 (vec_concat:V8HI
16387 (truncate:V4HI
16388 (match_operand:V4SI 1 "register_operand" "x,x"))
16389 (truncate:V4HI
16390 (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
16391 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
16392 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
16393 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16394 [(set_attr "type" "sse4arg")
16395 (set_attr "mode" "TI")])
16396
;; Pack two V8HI vectors into one V16QI: concatenation of the element-wise
;; truncations of operands 1 and 2.  Operand 3 is only (use)d and is
;; emitted as the last vpperm source -- presumably the selector vector
;; (NOTE(review): confirm).  At most one of operands 2 and 3 may be memory.
16397 (define_insn "xop_pperm_pack_v8hi_v16qi"
16398 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
16399 (vec_concat:V16QI
16400 (truncate:V8QI
16401 (match_operand:V8HI 1 "register_operand" "x,x"))
16402 (truncate:V8QI
16403 (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
16404 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
16405 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
16406 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16407 [(set_attr "type" "sse4arg")
16408 (set_attr "mode" "TI")])
16409
16410 ;; XOP packed rotate instructions
;; Expander for a 128-bit integer vector rotate-left by a scalar count.
;; If operand 2 already satisfies const_0_to_<sserotatemax>_operand the
;; preparation code does nothing and the rotate RTL in the template is
;; emitted as-is.  Otherwise the count is converted to the vector element
;; mode when its mode differs, broadcast into a vector register through
;; vec_init<mode>, and the per-element variable rotate xop_vrotl<mode>3
;; is emitted instead.
16411 (define_expand "rotl<mode>3"
16412 [(set (match_operand:VI_128 0 "register_operand")
16413 (rotate:VI_128
16414 (match_operand:VI_128 1 "nonimmediate_operand")
16415 (match_operand:SI 2 "general_operand")))]
16416 "TARGET_XOP"
16417 {
16418 /* If we were given a scalar, convert it to parallel */
16419 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
16420 {
16421 rtvec vs = rtvec_alloc (<ssescalarnum>);
16422 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
16423 rtx reg = gen_reg_rtx (<MODE>mode);
16424 rtx op2 = operands[2];
16425 int i;
16426
16427 if (GET_MODE (op2) != <ssescalarmode>mode)
16428 {
16429 op2 = gen_reg_rtx (<ssescalarmode>mode);
16430 convert_move (op2, operands[2], false);
16431 }
16432
16433 for (i = 0; i < <ssescalarnum>; i++)
16434 RTVEC_ELT (vs, i) = op2;
16435
16436 emit_insn (gen_vec_init<mode> (reg, par));
16437 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
16438 DONE;
16439 }
16440 })
16441
;; Expander for a 128-bit integer vector rotate-right by a scalar count.
;; In-range constant counts fall through to the rotatert RTL in the
;; template.  Any other count is converted to the element mode if needed,
;; broadcast into a vector register, negated element-wise, and handed to
;; xop_vrotl<mode>3, whose pattern rotates right for negative counts.
16442 (define_expand "rotr<mode>3"
16443 [(set (match_operand:VI_128 0 "register_operand")
16444 (rotatert:VI_128
16445 (match_operand:VI_128 1 "nonimmediate_operand")
16446 (match_operand:SI 2 "general_operand")))]
16447 "TARGET_XOP"
16448 {
16449 /* If we were given a scalar, convert it to parallel */
16450 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
16451 {
16452 rtvec vs = rtvec_alloc (<ssescalarnum>);
16453 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
16454 rtx neg = gen_reg_rtx (<MODE>mode);
16455 rtx reg = gen_reg_rtx (<MODE>mode);
16456 rtx op2 = operands[2];
16457 int i;
16458
16459 if (GET_MODE (op2) != <ssescalarmode>mode)
16460 {
16461 op2 = gen_reg_rtx (<ssescalarmode>mode);
16462 convert_move (op2, operands[2], false);
16463 }
16464
16465 for (i = 0; i < <ssescalarnum>; i++)
16466 RTVEC_ELT (vs, i) = op2;
16467
16468 emit_insn (gen_vec_init<mode> (reg, par));
16469 emit_insn (gen_neg<mode>2 (neg, reg));
16470 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
16471 DONE;
16472 }
16473 })
16474
;; Rotate-left of every element by the same immediate count (vprot with an
;; immediate).  The count must satisfy const_0_to_<sserotatemax>_operand.
16475 (define_insn "xop_rotl<mode>3"
16476 [(set (match_operand:VI_128 0 "register_operand" "=x")
16477 (rotate:VI_128
16478 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
16479 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
16480 "TARGET_XOP"
16481 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
16482 [(set_attr "type" "sseishft")
16483 (set_attr "length_immediate" "1")
16484 (set_attr "mode" "TI")])
16485
;; Rotate-right of every element by the same immediate count.  The output
;; code emits it as a vprot (rotate-left) by
;; GET_MODE_BITSIZE (<ssescalarmode>mode) - count, which it stores in the
;; scratch slot operands[3] and prints in place of operand 2.
16486 (define_insn "xop_rotr<mode>3"
16487 [(set (match_operand:VI_128 0 "register_operand" "=x")
16488 (rotatert:VI_128
16489 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
16490 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
16491 "TARGET_XOP"
16492 {
16493 operands[3]
16494 = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
16495 return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\";
16496 }
16497 [(set_attr "type" "sseishft")
16498 (set_attr "length_immediate" "1")
16499 (set_attr "mode" "TI")])
16500
;; Per-element variable rotate-right: negate the vector of counts into a
;; fresh register and emit xop_vrotl<mode>3, whose pattern rotates right
;; for negative counts.  The expander always finishes with DONE, so no RTL
;; template is emitted.
16501 (define_expand "vrotr<mode>3"
16502 [(match_operand:VI_128 0 "register_operand")
16503 (match_operand:VI_128 1 "register_operand")
16504 (match_operand:VI_128 2 "register_operand")]
16505 "TARGET_XOP"
16506 {
16507 rtx reg = gen_reg_rtx (<MODE>mode);
16508 emit_insn (gen_neg<mode>2 (reg, operands[2]));
16509 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
16510 DONE;
16511 })
16512
;; Per-element variable rotate-left: forwards the operands unchanged to
;; xop_vrotl<mode>3.
16513 (define_expand "vrotl<mode>3"
16514 [(match_operand:VI_128 0 "register_operand")
16515 (match_operand:VI_128 1 "register_operand")
16516 (match_operand:VI_128 2 "register_operand")]
16517 "TARGET_XOP"
16518 {
16519 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
16520 DONE;
16521 })
16522
;; vprot with a vector of per-element counts.  The RTL models the
;; bidirectional behaviour: where the count in operand 2 is >= 0 the
;; element of operand 1 is rotated left by it, otherwise it is rotated
;; right by the negated count.  Either operand 1 or operand 2 may be in
;; memory, but not both.
16523 (define_insn "xop_vrotl<mode>3"
16524 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
16525 (if_then_else:VI_128
16526 (ge:VI_128
16527 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
16528 (const_int 0))
16529 (rotate:VI_128
16530 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
16531 (match_dup 2))
16532 (rotatert:VI_128
16533 (match_dup 1)
16534 (neg:VI_128 (match_dup 2)))))]
16535 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
16536 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
16537 [(set_attr "type" "sseishft")
16538 (set_attr "prefix_data16" "0")
16539 (set_attr "prefix_extra" "2")
16540 (set_attr "mode" "TI")])
16541
16542 ;; XOP packed shift instructions.
;; Per-element logical shift right for 128-bit VI12_128 modes (XOP only).
;; The counts are negated and passed to xop_shl<mode>3, whose pattern
;; performs a logical right shift for negative counts.
16543 (define_expand "vlshr<mode>3"
16544 [(set (match_operand:VI12_128 0 "register_operand")
16545 (lshiftrt:VI12_128
16546 (match_operand:VI12_128 1 "register_operand")
16547 (match_operand:VI12_128 2 "nonimmediate_operand")))]
16548 "TARGET_XOP"
16549 {
16550 rtx neg = gen_reg_rtx (<MODE>mode);
16551 emit_insn (gen_neg<mode>2 (neg, operands[2]));
16552 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
16553 DONE;
16554 })
16555
;; Per-element logical shift right for 128-bit VI48_128 modes.
;; With AVX2 the preparation code does nothing, so the lshiftrt RTL in the
;; template is emitted.  Without AVX2 (i.e. XOP only) the counts are
;; negated and xop_shl<mode>3 is used instead.
16556 (define_expand "vlshr<mode>3"
16557 [(set (match_operand:VI48_128 0 "register_operand")
16558 (lshiftrt:VI48_128
16559 (match_operand:VI48_128 1 "register_operand")
16560 (match_operand:VI48_128 2 "nonimmediate_operand")))]
16561 "TARGET_AVX2 || TARGET_XOP"
16562 {
16563 if (!TARGET_AVX2)
16564 {
16565 rtx neg = gen_reg_rtx (<MODE>mode);
16566 emit_insn (gen_neg<mode>2 (neg, operands[2]));
16567 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
16568 DONE;
16569 }
16570 })
16571
;; Per-element logical shift right for VI48_512 modes; no preparation
;; code, the template RTL is emitted directly.  Requires AVX512F.
16572 (define_expand "vlshr<mode>3"
16573 [(set (match_operand:VI48_512 0 "register_operand")
16574 (lshiftrt:VI48_512
16575 (match_operand:VI48_512 1 "register_operand")
16576 (match_operand:VI48_512 2 "nonimmediate_operand")))]
16577 "TARGET_AVX512F")
16578
;; Per-element logical shift right for VI48_256 modes; no preparation
;; code, the template RTL is emitted directly.  Requires AVX2.
16579 (define_expand "vlshr<mode>3"
16580 [(set (match_operand:VI48_256 0 "register_operand")
16581 (lshiftrt:VI48_256
16582 (match_operand:VI48_256 1 "register_operand")
16583 (match_operand:VI48_256 2 "nonimmediate_operand")))]
16584 "TARGET_AVX2")
16585
;; Per-element arithmetic shift right of V8HI.  XOP is preferred when
;; available: the counts are negated and xop_shav8hi3 is emitted (its
;; pattern shifts arithmetically right for negative counts).  Otherwise
;; (AVX512BW and AVX512VL) the ashiftrt RTL in the template is emitted.
16586 (define_expand "vashrv8hi3<mask_name>"
16587 [(set (match_operand:V8HI 0 "register_operand")
16588 (ashiftrt:V8HI
16589 (match_operand:V8HI 1 "register_operand")
16590 (match_operand:V8HI 2 "nonimmediate_operand")))]
16591 "TARGET_XOP || (TARGET_AVX512BW && TARGET_AVX512VL)"
16592 {
16593 if (TARGET_XOP)
16594 {
16595 rtx neg = gen_reg_rtx (V8HImode);
16596 emit_insn (gen_negv8hi2 (neg, operands[2]));
16597 emit_insn (gen_xop_shav8hi3 (operands[0], operands[1], neg));
16598 DONE;
16599 }
16600 })
16601
;; Per-element arithmetic shift right of V16QI (XOP only): negate the
;; counts and emit xop_shav16qi3, which shifts right for negative counts.
16602 (define_expand "vashrv16qi3"
16603 [(set (match_operand:V16QI 0 "register_operand")
16604 (ashiftrt:V16QI
16605 (match_operand:V16QI 1 "register_operand")
16606 (match_operand:V16QI 2 "nonimmediate_operand")))]
16607 "TARGET_XOP"
16608 {
16609 rtx neg = gen_reg_rtx (V16QImode);
16610 emit_insn (gen_negv16qi2 (neg, operands[2]));
16611 emit_insn (gen_xop_shav16qi3 (operands[0], operands[1], neg));
16612 DONE;
16613 })
16614
;; Per-element arithmetic shift right of V2DI.  With XOP the counts are
;; negated and xop_shav2di3 is emitted; otherwise (AVX512VL) the ashiftrt
;; RTL in the template is emitted unchanged.
16615 (define_expand "vashrv2di3<mask_name>"
16616 [(set (match_operand:V2DI 0 "register_operand")
16617 (ashiftrt:V2DI
16618 (match_operand:V2DI 1 "register_operand")
16619 (match_operand:V2DI 2 "nonimmediate_operand")))]
16620 "TARGET_XOP || TARGET_AVX512VL"
16621 {
16622 if (TARGET_XOP)
16623 {
16624 rtx neg = gen_reg_rtx (V2DImode);
16625 emit_insn (gen_negv2di2 (neg, operands[2]));
16626 emit_insn (gen_xop_shav2di3 (operands[0], operands[1], neg));
16627 DONE;
16628 }
16629 })
16630
;; Per-element arithmetic shift right of V4SI.  With AVX2 the template RTL
;; is emitted; without it (XOP only) the counts are negated and
;; xop_shav4si3 is used.
16631 (define_expand "vashrv4si3"
16632 [(set (match_operand:V4SI 0 "register_operand")
16633 (ashiftrt:V4SI (match_operand:V4SI 1 "register_operand")
16634 (match_operand:V4SI 2 "nonimmediate_operand")))]
16635 "TARGET_AVX2 || TARGET_XOP"
16636 {
16637 if (!TARGET_AVX2)
16638 {
16639 rtx neg = gen_reg_rtx (V4SImode);
16640 emit_insn (gen_negv4si2 (neg, operands[2]));
16641 emit_insn (gen_xop_shav4si3 (operands[0], operands[1], neg));
16642 DONE;
16643 }
16644 })
16645
;; Per-element arithmetic shift right of V16SI; no preparation code.
;; Requires AVX512F.
16646 (define_expand "vashrv16si3"
16647 [(set (match_operand:V16SI 0 "register_operand")
16648 (ashiftrt:V16SI (match_operand:V16SI 1 "register_operand")
16649 (match_operand:V16SI 2 "nonimmediate_operand")))]
16650 "TARGET_AVX512F")
16651
;; Per-element arithmetic shift right of V8SI; no preparation code.
;; Requires AVX2.
16652 (define_expand "vashrv8si3"
16653 [(set (match_operand:V8SI 0 "register_operand")
16654 (ashiftrt:V8SI (match_operand:V8SI 1 "register_operand")
16655 (match_operand:V8SI 2 "nonimmediate_operand")))]
16656 "TARGET_AVX2")
16657
;; Per-element shift left for 128-bit VI12_128 modes (XOP only).  The
;; counts are passed unmodified to xop_sha<mode>3, which shifts left for
;; non-negative counts.
16658 (define_expand "vashl<mode>3"
16659 [(set (match_operand:VI12_128 0 "register_operand")
16660 (ashift:VI12_128
16661 (match_operand:VI12_128 1 "register_operand")
16662 (match_operand:VI12_128 2 "nonimmediate_operand")))]
16663 "TARGET_XOP"
16664 {
16665 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
16666 DONE;
16667 })
16668
;; Per-element shift left for 128-bit VI48_128 modes.  With AVX2 the
;; template RTL is emitted.  Without it (XOP only) operand 2 is forced
;; into a register and xop_sha<mode>3 is emitted with the counts as given.
16669 (define_expand "vashl<mode>3"
16670 [(set (match_operand:VI48_128 0 "register_operand")
16671 (ashift:VI48_128
16672 (match_operand:VI48_128 1 "register_operand")
16673 (match_operand:VI48_128 2 "nonimmediate_operand")))]
16674 "TARGET_AVX2 || TARGET_XOP"
16675 {
16676 if (!TARGET_AVX2)
16677 {
16678 operands[2] = force_reg (<MODE>mode, operands[2]);
16679 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
16680 DONE;
16681 }
16682 })
16683
;; Per-element shift left for VI48_512 modes; no preparation code.
;; Requires AVX512F.
16684 (define_expand "vashl<mode>3"
16685 [(set (match_operand:VI48_512 0 "register_operand")
16686 (ashift:VI48_512
16687 (match_operand:VI48_512 1 "register_operand")
16688 (match_operand:VI48_512 2 "nonimmediate_operand")))]
16689 "TARGET_AVX512F")
16690
;; Per-element shift left for VI48_256 modes; no preparation code.
;; Requires AVX2.
16691 (define_expand "vashl<mode>3"
16692 [(set (match_operand:VI48_256 0 "register_operand")
16693 (ashift:VI48_256
16694 (match_operand:VI48_256 1 "register_operand")
16695 (match_operand:VI48_256 2 "nonimmediate_operand")))]
16696 "TARGET_AVX2")
16697
;; vpsha: bidirectional arithmetic shift with per-element counts.  Where
;; the count in operand 2 is >= 0 the element of operand 1 is shifted left
;; by it; otherwise it is shifted arithmetically right by the negated
;; count.  Either operand 1 or operand 2 may be in memory, but not both.
16698 (define_insn "xop_sha<mode>3"
16699 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
16700 (if_then_else:VI_128
16701 (ge:VI_128
16702 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
16703 (const_int 0))
16704 (ashift:VI_128
16705 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
16706 (match_dup 2))
16707 (ashiftrt:VI_128
16708 (match_dup 1)
16709 (neg:VI_128 (match_dup 2)))))]
16710 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
16711 "vpsha<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
16712 [(set_attr "type" "sseishft")
16713 (set_attr "prefix_data16" "0")
16714 (set_attr "prefix_extra" "2")
16715 (set_attr "mode" "TI")])
16716
;; vpshl: bidirectional logical shift with per-element counts.  Where the
;; count in operand 2 is >= 0 the element of operand 1 is shifted left by
;; it; otherwise it is shifted logically right by the negated count.
;; Either operand 1 or operand 2 may be in memory, but not both.
16717 (define_insn "xop_shl<mode>3"
16718 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
16719 (if_then_else:VI_128
16720 (ge:VI_128
16721 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
16722 (const_int 0))
16723 (ashift:VI_128
16724 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
16725 (match_dup 2))
16726 (lshiftrt:VI_128
16727 (match_dup 1)
16728 (neg:VI_128 (match_dup 2)))))]
16729 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
16730 "vpshl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
16731 [(set_attr "type" "sseishft")
16732 (set_attr "prefix_data16" "0")
16733 (set_attr "prefix_extra" "2")
16734 (set_attr "mode" "TI")])
16735
;; Shift of a byte vector (VI1_AVX512 modes) by a scalar SImode count, for
;; every code in the any_shift iterator.  The expander always ends in
;; DONE, so the template RTL is never emitted.
;;
;; XOP path (V16QImode only): right shifts are turned into shifts by a
;; negative count -- a constant count is negated at expand time, a
;; variable one is negated after broadcasting.  The count is broadcast to
;; all 16 bytes through vec_initv16qi, then xop_shlv16qi3 is used for
;; LSHIFTRT and xop_shav16qi3 for the other codes.
;;
;; All other cases are delegated to ix86_expand_vecop_qihi.
16736 (define_expand "<shift_insn><mode>3"
16737 [(set (match_operand:VI1_AVX512 0 "register_operand")
16738 (any_shift:VI1_AVX512
16739 (match_operand:VI1_AVX512 1 "register_operand")
16740 (match_operand:SI 2 "nonmemory_operand")))]
16741 "TARGET_SSE2"
16742 {
16743 if (TARGET_XOP && <MODE>mode == V16QImode)
16744 {
16745 bool negate = false;
16746 rtx (*gen) (rtx, rtx, rtx);
16747 rtx tmp, par;
16748 int i;
16749
16750 if (<CODE> != ASHIFT)
16751 {
16752 if (CONST_INT_P (operands[2]))
16753 operands[2] = GEN_INT (-INTVAL (operands[2]));
16754 else
16755 negate = true;
16756 }
16757 par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
16758 for (i = 0; i < 16; i++)
16759 XVECEXP (par, 0, i) = operands[2];
16760
16761 tmp = gen_reg_rtx (V16QImode);
16762 emit_insn (gen_vec_initv16qi (tmp, par));
16763
16764 if (negate)
16765 emit_insn (gen_negv16qi2 (tmp, tmp));
16766
16767 gen = (<CODE> == LSHIFTRT ? gen_xop_shlv16qi3 : gen_xop_shav16qi3);
16768 emit_insn (gen (operands[0], operands[1], tmp));
16769 }
16770 else
16771 ix86_expand_vecop_qihi (<CODE>, operands[0], operands[1], operands[2]);
16772 DONE;
16773 })
16774
;; Arithmetic shift right of V2DI by a scalar DImode count.
;; With AVX512VL the preparation code does nothing and the template RTL is
;; emitted.  Otherwise (XOP) the count is negated -- at expand time when
;; constant, after broadcasting when variable -- broadcast to both
;; elements through vec_initv2di, and xop_shav2di3 is emitted, which
;; shifts right for negative counts.
16775 (define_expand "ashrv2di3"
16776 [(set (match_operand:V2DI 0 "register_operand")
16777 (ashiftrt:V2DI
16778 (match_operand:V2DI 1 "register_operand")
16779 (match_operand:DI 2 "nonmemory_operand")))]
16780 "TARGET_XOP || TARGET_AVX512VL"
16781 {
16782 if (!TARGET_AVX512VL)
16783 {
16784 rtx reg = gen_reg_rtx (V2DImode);
16785 rtx par;
16786 bool negate = false;
16787 int i;
16788
16789 if (CONST_INT_P (operands[2]))
16790 operands[2] = GEN_INT (-INTVAL (operands[2]));
16791 else
16792 negate = true;
16793
16794 par = gen_rtx_PARALLEL (V2DImode, rtvec_alloc (2));
16795 for (i = 0; i < 2; i++)
16796 XVECEXP (par, 0, i) = operands[2];
16797
16798 emit_insn (gen_vec_initv2di (reg, par));
16799
16800 if (negate)
16801 emit_insn (gen_negv2di2 (reg, reg));
16802
16803 emit_insn (gen_xop_shav2di3 (operands[0], operands[1], reg));
16804 DONE;
16805 }
16806 })
16807
16808 ;; XOP FRCZ support
;; vfrcz on a whole FMAMODE value; the operation is kept opaque as
;; UNSPEC_FRCZ.  The source may be a register or memory.
16809 (define_insn "xop_frcz<mode>2"
16810 [(set (match_operand:FMAMODE 0 "register_operand" "=x")
16811 (unspec:FMAMODE
16812 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
16813 UNSPEC_FRCZ))]
16814 "TARGET_XOP"
16815 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
16816 [(set_attr "type" "ssecvt1")
16817 (set_attr "mode" "<MODE>")])
16818
;; Scalar-in-vector vfrcz expander.  The preparation statement sets
;; operands[2] to the zero vector, so the emitted vec_merge (mask 1) takes
;; element 0 from the UNSPEC_FRCZ result and the remaining elements from
;; zero.
16819 (define_expand "xop_vmfrcz<mode>2"
16820 [(set (match_operand:VF_128 0 "register_operand")
16821 (vec_merge:VF_128
16822 (unspec:VF_128
16823 [(match_operand:VF_128 1 "nonimmediate_operand")]
16824 UNSPEC_FRCZ)
16825 (match_dup 2)
16826 (const_int 1)))]
16827 "TARGET_XOP"
16828 "operands[2] = CONST0_RTX (<MODE>mode);")
16829
;; Scalar-in-vector vfrcz insn: a vec_merge (mask 1) of the UNSPEC_FRCZ
;; result with operand 2, which must satisfy const0_operand.  Emitted with
;; the scalar mnemonic suffix <ssescalarmodesuffix>.
16830 (define_insn "*xop_vmfrcz<mode>2"
16831 [(set (match_operand:VF_128 0 "register_operand" "=x")
16832 (vec_merge:VF_128
16833 (unspec:VF_128
16834 [(match_operand:VF_128 1 "nonimmediate_operand" "xm")]
16835 UNSPEC_FRCZ)
16836 (match_operand:VF_128 2 "const0_operand")
16837 (const_int 1)))]
16838 "TARGET_XOP"
16839 "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}"
16840 [(set_attr "type" "ssecvt1")
16841 (set_attr "mode" "<MODE>")])
16842
;; vpcom: element-wise integer comparison producing a VI_128 result.  The
;; comparison code is operand 1, restricted by
;; ix86_comparison_int_operator, and is printed into the mnemonic via %Y1
;; (presumably the condition-name suffix -- confirm in the operand
;; printer).
16843 (define_insn "xop_maskcmp<mode>3"
16844 [(set (match_operand:VI_128 0 "register_operand" "=x")
16845 (match_operator:VI_128 1 "ix86_comparison_int_operator"
16846 [(match_operand:VI_128 2 "register_operand" "x")
16847 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
16848 "TARGET_XOP"
16849 "vpcom%Y1<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
16850 [(set_attr "type" "sse4arg")
16851 (set_attr "prefix_data16" "0")
16852 (set_attr "prefix_rep" "0")
16853 (set_attr "prefix_extra" "2")
16854 (set_attr "length_immediate" "1")
16855 (set_attr "mode" "TI")])
16856
;; Unsigned counterpart of the vpcom comparison: operand 1 is restricted
;; by ix86_comparison_uns_operator and the mnemonic carries an extra "u"
;; before the element-size suffix.
16857 (define_insn "xop_maskcmp_uns<mode>3"
16858 [(set (match_operand:VI_128 0 "register_operand" "=x")
16859 (match_operator:VI_128 1 "ix86_comparison_uns_operator"
16860 [(match_operand:VI_128 2 "register_operand" "x")
16861 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
16862 "TARGET_XOP"
16863 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
16864 [(set_attr "type" "ssecmp")
16865 (set_attr "prefix_data16" "0")
16866 (set_attr "prefix_rep" "0")
16867 (set_attr "prefix_extra" "2")
16868 (set_attr "length_immediate" "1")
16869 (set_attr "mode" "TI")])
16870
16871 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
16872 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
16873 ;; the exact instruction generated for the intrinsic.
;; Same comparison and output template as xop_maskcmp_uns<mode>3, but the
;; comparison is wrapped in UNSPEC_XOP_UNSIGNED_CMP so it is opaque to the
;; RTL optimizers.
16874 (define_insn "xop_maskcmp_uns2<mode>3"
16875 [(set (match_operand:VI_128 0 "register_operand" "=x")
16876 (unspec:VI_128
16877 [(match_operator:VI_128 1 "ix86_comparison_uns_operator"
16878 [(match_operand:VI_128 2 "register_operand" "x")
16879 (match_operand:VI_128 3 "nonimmediate_operand" "xm")])]
16880 UNSPEC_XOP_UNSIGNED_CMP))]
16881 "TARGET_XOP"
16882 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
16883 [(set_attr "type" "ssecmp")
16884 (set_attr "prefix_data16" "0")
16885 (set_attr "prefix_extra" "2")
16886 (set_attr "length_immediate" "1")
16887 (set_attr "mode" "TI")])
16888
16889 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
16890 ;; being added here to be complete.
;; vpcomtrue / vpcomfalse.  Operand 3 is a constant selector: the output
;; code emits vpcomtrue when it is non-zero and vpcomfalse when it is
;; zero.  Operand 3 itself is not printed.
16891 (define_insn "xop_pcom_tf<mode>3"
16892 [(set (match_operand:VI_128 0 "register_operand" "=x")
16893 (unspec:VI_128
16894 [(match_operand:VI_128 1 "register_operand" "x")
16895 (match_operand:VI_128 2 "nonimmediate_operand" "xm")
16896 (match_operand:SI 3 "const_int_operand" "n")]
16897 UNSPEC_XOP_TRUEFALSE))]
16898 "TARGET_XOP"
16899 {
16900 return ((INTVAL (operands[3]) != 0)
16901 ? "vpcomtrue<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
16902 : "vpcomfalse<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}");
16903 }
16904 [(set_attr "type" "ssecmp")
16905 (set_attr "prefix_data16" "0")
16906 (set_attr "prefix_extra" "2")
16907 (set_attr "length_immediate" "1")
16908 (set_attr "mode" "TI")])
16909
;; vpermil2ps / vpermil2pd: two-source permute, kept opaque as
;; UNSPEC_VPERMIL2.  Operands 1 and 2 are the floating-point sources,
;; operand 3 is the integer selector vector (<sseintvecmode>) and operand 4
;; is a 2-bit immediate.
;;
;; Operand 2 deliberately carries no '%' constraint modifier: '%' would
;; declare operands 2 and 3 commutative, but they have different modes and
;; different roles (data source vs. selector), so letting the register
;; allocator swap them would produce wrong code.
16910 (define_insn "xop_vpermil2<mode>3"
16911 [(set (match_operand:VF_128_256 0 "register_operand" "=x")
16912 (unspec:VF_128_256
16913 [(match_operand:VF_128_256 1 "register_operand" "x")
16914 (match_operand:VF_128_256 2 "nonimmediate_operand" "x")
16915 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "xm")
16916 (match_operand:SI 4 "const_0_to_3_operand" "n")]
16917 UNSPEC_VPERMIL2))]
16918 "TARGET_XOP"
16919 "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
16920 [(set_attr "type" "sse4arg")
16921 (set_attr "length_immediate" "1")
16922 (set_attr "mode" "<MODE>")])
16923
16924 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
16925
;; AES encryption round (UNSPEC_AESENC).  Alternative 0 is the legacy
;; two-operand SSE form, where operand 1 is tied to the destination ("0");
;; alternative 1 is the three-operand VEX form.  The "isa" attribute
;; restricts them to noavx and avx respectively.
16926 (define_insn "aesenc"
16927 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
16928 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
16929 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
16930 UNSPEC_AESENC))]
16931 "TARGET_AES"
16932 "@
16933 aesenc\t{%2, %0|%0, %2}
16934 vaesenc\t{%2, %1, %0|%0, %1, %2}"
16935 [(set_attr "isa" "noavx,avx")
16936 (set_attr "type" "sselog1")
16937 (set_attr "prefix_extra" "1")
16938 (set_attr "prefix" "orig,vex")
16939 (set_attr "btver2_decode" "double,double")
16940 (set_attr "mode" "TI")])
16941
;; Last AES encryption round (UNSPEC_AESENCLAST).  Alternative 0 is the
;; legacy SSE form with operand 1 tied to the destination; alternative 1
;; is the three-operand VEX form.
16942 (define_insn "aesenclast"
16943 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
16944 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
16945 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
16946 UNSPEC_AESENCLAST))]
16947 "TARGET_AES"
16948 "@
16949 aesenclast\t{%2, %0|%0, %2}
16950 vaesenclast\t{%2, %1, %0|%0, %1, %2}"
16951 [(set_attr "isa" "noavx,avx")
16952 (set_attr "type" "sselog1")
16953 (set_attr "prefix_extra" "1")
16954 (set_attr "prefix" "orig,vex")
16955 (set_attr "btver2_decode" "double,double")
16956 (set_attr "mode" "TI")])
16957
;; AES decryption round (UNSPEC_AESDEC).  Alternative 0 is the legacy SSE
;; form with operand 1 tied to the destination; alternative 1 is the
;; three-operand VEX form.
16958 (define_insn "aesdec"
16959 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
16960 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
16961 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
16962 UNSPEC_AESDEC))]
16963 "TARGET_AES"
16964 "@
16965 aesdec\t{%2, %0|%0, %2}
16966 vaesdec\t{%2, %1, %0|%0, %1, %2}"
16967 [(set_attr "isa" "noavx,avx")
16968 (set_attr "type" "sselog1")
16969 (set_attr "prefix_extra" "1")
16970 (set_attr "prefix" "orig,vex")
16971 (set_attr "btver2_decode" "double,double")
16972 (set_attr "mode" "TI")])
16973
;; Last AES decryption round (UNSPEC_AESDECLAST).  Alternative 0 is the
;; legacy SSE form with operand 1 tied to the destination; alternative 1
;; is the three-operand VEX form.
16974 (define_insn "aesdeclast"
16975 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
16976 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
16977 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
16978 UNSPEC_AESDECLAST))]
16979 "TARGET_AES"
16980 "@
16981 aesdeclast\t{%2, %0|%0, %2}
16982 vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
16983 [(set_attr "isa" "noavx,avx")
16984 (set_attr "type" "sselog1")
16985 (set_attr "prefix_extra" "1")
16986 (set_attr "prefix" "orig,vex")
16987 (set_attr "btver2_decode" "double,double")
16988 (set_attr "mode" "TI")])
16989
;; aesimc (UNSPEC_AESIMC): single-source operation, so one alternative
;; serves both encodings.  The template starts with %v and the prefix
;; attribute is maybe_vex -- presumably %v prints the "v" mnemonic prefix
;; when AVX is enabled (confirm in the operand printer).
16990 (define_insn "aesimc"
16991 [(set (match_operand:V2DI 0 "register_operand" "=x")
16992 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
16993 UNSPEC_AESIMC))]
16994 "TARGET_AES"
16995 "%vaesimc\t{%1, %0|%0, %1}"
16996 [(set_attr "type" "sselog1")
16997 (set_attr "prefix_extra" "1")
16998 (set_attr "prefix" "maybe_vex")
16999 (set_attr "mode" "TI")])
17000
;; aeskeygenassist (UNSPEC_AESKEYGENASSIST): source operand 1 plus an
;; 8-bit immediate (const_0_to_255_operand).  Uses the %v / maybe_vex
;; scheme, so a single alternative covers both encodings.
17001 (define_insn "aeskeygenassist"
17002 [(set (match_operand:V2DI 0 "register_operand" "=x")
17003 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
17004 (match_operand:SI 2 "const_0_to_255_operand" "n")]
17005 UNSPEC_AESKEYGENASSIST))]
17006 "TARGET_AES"
17007 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
17008 [(set_attr "type" "sselog1")
17009 (set_attr "prefix_extra" "1")
17010 (set_attr "length_immediate" "1")
17011 (set_attr "prefix" "maybe_vex")
17012 (set_attr "mode" "TI")])
17013
;; pclmulqdq (UNSPEC_PCLMUL) with an 8-bit immediate in operand 3.
;; Alternative 0 is the legacy SSE form with operand 1 tied to the
;; destination; alternative 1 is the three-operand VEX form.
17014 (define_insn "pclmulqdq"
17015 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
17016 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
17017 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")
17018 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
17019 UNSPEC_PCLMUL))]
17020 "TARGET_PCLMUL"
17021 "@
17022 pclmulqdq\t{%3, %2, %0|%0, %2, %3}
17023 vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17024 [(set_attr "isa" "noavx,avx")
17025 (set_attr "type" "sselog1")
17026 (set_attr "prefix_extra" "1")
17027 (set_attr "length_immediate" "1")
17028 (set_attr "prefix" "orig,vex")
17029 (set_attr "mode" "TI")])
17030
;; Build the RTL for vzeroall.  operands[0] becomes a PARALLEL of
;; nregs + 1 elements: element 0 is the UNSPECV_VZEROALL unspec_volatile
;; and each following element sets one SSE register (as V8SImode) to
;; zero.  nregs is 16 for 64-bit targets and 8 otherwise.
17031 (define_expand "avx_vzeroall"
17032 [(match_par_dup 0 [(const_int 0)])]
17033 "TARGET_AVX"
17034 {
17035 int nregs = TARGET_64BIT ? 16 : 8;
17036 int regno;
17037
17038 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
17039
17040 XVECEXP (operands[0], 0, 0)
17041 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
17042 UNSPECV_VZEROALL);
17043
17044 for (regno = 0; regno < nregs; regno++)
17045 XVECEXP (operands[0], 0, regno + 1)
17046 = gen_rtx_SET (gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
17047 CONST0_RTX (V8SImode));
17048 })
17049
;; Matches a PARALLEL accepted by the vzeroall_operation predicate whose
;; first element is the UNSPECV_VZEROALL unspec_volatile, and emits a
;; single vzeroall.
17050 (define_insn "*avx_vzeroall"
17051 [(match_parallel 0 "vzeroall_operation"
17052 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
17053 "TARGET_AVX"
17054 "vzeroall"
17055 [(set_attr "type" "sse")
17056 (set_attr "modrm" "0")
17057 (set_attr "memory" "none")
17058 (set_attr "prefix" "vex")
17059 (set_attr "btver2_decode" "vector")
17060 (set_attr "mode" "OI")])
17061
17062 ;; Clear the upper 128bits of AVX registers, equivalent to a NOP
17063 ;; if the upper 128bits are unused.
;; Modelled as a bare unspec_volatile (UNSPECV_VZEROUPPER) with no
;; operands; emits vzeroupper.
17064 (define_insn "avx_vzeroupper"
17065 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)]
17066 "TARGET_AVX"
17067 "vzeroupper"
17068 [(set_attr "type" "sse")
17069 (set_attr "modrm" "0")
17070 (set_attr "memory" "none")
17071 (set_attr "prefix" "vex")
17072 (set_attr "btver2_decode" "vector")
17073 (set_attr "mode" "OI")])
17074
;; vpbroadcast: duplicate element 0 of operand 1 (an <ssexmmmode> value,
;; register or memory) into every element of the VI-mode destination.
17075 (define_insn "avx2_pbroadcast<mode>"
17076 [(set (match_operand:VI 0 "register_operand" "=x")
17077 (vec_duplicate:VI
17078 (vec_select:<ssescalarmode>
17079 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "xm")
17080 (parallel [(const_int 0)]))))]
17081 "TARGET_AVX2"
17082 "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}"
17083 [(set_attr "type" "ssemov")
17084 (set_attr "prefix_extra" "1")
17085 (set_attr "prefix" "vex")
17086 (set_attr "mode" "<sseinsnmode>")])
17087
;; vpbroadcast variant whose source is a full 256-bit vector: element 0 of
;; operand 1 is duplicated into every element of the destination.
;; Alternative 0 takes the source from memory; alternative 1 takes it from
;; a register, printed with the %x modifier (presumably the xmm name of
;; the register -- confirm in the operand printer).
17088 (define_insn "avx2_pbroadcast<mode>_1"
17089 [(set (match_operand:VI_256 0 "register_operand" "=x,x")
17090 (vec_duplicate:VI_256
17091 (vec_select:<ssescalarmode>
17092 (match_operand:VI_256 1 "nonimmediate_operand" "m,x")
17093 (parallel [(const_int 0)]))))]
17094 "TARGET_AVX2"
17095 "@
17096 vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}
17097 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}"
17098 [(set_attr "type" "ssemov")
17099 (set_attr "prefix_extra" "1")
17100 (set_attr "prefix" "vex")
17101 (set_attr "mode" "<sseinsnmode>")])
17102
;; Variable permute (vperm<suffix>) for VI48F_256_512 modes, kept opaque
;; as UNSPEC_VPERMVAR.  Operand 1 may be a register or memory; operand 2
;; is an integer vector (<sseintvecmode>) that must be a register --
;; presumably the index vector (confirm against the builtin expanders).
;; <mask_name> / <mask_operand3> come from the masking substitution.
17103 (define_insn "<avx2_avx512>_permvar<mode><mask_name>"
17104 [(set (match_operand:VI48F_256_512 0 "register_operand" "=v")
17105 (unspec:VI48F_256_512
17106 [(match_operand:VI48F_256_512 1 "nonimmediate_operand" "vm")
17107 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
17108 UNSPEC_VPERMVAR))]
17109 "TARGET_AVX2 && <mask_mode512bit_condition>"
17110 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
17111 [(set_attr "type" "sselog")
17112 (set_attr "prefix" "<mask_prefix2>")
17113 (set_attr "mode" "<sseinsnmode>")])
17114
;; Variable permute for the VI1_AVX512VL modes (UNSPEC_VPERMVAR); same
;; operand layout and output template as the other permvar patterns, but
;; gated on TARGET_AVX512VBMI.
17115 (define_insn "<avx512>_permvar<mode><mask_name>"
17116 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
17117 (unspec:VI1_AVX512VL
17118 [(match_operand:VI1_AVX512VL 1 "nonimmediate_operand" "vm")
17119 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
17120 UNSPEC_VPERMVAR))]
17121 "TARGET_AVX512VBMI && <mask_mode512bit_condition>"
17122 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
17123 [(set_attr "type" "sselog")
17124 (set_attr "prefix" "<mask_prefix2>")
17125 (set_attr "mode" "<sseinsnmode>")])
17126
;; Variable permute for the VI2_AVX512VL modes (UNSPEC_VPERMVAR); same
;; operand layout and output template as the other permvar patterns, but
;; gated on TARGET_AVX512BW.
17127 (define_insn "<avx512>_permvar<mode><mask_name>"
17128 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17129 (unspec:VI2_AVX512VL
17130 [(match_operand:VI2_AVX512VL 1 "nonimmediate_operand" "vm")
17131 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
17132 UNSPEC_VPERMVAR))]
17133 "TARGET_AVX512BW && <mask_mode512bit_condition>"
17134 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
17135 [(set_attr "type" "sselog")
17136 (set_attr "prefix" "<mask_prefix2>")
17137 (set_attr "mode" "<sseinsnmode>")])
17138
;; Immediate permute expander.  The 8-bit immediate in operand 2 is split
;; into four 2-bit selectors (bits 1:0, 3:2, 5:4 and 7:6), which are
;; passed as separate constant operands to the matching _1 insn.
17139 (define_expand "<avx2_avx512>_perm<mode>"
17140 [(match_operand:VI8F_256_512 0 "register_operand")
17141 (match_operand:VI8F_256_512 1 "nonimmediate_operand")
17142 (match_operand:SI 2 "const_0_to_255_operand")]
17143 "TARGET_AVX2"
17144 {
17145 int mask = INTVAL (operands[2]);
17146 emit_insn (gen_<avx2_avx512>_perm<mode>_1 (operands[0], operands[1],
17147 GEN_INT ((mask >> 0) & 3),
17148 GEN_INT ((mask >> 2) & 3),
17149 GEN_INT ((mask >> 4) & 3),
17150 GEN_INT ((mask >> 6) & 3)));
17151 DONE;
17152 })
17153
;; Masked form of the immediate permute expander.  The immediate is split
;; into four 2-bit selectors as in the unmasked expander; operands 3 and 4
;; (a vector_move_operand and a mask register of <avx512fmaskmode>) are
;; forwarded as the trailing arguments of the _1_mask insn.
17154 (define_expand "<avx512>_perm<mode>_mask"
17155 [(match_operand:VI8F_256_512 0 "register_operand")
17156 (match_operand:VI8F_256_512 1 "nonimmediate_operand")
17157 (match_operand:SI 2 "const_0_to_255_operand")
17158 (match_operand:VI8F_256_512 3 "vector_move_operand")
17159 (match_operand:<avx512fmaskmode> 4 "register_operand")]
17160 "TARGET_AVX512F"
17161 {
17162 int mask = INTVAL (operands[2]);
17163 emit_insn (gen_<avx2_avx512>_perm<mode>_1_mask (operands[0], operands[1],
17164 GEN_INT ((mask >> 0) & 3),
17165 GEN_INT ((mask >> 2) & 3),
17166 GEN_INT ((mask >> 4) & 3),
17167 GEN_INT ((mask >> 6) & 3),
17168 operands[3], operands[4]));
17169 DONE;
17170 })
17171
;; The insn behind the perm<mode> expanders: a vec_select whose four
;; selectors (operands 2..5, each in 0..3) are folded back into a single
;; 8-bit immediate when the instruction is printed.
;; NOTE(review): the parallel always carries four selectors.  If
;; VI8F_256_512 contains 8-element (512-bit) modes, the vec_select would
;; list fewer elements than the result mode holds -- confirm against the
;; iterator definition.
(define_insn "<avx2_avx512>_perm<mode>_1<mask_name>"
  [(set (match_operand:VI8F_256_512 0 "register_operand" "=v")
        (vec_select:VI8F_256_512
          (match_operand:VI8F_256_512 1 "nonimmediate_operand" "vm")
          (parallel [(match_operand 2 "const_0_to_3_operand")
                     (match_operand 3 "const_0_to_3_operand")
                     (match_operand 4 "const_0_to_3_operand")
                     (match_operand 5 "const_0_to_3_operand")])))]
  "TARGET_AVX2 && <mask_mode512bit_condition>"
{
  int imm = 0;

  /* Operand 2 + K supplies bits [2K+1:2K] of the immediate.  All four
     selectors are read before operand 2 is overwritten below.  */
  for (int k = 0; k < 4; k++)
    imm |= INTVAL (operands[2 + k]) << (2 * k);

  /* Operand 2 is reused to carry the assembled immediate.  */
  operands[2] = GEN_INT (imm);
  return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix" "<mask_prefix2>")
   (set_attr "mode" "<sseinsnmode>")])
17193
;; vperm2i128 on V4DI, represented as UNSPEC_VPERMTI.  Operand 1 must be a
;; register, operand 2 may be a register or memory, and operand 3 is an
;; 8-bit immediate (0..255) passed through to the instruction unchanged.
17194 (define_insn "avx2_permv2ti"
17195 [(set (match_operand:V4DI 0 "register_operand" "=x")
17196 (unspec:V4DI
17197 [(match_operand:V4DI 1 "register_operand" "x")
17198 (match_operand:V4DI 2 "nonimmediate_operand" "xm")
17199 (match_operand:SI 3 "const_0_to_255_operand" "n")]
17200 UNSPEC_VPERMTI))]
17201 "TARGET_AVX2"
17202 "vperm2i128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17203 [(set_attr "type" "sselog")
17204 (set_attr "prefix" "vex")
17205 (set_attr "mode" "OI")])
17206
;; Broadcast element 0 of a V2DF register to all four elements of a V4DF
;; register using vbroadcastsd.  Register source only; requires
;; TARGET_AVX2.
17207 (define_insn "avx2_vec_dupv4df"
17208 [(set (match_operand:V4DF 0 "register_operand" "=x")
17209 (vec_duplicate:V4DF
17210 (vec_select:DF
17211 (match_operand:V2DF 1 "register_operand" "x")
17212 (parallel [(const_int 0)]))))]
17213 "TARGET_AVX2"
17214 "vbroadcastsd\t{%1, %0|%0, %1}"
17215 [(set_attr "type" "sselog1")
17216 (set_attr "prefix" "vex")
17217 (set_attr "mode" "V4DF")])
17218
;; Broadcast element 0 of a full-width integer vector (operand 1, register
;; or memory) to every element of the destination.
;; The inner vec_select extracts a single element (parallel [0]), so its
;; mode is the scalar element mode <ssescalarmode>, matching the other
;; _vec_dup patterns in this file; giving it the vector mode would make
;; the RTL malformed (one selected element cannot have a vector mode).
;; Alternative 0 takes the source from a register, alternative 1 from
;; memory (printed with <iptr> in Intel syntax).
(define_insn "<avx512>_vec_dup<mode>_1"
  [(set (match_operand:VI_AVX512BW 0 "register_operand" "=v,v")
        (vec_duplicate:VI_AVX512BW
          (vec_select:<ssescalarmode>
            (match_operand:VI_AVX512BW 1 "nonimmediate_operand" "v,m")
            (parallel [(const_int 0)]))))]
  "TARGET_AVX512F"
  "@
   vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}
   vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %<iptr>1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
17232
;; Broadcast element 0 of an <ssexmmmode> operand (register or memory) to
;; every element of a V48_AVX512VL destination.  Emits
;; v<sseintprefix>broadcast<bcstscalarsuff> under TARGET_AVX512F; optional
;; masking text comes from <mask_operand2>.
17233 (define_insn "<avx512>_vec_dup<mode><mask_name>"
17234 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
17235 (vec_duplicate:V48_AVX512VL
17236 (vec_select:<ssescalarmode>
17237 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
17238 (parallel [(const_int 0)]))))]
17239 "TARGET_AVX512F"
17240 "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17241 [(set_attr "type" "ssemov")
17242 (set_attr "prefix" "evex")
17243 (set_attr "mode" "<sseinsnmode>")])
17244
;; Same broadcast-of-element-0 pattern for the VI12_AVX512VL modes: the
;; source is an <ssexmmmode> register or memory operand and the insn
;; printed is vpbroadcast<bcstscalarsuff>.  Requires TARGET_AVX512BW.
17245 (define_insn "<avx512>_vec_dup<mode><mask_name>"
17246 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
17247 (vec_duplicate:VI12_AVX512VL
17248 (vec_select:<ssescalarmode>
17249 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
17250 (parallel [(const_int 0)]))))]
17251 "TARGET_AVX512BW"
17252 "vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17253 [(set_attr "type" "ssemov")
17254 (set_attr "prefix" "evex")
17255 (set_attr "mode" "<sseinsnmode>")])
17256
;; Duplicate an <ssexmmmode> value across a V16FI vector.
;; Alternative 0 (register source) shuffles the source with itself using
;; vshuf<shuffletype>32x4 and immediate 0, printing it with the %g
;; modifier; alternative 1 (memory source) uses
;; vbroadcast<shuffletype>32x4.
17257 (define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
17258 [(set (match_operand:V16FI 0 "register_operand" "=v,v")
17259 (vec_duplicate:V16FI
17260 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
17261 "TARGET_AVX512F"
17262 "@
17263 vshuf<shuffletype>32x4\t{$0x0, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x0}
17264 vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17265 [(set_attr "type" "ssemov")
17266 (set_attr "prefix" "evex")
17267 (set_attr "mode" "<sseinsnmode>")])
17268
;; Duplicate an <ssehalfvecmode> value across a V8FI vector.
;; Alternative 0 (register source) uses vshuf<shuffletype>64x2 with
;; immediate 0x44 on the %g-printed source; alternative 1 (memory source)
;; uses vbroadcast<shuffletype>64x4.
17269 (define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
17270 [(set (match_operand:V8FI 0 "register_operand" "=v,v")
17271 (vec_duplicate:V8FI
17272 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
17273 "TARGET_AVX512F"
17274 "@
17275 vshuf<shuffletype>64x2\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
17276 vbroadcast<shuffletype>64x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17277 [(set_attr "type" "ssemov")
17278 (set_attr "prefix" "evex")
17279 (set_attr "mode" "<sseinsnmode>")])
17280
;; Duplicate a scalar of mode <ssescalarmode> into every element of a
;; VI12_AVX512VL vector.  Alternative 0 takes the scalar from a vector
;; register or memory; alternative 1 takes it from a general register,
;; which is printed with the %k modifier.  Requires TARGET_AVX512BW.
17281 (define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>"
17282 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v")
17283 (vec_duplicate:VI12_AVX512VL
17284 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))]
17285 "TARGET_AVX512BW"
17286 "@
17287 vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}
17288 vpbroadcast<bcstscalarsuff>\t{%k1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
17289 [(set_attr "type" "ssemov")
17290 (set_attr "prefix" "evex")
17291 (set_attr "mode" "<sseinsnmode>")])
17292
;; Duplicate a scalar of mode <ssescalarmode> into every element of a
;; V48_AVX512VL vector; both alternatives share one output template.
;; Alternative 1 (general-register source) is only enabled when the scalar
;; mode is an integer mode and, for DImode, only when TARGET_64BIT holds;
;; alternative 0 (vector register or memory source) is always enabled.
17293 (define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>"
17294 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v")
17295 (vec_duplicate:V48_AVX512VL
17296 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))]
17297 "TARGET_AVX512F"
17298 "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17299 [(set_attr "type" "ssemov")
17300 (set_attr "prefix" "evex")
17301 (set_attr "mode" "<sseinsnmode>")
17302 (set (attr "enabled")
17303 (if_then_else (eq_attr "alternative" "1")
17304 (symbol_ref "GET_MODE_CLASS (<ssescalarmode>mode) == MODE_INT
17305 && (<ssescalarmode>mode != DImode || TARGET_64BIT)")
17306 (const_int 1)))])
17307
;; Duplicate an SFmode scalar into all four elements of a V4SF register.
;; Alternatives, as selected by the "isa" attribute:
;;   0 (avx)   register source, vshufps with immediate 0;
;;   1 (avx)   memory source, vbroadcastss;
;;   2 (noavx) source tied to the destination ("0"), shufps in place.
17308 (define_insn "vec_dupv4sf"
17309 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
17310 (vec_duplicate:V4SF
17311 (match_operand:SF 1 "nonimmediate_operand" "x,m,0")))]
17312 "TARGET_SSE"
17313 "@
17314 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
17315 vbroadcastss\t{%1, %0|%0, %1}
17316 shufps\t{$0, %0, %0|%0, %0, 0}"
17317 [(set_attr "isa" "avx,avx,noavx")
17318 (set_attr "type" "sseshuf1,ssemov,sseshuf1")
17319 (set_attr "length_immediate" "1,0,1")
17320 (set_attr "prefix_extra" "0,1,*")
17321 (set_attr "prefix" "vex,vex,orig")
17322 (set_attr "mode" "V4SF")])
17323
;; Duplicate an SImode scalar into all four elements of a V4SI register.
;; Alternatives, as selected by the "isa" attribute:
;;   0 (sse2)  register source, %vpshufd with immediate 0;
;;   1 (avx)   memory source, vbroadcastss;
;;   2 (noavx) source tied to the destination ("0"), shufps in place.
17324 (define_insn "*vec_dupv4si"
17325 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
17326 (vec_duplicate:V4SI
17327 (match_operand:SI 1 "nonimmediate_operand" " x,m,0")))]
17328 "TARGET_SSE"
17329 "@
17330 %vpshufd\t{$0, %1, %0|%0, %1, 0}
17331 vbroadcastss\t{%1, %0|%0, %1}
17332 shufps\t{$0, %0, %0|%0, %0, 0}"
17333 [(set_attr "isa" "sse2,avx,noavx")
17334 (set_attr "type" "sselog1,ssemov,sselog1")
17335 (set_attr "length_immediate" "1,0,1")
17336 (set_attr "prefix_extra" "0,1,*")
17337 (set_attr "prefix" "maybe_vex,vex,orig")
17338 (set_attr "mode" "TI,V4SF,V4SF")])
17339
;; Duplicate a DImode scalar into both elements of a V2DI register.
;; Alternatives, as selected by the "isa" attribute:
;;   0 (sse2_noavx) source tied to the destination, punpcklqdq in place;
;;   1 (avx)        register source, vpunpcklqdq (source printed via %d);
;;   2 (sse3)       memory source, %vmovddup;
;;   3 (noavx)      source tied to the destination, movlhps in place.
17340 (define_insn "*vec_dupv2di"
17341 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x,x")
17342 (vec_duplicate:V2DI
17343 (match_operand:DI 1 "nonimmediate_operand" " 0,x,m,0")))]
17344 "TARGET_SSE"
17345 "@
17346 punpcklqdq\t%0, %0
17347 vpunpcklqdq\t{%d1, %0|%0, %d1}
17348 %vmovddup\t{%1, %0|%0, %1}
17349 movlhps\t%0, %0"
17350 [(set_attr "isa" "sse2_noavx,avx,sse3,noavx")
17351 (set_attr "type" "sselog1,sselog1,sselog1,ssemov")
17352 (set_attr "prefix" "orig,vex,maybe_vex,orig")
17353 (set_attr "mode" "TI,TI,DF,V4SF")])
17354
;; vbroadcasti128: build a VI_256 vector by concatenating a half-width
;; (<ssehalfvecmode>) memory operand with itself.  The source must be in
;; memory ("memory_operand", constraint "m").
17355 (define_insn "avx2_vbroadcasti128_<mode>"
17356 [(set (match_operand:VI_256 0 "register_operand" "=x")
17357 (vec_concat:VI_256
17358 (match_operand:<ssehalfvecmode> 1 "memory_operand" "m")
17359 (match_dup 1)))]
17360 "TARGET_AVX2"
17361 "vbroadcasti128\t{%1, %0|%0, %1}"
17362 [(set_attr "type" "ssemov")
17363 (set_attr "prefix_extra" "1")
17364 (set_attr "prefix" "vex")
17365 (set_attr "mode" "OI")])
17366
;; AVX_VEC_DUP_MODE covers the 256-bit modes with 32- and 64-bit
;; elements; it is used by the "vec_dup<mode>" insn and its post-reload
;; splitter below.
17367 ;; Modes handled by AVX vec_dup patterns.
17368 (define_mode_iterator AVX_VEC_DUP_MODE
17369 [V8SI V8SF V4DI V4DF])
;; AVX2_VEC_DUP_MODE covers the integer modes with 8-, 16- and 32-bit
;; elements in both 128- and 256-bit widths; it is used by the
;; "*vec_dup<mode>" insn and its splitter below.
17370 ;; Modes handled by AVX2 vec_dup patterns.
17371 (define_mode_iterator AVX2_VEC_DUP_MODE
17372 [V32QI V16QI V16HI V8HI V8SI V4SI])
17373
;; AVX2 broadcast of a scalar into an AVX2_VEC_DUP_MODE vector.
;; Alternative 0 loads the scalar from memory, alternative 1 takes it from
;; a vector register (printed with %x).  Alternative 2 accepts a general
;; register source and emits "#", i.e. it has no direct assembly and is
;; handled by the AVX2_VEC_DUP_MODE define_split further down.
17374 (define_insn "*vec_dup<mode>"
17375 [(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand" "=x,x,Yi")
17376 (vec_duplicate:AVX2_VEC_DUP_MODE
17377 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,x,$r")))]
17378 "TARGET_AVX2"
17379 "@
17380 v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1}
17381 v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}
17382 #"
17383 [(set_attr "type" "ssemov")
17384 (set_attr "prefix_extra" "1")
17385 (set_attr "prefix" "maybe_evex")
17386 (set_attr "mode" "<sseinsnmode>")])
17387
;; Broadcast of a scalar into an AVX_VEC_DUP_MODE (256-bit) vector.
;; Alternatives, as selected by the "isa" attribute:
;;   0 (avx2)    memory source, v<sseintprefix>broadcast<bcstscalarsuff>;
;;   1 (noavx2)  memory source, vbroadcast<ssescalarmodesuffix>;
;;   2 (avx2)    "x" register source, printed with %x;
;;   3 (avx512f) "v" register source, destination printed with %g;
;;   4 (noavx2)  register source, emits "#" and is split after reload by
;;               the AVX_VEC_DUP_MODE define_split below.
17388 (define_insn "vec_dup<mode>"
17389 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x,x,v,x")
17390 (vec_duplicate:AVX_VEC_DUP_MODE
17391 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,m,x,v,?x")))]
17392 "TARGET_AVX"
17393 "@
17394 v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1}
17395 vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
17396 v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}
17397 v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %g0|%g0, %x1}
17398 #"
17399 [(set_attr "type" "ssemov")
17400 (set_attr "prefix_extra" "1")
17401 (set_attr "prefix" "maybe_evex")
17402 (set_attr "isa" "avx2,noavx2,avx2,avx512f,noavx2")
17403 (set_attr "mode" "<sseinsnmode>,V8SF,<sseinsnmode>,<sseinsnmode>,V8SF")])
17404
;; Post-reload split for an AVX2 broadcast whose scalar source is in a
;; general register.  Two insns are emitted: gen_vec_setv4si_0 puts the
;; SImode low part of the scalar into the V4SI view of the destination
;; (with a zero V4SI as the other input), then gen_avx2_pbroadcast<mode>
;; broadcasts from the destination's own <ssexmmmode> low part.
(define_split
  [(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand")
        (vec_duplicate:AVX2_VEC_DUP_MODE
          (match_operand:<ssescalarmode> 1 "register_operand")))]
  "TARGET_AVX2
   /* Disable this splitter if avx512vl_vec_dup_gprv*[qhs]i insn is
      available, because then we can broadcast from GPRs directly.
      For V*[QH]I modes it requires both -mavx512vl and -mavx512bw,
      for V*SI mode it requires just -mavx512vl.  */
   && !(TARGET_AVX512VL
        && (TARGET_AVX512BW || <ssescalarmode>mode == SImode))
   && reload_completed && GENERAL_REG_P (operands[1])"
  [(const_int 0)]
{
  rtx dest = operands[0];

  /* Step 1: move the scalar into element 0 of the destination.  */
  rtx dest_v4si = gen_lowpart (V4SImode, dest);
  rtx src_si = gen_lowpart (SImode, operands[1]);
  emit_insn (gen_vec_setv4si_0 (dest_v4si, CONST0_RTX (V4SImode), src_si));

  /* Step 2: broadcast from the destination's low XMM part to itself.  */
  rtx dest_xmm = gen_lowpart (<ssexmmmode>mode, dest);
  emit_insn (gen_avx2_pbroadcast<mode> (dest, dest_xmm));
  DONE;
})
17427
;; Post-reload split for a register-to-register 256-bit broadcast on AVX
;; without AVX2.  Operand 2 is created as the destination's own hard
;; register viewed in <ssehalfvecmode>; the scalar is first duplicated
;; into that half, and the full vector is then formed by concatenating the
;; half with itself.
17428 (define_split
17429 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand")
17430 (vec_duplicate:AVX_VEC_DUP_MODE
17431 (match_operand:<ssescalarmode> 1 "register_operand")))]
17432 "TARGET_AVX && !TARGET_AVX2 && reload_completed"
17433 [(set (match_dup 2)
17434 (vec_duplicate:<ssehalfvecmode> (match_dup 1)))
17435 (set (match_dup 0)
17436 (vec_concat:AVX_VEC_DUP_MODE (match_dup 2) (match_dup 2)))]
17437 "operands[2] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[0]));")
17438
;; Build a V_256 vector by concatenating a half-width operand with itself.
;; Alternatives:
;;   0  memory source, vbroadcast<i128>;
;;   1  source tied to the destination ("0"), vinsert<i128> with
;;      immediate 1;
;;   2  other register source (disparaged with "?"), vperm2<i128> with
;;      immediate 0 on the %t-printed source.
17439 (define_insn "avx_vbroadcastf128_<mode>"
17440 [(set (match_operand:V_256 0 "register_operand" "=x,x,x")
17441 (vec_concat:V_256
17442 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x")
17443 (match_dup 1)))]
17444 "TARGET_AVX"
17445 "@
17446 vbroadcast<i128>\t{%1, %0|%0, %1}
17447 vinsert<i128>\t{$1, %1, %0, %0|%0, %0, %1, 1}
17448 vperm2<i128>\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}"
17449 [(set_attr "type" "ssemov,sselog1,sselog1")
17450 (set_attr "prefix_extra" "1")
17451 (set_attr "length_immediate" "0,1,1")
17452 (set_attr "prefix" "vex")
17453 (set_attr "mode" "<sseinsnmode>")])
17454
;; Modes supported by the broadcast[i|f]32x2 pattern below.  The 256- and
;; 128-bit entries additionally require TARGET_AVX512VL; V4SF is
;; deliberately absent.
17455 ;; For broadcast[i|f]32x2. Yes there is no v4sf version, only v4si.
17456 (define_mode_iterator VI4F_BRCST32x2
17457 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
17458 V16SF (V8SF "TARGET_AVX512VL")])
17459
;; Maps a 64-bit-element vector mode to the two-element mode that the
;; 64x2 broadcast pattern takes as its source.
17460 (define_mode_attr 64x2mode
17461 [(V8DF "V2DF") (V8DI "V2DI") (V4DI "V2DI") (V4DF "V2DF")])
17462
;; Maps a 32-bit-element vector mode to the two-element mode selected
;; from the source by the 32x2 broadcast pattern.
17463 (define_mode_attr 32x2mode
17464 [(V16SF "V2SF") (V16SI "V2SI") (V8SI "V2SI")
17465 (V8SF "V2SF") (V4SI "V2SI")])
17466
;; vbroadcast<shuffletype>32x2: select elements 0 and 1 of the
;; <ssexmmmode> operand (register or memory) as a <32x2mode> pair and
;; duplicate that pair across the VI4F_BRCST32x2 destination.
;; Requires TARGET_AVX512DQ.
17467 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>"
17468 [(set (match_operand:VI4F_BRCST32x2 0 "register_operand" "=v")
17469 (vec_duplicate:VI4F_BRCST32x2
17470 (vec_select:<32x2mode>
17471 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
17472 (parallel [(const_int 0) (const_int 1)]))))]
17473 "TARGET_AVX512DQ"
17474 "vbroadcast<shuffletype>32x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17475 [(set_attr "type" "ssemov")
17476 (set_attr "prefix_extra" "1")
17477 (set_attr "prefix" "evex")
17478 (set_attr "mode" "<sseinsnmode>")])
17479
;; Duplicate an <ssexmmmode> value across a VI4F_256 vector under
;; TARGET_AVX512VL.  Alternative 0 (register source) uses
;; vshuf<shuffletype>32x4 with immediate 0 on the %t-printed source;
;; alternative 1 (memory source) uses vbroadcast<shuffletype>32x4.
17480 (define_insn "<mask_codefor>avx512vl_broadcast<mode><mask_name>_1"
17481 [(set (match_operand:VI4F_256 0 "register_operand" "=v,v")
17482 (vec_duplicate:VI4F_256
17483 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
17484 "TARGET_AVX512VL"
17485 "@
17486 vshuf<shuffletype>32x4\t{$0x0, %t1, %t1, %0<mask_operand2>|%0<mask_operand2>, %t1, %t1, 0x0}
17487 vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17488 [(set_attr "type" "ssemov")
17489 (set_attr "prefix_extra" "1")
17490 (set_attr "prefix" "evex")
17491 (set_attr "mode" "<sseinsnmode>")])
17492
;; Duplicate an <ssehalfvecmode> value across a V16FI vector under
;; TARGET_AVX512DQ.  Alternative 0 (register source) uses
;; vshuf<shuffletype>32x4 with immediate 0x44 on the %g-printed source;
;; alternative 1 (memory source) uses vbroadcast<shuffletype>32x8.
17493 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1"
17494 [(set (match_operand:V16FI 0 "register_operand" "=v,v")
17495 (vec_duplicate:V16FI
17496 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
17497 "TARGET_AVX512DQ"
17498 "@
17499 vshuf<shuffletype>32x4\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
17500 vbroadcast<shuffletype>32x8\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17501 [(set_attr "type" "ssemov")
17502 (set_attr "prefix_extra" "1")
17503 (set_attr "prefix" "evex")
17504 (set_attr "mode" "<sseinsnmode>")])
17505
;; Modes supported by the broadcast[i|f]64x2 pattern below; the 256-bit
;; entries additionally require TARGET_AVX512VL.
17506 ;; For broadcast[i|f]64x2
17507 (define_mode_iterator VI8F_BRCST64x2
17508 [V8DI V8DF (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")])
17509
;; Duplicate a <64x2mode> value across a VI8F_BRCST64x2 vector under
;; TARGET_AVX512DQ.  Alternative 0 (register source) uses
;; vshuf<shuffletype>64x2 with immediate 0, printing the source with the
;; <concat_tg_mode> modifier; alternative 1 (memory source) uses
;; vbroadcast<shuffletype>64x2.
17510 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1"
17511 [(set (match_operand:VI8F_BRCST64x2 0 "register_operand" "=v,v")
17512 (vec_duplicate:VI8F_BRCST64x2
17513 (match_operand:<64x2mode> 1 "nonimmediate_operand" "v,m")))]
17514 "TARGET_AVX512DQ"
17515 "@
17516 vshuf<shuffletype>64x2\t{$0x0, %<concat_tg_mode>1, %<concat_tg_mode>1, %0<mask_operand2>|%0<mask_operand2>, %<concat_tg_mode>1, %<concat_tg_mode>1, 0x0}
17517 vbroadcast<shuffletype>64x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17518 [(set_attr "type" "ssemov")
17519 (set_attr "prefix_extra" "1")
17520 (set_attr "prefix" "evex")
17521 (set_attr "mode" "<sseinsnmode>")])
17522
;; vpbroadcastmb2q: zero-extend a QImode register operand (constraint
;; "Yk") to DImode and duplicate it into every element of a VI8_AVX512VL
;; vector.  Requires TARGET_AVX512CD.
;; NOTE(review): the "mode" attribute is fixed to XI for every mode of the
;; iterator -- confirm this is intended for the narrower vector modes.
17523 (define_insn "avx512cd_maskb_vec_dup<mode>"
17524 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
17525 (vec_duplicate:VI8_AVX512VL
17526 (zero_extend:DI
17527 (match_operand:QI 1 "register_operand" "Yk"))))]
17528 "TARGET_AVX512CD"
17529 "vpbroadcastmb2q\t{%1, %0|%0, %1}"
17530 [(set_attr "type" "mskmov")
17531 (set_attr "prefix" "evex")
17532 (set_attr "mode" "XI")])
17533
;; vpbroadcastmw2d: zero-extend an HImode register operand (constraint
;; "Yk") to SImode and duplicate it into every element of a VI4_AVX512VL
;; vector.  Requires TARGET_AVX512CD.
;; NOTE(review): the "mode" attribute is fixed to XI for every mode of the
;; iterator -- confirm this is intended for the narrower vector modes.
17534 (define_insn "avx512cd_maskw_vec_dup<mode>"
17535 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
17536 (vec_duplicate:VI4_AVX512VL
17537 (zero_extend:SI
17538 (match_operand:HI 1 "register_operand" "Yk"))))]
17539 "TARGET_AVX512CD"
17540 "vpbroadcastmw2d\t{%1, %0|%0, %1}"
17541 [(set_attr "type" "mskmov")
17542 (set_attr "prefix" "evex")
17543 (set_attr "mode" "XI")])
17544
;; Recognize a broadcast that is written as a vec_select, as produced by
;; builtin_vec_perm.  When the input is in memory, the address is adjusted
;; to the selected SFmode element and vbroadcastss loads it.  When the
;; input is in a register, vpermilps replicates the element.
(define_insn "*avx_vperm_broadcast_v4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
        (vec_select:V4SF
          (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x")
          (match_parallel 2 "avx_vbroadcast_operand"
            [(match_operand 3 "const_int_operand" "C,n,n")])))]
  "TARGET_AVX"
{
  const int elt = INTVAL (operands[3]);

  if (which_alternative == 0 || which_alternative == 1)
    {
      /* Memory source: point operand 1 at the selected 4-byte element.  */
      operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
      return "vbroadcastss\t{%1, %0|%0, %k1}";
    }

  if (which_alternative == 2)
    {
      /* Register source: multiplying by 0x55 copies the 2-bit element
         index into all four selector fields of the immediate.  */
      operands[2] = GEN_INT (elt * 0x55);
      return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
    }

  gcc_unreachable ();
}
  [(set_attr "type" "ssemov,ssemov,sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "0,0,1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "SF,SF,V4SF")])
17575
;; 256-bit counterpart of the vec_select-as-broadcast pattern.  It always
;; emits "#" and is split after reload (except for V4DF when AVX2 is
;; available, which the split condition excludes):
;;   - memory input: operand 1 is narrowed to the selected scalar element
;;     and the vec_duplicate template is emitted;
;;   - register input, AVX2 and element 0: a plain vec_dup<mode>;
;;   - register input otherwise: an in-lane vpermil followed by a
;;     vperm2f128 that copies the wanted lane into both halves.
(define_insn_and_split "*avx_vperm_broadcast_<mode>"
  [(set (match_operand:VF_256 0 "register_operand" "=x,x,x")
        (vec_select:VF_256
          (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?x")
          (match_parallel 2 "avx_vbroadcast_operand"
            [(match_operand 3 "const_int_operand" "C,n,n")])))]
  "TARGET_AVX"
  "#"
  "&& reload_completed && (<MODE>mode != V4DFmode || !TARGET_AVX2)"
  [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
{
  rtx dest = operands[0], src = operands[1];
  const int elt = INTVAL (operands[3]);

  if (!REG_P (src))
    {
      /* Not a register: address the selected element directly and fall
         through so that the vec_duplicate template above is emitted.  */
      operands[1] = adjust_address (src, <ssescalarmode>mode,
                                    elt * GET_MODE_SIZE (<ssescalarmode>mode));
    }
  else if (TARGET_AVX2 && elt == 0)
    {
      emit_insn (gen_vec_dup<mode> (dest, gen_lowpart (<ssescalarmode>mode,
                                                       src)));
      DONE;
    }
  else
    {
      /* Shuffle element we care about into all elements of the 128-bit
         lane.  The other lane gets shuffled too, but we don't care.  */
      int in_lane_imm;
      if (<MODE>mode == V4DFmode)
        in_lane_imm = (elt & 1) ? 15 : 0;
      else
        in_lane_imm = (elt & 3) * 0x55;
      emit_insn (gen_avx_vpermil<mode> (dest, src, GEN_INT (in_lane_imm)));

      /* Shuffle the lane we care about into both lanes of the dest.  */
      const int lane_imm = (elt / (<ssescalarnum> / 2)) * 0x11;
      emit_insn (gen_avx_vperm2f128<mode>3 (dest, dest, dest,
                                            GEN_INT (lane_imm)));
      DONE;
    }
})
17618
;; Immediate-controlled vpermil for the VF2 modes.  The immediate
;; (operand 2) is rewritten as an explicit vec_select index vector:
;; elements are handled in pairs, and immediate bit J chooses element 0 or
;; 1 of the pair that result element J belongs to.
(define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
  [(set (match_operand:VF2 0 "register_operand")
        (vec_select:VF2
          (match_operand:VF2 1 "nonimmediate_operand")
          (match_operand:SI 2 "const_0_to_255_operand")))]
  "TARGET_AVX && <mask_mode512bit_condition>"
{
  const int imm = INTVAL (operands[2]);
  rtx sel[<ssescalarnum>];

  /* PAIR is the index of the first element of each two-element group;
     every selector stays inside its own group.  */
  for (int pair = 0; pair < <ssescalarnum>; pair += 2)
    for (int k = 0; k < 2; k++)
      sel[pair + k] = GEN_INT (((imm >> (pair + k)) & 1) + pair);

  operands[2]
    = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, sel));
})
17639
;; Immediate-controlled vpermil for the VF1 modes.  The immediate
;; (operand 2) holds four 2-bit selectors; the same four selectors are
;; applied to every group of four elements, each offset by the index of
;; the group's first element, to form an explicit vec_select index vector.
(define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
  [(set (match_operand:VF1 0 "register_operand")
        (vec_select:VF1
          (match_operand:VF1 1 "nonimmediate_operand")
          (match_operand:SI 2 "const_0_to_255_operand")))]
  "TARGET_AVX && <mask_mode512bit_condition>"
{
  const int imm = INTVAL (operands[2]);
  rtx sel[<ssescalarnum>];

  /* QUAD is the index of the first element of each four-element group;
     selector K occupies bits [2K+1:2K] of the immediate.  */
  for (int quad = 0; quad < <ssescalarnum>; quad += 4)
    for (int k = 0; k < 4; k++)
      sel[quad + k] = GEN_INT (((imm >> (2 * k)) & 3) + quad);

  operands[2]
    = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, sel));
})
17662
;; vpermil with an immediate, matched as a vec_select whose index vector
;; (operand 2) is accepted by avx_vpermilp_parallel.  The insn condition
;; requires that helper to return nonzero; at output time the immediate is
;; that return value minus one, stored back into operand 2.
(define_insn "*<sse2_avx_avx512f>_vpermilp<mode><mask_name>"
  [(set (match_operand:VF 0 "register_operand" "=v")
        (vec_select:VF
          (match_operand:VF 1 "nonimmediate_operand" "vm")
          (match_parallel 2 ""
            [(match_operand 3 "const_int_operand")])))]
  "TARGET_AVX && <mask_mode512bit_condition>
   && avx_vpermilp_parallel (operands[2], <MODE>mode)"
{
  /* Replace the parallel by the immediate it encodes.  */
  operands[2] = GEN_INT (avx_vpermilp_parallel (operands[2], <MODE>mode) - 1);
  return "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "<mask_prefix>")
   (set_attr "mode" "<sseinsnmode>")])
17681
;; vpermil with a variable control vector, represented as UNSPEC_VPERMIL.
;; Operand 1 is the data vector (register only); operand 2 is the
;; <sseintvecmode> control vector and may be a register or memory.
17682 (define_insn "<sse2_avx_avx512f>_vpermilvar<mode>3<mask_name>"
17683 [(set (match_operand:VF 0 "register_operand" "=v")
17684 (unspec:VF
17685 [(match_operand:VF 1 "register_operand" "v")
17686 (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "vm")]
17687 UNSPEC_VPERMIL))]
17688 "TARGET_AVX && <mask_mode512bit_condition>"
17689 "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17690 [(set_attr "type" "sselog")
17691 (set_attr "prefix_extra" "1")
17692 (set_attr "btver2_decode" "vector")
17693 (set_attr "prefix" "<mask_prefix>")
17694 (set_attr "mode" "<sseinsnmode>")])
17695
;; Zero-masking form of vpermi2var for the VI48F modes.  Forwards its
;; operands to the _maskz_1 insn, inserting CONST0_RTX (<MODE>mode) as the
;; extra vector operand ahead of the mask (operand 4).
;; No operand constraints are given: an expander only generates RTL, so
;; register-allocation constraints (in particular the "0" tie) have no
;; effect here.  This matches the VI1_AVX512VL vpermi2var maskz expander.
(define_expand "<avx512>_vpermi2var<mode>3_maskz"
  [(match_operand:VI48F 0 "register_operand")
   (match_operand:VI48F 1 "register_operand")
   (match_operand:<sseintvecmode> 2 "register_operand")
   (match_operand:VI48F 3 "nonimmediate_operand")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512F"
{
  emit_insn (gen_<avx512>_vpermi2var<mode>3_maskz_1 (
        operands[0], operands[1], operands[2], operands[3],
        CONST0_RTX (<MODE>mode), operands[4]));
  DONE;
})
17709
;; Zero-masking form of vpermi2var for the VI1_AVX512VL modes
;; (TARGET_AVX512VBMI).  Forwards its operands to the _maskz_1 insn,
;; inserting CONST0_RTX (<MODE>mode) as the extra vector operand ahead of
;; the mask (operand 4).
(define_expand "<avx512>_vpermi2var<mode>3_maskz"
  [(match_operand:VI1_AVX512VL 0 "register_operand")
   (match_operand:VI1_AVX512VL 1 "register_operand")
   (match_operand:<sseintvecmode> 2 "register_operand")
   (match_operand:VI1_AVX512VL 3 "nonimmediate_operand")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512VBMI"
{
  rtx zero_vec = CONST0_RTX (<MODE>mode);
  rtx kmask = operands[4];

  emit_insn (gen_<avx512>_vpermi2var<mode>3_maskz_1 (operands[0],
                                                      operands[1],
                                                      operands[2],
                                                      operands[3],
                                                      zero_vec, kmask));
  DONE;
})
17723
;; Zero-masking form of vpermi2var for the VI2_AVX512VL modes
;; (TARGET_AVX512BW).  Forwards its operands to the _maskz_1 insn,
;; inserting CONST0_RTX (<MODE>mode) as the extra vector operand ahead of
;; the mask (operand 4).
;; No operand constraints are given: an expander only generates RTL, so
;; register-allocation constraints (in particular the "0" tie) have no
;; effect here.  This matches the VI1_AVX512VL vpermi2var maskz expander.
(define_expand "<avx512>_vpermi2var<mode>3_maskz"
  [(match_operand:VI2_AVX512VL 0 "register_operand")
   (match_operand:VI2_AVX512VL 1 "register_operand")
   (match_operand:<sseintvecmode> 2 "register_operand")
   (match_operand:VI2_AVX512VL 3 "nonimmediate_operand")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512BW"
{
  emit_insn (gen_<avx512>_vpermi2var<mode>3_maskz_1 (
        operands[0], operands[1], operands[2], operands[3],
        CONST0_RTX (<MODE>mode), operands[4]));
  DONE;
})
17737
;; vpermi2 for the VI48F modes, represented as UNSPEC_VPERMI2.
;; Operand 2 (the <sseintvecmode> index vector) is tied to the destination
;; by the "0" constraint, so only operands 1 and 3 appear as sources in
;; the template.  Operand 3 may be in memory.
17738 (define_insn "<avx512>_vpermi2var<mode>3<sd_maskz_name>"
17739 [(set (match_operand:VI48F 0 "register_operand" "=v")
17740 (unspec:VI48F
17741 [(match_operand:VI48F 1 "register_operand" "v")
17742 (match_operand:<sseintvecmode> 2 "register_operand" "0")
17743 (match_operand:VI48F 3 "nonimmediate_operand" "vm")]
17744 UNSPEC_VPERMI2))]
17745 "TARGET_AVX512F"
17746 "vpermi2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
17747 [(set_attr "type" "sselog")
17748 (set_attr "prefix" "evex")
17749 (set_attr "mode" "<sseinsnmode>")])
17750
;; vpermi2 for the VI1_AVX512VL modes (TARGET_AVX512VBMI), represented as
;; UNSPEC_VPERMI2.  Operand 2 (the index vector) is tied to the
;; destination by the "0" constraint; operand 3 may be in memory.
17751 (define_insn "<avx512>_vpermi2var<mode>3<sd_maskz_name>"
17752 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
17753 (unspec:VI1_AVX512VL
17754 [(match_operand:VI1_AVX512VL 1 "register_operand" "v")
17755 (match_operand:<sseintvecmode> 2 "register_operand" "0")
17756 (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")]
17757 UNSPEC_VPERMI2))]
17758 "TARGET_AVX512VBMI"
17759 "vpermi2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
17760 [(set_attr "type" "sselog")
17761 (set_attr "prefix" "evex")
17762 (set_attr "mode" "<sseinsnmode>")])
17763
;; vpermi2 for the VI2_AVX512VL modes (TARGET_AVX512BW), represented as
;; UNSPEC_VPERMI2.  Operand 2 (the index vector) is tied to the
;; destination by the "0" constraint; operand 3 may be in memory.
17764 (define_insn "<avx512>_vpermi2var<mode>3<sd_maskz_name>"
17765 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17766 (unspec:VI2_AVX512VL
17767 [(match_operand:VI2_AVX512VL 1 "register_operand" "v")
17768 (match_operand:<sseintvecmode> 2 "register_operand" "0")
17769 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")]
17770 UNSPEC_VPERMI2))]
17771 "TARGET_AVX512BW"
17772 "vpermi2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
17773 [(set_attr "type" "sselog")
17774 (set_attr "prefix" "evex")
17775 (set_attr "mode" "<sseinsnmode>")])
17776
;; Merge-masking vpermi2 for the VI48F modes: a vec_merge, under mask
;; operand 4, of the UNSPEC_VPERMI2_MASK result with (match_dup 0).
;; Operand 2 (the index vector) is tied to the destination by the "0"
;; constraint.  The mask is printed explicitly as {%4} in the template.
;; NOTE(review): the merge source is written as (match_dup 0), i.e. the
;; output operand itself, although the tied input is operand 2 in
;; <sseintvecmode> -- confirm this representation is intended.
17777 (define_insn "<avx512>_vpermi2var<mode>3_mask"
17778 [(set (match_operand:VI48F 0 "register_operand" "=v")
17779 (vec_merge:VI48F
17780 (unspec:VI48F
17781 [(match_operand:VI48F 1 "register_operand" "v")
17782 (match_operand:<sseintvecmode> 2 "register_operand" "0")
17783 (match_operand:VI48F 3 "nonimmediate_operand" "vm")]
17784 UNSPEC_VPERMI2_MASK)
17785 (match_dup 0)
17786 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
17787 "TARGET_AVX512F"
17788 "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
17789 [(set_attr "type" "sselog")
17790 (set_attr "prefix" "evex")
17791 (set_attr "mode" "<sseinsnmode>")])
17792
;; Merge-masking vpermi2 for the VI1_AVX512VL modes (TARGET_AVX512VBMI):
;; a vec_merge, under mask operand 4, of the UNSPEC_VPERMI2_MASK result
;; with (match_dup 0).  Operand 2 (the index vector) is tied to the
;; destination by the "0" constraint.
17793 (define_insn "<avx512>_vpermi2var<mode>3_mask"
17794 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
17795 (vec_merge:VI1_AVX512VL
17796 (unspec:VI1_AVX512VL
17797 [(match_operand:VI1_AVX512VL 1 "register_operand" "v")
17798 (match_operand:<sseintvecmode> 2 "register_operand" "0")
17799 (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")]
17800 UNSPEC_VPERMI2_MASK)
17801 (match_dup 0)
17802 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
17803 "TARGET_AVX512VBMI"
17804 "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
17805 [(set_attr "type" "sselog")
17806 (set_attr "prefix" "evex")
17807 (set_attr "mode" "<sseinsnmode>")])
17808
;; Merge-masking vpermi2 for the VI2_AVX512VL modes (TARGET_AVX512BW):
;; a vec_merge, under mask operand 4, of the UNSPEC_VPERMI2_MASK result
;; with (match_dup 0).  Operand 2 (the index vector) is tied to the
;; destination by the "0" constraint.
17809 (define_insn "<avx512>_vpermi2var<mode>3_mask"
17810 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17811 (vec_merge:VI2_AVX512VL
17812 (unspec:VI2_AVX512VL
17813 [(match_operand:VI2_AVX512VL 1 "register_operand" "v")
17814 (match_operand:<sseintvecmode> 2 "register_operand" "0")
17815 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")]
17816 UNSPEC_VPERMI2_MASK)
17817 (match_dup 0)
17818 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
17819 "TARGET_AVX512BW"
17820 "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
17821 [(set_attr "type" "sselog")
17822 (set_attr "prefix" "evex")
17823 (set_attr "mode" "<sseinsnmode>")])
17824
;; Zero-masking form of vpermt2var for the VI48F modes.  Operand 1 is the
;; <sseintvecmode> index vector.  Forwards its operands to the _maskz_1
;; insn, inserting CONST0_RTX (<MODE>mode) as the extra vector operand
;; ahead of the mask (operand 4).
;; No operand constraints are given: an expander only generates RTL, so
;; register-allocation constraints (in particular the "0" tie) have no
;; effect here.  This matches the VI1_AVX512VL vpermi2var maskz expander.
(define_expand "<avx512>_vpermt2var<mode>3_maskz"
  [(match_operand:VI48F 0 "register_operand")
   (match_operand:<sseintvecmode> 1 "register_operand")
   (match_operand:VI48F 2 "register_operand")
   (match_operand:VI48F 3 "nonimmediate_operand")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512F"
{
  emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 (
        operands[0], operands[1], operands[2], operands[3],
        CONST0_RTX (<MODE>mode), operands[4]));
  DONE;
})
17838
;; Zero-masking form of vpermt2var for the VI1_AVX512VL modes
;; (TARGET_AVX512VBMI).  Operand 1 is the <sseintvecmode> index vector.
;; Forwards its operands to the _maskz_1 insn, inserting
;; CONST0_RTX (<MODE>mode) as the extra vector operand ahead of the mask
;; (operand 4).
;; No operand constraints are given: an expander only generates RTL, so
;; register-allocation constraints (in particular the "0" tie) have no
;; effect here.  This matches the VI1_AVX512VL vpermi2var maskz expander.
(define_expand "<avx512>_vpermt2var<mode>3_maskz"
  [(match_operand:VI1_AVX512VL 0 "register_operand")
   (match_operand:<sseintvecmode> 1 "register_operand")
   (match_operand:VI1_AVX512VL 2 "register_operand")
   (match_operand:VI1_AVX512VL 3 "nonimmediate_operand")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512VBMI"
{
  emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 (
        operands[0], operands[1], operands[2], operands[3],
        CONST0_RTX (<MODE>mode), operands[4]));
  DONE;
})
17852
;; Zero-masking form of vpermt2var for the VI2_AVX512VL modes
;; (TARGET_AVX512BW).  Operand 1 is the <sseintvecmode> index vector.
;; Forwards its operands to the _maskz_1 insn, inserting
;; CONST0_RTX (<MODE>mode) as the extra vector operand ahead of the mask
;; (operand 4).
;; No operand constraints are given: an expander only generates RTL, so
;; register-allocation constraints (in particular the "0" tie) have no
;; effect here.  This matches the VI1_AVX512VL vpermi2var maskz expander.
(define_expand "<avx512>_vpermt2var<mode>3_maskz"
  [(match_operand:VI2_AVX512VL 0 "register_operand")
   (match_operand:<sseintvecmode> 1 "register_operand")
   (match_operand:VI2_AVX512VL 2 "register_operand")
   (match_operand:VI2_AVX512VL 3 "nonimmediate_operand")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512BW"
{
  emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 (
        operands[0], operands[1], operands[2], operands[3],
        CONST0_RTX (<MODE>mode), operands[4]));
  DONE;
})
17866
;; vpermt2 for the VI48F modes, represented as UNSPEC_VPERMT2.
;; Operand 1 is the <sseintvecmode> index vector; operand 2 (a data
;; vector) is tied to the destination by the "0" constraint; operand 3 may
;; be in memory.
17867 (define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>"
17868 [(set (match_operand:VI48F 0 "register_operand" "=v")
17869 (unspec:VI48F
17870 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
17871 (match_operand:VI48F 2 "register_operand" "0")
17872 (match_operand:VI48F 3 "nonimmediate_operand" "vm")]
17873 UNSPEC_VPERMT2))]
17874 "TARGET_AVX512F"
17875 "vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
17876 [(set_attr "type" "sselog")
17877 (set_attr "prefix" "evex")
17878 (set_attr "mode" "<sseinsnmode>")])
17879
;; vpermt2 for the VI1_AVX512VL modes (TARGET_AVX512VBMI), represented as
;; UNSPEC_VPERMT2.  Operand 1 is the index vector; operand 2 is tied to
;; the destination by the "0" constraint; operand 3 may be in memory.
17880 (define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>"
17881 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
17882 (unspec:VI1_AVX512VL
17883 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
17884 (match_operand:VI1_AVX512VL 2 "register_operand" "0")
17885 (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")]
17886 UNSPEC_VPERMT2))]
17887 "TARGET_AVX512VBMI"
17888 "vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
17889 [(set_attr "type" "sselog")
17890 (set_attr "prefix" "evex")
17891 (set_attr "mode" "<sseinsnmode>")])
17892
;; vpermt2 for the VI2_AVX512VL modes (TARGET_AVX512BW), represented as
;; UNSPEC_VPERMT2.  Operand 1 is the index vector; operand 2 is tied to
;; the destination by the "0" constraint; operand 3 may be in memory.
17893 (define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>"
17894 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17895 (unspec:VI2_AVX512VL
17896 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
17897 (match_operand:VI2_AVX512VL 2 "register_operand" "0")
17898 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")]
17899 UNSPEC_VPERMT2))]
17900 "TARGET_AVX512BW"
17901 "vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
17902 [(set_attr "type" "sselog")
17903 (set_attr "prefix" "evex")
17904 (set_attr "mode" "<sseinsnmode>")])
17905
;; Merge-masking vpermt2 for the VI48F modes: a vec_merge, under mask
;; operand 4, of the UNSPEC_VPERMT2 result with operand 2 (match_dup 2),
;; which is also the operand tied to the destination.  The mask is printed
;; explicitly as {%4} in the template.
17906 (define_insn "<avx512>_vpermt2var<mode>3_mask"
17907 [(set (match_operand:VI48F 0 "register_operand" "=v")
17908 (vec_merge:VI48F
17909 (unspec:VI48F
17910 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
17911 (match_operand:VI48F 2 "register_operand" "0")
17912 (match_operand:VI48F 3 "nonimmediate_operand" "vm")]
17913 UNSPEC_VPERMT2)
17914 (match_dup 2)
17915 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
17916 "TARGET_AVX512F"
17917 "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
17918 [(set_attr "type" "sselog")
17919 (set_attr "prefix" "evex")
17920 (set_attr "mode" "<sseinsnmode>")])
17921
;; Merge-masking vpermt2 for the VI1_AVX512VL modes (TARGET_AVX512VBMI):
;; a vec_merge, under mask operand 4, of the UNSPEC_VPERMT2 result with
;; operand 2 (match_dup 2), which is also tied to the destination.
17922 (define_insn "<avx512>_vpermt2var<mode>3_mask"
17923 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
17924 (vec_merge:VI1_AVX512VL
17925 (unspec:VI1_AVX512VL
17926 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
17927 (match_operand:VI1_AVX512VL 2 "register_operand" "0")
17928 (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")]
17929 UNSPEC_VPERMT2)
17930 (match_dup 2)
17931 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
17932 "TARGET_AVX512VBMI"
17933 "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
17934 [(set_attr "type" "sselog")
17935 (set_attr "prefix" "evex")
17936 (set_attr "mode" "<sseinsnmode>")])
17937
;; Merge-masking vpermt2 for the VI2_AVX512VL modes (TARGET_AVX512BW):
;; a vec_merge, under mask operand 4, of the UNSPEC_VPERMT2 result with
;; operand 2 (match_dup 2), which is also tied to the destination.
17938 (define_insn "<avx512>_vpermt2var<mode>3_mask"
17939 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17940 (vec_merge:VI2_AVX512VL
17941 (unspec:VI2_AVX512VL
17942 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
17943 (match_operand:VI2_AVX512VL 2 "register_operand" "0")
17944 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")]
17945 UNSPEC_VPERMT2)
17946 (match_dup 2)
17947 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
17948 "TARGET_AVX512BW"
17949 "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
17950 [(set_attr "type" "sselog")
17951 (set_attr "prefix" "evex")
17952 (set_attr "mode" "<sseinsnmode>")])
17953
;; Expand a two-source 128-bit lane permute of 256-bit vectors.
;; If neither zeroing bit of the immediate (0x08, 0x80) is set, the
;; permutation is emitted as an explicit vec_select over the vec_concat
;; of both sources.  Otherwise the expander falls through and the unspec
;; form given in the pattern is emitted.
(define_expand "avx_vperm2f128<mode>3"
  [(set (match_operand:AVX256MODE2P 0 "register_operand")
        (unspec:AVX256MODE2P
          [(match_operand:AVX256MODE2P 1 "register_operand")
           (match_operand:AVX256MODE2P 2 "nonimmediate_operand")
           (match_operand:SI 3 "const_0_to_255_operand")]
          UNSPEC_VPERMIL2F128))]
  "TARGET_AVX"
{
  const int imm = INTVAL (operands[3]);

  /* Only immediates that do not request lane zeroing have a plain
     vec_select representation.  */
  if (!(imm & 0x88))
    {
      const int nelt = <ssescalarnum>;
      const int half = nelt / 2;
      /* Index of the first element of the lane chosen for the low and
         the high half of the result, counted over the concatenation
         of operands 1 and 2.  */
      const int lo_base = (imm & 3) * half;
      const int hi_base = ((imm >> 4) & 3) * half;
      rtx sel[<ssescalarnum>];
      rtx both, par;
      int k;

      for (k = 0; k < half; k++)
        {
          sel[k] = GEN_INT (lo_base + k);
          sel[half + k] = GEN_INT (hi_base + k);
        }

      both = gen_rtx_VEC_CONCAT (<ssedoublevecmode>mode,
                                 operands[1], operands[2]);
      par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, sel));
      emit_insn (gen_rtx_SET (operands[0],
                              gen_rtx_VEC_SELECT (<MODE>mode, both, par)));
      DONE;
    }
})
17986
17987 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
17988 ;; means that in order to represent this properly in rtl we'd have to
17989 ;; nest *another* vec_concat with a zero operand and do the select from
17990 ;; a 4x wide vector. That doesn't seem very nice.
;; Unspec form of the 128-bit lane permute: the 8-bit immediate
;; (operand 3) is printed unchanged as the instruction's immediate.
17991 (define_insn "*avx_vperm2f128<mode>_full"
17992 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
17993 (unspec:AVX256MODE2P
17994 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
17995 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
17996 (match_operand:SI 3 "const_0_to_255_operand" "n")]
17997 UNSPEC_VPERMIL2F128))]
17998 "TARGET_AVX"
17999 "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18000 [(set_attr "type" "sselog")
18001 (set_attr "prefix_extra" "1")
18002 (set_attr "length_immediate" "1")
18003 (set_attr "prefix" "vex")
18004 (set_attr "mode" "<sseinsnmode>")])
18005
;; 128-bit lane permute expressed as a vec_select over the concatenation
;; of operands 1 and 2.  avx_vperm2f128_parallel validates the selection
;; vector: the insn condition treats a nonzero result as a match, and the
;; result minus one is used as the instruction immediate.
(define_insn "*avx_vperm2f128<mode>_nozero"
  [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
        (vec_select:AVX256MODE2P
          (vec_concat:<ssedoublevecmode>
            (match_operand:AVX256MODE2P 1 "register_operand" "x")
            (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
          (match_parallel 3 ""
            [(match_operand 4 "const_int_operand")])))]
  "TARGET_AVX
   && avx_vperm2f128_parallel (operands[3], <MODE>mode)"
{
  const int imm = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;

  switch (imm)
    {
    case 0x12:
      /* Low lane from operand 2, high lane kept from operand 1.  */
      return "vinsert<i128>\t{$0, %x2, %1, %0|%0, %1, %x2, 0}";

    case 0x20:
      /* Low lane kept from operand 1, high lane from operand 2's low
         lane.  */
      return "vinsert<i128>\t{$1, %x2, %1, %0|%0, %1, %x2, 1}";

    default:
      /* General case: hand the decoded immediate to vperm2[fi]128.  */
      operands[3] = GEN_INT (imm);
      return "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
    }
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])
18030
;; Single-source vec_select whose selection vector satisfies
;; palignr_operand, emitted as (v)palignr with both sources equal to
;; operand 1.  The immediate is the first selected element index
;; (operand 3) scaled by the element size in bytes.
(define_insn "*ssse3_palignr<mode>_perm"
  [(set (match_operand:V_128 0 "register_operand" "=x,x")
        (vec_select:V_128
          (match_operand:V_128 1 "register_operand" "0,x")
          (match_parallel 2 "palignr_operand"
            [(match_operand 3 "const_int_operand" "n, n")])))]
  "TARGET_SSSE3"
{
  /* Size in bytes of one vector element of the destination mode.  */
  const int unit = GET_MODE_UNIT_SIZE (GET_MODE (operands[0]));

  /* Reuse operand 2 to carry the byte shift count for the template.  */
  operands[2] = GEN_INT (INTVAL (operands[3]) * unit);

  if (which_alternative == 0)
    return "palignr\t{%2, %1, %0|%0, %1, %2}";
  if (which_alternative == 1)
    return "vpalignr\t{%2, %1, %1, %0|%0, %1, %1, %2}";
  gcc_unreachable ();
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseishft")
   (set_attr "atom_unit" "sishuf")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "orig,vex")])
18059
;; Masked insertion of a 128-bit half (operand 2) into the 256-bit vector
;; operand 1.  Operand 3 (0 or 1) picks the half to replace and thereby
;; the vec_set_lo/vec_set_hi mask generator; operands 4 and 5 are passed
;; through to that generator as its last two arguments.
(define_expand "avx512vl_vinsert<mode>"
  [(match_operand:VI48F_256 0 "register_operand")
   (match_operand:VI48F_256 1 "register_operand")
   (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
   (match_operand:SI 3 "const_0_to_1_operand")
   (match_operand:VI48F_256 4 "register_operand")
   (match_operand:<avx512fmaskmode> 5 "register_operand")]
  "TARGET_AVX512VL"
{
  rtx (*gen_half) (rtx, rtx, rtx, rtx, rtx);
  const HOST_WIDE_INT which = INTVAL (operands[3]);

  if (which == 0)
    gen_half = gen_vec_set_lo_<mode>_mask;
  else if (which == 1)
    gen_half = gen_vec_set_hi_<mode>_mask;
  else
    gcc_unreachable ();

  emit_insn (gen_half (operands[0], operands[1], operands[2],
                       operands[4], operands[5]));
  DONE;
})
18087
;; Insertion of a 128-bit half (operand 2) into the 256-bit vector
;; operand 1.  Operand 3 (0 or 1) picks the half to replace and thereby
;; the vec_set_lo/vec_set_hi generator that is called.
(define_expand "avx_vinsertf128<mode>"
  [(match_operand:V_256 0 "register_operand")
   (match_operand:V_256 1 "register_operand")
   (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
   (match_operand:SI 3 "const_0_to_1_operand")]
  "TARGET_AVX"
{
  rtx (*gen_half) (rtx, rtx, rtx);
  const HOST_WIDE_INT which = INTVAL (operands[3]);

  if (which == 0)
    gen_half = gen_vec_set_lo_<mode>;
  else if (which == 1)
    gen_half = gen_vec_set_hi_<mode>;
  else
    gcc_unreachable ();

  emit_insn (gen_half (operands[0], operands[1], operands[2]));
  DONE;
})
18112
;; Replace the low 128-bit half of a 256-bit vector of 64-bit elements:
;; the result is operand 2 followed by elements 2-3 of operand 1.
;;
;; The masked variant needs an EVEX encoding on a 256-bit vector, so the
;; condition requires AVX512VL for it (<mask_avx512vl_condition> is "1"
;; for the unmasked pattern).  The 64x2 insert is an AVX512DQ
;; instruction, so it is only emitted when AVX512DQ is enabled as well.
;; With AVX512VL alone the 32x4 form is used, which moves the same
;; 128 bits.
;; NOTE(review): for the masked variant with AVX512VL but without
;; AVX512DQ the 32x4 form applies the write mask per 32-bit element,
;; which does not match a 64-bit element mask.  Confirm that variant is
;; only generated when AVX512DQ is enabled, or tighten the condition.
(define_insn "vec_set_lo_<mode><mask_name>"
  [(set (match_operand:VI8F_256 0 "register_operand" "=v")
        (vec_concat:VI8F_256
          (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
          (vec_select:<ssehalfvecmode>
            (match_operand:VI8F_256 1 "register_operand" "v")
            (parallel [(const_int 2) (const_int 3)]))))]
  "TARGET_AVX && <mask_avx512vl_condition>"
{
  if (TARGET_AVX512VL)
    {
      if (TARGET_AVX512DQ)
        return "vinsert<shuffletype>64x2\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
      return "vinsert<shuffletype>32x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
    }
  return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])
18132
;; Replace the high 128-bit half of a 256-bit vector of 64-bit elements:
;; the result is elements 0-1 of operand 1 followed by operand 2.
;;
;; The masked variant needs an EVEX encoding on a 256-bit vector, so the
;; condition requires AVX512VL for it (<mask_avx512vl_condition> is "1"
;; for the unmasked pattern).  The 64x2 insert is an AVX512DQ
;; instruction, so it is only emitted when AVX512DQ is enabled as well.
;; With AVX512VL alone the 32x4 form is used, which moves the same
;; 128 bits.
;; NOTE(review): for the masked variant with AVX512VL but without
;; AVX512DQ the 32x4 form applies the write mask per 32-bit element,
;; which does not match a 64-bit element mask.  Confirm that variant is
;; only generated when AVX512DQ is enabled, or tighten the condition.
(define_insn "vec_set_hi_<mode><mask_name>"
  [(set (match_operand:VI8F_256 0 "register_operand" "=v")
        (vec_concat:VI8F_256
          (vec_select:<ssehalfvecmode>
            (match_operand:VI8F_256 1 "register_operand" "v")
            (parallel [(const_int 0) (const_int 1)]))
          (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
  "TARGET_AVX && <mask_avx512vl_condition>"
{
  if (TARGET_AVX512VL)
    {
      if (TARGET_AVX512DQ)
        return "vinsert<shuffletype>64x2\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
      return "vinsert<shuffletype>32x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
    }
  return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])
18152
;; Replace the low 128-bit half of a 256-bit vector of 32-bit elements:
;; the result is operand 2 followed by elements 4-7 of operand 1.
;;
;; Only the AVX512VL branch of the output code prints the mask operand,
;; so the masked variant must not be enabled without AVX512VL.  Without
;; this check it would fall through to the VEX form and silently drop
;; the mask.  <mask_avx512vl_condition> is "1" for the unmasked pattern.
(define_insn "vec_set_lo_<mode><mask_name>"
  [(set (match_operand:VI4F_256 0 "register_operand" "=v")
        (vec_concat:VI4F_256
          (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
          (vec_select:<ssehalfvecmode>
            (match_operand:VI4F_256 1 "register_operand" "v")
            (parallel [(const_int 4) (const_int 5)
                       (const_int 6) (const_int 7)]))))]
  "TARGET_AVX && <mask_avx512vl_condition>"
{
  if (TARGET_AVX512VL)
    return "vinsert<shuffletype>32x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
  else
    return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])
18173
;; Replace the high 128-bit half of a 256-bit vector of 32-bit elements:
;; the result is elements 0-3 of operand 1 followed by operand 2.
;;
;; Only the AVX512VL branch of the output code prints the mask operand,
;; so the masked variant must not be enabled without AVX512VL.  Without
;; this check it would fall through to the VEX form and silently drop
;; the mask.  <mask_avx512vl_condition> is "1" for the unmasked pattern.
(define_insn "vec_set_hi_<mode><mask_name>"
  [(set (match_operand:VI4F_256 0 "register_operand" "=v")
        (vec_concat:VI4F_256
          (vec_select:<ssehalfvecmode>
            (match_operand:VI4F_256 1 "register_operand" "v")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)]))
          (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
  "TARGET_AVX && <mask_avx512vl_condition>"
{
  if (TARGET_AVX512VL)
    return "vinsert<shuffletype>32x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
  else
    return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])
18194
;; Replace the low 128-bit half of a V16HI vector: the result is
;; operand 2 (V8HI) followed by elements 8-15 of operand 1.
18195 (define_insn "vec_set_lo_v16hi"
18196 [(set (match_operand:V16HI 0 "register_operand" "=x")
18197 (vec_concat:V16HI
18198 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
18199 (vec_select:V8HI
18200 (match_operand:V16HI 1 "register_operand" "x")
18201 (parallel [(const_int 8) (const_int 9)
18202 (const_int 10) (const_int 11)
18203 (const_int 12) (const_int 13)
18204 (const_int 14) (const_int 15)]))))]
18205 "TARGET_AVX"
18206 "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
18207 [(set_attr "type" "sselog")
18208 (set_attr "prefix_extra" "1")
18209 (set_attr "length_immediate" "1")
18210 (set_attr "prefix" "vex")
18211 (set_attr "mode" "OI")])
18212
;; Replace the high 128-bit half of a V16HI vector: the result is
;; elements 0-7 of operand 1 followed by operand 2 (V8HI).
18213 (define_insn "vec_set_hi_v16hi"
18214 [(set (match_operand:V16HI 0 "register_operand" "=x")
18215 (vec_concat:V16HI
18216 (vec_select:V8HI
18217 (match_operand:V16HI 1 "register_operand" "x")
18218 (parallel [(const_int 0) (const_int 1)
18219 (const_int 2) (const_int 3)
18220 (const_int 4) (const_int 5)
18221 (const_int 6) (const_int 7)]))
18222 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
18223 "TARGET_AVX"
18224 "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
18225 [(set_attr "type" "sselog")
18226 (set_attr "prefix_extra" "1")
18227 (set_attr "length_immediate" "1")
18228 (set_attr "prefix" "vex")
18229 (set_attr "mode" "OI")])
18230
;; Replace the low 128-bit half of a V32QI vector: the result is
;; operand 2 (V16QI) followed by elements 16-31 of operand 1.
18231 (define_insn "vec_set_lo_v32qi"
18232 [(set (match_operand:V32QI 0 "register_operand" "=x")
18233 (vec_concat:V32QI
18234 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
18235 (vec_select:V16QI
18236 (match_operand:V32QI 1 "register_operand" "x")
18237 (parallel [(const_int 16) (const_int 17)
18238 (const_int 18) (const_int 19)
18239 (const_int 20) (const_int 21)
18240 (const_int 22) (const_int 23)
18241 (const_int 24) (const_int 25)
18242 (const_int 26) (const_int 27)
18243 (const_int 28) (const_int 29)
18244 (const_int 30) (const_int 31)]))))]
18245 "TARGET_AVX"
18246 "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
18247 [(set_attr "type" "sselog")
18248 (set_attr "prefix_extra" "1")
18249 (set_attr "length_immediate" "1")
18250 (set_attr "prefix" "vex")
18251 (set_attr "mode" "OI")])
18252
;; Replace the high 128-bit half of a V32QI vector: the result is
;; elements 0-15 of operand 1 followed by operand 2 (V16QI).
18253 (define_insn "vec_set_hi_v32qi"
18254 [(set (match_operand:V32QI 0 "register_operand" "=x")
18255 (vec_concat:V32QI
18256 (vec_select:V16QI
18257 (match_operand:V32QI 1 "register_operand" "x")
18258 (parallel [(const_int 0) (const_int 1)
18259 (const_int 2) (const_int 3)
18260 (const_int 4) (const_int 5)
18261 (const_int 6) (const_int 7)
18262 (const_int 8) (const_int 9)
18263 (const_int 10) (const_int 11)
18264 (const_int 12) (const_int 13)
18265 (const_int 14) (const_int 15)]))
18266 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
18267 "TARGET_AVX"
18268 "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
18269 [(set_attr "type" "sselog")
18270 (set_attr "prefix_extra" "1")
18271 (set_attr "length_immediate" "1")
18272 (set_attr "prefix" "vex")
18273 (set_attr "mode" "OI")])
18274
;; Masked vector load represented as UNSPEC_MASKMOV: operand 2 is the
;; integer mask vector (a register) and operand 1 the memory source.
18275 (define_insn "<avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>"
18276 [(set (match_operand:V48_AVX2 0 "register_operand" "=x")
18277 (unspec:V48_AVX2
18278 [(match_operand:<sseintvecmode> 2 "register_operand" "x")
18279 (match_operand:V48_AVX2 1 "memory_operand" "m")]
18280 UNSPEC_MASKMOV))]
18281 "TARGET_AVX"
18282 "v<sseintprefix>maskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
18283 [(set_attr "type" "sselog1")
18284 (set_attr "prefix_extra" "1")
18285 (set_attr "prefix" "vex")
18286 (set_attr "btver2_decode" "vector")
18287 (set_attr "mode" "<sseinsnmode>")])
18288
;; Masked vector store represented as UNSPEC_MASKMOV: operand 1 is the
;; integer mask vector and operand 2 the data register.  The memory
;; destination is read-write ("+m") and also appears as an input
;; (match_dup 0) of the unspec.
18289 (define_insn "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>"
18290 [(set (match_operand:V48_AVX2 0 "memory_operand" "+m")
18291 (unspec:V48_AVX2
18292 [(match_operand:<sseintvecmode> 1 "register_operand" "x")
18293 (match_operand:V48_AVX2 2 "register_operand" "x")
18294 (match_dup 0)]
18295 UNSPEC_MASKMOV))]
18296 "TARGET_AVX"
18297 "v<sseintprefix>maskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
18298 [(set_attr "type" "sselog1")
18299 (set_attr "prefix_extra" "1")
18300 (set_attr "prefix" "vex")
18301 (set_attr "btver2_decode" "vector")
18302 (set_attr "mode" "<sseinsnmode>")])
18303
;; Named maskload expander for the V48_AVX2 modes with an integer-vector
;; mask (operand 2); it has no preparation code and emits the
;; UNSPEC_MASKMOV load RTL as written.
18304 (define_expand "maskload<mode><sseintvecmodelower>"
18305 [(set (match_operand:V48_AVX2 0 "register_operand")
18306 (unspec:V48_AVX2
18307 [(match_operand:<sseintvecmode> 2 "register_operand")
18308 (match_operand:V48_AVX2 1 "memory_operand")]
18309 UNSPEC_MASKMOV))]
18310 "TARGET_AVX")
18311
;; Named maskload expander for the V48_AVX512VL modes with a mask
;; register operand (operand 2): a vec_merge of the memory source with
;; the previous contents of the destination (match_dup 0).
18312 (define_expand "maskload<mode><avx512fmaskmodelower>"
18313 [(set (match_operand:V48_AVX512VL 0 "register_operand")
18314 (vec_merge:V48_AVX512VL
18315 (match_operand:V48_AVX512VL 1 "memory_operand")
18316 (match_dup 0)
18317 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
18318 "TARGET_AVX512F")
18319
;; Named maskload expander for the VI12_AVX512VL modes with a mask
;; register operand (operand 2); same vec_merge shape as the
;; V48_AVX512VL expander, enabled by TARGET_AVX512BW.
18320 (define_expand "maskload<mode><avx512fmaskmodelower>"
18321 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
18322 (vec_merge:VI12_AVX512VL
18323 (match_operand:VI12_AVX512VL 1 "memory_operand")
18324 (match_dup 0)
18325 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
18326 "TARGET_AVX512BW")
18327
;; Named maskstore expander for the V48_AVX2 modes with an integer-vector
;; mask (operand 2); it has no preparation code and emits the
;; UNSPEC_MASKMOV store RTL as written, with the destination memory also
;; listed as an input (match_dup 0).
18328 (define_expand "maskstore<mode><sseintvecmodelower>"
18329 [(set (match_operand:V48_AVX2 0 "memory_operand")
18330 (unspec:V48_AVX2
18331 [(match_operand:<sseintvecmode> 2 "register_operand")
18332 (match_operand:V48_AVX2 1 "register_operand")
18333 (match_dup 0)]
18334 UNSPEC_MASKMOV))]
18335 "TARGET_AVX")
18336
;; Named maskstore expander for the V48_AVX512VL modes with a mask
;; register operand (operand 2): a vec_merge of the data register with
;; the previous memory contents (match_dup 0).
18337 (define_expand "maskstore<mode><avx512fmaskmodelower>"
18338 [(set (match_operand:V48_AVX512VL 0 "memory_operand")
18339 (vec_merge:V48_AVX512VL
18340 (match_operand:V48_AVX512VL 1 "register_operand")
18341 (match_dup 0)
18342 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
18343 "TARGET_AVX512F")
18344
;; Named maskstore expander for the VI12_AVX512VL modes with a mask
;; register operand (operand 2); same vec_merge shape as the
;; V48_AVX512VL expander, enabled by TARGET_AVX512BW.
18345 (define_expand "maskstore<mode><avx512fmaskmodelower>"
18346 [(set (match_operand:VI12_AVX512VL 0 "memory_operand")
18347 (vec_merge:VI12_AVX512VL
18348 (match_operand:VI12_AVX512VL 1 "register_operand")
18349 (match_dup 0)
18350 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
18351 "TARGET_AVX512BW")
18352
;; Cast of a half-width vector (operand 1) to a 256-bit vector, kept as
;; UNSPEC_CAST until after reload and then split into a single move.
;; The move is done in one common mode by re-creating the register side
;; in the other operand's mode: a register destination is narrowed to
;; the half-width mode, otherwise the source register is widened to the
;; full mode of the memory destination.
(define_insn_and_split "avx_<castmode><avxsizesuffix>_<castmode>"
  [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
        (unspec:AVX256MODE2P
          [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
          UNSPEC_CAST))]
  "TARGET_AVX"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx dst = operands[0];
  rtx src = operands[1];

  if (!REG_P (dst))
    /* Memory destination: the source is a register (second
       alternative), so view it in the full-width mode.  */
    src = gen_rtx_REG (<MODE>mode, REGNO (src));
  else
    /* Register destination: only its low half is written.  */
    dst = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (dst));

  emit_move_insn (dst, src);
  DONE;
})
18372
;; Vector initialization expander for the V_256 modes; all work is
;; delegated to ix86_expand_vector_init with destination operand 0 and
;; initializer operand 1 (first argument false).
18373 (define_expand "vec_init<mode>"
18374 [(match_operand:V_256 0 "register_operand")
18375 (match_operand 1)]
18376 "TARGET_AVX"
18377 {
18378 ix86_expand_vector_init (false, operands[0], operands[1]);
18379 DONE;
18380 })
18381
;; Vector initialization expander for the VF48_I1248 modes; all work is
;; delegated to ix86_expand_vector_init with destination operand 0 and
;; initializer operand 1 (first argument false).
18382 (define_expand "vec_init<mode>"
18383 [(match_operand:VF48_I1248 0 "register_operand")
18384 (match_operand 1)]
18385 "TARGET_AVX512F"
18386 {
18387 ix86_expand_vector_init (false, operands[0], operands[1]);
18388 DONE;
18389 })
18390
;; Variable arithmetic right shift (ashiftrt with a vector shift count)
;; for the VI48_AVX512F_AVX512VL modes: operand 1 shifted by operand 2,
;; which may be in memory.  Optionally masked via <mask_name>.
18391 (define_insn "<avx2_avx512>_ashrv<mode><mask_name>"
18392 [(set (match_operand:VI48_AVX512F_AVX512VL 0 "register_operand" "=v")
18393 (ashiftrt:VI48_AVX512F_AVX512VL
18394 (match_operand:VI48_AVX512F_AVX512VL 1 "register_operand" "v")
18395 (match_operand:VI48_AVX512F_AVX512VL 2 "nonimmediate_operand" "vm")))]
18396 "TARGET_AVX2 && <mask_mode512bit_condition>"
18397 "vpsrav<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
18398 [(set_attr "type" "sseishft")
18399 (set_attr "prefix" "maybe_evex")
18400 (set_attr "mode" "<sseinsnmode>")])
18401
;; Variable arithmetic right shift for the VI2_AVX512VL modes, emitted
;; as vpsravw and enabled by TARGET_AVX512BW.  Optionally masked via
;; <mask_name>.
18402 (define_insn "<avx2_avx512>_ashrv<mode><mask_name>"
18403 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
18404 (ashiftrt:VI2_AVX512VL
18405 (match_operand:VI2_AVX512VL 1 "register_operand" "v")
18406 (match_operand:VI2_AVX512VL 2 "nonimmediate_operand" "vm")))]
18407 "TARGET_AVX512BW"
18408 "vpsravw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
18409 [(set_attr "type" "sseishft")
18410 (set_attr "prefix" "maybe_evex")
18411 (set_attr "mode" "<sseinsnmode>")])
18412
;; Variable shift with a vector shift count for the VI48_AVX512F modes;
;; the any_lshift code iterator selects the shift operation and
;; <vshift> the mnemonic.  Optionally masked via <mask_name>.
18413 (define_insn "<avx2_avx512>_<shift_insn>v<mode><mask_name>"
18414 [(set (match_operand:VI48_AVX512F 0 "register_operand" "=v")
18415 (any_lshift:VI48_AVX512F
18416 (match_operand:VI48_AVX512F 1 "register_operand" "v")
18417 (match_operand:VI48_AVX512F 2 "nonimmediate_operand" "vm")))]
18418 "TARGET_AVX2 && <mask_mode512bit_condition>"
18419 "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
18420 [(set_attr "type" "sseishft")
18421 (set_attr "prefix" "maybe_evex")
18422 (set_attr "mode" "<sseinsnmode>")])
18423
;; Variable shift with a vector shift count for the VI2_AVX512VL modes,
;; enabled by TARGET_AVX512BW; the any_lshift code iterator selects the
;; shift operation.  Optionally masked via <mask_name>.
18424 (define_insn "<avx2_avx512>_<shift_insn>v<mode><mask_name>"
18425 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
18426 (any_lshift:VI2_AVX512VL
18427 (match_operand:VI2_AVX512VL 1 "register_operand" "v")
18428 (match_operand:VI2_AVX512VL 2 "nonimmediate_operand" "vm")))]
18429 "TARGET_AVX512BW"
18430 "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
18431 [(set_attr "type" "sseishft")
18432 (set_attr "prefix" "maybe_evex")
18433 (set_attr "mode" "<sseinsnmode>")])
18434
;; Concatenate two half-width vectors into a 256- or 512-bit vector.
;; Alternative 0 inserts operand 2 as the upper half with vinsert.
;; Alternative 1 (operand 2 matching constraint "C") only moves operand 1
;; into the lower half of the destination, with the move mnemonic chosen
;; from the insn's mode attribute.
(define_insn "avx_vec_concat<mode>"
  [(set (match_operand:V_256_512 0 "register_operand" "=x,x")
        (vec_concat:V_256_512
          (match_operand:<ssehalfvecmode> 1 "register_operand" "x,x")
          (match_operand:<ssehalfvecmode> 2 "vector_move_operand" "xm,C")))]
  "TARGET_AVX"
{
  if (which_alternative == 0)
    return "vinsert<i128>\t{$0x1, %2, %<concat_tg_mode>1, %0|%0, %<concat_tg_mode>1, %2, 0x1}";

  if (which_alternative != 1)
    gcc_unreachable ();

  /* The destination is printed with the %t modifier for the 512-bit
     modes and with %x for the 256-bit modes.  */
  switch (get_attr_mode (insn))
    {
    case MODE_V16SF:
      return "vmovaps\t{%1, %t0|%t0, %1}";
    case MODE_V8SF:
      return "vmovaps\t{%1, %x0|%x0, %1}";
    case MODE_V8DF:
      return "vmovapd\t{%1, %t0|%t0, %1}";
    case MODE_V4DF:
      return "vmovapd\t{%1, %x0|%x0, %1}";
    case MODE_XI:
      return "vmovdqa\t{%1, %t0|%t0, %1}";
    case MODE_OI:
      return "vmovdqa\t{%1, %x0|%x0, %1}";
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "sselog,ssemov")
   (set_attr "prefix_extra" "1,*")
   (set_attr "length_immediate" "1,*")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "<sseinsnmode>")])
18473
;; Half-to-single conversion of a V8HI register, keeping elements 0-3 of
;; the converted V8SF value as a V4SF result.
;; The masked variant prints a mask operand and therefore requires
;; AVX512VL; <mask_avx512vl_condition> is "1" for the unmasked pattern,
;; which stays available with F16C alone.
(define_insn "vcvtph2ps<mask_name>"
  [(set (match_operand:V4SF 0 "register_operand" "=v")
        (vec_select:V4SF
          (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "v")]
                       UNSPEC_VCVTPH2PS)
          (parallel [(const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))]
  "(TARGET_F16C || TARGET_AVX512VL) && <mask_avx512vl_condition>"
  "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "V4SF")])
18486
;; Half-to-single conversion of four halves loaded from memory (V4HI)
;; into a V4SF register.
;; The masked variant prints a mask operand and therefore requires
;; AVX512VL; <mask_avx512vl_condition> is "1" for the unmasked pattern.
;; The prefix attribute is "maybe_evex", matching vcvtph2ps<mask_name>,
;; because the "v" destination and the mask can require an EVEX encoding.
(define_insn "*vcvtph2ps_load<mask_name>"
  [(set (match_operand:V4SF 0 "register_operand" "=v")
        (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
                     UNSPEC_VCVTPH2PS))]
  "(TARGET_F16C || TARGET_AVX512VL) && <mask_avx512vl_condition>"
  "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "V8SF")])
18496
;; Half-to-single conversion of eight halves (V8HI, register or memory)
;; into a V8SF register.
;; The masked variant prints a mask operand and therefore requires
;; AVX512VL; <mask_avx512vl_condition> is "1" for the unmasked pattern.
;; The prefix attribute is "maybe_evex", matching vcvtph2ps<mask_name>,
;; because the "v" operands and the mask can require an EVEX encoding.
(define_insn "vcvtph2ps256<mask_name>"
  [(set (match_operand:V8SF 0 "register_operand" "=v")
        (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "vm")]
                     UNSPEC_VCVTPH2PS))]
  "(TARGET_F16C || TARGET_AVX512VL) && <mask_avx512vl_condition>"
  "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "maybe_evex")
   (set_attr "btver2_decode" "double")
   (set_attr "mode" "V8SF")])
18507
;; 512-bit half-to-single conversion (V16HI to V16SF), with optional
;; masking (<mask_name>) and an optional SAE operand
;; (<round_saeonly_name>), which is printed next to operand 1.
18508 (define_insn "<mask_codefor>avx512f_vcvtph2ps512<mask_name><round_saeonly_name>"
18509 [(set (match_operand:V16SF 0 "register_operand" "=v")
18510 (unspec:V16SF
18511 [(match_operand:V16HI 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
18512 UNSPEC_VCVTPH2PS))]
18513 "TARGET_AVX512F"
18514 "vcvtph2ps\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
18515 [(set_attr "type" "ssecvt")
18516 (set_attr "prefix" "evex")
18517 (set_attr "mode" "V16SF")])
18518
;; Masked single-to-half conversion of a V4SF register with immediate
;; operand 2.  The four converted halves form the low part of the V8HI
;; result and the high part is operand 5, which the preparation code
;; sets to the V4HI zero vector.  The result is merged with operand 3
;; under the QImode mask operand 4.
18519 (define_expand "vcvtps2ph_mask"
18520 [(set (match_operand:V8HI 0 "register_operand")
18521 (vec_merge:V8HI
18522 (vec_concat:V8HI
18523 (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
18524 (match_operand:SI 2 "const_0_to_255_operand")]
18525 UNSPEC_VCVTPS2PH)
18526 (match_dup 5))
18527 (match_operand:V8HI 3 "vector_move_operand")
18528 (match_operand:QI 4 "register_operand")))]
18529 "TARGET_AVX512VL"
18530 "operands[5] = CONST0_RTX (V4HImode);")
18531
;; Single-to-half conversion of a V4SF register with immediate
;; operand 2.  The four converted halves form the low part of the V8HI
;; result and the high part is operand 3, which the preparation code
;; sets to the V4HI zero vector.
18532 (define_expand "vcvtps2ph"
18533 [(set (match_operand:V8HI 0 "register_operand")
18534 (vec_concat:V8HI
18535 (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
18536 (match_operand:SI 2 "const_0_to_255_operand")]
18537 UNSPEC_VCVTPS2PH)
18538 (match_dup 3)))]
18539 "TARGET_F16C"
18540 "operands[3] = CONST0_RTX (V4HImode);")
18541
;; Register form of the V4SF single-to-half conversion: converted
;; halves in the low part of the V8HI destination, a zero V4HI vector
;; (operand 3, const0_operand) in the high part.  The masked variant
;; is restricted to AVX512VL through <mask_avx512vl_condition>.
18542 (define_insn "*vcvtps2ph<mask_name>"
18543 [(set (match_operand:V8HI 0 "register_operand" "=v")
18544 (vec_concat:V8HI
18545 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "v")
18546 (match_operand:SI 2 "const_0_to_255_operand" "N")]
18547 UNSPEC_VCVTPS2PH)
18548 (match_operand:V4HI 3 "const0_operand")))]
18549 "(TARGET_F16C || TARGET_AVX512VL) && <mask_avx512vl_condition>"
18550 "vcvtps2ph\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"
18551 [(set_attr "type" "ssecvt")
18552 (set_attr "prefix" "maybe_evex")
18553 (set_attr "mode" "V4SF")])
18554
;; Single-to-half conversion of a V4SF register stored directly to a
;; V4HI memory destination, with immediate operand 2.
;; The masked variant prints a mask operand and therefore requires
;; AVX512VL, as in *vcvtps2ph<mask_name>; <mask_avx512vl_condition> is
;; "1" for the unmasked pattern.
(define_insn "*vcvtps2ph_store<mask_name>"
  [(set (match_operand:V4HI 0 "memory_operand" "=m")
        (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
                      (match_operand:SI 2 "const_0_to_255_operand" "N")]
                     UNSPEC_VCVTPS2PH))]
  "(TARGET_F16C || TARGET_AVX512VL) && <mask_avx512vl_condition>"
  "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "V4SF")])
18565
;; Single-to-half conversion of a V8SF register into a V8HI register or
;; memory destination, with immediate operand 2.
;; The masked variant prints a mask operand and therefore requires
;; AVX512VL, as in *vcvtps2ph<mask_name>; <mask_avx512vl_condition> is
;; "1" for the unmasked pattern.
(define_insn "vcvtps2ph256<mask_name>"
  [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm")
        (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "x")
                      (match_operand:SI 2 "const_0_to_255_operand" "N")]
                     UNSPEC_VCVTPS2PH))]
  "(TARGET_F16C || TARGET_AVX512VL) && <mask_avx512vl_condition>"
  "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "maybe_evex")
   (set_attr "btver2_decode" "vector")
   (set_attr "mode" "V8SF")])
18577
;; 512-bit single-to-half conversion (V16SF to V16HI) with immediate
;; operand 2; the destination may be a register or memory.  Optionally
;; masked via <mask_name>.
18578 (define_insn "<mask_codefor>avx512f_vcvtps2ph512<mask_name>"
18579 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=vm")
18580 (unspec:V16HI
18581 [(match_operand:V16SF 1 "register_operand" "v")
18582 (match_operand:SI 2 "const_0_to_255_operand" "N")]
18583 UNSPEC_VCVTPS2PH))]
18584 "TARGET_AVX512F"
18585 "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
18586 [(set_attr "type" "ssecvt")
18587 (set_attr "prefix" "evex")
18588 (set_attr "mode" "V16SF")])
18589
18590 ;; For gather* insn patterns
;; Data vector modes handled by the avx2_gather* patterns.
18591 (define_mode_iterator VEC_GATHER_MODE
18592 [V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF])
;; Mode of the index vector operand of the gathersi patterns, keyed by
;; the data vector mode; every entry is a vector of SImode elements.
18593 (define_mode_attr VEC_GATHER_IDXSI
18594 [(V2DI "V4SI") (V4DI "V4SI") (V8DI "V8SI")
18595 (V2DF "V4SI") (V4DF "V4SI") (V8DF "V8SI")
18596 (V4SI "V4SI") (V8SI "V8SI") (V16SI "V16SI")
18597 (V4SF "V4SI") (V8SF "V8SI") (V16SF "V16SI")])
18598
;; Mode of the index vector operand of the gatherdi patterns, keyed by
;; the data vector mode; every entry is a vector of DImode elements.
18599 (define_mode_attr VEC_GATHER_IDXDI
18600 [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
18601 (V2DF "V2DI") (V4DF "V4DI") (V8DF "V8DI")
18602 (V4SI "V2DI") (V8SI "V4DI") (V16SI "V8DI")
18603 (V4SF "V2DI") (V8SF "V4DI") (V16SF "V8DI")])
18604
;; Mode of the source and mask operands of the gatherdi patterns.  It
;; equals the data mode except for V8SI, V8SF, V16SI and V16SF, which
;; map to the vector mode with half as many elements.
18605 (define_mode_attr VEC_GATHER_SRCDI
18606 [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
18607 (V2DF "V2DF") (V4DF "V4DF") (V8DF "V8DF")
18608 (V4SI "V4SI") (V8SI "V4SI") (V16SI "V8SI")
18609 (V4SF "V4SF") (V8SF "V4SF") (V16SF "V8SF")])
18610
;; AVX2 gather with 32-bit indices.
;; Operand 1 is the source vector, operand 2 the base address, operand 3
;; the index vector, operand 5 the scale (1, 2, 4 or 8) and operand 4 a
;; vector in the data mode that the matching insn ties to the clobbered
;; scratch register.  The preparation code wraps base, index and scale
;; into an UNSPEC_VSIBADDR address, stored in operand 7.
;; The scale predicate name had a stray trailing blank
;; ("const1248_operand "); it is now spelled like every other use.
(define_expand "avx2_gathersi<mode>"
  [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
                   (unspec:VEC_GATHER_MODE
                     [(match_operand:VEC_GATHER_MODE 1 "register_operand")
                      (mem:<ssescalarmode>
                        (match_par_dup 7
                          [(match_operand 2 "vsib_address_operand")
                           (match_operand:<VEC_GATHER_IDXSI>
                              3 "register_operand")
                           (match_operand:SI 5 "const1248_operand")]))
                      (mem:BLK (scratch))
                      (match_operand:VEC_GATHER_MODE 4 "register_operand")]
                     UNSPEC_GATHER))
              (clobber (match_scratch:VEC_GATHER_MODE 6))])]
  "TARGET_AVX2"
{
  operands[7]
    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
                                        operands[5]), UNSPEC_VSIBADDR);
})
18631
;; AVX2 gather with 32-bit indices and an explicit source vector.
;; Operand 2 (source) is tied to the destination and operand 5 is tied
;; to the clobbered scratch register 1; both outputs are earlyclobber.
;; Operand 7 is the VSIB memory reference built from base (3), index (4)
;; and scale (6).
18632 (define_insn "*avx2_gathersi<mode>"
18633 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
18634 (unspec:VEC_GATHER_MODE
18635 [(match_operand:VEC_GATHER_MODE 2 "register_operand" "0")
18636 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
18637 [(unspec:P
18638 [(match_operand:P 3 "vsib_address_operand" "Tv")
18639 (match_operand:<VEC_GATHER_IDXSI> 4 "register_operand" "x")
18640 (match_operand:SI 6 "const1248_operand" "n")]
18641 UNSPEC_VSIBADDR)])
18642 (mem:BLK (scratch))
18643 (match_operand:VEC_GATHER_MODE 5 "register_operand" "1")]
18644 UNSPEC_GATHER))
18645 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
18646 "TARGET_AVX2"
18647 "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
18648 [(set_attr "type" "ssemov")
18649 (set_attr "prefix" "vex")
18650 (set_attr "mode" "<sseinsnmode>")])
18651
;; Variant of the AVX2 32-bit-index gather in which the source slot of
;; the unspec is (pc) instead of a register operand, so the destination
;; is not tied to any input.  Operand 4 is tied to the clobbered scratch
;; register 1; operand 6 is the VSIB memory reference built from
;; base (2), index (3) and scale (5).
18652 (define_insn "*avx2_gathersi<mode>_2"
18653 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
18654 (unspec:VEC_GATHER_MODE
18655 [(pc)
18656 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
18657 [(unspec:P
18658 [(match_operand:P 2 "vsib_address_operand" "Tv")
18659 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "x")
18660 (match_operand:SI 5 "const1248_operand" "n")]
18661 UNSPEC_VSIBADDR)])
18662 (mem:BLK (scratch))
18663 (match_operand:VEC_GATHER_MODE 4 "register_operand" "1")]
18664 UNSPEC_GATHER))
18665 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
18666 "TARGET_AVX2"
18667 "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
18668 [(set_attr "type" "ssemov")
18669 (set_attr "prefix" "vex")
18670 (set_attr "mode" "<sseinsnmode>")])
18671
;; AVX2 gather with 64-bit indices.
;; Operand 1 is the source vector and operand 4 a vector that the
;; matching insn ties to the clobbered scratch register; both use the
;; <VEC_GATHER_SRCDI> mode.  Operand 2 is the base address, operand 3 the
;; index vector and operand 5 the scale (1, 2, 4 or 8).  The preparation
;; code wraps base, index and scale into an UNSPEC_VSIBADDR address,
;; stored in operand 7.
;; The scale predicate name had a stray trailing blank
;; ("const1248_operand "); it is now spelled like every other use.
(define_expand "avx2_gatherdi<mode>"
  [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
                   (unspec:VEC_GATHER_MODE
                     [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
                      (mem:<ssescalarmode>
                        (match_par_dup 7
                          [(match_operand 2 "vsib_address_operand")
                           (match_operand:<VEC_GATHER_IDXDI>
                              3 "register_operand")
                           (match_operand:SI 5 "const1248_operand")]))
                      (mem:BLK (scratch))
                      (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand")]
                     UNSPEC_GATHER))
              (clobber (match_scratch:VEC_GATHER_MODE 6))])]
  "TARGET_AVX2"
{
  operands[7]
    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
                                        operands[5]), UNSPEC_VSIBADDR);
})
18692
;; AVX2 gather with 64-bit indices and an explicit source vector.
;; Operand 2 (source) is tied to the destination and operand 5 to the
;; clobbered scratch register 1.  The template prints operands 2 and 5,
;; which carry the <VEC_GATHER_SRCDI> mode, rather than the tied
;; operands 0 and 1.
18693 (define_insn "*avx2_gatherdi<mode>"
18694 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
18695 (unspec:VEC_GATHER_MODE
18696 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
18697 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
18698 [(unspec:P
18699 [(match_operand:P 3 "vsib_address_operand" "Tv")
18700 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
18701 (match_operand:SI 6 "const1248_operand" "n")]
18702 UNSPEC_VSIBADDR)])
18703 (mem:BLK (scratch))
18704 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
18705 UNSPEC_GATHER))
18706 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
18707 "TARGET_AVX2"
18708 "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"
18709 [(set_attr "type" "ssemov")
18710 (set_attr "prefix" "vex")
18711 (set_attr "mode" "<sseinsnmode>")])
18712
;; Variant of the AVX2 64-bit-index gather in which the source slot of
;; the unspec is (pc) instead of a register operand.  Operand 4 is tied
;; to the clobbered scratch register 1.  When the data mode differs from
;; <VEC_GATHER_SRCDI>, the destination is printed with the %x modifier.
(define_insn "*avx2_gatherdi<mode>_2"
  [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
        (unspec:VEC_GATHER_MODE
          [(pc)
           (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
             [(unspec:P
                [(match_operand:P 2 "vsib_address_operand" "Tv")
                 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
                 (match_operand:SI 5 "const1248_operand" "n")]
                UNSPEC_VSIBADDR)])
           (mem:BLK (scratch))
           (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
          UNSPEC_GATHER))
   (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
  "TARGET_AVX2"
{
  /* Same mode on both sides: print the destination as is.  */
  if (<MODE>mode == <VEC_GATHER_SRCDI>mode)
    return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";

  return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";
}
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])
18736
;; AVX2 64-bit-index gather for the VI4F_256 modes where only elements
;; 0-3 of the gathered vector are kept, so the destination has the
;; <VEC_GATHER_SRCDI> mode.  Operand 2 (source) is tied to the
;; destination and operand 5 to the clobbered scratch register 1.
18737 (define_insn "*avx2_gatherdi<mode>_3"
18738 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
18739 (vec_select:<VEC_GATHER_SRCDI>
18740 (unspec:VI4F_256
18741 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
18742 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
18743 [(unspec:P
18744 [(match_operand:P 3 "vsib_address_operand" "Tv")
18745 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
18746 (match_operand:SI 6 "const1248_operand" "n")]
18747 UNSPEC_VSIBADDR)])
18748 (mem:BLK (scratch))
18749 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
18750 UNSPEC_GATHER)
18751 (parallel [(const_int 0) (const_int 1)
18752 (const_int 2) (const_int 3)])))
18753 (clobber (match_scratch:VI4F_256 1 "=&x"))]
18754 "TARGET_AVX2"
18755 "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"
18756 [(set_attr "type" "ssemov")
18757 (set_attr "prefix" "vex")
18758 (set_attr "mode" "<sseinsnmode>")])
18759
;; Variant of the half-width-result 64-bit-index gather for the VI4F_256
;; modes in which the source slot of the unspec is (pc) instead of a
;; register operand.  Operand 4 is tied to the clobbered scratch
;; register 1; only elements 0-3 of the gathered vector are kept.
18760 (define_insn "*avx2_gatherdi<mode>_4"
18761 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
18762 (vec_select:<VEC_GATHER_SRCDI>
18763 (unspec:VI4F_256
18764 [(pc)
18765 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
18766 [(unspec:P
18767 [(match_operand:P 2 "vsib_address_operand" "Tv")
18768 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
18769 (match_operand:SI 5 "const1248_operand" "n")]
18770 UNSPEC_VSIBADDR)])
18771 (mem:BLK (scratch))
18772 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
18773 UNSPEC_GATHER)
18774 (parallel [(const_int 0) (const_int 1)
18775 (const_int 2) (const_int 3)])))
18776 (clobber (match_scratch:VI4F_256 1 "=&x"))]
18777 "TARGET_AVX2"
18778 "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"
18779 [(set_attr "type" "ssemov")
18780 (set_attr "prefix" "vex")
18781 (set_attr "mode" "<sseinsnmode>")])
18782
;; AVX-512 gather with 32-bit indices for the VI48F modes.
;; Operand 1 is the source vector, operand 4 the mask register,
;; operand 2 the base address, operand 3 the index vector and operand 5
;; the scale (1, 2, 4 or 8).  The preparation code wraps base, index and
;; scale into an UNSPEC_VSIBADDR address, stored in operand 6.  A
;; scratch in the mask mode (operand 7) is clobbered.
18783 (define_expand "<avx512>_gathersi<mode>"
18784 [(parallel [(set (match_operand:VI48F 0 "register_operand")
18785 (unspec:VI48F
18786 [(match_operand:VI48F 1 "register_operand")
18787 (match_operand:<avx512fmaskmode> 4 "register_operand")
18788 (mem:<ssescalarmode>
18789 (match_par_dup 6
18790 [(match_operand 2 "vsib_address_operand")
18791 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand")
18792 (match_operand:SI 5 "const1248_operand")]))]
18793 UNSPEC_GATHER))
18794 (clobber (match_scratch:<avx512fmaskmode> 7))])]
18795 "TARGET_AVX512F"
18796 {
18797 operands[6]
18798 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
18799 operands[5]), UNSPEC_VSIBADDR);
18800 })
18801
;; Matcher for the AVX-512 dword-index gather with a register first operand.
;; Operand 1 is tied to the early-clobbered output (constraint "0").
;; Operand 7 (the incoming mask) is tied to the clobbered scratch operand 2
;; (constraint "2"); the template prints that scratch as the {%2} mask.
;; Operand 6 is the VSIB memory reference built from operands 4, 3 and 5.
18802 (define_insn "*avx512f_gathersi<mode>"
18803 [(set (match_operand:VI48F 0 "register_operand" "=&v")
18804 (unspec:VI48F
18805 [(match_operand:VI48F 1 "register_operand" "0")
18806 (match_operand:<avx512fmaskmode> 7 "register_operand" "2")
18807 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
18808 [(unspec:P
18809 [(match_operand:P 4 "vsib_address_operand" "Tv")
18810 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "v")
18811 (match_operand:SI 5 "const1248_operand" "n")]
18812 UNSPEC_VSIBADDR)])]
18813 UNSPEC_GATHER))
18814 (clobber (match_scratch:<avx512fmaskmode> 2 "=&Yk"))]
18815 "TARGET_AVX512F"
18816 "v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %g6}"
18817 [(set_attr "type" "ssemov")
18818 (set_attr "prefix" "evex")
18819 (set_attr "mode" "<sseinsnmode>")])
18820
;; Variant of the AVX-512 dword-index gather matcher with (pc) in the first
;; unspec slot instead of a register operand, so operand numbers shift down
;; by one.  Operand 6 (incoming mask) is tied to the clobbered scratch
;; operand 1, which the template prints as the {%1} mask; operand 5 is the
;; VSIB memory reference built from operands 3, 2 and 4.
18821 (define_insn "*avx512f_gathersi<mode>_2"
18822 [(set (match_operand:VI48F 0 "register_operand" "=&v")
18823 (unspec:VI48F
18824 [(pc)
18825 (match_operand:<avx512fmaskmode> 6 "register_operand" "1")
18826 (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
18827 [(unspec:P
18828 [(match_operand:P 3 "vsib_address_operand" "Tv")
18829 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
18830 (match_operand:SI 4 "const1248_operand" "n")]
18831 UNSPEC_VSIBADDR)])]
18832 UNSPEC_GATHER))
18833 (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
18834 "TARGET_AVX512F"
18835 "v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %g5}"
18836 [(set_attr "type" "ssemov")
18837 (set_attr "prefix" "evex")
18838 (set_attr "mode" "<sseinsnmode>")])
18839
18840
;; Expander for the AVX-512 gather with <VEC_GATHER_IDXDI> indices.
;; Operand 1 has mode <VEC_GATHER_SRCDI> (not necessarily the mode of
;; operand 0) and the mask (operand 4) and the clobbered scratch (operand 7)
;; are always QImode.  Operands 2, 3 and 5 are base address, index vector
;; and scale; the preparation code combines them into operand 6, the Pmode
;; UNSPEC_VSIBADDR address referenced by the match_par_dup.
18841 (define_expand "<avx512>_gatherdi<mode>"
18842 [(parallel [(set (match_operand:VI48F 0 "register_operand")
18843 (unspec:VI48F
18844 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
18845 (match_operand:QI 4 "register_operand")
18846 (mem:<ssescalarmode>
18847 (match_par_dup 6
18848 [(match_operand 2 "vsib_address_operand")
18849 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand")
18850 (match_operand:SI 5 "const1248_operand")]))]
18851 UNSPEC_GATHER))
18852 (clobber (match_scratch:QI 7))])]
18853 "TARGET_AVX512F"
18854 {
18855 operands[6]
18856 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
18857 operands[5]), UNSPEC_VSIBADDR);
18858 })
18859
;; Matcher for the AVX-512 qword-index gather with a register first operand.
;; Operand 1 (mode <VEC_GATHER_SRCDI>) is tied to the output; the template
;; prints %1 rather than %0 as the destination, so the register name is
;; printed in operand 1's mode.  Operand 7 (incoming QImode mask) is tied to
;; the clobbered scratch operand 2, printed as the {%2} mask.
18860 (define_insn "*avx512f_gatherdi<mode>"
18861 [(set (match_operand:VI48F 0 "register_operand" "=&v")
18862 (unspec:VI48F
18863 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand" "0")
18864 (match_operand:QI 7 "register_operand" "2")
18865 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
18866 [(unspec:P
18867 [(match_operand:P 4 "vsib_address_operand" "Tv")
18868 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "v")
18869 (match_operand:SI 5 "const1248_operand" "n")]
18870 UNSPEC_VSIBADDR)])]
18871 UNSPEC_GATHER))
18872 (clobber (match_scratch:QI 2 "=&Yk"))]
18873 "TARGET_AVX512F"
18874 "v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %g6}"
18875 [(set_attr "type" "ssemov")
18876 (set_attr "prefix" "evex")
18877 (set_attr "mode" "<sseinsnmode>")])
18878
;; Variant of the AVX-512 qword-index gather matcher with (pc) in the first
;; unspec slot instead of a register operand.  Operand 6 (incoming QImode
;; mask) is tied to the clobbered scratch operand 1, printed as the {%1}
;; mask; operand 5 is the VSIB memory reference built from operands 3, 2
;; and 4.
;; Because there is no <VEC_GATHER_SRCDI> operand to print, the output code
;; picks an explicit register-size modifier for operand 0 whenever <MODE>
;; differs from <VEC_GATHER_SRCDI>: %t0 for 64-byte modes, %x0 otherwise.
;; Both assembler dialects must print the same register width; the Intel
;; alternative of the non-64-byte case previously used %t0 while the AT&T
;; alternative used %x0.
18879 (define_insn "*avx512f_gatherdi<mode>_2"
18880 [(set (match_operand:VI48F 0 "register_operand" "=&v")
18881 (unspec:VI48F
18882 [(pc)
18883 (match_operand:QI 6 "register_operand" "1")
18884 (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
18885 [(unspec:P
18886 [(match_operand:P 3 "vsib_address_operand" "Tv")
18887 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
18888 (match_operand:SI 4 "const1248_operand" "n")]
18889 UNSPEC_VSIBADDR)])]
18890 UNSPEC_GATHER))
18891 (clobber (match_scratch:QI 1 "=&Yk"))]
18892 "TARGET_AVX512F"
18893 {
18894 if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
18895 {
/* The destination is narrower than MODE here; print it with the same
   size modifier in both the AT&T and the Intel alternative.  */
18896 if (<MODE_SIZE> != 64)
18897 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %x0%{%1%}|%x0%{%1%}, %g5}";
18898 else
18899 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %g5}";
18900 }
18901 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %g5}";
18902 }
18903 [(set_attr "type" "ssemov")
18904 (set_attr "prefix" "evex")
18905 (set_attr "mode" "<sseinsnmode>")])
18906
;; Expander for the AVX-512 scatter with <VEC_GATHER_IDXSI> indices.
;; Operands: 0 = base address, 1 = mask register, 2 = index vector,
;; 3 = register holding the data to store, 4 = scale (const1248_operand).
;; Operand 6 is a mask-mode scratch that is clobbered.  The preparation code
;; builds operand 5, the Pmode UNSPEC_VSIBADDR address made from operands
;; 0, 2 and 4, which the match_par_dup in the destination refers to.
18907 (define_expand "<avx512>_scattersi<mode>"
18908 [(parallel [(set (mem:VI48F
18909 (match_par_dup 5
18910 [(match_operand 0 "vsib_address_operand")
18911 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand")
18912 (match_operand:SI 4 "const1248_operand")]))
18913 (unspec:VI48F
18914 [(match_operand:<avx512fmaskmode> 1 "register_operand")
18915 (match_operand:VI48F 3 "register_operand")]
18916 UNSPEC_SCATTER))
18917 (clobber (match_scratch:<avx512fmaskmode> 6))])]
18918 "TARGET_AVX512F"
18919 {
18920 operands[5]
18921 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
18922 operands[4]), UNSPEC_VSIBADDR);
18923 })
18924
;; Matcher for the AVX-512 dword-index scatter.  The destination is the VSIB
;; memory reference (operand 5) built from operands 0, 2 and 4; operand 3 is
;; the register being stored.  Operand 6 (incoming mask) is tied to the
;; clobbered scratch operand 1, which the template prints as the {%1} mask.
18925 (define_insn "*avx512f_scattersi<mode>"
18926 [(set (match_operator:VI48F 5 "vsib_mem_operator"
18927 [(unspec:P
18928 [(match_operand:P 0 "vsib_address_operand" "Tv")
18929 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
18930 (match_operand:SI 4 "const1248_operand" "n")]
18931 UNSPEC_VSIBADDR)])
18932 (unspec:VI48F
18933 [(match_operand:<avx512fmaskmode> 6 "register_operand" "1")
18934 (match_operand:VI48F 3 "register_operand" "v")]
18935 UNSPEC_SCATTER))
18936 (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
18937 "TARGET_AVX512F"
18938 "v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%5%{%1%}, %3}"
18939 [(set_attr "type" "ssemov")
18940 (set_attr "prefix" "evex")
18941 (set_attr "mode" "<sseinsnmode>")])
18942
;; Expander for the AVX-512 scatter with <VEC_GATHER_IDXDI> indices.
;; Operands: 0 = base address, 1 = QImode mask register, 2 = index vector,
;; 3 = data register in mode <VEC_GATHER_SRCDI>, 4 = scale.  Operand 6 is a
;; QImode scratch that is clobbered.  The preparation code builds operand 5,
;; the Pmode UNSPEC_VSIBADDR address made from operands 0, 2 and 4.
18943 (define_expand "<avx512>_scatterdi<mode>"
18944 [(parallel [(set (mem:VI48F
18945 (match_par_dup 5
18946 [(match_operand 0 "vsib_address_operand")
18947 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand")
18948 (match_operand:SI 4 "const1248_operand")]))
18949 (unspec:VI48F
18950 [(match_operand:QI 1 "register_operand")
18951 (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand")]
18952 UNSPEC_SCATTER))
18953 (clobber (match_scratch:QI 6))])]
18954 "TARGET_AVX512F"
18955 {
18956 operands[5]
18957 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
18958 operands[4]), UNSPEC_VSIBADDR);
18959 })
18960
;; Matcher for the AVX-512 qword-index scatter.  The destination is the VSIB
;; memory reference (operand 5) built from operands 0, 2 and 4; operand 3
;; (mode <VEC_GATHER_SRCDI>) is the register being stored.  Operand 6
;; (incoming QImode mask) is tied to the clobbered scratch operand 1, which
;; the template prints as the {%1} mask.
18961 (define_insn "*avx512f_scatterdi<mode>"
18962 [(set (match_operator:VI48F 5 "vsib_mem_operator"
18963 [(unspec:P
18964 [(match_operand:P 0 "vsib_address_operand" "Tv")
18965 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
18966 (match_operand:SI 4 "const1248_operand" "n")]
18967 UNSPEC_VSIBADDR)])
18968 (unspec:VI48F
18969 [(match_operand:QI 6 "register_operand" "1")
18970 (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand" "v")]
18971 UNSPEC_SCATTER))
18972 (clobber (match_scratch:QI 1 "=&Yk"))]
18973 "TARGET_AVX512F"
18974 "v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%5%{%1%}, %3}"
18975 [(set_attr "type" "ssemov")
18976 (set_attr "prefix" "evex")
18977 (set_attr "mode" "<sseinsnmode>")])
18978
;; Masked register-to-register compress (UNSPEC_COMPRESS).
;; Operand 1 is the source vector, operand 3 the mask register.  Operand 2
;; is either tied to the output (constraint "0") or matches constraint "C";
;; the %N2 modifier in the template is driven by it (presumably it selects
;; zero-masking output when operand 2 is the zero vector -- confirm in the
;; operand printer).
18979 (define_insn "<avx512>_compress<mode>_mask"
18980 [(set (match_operand:VI48F 0 "register_operand" "=v")
18981 (unspec:VI48F
18982 [(match_operand:VI48F 1 "register_operand" "v")
18983 (match_operand:VI48F 2 "vector_move_operand" "0C")
18984 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")]
18985 UNSPEC_COMPRESS))]
18986 "TARGET_AVX512F"
18987 "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
18988 [(set_attr "type" "ssemov")
18989 (set_attr "prefix" "evex")
18990 (set_attr "mode" "<sseinsnmode>")])
18991
;; Masked compress directly to memory (UNSPEC_COMPRESS_STORE).
;; Operand 0 is the memory destination; it also appears as an input through
;; (match_dup 0).  Operand 1 is the source vector register and operand 2 the
;; mask register, printed as {%2}.
;; The source register uses constraint "v", matching the register-form
;; compress and expand patterns in this file; the instruction is emitted
;; with the "evex" prefix attribute in all of them.
18992 (define_insn "<avx512>_compressstore<mode>_mask"
18993 [(set (match_operand:VI48F 0 "memory_operand" "=m")
18994 (unspec:VI48F
18995 [(match_operand:VI48F 1 "register_operand" "v")
18996 (match_dup 0)
18997 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")]
18998 UNSPEC_COMPRESS_STORE))]
18999 "TARGET_AVX512F"
19000 "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
19001 [(set_attr "type" "ssemov")
19002 (set_attr "prefix" "evex")
19003 (set_attr "memory" "store")
19004 (set_attr "mode" "<sseinsnmode>")])
19005
;; Zero-masking expander for UNSPEC_EXPAND: whatever was passed as operand 2
;; is overwritten with the all-zero constant of <MODE> before the RTL
;; template is instantiated.
19006 (define_expand "<avx512>_expand<mode>_maskz"
19007 [(set (match_operand:VI48F 0 "register_operand")
19008 (unspec:VI48F
19009 [(match_operand:VI48F 1 "nonimmediate_operand")
19010 (match_operand:VI48F 2 "vector_move_operand")
19011 (match_operand:<avx512fmaskmode> 3 "register_operand")]
19012 UNSPEC_EXPAND))]
19013 "TARGET_AVX512F"
19014 "operands[2] = CONST0_RTX (<MODE>mode);")
19015
;; Masked expand (UNSPEC_EXPAND) with two alternatives: source operand 1 in
;; a register ("v") or in memory ("m"); the "memory" attribute is set to
;; "none,load" accordingly.  Operand 2 is either tied to the output or
;; matches constraint "C" and drives the %N2 modifier; operand 3 is the
;; mask register printed as {%3}.
19016 (define_insn "<avx512>_expand<mode>_mask"
19017 [(set (match_operand:VI48F 0 "register_operand" "=v,v")
19018 (unspec:VI48F
19019 [(match_operand:VI48F 1 "nonimmediate_operand" "v,m")
19020 (match_operand:VI48F 2 "vector_move_operand" "0C,0C")
19021 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")]
19022 UNSPEC_EXPAND))]
19023 "TARGET_AVX512F"
19024 "v<sseintprefix>expand<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
19025 [(set_attr "type" "ssemov")
19026 (set_attr "prefix" "evex")
19027 (set_attr "memory" "none,load")
19028 (set_attr "mode" "<sseinsnmode>")])
19029
;; Packed vrange (UNSPEC_RANGE) on two vector inputs plus an immediate in
;; the range 0..15 (operand 3).  The pattern name and template are extended
;; by the <mask_name> and <round_saeonly_name> substitutions; the insn
;; condition additionally requires <round_saeonly_mode512bit_condition>.
19030 (define_insn "avx512dq_rangep<mode><mask_name><round_saeonly_name>"
19031 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
19032 (unspec:VF_AVX512VL
19033 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
19034 (match_operand:VF_AVX512VL 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
19035 (match_operand:SI 3 "const_0_to_15_operand")]
19036 UNSPEC_RANGE))]
19037 "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>"
19038 "vrange<ssemodesuffix>\t{%3, <round_saeonly_mask_op4>%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2<round_saeonly_mask_op4>, %3}"
19039 [(set_attr "type" "sse")
19040 (set_attr "prefix" "evex")
19041 (set_attr "mode" "<MODE>")])
19042
;; Scalar vrange: the UNSPEC_RANGE result is merged into operand 1 with
;; vec_merge mask (const_int 1), i.e. only element 0 comes from the unspec
;; and the remaining elements are copied from operand 1.  Operand 3 is an
;; immediate in the range 0..15.
19043 (define_insn "avx512dq_ranges<mode><round_saeonly_name>"
19044 [(set (match_operand:VF_128 0 "register_operand" "=v")
19045 (vec_merge:VF_128
19046 (unspec:VF_128
19047 [(match_operand:VF_128 1 "register_operand" "v")
19048 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
19049 (match_operand:SI 3 "const_0_to_15_operand")]
19050 UNSPEC_RANGE)
19051 (match_dup 1)
19052 (const_int 1)))]
19053 "TARGET_AVX512DQ"
19054 "vrange<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}"
19055 [(set_attr "type" "sse")
19056 (set_attr "prefix" "evex")
19057 (set_attr "mode" "<MODE>")])
19058
;; Packed vfpclass (UNSPEC_FPCLASS): classifies the elements of vector
;; operand 1 according to the 8-bit immediate operand 2 and writes the
;; result to a mask register (operand 0, constraint "Yk").
;; NOTE(review): the ';' following the output template starts an md comment
;; and is otherwise inert; it looks like a leftover C-style terminator.
19059 (define_insn "avx512dq_fpclass<mode><mask_scalar_merge_name>"
19060 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
19061 (unspec:<avx512fmaskmode>
19062 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
19063 (match_operand:QI 2 "const_0_to_255_operand" "n")]
19064 UNSPEC_FPCLASS))]
19065 "TARGET_AVX512DQ"
19066 "vfpclass<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}";
19067 [(set_attr "type" "sse")
19068 (set_attr "length_immediate" "1")
19069 (set_attr "prefix" "evex")
19070 (set_attr "mode" "<MODE>")])
19071
;; Scalar vfpclass: the UNSPEC_FPCLASS mask result is ANDed with
;; (const_int 1), so only bit 0 of the mask destination can be set.
;; Operand 1 is the 128-bit vector source, operand 2 the 8-bit immediate.
19072 (define_insn "avx512dq_vmfpclass<mode>"
19073 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
19074 (and:<avx512fmaskmode>
19075 (unspec:<avx512fmaskmode>
19076 [(match_operand:VF_128 1 "register_operand" "v")
19077 (match_operand:QI 2 "const_0_to_255_operand" "n")]
19078 UNSPEC_FPCLASS)
19079 (const_int 1)))]
19080 "TARGET_AVX512DQ"
19081 "vfpclass<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
19082 [(set_attr "type" "sse")
19083 (set_attr "length_immediate" "1")
19084 (set_attr "prefix" "evex")
19085 (set_attr "mode" "<MODE>")])
19086
;; Packed vgetmant (UNSPEC_GETMANT): one vector source (operand 1) and an
;; immediate in the range 0..15 (operand 2).  Masking and SAE text in the
;; template comes from the <mask_name> / <round_saeonly_name> substitutions.
;; Unlike the neighbouring patterns, no "type" attribute is set here.
19087 (define_insn "<avx512>_getmant<mode><mask_name><round_saeonly_name>"
19088 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
19089 (unspec:VF_AVX512VL
19090 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>")
19091 (match_operand:SI 2 "const_0_to_15_operand")]
19092 UNSPEC_GETMANT))]
19093 "TARGET_AVX512F"
19094 "vgetmant<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}";
19095 [(set_attr "prefix" "evex")
19096 (set_attr "mode" "<MODE>")])
19097
;; Scalar vgetmant: the UNSPEC_GETMANT result is merged into operand 1 with
;; vec_merge mask (const_int 1), so only element 0 comes from the unspec and
;; the remaining elements are copied from operand 1.  Operand 3 is an
;; immediate in the range 0..15.  The "mode" attribute is the scalar mode.
19098 (define_insn "avx512f_vgetmant<mode><round_saeonly_name>"
19099 [(set (match_operand:VF_128 0 "register_operand" "=v")
19100 (vec_merge:VF_128
19101 (unspec:VF_128
19102 [(match_operand:VF_128 1 "register_operand" "v")
19103 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
19104 (match_operand:SI 3 "const_0_to_15_operand")]
19105 UNSPEC_GETMANT)
19106 (match_dup 1)
19107 (const_int 1)))]
19108 "TARGET_AVX512F"
19109 "vgetmant<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}";
19110 [(set_attr "prefix" "evex")
19111 (set_attr "mode" "<ssescalarmode>")])
19112
19113 ;; The correct representation for this is absolutely enormous, and
19114 ;; surely not generally useful.
;; vdbpsadbw is therefore modelled as an opaque unspec (UNSPEC_DBPSADBW)
;; over two <dbpsadbwmode> inputs and an 8-bit immediate (operand 3); the
;; result has the VI2_AVX512VL mode of operand 0.
19115 (define_insn "<mask_codefor>avx512bw_dbpsadbw<mode><mask_name>"
19116 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
19117 (unspec:VI2_AVX512VL
19118 [(match_operand:<dbpsadbwmode> 1 "register_operand" "v")
19119 (match_operand:<dbpsadbwmode> 2 "nonimmediate_operand" "vm")
19120 (match_operand:SI 3 "const_0_to_255_operand")]
19121 UNSPEC_DBPSADBW))]
19122 "TARGET_AVX512BW"
19123 "vdbpsadbw\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}"
19124 [(set_attr "isa" "avx")
19125 (set_attr "type" "sselog1")
19126 (set_attr "length_immediate" "1")
19127 (set_attr "prefix" "evex")
19128 (set_attr "mode" "<sseinsnmode>")])
19129
;; Vector count-leading-zeros expressed with the generic clz RTL code and
;; emitted as vplzcnt; enabled by TARGET_AVX512CD.  The source may be a
;; register or memory ("vm").
19130 (define_insn "clz<mode>2<mask_name>"
19131 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
19132 (clz:VI48_AVX512VL
19133 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")))]
19134 "TARGET_AVX512CD"
19135 "vplzcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
19136 [(set_attr "type" "sse")
19137 (set_attr "prefix" "evex")
19138 (set_attr "mode" "<sseinsnmode>")])
19139
;; vpconflict, modelled as a one-input unspec (UNSPEC_CONFLICT); enabled by
;; TARGET_AVX512CD.  The source may be a register or memory ("vm").
19140 (define_insn "<mask_codefor>conflict<mode><mask_name>"
19141 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
19142 (unspec:VI48_AVX512VL
19143 [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")]
19144 UNSPEC_CONFLICT))]
19145 "TARGET_AVX512CD"
19146 "vpconflict<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
19147 [(set_attr "type" "sse")
19148 (set_attr "prefix" "evex")
19149 (set_attr "mode" "<sseinsnmode>")])
19150
;; sha1msg1 as an opaque V4SI unspec.  Two-operand form: operand 1 is tied
;; to the destination (constraint "0"), so only %0 and %2 are printed;
;; operand 2 may be a register or memory.
19151 (define_insn "sha1msg1"
19152 [(set (match_operand:V4SI 0 "register_operand" "=x")
19153 (unspec:V4SI
19154 [(match_operand:V4SI 1 "register_operand" "0")
19155 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
19156 UNSPEC_SHA1MSG1))]
19157 "TARGET_SHA"
19158 "sha1msg1\t{%2, %0|%0, %2}"
19159 [(set_attr "type" "sselog1")
19160 (set_attr "mode" "TI")])
19161
;; sha1msg2 as an opaque V4SI unspec.  Two-operand form: operand 1 is tied
;; to the destination (constraint "0"), so only %0 and %2 are printed;
;; operand 2 may be a register or memory.
19162 (define_insn "sha1msg2"
19163 [(set (match_operand:V4SI 0 "register_operand" "=x")
19164 (unspec:V4SI
19165 [(match_operand:V4SI 1 "register_operand" "0")
19166 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
19167 UNSPEC_SHA1MSG2))]
19168 "TARGET_SHA"
19169 "sha1msg2\t{%2, %0|%0, %2}"
19170 [(set_attr "type" "sselog1")
19171 (set_attr "mode" "TI")])
19172
;; sha1nexte as an opaque V4SI unspec.  Two-operand form: operand 1 is tied
;; to the destination (constraint "0"), so only %0 and %2 are printed;
;; operand 2 may be a register or memory.
19173 (define_insn "sha1nexte"
19174 [(set (match_operand:V4SI 0 "register_operand" "=x")
19175 (unspec:V4SI
19176 [(match_operand:V4SI 1 "register_operand" "0")
19177 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
19178 UNSPEC_SHA1NEXTE))]
19179 "TARGET_SHA"
19180 "sha1nexte\t{%2, %0|%0, %2}"
19181 [(set_attr "type" "sselog1")
19182 (set_attr "mode" "TI")])
19183
;; sha1rnds4 as an opaque V4SI unspec.  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory; operand 3 is an
;; immediate in the range 0..3, accounted for by length_immediate = 1.
19184 (define_insn "sha1rnds4"
19185 [(set (match_operand:V4SI 0 "register_operand" "=x")
19186 (unspec:V4SI
19187 [(match_operand:V4SI 1 "register_operand" "0")
19188 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
19189 (match_operand:SI 3 "const_0_to_3_operand" "n")]
19190 UNSPEC_SHA1RNDS4))]
19191 "TARGET_SHA"
19192 "sha1rnds4\t{%3, %2, %0|%0, %2, %3}"
19193 [(set_attr "type" "sselog1")
19194 (set_attr "length_immediate" "1")
19195 (set_attr "mode" "TI")])
19196
;; sha256msg1 as an opaque V4SI unspec.  Two-operand form: operand 1 is
;; tied to the destination (constraint "0"), so only %0 and %2 are printed;
;; operand 2 may be a register or memory.
19197 (define_insn "sha256msg1"
19198 [(set (match_operand:V4SI 0 "register_operand" "=x")
19199 (unspec:V4SI
19200 [(match_operand:V4SI 1 "register_operand" "0")
19201 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
19202 UNSPEC_SHA256MSG1))]
19203 "TARGET_SHA"
19204 "sha256msg1\t{%2, %0|%0, %2}"
19205 [(set_attr "type" "sselog1")
19206 (set_attr "mode" "TI")])
19207
;; sha256msg2 as an opaque V4SI unspec.  Two-operand form: operand 1 is
;; tied to the destination (constraint "0"), so only %0 and %2 are printed;
;; operand 2 may be a register or memory.
19208 (define_insn "sha256msg2"
19209 [(set (match_operand:V4SI 0 "register_operand" "=x")
19210 (unspec:V4SI
19211 [(match_operand:V4SI 1 "register_operand" "0")
19212 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
19213 UNSPEC_SHA256MSG2))]
19214 "TARGET_SHA"
19215 "sha256msg2\t{%2, %0|%0, %2}"
19216 [(set_attr "type" "sselog1")
19217 (set_attr "mode" "TI")])
19218
;; sha256rnds2 as an opaque V4SI unspec.  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory; operand 3 is a V4SI
;; register restricted by constraint "Yz" (presumably the fixed xmm0 the
;; instruction reads implicitly -- confirm in constraints.md).
;; NOTE(review): length_immediate is set to 1 although operand 3 is a
;; register and the template prints no immediate; this looks copied from
;; sha1rnds4.  Verify against the instruction encoding and the other length
;; attributes before changing it.
19219 (define_insn "sha256rnds2"
19220 [(set (match_operand:V4SI 0 "register_operand" "=x")
19221 (unspec:V4SI
19222 [(match_operand:V4SI 1 "register_operand" "0")
19223 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
19224 (match_operand:V4SI 3 "register_operand" "Yz")]
19225 UNSPEC_SHA256RNDS2))]
19226 "TARGET_SHA"
19227 "sha256rnds2\t{%3, %2, %0|%0, %2, %3}"
19228 [(set_attr "type" "sselog1")
19229 (set_attr "length_immediate" "1")
19230 (set_attr "mode" "TI")])
19231
;; Cast from <ssequartermode> to the full AVX512MODE2P mode (UNSPEC_CAST).
;; The insn itself emits nothing ("#") and is always split after reload
;; into a plain move:
;;  - register destination: operand 0 is re-expressed as the same hard
;;    register in <ssequartermode>, and the narrow source is moved into it;
;;  - memory destination (second alternative, source constrained to "x"):
;;    operand 1 is re-expressed as the same hard register in <MODE> and
;;    stored.
19232 (define_insn_and_split "avx512f_<castmode><avxsizesuffix>_<castmode>"
19233 [(set (match_operand:AVX512MODE2P 0 "nonimmediate_operand" "=x,m")
19234 (unspec:AVX512MODE2P
19235 [(match_operand:<ssequartermode> 1 "nonimmediate_operand" "xm,x")]
19236 UNSPEC_CAST))]
19237 "TARGET_AVX512F"
19238 "#"
19239 "&& reload_completed"
19240 [(const_int 0)]
19241 {
19242 rtx op0 = operands[0];
19243 rtx op1 = operands[1];
19244 if (REG_P (op0))
19245 op0 = gen_rtx_REG (<ssequartermode>mode, REGNO (op0));
19246 else
19247 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
19248 emit_move_insn (op0, op1);
19249 DONE;
19250 })
19251
;; Cast from <ssehalfvecmode> to the full AVX512MODE2P mode (UNSPEC_CAST).
;; The insn itself emits nothing ("#") and is always split after reload
;; into a plain move:
;;  - register destination: operand 0 is re-expressed as the same hard
;;    register in <ssehalfvecmode>, and the narrow source is moved into it;
;;  - memory destination (second alternative, source constrained to "x"):
;;    operand 1 is re-expressed as the same hard register in <MODE> and
;;    stored.
19252 (define_insn_and_split "avx512f_<castmode><avxsizesuffix>_256<castmode>"
19253 [(set (match_operand:AVX512MODE2P 0 "nonimmediate_operand" "=x,m")
19254 (unspec:AVX512MODE2P
19255 [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
19256 UNSPEC_CAST))]
19257 "TARGET_AVX512F"
19258 "#"
19259 "&& reload_completed"
19260 [(const_int 0)]
19261 {
19262 rtx op0 = operands[0];
19263 rtx op1 = operands[1];
19264 if (REG_P (op0))
19265 op0 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op0));
19266 else
19267 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
19268 emit_move_insn (op0, op1);
19269 DONE;
19270 })
19271
;; Iterates a pattern over the two 52-bit multiply-add unspecs (low and
;; high variants).
19272 (define_int_iterator VPMADD52
19273 [UNSPEC_VPMADD52LUQ
19274 UNSPEC_VPMADD52HUQ])
19275
;; Maps each VPMADD52 unspec to the suffix ("luq" / "huq") substituted for
;; <vpmadd52type> in pattern names and output templates.
19276 (define_int_attr vpmadd52type
19277 [(UNSPEC_VPMADD52LUQ "luq") (UNSPEC_VPMADD52HUQ "huq")])
19278
;; Zero-masking expander for the "huq" variant: forwards operands 0..3 to
;; gen_vpamdd52huq<mode>_maskz_1, inserting the all-zero vector of <MODE>
;; before the mask (operand 4).
;; NOTE(review): the name is spelled "vpamdd52" (letters transposed
;; relative to "vpmadd52").  It has to keep matching the generator function
;; called below and any users outside this file, so do not rename it in
;; isolation.
19279 (define_expand "vpamdd52huq<mode>_maskz"
19280 [(match_operand:VI8_AVX512VL 0 "register_operand")
19281 (match_operand:VI8_AVX512VL 1 "register_operand")
19282 (match_operand:VI8_AVX512VL 2 "register_operand")
19283 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand")
19284 (match_operand:<avx512fmaskmode> 4 "register_operand")]
19285 "TARGET_AVX512IFMA"
19286 {
19287 emit_insn (gen_vpamdd52huq<mode>_maskz_1 (
19288 operands[0], operands[1], operands[2], operands[3],
19289 CONST0_RTX (<MODE>mode), operands[4]));
19290 DONE;
19291 })
19292
;; Zero-masking expander for the "luq" variant: forwards operands 0..3 to
;; gen_vpamdd52luq<mode>_maskz_1, inserting the all-zero vector of <MODE>
;; before the mask (operand 4).
;; NOTE(review): the name is spelled "vpamdd52" (letters transposed
;; relative to "vpmadd52").  It has to keep matching the generator function
;; called below and any users outside this file, so do not rename it in
;; isolation.
19293 (define_expand "vpamdd52luq<mode>_maskz"
19294 [(match_operand:VI8_AVX512VL 0 "register_operand")
19295 (match_operand:VI8_AVX512VL 1 "register_operand")
19296 (match_operand:VI8_AVX512VL 2 "register_operand")
19297 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand")
19298 (match_operand:<avx512fmaskmode> 4 "register_operand")]
19299 "TARGET_AVX512IFMA"
19300 {
19301 emit_insn (gen_vpamdd52luq<mode>_maskz_1 (
19302 operands[0], operands[1], operands[2], operands[3],
19303 CONST0_RTX (<MODE>mode), operands[4]));
19304 DONE;
19305 })
19306
;; vpmadd52luq / vpmadd52huq, one pattern per VPMADD52 unspec.  Operand 1 is
;; tied to the destination (constraint "0"), so it acts as the accumulator
;; and is not printed separately; operand 2 is a register and operand 3 a
;; register or memory.  The <sd_maskz_name> substitution supplies the
;; variant of the name and the <sd_mask_op4> template text.
;; The pattern name spells "vpamdd52" while the emitted mnemonic is
;; "vpmadd52"; the name is an interface to code outside this view.
19307 (define_insn "vpamdd52<vpmadd52type><mode><sd_maskz_name>"
19308 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
19309 (unspec:VI8_AVX512VL
19310 [(match_operand:VI8_AVX512VL 1 "register_operand" "0")
19311 (match_operand:VI8_AVX512VL 2 "register_operand" "v")
19312 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand" "vm")]
19313 VPMADD52))]
19314 "TARGET_AVX512IFMA"
19315 "vpmadd52<vpmadd52type>\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3}"
19316 [(set_attr "type" "ssemuladd")
19317 (set_attr "prefix" "evex")
19318 (set_attr "mode" "<sseinsnmode>")])
19319
;; Merge-masked vpmadd52luq / vpmadd52huq: the unspec result is vec_merged
;; with operand 1 under mask operand 4, so elements not selected by the
;; mask keep the value of operand 1, which is also tied to the destination.
;; The mask is printed as {%4}.
19320 (define_insn "vpamdd52<vpmadd52type><mode>_mask"
19321 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
19322 (vec_merge:VI8_AVX512VL
19323 (unspec:VI8_AVX512VL
19324 [(match_operand:VI8_AVX512VL 1 "register_operand" "0")
19325 (match_operand:VI8_AVX512VL 2 "register_operand" "v")
19326 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand" "vm")]
19327 VPMADD52)
19328 (match_dup 1)
19329 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
19330 "TARGET_AVX512IFMA"
19331 "vpmadd52<vpmadd52type>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
19332 [(set_attr "type" "ssemuladd")
19333 (set_attr "prefix" "evex")
19334 (set_attr "mode" "<sseinsnmode>")])
19335
;; vpmultishiftqb as an opaque two-input unspec (UNSPEC_VPMULTISHIFT) on
;; byte-element vectors; enabled by TARGET_AVX512VBMI.  Operand 1 is a
;; register, operand 2 a register or memory.
19336 (define_insn "vpmultishiftqb<mode><mask_name>"
19337 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
19338 (unspec:VI1_AVX512VL
19339 [(match_operand:VI1_AVX512VL 1 "register_operand" "v")
19340 (match_operand:VI1_AVX512VL 2 "nonimmediate_operand" "vm")]
19341 UNSPEC_VPMULTISHIFT))]
19342 "TARGET_AVX512VBMI"
19343 "vpmultishiftqb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
19344 [(set_attr "type" "sselog")
19345 (set_attr "prefix" "evex")
19346 (set_attr "mode" "<sseinsnmode>")])