]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/config/i386/sse.md
decl.c, [...]: Remove redundant enum from machine_mode.
[thirdparty/gcc.git] / gcc / config / i386 / sse.md
1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005-2014 Free Software Foundation, Inc.
3 ;;
4 ;; This file is part of GCC.
5 ;;
6 ;; GCC is free software; you can redistribute it and/or modify
7 ;; it under the terms of the GNU General Public License as published by
8 ;; the Free Software Foundation; either version 3, or (at your option)
9 ;; any later version.
10 ;;
11 ;; GCC is distributed in the hope that it will be useful,
12 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
13 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 ;; GNU General Public License for more details.
15 ;;
16 ;; You should have received a copy of the GNU General Public License
17 ;; along with GCC; see the file COPYING3. If not see
18 ;; <http://www.gnu.org/licenses/>.
19
20 (define_c_enum "unspec" [
21 ;; SSE
22 UNSPEC_MOVNT
23 UNSPEC_LOADU
24 UNSPEC_STOREU
25
26 ;; SSE3
27 UNSPEC_LDDQU
28
29 ;; SSSE3
30 UNSPEC_PSHUFB
31 UNSPEC_PSIGN
32 UNSPEC_PALIGNR
33
34 ;; For SSE4A support
35 UNSPEC_EXTRQI
36 UNSPEC_EXTRQ
37 UNSPEC_INSERTQI
38 UNSPEC_INSERTQ
39
40 ;; For SSE4.1 support
41 UNSPEC_BLENDV
42 UNSPEC_INSERTPS
43 UNSPEC_DP
44 UNSPEC_MOVNTDQA
45 UNSPEC_MPSADBW
46 UNSPEC_PHMINPOSUW
47 UNSPEC_PTEST
48
49 ;; For SSE4.2 support
50 UNSPEC_PCMPESTR
51 UNSPEC_PCMPISTR
52
53 ;; For FMA4 support
54 UNSPEC_FMADDSUB
55 UNSPEC_XOP_UNSIGNED_CMP
56 UNSPEC_XOP_TRUEFALSE
57 UNSPEC_XOP_PERMUTE
58 UNSPEC_FRCZ
59
60 ;; For AES support
61 UNSPEC_AESENC
62 UNSPEC_AESENCLAST
63 UNSPEC_AESDEC
64 UNSPEC_AESDECLAST
65 UNSPEC_AESIMC
66 UNSPEC_AESKEYGENASSIST
67
68 ;; For PCLMUL support
69 UNSPEC_PCLMUL
70
71 ;; For AVX support
72 UNSPEC_PCMP
73 UNSPEC_VPERMIL
74 UNSPEC_VPERMIL2
75 UNSPEC_VPERMIL2F128
76 UNSPEC_CAST
77 UNSPEC_VTESTP
78 UNSPEC_VCVTPH2PS
79 UNSPEC_VCVTPS2PH
80
81 ;; For AVX2 support
82 UNSPEC_VPERMVAR
83 UNSPEC_VPERMTI
84 UNSPEC_GATHER
85 UNSPEC_VSIBADDR
86
87 ;; For AVX512F support
88 UNSPEC_VPERMI2
89 UNSPEC_VPERMT2
90 UNSPEC_VPERMI2_MASK
91 UNSPEC_UNSIGNED_FIX_NOTRUNC
92 UNSPEC_UNSIGNED_PCMP
93 UNSPEC_TESTM
94 UNSPEC_TESTNM
95 UNSPEC_SCATTER
96 UNSPEC_RCP14
97 UNSPEC_RSQRT14
98 UNSPEC_FIXUPIMM
99 UNSPEC_SCALEF
100 UNSPEC_VTERNLOG
101 UNSPEC_GETEXP
102 UNSPEC_GETMANT
103 UNSPEC_ALIGN
104 UNSPEC_CONFLICT
105 UNSPEC_COMPRESS
106 UNSPEC_COMPRESS_STORE
107 UNSPEC_EXPAND
108 UNSPEC_MASKED_EQ
109 UNSPEC_MASKED_GT
110
111 ;; For embedded rounding feature
112 UNSPEC_EMBEDDED_ROUNDING
113
114 ;; For AVX512PF support
115 UNSPEC_GATHER_PREFETCH
116 UNSPEC_SCATTER_PREFETCH
117
118 ;; For AVX512ER support
119 UNSPEC_EXP2
120 UNSPEC_RCP28
121 UNSPEC_RSQRT28
122
123 ;; For SHA support
124 UNSPEC_SHA1MSG1
125 UNSPEC_SHA1MSG2
126 UNSPEC_SHA1NEXTE
127 UNSPEC_SHA1RNDS4
128 UNSPEC_SHA256MSG1
129 UNSPEC_SHA256MSG2
130 UNSPEC_SHA256RNDS2
131
132 ;; For AVX512BW support
133 UNSPEC_DBPSADBW
134 UNSPEC_PMADDUBSW512
135 UNSPEC_PMADDWD512
136 UNSPEC_PSHUFHW
137 UNSPEC_PSHUFLW
138 UNSPEC_CVTINT2MASK
139
140 ;; For AVX512DQ support
141 UNSPEC_REDUCE
142 UNSPEC_FPCLASS
143 UNSPEC_RANGE
144 ])
145
146 (define_c_enum "unspecv" [
147 UNSPECV_LDMXCSR
148 UNSPECV_STMXCSR
149 UNSPECV_CLFLUSH
150 UNSPECV_MONITOR
151 UNSPECV_MWAIT
152 UNSPECV_VZEROALL
153 UNSPECV_VZEROUPPER
154 ])
155
156 ;; All vector modes including V?TImode, used in move patterns.
157 (define_mode_iterator VMOVE
158 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
159 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
160 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
161 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
162 (V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX") V1TI
163 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
164 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
165
166 ;; All AVX-512{F,VL} vector modes. Assumes TARGET_AVX512F as the baseline.
167 (define_mode_iterator V48_AVX512VL
168 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
169 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
170 V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
171 V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
172
173 ;; 1,2 byte AVX-512{BW,VL} vector modes. Assumes TARGET_AVX512BW as the baseline.
174 (define_mode_iterator VI12_AVX512VL
175 [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")
176 V32HI (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
177
178 ;; All vector modes
179 (define_mode_iterator V
180 [(V32QI "TARGET_AVX") V16QI
181 (V16HI "TARGET_AVX") V8HI
182 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
183 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
184 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
185 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
186
187 ;; All 128bit vector modes
188 (define_mode_iterator V_128
189 [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])
190
191 ;; All 256bit vector modes
192 (define_mode_iterator V_256
193 [V32QI V16HI V8SI V4DI V8SF V4DF])
194
195 ;; All 512bit vector modes
196 (define_mode_iterator V_512 [V64QI V32HI V16SI V8DI V16SF V8DF])
197
198 ;; All 256bit and 512bit vector modes
199 (define_mode_iterator V_256_512
200 [V32QI V16HI V8SI V4DI V8SF V4DF
201 (V64QI "TARGET_AVX512F") (V32HI "TARGET_AVX512F") (V16SI "TARGET_AVX512F")
202 (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
203
204 ;; All vector float modes
205 (define_mode_iterator VF
206 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
207 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
208
209 ;; 128- and 256-bit float vector modes
210 (define_mode_iterator VF_128_256
211 [(V8SF "TARGET_AVX") V4SF
212 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
213
214 ;; All SFmode vector float modes
215 (define_mode_iterator VF1
216 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF])
217
218 ;; 128- and 256-bit SF vector modes
219 (define_mode_iterator VF1_128_256
220 [(V8SF "TARGET_AVX") V4SF])
221
222 (define_mode_iterator VF1_128_256VL
223 [V8SF (V4SF "TARGET_AVX512VL")])
224
225 ;; All DFmode vector float modes
226 (define_mode_iterator VF2
227 [(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
228
229 ;; 128- and 256-bit DF vector modes
230 (define_mode_iterator VF2_128_256
231 [(V4DF "TARGET_AVX") V2DF])
232
233 (define_mode_iterator VF2_512_256
234 [(V8DF "TARGET_AVX512F") V4DF])
235
236 (define_mode_iterator VF2_512_256VL
237 [V8DF (V4DF "TARGET_AVX512VL")])
238
239 ;; All 128bit vector float modes
240 (define_mode_iterator VF_128
241 [V4SF (V2DF "TARGET_SSE2")])
242
243 ;; All 256bit vector float modes
244 (define_mode_iterator VF_256
245 [V8SF V4DF])
246
247 ;; All 512bit vector float modes
248 (define_mode_iterator VF_512
249 [V16SF V8DF])
250
251 (define_mode_iterator VI48_AVX512VL
252 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
253 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
254
255 (define_mode_iterator VF_AVX512VL
256 [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
257 V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
258
259 (define_mode_iterator VF2_AVX512VL
260 [V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
261
262 (define_mode_iterator VF1_AVX512VL
263 [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")])
264
265 ;; All vector integer modes
266 (define_mode_iterator VI
267 [(V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
268 (V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
269 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
270 (V8SI "TARGET_AVX") V4SI
271 (V4DI "TARGET_AVX") V2DI])
272
273 (define_mode_iterator VI_AVX2
274 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
275 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
276 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
277 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
278
279 ;; All QImode vector integer modes
280 (define_mode_iterator VI1
281 [(V32QI "TARGET_AVX") V16QI])
282
283 (define_mode_iterator VI_ULOADSTORE_BW_AVX512VL
284 [V64QI
285 V32HI (V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL")])
286
287 (define_mode_iterator VI_ULOADSTORE_F_AVX512VL
288 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
289 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
290
291 ;; All DImode vector integer modes
292 (define_mode_iterator VI8
293 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
294
295 (define_mode_iterator VI8_AVX512VL
296 [V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
297
298 (define_mode_iterator VI8_256_512
299 [V8DI (V4DI "TARGET_AVX512VL")])
300
301 (define_mode_iterator VI1_AVX2
302 [(V32QI "TARGET_AVX2") V16QI])
303
304 (define_mode_iterator VI1_AVX512
305 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI])
306
307 (define_mode_iterator VI2_AVX2
308 [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
309
310 (define_mode_iterator VI2_AVX512F
311 [(V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI])
312
313 (define_mode_iterator VI4_AVX
314 [(V8SI "TARGET_AVX") V4SI])
315
316 (define_mode_iterator VI4_AVX2
317 [(V8SI "TARGET_AVX2") V4SI])
318
319 (define_mode_iterator VI4_AVX512F
320 [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
321
322 (define_mode_iterator VI4_AVX512VL
323 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")])
324
325 (define_mode_iterator VI48_AVX512F_AVX512VL
326 [V4SI V8SI (V16SI "TARGET_AVX512F")
327 (V2DI "TARGET_AVX512VL") (V4DI "TARGET_AVX512VL") (V8DI "TARGET_AVX512F")])
328
329 (define_mode_iterator VI2_AVX512VL
330 [(V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL") V32HI])
331
332 (define_mode_iterator VI8_AVX2_AVX512BW
333 [(V8DI "TARGET_AVX512BW") (V4DI "TARGET_AVX2") V2DI])
334
335 (define_mode_iterator VI8_AVX2
336 [(V4DI "TARGET_AVX2") V2DI])
337
338 (define_mode_iterator VI8_AVX2_AVX512F
339 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
340
341 (define_mode_iterator VI4_128_8_256
342 [V4SI V4DI])
343
344 ;; All V8D* modes
345 (define_mode_iterator V8FI
346 [V8DF V8DI])
347
348 ;; All V16S* modes
349 (define_mode_iterator V16FI
350 [V16SF V16SI])
351
352 ;; ??? We should probably use TImode instead.
353 (define_mode_iterator VIMAX_AVX2
354 [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") V1TI])
355
356 ;; ??? This should probably be dropped in favor of VIMAX_AVX2.
357 (define_mode_iterator SSESCALARMODE
358 [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") TI])
359
360 (define_mode_iterator VI12_AVX2
361 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
362 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
363
364 (define_mode_iterator VI24_AVX2
365 [(V16HI "TARGET_AVX2") V8HI
366 (V8SI "TARGET_AVX2") V4SI])
367
368 (define_mode_iterator VI124_AVX512F
369 [(V32QI "TARGET_AVX2") V16QI
370 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI
371 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
372
373 (define_mode_iterator VI124_AVX2
374 [(V32QI "TARGET_AVX2") V16QI
375 (V16HI "TARGET_AVX2") V8HI
376 (V8SI "TARGET_AVX2") V4SI])
377
378 (define_mode_iterator VI2_AVX2_AVX512BW
379 [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
380
381 (define_mode_iterator VI48_AVX2
382 [(V8SI "TARGET_AVX2") V4SI
383 (V4DI "TARGET_AVX2") V2DI])
384
385 (define_mode_iterator VI248_AVX2_8_AVX512F
386 [(V16HI "TARGET_AVX2") V8HI
387 (V8SI "TARGET_AVX2") V4SI
388 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
389
390 (define_mode_iterator VI248_AVX512BW_AVX512VL
391 [(V32HI "TARGET_AVX512BW")
392 (V4DI "TARGET_AVX512VL") V16SI V8DI])
393
394 ;; Assumes TARGET_AVX512VL as the baseline.
395 (define_mode_iterator VI24_AVX512BW_1
396 [(V16HI "TARGET_AVX512BW") (V8HI "TARGET_AVX512BW")
397 V8SI V4SI])
398
399 (define_mode_iterator VI48_AVX512F
400 [(V16SI "TARGET_AVX512F") V8SI V4SI
401 (V8DI "TARGET_AVX512F") V4DI V2DI])
402
403 (define_mode_iterator V48_AVX2
404 [V4SF V2DF
405 V8SF V4DF
406 (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
407 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
408
409 (define_mode_attr avx512
410 [(V16QI "avx512vl") (V32QI "avx512vl") (V64QI "avx512bw")
411 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")
412 (V4SI "avx512vl") (V8SI "avx512vl") (V16SI "avx512f")
413 (V2DI "avx512vl") (V4DI "avx512vl") (V8DI "avx512f")
414 (V4SF "avx512vl") (V8SF "avx512vl") (V16SF "avx512f")
415 (V2DF "avx512vl") (V4DF "avx512vl") (V8DF "avx512f")])
416
417 (define_mode_attr sse2_avx_avx512f
418 [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
419 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")
420 (V4SI "sse2") (V8SI "avx") (V16SI "avx512f")
421 (V2DI "avx512vl") (V4DI "avx512vl") (V8DI "avx512f")
422 (V16SF "avx512f") (V8SF "avx") (V4SF "avx")
423 (V8DF "avx512f") (V4DF "avx") (V2DF "avx")])
424
425 (define_mode_attr sse2_avx2
426 [(V16QI "sse2") (V32QI "avx2") (V64QI "avx512bw")
427 (V8HI "sse2") (V16HI "avx2") (V32HI "avx512bw")
428 (V4SI "sse2") (V8SI "avx2") (V16SI "avx512f")
429 (V2DI "sse2") (V4DI "avx2") (V8DI "avx512f")
430 (V1TI "sse2") (V2TI "avx2") (V4TI "avx512bw")])
431
432 (define_mode_attr ssse3_avx2
433 [(V16QI "ssse3") (V32QI "avx2") (V64QI "avx512bw")
434 (V4HI "ssse3") (V8HI "ssse3") (V16HI "avx2") (V32HI "avx512bw")
435 (V4SI "ssse3") (V8SI "avx2")
436 (V2DI "ssse3") (V4DI "avx2")
437 (TI "ssse3") (V2TI "avx2") (V4TI "avx512bw")])
438
439 (define_mode_attr sse4_1_avx2
440 [(V16QI "sse4_1") (V32QI "avx2") (V64QI "avx512bw")
441 (V8HI "sse4_1") (V16HI "avx2") (V32HI "avx512bw")
442 (V4SI "sse4_1") (V8SI "avx2") (V16SI "avx512f")
443 (V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512dq")])
444
445 (define_mode_attr avx_avx2
446 [(V4SF "avx") (V2DF "avx")
447 (V8SF "avx") (V4DF "avx")
448 (V4SI "avx2") (V2DI "avx2")
449 (V8SI "avx2") (V4DI "avx2")])
450
451 (define_mode_attr vec_avx2
452 [(V16QI "vec") (V32QI "avx2")
453 (V8HI "vec") (V16HI "avx2")
454 (V4SI "vec") (V8SI "avx2")
455 (V2DI "vec") (V4DI "avx2")])
456
457 (define_mode_attr avx2_avx512
458 [(V4SI "avx2") (V8SI "avx2") (V16SI "avx512f")
459 (V2DI "avx2") (V4DI "avx2") (V8DI "avx512f")
460 (V4SF "avx2") (V8SF "avx2") (V16SF "avx512f")
461 (V2DF "avx2") (V4DF "avx2") (V8DF "avx512f")
462 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")])
463
;; Letter selecting the integer ("i") or floating-point ("f") flavour of
;; shuffle-style mnemonics built as "...<shuffletype>64x4" /
;; "...<shuffletype>32x4" (e.g. vextract<shuffletype>64x4,
;; vbroadcast<shuffletype>32x4).  Only "f" and "i" forms of those
;; instructions exist, so every integer vector mode must map to "i".
;; V32HI and V4TI are listed so that all integer modes of VMOVE have an
;; entry.
(define_mode_attr shuffletype
  [(V16SF "f") (V16SI "i") (V8DF "f") (V8DI "i")
   (V8SF "f") (V8SI "i") (V4DF "f") (V4DI "i")
   (V4SF "f") (V4SI "i") (V2DF "f") (V2DI "i")
   (V32HI "i") (V16HI "i") (V8HI "i")
   (V64QI "i") (V32QI "i") (V16QI "i")
   (V4TI "i") (V2TI "i") (V1TI "i")])
470
471 (define_mode_attr ssequartermode
472 [(V16SF "V4SF") (V8DF "V2DF") (V16SI "V4SI") (V8DI "V2DI")])
473
474 (define_mode_attr ssedoublemodelower
475 [(V16QI "v16hi") (V32QI "v32hi") (V64QI "v64hi")
476 (V8HI "v8si") (V16HI "v16si") (V32HI "v32si")
477 (V4SI "v4di") (V8SI "v8di") (V16SI "v16di")])
478
479 (define_mode_attr ssedoublemode
480 [(V16SF "V32SF") (V16SI "V32SI") (V8DI "V16DI") (V8DF "V16DF")
481 (V8SF "V16SF") (V8SI "V16SI") (V4DI "V8DI") (V4DF "V8DF")
482 (V16HI "V16SI") (V8HI "V8SI") (V4HI "V4SI") (V4SI "V4DI")
483 (V32HI "V32SI") (V32QI "V32HI") (V16QI "V16HI") (V64QI "V64HI")])
484
485 (define_mode_attr ssebytemode
486 [(V8DI "V64QI") (V4DI "V32QI") (V2DI "V16QI")])
487
488 ;; All 128bit vector integer modes
489 (define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
490
491 ;; All 256bit vector integer modes
492 (define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI])
493
494 ;; All 512bit vector integer modes
495 (define_mode_iterator VI_512 [V64QI V32HI V16SI V8DI])
496
497 ;; Various 128bit vector integer mode combinations
498 (define_mode_iterator VI12_128 [V16QI V8HI])
499 (define_mode_iterator VI14_128 [V16QI V4SI])
500 (define_mode_iterator VI124_128 [V16QI V8HI V4SI])
501 (define_mode_iterator VI24_128 [V8HI V4SI])
502 (define_mode_iterator VI248_128 [V8HI V4SI V2DI])
503 (define_mode_iterator VI48_128 [V4SI V2DI])
504
505 ;; Various 256bit and 512bit vector integer mode combinations
506 (define_mode_iterator VI124_256 [V32QI V16HI V8SI])
507 (define_mode_iterator VI124_256_AVX512F_AVX512BW
508 [V32QI V16HI V8SI
509 (V64QI "TARGET_AVX512BW")
510 (V32HI "TARGET_AVX512BW")
511 (V16SI "TARGET_AVX512F")])
512 (define_mode_iterator VI48_256 [V8SI V4DI])
513 (define_mode_iterator VI48_512 [V16SI V8DI])
514 (define_mode_iterator VI4_256_8_512 [V8SI V8DI])
515 (define_mode_iterator VI_AVX512BW
516 [V16SI V8DI (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")])
517
518 ;; Int-float size matches
519 (define_mode_iterator VI4F_128 [V4SI V4SF])
520 (define_mode_iterator VI8F_128 [V2DI V2DF])
521 (define_mode_iterator VI4F_256 [V8SI V8SF])
522 (define_mode_iterator VI8F_256 [V4DI V4DF])
523 (define_mode_iterator VI8F_256_512
524 [V4DI V4DF (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
525 (define_mode_iterator VI48F_256_512
526 [V8SI V8SF
527 (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
528 (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
529 (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")])
530 (define_mode_iterator VF48_I1248
531 [V16SI V16SF V8DI V8DF V32HI V64QI])
532 (define_mode_iterator VI48F
533 [V16SI V16SF V8DI V8DF
534 (V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL")
535 (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
536 (V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
537 (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
538 (define_mode_iterator VI48F_256 [V8SI V8SF V4DI V4DF])
539
540 ;; Mapping from float mode to required SSE level
541 (define_mode_attr sse
542 [(SF "sse") (DF "sse2")
543 (V4SF "sse") (V2DF "sse2")
544 (V16SF "avx512f") (V8SF "avx")
545 (V8DF "avx512f") (V4DF "avx")])
546
547 (define_mode_attr sse2
548 [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
549 (V2DI "sse2") (V4DI "avx") (V8DI "avx512f")])
550
551 (define_mode_attr sse3
552 [(V16QI "sse3") (V32QI "avx")])
553
554 (define_mode_attr sse4_1
555 [(V4SF "sse4_1") (V2DF "sse4_1")
556 (V8SF "avx") (V4DF "avx")
557 (V8DF "avx512f")])
558
559 (define_mode_attr avxsizesuffix
560 [(V64QI "512") (V32HI "512") (V16SI "512") (V8DI "512")
561 (V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
562 (V16QI "") (V8HI "") (V4SI "") (V2DI "")
563 (V16SF "512") (V8DF "512")
564 (V8SF "256") (V4DF "256")
565 (V4SF "") (V2DF "")])
566
567 ;; SSE instruction mode
568 (define_mode_attr sseinsnmode
569 [(V64QI "XI") (V32HI "XI") (V16SI "XI") (V8DI "XI") (V4TI "XI")
570 (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI")
571 (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
572 (V16SF "V16SF") (V8DF "V8DF")
573 (V8SF "V8SF") (V4DF "V4DF")
574 (V4SF "V4SF") (V2DF "V2DF")
575 (TI "TI")])
576
577 ;; Mapping of vector modes to corresponding mask size
578 (define_mode_attr avx512fmaskmode
579 [(V64QI "DI") (V32QI "SI") (V16QI "HI")
580 (V32HI "SI") (V16HI "HI") (V8HI "QI") (V4HI "QI")
581 (V16SI "HI") (V8SI "QI") (V4SI "QI")
582 (V8DI "QI") (V4DI "QI") (V2DI "QI")
583 (V16SF "HI") (V8SF "QI") (V4SF "QI")
584 (V8DF "QI") (V4DF "QI") (V2DF "QI")])
585
586 ;; Mapping of vector float modes to an integer mode of the same size
587 (define_mode_attr sseintvecmode
588 [(V16SF "V16SI") (V8DF "V8DI")
589 (V8SF "V8SI") (V4DF "V4DI")
590 (V4SF "V4SI") (V2DF "V2DI")
591 (V16SI "V16SI") (V8DI "V8DI")
592 (V8SI "V8SI") (V4DI "V4DI")
593 (V4SI "V4SI") (V2DI "V2DI")
594 (V16HI "V16HI") (V8HI "V8HI")
595 (V32HI "V32HI") (V64QI "V64QI")
596 (V32QI "V32QI") (V16QI "V16QI")])
597
598 (define_mode_attr sseintvecmode2
599 [(V8DF "XI") (V4DF "OI") (V2DF "TI")
600 (V8SF "OI") (V4SF "TI")])
601
602 (define_mode_attr sseintvecmodelower
603 [(V16SF "v16si") (V8DF "v8di")
604 (V8SF "v8si") (V4DF "v4di")
605 (V4SF "v4si") (V2DF "v2di")
606 (V8SI "v8si") (V4DI "v4di")
607 (V4SI "v4si") (V2DI "v2di")
608 (V16HI "v16hi") (V8HI "v8hi")
609 (V32QI "v32qi") (V16QI "v16qi")])
610
611 ;; Mapping of vector modes to a vector mode of double size
612 (define_mode_attr ssedoublevecmode
613 [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
614 (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
615 (V8SF "V16SF") (V4DF "V8DF")
616 (V4SF "V8SF") (V2DF "V4DF")])
617
618 ;; Mapping of vector modes to a vector mode of half size
619 (define_mode_attr ssehalfvecmode
620 [(V64QI "V32QI") (V32HI "V16HI") (V16SI "V8SI") (V8DI "V4DI")
621 (V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
622 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI")
623 (V16SF "V8SF") (V8DF "V4DF")
624 (V8SF "V4SF") (V4DF "V2DF")
625 (V4SF "V2SF")])
626
627 ;; Mapping of vector modes to packed single mode of the same size
628 (define_mode_attr ssePSmode
629 [(V16SI "V16SF") (V8DF "V16SF")
630 (V16SF "V16SF") (V8DI "V16SF")
631 (V64QI "V16SF") (V32QI "V8SF") (V16QI "V4SF")
632 (V32HI "V16SF") (V16HI "V8SF") (V8HI "V4SF")
633 (V8SI "V8SF") (V4SI "V4SF")
634 (V4DI "V8SF") (V2DI "V4SF")
635 (V4TI "V16SF") (V2TI "V8SF") (V1TI "V4SF")
636 (V8SF "V8SF") (V4SF "V4SF")
637 (V4DF "V8SF") (V2DF "V4SF")])
638
639 (define_mode_attr ssePSmode2
640 [(V8DI "V8SF") (V4DI "V4SF")])
641
642 ;; Mapping of vector modes back to the scalar modes
643 (define_mode_attr ssescalarmode
644 [(V64QI "QI") (V32QI "QI") (V16QI "QI")
645 (V32HI "HI") (V16HI "HI") (V8HI "HI")
646 (V16SI "SI") (V8SI "SI") (V4SI "SI")
647 (V8DI "DI") (V4DI "DI") (V2DI "DI")
648 (V16SF "SF") (V8SF "SF") (V4SF "SF")
649 (V8DF "DF") (V4DF "DF") (V2DF "DF")])
650
651 ;; Mapping of vector modes to the 128bit modes
652 (define_mode_attr ssexmmmode
653 [(V64QI "V16QI") (V32QI "V16QI") (V16QI "V16QI")
654 (V32HI "V8HI") (V16HI "V8HI") (V8HI "V8HI")
655 (V16SI "V4SI") (V8SI "V4SI") (V4SI "V4SI")
656 (V8DI "V2DI") (V4DI "V2DI") (V2DI "V2DI")
657 (V16SF "V4SF") (V8SF "V4SF") (V4SF "V4SF")
658 (V8DF "V2DF") (V4DF "V2DF") (V2DF "V2DF")])
659
660 ;; Pointer size override for scalar modes (Intel asm dialect)
661 (define_mode_attr iptr
662 [(V32QI "b") (V16HI "w") (V8SI "k") (V4DI "q")
663 (V16QI "b") (V8HI "w") (V4SI "k") (V2DI "q")
664 (V8SF "k") (V4DF "q")
665 (V4SF "k") (V2DF "q")
666 (SF "k") (DF "q")])
667
668 ;; Number of scalar elements in each vector type
669 (define_mode_attr ssescalarnum
670 [(V64QI "64") (V16SI "16") (V8DI "8")
671 (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
672 (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
673 (V16SF "16") (V8DF "8")
674 (V8SF "8") (V4DF "4")
675 (V4SF "4") (V2DF "2")])
676
677 ;; Mask of scalar elements in each vector type
678 (define_mode_attr ssescalarnummask
679 [(V32QI "31") (V16HI "15") (V8SI "7") (V4DI "3")
680 (V16QI "15") (V8HI "7") (V4SI "3") (V2DI "1")
681 (V8SF "7") (V4DF "3")
682 (V4SF "3") (V2DF "1")])
683
684 (define_mode_attr ssescalarsize
685 [(V8DI "64") (V4DI "64") (V2DI "64")
686 (V64QI "8") (V32QI "8") (V16QI "8")
687 (V32HI "16") (V16HI "16") (V8HI "16")
688 (V16SI "32") (V8SI "32") (V4SI "32")
689 (V16SF "32") (V8DF "64")])
690
691 ;; SSE prefix for integer vector modes
692 (define_mode_attr sseintprefix
693 [(V2DI "p") (V2DF "")
694 (V4DI "p") (V4DF "")
695 (V8DI "p") (V8DF "")
696 (V4SI "p") (V4SF "")
697 (V8SI "p") (V8SF "")
698 (V16SI "p") (V16SF "")
699 (V16QI "p") (V8HI "p")
700 (V32QI "p") (V16HI "p")
701 (V64QI "p") (V32HI "p")])
702
703 ;; SSE scalar suffix for vector modes
704 (define_mode_attr ssescalarmodesuffix
705 [(SF "ss") (DF "sd")
706 (V8SF "ss") (V4DF "sd")
707 (V4SF "ss") (V2DF "sd")
708 (V8SI "ss") (V4DI "sd")
709 (V4SI "d")])
710
711 ;; Pack/unpack vector modes
712 (define_mode_attr sseunpackmode
713 [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")
714 (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")
715 (V32HI "V16SI") (V64QI "V32HI") (V16SI "V8DI")])
716
717 (define_mode_attr ssepackmode
718 [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")
719 (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")
720 (V32HI "V64QI") (V16SI "V32HI") (V8DI "V16SI")])
721
722 ;; Mapping of the max integer size for xop rotate immediate constraint
723 (define_mode_attr sserotatemax
724 [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
725
726 ;; Mapping of mode to cast intrinsic name
727 (define_mode_attr castmode
728 [(V8SI "si") (V8SF "ps") (V4DF "pd")
729 (V16SI "si") (V16SF "ps") (V8DF "pd")])
730
731 ;; Instruction suffix for sign and zero extensions.
732 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
733
734 ;; i128 for integer vectors and TARGET_AVX2, f128 otherwise.
735 ;; i64x4 or f64x4 for 512bit modes.
736 (define_mode_attr i128
737 [(V16SF "f64x4") (V8SF "f128") (V8DF "f64x4") (V4DF "f128")
738 (V64QI "i64x4") (V32QI "%~128") (V32HI "i64x4") (V16HI "%~128")
739 (V16SI "i64x4") (V8SI "%~128") (V8DI "i64x4") (V4DI "%~128")])
740
741 ;; Mix-n-match
742 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
743 (define_mode_iterator AVX512MODE2P [V16SI V16SF V8DF])
744
745 ;; Mapping for dbpsadbw modes
746 (define_mode_attr dbpsadbwmode
747 [(V32HI "V64QI") (V16HI "V32QI") (V8HI "V16QI")])
748
749 ;; Mapping suffixes for broadcast
750 (define_mode_attr bcstscalarsuff
751 [(V64QI "b") (V32QI "b") (V16QI "b")
752 (V32HI "w") (V16HI "w") (V8HI "w")
753 (V16SI "d") (V8SI "d") (V4SI "d")
754 (V8DI "q") (V4DI "q") (V2DI "q")
755 (V16SF "ss") (V8SF "ss") (V4SF "ss")
756 (V8DF "sd") (V4DF "sd") (V2DF "sd")])
757
758 ;; Tie mode of assembler operand to mode iterator
759 (define_mode_attr concat_tg_mode
760 [(V32QI "t") (V16HI "t") (V8SI "t") (V4DI "t") (V8SF "t") (V4DF "t")
761 (V64QI "g") (V32HI "g") (V16SI "g") (V8DI "g") (V16SF "g") (V8DF "g")])
762
763
764 ;; Include define_subst patterns for instructions with mask
765 (include "subst.md")
766
767 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
768
769 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
770 ;;
771 ;; Move patterns
772 ;;
773 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
774
775 ;; All of these patterns are enabled for SSE1 as well as SSE2.
776 ;; This is essential for maintaining stable calling conventions.
777
778 (define_expand "mov<mode>"
779 [(set (match_operand:VMOVE 0 "nonimmediate_operand")
780 (match_operand:VMOVE 1 "nonimmediate_operand"))]
781 "TARGET_SSE"
782 {
783 ix86_expand_vector_move (<MODE>mode, operands);
784 DONE;
785 })
786
;; Vector move insn for every VMOVE mode.  Alternative 0 takes a "C"
;; constant source, alternative 1 a register-or-memory source into a
;; register, alternative 2 a register source into memory.  The insn
;; condition requires at least one of the two operands to be a register.
(define_insn "*mov<mode>_internal"
  [(set (match_operand:VMOVE 0 "nonimmediate_operand"               "=v,v ,m")
	(match_operand:VMOVE 1 "nonimmediate_or_sse_const_operand"  "C ,vm,v"))]
  "TARGET_SSE
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  int mode = get_attr_mode (insn);

  /* Constant source: the opcode comes from standard_sse_constant_opcode.  */
  if (which_alternative == 0)
    return standard_sse_constant_opcode (insn, operands[1]);

  /* Only the load/copy (1) and store (2) alternatives remain.  */
  if (which_alternative != 1 && which_alternative != 2)
    gcc_unreachable ();

  /* There is no evex-encoded vmov* for sizes smaller than 64-bytes
     in avx512f, so we need to use workarounds, to access sse registers
     16-31, which are evex-only.  In avx512vl we don't need workarounds.  */
  if (TARGET_AVX512F
      && !TARGET_AVX512VL
      && GET_MODE_SIZE (<MODE>mode) < 64
      && ((REG_P (operands[0]) && EXT_REX_SSE_REGNO_P (REGNO (operands[0])))
	  || (REG_P (operands[1]) && EXT_REX_SSE_REGNO_P (REGNO (operands[1])))))
    {
      /* Store to memory: use an extract with index 0.  */
      if (memory_operand (operands[0], <MODE>mode))
	{
	  if (<MODE_SIZE> == 16)
	    return "vextract<shuffletype>32x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
	  if (<MODE_SIZE> == 32)
	    return "vextract<shuffletype>64x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
	  gcc_unreachable ();
	}

      /* Load from memory: use a broadcast.  */
      if (memory_operand (operands[1], <MODE>mode))
	{
	  if (<MODE_SIZE> == 16)
	    return "vbroadcast<shuffletype>32x4\t{%1, %g0|%g0, %1}";
	  if (<MODE_SIZE> == 32)
	    return "vbroadcast<shuffletype>64x4\t{%1, %g0|%g0, %1}";
	  gcc_unreachable ();
	}

      /* Reg -> reg move is always aligned.  Just use wider move.  */
      switch (mode)
	{
	case MODE_V4SF:
	case MODE_V8SF:
	  return "vmovaps\t{%g1, %g0|%g0, %g1}";

	case MODE_V2DF:
	case MODE_V4DF:
	  return "vmovapd\t{%g1, %g0|%g0, %g1}";

	case MODE_TI:
	case MODE_OI:
	  return "vmovdqa64\t{%g1, %g0|%g0, %g1}";

	default:
	  gcc_unreachable ();
	}
    }

  /* Ordinary case: choose the aligned or unaligned form by insn mode.  */
  const bool unaligned_p
    = (misaligned_operand (operands[0], <MODE>mode)
       || misaligned_operand (operands[1], <MODE>mode));

  switch (mode)
    {
    case MODE_V4SF:
    case MODE_V8SF:
    case MODE_V16SF:
      return (TARGET_AVX && unaligned_p)
	     ? "vmovups\t{%1, %0|%0, %1}"
	     : "%vmovaps\t{%1, %0|%0, %1}";

    case MODE_V2DF:
    case MODE_V4DF:
    case MODE_V8DF:
      return (TARGET_AVX && unaligned_p)
	     ? "vmovupd\t{%1, %0|%0, %1}"
	     : "%vmovapd\t{%1, %0|%0, %1}";

    case MODE_TI:
    case MODE_OI:
      if (TARGET_AVX && unaligned_p)
	return TARGET_AVX512VL ? "vmovdqu64\t{%1, %0|%0, %1}"
			       : "vmovdqu\t{%1, %0|%0, %1}";
      return TARGET_AVX512VL ? "vmovdqa64\t{%1, %0|%0, %1}"
			     : "%vmovdqa\t{%1, %0|%0, %1}";

    case MODE_XI:
      /* Unlike the narrower modes, this case is not gated on TARGET_AVX.  */
      return unaligned_p ? "vmovdqu64\t{%1, %0|%0, %1}"
			 : "vmovdqa64\t{%1, %0|%0, %1}";

    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "sselog1,ssemov,ssemov")
   (set_attr "prefix" "maybe_vex")
   (set (attr "mode")
	(cond [(and (match_test "<MODE_SIZE> == 16")
		    (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
			 (and (eq_attr "alternative" "2")
			      (match_test "TARGET_SSE_TYPELESS_STORES"))))
		 (const_string "<ssePSmode>")
	       (match_test "TARGET_AVX")
		 (const_string "<sseinsnmode>")
	       (ior (not (match_test "TARGET_SSE2"))
		    (match_test "optimize_function_for_size_p (cfun)"))
		 (const_string "V4SF")
	       (and (eq_attr "alternative" "0")
		    (match_test "TARGET_SSE_LOAD0_BY_PXOR"))
		 (const_string "TI")
	      ]
	      (const_string "<sseinsnmode>")))])
907
;; Masked load / register move for the V48_AVX512VL modes.  Operand 1 is
;; the source (register or memory), operand 2 the merge source (tied to
;; the destination or a "C" constant) and operand 3 the mask register.
;; Float element modes use vmov{a,u}<ssemodesuffix>, integer element modes
;; use vmovdq{a,u}<ssescalarsize>; the unaligned form is picked when
;; operand 1 is a misaligned operand.
(define_insn "<avx512>_load<mode>_mask"
  [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v")
	(vec_merge:V48_AVX512VL
	  (match_operand:V48_AVX512VL 1 "nonimmediate_operand" "v,m")
	  (match_operand:V48_AVX512VL 2 "vector_move_operand" "0C,0C")
	  (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
  "TARGET_AVX512F"
{
  const bool unaligned_p = misaligned_operand (operands[1], <MODE>mode);

  if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
    return unaligned_p
	   ? "vmovu<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
	   : "vmova<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";

  return unaligned_p
	 ? "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
	 : "vmovdqa<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
}
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "memory" "none,load")
   (set_attr "mode" "<sseinsnmode>")])
945
946 (define_insn "<avx512>_load<mode>_mask"
947 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v")
948 (vec_merge:VI12_AVX512VL
949 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "v,m")
950 (match_operand:VI12_AVX512VL 2 "vector_move_operand" "0C,0C")
951 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
952 "TARGET_AVX512BW"
953 "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
954 [(set_attr "type" "ssemov")
955 (set_attr "prefix" "evex")
956 (set_attr "memory" "none,load")
957 (set_attr "mode" "<sseinsnmode>")])
958
;; Masked blend for the V48_AVX512VL modes: a vec_merge of operand 2
;; (register or memory) and operand 1 (register) under mask operand 3.
;; The integer forms of the instruction are spelled vpblendm{d,q} while
;; the float forms are vblendm{ps,pd}, so the mnemonic needs
;; <sseintprefix> ("p" for integer modes, empty for float modes).
(define_insn "<avx512>_blendm<mode>"
  [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
	(vec_merge:V48_AVX512VL
	  (match_operand:V48_AVX512VL 2 "nonimmediate_operand" "vm")
	  (match_operand:V48_AVX512VL 1 "register_operand" "v")
	  (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
  "TARGET_AVX512F"
  "v<sseintprefix>blendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
970
;; Mask-controlled blend for the VI12_AVX512VL modes (enabled by
;; TARGET_AVX512BW): a vec_merge of operand 2 and operand 1 under mask
;; register operand 3, emitted as vpblendm<ssemodesuffix>.
971 (define_insn "<avx512>_blendm<mode>"
972 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
973 (vec_merge:VI12_AVX512VL
974 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
975 (match_operand:VI12_AVX512VL 1 "register_operand" "v")
976 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
977 "TARGET_AVX512BW"
978 "vpblendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
979 [(set_attr "type" "ssemov")
980 (set_attr "prefix" "evex")
981 (set_attr "mode" "<sseinsnmode>")])
982
;; Masked store for the V48_AVX512VL modes: register operand 1 is merged
;; into memory operand 0 under mask register operand 2; (match_dup 0)
;; supplies the elements that are left untouched.
;; The mnemonic is assembled at output time from three pieces:
;;   "vmov" + <ssemodesuffix>     for floating-point element modes,
;;   "vmovdq" + <ssescalarsize>   for integer element modes,
;; with an "a" or "u" in between depending on the alignment of the
;; memory destination.
(define_insn "<avx512>_store<mode>_mask"
  [(set (match_operand:V48_AVX512VL 0 "memory_operand" "=m")
        (vec_merge:V48_AVX512VL
          (match_operand:V48_AVX512VL 1 "register_operand" "v")
          (match_dup 0)
          (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
  "TARGET_AVX512F"
{
  /* Returned to the caller, hence static.  */
  static char buf [64];

  const char *insn_op;
  const char *sse_suffix;
  const char *align;
  if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
    {
      insn_op = "vmov";
      sse_suffix = "<ssemodesuffix>";
    }
  else
    {
      insn_op = "vmovdq";
      sse_suffix = "<ssescalarsize>";
    }

  /* The memory reference of a store is operand 0.  Operand 1 is
     constrained to a register, so testing it could never select the
     unaligned form and an under-aligned destination would be written
     with the aligned mnemonic.  */
  if (misaligned_operand (operands[0], <MODE>mode))
    align = "u";
  else
    align = "a";

  /* "%%" survives snprintf as a single "%" for the operand printer.  */
  snprintf (buf, sizeof (buf), "%s%s%s\t{%%1, %%0%%{%%2%%}|%%0%%{%%2%%}, %%1}",
            insn_op, align, sse_suffix);
  return buf;
}
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "memory" "store")
   (set_attr "mode" "<sseinsnmode>")])
1020
;; Masked store for the VI12_AVX512VL modes (enabled by TARGET_AVX512BW):
;; register operand 1 is merged into memory operand 0 under mask register
;; operand 2; (match_dup 0) supplies the elements that are kept.  The
;; unaligned vmovdqu<ssescalarsize> mnemonic is used unconditionally.
1021 (define_insn "<avx512>_store<mode>_mask"
1022 [(set (match_operand:VI12_AVX512VL 0 "memory_operand" "=m")
1023 (vec_merge:VI12_AVX512VL
1024 (match_operand:VI12_AVX512VL 1 "register_operand" "v")
1025 (match_dup 0)
1026 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1027 "TARGET_AVX512BW"
1028 "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
1029 [(set_attr "type" "ssemov")
1030 (set_attr "prefix" "evex")
1031 (set_attr "memory" "store")
1032 (set_attr "mode" "<sseinsnmode>")])
1033
;; movq between vector operands: element 0 of V2DI operand 1 (register or
;; memory) becomes the low element of operand 0 and the high element is
;; the constant 0 (second arm of the vec_concat).
1034 (define_insn "sse2_movq128"
1035 [(set (match_operand:V2DI 0 "register_operand" "=x")
1036 (vec_concat:V2DI
1037 (vec_select:DI
1038 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
1039 (parallel [(const_int 0)]))
1040 (const_int 0)))]
1041 "TARGET_SSE2"
1042 "%vmovq\t{%1, %0|%0, %q1}"
1043 [(set_attr "type" "ssemov")
1044 (set_attr "prefix" "maybe_vex")
1045 (set_attr "mode" "TI")])
1046
1047 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
1048 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
1049 ;; from memory, we'd prefer to load the memory directly into the %xmm
1050 ;; register. To facilitate this happy circumstance, this pattern won't
1051 ;; split until after register allocation. If the 64-bit value didn't
1052 ;; come from memory, this is the best we can do. This is much better
1053 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
1054 ;; from there.
1055
;; DImode value viewed as V4SI, for 32-bit targets.  The insn always
;; outputs "#" and is split once reload has completed:
;;   alternative 0 (source in integer registers): each 32-bit half is
;;     loaded into its own xmm register (operand 0 and scratch operand 2)
;;     and the two are interleaved into operand 0;
;;   alternative 1 (source in memory): a single 64-bit load with the
;;     upper half zeroed.
(define_insn_and_split "movdi_to_sse"
  [(parallel
    [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
          (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
     (clobber (match_scratch:V4SI 2 "=&x,X"))])]
  "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  if (register_operand (operands[1], DImode))
    {
      /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
         Assemble the 64-bit DImode value in an xmm register.  */
      emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
                                  gen_rtx_SUBREG (SImode, operands[1], 0)));
      emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
                                  gen_rtx_SUBREG (SImode, operands[1], 4)));
      emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
                                             operands[2]));
    }
  else if (memory_operand (operands[1], DImode))
    /* This split only runs after reload, when no new pseudo register may
       be created, so load straight into a V2DI view of operand 0 instead
       of going through a fresh temporary.  */
    emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
                                   operands[1], const0_rtx));
  else
    gcc_unreachable ();

  /* Everything has been emitted by hand; without DONE the placeholder
     (const_int 0) replacement pattern would be emitted as well.  */
  DONE;
})
1086
;; After reload, rewrite a V4SF register set whose source satisfies
;; zero_extended_scalar_load_operand.  Operand 1 is narrowed to an SFmode
;; subreg at offset 0 and operand 2 becomes the V4SF zero vector; the
;; replacement is a vec_merge (mask const_int 1) of the duplicated scalar
;; with that zero vector.
1087 (define_split
1088 [(set (match_operand:V4SF 0 "register_operand")
1089 (match_operand:V4SF 1 "zero_extended_scalar_load_operand"))]
1090 "TARGET_SSE && reload_completed"
1091 [(set (match_dup 0)
1092 (vec_merge:V4SF
1093 (vec_duplicate:V4SF (match_dup 1))
1094 (match_dup 2)
1095 (const_int 1)))]
1096 {
1097 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
1098 operands[2] = CONST0_RTX (V4SFmode);
1099 })
1100
;; V2DF counterpart of the split above: after reload, operand 1 is
;; narrowed to a DFmode subreg at offset 0 and concatenated with a DFmode
;; zero (operand 2) to form the V2DF result.
1101 (define_split
1102 [(set (match_operand:V2DF 0 "register_operand")
1103 (match_operand:V2DF 1 "zero_extended_scalar_load_operand"))]
1104 "TARGET_SSE2 && reload_completed"
1105 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
1106 {
1107 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
1108 operands[2] = CONST0_RTX (DFmode);
1109 })
1110
;; Expander for movmisalign<mode> over the VMOVE modes.  The operand array
;; is handed to ix86_expand_vector_move_misalign, which emits the move;
;; the pattern itself is never emitted (DONE).
1111 (define_expand "movmisalign<mode>"
1112 [(set (match_operand:VMOVE 0 "nonimmediate_operand")
1113 (match_operand:VMOVE 1 "nonimmediate_operand"))]
1114 "TARGET_SSE"
1115 {
1116 ix86_expand_vector_move_misalign (<MODE>mode, operands);
1117 DONE;
1118 })
1119
;; Expander for an explicitly requested unaligned load of a VF vector.
;; With AVX and a source that misaligned_operand reports as misaligned,
;; a plain SET (wrapped in a VEC_MERGE for the masked variant) is emitted
;; instead of the UNSPEC_LOADU pattern; the ordinary move pattern copes
;; with that and, lacking the UNSPEC, can be combined with arithmetic
;; instructions.  In every other case the UNSPEC_LOADU pattern below is
;; emitted so the request for a misaligned load is honoured.
(define_expand "<sse>_loadu<ssemodesuffix><avxsizesuffix><mask_name>"
  [(set (match_operand:VF 0 "register_operand")
        (unspec:VF [(match_operand:VF 1 "nonimmediate_operand")]
          UNSPEC_LOADU))]
  "TARGET_SSE && <mask_mode512bit_condition>"
{
  if (TARGET_AVX && misaligned_operand (operands[1], <MODE>mode))
    {
      /* <mask_applied> selects between the bare source and a merge with
         the mask operands of the masked variant.  */
      rtx pat = (<mask_applied>
                 ? gen_rtx_VEC_MERGE (<MODE>mode, operands[1],
                                      operands[2 * <mask_applied>],
                                      operands[3 * <mask_applied>])
                 : operands[1]);
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], pat));
      DONE;
    }
})
1143
;; Unaligned load (UNSPEC_LOADU) of a VF vector.  When the computed "mode"
;; attribute is one of the packed-single modes the move is emitted as
;; movups, otherwise as movu<ssemodesuffix>; the masked variant appends
;; <mask_operand2> to the destination in both cases.
(define_insn "*<sse>_loadu<ssemodesuffix><avxsizesuffix><mask_name>"
  [(set (match_operand:VF 0 "register_operand" "=v")
        (unspec:VF
          [(match_operand:VF 1 "nonimmediate_operand" "vm")]
          UNSPEC_LOADU))]
  "TARGET_SSE && <mask_mode512bit_condition>"
{
  const enum attr_mode insn_mode = get_attr_mode (insn);

  if (insn_mode == MODE_V16SF
      || insn_mode == MODE_V8SF
      || insn_mode == MODE_V4SF)
    return "%vmovups\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";

  return "%vmovu<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
}
  [(set_attr "type" "ssemov")
   (set_attr "movu" "1")
   (set_attr "ssememalign" "8")
   (set_attr "prefix" "maybe_vex")
   (set (attr "mode")
        (cond [(and (match_test "<MODE_SIZE> == 16")
                    (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
                 (const_string "<ssePSmode>")
               (match_test "TARGET_AVX")
                 (const_string "<MODE>")
               (match_test "optimize_function_for_size_p (cfun)")
                 (const_string "V4SF")
              ]
              (const_string "<MODE>")))])
1175
;; Unaligned store (UNSPEC_STOREU) of a VF vector.  The mnemonic follows
;; the computed "mode" attribute: movups for the packed-single modes,
;; movu<ssemodesuffix> otherwise.  For 16-byte modes the attribute is
;; switched to <ssePSmode> when either packed-single tuning or typeless
;; stores are in effect.
(define_insn "<sse>_storeu<ssemodesuffix><avxsizesuffix>"
  [(set (match_operand:VF 0 "memory_operand" "=m")
        (unspec:VF
          [(match_operand:VF 1 "register_operand" "v")]
          UNSPEC_STOREU))]
  "TARGET_SSE"
{
  const enum attr_mode insn_mode = get_attr_mode (insn);

  if (insn_mode == MODE_V16SF
      || insn_mode == MODE_V8SF
      || insn_mode == MODE_V4SF)
    return "%vmovups\t{%1, %0|%0, %1}";

  return "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}";
}
  [(set_attr "type" "ssemov")
   (set_attr "movu" "1")
   (set_attr "ssememalign" "8")
   (set_attr "prefix" "maybe_vex")
   (set (attr "mode")
        (cond [(and (match_test "<MODE_SIZE> == 16")
                    (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
                         (match_test "TARGET_SSE_TYPELESS_STORES")))
                 (const_string "<ssePSmode>")
               (match_test "TARGET_AVX")
                 (const_string "<MODE>")
               (match_test "optimize_function_for_size_p (cfun)")
                 (const_string "V4SF")
              ]
              (const_string "<MODE>")))])
1208
;; Masked unaligned store for the VF_AVX512VL modes: the UNSPEC_STOREU of
;; register operand 1 is merged into memory operand 0 under mask register
;; operand 2.  The mnemonic is vmovups when the "mode" attribute is a
;; packed-single mode and vmovu<ssemodesuffix> otherwise.
(define_insn "<avx512>_storeu<ssemodesuffix><avxsizesuffix>_mask"
  [(set (match_operand:VF_AVX512VL 0 "memory_operand" "=m")
        (vec_merge:VF_AVX512VL
          (unspec:VF_AVX512VL
            [(match_operand:VF_AVX512VL 1 "register_operand" "v")]
            UNSPEC_STOREU)
          (match_dup 0)
          (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
  "TARGET_AVX512F"
{
  const enum attr_mode insn_mode = get_attr_mode (insn);

  if (insn_mode == MODE_V16SF
      || insn_mode == MODE_V8SF
      || insn_mode == MODE_V4SF)
    return "vmovups\t{%1, %0%{%2%}|%0%{%2%}, %1}";

  return "vmovu<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
}
  [(set_attr "type" "ssemov")
   (set_attr "movu" "1")
   (set_attr "memory" "store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
1234
1235 /* For AVX, normal *mov<mode>_internal pattern will handle unaligned loads
1236 just fine if misaligned_operand is true, and without the UNSPEC it can
1237 be combined with arithmetic instructions. If misaligned_operand is
1238 false, still emit UNSPEC_LOADU insn to honor user's request for
1239 misaligned load. */
;; Expander for an unaligned integer vector load over the VI1 modes.
;; With AVX and a source that misaligned_operand reports as misaligned,
;; a plain SET (a VEC_MERGE for the masked variant) replaces the
;; UNSPEC_LOADU pattern; otherwise the UNSPEC_LOADU pattern is emitted.
(define_expand "<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
  [(set (match_operand:VI1 0 "register_operand")
        (unspec:VI1
          [(match_operand:VI1 1 "nonimmediate_operand")]
          UNSPEC_LOADU))]
  "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
{
  if (TARGET_AVX && misaligned_operand (operands[1], <MODE>mode))
    {
      /* Bare source for the plain variant, merge with the mask operands
         for the masked one.  */
      rtx pat = (<mask_applied>
                 ? gen_rtx_VEC_MERGE (<MODE>mode, operands[1],
                                      operands[2 * <mask_applied>],
                                      operands[3 * <mask_applied>])
                 : operands[1]);
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], pat));
      DONE;
    }
})
1259
;; Expander for an unaligned integer vector load over the
;; VI_ULOADSTORE_BW_AVX512VL modes (enabled by TARGET_AVX512BW).  A source
;; that misaligned_operand reports as misaligned is loaded with a plain
;; SET (a VEC_MERGE for the masked variant); otherwise the UNSPEC_LOADU
;; pattern is emitted.
(define_expand "<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
  [(set (match_operand:VI_ULOADSTORE_BW_AVX512VL 0 "register_operand")
        (unspec:VI_ULOADSTORE_BW_AVX512VL
          [(match_operand:VI_ULOADSTORE_BW_AVX512VL 1 "nonimmediate_operand")]
          UNSPEC_LOADU))]
  "TARGET_AVX512BW"
{
  if (misaligned_operand (operands[1], <MODE>mode))
    {
      /* Bare source for the plain variant, merge with the mask operands
         for the masked one.  */
      rtx pat = (<mask_applied>
                 ? gen_rtx_VEC_MERGE (<MODE>mode, operands[1],
                                      operands[2 * <mask_applied>],
                                      operands[3 * <mask_applied>])
                 : operands[1]);
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], pat));
      DONE;
    }
})
1278
;; Expander for an unaligned integer vector load over the
;; VI_ULOADSTORE_F_AVX512VL modes (enabled by TARGET_AVX512F).  A source
;; that misaligned_operand reports as misaligned is loaded with a plain
;; SET (a VEC_MERGE for the masked variant); otherwise the UNSPEC_LOADU
;; pattern is emitted.
(define_expand "<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
  [(set (match_operand:VI_ULOADSTORE_F_AVX512VL 0 "register_operand")
        (unspec:VI_ULOADSTORE_F_AVX512VL
          [(match_operand:VI_ULOADSTORE_F_AVX512VL 1 "nonimmediate_operand")]
          UNSPEC_LOADU))]
  "TARGET_AVX512F"
{
  if (misaligned_operand (operands[1], <MODE>mode))
    {
      /* Bare source for the plain variant, merge with the mask operands
         for the masked one.  */
      rtx pat = (<mask_applied>
                 ? gen_rtx_VEC_MERGE (<MODE>mode, operands[1],
                                      operands[2 * <mask_applied>],
                                      operands[3 * <mask_applied>])
                 : operands[1]);
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], pat));
      DONE;
    }
})
1297
;; Unaligned load (UNSPEC_LOADU) for the VI1 modes.  Output selection:
;;   - "mode" attribute V8SF or V4SF: movups;
;;   - otherwise, unless both AVX512VL and AVX512BW are enabled: movdqu;
;;   - otherwise: vmovdqu<ssescalarsize>, the only form that prints
;;     <mask_operand2>.
;; prefix_data16 is "*" under TARGET_AVX and "1" otherwise.
;; NOTE(review): the movups path drops the mask operand.  Confirm that
;; the masked variant can never compute a V4SF/V8SF "mode" attribute
;; (e.g. through TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL tuning).
1298 (define_insn "*<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
1299 [(set (match_operand:VI1 0 "register_operand" "=v")
1300 (unspec:VI1
1301 [(match_operand:VI1 1 "nonimmediate_operand" "vm")]
1302 UNSPEC_LOADU))]
1303 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
1304 {
1305 switch (get_attr_mode (insn))
1306 {
1307 case MODE_V8SF:
1308 case MODE_V4SF:
1309 return "%vmovups\t{%1, %0|%0, %1}";
1310 default:
1311 if (!(TARGET_AVX512VL && TARGET_AVX512BW))
1312 return "%vmovdqu\t{%1, %0|%0, %1}";
1313 else
1314 return "vmovdqu<ssescalarsize>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
1315 }
1316 }
1317 [(set_attr "type" "ssemov")
1318 (set_attr "movu" "1")
1319 (set_attr "ssememalign" "8")
1320 (set (attr "prefix_data16")
1321 (if_then_else
1322 (match_test "TARGET_AVX")
1323 (const_string "*")
1324 (const_string "1")))
1325 (set_attr "prefix" "maybe_vex")
1326 (set (attr "mode")
1327 (cond [(and (match_test "<MODE_SIZE> == 16")
1328 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
1329 (const_string "<ssePSmode>")
1330 (match_test "TARGET_AVX")
1331 (const_string "<sseinsnmode>")
1332 (match_test "optimize_function_for_size_p (cfun)")
1333 (const_string "V4SF")
1334 ]
1335 (const_string "<sseinsnmode>")))])
1336
;; Unaligned load (UNSPEC_LOADU) for the VI_ULOADSTORE_BW_AVX512VL modes
;; (enabled by TARGET_AVX512BW), emitted as vmovdqu<ssescalarsize>; the
;; masked variant appends <mask_operand2> to the destination.
(define_insn "*<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
  [(set (match_operand:VI_ULOADSTORE_BW_AVX512VL 0 "register_operand" "=v")
        (unspec:VI_ULOADSTORE_BW_AVX512VL
          [(match_operand:VI_ULOADSTORE_BW_AVX512VL 1 "nonimmediate_operand" "vm")]
          UNSPEC_LOADU))]
  "TARGET_AVX512BW"
  "vmovdqu<ssescalarsize>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "movu" "1")
   (set_attr "ssememalign" "8")
   (set_attr "prefix" "maybe_evex")])
1348
;; Unaligned load (UNSPEC_LOADU) for the VI_ULOADSTORE_F_AVX512VL modes
;; (enabled by TARGET_AVX512F), emitted as vmovdqu<ssescalarsize>; the
;; masked variant appends <mask_operand2> to the destination.
(define_insn "*<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
  [(set (match_operand:VI_ULOADSTORE_F_AVX512VL 0 "register_operand" "=v")
        (unspec:VI_ULOADSTORE_F_AVX512VL
          [(match_operand:VI_ULOADSTORE_F_AVX512VL 1 "nonimmediate_operand" "vm")]
          UNSPEC_LOADU))]
  "TARGET_AVX512F"
  "vmovdqu<ssescalarsize>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "movu" "1")
   (set_attr "ssememalign" "8")
   (set_attr "prefix" "maybe_evex")])
1360
;; Unaligned store (UNSPEC_STOREU) for the VI1 modes.  Output selection:
;;   - "mode" attribute V16SF, V8SF or V4SF: movups;
;;   - V32QI/V16QI without both AVX512VL and AVX512BW: movdqu;
;;   - everything else: vmovdqu<ssescalarsize>.
;; prefix_data16 is "*" under TARGET_AVX and "1" otherwise.
(define_insn "<sse2_avx_avx512f>_storedqu<mode>"
  [(set (match_operand:VI1 0 "memory_operand" "=m")
        (unspec:VI1
          [(match_operand:VI1 1 "register_operand" "v")]
          UNSPEC_STOREU))]
  "TARGET_SSE2"
{
  const enum attr_mode insn_mode = get_attr_mode (insn);

  if (insn_mode == MODE_V16SF
      || insn_mode == MODE_V8SF
      || insn_mode == MODE_V4SF)
    return "%vmovups\t{%1, %0|%0, %1}";

  /* The 128/256-bit byte vectors only get the size-suffixed mnemonic
     when both AVX512VL and AVX512BW are available.  */
  if ((<MODE>mode == V32QImode || <MODE>mode == V16QImode)
      && !(TARGET_AVX512VL && TARGET_AVX512BW))
    return "%vmovdqu\t{%1, %0|%0, %1}";

  return "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}";
}
  [(set_attr "type" "ssemov")
   (set_attr "movu" "1")
   (set_attr "ssememalign" "8")
   (set (attr "prefix_data16")
     (if_then_else
       (match_test "TARGET_AVX")
     (const_string "*")
     (const_string "1")))
   (set_attr "prefix" "maybe_vex")
   (set (attr "mode")
        (cond [(and (match_test "<MODE_SIZE> == 16")
                    (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
                         (match_test "TARGET_SSE_TYPELESS_STORES")))
                 (const_string "<ssePSmode>")
               (match_test "TARGET_AVX")
                 (const_string "<sseinsnmode>")
               (match_test "optimize_function_for_size_p (cfun)")
                 (const_string "V4SF")
              ]
              (const_string "<sseinsnmode>")))])
1406
;; Unaligned store (UNSPEC_STOREU) for the VI_ULOADSTORE_BW_AVX512VL
;; modes (enabled by TARGET_AVX512BW), emitted as vmovdqu<ssescalarsize>.
1407 (define_insn "<sse2_avx_avx512f>_storedqu<mode>"
1408 [(set (match_operand:VI_ULOADSTORE_BW_AVX512VL 0 "memory_operand" "=m")
1409 (unspec:VI_ULOADSTORE_BW_AVX512VL
1410 [(match_operand:VI_ULOADSTORE_BW_AVX512VL 1 "register_operand" "v")]
1411 UNSPEC_STOREU))]
1412 "TARGET_AVX512BW"
1413 "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}"
1414 [(set_attr "type" "ssemov")
1415 (set_attr "movu" "1")
1416 (set_attr "ssememalign" "8")
1417 (set_attr "prefix" "maybe_evex")])
1418
;; Unaligned store (UNSPEC_STOREU) for the VI_ULOADSTORE_F_AVX512VL modes
;; (enabled by TARGET_AVX512F), emitted as vmovdqu<ssescalarsize>.
;; The "prefix" attribute is "maybe_evex", in line with the matching load
;; pattern and with the AVX512BW store pattern, which emit the same
;; size-suffixed mnemonic.
(define_insn "<sse2_avx_avx512f>_storedqu<mode>"
  [(set (match_operand:VI_ULOADSTORE_F_AVX512VL 0 "memory_operand" "=m")
        (unspec:VI_ULOADSTORE_F_AVX512VL
          [(match_operand:VI_ULOADSTORE_F_AVX512VL 1 "register_operand" "v")]
          UNSPEC_STOREU))]
  "TARGET_AVX512F"
  "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "movu" "1")
   (set_attr "ssememalign" "8")
   (set_attr "prefix" "maybe_evex")])
1430
;; Masked unaligned store for the VI48_AVX512VL modes (enabled by
;; TARGET_AVX512F): the UNSPEC_STOREU of register operand 1 is merged into
;; memory operand 0 under mask register operand 2; (match_dup 0) supplies
;; the elements that are kept.
1431 (define_insn "<avx512>_storedqu<mode>_mask"
1432 [(set (match_operand:VI48_AVX512VL 0 "memory_operand" "=m")
1433 (vec_merge:VI48_AVX512VL
1434 (unspec:VI48_AVX512VL
1435 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")]
1436 UNSPEC_STOREU)
1437 (match_dup 0)
1438 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1439 "TARGET_AVX512F"
1440 "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
1441 [(set_attr "type" "ssemov")
1442 (set_attr "movu" "1")
1443 (set_attr "memory" "store")
1444 (set_attr "prefix" "evex")
1445 (set_attr "mode" "<sseinsnmode>")])
1446
;; Masked unaligned store for the VI12_AVX512VL modes (enabled by
;; TARGET_AVX512BW): the UNSPEC_STOREU of register operand 1 is merged
;; into memory operand 0 under mask register operand 2; (match_dup 0)
;; supplies the elements that are kept.
1447 (define_insn "<avx512>_storedqu<mode>_mask"
1448 [(set (match_operand:VI12_AVX512VL 0 "memory_operand" "=m")
1449 (vec_merge:VI12_AVX512VL
1450 (unspec:VI12_AVX512VL
1451 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")]
1452 UNSPEC_STOREU)
1453 (match_dup 0)
1454 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1455 "TARGET_AVX512BW"
1456 "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
1457 [(set_attr "type" "ssemov")
1458 (set_attr "movu" "1")
1459 (set_attr "memory" "store")
1460 (set_attr "prefix" "evex")
1461 (set_attr "mode" "<sseinsnmode>")])
1462
;; lddqu: load of a VI1 vector from memory operand 1, represented as
;; UNSPEC_LDDQU (enabled by TARGET_SSE3).  Under TARGET_AVX both
;; prefix_data16 and prefix_rep are "*"; otherwise prefix_data16 is "0"
;; and prefix_rep is "1".
1463 (define_insn "<sse3>_lddqu<avxsizesuffix>"
1464 [(set (match_operand:VI1 0 "register_operand" "=x")
1465 (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
1466 UNSPEC_LDDQU))]
1467 "TARGET_SSE3"
1468 "%vlddqu\t{%1, %0|%0, %1}"
1469 [(set_attr "type" "ssemov")
1470 (set_attr "movu" "1")
1471 (set_attr "ssememalign" "8")
1472 (set (attr "prefix_data16")
1473 (if_then_else
1474 (match_test "TARGET_AVX")
1475 (const_string "*")
1476 (const_string "0")))
1477 (set (attr "prefix_rep")
1478 (if_then_else
1479 (match_test "TARGET_AVX")
1480 (const_string "*")
1481 (const_string "1")))
1482 (set_attr "prefix" "maybe_vex")
1483 (set_attr "mode" "<sseinsnmode>")])
1484
;; movnti: store of general register operand 1 to memory operand 0,
;; represented as UNSPEC_MOVNT, for the SWI48 modes (enabled by
;; TARGET_SSE2).
1485 (define_insn "sse2_movnti<mode>"
1486 [(set (match_operand:SWI48 0 "memory_operand" "=m")
1487 (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")]
1488 UNSPEC_MOVNT))]
1489 "TARGET_SSE2"
1490 "movnti\t{%1, %0|%0, %1}"
1491 [(set_attr "type" "ssemov")
1492 (set_attr "prefix_data16" "0")
1493 (set_attr "mode" "<MODE>")])
1494
;; movnt<ssemodesuffix>: store of vector register operand 1 to memory
;; operand 0, represented as UNSPEC_MOVNT, for the VF modes.
1495 (define_insn "<sse>_movnt<mode>"
1496 [(set (match_operand:VF 0 "memory_operand" "=m")
1497 (unspec:VF
1498 [(match_operand:VF 1 "register_operand" "v")]
1499 UNSPEC_MOVNT))]
1500 "TARGET_SSE"
1501 "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
1502 [(set_attr "type" "ssemov")
1503 (set_attr "prefix" "maybe_vex")
1504 (set_attr "mode" "<MODE>")])
1505
;; movntdq: store of vector register operand 1 to memory operand 0,
;; represented as UNSPEC_MOVNT, for the VI8 modes (enabled by
;; TARGET_SSE2).  prefix_data16 is "*" under TARGET_AVX and "1" otherwise.
;; NOTE(review): "type" is "ssecvt" here while the other movnt patterns
;; use "ssemov" -- confirm this is intended.
1506 (define_insn "<sse2>_movnt<mode>"
1507 [(set (match_operand:VI8 0 "memory_operand" "=m")
1508 (unspec:VI8 [(match_operand:VI8 1 "register_operand" "v")]
1509 UNSPEC_MOVNT))]
1510 "TARGET_SSE2"
1511 "%vmovntdq\t{%1, %0|%0, %1}"
1512 [(set_attr "type" "ssecvt")
1513 (set (attr "prefix_data16")
1514 (if_then_else
1515 (match_test "TARGET_AVX")
1516 (const_string "*")
1517 (const_string "1")))
1518 (set_attr "prefix" "maybe_vex")
1519 (set_attr "mode" "<sseinsnmode>")])
1520
1521 ; Expand patterns for non-temporal stores. At the moment, only those
1522 ; that directly map to insns are defined; it would be possible to
1523 ; define patterns for other modes that would expand to several insns.
1524
1525 ;; Modes handled by storent patterns.
;; Each entry pairs a mode with the target condition that enables it;
;; V4SF is the only entry without a condition of its own.
1526 (define_mode_iterator STORENT_MODE
1527 [(DI "TARGET_SSE2 && TARGET_64BIT") (SI "TARGET_SSE2")
1528 (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
1529 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2")
1530 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
1531 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
1532
;; storent<mode> expander for every STORENT_MODE entry.  There are no
;; preparation statements: the UNSPEC_MOVNT set of register operand 1 to
;; memory operand 0 is emitted as written.
1533 (define_expand "storent<mode>"
1534 [(set (match_operand:STORENT_MODE 0 "memory_operand")
1535 (unspec:STORENT_MODE
1536 [(match_operand:STORENT_MODE 1 "register_operand")]
1537 UNSPEC_MOVNT))]
1538 "TARGET_SSE")
1539
1540 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1541 ;;
1542 ;; Parallel floating point arithmetic
1543 ;;
1544 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1545
;; Expander for the absneg codes over the VF modes.  Expansion is
;; delegated to ix86_expand_fp_absneg_operator; the pattern itself is not
;; emitted (DONE).
1546 (define_expand "<code><mode>2"
1547 [(set (match_operand:VF 0 "register_operand")
1548 (absneg:VF
1549 (match_operand:VF 1 "register_operand")))]
1550 "TARGET_SSE"
1551 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
1552
;; Vector abs/neg carrying its operand in a (use).  The insn outputs "#"
;; and is split after reload into a single logical operation on operand 2
;; and the source: XOR when operand 3 is a NEG, AND otherwise.
;; Without AVX the first input is always the destination and the second
;; one is whichever of operands 1/2 is not tied to it; with AVX the two
;; inputs are ordered so that a memory operand 1 ends up second.
(define_insn_and_split "*absneg<mode>2"
  [(set (match_operand:VF 0 "register_operand" "=x,x,v,v")
        (match_operator:VF 3 "absneg_operator"
          [(match_operand:VF 1 "nonimmediate_operand" "0, xm, v, m")]))
   (use (match_operand:VF 2 "nonimmediate_operand"    "xm, 0, vm,v"))]
  "TARGET_SSE"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx lhs, rhs;

  if (!TARGET_AVX)
    {
      /* Two-operand form: the destination doubles as first input.  */
      lhs = operands[0];
      rhs = rtx_equal_p (operands[0], operands[1]) ? operands[2] : operands[1];
    }
  else if (MEM_P (operands[1]))
    {
      lhs = operands[2];
      rhs = operands[1];
    }
  else
    {
      lhs = operands[1];
      rhs = operands[2];
    }

  const enum rtx_code logic_code = GET_CODE (operands[3]) == NEG ? XOR : AND;
  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                          gen_rtx_fmt_ee (logic_code, <MODE>mode, lhs, rhs)));
  DONE;
}
  [(set_attr "isa" "noavx,noavx,avx,avx")])
1590
;; Expander for vector add/sub over the VF modes, including the masked and
;; rounding variants.  The operands are passed through
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
1591 (define_expand "<plusminus_insn><mode>3<mask_name><round_name>"
1592 [(set (match_operand:VF 0 "register_operand")
1593 (plusminus:VF
1594 (match_operand:VF 1 "<round_nimm_predicate>")
1595 (match_operand:VF 2 "<round_nimm_predicate>")))]
1596 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1597 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
1598
;; Vector add/sub over the VF modes.  Alternative 0 ("noavx") is the
;; two-operand form with operand 1 tied to the destination; alternative 1
;; ("avx") is the three-operand "v"-prefixed form, which also carries the
;; mask and rounding operands of the substituted variants.
1599 (define_insn "*<plusminus_insn><mode>3<mask_name><round_name>"
1600 [(set (match_operand:VF 0 "register_operand" "=x,v")
1601 (plusminus:VF
1602 (match_operand:VF 1 "<round_nimm_predicate>" "<comm>0,v")
1603 (match_operand:VF 2 "<round_nimm_predicate>" "xm,<round_constraint>")))]
1604 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1605 "@
1606 <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
1607 v<plusminus_mnemonic><ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1608 [(set_attr "isa" "noavx,avx")
1609 (set_attr "type" "sseadd")
1610 (set_attr "prefix" "<mask_prefix3>")
1611 (set_attr "mode" "<MODE>")])
1612
;; Scalar add/sub inside a 128-bit vector (VF_128): a vec_merge with mask
;; const_int 1 of the full-vector operation and (match_dup 1), so every
;; element other than the selected one comes from operand 1.
;; Alternative 0 is the two-operand non-AVX form, alternative 1 the
;; three-operand AVX form with the optional rounding operand.
1613 (define_insn "<sse>_vm<plusminus_insn><mode>3<round_name>"
1614 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1615 (vec_merge:VF_128
1616 (plusminus:VF_128
1617 (match_operand:VF_128 1 "register_operand" "0,v")
1618 (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_constraint>"))
1619 (match_dup 1)
1620 (const_int 1)))]
1621 "TARGET_SSE"
1622 "@
1623 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1624 v<plusminus_mnemonic><ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %<iptr>2<round_op3>}"
1625 [(set_attr "isa" "noavx,avx")
1626 (set_attr "type" "sseadd")
1627 (set_attr "prefix" "<round_prefix>")
1628 (set_attr "mode" "<ssescalarmode>")])
1629
;; Expander for vector multiply over the VF modes, including the masked
;; and rounding variants.  The operands are passed through
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
1630 (define_expand "mul<mode>3<mask_name><round_name>"
1631 [(set (match_operand:VF 0 "register_operand")
1632 (mult:VF
1633 (match_operand:VF 1 "<round_nimm_predicate>")
1634 (match_operand:VF 2 "<round_nimm_predicate>")))]
1635 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1636 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
1637
;; Vector multiply over the VF modes.  Operand 1 is marked commutative
;; ("%").  Alternative 0 ("noavx") is the two-operand form tied to the
;; destination; alternative 1 ("avx") is the three-operand form with the
;; mask and rounding operands of the substituted variants.
1638 (define_insn "*mul<mode>3<mask_name><round_name>"
1639 [(set (match_operand:VF 0 "register_operand" "=x,v")
1640 (mult:VF
1641 (match_operand:VF 1 "<round_nimm_predicate>" "%0,v")
1642 (match_operand:VF 2 "<round_nimm_predicate>" "xm,<round_constraint>")))]
1643 "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands) && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1644 "@
1645 mul<ssemodesuffix>\t{%2, %0|%0, %2}
1646 vmul<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1647 [(set_attr "isa" "noavx,avx")
1648 (set_attr "type" "ssemul")
1649 (set_attr "prefix" "<mask_prefix3>")
1650 (set_attr "btver2_decode" "direct,double")
1651 (set_attr "mode" "<MODE>")])
1652
;; Scalar multiply/divide inside a 128-bit vector (VF_128): a vec_merge
;; with mask const_int 1 of the full-vector operation and (match_dup 1).
;; The "type" attribute is derived from the mnemonic
;; ("sse<multdiv_mnemonic>").
1653 (define_insn "<sse>_vm<multdiv_mnemonic><mode>3<round_name>"
1654 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1655 (vec_merge:VF_128
1656 (multdiv:VF_128
1657 (match_operand:VF_128 1 "register_operand" "0,v")
1658 (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_constraint>"))
1659 (match_dup 1)
1660 (const_int 1)))]
1661 "TARGET_SSE"
1662 "@
1663 <multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1664 v<multdiv_mnemonic><ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %<iptr>2<round_op3>}"
1665 [(set_attr "isa" "noavx,avx")
1666 (set_attr "type" "sse<multdiv_mnemonic>")
1667 (set_attr "prefix" "<round_prefix>")
1668 (set_attr "btver2_decode" "direct,double")
1669 (set_attr "mode" "<ssescalarmode>")])
1670
;; Expander for vector divide over the VF2 modes (enabled by
;; TARGET_SSE2).  The operands are passed through
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
1671 (define_expand "div<mode>3"
1672 [(set (match_operand:VF2 0 "register_operand")
1673 (div:VF2 (match_operand:VF2 1 "register_operand")
1674 (match_operand:VF2 2 "nonimmediate_operand")))]
1675 "TARGET_SSE2"
1676 "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")
1677
;; Expander for vector divide over the VF1 modes.  After the generic
;; operand fixup, the division is expanded through ix86_emit_swdivsf
;; instead of the div pattern when all of these hold: SSE math, the
;; vector-division reciprocal option, not optimizing the insn for size,
;; finite-math-only, no trapping math and unsafe math optimizations.
(define_expand "div<mode>3"
  [(set (match_operand:VF1 0 "register_operand")
        (div:VF1 (match_operand:VF1 1 "register_operand")
                 (match_operand:VF1 2 "nonimmediate_operand")))]
  "TARGET_SSE"
{
  ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);

  const bool use_sw_division = (TARGET_SSE_MATH
                                && TARGET_RECIP_VEC_DIV
                                && !optimize_insn_for_size_p ()
                                && flag_finite_math_only
                                && !flag_trapping_math
                                && flag_unsafe_math_optimizations);
  if (use_sw_division)
    {
      ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
      DONE;
    }
})
1696
;; Vector divide over the VF modes.  Alternative 0 ("noavx") is the
;; two-operand form with operand 1 tied to the destination; alternative 1
;; ("avx") is the three-operand form with the mask and rounding operands
;; of the substituted variants.
1697 (define_insn "<sse>_div<mode>3<mask_name><round_name>"
1698 [(set (match_operand:VF 0 "register_operand" "=x,v")
1699 (div:VF
1700 (match_operand:VF 1 "register_operand" "0,v")
1701 (match_operand:VF 2 "<round_nimm_predicate>" "xm,<round_constraint>")))]
1702 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1703 "@
1704 div<ssemodesuffix>\t{%2, %0|%0, %2}
1705 vdiv<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1706 [(set_attr "isa" "noavx,avx")
1707 (set_attr "type" "ssediv")
1708 (set_attr "prefix" "<mask_prefix3>")
1709 (set_attr "mode" "<MODE>")])
1710
;; rcpps over the VF1_128_256 modes, represented as UNSPEC_RCP of
;; operand 1 (register or memory).
1711 (define_insn "<sse>_rcp<mode>2"
1712 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
1713 (unspec:VF1_128_256
1714 [(match_operand:VF1_128_256 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
1715 "TARGET_SSE"
1716 "%vrcpps\t{%1, %0|%0, %1}"
1717 [(set_attr "type" "sse")
1718 (set_attr "atom_sse_attr" "rcp")
1719 (set_attr "btver2_sse_attr" "rcp")
1720 (set_attr "prefix" "maybe_vex")
1721 (set_attr "mode" "<MODE>")])
1722
;; rcpss on V4SF: a vec_merge with mask const_int 1 of UNSPEC_RCP applied
;; to operand 1 and register operand 2, which supplies the remaining
;; elements.  Alternative 0 (non-AVX) ties operand 2 to the destination;
;; alternative 1 (AVX) prints it as a separate source.
1723 (define_insn "sse_vmrcpv4sf2"
1724 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1725 (vec_merge:V4SF
1726 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
1727 UNSPEC_RCP)
1728 (match_operand:V4SF 2 "register_operand" "0,x")
1729 (const_int 1)))]
1730 "TARGET_SSE"
1731 "@
1732 rcpss\t{%1, %0|%0, %k1}
1733 vrcpss\t{%1, %2, %0|%0, %2, %k1}"
1734 [(set_attr "isa" "noavx,avx")
1735 (set_attr "type" "sse")
1736 (set_attr "ssememalign" "32")
1737 (set_attr "atom_sse_attr" "rcp")
1738 (set_attr "btver2_sse_attr" "rcp")
1739 (set_attr "prefix" "orig,vex")
1740 (set_attr "mode" "SF")])
1741
;; vrcp14<ssemodesuffix> over the VF_AVX512VL modes, represented as
;; UNSPEC_RCP14 of operand 1; the masked variant appends <mask_operand2>
;; to the destination.
1742 (define_insn "<mask_codefor>rcp14<mode><mask_name>"
1743 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
1744 (unspec:VF_AVX512VL
1745 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")]
1746 UNSPEC_RCP14))]
1747 "TARGET_AVX512F"
1748 "vrcp14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
1749 [(set_attr "type" "sse")
1750 (set_attr "prefix" "evex")
1751 (set_attr "mode" "<MODE>")])
1752
;; Scalar vrcp14<ssescalarmodesuffix> on the VF_128 modes: a vec_merge
;; with mask const_int 1 of UNSPEC_RCP14 applied to operand 1 and register
;; operand 2, which supplies the remaining elements.
1753 (define_insn "srcp14<mode>"
1754 [(set (match_operand:VF_128 0 "register_operand" "=v")
1755 (vec_merge:VF_128
1756 (unspec:VF_128
1757 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
1758 UNSPEC_RCP14)
1759 (match_operand:VF_128 2 "register_operand" "v")
1760 (const_int 1)))]
1761 "TARGET_AVX512F"
1762 "vrcp14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
1763 [(set_attr "type" "sse")
1764 (set_attr "prefix" "evex")
1765 (set_attr "mode" "<MODE>")])
1766
;; Expander for vector square root over the VF2 modes (enabled by
;; TARGET_SSE2).  No preparation statements: the sqrt pattern is emitted
;; as written.
1767 (define_expand "sqrt<mode>2"
1768 [(set (match_operand:VF2 0 "register_operand")
1769 (sqrt:VF2 (match_operand:VF2 1 "nonimmediate_operand")))]
1770 "TARGET_SSE2")
1771
;; Expander for vector square root over the VF1 modes.  The square root
;; is expanded through ix86_emit_swsqrtsf (last argument false) instead of
;; the sqrt pattern when all of these hold: SSE math, the vector-sqrt
;; reciprocal option, not optimizing the insn for size, finite-math-only,
;; no trapping math and unsafe math optimizations.
(define_expand "sqrt<mode>2"
  [(set (match_operand:VF1 0 "register_operand")
        (sqrt:VF1 (match_operand:VF1 1 "nonimmediate_operand")))]
  "TARGET_SSE"
{
  const bool use_sw_sqrt = (TARGET_SSE_MATH
                            && TARGET_RECIP_VEC_SQRT
                            && !optimize_insn_for_size_p ()
                            && flag_finite_math_only
                            && !flag_trapping_math
                            && flag_unsafe_math_optimizations);
  if (use_sw_sqrt)
    {
      ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
      DONE;
    }
})
1787
;; Vector square root over the VF modes, emitted as sqrt<ssemodesuffix>
;; with the mask and rounding operands of the substituted variants.
1788 (define_insn "<sse>_sqrt<mode>2<mask_name><round_name>"
1789 [(set (match_operand:VF 0 "register_operand" "=v")
1790 (sqrt:VF (match_operand:VF 1 "<round_nimm_predicate>" "<round_constraint>")))]
1791 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1792 "%vsqrt<ssemodesuffix>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
1793 [(set_attr "type" "sse")
1794 (set_attr "atom_sse_attr" "sqrt")
1795 (set_attr "btver2_sse_attr" "sqrt")
1796 (set_attr "prefix" "maybe_vex")
1797 (set_attr "mode" "<MODE>")])
1798
;; Scalar square root inside a 128-bit vector (VF_128): a vec_merge with
;; mask const_int 1 of sqrt applied to operand 1 and register operand 2,
;; which supplies the remaining elements.  Alternative 0 (non-AVX) ties
;; operand 2 to the destination; alternative 1 (AVX) prints it as a
;; separate source and carries the optional rounding operand.
1799 (define_insn "<sse>_vmsqrt<mode>2<round_name>"
1800 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1801 (vec_merge:VF_128
1802 (sqrt:VF_128
1803 (match_operand:VF_128 1 "nonimmediate_operand" "xm,<round_constraint>"))
1804 (match_operand:VF_128 2 "register_operand" "0,v")
1805 (const_int 1)))]
1806 "TARGET_SSE"
1807 "@
1808 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}
1809 vsqrt<ssescalarmodesuffix>\t{<round_op3>%1, %2, %0|%0, %2, %<iptr>1<round_op3>}"
1810 [(set_attr "isa" "noavx,avx")
1811 (set_attr "type" "sse")
1812 (set_attr "atom_sse_attr" "sqrt")
1813 (set_attr "prefix" "<round_prefix>")
1814 (set_attr "btver2_sse_attr" "sqrt")
1815 (set_attr "mode" "<ssescalarmode>")])
1816
;; Expander for reciprocal square root over the VF1_128_256 modes
;; (enabled by TARGET_SSE_MATH).  The UNSPEC_RSQRT pattern is never
;; emitted: expansion always goes through ix86_emit_swsqrtsf with its last
;; argument set to true.
1817 (define_expand "rsqrt<mode>2"
1818 [(set (match_operand:VF1_128_256 0 "register_operand")
1819 (unspec:VF1_128_256
1820 [(match_operand:VF1_128_256 1 "nonimmediate_operand")] UNSPEC_RSQRT))]
1821 "TARGET_SSE_MATH"
1822 {
1823 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
1824 DONE;
1825 })
1826
;; rsqrtps over the VF1_128_256 modes, represented as UNSPEC_RSQRT of
;; operand 1 (register or memory).
1827 (define_insn "<sse>_rsqrt<mode>2"
1828 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
1829 (unspec:VF1_128_256
1830 [(match_operand:VF1_128_256 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
1831 "TARGET_SSE"
1832 "%vrsqrtps\t{%1, %0|%0, %1}"
1833 [(set_attr "type" "sse")
1834 (set_attr "prefix" "maybe_vex")
1835 (set_attr "mode" "<MODE>")])
1836
;; vrsqrt14<ssemodesuffix> over the VF_AVX512VL modes, represented as
;; UNSPEC_RSQRT14 of operand 1; the masked variant appends
;; <mask_operand2> to the destination.
1837 (define_insn "<mask_codefor>rsqrt14<mode><mask_name>"
1838 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
1839 (unspec:VF_AVX512VL
1840 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")]
1841 UNSPEC_RSQRT14))]
1842 "TARGET_AVX512F"
1843 "vrsqrt14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
1844 [(set_attr "type" "sse")
1845 (set_attr "prefix" "evex")
1846 (set_attr "mode" "<MODE>")])
1847
;; Scalar vrsqrt14<ssescalarmodesuffix> on the VF_128 modes: a vec_merge
;; with mask const_int 1 of UNSPEC_RSQRT14 applied to operand 1 and
;; register operand 2, which supplies the remaining elements.
1848 (define_insn "rsqrt14<mode>"
1849 [(set (match_operand:VF_128 0 "register_operand" "=v")
1850 (vec_merge:VF_128
1851 (unspec:VF_128
1852 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
1853 UNSPEC_RSQRT14)
1854 (match_operand:VF_128 2 "register_operand" "v")
1855 (const_int 1)))]
1856 "TARGET_AVX512F"
1857 "vrsqrt14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
1858 [(set_attr "type" "sse")
1859 (set_attr "prefix" "evex")
1860 (set_attr "mode" "<MODE>")])
1861
;; Scalar rsqrtss on a V4SF vector: element 0 is the UNSPEC_RSQRT of
;; operand 1, the remaining elements are copied from operand 2
;; (vec_merge mask 1).
;; Alternative 0 (isa "noavx") is the two-operand legacy form; operand 2
;; is tied to the destination by the "0" constraint.  Alternative 1
;; (isa "avx") is the three-operand VEX form.
;; NOTE(review): the "%k1" modifier in the Intel-syntax half presumably
;; selects the 32-bit operand size for a memory source -- confirm.
1862 (define_insn "sse_vmrsqrtv4sf2"
1863 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1864 (vec_merge:V4SF
1865 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
1866 UNSPEC_RSQRT)
1867 (match_operand:V4SF 2 "register_operand" "0,x")
1868 (const_int 1)))]
1869 "TARGET_SSE"
1870 "@
1871 rsqrtss\t{%1, %0|%0, %k1}
1872 vrsqrtss\t{%1, %2, %0|%0, %2, %k1}"
1873 [(set_attr "isa" "noavx,avx")
1874 (set_attr "type" "sse")
1875 (set_attr "ssememalign" "32")
1876 (set_attr "prefix" "orig,vex")
1877 (set_attr "mode" "SF")])
1878
1879 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
1880 ;; isn't really correct, as those rtl operators aren't defined when
1881 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
1882
;; Expander for packed floating-point min/max (the smaxmin code
;; iterator) on the VF modes.
;; When flag_finite_math_only is not set, operand 1 is forced into a
;; register before the generic operand fix-up; the non-finite insn
;; pattern in this file only accepts a register for operand 1.
;; ix86_fixup_binary_operands_no_copy then adjusts the operands in
;; place and the RTL template is emitted (there is no DONE).
;; NOTE(review): the <mask_...> and <round_saeonly_...> pieces are
;; substitution attributes defined outside this chunk.
1883 (define_expand "<code><mode>3<mask_name><round_saeonly_name>"
1884 [(set (match_operand:VF 0 "register_operand")
1885 (smaxmin:VF
1886 (match_operand:VF 1 "<round_saeonly_nimm_predicate>")
1887 (match_operand:VF 2 "<round_saeonly_nimm_predicate>")))]
1888 "TARGET_SSE && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
1889 {
1890 if (!flag_finite_math_only)
1891 operands[1] = force_reg (<MODE>mode, operands[1]);
1892 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
1893 })
1894
;; Packed min/max used only when flag_finite_math_only is set.
;; Operand 1 carries the "%" commutativity marker, so the two inputs
;; may be swapped during constraint matching; the insn condition also
;; requires ix86_binary_operator_ok.
;; Alternative 0 is the legacy two-operand form (operand 1 tied to the
;; destination); alternative 1 is the three-operand "v"-prefixed form.
1895 (define_insn "*<code><mode>3_finite<mask_name><round_saeonly_name>"
1896 [(set (match_operand:VF 0 "register_operand" "=x,v")
1897 (smaxmin:VF
1898 (match_operand:VF 1 "<round_saeonly_nimm_predicate>" "%0,v")
1899 (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xm,<round_saeonly_constraint>")))]
1900 "TARGET_SSE && flag_finite_math_only
1901 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
1902 && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
1903 "@
1904 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
1905 v<maxmin_float><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
1906 [(set_attr "isa" "noavx,avx")
1907 (set_attr "type" "sseadd")
1908 (set_attr "btver2_sse_attr" "maxmin")
1909 (set_attr "prefix" "<mask_prefix3>")
1910 (set_attr "mode" "<MODE>")])
1911
;; Packed min/max used when flag_finite_math_only is NOT set.
;; Unlike the _finite variant, operand 1 must be a register and has no
;; "%" commutativity marker, so the operand order written in the RTL is
;; the order given to the instruction.
1912 (define_insn "*<code><mode>3<mask_name><round_saeonly_name>"
1913 [(set (match_operand:VF 0 "register_operand" "=x,v")
1914 (smaxmin:VF
1915 (match_operand:VF 1 "register_operand" "0,v")
1916 (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xm,<round_saeonly_constraint>")))]
1917 "TARGET_SSE && !flag_finite_math_only
1918 && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
1919 "@
1920 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
1921 v<maxmin_float><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
1922 [(set_attr "isa" "noavx,avx")
1923 (set_attr "type" "sseadd")
1924 (set_attr "btver2_sse_attr" "maxmin")
1925 (set_attr "prefix" "<mask_prefix3>")
1926 (set_attr "mode" "<MODE>")])
1927
;; Scalar min/max on the VF_128 modes: element 0 of the result is the
;; min/max of operands 1 and 2, every other element is copied from
;; operand 1 (vec_merge with (match_dup 1) and mask 1).
;; Alternative 0 is the legacy two-operand form, alternative 1 the
;; three-operand "v"-prefixed form.
1928 (define_insn "<sse>_vm<code><mode>3<round_saeonly_name>"
1929 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1930 (vec_merge:VF_128
1931 (smaxmin:VF_128
1932 (match_operand:VF_128 1 "register_operand" "0,v")
1933 (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_saeonly_constraint>"))
1934 (match_dup 1)
1935 (const_int 1)))]
1936 "TARGET_SSE"
1937 "@
1938 <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1939 v<maxmin_float><ssescalarmodesuffix>\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %<iptr>2<round_saeonly_op3>}"
1940 [(set_attr "isa" "noavx,avx")
1941 (set_attr "type" "sse")
1942 (set_attr "btver2_sse_attr" "maxmin")
1943 (set_attr "prefix" "<round_saeonly_prefix>")
1944 (set_attr "mode" "<ssescalarmode>")])
1945
1946 ;; These versions of the min/max patterns implement exactly the operations
1947 ;; min = (op1 < op2 ? op1 : op2)
1948 ;; max = (!(op1 < op2) ? op1 : op2)
1949 ;; Their operands are not commutative, and thus they may be used in the
1950 ;; presence of -0.0 and NaN.
1951
;; Packed minimum expressed as UNSPEC_IEEE_MIN rather than as an RTL
;; smin code, so the operand order is fixed: operand 1 is a register,
;; operand 2 a register or memory, and neither is marked commutative.
;; Alternative 0 emits the legacy min<suffix>, alternative 1 the
;; three-operand vmin<suffix>.
1952 (define_insn "*ieee_smin<mode>3"
1953 [(set (match_operand:VF 0 "register_operand" "=v,v")
1954 (unspec:VF
1955 [(match_operand:VF 1 "register_operand" "0,v")
1956 (match_operand:VF 2 "nonimmediate_operand" "vm,vm")]
1957 UNSPEC_IEEE_MIN))]
1958 "TARGET_SSE"
1959 "@
1960 min<ssemodesuffix>\t{%2, %0|%0, %2}
1961 vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1962 [(set_attr "isa" "noavx,avx")
1963 (set_attr "type" "sseadd")
1964 (set_attr "prefix" "orig,vex")
1965 (set_attr "mode" "<MODE>")])
1966
;; Packed maximum expressed as UNSPEC_IEEE_MAX rather than as an RTL
;; smax code, so the operand order is fixed: operand 1 is a register,
;; operand 2 a register or memory, and neither is marked commutative.
;; Alternative 0 emits the legacy max<suffix>, alternative 1 the
;; three-operand vmax<suffix>.
1967 (define_insn "*ieee_smax<mode>3"
1968 [(set (match_operand:VF 0 "register_operand" "=v,v")
1969 (unspec:VF
1970 [(match_operand:VF 1 "register_operand" "0,v")
1971 (match_operand:VF 2 "nonimmediate_operand" "vm,vm")]
1972 UNSPEC_IEEE_MAX))]
1973 "TARGET_SSE"
1974 "@
1975 max<ssemodesuffix>\t{%2, %0|%0, %2}
1976 vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1977 [(set_attr "isa" "noavx,avx")
1978 (set_attr "type" "sseadd")
1979 (set_attr "prefix" "orig,vex")
1980 (set_attr "mode" "<MODE>")])
1981
;; 256-bit vaddsubpd.  The vec_merge mask 10 (binary 1010) takes
;; elements 1 and 3 from the sum op1 + op2 and elements 0 and 2 from
;; the difference op1 - op2.
1982 (define_insn "avx_addsubv4df3"
1983 [(set (match_operand:V4DF 0 "register_operand" "=x")
1984 (vec_merge:V4DF
1985 (plus:V4DF
1986 (match_operand:V4DF 1 "register_operand" "x")
1987 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1988 (minus:V4DF (match_dup 1) (match_dup 2))
1989 (const_int 10)))]
1990 "TARGET_AVX"
1991 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1992 [(set_attr "type" "sseadd")
1993 (set_attr "prefix" "vex")
1994 (set_attr "mode" "V4DF")])
1995
;; 128-bit addsubpd.  The vec_merge mask 2 (binary 10) takes element 1
;; from the sum op1 + op2 and element 0 from the difference op1 - op2.
;; Alternative 0 is the legacy SSE3 form (operand 1 tied to the
;; destination); alternative 1 is the three-operand VEX form.
1996 (define_insn "sse3_addsubv2df3"
1997 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1998 (vec_merge:V2DF
1999 (plus:V2DF
2000 (match_operand:V2DF 1 "register_operand" "0,x")
2001 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm"))
2002 (minus:V2DF (match_dup 1) (match_dup 2))
2003 (const_int 2)))]
2004 "TARGET_SSE3"
2005 "@
2006 addsubpd\t{%2, %0|%0, %2}
2007 vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
2008 [(set_attr "isa" "noavx,avx")
2009 (set_attr "type" "sseadd")
2010 (set_attr "atom_unit" "complex")
2011 (set_attr "prefix" "orig,vex")
2012 (set_attr "mode" "V2DF")])
2013
;; 256-bit vaddsubps.  The vec_merge mask 170 (binary 10101010) takes
;; the odd-numbered elements from the sum op1 + op2 and the
;; even-numbered elements from the difference op1 - op2.
2014 (define_insn "avx_addsubv8sf3"
2015 [(set (match_operand:V8SF 0 "register_operand" "=x")
2016 (vec_merge:V8SF
2017 (plus:V8SF
2018 (match_operand:V8SF 1 "register_operand" "x")
2019 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
2020 (minus:V8SF (match_dup 1) (match_dup 2))
2021 (const_int 170)))]
2022 "TARGET_AVX"
2023 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
2024 [(set_attr "type" "sseadd")
2025 (set_attr "prefix" "vex")
2026 (set_attr "mode" "V8SF")])
2027
;; 128-bit addsubps.  The vec_merge mask 10 (binary 1010) takes
;; elements 1 and 3 from the sum op1 + op2 and elements 0 and 2 from
;; the difference op1 - op2.
;; Alternative 0 is the legacy SSE3 form, which sets prefix_rep to "1";
;; alternative 1 is the three-operand VEX form.
2028 (define_insn "sse3_addsubv4sf3"
2029 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2030 (vec_merge:V4SF
2031 (plus:V4SF
2032 (match_operand:V4SF 1 "register_operand" "0,x")
2033 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
2034 (minus:V4SF (match_dup 1) (match_dup 2))
2035 (const_int 10)))]
2036 "TARGET_SSE3"
2037 "@
2038 addsubps\t{%2, %0|%0, %2}
2039 vaddsubps\t{%2, %1, %0|%0, %1, %2}"
2040 [(set_attr "isa" "noavx,avx")
2041 (set_attr "type" "sseadd")
2042 (set_attr "prefix" "orig,vex")
2043 (set_attr "prefix_rep" "1,*")
2044 (set_attr "mode" "V4SF")])
2045
;; 256-bit horizontal add/subtract of doubles (vhaddpd/vhsubpd via the
;; plusminus iterator).  Result layout, with "op" the iterated code:
;;   element 0 = op1[0] op op1[1]
;;   element 1 = op2[0] op op2[1]
;;   element 2 = op1[2] op op1[3]
;;   element 3 = op2[2] op op2[3]
;; i.e. each 128-bit half is processed independently.
2046 (define_insn "avx_h<plusminus_insn>v4df3"
2047 [(set (match_operand:V4DF 0 "register_operand" "=x")
2048 (vec_concat:V4DF
2049 (vec_concat:V2DF
2050 (plusminus:DF
2051 (vec_select:DF
2052 (match_operand:V4DF 1 "register_operand" "x")
2053 (parallel [(const_int 0)]))
2054 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2055 (plusminus:DF
2056 (vec_select:DF
2057 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
2058 (parallel [(const_int 0)]))
2059 (vec_select:DF (match_dup 2) (parallel [(const_int 1)]))))
2060 (vec_concat:V2DF
2061 (plusminus:DF
2062 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
2063 (vec_select:DF (match_dup 1) (parallel [(const_int 3)])))
2064 (plusminus:DF
2065 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
2066 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
2067 "TARGET_AVX"
2068 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
2069 [(set_attr "type" "sseadd")
2070 (set_attr "prefix" "vex")
2071 (set_attr "mode" "V4DF")])
2072
;; Named expander for the 128-bit horizontal add of doubles:
;;   element 0 = op1[0] + op1[1], element 1 = op2[0] + op2[1].
;; It has no preparation code; it only emits this RTL with the lane
;; selectors in canonical (0, 1) order.
2073 (define_expand "sse3_haddv2df3"
2074 [(set (match_operand:V2DF 0 "register_operand")
2075 (vec_concat:V2DF
2076 (plus:DF
2077 (vec_select:DF
2078 (match_operand:V2DF 1 "register_operand")
2079 (parallel [(const_int 0)]))
2080 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2081 (plus:DF
2082 (vec_select:DF
2083 (match_operand:V2DF 2 "nonimmediate_operand")
2084 (parallel [(const_int 0)]))
2085 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
2086 "TARGET_SSE3")
2087
;; Matcher for the 128-bit horizontal add of doubles (haddpd/vhaddpd).
;; The four lane selectors are operands 3..6, each constrained to 0 or 1
;; by const_0_to_1_operand.  The insn condition only requires that the
;; two selectors within each sum differ, so both the (0, 1) and the
;; (1, 0) lane orders are recognized for each input vector.
2088 (define_insn "*sse3_haddv2df3"
2089 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2090 (vec_concat:V2DF
2091 (plus:DF
2092 (vec_select:DF
2093 (match_operand:V2DF 1 "register_operand" "0,x")
2094 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))
2095 (vec_select:DF
2096 (match_dup 1)
2097 (parallel [(match_operand:SI 4 "const_0_to_1_operand")])))
2098 (plus:DF
2099 (vec_select:DF
2100 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
2101 (parallel [(match_operand:SI 5 "const_0_to_1_operand")]))
2102 (vec_select:DF
2103 (match_dup 2)
2104 (parallel [(match_operand:SI 6 "const_0_to_1_operand")])))))]
2105 "TARGET_SSE3
2106 && INTVAL (operands[3]) != INTVAL (operands[4])
2107 && INTVAL (operands[5]) != INTVAL (operands[6])"
2108 "@
2109 haddpd\t{%2, %0|%0, %2}
2110 vhaddpd\t{%2, %1, %0|%0, %1, %2}"
2111 [(set_attr "isa" "noavx,avx")
2112 (set_attr "type" "sseadd")
2113 (set_attr "prefix" "orig,vex")
2114 (set_attr "mode" "V2DF")])
2115
;; 128-bit horizontal subtract of doubles (hsubpd/vhsubpd):
;;   element 0 = op1[0] - op1[1], element 1 = op2[0] - op2[1].
;; The lane selectors are fixed constants here (0 then 1), in contrast
;; to the hadd matcher, which accepts either lane order.
2116 (define_insn "sse3_hsubv2df3"
2117 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2118 (vec_concat:V2DF
2119 (minus:DF
2120 (vec_select:DF
2121 (match_operand:V2DF 1 "register_operand" "0,x")
2122 (parallel [(const_int 0)]))
2123 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2124 (minus:DF
2125 (vec_select:DF
2126 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
2127 (parallel [(const_int 0)]))
2128 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
2129 "TARGET_SSE3"
2130 "@
2131 hsubpd\t{%2, %0|%0, %2}
2132 vhsubpd\t{%2, %1, %0|%0, %1, %2}"
2133 [(set_attr "isa" "noavx,avx")
2134 (set_attr "type" "sseadd")
2135 (set_attr "prefix" "orig,vex")
2136 (set_attr "mode" "V2DF")])
2137
;; Scalar DF result holding the sum of the two lanes of V2DF operand 1.
;; The lane selectors (operands 2 and 3) may be in either order as long
;; as they differ.  The output template passes the same register as
;; both sources of haddpd/vhaddpd.
2138 (define_insn "*sse3_haddv2df3_low"
2139 [(set (match_operand:DF 0 "register_operand" "=x,x")
2140 (plus:DF
2141 (vec_select:DF
2142 (match_operand:V2DF 1 "register_operand" "0,x")
2143 (parallel [(match_operand:SI 2 "const_0_to_1_operand")]))
2144 (vec_select:DF
2145 (match_dup 1)
2146 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))))]
2147 "TARGET_SSE3
2148 && INTVAL (operands[2]) != INTVAL (operands[3])"
2149 "@
2150 haddpd\t{%0, %0|%0, %0}
2151 vhaddpd\t{%1, %1, %0|%0, %1, %1}"
2152 [(set_attr "isa" "noavx,avx")
2153 (set_attr "type" "sseadd1")
2154 (set_attr "prefix" "orig,vex")
2155 (set_attr "mode" "V2DF")])
2156
;; Scalar DF result op1[0] - op1[1] for V2DF operand 1.  The lane
;; selectors are the fixed constants 0 and 1.  The output template
;; passes the same register as both sources of hsubpd/vhsubpd.
2157 (define_insn "*sse3_hsubv2df3_low"
2158 [(set (match_operand:DF 0 "register_operand" "=x,x")
2159 (minus:DF
2160 (vec_select:DF
2161 (match_operand:V2DF 1 "register_operand" "0,x")
2162 (parallel [(const_int 0)]))
2163 (vec_select:DF
2164 (match_dup 1)
2165 (parallel [(const_int 1)]))))]
2166 "TARGET_SSE3"
2167 "@
2168 hsubpd\t{%0, %0|%0, %0}
2169 vhsubpd\t{%1, %1, %0|%0, %1, %1}"
2170 [(set_attr "isa" "noavx,avx")
2171 (set_attr "type" "sseadd1")
2172 (set_attr "prefix" "orig,vex")
2173 (set_attr "mode" "V2DF")])
2174
;; 256-bit horizontal add/subtract of floats (vhaddps/vhsubps via the
;; plusminus iterator).  Result layout, with "op" the iterated code:
;;   elements 0,1 = op1[0] op op1[1], op1[2] op op1[3]
;;   elements 2,3 = op2[0] op op2[1], op2[2] op op2[3]
;;   elements 4,5 = op1[4] op op1[5], op1[6] op op1[7]
;;   elements 6,7 = op2[4] op op2[5], op2[6] op op2[7]
;; i.e. each 128-bit half is processed independently.
2175 (define_insn "avx_h<plusminus_insn>v8sf3"
2176 [(set (match_operand:V8SF 0 "register_operand" "=x")
2177 (vec_concat:V8SF
2178 (vec_concat:V4SF
2179 (vec_concat:V2SF
2180 (plusminus:SF
2181 (vec_select:SF
2182 (match_operand:V8SF 1 "register_operand" "x")
2183 (parallel [(const_int 0)]))
2184 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
2185 (plusminus:SF
2186 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
2187 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
2188 (vec_concat:V2SF
2189 (plusminus:SF
2190 (vec_select:SF
2191 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
2192 (parallel [(const_int 0)]))
2193 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
2194 (plusminus:SF
2195 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
2196 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
2197 (vec_concat:V4SF
2198 (vec_concat:V2SF
2199 (plusminus:SF
2200 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
2201 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
2202 (plusminus:SF
2203 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
2204 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
2205 (vec_concat:V2SF
2206 (plusminus:SF
2207 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
2208 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
2209 (plusminus:SF
2210 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
2211 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
2212 "TARGET_AVX"
2213 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
2214 [(set_attr "type" "sseadd")
2215 (set_attr "prefix" "vex")
2216 (set_attr "mode" "V8SF")])
2217
;; 128-bit horizontal add/subtract of floats (haddps/hsubps and their
;; VEX forms).  Result layout, with "op" the iterated code:
;;   element 0 = op1[0] op op1[1]
;;   element 1 = op1[2] op op1[3]
;;   element 2 = op2[0] op op2[1]
;;   element 3 = op2[2] op op2[3]
;; Alternative 0 is the legacy form (prefix_rep "1"); alternative 1 is
;; the three-operand VEX form.
2218 (define_insn "sse3_h<plusminus_insn>v4sf3"
2219 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2220 (vec_concat:V4SF
2221 (vec_concat:V2SF
2222 (plusminus:SF
2223 (vec_select:SF
2224 (match_operand:V4SF 1 "register_operand" "0,x")
2225 (parallel [(const_int 0)]))
2226 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
2227 (plusminus:SF
2228 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
2229 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
2230 (vec_concat:V2SF
2231 (plusminus:SF
2232 (vec_select:SF
2233 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
2234 (parallel [(const_int 0)]))
2235 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
2236 (plusminus:SF
2237 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
2238 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
2239 "TARGET_SSE3"
2240 "@
2241 h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
2242 vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
2243 [(set_attr "isa" "noavx,avx")
2244 (set_attr "type" "sseadd")
2245 (set_attr "atom_unit" "complex")
2246 (set_attr "prefix" "orig,vex")
2247 (set_attr "prefix_rep" "1,*")
2248 (set_attr "mode" "V4SF")])
2249
;; Sum reduction of a V8DF vector (TARGET_AVX512F).  The work is
;; delegated to ix86_expand_reduc with gen_addv8df3 as the combining
;; operation; operand 0 is the destination and operand 1 the input.
;; NOTE(review): ix86_expand_reduc is defined outside this chunk; where
;; in operand 0 the final sum ends up should be confirmed there.
2250 (define_expand "reduc_splus_v8df"
2251 [(match_operand:V8DF 0 "register_operand")
2252 (match_operand:V8DF 1 "register_operand")]
2253 "TARGET_AVX512F"
2254 {
2255 ix86_expand_reduc (gen_addv8df3, operands[0], operands[1]);
2256 DONE;
2257 })
2258
;; Sum reduction of a V4DF vector (TARGET_AVX), emitted as three insns:
;;   1. tmp  = horizontal add of operand 1 with itself
;;      (pairwise sums within each 128-bit half);
;;   2. tmp2 = vperm2f128 of tmp with itself and immediate 1;
;;   3. operand 0 = tmp + tmp2.
;; NOTE(review): immediate 1 presumably swaps the two 128-bit halves so
;; that step 3 adds the low-half and high-half partial sums -- confirm
;; against the avx_vperm2f128v4df3 pattern.
2259 (define_expand "reduc_splus_v4df"
2260 [(match_operand:V4DF 0 "register_operand")
2261 (match_operand:V4DF 1 "register_operand")]
2262 "TARGET_AVX"
2263 {
2264 rtx tmp = gen_reg_rtx (V4DFmode);
2265 rtx tmp2 = gen_reg_rtx (V4DFmode);
2266 emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
2267 emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1)));
2268 emit_insn (gen_addv4df3 (operands[0], tmp, tmp2));
2269 DONE;
2270 })
2271
;; Sum reduction of a V2DF vector (TARGET_SSE3): a single horizontal
;; add of operand 1 with itself, so both result elements hold
;; op1[0] + op1[1].
2272 (define_expand "reduc_splus_v2df"
2273 [(match_operand:V2DF 0 "register_operand")
2274 (match_operand:V2DF 1 "register_operand")]
2275 "TARGET_SSE3"
2276 {
2277 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
2278 DONE;
2279 })
2280
;; Sum reduction of a V16SF vector (TARGET_AVX512F).  The work is
;; delegated to ix86_expand_reduc with gen_addv16sf3 as the combining
;; operation; operand 0 is the destination and operand 1 the input.
;; NOTE(review): ix86_expand_reduc is defined outside this chunk.
2281 (define_expand "reduc_splus_v16sf"
2282 [(match_operand:V16SF 0 "register_operand")
2283 (match_operand:V16SF 1 "register_operand")]
2284 "TARGET_AVX512F"
2285 {
2286 ix86_expand_reduc (gen_addv16sf3, operands[0], operands[1]);
2287 DONE;
2288 })
2289
;; Sum reduction of a V8SF vector (TARGET_AVX), emitted as four insns:
;;   1. tmp  = horizontal add of operand 1 with itself;
;;   2. tmp2 = horizontal add of tmp with itself
;;      (after this each 128-bit half holds that half's total);
;;   3. tmp  = vperm2f128 of tmp2 with itself and immediate 1
;;      (tmp is reused as the destination here);
;;   4. operand 0 = tmp + tmp2.
;; NOTE(review): immediate 1 presumably swaps the two 128-bit halves --
;; confirm against the avx_vperm2f128v8sf3 pattern.
2290 (define_expand "reduc_splus_v8sf"
2291 [(match_operand:V8SF 0 "register_operand")
2292 (match_operand:V8SF 1 "register_operand")]
2293 "TARGET_AVX"
2294 {
2295 rtx tmp = gen_reg_rtx (V8SFmode);
2296 rtx tmp2 = gen_reg_rtx (V8SFmode);
2297 emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
2298 emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp));
2299 emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1)));
2300 emit_insn (gen_addv8sf3 (operands[0], tmp, tmp2));
2301 DONE;
2302 })
2303
;; Sum reduction of a V4SF vector (TARGET_SSE).
;; With TARGET_SSE3 two chained horizontal adds are emitted: the first
;; forms the pairwise sums in a temporary, the second adds those pairs
;; into operand 0.  Without SSE3 the reduction is delegated to
;; ix86_expand_reduc with gen_addv4sf3 as the combining operation.
2304 (define_expand "reduc_splus_v4sf"
2305 [(match_operand:V4SF 0 "register_operand")
2306 (match_operand:V4SF 1 "register_operand")]
2307 "TARGET_SSE"
2308 {
2309 if (TARGET_SSE3)
2310 {
2311 rtx tmp = gen_reg_rtx (V4SFmode);
2312 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
2313 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
2314 }
2315 else
2316 ix86_expand_reduc (gen_addv4sf3, operands[0], operands[1]);
2317 DONE;
2318 })
2319
2320 ;; Modes handled by reduc_sm{in,ax}* patterns.
;; Each entry pairs a vector mode with the target condition under which
;; that mode takes part in the iteration: AVX2 for the 256-bit integer
;; modes, AVX for the 256-bit float modes, SSE for V4SF, AVX512BW for
;; V64QI/V32HI and AVX512F for the remaining 512-bit modes.
2321 (define_mode_iterator REDUC_SMINMAX_MODE
2322 [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
2323 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
2324 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
2325 (V4SF "TARGET_SSE") (V64QI "TARGET_AVX512BW")
2326 (V32HI "TARGET_AVX512BW") (V16SI "TARGET_AVX512F")
2327 (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
2328 (V8DF "TARGET_AVX512F")])
2329
;; Signed min/max reduction for every mode in REDUC_SMINMAX_MODE.
;; The insn condition is empty because each mode's availability is
;; already expressed by the per-mode conditions of the iterator.
;; The expansion is delegated to ix86_expand_reduc, using the matching
;; element-wise <code><mode>3 generator as the combining operation.
2330 (define_expand "reduc_<code>_<mode>"
2331 [(smaxmin:REDUC_SMINMAX_MODE
2332 (match_operand:REDUC_SMINMAX_MODE 0 "register_operand")
2333 (match_operand:REDUC_SMINMAX_MODE 1 "register_operand"))]
2334 ""
2335 {
2336 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
2337 DONE;
2338 })
2339
;; Unsigned min/max reduction for the VI_AVX512BW integer modes under
;; TARGET_AVX512F.  The expansion is delegated to ix86_expand_reduc,
;; using the element-wise <code><mode>3 generator.
;; NOTE(review): the VI_AVX512BW iterator is defined outside this chunk;
;; presumably it restricts the byte/word modes to AVX512BW itself.
2340 (define_expand "reduc_<code>_<mode>"
2341 [(umaxmin:VI_AVX512BW
2342 (match_operand:VI_AVX512BW 0 "register_operand")
2343 (match_operand:VI_AVX512BW 1 "register_operand"))]
2344 "TARGET_AVX512F"
2345 {
2346 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
2347 DONE;
2348 })
2349
;; Unsigned min/max reduction for the VI_256 integer modes under
;; TARGET_AVX2.  The expansion is delegated to ix86_expand_reduc, using
;; the element-wise <code><mode>3 generator.
2350 (define_expand "reduc_<code>_<mode>"
2351 [(umaxmin:VI_256
2352 (match_operand:VI_256 0 "register_operand")
2353 (match_operand:VI_256 1 "register_operand"))]
2354 "TARGET_AVX2"
2355 {
2356 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
2357 DONE;
2358 })
2359
;; Unsigned minimum reduction of a V8HI vector under TARGET_SSE4_1.
;; The expansion is delegated to ix86_expand_reduc with gen_uminv8hi3.
;; Only umin is provided for this mode in this part of the file.
2360 (define_expand "reduc_umin_v8hi"
2361 [(umin:V8HI
2362 (match_operand:V8HI 0 "register_operand")
2363 (match_operand:V8HI 1 "register_operand"))]
2364 "TARGET_SSE4_1"
2365 {
2366 ix86_expand_reduc (gen_uminv8hi3, operands[0], operands[1]);
2367 DONE;
2368 })
2369
;; Packed vreduceps/vreducepd (UNSPEC_REDUCE) for the VF_AVX512VL modes
;; under TARGET_AVX512DQ.  Operand 1 is the register-or-memory source
;; and operand 2 an 8-bit immediate (const_0_to_255_operand).
;; NOTE(review): <mask_codefor>, <mask_name> and <mask_operand3> come
;; from a substitution defined outside this chunk; presumably they
;; generate an additional write-masked variant.
2370 (define_insn "<mask_codefor>reducep<mode><mask_name>"
2371 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
2372 (unspec:VF_AVX512VL
2373 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")
2374 (match_operand:SI 2 "const_0_to_255_operand")]
2375 UNSPEC_REDUCE))]
2376 "TARGET_AVX512DQ"
2377 "vreduce<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
2378 [(set_attr "type" "sse")
2379 (set_attr "prefix" "evex")
2380 (set_attr "mode" "<MODE>")])
2381
;; Scalar vreducess/vreducesd for the VF_128 modes under
;; TARGET_AVX512DQ.  Element 0 of the result comes from the
;; UNSPEC_REDUCE of operands 1, 2 and the 8-bit immediate operand 3;
;; the remaining elements are copied from operand 1 (vec_merge with
;; (match_dup 1) and mask 1).
2382 (define_insn "reduces<mode>"
2383 [(set (match_operand:VF_128 0 "register_operand" "=v")
2384 (vec_merge:VF_128
2385 (unspec:VF_128
2386 [(match_operand:VF_128 1 "register_operand" "v")
2387 (match_operand:VF_128 2 "nonimmediate_operand" "vm")
2388 (match_operand:SI 3 "const_0_to_255_operand")]
2389 UNSPEC_REDUCE)
2390 (match_dup 1)
2391 (const_int 1)))]
2392 "TARGET_AVX512DQ"
2393 "vreduce<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2394 [(set_attr "type" "sse")
2395 (set_attr "prefix" "evex")
2396 (set_attr "mode" "<MODE>")])
2397
2398 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2399 ;;
2400 ;; Parallel floating point comparisons
2401 ;;
2402 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2403
;; AVX packed compare with an explicit predicate immediate
;; (vcmpps/vcmppd).  Operand 3 is a 5-bit immediate
;; (const_0_to_31_operand) and the result is a vector of the same mode
;; as the inputs, represented as UNSPEC_PCMP.
2404 (define_insn "avx_cmp<mode>3"
2405 [(set (match_operand:VF_128_256 0 "register_operand" "=x")
2406 (unspec:VF_128_256
2407 [(match_operand:VF_128_256 1 "register_operand" "x")
2408 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm")
2409 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2410 UNSPEC_PCMP))]
2411 "TARGET_AVX"
2412 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2413 [(set_attr "type" "ssecmp")
2414 (set_attr "length_immediate" "1")
2415 (set_attr "prefix" "vex")
2416 (set_attr "mode" "<MODE>")])
2417
;; AVX scalar compare with an explicit predicate immediate
;; (vcmpss/vcmpsd).  Element 0 of the result is the UNSPEC_PCMP of
;; operands 1 and 2 with the 5-bit immediate operand 3; the remaining
;; elements are copied from operand 1 (vec_merge with (match_dup 1) and
;; mask 1).
2418 (define_insn "avx_vmcmp<mode>3"
2419 [(set (match_operand:VF_128 0 "register_operand" "=x")
2420 (vec_merge:VF_128
2421 (unspec:VF_128
2422 [(match_operand:VF_128 1 "register_operand" "x")
2423 (match_operand:VF_128 2 "nonimmediate_operand" "xm")
2424 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2425 UNSPEC_PCMP)
2426 (match_dup 1)
2427 (const_int 1)))]
2428 "TARGET_AVX"
2429 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}"
2430 [(set_attr "type" "ssecmp")
2431 (set_attr "length_immediate" "1")
2432 (set_attr "prefix" "vex")
2433 (set_attr "mode" "<ssescalarmode>")])
2434
;; Packed compare whose comparison code (operand 3, matched by
;; sse_comparison_operator) is commutative: the insn condition requires
;; the code's RTX class to be RTX_COMM_COMPARE.  Because of that,
;; operand 1 carries the "%" marker and the inputs may be swapped
;; during constraint matching.
;; NOTE(review): "%D3" presumably prints the condition mnemonic for the
;; comparison code -- confirm against the operand printer.
2435 (define_insn "*<sse>_maskcmp<mode>3_comm"
2436 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
2437 (match_operator:VF_128_256 3 "sse_comparison_operator"
2438 [(match_operand:VF_128_256 1 "register_operand" "%0,x")
2439 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")]))]
2440 "TARGET_SSE
2441 && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
2442 "@
2443 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
2444 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2445 [(set_attr "isa" "noavx,avx")
2446 (set_attr "type" "ssecmp")
2447 (set_attr "length_immediate" "1")
2448 (set_attr "prefix" "orig,vex")
2449 (set_attr "mode" "<MODE>")])
2450
;; General packed compare producing a vector mask of the same mode as
;; the inputs.  The comparison code is operand 3, matched by
;; sse_comparison_operator.  Operand 1 has no "%" marker, so the
;; operand order is preserved.  Alternative 0 is the legacy two-operand
;; form, alternative 1 the three-operand VEX form.
2451 (define_insn "<sse>_maskcmp<mode>3"
2452 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
2453 (match_operator:VF_128_256 3 "sse_comparison_operator"
2454 [(match_operand:VF_128_256 1 "register_operand" "0,x")
2455 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")]))]
2456 "TARGET_SSE"
2457 "@
2458 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
2459 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2460 [(set_attr "isa" "noavx,avx")
2461 (set_attr "type" "ssecmp")
2462 (set_attr "length_immediate" "1")
2463 (set_attr "prefix" "orig,vex")
2464 (set_attr "mode" "<MODE>")])
2465
;; Scalar compare on the VF_128 modes: element 0 of the result is the
;; comparison (operand 3) of operands 1 and 2, the remaining elements
;; are copied from operand 1 (vec_merge with (match_dup 1) and mask 1).
;; Alternative 0 is the legacy two-operand form, alternative 1 the
;; three-operand VEX form.
2466 (define_insn "<sse>_vmmaskcmp<mode>3"
2467 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2468 (vec_merge:VF_128
2469 (match_operator:VF_128 3 "sse_comparison_operator"
2470 [(match_operand:VF_128 1 "register_operand" "0,x")
2471 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")])
2472 (match_dup 1)
2473 (const_int 1)))]
2474 "TARGET_SSE"
2475 "@
2476 cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
2477 vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
2478 [(set_attr "isa" "noavx,avx")
2479 (set_attr "type" "ssecmp")
2480 (set_attr "length_immediate" "1,*")
2481 (set_attr "prefix" "orig,vex")
2482 (set_attr "mode" "<ssescalarmode>")])
2483
;; Predicate used for the comparison immediate of the AVX-512 compare
;; patterns, selected per vector mode: floating-point vector modes
;; accept a 5-bit immediate (const_0_to_31_operand), integer vector
;; modes a 3-bit immediate (const_0_to_7_operand).
2484 (define_mode_attr cmp_imm_predicate
2485 [(V16SF "const_0_to_31_operand") (V8DF "const_0_to_31_operand")
2486 (V16SI "const_0_to_7_operand") (V8DI "const_0_to_7_operand")
2487 (V8SF "const_0_to_31_operand") (V4DF "const_0_to_31_operand")
2488 (V8SI "const_0_to_7_operand") (V4DI "const_0_to_7_operand")
2489 (V4SF "const_0_to_31_operand") (V2DF "const_0_to_31_operand")
2490 (V4SI "const_0_to_7_operand") (V2DI "const_0_to_7_operand")
2491 (V32HI "const_0_to_7_operand") (V64QI "const_0_to_7_operand")
2492 (V16HI "const_0_to_7_operand") (V32QI "const_0_to_7_operand")
2493 (V8HI "const_0_to_7_operand") (V16QI "const_0_to_7_operand")])
2494
;; AVX-512 compare for the V48_AVX512VL modes.  The result has mode
;; <avx512fmaskmode> and uses the "Yk" constraint; the comparison
;; predicate is immediate operand 3, whose valid range is chosen per
;; mode by <cmp_imm_predicate>.
;; NOTE(review): "Yk" is presumably the AVX-512 mask register class;
;; the <mask_scalar_merge_...> and <round_saeonly_...> pieces are
;; substitution attributes defined outside this chunk.
2495 (define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name><round_saeonly_name>"
2496 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2497 (unspec:<avx512fmaskmode>
2498 [(match_operand:V48_AVX512VL 1 "register_operand" "v")
2499 (match_operand:V48_AVX512VL 2 "nonimmediate_operand" "<round_saeonly_constraint>")
2500 (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
2501 UNSPEC_PCMP))]
2502 "TARGET_AVX512F && <round_saeonly_mode512bit_condition>"
2503 "v<sseintprefix>cmp<ssemodesuffix>\t{%3, <round_saeonly_mask_scalar_merge_op4>%2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2<round_saeonly_mask_scalar_merge_op4>, %3}"
2504 [(set_attr "type" "ssecmp")
2505 (set_attr "length_immediate" "1")
2506 (set_attr "prefix" "evex")
2507 (set_attr "mode" "<sseinsnmode>")])
2508
;; AVX-512BW integer compare (vpcmp<suffix>) for the VI12_AVX512VL
;; modes.  The result has mode <avx512fmaskmode> and uses the "Yk"
;; constraint; the comparison predicate is immediate operand 3,
;; validated by <cmp_imm_predicate>.
2509 (define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name>"
2510 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2511 (unspec:<avx512fmaskmode>
2512 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
2513 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
2514 (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
2515 UNSPEC_PCMP))]
2516 "TARGET_AVX512BW"
2517 "vpcmp<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
2518 [(set_attr "type" "ssecmp")
2519 (set_attr "length_immediate" "1")
2520 (set_attr "prefix" "evex")
2521 (set_attr "mode" "<sseinsnmode>")])
2522
;; AVX-512BW unsigned integer compare (vpcmpu<suffix>) for the
;; VI12_AVX512VL modes, represented as UNSPEC_UNSIGNED_PCMP.  The result
;; has mode <avx512fmaskmode> and uses the "Yk" constraint; operand 3 is
;; a 3-bit predicate immediate.
2523 (define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
2524 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2525 (unspec:<avx512fmaskmode>
2526 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
2527 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
2528 (match_operand:SI 3 "const_0_to_7_operand" "n")]
2529 UNSPEC_UNSIGNED_PCMP))]
2530 "TARGET_AVX512BW"
2531 "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
2532 [(set_attr "type" "ssecmp")
2533 (set_attr "length_immediate" "1")
2534 (set_attr "prefix" "evex")
2535 (set_attr "mode" "<sseinsnmode>")])
2536
;; AVX-512F unsigned integer compare (vpcmpu<suffix>) for the
;; VI48_AVX512VL modes, represented as UNSPEC_UNSIGNED_PCMP.  The result
;; has mode <avx512fmaskmode> and uses the "Yk" constraint; operand 3 is
;; a 3-bit predicate immediate.
2537 (define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
2538 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2539 (unspec:<avx512fmaskmode>
2540 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
2541 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
2542 (match_operand:SI 3 "const_0_to_7_operand" "n")]
2543 UNSPEC_UNSIGNED_PCMP))]
2544 "TARGET_AVX512F"
2545 "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
2546 [(set_attr "type" "ssecmp")
2547 (set_attr "length_immediate" "1")
2548 (set_attr "prefix" "evex")
2549 (set_attr "mode" "<sseinsnmode>")])
2550
;; AVX-512F scalar compare (vcmpss/vcmpsd) writing a "Yk" operand of
;; mode <avx512fmaskmode>.  The UNSPEC_PCMP result is ANDed with
;; (const_int 1), so only bit 0 of the result can be set, reflecting
;; that a single element is compared.
2551 (define_insn "avx512f_vmcmp<mode>3<round_saeonly_name>"
2552 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2553 (and:<avx512fmaskmode>
2554 (unspec:<avx512fmaskmode>
2555 [(match_operand:VF_128 1 "register_operand" "v")
2556 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2557 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2558 UNSPEC_PCMP)
2559 (const_int 1)))]
2560 "TARGET_AVX512F"
2561 "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}"
2562 [(set_attr "type" "ssecmp")
2563 (set_attr "length_immediate" "1")
2564 (set_attr "prefix" "evex")
2565 (set_attr "mode" "<ssescalarmode>")])
2566
;; Write-masked form of the AVX-512F scalar compare.  The UNSPEC_PCMP
;; result is ANDed with bit 0 of operand 4, a "Yk" input operand that
;; is printed as the {%4} mask in the output template.
2567 (define_insn "avx512f_vmcmp<mode>3_mask<round_saeonly_name>"
2568 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2569 (and:<avx512fmaskmode>
2570 (unspec:<avx512fmaskmode>
2571 [(match_operand:VF_128 1 "register_operand" "v")
2572 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2573 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2574 UNSPEC_PCMP)
2575 (and:<avx512fmaskmode>
2576 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")
2577 (const_int 1))))]
2578 "TARGET_AVX512F"
2579 "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_saeonly_op5>, %3}"
2580 [(set_attr "type" "ssecmp")
2581 (set_attr "length_immediate" "1")
2582 (set_attr "prefix" "evex")
2583 (set_attr "mode" "<ssescalarmode>")])
2584
;; Packed floating-point compare whose result is written to a "Yk"
;; operand of mode <avx512fmaskmode> rather than to a vector register.
;; The comparison code is operand 3, matched by sse_comparison_operator.
;; The insn is EVEX-encoded (prefix "evex") and has a "Yk" destination,
;; so it is gated on TARGET_AVX512F like the other "Yk"-destination
;; compare patterns in this file; a bare TARGET_SSE condition would let
;; the pattern be recognized on targets that cannot encode it.
2585 (define_insn "avx512f_maskcmp<mode>3"
2586 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2587 (match_operator:<avx512fmaskmode> 3 "sse_comparison_operator"
2588 [(match_operand:VF 1 "register_operand" "v")
2589 (match_operand:VF 2 "nonimmediate_operand" "vm")]))]
2590 "TARGET_AVX512F"
2591 "vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2592 [(set_attr "type" "ssecmp")
2593 (set_attr "length_immediate" "1")
2594 (set_attr "prefix" "evex")
2595 (set_attr "mode" "<sseinsnmode>")])
2596
;; comiss/comisd (and VEX forms): compares element 0 of operand 0 with
;; element 0 of operand 1 and sets FLAGS_REG in CCFP mode.
;; The prefix_data16 attribute is "1" when the insn mode is DF and "0"
;; otherwise; prefix_rep is always "0".
2597 (define_insn "<sse>_comi<round_saeonly_name>"
2598 [(set (reg:CCFP FLAGS_REG)
2599 (compare:CCFP
2600 (vec_select:MODEF
2601 (match_operand:<ssevecmode> 0 "register_operand" "v")
2602 (parallel [(const_int 0)]))
2603 (vec_select:MODEF
2604 (match_operand:<ssevecmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2605 (parallel [(const_int 0)]))))]
2606 "SSE_FLOAT_MODE_P (<MODE>mode)"
2607 "%vcomi<ssemodesuffix>\t{<round_saeonly_op2>%1, %0|%0, %<iptr>1<round_saeonly_op2>}"
2608 [(set_attr "type" "ssecomi")
2609 (set_attr "prefix" "maybe_vex")
2610 (set_attr "prefix_rep" "0")
2611 (set (attr "prefix_data16")
2612 (if_then_else (eq_attr "mode" "DF")
2613 (const_string "1")
2614 (const_string "0")))
2615 (set_attr "mode" "<MODE>")])
2616
;; ucomiss/ucomisd (and VEX forms): compares element 0 of operand 0
;; with element 0 of operand 1 and sets FLAGS_REG in CCFPU mode.
;; Apart from the CC mode and the mnemonic this mirrors the comi
;; pattern: prefix_data16 is "1" for DF, "0" otherwise.
2617 (define_insn "<sse>_ucomi<round_saeonly_name>"
2618 [(set (reg:CCFPU FLAGS_REG)
2619 (compare:CCFPU
2620 (vec_select:MODEF
2621 (match_operand:<ssevecmode> 0 "register_operand" "v")
2622 (parallel [(const_int 0)]))
2623 (vec_select:MODEF
2624 (match_operand:<ssevecmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2625 (parallel [(const_int 0)]))))]
2626 "SSE_FLOAT_MODE_P (<MODE>mode)"
2627 "%vucomi<ssemodesuffix>\t{<round_saeonly_op2>%1, %0|%0, %<iptr>1<round_saeonly_op2>}"
2628 [(set_attr "type" "ssecomi")
2629 (set_attr "prefix" "maybe_vex")
2630 (set_attr "prefix_rep" "0")
2631 (set (attr "prefix_data16")
2632 (if_then_else (eq_attr "mode" "DF")
2633 (const_string "1")
2634 (const_string "0")))
2635 (set_attr "mode" "<MODE>")])
2636
;; Vector conditional select for 512-bit modes:
;;   operand 0 = (operand 4 <cmp> operand 5) ? operand 1 : operand 2
;; where <cmp> is operand 3.  The data mode (V_512) and the compared
;; floating-point mode (VF_512) may differ but must have the same
;; number of elements.  The expansion is done by ix86_expand_fp_vcond,
;; which is asserted to succeed.
2637 (define_expand "vcond<V_512:mode><VF_512:mode>"
2638 [(set (match_operand:V_512 0 "register_operand")
2639 (if_then_else:V_512
2640 (match_operator 3 ""
2641 [(match_operand:VF_512 4 "nonimmediate_operand")
2642 (match_operand:VF_512 5 "nonimmediate_operand")])
2643 (match_operand:V_512 1 "general_operand")
2644 (match_operand:V_512 2 "general_operand")))]
2645 "TARGET_AVX512F
2646 && (GET_MODE_NUNITS (<V_512:MODE>mode)
2647 == GET_MODE_NUNITS (<VF_512:MODE>mode))"
2648 {
2649 bool ok = ix86_expand_fp_vcond (operands);
2650 gcc_assert (ok);
2651 DONE;
2652 })
2653
;; Vector conditional select for 256-bit modes:
;;   operand 0 = (operand 4 <cmp> operand 5) ? operand 1 : operand 2
;; where <cmp> is operand 3.  The data mode (V_256) and the compared
;; floating-point mode (VF_256) may differ but must have the same
;; number of elements.  The expansion is done by ix86_expand_fp_vcond,
;; which is asserted to succeed.
2654 (define_expand "vcond<V_256:mode><VF_256:mode>"
2655 [(set (match_operand:V_256 0 "register_operand")
2656 (if_then_else:V_256
2657 (match_operator 3 ""
2658 [(match_operand:VF_256 4 "nonimmediate_operand")
2659 (match_operand:VF_256 5 "nonimmediate_operand")])
2660 (match_operand:V_256 1 "general_operand")
2661 (match_operand:V_256 2 "general_operand")))]
2662 "TARGET_AVX
2663 && (GET_MODE_NUNITS (<V_256:MODE>mode)
2664 == GET_MODE_NUNITS (<VF_256:MODE>mode))"
2665 {
2666 bool ok = ix86_expand_fp_vcond (operands);
2667 gcc_assert (ok);
2668 DONE;
2669 })
2670
;; Vector conditional select for 128-bit modes:
;;   operand 0 = (operand 4 <cmp> operand 5) ? operand 1 : operand 2
;; where <cmp> is operand 3.  The data mode (V_128) and the compared
;; floating-point mode (VF_128) may differ but must have the same
;; number of elements.  The expansion is done by ix86_expand_fp_vcond,
;; which is asserted to succeed.
2671 (define_expand "vcond<V_128:mode><VF_128:mode>"
2672 [(set (match_operand:V_128 0 "register_operand")
2673 (if_then_else:V_128
2674 (match_operator 3 ""
2675 [(match_operand:VF_128 4 "nonimmediate_operand")
2676 (match_operand:VF_128 5 "nonimmediate_operand")])
2677 (match_operand:V_128 1 "general_operand")
2678 (match_operand:V_128 2 "general_operand")))]
2679 "TARGET_SSE
2680 && (GET_MODE_NUNITS (<V_128:MODE>mode)
2681 == GET_MODE_NUNITS (<VF_128:MODE>mode))"
2682 {
2683 bool ok = ix86_expand_fp_vcond (operands);
2684 gcc_assert (ok);
2685 DONE;
2686 })
2687
2688 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2689 ;;
2690 ;; Parallel floating point logical operations
2691 ;;
2692 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2693
;; Floating-point and-not for the 128/256-bit VF modes:
;;   operand 0 = ~operand 1 & operand 2.
;; The output is built at run time in a static buffer:
;;   * the suffix is "ps" when the insn's mode attribute is V8SF or
;;     V4SF, otherwise the natural <ssemodesuffix> of the pattern mode;
;;   * alternative 0 emits the legacy two-operand andn<suffix>,
;;     alternative 1 the three-operand vandn<suffix>;
;;   * for the masked variant without TARGET_AVX512DQ the integer form
;;     vpandnq/vpandnd is emitted instead (q for DF elements, d
;;     otherwise).
;; The mode attribute itself is computed below and may differ from the
;; pattern mode: <ssePSmode> for 16-byte vectors when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL, V4SF when optimizing for size
;; without AVX, and <MODE> in all other cases.
2694 (define_insn "<sse>_andnot<mode>3<mask_name>"
2695 [(set (match_operand:VF_128_256 0 "register_operand" "=x,v")
2696 (and:VF_128_256
2697 (not:VF_128_256
2698 (match_operand:VF_128_256 1 "register_operand" "0,v"))
2699 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,vm")))]
2700 "TARGET_SSE && <mask_avx512vl_condition>"
2701 {
2702 static char buf[128];
2703 const char *ops;
2704 const char *suffix;
2705
2706 switch (get_attr_mode (insn))
2707 {
2708 case MODE_V8SF:
2709 case MODE_V4SF:
2710 suffix = "ps";
2711 break;
2712 default:
2713 suffix = "<ssemodesuffix>";
2714 }
2715
2716 switch (which_alternative)
2717 {
2718 case 0:
2719 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
2720 break;
2721 case 1:
2722 ops = "vandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
2723 break;
2724 default:
2725 gcc_unreachable ();
2726 }
2727
2728 /* There is no vandnp[sd] in avx512f. Use vpandn[qd]. */
2729 if (<mask_applied> && !TARGET_AVX512DQ)
2730 {
2731 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
2732 ops = "vpandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
2733 }
2734
2735 snprintf (buf, sizeof (buf), ops, suffix);
2736 return buf;
2737 }
2738 [(set_attr "isa" "noavx,avx")
2739 (set_attr "type" "sselog")
2740 (set_attr "prefix" "orig,maybe_evex")
2741 (set (attr "mode")
2742 (cond [(and (match_test "<MODE_SIZE> == 16")
2743 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
2744 (const_string "<ssePSmode>")
2745 (match_test "TARGET_AVX")
2746 (const_string "<MODE>")
2747 (match_test "optimize_function_for_size_p (cfun)")
2748 (const_string "V4SF")
2749 ]
2750 (const_string "<MODE>")))])
2751
2752
;; AND-NOT on VF_512 vectors: operand 0 = (~operand 1) & operand 2.
(define_insn "<sse>_andnot<mode>3<mask_name>"
  [(set (match_operand:VF_512 0 "register_operand" "=v")
        (and:VF_512
          (not:VF_512
            (match_operand:VF_512 1 "register_operand" "v"))
          (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512F"
{
  static char buf[128];

  /* There is no vandnp[sd] in avx512f.  Use vpandn[qd].  */
  const bool use_int_form = !TARGET_AVX512DQ;

  /* "p" turns v...andn into the integer mnemonic vpandn.  */
  const char *int_prefix = use_int_form ? "p" : "";
  const char *sfx = "<ssemodesuffix>";

  if (use_int_form)
    sfx = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";

  snprintf (buf, sizeof (buf),
            "v%sandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}",
            int_prefix, sfx);
  return buf;
}
  [(set_attr "type" "sselog")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
2783
;; Expander for the any_logic codes on VF_128_256 vectors.  The operands are
;; passed through ix86_fixup_binary_operands_no_copy before the pattern
;; below is emitted.
(define_expand "<code><mode>3<mask_name>"
  [(set (match_operand:VF_128_256 0 "register_operand")
        (any_logic:VF_128_256
          (match_operand:VF_128_256 1 "nonimmediate_operand")
          (match_operand:VF_128_256 2 "nonimmediate_operand")))]
  "TARGET_SSE && <mask_avx512vl_condition>"
{
  ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
})
2791
;; Expander for the any_logic codes on VF_512 vectors.  The operands are
;; passed through ix86_fixup_binary_operands_no_copy before the pattern
;; below is emitted.
(define_expand "<code><mode>3<mask_name>"
  [(set (match_operand:VF_512 0 "register_operand")
        (any_logic:VF_512
          (match_operand:VF_512 1 "nonimmediate_operand")
          (match_operand:VF_512 2 "nonimmediate_operand")))]
  "TARGET_AVX512F"
{
  ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
})
2799
;; any_logic operation on VF_128_256 vectors.  Alternative 0 is the
;; two-operand non-AVX form, alternative 1 the three-operand AVX form (see
;; the "isa" attribute).
(define_insn "*<code><mode>3<mask_name>"
  [(set (match_operand:VF_128_256 0 "register_operand" "=x,v")
        (any_logic:VF_128_256
          (match_operand:VF_128_256 1 "nonimmediate_operand" "%0,v")
          (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,vm")))]
  "TARGET_SSE && <mask_avx512vl_condition>
   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
{
  static char buf[128];
  const char *templ;
  const char *sfx;

  /* The mnemonic suffix follows the insn's "mode" attribute.  */
  switch (get_attr_mode (insn))
    {
    case MODE_V4SF:
    case MODE_V8SF:
      sfx = "ps";
      break;
    default:
      sfx = "<ssemodesuffix>";
      break;
    }

  if (which_alternative == 0)
    templ = "<logic>%s\t{%%2, %%0|%%0, %%2}";
  else if (which_alternative == 1)
    templ = "v<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
  else
    gcc_unreachable ();

  /* There is no v<logic>p[sd] in avx512f.  Use vp<logic>[dq].  */
  if (<mask_applied> && !TARGET_AVX512DQ)
    {
      sfx = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
      templ = "vp<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
    }

  /* TEMPL contains exactly one %s, which receives the suffix.  */
  snprintf (buf, sizeof (buf), templ, sfx);
  return buf;
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix" "orig,maybe_evex")
   (set (attr "mode")
        (cond [(and (match_test "<MODE_SIZE> == 16")
                    (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
                 (const_string "<ssePSmode>")
               (match_test "TARGET_AVX")
                 (const_string "<MODE>")
               (match_test "optimize_function_for_size_p (cfun)")
                 (const_string "V4SF")
              ]
              (const_string "<MODE>")))])
2857
;; any_logic operation on VF_512 vectors.
(define_insn "*<code><mode>3<mask_name>"
  [(set (match_operand:VF_512 0 "register_operand" "=v")
        (any_logic:VF_512
          (match_operand:VF_512 1 "nonimmediate_operand" "%v")
          (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512F && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
{
  static char buf[128];

  /* There is no v<logic>p[sd] in avx512f.  Use vp<logic>[dq].  */
  const bool use_int_form
    = (<MODE_SIZE> == 64 || <mask_applied>) && !TARGET_AVX512DQ;

  /* "p" turns v...<logic> into the integer mnemonic vp<logic>.  */
  const char *int_prefix = use_int_form ? "p" : "";
  const char *sfx = "<ssemodesuffix>";

  if (use_int_form)
    sfx = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";

  snprintf (buf, sizeof (buf),
            "v%s<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}",
            int_prefix, sfx);
  return buf;
}
  [(set_attr "type" "sselog")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
2887
;; copysign as three logical operations, where operand 3 is the mask built
;; by ix86_build_signbit_mask:
;;   tmp4 = ~mask & operand 1
;;   tmp5 =  mask & operand 2
;;   operand 0 = tmp4 | tmp5
(define_expand "copysign<mode>3"
  [(set (match_dup 4)
        (and:VF
          (not:VF (match_dup 3))
          (match_operand:VF 1 "nonimmediate_operand")))
   (set (match_dup 5)
        (and:VF (match_dup 3)
                (match_operand:VF 2 "nonimmediate_operand")))
   (set (match_operand:VF 0 "register_operand")
        (ior:VF (match_dup 4) (match_dup 5)))]
  "TARGET_SSE"
{
  /* Mask shared by both AND steps.  */
  operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);

  /* Fresh pseudos for the two intermediate results.  */
  operands[4] = gen_reg_rtx (<MODE>mode);
  operands[5] = gen_reg_rtx (<MODE>mode);
})
2905
2906 ;; Also define scalar versions. These are used for abs, neg, and
2907 ;; conditional move. Using subregs into vector modes causes register
2908 ;; allocation lossage. These patterns do not allow memory operands
2909 ;; because the native instructions read the full 128-bits.
2910
;; Scalar AND-NOT on MODEF values held in SSE registers:
;; operand 0 = (~operand 1) & operand 2.  Register operands only.
;; Alternative 0 is the non-AVX form, alternative 1 the AVX form.
(define_insn "*andnot<mode>3"
  [(set (match_operand:MODEF 0 "register_operand" "=x,x")
        (and:MODEF
          (not:MODEF
            (match_operand:MODEF 1 "register_operand" "0,x"))
          (match_operand:MODEF 2 "register_operand" "x,x")))]
  "SSE_FLOAT_MODE_P (<MODE>mode)"
{
  static char buf[32];
  const char *templ;
  const char *sfx = "<ssevecmodesuffix>";

  /* A V4SF "mode" attribute selects the packed-single mnemonic.  */
  if (get_attr_mode (insn) == MODE_V4SF)
    sfx = "ps";

  if (which_alternative == 0)
    templ = "andn%s\t{%%2, %%0|%%0, %%2}";
  else if (which_alternative == 1)
    templ = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
  else
    gcc_unreachable ();

  snprintf (buf, sizeof (buf), templ, sfx);
  return buf;
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix" "orig,vex")
   (set (attr "mode")
        (cond [(and (match_test "<MODE_SIZE> == 16")
                    (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
                 (const_string "V4SF")
               (match_test "TARGET_AVX")
                 (const_string "<ssevecmode>")
               (match_test "optimize_function_for_size_p (cfun)")
                 (const_string "V4SF")
              ]
              (const_string "<ssevecmode>")))])
2952
;; AND-NOT on TFmode values: operand 0 = (~operand 1) & operand 2.
;; Emits andnps when the "mode" attribute is V4SF and pandn otherwise;
;; alternative 1 is the "v"-prefixed three-operand AVX form.
(define_insn "*andnottf3"
  [(set (match_operand:TF 0 "register_operand" "=x,x")
        (and:TF
          (not:TF (match_operand:TF 1 "register_operand" "0,x"))
          (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
  "TARGET_SSE"
{
  static char buf[32];
  const char *templ;
  const char *mnemonic;

  if (get_attr_mode (insn) == MODE_V4SF)
    mnemonic = "andnps";
  else
    mnemonic = "pandn";

  if (which_alternative == 0)
    templ = "%s\t{%%2, %%0|%%0, %%2}";
  else if (which_alternative == 1)
    templ = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
  else
    gcc_unreachable ();

  snprintf (buf, sizeof (buf), templ, mnemonic);
  return buf;
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set (attr "prefix_data16")
        (if_then_else
          (and (eq_attr "alternative" "0")
               (eq_attr "mode" "TI"))
          (const_string "1")
          (const_string "*")))
   (set_attr "prefix" "orig,vex")
   (set (attr "mode")
        (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
                 (const_string "V4SF")
               (match_test "TARGET_AVX")
                 (const_string "TI")
               (ior (not (match_test "TARGET_SSE2"))
                    (match_test "optimize_function_for_size_p (cfun)"))
                 (const_string "V4SF")
              ]
              (const_string "TI")))])
2999
;; Scalar any_logic operation on MODEF values held in SSE registers.
;; Register operands only.  Alternative 0 is the non-AVX form,
;; alternative 1 the AVX form.
(define_insn "*<code><mode>3"
  [(set (match_operand:MODEF 0 "register_operand" "=x,x")
        (any_logic:MODEF
          (match_operand:MODEF 1 "register_operand" "%0,x")
          (match_operand:MODEF 2 "register_operand" "x,x")))]
  "SSE_FLOAT_MODE_P (<MODE>mode)"
{
  static char buf[32];
  const char *templ;
  const char *sfx = "<ssevecmodesuffix>";

  /* A V4SF "mode" attribute selects the packed-single mnemonic.  */
  if (get_attr_mode (insn) == MODE_V4SF)
    sfx = "ps";

  if (which_alternative == 0)
    templ = "<logic>%s\t{%%2, %%0|%%0, %%2}";
  else if (which_alternative == 1)
    templ = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
  else
    gcc_unreachable ();

  snprintf (buf, sizeof (buf), templ, sfx);
  return buf;
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix" "orig,vex")
   (set (attr "mode")
        (cond [(and (match_test "<MODE_SIZE> == 16")
                    (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
                 (const_string "V4SF")
               (match_test "TARGET_AVX")
                 (const_string "<ssevecmode>")
               (match_test "optimize_function_for_size_p (cfun)")
                 (const_string "V4SF")
              ]
              (const_string "<ssevecmode>")))])
3040
;; Expander for the any_logic codes on TFmode.  The operands are passed
;; through ix86_fixup_binary_operands_no_copy before the pattern below is
;; emitted.
(define_expand "<code>tf3"
  [(set (match_operand:TF 0 "register_operand")
        (any_logic:TF
          (match_operand:TF 1 "nonimmediate_operand")
          (match_operand:TF 2 "nonimmediate_operand")))]
  "TARGET_SSE"
{
  ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);
})
3048
;; any_logic operation on TFmode values.  Emits <logic>ps when the "mode"
;; attribute is V4SF and p<logic> otherwise; alternative 1 is the
;; "v"-prefixed three-operand AVX form.
(define_insn "*<code>tf3"
  [(set (match_operand:TF 0 "register_operand" "=x,x")
        (any_logic:TF
          (match_operand:TF 1 "nonimmediate_operand" "%0,x")
          (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
  "TARGET_SSE
   && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
{
  static char buf[32];
  const char *templ;
  const char *mnemonic;

  if (get_attr_mode (insn) == MODE_V4SF)
    mnemonic = "<logic>ps";
  else
    mnemonic = "p<logic>";

  if (which_alternative == 0)
    templ = "%s\t{%%2, %%0|%%0, %%2}";
  else if (which_alternative == 1)
    templ = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
  else
    gcc_unreachable ();

  snprintf (buf, sizeof (buf), templ, mnemonic);
  return buf;
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set (attr "prefix_data16")
        (if_then_else
          (and (eq_attr "alternative" "0")
               (eq_attr "mode" "TI"))
          (const_string "1")
          (const_string "*")))
   (set_attr "prefix" "orig,vex")
   (set (attr "mode")
        (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
                 (const_string "V4SF")
               (match_test "TARGET_AVX")
                 (const_string "TI")
               (ior (not (match_test "TARGET_SSE2"))
                    (match_test "optimize_function_for_size_p (cfun)"))
                 (const_string "V4SF")
              ]
              (const_string "TI")))])
3096
3097 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3098 ;;
3099 ;; FMA floating point multiply/accumulate instructions. These include
3100 ;; scalar versions of the instructions as well as vector versions.
3101 ;;
3102 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3103
3104 ;; The standard names for scalar FMA are only available with SSE math enabled.
3105 ;; CPUID bit AVX512F enables evex encoded scalar and 512-bit fma. It doesn't
3106 ;; care about FMA bit, so we enable fma for TARGET_AVX512F even when TARGET_FMA
3107 ;; and TARGET_FMA4 are both false.
3108 ;; TODO: In theory AVX512F does not automatically imply FMA, and without FMA
3109 ;; one must force the EVEX encoding of the fma insns. Ideally we'd improve
3110 ;; GAS to allow proper prefix selection. However, for the moment all hardware
3111 ;; that supports AVX512F also supports FMA so we can ignore this for now.
;; Modes for the standard-named fma/fms/fnma/fnms expanders, each paired
;; with its enabling condition.  The scalar SF/DF entries additionally
;; require TARGET_SSE_MATH.
(define_mode_iterator FMAMODEM
  [(SF    "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
   (DF    "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
   (V4SF  "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
   (V2DF  "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
   (V8SF  "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
   (V4DF  "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
   (V16SF "TARGET_AVX512F")
   (V8DF  "TARGET_AVX512F")])
3121
;; Standard name: operand 0 = fma (operand 1, operand 2, operand 3).
(define_expand "fma<mode>4"
  [(set (match_operand:FMAMODEM 0 "register_operand")
        (fma:FMAMODEM
          (match_operand:FMAMODEM 1 "nonimmediate_operand")
          (match_operand:FMAMODEM 2 "nonimmediate_operand")
          (match_operand:FMAMODEM 3 "nonimmediate_operand")))])
3128
;; Standard name: operand 0 = fma (operand 1, operand 2, -operand 3).
(define_expand "fms<mode>4"
  [(set (match_operand:FMAMODEM 0 "register_operand")
        (fma:FMAMODEM
          (match_operand:FMAMODEM 1 "nonimmediate_operand")
          (match_operand:FMAMODEM 2 "nonimmediate_operand")
          (neg:FMAMODEM
            (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
3135
;; Standard name: operand 0 = fma (-operand 1, operand 2, operand 3).
(define_expand "fnma<mode>4"
  [(set (match_operand:FMAMODEM 0 "register_operand")
        (fma:FMAMODEM
          (neg:FMAMODEM
            (match_operand:FMAMODEM 1 "nonimmediate_operand"))
          (match_operand:FMAMODEM 2 "nonimmediate_operand")
          (match_operand:FMAMODEM 3 "nonimmediate_operand")))])
3142
;; Standard name: operand 0 = fma (-operand 1, operand 2, -operand 3).
(define_expand "fnms<mode>4"
  [(set (match_operand:FMAMODEM 0 "register_operand")
        (fma:FMAMODEM
          (neg:FMAMODEM
            (match_operand:FMAMODEM 1 "nonimmediate_operand"))
          (match_operand:FMAMODEM 2 "nonimmediate_operand")
          (neg:FMAMODEM
            (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
3149
3150 ;; The builtins for intrinsics are not constrained by SSE math enabled.
;; Same mode list as FMAMODEM, but the scalar SF/DF entries carry no
;; TARGET_SSE_MATH requirement.
(define_mode_iterator FMAMODE_AVX512
  [(SF    "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
   (DF    "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
   (V4SF  "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
   (V2DF  "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
   (V8SF  "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
   (V4DF  "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
   (V16SF "TARGET_AVX512F")
   (V8DF  "TARGET_AVX512F")])
3160
;; Scalar, 128-bit and 256-bit modes used by the "*fma_..." insn patterns
;; that are enabled by TARGET_FMA || TARGET_FMA4.
(define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF])
3163
;; Named expander over FMAMODE_AVX512:
;; operand 0 = fma (operand 1, operand 2, operand 3).
(define_expand "fma4i_fmadd_<mode>"
  [(set (match_operand:FMAMODE_AVX512 0 "register_operand")
        (fma:FMAMODE_AVX512
          (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand")
          (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
          (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand")))])
3170
;; Zero-masking fmadd expander.  Forwards operands 0-3 to the
;; fma_fmadd_<mode>_maskz_1 pattern, inserting CONST0_RTX (<MODE>mode)
;; ahead of the mask operand 4.
(define_expand "<avx512>_fmadd_<mode>_maskz<round_expand_name>"
  [(match_operand:VF_AVX512VL 0 "register_operand")
   (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
   (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
   (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512F && <round_mode512bit_condition>"
{
  rtx zero_vec = CONST0_RTX (<MODE>mode);
  rtx pat
    = gen_fma_fmadd_<mode>_maskz_1<round_expand_name> (
        operands[0], operands[1], operands[2], operands[3],
        zero_vec, operands[4]<round_expand_operand>);

  emit_insn (pat);
  DONE;
})
3184
;; FMA3/FMA4 multiply-add: operand 0 = fma (op1, op2, op3).
;; Alternatives 0-2 are the FMA3 132/213/231 forms, alternatives 3-4 the
;; four-operand FMA4 forms (see the "isa" attribute).
(define_insn "*fma_fmadd_<mode>"
  [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
        (fma:FMAMODE
          (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x")
          (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
          (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x")))]
  "TARGET_FMA || TARGET_FMA4"
  "@
   vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
   vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
   vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
   vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
   vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
   (set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
3201
3202 ;; Suppose AVX-512F as baseline
;; Scalar and vector float modes for the AVX-512 FMA patterns.  The 128-bit
;; and 256-bit vector modes are only enabled with TARGET_AVX512VL.
(define_mode_iterator VF_SF_AVX512VL
  [SF V16SF
   (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
   DF V8DF
   (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
3206
;; AVX-512 multiply-add: operand 0 = fma (op1, op2, op3), in the 132, 213
;; and 231 operand orders.  The masking and rounding pieces of the templates
;; come from the <sd_mask...> and <round...> attributes.
(define_insn "<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name><round_name>"
  [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
        (fma:VF_SF_AVX512VL
          (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
          (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
          (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")))]
  "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
  "@
   vfmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
   vfmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
   vfmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
3220
;; Merge-masked multiply-add: fma (op1, op2, op3) is vec_merged with
;; operand 1 under mask operand 4.  Operand 1 is tied to the destination,
;; so only the 132 and 213 forms are available.
(define_insn "<avx512>_fmadd_<mode>_mask<round_name>"
  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
        (vec_merge:VF_AVX512VL
          (fma:VF_AVX512VL
            (match_operand:VF_AVX512VL 1 "register_operand" "0,0")
            (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
            (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>"))
          (match_dup 1)
          (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
  "TARGET_AVX512F && <round_mode512bit_condition>"
  "@
   vfmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
   vfmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
  [(set_attr "isa" "fma_avx512f,fma_avx512f")
   (set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
3237
;; Merge-masked multiply-add, mask3 form: fma (op1, op2, op3) is vec_merged
;; with operand 3 under mask operand 4.  Operand 3 is tied to the
;; destination, hence the 231 form.
;;
;; Operands 0 and 1 use the "v" constraint, like the other mask3 patterns
;; in this file, rather than "x".  The condition includes
;; <round_mode512bit_condition> because the pattern is instantiated with
;; <round_name>, as the fmsub/fnmadd mask3 patterns already do.
(define_insn "<avx512>_fmadd_<mode>_mask3<round_name>"
  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
        (vec_merge:VF_AVX512VL
          (fma:VF_AVX512VL
            (match_operand:VF_AVX512VL 1 "register_operand" "v")
            (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
            (match_operand:VF_AVX512VL 3 "register_operand" "0"))
          (match_dup 3)
          (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512F && <round_mode512bit_condition>"
  "vfmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
  [(set_attr "isa" "fma_avx512f")
   (set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
3252
;; FMA3/FMA4 multiply-subtract: operand 0 = fma (op1, op2, -op3).
;; Alternatives 0-2 are the FMA3 132/213/231 forms, alternatives 3-4 the
;; four-operand FMA4 forms (see the "isa" attribute).
(define_insn "*fma_fmsub_<mode>"
  [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
        (fma:FMAMODE
          (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x")
          (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
          (neg:FMAMODE
            (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x"))))]
  "TARGET_FMA || TARGET_FMA4"
  "@
   vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
   vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
   vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
   vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
   vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
   (set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
3270
;; AVX-512 multiply-subtract: operand 0 = fma (op1, op2, -op3), in the 132,
;; 213 and 231 operand orders.  The masking and rounding pieces of the
;; templates come from the <sd_mask...> and <round...> attributes.
(define_insn "<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name><round_name>"
  [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
        (fma:VF_SF_AVX512VL
          (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
          (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
          (neg:VF_SF_AVX512VL
            (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))]
  "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
  "@
   vfmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
   vfmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
   vfmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
3285
;; Merge-masked multiply-subtract: fma (op1, op2, -op3) is vec_merged with
;; operand 1 under mask operand 4.  Operand 1 is tied to the destination,
;; so only the 132 and 213 forms are available.
;;
;; The condition includes <round_mode512bit_condition> because the pattern
;; is instantiated with <round_name>, matching the fmadd/fnmadd/fnmsub
;; _mask patterns.
(define_insn "<avx512>_fmsub_<mode>_mask<round_name>"
  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
        (vec_merge:VF_AVX512VL
          (fma:VF_AVX512VL
            (match_operand:VF_AVX512VL 1 "register_operand" "0,0")
            (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
            (neg:VF_AVX512VL
              (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>")))
          (match_dup 1)
          (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
  "TARGET_AVX512F && <round_mode512bit_condition>"
  "@
   vfmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
   vfmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
  [(set_attr "isa" "fma_avx512f,fma_avx512f")
   (set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
3303
;; Merge-masked multiply-subtract, mask3 form: fma (op1, op2, -op3) is
;; vec_merged with operand 3 under mask operand 4.  Operand 3 is tied to
;; the destination, hence the 231 form.
(define_insn "<avx512>_fmsub_<mode>_mask3<round_name>"
  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
        (vec_merge:VF_AVX512VL
          (fma:VF_AVX512VL
            (match_operand:VF_AVX512VL 1 "register_operand" "v")
            (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
            (neg:VF_AVX512VL
              (match_operand:VF_AVX512VL 3 "register_operand" "0")))
          (match_dup 3)
          (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512F && <round_mode512bit_condition>"
  "vfmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
  [(set_attr "isa" "fma_avx512f")
   (set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
3319
;; FMA3/FMA4 negated multiply-add: operand 0 = fma (-op1, op2, op3).
;; Alternatives 0-2 are the FMA3 132/213/231 forms, alternatives 3-4 the
;; four-operand FMA4 forms (see the "isa" attribute).
(define_insn "*fma_fnmadd_<mode>"
  [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
        (fma:FMAMODE
          (neg:FMAMODE
            (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x"))
          (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
          (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x")))]
  "TARGET_FMA || TARGET_FMA4"
  "@
   vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
   vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
   vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
   vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
   vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
   (set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
3337
;; AVX-512 negated multiply-add: operand 0 = fma (-op1, op2, op3), in the
;; 132, 213 and 231 operand orders.  The masking and rounding pieces of the
;; templates come from the <sd_mask...> and <round...> attributes.
(define_insn "<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name><round_name>"
  [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
        (fma:VF_SF_AVX512VL
          (neg:VF_SF_AVX512VL
            (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v"))
          (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
          (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")))]
  "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
  "@
   vfnmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
   vfnmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
   vfnmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
3352
;; Merge-masked negated multiply-add: fma (-op1, op2, op3) is vec_merged
;; with operand 1 under mask operand 4.  Operand 1 is tied to the
;; destination, so only the 132 and 213 forms are available.
(define_insn "<avx512>_fnmadd_<mode>_mask<round_name>"
  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
        (vec_merge:VF_AVX512VL
          (fma:VF_AVX512VL
            (neg:VF_AVX512VL
              (match_operand:VF_AVX512VL 1 "register_operand" "0,0"))
            (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
            (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>"))
          (match_dup 1)
          (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
  "TARGET_AVX512F && <round_mode512bit_condition>"
  "@
   vfnmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
   vfnmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
  [(set_attr "isa" "fma_avx512f,fma_avx512f")
   (set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
3370
;; Merge-masked negated multiply-add, mask3 form: fma (-op1, op2, op3) is
;; vec_merged with operand 3 under mask operand 4.  Operand 3 is tied to
;; the destination, hence the 231 form.
(define_insn "<avx512>_fnmadd_<mode>_mask3<round_name>"
  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
        (vec_merge:VF_AVX512VL
          (fma:VF_AVX512VL
            (neg:VF_AVX512VL
              (match_operand:VF_AVX512VL 1 "register_operand" "v"))
            (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
            (match_operand:VF_AVX512VL 3 "register_operand" "0"))
          (match_dup 3)
          (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512F && <round_mode512bit_condition>"
  "vfnmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
  [(set_attr "isa" "fma_avx512f")
   (set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
3386
;; FMA3/FMA4 negated multiply-subtract: operand 0 = fma (-op1, op2, -op3).
;; Alternatives 0-2 are the FMA3 132/213/231 forms, alternatives 3-4 the
;; four-operand FMA4 forms (see the "isa" attribute).
;;
;; This pattern's name carries no define_subst attribute, so its templates
;; use plain operands only, exactly like the *fma_fmadd, *fma_fmsub and
;; *fma_fnmadd patterns.  The masking/rounding attributes belong solely to
;; the separate AVX-512 fnmsub pattern.
(define_insn "*fma_fnmsub_<mode>"
  [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
        (fma:FMAMODE
          (neg:FMAMODE
            (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x"))
          (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
          (neg:FMAMODE
            (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x"))))]
  "TARGET_FMA || TARGET_FMA4"
  "@
   vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
   vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
   vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
   vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
   vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
   (set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
3405
;; AVX-512 negated multiply-subtract: operand 0 = fma (-op1, op2, -op3), in
;; the 132, 213 and 231 operand orders.  The masking and rounding pieces of
;; the templates come from the <sd_mask...> and <round...> attributes.
(define_insn "<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name><round_name>"
  [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
        (fma:VF_SF_AVX512VL
          (neg:VF_SF_AVX512VL
            (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v"))
          (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
          (neg:VF_SF_AVX512VL
            (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))]
  "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
  "@
   vfnmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
   vfnmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
   vfnmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
3421
;; Merge-masked negated multiply-subtract: fma (-op1, op2, -op3) is
;; vec_merged with operand 1 under mask operand 4.  Operand 1 is tied to
;; the destination, so only the 132 and 213 forms are available.
(define_insn "<avx512>_fnmsub_<mode>_mask<round_name>"
  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
        (vec_merge:VF_AVX512VL
          (fma:VF_AVX512VL
            (neg:VF_AVX512VL
              (match_operand:VF_AVX512VL 1 "register_operand" "0,0"))
            (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
            (neg:VF_AVX512VL
              (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>")))
          (match_dup 1)
          (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
  "TARGET_AVX512F && <round_mode512bit_condition>"
  "@
   vfnmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
   vfnmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
  [(set_attr "isa" "fma_avx512f,fma_avx512f")
   (set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
3440
;; Merge-masked negated multiply-subtract, mask3 form: fma (-op1, op2, -op3)
;; is vec_merged with operand 3 under mask operand 4.  Operand 3 is tied to
;; the destination, hence the 231 form.
;;
;; The condition includes <round_mode512bit_condition> because the pattern
;; is instantiated with <round_name>, matching the fmsub/fnmadd mask3
;; patterns.
(define_insn "<avx512>_fnmsub_<mode>_mask3<round_name>"
  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
        (vec_merge:VF_AVX512VL
          (fma:VF_AVX512VL
            (neg:VF_AVX512VL
              (match_operand:VF_AVX512VL 1 "register_operand" "v"))
            (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
            (neg:VF_AVX512VL
              (match_operand:VF_AVX512VL 3 "register_operand" "0")))
          (match_dup 3)
          (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512F && <round_mode512bit_condition>"
  "vfnmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
  [(set_attr "isa" "fma_avx512f")
   (set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
3457
3458 ;; FMA parallel floating point multiply addsub and subadd operations.
3459
3460 ;; It would be possible to represent these without the UNSPEC as
3461 ;;
3462 ;; (vec_merge
3463 ;; (fma op1 op2 op3)
3464 ;; (fma op1 op2 (neg op3))
3465 ;; (merge-const))
3466 ;;
3467 ;; But this doesn't seem useful in practice.
3468
;; Named expander for the fmaddsub operation, represented as an
;; UNSPEC_FMADDSUB over operands 1-3 in every VF mode.
(define_expand "fmaddsub_<mode>"
  [(set (match_operand:VF 0 "register_operand")
        (unspec:VF
          [(match_operand:VF 1 "nonimmediate_operand")
           (match_operand:VF 2 "nonimmediate_operand")
           (match_operand:VF 3 "nonimmediate_operand")]
          UNSPEC_FMADDSUB))]
  "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
3477
;; Zero-masking fmaddsub expander.  Forwards operands 0-3 to the
;; fma_fmaddsub_<mode>_maskz_1 pattern, inserting CONST0_RTX (<MODE>mode)
;; ahead of the mask operand 4.
(define_expand "<avx512>_fmaddsub_<mode>_maskz<round_expand_name>"
  [(match_operand:VF_AVX512VL 0 "register_operand")
   (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
   (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
   (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512F"
{
  rtx zero_vec = CONST0_RTX (<MODE>mode);
  rtx pat
    = gen_fma_fmaddsub_<mode>_maskz_1<round_expand_name> (
        operands[0], operands[1], operands[2], operands[3],
        zero_vec, operands[4]<round_expand_operand>);

  emit_insn (pat);
  DONE;
})
3491
;; FMA3/FMA4 fmaddsub on VF_128_256 vectors (UNSPEC_FMADDSUB of op1-op3).
;; Alternatives 0-2 are the FMA3 132/213/231 forms, alternatives 3-4 the
;; four-operand FMA4 forms (see the "isa" attribute).
(define_insn "*fma_fmaddsub_<mode>"
  [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
        (unspec:VF_128_256
          [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,v,x,x")
           (match_operand:VF_128_256 2 "nonimmediate_operand" "vm,v,vm,x,m")
           (match_operand:VF_128_256 3 "nonimmediate_operand" "v,vm,0,xm,x")]
          UNSPEC_FMADDSUB))]
  "TARGET_FMA || TARGET_FMA4"
  "@
   vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
   vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
   vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
   vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
   vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
   (set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
3509
;; vfmaddsub for the VF_SF_AVX512VL modes; the name and the operand
;; predicates/constraints are parameterized by the sd_mask* and round*
;; substitution attributes.  The three alternatives emit the 132, 213 and
;; 231 encodings, chosen by which input (operand 1 or operand 3) is tied to
;; the destination.
3510 (define_insn "<sd_mask_codefor>fma_fmaddsub_<mode><sd_maskz_name><round_name>"
3511 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3512 (unspec:VF_SF_AVX512VL
3513 [(match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
3514 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3515 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")]
3516 UNSPEC_FMADDSUB))]
3517 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3518 "@
3519 vfmaddsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3520 vfmaddsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3521 vfmaddsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3522 [(set_attr "type" "ssemuladd")
3523 (set_attr "mode" "<MODE>")])
3524
;; Merge-masked vfmaddsub.  The UNSPEC_FMADDSUB result is vec_merge'd with
;; operand 1 under the mask register in operand 4, and operand 1 is tied to
;; the destination in both alternatives, so only the 132 and 213 encodings
;; are emitted.
3525 (define_insn "<avx512>_fmaddsub_<mode>_mask<round_name>"
3526 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3527 (vec_merge:VF_AVX512VL
3528 (unspec:VF_AVX512VL
3529 [(match_operand:VF_AVX512VL 1 "register_operand" "0,0")
3530 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3531 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>")]
3532 UNSPEC_FMADDSUB)
3533 (match_dup 1)
3534 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3535 "TARGET_AVX512F"
3536 "@
3537 vfmaddsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3538 vfmaddsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3539 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3540 (set_attr "type" "ssemuladd")
3541 (set_attr "mode" "<MODE>")])
3542
;; "mask3" variant of masked vfmaddsub: the result is vec_merge'd with
;; operand 3 (the addend) rather than operand 1.  Operand 3 is tied to the
;; destination, so the single alternative uses the 231 encoding.
3543 (define_insn "<avx512>_fmaddsub_<mode>_mask3<round_name>"
3544 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3545 (vec_merge:VF_AVX512VL
3546 (unspec:VF_AVX512VL
3547 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
3548 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3549 (match_operand:VF_AVX512VL 3 "register_operand" "0")]
3550 UNSPEC_FMADDSUB)
3551 (match_dup 3)
3552 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3553 "TARGET_AVX512F"
3554 "vfmaddsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3555 [(set_attr "isa" "fma_avx512f")
3556 (set_attr "type" "ssemuladd")
3557 (set_attr "mode" "<MODE>")])
3558
;; Unmasked vfmsubadd for the VF_128_256 modes.  It is represented with the
;; same UNSPEC_FMADDSUB as vfmaddsub, but with operand 3 wrapped in a neg.
;; Alternatives 0-2 are the FMA3 132/213/231 forms, alternatives 3 and 4 the
;; four-operand FMA4 form (see the "isa" attribute).
3559 (define_insn "*fma_fmsubadd_<mode>"
3560 [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
3561 (unspec:VF_128_256
3562 [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,v,x,x")
3563 (match_operand:VF_128_256 2 "nonimmediate_operand" "vm,v,vm,x,m")
3564 (neg:VF_128_256
3565 (match_operand:VF_128_256 3 "nonimmediate_operand" "v,vm,0,xm,x"))]
3566 UNSPEC_FMADDSUB))]
3567 "TARGET_FMA || TARGET_FMA4"
3568 "@
3569 vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3570 vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3571 vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3572 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3573 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3574 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3575 (set_attr "type" "ssemuladd")
3576 (set_attr "mode" "<MODE>")])
3577
;; vfmsubadd for the VF_SF_AVX512VL modes: UNSPEC_FMADDSUB with operand 3
;; negated.  Name, predicates and constraints are parameterized by the
;; sd_mask* and round* substitution attributes; the three alternatives emit
;; the 132, 213 and 231 encodings.
3578 (define_insn "<sd_mask_codefor>fma_fmsubadd_<mode><sd_maskz_name><round_name>"
3579 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3580 (unspec:VF_SF_AVX512VL
3581 [(match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
3582 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3583 (neg:VF_SF_AVX512VL
3584 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))]
3585 UNSPEC_FMADDSUB))]
3586 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3587 "@
3588 vfmsubadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3589 vfmsubadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3590 vfmsubadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3591 [(set_attr "type" "ssemuladd")
3592 (set_attr "mode" "<MODE>")])
3593
;; Merge-masked vfmsubadd.  The UNSPEC_FMADDSUB (with operand 3 negated) is
;; vec_merge'd with operand 1 under the mask register in operand 4.
;; Operand 1 is tied to the destination in both alternatives, so only the
;; 132 and 213 encodings are emitted.
3594 (define_insn "<avx512>_fmsubadd_<mode>_mask<round_name>"
3595 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3596 (vec_merge:VF_AVX512VL
3597 (unspec:VF_AVX512VL
3598 [(match_operand:VF_AVX512VL 1 "register_operand" "0,0")
3599 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3600 (neg:VF_AVX512VL
3601 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>"))]
3602 UNSPEC_FMADDSUB)
3603 (match_dup 1)
3604 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3605 "TARGET_AVX512F"
3606 "@
3607 vfmsubadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3608 vfmsubadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3609 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3610 (set_attr "type" "ssemuladd")
3611 (set_attr "mode" "<MODE>")])
3612
;; "mask3" variant of masked vfmsubadd: the result is vec_merge'd with
;; operand 3 (which appears negated inside the unspec).  Operand 3 is tied
;; to the destination, so the single alternative uses the 231 encoding.
3613 (define_insn "<avx512>_fmsubadd_<mode>_mask3<round_name>"
3614 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3615 (vec_merge:VF_AVX512VL
3616 (unspec:VF_AVX512VL
3617 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
3618 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3619 (neg:VF_AVX512VL
3620 (match_operand:VF_AVX512VL 3 "register_operand" "0"))]
3621 UNSPEC_FMADDSUB)
3622 (match_dup 3)
3623 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3624 "TARGET_AVX512F"
3625 "vfmsubadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3626 [(set_attr "isa" "fma_avx512f")
3627 (set_attr "type" "ssemuladd")
3628 (set_attr "mode" "<MODE>")])
3629
3630 ;; FMA3 floating point scalar intrinsics. These merge the result with
3631 ;; the high-order elements from the destination register.
3632
;; Named expander for the scalar FMA3 multiply-add.  It builds a vec_merge
;; of the fma of operands 1-3 with operand 1 itself, using the constant
;; selector 1.  There is no preparation code.  Note the condition is
;; TARGET_FMA only.
3633 (define_expand "fmai_vmfmadd_<mode><round_name>"
3634 [(set (match_operand:VF_128 0 "register_operand")
3635 (vec_merge:VF_128
3636 (fma:VF_128
3637 (match_operand:VF_128 1 "<round_nimm_predicate>")
3638 (match_operand:VF_128 2 "<round_nimm_predicate>")
3639 (match_operand:VF_128 3 "<round_nimm_predicate>"))
3640 (match_dup 1)
3641 (const_int 1)))]
3642 "TARGET_FMA")
3643
;; Scalar FMA3 multiply-add (vfmadd132/213 with the scalar mode suffix).
;; Operand 1 is tied to the destination in both alternatives and is also the
;; vec_merge source for the elements not selected by the constant mask 1.
;; NOTE(review): unlike the fnmadd/fnmsub siblings, this pattern's name does
;; not carry <round_name> although it uses the round* attributes; confirm
;; that is intended.
3644 (define_insn "*fmai_fmadd_<mode>"
3645 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
3646 (vec_merge:VF_128
3647 (fma:VF_128
3648 (match_operand:VF_128 1 "<round_nimm_predicate>" " 0, 0")
3649 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>, v")
3650 (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>"))
3651 (match_dup 1)
3652 (const_int 1)))]
3653 "TARGET_FMA || TARGET_AVX512F"
3654 "@
3655 vfmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
3656 vfmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
3657 [(set_attr "type" "ssemuladd")
3658 (set_attr "mode" "<MODE>")])
3659
;; Scalar FMA3 multiply-subtract (vfmsub132/213): same shape as the scalar
;; multiply-add pattern but with operand 3 negated inside the fma.  Operand 1
;; is tied to the destination and is the vec_merge source.
;; NOTE(review): the name does not carry <round_name> although the pattern
;; uses the round* attributes; confirm that is intended.
3660 (define_insn "*fmai_fmsub_<mode>"
3661 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
3662 (vec_merge:VF_128
3663 (fma:VF_128
3664 (match_operand:VF_128 1 "<round_nimm_predicate>" "0,0")
3665 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v")
3666 (neg:VF_128
3667 (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>")))
3668 (match_dup 1)
3669 (const_int 1)))]
3670 "TARGET_FMA || TARGET_AVX512F"
3671 "@
3672 vfmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
3673 vfmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
3674 [(set_attr "type" "ssemuladd")
3675 (set_attr "mode" "<MODE>")])
3676
;; Scalar FMA3 negated multiply-add (vfnmadd132/213).  The negation is
;; applied to operand 2, which is written first inside the fma; operand 1 is
;; the second multiplicand, is tied to the destination, and is the vec_merge
;; source.
3677 (define_insn "*fmai_fnmadd_<mode><round_name>"
3678 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
3679 (vec_merge:VF_128
3680 (fma:VF_128
3681 (neg:VF_128
3682 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v"))
3683 (match_operand:VF_128 1 "<round_nimm_predicate>" "0,0")
3684 (match_operand:VF_128 3 "<round_nimm_predicate>" "v,<round_constraint>"))
3685 (match_dup 1)
3686 (const_int 1)))]
3687 "TARGET_FMA || TARGET_AVX512F"
3688 "@
3689 vfnmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
3690 vfnmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
3691 [(set_attr "type" "ssemuladd")
3692 (set_attr "mode" "<MODE>")])
3693
;; Scalar FMA3 negated multiply-subtract (vfnmsub132/213): both operand 2
;; (a multiplicand) and operand 3 (the addend) are negated inside the fma.
;; Operand 1 is tied to the destination and is the vec_merge source.
3694 (define_insn "*fmai_fnmsub_<mode><round_name>"
3695 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
3696 (vec_merge:VF_128
3697 (fma:VF_128
3698 (neg:VF_128
3699 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>, v"))
3700 (match_operand:VF_128 1 "<round_nimm_predicate>" " 0, 0")
3701 (neg:VF_128
3702 (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>")))
3703 (match_dup 1)
3704 (const_int 1)))]
3705 "TARGET_FMA || TARGET_AVX512F"
3706 "@
3707 vfnmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
3708 vfnmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
3709 [(set_attr "type" "ssemuladd")
3710 (set_attr "mode" "<MODE>")])
3711
3712 ;; FMA4 floating point scalar intrinsics. These write the
3713 ;; entire destination register, with the high-order elements zeroed.
3714
;; Named expander for the scalar FMA4 multiply-add.  The preparation
;; statement sets operand 4 to the zero vector of <MODE>mode, which is then
;; used (via match_dup 4) as the second vec_merge input.
3715 (define_expand "fma4i_vmfmadd_<mode>"
3716 [(set (match_operand:VF_128 0 "register_operand")
3717 (vec_merge:VF_128
3718 (fma:VF_128
3719 (match_operand:VF_128 1 "nonimmediate_operand")
3720 (match_operand:VF_128 2 "nonimmediate_operand")
3721 (match_operand:VF_128 3 "nonimmediate_operand"))
3722 (match_dup 4)
3723 (const_int 1)))]
3724 "TARGET_FMA4"
3725 "operands[4] = CONST0_RTX (<MODE>mode);")
3726
;; Scalar FMA4 multiply-add (four-operand vfmadd).  Operand 4 must satisfy
;; const0_operand and is the vec_merge source for the elements not selected
;; by the constant mask 1.  The two alternatives differ in whether operand 2
;; or operand 3 may be in memory.
3727 (define_insn "*fma4i_vmfmadd_<mode>"
3728 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3729 (vec_merge:VF_128
3730 (fma:VF_128
3731 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
3732 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
3733 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
3734 (match_operand:VF_128 4 "const0_operand")
3735 (const_int 1)))]
3736 "TARGET_FMA4"
3737 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
3738 [(set_attr "type" "ssemuladd")
3739 (set_attr "mode" "<MODE>")])
3740
;; Scalar FMA4 multiply-subtract (four-operand vfmsub): operand 3 is negated
;; inside the fma.  Operand 4 must satisfy const0_operand and is the
;; vec_merge source for the elements not selected by the constant mask 1.
3741 (define_insn "*fma4i_vmfmsub_<mode>"
3742 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3743 (vec_merge:VF_128
3744 (fma:VF_128
3745 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
3746 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
3747 (neg:VF_128
3748 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
3749 (match_operand:VF_128 4 "const0_operand")
3750 (const_int 1)))]
3751 "TARGET_FMA4"
3752 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
3753 [(set_attr "type" "ssemuladd")
3754 (set_attr "mode" "<MODE>")])
3755
;; Scalar FMA4 negated multiply-add (four-operand vfnmadd): operand 1 is
;; negated inside the fma.  Operand 4 must satisfy const0_operand and is the
;; vec_merge source for the elements not selected by the constant mask 1.
3756 (define_insn "*fma4i_vmfnmadd_<mode>"
3757 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3758 (vec_merge:VF_128
3759 (fma:VF_128
3760 (neg:VF_128
3761 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
3762 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
3763 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
3764 (match_operand:VF_128 4 "const0_operand")
3765 (const_int 1)))]
3766 "TARGET_FMA4"
3767 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
3768 [(set_attr "type" "ssemuladd")
3769 (set_attr "mode" "<MODE>")])
3770
;; Scalar FMA4 negated multiply-subtract (four-operand vfnmsub): both
;; operand 1 and operand 3 are negated inside the fma.  Operand 4 must
;; satisfy const0_operand and is the vec_merge source for the elements not
;; selected by the constant mask 1.
3771 (define_insn "*fma4i_vmfnmsub_<mode>"
3772 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3773 (vec_merge:VF_128
3774 (fma:VF_128
3775 (neg:VF_128
3776 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
3777 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
3778 (neg:VF_128
3779 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
3780 (match_operand:VF_128 4 "const0_operand")
3781 (const_int 1)))]
3782 "TARGET_FMA4"
3783 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
3784 [(set_attr "type" "ssemuladd")
3785 (set_attr "mode" "<MODE>")])
3786
3787 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3788 ;;
3789 ;; Parallel single-precision floating point conversion operations
3790 ;;
3791 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3792
;; cvtpi2ps: converts the V2SI operand 2 to V2SF, duplicates it to V4SF and
;; vec_merges it with operand 1 under the constant mask 3.  Operand 1 is
;; tied to the destination.
3793 (define_insn "sse_cvtpi2ps"
3794 [(set (match_operand:V4SF 0 "register_operand" "=x")
3795 (vec_merge:V4SF
3796 (vec_duplicate:V4SF
3797 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
3798 (match_operand:V4SF 1 "register_operand" "0")
3799 (const_int 3)))]
3800 "TARGET_SSE"
3801 "cvtpi2ps\t{%2, %0|%0, %2}"
3802 [(set_attr "type" "ssecvt")
3803 (set_attr "mode" "V4SF")])
3804
;; cvtps2pi: non-truncating (UNSPEC_FIX_NOTRUNC) conversion of a V4SF
;; operand, of which only elements 0 and 1 are selected into the V2SI
;; destination (constraint "y").  Marked as an MMX-unit instruction.
3805 (define_insn "sse_cvtps2pi"
3806 [(set (match_operand:V2SI 0 "register_operand" "=y")
3807 (vec_select:V2SI
3808 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
3809 UNSPEC_FIX_NOTRUNC)
3810 (parallel [(const_int 0) (const_int 1)])))]
3811 "TARGET_SSE"
3812 "cvtps2pi\t{%1, %0|%0, %q1}"
3813 [(set_attr "type" "ssecvt")
3814 (set_attr "unit" "mmx")
3815 (set_attr "mode" "DI")])
3816
;; cvttps2pi: truncating (fix) conversion of a V4SF operand, of which only
;; elements 0 and 1 are selected into the V2SI destination (constraint "y").
;; Marked as an MMX-unit instruction with prefix_rep explicitly 0.
3817 (define_insn "sse_cvttps2pi"
3818 [(set (match_operand:V2SI 0 "register_operand" "=y")
3819 (vec_select:V2SI
3820 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
3821 (parallel [(const_int 0) (const_int 1)])))]
3822 "TARGET_SSE"
3823 "cvttps2pi\t{%1, %0|%0, %q1}"
3824 [(set_attr "type" "ssecvt")
3825 (set_attr "unit" "mmx")
3826 (set_attr "prefix_rep" "0")
3827 (set_attr "mode" "SF")])
3828
;; cvtsi2ss / vcvtsi2ss: converts the SI operand 2 to SF and merges it into
;; operand 1 under the constant mask 1.
;; Alternatives 0 and 1 are the non-AVX two-operand form (operand 1 tied to
;; the destination; source in a register resp. memory); alternative 2 is the
;; AVX three-operand form, which also carries the <round_op3> operand.
;; The *_decode attributes are per-alternative scheduling hints.
3829 (define_insn "sse_cvtsi2ss<round_name>"
3830 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
3831 (vec_merge:V4SF
3832 (vec_duplicate:V4SF
3833 (float:SF (match_operand:SI 2 "<round_nimm_predicate>" "r,m,<round_constraint3>")))
3834 (match_operand:V4SF 1 "register_operand" "0,0,v")
3835 (const_int 1)))]
3836 "TARGET_SSE"
3837 "@
3838 cvtsi2ss\t{%2, %0|%0, %2}
3839 cvtsi2ss\t{%2, %0|%0, %2}
3840 vcvtsi2ss\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
3841 [(set_attr "isa" "noavx,noavx,avx")
3842 (set_attr "type" "sseicvt")
3843 (set_attr "athlon_decode" "vector,double,*")
3844 (set_attr "amdfam10_decode" "vector,double,*")
3845 (set_attr "bdver1_decode" "double,direct,*")
3846 (set_attr "btver2_decode" "double,double,double")
3847 (set_attr "prefix" "orig,orig,maybe_evex")
3848 (set_attr "mode" "SF")])
3849
;; 64-bit-source variant of cvtsi2ss: converts the DI operand 2 to SF and
;; merges it into operand 1 under the constant mask 1.  Only available with
;; TARGET_64BIT.  Alternatives 0 and 1 are the non-AVX form (with
;; prefix_rex set to 1), alternative 2 the AVX three-operand form (with
;; length_vex set to 4).
3850 (define_insn "sse_cvtsi2ssq<round_name>"
3851 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
3852 (vec_merge:V4SF
3853 (vec_duplicate:V4SF
3854 (float:SF (match_operand:DI 2 "<round_nimm_predicate>" "r,m,<round_constraint3>")))
3855 (match_operand:V4SF 1 "register_operand" "0,0,v")
3856 (const_int 1)))]
3857 "TARGET_SSE && TARGET_64BIT"
3858 "@
3859 cvtsi2ssq\t{%2, %0|%0, %2}
3860 cvtsi2ssq\t{%2, %0|%0, %2}
3861 vcvtsi2ssq\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
3862 [(set_attr "isa" "noavx,noavx,avx")
3863 (set_attr "type" "sseicvt")
3864 (set_attr "athlon_decode" "vector,double,*")
3865 (set_attr "amdfam10_decode" "vector,double,*")
3866 (set_attr "bdver1_decode" "double,direct,*")
3867 (set_attr "btver2_decode" "double,double,double")
3868 (set_attr "length_vex" "*,*,4")
3869 (set_attr "prefix_rex" "1,1,*")
3870 (set_attr "prefix" "orig,orig,maybe_evex")
3871 (set_attr "mode" "SF")])
3872
;; cvtss2si: non-truncating (UNSPEC_FIX_NOTRUNC) conversion of element 0 of
;; a V4SF operand to a 32-bit integer register.  The two alternatives take
;; the source from a vector register or from <round_constraint2>.
3873 (define_insn "sse_cvtss2si<round_name>"
3874 [(set (match_operand:SI 0 "register_operand" "=r,r")
3875 (unspec:SI
3876 [(vec_select:SF
3877 (match_operand:V4SF 1 "<round_nimm_predicate>" "v,<round_constraint2>")
3878 (parallel [(const_int 0)]))]
3879 UNSPEC_FIX_NOTRUNC))]
3880 "TARGET_SSE"
3881 "%vcvtss2si\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
3882 [(set_attr "type" "sseicvt")
3883 (set_attr "athlon_decode" "double,vector")
3884 (set_attr "bdver1_decode" "double,double")
3885 (set_attr "prefix_rep" "1")
3886 (set_attr "prefix" "maybe_vex")
3887 (set_attr "mode" "SI")])
3888
;; cvtss2si on a plain scalar: the source is an SF operand (vector register
;; or memory) rather than element 0 selected out of a V4SF.  Emits the same
;; instruction as the vec_select form.
3889 (define_insn "sse_cvtss2si_2"
3890 [(set (match_operand:SI 0 "register_operand" "=r,r")
3891 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "v,m")]
3892 UNSPEC_FIX_NOTRUNC))]
3893 "TARGET_SSE"
3894 "%vcvtss2si\t{%1, %0|%0, %k1}"
3895 [(set_attr "type" "sseicvt")
3896 (set_attr "athlon_decode" "double,vector")
3897 (set_attr "amdfam10_decode" "double,double")
3898 (set_attr "bdver1_decode" "double,double")
3899 (set_attr "prefix_rep" "1")
3900 (set_attr "prefix" "maybe_vex")
3901 (set_attr "mode" "SI")])
3902
;; 64-bit-result cvtss2si: non-truncating conversion of element 0 of a V4SF
;; operand to a DI register.  Requires TARGET_64BIT; the template adds the
;; {q} suffix.
3903 (define_insn "sse_cvtss2siq<round_name>"
3904 [(set (match_operand:DI 0 "register_operand" "=r,r")
3905 (unspec:DI
3906 [(vec_select:SF
3907 (match_operand:V4SF 1 "<round_nimm_predicate>" "v,<round_constraint2>")
3908 (parallel [(const_int 0)]))]
3909 UNSPEC_FIX_NOTRUNC))]
3910 "TARGET_SSE && TARGET_64BIT"
3911 "%vcvtss2si{q}\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
3912 [(set_attr "type" "sseicvt")
3913 (set_attr "athlon_decode" "double,vector")
3914 (set_attr "bdver1_decode" "double,double")
3915 (set_attr "prefix_rep" "1")
3916 (set_attr "prefix" "maybe_vex")
3917 (set_attr "mode" "DI")])
3918
;; 64-bit-result cvtss2si on a plain scalar SF operand (vector register or
;; memory) instead of a vec_select from V4SF.  Requires TARGET_64BIT.
3919 (define_insn "sse_cvtss2siq_2"
3920 [(set (match_operand:DI 0 "register_operand" "=r,r")
3921 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "v,m")]
3922 UNSPEC_FIX_NOTRUNC))]
3923 "TARGET_SSE && TARGET_64BIT"
3924 "%vcvtss2si{q}\t{%1, %0|%0, %k1}"
3925 [(set_attr "type" "sseicvt")
3926 (set_attr "athlon_decode" "double,vector")
3927 (set_attr "amdfam10_decode" "double,double")
3928 (set_attr "bdver1_decode" "double,double")
3929 (set_attr "prefix_rep" "1")
3930 (set_attr "prefix" "maybe_vex")
3931 (set_attr "mode" "DI")])
3932
;; cvttss2si: truncating (fix) conversion of element 0 of a V4SF operand to
;; a 32-bit integer register.  Parameterized by the round_saeonly*
;; substitution attributes.
3933 (define_insn "sse_cvttss2si<round_saeonly_name>"
3934 [(set (match_operand:SI 0 "register_operand" "=r,r")
3935 (fix:SI
3936 (vec_select:SF
3937 (match_operand:V4SF 1 "<round_saeonly_nimm_predicate>" "v,<round_saeonly_constraint2>")
3938 (parallel [(const_int 0)]))))]
3939 "TARGET_SSE"
3940 "%vcvttss2si\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
3941 [(set_attr "type" "sseicvt")
3942 (set_attr "athlon_decode" "double,vector")
3943 (set_attr "amdfam10_decode" "double,double")
3944 (set_attr "bdver1_decode" "double,double")
3945 (set_attr "prefix_rep" "1")
3946 (set_attr "prefix" "maybe_vex")
3947 (set_attr "mode" "SI")])
3948
;; 64-bit-result cvttss2si: truncating conversion of element 0 of a V4SF
;; operand to a DI register.  Requires TARGET_64BIT; the template adds the
;; {q} suffix.
;; NOTE(review): the second alternative uses <round_saeonly_constraint>,
;; whereas the SI-result counterpart uses <round_saeonly_constraint2>;
;; confirm the difference is intentional.
3949 (define_insn "sse_cvttss2siq<round_saeonly_name>"
3950 [(set (match_operand:DI 0 "register_operand" "=r,r")
3951 (fix:DI
3952 (vec_select:SF
3953 (match_operand:V4SF 1 "<round_saeonly_nimm_predicate>" "v,<round_saeonly_constraint>")
3954 (parallel [(const_int 0)]))))]
3955 "TARGET_SSE && TARGET_64BIT"
3956 "%vcvttss2si{q}\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
3957 [(set_attr "type" "sseicvt")
3958 (set_attr "athlon_decode" "double,vector")
3959 (set_attr "amdfam10_decode" "double,double")
3960 (set_attr "bdver1_decode" "double,double")
3961 (set_attr "prefix_rep" "1")
3962 (set_attr "prefix" "maybe_evex")
3963 (set_attr "mode" "DI")])
3964
;; vcvtusi2ss/sd with a 32-bit source: converts the unsigned SI operand 2 to
;; the scalar mode of VF_128 and merges it into operand 1 under the constant
;; mask 1.  AVX512F only, additionally gated by <round_modev4sf_condition>.
3965 (define_insn "cvtusi2<ssescalarmodesuffix>32<round_name>"
3966 [(set (match_operand:VF_128 0 "register_operand" "=v")
3967 (vec_merge:VF_128
3968 (vec_duplicate:VF_128
3969 (unsigned_float:<ssescalarmode>
3970 (match_operand:SI 2 "<round_nimm_predicate>" "<round_constraint3>")))
3971 (match_operand:VF_128 1 "register_operand" "v")
3972 (const_int 1)))]
3973 "TARGET_AVX512F && <round_modev4sf_condition>"
3974 "vcvtusi2<ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
3975 [(set_attr "type" "sseicvt")
3976 (set_attr "prefix" "evex")
3977 (set_attr "mode" "<ssescalarmode>")])
3978
;; vcvtusi2ss/sd with a 64-bit source: converts the unsigned DI operand 2 to
;; the scalar mode of VF_128 and merges it into operand 1 under the constant
;; mask 1.  Requires AVX512F and TARGET_64BIT.
3979 (define_insn "cvtusi2<ssescalarmodesuffix>64<round_name>"
3980 [(set (match_operand:VF_128 0 "register_operand" "=v")
3981 (vec_merge:VF_128
3982 (vec_duplicate:VF_128
3983 (unsigned_float:<ssescalarmode>
3984 (match_operand:DI 2 "<round_nimm_predicate>" "<round_constraint3>")))
3985 (match_operand:VF_128 1 "register_operand" "v")
3986 (const_int 1)))]
3987 "TARGET_AVX512F && TARGET_64BIT"
3988 "vcvtusi2<ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
3989 [(set_attr "type" "sseicvt")
3990 (set_attr "prefix" "evex")
3991 (set_attr "mode" "<ssescalarmode>")])
3992
;; Signed integer vector to SF vector conversion (cvtdq2ps / vcvtdq2ps) for
;; the VF1 modes; the source has the matching <sseintvecmode>.  Supports the
;; mask and round substitutions, each gated by its 512-bit condition.
3993 (define_insn "float<sseintvecmodelower><mode>2<mask_name><round_name>"
3994 [(set (match_operand:VF1 0 "register_operand" "=v")
3995 (float:VF1
3996 (match_operand:<sseintvecmode> 1 "<round_nimm_predicate>" "<round_constraint>")))]
3997 "TARGET_SSE2 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
3998 "%vcvtdq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
3999 [(set_attr "type" "ssecvt")
4000 (set_attr "prefix" "maybe_vex")
4001 (set_attr "mode" "<sseinsnmode>")])
4002
;; Unsigned integer vector to SF vector conversion (vcvtudq2ps) for the
;; VF1_AVX512VL modes.  AVX512F only; always EVEX-encoded.
4003 (define_insn "ufloat<sseintvecmodelower><mode>2<mask_name><round_name>"
4004 [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
4005 (unsigned_float:VF1_AVX512VL
4006 (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
4007 "TARGET_AVX512F"
4008 "vcvtudq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4009 [(set_attr "type" "ssecvt")
4010 (set_attr "prefix" "evex")
4011 (set_attr "mode" "<MODE>")])
4012
;; Expander for unsigned integer vector -> SF vector conversion.
;; V16SF always uses the ufloatv16siv16sf2 insn.  For the narrower modes,
;; the ufloatv4siv4sf2 / ufloatv8siv8sf2 insns are used when AVX512VL is
;; available; otherwise the work is delegated to
;; ix86_expand_vector_convert_uns_vsivsf.
4013 (define_expand "floatuns<sseintvecmodelower><mode>2"
4014 [(match_operand:VF1 0 "register_operand")
4015 (match_operand:<sseintvecmode> 1 "register_operand")]
4016 "TARGET_SSE2 && (<MODE>mode == V4SFmode || TARGET_AVX2)"
4017 {
/* One flat chain: each branch names the exact case it handles.  */
4018 if (<MODE>mode == V16SFmode)
4019 emit_insn (gen_ufloatv16siv16sf2 (operands[0], operands[1]));
4020 else if (TARGET_AVX512VL && <MODE>mode == V4SFmode)
4024 emit_insn (gen_ufloatv4siv4sf2 (operands[0], operands[1]));
4025 else if (TARGET_AVX512VL)
4026 emit_insn (gen_ufloatv8siv8sf2 (operands[0], operands[1]));
4028 else
4029 ix86_expand_vector_convert_uns_vsivsf (operands[0], operands[1]);
4030
4031 DONE;
4032 })
4033
4034
4035 ;; For <sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode> insn pattern
;; Maps each SI vector mode to the lower-case name of the SF vector mode
;; with the same number of elements.
4036 (define_mode_attr sf2simodelower
4037 [(V16SI "v16sf") (V8SI "v8sf") (V4SI "v4sf")])
4038
;; Non-truncating SF vector -> SI vector conversion (cvtps2dq / vcvtps2dq)
;; for the VI4_AVX destination modes.  prefix_data16 is forced to 1 only
;; when not compiling for AVX; with TARGET_AVX it is "*".
4039 (define_insn "<sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode><mask_name>"
4040 [(set (match_operand:VI4_AVX 0 "register_operand" "=v")
4041 (unspec:VI4_AVX
4042 [(match_operand:<ssePSmode> 1 "nonimmediate_operand" "vm")]
4043 UNSPEC_FIX_NOTRUNC))]
4044 "TARGET_SSE2 && <mask_mode512bit_condition>"
4045 "%vcvtps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4046 [(set_attr "type" "ssecvt")
4047 (set (attr "prefix_data16")
4048 (if_then_else
4049 (match_test "TARGET_AVX")
4050 (const_string "*")
4051 (const_string "1")))
4052 (set_attr "prefix" "maybe_vex")
4053 (set_attr "mode" "<sseinsnmode>")])
4054
;; 512-bit non-truncating V16SF -> V16SI conversion (vcvtps2dq), with the
;; mask and round substitutions.  AVX512F only; always EVEX-encoded.
4055 (define_insn "<mask_codefor>avx512f_fix_notruncv16sfv16si<mask_name><round_name>"
4056 [(set (match_operand:V16SI 0 "register_operand" "=v")
4057 (unspec:V16SI
4058 [(match_operand:V16SF 1 "<round_nimm_predicate>" "<round_constraint>")]
4059 UNSPEC_FIX_NOTRUNC))]
4060 "TARGET_AVX512F"
4061 "vcvtps2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4062 [(set_attr "type" "ssecvt")
4063 (set_attr "prefix" "evex")
4064 (set_attr "mode" "XI")])
4065
;; Non-truncating SF vector -> unsigned SI vector conversion (vcvtps2udq)
;; for the VI4_AVX512VL destination modes, represented with
;; UNSPEC_UNSIGNED_FIX_NOTRUNC.  AVX512F only; always EVEX-encoded.
4066 (define_insn "<mask_codefor><avx512>_ufix_notrunc<sf2simodelower><mode><mask_name><round_name>"
4067 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
4068 (unspec:VI4_AVX512VL
4069 [(match_operand:<ssePSmode> 1 "nonimmediate_operand" "<round_constraint>")]
4070 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4071 "TARGET_AVX512F"
4072 "vcvtps2udq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4073 [(set_attr "type" "ssecvt")
4074 (set_attr "prefix" "evex")
4075 (set_attr "mode" "<sseinsnmode>")])
4076
;; vcvtps2qq for the VI8_256_512 destination modes: non-truncating
;; conversion from the <ssePSmode2> source to signed 64-bit elements.
;; Requires AVX512DQ.
4077 (define_insn "<mask_codefor>avx512dq_cvtps2qq<mode><mask_name><round_name>"
4078 [(set (match_operand:VI8_256_512 0 "register_operand" "=v")
4079 (unspec:VI8_256_512 [(match_operand:<ssePSmode2> 1 "nonimmediate_operand" "<round_constraint>")]
4080 UNSPEC_FIX_NOTRUNC))]
4081 "TARGET_AVX512DQ && <round_mode512bit_condition>"
4082 "vcvtps2qq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4083 [(set_attr "type" "ssecvt")
4084 (set_attr "prefix" "evex")
4085 (set_attr "mode" "<sseinsnmode>")])
4086
;; 128-bit vcvtps2qq: only elements 0 and 1 of the V4SF source are selected
;; and converted (non-truncating) to V2DI.  Requires AVX512DQ and AVX512VL.
4087 (define_insn "<mask_codefor>avx512dq_cvtps2qqv2di<mask_name>"
4088 [(set (match_operand:V2DI 0 "register_operand" "=v")
4089 (unspec:V2DI
4090 [(vec_select:V2SF
4091 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
4092 (parallel [(const_int 0) (const_int 1)]))]
4093 UNSPEC_FIX_NOTRUNC))]
4094 "TARGET_AVX512DQ && TARGET_AVX512VL"
4095 "vcvtps2qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4096 [(set_attr "type" "ssecvt")
4097 (set_attr "prefix" "evex")
4098 (set_attr "mode" "TI")])
4099
;; vcvtps2uqq for the VI8_256_512 destination modes: non-truncating
;; conversion from the <ssePSmode2> source to unsigned 64-bit elements
;; (UNSPEC_UNSIGNED_FIX_NOTRUNC).  Requires AVX512DQ.
4100 (define_insn "<mask_codefor>avx512dq_cvtps2uqq<mode><mask_name><round_name>"
4101 [(set (match_operand:VI8_256_512 0 "register_operand" "=v")
4102 (unspec:VI8_256_512 [(match_operand:<ssePSmode2> 1 "nonimmediate_operand" "<round_constraint>")]
4103 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4104 "TARGET_AVX512DQ && <round_mode512bit_condition>"
4105 "vcvtps2uqq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4106 [(set_attr "type" "ssecvt")
4107 (set_attr "prefix" "evex")
4108 (set_attr "mode" "<sseinsnmode>")])
4109
;; 128-bit vcvtps2uqq: only elements 0 and 1 of the V4SF source are
;; selected and converted (non-truncating, unsigned) to V2DI.  Requires
;; AVX512DQ and AVX512VL.
4110 (define_insn "<mask_codefor>avx512dq_cvtps2uqqv2di<mask_name>"
4111 [(set (match_operand:V2DI 0 "register_operand" "=v")
4112 (unspec:V2DI
4113 [(vec_select:V2SF
4114 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
4115 (parallel [(const_int 0) (const_int 1)]))]
4116 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4117 "TARGET_AVX512DQ && TARGET_AVX512VL"
4118 "vcvtps2uqq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4119 [(set_attr "type" "ssecvt")
4120 (set_attr "prefix" "evex")
4121 (set_attr "mode" "TI")])
4122
;; 512-bit truncating V16SF -> V16SI conversion.  The any_fix code iterator
;; generates both variants, and <fixsuffix> is spliced into both the pattern
;; name and the mnemonic (vcvttps2<fixsuffix>dq).  AVX512F only.
4123 (define_insn "<fixsuffix>fix_truncv16sfv16si2<mask_name><round_saeonly_name>"
4124 [(set (match_operand:V16SI 0 "register_operand" "=v")
4125 (any_fix:V16SI
4126 (match_operand:V16SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
4127 "TARGET_AVX512F"
4128 "vcvttps2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
4129 [(set_attr "type" "ssecvt")
4130 (set_attr "prefix" "evex")
4131 (set_attr "mode" "XI")])
4132
;; 256-bit truncating V8SF -> V8SI conversion (vcvttps2dq).  Requires AVX;
;; the masked variant is additionally gated by <mask_avx512vl_condition>.
4133 (define_insn "fix_truncv8sfv8si2<mask_name>"
4134 [(set (match_operand:V8SI 0 "register_operand" "=v")
4135 (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "vm")))]
4136 "TARGET_AVX && <mask_avx512vl_condition>"
4137 "vcvttps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4138 [(set_attr "type" "ssecvt")
4139 (set_attr "prefix" "<mask_prefix>")
4140 (set_attr "mode" "OI")])
4141
;; 128-bit truncating V4SF -> V4SI conversion (cvttps2dq / vcvttps2dq).
;; Requires SSE2; the masked variant is additionally gated by
;; <mask_avx512vl_condition>.
;; prefix_rep and prefix_data16 are given explicit values (1 and 0) only for
;; the non-AVX encoding; with TARGET_AVX both are "*".  Each attribute is
;; set exactly once: a second, unconditional prefix_data16 setting that
;; duplicated the conditional one has been dropped.
4142 (define_insn "fix_truncv4sfv4si2<mask_name>"
4143 [(set (match_operand:V4SI 0 "register_operand" "=v")
4144 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "vm")))]
4145 "TARGET_SSE2 && <mask_avx512vl_condition>"
4146 "%vcvttps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4147 [(set_attr "type" "ssecvt")
4148 (set (attr "prefix_rep")
4149 (if_then_else
4150 (match_test "TARGET_AVX")
4151 (const_string "*")
4152 (const_string "1")))
4153 (set (attr "prefix_data16")
4154 (if_then_else
4155 (match_test "TARGET_AVX")
4156 (const_string "*")
4157 (const_string "0")))
4159 (set_attr "prefix" "<mask_prefix2>")
4160 (set_attr "mode" "TI")])
4161
;; Expander for truncating SF vector -> unsigned SI vector conversion.
;; V16SF maps directly onto the ufix_truncv16sfv16si2 insn.  The narrower
;; modes are synthesized: ix86_expand_adjust_ufix_to_sfix_si returns an
;; adjusted input plus an XOR mask, the adjusted value goes through the
;; signed fix_trunc pattern, and the result is XORed with the mask.
4162 (define_expand "fixuns_trunc<mode><sseintvecmodelower>2"
4163 [(match_operand:<sseintvecmode> 0 "register_operand")
4164 (match_operand:VF1 1 "register_operand")]
4165 "TARGET_SSE2"
4166 {
if (<MODE>mode == V16SFmode)
{
emit_insn (gen_ufix_truncv16sfv16si2 (operands[0], operands[1]));
DONE;
}

/* Filled in by the helper below; presumably the sign-bit correction.  */
rtx xor_mask;
rtx adjusted_src = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &xor_mask);
rtx signed_result = gen_reg_rtx (<sseintvecmode>mode);

emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (signed_result,
adjusted_src));
emit_insn (gen_xor<sseintvecmodelower>3 (operands[0], signed_result,
xor_mask));
4178 DONE;
4179 })
4180
4181 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4182 ;;
4183 ;; Parallel double-precision floating point conversion operations
4184 ;;
4185 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4186
;; cvtpi2pd: signed V2SI -> V2DF conversion.  The source is either a
;; register with constraint "y" (alternative 0, tagged unit mmx with
;; prefix_data16 1) or a memory operand (alternative 1).
4187 (define_insn "sse2_cvtpi2pd"
4188 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
4189 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
4190 "TARGET_SSE2"
4191 "cvtpi2pd\t{%1, %0|%0, %1}"
4192 [(set_attr "type" "ssecvt")
4193 (set_attr "unit" "mmx,*")
4194 (set_attr "prefix_data16" "1,*")
4195 (set_attr "mode" "V2DF")])
4196
;; cvtpd2pi: non-truncating (UNSPEC_FIX_NOTRUNC) V2DF -> V2SI conversion
;; into a destination with constraint "y".  Marked as an MMX-unit
;; instruction with a data16 prefix.
4197 (define_insn "sse2_cvtpd2pi"
4198 [(set (match_operand:V2SI 0 "register_operand" "=y")
4199 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
4200 UNSPEC_FIX_NOTRUNC))]
4201 "TARGET_SSE2"
4202 "cvtpd2pi\t{%1, %0|%0, %1}"
4203 [(set_attr "type" "ssecvt")
4204 (set_attr "unit" "mmx")
4205 (set_attr "bdver1_decode" "double")
4206 (set_attr "btver2_decode" "direct")
4207 (set_attr "prefix_data16" "1")
4208 (set_attr "mode" "DI")])
4209
;; cvttpd2pi: truncating (fix) V2DF -> V2SI conversion into a destination
;; with constraint "y".  Marked as an MMX-unit instruction with a data16
;; prefix.
4210 (define_insn "sse2_cvttpd2pi"
4211 [(set (match_operand:V2SI 0 "register_operand" "=y")
4212 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
4213 "TARGET_SSE2"
4214 "cvttpd2pi\t{%1, %0|%0, %1}"
4215 [(set_attr "type" "ssecvt")
4216 (set_attr "unit" "mmx")
4217 (set_attr "bdver1_decode" "double")
4218 (set_attr "prefix_data16" "1")
4219 (set_attr "mode" "TI")])
4220
;; cvtsi2sd / vcvtsi2sd: converts the SI operand 2 to DF and merges it into
;; operand 1 under the constant mask 1.
;; Alternatives 0 and 1 are the non-AVX two-operand form (operand 1 tied to
;; the destination; source in a register resp. memory); alternative 2 is the
;; AVX three-operand form.  This pattern has no rounding variant.
4221 (define_insn "sse2_cvtsi2sd"
4222 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
4223 (vec_merge:V2DF
4224 (vec_duplicate:V2DF
4225 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
4226 (match_operand:V2DF 1 "register_operand" "0,0,x")
4227 (const_int 1)))]
4228 "TARGET_SSE2"
4229 "@
4230 cvtsi2sd\t{%2, %0|%0, %2}
4231 cvtsi2sd\t{%2, %0|%0, %2}
4232 vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
4233 [(set_attr "isa" "noavx,noavx,avx")
4234 (set_attr "type" "sseicvt")
4235 (set_attr "athlon_decode" "double,direct,*")
4236 (set_attr "amdfam10_decode" "vector,double,*")
4237 (set_attr "bdver1_decode" "double,direct,*")
4238 (set_attr "btver2_decode" "double,double,double")
4239 (set_attr "prefix" "orig,orig,vex")
4240 (set_attr "mode" "DF")])
4241
;; 64-bit-source variant of cvtsi2sd: converts the DI operand 2 to DF and
;; merges it into operand 1 under the constant mask 1.  Requires
;; TARGET_64BIT.  Alternatives 0 and 1 are the non-AVX form (prefix_rex 1),
;; alternative 2 the AVX three-operand form, which also carries <round_op3>.
4242 (define_insn "sse2_cvtsi2sdq<round_name>"
4243 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
4244 (vec_merge:V2DF
4245 (vec_duplicate:V2DF
4246 (float:DF (match_operand:DI 2 "<round_nimm_predicate>" "r,m,<round_constraint3>")))
4247 (match_operand:V2DF 1 "register_operand" "0,0,v")
4248 (const_int 1)))]
4249 "TARGET_SSE2 && TARGET_64BIT"
4250 "@
4251 cvtsi2sdq\t{%2, %0|%0, %2}
4252 cvtsi2sdq\t{%2, %0|%0, %2}
4253 vcvtsi2sdq\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
4254 [(set_attr "isa" "noavx,noavx,avx")
4255 (set_attr "type" "sseicvt")
4256 (set_attr "athlon_decode" "double,direct,*")
4257 (set_attr "amdfam10_decode" "vector,double,*")
4258 (set_attr "bdver1_decode" "double,direct,*")
4259 (set_attr "length_vex" "*,*,4")
4260 (set_attr "prefix_rex" "1,1,*")
4261 (set_attr "prefix" "orig,orig,maybe_evex")
4262 (set_attr "mode" "DF")])
4263
;; vcvtss2usi: non-truncating conversion of element 0 of a V4SF operand to
;; an unsigned 32-bit integer register (UNSPEC_UNSIGNED_FIX_NOTRUNC).
;; AVX512F only.
4264 (define_insn "avx512f_vcvtss2usi<round_name>"
4265 [(set (match_operand:SI 0 "register_operand" "=r")
4266 (unspec:SI
4267 [(vec_select:SF
4268 (match_operand:V4SF 1 "<round_nimm_predicate>" "<round_constraint>")
4269 (parallel [(const_int 0)]))]
4270 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4271 "TARGET_AVX512F"
4272 "vcvtss2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
4273 [(set_attr "type" "sseicvt")
4274 (set_attr "prefix" "evex")
4275 (set_attr "mode" "SI")])
4276
;; vcvtss2usi (64-bit result): element 0 of the V4SF operand 1 is converted
;; to an unsigned DImode value in a general register, expressed as
;; UNSPEC_UNSIGNED_FIX_NOTRUNC.  Only enabled when TARGET_64BIT.
;; The mnemonic carries no size suffix; the DImode destination register is
;; what distinguishes this form in the output.
;; Only one float is read, so the Intel-syntax source is printed with %k1
;; (the scalar-size modifier sse2_cvtss2sd applies to its V4SF source)
;; instead of the plain full-width %1.
4277 (define_insn "avx512f_vcvtss2usiq<round_name>"
4278 [(set (match_operand:DI 0 "register_operand" "=r")
4279 (unspec:DI
4280 [(vec_select:SF
4281 (match_operand:V4SF 1 "<round_nimm_predicate>" "<round_constraint>")
4282 (parallel [(const_int 0)]))]
4283 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4284 "TARGET_AVX512F && TARGET_64BIT"
4285 "vcvtss2usi\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
4286 [(set_attr "type" "sseicvt")
4287 (set_attr "prefix" "evex")
4288 (set_attr "mode" "DI")])
4289
;; vcvttss2usi (32-bit result): unsigned_fix of element 0 of the V4SF
;; operand 1 into an SImode general register.  Uses the SAE-only rounding
;; substitution (<round_saeonly_name>).
;; Only one float is read, so the Intel-syntax source is printed with %k1
;; (the scalar-size modifier sse2_cvtss2sd applies to its V4SF source)
;; instead of the plain full-width %1.
4290 (define_insn "avx512f_vcvttss2usi<round_saeonly_name>"
4291 [(set (match_operand:SI 0 "register_operand" "=r")
4292 (unsigned_fix:SI
4293 (vec_select:SF
4294 (match_operand:V4SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
4295 (parallel [(const_int 0)]))))]
4296 "TARGET_AVX512F"
4297 "vcvttss2usi\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
4298 [(set_attr "type" "sseicvt")
4299 (set_attr "prefix" "evex")
4300 (set_attr "mode" "SI")])
4301
;; vcvttss2usi (64-bit result): unsigned_fix of element 0 of the V4SF
;; operand 1 into a DImode general register.  Only enabled when
;; TARGET_64BIT; uses the SAE-only rounding substitution.
;; Only one float is read, so the Intel-syntax source is printed with %k1
;; (the scalar-size modifier sse2_cvtss2sd applies to its V4SF source)
;; instead of the plain full-width %1.
4302 (define_insn "avx512f_vcvttss2usiq<round_saeonly_name>"
4303 [(set (match_operand:DI 0 "register_operand" "=r")
4304 (unsigned_fix:DI
4305 (vec_select:SF
4306 (match_operand:V4SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
4307 (parallel [(const_int 0)]))))]
4308 "TARGET_AVX512F && TARGET_64BIT"
4309 "vcvttss2usi\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
4310 [(set_attr "type" "sseicvt")
4311 (set_attr "prefix" "evex")
4312 (set_attr "mode" "DI")])
4313
;; vcvtsd2usi (32-bit result): element 0 of the V2DF operand 1 is converted
;; to an unsigned SImode value in a general register, expressed as
;; UNSPEC_UNSIGNED_FIX_NOTRUNC.
;; Only one double is read, so the Intel-syntax source is printed with %q1
;; (as sse2_cvtsd2si does for its V2DF source) instead of the plain
;; full-width %1.
4314 (define_insn "avx512f_vcvtsd2usi<round_name>"
4315 [(set (match_operand:SI 0 "register_operand" "=r")
4316 (unspec:SI
4317 [(vec_select:DF
4318 (match_operand:V2DF 1 "<round_nimm_predicate>" "<round_constraint>")
4319 (parallel [(const_int 0)]))]
4320 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4321 "TARGET_AVX512F"
4322 "vcvtsd2usi\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
4323 [(set_attr "type" "sseicvt")
4324 (set_attr "prefix" "evex")
4325 (set_attr "mode" "SI")])
4326
;; vcvtsd2usi (64-bit result): element 0 of the V2DF operand 1 is converted
;; to an unsigned DImode value in a general register, expressed as
;; UNSPEC_UNSIGNED_FIX_NOTRUNC.  Only enabled when TARGET_64BIT.
;; Only one double is read, so the Intel-syntax source is printed with %q1
;; (as sse2_cvtsd2siq does for its V2DF source) instead of the plain
;; full-width %1.
4327 (define_insn "avx512f_vcvtsd2usiq<round_name>"
4328 [(set (match_operand:DI 0 "register_operand" "=r")
4329 (unspec:DI
4330 [(vec_select:DF
4331 (match_operand:V2DF 1 "<round_nimm_predicate>" "<round_constraint>")
4332 (parallel [(const_int 0)]))]
4333 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4334 "TARGET_AVX512F && TARGET_64BIT"
4335 "vcvtsd2usi\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
4336 [(set_attr "type" "sseicvt")
4337 (set_attr "prefix" "evex")
4338 (set_attr "mode" "DI")])
4339
;; vcvttsd2usi (32-bit result): unsigned_fix of element 0 of the V2DF
;; operand 1 into an SImode general register.  Uses the SAE-only rounding
;; substitution (<round_saeonly_name>).
;; Only one double is read, so the Intel-syntax source is printed with %q1
;; (as sse2_cvttsd2si does for its V2DF source) instead of the plain
;; full-width %1.
4340 (define_insn "avx512f_vcvttsd2usi<round_saeonly_name>"
4341 [(set (match_operand:SI 0 "register_operand" "=r")
4342 (unsigned_fix:SI
4343 (vec_select:DF
4344 (match_operand:V2DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
4345 (parallel [(const_int 0)]))))]
4346 "TARGET_AVX512F"
4347 "vcvttsd2usi\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
4348 [(set_attr "type" "sseicvt")
4349 (set_attr "prefix" "evex")
4350 (set_attr "mode" "SI")])
4351
;; vcvttsd2usi (64-bit result): unsigned_fix of element 0 of the V2DF
;; operand 1 into a DImode general register.  Only enabled when
;; TARGET_64BIT; uses the SAE-only rounding substitution.
;; Only one double is read, so the Intel-syntax source is printed with %q1
;; (as sse2_cvttsd2siq does for its V2DF source) instead of the plain
;; full-width %1.
4352 (define_insn "avx512f_vcvttsd2usiq<round_saeonly_name>"
4353 [(set (match_operand:DI 0 "register_operand" "=r")
4354 (unsigned_fix:DI
4355 (vec_select:DF
4356 (match_operand:V2DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
4357 (parallel [(const_int 0)]))))]
4358 "TARGET_AVX512F && TARGET_64BIT"
4359 "vcvttsd2usi\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
4360 [(set_attr "type" "sseicvt")
4361 (set_attr "prefix" "evex")
4362 (set_attr "mode" "DI")])
4363
;; cvtsd2si (32-bit result): element 0 of the V2DF operand 1 is converted to
;; a signed SImode value in a general register, expressed as
;; UNSPEC_FIX_NOTRUNC.  Alternative 0 takes a vector register, alternative 1
;; takes <round_constraint2>.
;; The template starts with %v and the prefix attribute is maybe_vex; the
;; Intel-syntax source is printed with %q1.
;; NOTE(review): this pattern sets btver2_decode but no amdfam10_decode,
;; whereas sse2_cvtsd2si_2 does the opposite -- confirm that is intended.
4364 (define_insn "sse2_cvtsd2si<round_name>"
4365 [(set (match_operand:SI 0 "register_operand" "=r,r")
4366 (unspec:SI
4367 [(vec_select:DF
4368 (match_operand:V2DF 1 "<round_nimm_predicate>" "v,<round_constraint2>")
4369 (parallel [(const_int 0)]))]
4370 UNSPEC_FIX_NOTRUNC))]
4371 "TARGET_SSE2"
4372 "%vcvtsd2si\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
4373 [(set_attr "type" "sseicvt")
4374 (set_attr "athlon_decode" "double,vector")
4375 (set_attr "bdver1_decode" "double,double")
4376 (set_attr "btver2_decode" "double,double")
4377 (set_attr "prefix_rep" "1")
4378 (set_attr "prefix" "maybe_vex")
4379 (set_attr "mode" "SI")])
4380
;; Scalar form of cvtsd2si: the source is a plain DFmode operand (vector
;; register or memory) rather than element 0 selected from a V2DF vector.
;; The result is a signed SImode value, expressed as UNSPEC_FIX_NOTRUNC.
;; This variant has no rounding substitution.
4381 (define_insn "sse2_cvtsd2si_2"
4382 [(set (match_operand:SI 0 "register_operand" "=r,r")
4383 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
4384 UNSPEC_FIX_NOTRUNC))]
4385 "TARGET_SSE2"
4386 "%vcvtsd2si\t{%1, %0|%0, %q1}"
4387 [(set_attr "type" "sseicvt")
4388 (set_attr "athlon_decode" "double,vector")
4389 (set_attr "amdfam10_decode" "double,double")
4390 (set_attr "bdver1_decode" "double,double")
4391 (set_attr "prefix_rep" "1")
4392 (set_attr "prefix" "maybe_vex")
4393 (set_attr "mode" "SI")])
4394
;; cvtsd2si (64-bit result): element 0 of the V2DF operand 1 is converted to
;; a signed DImode value in a general register, expressed as
;; UNSPEC_FIX_NOTRUNC.  Only enabled when TARGET_64BIT.
;; The {q} in the template is a dialect-specific mnemonic suffix (AT&T
;; alternative only); the Intel-syntax source is printed with %q1.
4395 (define_insn "sse2_cvtsd2siq<round_name>"
4396 [(set (match_operand:DI 0 "register_operand" "=r,r")
4397 (unspec:DI
4398 [(vec_select:DF
4399 (match_operand:V2DF 1 "<round_nimm_predicate>" "v,<round_constraint2>")
4400 (parallel [(const_int 0)]))]
4401 UNSPEC_FIX_NOTRUNC))]
4402 "TARGET_SSE2 && TARGET_64BIT"
4403 "%vcvtsd2si{q}\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
4404 [(set_attr "type" "sseicvt")
4405 (set_attr "athlon_decode" "double,vector")
4406 (set_attr "bdver1_decode" "double,double")
4407 (set_attr "prefix_rep" "1")
4408 (set_attr "prefix" "maybe_vex")
4409 (set_attr "mode" "DI")])
4410
;; Scalar form of the 64-bit cvtsd2si: the source is a plain DFmode operand
;; (vector register or memory) and the result is a signed DImode value,
;; expressed as UNSPEC_FIX_NOTRUNC.  Only enabled when TARGET_64BIT; no
;; rounding substitution.
4411 (define_insn "sse2_cvtsd2siq_2"
4412 [(set (match_operand:DI 0 "register_operand" "=r,r")
4413 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
4414 UNSPEC_FIX_NOTRUNC))]
4415 "TARGET_SSE2 && TARGET_64BIT"
4416 "%vcvtsd2si{q}\t{%1, %0|%0, %q1}"
4417 [(set_attr "type" "sseicvt")
4418 (set_attr "athlon_decode" "double,vector")
4419 (set_attr "amdfam10_decode" "double,double")
4420 (set_attr "bdver1_decode" "double,double")
4421 (set_attr "prefix_rep" "1")
4422 (set_attr "prefix" "maybe_vex")
4423 (set_attr "mode" "DI")])
4424
;; cvttsd2si (32-bit result): signed fix of element 0 of the V2DF operand 1
;; into an SImode general register.  Expressed with the generic `fix' RTL
;; code rather than an unspec, and uses the SAE-only rounding substitution.
4425 (define_insn "sse2_cvttsd2si<round_saeonly_name>"
4426 [(set (match_operand:SI 0 "register_operand" "=r,r")
4427 (fix:SI
4428 (vec_select:DF
4429 (match_operand:V2DF 1 "<round_saeonly_nimm_predicate>" "v,<round_saeonly_constraint2>")
4430 (parallel [(const_int 0)]))))]
4431 "TARGET_SSE2"
4432 "%vcvttsd2si\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
4433 [(set_attr "type" "sseicvt")
4434 (set_attr "athlon_decode" "double,vector")
4435 (set_attr "amdfam10_decode" "double,double")
4436 (set_attr "bdver1_decode" "double,double")
4437 (set_attr "btver2_decode" "double,double")
4438 (set_attr "prefix_rep" "1")
4439 (set_attr "prefix" "maybe_vex")
4440 (set_attr "mode" "SI")])
4441
;; cvttsd2si (64-bit result): signed fix of element 0 of the V2DF operand 1
;; into a DImode general register.  Only enabled when TARGET_64BIT; uses the
;; SAE-only rounding substitution.  {q} is a dialect-specific mnemonic
;; suffix.
4442 (define_insn "sse2_cvttsd2siq<round_saeonly_name>"
4443 [(set (match_operand:DI 0 "register_operand" "=r,r")
4444 (fix:DI
4445 (vec_select:DF
4446 (match_operand:V2DF 1 "<round_saeonly_nimm_predicate>" "v,<round_saeonly_constraint2>")
4447 (parallel [(const_int 0)]))))]
4448 "TARGET_SSE2 && TARGET_64BIT"
4449 "%vcvttsd2si{q}\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
4450 [(set_attr "type" "sseicvt")
4451 (set_attr "athlon_decode" "double,vector")
4452 (set_attr "amdfam10_decode" "double,double")
4453 (set_attr "bdver1_decode" "double,double")
4454 (set_attr "prefix_rep" "1")
4455 (set_attr "prefix" "maybe_vex")
4456 (set_attr "mode" "DI")])
4457
4458 ;; For the float<si2dfmodelower><mode>2, ufloat<si2dfmodelower><mode>2 and
;; ufix_notrunc<mode><si2dfmodelower>2 insn patterns: map a double-precision
;; vector mode to the SImode vector mode with the same number of elements.
;; si2dfmode gives the mode itself, si2dfmodelower the lower-case spelling
;; used inside pattern names.
4459 (define_mode_attr si2dfmode
4460 [(V8DF "V8SI") (V4DF "V4SI")])
4461 (define_mode_attr si2dfmodelower
4462 [(V8DF "v8si") (V4DF "v4si")])
4463
;; vcvtdq2pd: signed integer to double conversion from a <si2dfmode> vector
;; (register or memory) to a VF2_512_256 vector with the same element count.
;; <mask_name> adds the masked variant, which is additionally guarded by
;; <mask_mode512bit_condition>.
4464 (define_insn "float<si2dfmodelower><mode>2<mask_name>"
4465 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
4466 (float:VF2_512_256 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
4467 "TARGET_AVX && <mask_mode512bit_condition>"
4468 "vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4469 [(set_attr "type" "ssecvt")
4470 (set_attr "prefix" "maybe_vex")
4471 (set_attr "mode" "<MODE>")])
4472
;; vcvtqq2pd / vcvtuqq2pd: signed or unsigned (any_float) conversion of a
;; 64-bit integer vector to the double vector mode with the same element
;; count.  Requires TARGET_AVX512DQ.
;; The source constraint is <round_constraint> rather than a fixed "vm":
;; the pattern is instantiated with <round_name>, and the other rounding
;; patterns in this file pair <round_mask_op2> with <round_constraint> so
;; that the embedded-rounding variant is not offered a memory operand.
4473 (define_insn "<floatsuffix>float<sseintvecmodelower><mode>2<mask_name><round_name>"
4474 [(set (match_operand:VF2_AVX512VL 0 "register_operand" "=v")
4475 (any_float:VF2_AVX512VL
4476 (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
4477 "TARGET_AVX512DQ"
4478 "vcvt<floatsuffix>qq2pd\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4479 [(set_attr "type" "ssecvt")
4480 (set_attr "prefix" "evex")
4481 (set_attr "mode" "<MODE>")])
4482
4483 ;; For <floatsuffix>float<sselongvecmodelower><mode>2 insn patterns:
;; mnemonic suffix appended to vcvt[u]qq2ps, empty for V8SF and the
;; dialect-specific {y} for V4SF.
4484 (define_mode_attr qq2pssuff
4485 [(V8SF "") (V4SF "{y}")])
4486
;; Maps a single-precision vector mode to the DImode vector mode with the
;; same number of elements.
4487 (define_mode_attr sselongvecmode
4488 [(V8SF "V8DI") (V4SF "V4DI")])
4489
;; Lower-case spelling of <sselongvecmode>, for use inside pattern names.
4490 (define_mode_attr sselongvecmodelower
4491 [(V8SF "v8di") (V4SF "v4di")])
4492
;; Integer mode name per floating-point vector mode; within this part of the
;; file it is used as the "mode" attribute of
;; <fixsuffix>fix_trunc<mode><sselongvecmodelower>2.
4493 (define_mode_attr sseintvecmode3
4494 [(V8SF "XI") (V4SF "OI")
4495 (V8DF "OI") (V4DF "TI")])
4496
;; vcvtqq2ps / vcvtuqq2ps: signed or unsigned (any_float) conversion of a
;; <sselongvecmode> 64-bit integer vector to the single-precision vector
;; with the same element count.  <qq2pssuff> supplies the mnemonic suffix.
;; Requires TARGET_AVX512DQ; the rounding variant is further restricted by
;; <round_modev8sf_condition>.
4497 (define_insn "<floatsuffix>float<sselongvecmodelower><mode>2<mask_name><round_name>"
4498 [(set (match_operand:VF1_128_256VL 0 "register_operand" "=v")
4499 (any_float:VF1_128_256VL
4500 (match_operand:<sselongvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
4501 "TARGET_AVX512DQ && <round_modev8sf_condition>"
4502 "vcvt<floatsuffix>qq2ps<qq2pssuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4503 [(set_attr "type" "ssecvt")
4504 (set_attr "prefix" "evex")
4505 (set_attr "mode" "<MODE>")])
4506
;; 128-bit vcvt[u]qq2ps: the two DImode elements of operand 1 are converted
;; to V2SF, which forms the low half of the V4SF result; the high half is
;; an explicit zero vector.  Requires both AVX512DQ and AVX512VL.
;; {x} is the dialect-specific mnemonic suffix.
4507 (define_insn "*<floatsuffix>floatv2div2sf2"
4508 [(set (match_operand:V4SF 0 "register_operand" "=v")
4509 (vec_concat:V4SF
4510 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
4511 (const_vector:V2SF [(const_int 0) (const_int 0)])))]
4512 "TARGET_AVX512DQ && TARGET_AVX512VL"
4513 "vcvt<floatsuffix>qq2ps{x}\t{%1, %0|%0, %1}"
4514 [(set_attr "type" "ssecvt")
4515 (set_attr "prefix" "evex")
4516 (set_attr "mode" "V4SF")])
4517
;; Masked 128-bit vcvt[u]qq2ps.  The converted V2SF value is merged, under
;; the QImode mask register operand 3 ("Yk"), with the low two elements of
;; operand 2; the high half of the V4SF result is an explicit zero vector.
;; Operand 2 is either tied to the destination ("0") or matches "C".
;; %N2 in the template presumably emits the zero-masking marker when
;; operand 2 is the "C" alternative -- confirm in ix86_print_operand.
4518 (define_insn "<floatsuffix>floatv2div2sf2_mask"
4519 [(set (match_operand:V4SF 0 "register_operand" "=v")
4520 (vec_concat:V4SF
4521 (vec_merge:V2SF
4522 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
4523 (vec_select:V2SF
4524 (match_operand:V4SF 2 "vector_move_operand" "0C")
4525 (parallel [(const_int 0) (const_int 1)]))
4526 (match_operand:QI 3 "register_operand" "Yk"))
4527 (const_vector:V2SF [(const_int 0) (const_int 0)])))]
4528 "TARGET_AVX512DQ && TARGET_AVX512VL"
4529 "vcvt<floatsuffix>qq2ps{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
4530 [(set_attr "type" "ssecvt")
4531 (set_attr "prefix" "evex")
4532 (set_attr "mode" "V4SF")])
4533
;; vcvtudq2pd: unsigned integer to double conversion from a <si2dfmode>
;; vector (register or memory) to the double vector mode with the same
;; element count.  Requires TARGET_AVX512F.
4534 (define_insn "ufloat<si2dfmodelower><mode>2<mask_name>"
4535 [(set (match_operand:VF2_512_256VL 0 "register_operand" "=v")
4536 (unsigned_float:VF2_512_256VL
4537 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
4538 "TARGET_AVX512F"
4539 "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4540 [(set_attr "type" "ssecvt")
4541 (set_attr "prefix" "evex")
4542 (set_attr "mode" "<MODE>")])
4543
;; 128-bit vcvtudq2pd: the low two SImode elements of the V4SI operand 1 are
;; converted (unsigned) to V2DF.  Requires TARGET_AVX512VL.
;; Only the low 64 bits of the source are read, so the Intel-syntax operand
;; is printed with %q1, matching the signed counterpart sse2_cvtdq2pd,
;; instead of the plain %1 that describes the whole V4SI operand.
4544 (define_insn "ufloatv2siv2df2<mask_name>"
4545 [(set (match_operand:V2DF 0 "register_operand" "=v")
4546 (unsigned_float:V2DF
4547 (vec_select:V2SI
4548 (match_operand:V4SI 1 "nonimmediate_operand" "vm")
4549 (parallel [(const_int 0) (const_int 1)]))))]
4550 "TARGET_AVX512VL"
4551 "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
4552 [(set_attr "type" "ssecvt")
4553 (set_attr "prefix" "evex")
4554 (set_attr "mode" "V2DF")])
4555
;; vcvtdq2pd on the low half of a 512-bit vector: elements 0-7 of the V16SI
;; operand 1 are selected and converted (signed) to V8DF.
;; The source is printed with the %t modifier, presumably so that the
;; operand appears at the 256-bit width the instruction reads -- confirm in
;; ix86_print_operand.
4556 (define_insn "avx512f_cvtdq2pd512_2"
4557 [(set (match_operand:V8DF 0 "register_operand" "=v")
4558 (float:V8DF
4559 (vec_select:V8SI
4560 (match_operand:V16SI 1 "nonimmediate_operand" "vm")
4561 (parallel [(const_int 0) (const_int 1)
4562 (const_int 2) (const_int 3)
4563 (const_int 4) (const_int 5)
4564 (const_int 6) (const_int 7)]))))]
4565 "TARGET_AVX512F"
4566 "vcvtdq2pd\t{%t1, %0|%0, %t1}"
4567 [(set_attr "type" "ssecvt")
4568 (set_attr "prefix" "evex")
4569 (set_attr "mode" "V8DF")])
4570
;; vcvtdq2pd on the low half of a 256-bit vector: elements 0-3 of the V8SI
;; operand 1 are selected and converted (signed) to V4DF.
;; The source is printed with the %x modifier, presumably so that the
;; operand appears at the 128-bit width the instruction reads -- confirm in
;; ix86_print_operand.
4571 (define_insn "avx_cvtdq2pd256_2"
4572 [(set (match_operand:V4DF 0 "register_operand" "=v")
4573 (float:V4DF
4574 (vec_select:V4SI
4575 (match_operand:V8SI 1 "nonimmediate_operand" "vm")
4576 (parallel [(const_int 0) (const_int 1)
4577 (const_int 2) (const_int 3)]))))]
4578 "TARGET_AVX"
4579 "vcvtdq2pd\t{%x1, %0|%0, %x1}"
4580 [(set_attr "type" "ssecvt")
4581 (set_attr "prefix" "maybe_evex")
4582 (set_attr "mode" "V4DF")])
4583
;; cvtdq2pd (128-bit): the low two SImode elements of the V4SI operand 1 are
;; converted (signed) to V2DF.  The masked variant is guarded by
;; <mask_avx512vl_condition>.
;; The Intel-syntax source is printed with %q1 because only the low 64 bits
;; are read; ssememalign is set to 64 accordingly.
4584 (define_insn "sse2_cvtdq2pd<mask_name>"
4585 [(set (match_operand:V2DF 0 "register_operand" "=v")
4586 (float:V2DF
4587 (vec_select:V2SI
4588 (match_operand:V4SI 1 "nonimmediate_operand" "vm")
4589 (parallel [(const_int 0) (const_int 1)]))))]
4590 "TARGET_SSE2 && <mask_avx512vl_condition>"
4591 "%vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
4592 [(set_attr "type" "ssecvt")
4593 (set_attr "prefix" "maybe_vex")
4594 (set_attr "ssememalign" "64")
4595 (set_attr "mode" "V2DF")])
4596
;; vcvtpd2dq (512-bit source): V8DF to V8SI signed conversion, expressed as
;; UNSPEC_FIX_NOTRUNC.  Supports both the mask and the rounding
;; substitutions.
4597 (define_insn "<mask_codefor>avx512f_cvtpd2dq512<mask_name><round_name>"
4598 [(set (match_operand:V8SI 0 "register_operand" "=v")
4599 (unspec:V8SI
4600 [(match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")]
4601 UNSPEC_FIX_NOTRUNC))]
4602 "TARGET_AVX512F"
4603 "vcvtpd2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4604 [(set_attr "type" "ssecvt")
4605 (set_attr "prefix" "evex")
4606 (set_attr "mode" "OI")])
4607
;; vcvtpd2dq (256-bit source): V4DF to V4SI signed conversion, expressed as
;; UNSPEC_FIX_NOTRUNC.  {y} is the dialect-specific mnemonic suffix; the
;; masked variant is guarded by <mask_avx512vl_condition>.
4608 (define_insn "avx_cvtpd2dq256<mask_name>"
4609 [(set (match_operand:V4SI 0 "register_operand" "=v")
4610 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "vm")]
4611 UNSPEC_FIX_NOTRUNC))]
4612 "TARGET_AVX && <mask_avx512vl_condition>"
4613 "vcvtpd2dq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4614 [(set_attr "type" "ssecvt")
4615 (set_attr "prefix" "<mask_prefix>")
4616 (set_attr "mode" "OI")])
4617
;; Expander producing a V8SI result whose low half is the V4DF -> V4SI
;; UNSPEC_FIX_NOTRUNC conversion of operand 1 and whose high half is zero.
;; The preparation statement fills operand 2 with the V4SI zero constant.
4618 (define_expand "avx_cvtpd2dq256_2"
4619 [(set (match_operand:V8SI 0 "register_operand")
4620 (vec_concat:V8SI
4621 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand")]
4622 UNSPEC_FIX_NOTRUNC)
4623 (match_dup 2)))]
4624 "TARGET_AVX"
4625 "operands[2] = CONST0_RTX (V4SImode);")
4626
;; Insn matching the avx_cvtpd2dq256_2 expansion: V4DF -> V4SI conversion
;; concatenated with a zero V4SI (const0_operand) into a V8SI destination.
;; The destination is printed with %x0, i.e. the instruction is emitted on
;; the low 128-bit part of the V8SI register.
4627 (define_insn "*avx_cvtpd2dq256_2"
4628 [(set (match_operand:V8SI 0 "register_operand" "=x")
4629 (vec_concat:V8SI
4630 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
4631 UNSPEC_FIX_NOTRUNC)
4632 (match_operand:V4SI 2 "const0_operand")))]
4633 "TARGET_AVX"
4634 "vcvtpd2dq{y}\t{%1, %x0|%x0, %1}"
4635 [(set_attr "type" "ssecvt")
4636 (set_attr "prefix" "vex")
4637 (set_attr "btver2_decode" "vector")
4638 (set_attr "mode" "OI")])
4639
;; cvtpd2dq (128-bit source): V2DF -> V2SI UNSPEC_FIX_NOTRUNC conversion in
;; the low half of the V4SI result, with the high half explicitly zero.
;; The output code selects the template at run time: with TARGET_AVX the
;; v-prefixed mnemonic with the {x} suffix and the mask operand is used,
;; otherwise the plain SSE2 mnemonic without a mask operand.
4640 (define_insn "sse2_cvtpd2dq<mask_name>"
4641 [(set (match_operand:V4SI 0 "register_operand" "=v")
4642 (vec_concat:V4SI
4643 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
4644 UNSPEC_FIX_NOTRUNC)
4645 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
4646 "TARGET_SSE2 && <mask_avx512vl_condition>"
4647 {
4648 if (TARGET_AVX)
4649 return "vcvtpd2dq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
4650 else
4651 return "cvtpd2dq\t{%1, %0|%0, %1}";
4652 }
4653 [(set_attr "type" "ssecvt")
4654 (set_attr "prefix_rep" "1")
4655 (set_attr "prefix_data16" "0")
4656 (set_attr "prefix" "maybe_vex")
4657 (set_attr "mode" "TI")
4658 (set_attr "amdfam10_decode" "double")
4659 (set_attr "athlon_decode" "vector")
4660 (set_attr "bdver1_decode" "double")])
4661
4662 ;; For ufix_notrunc* insn patterns: mnemonic suffix appended to
;; vcvtpd2udq, empty for V8DF and the dialect-specific {y} for V4DF.
4663 (define_mode_attr pd2udqsuff
4664 [(V8DF "") (V4DF "{y}")])
4665
;; vcvtpd2udq: double vector to unsigned SImode vector (<si2dfmode>) with
;; the same element count, expressed as UNSPEC_UNSIGNED_FIX_NOTRUNC.
;; <pd2udqsuff> supplies the mnemonic suffix.  Requires TARGET_AVX512F.
4666 (define_insn "ufix_notrunc<mode><si2dfmodelower>2<mask_name><round_name>"
4667 [(set (match_operand:<si2dfmode> 0 "register_operand" "=v")
4668 (unspec:<si2dfmode>
4669 [(match_operand:VF2_512_256VL 1 "nonimmediate_operand" "<round_constraint>")]
4670 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4671 "TARGET_AVX512F"
4672 "vcvtpd2udq<pd2udqsuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4673 [(set_attr "type" "ssecvt")
4674 (set_attr "prefix" "evex")
4675 (set_attr "mode" "<sseinsnmode>")])
4676
;; 128-bit vcvtpd2udq: V2DF -> V2SI UNSPEC_UNSIGNED_FIX_NOTRUNC conversion
;; in the low half of the V4SI result, with the high half explicitly zero.
;; Requires TARGET_AVX512VL; {x} is the dialect-specific mnemonic suffix.
4677 (define_insn "ufix_notruncv2dfv2si2<mask_name>"
4678 [(set (match_operand:V4SI 0 "register_operand" "=v")
4679 (vec_concat:V4SI
4680 (unspec:V2SI
4681 [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
4682 UNSPEC_UNSIGNED_FIX_NOTRUNC)
4683 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
4684 "TARGET_AVX512VL"
4685 "vcvtpd2udq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4686 [(set_attr "type" "ssecvt")
4687 (set_attr "prefix" "evex")
4688 (set_attr "mode" "TI")])
4689
;; vcvttpd2dq / vcvttpd2udq (512-bit source): signed or unsigned (any_fix)
;; conversion of V8DF to V8SI.  <fixsuffix> selects the mnemonic; the
;; pattern takes the mask and SAE-only rounding substitutions.
4690 (define_insn "<fixsuffix>fix_truncv8dfv8si2<mask_name><round_saeonly_name>"
4691 [(set (match_operand:V8SI 0 "register_operand" "=v")
4692 (any_fix:V8SI
4693 (match_operand:V8DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
4694 "TARGET_AVX512F"
4695 "vcvttpd2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
4696 [(set_attr "type" "ssecvt")
4697 (set_attr "prefix" "evex")
4698 (set_attr "mode" "OI")])
4699
;; 128-bit vcvttpd2udq: unsigned_fix of V2DF to V2SI in the low half of the
;; V4SI result, with the high half explicitly zero.  Requires
;; TARGET_AVX512VL; {x} is the dialect-specific mnemonic suffix.
4700 (define_insn "ufix_truncv2dfv2si2<mask_name>"
4701 [(set (match_operand:V4SI 0 "register_operand" "=v")
4702 (vec_concat:V4SI
4703 (unsigned_fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
4704 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
4705 "TARGET_AVX512VL"
4706 "vcvttpd2udq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4707 [(set_attr "type" "ssecvt")
4708 (set_attr "prefix" "evex")
4709 (set_attr "mode" "TI")])
4710
;; vcvttpd2dq (256-bit source): signed fix of V4DF to V4SI.  {y} is the
;; dialect-specific mnemonic suffix.
;; NOTE(review): the condition is written out by hand instead of using
;; <mask_avx512vl_condition> like the neighbouring masked 128/256-bit
;; patterns, so the masked variant is also accepted under plain TARGET_AVX
;; -- confirm that is intended.
4711 (define_insn "fix_truncv4dfv4si2<mask_name>"
4712 [(set (match_operand:V4SI 0 "register_operand" "=v")
4713 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
4714 "TARGET_AVX || (TARGET_AVX512VL && TARGET_AVX512F)"
4715 "vcvttpd2dq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4716 [(set_attr "type" "ssecvt")
4717 (set_attr "prefix" "maybe_evex")
4718 (set_attr "mode" "OI")])
4719
;; vcvttpd2udq (256-bit source): unsigned_fix of V4DF to V4SI.  Requires
;; both TARGET_AVX512VL and TARGET_AVX512F; {y} is the dialect-specific
;; mnemonic suffix.
4720 (define_insn "ufix_truncv4dfv4si2<mask_name>"
4721 [(set (match_operand:V4SI 0 "register_operand" "=v")
4722 (unsigned_fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
4723 "TARGET_AVX512VL && TARGET_AVX512F"
4724 "vcvttpd2udq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4725 [(set_attr "type" "ssecvt")
4726 (set_attr "prefix" "maybe_evex")
4727 (set_attr "mode" "OI")])
4728
;; vcvttpd2qq / vcvttpd2uqq: signed or unsigned (any_fix) conversion of a
;; double vector to the 64-bit integer vector <sseintvecmode>.  Requires
;; TARGET_AVX512DQ; the SAE variant is further restricted by
;; <round_saeonly_mode512bit_condition>.
4729 (define_insn "<fixsuffix>fix_trunc<mode><sseintvecmodelower>2<mask_name><round_saeonly_name>"
4730 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
4731 (any_fix:<sseintvecmode>
4732 (match_operand:VF2_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
4733 "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>"
4734 "vcvttpd2<fixsuffix>qq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
4735 [(set_attr "type" "ssecvt")
4736 (set_attr "prefix" "evex")
4737 (set_attr "mode" "<sseintvecmode2>")])
4738
;; vcvtpd2qq: double vector to signed 64-bit integer vector
;; (<sseintvecmode>), expressed as UNSPEC_FIX_NOTRUNC.  Requires
;; TARGET_AVX512DQ; the rounding variant is further restricted by
;; <round_mode512bit_condition>.
4739 (define_insn "fix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>"
4740 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
4741 (unspec:<sseintvecmode>
4742 [(match_operand:VF2_AVX512VL 1 "<round_nimm_predicate>" "<round_constraint>")]
4743 UNSPEC_FIX_NOTRUNC))]
4744 "TARGET_AVX512DQ && <round_mode512bit_condition>"
4745 "vcvtpd2qq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4746 [(set_attr "type" "ssecvt")
4747 (set_attr "prefix" "evex")
4748 (set_attr "mode" "<sseintvecmode2>")])
4749
;; vcvtpd2uqq: double vector to unsigned 64-bit integer vector
;; (<sseintvecmode>), expressed as UNSPEC_UNSIGNED_FIX_NOTRUNC.  Requires
;; TARGET_AVX512DQ; the rounding variant is further restricted by
;; <round_mode512bit_condition>.
;; NOTE(review): the signed twin fix_notrunc<mode><sseintvecmodelower>2 uses
;; <round_nimm_predicate> for operand 1 while this pattern keeps a fixed
;; nonimmediate_operand predicate -- confirm the difference is intended.
4750 (define_insn "ufix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>"
4751 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
4752 (unspec:<sseintvecmode>
4753 [(match_operand:VF2_AVX512VL 1 "nonimmediate_operand" "<round_constraint>")]
4754 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4755 "TARGET_AVX512DQ && <round_mode512bit_condition>"
4756 "vcvtpd2uqq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4757 [(set_attr "type" "ssecvt")
4758 (set_attr "prefix" "evex")
4759 (set_attr "mode" "<sseintvecmode2>")])
4760
;; vcvttps2qq / vcvttps2uqq: signed or unsigned (any_fix) conversion of a
;; single-precision vector to the 64-bit integer vector <sselongvecmode>
;; with the same element count.  Requires TARGET_AVX512DQ; the SAE variant
;; is further restricted by <round_saeonly_modev8sf_condition>.
4761 (define_insn "<fixsuffix>fix_trunc<mode><sselongvecmodelower>2<mask_name><round_saeonly_name>"
4762 [(set (match_operand:<sselongvecmode> 0 "register_operand" "=v")
4763 (any_fix:<sselongvecmode>
4764 (match_operand:VF1_128_256VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
4765 "TARGET_AVX512DQ && <round_saeonly_modev8sf_condition>"
4766 "vcvttps2<fixsuffix>qq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
4767 [(set_attr "type" "ssecvt")
4768 (set_attr "prefix" "evex")
4769 (set_attr "mode" "<sseintvecmode3>")])
4770
;; 128-bit vcvttps2qq / vcvttps2uqq: the low two SFmode elements of the V4SF
;; operand 1 are converted (any_fix) to V2DI.  Requires both AVX512DQ and
;; AVX512VL.
;; Only the low 64 bits of the source are read, so the Intel-syntax operand
;; is printed with %q1 (as sse2_cvtps2pd does for the same vec_select of a
;; V4SF operand) instead of the plain %1 that describes the whole vector.
4771 (define_insn "<fixsuffix>fix_truncv2sfv2di2<mask_name>"
4772 [(set (match_operand:V2DI 0 "register_operand" "=v")
4773 (any_fix:V2DI
4774 (vec_select:V2SF
4775 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
4776 (parallel [(const_int 0) (const_int 1)]))))]
4777 "TARGET_AVX512DQ && TARGET_AVX512VL"
4778 "vcvttps2<fixsuffix>qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
4779 [(set_attr "type" "ssecvt")
4780 (set_attr "prefix" "evex")
4781 (set_attr "mode" "TI")])
4782
;; vcvttps2udq (128/256-bit): unsigned_fix of a single-precision vector to
;; the integer vector <sseintvecmode> with the same element count.
;; Requires TARGET_AVX512VL.
4783 (define_insn "ufix_trunc<mode><sseintvecmodelower>2<mask_name>"
4784 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
4785 (unsigned_fix:<sseintvecmode>
4786 (match_operand:VF1_128_256VL 1 "nonimmediate_operand" "vm")))]
4787 "TARGET_AVX512VL"
4788 "vcvttps2udq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4789 [(set_attr "type" "ssecvt")
4790 (set_attr "prefix" "evex")
4791 (set_attr "mode" "<sseintvecmode2>")])
4792
;; Expander producing a V8SI result whose low half is the signed fix of the
;; V4DF operand 1 and whose high half is zero.  The preparation statement
;; fills operand 2 with the V4SI zero constant.
4793 (define_expand "avx_cvttpd2dq256_2"
4794 [(set (match_operand:V8SI 0 "register_operand")
4795 (vec_concat:V8SI
4796 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand"))
4797 (match_dup 2)))]
4798 "TARGET_AVX"
4799 "operands[2] = CONST0_RTX (V4SImode);")
4800
;; cvttpd2dq (128-bit source): signed fix of V2DF to V2SI in the low half of
;; the V4SI result, with the high half explicitly zero.
;; The output code selects the template at run time: with TARGET_AVX the
;; v-prefixed mnemonic with the {x} suffix and the mask operand is used,
;; otherwise the plain SSE2 mnemonic without a mask operand.
4801 (define_insn "sse2_cvttpd2dq<mask_name>"
4802 [(set (match_operand:V4SI 0 "register_operand" "=v")
4803 (vec_concat:V4SI
4804 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
4805 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
4806 "TARGET_SSE2 && <mask_avx512vl_condition>"
4807 {
4808 if (TARGET_AVX)
4809 return "vcvttpd2dq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
4810 else
4811 return "cvttpd2dq\t{%1, %0|%0, %1}";
4812 }
4813 [(set_attr "type" "ssecvt")
4814 (set_attr "amdfam10_decode" "double")
4815 (set_attr "athlon_decode" "vector")
4816 (set_attr "bdver1_decode" "double")
4817 (set_attr "prefix" "maybe_vex")
4818 (set_attr "mode" "TI")])
4819
;; cvtsd2ss: the V2DF operand 2 is narrowed (float_truncate to V2SF,
;; duplicated to V4SF) and element 0 of that value is merged into operand 1
;; (vec_merge selector 1); the result is written to operand 0.
;; Alternatives 0 and 1 are the two-operand non-AVX forms with register and
;; memory sources; alternative 2 is the three-operand AVX form and the only
;; one that prints <round_op3>.  Memory sources use %q2 in Intel syntax.
4820 (define_insn "sse2_cvtsd2ss<round_name>"
4821 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
4822 (vec_merge:V4SF
4823 (vec_duplicate:V4SF
4824 (float_truncate:V2SF
4825 (match_operand:V2DF 2 "nonimmediate_operand" "x,m,<round_constraint>")))
4826 (match_operand:V4SF 1 "register_operand" "0,0,v")
4827 (const_int 1)))]
4828 "TARGET_SSE2"
4829 "@
4830 cvtsd2ss\t{%2, %0|%0, %2}
4831 cvtsd2ss\t{%2, %0|%0, %q2}
4832 vcvtsd2ss\t{<round_op3>%2, %1, %0|%0, %1, %q2<round_op3>}"
4833 [(set_attr "isa" "noavx,noavx,avx")
4834 (set_attr "type" "ssecvt")
4835 (set_attr "athlon_decode" "vector,double,*")
4836 (set_attr "amdfam10_decode" "vector,double,*")
4837 (set_attr "bdver1_decode" "direct,direct,*")
4838 (set_attr "btver2_decode" "double,double,double")
4839 (set_attr "prefix" "orig,orig,<round_prefix>")
4840 (set_attr "mode" "SF")])
4841
;; cvtss2sd: the low two SFmode elements of the V4SF operand 2 are widened
;; (float_extend to V2DF) and element 0 of that value is merged into
;; operand 1 (vec_merge selector 1); the result is written to operand 0.
;; Alternatives 0 and 1 are the two-operand non-AVX forms with register and
;; memory sources; alternative 2 is the three-operand AVX form and the only
;; one that prints <round_saeonly_op3>.  Memory sources use %k2 in Intel
;; syntax.
4842 (define_insn "sse2_cvtss2sd<round_saeonly_name>"
4843 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
4844 (vec_merge:V2DF
4845 (float_extend:V2DF
4846 (vec_select:V2SF
4847 (match_operand:V4SF 2 "<round_saeonly_nimm_predicate>" "x,m,<round_saeonly_constraint>")
4848 (parallel [(const_int 0) (const_int 1)])))
4849 (match_operand:V2DF 1 "register_operand" "0,0,v")
4850 (const_int 1)))]
4851 "TARGET_SSE2"
4852 "@
4853 cvtss2sd\t{%2, %0|%0, %2}
4854 cvtss2sd\t{%2, %0|%0, %k2}
4855 vcvtss2sd\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %k2<round_saeonly_op3>}"
4856 [(set_attr "isa" "noavx,noavx,avx")
4857 (set_attr "type" "ssecvt")
4858 (set_attr "amdfam10_decode" "vector,double,*")
4859 (set_attr "athlon_decode" "direct,direct,*")
4860 (set_attr "bdver1_decode" "direct,direct,*")
4861 (set_attr "btver2_decode" "double,double,double")
4862 (set_attr "prefix" "orig,orig,<round_saeonly_prefix>")
4863 (set_attr "mode" "DF")])
4864
;; vcvtpd2ps (512-bit source): float_truncate of V8DF to V8SF.  Supports
;; both the mask and the rounding substitutions.
4865 (define_insn "<mask_codefor>avx512f_cvtpd2ps512<mask_name><round_name>"
4866 [(set (match_operand:V8SF 0 "register_operand" "=v")
4867 (float_truncate:V8SF
4868 (match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")))]
4869 "TARGET_AVX512F"
4870 "vcvtpd2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4871 [(set_attr "type" "ssecvt")
4872 (set_attr "prefix" "evex")
4873 (set_attr "mode" "V8SF")])
4874
;; vcvtpd2ps (256-bit source): float_truncate of V4DF to V4SF.  {y} is the
;; dialect-specific mnemonic suffix; the masked variant is guarded by
;; <mask_avx512vl_condition>.
4875 (define_insn "avx_cvtpd2ps256<mask_name>"
4876 [(set (match_operand:V4SF 0 "register_operand" "=v")
4877 (float_truncate:V4SF
4878 (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
4879 "TARGET_AVX && <mask_avx512vl_condition>"
4880 "vcvtpd2ps{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4881 [(set_attr "type" "ssecvt")
4882 (set_attr "prefix" "maybe_evex")
4883 (set_attr "btver2_decode" "vector")
4884 (set_attr "mode" "V4SF")])
4885
;; Expander for cvtpd2ps (128-bit source): the V4SF result is the
;; float_truncate of the V2DF operand 1 concatenated with operand 2, which
;; the preparation statement sets to the V2SF zero constant.
4886 (define_expand "sse2_cvtpd2ps"
4887 [(set (match_operand:V4SF 0 "register_operand")
4888 (vec_concat:V4SF
4889 (float_truncate:V2SF
4890 (match_operand:V2DF 1 "nonimmediate_operand"))
4891 (match_dup 2)))]
4892 "TARGET_SSE2"
4893 "operands[2] = CONST0_RTX (V2SFmode);")
4894
;; Masked expander for cvtpd2ps (128-bit source): the converted-and-zero-
;; extended V4SF value is merged with operand 2 under the QImode mask in
;; operand 3.  The preparation statement sets operand 4, the upper half of
;; the vec_concat, to the V2SF zero constant.
4895 (define_expand "sse2_cvtpd2ps_mask"
4896 [(set (match_operand:V4SF 0 "register_operand")
4897 (vec_merge:V4SF
4898 (vec_concat:V4SF
4899 (float_truncate:V2SF
4900 (match_operand:V2DF 1 "nonimmediate_operand"))
4901 (match_dup 4))
4902 (match_operand:V4SF 2 "register_operand")
4903 (match_operand:QI 3 "register_operand")))]
4904 "TARGET_SSE2"
4905 "operands[4] = CONST0_RTX (V2SFmode);")
4906
;; Insn for cvtpd2ps (128-bit source): float_truncate of V2DF to V2SF in the
;; low half of the V4SF result, with operand 2 (const0_operand) as the high
;; half.  Because operand 2 is an explicit operand here, the mask
;; substitution is referenced as <mask_operand3>.
;; The output code uses the v-prefixed {x} form when TARGET_AVX and the
;; plain SSE2 mnemonic otherwise.
4907 (define_insn "*sse2_cvtpd2ps<mask_name>"
4908 [(set (match_operand:V4SF 0 "register_operand" "=v")
4909 (vec_concat:V4SF
4910 (float_truncate:V2SF
4911 (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
4912 (match_operand:V2SF 2 "const0_operand")))]
4913 "TARGET_SSE2 && <mask_avx512vl_condition>"
4914 {
4915 if (TARGET_AVX)
4916 return "vcvtpd2ps{x}\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}";
4917 else
4918 return "cvtpd2ps\t{%1, %0|%0, %1}";
4919 }
4920 [(set_attr "type" "ssecvt")
4921 (set_attr "amdfam10_decode" "double")
4922 (set_attr "athlon_decode" "vector")
4923 (set_attr "bdver1_decode" "double")
4924 (set_attr "prefix_data16" "1")
4925 (set_attr "prefix" "maybe_vex")
4926 (set_attr "mode" "V4SF")])
4927
4928 ;; For <sse2_avx_avx512f>_cvtps2pd<avxsizesuffix> insn pattern: maps a
;; double-precision vector mode to the single-precision vector mode with
;; the same number of elements.
4929 (define_mode_attr sf2dfmode
4930 [(V8DF "V8SF") (V4DF "V4SF")])
4931
;; vcvtps2pd (256/512-bit result): float_extend of a <sf2dfmode>
;; single-precision vector to the double vector with the same element
;; count.  The mask and SAE substitutions are each guarded by their
;; 512-bit-mode condition.
4932 (define_insn "<sse2_avx_avx512f>_cvtps2pd<avxsizesuffix><mask_name><round_saeonly_name>"
4933 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
4934 (float_extend:VF2_512_256
4935 (match_operand:<sf2dfmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
4936 "TARGET_AVX && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
4937 "vcvtps2pd\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
4938 [(set_attr "type" "ssecvt")
4939 (set_attr "prefix" "maybe_vex")
4940 (set_attr "mode" "<MODE>")])
4941
;; vcvtps2pd on the low half of a 256-bit vector: elements 0-3 of the V8SF
;; operand 1 are selected and widened to V4DF.  The source is printed with
;; the %x modifier.
4942 (define_insn "*avx_cvtps2pd256_2"
4943 [(set (match_operand:V4DF 0 "register_operand" "=x")
4944 (float_extend:V4DF
4945 (vec_select:V4SF
4946 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
4947 (parallel [(const_int 0) (const_int 1)
4948 (const_int 2) (const_int 3)]))))]
4949 "TARGET_AVX"
4950 "vcvtps2pd\t{%x1, %0|%0, %x1}"
4951 [(set_attr "type" "ssecvt")
4952 (set_attr "prefix" "vex")
4953 (set_attr "mode" "V4DF")])
4954
;; Named pattern vec_unpacks_lo for V16SF: elements 0-7 of operand 1 are
;; selected and widened to V8DF with vcvtps2pd.  The source is printed with
;; the %t modifier.
4955 (define_insn "vec_unpacks_lo_v16sf"
4956 [(set (match_operand:V8DF 0 "register_operand" "=v")
4957 (float_extend:V8DF
4958 (vec_select:V8SF
4959 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
4960 (parallel [(const_int 0) (const_int 1)
4961 (const_int 2) (const_int 3)
4962 (const_int 4) (const_int 5)
4963 (const_int 6) (const_int 7)]))))]
4964 "TARGET_AVX512F"
4965 "vcvtps2pd\t{%t1, %0|%0, %t1}"
4966 [(set_attr "type" "ssecvt")
4967 (set_attr "prefix" "evex")
4968 (set_attr "mode" "V8DF")])
4969
;; vpmov{b,w}2m: derive a mask register ("Yk", mode <avx512fmaskmode>) from
;; the VI12_AVX512VL vector register operand 1, expressed as
;; UNSPEC_CVTINT2MASK.  Requires TARGET_AVX512BW.
4970 (define_insn "<avx512>_cvt<ssemodesuffix>2mask<mode>"
4971 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
4972 (unspec:<avx512fmaskmode>
4973 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")]
4974 UNSPEC_CVTINT2MASK))]
4975 "TARGET_AVX512BW"
4976 "vpmov<ssemodesuffix>2m\t{%1, %0|%0, %1}"
4977 [(set_attr "prefix" "evex")
4978 (set_attr "mode" "<sseinsnmode>")])
4979
;; vpmov{d,q}2m: derive a mask register ("Yk", mode <avx512fmaskmode>) from
;; the VI48_AVX512VL vector register operand 1, expressed as
;; UNSPEC_CVTINT2MASK.  Requires TARGET_AVX512DQ.
4980 (define_insn "<avx512>_cvt<ssemodesuffix>2mask<mode>"
4981 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
4982 (unspec:<avx512fmaskmode>
4983 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")]
4984 UNSPEC_CVTINT2MASK))]
4985 "TARGET_AVX512DQ"
4986 "vpmov<ssemodesuffix>2m\t{%1, %0|%0, %1}"
4987 [(set_attr "prefix" "evex")
4988 (set_attr "mode" "<sseinsnmode>")])
4989
;; Expander turning a mask register into a VI12_AVX512VL vector: a vec_merge
;; controlled by the mask operand 1 that picks from operand 2 (all-ones
;; constant) or operand 3 (zero constant).  Both constants are created by
;; the preparation statements.  Requires TARGET_AVX512BW.
4990 (define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>"
4991 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
4992 (vec_merge:VI12_AVX512VL
4993 (match_dup 2)
4994 (match_dup 3)
4995 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
4996 "TARGET_AVX512BW"
4997 {
4998 operands[2] = CONSTM1_RTX (<MODE>mode);
4999 operands[3] = CONST0_RTX (<MODE>mode);
5000 })
5001
;; vpmovm2{b,w}: insn matching the mask-to-vector vec_merge of an all-ones
;; vector (constm1_operand) and a zero vector (const0_operand) under the
;; mask register operand 1 ("Yk").  Requires TARGET_AVX512BW.
5002 (define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>"
5003 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
5004 (vec_merge:VI12_AVX512VL
5005 (match_operand:VI12_AVX512VL 2 "constm1_operand")
5006 (match_operand:VI12_AVX512VL 3 "const0_operand")
5007 (match_operand:<avx512fmaskmode> 1 "register_operand" "Yk")))]
5008 "TARGET_AVX512BW"
5009 "vpmovm2<ssemodesuffix>\t{%1, %0|%0, %1}"
5010 [(set_attr "prefix" "evex")
5011 (set_attr "mode" "<sseinsnmode>")])
5012
;; Expander turning a mask register into a VI48_AVX512VL vector: a vec_merge
;; controlled by the mask operand 1 that picks from operand 2 (all-ones
;; constant) or operand 3 (zero constant).  Both constants are created by
;; the preparation statements, written here as a brace-delimited C block
;; like the VI12_AVX512VL expander of the same name.  Requires
;; TARGET_AVX512DQ.
5013 (define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>"
5014 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
5015 (vec_merge:VI48_AVX512VL
5016 (match_dup 2)
5017 (match_dup 3)
5018 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
5019 "TARGET_AVX512DQ"
5020 {
5021 operands[2] = CONSTM1_RTX (<MODE>mode);
5022 operands[3] = CONST0_RTX (<MODE>mode);
5023 })
5024
;; vpmovm2{d,q}: insn matching the mask-to-vector vec_merge of an all-ones
;; vector (constm1_operand) and a zero vector (const0_operand) under the
;; mask register operand 1 ("Yk").  Requires TARGET_AVX512DQ.
5025 (define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>"
5026 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
5027 (vec_merge:VI48_AVX512VL
5028 (match_operand:VI48_AVX512VL 2 "constm1_operand")
5029 (match_operand:VI48_AVX512VL 3 "const0_operand")
5030 (match_operand:<avx512fmaskmode> 1 "register_operand" "Yk")))]
5031 "TARGET_AVX512DQ"
5032 "vpmovm2<ssemodesuffix>\t{%1, %0|%0, %1}"
5033 [(set_attr "prefix" "evex")
5034 (set_attr "mode" "<sseinsnmode>")])
5035
;; cvtps2pd (128-bit): the low two SFmode elements of the V4SF operand 1 are
;; widened to V2DF.  The masked variant is guarded by
;; <mask_avx512vl_condition>.  The Intel-syntax source is printed with %q1
;; because only the low 64 bits are read.
5036 (define_insn "sse2_cvtps2pd<mask_name>"
5037 [(set (match_operand:V2DF 0 "register_operand" "=v")
5038 (float_extend:V2DF
5039 (vec_select:V2SF
5040 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
5041 (parallel [(const_int 0) (const_int 1)]))))]
5042 "TARGET_SSE2 && <mask_avx512vl_condition>"
5043 "%vcvtps2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
5044 [(set_attr "type" "ssecvt")
5045 (set_attr "amdfam10_decode" "direct")
5046 (set_attr "athlon_decode" "double")
5047 (set_attr "bdver1_decode" "double")
5048 (set_attr "prefix_data16" "0")
5049 (set_attr "prefix" "maybe_vex")
5050 (set_attr "mode" "V2DF")])
5051
;; Named pattern vec_unpacks_hi for V4SF, done in two steps through a fresh
;; V4SF pseudo (operand 2, created by the preparation statement):
;;  1. select elements 6,7,2,3 of the concatenation {operand 2, operand 1},
;;     which places the high two elements of operand 1 in positions 0 and 1
;;     of the temporary;
;;  2. widen the low two elements of the temporary to V2DF.
5052 (define_expand "vec_unpacks_hi_v4sf"
5053 [(set (match_dup 2)
5054 (vec_select:V4SF
5055 (vec_concat:V8SF
5056 (match_dup 2)
5057 (match_operand:V4SF 1 "nonimmediate_operand"))
5058 (parallel [(const_int 6) (const_int 7)
5059 (const_int 2) (const_int 3)])))
5060 (set (match_operand:V2DF 0 "register_operand")
5061 (float_extend:V2DF
5062 (vec_select:V2SF
5063 (match_dup 2)
5064 (parallel [(const_int 0) (const_int 1)]))))]
5065 "TARGET_SSE2"
5066 "operands[2] = gen_reg_rtx (V4SFmode);")
5067
;; Named pattern vec_unpacks_hi for V8SF: elements 4-7 of operand 1 are
;; extracted into a fresh V4SF pseudo (operand 2), which is then widened to
;; V4DF.
5068 (define_expand "vec_unpacks_hi_v8sf"
5069 [(set (match_dup 2)
5070 (vec_select:V4SF
5071 (match_operand:V8SF 1 "nonimmediate_operand")
5072 (parallel [(const_int 4) (const_int 5)
5073 (const_int 6) (const_int 7)])))
5074 (set (match_operand:V4DF 0 "register_operand")
5075 (float_extend:V4DF
5076 (match_dup 2)))]
5077 "TARGET_AVX"
5078 "operands[2] = gen_reg_rtx (V4SFmode);")
5079
;; Widen the high half of V16SF operand 1 to V8DF: elements 8..15 are first
;; selected into a fresh V8SF pseudo (operand 2), which is then
;; float_extended into operand 0.  Requires TARGET_AVX512F.
5080 (define_expand "vec_unpacks_hi_v16sf"
5081 [(set (match_dup 2)
5082 (vec_select:V8SF
5083 (match_operand:V16SF 1 "nonimmediate_operand")
5084 (parallel [(const_int 8) (const_int 9)
5085 (const_int 10) (const_int 11)
5086 (const_int 12) (const_int 13)
5087 (const_int 14) (const_int 15)])))
5088 (set (match_operand:V8DF 0 "register_operand")
5089 (float_extend:V8DF
5090 (match_dup 2)))]
5091 "TARGET_AVX512F"
5092 "operands[2] = gen_reg_rtx (V8SFmode);")
5093
;; Widen the low two SF elements (indices 0 and 1) of V4SF operand 1 to
;; V2DF.  No preparation code: the RTL template is emitted as written.
5094 (define_expand "vec_unpacks_lo_v4sf"
5095 [(set (match_operand:V2DF 0 "register_operand")
5096 (float_extend:V2DF
5097 (vec_select:V2SF
5098 (match_operand:V4SF 1 "nonimmediate_operand")
5099 (parallel [(const_int 0) (const_int 1)]))))]
5100 "TARGET_SSE2")
5101
;; Widen the low four SF elements (indices 0..3) of V8SF operand 1 to V4DF.
;; No preparation code: the RTL template is emitted as written.
5102 (define_expand "vec_unpacks_lo_v8sf"
5103 [(set (match_operand:V4DF 0 "register_operand")
5104 (float_extend:V4DF
5105 (vec_select:V4SF
5106 (match_operand:V8SF 1 "nonimmediate_operand")
5107 (parallel [(const_int 0) (const_int 1)
5108 (const_int 2) (const_int 3)]))))]
5109 "TARGET_AVX")
5110
;; Maps an integer vector mode to the floating-point vector mode that has
;; half as many elements of twice the width (e.g. V8HI -> V4SF,
;; V4SI -> V2DF).  Used as the result mode of the vec_unpack*_float_*
;; expanders that follow.
5111 (define_mode_attr sseunpackfltmode
5112 [(V8HI "V4SF") (V4SI "V2DF") (V16HI "V8SF")
5113 (V8SI "V4DF") (V32HI "V16SF") (V16SI "V8DF")])
5114
;; For each mode of VI2_AVX512F: unpack the high half of operand 1 with
;; gen_vec_unpacks_hi_<mode> into a <sseunpackmode> temporary, then set
;; operand 0 to the FLOAT conversion of that temporary.
5115 (define_expand "vec_unpacks_float_hi_<mode>"
5116 [(match_operand:<sseunpackfltmode> 0 "register_operand")
5117 (match_operand:VI2_AVX512F 1 "register_operand")]
5118 "TARGET_SSE2"
5119 {
5120 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
5121
5122 emit_insn (gen_vec_unpacks_hi_<mode> (tmp, operands[1]));
5123 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
5124 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
5125 DONE;
5126 })
5127
;; For each mode of VI2_AVX512F: unpack the low half of operand 1 with
;; gen_vec_unpacks_lo_<mode> into a <sseunpackmode> temporary, then set
;; operand 0 to the FLOAT conversion of that temporary.
5128 (define_expand "vec_unpacks_float_lo_<mode>"
5129 [(match_operand:<sseunpackfltmode> 0 "register_operand")
5130 (match_operand:VI2_AVX512F 1 "register_operand")]
5131 "TARGET_SSE2"
5132 {
5133 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
5134
5135 emit_insn (gen_vec_unpacks_lo_<mode> (tmp, operands[1]));
5136 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
5137 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
5138 DONE;
5139 })
5140
;; For each mode of VI2_AVX512F: unpack the high half of operand 1 with
;; gen_vec_unpacku_hi_<mode> (the "unpacku" generator) into a
;; <sseunpackmode> temporary, then set operand 0 to the FLOAT conversion
;; of that temporary.
5141 (define_expand "vec_unpacku_float_hi_<mode>"
5142 [(match_operand:<sseunpackfltmode> 0 "register_operand")
5143 (match_operand:VI2_AVX512F 1 "register_operand")]
5144 "TARGET_SSE2"
5145 {
5146 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
5147
5148 emit_insn (gen_vec_unpacku_hi_<mode> (tmp, operands[1]));
5149 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
5150 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
5151 DONE;
5152 })
5153
;; For each mode of VI2_AVX512F: unpack the low half of operand 1 with
;; gen_vec_unpacku_lo_<mode> (the "unpacku" generator) into a
;; <sseunpackmode> temporary, then set operand 0 to the FLOAT conversion
;; of that temporary.
5154 (define_expand "vec_unpacku_float_lo_<mode>"
5155 [(match_operand:<sseunpackfltmode> 0 "register_operand")
5156 (match_operand:VI2_AVX512F 1 "register_operand")]
5157 "TARGET_SSE2"
5158 {
5159 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
5160
5161 emit_insn (gen_vec_unpacku_lo_<mode> (tmp, operands[1]));
5162 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
5163 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
5164 DONE;
5165 })
5166
;; Convert the high two SI elements of V4SI operand 1 to V2DF in two steps:
;;  1. select indices 2,3,2,3 of operand 1 into a fresh V4SI pseudo
;;     (operand 2), so elements 2 and 3 land in positions 0 and 1;
;;  2. apply float to elements 0 and 1 of that pseudo.
5167 (define_expand "vec_unpacks_float_hi_v4si"
5168 [(set (match_dup 2)
5169 (vec_select:V4SI
5170 (match_operand:V4SI 1 "nonimmediate_operand")
5171 (parallel [(const_int 2) (const_int 3)
5172 (const_int 2) (const_int 3)])))
5173 (set (match_operand:V2DF 0 "register_operand")
5174 (float:V2DF
5175 (vec_select:V2SI
5176 (match_dup 2)
5177 (parallel [(const_int 0) (const_int 1)]))))]
5178 "TARGET_SSE2"
5179 "operands[2] = gen_reg_rtx (V4SImode);")
5180
;; Convert the low two SI elements (indices 0 and 1) of V4SI operand 1 to
;; V2DF.  No preparation code: the RTL template is emitted as written.
5181 (define_expand "vec_unpacks_float_lo_v4si"
5182 [(set (match_operand:V2DF 0 "register_operand")
5183 (float:V2DF
5184 (vec_select:V2SI
5185 (match_operand:V4SI 1 "nonimmediate_operand")
5186 (parallel [(const_int 0) (const_int 1)]))))]
5187 "TARGET_SSE2")
5188
;; Convert the high half of V8SI operand 1 to V4DF: elements 4..7 are
;; selected into a fresh V4SI pseudo (operand 2), which is then converted
;; with float into operand 0.  Requires TARGET_AVX.
5189 (define_expand "vec_unpacks_float_hi_v8si"
5190 [(set (match_dup 2)
5191 (vec_select:V4SI
5192 (match_operand:V8SI 1 "nonimmediate_operand")
5193 (parallel [(const_int 4) (const_int 5)
5194 (const_int 6) (const_int 7)])))
5195 (set (match_operand:V4DF 0 "register_operand")
5196 (float:V4DF
5197 (match_dup 2)))]
5198 "TARGET_AVX"
5199 "operands[2] = gen_reg_rtx (V4SImode);")
5200
;; Convert the low four SI elements (indices 0..3) of V8SI operand 1 to
;; V4DF.  No preparation code: the RTL template is emitted as written.
5201 (define_expand "vec_unpacks_float_lo_v8si"
5202 [(set (match_operand:V4DF 0 "register_operand")
5203 (float:V4DF
5204 (vec_select:V4SI
5205 (match_operand:V8SI 1 "nonimmediate_operand")
5206 (parallel [(const_int 0) (const_int 1)
5207 (const_int 2) (const_int 3)]))))]
5208 "TARGET_AVX")
5209
;; Convert the high half of V16SI operand 1 to V8DF: elements 8..15 are
;; selected into a fresh V8SI pseudo (operand 2), which is then converted
;; with float into operand 0.  Requires TARGET_AVX512F.
5210 (define_expand "vec_unpacks_float_hi_v16si"
5211 [(set (match_dup 2)
5212 (vec_select:V8SI
5213 (match_operand:V16SI 1 "nonimmediate_operand")
5214 (parallel [(const_int 8) (const_int 9)
5215 (const_int 10) (const_int 11)
5216 (const_int 12) (const_int 13)
5217 (const_int 14) (const_int 15)])))
5218 (set (match_operand:V8DF 0 "register_operand")
5219 (float:V8DF
5220 (match_dup 2)))]
5221 "TARGET_AVX512F"
5222 "operands[2] = gen_reg_rtx (V8SImode);")
5223
;; Convert the low eight SI elements (indices 0..7) of V16SI operand 1 to
;; V8DF.  No preparation code: the RTL template is emitted as written.
5224 (define_expand "vec_unpacks_float_lo_v16si"
5225 [(set (match_operand:V8DF 0 "register_operand")
5226 (float:V8DF
5227 (vec_select:V8SI
5228 (match_operand:V16SI 1 "nonimmediate_operand")
5229 (parallel [(const_int 0) (const_int 1)
5230 (const_int 2) (const_int 3)
5231 (const_int 4) (const_int 5)
5232 (const_int 6) (const_int 7)]))))]
5233 "TARGET_AVX512F")
5234
;; Convert the high two SI elements of V4SI operand 1 to V2DF with a
;; fix-up step after the signed conversion.  Emitted sequence:
;;   op5 = elements 2,3,2,3 of operand 1
;;   op6 = float (elements 0,1 of op5)
;;   op7 = (op6 < op3)          ; op3 is the V2DF zero vector
;;   op8 = op7 & op4            ; op4 is built from the constant 1.0 * 2**32
;;   op0 = op6 + op8
;; i.e. 2**32 is added back to every lane whose signed conversion came out
;; negative.
;; NOTE(review): ix86_build_const_vector (mode, 1, x) presumably replicates
;; x into every element -- confirm against its definition.
5235 (define_expand "vec_unpacku_float_hi_v4si"
5236 [(set (match_dup 5)
5237 (vec_select:V4SI
5238 (match_operand:V4SI 1 "nonimmediate_operand")
5239 (parallel [(const_int 2) (const_int 3)
5240 (const_int 2) (const_int 3)])))
5241 (set (match_dup 6)
5242 (float:V2DF
5243 (vec_select:V2SI
5244 (match_dup 5)
5245 (parallel [(const_int 0) (const_int 1)]))))
5246 (set (match_dup 7)
5247 (lt:V2DF (match_dup 6) (match_dup 3)))
5248 (set (match_dup 8)
5249 (and:V2DF (match_dup 7) (match_dup 4)))
5250 (set (match_operand:V2DF 0 "register_operand")
5251 (plus:V2DF (match_dup 6) (match_dup 8)))]
5252 "TARGET_SSE2"
5253 {
5254 REAL_VALUE_TYPE TWO32r;
5255 rtx x;
5256 int i;
5257
/* x is the DFmode constant obtained by scaling dconst1 by 2**32.  */
5258 real_ldexp (&TWO32r, &dconst1, 32);
5259 x = const_double_from_real_value (TWO32r, DFmode);
5260
5261 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
5262 operands[4] = force_reg (V2DFmode,
5263 ix86_build_const_vector (V2DFmode, 1, x));
5264
5265 operands[5] = gen_reg_rtx (V4SImode);
5266
/* operands[6..8] are the V2DF temporaries used by the template.  */
5267 for (i = 6; i < 9; i++)
5268 operands[i] = gen_reg_rtx (V2DFmode);
5269 })
5270
;; Convert the low two SI elements of V4SI operand 1 to V2DF with a fix-up
;; step after the signed conversion.  Emitted sequence:
;;   op5 = float (elements 0,1 of operand 1)
;;   op6 = (op5 < op3)          ; op3 is the V2DF zero vector
;;   op7 = op6 & op4            ; op4 is built from the constant 1.0 * 2**32
;;   op0 = op5 + op7
;; i.e. 2**32 is added back to every lane whose signed conversion came out
;; negative.
;; NOTE(review): ix86_build_const_vector (mode, 1, x) presumably replicates
;; x into every element -- confirm against its definition.
5271 (define_expand "vec_unpacku_float_lo_v4si"
5272 [(set (match_dup 5)
5273 (float:V2DF
5274 (vec_select:V2SI
5275 (match_operand:V4SI 1 "nonimmediate_operand")
5276 (parallel [(const_int 0) (const_int 1)]))))
5277 (set (match_dup 6)
5278 (lt:V2DF (match_dup 5) (match_dup 3)))
5279 (set (match_dup 7)
5280 (and:V2DF (match_dup 6) (match_dup 4)))
5281 (set (match_operand:V2DF 0 "register_operand")
5282 (plus:V2DF (match_dup 5) (match_dup 7)))]
5283 "TARGET_SSE2"
5284 {
5285 REAL_VALUE_TYPE TWO32r;
5286 rtx x;
5287 int i;
5288
/* x is the DFmode constant obtained by scaling dconst1 by 2**32.  */
5289 real_ldexp (&TWO32r, &dconst1, 32);
5290 x = const_double_from_real_value (TWO32r, DFmode);
5291
5292 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
5293 operands[4] = force_reg (V2DFmode,
5294 ix86_build_const_vector (V2DFmode, 1, x));
5295
/* operands[5..7] are the V2DF temporaries used by the template.  */
5296 for (i = 5; i < 8; i++)
5297 operands[i] = gen_reg_rtx (V2DFmode);
5298 })
5299
;; Convert the high half of V8SI operand 1 to V4DF with a fix-up step after
;; the signed conversion.  Everything is emitted from the C body:
;;   tmp[5] = high V4SI half of operand 1   (gen_vec_extract_hi_v8si)
;;   tmp[2] = signed int->double of tmp[5]  (gen_floatv4siv4df2)
;;   tmp[3] = (tmp[2] < tmp[0])             ; tmp[0] is the zero vector
;;   tmp[4] = tmp[3] & tmp[1]               ; tmp[1] is built from 2**32
;;   op0    = tmp[2] + tmp[4]
;; NOTE(review): ix86_build_const_vector (mode, 1, x) presumably replicates
;; x into every element -- confirm against its definition.
5300 (define_expand "vec_unpacku_float_hi_v8si"
5301 [(match_operand:V4DF 0 "register_operand")
5302 (match_operand:V8SI 1 "register_operand")]
5303 "TARGET_AVX"
5304 {
5305 REAL_VALUE_TYPE TWO32r;
5306 rtx x, tmp[6];
5307 int i;
5308
/* x is the DFmode constant obtained by scaling dconst1 by 2**32.  */
5309 real_ldexp (&TWO32r, &dconst1, 32);
5310 x = const_double_from_real_value (TWO32r, DFmode);
5311
5312 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
5313 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
5314 tmp[5] = gen_reg_rtx (V4SImode);
5315
/* tmp[2..4] are V4DF temporaries; tmp[5] above is the only V4SI one.  */
5316 for (i = 2; i < 5; i++)
5317 tmp[i] = gen_reg_rtx (V4DFmode);
5318 emit_insn (gen_vec_extract_hi_v8si (tmp[5], operands[1]));
5319 emit_insn (gen_floatv4siv4df2 (tmp[2], tmp[5]));
5320 emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
5321 gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
5322 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
5323 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
5324 DONE;
5325 })
5326
;; Convert the high half of V16SI operand 1 to V8DF with a fix-up step
;; after the signed conversion, using a QImode mask register instead of
;; the compare/and sequence of the narrower variants:
;;   tmp[3] = high V8SI half of operand 1   (gen_vec_extract_hi_v16si)
;;   tmp[2] = signed int->double of tmp[3]  (gen_floatv8siv8df2)
;;   k      = (tmp[2] < tmp[0])             ; tmp[0] is the zero vector
;;   tmp[2] = masked add of tmp[2] and tmp[1] under k (gen_addv8df3_mask)
;;   op0    = tmp[2]
;; NOTE(review): the gen_addv8df3_mask arguments are presumably
;; (dest, src1, src2, merge source, mask) -- confirm against the
;; addv8df3_mask pattern, which is outside this chunk.
5327 (define_expand "vec_unpacku_float_hi_v16si"
5328 [(match_operand:V8DF 0 "register_operand")
5329 (match_operand:V16SI 1 "register_operand")]
5330 "TARGET_AVX512F"
5331 {
5332 REAL_VALUE_TYPE TWO32r;
5333 rtx k, x, tmp[4];
5334
/* x is the DFmode constant obtained by scaling dconst1 by 2**32.  */
5335 real_ldexp (&TWO32r, &dconst1, 32);
5336 x = const_double_from_real_value (TWO32r, DFmode);
5337
5338 tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
5339 tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
5340 tmp[2] = gen_reg_rtx (V8DFmode);
5341 tmp[3] = gen_reg_rtx (V8SImode);
5342 k = gen_reg_rtx (QImode);
5343
5344 emit_insn (gen_vec_extract_hi_v16si (tmp[3], operands[1]));
5345 emit_insn (gen_floatv8siv8df2 (tmp[2], tmp[3]));
5346 emit_insn (gen_rtx_SET (VOIDmode, k,
5347 gen_rtx_LT (QImode, tmp[2], tmp[0])));
5348 emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
5349 emit_move_insn (operands[0], tmp[2]);
5350 DONE;
5351 })
5352
;; Convert the low half of V8SI operand 1 to V4DF with a fix-up step after
;; the signed conversion.  Everything is emitted from the C body:
;;   tmp[2] = gen_avx_cvtdq2pd256_2 applied to operand 1
;;   tmp[3] = (tmp[2] < tmp[0])             ; tmp[0] is the zero vector
;;   tmp[4] = tmp[3] & tmp[1]               ; tmp[1] is built from 2**32
;;   op0    = tmp[2] + tmp[4]
;; NOTE(review): avx_cvtdq2pd256_2 is defined outside this chunk;
;; presumably it converts the low four SI elements -- confirm.
5353 (define_expand "vec_unpacku_float_lo_v8si"
5354 [(match_operand:V4DF 0 "register_operand")
5355 (match_operand:V8SI 1 "nonimmediate_operand")]
5356 "TARGET_AVX"
5357 {
5358 REAL_VALUE_TYPE TWO32r;
5359 rtx x, tmp[5];
5360 int i;
5361
/* x is the DFmode constant obtained by scaling dconst1 by 2**32.  */
5362 real_ldexp (&TWO32r, &dconst1, 32);
5363 x = const_double_from_real_value (TWO32r, DFmode);
5364
5365 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
5366 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
5367
/* tmp[2..4] are the V4DF temporaries.  */
5368 for (i = 2; i < 5; i++)
5369 tmp[i] = gen_reg_rtx (V4DFmode);
5370 emit_insn (gen_avx_cvtdq2pd256_2 (tmp[2], operands[1]));
5371 emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
5372 gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
5373 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
5374 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
5375 DONE;
5376 })
5377
;; Convert the low half of V16SI operand 1 to V8DF with a fix-up step after
;; the signed conversion, using a QImode mask register:
;;   tmp[2] = gen_avx512f_cvtdq2pd512_2 applied to operand 1
;;   k      = (tmp[2] < tmp[0])             ; tmp[0] is the zero vector
;;   tmp[2] = masked add of tmp[2] and tmp[1] under k (gen_addv8df3_mask)
;;   op0    = tmp[2]
;; NOTE(review): avx512f_cvtdq2pd512_2 and addv8df3_mask are defined
;; outside this chunk; their exact operand roles should be confirmed there.
5378 (define_expand "vec_unpacku_float_lo_v16si"
5379 [(match_operand:V8DF 0 "register_operand")
5380 (match_operand:V16SI 1 "nonimmediate_operand")]
5381 "TARGET_AVX512F"
5382 {
5383 REAL_VALUE_TYPE TWO32r;
5384 rtx k, x, tmp[3];
5385
/* x is the DFmode constant obtained by scaling dconst1 by 2**32.  */
5386 real_ldexp (&TWO32r, &dconst1, 32);
5387 x = const_double_from_real_value (TWO32r, DFmode);
5388
5389 tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
5390 tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
5391 tmp[2] = gen_reg_rtx (V8DFmode);
5392 k = gen_reg_rtx (QImode);
5393
5394 emit_insn (gen_avx512f_cvtdq2pd512_2 (tmp[2], operands[1]));
5395 emit_insn (gen_rtx_SET (VOIDmode, k,
5396 gen_rtx_LT (QImode, tmp[2], tmp[0])));
5397 emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
5398 emit_move_insn (operands[0], tmp[2]);
5399 DONE;
5400 })
5401
;; For each mode of VF2_512_256: float_truncate operands 1 and 2 into two
;; fresh <sf2dfmode> pseudos (operands 3 and 4), then vec_concat them, in
;; that order, into the <ssePSmode> result.  Requires TARGET_AVX.
5402 (define_expand "vec_pack_trunc_<mode>"
5403 [(set (match_dup 3)
5404 (float_truncate:<sf2dfmode>
5405 (match_operand:VF2_512_256 1 "nonimmediate_operand")))
5406 (set (match_dup 4)
5407 (float_truncate:<sf2dfmode>
5408 (match_operand:VF2_512_256 2 "nonimmediate_operand")))
5409 (set (match_operand:<ssePSmode> 0 "register_operand")
5410 (vec_concat:<ssePSmode>
5411 (match_dup 3)
5412 (match_dup 4)))]
5413 "TARGET_AVX"
5414 {
5415 operands[3] = gen_reg_rtx (<sf2dfmode>mode);
5416 operands[4] = gen_reg_rtx (<sf2dfmode>mode);
5417 })
5418
;; Pack two V2DF operands into one V4SF result.  Two strategies:
;;  - TARGET_AVX && !TARGET_PREFER_AVX128: concatenate both inputs into a
;;    V4DF temporary and convert once with gen_avx_cvtpd2ps256;
;;  - otherwise: convert each input separately with gen_sse2_cvtpd2ps and
;;    combine the two V4SF results with gen_sse_movlhps.
5419 (define_expand "vec_pack_trunc_v2df"
5420 [(match_operand:V4SF 0 "register_operand")
5421 (match_operand:V2DF 1 "nonimmediate_operand")
5422 (match_operand:V2DF 2 "nonimmediate_operand")]
5423 "TARGET_SSE2"
5424 {
5425 rtx tmp0, tmp1;
5426
5427 if (TARGET_AVX && !TARGET_PREFER_AVX128)
5428 {
5429 tmp0 = gen_reg_rtx (V4DFmode);
/* Operand 1 is forced into a register; operand 2 is passed as is.  */
5430 tmp1 = force_reg (V2DFmode, operands[1]);
5431
5432 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
5433 emit_insn (gen_avx_cvtpd2ps256 (operands[0], tmp0));
5434 }
5435 else
5436 {
5437 tmp0 = gen_reg_rtx (V4SFmode);
5438 tmp1 = gen_reg_rtx (V4SFmode);
5439
5440 emit_insn (gen_sse2_cvtpd2ps (tmp0, operands[1]));
5441 emit_insn (gen_sse2_cvtpd2ps (tmp1, operands[2]));
5442 emit_insn (gen_sse_movlhps (operands[0], tmp0, tmp1));
5443 }
5444 DONE;
5445 })
5446
;; Pack two V8DF operands into one V16SI result: each input is converted
;; to a V8SI temporary with gen_fix_truncv8dfv8si2, and the two halves are
;; joined (operand 1's result first) with gen_avx_vec_concatv16si.
5447 (define_expand "vec_pack_sfix_trunc_v8df"
5448 [(match_operand:V16SI 0 "register_operand")
5449 (match_operand:V8DF 1 "nonimmediate_operand")
5450 (match_operand:V8DF 2 "nonimmediate_operand")]
5451 "TARGET_AVX512F"
5452 {
5453 rtx r1, r2;
5454
5455 r1 = gen_reg_rtx (V8SImode);
5456 r2 = gen_reg_rtx (V8SImode);
5457
5458 emit_insn (gen_fix_truncv8dfv8si2 (r1, operands[1]));
5459 emit_insn (gen_fix_truncv8dfv8si2 (r2, operands[2]));
5460 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
5461 DONE;
5462 })
5463
;; Pack two V4DF operands into one V8SI result: each input is converted
;; to a V4SI temporary with gen_fix_truncv4dfv4si2, and the two halves are
;; joined (operand 1's result first) with gen_avx_vec_concatv8si.
5464 (define_expand "vec_pack_sfix_trunc_v4df"
5465 [(match_operand:V8SI 0 "register_operand")
5466 (match_operand:V4DF 1 "nonimmediate_operand")
5467 (match_operand:V4DF 2 "nonimmediate_operand")]
5468 "TARGET_AVX"
5469 {
5470 rtx r1, r2;
5471
5472 r1 = gen_reg_rtx (V4SImode);
5473 r2 = gen_reg_rtx (V4SImode);
5474
5475 emit_insn (gen_fix_truncv4dfv4si2 (r1, operands[1]));
5476 emit_insn (gen_fix_truncv4dfv4si2 (r2, operands[2]));
5477 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
5478 DONE;
5479 })
5480
;; Pack two V2DF operands into one V4SI result using the truncating
;; conversions.  Two strategies:
;;  - TARGET_AVX && !TARGET_PREFER_AVX128: concatenate both inputs into a
;;    V4DF temporary and convert once with gen_fix_truncv4dfv4si2;
;;  - otherwise: convert each input with gen_sse2_cvttpd2dq into a V4SI
;;    temporary, then combine the two results by interleaving their low
;;    V2DI elements (gen_vec_interleave_lowv2di) and moving the V2DI
;;    temporary, viewed as V4SI, into operand 0.
5481 (define_expand "vec_pack_sfix_trunc_v2df"
5482 [(match_operand:V4SI 0 "register_operand")
5483 (match_operand:V2DF 1 "nonimmediate_operand")
5484 (match_operand:V2DF 2 "nonimmediate_operand")]
5485 "TARGET_SSE2"
5486 {
5487 rtx tmp0, tmp1, tmp2;
5488
5489 if (TARGET_AVX && !TARGET_PREFER_AVX128)
5490 {
5491 tmp0 = gen_reg_rtx (V4DFmode);
5492 tmp1 = force_reg (V2DFmode, operands[1]);
5493
5494 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
5495 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp0));
5496 }
5497 else
5498 {
5499 tmp0 = gen_reg_rtx (V4SImode);
5500 tmp1 = gen_reg_rtx (V4SImode);
5501 tmp2 = gen_reg_rtx (V2DImode);
5502
5503 emit_insn (gen_sse2_cvttpd2dq (tmp0, operands[1]));
5504 emit_insn (gen_sse2_cvttpd2dq (tmp1, operands[2]));
5505 emit_insn (gen_vec_interleave_lowv2di (tmp2,
5506 gen_lowpart (V2DImode, tmp0),
5507 gen_lowpart (V2DImode, tmp1)));
5508 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
5509 }
5510 DONE;
5511 })
5512
;; Maps a DF vector mode to the SI vector mode with twice as many elements
;; (V8DF -> V16SI, V4DF -> V8SI, V2DF -> V4SI); used as the result mode of
;; vec_pack_ufix_trunc_<mode>.
5513 (define_mode_attr ssepackfltmode
5514 [(V8DF "V16SI") (V4DF "V8SI") (V2DF "V4SI")])
5515
;; Pack two VF2 operands into one <ssepackfltmode> integer vector using
;; unsigned truncating conversion.
;;  - V8DF: convert each input with gen_ufix_truncv8dfv8si2 and join the
;;    two V8SI results with gen_avx_vec_concatv16si.
;;  - other modes: adjust each input with
;;    ix86_expand_adjust_ufix_to_sfix_si (which also fills tmp[2]/tmp[3]),
;;    pack the adjusted values with the signed vec_pack_sfix_trunc_<mode>
;;    expander, gather elements of tmp[2]/tmp[3] with
;;    ix86_expand_vec_extract_even_odd, and XOR the two results together.
;; NOTE(review): presumably tmp[2]/tmp[3] hold per-lane XOR corrections and
;; the final argument 0 selects the even elements -- confirm against the
;; helpers' definitions in i386.c.
5516 (define_expand "vec_pack_ufix_trunc_<mode>"
5517 [(match_operand:<ssepackfltmode> 0 "register_operand")
5518 (match_operand:VF2 1 "register_operand")
5519 (match_operand:VF2 2 "register_operand")]
5520 "TARGET_SSE2"
5521 {
5522 if (<MODE>mode == V8DFmode)
5523 {
5524 rtx r1, r2;
5525
5526 r1 = gen_reg_rtx (V8SImode);
5527 r2 = gen_reg_rtx (V8SImode);
5528
5529 emit_insn (gen_ufix_truncv8dfv8si2 (r1, operands[1]));
5530 emit_insn (gen_ufix_truncv8dfv8si2 (r2, operands[2]));
5531 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
5532 }
5533 else
5534 {
5535 rtx tmp[7];
5536 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
5537 tmp[1] = ix86_expand_adjust_ufix_to_sfix_si (operands[2], &tmp[3]);
5538 tmp[4] = gen_reg_rtx (<ssepackfltmode>mode);
5539 emit_insn (gen_vec_pack_sfix_trunc_<mode> (tmp[4], tmp[0], tmp[1]));
5540 if (<ssepackfltmode>mode == V4SImode || TARGET_AVX2)
5541 {
5542 tmp[5] = gen_reg_rtx (<ssepackfltmode>mode);
5543 ix86_expand_vec_extract_even_odd (tmp[5], tmp[2], tmp[3], 0);
5544 }
5545 else
5546 {
/* Without AVX2 the V8SI extraction is done in V8SFmode on lowparts and
   the result is then viewed as V8SI again.  */
5547 tmp[5] = gen_reg_rtx (V8SFmode);
5548 ix86_expand_vec_extract_even_odd (tmp[5], gen_lowpart (V8SFmode, tmp[2]),
5549 gen_lowpart (V8SFmode, tmp[3]), 0);
5550 tmp[5] = gen_lowpart (V8SImode, tmp[5]);
5551 }
5552 tmp[6] = expand_simple_binop (<ssepackfltmode>mode, XOR, tmp[4], tmp[5],
5553 operands[0], 0, OPTAB_DIRECT);
/* expand_simple_binop may return a register other than the requested
   target; copy the value over in that case.  */
5554 if (tmp[6] != operands[0])
5555 emit_move_insn (operands[0], tmp[6]);
5556 }
5557
5558 DONE;
5559 })
5560
;; Pack two V4DF operands into one V8SI result: each input is converted
;; to a V4SI temporary with gen_avx_cvtpd2dq256 (the non-truncating
;; cvtpd2dq form), and the two halves are joined (operand 1's result
;; first) with gen_avx_vec_concatv8si.
5561 (define_expand "vec_pack_sfix_v4df"
5562 [(match_operand:V8SI 0 "register_operand")
5563 (match_operand:V4DF 1 "nonimmediate_operand")
5564 (match_operand:V4DF 2 "nonimmediate_operand")]
5565 "TARGET_AVX"
5566 {
5567 rtx r1, r2;
5568
5569 r1 = gen_reg_rtx (V4SImode);
5570 r2 = gen_reg_rtx (V4SImode);
5571
5572 emit_insn (gen_avx_cvtpd2dq256 (r1, operands[1]));
5573 emit_insn (gen_avx_cvtpd2dq256 (r2, operands[2]));
5574 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
5575 DONE;
5576 })
5577
;; Pack two V2DF operands into one V4SI result using the cvtpd2dq
;; conversions.  Two strategies:
;;  - TARGET_AVX && !TARGET_PREFER_AVX128: concatenate both inputs into a
;;    V4DF temporary and convert once with gen_avx_cvtpd2dq256;
;;  - otherwise: convert each input with gen_sse2_cvtpd2dq into a V4SI
;;    temporary, then combine the two results by interleaving their low
;;    V2DI elements (gen_vec_interleave_lowv2di) and moving the V2DI
;;    temporary, viewed as V4SI, into operand 0.
5578 (define_expand "vec_pack_sfix_v2df"
5579 [(match_operand:V4SI 0 "register_operand")
5580 (match_operand:V2DF 1 "nonimmediate_operand")
5581 (match_operand:V2DF 2 "nonimmediate_operand")]
5582 "TARGET_SSE2"
5583 {
5584 rtx tmp0, tmp1, tmp2;
5585
5586 if (TARGET_AVX && !TARGET_PREFER_AVX128)
5587 {
5588 tmp0 = gen_reg_rtx (V4DFmode);
5589 tmp1 = force_reg (V2DFmode, operands[1]);
5590
5591 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
5592 emit_insn (gen_avx_cvtpd2dq256 (operands[0], tmp0));
5593 }
5594 else
5595 {
5596 tmp0 = gen_reg_rtx (V4SImode);
5597 tmp1 = gen_reg_rtx (V4SImode);
5598 tmp2 = gen_reg_rtx (V2DImode);
5599
5600 emit_insn (gen_sse2_cvtpd2dq (tmp0, operands[1]));
5601 emit_insn (gen_sse2_cvtpd2dq (tmp1, operands[2]));
5602 emit_insn (gen_vec_interleave_lowv2di (tmp2,
5603 gen_lowpart (V2DImode, tmp0),
5604 gen_lowpart (V2DImode, tmp1)));
5605 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
5606 }
5607 DONE;
5608 })
5609
5610 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5611 ;;
5612 ;; Parallel single-precision floating point element swizzling
5613 ;;
5614 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5615
;; Expander wrapper around the sse_movhlps insn.  The template selects
;; indices 6,7,2,3 of the concatenation (operand 1, operand 2), i.e.
;; elements 2,3 of operand 2 followed by elements 2,3 of operand 1.
;; The C body lets ix86_fixup_binary_operands adjust the operands and pick
;; a destination, emits sse_movhlps into it, and copies the value to
;; operand 0 if a different destination was chosen.
5616 (define_expand "sse_movhlps_exp"
5617 [(set (match_operand:V4SF 0 "nonimmediate_operand")
5618 (vec_select:V4SF
5619 (vec_concat:V8SF
5620 (match_operand:V4SF 1 "nonimmediate_operand")
5621 (match_operand:V4SF 2 "nonimmediate_operand"))
5622 (parallel [(const_int 6)
5623 (const_int 7)
5624 (const_int 2)
5625 (const_int 3)])))]
5626 "TARGET_SSE"
5627 {
5628 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
5629
5630 emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));
5631
5632 /* Fix up the destination if needed. */
5633 if (dst != operands[0])
5634 emit_move_insn (operands[0], dst);
5635
5636 DONE;
5637 })
5638
;; Result = elements 2,3 of operand 2 followed by elements 2,3 of
;; operand 1.  The insn condition rejects the case where operands 1 and 2
;; are both memory.  Alternatives, in order:
;;   0: register operands, operand 1 tied to the destination -> movhlps
;;   1: register operands, three-operand form (avx)           -> vmovhlps
;;   2: operand 2 in offsettable memory, operand 1 tied       -> movlps %H2
;;   3: as 2, three-operand form (avx)                        -> vmovlps %H2
;;   4: memory destination tied to operand 1, register source -> %vmovhps
;; NOTE(review): %H2 presumably addresses the upper 8 bytes of the memory
;; operand -- confirm against the operand-print code in i386.c.
5639 (define_insn "sse_movhlps"
5640 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
5641 (vec_select:V4SF
5642 (vec_concat:V8SF
5643 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
5644 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,o,o,x"))
5645 (parallel [(const_int 6)
5646 (const_int 7)
5647 (const_int 2)
5648 (const_int 3)])))]
5649 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
5650 "@
5651 movhlps\t{%2, %0|%0, %2}
5652 vmovhlps\t{%2, %1, %0|%0, %1, %2}
5653 movlps\t{%H2, %0|%0, %H2}
5654 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
5655 %vmovhps\t{%2, %0|%q0, %2}"
5656 [(set_attr "isa" "noavx,avx,noavx,avx,*")
5657 (set_attr "type" "ssemov")
5658 (set_attr "ssememalign" "64")
5659 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
5660 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
5661
;; Expander wrapper around the sse_movlhps insn.  The template selects
;; indices 0,1,4,5 of the concatenation (operand 1, operand 2), i.e.
;; elements 0,1 of operand 1 followed by elements 0,1 of operand 2.
;; The C body lets ix86_fixup_binary_operands adjust the operands and pick
;; a destination, emits sse_movlhps into it, and copies the value to
;; operand 0 if a different destination was chosen.
5662 (define_expand "sse_movlhps_exp"
5663 [(set (match_operand:V4SF 0 "nonimmediate_operand")
5664 (vec_select:V4SF
5665 (vec_concat:V8SF
5666 (match_operand:V4SF 1 "nonimmediate_operand")
5667 (match_operand:V4SF 2 "nonimmediate_operand"))
5668 (parallel [(const_int 0)
5669 (const_int 1)
5670 (const_int 4)
5671 (const_int 5)])))]
5672 "TARGET_SSE"
5673 {
5674 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
5675
5676 emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));
5677
5678 /* Fix up the destination if needed. */
5679 if (dst != operands[0])
5680 emit_move_insn (operands[0], dst);
5681
5682 DONE;
5683 })
5684
;; Result = elements 0,1 of operand 1 followed by elements 0,1 of
;; operand 2.  Operand validity is delegated to ix86_binary_operator_ok.
;; Alternatives, in order:
;;   0: register operands, operand 1 tied to the destination -> movlhps
;;   1: register operands, three-operand form (avx)           -> vmovlhps
;;   2: operand 2 in memory, operand 1 tied                   -> movhps
;;   3: as 2, three-operand form (avx)                        -> vmovhps
;;   4: offsettable memory destination tied to operand 1,
;;      register source                                       -> %vmovlps to %H0
;; NOTE(review): %H0 presumably addresses the upper 8 bytes of the memory
;; destination -- confirm against the operand-print code in i386.c.
5685 (define_insn "sse_movlhps"
5686 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
5687 (vec_select:V4SF
5688 (vec_concat:V8SF
5689 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
5690 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,m,m,x"))
5691 (parallel [(const_int 0)
5692 (const_int 1)
5693 (const_int 4)
5694 (const_int 5)])))]
5695 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
5696 "@
5697 movlhps\t{%2, %0|%0, %2}
5698 vmovlhps\t{%2, %1, %0|%0, %1, %2}
5699 movhps\t{%2, %0|%0, %q2}
5700 vmovhps\t{%2, %1, %0|%0, %1, %q2}
5701 %vmovlps\t{%2, %H0|%H0, %2}"
5702 [(set_attr "isa" "noavx,avx,noavx,avx,*")
5703 (set_attr "type" "ssemov")
5704 (set_attr "ssememalign" "64")
5705 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
5706 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
5707
;; 512-bit vunpckhps.  Within each group of four elements, elements 2 and 3
;; of operand 1 are interleaved with elements 2 and 3 of operand 2
;; (indices 16..31 of the concatenation refer to operand 2).
5708 (define_insn "<mask_codefor>avx512f_unpckhps512<mask_name>"
5709 [(set (match_operand:V16SF 0 "register_operand" "=v")
5710 (vec_select:V16SF
5711 (vec_concat:V32SF
5712 (match_operand:V16SF 1 "register_operand" "v")
5713 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
5714 (parallel [(const_int 2) (const_int 18)
5715 (const_int 3) (const_int 19)
5716 (const_int 6) (const_int 22)
5717 (const_int 7) (const_int 23)
5718 (const_int 10) (const_int 26)
5719 (const_int 11) (const_int 27)
5720 (const_int 14) (const_int 30)
5721 (const_int 15) (const_int 31)])))]
5722 "TARGET_AVX512F"
5723 "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
5724 [(set_attr "type" "sselog")
5725 (set_attr "prefix" "evex")
5726 (set_attr "mode" "V16SF")])
5727
5728 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit vunpckhps.  Within each group of four elements, elements 2 and 3
;; of operand 1 are interleaved with elements 2 and 3 of operand 2
;; (indices 8..15 of the concatenation refer to operand 2).
5729 (define_insn "avx_unpckhps256<mask_name>"
5730 [(set (match_operand:V8SF 0 "register_operand" "=v")
5731 (vec_select:V8SF
5732 (vec_concat:V16SF
5733 (match_operand:V8SF 1 "register_operand" "v")
5734 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
5735 (parallel [(const_int 2) (const_int 10)
5736 (const_int 3) (const_int 11)
5737 (const_int 6) (const_int 14)
5738 (const_int 7) (const_int 15)])))]
5739 "TARGET_AVX && <mask_avx512vl_condition>"
5740 "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
5741 [(set_attr "type" "sselog")
5742 (set_attr "prefix" "vex")
5743 (set_attr "mode" "V8SF")])
5744
;; Interleave the high halves of two V8SF operands in three steps:
;;  1. op3 = selection 0,8,1,9,4,12,5,13 of (op1, op2)   (unpcklps shape)
;;  2. op4 = selection 2,10,3,11,6,14,7,15 of (op1, op2) (unpckhps shape)
;;  3. op0 = elements 4..7 of op3 followed by elements 4..7 of op4
;;     (indices 12..15 of the second concatenation refer to op4).
;; Operands 3 and 4 are fresh V8SF pseudos.
;; NOTE(review): the "x"/"xm" constraint strings sit on match_operands of
;; a define_expand; presumably they have no effect here -- confirm.
5745 (define_expand "vec_interleave_highv8sf"
5746 [(set (match_dup 3)
5747 (vec_select:V8SF
5748 (vec_concat:V16SF
5749 (match_operand:V8SF 1 "register_operand" "x")
5750 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
5751 (parallel [(const_int 0) (const_int 8)
5752 (const_int 1) (const_int 9)
5753 (const_int 4) (const_int 12)
5754 (const_int 5) (const_int 13)])))
5755 (set (match_dup 4)
5756 (vec_select:V8SF
5757 (vec_concat:V16SF
5758 (match_dup 1)
5759 (match_dup 2))
5760 (parallel [(const_int 2) (const_int 10)
5761 (const_int 3) (const_int 11)
5762 (const_int 6) (const_int 14)
5763 (const_int 7) (const_int 15)])))
5764 (set (match_operand:V8SF 0 "register_operand")
5765 (vec_select:V8SF
5766 (vec_concat:V16SF
5767 (match_dup 3)
5768 (match_dup 4))
5769 (parallel [(const_int 4) (const_int 5)
5770 (const_int 6) (const_int 7)
5771 (const_int 12) (const_int 13)
5772 (const_int 14) (const_int 15)])))]
5773 "TARGET_AVX"
5774 {
5775 operands[3] = gen_reg_rtx (V8SFmode);
5776 operands[4] = gen_reg_rtx (V8SFmode);
5777 })
5778
;; 128-bit unpckhps: result is op1[2], op2[2], op1[3], op2[3] (indices 4..7
;; of the concatenation refer to operand 2).  Alternative 0 is the
;; two-operand form with operand 1 tied to the destination (noavx);
;; alternative 1 is the three-operand vunpckhps form (avx).
5779 (define_insn "vec_interleave_highv4sf<mask_name>"
5780 [(set (match_operand:V4SF 0 "register_operand" "=x,v")
5781 (vec_select:V4SF
5782 (vec_concat:V8SF
5783 (match_operand:V4SF 1 "register_operand" "0,v")
5784 (match_operand:V4SF 2 "nonimmediate_operand" "xm,vm"))
5785 (parallel [(const_int 2) (const_int 6)
5786 (const_int 3) (const_int 7)])))]
5787 "TARGET_SSE && <mask_avx512vl_condition>"
5788 "@
5789 unpckhps\t{%2, %0|%0, %2}
5790 vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
5791 [(set_attr "isa" "noavx,avx")
5792 (set_attr "type" "sselog")
5793 (set_attr "prefix" "orig,vex")
5794 (set_attr "mode" "V4SF")])
5795
;; 512-bit vunpcklps.  Within each group of four elements, elements 0 and 1
;; of operand 1 are interleaved with elements 0 and 1 of operand 2
;; (indices 16..31 of the concatenation refer to operand 2).
5796 (define_insn "<mask_codefor>avx512f_unpcklps512<mask_name>"
5797 [(set (match_operand:V16SF 0 "register_operand" "=v")
5798 (vec_select:V16SF
5799 (vec_concat:V32SF
5800 (match_operand:V16SF 1 "register_operand" "v")
5801 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
5802 (parallel [(const_int 0) (const_int 16)
5803 (const_int 1) (const_int 17)
5804 (const_int 4) (const_int 20)
5805 (const_int 5) (const_int 21)
5806 (const_int 8) (const_int 24)
5807 (const_int 9) (const_int 25)
5808 (const_int 12) (const_int 28)
5809 (const_int 13) (const_int 29)])))]
5810 "TARGET_AVX512F"
5811 "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
5812 [(set_attr "type" "sselog")
5813 (set_attr "prefix" "evex")
5814 (set_attr "mode" "V16SF")])
5815
5816 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit vunpcklps.  Within each group of four elements, elements 0 and 1
;; of operand 1 are interleaved with elements 0 and 1 of operand 2
;; (indices 8..15 of the concatenation refer to operand 2).
5817 (define_insn "avx_unpcklps256<mask_name>"
5818 [(set (match_operand:V8SF 0 "register_operand" "=v")
5819 (vec_select:V8SF
5820 (vec_concat:V16SF
5821 (match_operand:V8SF 1 "register_operand" "v")
5822 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
5823 (parallel [(const_int 0) (const_int 8)
5824 (const_int 1) (const_int 9)
5825 (const_int 4) (const_int 12)
5826 (const_int 5) (const_int 13)])))]
5827 "TARGET_AVX && <mask_avx512vl_condition>"
5828 "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
5829 [(set_attr "type" "sselog")
5830 (set_attr "prefix" "vex")
5831 (set_attr "mode" "V8SF")])
5832
;; Explicitly masked 128-bit vunpcklps.  The interleave op1[0], op2[0],
;; op1[1], op2[1] is merged with operand 3 under the QImode mask in
;; operand 4.  Operand 3 is either tied to the destination ("0") or
;; satisfies constraint "C"; the mask is printed as %{%4%} followed by %N3.
;; Enabled only under TARGET_AVX512VL.
5833 (define_insn "unpcklps128_mask"
5834 [(set (match_operand:V4SF 0 "register_operand" "=v")
5835 (vec_merge:V4SF
5836 (vec_select:V4SF
5837 (vec_concat:V8SF
5838 (match_operand:V4SF 1 "register_operand" "v")
5839 (match_operand:V4SF 2 "nonimmediate_operand" "vm"))
5840 (parallel [(const_int 0) (const_int 4)
5841 (const_int 1) (const_int 5)]))
5842 (match_operand:V4SF 3 "vector_move_operand" "0C")
5843 (match_operand:QI 4 "register_operand" "Yk")))]
5844 "TARGET_AVX512VL"
5845 "vunpcklps\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
5846 [(set_attr "type" "sselog")
5847 (set_attr "prefix" "evex")
5848 (set_attr "mode" "V4SF")])
5849
;; Interleave the low halves of two V8SF operands in three steps:
;;  1. op3 = selection 0,8,1,9,4,12,5,13 of (op1, op2)   (unpcklps shape)
;;  2. op4 = selection 2,10,3,11,6,14,7,15 of (op1, op2) (unpckhps shape)
;;  3. op0 = elements 0..3 of op3 followed by elements 0..3 of op4
;;     (indices 8..11 of the second concatenation refer to op4).
;; Operands 3 and 4 are fresh V8SF pseudos.
;; NOTE(review): the "x"/"xm" constraint strings sit on match_operands of
;; a define_expand; presumably they have no effect here -- confirm.
5850 (define_expand "vec_interleave_lowv8sf"
5851 [(set (match_dup 3)
5852 (vec_select:V8SF
5853 (vec_concat:V16SF
5854 (match_operand:V8SF 1 "register_operand" "x")
5855 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
5856 (parallel [(const_int 0) (const_int 8)
5857 (const_int 1) (const_int 9)
5858 (const_int 4) (const_int 12)
5859 (const_int 5) (const_int 13)])))
5860 (set (match_dup 4)
5861 (vec_select:V8SF
5862 (vec_concat:V16SF
5863 (match_dup 1)
5864 (match_dup 2))
5865 (parallel [(const_int 2) (const_int 10)
5866 (const_int 3) (const_int 11)
5867 (const_int 6) (const_int 14)
5868 (const_int 7) (const_int 15)])))
5869 (set (match_operand:V8SF 0 "register_operand")
5870 (vec_select:V8SF
5871 (vec_concat:V16SF
5872 (match_dup 3)
5873 (match_dup 4))
5874 (parallel [(const_int 0) (const_int 1)
5875 (const_int 2) (const_int 3)
5876 (const_int 8) (const_int 9)
5877 (const_int 10) (const_int 11)])))]
5878 "TARGET_AVX"
5879 {
5880 operands[3] = gen_reg_rtx (V8SFmode);
5881 operands[4] = gen_reg_rtx (V8SFmode);
5882 })
5883
;; 128-bit unpcklps: result is op1[0], op2[0], op1[1], op2[1] (indices 4..7
;; of the concatenation refer to operand 2).  Alternative 0 is the
;; two-operand form with operand 1 tied to the destination (noavx);
;; alternative 1 is the three-operand vunpcklps form (avx).
5884 (define_insn "vec_interleave_lowv4sf"
5885 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
5886 (vec_select:V4SF
5887 (vec_concat:V8SF
5888 (match_operand:V4SF 1 "register_operand" "0,x")
5889 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
5890 (parallel [(const_int 0) (const_int 4)
5891 (const_int 1) (const_int 5)])))]
5892 "TARGET_SSE"
5893 "@
5894 unpcklps\t{%2, %0|%0, %2}
5895 vunpcklps\t{%2, %1, %0|%0, %1, %2}"
5896 [(set_attr "isa" "noavx,avx")
5897 (set_attr "type" "sselog")
5898 (set_attr "prefix" "orig,vex")
5899 (set_attr "mode" "V4SF")])
5900
5901 ;; These are modeled with the same vec_concat as the others so that we
5902 ;; capture users of shufps that can use the new instructions.
;; 256-bit vmovshdup: every odd-indexed element of operand 1 is duplicated
;; into the even position below it (selection 1,1,3,3,5,5,7,7).  The
;; source is written as a vec_concat of operand 1 with itself.
5903 (define_insn "avx_movshdup256<mask_name>"
5904 [(set (match_operand:V8SF 0 "register_operand" "=v")
5905 (vec_select:V8SF
5906 (vec_concat:V16SF
5907 (match_operand:V8SF 1 "nonimmediate_operand" "vm")
5908 (match_dup 1))
5909 (parallel [(const_int 1) (const_int 1)
5910 (const_int 3) (const_int 3)
5911 (const_int 5) (const_int 5)
5912 (const_int 7) (const_int 7)])))]
5913 "TARGET_AVX && <mask_avx512vl_condition>"
5914 "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5915 [(set_attr "type" "sse")
5916 (set_attr "prefix" "vex")
5917 (set_attr "mode" "V8SF")])
5918
;; 128-bit movshdup.  The source is operand 1 concatenated with itself, so
;; index 7 denotes element 3 of the second copy; the selection 1,1,7,7
;; therefore yields op1[1], op1[1], op1[3], op1[3].
5919 (define_insn "sse3_movshdup<mask_name>"
5920 [(set (match_operand:V4SF 0 "register_operand" "=v")
5921 (vec_select:V4SF
5922 (vec_concat:V8SF
5923 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
5924 (match_dup 1))
5925 (parallel [(const_int 1)
5926 (const_int 1)
5927 (const_int 7)
5928 (const_int 7)])))]
5929 "TARGET_SSE3 && <mask_avx512vl_condition>"
5930 "%vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5931 [(set_attr "type" "sse")
5932 (set_attr "prefix_rep" "1")
5933 (set_attr "prefix" "maybe_vex")
5934 (set_attr "mode" "V4SF")])
5935
;; 512-bit vmovshdup: every odd-indexed element of operand 1 is duplicated
;; into the even position below it (selection 1,1,3,3,...,15,15).  The
;; source is written as a vec_concat of operand 1 with itself.
5936 (define_insn "<mask_codefor>avx512f_movshdup512<mask_name>"
5937 [(set (match_operand:V16SF 0 "register_operand" "=v")
5938 (vec_select:V16SF
5939 (vec_concat:V32SF
5940 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
5941 (match_dup 1))
5942 (parallel [(const_int 1) (const_int 1)
5943 (const_int 3) (const_int 3)
5944 (const_int 5) (const_int 5)
5945 (const_int 7) (const_int 7)
5946 (const_int 9) (const_int 9)
5947 (const_int 11) (const_int 11)
5948 (const_int 13) (const_int 13)
5949 (const_int 15) (const_int 15)])))]
5950 "TARGET_AVX512F"
5951 "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5952 [(set_attr "type" "sse")
5953 (set_attr "prefix" "evex")
5954 (set_attr "mode" "V16SF")])
5955
;; 256-bit vmovsldup: every even-indexed element of operand 1 is duplicated
;; into the odd position above it (selection 0,0,2,2,4,4,6,6).  The
;; source is written as a vec_concat of operand 1 with itself.
5956 (define_insn "avx_movsldup256<mask_name>"
5957 [(set (match_operand:V8SF 0 "register_operand" "=v")
5958 (vec_select:V8SF
5959 (vec_concat:V16SF
5960 (match_operand:V8SF 1 "nonimmediate_operand" "vm")
5961 (match_dup 1))
5962 (parallel [(const_int 0) (const_int 0)
5963 (const_int 2) (const_int 2)
5964 (const_int 4) (const_int 4)
5965 (const_int 6) (const_int 6)])))]
5966 "TARGET_AVX && <mask_avx512vl_condition>"
5967 "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5968 [(set_attr "type" "sse")
5969 (set_attr "prefix" "vex")
5970 (set_attr "mode" "V8SF")])
5971
;; 128-bit movsldup.  The source is operand 1 concatenated with itself, so
;; index 6 denotes element 2 of the second copy; the selection 0,0,6,6
;; therefore yields op1[0], op1[0], op1[2], op1[2].
5972 (define_insn "sse3_movsldup<mask_name>"
5973 [(set (match_operand:V4SF 0 "register_operand" "=v")
5974 (vec_select:V4SF
5975 (vec_concat:V8SF
5976 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
5977 (match_dup 1))
5978 (parallel [(const_int 0)
5979 (const_int 0)
5980 (const_int 6)
5981 (const_int 6)])))]
5982 "TARGET_SSE3 && <mask_avx512vl_condition>"
5983 "%vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5984 [(set_attr "type" "sse")
5985 (set_attr "prefix_rep" "1")
5986 (set_attr "prefix" "maybe_vex")
5987 (set_attr "mode" "V4SF")])
5988
;; 512-bit vmovsldup: every even-indexed element of operand 1 is duplicated
;; into the odd position above it (selection 0,0,2,2,...,14,14).  The
;; source is written as a vec_concat of operand 1 with itself.
5989 (define_insn "<mask_codefor>avx512f_movsldup512<mask_name>"
5990 [(set (match_operand:V16SF 0 "register_operand" "=v")
5991 (vec_select:V16SF
5992 (vec_concat:V32SF
5993 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
5994 (match_dup 1))
5995 (parallel [(const_int 0) (const_int 0)
5996 (const_int 2) (const_int 2)
5997 (const_int 4) (const_int 4)
5998 (const_int 6) (const_int 6)
5999 (const_int 8) (const_int 8)
6000 (const_int 10) (const_int 10)
6001 (const_int 12) (const_int 12)
6002 (const_int 14) (const_int 14)])))]
6003 "TARGET_AVX512F"
6004 "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6005 [(set_attr "type" "sse")
6006 (set_attr "prefix" "evex")
6007 (set_attr "mode" "V16SF")])
6008
;; Expander taking the shuffle immediate as const_int operand 3 and
;; decoding it into the eight explicit element indices expected by
;; avx_shufps256_1.  Each 2-bit field of the immediate is used twice:
;; fields 0 and 1 index operand 1 (offsets +0 and +4), fields 2 and 3 index
;; operand 2 (offsets +8 and +12, since operand 2 occupies indices 8..15
;; of the concatenation).
6009 (define_expand "avx_shufps256<mask_expand4_name>"
6010 [(match_operand:V8SF 0 "register_operand")
6011 (match_operand:V8SF 1 "register_operand")
6012 (match_operand:V8SF 2 "nonimmediate_operand")
6013 (match_operand:SI 3 "const_int_operand")]
6014 "TARGET_AVX"
6015 {
6016 int mask = INTVAL (operands[3]);
6017 emit_insn (gen_avx_shufps256_1<mask_expand4_name> (operands[0],
6018 operands[1],
6019 operands[2],
6020 GEN_INT ((mask >> 0) & 3),
6021 GEN_INT ((mask >> 2) & 3),
6022 GEN_INT (((mask >> 4) & 3) + 8),
6023 GEN_INT (((mask >> 6) & 3) + 8),
6024 GEN_INT (((mask >> 0) & 3) + 4),
6025 GEN_INT (((mask >> 2) & 3) + 4),
6026 GEN_INT (((mask >> 4) & 3) + 12),
6027 GEN_INT (((mask >> 6) & 3) + 12)
6028 <mask_expand4_args>));
6029 DONE;
6030 })
6031
6032 ;; Each 2-bit field in mask selects 2 elements, one per 128-bit half.
;; 256-bit VSHUFPS expressed as a vec_select over the V16SF concatenation
;; of operands 1 and 2.  Operands 3..10 are the eight selector indices.
;; The insn condition requires selectors 7..10 to equal selectors 3..6
;; plus 4, so only operands 3..6 carry information; the output code packs
;; them (after removing the +8 bias on operands 5 and 6) into the 8-bit
;; immediate and overwrites operands[3] with it before printing.
6033 (define_insn "avx_shufps256_1<mask_name>"
6034 [(set (match_operand:V8SF 0 "register_operand" "=v")
6035 (vec_select:V8SF
6036 (vec_concat:V16SF
6037 (match_operand:V8SF 1 "register_operand" "v")
6038 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
6039 (parallel [(match_operand 3 "const_0_to_3_operand" )
6040 (match_operand 4 "const_0_to_3_operand" )
6041 (match_operand 5 "const_8_to_11_operand" )
6042 (match_operand 6 "const_8_to_11_operand" )
6043 (match_operand 7 "const_4_to_7_operand" )
6044 (match_operand 8 "const_4_to_7_operand" )
6045 (match_operand 9 "const_12_to_15_operand")
6046 (match_operand 10 "const_12_to_15_operand")])))]
6047 "TARGET_AVX
6048 && <mask_avx512vl_condition>
6049 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
6050 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
6051 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
6052 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
6053 {
6054 int mask;
6055 mask = INTVAL (operands[3]);
6056 mask |= INTVAL (operands[4]) << 2;
6057 mask |= (INTVAL (operands[5]) - 8) << 4;
6058 mask |= (INTVAL (operands[6]) - 8) << 6;
6059 operands[3] = GEN_INT (mask);
6060
6061 return "vshufps\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
6062 }
6063 [(set_attr "type" "sseshuf")
6064 (set_attr "length_immediate" "1")
6065 (set_attr "prefix" "<mask_prefix>")
6066 (set_attr "mode" "V8SF")])
6067
;; Expander for 128-bit SHUFPS taking the raw 8-bit immediate (operand 3).
;; The four 2-bit fields become explicit selector indices for
;; sse_shufps_v4sf: the low two fields index operand 1 (0..3), the high
;; two fields index operand 2 and are therefore biased by 4.
6068 (define_expand "sse_shufps<mask_expand4_name>"
6069 [(match_operand:V4SF 0 "register_operand")
6070 (match_operand:V4SF 1 "register_operand")
6071 (match_operand:V4SF 2 "nonimmediate_operand")
6072 (match_operand:SI 3 "const_int_operand")]
6073 "TARGET_SSE"
6074 {
6075 int mask = INTVAL (operands[3]);
6076 emit_insn (gen_sse_shufps_v4sf<mask_expand4_name> (operands[0],
6077 operands[1],
6078 operands[2],
6079 GEN_INT ((mask >> 0) & 3),
6080 GEN_INT ((mask >> 2) & 3),
6081 GEN_INT (((mask >> 4) & 3) + 4),
6082 GEN_INT (((mask >> 6) & 3) + 4)
6083 <mask_expand4_args>));
6084 DONE;
6085 })
6086
;; Masked 128-bit VSHUFPS (requires TARGET_AVX512VL).  The shuffle result
;; is merged with operand 7 (either the destination itself or a constant,
;; per the "0C" constraint) under the mask register in operand 8.
;; Selector operands 3..6 are re-packed into the 8-bit immediate, with the
;; +4 bias removed from operands 5 and 6, and stored back in operands[3].
6087 (define_insn "sse_shufps_v4sf_mask"
6088 [(set (match_operand:V4SF 0 "register_operand" "=v")
6089 (vec_merge:V4SF
6090 (vec_select:V4SF
6091 (vec_concat:V8SF
6092 (match_operand:V4SF 1 "register_operand" "v")
6093 (match_operand:V4SF 2 "nonimmediate_operand" "vm"))
6094 (parallel [(match_operand 3 "const_0_to_3_operand")
6095 (match_operand 4 "const_0_to_3_operand")
6096 (match_operand 5 "const_4_to_7_operand")
6097 (match_operand 6 "const_4_to_7_operand")]))
6098 (match_operand:V4SF 7 "vector_move_operand" "0C")
6099 (match_operand:QI 8 "register_operand" "Yk")))]
6100 "TARGET_AVX512VL"
6101 {
6102 int mask = 0;
6103 mask |= INTVAL (operands[3]) << 0;
6104 mask |= INTVAL (operands[4]) << 2;
6105 mask |= (INTVAL (operands[5]) - 4) << 4;
6106 mask |= (INTVAL (operands[6]) - 4) << 6;
6107 operands[3] = GEN_INT (mask);
6108
6109 return "vshufps\t{%3, %2, %1, %0%{%8%}%N7|%0%{%8%}%N7, %1, %2, %3}";
6110 }
6111 [(set_attr "type" "sseshuf")
6112 (set_attr "length_immediate" "1")
6113 (set_attr "prefix" "evex")
6114 (set_attr "mode" "V4SF")])
6115
;; Unmasked 128-bit SHUFPS over the modes of the VI4F_128 iterator.
;; Selector operands 3..6 are re-packed into the 8-bit immediate (the +4
;; bias on operands 5 and 6 is removed) and stored back in operands[3].
;; Alternative 0 is the two-operand SSE form with operand 1 tied to the
;; destination; alternative 1 is the three-operand VEX form.
6116 (define_insn "sse_shufps_<mode>"
6117 [(set (match_operand:VI4F_128 0 "register_operand" "=x,x")
6118 (vec_select:VI4F_128
6119 (vec_concat:<ssedoublevecmode>
6120 (match_operand:VI4F_128 1 "register_operand" "0,x")
6121 (match_operand:VI4F_128 2 "nonimmediate_operand" "xm,xm"))
6122 (parallel [(match_operand 3 "const_0_to_3_operand")
6123 (match_operand 4 "const_0_to_3_operand")
6124 (match_operand 5 "const_4_to_7_operand")
6125 (match_operand 6 "const_4_to_7_operand")])))]
6126 "TARGET_SSE"
6127 {
6128 int mask = 0;
6129 mask |= INTVAL (operands[3]) << 0;
6130 mask |= INTVAL (operands[4]) << 2;
6131 mask |= (INTVAL (operands[5]) - 4) << 4;
6132 mask |= (INTVAL (operands[6]) - 4) << 6;
6133 operands[3] = GEN_INT (mask);
6134
6135 switch (which_alternative)
6136 {
6137 case 0:
6138 return "shufps\t{%3, %2, %0|%0, %2, %3}";
6139 case 1:
6140 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6141 default:
6142 gcc_unreachable ();
6143 }
6144 }
6145 [(set_attr "isa" "noavx,avx")
6146 (set_attr "type" "sseshuf")
6147 (set_attr "length_immediate" "1")
6148 (set_attr "prefix" "orig,vex")
6149 (set_attr "mode" "V4SF")])
6150
;; Extract the high half (elements 2 and 3) of V4SF operand 1 into V2SF
;; operand 0.  Alternatives: register -> memory (movhps), register ->
;; register (movhlps), and offsettable memory -> register (movlps on the
;; %H1 form of the address; presumably the upper 8 bytes -- the operand
;; modifier is defined outside this chunk).
6151 (define_insn "sse_storehps"
6152 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
6153 (vec_select:V2SF
6154 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
6155 (parallel [(const_int 2) (const_int 3)])))]
6156 "TARGET_SSE"
6157 "@
6158 %vmovhps\t{%1, %0|%q0, %1}
6159 %vmovhlps\t{%1, %d0|%d0, %1}
6160 %vmovlps\t{%H1, %d0|%d0, %H1}"
6161 [(set_attr "type" "ssemov")
6162 (set_attr "ssememalign" "64")
6163 (set_attr "prefix" "maybe_vex")
6164 (set_attr "mode" "V2SF,V4SF,V2SF")])
6165
;; Expander wrapper around sse_loadhps.  ix86_fixup_binary_operands may
;; return a destination different from operands[0]; the insn is emitted
;; into that destination and, if it differs, the result is then copied to
;; operands[0].
6166 (define_expand "sse_loadhps_exp"
6167 [(set (match_operand:V4SF 0 "nonimmediate_operand")
6168 (vec_concat:V4SF
6169 (vec_select:V2SF
6170 (match_operand:V4SF 1 "nonimmediate_operand")
6171 (parallel [(const_int 0) (const_int 1)]))
6172 (match_operand:V2SF 2 "nonimmediate_operand")))]
6173 "TARGET_SSE"
6174 {
6175 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
6176
6177 emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));
6178
6179 /* Fix up the destination if needed. */
6180 if (dst != operands[0])
6181 emit_move_insn (operands[0], dst);
6182
6183 DONE;
6184 })
6185
;; Replace the high half of a V4SF: the result keeps elements 0 and 1 of
;; operand 1 and takes V2SF operand 2 as elements 2 and 3.  Alternatives
;; 0/1 load operand 2 from memory (movhps), 2/3 take it from a register
;; (movlhps), and alternative 4 stores it into the %H0 form of a memory
;; destination that is tied to operand 1.
6186 (define_insn "sse_loadhps"
6187 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
6188 (vec_concat:V4SF
6189 (vec_select:V2SF
6190 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
6191 (parallel [(const_int 0) (const_int 1)]))
6192 (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,x,x")))]
6193 "TARGET_SSE"
6194 "@
6195 movhps\t{%2, %0|%0, %q2}
6196 vmovhps\t{%2, %1, %0|%0, %1, %q2}
6197 movlhps\t{%2, %0|%0, %2}
6198 vmovlhps\t{%2, %1, %0|%0, %1, %2}
6199 %vmovlps\t{%2, %H0|%H0, %2}"
6200 [(set_attr "isa" "noavx,avx,noavx,avx,*")
6201 (set_attr "type" "ssemov")
6202 (set_attr "ssememalign" "64")
6203 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
6204 (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
6205
;; Extract the low half (elements 0 and 1) of V4SF operand 1 into V2SF
;; operand 0.  Alternatives: register -> memory (movlps), register ->
;; register (a full movaps copy), memory -> register (movlps).
6206 (define_insn "sse_storelps"
6207 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
6208 (vec_select:V2SF
6209 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,m")
6210 (parallel [(const_int 0) (const_int 1)])))]
6211 "TARGET_SSE"
6212 "@
6213 %vmovlps\t{%1, %0|%q0, %1}
6214 %vmovaps\t{%1, %0|%0, %1}
6215 %vmovlps\t{%1, %d0|%d0, %q1}"
6216 [(set_attr "type" "ssemov")
6217 (set_attr "prefix" "maybe_vex")
6218 (set_attr "mode" "V2SF,V4SF,V2SF")])
6219
;; Expander wrapper around sse_loadlps.  ix86_fixup_binary_operands may
;; return a destination different from operands[0]; the insn is emitted
;; into that destination and, if it differs, the result is then copied to
;; operands[0].
6220 (define_expand "sse_loadlps_exp"
6221 [(set (match_operand:V4SF 0 "nonimmediate_operand")
6222 (vec_concat:V4SF
6223 (match_operand:V2SF 2 "nonimmediate_operand")
6224 (vec_select:V2SF
6225 (match_operand:V4SF 1 "nonimmediate_operand")
6226 (parallel [(const_int 2) (const_int 3)]))))]
6227 "TARGET_SSE"
6228 {
6229 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
6230
6231 emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));
6232
6233 /* Fix up the destination if needed. */
6234 if (dst != operands[0])
6235 emit_move_insn (operands[0], dst);
6236
6237 DONE;
6238 })
6239
;; Replace the low half of a V4SF: the result takes V2SF operand 2 as
;; elements 0 and 1 and keeps elements 2 and 3 of operand 1.  Alternatives
;; 0/1 handle a register operand 2 with shufps and immediate 0xe4;
;; alternatives 2/3 load operand 2 from memory with movlps; alternative 4
;; stores operand 2 into a memory destination tied to operand 1.
6240 (define_insn "sse_loadlps"
6241 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
6242 (vec_concat:V4SF
6243 (match_operand:V2SF 2 "nonimmediate_operand" " 0,x,m,m,x")
6244 (vec_select:V2SF
6245 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0,x,0")
6246 (parallel [(const_int 2) (const_int 3)]))))]
6247 "TARGET_SSE"
6248 "@
6249 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
6250 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
6251 movlps\t{%2, %0|%0, %q2}
6252 vmovlps\t{%2, %1, %0|%0, %1, %q2}
6253 %vmovlps\t{%2, %0|%q0, %2}"
6254 [(set_attr "isa" "noavx,avx,noavx,avx,*")
6255 (set_attr "type" "sseshuf,sseshuf,ssemov,ssemov,ssemov")
6256 (set_attr "ssememalign" "64")
6257 (set_attr "length_immediate" "1,1,*,*,*")
6258 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
6259 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
6260
;; Register-to-register MOVSS: vec_merge with mask 1 takes element 0 from
;; operand 2 and the remaining elements from operand 1.  Alternative 0 is
;; the SSE form with operand 1 tied to the destination; alternative 1 is
;; the three-operand VEX form.
6261 (define_insn "sse_movss"
6262 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
6263 (vec_merge:V4SF
6264 (match_operand:V4SF 2 "register_operand" " x,x")
6265 (match_operand:V4SF 1 "register_operand" " 0,x")
6266 (const_int 1)))]
6267 "TARGET_SSE"
6268 "@
6269 movss\t{%2, %0|%0, %2}
6270 vmovss\t{%2, %1, %0|%0, %1, %2}"
6271 [(set_attr "isa" "noavx,avx")
6272 (set_attr "type" "ssemov")
6273 (set_attr "prefix" "orig,vex")
6274 (set_attr "mode" "SF")])
6275
;; AVX2 VBROADCASTSS from a register: element 0 of V4SF register operand 1
;; is duplicated into every element of the destination, whose mode ranges
;; over the VF1_128_256 iterator.
6276 (define_insn "avx2_vec_dup<mode>"
6277 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
6278 (vec_duplicate:VF1_128_256
6279 (vec_select:SF
6280 (match_operand:V4SF 1 "register_operand" "x")
6281 (parallel [(const_int 0)]))))]
6282 "TARGET_AVX2"
6283 "vbroadcastss\t{%1, %0|%0, %1}"
6284 [(set_attr "type" "sselog1")
6285 (set_attr "prefix" "vex")
6286 (set_attr "mode" "<MODE>")])
6287
;; AVX2 VBROADCASTSS where the source is a V8SF register: element 0 of
;; operand 1 is duplicated into all eight elements of the destination.
;; The source is printed with the %x modifier (presumably its 128-bit
;; register name; the modifier is defined outside this chunk).
6288 (define_insn "avx2_vec_dupv8sf_1"
6289 [(set (match_operand:V8SF 0 "register_operand" "=x")
6290 (vec_duplicate:V8SF
6291 (vec_select:SF
6292 (match_operand:V8SF 1 "register_operand" "x")
6293 (parallel [(const_int 0)]))))]
6294 "TARGET_AVX2"
6295 "vbroadcastss\t{%x1, %0|%0, %x1}"
6296 [(set_attr "type" "sselog1")
6297 (set_attr "prefix" "vex")
6298 (set_attr "mode" "V8SF")])
6299
;; Duplicate SF scalar operand 1 into all four elements of a V4SF.
;; Alternatives: AVX register source (vshufps with immediate 0), AVX
;; memory source (vbroadcastss), and non-AVX with the source tied to the
;; destination (shufps with immediate 0).
6300 (define_insn "vec_dupv4sf"
6301 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
6302 (vec_duplicate:V4SF
6303 (match_operand:SF 1 "nonimmediate_operand" "x,m,0")))]
6304 "TARGET_SSE"
6305 "@
6306 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
6307 vbroadcastss\t{%1, %0|%0, %1}
6308 shufps\t{$0, %0, %0|%0, %0, 0}"
6309 [(set_attr "isa" "avx,avx,noavx")
6310 (set_attr "type" "sseshuf1,ssemov,sseshuf1")
6311 (set_attr "length_immediate" "1,0,1")
6312 (set_attr "prefix_extra" "0,1,*")
6313 (set_attr "prefix" "vex,vex,orig")
6314 (set_attr "mode" "V4SF")])
6315
;; Build a V2SF from two SF scalars (operand 1 low, operand 2 high) when
;; SSE4.1 is available.  Alternatives 0/1 use unpcklps for a register
;; operand 2, 2/3 use insertps with immediate 0x10 for a memory operand 2,
;; 4 loads operand 1 with movss when operand 2 matches constraint "C", and
;; 5/6 are the MMX-register variants (punpckldq / movd).
6316 ;; Although insertps takes register source, we prefer
6317 ;; unpcklps with register source since it is shorter.
6318 (define_insn "*vec_concatv2sf_sse4_1"
6319 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,x,x,*y ,*y")
6320 (vec_concat:V2SF
6321 (match_operand:SF 1 "nonimmediate_operand" " 0,x,0,x,m, 0 , m")
6322 (match_operand:SF 2 "vector_move_operand" " x,x,m,m,C,*ym, C")))]
6323 "TARGET_SSE4_1"
6324 "@
6325 unpcklps\t{%2, %0|%0, %2}
6326 vunpcklps\t{%2, %1, %0|%0, %1, %2}
6327 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
6328 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
6329 %vmovss\t{%1, %0|%0, %1}
6330 punpckldq\t{%2, %0|%0, %2}
6331 movd\t{%1, %0|%0, %1}"
6332 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
6333 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
6334 (set_attr "prefix_data16" "*,*,1,*,*,*,*")
6335 (set_attr "prefix_extra" "*,*,1,1,*,*,*")
6336 (set_attr "length_immediate" "*,*,1,1,*,*,*")
6337 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
6338 (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
6339
;; Plain-SSE fallback for building a V2SF from two SF scalars (operand 1
;; low, operand 2 high).  Alternatives 0/1 use SSE registers (unpcklps,
;; or movss when operand 2 matches constraint "C"); alternatives 2/3 are
;; the MMX-register variants (punpckldq / movd).
6340 ;; ??? In theory we can match memory for the MMX alternative, but allowing
6341 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
6342 ;; alternatives pretty much forces the MMX alternative to be chosen.
6343 (define_insn "*vec_concatv2sf_sse"
6344 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
6345 (vec_concat:V2SF
6346 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
6347 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
6348 "TARGET_SSE"
6349 "@
6350 unpcklps\t{%2, %0|%0, %2}
6351 movss\t{%1, %0|%0, %1}
6352 punpckldq\t{%2, %0|%0, %2}
6353 movd\t{%1, %0|%0, %1}"
6354 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
6355 (set_attr "mode" "V4SF,SF,DI,DI")])
6356
;; Build a V4SF from two V2SF halves: operand 1 becomes the low half and
;; operand 2 the high half.  Alternatives 0/1 take operand 2 from a
;; register (movlhps), alternatives 2/3 from memory (movhps); each pair is
;; the non-AVX form (operand 1 tied to the destination) followed by the
;; VEX form.
6357 (define_insn "*vec_concatv4sf"
6358 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x,x")
6359 (vec_concat:V4SF
6360 (match_operand:V2SF 1 "register_operand" " 0,x,0,x")
6361 (match_operand:V2SF 2 "nonimmediate_operand" " x,x,m,m")))]
6362 "TARGET_SSE"
6363 "@
6364 movlhps\t{%2, %0|%0, %2}
6365 vmovlhps\t{%2, %1, %0|%0, %1, %2}
6366 movhps\t{%2, %0|%0, %q2}
6367 vmovhps\t{%2, %1, %0|%0, %1, %q2}"
6368 [(set_attr "isa" "noavx,avx,noavx,avx")
6369 (set_attr "type" "ssemov")
6370 (set_attr "prefix" "orig,vex,orig,vex")
6371 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
6372
;; vec_init expander for the modes of the V_128 iterator.  All work is
;; delegated to ix86_expand_vector_init, called with `false' as its first
;; argument (the meaning of that flag is defined with the helper, outside
;; this chunk).
6373 (define_expand "vec_init<mode>"
6374 [(match_operand:V_128 0 "register_operand")
6375 (match_operand 1)]
6376 "TARGET_SSE"
6377 {
6378 ix86_expand_vector_init (false, operands[0], operands[1]);
6379 DONE;
6380 })
6381
;; Set element 0 of a four-element vector: vec_merge with mask 1 takes
;; element 0 from the duplicated scalar operand 2 and the other elements
;; from operand 1.  The eleven alternatives are kept column-aligned across
;; the three constraint strings and every attribute list.  Alternatives
;; 0-3 have operand 1 constrained to "C"; 4-7 merge into an existing
;; register vector; 8-10 have a memory destination tied to operand 1 and
;; emit "#", i.e. they must be split before final output.
6382 ;; Avoid combining registers from different units in a single alternative,
6383 ;; see comment above inline_secondary_memory_needed function in i386.c
6384 (define_insn "vec_set<mode>_0"
6385 [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
6386 "=v,v,v ,x,x,v,x ,x ,m ,m ,m")
6387 (vec_merge:VI4F_128
6388 (vec_duplicate:VI4F_128
6389 (match_operand:<ssescalarmode> 2 "general_operand"
6390 " v,m,*r,m,x,v,*rm,*rm,!x,!*re,!*fF"))
6391 (match_operand:VI4F_128 1 "vector_move_operand"
6392 " C,C,C ,C,0,v,0 ,x ,0 ,0 ,0")
6393 (const_int 1)))]
6394 "TARGET_SSE"
6395 "@
6396 %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe}
6397 %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
6398 %vmovd\t{%2, %0|%0, %2}
6399 movss\t{%2, %0|%0, %2}
6400 movss\t{%2, %0|%0, %2}
6401 vmovss\t{%2, %1, %0|%0, %1, %2}
6402 pinsrd\t{$0, %2, %0|%0, %2, 0}
6403 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
6404 #
6405 #
6406 #"
6407 [(set_attr "isa" "sse4,sse2,sse2,noavx,noavx,avx,sse4_noavx,avx,*,*,*")
6408 (set (attr "type")
6409 (cond [(eq_attr "alternative" "0,6,7")
6410 (const_string "sselog")
6411 (eq_attr "alternative" "9")
6412 (const_string "imov")
6413 (eq_attr "alternative" "10")
6414 (const_string "fmov")
6415 ]
6416 (const_string "ssemov")))
6417 (set_attr "prefix_extra" "*,*,*,*,*,*,1,1,*,*,*")
6418 (set_attr "length_immediate" "*,*,*,*,*,*,1,1,*,*,*")
6419 (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex,*,*,*")
6420 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,*,*,*")])
6421
;; Insert SF operand 2 into one element of V4SF operand 1 with INSERTPS.
;; Operand 3 is the vec_merge mask; the insn condition only accepts masks
;; whose exact_log2 is in the range 0..3, i.e. a single set bit selecting
;; one of the four elements.  The output code turns that mask into the
;; INSERTPS immediate by placing the element index at bit 4.
6422 ;; A subset is vec_setv4sf.
6423 (define_insn "*vec_setv4sf_sse4_1"
6424 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
6425 (vec_merge:V4SF
6426 (vec_duplicate:V4SF
6427 (match_operand:SF 2 "nonimmediate_operand" "xm,xm"))
6428 (match_operand:V4SF 1 "register_operand" "0,x")
6429 (match_operand:SI 3 "const_int_operand")))]
6430 "TARGET_SSE4_1
6431 && ((unsigned) exact_log2 (INTVAL (operands[3]))
6432 < GET_MODE_NUNITS (V4SFmode))"
6433 {
6434 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
6435 switch (which_alternative)
6436 {
6437 case 0:
6438 return "insertps\t{%3, %2, %0|%0, %2, %3}";
6439 case 1:
6440 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6441 default:
6442 gcc_unreachable ();
6443 }
6444 }
6445 [(set_attr "isa" "noavx,avx")
6446 (set_attr "type" "sselog")
6447 (set_attr "prefix_data16" "1,*")
6448 (set_attr "prefix_extra" "1")
6449 (set_attr "length_immediate" "1")
6450 (set_attr "prefix" "orig,vex")
6451 (set_attr "mode" "V4SF")])
6452
;; INSERTPS with an explicit 8-bit immediate (operand 3), modelled as an
;; unspec.  When the source operand 2 is in memory, the output code takes
;; bits 7:6 of the immediate as an element count, re-addresses the memory
;; operand as an SFmode access at count * 4 bytes, and clears those two
;; bits from the immediate if they were nonzero.
6453 (define_insn "sse4_1_insertps"
6454 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
6455 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
6456 (match_operand:V4SF 1 "register_operand" "0,x")
6457 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
6458 UNSPEC_INSERTPS))]
6459 "TARGET_SSE4_1"
6460 {
6461 if (MEM_P (operands[2]))
6462 {
6463 unsigned count_s = INTVAL (operands[3]) >> 6;
6464 if (count_s)
6465 operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
6466 operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
6467 }
6468 switch (which_alternative)
6469 {
6470 case 0:
6471 return "insertps\t{%3, %2, %0|%0, %2, %3}";
6472 case 1:
6473 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6474 default:
6475 gcc_unreachable ();
6476 }
6477 }
6478 [(set_attr "isa" "noavx,avx")
6479 (set_attr "type" "sselog")
6480 (set_attr "prefix_data16" "1,*")
6481 (set_attr "prefix_extra" "1")
6482 (set_attr "length_immediate" "1")
6483 (set_attr "prefix" "orig,vex")
6484 (set_attr "mode" "V4SF")])
6485
;; After reload, setting element 0 of an in-memory vector whose other
;; elements come from that same memory (match_dup 0) becomes a plain
;; scalar store: operand 0 is re-addressed in the scalar mode at offset 0
;; and operand 1 is moved into it.
6486 (define_split
6487 [(set (match_operand:VI4F_128 0 "memory_operand")
6488 (vec_merge:VI4F_128
6489 (vec_duplicate:VI4F_128
6490 (match_operand:<ssescalarmode> 1 "nonmemory_operand"))
6491 (match_dup 0)
6492 (const_int 1)))]
6493 "TARGET_SSE && reload_completed"
6494 [(set (match_dup 0) (match_dup 1))]
6495 "operands[0] = adjust_address (operands[0], <ssescalarmode>mode, 0);")
6496
;; vec_set expander for the modes of the V iterator: store scalar operand
;; 1 into the element of vector operand 0 whose index is the constant
;; operand 2.  All work is delegated to ix86_expand_vector_set, called
;; with `false' as its first argument.
6497 (define_expand "vec_set<mode>"
6498 [(match_operand:V 0 "register_operand")
6499 (match_operand:<ssescalarmode> 1 "register_operand")
6500 (match_operand 2 "const_int_operand")]
6501 "TARGET_SSE"
6502 {
6503 ix86_expand_vector_set (false, operands[0], operands[1],
6504 INTVAL (operands[2]));
6505 DONE;
6506 })
6507
;; Extract element 0 of a V4SF.  The insn always emits "#" and is split
;; after reload into a plain SF move: a register source is re-created as
;; an SFmode register with the same register number, a memory source is
;; re-addressed as SFmode at offset 0.  Memory-to-memory is rejected by
;; the insn condition.
6508 (define_insn_and_split "*vec_extractv4sf_0"
6509 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
6510 (vec_select:SF
6511 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
6512 (parallel [(const_int 0)])))]
6513 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6514 "#"
6515 "&& reload_completed"
6516 [(set (match_dup 0) (match_dup 1))]
6517 {
6518 if (REG_P (operands[1]))
6519 operands[1] = gen_rtx_REG (SFmode, REGNO (operands[1]));
6520 else
6521 operands[1] = adjust_address (operands[1], SFmode, 0);
6522 })
6523
;; Extract element N (operand 2, 0..3) of a V4SF register.  Alternative 0
;; emits EXTRACTPS to a general register or memory.  Alternatives 1 and 2
;; have an SSE register destination and are split after reload: the
;; destination is re-created as a V4SF register, then for N = 1 or 3 a
;; shufps of operand 1 with itself uses N for all four selectors (N, N,
;; N+4, N+4), and for N = 2 gen_vec_interleave_highv4sf is applied to
;; operand 1 with itself.  N = 0 is not expected to reach the split.
6524 (define_insn_and_split "*sse4_1_extractps"
6525 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,x,x")
6526 (vec_select:SF
6527 (match_operand:V4SF 1 "register_operand" "x,0,x")
6528 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n")])))]
6529 "TARGET_SSE4_1"
6530 "@
6531 %vextractps\t{%2, %1, %0|%0, %1, %2}
6532 #
6533 #"
6534 "&& reload_completed && SSE_REG_P (operands[0])"
6535 [(const_int 0)]
6536 {
6537 rtx dest = gen_rtx_REG (V4SFmode, REGNO (operands[0]));
6538 switch (INTVAL (operands[2]))
6539 {
6540 case 1:
6541 case 3:
6542 emit_insn (gen_sse_shufps_v4sf (dest, operands[1], operands[1],
6543 operands[2], operands[2],
6544 GEN_INT (INTVAL (operands[2]) + 4),
6545 GEN_INT (INTVAL (operands[2]) + 4)));
6546 break;
6547 case 2:
6548 emit_insn (gen_vec_interleave_highv4sf (dest, operands[1], operands[1]));
6549 break;
6550 default:
6551 /* 0 should be handled by the *vec_extractv4sf_0 pattern above. */
6552 gcc_unreachable ();
6553 }
6554 DONE;
6555 }
6556 [(set_attr "isa" "*,noavx,avx")
6557 (set_attr "type" "sselog,*,*")
6558 (set_attr "prefix_data16" "1,*,*")
6559 (set_attr "prefix_extra" "1,*,*")
6560 (set_attr "length_immediate" "1,*,*")
6561 (set_attr "prefix" "maybe_vex,*,*")
6562 (set_attr "mode" "V4SF,*,*")])
6563
;; Extract element N (operand 2, 0..3) of a V4SF held in offsettable
;; memory.  Always split after reload into a scalar SF load from the
;; address advanced by N * 4 bytes.
6564 (define_insn_and_split "*vec_extractv4sf_mem"
6565 [(set (match_operand:SF 0 "register_operand" "=x,*r,f")
6566 (vec_select:SF
6567 (match_operand:V4SF 1 "memory_operand" "o,o,o")
6568 (parallel [(match_operand 2 "const_0_to_3_operand" "n,n,n")])))]
6569 "TARGET_SSE"
6570 "#"
6571 "&& reload_completed"
6572 [(set (match_dup 0) (match_dup 1))]
6573 {
6574 operands[1] = adjust_address (operands[1], SFmode, INTVAL (operands[2]) * 4);
6575 })
6576
;; Name pieces and mode set for the quarter-width (128-bit) extract
;; expander that follows.  extract_type gives the ISA prefix of the
;; pattern name and extract_suf the element-layout suffix: 32-bit element
;; modes use avx512f / 32x4, 64-bit element modes use avx512dq / 64x2.
6577 (define_mode_attr extract_type
6578 [(V16SF "avx512f") (V16SI "avx512f") (V8DF "avx512dq") (V8DI "avx512dq")])
6579
6580 (define_mode_attr extract_suf
6581 [(V16SF "32x4") (V16SI "32x4") (V8DF "64x2") (V8DI "64x2")])
6582
;; The 64-bit element modes are only enabled under TARGET_AVX512DQ.
6583 (define_mode_iterator AVX512_VEC
6584 [(V8DF "TARGET_AVX512DQ") (V8DI "TARGET_AVX512DQ") V16SF V16SI])
6585
;; Masked extraction of one 128-bit quarter (index in operand 2, 0..3)
;; from a 512-bit vector.  Operand 3 is the merge source and operand 4
;; the mask register.  The quarter index is expanded into explicit
;; element indices: four consecutive indices starting at index * 4 for
;; the V16SI/V16SF modes, two starting at index * 2 otherwise.
;; NOTE(review): when operand 0 is memory and operand 3 is a CONST_VECTOR,
;; operand 0 is replaced by the register returned by force_reg and no
;; store back to the original memory is visible in this expander --
;; confirm this is intended.
6586 (define_expand "<extract_type>_vextract<shuffletype><extract_suf>_mask"
6587 [(match_operand:<ssequartermode> 0 "nonimmediate_operand")
6588 (match_operand:AVX512_VEC 1 "register_operand")
6589 (match_operand:SI 2 "const_0_to_3_operand")
6590 (match_operand:<ssequartermode> 3 "nonimmediate_operand")
6591 (match_operand:QI 4 "register_operand")]
6592 "TARGET_AVX512F"
6593 {
6594 int mask;
6595 mask = INTVAL (operands[2]);
6596
6597 if (MEM_P (operands[0]) && GET_CODE (operands[3]) == CONST_VECTOR)
6598 operands[0] = force_reg (<ssequartermode>mode, operands[0]);
6599
6600 if (<MODE>mode == V16SImode || <MODE>mode == V16SFmode)
6601 emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (operands[0],
6602 operands[1], GEN_INT (mask * 4), GEN_INT (mask * 4 + 1),
6603 GEN_INT (mask * 4 + 2), GEN_INT (mask * 4 + 3), operands[3],
6604 operands[4]));
6605 else
6606 emit_insn (gen_avx512dq_vextract<shuffletype>64x2_1_mask (operands[0],
6607 operands[1], GEN_INT (mask * 2), GEN_INT (mask * 2 + 1), operands[3],
6608 operands[4]));
6609 DONE;
6610 })
6611
;; Masked store form of VEXTRACT{F,I}64x2: two consecutive 64-bit
;; elements of operand 1, starting at an even index (operand 2), are
;; merged under mask operand 5 into the memory destination, which is tied
;; to operand 4.  The output code converts the element index into the
;; instruction's chunk number by shifting right by one.
;; The mask operand uses constraint "Yk" rather than "k", matching the
;; 32x4 sibling pattern, so that a mask register unusable as a write-mask
;; cannot be allocated for the %{%5%} predicate.
6612 (define_insn "avx512dq_vextract<shuffletype>64x2_1_maskm"
6613 [(set (match_operand:<ssequartermode> 0 "memory_operand" "=m")
6614 (vec_merge:<ssequartermode>
6615 (vec_select:<ssequartermode>
6616 (match_operand:V8FI 1 "register_operand" "v")
6617 (parallel [(match_operand 2 "const_0_to_7_operand")
6618 (match_operand 3 "const_0_to_7_operand")]))
6619 (match_operand:<ssequartermode> 4 "memory_operand" "0")
6620 (match_operand:QI 5 "register_operand" "Yk")))]
6621 "TARGET_AVX512DQ
6622 && (INTVAL (operands[2]) % 2 == 0)
6623 && (INTVAL (operands[2]) == INTVAL (operands[3]) - 1 )"
6624 {
6625 operands[2] = GEN_INT ((INTVAL (operands[2])) >> 1);
6626 return "vextract<shuffletype>64x2\t{%2, %1, %0%{%5%}|%0%{%5%}, %1, %2}";
6627 }
6628 [(set_attr "type" "sselog")
6629 (set_attr "prefix_extra" "1")
6630 (set_attr "length_immediate" "1")
6631 (set_attr "memory" "store")
6632 (set_attr "prefix" "evex")
6633 (set_attr "mode" "<sseinsnmode>")])
6634
;; Masked store form of VEXTRACT{F,I}32x4: four consecutive 32-bit
;; elements of operand 1 are merged under mask operand 7 into the memory
;; destination, which is tied to operand 6.  The insn condition requires
;; the first index (operand 2) to be a multiple of 4 and operands 3..5 to
;; follow it consecutively; the output code converts the first index into
;; the instruction's chunk number by shifting right by two.
6635 (define_insn "avx512f_vextract<shuffletype>32x4_1_maskm"
6636 [(set (match_operand:<ssequartermode> 0 "memory_operand" "=m")
6637 (vec_merge:<ssequartermode>
6638 (vec_select:<ssequartermode>
6639 (match_operand:V16FI 1 "register_operand" "v")
6640 (parallel [(match_operand 2 "const_0_to_15_operand")
6641 (match_operand 3 "const_0_to_15_operand")
6642 (match_operand 4 "const_0_to_15_operand")
6643 (match_operand 5 "const_0_to_15_operand")]))
6644 (match_operand:<ssequartermode> 6 "memory_operand" "0")
6645 (match_operand:QI 7 "register_operand" "Yk")))]
6646 "TARGET_AVX512F
6647 && ((INTVAL (operands[2]) % 4 == 0)
6648 && INTVAL (operands[2]) == (INTVAL (operands[3]) - 1)
6649 && INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
6650 && INTVAL (operands[4]) == (INTVAL (operands[5]) - 1))"
6651 {
6652 operands[2] = GEN_INT ((INTVAL (operands[2])) >> 2);
6653 return "vextract<shuffletype>32x4\t{%2, %1, %0%{%7%}|%0%{%7%}, %1, %2}";
6654 }
6655 [(set_attr "type" "sselog")
6656 (set_attr "prefix_extra" "1")
6657 (set_attr "length_immediate" "1")
6658 (set_attr "memory" "store")
6659 (set_attr "prefix" "evex")
6660 (set_attr "mode" "<sseinsnmode>")])
6661
;; VEXTRACT{F,I}64x2: select two consecutive 64-bit elements of operand 1
;; (indices in operands 2 and 3) into a 128-bit destination.  The output
;; code derives the instruction's chunk number as operand 2 >> 1, which is
;; only correct when operand 2 is even, so the insn condition now rejects
;; odd start indices (as the _maskm variant of this pattern already does).
;; Without that check a selection such as (1, 2) would match and be
;; emitted as chunk 0, i.e. elements (0, 1).
6662 (define_insn "<mask_codefor>avx512dq_vextract<shuffletype>64x2_1<mask_name>"
6663 [(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
6664 (vec_select:<ssequartermode>
6665 (match_operand:V8FI 1 "register_operand" "v")
6666 (parallel [(match_operand 2 "const_0_to_7_operand")
6667 (match_operand 3 "const_0_to_7_operand")])))]
6668 "TARGET_AVX512DQ && (INTVAL (operands[2]) % 2 == 0) && (INTVAL (operands[2]) == INTVAL (operands[3]) - 1)"
6669 {
6670 operands[2] = GEN_INT ((INTVAL (operands[2])) >> 1);
6671 return "vextract<shuffletype>64x2\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}";
6672 }
6673 [(set_attr "type" "sselog1")
6674 (set_attr "prefix_extra" "1")
6675 (set_attr "length_immediate" "1")
6676 (set_attr "prefix" "evex")
6677 (set_attr "mode" "<sseinsnmode>")])
6678
;; VEXTRACT{F,I}32x4: select four consecutive 32-bit elements of operand 1
;; (indices in operands 2..5) into a 128-bit destination.  The output code
;; derives the instruction's chunk number as operand 2 >> 2, which is only
;; correct when operand 2 is a multiple of 4, so the insn condition now
;; requires that alignment (as the _maskm variant of this pattern already
;; does).  Without it a selection such as (1, 2, 3, 4) would match and be
;; emitted as chunk 0.
6679 (define_insn "<mask_codefor>avx512f_vextract<shuffletype>32x4_1<mask_name>"
6680 [(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
6681 (vec_select:<ssequartermode>
6682 (match_operand:V16FI 1 "register_operand" "v")
6683 (parallel [(match_operand 2 "const_0_to_15_operand")
6684 (match_operand 3 "const_0_to_15_operand")
6685 (match_operand 4 "const_0_to_15_operand")
6686 (match_operand 5 "const_0_to_15_operand")])))]
6687 "TARGET_AVX512F && (INTVAL (operands[2]) % 4 == 0)
6688 && (INTVAL (operands[2]) == (INTVAL (operands[3]) - 1)
6689 && INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
6690 && INTVAL (operands[4]) == (INTVAL (operands[5]) - 1))"
6691 {
6692 operands[2] = GEN_INT ((INTVAL (operands[2])) >> 2);
6693 return "vextract<shuffletype>32x4\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
6694 }
6695 [(set_attr "type" "sselog1")
6696 (set_attr "prefix_extra" "1")
6697 (set_attr "length_immediate" "1")
6698 (set_attr "prefix" "evex")
6699 (set_attr "mode" "<sseinsnmode>")])
6700
;; Name pieces and mode set for the half-width (256-bit) extract expander
;; that follows.  extract_type_2 gives the ISA prefix of the pattern name
;; and extract_suf_2 the element-layout suffix: 32-bit element modes use
;; avx512dq / 32x8, 64-bit element modes use avx512f / 64x4.
6701 (define_mode_attr extract_type_2
6702 [(V16SF "avx512dq") (V16SI "avx512dq") (V8DF "avx512f") (V8DI "avx512f")])
6703
6704 (define_mode_attr extract_suf_2
6705 [(V16SF "32x8") (V16SI "32x8") (V8DF "64x4") (V8DI "64x4")])
6706
;; The 32-bit element modes are only enabled under TARGET_AVX512DQ.
6707 (define_mode_iterator AVX512_VEC_2
6708 [(V16SF "TARGET_AVX512DQ") (V16SI "TARGET_AVX512DQ") V8DF V8DI])
6709
;; Masked extraction of one 256-bit half (operand 2: 0 = low, 1 = high)
;; from a 512-bit vector.  Operand 3 is the merge source and operand 4
;; the mask register.  The generator for the low- or high-half masked
;; pattern is chosen from operand 2 and called with operands 0, 1, 3, 4.
;; Operand 0 has mode <ssehalfvecmode>, so force_reg is called with that
;; mode (it was previously called with <ssequartermode>mode, which does
;; not match the operand).
;; NOTE(review): when operand 0 is memory and operand 3 is a CONST_VECTOR,
;; operand 0 is replaced by the register returned by force_reg and no
;; store back to the original memory is visible in this expander --
;; confirm this is intended.
6710 (define_expand "<extract_type_2>_vextract<shuffletype><extract_suf_2>_mask"
6711 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
6712 (match_operand:AVX512_VEC_2 1 "register_operand")
6713 (match_operand:SI 2 "const_0_to_1_operand")
6714 (match_operand:<ssehalfvecmode> 3 "nonimmediate_operand")
6715 (match_operand:QI 4 "register_operand")]
6716 "TARGET_AVX512F"
6717 {
6718 rtx (*insn)(rtx, rtx, rtx, rtx);
6719
6720 if (MEM_P (operands[0]) && GET_CODE (operands[3]) == CONST_VECTOR)
6721 operands[0] = force_reg (<ssehalfvecmode>mode, operands[0]);
6722
6723 switch (INTVAL (operands[2]))
6724 {
6725 case 0:
6726 insn = gen_vec_extract_lo_<mode>_mask;
6727 break;
6728 case 1:
6729 insn = gen_vec_extract_hi_<mode>_mask;
6730 break;
6731 default:
6732 gcc_unreachable ();
6733 }
6734
6735 emit_insn (insn (operands[0], operands[1], operands[3], operands[4]));
6736 DONE;
6737 })
6738
;; After reload, extracting the low half (elements 0..3) of an
;; eight-element 512-bit vector becomes a plain move: a register source is
;; re-created in the half-width mode with the same register number, any
;; other source goes through gen_lowpart.  Memory-to-memory is excluded by
;; the split condition.
6739 (define_split
6740 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
6741 (vec_select:<ssehalfvecmode>
6742 (match_operand:V8FI 1 "nonimmediate_operand")
6743 (parallel [(const_int 0) (const_int 1)
6744 (const_int 2) (const_int 3)])))]
6745 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
6746 && reload_completed"
6747 [(const_int 0)]
6748 {
6749 rtx op1 = operands[1];
6750 if (REG_P (op1))
6751 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
6752 else
6753 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
6754 emit_move_insn (operands[0], op1);
6755 DONE;
6756 })
6757
;; Masked store of the low half (elements 0..3) of an eight-element
;; 512-bit register: the selected elements are merged under mask operand 3
;; into the memory destination, which is tied to operand 2.  Emitted as
;; vextract<shuffletype>64x4 with immediate 0.
6758 (define_insn "vec_extract_lo_<mode>_maskm"
6759 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
6760 (vec_merge:<ssehalfvecmode>
6761 (vec_select:<ssehalfvecmode>
6762 (match_operand:V8FI 1 "register_operand" "v")
6763 (parallel [(const_int 0) (const_int 1)
6764 (const_int 2) (const_int 3)]))
6765 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
6766 (match_operand:QI 3 "register_operand" "Yk")))]
6767 "TARGET_AVX512F"
6768 "vextract<shuffletype>64x4\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}"
6769 [(set_attr "type" "sselog1")
6770 (set_attr "prefix_extra" "1")
6771 (set_attr "length_immediate" "1")
6772 (set_attr "prefix" "evex")
6773 (set_attr "mode" "<sseinsnmode>")])
6774
;; Low-half extraction (elements 0..3) from an eight-element 512-bit
;; vector.  When <mask_applied> is true the insn emits
;; vextract<shuffletype>64x4 with immediate 0 and the mask operand;
;; otherwise it emits "#" and relies on being split.  Memory-to-memory is
;; rejected by the insn condition.
6775 (define_insn "vec_extract_lo_<mode><mask_name>"
6776 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>,v")
6777 (vec_select:<ssehalfvecmode>
6778 (match_operand:V8FI 1 "nonimmediate_operand" "v,m")
6779 (parallel [(const_int 0) (const_int 1)
6780 (const_int 2) (const_int 3)])))]
6781 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6782 {
6783 if (<mask_applied>)
6784 return "vextract<shuffletype>64x4\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
6785 else
6786 return "#";
6787 }
6788 [(set_attr "type" "sselog1")
6789 (set_attr "prefix_extra" "1")
6790 (set_attr "length_immediate" "1")
6791 (set_attr "prefix" "evex")
6792 (set_attr "mode" "<sseinsnmode>")])
6793
;; Masked store of the high half (elements 4..7) of an eight-element
;; 512-bit register: the selected elements are merged under mask operand 3
;; into the memory destination, which is tied to operand 2.  Emitted as
;; vextract<shuffletype>64x4 with immediate 1.
6794 (define_insn "vec_extract_hi_<mode>_maskm"
6795 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
6796 (vec_merge:<ssehalfvecmode>
6797 (vec_select:<ssehalfvecmode>
6798 (match_operand:V8FI 1 "register_operand" "v")
6799 (parallel [(const_int 4) (const_int 5)
6800 (const_int 6) (const_int 7)]))
6801 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
6802 (match_operand:QI 3 "register_operand" "Yk")))]
6803 "TARGET_AVX512F"
6804 "vextract<shuffletype>64x4\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
6805 [(set_attr "type" "sselog")
6806 (set_attr "prefix_extra" "1")
6807 (set_attr "length_immediate" "1")
6808 (set_attr "memory" "store")
6809 (set_attr "prefix" "evex")
6810 (set_attr "mode" "<sseinsnmode>")])
6811
;; High-half extraction (elements 4..7) from an eight-element 512-bit
;; register, emitted as vextract<shuffletype>64x4 with immediate 1.  The
;; <mask_...> placeholders add the optional mask operand.
6812 (define_insn "vec_extract_hi_<mode><mask_name>"
6813 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
6814 (vec_select:<ssehalfvecmode>
6815 (match_operand:V8FI 1 "register_operand" "v")
6816 (parallel [(const_int 4) (const_int 5)
6817 (const_int 6) (const_int 7)])))]
6818 "TARGET_AVX512F"
6819 "vextract<shuffletype>64x4\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}"
6820 [(set_attr "type" "sselog1")
6821 (set_attr "prefix_extra" "1")
6822 (set_attr "length_immediate" "1")
6823 (set_attr "prefix" "evex")
6824 (set_attr "mode" "<sseinsnmode>")])
6825
;; Masked store of the high half (elements 8..15) of a sixteen-element
;; 512-bit register: the selected elements are merged under mask operand 3
;; into the memory destination, which is tied to operand 2.  Emitted as
;; vextract<shuffletype>32x8 with immediate 1; requires TARGET_AVX512DQ.
;; The mask operand uses constraint "Yk" rather than "k", matching the
;; eight-element _maskm patterns, so that a mask register unusable as a
;; write-mask cannot be allocated for the %{%3%} predicate.
6826 (define_insn "vec_extract_hi_<mode>_maskm"
6827 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
6828 (vec_merge:<ssehalfvecmode>
6829 (vec_select:<ssehalfvecmode>
6830 (match_operand:V16FI 1 "register_operand" "v")
6831 (parallel [(const_int 8) (const_int 9)
6832 (const_int 10) (const_int 11)
6833 (const_int 12) (const_int 13)
6834 (const_int 14) (const_int 15)]))
6835 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
6836 (match_operand:QI 3 "register_operand" "Yk")))]
6837 "TARGET_AVX512DQ"
6838 "vextract<shuffletype>32x8\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
6839 [(set_attr "type" "sselog1")
6840 (set_attr "prefix_extra" "1")
6841 (set_attr "length_immediate" "1")
6842 (set_attr "prefix" "evex")
6843 (set_attr "mode" "<sseinsnmode>")])
6844
;; High-half extraction (elements 8..15) from a sixteen-element 512-bit
;; register.  Alternative 0 (isa avx512dq) emits vextract<shuffletype>32x8
;; with immediate 1 and the optional mask operand; alternative 1 (isa
;; noavx512dq) emits an unmasked vextracti64x4 with immediate 1,
;; regardless of <shuffletype>.
6845 (define_insn "vec_extract_hi_<mode><mask_name>"
6846 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>,vm")
6847 (vec_select:<ssehalfvecmode>
6848 (match_operand:V16FI 1 "register_operand" "v,v")
6849 (parallel [(const_int 8) (const_int 9)
6850 (const_int 10) (const_int 11)
6851 (const_int 12) (const_int 13)
6852 (const_int 14) (const_int 15)])))]
6853 "TARGET_AVX512F && <mask_avx512dq_condition>"
6854 "@
6855 vextract<shuffletype>32x8\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}
6856 vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
6857 [(set_attr "type" "sselog1")
6858 (set_attr "prefix_extra" "1")
6859 (set_attr "isa" "avx512dq,noavx512dq")
6860 (set_attr "length_immediate" "1")
6861 (set_attr "prefix" "evex")
6862 (set_attr "mode" "<sseinsnmode>")])
6863
;; Masked extraction of one 128-bit half (operand 2: 0 = low, 1 = high)
;; from a 256-bit vector of the VI48F_256 iterator.  Operand 3 is the
;; merge source and operand 4 the mask register.  The generator for the
;; low- or high-half masked pattern is chosen from operand 2 and called
;; with operands 0, 1, 3, 4.
;; NOTE(review): when operand 0 is memory and operand 3 is a CONST_VECTOR,
;; operand 0 is replaced by the register returned by force_reg and no
;; store back to the original memory is visible in this expander --
;; confirm this is intended.
6864 (define_expand "avx512vl_vextractf128<mode>"
6865 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
6866 (match_operand:VI48F_256 1 "register_operand")
6867 (match_operand:SI 2 "const_0_to_1_operand")
6868 (match_operand:<ssehalfvecmode> 3 "vector_move_operand")
6869 (match_operand:QI 4 "register_operand")]
6870 "TARGET_AVX512DQ && TARGET_AVX512VL"
6871 {
6872 rtx (*insn)(rtx, rtx, rtx, rtx);
6873
6874 if (MEM_P (operands[0]) && GET_CODE (operands[3]) == CONST_VECTOR)
6875 operands[0] = force_reg (<ssehalfvecmode>mode, operands[0]);
6876
6877 switch (INTVAL (operands[2]))
6878 {
6879 case 0:
6880 insn = gen_vec_extract_lo_<mode>_mask;
6881 break;
6882 case 1:
6883 insn = gen_vec_extract_hi_<mode>_mask;
6884 break;
6885 default:
6886 gcc_unreachable ();
6887 }
6888
6889 emit_insn (insn (operands[0], operands[1], operands[3], operands[4]));
6890 DONE;
6891 })
6892
;; Unmasked extraction of one 128-bit half (operand 2: 0 = low, 1 = high)
;; from a 256-bit vector of the V_256 iterator.  The generator for the
;; low- or high-half pattern is chosen from operand 2 and called with
;; operands 0 and 1.
6893 (define_expand "avx_vextractf128<mode>"
6894 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
6895 (match_operand:V_256 1 "register_operand")
6896 (match_operand:SI 2 "const_0_to_1_operand")]
6897 "TARGET_AVX"
6898 {
6899 rtx (*insn)(rtx, rtx);
6900
6901 switch (INTVAL (operands[2]))
6902 {
6903 case 0:
6904 insn = gen_vec_extract_lo_<mode>;
6905 break;
6906 case 1:
6907 insn = gen_vec_extract_hi_<mode>;
6908 break;
6909 default:
6910 gcc_unreachable ();
6911 }
6912
6913 emit_insn (insn (operands[0], operands[1]));
6914 DONE;
6915 })
6916
;; Low half of a V16FI vector: selects elements 0-7 of operand 1.
;; The insn is rejected when both operands are memory.  When <mask_applied>
;; is true the output is vextract<shuffletype>32x8 with immediate 0;
;; otherwise the template is "#", and the define_split that follows matches
;; the same vec_select once reload has completed.
;; NOTE(review): no attribute list is attached to this pattern.
6917 (define_insn "vec_extract_lo_<mode><mask_name>"
6918 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
6919 (vec_select:<ssehalfvecmode>
6920 (match_operand:V16FI 1 "nonimmediate_operand" "vm,v")
6921 (parallel [(const_int 0) (const_int 1)
6922 (const_int 2) (const_int 3)
6923 (const_int 4) (const_int 5)
6924 (const_int 6) (const_int 7)])))]
6925 "TARGET_AVX512F
6926 && <mask_mode512bit_condition>
6927 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6928 {
6929 if (<mask_applied>)
6930 return "vextract<shuffletype>32x8\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
6931 else
6932 return "#";
6933 })
6934
;; After reload, turn the extraction of elements 0-7 of a V16FI operand
;; into a plain half-width move, provided the operands are not both memory.
6935 (define_split
6936 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
6937 (vec_select:<ssehalfvecmode>
6938 (match_operand:V16FI 1 "nonimmediate_operand")
6939 (parallel [(const_int 0) (const_int 1)
6940 (const_int 2) (const_int 3)
6941 (const_int 4) (const_int 5)
6942 (const_int 6) (const_int 7)])))]
6943 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
6944 && reload_completed"
6945 [(const_int 0)]
6946 {
6947 /* Re-express operand 1 in the half-width mode: a register keeps its
   register number, anything else goes through gen_lowpart.  */
6948 rtx half = (REG_P (operands[1])
6949 ? gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[1]))
6950 : gen_lowpart (<ssehalfvecmode>mode, operands[1]));
6951 emit_move_insn (operands[0], half);
6952 DONE;
6953 })
6955
;; Low half of a VI8F_256 vector: selects elements 0 and 1 of operand 1.
;; The insn is rejected when both operands are memory.  When <mask_applied>
;; is true the output is vextract<shuffletype>64x2 with immediate 0;
;; otherwise the template is "#", and the define_split that follows matches
;; the same vec_select once reload has completed.
;; The masked template prints the mask through <mask_operand2>, as the
;; other vec_extract_{lo,hi}_<mode><mask_name> patterns in this file do,
;; instead of a hand-written "%{%3%}".
;; NOTE(review): the "mode" attribute is "XI" although operand 1 is a
;; 256-bit mode; the high-half counterpart uses <sseinsnmode> - confirm.
6956 (define_insn "vec_extract_lo_<mode><mask_name>"
6957 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=v,m")
6958 (vec_select:<ssehalfvecmode>
6959 (match_operand:VI8F_256 1 "nonimmediate_operand" "vm,v")
6960 (parallel [(const_int 0) (const_int 1)])))]
6961 "TARGET_AVX
6962 && <mask_avx512vl_condition> && <mask_avx512dq_condition>
6963 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6964 {
6965 if (<mask_applied>)
6966 return "vextract<shuffletype>64x2\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
6967 else
6968 return "#";
6969 }
6970 [(set_attr "type" "sselog")
6971 (set_attr "prefix_extra" "1")
6972 (set_attr "length_immediate" "1")
6973 (set_attr "memory" "none,store")
6974 (set_attr "prefix" "evex")
6975 (set_attr "mode" "XI")])
6976
;; After reload, turn the extraction of elements 0-1 of a VI8F_256 operand
;; into a plain half-width move, provided the operands are not both memory.
6977 (define_split
6978 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
6979 (vec_select:<ssehalfvecmode>
6980 (match_operand:VI8F_256 1 "nonimmediate_operand")
6981 (parallel [(const_int 0) (const_int 1)])))]
6982 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))
6983 && reload_completed"
6984 [(const_int 0)]
6985 {
6986 /* Re-express operand 1 in the half-width mode: a register keeps its
   register number, anything else goes through gen_lowpart.  */
6987 rtx half = (REG_P (operands[1])
6988 ? gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[1]))
6989 : gen_lowpart (<ssehalfvecmode>mode, operands[1]));
6990 emit_move_insn (operands[0], half);
6991 DONE;
6992 })
6994
;; High half of a VI8F_256 vector: selects elements 2 and 3 of operand 1.
;; With AVX512DQ and AVX512VL the output is vextract<shuffletype>64x2 with
;; immediate 1 (printing <mask_operand2>); otherwise it is vextract<i128>,
;; which prints no mask operand.
;; The condition includes the mask conditions, as the low-half counterpart
;; does, so that the masked variant is not enabled on a target that would
;; take the mask-less vextract<i128> branch.
6995 (define_insn "vec_extract_hi_<mode><mask_name>"
6996 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=v,<store_mask_constraint>")
6997 (vec_select:<ssehalfvecmode>
6998 (match_operand:VI8F_256 1 "register_operand" "v,v")
6999 (parallel [(const_int 2) (const_int 3)])))]
7000 "TARGET_AVX && <mask_avx512vl_condition> && <mask_avx512dq_condition>"
7001 {
7002 if (TARGET_AVX512DQ && TARGET_AVX512VL)
7003 return "vextract<shuffletype>64x2\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}";
7004 else
7005 return "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}";
7006 }
7007 [(set_attr "type" "sselog")
7008 (set_attr "prefix_extra" "1")
7009 (set_attr "length_immediate" "1")
7010 (set_attr "memory" "none,store")
7011 (set_attr "prefix" "vex")
7012 (set_attr "mode" "<sseinsnmode>")])
7013
;; After reload, turn the extraction of elements 0-3 of a VI4F_256 operand
;; into a plain half-width move, provided the operands are not both memory.
7014 (define_split
7015 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
7016 (vec_select:<ssehalfvecmode>
7017 (match_operand:VI4F_256 1 "nonimmediate_operand")
7018 (parallel [(const_int 0) (const_int 1)
7019 (const_int 2) (const_int 3)])))]
7020 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1])) && reload_completed"
7021 [(const_int 0)]
7022 {
7023 /* Re-express operand 1 in the half-width mode: a register keeps its
   register number, anything else goes through gen_lowpart.  */
7024 rtx half = (REG_P (operands[1])
7025 ? gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[1]))
7026 : gen_lowpart (<ssehalfvecmode>mode, operands[1]));
7027 emit_move_insn (operands[0], half);
7028 DONE;
7029 })
7031
7032
;; Low half of a VI4F_256 vector: selects elements 0-3 of operand 1.
;; When <mask_applied> is true the output is vextract<shuffletype>32x4 with
;; immediate 0; otherwise the template is "#".
;; NOTE(review): unlike the VI8F_256 low-half pattern, the condition here
;; does not exclude the case where both operands are memory - confirm.
7033 (define_insn "vec_extract_lo_<mode><mask_name>"
7034 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
7035 (vec_select:<ssehalfvecmode>
7036 (match_operand:VI4F_256 1 "nonimmediate_operand" "v")
7037 (parallel [(const_int 0) (const_int 1)
7038 (const_int 2) (const_int 3)])))]
7039 "TARGET_AVX && <mask_avx512vl_condition> && <mask_avx512dq_condition>"
7040 {
7041 if (<mask_applied>)
7042 return "vextract<shuffletype>32x4\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
7043 else
7044 return "#";
7045 }
7046 [(set_attr "type" "sselog1")
7047 (set_attr "prefix_extra" "1")
7048 (set_attr "length_immediate" "1")
7049 (set_attr "prefix" "evex")
7050 (set_attr "mode" "<sseinsnmode>")])
7051
;; Masked store of the low half (elements 0-3) of a VI4F_256 register to
;; memory.  Operand 2 is the same memory as the destination (constraint
;; "0"), and operand 3 is the QImode mask, printed inside braces after the
;; destination in both assembler dialects.
;; The mask uses constraint "Yk", as the other explicit mask operands in
;; this file do.
7052 (define_insn "vec_extract_lo_<mode>_maskm"
7053 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
7054 (vec_merge:<ssehalfvecmode>
7055 (vec_select:<ssehalfvecmode>
7056 (match_operand:VI4F_256 1 "register_operand" "v")
7057 (parallel [(const_int 0) (const_int 1)
7058 (const_int 2) (const_int 3)]))
7059 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
7060 (match_operand:QI 3 "register_operand" "Yk")))]
7061 "TARGET_AVX512VL && TARGET_AVX512F"
7062 "vextract<shuffletype>32x4\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}"
7063 [(set_attr "type" "sselog1")
7064 (set_attr "prefix_extra" "1")
7065 (set_attr "length_immediate" "1")
7066 (set_attr "prefix" "evex")
7067 (set_attr "mode" "<sseinsnmode>")])
7068
;; Masked store of the high half (elements 4-7) of a VI4F_256 register to
;; memory.  Operand 2 is the same memory as the destination (constraint
;; "0"), and operand 3 is the mask, printed inside braces after the
;; destination.
;; The mask operand is QImode with constraint "Yk", matching
;; vec_extract_lo_<mode>_maskm and the other explicit mask operands in
;; this file.
7069 (define_insn "vec_extract_hi_<mode>_maskm"
7070 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
7071 (vec_merge:<ssehalfvecmode>
7072 (vec_select:<ssehalfvecmode>
7073 (match_operand:VI4F_256 1 "register_operand" "v")
7074 (parallel [(const_int 4) (const_int 5)
7075 (const_int 6) (const_int 7)]))
7076 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
7077 (match_operand:QI 3 "register_operand" "Yk")))]
7078 "TARGET_AVX512F && TARGET_AVX512VL"
7079 {
7080 return "vextract<shuffletype>32x4\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}";
7081 }
7082 [(set_attr "type" "sselog1")
7083 (set_attr "prefix_extra" "1")
7084 (set_attr "length_immediate" "1")
7085 (set_attr "prefix" "evex")
7086 (set_attr "mode" "<sseinsnmode>")])
7087
;; High half of a VI4F_256 vector: selects elements 4-7 of operand 1.
;; With TARGET_AVX512VL the output is vextract<shuffletype>32x4 with
;; immediate 1 (printing <mask_operand2>); otherwise it is vextract<i128>.
;; The "prefix" attribute follows the same TARGET_AVX512VL test.
7088 (define_insn "vec_extract_hi_<mode><mask_name>"
7089 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
7090 (vec_select:<ssehalfvecmode>
7091 (match_operand:VI4F_256 1 "register_operand" "v")
7092 (parallel [(const_int 4) (const_int 5)
7093 (const_int 6) (const_int 7)])))]
7094 "TARGET_AVX && <mask_avx512vl_condition>"
7095 {
7096 if (TARGET_AVX512VL)
7097 return "vextract<shuffletype>32x4\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}";
7098 else
7099 return "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}";
7100 }
7101 [(set_attr "type" "sselog1")
7102 (set_attr "prefix_extra" "1")
7103 (set_attr "length_immediate" "1")
7104 (set (attr "prefix")
7105 (if_then_else
7106 (match_test "TARGET_AVX512VL")
7107 (const_string "evex")
7108 (const_string "vex")))
7109 (set_attr "mode" "<sseinsnmode>")])
7110
;; Low half of a V32HI vector: selects elements 0-15 of operand 1.
;; The output template is "#"; after reload the pattern is split into a
;; plain V16HI set whose source is operand 1 re-expressed in V16HImode
;; (same register number, or the memory at offset 0).
7111 (define_insn_and_split "vec_extract_lo_v32hi"
7112 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,m")
7113 (vec_select:V16HI
7114 (match_operand:V32HI 1 "nonimmediate_operand" "vm,v")
7115 (parallel [(const_int 0) (const_int 1)
7116 (const_int 2) (const_int 3)
7117 (const_int 4) (const_int 5)
7118 (const_int 6) (const_int 7)
7119 (const_int 8) (const_int 9)
7120 (const_int 10) (const_int 11)
7121 (const_int 12) (const_int 13)
7122 (const_int 14) (const_int 15)])))]
7123 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7124 "#"
7125 "&& reload_completed"
7126 [(set (match_dup 0) (match_dup 1))]
7127 {
7128 if (REG_P (operands[1]))
7129 operands[1] = gen_rtx_REG (V16HImode, REGNO (operands[1]));
7130 else
7131 operands[1] = adjust_address (operands[1], V16HImode, 0);
7132 })
7133
;; High half of a V32HI vector: selects elements 16-31 of operand 1 and
;; emits vextracti64x4 with immediate 1 to a register or to memory.
;; Operand 1 uses "register_operand", which agrees with its "v,v"
;; constraints and with vec_extract_hi_v16hi.
7134 (define_insn "vec_extract_hi_v32hi"
7135 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,m")
7136 (vec_select:V16HI
7137 (match_operand:V32HI 1 "register_operand" "v,v")
7138 (parallel [(const_int 16) (const_int 17)
7139 (const_int 18) (const_int 19)
7140 (const_int 20) (const_int 21)
7141 (const_int 22) (const_int 23)
7142 (const_int 24) (const_int 25)
7143 (const_int 26) (const_int 27)
7144 (const_int 28) (const_int 29)
7145 (const_int 30) (const_int 31)])))]
7146 "TARGET_AVX512F"
7147 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
7148 [(set_attr "type" "sselog")
7149 (set_attr "prefix_extra" "1")
7150 (set_attr "length_immediate" "1")
7151 (set_attr "memory" "none,store")
7152 (set_attr "prefix" "evex")
7153 (set_attr "mode" "XI")])
7154
;; Low half of a V16HI vector: selects elements 0-7 of operand 1.
;; The output template is "#"; after reload the pattern is split into a
;; plain V8HI set whose source is operand 1 re-expressed in V8HImode
;; (same register number, or the memory at offset 0).
7155 (define_insn_and_split "vec_extract_lo_v16hi"
7156 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
7157 (vec_select:V8HI
7158 (match_operand:V16HI 1 "nonimmediate_operand" "xm,x")
7159 (parallel [(const_int 0) (const_int 1)
7160 (const_int 2) (const_int 3)
7161 (const_int 4) (const_int 5)
7162 (const_int 6) (const_int 7)])))]
7163 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7164 "#"
7165 "&& reload_completed"
7166 [(set (match_dup 0) (match_dup 1))]
7167 {
7168 if (REG_P (operands[1]))
7169 operands[1] = gen_rtx_REG (V8HImode, REGNO (operands[1]));
7170 else
7171 operands[1] = adjust_address (operands[1], V8HImode, 0);
7172 })
7173
;; High half of a V16HI vector: selects elements 8-15 of register operand 1
;; and emits a vextract...128 instruction with immediate 1, writing either
;; a register or memory (second alternative, "memory" attribute "store").
7174 (define_insn "vec_extract_hi_v16hi"
7175 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
7176 (vec_select:V8HI
7177 (match_operand:V16HI 1 "register_operand" "x,x")
7178 (parallel [(const_int 8) (const_int 9)
7179 (const_int 10) (const_int 11)
7180 (const_int 12) (const_int 13)
7181 (const_int 14) (const_int 15)])))]
7182 "TARGET_AVX"
7183 "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
7184 [(set_attr "type" "sselog")
7185 (set_attr "prefix_extra" "1")
7186 (set_attr "length_immediate" "1")
7187 (set_attr "memory" "none,store")
7188 (set_attr "prefix" "vex")
7189 (set_attr "mode" "OI")])
7190
;; Low half of a V64QI vector: selects elements 0-31 of operand 1.
;; The output template is "#"; after reload the pattern is split into a
;; plain V32QI set whose source is operand 1 re-expressed in V32QImode
;; (same register number, or the memory at offset 0).
7191 (define_insn_and_split "vec_extract_lo_v64qi"
7192 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
7193 (vec_select:V32QI
7194 (match_operand:V64QI 1 "nonimmediate_operand" "vm,v")
7195 (parallel [(const_int 0) (const_int 1)
7196 (const_int 2) (const_int 3)
7197 (const_int 4) (const_int 5)
7198 (const_int 6) (const_int 7)
7199 (const_int 8) (const_int 9)
7200 (const_int 10) (const_int 11)
7201 (const_int 12) (const_int 13)
7202 (const_int 14) (const_int 15)
7203 (const_int 16) (const_int 17)
7204 (const_int 18) (const_int 19)
7205 (const_int 20) (const_int 21)
7206 (const_int 22) (const_int 23)
7207 (const_int 24) (const_int 25)
7208 (const_int 26) (const_int 27)
7209 (const_int 28) (const_int 29)
7210 (const_int 30) (const_int 31)])))]
7211 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7212 "#"
7213 "&& reload_completed"
7214 [(set (match_dup 0) (match_dup 1))]
7215 {
7216 if (REG_P (operands[1]))
7217 operands[1] = gen_rtx_REG (V32QImode, REGNO (operands[1]));
7218 else
7219 operands[1] = adjust_address (operands[1], V32QImode, 0);
7220 })
7221
;; High half of a V64QI vector: selects elements 32-63 of operand 1 and
;; emits vextracti64x4 with immediate 1 to a register or to memory.
;; Operand 1 uses "register_operand", which agrees with its "v,v"
;; constraints and with vec_extract_hi_v32qi.
7222 (define_insn "vec_extract_hi_v64qi"
7223 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
7224 (vec_select:V32QI
7225 (match_operand:V64QI 1 "register_operand" "v,v")
7226 (parallel [(const_int 32) (const_int 33)
7227 (const_int 34) (const_int 35)
7228 (const_int 36) (const_int 37)
7229 (const_int 38) (const_int 39)
7230 (const_int 40) (const_int 41)
7231 (const_int 42) (const_int 43)
7232 (const_int 44) (const_int 45)
7233 (const_int 46) (const_int 47)
7234 (const_int 48) (const_int 49)
7235 (const_int 50) (const_int 51)
7236 (const_int 52) (const_int 53)
7237 (const_int 54) (const_int 55)
7238 (const_int 56) (const_int 57)
7239 (const_int 58) (const_int 59)
7240 (const_int 60) (const_int 61)
7241 (const_int 62) (const_int 63)])))]
7242 "TARGET_AVX512F"
7243 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
7244 [(set_attr "type" "sselog")
7245 (set_attr "prefix_extra" "1")
7246 (set_attr "length_immediate" "1")
7247 (set_attr "memory" "none,store")
7248 (set_attr "prefix" "evex")
7249 (set_attr "mode" "XI")])
7250
;; Low half of a V32QI vector: selects elements 0-15 of operand 1.
;; The output template is "#"; after reload the pattern is split into a
;; plain V16QI set whose source is operand 1 re-expressed in V16QImode
;; (same register number, or the memory at offset 0).
7251 (define_insn_and_split "vec_extract_lo_v32qi"
7252 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
7253 (vec_select:V16QI
7254 (match_operand:V32QI 1 "nonimmediate_operand" "xm,x")
7255 (parallel [(const_int 0) (const_int 1)
7256 (const_int 2) (const_int 3)
7257 (const_int 4) (const_int 5)
7258 (const_int 6) (const_int 7)
7259 (const_int 8) (const_int 9)
7260 (const_int 10) (const_int 11)
7261 (const_int 12) (const_int 13)
7262 (const_int 14) (const_int 15)])))]
7263 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7264 "#"
7265 "&& reload_completed"
7266 [(set (match_dup 0) (match_dup 1))]
7267 {
7268 if (REG_P (operands[1]))
7269 operands[1] = gen_rtx_REG (V16QImode, REGNO (operands[1]));
7270 else
7271 operands[1] = adjust_address (operands[1], V16QImode, 0);
7272 })
7273
;; High half of a V32QI vector: selects elements 16-31 of register
;; operand 1 and emits a vextract...128 instruction with immediate 1,
;; writing either a register or memory (second alternative, "memory"
;; attribute "store").
7274 (define_insn "vec_extract_hi_v32qi"
7275 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
7276 (vec_select:V16QI
7277 (match_operand:V32QI 1 "register_operand" "x,x")
7278 (parallel [(const_int 16) (const_int 17)
7279 (const_int 18) (const_int 19)
7280 (const_int 20) (const_int 21)
7281 (const_int 22) (const_int 23)
7282 (const_int 24) (const_int 25)
7283 (const_int 26) (const_int 27)
7284 (const_int 28) (const_int 29)
7285 (const_int 30) (const_int 31)])))]
7286 "TARGET_AVX"
7287 "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
7288 [(set_attr "type" "sselog")
7289 (set_attr "prefix_extra" "1")
7290 (set_attr "length_immediate" "1")
7291 (set_attr "memory" "none,store")
7292 (set_attr "prefix" "vex")
7293 (set_attr "mode" "OI")])
7294
7295 ;; Modes handled by vec_extract patterns.
;; Each row lists the 512-, 256- and 128-bit mode for one element type.
;; The 128-bit modes are unconditional; the 256-bit modes are gated on
;; TARGET_AVX; the 512-bit modes on TARGET_AVX512F, except the byte and
;; halfword modes (V64QI, V32HI), which are gated on TARGET_AVX512BW.
7296 (define_mode_iterator VEC_EXTRACT_MODE
7297 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
7298 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
7299 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
7300 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
7301 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
7302 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
7303
;; Scalar element extraction for every mode in VEC_EXTRACT_MODE.
;; Operand 0 is the scalar destination register, operand 1 the vector
;; register and operand 2 a constant element index.  All the work is
;; delegated to ix86_expand_vector_extract, called with `false' as its
;; first argument.
7304 (define_expand "vec_extract<mode>"
7305 [(match_operand:<ssescalarmode> 0 "register_operand")
7306 (match_operand:VEC_EXTRACT_MODE 1 "register_operand")
7307 (match_operand 2 "const_int_operand")]
7308 "TARGET_SSE"
7309 {
7310 ix86_expand_vector_extract (false, operands[0], operands[1],
7311 INTVAL (operands[2]));
7312 DONE;
7313 })
7314
7315 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7316 ;;
7317 ;; Parallel double-precision floating point element swizzling
7318 ;;
7319 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7320
;; 512-bit vunpckhpd: from the 16-element concatenation of operands 1 and
;; 2, selects elements 1,9, 3,11, 5,13, 7,15, i.e. the odd-numbered
;; element of each pair, alternating between operand 1 and operand 2.
;; Operand 1 uses "register_operand", which agrees with its "v" constraint
;; and with the 256-bit avx_unpckhpd256 pattern.
7321 (define_insn "<mask_codefor>avx512f_unpckhpd512<mask_name>"
7322 [(set (match_operand:V8DF 0 "register_operand" "=v")
7323 (vec_select:V8DF
7324 (vec_concat:V16DF
7325 (match_operand:V8DF 1 "register_operand" "v")
7326 (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
7327 (parallel [(const_int 1) (const_int 9)
7328 (const_int 3) (const_int 11)
7329 (const_int 5) (const_int 13)
7330 (const_int 7) (const_int 15)])))]
7331 "TARGET_AVX512F"
7332 "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7333 [(set_attr "type" "sselog")
7334 (set_attr "prefix" "evex")
7335 (set_attr "mode" "V8DF")])
7336
7337 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit vunpckhpd: from the 8-element concatenation of operands 1 and 2,
;; selects elements 1,5, 3,7.
;; NOTE(review): the "prefix" attribute is "vex" even for the <mask_name>
;; variant, which prints <mask_operand3> - confirm this is intended.
7338 (define_insn "avx_unpckhpd256<mask_name>"
7339 [(set (match_operand:V4DF 0 "register_operand" "=v")
7340 (vec_select:V4DF
7341 (vec_concat:V8DF
7342 (match_operand:V4DF 1 "register_operand" "v")
7343 (match_operand:V4DF 2 "nonimmediate_operand" "vm"))
7344 (parallel [(const_int 1) (const_int 5)
7345 (const_int 3) (const_int 7)])))]
7346 "TARGET_AVX && <mask_avx512vl_condition>"
7347 "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7348 [(set_attr "type" "sselog")
7349 (set_attr "prefix" "vex")
7350 (set_attr "mode" "V4DF")])
7351
;; Full-width high interleave of two V4DF vectors, built from three sets:
;;   operand 3 = elements 0,4,2,6 of concat (operand 1, operand 2)
;;   operand 4 = elements 1,5,3,7 of concat (operand 1, operand 2)
;;   operand 0 = elements 2,3,6,7 of concat (operand 3, operand 4)
;; The preparation statements allocate operands 3 and 4 as fresh V4DF
;; pseudo registers.
7352 (define_expand "vec_interleave_highv4df"
7353 [(set (match_dup 3)
7354 (vec_select:V4DF
7355 (vec_concat:V8DF
7356 (match_operand:V4DF 1 "register_operand" "x")
7357 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
7358 (parallel [(const_int 0) (const_int 4)
7359 (const_int 2) (const_int 6)])))
7360 (set (match_dup 4)
7361 (vec_select:V4DF
7362 (vec_concat:V8DF
7363 (match_dup 1)
7364 (match_dup 2))
7365 (parallel [(const_int 1) (const_int 5)
7366 (const_int 3) (const_int 7)])))
7367 (set (match_operand:V4DF 0 "register_operand")
7368 (vec_select:V4DF
7369 (vec_concat:V8DF
7370 (match_dup 3)
7371 (match_dup 4))
7372 (parallel [(const_int 2) (const_int 3)
7373 (const_int 6) (const_int 7)])))]
7374 "TARGET_AVX"
7375 {
7376 operands[3] = gen_reg_rtx (V4DFmode);
7377 operands[4] = gen_reg_rtx (V4DFmode);
7378 })
7379
7380
;; Masked 128-bit vunpckhpd: elements 1 and 3 of concat (operand 1,
;; operand 2) are merged with operand 3 under the QImode mask in operand 4.
;; Operand 3 is either tied to the destination ("0") or matches
;; constraint "C"; the template prints the mask in braces followed by %N3.
7381 (define_insn "avx512vl_unpckhpd128_mask"
7382 [(set (match_operand:V2DF 0 "register_operand" "=v")
7383 (vec_merge:V2DF
7384 (vec_select:V2DF
7385 (vec_concat:V4DF
7386 (match_operand:V2DF 1 "register_operand" "v")
7387 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
7388 (parallel [(const_int 1) (const_int 3)]))
7389 (match_operand:V2DF 3 "vector_move_operand" "0C")
7390 (match_operand:QI 4 "register_operand" "Yk")))]
7391 "TARGET_AVX512VL"
7392 "vunpckhpd\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
7393 [(set_attr "type" "sselog")
7394 (set_attr "prefix" "evex")
7395 (set_attr "mode" "V2DF")])
7396
;; High interleave of two V2DF vectors: elements 1 and 3 of
;; concat (operand 1, operand 2).  If
;; ix86_vec_interleave_v2df_operator_ok (operands, 1) rejects the operand
;; combination, operand 2 is forced into a register.
7397 (define_expand "vec_interleave_highv2df"
7398 [(set (match_operand:V2DF 0 "register_operand")
7399 (vec_select:V2DF
7400 (vec_concat:V4DF
7401 (match_operand:V2DF 1 "nonimmediate_operand")
7402 (match_operand:V2DF 2 "nonimmediate_operand"))
7403 (parallel [(const_int 1)
7404 (const_int 3)])))]
7405 "TARGET_SSE2"
7406 {
7407 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
7408 operands[2] = force_reg (V2DFmode, operands[2]);
7409 })
7410
;; Matcher for the V2DF high interleave (elements 1 and 3 of the
;; concatenation), valid only when ix86_vec_interleave_v2df_operator_ok
;; accepts the operands.  Six alternatives, in order:
;;   0: unpckhpd   (isa noavx)   1: vunpckhpd (isa avx)
;;   2: %vmovddup from %H1 (isa sse3, operand 2 tied to operand 1)
;;   3: movlpd from %H1 (isa noavx)   4: vmovlpd from %H1 (isa avx)
;;   5: %vmovhpd of register operand 1 to the memory destination
7411 (define_insn "*vec_interleave_highv2df"
7412 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
7413 (vec_select:V2DF
7414 (vec_concat:V4DF
7415 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,o,o,o,x")
7416 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,0,x,0"))
7417 (parallel [(const_int 1)
7418 (const_int 3)])))]
7419 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
7420 "@
7421 unpckhpd\t{%2, %0|%0, %2}
7422 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
7423 %vmovddup\t{%H1, %0|%0, %H1}
7424 movlpd\t{%H1, %0|%0, %H1}
7425 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
7426 %vmovhpd\t{%1, %0|%q0, %1}"
7427 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
7428 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
7429 (set_attr "ssememalign" "64")
7430 (set_attr "prefix_data16" "*,*,*,1,*,1")
7431 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
7432 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
7433
;; 512-bit movddup expressed as an unpack-low of operand 1 with itself:
;; selects elements 0,8, 2,10, 4,12, 6,14 of concat (operand 1, operand 1),
;; so each even-numbered element appears twice in the result.
7434 (define_expand "avx512f_movddup512<mask_name>"
7435 [(set (match_operand:V8DF 0 "register_operand")
7436 (vec_select:V8DF
7437 (vec_concat:V16DF
7438 (match_operand:V8DF 1 "nonimmediate_operand")
7439 (match_dup 1))
7440 (parallel [(const_int 0) (const_int 8)
7441 (const_int 2) (const_int 10)
7442 (const_int 4) (const_int 12)
7443 (const_int 6) (const_int 14)])))]
7444 "TARGET_AVX512F")
7445
;; 512-bit unpack-low expander: selects elements 0,8, 2,10, 4,12, 6,14 of
;; concat (operand 1, operand 2).  There are no preparation statements;
;; the pattern is emitted as written.
7446 (define_expand "avx512f_unpcklpd512<mask_name>"
7447 [(set (match_operand:V8DF 0 "register_operand")
7448 (vec_select:V8DF
7449 (vec_concat:V16DF
7450 (match_operand:V8DF 1 "register_operand")
7451 (match_operand:V8DF 2 "nonimmediate_operand"))
7452 (parallel [(const_int 0) (const_int 8)
7453 (const_int 2) (const_int 10)
7454 (const_int 4) (const_int 12)
7455 (const_int 6) (const_int 14)])))]
7456 "TARGET_AVX512F")
7457
;; Matcher for the 512-bit unpack-low selection (elements 0,8, 2,10, 4,12,
;; 6,14 of the concatenation).  Two alternatives:
;;   0: operand 2 is tied to operand 1 ("1"), emitted as vmovddup
;;   1: distinct operands, emitted as vunpcklpd
7458 (define_insn "*avx512f_unpcklpd512<mask_name>"
7459 [(set (match_operand:V8DF 0 "register_operand" "=v,v")
7460 (vec_select:V8DF
7461 (vec_concat:V16DF
7462 (match_operand:V8DF 1 "nonimmediate_operand" "vm, v")
7463 (match_operand:V8DF 2 "nonimmediate_operand" "1 ,vm"))
7464 (parallel [(const_int 0) (const_int 8)
7465 (const_int 2) (const_int 10)
7466 (const_int 4) (const_int 12)
7467 (const_int 6) (const_int 14)])))]
7468 "TARGET_AVX512F"
7469 "@
7470 vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}
7471 vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7472 [(set_attr "type" "sselog")
7473 (set_attr "prefix" "evex")
7474 (set_attr "mode" "V8DF")])
7475
7476 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit movddup expressed as an unpack-low of operand 1 with itself:
;; selects elements 0,4, 2,6 of concat (operand 1, operand 1).
7477 (define_expand "avx_movddup256<mask_name>"
7478 [(set (match_operand:V4DF 0 "register_operand")
7479 (vec_select:V4DF
7480 (vec_concat:V8DF
7481 (match_operand:V4DF 1 "nonimmediate_operand")
7482 (match_dup 1))
7483 (parallel [(const_int 0) (const_int 4)
7484 (const_int 2) (const_int 6)])))]
7485 "TARGET_AVX && <mask_avx512vl_condition>")
7486
;; 256-bit unpack-low expander: selects elements 0,4, 2,6 of
;; concat (operand 1, operand 2).  There are no preparation statements.
7487 (define_expand "avx_unpcklpd256<mask_name>"
7488 [(set (match_operand:V4DF 0 "register_operand")
7489 (vec_select:V4DF
7490 (vec_concat:V8DF
7491 (match_operand:V4DF 1 "register_operand")
7492 (match_operand:V4DF 2 "nonimmediate_operand"))
7493 (parallel [(const_int 0) (const_int 4)
7494 (const_int 2) (const_int 6)])))]
7495 "TARGET_AVX && <mask_avx512vl_condition>")
7496
;; Matcher for the 256-bit unpack-low selection (elements 0,4, 2,6 of the
;; concatenation).  Two alternatives:
;;   0: register operand 1 with register/memory operand 2, emitted as
;;      vunpcklpd
;;   1: memory operand 1 with operand 2 tied to it ("1"), emitted as
;;      vmovddup
;; NOTE(review): the "prefix" attribute is "vex" even for the <mask_name>
;; variant - confirm this is intended.
7497 (define_insn "*avx_unpcklpd256<mask_name>"
7498 [(set (match_operand:V4DF 0 "register_operand" "=v,v")
7499 (vec_select:V4DF
7500 (vec_concat:V8DF
7501 (match_operand:V4DF 1 "nonimmediate_operand" " v,m")
7502 (match_operand:V4DF 2 "nonimmediate_operand" "vm,1"))
7503 (parallel [(const_int 0) (const_int 4)
7504 (const_int 2) (const_int 6)])))]
7505 "TARGET_AVX && <mask_avx512vl_condition>"
7506 "@
7507 vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
7508 vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}"
7509 [(set_attr "type" "sselog")
7510 (set_attr "prefix" "vex")
7511 (set_attr "mode" "V4DF")])
7512
;; Full-width low interleave of two V4DF vectors, built from three sets:
;;   operand 3 = elements 0,4,2,6 of concat (operand 1, operand 2)
;;   operand 4 = elements 1,5,3,7 of concat (operand 1, operand 2)
;;   operand 0 = elements 0,1,4,5 of concat (operand 3, operand 4)
;; The preparation statements allocate operands 3 and 4 as fresh V4DF
;; pseudo registers.
7513 (define_expand "vec_interleave_lowv4df"
7514 [(set (match_dup 3)
7515 (vec_select:V4DF
7516 (vec_concat:V8DF
7517 (match_operand:V4DF 1 "register_operand" "x")
7518 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
7519 (parallel [(const_int 0) (const_int 4)
7520 (const_int 2) (const_int 6)])))
7521 (set (match_dup 4)
7522 (vec_select:V4DF
7523 (vec_concat:V8DF
7524 (match_dup 1)
7525 (match_dup 2))
7526 (parallel [(const_int 1) (const_int 5)
7527 (const_int 3) (const_int 7)])))
7528 (set (match_operand:V4DF 0 "register_operand")
7529 (vec_select:V4DF
7530 (vec_concat:V8DF
7531 (match_dup 3)
7532 (match_dup 4))
7533 (parallel [(const_int 0) (const_int 1)
7534 (const_int 4) (const_int 5)])))]
7535 "TARGET_AVX"
7536 {
7537 operands[3] = gen_reg_rtx (V4DFmode);
7538 operands[4] = gen_reg_rtx (V4DFmode);
7539 })
7540
;; Masked 128-bit vunpcklpd: elements 0 and 2 of concat (operand 1,
;; operand 2) are merged with operand 3 under the QImode mask in operand 4.
;; Operand 3 is either tied to the destination ("0") or matches
;; constraint "C"; the template prints the mask in braces followed by %N3.
7541 (define_insn "avx512vl_unpcklpd128_mask"
7542 [(set (match_operand:V2DF 0 "register_operand" "=v")
7543 (vec_merge:V2DF
7544 (vec_select:V2DF
7545 (vec_concat:V4DF
7546 (match_operand:V2DF 1 "register_operand" "v")
7547 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
7548 (parallel [(const_int 0) (const_int 2)]))
7549 (match_operand:V2DF 3 "vector_move_operand" "0C")
7550 (match_operand:QI 4 "register_operand" "Yk")))]
7551 "TARGET_AVX512VL"
7552 "vunpcklpd\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
7553 [(set_attr "type" "sselog")
7554 (set_attr "prefix" "evex")
7555 (set_attr "mode" "V2DF")])
7556
;; Low interleave of two V2DF vectors: elements 0 and 2 of
;; concat (operand 1, operand 2).  If
;; ix86_vec_interleave_v2df_operator_ok (operands, 0) rejects the operand
;; combination, operand 1 is forced into a register.
7557 (define_expand "vec_interleave_lowv2df"
7558 [(set (match_operand:V2DF 0 "register_operand")
7559 (vec_select:V2DF
7560 (vec_concat:V4DF
7561 (match_operand:V2DF 1 "nonimmediate_operand")
7562 (match_operand:V2DF 2 "nonimmediate_operand"))
7563 (parallel [(const_int 0)
7564 (const_int 2)])))]
7565 "TARGET_SSE2"
7566 {
7567 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
7568 operands[1] = force_reg (V2DFmode, operands[1]);
7569 })
7570
;; Matcher for the V2DF low interleave (elements 0 and 2 of the
;; concatenation), valid only when ix86_vec_interleave_v2df_operator_ok
;; accepts the operands.  Six alternatives, in order:
;;   0: unpcklpd   (isa noavx)   1: vunpcklpd (isa avx)
;;   2: %vmovddup of memory operand 1 (isa sse3, operand 2 tied to it)
;;   3: movhpd of memory operand 2 (isa noavx)
;;   4: vmovhpd of memory operand 2 (isa avx)
;;   5: %vmovlpd of register operand 2 to %H0 of the memory destination
7571 (define_insn "*vec_interleave_lowv2df"
7572 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,o")
7573 (vec_select:V2DF
7574 (vec_concat:V4DF
7575 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,m,0,x,0")
7576 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,m,m,x"))
7577 (parallel [(const_int 0)
7578 (const_int 2)])))]
7579 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
7580 "@
7581 unpcklpd\t{%2, %0|%0, %2}
7582 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
7583 %vmovddup\t{%1, %0|%0, %q1}
7584 movhpd\t{%2, %0|%0, %q2}
7585 vmovhpd\t{%2, %1, %0|%0, %1, %q2}
7586 %vmovlpd\t{%2, %H0|%H0, %2}"
7587 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
7588 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
7589 (set_attr "ssememalign" "64")
7590 (set_attr "prefix_data16" "*,*,*,1,*,1")
7591 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
7592 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
7593
;; After reload, a V2DF store to memory of elements 0 and 2 of
;; concat (operand 1, operand 1) - i.e. the low element of register
;; operand 1 twice - is split into two DFmode moves of that register's
;; DFmode view, to byte offsets 0 and 8 of the destination.
7594 (define_split
7595 [(set (match_operand:V2DF 0 "memory_operand")
7596 (vec_select:V2DF
7597 (vec_concat:V4DF
7598 (match_operand:V2DF 1 "register_operand")
7599 (match_dup 1))
7600 (parallel [(const_int 0)
7601 (const_int 2)])))]
7602 "TARGET_SSE3 && reload_completed"
7603 [(const_int 0)]
7604 {
7605 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
7606 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
7607 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
7608 DONE;
7609 })
7610
;; A V2DF register load that selects the same element (index N from
;; operand 2, and N + 2 from operand 3) from both copies of memory
;; operand 1 is rewritten as a vec_duplicate of the single DFmode element
;; at byte offset N * 8 of that memory.
7611 (define_split
7612 [(set (match_operand:V2DF 0 "register_operand")
7613 (vec_select:V2DF
7614 (vec_concat:V4DF
7615 (match_operand:V2DF 1 "memory_operand")
7616 (match_dup 1))
7617 (parallel [(match_operand:SI 2 "const_0_to_1_operand")
7618 (match_operand:SI 3 "const_int_operand")])))]
7619 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
7620 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
7621 {
7622 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
7623 })
7624
;; Scalar vscalef on VF_128 modes: the UNSPEC_SCALEF result of operands 1
;; and 2 is merged with operand 1 under the constant mask 1, so only
;; element 0 takes the new value and the rest come from operand 1.
;; Operand 2's predicate and constraint, and the <round_op3> text in the
;; template, come from the rounding substitution named by <round_name>.
7625 (define_insn "avx512f_vmscalef<mode><round_name>"
7626 [(set (match_operand:VF_128 0 "register_operand" "=v")
7627 (vec_merge:VF_128
7628 (unspec:VF_128
7629 [(match_operand:VF_128 1 "register_operand" "v")
7630 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>")]
7631 UNSPEC_SCALEF)
7632 (match_dup 1)
7633 (const_int 1)))]
7634 "TARGET_AVX512F"
7635 "vscalef<ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
7636 [(set_attr "prefix" "evex")
7637 (set_attr "mode" "<ssescalarmode>")])
7638
;; Packed vscalef on VF_AVX512VL modes: operand 0 receives the
;; UNSPEC_SCALEF result of operands 1 and 2.
;; Operand 2 takes its predicate from <round_nimm_predicate>, paired with
;; <round_constraint>, as the scalar avx512f_vmscalef pattern does, so the
;; predicate and the constraint come from the same substitution.
7639 (define_insn "<avx512>_scalef<mode><mask_name><round_name>"
7640 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
7641 (unspec:VF_AVX512VL
7642 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
7643 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")]
7644 UNSPEC_SCALEF))]
7645 "TARGET_AVX512F"
7646 "vscalef<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
7647 [(set_attr "prefix" "evex")
7648 (set_attr "mode" "<MODE>")])
7649
;; Zero-masking expander for vpternlog: forwards operands 0-4 to the
;; _maskz_1 generator, inserting CONST0_RTX (<MODE>mode) as the sixth
;; argument ahead of the mask register from operand 5.
7650 (define_expand "<avx512>_vternlog<mode>_maskz"
7651 [(match_operand:VI48_AVX512VL 0 "register_operand")
7652 (match_operand:VI48_AVX512VL 1 "register_operand")
7653 (match_operand:VI48_AVX512VL 2 "register_operand")
7654 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand")
7655 (match_operand:SI 4 "const_0_to_255_operand")
7656 (match_operand:<avx512fmaskmode> 5 "register_operand")]
7657 "TARGET_AVX512F"
7658 {
7659 emit_insn (gen_<avx512>_vternlog<mode>_maskz_1 (
7660 operands[0], operands[1], operands[2], operands[3],
7661 operands[4], CONST0_RTX (<MODE>mode), operands[5]));
7662 DONE;
7663 })
7664
;; vpternlog: UNSPEC_VTERNLOG of three vector inputs and an 8-bit
;; immediate (operand 4, range 0-255).  Operand 1 is tied to the
;; destination ("0") and is therefore not printed separately in the
;; template.
7665 (define_insn "<avx512>_vternlog<mode><sd_maskz_name>"
7666 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
7667 (unspec:VI48_AVX512VL
7668 [(match_operand:VI48_AVX512VL 1 "register_operand" "0")
7669 (match_operand:VI48_AVX512VL 2 "register_operand" "v")
7670 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand" "vm")
7671 (match_operand:SI 4 "const_0_to_255_operand")]
7672 UNSPEC_VTERNLOG))]
7673 "TARGET_AVX512F"
7674 "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3, %4}"
7675 [(set_attr "type" "sselog")
7676 (set_attr "prefix" "evex")
7677 (set_attr "mode" "<sseinsnmode>")])
7678
;; Merge-masked vpternlog: the UNSPEC_VTERNLOG result is merged with
;; operand 1 (which is also tied to the destination) under the mask
;; register in operand 5, printed in braces after the destination.
7679 (define_insn "<avx512>_vternlog<mode>_mask"
7680 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
7681 (vec_merge:VI48_AVX512VL
7682 (unspec:VI48_AVX512VL
7683 [(match_operand:VI48_AVX512VL 1 "register_operand" "0")
7684 (match_operand:VI48_AVX512VL 2 "register_operand" "v")
7685 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand" "vm")
7686 (match_operand:SI 4 "const_0_to_255_operand")]
7687 UNSPEC_VTERNLOG)
7688 (match_dup 1)
7689 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
7690 "TARGET_AVX512F"
7691 "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0%{%5%}|%0%{%5%}, %2, %3, %4}"
7692 [(set_attr "type" "sselog")
7693 (set_attr "prefix" "evex")
7694 (set_attr "mode" "<sseinsnmode>")])
7695
;; Packed vgetexp on VF_AVX512VL modes: operand 0 receives the
;; UNSPEC_GETEXP result of the single input operand 1, whose predicate and
;; constraint come from the <round_saeonly_*> attributes.
;; The ";" after the template string starts a comment and has no effect.
7696 (define_insn "<avx512>_getexp<mode><mask_name><round_saeonly_name>"
7697 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
7698 (unspec:VF_AVX512VL [(match_operand:VF_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
7699 UNSPEC_GETEXP))]
7700 "TARGET_AVX512F"
7701 "vgetexp<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}";
7702 [(set_attr "prefix" "evex")
7703 (set_attr "mode" "<MODE>")])
7704
;; Scalar vgetexp on VF_128 modes: the UNSPEC_GETEXP result of operands 1
;; and 2 is merged with operand 1 under the constant mask 1, so only
;; element 0 takes the new value and the rest come from operand 1.
;; The ";" after the template string starts a comment and has no effect.
7705 (define_insn "avx512f_sgetexp<mode><round_saeonly_name>"
7706 [(set (match_operand:VF_128 0 "register_operand" "=v")
7707 (vec_merge:VF_128
7708 (unspec:VF_128
7709 [(match_operand:VF_128 1 "register_operand" "v")
7710 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
7711 UNSPEC_GETEXP)
7712 (match_dup 1)
7713 (const_int 1)))]
7714 "TARGET_AVX512F"
7715 "vgetexp<ssescalarmodesuffix>\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %2<round_saeonly_op3>}";
7716 [(set_attr "prefix" "evex")
7717 (set_attr "mode" "<ssescalarmode>")])
7718
;; valign on VI48_AVX512VL modes: UNSPEC_ALIGN of register operand 1,
;; register-or-memory operand 2 and an immediate (operand 3, range 0-255).
;; The ";" after the template string starts a comment and has no effect.
7719 (define_insn "<mask_codefor><avx512>_align<mode><mask_name>"
7720 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
7721 (unspec:VI48_AVX512VL [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
7722 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
7723 (match_operand:SI 3 "const_0_to_255_operand")]
7724 UNSPEC_ALIGN))]
7725 "TARGET_AVX512F"
7726 "valign<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
7727 [(set_attr "prefix" "evex")
7728 (set_attr "mode" "<sseinsnmode>")])
7729
;; Masked 512-bit shufps expander.  The 8-bit immediate in operand 3 is
;; decoded into sixteen explicit element indices for
;; gen_avx512f_shufps512_1_mask.  For each group of four result elements
;; (base offsets 0, 4, 8 and 12) the two 2-bit fields at bits 0-1 and 2-3
;; give indices at that base offset, and the two fields at bits 4-5 and
;; 6-7 give indices at that base offset plus 16.  Operands 4 and 5 are
;; passed through as the last two arguments.
7730 (define_expand "avx512f_shufps512_mask"
7731 [(match_operand:V16SF 0 "register_operand")
7732 (match_operand:V16SF 1 "register_operand")
7733 (match_operand:V16SF 2 "nonimmediate_operand")
7734 (match_operand:SI 3 "const_0_to_255_operand")
7735 (match_operand:V16SF 4 "register_operand")
7736 (match_operand:HI 5 "register_operand")]
7737 "TARGET_AVX512F"
7738 {
7739 int mask = INTVAL (operands[3]);
7740 emit_insn (gen_avx512f_shufps512_1_mask (operands[0], operands[1], operands[2],
7741 GEN_INT ((mask >> 0) & 3),
7742 GEN_INT ((mask >> 2) & 3),
7743 GEN_INT (((mask >> 4) & 3) + 16),
7744 GEN_INT (((mask >> 6) & 3) + 16),
7745 GEN_INT (((mask >> 0) & 3) + 4),
7746 GEN_INT (((mask >> 2) & 3) + 4),
7747 GEN_INT (((mask >> 4) & 3) + 20),
7748 GEN_INT (((mask >> 6) & 3) + 20),
7749 GEN_INT (((mask >> 0) & 3) + 8),
7750 GEN_INT (((mask >> 2) & 3) + 8),
7751 GEN_INT (((mask >> 4) & 3) + 24),
7752 GEN_INT (((mask >> 6) & 3) + 24),
7753 GEN_INT (((mask >> 0) & 3) + 12),
7754 GEN_INT (((mask >> 2) & 3) + 12),
7755 GEN_INT (((mask >> 4) & 3) + 28),
7756 GEN_INT (((mask >> 6) & 3) + 28),
7757 operands[4], operands[5]));
7758 DONE;
7759 })
7760
7761
;; Zero-masking expander for vfixupimm: forwards operands 0-4 to the
;; _maskz_1 generator, inserting CONST0_RTX (<MODE>mode) ahead of the mask
;; register from operand 5, followed by whatever
;; <round_saeonly_expand_operand6> expands to.
7762 (define_expand "<avx512>_fixupimm<mode>_maskz<round_saeonly_expand_name>"
7763 [(match_operand:VF_AVX512VL 0 "register_operand")
7764 (match_operand:VF_AVX512VL 1 "register_operand")
7765 (match_operand:VF_AVX512VL 2 "register_operand")
7766 (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>")
7767 (match_operand:SI 4 "const_0_to_255_operand")
7768 (match_operand:<avx512fmaskmode> 5 "register_operand")]
7769 "TARGET_AVX512F"
7770 {
7771 emit_insn (gen_<avx512>_fixupimm<mode>_maskz_1<round_saeonly_expand_name> (
7772 operands[0], operands[1], operands[2], operands[3],
7773 operands[4], CONST0_RTX (<MODE>mode), operands[5]
7774 <round_saeonly_expand_operand6>));
7775 DONE;
7776 })
7777
;; Packed vfixupimm<ssemodesuffix>, modelled as UNSPEC_FIXUPIMM.  Operand 1
;; is tied to the destination by the "0" constraint; operand 3 is the
;; integer vector of the same shape and operand 4 the 0..255 immediate.
;; The ';' after the output template begins an md comment.
7778 (define_insn "<avx512>_fixupimm<mode><sd_maskz_name><round_saeonly_name>"
7779 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
7780 (unspec:VF_AVX512VL
7781 [(match_operand:VF_AVX512VL 1 "register_operand" "0")
7782 (match_operand:VF_AVX512VL 2 "register_operand" "v")
7783 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>")
7784 (match_operand:SI 4 "const_0_to_255_operand")]
7785 UNSPEC_FIXUPIMM))]
7786 "TARGET_AVX512F"
7787 "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3<round_saeonly_sd_mask_op5>, %4}";
7788 [(set_attr "prefix" "evex")
7789 (set_attr "mode" "<MODE>")])
7790
;; Merge-masked packed vfixupimm.  The vec_merge takes the UNSPEC_FIXUPIMM
;; result where mask operand 5 (constraint "Yk") selects it, and operand 1
;; (match_dup 1, tied to the destination) elsewhere.  The template prints
;; the mask as {%5} after the destination.
7791 (define_insn "<avx512>_fixupimm<mode>_mask<round_saeonly_name>"
7792 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
7793 (vec_merge:VF_AVX512VL
7794 (unspec:VF_AVX512VL
7795 [(match_operand:VF_AVX512VL 1 "register_operand" "0")
7796 (match_operand:VF_AVX512VL 2 "register_operand" "v")
7797 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>")
7798 (match_operand:SI 4 "const_0_to_255_operand")]
7799 UNSPEC_FIXUPIMM)
7800 (match_dup 1)
7801 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
7802 "TARGET_AVX512F"
7803 "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %3<round_saeonly_op6>, %4}";
7804 [(set_attr "prefix" "evex")
7805 (set_attr "mode" "<MODE>")])
7806
;; Zero-masking expander for the scalar (VF_128) fixupimm patterns.  Same
;; shape as the packed expander: operands 0-4 are forwarded to the _maskz_1
;; generator with CONST0_RTX (<MODE>mode) inserted before mask operand 5.
7807 (define_expand "avx512f_sfixupimm<mode>_maskz<round_saeonly_expand_name>"
7808 [(match_operand:VF_128 0 "register_operand")
7809 (match_operand:VF_128 1 "register_operand")
7810 (match_operand:VF_128 2 "register_operand")
7811 (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>")
7812 (match_operand:SI 4 "const_0_to_255_operand")
7813 (match_operand:<avx512fmaskmode> 5 "register_operand")]
7814 "TARGET_AVX512F"
7815 {
7816 emit_insn (gen_avx512f_sfixupimm<mode>_maskz_1<round_saeonly_expand_name> (
7817 operands[0], operands[1], operands[2], operands[3],
7818 operands[4], CONST0_RTX (<MODE>mode), operands[5]
7819 <round_saeonly_expand_operand6>));
7820 DONE;
7821 })
7822
;; Scalar vfixupimm<ssescalarmodesuffix>.  The vec_merge with (const_int 1)
;; takes only element 0 from the UNSPEC_FIXUPIMM result; the remaining
;; elements come from operand 1, which is tied to the destination.
;; The ';' after the output template begins an md comment.
7823 (define_insn "avx512f_sfixupimm<mode><sd_maskz_name><round_saeonly_name>"
7824 [(set (match_operand:VF_128 0 "register_operand" "=v")
7825 (vec_merge:VF_128
7826 (unspec:VF_128
7827 [(match_operand:VF_128 1 "register_operand" "0")
7828 (match_operand:VF_128 2 "register_operand" "v")
7829 (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
7830 (match_operand:SI 4 "const_0_to_255_operand")]
7831 UNSPEC_FIXUPIMM)
7832 (match_dup 1)
7833 (const_int 1)))]
7834 "TARGET_AVX512F"
7835 "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3<round_saeonly_sd_mask_op5>, %4}";
7836 [(set_attr "prefix" "evex")
7837 (set_attr "mode" "<ssescalarmode>")])
7838
;; Merge-masked scalar vfixupimm.  The inner vec_merge (const_int 1) limits
;; the UNSPEC_FIXUPIMM result to element 0; the outer vec_merge then applies
;; mask operand 5, falling back to operand 1 (tied to the destination).
7839 (define_insn "avx512f_sfixupimm<mode>_mask<round_saeonly_name>"
7840 [(set (match_operand:VF_128 0 "register_operand" "=v")
7841 (vec_merge:VF_128
7842 (vec_merge:VF_128
7843 (unspec:VF_128
7844 [(match_operand:VF_128 1 "register_operand" "0")
7845 (match_operand:VF_128 2 "register_operand" "v")
7846 (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
7847 (match_operand:SI 4 "const_0_to_255_operand")]
7848 UNSPEC_FIXUPIMM)
7849 (match_dup 1)
7850 (const_int 1))
7851 (match_dup 1)
7852 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
7853 "TARGET_AVX512F"
7854 "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %3<round_saeonly_op6>, %4}";
7855 [(set_attr "prefix" "evex")
7856 (set_attr "mode" "<ssescalarmode>")])
7857
;; Packed vrndscale<ssemodesuffix>: UNSPEC_ROUND of operand 1 with the
;; 0..255 immediate in operand 2.  length_immediate is 1 for that byte.
7858 (define_insn "<avx512>_rndscale<mode><mask_name><round_saeonly_name>"
7859 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
7860 (unspec:VF_AVX512VL
7861 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>")
7862 (match_operand:SI 2 "const_0_to_255_operand")]
7863 UNSPEC_ROUND))]
7864 "TARGET_AVX512F"
7865 "vrndscale<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}"
7866 [(set_attr "length_immediate" "1")
7867 (set_attr "prefix" "evex")
7868 (set_attr "mode" "<MODE>")])
7869
;; Scalar vrndscale<ssescalarmodesuffix>.  Element 0 comes from the
;; UNSPEC_ROUND of operands 1-3; the vec_merge with (const_int 1) takes the
;; remaining elements from operand 1.
7870 (define_insn "avx512f_rndscale<mode><round_saeonly_name>"
7871 [(set (match_operand:VF_128 0 "register_operand" "=v")
7872 (vec_merge:VF_128
7873 (unspec:VF_128
7874 [(match_operand:VF_128 1 "register_operand" "v")
7875 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
7876 (match_operand:SI 3 "const_0_to_255_operand")]
7877 UNSPEC_ROUND)
7878 (match_dup 1)
7879 (const_int 1)))]
7880 "TARGET_AVX512F"
7881 "vrndscale<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}"
7882 [(set_attr "length_immediate" "1")
7883 (set_attr "prefix" "evex")
7884 (set_attr "mode" "<MODE>")])
7885
7886 ;; One bit in mask selects 2 elements.
;; 512-bit vshufps expressed as a vec_select over the V32SF concatenation of
;; operands 1 and 2 (selectors 0-15 index operand 1, 16-31 index operand 2).
;; The insn condition requires the selectors of the 2nd, 3rd and 4th groups
;; of four to equal the first group's selectors plus 4, 8 and 12, so a
;; single 8-bit immediate can describe the whole shuffle.
7887 (define_insn "avx512f_shufps512_1<mask_name>"
7888 [(set (match_operand:V16SF 0 "register_operand" "=v")
7889 (vec_select:V16SF
7890 (vec_concat:V32SF
7891 (match_operand:V16SF 1 "register_operand" "v")
7892 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
7893 (parallel [(match_operand 3 "const_0_to_3_operand")
7894 (match_operand 4 "const_0_to_3_operand")
7895 (match_operand 5 "const_16_to_19_operand")
7896 (match_operand 6 "const_16_to_19_operand")
7897 (match_operand 7 "const_4_to_7_operand")
7898 (match_operand 8 "const_4_to_7_operand")
7899 (match_operand 9 "const_20_to_23_operand")
7900 (match_operand 10 "const_20_to_23_operand")
7901 (match_operand 11 "const_8_to_11_operand")
7902 (match_operand 12 "const_8_to_11_operand")
7903 (match_operand 13 "const_24_to_27_operand")
7904 (match_operand 14 "const_24_to_27_operand")
7905 (match_operand 15 "const_12_to_15_operand")
7906 (match_operand 16 "const_12_to_15_operand")
7907 (match_operand 17 "const_28_to_31_operand")
7908 (match_operand 18 "const_28_to_31_operand")])))]
7909 "TARGET_AVX512F
7910 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
7911 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
7912 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
7913 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4)
7914 && INTVAL (operands[3]) == (INTVAL (operands[11]) - 8)
7915 && INTVAL (operands[4]) == (INTVAL (operands[12]) - 8)
7916 && INTVAL (operands[5]) == (INTVAL (operands[13]) - 8)
7917 && INTVAL (operands[6]) == (INTVAL (operands[14]) - 8)
7918 && INTVAL (operands[3]) == (INTVAL (operands[15]) - 12)
7919 && INTVAL (operands[4]) == (INTVAL (operands[16]) - 12)
7920 && INTVAL (operands[5]) == (INTVAL (operands[17]) - 12)
7921 && INTVAL (operands[6]) == (INTVAL (operands[18]) - 12))"
7922 {
/* Rebuild the 8-bit immediate from the first group of selectors only; the
   insn condition guarantees the other groups repeat it.  operands[3] is
   overwritten so that %3 prints the immediate.  */
7923 int mask;
7924 mask = INTVAL (operands[3]);
7925 mask |= INTVAL (operands[4]) << 2;
7926 mask |= (INTVAL (operands[5]) - 16) << 4;
7927 mask |= (INTVAL (operands[6]) - 16) << 6;
7928 operands[3] = GEN_INT (mask);
7929
7930 return "vshufps\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
7931 }
7932 [(set_attr "type" "sselog")
7933 (set_attr "length_immediate" "1")
7934 (set_attr "prefix" "evex")
7935 (set_attr "mode" "V16SF")])
7936
;; Expander for the masked V8DF shuffle.  Bit i of the immediate (operand 3)
;; chooses between two adjacent selectors for result element i; the eight
;; selectors are passed to avx512f_shufpd512_1_mask, followed by operands 4
;; and 5 unchanged.
7937 (define_expand "avx512f_shufpd512_mask"
7938 [(match_operand:V8DF 0 "register_operand")
7939 (match_operand:V8DF 1 "register_operand")
7940 (match_operand:V8DF 2 "nonimmediate_operand")
7941 (match_operand:SI 3 "const_0_to_255_operand")
7942 (match_operand:V8DF 4 "register_operand")
7943 (match_operand:QI 5 "register_operand")]
7944 "TARGET_AVX512F"
7945 {
7946 int mask = INTVAL (operands[3]);
/* Even result elements use selectors 0..7, odd ones 8..15; these are the
   ranges the avx512f_shufpd512_1 pattern accepts.  */
7947 emit_insn (gen_avx512f_shufpd512_1_mask (operands[0], operands[1], operands[2],
7948 GEN_INT (mask & 1),
7949 GEN_INT (mask & 2 ? 9 : 8),
7950 GEN_INT (mask & 4 ? 3 : 2),
7951 GEN_INT (mask & 8 ? 11 : 10),
7952 GEN_INT (mask & 16 ? 5 : 4),
7953 GEN_INT (mask & 32 ? 13 : 12),
7954 GEN_INT (mask & 64 ? 7 : 6),
7955 GEN_INT (mask & 128 ? 15 : 14),
7956 operands[4], operands[5]));
7957 DONE;
7958 })
7959
;; 512-bit vshufpd expressed as a vec_select over the V16DF concatenation of
;; operands 1 and 2.  Each selector is restricted by its predicate to one of
;; two adjacent indices, so each contributes exactly one bit to the
;; immediate that the output code reconstructs.
7960 (define_insn "avx512f_shufpd512_1<mask_name>"
7961 [(set (match_operand:V8DF 0 "register_operand" "=v")
7962 (vec_select:V8DF
7963 (vec_concat:V16DF
7964 (match_operand:V8DF 1 "register_operand" "v")
7965 (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
7966 (parallel [(match_operand 3 "const_0_to_1_operand")
7967 (match_operand 4 "const_8_to_9_operand")
7968 (match_operand 5 "const_2_to_3_operand")
7969 (match_operand 6 "const_10_to_11_operand")
7970 (match_operand 7 "const_4_to_5_operand")
7971 (match_operand 8 "const_12_to_13_operand")
7972 (match_operand 9 "const_6_to_7_operand")
7973 (match_operand 10 "const_14_to_15_operand")])))]
7974 "TARGET_AVX512F"
7975 {
/* Subtract each selector's base index to get a 0/1 bit and place it at the
   bit position of its result element.  operands[3] is overwritten so that
   %3 prints the immediate.  */
7976 int mask;
7977 mask = INTVAL (operands[3]);
7978 mask |= (INTVAL (operands[4]) - 8) << 1;
7979 mask |= (INTVAL (operands[5]) - 2) << 2;
7980 mask |= (INTVAL (operands[6]) - 10) << 3;
7981 mask |= (INTVAL (operands[7]) - 4) << 4;
7982 mask |= (INTVAL (operands[8]) - 12) << 5;
7983 mask |= (INTVAL (operands[9]) - 6) << 6;
7984 mask |= (INTVAL (operands[10]) - 14) << 7;
7985 operands[3] = GEN_INT (mask);
7986
7987 return "vshufpd\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
7988 }
7989 [(set_attr "type" "sselog")
7990 (set_attr "length_immediate" "1")
7991 (set_attr "prefix" "evex")
7992 (set_attr "mode" "V8DF")])
7993
;; Expander for the V4DF shuffle.  The low four bits of the immediate
;; (operand 3) are decoded into four selectors for avx_shufpd256_1.
;; <mask_expand4_name>/<mask_expand4_args> are substitution attributes
;; defined outside this chunk (presumably the extra masked-variant operands).
7994 (define_expand "avx_shufpd256<mask_expand4_name>"
7995 [(match_operand:V4DF 0 "register_operand")
7996 (match_operand:V4DF 1 "register_operand")
7997 (match_operand:V4DF 2 "nonimmediate_operand")
7998 (match_operand:SI 3 "const_int_operand")]
7999 "TARGET_AVX"
8000 {
8001 int mask = INTVAL (operands[3]);
8002 emit_insn (gen_avx_shufpd256_1<mask_expand4_name> (operands[0],
8003 operands[1],
8004 operands[2],
8005 GEN_INT (mask & 1),
8006 GEN_INT (mask & 2 ? 5 : 4),
8007 GEN_INT (mask & 4 ? 3 : 2),
8008 GEN_INT (mask & 8 ? 7 : 6)
8009 <mask_expand4_args>));
8010 DONE;
8011 })
8012
;; 256-bit vshufpd expressed as a vec_select over the V8DF concatenation of
;; operands 1 and 2.  Each of the four selectors is limited to two adjacent
;; indices and contributes one bit to the immediate rebuilt below.
8013 (define_insn "avx_shufpd256_1<mask_name>"
8014 [(set (match_operand:V4DF 0 "register_operand" "=v")
8015 (vec_select:V4DF
8016 (vec_concat:V8DF
8017 (match_operand:V4DF 1 "register_operand" "v")
8018 (match_operand:V4DF 2 "nonimmediate_operand" "vm"))
8019 (parallel [(match_operand 3 "const_0_to_1_operand")
8020 (match_operand 4 "const_4_to_5_operand")
8021 (match_operand 5 "const_2_to_3_operand")
8022 (match_operand 6 "const_6_to_7_operand")])))]
8023 "TARGET_AVX && <mask_avx512vl_condition>"
8024 {
/* Turn each selector into a 0/1 bit by removing its base index, then
   overwrite operands[3] so that %3 prints the immediate.  */
8025 int mask;
8026 mask = INTVAL (operands[3]);
8027 mask |= (INTVAL (operands[4]) - 4) << 1;
8028 mask |= (INTVAL (operands[5]) - 2) << 2;
8029 mask |= (INTVAL (operands[6]) - 6) << 3;
8030 operands[3] = GEN_INT (mask);
8031
8032 return "vshufpd\t{%3, %2, %1, %0<mask_operand7>|%0<mask_operand7>, %1, %2, %3}";
8033 }
8034 [(set_attr "type" "sseshuf")
8035 (set_attr "length_immediate" "1")
8036 (set_attr "prefix" "vex")
8037 (set_attr "mode" "V4DF")])
8038
;; Expander for the V2DF shuffle.  Bit 0 of the immediate selects element
;; 0 or 1, and bit 1 selects element 2 or 3; both selectors are passed to
;; sse2_shufpd_v2df (plus <mask_expand4_args>, defined outside this chunk).
8039 (define_expand "sse2_shufpd<mask_expand4_name>"
8040 [(match_operand:V2DF 0 "register_operand")
8041 (match_operand:V2DF 1 "register_operand")
8042 (match_operand:V2DF 2 "nonimmediate_operand")
8043 (match_operand:SI 3 "const_int_operand")]
8044 "TARGET_SSE2"
8045 {
8046 int mask = INTVAL (operands[3]);
8047 emit_insn (gen_sse2_shufpd_v2df<mask_expand4_name> (operands[0], operands[1],
8048 operands[2], GEN_INT (mask & 1),
8049 GEN_INT (mask & 2 ? 3 : 2)
8050 <mask_expand4_args>));
8051 DONE;
8052 })
8053
;; Masked 128-bit vshufpd.  The shuffle is a vec_select over the V4DF
;; concatenation of operands 1 and 2; the vec_merge then applies mask
;; operand 6, taking unselected elements from operand 5 (either the
;; destination itself or zero, per the "0C" constraint).
;; The Intel-syntax half of the template previously printed "{6}" instead
;; of the mask register because the '%' before the operand number was
;; missing; both dialects now use %{%6%}.
8054 (define_insn "sse2_shufpd_v2df_mask"
8055 [(set (match_operand:V2DF 0 "register_operand" "=v")
8056 (vec_merge:V2DF
8057 (vec_select:V2DF
8058 (vec_concat:V4DF
8059 (match_operand:V2DF 1 "register_operand" "v")
8060 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
8061 (parallel [(match_operand 3 "const_0_to_1_operand")
8062 (match_operand 4 "const_2_to_3_operand")]))
8063 (match_operand:V2DF 5 "vector_move_operand" "0C")
8064 (match_operand:QI 6 "register_operand" "Yk")))]
8065 "TARGET_AVX512VL"
8066 {
/* Rebuild the 2-bit immediate from the two selectors and overwrite
   operands[3] so that %3 prints it.  */
8067 int mask;
8068 mask = INTVAL (operands[3]);
8069 mask |= (INTVAL (operands[4]) - 2) << 1;
8070 operands[3] = GEN_INT (mask);
8071
8072 return "vshufpd\t{%3, %2, %1, %0%{%6%}%N5|%0%{%6%}%N5, %1, %2, %3}";
8073 }
8074 [(set_attr "type" "sseshuf")
8075 (set_attr "length_immediate" "1")
8076 (set_attr "prefix" "evex")
8077 (set_attr "mode" "V2DF")])
8078
8079 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; vpunpckhqdq on V4DI: selects elements 1, 5, 3, 7 of the V8DI concatenation
;; of operands 1 and 2, i.e. the odd element of each pair, alternating
;; between operand 1 and operand 2.
8080 (define_insn "avx2_interleave_highv4di<mask_name>"
8081 [(set (match_operand:V4DI 0 "register_operand" "=v")
8082 (vec_select:V4DI
8083 (vec_concat:V8DI
8084 (match_operand:V4DI 1 "register_operand" "v")
8085 (match_operand:V4DI 2 "nonimmediate_operand" "vm"))
8086 (parallel [(const_int 1)
8087 (const_int 5)
8088 (const_int 3)
8089 (const_int 7)])))]
8090 "TARGET_AVX2 && <mask_avx512vl_condition>"
8091 "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8092 [(set_attr "type" "sselog")
8093 (set_attr "prefix" "vex")
8094 (set_attr "mode" "OI")])
8095
;; vpunpckhqdq on V8DI: selects the odd-indexed element of each pair from
;; operand 1 (1, 3, 5, 7) interleaved with the matching element of operand 2
;; (9, 11, 13, 15 in the V16DI concatenation).
8096 (define_insn "<mask_codefor>avx512f_interleave_highv8di<mask_name>"
8097 [(set (match_operand:V8DI 0 "register_operand" "=v")
8098 (vec_select:V8DI
8099 (vec_concat:V16DI
8100 (match_operand:V8DI 1 "register_operand" "v")
8101 (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
8102 (parallel [(const_int 1) (const_int 9)
8103 (const_int 3) (const_int 11)
8104 (const_int 5) (const_int 13)
8105 (const_int 7) (const_int 15)])))]
8106 "TARGET_AVX512F"
8107 "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8108 [(set_attr "type" "sselog")
8109 (set_attr "prefix" "evex")
8110 (set_attr "mode" "XI")])
8111
;; punpckhqdq on V2DI: result is element 1 of operand 1 followed by element
;; 1 of operand 2 (index 3 of the V4DI concatenation).  Alternative 0 is the
;; two-operand non-AVX form with operand 1 tied to the destination;
;; alternative 1 is the three-operand AVX form.
8112 (define_insn "vec_interleave_highv2di<mask_name>"
8113 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
8114 (vec_select:V2DI
8115 (vec_concat:V4DI
8116 (match_operand:V2DI 1 "register_operand" "0,v")
8117 (match_operand:V2DI 2 "nonimmediate_operand" "xm,vm"))
8118 (parallel [(const_int 1)
8119 (const_int 3)])))]
8120 "TARGET_SSE2 && <mask_avx512vl_condition>"
8121 "@
8122 punpckhqdq\t{%2, %0|%0, %2}
8123 vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8124 [(set_attr "isa" "noavx,avx")
8125 (set_attr "type" "sselog")
8126 (set_attr "prefix_data16" "1,*")
8127 (set_attr "prefix" "orig,<mask_prefix>")
8128 (set_attr "mode" "TI")])
8129
;; vpunpcklqdq on V4DI: selects elements 0, 4, 2, 6 of the V8DI concatenation
;; of operands 1 and 2, i.e. the even element of each pair, alternating
;; between operand 1 and operand 2.
8130 (define_insn "avx2_interleave_lowv4di<mask_name>"
8131 [(set (match_operand:V4DI 0 "register_operand" "=v")
8132 (vec_select:V4DI
8133 (vec_concat:V8DI
8134 (match_operand:V4DI 1 "register_operand" "v")
8135 (match_operand:V4DI 2 "nonimmediate_operand" "vm"))
8136 (parallel [(const_int 0)
8137 (const_int 4)
8138 (const_int 2)
8139 (const_int 6)])))]
8140 "TARGET_AVX2 && <mask_avx512vl_condition>"
8141 "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8142 [(set_attr "type" "sselog")
8143 (set_attr "prefix" "vex")
8144 (set_attr "mode" "OI")])
8145
;; vpunpcklqdq on V8DI: selects the even-indexed element of each pair from
;; operand 1 (0, 2, 4, 6) interleaved with the matching element of operand 2
;; (8, 10, 12, 14 in the V16DI concatenation).
8146 (define_insn "<mask_codefor>avx512f_interleave_lowv8di<mask_name>"
8147 [(set (match_operand:V8DI 0 "register_operand" "=v")
8148 (vec_select:V8DI
8149 (vec_concat:V16DI
8150 (match_operand:V8DI 1 "register_operand" "v")
8151 (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
8152 (parallel [(const_int 0) (const_int 8)
8153 (const_int 2) (const_int 10)
8154 (const_int 4) (const_int 12)
8155 (const_int 6) (const_int 14)])))]
8156 "TARGET_AVX512F"
8157 "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8158 [(set_attr "type" "sselog")
8159 (set_attr "prefix" "evex")
8160 (set_attr "mode" "XI")])
8161
;; punpcklqdq on V2DI: result is element 0 of operand 1 followed by element
;; 0 of operand 2 (index 2 of the V4DI concatenation).  Alternative 0 is the
;; two-operand non-AVX form with operand 1 tied to the destination;
;; alternative 1 is the three-operand AVX form.
;; The "prefix" attribute of alternative 1 uses <mask_prefix>, matching
;; vec_interleave_highv2di<mask_name>, so that the masked variant is not
;; described as VEX-encoded.
8162 (define_insn "vec_interleave_lowv2di<mask_name>"
8163 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
8164 (vec_select:V2DI
8165 (vec_concat:V4DI
8166 (match_operand:V2DI 1 "register_operand" "0,v")
8167 (match_operand:V2DI 2 "nonimmediate_operand" "xm,vm"))
8168 (parallel [(const_int 0)
8169 (const_int 2)])))]
8170 "TARGET_SSE2 && <mask_avx512vl_condition>"
8171 "@
8172 punpcklqdq\t{%2, %0|%0, %2}
8173 vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8174 [(set_attr "isa" "noavx,avx")
8175 (set_attr "type" "sselog")
8176 (set_attr "prefix_data16" "1,*")
8177 (set_attr "prefix" "orig,<mask_prefix>")
8178 (set_attr "mode" "TI")])
8179
;; Unmasked 128-bit shufpd for the VI8F_128 modes.  Selector 3 picks element
;; 0 or 1 (from operand 1) and selector 4 picks element 2 or 3 (from operand
;; 2) of the double-width concatenation.  Alternative 0 is the non-AVX
;; two-operand form, alternative 1 the AVX three-operand form.
8180 (define_insn "sse2_shufpd_<mode>"
8181 [(set (match_operand:VI8F_128 0 "register_operand" "=x,x")
8182 (vec_select:VI8F_128
8183 (vec_concat:<ssedoublevecmode>
8184 (match_operand:VI8F_128 1 "register_operand" "0,x")
8185 (match_operand:VI8F_128 2 "nonimmediate_operand" "xm,xm"))
8186 (parallel [(match_operand 3 "const_0_to_1_operand")
8187 (match_operand 4 "const_2_to_3_operand")])))]
8188 "TARGET_SSE2"
8189 {
/* Rebuild the 2-bit immediate and overwrite operands[3] so that %3 prints
   it in either template.  */
8190 int mask;
8191 mask = INTVAL (operands[3]);
8192 mask |= (INTVAL (operands[4]) - 2) << 1;
8193 operands[3] = GEN_INT (mask);
8194
8195 switch (which_alternative)
8196 {
8197 case 0:
8198 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
8199 case 1:
8200 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
8201 default:
8202 gcc_unreachable ();
8203 }
8204 }
8205 [(set_attr "isa" "noavx,avx")
8206 (set_attr "type" "sseshuf")
8207 (set_attr "length_immediate" "1")
8208 (set_attr "prefix" "orig,vex")
8209 (set_attr "mode" "V2DF")])
8210
8211 ;; Avoid combining registers from different units in a single alternative,
8212 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extract element 1 (the high double) of V2DF operand 1 into DF operand 0.
;; The condition rejects memory-to-memory.  Alternatives 0-2 emit movhpd or
;; unpckhpd directly; alternatives 3-5 (source in offsettable memory) output
;; "#" and therefore rely on being split.
8213 (define_insn "sse2_storehpd"
8214 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,x,*f,r")
8215 (vec_select:DF
8216 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,x,o,o,o")
8217 (parallel [(const_int 1)])))]
8218 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8219 "@
8220 %vmovhpd\t{%1, %0|%0, %1}
8221 unpckhpd\t%0, %0
8222 vunpckhpd\t{%d1, %0|%0, %d1}
8223 #
8224 #
8225 #"
8226 [(set_attr "isa" "*,noavx,avx,*,*,*")
8227 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov")
8228 (set (attr "prefix_data16")
8229 (if_then_else
8230 (and (eq_attr "alternative" "0")
8231 (not (match_test "TARGET_AVX")))
8232 (const_string "1")
8233 (const_string "*")))
8234 (set_attr "prefix" "maybe_vex,orig,vex,*,*,*")
8235 (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")])
8236
;; After reload, extracting element 1 of a V2DF that lives in memory becomes
;; a plain DF move from byte offset 8 of that memory operand.
8237 (define_split
8238 [(set (match_operand:DF 0 "register_operand")
8239 (vec_select:DF
8240 (match_operand:V2DF 1 "memory_operand")
8241 (parallel [(const_int 1)])))]
8242 "TARGET_SSE2 && reload_completed"
8243 [(set (match_dup 0) (match_dup 1))]
8244 "operands[1] = adjust_address (operands[1], DFmode, 8);")
8245
;; Element-1 extraction for targets with SSE but without SSE2, using the
;; single-precision moves movhps / movhlps / movlps.  Alternative 2 reads
;; the high half of a memory source via the %H1 operand modifier.
8246 (define_insn "*vec_extractv2df_1_sse"
8247 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
8248 (vec_select:DF
8249 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
8250 (parallel [(const_int 1)])))]
8251 "!TARGET_SSE2 && TARGET_SSE
8252 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8253 "@
8254 movhps\t{%1, %0|%q0, %1}
8255 movhlps\t{%1, %0|%0, %1}
8256 movlps\t{%H1, %0|%0, %H1}"
8257 [(set_attr "type" "ssemov")
8258 (set_attr "ssememalign" "64")
8259 (set_attr "mode" "V2SF,V4SF,V2SF")])
8260
8261 ;; Avoid combining registers from different units in a single alternative,
8262 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extract element 0 (the low double) of V2DF operand 1 into DF operand 0.
;; Only alternative 0 (register to memory) emits an instruction directly;
;; alternatives 1-4 output "#" and therefore rely on being split.
8263 (define_insn "sse2_storelpd"
8264 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
8265 (vec_select:DF
8266 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
8267 (parallel [(const_int 0)])))]
8268 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8269 "@
8270 %vmovlpd\t{%1, %0|%0, %1}
8271 #
8272 #
8273 #
8274 #"
8275 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
8276 (set_attr "prefix_data16" "1,*,*,*,*")
8277 (set_attr "prefix" "maybe_vex")
8278 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
8279
;; After reload, extracting element 0 of a V2DF becomes a plain DF move.
;; A register source is re-expressed as the same hard register in DFmode;
;; a memory source is narrowed to DFmode at offset 0.
8280 (define_split
8281 [(set (match_operand:DF 0 "register_operand")
8282 (vec_select:DF
8283 (match_operand:V2DF 1 "nonimmediate_operand")
8284 (parallel [(const_int 0)])))]
8285 "TARGET_SSE2 && reload_completed"
8286 [(set (match_dup 0) (match_dup 1))]
8287 {
8288 if (REG_P (operands[1]))
8289 operands[1] = gen_rtx_REG (DFmode, REGNO (operands[1]));
8290 else
8291 operands[1] = adjust_address (operands[1], DFmode, 0);
8292 })
8293
;; Element-0 extraction for targets with SSE but without SSE2, using movlps
;; for the memory alternatives and movaps for register to register.
8294 (define_insn "*vec_extractv2df_0_sse"
8295 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
8296 (vec_select:DF
8297 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
8298 (parallel [(const_int 0)])))]
8299 "!TARGET_SSE2 && TARGET_SSE
8300 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8301 "@
8302 movlps\t{%1, %0|%0, %1}
8303 movaps\t{%1, %0|%0, %1}
8304 movlps\t{%1, %0|%0, %q1}"
8305 [(set_attr "type" "ssemov")
8306 (set_attr "mode" "V2SF,V4SF,V2SF")])
8307
;; Expander wrapping sse2_loadhpd: builds a V2DF from element 0 of operand 1
;; and the DF operand 2.  ix86_fixup_binary_operands (defined elsewhere)
;; returns the destination actually used for the insn; if that differs from
;; operand 0 the result is copied back afterwards.
8308 (define_expand "sse2_loadhpd_exp"
8309 [(set (match_operand:V2DF 0 "nonimmediate_operand")
8310 (vec_concat:V2DF
8311 (vec_select:DF
8312 (match_operand:V2DF 1 "nonimmediate_operand")
8313 (parallel [(const_int 0)]))
8314 (match_operand:DF 2 "nonimmediate_operand")))]
8315 "TARGET_SSE2"
8316 {
8317 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
8318
8319 emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2]));
8320
8321 /* Fix up the destination if needed. */
8322 if (dst != operands[0])
8323 emit_move_insn (operands[0], dst);
8324
8325 DONE;
8326 })
8327
8328 ;; Avoid combining registers from different units in a single alternative,
8329 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Replace the high double of a V2DF: the result is element 0 of operand 1
;; concatenated with DF operand 2.  Alternatives 0-3 emit movhpd/unpcklpd
;; (non-AVX and AVX forms); alternatives 4-6 have a memory destination tied
;; to operand 1, output "#" and therefore rely on being split.
8330 (define_insn "sse2_loadhpd"
8331 [(set (match_operand:V2DF 0 "nonimmediate_operand"
8332 "=x,x,x,x,o,o ,o")
8333 (vec_concat:V2DF
8334 (vec_select:DF
8335 (match_operand:V2DF 1 "nonimmediate_operand"
8336 " 0,x,0,x,0,0 ,0")
8337 (parallel [(const_int 0)]))
8338 (match_operand:DF 2 "nonimmediate_operand"
8339 " m,m,x,x,x,*f,r")))]
8340 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
8341 "@
8342 movhpd\t{%2, %0|%0, %2}
8343 vmovhpd\t{%2, %1, %0|%0, %1, %2}
8344 unpcklpd\t{%2, %0|%0, %2}
8345 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
8346 #
8347 #
8348 #"
8349 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
8350 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
8351 (set_attr "ssememalign" "64")
8352 (set_attr "prefix_data16" "1,*,*,*,*,*,*")
8353 (set_attr "prefix" "orig,vex,orig,vex,*,*,*")
8354 (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")])
8355
;; After reload, replacing the high double of a V2DF in memory with a
;; register value becomes a plain DF store to byte offset 8 of that memory.
8356 (define_split
8357 [(set (match_operand:V2DF 0 "memory_operand")
8358 (vec_concat:V2DF
8359 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
8360 (match_operand:DF 1 "register_operand")))]
8361 "TARGET_SSE2 && reload_completed"
8362 [(set (match_dup 0) (match_dup 1))]
8363 "operands[0] = adjust_address (operands[0], DFmode, 8);")
8364
;; Expander wrapping sse2_loadlpd: builds a V2DF from the DF operand 2 and
;; element 1 of operand 1.  ix86_fixup_binary_operands (defined elsewhere)
;; returns the destination actually used for the insn; if that differs from
;; operand 0 the result is copied back afterwards.
8365 (define_expand "sse2_loadlpd_exp"
8366 [(set (match_operand:V2DF 0 "nonimmediate_operand")
8367 (vec_concat:V2DF
8368 (match_operand:DF 2 "nonimmediate_operand")
8369 (vec_select:DF
8370 (match_operand:V2DF 1 "nonimmediate_operand")
8371 (parallel [(const_int 1)]))))]
8372 "TARGET_SSE2"
8373 {
8374 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
8375
8376 emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2]));
8377
8378 /* Fix up the destination if needed. */
8379 if (dst != operands[0])
8380 emit_move_insn (operands[0], dst);
8381
8382 DONE;
8383 })
8384
8385 ;; Avoid combining registers from different units in a single alternative,
8386 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Replace the low double of a V2DF: the result is DF operand 2 concatenated
;; with element 1 of operand 1.  Operand 1 may be zero ("C", alternative 0),
;; in which case a plain movsd load is used.  Alternatives 8-10 have a
;; memory destination tied to operand 1, output "#" and therefore rely on
;; being split.
8387 (define_insn "sse2_loadlpd"
8388 [(set (match_operand:V2DF 0 "nonimmediate_operand"
8389 "=x,x,x,x,x,x,x,x,m,m ,m")
8390 (vec_concat:V2DF
8391 (match_operand:DF 2 "nonimmediate_operand"
8392 " m,m,m,x,x,0,0,x,x,*f,r")
8393 (vec_select:DF
8394 (match_operand:V2DF 1 "vector_move_operand"
8395 " C,0,x,0,x,x,o,o,0,0 ,0")
8396 (parallel [(const_int 1)]))))]
8397 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
8398 "@
8399 %vmovsd\t{%2, %0|%0, %2}
8400 movlpd\t{%2, %0|%0, %2}
8401 vmovlpd\t{%2, %1, %0|%0, %1, %2}
8402 movsd\t{%2, %0|%0, %2}
8403 vmovsd\t{%2, %1, %0|%0, %1, %2}
8404 shufpd\t{$2, %1, %0|%0, %1, 2}
8405 movhpd\t{%H1, %0|%0, %H1}
8406 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
8407 #
8408 #
8409 #"
8410 [(set_attr "isa" "*,noavx,avx,noavx,avx,noavx,noavx,avx,*,*,*")
8411 (set (attr "type")
8412 (cond [(eq_attr "alternative" "5")
8413 (const_string "sselog")
8414 (eq_attr "alternative" "9")
8415 (const_string "fmov")
8416 (eq_attr "alternative" "10")
8417 (const_string "imov")
8418 ]
8419 (const_string "ssemov")))
8420 (set_attr "ssememalign" "64")
8421 (set_attr "prefix_data16" "*,1,*,*,*,*,1,*,*,*,*")
8422 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*,*,*")
8423 (set_attr "prefix" "maybe_vex,orig,vex,orig,vex,orig,orig,vex,*,*,*")
8424 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")])
8425
;; After reload, replacing the low double of a V2DF in memory with a
;; register value becomes a plain DF store to byte offset 0 of that memory.
8426 (define_split
8427 [(set (match_operand:V2DF 0 "memory_operand")
8428 (vec_concat:V2DF
8429 (match_operand:DF 1 "register_operand")
8430 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
8431 "TARGET_SSE2 && reload_completed"
8432 [(set (match_dup 0) (match_dup 1))]
8433 "operands[0] = adjust_address (operands[0], DFmode, 0);")
8434
;; movsd as a vec_merge with (const_int 1): element 0 of the result comes
;; from operand 2 and element 1 from operand 1.  The nine alternatives cover
;; register/memory combinations using movsd, movlpd, shufpd and movhps;
;; alternative 5 (shufpd with immediate 2) is the only one typed "sselog".
8435 (define_insn "sse2_movsd"
8436 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,x,x,x,o")
8437 (vec_merge:V2DF
8438 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,m,m,x,0,0,x,0")
8439 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,0,x,0,x,o,o,x")
8440 (const_int 1)))]
8441 "TARGET_SSE2"
8442 "@
8443 movsd\t{%2, %0|%0, %2}
8444 vmovsd\t{%2, %1, %0|%0, %1, %2}
8445 movlpd\t{%2, %0|%0, %q2}
8446 vmovlpd\t{%2, %1, %0|%0, %1, %q2}
8447 %vmovlpd\t{%2, %0|%q0, %2}
8448 shufpd\t{$2, %1, %0|%0, %1, 2}
8449 movhps\t{%H1, %0|%0, %H1}
8450 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
8451 %vmovhps\t{%1, %H0|%H0, %1}"
8452 [(set_attr "isa" "noavx,avx,noavx,avx,*,noavx,noavx,avx,*")
8453 (set (attr "type")
8454 (if_then_else
8455 (eq_attr "alternative" "5")
8456 (const_string "sselog")
8457 (const_string "ssemov")))
8458 (set (attr "prefix_data16")
8459 (if_then_else
8460 (and (eq_attr "alternative" "2,4")
8461 (not (match_test "TARGET_AVX")))
8462 (const_string "1")
8463 (const_string "*")))
8464 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*")
8465 (set_attr "ssememalign" "64")
8466 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig,vex,maybe_vex")
8467 (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
8468
;; Broadcast a DF value into both elements of a V2DF.  Alternative 0
;; (non-AVX, operand 1 tied to the destination) uses unpcklpd on the
;; register itself; alternative 1 (isa "sse3") uses movddup from a register
;; or memory source.
8469 (define_insn "vec_dupv2df<mask_name>"
8470 [(set (match_operand:V2DF 0 "register_operand" "=x,v")
8471 (vec_duplicate:V2DF
8472 (match_operand:DF 1 "nonimmediate_operand" " 0,vm")))]
8473 "TARGET_SSE2 && <mask_avx512vl_condition>"
8474 "@
8475 unpcklpd\t%0, %0
8476 %vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
8477 [(set_attr "isa" "noavx,sse3")
8478 (set_attr "type" "sselog1")
8479 (set_attr "prefix" "orig,maybe_vex")
8480 (set_attr "mode" "V2DF,DF")])
8481
;; Build a V2DF from two DF values (operand 1 low, operand 2 high).
;; Alternative 2 handles operand 2 identical to memory operand 1 (constraint
;; "1") with movddup; alternative 5 handles a zero high half ("C") with
;; movsd.  Alternatives 6 and 7 (isa "noavx") use the single-precision moves
;; movlhps/movhps, so the insn is enabled for TARGET_SSE.
8482 (define_insn "*vec_concatv2df"
8483 [(set (match_operand:V2DF 0 "register_operand" "=x,v,v,x,x,v,x,x")
8484 (vec_concat:V2DF
8485 (match_operand:DF 1 "nonimmediate_operand" " 0,v,m,0,x,m,0,0")
8486 (match_operand:DF 2 "vector_move_operand" " x,v,1,m,m,C,x,m")))]
8487 "TARGET_SSE"
8488 "@
8489 unpcklpd\t{%2, %0|%0, %2}
8490 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
8491 %vmovddup\t{%1, %0|%0, %1}
8492 movhpd\t{%2, %0|%0, %2}
8493 vmovhpd\t{%2, %1, %0|%0, %1, %2}
8494 %vmovsd\t{%1, %0|%0, %1}
8495 movlhps\t{%2, %0|%0, %2}
8496 movhps\t{%2, %0|%0, %2}"
8497 [(set_attr "isa" "sse2_noavx,avx,sse3,sse2_noavx,avx,sse2,noavx,noavx")
8498 (set (attr "type")
8499 (if_then_else
8500 (eq_attr "alternative" "0,1,2")
8501 (const_string "sselog")
8502 (const_string "ssemov")))
8503 (set_attr "prefix_data16" "*,*,*,1,*,*,*,*")
8504 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex,orig,orig")
8505 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,DF,V4SF,V2SF")])
8506
8507 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8508 ;;
8509 ;; Parallel integer down-conversion operations
8510 ;;
8511 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8512
;; Destination modes for truncations from V16SI and V8DI sources.  For each
;; destination mode the attributes give the source mode (upper- and
;; lower-case spellings) and the two-letter vpmov mnemonic suffix.
8513 (define_mode_iterator PMOV_DST_MODE_1 [V16QI V16HI V8SI V8HI])
8514 (define_mode_attr pmov_src_mode
8515 [(V16QI "V16SI") (V16HI "V16SI") (V8SI "V8DI") (V8HI "V8DI")])
8516 (define_mode_attr pmov_src_lower
8517 [(V16QI "v16si") (V16HI "v16si") (V8SI "v8di") (V8HI "v8di")])
8518 (define_mode_attr pmov_suff_1
8519 [(V16QI "db") (V16HI "dw") (V8SI "qd") (V8HI "qw")])
8520
;; Unmasked vpmov<trunsuffix><pmov_suff_1> down-conversion: any_truncate of
;; the wider source register (operand 1) into a register (alternative 0) or
;; directly into memory (alternative 1).
8521 (define_insn "*avx512f_<code><pmov_src_lower><mode>2"
8522 [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
8523 (any_truncate:PMOV_DST_MODE_1
8524 (match_operand:<pmov_src_mode> 1 "register_operand" "v,v")))]
8525 "TARGET_AVX512F"
8526 "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0|%0, %1}"
8527 [(set_attr "type" "ssemov")
8528 (set_attr "memory" "none,store")
8529 (set_attr "prefix" "evex")
8530 (set_attr "mode" "<sseinsnmode>")])
8531
;; Masked vpmov down-conversion.  The vec_merge takes the truncated value
;; where mask operand 3 selects it and operand 2 elsewhere; operand 2 is the
;; destination itself or zero ("0C") for the register alternative, and the
;; destination itself ("0") for the memory alternative.
8532 (define_insn "avx512f_<code><pmov_src_lower><mode>2_mask"
8533 [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
8534 (vec_merge:PMOV_DST_MODE_1
8535 (any_truncate:PMOV_DST_MODE_1
8536 (match_operand:<pmov_src_mode> 1 "register_operand" "v,v"))
8537 (match_operand:PMOV_DST_MODE_1 2 "vector_move_operand" "0C,0")
8538 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
8539 "TARGET_AVX512F"
8540 "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8541 [(set_attr "type" "ssemov")
8542 (set_attr "memory" "none,store")
8543 (set_attr "prefix" "evex")
8544 (set_attr "mode" "<sseinsnmode>")])
8545
;; Expander for a masked truncating store: the memory destination (operand
;; 0) is also the merge source (match_dup 0), so elements not selected by
;; mask operand 2 keep their previous memory contents.
8546 (define_expand "avx512f_<code><pmov_src_lower><mode>2_mask_store"
8547 [(set (match_operand:PMOV_DST_MODE_1 0 "memory_operand")
8548 (vec_merge:PMOV_DST_MODE_1
8549 (any_truncate:PMOV_DST_MODE_1
8550 (match_operand:<pmov_src_mode> 1 "register_operand"))
8551 (match_dup 0)
8552 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
8553 "TARGET_AVX512F")
8554
;; Unmasked vpmov<trunsuffix>wb: truncates V32HI operand 1 to V32QI, into a
;; register (alternative 0) or directly into memory (alternative 1).
;; Requires TARGET_AVX512BW.
8555 (define_insn "*avx512bw_<code>v32hiv32qi2"
8556 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
8557 (any_truncate:V32QI
8558 (match_operand:V32HI 1 "register_operand" "v,v")))]
8559 "TARGET_AVX512BW"
8560 "vpmov<trunsuffix>wb\t{%1, %0|%0, %1}"
8561 [(set_attr "type" "ssemov")
8562 (set_attr "memory" "none,store")
8563 (set_attr "prefix" "evex")
8564 (set_attr "mode" "XI")])
8565
;; Masked vpmov<trunsuffix>wb.  The mask (operand 3) is an SImode register,
;; one bit per V32QI element; unselected elements come from operand 2 (the
;; destination itself or zero for the register alternative).
8566 (define_insn "avx512bw_<code>v32hiv32qi2_mask"
8567 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
8568 (vec_merge:V32QI
8569 (any_truncate:V32QI
8570 (match_operand:V32HI 1 "register_operand" "v,v"))
8571 (match_operand:V32QI 2 "vector_move_operand" "0C,0")
8572 (match_operand:SI 3 "register_operand" "Yk,Yk")))]
8573 "TARGET_AVX512BW"
8574 "vpmov<trunsuffix>wb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8575 [(set_attr "type" "ssemov")
8576 (set_attr "memory" "none,store")
8577 (set_attr "prefix" "evex")
8578 (set_attr "mode" "XI")])
8579
;; Expander for the masked V32HI -> V32QI truncating store: the destination
;; (operand 0) is also the merge source (match_dup 0).
;; NOTE(review): operand 0 uses "nonimmediate_operand" here whereas the
;; avx512f_..._mask_store expander uses "memory_operand" -- confirm whether
;; the difference is intentional.
8580 (define_expand "avx512bw_<code>v32hiv32qi2_mask_store"
8581 [(set (match_operand:V32QI 0 "nonimmediate_operand")
8582 (vec_merge:V32QI
8583 (any_truncate:V32QI
8584 (match_operand:V32HI 1 "register_operand"))
8585 (match_dup 0)
8586 (match_operand:SI 2 "register_operand")))]
8587 "TARGET_AVX512BW")
8588
;; Destination modes for the AVX512VL truncations below, whose source is
;; <ssedoublemode>.  V16QI is only enabled with TARGET_AVX512BW.
;; pmov_suff_2 gives the vpmov mnemonic suffix per destination mode.
8589 (define_mode_iterator PMOV_DST_MODE_2
8590 [V4SI V8HI (V16QI "TARGET_AVX512BW")])
8591 (define_mode_attr pmov_suff_2
8592 [(V16QI "wb") (V8HI "dw") (V4SI "qd")])
8593
;; Unmasked vpmov<trunsuffix><pmov_suff_2> for the PMOV_DST_MODE_2 modes:
;; truncates the <ssedoublemode> source register into a register or memory.
8594 (define_insn "*avx512vl_<code><ssedoublemodelower><mode>2"
8595 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m")
8596 (any_truncate:PMOV_DST_MODE_2
8597 (match_operand:<ssedoublemode> 1 "register_operand" "v,v")))]
8598 "TARGET_AVX512VL"
8599 "vpmov<trunsuffix><pmov_suff_2>\t{%1, %0|%0, %1}"
8600 [(set_attr "type" "ssemov")
8601 (set_attr "memory" "none,store")
8602 (set_attr "prefix" "evex")
8603 (set_attr "mode" "<sseinsnmode>")])
8604
;; Masked narrowing for each PMOV_DST_MODE_2 destination mode.
;; Elements whose bit in mask operand 3 is set receive the truncated
;; value of operand 1; the others come from operand 2, which is tied to
;; the destination or (register alternative only) a "C" constant.
;; %{%3%} prints the mask register in braces; %N2 presumably adds the
;; zero-masking marker for a zero operand 2 -- TODO confirm.
8605 (define_insn "<avx512>_<code><ssedoublemodelower><mode>2_mask"
8606 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m")
8607 (vec_merge:PMOV_DST_MODE_2
8608 (any_truncate:PMOV_DST_MODE_2
8609 (match_operand:<ssedoublemode> 1 "register_operand" "v,v"))
8610 (match_operand:PMOV_DST_MODE_2 2 "vector_move_operand" "0C,0")
8611 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
8612 "TARGET_AVX512VL"
8613 "vpmov<trunsuffix><pmov_suff_2>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8614 [(set_attr "type" "ssemov")
8615 (set_attr "memory" "none,store")
8616 (set_attr "prefix" "evex")
8617 (set_attr "mode" "<sseinsnmode>")])
8618
;; Expander for the masked narrowing store of each PMOV_DST_MODE_2 mode.
;; Unselected elements are taken from (match_dup 0), so they keep the
;; destination's previous contents.  No preparation code is run.
8619 (define_expand "<avx512>_<code><ssedoublemodelower><mode>2_mask_store"
8620 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand")
8621 (vec_merge:PMOV_DST_MODE_2
8622 (any_truncate:PMOV_DST_MODE_2
8623 (match_operand:<ssedoublemode> 1 "register_operand"))
8624 (match_dup 0)
8625 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
8626 "TARGET_AVX512VL")
8627
;; Source modes whose byte-narrowed result fills only part of a V16QI
;; register.  V8HI is only enabled when TARGET_AVX512BW holds.
8628 (define_mode_iterator PMOV_SRC_MODE_3 [V4DI V2DI V8SI V4SI (V8HI "TARGET_AVX512BW")])
;; Mode of the truncated part: same element count as the source, QI
;; elements.
8629 (define_mode_attr pmov_dst_3
8630 [(V4DI "V4QI") (V2DI "V2QI") (V8SI "V8QI") (V4SI "V4QI") (V8HI "V8QI")])
;; Mode of the zero filler; pmov_dst_3 plus this mode always totals
;; 16 QI elements.
8631 (define_mode_attr pmov_dst_zeroed_3
8632 [(V4DI "V12QI") (V2DI "V14QI") (V8SI "V8QI") (V4SI "V12QI") (V8HI "V8QI")])
;; Mnemonic size suffix for each source mode.
8633 (define_mode_attr pmov_suff_3
8634 [(V4DI "qb") (V2DI "qb") (V8SI "db") (V4SI "db") (V8HI "wb")])
8635
;; Unmasked register-to-register narrowing to bytes for every
;; PMOV_SRC_MODE_3 source.  The V16QI result is the truncated vector
;; (<pmov_dst_3>) concatenated with operand 2, which must satisfy
;; const0_operand and has mode <pmov_dst_zeroed_3>; operand 2 does not
;; appear in the output template.
;; <ssescalarnum> is defined outside this chunk (presumably the element
;; count of the source mode -- TODO confirm).
8636 (define_insn "*avx512vl_<code><mode>v<ssescalarnum>qi2"
8637 [(set (match_operand:V16QI 0 "register_operand" "=v")
8638 (vec_concat:V16QI
8639 (any_truncate:<pmov_dst_3>
8640 (match_operand:PMOV_SRC_MODE_3 1 "register_operand" "v"))
8641 (match_operand:<pmov_dst_zeroed_3> 2 "const0_operand")))]
8642 "TARGET_AVX512VL"
8643 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
8644 [(set_attr "type" "ssemov")
8645 (set_attr "prefix" "evex")
8646 (set_attr "mode" "TI")])
8647
;; Unmasked V2DI -> V2QI narrowing store.  The destination is described
;; as a V16QI memory operand: bytes 0-1 receive the truncated values,
;; bytes 2-15 are selected from the destination itself (match_dup 0),
;; i.e. modelled as unchanged.
;; NOTE(review): the RTL therefore names a 16-byte memory operand even
;; though only two bytes change -- confirm this matches the width the
;; instruction really accesses.
8648 (define_insn "*avx512vl_<code>v2div2qi2_store"
8649 [(set (match_operand:V16QI 0 "memory_operand" "=m")
8650 (vec_concat:V16QI
8651 (any_truncate:V2QI
8652 (match_operand:V2DI 1 "register_operand" "v"))
8653 (vec_select:V14QI
8654 (match_dup 0)
8655 (parallel [(const_int 2) (const_int 3)
8656 (const_int 4) (const_int 5)
8657 (const_int 6) (const_int 7)
8658 (const_int 8) (const_int 9)
8659 (const_int 10) (const_int 11)
8660 (const_int 12) (const_int 13)
8661 (const_int 14) (const_int 15)]))))]
8662 "TARGET_AVX512VL"
8663 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
8664 [(set_attr "type" "ssemov")
8665 (set_attr "memory" "store")
8666 (set_attr "prefix" "evex")
8667 (set_attr "mode" "TI")])
8668
;; Masked V2DI -> V2QI narrowing into a register.  The low two bytes
;; are a vec_merge of the truncated values and bytes 0-1 of operand 2
;; under the QImode mask in operand 3; the remaining 14 bytes are an
;; explicit zero vector.  Operand 2 is tied to the destination or is a
;; "C" constant (constraint "0C").
8669 (define_insn "avx512vl_<code>v2div2qi2_mask"
8670 [(set (match_operand:V16QI 0 "register_operand" "=v")
8671 (vec_concat:V16QI
8672 (vec_merge:V2QI
8673 (any_truncate:V2QI
8674 (match_operand:V2DI 1 "register_operand" "v"))
8675 (vec_select:V2QI
8676 (match_operand:V16QI 2 "vector_move_operand" "0C")
8677 (parallel [(const_int 0) (const_int 1)]))
8678 (match_operand:QI 3 "register_operand" "Yk"))
8679 (const_vector:V14QI [(const_int 0) (const_int 0)
8680 (const_int 0) (const_int 0)
8681 (const_int 0) (const_int 0)
8682 (const_int 0) (const_int 0)
8683 (const_int 0) (const_int 0)
8684 (const_int 0) (const_int 0)
8685 (const_int 0) (const_int 0)])))]
8686 "TARGET_AVX512VL"
8687 "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8688 [(set_attr "type" "ssemov")
8689 (set_attr "prefix" "evex")
8690 (set_attr "mode" "TI")])
8691
;; Masked V2DI -> V2QI narrowing store.  Bytes 0-1 of the V16QI memory
;; destination are merged, under the QImode mask in operand 2, from the
;; truncated values and the destination's own bytes 0-1; bytes 2-15 are
;; taken from the destination (match_dup 0), i.e. modelled as unchanged.
;; The template prints only the mask (%{%2%}); there is no %N operand.
8692 (define_insn "avx512vl_<code>v2div2qi2_mask_store"
8693 [(set (match_operand:V16QI 0 "memory_operand" "=m")
8694 (vec_concat:V16QI
8695 (vec_merge:V2QI
8696 (any_truncate:V2QI
8697 (match_operand:V2DI 1 "register_operand" "v"))
8698 (vec_select:V2QI
8699 (match_dup 0)
8700 (parallel [(const_int 0) (const_int 1)]))
8701 (match_operand:QI 2 "register_operand" "Yk"))
8702 (vec_select:V14QI
8703 (match_dup 0)
8704 (parallel [(const_int 2) (const_int 3)
8705 (const_int 4) (const_int 5)
8706 (const_int 6) (const_int 7)
8707 (const_int 8) (const_int 9)
8708 (const_int 10) (const_int 11)
8709 (const_int 12) (const_int 13)
8710 (const_int 14) (const_int 15)]))))]
8711 "TARGET_AVX512VL"
8712 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%0%{%2%}, %1}"
8713 [(set_attr "type" "ssemov")
8714 (set_attr "memory" "store")
8715 (set_attr "prefix" "evex")
8716 (set_attr "mode" "TI")])
8717
;; Unmasked narrowing store producing four bytes, iterated over
;; VI4_128_8_256 (defined outside this chunk).  Bytes 0-3 of the V16QI
;; memory destination get the truncated values; bytes 4-15 are selected
;; from the destination itself, i.e. modelled as unchanged.
8718 (define_insn "*avx512vl_<code><mode>v4qi2_store"
8719 [(set (match_operand:V16QI 0 "memory_operand" "=m")
8720 (vec_concat:V16QI
8721 (any_truncate:V4QI
8722 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
8723 (vec_select:V12QI
8724 (match_dup 0)
8725 (parallel [(const_int 4) (const_int 5)
8726 (const_int 6) (const_int 7)
8727 (const_int 8) (const_int 9)
8728 (const_int 10) (const_int 11)
8729 (const_int 12) (const_int 13)
8730 (const_int 14) (const_int 15)]))))]
8731 "TARGET_AVX512VL"
8732 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
8733 [(set_attr "type" "ssemov")
8734 (set_attr "memory" "store")
8735 (set_attr "prefix" "evex")
8736 (set_attr "mode" "TI")])
8737
;; Masked narrowing to four bytes in a register.  Bytes 0-3 are merged
;; from the truncated operand 1 and bytes 0-3 of operand 2 under the
;; QImode mask in operand 3; bytes 4-15 are an explicit zero vector.
;; Operand 2 is tied to the destination or is a "C" constant.
8738 (define_insn "avx512vl_<code><mode>v4qi2_mask"
8739 [(set (match_operand:V16QI 0 "register_operand" "=v")
8740 (vec_concat:V16QI
8741 (vec_merge:V4QI
8742 (any_truncate:V4QI
8743 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
8744 (vec_select:V4QI
8745 (match_operand:V16QI 2 "vector_move_operand" "0C")
8746 (parallel [(const_int 0) (const_int 1)
8747 (const_int 2) (const_int 3)]))
8748 (match_operand:QI 3 "register_operand" "Yk"))
8749 (const_vector:V12QI [(const_int 0) (const_int 0)
8750 (const_int 0) (const_int 0)
8751 (const_int 0) (const_int 0)
8752 (const_int 0) (const_int 0)
8753 (const_int 0) (const_int 0)
8754 (const_int 0) (const_int 0)])))]
8755 "TARGET_AVX512VL"
8756 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8757 [(set_attr "type" "ssemov")
8758 (set_attr "prefix" "evex")
8759 (set_attr "mode" "TI")])
8760
;; Masked narrowing store of four bytes.  Bytes 0-3 of the V16QI memory
;; destination are merged, under the QImode mask in operand 2, from the
;; truncated operand 1 and the destination's own bytes 0-3; bytes 4-15
;; are taken from the destination, i.e. modelled as unchanged.
8761 (define_insn "avx512vl_<code><mode>v4qi2_mask_store"
8762 [(set (match_operand:V16QI 0 "memory_operand" "=m")
8763 (vec_concat:V16QI
8764 (vec_merge:V4QI
8765 (any_truncate:V4QI
8766 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
8767 (vec_select:V4QI
8768 (match_dup 0)
8769 (parallel [(const_int 0) (const_int 1)
8770 (const_int 2) (const_int 3)]))
8771 (match_operand:QI 2 "register_operand" "Yk"))
8772 (vec_select:V12QI
8773 (match_dup 0)
8774 (parallel [(const_int 4) (const_int 5)
8775 (const_int 6) (const_int 7)
8776 (const_int 8) (const_int 9)
8777 (const_int 10) (const_int 11)
8778 (const_int 12) (const_int 13)
8779 (const_int 14) (const_int 15)]))))]
8780 "TARGET_AVX512VL"
8781 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
8782 [(set_attr "type" "ssemov")
8783 (set_attr "memory" "store")
8784 (set_attr "prefix" "evex")
8785 (set_attr "mode" "TI")])
8786
;; Eight-element source modes used by the v8qi2 narrowing patterns.
;; V8HI is only enabled when TARGET_AVX512BW holds.
8787 (define_mode_iterator VI2_128_BW_4_256
8788 [(V8HI "TARGET_AVX512BW") V8SI])
8789
;; Unmasked narrowing store producing eight bytes, iterated over
;; VI2_128_BW_4_256.  Bytes 0-7 of the V16QI memory destination get the
;; truncated values; bytes 8-15 are selected from the destination
;; itself, i.e. modelled as unchanged.
8790 (define_insn "*avx512vl_<code><mode>v8qi2_store"
8791 [(set (match_operand:V16QI 0 "memory_operand" "=m")
8792 (vec_concat:V16QI
8793 (any_truncate:V8QI
8794 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
8795 (vec_select:V8QI
8796 (match_dup 0)
8797 (parallel [(const_int 8) (const_int 9)
8798 (const_int 10) (const_int 11)
8799 (const_int 12) (const_int 13)
8800 (const_int 14) (const_int 15)]))))]
8801 "TARGET_AVX512VL"
8802 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
8803 [(set_attr "type" "ssemov")
8804 (set_attr "memory" "store")
8805 (set_attr "prefix" "evex")
8806 (set_attr "mode" "TI")])
8807
;; Masked narrowing to eight bytes in a register.  Bytes 0-7 are merged
;; from the truncated operand 1 and bytes 0-7 of operand 2 under the
;; QImode mask in operand 3; bytes 8-15 are an explicit zero vector.
;; Operand 2 is tied to the destination or is a "C" constant.
8808 (define_insn "avx512vl_<code><mode>v8qi2_mask"
8809 [(set (match_operand:V16QI 0 "register_operand" "=v")
8810 (vec_concat:V16QI
8811 (vec_merge:V8QI
8812 (any_truncate:V8QI
8813 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
8814 (vec_select:V8QI
8815 (match_operand:V16QI 2 "vector_move_operand" "0C")
8816 (parallel [(const_int 0) (const_int 1)
8817 (const_int 2) (const_int 3)
8818 (const_int 4) (const_int 5)
8819 (const_int 6) (const_int 7)]))
8820 (match_operand:QI 3 "register_operand" "Yk"))
8821 (const_vector:V8QI [(const_int 0) (const_int 0)
8822 (const_int 0) (const_int 0)
8823 (const_int 0) (const_int 0)
8824 (const_int 0) (const_int 0)])))]
8825 "TARGET_AVX512VL"
8826 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8827 [(set_attr "type" "ssemov")
8828 (set_attr "prefix" "evex")
8829 (set_attr "mode" "TI")])
8830
;; Masked narrowing store of eight bytes.  Bytes 0-7 of the V16QI memory
;; destination are merged, under the QImode mask in operand 2, from the
;; truncated operand 1 and the destination's own bytes 0-7; bytes 8-15
;; are taken from the destination, i.e. modelled as unchanged.
8831 (define_insn "avx512vl_<code><mode>v8qi2_mask_store"
8832 [(set (match_operand:V16QI 0 "memory_operand" "=m")
8833 (vec_concat:V16QI
8834 (vec_merge:V8QI
8835 (any_truncate:V8QI
8836 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
8837 (vec_select:V8QI
8838 (match_dup 0)
8839 (parallel [(const_int 0) (const_int 1)
8840 (const_int 2) (const_int 3)
8841 (const_int 4) (const_int 5)
8842 (const_int 6) (const_int 7)]))
8843 (match_operand:QI 2 "register_operand" "Yk"))
8844 (vec_select:V8QI
8845 (match_dup 0)
8846 (parallel [(const_int 8) (const_int 9)
8847 (const_int 10) (const_int 11)
8848 (const_int 12) (const_int 13)
8849 (const_int 14) (const_int 15)]))))]
8850 "TARGET_AVX512VL"
8851 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
8852 [(set_attr "type" "ssemov")
8853 (set_attr "memory" "store")
8854 (set_attr "prefix" "evex")
8855 (set_attr "mode" "TI")])
8856
;; Source modes whose word-narrowed result fills only part of a V8HI
;; register.
8857 (define_mode_iterator PMOV_SRC_MODE_4 [V4DI V2DI V4SI])
;; Mode of the truncated part: same element count as the source, HI
;; elements.
8858 (define_mode_attr pmov_dst_4
8859 [(V4DI "V4HI") (V2DI "V2HI") (V4SI "V4HI")])
;; Mode of the zero filler; pmov_dst_4 plus this mode always totals
;; 8 HI elements.
8860 (define_mode_attr pmov_dst_zeroed_4
8861 [(V4DI "V4HI") (V2DI "V6HI") (V4SI "V4HI")])
;; Mnemonic size suffix for each source mode.
8862 (define_mode_attr pmov_suff_4
8863 [(V4DI "qw") (V2DI "qw") (V4SI "dw")])
8864
;; Unmasked register-to-register narrowing to 16-bit elements for every
;; PMOV_SRC_MODE_4 source.  The V8HI result is the truncated vector
;; (<pmov_dst_4>) concatenated with operand 2, which must satisfy
;; const0_operand and has mode <pmov_dst_zeroed_4>; operand 2 is not
;; printed in the output template.
8865 (define_insn "*avx512vl_<code><mode>v<ssescalarnum>hi2"
8866 [(set (match_operand:V8HI 0 "register_operand" "=v")
8867 (vec_concat:V8HI
8868 (any_truncate:<pmov_dst_4>
8869 (match_operand:PMOV_SRC_MODE_4 1 "register_operand" "v"))
8870 (match_operand:<pmov_dst_zeroed_4> 2 "const0_operand")))]
8871 "TARGET_AVX512VL"
8872 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0|%0, %1}"
8873 [(set_attr "type" "ssemov")
8874 (set_attr "prefix" "evex")
8875 (set_attr "mode" "TI")])
8876
;; Unmasked narrowing store producing four 16-bit elements, iterated
;; over VI4_128_8_256 (defined outside this chunk).  Elements 0-3 of the
;; V8HI memory destination get the truncated values; elements 4-7 are
;; selected from the destination itself, i.e. modelled as unchanged.
8877 (define_insn "*avx512vl_<code><mode>v4hi2_store"
8878 [(set (match_operand:V8HI 0 "memory_operand" "=m")
8879 (vec_concat:V8HI
8880 (any_truncate:V4HI
8881 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
8882 (vec_select:V4HI
8883 (match_dup 0)
8884 (parallel [(const_int 4) (const_int 5)
8885 (const_int 6) (const_int 7)]))))]
8886 "TARGET_AVX512VL"
8887 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0|%0, %1}"
8888 [(set_attr "type" "ssemov")
8889 (set_attr "memory" "store")
8890 (set_attr "prefix" "evex")
8891 (set_attr "mode" "TI")])
8892
;; Masked narrowing to four 16-bit elements in a register.  Elements
;; 0-3 are merged from the truncated operand 1 and elements 0-3 of
;; operand 2 under the QImode mask in operand 3; elements 4-7 are an
;; explicit zero vector.  Operand 2 is tied to the destination or is a
;; "C" constant.
8893 (define_insn "avx512vl_<code><mode>v4hi2_mask"
8894 [(set (match_operand:V8HI 0 "register_operand" "=v")
8895 (vec_concat:V8HI
8896 (vec_merge:V4HI
8897 (any_truncate:V4HI
8898 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
8899 (vec_select:V4HI
8900 (match_operand:V8HI 2 "vector_move_operand" "0C")
8901 (parallel [(const_int 0) (const_int 1)
8902 (const_int 2) (const_int 3)]))
8903 (match_operand:QI 3 "register_operand" "Yk"))
8904 (const_vector:V4HI [(const_int 0) (const_int 0)
8905 (const_int 0) (const_int 0)])))]
8906 "TARGET_AVX512VL"
8907 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8908 [(set_attr "type" "ssemov")
8909 (set_attr "prefix" "evex")
8910 (set_attr "mode" "TI")])
8911
;; Masked narrowing store of four 16-bit elements.  Elements 0-3 of the
;; V8HI memory destination are merged, under the QImode mask in
;; operand 2, from the truncated operand 1 and the destination's own
;; elements 0-3; elements 4-7 are taken from the destination, i.e.
;; modelled as unchanged.
8912 (define_insn "avx512vl_<code><mode>v4hi2_mask_store"
8913 [(set (match_operand:V8HI 0 "memory_operand" "=m")
8914 (vec_concat:V8HI
8915 (vec_merge:V4HI
8916 (any_truncate:V4HI
8917 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
8918 (vec_select:V4HI
8919 (match_dup 0)
8920 (parallel [(const_int 0) (const_int 1)
8921 (const_int 2) (const_int 3)]))
8922 (match_operand:QI 2 "register_operand" "Yk"))
8923 (vec_select:V4HI
8924 (match_dup 0)
8925 (parallel [(const_int 4) (const_int 5)
8926 (const_int 6) (const_int 7)]))))]
8927 "TARGET_AVX512VL"
8928 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
8929 [(set_attr "type" "ssemov")
8930 (set_attr "memory" "store")
8931 (set_attr "prefix" "evex")
8932 (set_attr "mode" "TI")])
8933
;; Unmasked V2DI -> V2HI narrowing store.  Elements 0-1 of the V8HI
;; memory destination get the truncated values; elements 2-7 are
;; selected from the destination itself, i.e. modelled as unchanged.
8934 (define_insn "*avx512vl_<code>v2div2hi2_store"
8935 [(set (match_operand:V8HI 0 "memory_operand" "=m")
8936 (vec_concat:V8HI
8937 (any_truncate:V2HI
8938 (match_operand:V2DI 1 "register_operand" "v"))
8939 (vec_select:V6HI
8940 (match_dup 0)
8941 (parallel [(const_int 2) (const_int 3)
8942 (const_int 4) (const_int 5)
8943 (const_int 6) (const_int 7)]))))]
8944 "TARGET_AVX512VL"
8945 "vpmov<trunsuffix>qw\t{%1, %0|%0, %1}"
8946 [(set_attr "type" "ssemov")
8947 (set_attr "memory" "store")
8948 (set_attr "prefix" "evex")
8949 (set_attr "mode" "TI")])
8950
;; Masked V2DI -> V2HI narrowing into a register.  Elements 0-1 are
;; merged from the truncated operand 1 and elements 0-1 of operand 2
;; under the QImode mask in operand 3; elements 2-7 are an explicit
;; zero vector.  Operand 2 is tied to the destination or is a "C"
;; constant.
8951 (define_insn "avx512vl_<code>v2div2hi2_mask"
8952 [(set (match_operand:V8HI 0 "register_operand" "=v")
8953 (vec_concat:V8HI
8954 (vec_merge:V2HI
8955 (any_truncate:V2HI
8956 (match_operand:V2DI 1 "register_operand" "v"))
8957 (vec_select:V2HI
8958 (match_operand:V8HI 2 "vector_move_operand" "0C")
8959 (parallel [(const_int 0) (const_int 1)]))
8960 (match_operand:QI 3 "register_operand" "Yk"))
8961 (const_vector:V6HI [(const_int 0) (const_int 0)
8962 (const_int 0) (const_int 0)
8963 (const_int 0) (const_int 0)])))]
8964 "TARGET_AVX512VL"
8965 "vpmov<trunsuffix>qw\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8966 [(set_attr "type" "ssemov")
8967 (set_attr "prefix" "evex")
8968 (set_attr "mode" "TI")])
8969
;; Masked V2DI -> V2HI narrowing store.  Elements 0-1 of the V8HI memory
;; destination are merged, under the QImode mask in operand 2, from the
;; truncated operand 1 and the destination's own elements 0-1; elements
;; 2-7 are taken from the destination, i.e. modelled as unchanged.
8970 (define_insn "avx512vl_<code>v2div2hi2_mask_store"
8971 [(set (match_operand:V8HI 0 "memory_operand" "=m")
8972 (vec_concat:V8HI
8973 (vec_merge:V2HI
8974 (any_truncate:V2HI
8975 (match_operand:V2DI 1 "register_operand" "v"))
8976 (vec_select:V2HI
8977 (match_dup 0)
8978 (parallel [(const_int 0) (const_int 1)]))
8979 (match_operand:QI 2 "register_operand" "Yk"))
8980 (vec_select:V6HI
8981 (match_dup 0)
8982 (parallel [(const_int 2) (const_int 3)
8983 (const_int 4) (const_int 5)
8984 (const_int 6) (const_int 7)]))))]
8985 "TARGET_AVX512VL"
8986 "vpmov<trunsuffix>qw\t{%1, %0%{%2%}|%0%{%2%}, %1}"
8987 [(set_attr "type" "ssemov")
8988 (set_attr "memory" "store")
8989 (set_attr "prefix" "evex")
8990 (set_attr "mode" "TI")])
8991
;; Unmasked register-to-register V2DI -> V2SI narrowing.  The V4SI
;; result is the truncated pair concatenated with operand 2, a V2SI
;; that must satisfy const0_operand and is not printed in the template.
8992 (define_insn "*avx512vl_<code>v2div2si2"
8993 [(set (match_operand:V4SI 0 "register_operand" "=v")
8994 (vec_concat:V4SI
8995 (any_truncate:V2SI
8996 (match_operand:V2DI 1 "register_operand" "v"))
8997 (match_operand:V2SI 2 "const0_operand")))]
8998 "TARGET_AVX512VL"
8999 "vpmov<trunsuffix>qd\t{%1, %0|%0, %1}"
9000 [(set_attr "type" "ssemov")
9001 (set_attr "prefix" "evex")
9002 (set_attr "mode" "TI")])
9003
;; Unmasked V2DI -> V2SI narrowing store.  Elements 0-1 of the V4SI
;; memory destination get the truncated values; elements 2-3 are
;; selected from the destination itself, i.e. modelled as unchanged.
9004 (define_insn "*avx512vl_<code>v2div2si2_store"
9005 [(set (match_operand:V4SI 0 "memory_operand" "=m")
9006 (vec_concat:V4SI
9007 (any_truncate:V2SI
9008 (match_operand:V2DI 1 "register_operand" "v"))
9009 (vec_select:V2SI
9010 (match_dup 0)
9011 (parallel [(const_int 2) (const_int 3)]))))]
9012 "TARGET_AVX512VL"
9013 "vpmov<trunsuffix>qd\t{%1, %0|%0, %1}"
9014 [(set_attr "type" "ssemov")
9015 (set_attr "memory" "store")
9016 (set_attr "prefix" "evex")
9017 (set_attr "mode" "TI")])
9018
;; Masked V2DI -> V2SI narrowing into a register.  Elements 0-1 are
;; merged from the truncated operand 1 and elements 0-1 of operand 2
;; under the QImode mask in operand 3; elements 2-3 are an explicit
;; zero vector.  Operand 2 is tied to the destination or is a "C"
;; constant.
9019 (define_insn "avx512vl_<code>v2div2si2_mask"
9020 [(set (match_operand:V4SI 0 "register_operand" "=v")
9021 (vec_concat:V4SI
9022 (vec_merge:V2SI
9023 (any_truncate:V2SI
9024 (match_operand:V2DI 1 "register_operand" "v"))
9025 (vec_select:V2SI
9026 (match_operand:V4SI 2 "vector_move_operand" "0C")
9027 (parallel [(const_int 0) (const_int 1)]))
9028 (match_operand:QI 3 "register_operand" "Yk"))
9029 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
9030 "TARGET_AVX512VL"
9031 "vpmov<trunsuffix>qd\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9032 [(set_attr "type" "ssemov")
9033 (set_attr "prefix" "evex")
9034 (set_attr "mode" "TI")])
9035
;; Masked V2DI -> V2SI narrowing store.  Elements 0-1 of the V4SI memory
;; destination are merged, under the QImode mask in operand 2, from the
;; truncated operand 1 and the destination's own elements 0-1; elements
;; 2-3 are taken from the destination, i.e. modelled as unchanged.
9036 (define_insn "avx512vl_<code>v2div2si2_mask_store"
9037 [(set (match_operand:V4SI 0 "memory_operand" "=m")
9038 (vec_concat:V4SI
9039 (vec_merge:V2SI
9040 (any_truncate:V2SI
9041 (match_operand:V2DI 1 "register_operand" "v"))
9042 (vec_select:V2SI
9043 (match_dup 0)
9044 (parallel [(const_int 0) (const_int 1)]))
9045 (match_operand:QI 2 "register_operand" "Yk"))
9046 (vec_select:V2SI
9047 (match_dup 0)
9048 (parallel [(const_int 2) (const_int 3)]))))]
9049 "TARGET_AVX512VL"
9050 "vpmov<trunsuffix>qd\t{%1, %0%{%2%}|%0%{%2%}, %1}"
9051 [(set_attr "type" "ssemov")
9052 (set_attr "memory" "store")
9053 (set_attr "prefix" "evex")
9054 (set_attr "mode" "TI")])
9055
;; Unmasked register-to-register V8DI -> V8QI narrowing.  Despite the
;; "v16qi" in the name only eight bytes are produced by the truncation;
;; the upper eight bytes of the V16QI result are an explicit zero
;; vector written directly in the template (no operand for it).
9056 (define_insn "*avx512f_<code>v8div16qi2"
9057 [(set (match_operand:V16QI 0 "register_operand" "=v")
9058 (vec_concat:V16QI
9059 (any_truncate:V8QI
9060 (match_operand:V8DI 1 "register_operand" "v"))
9061 (const_vector:V8QI [(const_int 0) (const_int 0)
9062 (const_int 0) (const_int 0)
9063 (const_int 0) (const_int 0)
9064 (const_int 0) (const_int 0)])))]
9065 "TARGET_AVX512F"
9066 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
9067 [(set_attr "type" "ssemov")
9068 (set_attr "prefix" "evex")
9069 (set_attr "mode" "TI")])
9070
;; Unmasked V8DI -> V8QI narrowing store.  Bytes 0-7 of the V16QI memory
;; destination get the truncated values; bytes 8-15 are selected from
;; the destination itself, i.e. modelled as unchanged.
9071 (define_insn "*avx512f_<code>v8div16qi2_store"
9072 [(set (match_operand:V16QI 0 "memory_operand" "=m")
9073 (vec_concat:V16QI
9074 (any_truncate:V8QI
9075 (match_operand:V8DI 1 "register_operand" "v"))
9076 (vec_select:V8QI
9077 (match_dup 0)
9078 (parallel [(const_int 8) (const_int 9)
9079 (const_int 10) (const_int 11)
9080 (const_int 12) (const_int 13)
9081 (const_int 14) (const_int 15)]))))]
9082 "TARGET_AVX512F"
9083 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
9084 [(set_attr "type" "ssemov")
9085 (set_attr "memory" "store")
9086 (set_attr "prefix" "evex")
9087 (set_attr "mode" "TI")])
9088
;; Masked V8DI -> V8QI narrowing into a register.  Bytes 0-7 are merged
;; from the truncated operand 1 and bytes 0-7 of operand 2 under the
;; QImode mask in operand 3; bytes 8-15 are an explicit zero vector.
;; Operand 2 is tied to the destination or is a "C" constant.
9089 (define_insn "avx512f_<code>v8div16qi2_mask"
9090 [(set (match_operand:V16QI 0 "register_operand" "=v")
9091 (vec_concat:V16QI
9092 (vec_merge:V8QI
9093 (any_truncate:V8QI
9094 (match_operand:V8DI 1 "register_operand" "v"))
9095 (vec_select:V8QI
9096 (match_operand:V16QI 2 "vector_move_operand" "0C")
9097 (parallel [(const_int 0) (const_int 1)
9098 (const_int 2) (const_int 3)
9099 (const_int 4) (const_int 5)
9100 (const_int 6) (const_int 7)]))
9101 (match_operand:QI 3 "register_operand" "Yk"))
9102 (const_vector:V8QI [(const_int 0) (const_int 0)
9103 (const_int 0) (const_int 0)
9104 (const_int 0) (const_int 0)
9105 (const_int 0) (const_int 0)])))]
9106 "TARGET_AVX512F"
9107 "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9108 [(set_attr "type" "ssemov")
9109 (set_attr "prefix" "evex")
9110 (set_attr "mode" "TI")])
9111
;; Masked V8DI -> V8QI narrowing store.  Bytes 0-7 of the V16QI memory
;; destination are merged, under the QImode mask in operand 2, from the
;; truncated operand 1 and the destination's own bytes 0-7; bytes 8-15
;; are taken from the destination, i.e. modelled as unchanged.
9112 (define_insn "avx512f_<code>v8div16qi2_mask_store"
9113 [(set (match_operand:V16QI 0 "memory_operand" "=m")
9114 (vec_concat:V16QI
9115 (vec_merge:V8QI
9116 (any_truncate:V8QI
9117 (match_operand:V8DI 1 "register_operand" "v"))
9118 (vec_select:V8QI
9119 (match_dup 0)
9120 (parallel [(const_int 0) (const_int 1)
9121 (const_int 2) (const_int 3)
9122 (const_int 4) (const_int 5)
9123 (const_int 6) (const_int 7)]))
9124 (match_operand:QI 2 "register_operand" "Yk"))
9125 (vec_select:V8QI
9126 (match_dup 0)
9127 (parallel [(const_int 8) (const_int 9)
9128 (const_int 10) (const_int 11)
9129 (const_int 12) (const_int 13)
9130 (const_int 14) (const_int 15)]))))]
9131 "TARGET_AVX512F"
9132 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%0%{%2%}, %1}"
9133 [(set_attr "type" "ssemov")
9134 (set_attr "memory" "store")
9135 (set_attr "prefix" "evex")
9136 (set_attr "mode" "TI")])
9137
9138 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9139 ;;
9140 ;; Parallel integral arithmetic
9141 ;;
9142 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9143
;; Vector integer negation, expanded as (0 - operand 1).
;; Operand 2 is not supplied by the caller: the preparation statement
;; sets it to a fresh register loaded with the all-zero constant of the
;; current mode, and (match_dup 2) uses it as the minuend.
9144 (define_expand "neg<mode>2"
9145 [(set (match_operand:VI_AVX2 0 "register_operand")
9146 (minus:VI_AVX2
9147 (match_dup 2)
9148 (match_operand:VI_AVX2 1 "nonimmediate_operand")))]
9149 "TARGET_SSE2"
9150 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
9151
;; Expander for element-wise vector integer add/subtract over the
;; plusminus code iterator and the VI_AVX2 mode iterator (both defined
;; outside this chunk).  Before the template is emitted the operands
;; are passed to ix86_fixup_binary_operands_no_copy, a helper defined
;; outside this file (presumably it legitimizes the operand
;; combination, e.g. memory operands -- TODO confirm).
9152 (define_expand "<plusminus_insn><mode>3"
9153 [(set (match_operand:VI_AVX2 0 "register_operand")
9154 (plusminus:VI_AVX2
9155 (match_operand:VI_AVX2 1 "nonimmediate_operand")
9156 (match_operand:VI_AVX2 2 "nonimmediate_operand")))]
9157 "TARGET_SSE2"
9158 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
9159
;; Expander for masked add/subtract on the VI48_AVX512VL modes.
;; Result elements come from the add/subtract where the bit of mask
;; operand 4 is set and from operand 3 otherwise.  Operands are first
;; passed to ix86_fixup_binary_operands_no_copy (defined elsewhere).
9160 (define_expand "<plusminus_insn><mode>3_mask"
9161 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
9162 (vec_merge:VI48_AVX512VL
9163 (plusminus:VI48_AVX512VL
9164 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
9165 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
9166 (match_operand:VI48_AVX512VL 3 "vector_move_operand")
9167 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
9168 "TARGET_AVX512F"
9169 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
9170
;; Expander for masked add/subtract on the VI12_AVX512VL modes; same
;; shape as the VI48_AVX512VL expander of the same name but gated on
;; TARGET_AVX512BW.  Result elements come from the add/subtract where
;; the bit of mask operand 4 is set and from operand 3 otherwise.
9171 (define_expand "<plusminus_insn><mode>3_mask"
9172 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
9173 (vec_merge:VI12_AVX512VL
9174 (plusminus:VI12_AVX512VL
9175 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
9176 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand"))
9177 (match_operand:VI12_AVX512VL 3 "vector_move_operand")
9178 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
9179 "TARGET_AVX512BW"
9180 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
9181
;; Unmasked vector integer add/subtract.  Alternative 0 is the
;; two-operand legacy form (destination tied to operand 1, "isa"
;; noavx); alternative 1 is the three-operand VEX/EVEX form ("isa" avx).
;; <comm> is defined outside this chunk (presumably the commutativity
;; marker for plus -- TODO confirm).
;; NOTE(review): the template and "prefix" attribute use
;; <mask_operand3> / <mask_prefix3> although the pattern name carries
;; no <mask_name>; confirm these substitute to the intended unmasked
;; values here.
9182 (define_insn "*<plusminus_insn><mode>3"
9183 [(set (match_operand:VI_AVX2 0 "register_operand" "=x,v")
9184 (plusminus:VI_AVX2
9185 (match_operand:VI_AVX2 1 "nonimmediate_operand" "<comm>0,v")
9186 (match_operand:VI_AVX2 2 "nonimmediate_operand" "xm,vm")))]
9187 "TARGET_SSE2
9188 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
9189 "@
9190 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
9191 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9192 [(set_attr "isa" "noavx,avx")
9193 (set_attr "type" "sseiadd")
9194 (set_attr "prefix_data16" "1,*")
9195 (set_attr "prefix" "<mask_prefix3>")
9196 (set_attr "mode" "<sseinsnmode>")])
9197
;; Masked add/subtract on the VI48_AVX512VL modes, EVEX-encoded only.
;; Elements whose bit in mask operand 4 is set receive the
;; add/subtract result; the others come from operand 3, which is tied
;; to the destination or is a "C" constant.  The insn condition also
;; requires ix86_binary_operator_ok for the operand combination.
9198 (define_insn "*<plusminus_insn><mode>3_mask"
9199 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
9200 (vec_merge:VI48_AVX512VL
9201 (plusminus:VI48_AVX512VL
9202 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "<comm>v")
9203 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm"))
9204 (match_operand:VI48_AVX512VL 3 "vector_move_operand" "0C")
9205 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
9206 "TARGET_AVX512F
9207 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
9208 "vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
9209 [(set_attr "type" "sseiadd")
9210 (set_attr "prefix" "evex")
9211 (set_attr "mode" "<sseinsnmode>")])
9212
;; Masked add/subtract on the VI12_AVX512VL modes; same shape as the
;; VI48_AVX512VL insn of the same name but gated on TARGET_AVX512BW.
;; Elements whose bit in mask operand 4 is set receive the
;; add/subtract result; the others come from operand 3 (destination or
;; "C" constant).
9213 (define_insn "*<plusminus_insn><mode>3_mask"
9214 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
9215 (vec_merge:VI12_AVX512VL
9216 (plusminus:VI12_AVX512VL
9217 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "<comm>v")
9218 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm"))
9219 (match_operand:VI12_AVX512VL 3 "vector_move_operand" "0C")
9220 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
9221 "TARGET_AVX512BW && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
9222 "vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
9223 [(set_attr "type" "sseiadd")
9224 (set_attr "prefix" "evex")
9225 (set_attr "mode" "<sseinsnmode>")])
9226
;; Expander for the sat_plusminus operations on the VI12_AVX2 modes
;; (code iterator defined outside this chunk; presumably saturating
;; add/subtract).  <mask_name> in the name makes this subject to the
;; mask substitution defined elsewhere; the two <mask_*_condition>
;; terms restrict when the masked variant is available.
;; Operands are first passed to ix86_fixup_binary_operands_no_copy.
9227 (define_expand "<sse2_avx2>_<plusminus_insn><mode>3<mask_name>"
9228 [(set (match_operand:VI12_AVX2 0 "register_operand")
9229 (sat_plusminus:VI12_AVX2
9230 (match_operand:VI12_AVX2 1 "nonimmediate_operand")
9231 (match_operand:VI12_AVX2 2 "nonimmediate_operand")))]
9232 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
9233 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
9234
;; Insn for the sat_plusminus operations on the VI12_AVX2 modes.
;; Alternative 0 is the two-operand legacy form (destination tied to
;; operand 1); alternative 1 is the three-operand form, to which
;; <mask_operand3> appends the mask decoration for the masked variant.
;; The "mode" attribute is the fixed string "TI" for every iterated
;; vector mode.
9235 (define_insn "*<sse2_avx2>_<plusminus_insn><mode>3<mask_name>"
9236 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v")
9237 (sat_plusminus:VI12_AVX2
9238 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "<comm>0,v")
9239 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,vm")))]
9240 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
9241 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
9242 "@
9243 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
9244 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9245 [(set_attr "isa" "noavx,avx")
9246 (set_attr "type" "sseiadd")
9247 (set_attr "prefix_data16" "1,*")
9248 (set_attr "prefix" "orig,maybe_evex")
9249 (set_attr "mode" "TI")])
9250
;; Multiplication for the VI1_AVX512 modes.  The RTL template is never
;; emitted: the preparation code delegates the whole expansion to
;; ix86_expand_vecop_qihi (defined outside this file) and ends with
;; DONE.  Both inputs must already be registers.
9251 (define_expand "mul<mode>3<mask_name>"
9252 [(set (match_operand:VI1_AVX512 0 "register_operand")
9253 (mult:VI1_AVX512 (match_operand:VI1_AVX512 1 "register_operand")
9254 (match_operand:VI1_AVX512 2 "register_operand")))]
9255 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
9256 {
9257 ix86_expand_vecop_qihi (MULT, operands[0], operands[1], operands[2]);
9258 DONE;
9259 })
9260
;; Expander for element-wise multiplication on the VI2_AVX2 modes.
;; Operands are passed to ix86_fixup_binary_operands_no_copy (defined
;; elsewhere) before the mult template is emitted.
9261 (define_expand "mul<mode>3<mask_name>"
9262 [(set (match_operand:VI2_AVX2 0 "register_operand")
9263 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand")
9264 (match_operand:VI2_AVX2 2 "nonimmediate_operand")))]
9265 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
9266 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
9267
;; Element-wise multiplication on the VI2_AVX2 modes, emitted as
;; pmullw (two-operand form, destination tied to operand 1) or vpmullw
;; (three-operand form).  The '%' in operand 1's constraint marks
;; operands 1 and 2 as commutative.
9268 (define_insn "*mul<mode>3<mask_name>"
9269 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
9270 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,v")
9271 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,vm")))]
9272 "TARGET_SSE2
9273 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)
9274 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
9275 "@
9276 pmullw\t{%2, %0|%0, %2}
9277 vpmullw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9278 [(set_attr "isa" "noavx,avx")
9279 (set_attr "type" "sseimul")
9280 (set_attr "prefix_data16" "1,*")
9281 (set_attr "prefix" "orig,vex")
9282 (set_attr "mode" "<sseinsnmode>")])
9283
;; Expander for the high-part multiply on the VI2_AVX2 modes: both
;; inputs are extended (any_extend) to <ssedoublemode>, multiplied, the
;; product is shifted right logically by 16 and truncated back, so each
;; result element is bits 16 and up of the widened product.
;; Operands are first passed to ix86_fixup_binary_operands_no_copy.
9284 (define_expand "<s>mul<mode>3_highpart<mask_name>"
9285 [(set (match_operand:VI2_AVX2 0 "register_operand")
9286 (truncate:VI2_AVX2
9287 (lshiftrt:<ssedoublemode>
9288 (mult:<ssedoublemode>
9289 (any_extend:<ssedoublemode>
9290 (match_operand:VI2_AVX2 1 "nonimmediate_operand"))
9291 (any_extend:<ssedoublemode>
9292 (match_operand:VI2_AVX2 2 "nonimmediate_operand")))
9293 (const_int 16))))]
9294 "TARGET_SSE2
9295 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
9296 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
9297
;; High-part multiply on the VI2_AVX2 modes, emitted as pmulh<u>w
;; (two-operand form) or vpmulh<u>w (three-operand form).  The RTL is
;; truncate (lshiftrt (mult (extend op1) (extend op2)) 16).  <u> is
;; defined outside this chunk (presumably "u" for the zero_extend
;; variant -- TODO confirm).  '%' marks operands 1 and 2 commutative.
9298 (define_insn "*<s>mul<mode>3_highpart<mask_name>"
9299 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
9300 (truncate:VI2_AVX2
9301 (lshiftrt:<ssedoublemode>
9302 (mult:<ssedoublemode>
9303 (any_extend:<ssedoublemode>
9304 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,v"))
9305 (any_extend:<ssedoublemode>
9306 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,vm")))
9307 (const_int 16))))]
9308 "TARGET_SSE2
9309 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)
9310 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
9311 "@
9312 pmulh<u>w\t{%2, %0|%0, %2}
9313 vpmulh<u>w\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9314 [(set_attr "isa" "noavx,avx")
9315 (set_attr "type" "sseimul")
9316 (set_attr "prefix_data16" "1,*")
9317 (set_attr "prefix" "orig,vex")
9318 (set_attr "mode" "<sseinsnmode>")])
9319
;; Expander for the unsigned widening multiply of the even-indexed
;; elements of two V16SI vectors: elements 0,2,...,14 of each input are
;; selected, zero-extended to 64 bits and multiplied, giving V8DI.
;; Operands are first passed to ix86_fixup_binary_operands_no_copy.
9320 (define_expand "vec_widen_umult_even_v16si<mask_name>"
9321 [(set (match_operand:V8DI 0 "register_operand")
9322 (mult:V8DI
9323 (zero_extend:V8DI
9324 (vec_select:V8SI
9325 (match_operand:V16SI 1 "nonimmediate_operand")
9326 (parallel [(const_int 0) (const_int 2)
9327 (const_int 4) (const_int 6)
9328 (const_int 8) (const_int 10)
9329 (const_int 12) (const_int 14)])))
9330 (zero_extend:V8DI
9331 (vec_select:V8SI
9332 (match_operand:V16SI 2 "nonimmediate_operand")
9333 (parallel [(const_int 0) (const_int 2)
9334 (const_int 4) (const_int 6)
9335 (const_int 8) (const_int 10)
9336 (const_int 12) (const_int 14)])))))]
9337 "TARGET_AVX512F"
9338 "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
9339
;; 512-bit unsigned even-element widening multiply, emitted as
;; vpmuludq.  Even-indexed SI elements of operands 1 and 2 are
;; zero-extended and multiplied into V8DI.  '%' marks the two inputs
;; commutative; only operand 2 may be memory ("vm").
9340 (define_insn "*vec_widen_umult_even_v16si<mask_name>"
9341 [(set (match_operand:V8DI 0 "register_operand" "=v")
9342 (mult:V8DI
9343 (zero_extend:V8DI
9344 (vec_select:V8SI
9345 (match_operand:V16SI 1 "nonimmediate_operand" "%v")
9346 (parallel [(const_int 0) (const_int 2)
9347 (const_int 4) (const_int 6)
9348 (const_int 8) (const_int 10)
9349 (const_int 12) (const_int 14)])))
9350 (zero_extend:V8DI
9351 (vec_select:V8SI
9352 (match_operand:V16SI 2 "nonimmediate_operand" "vm")
9353 (parallel [(const_int 0) (const_int 2)
9354 (const_int 4) (const_int 6)
9355 (const_int 8) (const_int 10)
9356 (const_int 12) (const_int 14)])))))]
9357 "TARGET_AVX512F && ix86_binary_operator_ok (MULT, V16SImode, operands)"
9358 "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9359 [(set_attr "isa" "avx512f")
9360 (set_attr "type" "sseimul")
9361 (set_attr "prefix_extra" "1")
9362 (set_attr "prefix" "evex")
9363 (set_attr "mode" "XI")])
9364
;; Expander for the unsigned widening multiply of the even-indexed
;; elements (0,2,4,6) of two V8SI vectors, producing V4DI.  Needs
;; TARGET_AVX2; <mask_avx512vl_condition> further gates the masked
;; variant.  Operands go through ix86_fixup_binary_operands_no_copy.
9365 (define_expand "vec_widen_umult_even_v8si<mask_name>"
9366 [(set (match_operand:V4DI 0 "register_operand")
9367 (mult:V4DI
9368 (zero_extend:V4DI
9369 (vec_select:V4SI
9370 (match_operand:V8SI 1 "nonimmediate_operand")
9371 (parallel [(const_int 0) (const_int 2)
9372 (const_int 4) (const_int 6)])))
9373 (zero_extend:V4DI
9374 (vec_select:V4SI
9375 (match_operand:V8SI 2 "nonimmediate_operand")
9376 (parallel [(const_int 0) (const_int 2)
9377 (const_int 4) (const_int 6)])))))]
9378 "TARGET_AVX2 && <mask_avx512vl_condition>"
9379 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
9380
;; 256-bit unsigned even-element widening multiply, emitted as
;; vpmuludq.  Even-indexed SI elements of operands 1 and 2 are
;; zero-extended and multiplied into V4DI.  '%' marks the two inputs
;; commutative; only operand 2 may be memory.
9381 (define_insn "*vec_widen_umult_even_v8si<mask_name>"
9382 [(set (match_operand:V4DI 0 "register_operand" "=v")
9383 (mult:V4DI
9384 (zero_extend:V4DI
9385 (vec_select:V4SI
9386 (match_operand:V8SI 1 "nonimmediate_operand" "%v")
9387 (parallel [(const_int 0) (const_int 2)
9388 (const_int 4) (const_int 6)])))
9389 (zero_extend:V4DI
9390 (vec_select:V4SI
9391 (match_operand:V8SI 2 "nonimmediate_operand" "vm")
9392 (parallel [(const_int 0) (const_int 2)
9393 (const_int 4) (const_int 6)])))))]
9394 "TARGET_AVX2 && <mask_avx512vl_condition>
9395 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
9396 "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9397 [(set_attr "type" "sseimul")
9398 (set_attr "prefix" "maybe_evex")
9399 (set_attr "mode" "OI")])
9400
;; Expander for the unsigned widening multiply of elements 0 and 2 of
;; two V4SI vectors, producing V2DI.  Needs TARGET_SSE2;
;; <mask_avx512vl_condition> further gates the masked variant.
;; Operands go through ix86_fixup_binary_operands_no_copy.
9401 (define_expand "vec_widen_umult_even_v4si<mask_name>"
9402 [(set (match_operand:V2DI 0 "register_operand")
9403 (mult:V2DI
9404 (zero_extend:V2DI
9405 (vec_select:V2SI
9406 (match_operand:V4SI 1 "nonimmediate_operand")
9407 (parallel [(const_int 0) (const_int 2)])))
9408 (zero_extend:V2DI
9409 (vec_select:V2SI
9410 (match_operand:V4SI 2 "nonimmediate_operand")
9411 (parallel [(const_int 0) (const_int 2)])))))]
9412 "TARGET_SSE2 && <mask_avx512vl_condition>"
9413 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
9414
;; 128-bit unsigned even-element widening multiply.  Alternative 0 is
;; the two-operand pmuludq (destination tied to operand 1, "isa"
;; noavx); alternative 1 is the three-operand vpmuludq.  Elements 0
;; and 2 of each input are zero-extended and multiplied into V2DI.
9415 (define_insn "*vec_widen_umult_even_v4si<mask_name>"
9416 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
9417 (mult:V2DI
9418 (zero_extend:V2DI
9419 (vec_select:V2SI
9420 (match_operand:V4SI 1 "nonimmediate_operand" "%0,v")
9421 (parallel [(const_int 0) (const_int 2)])))
9422 (zero_extend:V2DI
9423 (vec_select:V2SI
9424 (match_operand:V4SI 2 "nonimmediate_operand" "xm,vm")
9425 (parallel [(const_int 0) (const_int 2)])))))]
9426 "TARGET_SSE2 && <mask_avx512vl_condition>
9427 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
9428 "@
9429 pmuludq\t{%2, %0|%0, %2}
9430 vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9431 [(set_attr "isa" "noavx,avx")
9432 (set_attr "type" "sseimul")
9433 (set_attr "prefix_data16" "1,*")
9434 (set_attr "prefix" "orig,maybe_evex")
9435 (set_attr "mode" "TI")])
9436
;; Expander: signed widening multiply of the eight even-indexed elements
;; (0, 2, ..., 14) of two V16SI operands, producing V8DI.  Requires
;; TARGET_AVX512F; operands are fixed up for MULT/V16SImode.
9437 (define_expand "vec_widen_smult_even_v16si<mask_name>"
9438 [(set (match_operand:V8DI 0 "register_operand")
9439 (mult:V8DI
9440 (sign_extend:V8DI
9441 (vec_select:V8SI
9442 (match_operand:V16SI 1 "nonimmediate_operand")
9443 (parallel [(const_int 0) (const_int 2)
9444 (const_int 4) (const_int 6)
9445 (const_int 8) (const_int 10)
9446 (const_int 12) (const_int 14)])))
9447 (sign_extend:V8DI
9448 (vec_select:V8SI
9449 (match_operand:V16SI 2 "nonimmediate_operand")
9450 (parallel [(const_int 0) (const_int 2)
9451 (const_int 4) (const_int 6)
9452 (const_int 8) (const_int 10)
9453 (const_int 12) (const_int 14)])))))]
9454 "TARGET_AVX512F"
9455 "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
9456
;; Insn for the signed even-element widening multiply on V16SI; emits
;; vpmuldq with <mask_operand3> appended to the destination.  Operands
;; 1 and 2 are commutative ('%'); operand 2 may be memory.
9457 (define_insn "*vec_widen_smult_even_v16si<mask_name>"
9458 [(set (match_operand:V8DI 0 "register_operand" "=v")
9459 (mult:V8DI
9460 (sign_extend:V8DI
9461 (vec_select:V8SI
9462 (match_operand:V16SI 1 "nonimmediate_operand" "%v")
9463 (parallel [(const_int 0) (const_int 2)
9464 (const_int 4) (const_int 6)
9465 (const_int 8) (const_int 10)
9466 (const_int 12) (const_int 14)])))
9467 (sign_extend:V8DI
9468 (vec_select:V8SI
9469 (match_operand:V16SI 2 "nonimmediate_operand" "vm")
9470 (parallel [(const_int 0) (const_int 2)
9471 (const_int 4) (const_int 6)
9472 (const_int 8) (const_int 10)
9473 (const_int 12) (const_int 14)])))))]
9474 "TARGET_AVX512F && ix86_binary_operator_ok (MULT, V16SImode, operands)"
9475 "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9476 [(set_attr "isa" "avx512f")
9477 (set_attr "type" "sseimul")
9478 (set_attr "prefix_extra" "1")
9479 (set_attr "prefix" "evex")
9480 (set_attr "mode" "XI")])
9481
;; Expander: signed widening multiply of the even-indexed elements
;; (0, 2, 4, 6) of two V8SI operands, producing V4DI.  Enabled for
;; TARGET_AVX2 together with <mask_avx512vl_condition>; operands are
;; fixed up for MULT/V8SImode.
9482 (define_expand "vec_widen_smult_even_v8si<mask_name>"
9483 [(set (match_operand:V4DI 0 "register_operand")
9484 (mult:V4DI
9485 (sign_extend:V4DI
9486 (vec_select:V4SI
9487 (match_operand:V8SI 1 "nonimmediate_operand")
9488 (parallel [(const_int 0) (const_int 2)
9489 (const_int 4) (const_int 6)])))
9490 (sign_extend:V4DI
9491 (vec_select:V4SI
9492 (match_operand:V8SI 2 "nonimmediate_operand")
9493 (parallel [(const_int 0) (const_int 2)
9494 (const_int 4) (const_int 6)])))))]
9495 "TARGET_AVX2 && <mask_avx512vl_condition>"
9496 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
9497
;; Insn for the signed even-element widening multiply on V8SI; emits
;; vpmuldq with <mask_operand3> appended to the destination.
;; The condition carries <mask_avx512vl_condition>, matching the
;; expander and the unsigned V8SI sibling, so that the masked form is
;; only recognized when the mask condition holds.  Operand 1 is marked
;; commutative ('%') like the other widening-multiply insns, letting the
;; register allocator swap operands 1 and 2.
9498 (define_insn "*vec_widen_smult_even_v8si<mask_name>"
9499 [(set (match_operand:V4DI 0 "register_operand" "=v")
9500 (mult:V4DI
9501 (sign_extend:V4DI
9502 (vec_select:V4SI
9503 (match_operand:V8SI 1 "nonimmediate_operand" "%v")
9504 (parallel [(const_int 0) (const_int 2)
9505 (const_int 4) (const_int 6)])))
9506 (sign_extend:V4DI
9507 (vec_select:V4SI
9508 (match_operand:V8SI 2 "nonimmediate_operand" "vm")
9509 (parallel [(const_int 0) (const_int 2)
9510 (const_int 4) (const_int 6)])))))]
9511 "TARGET_AVX2 && <mask_avx512vl_condition>
9512 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
9513 "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9514 [(set_attr "type" "sseimul")
9515 (set_attr "prefix_extra" "1")
9516 (set_attr "prefix" "vex")
9517 (set_attr "mode" "OI")])
9518
;; Expander: signed widening multiply of the even-indexed elements
;; (0 and 2) of two V4SI operands, producing V2DI.  Enabled for
;; TARGET_SSE4_1 together with <mask_avx512vl_condition>; operands are
;; fixed up for MULT/V4SImode.
9519 (define_expand "sse4_1_mulv2siv2di3<mask_name>"
9520 [(set (match_operand:V2DI 0 "register_operand")
9521 (mult:V2DI
9522 (sign_extend:V2DI
9523 (vec_select:V2SI
9524 (match_operand:V4SI 1 "nonimmediate_operand")
9525 (parallel [(const_int 0) (const_int 2)])))
9526 (sign_extend:V2DI
9527 (vec_select:V2SI
9528 (match_operand:V4SI 2 "nonimmediate_operand")
9529 (parallel [(const_int 0) (const_int 2)])))))]
9530 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
9531 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
9532
;; Insn for the signed even-element widening multiply on V4SI.
;; Alternative 0 (isa noavx) is the two-operand pmuldq form with
;; operand 1 tied to the destination; alternative 1 (isa avx) is the
;; three-operand vpmuldq form, which also prints <mask_operand3>.
9533 (define_insn "*sse4_1_mulv2siv2di3<mask_name>"
9534 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
9535 (mult:V2DI
9536 (sign_extend:V2DI
9537 (vec_select:V2SI
9538 (match_operand:V4SI 1 "nonimmediate_operand" "%0,v")
9539 (parallel [(const_int 0) (const_int 2)])))
9540 (sign_extend:V2DI
9541 (vec_select:V2SI
9542 (match_operand:V4SI 2 "nonimmediate_operand" "xm,vm")
9543 (parallel [(const_int 0) (const_int 2)])))))]
9544 "TARGET_SSE4_1 && <mask_avx512vl_condition>
9545 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
9546 "@
9547 pmuldq\t{%2, %0|%0, %2}
9548 vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9549 [(set_attr "isa" "noavx,avx")
9550 (set_attr "type" "sseimul")
9551 (set_attr "prefix_data16" "1,*")
9552 (set_attr "prefix_extra" "1")
9553 (set_attr "prefix" "orig,vex")
9554 (set_attr "mode" "TI")])
9555
;; vpmaddwd for the VI2_AVX2 modes, modelled as an unspec
;; (UNSPEC_PMADDWD512) of the two source vectors rather than as explicit
;; RTL arithmetic.  The result has the <sseunpackmode> of the inputs.
;; Enabled for TARGET_AVX512BW together with <mask_mode512bit_condition>.
9556 (define_insn "avx512bw_pmaddwd512<mode><mask_name>"
9557 [(set (match_operand:<sseunpackmode> 0 "register_operand" "=v")
9558 (unspec:<sseunpackmode>
9559 [(match_operand:VI2_AVX2 1 "register_operand" "v")
9560 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "vm")]
9561 UNSPEC_PMADDWD512))]
9562 "TARGET_AVX512BW && <mask_mode512bit_condition>"
9563 "vpmaddwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9564 [(set_attr "type" "sseiadd")
9565 (set_attr "prefix" "evex")
9566 (set_attr "mode" "XI")])
9567
;; Expander for the 256-bit multiply-add of V16HI operands into V8SI.
;; The pattern is the sum of two products: the sign-extended even
;; elements (0, 2, ..., 14) of operands 1 and 2 multiplied together,
;; plus the sign-extended odd elements (1, 3, ..., 15) multiplied
;; together.  Requires TARGET_AVX2.
9568 (define_expand "avx2_pmaddwd"
9569 [(set (match_operand:V8SI 0 "register_operand")
9570 (plus:V8SI
9571 (mult:V8SI
9572 (sign_extend:V8SI
9573 (vec_select:V8HI
9574 (match_operand:V16HI 1 "nonimmediate_operand")
9575 (parallel [(const_int 0) (const_int 2)
9576 (const_int 4) (const_int 6)
9577 (const_int 8) (const_int 10)
9578 (const_int 12) (const_int 14)])))
9579 (sign_extend:V8SI
9580 (vec_select:V8HI
9581 (match_operand:V16HI 2 "nonimmediate_operand")
9582 (parallel [(const_int 0) (const_int 2)
9583 (const_int 4) (const_int 6)
9584 (const_int 8) (const_int 10)
9585 (const_int 12) (const_int 14)]))))
9586 (mult:V8SI
9587 (sign_extend:V8SI
9588 (vec_select:V8HI (match_dup 1)
9589 (parallel [(const_int 1) (const_int 3)
9590 (const_int 5) (const_int 7)
9591 (const_int 9) (const_int 11)
9592 (const_int 13) (const_int 15)])))
9593 (sign_extend:V8SI
9594 (vec_select:V8HI (match_dup 2)
9595 (parallel [(const_int 1) (const_int 3)
9596 (const_int 5) (const_int 7)
9597 (const_int 9) (const_int 11)
9598 (const_int 13) (const_int 15)]))))))]
9599 "TARGET_AVX2"
9600 "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
9601
;; Insn matching the even-product plus odd-product form on V16HI and
;; emitting the three-operand vpmaddwd.  Operands 1 and 2 are
;; commutative ('%'); operand 2 may be memory.
9602 (define_insn "*avx2_pmaddwd"
9603 [(set (match_operand:V8SI 0 "register_operand" "=x")
9604 (plus:V8SI
9605 (mult:V8SI
9606 (sign_extend:V8SI
9607 (vec_select:V8HI
9608 (match_operand:V16HI 1 "nonimmediate_operand" "%x")
9609 (parallel [(const_int 0) (const_int 2)
9610 (const_int 4) (const_int 6)
9611 (const_int 8) (const_int 10)
9612 (const_int 12) (const_int 14)])))
9613 (sign_extend:V8SI
9614 (vec_select:V8HI
9615 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
9616 (parallel [(const_int 0) (const_int 2)
9617 (const_int 4) (const_int 6)
9618 (const_int 8) (const_int 10)
9619 (const_int 12) (const_int 14)]))))
9620 (mult:V8SI
9621 (sign_extend:V8SI
9622 (vec_select:V8HI (match_dup 1)
9623 (parallel [(const_int 1) (const_int 3)
9624 (const_int 5) (const_int 7)
9625 (const_int 9) (const_int 11)
9626 (const_int 13) (const_int 15)])))
9627 (sign_extend:V8SI
9628 (vec_select:V8HI (match_dup 2)
9629 (parallel [(const_int 1) (const_int 3)
9630 (const_int 5) (const_int 7)
9631 (const_int 9) (const_int 11)
9632 (const_int 13) (const_int 15)]))))))]
9633 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)"
9634 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
9635 [(set_attr "type" "sseiadd")
9636 (set_attr "prefix" "vex")
9637 (set_attr "mode" "OI")])
9638
;; Expander for the 128-bit multiply-add of V8HI operands into V4SI:
;; the product of the sign-extended even elements (0, 2, 4, 6) plus the
;; product of the sign-extended odd elements (1, 3, 5, 7).
;; Requires TARGET_SSE2.
9639 (define_expand "sse2_pmaddwd"
9640 [(set (match_operand:V4SI 0 "register_operand")
9641 (plus:V4SI
9642 (mult:V4SI
9643 (sign_extend:V4SI
9644 (vec_select:V4HI
9645 (match_operand:V8HI 1 "nonimmediate_operand")
9646 (parallel [(const_int 0) (const_int 2)
9647 (const_int 4) (const_int 6)])))
9648 (sign_extend:V4SI
9649 (vec_select:V4HI
9650 (match_operand:V8HI 2 "nonimmediate_operand")
9651 (parallel [(const_int 0) (const_int 2)
9652 (const_int 4) (const_int 6)]))))
9653 (mult:V4SI
9654 (sign_extend:V4SI
9655 (vec_select:V4HI (match_dup 1)
9656 (parallel [(const_int 1) (const_int 3)
9657 (const_int 5) (const_int 7)])))
9658 (sign_extend:V4SI
9659 (vec_select:V4HI (match_dup 2)
9660 (parallel [(const_int 1) (const_int 3)
9661 (const_int 5) (const_int 7)]))))))]
9662 "TARGET_SSE2"
9663 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
9664
;; Insn matching the even-product plus odd-product form on V8HI.
;; Alternative 0 (isa noavx) emits two-operand pmaddwd with operand 1
;; tied to the destination; alternative 1 (isa avx) emits three-operand
;; vpmaddwd.
9665 (define_insn "*sse2_pmaddwd"
9666 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
9667 (plus:V4SI
9668 (mult:V4SI
9669 (sign_extend:V4SI
9670 (vec_select:V4HI
9671 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
9672 (parallel [(const_int 0) (const_int 2)
9673 (const_int 4) (const_int 6)])))
9674 (sign_extend:V4SI
9675 (vec_select:V4HI
9676 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
9677 (parallel [(const_int 0) (const_int 2)
9678 (const_int 4) (const_int 6)]))))
9679 (mult:V4SI
9680 (sign_extend:V4SI
9681 (vec_select:V4HI (match_dup 1)
9682 (parallel [(const_int 1) (const_int 3)
9683 (const_int 5) (const_int 7)])))
9684 (sign_extend:V4SI
9685 (vec_select:V4HI (match_dup 2)
9686 (parallel [(const_int 1) (const_int 3)
9687 (const_int 5) (const_int 7)]))))))]
9688 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
9689 "@
9690 pmaddwd\t{%2, %0|%0, %2}
9691 vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
9692 [(set_attr "isa" "noavx,avx")
9693 (set_attr "type" "sseiadd")
9694 (set_attr "atom_unit" "simul")
9695 (set_attr "prefix_data16" "1,*")
9696 (set_attr "prefix" "orig,vex")
9697 (set_attr "mode" "TI")])
9698
;; Element-wise multiply for the VI8 modes, emitted as vpmullq.
;; Operand 1 must be a register; operand 2 may be memory.  Enabled for
;; TARGET_AVX512DQ together with <mask_mode512bit_condition>.
9699 (define_insn "avx512dq_mul<mode>3<mask_name>"
9700 [(set (match_operand:VI8 0 "register_operand" "=v")
9701 (mult:VI8
9702 (match_operand:VI8 1 "register_operand" "v")
9703 (match_operand:VI8 2 "nonimmediate_operand" "vm")))]
9704 "TARGET_AVX512DQ && <mask_mode512bit_condition>"
9705 "vpmullq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9706 [(set_attr "type" "sseimul")
9707 (set_attr "prefix" "evex")
9708 (set_attr "mode" "<sseinsnmode>")])
9709
;; Expander for element-wise multiply on the VI4_AVX512F modes.
;; With TARGET_SSE4_1 the operands are forced into registers when they
;; are not nonimmediate_operand, fixed up for MULT, and the pattern
;; above is emitted.  Without SSE4.1 the whole expansion is delegated
;; to ix86_expand_sse2_mulv4si3 and the pattern is not emitted (DONE).
9710 (define_expand "mul<mode>3<mask_name>"
9711 [(set (match_operand:VI4_AVX512F 0 "register_operand")
9712 (mult:VI4_AVX512F
9713 (match_operand:VI4_AVX512F 1 "general_vector_operand")
9714 (match_operand:VI4_AVX512F 2 "general_vector_operand")))]
9715 "TARGET_SSE2 && <mask_mode512bit_condition>"
9716 {
9717 if (TARGET_SSE4_1)
9718 {
9719 if (!nonimmediate_operand (operands[1], <MODE>mode))
9720 operands[1] = force_reg (<MODE>mode, operands[1]);
9721 if (!nonimmediate_operand (operands[2], <MODE>mode))
9722 operands[2] = force_reg (<MODE>mode, operands[2]);
9723 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
9724 }
9725 else
9726 {
9727 ix86_expand_sse2_mulv4si3 (operands[0], operands[1], operands[2]);
9728 DONE;
9729 }
9730 })
9731
;; Insn for element-wise multiply on the VI4_AVX512F modes.
;; Alternative 0 (isa noavx) emits two-operand pmulld with operand 1
;; tied to the destination; alternative 1 (isa avx) emits three-operand
;; vpmulld, which also prints <mask_operand3>.
9732 (define_insn "*<sse4_1_avx2>_mul<mode>3<mask_name>"
9733 [(set (match_operand:VI4_AVX512F 0 "register_operand" "=x,v")
9734 (mult:VI4_AVX512F
9735 (match_operand:VI4_AVX512F 1 "nonimmediate_operand" "%0,v")
9736 (match_operand:VI4_AVX512F 2 "nonimmediate_operand" "xm,vm")))]
9737 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, <MODE>mode, operands) && <mask_mode512bit_condition>"
9738 "@
9739 pmulld\t{%2, %0|%0, %2}
9740 vpmulld\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9741 [(set_attr "isa" "noavx,avx")
9742 (set_attr "type" "sseimul")
9743 (set_attr "prefix_extra" "1")
9744 (set_attr "prefix" "<mask_prefix3>")
9745 (set_attr "btver2_decode" "vector,vector")
9746 (set_attr "mode" "<sseinsnmode>")])
9747
;; Expander for element-wise multiply on the VI8_AVX2_AVX512F modes.
;; The RTL pattern is never emitted: the preparation code always
;; delegates to ix86_expand_sse2_mulvxdi3 and finishes with DONE.
9748 (define_expand "mul<mode>3"
9749 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand")
9750 (mult:VI8_AVX2_AVX512F
9751 (match_operand:VI8_AVX2_AVX512F 1 "register_operand")
9752 (match_operand:VI8_AVX2_AVX512F 2 "register_operand")))]
9753 "TARGET_SSE2"
9754 {
9755 ix86_expand_sse2_mulvxdi3 (operands[0], operands[1], operands[2]);
9756 DONE;
9757 })
9758
;; Widening multiply, "hi" variant, for the VI124_AVX2 modes and both
;; extension kinds (any_extend).  Delegates to
;; ix86_expand_mul_widen_hilo, passing <u_bool> and true as the last
;; two arguments (the "lo" variant passes false).
9759 (define_expand "vec_widen_<s>mult_hi_<mode>"
9760 [(match_operand:<sseunpackmode> 0 "register_operand")
9761 (any_extend:<sseunpackmode>
9762 (match_operand:VI124_AVX2 1 "register_operand"))
9763 (match_operand:VI124_AVX2 2 "register_operand")]
9764 "TARGET_SSE2"
9765 {
9766 ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
9767 <u_bool>, true);
9768 DONE;
9769 })
9770
;; Widening multiply, "lo" variant, for the VI124_AVX2 modes and both
;; extension kinds (any_extend).  Delegates to
;; ix86_expand_mul_widen_hilo, passing <u_bool> and false as the last
;; two arguments (the "hi" variant passes true).
9771 (define_expand "vec_widen_<s>mult_lo_<mode>"
9772 [(match_operand:<sseunpackmode> 0 "register_operand")
9773 (any_extend:<sseunpackmode>
9774 (match_operand:VI124_AVX2 1 "register_operand"))
9775 (match_operand:VI124_AVX2 2 "register_operand")]
9776 "TARGET_SSE2"
9777 {
9778 ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
9779 <u_bool>, false);
9780 DONE;
9781 })
9782
9783 ;; Most widen_<s>mult_even_<mode> can be handled directly from other
9784 ;; named patterns, but signed V4SI needs special help for plain SSE2.
;; The expansion is done entirely by ix86_expand_mul_widen_evenodd,
;; called with false, false as its last two arguments.
9785 (define_expand "vec_widen_smult_even_v4si"
9786 [(match_operand:V2DI 0 "register_operand")
9787 (match_operand:V4SI 1 "nonimmediate_operand")
9788 (match_operand:V4SI 2 "nonimmediate_operand")]
9789 "TARGET_SSE2"
9790 {
9791 ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
9792 false, false);
9793 DONE;
9794 })
9795
;; Widening multiply, "odd" variant, for the VI4_AVX512F modes and both
;; extension kinds (any_extend).  Delegates to
;; ix86_expand_mul_widen_evenodd, passing <u_bool> and true as the last
;; two arguments.
9796 (define_expand "vec_widen_<s>mult_odd_<mode>"
9797 [(match_operand:<sseunpackmode> 0 "register_operand")
9798 (any_extend:<sseunpackmode>
9799 (match_operand:VI4_AVX512F 1 "general_vector_operand"))
9800 (match_operand:VI4_AVX512F 2 "general_vector_operand")]
9801 "TARGET_SSE2"
9802 {
9803 ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
9804 <u_bool>, true);
9805 DONE;
9806 })
9807
;; Name suffix appended to gen_<sse2_avx2>_pmaddwd by sdot_prod<mode>:
;; "512v32hi" for V32HI, empty for V16HI and V8HI.
9808 (define_mode_attr SDOT_PMADD_SUF
9809 [(V32HI "512v32hi") (V16HI "") (V8HI "")])
9810
;; Signed dot product for the VI2_AVX2 modes.  Emits a pmaddwd of
;; operands 1 and 2 into a fresh temporary, then sets operand 0 to
;; operand 3 plus that temporary.
9811 (define_expand "sdot_prod<mode>"
9812 [(match_operand:<sseunpackmode> 0 "register_operand")
9813 (match_operand:VI2_AVX2 1 "register_operand")
9814 (match_operand:VI2_AVX2 2 "register_operand")
9815 (match_operand:<sseunpackmode> 3 "register_operand")]
9816 "TARGET_SSE2"
9817 {
9818 rtx t = gen_reg_rtx (<sseunpackmode>mode);
9819 emit_insn (gen_<sse2_avx2>_pmaddwd<SDOT_PMADD_SUF> (t, operands[1], operands[2]));
9820 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9821 gen_rtx_PLUS (<sseunpackmode>mode,
9822 operands[3], t)));
9823 DONE;
9824 })
9825
9826 ;; Normally we use widen_mul_even/odd, but combine can't quite get it all
9827 ;; back together when madd is available.
;; XOP-only expansion: xop_pmacsdqh accumulates onto operand 3 into a
;; temporary, then xop_pmacsdql accumulates onto that temporary into
;; operand 0.
9828 (define_expand "sdot_prodv4si"
9829 [(match_operand:V2DI 0 "register_operand")
9830 (match_operand:V4SI 1 "register_operand")
9831 (match_operand:V4SI 2 "register_operand")
9832 (match_operand:V2DI 3 "register_operand")]
9833 "TARGET_XOP"
9834 {
9835 rtx t = gen_reg_rtx (V2DImode);
9836 emit_insn (gen_xop_pmacsdqh (t, operands[1], operands[2], operands[3]));
9837 emit_insn (gen_xop_pmacsdql (operands[0], operands[1], operands[2], t));
9838 DONE;
9839 })
9840
;; usad pattern for V16QI inputs with a V4SI accumulator (operand 3).
;; Emits sse2_psadbw of operands 1 and 2 into a V2DI temporary, moves
;; it into a V4SI temporary with convert_move, and adds operand 3 to
;; produce operand 0.
9841 (define_expand "usadv16qi"
9842 [(match_operand:V4SI 0 "register_operand")
9843 (match_operand:V16QI 1 "register_operand")
9844 (match_operand:V16QI 2 "nonimmediate_operand")
9845 (match_operand:V4SI 3 "nonimmediate_operand")]
9846 "TARGET_SSE2"
9847 {
9848 rtx t1 = gen_reg_rtx (V2DImode);
9849 rtx t2 = gen_reg_rtx (V4SImode);
9850 emit_insn (gen_sse2_psadbw (t1, operands[1], operands[2]));
9851 convert_move (t2, t1, 0);
9852 emit_insn (gen_addv4si3 (operands[0], t2, operands[3]));
9853 DONE;
9854 })
9855
;; usad pattern for V32QI inputs with a V8SI accumulator (operand 3).
;; Same sequence as usadv16qi at 256 bits: avx2_psadbw into a V4DI
;; temporary, convert_move into a V8SI temporary, then addv8si3 with
;; operand 3.
9856 (define_expand "usadv32qi"
9857 [(match_operand:V8SI 0 "register_operand")
9858 (match_operand:V32QI 1 "register_operand")
9859 (match_operand:V32QI 2 "nonimmediate_operand")
9860 (match_operand:V8SI 3 "nonimmediate_operand")]
9861 "TARGET_AVX2"
9862 {
9863 rtx t1 = gen_reg_rtx (V4DImode);
9864 rtx t2 = gen_reg_rtx (V8SImode);
9865 emit_insn (gen_avx2_psadbw (t1, operands[1], operands[2]));
9866 convert_move (t2, t1, 0);
9867 emit_insn (gen_addv8si3 (operands[0], t2, operands[3]));
9868 DONE;
9869 })
9870
;; Arithmetic right shift for the VI24_AVX2 modes by an SImode count
;; held in a register or given as an immediate ("xN").
;; Alternative 0 (isa noavx) emits two-operand psra<ssemodesuffix>;
;; alternative 1 (isa avx) emits three-operand vpsra<ssemodesuffix>.
;; length_immediate is 1 when operand 2 is a const_int, else 0.
9871 (define_insn "ashr<mode>3"
9872 [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
9873 (ashiftrt:VI24_AVX2
9874 (match_operand:VI24_AVX2 1 "register_operand" "0,x")
9875 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
9876 "TARGET_SSE2"
9877 "@
9878 psra<ssemodesuffix>\t{%2, %0|%0, %2}
9879 vpsra<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9880 [(set_attr "isa" "noavx,avx")
9881 (set_attr "type" "sseishft")
9882 (set (attr "length_immediate")
9883 (if_then_else (match_operand 2 "const_int_operand")
9884 (const_string "1")
9885 (const_string "0")))
9886 (set_attr "prefix_data16" "1,*")
9887 (set_attr "prefix" "orig,vex")
9888 (set_attr "mode" "<sseinsnmode>")])
9889
;; Arithmetic right shift for the VI24_AVX512BW_1 modes under
;; TARGET_AVX512VL.  Alternative 0 takes the count in a register with
;; a register source; alternative 1 takes an immediate count and allows
;; a memory source.  Both emit vpsra<ssemodesuffix> with
;; <mask_operand3>.
9890 (define_insn "<mask_codefor>ashr<mode>3<mask_name>"
9891 [(set (match_operand:VI24_AVX512BW_1 0 "register_operand" "=v,v")
9892 (ashiftrt:VI24_AVX512BW_1
9893 (match_operand:VI24_AVX512BW_1 1 "nonimmediate_operand" "v,vm")
9894 (match_operand:SI 2 "nonmemory_operand" "v,N")))]
9895 "TARGET_AVX512VL"
9896 "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9897 [(set_attr "type" "sseishft")
9898 (set (attr "length_immediate")
9899 (if_then_else (match_operand 2 "const_int_operand")
9900 (const_string "1")
9901 (const_string "0")))
9902 (set_attr "mode" "<sseinsnmode>")])
9903
;; Arithmetic right shift of V2DI under TARGET_AVX512VL, emitted as
;; vpsraq.  Unlike the neighbouring shift insns the count operand is
;; DImode.  Alternative 0 takes a register count with a register
;; source; alternative 1 takes an immediate count and allows a memory
;; source.
9904 (define_insn "<mask_codefor>ashrv2di3<mask_name>"
9905 [(set (match_operand:V2DI 0 "register_operand" "=v,v")
9906 (ashiftrt:V2DI
9907 (match_operand:V2DI 1 "nonimmediate_operand" "v,vm")
9908 (match_operand:DI 2 "nonmemory_operand" "v,N")))]
9909 "TARGET_AVX512VL"
9910 "vpsraq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9911 [(set_attr "type" "sseishft")
9912 (set (attr "length_immediate")
9913 (if_then_else (match_operand 2 "const_int_operand")
9914 (const_string "1")
9915 (const_string "0")))
9916 (set_attr "mode" "TI")])
9917
;; Arithmetic right shift for the VI248_AVX512BW_AVX512VL modes under
;; TARGET_AVX512F, emitted as vpsra<ssemodesuffix> with <mask_operand3>.
;; Alternative 0: register count, register source.  Alternative 1:
;; immediate count, register or memory source.
9918 (define_insn "ashr<mode>3<mask_name>"
9919 [(set (match_operand:VI248_AVX512BW_AVX512VL 0 "register_operand" "=v,v")
9920 (ashiftrt:VI248_AVX512BW_AVX512VL
9921 (match_operand:VI248_AVX512BW_AVX512VL 1 "nonimmediate_operand" "v,vm")
9922 (match_operand:SI 2 "nonmemory_operand" "v,N")))]
9923 "TARGET_AVX512F"
9924 "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9925 [(set_attr "type" "sseishft")
9926 (set (attr "length_immediate")
9927 (if_then_else (match_operand 2 "const_int_operand")
9928 (const_string "1")
9929 (const_string "0")))
9930 (set_attr "mode" "<sseinsnmode>")])
9931
;; Logical shifts (any_lshift) for the VI2_AVX2_AVX512BW modes.
;; Alternative 0 (isa noavx) emits two-operand p<vshift><ssemodesuffix>;
;; alternative 1 (isa avx) emits the three-operand vp... form with
;; <mask_operand3>.  The count is a register or an immediate.
;; Enabled for TARGET_SSE2 together with <mask_mode512bit_condition>
;; and <mask_avx512bw_condition>.
9932 (define_insn "<shift_insn><mode>3<mask_name>"
9933 [(set (match_operand:VI2_AVX2_AVX512BW 0 "register_operand" "=x,v")
9934 (any_lshift:VI2_AVX2_AVX512BW
9935 (match_operand:VI2_AVX2_AVX512BW 1 "register_operand" "0,v")
9936 (match_operand:SI 2 "nonmemory_operand" "xN,vN")))]
9937 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
9938 "@
9939 p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
9940 vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9941 [(set_attr "isa" "noavx,avx")
9942 (set_attr "type" "sseishft")
9943 (set (attr "length_immediate")
9944 (if_then_else (match_operand 2 "const_int_operand")
9945 (const_string "1")
9946 (const_string "0")))
9947 (set_attr "prefix_data16" "1,*")
9948 (set_attr "prefix" "orig,vex")
9949 (set_attr "mode" "<sseinsnmode>")])
9950
;; Logical shifts (any_lshift) for the VI48_AVX2 modes.
;; Alternative 0 (isa noavx) emits two-operand p<vshift><ssemodesuffix>;
;; alternative 1 (isa avx) emits the three-operand vp... form with
;; <mask_operand3>.  The count is a register or an immediate.
;; Enabled for TARGET_SSE2 together with <mask_mode512bit_condition>.
9951 (define_insn "<shift_insn><mode>3<mask_name>"
9952 [(set (match_operand:VI48_AVX2 0 "register_operand" "=x,v")
9953 (any_lshift:VI48_AVX2
9954 (match_operand:VI48_AVX2 1 "register_operand" "0,v")
9955 (match_operand:SI 2 "nonmemory_operand" "xN,vN")))]
9956 "TARGET_SSE2 && <mask_mode512bit_condition>"
9957 "@
9958 p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
9959 vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9960 [(set_attr "isa" "noavx,avx")
9961 (set_attr "type" "sseishft")
9962 (set (attr "length_immediate")
9963 (if_then_else (match_operand 2 "const_int_operand")
9964 (const_string "1")
9965 (const_string "0")))
9966 (set_attr "prefix_data16" "1,*")
9967 (set_attr "prefix" "orig,vex")
9968 (set_attr "mode" "<sseinsnmode>")])
9969
;; Logical shifts (any_lshift) for the VI48_512 modes under
;; TARGET_AVX512F.  Alternative 0: register source, count in a register
;; or immediate.  Alternative 1: memory source, immediate count only.
;; Both emit vp<vshift><ssemodesuffix> with <mask_operand3>.
9970 (define_insn "<shift_insn><mode>3<mask_name>"
9971 [(set (match_operand:VI48_512 0 "register_operand" "=v,v")
9972 (any_lshift:VI48_512
9973 (match_operand:VI48_512 1 "nonimmediate_operand" "v,m")
9974 (match_operand:SI 2 "nonmemory_operand" "vN,N")))]
9975 "TARGET_AVX512F && <mask_mode512bit_condition>"
9976 "vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9977 [(set_attr "isa" "avx512f")
9978 (set_attr "type" "sseishft")
9979 (set (attr "length_immediate")
9980 (if_then_else (match_operand 2 "const_int_operand")
9981 (const_string "1")
9982 (const_string "0")))
9983 (set_attr "prefix" "evex")
9984 (set_attr "mode" "<sseinsnmode>")])
9985
9986
;; Whole-vector left shift for the VI_128 modes.  The shift itself is
;; done in V1TImode: operand 1 is reinterpreted with gen_lowpart, the
;; ashift result goes to a fresh V1TI register (operand 3), and operand
;; 0 is then set from the <MODE>mode lowpart of that register
;; (operand 4).  The count must satisfy const_0_to_255_mul_8_operand.
9987 (define_expand "vec_shl_<mode>"
9988 [(set (match_dup 3)
9989 (ashift:V1TI
9990 (match_operand:VI_128 1 "register_operand")
9991 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
9992 (set (match_operand:VI_128 0 "register_operand") (match_dup 4))]
9993 "TARGET_SSE2"
9994 {
9995 operands[1] = gen_lowpart (V1TImode, operands[1]);
9996 operands[3] = gen_reg_rtx (V1TImode);
9997 operands[4] = gen_lowpart (<MODE>mode, operands[3]);
9998 })
9999
;; Left shift for the VIMAX_AVX2 modes by a constant count.  The count
;; operand is divided by 8 before the template is printed (presumably
;; converting a bit count into the byte count pslldq takes -- confirm
;; against the instruction reference).  Alternative 0 emits two-operand
;; pslldq, alternative 1 three-operand vpslldq.
10000 (define_insn "<sse2_avx2>_ashl<mode>3"
10001 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,v")
10002 (ashift:VIMAX_AVX2
10003 (match_operand:VIMAX_AVX2 1 "register_operand" "0,v")
10004 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
10005 "TARGET_SSE2"
10006 {
10007 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
10008
10009 switch (which_alternative)
10010 {
10011 case 0:
10012 return "pslldq\t{%2, %0|%0, %2}";
10013 case 1:
10014 return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
10015 default:
10016 gcc_unreachable ();
10017 }
10018 }
10019 [(set_attr "isa" "noavx,avx")
10020 (set_attr "type" "sseishft")
10021 (set_attr "length_immediate" "1")
10022 (set_attr "prefix_data16" "1,*")
10023 (set_attr "prefix" "orig,vex")
10024 (set_attr "mode" "<sseinsnmode>")])
10025
;; Whole-vector right shift for the VI_128 modes.  Mirrors
;; vec_shl_<mode>: the lshiftrt is done in V1TImode into a fresh V1TI
;; register (operand 3), and operand 0 is set from its <MODE>mode
;; lowpart (operand 4).
10026 (define_expand "vec_shr_<mode>"
10027 [(set (match_dup 3)
10028 (lshiftrt:V1TI
10029 (match_operand:VI_128 1 "register_operand")
10030 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
10031 (set (match_operand:VI_128 0 "register_operand") (match_dup 4))]
10032 "TARGET_SSE2"
10033 {
10034 operands[1] = gen_lowpart (V1TImode, operands[1]);
10035 operands[3] = gen_reg_rtx (V1TImode);
10036 operands[4] = gen_lowpart (<MODE>mode, operands[3]);
10037 })
10038
;; Logical right shift for the VIMAX_AVX2 modes by a constant count.
;; The count operand is divided by 8 before the template is printed
;; (presumably converting a bit count into the byte count psrldq takes
;; -- confirm against the instruction reference).  Alternative 0 emits
;; two-operand psrldq, alternative 1 three-operand vpsrldq.
10039 (define_insn "<sse2_avx2>_lshr<mode>3"
10040 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,v")
10041 (lshiftrt:VIMAX_AVX2
10042 (match_operand:VIMAX_AVX2 1 "register_operand" "0,v")
10043 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
10044 "TARGET_SSE2"
10045 {
10046 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
10047
10048 switch (which_alternative)
10049 {
10050 case 0:
10051 return "psrldq\t{%2, %0|%0, %2}";
10052 case 1:
10053 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
10054 default:
10055 gcc_unreachable ();
10056 }
10057 }
10058 [(set_attr "isa" "noavx,avx")
10059 (set_attr "type" "sseishft")
10060 (set_attr "length_immediate" "1")
10061 (set_attr "atom_unit" "sishuf")
10062 (set_attr "prefix_data16" "1,*")
10063 (set_attr "prefix" "orig,vex")
10064 (set_attr "mode" "<sseinsnmode>")])
10065
;; Rotate (any_rotate) for the VI48_AVX512VL modes where the count is
;; itself a vector of the same mode (operand 2, register or memory).
;; Emits vp<rotate>v<ssemodesuffix> with <mask_operand3>.
10066 (define_insn "<avx512>_<rotate>v<mode><mask_name>"
10067 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
10068 (any_rotate:VI48_AVX512VL
10069 (match_operand:VI48_AVX512VL 1 "register_operand" "v")
10070 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")))]
10071 "TARGET_AVX512F"
10072 "vp<rotate>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10073 [(set_attr "prefix" "evex")
10074 (set_attr "mode" "<sseinsnmode>")])
10075
;; Rotate (any_rotate) for the VI48_AVX512VL modes by a constant count
;; accepted by const_0_to_255_operand (operand 2 has no constraint
;; string).  The source may be a register or memory.  Emits
;; vp<rotate><ssemodesuffix> with <mask_operand3>.
10076 (define_insn "<avx512>_<rotate><mode><mask_name>"
10077 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
10078 (any_rotate:VI48_AVX512VL
10079 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")
10080 (match_operand:SI 2 "const_0_to_255_operand")))]
10081 "TARGET_AVX512F"
10082 "vp<rotate><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10083 [(set_attr "prefix" "evex")
10084 (set_attr "mode" "<sseinsnmode>")])
10085
;; Expander for integer max/min (maxmin) on the
;; VI124_256_AVX512F_AVX512BW modes.  Requires TARGET_AVX2; operands
;; are fixed up for <CODE> before the pattern is emitted.
10086 (define_expand "<code><mode>3"
10087 [(set (match_operand:VI124_256_AVX512F_AVX512BW 0 "register_operand")
10088 (maxmin:VI124_256_AVX512F_AVX512BW
10089 (match_operand:VI124_256_AVX512F_AVX512BW 1 "nonimmediate_operand")
10090 (match_operand:VI124_256_AVX512F_AVX512BW 2 "nonimmediate_operand")))]
10091 "TARGET_AVX2"
10092 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
10093
;; Insn for integer max/min on the VI124_256 modes; emits the
;; three-operand vp<maxmin_int><ssemodesuffix>.  Operands 1 and 2 are
;; commutative ('%'); operand 2 may be memory.
10094 (define_insn "*avx2_<code><mode>3"
10095 [(set (match_operand:VI124_256 0 "register_operand" "=v")
10096 (maxmin:VI124_256
10097 (match_operand:VI124_256 1 "nonimmediate_operand" "%v")
10098 (match_operand:VI124_256 2 "nonimmediate_operand" "vm")))]
10099 "TARGET_AVX2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10100 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10101 [(set_attr "type" "sseiadd")
10102 (set_attr "prefix_extra" "1")
10103 (set_attr "prefix" "vex")
10104 (set_attr "mode" "OI")])
10105
;; Explicitly masked expander for integer max/min on the VI48_AVX512VL
;; modes: the max/min result is vec_merge'd with operand 3 under the
;; mask register in operand 4.  Requires TARGET_AVX512F; operands are
;; fixed up for <CODE>.
10106 (define_expand "<code><mode>3_mask"
10107 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
10108 (vec_merge:VI48_AVX512VL
10109 (maxmin:VI48_AVX512VL
10110 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
10111 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
10112 (match_operand:VI48_AVX512VL 3 "vector_move_operand")
10113 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
10114 "TARGET_AVX512F"
10115 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
10116
;; Insn for integer max/min on the VI48_AVX512VL modes under
;; TARGET_AVX512F; emits vp<maxmin_int><ssemodesuffix> with
;; <mask_operand3>.  Operands 1 and 2 are commutative ('%').
10117 (define_insn "*avx512bw_<code><mode>3<mask_name>"
10118 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
10119 (maxmin:VI48_AVX512VL
10120 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "%v")
10121 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")))]
10122 "TARGET_AVX512F && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10123 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10124 [(set_attr "type" "sseiadd")
10125 (set_attr "prefix_extra" "1")
10126 (set_attr "prefix" "maybe_evex")
10127 (set_attr "mode" "<sseinsnmode>")])
10128
;; Insn for integer max/min on the VI12_AVX512VL modes under
;; TARGET_AVX512BW; emits vp<maxmin_int><ssemodesuffix> with
;; <mask_operand3>.  Operand 1 must be a register (no commutative
;; marker); operand 2 may be memory.
10129 (define_insn "<mask_codefor><code><mode>3<mask_name>"
10130 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
10131 (maxmin:VI12_AVX512VL
10132 (match_operand:VI12_AVX512VL 1 "register_operand" "v")
10133 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")))]
10134 "TARGET_AVX512BW"
10135 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10136 [(set_attr "type" "sseiadd")
10137 (set_attr "prefix" "evex")
10138 (set_attr "mode" "<sseinsnmode>")])
10139
;; Expander for integer max/min on the VI8_AVX2_AVX512BW modes.
;; With AVX512F and either V8DImode or AVX512VL the pattern is emitted
;; directly after operand fixup.  Otherwise the operation is lowered to
;; a vector conditional via ix86_expand_int_vcond:
;;   xops[0]    destination
;;   xops[1..2] the two values selected between; swapped for the min
;;              codes so one comparison serves both max and min
;;   xops[3]    the comparison operands[1] > operands[2], GTU for the
;;              unsigned codes and GT for the signed ones
;;   xops[4..5] the comparison operands
10140 (define_expand "<code><mode>3"
10141 [(set (match_operand:VI8_AVX2_AVX512BW 0 "register_operand")
10142 (maxmin:VI8_AVX2_AVX512BW
10143 (match_operand:VI8_AVX2_AVX512BW 1 "register_operand")
10144 (match_operand:VI8_AVX2_AVX512BW 2 "register_operand")))]
10145 "TARGET_SSE4_2"
10146 {
10147 if (TARGET_AVX512F
10148 && (<MODE>mode == V8DImode || TARGET_AVX512VL))
10149 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
10150 else
10151 {
10152 enum rtx_code code;
10153 rtx xops[6];
10154 bool ok;
10155
10156
10157 xops[0] = operands[0];
10158
10159 if (<CODE> == SMAX || <CODE> == UMAX)
10160 {
10161 xops[1] = operands[1];
10162 xops[2] = operands[2];
10163 }
10164 else
10165 {
10166 xops[1] = operands[2];
10167 xops[2] = operands[1];
10168 }
10169
10170 code = (<CODE> == UMAX || <CODE> == UMIN) ? GTU : GT;
10171
10172 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
10173 xops[4] = operands[1];
10174 xops[5] = operands[2];
10175
10176 ok = ix86_expand_int_vcond (xops);
10177 gcc_assert (ok);
10178 DONE;
10179 }
10180 })
10181
;; Expander for signed max/min (smaxmin) on the VI124_128 modes.
;; With SSE4.1, or for V8HImode, the pattern is emitted directly after
;; operand fixup.  Otherwise both inputs are forced into registers and
;; the operation is lowered through ix86_expand_int_vcond using the
;; comparison operands[1] > operands[2] (GT); the selected values in
;; xops[1..2] are swapped for SMIN.
10182 (define_expand "<code><mode>3"
10183 [(set (match_operand:VI124_128 0 "register_operand")
10184 (smaxmin:VI124_128
10185 (match_operand:VI124_128 1 "nonimmediate_operand")
10186 (match_operand:VI124_128 2 "nonimmediate_operand")))]
10187 "TARGET_SSE2"
10188 {
10189 if (TARGET_SSE4_1 || <MODE>mode == V8HImode)
10190 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
10191 else
10192 {
10193 rtx xops[6];
10194 bool ok;
10195
10196 xops[0] = operands[0];
10197 operands[1] = force_reg (<MODE>mode, operands[1]);
10198 operands[2] = force_reg (<MODE>mode, operands[2]);
10199
10200 if (<CODE> == SMAX)
10201 {
10202 xops[1] = operands[1];
10203 xops[2] = operands[2];
10204 }
10205 else
10206 {
10207 xops[1] = operands[2];
10208 xops[2] = operands[1];
10209 }
10210
10211 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
10212 xops[4] = operands[1];
10213 xops[5] = operands[2];
10214
10215 ok = ix86_expand_int_vcond (xops);
10216 gcc_assert (ok);
10217 DONE;
10218 }
10219 })
10220
;; Insn for signed max/min on the VI14_128 modes under TARGET_SSE4_1.
;; Alternative 0 (isa noavx) emits two-operand
;; p<maxmin_int><ssemodesuffix>; alternative 1 (isa avx) emits the
;; three-operand vp... form with <mask_operand3>.
10221 (define_insn "*sse4_1_<code><mode>3<mask_name>"
10222 [(set (match_operand:VI14_128 0 "register_operand" "=x,v")
10223 (smaxmin:VI14_128
10224 (match_operand:VI14_128 1 "nonimmediate_operand" "%0,v")
10225 (match_operand:VI14_128 2 "nonimmediate_operand" "xm,vm")))]
10226 "TARGET_SSE4_1
10227 && <mask_mode512bit_condition>
10228 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10229 "@
10230 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
10231 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10232 [(set_attr "isa" "noavx,avx")
10233 (set_attr "type" "sseiadd")
10234 (set_attr "prefix_extra" "1,*")
10235 (set_attr "prefix" "orig,vex")
10236 (set_attr "mode" "TI")])
10237
;; Insn for signed max/min on V8HI, available from TARGET_SSE2.
;; Alternative 0 (isa noavx) emits two-operand p<maxmin_int>w;
;; alternative 1 (isa avx) emits three-operand vp<maxmin_int>w.
10238 (define_insn "*<code>v8hi3"
10239 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
10240 (smaxmin:V8HI
10241 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
10242 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))]
10243 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
10244 "@
10245 p<maxmin_int>w\t{%2, %0|%0, %2}
10246 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
10247 [(set_attr "isa" "noavx,avx")
10248 (set_attr "type" "sseiadd")
10249 (set_attr "prefix_data16" "1,*")
10250 (set_attr "prefix_extra" "*,1")
10251 (set_attr "prefix" "orig,vex")
10252 (set_attr "mode" "TI")])
10253
;; Expander for unsigned max/min (umaxmin) on the VI124_128 modes.
;; Three strategies:
;;  - SSE4.1, or V16QImode: emit the pattern directly after fixup.
;;  - UMAX on V8HImode: emit sse2_ussubv8hi3 (operand 1, operand 2)
;;    followed by addv8hi3 with operand 2 (presumably unsigned
;;    saturating subtract then add, i.e. max(a,b) = (a -us b) + b --
;;    confirm against the ussub pattern).  A fresh temporary is used
;;    when the destination is rtx_equal_p to operand 2, so operand 2 is
;;    still intact for the add.
;;  - Otherwise: force both inputs into registers and lower through
;;    ix86_expand_int_vcond using operands[1] >u operands[2] (GTU),
;;    with the selected values swapped for UMIN.
10254 (define_expand "<code><mode>3"
10255 [(set (match_operand:VI124_128 0 "register_operand")
10256 (umaxmin:VI124_128
10257 (match_operand:VI124_128 1 "nonimmediate_operand")
10258 (match_operand:VI124_128 2 "nonimmediate_operand")))]
10259 "TARGET_SSE2"
10260 {
10261 if (TARGET_SSE4_1 || <MODE>mode == V16QImode)
10262 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
10263 else if (<CODE> == UMAX && <MODE>mode == V8HImode)
10264 {
10265 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
10266 operands[1] = force_reg (<MODE>mode, operands[1]);
10267 if (rtx_equal_p (op3, op2))
10268 op3 = gen_reg_rtx (V8HImode);
10269 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
10270 emit_insn (gen_addv8hi3 (op0, op3, op2));
10271 DONE;
10272 }
10273 else
10274 {
10275 rtx xops[6];
10276 bool ok;
10277
10278 operands[1] = force_reg (<MODE>mode, operands[1]);
10279 operands[2] = force_reg (<MODE>mode, operands[2]);
10280
10281 xops[0] = operands[0];
10282
10283 if (<CODE> == UMAX)
10284 {
10285 xops[1] = operands[1];
10286 xops[2] = operands[2];
10287 }
10288 else
10289 {
10290 xops[1] = operands[2];
10291 xops[2] = operands[1];
10292 }
10293
10294 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
10295 xops[4] = operands[1];
10296 xops[5] = operands[2];
10297
10298 ok = ix86_expand_int_vcond (xops);
10299 gcc_assert (ok);
10300 DONE;
10301 }
10302 })
10303
;; Insn for unsigned max/min on the VI24_128 modes under TARGET_SSE4_1.
;; Alternative 0 (isa noavx) emits two-operand
;; p<maxmin_int><ssemodesuffix>; alternative 1 (isa avx) emits the
;; three-operand vp... form with <mask_operand3>.
10304 (define_insn "*sse4_1_<code><mode>3<mask_name>"
10305 [(set (match_operand:VI24_128 0 "register_operand" "=x,v")
10306 (umaxmin:VI24_128
10307 (match_operand:VI24_128 1 "nonimmediate_operand" "%0,v")
10308 (match_operand:VI24_128 2 "nonimmediate_operand" "xm,vm")))]
10309 "TARGET_SSE4_1
10310 && <mask_mode512bit_condition>
10311 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10312 "@
10313 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
10314 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10315 [(set_attr "isa" "noavx,avx")
10316 (set_attr "type" "sseiadd")
10317 (set_attr "prefix_extra" "1,*")
10318 (set_attr "prefix" "orig,vex")
10319 (set_attr "mode" "TI")])
10320
;; Unsigned max/min on V16QI, enabled under TARGET_SSE2.
;; Alternative 0 (isa noavx) emits p<maxmin_int>b with operand 1 tied to
;; the destination; alternative 1 (isa avx) emits the three-operand
;; vp<maxmin_int>b.
10321 (define_insn "*<code>v16qi3"
10322 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
10323 (umaxmin:V16QI
10324 (match_operand:V16QI 1 "nonimmediate_operand" "%0,x")
10325 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))]
10326 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
10327 "@
10328 p<maxmin_int>b\t{%2, %0|%0, %2}
10329 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
10330 [(set_attr "isa" "noavx,avx")
10331 (set_attr "type" "sseiadd")
10332 (set_attr "prefix_data16" "1,*")
10333 (set_attr "prefix_extra" "*,1")
10334 (set_attr "prefix" "orig,vex")
10335 (set_attr "mode" "TI")])
10336
10337 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10338 ;;
10339 ;; Parallel integral comparisons
10340 ;;
10341 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10342
;; Named expander for vector equality on VI_256 modes under TARGET_AVX2.
;; The only preparation step is ix86_fixup_binary_operands_no_copy for EQ;
;; the RTL template itself is then emitted.
10343 (define_expand "avx2_eq<mode>3"
10344 [(set (match_operand:VI_256 0 "register_operand")
10345 (eq:VI_256
10346 (match_operand:VI_256 1 "nonimmediate_operand")
10347 (match_operand:VI_256 2 "nonimmediate_operand")))]
10348 "TARGET_AVX2"
10349 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
10350
;; Matching insn for VI_256 equality: emits vpcmpeq<ssemodesuffix> in the
;; three-operand form.  Operand 1 is marked commutative ("%"); the
;; condition additionally requires ix86_binary_operator_ok.
10351 (define_insn "*avx2_eq<mode>3"
10352 [(set (match_operand:VI_256 0 "register_operand" "=x")
10353 (eq:VI_256
10354 (match_operand:VI_256 1 "nonimmediate_operand" "%x")
10355 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
10356 "TARGET_AVX2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
10357 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10358 [(set_attr "type" "ssecmp")
10359 (set_attr "prefix_extra" "1")
10360 (set_attr "prefix" "vex")
10361 (set_attr "mode" "OI")])
10362
;; Named expander for equality on VI12_AVX512VL vectors whose result is
;; an <avx512fmaskmode> value wrapped in UNSPEC_MASKED_EQ.  Enabled under
;; TARGET_AVX512BW; preparation only fixes up the operands for EQ.
10363 (define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>"
10364 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
10365 (unspec:<avx512fmaskmode>
10366 [(match_operand:VI12_AVX512VL 1 "register_operand")
10367 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand")]
10368 UNSPEC_MASKED_EQ))]
10369 "TARGET_AVX512BW"
10370 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
10371
;; Named expander for equality on VI48_AVX512VL vectors whose result is
;; an <avx512fmaskmode> value wrapped in UNSPEC_MASKED_EQ.  Enabled under
;; TARGET_AVX512F; preparation only fixes up the operands for EQ.
10372 (define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>"
10373 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
10374 (unspec:<avx512fmaskmode>
10375 [(match_operand:VI48_AVX512VL 1 "register_operand")
10376 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand")]
10377 UNSPEC_MASKED_EQ))]
10378 "TARGET_AVX512F"
10379 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
10380
;; Insn for equality on VI12_AVX512VL (byte/word element) vectors with the
;; result written to an <avx512fmaskmode> destination (constraint Yk).
;; Emits vpcmpeq<ssemodesuffix> in the three-operand form, with the
;; <mask_scalar_merge_operand3> suffix on the destination.
;; The condition requires TARGET_AVX512BW, the same ISA requirement as the
;; VI12_AVX512VL expander that generates this pattern: the mask-producing
;; byte/word compares are AVX512BW instructions.
10381 (define_insn "<avx512>_eq<mode>3<mask_scalar_merge_name>_1"
10382 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
10383 (unspec:<avx512fmaskmode>
10384 [(match_operand:VI12_AVX512VL 1 "register_operand" "%v")
10385 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")]
10386 UNSPEC_MASKED_EQ))]
10387 "TARGET_AVX512BW && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
10388 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
10389 [(set_attr "type" "ssecmp")
10390 (set_attr "prefix_extra" "1")
10391 (set_attr "prefix" "evex")
10392 (set_attr "mode" "<sseinsnmode>")])
10393
;; Insn for equality on VI48_AVX512VL vectors with the result written to
;; an <avx512fmaskmode> destination (constraint Yk).  Emits
;; vpcmpeq<ssemodesuffix> in the three-operand form, with the
;; <mask_scalar_merge_operand3> suffix on the destination.
10394 (define_insn "<avx512>_eq<mode>3<mask_scalar_merge_name>_1"
10395 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
10396 (unspec:<avx512fmaskmode>
10397 [(match_operand:VI48_AVX512VL 1 "register_operand" "%v")
10398 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")]
10399 UNSPEC_MASKED_EQ))]
10400 "TARGET_AVX512F && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
10401 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
10402 [(set_attr "type" "ssecmp")
10403 (set_attr "prefix_extra" "1")
10404 (set_attr "prefix" "evex")
10405 (set_attr "mode" "<sseinsnmode>")])
10406
;; V2DI equality under TARGET_SSE4_1.  Alternative 0 (isa noavx) emits
;; two-operand pcmpeqq with operand 1 tied to the destination;
;; alternative 1 (isa avx) emits three-operand vpcmpeqq.
10407 (define_insn "*sse4_1_eqv2di3"
10408 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
10409 (eq:V2DI
10410 (match_operand:V2DI 1 "nonimmediate_operand" "%0,x")
10411 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
10412 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
10413 "@
10414 pcmpeqq\t{%2, %0|%0, %2}
10415 vpcmpeqq\t{%2, %1, %0|%0, %1, %2}"
10416 [(set_attr "isa" "noavx,avx")
10417 (set_attr "type" "ssecmp")
10418 (set_attr "prefix_extra" "1")
10419 (set_attr "prefix" "orig,vex")
10420 (set_attr "mode" "TI")])
10421
;; Equality on VI124_128 vectors under TARGET_SSE2; disabled when
;; TARGET_XOP is set.  Alternative 0 (isa noavx) emits two-operand
;; pcmpeq<ssemodesuffix>; alternative 1 (isa avx) emits the three-operand
;; v-prefixed form.
10422 (define_insn "*sse2_eq<mode>3"
10423 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
10424 (eq:VI124_128
10425 (match_operand:VI124_128 1 "nonimmediate_operand" "%0,x")
10426 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
10427 "TARGET_SSE2 && !TARGET_XOP
10428 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
10429 "@
10430 pcmpeq<ssemodesuffix>\t{%2, %0|%0, %2}
10431 vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10432 [(set_attr "isa" "noavx,avx")
10433 (set_attr "type" "ssecmp")
10434 (set_attr "prefix_data16" "1,*")
10435 (set_attr "prefix" "orig,vex")
10436 (set_attr "mode" "TI")])
10437
;; Named expander for equality on VI124_128 vectors.  Enabled under
;; TARGET_SSE2 when TARGET_XOP is not set; preparation only fixes up the
;; operands for EQ.
10438 (define_expand "sse2_eq<mode>3"
10439 [(set (match_operand:VI124_128 0 "register_operand")
10440 (eq:VI124_128
10441 (match_operand:VI124_128 1 "nonimmediate_operand")
10442 (match_operand:VI124_128 2 "nonimmediate_operand")))]
10443 "TARGET_SSE2 && !TARGET_XOP "
10444 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
10445
;; Named expander for V2DI equality under TARGET_SSE4_1; preparation only
;; fixes up the operands for EQ.
10446 (define_expand "sse4_1_eqv2di3"
10447 [(set (match_operand:V2DI 0 "register_operand")
10448 (eq:V2DI
10449 (match_operand:V2DI 1 "nonimmediate_operand")
10450 (match_operand:V2DI 2 "nonimmediate_operand")))]
10451 "TARGET_SSE4_1"
10452 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
10453
;; V2DI greater-than (gt) under TARGET_SSE4_2.  Operand 1 must be a
;; register and is not marked commutative.  Alternative 0 (isa noavx)
;; emits two-operand pcmpgtq; alternative 1 (isa avx) emits three-operand
;; vpcmpgtq.
10454 (define_insn "sse4_2_gtv2di3"
10455 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
10456 (gt:V2DI
10457 (match_operand:V2DI 1 "register_operand" "0,x")
10458 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
10459 "TARGET_SSE4_2"
10460 "@
10461 pcmpgtq\t{%2, %0|%0, %2}
10462 vpcmpgtq\t{%2, %1, %0|%0, %1, %2}"
10463 [(set_attr "isa" "noavx,avx")
10464 (set_attr "type" "ssecmp")
10465 (set_attr "prefix_extra" "1")
10466 (set_attr "prefix" "orig,vex")
10467 (set_attr "mode" "TI")])
10468
;; Greater-than (gt) on VI_256 vectors under TARGET_AVX2; emits the
;; three-operand vpcmpgt<ssemodesuffix>.
10469 (define_insn "avx2_gt<mode>3"
10470 [(set (match_operand:VI_256 0 "register_operand" "=x")
10471 (gt:VI_256
10472 (match_operand:VI_256 1 "register_operand" "x")
10473 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
10474 "TARGET_AVX2"
10475 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10476 [(set_attr "type" "ssecmp")
10477 (set_attr "prefix_extra" "1")
10478 (set_attr "prefix" "vex")
10479 (set_attr "mode" "OI")])
10480
;; UNSPEC_MASKED_GT on VI48_AVX512VL vectors under TARGET_AVX512F.  The
;; result is an <avx512fmaskmode> value (constraint Yk); emits
;; vpcmpgt<ssemodesuffix> with the <mask_scalar_merge_operand3> suffix.
10481 (define_insn "<avx512>_gt<mode>3<mask_scalar_merge_name>"
10482 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
10483 (unspec:<avx512fmaskmode>
10484 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
10485 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))]
10486 "TARGET_AVX512F"
10487 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
10488 [(set_attr "type" "ssecmp")
10489 (set_attr "prefix_extra" "1")
10490 (set_attr "prefix" "evex")
10491 (set_attr "mode" "<sseinsnmode>")])
10492
;; UNSPEC_MASKED_GT on VI12_AVX512VL vectors under TARGET_AVX512BW.  The
;; result is an <avx512fmaskmode> value (constraint Yk); emits
;; vpcmpgt<ssemodesuffix> with the <mask_scalar_merge_operand3> suffix.
10493 (define_insn "<avx512>_gt<mode>3<mask_scalar_merge_name>"
10494 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
10495 (unspec:<avx512fmaskmode>
10496 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
10497 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))]
10498 "TARGET_AVX512BW"
10499 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
10500 [(set_attr "type" "ssecmp")
10501 (set_attr "prefix_extra" "1")
10502 (set_attr "prefix" "evex")
10503 (set_attr "mode" "<sseinsnmode>")])
10504
;; Greater-than (gt) on VI124_128 vectors under TARGET_SSE2; disabled when
;; TARGET_XOP is set.  Alternative 0 (isa noavx) emits two-operand
;; pcmpgt<ssemodesuffix>; alternative 1 (isa avx) emits the three-operand
;; v-prefixed form.
10505 (define_insn "sse2_gt<mode>3"
10506 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
10507 (gt:VI124_128
10508 (match_operand:VI124_128 1 "register_operand" "0,x")
10509 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
10510 "TARGET_SSE2 && !TARGET_XOP"
10511 "@
10512 pcmpgt<ssemodesuffix>\t{%2, %0|%0, %2}
10513 vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10514 [(set_attr "isa" "noavx,avx")
10515 (set_attr "type" "ssecmp")
10516 (set_attr "prefix_data16" "1,*")
10517 (set_attr "prefix" "orig,vex")
10518 (set_attr "mode" "TI")])
10519
;; Vector conditional select: result mode V_512, comparison operands in
;; VI_512.  Operand 3 is the comparison operator applied to operands 4
;; and 5; operands 1 and 2 are the two values selected between.
;; Enabled under TARGET_AVX512F only when both modes have the same number
;; of elements.  All work is delegated to ix86_expand_int_vcond, which is
;; asserted to succeed.
10520 (define_expand "vcond<V_512:mode><VI_512:mode>"
10521 [(set (match_operand:V_512 0 "register_operand")
10522 (if_then_else:V_512
10523 (match_operator 3 ""
10524 [(match_operand:VI_512 4 "nonimmediate_operand")
10525 (match_operand:VI_512 5 "general_operand")])
10526 (match_operand:V_512 1)
10527 (match_operand:V_512 2)))]
10528 "TARGET_AVX512F
10529 && (GET_MODE_NUNITS (<V_512:MODE>mode)
10530 == GET_MODE_NUNITS (<VI_512:MODE>mode))"
10531 {
10532 bool ok = ix86_expand_int_vcond (operands);
10533 gcc_assert (ok);
10534 DONE;
10535 })
10536
;; Vector conditional select: result mode V_256, comparison operands in
;; VI_256.  Operand 3 is the comparison operator applied to operands 4
;; and 5; operands 1 and 2 are the two values selected between.
;; Enabled under TARGET_AVX2 only when both modes have the same number of
;; elements.  All work is delegated to ix86_expand_int_vcond, which is
;; asserted to succeed.
10537 (define_expand "vcond<V_256:mode><VI_256:mode>"
10538 [(set (match_operand:V_256 0 "register_operand")
10539 (if_then_else:V_256
10540 (match_operator 3 ""
10541 [(match_operand:VI_256 4 "nonimmediate_operand")
10542 (match_operand:VI_256 5 "general_operand")])
10543 (match_operand:V_256 1)
10544 (match_operand:V_256 2)))]
10545 "TARGET_AVX2
10546 && (GET_MODE_NUNITS (<V_256:MODE>mode)
10547 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
10548 {
10549 bool ok = ix86_expand_int_vcond (operands);
10550 gcc_assert (ok);
10551 DONE;
10552 })
10553
;; Vector conditional select: result mode V_128, comparison operands in
;; VI124_128.  Operand 3 is the comparison operator applied to operands 4
;; and 5; operands 1 and 2 are the two values selected between.
;; Enabled under TARGET_SSE2 only when both modes have the same number of
;; elements.  All work is delegated to ix86_expand_int_vcond, which is
;; asserted to succeed.
10554 (define_expand "vcond<V_128:mode><VI124_128:mode>"
10555 [(set (match_operand:V_128 0 "register_operand")
10556 (if_then_else:V_128
10557 (match_operator 3 ""
10558 [(match_operand:VI124_128 4 "nonimmediate_operand")
10559 (match_operand:VI124_128 5 "general_operand")])
10560 (match_operand:V_128 1)
10561 (match_operand:V_128 2)))]
10562 "TARGET_SSE2
10563 && (GET_MODE_NUNITS (<V_128:MODE>mode)
10564 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
10565 {
10566 bool ok = ix86_expand_int_vcond (operands);
10567 gcc_assert (ok);
10568 DONE;
10569 })
10570
;; Vector conditional select with a V2DI comparison and a VI8F_128
;; result, enabled under TARGET_SSE4_2.  Operand 3 is the comparison
;; operator applied to operands 4 and 5; operands 1 and 2 are the two
;; values selected between.  All work is delegated to
;; ix86_expand_int_vcond, which is asserted to succeed.
10571 (define_expand "vcond<VI8F_128:mode>v2di"
10572 [(set (match_operand:VI8F_128 0 "register_operand")
10573 (if_then_else:VI8F_128
10574 (match_operator 3 ""
10575 [(match_operand:V2DI 4 "nonimmediate_operand")
10576 (match_operand:V2DI 5 "general_operand")])
10577 (match_operand:VI8F_128 1)
10578 (match_operand:VI8F_128 2)))]
10579 "TARGET_SSE4_2"
10580 {
10581 bool ok = ix86_expand_int_vcond (operands);
10582 gcc_assert (ok);
10583 DONE;
10584 })
10585
;; The "vcondu" counterpart of the V_512/VI_512 conditional select.
;; Compared with the vcond expander, operand 5 is a nonimmediate_operand
;; and the selected values (operands 1 and 2) are general_operands.
;; Enabled under TARGET_AVX512F only when both modes have the same number
;; of elements.  All work is delegated to ix86_expand_int_vcond, which is
;; asserted to succeed.
10586 (define_expand "vcondu<V_512:mode><VI_512:mode>"
10587 [(set (match_operand:V_512 0 "register_operand")
10588 (if_then_else:V_512
10589 (match_operator 3 ""
10590 [(match_operand:VI_512 4 "nonimmediate_operand")
10591 (match_operand:VI_512 5 "nonimmediate_operand")])
10592 (match_operand:V_512 1 "general_operand")
10593 (match_operand:V_512 2 "general_operand")))]
10594 "TARGET_AVX512F
10595 && (GET_MODE_NUNITS (<V_512:MODE>mode)
10596 == GET_MODE_NUNITS (<VI_512:MODE>mode))"
10597 {
10598 bool ok = ix86_expand_int_vcond (operands);
10599 gcc_assert (ok);
10600 DONE;
10601 })
10602
;; The "vcondu" counterpart of the V_256/VI_256 conditional select.
;; Compared with the vcond expander, operand 5 is a nonimmediate_operand
;; and the selected values (operands 1 and 2) are general_operands.
;; Enabled under TARGET_AVX2 only when both modes have the same number of
;; elements.  All work is delegated to ix86_expand_int_vcond, which is
;; asserted to succeed.
10603 (define_expand "vcondu<V_256:mode><VI_256:mode>"
10604 [(set (match_operand:V_256 0 "register_operand")
10605 (if_then_else:V_256
10606 (match_operator 3 ""
10607 [(match_operand:VI_256 4 "nonimmediate_operand")
10608 (match_operand:VI_256 5 "nonimmediate_operand")])
10609 (match_operand:V_256 1 "general_operand")
10610 (match_operand:V_256 2 "general_operand")))]
10611 "TARGET_AVX2
10612 && (GET_MODE_NUNITS (<V_256:MODE>mode)
10613 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
10614 {
10615 bool ok = ix86_expand_int_vcond (operands);
10616 gcc_assert (ok);
10617 DONE;
10618 })
10619
;; The "vcondu" counterpart of the V_128/VI124_128 conditional select.
;; Compared with the vcond expander, operand 5 is a nonimmediate_operand
;; and the selected values (operands 1 and 2) are general_operands.
;; Enabled under TARGET_SSE2 only when both modes have the same number of
;; elements.  All work is delegated to ix86_expand_int_vcond, which is
;; asserted to succeed.
10620 (define_expand "vcondu<V_128:mode><VI124_128:mode>"
10621 [(set (match_operand:V_128 0 "register_operand")
10622 (if_then_else:V_128
10623 (match_operator 3 ""
10624 [(match_operand:VI124_128 4 "nonimmediate_operand")
10625 (match_operand:VI124_128 5 "nonimmediate_operand")])
10626 (match_operand:V_128 1 "general_operand")
10627 (match_operand:V_128 2 "general_operand")))]
10628 "TARGET_SSE2
10629 && (GET_MODE_NUNITS (<V_128:MODE>mode)
10630 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
10631 {
10632 bool ok = ix86_expand_int_vcond (operands);
10633 gcc_assert (ok);
10634 DONE;
10635 })
10636
;; The "vcondu" counterpart of the V2DI-comparison conditional select
;; with a VI8F_128 result, enabled under TARGET_SSE4_2.  Compared with
;; the vcond expander, operand 5 is a nonimmediate_operand and the
;; selected values (operands 1 and 2) are general_operands.  All work is
;; delegated to ix86_expand_int_vcond, which is asserted to succeed.
10637 (define_expand "vcondu<VI8F_128:mode>v2di"
10638 [(set (match_operand:VI8F_128 0 "register_operand")
10639 (if_then_else:VI8F_128
10640 (match_operator 3 ""
10641 [(match_operand:V2DI 4 "nonimmediate_operand")
10642 (match_operand:V2DI 5 "nonimmediate_operand")])
10643 (match_operand:VI8F_128 1 "general_operand")
10644 (match_operand:VI8F_128 2 "general_operand")))]
10645 "TARGET_SSE4_2"
10646 {
10647 bool ok = ix86_expand_int_vcond (operands);
10648 gcc_assert (ok);
10649 DONE;
10650 })
10651
;; Modes accepted by the vec_perm<mode> expander.  The six 128-bit modes
;; are listed unconditionally; the 256-bit modes are gated on TARGET_AVX2,
;; the 512-bit SF/DF/SI/DI modes on TARGET_AVX512F, and V32HI on
;; TARGET_AVX512BW.
10652 (define_mode_iterator VEC_PERM_AVX2
10653 [V16QI V8HI V4SI V2DI V4SF V2DF
10654 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
10655 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
10656 (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")
10657 (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
10658 (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
10659 (V32HI "TARGET_AVX512BW")])
10660
;; Permute expander over the VEC_PERM_AVX2 modes.  Operands 1 and 2 are
;; vectors of the same mode as the result; operand 3 is a register in
;; <sseintvecmode>.  Enabled when any of TARGET_SSSE3, TARGET_AVX or
;; TARGET_XOP is set; the expansion itself is done by
;; ix86_expand_vec_perm.
10661 (define_expand "vec_perm<mode>"
10662 [(match_operand:VEC_PERM_AVX2 0 "register_operand")
10663 (match_operand:VEC_PERM_AVX2 1 "register_operand")
10664 (match_operand:VEC_PERM_AVX2 2 "register_operand")
10665 (match_operand:<sseintvecmode> 3 "register_operand")]
10666 "TARGET_SSSE3 || TARGET_AVX || TARGET_XOP"
10667 {
10668 ix86_expand_vec_perm (operands);
10669 DONE;
10670 })
10671
;; Modes accepted by the vec_perm_const<mode> expander, each with its own
;; enabling condition: TARGET_SSE for the 4- and 2-element 128-bit modes,
;; TARGET_SSE2 for V16QI/V8HI, TARGET_AVX for the 8- and 4-element 256-bit
;; modes, TARGET_AVX2 for V32QI/V16HI, TARGET_AVX512F for the 512-bit
;; SI/DI/SF/DF modes, and TARGET_AVX512BW for V32HI.
10672 (define_mode_iterator VEC_PERM_CONST
10673 [(V4SF "TARGET_SSE") (V4SI "TARGET_SSE")
10674 (V2DF "TARGET_SSE") (V2DI "TARGET_SSE")
10675 (V16QI "TARGET_SSE2") (V8HI "TARGET_SSE2")
10676 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
10677 (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
10678 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
10679 (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
10680 (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
10681 (V32HI "TARGET_AVX512BW")])
10682
;; Permute expander over the VEC_PERM_CONST modes.  Operand 3 is in
;; <sseintvecmode> and has no predicate.  The pattern condition is empty,
;; so availability is decided per mode by the iterator; the expansion is
;; attempted by ix86_expand_vec_perm_const and the expander FAILs when
;; that helper returns false.
10683 (define_expand "vec_perm_const<mode>"
10684 [(match_operand:VEC_PERM_CONST 0 "register_operand")
10685 (match_operand:VEC_PERM_CONST 1 "register_operand")
10686 (match_operand:VEC_PERM_CONST 2 "register_operand")
10687 (match_operand:<sseintvecmode> 3)]
10688 ""
10689 {
/* Guard clause: bail out first, succeed otherwise.  */
10690 if (!ix86_expand_vec_perm_const (operands))
10691 FAIL;
10692 DONE;
10694 })
10695
10696 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10697 ;;
10698 ;; Parallel bitwise logical operations
10699 ;;
10700 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10701
;; One's complement of a VI vector, expanded as an XOR of operand 1 with
;; an all-ones constant vector of the same mode (operand 2, created here
;; and forced into a register).  Enabled under TARGET_SSE.
10702 (define_expand "one_cmpl<mode>2"
10703 [(set (match_operand:VI 0 "register_operand")
10704 (xor:VI (match_operand:VI 1 "nonimmediate_operand")
10705 (match_dup 2)))]
10706 "TARGET_SSE"
10707 {
/* CONSTM1_RTX is the shared all-ones constant for the mode; it replaces
   filling an rtvec with constm1_rtx element by element.  */
10714 operands[2] = force_reg (<MODE>mode, CONSTM1_RTX (<MODE>mode));
10715 })
10716
;; Named expander for and-not on VI_AVX2 vectors: (and (not op1) op2).
;; Operand 1 must be a register.  Enabled under TARGET_SSE2; there is no
;; preparation code, so the RTL template is emitted as written.
10717 (define_expand "<sse2_avx2>_andnot<mode>3"
10718 [(set (match_operand:VI_AVX2 0 "register_operand")
10719 (and:VI_AVX2
10720 (not:VI_AVX2 (match_operand:VI_AVX2 1 "register_operand"))
10721 (match_operand:VI_AVX2 2 "nonimmediate_operand")))]
10722 "TARGET_SSE2")
10723
;; Named expander for the masked and-not on VI48_AVX512VL vectors:
;; vec_merge of (and (not op1) op2) with operand 3 under the
;; <avx512fmaskmode> mask in operand 4.  Enabled under TARGET_AVX512F;
;; there is no preparation code.
10724 (define_expand "<sse2_avx2>_andnot<mode>3_mask"
10725 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
10726 (vec_merge:VI48_AVX512VL
10727 (and:VI48_AVX512VL
10728 (not:VI48_AVX512VL
10729 (match_operand:VI48_AVX512VL 1 "register_operand"))
10730 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
10731 (match_operand:VI48_AVX512VL 3 "vector_move_operand")
10732 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
10733 "TARGET_AVX512F")
10734
;; Named expander for the masked and-not on VI12_AVX512VL vectors:
;; vec_merge of (and (not op1) op2) with operand 3 under the
;; <avx512fmaskmode> mask in operand 4.  Enabled under TARGET_AVX512BW;
;; there is no preparation code.
10735 (define_expand "<sse2_avx2>_andnot<mode>3_mask"
10736 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
10737 (vec_merge:VI12_AVX512VL
10738 (and:VI12_AVX512VL
10739 (not:VI12_AVX512VL
10740 (match_operand:VI12_AVX512VL 1 "register_operand"))
10741 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand"))
10742 (match_operand:VI12_AVX512VL 3 "vector_move_operand")
10743 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
10744 "TARGET_AVX512BW")
10745
;; And-not on any VI vector: (and (not op1) op2), enabled under TARGET_SSE.
;; The output code picks the mnemonic from the insn's "mode" attribute and
;; the operand layout from the chosen alternative, then formats both into
;; a static buffer:
;;   * integer insn modes (XI/OI/TI) use "pandn", with an element-size
;;     suffix for SI/DI element modes when the AVX512 tests below pass;
;;   * float insn modes (V16SF/V8SF/V4SF) use "andnps".
;; Alternative 0 (isa noavx) is the two-operand form; alternative 1
;; (isa avx) is the three-operand form with a "v" prefix.
10746 (define_insn "*andnot<mode>3"
10747 [(set (match_operand:VI 0 "register_operand" "=x,v")
10748 (and:VI
10749 (not:VI (match_operand:VI 1 "register_operand" "0,v"))
10750 (match_operand:VI 2 "nonimmediate_operand" "xm,vm")))]
10751 "TARGET_SSE"
10752 {
/* Static because the pointer is returned to the caller.  */
10753 static char buf[64];
10754 const char *ops;
10755 const char *tmp;
10756
10757 switch (get_attr_mode (insn))
10758 {
10759 case MODE_XI:
10760 gcc_assert (TARGET_AVX512F);
/* FALLTHRU */
10761 case MODE_OI:
10762 gcc_assert (TARGET_AVX2 || TARGET_AVX512VL);
/* FALLTHRU */
10763 case MODE_TI:
10764 gcc_assert (TARGET_SSE2 || TARGET_AVX512VL);
10765 switch (<MODE>mode)
10766 {
10767 case V16SImode:
10768 case V8DImode:
10769 if (TARGET_AVX512F)
10770 {
10771 tmp = "pandn<ssemodesuffix>";
10772 break;
10773 }
/* FALLTHRU when TARGET_AVX512F is not set.  */
10774 case V8SImode:
10775 case V4DImode:
10776 case V4SImode:
10777 case V2DImode:
10778 if (TARGET_AVX512VL)
10779 {
10780 tmp = "pandn<ssemodesuffix>";
10781 break;
10782 }
/* FALLTHRU when TARGET_AVX512VL is not set.  */
10783 default:
10784 tmp = TARGET_AVX512VL ? "pandnq" : "pandn";
10785 }
10786 break;
10787
10788 case MODE_V16SF:
10789 gcc_assert (TARGET_AVX512F);
/* FALLTHRU */
10790 case MODE_V8SF:
10791 gcc_assert (TARGET_AVX);
/* FALLTHRU */
10792 case MODE_V4SF:
10793 gcc_assert (TARGET_SSE);
10794
10795 tmp = "andnps";
10796 break;
10797
10798 default:
10799 gcc_unreachable ();
10800 }
10801
/* ops is used as an snprintf format below: "%s" receives the mnemonic in
   tmp and each "%%" becomes a single "%" in the resulting template.  */
10802 switch (which_alternative)
10803 {
10804 case 0:
10805 ops = "%s\t{%%2, %%0|%%0, %%2}";
10806 break;
10807 case 1:
10808 ops = "v%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
10809 break;
10810 default:
10811 gcc_unreachable ();
10812 }
10813
10814 snprintf (buf, sizeof (buf), ops, tmp);
10815 return buf;
10816 }
10817 [(set_attr "isa" "noavx,avx")
10818 (set_attr "type" "sselog")
10819 (set (attr "prefix_data16")
10820 (if_then_else
10821 (and (eq_attr "alternative" "0")
10822 (eq_attr "mode" "TI"))
10823 (const_string "1")
10824 (const_string "*")))
10825 (set_attr "prefix" "orig,vex")
;; The "mode" attribute drives the mnemonic choice in the output code
;; above; the first matching test in this cond wins.
10826 (set (attr "mode")
10827 (cond [(and (match_test "<MODE_SIZE> == 16")
10828 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
10829 (const_string "<ssePSmode>")
10830 (match_test "TARGET_AVX2")
10831 (const_string "<sseinsnmode>")
10832 (match_test "TARGET_AVX")
10833 (if_then_else
10834 (match_test "<MODE_SIZE> > 16")
10835 (const_string "V8SF")
10836 (const_string "<sseinsnmode>"))
10837 (ior (not (match_test "TARGET_SSE2"))
10838 (match_test "optimize_function_for_size_p (cfun)"))
10839 (const_string "V4SF")
10840 ]
10841 (const_string "<sseinsnmode>")))])
10842
;; Masked and-not on VI48_AVX512VL vectors under TARGET_AVX512F:
;; vec_merge of (and (not op1) op2) with operand 3 under the mask in
;; operand 4 (constraint Yk).  Operand 3 is either tied to the destination
;; ("0") or matches constraint "C".  Emits vpandn<ssemodesuffix> with the
;; mask operand and the %N3 modifier attached to the destination.
10843 (define_insn "*andnot<mode>3_mask"
10844 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
10845 (vec_merge:VI48_AVX512VL
10846 (and:VI48_AVX512VL
10847 (not:VI48_AVX512VL
10848 (match_operand:VI48_AVX512VL 1 "register_operand" "v"))
10849 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm"))
10850 (match_operand:VI48_AVX512VL 3 "vector_move_operand" "0C")
10851 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
10852 "TARGET_AVX512F"
10853 "vpandn<ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
10854 [(set_attr "type" "sselog")
10855 (set_attr "prefix" "evex")
10856 (set_attr "mode" "<sseinsnmode>")])
10857
;; Masked and-not on VI12_AVX512VL vectors under TARGET_AVX512BW:
;; vec_merge of (and (not op1) op2) with operand 3 under the mask in
;; operand 4 (constraint Yk).  Operand 3 is either tied to the destination
;; ("0") or matches constraint "C".  Emits vpandn<ssemodesuffix> with the
;; mask operand and the %N3 modifier attached to the destination.
;; NOTE(review): for the byte/word modes of this iterator
;; <ssemodesuffix> presumably expands to "b"/"w", which would give
;; vpandnb/vpandnw.  The Intel SDM appears to document only VPANDND and
;; VPANDNQ for the EVEX form -- confirm that this template can assemble.
10858 (define_insn "*andnot<mode>3_mask"
10859 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
10860 (vec_merge:VI12_AVX512VL
10861 (and:VI12_AVX512VL
10862 (not:VI12_AVX512VL
10863 (match_operand:VI12_AVX512VL 1 "register_operand" "v"))
10864 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm"))
10865 (match_operand:VI12_AVX512VL 3 "vector_move_operand" "0C")
10866 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
10867 "TARGET_AVX512BW"
10868 "vpandn<ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
10869 [(set_attr "type" "sselog")
10870 (set_attr "prefix" "evex")
10871 (set_attr "mode" "<sseinsnmode>")])
10872
;; Named expander for the any_logic codes on VI vectors, enabled under
;; TARGET_SSE.  Either input may be a constant vector
;; (nonimmediate_or_const_vector_operand); the expansion is done entirely
;; by ix86_expand_vector_logical_operator.
10873 (define_expand "<code><mode>3"
10874 [(set (match_operand:VI 0 "register_operand")
10875 (any_logic:VI
10876 (match_operand:VI 1 "nonimmediate_or_const_vector_operand")
10877 (match_operand:VI 2 "nonimmediate_or_const_vector_operand")))]
10878 "TARGET_SSE"
10879 {
10880 ix86_expand_vector_logical_operator (<CODE>, <MODE>mode, operands);
10881 DONE;
10882 })
10883
;; The any_logic codes on any VI vector, enabled under TARGET_SSE together
;; with <mask_mode512bit_condition> and ix86_binary_operator_ok.
;; The output code picks the mnemonic from the insn's "mode" attribute and
;; the operand layout from the chosen alternative, then formats both into
;; a static buffer:
;;   * integer insn modes (XI/OI/TI) use "p<logic>", with an element-size
;;     suffix for SI/DI element modes when the AVX512 tests below pass;
;;   * float insn modes (V16SF/V8SF/V4SF) use "<logic>ps".
;; Alternative 0 (isa noavx) is the two-operand form; alternative 1
;; (isa avx) is the three-operand form with a "v" prefix and the
;; <mask_operand3_1> suffix on the destination.
10884 (define_insn "<mask_codefor><code><mode>3<mask_name>"
10885 [(set (match_operand:VI 0 "register_operand" "=x,v")
10886 (any_logic:VI
10887 (match_operand:VI 1 "nonimmediate_operand" "%0,v")
10888 (match_operand:VI 2 "nonimmediate_operand" "xm,vm")))]
10889 "TARGET_SSE && <mask_mode512bit_condition>
10890 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10891 {
/* Static because the pointer is returned to the caller.  */
10892 static char buf[64];
10893 const char *ops;
10894 const char *tmp;
10895
10896 switch (get_attr_mode (insn))
10897 {
10898 case MODE_XI:
10899 gcc_assert (TARGET_AVX512F);
/* FALLTHRU */
10900 case MODE_OI:
10901 gcc_assert (TARGET_AVX2 || TARGET_AVX512VL);
/* FALLTHRU */
10902 case MODE_TI:
10903 gcc_assert (TARGET_SSE2 || TARGET_AVX512VL);
10904 switch (<MODE>mode)
10905 {
10906 case V16SImode:
10907 case V8DImode:
10908 if (TARGET_AVX512F)
10909 {
10910 tmp = "p<logic><ssemodesuffix>";
10911 break;
10912 }
/* FALLTHRU when TARGET_AVX512F is not set.  */
10913 case V8SImode:
10914 case V4DImode:
10915 case V4SImode:
10916 case V2DImode:
10917 if (TARGET_AVX512VL)
10918 {
10919 tmp = "p<logic><ssemodesuffix>";
10920 break;
10921 }
/* FALLTHRU when TARGET_AVX512VL is not set.  */
10922 default:
10923 tmp = TARGET_AVX512VL ? "p<logic>q" : "p<logic>";
10924 }
10925 break;
10926
10927 case MODE_V16SF:
10928 gcc_assert (TARGET_AVX512F);
/* FALLTHRU */
10929 case MODE_V8SF:
10930 gcc_assert (TARGET_AVX);
/* FALLTHRU */
10931 case MODE_V4SF:
10932 gcc_assert (TARGET_SSE);
10933
10934 tmp = "<logic>ps";
10935 break;
10936
10937 default:
10938 gcc_unreachable ();
10939 }
10940
/* ops is used as an snprintf format below: "%s" receives the mnemonic in
   tmp and each "%%" becomes a single "%" in the resulting template.  */
10941 switch (which_alternative)
10942 {
10943 case 0:
10944 ops = "%s\t{%%2, %%0|%%0, %%2}";
10945 break;
10946 case 1:
10947 ops = "v%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
10948 break;
10949 default:
10950 gcc_unreachable ();
10951 }
10952
10953 snprintf (buf, sizeof (buf), ops, tmp);
10954 return buf;
10955 }
10956 [(set_attr "isa" "noavx,avx")
10957 (set_attr "type" "sselog")
10958 (set (attr "prefix_data16")
10959 (if_then_else
10960 (and (eq_attr "alternative" "0")
10961 (eq_attr "mode" "TI"))
10962 (const_string "1")
10963 (const_string "*")))
10964 (set_attr "prefix" "<mask_prefix3>")
;; The "mode" attribute drives the mnemonic choice in the output code
;; above; the first matching test in this cond wins.
10965 (set (attr "mode")
10966 (cond [(and (match_test "<MODE_SIZE> == 16")
10967 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
10968 (const_string "<ssePSmode>")
10969 (match_test "TARGET_AVX2")
10970 (const_string "<sseinsnmode>")
10971 (match_test "TARGET_AVX")
10972 (if_then_else
10973 (match_test "<MODE_SIZE> > 16")
10974 (const_string "V8SF")
10975 (const_string "<sseinsnmode>"))
10976 (ior (not (match_test "TARGET_SSE2"))
10977 (match_test "optimize_function_for_size_p (cfun)"))
10978 (const_string "V4SF")
10979 ]
10980 (const_string "<sseinsnmode>")))])
10981
;; UNSPEC_TESTM on VI12_AVX512VL vectors under TARGET_AVX512BW.  The
;; result is an <avx512fmaskmode> value (constraint Yk); emits
;; vptestm<ssemodesuffix> with the <mask_scalar_merge_operand3> suffix.
10982 (define_insn "<avx512>_testm<mode>3<mask_scalar_merge_name>"
10983 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
10984 (unspec:<avx512fmaskmode>
10985 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
10986 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")]
10987 UNSPEC_TESTM))]
10988 "TARGET_AVX512BW"
10989 "vptestm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
10990 [(set_attr "prefix" "evex")
10991 (set_attr "mode" "<sseinsnmode>")])
10992
;; UNSPEC_TESTM on VI48_AVX512VL vectors under TARGET_AVX512F.  The
;; result is an <avx512fmaskmode> value (constraint Yk); emits
;; vptestm<ssemodesuffix> with the <mask_scalar_merge_operand3> suffix.
10993 (define_insn "<avx512>_testm<mode>3<mask_scalar_merge_name>"
10994 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
10995 (unspec:<avx512fmaskmode>
10996 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
10997 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")]
10998 UNSPEC_TESTM))]
10999 "TARGET_AVX512F"
11000 "vptestm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
11001 [(set_attr "prefix" "evex")
11002 (set_attr "mode" "<sseinsnmode>")])
11003
;; UNSPEC_TESTNM on VI12_AVX512VL vectors under TARGET_AVX512BW.  The
;; result is an <avx512fmaskmode> value (constraint Yk); emits
;; vptestnm<ssemodesuffix> with the <mask_scalar_merge_operand3> suffix.
11004 (define_insn "<avx512>_testnm<mode>3<mask_scalar_merge_name>"
11005 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
11006 (unspec:<avx512fmaskmode>
11007 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
11008 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")]
11009 UNSPEC_TESTNM))]
11010 "TARGET_AVX512BW"
11011 "vptestnm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
11012 [(set_attr "prefix" "evex")
11013 (set_attr "mode" "<sseinsnmode>")])
11014
;; UNSPEC_TESTNM on VI48_AVX512VL vectors under TARGET_AVX512F.  The
;; result is an <avx512fmaskmode> value (constraint Yk); emits
;; vptestnm<ssemodesuffix> with the <mask_scalar_merge_operand3> suffix.
11015 (define_insn "<avx512>_testnm<mode>3<mask_scalar_merge_name>"
11016 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
11017 (unspec:<avx512fmaskmode>
11018 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
11019 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")]
11020 UNSPEC_TESTNM))]
11021 "TARGET_AVX512F"
11022 "vptestnm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
11023 [(set_attr "prefix" "evex")
11024 (set_attr "mode" "<sseinsnmode>")])
11025
11026 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11027 ;;
11028 ;; Parallel integral element swizzling
11029 ;;
11030 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11031
;; Truncating pack of two VI248_AVX2_8_AVX512F vectors into one
;; <ssepackmode> vector, enabled under TARGET_SSE2.  Both inputs are
;; re-viewed in <ssepackmode> with gen_lowpart and then passed to
;; ix86_expand_vec_extract_even_odd with a final argument of 0
;; (presumably selecting the even-numbered elements -- confirm against
;; the helper's definition).
11032 (define_expand "vec_pack_trunc_<mode>"
11033 [(match_operand:<ssepackmode> 0 "register_operand")
11034 (match_operand:VI248_AVX2_8_AVX512F 1 "register_operand")
11035 (match_operand:VI248_AVX2_8_AVX512F 2 "register_operand")]
11036 "TARGET_SSE2"
11037 {
11038 rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]);
11039 rtx op2 = gen_lowpart (<ssepackmode>mode, operands[2]);
11040 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
11041 DONE;
11042 })
11043
;; Pack with signed saturation (ss_truncate) of two <sseunpackmode>
;; vectors into one VI1_AVX512 vector; each input supplies one
;; <ssehalfvecmode> half of the vec_concat.  Enabled under TARGET_SSE2
;; together with the two mask conditions.  Alternative 0 (isa noavx)
;; emits two-operand packsswb; alternative 1 (isa avx) emits
;; three-operand vpacksswb with the <mask_operand3> suffix.
11044 (define_insn "<sse2_avx2>_packsswb<mask_name>"
11045 [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x")
11046 (vec_concat:VI1_AVX512
11047 (ss_truncate:<ssehalfvecmode>
11048 (match_operand:<sseunpackmode> 1 "register_operand" "0,v"))
11049 (ss_truncate:<ssehalfvecmode>
11050 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,vm"))))]
11051 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
11052 "@
11053 packsswb\t{%2, %0|%0, %2}
11054 vpacksswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11055 [(set_attr "isa" "noavx,avx")
11056 (set_attr "type" "sselog")
11057 (set_attr "prefix_data16" "1,*")
11058 (set_attr "prefix" "orig,maybe_evex")
11059 (set_attr "mode" "<sseinsnmode>")])
11060
;; Pack with signed saturation (ss_truncate) of two <sseunpackmode>
;; vectors into one VI2_AVX2 vector; each input supplies one
;; <ssehalfvecmode> half of the vec_concat.  Enabled under TARGET_SSE2
;; together with the two mask conditions.  Alternative 0 (isa noavx)
;; emits two-operand packssdw; alternative 1 (isa avx) emits
;; three-operand vpackssdw with the <mask_operand3> suffix.
11061 (define_insn "<sse2_avx2>_packssdw<mask_name>"
11062 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
11063 (vec_concat:VI2_AVX2
11064 (ss_truncate:<ssehalfvecmode>
11065 (match_operand:<sseunpackmode> 1 "register_operand" "0,v"))
11066 (ss_truncate:<ssehalfvecmode>
11067 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,vm"))))]
11068 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
11069 "@
11070 packssdw\t{%2, %0|%0, %2}
11071 vpackssdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11072 [(set_attr "isa" "noavx,avx")
11073 (set_attr "type" "sselog")
11074 (set_attr "prefix_data16" "1,*")
11075 (set_attr "prefix" "orig,vex")
11076 (set_attr "mode" "<sseinsnmode>")])
11077
;; Pack with unsigned saturation (us_truncate) of two <sseunpackmode>
;; vectors into one VI1_AVX512 vector; each input supplies one
;; <ssehalfvecmode> half of the vec_concat.  Enabled under TARGET_SSE2
;; together with the two mask conditions.  Alternative 0 (isa noavx)
;; emits two-operand packuswb; alternative 1 (isa avx) emits
;; three-operand vpackuswb with the <mask_operand3> suffix.
11078 (define_insn "<sse2_avx2>_packuswb<mask_name>"
11079 [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x")
11080 (vec_concat:VI1_AVX512
11081 (us_truncate:<ssehalfvecmode>
11082 (match_operand:<sseunpackmode> 1 "register_operand" "0,v"))
11083 (us_truncate:<ssehalfvecmode>
11084 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,vm"))))]
11085 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
11086 "@
11087 packuswb\t{%2, %0|%0, %2}
11088 vpackuswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11089 [(set_attr "isa" "noavx,avx")
11090 (set_attr "type" "sselog")
11091 (set_attr "prefix_data16" "1,*")
11092 (set_attr "prefix" "orig,vex")
11093 (set_attr "mode" "<sseinsnmode>")])
11094
;; High-half byte interleave of two V64QI vectors under TARGET_AVX512BW,
;; emitted as vpunpckhbw.  The selector indexes the 128-element
;; concatenation of operands 1 and 2, so indices 64 and up refer to
;; operand 2.  Within each group of 16 bytes, elements 8..15 of operand 1
;; alternate with the same-numbered elements of operand 2.
11095 (define_insn "avx512bw_interleave_highv64qi<mask_name>"
11096 [(set (match_operand:V64QI 0 "register_operand" "=v")
11097 (vec_select:V64QI
11098 (vec_concat:V128QI
11099 (match_operand:V64QI 1 "register_operand" "v")
11100 (match_operand:V64QI 2 "nonimmediate_operand" "vm"))
11101 (parallel [(const_int 8) (const_int 72)
11102 (const_int 9) (const_int 73)
11103 (const_int 10) (const_int 74)
11104 (const_int 11) (const_int 75)
11105 (const_int 12) (const_int 76)
11106 (const_int 13) (const_int 77)
11107 (const_int 14) (const_int 78)
11108 (const_int 15) (const_int 79)
11109 (const_int 24) (const_int 88)
11110 (const_int 25) (const_int 89)
11111 (const_int 26) (const_int 90)
11112 (const_int 27) (const_int 91)
11113 (const_int 28) (const_int 92)
11114 (const_int 29) (const_int 93)
11115 (const_int 30) (const_int 94)
11116 (const_int 31) (const_int 95)
11117 (const_int 40) (const_int 104)
11118 (const_int 41) (const_int 105)
11119 (const_int 42) (const_int 106)
11120 (const_int 43) (const_int 107)
11121 (const_int 44) (const_int 108)
11122 (const_int 45) (const_int 109)
11123 (const_int 46) (const_int 110)
11124 (const_int 47) (const_int 111)
11125 (const_int 56) (const_int 120)
11126 (const_int 57) (const_int 121)
11127 (const_int 58) (const_int 122)
11128 (const_int 59) (const_int 123)
11129 (const_int 60) (const_int 124)
11130 (const_int 61) (const_int 125)
11131 (const_int 62) (const_int 126)
11132 (const_int 63) (const_int 127)])))]
11133 "TARGET_AVX512BW"
11134 "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11135 [(set_attr "type" "sselog")
11136 (set_attr "prefix" "evex")
11137 (set_attr "mode" "XI")])
11138
;; High-half byte interleave of two V32QI vectors under TARGET_AVX2 (plus
;; <mask_avx512vl_condition>), emitted as vpunpckhbw.  The selector
;; indexes the 64-element concatenation of operands 1 and 2, so indices
;; 32 and up refer to operand 2.  Within each group of 16 bytes, elements
;; 8..15 of operand 1 alternate with the same-numbered elements of
;; operand 2.
11139 (define_insn "avx2_interleave_highv32qi<mask_name>"
11140 [(set (match_operand:V32QI 0 "register_operand" "=v")
11141 (vec_select:V32QI
11142 (vec_concat:V64QI
11143 (match_operand:V32QI 1 "register_operand" "v")
11144 (match_operand:V32QI 2 "nonimmediate_operand" "vm"))
11145 (parallel [(const_int 8) (const_int 40)
11146 (const_int 9) (const_int 41)
11147 (const_int 10) (const_int 42)
11148 (const_int 11) (const_int 43)
11149 (const_int 12) (const_int 44)
11150 (const_int 13) (const_int 45)
11151 (const_int 14) (const_int 46)
11152 (const_int 15) (const_int 47)
11153 (const_int 24) (const_int 56)
11154 (const_int 25) (const_int 57)
11155 (const_int 26) (const_int 58)
11156 (const_int 27) (const_int 59)
11157 (const_int 28) (const_int 60)
11158 (const_int 29) (const_int 61)
11159 (const_int 30) (const_int 62)
11160 (const_int 31) (const_int 63)])))]
11161 "TARGET_AVX2 && <mask_avx512vl_condition>"
11162 "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11163 [(set_attr "type" "sselog")
11164 (set_attr "prefix" "<mask_prefix>")
11165 (set_attr "mode" "OI")])
11166
;; High-half byte interleave of two V16QI vectors under TARGET_SSE2 (plus
;; <mask_avx512vl_condition>).  The selector indexes the 32-element
;; concatenation of operands 1 and 2, so indices 16 and up refer to
;; operand 2: elements 8..15 of operand 1 alternate with elements 8..15
;; of operand 2.  Alternative 0 (isa noavx) emits two-operand punpckhbw;
;; alternative 1 (isa avx) emits three-operand vpunpckhbw with the
;; <mask_operand3> suffix.
11167 (define_insn "vec_interleave_highv16qi<mask_name>"
11168 [(set (match_operand:V16QI 0 "register_operand" "=x,v")
11169 (vec_select:V16QI
11170 (vec_concat:V32QI
11171 (match_operand:V16QI 1 "register_operand" "0,v")
11172 (match_operand:V16QI 2 "nonimmediate_operand" "xm,vm"))
11173 (parallel [(const_int 8) (const_int 24)
11174 (const_int 9) (const_int 25)
11175 (const_int 10) (const_int 26)
11176 (const_int 11) (const_int 27)
11177 (const_int 12) (const_int 28)
11178 (const_int 13) (const_int 29)
11179 (const_int 14) (const_int 30)
11180 (const_int 15) (const_int 31)])))]
11181 "TARGET_SSE2 && <mask_avx512vl_condition>"
11182 "@
11183 punpckhbw\t{%2, %0|%0, %2}
11184 vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11185 [(set_attr "isa" "noavx,avx")
11186 (set_attr "type" "sselog")
11187 (set_attr "prefix_data16" "1,*")
11188 (set_attr "prefix" "orig,<mask_prefix>")
11189 (set_attr "mode" "TI")])
11190
;; Interleave the low bytes of operands 1 and 2 in a V64QI vector: elements
;; 0-7, 16-23, 32-39 and 48-55 of operand 1 are paired with the same-numbered
;; elements of operand 2 (indices 64+ in the concatenation).  Emitted as
;; vpunpcklbw under TARGET_AVX512BW.
11191 (define_insn "avx512bw_interleave_lowv64qi<mask_name>"
11192 [(set (match_operand:V64QI 0 "register_operand" "=v")
11193 (vec_select:V64QI
11194 (vec_concat:V128QI
11195 (match_operand:V64QI 1 "register_operand" "v")
11196 (match_operand:V64QI 2 "nonimmediate_operand" "vm"))
11197 (parallel [(const_int 0) (const_int 64)
11198 (const_int 1) (const_int 65)
11199 (const_int 2) (const_int 66)
11200 (const_int 3) (const_int 67)
11201 (const_int 4) (const_int 68)
11202 (const_int 5) (const_int 69)
11203 (const_int 6) (const_int 70)
11204 (const_int 7) (const_int 71)
11205 (const_int 16) (const_int 80)
11206 (const_int 17) (const_int 81)
11207 (const_int 18) (const_int 82)
11208 (const_int 19) (const_int 83)
11209 (const_int 20) (const_int 84)
11210 (const_int 21) (const_int 85)
11211 (const_int 22) (const_int 86)
11212 (const_int 23) (const_int 87)
11213 (const_int 32) (const_int 96)
11214 (const_int 33) (const_int 97)
11215 (const_int 34) (const_int 98)
11216 (const_int 35) (const_int 99)
11217 (const_int 36) (const_int 100)
11218 (const_int 37) (const_int 101)
11219 (const_int 38) (const_int 102)
11220 (const_int 39) (const_int 103)
11221 (const_int 48) (const_int 112)
11222 (const_int 49) (const_int 113)
11223 (const_int 50) (const_int 114)
11224 (const_int 51) (const_int 115)
11225 (const_int 52) (const_int 116)
11226 (const_int 53) (const_int 117)
11227 (const_int 54) (const_int 118)
11228 (const_int 55) (const_int 119)])))]
11229 "TARGET_AVX512BW"
11230 "vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11231 [(set_attr "type" "sselog")
11232 (set_attr "prefix" "evex")
11233 (set_attr "mode" "XI")])
11234
;; Interleave the low bytes of operands 1 and 2 in a V32QI vector: elements
;; 0-7 and 16-23 of operand 1 are paired with the same-numbered elements of
;; operand 2 (indices 32+ in the concatenation).  Emitted as vpunpcklbw.
;; The masked variant is additionally gated on both the AVX512VL and the
;; AVX512BW mask conditions.
11235 (define_insn "avx2_interleave_lowv32qi<mask_name>"
11236 [(set (match_operand:V32QI 0 "register_operand" "=v")
11237 (vec_select:V32QI
11238 (vec_concat:V64QI
11239 (match_operand:V32QI 1 "register_operand" "v")
11240 (match_operand:V32QI 2 "nonimmediate_operand" "vm"))
11241 (parallel [(const_int 0) (const_int 32)
11242 (const_int 1) (const_int 33)
11243 (const_int 2) (const_int 34)
11244 (const_int 3) (const_int 35)
11245 (const_int 4) (const_int 36)
11246 (const_int 5) (const_int 37)
11247 (const_int 6) (const_int 38)
11248 (const_int 7) (const_int 39)
11249 (const_int 16) (const_int 48)
11250 (const_int 17) (const_int 49)
11251 (const_int 18) (const_int 50)
11252 (const_int 19) (const_int 51)
11253 (const_int 20) (const_int 52)
11254 (const_int 21) (const_int 53)
11255 (const_int 22) (const_int 54)
11256 (const_int 23) (const_int 55)])))]
11257 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
11258 "vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11259 [(set_attr "type" "sselog")
11260 (set_attr "prefix" "maybe_vex")
11261 (set_attr "mode" "OI")])
11262
;; Interleave elements 0-7 of operand 1 with elements 0-7 of operand 2
;; (indices 16-23 in the concatenation).  Alternative 0 is the two-operand
;; legacy SSE form with operand 1 tied to operand 0 (isa "noavx");
;; alternative 1 is the three-operand AVX form, the only one whose template
;; carries the optional mask operand.
11263 (define_insn "vec_interleave_lowv16qi<mask_name>"
11264 [(set (match_operand:V16QI 0 "register_operand" "=x,v")
11265 (vec_select:V16QI
11266 (vec_concat:V32QI
11267 (match_operand:V16QI 1 "register_operand" "0,v")
11268 (match_operand:V16QI 2 "nonimmediate_operand" "xm,vm"))
11269 (parallel [(const_int 0) (const_int 16)
11270 (const_int 1) (const_int 17)
11271 (const_int 2) (const_int 18)
11272 (const_int 3) (const_int 19)
11273 (const_int 4) (const_int 20)
11274 (const_int 5) (const_int 21)
11275 (const_int 6) (const_int 22)
11276 (const_int 7) (const_int 23)])))]
11277 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
11278 "@
11279 punpcklbw\t{%2, %0|%0, %2}
11280 vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11281 [(set_attr "isa" "noavx,avx")
11282 (set_attr "type" "sselog")
11283 (set_attr "prefix_data16" "1,*")
11284 (set_attr "prefix" "orig,vex")
11285 (set_attr "mode" "TI")])
11286
;; Interleave the high HImode elements of operands 1 and 2 in a V32HI vector:
;; elements 4-7, 12-15, 20-23 and 28-31 of operand 1 are paired with the
;; same-numbered elements of operand 2 (indices 32+ in the concatenation).
;; Emitted as vpunpckhwd under TARGET_AVX512BW.
11287 (define_insn "avx512bw_interleave_highv32hi<mask_name>"
11288 [(set (match_operand:V32HI 0 "register_operand" "=v")
11289 (vec_select:V32HI
11290 (vec_concat:V64HI
11291 (match_operand:V32HI 1 "register_operand" "v")
11292 (match_operand:V32HI 2 "nonimmediate_operand" "vm"))
11293 (parallel [(const_int 4) (const_int 36)
11294 (const_int 5) (const_int 37)
11295 (const_int 6) (const_int 38)
11296 (const_int 7) (const_int 39)
11297 (const_int 12) (const_int 44)
11298 (const_int 13) (const_int 45)
11299 (const_int 14) (const_int 46)
11300 (const_int 15) (const_int 47)
11301 (const_int 20) (const_int 52)
11302 (const_int 21) (const_int 53)
11303 (const_int 22) (const_int 54)
11304 (const_int 23) (const_int 55)
11305 (const_int 28) (const_int 60)
11306 (const_int 29) (const_int 61)
11307 (const_int 30) (const_int 62)
11308 (const_int 31) (const_int 63)])))]
11309 "TARGET_AVX512BW"
11310 "vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11311 [(set_attr "type" "sselog")
11312 (set_attr "prefix" "evex")
11313 (set_attr "mode" "XI")])
11314
;; Interleave the high HImode elements of operands 1 and 2 in a V16HI vector:
;; elements 4-7 and 12-15 of operand 1 are paired with the same-numbered
;; elements of operand 2 (indices 16+ in the concatenation).  Emitted as
;; vpunpckhwd; the masked variant is gated on both mask conditions.
11315 (define_insn "avx2_interleave_highv16hi<mask_name>"
11316 [(set (match_operand:V16HI 0 "register_operand" "=v")
11317 (vec_select:V16HI
11318 (vec_concat:V32HI
11319 (match_operand:V16HI 1 "register_operand" "v")
11320 (match_operand:V16HI 2 "nonimmediate_operand" "vm"))
11321 (parallel [(const_int 4) (const_int 20)
11322 (const_int 5) (const_int 21)
11323 (const_int 6) (const_int 22)
11324 (const_int 7) (const_int 23)
11325 (const_int 12) (const_int 28)
11326 (const_int 13) (const_int 29)
11327 (const_int 14) (const_int 30)
11328 (const_int 15) (const_int 31)])))]
11329 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
11330 "vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11331 [(set_attr "type" "sselog")
11332 (set_attr "prefix" "maybe_evex")
11333 (set_attr "mode" "OI")])
11334
;; Interleave elements 4-7 of operand 1 with elements 4-7 of operand 2
;; (indices 12-15 in the concatenation).  Alternative 0 is the two-operand
;; legacy SSE form with operand 1 tied to operand 0; alternative 1 is the
;; three-operand AVX form, which alone carries the optional mask operand.
11335 (define_insn "vec_interleave_highv8hi<mask_name>"
11336 [(set (match_operand:V8HI 0 "register_operand" "=x,v")
11337 (vec_select:V8HI
11338 (vec_concat:V16HI
11339 (match_operand:V8HI 1 "register_operand" "0,v")
11340 (match_operand:V8HI 2 "nonimmediate_operand" "xm,vm"))
11341 (parallel [(const_int 4) (const_int 12)
11342 (const_int 5) (const_int 13)
11343 (const_int 6) (const_int 14)
11344 (const_int 7) (const_int 15)])))]
11345 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
11346 "@
11347 punpckhwd\t{%2, %0|%0, %2}
11348 vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11349 [(set_attr "isa" "noavx,avx")
11350 (set_attr "type" "sselog")
11351 (set_attr "prefix_data16" "1,*")
11352 (set_attr "prefix" "orig,maybe_vex")
11353 (set_attr "mode" "TI")])
11354
;; Interleave the low HImode elements of operands 1 and 2 in a V32HI vector:
;; elements 0-3, 8-11, 16-19 and 24-27 of operand 1 are paired with the
;; same-numbered elements of operand 2 (indices 32+ in the concatenation).
;; Emitted as vpunpcklwd under TARGET_AVX512BW.
11355 (define_insn "<mask_codefor>avx512bw_interleave_lowv32hi<mask_name>"
11356 [(set (match_operand:V32HI 0 "register_operand" "=v")
11357 (vec_select:V32HI
11358 (vec_concat:V64HI
11359 (match_operand:V32HI 1 "register_operand" "v")
11360 (match_operand:V32HI 2 "nonimmediate_operand" "vm"))
11361 (parallel [(const_int 0) (const_int 32)
11362 (const_int 1) (const_int 33)
11363 (const_int 2) (const_int 34)
11364 (const_int 3) (const_int 35)
11365 (const_int 8) (const_int 40)
11366 (const_int 9) (const_int 41)
11367 (const_int 10) (const_int 42)
11368 (const_int 11) (const_int 43)
11369 (const_int 16) (const_int 48)
11370 (const_int 17) (const_int 49)
11371 (const_int 18) (const_int 50)
11372 (const_int 19) (const_int 51)
11373 (const_int 24) (const_int 56)
11374 (const_int 25) (const_int 57)
11375 (const_int 26) (const_int 58)
11376 (const_int 27) (const_int 59)])))]
11377 "TARGET_AVX512BW"
11378 "vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11379 [(set_attr "type" "sselog")
11380 (set_attr "prefix" "evex")
11381 (set_attr "mode" "XI")])
11382
;; Interleave the low HImode elements of operands 1 and 2 in a V16HI vector:
;; elements 0-3 and 8-11 of operand 1 are paired with the same-numbered
;; elements of operand 2 (indices 16+ in the concatenation).  Emitted as
;; vpunpcklwd; the masked variant is gated on both mask conditions.
11383 (define_insn "avx2_interleave_lowv16hi<mask_name>"
11384 [(set (match_operand:V16HI 0 "register_operand" "=v")
11385 (vec_select:V16HI
11386 (vec_concat:V32HI
11387 (match_operand:V16HI 1 "register_operand" "v")
11388 (match_operand:V16HI 2 "nonimmediate_operand" "vm"))
11389 (parallel [(const_int 0) (const_int 16)
11390 (const_int 1) (const_int 17)
11391 (const_int 2) (const_int 18)
11392 (const_int 3) (const_int 19)
11393 (const_int 8) (const_int 24)
11394 (const_int 9) (const_int 25)
11395 (const_int 10) (const_int 26)
11396 (const_int 11) (const_int 27)])))]
11397 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
11398 "vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11399 [(set_attr "type" "sselog")
11400 (set_attr "prefix" "maybe_evex")
11401 (set_attr "mode" "OI")])
11402
;; Interleave elements 0-3 of operand 1 with elements 0-3 of operand 2
;; (indices 8-11 in the concatenation).  Alternative 0 is the two-operand
;; legacy SSE form with operand 1 tied to operand 0; alternative 1 is the
;; three-operand AVX form, which alone carries the optional mask operand.
11403 (define_insn "vec_interleave_lowv8hi<mask_name>"
11404 [(set (match_operand:V8HI 0 "register_operand" "=x,v")
11405 (vec_select:V8HI
11406 (vec_concat:V16HI
11407 (match_operand:V8HI 1 "register_operand" "0,v")
11408 (match_operand:V8HI 2 "nonimmediate_operand" "xm,vm"))
11409 (parallel [(const_int 0) (const_int 8)
11410 (const_int 1) (const_int 9)
11411 (const_int 2) (const_int 10)
11412 (const_int 3) (const_int 11)])))]
11413 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
11414 "@
11415 punpcklwd\t{%2, %0|%0, %2}
11416 vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11417 [(set_attr "isa" "noavx,avx")
11418 (set_attr "type" "sselog")
11419 (set_attr "prefix_data16" "1,*")
11420 (set_attr "prefix" "orig,maybe_evex")
11421 (set_attr "mode" "TI")])
11422
;; Interleave the high SImode elements of operands 1 and 2 in a V8SI vector:
;; elements 2-3 and 6-7 of operand 1 are paired with the same-numbered
;; elements of operand 2 (indices 8+ in the concatenation).  Emitted as
;; vpunpckhdq; the masked variant only needs <mask_avx512vl_condition>.
11423 (define_insn "avx2_interleave_highv8si<mask_name>"
11424 [(set (match_operand:V8SI 0 "register_operand" "=v")
11425 (vec_select:V8SI
11426 (vec_concat:V16SI
11427 (match_operand:V8SI 1 "register_operand" "v")
11428 (match_operand:V8SI 2 "nonimmediate_operand" "vm"))
11429 (parallel [(const_int 2) (const_int 10)
11430 (const_int 3) (const_int 11)
11431 (const_int 6) (const_int 14)
11432 (const_int 7) (const_int 15)])))]
11433 "TARGET_AVX2 && <mask_avx512vl_condition>"
11434 "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11435 [(set_attr "type" "sselog")
11436 (set_attr "prefix" "maybe_evex")
11437 (set_attr "mode" "OI")])
11438
;; Interleave the high SImode elements of operands 1 and 2 in a V16SI vector:
;; elements 2-3, 6-7, 10-11 and 14-15 of operand 1 are paired with the
;; same-numbered elements of operand 2 (indices 16+ in the concatenation).
;; Emitted as vpunpckhdq under TARGET_AVX512F.
11439 (define_insn "<mask_codefor>avx512f_interleave_highv16si<mask_name>"
11440 [(set (match_operand:V16SI 0 "register_operand" "=v")
11441 (vec_select:V16SI
11442 (vec_concat:V32SI
11443 (match_operand:V16SI 1 "register_operand" "v")
11444 (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
11445 (parallel [(const_int 2) (const_int 18)
11446 (const_int 3) (const_int 19)
11447 (const_int 6) (const_int 22)
11448 (const_int 7) (const_int 23)
11449 (const_int 10) (const_int 26)
11450 (const_int 11) (const_int 27)
11451 (const_int 14) (const_int 30)
11452 (const_int 15) (const_int 31)])))]
11453 "TARGET_AVX512F"
11454 "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11455 [(set_attr "type" "sselog")
11456 (set_attr "prefix" "evex")
11457 (set_attr "mode" "XI")])
11458
11459
;; Interleave elements 2-3 of operand 1 with elements 2-3 of operand 2
;; (indices 6-7 in the concatenation).  Alternative 0 is the two-operand
;; legacy SSE form with operand 1 tied to operand 0; alternative 1 is the
;; three-operand AVX form, which alone carries the optional mask operand.
11460 (define_insn "vec_interleave_highv4si<mask_name>"
11461 [(set (match_operand:V4SI 0 "register_operand" "=x,v")
11462 (vec_select:V4SI
11463 (vec_concat:V8SI
11464 (match_operand:V4SI 1 "register_operand" "0,v")
11465 (match_operand:V4SI 2 "nonimmediate_operand" "xm,vm"))
11466 (parallel [(const_int 2) (const_int 6)
11467 (const_int 3) (const_int 7)])))]
11468 "TARGET_SSE2 && <mask_avx512vl_condition>"
11469 "@
11470 punpckhdq\t{%2, %0|%0, %2}
11471 vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11472 [(set_attr "isa" "noavx,avx")
11473 (set_attr "type" "sselog")
11474 (set_attr "prefix_data16" "1,*")
11475 (set_attr "prefix" "orig,maybe_vex")
11476 (set_attr "mode" "TI")])
11477
;; Interleave the low SImode elements of operands 1 and 2 in a V8SI vector:
;; elements 0-1 and 4-5 of operand 1 are paired with the same-numbered
;; elements of operand 2 (indices 8+ in the concatenation).  Emitted as
;; vpunpckldq; the masked variant only needs <mask_avx512vl_condition>.
11478 (define_insn "avx2_interleave_lowv8si<mask_name>"
11479 [(set (match_operand:V8SI 0 "register_operand" "=v")
11480 (vec_select:V8SI
11481 (vec_concat:V16SI
11482 (match_operand:V8SI 1 "register_operand" "v")
11483 (match_operand:V8SI 2 "nonimmediate_operand" "vm"))
11484 (parallel [(const_int 0) (const_int 8)
11485 (const_int 1) (const_int 9)
11486 (const_int 4) (const_int 12)
11487 (const_int 5) (const_int 13)])))]
11488 "TARGET_AVX2 && <mask_avx512vl_condition>"
11489 "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11490 [(set_attr "type" "sselog")
11491 (set_attr "prefix" "maybe_evex")
11492 (set_attr "mode" "OI")])
11493
;; Interleave the low SImode elements of operands 1 and 2 in a V16SI vector:
;; elements 0-1, 4-5, 8-9 and 12-13 of operand 1 are paired with the
;; same-numbered elements of operand 2 (indices 16+ in the concatenation).
;; Emitted as vpunpckldq under TARGET_AVX512F.
11494 (define_insn "<mask_codefor>avx512f_interleave_lowv16si<mask_name>"
11495 [(set (match_operand:V16SI 0 "register_operand" "=v")
11496 (vec_select:V16SI
11497 (vec_concat:V32SI
11498 (match_operand:V16SI 1 "register_operand" "v")
11499 (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
11500 (parallel [(const_int 0) (const_int 16)
11501 (const_int 1) (const_int 17)
11502 (const_int 4) (const_int 20)
11503 (const_int 5) (const_int 21)
11504 (const_int 8) (const_int 24)
11505 (const_int 9) (const_int 25)
11506 (const_int 12) (const_int 28)
11507 (const_int 13) (const_int 29)])))]
11508 "TARGET_AVX512F"
11509 "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11510 [(set_attr "type" "sselog")
11511 (set_attr "prefix" "evex")
11512 (set_attr "mode" "XI")])
11513
;; Interleave elements 0-1 of operand 1 with elements 0-1 of operand 2
;; (indices 4-5 in the concatenation).  Alternative 0 is the two-operand
;; legacy SSE form with operand 1 tied to operand 0; alternative 1 is the
;; three-operand AVX form, which alone carries the optional mask operand.
11514 (define_insn "vec_interleave_lowv4si<mask_name>"
11515 [(set (match_operand:V4SI 0 "register_operand" "=x,v")
11516 (vec_select:V4SI
11517 (vec_concat:V8SI
11518 (match_operand:V4SI 1 "register_operand" "0,v")
11519 (match_operand:V4SI 2 "nonimmediate_operand" "xm,vm"))
11520 (parallel [(const_int 0) (const_int 4)
11521 (const_int 1) (const_int 5)])))]
11522 "TARGET_SSE2 && <mask_avx512vl_condition>"
11523 "@
11524 punpckldq\t{%2, %0|%0, %2}
11525 vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11526 [(set_attr "isa" "noavx,avx")
11527 (set_attr "type" "sselog")
11528 (set_attr "prefix_data16" "1,*")
11529 (set_attr "prefix" "orig,vex")
11530 (set_attr "mode" "TI")])
11531
;; Whole-vector high interleave for the VI_256 modes.  The avx2_interleave_*
;; patterns interleave separately inside each half of the vector, so both the
;; "low" and the "high" per-half results are computed and then recombined
;; with avx2_permv2ti (selector 0x31, presumably picking the upper half of
;; each intermediate -- confirm against the vperm2i128 encoding).
(define_expand "vec_interleave_high<mode>"
  [(match_operand:VI_256 0 "register_operand" "=x")
   (match_operand:VI_256 1 "register_operand" "x")
   (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
  "TARGET_AVX2"
{
  rtx per_half_lo = gen_reg_rtx (<MODE>mode);
  rtx per_half_hi = gen_reg_rtx (<MODE>mode);
  rtx combined = gen_reg_rtx (V4DImode);
  /* Low nibble 1, high nibble 3.  */
  rtx half_sel = GEN_INT (1 + (3 << 4));

  emit_insn (gen_avx2_interleave_low<mode> (per_half_lo,
                                            operands[1], operands[2]));
  emit_insn (gen_avx2_interleave_high<mode> (per_half_hi,
                                             operands[1], operands[2]));
  /* The permute is done in V4DImode; view the intermediates accordingly.  */
  emit_insn (gen_avx2_permv2ti (combined,
                                gen_lowpart (V4DImode, per_half_lo),
                                gen_lowpart (V4DImode, per_half_hi),
                                half_sel));
  emit_move_insn (operands[0], gen_lowpart (<MODE>mode, combined));
  DONE;
})
11549
;; Whole-vector low interleave for the VI_256 modes.  The avx2_interleave_*
;; patterns interleave separately inside each half of the vector, so both the
;; "low" and the "high" per-half results are computed and then recombined
;; with avx2_permv2ti (selector 0x20, presumably picking the lower half of
;; each intermediate -- confirm against the vperm2i128 encoding).
(define_expand "vec_interleave_low<mode>"
  [(match_operand:VI_256 0 "register_operand" "=x")
   (match_operand:VI_256 1 "register_operand" "x")
   (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
  "TARGET_AVX2"
{
  rtx per_half_lo = gen_reg_rtx (<MODE>mode);
  rtx per_half_hi = gen_reg_rtx (<MODE>mode);
  rtx combined = gen_reg_rtx (V4DImode);
  /* Low nibble 0, high nibble 2.  */
  rtx half_sel = GEN_INT (0 + (2 << 4));

  emit_insn (gen_avx2_interleave_low<mode> (per_half_lo,
                                            operands[1], operands[2]));
  emit_insn (gen_avx2_interleave_high<mode> (per_half_hi,
                                             operands[1], operands[2]));
  /* The permute is done in V4DImode; view the intermediates accordingly.  */
  emit_insn (gen_avx2_permv2ti (combined,
                                gen_lowpart (V4DImode, per_half_lo),
                                gen_lowpart (V4DImode, per_half_hi),
                                half_sel));
  emit_move_insn (operands[0], gen_lowpart (<MODE>mode, combined));
  DONE;
})
11567
11568 ;; Modes handled by pinsr patterns.
;; V8HI carries no extra condition here; V16QI and V4SI require
;; TARGET_SSE4_1, and V2DI additionally requires TARGET_64BIT.
11569 (define_mode_iterator PINSR_MODE
11570 [(V16QI "TARGET_SSE4_1") V8HI
11571 (V4SI "TARGET_SSE4_1")
11572 (V2DI "TARGET_SSE4_1 && TARGET_64BIT")])
11573
;; Name prefix for the pinsr pattern of each PINSR_MODE mode: "sse2" for
;; V8HI, "sse4_1" for the others.
11574 (define_mode_attr sse2p4_1
11575 [(V16QI "sse4_1") (V8HI "sse2")
11576 (V4SI "sse4_1") (V2DI "sse4_1")])
11577
11578 ;; sse4_1_pinsrd must come before sse2_loadld since it is preferred.
;; Insert scalar operand 2 into one element of vector operand 1.  Operand 3
;; is the vec_merge mask; the insn condition only accepts it when
;; exact_log2 of it is a valid element index, i.e. exactly one in-range bit
;; is set.  The output code replaces operand 3 with that element index so it
;; can be printed as the instruction immediate.
;; Alternatives: 0 = register source, legacy encoding; 1 = memory source,
;; legacy encoding; 2 = register source, AVX; 3 = memory source, AVX.
;; For register sources narrower than SImode the register is printed with
;; the %k modifier; otherwise cases 0 and 2 fall through to the plain
;; templates of cases 1 and 3.
11579 (define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"
11580 [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x")
11581 (vec_merge:PINSR_MODE
11582 (vec_duplicate:PINSR_MODE
11583 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m"))
11584 (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x")
11585 (match_operand:SI 3 "const_int_operand")))]
11586 "TARGET_SSE2
11587 && ((unsigned) exact_log2 (INTVAL (operands[3]))
11588 < GET_MODE_NUNITS (<MODE>mode))"
11589 {
11590 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
11591
11592 switch (which_alternative)
11593 {
11594 case 0:
11595 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
11596 return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}";
11597 /* FALLTHRU */
11598 case 1:
11599 return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}";
11600 case 2:
11601 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
11602 return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
11603 /* FALLTHRU */
11604 case 3:
11605 return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
11606 default:
11607 gcc_unreachable ();
11608 }
11609 }
11610 [(set_attr "isa" "noavx,noavx,avx,avx")
11611 (set_attr "type" "sselog")
11612 (set (attr "prefix_rex")
11613 (if_then_else
11614 (and (not (match_test "TARGET_AVX"))
11615 (eq (const_string "<MODE>mode") (const_string "V2DImode")))
11616 (const_string "1")
11617 (const_string "*")))
11618 (set (attr "prefix_data16")
11619 (if_then_else
11620 (and (not (match_test "TARGET_AVX"))
11621 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
11622 (const_string "1")
11623 (const_string "*")))
11624 (set (attr "prefix_extra")
11625 (if_then_else
11626 (and (not (match_test "TARGET_AVX"))
11627 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
11628 (const_string "*")
11629 (const_string "1")))
11630 (set_attr "length_immediate" "1")
11631 (set_attr "prefix" "orig,orig,vex,vex")
11632 (set_attr "mode" "TI")])
11633
;; Masked insert of a quarter-width vector (operand 2) into position
;; operand 3 (0-3) of the full-width vector operand 1.  Operands 4 and 5 are
;; the merge source and the mask register passed on to the _1_mask pattern.
;;
;; The _1 pattern below represents the insert as
;;   (vec_merge op1 (vec_duplicate op2) selector)
;; and vec_merge takes element I from its FIRST operand when bit I of the
;; selector is set.  The elements being replaced must therefore have their
;; bits CLEARED: inserting at position 0 replaces the lowest elements, so the
;; selector is 0xFFF0 (4-byte elements) or 0xFC (8-byte elements).  The
;; previous encoding cleared the bits from the top down instead
;; (position 0 -> 0x0FFF), which printed the right immediate only because the
;; decoder was reversed the same way, while the RTL described an insert at
;; the opposite end of the vector.
(define_expand "<extract_type>_vinsert<shuffletype><extract_suf>_mask"
  [(match_operand:AVX512_VEC 0 "register_operand")
   (match_operand:AVX512_VEC 1 "register_operand")
   (match_operand:<ssequartermode> 2 "nonimmediate_operand")
   (match_operand:SI 3 "const_0_to_3_operand")
   (match_operand:AVX512_VEC 4 "register_operand")
   (match_operand:<avx512fmaskmode> 5 "register_operand")]
  "TARGET_AVX512F"
{
  int mask, selector;
  mask = INTVAL (operands[3]);
  /* Clear the selector bits of the quarter being replaced: four bits per
     quarter for 4-byte elements, two bits per quarter otherwise.  */
  selector = (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 4
              ? 0xFFFF ^ (0x000F << mask * 4)
              : 0xFF ^ (0x03 << mask * 2));
  emit_insn (gen_<extract_type>_vinsert<shuffletype><extract_suf>_1_mask
    (operands[0], operands[1], operands[2], GEN_INT (selector),
     operands[4], operands[5]));
  DONE;
})

;; Matching insn for the expander above.  Operand 3 is the vec_merge
;; selector (a set bit keeps the element of operand 1); the output code maps
;; it back to the 0-3 position immediate of vinsert*.  Any other selector
;; value is not a whole-quarter insert and hits gcc_unreachable.
;; NOTE(review): any other producer of this pattern's selector must use the
;; same encoding (position 0 <-> 0xFFF0 / 0xFC) -- confirm there are none
;; relying on the old one.
(define_insn "<mask_codefor><extract_type>_vinsert<shuffletype><extract_suf>_1<mask_name>"
  [(set (match_operand:AVX512_VEC 0 "register_operand" "=v")
        (vec_merge:AVX512_VEC
          (match_operand:AVX512_VEC 1 "register_operand" "v")
          (vec_duplicate:AVX512_VEC
            (match_operand:<ssequartermode> 2 "nonimmediate_operand" "vm"))
          (match_operand:SI 3 "const_int_operand" "n")))]
  "TARGET_AVX512F"
{
  int mask;
  int selector = INTVAL (operands[3]);

  if (selector == 0xFFF0 || selector == 0xFC)
    mask = 0;
  else if (selector == 0xFF0F || selector == 0xF3)
    mask = 1;
  else if (selector == 0xF0FF || selector == 0xCF)
    mask = 2;
  else if (selector == 0x0FFF || selector == 0x3F)
    mask = 3;
  else
    gcc_unreachable ();

  operands[3] = GEN_INT (mask);

  return "vinsert<shuffletype><extract_suf>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
11685
;; Masked insert of a half-width vector (operand 2) into the low (operand 3
;; == 0) or high (operand 3 == 1) half of operand 1.  Dispatches to the
;; vec_set_lo_/vec_set_hi_ masked patterns, forwarding the merge source
;; (operand 4) and the mask register (operand 5).
(define_expand "<extract_type_2>_vinsert<shuffletype><extract_suf_2>_mask"
  [(match_operand:AVX512_VEC_2 0 "register_operand")
   (match_operand:AVX512_VEC_2 1 "register_operand")
   (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
   (match_operand:SI 3 "const_0_to_1_operand")
   (match_operand:AVX512_VEC_2 4 "register_operand")
   (match_operand:<avx512fmaskmode> 5 "register_operand")]
  "TARGET_AVX512F"
{
  /* The predicate on operand 3 only admits 0 and 1.  */
  rtx insert_insn
    = (INTVAL (operands[3]) == 0
       ? gen_vec_set_lo_<mode>_mask (operands[0], operands[1], operands[2],
                                     operands[4], operands[5])
       : gen_vec_set_hi_<mode>_mask (operands[0], operands[1], operands[2],
                                     operands[4], operands[5]));
  emit_insn (insert_insn);
  DONE;
})
11706
;; Replace the low half of a V16FI vector: the result is operand 2 in
;; elements 0-7 followed by elements 8-15 of operand 1.  Emitted as
;; vinsert*32x8 with immediate 0.  The Intel-syntax half of the template
;; prints the immediate without the AT&T "$" prefix.
(define_insn "vec_set_lo_<mode><mask_name>"
  [(set (match_operand:V16FI 0 "register_operand" "=v")
        (vec_concat:V16FI
          (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
          (vec_select:<ssehalfvecmode>
            (match_operand:V16FI 1 "register_operand" "v")
            (parallel [(const_int 8) (const_int 9)
                       (const_int 10) (const_int 11)
                       (const_int 12) (const_int 13)
                       (const_int 14) (const_int 15)]))))]
  "TARGET_AVX512DQ"
  "vinsert<shuffletype>32x8\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}"
  [(set_attr "type" "sselog")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
11723
;; Replace the high half of a V16FI vector: the result keeps elements 0-7 of
;; operand 1 and places operand 2 in elements 8-15.  vec_concat lists the low
;; half first, so the vec_select of operand 1 must be the first operand and
;; operand 2 the second; the previous order described operand 2 landing in
;; the LOW half, which contradicts the emitted vinsert*32x8 with immediate 1.
;; Operand numbers are unchanged, so the generated gen_ function keeps its
;; argument order.  The Intel-syntax half of the template prints the
;; immediate without the AT&T "$" prefix.
(define_insn "vec_set_hi_<mode><mask_name>"
  [(set (match_operand:V16FI 0 "register_operand" "=v")
        (vec_concat:V16FI
          (vec_select:<ssehalfvecmode>
            (match_operand:V16FI 1 "register_operand" "v")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)
                       (const_int 4) (const_int 5)
                       (const_int 6) (const_int 7)]))
          (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512DQ"
  "vinsert<shuffletype>32x8\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}"
  [(set_attr "type" "sselog")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
11740
;; Replace the low half of a V8FI vector: the result is operand 2 in
;; elements 0-3 followed by elements 4-7 of operand 1.  Emitted as
;; vinsert*64x4 with immediate 0.  The Intel-syntax half of the template
;; prints the immediate without the AT&T "$" prefix.
(define_insn "vec_set_lo_<mode><mask_name>"
  [(set (match_operand:V8FI 0 "register_operand" "=v")
        (vec_concat:V8FI
          (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
          (vec_select:<ssehalfvecmode>
            (match_operand:V8FI 1 "register_operand" "v")
            (parallel [(const_int 4) (const_int 5)
                       (const_int 6) (const_int 7)]))))]
  "TARGET_AVX512F"
  "vinsert<shuffletype>64x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}"
  [(set_attr "type" "sselog")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
11755
;; Replace the high half of a V8FI vector: the result keeps elements 0-3 of
;; operand 1 and places operand 2 in elements 4-7.  vec_concat lists the low
;; half first, so the vec_select of operand 1 must be the first operand and
;; operand 2 the second; the previous order described operand 2 landing in
;; the LOW half, which contradicts the emitted vinsert*64x4 with immediate 1.
;; Operand numbers are unchanged, so the generated gen_ function keeps its
;; argument order.  The Intel-syntax half of the template prints the
;; immediate without the AT&T "$" prefix.
(define_insn "vec_set_hi_<mode><mask_name>"
  [(set (match_operand:V8FI 0 "register_operand" "=v")
        (vec_concat:V8FI
          (vec_select:<ssehalfvecmode>
            (match_operand:V8FI 1 "register_operand" "v")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)]))
          (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512F"
  "vinsert<shuffletype>64x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}"
  [(set_attr "type" "sselog")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
11770
;; Masked 64x2 shuffle on the VI8F_256 modes.  Operand 3 is a 2-bit
;; immediate: bit 0 picks which element pair of operand 1 goes to the low
;; half of the result, bit 1 picks which element pair of operand 2 goes to
;; the high half.  The immediate is expanded into the four explicit
;; vec_select indices expected by the _1_mask pattern (operand 2's elements
;; are numbered 4-7 in the concatenation).
;; NOTE(review): this expander is gated on TARGET_AVX512DQ while the _1
;; insn it emits is gated on TARGET_AVX512VL -- confirm which is intended.
(define_expand "avx512dq_shuf_<shuffletype>64x2_mask"
  [(match_operand:VI8F_256 0 "register_operand")
   (match_operand:VI8F_256 1 "register_operand")
   (match_operand:VI8F_256 2 "nonimmediate_operand")
   (match_operand:SI 3 "const_0_to_3_operand")
   (match_operand:VI8F_256 4 "register_operand")
   (match_operand:QI 5 "register_operand")]
  "TARGET_AVX512DQ"
{
  const int imm = INTVAL (operands[3]);
  /* First index of the pair taken from operand 1 / operand 2.  */
  const int src1_base = (imm & 1) * 2;
  const int src2_base = ((imm >> 1) & 1) * 2 + 4;

  emit_insn (gen_avx512dq_shuf_<shuffletype>64x2_1_mask
              (operands[0], operands[1], operands[2],
               GEN_INT (src1_base), GEN_INT (src1_base + 1),
               GEN_INT (src2_base), GEN_INT (src2_base + 1),
               operands[4], operands[5]));
  DONE;
})
11790
;; 64x2 shuffle on the VI8F_256 modes, expressed as a vec_select from the
;; concatenation of operands 1 and 2.  Operands 3/4 index operand 1 (0-3) and
;; operands 5/6 index operand 2 (4-7); the condition requires each pair to be
;; consecutive.  The output code folds the pair start indices back into the
;; 2-bit immediate (bit 0 from operand 3, bit 1 from operand 5) and stores it
;; in operand 3 for printing.
11791 (define_insn "<mask_codefor>avx512dq_shuf_<shuffletype>64x2_1<mask_name>"
11792 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
11793 (vec_select:VI8F_256
11794 (vec_concat:<ssedoublemode>
11795 (match_operand:VI8F_256 1 "register_operand" "v")
11796 (match_operand:VI8F_256 2 "nonimmediate_operand" "vm"))
11797 (parallel [(match_operand 3 "const_0_to_3_operand")
11798 (match_operand 4 "const_0_to_3_operand")
11799 (match_operand 5 "const_4_to_7_operand")
11800 (match_operand 6 "const_4_to_7_operand")])))]
11801 "TARGET_AVX512VL
11802 && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
11803 && INTVAL (operands[5]) == (INTVAL (operands[6]) - 1))"
11804 {
11805 int mask;
11806 mask = INTVAL (operands[3]) / 2;
11807 mask |= (INTVAL (operands[5]) - 4) / 2 << 1;
11808 operands[3] = GEN_INT (mask);
11809 return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand7>|%0<mask_operand7>, %1, %2, %3}";
11810 }
11811 [(set_attr "type" "sselog")
11812 (set_attr "length_immediate" "1")
11813 (set_attr "prefix" "evex")
11814 (set_attr "mode" "XI")])
11815
;; Masked 64x2 shuffle on the V8FI modes.  Operand 3 is an 8-bit immediate
;; made of four 2-bit fields: the two low fields each pick an element pair of
;; operand 1 for result pairs 0 and 1, the two high fields each pick an
;; element pair of operand 2 for result pairs 2 and 3.  The immediate is
;; expanded into the eight explicit vec_select indices expected by the
;; _1_mask pattern (operand 2's elements are numbered 8-15 in the
;; concatenation).
(define_expand "avx512f_shuf_<shuffletype>64x2_mask"
  [(match_operand:V8FI 0 "register_operand")
   (match_operand:V8FI 1 "register_operand")
   (match_operand:V8FI 2 "nonimmediate_operand")
   (match_operand:SI 3 "const_0_to_255_operand")
   (match_operand:V8FI 4 "register_operand")
   (match_operand:QI 5 "register_operand")]
  "TARGET_AVX512F"
{
  const int imm = INTVAL (operands[3]);
  /* First index of the element pair selected by each 2-bit field.  */
  const int pair0 = (imm & 3) * 2;
  const int pair1 = ((imm >> 2) & 3) * 2;
  const int pair2 = ((imm >> 4) & 3) * 2 + 8;
  const int pair3 = ((imm >> 6) & 3) * 2 + 8;

  emit_insn (gen_avx512f_shuf_<shuffletype>64x2_1_mask
              (operands[0], operands[1], operands[2],
               GEN_INT (pair0), GEN_INT (pair0 + 1),
               GEN_INT (pair1), GEN_INT (pair1 + 1),
               GEN_INT (pair2), GEN_INT (pair2 + 1),
               GEN_INT (pair3), GEN_INT (pair3 + 1),
               operands[4], operands[5]));
  DONE;
})
11839
;; 64x2 shuffle on the V8FI modes, expressed as a vec_select from the
;; concatenation of operands 1 and 2.  Operands 3-6 index operand 1 (0-7) and
;; operands 7-10 index operand 2 (8-15); the condition requires the operands
;; to form four consecutive pairs (3/4, 5/6, 7/8, 9/10).  The output code
;; folds the four pair start indices back into the 8-bit immediate, two bits
;; per pair, and stores it in operand 3 for printing.
11840 (define_insn "avx512f_shuf_<shuffletype>64x2_1<mask_name>"
11841 [(set (match_operand:V8FI 0 "register_operand" "=v")
11842 (vec_select:V8FI
11843 (vec_concat:<ssedoublemode>
11844 (match_operand:V8FI 1 "register_operand" "v")
11845 (match_operand:V8FI 2 "nonimmediate_operand" "vm"))
11846 (parallel [(match_operand 3 "const_0_to_7_operand")
11847 (match_operand 4 "const_0_to_7_operand")
11848 (match_operand 5 "const_0_to_7_operand")
11849 (match_operand 6 "const_0_to_7_operand")
11850 (match_operand 7 "const_8_to_15_operand")
11851 (match_operand 8 "const_8_to_15_operand")
11852 (match_operand 9 "const_8_to_15_operand")
11853 (match_operand 10 "const_8_to_15_operand")])))]
11854 "TARGET_AVX512F
11855 && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
11856 && INTVAL (operands[5]) == (INTVAL (operands[6]) - 1)
11857 && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
11858 && INTVAL (operands[9]) == (INTVAL (operands[10]) - 1))"
11859 {
11860 int mask;
11861 mask = INTVAL (operands[3]) / 2;
11862 mask |= INTVAL (operands[5]) / 2 << 2;
11863 mask |= (INTVAL (operands[7]) - 8) / 2 << 4;
11864 mask |= (INTVAL (operands[9]) - 8) / 2 << 6;
11865 operands[3] = GEN_INT (mask);
11866
11867 return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
11868 }
11869 [(set_attr "type" "sselog")
11870 (set_attr "length_immediate" "1")
11871 (set_attr "prefix" "evex")
11872 (set_attr "mode" "<sseinsnmode>")])
11873
;; Masked 32x4 shuffle on the VI4F_256 modes.  Operand 3 is a 2-bit
;; immediate: bit 0 picks which group of four elements of operand 1 goes to
;; the low half of the result, bit 1 picks which group of four elements of
;; operand 2 goes to the high half.  The immediate is expanded into the eight
;; explicit vec_select indices expected by the _1_mask pattern (operand 2's
;; elements are numbered 8-15 in the concatenation).
(define_expand "avx512vl_shuf_<shuffletype>32x4_mask"
  [(match_operand:VI4F_256 0 "register_operand")
   (match_operand:VI4F_256 1 "register_operand")
   (match_operand:VI4F_256 2 "nonimmediate_operand")
   (match_operand:SI 3 "const_0_to_3_operand")
   (match_operand:VI4F_256 4 "register_operand")
   (match_operand:QI 5 "register_operand")]
  "TARGET_AVX512VL"
{
  const int imm = INTVAL (operands[3]);
  /* First index of the four-element group taken from each source.  */
  const int src1_base = (imm & 1) * 4;
  const int src2_base = ((imm >> 1) & 1) * 4 + 8;

  emit_insn (gen_avx512vl_shuf_<shuffletype>32x4_1_mask
              (operands[0], operands[1], operands[2],
               GEN_INT (src1_base), GEN_INT (src1_base + 1),
               GEN_INT (src1_base + 2), GEN_INT (src1_base + 3),
               GEN_INT (src2_base), GEN_INT (src2_base + 1),
               GEN_INT (src2_base + 2), GEN_INT (src2_base + 3),
               operands[4], operands[5]));
  DONE;
})
11897
;; 32x4 shuffle on the VI4F_256 modes, expressed as a vec_select from the
;; concatenation of operands 1 and 2.  Operands 3-6 index operand 1 (0-7) and
;; operands 7-10 index operand 2 (8-15); the condition requires each group of
;; four to be consecutive, starting at operand 3 and operand 7 respectively.
;; The output code folds the two group start indices back into the 2-bit
;; immediate (bit 0 from operand 3, bit 1 from operand 7) and stores it in
;; operand 3 for printing.
11898 (define_insn "<mask_codefor>avx512vl_shuf_<shuffletype>32x4_1<mask_name>"
11899 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
11900 (vec_select:VI4F_256
11901 (vec_concat:<ssedoublemode>
11902 (match_operand:VI4F_256 1 "register_operand" "v")
11903 (match_operand:VI4F_256 2 "nonimmediate_operand" "vm"))
11904 (parallel [(match_operand 3 "const_0_to_7_operand")
11905 (match_operand 4 "const_0_to_7_operand")
11906 (match_operand 5 "const_0_to_7_operand")
11907 (match_operand 6 "const_0_to_7_operand")
11908 (match_operand 7 "const_8_to_15_operand")
11909 (match_operand 8 "const_8_to_15_operand")
11910 (match_operand 9 "const_8_to_15_operand")
11911 (match_operand 10 "const_8_to_15_operand")])))]
11912 "TARGET_AVX512VL
11913 && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
11914 && INTVAL (operands[3]) == (INTVAL (operands[5]) - 2)
11915 && INTVAL (operands[3]) == (INTVAL (operands[6]) - 3)
11916 && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
11917 && INTVAL (operands[7]) == (INTVAL (operands[9]) - 2)
11918 && INTVAL (operands[7]) == (INTVAL (operands[10]) - 3))"
11919 {
11920 int mask;
11921 mask = INTVAL (operands[3]) / 4;
11922 mask |= (INTVAL (operands[7]) - 8) / 4 << 1;
11923 operands[3] = GEN_INT (mask);
11924
11925 return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
11926 }
11927 [(set_attr "type" "sselog")
11928 (set_attr "length_immediate" "1")
11929 (set_attr "prefix" "evex")
11930 (set_attr "mode" "<sseinsnmode>")])
11931
;; Masked 32x4 shuffle on the V16FI modes.  Operand 3 is an 8-bit immediate
;; made of four 2-bit fields: the two low fields each pick a group of four
;; elements of operand 1 for result groups 0 and 1, the two high fields each
;; pick a group of four elements of operand 2 for result groups 2 and 3.  The
;; immediate is expanded into the sixteen explicit vec_select indices
;; expected by the _1_mask pattern (operand 2's elements are numbered 16-31
;; in the concatenation).
(define_expand "avx512f_shuf_<shuffletype>32x4_mask"
  [(match_operand:V16FI 0 "register_operand")
   (match_operand:V16FI 1 "register_operand")
   (match_operand:V16FI 2 "nonimmediate_operand")
   (match_operand:SI 3 "const_0_to_255_operand")
   (match_operand:V16FI 4 "register_operand")
   (match_operand:HI 5 "register_operand")]
  "TARGET_AVX512F"
{
  const int imm = INTVAL (operands[3]);
  /* First index of the four-element group selected by each 2-bit field.  */
  const int grp0 = (imm & 3) * 4;
  const int grp1 = ((imm >> 2) & 3) * 4;
  const int grp2 = ((imm >> 4) & 3) * 4 + 16;
  const int grp3 = ((imm >> 6) & 3) * 4 + 16;

  emit_insn (gen_avx512f_shuf_<shuffletype>32x4_1_mask
              (operands[0], operands[1], operands[2],
               GEN_INT (grp0), GEN_INT (grp0 + 1),
               GEN_INT (grp0 + 2), GEN_INT (grp0 + 3),
               GEN_INT (grp1), GEN_INT (grp1 + 1),
               GEN_INT (grp1 + 2), GEN_INT (grp1 + 3),
               GEN_INT (grp2), GEN_INT (grp2 + 1),
               GEN_INT (grp2 + 2), GEN_INT (grp2 + 3),
               GEN_INT (grp3), GEN_INT (grp3 + 1),
               GEN_INT (grp3 + 2), GEN_INT (grp3 + 3),
               operands[4], operands[5]));
  DONE;
})
11963
;; 32x4 shuffle on the V16FI modes, expressed as a vec_select from the
;; concatenation of operands 1 and 2.  Operands 3-10 index operand 1 (0-15)
;; and operands 11-18 index operand 2 (16-31); the condition requires them to
;; form four runs of four consecutive indices, starting at operands 3, 7, 11
;; and 15.  The output code folds the four run start indices back into the
;; 8-bit immediate, two bits per run, and stores it in operand 3 for
;; printing.
11964 (define_insn "avx512f_shuf_<shuffletype>32x4_1<mask_name>"
11965 [(set (match_operand:V16FI 0 "register_operand" "=v")
11966 (vec_select:V16FI
11967 (vec_concat:<ssedoublemode>
11968 (match_operand:V16FI 1 "register_operand" "v")
11969 (match_operand:V16FI 2 "nonimmediate_operand" "vm"))
11970 (parallel [(match_operand 3 "const_0_to_15_operand")
11971 (match_operand 4 "const_0_to_15_operand")
11972 (match_operand 5 "const_0_to_15_operand")
11973 (match_operand 6 "const_0_to_15_operand")
11974 (match_operand 7 "const_0_to_15_operand")
11975 (match_operand 8 "const_0_to_15_operand")
11976 (match_operand 9 "const_0_to_15_operand")
11977 (match_operand 10 "const_0_to_15_operand")
11978 (match_operand 11 "const_16_to_31_operand")
11979 (match_operand 12 "const_16_to_31_operand")
11980 (match_operand 13 "const_16_to_31_operand")
11981 (match_operand 14 "const_16_to_31_operand")
11982 (match_operand 15 "const_16_to_31_operand")
11983 (match_operand 16 "const_16_to_31_operand")
11984 (match_operand 17 "const_16_to_31_operand")
11985 (match_operand 18 "const_16_to_31_operand")])))]
11986 "TARGET_AVX512F
11987 && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
11988 && INTVAL (operands[3]) == (INTVAL (operands[5]) - 2)
11989 && INTVAL (operands[3]) == (INTVAL (operands[6]) - 3)
11990 && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
11991 && INTVAL (operands[7]) == (INTVAL (operands[9]) - 2)
11992 && INTVAL (operands[7]) == (INTVAL (operands[10]) - 3)
11993 && INTVAL (operands[11]) == (INTVAL (operands[12]) - 1)
11994 && INTVAL (operands[11]) == (INTVAL (operands[13]) - 2)
11995 && INTVAL (operands[11]) == (INTVAL (operands[14]) - 3)
11996 && INTVAL (operands[15]) == (INTVAL (operands[16]) - 1)
11997 && INTVAL (operands[15]) == (INTVAL (operands[17]) - 2)
11998 && INTVAL (operands[15]) == (INTVAL (operands[18]) - 3))"
11999 {
12000 int mask;
12001 mask = INTVAL (operands[3]) / 4;
12002 mask |= INTVAL (operands[7]) / 4 << 2;
12003 mask |= (INTVAL (operands[11]) - 16) / 4 << 4;
12004 mask |= (INTVAL (operands[15]) - 16) / 4 << 6;
12005 operands[3] = GEN_INT (mask);
12006
12007 return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
12008 }
12009 [(set_attr "type" "sselog")
12010 (set_attr "length_immediate" "1")
12011 (set_attr "prefix" "evex")
12012 (set_attr "mode" "<sseinsnmode>")])
12013
;; Expander for the masked V16SI pshufd with an 8-bit immediate (operand 2).
;; The immediate is cut into four 2-bit selectors; they are handed to
;; gen_avx512f_pshufd_1_mask four times with offsets 0, 4, 8 and 12, so each
;; group of four elements receives the same permutation within its group.
;; Operands 3 and 4 are forwarded unchanged as the trailing arguments
;; (presumably merge source and write mask -- the masked insn variant is
;; generated from a substitution outside this chunk).
12014 (define_expand "avx512f_pshufdv3_mask"
12015 [(match_operand:V16SI 0 "register_operand")
12016 (match_operand:V16SI 1 "nonimmediate_operand")
12017 (match_operand:SI 2 "const_0_to_255_operand")
12018 (match_operand:V16SI 3 "register_operand")
12019 (match_operand:HI 4 "register_operand")]
12020 "TARGET_AVX512F"
12021 {
12022 int mask = INTVAL (operands[2]);
12023 emit_insn (gen_avx512f_pshufd_1_mask (operands[0], operands[1],
12024 GEN_INT ((mask >> 0) & 3),
12025 GEN_INT ((mask >> 2) & 3),
12026 GEN_INT ((mask >> 4) & 3),
12027 GEN_INT ((mask >> 6) & 3),
12028 GEN_INT (((mask >> 0) & 3) + 4),
12029 GEN_INT (((mask >> 2) & 3) + 4),
12030 GEN_INT (((mask >> 4) & 3) + 4),
12031 GEN_INT (((mask >> 6) & 3) + 4),
12032 GEN_INT (((mask >> 0) & 3) + 8),
12033 GEN_INT (((mask >> 2) & 3) + 8),
12034 GEN_INT (((mask >> 4) & 3) + 8),
12035 GEN_INT (((mask >> 6) & 3) + 8),
12036 GEN_INT (((mask >> 0) & 3) + 12),
12037 GEN_INT (((mask >> 2) & 3) + 12),
12038 GEN_INT (((mask >> 4) & 3) + 12),
12039 GEN_INT (((mask >> 6) & 3) + 12),
12040 operands[3], operands[4]));
12041 DONE;
12042 })
12043
;; Insn: vpshufd on V16SI written as a vec_select with sixteen index
;; operands (2-17).  The condition requires operands 6-9, 10-13 and 14-17 to
;; equal operands 2-5 plus 4, 8 and 12 respectively, i.e. every group of
;; four elements uses the same selectors, which is what a single 8-bit
;; immediate can express.  The output code packs operands 2-5 (two bits
;; each, lowest first) into that immediate and overwrites operands[2] with
;; it so the template can print it as %2.
12044 (define_insn "avx512f_pshufd_1<mask_name>"
12045 [(set (match_operand:V16SI 0 "register_operand" "=v")
12046 (vec_select:V16SI
12047 (match_operand:V16SI 1 "nonimmediate_operand" "vm")
12048 (parallel [(match_operand 2 "const_0_to_3_operand")
12049 (match_operand 3 "const_0_to_3_operand")
12050 (match_operand 4 "const_0_to_3_operand")
12051 (match_operand 5 "const_0_to_3_operand")
12052 (match_operand 6 "const_4_to_7_operand")
12053 (match_operand 7 "const_4_to_7_operand")
12054 (match_operand 8 "const_4_to_7_operand")
12055 (match_operand 9 "const_4_to_7_operand")
12056 (match_operand 10 "const_8_to_11_operand")
12057 (match_operand 11 "const_8_to_11_operand")
12058 (match_operand 12 "const_8_to_11_operand")
12059 (match_operand 13 "const_8_to_11_operand")
12060 (match_operand 14 "const_12_to_15_operand")
12061 (match_operand 15 "const_12_to_15_operand")
12062 (match_operand 16 "const_12_to_15_operand")
12063 (match_operand 17 "const_12_to_15_operand")])))]
12064 "TARGET_AVX512F
12065 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
12066 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
12067 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
12068 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])
12069 && INTVAL (operands[2]) + 8 == INTVAL (operands[10])
12070 && INTVAL (operands[3]) + 8 == INTVAL (operands[11])
12071 && INTVAL (operands[4]) + 8 == INTVAL (operands[12])
12072 && INTVAL (operands[5]) + 8 == INTVAL (operands[13])
12073 && INTVAL (operands[2]) + 12 == INTVAL (operands[14])
12074 && INTVAL (operands[3]) + 12 == INTVAL (operands[15])
12075 && INTVAL (operands[4]) + 12 == INTVAL (operands[16])
12076 && INTVAL (operands[5]) + 12 == INTVAL (operands[17])"
12077 {
12078 int mask = 0;
12079 mask |= INTVAL (operands[2]) << 0;
12080 mask |= INTVAL (operands[3]) << 2;
12081 mask |= INTVAL (operands[4]) << 4;
12082 mask |= INTVAL (operands[5]) << 6;
12083 operands[2] = GEN_INT (mask);
12084
12085 return "vpshufd\t{%2, %1, %0<mask_operand18>|%0<mask_operand18>, %1, %2}";
12086 }
12087 [(set_attr "type" "sselog1")
12088 (set_attr "prefix" "evex")
12089 (set_attr "length_immediate" "1")
12090 (set_attr "mode" "XI")])
12091
;; Expander for the masked V8SI pshufd.  Decodes the 8-bit immediate in
;; operand 2 into four 2-bit selectors and passes them to
;; gen_avx2_pshufd_1_mask twice (offsets 0 and 4), followed by operands 3
;; and 4 unchanged (presumably merge source and QImode write mask).
12092 (define_expand "avx512vl_pshufdv3_mask"
12093 [(match_operand:V8SI 0 "register_operand")
12094 (match_operand:V8SI 1 "nonimmediate_operand")
12095 (match_operand:SI 2 "const_0_to_255_operand")
12096 (match_operand:V8SI 3 "register_operand")
12097 (match_operand:QI 4 "register_operand")]
12098 "TARGET_AVX512VL"
12099 {
12100 int mask = INTVAL (operands[2]);
12101 emit_insn (gen_avx2_pshufd_1_mask (operands[0], operands[1],
12102 GEN_INT ((mask >> 0) & 3),
12103 GEN_INT ((mask >> 2) & 3),
12104 GEN_INT ((mask >> 4) & 3),
12105 GEN_INT ((mask >> 6) & 3),
12106 GEN_INT (((mask >> 0) & 3) + 4),
12107 GEN_INT (((mask >> 2) & 3) + 4),
12108 GEN_INT (((mask >> 4) & 3) + 4),
12109 GEN_INT (((mask >> 6) & 3) + 4),
12110 operands[3], operands[4]));
12111 DONE;
12112 })
12113
;; Expander for the unmasked V8SI pshufd.  Splits the 8-bit immediate in
;; operand 2 into four 2-bit selectors and emits avx2_pshufd_1 with those
;; selectors for elements 0-3 and the same selectors plus 4 for elements 4-7.
12114 (define_expand "avx2_pshufdv3"
12115 [(match_operand:V8SI 0 "register_operand")
12116 (match_operand:V8SI 1 "nonimmediate_operand")
12117 (match_operand:SI 2 "const_0_to_255_operand")]
12118 "TARGET_AVX2"
12119 {
12120 int mask = INTVAL (operands[2]);
12121 emit_insn (gen_avx2_pshufd_1 (operands[0], operands[1],
12122 GEN_INT ((mask >> 0) & 3),
12123 GEN_INT ((mask >> 2) & 3),
12124 GEN_INT ((mask >> 4) & 3),
12125 GEN_INT ((mask >> 6) & 3),
12126 GEN_INT (((mask >> 0) & 3) + 4),
12127 GEN_INT (((mask >> 2) & 3) + 4),
12128 GEN_INT (((mask >> 4) & 3) + 4),
12129 GEN_INT (((mask >> 6) & 3) + 4)));
12130 DONE;
12131 })
12132
;; Insn: vpshufd on V8SI as a vec_select with eight index operands (2-9).
;; The condition requires operands 6-9 to equal operands 2-5 plus 4, so both
;; groups of four share one set of selectors.  The output code packs
;; operands 2-5 (two bits each) into the immediate and stores it in
;; operands[2].  <mask_avx512vl_condition> and <mask_operand10> belong to
;; the masking substitution defined outside this chunk.
12133 (define_insn "avx2_pshufd_1<mask_name>"
12134 [(set (match_operand:V8SI 0 "register_operand" "=v")
12135 (vec_select:V8SI
12136 (match_operand:V8SI 1 "nonimmediate_operand" "vm")
12137 (parallel [(match_operand 2 "const_0_to_3_operand")
12138 (match_operand 3 "const_0_to_3_operand")
12139 (match_operand 4 "const_0_to_3_operand")
12140 (match_operand 5 "const_0_to_3_operand")
12141 (match_operand 6 "const_4_to_7_operand")
12142 (match_operand 7 "const_4_to_7_operand")
12143 (match_operand 8 "const_4_to_7_operand")
12144 (match_operand 9 "const_4_to_7_operand")])))]
12145 "TARGET_AVX2
12146 && <mask_avx512vl_condition>
12147 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
12148 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
12149 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
12150 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])"
12151 {
12152 int mask = 0;
12153 mask |= INTVAL (operands[2]) << 0;
12154 mask |= INTVAL (operands[3]) << 2;
12155 mask |= INTVAL (operands[4]) << 4;
12156 mask |= INTVAL (operands[5]) << 6;
12157 operands[2] = GEN_INT (mask);
12158
12159 return "vpshufd\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
12160 }
12161 [(set_attr "type" "sselog1")
12162 (set_attr "prefix" "maybe_evex")
12163 (set_attr "length_immediate" "1")
12164 (set_attr "mode" "OI")])
12165
;; Expander for the masked V4SI pshufd.  Decodes the 8-bit immediate in
;; operand 2 into four 2-bit selectors for gen_sse2_pshufd_1_mask and
;; forwards operands 3 and 4 unchanged (presumably merge source and QImode
;; write mask).
12166 (define_expand "avx512vl_pshufd_mask"
12167 [(match_operand:V4SI 0 "register_operand")
12168 (match_operand:V4SI 1 "nonimmediate_operand")
12169 (match_operand:SI 2 "const_0_to_255_operand")
12170 (match_operand:V4SI 3 "register_operand")
12171 (match_operand:QI 4 "register_operand")]
12172 "TARGET_AVX512VL"
12173 {
12174 int mask = INTVAL (operands[2]);
12175 emit_insn (gen_sse2_pshufd_1_mask (operands[0], operands[1],
12176 GEN_INT ((mask >> 0) & 3),
12177 GEN_INT ((mask >> 2) & 3),
12178 GEN_INT ((mask >> 4) & 3),
12179 GEN_INT ((mask >> 6) & 3),
12180 operands[3], operands[4]));
12181 DONE;
12182 })
12183
;; Expander for the V4SI pshufd.  Operand 2 is any const_int; only its low
;; eight bits are used, as four 2-bit selectors passed to gen_sse2_pshufd_1.
12184 (define_expand "sse2_pshufd"
12185 [(match_operand:V4SI 0 "register_operand")
12186 (match_operand:V4SI 1 "nonimmediate_operand")
12187 (match_operand:SI 2 "const_int_operand")]
12188 "TARGET_SSE2"
12189 {
12190 int mask = INTVAL (operands[2]);
12191 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
12192 GEN_INT ((mask >> 0) & 3),
12193 GEN_INT ((mask >> 2) & 3),
12194 GEN_INT ((mask >> 4) & 3),
12195 GEN_INT ((mask >> 6) & 3)));
12196 DONE;
12197 })
12198
;; Insn: pshufd on V4SI as a vec_select with four index operands (2-5).
;; The output code packs the four indices (two bits each, lowest first)
;; into the immediate and stores it in operands[2] for the template.
;; The leading "%v" in the template is resolved by the i386 output
;; machinery outside this chunk (presumably the optional VEX "v" prefix).
12199 (define_insn "sse2_pshufd_1<mask_name>"
12200 [(set (match_operand:V4SI 0 "register_operand" "=v")
12201 (vec_select:V4SI
12202 (match_operand:V4SI 1 "nonimmediate_operand" "vm")
12203 (parallel [(match_operand 2 "const_0_to_3_operand")
12204 (match_operand 3 "const_0_to_3_operand")
12205 (match_operand 4 "const_0_to_3_operand")
12206 (match_operand 5 "const_0_to_3_operand")])))]
12207 "TARGET_SSE2 && <mask_avx512vl_condition>"
12208 {
12209 int mask = 0;
12210 mask |= INTVAL (operands[2]) << 0;
12211 mask |= INTVAL (operands[3]) << 2;
12212 mask |= INTVAL (operands[4]) << 4;
12213 mask |= INTVAL (operands[5]) << 6;
12214 operands[2] = GEN_INT (mask);
12215
12216 return "%vpshufd\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
12217 }
12218 [(set_attr "type" "sselog1")
12219 (set_attr "prefix_data16" "1")
12220 (set_attr "prefix" "<mask_prefix2>")
12221 (set_attr "length_immediate" "1")
12222 (set_attr "mode" "TI")])
12223
;; Insn: vpshuflw on V32HI.  Unlike the narrower variants in this file it is
;; modelled as an opaque UNSPEC_PSHUFLW of the source and the 8-bit
;; immediate rather than as an explicit vec_select.
12224 (define_insn "<mask_codefor>avx512bw_pshuflwv32hi<mask_name>"
12225 [(set (match_operand:V32HI 0 "register_operand" "=v")
12226 (unspec:V32HI
12227 [(match_operand:V32HI 1 "nonimmediate_operand" "vm")
12228 (match_operand:SI 2 "const_0_to_255_operand" "n")]
12229 UNSPEC_PSHUFLW))]
12230 "TARGET_AVX512BW"
12231 "vpshuflw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12232 [(set_attr "type" "sselog")
12233 (set_attr "prefix" "evex")
12234 (set_attr "mode" "XI")])
12235
;; Expander for the masked V16HI pshuflw.  Decodes the 8-bit immediate in
;; operand 2 into four 2-bit selectors and passes them to
;; gen_avx2_pshuflw_1_mask twice, with offsets 0 and 8, followed by
;; operands 3 and 4 unchanged (presumably merge source and HImode write
;; mask).
12236 (define_expand "avx512vl_pshuflwv3_mask"
12237 [(match_operand:V16HI 0 "register_operand")
12238 (match_operand:V16HI 1 "nonimmediate_operand")
12239 (match_operand:SI 2 "const_0_to_255_operand")
12240 (match_operand:V16HI 3 "register_operand")
12241 (match_operand:HI 4 "register_operand")]
12242 "TARGET_AVX512VL && TARGET_AVX512BW"
12243 {
12244 int mask = INTVAL (operands[2]);
12245 emit_insn (gen_avx2_pshuflw_1_mask (operands[0], operands[1],
12246 GEN_INT ((mask >> 0) & 3),
12247 GEN_INT ((mask >> 2) & 3),
12248 GEN_INT ((mask >> 4) & 3),
12249 GEN_INT ((mask >> 6) & 3),
12250 GEN_INT (((mask >> 0) & 3) + 8),
12251 GEN_INT (((mask >> 2) & 3) + 8),
12252 GEN_INT (((mask >> 4) & 3) + 8),
12253 GEN_INT (((mask >> 6) & 3) + 8),
12254 operands[3], operands[4]));
12255 DONE;
12256 })
12257
;; Expander for the unmasked V16HI pshuflw.  Splits the 8-bit immediate in
;; operand 2 into four 2-bit selectors and emits avx2_pshuflw_1 with them
;; at offsets 0 and 8.
12258 (define_expand "avx2_pshuflwv3"
12259 [(match_operand:V16HI 0 "register_operand")
12260 (match_operand:V16HI 1 "nonimmediate_operand")
12261 (match_operand:SI 2 "const_0_to_255_operand")]
12262 "TARGET_AVX2"
12263 {
12264 int mask = INTVAL (operands[2]);
12265 emit_insn (gen_avx2_pshuflw_1 (operands[0], operands[1],
12266 GEN_INT ((mask >> 0) & 3),
12267 GEN_INT ((mask >> 2) & 3),
12268 GEN_INT ((mask >> 4) & 3),
12269 GEN_INT ((mask >> 6) & 3),
12270 GEN_INT (((mask >> 0) & 3) + 8),
12271 GEN_INT (((mask >> 2) & 3) + 8),
12272 GEN_INT (((mask >> 4) & 3) + 8),
12273 GEN_INT (((mask >> 6) & 3) + 8)));
12274 DONE;
12275 })
12276
;; Insn: vpshuflw on V16HI as a vec_select.  Result elements 0-3 and 8-11
;; are chosen by operands 2-5 and 6-9; elements 4-7 and 12-15 are the fixed
;; identity indices.  The condition requires operands 6-9 to equal operands
;; 2-5 plus 8, so both halves use the same selectors.  The output code packs
;; operands 2-5 (two bits each) into the immediate stored in operands[2].
12277 (define_insn "avx2_pshuflw_1<mask_name>"
12278 [(set (match_operand:V16HI 0 "register_operand" "=v")
12279 (vec_select:V16HI
12280 (match_operand:V16HI 1 "nonimmediate_operand" "vm")
12281 (parallel [(match_operand 2 "const_0_to_3_operand")
12282 (match_operand 3 "const_0_to_3_operand")
12283 (match_operand 4 "const_0_to_3_operand")
12284 (match_operand 5 "const_0_to_3_operand")
12285 (const_int 4)
12286 (const_int 5)
12287 (const_int 6)
12288 (const_int 7)
12289 (match_operand 6 "const_8_to_11_operand")
12290 (match_operand 7 "const_8_to_11_operand")
12291 (match_operand 8 "const_8_to_11_operand")
12292 (match_operand 9 "const_8_to_11_operand")
12293 (const_int 12)
12294 (const_int 13)
12295 (const_int 14)
12296 (const_int 15)])))]
12297 "TARGET_AVX2
12298 && <mask_avx512bw_condition> && <mask_avx512vl_condition>
12299 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
12300 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
12301 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
12302 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
12303 {
12304 int mask = 0;
12305 mask |= INTVAL (operands[2]) << 0;
12306 mask |= INTVAL (operands[3]) << 2;
12307 mask |= INTVAL (operands[4]) << 4;
12308 mask |= INTVAL (operands[5]) << 6;
12309 operands[2] = GEN_INT (mask);
12310
12311 return "vpshuflw\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
12312 }
12313 [(set_attr "type" "sselog")
12314 (set_attr "prefix" "maybe_evex")
12315 (set_attr "length_immediate" "1")
12316 (set_attr "mode" "OI")])
12317
;; Expander for the masked V8HI pshuflw.  Decodes the 8-bit immediate in
;; operand 2 into four 2-bit selectors for gen_sse2_pshuflw_1_mask and
;; forwards operands 3 and 4 unchanged (presumably merge source and QImode
;; write mask).
12318 (define_expand "avx512vl_pshuflw_mask"
12319 [(match_operand:V8HI 0 "register_operand")
12320 (match_operand:V8HI 1 "nonimmediate_operand")
12321 (match_operand:SI 2 "const_0_to_255_operand")
12322 (match_operand:V8HI 3 "register_operand")
12323 (match_operand:QI 4 "register_operand")]
12324 "TARGET_AVX512VL && TARGET_AVX512BW"
12325 {
12326 int mask = INTVAL (operands[2]);
12327 emit_insn (gen_sse2_pshuflw_1_mask (operands[0], operands[1],
12328 GEN_INT ((mask >> 0) & 3),
12329 GEN_INT ((mask >> 2) & 3),
12330 GEN_INT ((mask >> 4) & 3),
12331 GEN_INT ((mask >> 6) & 3),
12332 operands[3], operands[4]));
12333 DONE;
12334 })
12335
;; Expander for the V8HI pshuflw.  Operand 2 is any const_int; only its low
;; eight bits are used, as four 2-bit selectors passed to
;; gen_sse2_pshuflw_1.
12336 (define_expand "sse2_pshuflw"
12337 [(match_operand:V8HI 0 "register_operand")
12338 (match_operand:V8HI 1 "nonimmediate_operand")
12339 (match_operand:SI 2 "const_int_operand")]
12340 "TARGET_SSE2"
12341 {
12342 int mask = INTVAL (operands[2]);
12343 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
12344 GEN_INT ((mask >> 0) & 3),
12345 GEN_INT ((mask >> 2) & 3),
12346 GEN_INT ((mask >> 4) & 3),
12347 GEN_INT ((mask >> 6) & 3)));
12348 DONE;
12349 })
12350
;; Insn: pshuflw on V8HI as a vec_select.  Result elements 0-3 are chosen by
;; operands 2-5; elements 4-7 are the fixed identity indices.  The output
;; code packs operands 2-5 (two bits each) into the immediate stored in
;; operands[2] for the template.
12351 (define_insn "sse2_pshuflw_1<mask_name>"
12352 [(set (match_operand:V8HI 0 "register_operand" "=v")
12353 (vec_select:V8HI
12354 (match_operand:V8HI 1 "nonimmediate_operand" "vm")
12355 (parallel [(match_operand 2 "const_0_to_3_operand")
12356 (match_operand 3 "const_0_to_3_operand")
12357 (match_operand 4 "const_0_to_3_operand")
12358 (match_operand 5 "const_0_to_3_operand")
12359 (const_int 4)
12360 (const_int 5)
12361 (const_int 6)
12362 (const_int 7)])))]
12363 "TARGET_SSE2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
12364 {
12365 int mask = 0;
12366 mask |= INTVAL (operands[2]) << 0;
12367 mask |= INTVAL (operands[3]) << 2;
12368 mask |= INTVAL (operands[4]) << 4;
12369 mask |= INTVAL (operands[5]) << 6;
12370 operands[2] = GEN_INT (mask);
12371
12372 return "%vpshuflw\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
12373 }
12374 [(set_attr "type" "sselog")
12375 (set_attr "prefix_data16" "0")
12376 (set_attr "prefix_rep" "1")
12377 (set_attr "prefix" "maybe_vex")
12378 (set_attr "length_immediate" "1")
12379 (set_attr "mode" "TI")])
12380
;; Expander for the unmasked V16HI pshufhw.  Splits the 8-bit immediate in
;; operand 2 into four 2-bit selectors and emits avx2_pshufhw_1 with them
;; biased by 4 and by 12, i.e. as absolute indices into elements 4-7 and
;; 12-15.
12381 (define_expand "avx2_pshufhwv3"
12382 [(match_operand:V16HI 0 "register_operand")
12383 (match_operand:V16HI 1 "nonimmediate_operand")
12384 (match_operand:SI 2 "const_0_to_255_operand")]
12385 "TARGET_AVX2"
12386 {
12387 int mask = INTVAL (operands[2]);
12388 emit_insn (gen_avx2_pshufhw_1 (operands[0], operands[1],
12389 GEN_INT (((mask >> 0) & 3) + 4),
12390 GEN_INT (((mask >> 2) & 3) + 4),
12391 GEN_INT (((mask >> 4) & 3) + 4),
12392 GEN_INT (((mask >> 6) & 3) + 4),
12393 GEN_INT (((mask >> 0) & 3) + 12),
12394 GEN_INT (((mask >> 2) & 3) + 12),
12395 GEN_INT (((mask >> 4) & 3) + 12),
12396 GEN_INT (((mask >> 6) & 3) + 12)));
12397 DONE;
12398 })
12399
;; Insn: vpshufhw on V32HI, modelled as an opaque UNSPEC_PSHUFHW of the
;; source and the 8-bit immediate rather than as an explicit vec_select.
12400 (define_insn "<mask_codefor>avx512bw_pshufhwv32hi<mask_name>"
12401 [(set (match_operand:V32HI 0 "register_operand" "=v")
12402 (unspec:V32HI
12403 [(match_operand:V32HI 1 "nonimmediate_operand" "vm")
12404 (match_operand:SI 2 "const_0_to_255_operand" "n")]
12405 UNSPEC_PSHUFHW))]
12406 "TARGET_AVX512BW"
12407 "vpshufhw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12408 [(set_attr "type" "sselog")
12409 (set_attr "prefix" "evex")
12410 (set_attr "mode" "XI")])
12411
;; Expander for the masked V16HI pshufhw.  Decodes the 8-bit immediate in
;; operand 2 into four 2-bit selectors, biases them by 4 and by 12, and
;; passes them to gen_avx2_pshufhw_1_mask together with operands 3 and 4
;; (presumably merge source and HImode write mask).
12412 (define_expand "avx512vl_pshufhwv3_mask"
12413 [(match_operand:V16HI 0 "register_operand")
12414 (match_operand:V16HI 1 "nonimmediate_operand")
12415 (match_operand:SI 2 "const_0_to_255_operand")
12416 (match_operand:V16HI 3 "register_operand")
12417 (match_operand:HI 4 "register_operand")]
12418 "TARGET_AVX512VL && TARGET_AVX512BW"
12419 {
12420 int mask = INTVAL (operands[2]);
12421 emit_insn (gen_avx2_pshufhw_1_mask (operands[0], operands[1],
12422 GEN_INT (((mask >> 0) & 3) + 4),
12423 GEN_INT (((mask >> 2) & 3) + 4),
12424 GEN_INT (((mask >> 4) & 3) + 4),
12425 GEN_INT (((mask >> 6) & 3) + 4),
12426 GEN_INT (((mask >> 0) & 3) + 12),
12427 GEN_INT (((mask >> 2) & 3) + 12),
12428 GEN_INT (((mask >> 4) & 3) + 12),
12429 GEN_INT (((mask >> 6) & 3) + 12),
12430 operands[3], operands[4]));
12431 DONE;
12432 })
12433
;; Insn: vpshufhw on V16HI as a vec_select.  Result elements 0-3 and 8-11
;; are the fixed identity indices; elements 4-7 and 12-15 are chosen by
;; operands 2-5 and 6-9.  The condition requires operands 6-9 to equal
;; operands 2-5 plus 8.  The output code subtracts the bias of 4 from
;; operands 2-5 to recover the 2-bit selectors, packs them into the
;; immediate and stores it in operands[2].
12434 (define_insn "avx2_pshufhw_1<mask_name>"
12435 [(set (match_operand:V16HI 0 "register_operand" "=v")
12436 (vec_select:V16HI
12437 (match_operand:V16HI 1 "nonimmediate_operand" "vm")
12438 (parallel [(const_int 0)
12439 (const_int 1)
12440 (const_int 2)
12441 (const_int 3)
12442 (match_operand 2 "const_4_to_7_operand")
12443 (match_operand 3 "const_4_to_7_operand")
12444 (match_operand 4 "const_4_to_7_operand")
12445 (match_operand 5 "const_4_to_7_operand")
12446 (const_int 8)
12447 (const_int 9)
12448 (const_int 10)
12449 (const_int 11)
12450 (match_operand 6 "const_12_to_15_operand")
12451 (match_operand 7 "const_12_to_15_operand")
12452 (match_operand 8 "const_12_to_15_operand")
12453 (match_operand 9 "const_12_to_15_operand")])))]
12454 "TARGET_AVX2
12455 && <mask_avx512bw_condition> && <mask_avx512vl_condition>
12456 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
12457 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
12458 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
12459 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
12460 {
12461 int mask = 0;
12462 mask |= (INTVAL (operands[2]) - 4) << 0;
12463 mask |= (INTVAL (operands[3]) - 4) << 2;
12464 mask |= (INTVAL (operands[4]) - 4) << 4;
12465 mask |= (INTVAL (operands[5]) - 4) << 6;
12466 operands[2] = GEN_INT (mask);
12467
12468 return "vpshufhw\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
12469 }
12470 [(set_attr "type" "sselog")
12471 (set_attr "prefix" "maybe_evex")
12472 (set_attr "length_immediate" "1")
12473 (set_attr "mode" "OI")])
12474
;; Expander for the masked V8HI pshufhw.  Decodes the 8-bit immediate in
;; operand 2 into four 2-bit selectors, biases each by 4, and passes them
;; to gen_sse2_pshufhw_1_mask together with operands 3 and 4 (presumably
;; merge source and QImode write mask).
12475 (define_expand "avx512vl_pshufhw_mask"
12476 [(match_operand:V8HI 0 "register_operand")
12477 (match_operand:V8HI 1 "nonimmediate_operand")
12478 (match_operand:SI 2 "const_0_to_255_operand")
12479 (match_operand:V8HI 3 "register_operand")
12480 (match_operand:QI 4 "register_operand")]
12481 "TARGET_AVX512VL && TARGET_AVX512BW"
12482 {
12483 int mask = INTVAL (operands[2]);
12484 emit_insn (gen_sse2_pshufhw_1_mask (operands[0], operands[1],
12485 GEN_INT (((mask >> 0) & 3) + 4),
12486 GEN_INT (((mask >> 2) & 3) + 4),
12487 GEN_INT (((mask >> 4) & 3) + 4),
12488 GEN_INT (((mask >> 6) & 3) + 4),
12489 operands[3], operands[4]));
12490 DONE;
12491 })
12492
;; Expander for the V8HI pshufhw.  Operand 2 is any const_int; only its low
;; eight bits are used, as four 2-bit selectors that are biased by 4 and
;; passed to gen_sse2_pshufhw_1.
12493 (define_expand "sse2_pshufhw"
12494 [(match_operand:V8HI 0 "register_operand")
12495 (match_operand:V8HI 1 "nonimmediate_operand")
12496 (match_operand:SI 2 "const_int_operand")]
12497 "TARGET_SSE2"
12498 {
12499 int mask = INTVAL (operands[2]);
12500 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
12501 GEN_INT (((mask >> 0) & 3) + 4),
12502 GEN_INT (((mask >> 2) & 3) + 4),
12503 GEN_INT (((mask >> 4) & 3) + 4),
12504 GEN_INT (((mask >> 6) & 3) + 4)));
12505 DONE;
12506 })
12507
;; Insn: pshufhw on V8HI as a vec_select.  Result elements 0-3 are the
;; fixed identity indices; elements 4-7 are chosen by operands 2-5.  The
;; output code subtracts the bias of 4 from each operand, packs the
;; resulting 2-bit selectors into the immediate and stores it in
;; operands[2] for the template.
12508 (define_insn "sse2_pshufhw_1<mask_name>"
12509 [(set (match_operand:V8HI 0 "register_operand" "=v")
12510 (vec_select:V8HI
12511 (match_operand:V8HI 1 "nonimmediate_operand" "vm")
12512 (parallel [(const_int 0)
12513 (const_int 1)
12514 (const_int 2)
12515 (const_int 3)
12516 (match_operand 2 "const_4_to_7_operand")
12517 (match_operand 3 "const_4_to_7_operand")
12518 (match_operand 4 "const_4_to_7_operand")
12519 (match_operand 5 "const_4_to_7_operand")])))]
12520 "TARGET_SSE2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
12521 {
12522 int mask = 0;
12523 mask |= (INTVAL (operands[2]) - 4) << 0;
12524 mask |= (INTVAL (operands[3]) - 4) << 2;
12525 mask |= (INTVAL (operands[4]) - 4) << 4;
12526 mask |= (INTVAL (operands[5]) - 4) << 6;
12527 operands[2] = GEN_INT (mask);
12528
12529 return "%vpshufhw\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
12530 }
12531 [(set_attr "type" "sselog")
12532 (set_attr "prefix_rep" "1")
12533 (set_attr "prefix_data16" "0")
12534 (set_attr "prefix" "maybe_vex")
12535 (set_attr "length_immediate" "1")
12536 (set_attr "mode" "TI")])
12537
;; Expander: build a V4SI from the SImode operand 1 as a vec_merge (merge
;; mask const_int 1) of the duplicated scalar with operand 2.  The
;; preparation statement sets operand 2 to the V4SI zero vector, so the
;; scalar is merged with zeros.
12538 (define_expand "sse2_loadd"
12539 [(set (match_operand:V4SI 0 "register_operand")
12540 (vec_merge:V4SI
12541 (vec_duplicate:V4SI
12542 (match_operand:SI 1 "nonimmediate_operand"))
12543 (match_dup 2)
12544 (const_int 1)))]
12545 "TARGET_SSE"
12546 "operands[2] = CONST0_RTX (V4SImode);")
12547
;; Insn: vec_merge (merge mask const_int 1) of the duplicated SImode operand
;; 2 with the V4SI operand 1.  Five alternatives:
;;   0: movd from memory (isa sse2)
;;   1: movd from a general register
;;   2: movss from memory (noavx)
;;   3: movss from an SSE register, operand 1 tied to the destination (noavx)
;;   4: three-operand vmovss (avx)
;; In alternatives 0-2 operand 1 uses constraint "C" (presumably the zero
;; vector, matching reg_or_0_operand -- the constraint is defined outside
;; this chunk).
12548 (define_insn "sse2_loadld"
12549 [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x,x,x")
12550 (vec_merge:V4SI
12551 (vec_duplicate:V4SI
12552 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,x"))
12553 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0,x")
12554 (const_int 1)))]
12555 "TARGET_SSE"
12556 "@
12557 %vmovd\t{%2, %0|%0, %2}
12558 %vmovd\t{%2, %0|%0, %2}
12559 movss\t{%2, %0|%0, %2}
12560 movss\t{%2, %0|%0, %2}
12561 vmovss\t{%2, %1, %0|%0, %1, %2}"
12562 [(set_attr "isa" "sse2,*,noavx,noavx,avx")
12563 (set_attr "type" "ssemov")
12564 (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,vex")
12565 (set_attr "mode" "TI,TI,V4SF,SF,SF")])
12566
;; Insn: extract one scalar element (index operand 2) from a VI12_128
;; register with pextr<ssemodesuffix>.  Alternative 0 writes a general
;; register (printed with %k0), alternative 1 writes memory.
;; The prefix_data16 / prefix_extra attributes are complementary: for
;; alternative 0 in V8HImode prefix_data16 is "1" and prefix_extra is "*";
;; in every other case prefix_data16 is "*" and prefix_extra is "1".
12567 (define_insn "*vec_extract<mode>"
12568 [(set (match_operand:<ssescalarmode> 0 "nonimmediate_operand" "=r,m")
12569 (vec_select:<ssescalarmode>
12570 (match_operand:VI12_128 1 "register_operand" "x,x")
12571 (parallel
12572 [(match_operand:SI 2 "const_0_to_<ssescalarnummask>_operand")])))]
12573 "TARGET_SSE4_1"
12574 "@
12575 %vpextr<ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
12576 %vpextr<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12577 [(set_attr "type" "sselog1")
12578 (set (attr "prefix_data16")
12579 (if_then_else
12580 (and (eq_attr "alternative" "0")
12581 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
12582 (const_string "1")
12583 (const_string "*")))
12584 (set (attr "prefix_extra")
12585 (if_then_else
12586 (and (eq_attr "alternative" "0")
12587 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
12588 (const_string "*")
12589 (const_string "1")))
12590 (set_attr "length_immediate" "1")
12591 (set_attr "prefix" "maybe_vex")
12592 (set_attr "mode" "TI")])
12593
;; Insn: extract HImode element (index operand 2) of a V8HI register into a
;; general register with pextrw.  Enabled only for SSE2 without SSE4.1.
12594 (define_insn "*vec_extractv8hi_sse2"
12595 [(set (match_operand:HI 0 "register_operand" "=r")
12596 (vec_select:HI
12597 (match_operand:V8HI 1 "register_operand" "x")
12598 (parallel
12599 [(match_operand:SI 2 "const_0_to_7_operand")])))]
12600 "TARGET_SSE2 && !TARGET_SSE4_1"
12601 "pextrw\t{%2, %1, %k0|%k0, %1, %2}"
12602 [(set_attr "type" "sselog1")
12603 (set_attr "prefix_data16" "1")
12604 (set_attr "length_immediate" "1")
12605 (set_attr "mode" "TI")])
12606
;; Insn: extract a QImode element of a V16QI register and zero-extend it to
;; an SWI48 general register, using a single pextrb (SSE4.1).
12607 (define_insn "*vec_extractv16qi_zext"
12608 [(set (match_operand:SWI48 0 "register_operand" "=r")
12609 (zero_extend:SWI48
12610 (vec_select:QI
12611 (match_operand:V16QI 1 "register_operand" "x")
12612 (parallel
12613 [(match_operand:SI 2 "const_0_to_15_operand")]))))]
12614 "TARGET_SSE4_1"
12615 "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
12616 [(set_attr "type" "sselog1")
12617 (set_attr "prefix_extra" "1")
12618 (set_attr "length_immediate" "1")
12619 (set_attr "prefix" "maybe_vex")
12620 (set_attr "mode" "TI")])
12621
;; Insn: extract an HImode element of a V8HI register and zero-extend it to
;; an SWI48 general register, using a single pextrw (SSE2).
12622 (define_insn "*vec_extractv8hi_zext"
12623 [(set (match_operand:SWI48 0 "register_operand" "=r")
12624 (zero_extend:SWI48
12625 (vec_select:HI
12626 (match_operand:V8HI 1 "register_operand" "x")
12627 (parallel
12628 [(match_operand:SI 2 "const_0_to_7_operand")]))))]
12629 "TARGET_SSE2"
12630 "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}"
12631 [(set_attr "type" "sselog1")
12632 (set_attr "prefix_data16" "1")
12633 (set_attr "length_immediate" "1")
12634 (set_attr "prefix" "maybe_vex")
12635 (set_attr "mode" "TI")])
12636
;; Insn: extract a scalar element from a VI12_128 vector held in memory
;; into a general register.  The "#" template means no assembly is emitted
;; directly; the insn has to be split (presumably by the VI_128
;; memory_operand define_split further down in this file).
12637 (define_insn "*vec_extract<mode>_mem"
12638 [(set (match_operand:<ssescalarmode> 0 "register_operand" "=r")
12639 (vec_select:<ssescalarmode>
12640 (match_operand:VI12_128 1 "memory_operand" "o")
12641 (parallel
12642 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
12643 "TARGET_SSE"
12644 "#")
12645
;; Insn: extract element 0 of a vector into an SWI48 scalar.  The condition
;; rejects the memory-to-memory case.  The "#" template means the insn is
;; always split rather than output directly.  The second alternative is
;; limited to the "sse4" isa.
12646 (define_insn "*vec_extract<ssevecmodelower>_0"
12647 [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r ,r,x ,m")
12648 (vec_select:SWI48
12649 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "mYj,x,xm,x")
12650 (parallel [(const_int 0)])))]
12651 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
12652 "#"
12653 [(set_attr "isa" "*,sse4,*,*")])
12654
;; Insn-and-split: zero-extend element 0 of a V4SI register into a DImode
;; general register.  After reload it is replaced by a plain
;; zero_extend:DI whose source is the same hard register (same REGNO)
;; re-expressed in SImode.
12655 (define_insn_and_split "*vec_extractv4si_0_zext"
12656 [(set (match_operand:DI 0 "register_operand" "=r")
12657 (zero_extend:DI
12658 (vec_select:SI
12659 (match_operand:V4SI 1 "register_operand" "x")
12660 (parallel [(const_int 0)]))))]
12661 "TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_FROM_VEC"
12662 "#"
12663 "&& reload_completed"
12664 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
12665 "operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));")
12666
;; Insn: extract element 0 of a V2DI into a DImode SSE register or memory
;; on non-64-bit targets.  The memory-to-memory case is rejected, and the
;; "#" template means the insn is always split rather than output directly.
12667 (define_insn "*vec_extractv2di_0_sse"
12668 [(set (match_operand:DI 0 "nonimmediate_operand" "=x,m")
12669 (vec_select:DI
12670 (match_operand:V2DI 1 "nonimmediate_operand" "xm,x")
12671 (parallel [(const_int 0)])))]
12672 "TARGET_SSE && !TARGET_64BIT
12673 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
12674 "#")
12675
;; Split: after reload, extracting element 0 from a vector held in a
;; register becomes a plain move whose source is the same hard register
;; (same REGNO) re-expressed in the scalar mode <MODE>mode.
12676 (define_split
12677 [(set (match_operand:SWI48x 0 "nonimmediate_operand")
12678 (vec_select:SWI48x
12679 (match_operand:<ssevecmode> 1 "register_operand")
12680 (parallel [(const_int 0)])))]
12681 "TARGET_SSE && reload_completed"
12682 [(set (match_dup 0) (match_dup 1))]
12683 "operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1]));")
12684
;; Insn: extract SImode element (index operand 2) of a V4SI register.
;;   alternative 0: pextrd into a general register or memory
;;   alternative 1: psrldq in place (operand 1 tied to the destination, noavx)
;;   alternative 2: three-operand vpsrldq (avx)
;; For the byte-shift alternatives the element index is multiplied by 4 to
;; obtain the shift count in bytes before being printed.
12685 (define_insn "*vec_extractv4si"
12686 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,x,x")
12687 (vec_select:SI
12688 (match_operand:V4SI 1 "register_operand" "x,0,x")
12689 (parallel [(match_operand:SI 2 "const_0_to_3_operand")])))]
12690 "TARGET_SSE4_1"
12691 {
12692 switch (which_alternative)
12693 {
12694 case 0:
12695 return "%vpextrd\t{%2, %1, %0|%0, %1, %2}";
12696
12697 case 1:
12698 operands [2] = GEN_INT (INTVAL (operands[2]) * 4);
12699 return "psrldq\t{%2, %0|%0, %2}";
12700
12701 case 2:
12702 operands [2] = GEN_INT (INTVAL (operands[2]) * 4);
12703 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
12704
12705 default:
12706 gcc_unreachable ();
12707 }
12708 }
12709 [(set_attr "isa" "*,noavx,avx")
12710 (set_attr "type" "sselog1,sseishft1,sseishft1")
12711 (set_attr "prefix_extra" "1,*,*")
12712 (set_attr "length_immediate" "1")
12713 (set_attr "prefix" "maybe_vex,orig,vex")
12714 (set_attr "mode" "TI")])
12715
;; Insn: extract an SImode element of a V4SI register and zero-extend it to
;; a DImode general register with a single pextrd (64-bit, SSE4.1); the
;; destination is printed with %k0.
12716 (define_insn "*vec_extractv4si_zext"
12717 [(set (match_operand:DI 0 "register_operand" "=r")
12718 (zero_extend:DI
12719 (vec_select:SI
12720 (match_operand:V4SI 1 "register_operand" "x")
12721 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
12722 "TARGET_64BIT && TARGET_SSE4_1"
12723 "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
12724 [(set_attr "type" "sselog1")
12725 (set_attr "prefix_extra" "1")
12726 (set_attr "length_immediate" "1")
12727 (set_attr "prefix" "maybe_vex")
12728 (set_attr "mode" "TI")])
12729
;; Insn: extract an SImode element from a V4SI held in memory into an SSE
;; or general register.  The "#" template means it must be split
;; (presumably by the VI_128 memory_operand define_split further down in
;; this file).
12730 (define_insn "*vec_extractv4si_mem"
12731 [(set (match_operand:SI 0 "register_operand" "=x,r")
12732 (vec_select:SI
12733 (match_operand:V4SI 1 "memory_operand" "o,o")
12734 (parallel [(match_operand 2 "const_0_to_3_operand")])))]
12735 "TARGET_SSE"
12736 "#")
12737
;; Insn-and-split: zero-extend an SImode element of a V4SI held in memory
;; into a DImode register.  After reload it becomes a zero_extend:DI of an
;; SImode memory reference at byte offset (element index * 4) from the
;; original address.
12738 (define_insn_and_split "*vec_extractv4si_zext_mem"
12739 [(set (match_operand:DI 0 "register_operand" "=x,r")
12740 (zero_extend:DI
12741 (vec_select:SI
12742 (match_operand:V4SI 1 "memory_operand" "o,o")
12743 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
12744 "TARGET_64BIT && TARGET_SSE"
12745 "#"
12746 "&& reload_completed"
12747 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
12748 {
12749 operands[1] = adjust_address (operands[1], SImode, INTVAL (operands[2]) * 4);
12750 })
12751
;; Insn: extract element 1 (the upper DImode half) of a V2DI.  The
;; memory-to-memory case is rejected.  Seven alternatives:
;;   0: pextrq $1 to a general register or memory (isa x64_sse4)
;;   1: movhps store to memory
;;   2: psrldq $8 in place (sse2_noavx)
;;   3: three-operand vpsrldq $8 (avx)
;;   4: movhlps register to register (noavx)
;;   5, 6: source in offsettable memory; "#" means these are split
;;         (alternative 6, into a general register, is isa x64).
12752 (define_insn "*vec_extractv2di_1"
12753 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,m,x,x,x,x,r")
12754 (vec_select:DI
12755 (match_operand:V2DI 1 "nonimmediate_operand" "x ,x,0,x,x,o,o")
12756 (parallel [(const_int 1)])))]
12757 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
12758 "@
12759 %vpextrq\t{$1, %1, %0|%0, %1, 1}
12760 %vmovhps\t{%1, %0|%0, %1}
12761 psrldq\t{$8, %0|%0, 8}
12762 vpsrldq\t{$8, %1, %0|%0, %1, 8}
12763 movhlps\t{%1, %0|%0, %1}
12764 #
12765 #"
12766 [(set_attr "isa" "x64_sse4,*,sse2_noavx,avx,noavx,*,x64")
12767 (set_attr "type" "sselog1,ssemov,sseishft1,sseishft1,ssemov,ssemov,imov")
12768 (set_attr "length_immediate" "1,*,1,1,*,*,*")
12769 (set_attr "prefix_rex" "1,*,*,*,*,*,*")
12770 (set_attr "prefix_extra" "1,*,*,*,*,*,*")
12771 (set_attr "prefix" "maybe_vex,maybe_vex,orig,vex,orig,*,*")
12772 (set_attr "mode" "TI,V2SF,TI,TI,V4SF,DI,DI")])
12773
;; Split: after reload, extracting element N from a VI_128 vector held in
;; memory becomes a plain scalar load from the original address plus
;; N * GET_MODE_SIZE (<ssescalarmode>mode) bytes.
12774 (define_split
12775 [(set (match_operand:<ssescalarmode> 0 "register_operand")
12776 (vec_select:<ssescalarmode>
12777 (match_operand:VI_128 1 "memory_operand")
12778 (parallel
12779 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
12780 "TARGET_SSE && reload_completed"
12781 [(set (match_dup 0) (match_dup 1))]
12782 {
12783 int offs = INTVAL (operands[2]) * GET_MODE_SIZE (<ssescalarmode>mode);
12784
12785 operands[1] = adjust_address (operands[1], <ssescalarmode>mode, offs);
12786 })
12787
;; Insn: broadcast an SImode scalar into all four elements of a V4SI.
;;   alternative 0: pshufd $0 from an SSE register (isa sse2)
;;   alternative 1: vbroadcastss from memory (isa avx)
;;   alternative 2: shufps $0 in place, source tied to destination (noavx)
12788 (define_insn "*vec_dupv4si"
12789 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
12790 (vec_duplicate:V4SI
12791 (match_operand:SI 1 "nonimmediate_operand" " x,m,0")))]
12792 "TARGET_SSE"
12793 "@
12794 %vpshufd\t{$0, %1, %0|%0, %1, 0}
12795 vbroadcastss\t{%1, %0|%0, %1}
12796 shufps\t{$0, %0, %0|%0, %0, 0}"
12797 [(set_attr "isa" "sse2,avx,noavx")
12798 (set_attr "type" "sselog1,ssemov,sselog1")
12799 (set_attr "length_immediate" "1,0,1")
12800 (set_attr "prefix_extra" "0,1,*")
12801 (set_attr "prefix" "maybe_vex,vex,orig")
12802 (set_attr "mode" "TI,V4SF,V4SF")])
12803
;; Insn: broadcast a DImode scalar into both elements of a V2DI.
;;   alternative 0: punpcklqdq in place (sse2_noavx)
;;   alternative 1: vpunpcklqdq from an SSE register (avx)
;;   alternative 2: movddup from memory (sse3)
;;   alternative 3: movlhps in place (noavx)
12804 (define_insn "*vec_dupv2di"
12805 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x,x")
12806 (vec_duplicate:V2DI
12807 (match_operand:DI 1 "nonimmediate_operand" " 0,x,m,0")))]
12808 "TARGET_SSE"
12809 "@
12810 punpcklqdq\t%0, %0
12811 vpunpcklqdq\t{%d1, %0|%0, %d1}
12812 %vmovddup\t{%1, %0|%0, %1}
12813 movlhps\t%0, %0"
12814 [(set_attr "isa" "sse2_noavx,avx,sse3,noavx")
12815 (set_attr "type" "sselog1,sselog1,sselog1,ssemov")
12816 (set_attr "prefix" "orig,vex,maybe_vex,orig")
12817 (set_attr "mode" "TI,TI,DF,V4SF")])
12818
;; Insn: concatenate two SImode values into a V2SI when SSE4.1 is enabled.
;;   alternatives 0, 1: pinsrd / vpinsrd $1 inserts operand 2 as element 1
;;   alternatives 2, 3: punpckldq / vpunpckldq of two SSE registers
;;   alternative 4: movd of operand 1 only; operand 2 uses constraint "C"
;;                  (presumably zero)
;;   alternatives 5, 6: the same two forms on "*y" registers (types mmxcvt
;;                  and mmxmov)
12819 (define_insn "*vec_concatv2si_sse4_1"
12820 [(set (match_operand:V2SI 0 "register_operand" "=x, x,x,x, x, *y,*y")
12821 (vec_concat:V2SI
12822 (match_operand:SI 1 "nonimmediate_operand" " 0, x,0,x,rm, 0,rm")
12823 (match_operand:SI 2 "vector_move_operand" "rm,rm,x,x, C,*ym, C")))]
12824 "TARGET_SSE4_1"
12825 "@
12826 pinsrd\t{$1, %2, %0|%0, %2, 1}
12827 vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
12828 punpckldq\t{%2, %0|%0, %2}
12829 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
12830 %vmovd\t{%1, %0|%0, %1}
12831 punpckldq\t{%2, %0|%0, %2}
12832 movd\t{%1, %0|%0, %1}"
12833 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
12834 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
12835 (set_attr "prefix_extra" "1,1,*,*,*,*,*")
12836 (set_attr "length_immediate" "1,1,*,*,*,*,*")
12837 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
12838 (set_attr "mode" "TI,TI,TI,TI,TI,DI,DI")])
12839
;; Build a V2SI from two SImode values on targets with SSE but without
;; SSE4.1 (see the insn condition).  Alternatives 0-2 need SSE2; the
;; remaining ones use the SSE1 float-domain forms (unpcklps/movss) or the
;; MMX registers ("y").
12840 ;; ??? In theory we can match memory for the MMX alternative, but allowing
12841 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
12842 ;; alternatives pretty much forces the MMX alternative to be chosen.
12843 (define_insn "*vec_concatv2si"
12844 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,x,x,*y,*y")
12845 (vec_concat:V2SI
12846 (match_operand:SI 1 "nonimmediate_operand" " 0,rm,rm,0,m, 0,*rm")
12847 (match_operand:SI 2 "reg_or_0_operand" " x,C ,C, x,C,*y,C")))]
12848 "TARGET_SSE && !TARGET_SSE4_1"
12849 "@
12850 punpckldq\t{%2, %0|%0, %2}
12851 movd\t{%1, %0|%0, %1}
12852 movd\t{%1, %0|%0, %1}
12853 unpcklps\t{%2, %0|%0, %2}
12854 movss\t{%1, %0|%0, %1}
12855 punpckldq\t{%2, %0|%0, %2}
12856 movd\t{%1, %0|%0, %1}"
12857 [(set_attr "isa" "sse2,sse2,sse2,*,*,*,*")
12858 (set_attr "type" "sselog,ssemov,mmxmov,sselog,ssemov,mmxcvt,mmxmov")
12859 (set_attr "mode" "TI,TI,DI,V4SF,SF,DI,DI")])
12860
;; Concatenate two V2SI halves into a V4SI register: operand 1 supplies the
;; first half and must be a register, operand 2 supplies the second half and
;; may be a register or (alternatives 3/4) memory.
;;   0/1: [v]punpcklqdq, register second half (SSE2 / AVX);
;;   2:   movlhps, register second half (non-AVX);
;;   3/4: [v]movhps, second half loaded from memory.
12861 (define_insn "*vec_concatv4si"
12862 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x,x,x")
12863 (vec_concat:V4SI
12864 (match_operand:V2SI 1 "register_operand" " 0,x,0,0,x")
12865 (match_operand:V2SI 2 "nonimmediate_operand" " x,x,x,m,m")))]
12866 "TARGET_SSE"
12867 "@
12868 punpcklqdq\t{%2, %0|%0, %2}
12869 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
12870 movlhps\t{%2, %0|%0, %2}
12871 movhps\t{%2, %0|%0, %q2}
12872 vmovhps\t{%2, %1, %0|%0, %1, %q2}"
12873 [(set_attr "isa" "sse2_noavx,avx,noavx,noavx,avx")
12874 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
12875 (set_attr "prefix" "orig,vex,orig,orig,vex")
12876 (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")])
12877
;; Build a V2DI from two DImode values (operand 1 is the first element,
;; operand 2 the second).  Alternatives, in order:
;;   0/1: [v]pinsrq inserts operand 2 at index 1 (64-bit only);
;;   2:   move from a general register with operand 2 matching "C"; emits
;;        movq or movd depending on HAVE_AS_IX86_INTERUNIT_MOVQ;
;;   3:   [v]movq from an SSE register or memory, operand 2 matching "C";
;;   4:   movq2dq from an MMX register, operand 2 matching "C";
;;   5/6: [v]punpcklqdq of two SSE registers;
;;   7:   movlhps of two SSE registers (non-AVX);
;;   8/9: [v]movhps with operand 2 in memory.
12878 ;; movd instead of movq is required to handle broken assemblers.
12879 (define_insn "vec_concatv2di"
12880 [(set (match_operand:V2DI 0 "register_operand"
12881 "=x,x ,Yi,x ,!x,x,x,x,x,x")
12882 (vec_concat:V2DI
12883 (match_operand:DI 1 "nonimmediate_operand"
12884 " 0,x ,r ,xm,*y,0,x,0,0,x")
12885 (match_operand:DI 2 "vector_move_operand"
12886 "rm,rm,C ,C ,C ,x,x,x,m,m")))]
12887 "TARGET_SSE"
12888 "@
12889 pinsrq\t{$1, %2, %0|%0, %2, 1}
12890 vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
12891 * return HAVE_AS_IX86_INTERUNIT_MOVQ ? \"%vmovq\t{%1, %0|%0, %1}\" : \"%vmovd\t{%1, %0|%0, %1}\";
12892 %vmovq\t{%1, %0|%0, %1}
12893 movq2dq\t{%1, %0|%0, %1}
12894 punpcklqdq\t{%2, %0|%0, %2}
12895 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
12896 movlhps\t{%2, %0|%0, %2}
12897 movhps\t{%2, %0|%0, %2}
12898 vmovhps\t{%2, %1, %0|%0, %1, %2}"
12899 [(set_attr "isa" "x64_sse4_noavx,x64_avx,x64,sse2,sse2,sse2_noavx,avx,noavx,noavx,avx")
;; Only the pinsrq and punpcklqdq alternatives are "sselog"; the rest are moves.
12900 (set (attr "type")
12901 (if_then_else
12902 (eq_attr "alternative" "0,1,5,6")
12903 (const_string "sselog")
12904 (const_string "ssemov")))
12905 (set_attr "prefix_rex" "1,1,1,*,*,*,*,*,*,*")
12906 (set_attr "prefix_extra" "1,1,*,*,*,*,*,*,*,*")
12907 (set_attr "length_immediate" "1,1,*,*,*,*,*,*,*,*")
12908 (set_attr "prefix" "orig,vex,maybe_vex,maybe_vex,orig,orig,vex,orig,orig,vex")
12909 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")])
12910
;; Standard-name expander "vec_unpacks_lo": widen the low half of operand 1
;; into operand 0.  All work is delegated to ix86_expand_sse_unpack; the two
;; boolean arguments are both false here (they are true for the "unpacku"
;; and "hi" siblings respectively).
(define_expand "vec_unpacks_lo_<mode>"
  [(match_operand:<sseunpackmode> 0 "register_operand")
   (match_operand:VI124_AVX512F 1 "register_operand")]
  "TARGET_SSE2"
{
  ix86_expand_sse_unpack (operands[0], operands[1], false, false);
  DONE;
})
12916
;; Standard-name expander "vec_unpacks_hi": widen the high half of operand 1
;; into operand 0.  Delegates to ix86_expand_sse_unpack with the third
;; argument false (the "unpacks" flavour) and the fourth true (the "hi" half).
(define_expand "vec_unpacks_hi_<mode>"
  [(match_operand:<sseunpackmode> 0 "register_operand")
   (match_operand:VI124_AVX512F 1 "register_operand")]
  "TARGET_SSE2"
{
  ix86_expand_sse_unpack (operands[0], operands[1], false, true);
  DONE;
})
12922
;; Standard-name expander "vec_unpacku_lo": widen the low half of operand 1
;; into operand 0.  Delegates to ix86_expand_sse_unpack with the third
;; argument true (the "unpacku" flavour) and the fourth false (the "lo" half).
(define_expand "vec_unpacku_lo_<mode>"
  [(match_operand:<sseunpackmode> 0 "register_operand")
   (match_operand:VI124_AVX512F 1 "register_operand")]
  "TARGET_SSE2"
{
  ix86_expand_sse_unpack (operands[0], operands[1], true, false);
  DONE;
})
12928
;; Standard-name expander "vec_unpacku_hi": widen the high half of operand 1
;; into operand 0.  Delegates to ix86_expand_sse_unpack with both boolean
;; arguments true (the "unpacku" flavour, "hi" half).
(define_expand "vec_unpacku_hi_<mode>"
  [(match_operand:<sseunpackmode> 0 "register_operand")
   (match_operand:VI124_AVX512F 1 "register_operand")]
  "TARGET_SSE2"
{
  ix86_expand_sse_unpack (operands[0], operands[1], true, true);
  DONE;
})
12934
12935 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12936 ;;
12937 ;; Miscellaneous
12938 ;;
12939 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12940
;; Expander for the unsigned rounding average:
;;   op0 = truncate ((zero_extend (op1) + zero_extend (op2) + 1) >> 1)
;; The "+ 1" vector is not passed in by the caller.  It is created here and
;; stored in operand <mask_expand_op3>, the slot that the (match_dup) in
;; the pattern refers to.
(define_expand "<sse2_avx2>_uavg<mode>3<mask_name>"
  [(set (match_operand:VI12_AVX2 0 "register_operand")
        (truncate:VI12_AVX2
          (lshiftrt:<ssedoublemode>
            (plus:<ssedoublemode>
              (plus:<ssedoublemode>
                (zero_extend:<ssedoublemode>
                  (match_operand:VI12_AVX2 1 "nonimmediate_operand"))
                (zero_extend:<ssedoublemode>
                  (match_operand:VI12_AVX2 2 "nonimmediate_operand")))
              (match_dup <mask_expand_op3>))
            (const_int 1))))]
  "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
{
  /* Store the all-ones-per-element constant directly in the match_dup slot
     (operand 3 without masking, operand 5 with masking).  The previous code
     parked it in operands[3] and then shuffled it to operands[5] through a
     temporary that was only conditionally initialized; the end state is the
     same.  This relies on ix86_fixup_binary_operands_no_copy looking only at
     operands[0..2], so the merge/mask operands are left untouched.  */
  operands[<mask_expand_op3>] = CONST1_RTX (<MODE>mode);
  ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);
})
12967
;; Matcher for the unsigned rounding average produced by the expander of
;; the same name: truncate ((zext (op1) + zext (op2) + 1) >> 1), where the
;; "+ 1" must satisfy const1_operand.  Operand 1 is marked commutative ("%").
;; Alternative 0 is the two-operand legacy form, alternative 1 the
;; three-operand VEX/EVEX form, optionally with a mask (<mask_operand3>).
12968 (define_insn "*<sse2_avx2>_uavg<mode>3<mask_name>"
12969 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v")
12970 (truncate:VI12_AVX2
12971 (lshiftrt:<ssedoublemode>
12972 (plus:<ssedoublemode>
12973 (plus:<ssedoublemode>
12974 (zero_extend:<ssedoublemode>
12975 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "%0,v"))
12976 (zero_extend:<ssedoublemode>
12977 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,vm")))
12978 (match_operand:VI12_AVX2 <mask_expand_op3> "const1_operand"))
12979 (const_int 1))))]
12980 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
12981 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
12982 "@
12983 pavg<ssemodesuffix>\t{%2, %0|%0, %2}
12984 vpavg<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12985 [(set_attr "isa" "noavx,avx")
12986 (set_attr "type" "sseiadd")
12987 (set_attr "prefix_data16" "1,*")
12988 (set_attr "prefix" "orig,<mask_prefix>")
12989 (set_attr "mode" "<sseinsnmode>")])
12990
;; [v]psadbw, modelled as an opaque UNSPEC_PSADBW of two byte-vector inputs
;; (mode <ssebytemode>) producing a VI8_AVX2_AVX512BW result, rather than as
;; explicit RTL arithmetic.
12991 ;; The correct representation for this is absolutely enormous, and
12992 ;; surely not generally useful.
12993 (define_insn "<sse2_avx2>_psadbw"
12994 [(set (match_operand:VI8_AVX2_AVX512BW 0 "register_operand" "=x,v")
12995 (unspec:VI8_AVX2_AVX512BW
12996 [(match_operand:<ssebytemode> 1 "register_operand" "0,v")
12997 (match_operand:<ssebytemode> 2 "nonimmediate_operand" "xm,vm")]
12998 UNSPEC_PSADBW))]
12999 "TARGET_SSE2"
13000 "@
13001 psadbw\t{%2, %0|%0, %2}
13002 vpsadbw\t{%2, %1, %0|%0, %1, %2}"
13003 [(set_attr "isa" "noavx,avx")
13004 (set_attr "type" "sseiadd")
13005 (set_attr "atom_unit" "simul")
13006 (set_attr "prefix_data16" "1,*")
13007 (set_attr "prefix" "orig,maybe_evex")
13008 (set_attr "mode" "<sseinsnmode>")])
13009
;; [v]movmskps / [v]movmskpd: produce an SImode value in a general register
;; from a 128- or 256-bit float vector register, modelled as UNSPEC_MOVMSK.
13010 (define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>"
13011 [(set (match_operand:SI 0 "register_operand" "=r")
13012 (unspec:SI
13013 [(match_operand:VF_128_256 1 "register_operand" "x")]
13014 UNSPEC_MOVMSK))]
13015 "TARGET_SSE"
13016 "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
13017 [(set_attr "type" "ssemov")
13018 (set_attr "prefix" "maybe_vex")
13019 (set_attr "mode" "<MODE>")])
13020
;; 256-bit vpmovmskb: produce an SImode value in a general register from a
;; V32QI register, modelled as UNSPEC_MOVMSK.  AVX2 only, always VEX-encoded.
;; NOTE(review): the "mode" attribute is DI here but SI in sse2_pmovmskb
;; below -- confirm whether the difference is intentional.
13021 (define_insn "avx2_pmovmskb"
13022 [(set (match_operand:SI 0 "register_operand" "=r")
13023 (unspec:SI [(match_operand:V32QI 1 "register_operand" "x")]
13024 UNSPEC_MOVMSK))]
13025 "TARGET_AVX2"
13026 "vpmovmskb\t{%1, %0|%0, %1}"
13027 [(set_attr "type" "ssemov")
13028 (set_attr "prefix" "vex")
13029 (set_attr "mode" "DI")])
13030
;; 128-bit [v]pmovmskb: produce an SImode value in a general register from a
;; V16QI register, modelled as UNSPEC_MOVMSK.  Uses the VEX form when AVX is
;; enabled (%v prefix, "maybe_vex").
13031 (define_insn "sse2_pmovmskb"
13032 [(set (match_operand:SI 0 "register_operand" "=r")
13033 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
13034 UNSPEC_MOVMSK))]
13035 "TARGET_SSE2"
13036 "%vpmovmskb\t{%1, %0|%0, %1}"
13037 [(set_attr "type" "ssemov")
13038 (set_attr "prefix_data16" "1")
13039 (set_attr "prefix" "maybe_vex")
13040 (set_attr "mode" "SI")])
13041
;; Expander for maskmovdqu.  The destination memory (operand 0) also appears
;; as an input of the unspec via (match_dup 0), so the store is modelled as
;; depending on the previous memory contents.  No preparation code is needed;
;; the generated RTL is matched by *sse2_maskmovdqu.
13042 (define_expand "sse2_maskmovdqu"
13043 [(set (match_operand:V16QI 0 "memory_operand")
13044 (unspec:V16QI [(match_operand:V16QI 1 "register_operand")
13045 (match_operand:V16QI 2 "register_operand")
13046 (match_dup 0)]
13047 UNSPEC_MASKMOV))]
13048 "TARGET_SSE2")
13049
;; Matcher for [v]maskmovdqu.  The store address is operand 0, a Pmode
;; register restricted to constraint "D"; it does not appear in the assembly
;; template because the instruction addresses memory implicitly.  When Pmode
;; differs from word_mode an "addr32" prefix is written to the output first.
13050 (define_insn "*sse2_maskmovdqu"
13051 [(set (mem:V16QI (match_operand:P 0 "register_operand" "D"))
13052 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
13053 (match_operand:V16QI 2 "register_operand" "x")
13054 (mem:V16QI (match_dup 0))]
13055 UNSPEC_MASKMOV))]
13056 "TARGET_SSE2"
13057 {
13058 /* We can't use %^ here due to ASM_OUTPUT_OPCODE processing
13059 that requires %v to be at the beginning of the opcode name. */
13060 if (Pmode != word_mode)
13061 fputs ("\taddr32", asm_out_file);
13062 return "%vmaskmovdqu\t{%2, %1|%1, %2}";
13063 }
13064 [(set_attr "type" "ssemov")
13065 (set_attr "prefix_data16" "1")
;; One extra address-size byte exactly when the addr32 prefix is emitted.
13066 (set (attr "length_address")
13067 (symbol_ref ("Pmode != word_mode")))
13068 ;; The implicit %rdi operand confuses default length_vex computation.
13069 (set (attr "length_vex")
13070 (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))")))
13071 (set_attr "prefix" "maybe_vex")
13072 (set_attr "mode" "TI")])
13073
;; [v]ldmxcsr: load MXCSR from the SImode memory operand.  Modelled as an
;; unspec_volatile (UNSPECV_LDMXCSR) with no RTL destination.
13074 (define_insn "sse_ldmxcsr"
13075 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
13076 UNSPECV_LDMXCSR)]
13077 "TARGET_SSE"
13078 "%vldmxcsr\t%0"
13079 [(set_attr "type" "sse")
13080 (set_attr "atom_sse_attr" "mxcsr")
13081 (set_attr "prefix" "maybe_vex")
13082 (set_attr "memory" "load")])
13083
;; [v]stmxcsr: store MXCSR to the SImode memory operand.  The source is an
;; unspec_volatile (UNSPECV_STMXCSR) with a dummy (const_int 0) argument.
13084 (define_insn "sse_stmxcsr"
13085 [(set (match_operand:SI 0 "memory_operand" "=m")
13086 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
13087 "TARGET_SSE"
13088 "%vstmxcsr\t%0"
13089 [(set_attr "type" "sse")
13090 (set_attr "atom_sse_attr" "mxcsr")
13091 (set_attr "prefix" "maybe_vex")
13092 (set_attr "memory" "store")])
13093
;; clflush on the address given by operand 0 (any valid address, printed
;; with %a).  Volatile unspec; the "memory" attribute is "unknown".
13094 (define_insn "sse2_clflush"
13095 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
13096 UNSPECV_CLFLUSH)]
13097 "TARGET_SSE2"
13098 "clflush\t%a0"
13099 [(set_attr "type" "sse")
13100 (set_attr "atom_sse_attr" "fence")
13101 (set_attr "memory" "unknown")])
13102
13103
;; mwait.  Both operands are implicit: operand 0 is pinned to constraint "a"
;; and operand 1 to constraint "c", so neither appears in the assembly
;; template.  The length is fixed at 3 bytes.
13104 (define_insn "sse3_mwait"
13105 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
13106 (match_operand:SI 1 "register_operand" "c")]
13107 UNSPECV_MWAIT)]
13108 "TARGET_SSE3"
13109 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
13110 ;; Since 32bit register operands are implicitly zero extended to 64bit,
13111 ;; we only need to set up 32bit registers.
13112 "mwait"
13113 [(set_attr "length" "3")])
13114
;; monitor.  All three operands are implicit: operand 0 is a pointer-mode (P)
;; register pinned to constraint "a", operands 1 and 2 are SImode registers
;; pinned to "c" and "d".  The length is 3 bytes, plus one when Pmode differs
;; from word_mode.
13115 (define_insn "sse3_monitor_<mode>"
13116 [(unspec_volatile [(match_operand:P 0 "register_operand" "a")
13117 (match_operand:SI 1 "register_operand" "c")
13118 (match_operand:SI 2 "register_operand" "d")]
13119 UNSPECV_MONITOR)]
13120 "TARGET_SSE3"
13121 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
13122 ;; RCX and RDX are used. Since 32bit register operands are implicitly
13123 ;; zero extended to 64bit, we only need to set up 32bit registers.
13124 "%^monitor"
13125 [(set (attr "length")
13126 (symbol_ref ("(Pmode != word_mode) + 3")))])
13127
13128 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13129 ;;
13130 ;; SSSE3 instructions
13131 ;;
13132 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13133
;; RTL codes iterated over by the 16-bit horizontal add/subtract patterns
;; below: plain and signed-saturating addition and subtraction.
13134 (define_code_iterator ssse3_plusminus [plus ss_plus minus ss_minus])
13135
;; 256-bit horizontal add/subtract of 16-bit elements, one insn per code in
;; ssse3_plusminus.  As written in the RTL below, the first eight result
;; elements combine adjacent pairs (0,1) ... (14,15) of operand 1 and the
;; last eight combine the same pairs of operand 2.
13136 (define_insn "avx2_ph<plusminus_mnemonic>wv16hi3"
13137 [(set (match_operand:V16HI 0 "register_operand" "=x")
13138 (vec_concat:V16HI
13139 (vec_concat:V8HI
13140 (vec_concat:V4HI
13141 (vec_concat:V2HI
13142 (ssse3_plusminus:HI
13143 (vec_select:HI
13144 (match_operand:V16HI 1 "register_operand" "x")
13145 (parallel [(const_int 0)]))
13146 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
13147 (ssse3_plusminus:HI
13148 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
13149 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
13150 (vec_concat:V2HI
13151 (ssse3_plusminus:HI
13152 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
13153 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
13154 (ssse3_plusminus:HI
13155 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
13156 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
13157 (vec_concat:V4HI
13158 (vec_concat:V2HI
13159 (ssse3_plusminus:HI
13160 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
13161 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
13162 (ssse3_plusminus:HI
13163 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
13164 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
13165 (vec_concat:V2HI
13166 (ssse3_plusminus:HI
13167 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
13168 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
13169 (ssse3_plusminus:HI
13170 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
13171 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
13172 (vec_concat:V8HI
13173 (vec_concat:V4HI
13174 (vec_concat:V2HI
13175 (ssse3_plusminus:HI
13176 (vec_select:HI
13177 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
13178 (parallel [(const_int 0)]))
13179 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
13180 (ssse3_plusminus:HI
13181 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
13182 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
13183 (vec_concat:V2HI
13184 (ssse3_plusminus:HI
13185 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
13186 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
13187 (ssse3_plusminus:HI
13188 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
13189 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
13190 (vec_concat:V4HI
13191 (vec_concat:V2HI
13192 (ssse3_plusminus:HI
13193 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
13194 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
13195 (ssse3_plusminus:HI
13196 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
13197 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
13198 (vec_concat:V2HI
13199 (ssse3_plusminus:HI
13200 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
13201 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
13202 (ssse3_plusminus:HI
13203 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
13204 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
13205 "TARGET_AVX2"
13206 "vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
13207 [(set_attr "type" "sseiadd")
13208 (set_attr "prefix_extra" "1")
13209 (set_attr "prefix" "vex")
13210 (set_attr "mode" "OI")])
13211
;; 128-bit horizontal add/subtract of 16-bit elements, one insn per code in
;; ssse3_plusminus.  Result elements 0-3 combine adjacent pairs of operand 1,
;; elements 4-7 combine adjacent pairs of operand 2.  Alternative 0 is the
;; two-operand legacy form (operand 1 tied to the destination), alternative 1
;; the three-operand VEX form.
13212 (define_insn "ssse3_ph<plusminus_mnemonic>wv8hi3"
13213 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
13214 (vec_concat:V8HI
13215 (vec_concat:V4HI
13216 (vec_concat:V2HI
13217 (ssse3_plusminus:HI
13218 (vec_select:HI
13219 (match_operand:V8HI 1 "register_operand" "0,x")
13220 (parallel [(const_int 0)]))
13221 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
13222 (ssse3_plusminus:HI
13223 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
13224 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
13225 (vec_concat:V2HI
13226 (ssse3_plusminus:HI
13227 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
13228 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
13229 (ssse3_plusminus:HI
13230 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
13231 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
13232 (vec_concat:V4HI
13233 (vec_concat:V2HI
13234 (ssse3_plusminus:HI
13235 (vec_select:HI
13236 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
13237 (parallel [(const_int 0)]))
13238 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
13239 (ssse3_plusminus:HI
13240 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
13241 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
13242 (vec_concat:V2HI
13243 (ssse3_plusminus:HI
13244 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
13245 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
13246 (ssse3_plusminus:HI
13247 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
13248 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
13249 "TARGET_SSSE3"
13250 "@
13251 ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}
13252 vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
13253 [(set_attr "isa" "noavx,avx")
13254 (set_attr "type" "sseiadd")
13255 (set_attr "atom_unit" "complex")
13256 (set_attr "prefix_data16" "1,*")
13257 (set_attr "prefix_extra" "1")
13258 (set_attr "prefix" "orig,vex")
13259 (set_attr "mode" "TI")])
13260
;; MMX-register ("y") horizontal add/subtract of 16-bit elements, one insn
;; per code in ssse3_plusminus.  Result elements 0-1 combine adjacent pairs
;; of operand 1 (tied to the destination), elements 2-3 adjacent pairs of
;; operand 2.
13261 (define_insn "ssse3_ph<plusminus_mnemonic>wv4hi3"
13262 [(set (match_operand:V4HI 0 "register_operand" "=y")
13263 (vec_concat:V4HI
13264 (vec_concat:V2HI
13265 (ssse3_plusminus:HI
13266 (vec_select:HI
13267 (match_operand:V4HI 1 "register_operand" "0")
13268 (parallel [(const_int 0)]))
13269 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
13270 (ssse3_plusminus:HI
13271 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
13272 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
13273 (vec_concat:V2HI
13274 (ssse3_plusminus:HI
13275 (vec_select:HI
13276 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
13277 (parallel [(const_int 0)]))
13278 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
13279 (ssse3_plusminus:HI
13280 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
13281 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
13282 "TARGET_SSSE3"
13283 "ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}"
13284 [(set_attr "type" "sseiadd")
13285 (set_attr "atom_unit" "complex")
13286 (set_attr "prefix_extra" "1")
13287 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
13288 (set_attr "mode" "DI")])
13289
;; 256-bit horizontal add/subtract of 32-bit elements (codes from the
;; plusminus iterator, i.e. no saturating forms in this pattern).  As written
;; in the RTL below, result elements 0-3 combine adjacent pairs (0,1) ...
;; (6,7) of operand 1 and elements 4-7 the same pairs of operand 2.
13290 (define_insn "avx2_ph<plusminus_mnemonic>dv8si3"
13291 [(set (match_operand:V8SI 0 "register_operand" "=x")
13292 (vec_concat:V8SI
13293 (vec_concat:V4SI
13294 (vec_concat:V2SI
13295 (plusminus:SI
13296 (vec_select:SI
13297 (match_operand:V8SI 1 "register_operand" "x")
13298 (parallel [(const_int 0)]))
13299 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
13300 (plusminus:SI
13301 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
13302 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
13303 (vec_concat:V2SI
13304 (plusminus:SI
13305 (vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
13306 (vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
13307 (plusminus:SI
13308 (vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
13309 (vec_select:SI (match_dup 1) (parallel [(const_int 7)])))))
13310 (vec_concat:V4SI
13311 (vec_concat:V2SI
13312 (plusminus:SI
13313 (vec_select:SI
13314 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
13315 (parallel [(const_int 0)]))
13316 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
13317 (plusminus:SI
13318 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
13319 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))
13320 (vec_concat:V2SI
13321 (plusminus:SI
13322 (vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
13323 (vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
13324 (plusminus:SI
13325 (vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
13326 (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
13327 "TARGET_AVX2"
13328 "vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
13329 [(set_attr "type" "sseiadd")
13330 (set_attr "prefix_extra" "1")
13331 (set_attr "prefix" "vex")
13332 (set_attr "mode" "OI")])
13333
;; 128-bit horizontal add/subtract of 32-bit elements (codes from the
;; plusminus iterator).  Result elements 0-1 combine adjacent pairs of
;; operand 1, elements 2-3 adjacent pairs of operand 2.  Alternative 0 is the
;; two-operand legacy form, alternative 1 the three-operand VEX form.
13334 (define_insn "ssse3_ph<plusminus_mnemonic>dv4si3"
13335 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
13336 (vec_concat:V4SI
13337 (vec_concat:V2SI
13338 (plusminus:SI
13339 (vec_select:SI
13340 (match_operand:V4SI 1 "register_operand" "0,x")
13341 (parallel [(const_int 0)]))
13342 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
13343 (plusminus:SI
13344 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
13345 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
13346 (vec_concat:V2SI
13347 (plusminus:SI
13348 (vec_select:SI
13349 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
13350 (parallel [(const_int 0)]))
13351 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
13352 (plusminus:SI
13353 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
13354 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
13355 "TARGET_SSSE3"
13356 "@
13357 ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}
13358 vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
13359 [(set_attr "isa" "noavx,avx")
13360 (set_attr "type" "sseiadd")
13361 (set_attr "atom_unit" "complex")
13362 (set_attr "prefix_data16" "1,*")
13363 (set_attr "prefix_extra" "1")
13364 (set_attr "prefix" "orig,vex")
13365 (set_attr "mode" "TI")])
13366
;; MMX-register ("y") horizontal add/subtract of 32-bit elements (codes from
;; the plusminus iterator).  Result element 0 combines the two elements of
;; operand 1 (tied to the destination), element 1 the two elements of
;; operand 2.
13367 (define_insn "ssse3_ph<plusminus_mnemonic>dv2si3"
13368 [(set (match_operand:V2SI 0 "register_operand" "=y")
13369 (vec_concat:V2SI
13370 (plusminus:SI
13371 (vec_select:SI
13372 (match_operand:V2SI 1 "register_operand" "0")
13373 (parallel [(const_int 0)]))
13374 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
13375 (plusminus:SI
13376 (vec_select:SI
13377 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
13378 (parallel [(const_int 0)]))
13379 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
13380 "TARGET_SSSE3"
13381 "ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}"
13382 [(set_attr "type" "sseiadd")
13383 (set_attr "atom_unit" "complex")
13384 (set_attr "prefix_extra" "1")
13385 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
13386 (set_attr "mode" "DI")])
13387
;; 256-bit vpmaddubsw, spelled out in RTL: the even-indexed bytes of
;; operand 1 (zero-extended) are multiplied by the even-indexed bytes of
;; operand 2 (sign-extended), likewise for the odd-indexed bytes, and the
;; two V16HI products are added with signed saturation (ss_plus).
13388 (define_insn "avx2_pmaddubsw256"
13389 [(set (match_operand:V16HI 0 "register_operand" "=x")
13390 (ss_plus:V16HI
13391 (mult:V16HI
13392 (zero_extend:V16HI
13393 (vec_select:V16QI
13394 (match_operand:V32QI 1 "register_operand" "x")
13395 (parallel [(const_int 0) (const_int 2)
13396 (const_int 4) (const_int 6)
13397 (const_int 8) (const_int 10)
13398 (const_int 12) (const_int 14)
13399 (const_int 16) (const_int 18)
13400 (const_int 20) (const_int 22)
13401 (const_int 24) (const_int 26)
13402 (const_int 28) (const_int 30)])))
13403 (sign_extend:V16HI
13404 (vec_select:V16QI
13405 (match_operand:V32QI 2 "nonimmediate_operand" "xm")
13406 (parallel [(const_int 0) (const_int 2)
13407 (const_int 4) (const_int 6)
13408 (const_int 8) (const_int 10)
13409 (const_int 12) (const_int 14)
13410 (const_int 16) (const_int 18)
13411 (const_int 20) (const_int 22)
13412 (const_int 24) (const_int 26)
13413 (const_int 28) (const_int 30)]))))
13414 (mult:V16HI
13415 (zero_extend:V16HI
13416 (vec_select:V16QI (match_dup 1)
13417 (parallel [(const_int 1) (const_int 3)
13418 (const_int 5) (const_int 7)
13419 (const_int 9) (const_int 11)
13420 (const_int 13) (const_int 15)
13421 (const_int 17) (const_int 19)
13422 (const_int 21) (const_int 23)
13423 (const_int 25) (const_int 27)
13424 (const_int 29) (const_int 31)])))
13425 (sign_extend:V16HI
13426 (vec_select:V16QI (match_dup 2)
13427 (parallel [(const_int 1) (const_int 3)
13428 (const_int 5) (const_int 7)
13429 (const_int 9) (const_int 11)
13430 (const_int 13) (const_int 15)
13431 (const_int 17) (const_int 19)
13432 (const_int 21) (const_int 23)
13433 (const_int 25) (const_int 27)
13434 (const_int 29) (const_int 31)]))))))]
13435 "TARGET_AVX2"
13436 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
13437 [(set_attr "type" "sseiadd")
13438 (set_attr "prefix_extra" "1")
13439 (set_attr "prefix" "vex")
13440 (set_attr "mode" "OI")])
13441
;; EVEX-encoded vpmaddubsw, optionally masked (<mask_operand3>).  Unlike the
;; 128/256-bit patterns in this file it is modelled as an opaque
;; UNSPEC_PMADDUBSW512 of its two <dbpsadbwmode> inputs:
;; The correct representation for this is absolutely enormous, and
;; surely not generally useful.
;; NOTE(review): the "mode" attribute is hard-coded to XI although the
;; VI2_AVX512VL iterator may also cover narrower vectors -- confirm.
(define_insn "avx512bw_pmaddubsw512<mode><mask_name>"
  [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
        (unspec:VI2_AVX512VL
          [(match_operand:<dbpsadbwmode> 1 "register_operand" "v")
           (match_operand:<dbpsadbwmode> 2 "nonimmediate_operand" "vm")]
          UNSPEC_PMADDUBSW512))]
  "TARGET_AVX512BW"
  "vpmaddubsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
13455
;; 512-bit vpmulhrsw, optionally masked (<mask_operand3>):
;;   op0 = truncate ((((sext (op1) * sext (op2)) >> 14) + 1) >> 1)
;; with the "+ 1" written out as an explicit 32-element const_vector.
;; Operand 1 is marked commutative ("%").
;; NOTE(review): the const_vector is V32HI while the enclosing plus is
;; V32SI -- confirm this mode mix is intended.
13456 (define_insn "avx512bw_umulhrswv32hi3<mask_name>"
13457 [(set (match_operand:V32HI 0 "register_operand" "=v")
13458 (truncate:V32HI
13459 (lshiftrt:V32SI
13460 (plus:V32SI
13461 (lshiftrt:V32SI
13462 (mult:V32SI
13463 (sign_extend:V32SI
13464 (match_operand:V32HI 1 "nonimmediate_operand" "%v"))
13465 (sign_extend:V32SI
13466 (match_operand:V32HI 2 "nonimmediate_operand" "vm")))
13467 (const_int 14))
13468 (const_vector:V32HI [(const_int 1) (const_int 1)
13469 (const_int 1) (const_int 1)
13470 (const_int 1) (const_int 1)
13471 (const_int 1) (const_int 1)
13472 (const_int 1) (const_int 1)
13473 (const_int 1) (const_int 1)
13474 (const_int 1) (const_int 1)
13475 (const_int 1) (const_int 1)
13476 (const_int 1) (const_int 1)
13477 (const_int 1) (const_int 1)
13478 (const_int 1) (const_int 1)
13479 (const_int 1) (const_int 1)
13480 (const_int 1) (const_int 1)
13481 (const_int 1) (const_int 1)
13482 (const_int 1) (const_int 1)
13483 (const_int 1) (const_int 1)]))
13484 (const_int 1))))]
13485 "TARGET_AVX512BW"
13486 "vpmulhrsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13487 [(set_attr "type" "sseimul")
13488 (set_attr "prefix" "evex")
13489 (set_attr "mode" "XI")])
13490
;; 128-bit [v]pmaddubsw, spelled out in RTL: even-indexed bytes of operand 1
;; (zero-extended) times even-indexed bytes of operand 2 (sign-extended),
;; likewise for the odd-indexed bytes, and the two V8HI products added with
;; signed saturation.  Alternative 0 is the legacy two-operand form,
;; alternative 1 the VEX three-operand form.
13491 (define_insn "ssse3_pmaddubsw128"
13492 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
13493 (ss_plus:V8HI
13494 (mult:V8HI
13495 (zero_extend:V8HI
13496 (vec_select:V8QI
13497 (match_operand:V16QI 1 "register_operand" "0,x")
13498 (parallel [(const_int 0) (const_int 2)
13499 (const_int 4) (const_int 6)
13500 (const_int 8) (const_int 10)
13501 (const_int 12) (const_int 14)])))
13502 (sign_extend:V8HI
13503 (vec_select:V8QI
13504 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")
13505 (parallel [(const_int 0) (const_int 2)
13506 (const_int 4) (const_int 6)
13507 (const_int 8) (const_int 10)
13508 (const_int 12) (const_int 14)]))))
13509 (mult:V8HI
13510 (zero_extend:V8HI
13511 (vec_select:V8QI (match_dup 1)
13512 (parallel [(const_int 1) (const_int 3)
13513 (const_int 5) (const_int 7)
13514 (const_int 9) (const_int 11)
13515 (const_int 13) (const_int 15)])))
13516 (sign_extend:V8HI
13517 (vec_select:V8QI (match_dup 2)
13518 (parallel [(const_int 1) (const_int 3)
13519 (const_int 5) (const_int 7)
13520 (const_int 9) (const_int 11)
13521 (const_int 13) (const_int 15)]))))))]
13522 "TARGET_SSSE3"
13523 "@
13524 pmaddubsw\t{%2, %0|%0, %2}
13525 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
13526 [(set_attr "isa" "noavx,avx")
13527 (set_attr "type" "sseiadd")
13528 (set_attr "atom_unit" "simul")
13529 (set_attr "prefix_data16" "1,*")
13530 (set_attr "prefix_extra" "1")
13531 (set_attr "prefix" "orig,vex")
13532 (set_attr "mode" "TI")])
13533
;; MMX-register ("y") pmaddubsw, spelled out in RTL: even-indexed bytes of
;; operand 1 (zero-extended) times even-indexed bytes of operand 2
;; (sign-extended), likewise for the odd-indexed bytes, and the two V4HI
;; products added with signed saturation.  Operand 1 is tied to the
;; destination.
13534 (define_insn "ssse3_pmaddubsw"
13535 [(set (match_operand:V4HI 0 "register_operand" "=y")
13536 (ss_plus:V4HI
13537 (mult:V4HI
13538 (zero_extend:V4HI
13539 (vec_select:V4QI
13540 (match_operand:V8QI 1 "register_operand" "0")
13541 (parallel [(const_int 0) (const_int 2)
13542 (const_int 4) (const_int 6)])))
13543 (sign_extend:V4HI
13544 (vec_select:V4QI
13545 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
13546 (parallel [(const_int 0) (const_int 2)
13547 (const_int 4) (const_int 6)]))))
13548 (mult:V4HI
13549 (zero_extend:V4HI
13550 (vec_select:V4QI (match_dup 1)
13551 (parallel [(const_int 1) (const_int 3)
13552 (const_int 5) (const_int 7)])))
13553 (sign_extend:V4HI
13554 (vec_select:V4QI (match_dup 2)
13555 (parallel [(const_int 1) (const_int 3)
13556 (const_int 5) (const_int 7)]))))))]
13557 "TARGET_SSSE3"
13558 "pmaddubsw\t{%2, %0|%0, %2}"
13559 [(set_attr "type" "sseiadd")
13560 (set_attr "atom_unit" "simul")
13561 (set_attr "prefix_extra" "1")
13562 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
13563 (set_attr "mode" "DI")])
13564
;; Modes handled by the pmulhrsw expanders below: V4HI and V8HI
;; unconditionally, V16HI only when AVX2 is enabled.
13565 (define_mode_iterator PMULHRSW
13566 [V4HI V8HI (V16HI "TARGET_AVX2")])
13567
;; Masked pmulhrsw expander:
;;   op0 = vec_merge (truncate ((((sext (op1) * sext (op2)) >> 14) + 1) >> 1),
;;                    op3, op4)
;; where operand 3 is the merge source and operand 4 the mask register.
;; The "+ 1" vector is not supplied by the caller; it is created below and
;; stored in operand 5, which (match_dup 5) refers to.
13568 (define_expand "<ssse3_avx2>_pmulhrsw<mode>3_mask"
13569 [(set (match_operand:PMULHRSW 0 "register_operand")
13570 (vec_merge:PMULHRSW
13571 (truncate:PMULHRSW
13572 (lshiftrt:<ssedoublemode>
13573 (plus:<ssedoublemode>
13574 (lshiftrt:<ssedoublemode>
13575 (mult:<ssedoublemode>
13576 (sign_extend:<ssedoublemode>
13577 (match_operand:PMULHRSW 1 "nonimmediate_operand"))
13578 (sign_extend:<ssedoublemode>
13579 (match_operand:PMULHRSW 2 "nonimmediate_operand")))
13580 (const_int 14))
13581 (match_dup 5))
13582 (const_int 1)))
13583 (match_operand:PMULHRSW 3 "register_operand")
13584 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
13585 "TARGET_AVX512BW && TARGET_AVX512VL"
13586 {
13587 operands[5] = CONST1_RTX(<MODE>mode);
13588 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
13589 })
13590
;; Unmasked pmulhrsw expander:
;;   op0 = truncate ((((sext (op1) * sext (op2)) >> 14) + 1) >> 1)
;; The "+ 1" vector is not supplied by the caller; it is created below and
;; stored in operand 3, which (match_dup 3) refers to.
;;
;; The condition is TARGET_SSSE3, matching the insns this expands to
;; (*<ssse3_avx2>_pmulhrsw<mode>3 and *ssse3_pmulhrswv4hi3).  The 256-bit
;; V16HI variant is already restricted to AVX2 by the PMULHRSW iterator, so
;; requiring TARGET_AVX2 here would needlessly disable the V4HI/V8HI forms
;; on SSSE3-only targets.
(define_expand "<ssse3_avx2>_pmulhrsw<mode>3"
  [(set (match_operand:PMULHRSW 0 "register_operand")
        (truncate:PMULHRSW
          (lshiftrt:<ssedoublemode>
            (plus:<ssedoublemode>
              (lshiftrt:<ssedoublemode>
                (mult:<ssedoublemode>
                  (sign_extend:<ssedoublemode>
                    (match_operand:PMULHRSW 1 "nonimmediate_operand"))
                  (sign_extend:<ssedoublemode>
                    (match_operand:PMULHRSW 2 "nonimmediate_operand")))
                (const_int 14))
              (match_dup 3))
            (const_int 1))))]
  "TARGET_SSSE3"
{
  operands[3] = CONST1_RTX(<MODE>mode);
  ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
})
13610
;; Matcher for [v]pmulhrsw on SSE/AVX registers:
;;   op0 = truncate ((((sext (op1) * sext (op2)) >> 14) + 1) >> 1)
;; where the "+ 1" (operand 3) must satisfy const1_operand.  Operand 1 is
;; marked commutative ("%").  Alternative 0 is the legacy two-operand form,
;; alternative 1 the VEX/EVEX three-operand form, optionally masked via
;; <mask_operand4>.
13611 (define_insn "*<ssse3_avx2>_pmulhrsw<mode>3<mask_name>"
13612 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
13613 (truncate:VI2_AVX2
13614 (lshiftrt:<ssedoublemode>
13615 (plus:<ssedoublemode>
13616 (lshiftrt:<ssedoublemode>
13617 (mult:<ssedoublemode>
13618 (sign_extend:<ssedoublemode>
13619 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,v"))
13620 (sign_extend:<ssedoublemode>
13621 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,vm")))
13622 (const_int 14))
13623 (match_operand:VI2_AVX2 3 "const1_operand"))
13624 (const_int 1))))]
13625 "TARGET_SSSE3 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
13626 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
13627 "@
13628 pmulhrsw\t{%2, %0|%0, %2}
13629 vpmulhrsw\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"
13630 [(set_attr "isa" "noavx,avx")
13631 (set_attr "type" "sseimul")
13632 (set_attr "prefix_data16" "1,*")
13633 (set_attr "prefix_extra" "1")
13634 (set_attr "prefix" "orig,maybe_evex")
13635 (set_attr "mode" "<sseinsnmode>")])
13636
;; Matcher for the MMX-register ("y") form of pmulhrsw on V4HI:
;;   op0 = truncate ((((sext (op1) * sext (op2)) >> 14) + 1) >> 1)
;; where the "+ 1" (operand 3) must satisfy const1_operand.  Operand 1 is
;; commutative ("%") and tied to the destination.
13637 (define_insn "*ssse3_pmulhrswv4hi3"
13638 [(set (match_operand:V4HI 0 "register_operand" "=y")
13639 (truncate:V4HI
13640 (lshiftrt:V4SI
13641 (plus:V4SI
13642 (lshiftrt:V4SI
13643 (mult:V4SI
13644 (sign_extend:V4SI
13645 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
13646 (sign_extend:V4SI
13647 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
13648 (const_int 14))
13649 (match_operand:V4HI 3 "const1_operand"))
13650 (const_int 1))))]
13651 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
13652 "pmulhrsw\t{%2, %0|%0, %2}"
13653 [(set_attr "type" "sseimul")
13654 (set_attr "prefix_extra" "1")
13655 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
13656 (set_attr "mode" "DI")])
13657
;; [v]pshufb on SSE/AVX byte vectors, modelled as UNSPEC_PSHUFB of the data
;; (operand 1) and the selector (operand 2, register or memory).
;; Alternative 0 is the legacy two-operand form, alternative 1 the VEX/EVEX
;; three-operand form, optionally masked via <mask_operand3>.
13658 (define_insn "<ssse3_avx2>_pshufb<mode>3<mask_name>"
13659 [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,v")
13660 (unspec:VI1_AVX512
13661 [(match_operand:VI1_AVX512 1 "register_operand" "0,v")
13662 (match_operand:VI1_AVX512 2 "nonimmediate_operand" "xm,vm")]
13663 UNSPEC_PSHUFB))]
13664 "TARGET_SSSE3 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
13665 "@
13666 pshufb\t{%2, %0|%0, %2}
13667 vpshufb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13668 [(set_attr "isa" "noavx,avx")
13669 (set_attr "type" "sselog1")
13670 (set_attr "prefix_data16" "1,*")
13671 (set_attr "prefix_extra" "1")
13672 (set_attr "prefix" "orig,maybe_evex")
13673 (set_attr "btver2_decode" "vector,vector")
13674 (set_attr "mode" "<sseinsnmode>")])
13675
;; V8QI form of pshufb using the "y" register constraint; operand 1 is
;; tied to the destination.  The prefix_rex attribute is computed by
;; calling x86_extended_reg_mentioned_p on the insn.
13676 (define_insn "ssse3_pshufbv8qi3"
13677 [(set (match_operand:V8QI 0 "register_operand" "=y")
13678 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
13679 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
13680 UNSPEC_PSHUFB))]
13681 "TARGET_SSSE3"
13682 "pshufb\t{%2, %0|%0, %2}"
13683 [(set_attr "type" "sselog1")
13684 (set_attr "prefix_extra" "1")
13685 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
13686 (set_attr "mode" "DI")])
13687
;; psign on 8/16/32-bit element vectors, represented as UNSPEC_PSIGN
;; over operands 1 and 2.  Alternative 0 is the two-operand form (isa
;; noavx); alternative 1 is the three-operand VEX form (isa avx).
13688 (define_insn "<ssse3_avx2>_psign<mode>3"
13689 [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
13690 (unspec:VI124_AVX2
13691 [(match_operand:VI124_AVX2 1 "register_operand" "0,x")
13692 (match_operand:VI124_AVX2 2 "nonimmediate_operand" "xm,xm")]
13693 UNSPEC_PSIGN))]
13694 "TARGET_SSSE3"
13695 "@
13696 psign<ssemodesuffix>\t{%2, %0|%0, %2}
13697 vpsign<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
13698 [(set_attr "isa" "noavx,avx")
13699 (set_attr "type" "sselog1")
13700 (set_attr "prefix_data16" "1,*")
13701 (set_attr "prefix_extra" "1")
13702 (set_attr "prefix" "orig,vex")
13703 (set_attr "mode" "<sseinsnmode>")])
13704
;; psign for the MMXMODEI modes using the "y" register constraint;
;; operand 1 is tied to the destination.  The mnemonic suffix comes from
;; <mmxvecsize>.
13705 (define_insn "ssse3_psign<mode>3"
13706 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
13707 (unspec:MMXMODEI
13708 [(match_operand:MMXMODEI 1 "register_operand" "0")
13709 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
13710 UNSPEC_PSIGN))]
13711 "TARGET_SSSE3"
13712 "psign<mmxvecsize>\t{%2, %0|%0, %2}"
13713 [(set_attr "type" "sselog1")
13714 (set_attr "prefix_extra" "1")
13715 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
13716 (set_attr "mode" "DI")])
13717
;; Masked vpalignr: the UNSPEC_PALIGNR result is vec_merged with operand
;; 4 under the mask register in operand 5.  Operand 3 must satisfy
;; const_0_to_255_mul_8_operand; the output code divides it by 8 before
;; it is printed as the immediate.
13718 (define_insn "<ssse3_avx2>_palignr<mode>_mask"
13719 [(set (match_operand:VI1_AVX512 0 "register_operand" "=v")
13720 (vec_merge:VI1_AVX512
13721 (unspec:VI1_AVX512
13722 [(match_operand:VI1_AVX512 1 "register_operand" "v")
13723 (match_operand:VI1_AVX512 2 "nonimmediate_operand" "vm")
13724 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
13725 UNSPEC_PALIGNR)
13726 (match_operand:VI1_AVX512 4 "vector_move_operand" "0C")
13727 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
13728 "TARGET_AVX512BW && (<MODE_SIZE> == 64 || TARGET_AVX512VL)"
13729 {
13730 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
13731 return "vpalignr\t{%3, %2, %1, %0%{%5%}%N4|%0%{%5%}%N4, %1, %2, %3}";
13732 }
13733 [(set_attr "type" "sseishft")
13734 (set_attr "atom_unit" "sishuf")
13735 (set_attr "prefix_extra" "1")
13736 (set_attr "length_immediate" "1")
13737 (set_attr "prefix" "evex")
13738 (set_attr "mode" "<sseinsnmode>")])
13739
;; Unmasked palignr, represented as UNSPEC_PALIGNR.  Operand 3 must
;; satisfy const_0_to_255_mul_8_operand and is divided by 8 before being
;; printed.  The output code then selects the two-operand palignr
;; (alternative 0) or the three-operand vpalignr (alternative 1).
13740 (define_insn "<ssse3_avx2>_palignr<mode>"
13741 [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,v")
13742 (unspec:SSESCALARMODE
13743 [(match_operand:SSESCALARMODE 1 "register_operand" "0,v")
13744 (match_operand:SSESCALARMODE 2 "nonimmediate_operand" "xm,vm")
13745 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")]
13746 UNSPEC_PALIGNR))]
13747 "TARGET_SSSE3"
13748 {
13749 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
13750
13751 switch (which_alternative)
13752 {
13753 case 0:
13754 return "palignr\t{%3, %2, %0|%0, %2, %3}";
13755 case 1:
13756 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
13757 default:
13758 gcc_unreachable ();
13759 }
13760 }
13761 [(set_attr "isa" "noavx,avx")
13762 (set_attr "type" "sseishft")
13763 (set_attr "atom_unit" "sishuf")
13764 (set_attr "prefix_data16" "1,*")
13765 (set_attr "prefix_extra" "1")
13766 (set_attr "length_immediate" "1")
13767 (set_attr "prefix" "orig,vex")
13768 (set_attr "mode" "<sseinsnmode>")])
13769
;; DImode palignr using the "y" register constraint; operand 1 is tied
;; to the destination.  As in the vector forms, operand 3 is divided by
;; 8 before it is printed as the immediate.
13770 (define_insn "ssse3_palignrdi"
13771 [(set (match_operand:DI 0 "register_operand" "=y")
13772 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
13773 (match_operand:DI 2 "nonimmediate_operand" "ym")
13774 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
13775 UNSPEC_PALIGNR))]
13776 "TARGET_SSSE3"
13777 {
13778 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
13779 return "palignr\t{%3, %2, %0|%0, %2, %3}";
13780 }
13781 [(set_attr "type" "sseishft")
13782 (set_attr "atom_unit" "sishuf")
13783 (set_attr "prefix_extra" "1")
13784 (set_attr "length_immediate" "1")
13785 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
13786 (set_attr "mode" "DI")])
13787
13788 ;; Mode iterator for the abs patterns.  The V2DI and V4DI modes are
13789 ;; enabled only with AVX512VL, as abs has no such form on pre-AVX-512 targets.
13790 (define_mode_iterator VI1248_AVX512VL_AVX512BW
13791 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
13792 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
13793 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
13794 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
13795
;; Unmasked vector absolute value for every mode enabled in
;; VI1248_AVX512VL_AVX512BW; emits pabs<ssemodesuffix> (the %v template
;; prefix selects the VEX spelling when applicable).
13796 (define_insn "*abs<mode>2"
13797 [(set (match_operand:VI1248_AVX512VL_AVX512BW 0 "register_operand" "=v")
13798 (abs:VI1248_AVX512VL_AVX512BW
13799 (match_operand:VI1248_AVX512VL_AVX512BW 1 "nonimmediate_operand" "vm")))]
13800 "TARGET_SSSE3"
13801 "%vpabs<ssemodesuffix>\t{%1, %0|%0, %1}"
13802 [(set_attr "type" "sselog1")
13803 (set_attr "prefix_data16" "1")
13804 (set_attr "prefix_extra" "1")
13805 (set_attr "prefix" "maybe_vex")
13806 (set_attr "mode" "<sseinsnmode>")])
13807
;; Masked vpabs for the VI48_AVX512VL modes: the abs result is
;; vec_merged with operand 2 under the mask register in operand 3.
13808 (define_insn "abs<mode>2_mask"
13809 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
13810 (vec_merge:VI48_AVX512VL
13811 (abs:VI48_AVX512VL
13812 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm"))
13813 (match_operand:VI48_AVX512VL 2 "vector_move_operand" "0C")
13814 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
13815 "TARGET_AVX512F"
13816 "vpabs<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
13817 [(set_attr "type" "sselog1")
13818 (set_attr "prefix" "evex")
13819 (set_attr "mode" "<sseinsnmode>")])
13820
;; Masked vpabs for the VI12_AVX512VL modes; same shape as the VI48
;; variant but enabled by TARGET_AVX512BW.
13821 (define_insn "abs<mode>2_mask"
13822 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
13823 (vec_merge:VI12_AVX512VL
13824 (abs:VI12_AVX512VL
13825 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "vm"))
13826 (match_operand:VI12_AVX512VL 2 "vector_move_operand" "0C")
13827 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
13828 "TARGET_AVX512BW"
13829 "vpabs<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
13830 [(set_attr "type" "sselog1")
13831 (set_attr "prefix" "evex")
13832 (set_attr "mode" "<sseinsnmode>")])
13833
;; Expander for vector abs, enabled from TARGET_SSE2.  Without
;; TARGET_SSSE3 it calls ix86_expand_sse2_abs and finishes with DONE;
;; otherwise the RTL template above the C body is emitted as is.
13834 (define_expand "abs<mode>2"
13835 [(set (match_operand:VI1248_AVX512VL_AVX512BW 0 "register_operand")
13836 (abs:VI1248_AVX512VL_AVX512BW
13837 (match_operand:VI1248_AVX512VL_AVX512BW 1 "nonimmediate_operand")))]
13838 "TARGET_SSE2"
13839 {
13840 if (!TARGET_SSSE3)
13841 {
13842 ix86_expand_sse2_abs (operands[0], operands[1]);
13843 DONE;
13844 }
13845 })
13846
;; Absolute value for the MMXMODEI modes using the "y" register
;; constraint; emits pabs with the <mmxvecsize> suffix.  The prefix_rex
;; attribute is computed by calling x86_extended_reg_mentioned_p.
13847 (define_insn "abs<mode>2"
13848 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
13849 (abs:MMXMODEI
13850 (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
13851 "TARGET_SSSE3"
13852 "pabs<mmxvecsize>\t{%1, %0|%0, %1}"
13853 [(set_attr "type" "sselog1")
13854 (set_attr "prefix_rep" "0")
13855 (set_attr "prefix_extra" "1")
13856 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
13857 (set_attr "mode" "DI")])
13858
13859 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13860 ;;
13861 ;; AMD SSE4A instructions
13862 ;;
13863 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13864
;; SSE4A store of a MODEF register (operand 1) to memory (operand 0),
;; wrapped in UNSPEC_MOVNT; emits movnt<ssemodesuffix>.
13865 (define_insn "sse4a_movnt<mode>"
13866 [(set (match_operand:MODEF 0 "memory_operand" "=m")
13867 (unspec:MODEF
13868 [(match_operand:MODEF 1 "register_operand" "x")]
13869 UNSPEC_MOVNT))]
13870 "TARGET_SSE4A"
13871 "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
13872 [(set_attr "type" "ssemov")
13873 (set_attr "mode" "<MODE>")])
13874
;; Variant of the SSE4A movnt store whose source is element 0 of a
;; VF_128 vector register (vec_select with index 0); the memory
;; destination has the scalar mode <ssescalarmode>.
13875 (define_insn "sse4a_vmmovnt<mode>"
13876 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
13877 (unspec:<ssescalarmode>
13878 [(vec_select:<ssescalarmode>
13879 (match_operand:VF_128 1 "register_operand" "x")
13880 (parallel [(const_int 0)]))]
13881 UNSPEC_MOVNT))]
13882 "TARGET_SSE4A"
13883 "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
13884 [(set_attr "type" "ssemov")
13885 (set_attr "mode" "<ssescalarmode>")])
13886
;; SSE4A extrq with two immediates.  Operands 2 and 3 must each satisfy
;; const_0_to_255_operand (hence length_immediate 2); operand 1 is tied
;; to the destination.
13887 (define_insn "sse4a_extrqi"
13888 [(set (match_operand:V2DI 0 "register_operand" "=x")
13889 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
13890 (match_operand 2 "const_0_to_255_operand")
13891 (match_operand 3 "const_0_to_255_operand")]
13892 UNSPEC_EXTRQI))]
13893 "TARGET_SSE4A"
13894 "extrq\t{%3, %2, %0|%0, %2, %3}"
13895 [(set_attr "type" "sse")
13896 (set_attr "prefix_data16" "1")
13897 (set_attr "length_immediate" "2")
13898 (set_attr "mode" "TI")])
13899
;; SSE4A extrq, register form: the control value comes from the V16QI
;; register in operand 2 instead of immediates.
13900 (define_insn "sse4a_extrq"
13901 [(set (match_operand:V2DI 0 "register_operand" "=x")
13902 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
13903 (match_operand:V16QI 2 "register_operand" "x")]
13904 UNSPEC_EXTRQ))]
13905 "TARGET_SSE4A"
13906 "extrq\t{%2, %0|%0, %2}"
13907 [(set_attr "type" "sse")
13908 (set_attr "prefix_data16" "1")
13909 (set_attr "mode" "TI")])
13910
;; SSE4A insertq with two immediates.  Operands 3 and 4 must each
;; satisfy const_0_to_255_operand (length_immediate 2); operand 1 is
;; tied to the destination and operand 2 is the second register source.
13911 (define_insn "sse4a_insertqi"
13912 [(set (match_operand:V2DI 0 "register_operand" "=x")
13913 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
13914 (match_operand:V2DI 2 "register_operand" "x")
13915 (match_operand 3 "const_0_to_255_operand")
13916 (match_operand 4 "const_0_to_255_operand")]
13917 UNSPEC_INSERTQI))]
13918 "TARGET_SSE4A"
13919 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
13920 [(set_attr "type" "sseins")
13921 (set_attr "prefix_data16" "0")
13922 (set_attr "prefix_rep" "1")
13923 (set_attr "length_immediate" "2")
13924 (set_attr "mode" "TI")])
13925
;; SSE4A insertq, register form: no immediates; operand 1 is tied to
;; the destination and operand 2 is the second register source.
13926 (define_insn "sse4a_insertq"
13927 [(set (match_operand:V2DI 0 "register_operand" "=x")
13928 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
13929 (match_operand:V2DI 2 "register_operand" "x")]
13930 UNSPEC_INSERTQ))]
13931 "TARGET_SSE4A"
13932 "insertq\t{%2, %0|%0, %2}"
13933 [(set_attr "type" "sseins")
13934 (set_attr "prefix_data16" "0")
13935 (set_attr "prefix_rep" "1")
13936 (set_attr "mode" "TI")])
13937
13938 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13939 ;;
13940 ;; Intel SSE4.1 instructions
13941 ;;
13942 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13943
13944 ;; Mapping of immediate bits for blend instructions
;; The value is the largest immediate accepted for each mode; it is
;; substituted into the predicate name const_0_to_<blendbits>_operand.
13945 (define_mode_attr blendbits
13946 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
13947
;; Immediate-controlled floating-point blend, expressed as a vec_merge
;; of operand 2 and operand 1 selected by the immediate in operand 3.
;; The immediate's range is limited per mode through <blendbits>.
13948 (define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>"
13949 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
13950 (vec_merge:VF_128_256
13951 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")
13952 (match_operand:VF_128_256 1 "register_operand" "0,x")
13953 (match_operand:SI 3 "const_0_to_<blendbits>_operand")))]
13954 "TARGET_SSE4_1"
13955 "@
13956 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
13957 vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
13958 [(set_attr "isa" "noavx,avx")
13959 (set_attr "type" "ssemov")
13960 (set_attr "length_immediate" "1")
13961 (set_attr "prefix_data16" "1,*")
13962 (set_attr "prefix_extra" "1")
13963 (set_attr "prefix" "orig,vex")
13964 (set_attr "mode" "<MODE>")])
13965
;; Variable floating-point blend, represented as UNSPEC_BLENDV with the
;; selector in register operand 3.  In the noavx alternative operand 3
;; is restricted to the "Yz" constraint (NOTE(review): presumably the
;; implicit xmm0 operand -- confirm in constraints.md); the avx
;; alternative accepts any "x" register.
13966 (define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>"
13967 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
13968 (unspec:VF_128_256
13969 [(match_operand:VF_128_256 1 "register_operand" "0,x")
13970 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")
13971 (match_operand:VF_128_256 3 "register_operand" "Yz,x")]
13972 UNSPEC_BLENDV))]
13973 "TARGET_SSE4_1"
13974 "@
13975 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
13976 vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
13977 [(set_attr "isa" "noavx,avx")
13978 (set_attr "type" "ssemov")
13979 (set_attr "length_immediate" "1")
13980 (set_attr "prefix_data16" "1,*")
13981 (set_attr "prefix_extra" "1")
13982 (set_attr "prefix" "orig,vex")
13983 (set_attr "btver2_decode" "vector,vector")
13984 (set_attr "mode" "<MODE>")])
13985
;; dp<ssemodesuffix> with an 8-bit immediate control (operand 3),
;; represented as UNSPEC_DP.  Operands 1 and 2 are marked commutative
;; by the "%" in operand 1's constraint.
13986 (define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
13987 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
13988 (unspec:VF_128_256
13989 [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,x")
13990 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")
13991 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
13992 UNSPEC_DP))]
13993 "TARGET_SSE4_1"
13994 "@
13995 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
13996 vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
13997 [(set_attr "isa" "noavx,avx")
13998 (set_attr "type" "ssemul")
13999 (set_attr "length_immediate" "1")
14000 (set_attr "prefix_data16" "1,*")
14001 (set_attr "prefix_extra" "1")
14002 (set_attr "prefix" "orig,vex")
14003 (set_attr "btver2_decode" "vector,vector")
14004 (set_attr "mode" "<MODE>")])
14005
14006 ;; Mode attribute used by `vmovntdqa' pattern
;; It supplies the pattern-name prefix for each 64-bit-element vector mode.
14007 (define_mode_attr vi8_sse4_1_avx2_avx512
14008 [(V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512f")])
14009
;; movntdqa load from memory operand 1 into a vector register, wrapped
;; in UNSPEC_MOVNTDQA.  The two alternatives differ only in the
;; destination register class ("x" vs. "v") and the prefix attributes.
14010 (define_insn "<vi8_sse4_1_avx2_avx512>_movntdqa"
14011 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand" "=x, v")
14012 (unspec:VI8_AVX2_AVX512F [(match_operand:VI8_AVX2_AVX512F 1 "memory_operand" "m, m")]
14013 UNSPEC_MOVNTDQA))]
14014 "TARGET_SSE4_1"
14015 "%vmovntdqa\t{%1, %0|%0, %1}"
14016 [(set_attr "type" "ssemov")
14017 (set_attr "prefix_extra" "1, *")
14018 (set_attr "prefix" "maybe_vex, evex")
14019 (set_attr "mode" "<sseinsnmode>")])
14020
;; mpsadbw with an 8-bit immediate control (operand 3), represented as
;; UNSPEC_MPSADBW on byte vectors.  Alternative 0 is the two-operand
;; form, alternative 1 the three-operand VEX form.
14021 (define_insn "<sse4_1_avx2>_mpsadbw"
14022 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
14023 (unspec:VI1_AVX2
14024 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
14025 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")
14026 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
14027 UNSPEC_MPSADBW))]
14028 "TARGET_SSE4_1"
14029 "@
14030 mpsadbw\t{%3, %2, %0|%0, %2, %3}
14031 vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14032 [(set_attr "isa" "noavx,avx")
14033 (set_attr "type" "sselog1")
14034 (set_attr "length_immediate" "1")
14035 (set_attr "prefix_extra" "1")
14036 (set_attr "prefix" "orig,vex")
14037 (set_attr "btver2_decode" "vector,vector")
14038 (set_attr "mode" "<sseinsnmode>")])
14039
;; packusdw: each <sseunpackmode> source is truncated with unsigned
;; saturation (us_truncate) to <ssehalfvecmode>, and the two halves are
;; concatenated.  The AVX alternative appends <mask_operand3>.
14040 (define_insn "<sse4_1_avx2>_packusdw<mask_name>"
14041 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
14042 (vec_concat:VI2_AVX2
14043 (us_truncate:<ssehalfvecmode>
14044 (match_operand:<sseunpackmode> 1 "register_operand" "0,v"))
14045 (us_truncate:<ssehalfvecmode>
14046 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,vm"))))]
14047 "TARGET_SSE4_1 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
14048 "@
14049 packusdw\t{%2, %0|%0, %2}
14050 vpackusdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14051 [(set_attr "isa" "noavx,avx")
14052 (set_attr "type" "sselog")
14053 (set_attr "prefix_extra" "1")
14054 (set_attr "prefix" "orig,maybe_evex")
14055 (set_attr "mode" "<sseinsnmode>")])
14056
;; Variable byte blend, represented as UNSPEC_BLENDV with the selector
;; in register operand 3 ("Yz" in the noavx alternative, any "x"
;; register in the avx alternative).  length_immediate is set only for
;; the avx alternative.
14057 (define_insn "<sse4_1_avx2>_pblendvb"
14058 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
14059 (unspec:VI1_AVX2
14060 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
14061 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")
14062 (match_operand:VI1_AVX2 3 "register_operand" "Yz,x")]
14063 UNSPEC_BLENDV))]
14064 "TARGET_SSE4_1"
14065 "@
14066 pblendvb\t{%3, %2, %0|%0, %2, %3}
14067 vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14068 [(set_attr "isa" "noavx,avx")
14069 (set_attr "type" "ssemov")
14070 (set_attr "prefix_extra" "1")
14071 (set_attr "length_immediate" "*,1")
14072 (set_attr "prefix" "orig,vex")
14073 (set_attr "btver2_decode" "vector,vector")
14074 (set_attr "mode" "<sseinsnmode>")])
14075
;; V8HI word blend: vec_merge of operand 2 and operand 1 under the
;; 8-bit immediate mask in operand 3.
14076 (define_insn "sse4_1_pblendw"
14077 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
14078 (vec_merge:V8HI
14079 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
14080 (match_operand:V8HI 1 "register_operand" "0,x")
14081 (match_operand:SI 3 "const_0_to_255_operand" "n,n")))]
14082 "TARGET_SSE4_1"
14083 "@
14084 pblendw\t{%3, %2, %0|%0, %2, %3}
14085 vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14086 [(set_attr "isa" "noavx,avx")
14087 (set_attr "type" "ssemov")
14088 (set_attr "prefix_extra" "1")
14089 (set_attr "length_immediate" "1")
14090 (set_attr "prefix" "orig,vex")
14091 (set_attr "mode" "TI")])
14092
14093 ;; The builtin uses an 8-bit immediate. Expand that.
;; The low 8 bits of operand 3 are replicated into bits 8-15
;; (val << 8 | val), giving one vec_merge mask bit for each of the
;; sixteen V16HI elements.
14094 (define_expand "avx2_pblendw"
14095 [(set (match_operand:V16HI 0 "register_operand")
14096 (vec_merge:V16HI
14097 (match_operand:V16HI 2 "nonimmediate_operand")
14098 (match_operand:V16HI 1 "register_operand")
14099 (match_operand:SI 3 "const_0_to_255_operand")))]
14100 "TARGET_AVX2"
14101 {
14102 HOST_WIDE_INT val = INTVAL (operands[3]) & 0xff;
14103 operands[3] = GEN_INT (val << 8 | val);
14104 })
14105
;; Matching insn for the V16HI word blend.  Operand 3 must satisfy
;; avx2_pblendw_operand; the output code reduces it to its low 8 bits
;; before printing it as the vpblendw immediate.
14106 (define_insn "*avx2_pblendw"
14107 [(set (match_operand:V16HI 0 "register_operand" "=x")
14108 (vec_merge:V16HI
14109 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
14110 (match_operand:V16HI 1 "register_operand" "x")
14111 (match_operand:SI 3 "avx2_pblendw_operand" "n")))]
14112 "TARGET_AVX2"
14113 {
14114 operands[3] = GEN_INT (INTVAL (operands[3]) & 0xff);
14115 return "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
14116 }
14117 [(set_attr "type" "ssemov")
14118 (set_attr "prefix_extra" "1")
14119 (set_attr "length_immediate" "1")
14120 (set_attr "prefix" "vex")
14121 (set_attr "mode" "OI")])
14122
;; AVX2 doubleword blend: vec_merge of operand 2 and operand 1 under
;; the 8-bit immediate mask in operand 3, for the VI4_AVX2 modes.
14123 (define_insn "avx2_pblendd<mode>"
14124 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
14125 (vec_merge:VI4_AVX2
14126 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")
14127 (match_operand:VI4_AVX2 1 "register_operand" "x")
14128 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
14129 "TARGET_AVX2"
14130 "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14131 [(set_attr "type" "ssemov")
14132 (set_attr "prefix_extra" "1")
14133 (set_attr "length_immediate" "1")
14134 (set_attr "prefix" "vex")
14135 (set_attr "mode" "<sseinsnmode>")])
14136
;; phminposuw on a V8HI operand, represented as UNSPEC_PHMINPOSUW.
14137 (define_insn "sse4_1_phminposuw"
14138 [(set (match_operand:V8HI 0 "register_operand" "=x")
14139 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
14140 UNSPEC_PHMINPOSUW))]
14141 "TARGET_SSE4_1"
14142 "%vphminposuw\t{%1, %0|%0, %1}"
14143 [(set_attr "type" "sselog1")
14144 (set_attr "prefix_extra" "1")
14145 (set_attr "prefix" "maybe_vex")
14146 (set_attr "mode" "TI")])
14147
;; Sign/zero extension (any_extend) of all sixteen bytes of operand 1
;; to V16HI.  The whole V16QI operand is read, so it is printed without
;; a size modifier.
14148 (define_insn "avx2_<code>v16qiv16hi2<mask_name>"
14149 [(set (match_operand:V16HI 0 "register_operand" "=v")
14150 (any_extend:V16HI
14151 (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
14152 "TARGET_AVX2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
14153 "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14154 [(set_attr "type" "ssemov")
14155 (set_attr "prefix_extra" "1")
14156 (set_attr "prefix" "maybe_evex")
14157 (set_attr "mode" "OI")])
14158
;; Sign/zero extension of all thirty-two bytes of operand 1 to V32HI;
;; requires TARGET_AVX512BW.
14159 (define_insn "avx512bw_<code>v32qiv32hi2<mask_name>"
14160 [(set (match_operand:V32HI 0 "register_operand" "=v")
14161 (any_extend:V32HI
14162 (match_operand:V32QI 1 "nonimmediate_operand" "vm")))]
14163 "TARGET_AVX512BW"
14164 "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14165 [(set_attr "type" "ssemov")
14166 (set_attr "prefix_extra" "1")
14167 (set_attr "prefix" "evex")
14168 (set_attr "mode" "XI")])
14169
;; Sign/zero extension of the low eight bytes of operand 1 (vec_select
;; of elements 0-7) to V8HI.  Only 8 bytes are read, so the Intel-syntax
;; operand is printed with the %q modifier and ssememalign is 64.
14170 (define_insn "sse4_1_<code>v8qiv8hi2<mask_name>"
14171 [(set (match_operand:V8HI 0 "register_operand" "=v")
14172 (any_extend:V8HI
14173 (vec_select:V8QI
14174 (match_operand:V16QI 1 "nonimmediate_operand" "vm")
14175 (parallel [(const_int 0) (const_int 1)
14176 (const_int 2) (const_int 3)
14177 (const_int 4) (const_int 5)
14178 (const_int 6) (const_int 7)]))))]
14179 "TARGET_SSE4_1 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
14180 "%vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
14181 [(set_attr "type" "ssemov")
14182 (set_attr "ssememalign" "64")
14183 (set_attr "prefix_extra" "1")
14184 (set_attr "prefix" "maybe_vex")
14185 (set_attr "mode" "TI")])
14186
;; Sign/zero extension of all sixteen bytes of operand 1 to V16SI.
;; The whole 16-byte V16QI operand is read (there is no vec_select), so
;; the Intel-syntax operand is printed as plain %1, as in the other
;; full-width extension patterns; a %q modifier would describe a memory
;; source as only 8 bytes wide.
14187 (define_insn "<mask_codefor>avx512f_<code>v16qiv16si2<mask_name>"
14188 [(set (match_operand:V16SI 0 "register_operand" "=v")
14189 (any_extend:V16SI
14190 (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
14191 "TARGET_AVX512F"
14192 "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14193 [(set_attr "type" "ssemov")
14194 (set_attr "prefix" "evex")
14195 (set_attr "mode" "XI")])
14196
;; Sign/zero extension of the low eight bytes of operand 1 (vec_select
;; of elements 0-7) to V8SI; the Intel-syntax operand uses %q to match
;; the 8 bytes read.
14197 (define_insn "avx2_<code>v8qiv8si2<mask_name>"
14198 [(set (match_operand:V8SI 0 "register_operand" "=v")
14199 (any_extend:V8SI
14200 (vec_select:V8QI
14201 (match_operand:V16QI 1 "nonimmediate_operand" "vm")
14202 (parallel [(const_int 0) (const_int 1)
14203 (const_int 2) (const_int 3)
14204 (const_int 4) (const_int 5)
14205 (const_int 6) (const_int 7)]))))]
14206 "TARGET_AVX2 && <mask_avx512vl_condition>"
14207 "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
14208 [(set_attr "type" "ssemov")
14209 (set_attr "prefix_extra" "1")
14210 (set_attr "prefix" "maybe_evex")
14211 (set_attr "mode" "OI")])
14212
;; Sign/zero extension of the low four bytes of operand 1 (vec_select
;; of elements 0-3) to V4SI; the Intel-syntax operand uses %k to match
;; the 4 bytes read, and ssememalign is 32.
14213 (define_insn "sse4_1_<code>v4qiv4si2<mask_name>"
14214 [(set (match_operand:V4SI 0 "register_operand" "=v")
14215 (any_extend:V4SI
14216 (vec_select:V4QI
14217 (match_operand:V16QI 1 "nonimmediate_operand" "vm")
14218 (parallel [(const_int 0) (const_int 1)
14219 (const_int 2) (const_int 3)]))))]
14220 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
14221 "%vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
14222 [(set_attr "type" "ssemov")
14223 (set_attr "ssememalign" "32")
14224 (set_attr "prefix_extra" "1")
14225 (set_attr "prefix" "maybe_vex")
14226 (set_attr "mode" "TI")])
14227
;; Sign/zero extension of all sixteen words of operand 1 to V16SI; the
;; whole V16HI operand is read, so no size modifier is used.
14228 (define_insn "avx512f_<code>v16hiv16si2<mask_name>"
14229 [(set (match_operand:V16SI 0 "register_operand" "=v")
14230 (any_extend:V16SI
14231 (match_operand:V16HI 1 "nonimmediate_operand" "vm")))]
14232 "TARGET_AVX512F"
14233 "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14234 [(set_attr "type" "ssemov")
14235 (set_attr "prefix" "evex")
14236 (set_attr "mode" "XI")])
14237
;; Sign/zero extension of all eight words of operand 1 to V8SI; the
;; whole V8HI operand is read, so no size modifier is used.
14238 (define_insn "avx2_<code>v8hiv8si2<mask_name>"
14239 [(set (match_operand:V8SI 0 "register_operand" "=v")
14240 (any_extend:V8SI
14241 (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
14242 "TARGET_AVX2 && <mask_avx512vl_condition>"
14243 "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14244 [(set_attr "type" "ssemov")
14245 (set_attr "prefix_extra" "1")
14246 (set_attr "prefix" "maybe_evex")
14247 (set_attr "mode" "OI")])
14248
;; Sign/zero extension of the low four words of operand 1 (vec_select
;; of elements 0-3) to V4SI; 8 bytes are read, hence %q and
;; ssememalign 64.
14249 (define_insn "sse4_1_<code>v4hiv4si2<mask_name>"
14250 [(set (match_operand:V4SI 0 "register_operand" "=v")
14251 (any_extend:V4SI
14252 (vec_select:V4HI
14253 (match_operand:V8HI 1 "nonimmediate_operand" "vm")
14254 (parallel [(const_int 0) (const_int 1)
14255 (const_int 2) (const_int 3)]))))]
14256 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
14257 "%vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
14258 [(set_attr "type" "ssemov")
14259 (set_attr "ssememalign" "64")
14260 (set_attr "prefix_extra" "1")
14261 (set_attr "prefix" "maybe_vex")
14262 (set_attr "mode" "TI")])
14263
;; Sign/zero extension of the low eight bytes of operand 1 (vec_select
;; of elements 0-7) to V8DI.  Eight bytes are read, so the Intel-syntax
;; operand is printed with the %q modifier, as in the other patterns
;; that select eight bytes; %k would describe a memory source as only
;; 4 bytes wide.
14264 (define_insn "avx512f_<code>v8qiv8di2<mask_name>"
14265 [(set (match_operand:V8DI 0 "register_operand" "=v")
14266 (any_extend:V8DI
14267 (vec_select:V8QI
14268 (match_operand:V16QI 1 "nonimmediate_operand" "vm")
14269 (parallel [(const_int 0) (const_int 1)
14270 (const_int 2) (const_int 3)
14271 (const_int 4) (const_int 5)
14272 (const_int 6) (const_int 7)]))))]
14273 "TARGET_AVX512F"
14274 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
14275 [(set_attr "type" "ssemov")
14276 (set_attr "prefix" "evex")
14277 (set_attr "mode" "XI")])
14278
;; Sign/zero extension of the low four bytes of operand 1 (vec_select
;; of elements 0-3) to V4DI; 4 bytes are read, hence %k.
14279 (define_insn "avx2_<code>v4qiv4di2<mask_name>"
14280 [(set (match_operand:V4DI 0 "register_operand" "=v")
14281 (any_extend:V4DI
14282 (vec_select:V4QI
14283 (match_operand:V16QI 1 "nonimmediate_operand" "vm")
14284 (parallel [(const_int 0) (const_int 1)
14285 (const_int 2) (const_int 3)]))))]
14286 "TARGET_AVX2 && <mask_avx512vl_condition>"
14287 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
14288 [(set_attr "type" "ssemov")
14289 (set_attr "prefix_extra" "1")
14290 (set_attr "prefix" "maybe_evex")
14291 (set_attr "mode" "OI")])
14292
;; Sign/zero extension of the low two bytes of operand 1 (vec_select of
;; elements 0-1) to V2DI; 2 bytes are read, hence %w and
;; ssememalign 16.
14293 (define_insn "sse4_1_<code>v2qiv2di2<mask_name>"
14294 [(set (match_operand:V2DI 0 "register_operand" "=v")
14295 (any_extend:V2DI
14296 (vec_select:V2QI
14297 (match_operand:V16QI 1 "nonimmediate_operand" "vm")
14298 (parallel [(const_int 0) (const_int 1)]))))]
14299 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
14300 "%vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %w1}"
14301 [(set_attr "type" "ssemov")
14302 (set_attr "ssememalign" "16")
14303 (set_attr "prefix_extra" "1")
14304 (set_attr "prefix" "maybe_vex")
14305 (set_attr "mode" "TI")])
14306
;; Sign/zero extension of all eight words of operand 1 to V8DI.  The
;; whole 16-byte V8HI operand is read (there is no vec_select), so the
;; Intel-syntax operand is printed as plain %1, as in the other
;; full-width extension patterns; a %q modifier would describe a memory
;; source as only 8 bytes wide.
14307 (define_insn "avx512f_<code>v8hiv8di2<mask_name>"
14308 [(set (match_operand:V8DI 0 "register_operand" "=v")
14309 (any_extend:V8DI
14310 (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
14311 "TARGET_AVX512F"
14312 "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14313 [(set_attr "type" "ssemov")
14314 (set_attr "prefix" "evex")
14315 (set_attr "mode" "XI")])
14316
;; Sign/zero extension of the low four words of operand 1 (vec_select
;; of elements 0-3) to V4DI; 8 bytes are read, hence %q.
14317 (define_insn "avx2_<code>v4hiv4di2<mask_name>"
14318 [(set (match_operand:V4DI 0 "register_operand" "=v")
14319 (any_extend:V4DI
14320 (vec_select:V4HI
14321 (match_operand:V8HI 1 "nonimmediate_operand" "vm")
14322 (parallel [(const_int 0) (const_int 1)
14323 (const_int 2) (const_int 3)]))))]
14324 "TARGET_AVX2 && <mask_avx512vl_condition>"
14325 "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
14326 [(set_attr "type" "ssemov")
14327 (set_attr "prefix_extra" "1")
14328 (set_attr "prefix" "maybe_evex")
14329 (set_attr "mode" "OI")])
14330
;; Sign/zero extension of the low two words of operand 1 (vec_select of
;; elements 0-1) to V2DI; 4 bytes are read, hence %k and
;; ssememalign 32.
14331 (define_insn "sse4_1_<code>v2hiv2di2<mask_name>"
14332 [(set (match_operand:V2DI 0 "register_operand" "=v")
14333 (any_extend:V2DI
14334 (vec_select:V2HI
14335 (match_operand:V8HI 1 "nonimmediate_operand" "vm")
14336 (parallel [(const_int 0) (const_int 1)]))))]
14337 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
14338 "%vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
14339 [(set_attr "type" "ssemov")
14340 (set_attr "ssememalign" "32")
14341 (set_attr "prefix_extra" "1")
14342 (set_attr "prefix" "maybe_vex")
14343 (set_attr "mode" "TI")])
14344
;; Sign/zero extension of all eight doublewords of operand 1 to V8DI;
;; the whole V8SI operand is read, so no size modifier is used.
14345 (define_insn "avx512f_<code>v8siv8di2<mask_name>"
14346 [(set (match_operand:V8DI 0 "register_operand" "=v")
14347 (any_extend:V8DI
14348 (match_operand:V8SI 1 "nonimmediate_operand" "vm")))]
14349 "TARGET_AVX512F"
14350 "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14351 [(set_attr "type" "ssemov")
14352 (set_attr "prefix" "evex")
14353 (set_attr "mode" "XI")])
14354
;; Sign/zero extension of all four doublewords of operand 1 to V4DI;
;; the whole V4SI operand is read, so no size modifier is used.
14355 (define_insn "avx2_<code>v4siv4di2<mask_name>"
14356 [(set (match_operand:V4DI 0 "register_operand" "=v")
14357 (any_extend:V4DI
14358 (match_operand:V4SI 1 "nonimmediate_operand" "vm")))]
14359 "TARGET_AVX2 && <mask_avx512vl_condition>"
14360 "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14361 [(set_attr "type" "ssemov")
14362 (set_attr "prefix" "maybe_evex")
14363 (set_attr "prefix_extra" "1")
14364 (set_attr "mode" "OI")])
14365
;; Sign/zero extension of the low two doublewords of operand 1
;; (vec_select of elements 0-1) to V2DI; 8 bytes are read, hence %q and
;; ssememalign 64.
14366 (define_insn "sse4_1_<code>v2siv2di2<mask_name>"
14367 [(set (match_operand:V2DI 0 "register_operand" "=v")
14368 (any_extend:V2DI
14369 (vec_select:V2SI
14370 (match_operand:V4SI 1 "nonimmediate_operand" "vm")
14371 (parallel [(const_int 0) (const_int 1)]))))]
14372 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
14373 "%vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
14374 [(set_attr "type" "ssemov")
14375 (set_attr "ssememalign" "64")
14376 (set_attr "prefix_extra" "1")
14377 (set_attr "prefix" "maybe_vex")
14378 (set_attr "mode" "TI")])
14379
14380 ;; vtestps/vtestpd are very similar to comiss and ucomiss in that they
14381 ;; set FLAGS_REG, but they are not really compare instructions.
;; The result is therefore modelled as UNSPEC_VTESTP in CCmode rather
;; than as a compare rtx.
14382 (define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>"
14383 [(set (reg:CC FLAGS_REG)
14384 (unspec:CC [(match_operand:VF_128_256 0 "register_operand" "x")
14385 (match_operand:VF_128_256 1 "nonimmediate_operand" "xm")]
14386 UNSPEC_VTESTP))]
14387 "TARGET_AVX"
14388 "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
14389 [(set_attr "type" "ssecomi")
14390 (set_attr "prefix_extra" "1")
14391 (set_attr "prefix" "vex")
14392 (set_attr "mode" "<MODE>")])
14393
14394 ;; ptest is very similar to comiss and ucomiss in that it sets FLAGS_REG,
14395 ;; but it is not really a compare instruction.
;; This is the V4DI form; the result is modelled as UNSPEC_PTEST in
;; CCmode.
14396 (define_insn "avx_ptest256"
14397 [(set (reg:CC FLAGS_REG)
14398 (unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
14399 (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
14400 UNSPEC_PTEST))]
14401 "TARGET_AVX"
14402 "vptest\t{%1, %0|%0, %1}"
14403 [(set_attr "type" "ssecomi")
14404 (set_attr "prefix_extra" "1")
14405 (set_attr "prefix" "vex")
14406 (set_attr "btver2_decode" "vector")
14407 (set_attr "mode" "OI")])
14408
;; V2DI form of ptest: sets FLAGS_REG (CCmode) from UNSPEC_PTEST of
;; operands 0 and 1.  The %v template prefix selects the VEX spelling
;; when applicable.
14409 (define_insn "sse4_1_ptest"
14410 [(set (reg:CC FLAGS_REG)
14411 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
14412 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
14413 UNSPEC_PTEST))]
14414 "TARGET_SSE4_1"
14415 "%vptest\t{%1, %0|%0, %1}"
14416 [(set_attr "type" "ssecomi")
14417 (set_attr "prefix_extra" "1")
14418 (set_attr "prefix" "maybe_vex")
14419 (set_attr "mode" "TI")])
14420
;; Packed floating-point round with a 4-bit immediate control
;; (operand 2, const_0_to_15_operand), represented as UNSPEC_ROUND.
;; prefix_data16 is "*" when TARGET_AVX is set and "1" otherwise.
14421 (define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
14422 [(set (match_operand:VF_128_256 0 "register_operand" "=x")
14423 (unspec:VF_128_256
14424 [(match_operand:VF_128_256 1 "nonimmediate_operand" "xm")
14425 (match_operand:SI 2 "const_0_to_15_operand" "n")]
14426 UNSPEC_ROUND))]
14427 "TARGET_ROUND"
14428 "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
14429 [(set_attr "type" "ssecvt")
14430 (set (attr "prefix_data16")
14431 (if_then_else
14432 (match_test "TARGET_AVX")
14433 (const_string "*")
14434 (const_string "1")))
14435 (set_attr "prefix_extra" "1")
14436 (set_attr "length_immediate" "1")
14437 (set_attr "prefix" "maybe_vex")
14438 (set_attr "mode" "<MODE>")])
14439
;; Round-then-convert expander: rounds operand 1 into a fresh register
;; with the rounding control in operand 2, then converts that temporary
;; to the integer vector in operand 0 via the fix_trunc pattern.
14440 (define_expand "<sse4_1>_round<ssemodesuffix>_sfix<avxsizesuffix>"
14441 [(match_operand:<sseintvecmode> 0 "register_operand")
14442 (match_operand:VF1_128_256 1 "nonimmediate_operand")
14443 (match_operand:SI 2 "const_0_to_15_operand")]
14444 "TARGET_ROUND"
14445 {
14446 rtx tmp = gen_reg_rtx (<MODE>mode);
14447
14448 emit_insn
14449 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp, operands[1],
14450 operands[2]));
14451 emit_insn
14452 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
14453 DONE;
14454 })
14455
;; V8DF round expander: forwards its three operands unchanged to
;; gen_avx512f_rndscalev8df.
14456 (define_expand "avx512f_roundpd512"
14457 [(match_operand:V8DF 0 "register_operand")
14458 (match_operand:V8DF 1 "nonimmediate_operand")
14459 (match_operand:SI 2 "const_0_to_15_operand")]
14460 "TARGET_AVX512F"
14461 {
14462 emit_insn (gen_avx512f_rndscalev8df (operands[0], operands[1], operands[2]));
14463 DONE;
14464 })
14465
;; Round two VF2 vectors (operands 1 and 2) with the control in operand
;; 3 and pack the truncated integer results into operand 0.
;; For V2DF with TARGET_AVX and !TARGET_PREFER_AVX128 the two inputs are
;; first concatenated into one V4DF, rounded once with roundpd256 and
;; converted with fix_truncv4dfv4si2.  Otherwise each input is rounded
;; separately and the pair is combined by vec_pack_sfix_trunc_<mode>.
14466 (define_expand "<sse4_1>_round<ssemodesuffix>_vec_pack_sfix<avxsizesuffix>"
14467 [(match_operand:<ssepackfltmode> 0 "register_operand")
14468 (match_operand:VF2 1 "nonimmediate_operand")
14469 (match_operand:VF2 2 "nonimmediate_operand")
14470 (match_operand:SI 3 "const_0_to_15_operand")]
14471 "TARGET_ROUND"
14472 {
14473 rtx tmp0, tmp1;
14474
14475 if (<MODE>mode == V2DFmode
14476 && TARGET_AVX && !TARGET_PREFER_AVX128)
14477 {
14478 rtx tmp2 = gen_reg_rtx (V4DFmode);
14479
14480 tmp0 = gen_reg_rtx (V4DFmode);
14481 tmp1 = force_reg (V2DFmode, operands[1]);
14482
14483 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
14484 emit_insn (gen_avx_roundpd256 (tmp2, tmp0, operands[3]));
14485 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
14486 }
14487 else
14488 {
14489 tmp0 = gen_reg_rtx (<MODE>mode);
14490 tmp1 = gen_reg_rtx (<MODE>mode);
14491
14492 emit_insn
14493 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp0, operands[1],
14494 operands[3]));
14495 emit_insn
14496 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp1, operands[2],
14497 operands[3]));
14498 emit_insn
14499 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
14500 }
14501 DONE;
14502 })
14503
;; Scalar round on the VF_128 modes, expressed as a vec_merge with mask
;; (const_int 1): UNSPEC_ROUND of operand 2 and immediate operand 3 is
;; merged with operand 1.
;;
;; There are two alternatives, selected by the "isa" attribute:
;;  - noavx: operand 1 is tied to operand 0 (constraint "0"); the template
;;    prints %3, %2, %0.
;;  - avx: operand 1 is a separate register; the template prints
;;    %3, %2, %1, %0.
14504 (define_insn "sse4_1_round<ssescalarmodesuffix>"
14505 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
14506 (vec_merge:VF_128
14507 (unspec:VF_128
14508 [(match_operand:VF_128 2 "register_operand" "x,x")
14509 (match_operand:SI 3 "const_0_to_15_operand" "n,n")]
14510 UNSPEC_ROUND)
14511 (match_operand:VF_128 1 "register_operand" "0,x")
14512 (const_int 1)))]
14513 "TARGET_ROUND"
14514 "@
14515 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
14516 vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14517 [(set_attr "isa" "noavx,avx")
14518 (set_attr "type" "ssecvt")
14519 (set_attr "length_immediate" "1")
14520 (set_attr "prefix_data16" "1,*")
14521 (set_attr "prefix_extra" "1")
14522 (set_attr "prefix" "orig,vex")
14523 (set_attr "mode" "<MODE>")])
14524
;; Vector round expander for the VF modes, enabled only when TARGET_ROUND
;; is set and flag_trapping_math is clear.
;;
;; The preparation block fills in the match_dup operands used by the
;; template:
;;  - operands[3]: a fresh register holding copysign (vec_half, operands[1]),
;;    where vec_half is the vector constant built from pred_half.  pred_half
;;    is dconsthalf minus the value produced by
;;    real_2expN (..., -(fmt->p) - 1, ...); the in-code comment describes it
;;    as nextafter (0.5, 0.0).
;;  - operands[4]: a fresh register that receives operands[1] + operands[3].
;;  - operands[5]: the ROUND_TRUNC immediate.
;;
;; The template then sets operand 0 to UNSPEC_ROUND of operands[4] and
;; operands[5].
14525 (define_expand "round<mode>2"
14526 [(set (match_dup 4)
14527 (plus:VF
14528 (match_operand:VF 1 "register_operand")
14529 (match_dup 3)))
14530 (set (match_operand:VF 0 "register_operand")
14531 (unspec:VF
14532 [(match_dup 4) (match_dup 5)]
14533 UNSPEC_ROUND))]
14534 "TARGET_ROUND && !flag_trapping_math"
14535 {
14536 machine_mode scalar_mode;
14537 const struct real_format *fmt;
14538 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
14539 rtx half, vec_half;
14540
14541 scalar_mode = GET_MODE_INNER (<MODE>mode);
14542
14543 /* load nextafter (0.5, 0.0) */
14544 fmt = REAL_MODE_FORMAT (scalar_mode);
14545 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, scalar_mode);
14546 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
14547 half = const_double_from_real_value (pred_half, scalar_mode);
14548
14549 vec_half = ix86_build_const_vector (<MODE>mode, true, half);
14550 vec_half = force_reg (<MODE>mode, vec_half);
14551
14552 operands[3] = gen_reg_rtx (<MODE>mode);
14553 emit_insn (gen_copysign<mode>3 (operands[3], vec_half, operands[1]));
14554
14555 operands[4] = gen_reg_rtx (<MODE>mode);
14556 operands[5] = GEN_INT (ROUND_TRUNC);
14557 })
14558
;; Round-then-convert expander for the VF1_128_256 modes.  It rounds
;; operand 1 into a fresh temporary via gen_round<mode>2, then converts
;; the temporary into the <sseintvecmode> operand 0 with fix_trunc.
;; Same enabling condition as round<mode>2.
14559 (define_expand "round<mode>2_sfix"
14560 [(match_operand:<sseintvecmode> 0 "register_operand")
14561 (match_operand:VF1_128_256 1 "register_operand")]
14562 "TARGET_ROUND && !flag_trapping_math"
14563 {
14564 rtx tmp = gen_reg_rtx (<MODE>mode);
14565
14566 emit_insn (gen_round<mode>2 (tmp, operands[1]));
14567
14568 emit_insn
14569 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
14570 DONE;
14571 })
14572
;; Round two VF2 inputs with round<mode>2 and pack the converted results
;; into the <ssepackfltmode> operand 0.
;;
;; Two strategies are used:
;;  - <MODE> is V2DFmode, TARGET_AVX is set and TARGET_PREFER_AVX128 is not:
;;    concatenate both inputs into one V4DF temporary, round it once with
;;    gen_roundv4df2 and convert with gen_fix_truncv4dfv4si2.
;;  - otherwise: round each input separately and combine the two
;;    temporaries with gen_vec_pack_sfix_trunc_<mode>.
14573 (define_expand "round<mode>2_vec_pack_sfix"
14574 [(match_operand:<ssepackfltmode> 0 "register_operand")
14575 (match_operand:VF2 1 "register_operand")
14576 (match_operand:VF2 2 "register_operand")]
14577 "TARGET_ROUND && !flag_trapping_math"
14578 {
14579 rtx tmp0, tmp1;
14580
14581 if (<MODE>mode == V2DFmode
14582 && TARGET_AVX && !TARGET_PREFER_AVX128)
14583 {
14584 rtx tmp2 = gen_reg_rtx (V4DFmode);
14585
14586 tmp0 = gen_reg_rtx (V4DFmode);
14587 tmp1 = force_reg (V2DFmode, operands[1]);
14588
14589 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
14590 emit_insn (gen_roundv4df2 (tmp2, tmp0));
14591 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
14592 }
14593 else
14594 {
14595 tmp0 = gen_reg_rtx (<MODE>mode);
14596 tmp1 = gen_reg_rtx (<MODE>mode);
14597
14598 emit_insn (gen_round<mode>2 (tmp0, operands[1]));
14599 emit_insn (gen_round<mode>2 (tmp1, operands[2]));
14600
14601 emit_insn
14602 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
14603 }
14604 DONE;
14605 })
14606
14607 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14608 ;;
14609 ;; Intel SSE4.2 string/text processing instructions
14610 ;;
14611 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14612
;; Combined PCMPESTR pattern.  It describes three results of the same
;; UNSPEC_PCMPESTR computation on operands 2..6: the SI operand 0
;; (constraint "c"), the V16QI operand 1 (constraint "Yz") and FLAGS_REG.
;; The output template is "#", and the split condition "&& 1" means it is
;; always split while can_create_pseudo_p () holds.
;;
;; The splitter looks for REG_UNUSED notes on curr_insn to decide which
;; results are live, then emits:
;;  - sse4_2_pcmpestri if operand 0 is used,
;;  - sse4_2_pcmpestrm if operand 1 is used,
;;  - sse4_2_pcmpestr_cconly if only the flags are used,
;;  - a NOTE_INSN_DELETED note if none of the three results is used.
14613 (define_insn_and_split "sse4_2_pcmpestr"
14614 [(set (match_operand:SI 0 "register_operand" "=c,c")
14615 (unspec:SI
14616 [(match_operand:V16QI 2 "register_operand" "x,x")
14617 (match_operand:SI 3 "register_operand" "a,a")
14618 (match_operand:V16QI 4 "nonimmediate_operand" "x,m")
14619 (match_operand:SI 5 "register_operand" "d,d")
14620 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
14621 UNSPEC_PCMPESTR))
14622 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
14623 (unspec:V16QI
14624 [(match_dup 2)
14625 (match_dup 3)
14626 (match_dup 4)
14627 (match_dup 5)
14628 (match_dup 6)]
14629 UNSPEC_PCMPESTR))
14630 (set (reg:CC FLAGS_REG)
14631 (unspec:CC
14632 [(match_dup 2)
14633 (match_dup 3)
14634 (match_dup 4)
14635 (match_dup 5)
14636 (match_dup 6)]
14637 UNSPEC_PCMPESTR))]
14638 "TARGET_SSE4_2
14639 && can_create_pseudo_p ()"
14640 "#"
14641 "&& 1"
14642 [(const_int 0)]
14643 {
14644 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
14645 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
14646 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
14647
14648 if (ecx)
14649 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
14650 operands[3], operands[4],
14651 operands[5], operands[6]));
14652 if (xmm0)
14653 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
14654 operands[3], operands[4],
14655 operands[5], operands[6]));
14656 if (flags && !(ecx || xmm0))
14657 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
14658 operands[2], operands[3],
14659 operands[4], operands[5],
14660 operands[6]));
14661 if (!(flags || ecx || xmm0))
14662 emit_note (NOTE_INSN_DELETED);
14663
14664 DONE;
14665 }
14666 [(set_attr "type" "sselog")
14667 (set_attr "prefix_data16" "1")
14668 (set_attr "prefix_extra" "1")
14669 (set_attr "ssememalign" "8")
14670 (set_attr "length_immediate" "1")
14671 (set_attr "memory" "none,load")
14672 (set_attr "mode" "TI")])
14673
;; Variant of the combined PCMPESTR pattern with a single alternative, in
;; which operand 4 is a memory operand wrapped in UNSPEC_LOADU.
;;
;; The splitter applies the same REG_UNUSED checks as sse4_2_pcmpestr and
;; emits the same pcmpestri / pcmpestrm / cconly insns.  It passes the
;; bare memory operand 4 to them, without the UNSPEC_LOADU wrapper.
14674 (define_insn_and_split "*sse4_2_pcmpestr_unaligned"
14675 [(set (match_operand:SI 0 "register_operand" "=c")
14676 (unspec:SI
14677 [(match_operand:V16QI 2 "register_operand" "x")
14678 (match_operand:SI 3 "register_operand" "a")
14679 (unspec:V16QI
14680 [(match_operand:V16QI 4 "memory_operand" "m")]
14681 UNSPEC_LOADU)
14682 (match_operand:SI 5 "register_operand" "d")
14683 (match_operand:SI 6 "const_0_to_255_operand" "n")]
14684 UNSPEC_PCMPESTR))
14685 (set (match_operand:V16QI 1 "register_operand" "=Yz")
14686 (unspec:V16QI
14687 [(match_dup 2)
14688 (match_dup 3)
14689 (unspec:V16QI [(match_dup 4)] UNSPEC_LOADU)
14690 (match_dup 5)
14691 (match_dup 6)]
14692 UNSPEC_PCMPESTR))
14693 (set (reg:CC FLAGS_REG)
14694 (unspec:CC
14695 [(match_dup 2)
14696 (match_dup 3)
14697 (unspec:V16QI [(match_dup 4)] UNSPEC_LOADU)
14698 (match_dup 5)
14699 (match_dup 6)]
14700 UNSPEC_PCMPESTR))]
14701 "TARGET_SSE4_2
14702 && can_create_pseudo_p ()"
14703 "#"
14704 "&& 1"
14705 [(const_int 0)]
14706 {
14707 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
14708 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
14709 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
14710
14711 if (ecx)
14712 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
14713 operands[3], operands[4],
14714 operands[5], operands[6]));
14715 if (xmm0)
14716 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
14717 operands[3], operands[4],
14718 operands[5], operands[6]));
14719 if (flags && !(ecx || xmm0))
14720 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
14721 operands[2], operands[3],
14722 operands[4], operands[5],
14723 operands[6]));
14724 if (!(flags || ecx || xmm0))
14725 emit_note (NOTE_INSN_DELETED);
14726
14727 DONE;
14728 }
14729 [(set_attr "type" "sselog")
14730 (set_attr "prefix_data16" "1")
14731 (set_attr "prefix_extra" "1")
14732 (set_attr "ssememalign" "8")
14733 (set_attr "length_immediate" "1")
14734 (set_attr "memory" "load")
14735 (set_attr "mode" "TI")])
14736
;; PCMPESTRI: sets the SI operand 0 (constraint "c") and FLAGS_REG from
;; UNSPEC_PCMPESTR of operands 1..5.  The template prints only operands
;; 5, 3 and 1; operands 0, 2 and 4 are fixed by their constraints ("c",
;; "a", "d") and do not appear in the assembly text.  The second
;; alternative takes operand 3 from memory.
14737 (define_insn "sse4_2_pcmpestri"
14738 [(set (match_operand:SI 0 "register_operand" "=c,c")
14739 (unspec:SI
14740 [(match_operand:V16QI 1 "register_operand" "x,x")
14741 (match_operand:SI 2 "register_operand" "a,a")
14742 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
14743 (match_operand:SI 4 "register_operand" "d,d")
14744 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
14745 UNSPEC_PCMPESTR))
14746 (set (reg:CC FLAGS_REG)
14747 (unspec:CC
14748 [(match_dup 1)
14749 (match_dup 2)
14750 (match_dup 3)
14751 (match_dup 4)
14752 (match_dup 5)]
14753 UNSPEC_PCMPESTR))]
14754 "TARGET_SSE4_2"
14755 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
14756 [(set_attr "type" "sselog")
14757 (set_attr "prefix_data16" "1")
14758 (set_attr "prefix_extra" "1")
14759 (set_attr "prefix" "maybe_vex")
14760 (set_attr "ssememalign" "8")
14761 (set_attr "length_immediate" "1")
14762 (set_attr "btver2_decode" "vector")
14763 (set_attr "memory" "none,load")
14764 (set_attr "mode" "TI")])
14765
;; PCMPESTRM: sets the V16QI operand 0 (constraint "Yz") and FLAGS_REG
;; from UNSPEC_PCMPESTR of operands 1..5.  The template prints only
;; operands 5, 3 and 1; operands 0, 2 and 4 are fixed by their constraints
;; and do not appear in the assembly text.  The second alternative takes
;; operand 3 from memory.
14766 (define_insn "sse4_2_pcmpestrm"
14767 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
14768 (unspec:V16QI
14769 [(match_operand:V16QI 1 "register_operand" "x,x")
14770 (match_operand:SI 2 "register_operand" "a,a")
14771 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
14772 (match_operand:SI 4 "register_operand" "d,d")
14773 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
14774 UNSPEC_PCMPESTR))
14775 (set (reg:CC FLAGS_REG)
14776 (unspec:CC
14777 [(match_dup 1)
14778 (match_dup 2)
14779 (match_dup 3)
14780 (match_dup 4)
14781 (match_dup 5)]
14782 UNSPEC_PCMPESTR))]
14783 "TARGET_SSE4_2"
14784 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
14785 [(set_attr "type" "sselog")
14786 (set_attr "prefix_data16" "1")
14787 (set_attr "prefix_extra" "1")
14788 (set_attr "ssememalign" "8")
14789 (set_attr "length_immediate" "1")
14790 (set_attr "prefix" "maybe_vex")
14791 (set_attr "btver2_decode" "vector")
14792 (set_attr "memory" "none,load")
14793 (set_attr "mode" "TI")])
14794
;; Flags-only PCMPESTR: only FLAGS_REG is set; operands 0 and 1 are
;; match_scratch clobbers.
;;
;; There are four alternatives:
;;  - 0 and 1 emit pcmpestrm and clobber the V16QI scratch (constraint
;;    "Yz"); the SI scratch is "X".
;;  - 2 and 3 emit pcmpestri and clobber the SI scratch (constraint "c");
;;    the V16QI scratch is "X".
;; Within each pair, the second alternative takes operand 4 from memory.
14795 (define_insn "sse4_2_pcmpestr_cconly"
14796 [(set (reg:CC FLAGS_REG)
14797 (unspec:CC
14798 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
14799 (match_operand:SI 3 "register_operand" "a,a,a,a")
14800 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
14801 (match_operand:SI 5 "register_operand" "d,d,d,d")
14802 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
14803 UNSPEC_PCMPESTR))
14804 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
14805 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
14806 "TARGET_SSE4_2"
14807 "@
14808 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
14809 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
14810 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
14811 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
14812 [(set_attr "type" "sselog")
14813 (set_attr "prefix_data16" "1")
14814 (set_attr "prefix_extra" "1")
14815 (set_attr "ssememalign" "8")
14816 (set_attr "length_immediate" "1")
14817 (set_attr "memory" "none,load,none,load")
14818 (set_attr "btver2_decode" "vector,vector,vector,vector")
14819 (set_attr "prefix" "maybe_vex")
14820 (set_attr "mode" "TI")])
14821
;; Combined PCMPISTR pattern.  It describes three results of the same
;; UNSPEC_PCMPISTR computation on operands 2..4: the SI operand 0
;; (constraint "c"), the V16QI operand 1 (constraint "Yz") and FLAGS_REG.
;; The output template is "#", and the split condition "&& 1" means it is
;; always split while can_create_pseudo_p () holds.
;;
;; The splitter looks for REG_UNUSED notes on curr_insn to decide which
;; results are live, then emits:
;;  - sse4_2_pcmpistri if operand 0 is used,
;;  - sse4_2_pcmpistrm if operand 1 is used,
;;  - sse4_2_pcmpistr_cconly if only the flags are used,
;;  - a NOTE_INSN_DELETED note if none of the three results is used.
14822 (define_insn_and_split "sse4_2_pcmpistr"
14823 [(set (match_operand:SI 0 "register_operand" "=c,c")
14824 (unspec:SI
14825 [(match_operand:V16QI 2 "register_operand" "x,x")
14826 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
14827 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
14828 UNSPEC_PCMPISTR))
14829 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
14830 (unspec:V16QI
14831 [(match_dup 2)
14832 (match_dup 3)
14833 (match_dup 4)]
14834 UNSPEC_PCMPISTR))
14835 (set (reg:CC FLAGS_REG)
14836 (unspec:CC
14837 [(match_dup 2)
14838 (match_dup 3)
14839 (match_dup 4)]
14840 UNSPEC_PCMPISTR))]
14841 "TARGET_SSE4_2
14842 && can_create_pseudo_p ()"
14843 "#"
14844 "&& 1"
14845 [(const_int 0)]
14846 {
14847 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
14848 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
14849 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
14850
14851 if (ecx)
14852 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
14853 operands[3], operands[4]));
14854 if (xmm0)
14855 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
14856 operands[3], operands[4]));
14857 if (flags && !(ecx || xmm0))
14858 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
14859 operands[2], operands[3],
14860 operands[4]));
14861 if (!(flags || ecx || xmm0))
14862 emit_note (NOTE_INSN_DELETED);
14863
14864 DONE;
14865 }
14866 [(set_attr "type" "sselog")
14867 (set_attr "prefix_data16" "1")
14868 (set_attr "prefix_extra" "1")
14869 (set_attr "ssememalign" "8")
14870 (set_attr "length_immediate" "1")
14871 (set_attr "memory" "none,load")
14872 (set_attr "mode" "TI")])
14873
;; Variant of the combined PCMPISTR pattern with a single alternative, in
;; which operand 3 is a memory operand wrapped in UNSPEC_LOADU.
;;
;; The splitter applies the same REG_UNUSED checks as sse4_2_pcmpistr and
;; emits the same pcmpistri / pcmpistrm / cconly insns.  It passes the
;; bare memory operand 3 to them, without the UNSPEC_LOADU wrapper.
14874 (define_insn_and_split "*sse4_2_pcmpistr_unaligned"
14875 [(set (match_operand:SI 0 "register_operand" "=c")
14876 (unspec:SI
14877 [(match_operand:V16QI 2 "register_operand" "x")
14878 (unspec:V16QI
14879 [(match_operand:V16QI 3 "memory_operand" "m")]
14880 UNSPEC_LOADU)
14881 (match_operand:SI 4 "const_0_to_255_operand" "n")]
14882 UNSPEC_PCMPISTR))
14883 (set (match_operand:V16QI 1 "register_operand" "=Yz")
14884 (unspec:V16QI
14885 [(match_dup 2)
14886 (unspec:V16QI [(match_dup 3)] UNSPEC_LOADU)
14887 (match_dup 4)]
14888 UNSPEC_PCMPISTR))
14889 (set (reg:CC FLAGS_REG)
14890 (unspec:CC
14891 [(match_dup 2)
14892 (unspec:V16QI [(match_dup 3)] UNSPEC_LOADU)
14893 (match_dup 4)]
14894 UNSPEC_PCMPISTR))]
14895 "TARGET_SSE4_2
14896 && can_create_pseudo_p ()"
14897 "#"
14898 "&& 1"
14899 [(const_int 0)]
14900 {
14901 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
14902 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
14903 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
14904
14905 if (ecx)
14906 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
14907 operands[3], operands[4]));
14908 if (xmm0)
14909 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
14910 operands[3], operands[4]));
14911 if (flags && !(ecx || xmm0))
14912 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
14913 operands[2], operands[3],
14914 operands[4]));
14915 if (!(flags || ecx || xmm0))
14916 emit_note (NOTE_INSN_DELETED);
14917
14918 DONE;
14919 }
14920 [(set_attr "type" "sselog")
14921 (set_attr "prefix_data16" "1")
14922 (set_attr "prefix_extra" "1")
14923 (set_attr "ssememalign" "8")
14924 (set_attr "length_immediate" "1")
14925 (set_attr "memory" "load")
14926 (set_attr "mode" "TI")])
14927
;; PCMPISTRI: sets the SI operand 0 (constraint "c") and FLAGS_REG from
;; UNSPEC_PCMPISTR of operands 1..3.  The template prints operands 3, 2
;; and 1; operand 0 is fixed by its constraint and does not appear in the
;; assembly text.  The second alternative takes operand 2 from memory.
14928 (define_insn "sse4_2_pcmpistri"
14929 [(set (match_operand:SI 0 "register_operand" "=c,c")
14930 (unspec:SI
14931 [(match_operand:V16QI 1 "register_operand" "x,x")
14932 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
14933 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
14934 UNSPEC_PCMPISTR))
14935 (set (reg:CC FLAGS_REG)
14936 (unspec:CC
14937 [(match_dup 1)
14938 (match_dup 2)
14939 (match_dup 3)]
14940 UNSPEC_PCMPISTR))]
14941 "TARGET_SSE4_2"
14942 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
14943 [(set_attr "type" "sselog")
14944 (set_attr "prefix_data16" "1")
14945 (set_attr "prefix_extra" "1")
14946 (set_attr "ssememalign" "8")
14947 (set_attr "length_immediate" "1")
14948 (set_attr "prefix" "maybe_vex")
14949 (set_attr "memory" "none,load")
14950 (set_attr "btver2_decode" "vector")
14951 (set_attr "mode" "TI")])
14952
;; PCMPISTRM: sets the V16QI operand 0 (constraint "Yz") and FLAGS_REG
;; from UNSPEC_PCMPISTR of operands 1..3.  The template prints operands
;; 3, 2 and 1; operand 0 is fixed by its constraint and does not appear in
;; the assembly text.  The second alternative takes operand 2 from memory.
14953 (define_insn "sse4_2_pcmpistrm"
14954 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
14955 (unspec:V16QI
14956 [(match_operand:V16QI 1 "register_operand" "x,x")
14957 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
14958 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
14959 UNSPEC_PCMPISTR))
14960 (set (reg:CC FLAGS_REG)
14961 (unspec:CC
14962 [(match_dup 1)
14963 (match_dup 2)
14964 (match_dup 3)]
14965 UNSPEC_PCMPISTR))]
14966 "TARGET_SSE4_2"
14967 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
14968 [(set_attr "type" "sselog")
14969 (set_attr "prefix_data16" "1")
14970 (set_attr "prefix_extra" "1")
14971 (set_attr "ssememalign" "8")
14972 (set_attr "length_immediate" "1")
14973 (set_attr "prefix" "maybe_vex")
14974 (set_attr "memory" "none,load")
14975 (set_attr "btver2_decode" "vector")
14976 (set_attr "mode" "TI")])
14977
;; Flags-only PCMPISTR: only FLAGS_REG is set; operands 0 and 1 are
;; match_scratch clobbers.
;;
;; There are four alternatives:
;;  - 0 and 1 emit pcmpistrm and clobber the V16QI scratch (constraint
;;    "Yz"); the SI scratch is "X".
;;  - 2 and 3 emit pcmpistri and clobber the SI scratch (constraint "c");
;;    the V16QI scratch is "X".
;; Within each pair, the second alternative takes operand 3 from memory.
14978 (define_insn "sse4_2_pcmpistr_cconly"
14979 [(set (reg:CC FLAGS_REG)
14980 (unspec:CC
14981 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
14982 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
14983 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
14984 UNSPEC_PCMPISTR))
14985 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
14986 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
14987 "TARGET_SSE4_2"
14988 "@
14989 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
14990 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
14991 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
14992 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
14993 [(set_attr "type" "sselog")
14994 (set_attr "prefix_data16" "1")
14995 (set_attr "prefix_extra" "1")
14996 (set_attr "ssememalign" "8")
14997 (set_attr "length_immediate" "1")
14998 (set_attr "memory" "none,load,none,load")
14999 (set_attr "prefix" "maybe_vex")
15000 (set_attr "btver2_decode" "vector,vector,vector,vector")
15001 (set_attr "mode" "TI")])
15002
15003 ;; Packed float variants
;; Maps an index vector mode to the SF vector mode used for the memory
;; reference in the "sf" gather/scatter prefetch patterns:
;; V8DI -> V8SF and V16SI -> V16SF.
15004 (define_mode_attr GATHER_SCATTER_SF_MEM_MODE
15005 [(V8DI "V8SF") (V16SI "V16SF")])
15006
;; Gather-prefetch expander, packed float, enabled under TARGET_AVX512PF.
;;
;; Operands:
;;  - 0: mask (register_or_constm1_operand)
;;  - 1: VI48_512 index register
;;  - 2: base address (vsib_address_operand)
;;  - 3: scale (const1248_operand)
;;  - 4: hint (const_2_to_3_operand)
;;
;; The preparation block builds operands[5] as a Pmode UNSPEC_VSIBADDR of
;; (operands[2], operands[1], operands[3]); the template refers to it via
;; match_par_dup 5 as the address of the mem.
15007 (define_expand "avx512pf_gatherpf<mode>sf"
15008 [(unspec
15009 [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
15010 (mem:<GATHER_SCATTER_SF_MEM_MODE>
15011 (match_par_dup 5
15012 [(match_operand 2 "vsib_address_operand")
15013 (match_operand:VI48_512 1 "register_operand")
15014 (match_operand:SI 3 "const1248_operand")]))
15015 (match_operand:SI 4 "const_2_to_3_operand")]
15016 UNSPEC_GATHER_PREFETCH)]
15017 "TARGET_AVX512PF"
15018 {
15019 operands[5]
15020 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
15021 operands[3]), UNSPEC_VSIBADDR);
15022 })
15023
;; Masked gather prefetch, packed float.  Operand 0 is the mask register
;; (constraint "Yk"), printed in braces after the VSIB memory operand 5.
;; The mnemonic depends on hint operand 4: 3 selects vgatherpf0...ps and
;; 2 selects vgatherpf1...ps; any other value is unreachable.
15024 (define_insn "*avx512pf_gatherpf<mode>sf_mask"
15025 [(unspec
15026 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
15027 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
15028 [(unspec:P
15029 [(match_operand:P 2 "vsib_address_operand" "Tv")
15030 (match_operand:VI48_512 1 "register_operand" "v")
15031 (match_operand:SI 3 "const1248_operand" "n")]
15032 UNSPEC_VSIBADDR)])
15033 (match_operand:SI 4 "const_2_to_3_operand" "n")]
15034 UNSPEC_GATHER_PREFETCH)]
15035 "TARGET_AVX512PF"
15036 {
15037 switch (INTVAL (operands[4]))
15038 {
15039 case 3:
15040 return "vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
15041 case 2:
15042 return "vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
15043 default:
15044 gcc_unreachable ();
15045 }
15046 }
15047 [(set_attr "type" "sse")
15048 (set_attr "prefix" "evex")
15049 (set_attr "mode" "XI")])
15050
;; Unmasked gather prefetch, packed float: the mask slot of the unspec is
;; the literal (const_int -1), so no mask is printed.  The mnemonic
;; depends on hint operand 3: 3 selects vgatherpf0...ps and 2 selects
;; vgatherpf1...ps; any other value is unreachable.
15051 (define_insn "*avx512pf_gatherpf<mode>sf"
15052 [(unspec
15053 [(const_int -1)
15054 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 4 "vsib_mem_operator"
15055 [(unspec:P
15056 [(match_operand:P 1 "vsib_address_operand" "Tv")
15057 (match_operand:VI48_512 0 "register_operand" "v")
15058 (match_operand:SI 2 "const1248_operand" "n")]
15059 UNSPEC_VSIBADDR)])
15060 (match_operand:SI 3 "const_2_to_3_operand" "n")]
15061 UNSPEC_GATHER_PREFETCH)]
15062 "TARGET_AVX512PF"
15063 {
15064 switch (INTVAL (operands[3]))
15065 {
15066 case 3:
15067 return "vgatherpf0<ssemodesuffix>ps\t{%4|%4}";
15068 case 2:
15069 return "vgatherpf1<ssemodesuffix>ps\t{%4|%4}";
15070 default:
15071 gcc_unreachable ();
15072 }
15073 }
15074 [(set_attr "type" "sse")
15075 (set_attr "prefix" "evex")
15076 (set_attr "mode" "XI")])
15077
15078 ;; Packed double variants
;; Gather-prefetch expander, packed double, enabled under TARGET_AVX512PF.
;; The memory reference is always V8DF and the index register iterates
;; over VI4_256_8_512.  As in the float variant, the preparation block
;; builds operands[5] as a Pmode UNSPEC_VSIBADDR of (operands[2],
;; operands[1], operands[3]) for use by match_par_dup 5.
15079 (define_expand "avx512pf_gatherpf<mode>df"
15080 [(unspec
15081 [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
15082 (mem:V8DF
15083 (match_par_dup 5
15084 [(match_operand 2 "vsib_address_operand")
15085 (match_operand:VI4_256_8_512 1 "register_operand")
15086 (match_operand:SI 3 "const1248_operand")]))
15087 (match_operand:SI 4 "const_2_to_3_operand")]
15088 UNSPEC_GATHER_PREFETCH)]
15089 "TARGET_AVX512PF"
15090 {
15091 operands[5]
15092 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
15093 operands[3]), UNSPEC_VSIBADDR);
15094 })
15095
;; Masked gather prefetch, packed double.  Operand 0 is the mask register
;; (constraint "Yk"), printed in braces after the V8DF VSIB memory
;; operand 5.  The mnemonic depends on hint operand 4: 3 selects
;; vgatherpf0...pd and 2 selects vgatherpf1...pd; any other value is
;; unreachable.
15096 (define_insn "*avx512pf_gatherpf<mode>df_mask"
15097 [(unspec
15098 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
15099 (match_operator:V8DF 5 "vsib_mem_operator"
15100 [(unspec:P
15101 [(match_operand:P 2 "vsib_address_operand" "Tv")
15102 (match_operand:VI4_256_8_512 1 "register_operand" "v")
15103 (match_operand:SI 3 "const1248_operand" "n")]
15104 UNSPEC_VSIBADDR)])
15105 (match_operand:SI 4 "const_2_to_3_operand" "n")]
15106 UNSPEC_GATHER_PREFETCH)]
15107 "TARGET_AVX512PF"
15108 {
15109 switch (INTVAL (operands[4]))
15110 {
15111 case 3:
15112 return "vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
15113 case 2:
15114 return "vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
15115 default:
15116 gcc_unreachable ();
15117 }
15118 }
15119 [(set_attr "type" "sse")
15120 (set_attr "prefix" "evex")
15121 (set_attr "mode" "XI")])
15122
;; Unmasked gather prefetch, packed double: the mask slot of the unspec is
;; the literal (const_int -1), so no mask is printed.  The mnemonic
;; depends on hint operand 3: 3 selects vgatherpf0...pd and 2 selects
;; vgatherpf1...pd; any other value is unreachable.
15123 (define_insn "*avx512pf_gatherpf<mode>df"
15124 [(unspec
15125 [(const_int -1)
15126 (match_operator:V8DF 4 "vsib_mem_operator"
15127 [(unspec:P
15128 [(match_operand:P 1 "vsib_address_operand" "Tv")
15129 (match_operand:VI4_256_8_512 0 "register_operand" "v")
15130 (match_operand:SI 2 "const1248_operand" "n")]
15131 UNSPEC_VSIBADDR)])
15132 (match_operand:SI 3 "const_2_to_3_operand" "n")]
15133 UNSPEC_GATHER_PREFETCH)]
15134 "TARGET_AVX512PF"
15135 {
15136 switch (INTVAL (operands[3]))
15137 {
15138 case 3:
15139 return "vgatherpf0<ssemodesuffix>pd\t{%4|%4}";
15140 case 2:
15141 return "vgatherpf1<ssemodesuffix>pd\t{%4|%4}";
15142 default:
15143 gcc_unreachable ();
15144 }
15145 }
15146 [(set_attr "type" "sse")
15147 (set_attr "prefix" "evex")
15148 (set_attr "mode" "XI")])
15149
15150 ;; Packed float variants
;; Scatter-prefetch expander, packed float, enabled under TARGET_AVX512PF.
;; It has the same shape as the gather expander, but the hint operand 4
;; uses const2367_operand and the unspec is UNSPEC_SCATTER_PREFETCH.  The
;; preparation block builds operands[5] as a Pmode UNSPEC_VSIBADDR of
;; (operands[2], operands[1], operands[3]) for use by match_par_dup 5.
15151 (define_expand "avx512pf_scatterpf<mode>sf"
15152 [(unspec
15153 [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
15154 (mem:<GATHER_SCATTER_SF_MEM_MODE>
15155 (match_par_dup 5
15156 [(match_operand 2 "vsib_address_operand")
15157 (match_operand:VI48_512 1 "register_operand")
15158 (match_operand:SI 3 "const1248_operand")]))
15159 (match_operand:SI 4 "const2367_operand")]
15160 UNSPEC_SCATTER_PREFETCH)]
15161 "TARGET_AVX512PF"
15162 {
15163 operands[5]
15164 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
15165 operands[3]), UNSPEC_VSIBADDR);
15166 })
15167
;; Masked scatter prefetch, packed float.  Operand 0 is the mask register
;; (constraint "Yk"), printed in braces after the VSIB memory operand 5.
;; The mnemonic depends on hint operand 4: 3 or 7 selects
;; vscatterpf0...ps and 2 or 6 selects vscatterpf1...ps; any other value
;; is unreachable.
15168 (define_insn "*avx512pf_scatterpf<mode>sf_mask"
15169 [(unspec
15170 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
15171 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
15172 [(unspec:P
15173 [(match_operand:P 2 "vsib_address_operand" "Tv")
15174 (match_operand:VI48_512 1 "register_operand" "v")
15175 (match_operand:SI 3 "const1248_operand" "n")]
15176 UNSPEC_VSIBADDR)])
15177 (match_operand:SI 4 "const2367_operand" "n")]
15178 UNSPEC_SCATTER_PREFETCH)]
15179 "TARGET_AVX512PF"
15180 {
15181 switch (INTVAL (operands[4]))
15182 {
15183 case 3:
15184 case 7:
15185 return "vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
15186 case 2:
15187 case 6:
15188 return "vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
15189 default:
15190 gcc_unreachable ();
15191 }
15192 }
15193 [(set_attr "type" "sse")
15194 (set_attr "prefix" "evex")
15195 (set_attr "mode" "XI")])
15196
;; Unmasked scatter prefetch, packed float: the mask slot of the unspec is
;; the literal (const_int -1), so no mask is printed.  The mnemonic
;; depends on hint operand 3: 3 or 7 selects vscatterpf0...ps and 2 or 6
;; selects vscatterpf1...ps; any other value is unreachable.
15197 (define_insn "*avx512pf_scatterpf<mode>sf"
15198 [(unspec
15199 [(const_int -1)
15200 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 4 "vsib_mem_operator"
15201 [(unspec:P
15202 [(match_operand:P 1 "vsib_address_operand" "Tv")
15203 (match_operand:VI48_512 0 "register_operand" "v")
15204 (match_operand:SI 2 "const1248_operand" "n")]
15205 UNSPEC_VSIBADDR)])
15206 (match_operand:SI 3 "const2367_operand" "n")]
15207 UNSPEC_SCATTER_PREFETCH)]
15208 "TARGET_AVX512PF"
15209 {
15210 switch (INTVAL (operands[3]))
15211 {
15212 case 3:
15213 case 7:
15214 return "vscatterpf0<ssemodesuffix>ps\t{%4|%4}";
15215 case 2:
15216 case 6:
15217 return "vscatterpf1<ssemodesuffix>ps\t{%4|%4}";
15218 default:
15219 gcc_unreachable ();
15220 }
15221 }
15222 [(set_attr "type" "sse")
15223 (set_attr "prefix" "evex")
15224 (set_attr "mode" "XI")])
15225
15226 ;; Packed double variants
;; Scatter-prefetch expander, packed double, enabled under
;; TARGET_AVX512PF.  The memory reference is always V8DF and the index
;; register iterates over VI4_256_8_512.  The preparation block builds
;; operands[5] as a Pmode UNSPEC_VSIBADDR of (operands[2], operands[1],
;; operands[3]) for use by match_par_dup 5.
15227 (define_expand "avx512pf_scatterpf<mode>df"
15228 [(unspec
15229 [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
15230 (mem:V8DF
15231 (match_par_dup 5
15232 [(match_operand 2 "vsib_address_operand")
15233 (match_operand:VI4_256_8_512 1 "register_operand")
15234 (match_operand:SI 3 "const1248_operand")]))
15235 (match_operand:SI 4 "const2367_operand")]
15236 UNSPEC_SCATTER_PREFETCH)]
15237 "TARGET_AVX512PF"
15238 {
15239 operands[5]
15240 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
15241 operands[3]), UNSPEC_VSIBADDR);
15242 })
15243
;; Masked scatter prefetch, packed double.  Operand 0 is the mask register
;; (constraint "Yk"), printed in braces after the V8DF VSIB memory
;; operand 5.  The mnemonic depends on hint operand 4: 3 or 7 selects
;; vscatterpf0...pd and 2 or 6 selects vscatterpf1...pd; any other value
;; is unreachable.
15244 (define_insn "*avx512pf_scatterpf<mode>df_mask"
15245 [(unspec
15246 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
15247 (match_operator:V8DF 5 "vsib_mem_operator"
15248 [(unspec:P
15249 [(match_operand:P 2 "vsib_address_operand" "Tv")
15250 (match_operand:VI4_256_8_512 1 "register_operand" "v")
15251 (match_operand:SI 3 "const1248_operand" "n")]
15252 UNSPEC_VSIBADDR)])
15253 (match_operand:SI 4 "const2367_operand" "n")]
15254 UNSPEC_SCATTER_PREFETCH)]
15255 "TARGET_AVX512PF"
15256 {
15257 switch (INTVAL (operands[4]))
15258 {
15259 case 3:
15260 case 7:
15261 return "vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
15262 case 2:
15263 case 6:
15264 return "vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
15265 default:
15266 gcc_unreachable ();
15267 }
15268 }
15269 [(set_attr "type" "sse")
15270 (set_attr "prefix" "evex")
15271 (set_attr "mode" "XI")])
15272
;; Unmasked scatter prefetch, packed double: the mask slot of the unspec
;; is the literal (const_int -1), so no mask is printed.  The mnemonic
;; depends on hint operand 3: 3 or 7 selects vscatterpf0...pd and 2 or 6
;; selects vscatterpf1...pd; any other value is unreachable.
15273 (define_insn "*avx512pf_scatterpf<mode>df"
15274 [(unspec
15275 [(const_int -1)
15276 (match_operator:V8DF 4 "vsib_mem_operator"
15277 [(unspec:P
15278 [(match_operand:P 1 "vsib_address_operand" "Tv")
15279 (match_operand:VI4_256_8_512 0 "register_operand" "v")
15280 (match_operand:SI 2 "const1248_operand" "n")]
15281 UNSPEC_VSIBADDR)])
15282 (match_operand:SI 3 "const2367_operand" "n")]
15283 UNSPEC_SCATTER_PREFETCH)]
15284 "TARGET_AVX512PF"
15285 {
15286 switch (INTVAL (operands[3]))
15287 {
15288 case 3:
15289 case 7:
15290 return "vscatterpf0<ssemodesuffix>pd\t{%4|%4}";
15291 case 2:
15292 case 6:
15293 return "vscatterpf1<ssemodesuffix>pd\t{%4|%4}";
15294 default:
15295 gcc_unreachable ();
15296 }
15297 }
15298 [(set_attr "type" "sse")
15299 (set_attr "prefix" "evex")
15300 (set_attr "mode" "XI")])
15301
;; vexp2 on the VF_512 modes, enabled under TARGET_AVX512ER: operand 0 is
;; set to UNSPEC_EXP2 of operand 1.  The operand 1 predicate and
;; constraint, and the extra text in the template, come from the
;; <mask_name> and <round_saeonly_name> substitutions in the pattern name.
15302 (define_insn "avx512er_exp2<mode><mask_name><round_saeonly_name>"
15303 [(set (match_operand:VF_512 0 "register_operand" "=v")
15304 (unspec:VF_512
15305 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
15306 UNSPEC_EXP2))]
15307 "TARGET_AVX512ER"
15308 "vexp2<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
15309 [(set_attr "prefix" "evex")
15310 (set_attr "type" "sse")
15311 (set_attr "mode" "<MODE>")])
15312
;; vrcp28 on the VF_512 modes, enabled under TARGET_AVX512ER: operand 0 is
;; set to UNSPEC_RCP28 of operand 1.  The operand 1 predicate and
;; constraint, and the extra text in the template, come from the
;; <mask_name> and <round_saeonly_name> substitutions in the pattern name.
15313 (define_insn "<mask_codefor>avx512er_rcp28<mode><mask_name><round_saeonly_name>"
15314 [(set (match_operand:VF_512 0 "register_operand" "=v")
15315 (unspec:VF_512
15316 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
15317 UNSPEC_RCP28))]
15318 "TARGET_AVX512ER"
15319 "vrcp28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
15320 [(set_attr "prefix" "evex")
15321 (set_attr "type" "sse")
15322 (set_attr "mode" "<MODE>")])
15323
;; Scalar vrcp28 on the VF_128 modes, expressed as a vec_merge with mask
;; (const_int 1): UNSPEC_RCP28 of operand 1 is merged with register
;; operand 2.  Enabled under TARGET_AVX512ER.
;; NOTE(review): length_immediate is "1" although this pattern matches no
;; immediate operand -- confirm this is intended.
15324 (define_insn "avx512er_vmrcp28<mode><round_saeonly_name>"
15325 [(set (match_operand:VF_128 0 "register_operand" "=v")
15326 (vec_merge:VF_128
15327 (unspec:VF_128
15328 [(match_operand:VF_128 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
15329 UNSPEC_RCP28)
15330 (match_operand:VF_128 2 "register_operand" "v")
15331 (const_int 1)))]
15332 "TARGET_AVX512ER"
15333 "vrcp28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0, %2, %1<round_saeonly_op3>}"
15334 [(set_attr "length_immediate" "1")
15335 (set_attr "prefix" "evex")
15336 (set_attr "type" "sse")
15337 (set_attr "mode" "<MODE>")])
15338
;; vrsqrt28 on the VF_512 modes, enabled under TARGET_AVX512ER: operand 0
;; is set to UNSPEC_RSQRT28 of operand 1.  The operand 1 predicate and
;; constraint, and the extra text in the template, come from the
;; <mask_name> and <round_saeonly_name> substitutions in the pattern name.
15339 (define_insn "<mask_codefor>avx512er_rsqrt28<mode><mask_name><round_saeonly_name>"
15340 [(set (match_operand:VF_512 0 "register_operand" "=v")
15341 (unspec:VF_512
15342 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
15343 UNSPEC_RSQRT28))]
15344 "TARGET_AVX512ER"
15345 "vrsqrt28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
15346 [(set_attr "prefix" "evex")
15347 (set_attr "type" "sse")
15348 (set_attr "mode" "<MODE>")])
15349
;; Scalar vrsqrt28 on the VF_128 modes, expressed as a vec_merge with mask
;; (const_int 1): UNSPEC_RSQRT28 of operand 1 is merged with register
;; operand 2.  Enabled under TARGET_AVX512ER.
;; NOTE(review): length_immediate is "1" although this pattern matches no
;; immediate operand -- confirm this is intended.
15350 (define_insn "avx512er_vmrsqrt28<mode><round_saeonly_name>"
15351 [(set (match_operand:VF_128 0 "register_operand" "=v")
15352 (vec_merge:VF_128
15353 (unspec:VF_128
15354 [(match_operand:VF_128 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
15355 UNSPEC_RSQRT28)
15356 (match_operand:VF_128 2 "register_operand" "v")
15357 (const_int 1)))]
15358 "TARGET_AVX512ER"
15359 "vrsqrt28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0, %2, %1<round_saeonly_op3>}"
15360 [(set_attr "length_immediate" "1")
15361 (set_attr "type" "sse")
15362 (set_attr "prefix" "evex")
15363 (set_attr "mode" "<MODE>")])
15364
15365 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15366 ;;
15367 ;; XOP instructions
15368 ;;
15369 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15370
;; xop_plus iterates over the two addition codes plus and ss_plus.  The
;; macs and madcs code attributes give the mnemonic fragment for each
;; code: plus -> "macs" / "madcs", ss_plus -> "macss" / "madcss".
15371 (define_code_iterator xop_plus [plus ss_plus])
15372
15373 (define_code_attr macs [(plus "macs") (ss_plus "macss")])
15374 (define_code_attr madcs [(plus "madcs") (ss_plus "madcss")])
15375
15376 ;; XOP parallel integer multiply/add instructions.
15377
;; Element-wise multiply-accumulate on the VI24_128 modes, enabled under
;; TARGET_XOP: operand 0 = xop_plus (operand 1 * operand 2, operand 3).
;; The "%" constraint on operand 1 marks operands 1 and 2 as commutative;
;; operand 2 may be in memory.
15378 (define_insn "xop_p<macs><ssemodesuffix><ssemodesuffix>"
15379 [(set (match_operand:VI24_128 0 "register_operand" "=x")
15380 (xop_plus:VI24_128
15381 (mult:VI24_128
15382 (match_operand:VI24_128 1 "nonimmediate_operand" "%x")
15383 (match_operand:VI24_128 2 "nonimmediate_operand" "xm"))
15384 (match_operand:VI24_128 3 "register_operand" "x")))]
15385 "TARGET_XOP"
15386 "vp<macs><ssemodesuffix><ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15387 [(set_attr "type" "ssemuladd")
15388 (set_attr "mode" "TI")])
15389
;; Widening multiply-accumulate, enabled under TARGET_XOP.  Elements 0 and
;; 2 of the V4SI operands 1 and 2 are selected, sign-extended to V2DI and
;; multiplied; the product is combined with the V2DI operand 3 using
;; xop_plus.
15390 (define_insn "xop_p<macs>dql"
15391 [(set (match_operand:V2DI 0 "register_operand" "=x")
15392 (xop_plus:V2DI
15393 (mult:V2DI
15394 (sign_extend:V2DI
15395 (vec_select:V2SI
15396 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
15397 (parallel [(const_int 0) (const_int 2)])))
15398 (sign_extend:V2DI
15399 (vec_select:V2SI
15400 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
15401 (parallel [(const_int 0) (const_int 2)]))))
15402 (match_operand:V2DI 3 "register_operand" "x")))]
15403 "TARGET_XOP"
15404 "vp<macs>dql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15405 [(set_attr "type" "ssemuladd")
15406 (set_attr "mode" "TI")])
15407
;; Widening multiply-accumulate, enabled under TARGET_XOP.  Elements 1 and
;; 3 of the V4SI operands 1 and 2 are selected, sign-extended to V2DI and
;; multiplied; the product is combined with the V2DI operand 3 using
;; xop_plus.
15408 (define_insn "xop_p<macs>dqh"
15409 [(set (match_operand:V2DI 0 "register_operand" "=x")
15410 (xop_plus:V2DI
15411 (mult:V2DI
15412 (sign_extend:V2DI
15413 (vec_select:V2SI
15414 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
15415 (parallel [(const_int 1) (const_int 3)])))
15416 (sign_extend:V2DI
15417 (vec_select:V2SI
15418 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
15419 (parallel [(const_int 1) (const_int 3)]))))
15420 (match_operand:V2DI 3 "register_operand" "x")))]
15421 "TARGET_XOP"
15422 "vp<macs>dqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15423 [(set_attr "type" "ssemuladd")
15424 (set_attr "mode" "TI")])
15425
15426 ;; XOP parallel integer multiply/add instructions for the intrinsics
;; Widening multiply-accumulate: the odd HImode elements (1, 3, 5, 7) of
;; the V8HI operands 1 and 2 are sign-extended to SImode, multiplied, and
;; combined with the V4SI operand 3 by xop_plus.
15427 (define_insn "xop_p<macs>wd"
15428 [(set (match_operand:V4SI 0 "register_operand" "=x")
15429 (xop_plus:V4SI
15430 (mult:V4SI
15431 (sign_extend:V4SI
15432 (vec_select:V4HI
15433 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
15434 (parallel [(const_int 1) (const_int 3)
15435 (const_int 5) (const_int 7)])))
15436 (sign_extend:V4SI
15437 (vec_select:V4HI
15438 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
15439 (parallel [(const_int 1) (const_int 3)
15440 (const_int 5) (const_int 7)]))))
15441 (match_operand:V4SI 3 "register_operand" "x")))]
15442 "TARGET_XOP"
15443 "vp<macs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15444 [(set_attr "type" "ssemuladd")
15445 (set_attr "mode" "TI")])
15446
;; Multiply, add and accumulate: the sign-extended products of the even
;; HImode elements (0, 2, 4, 6) and of the odd HImode elements (1, 3, 5, 7)
;; of operands 1 and 2 are added together with a plain plus, and that V4SI
;; sum is then combined with operand 3 by xop_plus.
15447 (define_insn "xop_p<madcs>wd"
15448 [(set (match_operand:V4SI 0 "register_operand" "=x")
15449 (xop_plus:V4SI
15450 (plus:V4SI
15451 (mult:V4SI
15452 (sign_extend:V4SI
15453 (vec_select:V4HI
15454 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
15455 (parallel [(const_int 0) (const_int 2)
15456 (const_int 4) (const_int 6)])))
15457 (sign_extend:V4SI
15458 (vec_select:V4HI
15459 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
15460 (parallel [(const_int 0) (const_int 2)
15461 (const_int 4) (const_int 6)]))))
15462 (mult:V4SI
15463 (sign_extend:V4SI
15464 (vec_select:V4HI
15465 (match_dup 1)
15466 (parallel [(const_int 1) (const_int 3)
15467 (const_int 5) (const_int 7)])))
15468 (sign_extend:V4SI
15469 (vec_select:V4HI
15470 (match_dup 2)
15471 (parallel [(const_int 1) (const_int 3)
15472 (const_int 5) (const_int 7)])))))
15473 (match_operand:V4SI 3 "register_operand" "x")))]
15474 "TARGET_XOP"
15475 "vp<madcs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15476 [(set_attr "type" "ssemuladd")
15477 (set_attr "mode" "TI")])
15478
15479 ;; XOP parallel XMM conditional moves
;; Expressed as an if_then_else that picks between operands 1 and 2 under
;; control of operand 3, for every mode of iterator V.  The two constraint
;; alternatives let either operand 3 or operand 2 be a memory operand, but
;; not both at once.
15480 (define_insn "xop_pcmov_<mode><avxsizesuffix>"
15481 [(set (match_operand:V 0 "register_operand" "=x,x")
15482 (if_then_else:V
15483 (match_operand:V 3 "nonimmediate_operand" "x,m")
15484 (match_operand:V 1 "register_operand" "x,x")
15485 (match_operand:V 2 "nonimmediate_operand" "xm,x")))]
15486 "TARGET_XOP"
15487 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15488 [(set_attr "type" "sse4arg")])
15489
15490 ;; XOP horizontal add/subtract instructions
;; Horizontal add of adjacent byte pairs: the even- and odd-indexed QImode
;; elements of operand 1 are each widened to HImode by any_extend (an
;; iterator defined outside this section) and summed, giving a V8HI result.
15491 (define_insn "xop_phadd<u>bw"
15492 [(set (match_operand:V8HI 0 "register_operand" "=x")
15493 (plus:V8HI
15494 (any_extend:V8HI
15495 (vec_select:V8QI
15496 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
15497 (parallel [(const_int 0) (const_int 2)
15498 (const_int 4) (const_int 6)
15499 (const_int 8) (const_int 10)
15500 (const_int 12) (const_int 14)])))
15501 (any_extend:V8HI
15502 (vec_select:V8QI
15503 (match_dup 1)
15504 (parallel [(const_int 1) (const_int 3)
15505 (const_int 5) (const_int 7)
15506 (const_int 9) (const_int 11)
15507 (const_int 13) (const_int 15)])))))]
15508 "TARGET_XOP"
15509 "vphadd<u>bw\t{%1, %0|%0, %1}"
15510 [(set_attr "type" "sseiadd1")])
15511
;; Horizontal add of byte quadruples: result element k of the V4SI output
;; is the sum of bytes 4k .. 4k+3 of operand 1, each widened to SImode by
;; any_extend before the additions.
15512 (define_insn "xop_phadd<u>bd"
15513 [(set (match_operand:V4SI 0 "register_operand" "=x")
15514 (plus:V4SI
15515 (plus:V4SI
15516 (any_extend:V4SI
15517 (vec_select:V4QI
15518 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
15519 (parallel [(const_int 0) (const_int 4)
15520 (const_int 8) (const_int 12)])))
15521 (any_extend:V4SI
15522 (vec_select:V4QI
15523 (match_dup 1)
15524 (parallel [(const_int 1) (const_int 5)
15525 (const_int 9) (const_int 13)]))))
15526 (plus:V4SI
15527 (any_extend:V4SI
15528 (vec_select:V4QI
15529 (match_dup 1)
15530 (parallel [(const_int 2) (const_int 6)
15531 (const_int 10) (const_int 14)])))
15532 (any_extend:V4SI
15533 (vec_select:V4QI
15534 (match_dup 1)
15535 (parallel [(const_int 3) (const_int 7)
15536 (const_int 11) (const_int 15)]))))))]
15537 "TARGET_XOP"
15538 "vphadd<u>bd\t{%1, %0|%0, %1}"
15539 [(set_attr "type" "sseiadd1")])
15540
;; Horizontal add of byte octets: result element 0 of the V2DI output is
;; the sum of bytes 0..7 of operand 1 and element 1 the sum of bytes 8..15,
;; each byte widened to DImode by any_extend before the additions.
15541 (define_insn "xop_phadd<u>bq"
15542 [(set (match_operand:V2DI 0 "register_operand" "=x")
15543 (plus:V2DI
15544 (plus:V2DI
15545 (plus:V2DI
15546 (any_extend:V2DI
15547 (vec_select:V2QI
15548 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
15549 (parallel [(const_int 0) (const_int 8)])))
15550 (any_extend:V2DI
15551 (vec_select:V2QI
15552 (match_dup 1)
15553 (parallel [(const_int 1) (const_int 9)]))))
15554 (plus:V2DI
15555 (any_extend:V2DI
15556 (vec_select:V2QI
15557 (match_dup 1)
15558 (parallel [(const_int 2) (const_int 10)])))
15559 (any_extend:V2DI
15560 (vec_select:V2QI
15561 (match_dup 1)
15562 (parallel [(const_int 3) (const_int 11)])))))
15563 (plus:V2DI
15564 (plus:V2DI
15565 (any_extend:V2DI
15566 (vec_select:V2QI
15567 (match_dup 1)
15568 (parallel [(const_int 4) (const_int 12)])))
15569 (any_extend:V2DI
15570 (vec_select:V2QI
15571 (match_dup 1)
15572 (parallel [(const_int 5) (const_int 13)]))))
15573 (plus:V2DI
15574 (any_extend:V2DI
15575 (vec_select:V2QI
15576 (match_dup 1)
15577 (parallel [(const_int 6) (const_int 14)])))
15578 (any_extend:V2DI
15579 (vec_select:V2QI
15580 (match_dup 1)
15581 (parallel [(const_int 7) (const_int 15)])))))))]
15582 "TARGET_XOP"
15583 "vphadd<u>bq\t{%1, %0|%0, %1}"
15584 [(set_attr "type" "sseiadd1")])
15585
;; Horizontal add of adjacent HImode pairs: the even- and odd-indexed
;; elements of the V8HI operand 1 are widened to SImode by any_extend and
;; summed, giving a V4SI result.
15586 (define_insn "xop_phadd<u>wd"
15587 [(set (match_operand:V4SI 0 "register_operand" "=x")
15588 (plus:V4SI
15589 (any_extend:V4SI
15590 (vec_select:V4HI
15591 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
15592 (parallel [(const_int 0) (const_int 2)
15593 (const_int 4) (const_int 6)])))
15594 (any_extend:V4SI
15595 (vec_select:V4HI
15596 (match_dup 1)
15597 (parallel [(const_int 1) (const_int 3)
15598 (const_int 5) (const_int 7)])))))]
15599 "TARGET_XOP"
15600 "vphadd<u>wd\t{%1, %0|%0, %1}"
15601 [(set_attr "type" "sseiadd1")])
15602
;; Horizontal add of HImode quadruples: result element 0 of the V2DI output
;; is the sum of elements 0..3 of operand 1 and element 1 the sum of
;; elements 4..7, each widened to DImode by any_extend.
15603 (define_insn "xop_phadd<u>wq"
15604 [(set (match_operand:V2DI 0 "register_operand" "=x")
15605 (plus:V2DI
15606 (plus:V2DI
15607 (any_extend:V2DI
15608 (vec_select:V2HI
15609 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
15610 (parallel [(const_int 0) (const_int 4)])))
15611 (any_extend:V2DI
15612 (vec_select:V2HI
15613 (match_dup 1)
15614 (parallel [(const_int 1) (const_int 5)]))))
15615 (plus:V2DI
15616 (any_extend:V2DI
15617 (vec_select:V2HI
15618 (match_dup 1)
15619 (parallel [(const_int 2) (const_int 6)])))
15620 (any_extend:V2DI
15621 (vec_select:V2HI
15622 (match_dup 1)
15623 (parallel [(const_int 3) (const_int 7)]))))))]
15624 "TARGET_XOP"
15625 "vphadd<u>wq\t{%1, %0|%0, %1}"
15626 [(set_attr "type" "sseiadd1")])
15627
;; Horizontal add of adjacent SImode pairs: elements 0 and 2 of the V4SI
;; operand 1 are added to elements 1 and 3 after each is widened to DImode
;; by any_extend, giving a V2DI result.
15628 (define_insn "xop_phadd<u>dq"
15629 [(set (match_operand:V2DI 0 "register_operand" "=x")
15630 (plus:V2DI
15631 (any_extend:V2DI
15632 (vec_select:V2SI
15633 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
15634 (parallel [(const_int 0) (const_int 2)])))
15635 (any_extend:V2DI
15636 (vec_select:V2SI
15637 (match_dup 1)
15638 (parallel [(const_int 1) (const_int 3)])))))]
15639 "TARGET_XOP"
15640 "vphadd<u>dq\t{%1, %0|%0, %1}"
15641 [(set_attr "type" "sseiadd1")])
15642
;; Horizontal subtract of adjacent byte pairs: for each pair of operand 1
;; the odd-indexed byte is subtracted from the even-indexed byte, both
;; sign-extended to HImode first.  Only a signed form is defined here.
15643 (define_insn "xop_phsubbw"
15644 [(set (match_operand:V8HI 0 "register_operand" "=x")
15645 (minus:V8HI
15646 (sign_extend:V8HI
15647 (vec_select:V8QI
15648 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
15649 (parallel [(const_int 0) (const_int 2)
15650 (const_int 4) (const_int 6)
15651 (const_int 8) (const_int 10)
15652 (const_int 12) (const_int 14)])))
15653 (sign_extend:V8HI
15654 (vec_select:V8QI
15655 (match_dup 1)
15656 (parallel [(const_int 1) (const_int 3)
15657 (const_int 5) (const_int 7)
15658 (const_int 9) (const_int 11)
15659 (const_int 13) (const_int 15)])))))]
15660 "TARGET_XOP"
15661 "vphsubbw\t{%1, %0|%0, %1}"
15662 [(set_attr "type" "sseiadd1")])
15663
;; Horizontal subtract of adjacent HImode pairs: the odd-indexed element is
;; subtracted from the even-indexed element of each pair of operand 1, both
;; sign-extended to SImode first.
15664 (define_insn "xop_phsubwd"
15665 [(set (match_operand:V4SI 0 "register_operand" "=x")
15666 (minus:V4SI
15667 (sign_extend:V4SI
15668 (vec_select:V4HI
15669 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
15670 (parallel [(const_int 0) (const_int 2)
15671 (const_int 4) (const_int 6)])))
15672 (sign_extend:V4SI
15673 (vec_select:V4HI
15674 (match_dup 1)
15675 (parallel [(const_int 1) (const_int 3)
15676 (const_int 5) (const_int 7)])))))]
15677 "TARGET_XOP"
15678 "vphsubwd\t{%1, %0|%0, %1}"
15679 [(set_attr "type" "sseiadd1")])
15680
;; Horizontal subtract of adjacent SImode pairs: elements 1 and 3 of the
;; V4SI operand 1 are subtracted from elements 0 and 2, both sides
;; sign-extended to DImode first.
15681 (define_insn "xop_phsubdq"
15682 [(set (match_operand:V2DI 0 "register_operand" "=x")
15683 (minus:V2DI
15684 (sign_extend:V2DI
15685 (vec_select:V2SI
15686 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
15687 (parallel [(const_int 0) (const_int 2)])))
15688 (sign_extend:V2DI
15689 (vec_select:V2SI
15690 (match_dup 1)
15691 (parallel [(const_int 1) (const_int 3)])))))]
15692 "TARGET_XOP"
15693 "vphsubdq\t{%1, %0|%0, %1}"
15694 [(set_attr "type" "sseiadd1")])
15695
15696 ;; XOP permute instructions
;; Three-input V16QI operation modelled as the opaque UNSPEC_XOP_PERMUTE.
;; Either operand 2 or operand 3 may live in memory, never both: the insn
;; condition rejects the double-memory case and the two constraint
;; alternatives mirror it.
15697 (define_insn "xop_pperm"
15698 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
15699 (unspec:V16QI
15700 [(match_operand:V16QI 1 "register_operand" "x,x")
15701 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
15702 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
15703 UNSPEC_XOP_PERMUTE))]
15704 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
15705 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15706 [(set_attr "type" "sse4arg")
15707 (set_attr "mode" "TI")])
15708
15709 ;; XOP pack instructions that combine two vectors into a smaller vector
;; Truncates the DImode elements of operands 1 and 2 to SImode and
;; concatenates the two V2SI halves into a V4SI result, emitted as vpperm.
;; Operand 3 appears only in a (use); NOTE(review): presumably it is the
;; byte selector that the caller must set up to realise the truncation --
;; confirm against the callers.
15710 (define_insn "xop_pperm_pack_v2di_v4si"
15711 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
15712 (vec_concat:V4SI
15713 (truncate:V2SI
15714 (match_operand:V2DI 1 "register_operand" "x,x"))
15715 (truncate:V2SI
15716 (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
15717 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
15718 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
15719 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15720 [(set_attr "type" "sse4arg")
15721 (set_attr "mode" "TI")])
15722
;; Truncates the SImode elements of operands 1 and 2 to HImode and
;; concatenates the two V4HI halves into a V8HI result, emitted as vpperm.
;; Operand 3 appears only in a (use); NOTE(review): presumably the byte
;; selector supplied by the caller -- confirm against the callers.
15723 (define_insn "xop_pperm_pack_v4si_v8hi"
15724 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
15725 (vec_concat:V8HI
15726 (truncate:V4HI
15727 (match_operand:V4SI 1 "register_operand" "x,x"))
15728 (truncate:V4HI
15729 (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
15730 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
15731 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
15732 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15733 [(set_attr "type" "sse4arg")
15734 (set_attr "mode" "TI")])
15735
;; Truncates the HImode elements of operands 1 and 2 to QImode and
;; concatenates the two V8QI halves into a V16QI result, emitted as vpperm.
;; Operand 3 appears only in a (use); NOTE(review): presumably the byte
;; selector supplied by the caller -- confirm against the callers.
15736 (define_insn "xop_pperm_pack_v8hi_v16qi"
15737 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
15738 (vec_concat:V16QI
15739 (truncate:V8QI
15740 (match_operand:V8HI 1 "register_operand" "x,x"))
15741 (truncate:V8QI
15742 (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
15743 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
15744 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
15745 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15746 [(set_attr "type" "sse4arg")
15747 (set_attr "mode" "TI")])
15748
15749 ;; XOP packed rotate instructions
;; Expander for rotating every element of a VI_128 vector left by one
;; scalar SImode count.  If the count satisfies
;; const_0_to_<sserotatemax>_operand the C block does nothing and the
;; template RTL is emitted as is.  Otherwise the count is converted to the
;; element mode when its mode differs, replicated into a vector register
;; through vec_init<mode>, and xop_vrotl<mode>3 is emitted instead.
15750 (define_expand "rotl<mode>3"
15751 [(set (match_operand:VI_128 0 "register_operand")
15752 (rotate:VI_128
15753 (match_operand:VI_128 1 "nonimmediate_operand")
15754 (match_operand:SI 2 "general_operand")))]
15755 "TARGET_XOP"
15756 {
15757 /* If we were given a scalar, convert it to parallel */
15758 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
15759 {
15760 rtvec vs = rtvec_alloc (<ssescalarnum>);
15761 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
15762 rtx reg = gen_reg_rtx (<MODE>mode);
15763 rtx op2 = operands[2];
15764 int i;
15765
15766 if (GET_MODE (op2) != <ssescalarmode>mode)
15767 {
15768 op2 = gen_reg_rtx (<ssescalarmode>mode);
15769 convert_move (op2, operands[2], false);
15770 }
15771
15772 for (i = 0; i < <ssescalarnum>; i++)
15773 RTVEC_ELT (vs, i) = op2;
15774
15775 emit_insn (gen_vec_init<mode> (reg, par));
15776 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
15777 DONE;
15778 }
15779 })
15780
;; Expander for rotating every element of a VI_128 vector right by one
;; scalar SImode count.  If the count satisfies
;; const_0_to_<sserotatemax>_operand the template RTL is emitted as is.
;; Otherwise the count is converted to the element mode when its mode
;; differs, replicated into a vector register, negated with neg<mode>2,
;; and handed to xop_vrotl<mode>3, whose pattern rotates right for
;; negative counts.
15781 (define_expand "rotr<mode>3"
15782 [(set (match_operand:VI_128 0 "register_operand")
15783 (rotatert:VI_128
15784 (match_operand:VI_128 1 "nonimmediate_operand")
15785 (match_operand:SI 2 "general_operand")))]
15786 "TARGET_XOP"
15787 {
15788 /* If we were given a scalar, convert it to parallel */
15789 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
15790 {
15791 rtvec vs = rtvec_alloc (<ssescalarnum>);
15792 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
15793 rtx neg = gen_reg_rtx (<MODE>mode);
15794 rtx reg = gen_reg_rtx (<MODE>mode);
15795 rtx op2 = operands[2];
15796 int i;
15797
15798 if (GET_MODE (op2) != <ssescalarmode>mode)
15799 {
15800 op2 = gen_reg_rtx (<ssescalarmode>mode);
15801 convert_move (op2, operands[2], false);
15802 }
15803
15804 for (i = 0; i < <ssescalarnum>; i++)
15805 RTVEC_ELT (vs, i) = op2;
15806
15807 emit_insn (gen_vec_init<mode> (reg, par));
15808 emit_insn (gen_neg<mode>2 (neg, reg));
15809 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
15810 DONE;
15811 }
15812 })
15813
;; Rotate-left of every element of operand 1 by a constant count that must
;; satisfy const_0_to_<sserotatemax>_operand.  Operand 1 may be memory; the
;; count is emitted as an immediate.
15814 (define_insn "xop_rotl<mode>3"
15815 [(set (match_operand:VI_128 0 "register_operand" "=x")
15816 (rotate:VI_128
15817 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
15818 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
15819 "TARGET_XOP"
15820 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
15821 [(set_attr "type" "sseishft")
15822 (set_attr "length_immediate" "1")
15823 (set_attr "mode" "TI")])
15824
;; Rotate-right of every element of operand 1 by a constant count.  The
;; output code rewrites it as a rotate-left: operands[3] is set to the
;; element bit width minus the count and printed as the vprot immediate in
;; place of operand 2.
15825 (define_insn "xop_rotr<mode>3"
15826 [(set (match_operand:VI_128 0 "register_operand" "=x")
15827 (rotatert:VI_128
15828 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
15829 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
15830 "TARGET_XOP"
15831 {
15832 operands[3]
15833 = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
15834 return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\";
15835 }
15836 [(set_attr "type" "sseishft")
15837 (set_attr "length_immediate" "1")
15838 (set_attr "mode" "TI")])
15839
;; Per-element rotate-right by a vector of counts: negates the count vector
;; into a fresh register with neg<mode>2 and emits xop_vrotl<mode>3, whose
;; pattern rotates right for negative counts.
15840 (define_expand "vrotr<mode>3"
15841 [(match_operand:VI_128 0 "register_operand")
15842 (match_operand:VI_128 1 "register_operand")
15843 (match_operand:VI_128 2 "register_operand")]
15844 "TARGET_XOP"
15845 {
15846 rtx reg = gen_reg_rtx (<MODE>mode);
15847 emit_insn (gen_neg<mode>2 (reg, operands[2]));
15848 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
15849 DONE;
15850 })
15851
;; Per-element rotate-left by a vector of counts: forwards the three
;; operands unchanged to xop_vrotl<mode>3.
15852 (define_expand "vrotl<mode>3"
15853 [(match_operand:VI_128 0 "register_operand")
15854 (match_operand:VI_128 1 "register_operand")
15855 (match_operand:VI_128 2 "register_operand")]
15856 "TARGET_XOP"
15857 {
15858 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
15859 DONE;
15860 })
15861
;; Variable rotate with per-element counts in operand 2: where the count is
;; >= 0 the element of operand 1 is rotated left by it, otherwise it is
;; rotated right by the negated count.  One of operands 1 and 2 may be in
;; memory, but the insn condition forbids both.
15862 (define_insn "xop_vrotl<mode>3"
15863 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
15864 (if_then_else:VI_128
15865 (ge:VI_128
15866 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
15867 (const_int 0))
15868 (rotate:VI_128
15869 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
15870 (match_dup 2))
15871 (rotatert:VI_128
15872 (match_dup 1)
15873 (neg:VI_128 (match_dup 2)))))]
15874 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
15875 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
15876 [(set_attr "type" "sseishft")
15877 (set_attr "prefix_data16" "0")
15878 (set_attr "prefix_extra" "2")
15879 (set_attr "mode" "TI")])
15880
15881 ;; XOP packed shift instructions.
;; Per-element logical right shift for VI12_128 vectors, XOP only: the
;; count vector is negated with neg<mode>2 and passed to xop_shl<mode>3,
;; whose pattern shifts right for negative counts.
15882 (define_expand "vlshr<mode>3"
15883 [(set (match_operand:VI12_128 0 "register_operand")
15884 (lshiftrt:VI12_128
15885 (match_operand:VI12_128 1 "register_operand")
15886 (match_operand:VI12_128 2 "nonimmediate_operand")))]
15887 "TARGET_XOP"
15888 {
15889 rtx neg = gen_reg_rtx (<MODE>mode);
15890 emit_insn (gen_neg<mode>2 (neg, operands[2]));
15891 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
15892 DONE;
15893 })
15894
;; Per-element logical right shift for VI48_128 vectors.  With TARGET_AVX2
;; the C block does nothing and the template RTL is emitted; otherwise
;; (XOP only) the count vector is negated and xop_shl<mode>3 is used.
15895 (define_expand "vlshr<mode>3"
15896 [(set (match_operand:VI48_128 0 "register_operand")
15897 (lshiftrt:VI48_128
15898 (match_operand:VI48_128 1 "register_operand")
15899 (match_operand:VI48_128 2 "nonimmediate_operand")))]
15900 "TARGET_AVX2 || TARGET_XOP"
15901 {
15902 if (!TARGET_AVX2)
15903 {
15904 rtx neg = gen_reg_rtx (<MODE>mode);
15905 emit_insn (gen_neg<mode>2 (neg, operands[2]));
15906 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
15907 DONE;
15908 }
15909 })
15910
;; Per-element logical right shift for VI48_512 vectors under
;; TARGET_AVX512F.  There is no preparation code: the template RTL is
;; emitted directly.
15911 (define_expand "vlshr<mode>3"
15912 [(set (match_operand:VI48_512 0 "register_operand")
15913 (lshiftrt:VI48_512
15914 (match_operand:VI48_512 1 "register_operand")
15915 (match_operand:VI48_512 2 "nonimmediate_operand")))]
15916 "TARGET_AVX512F")
15917
;; Per-element logical right shift for VI48_256 vectors under TARGET_AVX2.
;; There is no preparation code: the template RTL is emitted directly.
15918 (define_expand "vlshr<mode>3"
15919 [(set (match_operand:VI48_256 0 "register_operand")
15920 (lshiftrt:VI48_256
15921 (match_operand:VI48_256 1 "register_operand")
15922 (match_operand:VI48_256 2 "nonimmediate_operand")))]
15923 "TARGET_AVX2")
15924
;; Per-element arithmetic right shift for VI12_128 vectors.  With
;; TARGET_XOP the count vector is negated and xop_sha<mode>3 is emitted;
;; otherwise (AVX512BW together with AVX512VL) the template RTL is used.
;; NOTE(review): the XOP path passes only operands 0-2, so any operands
;; added by the <mask_name> substitution are not used there -- confirm
;; this is intended.
15925 (define_expand "vashr<mode>3<mask_name>"
15926 [(set (match_operand:VI12_128 0 "register_operand")
15927 (ashiftrt:VI12_128
15928 (match_operand:VI12_128 1 "register_operand")
15929 (match_operand:VI12_128 2 "nonimmediate_operand")))]
15930 "TARGET_XOP || (TARGET_AVX512BW && TARGET_AVX512VL)"
15931 {
15932 if (TARGET_XOP)
15933 {
15934 rtx neg = gen_reg_rtx (<MODE>mode);
15935 emit_insn (gen_neg<mode>2 (neg, operands[2]));
15936 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], neg));
15937 DONE;
15938 }
15939 })
15940
;; Per-element arithmetic right shift for V2DI.  With TARGET_XOP the count
;; vector is negated with negv2di2 and xop_shav2di3 is emitted; otherwise
;; (AVX512VL) the template RTL is used.
;; NOTE(review): as in the VI12_128 variant, the XOP path uses only
;; operands 0-2 -- confirm for the masked form.
15941 (define_expand "vashrv2di3<mask_name>"
15942 [(set (match_operand:V2DI 0 "register_operand")
15943 (ashiftrt:V2DI
15944 (match_operand:V2DI 1 "register_operand")
15945 (match_operand:V2DI 2 "nonimmediate_operand")))]
15946 "TARGET_XOP || TARGET_AVX512VL"
15947 {
15948 if (TARGET_XOP)
15949 {
15950 rtx neg = gen_reg_rtx (V2DImode);
15951 emit_insn (gen_negv2di2 (neg, operands[2]));
15952 emit_insn (gen_xop_shav2di3 (operands[0], operands[1], neg));
15953 DONE;
15954 }
15955 })
15956
;; Per-element arithmetic right shift for V4SI.  With TARGET_AVX2 the
;; template RTL is emitted; otherwise (XOP only) the count vector is
;; negated with negv4si2 and xop_shav4si3 is used.
15957 (define_expand "vashrv4si3"
15958 [(set (match_operand:V4SI 0 "register_operand")
15959 (ashiftrt:V4SI (match_operand:V4SI 1 "register_operand")
15960 (match_operand:V4SI 2 "nonimmediate_operand")))]
15961 "TARGET_AVX2 || TARGET_XOP"
15962 {
15963 if (!TARGET_AVX2)
15964 {
15965 rtx neg = gen_reg_rtx (V4SImode);
15966 emit_insn (gen_negv4si2 (neg, operands[2]));
15967 emit_insn (gen_xop_shav4si3 (operands[0], operands[1], neg));
15968 DONE;
15969 }
15970 })
15971
;; Per-element arithmetic right shift for V16SI under TARGET_AVX512F.
;; Template only, no preparation code.
15972 (define_expand "vashrv16si3"
15973 [(set (match_operand:V16SI 0 "register_operand")
15974 (ashiftrt:V16SI (match_operand:V16SI 1 "register_operand")
15975 (match_operand:V16SI 2 "nonimmediate_operand")))]
15976 "TARGET_AVX512F")
15977
;; Per-element arithmetic right shift for V8SI under TARGET_AVX2.
;; Template only, no preparation code.
15978 (define_expand "vashrv8si3"
15979 [(set (match_operand:V8SI 0 "register_operand")
15980 (ashiftrt:V8SI (match_operand:V8SI 1 "register_operand")
15981 (match_operand:V8SI 2 "nonimmediate_operand")))]
15982 "TARGET_AVX2")
15983
;; Per-element left shift for VI12_128 vectors, XOP only: the operands are
;; forwarded unchanged to xop_sha<mode>3, whose pattern shifts left for
;; non-negative counts.
15984 (define_expand "vashl<mode>3"
15985 [(set (match_operand:VI12_128 0 "register_operand")
15986 (ashift:VI12_128
15987 (match_operand:VI12_128 1 "register_operand")
15988 (match_operand:VI12_128 2 "nonimmediate_operand")))]
15989 "TARGET_XOP"
15990 {
15991 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
15992 DONE;
15993 })
15994
;; Per-element left shift for VI48_128 vectors.  With TARGET_AVX2 the
;; template RTL is emitted; otherwise (XOP only) the count vector is forced
;; into a register and xop_sha<mode>3 is used.
15995 (define_expand "vashl<mode>3"
15996 [(set (match_operand:VI48_128 0 "register_operand")
15997 (ashift:VI48_128
15998 (match_operand:VI48_128 1 "register_operand")
15999 (match_operand:VI48_128 2 "nonimmediate_operand")))]
16000 "TARGET_AVX2 || TARGET_XOP"
16001 {
16002 if (!TARGET_AVX2)
16003 {
16004 operands[2] = force_reg (<MODE>mode, operands[2]);
16005 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
16006 DONE;
16007 }
16008 })
16009
;; Per-element left shift for VI48_512 vectors under TARGET_AVX512F.
;; Template only, no preparation code.
16010 (define_expand "vashl<mode>3"
16011 [(set (match_operand:VI48_512 0 "register_operand")
16012 (ashift:VI48_512
16013 (match_operand:VI48_512 1 "register_operand")
16014 (match_operand:VI48_512 2 "nonimmediate_operand")))]
16015 "TARGET_AVX512F")
16016
;; Per-element left shift for VI48_256 vectors under TARGET_AVX2.
;; Template only, no preparation code.
16017 (define_expand "vashl<mode>3"
16018 [(set (match_operand:VI48_256 0 "register_operand")
16019 (ashift:VI48_256
16020 (match_operand:VI48_256 1 "register_operand")
16021 (match_operand:VI48_256 2 "nonimmediate_operand")))]
16022 "TARGET_AVX2")
16023
;; Variable arithmetic shift with per-element counts in operand 2: where
;; the count is >= 0 the element of operand 1 is shifted left by it,
;; otherwise it is shifted right arithmetically by the negated count.  One
;; of operands 1 and 2 may be in memory, but the insn condition forbids
;; both.
16024 (define_insn "xop_sha<mode>3"
16025 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
16026 (if_then_else:VI_128
16027 (ge:VI_128
16028 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
16029 (const_int 0))
16030 (ashift:VI_128
16031 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
16032 (match_dup 2))
16033 (ashiftrt:VI_128
16034 (match_dup 1)
16035 (neg:VI_128 (match_dup 2)))))]
16036 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
16037 "vpsha<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
16038 [(set_attr "type" "sseishft")
16039 (set_attr "prefix_data16" "0")
16040 (set_attr "prefix_extra" "2")
16041 (set_attr "mode" "TI")])
16042
;; Variable logical shift with per-element counts in operand 2: where the
;; count is >= 0 the element of operand 1 is shifted left by it, otherwise
;; it is shifted right logically by the negated count.  One of operands 1
;; and 2 may be in memory, but the insn condition forbids both.
16043 (define_insn "xop_shl<mode>3"
16044 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
16045 (if_then_else:VI_128
16046 (ge:VI_128
16047 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
16048 (const_int 0))
16049 (ashift:VI_128
16050 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
16051 (match_dup 2))
16052 (lshiftrt:VI_128
16053 (match_dup 1)
16054 (neg:VI_128 (match_dup 2)))))]
16055 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
16056 "vpshl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
16057 [(set_attr "type" "sseishft")
16058 (set_attr "prefix_data16" "0")
16059 (set_attr "prefix_extra" "2")
16060 (set_attr "mode" "TI")])
16061
;; Shift of every QImode element of a VI1_AVX512 vector by one scalar
;; SImode count, for each code of any_shift.
;;
;; With TARGET_XOP and V16QImode the count is broadcast into a V16QI
;; register and xop_shlv16qi3 (for LSHIFTRT) or xop_shav16qi3 (otherwise)
;; is emitted.  Right shifts use a negated count: a CONST_INT is negated
;; up front, a variable count is negated as a vector after the broadcast.
;; Every other case is delegated to ix86_expand_vecop_qihi.  The expander
;; always finishes with DONE, so the template RTL itself is never emitted.
16062 (define_expand "<shift_insn><mode>3"
16063 [(set (match_operand:VI1_AVX512 0 "register_operand")
16064 (any_shift:VI1_AVX512
16065 (match_operand:VI1_AVX512 1 "register_operand")
16066 (match_operand:SI 2 "nonmemory_operand")))]
16067 "TARGET_SSE2"
16068 {
16069 if (TARGET_XOP && <MODE>mode == V16QImode)
16070 {
16071 bool negate = false;
16072 rtx (*gen) (rtx, rtx, rtx);
16073 rtx tmp, par;
rtx count;
16074 int i;
16075
16076 if (<CODE> != ASHIFT)
16077 {
16078 if (CONST_INT_P (operands[2]))
16079 operands[2] = GEN_INT (-INTVAL (operands[2]));
16080 else
16081 negate = true;
16082 }

/* The elements handed to vec_initv16qi must be QImode, but operand 2
   is SImode.  A CONST_INT is modeless and can be used directly; any
   other count is narrowed into a QImode pseudo first, the same way the
   rotl<mode>3/rotr<mode>3 expanders convert their scalar count.  */
count = operands[2];
if (!CONST_INT_P (count) && GET_MODE (count) != QImode)
{
count = gen_reg_rtx (QImode);
convert_move (count, operands[2], false);
}

16083 par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
16084 for (i = 0; i < 16; i++)
16085 XVECEXP (par, 0, i) = count;
16086
16087 tmp = gen_reg_rtx (V16QImode);
16088 emit_insn (gen_vec_initv16qi (tmp, par));
16089
16090 if (negate)
16091 emit_insn (gen_negv16qi2 (tmp, tmp));
16092
16093 gen = (<CODE> == LSHIFTRT ? gen_xop_shlv16qi3 : gen_xop_shav16qi3);
16094 emit_insn (gen (operands[0], operands[1], tmp));
16095 }
16096 else
16097 ix86_expand_vecop_qihi (<CODE>, operands[0], operands[1], operands[2]);
16098 DONE;
16099 })
16100
;; Arithmetic right shift of both DImode elements of a V2DI vector by one
;; scalar DImode count.  With TARGET_AVX512VL the C block does nothing and
;; the template RTL is emitted.  Otherwise (XOP only) the count is
;; broadcast into a V2DI register and xop_shav2di3 is emitted with a
;; negated count: a CONST_INT is negated before the broadcast, a variable
;; count is negated as a vector with negv2di2 afterwards.
16101 (define_expand "ashrv2di3"
16102 [(set (match_operand:V2DI 0 "register_operand")
16103 (ashiftrt:V2DI
16104 (match_operand:V2DI 1 "register_operand")
16105 (match_operand:DI 2 "nonmemory_operand")))]
16106 "TARGET_XOP || TARGET_AVX512VL"
16107 {
16108 if (!TARGET_AVX512VL)
16109 {
16110 rtx reg = gen_reg_rtx (V2DImode);
16111 rtx par;
16112 bool negate = false;
16113 int i;
16114
16115 if (CONST_INT_P (operands[2]))
16116 operands[2] = GEN_INT (-INTVAL (operands[2]));
16117 else
16118 negate = true;
16119
16120 par = gen_rtx_PARALLEL (V2DImode, rtvec_alloc (2));
16121 for (i = 0; i < 2; i++)
16122 XVECEXP (par, 0, i) = operands[2];
16123
16124 emit_insn (gen_vec_initv2di (reg, par));
16125
16126 if (negate)
16127 emit_insn (gen_negv2di2 (reg, reg));
16128
16129 emit_insn (gen_xop_shav2di3 (operands[0], operands[1], reg));
16130 DONE;
16131 }
16132 })
16133
16134 ;; XOP FRCZ support
;; Modelled as the opaque UNSPEC_FRCZ of operand 1 (register or memory) for
;; every mode of iterator FMAMODE; emitted as vfrcz with the mode suffix.
16135 (define_insn "xop_frcz<mode>2"
16136 [(set (match_operand:FMAMODE 0 "register_operand" "=x")
16137 (unspec:FMAMODE
16138 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
16139 UNSPEC_FRCZ))]
16140 "TARGET_XOP"
16141 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
16142 [(set_attr "type" "ssecvt1")
16143 (set_attr "mode" "<MODE>")])
16144
;; Expander for the scalar form: a vec_merge with mask 1 takes element 0
;; from the UNSPEC_FRCZ of operand 1 and the remaining elements from
;; operand 2, which the preparation statement sets to the zero vector
;; CONST0_RTX (<MODE>mode).
16145 (define_expand "xop_vmfrcz<mode>2"
16146 [(set (match_operand:VF_128 0 "register_operand")
16147 (vec_merge:VF_128
16148 (unspec:VF_128
16149 [(match_operand:VF_128 1 "nonimmediate_operand")]
16150 UNSPEC_FRCZ)
16151 (match_dup 2)
16152 (const_int 1)))]
16153 "TARGET_XOP"
16154 "operands[2] = CONST0_RTX (<MODE>mode);")
16155
;; Insn with the same vec_merge shape as the xop_vmfrcz<mode>2 expander:
;; element 0 comes from the UNSPEC_FRCZ of operand 1, the rest from
;; operand 2, which must satisfy const0_operand.  Emitted with the scalar
;; mode suffix.
16156 (define_insn "*xop_vmfrcz<mode>2"
16157 [(set (match_operand:VF_128 0 "register_operand" "=x")
16158 (vec_merge:VF_128
16159 (unspec:VF_128
16160 [(match_operand:VF_128 1 "nonimmediate_operand" "xm")]
16161 UNSPEC_FRCZ)
16162 (match_operand:VF_128 2 "const0_operand")
16163 (const_int 1)))]
16164 "TARGET_XOP"
16165 "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}"
16166 [(set_attr "type" "ssecvt1")
16167 (set_attr "mode" "<MODE>")])
16168
;; Vector integer comparison of operands 2 and 3; the comparison code is
;; operator 1 and must satisfy ix86_comparison_int_operator.
;; NOTE(review): %Y1 presumably prints the condition mnemonic for operator
;; 1 -- confirm in the i386 operand-printing code.
16169 (define_insn "xop_maskcmp<mode>3"
16170 [(set (match_operand:VI_128 0 "register_operand" "=x")
16171 (match_operator:VI_128 1 "ix86_comparison_int_operator"
16172 [(match_operand:VI_128 2 "register_operand" "x")
16173 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
16174 "TARGET_XOP"
16175 "vpcom%Y1<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
16176 [(set_attr "type" "sse4arg")
16177 (set_attr "prefix_data16" "0")
16178 (set_attr "prefix_rep" "0")
16179 (set_attr "prefix_extra" "2")
16180 (set_attr "length_immediate" "1")
16181 (set_attr "mode" "TI")])
16182
;; Unsigned counterpart of xop_maskcmp<mode>3: operator 1 must satisfy
;; ix86_comparison_uns_operator and the mnemonic gains a "u" before the
;; mode suffix.
16183 (define_insn "xop_maskcmp_uns<mode>3"
16184 [(set (match_operand:VI_128 0 "register_operand" "=x")
16185 (match_operator:VI_128 1 "ix86_comparison_uns_operator"
16186 [(match_operand:VI_128 2 "register_operand" "x")
16187 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
16188 "TARGET_XOP"
16189 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
16190 [(set_attr "type" "ssecmp")
16191 (set_attr "prefix_data16" "0")
16192 (set_attr "prefix_rep" "0")
16193 (set_attr "prefix_extra" "2")
16194 (set_attr "length_immediate" "1")
16195 (set_attr "mode" "TI")])
16196
16197 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
16198 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
16199 ;; the exact instruction generated for the intrinsic.
;; The comparison is wrapped in UNSPEC_XOP_UNSIGNED_CMP, so it is not a
;; bare comparison rtx; the emitted assembly is the same as for
;; xop_maskcmp_uns<mode>3.
16200 (define_insn "xop_maskcmp_uns2<mode>3"
16201 [(set (match_operand:VI_128 0 "register_operand" "=x")
16202 (unspec:VI_128
16203 [(match_operator:VI_128 1 "ix86_comparison_uns_operator"
16204 [(match_operand:VI_128 2 "register_operand" "x")
16205 (match_operand:VI_128 3 "nonimmediate_operand" "xm")])]
16206 UNSPEC_XOP_UNSIGNED_CMP))]
16207 "TARGET_XOP"
16208 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
16209 [(set_attr "type" "ssecmp")
16210 (set_attr "prefix_data16" "0")
16211 (set_attr "prefix_extra" "2")
16212 (set_attr "length_immediate" "1")
16213 (set_attr "mode" "TI")])
16214
16215 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
16216 ;; being added here to be complete.
;; Operand 3 is a constant selector: a nonzero value emits vpcomtrue, zero
;; emits vpcomfalse.  Operands 1 and 2 are passed through to the
;; instruction; the RTL is the opaque UNSPEC_XOP_TRUEFALSE.
16217 (define_insn "xop_pcom_tf<mode>3"
16218 [(set (match_operand:VI_128 0 "register_operand" "=x")
16219 (unspec:VI_128
16220 [(match_operand:VI_128 1 "register_operand" "x")
16221 (match_operand:VI_128 2 "nonimmediate_operand" "xm")
16222 (match_operand:SI 3 "const_int_operand" "n")]
16223 UNSPEC_XOP_TRUEFALSE))]
16224 "TARGET_XOP"
16225 {
16226 return ((INTVAL (operands[3]) != 0)
16227 ? "vpcomtrue<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
16228 : "vpcomfalse<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}");
16229 }
16230 [(set_attr "type" "ssecmp")
16231 (set_attr "prefix_data16" "0")
16232 (set_attr "prefix_extra" "2")
16233 (set_attr "length_immediate" "1")
16234 (set_attr "mode" "TI")])
16235
;; Two-source permute modelled as the opaque UNSPEC_VPERMIL2.  Operands 1
;; and 2 are the floating-point sources, operand 3 is the selector in the
;; matching integer vector mode (and may be memory), and operand 4 is an
;; immediate in the range 0..3.
;;
;; Operand 2 deliberately carries no "%" commutativity marker: "%" would
;; let the register allocator interchange operands 2 and 3, which have
;; different modes and different roles (data source vs. selector), so a
;; swap would change the result.
16236 (define_insn "xop_vpermil2<mode>3"
16237 [(set (match_operand:VF_128_256 0 "register_operand" "=x")
16238 (unspec:VF_128_256
16239 [(match_operand:VF_128_256 1 "register_operand" "x")
16240 (match_operand:VF_128_256 2 "nonimmediate_operand" "x")
16241 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "xm")
16242 (match_operand:SI 4 "const_0_to_3_operand" "n")]
16243 UNSPEC_VPERMIL2))]
16244 "TARGET_XOP"
16245 "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
16246 [(set_attr "type" "sse4arg")
16247 (set_attr "length_immediate" "1")
16248 (set_attr "mode" "<MODE>")])
16249
16250 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
16251
;; Modelled as the opaque UNSPEC_AESENC of operands 1 and 2.  Two
;; alternatives, chosen by the "isa" attribute: the non-AVX two-operand
;; form, where operand 1 is tied to the destination ("0"), and the VEX
;; three-operand form.
16252 (define_insn "aesenc"
16253 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
16254 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
16255 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
16256 UNSPEC_AESENC))]
16257 "TARGET_AES"
16258 "@
16259 aesenc\t{%2, %0|%0, %2}
16260 vaesenc\t{%2, %1, %0|%0, %1, %2}"
16261 [(set_attr "isa" "noavx,avx")
16262 (set_attr "type" "sselog1")
16263 (set_attr "prefix_extra" "1")
16264 (set_attr "prefix" "orig,vex")
16265 (set_attr "btver2_decode" "double,double")
16266 (set_attr "mode" "TI")])
16267
;; Modelled as the opaque UNSPEC_AESENCLAST of operands 1 and 2.  Two
;; alternatives, chosen by the "isa" attribute: the non-AVX two-operand
;; form, where operand 1 is tied to the destination ("0"), and the VEX
;; three-operand form.
16268 (define_insn "aesenclast"
16269 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
16270 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
16271 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
16272 UNSPEC_AESENCLAST))]
16273 "TARGET_AES"
16274 "@
16275 aesenclast\t{%2, %0|%0, %2}
16276 vaesenclast\t{%2, %1, %0|%0, %1, %2}"
16277 [(set_attr "isa" "noavx,avx")
16278 (set_attr "type" "sselog1")
16279 (set_attr "prefix_extra" "1")
16280 (set_attr "prefix" "orig,vex")
16281 (set_attr "btver2_decode" "double,double")
16282 (set_attr "mode" "TI")])
16283
;; Modelled as the opaque UNSPEC_AESDEC of operands 1 and 2.  Two
;; alternatives, chosen by the "isa" attribute: the non-AVX two-operand
;; form, where operand 1 is tied to the destination ("0"), and the VEX
;; three-operand form.
16284 (define_insn "aesdec"
16285 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
16286 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
16287 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
16288 UNSPEC_AESDEC))]
16289 "TARGET_AES"
16290 "@
16291 aesdec\t{%2, %0|%0, %2}
16292 vaesdec\t{%2, %1, %0|%0, %1, %2}"
16293 [(set_attr "isa" "noavx,avx")
16294 (set_attr "type" "sselog1")
16295 (set_attr "prefix_extra" "1")
16296 (set_attr "prefix" "orig,vex")
16297 (set_attr "btver2_decode" "double,double")
16298 (set_attr "mode" "TI")])
16299
;; Modelled as the opaque UNSPEC_AESDECLAST of operands 1 and 2.  Two
;; alternatives, chosen by the "isa" attribute: the non-AVX two-operand
;; form, where operand 1 is tied to the destination ("0"), and the VEX
;; three-operand form.
16300 (define_insn "aesdeclast"
16301 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
16302 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
16303 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
16304 UNSPEC_AESDECLAST))]
16305 "TARGET_AES"
16306 "@
16307 aesdeclast\t{%2, %0|%0, %2}
16308 vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
16309 [(set_attr "isa" "noavx,avx")
16310 (set_attr "type" "sselog1")
16311 (set_attr "prefix_extra" "1")
16312 (set_attr "prefix" "orig,vex")
16313 (set_attr "btver2_decode" "double,double")
16314 (set_attr "mode" "TI")])
16315
;; Single-source operation modelled as the opaque UNSPEC_AESIMC; operand 1
;; may be a register or memory.  One alternative serves both encodings.
;; NOTE(review): the leading "%v" in the template presumably emits the "v"
;; mnemonic prefix when a VEX encoding is used, matching the "maybe_vex"
;; prefix attribute -- confirm in the i386 operand-printing code.
16316 (define_insn "aesimc"
16317 [(set (match_operand:V2DI 0 "register_operand" "=x")
16318 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
16319 UNSPEC_AESIMC))]
16320 "TARGET_AES"
16321 "%vaesimc\t{%1, %0|%0, %1}"
16322 [(set_attr "type" "sselog1")
16323 (set_attr "prefix_extra" "1")
16324 (set_attr "prefix" "maybe_vex")
16325 (set_attr "mode" "TI")])
16326
;; Modelled as the opaque UNSPEC_AESKEYGENASSIST of operand 1 (register or
;; memory) and operand 2, an immediate in the range 0..255.
;; NOTE(review): "%v" presumably emits the "v" mnemonic prefix for the VEX
;; encoding, as the "maybe_vex" prefix attribute suggests -- confirm.
16327 (define_insn "aeskeygenassist"
16328 [(set (match_operand:V2DI 0 "register_operand" "=x")
16329 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
16330 (match_operand:SI 2 "const_0_to_255_operand" "n")]
16331 UNSPEC_AESKEYGENASSIST))]
16332 "TARGET_AES"
16333 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
16334 [(set_attr "type" "sselog1")
16335 (set_attr "prefix_extra" "1")
16336 (set_attr "length_immediate" "1")
16337 (set_attr "prefix" "maybe_vex")
16338 (set_attr "mode" "TI")])
16339
;; Modelled as the opaque UNSPEC_PCLMUL of operands 1 and 2 plus operand 3,
;; an immediate in the range 0..255.  Two alternatives, chosen by the "isa"
;; attribute: the non-AVX form with operand 1 tied to the destination
;; ("0"), and the VEX three-operand form.
16340 (define_insn "pclmulqdq"
16341 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
16342 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
16343 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")
16344 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
16345 UNSPEC_PCLMUL))]
16346 "TARGET_PCLMUL"
16347 "@
16348 pclmulqdq\t{%3, %2, %0|%0, %2, %3}
16349 vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16350 [(set_attr "isa" "noavx,avx")
16351 (set_attr "type" "sselog1")
16352 (set_attr "prefix_extra" "1")
16353 (set_attr "length_immediate" "1")
16354 (set_attr "prefix" "orig,vex")
16355 (set_attr "mode" "TI")])
16356
;; Expander for vzeroall.  The preparation code replaces operand 0 with
;; a PARALLEL of nregs + 1 elements, where nregs is 16 when TARGET_64BIT
;; and 8 otherwise:
;;   element 0       - unspec_volatile UNSPECV_VZEROALL of (const_int 0);
;;   elements 1..n   - a SET of SSE register number SSE_REGNO (regno),
;;                     in V8SImode, to the V8SImode zero constant.
;; The (match_par_dup 0 ...) template then emits that PARALLEL.
16357 (define_expand "avx_vzeroall"
16358 [(match_par_dup 0 [(const_int 0)])]
16359 "TARGET_AVX"
16360 {
16361 int nregs = TARGET_64BIT ? 16 : 8;
16362 int regno;
16363
16364 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
16365
16366 XVECEXP (operands[0], 0, 0)
16367 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
16368 UNSPECV_VZEROALL);
16369
16370 for (regno = 0; regno < nregs; regno++)
16371 XVECEXP (operands[0], 0, regno + 1)
16372 = gen_rtx_SET (VOIDmode,
16373 gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
16374 CONST0_RTX (V8SImode));
16375 })
16376
;; Matching insn for vzeroall.  Recognizes a PARALLEL accepted by the
;; "vzeroall_operation" predicate whose first element is the
;; UNSPECV_VZEROALL unspec_volatile, and outputs a plain "vzeroall".
;; Attributes record that there is no modrm byte and no memory access.
16377 (define_insn "*avx_vzeroall"
16378 [(match_parallel 0 "vzeroall_operation"
16379 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
16380 "TARGET_AVX"
16381 "vzeroall"
16382 [(set_attr "type" "sse")
16383 (set_attr "modrm" "0")
16384 (set_attr "memory" "none")
16385 (set_attr "prefix" "vex")
16386 (set_attr "btver2_decode" "vector")
16387 (set_attr "mode" "OI")])
16388
16389 ;; Clear the upper 128bits of AVX registers, equivalent to a NOP
16390 ;; if the upper 128bits are unused.
;; The pattern is a bare UNSPECV_VZEROUPPER unspec_volatile with no
;; register operands; it outputs "vzeroupper" and is marked as having
;; no modrm byte and no memory access.
16391 (define_insn "avx_vzeroupper"
16392 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)]
16393 "TARGET_AVX"
16394 "vzeroupper"
16395 [(set_attr "type" "sse")
16396 (set_attr "modrm" "0")
16397 (set_attr "memory" "none")
16398 (set_attr "prefix" "vex")
16399 (set_attr "btver2_decode" "vector")
16400 (set_attr "mode" "OI")])
16401
;; AVX2 vpbroadcast{b,w,d,q}: duplicates element 0 of operand 1 (an
;; <ssexmmmode> register or memory operand) into every element of the
;; VI-mode destination.  In the Intel-syntax half of the template the
;; source is printed with the %<iptr> modifier.
16402 (define_insn "avx2_pbroadcast<mode>"
16403 [(set (match_operand:VI 0 "register_operand" "=x")
16404 (vec_duplicate:VI
16405 (vec_select:<ssescalarmode>
16406 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "xm")
16407 (parallel [(const_int 0)]))))]
16408 "TARGET_AVX2"
16409 "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}"
16410 [(set_attr "type" "ssemov")
16411 (set_attr "prefix_extra" "1")
16412 (set_attr "prefix" "vex")
16413 (set_attr "mode" "<sseinsnmode>")])
16414
;; Variant of the AVX2 vpbroadcast pattern in which the source has the
;; same 256-bit integer mode (VI_256) as the destination; element 0 of
;; the source is duplicated.  Alternative 0 takes the source from
;; memory; alternative 1 takes it from a register and prints it with the
;; %x modifier (presumably the xmm name of that register -- confirm).
16415 (define_insn "avx2_pbroadcast<mode>_1"
16416 [(set (match_operand:VI_256 0 "register_operand" "=x,x")
16417 (vec_duplicate:VI_256
16418 (vec_select:<ssescalarmode>
16419 (match_operand:VI_256 1 "nonimmediate_operand" "m,x")
16420 (parallel [(const_int 0)]))))]
16421 "TARGET_AVX2"
16422 "@
16423 vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}
16424 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}"
16425 [(set_attr "type" "ssemov")
16426 (set_attr "prefix_extra" "1")
16427 (set_attr "prefix" "vex")
16428 (set_attr "mode" "<sseinsnmode>")])
16429
;; Variable permute (vperm<suffix>) for the VI48F_256_512 modes,
;; represented as UNSPEC_VPERMVAR.  Operand 1 is the data (register or
;; memory) and operand 2 is an <sseintvecmode> register.  Note the
;; template order: AT&T syntax prints %1, %2, %0.  The <mask_name>
;; substitution also provides masked variants; the mask is printed via
;; <mask_operand3>.
16430 (define_insn "<avx2_avx512>_permvar<mode><mask_name>"
16431 [(set (match_operand:VI48F_256_512 0 "register_operand" "=v")
16432 (unspec:VI48F_256_512
16433 [(match_operand:VI48F_256_512 1 "nonimmediate_operand" "vm")
16434 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
16435 UNSPEC_VPERMVAR))]
16436 "TARGET_AVX2 && <mask_mode512bit_condition>"
16437 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
16438 [(set_attr "type" "sselog")
16439 (set_attr "prefix" "<mask_prefix2>")
16440 (set_attr "mode" "<sseinsnmode>")])
16441
;; Variable permute for the VI2_AVX512VL modes, enabled under
;; TARGET_AVX512BW.  Same shape as the VI48F_256_512 permvar pattern:
;; UNSPEC_VPERMVAR of data operand 1 and <sseintvecmode> register
;; operand 2, with optional masking through <mask_name>.
16442 (define_insn "<avx512>_permvar<mode><mask_name>"
16443 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
16444 (unspec:VI2_AVX512VL
16445 [(match_operand:VI2_AVX512VL 1 "nonimmediate_operand" "vm")
16446 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
16447 UNSPEC_VPERMVAR))]
16448 "TARGET_AVX512BW && <mask_mode512bit_condition>"
16449 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
16450 [(set_attr "type" "sselog")
16451 (set_attr "prefix" "<mask_prefix2>")
16452 (set_attr "mode" "<sseinsnmode>")])
16453
;; Expander for the immediate-controlled permute.  Operand 2 is a
;; constant accepted by const_0_to_255_operand; the preparation code
;; splits it into four 2-bit selectors (bits 1:0, 3:2, 5:4 and 7:6) and
;; passes them as separate constants to the matching "_1" pattern.
;; DONE means only that emitted insn is used.
16454 (define_expand "<avx2_avx512>_perm<mode>"
16455 [(match_operand:VI8F_256_512 0 "register_operand")
16456 (match_operand:VI8F_256_512 1 "nonimmediate_operand")
16457 (match_operand:SI 2 "const_0_to_255_operand")]
16458 "TARGET_AVX2"
16459 {
16460 int mask = INTVAL (operands[2]);
16461 emit_insn (gen_<avx2_avx512>_perm<mode>_1 (operands[0], operands[1],
16462 GEN_INT ((mask >> 0) & 3),
16463 GEN_INT ((mask >> 2) & 3),
16464 GEN_INT ((mask >> 4) & 3),
16465 GEN_INT ((mask >> 6) & 3)));
16466 DONE;
16467 })
16468
;; Masked form of the immediate-controlled permute expander.  As in the
;; unmasked expander, the constant operand 2 is split into four 2-bit
;; selectors.  Operands 3 (vector_move_operand, presumably the merge
;; source -- confirm) and 4 (an <avx512fmaskmode> register) are
;; forwarded unchanged to the "_1_mask" pattern.
16469 (define_expand "<avx512>_perm<mode>_mask"
16470 [(match_operand:VI8F_256_512 0 "register_operand")
16471 (match_operand:VI8F_256_512 1 "nonimmediate_operand")
16472 (match_operand:SI 2 "const_0_to_255_operand")
16473 (match_operand:VI8F_256_512 3 "vector_move_operand")
16474 (match_operand:<avx512fmaskmode> 4 "register_operand")]
16475 "TARGET_AVX512F"
16476 {
16477 int mask = INTVAL (operands[2]);
16478 emit_insn (gen_<avx2_avx512>_perm<mode>_1_mask (operands[0], operands[1],
16479 GEN_INT ((mask >> 0) & 3),
16480 GEN_INT ((mask >> 2) & 3),
16481 GEN_INT ((mask >> 4) & 3),
16482 GEN_INT ((mask >> 6) & 3),
16483 operands[3], operands[4]));
16484 DONE;
16485 })
16486
;; Insn behind the immediate-controlled permute expanders.  The RTL is a
;; vec_select of operand 1 with four constant selectors (operands 2-5),
;; each restricted to 0..3.  The output code packs the four selectors
;; back into one immediate (operand 2 in bits 1:0, operand 3 in bits
;; 3:2, operand 4 in bits 5:4, operand 5 in bits 7:6), overwrites
;; operands[2] with it and prints it as the immediate of vperm<suffix>.
16487 (define_insn "<avx2_avx512>_perm<mode>_1<mask_name>"
16488 [(set (match_operand:VI8F_256_512 0 "register_operand" "=v")
16489 (vec_select:VI8F_256_512
16490 (match_operand:VI8F_256_512 1 "nonimmediate_operand" "vm")
16491 (parallel [(match_operand 2 "const_0_to_3_operand")
16492 (match_operand 3 "const_0_to_3_operand")
16493 (match_operand 4 "const_0_to_3_operand")
16494 (match_operand 5 "const_0_to_3_operand")])))]
16495 "TARGET_AVX2 && <mask_mode512bit_condition>"
16496 {
16497 int mask = 0;
16498 mask |= INTVAL (operands[2]) << 0;
16499 mask |= INTVAL (operands[3]) << 2;
16500 mask |= INTVAL (operands[4]) << 4;
16501 mask |= INTVAL (operands[5]) << 6;
16502 operands[2] = GEN_INT (mask);
16503 return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
16504 }
16505 [(set_attr "type" "sselog")
16506 (set_attr "prefix" "<mask_prefix2>")
16507 (set_attr "mode" "<sseinsnmode>")])
16508
;; Emits vperm2i128: UNSPEC_VPERMTI of two V4DI sources (operand 1 in a
;; register, operand 2 in a register or memory) and a constant operand 3
;; accepted by const_0_to_255_operand.
16509 (define_insn "avx2_permv2ti"
16510 [(set (match_operand:V4DI 0 "register_operand" "=x")
16511 (unspec:V4DI
16512 [(match_operand:V4DI 1 "register_operand" "x")
16513 (match_operand:V4DI 2 "nonimmediate_operand" "xm")
16514 (match_operand:SI 3 "const_0_to_255_operand" "n")]
16515 UNSPEC_VPERMTI))]
16516 "TARGET_AVX2"
16517 "vperm2i128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16518 [(set_attr "type" "sselog")
16519 (set_attr "prefix" "vex")
16520 (set_attr "mode" "OI")])
16521
;; Emits vbroadcastsd with a register source: element 0 of the V2DF
;; register operand 1 is duplicated into all elements of the V4DF
;; destination.  The source predicate is register_operand, so memory
;; sources are not accepted by this pattern.
16522 (define_insn "avx2_vec_dupv4df"
16523 [(set (match_operand:V4DF 0 "register_operand" "=x")
16524 (vec_duplicate:V4DF
16525 (vec_select:DF
16526 (match_operand:V2DF 1 "register_operand" "x")
16527 (parallel [(const_int 0)]))))]
16528 "TARGET_AVX2"
16529 "vbroadcastsd\t{%1, %0|%0, %1}"
16530 [(set_attr "type" "sselog1")
16531 (set_attr "prefix" "vex")
16532 (set_attr "mode" "V4DF")])
16533
;; EVEX broadcast of element 0 of an <ssexmmmode> source (register or
;; memory) into every element of a V48_AVX512VL destination.  The
;; mnemonic is assembled from <sseintprefix> and <bcstscalarsuff>;
;; masked variants come from <mask_name>, with the mask printed via
;; <mask_operand2>.
16534 (define_insn "<avx512>_vec_dup<mode><mask_name>"
16535 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
16536 (vec_duplicate:V48_AVX512VL
16537 (vec_select:<ssescalarmode>
16538 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
16539 (parallel [(const_int 0)]))))]
16540 "TARGET_AVX512F"
16541 "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16542 [(set_attr "type" "ssemov")
16543 (set_attr "prefix" "evex")
16544 (set_attr "mode" "<sseinsnmode>")])
16545
;; Counterpart of the V48_AVX512VL vec_dup pattern for the
;; VI12_AVX512VL modes, enabled under TARGET_AVX512BW.  Element 0 of the
;; <ssexmmmode> source is duplicated with vpbroadcast<bcstscalarsuff>.
16546 (define_insn "<avx512>_vec_dup<mode><mask_name>"
16547 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
16548 (vec_duplicate:VI12_AVX512VL
16549 (vec_select:<ssescalarmode>
16550 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
16551 (parallel [(const_int 0)]))))]
16552 "TARGET_AVX512BW"
16553 "vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16554 [(set_attr "type" "ssemov")
16555 (set_attr "prefix" "evex")
16556 (set_attr "mode" "<sseinsnmode>")])
16557
;; Broadcast of a whole <ssexmmmode> sub-vector into a V16FI
;; destination.  Alternative 0 (register source) is emitted as
;; vshuf<shuffletype>32x4 with the same register as both sources and an
;; immediate of 0x0; the register is printed with %g (presumably its
;; 512-bit name -- confirm).  Alternative 1 (memory source) uses
;; vbroadcast<shuffletype>32x4.
16558 (define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
16559 [(set (match_operand:V16FI 0 "register_operand" "=v,v")
16560 (vec_duplicate:V16FI
16561 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
16562 "TARGET_AVX512F"
16563 "@
16564 vshuf<shuffletype>32x4\t{$0x0, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x0}
16565 vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16566 [(set_attr "type" "ssemov")
16567 (set_attr "prefix" "evex")
16568 (set_attr "mode" "<sseinsnmode>")])
16569
;; Broadcast of a <ssehalfvecmode> sub-vector into a V8FI destination.
;; Alternative 0 (register source) is emitted as vshuf<shuffletype>64x2
;; with the same register as both sources and an immediate of 0x44;
;; alternative 1 (memory source) uses vbroadcast<shuffletype>64x4.
16570 (define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
16571 [(set (match_operand:V8FI 0 "register_operand" "=v,v")
16572 (vec_duplicate:V8FI
16573 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
16574 "TARGET_AVX512F"
16575 "@
16576 vshuf<shuffletype>64x2\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
16577 vbroadcast<shuffletype>64x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16578 [(set_attr "type" "ssemov")
16579 (set_attr "prefix" "evex")
16580 (set_attr "mode" "<sseinsnmode>")])
16581
;; Broadcast from a general-purpose register (constraint "r") into a
;; VI12_AVX512VL vector, enabled under TARGET_AVX512BW.  The source is
;; printed with the %k modifier (presumably its 32-bit register name --
;; confirm).
16582 (define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>"
16583 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
16584 (vec_duplicate:VI12_AVX512VL
16585 (match_operand:<ssescalarmode> 1 "register_operand" "r")))]
16586 "TARGET_AVX512BW"
16587 "vpbroadcast<bcstscalarsuff>\t{%k1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
16588 [(set_attr "type" "ssemov")
16589 (set_attr "prefix" "evex")
16590 (set_attr "mode" "<sseinsnmode>")])
16591
;; Broadcast from a general-purpose register (constraint "r") into a
;; VI48_AVX512VL vector.  The condition rejects a DImode scalar unless
;; TARGET_64BIT is set.  The output block only returns a fixed template
;; string.
16592 (define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>"
16593 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
16594 (vec_duplicate:VI48_AVX512VL
16595 (match_operand:<ssescalarmode> 1 "register_operand" "r")))]
16596 "TARGET_AVX512F && (<ssescalarmode>mode != DImode || TARGET_64BIT)"
16597 {
16598 return "vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
16599 }
16600 [(set_attr "type" "ssemov")
16601 (set_attr "prefix" "evex")
16602 (set_attr "mode" "<sseinsnmode>")])
16603
;; Broadcast of a scalar (<ssescalarmode>) operand into a V48_AVX512VL
;; vector.  Despite the "_mem" in the name, the constraint is "vm", so
;; the source may be a vector register as well as memory.
16604 (define_insn "<mask_codefor><avx512>_vec_dup_mem<mode><mask_name>"
16605 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
16606 (vec_duplicate:V48_AVX512VL
16607 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm")))]
16608 "TARGET_AVX512F"
16609 "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16610 [(set_attr "type" "ssemov")
16611 (set_attr "prefix" "evex")
16612 (set_attr "mode" "<sseinsnmode>")])
16613
;; Broadcast of a scalar (<ssescalarmode>) operand into a VI12_AVX512VL
;; vector, enabled under TARGET_AVX512BW.  As with the V48_AVX512VL
;; "_mem" pattern, the "vm" constraint accepts a register or memory.
16614 (define_insn "<mask_codefor><avx512>_vec_dup_mem<mode><mask_name>"
16615 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
16616 (vec_duplicate:VI12_AVX512VL
16617 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm")))]
16618 "TARGET_AVX512BW"
16619 "vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16620 [(set_attr "type" "ssemov")
16621 (set_attr "prefix" "evex")
16622 (set_attr "mode" "<sseinsnmode>")])
16623
;; Emits vbroadcasti128: the VI_256 destination is the vec_concat of a
;; <ssehalfvecmode> memory operand with itself (match_dup 1).  Only a
;; memory source is accepted (memory_operand, constraint "m").
16624 (define_insn "avx2_vbroadcasti128_<mode>"
16625 [(set (match_operand:VI_256 0 "register_operand" "=x")
16626 (vec_concat:VI_256
16627 (match_operand:<ssehalfvecmode> 1 "memory_operand" "m")
16628 (match_dup 1)))]
16629 "TARGET_AVX2"
16630 "vbroadcasti128\t{%1, %0|%0, %1}"
16631 [(set_attr "type" "ssemov")
16632 (set_attr "prefix_extra" "1")
16633 (set_attr "prefix" "vex")
16634 (set_attr "mode" "OI")])
16635
16636 ;; Modes handled by AVX vec_dup patterns.
;; Used by the named "vec_dup<mode>" insn and by the split that is
;; enabled for TARGET_AVX && !TARGET_AVX2.
16637 (define_mode_iterator AVX_VEC_DUP_MODE
16638 [V8SI V8SF V4DI V4DF])
16639 ;; Modes handled by AVX2 vec_dup patterns.
;; Used by the "*vec_dup<mode>" insn and by the split that handles a
;; general-register source under TARGET_AVX2.
16640 (define_mode_iterator AVX2_VEC_DUP_MODE
16641 [V32QI V16QI V16HI V8HI V8SI V4SI])
16642
;; AVX2 broadcast of a scalar into an AVX2_VEC_DUP_MODE vector.
;; Alternatives, in order:
;;   0 - memory source, broadcast directly;
;;   1 - SSE register source, printed with the %x modifier;
;;   2 - general register source ("!r"): outputs "#", so it must be
;;       split; the post-reload AVX2_VEC_DUP_MODE define_split whose
;;       condition tests GENERAL_REG_P (operands[1]) handles this form.
16643 (define_insn "*vec_dup<mode>"
16644 [(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand" "=x,x,x")
16645 (vec_duplicate:AVX2_VEC_DUP_MODE
16646 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,x,!r")))]
16647 "TARGET_AVX2"
16648 "@
16649 v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1}
16650 v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}
16651 #"
16652 [(set_attr "type" "ssemov")
16653 (set_attr "prefix_extra" "1")
16654 (set_attr "prefix" "maybe_evex")
16655 (set_attr "mode" "<sseinsnmode>")])
16656
;; Named vec_dup pattern for the AVX_VEC_DUP_MODE modes.  Four
;; alternatives, paired with the "isa" attribute avx2,noavx2,avx2,noavx2:
;;   0 - memory source, AVX2 broadcast mnemonic;
;;   1 - memory source without AVX2, vbroadcast<ssescalarmodesuffix>;
;;   2 - register source with AVX2, printed with the %x modifier;
;;   3 - register source without AVX2 ("?x"): outputs "#" and is split
;;       after reload.
;; The "mode" attribute is V8SF for the two noavx2 alternatives.
16657 (define_insn "vec_dup<mode>"
16658 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x,v,x")
16659 (vec_duplicate:AVX_VEC_DUP_MODE
16660 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,m,v,?x")))]
16661 "TARGET_AVX"
16662 "@
16663 v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1}
16664 vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
16665 v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}
16666 #"
16667 [(set_attr "type" "ssemov")
16668 (set_attr "prefix_extra" "1")
16669 (set_attr "prefix" "maybe_evex")
16670 (set_attr "isa" "avx2,noavx2,avx2,noavx2")
16671 (set_attr "mode" "<sseinsnmode>,V8SF,<sseinsnmode>,V8SF")])
16672
;; Post-reload split of an AVX2 vec_duplicate whose scalar source is in
;; a general register.  It emits two insns:
;;   1. gen_vec_setv4si_0 on the destination viewed as V4SImode, with a
;;      zero vector and the SImode lowpart of the source;
;;   2. gen_avx2_pbroadcast<mode> from the <ssexmmmode> lowpart of the
;;      destination into the full destination.
;; NOTE(review): step 1 always uses the SImode lowpart of operand 1,
;; whatever <ssescalarmode> is; this looks deliberate for the narrower
;; element modes, but confirm.
16673 (define_split
16674 [(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand")
16675 (vec_duplicate:AVX2_VEC_DUP_MODE
16676 (match_operand:<ssescalarmode> 1 "register_operand")))]
16677 "TARGET_AVX2 && reload_completed && GENERAL_REG_P (operands[1])"
16678 [(const_int 0)]
16679 {
16680 emit_insn (gen_vec_setv4si_0 (gen_lowpart (V4SImode, operands[0]),
16681 CONST0_RTX (V4SImode),
16682 gen_lowpart (SImode, operands[1])));
16683 emit_insn (gen_avx2_pbroadcast<mode> (operands[0],
16684 gen_lowpart (<ssexmmmode>mode,
16685 operands[0])));
16686 DONE;
16687 })
16688
;; Post-reload split of a register-source vec_duplicate when AVX is
;; available but AVX2 is not.  Operand 2 is created as a
;; <ssehalfvecmode> register with the same register number as the
;; destination.  The scalar is first duplicated into that half-width
;; register, then the full destination is the vec_concat of that
;; half-width register with itself.
16689 (define_split
16690 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand")
16691 (vec_duplicate:AVX_VEC_DUP_MODE
16692 (match_operand:<ssescalarmode> 1 "register_operand")))]
16693 "TARGET_AVX && !TARGET_AVX2 && reload_completed"
16694 [(set (match_dup 2)
16695 (vec_duplicate:<ssehalfvecmode> (match_dup 1)))
16696 (set (match_dup 0)
16697 (vec_concat:AVX_VEC_DUP_MODE (match_dup 2) (match_dup 2)))]
16698 "operands[2] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[0]));")
16699
;; 128-bit to 256-bit broadcast for the V_256 modes: the destination is
;; the vec_concat of the <ssehalfvecmode> operand 1 with itself.
;; Alternatives, in order:
;;   0 - memory source: vbroadcast<i128>;
;;   1 - source tied to the destination ("0"): vinsert<i128> with
;;       immediate 1;
;;   2 - other register source ("?x"): vperm2<i128> with immediate 0,
;;       printing the source with the %t modifier (presumably its
;;       256-bit name -- confirm).
;; "length_immediate" is 0,1,1 to match.
16700 (define_insn "avx_vbroadcastf128_<mode>"
16701 [(set (match_operand:V_256 0 "register_operand" "=x,x,x")
16702 (vec_concat:V_256
16703 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x")
16704 (match_dup 1)))]
16705 "TARGET_AVX"
16706 "@
16707 vbroadcast<i128>\t{%1, %0|%0, %1}
16708 vinsert<i128>\t{$1, %1, %0, %0|%0, %0, %1, 1}
16709 vperm2<i128>\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}"
16710 [(set_attr "type" "ssemov,sselog1,sselog1")
16711 (set_attr "prefix_extra" "1")
16712 (set_attr "length_immediate" "0,1,1")
16713 (set_attr "prefix" "vex")
16714 (set_attr "mode" "<sseinsnmode>")])
16715
16716 ;; For broadcast[i|f]32x2. Yes there is no v4sf version, only v4si.
;; The 256-bit and 128-bit entries are additionally conditional on
;; TARGET_AVX512VL.
16717 (define_mode_iterator VI4F_BRCST32x2
16718 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
16719 V16SF (V8SF "TARGET_AVX512VL")])
16720
;; Maps a 64-bit-element vector mode to the two-element vector mode
;; with the same element type.
16721 (define_mode_attr 64x2mode
16722 [(V8DF "V2DF") (V8DI "V2DI") (V4DI "V2DI") (V4DF "V2DF")])
16723
;; Maps a 32-bit-element vector mode to the two-element vector mode
;; with the same element type.
16724 (define_mode_attr 32x2mode
16725 [(V16SF "V2SF") (V16SI "V2SI") (V8SI "V2SI")
16726 (V8SF "V2SF") (V4SI "V2SI")])
16727
;; Emits vbroadcast<shuffletype>32x2: elements 0 and 1 of the
;; <ssexmmmode> source (register or memory) are selected as a
;; <32x2mode> pair, and that pair is duplicated across the
;; VI4F_BRCST32x2 destination.  Masked variants come from <mask_name>.
16728 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>"
16729 [(set (match_operand:VI4F_BRCST32x2 0 "register_operand" "=v")
16730 (vec_duplicate:VI4F_BRCST32x2
16731 (vec_select:<32x2mode>
16732 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
16733 (parallel [(const_int 0) (const_int 1)]))))]
16734 "TARGET_AVX512DQ"
16735 "vbroadcast<shuffletype>32x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16736 [(set_attr "type" "ssemov")
16737 (set_attr "prefix_extra" "1")
16738 (set_attr "prefix" "evex")
16739 (set_attr "mode" "<sseinsnmode>")])
16740
;; Broadcast of a <ssexmmmode> sub-vector into a VI4F_256 destination
;; under TARGET_AVX512VL.  Alternative 0 (register source) uses
;; vshuf<shuffletype>32x4 with the same register as both sources and
;; immediate 0x0, printing the register with the %t modifier;
;; alternative 1 (memory source) uses vbroadcast<shuffletype>32x4.
16741 (define_insn "<mask_codefor>avx512vl_broadcast<mode><mask_name>_1"
16742 [(set (match_operand:VI4F_256 0 "register_operand" "=v,v")
16743 (vec_duplicate:VI4F_256
16744 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
16745 "TARGET_AVX512VL"
16746 "@
16747 vshuf<shuffletype>32x4\t{$0x0, %t1, %t1, %0<mask_operand2>|%0<mask_operand2>, %t1, %t1, 0x0}
16748 vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16749 [(set_attr "type" "ssemov")
16750 (set_attr "prefix_extra" "1")
16751 (set_attr "prefix" "evex")
16752 (set_attr "mode" "<sseinsnmode>")])
16753
;; Broadcast of a <ssehalfvecmode> sub-vector into a V16FI destination
;; under TARGET_AVX512DQ.  Alternative 0 (register source) uses
;; vshuf<shuffletype>32x4 with the same register as both sources and
;; immediate 0x44; alternative 1 (memory source) uses
;; vbroadcast<shuffletype>32x8.
16754 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1"
16755 [(set (match_operand:V16FI 0 "register_operand" "=v,v")
16756 (vec_duplicate:V16FI
16757 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
16758 "TARGET_AVX512DQ"
16759 "@
16760 vshuf<shuffletype>32x4\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
16761 vbroadcast<shuffletype>32x8\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16762 [(set_attr "type" "ssemov")
16763 (set_attr "prefix_extra" "1")
16764 (set_attr "prefix" "evex")
16765 (set_attr "mode" "<sseinsnmode>")])
16766
16767 ;; For broadcast[i|f]64x2
;; The 256-bit entries are additionally conditional on TARGET_AVX512VL.
16768 (define_mode_iterator VI8F_BRCST64x2
16769 [V8DI V8DF (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")])
16770
;; Broadcast of a <64x2mode> sub-vector into a VI8F_BRCST64x2
;; destination under TARGET_AVX512DQ.  Alternative 0 (register source)
;; uses vshuf<shuffletype>64x2 with the same register as both sources
;; and immediate 0x0; the register is printed with the modifier given
;; by <concat_tg_mode>.  Alternative 1 (memory source) uses
;; vbroadcast<shuffletype>64x2.
16771 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1"
16772 [(set (match_operand:VI8F_BRCST64x2 0 "register_operand" "=v,v")
16773 (vec_duplicate:VI8F_BRCST64x2
16774 (match_operand:<64x2mode> 1 "nonimmediate_operand" "v,m")))]
16775 "TARGET_AVX512DQ"
16776 "@
16777 vshuf<shuffletype>64x2\t{$0x0, %<concat_tg_mode>1, %<concat_tg_mode>1, %0<mask_operand2>|%0<mask_operand2>, %<concat_tg_mode>1, %<concat_tg_mode>1, 0x0}
16778 vbroadcast<shuffletype>64x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16779 [(set_attr "type" "ssemov")
16780 (set_attr "prefix_extra" "1")
16781 (set_attr "prefix" "evex")
16782 (set_attr "mode" "<sseinsnmode>")])
16783
;; Emits vpbroadcastmb2q: the QImode operand 1 (constraint "Yk") is
;; zero-extended to DImode and duplicated into every element of the
;; VI8_AVX512VL destination.
;; NOTE(review): the "mode" attribute is XI for every mode of the
;; iterator -- confirm that is intended for the narrower modes.
16784 (define_insn "avx512cd_maskb_vec_dup<mode>"
16785 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
16786 (vec_duplicate:VI8_AVX512VL
16787 (zero_extend:DI
16788 (match_operand:QI 1 "register_operand" "Yk"))))]
16789 "TARGET_AVX512CD"
16790 "vpbroadcastmb2q\t{%1, %0|%0, %1}"
16791 [(set_attr "type" "mskmov")
16792 (set_attr "prefix" "evex")
16793 (set_attr "mode" "XI")])
16794
;; Emits vpbroadcastmw2d: the HImode operand 1 (constraint "Yk") is
;; zero-extended to SImode and duplicated into every element of the
;; VI4_AVX512VL destination.
;; NOTE(review): the "mode" attribute is XI for every mode of the
;; iterator -- confirm that is intended for the narrower modes.
16795 (define_insn "avx512cd_maskw_vec_dup<mode>"
16796 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
16797 (vec_duplicate:VI4_AVX512VL
16798 (zero_extend:SI
16799 (match_operand:HI 1 "register_operand" "Yk"))))]
16800 "TARGET_AVX512CD"
16801 "vpbroadcastmw2d\t{%1, %0|%0, %1}"
16802 [(set_attr "type" "mskmov")
16803 (set_attr "prefix" "evex")
16804 (set_attr "mode" "XI")])
16805
16806 ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
16807 ;; If it so happens that the input is in memory, use vbroadcast.
16808 ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
;; V4SF case.  Operand 2 is the selector PARALLEL (checked by
;; avx_vbroadcast_operand) and operand 3 is its first element, the
;; index of the element to broadcast.  For the two memory alternatives
;; the address is advanced by elt * 4 bytes and re-typed as SFmode so
;; vbroadcastss loads just that element.  For the register alternative
;; the immediate is elt * 0x55, which repeats the 2-bit index in all
;; four selector fields of vpermilps.
16809 (define_insn "*avx_vperm_broadcast_v4sf"
16810 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
16811 (vec_select:V4SF
16812 (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x")
16813 (match_parallel 2 "avx_vbroadcast_operand"
16814 [(match_operand 3 "const_int_operand" "C,n,n")])))]
16815 "TARGET_AVX"
16816 {
16817 int elt = INTVAL (operands[3]);
16818 switch (which_alternative)
16819 {
16820 case 0:
16821 case 1:
16822 operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
16823 return "vbroadcastss\t{%1, %0|%0, %k1}";
16824 case 2:
16825 operands[2] = GEN_INT (elt * 0x55);
16826 return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
16827 default:
16828 gcc_unreachable ();
16829 }
16830 }
16831 [(set_attr "type" "ssemov,ssemov,sselog1")
16832 (set_attr "prefix_extra" "1")
16833 (set_attr "length_immediate" "0,0,1")
16834 (set_attr "prefix" "vex")
16835 (set_attr "mode" "SF,SF,V4SF")])
16836
;; 256-bit (VF_256) broadcast expressed as a vec_select.  The insn
;; template is "#", so it is always split.  The split runs after reload,
;; and not for V4DFmode when TARGET_AVX2 is set.
;;
;; Register source:
;;   - with TARGET_AVX2 and element 0, emit gen_vec_dup<mode> from the
;;     scalar lowpart of the source;
;;   - otherwise emit vpermil to replicate the element within its
;;     128-bit lane (mask 15 or 0 for V4DF, (elt & 3) * 0x55 otherwise),
;;     then vperm2f128 with mask (elt / (<ssescalarnum> / 2)) * 0x11 to
;;     copy the chosen lane into both halves of the destination.
;; Memory source:
;;   - operand 1 is re-addressed to the selected scalar element and the
;;     replacement pattern (vec_duplicate of operand 1) is emitted.
16837 (define_insn_and_split "*avx_vperm_broadcast_<mode>"
16838 [(set (match_operand:VF_256 0 "register_operand" "=x,x,x")
16839 (vec_select:VF_256
16840 (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?x")
16841 (match_parallel 2 "avx_vbroadcast_operand"
16842 [(match_operand 3 "const_int_operand" "C,n,n")])))]
16843 "TARGET_AVX"
16844 "#"
16845 "&& reload_completed && (<MODE>mode != V4DFmode || !TARGET_AVX2)"
16846 [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
16847 {
16848 rtx op0 = operands[0], op1 = operands[1];
16849 int elt = INTVAL (operands[3]);
16850
16851 if (REG_P (op1))
16852 {
16853 int mask;
16854
16855 if (TARGET_AVX2 && elt == 0)
16856 {
16857 emit_insn (gen_vec_dup<mode> (op0, gen_lowpart (<ssescalarmode>mode,
16858 op1)));
16859 DONE;
16860 }
16861
16862 /* Shuffle element we care about into all elements of the 128-bit lane.
16863 The other lane gets shuffled too, but we don't care. */
16864 if (<MODE>mode == V4DFmode)
16865 mask = (elt & 1 ? 15 : 0);
16866 else
16867 mask = (elt & 3) * 0x55;
16868 emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
16869
16870 /* Shuffle the lane we care about into both lanes of the dest. */
16871 mask = (elt / (<ssescalarnum> / 2)) * 0x11;
16872 emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
16873 DONE;
16874 }
16875
16876 operands[1] = adjust_address (op1, <ssescalarmode>mode,
16877 elt * GET_MODE_SIZE (<ssescalarmode>mode));
16878 })
16879
;; vpermil expander for the VF2 modes.  The constant operand 2 is turned
;; into an explicit selector PARALLEL of <ssescalarnum> indices.
;; Elements are handled in pairs starting at index i: bit i of the mask
;; picks the source for element i and bit i + 1 picks the source for
;; element i + 1.  Both indices are offset by i, so each selection
;; stays within its own pair.
16880 (define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
16881 [(set (match_operand:VF2 0 "register_operand")
16882 (vec_select:VF2
16883 (match_operand:VF2 1 "nonimmediate_operand")
16884 (match_operand:SI 2 "const_0_to_255_operand")))]
16885 "TARGET_AVX && <mask_mode512bit_condition>"
16886 {
16887 int mask = INTVAL (operands[2]);
16888 rtx perm[<ssescalarnum>];
16889
16890 int i;
16891 for (i = 0; i < <ssescalarnum>; i = i + 2)
16892 {
16893 perm[i] = GEN_INT (((mask >> i) & 1) + i);
16894 perm[i + 1] = GEN_INT (((mask >> (i + 1)) & 1) + i);
16895 }
16896
16897 operands[2]
16898 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
16899 })
16900
;; vpermil expander for the VF1 modes.  The constant operand 2 is turned
;; into an explicit selector PARALLEL of <ssescalarnum> indices.
;; Elements are handled in groups of four starting at index i.  Every
;; group uses the same four 2-bit fields of the mask (bits 1:0, 3:2,
;; 5:4 and 7:6), each offset by i, so each selection stays within its
;; own group.
16901 (define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
16902 [(set (match_operand:VF1 0 "register_operand")
16903 (vec_select:VF1
16904 (match_operand:VF1 1 "nonimmediate_operand")
16905 (match_operand:SI 2 "const_0_to_255_operand")))]
16906 "TARGET_AVX && <mask_mode512bit_condition>"
16907 {
16908 int mask = INTVAL (operands[2]);
16909 rtx perm[<ssescalarnum>];
16910
16911 int i;
16912 for (i = 0; i < <ssescalarnum>; i = i + 4)
16913 {
16914 perm[i] = GEN_INT (((mask >> 0) & 3) + i);
16915 perm[i + 1] = GEN_INT (((mask >> 2) & 3) + i);
16916 perm[i + 2] = GEN_INT (((mask >> 4) & 3) + i);
16917 perm[i + 3] = GEN_INT (((mask >> 6) & 3) + i);
16918 }
16919
16920 operands[2]
16921 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
16922 })
16923
;; Insn matching a vec_select whose selector PARALLEL (operand 2) is
;; accepted by avx_vpermilp_parallel.  The insn condition requires that
;; helper to return nonzero.  The output code calls it again and uses
;; its return value minus one as the immediate, which replaces
;; operands[2] before the vpermil<suffix> template is printed.
16924 (define_insn "*<sse2_avx_avx512f>_vpermilp<mode><mask_name>"
16925 [(set (match_operand:VF 0 "register_operand" "=v")
16926 (vec_select:VF
16927 (match_operand:VF 1 "nonimmediate_operand" "vm")
16928 (match_parallel 2 ""
16929 [(match_operand 3 "const_int_operand")])))]
16930 "TARGET_AVX && <mask_mode512bit_condition>
16931 && avx_vpermilp_parallel (operands[2], <MODE>mode)"
16932 {
16933 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
16934 operands[2] = GEN_INT (mask);
16935 return "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}";
16936 }
16937 [(set_attr "type" "sselog")
16938 (set_attr "prefix_extra" "1")
16939 (set_attr "length_immediate" "1")
16940 (set_attr "prefix" "<mask_prefix>")
16941 (set_attr "mode" "<sseinsnmode>")])
16942
;; Variable-control vpermil for the VF modes, represented as
;; UNSPEC_VPERMIL.  Operand 1 is the data register and operand 2 is the
;; <sseintvecmode> control vector (register or memory).  Masked variants
;; come from <mask_name>.
16943 (define_insn "<sse2_avx_avx512f>_vpermilvar<mode>3<mask_name>"
16944 [(set (match_operand:VF 0 "register_operand" "=v")
16945 (unspec:VF
16946 [(match_operand:VF 1 "register_operand" "v")
16947 (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "vm")]
16948 UNSPEC_VPERMIL))]
16949 "TARGET_AVX && <mask_mode512bit_condition>"
16950 "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
16951 [(set_attr "type" "sselog")
16952 (set_attr "prefix_extra" "1")
16953 (set_attr "btver2_decode" "vector")
16954 (set_attr "prefix" "<mask_prefix>")
16955 (set_attr "mode" "<sseinsnmode>")])
16956
;; Zero-masking vpermi2 expander for the VI48F modes.  It forwards
;; operands 0-3 to the "_maskz_1" pattern, inserting an all-zero vector
;; of <MODE>mode before the mask operand 4.
16957 (define_expand "<avx512>_vpermi2var<mode>3_maskz"
16958 [(match_operand:VI48F 0 "register_operand" "=v")
16959 (match_operand:VI48F 1 "register_operand" "v")
16960 (match_operand:<sseintvecmode> 2 "register_operand" "0")
16961 (match_operand:VI48F 3 "nonimmediate_operand" "vm")
16962 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")]
16963 "TARGET_AVX512F"
16964 {
16965 emit_insn (gen_<avx512>_vpermi2var<mode>3_maskz_1 (
16966 operands[0], operands[1], operands[2], operands[3],
16967 CONST0_RTX (<MODE>mode), operands[4]));
16968 DONE;
16969 })
16970
;; Zero-masking vpermi2 expander for the VI2_AVX512VL modes, enabled
;; under TARGET_AVX512BW.  It forwards operands 0-3 to the "_maskz_1"
;; pattern, inserting an all-zero vector of <MODE>mode before the mask
;; operand 4.
16971 (define_expand "<avx512>_vpermi2var<mode>3_maskz"
16972 [(match_operand:VI2_AVX512VL 0 "register_operand" "=v")
16973 (match_operand:VI2_AVX512VL 1 "register_operand" "v")
16974 (match_operand:<sseintvecmode> 2 "register_operand" "0")
16975 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")
16976 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")]
16977 "TARGET_AVX512BW"
16978 {
16979 emit_insn (gen_<avx512>_vpermi2var<mode>3_maskz_1 (
16980 operands[0], operands[1], operands[2], operands[3],
16981 CONST0_RTX (<MODE>mode), operands[4]));
16982 DONE;
16983 })
16984
;; vpermi2 for the VI48F modes, represented as UNSPEC_VPERMI2.  Operand
;; 2, the <sseintvecmode> register, is tied to the destination by the
;; "0" constraint, so the template prints only %3, %1 and %0.  The
;; <sd_maskz_name> substitution also provides the zero-masked form.
16985 (define_insn "<avx512>_vpermi2var<mode>3<sd_maskz_name>"
16986 [(set (match_operand:VI48F 0 "register_operand" "=v")
16987 (unspec:VI48F
16988 [(match_operand:VI48F 1 "register_operand" "v")
16989 (match_operand:<sseintvecmode> 2 "register_operand" "0")
16990 (match_operand:VI48F 3 "nonimmediate_operand" "vm")]
16991 UNSPEC_VPERMI2))]
16992 "TARGET_AVX512F"
16993 "vpermi2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
16994 [(set_attr "type" "sselog")
16995 (set_attr "prefix" "evex")
16996 (set_attr "mode" "<sseinsnmode>")])
16997
;; vpermi2 for the VI2_AVX512VL modes, enabled under TARGET_AVX512BW.
;; Same shape as the VI48F pattern: UNSPEC_VPERMI2 with operand 2 tied
;; to the destination.
16998 (define_insn "<avx512>_vpermi2var<mode>3<sd_maskz_name>"
16999 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17000 (unspec:VI2_AVX512VL
17001 [(match_operand:VI2_AVX512VL 1 "register_operand" "v")
17002 (match_operand:<sseintvecmode> 2 "register_operand" "0")
17003 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")]
17004 UNSPEC_VPERMI2))]
17005 "TARGET_AVX512BW"
17006 "vpermi2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
17007 [(set_attr "type" "sselog")
17008 (set_attr "prefix" "evex")
17009 (set_attr "mode" "<sseinsnmode>")])
17010
;; Merge-masking vpermi2 for the VI48F modes.  The result is a vec_merge
;; of the UNSPEC_VPERMI2_MASK permute with (match_dup 0) under mask
;; operand 4, so unselected elements keep the destination's value.
;; Operand 2 is tied to the destination by the "0" constraint.  The
;; mask is printed in braces after the destination (%{%4%}).
17011 (define_insn "<avx512>_vpermi2var<mode>3_mask"
17012 [(set (match_operand:VI48F 0 "register_operand" "=v")
17013 (vec_merge:VI48F
17014 (unspec:VI48F
17015 [(match_operand:VI48F 1 "register_operand" "v")
17016 (match_operand:<sseintvecmode> 2 "register_operand" "0")
17017 (match_operand:VI48F 3 "nonimmediate_operand" "vm")]
17018 UNSPEC_VPERMI2_MASK)
17019 (match_dup 0)
17020 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
17021 "TARGET_AVX512F"
17022 "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
17023 [(set_attr "type" "sselog")
17024 (set_attr "prefix" "evex")
17025 (set_attr "mode" "<sseinsnmode>")])
17026
;; Merge-masking vpermi2 for the VI2_AVX512VL modes, enabled under
;; TARGET_AVX512BW.  Same shape as the VI48F "_mask" pattern: vec_merge
;; of the UNSPEC_VPERMI2_MASK permute with (match_dup 0) under mask
;; operand 4.
17027 (define_insn "<avx512>_vpermi2var<mode>3_mask"
17028 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17029 (vec_merge:VI2_AVX512VL
17030 (unspec:VI2_AVX512VL
17031 [(match_operand:VI2_AVX512VL 1 "register_operand" "v")
17032 (match_operand:<sseintvecmode> 2 "register_operand" "0")
17033 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")]
17034 UNSPEC_VPERMI2_MASK)
17035 (match_dup 0)
17036 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
17037 "TARGET_AVX512BW"
17038 "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
17039 [(set_attr "type" "sselog")
17040 (set_attr "prefix" "evex")
17041 (set_attr "mode" "<sseinsnmode>")])
17042
;; Zero-masking vpermt2 expander for the VI48F modes.  Here operand 1 is
;; the <sseintvecmode> register and operand 2 is a data operand.  The
;; expander forwards operands 0-3 to the "_maskz_1" pattern, inserting
;; an all-zero vector of <MODE>mode before the mask operand 4.
17043 (define_expand "<avx512>_vpermt2var<mode>3_maskz"
17044 [(match_operand:VI48F 0 "register_operand" "=v")
17045 (match_operand:<sseintvecmode> 1 "register_operand" "v")
17046 (match_operand:VI48F 2 "register_operand" "0")
17047 (match_operand:VI48F 3 "nonimmediate_operand" "vm")
17048 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")]
17049 "TARGET_AVX512F"
17050 {
17051 emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 (
17052 operands[0], operands[1], operands[2], operands[3],
17053 CONST0_RTX (<MODE>mode), operands[4]));
17054 DONE;
17055 })
17056
;; Zero-masking vpermt2 expander for the VI2_AVX512VL modes, enabled
;; under TARGET_AVX512BW.  It forwards operands 0-3 to the "_maskz_1"
;; pattern, inserting an all-zero vector of <MODE>mode before the mask
;; operand 4.
17057 (define_expand "<avx512>_vpermt2var<mode>3_maskz"
17058 [(match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17059 (match_operand:<sseintvecmode> 1 "register_operand" "v")
17060 (match_operand:VI2_AVX512VL 2 "register_operand" "0")
17061 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")
17062 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")]
17063 "TARGET_AVX512BW"
17064 {
17065 emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 (
17066 operands[0], operands[1], operands[2], operands[3],
17067 CONST0_RTX (<MODE>mode), operands[4]));
17068 DONE;
17069 })
17070
;; vpermt2 for the VI48F modes, represented as UNSPEC_VPERMT2.  Operand
;; 1 is the <sseintvecmode> register.  Operand 2 is a data register
;; tied to the destination by the "0" constraint, so the template
;; prints only %3, %1 and %0.  The <sd_maskz_name> substitution also
;; provides the zero-masked form.
17071 (define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>"
17072 [(set (match_operand:VI48F 0 "register_operand" "=v")
17073 (unspec:VI48F
17074 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
17075 (match_operand:VI48F 2 "register_operand" "0")
17076 (match_operand:VI48F 3 "nonimmediate_operand" "vm")]
17077 UNSPEC_VPERMT2))]
17078 "TARGET_AVX512F"
17079 "vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
17080 [(set_attr "type" "sselog")
17081 (set_attr "prefix" "evex")
17082 (set_attr "mode" "<sseinsnmode>")])
17083
;; vpermt2 for the VI2_AVX512VL modes, enabled under TARGET_AVX512BW.
;; Same shape as the VI48F pattern: UNSPEC_VPERMT2 with data operand 2
;; tied to the destination.
17084 (define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>"
17085 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17086 (unspec:VI2_AVX512VL
17087 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
17088 (match_operand:VI2_AVX512VL 2 "register_operand" "0")
17089 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")]
17090 UNSPEC_VPERMT2))]
17091 "TARGET_AVX512BW"
17092 "vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
17093 [(set_attr "type" "sselog")
17094 (set_attr "prefix" "evex")
17095 (set_attr "mode" "<sseinsnmode>")])
17096
;; Merge-masking vpermt2 for the VI48F modes.  The result is a vec_merge
;; of the UNSPEC_VPERMT2 permute with (match_dup 2) under mask operand
;; 4.  Operand 2 is tied to the destination, so unselected elements
;; keep its original value.  The mask is printed in braces after the
;; destination (%{%4%}).
17097 (define_insn "<avx512>_vpermt2var<mode>3_mask"
17098 [(set (match_operand:VI48F 0 "register_operand" "=v")
17099 (vec_merge:VI48F
17100 (unspec:VI48F
17101 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
17102 (match_operand:VI48F 2 "register_operand" "0")
17103 (match_operand:VI48F 3 "nonimmediate_operand" "vm")]
17104 UNSPEC_VPERMT2)
17105 (match_dup 2)
17106 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
17107 "TARGET_AVX512F"
17108 "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
17109 [(set_attr "type" "sselog")
17110 (set_attr "prefix" "evex")
17111 (set_attr "mode" "<sseinsnmode>")])
17112
;; Merge-masking vpermt2 for the VI2_AVX512VL modes, enabled under
;; TARGET_AVX512BW.  Same shape as the VI48F "_mask" pattern: vec_merge
;; of the UNSPEC_VPERMT2 permute with (match_dup 2) under mask operand 4.
17113 (define_insn "<avx512>_vpermt2var<mode>3_mask"
17114 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17115 (vec_merge:VI2_AVX512VL
17116 (unspec:VI2_AVX512VL
17117 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
17118 (match_operand:VI2_AVX512VL 2 "register_operand" "0")
17119 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")]
17120 UNSPEC_VPERMT2)
17121 (match_dup 2)
17122 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
17123 "TARGET_AVX512BW"
17124 "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
17125 [(set_attr "type" "sselog")
17126 (set_attr "prefix" "evex")
17127 (set_attr "mode" "<sseinsnmode>")])
17128
;; Expander for vperm2f128 on the AVX256MODE2P modes.
;;
;; When bits 3 and 7 of the immediate are both clear
;; ((mask & 0x88) == 0), the permute is emitted as a vec_select from the
;; vec_concat of operands 1 and 2.  The low half of the result takes
;; nelt2 consecutive elements starting at (mask & 3) * nelt2, and the
;; high half takes nelt2 consecutive elements starting at
;; ((mask >> 4) & 3) * nelt2.  The expander then finishes with DONE.
;;
;; Otherwise the preparation code does nothing and the default
;; UNSPEC_VPERMIL2F128 pattern from the template is emitted.
17129 (define_expand "avx_vperm2f128<mode>3"
17130 [(set (match_operand:AVX256MODE2P 0 "register_operand")
17131 (unspec:AVX256MODE2P
17132 [(match_operand:AVX256MODE2P 1 "register_operand")
17133 (match_operand:AVX256MODE2P 2 "nonimmediate_operand")
17134 (match_operand:SI 3 "const_0_to_255_operand")]
17135 UNSPEC_VPERMIL2F128))]
17136 "TARGET_AVX"
17137 {
17138 int mask = INTVAL (operands[3]);
17139 if ((mask & 0x88) == 0)
17140 {
17141 rtx perm[<ssescalarnum>], t1, t2;
17142 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
17143
17144 base = (mask & 3) * nelt2;
17145 for (i = 0; i < nelt2; ++i)
17146 perm[i] = GEN_INT (base + i);
17147
17148 base = ((mask >> 4) & 3) * nelt2;
17149 for (i = 0; i < nelt2; ++i)
17150 perm[i + nelt2] = GEN_INT (base + i);
17151
17152 t2 = gen_rtx_VEC_CONCAT (<ssedoublevecmode>mode,
17153 operands[1], operands[2]);
17154 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
17155 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
17156 t2 = gen_rtx_SET (VOIDmode, operands[0], t2);
17157 emit_insn (t2);
17158 DONE;
17159 }
17160 })
17161
17162 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
17163 ;; means that in order to represent this properly in rtl we'd have to
17164 ;; nest *another* vec_concat with a zero operand and do the select from
17165 ;; a 4x wide vector. That doesn't seem very nice.
;; Matches the opaque UNSPEC_VPERMIL2F128 form with an arbitrary 0..255
;; immediate (operand 3) and prints it as vperm2<i128> with all four
;; operands.
17166 (define_insn "*avx_vperm2f128<mode>_full"
17167 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
17168 (unspec:AVX256MODE2P
17169 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
17170 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
17171 (match_operand:SI 3 "const_0_to_255_operand" "n")]
17172 UNSPEC_VPERMIL2F128))]
17173 "TARGET_AVX"
17174 "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17175 [(set_attr "type" "sselog")
17176 (set_attr "prefix_extra" "1")
17177 (set_attr "length_immediate" "1")
17178 (set_attr "prefix" "vex")
17179 (set_attr "mode" "<sseinsnmode>")])
17180
;; Matches the vec_select-of-vec_concat form of the lane permute.  The insn
;; condition requires avx_vperm2f128_parallel to return nonzero for the
;; selector parallel (operand 3); the output code subtracts 1 from that same
;; return value to recover the immediate.  Immediates 0x12 and 0x20 are
;; printed as vinsert<i128> with $0 / $1 (using the %x2 form of operand 2);
;; every other value is printed as vperm2<i128>, after operands[3] has been
;; overwritten with the recovered immediate.
17181 (define_insn "*avx_vperm2f128<mode>_nozero"
17182 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
17183 (vec_select:AVX256MODE2P
17184 (vec_concat:<ssedoublevecmode>
17185 (match_operand:AVX256MODE2P 1 "register_operand" "x")
17186 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
17187 (match_parallel 3 ""
17188 [(match_operand 4 "const_int_operand")])))]
17189 "TARGET_AVX
17190 && avx_vperm2f128_parallel (operands[3], <MODE>mode)"
17191 {
17192 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
17193 if (mask == 0x12)
17194 return "vinsert<i128>\t{$0, %x2, %1, %0|%0, %1, %x2, 0}";
17195 if (mask == 0x20)
17196 return "vinsert<i128>\t{$1, %x2, %1, %0|%0, %1, %x2, 1}";
17197 operands[3] = GEN_INT (mask);
17198 return "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
17199 }
17200 [(set_attr "type" "sselog")
17201 (set_attr "prefix_extra" "1")
17202 (set_attr "length_immediate" "1")
17203 (set_attr "prefix" "vex")
17204 (set_attr "mode" "<sseinsnmode>")])
17205
;; Matches a single-source vec_select whose selector parallel (operand 2)
;; satisfies palignr_operand, and prints it as palignr / vpalignr with the
;; source used for both inputs.  Operand 3 is the first element of the
;; selector; the output code overwrites operands[2] with that element index
;; multiplied by the element size in bytes, which becomes the immediate.
;; Alternative 0 is the non-AVX two-operand form (source tied to the
;; destination); alternative 1 is the AVX three-operand form.
17206 (define_insn "*ssse3_palignr<mode>_perm"
17207 [(set (match_operand:V_128 0 "register_operand" "=x,x")
17208 (vec_select:V_128
17209 (match_operand:V_128 1 "register_operand" "0,x")
17210 (match_parallel 2 "palignr_operand"
17211 [(match_operand 3 "const_int_operand" "n,n")])))]
17212 "TARGET_SSSE3"
17213 {
17214 machine_mode imode = GET_MODE_INNER (GET_MODE (operands[0]));
17215 operands[2] = GEN_INT (INTVAL (operands[3]) * GET_MODE_SIZE (imode));
17216
17217 switch (which_alternative)
17218 {
17219 case 0:
17220 return "palignr\t{%2, %1, %0|%0, %1, %2}";
17221 case 1:
17222 return "vpalignr\t{%2, %1, %1, %0|%0, %1, %1, %2}";
17223 default:
17224 gcc_unreachable ();
17225 }
17226 }
17227 [(set_attr "isa" "noavx,avx")
17228 (set_attr "type" "sseishft")
17229 (set_attr "atom_unit" "sishuf")
17230 (set_attr "prefix_data16" "1,*")
17231 (set_attr "prefix_extra" "1")
17232 (set_attr "length_immediate" "1")
17233 (set_attr "prefix" "orig,vex")])
17234
;; Masked half-vector insert expander for the VI48F_256 modes.  Operand 3
;; (0 or 1) selects the generator: 0 -> gen_vec_set_lo_<mode>_mask,
;; 1 -> gen_vec_set_hi_<mode>_mask.  The selected generator is called with
;; operands 0, 1, 2, 4 and 5 (operand 3 itself is not forwarded).  Any other
;; value of operand 3 hits gcc_unreachable.
17235 (define_expand "avx512vl_vinsert<mode>"
17236 [(match_operand:VI48F_256 0 "register_operand")
17237 (match_operand:VI48F_256 1 "register_operand")
17238 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
17239 (match_operand:SI 3 "const_0_to_1_operand")
17240 (match_operand:VI48F_256 4 "register_operand")
17241 (match_operand:<avx512fmaskmode> 5 "register_operand")]
17242 "TARGET_AVX512VL"
17243 {
17244 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
17245
17246 switch (INTVAL (operands[3]))
17247 {
17248 case 0:
17249 insn = gen_vec_set_lo_<mode>_mask;
17250 break;
17251 case 1:
17252 insn = gen_vec_set_hi_<mode>_mask;
17253 break;
17254 default:
17255 gcc_unreachable ();
17256 }
17257
17258 emit_insn (insn (operands[0], operands[1], operands[2], operands[4],
17259 operands[5]));
17260 DONE;
17261 })
17262
;; Unmasked half-vector insert expander for the V_256 modes.  Operand 3
;; (0 or 1) selects gen_vec_set_lo_<mode> or gen_vec_set_hi_<mode>, which is
;; then called with operands 0, 1 and 2.  Any other value of operand 3 hits
;; gcc_unreachable.
17263 (define_expand "avx_vinsertf128<mode>"
17264 [(match_operand:V_256 0 "register_operand")
17265 (match_operand:V_256 1 "register_operand")
17266 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
17267 (match_operand:SI 3 "const_0_to_1_operand")]
17268 "TARGET_AVX"
17269 {
17270 rtx (*insn)(rtx, rtx, rtx);
17271
17272 switch (INTVAL (operands[3]))
17273 {
17274 case 0:
17275 insn = gen_vec_set_lo_<mode>;
17276 break;
17277 case 1:
17278 insn = gen_vec_set_hi_<mode>;
17279 break;
17280 default:
17281 gcc_unreachable ();
17282 }
17283
17284 emit_insn (insn (operands[0], operands[1], operands[2]));
17285 DONE;
17286 })
17287
;; Replace the low half of a VI8F_256 vector: the result is the concat of
;; operand 2 (new low half) with elements 2..3 of operand 1 (kept high half).
;; With TARGET_AVX512VL the 64x2 insert mnemonic is printed (including the
;; <mask_operand3> decoration); otherwise the plain vinsert<i128> form.
;; NOTE(review): per the Intel SDM the 64x2 insert forms appear to belong to
;; AVX512DQ -- confirm that testing TARGET_AVX512VL alone is sufficient here.
;; NOTE(review): the "prefix" attribute is "vex" although the constraints
;; are "v" and an EVEX-only mnemonic can be printed -- confirm.
17288 (define_insn "vec_set_lo_<mode><mask_name>"
17289 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
17290 (vec_concat:VI8F_256
17291 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
17292 (vec_select:<ssehalfvecmode>
17293 (match_operand:VI8F_256 1 "register_operand" "v")
17294 (parallel [(const_int 2) (const_int 3)]))))]
17295 "TARGET_AVX"
17296 {
17297 if (TARGET_AVX512VL)
17298 return "vinsert<shuffletype>64x2\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
17299 else
17300 return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}";
17301 }
17302 [(set_attr "type" "sselog")
17303 (set_attr "prefix_extra" "1")
17304 (set_attr "length_immediate" "1")
17305 (set_attr "prefix" "vex")
17306 (set_attr "mode" "<sseinsnmode>")])
17307
;; Replace the high half of a VI8F_256 vector: the result is the concat of
;; elements 0..1 of operand 1 (kept low half) with operand 2 (new high half).
;; With TARGET_AVX512VL the 64x2 insert mnemonic is printed (including the
;; <mask_operand3> decoration); otherwise the plain vinsert<i128> form.
;; NOTE(review): per the Intel SDM the 64x2 insert forms appear to belong to
;; AVX512DQ -- confirm that testing TARGET_AVX512VL alone is sufficient here.
;; NOTE(review): the "prefix" attribute is "vex" although the constraints
;; are "v" and an EVEX-only mnemonic can be printed -- confirm.
17308 (define_insn "vec_set_hi_<mode><mask_name>"
17309 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
17310 (vec_concat:VI8F_256
17311 (vec_select:<ssehalfvecmode>
17312 (match_operand:VI8F_256 1 "register_operand" "v")
17313 (parallel [(const_int 0) (const_int 1)]))
17314 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
17315 "TARGET_AVX"
17316 {
17317 if (TARGET_AVX512VL)
17318 return "vinsert<shuffletype>64x2\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
17319 else
17320 return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}";
17321 }
17322 [(set_attr "type" "sselog")
17323 (set_attr "prefix_extra" "1")
17324 (set_attr "length_immediate" "1")
17325 (set_attr "prefix" "vex")
17326 (set_attr "mode" "<sseinsnmode>")])
17327
;; Replace the low half of a VI4F_256 vector: the result is the concat of
;; operand 2 (new low half) with elements 4..7 of operand 1 (kept high half).
;; With TARGET_AVX512VL the 32x4 insert mnemonic is printed (including the
;; <mask_operand3> decoration); otherwise the plain vinsert<i128> form.
17328 (define_insn "vec_set_lo_<mode><mask_name>"
17329 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
17330 (vec_concat:VI4F_256
17331 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
17332 (vec_select:<ssehalfvecmode>
17333 (match_operand:VI4F_256 1 "register_operand" "v")
17334 (parallel [(const_int 4) (const_int 5)
17335 (const_int 6) (const_int 7)]))))]
17336 "TARGET_AVX"
17337 {
17338 if (TARGET_AVX512VL)
17339 return "vinsert<shuffletype>32x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
17340 else
17341 return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}";
17342 }
17343 [(set_attr "type" "sselog")
17344 (set_attr "prefix_extra" "1")
17345 (set_attr "length_immediate" "1")
17346 (set_attr "prefix" "vex")
17347 (set_attr "mode" "<sseinsnmode>")])
17348
;; Replace the high half of a VI4F_256 vector: the result is the concat of
;; elements 0..3 of operand 1 (kept low half) with operand 2 (new high half).
;; With TARGET_AVX512VL the 32x4 insert mnemonic is printed (including the
;; <mask_operand3> decoration); otherwise the plain vinsert<i128> form.
17349 (define_insn "vec_set_hi_<mode><mask_name>"
17350 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
17351 (vec_concat:VI4F_256
17352 (vec_select:<ssehalfvecmode>
17353 (match_operand:VI4F_256 1 "register_operand" "v")
17354 (parallel [(const_int 0) (const_int 1)
17355 (const_int 2) (const_int 3)]))
17356 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
17357 "TARGET_AVX"
17358 {
17359 if (TARGET_AVX512VL)
17360 return "vinsert<shuffletype>32x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
17361 else
17362 return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}";
17363 }
17364 [(set_attr "type" "sselog")
17365 (set_attr "prefix_extra" "1")
17366 (set_attr "length_immediate" "1")
17367 (set_attr "prefix" "vex")
17368 (set_attr "mode" "<sseinsnmode>")])
17369
;; Replace the low half of a V16HI vector: concat of operand 2 (V8HI, new
;; low half) with elements 8..15 of operand 1.  Printed as vinsert%~128 with
;; immediate 0x0.
17370 (define_insn "vec_set_lo_v16hi"
17371 [(set (match_operand:V16HI 0 "register_operand" "=x")
17372 (vec_concat:V16HI
17373 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
17374 (vec_select:V8HI
17375 (match_operand:V16HI 1 "register_operand" "x")
17376 (parallel [(const_int 8) (const_int 9)
17377 (const_int 10) (const_int 11)
17378 (const_int 12) (const_int 13)
17379 (const_int 14) (const_int 15)]))))]
17380 "TARGET_AVX"
17381 "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
17382 [(set_attr "type" "sselog")
17383 (set_attr "prefix_extra" "1")
17384 (set_attr "length_immediate" "1")
17385 (set_attr "prefix" "vex")
17386 (set_attr "mode" "OI")])
17387
;; Replace the high half of a V16HI vector: concat of elements 0..7 of
;; operand 1 with operand 2 (V8HI, new high half).  Printed as vinsert%~128
;; with immediate 0x1.
17388 (define_insn "vec_set_hi_v16hi"
17389 [(set (match_operand:V16HI 0 "register_operand" "=x")
17390 (vec_concat:V16HI
17391 (vec_select:V8HI
17392 (match_operand:V16HI 1 "register_operand" "x")
17393 (parallel [(const_int 0) (const_int 1)
17394 (const_int 2) (const_int 3)
17395 (const_int 4) (const_int 5)
17396 (const_int 6) (const_int 7)]))
17397 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
17398 "TARGET_AVX"
17399 "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
17400 [(set_attr "type" "sselog")
17401 (set_attr "prefix_extra" "1")
17402 (set_attr "length_immediate" "1")
17403 (set_attr "prefix" "vex")
17404 (set_attr "mode" "OI")])
17405
;; Replace the low half of a V32QI vector: concat of operand 2 (V16QI, new
;; low half) with elements 16..31 of operand 1.  Printed as vinsert%~128
;; with immediate 0x0.
17406 (define_insn "vec_set_lo_v32qi"
17407 [(set (match_operand:V32QI 0 "register_operand" "=x")
17408 (vec_concat:V32QI
17409 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
17410 (vec_select:V16QI
17411 (match_operand:V32QI 1 "register_operand" "x")
17412 (parallel [(const_int 16) (const_int 17)
17413 (const_int 18) (const_int 19)
17414 (const_int 20) (const_int 21)
17415 (const_int 22) (const_int 23)
17416 (const_int 24) (const_int 25)
17417 (const_int 26) (const_int 27)
17418 (const_int 28) (const_int 29)
17419 (const_int 30) (const_int 31)]))))]
17420 "TARGET_AVX"
17421 "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
17422 [(set_attr "type" "sselog")
17423 (set_attr "prefix_extra" "1")
17424 (set_attr "length_immediate" "1")
17425 (set_attr "prefix" "vex")
17426 (set_attr "mode" "OI")])
17427
;; Replace the high half of a V32QI vector: concat of elements 0..15 of
;; operand 1 with operand 2 (V16QI, new high half).  Printed as vinsert%~128
;; with immediate 0x1.
17428 (define_insn "vec_set_hi_v32qi"
17429 [(set (match_operand:V32QI 0 "register_operand" "=x")
17430 (vec_concat:V32QI
17431 (vec_select:V16QI
17432 (match_operand:V32QI 1 "register_operand" "x")
17433 (parallel [(const_int 0) (const_int 1)
17434 (const_int 2) (const_int 3)
17435 (const_int 4) (const_int 5)
17436 (const_int 6) (const_int 7)
17437 (const_int 8) (const_int 9)
17438 (const_int 10) (const_int 11)
17439 (const_int 12) (const_int 13)
17440 (const_int 14) (const_int 15)]))
17441 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
17442 "TARGET_AVX"
17443 "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
17444 [(set_attr "type" "sselog")
17445 (set_attr "prefix_extra" "1")
17446 (set_attr "length_immediate" "1")
17447 (set_attr "prefix" "vex")
17448 (set_attr "mode" "OI")])
17449
;; Masked vector load (UNSPEC_MASKMOV): operand 1 is the memory source and
;; operand 2 is a register of the matching integer-vector mode (the mask).
;; Note the operand numbering: the mask is operand 2 here but operand 1 in
;; the maskstore insn.
17450 (define_insn "<avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>"
17451 [(set (match_operand:V48_AVX2 0 "register_operand" "=x")
17452 (unspec:V48_AVX2
17453 [(match_operand:<sseintvecmode> 2 "register_operand" "x")
17454 (match_operand:V48_AVX2 1 "memory_operand" "m")]
17455 UNSPEC_MASKMOV))]
17456 "TARGET_AVX"
17457 "v<sseintprefix>maskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
17458 [(set_attr "type" "sselog1")
17459 (set_attr "prefix_extra" "1")
17460 (set_attr "prefix" "vex")
17461 (set_attr "btver2_decode" "vector")
17462 (set_attr "mode" "<sseinsnmode>")])
17463
;; Masked vector store (UNSPEC_MASKMOV): operand 0 is the memory destination
;; with a "+m" (read-write) constraint and is repeated inside the unspec via
;; (match_dup 0); operand 1 is the integer-vector mask register and
;; operand 2 is the data register.
17464 (define_insn "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>"
17465 [(set (match_operand:V48_AVX2 0 "memory_operand" "+m")
17466 (unspec:V48_AVX2
17467 [(match_operand:<sseintvecmode> 1 "register_operand" "x")
17468 (match_operand:V48_AVX2 2 "register_operand" "x")
17469 (match_dup 0)]
17470 UNSPEC_MASKMOV))]
17471 "TARGET_AVX"
17472 "v<sseintprefix>maskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
17473 [(set_attr "type" "sselog1")
17474 (set_attr "prefix_extra" "1")
17475 (set_attr "prefix" "vex")
17476 (set_attr "btver2_decode" "vector")
17477 (set_attr "mode" "<sseinsnmode>")])
17478
;; Named expander "maskload<mode>" with no preparation code: it emits the
;; same UNSPEC_MASKMOV shape as the maskload insn above (operand 1 = memory
;; source, operand 2 = integer-vector mask register).
17479 (define_expand "maskload<mode>"
17480 [(set (match_operand:V48_AVX2 0 "register_operand")
17481 (unspec:V48_AVX2
17482 [(match_operand:<sseintvecmode> 2 "register_operand")
17483 (match_operand:V48_AVX2 1 "memory_operand")]
17484 UNSPEC_MASKMOV))]
17485 "TARGET_AVX")
17486
;; Named expander "maskstore<mode>" with no preparation code.  Operand 0 is
;; the memory destination (repeated via match_dup), operand 1 is the data
;; register and operand 2 is the integer-vector mask register -- i.e. data
;; and mask are numbered the other way round from the maskstore insn, while
;; the position of each inside the unspec vector is the same.
17487 (define_expand "maskstore<mode>"
17488 [(set (match_operand:V48_AVX2 0 "memory_operand")
17489 (unspec:V48_AVX2
17490 [(match_operand:<sseintvecmode> 2 "register_operand")
17491 (match_operand:V48_AVX2 1 "register_operand")
17492 (match_dup 0)]
17493 UNSPEC_MASKMOV))]
17494 "TARGET_AVX")
17495
;; Half-width to full-width cast (UNSPEC_CAST).  The insn itself prints "#"
;; and is split once reload has completed into a single emit_move_insn:
;; if the destination is a register it is re-created in the half-vector
;; mode (same register number); otherwise the source register is re-created
;; in the full <MODE>mode, so that both sides of the move have equal modes.
17496 (define_insn_and_split "avx_<castmode><avxsizesuffix>_<castmode>"
17497 [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
17498 (unspec:AVX256MODE2P
17499 [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
17500 UNSPEC_CAST))]
17501 "TARGET_AVX"
17502 "#"
17503 "&& reload_completed"
17504 [(const_int 0)]
17505 {
17506 rtx op0 = operands[0];
17507 rtx op1 = operands[1];
17508 if (REG_P (op0))
17509 op0 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op0));
17510 else
17511 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
17512 emit_move_insn (op0, op1);
17513 DONE;
17514 })
17515
;; vec_init expander for the V_256 modes (TARGET_AVX): delegates entirely to
;; ix86_expand_vector_init with a first argument of false.
17516 (define_expand "vec_init<mode>"
17517 [(match_operand:V_256 0 "register_operand")
17518 (match_operand 1)]
17519 "TARGET_AVX"
17520 {
17521 ix86_expand_vector_init (false, operands[0], operands[1]);
17522 DONE;
17523 })
17524
;; vec_init expander for the VF48_I1248 modes (TARGET_AVX512F): delegates
;; entirely to ix86_expand_vector_init with a first argument of false.
17525 (define_expand "vec_init<mode>"
17526 [(match_operand:VF48_I1248 0 "register_operand")
17527 (match_operand 1)]
17528 "TARGET_AVX512F"
17529 {
17530 ix86_expand_vector_init (false, operands[0], operands[1]);
17531 DONE;
17532 })
17533
;; Per-element variable arithmetic right shift (ashiftrt with a vector shift
;; count in operand 2) for the VI48_AVX512F_AVX512VL modes; printed as
;; vpsrav<ssemodesuffix>, optionally with the <mask_operand3> decoration.
17534 (define_insn "<avx2_avx512>_ashrv<mode><mask_name>"
17535 [(set (match_operand:VI48_AVX512F_AVX512VL 0 "register_operand" "=v")
17536 (ashiftrt:VI48_AVX512F_AVX512VL
17537 (match_operand:VI48_AVX512F_AVX512VL 1 "register_operand" "v")
17538 (match_operand:VI48_AVX512F_AVX512VL 2 "nonimmediate_operand" "vm")))]
17539 "TARGET_AVX2 && <mask_mode512bit_condition>"
17540 "vpsrav<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17541 [(set_attr "type" "sseishft")
17542 (set_attr "prefix" "maybe_evex")
17543 (set_attr "mode" "<sseinsnmode>")])
17544
;; Per-element variable arithmetic right shift for the VI2_AVX512VL modes,
;; enabled by TARGET_AVX512BW; the mnemonic is hard-coded as vpsravw.
17545 (define_insn "<avx2_avx512>_ashrv<mode><mask_name>"
17546 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17547 (ashiftrt:VI2_AVX512VL
17548 (match_operand:VI2_AVX512VL 1 "register_operand" "v")
17549 (match_operand:VI2_AVX512VL 2 "nonimmediate_operand" "vm")))]
17550 "TARGET_AVX512BW"
17551 "vpsravw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17552 [(set_attr "type" "sseishft")
17553 (set_attr "prefix" "maybe_evex")
17554 (set_attr "mode" "<sseinsnmode>")])
17555
;; Per-element variable shift over the any_lshift code iterator for the
;; VI48_AVX512F modes; the mnemonic is assembled from <vshift> and
;; <ssemodesuffix>, optionally with the <mask_operand3> decoration.
17556 (define_insn "<avx2_avx512>_<shift_insn>v<mode><mask_name>"
17557 [(set (match_operand:VI48_AVX512F 0 "register_operand" "=v")
17558 (any_lshift:VI48_AVX512F
17559 (match_operand:VI48_AVX512F 1 "register_operand" "v")
17560 (match_operand:VI48_AVX512F 2 "nonimmediate_operand" "vm")))]
17561 "TARGET_AVX2 && <mask_mode512bit_condition>"
17562 "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17563 [(set_attr "type" "sseishft")
17564 (set_attr "prefix" "maybe_evex")
17565 (set_attr "mode" "<sseinsnmode>")])
17566
;; Per-element variable shift over the any_lshift code iterator for the
;; VI2_AVX512VL modes, enabled by TARGET_AVX512BW.
17567 (define_insn "<avx2_avx512>_<shift_insn>v<mode><mask_name>"
17568 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17569 (any_lshift:VI2_AVX512VL
17570 (match_operand:VI2_AVX512VL 1 "register_operand" "v")
17571 (match_operand:VI2_AVX512VL 2 "nonimmediate_operand" "vm")))]
17572 "TARGET_AVX512BW"
17573 "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17574 [(set_attr "type" "sseishft")
17575 (set_attr "prefix" "maybe_evex")
17576 (set_attr "mode" "<sseinsnmode>")])
17577
;; Build a V_256_512 vector from two half-width operands.
;; Alternative 0 (operand 2 in a register or memory) prints vinsert<i128>
;; with immediate 0x1, i.e. operand 2 goes into the upper half.
;; Alternative 1 (operand 2 matches constraint "C" -- presumably the zero
;; constant; confirm against the i386 constraint definitions) prints a plain
;; vmovaps / vmovapd / vmovdqa of operand 1 into the destination, printed
;; with %t0 for the 512-bit insn modes and %x0 for the 256-bit ones; the
;; mnemonic is chosen from the insn's "mode" attribute.
17578 (define_insn "avx_vec_concat<mode>"
17579 [(set (match_operand:V_256_512 0 "register_operand" "=x,x")
17580 (vec_concat:V_256_512
17581 (match_operand:<ssehalfvecmode> 1 "register_operand" "x,x")
17582 (match_operand:<ssehalfvecmode> 2 "vector_move_operand" "xm,C")))]
17583 "TARGET_AVX"
17584 {
17585 switch (which_alternative)
17586 {
17587 case 0:
17588 return "vinsert<i128>\t{$0x1, %2, %<concat_tg_mode>1, %0|%0, %<concat_tg_mode>1, %2, 0x1}";
17589 case 1:
17590 switch (get_attr_mode (insn))
17591 {
17592 case MODE_V16SF:
17593 return "vmovaps\t{%1, %t0|%t0, %1}";
17594 case MODE_V8DF:
17595 return "vmovapd\t{%1, %t0|%t0, %1}";
17596 case MODE_V8SF:
17597 return "vmovaps\t{%1, %x0|%x0, %1}";
17598 case MODE_V4DF:
17599 return "vmovapd\t{%1, %x0|%x0, %1}";
17600 case MODE_XI:
17601 return "vmovdqa\t{%1, %t0|%t0, %1}";
17602 case MODE_OI:
17603 return "vmovdqa\t{%1, %x0|%x0, %1}";
17604 default:
17605 gcc_unreachable ();
17606 }
17607 default:
17608 gcc_unreachable ();
17609 }
17610 }
17611 [(set_attr "type" "sselog,ssemov")
17612 (set_attr "prefix_extra" "1,*")
17613 (set_attr "length_immediate" "1,*")
17614 (set_attr "prefix" "maybe_evex")
17615 (set_attr "mode" "<sseinsnmode>")])
17616
;; Half-to-single conversion, register source: modelled as a V8SF
;; UNSPEC_VCVTPH2PS of the V8HI operand 1 from which elements 0..3 are
;; selected, giving a V4SF result.
17617 (define_insn "vcvtph2ps<mask_name>"
17618 [(set (match_operand:V4SF 0 "register_operand" "=v")
17619 (vec_select:V4SF
17620 (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "v")]
17621 UNSPEC_VCVTPH2PS)
17622 (parallel [(const_int 0) (const_int 1)
17623 (const_int 2) (const_int 3)])))]
17624 "TARGET_F16C || TARGET_AVX512VL"
17625 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17626 [(set_attr "type" "ssecvt")
17627 (set_attr "prefix" "maybe_evex")
17628 (set_attr "mode" "V4SF")])
17629
;; Half-to-single conversion, memory source: a V4HI memory operand is
;; converted directly to a V4SF result.
;; NOTE(review): the register form of this insn uses prefix "maybe_evex"
;; and mode "V4SF"; this one uses "vex" and "V8SF" while sharing the same
;; enabling condition and "v" destination constraint -- confirm that the
;; difference is intended.
17630 (define_insn "*vcvtph2ps_load<mask_name>"
17631 [(set (match_operand:V4SF 0 "register_operand" "=v")
17632 (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
17633 UNSPEC_VCVTPH2PS))]
17634 "TARGET_F16C || TARGET_AVX512VL"
17635 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17636 [(set_attr "type" "ssecvt")
17637 (set_attr "prefix" "vex")
17638 (set_attr "mode" "V8SF")])
17639
;; 256-bit half-to-single conversion: V8HI operand 1 (register or memory)
;; to a V8SF result.
17640 (define_insn "vcvtph2ps256<mask_name>"
17641 [(set (match_operand:V8SF 0 "register_operand" "=v")
17642 (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "vm")]
17643 UNSPEC_VCVTPH2PS))]
17644 "TARGET_F16C || TARGET_AVX512VL"
17645 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17646 [(set_attr "type" "ssecvt")
17647 (set_attr "prefix" "vex")
17648 (set_attr "btver2_decode" "double")
17649 (set_attr "mode" "V8SF")])
17650
;; 512-bit half-to-single conversion: V16HI operand 1 to a V16SF result.
;; The predicate and constraint of operand 1 and the extra template text
;; come from the round_saeonly substitution attributes.
17651 (define_insn "<mask_codefor>avx512f_vcvtph2ps512<mask_name><round_saeonly_name>"
17652 [(set (match_operand:V16SF 0 "register_operand" "=v")
17653 (unspec:V16SF
17654 [(match_operand:V16HI 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
17655 UNSPEC_VCVTPH2PS))]
17656 "TARGET_AVX512F"
17657 "vcvtph2ps\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
17658 [(set_attr "type" "ssecvt")
17659 (set_attr "prefix" "evex")
17660 (set_attr "mode" "V16SF")])
17661
;; Masked single-to-half expander (TARGET_AVX512VL).  The V4HI conversion
;; result is concatenated with operand 5, which the preparation statement
;; sets to the V4HI zero constant, and the V8HI concat is then vec_merged
;; with operand 3 under the QImode mask in operand 4.  Operand 2 is the
;; 8-bit immediate passed into the unspec.
17662 (define_expand "vcvtps2ph_mask"
17663 [(set (match_operand:V8HI 0 "register_operand")
17664 (vec_merge:V8HI
17665 (vec_concat:V8HI
17666 (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
17667 (match_operand:SI 2 "const_0_to_255_operand")]
17668 UNSPEC_VCVTPS2PH)
17669 (match_dup 5))
17670 (match_operand:V8HI 3 "vector_move_operand")
17671 (match_operand:QI 4 "register_operand")))]
17672 "TARGET_AVX512VL"
17673 "operands[5] = CONST0_RTX (V4HImode);")
17674
;; Unmasked single-to-half expander (TARGET_F16C).  The V4HI conversion
;; result is concatenated with operand 3, which the preparation statement
;; sets to the V4HI zero constant, to form the V8HI destination.
17675 (define_expand "vcvtps2ph"
17676 [(set (match_operand:V8HI 0 "register_operand")
17677 (vec_concat:V8HI
17678 (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
17679 (match_operand:SI 2 "const_0_to_255_operand")]
17680 UNSPEC_VCVTPS2PH)
17681 (match_dup 3)))]
17682 "TARGET_F16C"
17683 "operands[3] = CONST0_RTX (V4HImode);")
17684
;; Register-destination single-to-half conversion: the V4HI unspec result
;; concatenated with an operand that must satisfy const0_operand (operand 3,
;; no constraint).  Operand 2 is the immediate (constraint "N").
17685 (define_insn "*vcvtps2ph<mask_name>"
17686 [(set (match_operand:V8HI 0 "register_operand" "=v")
17687 (vec_concat:V8HI
17688 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "v")
17689 (match_operand:SI 2 "const_0_to_255_operand" "N")]
17690 UNSPEC_VCVTPS2PH)
17691 (match_operand:V4HI 3 "const0_operand")))]
17692 "(TARGET_F16C || TARGET_AVX512VL) && <mask_avx512vl_condition>"
17693 "vcvtps2ph\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"
17694 [(set_attr "type" "ssecvt")
17695 (set_attr "prefix" "maybe_evex")
17696 (set_attr "mode" "V4SF")])
17697
;; Memory-destination single-to-half conversion: stores the V4HI unspec
;; result of V4SF operand 1 and immediate operand 2 directly to memory
;; operand 0.
17698 (define_insn "*vcvtps2ph_store<mask_name>"
17699 [(set (match_operand:V4HI 0 "memory_operand" "=m")
17700 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
17701 (match_operand:SI 2 "const_0_to_255_operand" "N")]
17702 UNSPEC_VCVTPS2PH))]
17703 "TARGET_F16C || TARGET_AVX512VL"
17704 "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17705 [(set_attr "type" "ssecvt")
17706 (set_attr "prefix" "maybe_evex")
17707 (set_attr "mode" "V4SF")])
17708
;; 256-bit single-to-half conversion: V8SF operand 1 and immediate operand 2
;; to a V8HI destination that may be a register or memory ("=xm").
17709 (define_insn "vcvtps2ph256<mask_name>"
17710 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm")
17711 (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "x")
17712 (match_operand:SI 2 "const_0_to_255_operand" "N")]
17713 UNSPEC_VCVTPS2PH))]
17714 "TARGET_F16C || TARGET_AVX512VL"
17715 "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17716 [(set_attr "type" "ssecvt")
17717 (set_attr "prefix" "maybe_evex")
17718 (set_attr "btver2_decode" "vector")
17719 (set_attr "mode" "V8SF")])
17720
;; 512-bit single-to-half conversion: V16SF operand 1 and immediate
;; operand 2 to a V16HI destination that may be a register or memory
;; ("=vm").
17721 (define_insn "<mask_codefor>avx512f_vcvtps2ph512<mask_name>"
17722 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=vm")
17723 (unspec:V16HI
17724 [(match_operand:V16SF 1 "register_operand" "v")
17725 (match_operand:SI 2 "const_0_to_255_operand" "N")]
17726 UNSPEC_VCVTPS2PH))]
17727 "TARGET_AVX512F"
17728 "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17729 [(set_attr "type" "ssecvt")
17730 (set_attr "prefix" "evex")
17731 (set_attr "mode" "V16SF")])
17732
17733 ;; For gather* insn patterns
;; VEC_GATHER_MODE lists the 128- and 256-bit modes used by the AVX2 gather
;; patterns.  The three attributes below map a data mode to a companion
;; mode and also carry entries for the 512-bit modes, which the AVX-512
;; gather patterns (iterating over VI48F) rely on.
17734 (define_mode_iterator VEC_GATHER_MODE
17735 [V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF])
;; Index-vector mode used by the *gathersi* patterns (SImode indices).
17736 (define_mode_attr VEC_GATHER_IDXSI
17737 [(V2DI "V4SI") (V4DI "V4SI") (V8DI "V8SI")
17738 (V2DF "V4SI") (V4DF "V4SI") (V8DF "V8SI")
17739 (V4SI "V4SI") (V8SI "V8SI") (V16SI "V16SI")
17740 (V4SF "V4SI") (V8SF "V8SI") (V16SF "V16SI")])
17741
;; Index-vector mode used by the *gatherdi* patterns (DImode indices).
17742 (define_mode_attr VEC_GATHER_IDXDI
17743 [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
17744 (V2DF "V2DI") (V4DF "V4DI") (V8DF "V8DI")
17745 (V4SI "V2DI") (V8SI "V4DI") (V16SI "V8DI")
17746 (V4SF "V2DI") (V8SF "V4DI") (V16SF "V8DI")])
17747
;; Mode of the merge-source and mask operands in the *gatherdi* patterns.
;; For 32-bit element modes wider than 128 bits this is the half-width
;; vector mode (e.g. V8SI -> V4SI); otherwise it is the mode itself.
17748 (define_mode_attr VEC_GATHER_SRCDI
17749 [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
17750 (V2DF "V2DF") (V4DF "V4DF") (V8DF "V8DF")
17751 (V4SI "V4SI") (V8SI "V4SI") (V16SI "V8SI")
17752 (V4SF "V4SF") (V8SF "V4SF") (V16SF "V8SF")])
17753
;; AVX2 gather expander with SImode indices.  Operand 1 is the merge source,
;; operand 2 the base address, operand 3 the index vector
;; (<VEC_GATHER_IDXSI>), operand 4 the mask vector and operand 5 the scale
;; (const1248_operand).  The preparation code builds operand 7, an
;; UNSPEC_VSIBADDR in Pmode wrapping operands 2, 3 and 5, which becomes the
;; address of the scalar-mode mem.  Operand 6 is a clobbered scratch.
17754 (define_expand "avx2_gathersi<mode>"
17755 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
17756 (unspec:VEC_GATHER_MODE
17757 [(match_operand:VEC_GATHER_MODE 1 "register_operand")
17758 (mem:<ssescalarmode>
17759 (match_par_dup 7
17760 [(match_operand 2 "vsib_address_operand")
17761 (match_operand:<VEC_GATHER_IDXSI>
17762 3 "register_operand")
17763 (match_operand:SI 5 "const1248_operand")]))
17764 (mem:BLK (scratch))
17765 (match_operand:VEC_GATHER_MODE 4 "register_operand")]
17766 UNSPEC_GATHER))
17767 (clobber (match_scratch:VEC_GATHER_MODE 6))])]
17768 "TARGET_AVX2"
17769 {
17770 operands[7]
17771 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
17772 operands[5]), UNSPEC_VSIBADDR);
17773 })
17774
;; AVX2 gather with SImode indices and an explicit merge source.
;; Operand 2 (merge source) is tied to the destination ("0"); operand 5
;; (mask) is tied to the clobbered scratch operand 1 ("1"), and both the
;; destination and the scratch are early-clobber ("=&x").  Operand 7 is the
;; vsib memory operator wrapping base (3), index (4) and scale (6).  The
;; template prints the scratch/mask register as %1.
17775 (define_insn "*avx2_gathersi<mode>"
17776 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
17777 (unspec:VEC_GATHER_MODE
17778 [(match_operand:VEC_GATHER_MODE 2 "register_operand" "0")
17779 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
17780 [(unspec:P
17781 [(match_operand:P 3 "vsib_address_operand" "Tv")
17782 (match_operand:<VEC_GATHER_IDXSI> 4 "register_operand" "x")
17783 (match_operand:SI 6 "const1248_operand" "n")]
17784 UNSPEC_VSIBADDR)])
17785 (mem:BLK (scratch))
17786 (match_operand:VEC_GATHER_MODE 5 "register_operand" "1")]
17787 UNSPEC_GATHER))
17788 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
17789 "TARGET_AVX2"
17790 "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
17791 [(set_attr "type" "ssemov")
17792 (set_attr "prefix" "vex")
17793 (set_attr "mode" "<sseinsnmode>")])
17794
;; Variant of the SImode-index AVX2 gather in which the merge-source slot of
;; the unspec holds (pc) instead of an operand (presumably standing in for
;; a source whose value does not matter -- confirm with the code that
;; creates this form).  Operand numbers are therefore one lower than in the
;; pattern above: mask = operand 4 (tied to scratch 1), vsib mem = operand 6.
17795 (define_insn "*avx2_gathersi<mode>_2"
17796 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
17797 (unspec:VEC_GATHER_MODE
17798 [(pc)
17799 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
17800 [(unspec:P
17801 [(match_operand:P 2 "vsib_address_operand" "Tv")
17802 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "x")
17803 (match_operand:SI 5 "const1248_operand" "n")]
17804 UNSPEC_VSIBADDR)])
17805 (mem:BLK (scratch))
17806 (match_operand:VEC_GATHER_MODE 4 "register_operand" "1")]
17807 UNSPEC_GATHER))
17808 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
17809 "TARGET_AVX2"
17810 "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
17811 [(set_attr "type" "ssemov")
17812 (set_attr "prefix" "vex")
17813 (set_attr "mode" "<sseinsnmode>")])
17814
;; AVX2 gather expander with DImode indices.  Operand 1 (merge source) and
;; operand 4 (mask) use <VEC_GATHER_SRCDI>, operand 2 is the base address,
;; operand 3 the index vector (<VEC_GATHER_IDXDI>) and operand 5 the scale
;; (const1248_operand).  The preparation code builds operand 7, an
;; UNSPEC_VSIBADDR in Pmode wrapping operands 2, 3 and 5, which becomes the
;; address of the scalar-mode mem.  Operand 6 is a clobbered scratch.
17815 (define_expand "avx2_gatherdi<mode>"
17816 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
17817 (unspec:VEC_GATHER_MODE
17818 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
17819 (mem:<ssescalarmode>
17820 (match_par_dup 7
17821 [(match_operand 2 "vsib_address_operand")
17822 (match_operand:<VEC_GATHER_IDXDI>
17823 3 "register_operand")
17824 (match_operand:SI 5 "const1248_operand")]))
17825 (mem:BLK (scratch))
17826 (match_operand:<VEC_GATHER_SRCDI>
17827 4 "register_operand")]
17828 UNSPEC_GATHER))
17829 (clobber (match_scratch:VEC_GATHER_MODE 6))])]
17830 "TARGET_AVX2"
17831 {
17832 operands[7]
17833 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
17834 operands[5]), UNSPEC_VSIBADDR);
17835 })
17836
;; AVX2 gather with DImode indices and an explicit merge source.
;; Operand 2 (merge source, <VEC_GATHER_SRCDI>) is tied to the destination
;; and operand 5 (mask, <VEC_GATHER_SRCDI>) is tied to the scratch.  The
;; template prints %2 and %5 rather than %0 and %1, i.e. it names the tied
;; registers through the operands that carry the <VEC_GATHER_SRCDI> mode.
17837 (define_insn "*avx2_gatherdi<mode>"
17838 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
17839 (unspec:VEC_GATHER_MODE
17840 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
17841 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
17842 [(unspec:P
17843 [(match_operand:P 3 "vsib_address_operand" "Tv")
17844 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
17845 (match_operand:SI 6 "const1248_operand" "n")]
17846 UNSPEC_VSIBADDR)])
17847 (mem:BLK (scratch))
17848 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
17849 UNSPEC_GATHER))
17850 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
17851 "TARGET_AVX2"
17852 "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"
17853 [(set_attr "type" "ssemov")
17854 (set_attr "prefix" "vex")
17855 (set_attr "mode" "<sseinsnmode>")])
17856
;; Variant of the DImode-index AVX2 gather with (pc) in the merge-source
;; slot.  Because there is no operand carrying the <VEC_GATHER_SRCDI> mode
;; for the destination, the output code prints the destination with the %x0
;; modifier whenever <MODE>mode differs from <VEC_GATHER_SRCDI>mode, and as
;; plain %0 otherwise.  The mask (operand 4) is tied to the scratch.
17857 (define_insn "*avx2_gatherdi<mode>_2"
17858 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
17859 (unspec:VEC_GATHER_MODE
17860 [(pc)
17861 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
17862 [(unspec:P
17863 [(match_operand:P 2 "vsib_address_operand" "Tv")
17864 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
17865 (match_operand:SI 5 "const1248_operand" "n")]
17866 UNSPEC_VSIBADDR)])
17867 (mem:BLK (scratch))
17868 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
17869 UNSPEC_GATHER))
17870 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
17871 "TARGET_AVX2"
17872 {
17873 if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
17874 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";
17875 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";
17876 }
17877 [(set_attr "type" "ssemov")
17878 (set_attr "prefix" "vex")
17879 (set_attr "mode" "<sseinsnmode>")])
17880
;; DImode-index AVX2 gather for the VI4F_256 modes where only elements 0..3
;; of the gather result are kept: the destination has the
;; <VEC_GATHER_SRCDI> mode and is a vec_select of the VI4F_256 unspec.
;; Operand 2 (merge source) is tied to the destination, operand 5 (mask) to
;; the scratch.
17881 (define_insn "*avx2_gatherdi<mode>_3"
17882 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
17883 (vec_select:<VEC_GATHER_SRCDI>
17884 (unspec:VI4F_256
17885 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
17886 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
17887 [(unspec:P
17888 [(match_operand:P 3 "vsib_address_operand" "Tv")
17889 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
17890 (match_operand:SI 6 "const1248_operand" "n")]
17891 UNSPEC_VSIBADDR)])
17892 (mem:BLK (scratch))
17893 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
17894 UNSPEC_GATHER)
17895 (parallel [(const_int 0) (const_int 1)
17896 (const_int 2) (const_int 3)])))
17897 (clobber (match_scratch:VI4F_256 1 "=&x"))]
17898 "TARGET_AVX2"
17899 "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"
17900 [(set_attr "type" "ssemov")
17901 (set_attr "prefix" "vex")
17902 (set_attr "mode" "<sseinsnmode>")])
17903
;; Same low-half vec_select form as the _3 pattern, but with (pc) in the
;; merge-source slot; operand numbers are one lower (mask = operand 4, tied
;; to the scratch; vsib mem = operand 6).
17904 (define_insn "*avx2_gatherdi<mode>_4"
17905 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
17906 (vec_select:<VEC_GATHER_SRCDI>
17907 (unspec:VI4F_256
17908 [(pc)
17909 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
17910 [(unspec:P
17911 [(match_operand:P 2 "vsib_address_operand" "Tv")
17912 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
17913 (match_operand:SI 5 "const1248_operand" "n")]
17914 UNSPEC_VSIBADDR)])
17915 (mem:BLK (scratch))
17916 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
17917 UNSPEC_GATHER)
17918 (parallel [(const_int 0) (const_int 1)
17919 (const_int 2) (const_int 3)])))
17920 (clobber (match_scratch:VI4F_256 1 "=&x"))]
17921 "TARGET_AVX2"
17922 "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"
17923 [(set_attr "type" "ssemov")
17924 (set_attr "prefix" "vex")
17925 (set_attr "mode" "<sseinsnmode>")])
17926
;; AVX-512 gather expander with SImode indices for the VI48F modes.
;; Operand 1 is the merge source, operand 4 the mask register
;; (<avx512fmaskmode>), operand 2 the base address, operand 3 the index
;; vector and operand 5 the scale.  The preparation code builds operand 6,
;; an UNSPEC_VSIBADDR in Pmode wrapping operands 2, 3 and 5.  Operand 7 is a
;; clobbered scratch in the mask mode.
17927 (define_expand "<avx512>_gathersi<mode>"
17928 [(parallel [(set (match_operand:VI48F 0 "register_operand")
17929 (unspec:VI48F
17930 [(match_operand:VI48F 1 "register_operand")
17931 (match_operand:<avx512fmaskmode> 4 "register_operand")
17932 (mem:<ssescalarmode>
17933 (match_par_dup 6
17934 [(match_operand 2 "vsib_address_operand")
17935 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand")
17936 (match_operand:SI 5 "const1248_operand")]))]
17937 UNSPEC_GATHER))
17938 (clobber (match_scratch:<avx512fmaskmode> 7))])]
17939 "TARGET_AVX512F"
17940 {
17941 operands[6]
17942 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
17943 operands[5]), UNSPEC_VSIBADDR);
17944 })
17945
;; AVX-512 gather with SImode indices and an explicit merge source.
;; Operand 1 (merge source) is tied to the destination; operand 7 (mask) is
;; tied to the clobbered mask-register scratch operand 2 ("=&Yk"), which is
;; what the template prints inside %{...%}.  Operand 6 is the vsib memory
;; operator, printed with the %g modifier in the Intel-syntax half.
17946 (define_insn "*avx512f_gathersi<mode>"
17947 [(set (match_operand:VI48F 0 "register_operand" "=&v")
17948 (unspec:VI48F
17949 [(match_operand:VI48F 1 "register_operand" "0")
17950 (match_operand:<avx512fmaskmode> 7 "register_operand" "2")
17951 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
17952 [(unspec:P
17953 [(match_operand:P 4 "vsib_address_operand" "Tv")
17954 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "v")
17955 (match_operand:SI 5 "const1248_operand" "n")]
17956 UNSPEC_VSIBADDR)])]
17957 UNSPEC_GATHER))
17958 (clobber (match_scratch:<avx512fmaskmode> 2 "=&Yk"))]
17959 "TARGET_AVX512F"
17960 "v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %g6}"
17961 [(set_attr "type" "ssemov")
17962 (set_attr "prefix" "evex")
17963 (set_attr "mode" "<sseinsnmode>")])
17964
;; Variant of the SImode-index AVX-512 gather with (pc) in the merge-source
;; slot.  Operand 6 (mask) is tied to the clobbered mask-register scratch
;; operand 1, and operand 5 is the vsib memory operator.
17965 (define_insn "*avx512f_gathersi<mode>_2"
17966 [(set (match_operand:VI48F 0 "register_operand" "=&v")
17967 (unspec:VI48F
17968 [(pc)
17969 (match_operand:<avx512fmaskmode> 6 "register_operand" "1")
17970 (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
17971 [(unspec:P
17972 [(match_operand:P 3 "vsib_address_operand" "Tv")
17973 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
17974 (match_operand:SI 4 "const1248_operand" "n")]
17975 UNSPEC_VSIBADDR)])]
17976 UNSPEC_GATHER))
17977 (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
17978 "TARGET_AVX512F"
17979 "v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %g5}"
17980 [(set_attr "type" "ssemov")
17981 (set_attr "prefix" "evex")
17982 (set_attr "mode" "<sseinsnmode>")])
17983
17984
;; Expander for the AVX-512 gather patterns whose index vector has mode
;; <VEC_GATHER_IDXDI>.
;;   operand 0: destination register (VI48F)
;;   operand 1: register input of mode <VEC_GATHER_SRCDI>
;;   operand 2: base address (vsib_address_operand)
;;   operand 3: index vector register
;;   operand 4: mask register (QImode)
;;   operand 5: scale, restricted by const1248_operand
;;   operand 7: QImode scratch mask register clobbered by the pattern
;; The preparation statement builds operands[6], a Pmode UNSPEC_VSIBADDR
;; wrapping (base, index, scale); match_par_dup 6 uses it as the address
;; of the <ssescalarmode> memory reference being gathered.
17985 (define_expand "<avx512>_gatherdi<mode>"
17986 [(parallel [(set (match_operand:VI48F 0 "register_operand")
17987 (unspec:VI48F
17988 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
17989 (match_operand:QI 4 "register_operand")
17990 (mem:<ssescalarmode>
17991 (match_par_dup 6
17992 [(match_operand 2 "vsib_address_operand")
17993 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand")
17994 (match_operand:SI 5 "const1248_operand")]))]
17995 UNSPEC_GATHER))
17996 (clobber (match_scratch:QI 7))])]
17997 "TARGET_AVX512F"
17998 {
17999 operands[6]
18000 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
18001 operands[5]), UNSPEC_VSIBADDR);
18002 })
18003
;; Gather insn for <VEC_GATHER_IDXDI> index vectors with a register input.
;; Operand 1 (mode <VEC_GATHER_SRCDI>) is tied to the destination ("0"),
;; and the incoming QImode mask (operand 7) is tied to the clobbered
;; scratch mask register (operand 2, "=&Yk").
;; The template prints the destination as %1 rather than %0; both name
;; the same hard register because of the tie.  NOTE(review): presumably
;; %1 is used so the register is printed at the width of
;; <VEC_GATHER_SRCDI> rather than of <MODE> -- confirm.
18004 (define_insn "*avx512f_gatherdi<mode>"
18005 [(set (match_operand:VI48F 0 "register_operand" "=&v")
18006 (unspec:VI48F
18007 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand" "0")
18008 (match_operand:QI 7 "register_operand" "2")
18009 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
18010 [(unspec:P
18011 [(match_operand:P 4 "vsib_address_operand" "Tv")
18012 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "v")
18013 (match_operand:SI 5 "const1248_operand" "n")]
18014 UNSPEC_VSIBADDR)])]
18015 UNSPEC_GATHER))
18016 (clobber (match_scratch:QI 2 "=&Yk"))]
18017 "TARGET_AVX512F"
18018 "v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %g6}"
18019 [(set_attr "type" "ssemov")
18020 (set_attr "prefix" "evex")
18021 (set_attr "mode" "<sseinsnmode>")])
18022
;; Variant of the <VEC_GATHER_IDXDI> gather insn in which (pc) takes the
;; place of the register input, so no input is tied to the destination.
;; The incoming QImode mask (operand 6) is tied to the clobbered scratch
;; mask register (operand 1, "=&Yk"); operand 5 is the VSIB memory
;; reference built from (base 3, index 2, scale 4).
;; When <MODE> differs from <VEC_GATHER_SRCDI>, the destination must be
;; printed with an explicit register-width modifier: %t0 when <MODE> is
;; 64 bytes wide, %x0 otherwise.  The AT&T and Intel alternatives of each
;; template have to name the same register width, so both alternatives of
;; the non-64-byte case use %x0.
18023 (define_insn "*avx512f_gatherdi<mode>_2"
18024 [(set (match_operand:VI48F 0 "register_operand" "=&v")
18025 (unspec:VI48F
18026 [(pc)
18027 (match_operand:QI 6 "register_operand" "1")
18028 (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
18029 [(unspec:P
18030 [(match_operand:P 3 "vsib_address_operand" "Tv")
18031 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
18032 (match_operand:SI 4 "const1248_operand" "n")]
18033 UNSPEC_VSIBADDR)])]
18034 UNSPEC_GATHER))
18035 (clobber (match_scratch:QI 1 "=&Yk"))]
18036 "TARGET_AVX512F"
18037 {
18038 if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
18039 {
18040 if (GET_MODE_SIZE (<MODE>mode) != 64)
18041 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %x0%{%1%}|%x0%{%1%}, %g5}";
18042 else
18043 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %g5}";
18044 }
18045 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %g5}";
18046 }
18047 [(set_attr "type" "ssemov")
18048 (set_attr "prefix" "evex")
18049 (set_attr "mode" "<sseinsnmode>")])
18050
;; Expander for the AVX-512 scatter patterns whose index vector has mode
;; <VEC_GATHER_IDXSI>.
;;   operand 0: base address (vsib_address_operand)
;;   operand 1: mask register (<avx512fmaskmode>)
;;   operand 2: index vector register
;;   operand 3: VI48F register holding the data to store
;;   operand 4: scale, restricted by const1248_operand
;;   operand 6: scratch mask register clobbered by the pattern
;; The preparation statement builds operands[5], a Pmode UNSPEC_VSIBADDR
;; wrapping (base, index, scale); match_par_dup 5 uses it as the address
;; of the destination memory reference.
18051 (define_expand "<avx512>_scattersi<mode>"
18052 [(parallel [(set (mem:VI48F
18053 (match_par_dup 5
18054 [(match_operand 0 "vsib_address_operand")
18055 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand")
18056 (match_operand:SI 4 "const1248_operand")]))
18057 (unspec:VI48F
18058 [(match_operand:<avx512fmaskmode> 1 "register_operand")
18059 (match_operand:VI48F 3 "register_operand")]
18060 UNSPEC_SCATTER))
18061 (clobber (match_scratch:<avx512fmaskmode> 6))])]
18062 "TARGET_AVX512F"
18063 {
18064 operands[5]
18065 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
18066 operands[4]), UNSPEC_VSIBADDR);
18067 })
18068
;; Scatter insn for <VEC_GATHER_IDXSI> index vectors.
;; Operand 5 is the VSIB memory destination whose address is the
;; UNSPEC_VSIBADDR of (base 0, index 2, scale 4); operand 3 is the data
;; register.  The incoming mask (operand 6) is tied to the clobbered
;; scratch mask register (operand 1, "=&Yk"), which the template prints
;; in braces after the memory operand.
18069 (define_insn "*avx512f_scattersi<mode>"
18070 [(set (match_operator:VI48F 5 "vsib_mem_operator"
18071 [(unspec:P
18072 [(match_operand:P 0 "vsib_address_operand" "Tv")
18073 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
18074 (match_operand:SI 4 "const1248_operand" "n")]
18075 UNSPEC_VSIBADDR)])
18076 (unspec:VI48F
18077 [(match_operand:<avx512fmaskmode> 6 "register_operand" "1")
18078 (match_operand:VI48F 3 "register_operand" "v")]
18079 UNSPEC_SCATTER))
18080 (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
18081 "TARGET_AVX512F"
18082 "v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%5%{%1%}, %3}"
18083 [(set_attr "type" "ssemov")
18084 (set_attr "prefix" "evex")
18085 (set_attr "mode" "<sseinsnmode>")])
18086
;; Expander for the AVX-512 scatter patterns whose index vector has mode
;; <VEC_GATHER_IDXDI>.
;;   operand 0: base address (vsib_address_operand)
;;   operand 1: mask register (QImode)
;;   operand 2: index vector register
;;   operand 3: data register of mode <VEC_GATHER_SRCDI>
;;   operand 4: scale, restricted by const1248_operand
;;   operand 6: QImode scratch mask register clobbered by the pattern
;; The preparation statement builds operands[5], a Pmode UNSPEC_VSIBADDR
;; wrapping (base, index, scale); match_par_dup 5 uses it as the address
;; of the destination memory reference.
18087 (define_expand "<avx512>_scatterdi<mode>"
18088 [(parallel [(set (mem:VI48F
18089 (match_par_dup 5
18090 [(match_operand 0 "vsib_address_operand")
18091 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand")
18092 (match_operand:SI 4 "const1248_operand")]))
18093 (unspec:VI48F
18094 [(match_operand:QI 1 "register_operand")
18095 (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand")]
18096 UNSPEC_SCATTER))
18097 (clobber (match_scratch:QI 6))])]
18098 "TARGET_AVX512F"
18099 {
18100 operands[5]
18101 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
18102 operands[4]), UNSPEC_VSIBADDR);
18103 })
18104
;; Scatter insn for <VEC_GATHER_IDXDI> index vectors.
;; Operand 5 is the VSIB memory destination whose address is the
;; UNSPEC_VSIBADDR of (base 0, index 2, scale 4); operand 3 is the data
;; register of mode <VEC_GATHER_SRCDI>.  The incoming QImode mask
;; (operand 6) is tied to the clobbered scratch mask register (operand 1,
;; "=&Yk"), which the template prints in braces after the memory operand.
18105 (define_insn "*avx512f_scatterdi<mode>"
18106 [(set (match_operator:VI48F 5 "vsib_mem_operator"
18107 [(unspec:P
18108 [(match_operand:P 0 "vsib_address_operand" "Tv")
18109 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
18110 (match_operand:SI 4 "const1248_operand" "n")]
18111 UNSPEC_VSIBADDR)])
18112 (unspec:VI48F
18113 [(match_operand:QI 6 "register_operand" "1")
18114 (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand" "v")]
18115 UNSPEC_SCATTER))
18116 (clobber (match_scratch:QI 1 "=&Yk"))]
18117 "TARGET_AVX512F"
18118 "v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%5%{%1%}, %3}"
18119 [(set_attr "type" "ssemov")
18120 (set_attr "prefix" "evex")
18121 (set_attr "mode" "<sseinsnmode>")])
18122
;; Masked register-to-register compress (UNSPEC_COMPRESS).
;;   operand 1: source vector register
;;   operand 2: vector_move_operand, either tied to the destination ("0")
;;              or matching constraint "C"
;;   operand 3: mask register ("Yk"), printed in braces after the
;;              destination
;; NOTE(review): %N2 presumably appends the zero-masking marker when
;; operand 2 is the "C" alternative -- confirm against the operand
;; printer.
18123 (define_insn "<avx512>_compress<mode>_mask"
18124 [(set (match_operand:VI48F 0 "register_operand" "=v")
18125 (unspec:VI48F
18126 [(match_operand:VI48F 1 "register_operand" "v")
18127 (match_operand:VI48F 2 "vector_move_operand" "0C")
18128 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")]
18129 UNSPEC_COMPRESS))]
18130 "TARGET_AVX512F"
18131 "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
18132 [(set_attr "type" "ssemov")
18133 (set_attr "prefix" "evex")
18134 (set_attr "mode" "<sseinsnmode>")])
18135
;; Masked compress with a memory destination (UNSPEC_COMPRESS_STORE).
;;   operand 0: destination memory; it also appears as an input through
;;              (match_dup 0)
;;   operand 1: source vector register
;;   operand 2: mask register ("Yk"), printed in braces after the memory
;;              operand
;; The "memory" attribute is set to "store".
;; NOTE(review): operand 1 uses constraint "x" although the insn is
;; marked EVEX-prefixed and the register-destination compress pattern
;; uses "v" for its source; "x" may needlessly restrict register
;; allocation -- confirm before changing.
18136 (define_insn "<avx512>_compressstore<mode>_mask"
18137 [(set (match_operand:VI48F 0 "memory_operand" "=m")
18138 (unspec:VI48F
18139 [(match_operand:VI48F 1 "register_operand" "x")
18140 (match_dup 0)
18141 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")]
18142 UNSPEC_COMPRESS_STORE))]
18143 "TARGET_AVX512F"
18144 "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
18145 [(set_attr "type" "ssemov")
18146 (set_attr "prefix" "evex")
18147 (set_attr "memory" "store")
18148 (set_attr "mode" "<sseinsnmode>")])
18149
;; Expander for the masked expand operation (UNSPEC_EXPAND) in its
;; "maskz" form.  Whatever the caller passes as operand 2 is replaced by
;; CONST0_RTX (<MODE>mode) before the pattern is emitted.
;;   operand 1: source (register or memory)
;;   operand 3: mask register
18150 (define_expand "<avx512>_expand<mode>_maskz"
18151 [(set (match_operand:VI48F 0 "register_operand")
18152 (unspec:VI48F
18153 [(match_operand:VI48F 1 "nonimmediate_operand")
18154 (match_operand:VI48F 2 "vector_move_operand")
18155 (match_operand:<avx512fmaskmode> 3 "register_operand")]
18156 UNSPEC_EXPAND))]
18157 "TARGET_AVX512F"
18158 "operands[2] = CONST0_RTX (<MODE>mode);")
18159
;; Masked expand insn (UNSPEC_EXPAND) with two alternatives: the source
;; (operand 1) is a vector register in the first and memory in the
;; second, which is why the "memory" attribute is "none,load".
;;   operand 2: vector_move_operand, either tied to the destination ("0")
;;              or matching constraint "C"
;;   operand 3: mask register ("Yk"), printed in braces after the
;;              destination
;; NOTE(review): %N2 presumably appends the zero-masking marker when
;; operand 2 is the "C" alternative -- confirm against the operand
;; printer.
18160 (define_insn "<avx512>_expand<mode>_mask"
18161 [(set (match_operand:VI48F 0 "register_operand" "=v,v")
18162 (unspec:VI48F
18163 [(match_operand:VI48F 1 "nonimmediate_operand" "v,m")
18164 (match_operand:VI48F 2 "vector_move_operand" "0C,0C")
18165 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")]
18166 UNSPEC_EXPAND))]
18167 "TARGET_AVX512F"
18168 "v<sseintprefix>expand<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
18169 [(set_attr "type" "ssemov")
18170 (set_attr "prefix" "evex")
18171 (set_attr "memory" "none,load")
18172 (set_attr "mode" "<sseinsnmode>")])
18173
;; Packed vrange insn (UNSPEC_RANGE) over the VF_AVX512VL modes.
;;   operand 1: first source register
;;   operand 2: second source; its predicate and constraint come from the
;;              <round_saeonly_*> substitution attributes
;;   operand 3: immediate accepted by const_0_to_15_operand
;; Requires TARGET_AVX512DQ together with
;; <round_saeonly_mode512bit_condition>.
18174 (define_insn "avx512dq_rangep<mode><mask_name><round_saeonly_name>"
18175 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
18176 (unspec:VF_AVX512VL
18177 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
18178 (match_operand:VF_AVX512VL 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
18179 (match_operand:SI 3 "const_0_to_15_operand")]
18180 UNSPEC_RANGE))]
18181 "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>"
18182 "vrange<ssemodesuffix>\t{<round_saeonly_mask_op4>%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3<round_saeonly_mask_op4>}"
18183 [(set_attr "type" "sse")
18184 (set_attr "prefix" "evex")
18185 (set_attr "mode" "<MODE>")])
18186
;; Scalar vrange insn over the VF_128 modes.  The vec_merge with mask
;; (const_int 1) takes element 0 from the UNSPEC_RANGE result and every
;; other element from operand 1 (match_dup 1).
;;   operand 2: second source; its predicate and constraint come from the
;;              <round_saeonly_*> substitution attributes
;;   operand 3: immediate accepted by const_0_to_15_operand
18187 (define_insn "avx512dq_ranges<mode><round_saeonly_name>"
18188 [(set (match_operand:VF_128 0 "register_operand" "=v")
18189 (vec_merge:VF_128
18190 (unspec:VF_128
18191 [(match_operand:VF_128 1 "register_operand" "v")
18192 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
18193 (match_operand:SI 3 "const_0_to_15_operand")]
18194 UNSPEC_RANGE)
18195 (match_dup 1)
18196 (const_int 1)))]
18197 "TARGET_AVX512DQ"
18198 "vrange<ssescalarmodesuffix>\t{<round_saeonly_op4>%3, %2, %1, %0|%0, %1, %2, %3<round_saeonly_op4>}"
18199 [(set_attr "type" "sse")
18200 (set_attr "prefix" "evex")
18201 (set_attr "mode" "<MODE>")])
18202
;; Packed vfpclass insn (UNSPEC_FPCLASS): writes a mask register
;; (<avx512fmaskmode>) computed from the vector in operand 1 and the
;; immediate in operand 2 (const_0_to_255_operand).  Requires
;; TARGET_AVX512DQ.
;; The output template is a plain string; no trailing ";" follows it
;; (in a machine description ";" only starts a comment).
18203 (define_insn "avx512dq_fpclass<mode><mask_scalar_merge_name>"
18204 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
18205 (unspec:<avx512fmaskmode>
18206 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
18207 (match_operand:QI 2 "const_0_to_255_operand" "n")]
18208 UNSPEC_FPCLASS))]
18209 "TARGET_AVX512DQ"
18210 "vfpclass<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
18211 [(set_attr "type" "sse")
18212 (set_attr "length_immediate" "1")
18213 (set_attr "prefix" "evex")
18214 (set_attr "mode" "<MODE>")])
18215
;; Scalar vfpclass insn over the VF_128 modes.  The UNSPEC_FPCLASS result
;; is ANDed with (const_int 1), so only bit 0 of the mask result is kept.
;;   operand 1: source vector register
;;   operand 2: immediate accepted by const_0_to_255_operand
;; The output template is a plain string; no trailing ";" follows it
;; (in a machine description ";" only starts a comment).
18216 (define_insn "avx512dq_vmfpclass<mode>"
18217 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
18218 (and:<avx512fmaskmode>
18219 (unspec:<avx512fmaskmode>
18220 [(match_operand:VF_128 1 "register_operand" "v")
18221 (match_operand:QI 2 "const_0_to_255_operand" "n")]
18222 UNSPEC_FPCLASS)
18223 (const_int 1)))]
18224 "TARGET_AVX512DQ"
18225 "vfpclass<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
18226 [(set_attr "type" "sse")
18227 (set_attr "length_immediate" "1")
18228 (set_attr "prefix" "evex")
18229 (set_attr "mode" "<MODE>")])
18230
;; Packed vgetmant insn (UNSPEC_GETMANT) over the VF_AVX512VL modes.
;;   operand 1: source; its constraint comes from
;;              <round_saeonly_constraint>
;;   operand 2: immediate accepted by const_0_to_15_operand
;; The output template is a plain string; no trailing ";" follows it
;; (in a machine description ";" only starts a comment).
18231 (define_insn "<avx512>_getmant<mode><mask_name><round_saeonly_name>"
18232 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
18233 (unspec:VF_AVX512VL
18234 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>")
18235 (match_operand:SI 2 "const_0_to_15_operand")]
18236 UNSPEC_GETMANT))]
18237 "TARGET_AVX512F"
18238 "vgetmant<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}"
18239 [(set_attr "prefix" "evex")
18240 (set_attr "mode" "<MODE>")])
18241
;; Scalar vgetmant insn over the VF_128 modes.  The vec_merge with mask
;; (const_int 1) takes element 0 from the UNSPEC_GETMANT result and every
;; other element from operand 1 (match_dup 1).
;;   operand 2: second source; its predicate and constraint come from the
;;              <round_saeonly_*> substitution attributes
;;   operand 3: immediate accepted by const_0_to_15_operand
;; The ";" after the output template only starts an empty comment.
18242 (define_insn "avx512f_vgetmant<mode><round_saeonly_name>"
18243 [(set (match_operand:VF_128 0 "register_operand" "=v")
18244 (vec_merge:VF_128
18245 (unspec:VF_128
18246 [(match_operand:VF_128 1 "register_operand" "v")
18247 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
18248 (match_operand:SI 3 "const_0_to_15_operand")]
18249 UNSPEC_GETMANT)
18250 (match_dup 1)
18251 (const_int 1)))]
18252 "TARGET_AVX512F"
18253 "vgetmant<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}";
18254 [(set_attr "prefix" "evex")
18255 (set_attr "mode" "<ssescalarmode>")])
18256
18257 ;; The correct representation for this is absolutely enormous, and
18258 ;; surely not generally useful.
;; The operation is therefore modelled as an opaque UNSPEC_DBPSADBW.
;;   operand 1: source register of mode <dbpsadbwmode>
;;   operand 2: source of mode <dbpsadbwmode>, register or memory
;;   operand 3: immediate accepted by const_0_to_255_operand
;; The result has a VI2_AVX512VL mode; requires TARGET_AVX512BW.
18259 (define_insn "<mask_codefor>avx512bw_dbpsadbw<mode><mask_name>"
18260 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
18261 (unspec:VI2_AVX512VL
18262 [(match_operand:<dbpsadbwmode> 1 "register_operand" "v")
18263 (match_operand:<dbpsadbwmode> 2 "nonimmediate_operand" "vm")
18264 (match_operand:SI 3 "const_0_to_255_operand")]
18265 UNSPEC_DBPSADBW))]
18266 "TARGET_AVX512BW"
18267 "vdbpsadbw\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}"
18268 [(set_attr "isa" "avx")
18269 (set_attr "type" "sselog1")
18270 (set_attr "length_immediate" "1")
18271 (set_attr "prefix" "evex")
18272 (set_attr "mode" "<sseinsnmode>")])
18273
;; Vector count-leading-zeros (RTL code clz) over the VI48_AVX512VL
;; modes, emitted as vplzcnt.  The source may be a register or memory.
;; Requires TARGET_AVX512CD.
18274 (define_insn "clz<mode>2<mask_name>"
18275 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
18276 (clz:VI48_AVX512VL
18277 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")))]
18278 "TARGET_AVX512CD"
18279 "vplzcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18280 [(set_attr "type" "sse")
18281 (set_attr "prefix" "evex")
18282 (set_attr "mode" "<sseinsnmode>")])
18283
;; vpconflict insn over the VI48_AVX512VL modes, modelled as the
;; single-operand UNSPEC_CONFLICT.  The source may be a register or
;; memory.  Requires TARGET_AVX512CD.
18284 (define_insn "<mask_codefor>conflict<mode><mask_name>"
18285 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
18286 (unspec:VI48_AVX512VL
18287 [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")]
18288 UNSPEC_CONFLICT))]
18289 "TARGET_AVX512CD"
18290 "vpconflict<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18291 [(set_attr "type" "sse")
18292 (set_attr "prefix" "evex")
18293 (set_attr "mode" "<sseinsnmode>")])
18294
;; sha1msg1 insn (UNSPEC_SHA1MSG1) on V4SI.  Operand 1 is tied to the
;; destination ("0"); operand 2 may be a register or memory.  Requires
;; TARGET_SHA.
18295 (define_insn "sha1msg1"
18296 [(set (match_operand:V4SI 0 "register_operand" "=x")
18297 (unspec:V4SI
18298 [(match_operand:V4SI 1 "register_operand" "0")
18299 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
18300 UNSPEC_SHA1MSG1))]
18301 "TARGET_SHA"
18302 "sha1msg1\t{%2, %0|%0, %2}"
18303 [(set_attr "type" "sselog1")
18304 (set_attr "mode" "TI")])
18305
;; sha1msg2 insn (UNSPEC_SHA1MSG2) on V4SI.  Operand 1 is tied to the
;; destination ("0"); operand 2 may be a register or memory.  Requires
;; TARGET_SHA.
18306 (define_insn "sha1msg2"
18307 [(set (match_operand:V4SI 0 "register_operand" "=x")
18308 (unspec:V4SI
18309 [(match_operand:V4SI 1 "register_operand" "0")
18310 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
18311 UNSPEC_SHA1MSG2))]
18312 "TARGET_SHA"
18313 "sha1msg2\t{%2, %0|%0, %2}"
18314 [(set_attr "type" "sselog1")
18315 (set_attr "mode" "TI")])
18316
;; sha1nexte insn (UNSPEC_SHA1NEXTE) on V4SI.  Operand 1 is tied to the
;; destination ("0"); operand 2 may be a register or memory.  Requires
;; TARGET_SHA.
18317 (define_insn "sha1nexte"
18318 [(set (match_operand:V4SI 0 "register_operand" "=x")
18319 (unspec:V4SI
18320 [(match_operand:V4SI 1 "register_operand" "0")
18321 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
18322 UNSPEC_SHA1NEXTE))]
18323 "TARGET_SHA"
18324 "sha1nexte\t{%2, %0|%0, %2}"
18325 [(set_attr "type" "sselog1")
18326 (set_attr "mode" "TI")])
18327
;; sha1rnds4 insn (UNSPEC_SHA1RNDS4) on V4SI.  Operand 1 is tied to the
;; destination ("0"); operand 2 may be a register or memory; operand 3 is
;; an immediate accepted by const_0_to_3_operand, which is why
;; length_immediate is 1.  Requires TARGET_SHA.
18328 (define_insn "sha1rnds4"
18329 [(set (match_operand:V4SI 0 "register_operand" "=x")
18330 (unspec:V4SI
18331 [(match_operand:V4SI 1 "register_operand" "0")
18332 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
18333 (match_operand:SI 3 "const_0_to_3_operand" "n")]
18334 UNSPEC_SHA1RNDS4))]
18335 "TARGET_SHA"
18336 "sha1rnds4\t{%3, %2, %0|%0, %2, %3}"
18337 [(set_attr "type" "sselog1")
18338 (set_attr "length_immediate" "1")
18339 (set_attr "mode" "TI")])
18340
;; sha256msg1 insn (UNSPEC_SHA256MSG1) on V4SI.  Operand 1 is tied to the
;; destination ("0"); operand 2 may be a register or memory.  Requires
;; TARGET_SHA.
18341 (define_insn "sha256msg1"
18342 [(set (match_operand:V4SI 0 "register_operand" "=x")
18343 (unspec:V4SI
18344 [(match_operand:V4SI 1 "register_operand" "0")
18345 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
18346 UNSPEC_SHA256MSG1))]
18347 "TARGET_SHA"
18348 "sha256msg1\t{%2, %0|%0, %2}"
18349 [(set_attr "type" "sselog1")
18350 (set_attr "mode" "TI")])
18351
;; sha256msg2 insn (UNSPEC_SHA256MSG2) on V4SI.  Operand 1 is tied to the
;; destination ("0"); operand 2 may be a register or memory.  Requires
;; TARGET_SHA.
18352 (define_insn "sha256msg2"
18353 [(set (match_operand:V4SI 0 "register_operand" "=x")
18354 (unspec:V4SI
18355 [(match_operand:V4SI 1 "register_operand" "0")
18356 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
18357 UNSPEC_SHA256MSG2))]
18358 "TARGET_SHA"
18359 "sha256msg2\t{%2, %0|%0, %2}"
18360 [(set_attr "type" "sselog1")
18361 (set_attr "mode" "TI")])
18362
;; sha256rnds2 insn (UNSPEC_SHA256RNDS2) on V4SI.  Operand 1 is tied to
;; the destination ("0"); operand 2 may be a register or memory; operand 3
;; is a V4SI register restricted by constraint "Yz" (presumably a single
;; fixed SSE register -- confirm in the constraint definitions).
;; Requires TARGET_SHA.
;; NOTE(review): length_immediate is set to 1 although none of the
;; operands is an immediate (operand 3 is a register) -- confirm whether
;; this is intended.
18363 (define_insn "sha256rnds2"
18364 [(set (match_operand:V4SI 0 "register_operand" "=x")
18365 (unspec:V4SI
18366 [(match_operand:V4SI 1 "register_operand" "0")
18367 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
18368 (match_operand:V4SI 3 "register_operand" "Yz")]
18369 UNSPEC_SHA256RNDS2))]
18370 "TARGET_SHA"
18371 "sha256rnds2\t{%3, %2, %0|%0, %2, %3}"
18372 [(set_attr "type" "sselog1")
18373 (set_attr "length_immediate" "1")
18374 (set_attr "mode" "TI")])
18375
;; Cast from <ssequartermode> to the wider AVX512MODE2P mode, modelled as
;; UNSPEC_CAST.  The insn has no assembler output of its own ("#"); once
;; reload has completed it is always split into a single move:
;;   - register destination: the destination hard register is re-created
;;     in <ssequartermode>, so the move is done in the narrow source mode;
;;   - otherwise (memory destination alternative): the source hard
;;     register is re-created in <MODE>mode, so the move is done in the
;;     wide mode.
18376 (define_insn_and_split "avx512f_<castmode><avxsizesuffix>_<castmode>"
18377 [(set (match_operand:AVX512MODE2P 0 "nonimmediate_operand" "=x,m")
18378 (unspec:AVX512MODE2P
18379 [(match_operand:<ssequartermode> 1 "nonimmediate_operand" "xm,x")]
18380 UNSPEC_CAST))]
18381 "TARGET_AVX512F"
18382 "#"
18383 "&& reload_completed"
18384 [(const_int 0)]
18385 {
18386 rtx op0 = operands[0];
18387 rtx op1 = operands[1];
18388 if (REG_P (op0))
18389 op0 = gen_rtx_REG (<ssequartermode>mode, REGNO (op0));
18390 else
18391 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
18392 emit_move_insn (op0, op1);
18393 DONE;
18394 })
18395
;; Cast from <ssehalfvecmode> to the wider AVX512MODE2P mode, modelled as
;; UNSPEC_CAST.  The insn has no assembler output of its own ("#"); once
;; reload has completed it is always split into a single move:
;;   - register destination: the destination hard register is re-created
;;     in <ssehalfvecmode>, so the move is done in the narrow source mode;
;;   - otherwise (memory destination alternative): the source hard
;;     register is re-created in <MODE>mode, so the move is done in the
;;     wide mode.
18396 (define_insn_and_split "avx512f_<castmode><avxsizesuffix>_256<castmode>"
18397 [(set (match_operand:AVX512MODE2P 0 "nonimmediate_operand" "=x,m")
18398 (unspec:AVX512MODE2P
18399 [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
18400 UNSPEC_CAST))]
18401 "TARGET_AVX512F"
18402 "#"
18403 "&& reload_completed"
18404 [(const_int 0)]
18405 {
18406 rtx op0 = operands[0];
18407 rtx op1 = operands[1];
18408 if (REG_P (op0))
18409 op0 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op0));
18410 else
18411 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
18412 emit_move_insn (op0, op1);
18413 DONE;
18414 })