]>
Commit | Line | Data |
---|---|---|
ef719a44 | 1 | ;; GCC machine description for SSE instructions |
5624e564 | 2 | ;; Copyright (C) 2005-2015 Free Software Foundation, Inc. |
ef719a44 RH |
3 | ;; |
4 | ;; This file is part of GCC. | |
5 | ;; | |
6 | ;; GCC is free software; you can redistribute it and/or modify | |
7 | ;; it under the terms of the GNU General Public License as published by | |
2f83c7d6 | 8 | ;; the Free Software Foundation; either version 3, or (at your option) |
ef719a44 RH |
9 | ;; any later version. |
10 | ;; | |
11 | ;; GCC is distributed in the hope that it will be useful, | |
12 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | ;; GNU General Public License for more details. | |
15 | ;; | |
16 | ;; You should have received a copy of the GNU General Public License | |
2f83c7d6 NC |
17 | ;; along with GCC; see the file COPYING3. If not see |
18 | ;; <http://www.gnu.org/licenses/>. | |
ef719a44 | 19 | |
dc9945a4 UB |
20 | (define_c_enum "unspec" [ |
21 | ;; SSE | |
22 | UNSPEC_MOVNT | |
860f5e77 UB |
23 | UNSPEC_LOADU |
24 | UNSPEC_STOREU | |
dc9945a4 UB |
25 | |
26 | ;; SSE3 | |
27 | UNSPEC_LDDQU | |
28 | ||
29 | ;; SSSE3 | |
30 | UNSPEC_PSHUFB | |
31 | UNSPEC_PSIGN | |
32 | UNSPEC_PALIGNR | |
33 | ||
34 | ;; For SSE4A support | |
35 | UNSPEC_EXTRQI | |
36 | UNSPEC_EXTRQ | |
37 | UNSPEC_INSERTQI | |
38 | UNSPEC_INSERTQ | |
39 | ||
40 | ;; For SSE4.1 support | |
41 | UNSPEC_BLENDV | |
42 | UNSPEC_INSERTPS | |
43 | UNSPEC_DP | |
44 | UNSPEC_MOVNTDQA | |
45 | UNSPEC_MPSADBW | |
46 | UNSPEC_PHMINPOSUW | |
47 | UNSPEC_PTEST | |
48 | ||
49 | ;; For SSE4.2 support | |
50 | UNSPEC_PCMPESTR | |
51 | UNSPEC_PCMPISTR | |
52 | ||
53 | ;; For FMA4 support | |
54 | UNSPEC_FMADDSUB | |
55 | UNSPEC_XOP_UNSIGNED_CMP | |
56 | UNSPEC_XOP_TRUEFALSE | |
57 | UNSPEC_XOP_PERMUTE | |
58 | UNSPEC_FRCZ | |
59 | ||
60 | ;; For AES support | |
61 | UNSPEC_AESENC | |
62 | UNSPEC_AESENCLAST | |
63 | UNSPEC_AESDEC | |
64 | UNSPEC_AESDECLAST | |
65 | UNSPEC_AESIMC | |
66 | UNSPEC_AESKEYGENASSIST | |
67 | ||
68 | ;; For PCLMUL support | |
69 | UNSPEC_PCLMUL | |
70 | ||
71 | ;; For AVX support | |
72 | UNSPEC_PCMP | |
73 | UNSPEC_VPERMIL | |
74 | UNSPEC_VPERMIL2 | |
75 | UNSPEC_VPERMIL2F128 | |
76 | UNSPEC_CAST | |
77 | UNSPEC_VTESTP | |
78 | UNSPEC_VCVTPH2PS | |
79 | UNSPEC_VCVTPS2PH | |
80 | ||
81 | ;; For AVX2 support | |
2ff5ea2d | 82 | UNSPEC_VPERMVAR |
dc9945a4 UB |
83 | UNSPEC_VPERMTI |
84 | UNSPEC_GATHER | |
85 | UNSPEC_VSIBADDR | |
ab931c71 AI |
86 | |
87 | ;; For AVX512F support | |
88 | UNSPEC_VPERMI2 | |
89 | UNSPEC_VPERMT2 | |
47490470 | 90 | UNSPEC_VPERMI2_MASK |
c003c6d6 | 91 | UNSPEC_UNSIGNED_FIX_NOTRUNC |
0fe65b75 AI |
92 | UNSPEC_UNSIGNED_PCMP |
93 | UNSPEC_TESTM | |
94 | UNSPEC_TESTNM | |
ab931c71 | 95 | UNSPEC_SCATTER |
afb4ac68 AI |
96 | UNSPEC_RCP14 |
97 | UNSPEC_RSQRT14 | |
98 | UNSPEC_FIXUPIMM | |
99 | UNSPEC_SCALEF | |
0fe65b75 | 100 | UNSPEC_VTERNLOG |
afb4ac68 AI |
101 | UNSPEC_GETEXP |
102 | UNSPEC_GETMANT | |
0fe65b75 AI |
103 | UNSPEC_ALIGN |
104 | UNSPEC_CONFLICT | |
47490470 AI |
105 | UNSPEC_COMPRESS |
106 | UNSPEC_COMPRESS_STORE | |
107 | UNSPEC_EXPAND | |
0fe65b75 AI |
108 | UNSPEC_MASKED_EQ |
109 | UNSPEC_MASKED_GT | |
110 | ||
47490470 AI |
111 | ;; For embed. rounding feature |
112 | UNSPEC_EMBEDDED_ROUNDING | |
113 | ||
0fe65b75 AI |
114 | ;; For AVX512PF support |
115 | UNSPEC_GATHER_PREFETCH | |
116 | UNSPEC_SCATTER_PREFETCH | |
afb4ac68 AI |
117 | |
118 | ;; For AVX512ER support | |
119 | UNSPEC_EXP2 | |
120 | UNSPEC_RCP28 | |
121 | UNSPEC_RSQRT28 | |
c1618f82 AI |
122 | |
123 | ;; For SHA support | |
124 | UNSPEC_SHA1MSG1 | |
125 | UNSPEC_SHA1MSG2 | |
126 | UNSPEC_SHA1NEXTE | |
127 | UNSPEC_SHA1RNDS4 | |
128 | UNSPEC_SHA256MSG1 | |
129 | UNSPEC_SHA256MSG2 | |
130 | UNSPEC_SHA256RNDS2 | |
b9826286 | 131 | |
41755b52 | 132 | ;; For AVX512BW support |
5f64b496 AI |
133 | UNSPEC_DBPSADBW |
134 | UNSPEC_PMADDUBSW512 | |
ed3e611e | 135 | UNSPEC_PMADDWD512 |
41755b52 AI |
136 | UNSPEC_PSHUFHW |
137 | UNSPEC_PSHUFLW | |
2be4091a | 138 | UNSPEC_CVTINT2MASK |
41755b52 | 139 | |
b9826286 AI |
140 | ;; For AVX512DQ support |
141 | UNSPEC_REDUCE | |
142 | UNSPEC_FPCLASS | |
143 | UNSPEC_RANGE | |
4190ea38 IT |
144 | |
145 | ;; For AVX512IFMA support | |
146 | UNSPEC_VPMADD52LUQ | |
147 | UNSPEC_VPMADD52HUQ | |
3dcc8af5 IT |
148 | |
149 | ;; For AVX512VBMI support | |
150 | UNSPEC_VPMULTISHIFT | |
dc9945a4 UB |
151 | ]) |
152 | ||
153 | (define_c_enum "unspecv" [ | |
154 | UNSPECV_LDMXCSR | |
155 | UNSPECV_STMXCSR | |
156 | UNSPECV_CLFLUSH | |
157 | UNSPECV_MONITOR | |
158 | UNSPECV_MWAIT | |
159 | UNSPECV_VZEROALL | |
160 | UNSPECV_VZEROUPPER | |
161 | ]) | |
162 | ||
e1faf150 | 163 | ;; All vector modes including V?TImode, used in move patterns. |
c7ecdec6 | 164 | (define_mode_iterator VMOVE |
b86f6e9e AI |
165 | [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI |
166 | (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI | |
167 | (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI | |
168 | (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI | |
e0aacde4 | 169 | (V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX") V1TI |
b86f6e9e AI |
170 | (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF |
171 | (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF]) | |
ef719a44 | 172 | |
7cbdc87d KY |
173 | ;; All AVX-512{F,VL} vector modes. Assumes TARGET_AVX512F baseline. |
174 | (define_mode_iterator V48_AVX512VL | |
175 | [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL") | |
176 | V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL") | |
177 | V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL") | |
178 | V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")]) | |
179 | ||
180 | ;; 1,2 byte AVX-512{BW,VL} vector modes. Assumes TARGET_AVX512BW baseline. | |
181 | (define_mode_iterator VI12_AVX512VL | |
182 | [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL") | |
183 | V32HI (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")]) | |
e0aacde4 | 184 | |
3dcc8af5 IT |
185 | (define_mode_iterator VI1_AVX512VL |
186 | [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")]) | |
187 | ||
6bec6c98 UB |
188 | ;; All vector modes |
189 | (define_mode_iterator V | |
190 | [(V32QI "TARGET_AVX") V16QI | |
191 | (V16HI "TARGET_AVX") V8HI | |
ec5e777c AI |
192 | (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI |
193 | (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI | |
194 | (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF | |
195 | (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")]) | |
6bec6c98 UB |
196 | |
197 | ;; All 128bit vector modes | |
198 | (define_mode_iterator V_128 | |
199 | [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")]) | |
200 | ||
201 | ;; All 256bit vector modes | |
202 | (define_mode_iterator V_256 | |
203 | [V32QI V16HI V8SI V4DI V8SF V4DF]) | |
204 | ||
f62ce24f AI |
205 | ;; All 512bit vector modes |
206 | (define_mode_iterator V_512 [V64QI V32HI V16SI V8DI V16SF V8DF]) | |
207 | ||
ec5e777c AI |
208 | ;; All 256bit and 512bit vector modes |
209 | (define_mode_iterator V_256_512 | |
210 | [V32QI V16HI V8SI V4DI V8SF V4DF | |
211 | (V64QI "TARGET_AVX512F") (V32HI "TARGET_AVX512F") (V16SI "TARGET_AVX512F") | |
212 | (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")]) | |
213 | ||
07c0852e UB |
214 | ;; All vector float modes |
215 | (define_mode_iterator VF | |
b86f6e9e AI |
216 | [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF |
217 | (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")]) | |
218 | ||
219 | ;; 128- and 256-bit float vector modes | |
220 | (define_mode_iterator VF_128_256 | |
6bec6c98 UB |
221 | [(V8SF "TARGET_AVX") V4SF |
222 | (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")]) | |
07c0852e UB |
223 | |
224 | ;; All SFmode vector float modes | |
225 | (define_mode_iterator VF1 | |
a9ccbba2 AI |
226 | [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF]) |
227 | ||
228 | ;; 128- and 256-bit SF vector modes | |
229 | (define_mode_iterator VF1_128_256 | |
6bec6c98 | 230 | [(V8SF "TARGET_AVX") V4SF]) |
07c0852e | 231 | |
39012b09 AI |
232 | (define_mode_iterator VF1_128_256VL |
233 | [V8SF (V4SF "TARGET_AVX512VL")]) | |
234 | ||
07c0852e UB |
235 | ;; All DFmode vector float modes |
236 | (define_mode_iterator VF2 | |
ec5e777c AI |
237 | [(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF]) |
238 | ||
239 | ;; 128- and 256-bit DF vector modes | |
240 | (define_mode_iterator VF2_128_256 | |
6bec6c98 | 241 | [(V4DF "TARGET_AVX") V2DF]) |
07c0852e | 242 | |
ec5e777c | 243 | (define_mode_iterator VF2_512_256 |
39012b09 AI |
244 | [(V8DF "TARGET_AVX512F") V4DF]) |
245 | ||
246 | (define_mode_iterator VF2_512_256VL | |
247 | [V8DF (V4DF "TARGET_AVX512VL")]) | |
ec5e777c | 248 | |
07c0852e UB |
249 | ;; All 128bit vector float modes |
250 | (define_mode_iterator VF_128 | |
6bec6c98 UB |
251 | [V4SF (V2DF "TARGET_SSE2")]) |
252 | ||
253 | ;; All 256bit vector float modes | |
254 | (define_mode_iterator VF_256 | |
255 | [V8SF V4DF]) | |
07c0852e | 256 | |
b86f6e9e AI |
257 | ;; All 512bit vector float modes |
258 | (define_mode_iterator VF_512 | |
259 | [V16SF V8DF]) | |
260 | ||
ca9b264e AI |
261 | (define_mode_iterator VI48_AVX512VL |
262 | [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL") | |
263 | V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")]) | |
264 | ||
e274629e AI |
265 | (define_mode_iterator VF_AVX512VL |
266 | [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL") | |
267 | V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")]) | |
268 | ||
3bcf35e7 AI |
269 | (define_mode_iterator VF2_AVX512VL |
270 | [V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")]) | |
271 | ||
4769c826 AI |
272 | (define_mode_iterator VF1_AVX512VL |
273 | [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")]) | |
274 | ||
d8700b1c UB |
275 | ;; All vector integer modes |
276 | (define_mode_iterator VI | |
a9ccbba2 | 277 | [(V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F") |
9945a432 AI |
278 | (V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI |
279 | (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI | |
d8700b1c UB |
280 | (V8SI "TARGET_AVX") V4SI |
281 | (V4DI "TARGET_AVX") V2DI]) | |
282 | ||
1707583b | 283 | (define_mode_iterator VI_AVX2 |
700e2919 AI |
284 | [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI |
285 | (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI | |
a9ccbba2 AI |
286 | (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI |
287 | (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI]) | |
1707583b | 288 | |
e81b8564 UB |
289 | ;; All QImode vector integer modes |
290 | (define_mode_iterator VI1 | |
291 | [(V32QI "TARGET_AVX") V16QI]) | |
292 | ||
ca9b264e AI |
293 | (define_mode_iterator VI_ULOADSTORE_BW_AVX512VL |
294 | [V64QI | |
295 | V32HI (V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL")]) | |
296 | ||
297 | (define_mode_iterator VI_ULOADSTORE_F_AVX512VL | |
298 | [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL") | |
299 | V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")]) | |
b86f6e9e | 300 | |
e81b8564 UB |
301 | ;; All DImode vector integer modes |
302 | (define_mode_iterator VI8 | |
a9ccbba2 | 303 | [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI]) |
e81b8564 | 304 | |
98725d44 AI |
305 | (define_mode_iterator VI8_AVX512VL |
306 | [V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")]) | |
307 | ||
dc3b8d27 AI |
308 | (define_mode_iterator VI8_256_512 |
309 | [V8DI (V4DI "TARGET_AVX512VL")]) | |
310 | ||
977e83a3 KY |
311 | (define_mode_iterator VI1_AVX2 |
312 | [(V32QI "TARGET_AVX2") V16QI]) | |
313 | ||
f5db965f IT |
314 | (define_mode_iterator VI1_AVX512 |
315 | [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI]) | |
316 | ||
977e83a3 | 317 | (define_mode_iterator VI2_AVX2 |
ed3e611e | 318 | [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI]) |
977e83a3 | 319 | |
3bdf6340 AI |
320 | (define_mode_iterator VI2_AVX512F |
321 | [(V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI]) | |
322 | ||
50e60d7d AI |
323 | (define_mode_iterator VI4_AVX |
324 | [(V8SI "TARGET_AVX") V4SI]) | |
325 | ||
977e83a3 KY |
326 | (define_mode_iterator VI4_AVX2 |
327 | [(V8SI "TARGET_AVX2") V4SI]) | |
328 | ||
f5f41d88 AI |
329 | (define_mode_iterator VI4_AVX512F |
330 | [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI]) | |
331 | ||
21c924ac AI |
332 | (define_mode_iterator VI4_AVX512VL |
333 | [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")]) | |
334 | ||
335 | (define_mode_iterator VI48_AVX512F_AVX512VL | |
336 | [V4SI V8SI (V16SI "TARGET_AVX512F") | |
337 | (V2DI "TARGET_AVX512VL") (V4DI "TARGET_AVX512VL") (V8DI "TARGET_AVX512F")]) | |
338 | ||
339 | (define_mode_iterator VI2_AVX512VL | |
340 | [(V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL") V32HI]) | |
5348cff8 | 341 | |
44f59829 AI |
342 | (define_mode_iterator VI8_AVX2_AVX512BW |
343 | [(V8DI "TARGET_AVX512BW") (V4DI "TARGET_AVX2") V2DI]) | |
344 | ||
977e83a3 KY |
345 | (define_mode_iterator VI8_AVX2 |
346 | [(V4DI "TARGET_AVX2") V2DI]) | |
347 | ||
f5f41d88 AI |
348 | (define_mode_iterator VI8_AVX2_AVX512F |
349 | [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI]) | |
350 | ||
4a90ee35 AI |
351 | (define_mode_iterator VI4_128_8_256 |
352 | [V4SI V4DI]) | |
353 | ||
2e2206fa AI |
354 | ;; All V8D* modes |
355 | (define_mode_iterator V8FI | |
356 | [V8DF V8DI]) | |
357 | ||
358 | ;; All V16S* modes | |
359 | (define_mode_iterator V16FI | |
360 | [V16SF V16SI]) | |
361 | ||
e1faf150 | 362 | ;; ??? We should probably use TImode instead. |
977e83a3 | 363 | (define_mode_iterator VIMAX_AVX2 |
98ee4d9b | 364 | [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") V1TI]) |
977e83a3 | 365 | |
e1faf150 | 366 | ;; ??? This should probably be dropped in favor of VIMAX_AVX2. |
977e83a3 | 367 | (define_mode_iterator SSESCALARMODE |
b99ba39a | 368 | [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") TI]) |
977e83a3 KY |
369 | |
370 | (define_mode_iterator VI12_AVX2 | |
c9b17fa5 AI |
371 | [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI |
372 | (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI]) | |
977e83a3 KY |
373 | |
374 | (define_mode_iterator VI24_AVX2 | |
375 | [(V16HI "TARGET_AVX2") V8HI | |
376 | (V8SI "TARGET_AVX2") V4SI]) | |
377 | ||
3bdf6340 AI |
378 | (define_mode_iterator VI124_AVX512F |
379 | [(V32QI "TARGET_AVX2") V16QI | |
380 | (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI | |
381 | (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI]) | |
382 | ||
977e83a3 KY |
383 | (define_mode_iterator VI124_AVX2 |
384 | [(V32QI "TARGET_AVX2") V16QI | |
385 | (V16HI "TARGET_AVX2") V8HI | |
386 | (V8SI "TARGET_AVX2") V4SI]) | |
387 | ||
3616dc70 AI |
388 | (define_mode_iterator VI2_AVX2_AVX512BW |
389 | [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI]) | |
390 | ||
391 | (define_mode_iterator VI48_AVX2 | |
392 | [(V8SI "TARGET_AVX2") V4SI | |
977e83a3 KY |
393 | (V4DI "TARGET_AVX2") V2DI]) |
394 | ||
e8d08206 AI |
395 | (define_mode_iterator VI248_AVX2_8_AVX512F |
396 | [(V16HI "TARGET_AVX2") V8HI | |
397 | (V8SI "TARGET_AVX2") V4SI | |
398 | (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI]) | |
399 | ||
28e9a294 AI |
400 | (define_mode_iterator VI248_AVX512BW_AVX512VL |
401 | [(V32HI "TARGET_AVX512BW") | |
402 | (V4DI "TARGET_AVX512VL") V16SI V8DI]) | |
403 | ||
404 | ;; Assumes TARGET_AVX512VL as baseline | |
405 | (define_mode_iterator VI24_AVX512BW_1 | |
406 | [(V16HI "TARGET_AVX512BW") (V8HI "TARGET_AVX512BW") | |
407 | V8SI V4SI]) | |
408 | ||
38f4b550 AI |
409 | (define_mode_iterator VI48_AVX512F |
410 | [(V16SI "TARGET_AVX512F") V8SI V4SI | |
411 | (V8DI "TARGET_AVX512F") V4DI V2DI]) | |
977e83a3 KY |
412 | |
413 | (define_mode_iterator V48_AVX2 | |
1707583b UB |
414 | [V4SF V2DF |
415 | V8SF V4DF | |
977e83a3 KY |
416 | (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2") |
417 | (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")]) | |
418 | ||
8b994297 AI |
419 | (define_mode_attr avx512 |
420 | [(V16QI "avx512vl") (V32QI "avx512vl") (V64QI "avx512bw") | |
421 | (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw") | |
422 | (V4SI "avx512vl") (V8SI "avx512vl") (V16SI "avx512f") | |
423 | (V2DI "avx512vl") (V4DI "avx512vl") (V8DI "avx512f") | |
424 | (V4SF "avx512vl") (V8SF "avx512vl") (V16SF "avx512f") | |
425 | (V2DF "avx512vl") (V4DF "avx512vl") (V8DF "avx512f")]) | |
426 | ||
b86f6e9e AI |
427 | (define_mode_attr sse2_avx_avx512f |
428 | [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f") | |
8b994297 | 429 | (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw") |
b86f6e9e | 430 | (V4SI "sse2") (V8SI "avx") (V16SI "avx512f") |
8b994297 | 431 | (V2DI "avx512vl") (V4DI "avx512vl") (V8DI "avx512f") |
b86f6e9e AI |
432 | (V16SF "avx512f") (V8SF "avx") (V4SF "avx") |
433 | (V8DF "avx512f") (V4DF "avx") (V2DF "avx")]) | |
434 | ||
977e83a3 | 435 | (define_mode_attr sse2_avx2 |
8b994297 AI |
436 | [(V16QI "sse2") (V32QI "avx2") (V64QI "avx512bw") |
437 | (V8HI "sse2") (V16HI "avx2") (V32HI "avx512bw") | |
b86f6e9e AI |
438 | (V4SI "sse2") (V8SI "avx2") (V16SI "avx512f") |
439 | (V2DI "sse2") (V4DI "avx2") (V8DI "avx512f") | |
8b994297 | 440 | (V1TI "sse2") (V2TI "avx2") (V4TI "avx512bw")]) |
977e83a3 KY |
441 | |
442 | (define_mode_attr ssse3_avx2 | |
8b994297 AI |
443 | [(V16QI "ssse3") (V32QI "avx2") (V64QI "avx512bw") |
444 | (V4HI "ssse3") (V8HI "ssse3") (V16HI "avx2") (V32HI "avx512bw") | |
977e83a3 KY |
445 | (V4SI "ssse3") (V8SI "avx2") |
446 | (V2DI "ssse3") (V4DI "avx2") | |
8b994297 | 447 | (TI "ssse3") (V2TI "avx2") (V4TI "avx512bw")]) |
977e83a3 KY |
448 | |
449 | (define_mode_attr sse4_1_avx2 | |
8b994297 AI |
450 | [(V16QI "sse4_1") (V32QI "avx2") (V64QI "avx512bw") |
451 | (V8HI "sse4_1") (V16HI "avx2") (V32HI "avx512bw") | |
b86f6e9e | 452 | (V4SI "sse4_1") (V8SI "avx2") (V16SI "avx512f") |
8b994297 | 453 | (V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512dq")]) |
977e83a3 KY |
454 | |
455 | (define_mode_attr avx_avx2 | |
456 | [(V4SF "avx") (V2DF "avx") | |
457 | (V8SF "avx") (V4DF "avx") | |
458 | (V4SI "avx2") (V2DI "avx2") | |
459 | (V8SI "avx2") (V4DI "avx2")]) | |
460 | ||
f2289672 JJ |
461 | (define_mode_attr vec_avx2 |
462 | [(V16QI "vec") (V32QI "avx2") | |
463 | (V8HI "vec") (V16HI "avx2") | |
464 | (V4SI "vec") (V8SI "avx2") | |
465 | (V2DI "vec") (V4DI "avx2")]) | |
466 | ||
cf92ae7f | 467 | (define_mode_attr avx2_avx512 |
8b994297 AI |
468 | [(V4SI "avx2") (V8SI "avx2") (V16SI "avx512f") |
469 | (V2DI "avx2") (V4DI "avx2") (V8DI "avx512f") | |
470 | (V4SF "avx2") (V8SF "avx2") (V16SF "avx512f") | |
471 | (V2DF "avx2") (V4DF "avx2") (V8DF "avx512f") | |
472 | (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")]) | |
473 | ||
3f97cb0b AI |
474 | (define_mode_attr shuffletype |
475 | [(V16SF "f") (V16SI "i") (V8DF "f") (V8DI "i") | |
476 | (V8SF "f") (V8SI "i") (V4DF "f") (V4DI "i") | |
477 | (V4SF "f") (V4SI "i") (V2DF "f") (V2DI "i") | |
478 | (V32QI "i") (V16HI "u") (V16QI "i") (V8HI "i") | |
479 | (V64QI "i") (V1TI "i") (V2TI "i")]) | |
480 | ||
2e2206fa AI |
481 | (define_mode_attr ssequartermode |
482 | [(V16SF "V4SF") (V8DF "V2DF") (V16SI "V4SI") (V8DI "V2DI")]) | |
483 | ||
8b994297 AI |
484 | (define_mode_attr ssedoublemodelower |
485 | [(V16QI "v16hi") (V32QI "v32hi") (V64QI "v64hi") | |
486 | (V8HI "v8si") (V16HI "v16si") (V32HI "v32si") | |
487 | (V4SI "v4di") (V8SI "v8di") (V16SI "v16di")]) | |
488 | ||
977e83a3 | 489 | (define_mode_attr ssedoublemode |
2e2206fa | 490 | [(V16SF "V32SF") (V16SI "V32SI") (V8DI "V16DI") (V8DF "V16DF") |
8b994297 AI |
491 | (V8SF "V16SF") (V8SI "V16SI") (V4DI "V8DI") (V4DF "V8DF") |
492 | (V16HI "V16SI") (V8HI "V8SI") (V4HI "V4SI") (V4SI "V4DI") | |
493 | (V32HI "V32SI") (V32QI "V32HI") (V16QI "V16HI") (V64QI "V64HI")]) | |
977e83a3 KY |
494 | |
495 | (define_mode_attr ssebytemode | |
8b994297 | 496 | [(V8DI "V64QI") (V4DI "V32QI") (V2DI "V16QI")]) |
977e83a3 | 497 | |
798dd0ba UB |
498 | ;; All 128bit vector integer modes |
499 | (define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI]) | |
500 | ||
977e83a3 KY |
501 | ;; All 256bit vector integer modes |
502 | (define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI]) | |
503 | ||
f62ce24f AI |
504 | ;; All 512bit vector integer modes |
505 | (define_mode_iterator VI_512 [V64QI V32HI V16SI V8DI]) | |
506 | ||
507 | ;; Various 128bit vector integer mode combinations | |
798dd0ba UB |
508 | (define_mode_iterator VI12_128 [V16QI V8HI]) |
509 | (define_mode_iterator VI14_128 [V16QI V4SI]) | |
510 | (define_mode_iterator VI124_128 [V16QI V8HI V4SI]) | |
511 | (define_mode_iterator VI24_128 [V8HI V4SI]) | |
512 | (define_mode_iterator VI248_128 [V8HI V4SI V2DI]) | |
ee3b466d | 513 | (define_mode_iterator VI48_128 [V4SI V2DI]) |
07c0852e | 514 | |
e8d08206 | 515 | ;; Various 256bit and 512bit vector integer mode combinations |
575d952c AI |
516 | (define_mode_iterator VI124_256 [V32QI V16HI V8SI]) |
517 | (define_mode_iterator VI124_256_AVX512F_AVX512BW | |
518 | [V32QI V16HI V8SI | |
519 | (V64QI "TARGET_AVX512BW") | |
520 | (V32HI "TARGET_AVX512BW") | |
521 | (V16SI "TARGET_AVX512F")]) | |
ee3b466d | 522 | (define_mode_iterator VI48_256 [V8SI V4DI]) |
0fe65b75 | 523 | (define_mode_iterator VI48_512 [V16SI V8DI]) |
e711dffd | 524 | (define_mode_iterator VI4_256_8_512 [V8SI V8DI]) |
0ab03ea0 AI |
525 | (define_mode_iterator VI_AVX512BW |
526 | [V16SI V8DI (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")]) | |
977e83a3 | 527 | |
6bec6c98 UB |
528 | ;; Int-float size matches |
529 | (define_mode_iterator VI4F_128 [V4SI V4SF]) | |
530 | (define_mode_iterator VI8F_128 [V2DI V2DF]) | |
531 | (define_mode_iterator VI4F_256 [V8SI V8SF]) | |
532 | (define_mode_iterator VI8F_256 [V4DI V4DF]) | |
16821545 AI |
533 | (define_mode_iterator VI8F_256_512 |
534 | [V4DI V4DF (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")]) | |
3c87b77b AI |
535 | (define_mode_iterator VI48F_256_512 |
536 | [V8SI V8SF | |
537 | (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F") | |
cf92ae7f AI |
538 | (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F") |
539 | (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")]) | |
bf584ca0 AI |
540 | (define_mode_iterator VF48_I1248 |
541 | [V16SI V16SF V8DI V8DF V32HI V64QI]) | |
f7be73c8 AI |
542 | (define_mode_iterator VI48F |
543 | [V16SI V16SF V8DI V8DF | |
544 | (V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL") | |
545 | (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL") | |
546 | (V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL") | |
547 | (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")]) | |
0774c160 | 548 | (define_mode_iterator VI48F_256 [V8SI V8SF V4DI V4DF]) |
6bec6c98 | 549 | |
8dfb9f16 UB |
550 | ;; Mapping from float mode to required SSE level |
551 | (define_mode_attr sse | |
552 | [(SF "sse") (DF "sse2") | |
553 | (V4SF "sse") (V2DF "sse2") | |
b86f6e9e AI |
554 | (V16SF "avx512f") (V8SF "avx") |
555 | (V8DF "avx512f") (V4DF "avx")]) | |
8dfb9f16 UB |
556 | |
557 | (define_mode_attr sse2 | |
b86f6e9e AI |
558 | [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f") |
559 | (V2DI "sse2") (V4DI "avx") (V8DI "avx512f")]) | |
8dfb9f16 UB |
560 | |
561 | (define_mode_attr sse3 | |
562 | [(V16QI "sse3") (V32QI "avx")]) | |
563 | ||
564 | (define_mode_attr sse4_1 | |
565 | [(V4SF "sse4_1") (V2DF "sse4_1") | |
b86f6e9e AI |
566 | (V8SF "avx") (V4DF "avx") |
567 | (V8DF "avx512f")]) | |
8dfb9f16 | 568 | |
cbb734aa | 569 | (define_mode_attr avxsizesuffix |
b86f6e9e AI |
570 | [(V64QI "512") (V32HI "512") (V16SI "512") (V8DI "512") |
571 | (V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256") | |
6bec6c98 | 572 | (V16QI "") (V8HI "") (V4SI "") (V2DI "") |
b86f6e9e | 573 | (V16SF "512") (V8DF "512") |
cbb734aa UB |
574 | (V8SF "256") (V4DF "256") |
575 | (V4SF "") (V2DF "")]) | |
6cf9eb27 | 576 | |
cbb734aa UB |
577 | ;; SSE instruction mode |
578 | (define_mode_attr sseinsnmode | |
8b994297 | 579 | [(V64QI "XI") (V32HI "XI") (V16SI "XI") (V8DI "XI") (V4TI "XI") |
3f97cb0b | 580 | (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI") |
cbb734aa | 581 | (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI") |
3f97cb0b | 582 | (V16SF "V16SF") (V8DF "V8DF") |
cbb734aa | 583 | (V8SF "V8SF") (V4DF "V4DF") |
977e83a3 | 584 | (V4SF "V4SF") (V2DF "V2DF") |
601a5d76 | 585 | (TI "TI")]) |
cbb734aa | 586 | |
ab931c71 AI |
587 | ;; Mapping of vector modes to corresponding mask size |
588 | (define_mode_attr avx512fmaskmode | |
2534573e AI |
589 | [(V64QI "DI") (V32QI "SI") (V16QI "HI") |
590 | (V32HI "SI") (V16HI "HI") (V8HI "QI") (V4HI "QI") | |
ab931c71 AI |
591 | (V16SI "HI") (V8SI "QI") (V4SI "QI") |
592 | (V8DI "QI") (V4DI "QI") (V2DI "QI") | |
593 | (V16SF "HI") (V8SF "QI") (V4SF "QI") | |
594 | (V8DF "QI") (V4DF "QI") (V2DF "QI")]) | |
595 | ||
cbb734aa UB |
596 | ;; Mapping of vector float modes to an integer mode of the same size |
597 | (define_mode_attr sseintvecmode | |
b86f6e9e AI |
598 | [(V16SF "V16SI") (V8DF "V8DI") |
599 | (V8SF "V8SI") (V4DF "V4DI") | |
600 | (V4SF "V4SI") (V2DF "V2DI") | |
601 | (V16SI "V16SI") (V8DI "V8DI") | |
602 | (V8SI "V8SI") (V4DI "V4DI") | |
603 | (V4SI "V4SI") (V2DI "V2DI") | |
604 | (V16HI "V16HI") (V8HI "V8HI") | |
8b994297 | 605 | (V32HI "V32HI") (V64QI "V64QI") |
7b45b87f | 606 | (V32QI "V32QI") (V16QI "V16QI")]) |
cbb734aa | 607 | |
3bcf35e7 AI |
608 | (define_mode_attr sseintvecmode2 |
609 | [(V8DF "XI") (V4DF "OI") (V2DF "TI") | |
610 | (V8SF "OI") (V4SF "TI")]) | |
611 | ||
406d683e | 612 | (define_mode_attr sseintvecmodelower |
8b994297 | 613 | [(V16SF "v16si") (V8DF "v8di") |
a9ccbba2 | 614 | (V8SF "v8si") (V4DF "v4di") |
406d683e JJ |
615 | (V4SF "v4si") (V2DF "v2di") |
616 | (V8SI "v8si") (V4DI "v4di") | |
617 | (V4SI "v4si") (V2DI "v2di") | |
618 | (V16HI "v16hi") (V8HI "v8hi") | |
619 | (V32QI "v32qi") (V16QI "v16qi")]) | |
620 | ||
cbb734aa UB |
621 | ;; Mapping of vector modes to a vector mode of double size |
622 | (define_mode_attr ssedoublevecmode | |
623 | [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI") | |
624 | (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI") | |
625 | (V8SF "V16SF") (V4DF "V8DF") | |
626 | (V4SF "V8SF") (V2DF "V4DF")]) | |
627 | ||
628 | ;; Mapping of vector modes to a vector mode of half size | |
629 | (define_mode_attr ssehalfvecmode | |
ec5e777c AI |
630 | [(V64QI "V32QI") (V32HI "V16HI") (V16SI "V8SI") (V8DI "V4DI") |
631 | (V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI") | |
632 | (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI") | |
633 | (V16SF "V8SF") (V8DF "V4DF") | |
634 | (V8SF "V4SF") (V4DF "V2DF") | |
635 | (V4SF "V2SF")]) | |
cbb734aa | 636 | |
e338c25c UB |
637 | ;; Mapping of vector modes to packed single mode of the same size |
638 | (define_mode_attr ssePSmode | |
b86f6e9e AI |
639 | [(V16SI "V16SF") (V8DF "V16SF") |
640 | (V16SF "V16SF") (V8DI "V16SF") | |
641 | (V64QI "V16SF") (V32QI "V8SF") (V16QI "V4SF") | |
642 | (V32HI "V16SF") (V16HI "V8SF") (V8HI "V4SF") | |
e338c25c UB |
643 | (V8SI "V8SF") (V4SI "V4SF") |
644 | (V4DI "V8SF") (V2DI "V4SF") | |
8b994297 | 645 | (V4TI "V16SF") (V2TI "V8SF") (V1TI "V4SF") |
e338c25c UB |
646 | (V8SF "V8SF") (V4SF "V4SF") |
647 | (V4DF "V8SF") (V2DF "V4SF")]) | |
648 | ||
8b994297 AI |
649 | (define_mode_attr ssePSmode2 |
650 | [(V8DI "V8SF") (V4DI "V4SF")]) | |
651 | ||
cbb734aa UB |
652 | ;; Mapping of vector modes back to the scalar modes |
653 | (define_mode_attr ssescalarmode | |
a9ccbba2 AI |
654 | [(V64QI "QI") (V32QI "QI") (V16QI "QI") |
655 | (V32HI "HI") (V16HI "HI") (V8HI "HI") | |
656 | (V16SI "SI") (V8SI "SI") (V4SI "SI") | |
657 | (V8DI "DI") (V4DI "DI") (V2DI "DI") | |
658 | (V16SF "SF") (V8SF "SF") (V4SF "SF") | |
659 | (V8DF "DF") (V4DF "DF") (V2DF "DF")]) | |
660 | ||
661 | ;; Mapping of vector modes to the 128bit modes | |
662 | (define_mode_attr ssexmmmode | |
663 | [(V64QI "V16QI") (V32QI "V16QI") (V16QI "V16QI") | |
664 | (V32HI "V8HI") (V16HI "V8HI") (V8HI "V8HI") | |
665 | (V16SI "V4SI") (V8SI "V4SI") (V4SI "V4SI") | |
666 | (V8DI "V2DI") (V4DI "V2DI") (V2DI "V2DI") | |
667 | (V16SF "V4SF") (V8SF "V4SF") (V4SF "V4SF") | |
668 | (V8DF "V2DF") (V4DF "V2DF") (V2DF "V2DF")]) | |
cbb734aa | 669 | |
eabb5f48 UB |
670 | ;; Pointer size override for scalar modes (Intel asm dialect) |
671 | (define_mode_attr iptr | |
672 | [(V32QI "b") (V16HI "w") (V8SI "k") (V4DI "q") | |
673 | (V16QI "b") (V8HI "w") (V4SI "k") (V2DI "q") | |
674 | (V8SF "k") (V4DF "q") | |
675 | (V4SF "k") (V2DF "q") | |
676 | (SF "k") (DF "q")]) | |
677 | ||
cbb734aa UB |
678 | ;; Number of scalar elements in each vector type |
679 | (define_mode_attr ssescalarnum | |
a9ccbba2 AI |
680 | [(V64QI "64") (V16SI "16") (V8DI "8") |
681 | (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4") | |
cbb734aa | 682 | (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2") |
a9ccbba2 | 683 | (V16SF "16") (V8DF "8") |
cbb734aa UB |
684 | (V8SF "8") (V4DF "4") |
685 | (V4SF "4") (V2DF "2")]) | |
686 | ||
3f5783ea UB |
687 | ;; Mask of scalar elements in each vector type |
688 | (define_mode_attr ssescalarnummask | |
689 | [(V32QI "31") (V16HI "15") (V8SI "7") (V4DI "3") | |
690 | (V16QI "15") (V8HI "7") (V4SI "3") (V2DI "1") | |
691 | (V8SF "7") (V4DF "3") | |
692 | (V4SF "3") (V2DF "1")]) | |
693 | ||
47490470 AI |
694 | (define_mode_attr ssescalarsize |
695 | [(V8DI "64") (V4DI "64") (V2DI "64") | |
8b994297 | 696 | (V64QI "8") (V32QI "8") (V16QI "8") |
47490470 AI |
697 | (V32HI "16") (V16HI "16") (V8HI "16") |
698 | (V16SI "32") (V8SI "32") (V4SI "32") | |
699 | (V16SF "32") (V8DF "64")]) | |
700 | ||
7b45b87f UB |
701 | ;; SSE prefix for integer vector modes |
702 | (define_mode_attr sseintprefix | |
ab931c71 AI |
703 | [(V2DI "p") (V2DF "") |
704 | (V4DI "p") (V4DF "") | |
705 | (V8DI "p") (V8DF "") | |
706 | (V4SI "p") (V4SF "") | |
707 | (V8SI "p") (V8SF "") | |
8b994297 AI |
708 | (V16SI "p") (V16SF "") |
709 | (V16QI "p") (V8HI "p") | |
710 | (V32QI "p") (V16HI "p") | |
711 | (V64QI "p") (V32HI "p")]) | |
7b45b87f | 712 | |
cbb734aa UB |
713 | ;; SSE scalar suffix for vector modes |
714 | (define_mode_attr ssescalarmodesuffix | |
977e83a3 KY |
715 | [(SF "ss") (DF "sd") |
716 | (V8SF "ss") (V4DF "sd") | |
cbb734aa UB |
717 | (V4SF "ss") (V2DF "sd") |
718 | (V8SI "ss") (V4DI "sd") | |
719 | (V4SI "d")]) | |
720 | ||
8dfb9f16 UB |
721 | ;; Pack/unpack vector modes |
722 | (define_mode_attr sseunpackmode | |
977e83a3 | 723 | [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI") |
3bdf6340 AI |
724 | (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI") |
725 | (V32HI "V16SI") (V64QI "V32HI") (V16SI "V8DI")]) | |
8dfb9f16 UB |
726 | |
727 | (define_mode_attr ssepackmode | |
977e83a3 | 728 | [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI") |
e8d08206 AI |
729 | (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI") |
730 | (V32HI "V64QI") (V16SI "V32HI") (V8DI "V16SI")]) | |
8dfb9f16 | 731 | |
cbb734aa UB |
732 | ;; Mapping of the max integer size for xop rotate immediate constraint |
733 | (define_mode_attr sserotatemax | |
734 | [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")]) | |
8dfb9f16 | 735 | |
cd7c6bc5 | 736 | ;; Mapping of mode to cast intrinsic name |
275be1da IT |
737 | (define_mode_attr castmode |
738 | [(V8SI "si") (V8SF "ps") (V4DF "pd") | |
739 | (V16SI "si") (V16SF "ps") (V8DF "pd")]) | |
cd7c6bc5 | 740 | |
ee9dd92e UB |
741 | ;; Instruction suffix for sign and zero extensions. |
742 | (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")]) | |
743 | ||
1db4406e | 744 | ;; i128 for integer vectors and TARGET_AVX2, f128 otherwise. |
ec5e777c | 745 | ;; i64x4 or f64x4 for 512bit modes. |
1db4406e | 746 | (define_mode_attr i128 |
ec5e777c AI |
747 | [(V16SF "f64x4") (V8SF "f128") (V8DF "f64x4") (V4DF "f128") |
748 | (V64QI "i64x4") (V32QI "%~128") (V32HI "i64x4") (V16HI "%~128") | |
749 | (V16SI "i64x4") (V8SI "%~128") (V8DI "i64x4") (V4DI "%~128")]) | |
1db4406e | 750 | |
ef719a44 | 751 | ;; Mix-n-match |
95879c72 | 752 | (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF]) |
275be1da | 753 | (define_mode_iterator AVX512MODE2P [V16SI V16SF V8DF]) |
95879c72 | 754 | |
8b994297 AI |
755 | ;; Mapping for dbpsabbw modes |
756 | (define_mode_attr dbpsadbwmode | |
757 | [(V32HI "V64QI") (V16HI "V32QI") (V8HI "V16QI")]) | |
c96b4102 | 758 | |
ab931c71 AI |
759 | ;; Mapping suffixes for broadcast |
760 | (define_mode_attr bcstscalarsuff | |
8b994297 AI |
761 | [(V64QI "b") (V32QI "b") (V16QI "b") |
762 | (V32HI "w") (V16HI "w") (V8HI "w") | |
763 | (V16SI "d") (V8SI "d") (V4SI "d") | |
764 | (V8DI "q") (V4DI "q") (V2DI "q") | |
765 | (V16SF "ss") (V8SF "ss") (V4SF "ss") | |
766 | (V8DF "sd") (V4DF "sd") (V2DF "sd")]) | |
ab931c71 | 767 | |
4854de0d AI |
768 | ;; Tie mode of assembler operand to mode iterator |
769 | (define_mode_attr concat_tg_mode | |
770 | [(V32QI "t") (V16HI "t") (V8SI "t") (V4DI "t") (V8SF "t") (V4DF "t") | |
771 | (V64QI "g") (V32HI "g") (V16SI "g") (V8DI "g") (V16SF "g") (V8DF "g")]) | |
772 | ||
773 | ||
47490470 AI |
774 | ;; Include define_subst patterns for instructions with mask |
775 | (include "subst.md") | |
776 | ||
ef719a44 RH |
777 | ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics. |
778 | ||
779 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
780 | ;; | |
781 | ;; Move patterns | |
782 | ;; | |
783 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
784 | ||
e81b8564 UB |
785 | ;; All of these patterns are enabled for SSE1 as well as SSE2. |
786 | ;; This is essential for maintaining stable calling conventions. | |
787 | ||
95879c72 | 788 | (define_expand "mov<mode>" |
c7ecdec6 KY |
789 | [(set (match_operand:VMOVE 0 "nonimmediate_operand") |
790 | (match_operand:VMOVE 1 "nonimmediate_operand"))] | |
e81b8564 | 791 | "TARGET_SSE" |
95879c72 L |
792 | { |
793 | ix86_expand_vector_move (<MODE>mode, operands); | |
794 | DONE; | |
795 | }) | |
796 | ||
e81b8564 | 797 | (define_insn "*mov<mode>_internal" |
3f97cb0b AI |
798 | [(set (match_operand:VMOVE 0 "nonimmediate_operand" "=v,v ,m") |
799 | (match_operand:VMOVE 1 "nonimmediate_or_sse_const_operand" "C ,vm,v"))] | |
e81b8564 | 800 | "TARGET_SSE |
95879c72 L |
801 | && (register_operand (operands[0], <MODE>mode) |
802 | || register_operand (operands[1], <MODE>mode))" | |
803 | { | |
3f97cb0b | 804 | int mode = get_attr_mode (insn); |
95879c72 L |
805 | switch (which_alternative) |
806 | { | |
807 | case 0: | |
808 | return standard_sse_constant_opcode (insn, operands[1]); | |
809 | case 1: | |
810 | case 2: | |
3f97cb0b AI |
811 | /* There is no evex-encoded vmov* for sizes smaller than 64-bytes |
812 | in avx512f, so we need to use workarounds, to access sse registers | |
e0aacde4 | 813 | 16-31, which are evex-only. In avx512vl we don't need workarounds. */ |
f2864cc4 | 814 | if (TARGET_AVX512F && <MODE_SIZE> < 64 && !TARGET_AVX512VL |
e0aacde4 AI |
815 | && ((REG_P (operands[0]) && EXT_REX_SSE_REGNO_P (REGNO (operands[0]))) |
816 | || (REG_P (operands[1]) && EXT_REX_SSE_REGNO_P (REGNO (operands[1]))))) | |
3f97cb0b AI |
817 | { |
818 | if (memory_operand (operands[0], <MODE>mode)) | |
819 | { | |
039eee3f | 820 | if (<MODE_SIZE> == 32) |
3f97cb0b | 821 | return "vextract<shuffletype>64x4\t{$0x0, %g1, %0|%0, %g1, 0x0}"; |
039eee3f | 822 | else if (<MODE_SIZE> == 16) |
3f97cb0b AI |
823 | return "vextract<shuffletype>32x4\t{$0x0, %g1, %0|%0, %g1, 0x0}"; |
824 | else | |
825 | gcc_unreachable (); | |
826 | } | |
827 | else if (memory_operand (operands[1], <MODE>mode)) | |
828 | { | |
039eee3f | 829 | if (<MODE_SIZE> == 32) |
3f97cb0b | 830 | return "vbroadcast<shuffletype>64x4\t{%1, %g0|%g0, %1}"; |
039eee3f | 831 | else if (<MODE_SIZE> == 16) |
3f97cb0b AI |
832 | return "vbroadcast<shuffletype>32x4\t{%1, %g0|%g0, %1}"; |
833 | else | |
834 | gcc_unreachable (); | |
835 | } | |
836 | else | |
837 | /* Reg -> reg move is always aligned. Just use wider move. */ | |
838 | switch (mode) | |
839 | { | |
840 | case MODE_V8SF: | |
841 | case MODE_V4SF: | |
842 | return "vmovaps\t{%g1, %g0|%g0, %g1}"; | |
843 | case MODE_V4DF: | |
844 | case MODE_V2DF: | |
845 | return "vmovapd\t{%g1, %g0|%g0, %g1}"; | |
846 | case MODE_OI: | |
847 | case MODE_TI: | |
848 | return "vmovdqa64\t{%g1, %g0|%g0, %g1}"; | |
849 | default: | |
850 | gcc_unreachable (); | |
851 | } | |
852 | } | |
853 | switch (mode) | |
977e83a3 | 854 | { |
3f97cb0b | 855 | case MODE_V16SF: |
95879c72 L |
856 | case MODE_V8SF: |
857 | case MODE_V4SF: | |
e81b8564 UB |
858 | if (TARGET_AVX |
859 | && (misaligned_operand (operands[0], <MODE>mode) | |
860 | || misaligned_operand (operands[1], <MODE>mode))) | |
d253656a L |
861 | return "vmovups\t{%1, %0|%0, %1}"; |
862 | else | |
e81b8564 UB |
863 | return "%vmovaps\t{%1, %0|%0, %1}"; |
864 | ||
3f97cb0b | 865 | case MODE_V8DF: |
95879c72 L |
866 | case MODE_V4DF: |
867 | case MODE_V2DF: | |
e81b8564 UB |
868 | if (TARGET_AVX |
869 | && (misaligned_operand (operands[0], <MODE>mode) | |
870 | || misaligned_operand (operands[1], <MODE>mode))) | |
d253656a | 871 | return "vmovupd\t{%1, %0|%0, %1}"; |
1133125e | 872 | else |
e81b8564 UB |
873 | return "%vmovapd\t{%1, %0|%0, %1}"; |
874 | ||
875 | case MODE_OI: | |
876 | case MODE_TI: | |
877 | if (TARGET_AVX | |
878 | && (misaligned_operand (operands[0], <MODE>mode) | |
879 | || misaligned_operand (operands[1], <MODE>mode))) | |
e0aacde4 AI |
880 | return TARGET_AVX512VL ? "vmovdqu64\t{%1, %0|%0, %1}" |
881 | : "vmovdqu\t{%1, %0|%0, %1}"; | |
1133125e | 882 | else |
e0aacde4 AI |
883 | return TARGET_AVX512VL ? "vmovdqa64\t{%1, %0|%0, %1}" |
884 | : "%vmovdqa\t{%1, %0|%0, %1}"; | |
3f97cb0b AI |
885 | case MODE_XI: |
886 | if (misaligned_operand (operands[0], <MODE>mode) | |
887 | || misaligned_operand (operands[1], <MODE>mode)) | |
888 | return "vmovdqu64\t{%1, %0|%0, %1}"; | |
889 | else | |
890 | return "vmovdqa64\t{%1, %0|%0, %1}"; | |
ef719a44 | 891 | |
a5e11364 | 892 | default: |
e81b8564 | 893 | gcc_unreachable (); |
a5e11364 | 894 | } |
ef719a44 | 895 | default: |
7637e42c | 896 | gcc_unreachable (); |
ef719a44 RH |
897 | } |
898 | } | |
899 | [(set_attr "type" "sselog1,ssemov,ssemov") | |
e81b8564 | 900 | (set_attr "prefix" "maybe_vex") |
ef719a44 | 901 | (set (attr "mode") |
659c0e68 JM |
902 | (cond [(and (match_test "<MODE_SIZE> == 16") |
903 | (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL") | |
904 | (and (eq_attr "alternative" "2") | |
905 | (match_test "TARGET_SSE_TYPELESS_STORES")))) | |
e338c25c | 906 | (const_string "<ssePSmode>") |
20f9034b | 907 | (match_test "TARGET_AVX") |
977e83a3 | 908 | (const_string "<sseinsnmode>") |
e338c25c UB |
909 | (ior (not (match_test "TARGET_SSE2")) |
910 | (match_test "optimize_function_for_size_p (cfun)")) | |
a5e11364 | 911 | (const_string "V4SF") |
63705578 UB |
912 | (and (eq_attr "alternative" "0") |
913 | (match_test "TARGET_SSE_LOAD0_BY_PXOR")) | |
914 | (const_string "TI") | |
a5e11364 | 915 | ] |
e338c25c | 916 | (const_string "<sseinsnmode>")))]) |
ef719a44 | 917 | |
e0aacde4 | 918 | (define_insn "<avx512>_load<mode>_mask" |
7cbdc87d KY |
919 | [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v") |
920 | (vec_merge:V48_AVX512VL | |
921 | (match_operand:V48_AVX512VL 1 "nonimmediate_operand" "v,m") | |
922 | (match_operand:V48_AVX512VL 2 "vector_move_operand" "0C,0C") | |
be792bce | 923 | (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))] |
47490470 AI |
924 | "TARGET_AVX512F" |
925 | { | |
7cbdc87d KY |
926 | static char buf [64]; |
927 | ||
928 | const char *insn_op; | |
929 | const char *sse_suffix; | |
930 | const char *align; | |
931 | if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode))) | |
47490470 | 932 | { |
7cbdc87d KY |
933 | insn_op = "vmov"; |
934 | sse_suffix = "<ssemodesuffix>"; | |
935 | } | |
936 | else | |
937 | { | |
938 | insn_op = "vmovdq"; | |
939 | sse_suffix = "<ssescalarsize>"; | |
47490470 | 940 | } |
7cbdc87d KY |
941 | |
942 | if (misaligned_operand (operands[1], <MODE>mode)) | |
943 | align = "u"; | |
944 | else | |
945 | align = "a"; | |
946 | ||
947 | snprintf (buf, sizeof (buf), "%s%s%s\t{%%1, %%0%%{%%3%%}%%N2|%%0%%{%%3%%}%%N2, %%1}", | |
948 | insn_op, align, sse_suffix); | |
949 | return buf; | |
47490470 AI |
950 | } |
951 | [(set_attr "type" "ssemov") | |
952 | (set_attr "prefix" "evex") | |
953 | (set_attr "memory" "none,load") | |
954 | (set_attr "mode" "<sseinsnmode>")]) | |
955 | ||
7cbdc87d KY |
956 | (define_insn "<avx512>_load<mode>_mask" |
957 | [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v") | |
958 | (vec_merge:VI12_AVX512VL | |
959 | (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "v,m") | |
960 | (match_operand:VI12_AVX512VL 2 "vector_move_operand" "0C,0C") | |
961 | (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))] | |
962 | "TARGET_AVX512BW" | |
963 | "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" | |
964 | [(set_attr "type" "ssemov") | |
965 | (set_attr "prefix" "evex") | |
966 | (set_attr "memory" "none,load") | |
967 | (set_attr "mode" "<sseinsnmode>")]) | |
968 | ||
51e14b05 AI |
969 | (define_insn "<avx512>_blendm<mode>" |
970 | [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v") | |
971 | (vec_merge:V48_AVX512VL | |
972 | (match_operand:V48_AVX512VL 2 "nonimmediate_operand" "vm") | |
973 | (match_operand:V48_AVX512VL 1 "register_operand" "v") | |
be792bce | 974 | (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))] |
ab931c71 | 975 | "TARGET_AVX512F" |
51e14b05 AI |
976 | "vblendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}" |
977 | [(set_attr "type" "ssemov") | |
978 | (set_attr "prefix" "evex") | |
979 | (set_attr "mode" "<sseinsnmode>")]) | |
980 | ||
981 | (define_insn "<avx512>_blendm<mode>" | |
982 | [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v") | |
983 | (vec_merge:VI12_AVX512VL | |
984 | (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm") | |
985 | (match_operand:VI12_AVX512VL 1 "register_operand" "v") | |
986 | (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))] | |
987 | "TARGET_AVX512BW" | |
988 | "vpblendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}" | |
ab931c71 AI |
989 | [(set_attr "type" "ssemov") |
990 | (set_attr "prefix" "evex") | |
991 | (set_attr "mode" "<sseinsnmode>")]) | |
992 | ||
e0aacde4 | 993 | (define_insn "<avx512>_store<mode>_mask" |
7cbdc87d KY |
994 | [(set (match_operand:V48_AVX512VL 0 "memory_operand" "=m") |
995 | (vec_merge:V48_AVX512VL | |
996 | (match_operand:V48_AVX512VL 1 "register_operand" "v") | |
47490470 | 997 | (match_dup 0) |
be792bce | 998 | (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))] |
47490470 AI |
999 | "TARGET_AVX512F" |
1000 | { | |
7cbdc87d KY |
1001 | static char buf [64]; |
1002 | ||
1003 | const char *insn_op; | |
1004 | const char *sse_suffix; | |
1005 | const char *align; | |
1006 | if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode))) | |
47490470 | 1007 | { |
7cbdc87d KY |
1008 | insn_op = "vmov"; |
1009 | sse_suffix = "<ssemodesuffix>"; | |
1010 | } | |
1011 | else | |
1012 | { | |
1013 | insn_op = "vmovdq"; | |
1014 | sse_suffix = "<ssescalarsize>"; | |
47490470 | 1015 | } |
7cbdc87d KY |
1016 | |
1017 | if (misaligned_operand (operands[1], <MODE>mode)) | |
1018 | align = "u"; | |
1019 | else | |
1020 | align = "a"; | |
1021 | ||
1022 | snprintf (buf, sizeof (buf), "%s%s%s\t{%%1, %%0%%{%%2%%}|%%0%%{%%2%%}, %%1}", | |
1023 | insn_op, align, sse_suffix); | |
1024 | return buf; | |
47490470 AI |
1025 | } |
1026 | [(set_attr "type" "ssemov") | |
1027 | (set_attr "prefix" "evex") | |
1028 | (set_attr "memory" "store") | |
1029 | (set_attr "mode" "<sseinsnmode>")]) | |
1030 | ||
7cbdc87d KY |
1031 | (define_insn "<avx512>_store<mode>_mask" |
1032 | [(set (match_operand:VI12_AVX512VL 0 "memory_operand" "=m") | |
1033 | (vec_merge:VI12_AVX512VL | |
1034 | (match_operand:VI12_AVX512VL 1 "register_operand" "v") | |
1035 | (match_dup 0) | |
1036 | (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))] | |
1037 | "TARGET_AVX512BW" | |
1038 | "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}" | |
1039 | [(set_attr "type" "ssemov") | |
1040 | (set_attr "prefix" "evex") | |
1041 | (set_attr "memory" "store") | |
1042 | (set_attr "mode" "<sseinsnmode>")]) | |
1043 | ||
e81b8564 UB |
1044 | (define_insn "sse2_movq128" |
1045 | [(set (match_operand:V2DI 0 "register_operand" "=x") | |
1046 | (vec_concat:V2DI | |
1047 | (vec_select:DI | |
1048 | (match_operand:V2DI 1 "nonimmediate_operand" "xm") | |
1049 | (parallel [(const_int 0)])) | |
1050 | (const_int 0)))] | |
1051 | "TARGET_SSE2" | |
eabb5f48 | 1052 | "%vmovq\t{%1, %0|%0, %q1}" |
e81b8564 UB |
1053 | [(set_attr "type" "ssemov") |
1054 | (set_attr "prefix" "maybe_vex") | |
1055 | (set_attr "mode" "TI")]) | |
1056 | ||
ebff937c SH |
1057 | ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm. |
1058 | ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded | |
1059 | ;; from memory, we'd prefer to load the memory directly into the %xmm | |
1060 | ;; register. To facilitate this happy circumstance, this pattern won't | |
1061 | ;; split until after register allocation. If the 64-bit value didn't | |
1062 | ;; come from memory, this is the best we can do. This is much better | |
1063 | ;; than storing %edx:%eax into a stack temporary and loading an %xmm | |
1064 | ;; from there. | |
1065 | ||
1066 | (define_insn_and_split "movdi_to_sse" | |
1067 | [(parallel | |
1068 | [(set (match_operand:V4SI 0 "register_operand" "=?x,x") | |
1069 | (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0)) | |
1070 | (clobber (match_scratch:V4SI 2 "=&x,X"))])] | |
00fcb892 | 1071 | "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC" |
ebff937c SH |
1072 | "#" |
1073 | "&& reload_completed" | |
1074 | [(const_int 0)] | |
1075 | { | |
5eafdd32 EC |
1076 | if (register_operand (operands[1], DImode)) |
1077 | { | |
ebff937c SH |
1078 | /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax). |
1079 | Assemble the 64-bit DImode value in an xmm register. */ | |
1080 | emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode), | |
977e83a3 | 1081 | gen_rtx_SUBREG (SImode, operands[1], 0))); |
ebff937c SH |
1082 | emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode), |
1083 | gen_rtx_SUBREG (SImode, operands[1], 4))); | |
b0d49a6e | 1084 | emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0], |
977e83a3 | 1085 | operands[2])); |
d8c84975 | 1086 | } |
5eafdd32 | 1087 | else if (memory_operand (operands[1], DImode)) |
d8c84975 JJ |
1088 | { |
1089 | rtx tmp = gen_reg_rtx (V2DImode); | |
1090 | emit_insn (gen_vec_concatv2di (tmp, operands[1], const0_rtx)); | |
1091 | emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp)); | |
1092 | } | |
5eafdd32 | 1093 | else |
b0d49a6e | 1094 | gcc_unreachable (); |
ebff937c SH |
1095 | }) |
1096 | ||
ef719a44 | 1097 | (define_split |
82e86dc6 UB |
1098 | [(set (match_operand:V4SF 0 "register_operand") |
1099 | (match_operand:V4SF 1 "zero_extended_scalar_load_operand"))] | |
ef719a44 | 1100 | "TARGET_SSE && reload_completed" |
eb701deb RH |
1101 | [(set (match_dup 0) |
1102 | (vec_merge:V4SF | |
1103 | (vec_duplicate:V4SF (match_dup 1)) | |
1104 | (match_dup 2) | |
1105 | (const_int 1)))] | |
ef719a44 | 1106 | { |
eb701deb RH |
1107 | operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0); |
1108 | operands[2] = CONST0_RTX (V4SFmode); | |
ef719a44 RH |
1109 | }) |
1110 | ||
ef719a44 | 1111 | (define_split |
82e86dc6 UB |
1112 | [(set (match_operand:V2DF 0 "register_operand") |
1113 | (match_operand:V2DF 1 "zero_extended_scalar_load_operand"))] | |
ef719a44 | 1114 | "TARGET_SSE2 && reload_completed" |
eb701deb | 1115 | [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))] |
ef719a44 | 1116 | { |
eb701deb RH |
1117 | operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0); |
1118 | operands[2] = CONST0_RTX (DFmode); | |
ef719a44 RH |
1119 | }) |
1120 | ||
95879c72 | 1121 | (define_expand "movmisalign<mode>" |
c7ecdec6 KY |
1122 | [(set (match_operand:VMOVE 0 "nonimmediate_operand") |
1123 | (match_operand:VMOVE 1 "nonimmediate_operand"))] | |
ef719a44 RH |
1124 | "TARGET_SSE" |
1125 | { | |
1126 | ix86_expand_vector_move_misalign (<MODE>mode, operands); | |
1127 | DONE; | |
1128 | }) | |
1129 | ||
90be6e46 JJ |
1130 | (define_expand "<sse>_loadu<ssemodesuffix><avxsizesuffix><mask_name>" |
1131 | [(set (match_operand:VF 0 "register_operand") | |
1132 | (unspec:VF [(match_operand:VF 1 "nonimmediate_operand")] | |
1133 | UNSPEC_LOADU))] | |
1134 | "TARGET_SSE && <mask_mode512bit_condition>" | |
1135 | { | |
1136 | /* For AVX, normal *mov<mode>_internal pattern will handle unaligned loads | |
1137 | just fine if misaligned_operand is true, and without the UNSPEC it can | |
1138 | be combined with arithmetic instructions. If misaligned_operand is | |
1139 | false, still emit UNSPEC_LOADU insn to honor user's request for | |
1140 | misaligned load. */ | |
1141 | if (TARGET_AVX | |
dad5ed2e | 1142 | && misaligned_operand (operands[1], <MODE>mode)) |
90be6e46 | 1143 | { |
dad5ed2e JJ |
1144 | rtx src = operands[1]; |
1145 | if (<mask_applied>) | |
1146 | src = gen_rtx_VEC_MERGE (<MODE>mode, operands[1], | |
1147 | operands[2 * <mask_applied>], | |
1148 | operands[3 * <mask_applied>]); | |
f7df4a84 | 1149 | emit_insn (gen_rtx_SET (operands[0], src)); |
90be6e46 JJ |
1150 | DONE; |
1151 | } | |
1152 | }) | |
1153 | ||
1154 | (define_insn "*<sse>_loadu<ssemodesuffix><avxsizesuffix><mask_name>" | |
3f97cb0b | 1155 | [(set (match_operand:VF 0 "register_operand" "=v") |
e81b8564 | 1156 | (unspec:VF |
b86f6e9e | 1157 | [(match_operand:VF 1 "nonimmediate_operand" "vm")] |
860f5e77 | 1158 | UNSPEC_LOADU))] |
47490470 | 1159 | "TARGET_SSE && <mask_mode512bit_condition>" |
20f9034b UB |
1160 | { |
1161 | switch (get_attr_mode (insn)) | |
1162 | { | |
b86f6e9e | 1163 | case MODE_V16SF: |
20f9034b UB |
1164 | case MODE_V8SF: |
1165 | case MODE_V4SF: | |
47490470 | 1166 | return "%vmovups\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"; |
20f9034b | 1167 | default: |
47490470 | 1168 | return "%vmovu<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"; |
20f9034b UB |
1169 | } |
1170 | } | |
95879c72 | 1171 | [(set_attr "type" "ssemov") |
b6837b94 | 1172 | (set_attr "movu" "1") |
f220a4f4 | 1173 | (set_attr "ssememalign" "8") |
95879c72 | 1174 | (set_attr "prefix" "maybe_vex") |
20f9034b | 1175 | (set (attr "mode") |
659c0e68 JM |
1176 | (cond [(and (match_test "<MODE_SIZE> == 16") |
1177 | (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")) | |
e338c25c | 1178 | (const_string "<ssePSmode>") |
20f9034b UB |
1179 | (match_test "TARGET_AVX") |
1180 | (const_string "<MODE>") | |
e338c25c UB |
1181 | (match_test "optimize_function_for_size_p (cfun)") |
1182 | (const_string "V4SF") | |
20f9034b | 1183 | ] |
e338c25c | 1184 | (const_string "<MODE>")))]) |
ef719a44 | 1185 | |
97afef00 UB |
1186 | ;; Merge movsd/movhpd to movupd for TARGET_SSE_UNALIGNED_LOAD_OPTIMAL targets. |
1187 | (define_peephole2 | |
1188 | [(set (match_operand:V2DF 0 "register_operand") | |
1189 | (vec_concat:V2DF (match_operand:DF 1 "memory_operand") | |
1190 | (match_operand:DF 4 "const0_operand"))) | |
1191 | (set (match_operand:V2DF 2 "register_operand") | |
1192 | (vec_concat:V2DF (vec_select:DF (match_dup 2) | |
1193 | (parallel [(const_int 0)])) | |
1194 | (match_operand:DF 3 "memory_operand")))] | |
1195 | "TARGET_SSE2 && TARGET_SSE_UNALIGNED_LOAD_OPTIMAL | |
1196 | && ix86_operands_ok_for_move_multiple (operands, true, DFmode)" | |
1197 | [(set (match_dup 2) | |
1198 | (unspec:V2DF [(match_dup 4)] UNSPEC_LOADU))] | |
1199 | "operands[4] = adjust_address (operands[1], V2DFmode, 0);") | |
1200 | ||
860f5e77 UB |
1201 | (define_insn "<sse>_storeu<ssemodesuffix><avxsizesuffix>" |
1202 | [(set (match_operand:VF 0 "memory_operand" "=m") | |
1203 | (unspec:VF | |
b86f6e9e | 1204 | [(match_operand:VF 1 "register_operand" "v")] |
860f5e77 UB |
1205 | UNSPEC_STOREU))] |
1206 | "TARGET_SSE" | |
1207 | { | |
1208 | switch (get_attr_mode (insn)) | |
1209 | { | |
b86f6e9e | 1210 | case MODE_V16SF: |
860f5e77 UB |
1211 | case MODE_V8SF: |
1212 | case MODE_V4SF: | |
1213 | return "%vmovups\t{%1, %0|%0, %1}"; | |
1214 | default: | |
1215 | return "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}"; | |
1216 | } | |
1217 | } | |
1218 | [(set_attr "type" "ssemov") | |
1219 | (set_attr "movu" "1") | |
f220a4f4 | 1220 | (set_attr "ssememalign" "8") |
860f5e77 UB |
1221 | (set_attr "prefix" "maybe_vex") |
1222 | (set (attr "mode") | |
039eee3f | 1223 | (cond [(and (match_test "<MODE_SIZE> == 16") |
d3c2fee0 AI |
1224 | (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL") |
1225 | (match_test "TARGET_SSE_TYPELESS_STORES"))) | |
860f5e77 UB |
1226 | (const_string "<ssePSmode>") |
1227 | (match_test "TARGET_AVX") | |
1228 | (const_string "<MODE>") | |
1229 | (match_test "optimize_function_for_size_p (cfun)") | |
1230 | (const_string "V4SF") | |
1231 | ] | |
1232 | (const_string "<MODE>")))]) | |
1233 | ||
b040ded3 AI |
1234 | (define_insn "<avx512>_storeu<ssemodesuffix><avxsizesuffix>_mask" |
1235 | [(set (match_operand:VF_AVX512VL 0 "memory_operand" "=m") | |
1236 | (vec_merge:VF_AVX512VL | |
1237 | (unspec:VF_AVX512VL | |
1238 | [(match_operand:VF_AVX512VL 1 "register_operand" "v")] | |
47490470 AI |
1239 | UNSPEC_STOREU) |
1240 | (match_dup 0) | |
be792bce | 1241 | (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))] |
47490470 AI |
1242 | "TARGET_AVX512F" |
1243 | { | |
1244 | switch (get_attr_mode (insn)) | |
1245 | { | |
1246 | case MODE_V16SF: | |
b040ded3 AI |
1247 | case MODE_V8SF: |
1248 | case MODE_V4SF: | |
47490470 AI |
1249 | return "vmovups\t{%1, %0%{%2%}|%0%{%2%}, %1}"; |
1250 | default: | |
1251 | return "vmovu<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"; | |
1252 | } | |
1253 | } | |
1254 | [(set_attr "type" "ssemov") | |
1255 | (set_attr "movu" "1") | |
1256 | (set_attr "memory" "store") | |
1257 | (set_attr "prefix" "evex") | |
1258 | (set_attr "mode" "<sseinsnmode>")]) | |
1259 | ||
97afef00 UB |
1260 | ;; Merge movlpd/movhpd to movupd for TARGET_SSE_UNALIGNED_STORE_OPTIMAL targets. |
1261 | (define_peephole2 | |
1262 | [(set (match_operand:DF 0 "memory_operand") | |
1263 | (vec_select:DF (match_operand:V2DF 1 "register_operand") | |
1264 | (parallel [(const_int 0)]))) | |
1265 | (set (match_operand:DF 2 "memory_operand") | |
1266 | (vec_select:DF (match_operand:V2DF 3 "register_operand") | |
1267 | (parallel [(const_int 1)])))] | |
1268 | "TARGET_SSE2 && TARGET_SSE_UNALIGNED_STORE_OPTIMAL | |
1269 | && ix86_operands_ok_for_move_multiple (operands, false, DFmode)" | |
1270 | [(set (match_dup 4) | |
1271 | (unspec:V2DF [(match_dup 1)] UNSPEC_STOREU))] | |
1272 | "operands[4] = adjust_address (operands[0], V2DFmode, 0);") | |
1273 | ||
ca9b264e AI |
1274 | /* For AVX, normal *mov<mode>_internal pattern will handle unaligned loads |
1275 | just fine if misaligned_operand is true, and without the UNSPEC it can | |
1276 | be combined with arithmetic instructions. If misaligned_operand is | |
1277 | false, still emit UNSPEC_LOADU insn to honor user's request for | |
1278 | misaligned load. */ | |
90be6e46 | 1279 | (define_expand "<sse2_avx_avx512f>_loaddqu<mode><mask_name>" |
ca9b264e AI |
1280 | [(set (match_operand:VI1 0 "register_operand") |
1281 | (unspec:VI1 | |
1282 | [(match_operand:VI1 1 "nonimmediate_operand")] | |
90be6e46 | 1283 | UNSPEC_LOADU))] |
ca9b264e | 1284 | "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>" |
90be6e46 | 1285 | { |
90be6e46 | 1286 | if (TARGET_AVX |
dad5ed2e | 1287 | && misaligned_operand (operands[1], <MODE>mode)) |
90be6e46 | 1288 | { |
dad5ed2e JJ |
1289 | rtx src = operands[1]; |
1290 | if (<mask_applied>) | |
1291 | src = gen_rtx_VEC_MERGE (<MODE>mode, operands[1], | |
1292 | operands[2 * <mask_applied>], | |
1293 | operands[3 * <mask_applied>]); | |
f7df4a84 | 1294 | emit_insn (gen_rtx_SET (operands[0], src)); |
90be6e46 JJ |
1295 | DONE; |
1296 | } | |
1297 | }) | |
1298 | ||
ca9b264e AI |
1299 | (define_expand "<sse2_avx_avx512f>_loaddqu<mode><mask_name>" |
1300 | [(set (match_operand:VI_ULOADSTORE_BW_AVX512VL 0 "register_operand") | |
1301 | (unspec:VI_ULOADSTORE_BW_AVX512VL | |
1302 | [(match_operand:VI_ULOADSTORE_BW_AVX512VL 1 "nonimmediate_operand")] | |
1303 | UNSPEC_LOADU))] | |
1304 | "TARGET_AVX512BW" | |
1305 | { | |
1306 | if (misaligned_operand (operands[1], <MODE>mode)) | |
1307 | { | |
1308 | rtx src = operands[1]; | |
1309 | if (<mask_applied>) | |
1310 | src = gen_rtx_VEC_MERGE (<MODE>mode, operands[1], | |
1311 | operands[2 * <mask_applied>], | |
1312 | operands[3 * <mask_applied>]); | |
f7df4a84 | 1313 | emit_insn (gen_rtx_SET (operands[0], src)); |
ca9b264e AI |
1314 | DONE; |
1315 | } | |
1316 | }) | |
1317 | ||
1318 | (define_expand "<sse2_avx_avx512f>_loaddqu<mode><mask_name>" | |
1319 | [(set (match_operand:VI_ULOADSTORE_F_AVX512VL 0 "register_operand") | |
1320 | (unspec:VI_ULOADSTORE_F_AVX512VL | |
1321 | [(match_operand:VI_ULOADSTORE_F_AVX512VL 1 "nonimmediate_operand")] | |
1322 | UNSPEC_LOADU))] | |
1323 | "TARGET_AVX512F" | |
1324 | { | |
1325 | if (misaligned_operand (operands[1], <MODE>mode)) | |
1326 | { | |
1327 | rtx src = operands[1]; | |
1328 | if (<mask_applied>) | |
1329 | src = gen_rtx_VEC_MERGE (<MODE>mode, operands[1], | |
1330 | operands[2 * <mask_applied>], | |
1331 | operands[3 * <mask_applied>]); | |
f7df4a84 | 1332 | emit_insn (gen_rtx_SET (operands[0], src)); |
ca9b264e AI |
1333 | DONE; |
1334 | } | |
1335 | }) | |
1336 | ||
90be6e46 | 1337 | (define_insn "*<sse2_avx_avx512f>_loaddqu<mode><mask_name>" |
ca9b264e AI |
1338 | [(set (match_operand:VI1 0 "register_operand" "=v") |
1339 | (unspec:VI1 | |
1340 | [(match_operand:VI1 1 "nonimmediate_operand" "vm")] | |
b86f6e9e | 1341 | UNSPEC_LOADU))] |
ca9b264e | 1342 | "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>" |
860f5e77 UB |
1343 | { |
1344 | switch (get_attr_mode (insn)) | |
1345 | { | |
1346 | case MODE_V8SF: | |
1347 | case MODE_V4SF: | |
1348 | return "%vmovups\t{%1, %0|%0, %1}"; | |
1349 | default: | |
ca9b264e AI |
1350 | if (!(TARGET_AVX512VL && TARGET_AVX512BW)) |
1351 | return "%vmovdqu\t{%1, %0|%0, %1}"; | |
1352 | else | |
1353 | return "vmovdqu<ssescalarsize>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"; | |
860f5e77 UB |
1354 | } |
1355 | } | |
1356 | [(set_attr "type" "ssemov") | |
1357 | (set_attr "movu" "1") | |
f220a4f4 | 1358 | (set_attr "ssememalign" "8") |
860f5e77 UB |
1359 | (set (attr "prefix_data16") |
1360 | (if_then_else | |
1361 | (match_test "TARGET_AVX") | |
1362 | (const_string "*") | |
1363 | (const_string "1"))) | |
1364 | (set_attr "prefix" "maybe_vex") | |
1365 | (set (attr "mode") | |
659c0e68 JM |
1366 | (cond [(and (match_test "<MODE_SIZE> == 16") |
1367 | (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")) | |
860f5e77 | 1368 | (const_string "<ssePSmode>") |
860f5e77 UB |
1369 | (match_test "TARGET_AVX") |
1370 | (const_string "<sseinsnmode>") | |
1371 | (match_test "optimize_function_for_size_p (cfun)") | |
1372 | (const_string "V4SF") | |
1373 | ] | |
1374 | (const_string "<sseinsnmode>")))]) | |
1375 | ||
ca9b264e AI |
1376 | (define_insn "*<sse2_avx_avx512f>_loaddqu<mode><mask_name>" |
1377 | [(set (match_operand:VI_ULOADSTORE_BW_AVX512VL 0 "register_operand" "=v") | |
1378 | (unspec:VI_ULOADSTORE_BW_AVX512VL | |
1379 | [(match_operand:VI_ULOADSTORE_BW_AVX512VL 1 "nonimmediate_operand" "vm")] | |
1380 | UNSPEC_LOADU))] | |
1381 | "TARGET_AVX512BW" | |
1382 | "vmovdqu<ssescalarsize>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"; | |
1383 | [(set_attr "type" "ssemov") | |
1384 | (set_attr "movu" "1") | |
1385 | (set_attr "ssememalign" "8") | |
1386 | (set_attr "prefix" "maybe_evex")]) | |
1387 | ||
1388 | (define_insn "*<sse2_avx_avx512f>_loaddqu<mode><mask_name>" | |
1389 | [(set (match_operand:VI_ULOADSTORE_F_AVX512VL 0 "register_operand" "=v") | |
1390 | (unspec:VI_ULOADSTORE_F_AVX512VL | |
1391 | [(match_operand:VI_ULOADSTORE_F_AVX512VL 1 "nonimmediate_operand" "vm")] | |
1392 | UNSPEC_LOADU))] | |
1393 | "TARGET_AVX512F" | |
1394 | "vmovdqu<ssescalarsize>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"; | |
1395 | [(set_attr "type" "ssemov") | |
1396 | (set_attr "movu" "1") | |
1397 | (set_attr "ssememalign" "8") | |
1398 | (set_attr "prefix" "maybe_evex")]) | |
1399 | ||
b86f6e9e | 1400 | (define_insn "<sse2_avx_avx512f>_storedqu<mode>" |
ca9b264e AI |
1401 | [(set (match_operand:VI1 0 "memory_operand" "=m") |
1402 | (unspec:VI1 | |
1403 | [(match_operand:VI1 1 "register_operand" "v")] | |
b86f6e9e | 1404 | UNSPEC_STOREU))] |
860f5e77 | 1405 | "TARGET_SSE2" |
20f9034b UB |
1406 | { |
1407 | switch (get_attr_mode (insn)) | |
1408 | { | |
d3c2fee0 | 1409 | case MODE_V16SF: |
20f9034b UB |
1410 | case MODE_V8SF: |
1411 | case MODE_V4SF: | |
1412 | return "%vmovups\t{%1, %0|%0, %1}"; | |
1413 | default: | |
ca9b264e AI |
1414 | switch (<MODE>mode) |
1415 | { | |
1416 | case V32QImode: | |
1417 | case V16QImode: | |
1418 | if (!(TARGET_AVX512VL && TARGET_AVX512BW)) | |
1419 | return "%vmovdqu\t{%1, %0|%0, %1}"; | |
1420 | default: | |
1421 | return "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}"; | |
1422 | } | |
20f9034b UB |
1423 | } |
1424 | } | |
95879c72 | 1425 | [(set_attr "type" "ssemov") |
b6837b94 | 1426 | (set_attr "movu" "1") |
f220a4f4 | 1427 | (set_attr "ssememalign" "8") |
e81b8564 UB |
1428 | (set (attr "prefix_data16") |
1429 | (if_then_else | |
67b2c493 | 1430 | (match_test "TARGET_AVX") |
e81b8564 UB |
1431 | (const_string "*") |
1432 | (const_string "1"))) | |
1433 | (set_attr "prefix" "maybe_vex") | |
20f9034b | 1434 | (set (attr "mode") |
039eee3f | 1435 | (cond [(and (match_test "<MODE_SIZE> == 16") |
d3c2fee0 AI |
1436 | (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL") |
1437 | (match_test "TARGET_SSE_TYPELESS_STORES"))) | |
e338c25c | 1438 | (const_string "<ssePSmode>") |
20f9034b UB |
1439 | (match_test "TARGET_AVX") |
1440 | (const_string "<sseinsnmode>") | |
e338c25c | 1441 | (match_test "optimize_function_for_size_p (cfun)") |
20f9034b UB |
1442 | (const_string "V4SF") |
1443 | ] | |
e338c25c | 1444 | (const_string "<sseinsnmode>")))]) |
95879c72 | 1445 | |
ca9b264e AI |
1446 | (define_insn "<sse2_avx_avx512f>_storedqu<mode>" |
1447 | [(set (match_operand:VI_ULOADSTORE_BW_AVX512VL 0 "memory_operand" "=m") | |
1448 | (unspec:VI_ULOADSTORE_BW_AVX512VL | |
1449 | [(match_operand:VI_ULOADSTORE_BW_AVX512VL 1 "register_operand" "v")] | |
1450 | UNSPEC_STOREU))] | |
1451 | "TARGET_AVX512BW" | |
1452 | "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}" | |
1453 | [(set_attr "type" "ssemov") | |
1454 | (set_attr "movu" "1") | |
1455 | (set_attr "ssememalign" "8") | |
1456 | (set_attr "prefix" "maybe_evex")]) | |
1457 | ||
1458 | (define_insn "<sse2_avx_avx512f>_storedqu<mode>" | |
1459 | [(set (match_operand:VI_ULOADSTORE_F_AVX512VL 0 "memory_operand" "=m") | |
1460 | (unspec:VI_ULOADSTORE_F_AVX512VL | |
1461 | [(match_operand:VI_ULOADSTORE_F_AVX512VL 1 "register_operand" "v")] | |
1462 | UNSPEC_STOREU))] | |
1463 | "TARGET_AVX512F" | |
1464 | "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}" | |
1465 | [(set_attr "type" "ssemov") | |
1466 | (set_attr "movu" "1") | |
1467 | (set_attr "ssememalign" "8") | |
1468 | (set_attr "prefix" "maybe_vex")]) | |
1469 | ||
1470 | (define_insn "<avx512>_storedqu<mode>_mask" | |
1471 | [(set (match_operand:VI48_AVX512VL 0 "memory_operand" "=m") | |
1472 | (vec_merge:VI48_AVX512VL | |
1473 | (unspec:VI48_AVX512VL | |
1474 | [(match_operand:VI48_AVX512VL 1 "register_operand" "v")] | |
47490470 AI |
1475 | UNSPEC_STOREU) |
1476 | (match_dup 0) | |
be792bce | 1477 | (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))] |
47490470 | 1478 | "TARGET_AVX512F" |
ca9b264e AI |
1479 | "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}" |
1480 | [(set_attr "type" "ssemov") | |
1481 | (set_attr "movu" "1") | |
1482 | (set_attr "memory" "store") | |
1483 | (set_attr "prefix" "evex") | |
1484 | (set_attr "mode" "<sseinsnmode>")]) | |
1485 | ||
1486 | (define_insn "<avx512>_storedqu<mode>_mask" | |
1487 | [(set (match_operand:VI12_AVX512VL 0 "memory_operand" "=m") | |
1488 | (vec_merge:VI12_AVX512VL | |
1489 | (unspec:VI12_AVX512VL | |
1490 | [(match_operand:VI12_AVX512VL 1 "register_operand" "v")] | |
1491 | UNSPEC_STOREU) | |
1492 | (match_dup 0) | |
1493 | (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))] | |
1494 | "TARGET_AVX512BW" | |
1495 | "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}" | |
47490470 AI |
1496 | [(set_attr "type" "ssemov") |
1497 | (set_attr "movu" "1") | |
1498 | (set_attr "memory" "store") | |
1499 | (set_attr "prefix" "evex") | |
1500 | (set_attr "mode" "<sseinsnmode>")]) | |
1501 | ||
cbb734aa | 1502 | (define_insn "<sse3>_lddqu<avxsizesuffix>" |
e81b8564 UB |
1503 | [(set (match_operand:VI1 0 "register_operand" "=x") |
1504 | (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")] | |
1505 | UNSPEC_LDDQU))] | |
1506 | "TARGET_SSE3" | |
1507 | "%vlddqu\t{%1, %0|%0, %1}" | |
ef719a44 | 1508 | [(set_attr "type" "ssemov") |
b6837b94 | 1509 | (set_attr "movu" "1") |
f220a4f4 | 1510 | (set_attr "ssememalign" "8") |
e81b8564 UB |
1511 | (set (attr "prefix_data16") |
1512 | (if_then_else | |
67b2c493 | 1513 | (match_test "TARGET_AVX") |
e81b8564 UB |
1514 | (const_string "*") |
1515 | (const_string "0"))) | |
1516 | (set (attr "prefix_rep") | |
1517 | (if_then_else | |
67b2c493 | 1518 | (match_test "TARGET_AVX") |
e81b8564 UB |
1519 | (const_string "*") |
1520 | (const_string "1"))) | |
1521 | (set_attr "prefix" "maybe_vex") | |
cbb734aa | 1522 | (set_attr "mode" "<sseinsnmode>")]) |
65b82caa | 1523 | |
f32c951e L |
1524 | (define_insn "sse2_movnti<mode>" |
1525 | [(set (match_operand:SWI48 0 "memory_operand" "=m") | |
1526 | (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")] | |
aa198500 | 1527 | UNSPEC_MOVNT))] |
ef719a44 RH |
1528 | "TARGET_SSE2" |
1529 | "movnti\t{%1, %0|%0, %1}" | |
b6837b94 | 1530 | [(set_attr "type" "ssemov") |
725fd454 | 1531 | (set_attr "prefix_data16" "0") |
f32c951e | 1532 | (set_attr "mode" "<MODE>")]) |
ef719a44 | 1533 | |
e81b8564 UB |
1534 | (define_insn "<sse>_movnt<mode>" |
1535 | [(set (match_operand:VF 0 "memory_operand" "=m") | |
b86f6e9e AI |
1536 | (unspec:VF |
1537 | [(match_operand:VF 1 "register_operand" "v")] | |
1538 | UNSPEC_MOVNT))] | |
e81b8564 UB |
1539 | "TARGET_SSE" |
1540 | "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}" | |
1541 | [(set_attr "type" "ssemov") | |
1542 | (set_attr "prefix" "maybe_vex") | |
1543 | (set_attr "mode" "<MODE>")]) | |
1544 | ||
1545 | (define_insn "<sse2>_movnt<mode>" | |
1546 | [(set (match_operand:VI8 0 "memory_operand" "=m") | |
2b1ebb0c | 1547 | (unspec:VI8 [(match_operand:VI8 1 "register_operand" "v")] |
e81b8564 UB |
1548 | UNSPEC_MOVNT))] |
1549 | "TARGET_SSE2" | |
1550 | "%vmovntdq\t{%1, %0|%0, %1}" | |
95879c72 | 1551 | [(set_attr "type" "ssecvt") |
e81b8564 UB |
1552 | (set (attr "prefix_data16") |
1553 | (if_then_else | |
67b2c493 | 1554 | (match_test "TARGET_AVX") |
e81b8564 UB |
1555 | (const_string "*") |
1556 | (const_string "1"))) | |
1557 | (set_attr "prefix" "maybe_vex") | |
cbb734aa | 1558 | (set_attr "mode" "<sseinsnmode>")]) |
95879c72 | 1559 | |
79f5e442 ZD |
1560 | ; Expand patterns for non-temporal stores. At the moment, only those |
1561 | ; that directly map to insns are defined; it would be possible to | |
1562 | ; define patterns for other modes that would expand to several insns. | |
1563 | ||
6bec6c98 UB |
1564 | ;; Modes handled by storent patterns. |
1565 | (define_mode_iterator STORENT_MODE | |
aa198500 UB |
1566 | [(DI "TARGET_SSE2 && TARGET_64BIT") (SI "TARGET_SSE2") |
1567 | (SF "TARGET_SSE4A") (DF "TARGET_SSE4A") | |
b86f6e9e AI |
1568 | (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2") |
1569 | (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF | |
1570 | (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")]) | |
6bec6c98 | 1571 | |
d6023b50 | 1572 | (define_expand "storent<mode>" |
82e86dc6 | 1573 | [(set (match_operand:STORENT_MODE 0 "memory_operand") |
1e27129f | 1574 | (unspec:STORENT_MODE |
82e86dc6 | 1575 | [(match_operand:STORENT_MODE 1 "register_operand")] |
6bec6c98 UB |
1576 | UNSPEC_MOVNT))] |
1577 | "TARGET_SSE") | |
79f5e442 | 1578 | |
ef719a44 RH |
1579 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
1580 | ;; | |
d6023b50 | 1581 | ;; Parallel floating point arithmetic |
ef719a44 RH |
1582 | ;; |
1583 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
1584 | ||
6dd18eb1 | 1585 | (define_expand "<code><mode>2" |
82e86dc6 | 1586 | [(set (match_operand:VF 0 "register_operand") |
07c0852e | 1587 | (absneg:VF |
82e86dc6 | 1588 | (match_operand:VF 1 "register_operand")))] |
6bec6c98 | 1589 | "TARGET_SSE" |
6dd18eb1 | 1590 | "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;") |
ef719a44 | 1591 | |
07c0852e | 1592 | (define_insn_and_split "*absneg<mode>2" |
b86f6e9e | 1593 | [(set (match_operand:VF 0 "register_operand" "=x,x,v,v") |
07c0852e | 1594 | (match_operator:VF 3 "absneg_operator" |
b86f6e9e AI |
1595 | [(match_operand:VF 1 "nonimmediate_operand" "0, xm, v, m")])) |
1596 | (use (match_operand:VF 2 "nonimmediate_operand" "xm, 0, vm,v"))] | |
6bec6c98 | 1597 | "TARGET_SSE" |
7a12785c | 1598 | "#" |
3b0eee5d | 1599 | "&& reload_completed" |
7a12785c RH |
1600 | [(const_int 0)] |
1601 | { | |
07c0852e UB |
1602 | enum rtx_code absneg_op; |
1603 | rtx op1, op2; | |
487a9a3e RH |
1604 | rtx t; |
1605 | ||
07c0852e UB |
1606 | if (TARGET_AVX) |
1607 | { | |
1608 | if (MEM_P (operands[1])) | |
1609 | op1 = operands[2], op2 = operands[1]; | |
1610 | else | |
1611 | op1 = operands[1], op2 = operands[2]; | |
1612 | } | |
487a9a3e | 1613 | else |
07c0852e UB |
1614 | { |
1615 | op1 = operands[0]; | |
1616 | if (rtx_equal_p (operands[0], operands[1])) | |
1617 | op2 = operands[2]; | |
1618 | else | |
1619 | op2 = operands[1]; | |
1620 | } | |
487a9a3e | 1621 | |
07c0852e UB |
1622 | absneg_op = GET_CODE (operands[3]) == NEG ? XOR : AND; |
1623 | t = gen_rtx_fmt_ee (absneg_op, <MODE>mode, op1, op2); | |
f7df4a84 | 1624 | t = gen_rtx_SET (operands[0], t); |
487a9a3e | 1625 | emit_insn (t); |
7a12785c | 1626 | DONE; |
07c0852e UB |
1627 | } |
1628 | [(set_attr "isa" "noavx,noavx,avx,avx")]) | |
95879c72 | 1629 | |
06bc9e41 | 1630 | (define_expand "<plusminus_insn><mode>3<mask_name><round_name>" |
82e86dc6 | 1631 | [(set (match_operand:VF 0 "register_operand") |
07c0852e | 1632 | (plusminus:VF |
06bc9e41 AI |
1633 | (match_operand:VF 1 "<round_nimm_predicate>") |
1634 | (match_operand:VF 2 "<round_nimm_predicate>")))] | |
1635 | "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>" | |
78e8956b | 1636 | "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);") |
ef719a44 | 1637 | |
06bc9e41 | 1638 | (define_insn "*<plusminus_insn><mode>3<mask_name><round_name>" |
3f97cb0b | 1639 | [(set (match_operand:VF 0 "register_operand" "=x,v") |
07c0852e | 1640 | (plusminus:VF |
06bc9e41 AI |
1641 | (match_operand:VF 1 "<round_nimm_predicate>" "<comm>0,v") |
1642 | (match_operand:VF 2 "<round_nimm_predicate>" "xm,<round_constraint>")))] | |
1643 | "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) && <mask_mode512bit_condition> && <round_mode512bit_condition>" | |
07c0852e UB |
1644 | "@ |
1645 | <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2} | |
06bc9e41 | 1646 | v<plusminus_mnemonic><ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}" |
07c0852e UB |
1647 | [(set_attr "isa" "noavx,avx") |
1648 | (set_attr "type" "sseadd") | |
47490470 | 1649 | (set_attr "prefix" "<mask_prefix3>") |
d6023b50 | 1650 | (set_attr "mode" "<MODE>")]) |
ef719a44 | 1651 | |
075691af | 1652 | (define_insn "<sse>_vm<plusminus_insn><mode>3<round_name>" |
3f97cb0b | 1653 | [(set (match_operand:VF_128 0 "register_operand" "=x,v") |
07c0852e UB |
1654 | (vec_merge:VF_128 |
1655 | (plusminus:VF_128 | |
3f97cb0b | 1656 | (match_operand:VF_128 1 "register_operand" "0,v") |
075691af | 1657 | (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_constraint>")) |
ef719a44 RH |
1658 | (match_dup 1) |
1659 | (const_int 1)))] | |
6bec6c98 | 1660 | "TARGET_SSE" |
07c0852e | 1661 | "@ |
eabb5f48 | 1662 | <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2} |
075691af | 1663 | v<plusminus_mnemonic><ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %<iptr>2<round_op3>}" |
07c0852e UB |
1664 | [(set_attr "isa" "noavx,avx") |
1665 | (set_attr "type" "sseadd") | |
075691af | 1666 | (set_attr "prefix" "<round_prefix>") |
d6023b50 | 1667 | (set_attr "mode" "<ssescalarmode>")]) |
ef719a44 | 1668 | |
06bc9e41 | 1669 | (define_expand "mul<mode>3<mask_name><round_name>" |
82e86dc6 | 1670 | [(set (match_operand:VF 0 "register_operand") |
07c0852e | 1671 | (mult:VF |
06bc9e41 AI |
1672 | (match_operand:VF 1 "<round_nimm_predicate>") |
1673 | (match_operand:VF 2 "<round_nimm_predicate>")))] | |
1674 | "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>" | |
d6023b50 UB |
1675 | "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);") |
1676 | ||
06bc9e41 | 1677 | (define_insn "*mul<mode>3<mask_name><round_name>" |
b86f6e9e | 1678 | [(set (match_operand:VF 0 "register_operand" "=x,v") |
07c0852e | 1679 | (mult:VF |
06bc9e41 AI |
1680 | (match_operand:VF 1 "<round_nimm_predicate>" "%0,v") |
1681 | (match_operand:VF 2 "<round_nimm_predicate>" "xm,<round_constraint>")))] | |
1682 | "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands) && <mask_mode512bit_condition> && <round_mode512bit_condition>" | |
07c0852e UB |
1683 | "@ |
1684 | mul<ssemodesuffix>\t{%2, %0|%0, %2} | |
06bc9e41 | 1685 | vmul<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}" |
07c0852e UB |
1686 | [(set_attr "isa" "noavx,avx") |
1687 | (set_attr "type" "ssemul") | |
47490470 | 1688 | (set_attr "prefix" "<mask_prefix3>") |
01284895 | 1689 | (set_attr "btver2_decode" "direct,double") |
d6023b50 | 1690 | (set_attr "mode" "<MODE>")]) |
ef719a44 | 1691 | |
075691af | 1692 | (define_insn "<sse>_vm<multdiv_mnemonic><mode>3<round_name>" |
3f97cb0b | 1693 | [(set (match_operand:VF_128 0 "register_operand" "=x,v") |
07c0852e | 1694 | (vec_merge:VF_128 |
67f783cb | 1695 | (multdiv:VF_128 |
3f97cb0b | 1696 | (match_operand:VF_128 1 "register_operand" "0,v") |
075691af | 1697 | (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_constraint>")) |
ef719a44 RH |
1698 | (match_dup 1) |
1699 | (const_int 1)))] | |
6bec6c98 | 1700 | "TARGET_SSE" |
07c0852e | 1701 | "@ |
67f783cb | 1702 | <multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2} |
075691af | 1703 | v<multdiv_mnemonic><ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %<iptr>2<round_op3>}" |
07c0852e | 1704 | [(set_attr "isa" "noavx,avx") |
67f783cb | 1705 | (set_attr "type" "sse<multdiv_mnemonic>") |
075691af | 1706 | (set_attr "prefix" "<round_prefix>") |
67f783cb | 1707 | (set_attr "btver2_decode" "direct,double") |
d6023b50 | 1708 | (set_attr "mode" "<ssescalarmode>")]) |
ef719a44 | 1709 | |
07c0852e | 1710 | (define_expand "div<mode>3" |
82e86dc6 UB |
1711 | [(set (match_operand:VF2 0 "register_operand") |
1712 | (div:VF2 (match_operand:VF2 1 "register_operand") | |
1713 | (match_operand:VF2 2 "nonimmediate_operand")))] | |
6bec6c98 | 1714 | "TARGET_SSE2" |
07c0852e UB |
1715 | "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);") |
1716 | ||
1717 | (define_expand "div<mode>3" | |
82e86dc6 UB |
1718 | [(set (match_operand:VF1 0 "register_operand") |
1719 | (div:VF1 (match_operand:VF1 1 "register_operand") | |
1720 | (match_operand:VF1 2 "nonimmediate_operand")))] | |
6bec6c98 | 1721 | "TARGET_SSE" |
95879c72 | 1722 | { |
07c0852e | 1723 | ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands); |
95879c72 | 1724 | |
b86b9f44 MM |
1725 | if (TARGET_SSE_MATH |
1726 | && TARGET_RECIP_VEC_DIV | |
1727 | && !optimize_insn_for_size_p () | |
95879c72 L |
1728 | && flag_finite_math_only && !flag_trapping_math |
1729 | && flag_unsafe_math_optimizations) | |
1730 | { | |
07c0852e | 1731 | ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode); |
6b889d89 UB |
1732 | DONE; |
1733 | } | |
1734 | }) | |
ef719a44 | 1735 | |
06bc9e41 | 1736 | (define_insn "<sse>_div<mode>3<mask_name><round_name>" |
3f97cb0b | 1737 | [(set (match_operand:VF 0 "register_operand" "=x,v") |
07c0852e | 1738 | (div:VF |
3f97cb0b | 1739 | (match_operand:VF 1 "register_operand" "0,v") |
06bc9e41 AI |
1740 | (match_operand:VF 2 "<round_nimm_predicate>" "xm,<round_constraint>")))] |
1741 | "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>" | |
07c0852e UB |
1742 | "@ |
1743 | div<ssemodesuffix>\t{%2, %0|%0, %2} | |
06bc9e41 | 1744 | vdiv<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}" |
07c0852e UB |
1745 | [(set_attr "isa" "noavx,avx") |
1746 | (set_attr "type" "ssediv") | |
47490470 | 1747 | (set_attr "prefix" "<mask_prefix3>") |
d6023b50 | 1748 | (set_attr "mode" "<MODE>")]) |
ef719a44 | 1749 | |
07c0852e | 1750 | (define_insn "<sse>_rcp<mode>2" |
a9ccbba2 AI |
1751 | [(set (match_operand:VF1_128_256 0 "register_operand" "=x") |
1752 | (unspec:VF1_128_256 | |
1753 | [(match_operand:VF1_128_256 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))] | |
6bec6c98 | 1754 | "TARGET_SSE" |
95879c72 | 1755 | "%vrcpps\t{%1, %0|%0, %1}" |
ef719a44 | 1756 | [(set_attr "type" "sse") |
b6837b94 | 1757 | (set_attr "atom_sse_attr" "rcp") |
01284895 | 1758 | (set_attr "btver2_sse_attr" "rcp") |
95879c72 | 1759 | (set_attr "prefix" "maybe_vex") |
07c0852e | 1760 | (set_attr "mode" "<MODE>")]) |
95879c72 | 1761 | |
ef719a44 | 1762 | (define_insn "sse_vmrcpv4sf2" |
07c0852e | 1763 | [(set (match_operand:V4SF 0 "register_operand" "=x,x") |
ef719a44 | 1764 | (vec_merge:V4SF |
07c0852e | 1765 | (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")] |
ef719a44 | 1766 | UNSPEC_RCP) |
07c0852e | 1767 | (match_operand:V4SF 2 "register_operand" "0,x") |
ef719a44 RH |
1768 | (const_int 1)))] |
1769 | "TARGET_SSE" | |
07c0852e | 1770 | "@ |
eabb5f48 UB |
1771 | rcpss\t{%1, %0|%0, %k1} |
1772 | vrcpss\t{%1, %2, %0|%0, %2, %k1}" | |
07c0852e UB |
1773 | [(set_attr "isa" "noavx,avx") |
1774 | (set_attr "type" "sse") | |
f220a4f4 | 1775 | (set_attr "ssememalign" "32") |
b6837b94 | 1776 | (set_attr "atom_sse_attr" "rcp") |
01284895 | 1777 | (set_attr "btver2_sse_attr" "rcp") |
07c0852e | 1778 | (set_attr "prefix" "orig,vex") |
ef719a44 RH |
1779 | (set_attr "mode" "SF")]) |
1780 | ||
47490470 | 1781 | (define_insn "<mask_codefor>rcp14<mode><mask_name>" |
b040ded3 AI |
1782 | [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v") |
1783 | (unspec:VF_AVX512VL | |
1784 | [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")] | |
afb4ac68 AI |
1785 | UNSPEC_RCP14))] |
1786 | "TARGET_AVX512F" | |
47490470 | 1787 | "vrcp14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" |
afb4ac68 AI |
1788 | [(set_attr "type" "sse") |
1789 | (set_attr "prefix" "evex") | |
1790 | (set_attr "mode" "<MODE>")]) | |
1791 | ||
075691af | 1792 | (define_insn "srcp14<mode>" |
afb4ac68 AI |
1793 | [(set (match_operand:VF_128 0 "register_operand" "=v") |
1794 | (vec_merge:VF_128 | |
1795 | (unspec:VF_128 | |
c56a42b9 | 1796 | [(match_operand:VF_128 1 "nonimmediate_operand" "vm")] |
afb4ac68 | 1797 | UNSPEC_RCP14) |
c56a42b9 | 1798 | (match_operand:VF_128 2 "register_operand" "v") |
afb4ac68 AI |
1799 | (const_int 1)))] |
1800 | "TARGET_AVX512F" | |
c56a42b9 | 1801 | "vrcp14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}" |
afb4ac68 AI |
1802 | [(set_attr "type" "sse") |
1803 | (set_attr "prefix" "evex") | |
1804 | (set_attr "mode" "<MODE>")]) | |
1805 | ||
07c0852e | 1806 | (define_expand "sqrt<mode>2" |
82e86dc6 UB |
1807 | [(set (match_operand:VF2 0 "register_operand") |
1808 | (sqrt:VF2 (match_operand:VF2 1 "nonimmediate_operand")))] | |
6bec6c98 | 1809 | "TARGET_SSE2") |
95879c72 | 1810 | |
07c0852e | 1811 | (define_expand "sqrt<mode>2" |
82e86dc6 UB |
1812 | [(set (match_operand:VF1 0 "register_operand") |
1813 | (sqrt:VF1 (match_operand:VF1 1 "nonimmediate_operand")))] | |
6bec6c98 | 1814 | "TARGET_SSE" |
d6023b50 | 1815 | { |
b86b9f44 MM |
1816 | if (TARGET_SSE_MATH |
1817 | && TARGET_RECIP_VEC_SQRT | |
1818 | && !optimize_insn_for_size_p () | |
d6023b50 UB |
1819 | && flag_finite_math_only && !flag_trapping_math |
1820 | && flag_unsafe_math_optimizations) | |
1821 | { | |
07c0852e | 1822 | ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false); |
d6023b50 UB |
1823 | DONE; |
1824 | } | |
1825 | }) | |
1826 | ||
06bc9e41 | 1827 | (define_insn "<sse>_sqrt<mode>2<mask_name><round_name>" |
3f97cb0b | 1828 | [(set (match_operand:VF 0 "register_operand" "=v") |
06bc9e41 AI |
1829 | (sqrt:VF (match_operand:VF 1 "<round_nimm_predicate>" "<round_constraint>")))] |
1830 | "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>" | |
1831 | "%vsqrt<ssemodesuffix>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}" | |
d6023b50 | 1832 | [(set_attr "type" "sse") |
b6837b94 | 1833 | (set_attr "atom_sse_attr" "sqrt") |
01284895 | 1834 | (set_attr "btver2_sse_attr" "sqrt") |
95879c72 | 1835 | (set_attr "prefix" "maybe_vex") |
07c0852e | 1836 | (set_attr "mode" "<MODE>")]) |
95879c72 | 1837 | |
075691af | 1838 | (define_insn "<sse>_vmsqrt<mode>2<round_name>" |
3f97cb0b | 1839 | [(set (match_operand:VF_128 0 "register_operand" "=x,v") |
07c0852e UB |
1840 | (vec_merge:VF_128 |
1841 | (sqrt:VF_128 | |
075691af | 1842 | (match_operand:VF_128 1 "nonimmediate_operand" "xm,<round_constraint>")) |
3f97cb0b | 1843 | (match_operand:VF_128 2 "register_operand" "0,v") |
d6023b50 | 1844 | (const_int 1)))] |
6bec6c98 | 1845 | "TARGET_SSE" |
07c0852e | 1846 | "@ |
eabb5f48 | 1847 | sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1} |
075691af | 1848 | vsqrt<ssescalarmodesuffix>\t{<round_op3>%1, %2, %0|%0, %2, %<iptr>1<round_op3>}" |
07c0852e UB |
1849 | [(set_attr "isa" "noavx,avx") |
1850 | (set_attr "type" "sse") | |
b6837b94 | 1851 | (set_attr "atom_sse_attr" "sqrt") |
075691af | 1852 | (set_attr "prefix" "<round_prefix>") |
47490470 | 1853 | (set_attr "btver2_sse_attr" "sqrt") |
d6023b50 UB |
1854 | (set_attr "mode" "<ssescalarmode>")]) |
1855 | ||
07c0852e | 1856 | (define_expand "rsqrt<mode>2" |
a9ccbba2 AI |
1857 | [(set (match_operand:VF1_128_256 0 "register_operand") |
1858 | (unspec:VF1_128_256 | |
1859 | [(match_operand:VF1_128_256 1 "nonimmediate_operand")] UNSPEC_RSQRT))] | |
f1bf33ce | 1860 | "TARGET_SSE_MATH" |
6b889d89 | 1861 | { |
07c0852e | 1862 | ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true); |
f1bf33ce | 1863 | DONE; |
6b889d89 UB |
1864 | }) |
1865 | ||
07c0852e | 1866 | (define_insn "<sse>_rsqrt<mode>2" |
a9ccbba2 AI |
1867 | [(set (match_operand:VF1_128_256 0 "register_operand" "=x") |
1868 | (unspec:VF1_128_256 | |
1869 | [(match_operand:VF1_128_256 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))] | |
6bec6c98 | 1870 | "TARGET_SSE" |
95879c72 | 1871 | "%vrsqrtps\t{%1, %0|%0, %1}" |
3dc0f23a | 1872 | [(set_attr "type" "sse") |
95879c72 | 1873 | (set_attr "prefix" "maybe_vex") |
07c0852e | 1874 | (set_attr "mode" "<MODE>")]) |
95879c72 | 1875 | |
47490470 | 1876 | (define_insn "<mask_codefor>rsqrt14<mode><mask_name>" |
b040ded3 AI |
1877 | [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v") |
1878 | (unspec:VF_AVX512VL | |
1879 | [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")] | |
afb4ac68 AI |
1880 | UNSPEC_RSQRT14))] |
1881 | "TARGET_AVX512F" | |
47490470 | 1882 | "vrsqrt14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" |
afb4ac68 AI |
1883 | [(set_attr "type" "sse") |
1884 | (set_attr "prefix" "evex") | |
1885 | (set_attr "mode" "<MODE>")]) | |
1886 | ||
075691af | 1887 | (define_insn "rsqrt14<mode>" |
afb4ac68 AI |
1888 | [(set (match_operand:VF_128 0 "register_operand" "=v") |
1889 | (vec_merge:VF_128 | |
1890 | (unspec:VF_128 | |
df62b4af | 1891 | [(match_operand:VF_128 1 "nonimmediate_operand" "vm")] |
afb4ac68 | 1892 | UNSPEC_RSQRT14) |
df62b4af | 1893 | (match_operand:VF_128 2 "register_operand" "v") |
afb4ac68 AI |
1894 | (const_int 1)))] |
1895 | "TARGET_AVX512F" | |
df62b4af | 1896 | "vrsqrt14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}" |
afb4ac68 AI |
1897 | [(set_attr "type" "sse") |
1898 | (set_attr "prefix" "evex") | |
1899 | (set_attr "mode" "<MODE>")]) | |
1900 | ||
ef719a44 | 1901 | (define_insn "sse_vmrsqrtv4sf2" |
07c0852e | 1902 | [(set (match_operand:V4SF 0 "register_operand" "=x,x") |
ef719a44 | 1903 | (vec_merge:V4SF |
07c0852e | 1904 | (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")] |
ef719a44 | 1905 | UNSPEC_RSQRT) |
07c0852e | 1906 | (match_operand:V4SF 2 "register_operand" "0,x") |
ef719a44 RH |
1907 | (const_int 1)))] |
1908 | "TARGET_SSE" | |
07c0852e | 1909 | "@ |
eabb5f48 UB |
1910 | rsqrtss\t{%1, %0|%0, %k1} |
1911 | vrsqrtss\t{%1, %2, %0|%0, %2, %k1}" | |
07c0852e UB |
1912 | [(set_attr "isa" "noavx,avx") |
1913 | (set_attr "type" "sse") | |
f220a4f4 | 1914 | (set_attr "ssememalign" "32") |
07c0852e | 1915 | (set_attr "prefix" "orig,vex") |
ef719a44 RH |
1916 | (set_attr "mode" "SF")]) |
1917 | ||
115a33c2 | 1918 | ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX |
4f3f76e6 | 1919 | ;; isn't really correct, as those rtl operators aren't defined when |
115a33c2 RH |
1920 | ;; applied to NaNs. Hopefully the optimizers won't get too smart on us. |
1921 | ||
8a6ef760 | 1922 | (define_expand "<code><mode>3<mask_name><round_saeonly_name>" |
82e86dc6 | 1923 | [(set (match_operand:VF 0 "register_operand") |
07c0852e | 1924 | (smaxmin:VF |
8a6ef760 AI |
1925 | (match_operand:VF 1 "<round_saeonly_nimm_predicate>") |
1926 | (match_operand:VF 2 "<round_saeonly_nimm_predicate>")))] | |
1927 | "TARGET_SSE && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>" | |
115a33c2 RH |
1928 | { |
1929 | if (!flag_finite_math_only) | |
d6023b50 | 1930 | operands[1] = force_reg (<MODE>mode, operands[1]); |
78e8956b | 1931 | ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands); |
115a33c2 | 1932 | }) |
ef719a44 | 1933 | |
8a6ef760 | 1934 | (define_insn "*<code><mode>3_finite<mask_name><round_saeonly_name>" |
3f97cb0b | 1935 | [(set (match_operand:VF 0 "register_operand" "=x,v") |
07c0852e | 1936 | (smaxmin:VF |
8a6ef760 AI |
1937 | (match_operand:VF 1 "<round_saeonly_nimm_predicate>" "%0,v") |
1938 | (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xm,<round_saeonly_constraint>")))] | |
6bec6c98 | 1939 | "TARGET_SSE && flag_finite_math_only |
47490470 | 1940 | && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) |
8a6ef760 | 1941 | && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>" |
07c0852e UB |
1942 | "@ |
1943 | <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2} | |
8a6ef760 | 1944 | v<maxmin_float><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}" |
07c0852e UB |
1945 | [(set_attr "isa" "noavx,avx") |
1946 | (set_attr "type" "sseadd") | |
01284895 | 1947 | (set_attr "btver2_sse_attr" "maxmin") |
47490470 | 1948 | (set_attr "prefix" "<mask_prefix3>") |
d6023b50 | 1949 | (set_attr "mode" "<MODE>")]) |
ef719a44 | 1950 | |
8a6ef760 | 1951 | (define_insn "*<code><mode>3<mask_name><round_saeonly_name>" |
3f97cb0b | 1952 | [(set (match_operand:VF 0 "register_operand" "=x,v") |
07c0852e | 1953 | (smaxmin:VF |
3f97cb0b | 1954 | (match_operand:VF 1 "register_operand" "0,v") |
8a6ef760 | 1955 | (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xm,<round_saeonly_constraint>")))] |
47490470 | 1956 | "TARGET_SSE && !flag_finite_math_only |
8a6ef760 | 1957 | && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>" |
07c0852e UB |
1958 | "@ |
1959 | <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2} | |
8a6ef760 | 1960 | v<maxmin_float><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}" |
07c0852e UB |
1961 | [(set_attr "isa" "noavx,avx") |
1962 | (set_attr "type" "sseadd") | |
01284895 | 1963 | (set_attr "btver2_sse_attr" "maxmin") |
47490470 | 1964 | (set_attr "prefix" "<mask_prefix3>") |
d6023b50 | 1965 | (set_attr "mode" "<MODE>")]) |
115a33c2 | 1966 | |
075691af | 1967 | (define_insn "<sse>_vm<code><mode>3<round_saeonly_name>" |
3f97cb0b | 1968 | [(set (match_operand:VF_128 0 "register_operand" "=x,v") |
07c0852e UB |
1969 | (vec_merge:VF_128 |
1970 | (smaxmin:VF_128 | |
3f97cb0b | 1971 | (match_operand:VF_128 1 "register_operand" "0,v") |
075691af | 1972 | (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_saeonly_constraint>")) |
115a33c2 RH |
1973 | (match_dup 1) |
1974 | (const_int 1)))] | |
6bec6c98 | 1975 | "TARGET_SSE" |
07c0852e | 1976 | "@ |
eabb5f48 | 1977 | <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2} |
075691af | 1978 | v<maxmin_float><ssescalarmodesuffix>\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %<iptr>2<round_saeonly_op3>}" |
07c0852e UB |
1979 | [(set_attr "isa" "noavx,avx") |
1980 | (set_attr "type" "sse") | |
01284895 | 1981 | (set_attr "btver2_sse_attr" "maxmin") |
075691af | 1982 | (set_attr "prefix" "<round_saeonly_prefix>") |
d6023b50 | 1983 | (set_attr "mode" "<ssescalarmode>")]) |
ef719a44 | 1984 | |
79e72538 UB |
1985 | ;; These versions of the min/max patterns implement exactly the operations |
1986 | ;; min = (op1 < op2 ? op1 : op2) | |
1987 | ;; max = (!(op1 < op2) ? op1 : op2) | |
1988 | ;; Their operands are not commutative, and thus they may be used in the | |
1989 | ;; presence of -0.0 and NaN. | |
1990 | ||
d6023b50 | 1991 | (define_insn "*ieee_smin<mode>3" |
b86f6e9e | 1992 | [(set (match_operand:VF 0 "register_operand" "=v,v") |
07c0852e | 1993 | (unspec:VF |
b86f6e9e AI |
1994 | [(match_operand:VF 1 "register_operand" "0,v") |
1995 | (match_operand:VF 2 "nonimmediate_operand" "vm,vm")] | |
d6023b50 | 1996 | UNSPEC_IEEE_MIN))] |
6bec6c98 | 1997 | "TARGET_SSE" |
07c0852e | 1998 | "@ |
266446be L |
1999 | min<ssemodesuffix>\t{%2, %0|%0, %2} |
2000 | vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" | |
07c0852e UB |
2001 | [(set_attr "isa" "noavx,avx") |
2002 | (set_attr "type" "sseadd") | |
2003 | (set_attr "prefix" "orig,vex") | |
d6023b50 | 2004 | (set_attr "mode" "<MODE>")]) |
79e72538 | 2005 | |
d6023b50 | 2006 | (define_insn "*ieee_smax<mode>3" |
b86f6e9e | 2007 | [(set (match_operand:VF 0 "register_operand" "=v,v") |
07c0852e | 2008 | (unspec:VF |
b86f6e9e AI |
2009 | [(match_operand:VF 1 "register_operand" "0,v") |
2010 | (match_operand:VF 2 "nonimmediate_operand" "vm,vm")] | |
d6023b50 | 2011 | UNSPEC_IEEE_MAX))] |
6bec6c98 | 2012 | "TARGET_SSE" |
07c0852e | 2013 | "@ |
266446be L |
2014 | max<ssemodesuffix>\t{%2, %0|%0, %2} |
2015 | vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" | |
07c0852e UB |
2016 | [(set_attr "isa" "noavx,avx") |
2017 | (set_attr "type" "sseadd") | |
2018 | (set_attr "prefix" "orig,vex") | |
d6023b50 | 2019 | (set_attr "mode" "<MODE>")]) |
79e72538 | 2020 | |
95879c72 L |
2021 | (define_insn "avx_addsubv4df3" |
2022 | [(set (match_operand:V4DF 0 "register_operand" "=x") | |
2023 | (vec_merge:V4DF | |
2024 | (plus:V4DF | |
2025 | (match_operand:V4DF 1 "register_operand" "x") | |
2026 | (match_operand:V4DF 2 "nonimmediate_operand" "xm")) | |
2027 | (minus:V4DF (match_dup 1) (match_dup 2)) | |
cec8874f | 2028 | (const_int 10)))] |
95879c72 L |
2029 | "TARGET_AVX" |
2030 | "vaddsubpd\t{%2, %1, %0|%0, %1, %2}" | |
2031 | [(set_attr "type" "sseadd") | |
2032 | (set_attr "prefix" "vex") | |
2033 | (set_attr "mode" "V4DF")]) | |
2034 | ||
07c0852e UB |
2035 | (define_insn "sse3_addsubv2df3" |
2036 | [(set (match_operand:V2DF 0 "register_operand" "=x,x") | |
2037 | (vec_merge:V2DF | |
2038 | (plus:V2DF | |
2039 | (match_operand:V2DF 1 "register_operand" "0,x") | |
2040 | (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")) | |
2041 | (minus:V2DF (match_dup 1) (match_dup 2)) | |
2042 | (const_int 2)))] | |
2043 | "TARGET_SSE3" | |
2044 | "@ | |
2045 | addsubpd\t{%2, %0|%0, %2} | |
2046 | vaddsubpd\t{%2, %1, %0|%0, %1, %2}" | |
2047 | [(set_attr "isa" "noavx,avx") | |
2048 | (set_attr "type" "sseadd") | |
2049 | (set_attr "atom_unit" "complex") | |
2050 | (set_attr "prefix" "orig,vex") | |
2051 | (set_attr "mode" "V2DF")]) | |
2052 | ||
2053 | (define_insn "avx_addsubv8sf3" | |
2054 | [(set (match_operand:V8SF 0 "register_operand" "=x") | |
2055 | (vec_merge:V8SF | |
2056 | (plus:V8SF | |
2057 | (match_operand:V8SF 1 "register_operand" "x") | |
2058 | (match_operand:V8SF 2 "nonimmediate_operand" "xm")) | |
2059 | (minus:V8SF (match_dup 1) (match_dup 2)) | |
2060 | (const_int 170)))] | |
95879c72 L |
2061 | "TARGET_AVX" |
2062 | "vaddsubps\t{%2, %1, %0|%0, %1, %2}" | |
2063 | [(set_attr "type" "sseadd") | |
2064 | (set_attr "prefix" "vex") | |
07c0852e | 2065 | (set_attr "mode" "V8SF")]) |
95879c72 | 2066 | |
ef719a44 | 2067 | (define_insn "sse3_addsubv4sf3" |
07c0852e | 2068 | [(set (match_operand:V4SF 0 "register_operand" "=x,x") |
ef719a44 RH |
2069 | (vec_merge:V4SF |
2070 | (plus:V4SF | |
07c0852e UB |
2071 | (match_operand:V4SF 1 "register_operand" "0,x") |
2072 | (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")) | |
ef719a44 | 2073 | (minus:V4SF (match_dup 1) (match_dup 2)) |
cec8874f | 2074 | (const_int 10)))] |
ef719a44 | 2075 | "TARGET_SSE3" |
07c0852e UB |
2076 | "@ |
2077 | addsubps\t{%2, %0|%0, %2} | |
2078 | vaddsubps\t{%2, %1, %0|%0, %1, %2}" | |
2079 | [(set_attr "isa" "noavx,avx") | |
2080 | (set_attr "type" "sseadd") | |
2081 | (set_attr "prefix" "orig,vex") | |
2082 | (set_attr "prefix_rep" "1,*") | |
ef719a44 RH |
2083 | (set_attr "mode" "V4SF")]) |
2084 | ||
95879c72 L |
2085 | (define_insn "avx_h<plusminus_insn>v4df3" |
2086 | [(set (match_operand:V4DF 0 "register_operand" "=x") | |
2087 | (vec_concat:V4DF | |
2088 | (vec_concat:V2DF | |
2089 | (plusminus:DF | |
2090 | (vec_select:DF | |
2091 | (match_operand:V4DF 1 "register_operand" "x") | |
2092 | (parallel [(const_int 0)])) | |
2093 | (vec_select:DF (match_dup 1) (parallel [(const_int 1)]))) | |
95879c72 L |
2094 | (plusminus:DF |
2095 | (vec_select:DF | |
2096 | (match_operand:V4DF 2 "nonimmediate_operand" "xm") | |
2097 | (parallel [(const_int 0)])) | |
a15d0d03 UB |
2098 | (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))) |
2099 | (vec_concat:V2DF | |
2100 | (plusminus:DF | |
2101 | (vec_select:DF (match_dup 1) (parallel [(const_int 2)])) | |
2102 | (vec_select:DF (match_dup 1) (parallel [(const_int 3)]))) | |
95879c72 L |
2103 | (plusminus:DF |
2104 | (vec_select:DF (match_dup 2) (parallel [(const_int 2)])) | |
2105 | (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))] | |
2106 | "TARGET_AVX" | |
2107 | "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}" | |
2108 | [(set_attr "type" "sseadd") | |
2109 | (set_attr "prefix" "vex") | |
2110 | (set_attr "mode" "V4DF")]) | |
2111 | ||
b790dea2 MG |
2112 | (define_expand "sse3_haddv2df3" |
2113 | [(set (match_operand:V2DF 0 "register_operand") | |
2114 | (vec_concat:V2DF | |
2115 | (plus:DF | |
2116 | (vec_select:DF | |
2117 | (match_operand:V2DF 1 "register_operand") | |
2118 | (parallel [(const_int 0)])) | |
2119 | (vec_select:DF (match_dup 1) (parallel [(const_int 1)]))) | |
2120 | (plus:DF | |
2121 | (vec_select:DF | |
2122 | (match_operand:V2DF 2 "nonimmediate_operand") | |
2123 | (parallel [(const_int 0)])) | |
2124 | (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))] | |
2125 | "TARGET_SSE3") | |
2126 | ||
2127 | (define_insn "*sse3_haddv2df3" | |
07c0852e UB |
2128 | [(set (match_operand:V2DF 0 "register_operand" "=x,x") |
2129 | (vec_concat:V2DF | |
b790dea2 MG |
2130 | (plus:DF |
2131 | (vec_select:DF | |
2132 | (match_operand:V2DF 1 "register_operand" "0,x") | |
2133 | (parallel [(match_operand:SI 3 "const_0_to_1_operand")])) | |
2134 | (vec_select:DF | |
2135 | (match_dup 1) | |
2136 | (parallel [(match_operand:SI 4 "const_0_to_1_operand")]))) | |
2137 | (plus:DF | |
2138 | (vec_select:DF | |
2139 | (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm") | |
2140 | (parallel [(match_operand:SI 5 "const_0_to_1_operand")])) | |
2141 | (vec_select:DF | |
2142 | (match_dup 2) | |
2143 | (parallel [(match_operand:SI 6 "const_0_to_1_operand")])))))] | |
2144 | "TARGET_SSE3 | |
2145 | && INTVAL (operands[3]) != INTVAL (operands[4]) | |
2146 | && INTVAL (operands[5]) != INTVAL (operands[6])" | |
2147 | "@ | |
2148 | haddpd\t{%2, %0|%0, %2} | |
2149 | vhaddpd\t{%2, %1, %0|%0, %1, %2}" | |
2150 | [(set_attr "isa" "noavx,avx") | |
2151 | (set_attr "type" "sseadd") | |
2152 | (set_attr "prefix" "orig,vex") | |
2153 | (set_attr "mode" "V2DF")]) | |
2154 | ||
2155 | (define_insn "sse3_hsubv2df3" | |
2156 | [(set (match_operand:V2DF 0 "register_operand" "=x,x") | |
2157 | (vec_concat:V2DF | |
2158 | (minus:DF | |
07c0852e UB |
2159 | (vec_select:DF |
2160 | (match_operand:V2DF 1 "register_operand" "0,x") | |
2161 | (parallel [(const_int 0)])) | |
2162 | (vec_select:DF (match_dup 1) (parallel [(const_int 1)]))) | |
b790dea2 | 2163 | (minus:DF |
07c0852e UB |
2164 | (vec_select:DF |
2165 | (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm") | |
2166 | (parallel [(const_int 0)])) | |
2167 | (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))] | |
2168 | "TARGET_SSE3" | |
2169 | "@ | |
b790dea2 MG |
2170 | hsubpd\t{%2, %0|%0, %2} |
2171 | vhsubpd\t{%2, %1, %0|%0, %1, %2}" | |
07c0852e UB |
2172 | [(set_attr "isa" "noavx,avx") |
2173 | (set_attr "type" "sseadd") | |
2174 | (set_attr "prefix" "orig,vex") | |
2175 | (set_attr "mode" "V2DF")]) | |
2176 | ||
b790dea2 MG |
2177 | (define_insn "*sse3_haddv2df3_low" |
2178 | [(set (match_operand:DF 0 "register_operand" "=x,x") | |
2179 | (plus:DF | |
2180 | (vec_select:DF | |
2181 | (match_operand:V2DF 1 "register_operand" "0,x") | |
2182 | (parallel [(match_operand:SI 2 "const_0_to_1_operand")])) | |
2183 | (vec_select:DF | |
2184 | (match_dup 1) | |
2185 | (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))))] | |
2186 | "TARGET_SSE3 | |
2187 | && INTVAL (operands[2]) != INTVAL (operands[3])" | |
2188 | "@ | |
2189 | haddpd\t{%0, %0|%0, %0} | |
2190 | vhaddpd\t{%1, %1, %0|%0, %1, %1}" | |
2191 | [(set_attr "isa" "noavx,avx") | |
2192 | (set_attr "type" "sseadd1") | |
2193 | (set_attr "prefix" "orig,vex") | |
2194 | (set_attr "mode" "V2DF")]) | |
2195 | ||
2196 | (define_insn "*sse3_hsubv2df3_low" | |
2197 | [(set (match_operand:DF 0 "register_operand" "=x,x") | |
2198 | (minus:DF | |
2199 | (vec_select:DF | |
2200 | (match_operand:V2DF 1 "register_operand" "0,x") | |
2201 | (parallel [(const_int 0)])) | |
2202 | (vec_select:DF | |
2203 | (match_dup 1) | |
2204 | (parallel [(const_int 1)]))))] | |
2205 | "TARGET_SSE3" | |
2206 | "@ | |
2207 | hsubpd\t{%0, %0|%0, %0} | |
2208 | vhsubpd\t{%1, %1, %0|%0, %1, %1}" | |
2209 | [(set_attr "isa" "noavx,avx") | |
2210 | (set_attr "type" "sseadd1") | |
2211 | (set_attr "prefix" "orig,vex") | |
2212 | (set_attr "mode" "V2DF")]) | |
2213 | ||
95879c72 L |
2214 | (define_insn "avx_h<plusminus_insn>v8sf3" |
2215 | [(set (match_operand:V8SF 0 "register_operand" "=x") | |
2216 | (vec_concat:V8SF | |
2217 | (vec_concat:V4SF | |
2218 | (vec_concat:V2SF | |
2219 | (plusminus:SF | |
2220 | (vec_select:SF | |
2221 | (match_operand:V8SF 1 "register_operand" "x") | |
2222 | (parallel [(const_int 0)])) | |
2223 | (vec_select:SF (match_dup 1) (parallel [(const_int 1)]))) | |
2224 | (plusminus:SF | |
2225 | (vec_select:SF (match_dup 1) (parallel [(const_int 2)])) | |
2226 | (vec_select:SF (match_dup 1) (parallel [(const_int 3)])))) | |
2227 | (vec_concat:V2SF | |
2228 | (plusminus:SF | |
2229 | (vec_select:SF | |
2230 | (match_operand:V8SF 2 "nonimmediate_operand" "xm") | |
2231 | (parallel [(const_int 0)])) | |
2232 | (vec_select:SF (match_dup 2) (parallel [(const_int 1)]))) | |
2233 | (plusminus:SF | |
2234 | (vec_select:SF (match_dup 2) (parallel [(const_int 2)])) | |
2235 | (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))) | |
2236 | (vec_concat:V4SF | |
2237 | (vec_concat:V2SF | |
2238 | (plusminus:SF | |
2239 | (vec_select:SF (match_dup 1) (parallel [(const_int 4)])) | |
2240 | (vec_select:SF (match_dup 1) (parallel [(const_int 5)]))) | |
2241 | (plusminus:SF | |
2242 | (vec_select:SF (match_dup 1) (parallel [(const_int 6)])) | |
2243 | (vec_select:SF (match_dup 1) (parallel [(const_int 7)])))) | |
2244 | (vec_concat:V2SF | |
2245 | (plusminus:SF | |
2246 | (vec_select:SF (match_dup 2) (parallel [(const_int 4)])) | |
2247 | (vec_select:SF (match_dup 2) (parallel [(const_int 5)]))) | |
2248 | (plusminus:SF | |
2249 | (vec_select:SF (match_dup 2) (parallel [(const_int 6)])) | |
2250 | (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))] | |
2251 | "TARGET_AVX" | |
2252 | "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}" | |
2253 | [(set_attr "type" "sseadd") | |
2254 | (set_attr "prefix" "vex") | |
2255 | (set_attr "mode" "V8SF")]) | |
2256 | ||
d1c3b587 | 2257 | (define_insn "sse3_h<plusminus_insn>v4sf3" |
07c0852e | 2258 | [(set (match_operand:V4SF 0 "register_operand" "=x,x") |
d6023b50 UB |
2259 | (vec_concat:V4SF |
2260 | (vec_concat:V2SF | |
78e8956b | 2261 | (plusminus:SF |
d6023b50 | 2262 | (vec_select:SF |
07c0852e | 2263 | (match_operand:V4SF 1 "register_operand" "0,x") |
d6023b50 UB |
2264 | (parallel [(const_int 0)])) |
2265 | (vec_select:SF (match_dup 1) (parallel [(const_int 1)]))) | |
78e8956b | 2266 | (plusminus:SF |
ef719a44 RH |
2267 | (vec_select:SF (match_dup 1) (parallel [(const_int 2)])) |
2268 | (vec_select:SF (match_dup 1) (parallel [(const_int 3)])))) | |
2269 | (vec_concat:V2SF | |
78e8956b | 2270 | (plusminus:SF |
ef719a44 | 2271 | (vec_select:SF |
07c0852e | 2272 | (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm") |
ef719a44 RH |
2273 | (parallel [(const_int 0)])) |
2274 | (vec_select:SF (match_dup 2) (parallel [(const_int 1)]))) | |
78e8956b | 2275 | (plusminus:SF |
ef719a44 RH |
2276 | (vec_select:SF (match_dup 2) (parallel [(const_int 2)])) |
2277 | (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))] | |
2278 | "TARGET_SSE3" | |
07c0852e UB |
2279 | "@ |
2280 | h<plusminus_mnemonic>ps\t{%2, %0|%0, %2} | |
2281 | vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}" | |
2282 | [(set_attr "isa" "noavx,avx") | |
2283 | (set_attr "type" "sseadd") | |
b6837b94 | 2284 | (set_attr "atom_unit" "complex") |
07c0852e UB |
2285 | (set_attr "prefix" "orig,vex") |
2286 | (set_attr "prefix_rep" "1,*") | |
ef719a44 RH |
2287 | (set_attr "mode" "V4SF")]) |
2288 | ||
2e2206fa AI |
2289 | (define_expand "reduc_splus_v8df" |
2290 | [(match_operand:V8DF 0 "register_operand") | |
2291 | (match_operand:V8DF 1 "register_operand")] | |
2292 | "TARGET_AVX512F" | |
2293 | { | |
2294 | ix86_expand_reduc (gen_addv8df3, operands[0], operands[1]); | |
2295 | DONE; | |
2296 | }) | |
2297 | ||
07c0852e | 2298 | (define_expand "reduc_splus_v4df" |
82e86dc6 UB |
2299 | [(match_operand:V4DF 0 "register_operand") |
2300 | (match_operand:V4DF 1 "register_operand")] | |
95879c72 | 2301 | "TARGET_AVX" |
07c0852e UB |
2302 | { |
2303 | rtx tmp = gen_reg_rtx (V4DFmode); | |
2304 | rtx tmp2 = gen_reg_rtx (V4DFmode); | |
2305 | emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1])); | |
2306 | emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1))); | |
2307 | emit_insn (gen_addv4df3 (operands[0], tmp, tmp2)); | |
2308 | DONE; | |
2309 | }) | |
95879c72 | 2310 | |
07c0852e | 2311 | (define_expand "reduc_splus_v2df" |
82e86dc6 UB |
2312 | [(match_operand:V2DF 0 "register_operand") |
2313 | (match_operand:V2DF 1 "register_operand")] | |
d6023b50 | 2314 | "TARGET_SSE3" |
07c0852e UB |
2315 | { |
2316 | emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1])); | |
2317 | DONE; | |
2318 | }) | |
d6023b50 | 2319 | |
2e2206fa AI |
2320 | (define_expand "reduc_splus_v16sf" |
2321 | [(match_operand:V16SF 0 "register_operand") | |
2322 | (match_operand:V16SF 1 "register_operand")] | |
2323 | "TARGET_AVX512F" | |
2324 | { | |
2325 | ix86_expand_reduc (gen_addv16sf3, operands[0], operands[1]); | |
2326 | DONE; | |
2327 | }) | |
2328 | ||
1e27129f | 2329 | (define_expand "reduc_splus_v8sf" |
82e86dc6 UB |
2330 | [(match_operand:V8SF 0 "register_operand") |
2331 | (match_operand:V8SF 1 "register_operand")] | |
1e27129f L |
2332 | "TARGET_AVX" |
2333 | { | |
2334 | rtx tmp = gen_reg_rtx (V8SFmode); | |
2335 | rtx tmp2 = gen_reg_rtx (V8SFmode); | |
2336 | emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1])); | |
b9121f42 L |
2337 | emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp)); |
2338 | emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1))); | |
2339 | emit_insn (gen_addv8sf3 (operands[0], tmp, tmp2)); | |
1e27129f L |
2340 | DONE; |
2341 | }) | |
2342 | ||
e20524fa | 2343 | (define_expand "reduc_splus_v4sf" |
82e86dc6 UB |
2344 | [(match_operand:V4SF 0 "register_operand") |
2345 | (match_operand:V4SF 1 "register_operand")] | |
2ab1754e RH |
2346 | "TARGET_SSE" |
2347 | { | |
2348 | if (TARGET_SSE3) | |
2349 | { | |
2350 | rtx tmp = gen_reg_rtx (V4SFmode); | |
2351 | emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1])); | |
2352 | emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp)); | |
2353 | } | |
2354 | else | |
c0b0ee6f | 2355 | ix86_expand_reduc (gen_addv4sf3, operands[0], operands[1]); |
2ab1754e RH |
2356 | DONE; |
2357 | }) | |
2358 | ||
8a0436cb JJ |
2359 | ;; Modes handled by reduc_sm{in,ax}* patterns. |
2360 | (define_mode_iterator REDUC_SMINMAX_MODE | |
2361 | [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2") | |
2362 | (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2") | |
2363 | (V8SF "TARGET_AVX") (V4DF "TARGET_AVX") | |
805e20ad AI |
2364 | (V4SF "TARGET_SSE") (V64QI "TARGET_AVX512BW") |
2365 | (V32HI "TARGET_AVX512BW") (V16SI "TARGET_AVX512F") | |
0fe65b75 AI |
2366 | (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F") |
2367 | (V8DF "TARGET_AVX512F")]) | |
8a0436cb JJ |
2368 | |
2369 | (define_expand "reduc_<code>_<mode>" | |
2370 | [(smaxmin:REDUC_SMINMAX_MODE | |
82e86dc6 UB |
2371 | (match_operand:REDUC_SMINMAX_MODE 0 "register_operand") |
2372 | (match_operand:REDUC_SMINMAX_MODE 1 "register_operand"))] | |
8a0436cb | 2373 | "" |
2ab1754e | 2374 | { |
8a0436cb | 2375 | ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]); |
c0b0ee6f JJ |
2376 | DONE; |
2377 | }) | |
2378 | ||
0fe65b75 | 2379 | (define_expand "reduc_<code>_<mode>" |
0ab03ea0 AI |
2380 | [(umaxmin:VI_AVX512BW |
2381 | (match_operand:VI_AVX512BW 0 "register_operand") | |
2382 | (match_operand:VI_AVX512BW 1 "register_operand"))] | |
0fe65b75 AI |
2383 | "TARGET_AVX512F" |
2384 | { | |
2385 | ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]); | |
2386 | DONE; | |
2387 | }) | |
2388 | ||
8a0436cb JJ |
2389 | (define_expand "reduc_<code>_<mode>" |
2390 | [(umaxmin:VI_256 | |
82e86dc6 UB |
2391 | (match_operand:VI_256 0 "register_operand") |
2392 | (match_operand:VI_256 1 "register_operand"))] | |
8a0436cb | 2393 | "TARGET_AVX2" |
c0b0ee6f | 2394 | { |
8a0436cb | 2395 | ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]); |
2ab1754e RH |
2396 | DONE; |
2397 | }) | |
2398 | ||
a520f3c3 JJ |
2399 | (define_expand "reduc_umin_v8hi" |
2400 | [(umin:V8HI | |
82e86dc6 UB |
2401 | (match_operand:V8HI 0 "register_operand") |
2402 | (match_operand:V8HI 1 "register_operand"))] | |
a520f3c3 JJ |
2403 | "TARGET_SSE4_1" |
2404 | { | |
2405 | ix86_expand_reduc (gen_uminv8hi3, operands[0], operands[1]); | |
2406 | DONE; | |
2407 | }) | |
2408 | ||
b9826286 AI |
2409 | (define_insn "<mask_codefor>reducep<mode><mask_name>" |
2410 | [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v") | |
2411 | (unspec:VF_AVX512VL | |
2412 | [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm") | |
2413 | (match_operand:SI 2 "const_0_to_255_operand")] | |
2414 | UNSPEC_REDUCE))] | |
2415 | "TARGET_AVX512DQ" | |
2416 | "vreduce<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" | |
2417 | [(set_attr "type" "sse") | |
2418 | (set_attr "prefix" "evex") | |
2419 | (set_attr "mode" "<MODE>")]) | |
2420 | ||
2421 | (define_insn "reduces<mode>" | |
2422 | [(set (match_operand:VF_128 0 "register_operand" "=v") | |
2423 | (vec_merge:VF_128 | |
2424 | (unspec:VF_128 | |
2425 | [(match_operand:VF_128 1 "register_operand" "v") | |
2426 | (match_operand:VF_128 2 "nonimmediate_operand" "vm") | |
2427 | (match_operand:SI 3 "const_0_to_255_operand")] | |
2428 | UNSPEC_REDUCE) | |
2429 | (match_dup 1) | |
2430 | (const_int 1)))] | |
2431 | "TARGET_AVX512DQ" | |
2432 | "vreduce<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" | |
2433 | [(set_attr "type" "sse") | |
2434 | (set_attr "prefix" "evex") | |
2435 | (set_attr "mode" "<MODE>")]) | |
2436 | ||
ef719a44 RH |
2437 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
2438 | ;; | |
d6023b50 | 2439 | ;; Parallel floating point comparisons |
ef719a44 RH |
2440 | ;; |
2441 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
2442 | ||
07c0852e | 2443 | (define_insn "avx_cmp<mode>3" |
b86f6e9e AI |
2444 | [(set (match_operand:VF_128_256 0 "register_operand" "=x") |
2445 | (unspec:VF_128_256 | |
2446 | [(match_operand:VF_128_256 1 "register_operand" "x") | |
2447 | (match_operand:VF_128_256 2 "nonimmediate_operand" "xm") | |
95879c72 L |
2448 | (match_operand:SI 3 "const_0_to_31_operand" "n")] |
2449 | UNSPEC_PCMP))] | |
2450 | "TARGET_AVX" | |
1c154a23 | 2451 | "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" |
95879c72 | 2452 | [(set_attr "type" "ssecmp") |
725fd454 | 2453 | (set_attr "length_immediate" "1") |
95879c72 L |
2454 | (set_attr "prefix" "vex") |
2455 | (set_attr "mode" "<MODE>")]) | |
2456 | ||
07c0852e UB |
2457 | (define_insn "avx_vmcmp<mode>3" |
2458 | [(set (match_operand:VF_128 0 "register_operand" "=x") | |
2459 | (vec_merge:VF_128 | |
2460 | (unspec:VF_128 | |
2461 | [(match_operand:VF_128 1 "register_operand" "x") | |
2462 | (match_operand:VF_128 2 "nonimmediate_operand" "xm") | |
95879c72 L |
2463 | (match_operand:SI 3 "const_0_to_31_operand" "n")] |
2464 | UNSPEC_PCMP) | |
2465 | (match_dup 1) | |
2466 | (const_int 1)))] | |
2467 | "TARGET_AVX" | |
eabb5f48 | 2468 | "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}" |
95879c72 | 2469 | [(set_attr "type" "ssecmp") |
725fd454 | 2470 | (set_attr "length_immediate" "1") |
95879c72 L |
2471 | (set_attr "prefix" "vex") |
2472 | (set_attr "mode" "<ssescalarmode>")]) | |
2473 | ||
c497c412 | 2474 | (define_insn "*<sse>_maskcmp<mode>3_comm" |
b86f6e9e AI |
2475 | [(set (match_operand:VF_128_256 0 "register_operand" "=x,x") |
2476 | (match_operator:VF_128_256 3 "sse_comparison_operator" | |
2477 | [(match_operand:VF_128_256 1 "register_operand" "%0,x") | |
2478 | (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")]))] | |
c497c412 UB |
2479 | "TARGET_SSE |
2480 | && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE" | |
2481 | "@ | |
2482 | cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2} | |
2483 | vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" | |
2484 | [(set_attr "isa" "noavx,avx") | |
2485 | (set_attr "type" "ssecmp") | |
2486 | (set_attr "length_immediate" "1") | |
2487 | (set_attr "prefix" "orig,vex") | |
2488 | (set_attr "mode" "<MODE>")]) | |
2489 | ||
95879c72 | 2490 | (define_insn "<sse>_maskcmp<mode>3" |
b86f6e9e AI |
2491 | [(set (match_operand:VF_128_256 0 "register_operand" "=x,x") |
2492 | (match_operator:VF_128_256 3 "sse_comparison_operator" | |
2493 | [(match_operand:VF_128_256 1 "register_operand" "0,x") | |
2494 | (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")]))] | |
6bec6c98 | 2495 | "TARGET_SSE" |
07c0852e UB |
2496 | "@ |
2497 | cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2} | |
2498 | vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" | |
2499 | [(set_attr "isa" "noavx,avx") | |
2500 | (set_attr "type" "ssecmp") | |
725fd454 | 2501 | (set_attr "length_immediate" "1") |
07c0852e | 2502 | (set_attr "prefix" "orig,vex") |
d6023b50 | 2503 | (set_attr "mode" "<MODE>")]) |
d096ecdd | 2504 | |
d6023b50 | 2505 | (define_insn "<sse>_vmmaskcmp<mode>3" |
07c0852e UB |
2506 | [(set (match_operand:VF_128 0 "register_operand" "=x,x") |
2507 | (vec_merge:VF_128 | |
2508 | (match_operator:VF_128 3 "sse_comparison_operator" | |
2509 | [(match_operand:VF_128 1 "register_operand" "0,x") | |
2510 | (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")]) | |
ef719a44 RH |
2511 | (match_dup 1) |
2512 | (const_int 1)))] | |
6bec6c98 | 2513 | "TARGET_SSE" |
07c0852e | 2514 | "@ |
eabb5f48 UB |
2515 | cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2} |
2516 | vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}" | |
07c0852e UB |
2517 | [(set_attr "isa" "noavx,avx") |
2518 | (set_attr "type" "ssecmp") | |
2519 | (set_attr "length_immediate" "1,*") | |
2520 | (set_attr "prefix" "orig,vex") | |
d6023b50 | 2521 | (set_attr "mode" "<ssescalarmode>")]) |
ef719a44 | 2522 | |
ab931c71 | 2523 | (define_mode_attr cmp_imm_predicate |
51e14b05 AI |
2524 | [(V16SF "const_0_to_31_operand") (V8DF "const_0_to_31_operand") |
2525 | (V16SI "const_0_to_7_operand") (V8DI "const_0_to_7_operand") | |
2526 | (V8SF "const_0_to_31_operand") (V4DF "const_0_to_31_operand") | |
2527 | (V8SI "const_0_to_7_operand") (V4DI "const_0_to_7_operand") | |
2528 | (V4SF "const_0_to_31_operand") (V2DF "const_0_to_31_operand") | |
2529 | (V4SI "const_0_to_7_operand") (V2DI "const_0_to_7_operand") | |
2530 | (V32HI "const_0_to_7_operand") (V64QI "const_0_to_7_operand") | |
2531 | (V16HI "const_0_to_7_operand") (V32QI "const_0_to_7_operand") | |
2532 | (V8HI "const_0_to_7_operand") (V16QI "const_0_to_7_operand")]) | |
2533 | ||
2534 | (define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name><round_saeonly_name>" | |
be792bce | 2535 | [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk") |
ab931c71 | 2536 | (unspec:<avx512fmaskmode> |
51e14b05 AI |
2537 | [(match_operand:V48_AVX512VL 1 "register_operand" "v") |
2538 | (match_operand:V48_AVX512VL 2 "nonimmediate_operand" "<round_saeonly_constraint>") | |
ab931c71 AI |
2539 | (match_operand:SI 3 "<cmp_imm_predicate>" "n")] |
2540 | UNSPEC_PCMP))] | |
8a6ef760 AI |
2541 | "TARGET_AVX512F && <round_saeonly_mode512bit_condition>" |
2542 | "v<sseintprefix>cmp<ssemodesuffix>\t{%3, <round_saeonly_mask_scalar_merge_op4>%2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2<round_saeonly_mask_scalar_merge_op4>, %3}" | |
ab931c71 AI |
2543 | [(set_attr "type" "ssecmp") |
2544 | (set_attr "length_immediate" "1") | |
2545 | (set_attr "prefix" "evex") | |
2546 | (set_attr "mode" "<sseinsnmode>")]) | |
2547 | ||
51e14b05 AI |
2548 | (define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name>" |
2549 | [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk") | |
2550 | (unspec:<avx512fmaskmode> | |
2551 | [(match_operand:VI12_AVX512VL 1 "register_operand" "v") | |
2552 | (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm") | |
2553 | (match_operand:SI 3 "<cmp_imm_predicate>" "n")] | |
2554 | UNSPEC_PCMP))] | |
2555 | "TARGET_AVX512BW" | |
2556 | "vpcmp<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}" | |
2557 | [(set_attr "type" "ssecmp") | |
2558 | (set_attr "length_immediate" "1") | |
2559 | (set_attr "prefix" "evex") | |
2560 | (set_attr "mode" "<sseinsnmode>")]) | |
2561 | ||
54967fb0 | 2562 | (define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>" |
be792bce | 2563 | [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk") |
0fe65b75 | 2564 | (unspec:<avx512fmaskmode> |
54967fb0 AI |
2565 | [(match_operand:VI12_AVX512VL 1 "register_operand" "v") |
2566 | (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm") | |
2567 | (match_operand:SI 3 "const_0_to_7_operand" "n")] | |
2568 | UNSPEC_UNSIGNED_PCMP))] | |
2569 | "TARGET_AVX512BW" | |
2570 | "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}" | |
2571 | [(set_attr "type" "ssecmp") | |
2572 | (set_attr "length_immediate" "1") | |
2573 | (set_attr "prefix" "evex") | |
2574 | (set_attr "mode" "<sseinsnmode>")]) | |
2575 | ||
2576 | (define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>" | |
2577 | [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk") | |
2578 | (unspec:<avx512fmaskmode> | |
2579 | [(match_operand:VI48_AVX512VL 1 "register_operand" "v") | |
2580 | (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm") | |
0fe65b75 AI |
2581 | (match_operand:SI 3 "const_0_to_7_operand" "n")] |
2582 | UNSPEC_UNSIGNED_PCMP))] | |
2583 | "TARGET_AVX512F" | |
a95ec517 | 2584 | "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}" |
0fe65b75 AI |
2585 | [(set_attr "type" "ssecmp") |
2586 | (set_attr "length_immediate" "1") | |
2587 | (set_attr "prefix" "evex") | |
2588 | (set_attr "mode" "<sseinsnmode>")]) | |
2589 | ||
8a6ef760 | 2590 | (define_insn "avx512f_vmcmp<mode>3<round_saeonly_name>" |
be792bce | 2591 | [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk") |
b868b7ca AI |
2592 | (and:<avx512fmaskmode> |
2593 | (unspec:<avx512fmaskmode> | |
2594 | [(match_operand:VF_128 1 "register_operand" "v") | |
8a6ef760 | 2595 | (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>") |
b868b7ca AI |
2596 | (match_operand:SI 3 "const_0_to_31_operand" "n")] |
2597 | UNSPEC_PCMP) | |
2598 | (const_int 1)))] | |
2599 | "TARGET_AVX512F" | |
8a6ef760 | 2600 | "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}" |
b868b7ca AI |
2601 | [(set_attr "type" "ssecmp") |
2602 | (set_attr "length_immediate" "1") | |
2603 | (set_attr "prefix" "evex") | |
2604 | (set_attr "mode" "<ssescalarmode>")]) | |
2605 | ||
8a6ef760 | 2606 | (define_insn "avx512f_vmcmp<mode>3_mask<round_saeonly_name>" |
be792bce | 2607 | [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk") |
47490470 AI |
2608 | (and:<avx512fmaskmode> |
2609 | (unspec:<avx512fmaskmode> | |
2610 | [(match_operand:VF_128 1 "register_operand" "v") | |
8a6ef760 | 2611 | (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>") |
47490470 AI |
2612 | (match_operand:SI 3 "const_0_to_31_operand" "n")] |
2613 | UNSPEC_PCMP) | |
2614 | (and:<avx512fmaskmode> | |
be792bce | 2615 | (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk") |
47490470 AI |
2616 | (const_int 1))))] |
2617 | "TARGET_AVX512F" | |
8a6ef760 | 2618 | "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_saeonly_op5>, %3}" |
47490470 AI |
2619 | [(set_attr "type" "ssecmp") |
2620 | (set_attr "length_immediate" "1") | |
2621 | (set_attr "prefix" "evex") | |
2622 | (set_attr "mode" "<ssescalarmode>")]) | |
2623 | ||
b868b7ca | 2624 | (define_insn "avx512f_maskcmp<mode>3" |
be792bce | 2625 | [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk") |
b868b7ca AI |
2626 | (match_operator:<avx512fmaskmode> 3 "sse_comparison_operator" |
2627 | [(match_operand:VF 1 "register_operand" "v") | |
2628 | (match_operand:VF 2 "nonimmediate_operand" "vm")]))] | |
2629 | "TARGET_SSE" | |
2630 | "vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" | |
2631 | [(set_attr "type" "ssecmp") | |
2632 | (set_attr "length_immediate" "1") | |
2633 | (set_attr "prefix" "evex") | |
2634 | (set_attr "mode" "<sseinsnmode>")]) | |
2635 | ||
8a6ef760 | 2636 | (define_insn "<sse>_comi<round_saeonly_name>" |
ef719a44 RH |
2637 | [(set (reg:CCFP FLAGS_REG) |
2638 | (compare:CCFP | |
d6023b50 | 2639 | (vec_select:MODEF |
2b1ebb0c | 2640 | (match_operand:<ssevecmode> 0 "register_operand" "v") |
ef719a44 | 2641 | (parallel [(const_int 0)])) |
d6023b50 | 2642 | (vec_select:MODEF |
8a6ef760 | 2643 | (match_operand:<ssevecmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>") |
ef719a44 | 2644 | (parallel [(const_int 0)]))))] |
d6023b50 | 2645 | "SSE_FLOAT_MODE_P (<MODE>mode)" |
8a6ef760 | 2646 | "%vcomi<ssemodesuffix>\t{<round_saeonly_op2>%1, %0|%0, %<iptr>1<round_saeonly_op2>}" |
ef719a44 | 2647 | [(set_attr "type" "ssecomi") |
95879c72 | 2648 | (set_attr "prefix" "maybe_vex") |
725fd454 JJ |
2649 | (set_attr "prefix_rep" "0") |
2650 | (set (attr "prefix_data16") | |
2651 | (if_then_else (eq_attr "mode" "DF") | |
2652 | (const_string "1") | |
2653 | (const_string "0"))) | |
d6023b50 | 2654 | (set_attr "mode" "<MODE>")]) |
ef719a44 | 2655 | |
8a6ef760 | 2656 | (define_insn "<sse>_ucomi<round_saeonly_name>" |
ef719a44 RH |
2657 | [(set (reg:CCFPU FLAGS_REG) |
2658 | (compare:CCFPU | |
d6023b50 | 2659 | (vec_select:MODEF |
2b1ebb0c | 2660 | (match_operand:<ssevecmode> 0 "register_operand" "v") |
ef719a44 | 2661 | (parallel [(const_int 0)])) |
d6023b50 | 2662 | (vec_select:MODEF |
8a6ef760 | 2663 | (match_operand:<ssevecmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>") |
ef719a44 | 2664 | (parallel [(const_int 0)]))))] |
d6023b50 | 2665 | "SSE_FLOAT_MODE_P (<MODE>mode)" |
8a6ef760 | 2666 | "%vucomi<ssemodesuffix>\t{<round_saeonly_op2>%1, %0|%0, %<iptr>1<round_saeonly_op2>}" |
ef719a44 | 2667 | [(set_attr "type" "ssecomi") |
95879c72 | 2668 | (set_attr "prefix" "maybe_vex") |
725fd454 JJ |
2669 | (set_attr "prefix_rep" "0") |
2670 | (set (attr "prefix_data16") | |
2671 | (if_then_else (eq_attr "mode" "DF") | |
2672 | (const_string "1") | |
2673 | (const_string "0"))) | |
d6023b50 | 2674 | (set_attr "mode" "<MODE>")]) |
ef719a44 | 2675 | |
f62ce24f AI |
2676 | (define_expand "vcond<V_512:mode><VF_512:mode>" |
2677 | [(set (match_operand:V_512 0 "register_operand") | |
2678 | (if_then_else:V_512 | |
2679 | (match_operator 3 "" | |
2680 | [(match_operand:VF_512 4 "nonimmediate_operand") | |
2681 | (match_operand:VF_512 5 "nonimmediate_operand")]) | |
2682 | (match_operand:V_512 1 "general_operand") | |
2683 | (match_operand:V_512 2 "general_operand")))] | |
2684 | "TARGET_AVX512F | |
2685 | && (GET_MODE_NUNITS (<V_512:MODE>mode) | |
2686 | == GET_MODE_NUNITS (<VF_512:MODE>mode))" | |
2687 | { | |
2688 | bool ok = ix86_expand_fp_vcond (operands); | |
2689 | gcc_assert (ok); | |
2690 | DONE; | |
2691 | }) | |
2692 | ||
e9e1d143 | 2693 | (define_expand "vcond<V_256:mode><VF_256:mode>" |
82e86dc6 | 2694 | [(set (match_operand:V_256 0 "register_operand") |
e9e1d143 | 2695 | (if_then_else:V_256 |
977e83a3 | 2696 | (match_operator 3 "" |
82e86dc6 UB |
2697 | [(match_operand:VF_256 4 "nonimmediate_operand") |
2698 | (match_operand:VF_256 5 "nonimmediate_operand")]) | |
2699 | (match_operand:V_256 1 "general_operand") | |
2700 | (match_operand:V_256 2 "general_operand")))] | |
e9e1d143 RG |
2701 | "TARGET_AVX |
2702 | && (GET_MODE_NUNITS (<V_256:MODE>mode) | |
2703 | == GET_MODE_NUNITS (<VF_256:MODE>mode))" | |
2704 | { | |
2705 | bool ok = ix86_expand_fp_vcond (operands); | |
2706 | gcc_assert (ok); | |
2707 | DONE; | |
2708 | }) | |
2709 | ||
2710 | (define_expand "vcond<V_128:mode><VF_128:mode>" | |
82e86dc6 | 2711 | [(set (match_operand:V_128 0 "register_operand") |
e9e1d143 RG |
2712 | (if_then_else:V_128 |
2713 | (match_operator 3 "" | |
82e86dc6 UB |
2714 | [(match_operand:VF_128 4 "nonimmediate_operand") |
2715 | (match_operand:VF_128 5 "nonimmediate_operand")]) | |
2716 | (match_operand:V_128 1 "general_operand") | |
2717 | (match_operand:V_128 2 "general_operand")))] | |
e9e1d143 RG |
2718 | "TARGET_SSE |
2719 | && (GET_MODE_NUNITS (<V_128:MODE>mode) | |
2720 | == GET_MODE_NUNITS (<VF_128:MODE>mode))" | |
ae46a07a | 2721 | { |
1262fd02 UB |
2722 | bool ok = ix86_expand_fp_vcond (operands); |
2723 | gcc_assert (ok); | |
2724 | DONE; | |
ae46a07a RH |
2725 | }) |
2726 | ||
ef719a44 RH |
2727 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
2728 | ;; | |
d6023b50 | 2729 | ;; Parallel floating point logical operations |
ef719a44 RH |
2730 | ;; |
2731 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
2732 | ||
26358fb6 AI |
2733 | (define_insn "<sse>_andnot<mode>3<mask_name>" |
2734 | [(set (match_operand:VF_128_256 0 "register_operand" "=x,v") | |
2735 | (and:VF_128_256 | |
2736 | (not:VF_128_256 | |
2737 | (match_operand:VF_128_256 1 "register_operand" "0,v")) | |
2738 | (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,vm")))] | |
2739 | "TARGET_SSE && <mask_avx512vl_condition>" | |
a95d4000 | 2740 | { |
26358fb6 | 2741 | static char buf[128]; |
b8dd0894 UB |
2742 | const char *ops; |
2743 | const char *suffix; | |
2744 | ||
2745 | switch (get_attr_mode (insn)) | |
2746 | { | |
2747 | case MODE_V8SF: | |
2748 | case MODE_V4SF: | |
2749 | suffix = "ps"; | |
2750 | break; | |
2751 | default: | |
2752 | suffix = "<ssemodesuffix>"; | |
2753 | } | |
ef719a44 | 2754 | |
a95d4000 UB |
2755 | switch (which_alternative) |
2756 | { | |
2757 | case 0: | |
b8dd0894 | 2758 | ops = "andn%s\t{%%2, %%0|%%0, %%2}"; |
a95d4000 UB |
2759 | break; |
2760 | case 1: | |
26358fb6 | 2761 | ops = "vandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}"; |
a95d4000 UB |
2762 | break; |
2763 | default: | |
2764 | gcc_unreachable (); | |
2765 | } | |
95879c72 | 2766 | |
26358fb6 AI |
2767 | /* There is no vandnp[sd] in avx512f. Use vpandn[qd]. */ |
2768 | if (<mask_applied> && !TARGET_AVX512DQ) | |
b86f6e9e | 2769 | { |
26358fb6 AI |
2770 | suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d"; |
2771 | ops = "vpandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}"; | |
b86f6e9e AI |
2772 | } |
2773 | ||
b8dd0894 | 2774 | snprintf (buf, sizeof (buf), ops, suffix); |
a95d4000 | 2775 | return buf; |
1133125e | 2776 | } |
a95d4000 UB |
2777 | [(set_attr "isa" "noavx,avx") |
2778 | (set_attr "type" "sselog") | |
b86f6e9e | 2779 | (set_attr "prefix" "orig,maybe_evex") |
b8dd0894 | 2780 | (set (attr "mode") |
659c0e68 JM |
2781 | (cond [(and (match_test "<MODE_SIZE> == 16") |
2782 | (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")) | |
b8dd0894 UB |
2783 | (const_string "<ssePSmode>") |
2784 | (match_test "TARGET_AVX") | |
2785 | (const_string "<MODE>") | |
2786 | (match_test "optimize_function_for_size_p (cfun)") | |
2787 | (const_string "V4SF") | |
2788 | ] | |
2789 | (const_string "<MODE>")))]) | |
95879c72 | 2790 | |
26358fb6 AI |
2791 | |
2792 | (define_insn "<sse>_andnot<mode>3<mask_name>" | |
2793 | [(set (match_operand:VF_512 0 "register_operand" "=v") | |
2794 | (and:VF_512 | |
2795 | (not:VF_512 | |
2796 | (match_operand:VF_512 1 "register_operand" "v")) | |
2797 | (match_operand:VF_512 2 "nonimmediate_operand" "vm")))] | |
2798 | "TARGET_AVX512F" | |
2799 | { | |
2800 | static char buf[128]; | |
2801 | const char *ops; | |
2802 | const char *suffix; | |
2803 | ||
2804 | suffix = "<ssemodesuffix>"; | |
2805 | ops = ""; | |
2806 | ||
2807 | /* There is no vandnp[sd] in avx512f. Use vpandn[qd]. */ | |
2808 | if (!TARGET_AVX512DQ) | |
2809 | { | |
2810 | suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d"; | |
2811 | ops = "p"; | |
2812 | } | |
2813 | ||
2814 | snprintf (buf, sizeof (buf), | |
2815 | "v%sandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}", | |
2816 | ops, suffix); | |
2817 | return buf; | |
2818 | } | |
2819 | [(set_attr "type" "sselog") | |
2820 | (set_attr "prefix" "evex") | |
2821 | (set_attr "mode" "<sseinsnmode>")]) | |
2822 | ||
2823 | (define_expand "<code><mode>3<mask_name>" | |
b86f6e9e | 2824 | [(set (match_operand:VF_128_256 0 "register_operand") |
26358fb6 AI |
2825 | (any_logic:VF_128_256 |
2826 | (match_operand:VF_128_256 1 "nonimmediate_operand") | |
2827 | (match_operand:VF_128_256 2 "nonimmediate_operand")))] | |
2828 | "TARGET_SSE && <mask_avx512vl_condition>" | |
94237c92 | 2829 | "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);") |
ef719a44 | 2830 | |
26358fb6 | 2831 | (define_expand "<code><mode>3<mask_name>" |
b86f6e9e | 2832 | [(set (match_operand:VF_512 0 "register_operand") |
26358fb6 | 2833 | (any_logic:VF_512 |
b86f6e9e AI |
2834 | (match_operand:VF_512 1 "nonimmediate_operand") |
2835 | (match_operand:VF_512 2 "nonimmediate_operand")))] | |
2836 | "TARGET_AVX512F" | |
2837 | "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);") | |
2838 | ||
26358fb6 AI |
2839 | (define_insn "*<code><mode>3<mask_name>" |
2840 | [(set (match_operand:VF_128_256 0 "register_operand" "=x,v") | |
2841 | (any_logic:VF_128_256 | |
2842 | (match_operand:VF_128_256 1 "nonimmediate_operand" "%0,v") | |
2843 | (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,vm")))] | |
2844 | "TARGET_SSE && <mask_avx512vl_condition> | |
2845 | && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" | |
1133125e | 2846 | { |
26358fb6 | 2847 | static char buf[128]; |
b8dd0894 UB |
2848 | const char *ops; |
2849 | const char *suffix; | |
2850 | ||
2851 | switch (get_attr_mode (insn)) | |
2852 | { | |
2853 | case MODE_V8SF: | |
2854 | case MODE_V4SF: | |
2855 | suffix = "ps"; | |
2856 | break; | |
2857 | default: | |
2858 | suffix = "<ssemodesuffix>"; | |
2859 | } | |
a95d4000 UB |
2860 | |
2861 | switch (which_alternative) | |
2862 | { | |
2863 | case 0: | |
b8dd0894 | 2864 | ops = "<logic>%s\t{%%2, %%0|%%0, %%2}"; |
a95d4000 UB |
2865 | break; |
2866 | case 1: | |
26358fb6 | 2867 | ops = "v<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}"; |
a95d4000 UB |
2868 | break; |
2869 | default: | |
2870 | gcc_unreachable (); | |
2871 | } | |
2872 | ||
26358fb6 AI |
2873 | /* There is no v<logic>p[sd] in avx512f. Use vp<logic>[dq]. */ |
2874 | if (<mask_applied> && !TARGET_AVX512DQ) | |
b86f6e9e | 2875 | { |
26358fb6 AI |
2876 | suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d"; |
2877 | ops = "vp<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}"; | |
b86f6e9e AI |
2878 | } |
2879 | ||
b8dd0894 | 2880 | snprintf (buf, sizeof (buf), ops, suffix); |
a95d4000 | 2881 | return buf; |
1133125e | 2882 | } |
a95d4000 UB |
2883 | [(set_attr "isa" "noavx,avx") |
2884 | (set_attr "type" "sselog") | |
b86f6e9e | 2885 | (set_attr "prefix" "orig,maybe_evex") |
b8dd0894 | 2886 | (set (attr "mode") |
659c0e68 JM |
2887 | (cond [(and (match_test "<MODE_SIZE> == 16") |
2888 | (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")) | |
b8dd0894 UB |
2889 | (const_string "<ssePSmode>") |
2890 | (match_test "TARGET_AVX") | |
2891 | (const_string "<MODE>") | |
2892 | (match_test "optimize_function_for_size_p (cfun)") | |
2893 | (const_string "V4SF") | |
2894 | ] | |
2895 | (const_string "<MODE>")))]) | |
ef719a44 | 2896 | |
26358fb6 AI |
2897 | (define_insn "*<code><mode>3<mask_name>" |
2898 | [(set (match_operand:VF_512 0 "register_operand" "=v") | |
2899 | (any_logic:VF_512 | |
2900 | (match_operand:VF_512 1 "nonimmediate_operand" "%v") | |
2901 | (match_operand:VF_512 2 "nonimmediate_operand" "vm")))] | |
2902 | "TARGET_AVX512F && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" | |
2903 | { | |
2904 | static char buf[128]; | |
2905 | const char *ops; | |
2906 | const char *suffix; | |
2907 | ||
2908 | suffix = "<ssemodesuffix>"; | |
2909 | ops = ""; | |
2910 | ||
2911 | /* There is no v<logic>p[sd] in avx512f. Use vp<logic>[dq]. */ | |
2912 | if ((<MODE_SIZE> == 64 || <mask_applied>) && !TARGET_AVX512DQ) | |
2913 | { | |
2914 | suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d"; | |
2915 | ops = "p"; | |
2916 | } | |
2917 | ||
2918 | snprintf (buf, sizeof (buf), | |
2919 | "v%s<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}", | |
2920 | ops, suffix); | |
2921 | return buf; | |
2922 | } | |
2923 | [(set_attr "type" "sselog") | |
2924 | (set_attr "prefix" "evex") | |
2925 | (set_attr "mode" "<sseinsnmode>")]) | |
2926 | ||
af766f2d | 2927 | (define_expand "copysign<mode>3" |
31f44cd0 | 2928 | [(set (match_dup 4) |
a95d4000 UB |
2929 | (and:VF |
2930 | (not:VF (match_dup 3)) | |
82e86dc6 | 2931 | (match_operand:VF 1 "nonimmediate_operand"))) |
31f44cd0 | 2932 | (set (match_dup 5) |
a95d4000 | 2933 | (and:VF (match_dup 3) |
82e86dc6 UB |
2934 | (match_operand:VF 2 "nonimmediate_operand"))) |
2935 | (set (match_operand:VF 0 "register_operand") | |
a95d4000 | 2936 | (ior:VF (match_dup 4) (match_dup 5)))] |
6bec6c98 | 2937 | "TARGET_SSE" |
af766f2d | 2938 | { |
1e27129f | 2939 | operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0); |
1fba7394 | 2940 | |
31f44cd0 | 2941 | operands[4] = gen_reg_rtx (<MODE>mode); |
1fba7394 | 2942 | operands[5] = gen_reg_rtx (<MODE>mode); |
af766f2d UB |
2943 | }) |
2944 | ||
ab8efbd8 | 2945 | ;; Also define scalar versions. These are used for abs, neg, and |
41f717fb | 2946 | ;; conditional move. Using subregs into vector modes causes register |
ab8efbd8 RH |
2947 | ;; allocation lossage. These patterns do not allow memory operands |
2948 | ;; because the native instructions read the full 128-bits. | |
2949 | ||
c6d55c5b | 2950 | (define_insn "*andnot<mode>3" |
a95d4000 | 2951 | [(set (match_operand:MODEF 0 "register_operand" "=x,x") |
d6023b50 UB |
2952 | (and:MODEF |
2953 | (not:MODEF | |
a95d4000 UB |
2954 | (match_operand:MODEF 1 "register_operand" "0,x")) |
2955 | (match_operand:MODEF 2 "register_operand" "x,x")))] | |
d6023b50 | 2956 | "SSE_FLOAT_MODE_P (<MODE>mode)" |
1133125e | 2957 | { |
a95d4000 | 2958 | static char buf[32]; |
b8dd0894 | 2959 | const char *ops; |
a95d4000 | 2960 | const char *suffix |
b8dd0894 UB |
2961 | = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>"; |
2962 | ||
2963 | switch (which_alternative) | |
2964 | { | |
2965 | case 0: | |
2966 | ops = "andn%s\t{%%2, %%0|%%0, %%2}"; | |
2967 | break; | |
2968 | case 1: | |
2969 | ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}"; | |
2970 | break; | |
2971 | default: | |
2972 | gcc_unreachable (); | |
2973 | } | |
2974 | ||
2975 | snprintf (buf, sizeof (buf), ops, suffix); | |
2976 | return buf; | |
2977 | } | |
2978 | [(set_attr "isa" "noavx,avx") | |
2979 | (set_attr "type" "sselog") | |
2980 | (set_attr "prefix" "orig,vex") | |
2981 | (set (attr "mode") | |
659c0e68 JM |
2982 | (cond [(and (match_test "<MODE_SIZE> == 16") |
2983 | (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")) | |
b8dd0894 UB |
2984 | (const_string "V4SF") |
2985 | (match_test "TARGET_AVX") | |
2986 | (const_string "<ssevecmode>") | |
2987 | (match_test "optimize_function_for_size_p (cfun)") | |
2988 | (const_string "V4SF") | |
2989 | ] | |
2990 | (const_string "<ssevecmode>")))]) | |
2991 | ||
2992 | (define_insn "*andnottf3" | |
2993 | [(set (match_operand:TF 0 "register_operand" "=x,x") | |
2994 | (and:TF | |
2995 | (not:TF (match_operand:TF 1 "register_operand" "0,x")) | |
2996 | (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))] | |
2997 | "TARGET_SSE" | |
2998 | { | |
2999 | static char buf[32]; | |
3000 | const char *ops; | |
3001 | const char *tmp | |
3002 | = (get_attr_mode (insn) == MODE_V4SF) ? "andnps" : "pandn"; | |
a95d4000 UB |
3003 | |
3004 | switch (which_alternative) | |
3005 | { | |
3006 | case 0: | |
b8dd0894 | 3007 | ops = "%s\t{%%2, %%0|%%0, %%2}"; |
a95d4000 UB |
3008 | break; |
3009 | case 1: | |
b8dd0894 | 3010 | ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}"; |
a95d4000 UB |
3011 | break; |
3012 | default: | |
3013 | gcc_unreachable (); | |
3014 | } | |
3015 | ||
b8dd0894 | 3016 | snprintf (buf, sizeof (buf), ops, tmp); |
a95d4000 | 3017 | return buf; |
1133125e | 3018 | } |
a95d4000 UB |
3019 | [(set_attr "isa" "noavx,avx") |
3020 | (set_attr "type" "sselog") | |
b8dd0894 UB |
3021 | (set (attr "prefix_data16") |
3022 | (if_then_else | |
3023 | (and (eq_attr "alternative" "0") | |
3024 | (eq_attr "mode" "TI")) | |
3025 | (const_string "1") | |
3026 | (const_string "*"))) | |
a95d4000 | 3027 | (set_attr "prefix" "orig,vex") |
b8dd0894 UB |
3028 | (set (attr "mode") |
3029 | (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL") | |
3030 | (const_string "V4SF") | |
3031 | (match_test "TARGET_AVX") | |
3032 | (const_string "TI") | |
3033 | (ior (not (match_test "TARGET_SSE2")) | |
3034 | (match_test "optimize_function_for_size_p (cfun)")) | |
3035 | (const_string "V4SF") | |
3036 | ] | |
3037 | (const_string "TI")))]) | |
95879c72 | 3038 | |
94237c92 | 3039 | (define_insn "*<code><mode>3" |
a95d4000 | 3040 | [(set (match_operand:MODEF 0 "register_operand" "=x,x") |
c8427064 | 3041 | (any_logic:MODEF |
a95d4000 UB |
3042 | (match_operand:MODEF 1 "register_operand" "%0,x") |
3043 | (match_operand:MODEF 2 "register_operand" "x,x")))] | |
d6023b50 | 3044 | "SSE_FLOAT_MODE_P (<MODE>mode)" |
1133125e | 3045 | { |
a95d4000 | 3046 | static char buf[32]; |
b8dd0894 | 3047 | const char *ops; |
a95d4000 | 3048 | const char *suffix |
b8dd0894 UB |
3049 | = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>"; |
3050 | ||
3051 | switch (which_alternative) | |
3052 | { | |
3053 | case 0: | |
3054 | ops = "<logic>%s\t{%%2, %%0|%%0, %%2}"; | |
3055 | break; | |
3056 | case 1: | |
3057 | ops = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}"; | |
3058 | break; | |
3059 | default: | |
3060 | gcc_unreachable (); | |
3061 | } | |
3062 | ||
3063 | snprintf (buf, sizeof (buf), ops, suffix); | |
3064 | return buf; | |
3065 | } | |
3066 | [(set_attr "isa" "noavx,avx") | |
3067 | (set_attr "type" "sselog") | |
3068 | (set_attr "prefix" "orig,vex") | |
3069 | (set (attr "mode") | |
659c0e68 JM |
3070 | (cond [(and (match_test "<MODE_SIZE> == 16") |
3071 | (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")) | |
b8dd0894 UB |
3072 | (const_string "V4SF") |
3073 | (match_test "TARGET_AVX") | |
3074 | (const_string "<ssevecmode>") | |
3075 | (match_test "optimize_function_for_size_p (cfun)") | |
3076 | (const_string "V4SF") | |
3077 | ] | |
3078 | (const_string "<ssevecmode>")))]) | |
3079 | ||
3080 | (define_expand "<code>tf3" | |
3081 | [(set (match_operand:TF 0 "register_operand") | |
3082 | (any_logic:TF | |
3083 | (match_operand:TF 1 "nonimmediate_operand") | |
3084 | (match_operand:TF 2 "nonimmediate_operand")))] | |
3085 | "TARGET_SSE" | |
3086 | "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);") | |
3087 | ||
3088 | (define_insn "*<code>tf3" | |
3089 | [(set (match_operand:TF 0 "register_operand" "=x,x") | |
3090 | (any_logic:TF | |
3091 | (match_operand:TF 1 "nonimmediate_operand" "%0,x") | |
3092 | (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))] | |
3093 | "TARGET_SSE | |
3094 | && ix86_binary_operator_ok (<CODE>, TFmode, operands)" | |
3095 | { | |
3096 | static char buf[32]; | |
3097 | const char *ops; | |
3098 | const char *tmp | |
3099 | = (get_attr_mode (insn) == MODE_V4SF) ? "<logic>ps" : "p<logic>"; | |
a95d4000 UB |
3100 | |
3101 | switch (which_alternative) | |
3102 | { | |
3103 | case 0: | |
b8dd0894 | 3104 | ops = "%s\t{%%2, %%0|%%0, %%2}"; |
a95d4000 UB |
3105 | break; |
3106 | case 1: | |
b8dd0894 | 3107 | ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}"; |
a95d4000 UB |
3108 | break; |
3109 | default: | |
3110 | gcc_unreachable (); | |
3111 | } | |
3112 | ||
b8dd0894 | 3113 | snprintf (buf, sizeof (buf), ops, tmp); |
a95d4000 | 3114 | return buf; |
1133125e | 3115 | } |
a95d4000 UB |
3116 | [(set_attr "isa" "noavx,avx") |
3117 | (set_attr "type" "sselog") | |
b8dd0894 UB |
3118 | (set (attr "prefix_data16") |
3119 | (if_then_else | |
3120 | (and (eq_attr "alternative" "0") | |
3121 | (eq_attr "mode" "TI")) | |
3122 | (const_string "1") | |
3123 | (const_string "*"))) | |
a95d4000 | 3124 | (set_attr "prefix" "orig,vex") |
b8dd0894 UB |
3125 | (set (attr "mode") |
3126 | (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL") | |
3127 | (const_string "V4SF") | |
3128 | (match_test "TARGET_AVX") | |
3129 | (const_string "TI") | |
3130 | (ior (not (match_test "TARGET_SSE2")) | |
3131 | (match_test "optimize_function_for_size_p (cfun)")) | |
3132 | (const_string "V4SF") | |
3133 | ] | |
3134 | (const_string "TI")))]) | |
ab8efbd8 | 3135 | |
cbf2e4d4 HJ |
3136 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
3137 | ;; | |
b0d5396c UB |
3138 | ;; FMA floating point multiply/accumulate instructions. These include |
3139 | ;; scalar versions of the instructions as well as vector versions. | |
cbf2e4d4 HJ |
3140 | ;; |
3141 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
3142 | ||
19c5f6e6 | 3143 | ;; The standard names for scalar FMA are only available with SSE math enabled. |
558d9f79 AI |
3144 | ;; CPUID bit AVX512F enables evex encoded scalar and 512-bit fma. It doesn't |
3145 | ;; care about FMA bit, so we enable fma for TARGET_AVX512F even when TARGET_FMA | |
3146 | ;; and TARGET_FMA4 are both false. | |
3147 | ;; TODO: In theory AVX512F does not automatically imply FMA, and without FMA | |
3148 | ;; one must force the EVEX encoding of the fma insns. Ideally we'd improve | |
3149 | ;; GAS to allow proper prefix selection. However, for the moment all hardware | |
3150 | ;; that supports AVX512F also supports FMA so we can ignore this for now. | |
3151 | (define_mode_iterator FMAMODEM | |
3152 | [(SF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)") | |
3153 | (DF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)") | |
e274629e AI |
3154 | (V4SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL") |
3155 | (V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL") | |
3156 | (V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL") | |
3157 | (V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL") | |
558d9f79 AI |
3158 | (V16SF "TARGET_AVX512F") |
3159 | (V8DF "TARGET_AVX512F")]) | |
b0d5396c | 3160 | |
89509419 | 3161 | (define_expand "fma<mode>4" |
19c5f6e6 UB |
3162 | [(set (match_operand:FMAMODEM 0 "register_operand") |
3163 | (fma:FMAMODEM | |
3164 | (match_operand:FMAMODEM 1 "nonimmediate_operand") | |
3165 | (match_operand:FMAMODEM 2 "nonimmediate_operand") | |
429749e2 | 3166 | (match_operand:FMAMODEM 3 "nonimmediate_operand")))]) |
351877cf | 3167 | |
16949072 | 3168 | (define_expand "fms<mode>4" |
19c5f6e6 UB |
3169 | [(set (match_operand:FMAMODEM 0 "register_operand") |
3170 | (fma:FMAMODEM | |
3171 | (match_operand:FMAMODEM 1 "nonimmediate_operand") | |
3172 | (match_operand:FMAMODEM 2 "nonimmediate_operand") | |
429749e2 | 3173 | (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))]) |
16949072 RG |
3174 | |
3175 | (define_expand "fnma<mode>4" | |
19c5f6e6 UB |
3176 | [(set (match_operand:FMAMODEM 0 "register_operand") |
3177 | (fma:FMAMODEM | |
3178 | (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand")) | |
3179 | (match_operand:FMAMODEM 2 "nonimmediate_operand") | |
429749e2 | 3180 | (match_operand:FMAMODEM 3 "nonimmediate_operand")))]) |
16949072 RG |
3181 | |
3182 | (define_expand "fnms<mode>4" | |
19c5f6e6 UB |
3183 | [(set (match_operand:FMAMODEM 0 "register_operand") |
3184 | (fma:FMAMODEM | |
3185 | (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand")) | |
3186 | (match_operand:FMAMODEM 2 "nonimmediate_operand") | |
429749e2 | 3187 | (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))]) |
19c5f6e6 UB |
3188 | |
3189 | ;; The builtins for intrinsics are not constrained by SSE math enabled. | |
e274629e AI |
3190 | (define_mode_iterator FMAMODE_AVX512 |
3191 | [(SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F") | |
3192 | (DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F") | |
3193 | (V4SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL") | |
3194 | (V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL") | |
3195 | (V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL") | |
3196 | (V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL") | |
3197 | (V16SF "TARGET_AVX512F") | |
3198 | (V8DF "TARGET_AVX512F")]) | |
3199 | ||
429749e2 | 3200 | (define_mode_iterator FMAMODE |
e274629e | 3201 | [SF DF V4SF V2DF V8SF V4DF]) |
16949072 | 3202 | |
351877cf | 3203 | (define_expand "fma4i_fmadd_<mode>" |
e274629e AI |
3204 | [(set (match_operand:FMAMODE_AVX512 0 "register_operand") |
3205 | (fma:FMAMODE_AVX512 | |
3206 | (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand") | |
3207 | (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand") | |
3208 | (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand")))]) | |
3209 | ||
3210 | (define_expand "<avx512>_fmadd_<mode>_maskz<round_expand_name>" | |
3211 | [(match_operand:VF_AVX512VL 0 "register_operand") | |
3212 | (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>") | |
3213 | (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>") | |
3214 | (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>") | |
06bc9e41 | 3215 | (match_operand:<avx512fmaskmode> 4 "register_operand")] |
e274629e | 3216 | "TARGET_AVX512F && <round_mode512bit_condition>" |
06bc9e41 | 3217 | { |
7cf78561 | 3218 | emit_insn (gen_fma_fmadd_<mode>_maskz_1<round_expand_name> ( |
06bc9e41 | 3219 | operands[0], operands[1], operands[2], operands[3], |
7cf78561 | 3220 | CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>)); |
06bc9e41 AI |
3221 | DONE; |
3222 | }) | |
3223 | ||
e274629e AI |
3224 | (define_insn "*fma_fmadd_<mode>" |
3225 | [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x") | |
3226 | (fma:FMAMODE | |
3227 | (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x") | |
3228 | (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m") | |
3229 | (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x")))] | |
3230 | "TARGET_FMA || TARGET_FMA4" | |
06bc9e41 | 3231 | "@ |
e274629e AI |
3232 | vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2} |
3233 | vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} | |
3234 | vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2} | |
3e5804e1 UB |
3235 | vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3} |
3236 | vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" | |
e274629e | 3237 | [(set_attr "isa" "fma,fma,fma,fma4,fma4") |
3e5804e1 | 3238 | (set_attr "type" "ssemuladd") |
b0d5396c UB |
3239 | (set_attr "mode" "<MODE>")]) |
3240 | ||
e274629e AI |
3241 | ;; Suppose AVX-512F as baseline |
3242 | (define_mode_iterator VF_SF_AVX512VL | |
3243 | [SF V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL") | |
3244 | DF V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")]) | |
3245 | ||
429749e2 | 3246 | (define_insn "<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name><round_name>" |
e274629e AI |
3247 | [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v") |
3248 | (fma:VF_SF_AVX512VL | |
3249 | (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v") | |
3250 | (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>") | |
3251 | (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")))] | |
3252 | "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>" | |
429749e2 UB |
3253 | "@ |
3254 | vfmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>} | |
3255 | vfmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>} | |
3256 | vfmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}" | |
e274629e | 3257 | [(set_attr "type" "ssemuladd") |
429749e2 UB |
3258 | (set_attr "mode" "<MODE>")]) |
3259 | ||
e274629e AI |
3260 | (define_insn "<avx512>_fmadd_<mode>_mask<round_name>" |
3261 | [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v") | |
3262 | (vec_merge:VF_AVX512VL | |
3263 | (fma:VF_AVX512VL | |
3264 | (match_operand:VF_AVX512VL 1 "register_operand" "0,0") | |
3265 | (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v") | |
3266 | (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>")) | |
47490470 | 3267 | (match_dup 1) |
be792bce | 3268 | (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))] |
e274629e | 3269 | "TARGET_AVX512F && <round_mode512bit_condition>" |
47490470 | 3270 | "@ |
06bc9e41 AI |
3271 | vfmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>} |
3272 | vfmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}" | |
47490470 AI |
3273 | [(set_attr "isa" "fma_avx512f,fma_avx512f") |
3274 | (set_attr "type" "ssemuladd") | |
3275 | (set_attr "mode" "<MODE>")]) | |
3276 | ||
e274629e AI |
3277 | (define_insn "<avx512>_fmadd_<mode>_mask3<round_name>" |
3278 | [(set (match_operand:VF_AVX512VL 0 "register_operand" "=x") | |
3279 | (vec_merge:VF_AVX512VL | |
3280 | (fma:VF_AVX512VL | |
3281 | (match_operand:VF_AVX512VL 1 "register_operand" "x") | |
3282 | (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>") | |
3283 | (match_operand:VF_AVX512VL 3 "register_operand" "0")) | |
47490470 | 3284 | (match_dup 3) |
be792bce | 3285 | (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] |
47490470 | 3286 | "TARGET_AVX512F" |
06bc9e41 | 3287 | "vfmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}" |
47490470 AI |
3288 | [(set_attr "isa" "fma_avx512f") |
3289 | (set_attr "type" "ssemuladd") | |
3290 | (set_attr "mode" "<MODE>")]) | |
3291 | ||
e274629e AI |
3292 | (define_insn "*fma_fmsub_<mode>" |
3293 | [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x") | |
3294 | (fma:FMAMODE | |
3295 | (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x") | |
3296 | (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m") | |
3297 | (neg:FMAMODE | |
3298 | (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x"))))] | |
3299 | "TARGET_FMA || TARGET_FMA4" | |
b0d5396c | 3300 | "@ |
e274629e AI |
3301 | vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2} |
3302 | vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} | |
3303 | vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2} | |
3e5804e1 UB |
3304 | vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3} |
3305 | vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" | |
e274629e | 3306 | [(set_attr "isa" "fma,fma,fma,fma4,fma4") |
3e5804e1 | 3307 | (set_attr "type" "ssemuladd") |
b0d5396c UB |
3308 | (set_attr "mode" "<MODE>")]) |
3309 | ||
429749e2 | 3310 | (define_insn "<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name><round_name>" |
e274629e AI |
3311 | [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v") |
3312 | (fma:VF_SF_AVX512VL | |
3313 | (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v") | |
3314 | (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>") | |
3315 | (neg:VF_SF_AVX512VL | |
3316 | (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))] | |
3317 | "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>" | |
429749e2 UB |
3318 | "@ |
3319 | vfmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>} | |
3320 | vfmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>} | |
3321 | vfmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}" | |
e274629e | 3322 | [(set_attr "type" "ssemuladd") |
429749e2 UB |
3323 | (set_attr "mode" "<MODE>")]) |
3324 | ||
e274629e AI |
3325 | (define_insn "<avx512>_fmsub_<mode>_mask<round_name>" |
3326 | [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v") | |
3327 | (vec_merge:VF_AVX512VL | |
3328 | (fma:VF_AVX512VL | |
3329 | (match_operand:VF_AVX512VL 1 "register_operand" "0,0") | |
3330 | (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v") | |
3331 | (neg:VF_AVX512VL | |
3332 | (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>"))) | |
47490470 | 3333 | (match_dup 1) |
be792bce | 3334 | (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))] |
47490470 AI |
3335 | "TARGET_AVX512F" |
3336 | "@ | |
06bc9e41 AI |
3337 | vfmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>} |
3338 | vfmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}" | |
47490470 AI |
3339 | [(set_attr "isa" "fma_avx512f,fma_avx512f") |
3340 | (set_attr "type" "ssemuladd") | |
3341 | (set_attr "mode" "<MODE>")]) | |
3342 | ||
e274629e AI |
3343 | (define_insn "<avx512>_fmsub_<mode>_mask3<round_name>" |
3344 | [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v") | |
3345 | (vec_merge:VF_AVX512VL | |
3346 | (fma:VF_AVX512VL | |
3347 | (match_operand:VF_AVX512VL 1 "register_operand" "v") | |
3348 | (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>") | |
3349 | (neg:VF_AVX512VL | |
3350 | (match_operand:VF_AVX512VL 3 "register_operand" "0"))) | |
47490470 | 3351 | (match_dup 3) |
be792bce | 3352 | (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] |
e274629e | 3353 | "TARGET_AVX512F && <round_mode512bit_condition>" |
06bc9e41 | 3354 | "vfmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}" |
47490470 AI |
3355 | [(set_attr "isa" "fma_avx512f") |
3356 | (set_attr "type" "ssemuladd") | |
3357 | (set_attr "mode" "<MODE>")]) | |
3358 | ||
e274629e AI |
3359 | (define_insn "*fma_fnmadd_<mode>" |
3360 | [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x") | |
3361 | (fma:FMAMODE | |
3362 | (neg:FMAMODE | |
3363 | (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x")) | |
3364 | (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m") | |
3365 | (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x")))] | |
3366 | "TARGET_FMA || TARGET_FMA4" | |
06bc9e41 | 3367 | "@ |
e274629e AI |
3368 | vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2} |
3369 | vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} | |
3370 | vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2} | |
3e5804e1 UB |
3371 | vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3} |
3372 | vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" | |
e274629e | 3373 | [(set_attr "isa" "fma,fma,fma,fma4,fma4") |
3e5804e1 | 3374 | (set_attr "type" "ssemuladd") |
b0d5396c UB |
3375 | (set_attr "mode" "<MODE>")]) |
3376 | ||
429749e2 | 3377 | (define_insn "<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name><round_name>" |
e274629e AI |
3378 | [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v") |
3379 | (fma:VF_SF_AVX512VL | |
3380 | (neg:VF_SF_AVX512VL | |
3381 | (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")) | |
3382 | (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>") | |
3383 | (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")))] | |
3384 | "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>" | |
429749e2 UB |
3385 | "@ |
3386 | vfnmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>} | |
3387 | vfnmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>} | |
3388 | vfnmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}" | |
e274629e | 3389 | [(set_attr "type" "ssemuladd") |
429749e2 UB |
3390 | (set_attr "mode" "<MODE>")]) |
3391 | ||
e274629e AI |
3392 | (define_insn "<avx512>_fnmadd_<mode>_mask<round_name>" |
3393 | [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v") | |
3394 | (vec_merge:VF_AVX512VL | |
3395 | (fma:VF_AVX512VL | |
3396 | (neg:VF_AVX512VL | |
3397 | (match_operand:VF_AVX512VL 1 "register_operand" "0,0")) | |
3398 | (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v") | |
3399 | (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>")) | |
47490470 | 3400 | (match_dup 1) |
be792bce | 3401 | (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))] |
e274629e | 3402 | "TARGET_AVX512F && <round_mode512bit_condition>" |
47490470 | 3403 | "@ |
06bc9e41 AI |
3404 | vfnmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>} |
3405 | vfnmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}" | |
47490470 AI |
3406 | [(set_attr "isa" "fma_avx512f,fma_avx512f") |
3407 | (set_attr "type" "ssemuladd") | |
3408 | (set_attr "mode" "<MODE>")]) | |
3409 | ||
e274629e AI |
3410 | (define_insn "<avx512>_fnmadd_<mode>_mask3<round_name>" |
3411 | [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v") | |
3412 | (vec_merge:VF_AVX512VL | |
3413 | (fma:VF_AVX512VL | |
3414 | (neg:VF_AVX512VL | |
3415 | (match_operand:VF_AVX512VL 1 "register_operand" "v")) | |
3416 | (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>") | |
3417 | (match_operand:VF_AVX512VL 3 "register_operand" "0")) | |
47490470 | 3418 | (match_dup 3) |
be792bce | 3419 | (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] |
e274629e | 3420 | "TARGET_AVX512F && <round_mode512bit_condition>" |
06bc9e41 | 3421 | "vfnmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}" |
47490470 AI |
3422 | [(set_attr "isa" "fma_avx512f") |
3423 | (set_attr "type" "ssemuladd") | |
3424 | (set_attr "mode" "<MODE>")]) | |
3425 | ||
e274629e AI |
3426 | (define_insn "*fma_fnmsub_<mode>" |
3427 | [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x") | |
3428 | (fma:FMAMODE | |
3429 | (neg:FMAMODE | |
3430 | (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x")) | |
3431 | (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m") | |
3432 | (neg:FMAMODE | |
3433 | (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x"))))] | |
3434 | "TARGET_FMA || TARGET_FMA4" | |
b0d5396c | 3435 | "@ |
06bc9e41 AI |
3436 | vfnmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>} |
3437 | vfnmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>} | |
3438 | vfnmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>} | |
3e5804e1 UB |
3439 | vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3} |
3440 | vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" | |
e274629e | 3441 | [(set_attr "isa" "fma,fma,fma,fma4,fma4") |
3e5804e1 | 3442 | (set_attr "type" "ssemuladd") |
cbf2e4d4 HJ |
3443 | (set_attr "mode" "<MODE>")]) |
3444 | ||
429749e2 | 3445 | (define_insn "<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name><round_name>" |
e274629e AI |
3446 | [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v") |
3447 | (fma:VF_SF_AVX512VL | |
3448 | (neg:VF_SF_AVX512VL | |
3449 | (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")) | |
3450 | (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>") | |
3451 | (neg:VF_SF_AVX512VL | |
3452 | (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))] | |
3453 | "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>" | |
429749e2 UB |
3454 | "@ |
3455 | vfnmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>} | |
3456 | vfnmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>} | |
3457 | vfnmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}" | |
e274629e | 3458 | [(set_attr "type" "ssemuladd") |
429749e2 UB |
3459 | (set_attr "mode" "<MODE>")]) |
3460 | ||
e274629e AI |
3461 | (define_insn "<avx512>_fnmsub_<mode>_mask<round_name>" |
3462 | [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v") | |
3463 | (vec_merge:VF_AVX512VL | |
3464 | (fma:VF_AVX512VL | |
3465 | (neg:VF_AVX512VL | |
3466 | (match_operand:VF_AVX512VL 1 "register_operand" "0,0")) | |
3467 | (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v") | |
3468 | (neg:VF_AVX512VL | |
3469 | (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>"))) | |
47490470 | 3470 | (match_dup 1) |
be792bce | 3471 | (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))] |
e274629e | 3472 | "TARGET_AVX512F && <round_mode512bit_condition>" |
47490470 | 3473 | "@ |
06bc9e41 AI |
3474 | vfnmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>} |
3475 | vfnmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}" | |
47490470 AI |
3476 | [(set_attr "isa" "fma_avx512f,fma_avx512f") |
3477 | (set_attr "type" "ssemuladd") | |
3478 | (set_attr "mode" "<MODE>")]) | |
3479 | ||
e274629e AI |
3480 | (define_insn "<avx512>_fnmsub_<mode>_mask3<round_name>" |
3481 | [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v") | |
3482 | (vec_merge:VF_AVX512VL | |
3483 | (fma:VF_AVX512VL | |
3484 | (neg:VF_AVX512VL | |
3485 | (match_operand:VF_AVX512VL 1 "register_operand" "v")) | |
3486 | (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>") | |
3487 | (neg:VF_AVX512VL | |
3488 | (match_operand:VF_AVX512VL 3 "register_operand" "0"))) | |
47490470 | 3489 | (match_dup 3) |
be792bce | 3490 | (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] |
47490470 | 3491 | "TARGET_AVX512F" |
06bc9e41 | 3492 | "vfnmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}" |
47490470 AI |
3493 | [(set_attr "isa" "fma_avx512f") |
3494 | (set_attr "type" "ssemuladd") | |
3495 | (set_attr "mode" "<MODE>")]) | |
3496 | ||
b0d5396c | 3497 | ;; FMA parallel floating point multiply addsub and subadd operations. |
89509419 | 3498 | |
b0d5396c UB |
3499 | ;; It would be possible to represent these without the UNSPEC as |
3500 | ;; | |
3501 | ;; (vec_merge | |
3502 | ;; (fma op1 op2 op3) | |
3503 | ;; (fma op1 op2 (neg op3)) | |
3504 | ;; (merge-const)) | |
3505 | ;; | |
3506 | ;; But this doesn't seem useful in practice. | |
3507 | ||
3508 | (define_expand "fmaddsub_<mode>" | |
3509 | [(set (match_operand:VF 0 "register_operand") | |
3510 | (unspec:VF | |
3511 | [(match_operand:VF 1 "nonimmediate_operand") | |
3512 | (match_operand:VF 2 "nonimmediate_operand") | |
3513 | (match_operand:VF 3 "nonimmediate_operand")] | |
3514 | UNSPEC_FMADDSUB))] | |
558d9f79 | 3515 | "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F") |
b0d5396c | 3516 | |
e274629e AI |
3517 | (define_expand "<avx512>_fmaddsub_<mode>_maskz<round_expand_name>" |
3518 | [(match_operand:VF_AVX512VL 0 "register_operand") | |
3519 | (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>") | |
3520 | (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>") | |
3521 | (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>") | |
8b08db1e AI |
3522 | (match_operand:<avx512fmaskmode> 4 "register_operand")] |
3523 | "TARGET_AVX512F" | |
3524 | { | |
7cf78561 | 3525 | emit_insn (gen_fma_fmaddsub_<mode>_maskz_1<round_expand_name> ( |
8b08db1e | 3526 | operands[0], operands[1], operands[2], operands[3], |
7cf78561 | 3527 | CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>)); |
8b08db1e AI |
3528 | DONE; |
3529 | }) | |
3530 | ||
e274629e | 3531 | (define_insn "*fma_fmaddsub_<mode>" |
429749e2 UB |
3532 | [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x") |
3533 | (unspec:VF_128_256 | |
e274629e AI |
3534 | [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,v,x,x") |
3535 | (match_operand:VF_128_256 2 "nonimmediate_operand" "vm,v,vm,x,m") | |
3536 | (match_operand:VF_128_256 3 "nonimmediate_operand" "v,vm,0,xm,x")] | |
b0d5396c | 3537 | UNSPEC_FMADDSUB))] |
e274629e | 3538 | "TARGET_FMA || TARGET_FMA4" |
b0d5396c | 3539 | "@ |
e274629e AI |
3540 | vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2} |
3541 | vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} | |
3542 | vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2} | |
3e5804e1 UB |
3543 | vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3} |
3544 | vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" | |
e274629e | 3545 | [(set_attr "isa" "fma,fma,fma,fma4,fma4") |
3e5804e1 | 3546 | (set_attr "type" "ssemuladd") |
b0d5396c UB |
3547 | (set_attr "mode" "<MODE>")]) |
3548 | ||
429749e2 | 3549 | (define_insn "<sd_mask_codefor>fma_fmaddsub_<mode><sd_maskz_name><round_name>" |
e274629e AI |
3550 | [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v") |
3551 | (unspec:VF_SF_AVX512VL | |
3552 | [(match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v") | |
3553 | (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>") | |
3554 | (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")] | |
429749e2 UB |
3555 | UNSPEC_FMADDSUB))] |
3556 | "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>" | |
3557 | "@ | |
3558 | vfmaddsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>} | |
3559 | vfmaddsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>} | |
3560 | vfmaddsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}" | |
e274629e | 3561 | [(set_attr "type" "ssemuladd") |
429749e2 UB |
3562 | (set_attr "mode" "<MODE>")]) |
3563 | ||
e274629e AI |
3564 | (define_insn "<avx512>_fmaddsub_<mode>_mask<round_name>" |
3565 | [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v") | |
3566 | (vec_merge:VF_AVX512VL | |
3567 | (unspec:VF_AVX512VL | |
3568 | [(match_operand:VF_AVX512VL 1 "register_operand" "0,0") | |
3569 | (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v") | |
3570 | (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>")] | |
47490470 AI |
3571 | UNSPEC_FMADDSUB) |
3572 | (match_dup 1) | |
be792bce | 3573 | (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))] |
47490470 AI |
3574 | "TARGET_AVX512F" |
3575 | "@ | |
06bc9e41 AI |
3576 | vfmaddsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>} |
3577 | vfmaddsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}" | |
47490470 AI |
3578 | [(set_attr "isa" "fma_avx512f,fma_avx512f") |
3579 | (set_attr "type" "ssemuladd") | |
3580 | (set_attr "mode" "<MODE>")]) | |
3581 | ||
e274629e AI |
3582 | (define_insn "<avx512>_fmaddsub_<mode>_mask3<round_name>" |
3583 | [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v") | |
3584 | (vec_merge:VF_AVX512VL | |
3585 | (unspec:VF_AVX512VL | |
3586 | [(match_operand:VF_AVX512VL 1 "register_operand" "v") | |
3587 | (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>") | |
3588 | (match_operand:VF_AVX512VL 3 "register_operand" "0")] | |
47490470 AI |
3589 | UNSPEC_FMADDSUB) |
3590 | (match_dup 3) | |
be792bce | 3591 | (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] |
47490470 | 3592 | "TARGET_AVX512F" |
06bc9e41 | 3593 | "vfmaddsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}" |
47490470 AI |
3594 | [(set_attr "isa" "fma_avx512f") |
3595 | (set_attr "type" "ssemuladd") | |
3596 | (set_attr "mode" "<MODE>")]) | |
3597 | ||
e274629e | 3598 | (define_insn "*fma_fmsubadd_<mode>" |
429749e2 UB |
3599 | [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x") |
3600 | (unspec:VF_128_256 | |
e274629e AI |
3601 | [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,v,x,x") |
3602 | (match_operand:VF_128_256 2 "nonimmediate_operand" "vm,v,vm,x,m") | |
429749e2 | 3603 | (neg:VF_128_256 |
e274629e | 3604 | (match_operand:VF_128_256 3 "nonimmediate_operand" "v,vm,0,xm,x"))] |
b0d5396c | 3605 | UNSPEC_FMADDSUB))] |
e274629e | 3606 | "TARGET_FMA || TARGET_FMA4" |
b0d5396c | 3607 | "@ |
e274629e AI |
3608 | vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2} |
3609 | vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} | |
3610 | vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2} | |
3e5804e1 UB |
3611 | vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3} |
3612 | vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" | |
e274629e | 3613 | [(set_attr "isa" "fma,fma,fma,fma4,fma4") |
3e5804e1 | 3614 | (set_attr "type" "ssemuladd") |
b0d5396c UB |
3615 | (set_attr "mode" "<MODE>")]) |
3616 | ||
429749e2 | 3617 | (define_insn "<sd_mask_codefor>fma_fmsubadd_<mode><sd_maskz_name><round_name>" |
e274629e AI |
3618 | [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v") |
3619 | (unspec:VF_SF_AVX512VL | |
3620 | [(match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v") | |
3621 | (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>") | |
3622 | (neg:VF_SF_AVX512VL | |
3623 | (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))] | |
429749e2 UB |
3624 | UNSPEC_FMADDSUB))] |
3625 | "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>" | |
3626 | "@ | |
3627 | vfmsubadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>} | |
3628 | vfmsubadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>} | |
3629 | vfmsubadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}" | |
e274629e | 3630 | [(set_attr "type" "ssemuladd") |
429749e2 UB |
3631 | (set_attr "mode" "<MODE>")]) |
3632 | ||
e274629e AI |
3633 | (define_insn "<avx512>_fmsubadd_<mode>_mask<round_name>" |
3634 | [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v") | |
3635 | (vec_merge:VF_AVX512VL | |
3636 | (unspec:VF_AVX512VL | |
3637 | [(match_operand:VF_AVX512VL 1 "register_operand" "0,0") | |
3638 | (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v") | |
3639 | (neg:VF_AVX512VL | |
3640 | (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>"))] | |
47490470 AI |
3641 | UNSPEC_FMADDSUB) |
3642 | (match_dup 1) | |
be792bce | 3643 | (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))] |
47490470 AI |
3644 | "TARGET_AVX512F" |
3645 | "@ | |
06bc9e41 AI |
3646 | vfmsubadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>} |
3647 | vfmsubadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}" | |
47490470 AI |
3648 | [(set_attr "isa" "fma_avx512f,fma_avx512f") |
3649 | (set_attr "type" "ssemuladd") | |
3650 | (set_attr "mode" "<MODE>")]) | |
3651 | ||
e274629e AI |
3652 | (define_insn "<avx512>_fmsubadd_<mode>_mask3<round_name>" |
3653 | [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v") | |
3654 | (vec_merge:VF_AVX512VL | |
3655 | (unspec:VF_AVX512VL | |
3656 | [(match_operand:VF_AVX512VL 1 "register_operand" "v") | |
3657 | (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>") | |
3658 | (neg:VF_AVX512VL | |
3659 | (match_operand:VF_AVX512VL 3 "register_operand" "0"))] | |
47490470 AI |
3660 | UNSPEC_FMADDSUB) |
3661 | (match_dup 3) | |
be792bce | 3662 | (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] |
47490470 | 3663 | "TARGET_AVX512F" |
06bc9e41 | 3664 | "vfmsubadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}" |
47490470 AI |
3665 | [(set_attr "isa" "fma_avx512f") |
3666 | (set_attr "type" "ssemuladd") | |
3667 | (set_attr "mode" "<MODE>")]) | |
3668 | ||
b0d5396c UB |
3669 | ;; FMA3 floating point scalar intrinsics. These merge result with |
3670 | ;; high-order elements from the destination register. | |
cbf2e4d4 | 3671 | |
06bc9e41 | 3672 | (define_expand "fmai_vmfmadd_<mode><round_name>" |
2ddd46d6 IT |
3673 | [(set (match_operand:VF_128 0 "register_operand") |
3674 | (vec_merge:VF_128 | |
3675 | (fma:VF_128 | |
06bc9e41 AI |
3676 | (match_operand:VF_128 1 "<round_nimm_predicate>") |
3677 | (match_operand:VF_128 2 "<round_nimm_predicate>") | |
3678 | (match_operand:VF_128 3 "<round_nimm_predicate>")) | |
5f08ae1e | 3679 | (match_dup 1) |
2ddd46d6 IT |
3680 | (const_int 1)))] |
3681 | "TARGET_FMA") | |
3682 | ||
3683 | (define_insn "*fmai_fmadd_<mode>" | |
3f97cb0b | 3684 | [(set (match_operand:VF_128 0 "register_operand" "=v,v") |
2ddd46d6 IT |
3685 | (vec_merge:VF_128 |
3686 | (fma:VF_128 | |
06bc9e41 AI |
3687 | (match_operand:VF_128 1 "<round_nimm_predicate>" " 0, 0") |
3688 | (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>, v") | |
3689 | (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>")) | |
5f08ae1e | 3690 | (match_dup 1) |
2ddd46d6 | 3691 | (const_int 1)))] |
558d9f79 | 3692 | "TARGET_FMA || TARGET_AVX512F" |
2ddd46d6 | 3693 | "@ |
06bc9e41 AI |
3694 | vfmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>} |
3695 | vfmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}" | |
2ddd46d6 IT |
3696 | [(set_attr "type" "ssemuladd") |
3697 | (set_attr "mode" "<MODE>")]) | |
3698 | ||
3699 | (define_insn "*fmai_fmsub_<mode>" | |
3f97cb0b | 3700 | [(set (match_operand:VF_128 0 "register_operand" "=v,v") |
2ddd46d6 IT |
3701 | (vec_merge:VF_128 |
3702 | (fma:VF_128 | |
06bc9e41 AI |
3703 | (match_operand:VF_128 1 "<round_nimm_predicate>" "0,0") |
3704 | (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v") | |
2ddd46d6 | 3705 | (neg:VF_128 |
06bc9e41 | 3706 | (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>"))) |
5f08ae1e | 3707 | (match_dup 1) |
2ddd46d6 | 3708 | (const_int 1)))] |
558d9f79 | 3709 | "TARGET_FMA || TARGET_AVX512F" |
2ddd46d6 | 3710 | "@ |
06bc9e41 AI |
3711 | vfmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>} |
3712 | vfmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}" | |
2ddd46d6 IT |
3713 | [(set_attr "type" "ssemuladd") |
3714 | (set_attr "mode" "<MODE>")]) | |
3715 | ||
06bc9e41 | 3716 | (define_insn "*fmai_fnmadd_<mode><round_name>" |
3f97cb0b | 3717 | [(set (match_operand:VF_128 0 "register_operand" "=v,v") |
2ddd46d6 IT |
3718 | (vec_merge:VF_128 |
3719 | (fma:VF_128 | |
3720 | (neg:VF_128 | |
06bc9e41 AI |
3721 | (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v")) |
3722 | (match_operand:VF_128 1 "<round_nimm_predicate>" "0,0") | |
3723 | (match_operand:VF_128 3 "<round_nimm_predicate>" "v,<round_constraint>")) | |
5f08ae1e | 3724 | (match_dup 1) |
2ddd46d6 | 3725 | (const_int 1)))] |
558d9f79 | 3726 | "TARGET_FMA || TARGET_AVX512F" |
2ddd46d6 | 3727 | "@ |
06bc9e41 AI |
3728 | vfnmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>} |
3729 | vfnmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}" | |
2ddd46d6 IT |
3730 | [(set_attr "type" "ssemuladd") |
3731 | (set_attr "mode" "<MODE>")]) | |
3732 | ||
06bc9e41 | 3733 | (define_insn "*fmai_fnmsub_<mode><round_name>" |
3f97cb0b | 3734 | [(set (match_operand:VF_128 0 "register_operand" "=v,v") |
2ddd46d6 IT |
3735 | (vec_merge:VF_128 |
3736 | (fma:VF_128 | |
3737 | (neg:VF_128 | |
06bc9e41 AI |
3738 | (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>, v")) |
3739 | (match_operand:VF_128 1 "<round_nimm_predicate>" " 0, 0") | |
2ddd46d6 | 3740 | (neg:VF_128 |
06bc9e41 | 3741 | (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>"))) |
5f08ae1e | 3742 | (match_dup 1) |
2ddd46d6 | 3743 | (const_int 1)))] |
558d9f79 | 3744 | "TARGET_FMA || TARGET_AVX512F" |
2ddd46d6 | 3745 | "@ |
06bc9e41 AI |
3746 | vfnmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>} |
3747 | vfnmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}" | |
2ddd46d6 IT |
3748 | [(set_attr "type" "ssemuladd") |
3749 | (set_attr "mode" "<MODE>")]) | |
3750 | ||
b0d5396c UB |
3751 | ;; FMA4 floating point scalar intrinsics. These write the |
3752 | ;; entire destination register, with the high-order elements zeroed. | |
3753 | ||
3754 | (define_expand "fma4i_vmfmadd_<mode>" | |
3755 | [(set (match_operand:VF_128 0 "register_operand") | |
3756 | (vec_merge:VF_128 | |
3757 | (fma:VF_128 | |
3758 | (match_operand:VF_128 1 "nonimmediate_operand") | |
3759 | (match_operand:VF_128 2 "nonimmediate_operand") | |
3760 | (match_operand:VF_128 3 "nonimmediate_operand")) | |
3761 | (match_dup 4) | |
3762 | (const_int 1)))] | |
3763 | "TARGET_FMA4" | |
3764 | "operands[4] = CONST0_RTX (<MODE>mode);") | |
3765 | ||
89509419 | 3766 | (define_insn "*fma4i_vmfmadd_<mode>" |
6bec6c98 UB |
3767 | [(set (match_operand:VF_128 0 "register_operand" "=x,x") |
3768 | (vec_merge:VF_128 | |
3769 | (fma:VF_128 | |
3770 | (match_operand:VF_128 1 "nonimmediate_operand" "%x,x") | |
3771 | (match_operand:VF_128 2 "nonimmediate_operand" " x,m") | |
3772 | (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")) | |
82e86dc6 | 3773 | (match_operand:VF_128 4 "const0_operand") |
89509419 RH |
3774 | (const_int 1)))] |
3775 | "TARGET_FMA4" | |
eabb5f48 | 3776 | "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}" |
cbf2e4d4 HJ |
3777 | [(set_attr "type" "ssemuladd") |
3778 | (set_attr "mode" "<MODE>")]) | |
3779 | ||
89509419 | 3780 | (define_insn "*fma4i_vmfmsub_<mode>" |
6bec6c98 UB |
3781 | [(set (match_operand:VF_128 0 "register_operand" "=x,x") |
3782 | (vec_merge:VF_128 | |
3783 | (fma:VF_128 | |
3784 | (match_operand:VF_128 1 "nonimmediate_operand" "%x,x") | |
3785 | (match_operand:VF_128 2 "nonimmediate_operand" " x,m") | |
3786 | (neg:VF_128 | |
3787 | (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))) | |
82e86dc6 | 3788 | (match_operand:VF_128 4 "const0_operand") |
89509419 RH |
3789 | (const_int 1)))] |
3790 | "TARGET_FMA4" | |
eabb5f48 | 3791 | "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}" |
cbf2e4d4 HJ |
3792 | [(set_attr "type" "ssemuladd") |
3793 | (set_attr "mode" "<MODE>")]) | |
3794 | ||
89509419 | 3795 | (define_insn "*fma4i_vmfnmadd_<mode>" |
6bec6c98 UB |
3796 | [(set (match_operand:VF_128 0 "register_operand" "=x,x") |
3797 | (vec_merge:VF_128 | |
3798 | (fma:VF_128 | |
3799 | (neg:VF_128 | |
3800 | (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")) | |
3801 | (match_operand:VF_128 2 "nonimmediate_operand" " x,m") | |
3802 | (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")) | |
82e86dc6 | 3803 | (match_operand:VF_128 4 "const0_operand") |
89509419 RH |
3804 | (const_int 1)))] |
3805 | "TARGET_FMA4" | |
eabb5f48 | 3806 | "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}" |
cbf2e4d4 HJ |
3807 | [(set_attr "type" "ssemuladd") |
3808 | (set_attr "mode" "<MODE>")]) | |
3809 | ||
89509419 | 3810 | (define_insn "*fma4i_vmfnmsub_<mode>" |
6bec6c98 UB |
3811 | [(set (match_operand:VF_128 0 "register_operand" "=x,x") |
3812 | (vec_merge:VF_128 | |
3813 | (fma:VF_128 | |
3814 | (neg:VF_128 | |
3815 | (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")) | |
3816 | (match_operand:VF_128 2 "nonimmediate_operand" " x,m") | |
3817 | (neg:VF_128 | |
3818 | (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))) | |
82e86dc6 | 3819 | (match_operand:VF_128 4 "const0_operand") |
89509419 | 3820 | (const_int 1)))] |
c71ad61e | 3821 | "TARGET_FMA4" |
eabb5f48 | 3822 | "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}" |
cbf2e4d4 HJ |
3823 | [(set_attr "type" "ssemuladd") |
3824 | (set_attr "mode" "<MODE>")]) | |
3825 | ||
351877cf | 3826 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
ef719a44 | 3827 | ;; |
d6023b50 UB |
3828 | ;; Parallel single-precision floating point conversion operations |
3829 | ;; | |
3830 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
3831 | ||
3832 | (define_insn "sse_cvtpi2ps" | |
3833 | [(set (match_operand:V4SF 0 "register_operand" "=x") | |
3834 | (vec_merge:V4SF | |
3835 | (vec_duplicate:V4SF | |
3836 | (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym"))) | |
3837 | (match_operand:V4SF 1 "register_operand" "0") | |
3838 | (const_int 3)))] | |
3839 | "TARGET_SSE" | |
3840 | "cvtpi2ps\t{%2, %0|%0, %2}" | |
3841 | [(set_attr "type" "ssecvt") | |
3842 | (set_attr "mode" "V4SF")]) | |
3843 | ||
3844 | (define_insn "sse_cvtps2pi" | |
3845 | [(set (match_operand:V2SI 0 "register_operand" "=y") | |
3846 | (vec_select:V2SI | |
3847 | (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] | |
3848 | UNSPEC_FIX_NOTRUNC) | |
3849 | (parallel [(const_int 0) (const_int 1)])))] | |
3850 | "TARGET_SSE" | |
eabb5f48 | 3851 | "cvtps2pi\t{%1, %0|%0, %q1}" |
d6023b50 UB |
3852 | [(set_attr "type" "ssecvt") |
3853 | (set_attr "unit" "mmx") | |
3854 | (set_attr "mode" "DI")]) | |
3855 | ||
3856 | (define_insn "sse_cvttps2pi" | |
3857 | [(set (match_operand:V2SI 0 "register_operand" "=y") | |
3858 | (vec_select:V2SI | |
3859 | (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")) | |
3860 | (parallel [(const_int 0) (const_int 1)])))] | |
3861 | "TARGET_SSE" | |
eabb5f48 | 3862 | "cvttps2pi\t{%1, %0|%0, %q1}" |
d6023b50 UB |
3863 | [(set_attr "type" "ssecvt") |
3864 | (set_attr "unit" "mmx") | |
725fd454 | 3865 | (set_attr "prefix_rep" "0") |
d6023b50 UB |
3866 | (set_attr "mode" "SF")]) |
3867 | ||
06bc9e41 | 3868 | (define_insn "sse_cvtsi2ss<round_name>" |
3f97cb0b | 3869 | [(set (match_operand:V4SF 0 "register_operand" "=x,x,v") |
d6023b50 UB |
3870 | (vec_merge:V4SF |
3871 | (vec_duplicate:V4SF | |
06bc9e41 | 3872 | (float:SF (match_operand:SI 2 "<round_nimm_predicate>" "r,m,<round_constraint3>"))) |
3f97cb0b | 3873 | (match_operand:V4SF 1 "register_operand" "0,0,v") |
d6023b50 UB |
3874 | (const_int 1)))] |
3875 | "TARGET_SSE" | |
a95d4000 UB |
3876 | "@ |
3877 | cvtsi2ss\t{%2, %0|%0, %2} | |
3878 | cvtsi2ss\t{%2, %0|%0, %2} | |
06bc9e41 | 3879 | vcvtsi2ss\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}" |
a95d4000 UB |
3880 | [(set_attr "isa" "noavx,noavx,avx") |
3881 | (set_attr "type" "sseicvt") | |
3882 | (set_attr "athlon_decode" "vector,double,*") | |
3883 | (set_attr "amdfam10_decode" "vector,double,*") | |
3884 | (set_attr "bdver1_decode" "double,direct,*") | |
01284895 | 3885 | (set_attr "btver2_decode" "double,double,double") |
2b1ebb0c | 3886 | (set_attr "prefix" "orig,orig,maybe_evex") |
95879c72 L |
3887 | (set_attr "mode" "SF")]) |
3888 | ||
06bc9e41 | 3889 | (define_insn "sse_cvtsi2ssq<round_name>" |
3f97cb0b | 3890 | [(set (match_operand:V4SF 0 "register_operand" "=x,x,v") |
d6023b50 UB |
3891 | (vec_merge:V4SF |
3892 | (vec_duplicate:V4SF | |
06bc9e41 | 3893 | (float:SF (match_operand:DI 2 "<round_nimm_predicate>" "r,m,<round_constraint3>"))) |
3f97cb0b | 3894 | (match_operand:V4SF 1 "register_operand" "0,0,v") |
d6023b50 UB |
3895 | (const_int 1)))] |
3896 | "TARGET_SSE && TARGET_64BIT" | |
a95d4000 UB |
3897 | "@ |
3898 | cvtsi2ssq\t{%2, %0|%0, %2} | |
3899 | cvtsi2ssq\t{%2, %0|%0, %2} | |
06bc9e41 | 3900 | vcvtsi2ssq\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}" |
a95d4000 UB |
3901 | [(set_attr "isa" "noavx,noavx,avx") |
3902 | (set_attr "type" "sseicvt") | |
3903 | (set_attr "athlon_decode" "vector,double,*") | |
3904 | (set_attr "amdfam10_decode" "vector,double,*") | |
3905 | (set_attr "bdver1_decode" "double,direct,*") | |
01284895 | 3906 | (set_attr "btver2_decode" "double,double,double") |
a95d4000 UB |
3907 | (set_attr "length_vex" "*,*,4") |
3908 | (set_attr "prefix_rex" "1,1,*") | |
2b1ebb0c | 3909 | (set_attr "prefix" "orig,orig,maybe_evex") |
d6023b50 UB |
3910 | (set_attr "mode" "SF")]) |
3911 | ||
06bc9e41 | 3912 | (define_insn "sse_cvtss2si<round_name>" |
d6023b50 UB |
3913 | [(set (match_operand:SI 0 "register_operand" "=r,r") |
3914 | (unspec:SI | |
3915 | [(vec_select:SF | |
06bc9e41 | 3916 | (match_operand:V4SF 1 "<round_nimm_predicate>" "v,<round_constraint2>") |
d6023b50 UB |
3917 | (parallel [(const_int 0)]))] |
3918 | UNSPEC_FIX_NOTRUNC))] | |
3919 | "TARGET_SSE" | |
06bc9e41 | 3920 | "%vcvtss2si\t{<round_op2>%1, %0|%0, %k1<round_op2>}" |
d6023b50 UB |
3921 | [(set_attr "type" "sseicvt") |
3922 | (set_attr "athlon_decode" "double,vector") | |
6a08ffca | 3923 | (set_attr "bdver1_decode" "double,double") |
d6023b50 | 3924 | (set_attr "prefix_rep" "1") |
95879c72 | 3925 | (set_attr "prefix" "maybe_vex") |
d6023b50 UB |
3926 | (set_attr "mode" "SI")]) |
3927 | ||
3928 | (define_insn "sse_cvtss2si_2" | |
3929 | [(set (match_operand:SI 0 "register_operand" "=r,r") | |
3f97cb0b | 3930 | (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "v,m")] |
d6023b50 UB |
3931 | UNSPEC_FIX_NOTRUNC))] |
3932 | "TARGET_SSE" | |
eabb5f48 | 3933 | "%vcvtss2si\t{%1, %0|%0, %k1}" |
d6023b50 UB |
3934 | [(set_attr "type" "sseicvt") |
3935 | (set_attr "athlon_decode" "double,vector") | |
3936 | (set_attr "amdfam10_decode" "double,double") | |
6a08ffca | 3937 | (set_attr "bdver1_decode" "double,double") |
d6023b50 | 3938 | (set_attr "prefix_rep" "1") |
95879c72 | 3939 | (set_attr "prefix" "maybe_vex") |
d6023b50 UB |
3940 | (set_attr "mode" "SI")]) |
3941 | ||
06bc9e41 | 3942 | (define_insn "sse_cvtss2siq<round_name>" |
d6023b50 UB |
3943 | [(set (match_operand:DI 0 "register_operand" "=r,r") |
3944 | (unspec:DI | |
3945 | [(vec_select:SF | |
06bc9e41 | 3946 | (match_operand:V4SF 1 "<round_nimm_predicate>" "v,<round_constraint2>") |
d6023b50 UB |
3947 | (parallel [(const_int 0)]))] |
3948 | UNSPEC_FIX_NOTRUNC))] | |
3949 | "TARGET_SSE && TARGET_64BIT" | |
06bc9e41 | 3950 | "%vcvtss2si{q}\t{<round_op2>%1, %0|%0, %k1<round_op2>}" |
d6023b50 UB |
3951 | [(set_attr "type" "sseicvt") |
3952 | (set_attr "athlon_decode" "double,vector") | |
6a08ffca | 3953 | (set_attr "bdver1_decode" "double,double") |
d6023b50 | 3954 | (set_attr "prefix_rep" "1") |
95879c72 | 3955 | (set_attr "prefix" "maybe_vex") |
d6023b50 UB |
3956 | (set_attr "mode" "DI")]) |
3957 | ||
3958 | (define_insn "sse_cvtss2siq_2" | |
3959 | [(set (match_operand:DI 0 "register_operand" "=r,r") | |
2b1ebb0c | 3960 | (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "v,m")] |
d6023b50 UB |
3961 | UNSPEC_FIX_NOTRUNC))] |
3962 | "TARGET_SSE && TARGET_64BIT" | |
eabb5f48 | 3963 | "%vcvtss2si{q}\t{%1, %0|%0, %k1}" |
d6023b50 UB |
3964 | [(set_attr "type" "sseicvt") |
3965 | (set_attr "athlon_decode" "double,vector") | |
3966 | (set_attr "amdfam10_decode" "double,double") | |
6a08ffca | 3967 | (set_attr "bdver1_decode" "double,double") |
d6023b50 | 3968 | (set_attr "prefix_rep" "1") |
95879c72 | 3969 | (set_attr "prefix" "maybe_vex") |
d6023b50 UB |
3970 | (set_attr "mode" "DI")]) |
3971 | ||
8a6ef760 | 3972 | (define_insn "sse_cvttss2si<round_saeonly_name>" |
d6023b50 UB |
3973 | [(set (match_operand:SI 0 "register_operand" "=r,r") |
3974 | (fix:SI | |
3975 | (vec_select:SF | |
8a6ef760 | 3976 | (match_operand:V4SF 1 "<round_saeonly_nimm_predicate>" "v,<round_saeonly_constraint2>") |
d6023b50 UB |
3977 | (parallel [(const_int 0)]))))] |
3978 | "TARGET_SSE" | |
8a6ef760 | 3979 | "%vcvttss2si\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}" |
d6023b50 UB |
3980 | [(set_attr "type" "sseicvt") |
3981 | (set_attr "athlon_decode" "double,vector") | |
3982 | (set_attr "amdfam10_decode" "double,double") | |
6a08ffca | 3983 | (set_attr "bdver1_decode" "double,double") |
d6023b50 | 3984 | (set_attr "prefix_rep" "1") |
95879c72 | 3985 | (set_attr "prefix" "maybe_vex") |
d6023b50 UB |
3986 | (set_attr "mode" "SI")]) |
3987 | ||
8a6ef760 | 3988 | (define_insn "sse_cvttss2siq<round_saeonly_name>" |
d6023b50 UB |
3989 | [(set (match_operand:DI 0 "register_operand" "=r,r") |
3990 | (fix:DI | |
3991 | (vec_select:SF | |
8a6ef760 | 3992 | (match_operand:V4SF 1 "<round_saeonly_nimm_predicate>" "v,<round_saeonly_constraint>") |
d6023b50 UB |
3993 | (parallel [(const_int 0)]))))] |
3994 | "TARGET_SSE && TARGET_64BIT" | |
8a6ef760 | 3995 | "%vcvttss2si{q}\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}" |
d6023b50 UB |
3996 | [(set_attr "type" "sseicvt") |
3997 | (set_attr "athlon_decode" "double,vector") | |
3998 | (set_attr "amdfam10_decode" "double,double") | |
6a08ffca | 3999 | (set_attr "bdver1_decode" "double,double") |
d6023b50 | 4000 | (set_attr "prefix_rep" "1") |
95879c72 | 4001 | (set_attr "prefix" "maybe_vex") |
d6023b50 UB |
4002 | (set_attr "mode" "DI")]) |
4003 | ||
06bc9e41 | 4004 | (define_insn "cvtusi2<ssescalarmodesuffix>32<round_name>" |
c003c6d6 AI |
4005 | [(set (match_operand:VF_128 0 "register_operand" "=v") |
4006 | (vec_merge:VF_128 | |
4007 | (vec_duplicate:VF_128 | |
4008 | (unsigned_float:<ssescalarmode> | |
06bc9e41 | 4009 | (match_operand:SI 2 "<round_nimm_predicate>" "<round_constraint3>"))) |
c003c6d6 AI |
4010 | (match_operand:VF_128 1 "register_operand" "v") |
4011 | (const_int 1)))] | |
06bc9e41 AI |
4012 | "TARGET_AVX512F && <round_modev4sf_condition>" |
4013 | "vcvtusi2<ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}" | |
c003c6d6 AI |
4014 | [(set_attr "type" "sseicvt") |
4015 | (set_attr "prefix" "evex") | |
4016 | (set_attr "mode" "<ssescalarmode>")]) | |
4017 | ||
06bc9e41 | 4018 | (define_insn "cvtusi2<ssescalarmodesuffix>64<round_name>" |
c003c6d6 AI |
4019 | [(set (match_operand:VF_128 0 "register_operand" "=v") |
4020 | (vec_merge:VF_128 | |
4021 | (vec_duplicate:VF_128 | |
4022 | (unsigned_float:<ssescalarmode> | |
06bc9e41 | 4023 | (match_operand:DI 2 "<round_nimm_predicate>" "<round_constraint3>"))) |
c003c6d6 AI |
4024 | (match_operand:VF_128 1 "register_operand" "v") |
4025 | (const_int 1)))] | |
4026 | "TARGET_AVX512F && TARGET_64BIT" | |
06bc9e41 | 4027 | "vcvtusi2<ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}" |
c003c6d6 AI |
4028 | [(set_attr "type" "sseicvt") |
4029 | (set_attr "prefix" "evex") | |
4030 | (set_attr "mode" "<ssescalarmode>")]) | |
4031 | ||
06bc9e41 | 4032 | (define_insn "float<sseintvecmodelower><mode>2<mask_name><round_name>" |
3f97cb0b | 4033 | [(set (match_operand:VF1 0 "register_operand" "=v") |
406d683e | 4034 | (float:VF1 |
06bc9e41 AI |
4035 | (match_operand:<sseintvecmode> 1 "<round_nimm_predicate>" "<round_constraint>")))] |
4036 | "TARGET_SSE2 && <mask_mode512bit_condition> && <round_mode512bit_condition>" | |
4037 | "%vcvtdq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}" | |
d6023b50 | 4038 | [(set_attr "type" "ssecvt") |
a95d4000 | 4039 | (set_attr "prefix" "maybe_vex") |
406d683e | 4040 | (set_attr "mode" "<sseinsnmode>")]) |
d6023b50 | 4041 | |
4769c826 AI |
4042 | (define_insn "ufloat<sseintvecmodelower><mode>2<mask_name><round_name>" |
4043 | [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v") | |
4044 | (unsigned_float:VF1_AVX512VL | |
4045 | (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "<round_constraint>")))] | |
c003c6d6 | 4046 | "TARGET_AVX512F" |
06bc9e41 | 4047 | "vcvtudq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}" |
c003c6d6 AI |
4048 | [(set_attr "type" "ssecvt") |
4049 | (set_attr "prefix" "evex") | |
4769c826 | 4050 | (set_attr "mode" "<MODE>")]) |
c003c6d6 | 4051 | |
406d683e | 4052 | (define_expand "floatuns<sseintvecmodelower><mode>2" |
82e86dc6 UB |
4053 | [(match_operand:VF1 0 "register_operand") |
4054 | (match_operand:<sseintvecmode> 1 "register_operand")] | |
635c6321 | 4055 | "TARGET_SSE2 && (<MODE>mode == V4SFmode || TARGET_AVX2)" |
54e86f6b | 4056 | { |
d3c2fee0 AI |
4057 | if (<MODE>mode == V16SFmode) |
4058 | emit_insn (gen_ufloatv16siv16sf2 (operands[0], operands[1])); | |
2fa9ee8f AI |
4059 | else |
4060 | if (TARGET_AVX512VL) | |
4061 | { | |
4062 | if (<MODE>mode == V4SFmode) | |
4063 | emit_insn (gen_ufloatv4siv4sf2 (operands[0], operands[1])); | |
4064 | else | |
4065 | emit_insn (gen_ufloatv8siv8sf2 (operands[0], operands[1])); | |
4066 | } | |
d3c2fee0 AI |
4067 | else |
4068 | ix86_expand_vector_convert_uns_vsivsf (operands[0], operands[1]); | |
4069 | ||
635c6321 | 4070 | DONE; |
54e86f6b UB |
4071 | }) |
4072 | ||
95879c72 | 4073 | |
50e60d7d AI |
4074 | ;; For <sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode> insn pattern |
4075 | (define_mode_attr sf2simodelower | |
4076 | [(V16SI "v16sf") (V8SI "v8sf") (V4SI "v4sf")]) | |
4077 | ||
415ebad0 | 4078 | (define_insn "<sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode><mask_name>" |
50e60d7d AI |
4079 | [(set (match_operand:VI4_AVX 0 "register_operand" "=v") |
4080 | (unspec:VI4_AVX | |
4081 | [(match_operand:<ssePSmode> 1 "nonimmediate_operand" "vm")] | |
4082 | UNSPEC_FIX_NOTRUNC))] | |
415ebad0 AI |
4083 | "TARGET_SSE2 && <mask_mode512bit_condition>" |
4084 | "%vcvtps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" | |
d6023b50 | 4085 | [(set_attr "type" "ssecvt") |
a95d4000 UB |
4086 | (set (attr "prefix_data16") |
4087 | (if_then_else | |
67b2c493 | 4088 | (match_test "TARGET_AVX") |
a95d4000 UB |
4089 | (const_string "*") |
4090 | (const_string "1"))) | |
4091 | (set_attr "prefix" "maybe_vex") | |
50e60d7d | 4092 | (set_attr "mode" "<sseinsnmode>")]) |
d6023b50 | 4093 | |
06bc9e41 | 4094 | (define_insn "<mask_codefor>avx512f_fix_notruncv16sfv16si<mask_name><round_name>" |
c003c6d6 AI |
4095 | [(set (match_operand:V16SI 0 "register_operand" "=v") |
4096 | (unspec:V16SI | |
06bc9e41 | 4097 | [(match_operand:V16SF 1 "<round_nimm_predicate>" "<round_constraint>")] |
c003c6d6 AI |
4098 | UNSPEC_FIX_NOTRUNC))] |
4099 | "TARGET_AVX512F" | |
06bc9e41 | 4100 | "vcvtps2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}" |
c003c6d6 AI |
4101 | [(set_attr "type" "ssecvt") |
4102 | (set_attr "prefix" "evex") | |
4103 | (set_attr "mode" "XI")]) | |
4104 | ||
21c924ac AI |
4105 | (define_insn "<mask_codefor><avx512>_ufix_notrunc<sf2simodelower><mode><mask_name><round_name>" |
4106 | [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v") | |
4107 | (unspec:VI4_AVX512VL | |
4108 | [(match_operand:<ssePSmode> 1 "nonimmediate_operand" "<round_constraint>")] | |
c003c6d6 AI |
4109 | UNSPEC_UNSIGNED_FIX_NOTRUNC))] |
4110 | "TARGET_AVX512F" | |
06bc9e41 | 4111 | "vcvtps2udq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}" |
c003c6d6 AI |
4112 | [(set_attr "type" "ssecvt") |
4113 | (set_attr "prefix" "evex") | |
21c924ac | 4114 | (set_attr "mode" "<sseinsnmode>")]) |
c003c6d6 | 4115 | |
dc3b8d27 AI |
4116 | (define_insn "<mask_codefor>avx512dq_cvtps2qq<mode><mask_name><round_name>" |
4117 | [(set (match_operand:VI8_256_512 0 "register_operand" "=v") | |
4118 | (unspec:VI8_256_512 [(match_operand:<ssePSmode2> 1 "nonimmediate_operand" "<round_constraint>")] | |
4119 | UNSPEC_FIX_NOTRUNC))] | |
4120 | "TARGET_AVX512DQ && <round_mode512bit_condition>" | |
4121 | "vcvtps2qq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}" | |
4122 | [(set_attr "type" "ssecvt") | |
4123 | (set_attr "prefix" "evex") | |
4124 | (set_attr "mode" "<sseinsnmode>")]) | |
4125 | ||
4126 | (define_insn "<mask_codefor>avx512dq_cvtps2qqv2di<mask_name>" | |
4127 | [(set (match_operand:V2DI 0 "register_operand" "=v") | |
4128 | (unspec:V2DI | |
4129 | [(vec_select:V2SF | |
4130 | (match_operand:V4SF 1 "nonimmediate_operand" "vm") | |
4131 | (parallel [(const_int 0) (const_int 1)]))] | |
4132 | UNSPEC_FIX_NOTRUNC))] | |
4133 | "TARGET_AVX512DQ && TARGET_AVX512VL" | |
4134 | "vcvtps2qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" | |
4135 | [(set_attr "type" "ssecvt") | |
4136 | (set_attr "prefix" "evex") | |
4137 | (set_attr "mode" "TI")]) | |
4138 | ||
4139 | (define_insn "<mask_codefor>avx512dq_cvtps2uqq<mode><mask_name><round_name>" | |
4140 | [(set (match_operand:VI8_256_512 0 "register_operand" "=v") | |
4141 | (unspec:VI8_256_512 [(match_operand:<ssePSmode2> 1 "nonimmediate_operand" "<round_constraint>")] | |
4142 | UNSPEC_UNSIGNED_FIX_NOTRUNC))] | |
4143 | "TARGET_AVX512DQ && <round_mode512bit_condition>" | |
4144 | "vcvtps2uqq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}" | |
4145 | [(set_attr "type" "ssecvt") | |
4146 | (set_attr "prefix" "evex") | |
4147 | (set_attr "mode" "<sseinsnmode>")]) | |
4148 | ||
4149 | (define_insn "<mask_codefor>avx512dq_cvtps2uqqv2di<mask_name>" | |
4150 | [(set (match_operand:V2DI 0 "register_operand" "=v") | |
4151 | (unspec:V2DI | |
4152 | [(vec_select:V2SF | |
4153 | (match_operand:V4SF 1 "nonimmediate_operand" "vm") | |
4154 | (parallel [(const_int 0) (const_int 1)]))] | |
4155 | UNSPEC_UNSIGNED_FIX_NOTRUNC))] | |
4156 | "TARGET_AVX512DQ && TARGET_AVX512VL" | |
4157 | "vcvtps2uqq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" | |
4158 | [(set_attr "type" "ssecvt") | |
4159 | (set_attr "prefix" "evex") | |
4160 | (set_attr "mode" "TI")]) | |
4161 | ||
8a6ef760 | 4162 | (define_insn "<fixsuffix>fix_truncv16sfv16si2<mask_name><round_saeonly_name>" |
a9ccbba2 AI |
4163 | [(set (match_operand:V16SI 0 "register_operand" "=v") |
4164 | (any_fix:V16SI | |
8a6ef760 | 4165 | (match_operand:V16SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))] |
a9ccbba2 | 4166 | "TARGET_AVX512F" |
8a6ef760 | 4167 | "vcvttps2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}" |
a9ccbba2 AI |
4168 | [(set_attr "type" "ssecvt") |
4169 | (set_attr "prefix" "evex") | |
4170 | (set_attr "mode" "XI")]) | |
4171 | ||
415ebad0 AI |
4172 | (define_insn "fix_truncv8sfv8si2<mask_name>" |
4173 | [(set (match_operand:V8SI 0 "register_operand" "=v") | |
4174 | (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "vm")))] | |
4175 | "TARGET_AVX && <mask_avx512vl_condition>" | |
4176 | "vcvttps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" | |
95879c72 | 4177 | [(set_attr "type" "ssecvt") |
415ebad0 | 4178 | (set_attr "prefix" "<mask_prefix>") |
a95d4000 | 4179 | (set_attr "mode" "OI")]) |
95879c72 | 4180 | |
415ebad0 AI |
4181 | (define_insn "fix_truncv4sfv4si2<mask_name>" |
4182 | [(set (match_operand:V4SI 0 "register_operand" "=v") | |
4183 | (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "vm")))] | |
4184 | "TARGET_SSE2 && <mask_avx512vl_condition>" | |
4185 | "%vcvttps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" | |
d6023b50 | 4186 | [(set_attr "type" "ssecvt") |
a95d4000 UB |
4187 | (set (attr "prefix_rep") |
4188 | (if_then_else | |
67b2c493 | 4189 | (match_test "TARGET_AVX") |
a95d4000 UB |
4190 | (const_string "*") |
4191 | (const_string "1"))) | |
4192 | (set (attr "prefix_data16") | |
4193 | (if_then_else | |
67b2c493 | 4194 | (match_test "TARGET_AVX") |
a95d4000 UB |
4195 | (const_string "*") |
4196 | (const_string "0"))) | |
725fd454 | 4197 | (set_attr "prefix_data16" "0") |
415ebad0 | 4198 | (set_attr "prefix" "<mask_prefix2>") |
d6023b50 UB |
4199 | (set_attr "mode" "TI")]) |
4200 | ||
03e0010d | 4201 | (define_expand "fixuns_trunc<mode><sseintvecmodelower>2" |
82e86dc6 UB |
4202 | [(match_operand:<sseintvecmode> 0 "register_operand") |
4203 | (match_operand:VF1 1 "register_operand")] | |
2f2da9e9 | 4204 | "TARGET_SSE2" |
03e0010d | 4205 | { |
d3c2fee0 AI |
4206 | if (<MODE>mode == V16SFmode) |
4207 | emit_insn (gen_ufix_truncv16sfv16si2 (operands[0], | |
4208 | operands[1])); | |
4209 | else | |
4210 | { | |
4211 | rtx tmp[3]; | |
4212 | tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]); | |
4213 | tmp[1] = gen_reg_rtx (<sseintvecmode>mode); | |
4214 | emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (tmp[1], tmp[0])); | |
4215 | emit_insn (gen_xor<sseintvecmodelower>3 (operands[0], tmp[1], tmp[2])); | |
4216 | } | |
6bf39801 | 4217 | DONE; |
03e0010d JJ |
4218 | }) |
4219 | ||
d6023b50 UB |
4220 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
4221 | ;; | |
4222 | ;; Parallel double-precision floating point conversion operations | |
ef719a44 RH |
4223 | ;; |
4224 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
4225 | ||
d6023b50 UB |
4226 | (define_insn "sse2_cvtpi2pd" |
4227 | [(set (match_operand:V2DF 0 "register_operand" "=x,x") | |
4228 | (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))] | |
ef719a44 | 4229 | "TARGET_SSE2" |
d6023b50 UB |
4230 | "cvtpi2pd\t{%1, %0|%0, %1}" |
4231 | [(set_attr "type" "ssecvt") | |
4232 | (set_attr "unit" "mmx,*") | |
725fd454 | 4233 | (set_attr "prefix_data16" "1,*") |
d6023b50 | 4234 | (set_attr "mode" "V2DF")]) |
ef719a44 | 4235 | |
d6023b50 UB |
4236 | (define_insn "sse2_cvtpd2pi" |
4237 | [(set (match_operand:V2SI 0 "register_operand" "=y") | |
4238 | (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")] | |
4239 | UNSPEC_FIX_NOTRUNC))] | |
ef719a44 | 4240 | "TARGET_SSE2" |
d6023b50 UB |
4241 | "cvtpd2pi\t{%1, %0|%0, %1}" |
4242 | [(set_attr "type" "ssecvt") | |
4243 | (set_attr "unit" "mmx") | |
a95d4000 | 4244 | (set_attr "bdver1_decode" "double") |
01284895 | 4245 | (set_attr "btver2_decode" "direct") |
d6023b50 | 4246 | (set_attr "prefix_data16" "1") |
a95d4000 | 4247 | (set_attr "mode" "DI")]) |
ef719a44 | 4248 | |
d6023b50 UB |
4249 | (define_insn "sse2_cvttpd2pi" |
4250 | [(set (match_operand:V2SI 0 "register_operand" "=y") | |
4251 | (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))] | |
ef719a44 | 4252 | "TARGET_SSE2" |
d6023b50 UB |
4253 | "cvttpd2pi\t{%1, %0|%0, %1}" |
4254 | [(set_attr "type" "ssecvt") | |
4255 | (set_attr "unit" "mmx") | |
a95d4000 | 4256 | (set_attr "bdver1_decode" "double") |
d6023b50 | 4257 | (set_attr "prefix_data16" "1") |
a95d4000 | 4258 | (set_attr "mode" "TI")]) |
95879c72 | 4259 | |
d6023b50 | 4260 | (define_insn "sse2_cvtsi2sd" |
460f31ee | 4261 | [(set (match_operand:V2DF 0 "register_operand" "=x,x,v") |
ef719a44 | 4262 | (vec_merge:V2DF |
d6023b50 | 4263 | (vec_duplicate:V2DF |
a95d4000 | 4264 | (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm"))) |
460f31ee | 4265 | (match_operand:V2DF 1 "register_operand" "0,0,v") |
ef719a44 | 4266 | (const_int 1)))] |
ef719a44 | 4267 | "TARGET_SSE2" |
a95d4000 UB |
4268 | "@ |
4269 | cvtsi2sd\t{%2, %0|%0, %2} | |
4270 | cvtsi2sd\t{%2, %0|%0, %2} | |
4271 | vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}" | |
4272 | [(set_attr "isa" "noavx,noavx,avx") | |
4273 | (set_attr "type" "sseicvt") | |
4274 | (set_attr "athlon_decode" "double,direct,*") | |
4275 | (set_attr "amdfam10_decode" "vector,double,*") | |
4276 | (set_attr "bdver1_decode" "double,direct,*") | |
01284895 | 4277 | (set_attr "btver2_decode" "double,double,double") |
460f31ee | 4278 | (set_attr "prefix" "orig,orig,maybe_evex") |
95879c72 L |
4279 | (set_attr "mode" "DF")]) |
4280 | ||
06bc9e41 | 4281 | (define_insn "sse2_cvtsi2sdq<round_name>" |
2b1ebb0c | 4282 | [(set (match_operand:V2DF 0 "register_operand" "=x,x,v") |
ef719a44 | 4283 | (vec_merge:V2DF |
d6023b50 | 4284 | (vec_duplicate:V2DF |
06bc9e41 | 4285 | (float:DF (match_operand:DI 2 "<round_nimm_predicate>" "r,m,<round_constraint3>"))) |
2b1ebb0c | 4286 | (match_operand:V2DF 1 "register_operand" "0,0,v") |
ef719a44 | 4287 | (const_int 1)))] |
d6023b50 | 4288 | "TARGET_SSE2 && TARGET_64BIT" |
a95d4000 UB |
4289 | "@ |
4290 | cvtsi2sdq\t{%2, %0|%0, %2} | |
4291 | cvtsi2sdq\t{%2, %0|%0, %2} | |
06bc9e41 | 4292 | vcvtsi2sdq\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}" |
a95d4000 UB |
4293 | [(set_attr "isa" "noavx,noavx,avx") |
4294 | (set_attr "type" "sseicvt") | |
4295 | (set_attr "athlon_decode" "double,direct,*") | |
4296 | (set_attr "amdfam10_decode" "vector,double,*") | |
4297 | (set_attr "bdver1_decode" "double,direct,*") | |
4298 | (set_attr "length_vex" "*,*,4") | |
4299 | (set_attr "prefix_rex" "1,1,*") | |
2b1ebb0c | 4300 | (set_attr "prefix" "orig,orig,maybe_evex") |
a95d4000 | 4301 | (set_attr "mode" "DF")]) |
d6023b50 | 4302 | |
06bc9e41 | 4303 | (define_insn "avx512f_vcvtss2usi<round_name>" |
c003c6d6 AI |
4304 | [(set (match_operand:SI 0 "register_operand" "=r") |
4305 | (unspec:SI | |
4306 | [(vec_select:SF | |
06bc9e41 | 4307 | (match_operand:V4SF 1 "<round_nimm_predicate>" "<round_constraint>") |
c003c6d6 AI |
4308 | (parallel [(const_int 0)]))] |
4309 | UNSPEC_UNSIGNED_FIX_NOTRUNC))] | |
4310 | "TARGET_AVX512F" | |
06bc9e41 | 4311 | "vcvtss2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}" |
c003c6d6 AI |
4312 | [(set_attr "type" "sseicvt") |
4313 | (set_attr "prefix" "evex") | |
4314 | (set_attr "mode" "SI")]) | |
4315 | ||
06bc9e41 | 4316 | (define_insn "avx512f_vcvtss2usiq<round_name>" |
c003c6d6 AI |
4317 | [(set (match_operand:DI 0 "register_operand" "=r") |
4318 | (unspec:DI | |
4319 | [(vec_select:SF | |
06bc9e41 | 4320 | (match_operand:V4SF 1 "<round_nimm_predicate>" "<round_constraint>") |
c003c6d6 AI |
4321 | (parallel [(const_int 0)]))] |
4322 | UNSPEC_UNSIGNED_FIX_NOTRUNC))] | |
4323 | "TARGET_AVX512F && TARGET_64BIT" | |
06bc9e41 | 4324 | "vcvtss2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}" |
c003c6d6 AI |
4325 | [(set_attr "type" "sseicvt") |
4326 | (set_attr "prefix" "evex") | |
4327 | (set_attr "mode" "DI")]) | |
4328 | ||
8a6ef760 | 4329 | (define_insn "avx512f_vcvttss2usi<round_saeonly_name>" |
c003c6d6 AI |
4330 | [(set (match_operand:SI 0 "register_operand" "=r") |
4331 | (unsigned_fix:SI | |
4332 | (vec_select:SF | |
8a6ef760 | 4333 | (match_operand:V4SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>") |
c003c6d6 AI |
4334 | (parallel [(const_int 0)]))))] |
4335 | "TARGET_AVX512F" | |
8a6ef760 | 4336 | "vcvttss2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}" |
c003c6d6 AI |
4337 | [(set_attr "type" "sseicvt") |
4338 | (set_attr "prefix" "evex") | |
4339 | (set_attr "mode" "SI")]) | |
4340 | ||
8a6ef760 | 4341 | (define_insn "avx512f_vcvttss2usiq<round_saeonly_name>" |
c003c6d6 AI |
4342 | [(set (match_operand:DI 0 "register_operand" "=r") |
4343 | (unsigned_fix:DI | |
4344 | (vec_select:SF | |
8a6ef760 | 4345 | (match_operand:V4SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>") |
c003c6d6 AI |
4346 | (parallel [(const_int 0)]))))] |
4347 | "TARGET_AVX512F && TARGET_64BIT" | |
8a6ef760 | 4348 | "vcvttss2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}" |
c003c6d6 AI |
4349 | [(set_attr "type" "sseicvt") |
4350 | (set_attr "prefix" "evex") | |
4351 | (set_attr "mode" "DI")]) | |
4352 | ||
06bc9e41 | 4353 | (define_insn "avx512f_vcvtsd2usi<round_name>" |
c003c6d6 AI |
4354 | [(set (match_operand:SI 0 "register_operand" "=r") |
4355 | (unspec:SI | |
4356 | [(vec_select:DF | |
06bc9e41 | 4357 | (match_operand:V2DF 1 "<round_nimm_predicate>" "<round_constraint>") |
c003c6d6 AI |
4358 | (parallel [(const_int 0)]))] |
4359 | UNSPEC_UNSIGNED_FIX_NOTRUNC))] | |
4360 | "TARGET_AVX512F" | |
06bc9e41 | 4361 | "vcvtsd2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}" |
c003c6d6 AI |
4362 | [(set_attr "type" "sseicvt") |
4363 | (set_attr "prefix" "evex") | |
4364 | (set_attr "mode" "SI")]) | |
4365 | ||
06bc9e41 | 4366 | (define_insn "avx512f_vcvtsd2usiq<round_name>" |
c003c6d6 AI |
4367 | [(set (match_operand:DI 0 "register_operand" "=r") |
4368 | (unspec:DI | |
4369 | [(vec_select:DF | |
06bc9e41 | 4370 | (match_operand:V2DF 1 "<round_nimm_predicate>" "<round_constraint>") |
c003c6d6 AI |
4371 | (parallel [(const_int 0)]))] |
4372 | UNSPEC_UNSIGNED_FIX_NOTRUNC))] | |
4373 | "TARGET_AVX512F && TARGET_64BIT" | |
06bc9e41 | 4374 | "vcvtsd2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}" |
c003c6d6 AI |
4375 | [(set_attr "type" "sseicvt") |
4376 | (set_attr "prefix" "evex") | |
4377 | (set_attr "mode" "DI")]) | |
4378 | ||
8a6ef760 | 4379 | (define_insn "avx512f_vcvttsd2usi<round_saeonly_name>" |
c003c6d6 AI |
4380 | [(set (match_operand:SI 0 "register_operand" "=r") |
4381 | (unsigned_fix:SI | |
4382 | (vec_select:DF | |
8a6ef760 | 4383 | (match_operand:V2DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>") |
c003c6d6 AI |
4384 | (parallel [(const_int 0)]))))] |
4385 | "TARGET_AVX512F" | |
8a6ef760 | 4386 | "vcvttsd2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}" |
c003c6d6 AI |
4387 | [(set_attr "type" "sseicvt") |
4388 | (set_attr "prefix" "evex") | |
4389 | (set_attr "mode" "SI")]) | |
4390 | ||
8a6ef760 | 4391 | (define_insn "avx512f_vcvttsd2usiq<round_saeonly_name>" |
c003c6d6 AI |
4392 | [(set (match_operand:DI 0 "register_operand" "=r") |
4393 | (unsigned_fix:DI | |
4394 | (vec_select:DF | |
8a6ef760 | 4395 | (match_operand:V2DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>") |
c003c6d6 AI |
4396 | (parallel [(const_int 0)]))))] |
4397 | "TARGET_AVX512F && TARGET_64BIT" | |
8a6ef760 | 4398 | "vcvttsd2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}" |
c003c6d6 AI |
4399 | [(set_attr "type" "sseicvt") |
4400 | (set_attr "prefix" "evex") | |
4401 | (set_attr "mode" "DI")]) | |
4402 | ||
06bc9e41 | 4403 | (define_insn "sse2_cvtsd2si<round_name>" |
d6023b50 UB |
4404 | [(set (match_operand:SI 0 "register_operand" "=r,r") |
4405 | (unspec:SI | |
4406 | [(vec_select:DF | |
06bc9e41 | 4407 | (match_operand:V2DF 1 "<round_nimm_predicate>" "v,<round_constraint2>") |
d6023b50 UB |
4408 | (parallel [(const_int 0)]))] |
4409 | UNSPEC_FIX_NOTRUNC))] | |
ef719a44 | 4410 | "TARGET_SSE2" |
06bc9e41 | 4411 | "%vcvtsd2si\t{<round_op2>%1, %0|%0, %q1<round_op2>}" |
d6023b50 UB |
4412 | [(set_attr "type" "sseicvt") |
4413 | (set_attr "athlon_decode" "double,vector") | |
6a08ffca | 4414 | (set_attr "bdver1_decode" "double,double") |
01284895 | 4415 | (set_attr "btver2_decode" "double,double") |
d6023b50 | 4416 | (set_attr "prefix_rep" "1") |
95879c72 | 4417 | (set_attr "prefix" "maybe_vex") |
d6023b50 | 4418 | (set_attr "mode" "SI")]) |
ef719a44 | 4419 | |
d6023b50 UB |
4420 | (define_insn "sse2_cvtsd2si_2" |
4421 | [(set (match_operand:SI 0 "register_operand" "=r,r") | |
3f97cb0b | 4422 | (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "v,m")] |
d6023b50 | 4423 | UNSPEC_FIX_NOTRUNC))] |
ef719a44 | 4424 | "TARGET_SSE2" |
eabb5f48 | 4425 | "%vcvtsd2si\t{%1, %0|%0, %q1}" |
d6023b50 UB |
4426 | [(set_attr "type" "sseicvt") |
4427 | (set_attr "athlon_decode" "double,vector") | |
4428 | (set_attr "amdfam10_decode" "double,double") | |
6a08ffca | 4429 | (set_attr "bdver1_decode" "double,double") |
d6023b50 | 4430 | (set_attr "prefix_rep" "1") |
95879c72 | 4431 | (set_attr "prefix" "maybe_vex") |
d6023b50 | 4432 | (set_attr "mode" "SI")]) |
ef719a44 | 4433 | |
06bc9e41 | 4434 | (define_insn "sse2_cvtsd2siq<round_name>" |
d6023b50 UB |
4435 | [(set (match_operand:DI 0 "register_operand" "=r,r") |
4436 | (unspec:DI | |
4437 | [(vec_select:DF | |
06bc9e41 | 4438 | (match_operand:V2DF 1 "<round_nimm_predicate>" "v,<round_constraint2>") |
d6023b50 UB |
4439 | (parallel [(const_int 0)]))] |
4440 | UNSPEC_FIX_NOTRUNC))] | |
4441 | "TARGET_SSE2 && TARGET_64BIT" | |
06bc9e41 | 4442 | "%vcvtsd2si{q}\t{<round_op2>%1, %0|%0, %q1<round_op2>}" |
d6023b50 UB |
4443 | [(set_attr "type" "sseicvt") |
4444 | (set_attr "athlon_decode" "double,vector") | |
6a08ffca | 4445 | (set_attr "bdver1_decode" "double,double") |
d6023b50 | 4446 | (set_attr "prefix_rep" "1") |
95879c72 | 4447 | (set_attr "prefix" "maybe_vex") |
d6023b50 | 4448 | (set_attr "mode" "DI")]) |
ef719a44 | 4449 | |
d6023b50 UB |
4450 | (define_insn "sse2_cvtsd2siq_2" |
4451 | [(set (match_operand:DI 0 "register_operand" "=r,r") | |
3f97cb0b | 4452 | (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "v,m")] |
d6023b50 UB |
4453 | UNSPEC_FIX_NOTRUNC))] |
4454 | "TARGET_SSE2 && TARGET_64BIT" | |
eabb5f48 | 4455 | "%vcvtsd2si{q}\t{%1, %0|%0, %q1}" |
d6023b50 UB |
4456 | [(set_attr "type" "sseicvt") |
4457 | (set_attr "athlon_decode" "double,vector") | |
4458 | (set_attr "amdfam10_decode" "double,double") | |
6a08ffca | 4459 | (set_attr "bdver1_decode" "double,double") |
d6023b50 | 4460 | (set_attr "prefix_rep" "1") |
95879c72 | 4461 | (set_attr "prefix" "maybe_vex") |
d6023b50 | 4462 | (set_attr "mode" "DI")]) |
ef719a44 | 4463 | |
8a6ef760 | 4464 | (define_insn "sse2_cvttsd2si<round_saeonly_name>" |
d6023b50 UB |
4465 | [(set (match_operand:SI 0 "register_operand" "=r,r") |
4466 | (fix:SI | |
4467 | (vec_select:DF | |
8a6ef760 | 4468 | (match_operand:V2DF 1 "<round_saeonly_nimm_predicate>" "v,<round_saeonly_constraint2>") |
d6023b50 | 4469 | (parallel [(const_int 0)]))))] |
ef719a44 | 4470 | "TARGET_SSE2" |
8a6ef760 | 4471 | "%vcvttsd2si\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}" |
d6023b50 | 4472 | [(set_attr "type" "sseicvt") |
d6023b50 | 4473 | (set_attr "athlon_decode" "double,vector") |
6a08ffca | 4474 | (set_attr "amdfam10_decode" "double,double") |
a95d4000 | 4475 | (set_attr "bdver1_decode" "double,double") |
01284895 | 4476 | (set_attr "btver2_decode" "double,double") |
a95d4000 UB |
4477 | (set_attr "prefix_rep" "1") |
4478 | (set_attr "prefix" "maybe_vex") | |
4479 | (set_attr "mode" "SI")]) | |
d6023b50 | 4480 | |
8a6ef760 | 4481 | (define_insn "sse2_cvttsd2siq<round_saeonly_name>" |
d6023b50 UB |
4482 | [(set (match_operand:DI 0 "register_operand" "=r,r") |
4483 | (fix:DI | |
4484 | (vec_select:DF | |
8a6ef760 | 4485 | (match_operand:V2DF 1 "<round_saeonly_nimm_predicate>" "v,<round_saeonly_constraint2>") |
d6023b50 UB |
4486 | (parallel [(const_int 0)]))))] |
4487 | "TARGET_SSE2 && TARGET_64BIT" | |
8a6ef760 | 4488 | "%vcvttsd2si{q}\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}" |
d6023b50 | 4489 | [(set_attr "type" "sseicvt") |
d6023b50 | 4490 | (set_attr "athlon_decode" "double,vector") |
6a08ffca | 4491 | (set_attr "amdfam10_decode" "double,double") |
a95d4000 UB |
4492 | (set_attr "bdver1_decode" "double,double") |
4493 | (set_attr "prefix_rep" "1") | |
4494 | (set_attr "prefix" "maybe_vex") | |
4495 | (set_attr "mode" "DI")]) | |
ef719a44 | 4496 | |
ec5e777c AI |
4497 | ;; For float<si2dfmode><mode>2 insn pattern |
4498 | (define_mode_attr si2dfmode | |
4499 | [(V8DF "V8SI") (V4DF "V4SI")]) | |
4500 | (define_mode_attr si2dfmodelower | |
4501 | [(V8DF "v8si") (V4DF "v4si")]) | |
4502 | ||
47490470 | 4503 | (define_insn "float<si2dfmodelower><mode>2<mask_name>" |
ec5e777c AI |
4504 | [(set (match_operand:VF2_512_256 0 "register_operand" "=v") |
4505 | (float:VF2_512_256 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))] | |
47490470 AI |
4506 | "TARGET_AVX && <mask_mode512bit_condition>" |
4507 | "vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" | |
95879c72 | 4508 | [(set_attr "type" "ssecvt") |
ec5e777c AI |
4509 | (set_attr "prefix" "maybe_vex") |
4510 | (set_attr "mode" "<MODE>")]) | |
95879c72 | 4511 | |
39012b09 AI |
4512 | (define_insn "<floatsuffix>float<sseintvecmodelower><mode>2<mask_name><round_name>" |
4513 | [(set (match_operand:VF2_AVX512VL 0 "register_operand" "=v") | |
4514 | (any_float:VF2_AVX512VL | |
4515 | (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "vm")))] | |
4516 | "TARGET_AVX512DQ" | |
4517 | "vcvt<floatsuffix>qq2pd\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}" | |
4518 | [(set_attr "type" "ssecvt") | |
4519 | (set_attr "prefix" "evex") | |
4520 | (set_attr "mode" "<MODE>")]) | |
4521 | ||
4522 | ;; For <floatsuffix>float<sselondveclower><mode> insn patterns | |
4523 | (define_mode_attr qq2pssuff | |
4524 | [(V8SF "") (V4SF "{y}")]) | |
4525 | ||
4526 | (define_mode_attr sselongvecmode | |
4527 | [(V8SF "V8DI") (V4SF "V4DI")]) | |
4528 | ||
4529 | (define_mode_attr sselongvecmodelower | |
4530 | [(V8SF "v8di") (V4SF "v4di")]) | |
4531 | ||
4532 | (define_mode_attr sseintvecmode3 | |
4533 | [(V8SF "XI") (V4SF "OI") | |
4534 | (V8DF "OI") (V4DF "TI")]) | |
4535 | ||
4536 | (define_insn "<floatsuffix>float<sselongvecmodelower><mode>2<mask_name><round_name>" | |
4537 | [(set (match_operand:VF1_128_256VL 0 "register_operand" "=v") | |
4538 | (any_float:VF1_128_256VL | |
4539 | (match_operand:<sselongvecmode> 1 "nonimmediate_operand" "<round_constraint>")))] | |
4540 | "TARGET_AVX512DQ && <round_modev8sf_condition>" | |
4541 | "vcvt<floatsuffix>qq2ps<qq2pssuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}" | |
4542 | [(set_attr "type" "ssecvt") | |
4543 | (set_attr "prefix" "evex") | |
4544 | (set_attr "mode" "<MODE>")]) | |
4545 | ||
4546 | (define_insn "*<floatsuffix>floatv2div2sf2" | |
4547 | [(set (match_operand:V4SF 0 "register_operand" "=v") | |
4548 | (vec_concat:V4SF | |
4549 | (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm")) | |
4550 | (const_vector:V2SF [(const_int 0) (const_int 0)])))] | |
4551 | "TARGET_AVX512DQ && TARGET_AVX512VL" | |
4552 | "vcvt<floatsuffix>qq2ps{x}\t{%1, %0|%0, %1}" | |
4553 | [(set_attr "type" "ssecvt") | |
4554 | (set_attr "prefix" "evex") | |
4555 | (set_attr "mode" "V4SF")]) | |
4556 | ||
4557 | (define_insn "<floatsuffix>floatv2div2sf2_mask" | |
4558 | [(set (match_operand:V4SF 0 "register_operand" "=v") | |
4559 | (vec_concat:V4SF | |
4560 | (vec_merge:V2SF | |
4561 | (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm")) | |
4562 | (vec_select:V2SF | |
4563 | (match_operand:V4SF 2 "vector_move_operand" "0C") | |
4564 | (parallel [(const_int 0) (const_int 1)])) | |
4565 | (match_operand:QI 3 "register_operand" "Yk")) | |
4566 | (const_vector:V2SF [(const_int 0) (const_int 0)])))] | |
4567 | "TARGET_AVX512DQ && TARGET_AVX512VL" | |
4568 | "vcvt<floatsuffix>qq2ps{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" | |
4569 | [(set_attr "type" "ssecvt") | |
4570 | (set_attr "prefix" "evex") | |
4571 | (set_attr "mode" "V4SF")]) | |
4572 | ||
4573 | (define_insn "ufloat<si2dfmodelower><mode>2<mask_name>" | |
4574 | [(set (match_operand:VF2_512_256VL 0 "register_operand" "=v") | |
4575 | (unsigned_float:VF2_512_256VL | |
4576 | (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))] | |
4577 | "TARGET_AVX512F" | |
4578 | "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" | |
4579 | [(set_attr "type" "ssecvt") | |
4580 | (set_attr "prefix" "evex") | |
4581 | (set_attr "mode" "<MODE>")]) | |
4582 | ||
4583 | (define_insn "ufloatv2siv2df2<mask_name>" | |
4584 | [(set (match_operand:V2DF 0 "register_operand" "=v") | |
4585 | (unsigned_float:V2DF | |
4586 | (vec_select:V2SI | |
4587 | (match_operand:V4SI 1 "nonimmediate_operand" "vm") | |
4588 | (parallel [(const_int 0) (const_int 1)]))))] | |
4589 | "TARGET_AVX512VL" | |
47490470 | 4590 | "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" |
c003c6d6 AI |
4591 | [(set_attr "type" "ssecvt") |
4592 | (set_attr "prefix" "evex") | |
39012b09 | 4593 | (set_attr "mode" "V2DF")]) |
c003c6d6 AI |
4594 | |
4595 | (define_insn "avx512f_cvtdq2pd512_2" | |
4596 | [(set (match_operand:V8DF 0 "register_operand" "=v") | |
4597 | (float:V8DF | |
4598 | (vec_select:V8SI | |
4599 | (match_operand:V16SI 1 "nonimmediate_operand" "vm") | |
4600 | (parallel [(const_int 0) (const_int 1) | |
4601 | (const_int 2) (const_int 3) | |
4602 | (const_int 4) (const_int 5) | |
4603 | (const_int 6) (const_int 7)]))))] | |
42815c48 | 4604 | "TARGET_AVX512F" |
c003c6d6 AI |
4605 | "vcvtdq2pd\t{%t1, %0|%0, %t1}" |
4606 | [(set_attr "type" "ssecvt") | |
4607 | (set_attr "prefix" "evex") | |
4608 | (set_attr "mode" "V8DF")]) | |
4609 | ||
4fbe3b8a | 4610 | (define_insn "avx_cvtdq2pd256_2" |
42815c48 | 4611 | [(set (match_operand:V4DF 0 "register_operand" "=v") |
1e27129f L |
4612 | (float:V4DF |
4613 | (vec_select:V4SI | |
42815c48 | 4614 | (match_operand:V8SI 1 "nonimmediate_operand" "vm") |
a95d4000 UB |
4615 | (parallel [(const_int 0) (const_int 1) |
4616 | (const_int 2) (const_int 3)]))))] | |
1e27129f L |
4617 | "TARGET_AVX" |
4618 | "vcvtdq2pd\t{%x1, %0|%0, %x1}" | |
4619 | [(set_attr "type" "ssecvt") | |
42815c48 | 4620 | (set_attr "prefix" "maybe_evex") |
1e27129f L |
4621 | (set_attr "mode" "V4DF")]) |
4622 | ||
42815c48 AI |
4623 | (define_insn "sse2_cvtdq2pd<mask_name>" |
4624 | [(set (match_operand:V2DF 0 "register_operand" "=v") | |
d6023b50 UB |
4625 | (float:V2DF |
4626 | (vec_select:V2SI | |
42815c48 | 4627 | (match_operand:V4SI 1 "nonimmediate_operand" "vm") |
d6023b50 | 4628 | (parallel [(const_int 0) (const_int 1)]))))] |
42815c48 AI |
4629 | "TARGET_SSE2 && <mask_avx512vl_condition>" |
4630 | "%vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}" | |
d6023b50 | 4631 | [(set_attr "type" "ssecvt") |
95879c72 | 4632 | (set_attr "prefix" "maybe_vex") |
f220a4f4 | 4633 | (set_attr "ssememalign" "64") |
ef719a44 RH |
4634 | (set_attr "mode" "V2DF")]) |
4635 | ||
06bc9e41 | 4636 | (define_insn "<mask_codefor>avx512f_cvtpd2dq512<mask_name><round_name>" |
c003c6d6 | 4637 | [(set (match_operand:V8SI 0 "register_operand" "=v") |
47490470 | 4638 | (unspec:V8SI |
06bc9e41 | 4639 | [(match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")] |
47490470 | 4640 | UNSPEC_FIX_NOTRUNC))] |
c003c6d6 | 4641 | "TARGET_AVX512F" |
06bc9e41 | 4642 | "vcvtpd2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}" |
c003c6d6 AI |
4643 | [(set_attr "type" "ssecvt") |
4644 | (set_attr "prefix" "evex") | |
4645 | (set_attr "mode" "OI")]) | |
4646 | ||
42815c48 AI |
4647 | (define_insn "avx_cvtpd2dq256<mask_name>" |
4648 | [(set (match_operand:V4SI 0 "register_operand" "=v") | |
4649 | (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "vm")] | |
95879c72 | 4650 | UNSPEC_FIX_NOTRUNC))] |
42815c48 AI |
4651 | "TARGET_AVX && <mask_avx512vl_condition>" |
4652 | "vcvtpd2dq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" | |
95879c72 | 4653 | [(set_attr "type" "ssecvt") |
42815c48 | 4654 | (set_attr "prefix" "<mask_prefix>") |
95879c72 L |
4655 | (set_attr "mode" "OI")]) |
4656 | ||
1ee48839 | 4657 | (define_expand "avx_cvtpd2dq256_2" |
82e86dc6 | 4658 | [(set (match_operand:V8SI 0 "register_operand") |
1ee48839 | 4659 | (vec_concat:V8SI |
82e86dc6 | 4660 | (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand")] |
1ee48839 JJ |
4661 | UNSPEC_FIX_NOTRUNC) |
4662 | (match_dup 2)))] | |
4663 | "TARGET_AVX" | |
4664 | "operands[2] = CONST0_RTX (V4SImode);") | |
4665 | ||
4666 | (define_insn "*avx_cvtpd2dq256_2" | |
4667 | [(set (match_operand:V8SI 0 "register_operand" "=x") | |
4668 | (vec_concat:V8SI | |
4669 | (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")] | |
4670 | UNSPEC_FIX_NOTRUNC) | |
82e86dc6 | 4671 | (match_operand:V4SI 2 "const0_operand")))] |
1ee48839 JJ |
4672 | "TARGET_AVX" |
4673 | "vcvtpd2dq{y}\t{%1, %x0|%x0, %1}" | |
4674 | [(set_attr "type" "ssecvt") | |
4675 | (set_attr "prefix" "vex") | |
01284895 | 4676 | (set_attr "btver2_decode" "vector") |
1ee48839 JJ |
4677 | (set_attr "mode" "OI")]) |
4678 | ||
42815c48 AI |
4679 | (define_insn "sse2_cvtpd2dq<mask_name>" |
4680 | [(set (match_operand:V4SI 0 "register_operand" "=v") | |
d6023b50 | 4681 | (vec_concat:V4SI |
42815c48 | 4682 | (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "vm")] |
d6023b50 | 4683 | UNSPEC_FIX_NOTRUNC) |
42815c48 AI |
4684 | (const_vector:V2SI [(const_int 0) (const_int 0)])))] |
4685 | "TARGET_SSE2 && <mask_avx512vl_condition>" | |
a95d4000 UB |
4686 | { |
4687 | if (TARGET_AVX) | |
42815c48 | 4688 | return "vcvtpd2dq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"; |
a95d4000 UB |
4689 | else |
4690 | return "cvtpd2dq\t{%1, %0|%0, %1}"; | |
4691 | } | |
d6023b50 UB |
4692 | [(set_attr "type" "ssecvt") |
4693 | (set_attr "prefix_rep" "1") | |
725fd454 | 4694 | (set_attr "prefix_data16" "0") |
95879c72 | 4695 | (set_attr "prefix" "maybe_vex") |
d6023b50 | 4696 | (set_attr "mode" "TI") |
6a08ffca | 4697 | (set_attr "amdfam10_decode" "double") |
0b7e851b | 4698 | (set_attr "athlon_decode" "vector") |
6a08ffca | 4699 | (set_attr "bdver1_decode" "double")]) |
ef719a44 | 4700 | |
42815c48 AI |
4701 | ;; For ufix_notrunc* insn patterns |
4702 | (define_mode_attr pd2udqsuff | |
4703 | [(V8DF "") (V4DF "{y}")]) | |
4704 | ||
4705 | (define_insn "ufix_notrunc<mode><si2dfmodelower>2<mask_name><round_name>" | |
4706 | [(set (match_operand:<si2dfmode> 0 "register_operand" "=v") | |
4707 | (unspec:<si2dfmode> | |
4708 | [(match_operand:VF2_512_256VL 1 "nonimmediate_operand" "<round_constraint>")] | |
c003c6d6 AI |
4709 | UNSPEC_UNSIGNED_FIX_NOTRUNC))] |
4710 | "TARGET_AVX512F" | |
42815c48 | 4711 | "vcvtpd2udq<pd2udqsuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}" |
c003c6d6 AI |
4712 | [(set_attr "type" "ssecvt") |
4713 | (set_attr "prefix" "evex") | |
42815c48 AI |
4714 | (set_attr "mode" "<sseinsnmode>")]) |
4715 | ||
4716 | (define_insn "ufix_notruncv2dfv2si2<mask_name>" | |
4717 | [(set (match_operand:V4SI 0 "register_operand" "=v") | |
4718 | (vec_concat:V4SI | |
4719 | (unspec:V2SI | |
4720 | [(match_operand:V2DF 1 "nonimmediate_operand" "vm")] | |
4721 | UNSPEC_UNSIGNED_FIX_NOTRUNC) | |
4722 | (const_vector:V2SI [(const_int 0) (const_int 0)])))] | |
4723 | "TARGET_AVX512VL" | |
4724 | "vcvtpd2udq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" | |
4725 | [(set_attr "type" "ssecvt") | |
4726 | (set_attr "prefix" "evex") | |
4727 | (set_attr "mode" "TI")]) | |
c003c6d6 | 4728 | |
8a6ef760 | 4729 | (define_insn "<fixsuffix>fix_truncv8dfv8si2<mask_name><round_saeonly_name>" |
ec5e777c | 4730 | [(set (match_operand:V8SI 0 "register_operand" "=v") |
47490470 | 4731 | (any_fix:V8SI |
8a6ef760 | 4732 | (match_operand:V8DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))] |
ec5e777c | 4733 | "TARGET_AVX512F" |
8a6ef760 | 4734 | "vcvttpd2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}" |
ec5e777c AI |
4735 | [(set_attr "type" "ssecvt") |
4736 | (set_attr "prefix" "evex") | |
4737 | (set_attr "mode" "OI")]) | |
4738 | ||
3bcf35e7 AI |
4739 | (define_insn "ufix_truncv2dfv2si2<mask_name>" |
4740 | [(set (match_operand:V4SI 0 "register_operand" "=v") | |
4741 | (vec_concat:V4SI | |
4742 | (unsigned_fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm")) | |
4743 | (const_vector:V2SI [(const_int 0) (const_int 0)])))] | |
4744 | "TARGET_AVX512VL" | |
4745 | "vcvttpd2udq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" | |
95879c72 | 4746 | [(set_attr "type" "ssecvt") |
3bcf35e7 AI |
4747 | (set_attr "prefix" "evex") |
4748 | (set_attr "mode" "TI")]) | |
4749 | ||
4750 | (define_insn "fix_truncv4dfv4si2<mask_name>" | |
4751 | [(set (match_operand:V4SI 0 "register_operand" "=v") | |
4752 | (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))] | |
4753 | "TARGET_AVX || (TARGET_AVX512VL && TARGET_AVX512F)" | |
4754 | "vcvttpd2dq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" | |
4755 | [(set_attr "type" "ssecvt") | |
4756 | (set_attr "prefix" "maybe_evex") | |
4757 | (set_attr "mode" "OI")]) | |
4758 | ||
4759 | (define_insn "ufix_truncv4dfv4si2<mask_name>" | |
4760 | [(set (match_operand:V4SI 0 "register_operand" "=v") | |
4761 | (unsigned_fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))] | |
4762 | "TARGET_AVX512VL && TARGET_AVX512F" | |
4763 | "vcvttpd2udq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" | |
4764 | [(set_attr "type" "ssecvt") | |
4765 | (set_attr "prefix" "maybe_evex") | |
95879c72 L |
4766 | (set_attr "mode" "OI")]) |
4767 | ||
3bcf35e7 AI |
4768 | (define_insn "<fixsuffix>fix_trunc<mode><sseintvecmodelower>2<mask_name><round_saeonly_name>" |
4769 | [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v") | |
4770 | (any_fix:<sseintvecmode> | |
4771 | (match_operand:VF2_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))] | |
4772 | "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>" | |
4773 | "vcvttpd2<fixsuffix>qq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}" | |
4774 | [(set_attr "type" "ssecvt") | |
4775 | (set_attr "prefix" "evex") | |
4776 | (set_attr "mode" "<sseintvecmode2>")]) | |
4777 | ||
4778 | (define_insn "fix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>" | |
4779 | [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v") | |
4780 | (unspec:<sseintvecmode> | |
4781 | [(match_operand:VF2_AVX512VL 1 "<round_nimm_predicate>" "<round_constraint>")] | |
4782 | UNSPEC_FIX_NOTRUNC))] | |
4783 | "TARGET_AVX512DQ && <round_mode512bit_condition>" | |
4784 | "vcvtpd2qq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}" | |
4785 | [(set_attr "type" "ssecvt") | |
4786 | (set_attr "prefix" "evex") | |
4787 | (set_attr "mode" "<sseintvecmode2>")]) | |
4788 | ||
4789 | (define_insn "ufix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>" | |
4790 | [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v") | |
4791 | (unspec:<sseintvecmode> | |
4792 | [(match_operand:VF2_AVX512VL 1 "nonimmediate_operand" "<round_constraint>")] | |
4793 | UNSPEC_UNSIGNED_FIX_NOTRUNC))] | |
4794 | "TARGET_AVX512DQ && <round_mode512bit_condition>" | |
4795 | "vcvtpd2uqq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}" | |
4796 | [(set_attr "type" "ssecvt") | |
4797 | (set_attr "prefix" "evex") | |
4798 | (set_attr "mode" "<sseintvecmode2>")]) | |
4799 | ||
1bdf255a AI |
4800 | (define_insn "<fixsuffix>fix_trunc<mode><sselongvecmodelower>2<mask_name><round_saeonly_name>" |
4801 | [(set (match_operand:<sselongvecmode> 0 "register_operand" "=v") | |
4802 | (any_fix:<sselongvecmode> | |
4803 | (match_operand:VF1_128_256VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))] | |
4804 | "TARGET_AVX512DQ && <round_saeonly_modev8sf_condition>" | |
4805 | "vcvttps2<fixsuffix>qq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}" | |
4806 | [(set_attr "type" "ssecvt") | |
4807 | (set_attr "prefix" "evex") | |
4808 | (set_attr "mode" "<sseintvecmode3>")]) | |
4809 | ||
4810 | (define_insn "<fixsuffix>fix_truncv2sfv2di2<mask_name>" | |
4811 | [(set (match_operand:V2DI 0 "register_operand" "=v") | |
4812 | (any_fix:V2DI | |
4813 | (vec_select:V2SF | |
4814 | (match_operand:V4SF 1 "nonimmediate_operand" "vm") | |
4815 | (parallel [(const_int 0) (const_int 1)]))))] | |
4816 | "TARGET_AVX512DQ && TARGET_AVX512VL" | |
4817 | "vcvttps2<fixsuffix>qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" | |
4818 | [(set_attr "type" "ssecvt") | |
4819 | (set_attr "prefix" "evex") | |
4820 | (set_attr "mode" "TI")]) | |
4821 | ||
4822 | (define_insn "ufix_trunc<mode><sseintvecmodelower>2<mask_name>" | |
4823 | [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v") | |
4824 | (unsigned_fix:<sseintvecmode> | |
4825 | (match_operand:VF1_128_256VL 1 "nonimmediate_operand" "vm")))] | |
4826 | "TARGET_AVX512VL" | |
4827 | "vcvttps2udq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" | |
4828 | [(set_attr "type" "ssecvt") | |
4829 | (set_attr "prefix" "evex") | |
4830 | (set_attr "mode" "<sseintvecmode2>")]) | |
4831 | ||
1ee48839 | 4832 | (define_expand "avx_cvttpd2dq256_2" |
82e86dc6 | 4833 | [(set (match_operand:V8SI 0 "register_operand") |
1ee48839 | 4834 | (vec_concat:V8SI |
82e86dc6 | 4835 | (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand")) |
1ee48839 JJ |
4836 | (match_dup 2)))] |
4837 | "TARGET_AVX" | |
4838 | "operands[2] = CONST0_RTX (V4SImode);") | |
4839 | ||
42815c48 AI |
4840 | (define_insn "sse2_cvttpd2dq<mask_name>" |
4841 | [(set (match_operand:V4SI 0 "register_operand" "=v") | |
d6023b50 | 4842 | (vec_concat:V4SI |
42815c48 AI |
4843 | (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm")) |
4844 | (const_vector:V2SI [(const_int 0) (const_int 0)])))] | |
4845 | "TARGET_SSE2 && <mask_avx512vl_condition>" | |
a95d4000 UB |
4846 | { |
4847 | if (TARGET_AVX) | |
42815c48 | 4848 | return "vcvttpd2dq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"; |
a95d4000 UB |
4849 | else |
4850 | return "cvttpd2dq\t{%1, %0|%0, %1}"; | |
4851 | } | |
d6023b50 | 4852 | [(set_attr "type" "ssecvt") |
6a08ffca | 4853 | (set_attr "amdfam10_decode" "double") |
0b7e851b | 4854 | (set_attr "athlon_decode" "vector") |
a95d4000 UB |
4855 | (set_attr "bdver1_decode" "double") |
4856 | (set_attr "prefix" "maybe_vex") | |
4857 | (set_attr "mode" "TI")]) | |
95879c72 | 4858 | |
075691af | 4859 | (define_insn "sse2_cvtsd2ss<round_name>" |
3f97cb0b | 4860 | [(set (match_operand:V4SF 0 "register_operand" "=x,x,v") |
d6023b50 UB |
4861 | (vec_merge:V4SF |
4862 | (vec_duplicate:V4SF | |
4863 | (float_truncate:V2SF | |
075691af | 4864 | (match_operand:V2DF 2 "nonimmediate_operand" "x,m,<round_constraint>"))) |
3f97cb0b | 4865 | (match_operand:V4SF 1 "register_operand" "0,0,v") |
d6023b50 UB |
4866 | (const_int 1)))] |
4867 | "TARGET_SSE2" | |
a95d4000 UB |
4868 | "@ |
4869 | cvtsd2ss\t{%2, %0|%0, %2} | |
eabb5f48 | 4870 | cvtsd2ss\t{%2, %0|%0, %q2} |
075691af | 4871 | vcvtsd2ss\t{<round_op3>%2, %1, %0|%0, %1, %q2<round_op3>}" |
a95d4000 UB |
4872 | [(set_attr "isa" "noavx,noavx,avx") |
4873 | (set_attr "type" "ssecvt") | |
4874 | (set_attr "athlon_decode" "vector,double,*") | |
4875 | (set_attr "amdfam10_decode" "vector,double,*") | |
4876 | (set_attr "bdver1_decode" "direct,direct,*") | |
01284895 | 4877 | (set_attr "btver2_decode" "double,double,double") |
075691af | 4878 | (set_attr "prefix" "orig,orig,<round_prefix>") |
d6023b50 | 4879 | (set_attr "mode" "SF")]) |
115a33c2 | 4880 | |
075691af | 4881 | (define_insn "sse2_cvtss2sd<round_saeonly_name>" |
3f97cb0b | 4882 | [(set (match_operand:V2DF 0 "register_operand" "=x,x,v") |
115a33c2 | 4883 | (vec_merge:V2DF |
d6023b50 UB |
4884 | (float_extend:V2DF |
4885 | (vec_select:V2SF | |
1bdf255a | 4886 | (match_operand:V4SF 2 "<round_saeonly_nimm_predicate>" "x,m,<round_saeonly_constraint>") |
d6023b50 | 4887 | (parallel [(const_int 0) (const_int 1)]))) |
3f97cb0b | 4888 | (match_operand:V2DF 1 "register_operand" "0,0,v") |
115a33c2 RH |
4889 | (const_int 1)))] |
4890 | "TARGET_SSE2" | |
a95d4000 UB |
4891 | "@ |
4892 | cvtss2sd\t{%2, %0|%0, %2} | |
eabb5f48 | 4893 | cvtss2sd\t{%2, %0|%0, %k2} |
075691af | 4894 | vcvtss2sd\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %k2<round_saeonly_op3>}" |
a95d4000 UB |
4895 | [(set_attr "isa" "noavx,noavx,avx") |
4896 | (set_attr "type" "ssecvt") | |
4897 | (set_attr "amdfam10_decode" "vector,double,*") | |
4898 | (set_attr "athlon_decode" "direct,direct,*") | |
4899 | (set_attr "bdver1_decode" "direct,direct,*") | |
01284895 | 4900 | (set_attr "btver2_decode" "double,double,double") |
075691af | 4901 | (set_attr "prefix" "orig,orig,<round_saeonly_prefix>") |
ef719a44 RH |
4902 | (set_attr "mode" "DF")]) |
4903 | ||
06bc9e41 | 4904 | (define_insn "<mask_codefor>avx512f_cvtpd2ps512<mask_name><round_name>" |
c003c6d6 AI |
4905 | [(set (match_operand:V8SF 0 "register_operand" "=v") |
4906 | (float_truncate:V8SF | |
06bc9e41 | 4907 | (match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")))] |
c003c6d6 | 4908 | "TARGET_AVX512F" |
06bc9e41 | 4909 | "vcvtpd2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}" |
c003c6d6 AI |
4910 | [(set_attr "type" "ssecvt") |
4911 | (set_attr "prefix" "evex") | |
4912 | (set_attr "mode" "V8SF")]) | |
4913 | ||
1bdf255a AI |
4914 | (define_insn "avx_cvtpd2ps256<mask_name>" |
4915 | [(set (match_operand:V4SF 0 "register_operand" "=v") | |
95879c72 | 4916 | (float_truncate:V4SF |
1bdf255a AI |
4917 | (match_operand:V4DF 1 "nonimmediate_operand" "vm")))] |
4918 | "TARGET_AVX && <mask_avx512vl_condition>" | |
4919 | "vcvtpd2ps{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" | |
95879c72 | 4920 | [(set_attr "type" "ssecvt") |
1bdf255a | 4921 | (set_attr "prefix" "maybe_evex") |
01284895 | 4922 | (set_attr "btver2_decode" "vector") |
95879c72 L |
4923 | (set_attr "mode" "V4SF")]) |
4924 | ||
d6023b50 | 4925 | (define_expand "sse2_cvtpd2ps" |
82e86dc6 | 4926 | [(set (match_operand:V4SF 0 "register_operand") |
d6023b50 UB |
4927 | (vec_concat:V4SF |
4928 | (float_truncate:V2SF | |
82e86dc6 | 4929 | (match_operand:V2DF 1 "nonimmediate_operand")) |
d6023b50 | 4930 | (match_dup 2)))] |
ef719a44 | 4931 | "TARGET_SSE2" |
d6023b50 | 4932 | "operands[2] = CONST0_RTX (V2SFmode);") |
ef719a44 | 4933 | |
1bdf255a AI |
4934 | (define_expand "sse2_cvtpd2ps_mask" |
4935 | [(set (match_operand:V4SF 0 "register_operand") | |
4936 | (vec_merge:V4SF | |
4937 | (vec_concat:V4SF | |
4938 | (float_truncate:V2SF | |
4939 | (match_operand:V2DF 1 "nonimmediate_operand")) | |
4940 | (match_dup 4)) | |
4941 | (match_operand:V4SF 2 "register_operand") | |
4942 | (match_operand:QI 3 "register_operand")))] | |
4943 | "TARGET_SSE2" | |
4944 | "operands[4] = CONST0_RTX (V2SFmode);") | |
4945 | ||
4946 | (define_insn "*sse2_cvtpd2ps<mask_name>" | |
4947 | [(set (match_operand:V4SF 0 "register_operand" "=v") | |
d6023b50 UB |
4948 | (vec_concat:V4SF |
4949 | (float_truncate:V2SF | |
1bdf255a | 4950 | (match_operand:V2DF 1 "nonimmediate_operand" "vm")) |
82e86dc6 | 4951 | (match_operand:V2SF 2 "const0_operand")))] |
1bdf255a | 4952 | "TARGET_SSE2 && <mask_avx512vl_condition>" |
a95d4000 UB |
4953 | { |
4954 | if (TARGET_AVX) | |
1bdf255a | 4955 | return "vcvtpd2ps{x}\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}"; |
a95d4000 UB |
4956 | else |
4957 | return "cvtpd2ps\t{%1, %0|%0, %1}"; | |
4958 | } | |
d6023b50 | 4959 | [(set_attr "type" "ssecvt") |
6a08ffca | 4960 | (set_attr "amdfam10_decode" "double") |
0b7e851b | 4961 | (set_attr "athlon_decode" "vector") |
a95d4000 UB |
4962 | (set_attr "bdver1_decode" "double") |
4963 | (set_attr "prefix_data16" "1") | |
4964 | (set_attr "prefix" "maybe_vex") | |
4965 | (set_attr "mode" "V4SF")]) | |
115a33c2 | 4966 | |
ec5e777c AI |
4967 | ;; For <sse2_avx_avx512f>_cvtps2pd<avxsizesuffix> insn pattern |
4968 | (define_mode_attr sf2dfmode | |
4969 | [(V8DF "V8SF") (V4DF "V4SF")]) | |
4970 | ||
8a6ef760 | 4971 | (define_insn "<sse2_avx_avx512f>_cvtps2pd<avxsizesuffix><mask_name><round_saeonly_name>" |
ec5e777c AI |
4972 | [(set (match_operand:VF2_512_256 0 "register_operand" "=v") |
4973 | (float_extend:VF2_512_256 | |
8a6ef760 AI |
4974 | (match_operand:<sf2dfmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))] |
4975 | "TARGET_AVX && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>" | |
4976 | "vcvtps2pd\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}" | |
95879c72 | 4977 | [(set_attr "type" "ssecvt") |
ec5e777c AI |
4978 | (set_attr "prefix" "maybe_vex") |
4979 | (set_attr "mode" "<MODE>")]) | |
95879c72 | 4980 | |
1e27129f L |
4981 | (define_insn "*avx_cvtps2pd256_2" |
4982 | [(set (match_operand:V4DF 0 "register_operand" "=x") | |
4983 | (float_extend:V4DF | |
4984 | (vec_select:V4SF | |
4985 | (match_operand:V8SF 1 "nonimmediate_operand" "xm") | |
a95d4000 UB |
4986 | (parallel [(const_int 0) (const_int 1) |
4987 | (const_int 2) (const_int 3)]))))] | |
1e27129f L |
4988 | "TARGET_AVX" |
4989 | "vcvtps2pd\t{%x1, %0|%0, %x1}" | |
4990 | [(set_attr "type" "ssecvt") | |
4991 | (set_attr "prefix" "vex") | |
4992 | (set_attr "mode" "V4DF")]) | |
4993 | ||
c003c6d6 AI |
4994 | (define_insn "vec_unpacks_lo_v16sf" |
4995 | [(set (match_operand:V8DF 0 "register_operand" "=v") | |
4996 | (float_extend:V8DF | |
4997 | (vec_select:V8SF | |
4998 | (match_operand:V16SF 1 "nonimmediate_operand" "vm") | |
4999 | (parallel [(const_int 0) (const_int 1) | |
5000 | (const_int 2) (const_int 3) | |
5001 | (const_int 4) (const_int 5) | |
5002 | (const_int 6) (const_int 7)]))))] | |
5003 | "TARGET_AVX512F" | |
5004 | "vcvtps2pd\t{%t1, %0|%0, %t1}" | |
5005 | [(set_attr "type" "ssecvt") | |
5006 | (set_attr "prefix" "evex") | |
5007 | (set_attr "mode" "V8DF")]) | |
5008 | ||
2be4091a AI |
5009 | (define_insn "<avx512>_cvt<ssemodesuffix>2mask<mode>" |
5010 | [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk") | |
5011 | (unspec:<avx512fmaskmode> | |
5012 | [(match_operand:VI12_AVX512VL 1 "register_operand" "v")] | |
5013 | UNSPEC_CVTINT2MASK))] | |
5014 | "TARGET_AVX512BW" | |
5015 | "vpmov<ssemodesuffix>2m\t{%1, %0|%0, %1}" | |
5016 | [(set_attr "prefix" "evex") | |
5017 | (set_attr "mode" "<sseinsnmode>")]) | |
5018 | ||
5019 | (define_insn "<avx512>_cvt<ssemodesuffix>2mask<mode>" | |
5020 | [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk") | |
5021 | (unspec:<avx512fmaskmode> | |
5022 | [(match_operand:VI48_AVX512VL 1 "register_operand" "v")] | |
5023 | UNSPEC_CVTINT2MASK))] | |
5024 | "TARGET_AVX512DQ" | |
5025 | "vpmov<ssemodesuffix>2m\t{%1, %0|%0, %1}" | |
5026 | [(set_attr "prefix" "evex") | |
5027 | (set_attr "mode" "<sseinsnmode>")]) | |
5028 | ||
5029 | (define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>" | |
5030 | [(set (match_operand:VI12_AVX512VL 0 "register_operand") | |
5031 | (vec_merge:VI12_AVX512VL | |
5032 | (match_dup 2) | |
5033 | (match_dup 3) | |
5034 | (match_operand:<avx512fmaskmode> 1 "register_operand")))] | |
5035 | "TARGET_AVX512BW" | |
5036 | { | |
5037 | operands[2] = CONSTM1_RTX (<MODE>mode); | |
5038 | operands[3] = CONST0_RTX (<MODE>mode); | |
5039 | }) | |
5040 | ||
5041 | (define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>" | |
5042 | [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v") | |
5043 | (vec_merge:VI12_AVX512VL | |
5044 | (match_operand:VI12_AVX512VL 2 "constm1_operand") | |
5045 | (match_operand:VI12_AVX512VL 3 "const0_operand") | |
5046 | (match_operand:<avx512fmaskmode> 1 "register_operand" "Yk")))] | |
5047 | "TARGET_AVX512BW" | |
5048 | "vpmovm2<ssemodesuffix>\t{%1, %0|%0, %1}" | |
5049 | [(set_attr "prefix" "evex") | |
5050 | (set_attr "mode" "<sseinsnmode>")]) | |
5051 | ||
5052 | (define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>" | |
5053 | [(set (match_operand:VI48_AVX512VL 0 "register_operand") | |
5054 | (vec_merge:VI48_AVX512VL | |
5055 | (match_dup 2) | |
5056 | (match_dup 3) | |
5057 | (match_operand:<avx512fmaskmode> 1 "register_operand")))] | |
5058 | "TARGET_AVX512DQ" | |
5059 | "{ | |
5060 | operands[2] = CONSTM1_RTX (<MODE>mode); | |
5061 | operands[3] = CONST0_RTX (<MODE>mode); | |
5062 | }") | |
5063 | ||
5064 | (define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>" | |
5065 | [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v") | |
5066 | (vec_merge:VI48_AVX512VL | |
5067 | (match_operand:VI48_AVX512VL 2 "constm1_operand") | |
5068 | (match_operand:VI48_AVX512VL 3 "const0_operand") | |
5069 | (match_operand:<avx512fmaskmode> 1 "register_operand" "Yk")))] | |
5070 | "TARGET_AVX512DQ" | |
5071 | "vpmovm2<ssemodesuffix>\t{%1, %0|%0, %1}" | |
5072 | [(set_attr "prefix" "evex") | |
5073 | (set_attr "mode" "<sseinsnmode>")]) | |
5074 | ||
1bdf255a AI |
5075 | (define_insn "sse2_cvtps2pd<mask_name>" |
5076 | [(set (match_operand:V2DF 0 "register_operand" "=v") | |
d6023b50 UB |
5077 | (float_extend:V2DF |
5078 | (vec_select:V2SF | |
1bdf255a | 5079 | (match_operand:V4SF 1 "nonimmediate_operand" "vm") |
d6023b50 | 5080 | (parallel [(const_int 0) (const_int 1)]))))] |
1bdf255a AI |
5081 | "TARGET_SSE2 && <mask_avx512vl_condition>" |
5082 | "%vcvtps2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}" | |
d6023b50 | 5083 | [(set_attr "type" "ssecvt") |
6a08ffca | 5084 | (set_attr "amdfam10_decode" "direct") |
0b7e851b | 5085 | (set_attr "athlon_decode" "double") |
a95d4000 UB |
5086 | (set_attr "bdver1_decode" "double") |
5087 | (set_attr "prefix_data16" "0") | |
5088 | (set_attr "prefix" "maybe_vex") | |
5089 | (set_attr "mode" "V2DF")]) | |
ef719a44 | 5090 | |
d6023b50 UB |
5091 | (define_expand "vec_unpacks_hi_v4sf" |
5092 | [(set (match_dup 2) | |
5093 | (vec_select:V4SF | |
5094 | (vec_concat:V8SF | |
5095 | (match_dup 2) | |
82e86dc6 | 5096 | (match_operand:V4SF 1 "nonimmediate_operand")) |
a95d4000 UB |
5097 | (parallel [(const_int 6) (const_int 7) |
5098 | (const_int 2) (const_int 3)]))) | |
82e86dc6 | 5099 | (set (match_operand:V2DF 0 "register_operand") |
d6023b50 UB |
5100 | (float_extend:V2DF |
5101 | (vec_select:V2SF | |
5102 | (match_dup 2) | |
5103 | (parallel [(const_int 0) (const_int 1)]))))] | |
a427621f UB |
5104 | "TARGET_SSE2" |
5105 | "operands[2] = gen_reg_rtx (V4SFmode);") | |
e20524fa | 5106 | |
1e27129f L |
5107 | (define_expand "vec_unpacks_hi_v8sf" |
5108 | [(set (match_dup 2) | |
5109 | (vec_select:V4SF | |
110b7886 | 5110 | (match_operand:V8SF 1 "register_operand") |
a95d4000 UB |
5111 | (parallel [(const_int 4) (const_int 5) |
5112 | (const_int 6) (const_int 7)]))) | |
82e86dc6 | 5113 | (set (match_operand:V4DF 0 "register_operand") |
1e27129f L |
5114 | (float_extend:V4DF |
5115 | (match_dup 2)))] | |
5116 | "TARGET_AVX" | |
a95d4000 | 5117 | "operands[2] = gen_reg_rtx (V4SFmode);") |
1e27129f | 5118 | |
c003c6d6 AI |
5119 | (define_expand "vec_unpacks_hi_v16sf" |
5120 | [(set (match_dup 2) | |
5121 | (vec_select:V8SF | |
110b7886 | 5122 | (match_operand:V16SF 1 "register_operand") |
c003c6d6 AI |
5123 | (parallel [(const_int 8) (const_int 9) |
5124 | (const_int 10) (const_int 11) | |
5125 | (const_int 12) (const_int 13) | |
5126 | (const_int 14) (const_int 15)]))) | |
5127 | (set (match_operand:V8DF 0 "register_operand") | |
5128 | (float_extend:V8DF | |
5129 | (match_dup 2)))] | |
5130 | "TARGET_AVX512F" | |
5131 | "operands[2] = gen_reg_rtx (V8SFmode);") | |
5132 | ||
d6023b50 | 5133 | (define_expand "vec_unpacks_lo_v4sf" |
82e86dc6 | 5134 | [(set (match_operand:V2DF 0 "register_operand") |
d6023b50 UB |
5135 | (float_extend:V2DF |
5136 | (vec_select:V2SF | |
82e86dc6 | 5137 | (match_operand:V4SF 1 "nonimmediate_operand") |
d6023b50 UB |
5138 | (parallel [(const_int 0) (const_int 1)]))))] |
5139 | "TARGET_SSE2") | |
ef719a44 | 5140 | |
1e27129f | 5141 | (define_expand "vec_unpacks_lo_v8sf" |
82e86dc6 | 5142 | [(set (match_operand:V4DF 0 "register_operand") |
1e27129f L |
5143 | (float_extend:V4DF |
5144 | (vec_select:V4SF | |
82e86dc6 | 5145 | (match_operand:V8SF 1 "nonimmediate_operand") |
a95d4000 UB |
5146 | (parallel [(const_int 0) (const_int 1) |
5147 | (const_int 2) (const_int 3)]))))] | |
1e27129f L |
5148 | "TARGET_AVX") |
5149 | ||
4fbe3b8a | 5150 | (define_mode_attr sseunpackfltmode |
3bdf6340 AI |
5151 | [(V8HI "V4SF") (V4SI "V2DF") (V16HI "V8SF") |
5152 | (V8SI "V4DF") (V32HI "V16SF") (V16SI "V8DF")]) | |
4fbe3b8a JJ |
5153 | |
5154 | (define_expand "vec_unpacks_float_hi_<mode>" | |
82e86dc6 | 5155 | [(match_operand:<sseunpackfltmode> 0 "register_operand") |
3bdf6340 | 5156 | (match_operand:VI2_AVX512F 1 "register_operand")] |
ef719a44 | 5157 | "TARGET_SSE2" |
d6023b50 | 5158 | { |
4fbe3b8a | 5159 | rtx tmp = gen_reg_rtx (<sseunpackmode>mode); |
ef719a44 | 5160 | |
4fbe3b8a | 5161 | emit_insn (gen_vec_unpacks_hi_<mode> (tmp, operands[1])); |
f7df4a84 | 5162 | emit_insn (gen_rtx_SET (operands[0], |
4fbe3b8a | 5163 | gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp))); |
d6023b50 UB |
5164 | DONE; |
5165 | }) | |
ef719a44 | 5166 | |
4fbe3b8a | 5167 | (define_expand "vec_unpacks_float_lo_<mode>" |
82e86dc6 | 5168 | [(match_operand:<sseunpackfltmode> 0 "register_operand") |
3bdf6340 | 5169 | (match_operand:VI2_AVX512F 1 "register_operand")] |
ae46a07a RH |
5170 | "TARGET_SSE2" |
5171 | { | |
4fbe3b8a | 5172 | rtx tmp = gen_reg_rtx (<sseunpackmode>mode); |
ae46a07a | 5173 | |
4fbe3b8a | 5174 | emit_insn (gen_vec_unpacks_lo_<mode> (tmp, operands[1])); |
f7df4a84 | 5175 | emit_insn (gen_rtx_SET (operands[0], |
4fbe3b8a | 5176 | gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp))); |
d6023b50 UB |
5177 | DONE; |
5178 | }) | |
ef719a44 | 5179 | |
4fbe3b8a | 5180 | (define_expand "vec_unpacku_float_hi_<mode>" |
82e86dc6 | 5181 | [(match_operand:<sseunpackfltmode> 0 "register_operand") |
3bdf6340 | 5182 | (match_operand:VI2_AVX512F 1 "register_operand")] |
ef719a44 | 5183 | "TARGET_SSE2" |
d6023b50 | 5184 | { |
4fbe3b8a | 5185 | rtx tmp = gen_reg_rtx (<sseunpackmode>mode); |
ef719a44 | 5186 | |
4fbe3b8a | 5187 | emit_insn (gen_vec_unpacku_hi_<mode> (tmp, operands[1])); |
f7df4a84 | 5188 | emit_insn (gen_rtx_SET (operands[0], |
4fbe3b8a | 5189 | gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp))); |
d6023b50 UB |
5190 | DONE; |
5191 | }) | |
ef719a44 | 5192 | |
4fbe3b8a | 5193 | (define_expand "vec_unpacku_float_lo_<mode>" |
82e86dc6 | 5194 | [(match_operand:<sseunpackfltmode> 0 "register_operand") |
3bdf6340 | 5195 | (match_operand:VI2_AVX512F 1 "register_operand")] |
ef719a44 | 5196 | "TARGET_SSE2" |
d6023b50 | 5197 | { |
4fbe3b8a | 5198 | rtx tmp = gen_reg_rtx (<sseunpackmode>mode); |
ef719a44 | 5199 | |
4fbe3b8a | 5200 | emit_insn (gen_vec_unpacku_lo_<mode> (tmp, operands[1])); |
f7df4a84 | 5201 | emit_insn (gen_rtx_SET (operands[0], |
4fbe3b8a | 5202 | gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp))); |
d6023b50 UB |
5203 | DONE; |
5204 | }) | |
ef719a44 | 5205 | |
d6023b50 UB |
5206 | (define_expand "vec_unpacks_float_hi_v4si" |
5207 | [(set (match_dup 2) | |
5208 | (vec_select:V4SI | |
82e86dc6 | 5209 | (match_operand:V4SI 1 "nonimmediate_operand") |
a95d4000 UB |
5210 | (parallel [(const_int 2) (const_int 3) |
5211 | (const_int 2) (const_int 3)]))) | |
82e86dc6 | 5212 | (set (match_operand:V2DF 0 "register_operand") |
977e83a3 | 5213 | (float:V2DF |
d6023b50 UB |
5214 | (vec_select:V2SI |
5215 | (match_dup 2) | |
5216 | (parallel [(const_int 0) (const_int 1)]))))] | |
a427621f UB |
5217 | "TARGET_SSE2" |
5218 | "operands[2] = gen_reg_rtx (V4SImode);") | |
ef719a44 | 5219 | |
d6023b50 | 5220 | (define_expand "vec_unpacks_float_lo_v4si" |
82e86dc6 | 5221 | [(set (match_operand:V2DF 0 "register_operand") |
d6023b50 UB |
5222 | (float:V2DF |
5223 | (vec_select:V2SI | |
82e86dc6 | 5224 | (match_operand:V4SI 1 "nonimmediate_operand") |
d6023b50 UB |
5225 | (parallel [(const_int 0) (const_int 1)]))))] |
5226 | "TARGET_SSE2") | |
5227 | ||
1e27129f L |
5228 | (define_expand "vec_unpacks_float_hi_v8si" |
5229 | [(set (match_dup 2) | |
5230 | (vec_select:V4SI | |
82e86dc6 | 5231 | (match_operand:V8SI 1 "nonimmediate_operand") |
a95d4000 UB |
5232 | (parallel [(const_int 4) (const_int 5) |
5233 | (const_int 6) (const_int 7)]))) | |
82e86dc6 | 5234 | (set (match_operand:V4DF 0 "register_operand") |
977e83a3 | 5235 | (float:V4DF |
1e27129f L |
5236 | (match_dup 2)))] |
5237 | "TARGET_AVX" | |
5238 | "operands[2] = gen_reg_rtx (V4SImode);") | |
5239 | ||
5240 | (define_expand "vec_unpacks_float_lo_v8si" | |
82e86dc6 | 5241 | [(set (match_operand:V4DF 0 "register_operand") |
1e27129f L |
5242 | (float:V4DF |
5243 | (vec_select:V4SI | |
82e86dc6 | 5244 | (match_operand:V8SI 1 "nonimmediate_operand") |
a95d4000 UB |
5245 | (parallel [(const_int 0) (const_int 1) |
5246 | (const_int 2) (const_int 3)]))))] | |
1e27129f L |
5247 | "TARGET_AVX") |
5248 | ||
c003c6d6 AI |
5249 | (define_expand "vec_unpacks_float_hi_v16si" |
5250 | [(set (match_dup 2) | |
5251 | (vec_select:V8SI | |
5252 | (match_operand:V16SI 1 "nonimmediate_operand") | |
5253 | (parallel [(const_int 8) (const_int 9) | |
5254 | (const_int 10) (const_int 11) | |
5255 | (const_int 12) (const_int 13) | |
5256 | (const_int 14) (const_int 15)]))) | |
5257 | (set (match_operand:V8DF 0 "register_operand") | |
5258 | (float:V8DF | |
5259 | (match_dup 2)))] | |
5260 | "TARGET_AVX512F" | |
5261 | "operands[2] = gen_reg_rtx (V8SImode);") | |
5262 | ||
5263 | (define_expand "vec_unpacks_float_lo_v16si" | |
5264 | [(set (match_operand:V8DF 0 "register_operand") | |
5265 | (float:V8DF | |
5266 | (vec_select:V8SI | |
5267 | (match_operand:V16SI 1 "nonimmediate_operand") | |
5268 | (parallel [(const_int 0) (const_int 1) | |
5269 | (const_int 2) (const_int 3) | |
5270 | (const_int 4) (const_int 5) | |
5271 | (const_int 6) (const_int 7)]))))] | |
5272 | "TARGET_AVX512F") | |
5273 | ||
848e6317 UB |
5274 | (define_expand "vec_unpacku_float_hi_v4si" |
5275 | [(set (match_dup 5) | |
5276 | (vec_select:V4SI | |
82e86dc6 | 5277 | (match_operand:V4SI 1 "nonimmediate_operand") |
a95d4000 UB |
5278 | (parallel [(const_int 2) (const_int 3) |
5279 | (const_int 2) (const_int 3)]))) | |
848e6317 | 5280 | (set (match_dup 6) |
977e83a3 | 5281 | (float:V2DF |
848e6317 UB |
5282 | (vec_select:V2SI |
5283 | (match_dup 5) | |
5284 | (parallel [(const_int 0) (const_int 1)])))) | |
5285 | (set (match_dup 7) | |
5286 | (lt:V2DF (match_dup 6) (match_dup 3))) | |
5287 | (set (match_dup 8) | |
5288 | (and:V2DF (match_dup 7) (match_dup 4))) | |
82e86dc6 | 5289 | (set (match_operand:V2DF 0 "register_operand") |
848e6317 | 5290 | (plus:V2DF (match_dup 6) (match_dup 8)))] |
a427621f | 5291 | "TARGET_SSE2" |
848e6317 UB |
5292 | { |
5293 | REAL_VALUE_TYPE TWO32r; | |
5294 | rtx x; | |
5295 | int i; | |
5296 | ||
5297 | real_ldexp (&TWO32r, &dconst1, 32); | |
5298 | x = const_double_from_real_value (TWO32r, DFmode); | |
5299 | ||
5300 | operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode)); | |
1e27129f L |
5301 | operands[4] = force_reg (V2DFmode, |
5302 | ix86_build_const_vector (V2DFmode, 1, x)); | |
848e6317 UB |
5303 | |
5304 | operands[5] = gen_reg_rtx (V4SImode); | |
6cf9eb27 | 5305 | |
848e6317 UB |
5306 | for (i = 6; i < 9; i++) |
5307 | operands[i] = gen_reg_rtx (V2DFmode); | |
5308 | }) | |
5309 | ||
5310 | (define_expand "vec_unpacku_float_lo_v4si" | |
5311 | [(set (match_dup 5) | |
5312 | (float:V2DF | |
5313 | (vec_select:V2SI | |
82e86dc6 | 5314 | (match_operand:V4SI 1 "nonimmediate_operand") |
848e6317 UB |
5315 | (parallel [(const_int 0) (const_int 1)])))) |
5316 | (set (match_dup 6) | |
5317 | (lt:V2DF (match_dup 5) (match_dup 3))) | |
5318 | (set (match_dup 7) | |
5319 | (and:V2DF (match_dup 6) (match_dup 4))) | |
82e86dc6 | 5320 | (set (match_operand:V2DF 0 "register_operand") |
848e6317 UB |
5321 | (plus:V2DF (match_dup 5) (match_dup 7)))] |
5322 | "TARGET_SSE2" | |
5323 | { | |
5324 | REAL_VALUE_TYPE TWO32r; | |
5325 | rtx x; | |
5326 | int i; | |
5327 | ||
5328 | real_ldexp (&TWO32r, &dconst1, 32); | |
5329 | x = const_double_from_real_value (TWO32r, DFmode); | |
5330 | ||
5331 | operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode)); | |
1e27129f L |
5332 | operands[4] = force_reg (V2DFmode, |
5333 | ix86_build_const_vector (V2DFmode, 1, x)); | |
848e6317 UB |
5334 | |
5335 | for (i = 5; i < 8; i++) | |
5336 | operands[i] = gen_reg_rtx (V2DFmode); | |
5337 | }) | |
5338 | ||
4fbe3b8a | 5339 | (define_expand "vec_unpacku_float_hi_v8si" |
82e86dc6 UB |
5340 | [(match_operand:V4DF 0 "register_operand") |
5341 | (match_operand:V8SI 1 "register_operand")] | |
4fbe3b8a JJ |
5342 | "TARGET_AVX" |
5343 | { | |
5344 | REAL_VALUE_TYPE TWO32r; | |
5345 | rtx x, tmp[6]; | |
5346 | int i; | |
5347 | ||
5348 | real_ldexp (&TWO32r, &dconst1, 32); | |
5349 | x = const_double_from_real_value (TWO32r, DFmode); | |
5350 | ||
5351 | tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode)); | |
5352 | tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x)); | |
5353 | tmp[5] = gen_reg_rtx (V4SImode); | |
5354 | ||
5355 | for (i = 2; i < 5; i++) | |
5356 | tmp[i] = gen_reg_rtx (V4DFmode); | |
5357 | emit_insn (gen_vec_extract_hi_v8si (tmp[5], operands[1])); | |
788a2908 | 5358 | emit_insn (gen_floatv4siv4df2 (tmp[2], tmp[5])); |
f7df4a84 | 5359 | emit_insn (gen_rtx_SET (tmp[3], gen_rtx_LT (V4DFmode, tmp[2], tmp[0]))); |
4fbe3b8a JJ |
5360 | emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1])); |
5361 | emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4])); | |
5362 | DONE; | |
5363 | }) | |
5364 | ||
d3c2fee0 AI |
5365 | (define_expand "vec_unpacku_float_hi_v16si" |
5366 | [(match_operand:V8DF 0 "register_operand") | |
5367 | (match_operand:V16SI 1 "register_operand")] | |
5368 | "TARGET_AVX512F" | |
5369 | { | |
5370 | REAL_VALUE_TYPE TWO32r; | |
5371 | rtx k, x, tmp[4]; | |
5372 | ||
5373 | real_ldexp (&TWO32r, &dconst1, 32); | |
5374 | x = const_double_from_real_value (TWO32r, DFmode); | |
5375 | ||
5376 | tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode)); | |
5377 | tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x)); | |
5378 | tmp[2] = gen_reg_rtx (V8DFmode); | |
5379 | tmp[3] = gen_reg_rtx (V8SImode); | |
5380 | k = gen_reg_rtx (QImode); | |
5381 | ||
5382 | emit_insn (gen_vec_extract_hi_v16si (tmp[3], operands[1])); | |
5383 | emit_insn (gen_floatv8siv8df2 (tmp[2], tmp[3])); | |
f7df4a84 | 5384 | emit_insn (gen_rtx_SET (k, gen_rtx_LT (QImode, tmp[2], tmp[0]))); |
d3c2fee0 AI |
5385 | emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k)); |
5386 | emit_move_insn (operands[0], tmp[2]); | |
5387 | DONE; | |
5388 | }) | |
5389 | ||
4fbe3b8a | 5390 | (define_expand "vec_unpacku_float_lo_v8si" |
82e86dc6 UB |
5391 | [(match_operand:V4DF 0 "register_operand") |
5392 | (match_operand:V8SI 1 "nonimmediate_operand")] | |
4fbe3b8a JJ |
5393 | "TARGET_AVX" |
5394 | { | |
5395 | REAL_VALUE_TYPE TWO32r; | |
5396 | rtx x, tmp[5]; | |
5397 | int i; | |
5398 | ||
5399 | real_ldexp (&TWO32r, &dconst1, 32); | |
5400 | x = const_double_from_real_value (TWO32r, DFmode); | |
5401 | ||
5402 | tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode)); | |
5403 | tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x)); | |
5404 | ||
5405 | for (i = 2; i < 5; i++) | |
5406 | tmp[i] = gen_reg_rtx (V4DFmode); | |
5407 | emit_insn (gen_avx_cvtdq2pd256_2 (tmp[2], operands[1])); | |
f7df4a84 | 5408 | emit_insn (gen_rtx_SET (tmp[3], gen_rtx_LT (V4DFmode, tmp[2], tmp[0]))); |
4fbe3b8a JJ |
5409 | emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1])); |
5410 | emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4])); | |
5411 | DONE; | |
5412 | }) | |
5413 | ||
47490470 AI |
5414 | (define_expand "vec_unpacku_float_lo_v16si" |
5415 | [(match_operand:V8DF 0 "register_operand") | |
5416 | (match_operand:V16SI 1 "nonimmediate_operand")] | |
5417 | "TARGET_AVX512F" | |
5418 | { | |
5419 | REAL_VALUE_TYPE TWO32r; | |
5420 | rtx k, x, tmp[3]; | |
5421 | ||
5422 | real_ldexp (&TWO32r, &dconst1, 32); | |
5423 | x = const_double_from_real_value (TWO32r, DFmode); | |
5424 | ||
5425 | tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode)); | |
5426 | tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x)); | |
5427 | tmp[2] = gen_reg_rtx (V8DFmode); | |
5428 | k = gen_reg_rtx (QImode); | |
5429 | ||
5430 | emit_insn (gen_avx512f_cvtdq2pd512_2 (tmp[2], operands[1])); | |
f7df4a84 | 5431 | emit_insn (gen_rtx_SET (k, gen_rtx_LT (QImode, tmp[2], tmp[0]))); |
47490470 AI |
5432 | emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k)); |
5433 | emit_move_insn (operands[0], tmp[2]); | |
5434 | DONE; | |
5435 | }) | |
5436 | ||
ec5e777c | 5437 | (define_expand "vec_pack_trunc_<mode>" |
1e27129f | 5438 | [(set (match_dup 3) |
ec5e777c AI |
5439 | (float_truncate:<sf2dfmode> |
5440 | (match_operand:VF2_512_256 1 "nonimmediate_operand"))) | |
1e27129f | 5441 | (set (match_dup 4) |
ec5e777c AI |
5442 | (float_truncate:<sf2dfmode> |
5443 | (match_operand:VF2_512_256 2 "nonimmediate_operand"))) | |
5444 | (set (match_operand:<ssePSmode> 0 "register_operand") | |
5445 | (vec_concat:<ssePSmode> | |
1e27129f L |
5446 | (match_dup 3) |
5447 | (match_dup 4)))] | |
5448 | "TARGET_AVX" | |
5449 | { | |
ec5e777c AI |
5450 | operands[3] = gen_reg_rtx (<sf2dfmode>mode); |
5451 | operands[4] = gen_reg_rtx (<sf2dfmode>mode); | |
1e27129f L |
5452 | }) |
5453 | ||
d6023b50 | 5454 | (define_expand "vec_pack_trunc_v2df" |
82e86dc6 UB |
5455 | [(match_operand:V4SF 0 "register_operand") |
5456 | (match_operand:V2DF 1 "nonimmediate_operand") | |
5457 | (match_operand:V2DF 2 "nonimmediate_operand")] | |
ef719a44 | 5458 | "TARGET_SSE2" |
d6023b50 | 5459 | { |
76a2c904 | 5460 | rtx tmp0, tmp1; |
ef719a44 | 5461 | |
a1aff58f | 5462 | if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ()) |
76a2c904 UB |
5463 | { |
5464 | tmp0 = gen_reg_rtx (V4DFmode); | |
5465 | tmp1 = force_reg (V2DFmode, operands[1]); | |
ef719a44 | 5466 | |
76a2c904 UB |
5467 | emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2])); |
5468 | emit_insn (gen_avx_cvtpd2ps256 (operands[0], tmp0)); | |
5469 | } | |
5470 | else | |
5471 | { | |
5472 | tmp0 = gen_reg_rtx (V4SFmode); | |
5473 | tmp1 = gen_reg_rtx (V4SFmode); | |
5474 | ||
5475 | emit_insn (gen_sse2_cvtpd2ps (tmp0, operands[1])); | |
5476 | emit_insn (gen_sse2_cvtpd2ps (tmp1, operands[2])); | |
5477 | emit_insn (gen_sse_movlhps (operands[0], tmp0, tmp1)); | |
5478 | } | |
d6023b50 UB |
5479 | DONE; |
5480 | }) | |
ab8efbd8 | 5481 | |
ec5e777c AI |
5482 | (define_expand "vec_pack_sfix_trunc_v8df" |
5483 | [(match_operand:V16SI 0 "register_operand") | |
5484 | (match_operand:V8DF 1 "nonimmediate_operand") | |
5485 | (match_operand:V8DF 2 "nonimmediate_operand")] | |
5486 | "TARGET_AVX512F" | |
5487 | { | |
5488 | rtx r1, r2; | |
5489 | ||
5490 | r1 = gen_reg_rtx (V8SImode); | |
5491 | r2 = gen_reg_rtx (V8SImode); | |
5492 | ||
5493 | emit_insn (gen_fix_truncv8dfv8si2 (r1, operands[1])); | |
5494 | emit_insn (gen_fix_truncv8dfv8si2 (r2, operands[2])); | |
5495 | emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2)); | |
5496 | DONE; | |
5497 | }) | |
5498 | ||
1ee48839 | 5499 | (define_expand "vec_pack_sfix_trunc_v4df" |
82e86dc6 UB |
5500 | [(match_operand:V8SI 0 "register_operand") |
5501 | (match_operand:V4DF 1 "nonimmediate_operand") | |
5502 | (match_operand:V4DF 2 "nonimmediate_operand")] | |
1ee48839 JJ |
5503 | "TARGET_AVX" |
5504 | { | |
5505 | rtx r1, r2; | |
5506 | ||
76a2c904 UB |
5507 | r1 = gen_reg_rtx (V4SImode); |
5508 | r2 = gen_reg_rtx (V4SImode); | |
1ee48839 | 5509 | |
76a2c904 UB |
5510 | emit_insn (gen_fix_truncv4dfv4si2 (r1, operands[1])); |
5511 | emit_insn (gen_fix_truncv4dfv4si2 (r2, operands[2])); | |
5512 | emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2)); | |
1ee48839 JJ |
5513 | DONE; |
5514 | }) | |
5515 | ||
d6023b50 | 5516 | (define_expand "vec_pack_sfix_trunc_v2df" |
82e86dc6 UB |
5517 | [(match_operand:V4SI 0 "register_operand") |
5518 | (match_operand:V2DF 1 "nonimmediate_operand") | |
5519 | (match_operand:V2DF 2 "nonimmediate_operand")] | |
ab8efbd8 | 5520 | "TARGET_SSE2" |
d6023b50 | 5521 | { |
d8c84975 | 5522 | rtx tmp0, tmp1, tmp2; |
ab8efbd8 | 5523 | |
a1aff58f | 5524 | if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ()) |
76a2c904 UB |
5525 | { |
5526 | tmp0 = gen_reg_rtx (V4DFmode); | |
5527 | tmp1 = force_reg (V2DFmode, operands[1]); | |
ab8efbd8 | 5528 | |
76a2c904 UB |
5529 | emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2])); |
5530 | emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp0)); | |
5531 | } | |
5532 | else | |
5533 | { | |
5534 | tmp0 = gen_reg_rtx (V4SImode); | |
5535 | tmp1 = gen_reg_rtx (V4SImode); | |
d8c84975 | 5536 | tmp2 = gen_reg_rtx (V2DImode); |
76a2c904 UB |
5537 | |
5538 | emit_insn (gen_sse2_cvttpd2dq (tmp0, operands[1])); | |
5539 | emit_insn (gen_sse2_cvttpd2dq (tmp1, operands[2])); | |
d8c84975 JJ |
5540 | emit_insn (gen_vec_interleave_lowv2di (tmp2, |
5541 | gen_lowpart (V2DImode, tmp0), | |
5542 | gen_lowpart (V2DImode, tmp1))); | |
5543 | emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2)); | |
76a2c904 | 5544 | } |
d6023b50 UB |
5545 | DONE; |
5546 | }) | |
ab8efbd8 | 5547 | |
6bf39801 | 5548 | (define_mode_attr ssepackfltmode |
ec5e777c | 5549 | [(V8DF "V16SI") (V4DF "V8SI") (V2DF "V4SI")]) |
6bf39801 JJ |
5550 | |
5551 | (define_expand "vec_pack_ufix_trunc_<mode>" | |
82e86dc6 | 5552 | [(match_operand:<ssepackfltmode> 0 "register_operand") |
d3c2fee0 AI |
5553 | (match_operand:VF2 1 "register_operand") |
5554 | (match_operand:VF2 2 "register_operand")] | |
2f2da9e9 | 5555 | "TARGET_SSE2" |
6bf39801 | 5556 | { |
d3c2fee0 | 5557 | if (<MODE>mode == V8DFmode) |
2f2da9e9 | 5558 | { |
d3c2fee0 AI |
5559 | rtx r1, r2; |
5560 | ||
5561 | r1 = gen_reg_rtx (V8SImode); | |
5562 | r2 = gen_reg_rtx (V8SImode); | |
5563 | ||
5564 | emit_insn (gen_ufix_truncv8dfv8si2 (r1, operands[1])); | |
5565 | emit_insn (gen_ufix_truncv8dfv8si2 (r2, operands[2])); | |
5566 | emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2)); | |
2f2da9e9 JJ |
5567 | } |
5568 | else | |
5569 | { | |
d3c2fee0 AI |
5570 | rtx tmp[7]; |
5571 | tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]); | |
5572 | tmp[1] = ix86_expand_adjust_ufix_to_sfix_si (operands[2], &tmp[3]); | |
5573 | tmp[4] = gen_reg_rtx (<ssepackfltmode>mode); | |
5574 | emit_insn (gen_vec_pack_sfix_trunc_<mode> (tmp[4], tmp[0], tmp[1])); | |
5575 | if (<ssepackfltmode>mode == V4SImode || TARGET_AVX2) | |
5576 | { | |
5577 | tmp[5] = gen_reg_rtx (<ssepackfltmode>mode); | |
5578 | ix86_expand_vec_extract_even_odd (tmp[5], tmp[2], tmp[3], 0); | |
5579 | } | |
5580 | else | |
5581 | { | |
5582 | tmp[5] = gen_reg_rtx (V8SFmode); | |
5583 | ix86_expand_vec_extract_even_odd (tmp[5], gen_lowpart (V8SFmode, tmp[2]), | |
5584 | gen_lowpart (V8SFmode, tmp[3]), 0); | |
5585 | tmp[5] = gen_lowpart (V8SImode, tmp[5]); | |
5586 | } | |
5587 | tmp[6] = expand_simple_binop (<ssepackfltmode>mode, XOR, tmp[4], tmp[5], | |
5588 | operands[0], 0, OPTAB_DIRECT); | |
5589 | if (tmp[6] != operands[0]) | |
5590 | emit_move_insn (operands[0], tmp[6]); | |
2f2da9e9 | 5591 | } |
d3c2fee0 | 5592 | |
6bf39801 JJ |
5593 | DONE; |
5594 | }) | |
5595 | ||
1ee48839 | 5596 | (define_expand "vec_pack_sfix_v4df" |
82e86dc6 UB |
5597 | [(match_operand:V8SI 0 "register_operand") |
5598 | (match_operand:V4DF 1 "nonimmediate_operand") | |
5599 | (match_operand:V4DF 2 "nonimmediate_operand")] | |
1ee48839 JJ |
5600 | "TARGET_AVX" |
5601 | { | |
5602 | rtx r1, r2; | |
5603 | ||
76a2c904 UB |
5604 | r1 = gen_reg_rtx (V4SImode); |
5605 | r2 = gen_reg_rtx (V4SImode); | |
1ee48839 | 5606 | |
76a2c904 UB |
5607 | emit_insn (gen_avx_cvtpd2dq256 (r1, operands[1])); |
5608 | emit_insn (gen_avx_cvtpd2dq256 (r2, operands[2])); | |
5609 | emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2)); | |
1ee48839 JJ |
5610 | DONE; |
5611 | }) | |
5612 | ||
d6023b50 | 5613 | (define_expand "vec_pack_sfix_v2df" |
82e86dc6 UB |
5614 | [(match_operand:V4SI 0 "register_operand") |
5615 | (match_operand:V2DF 1 "nonimmediate_operand") | |
5616 | (match_operand:V2DF 2 "nonimmediate_operand")] | |
ab8efbd8 | 5617 | "TARGET_SSE2" |
d6023b50 | 5618 | { |
d8c84975 | 5619 | rtx tmp0, tmp1, tmp2; |
d6023b50 | 5620 | |
a1aff58f | 5621 | if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ()) |
76a2c904 UB |
5622 | { |
5623 | tmp0 = gen_reg_rtx (V4DFmode); | |
5624 | tmp1 = force_reg (V2DFmode, operands[1]); | |
d6023b50 | 5625 | |
76a2c904 UB |
5626 | emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2])); |
5627 | emit_insn (gen_avx_cvtpd2dq256 (operands[0], tmp0)); | |
5628 | } | |
5629 | else | |
5630 | { | |
5631 | tmp0 = gen_reg_rtx (V4SImode); | |
5632 | tmp1 = gen_reg_rtx (V4SImode); | |
d8c84975 | 5633 | tmp2 = gen_reg_rtx (V2DImode); |
76a2c904 UB |
5634 | |
5635 | emit_insn (gen_sse2_cvtpd2dq (tmp0, operands[1])); | |
5636 | emit_insn (gen_sse2_cvtpd2dq (tmp1, operands[2])); | |
d8c84975 JJ |
5637 | emit_insn (gen_vec_interleave_lowv2di (tmp2, |
5638 | gen_lowpart (V2DImode, tmp0), | |
5639 | gen_lowpart (V2DImode, tmp1))); | |
5640 | emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2)); | |
76a2c904 | 5641 | } |
d6023b50 UB |
5642 | DONE; |
5643 | }) | |
ab8efbd8 | 5644 | |
ef719a44 RH |
5645 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
5646 | ;; | |
d6023b50 | 5647 | ;; Parallel single-precision floating point element swizzling |
ef719a44 RH |
5648 | ;; |
5649 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
5650 | ||
3a3f9d87 | 5651 | (define_expand "sse_movhlps_exp" |
82e86dc6 | 5652 | [(set (match_operand:V4SF 0 "nonimmediate_operand") |
ffbaf337 UB |
5653 | (vec_select:V4SF |
5654 | (vec_concat:V8SF | |
82e86dc6 UB |
5655 | (match_operand:V4SF 1 "nonimmediate_operand") |
5656 | (match_operand:V4SF 2 "nonimmediate_operand")) | |
ffbaf337 UB |
5657 | (parallel [(const_int 6) |
5658 | (const_int 7) | |
5659 | (const_int 2) | |
5660 | (const_int 3)])))] | |
5661 | "TARGET_SSE" | |
f17aa4ad UB |
5662 | { |
5663 | rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands); | |
6cf9eb27 | 5664 | |
f17aa4ad UB |
5665 | emit_insn (gen_sse_movhlps (dst, operands[1], operands[2])); |
5666 | ||
5667 | /* Fix up the destination if needed. */ | |
5668 | if (dst != operands[0]) | |
5669 | emit_move_insn (operands[0], dst); | |
5670 | ||
5671 | DONE; | |
5672 | }) | |
ffbaf337 | 5673 | |
3a3f9d87 | 5674 | (define_insn "sse_movhlps" |
3729983c | 5675 | [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m") |
d6023b50 UB |
5676 | (vec_select:V4SF |
5677 | (vec_concat:V8SF | |
3729983c UB |
5678 | (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0") |
5679 | (match_operand:V4SF 2 "nonimmediate_operand" " x,x,o,o,x")) | |
d6023b50 UB |
5680 | (parallel [(const_int 6) |
5681 | (const_int 7) | |
5682 | (const_int 2) | |
5683 | (const_int 3)])))] | |
2fe4dc01 | 5684 | "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))" |
d6023b50 UB |
5685 | "@ |
5686 | movhlps\t{%2, %0|%0, %2} | |
3729983c | 5687 | vmovhlps\t{%2, %1, %0|%0, %1, %2} |
d6023b50 | 5688 | movlps\t{%H2, %0|%0, %H2} |
3729983c | 5689 | vmovlps\t{%H2, %1, %0|%0, %1, %H2} |
eabb5f48 | 5690 | %vmovhps\t{%2, %0|%q0, %2}" |
ba94c7af | 5691 | [(set_attr "isa" "noavx,avx,noavx,avx,*") |
3729983c | 5692 | (set_attr "type" "ssemov") |
f220a4f4 | 5693 | (set_attr "ssememalign" "64") |
3729983c UB |
5694 | (set_attr "prefix" "orig,vex,orig,vex,maybe_vex") |
5695 | (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")]) | |
ef719a44 | 5696 | |
3a3f9d87 | 5697 | (define_expand "sse_movlhps_exp" |
82e86dc6 | 5698 | [(set (match_operand:V4SF 0 "nonimmediate_operand") |
ffbaf337 UB |
5699 | (vec_select:V4SF |
5700 | (vec_concat:V8SF | |
82e86dc6 UB |
5701 | (match_operand:V4SF 1 "nonimmediate_operand") |
5702 | (match_operand:V4SF 2 "nonimmediate_operand")) | |
ffbaf337 UB |
5703 | (parallel [(const_int 0) |
5704 | (const_int 1) | |
5705 | (const_int 4) | |
5706 | (const_int 5)])))] | |
5707 | "TARGET_SSE" | |
f17aa4ad UB |
5708 | { |
5709 | rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands); | |
6cf9eb27 | 5710 | |
f17aa4ad UB |
5711 | emit_insn (gen_sse_movlhps (dst, operands[1], operands[2])); |
5712 | ||
5713 | /* Fix up the destination if needed. */ | |
5714 | if (dst != operands[0]) | |
5715 | emit_move_insn (operands[0], dst); | |
5716 | ||
5717 | DONE; | |
5718 | }) | |
ffbaf337 | 5719 | |
3a3f9d87 | 5720 | (define_insn "sse_movlhps" |
3729983c | 5721 | [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o") |
d6023b50 UB |
5722 | (vec_select:V4SF |
5723 | (vec_concat:V8SF | |
3729983c | 5724 | (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0") |
eabb5f48 | 5725 | (match_operand:V4SF 2 "nonimmediate_operand" " x,x,m,m,x")) |
d6023b50 UB |
5726 | (parallel [(const_int 0) |
5727 | (const_int 1) | |
5728 | (const_int 4) | |
5729 | (const_int 5)])))] | |
5730 | "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)" | |
5731 | "@ | |
5732 | movlhps\t{%2, %0|%0, %2} | |
3729983c | 5733 | vmovlhps\t{%2, %1, %0|%0, %1, %2} |
eabb5f48 UB |
5734 | movhps\t{%2, %0|%0, %q2} |
5735 | vmovhps\t{%2, %1, %0|%0, %1, %q2} | |
3729983c | 5736 | %vmovlps\t{%2, %H0|%H0, %2}" |
ba94c7af | 5737 | [(set_attr "isa" "noavx,avx,noavx,avx,*") |
3729983c | 5738 | (set_attr "type" "ssemov") |
f220a4f4 | 5739 | (set_attr "ssememalign" "64") |
3729983c UB |
5740 | (set_attr "prefix" "orig,vex,orig,vex,maybe_vex") |
5741 | (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")]) | |
ef719a44 | 5742 | |
47490470 | 5743 | (define_insn "<mask_codefor>avx512f_unpckhps512<mask_name>" |
c003c6d6 AI |
5744 | [(set (match_operand:V16SF 0 "register_operand" "=v") |
5745 | (vec_select:V16SF | |
5746 | (vec_concat:V32SF | |
5747 | (match_operand:V16SF 1 "register_operand" "v") | |
5748 | (match_operand:V16SF 2 "nonimmediate_operand" "vm")) | |
5749 | (parallel [(const_int 2) (const_int 18) | |
5750 | (const_int 3) (const_int 19) | |
5751 | (const_int 6) (const_int 22) | |
5752 | (const_int 7) (const_int 23) | |
5753 | (const_int 10) (const_int 26) | |
5754 | (const_int 11) (const_int 27) | |
5755 | (const_int 14) (const_int 30) | |
5756 | (const_int 15) (const_int 31)])))] | |
5757 | "TARGET_AVX512F" | |
47490470 | 5758 | "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
c003c6d6 AI |
5759 | [(set_attr "type" "sselog") |
5760 | (set_attr "prefix" "evex") | |
5761 | (set_attr "mode" "V16SF")]) | |
5762 | ||
b0d49a6e | 5763 | ;; Recall that the 256-bit unpck insns only shuffle within their lanes. |
47145255 AI |
5764 | (define_insn "avx_unpckhps256<mask_name>" |
5765 | [(set (match_operand:V8SF 0 "register_operand" "=v") | |
95879c72 L |
5766 | (vec_select:V8SF |
5767 | (vec_concat:V16SF | |
47145255 AI |
5768 | (match_operand:V8SF 1 "register_operand" "v") |
5769 | (match_operand:V8SF 2 "nonimmediate_operand" "vm")) | |
c4d3f42f L |
5770 | (parallel [(const_int 2) (const_int 10) |
5771 | (const_int 3) (const_int 11) | |
5772 | (const_int 6) (const_int 14) | |
5773 | (const_int 7) (const_int 15)])))] | |
47145255 AI |
5774 | "TARGET_AVX && <mask_avx512vl_condition>" |
5775 | "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" | |
95879c72 L |
5776 | [(set_attr "type" "sselog") |
5777 | (set_attr "prefix" "vex") | |
5778 | (set_attr "mode" "V8SF")]) | |
5779 | ||
1e27129f L |
5780 | (define_expand "vec_interleave_highv8sf" |
5781 | [(set (match_dup 3) | |
5782 | (vec_select:V8SF | |
5783 | (vec_concat:V16SF | |
5784 | (match_operand:V8SF 1 "register_operand" "x") | |
5785 | (match_operand:V8SF 2 "nonimmediate_operand" "xm")) | |
5786 | (parallel [(const_int 0) (const_int 8) | |
5787 | (const_int 1) (const_int 9) | |
5788 | (const_int 4) (const_int 12) | |
5789 | (const_int 5) (const_int 13)]))) | |
5790 | (set (match_dup 4) | |
5791 | (vec_select:V8SF | |
5792 | (vec_concat:V16SF | |
5793 | (match_dup 1) | |
5794 | (match_dup 2)) | |
5795 | (parallel [(const_int 2) (const_int 10) | |
5796 | (const_int 3) (const_int 11) | |
5797 | (const_int 6) (const_int 14) | |
5798 | (const_int 7) (const_int 15)]))) | |
82e86dc6 | 5799 | (set (match_operand:V8SF 0 "register_operand") |
2a4337c0 UB |
5800 | (vec_select:V8SF |
5801 | (vec_concat:V16SF | |
1e27129f | 5802 | (match_dup 3) |
2a4337c0 UB |
5803 | (match_dup 4)) |
5804 | (parallel [(const_int 4) (const_int 5) | |
5805 | (const_int 6) (const_int 7) | |
5806 | (const_int 12) (const_int 13) | |
5807 | (const_int 14) (const_int 15)])))] | |
1e27129f L |
5808 | "TARGET_AVX" |
5809 | { | |
5810 | operands[3] = gen_reg_rtx (V8SFmode); | |
5811 | operands[4] = gen_reg_rtx (V8SFmode); | |
5812 | }) | |
5813 | ||
47145255 AI |
5814 | (define_insn "vec_interleave_highv4sf<mask_name>" |
5815 | [(set (match_operand:V4SF 0 "register_operand" "=x,v") | |
d6023b50 UB |
5816 | (vec_select:V4SF |
5817 | (vec_concat:V8SF | |
47145255 AI |
5818 | (match_operand:V4SF 1 "register_operand" "0,v") |
5819 | (match_operand:V4SF 2 "nonimmediate_operand" "xm,vm")) | |
d6023b50 UB |
5820 | (parallel [(const_int 2) (const_int 6) |
5821 | (const_int 3) (const_int 7)])))] | |
47145255 | 5822 | "TARGET_SSE && <mask_avx512vl_condition>" |
3729983c UB |
5823 | "@ |
5824 | unpckhps\t{%2, %0|%0, %2} | |
47145255 | 5825 | vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
3729983c UB |
5826 | [(set_attr "isa" "noavx,avx") |
5827 | (set_attr "type" "sselog") | |
5828 | (set_attr "prefix" "orig,vex") | |
d6023b50 | 5829 | (set_attr "mode" "V4SF")]) |
ef719a44 | 5830 | |
47490470 | 5831 | (define_insn "<mask_codefor>avx512f_unpcklps512<mask_name>" |
c003c6d6 AI |
5832 | [(set (match_operand:V16SF 0 "register_operand" "=v") |
5833 | (vec_select:V16SF | |
5834 | (vec_concat:V32SF | |
5835 | (match_operand:V16SF 1 "register_operand" "v") | |
5836 | (match_operand:V16SF 2 "nonimmediate_operand" "vm")) | |
5837 | (parallel [(const_int 0) (const_int 16) | |
5838 | (const_int 1) (const_int 17) | |
5839 | (const_int 4) (const_int 20) | |
5840 | (const_int 5) (const_int 21) | |
5841 | (const_int 8) (const_int 24) | |
5842 | (const_int 9) (const_int 25) | |
5843 | (const_int 12) (const_int 28) | |
5844 | (const_int 13) (const_int 29)])))] | |
5845 | "TARGET_AVX512F" | |
47490470 | 5846 | "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
c003c6d6 AI |
5847 | [(set_attr "type" "sselog") |
5848 | (set_attr "prefix" "evex") | |
5849 | (set_attr "mode" "V16SF")]) | |
5850 | ||
b0d49a6e | 5851 | ;; Recall that the 256-bit unpck insns only shuffle within their lanes. |
47145255 AI |
5852 | (define_insn "avx_unpcklps256<mask_name>" |
5853 | [(set (match_operand:V8SF 0 "register_operand" "=v") | |
95879c72 L |
5854 | (vec_select:V8SF |
5855 | (vec_concat:V16SF | |
47145255 AI |
5856 | (match_operand:V8SF 1 "register_operand" "v") |
5857 | (match_operand:V8SF 2 "nonimmediate_operand" "vm")) | |
c4d3f42f L |
5858 | (parallel [(const_int 0) (const_int 8) |
5859 | (const_int 1) (const_int 9) | |
5860 | (const_int 4) (const_int 12) | |
5861 | (const_int 5) (const_int 13)])))] | |
47145255 AI |
5862 | "TARGET_AVX && <mask_avx512vl_condition>" |
5863 | "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" | |
95879c72 L |
5864 | [(set_attr "type" "sselog") |
5865 | (set_attr "prefix" "vex") | |
5866 | (set_attr "mode" "V8SF")]) | |
5867 | ||
47145255 AI |
5868 | (define_insn "unpcklps128_mask" |
5869 | [(set (match_operand:V4SF 0 "register_operand" "=v") | |
5870 | (vec_merge:V4SF | |
5871 | (vec_select:V4SF | |
5872 | (vec_concat:V8SF | |
5873 | (match_operand:V4SF 1 "register_operand" "v") | |
5874 | (match_operand:V4SF 2 "nonimmediate_operand" "vm")) | |
5875 | (parallel [(const_int 0) (const_int 4) | |
5876 | (const_int 1) (const_int 5)])) | |
5877 | (match_operand:V4SF 3 "vector_move_operand" "0C") | |
5878 | (match_operand:QI 4 "register_operand" "Yk")))] | |
5879 | "TARGET_AVX512VL" | |
5880 | "vunpcklps\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}" | |
5881 | [(set_attr "type" "sselog") | |
5882 | (set_attr "prefix" "evex") | |
5883 | (set_attr "mode" "V4SF")]) | |
5884 | ||
1e27129f L |
5885 | (define_expand "vec_interleave_lowv8sf" |
5886 | [(set (match_dup 3) | |
5887 | (vec_select:V8SF | |
5888 | (vec_concat:V16SF | |
5889 | (match_operand:V8SF 1 "register_operand" "x") | |
5890 | (match_operand:V8SF 2 "nonimmediate_operand" "xm")) | |
5891 | (parallel [(const_int 0) (const_int 8) | |
5892 | (const_int 1) (const_int 9) | |
5893 | (const_int 4) (const_int 12) | |
5894 | (const_int 5) (const_int 13)]))) | |
5895 | (set (match_dup 4) | |
5896 | (vec_select:V8SF | |
5897 | (vec_concat:V16SF | |
5898 | (match_dup 1) | |
5899 | (match_dup 2)) | |
5900 | (parallel [(const_int 2) (const_int 10) | |
5901 | (const_int 3) (const_int 11) | |
5902 | (const_int 6) (const_int 14) | |
5903 | (const_int 7) (const_int 15)]))) | |
82e86dc6 | 5904 | (set (match_operand:V8SF 0 "register_operand") |
2a4337c0 UB |
5905 | (vec_select:V8SF |
5906 | (vec_concat:V16SF | |
1e27129f | 5907 | (match_dup 3) |
2a4337c0 UB |
5908 | (match_dup 4)) |
5909 | (parallel [(const_int 0) (const_int 1) | |
5910 | (const_int 2) (const_int 3) | |
5911 | (const_int 8) (const_int 9) | |
5912 | (const_int 10) (const_int 11)])))] | |
1e27129f L |
5913 | "TARGET_AVX" |
5914 | { | |
5915 | operands[3] = gen_reg_rtx (V8SFmode); | |
5916 | operands[4] = gen_reg_rtx (V8SFmode); | |
5917 | }) | |
5918 | ||
b0d49a6e | 5919 | (define_insn "vec_interleave_lowv4sf" |
3729983c | 5920 | [(set (match_operand:V4SF 0 "register_operand" "=x,x") |
d6023b50 UB |
5921 | (vec_select:V4SF |
5922 | (vec_concat:V8SF | |
3729983c UB |
5923 | (match_operand:V4SF 1 "register_operand" "0,x") |
5924 | (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")) | |
d6023b50 UB |
5925 | (parallel [(const_int 0) (const_int 4) |
5926 | (const_int 1) (const_int 5)])))] | |
5927 | "TARGET_SSE" | |
3729983c UB |
5928 | "@ |
5929 | unpcklps\t{%2, %0|%0, %2} | |
5930 | vunpcklps\t{%2, %1, %0|%0, %1, %2}" | |
5931 | [(set_attr "isa" "noavx,avx") | |
5932 | (set_attr "type" "sselog") | |
5933 | (set_attr "prefix" "orig,vex") | |
d6023b50 | 5934 | (set_attr "mode" "V4SF")]) |
ef719a44 | 5935 | |
d6023b50 UB |
5936 | ;; These are modeled with the same vec_concat as the others so that we |
5937 | ;; capture users of shufps that can use the new instructions | |
6eacd27c AI |
5938 | (define_insn "avx_movshdup256<mask_name>" |
5939 | [(set (match_operand:V8SF 0 "register_operand" "=v") | |
95879c72 L |
5940 | (vec_select:V8SF |
5941 | (vec_concat:V16SF | |
6eacd27c | 5942 | (match_operand:V8SF 1 "nonimmediate_operand" "vm") |
95879c72 L |
5943 | (match_dup 1)) |
5944 | (parallel [(const_int 1) (const_int 1) | |
5945 | (const_int 3) (const_int 3) | |
5946 | (const_int 5) (const_int 5) | |
5947 | (const_int 7) (const_int 7)])))] | |
6eacd27c AI |
5948 | "TARGET_AVX && <mask_avx512vl_condition>" |
5949 | "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" | |
95879c72 L |
5950 | [(set_attr "type" "sse") |
5951 | (set_attr "prefix" "vex") | |
5952 | (set_attr "mode" "V8SF")]) | |
5953 | ||
6eacd27c AI |
5954 | (define_insn "sse3_movshdup<mask_name>" |
5955 | [(set (match_operand:V4SF 0 "register_operand" "=v") | |
d6023b50 UB |
5956 | (vec_select:V4SF |
5957 | (vec_concat:V8SF | |
6eacd27c | 5958 | (match_operand:V4SF 1 "nonimmediate_operand" "vm") |
d6023b50 UB |
5959 | (match_dup 1)) |
5960 | (parallel [(const_int 1) | |
5961 | (const_int 1) | |
5962 | (const_int 7) | |
5963 | (const_int 7)])))] | |
6eacd27c AI |
5964 | "TARGET_SSE3 && <mask_avx512vl_condition>" |
5965 | "%vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" | |
d6023b50 | 5966 | [(set_attr "type" "sse") |
10e4d956 | 5967 | (set_attr "prefix_rep" "1") |
95879c72 | 5968 | (set_attr "prefix" "maybe_vex") |
d6023b50 | 5969 | (set_attr "mode" "V4SF")]) |
ef719a44 | 5970 | |
47490470 | 5971 | (define_insn "<mask_codefor>avx512f_movshdup512<mask_name>" |
c003c6d6 AI |
5972 | [(set (match_operand:V16SF 0 "register_operand" "=v") |
5973 | (vec_select:V16SF | |
5974 | (vec_concat:V32SF | |
5975 | (match_operand:V16SF 1 "nonimmediate_operand" "vm") | |
5976 | (match_dup 1)) | |
5977 | (parallel [(const_int 1) (const_int 1) | |
5978 | (const_int 3) (const_int 3) | |
5979 | (const_int 5) (const_int 5) | |
5980 | (const_int 7) (const_int 7) | |
5981 | (const_int 9) (const_int 9) | |
5982 | (const_int 11) (const_int 11) | |
5983 | (const_int 13) (const_int 13) | |
5984 | (const_int 15) (const_int 15)])))] | |
5985 | "TARGET_AVX512F" | |
47490470 | 5986 | "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" |
c003c6d6 AI |
5987 | [(set_attr "type" "sse") |
5988 | (set_attr "prefix" "evex") | |
5989 | (set_attr "mode" "V16SF")]) | |
5990 | ||
6eacd27c AI |
5991 | (define_insn "avx_movsldup256<mask_name>" |
5992 | [(set (match_operand:V8SF 0 "register_operand" "=v") | |
95879c72 L |
5993 | (vec_select:V8SF |
5994 | (vec_concat:V16SF | |
6eacd27c | 5995 | (match_operand:V8SF 1 "nonimmediate_operand" "vm") |
95879c72 L |
5996 | (match_dup 1)) |
5997 | (parallel [(const_int 0) (const_int 0) | |
5998 | (const_int 2) (const_int 2) | |
5999 | (const_int 4) (const_int 4) | |
6000 | (const_int 6) (const_int 6)])))] | |
6eacd27c AI |
6001 | "TARGET_AVX && <mask_avx512vl_condition>" |
6002 | "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" | |
95879c72 L |
6003 | [(set_attr "type" "sse") |
6004 | (set_attr "prefix" "vex") | |
6005 | (set_attr "mode" "V8SF")]) | |
6006 | ||
6eacd27c AI |
6007 | (define_insn "sse3_movsldup<mask_name>" |
6008 | [(set (match_operand:V4SF 0 "register_operand" "=v") | |
d6023b50 UB |
6009 | (vec_select:V4SF |
6010 | (vec_concat:V8SF | |
6eacd27c | 6011 | (match_operand:V4SF 1 "nonimmediate_operand" "vm") |
d6023b50 UB |
6012 | (match_dup 1)) |
6013 | (parallel [(const_int 0) | |
6014 | (const_int 0) | |
6015 | (const_int 6) | |
6016 | (const_int 6)])))] | |
6eacd27c AI |
6017 | "TARGET_SSE3 && <mask_avx512vl_condition>" |
6018 | "%vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" | |
d6023b50 | 6019 | [(set_attr "type" "sse") |
10e4d956 | 6020 | (set_attr "prefix_rep" "1") |
95879c72 | 6021 | (set_attr "prefix" "maybe_vex") |
d6023b50 | 6022 | (set_attr "mode" "V4SF")]) |
ef719a44 | 6023 | |
47490470 | 6024 | (define_insn "<mask_codefor>avx512f_movsldup512<mask_name>" |
c003c6d6 AI |
6025 | [(set (match_operand:V16SF 0 "register_operand" "=v") |
6026 | (vec_select:V16SF | |
6027 | (vec_concat:V32SF | |
6028 | (match_operand:V16SF 1 "nonimmediate_operand" "vm") | |
6029 | (match_dup 1)) | |
6030 | (parallel [(const_int 0) (const_int 0) | |
6031 | (const_int 2) (const_int 2) | |
6032 | (const_int 4) (const_int 4) | |
6033 | (const_int 6) (const_int 6) | |
6034 | (const_int 8) (const_int 8) | |
6035 | (const_int 10) (const_int 10) | |
6036 | (const_int 12) (const_int 12) | |
6037 | (const_int 14) (const_int 14)])))] | |
6038 | "TARGET_AVX512F" | |
47490470 | 6039 | "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" |
c003c6d6 AI |
6040 | [(set_attr "type" "sse") |
6041 | (set_attr "prefix" "evex") | |
6042 | (set_attr "mode" "V16SF")]) | |
6043 | ||
fc01a1ac | 6044 | (define_expand "avx_shufps256<mask_expand4_name>" |
82e86dc6 UB |
6045 | [(match_operand:V8SF 0 "register_operand") |
6046 | (match_operand:V8SF 1 "register_operand") | |
6047 | (match_operand:V8SF 2 "nonimmediate_operand") | |
6048 | (match_operand:SI 3 "const_int_operand")] | |
95879c72 L |
6049 | "TARGET_AVX" |
6050 | { | |
6051 | int mask = INTVAL (operands[3]); | |
fc01a1ac AI |
6052 | emit_insn (gen_avx_shufps256_1<mask_expand4_name> (operands[0], |
6053 | operands[1], | |
6054 | operands[2], | |
6055 | GEN_INT ((mask >> 0) & 3), | |
6056 | GEN_INT ((mask >> 2) & 3), | |
6057 | GEN_INT (((mask >> 4) & 3) + 8), | |
6058 | GEN_INT (((mask >> 6) & 3) + 8), | |
6059 | GEN_INT (((mask >> 0) & 3) + 4), | |
6060 | GEN_INT (((mask >> 2) & 3) + 4), | |
6061 | GEN_INT (((mask >> 4) & 3) + 12), | |
6062 | GEN_INT (((mask >> 6) & 3) + 12) | |
6063 | <mask_expand4_args>)); | |
95879c72 L |
6064 | DONE; |
6065 | }) | |
6066 | ||
6067 | ;; One bit in mask selects 2 elements. | |
fc01a1ac AI |
6068 | (define_insn "avx_shufps256_1<mask_name>" |
6069 | [(set (match_operand:V8SF 0 "register_operand" "=v") | |
95879c72 L |
6070 | (vec_select:V8SF |
6071 | (vec_concat:V16SF | |
fc01a1ac AI |
6072 | (match_operand:V8SF 1 "register_operand" "v") |
6073 | (match_operand:V8SF 2 "nonimmediate_operand" "vm")) | |
82e86dc6 UB |
6074 | (parallel [(match_operand 3 "const_0_to_3_operand" ) |
6075 | (match_operand 4 "const_0_to_3_operand" ) | |
6076 | (match_operand 5 "const_8_to_11_operand" ) | |
6077 | (match_operand 6 "const_8_to_11_operand" ) | |
6078 | (match_operand 7 "const_4_to_7_operand" ) | |
6079 | (match_operand 8 "const_4_to_7_operand" ) | |
6080 | (match_operand 9 "const_12_to_15_operand") | |
6081 | (match_operand 10 "const_12_to_15_operand")])))] | |
95879c72 | 6082 | "TARGET_AVX |
fc01a1ac | 6083 | && <mask_avx512vl_condition> |
95879c72 L |
6084 | && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4) |
6085 | && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4) | |
6086 | && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4) | |
6087 | && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))" | |
6088 | { | |
6089 | int mask; | |
6090 | mask = INTVAL (operands[3]); | |
6091 | mask |= INTVAL (operands[4]) << 2; | |
6092 | mask |= (INTVAL (operands[5]) - 8) << 4; | |
6093 | mask |= (INTVAL (operands[6]) - 8) << 6; | |
6094 | operands[3] = GEN_INT (mask); | |
6095 | ||
fc01a1ac | 6096 | return "vshufps\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}"; |
95879c72 | 6097 | } |
eb2f2b44 | 6098 | [(set_attr "type" "sseshuf") |
725fd454 | 6099 | (set_attr "length_immediate" "1") |
fc01a1ac | 6100 | (set_attr "prefix" "<mask_prefix>") |
95879c72 L |
6101 | (set_attr "mode" "V8SF")]) |
6102 | ||
fc01a1ac | 6103 | (define_expand "sse_shufps<mask_expand4_name>" |
82e86dc6 UB |
6104 | [(match_operand:V4SF 0 "register_operand") |
6105 | (match_operand:V4SF 1 "register_operand") | |
6106 | (match_operand:V4SF 2 "nonimmediate_operand") | |
6107 | (match_operand:SI 3 "const_int_operand")] | |
d6023b50 UB |
6108 | "TARGET_SSE" |
6109 | { | |
6110 | int mask = INTVAL (operands[3]); | |
fc01a1ac AI |
6111 | emit_insn (gen_sse_shufps_v4sf<mask_expand4_name> (operands[0], |
6112 | operands[1], | |
6113 | operands[2], | |
6114 | GEN_INT ((mask >> 0) & 3), | |
6115 | GEN_INT ((mask >> 2) & 3), | |
6116 | GEN_INT (((mask >> 4) & 3) + 4), | |
6117 | GEN_INT (((mask >> 6) & 3) + 4) | |
6118 | <mask_expand4_args>)); | |
d6023b50 UB |
6119 | DONE; |
6120 | }) | |
ef719a44 | 6121 | |
fc01a1ac AI |
6122 | (define_insn "sse_shufps_v4sf_mask" |
6123 | [(set (match_operand:V4SF 0 "register_operand" "=v") | |
6124 | (vec_merge:V4SF | |
6125 | (vec_select:V4SF | |
6126 | (vec_concat:V8SF | |
6127 | (match_operand:V4SF 1 "register_operand" "v") | |
6128 | (match_operand:V4SF 2 "nonimmediate_operand" "vm")) | |
6129 | (parallel [(match_operand 3 "const_0_to_3_operand") | |
6130 | (match_operand 4 "const_0_to_3_operand") | |
6131 | (match_operand 5 "const_4_to_7_operand") | |
6132 | (match_operand 6 "const_4_to_7_operand")])) | |
6133 | (match_operand:V4SF 7 "vector_move_operand" "0C") | |
6134 | (match_operand:QI 8 "register_operand" "Yk")))] | |
6135 | "TARGET_AVX512VL" | |
6136 | { | |
6137 | int mask = 0; | |
6138 | mask |= INTVAL (operands[3]) << 0; | |
6139 | mask |= INTVAL (operands[4]) << 2; | |
6140 | mask |= (INTVAL (operands[5]) - 4) << 4; | |
6141 | mask |= (INTVAL (operands[6]) - 4) << 6; | |
6142 | operands[3] = GEN_INT (mask); | |
6143 | ||
6144 | return "vshufps\t{%3, %2, %1, %0%{%8%}%N7|%0%{%8%}%N7, %1, %2, %3}"; | |
6145 | } | |
6146 | [(set_attr "type" "sseshuf") | |
6147 | (set_attr "length_immediate" "1") | |
6148 | (set_attr "prefix" "evex") | |
6149 | (set_attr "mode" "V4SF")]) | |
6150 | ||
ba63dfb9 | 6151 | (define_insn "sse_shufps_<mode>" |
6bec6c98 UB |
6152 | [(set (match_operand:VI4F_128 0 "register_operand" "=x,x") |
6153 | (vec_select:VI4F_128 | |
cbb734aa | 6154 | (vec_concat:<ssedoublevecmode> |
6bec6c98 UB |
6155 | (match_operand:VI4F_128 1 "register_operand" "0,x") |
6156 | (match_operand:VI4F_128 2 "nonimmediate_operand" "xm,xm")) | |
82e86dc6 UB |
6157 | (parallel [(match_operand 3 "const_0_to_3_operand") |
6158 | (match_operand 4 "const_0_to_3_operand") | |
6159 | (match_operand 5 "const_4_to_7_operand") | |
6160 | (match_operand 6 "const_4_to_7_operand")])))] | |
d6023b50 UB |
6161 | "TARGET_SSE" |
6162 | { | |
6163 | int mask = 0; | |
6164 | mask |= INTVAL (operands[3]) << 0; | |
6165 | mask |= INTVAL (operands[4]) << 2; | |
6166 | mask |= (INTVAL (operands[5]) - 4) << 4; | |
6167 | mask |= (INTVAL (operands[6]) - 4) << 6; | |
6168 | operands[3] = GEN_INT (mask); | |
ef719a44 | 6169 | |
3729983c UB |
6170 | switch (which_alternative) |
6171 | { | |
6172 | case 0: | |
6173 | return "shufps\t{%3, %2, %0|%0, %2, %3}"; | |
6174 | case 1: | |
6175 | return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}"; | |
6176 | default: | |
6177 | gcc_unreachable (); | |
6178 | } | |
d6023b50 | 6179 | } |
3729983c | 6180 | [(set_attr "isa" "noavx,avx") |
eb2f2b44 | 6181 | (set_attr "type" "sseshuf") |
725fd454 | 6182 | (set_attr "length_immediate" "1") |
3729983c | 6183 | (set_attr "prefix" "orig,vex") |
d6023b50 | 6184 | (set_attr "mode" "V4SF")]) |
ef719a44 | 6185 | |
d6023b50 UB |
6186 | (define_insn "sse_storehps" |
6187 | [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x") | |
6188 | (vec_select:V2SF | |
6189 | (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o") | |
6190 | (parallel [(const_int 2) (const_int 3)])))] | |
6191 | "TARGET_SSE" | |
6192 | "@ | |
eabb5f48 | 6193 | %vmovhps\t{%1, %0|%q0, %1} |
95879c72 L |
6194 | %vmovhlps\t{%1, %d0|%d0, %1} |
6195 | %vmovlps\t{%H1, %d0|%d0, %H1}" | |
d6023b50 | 6196 | [(set_attr "type" "ssemov") |
f220a4f4 | 6197 | (set_attr "ssememalign" "64") |
95879c72 | 6198 | (set_attr "prefix" "maybe_vex") |
d6023b50 UB |
6199 | (set_attr "mode" "V2SF,V4SF,V2SF")]) |
6200 | ||
3a3f9d87 | 6201 | (define_expand "sse_loadhps_exp" |
82e86dc6 | 6202 | [(set (match_operand:V4SF 0 "nonimmediate_operand") |
ffbaf337 UB |
6203 | (vec_concat:V4SF |
6204 | (vec_select:V2SF | |
82e86dc6 | 6205 | (match_operand:V4SF 1 "nonimmediate_operand") |
ffbaf337 | 6206 | (parallel [(const_int 0) (const_int 1)])) |
82e86dc6 | 6207 | (match_operand:V2SF 2 "nonimmediate_operand")))] |
ffbaf337 | 6208 | "TARGET_SSE" |
f17aa4ad UB |
6209 | { |
6210 | rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands); | |
6cf9eb27 | 6211 | |
f17aa4ad UB |
6212 | emit_insn (gen_sse_loadhps (dst, operands[1], operands[2])); |
6213 | ||
6214 | /* Fix up the destination if needed. */ | |
6215 | if (dst != operands[0]) | |
6216 | emit_move_insn (operands[0], dst); | |
6217 | ||
6218 | DONE; | |
6219 | }) | |
ffbaf337 | 6220 | |
3a3f9d87 | 6221 | (define_insn "sse_loadhps" |
3729983c | 6222 | [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o") |
ef719a44 | 6223 | (vec_concat:V4SF |
d6023b50 | 6224 | (vec_select:V2SF |
3729983c | 6225 | (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0") |
d6023b50 | 6226 | (parallel [(const_int 0) (const_int 1)])) |
3729983c | 6227 | (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,x,x")))] |
2fe4dc01 | 6228 | "TARGET_SSE" |
d6023b50 | 6229 | "@ |
eabb5f48 UB |
6230 | movhps\t{%2, %0|%0, %q2} |
6231 | vmovhps\t{%2, %1, %0|%0, %1, %q2} | |
d6023b50 | 6232 | movlhps\t{%2, %0|%0, %2} |
3729983c UB |
6233 | vmovlhps\t{%2, %1, %0|%0, %1, %2} |
6234 | %vmovlps\t{%2, %H0|%H0, %2}" | |
ba94c7af | 6235 | [(set_attr "isa" "noavx,avx,noavx,avx,*") |
3729983c | 6236 | (set_attr "type" "ssemov") |
f220a4f4 | 6237 | (set_attr "ssememalign" "64") |
3729983c UB |
6238 | (set_attr "prefix" "orig,vex,orig,vex,maybe_vex") |
6239 | (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")]) | |
95879c72 | 6240 | |
d6023b50 | 6241 | (define_insn "sse_storelps" |
3729983c | 6242 | [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x") |
d6023b50 | 6243 | (vec_select:V2SF |
3729983c | 6244 | (match_operand:V4SF 1 "nonimmediate_operand" " x,x,m") |
d6023b50 | 6245 | (parallel [(const_int 0) (const_int 1)])))] |
2fe4dc01 | 6246 | "TARGET_SSE" |
d6023b50 | 6247 | "@ |
eabb5f48 | 6248 | %vmovlps\t{%1, %0|%q0, %1} |
3729983c | 6249 | %vmovaps\t{%1, %0|%0, %1} |
eabb5f48 | 6250 | %vmovlps\t{%1, %d0|%d0, %q1}" |
d6023b50 | 6251 | [(set_attr "type" "ssemov") |
3729983c | 6252 | (set_attr "prefix" "maybe_vex") |
d6023b50 UB |
6253 | (set_attr "mode" "V2SF,V4SF,V2SF")]) |
6254 | ||
3a3f9d87 | 6255 | (define_expand "sse_loadlps_exp" |
82e86dc6 | 6256 | [(set (match_operand:V4SF 0 "nonimmediate_operand") |
ffbaf337 | 6257 | (vec_concat:V4SF |
82e86dc6 | 6258 | (match_operand:V2SF 2 "nonimmediate_operand") |
ffbaf337 | 6259 | (vec_select:V2SF |
82e86dc6 | 6260 | (match_operand:V4SF 1 "nonimmediate_operand") |
ffbaf337 UB |
6261 | (parallel [(const_int 2) (const_int 3)]))))] |
6262 | "TARGET_SSE" | |
f17aa4ad UB |
6263 | { |
6264 | rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands); | |
6cf9eb27 | 6265 | |
f17aa4ad UB |
6266 | emit_insn (gen_sse_loadlps (dst, operands[1], operands[2])); |
6267 | ||
6268 | /* Fix up the destination if needed. */ | |
6269 | if (dst != operands[0]) | |
6270 | emit_move_insn (operands[0], dst); | |
6271 | ||
6272 | DONE; | |
6273 | }) | |
ffbaf337 | 6274 | |
3a3f9d87 | 6275 | (define_insn "sse_loadlps" |
3729983c | 6276 | [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m") |
d6023b50 | 6277 | (vec_concat:V4SF |
85d91748 | 6278 | (match_operand:V2SF 2 "nonimmediate_operand" " 0,x,m,m,x") |
ef719a44 | 6279 | (vec_select:V2SF |
3729983c | 6280 | (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0,x,0") |
d6023b50 | 6281 | (parallel [(const_int 2) (const_int 3)]))))] |
2fe4dc01 | 6282 | "TARGET_SSE" |
d6023b50 UB |
6283 | "@ |
6284 | shufps\t{$0xe4, %1, %0|%0, %1, 0xe4} | |
3729983c | 6285 | vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4} |
eabb5f48 UB |
6286 | movlps\t{%2, %0|%0, %q2} |
6287 | vmovlps\t{%2, %1, %0|%0, %1, %q2} | |
6288 | %vmovlps\t{%2, %0|%q0, %2}" | |
ba94c7af | 6289 | [(set_attr "isa" "noavx,avx,noavx,avx,*") |
eb2f2b44 | 6290 | (set_attr "type" "sseshuf,sseshuf,ssemov,ssemov,ssemov") |
f220a4f4 | 6291 | (set_attr "ssememalign" "64") |
3729983c UB |
6292 | (set_attr "length_immediate" "1,1,*,*,*") |
6293 | (set_attr "prefix" "orig,vex,orig,vex,maybe_vex") | |
6294 | (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")]) | |
95879c72 | 6295 | |
d6023b50 | 6296 | (define_insn "sse_movss" |
3729983c | 6297 | [(set (match_operand:V4SF 0 "register_operand" "=x,x") |
d6023b50 | 6298 | (vec_merge:V4SF |
3729983c UB |
6299 | (match_operand:V4SF 2 "register_operand" " x,x") |
6300 | (match_operand:V4SF 1 "register_operand" " 0,x") | |
d6023b50 UB |
6301 | (const_int 1)))] |
6302 | "TARGET_SSE" | |
3729983c UB |
6303 | "@ |
6304 | movss\t{%2, %0|%0, %2} | |
6305 | vmovss\t{%2, %1, %0|%0, %1, %2}" | |
6306 | [(set_attr "isa" "noavx,avx") | |
6307 | (set_attr "type" "ssemov") | |
6308 | (set_attr "prefix" "orig,vex") | |
d6023b50 | 6309 | (set_attr "mode" "SF")]) |
8115817b | 6310 | |
da957891 | 6311 | (define_insn "avx2_vec_dup<mode>" |
a9ccbba2 AI |
6312 | [(set (match_operand:VF1_128_256 0 "register_operand" "=x") |
6313 | (vec_duplicate:VF1_128_256 | |
977e83a3 KY |
6314 | (vec_select:SF |
6315 | (match_operand:V4SF 1 "register_operand" "x") | |
6316 | (parallel [(const_int 0)]))))] | |
6317 | "TARGET_AVX2" | |
6318 | "vbroadcastss\t{%1, %0|%0, %1}" | |
6319 | [(set_attr "type" "sselog1") | |
6320 | (set_attr "prefix" "vex") | |
da957891 | 6321 | (set_attr "mode" "<MODE>")]) |
977e83a3 | 6322 | |
6945a32e JJ |
6323 | (define_insn "avx2_vec_dupv8sf_1" |
6324 | [(set (match_operand:V8SF 0 "register_operand" "=x") | |
6325 | (vec_duplicate:V8SF | |
6326 | (vec_select:SF | |
6327 | (match_operand:V8SF 1 "register_operand" "x") | |
6328 | (parallel [(const_int 0)]))))] | |
6329 | "TARGET_AVX2" | |
6330 | "vbroadcastss\t{%x1, %0|%0, %x1}" | |
6331 | [(set_attr "type" "sselog1") | |
6332 | (set_attr "prefix" "vex") | |
6333 | (set_attr "mode" "V8SF")]) | |
6334 | ||
b92883d6 IT |
6335 | (define_insn "avx512f_vec_dup<mode>_1" |
6336 | [(set (match_operand:VF_512 0 "register_operand" "=v") | |
6337 | (vec_duplicate:VF_512 | |
6338 | (vec_select:<ssescalarmode> | |
6339 | (match_operand:VF_512 1 "register_operand" "v") | |
6340 | (parallel [(const_int 0)]))))] | |
6341 | "TARGET_AVX512F" | |
6342 | "vbroadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}" | |
6343 | [(set_attr "type" "sselog1") | |
6344 | (set_attr "prefix" "evex") | |
6345 | (set_attr "mode" "<MODE>")]) | |
6346 | ||
6784c6e0 UB |
6347 | ;; Although insertps takes register source, we prefer |
6348 | ;; unpcklps with register source since it is shorter. | |
6349 | (define_insn "*vec_concatv2sf_sse4_1" | |
ee768d85 UB |
6350 | [(set (match_operand:V2SF 0 "register_operand" |
6351 | "=Yr,*x,x,Yr,*x,x,x,*y ,*y") | |
6784c6e0 | 6352 | (vec_concat:V2SF |
ee768d85 UB |
6353 | (match_operand:SF 1 "nonimmediate_operand" |
6354 | " 0, 0,x, 0,0, x,m, 0 , m") | |
6355 | (match_operand:SF 2 "vector_move_operand" | |
6356 | " Yr,*x,x, m,m, m,C,*ym, C")))] | |
6357 | "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))" | |
6784c6e0 | 6358 | "@ |
45392c76 | 6359 | unpcklps\t{%2, %0|%0, %2} |
6784c6e0 | 6360 | unpcklps\t{%2, %0|%0, %2} |
3729983c | 6361 | vunpcklps\t{%2, %1, %0|%0, %1, %2} |
6784c6e0 | 6362 | insertps\t{$0x10, %2, %0|%0, %2, 0x10} |
45392c76 | 6363 | insertps\t{$0x10, %2, %0|%0, %2, 0x10} |
3729983c UB |
6364 | vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10} |
6365 | %vmovss\t{%1, %0|%0, %1} | |
6784c6e0 UB |
6366 | punpckldq\t{%2, %0|%0, %2} |
6367 | movd\t{%1, %0|%0, %1}" | |
45392c76 IE |
6368 | [(set_attr "isa" "noavx,noavx,avx,noavx,noavx,avx,*,*,*") |
6369 | (set_attr "type" "sselog,sselog,sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov") | |
6370 | (set_attr "prefix_data16" "*,*,*,1,1,*,*,*,*") | |
6371 | (set_attr "prefix_extra" "*,*,*,1,1,1,*,*,*") | |
6372 | (set_attr "length_immediate" "*,*,*,1,1,1,*,*,*") | |
6373 | (set_attr "prefix" "orig,orig,vex,orig,orig,vex,maybe_vex,orig,orig") | |
6374 | (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,V4SF,V4SF,SF,DI,DI")]) | |
6784c6e0 | 6375 | |
d6023b50 UB |
6376 | ;; ??? In theory we can match memory for the MMX alternative, but allowing |
6377 | ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE | |
6378 | ;; alternatives pretty much forces the MMX alternative to be chosen. | |
fcc9fe1e | 6379 | (define_insn "*vec_concatv2sf_sse" |
d6023b50 UB |
6380 | [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y") |
6381 | (vec_concat:V2SF | |
6382 | (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m") | |
6383 | (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))] | |
6384 | "TARGET_SSE" | |
6385 | "@ | |
6386 | unpcklps\t{%2, %0|%0, %2} | |
6387 | movss\t{%1, %0|%0, %1} | |
6388 | punpckldq\t{%2, %0|%0, %2} | |
6389 | movd\t{%1, %0|%0, %1}" | |
6390 | [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov") | |
6391 | (set_attr "mode" "V4SF,SF,DI,DI")]) | |
d9987fb4 | 6392 | |
51e7f377 | 6393 | (define_insn "*vec_concatv4sf" |
3729983c | 6394 | [(set (match_operand:V4SF 0 "register_operand" "=x,x,x,x") |
d6023b50 | 6395 | (vec_concat:V4SF |
3729983c UB |
6396 | (match_operand:V2SF 1 "register_operand" " 0,x,0,x") |
6397 | (match_operand:V2SF 2 "nonimmediate_operand" " x,x,m,m")))] | |
d6023b50 UB |
6398 | "TARGET_SSE" |
6399 | "@ | |
6400 | movlhps\t{%2, %0|%0, %2} | |
3729983c | 6401 | vmovlhps\t{%2, %1, %0|%0, %1, %2} |
eabb5f48 UB |
6402 | movhps\t{%2, %0|%0, %q2} |
6403 | vmovhps\t{%2, %1, %0|%0, %1, %q2}" | |
3729983c UB |
6404 | [(set_attr "isa" "noavx,avx,noavx,avx") |
6405 | (set_attr "type" "ssemov") | |
6406 | (set_attr "prefix" "orig,vex,orig,vex") | |
6407 | (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")]) | |
d9987fb4 | 6408 | |
f8caa3a8 | 6409 | (define_expand "vec_init<mode>" |
82e86dc6 UB |
6410 | [(match_operand:V_128 0 "register_operand") |
6411 | (match_operand 1)] | |
d6023b50 | 6412 | "TARGET_SSE" |
d9987fb4 | 6413 | { |
d6023b50 | 6414 | ix86_expand_vector_init (false, operands[0], operands[1]); |
d9987fb4 UB |
6415 | DONE; |
6416 | }) | |
6417 | ||
3729983c UB |
6418 | ;; Avoid combining registers from different units in a single alternative, |
6419 | ;; see comment above inline_secondary_memory_needed function in i386.c | |
aad61732 | 6420 | (define_insn "vec_set<mode>_0" |
6bec6c98 | 6421 | [(set (match_operand:VI4F_128 0 "nonimmediate_operand" |
98321768 | 6422 | "=Yr,*v,v,Yi,x,x,v,Yr ,*x ,x ,m ,m ,m") |
6bec6c98 UB |
6423 | (vec_merge:VI4F_128 |
6424 | (vec_duplicate:VI4F_128 | |
3729983c | 6425 | (match_operand:<ssescalarmode> 2 "general_operand" |
98321768 | 6426 | " Yr,*v,m,r ,m,x,v,*rm,*rm,*rm,!x,!*re,!*fF")) |
6bec6c98 | 6427 | (match_operand:VI4F_128 1 "vector_move_operand" |
45392c76 | 6428 | " C , C,C,C ,C,0,v,0 ,0 ,x ,0 ,0 ,0") |
5e04b3b6 | 6429 | (const_int 1)))] |
aad61732 | 6430 | "TARGET_SSE" |
5e04b3b6 | 6431 | "@ |
45392c76 | 6432 | %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe} |
3729983c UB |
6433 | %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe} |
6434 | %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2} | |
6435 | %vmovd\t{%2, %0|%0, %2} | |
5e04b3b6 | 6436 | movss\t{%2, %0|%0, %2} |
aad61732 | 6437 | movss\t{%2, %0|%0, %2} |
3729983c | 6438 | vmovss\t{%2, %1, %0|%0, %1, %2} |
5e04b3b6 | 6439 | pinsrd\t{$0, %2, %0|%0, %2, 0} |
45392c76 | 6440 | pinsrd\t{$0, %2, %0|%0, %2, 0} |
3729983c UB |
6441 | vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0} |
6442 | # | |
6443 | # | |
5e04b3b6 | 6444 | #" |
45392c76 | 6445 | [(set_attr "isa" "sse4,sse4,sse2,sse2,noavx,noavx,avx,sse4_noavx,sse4_noavx,avx,*,*,*") |
ba94c7af | 6446 | (set (attr "type") |
45392c76 | 6447 | (cond [(eq_attr "alternative" "0,1,7,8,9") |
ba94c7af | 6448 | (const_string "sselog") |
45392c76 | 6449 | (eq_attr "alternative" "11") |
ba94c7af | 6450 | (const_string "imov") |
45392c76 | 6451 | (eq_attr "alternative" "12") |
29ebe616 | 6452 | (const_string "fmov") |
ba94c7af UB |
6453 | ] |
6454 | (const_string "ssemov"))) | |
45392c76 IE |
6455 | (set_attr "prefix_extra" "*,*,*,*,*,*,*,1,1,1,*,*,*") |
6456 | (set_attr "length_immediate" "*,*,*,*,*,*,*,1,1,1,*,*,*") | |
6457 | (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,maybe_vex,orig,orig,vex,orig,orig,vex,*,*,*") | |
6458 | (set_attr "mode" "SF,SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,TI,*,*,*")]) | |
d6023b50 UB |
6459 | |
6460 | ;; A subset is vec_setv4sf. | |
6461 | (define_insn "*vec_setv4sf_sse4_1" | |
45392c76 | 6462 | [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,x") |
d6023b50 UB |
6463 | (vec_merge:V4SF |
6464 | (vec_duplicate:V4SF | |
45392c76 IE |
6465 | (match_operand:SF 2 "nonimmediate_operand" "Yrm,*xm,xm")) |
6466 | (match_operand:V4SF 1 "register_operand" "0,0,x") | |
82e86dc6 | 6467 | (match_operand:SI 3 "const_int_operand")))] |
51e7f377 UB |
6468 | "TARGET_SSE4_1 |
6469 | && ((unsigned) exact_log2 (INTVAL (operands[3])) | |
6470 | < GET_MODE_NUNITS (V4SFmode))" | |
d9987fb4 | 6471 | { |
d6023b50 | 6472 | operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4); |
3729983c UB |
6473 | switch (which_alternative) |
6474 | { | |
6475 | case 0: | |
3729983c | 6476 | case 1: |
45392c76 IE |
6477 | return "insertps\t{%3, %2, %0|%0, %2, %3}"; |
6478 | case 2: | |
3729983c UB |
6479 | return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}"; |
6480 | default: | |
6481 | gcc_unreachable (); | |
6482 | } | |
d6023b50 | 6483 | } |
45392c76 | 6484 | [(set_attr "isa" "noavx,noavx,avx") |
3729983c | 6485 | (set_attr "type" "sselog") |
45392c76 | 6486 | (set_attr "prefix_data16" "1,1,*") |
725fd454 JJ |
6487 | (set_attr "prefix_extra" "1") |
6488 | (set_attr "length_immediate" "1") | |
45392c76 | 6489 | (set_attr "prefix" "orig,orig,vex") |
95879c72 L |
6490 | (set_attr "mode" "V4SF")]) |
6491 | ||
d6023b50 | 6492 | (define_insn "sse4_1_insertps" |
45392c76 IE |
6493 | [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,x") |
6494 | (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "Yrm,*xm,xm") | |
6495 | (match_operand:V4SF 1 "register_operand" "0,0,x") | |
6496 | (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")] | |
d6023b50 UB |
6497 | UNSPEC_INSERTPS))] |
6498 | "TARGET_SSE4_1" | |
09db7afe JJ |
6499 | { |
6500 | if (MEM_P (operands[2])) | |
6501 | { | |
6502 | unsigned count_s = INTVAL (operands[3]) >> 6; | |
6503 | if (count_s) | |
6504 | operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f); | |
6505 | operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4); | |
6506 | } | |
6507 | switch (which_alternative) | |
6508 | { | |
6509 | case 0: | |
09db7afe | 6510 | case 1: |
45392c76 IE |
6511 | return "insertps\t{%3, %2, %0|%0, %2, %3}"; |
6512 | case 2: | |
09db7afe JJ |
6513 | return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}"; |
6514 | default: | |
6515 | gcc_unreachable (); | |
6516 | } | |
6517 | } | |
45392c76 | 6518 | [(set_attr "isa" "noavx,noavx,avx") |
3729983c | 6519 | (set_attr "type" "sselog") |
45392c76 | 6520 | (set_attr "prefix_data16" "1,1,*") |
d6023b50 | 6521 | (set_attr "prefix_extra" "1") |
725fd454 | 6522 | (set_attr "length_immediate" "1") |
45392c76 | 6523 | (set_attr "prefix" "orig,orig,vex") |
d6023b50 | 6524 | (set_attr "mode" "V4SF")]) |
d9987fb4 | 6525 | |
d6023b50 | 6526 | (define_split |
82e86dc6 | 6527 | [(set (match_operand:VI4F_128 0 "memory_operand") |
6bec6c98 UB |
6528 | (vec_merge:VI4F_128 |
6529 | (vec_duplicate:VI4F_128 | |
82e86dc6 | 6530 | (match_operand:<ssescalarmode> 1 "nonmemory_operand")) |
d6023b50 UB |
6531 | (match_dup 0) |
6532 | (const_int 1)))] | |
6533 | "TARGET_SSE && reload_completed" | |
0b013847 UB |
6534 | [(set (match_dup 0) (match_dup 1))] |
6535 | "operands[0] = adjust_address (operands[0], <ssescalarmode>mode, 0);") | |
d9987fb4 | 6536 | |
349587b8 | 6537 | (define_expand "vec_set<mode>" |
82e86dc6 UB |
6538 | [(match_operand:V 0 "register_operand") |
6539 | (match_operand:<ssescalarmode> 1 "register_operand") | |
6540 | (match_operand 2 "const_int_operand")] | |
d6023b50 | 6541 | "TARGET_SSE" |
d9987fb4 | 6542 | { |
d6023b50 UB |
6543 | ix86_expand_vector_set (false, operands[0], operands[1], |
6544 | INTVAL (operands[2])); | |
6545 | DONE; | |
d9987fb4 UB |
6546 | }) |
6547 | ||
d6023b50 | 6548 | (define_insn_and_split "*vec_extractv4sf_0" |
a3d4a22b | 6549 | [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r") |
d6023b50 | 6550 | (vec_select:SF |
a3d4a22b | 6551 | (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m") |
d6023b50 UB |
6552 | (parallel [(const_int 0)])))] |
6553 | "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))" | |
6554 | "#" | |
6555 | "&& reload_completed" | |
36c4015b | 6556 | [(set (match_dup 0) (match_dup 1))] |
8115817b | 6557 | { |
36c4015b UB |
6558 | if (REG_P (operands[1])) |
6559 | operands[1] = gen_rtx_REG (SFmode, REGNO (operands[1])); | |
d6023b50 | 6560 | else |
36c4015b | 6561 | operands[1] = adjust_address (operands[1], SFmode, 0); |
8115817b UB |
6562 | }) |
6563 | ||
424c8389 | 6564 | (define_insn_and_split "*sse4_1_extractps" |
45392c76 | 6565 | [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,rm,x,x") |
424c8389 | 6566 | (vec_select:SF |
45392c76 IE |
6567 | (match_operand:V4SF 1 "register_operand" "Yr,*x,0,x") |
6568 | (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n,n")])))] | |
424c8389 UB |
6569 | "TARGET_SSE4_1" |
6570 | "@ | |
45392c76 | 6571 | %vextractps\t{%2, %1, %0|%0, %1, %2} |
424c8389 UB |
6572 | %vextractps\t{%2, %1, %0|%0, %1, %2} |
6573 | # | |
6574 | #" | |
6575 | "&& reload_completed && SSE_REG_P (operands[0])" | |
6576 | [(const_int 0)] | |
6577 | { | |
6578 | rtx dest = gen_rtx_REG (V4SFmode, REGNO (operands[0])); | |
6579 | switch (INTVAL (operands[2])) | |
6580 | { | |
6581 | case 1: | |
6582 | case 3: | |
6583 | emit_insn (gen_sse_shufps_v4sf (dest, operands[1], operands[1], | |
6584 | operands[2], operands[2], | |
6585 | GEN_INT (INTVAL (operands[2]) + 4), | |
6586 | GEN_INT (INTVAL (operands[2]) + 4))); | |
6587 | break; | |
6588 | case 2: | |
6589 | emit_insn (gen_vec_interleave_highv4sf (dest, operands[1], operands[1])); | |
6590 | break; | |
6591 | default: | |
6592 | /* 0 should be handled by the *vec_extractv4sf_0 pattern above. */ | |
6593 | gcc_unreachable (); | |
6594 | } | |
6595 | DONE; | |
6596 | } | |
45392c76 IE |
6597 | [(set_attr "isa" "*,*,noavx,avx") |
6598 | (set_attr "type" "sselog,sselog,*,*") | |
6599 | (set_attr "prefix_data16" "1,1,*,*") | |
6600 | (set_attr "prefix_extra" "1,1,*,*") | |
6601 | (set_attr "length_immediate" "1,1,*,*") | |
6602 | (set_attr "prefix" "maybe_vex,maybe_vex,*,*") | |
6603 | (set_attr "mode" "V4SF,V4SF,*,*")]) | |
424c8389 | 6604 | |
3095685e | 6605 | (define_insn_and_split "*vec_extractv4sf_mem" |
424c8389 | 6606 | [(set (match_operand:SF 0 "register_operand" "=x,*r,f") |
3095685e UB |
6607 | (vec_select:SF |
6608 | (match_operand:V4SF 1 "memory_operand" "o,o,o") | |
6609 | (parallel [(match_operand 2 "const_0_to_3_operand" "n,n,n")])))] | |
424c8389 UB |
6610 | "TARGET_SSE" |
6611 | "#" | |
6612 | "&& reload_completed" | |
0b013847 | 6613 | [(set (match_dup 0) (match_dup 1))] |
424c8389 | 6614 | { |
0b013847 | 6615 | operands[1] = adjust_address (operands[1], SFmode, INTVAL (operands[2]) * 4); |
424c8389 UB |
6616 | }) |
6617 | ||
0774c160 AI |
6618 | (define_mode_attr extract_type |
6619 | [(V16SF "avx512f") (V16SI "avx512f") (V8DF "avx512dq") (V8DI "avx512dq")]) | |
6620 | ||
6621 | (define_mode_attr extract_suf | |
6622 | [(V16SF "32x4") (V16SI "32x4") (V8DF "64x2") (V8DI "64x2")]) | |
6623 | ||
6624 | (define_mode_iterator AVX512_VEC | |
6625 | [(V8DF "TARGET_AVX512DQ") (V8DI "TARGET_AVX512DQ") V16SF V16SI]) | |
6626 | ||
6627 | (define_expand "<extract_type>_vextract<shuffletype><extract_suf>_mask" | |
47490470 | 6628 | [(match_operand:<ssequartermode> 0 "nonimmediate_operand") |
0774c160 | 6629 | (match_operand:AVX512_VEC 1 "register_operand") |
47490470 AI |
6630 | (match_operand:SI 2 "const_0_to_3_operand") |
6631 | (match_operand:<ssequartermode> 3 "nonimmediate_operand") | |
6632 | (match_operand:QI 4 "register_operand")] | |
6633 | "TARGET_AVX512F" | |
6634 | { | |
0774c160 AI |
6635 | int mask; |
6636 | mask = INTVAL (operands[2]); | |
6637 | ||
47490470 AI |
6638 | if (MEM_P (operands[0]) && GET_CODE (operands[3]) == CONST_VECTOR) |
6639 | operands[0] = force_reg (<ssequartermode>mode, operands[0]); | |
0774c160 AI |
6640 | |
6641 | if (<MODE>mode == V16SImode || <MODE>mode == V16SFmode) | |
6642 | emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (operands[0], | |
6643 | operands[1], GEN_INT (mask * 4), GEN_INT (mask * 4 + 1), | |
6644 | GEN_INT (mask * 4 + 2), GEN_INT (mask * 4 + 3), operands[3], | |
6645 | operands[4])); | |
6646 | else | |
6647 | emit_insn (gen_avx512dq_vextract<shuffletype>64x2_1_mask (operands[0], | |
6648 | operands[1], GEN_INT (mask * 2), GEN_INT (mask * 2 + 1), operands[3], | |
6649 | operands[4])); | |
47490470 AI |
6650 | DONE; |
6651 | }) | |
6652 | ||
0774c160 AI |
6653 | (define_insn "avx512dq_vextract<shuffletype>64x2_1_maskm" |
6654 | [(set (match_operand:<ssequartermode> 0 "memory_operand" "=m") | |
6655 | (vec_merge:<ssequartermode> | |
6656 | (vec_select:<ssequartermode> | |
6657 | (match_operand:V8FI 1 "register_operand" "v") | |
6658 | (parallel [(match_operand 2 "const_0_to_7_operand") | |
6659 | (match_operand 3 "const_0_to_7_operand")])) | |
6660 | (match_operand:<ssequartermode> 4 "memory_operand" "0") | |
6661 | (match_operand:QI 5 "register_operand" "k")))] | |
6662 | "TARGET_AVX512DQ | |
6663 | && (INTVAL (operands[2]) % 2 == 0) | |
bf3b2de7 UB |
6664 | && (INTVAL (operands[2]) == INTVAL (operands[3]) - 1) |
6665 | && rtx_equal_p (operands[4], operands[0])" | |
0774c160 AI |
6666 | { |
6667 | operands[2] = GEN_INT ((INTVAL (operands[2])) >> 1); | |
6668 | return "vextract<shuffletype>64x2\t{%2, %1, %0%{%5%}|%0%{%5%}, %1, %2}"; | |
6669 | } | |
6670 | [(set_attr "type" "sselog") | |
6671 | (set_attr "prefix_extra" "1") | |
6672 | (set_attr "length_immediate" "1") | |
6673 | (set_attr "memory" "store") | |
6674 | (set_attr "prefix" "evex") | |
6675 | (set_attr "mode" "<sseinsnmode>")]) | |
6676 | ||
47490470 AI |
6677 | (define_insn "avx512f_vextract<shuffletype>32x4_1_maskm" |
6678 | [(set (match_operand:<ssequartermode> 0 "memory_operand" "=m") | |
6679 | (vec_merge:<ssequartermode> | |
6680 | (vec_select:<ssequartermode> | |
6681 | (match_operand:V16FI 1 "register_operand" "v") | |
6682 | (parallel [(match_operand 2 "const_0_to_15_operand") | |
6683 | (match_operand 3 "const_0_to_15_operand") | |
6684 | (match_operand 4 "const_0_to_15_operand") | |
6685 | (match_operand 5 "const_0_to_15_operand")])) | |
6686 | (match_operand:<ssequartermode> 6 "memory_operand" "0") | |
be792bce | 6687 | (match_operand:QI 7 "register_operand" "Yk")))] |
622cd23a | 6688 | "TARGET_AVX512F |
0774c160 AI |
6689 | && ((INTVAL (operands[2]) % 4 == 0) |
6690 | && INTVAL (operands[2]) == (INTVAL (operands[3]) - 1) | |
622cd23a | 6691 | && INTVAL (operands[3]) == (INTVAL (operands[4]) - 1) |
bf3b2de7 UB |
6692 | && INTVAL (operands[4]) == (INTVAL (operands[5]) - 1)) |
6693 | && rtx_equal_p (operands[6], operands[0])" | |
47490470 AI |
6694 | { |
6695 | operands[2] = GEN_INT ((INTVAL (operands[2])) >> 2); | |
6696 | return "vextract<shuffletype>32x4\t{%2, %1, %0%{%7%}|%0%{%7%}, %1, %2}"; | |
6697 | } | |
6698 | [(set_attr "type" "sselog") | |
6699 | (set_attr "prefix_extra" "1") | |
6700 | (set_attr "length_immediate" "1") | |
6701 | (set_attr "memory" "store") | |
6702 | (set_attr "prefix" "evex") | |
6703 | (set_attr "mode" "<sseinsnmode>")]) | |
6704 | ||
0774c160 AI |
6705 | (define_insn "<mask_codefor>avx512dq_vextract<shuffletype>64x2_1<mask_name>" |
6706 | [(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" "=<store_mask_constraint>") | |
6707 | (vec_select:<ssequartermode> | |
6708 | (match_operand:V8FI 1 "register_operand" "v") | |
6709 | (parallel [(match_operand 2 "const_0_to_7_operand") | |
6710 | (match_operand 3 "const_0_to_7_operand")])))] | |
6711 | "TARGET_AVX512DQ && (INTVAL (operands[2]) == INTVAL (operands[3]) - 1)" | |
6712 | { | |
6713 | operands[2] = GEN_INT ((INTVAL (operands[2])) >> 1); | |
6714 | return "vextract<shuffletype>64x2\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"; | |
6715 | } | |
6716 | [(set_attr "type" "sselog1") | |
6717 | (set_attr "prefix_extra" "1") | |
6718 | (set_attr "length_immediate" "1") | |
6719 | (set_attr "prefix" "evex") | |
6720 | (set_attr "mode" "<sseinsnmode>")]) | |
6721 | ||
47490470 AI |
6722 | (define_insn "<mask_codefor>avx512f_vextract<shuffletype>32x4_1<mask_name>" |
6723 | [(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" "=<store_mask_constraint>") | |
2e2206fa AI |
6724 | (vec_select:<ssequartermode> |
6725 | (match_operand:V16FI 1 "register_operand" "v") | |
6726 | (parallel [(match_operand 2 "const_0_to_15_operand") | |
6727 | (match_operand 3 "const_0_to_15_operand") | |
6728 | (match_operand 4 "const_0_to_15_operand") | |
6729 | (match_operand 5 "const_0_to_15_operand")])))] | |
622cd23a UB |
6730 | "TARGET_AVX512F |
6731 | && (INTVAL (operands[2]) == (INTVAL (operands[3]) - 1) | |
6732 | && INTVAL (operands[3]) == (INTVAL (operands[4]) - 1) | |
6733 | && INTVAL (operands[4]) == (INTVAL (operands[5]) - 1))" | |
2e2206fa AI |
6734 | { |
6735 | operands[2] = GEN_INT ((INTVAL (operands[2])) >> 2); | |
47490470 | 6736 | return "vextract<shuffletype>32x4\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}"; |
2e2206fa | 6737 | } |
0774c160 | 6738 | [(set_attr "type" "sselog1") |
2e2206fa AI |
6739 | (set_attr "prefix_extra" "1") |
6740 | (set_attr "length_immediate" "1") | |
2e2206fa AI |
6741 | (set_attr "prefix" "evex") |
6742 | (set_attr "mode" "<sseinsnmode>")]) | |
6743 | ||
0774c160 AI |
6744 | (define_mode_attr extract_type_2 |
6745 | [(V16SF "avx512dq") (V16SI "avx512dq") (V8DF "avx512f") (V8DI "avx512f")]) | |
6746 | ||
6747 | (define_mode_attr extract_suf_2 | |
6748 | [(V16SF "32x8") (V16SI "32x8") (V8DF "64x4") (V8DI "64x4")]) | |
6749 | ||
6750 | (define_mode_iterator AVX512_VEC_2 | |
6751 | [(V16SF "TARGET_AVX512DQ") (V16SI "TARGET_AVX512DQ") V8DF V8DI]) | |
6752 | ||
6753 | (define_expand "<extract_type_2>_vextract<shuffletype><extract_suf_2>_mask" | |
47490470 | 6754 | [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand") |
0774c160 | 6755 | (match_operand:AVX512_VEC_2 1 "register_operand") |
47490470 AI |
6756 | (match_operand:SI 2 "const_0_to_1_operand") |
6757 | (match_operand:<ssehalfvecmode> 3 "nonimmediate_operand") | |
6758 | (match_operand:QI 4 "register_operand")] | |
6759 | "TARGET_AVX512F" | |
6760 | { | |
6761 | rtx (*insn)(rtx, rtx, rtx, rtx); | |
6762 | ||
6763 | if (MEM_P (operands[0]) && GET_CODE (operands[3]) == CONST_VECTOR) | |
6764 | operands[0] = force_reg (<ssequartermode>mode, operands[0]); | |
6765 | ||
6766 | switch (INTVAL (operands[2])) | |
6767 | { | |
6768 | case 0: | |
6769 | insn = gen_vec_extract_lo_<mode>_mask; | |
6770 | break; | |
6771 | case 1: | |
6772 | insn = gen_vec_extract_hi_<mode>_mask; | |
6773 | break; | |
6774 | default: | |
6775 | gcc_unreachable (); | |
6776 | } | |
6777 | ||
6778 | emit_insn (insn (operands[0], operands[1], operands[3], operands[4])); | |
6779 | DONE; | |
6780 | }) | |
6781 | ||
2e2206fa AI |
6782 | (define_split |
6783 | [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand") | |
6784 | (vec_select:<ssehalfvecmode> | |
6785 | (match_operand:V8FI 1 "nonimmediate_operand") | |
6786 | (parallel [(const_int 0) (const_int 1) | |
6787 | (const_int 2) (const_int 3)])))] | |
6788 | "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1])) | |
6789 | && reload_completed" | |
6790 | [(const_int 0)] | |
6791 | { | |
6792 | rtx op1 = operands[1]; | |
6793 | if (REG_P (op1)) | |
6794 | op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1)); | |
6795 | else | |
6796 | op1 = gen_lowpart (<ssehalfvecmode>mode, op1); | |
6797 | emit_move_insn (operands[0], op1); | |
6798 | DONE; | |
6799 | }) | |
6800 | ||
47490470 AI |
6801 | (define_insn "vec_extract_lo_<mode>_maskm" |
6802 | [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m") | |
6803 | (vec_merge:<ssehalfvecmode> | |
6804 | (vec_select:<ssehalfvecmode> | |
6805 | (match_operand:V8FI 1 "register_operand" "v") | |
6806 | (parallel [(const_int 0) (const_int 1) | |
6807 | (const_int 2) (const_int 3)])) | |
6808 | (match_operand:<ssehalfvecmode> 2 "memory_operand" "0") | |
be792bce | 6809 | (match_operand:QI 3 "register_operand" "Yk")))] |
bf3b2de7 UB |
6810 | "TARGET_AVX512F |
6811 | && rtx_equal_p (operands[2], operands[0])" | |
0774c160 AI |
6812 | "vextract<shuffletype>64x4\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}" |
6813 | [(set_attr "type" "sselog1") | |
47490470 AI |
6814 | (set_attr "prefix_extra" "1") |
6815 | (set_attr "length_immediate" "1") | |
6816 | (set_attr "prefix" "evex") | |
6817 | (set_attr "mode" "<sseinsnmode>")]) | |
6818 | ||
6819 | (define_insn "vec_extract_lo_<mode><mask_name>" | |
9d04ba29 | 6820 | [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>,v") |
2e2206fa | 6821 | (vec_select:<ssehalfvecmode> |
9d04ba29 | 6822 | (match_operand:V8FI 1 "nonimmediate_operand" "v,m") |
2e2206fa AI |
6823 | (parallel [(const_int 0) (const_int 1) |
6824 | (const_int 2) (const_int 3)])))] | |
6825 | "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))" | |
47490470 AI |
6826 | { |
6827 | if (<mask_applied>) | |
6828 | return "vextract<shuffletype>64x4\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}"; | |
6829 | else | |
6830 | return "#"; | |
6831 | } | |
0774c160 | 6832 | [(set_attr "type" "sselog1") |
2e2206fa AI |
6833 | (set_attr "prefix_extra" "1") |
6834 | (set_attr "length_immediate" "1") | |
2e2206fa AI |
6835 | (set_attr "prefix" "evex") |
6836 | (set_attr "mode" "<sseinsnmode>")]) | |
6837 | ||
47490470 AI |
6838 | (define_insn "vec_extract_hi_<mode>_maskm" |
6839 | [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m") | |
6840 | (vec_merge:<ssehalfvecmode> | |
6841 | (vec_select:<ssehalfvecmode> | |
6842 | (match_operand:V8FI 1 "register_operand" "v") | |
6843 | (parallel [(const_int 4) (const_int 5) | |
6844 | (const_int 6) (const_int 7)])) | |
6845 | (match_operand:<ssehalfvecmode> 2 "memory_operand" "0") | |
be792bce | 6846 | (match_operand:QI 3 "register_operand" "Yk")))] |
bf3b2de7 UB |
6847 | "TARGET_AVX512F |
6848 | && rtx_equal_p (operands[2], operands[0])" | |
47490470 AI |
6849 | "vextract<shuffletype>64x4\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}" |
6850 | [(set_attr "type" "sselog") | |
6851 | (set_attr "prefix_extra" "1") | |
6852 | (set_attr "length_immediate" "1") | |
6853 | (set_attr "memory" "store") | |
6854 | (set_attr "prefix" "evex") | |
6855 | (set_attr "mode" "<sseinsnmode>")]) | |
6856 | ||
6857 | (define_insn "vec_extract_hi_<mode><mask_name>" | |
6858 | [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>") | |
2e2206fa AI |
6859 | (vec_select:<ssehalfvecmode> |
6860 | (match_operand:V8FI 1 "register_operand" "v") | |
6861 | (parallel [(const_int 4) (const_int 5) | |
6862 | (const_int 6) (const_int 7)])))] | |
6863 | "TARGET_AVX512F" | |
47490470 | 6864 | "vextract<shuffletype>64x4\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}" |
0774c160 AI |
6865 | [(set_attr "type" "sselog1") |
6866 | (set_attr "prefix_extra" "1") | |
6867 | (set_attr "length_immediate" "1") | |
6868 | (set_attr "prefix" "evex") | |
6869 | (set_attr "mode" "<sseinsnmode>")]) | |
6870 | ||
6871 | (define_insn "vec_extract_hi_<mode>_maskm" | |
6872 | [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m") | |
6873 | (vec_merge:<ssehalfvecmode> | |
6874 | (vec_select:<ssehalfvecmode> | |
6875 | (match_operand:V16FI 1 "register_operand" "v") | |
6876 | (parallel [(const_int 8) (const_int 9) | |
6877 | (const_int 10) (const_int 11) | |
6878 | (const_int 12) (const_int 13) | |
6879 | (const_int 14) (const_int 15)])) | |
6880 | (match_operand:<ssehalfvecmode> 2 "memory_operand" "0") | |
6881 | (match_operand:QI 3 "register_operand" "k")))] | |
bf3b2de7 UB |
6882 | "TARGET_AVX512DQ |
6883 | && rtx_equal_p (operands[2], operands[0])" | |
0774c160 AI |
6884 | "vextract<shuffletype>32x8\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}" |
6885 | [(set_attr "type" "sselog1") | |
2e2206fa AI |
6886 | (set_attr "prefix_extra" "1") |
6887 | (set_attr "length_immediate" "1") | |
2e2206fa AI |
6888 | (set_attr "prefix" "evex") |
6889 | (set_attr "mode" "<sseinsnmode>")]) | |
6890 | ||
0148f0b6 AI |
6891 | (define_insn "vec_extract_hi_<mode><mask_name>" |
6892 | [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>,vm") | |
6893 | (vec_select:<ssehalfvecmode> | |
6894 | (match_operand:V16FI 1 "register_operand" "v,v") | |
6895 | (parallel [(const_int 8) (const_int 9) | |
6896 | (const_int 10) (const_int 11) | |
6897 | (const_int 12) (const_int 13) | |
6898 | (const_int 14) (const_int 15)])))] | |
0774c160 | 6899 | "TARGET_AVX512F && <mask_avx512dq_condition>" |
0148f0b6 AI |
6900 | "@ |
6901 | vextract<shuffletype>32x8\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1} | |
6902 | vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}" | |
6903 | [(set_attr "type" "sselog1") | |
6904 | (set_attr "prefix_extra" "1") | |
6905 | (set_attr "isa" "avx512dq,noavx512dq") | |
6906 | (set_attr "length_immediate" "1") | |
6907 | (set_attr "prefix" "evex") | |
6908 | (set_attr "mode" "<sseinsnmode>")]) | |
6909 | ||
0774c160 AI |
6910 | (define_expand "avx512vl_vextractf128<mode>" |
6911 | [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand") | |
6912 | (match_operand:VI48F_256 1 "register_operand") | |
6913 | (match_operand:SI 2 "const_0_to_1_operand") | |
6914 | (match_operand:<ssehalfvecmode> 3 "vector_move_operand") | |
6915 | (match_operand:QI 4 "register_operand")] | |
6916 | "TARGET_AVX512DQ && TARGET_AVX512VL" | |
6917 | { | |
6918 | rtx (*insn)(rtx, rtx, rtx, rtx); | |
6919 | ||
6920 | if (MEM_P (operands[0]) && GET_CODE (operands[3]) == CONST_VECTOR) | |
6921 | operands[0] = force_reg (<ssehalfvecmode>mode, operands[0]); | |
6922 | ||
6923 | switch (INTVAL (operands[2])) | |
6924 | { | |
6925 | case 0: | |
6926 | insn = gen_vec_extract_lo_<mode>_mask; | |
6927 | break; | |
6928 | case 1: | |
6929 | insn = gen_vec_extract_hi_<mode>_mask; | |
6930 | break; | |
6931 | default: | |
6932 | gcc_unreachable (); | |
6933 | } | |
6934 | ||
6935 | emit_insn (insn (operands[0], operands[1], operands[3], operands[4])); | |
6936 | DONE; | |
6937 | }) | |
6938 | ||
95879c72 | 6939 | (define_expand "avx_vextractf128<mode>" |
82e86dc6 UB |
6940 | [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand") |
6941 | (match_operand:V_256 1 "register_operand") | |
6942 | (match_operand:SI 2 "const_0_to_1_operand")] | |
95879c72 L |
6943 | "TARGET_AVX" |
6944 | { | |
16cc4440 UB |
6945 | rtx (*insn)(rtx, rtx); |
6946 | ||
95879c72 L |
6947 | switch (INTVAL (operands[2])) |
6948 | { | |
6949 | case 0: | |
16cc4440 | 6950 | insn = gen_vec_extract_lo_<mode>; |
95879c72 L |
6951 | break; |
6952 | case 1: | |
16cc4440 | 6953 | insn = gen_vec_extract_hi_<mode>; |
95879c72 L |
6954 | break; |
6955 | default: | |
6956 | gcc_unreachable (); | |
6957 | } | |
16cc4440 UB |
6958 | |
6959 | emit_insn (insn (operands[0], operands[1])); | |
95879c72 L |
6960 | DONE; |
6961 | }) | |
6962 | ||
0774c160 | 6963 | (define_insn "vec_extract_lo_<mode><mask_name>" |
2e2206fa AI |
6964 | [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m") |
6965 | (vec_select:<ssehalfvecmode> | |
6966 | (match_operand:V16FI 1 "nonimmediate_operand" "vm,v") | |
6967 | (parallel [(const_int 0) (const_int 1) | |
6968 | (const_int 2) (const_int 3) | |
6969 | (const_int 4) (const_int 5) | |
6970 | (const_int 6) (const_int 7)])))] | |
0774c160 AI |
6971 | "TARGET_AVX512F |
6972 | && <mask_mode512bit_condition> | |
6973 | && !(MEM_P (operands[0]) && MEM_P (operands[1]))" | |
2e2206fa | 6974 | { |
0774c160 AI |
6975 | if (<mask_applied>) |
6976 | return "vextract<shuffletype>32x8\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}"; | |
6977 | else | |
6978 | return "#"; | |
6979 | }) | |
6980 | ||
6981 | (define_split | |
6982 | [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand") | |
6983 | (vec_select:<ssehalfvecmode> | |
6984 | (match_operand:V16FI 1 "nonimmediate_operand") | |
6985 | (parallel [(const_int 0) (const_int 1) | |
6986 | (const_int 2) (const_int 3) | |
6987 | (const_int 4) (const_int 5) | |
6988 | (const_int 6) (const_int 7)])))] | |
6989 | "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1])) | |
6990 | && reload_completed" | |
6991 | [(const_int 0)] | |
6992 | { | |
2e2206fa AI |
6993 | rtx op1 = operands[1]; |
6994 | if (REG_P (op1)) | |
6995 | op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1)); | |
6996 | else | |
6997 | op1 = gen_lowpart (<ssehalfvecmode>mode, op1); | |
6998 | emit_move_insn (operands[0], op1); | |
6999 | DONE; | |
7000 | }) | |
7001 | ||
0774c160 AI |
7002 | (define_insn "vec_extract_lo_<mode><mask_name>" |
7003 | [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=v,m") | |
cbb734aa | 7004 | (vec_select:<ssehalfvecmode> |
0774c160 | 7005 | (match_operand:VI8F_256 1 "nonimmediate_operand" "vm,v") |
95879c72 | 7006 | (parallel [(const_int 0) (const_int 1)])))] |
0774c160 AI |
7007 | "TARGET_AVX |
7008 | && <mask_avx512vl_condition> && <mask_avx512dq_condition> | |
7009 | && !(MEM_P (operands[0]) && MEM_P (operands[1]))" | |
9b2133cd | 7010 | { |
0774c160 AI |
7011 | if (<mask_applied>) |
7012 | return "vextract<shuffletype>64x2\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}"; | |
9b2133cd | 7013 | else |
0774c160 AI |
7014 | return "#"; |
7015 | } | |
7016 | [(set_attr "type" "sselog") | |
7017 | (set_attr "prefix_extra" "1") | |
7018 | (set_attr "length_immediate" "1") | |
7019 | (set_attr "memory" "none,store") | |
7020 | (set_attr "prefix" "evex") | |
7021 | (set_attr "mode" "XI")]) | |
7022 | ||
7023 | (define_split | |
7024 | [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand") | |
7025 | (vec_select:<ssehalfvecmode> | |
7026 | (match_operand:VI8F_256 1 "nonimmediate_operand") | |
7027 | (parallel [(const_int 0) (const_int 1)])))] | |
7028 | "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1])) | |
7029 | && reload_completed" | |
7030 | [(const_int 0)] | |
7031 | { | |
7032 | rtx op1 = operands[1]; | |
7033 | if (REG_P (op1)) | |
7034 | op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1)); | |
7035 | else | |
7036 | op1 = gen_lowpart (<ssehalfvecmode>mode, op1); | |
7037 | emit_move_insn (operands[0], op1); | |
7038 | DONE; | |
9b2133cd | 7039 | }) |
95879c72 | 7040 | |
0774c160 AI |
7041 | (define_insn "vec_extract_hi_<mode><mask_name>" |
7042 | [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=v,<store_mask_constraint>") | |
cbb734aa | 7043 | (vec_select:<ssehalfvecmode> |
0774c160 | 7044 | (match_operand:VI8F_256 1 "register_operand" "v,v") |
95879c72 | 7045 | (parallel [(const_int 2) (const_int 3)])))] |
7f664e31 | 7046 | "TARGET_AVX && <mask_avx512vl_condition> && <mask_avx512dq_condition>" |
0774c160 | 7047 | { |
7f664e31 KY |
7048 | if (TARGET_AVX512VL) |
7049 | { | |
7050 | if (TARGET_AVX512DQ) | |
7051 | return "vextract<shuffletype>64x2\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}"; | |
7052 | else | |
7053 | return "vextract<shuffletype>32x4\t{$0x1, %1, %0|%0, %1, 0x1}"; | |
7054 | } | |
0774c160 AI |
7055 | else |
7056 | return "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}"; | |
7057 | } | |
95879c72 | 7058 | [(set_attr "type" "sselog") |
725fd454 JJ |
7059 | (set_attr "prefix_extra" "1") |
7060 | (set_attr "length_immediate" "1") | |
95879c72 L |
7061 | (set_attr "memory" "none,store") |
7062 | (set_attr "prefix" "vex") | |
1db4406e | 7063 | (set_attr "mode" "<sseinsnmode>")]) |
95879c72 | 7064 | |
0774c160 AI |
7065 | (define_split |
7066 | [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand") | |
cbb734aa | 7067 | (vec_select:<ssehalfvecmode> |
0774c160 | 7068 | (match_operand:VI4F_256 1 "nonimmediate_operand") |
95879c72 L |
7069 | (parallel [(const_int 0) (const_int 1) |
7070 | (const_int 2) (const_int 3)])))] | |
0774c160 AI |
7071 | "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1])) && reload_completed" |
7072 | [(const_int 0)] | |
9b2133cd | 7073 | { |
0774c160 AI |
7074 | rtx op1 = operands[1]; |
7075 | if (REG_P (op1)) | |
7076 | op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1)); | |
9b2133cd | 7077 | else |
0774c160 AI |
7078 | op1 = gen_lowpart (<ssehalfvecmode>mode, op1); |
7079 | emit_move_insn (operands[0], op1); | |
7080 | DONE; | |
9b2133cd | 7081 | }) |
95879c72 | 7082 | |
0774c160 AI |
7083 | |
7084 | (define_insn "vec_extract_lo_<mode><mask_name>" | |
7085 | [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>") | |
7086 | (vec_select:<ssehalfvecmode> | |
ee768d85 | 7087 | (match_operand:VI4F_256 1 "register_operand" "v") |
0774c160 AI |
7088 | (parallel [(const_int 0) (const_int 1) |
7089 | (const_int 2) (const_int 3)])))] | |
7090 | "TARGET_AVX && <mask_avx512vl_condition> && <mask_avx512dq_condition>" | |
7091 | { | |
7092 | if (<mask_applied>) | |
7093 | return "vextract<shuffletype>32x4\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}"; | |
7094 | else | |
7095 | return "#"; | |
7096 | } | |
7097 | [(set_attr "type" "sselog1") | |
7098 | (set_attr "prefix_extra" "1") | |
7099 | (set_attr "length_immediate" "1") | |
7100 | (set_attr "prefix" "evex") | |
7101 | (set_attr "mode" "<sseinsnmode>")]) | |
7102 | ||
7103 | (define_insn "vec_extract_lo_<mode>_maskm" | |
7104 | [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m") | |
7105 | (vec_merge:<ssehalfvecmode> | |
7106 | (vec_select:<ssehalfvecmode> | |
7107 | (match_operand:VI4F_256 1 "register_operand" "v") | |
7108 | (parallel [(const_int 0) (const_int 1) | |
7109 | (const_int 2) (const_int 3)])) | |
7110 | (match_operand:<ssehalfvecmode> 2 "memory_operand" "0") | |
7111 | (match_operand:QI 3 "register_operand" "k")))] | |
bf3b2de7 UB |
7112 | "TARGET_AVX512VL && TARGET_AVX512F |
7113 | && rtx_equal_p (operands[2], operands[0])" | |
7114 | "vextract<shuffletype>32x4\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}" | |
0774c160 AI |
7115 | [(set_attr "type" "sselog1") |
7116 | (set_attr "prefix_extra" "1") | |
7117 | (set_attr "length_immediate" "1") | |
7118 | (set_attr "prefix" "evex") | |
7119 | (set_attr "mode" "<sseinsnmode>")]) | |
7120 | ||
7121 | (define_insn "vec_extract_hi_<mode>_maskm" | |
7122 | [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m") | |
7123 | (vec_merge:<ssehalfvecmode> | |
7124 | (vec_select:<ssehalfvecmode> | |
7125 | (match_operand:VI4F_256 1 "register_operand" "v") | |
7126 | (parallel [(const_int 4) (const_int 5) | |
7127 | (const_int 6) (const_int 7)])) | |
7128 | (match_operand:<ssehalfvecmode> 2 "memory_operand" "0") | |
7129 | (match_operand:<ssehalfvecmode> 3 "register_operand" "k")))] | |
bf3b2de7 UB |
7130 | "TARGET_AVX512F && TARGET_AVX512VL |
7131 | && rtx_equal_p (operands[2], operands[0])" | |
7132 | "vextract<shuffletype>32x4\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}" | |
0774c160 AI |
7133 | [(set_attr "type" "sselog1") |
7134 | (set_attr "prefix_extra" "1") | |
7135 | (set_attr "length_immediate" "1") | |
7136 | (set_attr "prefix" "evex") | |
7137 | (set_attr "mode" "<sseinsnmode>")]) | |
7138 | ||
7139 | (define_insn "vec_extract_hi_<mode><mask_name>" | |
7140 | [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>") | |
cbb734aa | 7141 | (vec_select:<ssehalfvecmode> |
0774c160 | 7142 | (match_operand:VI4F_256 1 "register_operand" "v") |
95879c72 L |
7143 | (parallel [(const_int 4) (const_int 5) |
7144 | (const_int 6) (const_int 7)])))] | |
0774c160 AI |
7145 | "TARGET_AVX && <mask_avx512vl_condition>" |
7146 | { | |
7147 | if (TARGET_AVX512VL) | |
7148 | return "vextract<shuffletype>32x4\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}"; | |
7149 | else | |
7150 | return "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}"; | |
7151 | } | |
7152 | [(set_attr "type" "sselog1") | |
725fd454 JJ |
7153 | (set_attr "prefix_extra" "1") |
7154 | (set_attr "length_immediate" "1") | |
0774c160 AI |
7155 | (set (attr "prefix") |
7156 | (if_then_else | |
7157 | (match_test "TARGET_AVX512VL") | |
7158 | (const_string "evex") | |
7159 | (const_string "vex"))) | |
1db4406e | 7160 | (set_attr "mode" "<sseinsnmode>")]) |
95879c72 | 7161 | |
c003c6d6 AI |
7162 | (define_insn_and_split "vec_extract_lo_v32hi" |
7163 | [(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,m") | |
7164 | (vec_select:V16HI | |
7165 | (match_operand:V32HI 1 "nonimmediate_operand" "vm,v") | |
7166 | (parallel [(const_int 0) (const_int 1) | |
7167 | (const_int 2) (const_int 3) | |
7168 | (const_int 4) (const_int 5) | |
7169 | (const_int 6) (const_int 7) | |
7170 | (const_int 8) (const_int 9) | |
7171 | (const_int 10) (const_int 11) | |
7172 | (const_int 12) (const_int 13) | |
7173 | (const_int 14) (const_int 15)])))] | |
7174 | "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))" | |
7175 | "#" | |
7176 | "&& reload_completed" | |
7177 | [(set (match_dup 0) (match_dup 1))] | |
7178 | { | |
7179 | if (REG_P (operands[1])) | |
7180 | operands[1] = gen_rtx_REG (V16HImode, REGNO (operands[1])); | |
7181 | else | |
7182 | operands[1] = adjust_address (operands[1], V16HImode, 0); | |
7183 | }) | |
7184 | ||
7185 | (define_insn "vec_extract_hi_v32hi" | |
7186 | [(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,m") | |
7187 | (vec_select:V16HI | |
ee768d85 | 7188 | (match_operand:V32HI 1 "register_operand" "v,v") |
c003c6d6 AI |
7189 | (parallel [(const_int 16) (const_int 17) |
7190 | (const_int 18) (const_int 19) | |
7191 | (const_int 20) (const_int 21) | |
7192 | (const_int 22) (const_int 23) | |
7193 | (const_int 24) (const_int 25) | |
7194 | (const_int 26) (const_int 27) | |
7195 | (const_int 28) (const_int 29) | |
7196 | (const_int 30) (const_int 31)])))] | |
7197 | "TARGET_AVX512F" | |
7198 | "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}" | |
7199 | [(set_attr "type" "sselog") | |
7200 | (set_attr "prefix_extra" "1") | |
7201 | (set_attr "length_immediate" "1") | |
7202 | (set_attr "memory" "none,store") | |
7203 | (set_attr "prefix" "evex") | |
7204 | (set_attr "mode" "XI")]) | |
7205 | ||
9b2133cd | 7206 | (define_insn_and_split "vec_extract_lo_v16hi" |
95879c72 L |
7207 | [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m") |
7208 | (vec_select:V8HI | |
9b2133cd | 7209 | (match_operand:V16HI 1 "nonimmediate_operand" "xm,x") |
95879c72 L |
7210 | (parallel [(const_int 0) (const_int 1) |
7211 | (const_int 2) (const_int 3) | |
7212 | (const_int 4) (const_int 5) | |
7213 | (const_int 6) (const_int 7)])))] | |
3b0eee5d | 7214 | "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" |
9b2133cd L |
7215 | "#" |
7216 | "&& reload_completed" | |
36c4015b | 7217 | [(set (match_dup 0) (match_dup 1))] |
9b2133cd | 7218 | { |
36c4015b UB |
7219 | if (REG_P (operands[1])) |
7220 | operands[1] = gen_rtx_REG (V8HImode, REGNO (operands[1])); | |
9b2133cd | 7221 | else |
36c4015b | 7222 | operands[1] = adjust_address (operands[1], V8HImode, 0); |
9b2133cd | 7223 | }) |
95879c72 L |
7224 | |
7225 | (define_insn "vec_extract_hi_v16hi" | |
7226 | [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m") | |
7227 | (vec_select:V8HI | |
7228 | (match_operand:V16HI 1 "register_operand" "x,x") | |
7229 | (parallel [(const_int 8) (const_int 9) | |
7230 | (const_int 10) (const_int 11) | |
7231 | (const_int 12) (const_int 13) | |
7232 | (const_int 14) (const_int 15)])))] | |
7233 | "TARGET_AVX" | |
1db4406e | 7234 | "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}" |
95879c72 | 7235 | [(set_attr "type" "sselog") |
725fd454 JJ |
7236 | (set_attr "prefix_extra" "1") |
7237 | (set_attr "length_immediate" "1") | |
95879c72 L |
7238 | (set_attr "memory" "none,store") |
7239 | (set_attr "prefix" "vex") | |
1db4406e | 7240 | (set_attr "mode" "OI")]) |
95879c72 | 7241 | |
c003c6d6 AI |
7242 | (define_insn_and_split "vec_extract_lo_v64qi" |
7243 | [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m") | |
7244 | (vec_select:V32QI | |
7245 | (match_operand:V64QI 1 "nonimmediate_operand" "vm,v") | |
7246 | (parallel [(const_int 0) (const_int 1) | |
7247 | (const_int 2) (const_int 3) | |
7248 | (const_int 4) (const_int 5) | |
7249 | (const_int 6) (const_int 7) | |
7250 | (const_int 8) (const_int 9) | |
7251 | (const_int 10) (const_int 11) | |
7252 | (const_int 12) (const_int 13) | |
7253 | (const_int 14) (const_int 15) | |
7254 | (const_int 16) (const_int 17) | |
7255 | (const_int 18) (const_int 19) | |
7256 | (const_int 20) (const_int 21) | |
7257 | (const_int 22) (const_int 23) | |
7258 | (const_int 24) (const_int 25) | |
7259 | (const_int 26) (const_int 27) | |
7260 | (const_int 28) (const_int 29) | |
7261 | (const_int 30) (const_int 31)])))] | |
7262 | "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))" | |
7263 | "#" | |
7264 | "&& reload_completed" | |
7265 | [(set (match_dup 0) (match_dup 1))] | |
7266 | { | |
7267 | if (REG_P (operands[1])) | |
7268 | operands[1] = gen_rtx_REG (V32QImode, REGNO (operands[1])); | |
7269 | else | |
7270 | operands[1] = adjust_address (operands[1], V32QImode, 0); | |
7271 | }) | |
7272 | ||
7273 | (define_insn "vec_extract_hi_v64qi" | |
7274 | [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m") | |
7275 | (vec_select:V32QI | |
ee768d85 | 7276 | (match_operand:V64QI 1 "register_operand" "v,v") |
c003c6d6 AI |
7277 | (parallel [(const_int 32) (const_int 33) |
7278 | (const_int 34) (const_int 35) | |
7279 | (const_int 36) (const_int 37) | |
7280 | (const_int 38) (const_int 39) | |
7281 | (const_int 40) (const_int 41) | |
7282 | (const_int 42) (const_int 43) | |
7283 | (const_int 44) (const_int 45) | |
7284 | (const_int 46) (const_int 47) | |
7285 | (const_int 48) (const_int 49) | |
7286 | (const_int 50) (const_int 51) | |
7287 | (const_int 52) (const_int 53) | |
7288 | (const_int 54) (const_int 55) | |
7289 | (const_int 56) (const_int 57) | |
7290 | (const_int 58) (const_int 59) | |
7291 | (const_int 60) (const_int 61) | |
7292 | (const_int 62) (const_int 63)])))] | |
7293 | "TARGET_AVX512F" | |
7294 | "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}" | |
7295 | [(set_attr "type" "sselog") | |
7296 | (set_attr "prefix_extra" "1") | |
7297 | (set_attr "length_immediate" "1") | |
7298 | (set_attr "memory" "none,store") | |
7299 | (set_attr "prefix" "evex") | |
7300 | (set_attr "mode" "XI")]) | |
7301 | ||
9b2133cd | 7302 | (define_insn_and_split "vec_extract_lo_v32qi" |
95879c72 L |
7303 | [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m") |
7304 | (vec_select:V16QI | |
9b2133cd | 7305 | (match_operand:V32QI 1 "nonimmediate_operand" "xm,x") |
95879c72 L |
7306 | (parallel [(const_int 0) (const_int 1) |
7307 | (const_int 2) (const_int 3) | |
7308 | (const_int 4) (const_int 5) | |
7309 | (const_int 6) (const_int 7) | |
7310 | (const_int 8) (const_int 9) | |
7311 | (const_int 10) (const_int 11) | |
7312 | (const_int 12) (const_int 13) | |
7313 | (const_int 14) (const_int 15)])))] | |
3b0eee5d | 7314 | "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" |
9b2133cd L |
7315 | "#" |
7316 | "&& reload_completed" | |
36c4015b | 7317 | [(set (match_dup 0) (match_dup 1))] |
9b2133cd | 7318 | { |
36c4015b UB |
7319 | if (REG_P (operands[1])) |
7320 | operands[1] = gen_rtx_REG (V16QImode, REGNO (operands[1])); | |
9b2133cd | 7321 | else |
36c4015b | 7322 | operands[1] = adjust_address (operands[1], V16QImode, 0); |
9b2133cd | 7323 | }) |
95879c72 L |
7324 | |
7325 | (define_insn "vec_extract_hi_v32qi" | |
7326 | [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m") | |
7327 | (vec_select:V16QI | |
7328 | (match_operand:V32QI 1 "register_operand" "x,x") | |
7329 | (parallel [(const_int 16) (const_int 17) | |
7330 | (const_int 18) (const_int 19) | |
7331 | (const_int 20) (const_int 21) | |
7332 | (const_int 22) (const_int 23) | |
7333 | (const_int 24) (const_int 25) | |
7334 | (const_int 26) (const_int 27) | |
7335 | (const_int 28) (const_int 29) | |
7336 | (const_int 30) (const_int 31)])))] | |
7337 | "TARGET_AVX" | |
1db4406e | 7338 | "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}" |
95879c72 | 7339 | [(set_attr "type" "sselog") |
725fd454 JJ |
7340 | (set_attr "prefix_extra" "1") |
7341 | (set_attr "length_immediate" "1") | |
95879c72 L |
7342 | (set_attr "memory" "none,store") |
7343 | (set_attr "prefix" "vex") | |
1db4406e | 7344 | (set_attr "mode" "OI")]) |
95879c72 | 7345 | |
6bec6c98 UB |
7346 | ;; Modes handled by vec_extract patterns. |
7347 | (define_mode_iterator VEC_EXTRACT_MODE | |
0774c160 AI |
7348 | [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI |
7349 | (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI | |
c003c6d6 AI |
7350 | (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI |
7351 | (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI | |
7352 | (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF | |
7353 | (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF]) | |
6bec6c98 | 7354 | |
80980aec | 7355 | (define_expand "vec_extract<mode>" |
82e86dc6 UB |
7356 | [(match_operand:<ssescalarmode> 0 "register_operand") |
7357 | (match_operand:VEC_EXTRACT_MODE 1 "register_operand") | |
7358 | (match_operand 2 "const_int_operand")] | |
d6023b50 | 7359 | "TARGET_SSE" |
b40c4f68 | 7360 | { |
d6023b50 UB |
7361 | ix86_expand_vector_extract (false, operands[0], operands[1], |
7362 | INTVAL (operands[2])); | |
b40c4f68 UB |
7363 | DONE; |
7364 | }) | |
7365 | ||
ef719a44 RH |
7366 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
7367 | ;; | |
7368 | ;; Parallel double-precision floating point element swizzling | |
7369 | ;; | |
7370 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
7371 | ||
47490470 | 7372 | (define_insn "<mask_codefor>avx512f_unpckhpd512<mask_name>" |
c003c6d6 AI |
7373 | [(set (match_operand:V8DF 0 "register_operand" "=v") |
7374 | (vec_select:V8DF | |
7375 | (vec_concat:V16DF | |
ee768d85 | 7376 | (match_operand:V8DF 1 "register_operand" "v") |
c003c6d6 AI |
7377 | (match_operand:V8DF 2 "nonimmediate_operand" "vm")) |
7378 | (parallel [(const_int 1) (const_int 9) | |
7379 | (const_int 3) (const_int 11) | |
7380 | (const_int 5) (const_int 13) | |
7381 | (const_int 7) (const_int 15)])))] | |
7382 | "TARGET_AVX512F" | |
47490470 | 7383 | "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
c003c6d6 AI |
7384 | [(set_attr "type" "sselog") |
7385 | (set_attr "prefix" "evex") | |
7386 | (set_attr "mode" "V8DF")]) | |
7387 | ||
b0d49a6e | 7388 | ;; Recall that the 256-bit unpck insns only shuffle within their lanes. |
8fd83a54 AI |
7389 | (define_insn "avx_unpckhpd256<mask_name>" |
7390 | [(set (match_operand:V4DF 0 "register_operand" "=v") | |
95879c72 L |
7391 | (vec_select:V4DF |
7392 | (vec_concat:V8DF | |
8fd83a54 AI |
7393 | (match_operand:V4DF 1 "register_operand" "v") |
7394 | (match_operand:V4DF 2 "nonimmediate_operand" "vm")) | |
c4d3f42f | 7395 | (parallel [(const_int 1) (const_int 5) |
95879c72 | 7396 | (const_int 3) (const_int 7)])))] |
8fd83a54 AI |
7397 | "TARGET_AVX && <mask_avx512vl_condition>" |
7398 | "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" | |
95879c72 L |
7399 | [(set_attr "type" "sselog") |
7400 | (set_attr "prefix" "vex") | |
7401 | (set_attr "mode" "V4DF")]) | |
7402 | ||
1e27129f L |
7403 | (define_expand "vec_interleave_highv4df" |
7404 | [(set (match_dup 3) | |
7405 | (vec_select:V4DF | |
7406 | (vec_concat:V8DF | |
7407 | (match_operand:V4DF 1 "register_operand" "x") | |
7408 | (match_operand:V4DF 2 "nonimmediate_operand" "xm")) | |
7409 | (parallel [(const_int 0) (const_int 4) | |
7410 | (const_int 2) (const_int 6)]))) | |
7411 | (set (match_dup 4) | |
7412 | (vec_select:V4DF | |
7413 | (vec_concat:V8DF | |
7414 | (match_dup 1) | |
7415 | (match_dup 2)) | |
7416 | (parallel [(const_int 1) (const_int 5) | |
7417 | (const_int 3) (const_int 7)]))) | |
82e86dc6 | 7418 | (set (match_operand:V4DF 0 "register_operand") |
2a4337c0 UB |
7419 | (vec_select:V4DF |
7420 | (vec_concat:V8DF | |
1e27129f | 7421 | (match_dup 3) |
2a4337c0 UB |
7422 | (match_dup 4)) |
7423 | (parallel [(const_int 2) (const_int 3) | |
7424 | (const_int 6) (const_int 7)])))] | |
1e27129f L |
7425 | "TARGET_AVX" |
7426 | { | |
7427 | operands[3] = gen_reg_rtx (V4DFmode); | |
7428 | operands[4] = gen_reg_rtx (V4DFmode); | |
7429 | }) | |
7430 | ||
7431 | ||
8fd83a54 AI |
7432 | (define_insn "avx512vl_unpckhpd128_mask" |
7433 | [(set (match_operand:V2DF 0 "register_operand" "=v") | |
7434 | (vec_merge:V2DF | |
7435 | (vec_select:V2DF | |
7436 | (vec_concat:V4DF | |
7437 | (match_operand:V2DF 1 "register_operand" "v") | |
7438 | (match_operand:V2DF 2 "nonimmediate_operand" "vm")) | |
7439 | (parallel [(const_int 1) (const_int 3)])) | |
7440 | (match_operand:V2DF 3 "vector_move_operand" "0C") | |
7441 | (match_operand:QI 4 "register_operand" "Yk")))] | |
7442 | "TARGET_AVX512VL" | |
7443 | "vunpckhpd\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}" | |
7444 | [(set_attr "type" "sselog") | |
7445 | (set_attr "prefix" "evex") | |
7446 | (set_attr "mode" "V2DF")]) | |
7447 | ||
b0d49a6e | 7448 | (define_expand "vec_interleave_highv2df" |
82e86dc6 | 7449 | [(set (match_operand:V2DF 0 "register_operand") |
ffbaf337 UB |
7450 | (vec_select:V2DF |
7451 | (vec_concat:V4DF | |
82e86dc6 UB |
7452 | (match_operand:V2DF 1 "nonimmediate_operand") |
7453 | (match_operand:V2DF 2 "nonimmediate_operand")) | |
ffbaf337 UB |
7454 | (parallel [(const_int 1) |
7455 | (const_int 3)])))] | |
7456 | "TARGET_SSE2" | |
5e04b3b6 RH |
7457 | { |
7458 | if (!ix86_vec_interleave_v2df_operator_ok (operands, 1)) | |
7459 | operands[2] = force_reg (V2DFmode, operands[2]); | |
7460 | }) | |
ffbaf337 | 7461 | |
4e76acd2 | 7462 | (define_insn "*vec_interleave_highv2df" |
a02f398d | 7463 | [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m") |
5e04b3b6 RH |
7464 | (vec_select:V2DF |
7465 | (vec_concat:V4DF | |
a02f398d UB |
7466 | (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,o,o,o,x") |
7467 | (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,0,x,0")) | |
5e04b3b6 RH |
7468 | (parallel [(const_int 1) |
7469 | (const_int 3)])))] | |
4e76acd2 | 7470 | "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)" |
5e04b3b6 RH |
7471 | "@ |
7472 | unpckhpd\t{%2, %0|%0, %2} | |
3729983c UB |
7473 | vunpckhpd\t{%2, %1, %0|%0, %1, %2} |
7474 | %vmovddup\t{%H1, %0|%0, %H1} | |
5e04b3b6 | 7475 | movlpd\t{%H1, %0|%0, %H1} |
3729983c | 7476 | vmovlpd\t{%H1, %2, %0|%0, %2, %H1} |
eabb5f48 | 7477 | %vmovhpd\t{%1, %0|%q0, %1}" |
a02f398d | 7478 | [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*") |
f220a4f4 JJ |
7479 | (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov") |
7480 | (set_attr "ssememalign" "64") | |
3729983c UB |
7481 | (set_attr "prefix_data16" "*,*,*,1,*,1") |
7482 | (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex") | |
7b0fe4f4 | 7483 | (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")]) |
95879c72 | 7484 | |
47490470 | 7485 | (define_expand "avx512f_movddup512<mask_name>" |
c003c6d6 AI |
7486 | [(set (match_operand:V8DF 0 "register_operand") |
7487 | (vec_select:V8DF | |
7488 | (vec_concat:V16DF | |
7489 | (match_operand:V8DF 1 "nonimmediate_operand") | |
7490 | (match_dup 1)) | |
7491 | (parallel [(const_int 0) (const_int 8) | |
7492 | (const_int 2) (const_int 10) | |
7493 | (const_int 4) (const_int 12) | |
7494 | (const_int 6) (const_int 14)])))] | |
7495 | "TARGET_AVX512F") | |
7496 | ||
47490470 | 7497 | (define_expand "avx512f_unpcklpd512<mask_name>" |
c003c6d6 AI |
7498 | [(set (match_operand:V8DF 0 "register_operand") |
7499 | (vec_select:V8DF | |
7500 | (vec_concat:V16DF | |
7501 | (match_operand:V8DF 1 "register_operand") | |
7502 | (match_operand:V8DF 2 "nonimmediate_operand")) | |
7503 | (parallel [(const_int 0) (const_int 8) | |
7504 | (const_int 2) (const_int 10) | |
7505 | (const_int 4) (const_int 12) | |
7506 | (const_int 6) (const_int 14)])))] | |
7507 | "TARGET_AVX512F") | |
7508 | ||
47490470 | 7509 | (define_insn "*avx512f_unpcklpd512<mask_name>" |
c003c6d6 AI |
7510 | [(set (match_operand:V8DF 0 "register_operand" "=v,v") |
7511 | (vec_select:V8DF | |
7512 | (vec_concat:V16DF | |
7513 | (match_operand:V8DF 1 "nonimmediate_operand" "vm, v") | |
7514 | (match_operand:V8DF 2 "nonimmediate_operand" "1 ,vm")) | |
7515 | (parallel [(const_int 0) (const_int 8) | |
7516 | (const_int 2) (const_int 10) | |
7517 | (const_int 4) (const_int 12) | |
7518 | (const_int 6) (const_int 14)])))] | |
7519 | "TARGET_AVX512F" | |
7520 | "@ | |
47490470 AI |
7521 | vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1} |
7522 | vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" | |
c003c6d6 AI |
7523 | [(set_attr "type" "sselog") |
7524 | (set_attr "prefix" "evex") | |
7525 | (set_attr "mode" "V8DF")]) | |
7526 | ||
5e04b3b6 | 7527 | ;; Recall that the 256-bit unpck insns only shuffle within their lanes. |
8fd83a54 | 7528 | (define_expand "avx_movddup256<mask_name>" |
82e86dc6 | 7529 | [(set (match_operand:V4DF 0 "register_operand") |
95879c72 L |
7530 | (vec_select:V4DF |
7531 | (vec_concat:V8DF | |
82e86dc6 | 7532 | (match_operand:V4DF 1 "nonimmediate_operand") |
95879c72 | 7533 | (match_dup 1)) |
5e04b3b6 RH |
7534 | (parallel [(const_int 0) (const_int 4) |
7535 | (const_int 2) (const_int 6)])))] | |
8fd83a54 | 7536 | "TARGET_AVX && <mask_avx512vl_condition>") |
ef719a44 | 7537 | |
8fd83a54 | 7538 | (define_expand "avx_unpcklpd256<mask_name>" |
82e86dc6 | 7539 | [(set (match_operand:V4DF 0 "register_operand") |
95879c72 L |
7540 | (vec_select:V4DF |
7541 | (vec_concat:V8DF | |
82e86dc6 UB |
7542 | (match_operand:V4DF 1 "register_operand") |
7543 | (match_operand:V4DF 2 "nonimmediate_operand")) | |
95879c72 | 7544 | (parallel [(const_int 0) (const_int 4) |
c4d3f42f | 7545 | (const_int 2) (const_int 6)])))] |
8fd83a54 | 7546 | "TARGET_AVX && <mask_avx512vl_condition>") |
5e04b3b6 | 7547 | |
8fd83a54 AI |
7548 | (define_insn "*avx_unpcklpd256<mask_name>" |
7549 | [(set (match_operand:V4DF 0 "register_operand" "=v,v") | |
5e04b3b6 RH |
7550 | (vec_select:V4DF |
7551 | (vec_concat:V8DF | |
8fd83a54 AI |
7552 | (match_operand:V4DF 1 "nonimmediate_operand" " v,m") |
7553 | (match_operand:V4DF 2 "nonimmediate_operand" "vm,1")) | |
5e04b3b6 RH |
7554 | (parallel [(const_int 0) (const_int 4) |
7555 | (const_int 2) (const_int 6)])))] | |
8fd83a54 | 7556 | "TARGET_AVX && <mask_avx512vl_condition>" |
5e04b3b6 | 7557 | "@ |
8fd83a54 AI |
7558 | vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2} |
7559 | vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}" | |
95879c72 L |
7560 | [(set_attr "type" "sselog") |
7561 | (set_attr "prefix" "vex") | |
7562 | (set_attr "mode" "V4DF")]) | |
7563 | ||
1e27129f L |
7564 | (define_expand "vec_interleave_lowv4df" |
7565 | [(set (match_dup 3) | |
7566 | (vec_select:V4DF | |
7567 | (vec_concat:V8DF | |
7568 | (match_operand:V4DF 1 "register_operand" "x") | |
7569 | (match_operand:V4DF 2 "nonimmediate_operand" "xm")) | |
7570 | (parallel [(const_int 0) (const_int 4) | |
7571 | (const_int 2) (const_int 6)]))) | |
7572 | (set (match_dup 4) | |
7573 | (vec_select:V4DF | |
7574 | (vec_concat:V8DF | |
7575 | (match_dup 1) | |
7576 | (match_dup 2)) | |
7577 | (parallel [(const_int 1) (const_int 5) | |
7578 | (const_int 3) (const_int 7)]))) | |
82e86dc6 | 7579 | (set (match_operand:V4DF 0 "register_operand") |
2a4337c0 UB |
7580 | (vec_select:V4DF |
7581 | (vec_concat:V8DF | |
1e27129f | 7582 | (match_dup 3) |
2a4337c0 UB |
7583 | (match_dup 4)) |
7584 | (parallel [(const_int 0) (const_int 1) | |
977e83a3 | 7585 | (const_int 4) (const_int 5)])))] |
1e27129f L |
7586 | "TARGET_AVX" |
7587 | { | |
7588 | operands[3] = gen_reg_rtx (V4DFmode); | |
7589 | operands[4] = gen_reg_rtx (V4DFmode); | |
7590 | }) | |
7591 | ||
8fd83a54 AI |
7592 | (define_insn "avx512vl_unpcklpd128_mask" |
7593 | [(set (match_operand:V2DF 0 "register_operand" "=v") | |
7594 | (vec_merge:V2DF | |
7595 | (vec_select:V2DF | |
7596 | (vec_concat:V4DF | |
7597 | (match_operand:V2DF 1 "register_operand" "v") | |
7598 | (match_operand:V2DF 2 "nonimmediate_operand" "vm")) | |
7599 | (parallel [(const_int 0) (const_int 2)])) | |
7600 | (match_operand:V2DF 3 "vector_move_operand" "0C") | |
7601 | (match_operand:QI 4 "register_operand" "Yk")))] | |
7602 | "TARGET_AVX512VL" | |
7603 | "vunpcklpd\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}" | |
7604 | [(set_attr "type" "sselog") | |
7605 | (set_attr "prefix" "evex") | |
7606 | (set_attr "mode" "V2DF")]) | |
7607 | ||
b0d49a6e | 7608 | (define_expand "vec_interleave_lowv2df" |
82e86dc6 | 7609 | [(set (match_operand:V2DF 0 "register_operand") |
ffbaf337 UB |
7610 | (vec_select:V2DF |
7611 | (vec_concat:V4DF | |
82e86dc6 UB |
7612 | (match_operand:V2DF 1 "nonimmediate_operand") |
7613 | (match_operand:V2DF 2 "nonimmediate_operand")) | |
ffbaf337 UB |
7614 | (parallel [(const_int 0) |
7615 | (const_int 2)])))] | |
7616 | "TARGET_SSE2" | |
5e04b3b6 RH |
7617 | { |
7618 | if (!ix86_vec_interleave_v2df_operator_ok (operands, 0)) | |
7619 | operands[1] = force_reg (V2DFmode, operands[1]); | |
7620 | }) | |
ffbaf337 | 7621 | |
4e76acd2 | 7622 | (define_insn "*vec_interleave_lowv2df" |
a02f398d | 7623 | [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,o") |
5e04b3b6 RH |
7624 | (vec_select:V2DF |
7625 | (vec_concat:V4DF | |
a02f398d UB |
7626 | (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,m,0,x,0") |
7627 | (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,m,m,x")) | |
5e04b3b6 RH |
7628 | (parallel [(const_int 0) |
7629 | (const_int 2)])))] | |
4e76acd2 | 7630 | "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)" |
5e04b3b6 RH |
7631 | "@ |
7632 | unpcklpd\t{%2, %0|%0, %2} | |
3729983c | 7633 | vunpcklpd\t{%2, %1, %0|%0, %1, %2} |
eabb5f48 UB |
7634 | %vmovddup\t{%1, %0|%0, %q1} |
7635 | movhpd\t{%2, %0|%0, %q2} | |
7636 | vmovhpd\t{%2, %1, %0|%0, %1, %q2} | |
3729983c | 7637 | %vmovlpd\t{%2, %H0|%H0, %2}" |
a02f398d | 7638 | [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*") |
3729983c | 7639 | (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov") |
f220a4f4 | 7640 | (set_attr "ssememalign" "64") |
3729983c UB |
7641 | (set_attr "prefix_data16" "*,*,*,1,*,1") |
7642 | (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex") | |
7b0fe4f4 | 7643 | (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")]) |
95879c72 | 7644 | |
5e04b3b6 | 7645 | (define_split |
82e86dc6 | 7646 | [(set (match_operand:V2DF 0 "memory_operand") |
5e04b3b6 RH |
7647 | (vec_select:V2DF |
7648 | (vec_concat:V4DF | |
82e86dc6 | 7649 | (match_operand:V2DF 1 "register_operand") |
5e04b3b6 RH |
7650 | (match_dup 1)) |
7651 | (parallel [(const_int 0) | |
7652 | (const_int 2)])))] | |
7653 | "TARGET_SSE3 && reload_completed" | |
7654 | [(const_int 0)] | |
7655 | { | |
7656 | rtx low = gen_rtx_REG (DFmode, REGNO (operands[1])); | |
7657 | emit_move_insn (adjust_address (operands[0], DFmode, 0), low); | |
7658 | emit_move_insn (adjust_address (operands[0], DFmode, 8), low); | |
7659 | DONE; | |
7660 | }) | |
7661 | ||
7662 | (define_split | |
82e86dc6 | 7663 | [(set (match_operand:V2DF 0 "register_operand") |
5e04b3b6 RH |
7664 | (vec_select:V2DF |
7665 | (vec_concat:V4DF | |
82e86dc6 | 7666 | (match_operand:V2DF 1 "memory_operand") |
5e04b3b6 | 7667 | (match_dup 1)) |
82e86dc6 UB |
7668 | (parallel [(match_operand:SI 2 "const_0_to_1_operand") |
7669 | (match_operand:SI 3 "const_int_operand")])))] | |
5e04b3b6 RH |
7670 | "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])" |
7671 | [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))] | |
7672 | { | |
7673 | operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8); | |
7674 | }) | |
7675 | ||
075691af | 7676 | (define_insn "avx512f_vmscalef<mode><round_name>" |
afb4ac68 AI |
7677 | [(set (match_operand:VF_128 0 "register_operand" "=v") |
7678 | (vec_merge:VF_128 | |
47490470 AI |
7679 | (unspec:VF_128 |
7680 | [(match_operand:VF_128 1 "register_operand" "v") | |
c56a42b9 | 7681 | (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>")] |
47490470 | 7682 | UNSPEC_SCALEF) |
afb4ac68 AI |
7683 | (match_dup 1) |
7684 | (const_int 1)))] | |
7685 | "TARGET_AVX512F" | |
260d3642 | 7686 | "vscalef<ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}" |
afb4ac68 AI |
7687 | [(set_attr "prefix" "evex") |
7688 | (set_attr "mode" "<ssescalarmode>")]) | |
7689 | ||
b040ded3 AI |
7690 | (define_insn "<avx512>_scalef<mode><mask_name><round_name>" |
7691 | [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v") | |
7692 | (unspec:VF_AVX512VL | |
7693 | [(match_operand:VF_AVX512VL 1 "register_operand" "v") | |
7694 | (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")] | |
47490470 | 7695 | UNSPEC_SCALEF))] |
afb4ac68 | 7696 | "TARGET_AVX512F" |
260d3642 | 7697 | "vscalef<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}" |
afb4ac68 AI |
7698 | [(set_attr "prefix" "evex") |
7699 | (set_attr "mode" "<MODE>")]) | |
7700 | ||
6ead0238 AI |
7701 | (define_expand "<avx512>_vternlog<mode>_maskz" |
7702 | [(match_operand:VI48_AVX512VL 0 "register_operand") | |
7703 | (match_operand:VI48_AVX512VL 1 "register_operand") | |
7704 | (match_operand:VI48_AVX512VL 2 "register_operand") | |
7705 | (match_operand:VI48_AVX512VL 3 "nonimmediate_operand") | |
8b08db1e AI |
7706 | (match_operand:SI 4 "const_0_to_255_operand") |
7707 | (match_operand:<avx512fmaskmode> 5 "register_operand")] | |
7708 | "TARGET_AVX512F" | |
7709 | { | |
6ead0238 | 7710 | emit_insn (gen_<avx512>_vternlog<mode>_maskz_1 ( |
8b08db1e AI |
7711 | operands[0], operands[1], operands[2], operands[3], |
7712 | operands[4], CONST0_RTX (<MODE>mode), operands[5])); | |
7713 | DONE; | |
7714 | }) | |
7715 | ||
6ead0238 AI |
7716 | (define_insn "<avx512>_vternlog<mode><sd_maskz_name>" |
7717 | [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v") | |
7718 | (unspec:VI48_AVX512VL | |
7719 | [(match_operand:VI48_AVX512VL 1 "register_operand" "0") | |
7720 | (match_operand:VI48_AVX512VL 2 "register_operand" "v") | |
7721 | (match_operand:VI48_AVX512VL 3 "nonimmediate_operand" "vm") | |
0fe65b75 AI |
7722 | (match_operand:SI 4 "const_0_to_255_operand")] |
7723 | UNSPEC_VTERNLOG))] | |
7724 | "TARGET_AVX512F" | |
8b08db1e | 7725 | "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3, %4}" |
0fe65b75 AI |
7726 | [(set_attr "type" "sselog") |
7727 | (set_attr "prefix" "evex") | |
7728 | (set_attr "mode" "<sseinsnmode>")]) | |
7729 | ||
6ead0238 AI |
7730 | (define_insn "<avx512>_vternlog<mode>_mask" |
7731 | [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v") | |
7732 | (vec_merge:VI48_AVX512VL | |
7733 | (unspec:VI48_AVX512VL | |
7734 | [(match_operand:VI48_AVX512VL 1 "register_operand" "0") | |
7735 | (match_operand:VI48_AVX512VL 2 "register_operand" "v") | |
7736 | (match_operand:VI48_AVX512VL 3 "nonimmediate_operand" "vm") | |
47490470 AI |
7737 | (match_operand:SI 4 "const_0_to_255_operand")] |
7738 | UNSPEC_VTERNLOG) | |
7739 | (match_dup 1) | |
be792bce | 7740 | (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))] |
47490470 AI |
7741 | "TARGET_AVX512F" |
7742 | "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0%{%5%}|%0%{%5%}, %2, %3, %4}" | |
7743 | [(set_attr "type" "sselog") | |
7744 | (set_attr "prefix" "evex") | |
7745 | (set_attr "mode" "<sseinsnmode>")]) | |
7746 | ||
b040ded3 AI |
7747 | (define_insn "<avx512>_getexp<mode><mask_name><round_saeonly_name>" |
7748 | [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v") | |
7749 | (unspec:VF_AVX512VL [(match_operand:VF_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")] | |
afb4ac68 AI |
7750 | UNSPEC_GETEXP))] |
7751 | "TARGET_AVX512F" | |
8a6ef760 | 7752 | "vgetexp<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"; |
afb4ac68 AI |
7753 | [(set_attr "prefix" "evex") |
7754 | (set_attr "mode" "<MODE>")]) | |
7755 | ||
075691af | 7756 | (define_insn "avx512f_sgetexp<mode><round_saeonly_name>" |
afb4ac68 AI |
7757 | [(set (match_operand:VF_128 0 "register_operand" "=v") |
7758 | (vec_merge:VF_128 | |
47490470 AI |
7759 | (unspec:VF_128 |
7760 | [(match_operand:VF_128 1 "register_operand" "v") | |
c56a42b9 | 7761 | (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")] |
47490470 | 7762 | UNSPEC_GETEXP) |
afb4ac68 AI |
7763 | (match_dup 1) |
7764 | (const_int 1)))] | |
7765 | "TARGET_AVX512F" | |
075691af | 7766 | "vgetexp<ssescalarmodesuffix>\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %2<round_saeonly_op3>}"; |
afb4ac68 AI |
7767 | [(set_attr "prefix" "evex") |
7768 | (set_attr "mode" "<ssescalarmode>")]) | |
7769 | ||
6ead0238 AI |
7770 | (define_insn "<mask_codefor><avx512>_align<mode><mask_name>" |
7771 | [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v") | |
7772 | (unspec:VI48_AVX512VL [(match_operand:VI48_AVX512VL 1 "register_operand" "v") | |
7773 | (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm") | |
7774 | (match_operand:SI 3 "const_0_to_255_operand")] | |
7775 | UNSPEC_ALIGN))] | |
0fe65b75 | 7776 | "TARGET_AVX512F" |
47490470 | 7777 | "valign<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}"; |
0fe65b75 AI |
7778 | [(set_attr "prefix" "evex") |
7779 | (set_attr "mode" "<sseinsnmode>")]) | |
7780 | ||
47490470 AI |
7781 | (define_expand "avx512f_shufps512_mask" |
7782 | [(match_operand:V16SF 0 "register_operand") | |
7783 | (match_operand:V16SF 1 "register_operand") | |
7784 | (match_operand:V16SF 2 "nonimmediate_operand") | |
7785 | (match_operand:SI 3 "const_0_to_255_operand") | |
7786 | (match_operand:V16SF 4 "register_operand") | |
7787 | (match_operand:HI 5 "register_operand")] | |
7788 | "TARGET_AVX512F" | |
7789 | { | |
7790 | int mask = INTVAL (operands[3]); | |
7791 | emit_insn (gen_avx512f_shufps512_1_mask (operands[0], operands[1], operands[2], | |
7792 | GEN_INT ((mask >> 0) & 3), | |
7793 | GEN_INT ((mask >> 2) & 3), | |
7794 | GEN_INT (((mask >> 4) & 3) + 16), | |
7795 | GEN_INT (((mask >> 6) & 3) + 16), | |
7796 | GEN_INT (((mask >> 0) & 3) + 4), | |
7797 | GEN_INT (((mask >> 2) & 3) + 4), | |
7798 | GEN_INT (((mask >> 4) & 3) + 20), | |
7799 | GEN_INT (((mask >> 6) & 3) + 20), | |
7800 | GEN_INT (((mask >> 0) & 3) + 8), | |
7801 | GEN_INT (((mask >> 2) & 3) + 8), | |
7802 | GEN_INT (((mask >> 4) & 3) + 24), | |
7803 | GEN_INT (((mask >> 6) & 3) + 24), | |
7804 | GEN_INT (((mask >> 0) & 3) + 12), | |
7805 | GEN_INT (((mask >> 2) & 3) + 12), | |
7806 | GEN_INT (((mask >> 4) & 3) + 28), | |
7807 | GEN_INT (((mask >> 6) & 3) + 28), | |
7808 | operands[4], operands[5])); | |
7809 | DONE; | |
7810 | }) | |
7811 | ||
8b08db1e | 7812 | |
b040ded3 AI |
7813 | (define_expand "<avx512>_fixupimm<mode>_maskz<round_saeonly_expand_name>" |
7814 | [(match_operand:VF_AVX512VL 0 "register_operand") | |
7815 | (match_operand:VF_AVX512VL 1 "register_operand") | |
7816 | (match_operand:VF_AVX512VL 2 "register_operand") | |
4de67111 | 7817 | (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>") |
8b08db1e AI |
7818 | (match_operand:SI 4 "const_0_to_255_operand") |
7819 | (match_operand:<avx512fmaskmode> 5 "register_operand")] | |
7820 | "TARGET_AVX512F" | |
7821 | { | |
b040ded3 | 7822 | emit_insn (gen_<avx512>_fixupimm<mode>_maskz_1<round_saeonly_expand_name> ( |
8b08db1e | 7823 | operands[0], operands[1], operands[2], operands[3], |
4de67111 AI |
7824 | operands[4], CONST0_RTX (<MODE>mode), operands[5] |
7825 | <round_saeonly_expand_operand6>)); | |
8b08db1e AI |
7826 | DONE; |
7827 | }) | |
7828 | ||
b040ded3 AI |
7829 | (define_insn "<avx512>_fixupimm<mode><sd_maskz_name><round_saeonly_name>" |
7830 | [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v") | |
7831 | (unspec:VF_AVX512VL | |
7832 | [(match_operand:VF_AVX512VL 1 "register_operand" "0") | |
7833 | (match_operand:VF_AVX512VL 2 "register_operand" "v") | |
7834 | (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>") | |
afb4ac68 AI |
7835 | (match_operand:SI 4 "const_0_to_255_operand")] |
7836 | UNSPEC_FIXUPIMM))] | |
7837 | "TARGET_AVX512F" | |
8a6ef760 | 7838 | "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3<round_saeonly_sd_mask_op5>, %4}"; |
afb4ac68 AI |
7839 | [(set_attr "prefix" "evex") |
7840 | (set_attr "mode" "<MODE>")]) | |
7841 | ||
b040ded3 AI |
7842 | (define_insn "<avx512>_fixupimm<mode>_mask<round_saeonly_name>" |
7843 | [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v") | |
7844 | (vec_merge:VF_AVX512VL | |
7845 | (unspec:VF_AVX512VL | |
7846 | [(match_operand:VF_AVX512VL 1 "register_operand" "0") | |
7847 | (match_operand:VF_AVX512VL 2 "register_operand" "v") | |
7848 | (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>") | |
47490470 AI |
7849 | (match_operand:SI 4 "const_0_to_255_operand")] |
7850 | UNSPEC_FIXUPIMM) | |
7851 | (match_dup 1) | |
be792bce | 7852 | (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))] |
47490470 | 7853 | "TARGET_AVX512F" |
8a6ef760 | 7854 | "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %3<round_saeonly_op6>, %4}"; |
47490470 AI |
7855 | [(set_attr "prefix" "evex") |
7856 | (set_attr "mode" "<MODE>")]) | |
7857 | ||
4de67111 | 7858 | (define_expand "avx512f_sfixupimm<mode>_maskz<round_saeonly_expand_name>" |
8b08db1e AI |
7859 | [(match_operand:VF_128 0 "register_operand") |
7860 | (match_operand:VF_128 1 "register_operand") | |
7861 | (match_operand:VF_128 2 "register_operand") | |
4de67111 | 7862 | (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>") |
8b08db1e AI |
7863 | (match_operand:SI 4 "const_0_to_255_operand") |
7864 | (match_operand:<avx512fmaskmode> 5 "register_operand")] | |
7865 | "TARGET_AVX512F" | |
7866 | { | |
4de67111 | 7867 | emit_insn (gen_avx512f_sfixupimm<mode>_maskz_1<round_saeonly_expand_name> ( |
8b08db1e | 7868 | operands[0], operands[1], operands[2], operands[3], |
4de67111 AI |
7869 | operands[4], CONST0_RTX (<MODE>mode), operands[5] |
7870 | <round_saeonly_expand_operand6>)); | |
8b08db1e AI |
7871 | DONE; |
7872 | }) | |
7873 | ||
8a6ef760 | 7874 | (define_insn "avx512f_sfixupimm<mode><sd_maskz_name><round_saeonly_name>" |
afb4ac68 AI |
7875 | [(set (match_operand:VF_128 0 "register_operand" "=v") |
7876 | (vec_merge:VF_128 | |
7877 | (unspec:VF_128 | |
7878 | [(match_operand:VF_128 1 "register_operand" "0") | |
7879 | (match_operand:VF_128 2 "register_operand" "v") | |
8a6ef760 | 7880 | (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>") |
afb4ac68 AI |
7881 | (match_operand:SI 4 "const_0_to_255_operand")] |
7882 | UNSPEC_FIXUPIMM) | |
7883 | (match_dup 1) | |
7884 | (const_int 1)))] | |
7885 | "TARGET_AVX512F" | |
8a6ef760 | 7886 | "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3<round_saeonly_sd_mask_op5>, %4}"; |
afb4ac68 AI |
7887 | [(set_attr "prefix" "evex") |
7888 | (set_attr "mode" "<ssescalarmode>")]) | |
7889 | ||
8a6ef760 | 7890 | (define_insn "avx512f_sfixupimm<mode>_mask<round_saeonly_name>" |
47490470 AI |
7891 | [(set (match_operand:VF_128 0 "register_operand" "=v") |
7892 | (vec_merge:VF_128 | |
7893 | (vec_merge:VF_128 | |
7894 | (unspec:VF_128 | |
7895 | [(match_operand:VF_128 1 "register_operand" "0") | |
7896 | (match_operand:VF_128 2 "register_operand" "v") | |
8a6ef760 | 7897 | (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>") |
47490470 AI |
7898 | (match_operand:SI 4 "const_0_to_255_operand")] |
7899 | UNSPEC_FIXUPIMM) | |
7900 | (match_dup 1) | |
7901 | (const_int 1)) | |
7902 | (match_dup 1) | |
be792bce | 7903 | (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))] |
47490470 | 7904 | "TARGET_AVX512F" |
8a6ef760 | 7905 | "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %3<round_saeonly_op6>, %4}"; |
47490470 AI |
7906 | [(set_attr "prefix" "evex") |
7907 | (set_attr "mode" "<ssescalarmode>")]) | |
7908 | ||
b040ded3 AI |
7909 | (define_insn "<avx512>_rndscale<mode><mask_name><round_saeonly_name>" |
7910 | [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v") | |
7911 | (unspec:VF_AVX512VL | |
7912 | [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>") | |
afb4ac68 AI |
7913 | (match_operand:SI 2 "const_0_to_255_operand")] |
7914 | UNSPEC_ROUND))] | |
ec5e777c | 7915 | "TARGET_AVX512F" |
8a6ef760 | 7916 | "vrndscale<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}" |
ec5e777c AI |
7917 | [(set_attr "length_immediate" "1") |
7918 | (set_attr "prefix" "evex") | |
7919 | (set_attr "mode" "<MODE>")]) | |
7920 | ||
075691af | 7921 | (define_insn "avx512f_rndscale<mode><round_saeonly_name>" |
afb4ac68 AI |
7922 | [(set (match_operand:VF_128 0 "register_operand" "=v") |
7923 | (vec_merge:VF_128 | |
7924 | (unspec:VF_128 | |
7925 | [(match_operand:VF_128 1 "register_operand" "v") | |
c56a42b9 | 7926 | (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>") |
afb4ac68 AI |
7927 | (match_operand:SI 3 "const_0_to_255_operand")] |
7928 | UNSPEC_ROUND) | |
7929 | (match_dup 1) | |
7930 | (const_int 1)))] | |
7931 | "TARGET_AVX512F" | |
075691af | 7932 | "vrndscale<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}" |
afb4ac68 AI |
7933 | [(set_attr "length_immediate" "1") |
7934 | (set_attr "prefix" "evex") | |
7935 | (set_attr "mode" "<MODE>")]) | |
7936 | ||
c003c6d6 | 7937 | ;; One bit in mask selects 2 elements. |
47490470 | 7938 | (define_insn "avx512f_shufps512_1<mask_name>" |
c003c6d6 AI |
7939 | [(set (match_operand:V16SF 0 "register_operand" "=v") |
7940 | (vec_select:V16SF | |
7941 | (vec_concat:V32SF | |
7942 | (match_operand:V16SF 1 "register_operand" "v") | |
7943 | (match_operand:V16SF 2 "nonimmediate_operand" "vm")) | |
7944 | (parallel [(match_operand 3 "const_0_to_3_operand") | |
7945 | (match_operand 4 "const_0_to_3_operand") | |
7946 | (match_operand 5 "const_16_to_19_operand") | |
7947 | (match_operand 6 "const_16_to_19_operand") | |
7948 | (match_operand 7 "const_4_to_7_operand") | |
7949 | (match_operand 8 "const_4_to_7_operand") | |
7950 | (match_operand 9 "const_20_to_23_operand") | |
7951 | (match_operand 10 "const_20_to_23_operand") | |
7952 | (match_operand 11 "const_8_to_11_operand") | |
7953 | (match_operand 12 "const_8_to_11_operand") | |
7954 | (match_operand 13 "const_24_to_27_operand") | |
7955 | (match_operand 14 "const_24_to_27_operand") | |
7956 | (match_operand 15 "const_12_to_15_operand") | |
7957 | (match_operand 16 "const_12_to_15_operand") | |
7958 | (match_operand 17 "const_28_to_31_operand") | |
7959 | (match_operand 18 "const_28_to_31_operand")])))] | |
7960 | "TARGET_AVX512F | |
7961 | && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4) | |
7962 | && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4) | |
7963 | && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4) | |
7964 | && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4) | |
7965 | && INTVAL (operands[3]) == (INTVAL (operands[11]) - 8) | |
7966 | && INTVAL (operands[4]) == (INTVAL (operands[12]) - 8) | |
7967 | && INTVAL (operands[5]) == (INTVAL (operands[13]) - 8) | |
7968 | && INTVAL (operands[6]) == (INTVAL (operands[14]) - 8) | |
7969 | && INTVAL (operands[3]) == (INTVAL (operands[15]) - 12) | |
7970 | && INTVAL (operands[4]) == (INTVAL (operands[16]) - 12) | |
7971 | && INTVAL (operands[5]) == (INTVAL (operands[17]) - 12) | |
7972 | && INTVAL (operands[6]) == (INTVAL (operands[18]) - 12))" | |
7973 | { | |
7974 | int mask; | |
7975 | mask = INTVAL (operands[3]); | |
7976 | mask |= INTVAL (operands[4]) << 2; | |
7977 | mask |= (INTVAL (operands[5]) - 16) << 4; | |
7978 | mask |= (INTVAL (operands[6]) - 16) << 6; | |
7979 | operands[3] = GEN_INT (mask); | |
7980 | ||
47490470 | 7981 | return "vshufps\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}"; |
c003c6d6 AI |
7982 | } |
7983 | [(set_attr "type" "sselog") | |
7984 | (set_attr "length_immediate" "1") | |
7985 | (set_attr "prefix" "evex") | |
7986 | (set_attr "mode" "V16SF")]) | |
7987 | ||
47490470 AI |
7988 | (define_expand "avx512f_shufpd512_mask" |
7989 | [(match_operand:V8DF 0 "register_operand") | |
7990 | (match_operand:V8DF 1 "register_operand") | |
7991 | (match_operand:V8DF 2 "nonimmediate_operand") | |
7992 | (match_operand:SI 3 "const_0_to_255_operand") | |
7993 | (match_operand:V8DF 4 "register_operand") | |
7994 | (match_operand:QI 5 "register_operand")] | |
7995 | "TARGET_AVX512F" | |
7996 | { | |
7997 | int mask = INTVAL (operands[3]); | |
7998 | emit_insn (gen_avx512f_shufpd512_1_mask (operands[0], operands[1], operands[2], | |
7999 | GEN_INT (mask & 1), | |
8000 | GEN_INT (mask & 2 ? 9 : 8), | |
8001 | GEN_INT (mask & 4 ? 3 : 2), | |
8002 | GEN_INT (mask & 8 ? 11 : 10), | |
8003 | GEN_INT (mask & 16 ? 5 : 4), | |
8004 | GEN_INT (mask & 32 ? 13 : 12), | |
8005 | GEN_INT (mask & 64 ? 7 : 6), | |
8006 | GEN_INT (mask & 128 ? 15 : 14), | |
8007 | operands[4], operands[5])); | |
8008 | DONE; | |
8009 | }) | |
8010 | ||
8011 | (define_insn "avx512f_shufpd512_1<mask_name>" | |
c003c6d6 AI |
8012 | [(set (match_operand:V8DF 0 "register_operand" "=v") |
8013 | (vec_select:V8DF | |
8014 | (vec_concat:V16DF | |
8015 | (match_operand:V8DF 1 "register_operand" "v") | |
8016 | (match_operand:V8DF 2 "nonimmediate_operand" "vm")) | |
8017 | (parallel [(match_operand 3 "const_0_to_1_operand") | |
8018 | (match_operand 4 "const_8_to_9_operand") | |
8019 | (match_operand 5 "const_2_to_3_operand") | |
8020 | (match_operand 6 "const_10_to_11_operand") | |
8021 | (match_operand 7 "const_4_to_5_operand") | |
8022 | (match_operand 8 "const_12_to_13_operand") | |
8023 | (match_operand 9 "const_6_to_7_operand") | |
8024 | (match_operand 10 "const_14_to_15_operand")])))] | |
8025 | "TARGET_AVX512F" | |
8026 | { | |
8027 | int mask; | |
8028 | mask = INTVAL (operands[3]); | |
8029 | mask |= (INTVAL (operands[4]) - 8) << 1; | |
8030 | mask |= (INTVAL (operands[5]) - 2) << 2; | |
8031 | mask |= (INTVAL (operands[6]) - 10) << 3; | |
8032 | mask |= (INTVAL (operands[7]) - 4) << 4; | |
8033 | mask |= (INTVAL (operands[8]) - 12) << 5; | |
8034 | mask |= (INTVAL (operands[9]) - 6) << 6; | |
8035 | mask |= (INTVAL (operands[10]) - 14) << 7; | |
8036 | operands[3] = GEN_INT (mask); | |
8037 | ||
47490470 | 8038 | return "vshufpd\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}"; |
c003c6d6 AI |
8039 | } |
8040 | [(set_attr "type" "sselog") | |
8041 | (set_attr "length_immediate" "1") | |
8042 | (set_attr "prefix" "evex") | |
8043 | (set_attr "mode" "V8DF")]) | |
8044 | ||
b2d623e5 | 8045 | (define_expand "avx_shufpd256<mask_expand4_name>" |
82e86dc6 UB |
8046 | [(match_operand:V4DF 0 "register_operand") |
8047 | (match_operand:V4DF 1 "register_operand") | |
8048 | (match_operand:V4DF 2 "nonimmediate_operand") | |
8049 | (match_operand:SI 3 "const_int_operand")] | |
95879c72 L |
8050 | "TARGET_AVX" |
8051 | { | |
8052 | int mask = INTVAL (operands[3]); | |
b2d623e5 AI |
8053 | emit_insn (gen_avx_shufpd256_1<mask_expand4_name> (operands[0], |
8054 | operands[1], | |
8055 | operands[2], | |
8056 | GEN_INT (mask & 1), | |
8057 | GEN_INT (mask & 2 ? 5 : 4), | |
8058 | GEN_INT (mask & 4 ? 3 : 2), | |
8059 | GEN_INT (mask & 8 ? 7 : 6) | |
8060 | <mask_expand4_args>)); | |
95879c72 L |
8061 | DONE; |
8062 | }) | |
8063 | ||
b2d623e5 AI |
8064 | (define_insn "avx_shufpd256_1<mask_name>" |
8065 | [(set (match_operand:V4DF 0 "register_operand" "=v") | |
95879c72 L |
8066 | (vec_select:V4DF |
8067 | (vec_concat:V8DF | |
b2d623e5 AI |
8068 | (match_operand:V4DF 1 "register_operand" "v") |
8069 | (match_operand:V4DF 2 "nonimmediate_operand" "vm")) | |
82e86dc6 UB |
8070 | (parallel [(match_operand 3 "const_0_to_1_operand") |
8071 | (match_operand 4 "const_4_to_5_operand") | |
8072 | (match_operand 5 "const_2_to_3_operand") | |
8073 | (match_operand 6 "const_6_to_7_operand")])))] | |
b2d623e5 | 8074 | "TARGET_AVX && <mask_avx512vl_condition>" |
95879c72 L |
8075 | { |
8076 | int mask; | |
8077 | mask = INTVAL (operands[3]); | |
8078 | mask |= (INTVAL (operands[4]) - 4) << 1; | |
8079 | mask |= (INTVAL (operands[5]) - 2) << 2; | |
8080 | mask |= (INTVAL (operands[6]) - 6) << 3; | |
8081 | operands[3] = GEN_INT (mask); | |
8082 | ||
b2d623e5 | 8083 | return "vshufpd\t{%3, %2, %1, %0<mask_operand7>|%0<mask_operand7>, %1, %2, %3}"; |
95879c72 | 8084 | } |
eb2f2b44 | 8085 | [(set_attr "type" "sseshuf") |
725fd454 | 8086 | (set_attr "length_immediate" "1") |
95879c72 L |
8087 | (set_attr "prefix" "vex") |
8088 | (set_attr "mode" "V4DF")]) | |
8089 | ||
b2d623e5 | 8090 | (define_expand "sse2_shufpd<mask_expand4_name>" |
82e86dc6 UB |
8091 | [(match_operand:V2DF 0 "register_operand") |
8092 | (match_operand:V2DF 1 "register_operand") | |
8093 | (match_operand:V2DF 2 "nonimmediate_operand") | |
8094 | (match_operand:SI 3 "const_int_operand")] | |
ef719a44 RH |
8095 | "TARGET_SSE2" |
8096 | { | |
8097 | int mask = INTVAL (operands[3]); | |
b2d623e5 AI |
8098 | emit_insn (gen_sse2_shufpd_v2df<mask_expand4_name> (operands[0], operands[1], |
8099 | operands[2], GEN_INT (mask & 1), | |
8100 | GEN_INT (mask & 2 ? 3 : 2) | |
8101 | <mask_expand4_args>)); | |
ef719a44 RH |
8102 | DONE; |
8103 | }) | |
8104 | ||
b2d623e5 AI |
8105 | (define_insn "sse2_shufpd_v2df_mask" |
8106 | [(set (match_operand:V2DF 0 "register_operand" "=v") | |
8107 | (vec_merge:V2DF | |
8108 | (vec_select:V2DF | |
8109 | (vec_concat:V4DF | |
8110 | (match_operand:V2DF 1 "register_operand" "v") | |
8111 | (match_operand:V2DF 2 "nonimmediate_operand" "vm")) | |
8112 | (parallel [(match_operand 3 "const_0_to_1_operand") | |
8113 | (match_operand 4 "const_2_to_3_operand")])) | |
8114 | (match_operand:V2DF 5 "vector_move_operand" "0C") | |
8115 | (match_operand:QI 6 "register_operand" "Yk")))] | |
8116 | "TARGET_AVX512VL" | |
8117 | { | |
8118 | int mask; | |
8119 | mask = INTVAL (operands[3]); | |
8120 | mask |= (INTVAL (operands[4]) - 2) << 1; | |
8121 | operands[3] = GEN_INT (mask); | |
8122 | ||
8123 | return "vshufpd\t{%3, %2, %1, %0%{%6%}%N5|%0%{6%}%N5, %1, %2, %3}"; | |
8124 | } | |
8125 | [(set_attr "type" "sseshuf") | |
8126 | (set_attr "length_immediate" "1") | |
8127 | (set_attr "prefix" "evex") | |
8128 | (set_attr "mode" "V2DF")]) | |
8129 | ||
b8aaf506 | 8130 | ;; punpcklqdq and punpckhqdq are shorter than shufpd. |
8cb0a27f AI |
8131 | (define_insn "avx2_interleave_highv4di<mask_name>" |
8132 | [(set (match_operand:V4DI 0 "register_operand" "=v") | |
977e83a3 KY |
8133 | (vec_select:V4DI |
8134 | (vec_concat:V8DI | |
8cb0a27f AI |
8135 | (match_operand:V4DI 1 "register_operand" "v") |
8136 | (match_operand:V4DI 2 "nonimmediate_operand" "vm")) | |
977e83a3 KY |
8137 | (parallel [(const_int 1) |
8138 | (const_int 5) | |
8139 | (const_int 3) | |
8140 | (const_int 7)])))] | |
8cb0a27f AI |
8141 | "TARGET_AVX2 && <mask_avx512vl_condition>" |
8142 | "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" | |
977e83a3 KY |
8143 | [(set_attr "type" "sselog") |
8144 | (set_attr "prefix" "vex") | |
8145 | (set_attr "mode" "OI")]) | |
95879c72 | 8146 | |
47490470 | 8147 | (define_insn "<mask_codefor>avx512f_interleave_highv8di<mask_name>" |
c003c6d6 AI |
8148 | [(set (match_operand:V8DI 0 "register_operand" "=v") |
8149 | (vec_select:V8DI | |
8150 | (vec_concat:V16DI | |
8151 | (match_operand:V8DI 1 "register_operand" "v") | |
8152 | (match_operand:V8DI 2 "nonimmediate_operand" "vm")) | |
8153 | (parallel [(const_int 1) (const_int 9) | |
8154 | (const_int 3) (const_int 11) | |
8155 | (const_int 5) (const_int 13) | |
8156 | (const_int 7) (const_int 15)])))] | |
8157 | "TARGET_AVX512F" | |
47490470 | 8158 | "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
c003c6d6 AI |
8159 | [(set_attr "type" "sselog") |
8160 | (set_attr "prefix" "evex") | |
8161 | (set_attr "mode" "XI")]) | |
8162 | ||
8cb0a27f AI |
8163 | (define_insn "vec_interleave_highv2di<mask_name>" |
8164 | [(set (match_operand:V2DI 0 "register_operand" "=x,v") | |
b8aaf506 L |
8165 | (vec_select:V2DI |
8166 | (vec_concat:V4DI | |
8cb0a27f AI |
8167 | (match_operand:V2DI 1 "register_operand" "0,v") |
8168 | (match_operand:V2DI 2 "nonimmediate_operand" "xm,vm")) | |
b8aaf506 L |
8169 | (parallel [(const_int 1) |
8170 | (const_int 3)])))] | |
8cb0a27f | 8171 | "TARGET_SSE2 && <mask_avx512vl_condition>" |
3729983c UB |
8172 | "@ |
8173 | punpckhqdq\t{%2, %0|%0, %2} | |
8cb0a27f | 8174 | vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
3729983c UB |
8175 | [(set_attr "isa" "noavx,avx") |
8176 | (set_attr "type" "sselog") | |
8177 | (set_attr "prefix_data16" "1,*") | |
8cb0a27f | 8178 | (set_attr "prefix" "orig,<mask_prefix>") |
95879c72 L |
8179 | (set_attr "mode" "TI")]) |
8180 | ||
8cb0a27f AI |
8181 | (define_insn "avx2_interleave_lowv4di<mask_name>" |
8182 | [(set (match_operand:V4DI 0 "register_operand" "=v") | |
977e83a3 KY |
8183 | (vec_select:V4DI |
8184 | (vec_concat:V8DI | |
8cb0a27f AI |
8185 | (match_operand:V4DI 1 "register_operand" "v") |
8186 | (match_operand:V4DI 2 "nonimmediate_operand" "vm")) | |
977e83a3 KY |
8187 | (parallel [(const_int 0) |
8188 | (const_int 4) | |
8189 | (const_int 2) | |
8190 | (const_int 6)])))] | |
8cb0a27f AI |
8191 | "TARGET_AVX2 && <mask_avx512vl_condition>" |
8192 | "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" | |
977e83a3 KY |
8193 | [(set_attr "type" "sselog") |
8194 | (set_attr "prefix" "vex") | |
8195 | (set_attr "mode" "OI")]) | |
8196 | ||
47490470 | 8197 | (define_insn "<mask_codefor>avx512f_interleave_lowv8di<mask_name>" |
c003c6d6 AI |
8198 | [(set (match_operand:V8DI 0 "register_operand" "=v") |
8199 | (vec_select:V8DI | |
8200 | (vec_concat:V16DI | |
8201 | (match_operand:V8DI 1 "register_operand" "v") | |
8202 | (match_operand:V8DI 2 "nonimmediate_operand" "vm")) | |
8203 | (parallel [(const_int 0) (const_int 8) | |
8204 | (const_int 2) (const_int 10) | |
8205 | (const_int 4) (const_int 12) | |
8206 | (const_int 6) (const_int 14)])))] | |
8207 | "TARGET_AVX512F" | |
47490470 | 8208 | "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
c003c6d6 AI |
8209 | [(set_attr "type" "sselog") |
8210 | (set_attr "prefix" "evex") | |
8211 | (set_attr "mode" "XI")]) | |
8212 | ||
8cb0a27f AI |
8213 | (define_insn "vec_interleave_lowv2di<mask_name>" |
8214 | [(set (match_operand:V2DI 0 "register_operand" "=x,v") | |
b8aaf506 L |
8215 | (vec_select:V2DI |
8216 | (vec_concat:V4DI | |
8cb0a27f AI |
8217 | (match_operand:V2DI 1 "register_operand" "0,v") |
8218 | (match_operand:V2DI 2 "nonimmediate_operand" "xm,vm")) | |
b8aaf506 L |
8219 | (parallel [(const_int 0) |
8220 | (const_int 2)])))] | |
8cb0a27f | 8221 | "TARGET_SSE2 && <mask_avx512vl_condition>" |
3729983c UB |
8222 | "@ |
8223 | punpcklqdq\t{%2, %0|%0, %2} | |
8cb0a27f | 8224 | vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
3729983c UB |
8225 | [(set_attr "isa" "noavx,avx") |
8226 | (set_attr "type" "sselog") | |
8227 | (set_attr "prefix_data16" "1,*") | |
8228 | (set_attr "prefix" "orig,vex") | |
b8aaf506 L |
8229 | (set_attr "mode" "TI")]) |
8230 | ||
ba63dfb9 | 8231 | (define_insn "sse2_shufpd_<mode>" |
6bec6c98 UB |
8232 | [(set (match_operand:VI8F_128 0 "register_operand" "=x,x") |
8233 | (vec_select:VI8F_128 | |
cbb734aa | 8234 | (vec_concat:<ssedoublevecmode> |
6bec6c98 UB |
8235 | (match_operand:VI8F_128 1 "register_operand" "0,x") |
8236 | (match_operand:VI8F_128 2 "nonimmediate_operand" "xm,xm")) | |
82e86dc6 UB |
8237 | (parallel [(match_operand 3 "const_0_to_1_operand") |
8238 | (match_operand 4 "const_2_to_3_operand")])))] | |
ef719a44 RH |
8239 | "TARGET_SSE2" |
8240 | { | |
8241 | int mask; | |
8242 | mask = INTVAL (operands[3]); | |
8243 | mask |= (INTVAL (operands[4]) - 2) << 1; | |
8244 | operands[3] = GEN_INT (mask); | |
8245 | ||
3729983c UB |
8246 | switch (which_alternative) |
8247 | { | |
8248 | case 0: | |
8249 | return "shufpd\t{%3, %2, %0|%0, %2, %3}"; | |
8250 | case 1: | |
8251 | return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"; | |
8252 | default: | |
8253 | gcc_unreachable (); | |
8254 | } | |
ef719a44 | 8255 | } |
3729983c | 8256 | [(set_attr "isa" "noavx,avx") |
eb2f2b44 | 8257 | (set_attr "type" "sseshuf") |
725fd454 | 8258 | (set_attr "length_immediate" "1") |
3729983c | 8259 | (set_attr "prefix" "orig,vex") |
ef719a44 RH |
8260 | (set_attr "mode" "V2DF")]) |
8261 | ||
a3d4a22b UB |
8262 | ;; Avoid combining registers from different units in a single alternative, |
8263 | ;; see comment above inline_secondary_memory_needed function in i386.c | |
ef719a44 | 8264 | (define_insn "sse2_storehpd" |
3729983c | 8265 | [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,x,*f,r") |
ef719a44 | 8266 | (vec_select:DF |
3729983c | 8267 | (match_operand:V2DF 1 "nonimmediate_operand" " x,0,x,o,o,o") |
ef719a44 RH |
8268 | (parallel [(const_int 1)])))] |
8269 | "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))" | |
8270 | "@ | |
3729983c | 8271 | %vmovhpd\t{%1, %0|%0, %1} |
ef719a44 | 8272 | unpckhpd\t%0, %0 |
3729983c | 8273 | vunpckhpd\t{%d1, %0|%0, %d1} |
a3d4a22b UB |
8274 | # |
8275 | # | |
ef719a44 | 8276 | #" |
ba94c7af | 8277 | [(set_attr "isa" "*,noavx,avx,*,*,*") |
3729983c UB |
8278 | (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov") |
8279 | (set (attr "prefix_data16") | |
8280 | (if_then_else | |
8281 | (and (eq_attr "alternative" "0") | |
67b2c493 | 8282 | (not (match_test "TARGET_AVX"))) |
3729983c UB |
8283 | (const_string "1") |
8284 | (const_string "*"))) | |
8285 | (set_attr "prefix" "maybe_vex,orig,vex,*,*,*") | |
8286 | (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")]) | |
ef719a44 RH |
8287 | |
8288 | (define_split | |
82e86dc6 | 8289 | [(set (match_operand:DF 0 "register_operand") |
ef719a44 | 8290 | (vec_select:DF |
82e86dc6 | 8291 | (match_operand:V2DF 1 "memory_operand") |
ef719a44 RH |
8292 | (parallel [(const_int 1)])))] |
8293 | "TARGET_SSE2 && reload_completed" | |
8294 | [(set (match_dup 0) (match_dup 1))] | |
a427621f | 8295 | "operands[1] = adjust_address (operands[1], DFmode, 8);") |
ef719a44 | 8296 | |
ba94c7af UB |
8297 | (define_insn "*vec_extractv2df_1_sse" |
8298 | [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x") | |
8299 | (vec_select:DF | |
8300 | (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o") | |
8301 | (parallel [(const_int 1)])))] | |
8302 | "!TARGET_SSE2 && TARGET_SSE | |
8303 | && !(MEM_P (operands[0]) && MEM_P (operands[1]))" | |
8304 | "@ | |
eabb5f48 | 8305 | movhps\t{%1, %0|%q0, %1} |
ba94c7af UB |
8306 | movhlps\t{%1, %0|%0, %1} |
8307 | movlps\t{%H1, %0|%0, %H1}" | |
8308 | [(set_attr "type" "ssemov") | |
f220a4f4 | 8309 | (set_attr "ssememalign" "64") |
ba94c7af UB |
8310 | (set_attr "mode" "V2SF,V4SF,V2SF")]) |
8311 | ||
a3d4a22b UB |
8312 | ;; Avoid combining registers from different units in a single alternative, |
8313 | ;; see comment above inline_secondary_memory_needed function in i386.c | |
ef719a44 | 8314 | (define_insn "sse2_storelpd" |
a3d4a22b | 8315 | [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r") |
ef719a44 | 8316 | (vec_select:DF |
a3d4a22b | 8317 | (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m") |
ef719a44 RH |
8318 | (parallel [(const_int 0)])))] |
8319 | "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))" | |
8320 | "@ | |
95879c72 | 8321 | %vmovlpd\t{%1, %0|%0, %1} |
ef719a44 | 8322 | # |
a3d4a22b UB |
8323 | # |
8324 | # | |
ef719a44 | 8325 | #" |
a3d4a22b | 8326 | [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov") |
725fd454 | 8327 | (set_attr "prefix_data16" "1,*,*,*,*") |
95879c72 | 8328 | (set_attr "prefix" "maybe_vex") |
a3d4a22b | 8329 | (set_attr "mode" "V1DF,DF,DF,DF,DF")]) |
ef719a44 RH |
8330 | |
8331 | (define_split | |
82e86dc6 | 8332 | [(set (match_operand:DF 0 "register_operand") |
ef719a44 | 8333 | (vec_select:DF |
82e86dc6 | 8334 | (match_operand:V2DF 1 "nonimmediate_operand") |
ef719a44 RH |
8335 | (parallel [(const_int 0)])))] |
8336 | "TARGET_SSE2 && reload_completed" | |
36c4015b | 8337 | [(set (match_dup 0) (match_dup 1))] |
ef719a44 | 8338 | { |
36c4015b UB |
8339 | if (REG_P (operands[1])) |
8340 | operands[1] = gen_rtx_REG (DFmode, REGNO (operands[1])); | |
eb701deb | 8341 | else |
36c4015b | 8342 | operands[1] = adjust_address (operands[1], DFmode, 0); |
ef719a44 RH |
8343 | }) |
8344 | ||
ba94c7af UB |
8345 | (define_insn "*vec_extractv2df_0_sse" |
8346 | [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x") | |
8347 | (vec_select:DF | |
8348 | (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m") | |
8349 | (parallel [(const_int 0)])))] | |
8350 | "!TARGET_SSE2 && TARGET_SSE | |
8351 | && !(MEM_P (operands[0]) && MEM_P (operands[1]))" | |
8352 | "@ | |
8353 | movlps\t{%1, %0|%0, %1} | |
8354 | movaps\t{%1, %0|%0, %1} | |
eabb5f48 | 8355 | movlps\t{%1, %0|%0, %q1}" |
ba94c7af UB |
8356 | [(set_attr "type" "ssemov") |
8357 | (set_attr "mode" "V2SF,V4SF,V2SF")]) | |
8358 | ||
3a3f9d87 | 8359 | (define_expand "sse2_loadhpd_exp" |
82e86dc6 | 8360 | [(set (match_operand:V2DF 0 "nonimmediate_operand") |
ffbaf337 UB |
8361 | (vec_concat:V2DF |
8362 | (vec_select:DF | |
82e86dc6 | 8363 | (match_operand:V2DF 1 "nonimmediate_operand") |
ffbaf337 | 8364 | (parallel [(const_int 0)])) |
82e86dc6 | 8365 | (match_operand:DF 2 "nonimmediate_operand")))] |
ffbaf337 | 8366 | "TARGET_SSE2" |
f17aa4ad UB |
8367 | { |
8368 | rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands); | |
6cf9eb27 | 8369 | |
f17aa4ad UB |
8370 | emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2])); |
8371 | ||
8372 | /* Fix up the destination if needed. */ | |
8373 | if (dst != operands[0]) | |
8374 | emit_move_insn (operands[0], dst); | |
8375 | ||
8376 | DONE; | |
8377 | }) | |
ffbaf337 | 8378 | |
a3d4a22b UB |
8379 | ;; Avoid combining registers from different units in a single alternative, |
8380 | ;; see comment above inline_secondary_memory_needed function in i386.c | |
3a3f9d87 | 8381 | (define_insn "sse2_loadhpd" |
3729983c | 8382 | [(set (match_operand:V2DF 0 "nonimmediate_operand" |
df1f7315 | 8383 | "=x,x,x,x,o,o ,o") |
ef719a44 RH |
8384 | (vec_concat:V2DF |
8385 | (vec_select:DF | |
3729983c | 8386 | (match_operand:V2DF 1 "nonimmediate_operand" |
df1f7315 | 8387 | " 0,x,0,x,0,0 ,0") |
ef719a44 | 8388 | (parallel [(const_int 0)])) |
3729983c | 8389 | (match_operand:DF 2 "nonimmediate_operand" |
df1f7315 | 8390 | " m,m,x,x,x,*f,r")))] |
2fe4dc01 | 8391 | "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))" |
ef719a44 RH |
8392 | "@ |
8393 | movhpd\t{%2, %0|%0, %2} | |
3729983c | 8394 | vmovhpd\t{%2, %1, %0|%0, %1, %2} |
ef719a44 | 8395 | unpcklpd\t{%2, %0|%0, %2} |
3729983c | 8396 | vunpcklpd\t{%2, %1, %0|%0, %1, %2} |
a3d4a22b UB |
8397 | # |
8398 | # | |
ef719a44 | 8399 | #" |
ba94c7af | 8400 | [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*") |
df1f7315 | 8401 | (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov") |
f220a4f4 | 8402 | (set_attr "ssememalign" "64") |
df1f7315 UB |
8403 | (set_attr "prefix_data16" "1,*,*,*,*,*,*") |
8404 | (set_attr "prefix" "orig,vex,orig,vex,*,*,*") | |
8405 | (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")]) | |
ef719a44 RH |
8406 | |
8407 | (define_split | |
82e86dc6 | 8408 | [(set (match_operand:V2DF 0 "memory_operand") |
ef719a44 RH |
8409 | (vec_concat:V2DF |
8410 | (vec_select:DF (match_dup 0) (parallel [(const_int 0)])) | |
82e86dc6 | 8411 | (match_operand:DF 1 "register_operand")))] |
ef719a44 RH |
8412 | "TARGET_SSE2 && reload_completed" |
8413 | [(set (match_dup 0) (match_dup 1))] | |
a427621f | 8414 | "operands[0] = adjust_address (operands[0], DFmode, 8);") |
ef719a44 | 8415 | |
3a3f9d87 | 8416 | (define_expand "sse2_loadlpd_exp" |
82e86dc6 | 8417 | [(set (match_operand:V2DF 0 "nonimmediate_operand") |
ffbaf337 | 8418 | (vec_concat:V2DF |
82e86dc6 | 8419 | (match_operand:DF 2 "nonimmediate_operand") |
ffbaf337 | 8420 | (vec_select:DF |
82e86dc6 | 8421 | (match_operand:V2DF 1 "nonimmediate_operand") |
ffbaf337 UB |
8422 | (parallel [(const_int 1)]))))] |
8423 | "TARGET_SSE2" | |
f17aa4ad UB |
8424 | { |
8425 | rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands); | |
6cf9eb27 | 8426 | |
f17aa4ad UB |
8427 | emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2])); |
8428 | ||
8429 | /* Fix up the destination if needed. */ | |
8430 | if (dst != operands[0]) | |
8431 | emit_move_insn (operands[0], dst); | |
8432 | ||
8433 | DONE; | |
8434 | }) | |
ffbaf337 | 8435 | |
a3d4a22b UB |
8436 | ;; Avoid combining registers from different units in a single alternative, |
8437 | ;; see comment above inline_secondary_memory_needed function in i386.c | |
3a3f9d87 | 8438 | (define_insn "sse2_loadlpd" |
3729983c UB |
8439 | [(set (match_operand:V2DF 0 "nonimmediate_operand" |
8440 | "=x,x,x,x,x,x,x,x,m,m ,m") | |
ef719a44 | 8441 | (vec_concat:V2DF |
3729983c UB |
8442 | (match_operand:DF 2 "nonimmediate_operand" |
8443 | " m,m,m,x,x,0,0,x,x,*f,r") | |
ef719a44 | 8444 | (vec_select:DF |
3729983c UB |
8445 | (match_operand:V2DF 1 "vector_move_operand" |
8446 | " C,0,x,0,x,x,o,o,0,0 ,0") | |
ef719a44 RH |
8447 | (parallel [(const_int 1)]))))] |
8448 | "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))" | |
8449 | "@ | |
3729983c | 8450 | %vmovsd\t{%2, %0|%0, %2} |
ef719a44 | 8451 | movlpd\t{%2, %0|%0, %2} |
3729983c | 8452 | vmovlpd\t{%2, %1, %0|%0, %1, %2} |
ef719a44 | 8453 | movsd\t{%2, %0|%0, %2} |
3729983c | 8454 | vmovsd\t{%2, %1, %0|%0, %1, %2} |
401e4fea | 8455 | shufpd\t{$2, %1, %0|%0, %1, 2} |
ef719a44 | 8456 | movhpd\t{%H1, %0|%0, %H1} |
3729983c | 8457 | vmovhpd\t{%H1, %2, %0|%0, %2, %H1} |
a3d4a22b UB |
8458 | # |
8459 | # | |
ef719a44 | 8460 | #" |
ba94c7af UB |
8461 | [(set_attr "isa" "*,noavx,avx,noavx,avx,noavx,noavx,avx,*,*,*") |
8462 | (set (attr "type") | |
8463 | (cond [(eq_attr "alternative" "5") | |
8464 | (const_string "sselog") | |
8465 | (eq_attr "alternative" "9") | |
8466 | (const_string "fmov") | |
8467 | (eq_attr "alternative" "10") | |
8468 | (const_string "imov") | |
8469 | ] | |
8470 | (const_string "ssemov"))) | |
f220a4f4 | 8471 | (set_attr "ssememalign" "64") |
3729983c UB |
8472 | (set_attr "prefix_data16" "*,1,*,*,*,*,1,*,*,*,*") |
8473 | (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*,*,*") | |
8474 | (set_attr "prefix" "maybe_vex,orig,vex,orig,vex,orig,orig,vex,*,*,*") | |
8475 | (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")]) | |
ef719a44 RH |
8476 | |
8477 | (define_split | |
82e86dc6 | 8478 | [(set (match_operand:V2DF 0 "memory_operand") |
ef719a44 | 8479 | (vec_concat:V2DF |
82e86dc6 | 8480 | (match_operand:DF 1 "register_operand") |
ef719a44 RH |
8481 | (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))] |
8482 | "TARGET_SSE2 && reload_completed" | |
8483 | [(set (match_dup 0) (match_dup 1))] | |
be47bf24 | 8484 | "operands[0] = adjust_address (operands[0], DFmode, 0);") |
ef719a44 | 8485 | |
ef719a44 | 8486 | (define_insn "sse2_movsd" |
3729983c | 8487 | [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,x,x,x,o") |
ef719a44 | 8488 | (vec_merge:V2DF |
3729983c UB |
8489 | (match_operand:V2DF 2 "nonimmediate_operand" " x,x,m,m,x,0,0,x,0") |
8490 | (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,0,x,0,x,o,o,x") | |
ef719a44 RH |
8491 | (const_int 1)))] |
8492 | "TARGET_SSE2" | |
8493 | "@ | |
8494 | movsd\t{%2, %0|%0, %2} | |
3729983c | 8495 | vmovsd\t{%2, %1, %0|%0, %1, %2} |
eabb5f48 UB |
8496 | movlpd\t{%2, %0|%0, %q2} |
8497 | vmovlpd\t{%2, %1, %0|%0, %1, %q2} | |
8498 | %vmovlpd\t{%2, %0|%q0, %2} | |
401e4fea | 8499 | shufpd\t{$2, %1, %0|%0, %1, 2} |
f013cadc | 8500 | movhps\t{%H1, %0|%0, %H1} |
3729983c UB |
8501 | vmovhps\t{%H1, %2, %0|%0, %2, %H1} |
8502 | %vmovhps\t{%1, %H0|%H0, %1}" | |
ba94c7af UB |
8503 | [(set_attr "isa" "noavx,avx,noavx,avx,*,noavx,noavx,avx,*") |
8504 | (set (attr "type") | |
8505 | (if_then_else | |
8506 | (eq_attr "alternative" "5") | |
8507 | (const_string "sselog") | |
8508 | (const_string "ssemov"))) | |
3729983c UB |
8509 | (set (attr "prefix_data16") |
8510 | (if_then_else | |
8511 | (and (eq_attr "alternative" "2,4") | |
67b2c493 | 8512 | (not (match_test "TARGET_AVX"))) |
3729983c UB |
8513 | (const_string "1") |
8514 | (const_string "*"))) | |
8515 | (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*") | |
f220a4f4 | 8516 | (set_attr "ssememalign" "64") |
3729983c UB |
8517 | (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig,vex,maybe_vex") |
8518 | (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")]) | |
ef719a44 | 8519 | |
6eacd27c AI |
8520 | (define_insn "vec_dupv2df<mask_name>" |
8521 | [(set (match_operand:V2DF 0 "register_operand" "=x,v") | |
9ee65b55 | 8522 | (vec_duplicate:V2DF |
6eacd27c AI |
8523 | (match_operand:DF 1 "nonimmediate_operand" " 0,vm")))] |
8524 | "TARGET_SSE2 && <mask_avx512vl_condition>" | |
da957891 UB |
8525 | "@ |
8526 | unpcklpd\t%0, %0 | |
6eacd27c | 8527 | %vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" |
da957891 UB |
8528 | [(set_attr "isa" "noavx,sse3") |
8529 | (set_attr "type" "sselog1") | |
8530 | (set_attr "prefix" "orig,maybe_vex") | |
7b0fe4f4 | 8531 | (set_attr "mode" "V2DF,DF")]) |
eb701deb | 8532 | |
eb701deb | 8533 | (define_insn "*vec_concatv2df" |
6eacd27c | 8534 | [(set (match_operand:V2DF 0 "register_operand" "=x,v,v,x,x,v,x,x") |
eb701deb | 8535 | (vec_concat:V2DF |
6eacd27c AI |
8536 | (match_operand:DF 1 "nonimmediate_operand" " 0,v,m,0,x,m,0,0") |
8537 | (match_operand:DF 2 "vector_move_operand" " x,v,1,m,m,C,x,m")))] | |
ee768d85 UB |
8538 | "TARGET_SSE |
8539 | && (!(MEM_P (operands[1]) && MEM_P (operands[2])) | |
8540 | || (TARGET_SSE3 && rtx_equal_p (operands[1], operands[2])))" | |
eb701deb RH |
8541 | "@ |
8542 | unpcklpd\t{%2, %0|%0, %2} | |
3729983c | 8543 | vunpcklpd\t{%2, %1, %0|%0, %1, %2} |
da957891 | 8544 | %vmovddup\t{%1, %0|%0, %1} |
eb701deb | 8545 | movhpd\t{%2, %0|%0, %2} |
3729983c UB |
8546 | vmovhpd\t{%2, %1, %0|%0, %1, %2} |
8547 | %vmovsd\t{%1, %0|%0, %1} | |
eb701deb RH |
8548 | movlhps\t{%2, %0|%0, %2} |
8549 | movhps\t{%2, %0|%0, %2}" | |
da957891 | 8550 | [(set_attr "isa" "sse2_noavx,avx,sse3,sse2_noavx,avx,sse2,noavx,noavx") |
ba94c7af UB |
8551 | (set (attr "type") |
8552 | (if_then_else | |
da957891 | 8553 | (eq_attr "alternative" "0,1,2") |
ba94c7af UB |
8554 | (const_string "sselog") |
8555 | (const_string "ssemov"))) | |
da957891 UB |
8556 | (set_attr "prefix_data16" "*,*,*,1,*,*,*,*") |
8557 | (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex,orig,orig") | |
8558 | (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,DF,V4SF,V2SF")]) | |
eb701deb | 8559 | |
c003c6d6 AI |
8560 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
8561 | ;; | |
8562 | ;; Parallel integer down-conversion operations | |
8563 | ;; | |
8564 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
8565 | ||
4a90ee35 | 8566 | (define_mode_iterator PMOV_DST_MODE_1 [V16QI V16HI V8SI V8HI]) |
c003c6d6 AI |
8567 | (define_mode_attr pmov_src_mode |
8568 | [(V16QI "V16SI") (V16HI "V16SI") (V8SI "V8DI") (V8HI "V8DI")]) | |
8569 | (define_mode_attr pmov_src_lower | |
8570 | [(V16QI "v16si") (V16HI "v16si") (V8SI "v8di") (V8HI "v8di")]) | |
4a90ee35 | 8571 | (define_mode_attr pmov_suff_1 |
c003c6d6 AI |
8572 | [(V16QI "db") (V16HI "dw") (V8SI "qd") (V8HI "qw")]) |
8573 | ||
8574 | (define_insn "*avx512f_<code><pmov_src_lower><mode>2" | |
4a90ee35 AI |
8575 | [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m") |
8576 | (any_truncate:PMOV_DST_MODE_1 | |
c003c6d6 AI |
8577 | (match_operand:<pmov_src_mode> 1 "register_operand" "v,v")))] |
8578 | "TARGET_AVX512F" | |
4a90ee35 | 8579 | "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0|%0, %1}" |
c003c6d6 AI |
8580 | [(set_attr "type" "ssemov") |
8581 | (set_attr "memory" "none,store") | |
8582 | (set_attr "prefix" "evex") | |
8583 | (set_attr "mode" "<sseinsnmode>")]) | |
8584 | ||
47490470 | 8585 | (define_insn "avx512f_<code><pmov_src_lower><mode>2_mask" |
4a90ee35 AI |
8586 | [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m") |
8587 | (vec_merge:PMOV_DST_MODE_1 | |
8588 | (any_truncate:PMOV_DST_MODE_1 | |
47490470 | 8589 | (match_operand:<pmov_src_mode> 1 "register_operand" "v,v")) |
4a90ee35 | 8590 | (match_operand:PMOV_DST_MODE_1 2 "vector_move_operand" "0C,0") |
be792bce | 8591 | (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))] |
47490470 | 8592 | "TARGET_AVX512F" |
4a90ee35 | 8593 | "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" |
47490470 AI |
8594 | [(set_attr "type" "ssemov") |
8595 | (set_attr "memory" "none,store") | |
8596 | (set_attr "prefix" "evex") | |
8597 | (set_attr "mode" "<sseinsnmode>")]) | |
8598 | ||
d256b866 | 8599 | (define_expand "avx512f_<code><pmov_src_lower><mode>2_mask_store" |
4a90ee35 AI |
8600 | [(set (match_operand:PMOV_DST_MODE_1 0 "memory_operand") |
8601 | (vec_merge:PMOV_DST_MODE_1 | |
8602 | (any_truncate:PMOV_DST_MODE_1 | |
d256b866 IT |
8603 | (match_operand:<pmov_src_mode> 1 "register_operand")) |
8604 | (match_dup 0) | |
8605 | (match_operand:<avx512fmaskmode> 2 "register_operand")))] | |
8606 | "TARGET_AVX512F") | |
8607 | ||
4a90ee35 AI |
8608 | (define_insn "*avx512bw_<code>v32hiv32qi2" |
8609 | [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m") | |
8610 | (any_truncate:V32QI | |
8611 | (match_operand:V32HI 1 "register_operand" "v,v")))] | |
8612 | "TARGET_AVX512BW" | |
8613 | "vpmov<trunsuffix>wb\t{%1, %0|%0, %1}" | |
8614 | [(set_attr "type" "ssemov") | |
8615 | (set_attr "memory" "none,store") | |
8616 | (set_attr "prefix" "evex") | |
8617 | (set_attr "mode" "XI")]) | |
8618 | ||
8619 | (define_insn "avx512bw_<code>v32hiv32qi2_mask" | |
8620 | [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m") | |
8621 | (vec_merge:V32QI | |
8622 | (any_truncate:V32QI | |
8623 | (match_operand:V32HI 1 "register_operand" "v,v")) | |
8624 | (match_operand:V32QI 2 "vector_move_operand" "0C,0") | |
8625 | (match_operand:SI 3 "register_operand" "Yk,Yk")))] | |
8626 | "TARGET_AVX512BW" | |
8627 | "vpmov<trunsuffix>wb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" | |
8628 | [(set_attr "type" "ssemov") | |
8629 | (set_attr "memory" "none,store") | |
8630 | (set_attr "prefix" "evex") | |
8631 | (set_attr "mode" "XI")]) | |
8632 | ||
8633 | (define_expand "avx512bw_<code>v32hiv32qi2_mask_store" | |
8634 | [(set (match_operand:V32QI 0 "nonimmediate_operand") | |
8635 | (vec_merge:V32QI | |
8636 | (any_truncate:V32QI | |
8637 | (match_operand:V32HI 1 "register_operand")) | |
8638 | (match_dup 0) | |
8639 | (match_operand:SI 2 "register_operand")))] | |
8640 | "TARGET_AVX512BW") | |
8641 | ||
8642 | (define_mode_iterator PMOV_DST_MODE_2 | |
8643 | [V4SI V8HI (V16QI "TARGET_AVX512BW")]) | |
8644 | (define_mode_attr pmov_suff_2 | |
8645 | [(V16QI "wb") (V8HI "dw") (V4SI "qd")]) | |
8646 | ||
8647 | (define_insn "*avx512vl_<code><ssedoublemodelower><mode>2" | |
8648 | [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m") | |
8649 | (any_truncate:PMOV_DST_MODE_2 | |
8650 | (match_operand:<ssedoublemode> 1 "register_operand" "v,v")))] | |
8651 | "TARGET_AVX512VL" | |
8652 | "vpmov<trunsuffix><pmov_suff_2>\t{%1, %0|%0, %1}" | |
8653 | [(set_attr "type" "ssemov") | |
8654 | (set_attr "memory" "none,store") | |
8655 | (set_attr "prefix" "evex") | |
8656 | (set_attr "mode" "<sseinsnmode>")]) | |
8657 | ||
8658 | (define_insn "<avx512>_<code><ssedoublemodelower><mode>2_mask" | |
8659 | [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m") | |
8660 | (vec_merge:PMOV_DST_MODE_2 | |
8661 | (any_truncate:PMOV_DST_MODE_2 | |
8662 | (match_operand:<ssedoublemode> 1 "register_operand" "v,v")) | |
8663 | (match_operand:PMOV_DST_MODE_2 2 "vector_move_operand" "0C,0") | |
8664 | (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))] | |
8665 | "TARGET_AVX512VL" | |
8666 | "vpmov<trunsuffix><pmov_suff_2>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" | |
8667 | [(set_attr "type" "ssemov") | |
8668 | (set_attr "memory" "none,store") | |
8669 | (set_attr "prefix" "evex") | |
8670 | (set_attr "mode" "<sseinsnmode>")]) | |
8671 | ||
8672 | (define_expand "<avx512>_<code><ssedoublemodelower><mode>2_mask_store" | |
8673 | [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand") | |
8674 | (vec_merge:PMOV_DST_MODE_2 | |
8675 | (any_truncate:PMOV_DST_MODE_2 | |
8676 | (match_operand:<ssedoublemode> 1 "register_operand")) | |
8677 | (match_dup 0) | |
8678 | (match_operand:<avx512fmaskmode> 2 "register_operand")))] | |
8679 | "TARGET_AVX512VL") | |
8680 | ||
8681 | (define_mode_iterator PMOV_SRC_MODE_3 [V4DI V2DI V8SI V4SI (V8HI "TARGET_AVX512BW")]) | |
8682 | (define_mode_attr pmov_dst_3 | |
8683 | [(V4DI "V4QI") (V2DI "V2QI") (V8SI "V8QI") (V4SI "V4QI") (V8HI "V8QI")]) | |
8684 | (define_mode_attr pmov_dst_zeroed_3 | |
8685 | [(V4DI "V12QI") (V2DI "V14QI") (V8SI "V8QI") (V4SI "V12QI") (V8HI "V8QI")]) | |
8686 | (define_mode_attr pmov_suff_3 | |
8687 | [(V4DI "qb") (V2DI "qb") (V8SI "db") (V4SI "db") (V8HI "wb")]) | |
8688 | ||
8689 | (define_insn "*avx512vl_<code><mode>v<ssescalarnum>qi2" | |
8690 | [(set (match_operand:V16QI 0 "register_operand" "=v") | |
8691 | (vec_concat:V16QI | |
8692 | (any_truncate:<pmov_dst_3> | |
8693 | (match_operand:PMOV_SRC_MODE_3 1 "register_operand" "v")) | |
8694 | (match_operand:<pmov_dst_zeroed_3> 2 "const0_operand")))] | |
8695 | "TARGET_AVX512VL" | |
8696 | "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}" | |
8697 | [(set_attr "type" "ssemov") | |
8698 | (set_attr "prefix" "evex") | |
8699 | (set_attr "mode" "TI")]) | |
8700 | ||
8701 | (define_insn "*avx512vl_<code>v2div2qi2_store" | |
8702 | [(set (match_operand:V16QI 0 "memory_operand" "=m") | |
8703 | (vec_concat:V16QI | |
8704 | (any_truncate:V2QI | |
8705 | (match_operand:V2DI 1 "register_operand" "v")) | |
8706 | (vec_select:V14QI | |
8707 | (match_dup 0) | |
8708 | (parallel [(const_int 2) (const_int 3) | |
8709 | (const_int 4) (const_int 5) | |
8710 | (const_int 6) (const_int 7) | |
8711 | (const_int 8) (const_int 9) | |
8712 | (const_int 10) (const_int 11) | |
8713 | (const_int 12) (const_int 13) | |
8714 | (const_int 14) (const_int 15)]))))] | |
8715 | "TARGET_AVX512VL" | |
8716 | "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}" | |
8717 | [(set_attr "type" "ssemov") | |
8718 | (set_attr "memory" "store") | |
8719 | (set_attr "prefix" "evex") | |
8720 | (set_attr "mode" "TI")]) | |
8721 | ||
8722 | (define_insn "avx512vl_<code>v2div2qi2_mask" | |
8723 | [(set (match_operand:V16QI 0 "register_operand" "=v") | |
8724 | (vec_concat:V16QI | |
8725 | (vec_merge:V2QI | |
8726 | (any_truncate:V2QI | |
8727 | (match_operand:V2DI 1 "register_operand" "v")) | |
8728 | (vec_select:V2QI | |
8729 | (match_operand:V16QI 2 "vector_move_operand" "0C") | |
8730 | (parallel [(const_int 0) (const_int 1)])) | |
8731 | (match_operand:QI 3 "register_operand" "Yk")) | |
8732 | (const_vector:V14QI [(const_int 0) (const_int 0) | |
8733 | (const_int 0) (const_int 0) | |
8734 | (const_int 0) (const_int 0) | |
8735 | (const_int 0) (const_int 0) | |
8736 | (const_int 0) (const_int 0) | |
8737 | (const_int 0) (const_int 0) | |
8738 | (const_int 0) (const_int 0)])))] | |
8739 | "TARGET_AVX512VL" | |
8740 | "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" | |
8741 | [(set_attr "type" "ssemov") | |
8742 | (set_attr "prefix" "evex") | |
8743 | (set_attr "mode" "TI")]) | |
8744 | ||
8745 | (define_insn "avx512vl_<code>v2div2qi2_mask_store" | |
8746 | [(set (match_operand:V16QI 0 "memory_operand" "=m") | |
8747 | (vec_concat:V16QI | |
8748 | (vec_merge:V2QI | |
8749 | (any_truncate:V2QI | |
8750 | (match_operand:V2DI 1 "register_operand" "v")) | |
8751 | (vec_select:V2QI | |
8752 | (match_dup 0) | |
8753 | (parallel [(const_int 0) (const_int 1)])) | |
8754 | (match_operand:QI 2 "register_operand" "Yk")) | |
8755 | (vec_select:V14QI | |
8756 | (match_dup 0) | |
8757 | (parallel [(const_int 2) (const_int 3) | |
8758 | (const_int 4) (const_int 5) | |
8759 | (const_int 6) (const_int 7) | |
8760 | (const_int 8) (const_int 9) | |
8761 | (const_int 10) (const_int 11) | |
8762 | (const_int 12) (const_int 13) | |
8763 | (const_int 14) (const_int 15)]))))] | |
8764 | "TARGET_AVX512VL" | |
8765 | "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%0%{%2%}, %1}" | |
8766 | [(set_attr "type" "ssemov") | |
8767 | (set_attr "memory" "store") | |
8768 | (set_attr "prefix" "evex") | |
8769 | (set_attr "mode" "TI")]) | |
8770 | ||
8771 | (define_insn "*avx512vl_<code><mode>v4qi2_store" | |
8772 | [(set (match_operand:V16QI 0 "memory_operand" "=m") | |
8773 | (vec_concat:V16QI | |
8774 | (any_truncate:V4QI | |
8775 | (match_operand:VI4_128_8_256 1 "register_operand" "v")) | |
8776 | (vec_select:V12QI | |
8777 | (match_dup 0) | |
8778 | (parallel [(const_int 4) (const_int 5) | |
8779 | (const_int 6) (const_int 7) | |
8780 | (const_int 8) (const_int 9) | |
8781 | (const_int 10) (const_int 11) | |
8782 | (const_int 12) (const_int 13) | |
8783 | (const_int 14) (const_int 15)]))))] | |
8784 | "TARGET_AVX512VL" | |
8785 | "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}" | |
8786 | [(set_attr "type" "ssemov") | |
8787 | (set_attr "memory" "store") | |
8788 | (set_attr "prefix" "evex") | |
8789 | (set_attr "mode" "TI")]) | |
8790 | ||
8791 | (define_insn "avx512vl_<code><mode>v4qi2_mask" | |
8792 | [(set (match_operand:V16QI 0 "register_operand" "=v") | |
8793 | (vec_concat:V16QI | |
8794 | (vec_merge:V4QI | |
8795 | (any_truncate:V4QI | |
8796 | (match_operand:VI4_128_8_256 1 "register_operand" "v")) | |
8797 | (vec_select:V4QI | |
8798 | (match_operand:V16QI 2 "vector_move_operand" "0C") | |
8799 | (parallel [(const_int 0) (const_int 1) | |
8800 | (const_int 2) (const_int 3)])) | |
8801 | (match_operand:QI 3 "register_operand" "Yk")) | |
8802 | (const_vector:V12QI [(const_int 0) (const_int 0) | |
8803 | (const_int 0) (const_int 0) | |
8804 | (const_int 0) (const_int 0) | |
8805 | (const_int 0) (const_int 0) | |
8806 | (const_int 0) (const_int 0) | |
8807 | (const_int 0) (const_int 0)])))] | |
8808 | "TARGET_AVX512VL" | |
8809 | "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" | |
8810 | [(set_attr "type" "ssemov") | |
8811 | (set_attr "prefix" "evex") | |
8812 | (set_attr "mode" "TI")]) | |
8813 | ||
8814 | (define_insn "avx512vl_<code><mode>v4qi2_mask_store" | |
8815 | [(set (match_operand:V16QI 0 "memory_operand" "=m") | |
8816 | (vec_concat:V16QI | |
8817 | (vec_merge:V4QI | |
8818 | (any_truncate:V4QI | |
8819 | (match_operand:VI4_128_8_256 1 "register_operand" "v")) | |
8820 | (vec_select:V4QI | |
8821 | (match_dup 0) | |
8822 | (parallel [(const_int 0) (const_int 1) | |
8823 | (const_int 2) (const_int 3)])) | |
8824 | (match_operand:QI 2 "register_operand" "Yk")) | |
8825 | (vec_select:V12QI | |
8826 | (match_dup 0) | |
8827 | (parallel [(const_int 4) (const_int 5) | |
8828 | (const_int 6) (const_int 7) | |
8829 | (const_int 8) (const_int 9) | |
8830 | (const_int 10) (const_int 11) | |
8831 | (const_int 12) (const_int 13) | |
8832 | (const_int 14) (const_int 15)]))))] | |
8833 | "TARGET_AVX512VL" | |
8834 | "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%0%{%2%}, %1}" | |
8835 | [(set_attr "type" "ssemov") | |
8836 | (set_attr "memory" "store") | |
8837 | (set_attr "prefix" "evex") | |
8838 | (set_attr "mode" "TI")]) | |
8839 | ||
8840 | (define_mode_iterator VI2_128_BW_4_256 | |
8841 | [(V8HI "TARGET_AVX512BW") V8SI]) | |
8842 | ||
8843 | (define_insn "*avx512vl_<code><mode>v8qi2_store" | |
8844 | [(set (match_operand:V16QI 0 "memory_operand" "=m") | |
8845 | (vec_concat:V16QI | |
8846 | (any_truncate:V8QI | |
8847 | (match_operand:VI2_128_BW_4_256 1 "register_operand" "v")) | |
8848 | (vec_select:V8QI | |
8849 | (match_dup 0) | |
8850 | (parallel [(const_int 8) (const_int 9) | |
8851 | (const_int 10) (const_int 11) | |
8852 | (const_int 12) (const_int 13) | |
8853 | (const_int 14) (const_int 15)]))))] | |
8854 | "TARGET_AVX512VL" | |
8855 | "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}" | |
8856 | [(set_attr "type" "ssemov") | |
8857 | (set_attr "memory" "store") | |
8858 | (set_attr "prefix" "evex") | |
8859 | (set_attr "mode" "TI")]) | |
8860 | ||
8861 | (define_insn "avx512vl_<code><mode>v8qi2_mask" | |
8862 | [(set (match_operand:V16QI 0 "register_operand" "=v") | |
8863 | (vec_concat:V16QI | |
8864 | (vec_merge:V8QI | |
8865 | (any_truncate:V8QI | |
8866 | (match_operand:VI2_128_BW_4_256 1 "register_operand" "v")) | |
8867 | (vec_select:V8QI | |
8868 | (match_operand:V16QI 2 "vector_move_operand" "0C") | |
8869 | (parallel [(const_int 0) (const_int 1) | |
8870 | (const_int 2) (const_int 3) | |
8871 | (const_int 4) (const_int 5) | |
8872 | (const_int 6) (const_int 7)])) | |
8873 | (match_operand:QI 3 "register_operand" "Yk")) | |
8874 | (const_vector:V8QI [(const_int 0) (const_int 0) | |
8875 | (const_int 0) (const_int 0) | |
8876 | (const_int 0) (const_int 0) | |
8877 | (const_int 0) (const_int 0)])))] | |
8878 | "TARGET_AVX512VL" | |
8879 | "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" | |
8880 | [(set_attr "type" "ssemov") | |
8881 | (set_attr "prefix" "evex") | |
8882 | (set_attr "mode" "TI")]) | |
8883 | ||
8884 | (define_insn "avx512vl_<code><mode>v8qi2_mask_store" | |
8885 | [(set (match_operand:V16QI 0 "memory_operand" "=m") | |
8886 | (vec_concat:V16QI | |
8887 | (vec_merge:V8QI | |
8888 | (any_truncate:V8QI | |
8889 | (match_operand:VI2_128_BW_4_256 1 "register_operand" "v")) | |
8890 | (vec_select:V8QI | |
8891 | (match_dup 0) | |
8892 | (parallel [(const_int 0) (const_int 1) | |
8893 | (const_int 2) (const_int 3) | |
8894 | (const_int 4) (const_int 5) | |
8895 | (const_int 6) (const_int 7)])) | |
8896 | (match_operand:QI 2 "register_operand" "Yk")) | |
8897 | (vec_select:V8QI | |
8898 | (match_dup 0) | |
8899 | (parallel [(const_int 8) (const_int 9) | |
8900 | (const_int 10) (const_int 11) | |
8901 | (const_int 12) (const_int 13) | |
8902 | (const_int 14) (const_int 15)]))))] | |
8903 | "TARGET_AVX512VL" | |
8904 | "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%0%{%2%}, %1}" | |
8905 | [(set_attr "type" "ssemov") | |
8906 | (set_attr "memory" "store") | |
8907 | (set_attr "prefix" "evex") | |
8908 | (set_attr "mode" "TI")]) | |
8909 | ||
8910 | (define_mode_iterator PMOV_SRC_MODE_4 [V4DI V2DI V4SI]) | |
8911 | (define_mode_attr pmov_dst_4 | |
8912 | [(V4DI "V4HI") (V2DI "V2HI") (V4SI "V4HI")]) | |
8913 | (define_mode_attr pmov_dst_zeroed_4 | |
8914 | [(V4DI "V4HI") (V2DI "V6HI") (V4SI "V4HI")]) | |
8915 | (define_mode_attr pmov_suff_4 | |
8916 | [(V4DI "qw") (V2DI "qw") (V4SI "dw")]) | |
8917 | ||
8918 | (define_insn "*avx512vl_<code><mode>v<ssescalarnum>hi2" | |
8919 | [(set (match_operand:V8HI 0 "register_operand" "=v") | |
8920 | (vec_concat:V8HI | |
8921 | (any_truncate:<pmov_dst_4> | |
8922 | (match_operand:PMOV_SRC_MODE_4 1 "register_operand" "v")) | |
8923 | (match_operand:<pmov_dst_zeroed_4> 2 "const0_operand")))] | |
8924 | "TARGET_AVX512VL" | |
8925 | "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0|%0, %1}" | |
8926 | [(set_attr "type" "ssemov") | |
8927 | (set_attr "prefix" "evex") | |
8928 | (set_attr "mode" "TI")]) | |
8929 | ||
8930 | (define_insn "*avx512vl_<code><mode>v4hi2_store" | |
8931 | [(set (match_operand:V8HI 0 "memory_operand" "=m") | |
8932 | (vec_concat:V8HI | |
8933 | (any_truncate:V4HI | |
8934 | (match_operand:VI4_128_8_256 1 "register_operand" "v")) | |
8935 | (vec_select:V4HI | |
8936 | (match_dup 0) | |
8937 | (parallel [(const_int 4) (const_int 5) | |
8938 | (const_int 6) (const_int 7)]))))] | |
8939 | "TARGET_AVX512VL" | |
8940 | "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0|%0, %1}" | |
8941 | [(set_attr "type" "ssemov") | |
8942 | (set_attr "memory" "store") | |
8943 | (set_attr "prefix" "evex") | |
8944 | (set_attr "mode" "TI")]) | |
8945 | ||
8946 | (define_insn "avx512vl_<code><mode>v4hi2_mask" | |
8947 | [(set (match_operand:V8HI 0 "register_operand" "=v") | |
8948 | (vec_concat:V8HI | |
8949 | (vec_merge:V4HI | |
8950 | (any_truncate:V4HI | |
8951 | (match_operand:VI4_128_8_256 1 "register_operand" "v")) | |
8952 | (vec_select:V4HI | |
8953 | (match_operand:V8HI 2 "vector_move_operand" "0C") | |
8954 | (parallel [(const_int 0) (const_int 1) | |
8955 | (const_int 2) (const_int 3)])) | |
8956 | (match_operand:QI 3 "register_operand" "Yk")) | |
8957 | (const_vector:V4HI [(const_int 0) (const_int 0) | |
8958 | (const_int 0) (const_int 0)])))] | |
8959 | "TARGET_AVX512VL" | |
8960 | "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" | |
8961 | [(set_attr "type" "ssemov") | |
8962 | (set_attr "prefix" "evex") | |
8963 | (set_attr "mode" "TI")]) | |
8964 | ||
8965 | (define_insn "avx512vl_<code><mode>v4hi2_mask_store" | |
8966 | [(set (match_operand:V8HI 0 "memory_operand" "=m") | |
8967 | (vec_concat:V8HI | |
8968 | (vec_merge:V4HI | |
8969 | (any_truncate:V4HI | |
8970 | (match_operand:VI4_128_8_256 1 "register_operand" "v")) | |
8971 | (vec_select:V4HI | |
8972 | (match_dup 0) | |
8973 | (parallel [(const_int 0) (const_int 1) | |
8974 | (const_int 2) (const_int 3)])) | |
8975 | (match_operand:QI 2 "register_operand" "Yk")) | |
8976 | (vec_select:V4HI | |
8977 | (match_dup 0) | |
8978 | (parallel [(const_int 4) (const_int 5) | |
8979 | (const_int 6) (const_int 7)]))))] | |
8980 | "TARGET_AVX512VL" | |
8981 | "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}|%0%{%2%}, %1}" | |
8982 | [(set_attr "type" "ssemov") | |
8983 | (set_attr "memory" "store") | |
8984 | (set_attr "prefix" "evex") | |
8985 | (set_attr "mode" "TI")]) | |
8986 | ||
8987 | (define_insn "*avx512vl_<code>v2div2hi2_store" | |
8988 | [(set (match_operand:V8HI 0 "memory_operand" "=m") | |
8989 | (vec_concat:V8HI | |
8990 | (any_truncate:V2HI | |
8991 | (match_operand:V2DI 1 "register_operand" "v")) | |
8992 | (vec_select:V6HI | |
8993 | (match_dup 0) | |
8994 | (parallel [(const_int 2) (const_int 3) | |
8995 | (const_int 4) (const_int 5) | |
8996 | (const_int 6) (const_int 7)]))))] | |
8997 | "TARGET_AVX512VL" | |
8998 | "vpmov<trunsuffix>qw\t{%1, %0|%0, %1}" | |
8999 | [(set_attr "type" "ssemov") | |
9000 | (set_attr "memory" "store") | |
9001 | (set_attr "prefix" "evex") | |
9002 | (set_attr "mode" "TI")]) | |
9003 | ||
9004 | (define_insn "avx512vl_<code>v2div2hi2_mask" | |
9005 | [(set (match_operand:V8HI 0 "register_operand" "=v") | |
9006 | (vec_concat:V8HI | |
9007 | (vec_merge:V2HI | |
9008 | (any_truncate:V2HI | |
9009 | (match_operand:V2DI 1 "register_operand" "v")) | |
9010 | (vec_select:V2HI | |
9011 | (match_operand:V8HI 2 "vector_move_operand" "0C") | |
9012 | (parallel [(const_int 0) (const_int 1)])) | |
9013 | (match_operand:QI 3 "register_operand" "Yk")) | |
9014 | (const_vector:V6HI [(const_int 0) (const_int 0) | |
9015 | (const_int 0) (const_int 0) | |
9016 | (const_int 0) (const_int 0)])))] | |
9017 | "TARGET_AVX512VL" | |
9018 | "vpmov<trunsuffix>qw\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" | |
9019 | [(set_attr "type" "ssemov") | |
9020 | (set_attr "prefix" "evex") | |
9021 | (set_attr "mode" "TI")]) | |
9022 | ||
9023 | (define_insn "avx512vl_<code>v2div2hi2_mask_store" | |
9024 | [(set (match_operand:V8HI 0 "memory_operand" "=m") | |
9025 | (vec_concat:V8HI | |
9026 | (vec_merge:V2HI | |
9027 | (any_truncate:V2HI | |
9028 | (match_operand:V2DI 1 "register_operand" "v")) | |
9029 | (vec_select:V2HI | |
9030 | (match_dup 0) | |
9031 | (parallel [(const_int 0) (const_int 1)])) | |
9032 | (match_operand:QI 2 "register_operand" "Yk")) | |
9033 | (vec_select:V6HI | |
9034 | (match_dup 0) | |
9035 | (parallel [(const_int 2) (const_int 3) | |
9036 | (const_int 4) (const_int 5) | |
9037 | (const_int 6) (const_int 7)]))))] | |
9038 | "TARGET_AVX512VL" | |
9039 | "vpmov<trunsuffix>qw\t{%1, %0%{%2%}|%0%{%2%}, %1}" | |
9040 | [(set_attr "type" "ssemov") | |
9041 | (set_attr "memory" "store") | |
9042 | (set_attr "prefix" "evex") | |
9043 | (set_attr "mode" "TI")]) | |
9044 | ||
9045 | (define_insn "*avx512vl_<code>v2div2si2" | |
9046 | [(set (match_operand:V4SI 0 "register_operand" "=v") | |
9047 | (vec_concat:V4SI | |
9048 | (any_truncate:V2SI | |
9049 | (match_operand:V2DI 1 "register_operand" "v")) | |
9050 | (match_operand:V2SI 2 "const0_operand")))] | |
9051 | "TARGET_AVX512VL" | |
9052 | "vpmov<trunsuffix>qd\t{%1, %0|%0, %1}" | |
9053 | [(set_attr "type" "ssemov") | |
9054 | (set_attr "prefix" "evex") | |
9055 | (set_attr "mode" "TI")]) | |
9056 | ||
9057 | (define_insn "*avx512vl_<code>v2div2si2_store" | |
9058 | [(set (match_operand:V4SI 0 "memory_operand" "=m") | |
9059 | (vec_concat:V4SI | |
9060 | (any_truncate:V2SI | |
9061 | (match_operand:V2DI 1 "register_operand" "v")) | |
9062 | (vec_select:V2SI | |
9063 | (match_dup 0) | |
9064 | (parallel [(const_int 2) (const_int 3)]))))] | |
9065 | "TARGET_AVX512VL" | |
9066 | "vpmov<trunsuffix>qd\t{%1, %0|%0, %1}" | |
9067 | [(set_attr "type" "ssemov") | |
9068 | (set_attr "memory" "store") | |
9069 | (set_attr "prefix" "evex") | |
9070 | (set_attr "mode" "TI")]) | |
9071 | ||
9072 | (define_insn "avx512vl_<code>v2div2si2_mask" | |
9073 | [(set (match_operand:V4SI 0 "register_operand" "=v") | |
9074 | (vec_concat:V4SI | |
9075 | (vec_merge:V2SI | |
9076 | (any_truncate:V2SI | |
9077 | (match_operand:V2DI 1 "register_operand" "v")) | |
9078 | (vec_select:V2SI | |
9079 | (match_operand:V4SI 2 "vector_move_operand" "0C") | |
9080 | (parallel [(const_int 0) (const_int 1)])) | |
9081 | (match_operand:QI 3 "register_operand" "Yk")) | |
9082 | (const_vector:V2SI [(const_int 0) (const_int 0)])))] | |
9083 | "TARGET_AVX512VL" | |
9084 | "vpmov<trunsuffix>qd\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" | |
9085 | [(set_attr "type" "ssemov") | |
9086 | (set_attr "prefix" "evex") | |
9087 | (set_attr "mode" "TI")]) | |
9088 | ||
9089 | (define_insn "avx512vl_<code>v2div2si2_mask_store" | |
9090 | [(set (match_operand:V4SI 0 "memory_operand" "=m") | |
9091 | (vec_concat:V4SI | |
9092 | (vec_merge:V2SI | |
9093 | (any_truncate:V2SI | |
9094 | (match_operand:V2DI 1 "register_operand" "v")) | |
9095 | (vec_select:V2SI | |
9096 | (match_dup 0) | |
9097 | (parallel [(const_int 0) (const_int 1)])) | |
9098 | (match_operand:QI 2 "register_operand" "Yk")) | |
9099 | (vec_select:V2SI | |
9100 | (match_dup 0) | |
9101 | (parallel [(const_int 2) (const_int 3)]))))] | |
9102 | "TARGET_AVX512VL" | |
9103 | "vpmov<trunsuffix>qd\t{%1, %0%{%2%}|%0%{%2%}, %1}" | |
9104 | [(set_attr "type" "ssemov") | |
9105 | (set_attr "memory" "store") | |
9106 | (set_attr "prefix" "evex") | |
9107 | (set_attr "mode" "TI")]) | |
9108 | ||
c003c6d6 AI |
9109 | (define_insn "*avx512f_<code>v8div16qi2" |
9110 | [(set (match_operand:V16QI 0 "register_operand" "=v") | |
9111 | (vec_concat:V16QI | |
9112 | (any_truncate:V8QI | |
9113 | (match_operand:V8DI 1 "register_operand" "v")) | |
9114 | (const_vector:V8QI [(const_int 0) (const_int 0) | |
9115 | (const_int 0) (const_int 0) | |
9116 | (const_int 0) (const_int 0) | |
9117 | (const_int 0) (const_int 0)])))] | |
9118 | "TARGET_AVX512F" | |
9119 | "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}" | |
9120 | [(set_attr "type" "ssemov") | |
9121 | (set_attr "prefix" "evex") | |
9122 | (set_attr "mode" "TI")]) | |
9123 | ||
9124 | (define_insn "*avx512f_<code>v8div16qi2_store" | |
9125 | [(set (match_operand:V16QI 0 "memory_operand" "=m") | |
9126 | (vec_concat:V16QI | |
9127 | (any_truncate:V8QI | |
9128 | (match_operand:V8DI 1 "register_operand" "v")) | |
9129 | (vec_select:V8QI | |
9130 | (match_dup 0) | |
9131 | (parallel [(const_int 8) (const_int 9) | |
9132 | (const_int 10) (const_int 11) | |
9133 | (const_int 12) (const_int 13) | |
9134 | (const_int 14) (const_int 15)]))))] | |
9135 | "TARGET_AVX512F" | |
9136 | "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}" | |
9137 | [(set_attr "type" "ssemov") | |
9138 | (set_attr "memory" "store") | |
9139 | (set_attr "prefix" "evex") | |
9140 | (set_attr "mode" "TI")]) | |
9141 | ||
47490470 AI |
9142 | (define_insn "avx512f_<code>v8div16qi2_mask" |
9143 | [(set (match_operand:V16QI 0 "register_operand" "=v") | |
9144 | (vec_concat:V16QI | |
9145 | (vec_merge:V8QI | |
9146 | (any_truncate:V8QI | |
9147 | (match_operand:V8DI 1 "register_operand" "v")) | |
9148 | (vec_select:V8QI | |
9149 | (match_operand:V16QI 2 "vector_move_operand" "0C") | |
9150 | (parallel [(const_int 0) (const_int 1) | |
9151 | (const_int 2) (const_int 3) | |
9152 | (const_int 4) (const_int 5) | |
9153 | (const_int 6) (const_int 7)])) | |
be792bce | 9154 | (match_operand:QI 3 "register_operand" "Yk")) |
47490470 AI |
9155 | (const_vector:V8QI [(const_int 0) (const_int 0) |
9156 | (const_int 0) (const_int 0) | |
9157 | (const_int 0) (const_int 0) | |
9158 | (const_int 0) (const_int 0)])))] | |
9159 | "TARGET_AVX512F" | |
9160 | "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" | |
9161 | [(set_attr "type" "ssemov") | |
9162 | (set_attr "prefix" "evex") | |
9163 | (set_attr "mode" "TI")]) | |
9164 | ||
d256b866 | 9165 | (define_insn "avx512f_<code>v8div16qi2_mask_store" |
47490470 AI |
9166 | [(set (match_operand:V16QI 0 "memory_operand" "=m") |
9167 | (vec_concat:V16QI | |
9168 | (vec_merge:V8QI | |
9169 | (any_truncate:V8QI | |
9170 | (match_operand:V8DI 1 "register_operand" "v")) | |
9171 | (vec_select:V8QI | |
9172 | (match_dup 0) | |
9173 | (parallel [(const_int 0) (const_int 1) | |
9174 | (const_int 2) (const_int 3) | |
9175 | (const_int 4) (const_int 5) | |
9176 | (const_int 6) (const_int 7)])) | |
be792bce | 9177 | (match_operand:QI 2 "register_operand" "Yk")) |
47490470 AI |
9178 | (vec_select:V8QI |
9179 | (match_dup 0) | |
9180 | (parallel [(const_int 8) (const_int 9) | |
9181 | (const_int 10) (const_int 11) | |
9182 | (const_int 12) (const_int 13) | |
9183 | (const_int 14) (const_int 15)]))))] | |
9184 | "TARGET_AVX512F" | |
9185 | "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%0%{%2%}, %1}" | |
9186 | [(set_attr "type" "ssemov") | |
9187 | (set_attr "memory" "store") | |
9188 | (set_attr "prefix" "evex") | |
9189 | (set_attr "mode" "TI")]) | |
9190 | ||
ef719a44 RH |
9191 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
9192 | ;; | |
9193 | ;; Parallel integral arithmetic | |
9194 | ;; | |
9195 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
9196 | ||
9197 | (define_expand "neg<mode>2" | |
82e86dc6 | 9198 | [(set (match_operand:VI_AVX2 0 "register_operand") |
267ff156 | 9199 | (minus:VI_AVX2 |
ef719a44 | 9200 | (match_dup 2) |
82e86dc6 | 9201 | (match_operand:VI_AVX2 1 "nonimmediate_operand")))] |
ef719a44 RH |
9202 | "TARGET_SSE2" |
9203 | "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));") | |
9204 | ||
700e2919 | 9205 | (define_expand "<plusminus_insn><mode>3" |
82e86dc6 | 9206 | [(set (match_operand:VI_AVX2 0 "register_operand") |
1707583b | 9207 | (plusminus:VI_AVX2 |
82e86dc6 UB |
9208 | (match_operand:VI_AVX2 1 "nonimmediate_operand") |
9209 | (match_operand:VI_AVX2 2 "nonimmediate_operand")))] | |
700e2919 AI |
9210 | "TARGET_SSE2" |
9211 | "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);") | |
9212 | ||
9213 | (define_expand "<plusminus_insn><mode>3_mask" | |
9214 | [(set (match_operand:VI48_AVX512VL 0 "register_operand") | |
9215 | (vec_merge:VI48_AVX512VL | |
9216 | (plusminus:VI48_AVX512VL | |
9217 | (match_operand:VI48_AVX512VL 1 "nonimmediate_operand") | |
9218 | (match_operand:VI48_AVX512VL 2 "nonimmediate_operand")) | |
9219 | (match_operand:VI48_AVX512VL 3 "vector_move_operand") | |
9220 | (match_operand:<avx512fmaskmode> 4 "register_operand")))] | |
9221 | "TARGET_AVX512F" | |
9222 | "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);") | |
9223 | ||
9224 | (define_expand "<plusminus_insn><mode>3_mask" | |
9225 | [(set (match_operand:VI12_AVX512VL 0 "register_operand") | |
9226 | (vec_merge:VI12_AVX512VL | |
9227 | (plusminus:VI12_AVX512VL | |
9228 | (match_operand:VI12_AVX512VL 1 "nonimmediate_operand") | |
9229 | (match_operand:VI12_AVX512VL 2 "nonimmediate_operand")) | |
9230 | (match_operand:VI12_AVX512VL 3 "vector_move_operand") | |
9231 | (match_operand:<avx512fmaskmode> 4 "register_operand")))] | |
9232 | "TARGET_AVX512BW" | |
d1c3b587 | 9233 | "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);") |
ef719a44 | 9234 | |
700e2919 | 9235 | (define_insn "*<plusminus_insn><mode>3" |
3f97cb0b | 9236 | [(set (match_operand:VI_AVX2 0 "register_operand" "=x,v") |
1707583b | 9237 | (plusminus:VI_AVX2 |
3f97cb0b AI |
9238 | (match_operand:VI_AVX2 1 "nonimmediate_operand" "<comm>0,v") |
9239 | (match_operand:VI_AVX2 2 "nonimmediate_operand" "xm,vm")))] | |
700e2919 AI |
9240 | "TARGET_SSE2 |
9241 | && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" | |
798dd0ba | 9242 | "@ |
cbb734aa | 9243 | p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2} |
47490470 | 9244 | vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
798dd0ba UB |
9245 | [(set_attr "isa" "noavx,avx") |
9246 | (set_attr "type" "sseiadd") | |
9247 | (set_attr "prefix_data16" "1,*") | |
47490470 | 9248 | (set_attr "prefix" "<mask_prefix3>") |
977e83a3 | 9249 | (set_attr "mode" "<sseinsnmode>")]) |
ef719a44 | 9250 | |
700e2919 AI |
9251 | (define_insn "*<plusminus_insn><mode>3_mask" |
9252 | [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v") | |
9253 | (vec_merge:VI48_AVX512VL | |
9254 | (plusminus:VI48_AVX512VL | |
9255 | (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "<comm>v") | |
9256 | (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")) | |
9257 | (match_operand:VI48_AVX512VL 3 "vector_move_operand" "0C") | |
9258 | (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] | |
9259 | "TARGET_AVX512F | |
9260 | && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" | |
9261 | "vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}" | |
9262 | [(set_attr "type" "sseiadd") | |
9263 | (set_attr "prefix" "evex") | |
9264 | (set_attr "mode" "<sseinsnmode>")]) | |
9265 | ||
9266 | (define_insn "*<plusminus_insn><mode>3_mask" | |
9267 | [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v") | |
9268 | (vec_merge:VI12_AVX512VL | |
9269 | (plusminus:VI12_AVX512VL | |
9270 | (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "<comm>v") | |
9271 | (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")) | |
9272 | (match_operand:VI12_AVX512VL 3 "vector_move_operand" "0C") | |
9273 | (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] | |
9274 | "TARGET_AVX512BW && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" | |
9275 | "vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}" | |
9276 | [(set_attr "type" "sseiadd") | |
9277 | (set_attr "prefix" "evex") | |
9278 | (set_attr "mode" "<sseinsnmode>")]) | |
9279 | ||
c9b17fa5 | 9280 | (define_expand "<sse2_avx2>_<plusminus_insn><mode>3<mask_name>" |
82e86dc6 | 9281 | [(set (match_operand:VI12_AVX2 0 "register_operand") |
977e83a3 | 9282 | (sat_plusminus:VI12_AVX2 |
82e86dc6 UB |
9283 | (match_operand:VI12_AVX2 1 "nonimmediate_operand") |
9284 | (match_operand:VI12_AVX2 2 "nonimmediate_operand")))] | |
c9b17fa5 | 9285 | "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>" |
ffbaf337 UB |
9286 | "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);") |
9287 | ||
c9b17fa5 | 9288 | (define_insn "*<sse2_avx2>_<plusminus_insn><mode>3<mask_name>" |
3f97cb0b | 9289 | [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v") |
977e83a3 | 9290 | (sat_plusminus:VI12_AVX2 |
3f97cb0b AI |
9291 | (match_operand:VI12_AVX2 1 "nonimmediate_operand" "<comm>0,v") |
9292 | (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,vm")))] | |
c9b17fa5 AI |
9293 | "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition> |
9294 | && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" | |
798dd0ba | 9295 | "@ |
cbb734aa | 9296 | p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2} |
c9b17fa5 | 9297 | vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
798dd0ba UB |
9298 | [(set_attr "isa" "noavx,avx") |
9299 | (set_attr "type" "sseiadd") | |
9300 | (set_attr "prefix_data16" "1,*") | |
c9b17fa5 | 9301 | (set_attr "prefix" "orig,maybe_evex") |
ef719a44 RH |
9302 | (set_attr "mode" "TI")]) |
9303 | ||
2ac7a566 | 9304 | (define_expand "mul<mode>3<mask_name>" |
f5db965f IT |
9305 | [(set (match_operand:VI1_AVX512 0 "register_operand") |
9306 | (mult:VI1_AVX512 (match_operand:VI1_AVX512 1 "register_operand") | |
9307 | (match_operand:VI1_AVX512 2 "register_operand")))] | |
2ac7a566 | 9308 | "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>" |
64e6863e | 9309 | { |
77a3dbf6 | 9310 | ix86_expand_vecop_qihi (MULT, operands[0], operands[1], operands[2]); |
64e6863e RH |
9311 | DONE; |
9312 | }) | |
9313 | ||
ed3e611e | 9314 | (define_expand "mul<mode>3<mask_name>" |
82e86dc6 UB |
9315 | [(set (match_operand:VI2_AVX2 0 "register_operand") |
9316 | (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand") | |
9317 | (match_operand:VI2_AVX2 2 "nonimmediate_operand")))] | |
ed3e611e | 9318 | "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>" |
977e83a3 | 9319 | "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);") |
ef719a44 | 9320 | |
ed3e611e AI |
9321 | (define_insn "*mul<mode>3<mask_name>" |
9322 | [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v") | |
9323 | (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,v") | |
9324 | (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,vm")))] | |
9325 | "TARGET_SSE2 | |
9326 | && ix86_binary_operator_ok (MULT, <MODE>mode, operands) | |
9327 | && <mask_mode512bit_condition> && <mask_avx512bw_condition>" | |
798dd0ba UB |
9328 | "@ |
9329 | pmullw\t{%2, %0|%0, %2} | |
ed3e611e | 9330 | vpmullw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
798dd0ba UB |
9331 | [(set_attr "isa" "noavx,avx") |
9332 | (set_attr "type" "sseimul") | |
9333 | (set_attr "prefix_data16" "1,*") | |
9334 | (set_attr "prefix" "orig,vex") | |
977e83a3 | 9335 | (set_attr "mode" "<sseinsnmode>")]) |
ef719a44 | 9336 | |
ed3e611e | 9337 | (define_expand "<s>mul<mode>3_highpart<mask_name>" |
82e86dc6 | 9338 | [(set (match_operand:VI2_AVX2 0 "register_operand") |
977e83a3 KY |
9339 | (truncate:VI2_AVX2 |
9340 | (lshiftrt:<ssedoublemode> | |
9341 | (mult:<ssedoublemode> | |
9342 | (any_extend:<ssedoublemode> | |
82e86dc6 | 9343 | (match_operand:VI2_AVX2 1 "nonimmediate_operand")) |
977e83a3 | 9344 | (any_extend:<ssedoublemode> |
82e86dc6 | 9345 | (match_operand:VI2_AVX2 2 "nonimmediate_operand"))) |
977e83a3 | 9346 | (const_int 16))))] |
ed3e611e AI |
9347 | "TARGET_SSE2 |
9348 | && <mask_mode512bit_condition> && <mask_avx512bw_condition>" | |
2ddfea8a | 9349 | "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);") |
89d67cca | 9350 | |
ed3e611e AI |
9351 | (define_insn "*<s>mul<mode>3_highpart<mask_name>" |
9352 | [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v") | |
977e83a3 KY |
9353 | (truncate:VI2_AVX2 |
9354 | (lshiftrt:<ssedoublemode> | |
9355 | (mult:<ssedoublemode> | |
9356 | (any_extend:<ssedoublemode> | |
ed3e611e | 9357 | (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,v")) |
977e83a3 | 9358 | (any_extend:<ssedoublemode> |
ed3e611e | 9359 | (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,vm"))) |
ef719a44 | 9360 | (const_int 16))))] |
ed3e611e AI |
9361 | "TARGET_SSE2 |
9362 | && ix86_binary_operator_ok (MULT, <MODE>mode, operands) | |
9363 | && <mask_mode512bit_condition> && <mask_avx512bw_condition>" | |
798dd0ba UB |
9364 | "@ |
9365 | pmulh<u>w\t{%2, %0|%0, %2} | |
ed3e611e | 9366 | vpmulh<u>w\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
798dd0ba UB |
9367 | [(set_attr "isa" "noavx,avx") |
9368 | (set_attr "type" "sseimul") | |
9369 | (set_attr "prefix_data16" "1,*") | |
9370 | (set_attr "prefix" "orig,vex") | |
977e83a3 KY |
9371 | (set_attr "mode" "<sseinsnmode>")]) |
9372 | ||
47490470 | 9373 | (define_expand "vec_widen_umult_even_v16si<mask_name>" |
c003c6d6 AI |
9374 | [(set (match_operand:V8DI 0 "register_operand") |
9375 | (mult:V8DI | |
9376 | (zero_extend:V8DI | |
9377 | (vec_select:V8SI | |
9378 | (match_operand:V16SI 1 "nonimmediate_operand") | |
9379 | (parallel [(const_int 0) (const_int 2) | |
9380 | (const_int 4) (const_int 6) | |
9381 | (const_int 8) (const_int 10) | |
9382 | (const_int 12) (const_int 14)]))) | |
9383 | (zero_extend:V8DI | |
9384 | (vec_select:V8SI | |
9385 | (match_operand:V16SI 2 "nonimmediate_operand") | |
9386 | (parallel [(const_int 0) (const_int 2) | |
9387 | (const_int 4) (const_int 6) | |
9388 | (const_int 8) (const_int 10) | |
9389 | (const_int 12) (const_int 14)])))))] | |
9390 | "TARGET_AVX512F" | |
9391 | "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);") | |
9392 | ||
47490470 | 9393 | (define_insn "*vec_widen_umult_even_v16si<mask_name>" |
c003c6d6 AI |
9394 | [(set (match_operand:V8DI 0 "register_operand" "=v") |
9395 | (mult:V8DI | |
9396 | (zero_extend:V8DI | |
9397 | (vec_select:V8SI | |
9398 | (match_operand:V16SI 1 "nonimmediate_operand" "%v") | |
9399 | (parallel [(const_int 0) (const_int 2) | |
9400 | (const_int 4) (const_int 6) | |
9401 | (const_int 8) (const_int 10) | |
9402 | (const_int 12) (const_int 14)]))) | |
9403 | (zero_extend:V8DI | |
9404 | (vec_select:V8SI | |
9405 | (match_operand:V16SI 2 "nonimmediate_operand" "vm") | |
9406 | (parallel [(const_int 0) (const_int 2) | |
9407 | (const_int 4) (const_int 6) | |
9408 | (const_int 8) (const_int 10) | |
9409 | (const_int 12) (const_int 14)])))))] | |
9410 | "TARGET_AVX512F && ix86_binary_operator_ok (MULT, V16SImode, operands)" | |
47490470 | 9411 | "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
c003c6d6 AI |
9412 | [(set_attr "isa" "avx512f") |
9413 | (set_attr "type" "sseimul") | |
9414 | (set_attr "prefix_extra" "1") | |
9415 | (set_attr "prefix" "evex") | |
9416 | (set_attr "mode" "XI")]) | |
9417 | ||
f1df7a1c | 9418 | (define_expand "vec_widen_umult_even_v8si<mask_name>" |
82e86dc6 | 9419 | [(set (match_operand:V4DI 0 "register_operand") |
977e83a3 KY |
9420 | (mult:V4DI |
9421 | (zero_extend:V4DI | |
9422 | (vec_select:V4SI | |
82e86dc6 | 9423 | (match_operand:V8SI 1 "nonimmediate_operand") |
977e83a3 KY |
9424 | (parallel [(const_int 0) (const_int 2) |
9425 | (const_int 4) (const_int 6)]))) | |
9426 | (zero_extend:V4DI | |
9427 | (vec_select:V4SI | |
82e86dc6 | 9428 | (match_operand:V8SI 2 "nonimmediate_operand") |
977e83a3 KY |
9429 | (parallel [(const_int 0) (const_int 2) |
9430 | (const_int 4) (const_int 6)])))))] | |
f1df7a1c | 9431 | "TARGET_AVX2 && <mask_avx512vl_condition>" |
977e83a3 KY |
9432 | "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);") |
9433 | ||
f1df7a1c AI |
9434 | (define_insn "*vec_widen_umult_even_v8si<mask_name>" |
9435 | [(set (match_operand:V4DI 0 "register_operand" "=v") | |
977e83a3 KY |
9436 | (mult:V4DI |
9437 | (zero_extend:V4DI | |
9438 | (vec_select:V4SI | |
f1df7a1c | 9439 | (match_operand:V8SI 1 "nonimmediate_operand" "%v") |
977e83a3 KY |
9440 | (parallel [(const_int 0) (const_int 2) |
9441 | (const_int 4) (const_int 6)]))) | |
9442 | (zero_extend:V4DI | |
9443 | (vec_select:V4SI | |
f1df7a1c | 9444 | (match_operand:V8SI 2 "nonimmediate_operand" "vm") |
977e83a3 KY |
9445 | (parallel [(const_int 0) (const_int 2) |
9446 | (const_int 4) (const_int 6)])))))] | |
f1df7a1c AI |
9447 | "TARGET_AVX2 && <mask_avx512vl_condition> |
9448 | && ix86_binary_operator_ok (MULT, V8SImode, operands)" | |
9449 | "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" | |
977e83a3 | 9450 | [(set_attr "type" "sseimul") |
f1df7a1c | 9451 | (set_attr "prefix" "maybe_evex") |
977e83a3 | 9452 | (set_attr "mode" "OI")]) |
ef719a44 | 9453 | |
f1df7a1c | 9454 | (define_expand "vec_widen_umult_even_v4si<mask_name>" |
82e86dc6 | 9455 | [(set (match_operand:V2DI 0 "register_operand") |
ffbaf337 UB |
9456 | (mult:V2DI |
9457 | (zero_extend:V2DI | |
9458 | (vec_select:V2SI | |
82e86dc6 | 9459 | (match_operand:V4SI 1 "nonimmediate_operand") |
ffbaf337 UB |
9460 | (parallel [(const_int 0) (const_int 2)]))) |
9461 | (zero_extend:V2DI | |
9462 | (vec_select:V2SI | |
82e86dc6 | 9463 | (match_operand:V4SI 2 "nonimmediate_operand") |
ffbaf337 | 9464 | (parallel [(const_int 0) (const_int 2)])))))] |
f1df7a1c | 9465 | "TARGET_SSE2 && <mask_avx512vl_condition>" |
ffbaf337 UB |
9466 | "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);") |
9467 | ||
f1df7a1c AI |
9468 | (define_insn "*vec_widen_umult_even_v4si<mask_name>" |
9469 | [(set (match_operand:V2DI 0 "register_operand" "=x,v") | |
ef719a44 RH |
9470 | (mult:V2DI |
9471 | (zero_extend:V2DI | |
9472 | (vec_select:V2SI | |
f1df7a1c | 9473 | (match_operand:V4SI 1 "nonimmediate_operand" "%0,v") |
ef719a44 RH |
9474 | (parallel [(const_int 0) (const_int 2)]))) |
9475 | (zero_extend:V2DI | |
9476 | (vec_select:V2SI | |
f1df7a1c | 9477 | (match_operand:V4SI 2 "nonimmediate_operand" "xm,vm") |
ef719a44 | 9478 | (parallel [(const_int 0) (const_int 2)])))))] |
f1df7a1c AI |
9479 | "TARGET_SSE2 && <mask_avx512vl_condition> |
9480 | && ix86_binary_operator_ok (MULT, V4SImode, operands)" | |
798dd0ba UB |
9481 | "@ |
9482 | pmuludq\t{%2, %0|%0, %2} | |
f1df7a1c | 9483 | vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
798dd0ba UB |
9484 | [(set_attr "isa" "noavx,avx") |
9485 | (set_attr "type" "sseimul") | |
9486 | (set_attr "prefix_data16" "1,*") | |
f1df7a1c | 9487 | (set_attr "prefix" "orig,maybe_evex") |
ef719a44 RH |
9488 | (set_attr "mode" "TI")]) |
9489 | ||
47490470 | 9490 | (define_expand "vec_widen_smult_even_v16si<mask_name>" |
c003c6d6 AI |
9491 | [(set (match_operand:V8DI 0 "register_operand") |
9492 | (mult:V8DI | |
9493 | (sign_extend:V8DI | |
9494 | (vec_select:V8SI | |
9495 | (match_operand:V16SI 1 "nonimmediate_operand") | |
9496 | (parallel [(const_int 0) (const_int 2) | |
9497 | (const_int 4) (const_int 6) | |
9498 | (const_int 8) (const_int 10) | |
9499 | (const_int 12) (const_int 14)]))) | |
9500 | (sign_extend:V8DI | |
9501 | (vec_select:V8SI | |
9502 | (match_operand:V16SI 2 "nonimmediate_operand") | |
9503 | (parallel [(const_int 0) (const_int 2) | |
9504 | (const_int 4) (const_int 6) | |
9505 | (const_int 8) (const_int 10) | |
9506 | (const_int 12) (const_int 14)])))))] | |
9507 | "TARGET_AVX512F" | |
9508 | "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);") | |
9509 | ||
47490470 | 9510 | (define_insn "*vec_widen_smult_even_v16si<mask_name>" |
c003c6d6 AI |
9511 | [(set (match_operand:V8DI 0 "register_operand" "=v") |
9512 | (mult:V8DI | |
9513 | (sign_extend:V8DI | |
9514 | (vec_select:V8SI | |
9515 | (match_operand:V16SI 1 "nonimmediate_operand" "%v") | |
9516 | (parallel [(const_int 0) (const_int 2) | |
9517 | (const_int 4) (const_int 6) | |
9518 | (const_int 8) (const_int 10) | |
9519 | (const_int 12) (const_int 14)]))) | |
9520 | (sign_extend:V8DI | |
9521 | (vec_select:V8SI | |
9522 | (match_operand:V16SI 2 "nonimmediate_operand" "vm") | |
9523 | (parallel [(const_int 0) (const_int 2) | |
9524 | (const_int 4) (const_int 6) | |
9525 | (const_int 8) (const_int 10) | |
9526 | (const_int 12) (const_int 14)])))))] | |
9527 | "TARGET_AVX512F && ix86_binary_operator_ok (MULT, V16SImode, operands)" | |
47490470 | 9528 | "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
c003c6d6 AI |
9529 | [(set_attr "isa" "avx512f") |
9530 | (set_attr "type" "sseimul") | |
9531 | (set_attr "prefix_extra" "1") | |
9532 | (set_attr "prefix" "evex") | |
9533 | (set_attr "mode" "XI")]) | |
9534 | ||
f1df7a1c | 9535 | (define_expand "vec_widen_smult_even_v8si<mask_name>" |
82e86dc6 | 9536 | [(set (match_operand:V4DI 0 "register_operand") |
977e83a3 KY |
9537 | (mult:V4DI |
9538 | (sign_extend:V4DI | |
9539 | (vec_select:V4SI | |
82e86dc6 | 9540 | (match_operand:V8SI 1 "nonimmediate_operand") |
977e83a3 KY |
9541 | (parallel [(const_int 0) (const_int 2) |
9542 | (const_int 4) (const_int 6)]))) | |
9543 | (sign_extend:V4DI | |
9544 | (vec_select:V4SI | |
82e86dc6 | 9545 | (match_operand:V8SI 2 "nonimmediate_operand") |
977e83a3 KY |
9546 | (parallel [(const_int 0) (const_int 2) |
9547 | (const_int 4) (const_int 6)])))))] | |
f1df7a1c | 9548 | "TARGET_AVX2 && <mask_avx512vl_condition>" |
977e83a3 KY |
9549 | "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);") |
9550 | ||
f1df7a1c AI |
9551 | (define_insn "*vec_widen_smult_even_v8si<mask_name>" |
9552 | [(set (match_operand:V4DI 0 "register_operand" "=v") | |
977e83a3 KY |
9553 | (mult:V4DI |
9554 | (sign_extend:V4DI | |
9555 | (vec_select:V4SI | |
ee768d85 | 9556 | (match_operand:V8SI 1 "nonimmediate_operand" "%v") |
977e83a3 KY |
9557 | (parallel [(const_int 0) (const_int 2) |
9558 | (const_int 4) (const_int 6)]))) | |
9559 | (sign_extend:V4DI | |
9560 | (vec_select:V4SI | |
f1df7a1c | 9561 | (match_operand:V8SI 2 "nonimmediate_operand" "vm") |
977e83a3 KY |
9562 | (parallel [(const_int 0) (const_int 2) |
9563 | (const_int 4) (const_int 6)])))))] | |
f1df7a1c AI |
9564 | "TARGET_AVX2 |
9565 | && ix86_binary_operator_ok (MULT, V8SImode, operands)" | |
9566 | "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" | |
82e33890 | 9567 | [(set_attr "type" "sseimul") |
977e83a3 KY |
9568 | (set_attr "prefix_extra" "1") |
9569 | (set_attr "prefix" "vex") | |
9570 | (set_attr "mode" "OI")]) | |
9571 | ||
f1df7a1c | 9572 | (define_expand "sse4_1_mulv2siv2di3<mask_name>" |
82e86dc6 | 9573 | [(set (match_operand:V2DI 0 "register_operand") |
ffbaf337 UB |
9574 | (mult:V2DI |
9575 | (sign_extend:V2DI | |
9576 | (vec_select:V2SI | |
82e86dc6 | 9577 | (match_operand:V4SI 1 "nonimmediate_operand") |
ffbaf337 UB |
9578 | (parallel [(const_int 0) (const_int 2)]))) |
9579 | (sign_extend:V2DI | |
9580 | (vec_select:V2SI | |
82e86dc6 | 9581 | (match_operand:V4SI 2 "nonimmediate_operand") |
ffbaf337 | 9582 | (parallel [(const_int 0) (const_int 2)])))))] |
f1df7a1c | 9583 | "TARGET_SSE4_1 && <mask_avx512vl_condition>" |
ffbaf337 | 9584 | "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);") |
95879c72 | 9585 | |
f1df7a1c | 9586 | (define_insn "*sse4_1_mulv2siv2di3<mask_name>" |
45392c76 | 9587 | [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v") |
95879c72 L |
9588 | (mult:V2DI |
9589 | (sign_extend:V2DI | |
9590 | (vec_select:V2SI | |
45392c76 | 9591 | (match_operand:V4SI 1 "nonimmediate_operand" "%0,0,v") |
95879c72 L |
9592 | (parallel [(const_int 0) (const_int 2)]))) |
9593 | (sign_extend:V2DI | |
9594 | (vec_select:V2SI | |
45392c76 | 9595 | (match_operand:V4SI 2 "nonimmediate_operand" "Yrm,*xm,vm") |
95879c72 | 9596 | (parallel [(const_int 0) (const_int 2)])))))] |
f1df7a1c AI |
9597 | "TARGET_SSE4_1 && <mask_avx512vl_condition> |
9598 | && ix86_binary_operator_ok (MULT, V4SImode, operands)" | |
798dd0ba | 9599 | "@ |
45392c76 | 9600 | pmuldq\t{%2, %0|%0, %2} |
798dd0ba | 9601 | pmuldq\t{%2, %0|%0, %2} |
f1df7a1c | 9602 | vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
45392c76 | 9603 | [(set_attr "isa" "noavx,noavx,avx") |
798dd0ba | 9604 | (set_attr "type" "sseimul") |
45392c76 | 9605 | (set_attr "prefix_data16" "1,1,*") |
95879c72 | 9606 | (set_attr "prefix_extra" "1") |
45392c76 | 9607 | (set_attr "prefix" "orig,orig,vex") |
95879c72 L |
9608 | (set_attr "mode" "TI")]) |
9609 | ||
ed3e611e AI |
9610 | (define_insn "avx512bw_pmaddwd512<mode><mask_name>" |
9611 | [(set (match_operand:<sseunpackmode> 0 "register_operand" "=v") | |
9612 | (unspec:<sseunpackmode> | |
9613 | [(match_operand:VI2_AVX2 1 "register_operand" "v") | |
9614 | (match_operand:VI2_AVX2 2 "nonimmediate_operand" "vm")] | |
9615 | UNSPEC_PMADDWD512))] | |
9616 | "TARGET_AVX512BW && <mask_mode512bit_condition>" | |
9617 | "vpmaddwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"; | |
9618 | [(set_attr "type" "sseiadd") | |
9619 | (set_attr "prefix" "evex") | |
9620 | (set_attr "mode" "XI")]) | |
9621 | ||
977e83a3 | 9622 | (define_expand "avx2_pmaddwd" |
82e86dc6 | 9623 | [(set (match_operand:V8SI 0 "register_operand") |
977e83a3 KY |
9624 | (plus:V8SI |
9625 | (mult:V8SI | |
9626 | (sign_extend:V8SI | |
9627 | (vec_select:V8HI | |
82e86dc6 | 9628 | (match_operand:V16HI 1 "nonimmediate_operand") |
608dccd7 UB |
9629 | (parallel [(const_int 0) (const_int 2) |
9630 | (const_int 4) (const_int 6) | |
9631 | (const_int 8) (const_int 10) | |
9632 | (const_int 12) (const_int 14)]))) | |
977e83a3 KY |
9633 | (sign_extend:V8SI |
9634 | (vec_select:V8HI | |
82e86dc6 | 9635 | (match_operand:V16HI 2 "nonimmediate_operand") |
608dccd7 UB |
9636 | (parallel [(const_int 0) (const_int 2) |
9637 | (const_int 4) (const_int 6) | |
9638 | (const_int 8) (const_int 10) | |
9639 | (const_int 12) (const_int 14)])))) | |
977e83a3 KY |
9640 | (mult:V8SI |
9641 | (sign_extend:V8SI | |
9642 | (vec_select:V8HI (match_dup 1) | |
608dccd7 UB |
9643 | (parallel [(const_int 1) (const_int 3) |
9644 | (const_int 5) (const_int 7) | |
9645 | (const_int 9) (const_int 11) | |
9646 | (const_int 13) (const_int 15)]))) | |
977e83a3 KY |
9647 | (sign_extend:V8SI |
9648 | (vec_select:V8HI (match_dup 2) | |
608dccd7 UB |
9649 | (parallel [(const_int 1) (const_int 3) |
9650 | (const_int 5) (const_int 7) | |
9651 | (const_int 9) (const_int 11) | |
9652 | (const_int 13) (const_int 15)]))))))] | |
977e83a3 KY |
9653 | "TARGET_AVX2" |
9654 | "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);") | |
9655 | ||
977e83a3 KY |
9656 | (define_insn "*avx2_pmaddwd" |
9657 | [(set (match_operand:V8SI 0 "register_operand" "=x") | |
9658 | (plus:V8SI | |
9659 | (mult:V8SI | |
9660 | (sign_extend:V8SI | |
9661 | (vec_select:V8HI | |
9662 | (match_operand:V16HI 1 "nonimmediate_operand" "%x") | |
608dccd7 UB |
9663 | (parallel [(const_int 0) (const_int 2) |
9664 | (const_int 4) (const_int 6) | |
9665 | (const_int 8) (const_int 10) | |
9666 | (const_int 12) (const_int 14)]))) | |
977e83a3 KY |
9667 | (sign_extend:V8SI |
9668 | (vec_select:V8HI | |
9669 | (match_operand:V16HI 2 "nonimmediate_operand" "xm") | |
608dccd7 UB |
9670 | (parallel [(const_int 0) (const_int 2) |
9671 | (const_int 4) (const_int 6) | |
9672 | (const_int 8) (const_int 10) | |
9673 | (const_int 12) (const_int 14)])))) | |
977e83a3 KY |
9674 | (mult:V8SI |
9675 | (sign_extend:V8SI | |
9676 | (vec_select:V8HI (match_dup 1) | |
608dccd7 UB |
9677 | (parallel [(const_int 1) (const_int 3) |
9678 | (const_int 5) (const_int 7) | |
9679 | (const_int 9) (const_int 11) | |
9680 | (const_int 13) (const_int 15)]))) | |
977e83a3 KY |
9681 | (sign_extend:V8SI |
9682 | (vec_select:V8HI (match_dup 2) | |
608dccd7 UB |
9683 | (parallel [(const_int 1) (const_int 3) |
9684 | (const_int 5) (const_int 7) | |
9685 | (const_int 9) (const_int 11) | |
9686 | (const_int 13) (const_int 15)]))))))] | |
977e83a3 KY |
9687 | "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)" |
9688 | "vpmaddwd\t{%2, %1, %0|%0, %1, %2}" | |
9689 | [(set_attr "type" "sseiadd") | |
9690 | (set_attr "prefix" "vex") | |
9691 | (set_attr "mode" "OI")]) | |
9692 | ||
608dccd7 UB |
9693 | (define_expand "sse2_pmaddwd" |
9694 | [(set (match_operand:V4SI 0 "register_operand") | |
9695 | (plus:V4SI | |
9696 | (mult:V4SI | |
9697 | (sign_extend:V4SI | |
9698 | (vec_select:V4HI | |
9699 | (match_operand:V8HI 1 "nonimmediate_operand") | |
9700 | (parallel [(const_int 0) (const_int 2) | |
9701 | (const_int 4) (const_int 6)]))) | |
9702 | (sign_extend:V4SI | |
9703 | (vec_select:V4HI | |
9704 | (match_operand:V8HI 2 "nonimmediate_operand") | |
9705 | (parallel [(const_int 0) (const_int 2) | |
9706 | (const_int 4) (const_int 6)])))) | |
9707 | (mult:V4SI | |
9708 | (sign_extend:V4SI | |
9709 | (vec_select:V4HI (match_dup 1) | |
9710 | (parallel [(const_int 1) (const_int 3) | |
9711 | (const_int 5) (const_int 7)]))) | |
9712 | (sign_extend:V4SI | |
9713 | (vec_select:V4HI (match_dup 2) | |
9714 | (parallel [(const_int 1) (const_int 3) | |
9715 | (const_int 5) (const_int 7)]))))))] | |
9716 | "TARGET_SSE2" | |
9717 | "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);") | |
9718 | ||
ffbaf337 | 9719 | (define_insn "*sse2_pmaddwd" |
798dd0ba | 9720 | [(set (match_operand:V4SI 0 "register_operand" "=x,x") |
ef719a44 RH |
9721 | (plus:V4SI |
9722 | (mult:V4SI | |
9723 | (sign_extend:V4SI | |
9724 | (vec_select:V4HI | |
798dd0ba | 9725 | (match_operand:V8HI 1 "nonimmediate_operand" "%0,x") |
608dccd7 UB |
9726 | (parallel [(const_int 0) (const_int 2) |
9727 | (const_int 4) (const_int 6)]))) | |
ef719a44 RH |
9728 | (sign_extend:V4SI |
9729 | (vec_select:V4HI | |
798dd0ba | 9730 | (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm") |
608dccd7 UB |
9731 | (parallel [(const_int 0) (const_int 2) |
9732 | (const_int 4) (const_int 6)])))) | |
ef719a44 RH |
9733 | (mult:V4SI |
9734 | (sign_extend:V4SI | |
9735 | (vec_select:V4HI (match_dup 1) | |
608dccd7 UB |
9736 | (parallel [(const_int 1) (const_int 3) |
9737 | (const_int 5) (const_int 7)]))) | |
ef719a44 RH |
9738 | (sign_extend:V4SI |
9739 | (vec_select:V4HI (match_dup 2) | |
608dccd7 UB |
9740 | (parallel [(const_int 1) (const_int 3) |
9741 | (const_int 5) (const_int 7)]))))))] | |
27120a59 | 9742 | "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)" |
798dd0ba UB |
9743 | "@ |
9744 | pmaddwd\t{%2, %0|%0, %2} | |
9745 | vpmaddwd\t{%2, %1, %0|%0, %1, %2}" | |
9746 | [(set_attr "isa" "noavx,avx") | |
9747 | (set_attr "type" "sseiadd") | |
b6837b94 | 9748 | (set_attr "atom_unit" "simul") |
798dd0ba UB |
9749 | (set_attr "prefix_data16" "1,*") |
9750 | (set_attr "prefix" "orig,vex") | |
ef719a44 RH |
9751 | (set_attr "mode" "TI")]) |
9752 | ||
f1df7a1c AI |
9753 | (define_insn "avx512dq_mul<mode>3<mask_name>" |
9754 | [(set (match_operand:VI8 0 "register_operand" "=v") | |
9755 | (mult:VI8 | |
9756 | (match_operand:VI8 1 "register_operand" "v") | |
9757 | (match_operand:VI8 2 "nonimmediate_operand" "vm")))] | |
9758 | "TARGET_AVX512DQ && <mask_mode512bit_condition>" | |
9759 | "vpmullq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" | |
9760 | [(set_attr "type" "sseimul") | |
9761 | (set_attr "prefix" "evex") | |
9762 | (set_attr "mode" "<sseinsnmode>")]) | |
9763 | ||
47490470 | 9764 | (define_expand "mul<mode>3<mask_name>" |
f5f41d88 AI |
9765 | [(set (match_operand:VI4_AVX512F 0 "register_operand") |
9766 | (mult:VI4_AVX512F | |
9767 | (match_operand:VI4_AVX512F 1 "general_vector_operand") | |
9768 | (match_operand:VI4_AVX512F 2 "general_vector_operand")))] | |
47490470 | 9769 | "TARGET_SSE2 && <mask_mode512bit_condition>" |
2b5bf0e2 | 9770 | { |
ac357108 | 9771 | if (TARGET_SSE4_1) |
73e9d637 | 9772 | { |
baee1763 JJ |
9773 | if (!nonimmediate_operand (operands[1], <MODE>mode)) |
9774 | operands[1] = force_reg (<MODE>mode, operands[1]); | |
9775 | if (!nonimmediate_operand (operands[2], <MODE>mode)) | |
9776 | operands[2] = force_reg (<MODE>mode, operands[2]); | |
73e9d637 RH |
9777 | ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands); |
9778 | } | |
9779 | else | |
9780 | { | |
9781 | ix86_expand_sse2_mulv4si3 (operands[0], operands[1], operands[2]); | |
9782 | DONE; | |
9783 | } | |
2b5bf0e2 RH |
9784 | }) |
9785 | ||
47490470 | 9786 | (define_insn "*<sse4_1_avx2>_mul<mode>3<mask_name>" |
45392c76 | 9787 | [(set (match_operand:VI4_AVX512F 0 "register_operand" "=Yr,*x,v") |
f5f41d88 | 9788 | (mult:VI4_AVX512F |
45392c76 IE |
9789 | (match_operand:VI4_AVX512F 1 "nonimmediate_operand" "%0,0,v") |
9790 | (match_operand:VI4_AVX512F 2 "nonimmediate_operand" "Yrm,*xm,vm")))] | |
47490470 | 9791 | "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, <MODE>mode, operands) && <mask_mode512bit_condition>" |
798dd0ba | 9792 | "@ |
45392c76 | 9793 | pmulld\t{%2, %0|%0, %2} |
798dd0ba | 9794 | pmulld\t{%2, %0|%0, %2} |
47490470 | 9795 | vpmulld\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
45392c76 | 9796 | [(set_attr "isa" "noavx,noavx,avx") |
798dd0ba | 9797 | (set_attr "type" "sseimul") |
9a5cee02 | 9798 | (set_attr "prefix_extra" "1") |
45392c76 IE |
9799 | (set_attr "prefix" "<mask_prefix4>") |
9800 | (set_attr "btver2_decode" "vector,vector,vector") | |
977e83a3 | 9801 | (set_attr "mode" "<sseinsnmode>")]) |
9a5cee02 | 9802 | |
298301d9 | 9803 | (define_expand "mul<mode>3" |
f5f41d88 AI |
9804 | [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand") |
9805 | (mult:VI8_AVX2_AVX512F | |
9806 | (match_operand:VI8_AVX2_AVX512F 1 "register_operand") | |
9807 | (match_operand:VI8_AVX2_AVX512F 2 "register_operand")))] | |
298301d9 | 9808 | "TARGET_SSE2" |
64e6863e | 9809 | { |
298301d9 | 9810 | ix86_expand_sse2_mulvxdi3 (operands[0], operands[1], operands[2]); |
64e6863e RH |
9811 | DONE; |
9812 | }) | |
9813 | ||
1c4153dd | 9814 | (define_expand "vec_widen_<s>mult_hi_<mode>" |
82e86dc6 | 9815 | [(match_operand:<sseunpackmode> 0 "register_operand") |
1c4153dd | 9816 | (any_extend:<sseunpackmode> |
ac357108 RH |
9817 | (match_operand:VI124_AVX2 1 "register_operand")) |
9818 | (match_operand:VI124_AVX2 2 "register_operand")] | |
87b78516 | 9819 | "TARGET_SSE2" |
0e9dac9e | 9820 | { |
ac357108 RH |
9821 | ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2], |
9822 | <u_bool>, true); | |
0e9dac9e UB |
9823 | DONE; |
9824 | }) | |
9825 | ||
1c4153dd | 9826 | (define_expand "vec_widen_<s>mult_lo_<mode>" |
82e86dc6 | 9827 | [(match_operand:<sseunpackmode> 0 "register_operand") |
1c4153dd | 9828 | (any_extend:<sseunpackmode> |
ac357108 RH |
9829 | (match_operand:VI124_AVX2 1 "register_operand")) |
9830 | (match_operand:VI124_AVX2 2 "register_operand")] | |
87b78516 | 9831 | "TARGET_SSE2" |
89d67cca | 9832 | { |
ac357108 RH |
9833 | ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2], |
9834 | <u_bool>, false); | |
89d67cca DN |
9835 | DONE; |
9836 | }) | |
9837 | ||
87b78516 RH |
9838 | ;; Most widen_<s>mult_even_<mode> can be handled directly from other |
9839 | ;; named patterns, but signed V4SI needs special help for plain SSE2. | |
9840 | (define_expand "vec_widen_smult_even_v4si" | |
9841 | [(match_operand:V2DI 0 "register_operand") | |
baee1763 JJ |
9842 | (match_operand:V4SI 1 "nonimmediate_operand") |
9843 | (match_operand:V4SI 2 "nonimmediate_operand")] | |
87b78516 RH |
9844 | "TARGET_SSE2" |
9845 | { | |
9846 | ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2], | |
9847 | false, false); | |
9848 | DONE; | |
9849 | }) | |
9850 | ||
93703e79 RH |
9851 | (define_expand "vec_widen_<s>mult_odd_<mode>" |
9852 | [(match_operand:<sseunpackmode> 0 "register_operand") | |
9853 | (any_extend:<sseunpackmode> | |
f5f41d88 AI |
9854 | (match_operand:VI4_AVX512F 1 "general_vector_operand")) |
9855 | (match_operand:VI4_AVX512F 2 "general_vector_operand")] | |
87b78516 | 9856 | "TARGET_SSE2" |
93703e79 RH |
9857 | { |
9858 | ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2], | |
9859 | <u_bool>, true); | |
9860 | DONE; | |
9861 | }) | |
9862 | ||
ed3e611e AI |
9863 | (define_mode_attr SDOT_PMADD_SUF |
9864 | [(V32HI "512v32hi") (V16HI "") (V8HI "")]) | |
9865 | ||
1c4153dd | 9866 | (define_expand "sdot_prod<mode>" |
82e86dc6 UB |
9867 | [(match_operand:<sseunpackmode> 0 "register_operand") |
9868 | (match_operand:VI2_AVX2 1 "register_operand") | |
9869 | (match_operand:VI2_AVX2 2 "register_operand") | |
9870 | (match_operand:<sseunpackmode> 3 "register_operand")] | |
20f06221 DN |
9871 | "TARGET_SSE2" |
9872 | { | |
1c4153dd | 9873 | rtx t = gen_reg_rtx (<sseunpackmode>mode); |
ed3e611e | 9874 | emit_insn (gen_<sse2_avx2>_pmaddwd<SDOT_PMADD_SUF> (t, operands[1], operands[2])); |
f7df4a84 | 9875 | emit_insn (gen_rtx_SET (operands[0], |
1c4153dd JJ |
9876 | gen_rtx_PLUS (<sseunpackmode>mode, |
9877 | operands[3], t))); | |
20f06221 DN |
9878 | DONE; |
9879 | }) | |
9880 | ||
a2051b26 RH |
9881 | ;; Normally we use widen_mul_even/odd, but combine can't quite get it all |
9882 | ;; back together when madd is available. | |
9883 | (define_expand "sdot_prodv4si" | |
82e86dc6 | 9884 | [(match_operand:V2DI 0 "register_operand") |
a2051b26 | 9885 | (match_operand:V4SI 1 "register_operand") |
82e86dc6 UB |
9886 | (match_operand:V4SI 2 "register_operand") |
9887 | (match_operand:V2DI 3 "register_operand")] | |
a2051b26 | 9888 | "TARGET_XOP" |
1c4153dd | 9889 | { |
a2051b26 RH |
9890 | rtx t = gen_reg_rtx (V2DImode); |
9891 | emit_insn (gen_xop_pmacsdqh (t, operands[1], operands[2], operands[3])); | |
9892 | emit_insn (gen_xop_pmacsdql (operands[0], operands[1], operands[2], t)); | |
1c4153dd JJ |
9893 | DONE; |
9894 | }) | |
9895 | ||
79d652a5 CH |
9896 | (define_expand "usadv16qi" |
9897 | [(match_operand:V4SI 0 "register_operand") | |
9898 | (match_operand:V16QI 1 "register_operand") | |
9899 | (match_operand:V16QI 2 "nonimmediate_operand") | |
9900 | (match_operand:V4SI 3 "nonimmediate_operand")] | |
9901 | "TARGET_SSE2" | |
9902 | { | |
9903 | rtx t1 = gen_reg_rtx (V2DImode); | |
9904 | rtx t2 = gen_reg_rtx (V4SImode); | |
9905 | emit_insn (gen_sse2_psadbw (t1, operands[1], operands[2])); | |
9906 | convert_move (t2, t1, 0); | |
9907 | emit_insn (gen_addv4si3 (operands[0], t2, operands[3])); | |
9908 | DONE; | |
9909 | }) | |
9910 | ||
9911 | (define_expand "usadv32qi" | |
9912 | [(match_operand:V8SI 0 "register_operand") | |
9913 | (match_operand:V32QI 1 "register_operand") | |
9914 | (match_operand:V32QI 2 "nonimmediate_operand") | |
9915 | (match_operand:V8SI 3 "nonimmediate_operand")] | |
9916 | "TARGET_AVX2" | |
9917 | { | |
9918 | rtx t1 = gen_reg_rtx (V4DImode); | |
9919 | rtx t2 = gen_reg_rtx (V8SImode); | |
9920 | emit_insn (gen_avx2_psadbw (t1, operands[1], operands[2])); | |
9921 | convert_move (t2, t1, 0); | |
9922 | emit_insn (gen_addv8si3 (operands[0], t2, operands[3])); | |
9923 | DONE; | |
9924 | }) | |
9925 | ||
ef719a44 | 9926 | (define_insn "ashr<mode>3" |
977e83a3 KY |
9927 | [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x") |
9928 | (ashiftrt:VI24_AVX2 | |
9929 | (match_operand:VI24_AVX2 1 "register_operand" "0,x") | |
798dd0ba | 9930 | (match_operand:SI 2 "nonmemory_operand" "xN,xN")))] |
ef719a44 | 9931 | "TARGET_SSE2" |
798dd0ba | 9932 | "@ |
cbb734aa UB |
9933 | psra<ssemodesuffix>\t{%2, %0|%0, %2} |
9934 | vpsra<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" | |
798dd0ba UB |
9935 | [(set_attr "isa" "noavx,avx") |
9936 | (set_attr "type" "sseishft") | |
725fd454 | 9937 | (set (attr "length_immediate") |
82e86dc6 | 9938 | (if_then_else (match_operand 2 "const_int_operand") |
725fd454 JJ |
9939 | (const_string "1") |
9940 | (const_string "0"))) | |
798dd0ba UB |
9941 | (set_attr "prefix_data16" "1,*") |
9942 | (set_attr "prefix" "orig,vex") | |
977e83a3 KY |
9943 | (set_attr "mode" "<sseinsnmode>")]) |
9944 | ||
28e9a294 AI |
9945 | (define_insn "<mask_codefor>ashr<mode>3<mask_name>" |
9946 | [(set (match_operand:VI24_AVX512BW_1 0 "register_operand" "=v,v") | |
9947 | (ashiftrt:VI24_AVX512BW_1 | |
9948 | (match_operand:VI24_AVX512BW_1 1 "nonimmediate_operand" "v,vm") | |
9949 | (match_operand:SI 2 "nonmemory_operand" "v,N")))] | |
9950 | "TARGET_AVX512VL" | |
9951 | "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" | |
9952 | [(set_attr "type" "sseishft") | |
9953 | (set (attr "length_immediate") | |
9954 | (if_then_else (match_operand 2 "const_int_operand") | |
9955 | (const_string "1") | |
9956 | (const_string "0"))) | |
9957 | (set_attr "mode" "<sseinsnmode>")]) | |
9958 | ||
9959 | (define_insn "<mask_codefor>ashrv2di3<mask_name>" | |
9960 | [(set (match_operand:V2DI 0 "register_operand" "=v,v") | |
9961 | (ashiftrt:V2DI | |
9962 | (match_operand:V2DI 1 "nonimmediate_operand" "v,vm") | |
9963 | (match_operand:DI 2 "nonmemory_operand" "v,N")))] | |
9964 | "TARGET_AVX512VL" | |
9965 | "vpsraq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" | |
9966 | [(set_attr "type" "sseishft") | |
9967 | (set (attr "length_immediate") | |
9968 | (if_then_else (match_operand 2 "const_int_operand") | |
9969 | (const_string "1") | |
9970 | (const_string "0"))) | |
9971 | (set_attr "mode" "TI")]) | |
9972 | ||
47490470 | 9973 | (define_insn "ashr<mode>3<mask_name>" |
28e9a294 AI |
9974 | [(set (match_operand:VI248_AVX512BW_AVX512VL 0 "register_operand" "=v,v") |
9975 | (ashiftrt:VI248_AVX512BW_AVX512VL | |
9976 | (match_operand:VI248_AVX512BW_AVX512VL 1 "nonimmediate_operand" "v,vm") | |
47490470 | 9977 | (match_operand:SI 2 "nonmemory_operand" "v,N")))] |
28e9a294 | 9978 | "TARGET_AVX512F" |
47490470 AI |
9979 | "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
9980 | [(set_attr "type" "sseishft") | |
9981 | (set (attr "length_immediate") | |
9982 | (if_then_else (match_operand 2 "const_int_operand") | |
9983 | (const_string "1") | |
9984 | (const_string "0"))) | |
9985 | (set_attr "mode" "<sseinsnmode>")]) | |
9986 | ||
3616dc70 AI |
9987 | (define_insn "<shift_insn><mode>3<mask_name>" |
9988 | [(set (match_operand:VI2_AVX2_AVX512BW 0 "register_operand" "=x,v") | |
9989 | (any_lshift:VI2_AVX2_AVX512BW | |
9990 | (match_operand:VI2_AVX2_AVX512BW 1 "register_operand" "0,v") | |
9991 | (match_operand:SI 2 "nonmemory_operand" "xN,vN")))] | |
9992 | "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>" | |
9993 | "@ | |
9994 | p<vshift><ssemodesuffix>\t{%2, %0|%0, %2} | |
9995 | vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" | |
9996 | [(set_attr "isa" "noavx,avx") | |
9997 | (set_attr "type" "sseishft") | |
9998 | (set (attr "length_immediate") | |
9999 | (if_then_else (match_operand 2 "const_int_operand") | |
10000 | (const_string "1") | |
10001 | (const_string "0"))) | |
10002 | (set_attr "prefix_data16" "1,*") | |
10003 | (set_attr "prefix" "orig,vex") | |
10004 | (set_attr "mode" "<sseinsnmode>")]) | |
10005 | ||
10006 | (define_insn "<shift_insn><mode>3<mask_name>" | |
10007 | [(set (match_operand:VI48_AVX2 0 "register_operand" "=x,v") | |
10008 | (any_lshift:VI48_AVX2 | |
10009 | (match_operand:VI48_AVX2 1 "register_operand" "0,v") | |
10010 | (match_operand:SI 2 "nonmemory_operand" "xN,vN")))] | |
10011 | "TARGET_SSE2 && <mask_mode512bit_condition>" | |
798dd0ba | 10012 | "@ |
1162730f | 10013 | p<vshift><ssemodesuffix>\t{%2, %0|%0, %2} |
3616dc70 | 10014 | vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
798dd0ba UB |
10015 | [(set_attr "isa" "noavx,avx") |
10016 | (set_attr "type" "sseishft") | |
725fd454 | 10017 | (set (attr "length_immediate") |
82e86dc6 | 10018 | (if_then_else (match_operand 2 "const_int_operand") |
725fd454 JJ |
10019 | (const_string "1") |
10020 | (const_string "0"))) | |
798dd0ba UB |
10021 | (set_attr "prefix_data16" "1,*") |
10022 | (set_attr "prefix" "orig,vex") | |
8c353205 | 10023 | (set_attr "mode" "<sseinsnmode>")]) |
ef719a44 | 10024 | |
47490470 | 10025 | (define_insn "<shift_insn><mode>3<mask_name>" |
0fe65b75 AI |
10026 | [(set (match_operand:VI48_512 0 "register_operand" "=v,v") |
10027 | (any_lshift:VI48_512 | |
cf73ee60 | 10028 | (match_operand:VI48_512 1 "nonimmediate_operand" "v,m") |
0fe65b75 | 10029 | (match_operand:SI 2 "nonmemory_operand" "vN,N")))] |
47490470 AI |
10030 | "TARGET_AVX512F && <mask_mode512bit_condition>" |
10031 | "vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" | |
0fe65b75 AI |
10032 | [(set_attr "isa" "avx512f") |
10033 | (set_attr "type" "sseishft") | |
10034 | (set (attr "length_immediate") | |
10035 | (if_then_else (match_operand 2 "const_int_operand") | |
10036 | (const_string "1") | |
10037 | (const_string "0"))) | |
10038 | (set_attr "prefix" "evex") | |
10039 | (set_attr "mode" "<sseinsnmode>")]) | |
10040 | ||
47490470 | 10041 | |
69a2964c | 10042 | (define_expand "vec_shl_<mode>" |
d8c84975 | 10043 | [(set (match_dup 3) |
977e83a3 | 10044 | (ashift:V1TI |
82e86dc6 | 10045 | (match_operand:VI_128 1 "register_operand") |
d8c84975 JJ |
10046 | (match_operand:SI 2 "const_0_to_255_mul_8_operand"))) |
10047 | (set (match_operand:VI_128 0 "register_operand") (match_dup 4))] | |
69a2964c RH |
10048 | "TARGET_SSE2" |
10049 | { | |
fe6ae2da | 10050 | operands[1] = gen_lowpart (V1TImode, operands[1]); |
d8c84975 JJ |
10051 | operands[3] = gen_reg_rtx (V1TImode); |
10052 | operands[4] = gen_lowpart (<MODE>mode, operands[3]); | |
69a2964c RH |
10053 | }) |
10054 | ||
977e83a3 | 10055 | (define_insn "<sse2_avx2>_ashl<mode>3" |
98ee4d9b | 10056 | [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,v") |
977e83a3 | 10057 | (ashift:VIMAX_AVX2 |
98ee4d9b | 10058 | (match_operand:VIMAX_AVX2 1 "register_operand" "0,v") |
798dd0ba UB |
10059 | (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))] |
10060 | "TARGET_SSE2" | |
10061 | { | |
10062 | operands[2] = GEN_INT (INTVAL (operands[2]) / 8); | |
10063 | ||
10064 | switch (which_alternative) | |
10065 | { | |
10066 | case 0: | |
10067 | return "pslldq\t{%2, %0|%0, %2}"; | |
10068 | case 1: | |
10069 | return "vpslldq\t{%2, %1, %0|%0, %1, %2}"; | |
10070 | default: | |
10071 | gcc_unreachable (); | |
10072 | } | |
10073 | } | |
10074 | [(set_attr "isa" "noavx,avx") | |
10075 | (set_attr "type" "sseishft") | |
10076 | (set_attr "length_immediate" "1") | |
10077 | (set_attr "prefix_data16" "1,*") | |
10078 | (set_attr "prefix" "orig,vex") | |
977e83a3 | 10079 | (set_attr "mode" "<sseinsnmode>")]) |
798dd0ba | 10080 | |
69a2964c | 10081 | (define_expand "vec_shr_<mode>" |
d8c84975 | 10082 | [(set (match_dup 3) |
977e83a3 | 10083 | (lshiftrt:V1TI |
82e86dc6 | 10084 | (match_operand:VI_128 1 "register_operand") |
d8c84975 JJ |
10085 | (match_operand:SI 2 "const_0_to_255_mul_8_operand"))) |
10086 | (set (match_operand:VI_128 0 "register_operand") (match_dup 4))] | |
69a2964c RH |
10087 | "TARGET_SSE2" |
10088 | { | |
fe6ae2da | 10089 | operands[1] = gen_lowpart (V1TImode, operands[1]); |
d8c84975 JJ |
10090 | operands[3] = gen_reg_rtx (V1TImode); |
10091 | operands[4] = gen_lowpart (<MODE>mode, operands[3]); | |
69a2964c RH |
10092 | }) |
10093 | ||
8a0436cb | 10094 | (define_insn "<sse2_avx2>_lshr<mode>3" |
98ee4d9b | 10095 | [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,v") |
8a0436cb | 10096 | (lshiftrt:VIMAX_AVX2 |
98ee4d9b | 10097 | (match_operand:VIMAX_AVX2 1 "register_operand" "0,v") |
798dd0ba | 10098 | (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))] |
ef719a44 | 10099 | "TARGET_SSE2" |
798dd0ba UB |
10100 | { |
10101 | operands[2] = GEN_INT (INTVAL (operands[2]) / 8); | |
ef719a44 | 10102 | |
798dd0ba UB |
10103 | switch (which_alternative) |
10104 | { | |
10105 | case 0: | |
10106 | return "psrldq\t{%2, %0|%0, %2}"; | |
10107 | case 1: | |
10108 | return "vpsrldq\t{%2, %1, %0|%0, %1, %2}"; | |
10109 | default: | |
10110 | gcc_unreachable (); | |
10111 | } | |
10112 | } | |
10113 | [(set_attr "isa" "noavx,avx") | |
10114 | (set_attr "type" "sseishft") | |
10115 | (set_attr "length_immediate" "1") | |
10116 | (set_attr "atom_unit" "sishuf") | |
10117 | (set_attr "prefix_data16" "1,*") | |
10118 | (set_attr "prefix" "orig,vex") | |
8a0436cb JJ |
10119 | (set_attr "mode" "<sseinsnmode>")]) |
10120 | ||
6ead0238 AI |
10121 | (define_insn "<avx512>_<rotate>v<mode><mask_name>" |
10122 | [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v") | |
10123 | (any_rotate:VI48_AVX512VL | |
10124 | (match_operand:VI48_AVX512VL 1 "register_operand" "v") | |
10125 | (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")))] | |
0fe65b75 | 10126 | "TARGET_AVX512F" |
47490470 | 10127 | "vp<rotate>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
0fe65b75 AI |
10128 | [(set_attr "prefix" "evex") |
10129 | (set_attr "mode" "<sseinsnmode>")]) | |
10130 | ||
6ead0238 AI |
10131 | (define_insn "<avx512>_<rotate><mode><mask_name>" |
10132 | [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v") | |
10133 | (any_rotate:VI48_AVX512VL | |
10134 | (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm") | |
0fe65b75 AI |
10135 | (match_operand:SI 2 "const_0_to_255_operand")))] |
10136 | "TARGET_AVX512F" | |
47490470 | 10137 | "vp<rotate><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
0fe65b75 AI |
10138 | [(set_attr "prefix" "evex") |
10139 | (set_attr "mode" "<sseinsnmode>")]) | |
52325f2c | 10140 | |
575d952c AI |
10141 | (define_expand "<code><mode>3" |
10142 | [(set (match_operand:VI124_256_AVX512F_AVX512BW 0 "register_operand") | |
10143 | (maxmin:VI124_256_AVX512F_AVX512BW | |
10144 | (match_operand:VI124_256_AVX512F_AVX512BW 1 "nonimmediate_operand") | |
10145 | (match_operand:VI124_256_AVX512F_AVX512BW 2 "nonimmediate_operand")))] | |
10146 | "TARGET_AVX2" | |
8a0436cb JJ |
10147 | "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);") |
10148 | ||
575d952c AI |
10149 | (define_insn "*avx2_<code><mode>3" |
10150 | [(set (match_operand:VI124_256 0 "register_operand" "=v") | |
10151 | (maxmin:VI124_256 | |
10152 | (match_operand:VI124_256 1 "nonimmediate_operand" "%v") | |
10153 | (match_operand:VI124_256 2 "nonimmediate_operand" "vm")))] | |
10154 | "TARGET_AVX2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" | |
10155 | "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" | |
8a0436cb JJ |
10156 | [(set_attr "type" "sseiadd") |
10157 | (set_attr "prefix_extra" "1") | |
575d952c | 10158 | (set_attr "prefix" "vex") |
8a0436cb | 10159 | (set_attr "mode" "OI")]) |
ef719a44 | 10160 | |
575d952c AI |
10161 | (define_expand "<code><mode>3_mask" |
10162 | [(set (match_operand:VI48_AVX512VL 0 "register_operand") | |
10163 | (vec_merge:VI48_AVX512VL | |
10164 | (maxmin:VI48_AVX512VL | |
10165 | (match_operand:VI48_AVX512VL 1 "nonimmediate_operand") | |
10166 | (match_operand:VI48_AVX512VL 2 "nonimmediate_operand")) | |
10167 | (match_operand:VI48_AVX512VL 3 "vector_move_operand") | |
10168 | (match_operand:<avx512fmaskmode> 4 "register_operand")))] | |
10169 | "TARGET_AVX512F" | |
10170 | "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);") | |
10171 | ||
10172 | (define_insn "*avx512bw_<code><mode>3<mask_name>" | |
10173 | [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v") | |
10174 | (maxmin:VI48_AVX512VL | |
10175 | (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "%v") | |
10176 | (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")))] | |
10177 | "TARGET_AVX512F && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" | |
10178 | "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" | |
10179 | [(set_attr "type" "sseiadd") | |
10180 | (set_attr "prefix_extra" "1") | |
10181 | (set_attr "prefix" "maybe_evex") | |
10182 | (set_attr "mode" "<sseinsnmode>")]) | |
10183 | ||
e7b533a2 | 10184 | (define_insn "<mask_codefor><code><mode>3<mask_name>" |
575d952c AI |
10185 | [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v") |
10186 | (maxmin:VI12_AVX512VL | |
10187 | (match_operand:VI12_AVX512VL 1 "register_operand" "v") | |
10188 | (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")))] | |
10189 | "TARGET_AVX512BW" | |
e7b533a2 AI |
10190 | "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
10191 | [(set_attr "type" "sseiadd") | |
10192 | (set_attr "prefix" "evex") | |
10193 | (set_attr "mode" "<sseinsnmode>")]) | |
10194 | ||
32469ccc | 10195 | (define_expand "<code><mode>3" |
575d952c AI |
10196 | [(set (match_operand:VI8_AVX2_AVX512BW 0 "register_operand") |
10197 | (maxmin:VI8_AVX2_AVX512BW | |
10198 | (match_operand:VI8_AVX2_AVX512BW 1 "register_operand") | |
10199 | (match_operand:VI8_AVX2_AVX512BW 2 "register_operand")))] | |
52325f2c UB |
10200 | "TARGET_SSE4_2" |
10201 | { | |
575d952c AI |
10202 | if (TARGET_AVX512F |
10203 | && (<MODE>mode == V8DImode || TARGET_AVX512VL)) | |
10204 | ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands); | |
10205 | else | |
10206 | { | |
10207 | enum rtx_code code; | |
10208 | rtx xops[6]; | |
10209 | bool ok; | |
977e83a3 | 10210 | |
52325f2c | 10211 | |
575d952c | 10212 | xops[0] = operands[0]; |
52325f2c | 10213 | |
575d952c AI |
10214 | if (<CODE> == SMAX || <CODE> == UMAX) |
10215 | { | |
10216 | xops[1] = operands[1]; | |
10217 | xops[2] = operands[2]; | |
10218 | } | |
10219 | else | |
10220 | { | |
10221 | xops[1] = operands[2]; | |
10222 | xops[2] = operands[1]; | |
10223 | } | |
52325f2c | 10224 | |
575d952c | 10225 | code = (<CODE> == UMAX || <CODE> == UMIN) ? GTU : GT; |
52325f2c | 10226 | |
575d952c AI |
10227 | xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]); |
10228 | xops[4] = operands[1]; | |
10229 | xops[5] = operands[2]; | |
10230 | ||
10231 | ok = ix86_expand_int_vcond (xops); | |
10232 | gcc_assert (ok); | |
10233 | DONE; | |
10234 | } | |
52325f2c UB |
10235 | }) |
10236 | ||
10237 | (define_expand "<code><mode>3" | |
82e86dc6 | 10238 | [(set (match_operand:VI124_128 0 "register_operand") |
f327a48e | 10239 | (smaxmin:VI124_128 |
82e86dc6 UB |
10240 | (match_operand:VI124_128 1 "nonimmediate_operand") |
10241 | (match_operand:VI124_128 2 "nonimmediate_operand")))] | |
52325f2c UB |
10242 | "TARGET_SSE2" |
10243 | { | |
10244 | if (TARGET_SSE4_1 || <MODE>mode == V8HImode) | |
10245 | ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands); | |
10246 | else | |
10247 | { | |
10248 | rtx xops[6]; | |
10249 | bool ok; | |
10250 | ||
10251 | xops[0] = operands[0]; | |
bdbebb7d JJ |
10252 | operands[1] = force_reg (<MODE>mode, operands[1]); |
10253 | operands[2] = force_reg (<MODE>mode, operands[2]); | |
52325f2c UB |
10254 | |
10255 | if (<CODE> == SMAX) | |
10256 | { | |
10257 | xops[1] = operands[1]; | |
10258 | xops[2] = operands[2]; | |
10259 | } | |
10260 | else | |
10261 | { | |
10262 | xops[1] = operands[2]; | |
10263 | xops[2] = operands[1]; | |
10264 | } | |
10265 | ||
10266 | xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]); | |
10267 | xops[4] = operands[1]; | |
10268 | xops[5] = operands[2]; | |
10269 | ||
10270 | ok = ix86_expand_int_vcond (xops); | |
10271 | gcc_assert (ok); | |
10272 | DONE; | |
10273 | } | |
10274 | }) | |
977e83a3 | 10275 | |
c305ca7f | 10276 | (define_insn "*sse4_1_<code><mode>3<mask_name>" |
45392c76 | 10277 | [(set (match_operand:VI14_128 0 "register_operand" "=Yr,*x,v") |
798dd0ba | 10278 | (smaxmin:VI14_128 |
45392c76 IE |
10279 | (match_operand:VI14_128 1 "nonimmediate_operand" "%0,0,v") |
10280 | (match_operand:VI14_128 2 "nonimmediate_operand" "Yrm,*xm,vm")))] | |
c305ca7f AI |
10281 | "TARGET_SSE4_1 |
10282 | && <mask_mode512bit_condition> | |
10283 | && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" | |
798dd0ba | 10284 | "@ |
45392c76 | 10285 | p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2} |
cbb734aa | 10286 | p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2} |
c305ca7f | 10287 | vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
45392c76 | 10288 | [(set_attr "isa" "noavx,noavx,avx") |
798dd0ba | 10289 | (set_attr "type" "sseiadd") |
45392c76 IE |
10290 | (set_attr "prefix_extra" "1,1,*") |
10291 | (set_attr "prefix" "orig,orig,vex") | |
4150f926 UB |
10292 | (set_attr "mode" "TI")]) |
10293 | ||
78e8956b | 10294 | (define_insn "*<code>v8hi3" |
798dd0ba | 10295 | [(set (match_operand:V8HI 0 "register_operand" "=x,x") |
78e8956b | 10296 | (smaxmin:V8HI |
798dd0ba UB |
10297 | (match_operand:V8HI 1 "nonimmediate_operand" "%0,x") |
10298 | (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))] | |
78e8956b | 10299 | "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)" |
798dd0ba UB |
10300 | "@ |
10301 | p<maxmin_int>w\t{%2, %0|%0, %2} | |
10302 | vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}" | |
10303 | [(set_attr "isa" "noavx,avx") | |
10304 | (set_attr "type" "sseiadd") | |
10305 | (set_attr "prefix_data16" "1,*") | |
10306 | (set_attr "prefix_extra" "*,1") | |
10307 | (set_attr "prefix" "orig,vex") | |
ef719a44 RH |
10308 | (set_attr "mode" "TI")]) |
10309 | ||
32469ccc | 10310 | (define_expand "<code><mode>3" |
82e86dc6 | 10311 | [(set (match_operand:VI124_128 0 "register_operand") |
f327a48e | 10312 | (umaxmin:VI124_128 |
82e86dc6 UB |
10313 | (match_operand:VI124_128 1 "nonimmediate_operand") |
10314 | (match_operand:VI124_128 2 "nonimmediate_operand")))] | |
9fb93f89 RH |
10315 | "TARGET_SSE2" |
10316 | { | |
52325f2c | 10317 | if (TARGET_SSE4_1 || <MODE>mode == V16QImode) |
32469ccc | 10318 | ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands); |
52325f2c UB |
10319 | else if (<CODE> == UMAX && <MODE>mode == V8HImode) |
10320 | { | |
10321 | rtx op0 = operands[0], op2 = operands[2], op3 = op0; | |
bdbebb7d | 10322 | operands[1] = force_reg (<MODE>mode, operands[1]); |
52325f2c UB |
10323 | if (rtx_equal_p (op3, op2)) |
10324 | op3 = gen_reg_rtx (V8HImode); | |
10325 | emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2)); | |
10326 | emit_insn (gen_addv8hi3 (op0, op3, op2)); | |
10327 | DONE; | |
10328 | } | |
9a5cee02 | 10329 | else |
a427621f UB |
10330 | { |
10331 | rtx xops[6]; | |
10332 | bool ok; | |
10333 | ||
bdbebb7d JJ |
10334 | operands[1] = force_reg (<MODE>mode, operands[1]); |
10335 | operands[2] = force_reg (<MODE>mode, operands[2]); | |
10336 | ||
a427621f | 10337 | xops[0] = operands[0]; |
52325f2c UB |
10338 | |
10339 | if (<CODE> == UMAX) | |
10340 | { | |
10341 | xops[1] = operands[1]; | |
10342 | xops[2] = operands[2]; | |
10343 | } | |
10344 | else | |
10345 | { | |
10346 | xops[1] = operands[2]; | |
10347 | xops[2] = operands[1]; | |
10348 | } | |
10349 | ||
10350 | xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]); | |
a427621f UB |
10351 | xops[4] = operands[1]; |
10352 | xops[5] = operands[2]; | |
52325f2c | 10353 | |
a427621f UB |
10354 | ok = ix86_expand_int_vcond (xops); |
10355 | gcc_assert (ok); | |
10356 | DONE; | |
10357 | } | |
69a2964c RH |
10358 | }) |
10359 | ||
c305ca7f | 10360 | (define_insn "*sse4_1_<code><mode>3<mask_name>" |
45392c76 | 10361 | [(set (match_operand:VI24_128 0 "register_operand" "=Yr,*x,v") |
798dd0ba | 10362 | (umaxmin:VI24_128 |
45392c76 IE |
10363 | (match_operand:VI24_128 1 "nonimmediate_operand" "%0,0,v") |
10364 | (match_operand:VI24_128 2 "nonimmediate_operand" "Yrm,*xm,vm")))] | |
c305ca7f AI |
10365 | "TARGET_SSE4_1 |
10366 | && <mask_mode512bit_condition> | |
10367 | && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" | |
798dd0ba | 10368 | "@ |
45392c76 | 10369 | p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2} |
cbb734aa | 10370 | p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2} |
c305ca7f | 10371 | vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
45392c76 | 10372 | [(set_attr "isa" "noavx,noavx,avx") |
798dd0ba | 10373 | (set_attr "type" "sseiadd") |
45392c76 IE |
10374 | (set_attr "prefix_extra" "1,1,*") |
10375 | (set_attr "prefix" "orig,orig,vex") | |
798dd0ba UB |
10376 | (set_attr "mode" "TI")]) |
10377 | ||
10378 | (define_insn "*<code>v16qi3" | |
10379 | [(set (match_operand:V16QI 0 "register_operand" "=x,x") | |
10380 | (umaxmin:V16QI | |
10381 | (match_operand:V16QI 1 "nonimmediate_operand" "%0,x") | |
10382 | (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))] | |
10383 | "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)" | |
10384 | "@ | |
10385 | p<maxmin_int>b\t{%2, %0|%0, %2} | |
10386 | vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}" | |
10387 | [(set_attr "isa" "noavx,avx") | |
10388 | (set_attr "type" "sseiadd") | |
10389 | (set_attr "prefix_data16" "1,*") | |
10390 | (set_attr "prefix_extra" "*,1") | |
10391 | (set_attr "prefix" "orig,vex") | |
10392 | (set_attr "mode" "TI")]) | |
10393 | ||
ef719a44 RH |
10394 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
10395 | ;; | |
10396 | ;; Parallel integral comparisons | |
10397 | ;; | |
10398 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
10399 | ||
977e83a3 | 10400 | (define_expand "avx2_eq<mode>3" |
82e86dc6 | 10401 | [(set (match_operand:VI_256 0 "register_operand") |
b5344bf4 | 10402 | (eq:VI_256 |
82e86dc6 UB |
10403 | (match_operand:VI_256 1 "nonimmediate_operand") |
10404 | (match_operand:VI_256 2 "nonimmediate_operand")))] | |
977e83a3 KY |
10405 | "TARGET_AVX2" |
10406 | "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);") | |
10407 | ||
10408 | (define_insn "*avx2_eq<mode>3" | |
b5344bf4 UB |
10409 | [(set (match_operand:VI_256 0 "register_operand" "=x") |
10410 | (eq:VI_256 | |
10411 | (match_operand:VI_256 1 "nonimmediate_operand" "%x") | |
10412 | (match_operand:VI_256 2 "nonimmediate_operand" "xm")))] | |
977e83a3 KY |
10413 | "TARGET_AVX2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)" |
10414 | "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" | |
10415 | [(set_attr "type" "ssecmp") | |
10416 | (set_attr "prefix_extra" "1") | |
10417 | (set_attr "prefix" "vex") | |
10418 | (set_attr "mode" "OI")]) | |
10419 | ||
54967fb0 AI |
10420 | (define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>" |
10421 | [(set (match_operand:<avx512fmaskmode> 0 "register_operand") | |
10422 | (unspec:<avx512fmaskmode> | |
10423 | [(match_operand:VI12_AVX512VL 1 "register_operand") | |
10424 | (match_operand:VI12_AVX512VL 2 "nonimmediate_operand")] | |
10425 | UNSPEC_MASKED_EQ))] | |
10426 | "TARGET_AVX512BW" | |
10427 | "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);") | |
10428 | ||
10429 | (define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>" | |
0fe65b75 AI |
10430 | [(set (match_operand:<avx512fmaskmode> 0 "register_operand") |
10431 | (unspec:<avx512fmaskmode> | |
54967fb0 AI |
10432 | [(match_operand:VI48_AVX512VL 1 "register_operand") |
10433 | (match_operand:VI48_AVX512VL 2 "nonimmediate_operand")] | |
0fe65b75 AI |
10434 | UNSPEC_MASKED_EQ))] |
10435 | "TARGET_AVX512F" | |
10436 | "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);") | |
10437 | ||
54967fb0 | 10438 | (define_insn "<avx512>_eq<mode>3<mask_scalar_merge_name>_1" |
be792bce | 10439 | [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk") |
0fe65b75 | 10440 | (unspec:<avx512fmaskmode> |
54967fb0 AI |
10441 | [(match_operand:VI12_AVX512VL 1 "register_operand" "%v") |
10442 | (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")] | |
10443 | UNSPEC_MASKED_EQ))] | |
10444 | "TARGET_AVX512F && ix86_binary_operator_ok (EQ, <MODE>mode, operands)" | |
10445 | "vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}" | |
10446 | [(set_attr "type" "ssecmp") | |
10447 | (set_attr "prefix_extra" "1") | |
10448 | (set_attr "prefix" "evex") | |
10449 | (set_attr "mode" "<sseinsnmode>")]) | |
10450 | ||
10451 | (define_insn "<avx512>_eq<mode>3<mask_scalar_merge_name>_1" | |
10452 | [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk") | |
10453 | (unspec:<avx512fmaskmode> | |
10454 | [(match_operand:VI48_AVX512VL 1 "register_operand" "%v") | |
10455 | (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")] | |
0fe65b75 AI |
10456 | UNSPEC_MASKED_EQ))] |
10457 | "TARGET_AVX512F && ix86_binary_operator_ok (EQ, <MODE>mode, operands)" | |
a95ec517 | 10458 | "vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}" |
0fe65b75 AI |
10459 | [(set_attr "type" "ssecmp") |
10460 | (set_attr "prefix_extra" "1") | |
10461 | (set_attr "prefix" "evex") | |
10462 | (set_attr "mode" "<sseinsnmode>")]) | |
10463 | ||
798dd0ba | 10464 | (define_insn "*sse4_1_eqv2di3" |
45392c76 | 10465 | [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,x") |
798dd0ba | 10466 | (eq:V2DI |
45392c76 IE |
10467 | (match_operand:V2DI 1 "nonimmediate_operand" "%0,0,x") |
10468 | (match_operand:V2DI 2 "nonimmediate_operand" "Yrm,*xm,xm")))] | |
798dd0ba UB |
10469 | "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)" |
10470 | "@ | |
45392c76 | 10471 | pcmpeqq\t{%2, %0|%0, %2} |
798dd0ba UB |
10472 | pcmpeqq\t{%2, %0|%0, %2} |
10473 | vpcmpeqq\t{%2, %1, %0|%0, %1, %2}" | |
45392c76 | 10474 | [(set_attr "isa" "noavx,noavx,avx") |
798dd0ba UB |
10475 | (set_attr "type" "ssecmp") |
10476 | (set_attr "prefix_extra" "1") | |
45392c76 | 10477 | (set_attr "prefix" "orig,orig,vex") |
95879c72 L |
10478 | (set_attr "mode" "TI")]) |
10479 | ||
ffbaf337 | 10480 | (define_insn "*sse2_eq<mode>3" |
798dd0ba UB |
10481 | [(set (match_operand:VI124_128 0 "register_operand" "=x,x") |
10482 | (eq:VI124_128 | |
10483 | (match_operand:VI124_128 1 "nonimmediate_operand" "%0,x") | |
10484 | (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))] | |
43a8b705 | 10485 | "TARGET_SSE2 && !TARGET_XOP |
04e1d06b | 10486 | && ix86_binary_operator_ok (EQ, <MODE>mode, operands)" |
798dd0ba | 10487 | "@ |
cbb734aa UB |
10488 | pcmpeq<ssemodesuffix>\t{%2, %0|%0, %2} |
10489 | vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" | |
798dd0ba UB |
10490 | [(set_attr "isa" "noavx,avx") |
10491 | (set_attr "type" "ssecmp") | |
10492 | (set_attr "prefix_data16" "1,*") | |
10493 | (set_attr "prefix" "orig,vex") | |
ef719a44 RH |
10494 | (set_attr "mode" "TI")]) |
10495 | ||
798dd0ba | 10496 | (define_expand "sse2_eq<mode>3" |
82e86dc6 | 10497 | [(set (match_operand:VI124_128 0 "register_operand") |
798dd0ba | 10498 | (eq:VI124_128 |
82e86dc6 UB |
10499 | (match_operand:VI124_128 1 "nonimmediate_operand") |
10500 | (match_operand:VI124_128 2 "nonimmediate_operand")))] | |
798dd0ba UB |
10501 | "TARGET_SSE2 && !TARGET_XOP " |
10502 | "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);") | |
10503 | ||
ffbaf337 | 10504 | (define_expand "sse4_1_eqv2di3" |
82e86dc6 | 10505 | [(set (match_operand:V2DI 0 "register_operand") |
ffbaf337 | 10506 | (eq:V2DI |
82e86dc6 UB |
10507 | (match_operand:V2DI 1 "nonimmediate_operand") |
10508 | (match_operand:V2DI 2 "nonimmediate_operand")))] | |
ffbaf337 UB |
10509 | "TARGET_SSE4_1" |
10510 | "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);") | |
10511 | ||
798dd0ba | 10512 | (define_insn "sse4_2_gtv2di3" |
45392c76 | 10513 | [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,x") |
798dd0ba | 10514 | (gt:V2DI |
45392c76 IE |
10515 | (match_operand:V2DI 1 "register_operand" "0,0,x") |
10516 | (match_operand:V2DI 2 "nonimmediate_operand" "Yrm,*xm,xm")))] | |
798dd0ba UB |
10517 | "TARGET_SSE4_2" |
10518 | "@ | |
45392c76 | 10519 | pcmpgtq\t{%2, %0|%0, %2} |
798dd0ba UB |
10520 | pcmpgtq\t{%2, %0|%0, %2} |
10521 | vpcmpgtq\t{%2, %1, %0|%0, %1, %2}" | |
45392c76 | 10522 | [(set_attr "isa" "noavx,noavx,avx") |
798dd0ba | 10523 | (set_attr "type" "ssecmp") |
9a5cee02 | 10524 | (set_attr "prefix_extra" "1") |
45392c76 | 10525 | (set_attr "prefix" "orig,orig,vex") |
95879c72 L |
10526 | (set_attr "mode" "TI")]) |
10527 | ||
977e83a3 | 10528 | (define_insn "avx2_gt<mode>3" |
b5344bf4 UB |
10529 | [(set (match_operand:VI_256 0 "register_operand" "=x") |
10530 | (gt:VI_256 | |
10531 | (match_operand:VI_256 1 "register_operand" "x") | |
10532 | (match_operand:VI_256 2 "nonimmediate_operand" "xm")))] | |
977e83a3 KY |
10533 | "TARGET_AVX2" |
10534 | "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" | |
10535 | [(set_attr "type" "ssecmp") | |
10536 | (set_attr "prefix_extra" "1") | |
10537 | (set_attr "prefix" "vex") | |
10538 | (set_attr "mode" "OI")]) | |
10539 | ||
54967fb0 | 10540 | (define_insn "<avx512>_gt<mode>3<mask_scalar_merge_name>" |
be792bce | 10541 | [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk") |
0fe65b75 | 10542 | (unspec:<avx512fmaskmode> |
54967fb0 AI |
10543 | [(match_operand:VI48_AVX512VL 1 "register_operand" "v") |
10544 | (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))] | |
0fe65b75 | 10545 | "TARGET_AVX512F" |
a95ec517 | 10546 | "vpcmpgt<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}" |
0fe65b75 AI |
10547 | [(set_attr "type" "ssecmp") |
10548 | (set_attr "prefix_extra" "1") | |
10549 | (set_attr "prefix" "evex") | |
10550 | (set_attr "mode" "<sseinsnmode>")]) | |
10551 | ||
54967fb0 AI |
10552 | (define_insn "<avx512>_gt<mode>3<mask_scalar_merge_name>" |
10553 | [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk") | |
10554 | (unspec:<avx512fmaskmode> | |
10555 | [(match_operand:VI12_AVX512VL 1 "register_operand" "v") | |
10556 | (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))] | |
10557 | "TARGET_AVX512BW" | |
10558 | "vpcmpgt<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}" | |
10559 | [(set_attr "type" "ssecmp") | |
10560 | (set_attr "prefix_extra" "1") | |
10561 | (set_attr "prefix" "evex") | |
10562 | (set_attr "mode" "<sseinsnmode>")]) | |
10563 | ||
ef719a44 | 10564 | (define_insn "sse2_gt<mode>3" |
798dd0ba UB |
10565 | [(set (match_operand:VI124_128 0 "register_operand" "=x,x") |
10566 | (gt:VI124_128 | |
10567 | (match_operand:VI124_128 1 "register_operand" "0,x") | |
10568 | (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))] | |
43a8b705 | 10569 | "TARGET_SSE2 && !TARGET_XOP" |
798dd0ba | 10570 | "@ |
cbb734aa UB |
10571 | pcmpgt<ssemodesuffix>\t{%2, %0|%0, %2} |
10572 | vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" | |
798dd0ba UB |
10573 | [(set_attr "isa" "noavx,avx") |
10574 | (set_attr "type" "ssecmp") | |
10575 | (set_attr "prefix_data16" "1,*") | |
10576 | (set_attr "prefix" "orig,vex") | |
3b8dd071 L |
10577 | (set_attr "mode" "TI")]) |
10578 | ||
f62ce24f AI |
10579 | (define_expand "vcond<V_512:mode><VI_512:mode>" |
10580 | [(set (match_operand:V_512 0 "register_operand") | |
10581 | (if_then_else:V_512 | |
10582 | (match_operator 3 "" | |
10583 | [(match_operand:VI_512 4 "nonimmediate_operand") | |
10584 | (match_operand:VI_512 5 "general_operand")]) | |
10585 | (match_operand:V_512 1) | |
10586 | (match_operand:V_512 2)))] | |
10587 | "TARGET_AVX512F | |
10588 | && (GET_MODE_NUNITS (<V_512:MODE>mode) | |
10589 | == GET_MODE_NUNITS (<VI_512:MODE>mode))" | |
10590 | { | |
10591 | bool ok = ix86_expand_int_vcond (operands); | |
10592 | gcc_assert (ok); | |
10593 | DONE; | |
10594 | }) | |
10595 | ||
32469ccc | 10596 | (define_expand "vcond<V_256:mode><VI_256:mode>" |
82e86dc6 | 10597 | [(set (match_operand:V_256 0 "register_operand") |
32469ccc JJ |
10598 | (if_then_else:V_256 |
10599 | (match_operator 3 "" | |
82e86dc6 UB |
10600 | [(match_operand:VI_256 4 "nonimmediate_operand") |
10601 | (match_operand:VI_256 5 "general_operand")]) | |
10602 | (match_operand:V_256 1) | |
10603 | (match_operand:V_256 2)))] | |
32469ccc JJ |
10604 | "TARGET_AVX2 |
10605 | && (GET_MODE_NUNITS (<V_256:MODE>mode) | |
10606 | == GET_MODE_NUNITS (<VI_256:MODE>mode))" | |
10607 | { | |
10608 | bool ok = ix86_expand_int_vcond (operands); | |
10609 | gcc_assert (ok); | |
10610 | DONE; | |
10611 | }) | |
10612 | ||
e9e1d143 | 10613 | (define_expand "vcond<V_128:mode><VI124_128:mode>" |
82e86dc6 | 10614 | [(set (match_operand:V_128 0 "register_operand") |
e9e1d143 | 10615 | (if_then_else:V_128 |
977e83a3 | 10616 | (match_operator 3 "" |
82e86dc6 UB |
10617 | [(match_operand:VI124_128 4 "nonimmediate_operand") |
10618 | (match_operand:VI124_128 5 "general_operand")]) | |
10619 | (match_operand:V_128 1) | |
10620 | (match_operand:V_128 2)))] | |
e9e1d143 RG |
10621 | "TARGET_SSE2 |
10622 | && (GET_MODE_NUNITS (<V_128:MODE>mode) | |
10623 | == GET_MODE_NUNITS (<VI124_128:MODE>mode))" | |
ae46a07a | 10624 | { |
1262fd02 UB |
10625 | bool ok = ix86_expand_int_vcond (operands); |
10626 | gcc_assert (ok); | |
10627 | DONE; | |
ae46a07a RH |
10628 | }) |
10629 | ||
e9e1d143 | 10630 | (define_expand "vcond<VI8F_128:mode>v2di" |
82e86dc6 | 10631 | [(set (match_operand:VI8F_128 0 "register_operand") |
e9e1d143 | 10632 | (if_then_else:VI8F_128 |
977e83a3 | 10633 | (match_operator 3 "" |
82e86dc6 UB |
10634 | [(match_operand:V2DI 4 "nonimmediate_operand") |
10635 | (match_operand:V2DI 5 "general_operand")]) | |
10636 | (match_operand:VI8F_128 1) | |
10637 | (match_operand:VI8F_128 2)))] | |
798dd0ba UB |
10638 | "TARGET_SSE4_2" |
10639 | { | |
10640 | bool ok = ix86_expand_int_vcond (operands); | |
10641 | gcc_assert (ok); | |
10642 | DONE; | |
10643 | }) | |
10644 | ||
f62ce24f AI |
10645 | (define_expand "vcondu<V_512:mode><VI_512:mode>" |
10646 | [(set (match_operand:V_512 0 "register_operand") | |
10647 | (if_then_else:V_512 | |
10648 | (match_operator 3 "" | |
10649 | [(match_operand:VI_512 4 "nonimmediate_operand") | |
10650 | (match_operand:VI_512 5 "nonimmediate_operand")]) | |
10651 | (match_operand:V_512 1 "general_operand") | |
10652 | (match_operand:V_512 2 "general_operand")))] | |
10653 | "TARGET_AVX512F | |
10654 | && (GET_MODE_NUNITS (<V_512:MODE>mode) | |
10655 | == GET_MODE_NUNITS (<VI_512:MODE>mode))" | |
10656 | { | |
10657 | bool ok = ix86_expand_int_vcond (operands); | |
10658 | gcc_assert (ok); | |
10659 | DONE; | |
10660 | }) | |
10661 | ||
32469ccc | 10662 | (define_expand "vcondu<V_256:mode><VI_256:mode>" |
82e86dc6 | 10663 | [(set (match_operand:V_256 0 "register_operand") |
32469ccc JJ |
10664 | (if_then_else:V_256 |
10665 | (match_operator 3 "" | |
82e86dc6 UB |
10666 | [(match_operand:VI_256 4 "nonimmediate_operand") |
10667 | (match_operand:VI_256 5 "nonimmediate_operand")]) | |
10668 | (match_operand:V_256 1 "general_operand") | |
10669 | (match_operand:V_256 2 "general_operand")))] | |
32469ccc JJ |
10670 | "TARGET_AVX2 |
10671 | && (GET_MODE_NUNITS (<V_256:MODE>mode) | |
10672 | == GET_MODE_NUNITS (<VI_256:MODE>mode))" | |
10673 | { | |
10674 | bool ok = ix86_expand_int_vcond (operands); | |
10675 | gcc_assert (ok); | |
10676 | DONE; | |
10677 | }) | |
10678 | ||
e9e1d143 | 10679 | (define_expand "vcondu<V_128:mode><VI124_128:mode>" |
82e86dc6 | 10680 | [(set (match_operand:V_128 0 "register_operand") |
e9e1d143 | 10681 | (if_then_else:V_128 |
977e83a3 | 10682 | (match_operator 3 "" |
82e86dc6 UB |
10683 | [(match_operand:VI124_128 4 "nonimmediate_operand") |
10684 | (match_operand:VI124_128 5 "nonimmediate_operand")]) | |
10685 | (match_operand:V_128 1 "general_operand") | |
10686 | (match_operand:V_128 2 "general_operand")))] | |
e9e1d143 RG |
10687 | "TARGET_SSE2 |
10688 | && (GET_MODE_NUNITS (<V_128:MODE>mode) | |
10689 | == GET_MODE_NUNITS (<VI124_128:MODE>mode))" | |
ae46a07a | 10690 | { |
1262fd02 UB |
10691 | bool ok = ix86_expand_int_vcond (operands); |
10692 | gcc_assert (ok); | |
10693 | DONE; | |
ae46a07a RH |
10694 | }) |
10695 | ||
e9e1d143 | 10696 | (define_expand "vcondu<VI8F_128:mode>v2di" |
82e86dc6 | 10697 | [(set (match_operand:VI8F_128 0 "register_operand") |
e9e1d143 | 10698 | (if_then_else:VI8F_128 |
977e83a3 | 10699 | (match_operator 3 "" |
82e86dc6 UB |
10700 | [(match_operand:V2DI 4 "nonimmediate_operand") |
10701 | (match_operand:V2DI 5 "nonimmediate_operand")]) | |
10702 | (match_operand:VI8F_128 1 "general_operand") | |
10703 | (match_operand:VI8F_128 2 "general_operand")))] | |
798dd0ba UB |
10704 | "TARGET_SSE4_2" |
10705 | { | |
10706 | bool ok = ix86_expand_int_vcond (operands); | |
10707 | gcc_assert (ok); | |
10708 | DONE; | |
10709 | }) | |
10710 | ||
2205ed25 | 10711 | (define_mode_iterator VEC_PERM_AVX2 |
44167383 | 10712 | [V16QI V8HI V4SI V2DI V4SF V2DF |
0c7189ae | 10713 | (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2") |
44167383 | 10714 | (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2") |
c003c6d6 AI |
10715 | (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2") |
10716 | (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F") | |
f5db965f | 10717 | (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F") |
28adf6e7 | 10718 | (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512VBMI")]) |
44167383 | 10719 | |
2205ed25 | 10720 | (define_expand "vec_perm<mode>" |
82e86dc6 UB |
10721 | [(match_operand:VEC_PERM_AVX2 0 "register_operand") |
10722 | (match_operand:VEC_PERM_AVX2 1 "register_operand") | |
10723 | (match_operand:VEC_PERM_AVX2 2 "register_operand") | |
10724 | (match_operand:<sseintvecmode> 3 "register_operand")] | |
44167383 | 10725 | "TARGET_SSSE3 || TARGET_AVX || TARGET_XOP" |
f90e8e2e | 10726 | { |
2205ed25 | 10727 | ix86_expand_vec_perm (operands); |
f90e8e2e AS |
10728 | DONE; |
10729 | }) | |
10730 | ||
0772d476 RH |
10731 | (define_mode_iterator VEC_PERM_CONST |
10732 | [(V4SF "TARGET_SSE") (V4SI "TARGET_SSE") | |
10733 | (V2DF "TARGET_SSE") (V2DI "TARGET_SSE") | |
10734 | (V16QI "TARGET_SSE2") (V8HI "TARGET_SSE2") | |
10735 | (V8SF "TARGET_AVX") (V4DF "TARGET_AVX") | |
10736 | (V8SI "TARGET_AVX") (V4DI "TARGET_AVX") | |
c003c6d6 AI |
10737 | (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2") |
10738 | (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F") | |
f5db965f | 10739 | (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F") |
9f9f6115 | 10740 | (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")]) |
0772d476 RH |
10741 | |
10742 | (define_expand "vec_perm_const<mode>" | |
82e86dc6 UB |
10743 | [(match_operand:VEC_PERM_CONST 0 "register_operand") |
10744 | (match_operand:VEC_PERM_CONST 1 "register_operand") | |
10745 | (match_operand:VEC_PERM_CONST 2 "register_operand") | |
10746 | (match_operand:<sseintvecmode> 3)] | |
0772d476 RH |
10747 | "" |
10748 | { | |
10749 | if (ix86_expand_vec_perm_const (operands)) | |
10750 | DONE; | |
10751 | else | |
10752 | FAIL; | |
10753 | }) | |
10754 | ||
ef719a44 RH |
10755 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
10756 | ;; | |
edc5bbcd | 10757 | ;; Parallel bitwise logical operations |
ef719a44 RH |
10758 | ;; |
10759 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
10760 | ||
10761 | (define_expand "one_cmpl<mode>2" | |
82e86dc6 UB |
10762 | [(set (match_operand:VI 0 "register_operand") |
10763 | (xor:VI (match_operand:VI 1 "nonimmediate_operand") | |
d8700b1c UB |
10764 | (match_dup 2)))] |
10765 | "TARGET_SSE" | |
ef719a44 RH |
10766 | { |
10767 | int i, n = GET_MODE_NUNITS (<MODE>mode); | |
10768 | rtvec v = rtvec_alloc (n); | |
10769 | ||
10770 | for (i = 0; i < n; ++i) | |
10771 | RTVEC_ELT (v, i) = constm1_rtx; | |
10772 | ||
10773 | operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v)); | |
10774 | }) | |
10775 | ||
700e2919 | 10776 | (define_expand "<sse2_avx2>_andnot<mode>3" |
82e86dc6 | 10777 | [(set (match_operand:VI_AVX2 0 "register_operand") |
1707583b | 10778 | (and:VI_AVX2 |
82e86dc6 UB |
10779 | (not:VI_AVX2 (match_operand:VI_AVX2 1 "register_operand")) |
10780 | (match_operand:VI_AVX2 2 "nonimmediate_operand")))] | |
700e2919 | 10781 | "TARGET_SSE2") |
35f3782f | 10782 | |
700e2919 AI |
10783 | (define_expand "<sse2_avx2>_andnot<mode>3_mask" |
10784 | [(set (match_operand:VI48_AVX512VL 0 "register_operand") | |
10785 | (vec_merge:VI48_AVX512VL | |
10786 | (and:VI48_AVX512VL | |
10787 | (not:VI48_AVX512VL | |
10788 | (match_operand:VI48_AVX512VL 1 "register_operand")) | |
10789 | (match_operand:VI48_AVX512VL 2 "nonimmediate_operand")) | |
10790 | (match_operand:VI48_AVX512VL 3 "vector_move_operand") | |
10791 | (match_operand:<avx512fmaskmode> 4 "register_operand")))] | |
10792 | "TARGET_AVX512F") | |
10793 | ||
10794 | (define_expand "<sse2_avx2>_andnot<mode>3_mask" | |
10795 | [(set (match_operand:VI12_AVX512VL 0 "register_operand") | |
10796 | (vec_merge:VI12_AVX512VL | |
10797 | (and:VI12_AVX512VL | |
10798 | (not:VI12_AVX512VL | |
10799 | (match_operand:VI12_AVX512VL 1 "register_operand")) | |
10800 | (match_operand:VI12_AVX512VL 2 "nonimmediate_operand")) | |
10801 | (match_operand:VI12_AVX512VL 3 "vector_move_operand") | |
10802 | (match_operand:<avx512fmaskmode> 4 "register_operand")))] | |
10803 | "TARGET_AVX512BW") | |
10804 | ||
10805 | (define_insn "*andnot<mode>3" | |
3f97cb0b | 10806 | [(set (match_operand:VI 0 "register_operand" "=x,v") |
d8700b1c | 10807 | (and:VI |
3f97cb0b AI |
10808 | (not:VI (match_operand:VI 1 "register_operand" "0,v")) |
10809 | (match_operand:VI 2 "nonimmediate_operand" "xm,vm")))] | |
700e2919 | 10810 | "TARGET_SSE" |
d8700b1c | 10811 | { |
a9ccbba2 | 10812 | static char buf[64]; |
d8700b1c | 10813 | const char *ops; |
1707583b UB |
10814 | const char *tmp; |
10815 | ||
10816 | switch (get_attr_mode (insn)) | |
10817 | { | |
a9ccbba2 AI |
10818 | case MODE_XI: |
10819 | gcc_assert (TARGET_AVX512F); | |
1707583b | 10820 | case MODE_OI: |
26358fb6 | 10821 | gcc_assert (TARGET_AVX2 || TARGET_AVX512VL); |
1707583b | 10822 | case MODE_TI: |
26358fb6 AI |
10823 | gcc_assert (TARGET_SSE2 || TARGET_AVX512VL); |
10824 | switch (<MODE>mode) | |
10825 | { | |
10826 | case V16SImode: | |
10827 | case V8DImode: | |
10828 | if (TARGET_AVX512F) | |
10829 | { | |
10830 | tmp = "pandn<ssemodesuffix>"; | |
10831 | break; | |
10832 | } | |
10833 | case V8SImode: | |
10834 | case V4DImode: | |
10835 | case V4SImode: | |
10836 | case V2DImode: | |
10837 | if (TARGET_AVX512VL) | |
10838 | { | |
10839 | tmp = "pandn<ssemodesuffix>"; | |
10840 | break; | |
10841 | } | |
10842 | default: | |
10843 | tmp = TARGET_AVX512VL ? "pandnq" : "pandn"; | |
10844 | } | |
1707583b UB |
10845 | break; |
10846 | ||
8586e4bd UB |
10847 | case MODE_V16SF: |
10848 | gcc_assert (TARGET_AVX512F); | |
1707583b UB |
10849 | case MODE_V8SF: |
10850 | gcc_assert (TARGET_AVX); | |
10851 | case MODE_V4SF: | |
10852 | gcc_assert (TARGET_SSE); | |
10853 | ||
10854 | tmp = "andnps"; | |
10855 | break; | |
10856 | ||
10857 | default: | |
10858 | gcc_unreachable (); | |
10859 | } | |
95879c72 | 10860 | |
d8700b1c UB |
10861 | switch (which_alternative) |
10862 | { | |
10863 | case 0: | |
10864 | ops = "%s\t{%%2, %%0|%%0, %%2}"; | |
10865 | break; | |
10866 | case 1: | |
47490470 | 10867 | ops = "v%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}"; |
d8700b1c UB |
10868 | break; |
10869 | default: | |
10870 | gcc_unreachable (); | |
10871 | } | |
ef719a44 | 10872 | |
d8700b1c UB |
10873 | snprintf (buf, sizeof (buf), ops, tmp); |
10874 | return buf; | |
10875 | } | |
10876 | [(set_attr "isa" "noavx,avx") | |
10877 | (set_attr "type" "sselog") | |
10878 | (set (attr "prefix_data16") | |
10879 | (if_then_else | |
10880 | (and (eq_attr "alternative" "0") | |
10881 | (eq_attr "mode" "TI")) | |
10882 | (const_string "1") | |
10883 | (const_string "*"))) | |
700e2919 | 10884 | (set_attr "prefix" "orig,vex") |
d8700b1c | 10885 | (set (attr "mode") |
659c0e68 JM |
10886 | (cond [(and (match_test "<MODE_SIZE> == 16") |
10887 | (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")) | |
daa51295 UB |
10888 | (const_string "<ssePSmode>") |
10889 | (match_test "TARGET_AVX2") | |
10890 | (const_string "<sseinsnmode>") | |
10891 | (match_test "TARGET_AVX") | |
10892 | (if_then_else | |
039eee3f | 10893 | (match_test "<MODE_SIZE> > 16") |
daa51295 UB |
10894 | (const_string "V8SF") |
10895 | (const_string "<sseinsnmode>")) | |
10896 | (ior (not (match_test "TARGET_SSE2")) | |
10897 | (match_test "optimize_function_for_size_p (cfun)")) | |
10898 | (const_string "V4SF") | |
10899 | ] | |
10900 | (const_string "<sseinsnmode>")))]) | |
edc5bbcd | 10901 | |
700e2919 AI |
10902 | (define_insn "*andnot<mode>3_mask" |
10903 | [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v") | |
10904 | (vec_merge:VI48_AVX512VL | |
10905 | (and:VI48_AVX512VL | |
10906 | (not:VI48_AVX512VL | |
10907 | (match_operand:VI48_AVX512VL 1 "register_operand" "v")) | |
10908 | (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")) | |
10909 | (match_operand:VI48_AVX512VL 3 "vector_move_operand" "0C") | |
10910 | (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] | |
10911 | "TARGET_AVX512F" | |
10912 | "vpandn<ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"; | |
10913 | [(set_attr "type" "sselog") | |
10914 | (set_attr "prefix" "evex") | |
10915 | (set_attr "mode" "<sseinsnmode>")]) | |
10916 | ||
10917 | (define_insn "*andnot<mode>3_mask" | |
10918 | [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v") | |
10919 | (vec_merge:VI12_AVX512VL | |
10920 | (and:VI12_AVX512VL | |
10921 | (not:VI12_AVX512VL | |
10922 | (match_operand:VI12_AVX512VL 1 "register_operand" "v")) | |
10923 | (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")) | |
10924 | (match_operand:VI12_AVX512VL 3 "vector_move_operand" "0C") | |
10925 | (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] | |
10926 | "TARGET_AVX512BW" | |
10927 | "vpandn<ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"; | |
10928 | [(set_attr "type" "sselog") | |
10929 | (set_attr "prefix" "evex") | |
10930 | (set_attr "mode" "<sseinsnmode>")]) | |
10931 | ||
94237c92 | 10932 | (define_expand "<code><mode>3" |
82e86dc6 | 10933 | [(set (match_operand:VI 0 "register_operand") |
d8700b1c | 10934 | (any_logic:VI |
42bace41 JJ |
10935 | (match_operand:VI 1 "nonimmediate_or_const_vector_operand") |
10936 | (match_operand:VI 2 "nonimmediate_or_const_vector_operand")))] | |
35f3782f | 10937 | "TARGET_SSE" |
42bace41 JJ |
10938 | { |
10939 | ix86_expand_vector_logical_operator (<CODE>, <MODE>mode, operands); | |
10940 | DONE; | |
10941 | }) | |
ef719a44 | 10942 | |
47490470 | 10943 | (define_insn "<mask_codefor><code><mode>3<mask_name>" |
3f97cb0b | 10944 | [(set (match_operand:VI 0 "register_operand" "=x,v") |
d8700b1c | 10945 | (any_logic:VI |
3f97cb0b AI |
10946 | (match_operand:VI 1 "nonimmediate_operand" "%0,v") |
10947 | (match_operand:VI 2 "nonimmediate_operand" "xm,vm")))] | |
47490470 | 10948 | "TARGET_SSE && <mask_mode512bit_condition> |
94237c92 | 10949 | && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" |
d8700b1c | 10950 | { |
a9ccbba2 | 10951 | static char buf[64]; |
d8700b1c | 10952 | const char *ops; |
1707583b UB |
10953 | const char *tmp; |
10954 | ||
10955 | switch (get_attr_mode (insn)) | |
10956 | { | |
a9ccbba2 AI |
10957 | case MODE_XI: |
10958 | gcc_assert (TARGET_AVX512F); | |
1707583b | 10959 | case MODE_OI: |
26358fb6 | 10960 | gcc_assert (TARGET_AVX2 || TARGET_AVX512VL); |
1707583b | 10961 | case MODE_TI: |
26358fb6 AI |
10962 | gcc_assert (TARGET_SSE2 || TARGET_AVX512VL); |
10963 | switch (<MODE>mode) | |
10964 | { | |
10965 | case V16SImode: | |
10966 | case V8DImode: | |
10967 | if (TARGET_AVX512F) | |
10968 | { | |
10969 | tmp = "p<logic><ssemodesuffix>"; | |
10970 | break; | |
10971 | } | |
10972 | case V8SImode: | |
10973 | case V4DImode: | |
10974 | case V4SImode: | |
10975 | case V2DImode: | |
10976 | if (TARGET_AVX512VL) | |
10977 | { | |
10978 | tmp = "p<logic><ssemodesuffix>"; | |
10979 | break; | |
10980 | } | |
10981 | default: | |
10982 | tmp = TARGET_AVX512VL ? "p<logic>q" : "p<logic>"; | |
10983 | } | |
1707583b UB |
10984 | break; |
10985 | ||
a9ccbba2 AI |
10986 | case MODE_V16SF: |
10987 | gcc_assert (TARGET_AVX512F); | |
1707583b UB |
10988 | case MODE_V8SF: |
10989 | gcc_assert (TARGET_AVX); | |
10990 | case MODE_V4SF: | |
10991 | gcc_assert (TARGET_SSE); | |
10992 | ||
10993 | tmp = "<logic>ps"; | |
10994 | break; | |
10995 | ||
10996 | default: | |
10997 | gcc_unreachable (); | |
10998 | } | |
35f3782f | 10999 | |
d8700b1c UB |
11000 | switch (which_alternative) |
11001 | { | |
11002 | case 0: | |
11003 | ops = "%s\t{%%2, %%0|%%0, %%2}"; | |
11004 | break; | |
11005 | case 1: | |
47490470 | 11006 | ops = "v%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}"; |
d8700b1c UB |
11007 | break; |
11008 | default: | |
11009 | gcc_unreachable (); | |
11010 | } | |
95879c72 | 11011 | |
d8700b1c UB |
11012 | snprintf (buf, sizeof (buf), ops, tmp); |
11013 | return buf; | |
11014 | } | |
11015 | [(set_attr "isa" "noavx,avx") | |
11016 | (set_attr "type" "sselog") | |
11017 | (set (attr "prefix_data16") | |
11018 | (if_then_else | |
11019 | (and (eq_attr "alternative" "0") | |
11020 | (eq_attr "mode" "TI")) | |
11021 | (const_string "1") | |
11022 | (const_string "*"))) | |
47490470 | 11023 | (set_attr "prefix" "<mask_prefix3>") |
d8700b1c | 11024 | (set (attr "mode") |
659c0e68 JM |
11025 | (cond [(and (match_test "<MODE_SIZE> == 16") |
11026 | (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")) | |
daa51295 UB |
11027 | (const_string "<ssePSmode>") |
11028 | (match_test "TARGET_AVX2") | |
11029 | (const_string "<sseinsnmode>") | |
11030 | (match_test "TARGET_AVX") | |
11031 | (if_then_else | |
039eee3f | 11032 | (match_test "<MODE_SIZE> > 16") |
daa51295 UB |
11033 | (const_string "V8SF") |
11034 | (const_string "<sseinsnmode>")) | |
11035 | (ior (not (match_test "TARGET_SSE2")) | |
11036 | (match_test "optimize_function_for_size_p (cfun)")) | |
11037 | (const_string "V4SF") | |
11038 | ] | |
11039 | (const_string "<sseinsnmode>")))]) | |
d8700b1c | 11040 | |
54967fb0 | 11041 | (define_insn "<avx512>_testm<mode>3<mask_scalar_merge_name>" |
be792bce | 11042 | [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk") |
0fe65b75 | 11043 | (unspec:<avx512fmaskmode> |
54967fb0 AI |
11044 | [(match_operand:VI12_AVX512VL 1 "register_operand" "v") |
11045 | (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")] | |
11046 | UNSPEC_TESTM))] | |
11047 | "TARGET_AVX512BW" | |
11048 | "vptestm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}" | |
11049 | [(set_attr "prefix" "evex") | |
11050 | (set_attr "mode" "<sseinsnmode>")]) | |
11051 | ||
11052 | (define_insn "<avx512>_testm<mode>3<mask_scalar_merge_name>" | |
11053 | [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk") | |
11054 | (unspec:<avx512fmaskmode> | |
11055 | [(match_operand:VI48_AVX512VL 1 "register_operand" "v") | |
11056 | (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")] | |
0fe65b75 AI |
11057 | UNSPEC_TESTM))] |
11058 | "TARGET_AVX512F" | |
a95ec517 | 11059 | "vptestm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}" |
0fe65b75 AI |
11060 | [(set_attr "prefix" "evex") |
11061 | (set_attr "mode" "<sseinsnmode>")]) | |
11062 | ||
54967fb0 AI |
11063 | (define_insn "<avx512>_testnm<mode>3<mask_scalar_merge_name>" |
11064 | [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk") | |
11065 | (unspec:<avx512fmaskmode> | |
11066 | [(match_operand:VI12_AVX512VL 1 "register_operand" "v") | |
11067 | (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")] | |
11068 | UNSPEC_TESTNM))] | |
11069 | "TARGET_AVX512BW" | |
11070 | "vptestnm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}" | |
11071 | [(set_attr "prefix" "evex") | |
11072 | (set_attr "mode" "<sseinsnmode>")]) | |
11073 | ||
11074 | (define_insn "<avx512>_testnm<mode>3<mask_scalar_merge_name>" | |
be792bce | 11075 | [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk") |
0fe65b75 | 11076 | (unspec:<avx512fmaskmode> |
54967fb0 AI |
11077 | [(match_operand:VI48_AVX512VL 1 "register_operand" "v") |
11078 | (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")] | |
0fe65b75 | 11079 | UNSPEC_TESTNM))] |
260d3642 IT |
11080 | "TARGET_AVX512F" |
11081 | "vptestnm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}" | |
0fe65b75 AI |
11082 | [(set_attr "prefix" "evex") |
11083 | (set_attr "mode" "<sseinsnmode>")]) | |
11084 | ||
ef719a44 RH |
11085 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
11086 | ;; | |
11087 | ;; Parallel integral element swizzling | |
11088 | ;; | |
11089 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
11090 | ||
8dfb9f16 | 11091 | (define_expand "vec_pack_trunc_<mode>" |
82e86dc6 | 11092 | [(match_operand:<ssepackmode> 0 "register_operand") |
e8d08206 AI |
11093 | (match_operand:VI248_AVX2_8_AVX512F 1 "register_operand") |
11094 | (match_operand:VI248_AVX2_8_AVX512F 2 "register_operand")] | |
89d67cca DN |
11095 | "TARGET_SSE2" |
11096 | { | |
8dfb9f16 UB |
11097 | rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]); |
11098 | rtx op2 = gen_lowpart (<ssepackmode>mode, operands[2]); | |
0fac5151 | 11099 | ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0); |
89d67cca DN |
11100 | DONE; |
11101 | }) | |
11102 | ||
d281ef42 | 11103 | (define_insn "<sse2_avx2>_packsswb<mask_name>" |
f5db965f IT |
11104 | [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x") |
11105 | (vec_concat:VI1_AVX512 | |
977e83a3 | 11106 | (ss_truncate:<ssehalfvecmode> |
d281ef42 | 11107 | (match_operand:<sseunpackmode> 1 "register_operand" "0,v")) |
977e83a3 | 11108 | (ss_truncate:<ssehalfvecmode> |
d281ef42 AI |
11109 | (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,vm"))))] |
11110 | "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>" | |
1ee8b298 UB |
11111 | "@ |
11112 | packsswb\t{%2, %0|%0, %2} | |
d281ef42 | 11113 | vpacksswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
1ee8b298 UB |
11114 | [(set_attr "isa" "noavx,avx") |
11115 | (set_attr "type" "sselog") | |
11116 | (set_attr "prefix_data16" "1,*") | |
d281ef42 | 11117 | (set_attr "prefix" "orig,maybe_evex") |
977e83a3 | 11118 | (set_attr "mode" "<sseinsnmode>")]) |
95879c72 | 11119 | |
ed3e611e AI |
11120 | (define_insn "<sse2_avx2>_packssdw<mask_name>" |
11121 | [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v") | |
977e83a3 KY |
11122 | (vec_concat:VI2_AVX2 |
11123 | (ss_truncate:<ssehalfvecmode> | |
ed3e611e | 11124 | (match_operand:<sseunpackmode> 1 "register_operand" "0,v")) |
977e83a3 | 11125 | (ss_truncate:<ssehalfvecmode> |
ed3e611e AI |
11126 | (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,vm"))))] |
11127 | "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>" | |
1ee8b298 UB |
11128 | "@ |
11129 | packssdw\t{%2, %0|%0, %2} | |
ed3e611e | 11130 | vpackssdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
1ee8b298 UB |
11131 | [(set_attr "isa" "noavx,avx") |
11132 | (set_attr "type" "sselog") | |
11133 | (set_attr "prefix_data16" "1,*") | |
11134 | (set_attr "prefix" "orig,vex") | |
977e83a3 | 11135 | (set_attr "mode" "<sseinsnmode>")]) |
95879c72 | 11136 | |
d281ef42 | 11137 | (define_insn "<sse2_avx2>_packuswb<mask_name>" |
f5db965f IT |
11138 | [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x") |
11139 | (vec_concat:VI1_AVX512 | |
977e83a3 | 11140 | (us_truncate:<ssehalfvecmode> |
d281ef42 | 11141 | (match_operand:<sseunpackmode> 1 "register_operand" "0,v")) |
977e83a3 | 11142 | (us_truncate:<ssehalfvecmode> |
d281ef42 AI |
11143 | (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,vm"))))] |
11144 | "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>" | |
1ee8b298 UB |
11145 | "@ |
11146 | packuswb\t{%2, %0|%0, %2} | |
d281ef42 | 11147 | vpackuswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
1ee8b298 UB |
11148 | [(set_attr "isa" "noavx,avx") |
11149 | (set_attr "type" "sselog") | |
11150 | (set_attr "prefix_data16" "1,*") | |
11151 | (set_attr "prefix" "orig,vex") | |
977e83a3 | 11152 | (set_attr "mode" "<sseinsnmode>")]) |
95879c72 | 11153 | |
6edf4f24 AI |
11154 | (define_insn "avx512bw_interleave_highv64qi<mask_name>" |
11155 | [(set (match_operand:V64QI 0 "register_operand" "=v") | |
11156 | (vec_select:V64QI | |
11157 | (vec_concat:V128QI | |
11158 | (match_operand:V64QI 1 "register_operand" "v") | |
11159 | (match_operand:V64QI 2 "nonimmediate_operand" "vm")) | |
11160 | (parallel [(const_int 8) (const_int 72) | |
11161 | (const_int 9) (const_int 73) | |
11162 | (const_int 10) (const_int 74) | |
11163 | (const_int 11) (const_int 75) | |
11164 | (const_int 12) (const_int 76) | |
11165 | (const_int 13) (const_int 77) | |
11166 | (const_int 14) (const_int 78) | |
11167 | (const_int 15) (const_int 79) | |
11168 | (const_int 24) (const_int 88) | |
11169 | (const_int 25) (const_int 89) | |
11170 | (const_int 26) (const_int 90) | |
11171 | (const_int 27) (const_int 91) | |
11172 | (const_int 28) (const_int 92) | |
11173 | (const_int 29) (const_int 93) | |
11174 | (const_int 30) (const_int 94) | |
11175 | (const_int 31) (const_int 95) | |
11176 | (const_int 40) (const_int 104) | |
11177 | (const_int 41) (const_int 105) | |
11178 | (const_int 42) (const_int 106) | |
11179 | (const_int 43) (const_int 107) | |
11180 | (const_int 44) (const_int 108) | |
11181 | (const_int 45) (const_int 109) | |
11182 | (const_int 46) (const_int 110) | |
11183 | (const_int 47) (const_int 111) | |
11184 | (const_int 56) (const_int 120) | |
11185 | (const_int 57) (const_int 121) | |
11186 | (const_int 58) (const_int 122) | |
11187 | (const_int 59) (const_int 123) | |
11188 | (const_int 60) (const_int 124) | |
11189 | (const_int 61) (const_int 125) | |
11190 | (const_int 62) (const_int 126) | |
11191 | (const_int 63) (const_int 127)])))] | |
11192 | "TARGET_AVX512BW" | |
11193 | "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" | |
11194 | [(set_attr "type" "sselog") | |
11195 | (set_attr "prefix" "evex") | |
11196 | (set_attr "mode" "XI")]) | |
11197 | ||
11198 | (define_insn "avx2_interleave_highv32qi<mask_name>" | |
11199 | [(set (match_operand:V32QI 0 "register_operand" "=v") | |
977e83a3 KY |
11200 | (vec_select:V32QI |
11201 | (vec_concat:V64QI | |
6edf4f24 AI |
11202 | (match_operand:V32QI 1 "register_operand" "v") |
11203 | (match_operand:V32QI 2 "nonimmediate_operand" "vm")) | |
977e83a3 KY |
11204 | (parallel [(const_int 8) (const_int 40) |
11205 | (const_int 9) (const_int 41) | |
11206 | (const_int 10) (const_int 42) | |
11207 | (const_int 11) (const_int 43) | |
11208 | (const_int 12) (const_int 44) | |
11209 | (const_int 13) (const_int 45) | |
11210 | (const_int 14) (const_int 46) | |
11211 | (const_int 15) (const_int 47) | |
11212 | (const_int 24) (const_int 56) | |
11213 | (const_int 25) (const_int 57) | |
11214 | (const_int 26) (const_int 58) | |
11215 | (const_int 27) (const_int 59) | |
11216 | (const_int 28) (const_int 60) | |
11217 | (const_int 29) (const_int 61) | |
11218 | (const_int 30) (const_int 62) | |
0c7189ae | 11219 | (const_int 31) (const_int 63)])))] |
6edf4f24 AI |
11220 | "TARGET_AVX2 && <mask_avx512vl_condition>" |
11221 | "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" | |
977e83a3 | 11222 | [(set_attr "type" "sselog") |
6edf4f24 | 11223 | (set_attr "prefix" "<mask_prefix>") |
977e83a3 KY |
11224 | (set_attr "mode" "OI")]) |
11225 | ||
6edf4f24 AI |
11226 | (define_insn "vec_interleave_highv16qi<mask_name>" |
11227 | [(set (match_operand:V16QI 0 "register_operand" "=x,v") | |
ef719a44 RH |
11228 | (vec_select:V16QI |
11229 | (vec_concat:V32QI | |
6edf4f24 AI |
11230 | (match_operand:V16QI 1 "register_operand" "0,v") |
11231 | (match_operand:V16QI 2 "nonimmediate_operand" "xm,vm")) | |
ef719a44 RH |
11232 | (parallel [(const_int 8) (const_int 24) |
11233 | (const_int 9) (const_int 25) | |
11234 | (const_int 10) (const_int 26) | |
11235 | (const_int 11) (const_int 27) | |
4f3f76e6 | 11236 | (const_int 12) (const_int 28) |
ef719a44 RH |
11237 | (const_int 13) (const_int 29) |
11238 | (const_int 14) (const_int 30) | |
11239 | (const_int 15) (const_int 31)])))] | |
6edf4f24 | 11240 | "TARGET_SSE2 && <mask_avx512vl_condition>" |
1ee8b298 UB |
11241 | "@ |
11242 | punpckhbw\t{%2, %0|%0, %2} | |
6edf4f24 | 11243 | vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
1ee8b298 UB |
11244 | [(set_attr "isa" "noavx,avx") |
11245 | (set_attr "type" "sselog") | |
11246 | (set_attr "prefix_data16" "1,*") | |
6edf4f24 | 11247 | (set_attr "prefix" "orig,<mask_prefix>") |
95879c72 L |
11248 | (set_attr "mode" "TI")]) |
11249 | ||
6edf4f24 AI |
11250 | (define_insn "avx512bw_interleave_lowv64qi<mask_name>" |
11251 | [(set (match_operand:V64QI 0 "register_operand" "=v") | |
11252 | (vec_select:V64QI | |
11253 | (vec_concat:V128QI | |
11254 | (match_operand:V64QI 1 "register_operand" "v") | |
11255 | (match_operand:V64QI 2 "nonimmediate_operand" "vm")) | |
11256 | (parallel [(const_int 0) (const_int 64) | |
11257 | (const_int 1) (const_int 65) | |
11258 | (const_int 2) (const_int 66) | |
11259 | (const_int 3) (const_int 67) | |
11260 | (const_int 4) (const_int 68) | |
11261 | (const_int 5) (const_int 69) | |
11262 | (const_int 6) (const_int 70) | |
11263 | (const_int 7) (const_int 71) | |
11264 | (const_int 16) (const_int 80) | |
11265 | (const_int 17) (const_int 81) | |
11266 | (const_int 18) (const_int 82) | |
11267 | (const_int 19) (const_int 83) | |
11268 | (const_int 20) (const_int 84) | |
11269 | (const_int 21) (const_int 85) | |
11270 | (const_int 22) (const_int 86) | |
11271 | (const_int 23) (const_int 87) | |
11272 | (const_int 32) (const_int 96) | |
11273 | (const_int 33) (const_int 97) | |
11274 | (const_int 34) (const_int 98) | |
11275 | (const_int 35) (const_int 99) | |
11276 | (const_int 36) (const_int 100) | |
11277 | (const_int 37) (const_int 101) | |
11278 | (const_int 38) (const_int 102) | |
11279 | (const_int 39) (const_int 103) | |
11280 | (const_int 48) (const_int 112) | |
11281 | (const_int 49) (const_int 113) | |
11282 | (const_int 50) (const_int 114) | |
11283 | (const_int 51) (const_int 115) | |
11284 | (const_int 52) (const_int 116) | |
11285 | (const_int 53) (const_int 117) | |
11286 | (const_int 54) (const_int 118) | |
11287 | (const_int 55) (const_int 119)])))] | |
11288 | "TARGET_AVX512BW" | |
11289 | "vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" | |
11290 | [(set_attr "type" "sselog") | |
11291 | (set_attr "prefix" "evex") | |
11292 | (set_attr "mode" "XI")]) | |
11293 | ||
11294 | (define_insn "avx2_interleave_lowv32qi<mask_name>" | |
11295 | [(set (match_operand:V32QI 0 "register_operand" "=v") | |
977e83a3 KY |
11296 | (vec_select:V32QI |
11297 | (vec_concat:V64QI | |
6edf4f24 AI |
11298 | (match_operand:V32QI 1 "register_operand" "v") |
11299 | (match_operand:V32QI 2 "nonimmediate_operand" "vm")) | |
977e83a3 KY |
11300 | (parallel [(const_int 0) (const_int 32) |
11301 | (const_int 1) (const_int 33) | |
11302 | (const_int 2) (const_int 34) | |
11303 | (const_int 3) (const_int 35) | |
11304 | (const_int 4) (const_int 36) | |
11305 | (const_int 5) (const_int 37) | |
11306 | (const_int 6) (const_int 38) | |
11307 | (const_int 7) (const_int 39) | |
977e83a3 KY |
11308 | (const_int 16) (const_int 48) |
11309 | (const_int 17) (const_int 49) | |
11310 | (const_int 18) (const_int 50) | |
11311 | (const_int 19) (const_int 51) | |
11312 | (const_int 20) (const_int 52) | |
11313 | (const_int 21) (const_int 53) | |
11314 | (const_int 22) (const_int 54) | |
11315 | (const_int 23) (const_int 55)])))] | |
6edf4f24 AI |
11316 | "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>" |
11317 | "vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" | |
977e83a3 | 11318 | [(set_attr "type" "sselog") |
6edf4f24 | 11319 | (set_attr "prefix" "maybe_vex") |
977e83a3 KY |
11320 | (set_attr "mode" "OI")]) |
11321 | ||
6edf4f24 AI |
11322 | (define_insn "vec_interleave_lowv16qi<mask_name>" |
11323 | [(set (match_operand:V16QI 0 "register_operand" "=x,v") | |
ef719a44 RH |
11324 | (vec_select:V16QI |
11325 | (vec_concat:V32QI | |
6edf4f24 AI |
11326 | (match_operand:V16QI 1 "register_operand" "0,v") |
11327 | (match_operand:V16QI 2 "nonimmediate_operand" "xm,vm")) | |
ef719a44 RH |
11328 | (parallel [(const_int 0) (const_int 16) |
11329 | (const_int 1) (const_int 17) | |
11330 | (const_int 2) (const_int 18) | |
11331 | (const_int 3) (const_int 19) | |
11332 | (const_int 4) (const_int 20) | |
11333 | (const_int 5) (const_int 21) | |
11334 | (const_int 6) (const_int 22) | |
11335 | (const_int 7) (const_int 23)])))] | |
6edf4f24 | 11336 | "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>" |
1ee8b298 UB |
11337 | "@ |
11338 | punpcklbw\t{%2, %0|%0, %2} | |
6edf4f24 | 11339 | vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
1ee8b298 UB |
11340 | [(set_attr "isa" "noavx,avx") |
11341 | (set_attr "type" "sselog") | |
11342 | (set_attr "prefix_data16" "1,*") | |
11343 | (set_attr "prefix" "orig,vex") | |
95879c72 L |
11344 | (set_attr "mode" "TI")]) |
11345 | ||
6edf4f24 AI |
11346 | (define_insn "avx512bw_interleave_highv32hi<mask_name>" |
11347 | [(set (match_operand:V32HI 0 "register_operand" "=v") | |
11348 | (vec_select:V32HI | |
11349 | (vec_concat:V64HI | |
11350 | (match_operand:V32HI 1 "register_operand" "v") | |
11351 | (match_operand:V32HI 2 "nonimmediate_operand" "vm")) | |
11352 | (parallel [(const_int 4) (const_int 36) | |
11353 | (const_int 5) (const_int 37) | |
11354 | (const_int 6) (const_int 38) | |
11355 | (const_int 7) (const_int 39) | |
11356 | (const_int 12) (const_int 44) | |
11357 | (const_int 13) (const_int 45) | |
11358 | (const_int 14) (const_int 46) | |
11359 | (const_int 15) (const_int 47) | |
11360 | (const_int 20) (const_int 52) | |
11361 | (const_int 21) (const_int 53) | |
11362 | (const_int 22) (const_int 54) | |
11363 | (const_int 23) (const_int 55) | |
11364 | (const_int 28) (const_int 60) | |
11365 | (const_int 29) (const_int 61) | |
11366 | (const_int 30) (const_int 62) | |
11367 | (const_int 31) (const_int 63)])))] | |
11368 | "TARGET_AVX512BW" | |
11369 | "vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" | |
11370 | [(set_attr "type" "sselog") | |
11371 | (set_attr "prefix" "evex") | |
11372 | (set_attr "mode" "XI")]) | |
11373 | ||
11374 | (define_insn "avx2_interleave_highv16hi<mask_name>" | |
11375 | [(set (match_operand:V16HI 0 "register_operand" "=v") | |
977e83a3 KY |
11376 | (vec_select:V16HI |
11377 | (vec_concat:V32HI | |
6edf4f24 AI |
11378 | (match_operand:V16HI 1 "register_operand" "v") |
11379 | (match_operand:V16HI 2 "nonimmediate_operand" "vm")) | |
977e83a3 KY |
11380 | (parallel [(const_int 4) (const_int 20) |
11381 | (const_int 5) (const_int 21) | |
11382 | (const_int 6) (const_int 22) | |
11383 | (const_int 7) (const_int 23) | |
11384 | (const_int 12) (const_int 28) | |
11385 | (const_int 13) (const_int 29) | |
11386 | (const_int 14) (const_int 30) | |
11387 | (const_int 15) (const_int 31)])))] | |
6edf4f24 AI |
11388 | "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>" |
11389 | "vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" | |
977e83a3 | 11390 | [(set_attr "type" "sselog") |
6edf4f24 | 11391 | (set_attr "prefix" "maybe_evex") |
977e83a3 KY |
11392 | (set_attr "mode" "OI")]) |
11393 | ||
6edf4f24 AI |
11394 | (define_insn "vec_interleave_highv8hi<mask_name>" |
11395 | [(set (match_operand:V8HI 0 "register_operand" "=x,v") | |
ef719a44 RH |
11396 | (vec_select:V8HI |
11397 | (vec_concat:V16HI | |
6edf4f24 AI |
11398 | (match_operand:V8HI 1 "register_operand" "0,v") |
11399 | (match_operand:V8HI 2 "nonimmediate_operand" "xm,vm")) | |
ef719a44 RH |
11400 | (parallel [(const_int 4) (const_int 12) |
11401 | (const_int 5) (const_int 13) | |
11402 | (const_int 6) (const_int 14) | |
11403 | (const_int 7) (const_int 15)])))] | |
6edf4f24 | 11404 | "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>" |
1ee8b298 UB |
11405 | "@ |
11406 | punpckhwd\t{%2, %0|%0, %2} | |
6edf4f24 | 11407 | vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
1ee8b298 UB |
11408 | [(set_attr "isa" "noavx,avx") |
11409 | (set_attr "type" "sselog") | |
11410 | (set_attr "prefix_data16" "1,*") | |
6edf4f24 | 11411 | (set_attr "prefix" "orig,maybe_vex") |
95879c72 L |
11412 | (set_attr "mode" "TI")]) |
11413 | ||
6edf4f24 AI |
11414 | (define_insn "<mask_codefor>avx512bw_interleave_lowv32hi<mask_name>" |
11415 | [(set (match_operand:V32HI 0 "register_operand" "=v") | |
11416 | (vec_select:V32HI | |
11417 | (vec_concat:V64HI | |
11418 | (match_operand:V32HI 1 "register_operand" "v") | |
11419 | (match_operand:V32HI 2 "nonimmediate_operand" "vm")) | |
11420 | (parallel [(const_int 0) (const_int 32) | |
11421 | (const_int 1) (const_int 33) | |
11422 | (const_int 2) (const_int 34) | |
11423 | (const_int 3) (const_int 35) | |
11424 | (const_int 8) (const_int 40) | |
11425 | (const_int 9) (const_int 41) | |
11426 | (const_int 10) (const_int 42) | |
11427 | (const_int 11) (const_int 43) | |
11428 | (const_int 16) (const_int 48) | |
11429 | (const_int 17) (const_int 49) | |
11430 | (const_int 18) (const_int 50) | |
11431 | (const_int 19) (const_int 51) | |
11432 | (const_int 24) (const_int 56) | |
11433 | (const_int 25) (const_int 57) | |
11434 | (const_int 26) (const_int 58) | |
11435 | (const_int 27) (const_int 59)])))] | |
11436 | "TARGET_AVX512BW" | |
11437 | "vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" | |
11438 | [(set_attr "type" "sselog") | |
11439 | (set_attr "prefix" "evex") | |
11440 | (set_attr "mode" "XI")]) | |
11441 | ||
11442 | (define_insn "avx2_interleave_lowv16hi<mask_name>" | |
11443 | [(set (match_operand:V16HI 0 "register_operand" "=v") | |
977e83a3 KY |
11444 | (vec_select:V16HI |
11445 | (vec_concat:V32HI | |
6edf4f24 AI |
11446 | (match_operand:V16HI 1 "register_operand" "v") |
11447 | (match_operand:V16HI 2 "nonimmediate_operand" "vm")) | |
977e83a3 KY |
11448 | (parallel [(const_int 0) (const_int 16) |
11449 | (const_int 1) (const_int 17) | |
11450 | (const_int 2) (const_int 18) | |
11451 | (const_int 3) (const_int 19) | |
11452 | (const_int 8) (const_int 24) | |
11453 | (const_int 9) (const_int 25) | |
11454 | (const_int 10) (const_int 26) | |
11455 | (const_int 11) (const_int 27)])))] | |
6edf4f24 AI |
11456 | "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>" |
11457 | "vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" | |
977e83a3 | 11458 | [(set_attr "type" "sselog") |
6edf4f24 | 11459 | (set_attr "prefix" "maybe_evex") |
977e83a3 KY |
11460 | (set_attr "mode" "OI")]) |
11461 | ||
6edf4f24 AI |
11462 | (define_insn "vec_interleave_lowv8hi<mask_name>" |
11463 | [(set (match_operand:V8HI 0 "register_operand" "=x,v") | |
ef719a44 RH |
11464 | (vec_select:V8HI |
11465 | (vec_concat:V16HI | |
6edf4f24 AI |
11466 | (match_operand:V8HI 1 "register_operand" "0,v") |
11467 | (match_operand:V8HI 2 "nonimmediate_operand" "xm,vm")) | |
ef719a44 RH |
11468 | (parallel [(const_int 0) (const_int 8) |
11469 | (const_int 1) (const_int 9) | |
11470 | (const_int 2) (const_int 10) | |
11471 | (const_int 3) (const_int 11)])))] | |
6edf4f24 | 11472 | "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>" |
1ee8b298 UB |
11473 | "@ |
11474 | punpcklwd\t{%2, %0|%0, %2} | |
6edf4f24 | 11475 | vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
1ee8b298 UB |
11476 | [(set_attr "isa" "noavx,avx") |
11477 | (set_attr "type" "sselog") | |
11478 | (set_attr "prefix_data16" "1,*") | |
6edf4f24 | 11479 | (set_attr "prefix" "orig,maybe_evex") |
95879c72 L |
11480 | (set_attr "mode" "TI")]) |
11481 | ||
6edf4f24 AI |
11482 | (define_insn "avx2_interleave_highv8si<mask_name>" |
11483 | [(set (match_operand:V8SI 0 "register_operand" "=v") | |
977e83a3 KY |
11484 | (vec_select:V8SI |
11485 | (vec_concat:V16SI | |
6edf4f24 AI |
11486 | (match_operand:V8SI 1 "register_operand" "v") |
11487 | (match_operand:V8SI 2 "nonimmediate_operand" "vm")) | |
977e83a3 KY |
11488 | (parallel [(const_int 2) (const_int 10) |
11489 | (const_int 3) (const_int 11) | |
11490 | (const_int 6) (const_int 14) | |
11491 | (const_int 7) (const_int 15)])))] | |
6edf4f24 AI |
11492 | "TARGET_AVX2 && <mask_avx512vl_condition>" |
11493 | "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" | |
977e83a3 | 11494 | [(set_attr "type" "sselog") |
6edf4f24 | 11495 | (set_attr "prefix" "maybe_evex") |
977e83a3 KY |
11496 | (set_attr "mode" "OI")]) |
11497 | ||
47490470 | 11498 | (define_insn "<mask_codefor>avx512f_interleave_highv16si<mask_name>" |
c003c6d6 AI |
11499 | [(set (match_operand:V16SI 0 "register_operand" "=v") |
11500 | (vec_select:V16SI | |
11501 | (vec_concat:V32SI | |
11502 | (match_operand:V16SI 1 "register_operand" "v") | |
11503 | (match_operand:V16SI 2 "nonimmediate_operand" "vm")) | |
11504 | (parallel [(const_int 2) (const_int 18) | |
11505 | (const_int 3) (const_int 19) | |
11506 | (const_int 6) (const_int 22) | |
11507 | (const_int 7) (const_int 23) | |
11508 | (const_int 10) (const_int 26) | |
11509 | (const_int 11) (const_int 27) | |
11510 | (const_int 14) (const_int 30) | |
11511 | (const_int 15) (const_int 31)])))] | |
11512 | "TARGET_AVX512F" | |
47490470 | 11513 | "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
c003c6d6 AI |
11514 | [(set_attr "type" "sselog") |
11515 | (set_attr "prefix" "evex") | |
11516 | (set_attr "mode" "XI")]) | |
11517 | ||
11518 | ||
6edf4f24 AI |
11519 | (define_insn "vec_interleave_highv4si<mask_name>" |
11520 | [(set (match_operand:V4SI 0 "register_operand" "=x,v") | |
ef719a44 RH |
11521 | (vec_select:V4SI |
11522 | (vec_concat:V8SI | |
6edf4f24 AI |
11523 | (match_operand:V4SI 1 "register_operand" "0,v") |
11524 | (match_operand:V4SI 2 "nonimmediate_operand" "xm,vm")) | |
ef719a44 RH |
11525 | (parallel [(const_int 2) (const_int 6) |
11526 | (const_int 3) (const_int 7)])))] | |
6edf4f24 | 11527 | "TARGET_SSE2 && <mask_avx512vl_condition>" |
1ee8b298 UB |
11528 | "@ |
11529 | punpckhdq\t{%2, %0|%0, %2} | |
6edf4f24 | 11530 | vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
1ee8b298 UB |
11531 | [(set_attr "isa" "noavx,avx") |
11532 | (set_attr "type" "sselog") | |
11533 | (set_attr "prefix_data16" "1,*") | |
6edf4f24 | 11534 | (set_attr "prefix" "orig,maybe_vex") |
95879c72 L |
11535 | (set_attr "mode" "TI")]) |
11536 | ||
6edf4f24 AI |
11537 | (define_insn "avx2_interleave_lowv8si<mask_name>" |
11538 | [(set (match_operand:V8SI 0 "register_operand" "=v") | |
977e83a3 KY |
11539 | (vec_select:V8SI |
11540 | (vec_concat:V16SI | |
6edf4f24 AI |
11541 | (match_operand:V8SI 1 "register_operand" "v") |
11542 | (match_operand:V8SI 2 "nonimmediate_operand" "vm")) | |
977e83a3 KY |
11543 | (parallel [(const_int 0) (const_int 8) |
11544 | (const_int 1) (const_int 9) | |
11545 | (const_int 4) (const_int 12) | |
11546 | (const_int 5) (const_int 13)])))] | |
6edf4f24 AI |
11547 | "TARGET_AVX2 && <mask_avx512vl_condition>" |
11548 | "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" | |
977e83a3 | 11549 | [(set_attr "type" "sselog") |
6edf4f24 | 11550 | (set_attr "prefix" "maybe_evex") |
977e83a3 KY |
11551 | (set_attr "mode" "OI")]) |
11552 | ||
47490470 | 11553 | (define_insn "<mask_codefor>avx512f_interleave_lowv16si<mask_name>" |
c003c6d6 AI |
11554 | [(set (match_operand:V16SI 0 "register_operand" "=v") |
11555 | (vec_select:V16SI | |
11556 | (vec_concat:V32SI | |
11557 | (match_operand:V16SI 1 "register_operand" "v") | |
11558 | (match_operand:V16SI 2 "nonimmediate_operand" "vm")) | |
11559 | (parallel [(const_int 0) (const_int 16) | |
11560 | (const_int 1) (const_int 17) | |
11561 | (const_int 4) (const_int 20) | |
11562 | (const_int 5) (const_int 21) | |
11563 | (const_int 8) (const_int 24) | |
11564 | (const_int 9) (const_int 25) | |
11565 | (const_int 12) (const_int 28) | |
11566 | (const_int 13) (const_int 29)])))] | |
11567 | "TARGET_AVX512F" | |
47490470 | 11568 | "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
c003c6d6 AI |
11569 | [(set_attr "type" "sselog") |
11570 | (set_attr "prefix" "evex") | |
11571 | (set_attr "mode" "XI")]) | |
11572 | ||
6edf4f24 AI |
11573 | (define_insn "vec_interleave_lowv4si<mask_name>" |
11574 | [(set (match_operand:V4SI 0 "register_operand" "=x,v") | |
ef719a44 RH |
11575 | (vec_select:V4SI |
11576 | (vec_concat:V8SI | |
6edf4f24 AI |
11577 | (match_operand:V4SI 1 "register_operand" "0,v") |
11578 | (match_operand:V4SI 2 "nonimmediate_operand" "xm,vm")) | |
ef719a44 RH |
11579 | (parallel [(const_int 0) (const_int 4) |
11580 | (const_int 1) (const_int 5)])))] | |
6edf4f24 | 11581 | "TARGET_SSE2 && <mask_avx512vl_condition>" |
1ee8b298 UB |
11582 | "@ |
11583 | punpckldq\t{%2, %0|%0, %2} | |
6edf4f24 | 11584 | vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
1ee8b298 UB |
11585 | [(set_attr "isa" "noavx,avx") |
11586 | (set_attr "type" "sselog") | |
11587 | (set_attr "prefix_data16" "1,*") | |
11588 | (set_attr "prefix" "orig,vex") | |
95879c72 L |
11589 | (set_attr "mode" "TI")]) |
11590 | ||
2e2accf8 JJ |
11591 | (define_expand "vec_interleave_high<mode>" |
11592 | [(match_operand:VI_256 0 "register_operand" "=x") | |
11593 | (match_operand:VI_256 1 "register_operand" "x") | |
11594 | (match_operand:VI_256 2 "nonimmediate_operand" "xm")] | |
11595 | "TARGET_AVX2" | |
11596 | { | |
11597 | rtx t1 = gen_reg_rtx (<MODE>mode); | |
11598 | rtx t2 = gen_reg_rtx (<MODE>mode); | |
d8c84975 | 11599 | rtx t3 = gen_reg_rtx (V4DImode); |
2e2accf8 JJ |
11600 | emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2])); |
11601 | emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2])); | |
d8c84975 JJ |
11602 | emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1), |
11603 | gen_lowpart (V4DImode, t2), | |
11604 | GEN_INT (1 + (3 << 4)))); | |
11605 | emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3)); | |
2e2accf8 JJ |
11606 | DONE; |
11607 | }) | |
11608 | ||
11609 | (define_expand "vec_interleave_low<mode>" | |
11610 | [(match_operand:VI_256 0 "register_operand" "=x") | |
11611 | (match_operand:VI_256 1 "register_operand" "x") | |
11612 | (match_operand:VI_256 2 "nonimmediate_operand" "xm")] | |
11613 | "TARGET_AVX2" | |
11614 | { | |
11615 | rtx t1 = gen_reg_rtx (<MODE>mode); | |
11616 | rtx t2 = gen_reg_rtx (<MODE>mode); | |
d8c84975 | 11617 | rtx t3 = gen_reg_rtx (V4DImode); |
2e2accf8 JJ |
11618 | emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2])); |
11619 | emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2])); | |
d8c84975 JJ |
11620 | emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1), |
11621 | gen_lowpart (V4DImode, t2), | |
11622 | GEN_INT (0 + (2 << 4)))); | |
11623 | emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3)); | |
2e2accf8 JJ |
11624 | DONE; |
11625 | }) | |
11626 | ||
51e7f377 UB |
11627 | ;; Modes handled by pinsr patterns. |
11628 | (define_mode_iterator PINSR_MODE | |
11629 | [(V16QI "TARGET_SSE4_1") V8HI | |
11630 | (V4SI "TARGET_SSE4_1") | |
11631 | (V2DI "TARGET_SSE4_1 && TARGET_64BIT")]) | |
11632 | ||
11633 | (define_mode_attr sse2p4_1 | |
11634 | [(V16QI "sse4_1") (V8HI "sse2") | |
11635 | (V4SI "sse4_1") (V2DI "sse4_1")]) | |
11636 | ||
11637 | ;; sse4_1_pinsrd must come before sse2_loadld since it is preferred. | |
11638 | (define_insn "<sse2p4_1>_pinsr<ssemodesuffix>" | |
11639 | [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x") | |
11640 | (vec_merge:PINSR_MODE | |
11641 | (vec_duplicate:PINSR_MODE | |
11642 | (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m")) | |
11643 | (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x") | |
82e86dc6 | 11644 | (match_operand:SI 3 "const_int_operand")))] |
51e7f377 UB |
11645 | "TARGET_SSE2 |
11646 | && ((unsigned) exact_log2 (INTVAL (operands[3])) | |
11647 | < GET_MODE_NUNITS (<MODE>mode))" | |
ef719a44 RH |
11648 | { |
11649 | operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3]))); | |
1ee8b298 UB |
11650 | |
11651 | switch (which_alternative) | |
11652 | { | |
11653 | case 0: | |
51e7f377 | 11654 | if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode)) |
977e83a3 | 11655 | return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}"; |
51e7f377 | 11656 | /* FALLTHRU */ |
1ee8b298 | 11657 | case 1: |
51e7f377 | 11658 | return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"; |
1ee8b298 | 11659 | case 2: |
51e7f377 | 11660 | if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode)) |
977e83a3 | 11661 | return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}"; |
51e7f377 | 11662 | /* FALLTHRU */ |
1ee8b298 | 11663 | case 3: |
51e7f377 | 11664 | return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"; |
1ee8b298 UB |
11665 | default: |
11666 | gcc_unreachable (); | |
11667 | } | |
ef719a44 | 11668 | } |
1ee8b298 UB |
11669 | [(set_attr "isa" "noavx,noavx,avx,avx") |
11670 | (set_attr "type" "sselog") | |
51e7f377 UB |
11671 | (set (attr "prefix_rex") |
11672 | (if_then_else | |
67b2c493 | 11673 | (and (not (match_test "TARGET_AVX")) |
51e7f377 UB |
11674 | (eq (const_string "<MODE>mode") (const_string "V2DImode"))) |
11675 | (const_string "1") | |
11676 | (const_string "*"))) | |
11677 | (set (attr "prefix_data16") | |
11678 | (if_then_else | |
67b2c493 | 11679 | (and (not (match_test "TARGET_AVX")) |
51e7f377 UB |
11680 | (eq (const_string "<MODE>mode") (const_string "V8HImode"))) |
11681 | (const_string "1") | |
11682 | (const_string "*"))) | |
11683 | (set (attr "prefix_extra") | |
11684 | (if_then_else | |
67b2c493 | 11685 | (and (not (match_test "TARGET_AVX")) |
51e7f377 UB |
11686 | (eq (const_string "<MODE>mode") (const_string "V8HImode"))) |
11687 | (const_string "*") | |
11688 | (const_string "1"))) | |
725fd454 | 11689 | (set_attr "length_immediate" "1") |
1ee8b298 | 11690 | (set_attr "prefix" "orig,orig,vex,vex") |
ef719a44 RH |
11691 | (set_attr "mode" "TI")]) |
11692 | ||
d0337ddc AI |
11693 | (define_expand "<extract_type>_vinsert<shuffletype><extract_suf>_mask" |
11694 | [(match_operand:AVX512_VEC 0 "register_operand") | |
11695 | (match_operand:AVX512_VEC 1 "register_operand") | |
47490470 AI |
11696 | (match_operand:<ssequartermode> 2 "nonimmediate_operand") |
11697 | (match_operand:SI 3 "const_0_to_3_operand") | |
d0337ddc | 11698 | (match_operand:AVX512_VEC 4 "register_operand") |
47490470 AI |
11699 | (match_operand:<avx512fmaskmode> 5 "register_operand")] |
11700 | "TARGET_AVX512F" | |
11701 | { | |
d0337ddc AI |
11702 | int mask,selector; |
11703 | mask = INTVAL (operands[3]); | |
11704 | selector = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 4 ? | |
11705 | 0xFFFF ^ (0xF000 >> mask * 4) | |
11706 | : 0xFF ^ (0xC0 >> mask * 2); | |
11707 | emit_insn (gen_<extract_type>_vinsert<shuffletype><extract_suf>_1_mask | |
11708 | (operands[0], operands[1], operands[2], GEN_INT (selector), | |
11709 | operands[4], operands[5])); | |
47490470 | 11710 | DONE; |
47490470 AI |
11711 | }) |
11712 | ||
d0337ddc AI |
11713 | (define_insn "<mask_codefor><extract_type>_vinsert<shuffletype><extract_suf>_1<mask_name>" |
11714 | [(set (match_operand:AVX512_VEC 0 "register_operand" "=v") | |
11715 | (vec_merge:AVX512_VEC | |
11716 | (match_operand:AVX512_VEC 1 "register_operand" "v") | |
11717 | (vec_duplicate:AVX512_VEC | |
2e2206fa AI |
11718 | (match_operand:<ssequartermode> 2 "nonimmediate_operand" "vm")) |
11719 | (match_operand:SI 3 "const_int_operand" "n")))] | |
11720 | "TARGET_AVX512F" | |
11721 | { | |
11722 | int mask; | |
d0337ddc AI |
11723 | int selector = INTVAL (operands[3]); |
11724 | ||
11725 | if (selector == 0xFFF || selector == 0x3F) | |
11726 | mask = 0; | |
11727 | else if ( selector == 0xF0FF || selector == 0xCF) | |
11728 | mask = 1; | |
11729 | else if ( selector == 0xFF0F || selector == 0xF3) | |
11730 | mask = 2; | |
11731 | else if ( selector == 0xFFF0 || selector == 0xFC) | |
11732 | mask = 3; | |
2e2206fa AI |
11733 | else |
11734 | gcc_unreachable (); | |
11735 | ||
11736 | operands[3] = GEN_INT (mask); | |
11737 | ||
d0337ddc | 11738 | return "vinsert<shuffletype><extract_suf>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}"; |
2e2206fa AI |
11739 | } |
11740 | [(set_attr "type" "sselog") | |
11741 | (set_attr "length_immediate" "1") | |
11742 | (set_attr "prefix" "evex") | |
11743 | (set_attr "mode" "<sseinsnmode>")]) | |
11744 | ||
d0337ddc AI |
11745 | (define_expand "<extract_type_2>_vinsert<shuffletype><extract_suf_2>_mask" |
11746 | [(match_operand:AVX512_VEC_2 0 "register_operand") | |
11747 | (match_operand:AVX512_VEC_2 1 "register_operand") | |
47490470 AI |
11748 | (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand") |
11749 | (match_operand:SI 3 "const_0_to_1_operand") | |
d0337ddc | 11750 | (match_operand:AVX512_VEC_2 4 "register_operand") |
47490470 AI |
11751 | (match_operand:<avx512fmaskmode> 5 "register_operand")] |
11752 | "TARGET_AVX512F" | |
11753 | { | |
11754 | int mask = INTVAL (operands[3]); | |
11755 | if (mask == 0) | |
11756 | emit_insn (gen_vec_set_lo_<mode>_mask | |
11757 | (operands[0], operands[1], operands[2], | |
11758 | operands[4], operands[5])); | |
11759 | else | |
11760 | emit_insn (gen_vec_set_hi_<mode>_mask | |
11761 | (operands[0], operands[1], operands[2], | |
11762 | operands[4], operands[5])); | |
11763 | DONE; | |
11764 | }) | |
11765 | ||
d0337ddc AI |
11766 | (define_insn "vec_set_lo_<mode><mask_name>" |
11767 | [(set (match_operand:V16FI 0 "register_operand" "=v") | |
11768 | (vec_concat:V16FI | |
11769 | (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm") | |
11770 | (vec_select:<ssehalfvecmode> | |
11771 | (match_operand:V16FI 1 "register_operand" "v") | |
11772 | (parallel [(const_int 8) (const_int 9) | |
11773 | (const_int 10) (const_int 11) | |
11774 | (const_int 12) (const_int 13) | |
11775 | (const_int 14) (const_int 15)]))))] | |
11776 | "TARGET_AVX512DQ" | |
11777 | "vinsert<shuffletype>32x8\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, $0x0}" | |
11778 | [(set_attr "type" "sselog") | |
11779 | (set_attr "length_immediate" "1") | |
11780 | (set_attr "prefix" "evex") | |
11781 | (set_attr "mode" "<sseinsnmode>")]) | |
11782 | ||
11783 | (define_insn "vec_set_hi_<mode><mask_name>" | |
11784 | [(set (match_operand:V16FI 0 "register_operand" "=v") | |
11785 | (vec_concat:V16FI | |
11786 | (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm") | |
11787 | (vec_select:<ssehalfvecmode> | |
11788 | (match_operand:V16FI 1 "register_operand" "v") | |
11789 | (parallel [(const_int 0) (const_int 1) | |
11790 | (const_int 2) (const_int 3) | |
11791 | (const_int 4) (const_int 5) | |
11792 | (const_int 6) (const_int 7)]))))] | |
11793 | "TARGET_AVX512DQ" | |
11794 | "vinsert<shuffletype>32x8\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, $0x1}" | |
11795 | [(set_attr "type" "sselog") | |
11796 | (set_attr "length_immediate" "1") | |
11797 | (set_attr "prefix" "evex") | |
11798 | (set_attr "mode" "<sseinsnmode>")]) | |
11799 | ||
47490470 | 11800 | (define_insn "vec_set_lo_<mode><mask_name>" |
2e2206fa AI |
11801 | [(set (match_operand:V8FI 0 "register_operand" "=v") |
11802 | (vec_concat:V8FI | |
11803 | (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm") | |
11804 | (vec_select:<ssehalfvecmode> | |
11805 | (match_operand:V8FI 1 "register_operand" "v") | |
11806 | (parallel [(const_int 4) (const_int 5) | |
11807 | (const_int 6) (const_int 7)]))))] | |
11808 | "TARGET_AVX512F" | |
47490470 | 11809 | "vinsert<shuffletype>64x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, $0x0}" |
2e2206fa AI |
11810 | [(set_attr "type" "sselog") |
11811 | (set_attr "length_immediate" "1") | |
11812 | (set_attr "prefix" "evex") | |
11813 | (set_attr "mode" "XI")]) | |
11814 | ||
47490470 | 11815 | (define_insn "vec_set_hi_<mode><mask_name>" |
2e2206fa AI |
11816 | [(set (match_operand:V8FI 0 "register_operand" "=v") |
11817 | (vec_concat:V8FI | |
11818 | (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm") | |
11819 | (vec_select:<ssehalfvecmode> | |
11820 | (match_operand:V8FI 1 "register_operand" "v") | |
11821 | (parallel [(const_int 0) (const_int 1) | |
11822 | (const_int 2) (const_int 3)]))))] | |
11823 | "TARGET_AVX512F" | |
47490470 | 11824 | "vinsert<shuffletype>64x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, $0x1}" |
2e2206fa AI |
11825 | [(set_attr "type" "sselog") |
11826 | (set_attr "length_immediate" "1") | |
11827 | (set_attr "prefix" "evex") | |
11828 | (set_attr "mode" "XI")]) | |
11829 | ||
d286410b AI |
11830 | (define_expand "avx512dq_shuf_<shuffletype>64x2_mask" |
11831 | [(match_operand:VI8F_256 0 "register_operand") | |
11832 | (match_operand:VI8F_256 1 "register_operand") | |
11833 | (match_operand:VI8F_256 2 "nonimmediate_operand") | |
11834 | (match_operand:SI 3 "const_0_to_3_operand") | |
11835 | (match_operand:VI8F_256 4 "register_operand") | |
11836 | (match_operand:QI 5 "register_operand")] | |
11837 | "TARGET_AVX512DQ" | |
11838 | { | |
11839 | int mask = INTVAL (operands[3]); | |
11840 | emit_insn (gen_avx512dq_shuf_<shuffletype>64x2_1_mask | |
11841 | (operands[0], operands[1], operands[2], | |
11842 | GEN_INT (((mask >> 0) & 1) * 2 + 0), | |
11843 | GEN_INT (((mask >> 0) & 1) * 2 + 1), | |
11844 | GEN_INT (((mask >> 1) & 1) * 2 + 4), | |
11845 | GEN_INT (((mask >> 1) & 1) * 2 + 5), | |
11846 | operands[4], operands[5])); | |
11847 | DONE; | |
11848 | }) | |
11849 | ||
11850 | (define_insn "<mask_codefor>avx512dq_shuf_<shuffletype>64x2_1<mask_name>" | |
11851 | [(set (match_operand:VI8F_256 0 "register_operand" "=v") | |
11852 | (vec_select:VI8F_256 | |
11853 | (vec_concat:<ssedoublemode> | |
11854 | (match_operand:VI8F_256 1 "register_operand" "v") | |
11855 | (match_operand:VI8F_256 2 "nonimmediate_operand" "vm")) | |
11856 | (parallel [(match_operand 3 "const_0_to_3_operand") | |
11857 | (match_operand 4 "const_0_to_3_operand") | |
11858 | (match_operand 5 "const_4_to_7_operand") | |
11859 | (match_operand 6 "const_4_to_7_operand")])))] | |
11860 | "TARGET_AVX512VL | |
11861 | && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1) | |
11862 | && INTVAL (operands[5]) == (INTVAL (operands[6]) - 1))" | |
11863 | { | |
11864 | int mask; | |
11865 | mask = INTVAL (operands[3]) / 2; | |
11866 | mask |= (INTVAL (operands[5]) - 4) / 2 << 1; | |
11867 | operands[3] = GEN_INT (mask); | |
11868 | return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand7>|%0<mask_operand7>, %1, %2, %3}"; | |
11869 | } | |
11870 | [(set_attr "type" "sselog") | |
11871 | (set_attr "length_immediate" "1") | |
11872 | (set_attr "prefix" "evex") | |
11873 | (set_attr "mode" "XI")]) | |
11874 | ||
47490470 AI |
11875 | (define_expand "avx512f_shuf_<shuffletype>64x2_mask" |
11876 | [(match_operand:V8FI 0 "register_operand") | |
11877 | (match_operand:V8FI 1 "register_operand") | |
11878 | (match_operand:V8FI 2 "nonimmediate_operand") | |
11879 | (match_operand:SI 3 "const_0_to_255_operand") | |
11880 | (match_operand:V8FI 4 "register_operand") | |
11881 | (match_operand:QI 5 "register_operand")] | |
11882 | "TARGET_AVX512F" | |
11883 | { | |
11884 | int mask = INTVAL (operands[3]); | |
11885 | emit_insn (gen_avx512f_shuf_<shuffletype>64x2_1_mask | |
11886 | (operands[0], operands[1], operands[2], | |
11887 | GEN_INT (((mask >> 0) & 3) * 2), | |
11888 | GEN_INT (((mask >> 0) & 3) * 2 + 1), | |
11889 | GEN_INT (((mask >> 2) & 3) * 2), | |
11890 | GEN_INT (((mask >> 2) & 3) * 2 + 1), | |
11891 | GEN_INT (((mask >> 4) & 3) * 2 + 8), | |
11892 | GEN_INT (((mask >> 4) & 3) * 2 + 9), | |
11893 | GEN_INT (((mask >> 6) & 3) * 2 + 8), | |
11894 | GEN_INT (((mask >> 6) & 3) * 2 + 9), | |
11895 | operands[4], operands[5])); | |
11896 | DONE; | |
11897 | }) | |
11898 | ||
11899 | (define_insn "avx512f_shuf_<shuffletype>64x2_1<mask_name>" | |
2e2206fa AI |
11900 | [(set (match_operand:V8FI 0 "register_operand" "=v") |
11901 | (vec_select:V8FI | |
11902 | (vec_concat:<ssedoublemode> | |
11903 | (match_operand:V8FI 1 "register_operand" "v") | |
11904 | (match_operand:V8FI 2 "nonimmediate_operand" "vm")) | |
11905 | (parallel [(match_operand 3 "const_0_to_7_operand") | |
11906 | (match_operand 4 "const_0_to_7_operand") | |
11907 | (match_operand 5 "const_0_to_7_operand") | |
11908 | (match_operand 6 "const_0_to_7_operand") | |
11909 | (match_operand 7 "const_8_to_15_operand") | |
11910 | (match_operand 8 "const_8_to_15_operand") | |
11911 | (match_operand 9 "const_8_to_15_operand") | |
11912 | (match_operand 10 "const_8_to_15_operand")])))] | |
11913 | "TARGET_AVX512F | |
11914 | && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1) | |
11915 | && INTVAL (operands[5]) == (INTVAL (operands[6]) - 1) | |
11916 | && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1) | |
11917 | && INTVAL (operands[9]) == (INTVAL (operands[10]) - 1))" | |
11918 | { | |
11919 | int mask; | |
11920 | mask = INTVAL (operands[3]) / 2; | |
11921 | mask |= INTVAL (operands[5]) / 2 << 2; | |
11922 | mask |= (INTVAL (operands[7]) - 8) / 2 << 4; | |
11923 | mask |= (INTVAL (operands[9]) - 8) / 2 << 6; | |
11924 | operands[3] = GEN_INT (mask); | |
11925 | ||
47490470 | 11926 | return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}"; |
2e2206fa AI |
11927 | } |
11928 | [(set_attr "type" "sselog") | |
11929 | (set_attr "length_immediate" "1") | |
11930 | (set_attr "prefix" "evex") | |
11931 | (set_attr "mode" "<sseinsnmode>")]) | |
11932 | ||
d286410b AI |
11933 | (define_expand "avx512vl_shuf_<shuffletype>32x4_mask" |
11934 | [(match_operand:VI4F_256 0 "register_operand") | |
11935 | (match_operand:VI4F_256 1 "register_operand") | |
11936 | (match_operand:VI4F_256 2 "nonimmediate_operand") | |
11937 | (match_operand:SI 3 "const_0_to_3_operand") | |
11938 | (match_operand:VI4F_256 4 "register_operand") | |
11939 | (match_operand:QI 5 "register_operand")] | |
11940 | "TARGET_AVX512VL" | |
11941 | { | |
11942 | int mask = INTVAL (operands[3]); | |
11943 | emit_insn (gen_avx512vl_shuf_<shuffletype>32x4_1_mask | |
11944 | (operands[0], operands[1], operands[2], | |
11945 | GEN_INT (((mask >> 0) & 1) * 4 + 0), | |
11946 | GEN_INT (((mask >> 0) & 1) * 4 + 1), | |
11947 | GEN_INT (((mask >> 0) & 1) * 4 + 2), | |
11948 | GEN_INT (((mask >> 0) & 1) * 4 + 3), | |
11949 | GEN_INT (((mask >> 1) & 1) * 4 + 8), | |
11950 | GEN_INT (((mask >> 1) & 1) * 4 + 9), | |
11951 | GEN_INT (((mask >> 1) & 1) * 4 + 10), | |
11952 | GEN_INT (((mask >> 1) & 1) * 4 + 11), | |
11953 | operands[4], operands[5])); | |
11954 | DONE; | |
11955 | }) | |
11956 | ||
11957 | (define_insn "<mask_codefor>avx512vl_shuf_<shuffletype>32x4_1<mask_name>" | |
11958 | [(set (match_operand:VI4F_256 0 "register_operand" "=v") | |
11959 | (vec_select:VI4F_256 | |
11960 | (vec_concat:<ssedoublemode> | |
11961 | (match_operand:VI4F_256 1 "register_operand" "v") | |
11962 | (match_operand:VI4F_256 2 "nonimmediate_operand" "vm")) | |
11963 | (parallel [(match_operand 3 "const_0_to_7_operand") | |
11964 | (match_operand 4 "const_0_to_7_operand") | |
11965 | (match_operand 5 "const_0_to_7_operand") | |
11966 | (match_operand 6 "const_0_to_7_operand") | |
11967 | (match_operand 7 "const_8_to_15_operand") | |
11968 | (match_operand 8 "const_8_to_15_operand") | |
11969 | (match_operand 9 "const_8_to_15_operand") | |
11970 | (match_operand 10 "const_8_to_15_operand")])))] | |
11971 | "TARGET_AVX512VL | |
11972 | && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1) | |
11973 | && INTVAL (operands[3]) == (INTVAL (operands[5]) - 2) | |
11974 | && INTVAL (operands[3]) == (INTVAL (operands[6]) - 3) | |
11975 | && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1) | |
11976 | && INTVAL (operands[7]) == (INTVAL (operands[9]) - 2) | |
11977 | && INTVAL (operands[7]) == (INTVAL (operands[10]) - 3))" | |
11978 | { | |
11979 | int mask; | |
11980 | mask = INTVAL (operands[3]) / 4; | |
11981 | mask |= (INTVAL (operands[7]) - 8) / 4 << 1; | |
11982 | operands[3] = GEN_INT (mask); | |
11983 | ||
11984 | return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}"; | |
11985 | } | |
11986 | [(set_attr "type" "sselog") | |
11987 | (set_attr "length_immediate" "1") | |
11988 | (set_attr "prefix" "evex") | |
11989 | (set_attr "mode" "<sseinsnmode>")]) | |
11990 | ||
47490470 AI |
11991 | (define_expand "avx512f_shuf_<shuffletype>32x4_mask" |
11992 | [(match_operand:V16FI 0 "register_operand") | |
11993 | (match_operand:V16FI 1 "register_operand") | |
11994 | (match_operand:V16FI 2 "nonimmediate_operand") | |
11995 | (match_operand:SI 3 "const_0_to_255_operand") | |
11996 | (match_operand:V16FI 4 "register_operand") | |
11997 | (match_operand:HI 5 "register_operand")] | |
11998 | "TARGET_AVX512F" | |
11999 | { | |
12000 | int mask = INTVAL (operands[3]); | |
12001 | emit_insn (gen_avx512f_shuf_<shuffletype>32x4_1_mask | |
12002 | (operands[0], operands[1], operands[2], | |
12003 | GEN_INT (((mask >> 0) & 3) * 4), | |
12004 | GEN_INT (((mask >> 0) & 3) * 4 + 1), | |
12005 | GEN_INT (((mask >> 0) & 3) * 4 + 2), | |
12006 | GEN_INT (((mask >> 0) & 3) * 4 + 3), | |
12007 | GEN_INT (((mask >> 2) & 3) * 4), | |
12008 | GEN_INT (((mask >> 2) & 3) * 4 + 1), | |
12009 | GEN_INT (((mask >> 2) & 3) * 4 + 2), | |
12010 | GEN_INT (((mask >> 2) & 3) * 4 + 3), | |
12011 | GEN_INT (((mask >> 4) & 3) * 4 + 16), | |
12012 | GEN_INT (((mask >> 4) & 3) * 4 + 17), | |
12013 | GEN_INT (((mask >> 4) & 3) * 4 + 18), | |
12014 | GEN_INT (((mask >> 4) & 3) * 4 + 19), | |
12015 | GEN_INT (((mask >> 6) & 3) * 4 + 16), | |
12016 | GEN_INT (((mask >> 6) & 3) * 4 + 17), | |
12017 | GEN_INT (((mask >> 6) & 3) * 4 + 18), | |
12018 | GEN_INT (((mask >> 6) & 3) * 4 + 19), | |
12019 | operands[4], operands[5])); | |
12020 | DONE; | |
12021 | }) | |
12022 | ||
12023 | (define_insn "avx512f_shuf_<shuffletype>32x4_1<mask_name>" | |
2e2206fa AI |
12024 | [(set (match_operand:V16FI 0 "register_operand" "=v") |
12025 | (vec_select:V16FI | |
12026 | (vec_concat:<ssedoublemode> | |
12027 | (match_operand:V16FI 1 "register_operand" "v") | |
12028 | (match_operand:V16FI 2 "nonimmediate_operand" "vm")) | |
12029 | (parallel [(match_operand 3 "const_0_to_15_operand") | |
12030 | (match_operand 4 "const_0_to_15_operand") | |
12031 | (match_operand 5 "const_0_to_15_operand") | |
12032 | (match_operand 6 "const_0_to_15_operand") | |
12033 | (match_operand 7 "const_0_to_15_operand") | |
12034 | (match_operand 8 "const_0_to_15_operand") | |
12035 | (match_operand 9 "const_0_to_15_operand") | |
12036 | (match_operand 10 "const_0_to_15_operand") | |
12037 | (match_operand 11 "const_16_to_31_operand") | |
12038 | (match_operand 12 "const_16_to_31_operand") | |
12039 | (match_operand 13 "const_16_to_31_operand") | |
12040 | (match_operand 14 "const_16_to_31_operand") | |
12041 | (match_operand 15 "const_16_to_31_operand") | |
12042 | (match_operand 16 "const_16_to_31_operand") | |
12043 | (match_operand 17 "const_16_to_31_operand") | |
12044 | (match_operand 18 "const_16_to_31_operand")])))] | |
12045 | "TARGET_AVX512F | |
12046 | && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1) | |
12047 | && INTVAL (operands[3]) == (INTVAL (operands[5]) - 2) | |
12048 | && INTVAL (operands[3]) == (INTVAL (operands[6]) - 3) | |
12049 | && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1) | |
12050 | && INTVAL (operands[7]) == (INTVAL (operands[9]) - 2) | |
12051 | && INTVAL (operands[7]) == (INTVAL (operands[10]) - 3) | |
12052 | && INTVAL (operands[11]) == (INTVAL (operands[12]) - 1) | |
12053 | && INTVAL (operands[11]) == (INTVAL (operands[13]) - 2) | |
12054 | && INTVAL (operands[11]) == (INTVAL (operands[14]) - 3) | |
12055 | && INTVAL (operands[15]) == (INTVAL (operands[16]) - 1) | |
12056 | && INTVAL (operands[15]) == (INTVAL (operands[17]) - 2) | |
12057 | && INTVAL (operands[15]) == (INTVAL (operands[18]) - 3))" | |
12058 | { | |
12059 | int mask; | |
12060 | mask = INTVAL (operands[3]) / 4; | |
12061 | mask |= INTVAL (operands[7]) / 4 << 2; | |
12062 | mask |= (INTVAL (operands[11]) - 16) / 4 << 4; | |
12063 | mask |= (INTVAL (operands[15]) - 16) / 4 << 6; | |
12064 | operands[3] = GEN_INT (mask); | |
12065 | ||
47490470 | 12066 | return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}"; |
2e2206fa AI |
12067 | } |
12068 | [(set_attr "type" "sselog") | |
12069 | (set_attr "length_immediate" "1") | |
12070 | (set_attr "prefix" "evex") | |
12071 | (set_attr "mode" "<sseinsnmode>")]) | |
12072 | ||
47490470 AI |
12073 | (define_expand "avx512f_pshufdv3_mask" |
12074 | [(match_operand:V16SI 0 "register_operand") | |
12075 | (match_operand:V16SI 1 "nonimmediate_operand") | |
12076 | (match_operand:SI 2 "const_0_to_255_operand") | |
12077 | (match_operand:V16SI 3 "register_operand") | |
12078 | (match_operand:HI 4 "register_operand")] | |
12079 | "TARGET_AVX512F" | |
12080 | { | |
12081 | int mask = INTVAL (operands[2]); | |
12082 | emit_insn (gen_avx512f_pshufd_1_mask (operands[0], operands[1], | |
12083 | GEN_INT ((mask >> 0) & 3), | |
12084 | GEN_INT ((mask >> 2) & 3), | |
12085 | GEN_INT ((mask >> 4) & 3), | |
12086 | GEN_INT ((mask >> 6) & 3), | |
12087 | GEN_INT (((mask >> 0) & 3) + 4), | |
12088 | GEN_INT (((mask >> 2) & 3) + 4), | |
12089 | GEN_INT (((mask >> 4) & 3) + 4), | |
12090 | GEN_INT (((mask >> 6) & 3) + 4), | |
12091 | GEN_INT (((mask >> 0) & 3) + 8), | |
12092 | GEN_INT (((mask >> 2) & 3) + 8), | |
12093 | GEN_INT (((mask >> 4) & 3) + 8), | |
12094 | GEN_INT (((mask >> 6) & 3) + 8), | |
12095 | GEN_INT (((mask >> 0) & 3) + 12), | |
12096 | GEN_INT (((mask >> 2) & 3) + 12), | |
12097 | GEN_INT (((mask >> 4) & 3) + 12), | |
12098 | GEN_INT (((mask >> 6) & 3) + 12), | |
12099 | operands[3], operands[4])); | |
12100 | DONE; | |
12101 | }) | |
12102 | ||
12103 | (define_insn "avx512f_pshufd_1<mask_name>" | |
2e2206fa AI |
12104 | [(set (match_operand:V16SI 0 "register_operand" "=v") |
12105 | (vec_select:V16SI | |
12106 | (match_operand:V16SI 1 "nonimmediate_operand" "vm") | |
12107 | (parallel [(match_operand 2 "const_0_to_3_operand") | |
12108 | (match_operand 3 "const_0_to_3_operand") | |
12109 | (match_operand 4 "const_0_to_3_operand") | |
12110 | (match_operand 5 "const_0_to_3_operand") | |
12111 | (match_operand 6 "const_4_to_7_operand") | |
12112 | (match_operand 7 "const_4_to_7_operand") | |
12113 | (match_operand 8 "const_4_to_7_operand") | |
12114 | (match_operand 9 "const_4_to_7_operand") | |
12115 | (match_operand 10 "const_8_to_11_operand") | |
12116 | (match_operand 11 "const_8_to_11_operand") | |
12117 | (match_operand 12 "const_8_to_11_operand") | |
12118 | (match_operand 13 "const_8_to_11_operand") | |
12119 | (match_operand 14 "const_12_to_15_operand") | |
12120 | (match_operand 15 "const_12_to_15_operand") | |
12121 | (match_operand 16 "const_12_to_15_operand") | |
12122 | (match_operand 17 "const_12_to_15_operand")])))] | |
12123 | "TARGET_AVX512F | |
12124 | && INTVAL (operands[2]) + 4 == INTVAL (operands[6]) | |
12125 | && INTVAL (operands[3]) + 4 == INTVAL (operands[7]) | |
12126 | && INTVAL (operands[4]) + 4 == INTVAL (operands[8]) | |
12127 | && INTVAL (operands[5]) + 4 == INTVAL (operands[9]) | |
12128 | && INTVAL (operands[2]) + 8 == INTVAL (operands[10]) | |
12129 | && INTVAL (operands[3]) + 8 == INTVAL (operands[11]) | |
12130 | && INTVAL (operands[4]) + 8 == INTVAL (operands[12]) | |
12131 | && INTVAL (operands[5]) + 8 == INTVAL (operands[13]) | |
12132 | && INTVAL (operands[2]) + 12 == INTVAL (operands[14]) | |
12133 | && INTVAL (operands[3]) + 12 == INTVAL (operands[15]) | |
12134 | && INTVAL (operands[4]) + 12 == INTVAL (operands[16]) | |
12135 | && INTVAL (operands[5]) + 12 == INTVAL (operands[17])" | |
12136 | { | |
12137 | int mask = 0; | |
12138 | mask |= INTVAL (operands[2]) << 0; | |
12139 | mask |= INTVAL (operands[3]) << 2; | |
12140 | mask |= INTVAL (operands[4]) << 4; | |
12141 | mask |= INTVAL (operands[5]) << 6; | |
12142 | operands[2] = GEN_INT (mask); | |
12143 | ||
47490470 | 12144 | return "vpshufd\t{%2, %1, %0<mask_operand18>|%0<mask_operand18>, %1, %2}"; |
2e2206fa AI |
12145 | } |
12146 | [(set_attr "type" "sselog1") | |
12147 | (set_attr "prefix" "evex") | |
12148 | (set_attr "length_immediate" "1") | |
12149 | (set_attr "mode" "XI")]) | |
12150 | ||
d286410b AI |
12151 | (define_expand "avx512vl_pshufdv3_mask" |
12152 | [(match_operand:V8SI 0 "register_operand") | |
12153 | (match_operand:V8SI 1 "nonimmediate_operand") | |
12154 | (match_operand:SI 2 "const_0_to_255_operand") | |
12155 | (match_operand:V8SI 3 "register_operand") | |
12156 | (match_operand:QI 4 "register_operand")] | |
12157 | "TARGET_AVX512VL" | |
12158 | { | |
12159 | int mask = INTVAL (operands[2]); | |
12160 | emit_insn (gen_avx2_pshufd_1_mask (operands[0], operands[1], | |
12161 | GEN_INT ((mask >> 0) & 3), | |
12162 | GEN_INT ((mask >> 2) & 3), | |
12163 | GEN_INT ((mask >> 4) & 3), | |
12164 | GEN_INT ((mask >> 6) & 3), | |
12165 | GEN_INT (((mask >> 0) & 3) + 4), | |
12166 | GEN_INT (((mask >> 2) & 3) + 4), | |
12167 | GEN_INT (((mask >> 4) & 3) + 4), | |
12168 | GEN_INT (((mask >> 6) & 3) + 4), | |
12169 | operands[3], operands[4])); | |
12170 | DONE; | |
12171 | }) | |
12172 | ||
977e83a3 | 12173 | (define_expand "avx2_pshufdv3" |
82e86dc6 UB |
12174 | [(match_operand:V8SI 0 "register_operand") |
12175 | (match_operand:V8SI 1 "nonimmediate_operand") | |
12176 | (match_operand:SI 2 "const_0_to_255_operand")] | |
977e83a3 KY |
12177 | "TARGET_AVX2" |
12178 | { | |
12179 | int mask = INTVAL (operands[2]); | |
12180 | emit_insn (gen_avx2_pshufd_1 (operands[0], operands[1], | |
12181 | GEN_INT ((mask >> 0) & 3), | |
12182 | GEN_INT ((mask >> 2) & 3), | |
12183 | GEN_INT ((mask >> 4) & 3), | |
0c7189ae JJ |
12184 | GEN_INT ((mask >> 6) & 3), |
12185 | GEN_INT (((mask >> 0) & 3) + 4), | |
12186 | GEN_INT (((mask >> 2) & 3) + 4), | |
12187 | GEN_INT (((mask >> 4) & 3) + 4), | |
12188 | GEN_INT (((mask >> 6) & 3) + 4))); | |
977e83a3 KY |
12189 | DONE; |
12190 | }) | |
12191 | ||
d286410b AI |
12192 | (define_insn "avx2_pshufd_1<mask_name>" |
12193 | [(set (match_operand:V8SI 0 "register_operand" "=v") | |
977e83a3 | 12194 | (vec_select:V8SI |
d286410b | 12195 | (match_operand:V8SI 1 "nonimmediate_operand" "vm") |
82e86dc6 UB |
12196 | (parallel [(match_operand 2 "const_0_to_3_operand") |
12197 | (match_operand 3 "const_0_to_3_operand") | |
12198 | (match_operand 4 "const_0_to_3_operand") | |
12199 | (match_operand 5 "const_0_to_3_operand") | |
12200 | (match_operand 6 "const_4_to_7_operand") | |
12201 | (match_operand 7 "const_4_to_7_operand") | |
12202 | (match_operand 8 "const_4_to_7_operand") | |
12203 | (match_operand 9 "const_4_to_7_operand")])))] | |
0c7189ae | 12204 | "TARGET_AVX2 |
d286410b | 12205 | && <mask_avx512vl_condition> |
0c7189ae JJ |
12206 | && INTVAL (operands[2]) + 4 == INTVAL (operands[6]) |
12207 | && INTVAL (operands[3]) + 4 == INTVAL (operands[7]) | |
12208 | && INTVAL (operands[4]) + 4 == INTVAL (operands[8]) | |
12209 | && INTVAL (operands[5]) + 4 == INTVAL (operands[9])" | |
977e83a3 KY |
12210 | { |
12211 | int mask = 0; | |
12212 | mask |= INTVAL (operands[2]) << 0; | |
12213 | mask |= INTVAL (operands[3]) << 2; | |
12214 | mask |= INTVAL (operands[4]) << 4; | |
12215 | mask |= INTVAL (operands[5]) << 6; | |
12216 | operands[2] = GEN_INT (mask); | |
12217 | ||
d286410b | 12218 | return "vpshufd\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}"; |
977e83a3 KY |
12219 | } |
12220 | [(set_attr "type" "sselog1") | |
d286410b | 12221 | (set_attr "prefix" "maybe_evex") |
977e83a3 KY |
12222 | (set_attr "length_immediate" "1") |
12223 | (set_attr "mode" "OI")]) | |
12224 | ||
d286410b AI |
12225 | (define_expand "avx512vl_pshufd_mask" |
12226 | [(match_operand:V4SI 0 "register_operand") | |
12227 | (match_operand:V4SI 1 "nonimmediate_operand") | |
12228 | (match_operand:SI 2 "const_0_to_255_operand") | |
12229 | (match_operand:V4SI 3 "register_operand") | |
12230 | (match_operand:QI 4 "register_operand")] | |
12231 | "TARGET_AVX512VL" | |
12232 | { | |
12233 | int mask = INTVAL (operands[2]); | |
12234 | emit_insn (gen_sse2_pshufd_1_mask (operands[0], operands[1], | |
12235 | GEN_INT ((mask >> 0) & 3), | |
12236 | GEN_INT ((mask >> 2) & 3), | |
12237 | GEN_INT ((mask >> 4) & 3), | |
12238 | GEN_INT ((mask >> 6) & 3), | |
12239 | operands[3], operands[4])); | |
12240 | DONE; | |
12241 | }) | |
12242 | ||
ef719a44 | 12243 | (define_expand "sse2_pshufd" |
82e86dc6 UB |
12244 | [(match_operand:V4SI 0 "register_operand") |
12245 | (match_operand:V4SI 1 "nonimmediate_operand") | |
12246 | (match_operand:SI 2 "const_int_operand")] | |
ef719a44 RH |
12247 | "TARGET_SSE2" |
12248 | { | |
12249 | int mask = INTVAL (operands[2]); | |
12250 | emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1], | |
12251 | GEN_INT ((mask >> 0) & 3), | |
12252 | GEN_INT ((mask >> 2) & 3), | |
12253 | GEN_INT ((mask >> 4) & 3), | |
12254 | GEN_INT ((mask >> 6) & 3))); | |
12255 | DONE; | |
12256 | }) | |
12257 | ||
d286410b AI |
12258 | (define_insn "sse2_pshufd_1<mask_name>" |
12259 | [(set (match_operand:V4SI 0 "register_operand" "=v") | |
ef719a44 | 12260 | (vec_select:V4SI |
d286410b | 12261 | (match_operand:V4SI 1 "nonimmediate_operand" "vm") |
82e86dc6 UB |
12262 | (parallel [(match_operand 2 "const_0_to_3_operand") |
12263 | (match_operand 3 "const_0_to_3_operand") | |
12264 | (match_operand 4 "const_0_to_3_operand") | |
12265 | (match_operand 5 "const_0_to_3_operand")])))] | |
d286410b | 12266 | "TARGET_SSE2 && <mask_avx512vl_condition>" |
ef719a44 RH |
12267 | { |
12268 | int mask = 0; | |
12269 | mask |= INTVAL (operands[2]) << 0; | |
12270 | mask |= INTVAL (operands[3]) << 2; | |
12271 | mask |= INTVAL (operands[4]) << 4; | |
12272 | mask |= INTVAL (operands[5]) << 6; | |
12273 | operands[2] = GEN_INT (mask); | |
12274 | ||
d286410b | 12275 | return "%vpshufd\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}"; |
ef719a44 RH |
12276 | } |
12277 | [(set_attr "type" "sselog1") | |
10e4d956 | 12278 | (set_attr "prefix_data16" "1") |
d286410b | 12279 | (set_attr "prefix" "<mask_prefix2>") |
725fd454 | 12280 | (set_attr "length_immediate" "1") |
ef719a44 RH |
12281 | (set_attr "mode" "TI")]) |
12282 | ||
41755b52 AI |
12283 | (define_insn "<mask_codefor>avx512bw_pshuflwv32hi<mask_name>" |
12284 | [(set (match_operand:V32HI 0 "register_operand" "=v") | |
12285 | (unspec:V32HI | |
12286 | [(match_operand:V32HI 1 "nonimmediate_operand" "vm") | |
12287 | (match_operand:SI 2 "const_0_to_255_operand" "n")] | |
12288 | UNSPEC_PSHUFLW))] | |
12289 | "TARGET_AVX512BW" | |
12290 | "vpshuflw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" | |
12291 | [(set_attr "type" "sselog") | |
12292 | (set_attr "prefix" "evex") | |
12293 | (set_attr "mode" "XI")]) | |
12294 | ||
12295 | (define_expand "avx512vl_pshuflwv3_mask" | |
12296 | [(match_operand:V16HI 0 "register_operand") | |
12297 | (match_operand:V16HI 1 "nonimmediate_operand") | |
12298 | (match_operand:SI 2 "const_0_to_255_operand") | |
12299 | (match_operand:V16HI 3 "register_operand") | |
12300 | (match_operand:HI 4 "register_operand")] | |
12301 | "TARGET_AVX512VL && TARGET_AVX512BW" | |
12302 | { | |
12303 | int mask = INTVAL (operands[2]); | |
12304 | emit_insn (gen_avx2_pshuflw_1_mask (operands[0], operands[1], | |
12305 | GEN_INT ((mask >> 0) & 3), | |
12306 | GEN_INT ((mask >> 2) & 3), | |
12307 | GEN_INT ((mask >> 4) & 3), | |
12308 | GEN_INT ((mask >> 6) & 3), | |
12309 | GEN_INT (((mask >> 0) & 3) + 8), | |
12310 | GEN_INT (((mask >> 2) & 3) + 8), | |
12311 | GEN_INT (((mask >> 4) & 3) + 8), | |
12312 | GEN_INT (((mask >> 6) & 3) + 8), | |
12313 | operands[3], operands[4])); | |
12314 | DONE; | |
12315 | }) | |
12316 | ||
977e83a3 | 12317 | (define_expand "avx2_pshuflwv3" |
82e86dc6 UB |
12318 | [(match_operand:V16HI 0 "register_operand") |
12319 | (match_operand:V16HI 1 "nonimmediate_operand") | |
12320 | (match_operand:SI 2 "const_0_to_255_operand")] | |
977e83a3 KY |
12321 | "TARGET_AVX2" |
12322 | { | |
12323 | int mask = INTVAL (operands[2]); | |
12324 | emit_insn (gen_avx2_pshuflw_1 (operands[0], operands[1], | |
12325 | GEN_INT ((mask >> 0) & 3), | |
12326 | GEN_INT ((mask >> 2) & 3), | |
12327 | GEN_INT ((mask >> 4) & 3), | |
0c7189ae JJ |
12328 | GEN_INT ((mask >> 6) & 3), |
12329 | GEN_INT (((mask >> 0) & 3) + 8), | |
12330 | GEN_INT (((mask >> 2) & 3) + 8), | |
12331 | GEN_INT (((mask >> 4) & 3) + 8), | |
12332 | GEN_INT (((mask >> 6) & 3) + 8))); | |
977e83a3 KY |
12333 | DONE; |
12334 | }) | |
12335 | ||
41755b52 AI |
12336 | (define_insn "avx2_pshuflw_1<mask_name>" |
12337 | [(set (match_operand:V16HI 0 "register_operand" "=v") | |
977e83a3 | 12338 | (vec_select:V16HI |
41755b52 | 12339 | (match_operand:V16HI 1 "nonimmediate_operand" "vm") |
82e86dc6 UB |
12340 | (parallel [(match_operand 2 "const_0_to_3_operand") |
12341 | (match_operand 3 "const_0_to_3_operand") | |
12342 | (match_operand 4 "const_0_to_3_operand") | |
12343 | (match_operand 5 "const_0_to_3_operand") | |
977e83a3 KY |
12344 | (const_int 4) |
12345 | (const_int 5) | |
12346 | (const_int 6) | |
12347 | (const_int 7) | |
82e86dc6 UB |
12348 | (match_operand 6 "const_8_to_11_operand") |
12349 | (match_operand 7 "const_8_to_11_operand") | |
12350 | (match_operand 8 "const_8_to_11_operand") | |
12351 | (match_operand 9 "const_8_to_11_operand") | |
977e83a3 KY |
12352 | (const_int 12) |
12353 | (const_int 13) | |
12354 | (const_int 14) | |
12355 | (const_int 15)])))] | |
0c7189ae | 12356 | "TARGET_AVX2 |
41755b52 | 12357 | && <mask_avx512bw_condition> && <mask_avx512vl_condition> |
0c7189ae JJ |
12358 | && INTVAL (operands[2]) + 8 == INTVAL (operands[6]) |
12359 | && INTVAL (operands[3]) + 8 == INTVAL (operands[7]) | |
12360 | && INTVAL (operands[4]) + 8 == INTVAL (operands[8]) | |
12361 | && INTVAL (operands[5]) + 8 == INTVAL (operands[9])" | |
977e83a3 KY |
12362 | { |
12363 | int mask = 0; | |
12364 | mask |= INTVAL (operands[2]) << 0; | |
12365 | mask |= INTVAL (operands[3]) << 2; | |
12366 | mask |= INTVAL (operands[4]) << 4; | |
12367 | mask |= INTVAL (operands[5]) << 6; | |
12368 | operands[2] = GEN_INT (mask); | |
12369 | ||
41755b52 | 12370 | return "vpshuflw\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}"; |
977e83a3 KY |
12371 | } |
12372 | [(set_attr "type" "sselog") | |
41755b52 | 12373 | (set_attr "prefix" "maybe_evex") |
977e83a3 KY |
12374 | (set_attr "length_immediate" "1") |
12375 | (set_attr "mode" "OI")]) | |
12376 | ||
41755b52 AI |
12377 | (define_expand "avx512vl_pshuflw_mask" |
12378 | [(match_operand:V8HI 0 "register_operand") | |
12379 | (match_operand:V8HI 1 "nonimmediate_operand") | |
12380 | (match_operand:SI 2 "const_0_to_255_operand") | |
12381 | (match_operand:V8HI 3 "register_operand") | |
12382 | (match_operand:QI 4 "register_operand")] | |
12383 | "TARGET_AVX512VL && TARGET_AVX512BW" | |
12384 | { | |
12385 | int mask = INTVAL (operands[2]); | |
12386 | emit_insn (gen_sse2_pshuflw_1_mask (operands[0], operands[1], | |
12387 | GEN_INT ((mask >> 0) & 3), | |
12388 | GEN_INT ((mask >> 2) & 3), | |
12389 | GEN_INT ((mask >> 4) & 3), | |
12390 | GEN_INT ((mask >> 6) & 3), | |
12391 | operands[3], operands[4])); | |
12392 | DONE; | |
12393 | }) | |
12394 | ||
ef719a44 | 12395 | (define_expand "sse2_pshuflw" |
82e86dc6 UB |
12396 | [(match_operand:V8HI 0 "register_operand") |
12397 | (match_operand:V8HI 1 "nonimmediate_operand") | |
12398 | (match_operand:SI 2 "const_int_operand")] | |
ef719a44 RH |
12399 | "TARGET_SSE2" |
12400 | { | |
12401 | int mask = INTVAL (operands[2]); | |
12402 | emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1], | |
12403 | GEN_INT ((mask >> 0) & 3), | |
12404 | GEN_INT ((mask >> 2) & 3), | |
12405 | GEN_INT ((mask >> 4) & 3), | |
12406 | GEN_INT ((mask >> 6) & 3))); | |
12407 | DONE; | |
12408 | }) | |
12409 | ||
41755b52 AI |
12410 | (define_insn "sse2_pshuflw_1<mask_name>" |
12411 | [(set (match_operand:V8HI 0 "register_operand" "=v") | |
ef719a44 | 12412 | (vec_select:V8HI |
41755b52 | 12413 | (match_operand:V8HI 1 "nonimmediate_operand" "vm") |
82e86dc6 UB |
12414 | (parallel [(match_operand 2 "const_0_to_3_operand") |
12415 | (match_operand 3 "const_0_to_3_operand") | |
12416 | (match_operand 4 "const_0_to_3_operand") | |
12417 | (match_operand 5 "const_0_to_3_operand") | |
ef719a44 RH |
12418 | (const_int 4) |
12419 | (const_int 5) | |
12420 | (const_int 6) | |
12421 | (const_int 7)])))] | |
41755b52 | 12422 | "TARGET_SSE2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>" |
ef719a44 RH |
12423 | { |
12424 | int mask = 0; | |
12425 | mask |= INTVAL (operands[2]) << 0; | |
12426 | mask |= INTVAL (operands[3]) << 2; | |
12427 | mask |= INTVAL (operands[4]) << 4; | |
12428 | mask |= INTVAL (operands[5]) << 6; | |
12429 | operands[2] = GEN_INT (mask); | |
12430 | ||
41755b52 | 12431 | return "%vpshuflw\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}"; |
ef719a44 RH |
12432 | } |
12433 | [(set_attr "type" "sselog") | |
725fd454 | 12434 | (set_attr "prefix_data16" "0") |
10e4d956 | 12435 | (set_attr "prefix_rep" "1") |
95879c72 | 12436 | (set_attr "prefix" "maybe_vex") |
725fd454 | 12437 | (set_attr "length_immediate" "1") |
ef719a44 RH |
12438 | (set_attr "mode" "TI")]) |
12439 | ||
977e83a3 | 12440 | (define_expand "avx2_pshufhwv3" |
82e86dc6 UB |
12441 | [(match_operand:V16HI 0 "register_operand") |
12442 | (match_operand:V16HI 1 "nonimmediate_operand") | |
12443 | (match_operand:SI 2 "const_0_to_255_operand")] | |
977e83a3 KY |
12444 | "TARGET_AVX2" |
12445 | { | |
12446 | int mask = INTVAL (operands[2]); | |
12447 | emit_insn (gen_avx2_pshufhw_1 (operands[0], operands[1], | |
12448 | GEN_INT (((mask >> 0) & 3) + 4), | |
12449 | GEN_INT (((mask >> 2) & 3) + 4), | |
12450 | GEN_INT (((mask >> 4) & 3) + 4), | |
0c7189ae JJ |
12451 | GEN_INT (((mask >> 6) & 3) + 4), |
12452 | GEN_INT (((mask >> 0) & 3) + 12), | |
12453 | GEN_INT (((mask >> 2) & 3) + 12), | |
12454 | GEN_INT (((mask >> 4) & 3) + 12), | |
12455 | GEN_INT (((mask >> 6) & 3) + 12))); | |
977e83a3 KY |
12456 | DONE; |
12457 | }) | |
12458 | ||
41755b52 AI |
12459 | (define_insn "<mask_codefor>avx512bw_pshufhwv32hi<mask_name>" |
12460 | [(set (match_operand:V32HI 0 "register_operand" "=v") | |
12461 | (unspec:V32HI | |
12462 | [(match_operand:V32HI 1 "nonimmediate_operand" "vm") | |
12463 | (match_operand:SI 2 "const_0_to_255_operand" "n")] | |
12464 | UNSPEC_PSHUFHW))] | |
12465 | "TARGET_AVX512BW" | |
12466 | "vpshufhw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" | |
12467 | [(set_attr "type" "sselog") | |
12468 | (set_attr "prefix" "evex") | |
12469 | (set_attr "mode" "XI")]) | |
12470 | ||
12471 | (define_expand "avx512vl_pshufhwv3_mask" | |
12472 | [(match_operand:V16HI 0 "register_operand") | |
12473 | (match_operand:V16HI 1 "nonimmediate_operand") | |
12474 | (match_operand:SI 2 "const_0_to_255_operand") | |
12475 | (match_operand:V16HI 3 "register_operand") | |
12476 | (match_operand:HI 4 "register_operand")] | |
12477 | "TARGET_AVX512VL && TARGET_AVX512BW" | |
12478 | { | |
12479 | int mask = INTVAL (operands[2]); | |
12480 | emit_insn (gen_avx2_pshufhw_1_mask (operands[0], operands[1], | |
12481 | GEN_INT (((mask >> 0) & 3) + 4), | |
12482 | GEN_INT (((mask >> 2) & 3) + 4), | |
12483 | GEN_INT (((mask >> 4) & 3) + 4), | |
12484 | GEN_INT (((mask >> 6) & 3) + 4), | |
12485 | GEN_INT (((mask >> 0) & 3) + 12), | |
12486 | GEN_INT (((mask >> 2) & 3) + 12), | |
12487 | GEN_INT (((mask >> 4) & 3) + 12), | |
12488 | GEN_INT (((mask >> 6) & 3) + 12), | |
12489 | operands[3], operands[4])); | |
12490 | DONE; | |
12491 | }) | |
12492 | ||
12493 | (define_insn "avx2_pshufhw_1<mask_name>" | |
12494 | [(set (match_operand:V16HI 0 "register_operand" "=v") | |
977e83a3 | 12495 | (vec_select:V16HI |
41755b52 | 12496 | (match_operand:V16HI 1 "nonimmediate_operand" "vm") |
977e83a3 KY |
12497 | (parallel [(const_int 0) |
12498 | (const_int 1) | |
12499 | (const_int 2) | |
12500 | (const_int 3) | |
82e86dc6 UB |
12501 | (match_operand 2 "const_4_to_7_operand") |
12502 | (match_operand 3 "const_4_to_7_operand") | |
12503 | (match_operand 4 "const_4_to_7_operand") | |
12504 | (match_operand 5 "const_4_to_7_operand") | |
977e83a3 KY |
12505 | (const_int 8) |
12506 | (const_int 9) | |
12507 | (const_int 10) | |
12508 | (const_int 11) | |
82e86dc6 UB |
12509 | (match_operand 6 "const_12_to_15_operand") |
12510 | (match_operand 7 "const_12_to_15_operand") | |
12511 | (match_operand 8 "const_12_to_15_operand") | |
12512 | (match_operand 9 "const_12_to_15_operand")])))] | |
0c7189ae | 12513 | "TARGET_AVX2 |
41755b52 | 12514 | && <mask_avx512bw_condition> && <mask_avx512vl_condition> |
0c7189ae JJ |
12515 | && INTVAL (operands[2]) + 8 == INTVAL (operands[6]) |
12516 | && INTVAL (operands[3]) + 8 == INTVAL (operands[7]) | |
12517 | && INTVAL (operands[4]) + 8 == INTVAL (operands[8]) | |
12518 | && INTVAL (operands[5]) + 8 == INTVAL (operands[9])" | |
977e83a3 KY |
12519 | { |
12520 | int mask = 0; | |
12521 | mask |= (INTVAL (operands[2]) - 4) << 0; | |
12522 | mask |= (INTVAL (operands[3]) - 4) << 2; | |
12523 | mask |= (INTVAL (operands[4]) - 4) << 4; | |
12524 | mask |= (INTVAL (operands[5]) - 4) << 6; | |
12525 | operands[2] = GEN_INT (mask); | |
12526 | ||
41755b52 | 12527 | return "vpshufhw\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}"; |
977e83a3 KY |
12528 | } |
12529 | [(set_attr "type" "sselog") | |
41755b52 | 12530 | (set_attr "prefix" "maybe_evex") |
977e83a3 KY |
12531 | (set_attr "length_immediate" "1") |
12532 | (set_attr "mode" "OI")]) | |
12533 | ||
41755b52 AI |
12534 | (define_expand "avx512vl_pshufhw_mask" |
12535 | [(match_operand:V8HI 0 "register_operand") | |
12536 | (match_operand:V8HI 1 "nonimmediate_operand") | |
12537 | (match_operand:SI 2 "const_0_to_255_operand") | |
12538 | (match_operand:V8HI 3 "register_operand") | |
12539 | (match_operand:QI 4 "register_operand")] | |
12540 | "TARGET_AVX512VL && TARGET_AVX512BW" | |
12541 | { | |
12542 | int mask = INTVAL (operands[2]); | |
12543 | emit_insn (gen_sse2_pshufhw_1_mask (operands[0], operands[1], | |
12544 | GEN_INT (((mask >> 0) & 3) + 4), | |
12545 | GEN_INT (((mask >> 2) & 3) + 4), | |
12546 | GEN_INT (((mask >> 4) & 3) + 4), | |
12547 | GEN_INT (((mask >> 6) & 3) + 4), | |
12548 | operands[3], operands[4])); | |
12549 | DONE; | |
12550 | }) | |
12551 | ||
ef719a44 | 12552 | (define_expand "sse2_pshufhw" |
82e86dc6 UB |
12553 | [(match_operand:V8HI 0 "register_operand") |
12554 | (match_operand:V8HI 1 "nonimmediate_operand") | |
12555 | (match_operand:SI 2 "const_int_operand")] | |
ef719a44 RH |
12556 | "TARGET_SSE2" |
12557 | { | |
12558 | int mask = INTVAL (operands[2]); | |
12559 | emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1], | |
12560 | GEN_INT (((mask >> 0) & 3) + 4), | |
12561 | GEN_INT (((mask >> 2) & 3) + 4), | |
12562 | GEN_INT (((mask >> 4) & 3) + 4), | |
12563 | GEN_INT (((mask >> 6) & 3) + 4))); | |
12564 | DONE; | |
12565 | }) | |
12566 | ||
41755b52 AI |
12567 | (define_insn "sse2_pshufhw_1<mask_name>" |
12568 | [(set (match_operand:V8HI 0 "register_operand" "=v") | |
ef719a44 | 12569 | (vec_select:V8HI |
41755b52 | 12570 | (match_operand:V8HI 1 "nonimmediate_operand" "vm") |
ef719a44 RH |
12571 | (parallel [(const_int 0) |
12572 | (const_int 1) | |
12573 | (const_int 2) | |
12574 | (const_int 3) | |
82e86dc6 UB |
12575 | (match_operand 2 "const_4_to_7_operand") |
12576 | (match_operand 3 "const_4_to_7_operand") | |
12577 | (match_operand 4 "const_4_to_7_operand") | |
12578 | (match_operand 5 "const_4_to_7_operand")])))] | |
41755b52 | 12579 | "TARGET_SSE2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>" |
ef719a44 RH |
12580 | { |
12581 | int mask = 0; | |
12582 | mask |= (INTVAL (operands[2]) - 4) << 0; | |
12583 | mask |= (INTVAL (operands[3]) - 4) << 2; | |
12584 | mask |= (INTVAL (operands[4]) - 4) << 4; | |
12585 | mask |= (INTVAL (operands[5]) - 4) << 6; | |
12586 | operands[2] = GEN_INT (mask); | |
12587 | ||
41755b52 | 12588 | return "%vpshufhw\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}"; |
ef719a44 RH |
12589 | } |
12590 | [(set_attr "type" "sselog") | |
10e4d956 | 12591 | (set_attr "prefix_rep" "1") |
725fd454 | 12592 | (set_attr "prefix_data16" "0") |
95879c72 | 12593 | (set_attr "prefix" "maybe_vex") |
725fd454 | 12594 | (set_attr "length_immediate" "1") |
ef719a44 RH |
12595 | (set_attr "mode" "TI")]) |
12596 | ||
12597 | (define_expand "sse2_loadd" | |
82e86dc6 | 12598 | [(set (match_operand:V4SI 0 "register_operand") |
ef719a44 RH |
12599 | (vec_merge:V4SI |
12600 | (vec_duplicate:V4SI | |
82e86dc6 | 12601 | (match_operand:SI 1 "nonimmediate_operand")) |
ef719a44 RH |
12602 | (match_dup 2) |
12603 | (const_int 1)))] | |
eb701deb | 12604 | "TARGET_SSE" |
ef719a44 RH |
12605 | "operands[2] = CONST0_RTX (V4SImode);") |
12606 | ||
12607 | (define_insn "sse2_loadld" | |
a02f398d | 12608 | [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x,x,x") |
ef719a44 RH |
12609 | (vec_merge:V4SI |
12610 | (vec_duplicate:V4SI | |
a02f398d UB |
12611 | (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,x")) |
12612 | (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0,x") | |
ef719a44 | 12613 | (const_int 1)))] |
eb701deb | 12614 | "TARGET_SSE" |
ef719a44 | 12615 | "@ |
1ee8b298 UB |
12616 | %vmovd\t{%2, %0|%0, %2} |
12617 | %vmovd\t{%2, %0|%0, %2} | |
eb701deb | 12618 | movss\t{%2, %0|%0, %2} |
1ee8b298 UB |
12619 | movss\t{%2, %0|%0, %2} |
12620 | vmovss\t{%2, %1, %0|%0, %1, %2}" | |
b2d7aa9a | 12621 | [(set_attr "isa" "sse2,sse2,noavx,noavx,avx") |
1ee8b298 UB |
12622 | (set_attr "type" "ssemov") |
12623 | (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,vex") | |
12624 | (set_attr "mode" "TI,TI,V4SF,SF,SF")]) | |
ef719a44 | 12625 | |
3f5783ea UB |
12626 | (define_insn "*vec_extract<mode>" |
12627 | [(set (match_operand:<ssescalarmode> 0 "nonimmediate_operand" "=r,m") | |
12628 | (vec_select:<ssescalarmode> | |
e61e7d28 | 12629 | (match_operand:VI12_128 1 "register_operand" "x,x") |
3f5783ea UB |
12630 | (parallel |
12631 | [(match_operand:SI 2 "const_0_to_<ssescalarnummask>_operand")])))] | |
12632 | "TARGET_SSE4_1" | |
12633 | "@ | |
12634 | %vpextr<ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2} | |
12635 | %vpextr<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" | |
12636 | [(set_attr "type" "sselog1") | |
12637 | (set (attr "prefix_data16") | |
12638 | (if_then_else | |
12639 | (and (eq_attr "alternative" "0") | |
12640 | (eq (const_string "<MODE>mode") (const_string "V8HImode"))) | |
12641 | (const_string "1") | |
12642 | (const_string "*"))) | |
12643 | (set (attr "prefix_extra") | |
12644 | (if_then_else | |
12645 | (and (eq_attr "alternative" "0") | |
12646 | (eq (const_string "<MODE>mode") (const_string "V8HImode"))) | |
12647 | (const_string "*") | |
12648 | (const_string "1"))) | |
12649 | (set_attr "length_immediate" "1") | |
12650 | (set_attr "prefix" "maybe_vex") | |
12651 | (set_attr "mode" "TI")]) | |
12652 | ||
12653 | (define_insn "*vec_extractv8hi_sse2" | |
12654 | [(set (match_operand:HI 0 "register_operand" "=r") | |
12655 | (vec_select:HI | |
12656 | (match_operand:V8HI 1 "register_operand" "x") | |
12657 | (parallel | |
12658 | [(match_operand:SI 2 "const_0_to_7_operand")])))] | |
12659 | "TARGET_SSE2 && !TARGET_SSE4_1" | |
12660 | "pextrw\t{%2, %1, %k0|%k0, %1, %2}" | |
12661 | [(set_attr "type" "sselog1") | |
12662 | (set_attr "prefix_data16" "1") | |
12663 | (set_attr "length_immediate" "1") | |
12664 | (set_attr "mode" "TI")]) | |
12665 | ||
12666 | (define_insn "*vec_extractv16qi_zext" | |
12667 | [(set (match_operand:SWI48 0 "register_operand" "=r") | |
12668 | (zero_extend:SWI48 | |
12669 | (vec_select:QI | |
12670 | (match_operand:V16QI 1 "register_operand" "x") | |
12671 | (parallel | |
12672 | [(match_operand:SI 2 "const_0_to_15_operand")]))))] | |
12673 | "TARGET_SSE4_1" | |
12674 | "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}" | |
12675 | [(set_attr "type" "sselog1") | |
12676 | (set_attr "prefix_extra" "1") | |
12677 | (set_attr "length_immediate" "1") | |
12678 | (set_attr "prefix" "maybe_vex") | |
12679 | (set_attr "mode" "TI")]) | |
12680 | ||
12681 | (define_insn "*vec_extractv8hi_zext" | |
12682 | [(set (match_operand:SWI48 0 "register_operand" "=r") | |
12683 | (zero_extend:SWI48 | |
12684 | (vec_select:HI | |
12685 | (match_operand:V8HI 1 "register_operand" "x") | |
12686 | (parallel | |
12687 | [(match_operand:SI 2 "const_0_to_7_operand")]))))] | |
12688 | "TARGET_SSE2" | |
12689 | "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}" | |
12690 | [(set_attr "type" "sselog1") | |
12691 | (set_attr "prefix_data16" "1") | |
12692 | (set_attr "length_immediate" "1") | |
12693 | (set_attr "prefix" "maybe_vex") | |
12694 | (set_attr "mode" "TI")]) | |
12695 | ||
e61e7d28 | 12696 | (define_insn "*vec_extract<mode>_mem" |
3f5783ea UB |
12697 | [(set (match_operand:<ssescalarmode> 0 "register_operand" "=r") |
12698 | (vec_select:<ssescalarmode> | |
e61e7d28 | 12699 | (match_operand:VI12_128 1 "memory_operand" "o") |
3f5783ea UB |
12700 | (parallel |
12701 | [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))] | |
12702 | "TARGET_SSE" | |
e61e7d28 | 12703 | "#") |
3f5783ea | 12704 | |
3095685e | 12705 | (define_insn "*vec_extract<ssevecmodelower>_0" |
3f5783ea | 12706 | [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r ,r,x ,m") |
3095685e | 12707 | (vec_select:SWI48 |
3f5783ea | 12708 | (match_operand:<ssevecmode> 1 "nonimmediate_operand" "mYj,x,xm,x") |
ef719a44 | 12709 | (parallel [(const_int 0)])))] |
3095685e | 12710 | "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))" |
f75e6a51 | 12711 | "#" |
3f5783ea | 12712 | [(set_attr "isa" "*,sse4,*,*")]) |
ef719a44 | 12713 | |
60ca9a65 UB |
12714 | (define_insn_and_split "*vec_extractv4si_0_zext" |
12715 | [(set (match_operand:DI 0 "register_operand" "=r") | |
12716 | (zero_extend:DI | |
12717 | (vec_select:SI | |
12718 | (match_operand:V4SI 1 "register_operand" "x") | |
12719 | (parallel [(const_int 0)]))))] | |
12720 | "TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_FROM_VEC" | |
12721 | "#" | |
12722 | "&& reload_completed" | |
12723 | [(set (match_dup 0) (zero_extend:DI (match_dup 1)))] | |
12724 | "operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));") | |
12725 | ||
3095685e UB |
12726 | (define_insn "*vec_extractv2di_0_sse" |
12727 | [(set (match_operand:DI 0 "nonimmediate_operand" "=x,m") | |
ed69105c | 12728 | (vec_select:DI |
3095685e | 12729 | (match_operand:V2DI 1 "nonimmediate_operand" "xm,x") |
ed69105c | 12730 | (parallel [(const_int 0)])))] |
3095685e UB |
12731 | "TARGET_SSE && !TARGET_64BIT |
12732 | && !(MEM_P (operands[0]) && MEM_P (operands[1]))" | |
12733 | "#") | |
ed69105c | 12734 | |
3095685e | 12735 | (define_split |
f75e6a51 | 12736 | [(set (match_operand:SWI48x 0 "nonimmediate_operand") |
3095685e | 12737 | (vec_select:SWI48x |
e61e7d28 | 12738 | (match_operand:<ssevecmode> 1 "register_operand") |
ef719a44 | 12739 | (parallel [(const_int 0)])))] |
3095685e UB |
12740 | "TARGET_SSE && reload_completed" |
12741 | [(set (match_dup 0) (match_dup 1))] | |
e61e7d28 | 12742 | "operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1]));") |
ef719a44 | 12743 | |
f75e6a51 | 12744 | (define_insn "*vec_extractv4si" |
45392c76 | 12745 | [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,Yr,*x,x") |
f75e6a51 | 12746 | (vec_select:SI |
45392c76 | 12747 | (match_operand:V4SI 1 "register_operand" "x,0,0,x") |
f75e6a51 UB |
12748 | (parallel [(match_operand:SI 2 "const_0_to_3_operand")])))] |
12749 | "TARGET_SSE4_1" | |
60ca9a65 UB |
12750 | { |
12751 | switch (which_alternative) | |
12752 | { | |
12753 | case 0: | |
12754 | return "%vpextrd\t{%2, %1, %0|%0, %1, %2}"; | |
12755 | ||
12756 | case 1: | |
45392c76 | 12757 | case 2: |
60ca9a65 UB |
12758 | operands [2] = GEN_INT (INTVAL (operands[2]) * 4); |
12759 | return "psrldq\t{%2, %0|%0, %2}"; | |
12760 | ||
45392c76 | 12761 | case 3: |
60ca9a65 UB |
12762 | operands [2] = GEN_INT (INTVAL (operands[2]) * 4); |
12763 | return "vpsrldq\t{%2, %1, %0|%0, %1, %2}"; | |
12764 | ||
12765 | default: | |
12766 | gcc_unreachable (); | |
12767 | } | |
12768 | } | |
45392c76 IE |
12769 | [(set_attr "isa" "*,noavx,noavx,avx") |
12770 | (set_attr "type" "sselog1,sseishft1,sseishft1,sseishft1") | |
12771 | (set_attr "prefix_extra" "1,*,*,*") | |
f75e6a51 | 12772 | (set_attr "length_immediate" "1") |
45392c76 | 12773 | (set_attr "prefix" "maybe_vex,orig,orig,vex") |
f75e6a51 UB |
12774 | (set_attr "mode" "TI")]) |
12775 | ||
12776 | (define_insn "*vec_extractv4si_zext" | |
12777 | [(set (match_operand:DI 0 "register_operand" "=r") | |
12778 | (zero_extend:DI | |
12779 | (vec_select:SI | |
12780 | (match_operand:V4SI 1 "register_operand" "x") | |
12781 | (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))] | |
12782 | "TARGET_64BIT && TARGET_SSE4_1" | |
12783 | "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}" | |
12784 | [(set_attr "type" "sselog1") | |
12785 | (set_attr "prefix_extra" "1") | |
12786 | (set_attr "length_immediate" "1") | |
12787 | (set_attr "prefix" "maybe_vex") | |
12788 | (set_attr "mode" "TI")]) | |
3095685e | 12789 | |
e61e7d28 | 12790 | (define_insn "*vec_extractv4si_mem" |
3095685e UB |
12791 | [(set (match_operand:SI 0 "register_operand" "=x,r") |
12792 | (vec_select:SI | |
12793 | (match_operand:V4SI 1 "memory_operand" "o,o") | |
12794 | (parallel [(match_operand 2 "const_0_to_3_operand")])))] | |
12795 | "TARGET_SSE" | |
e61e7d28 | 12796 | "#") |
ef719a44 | 12797 | |
60ca9a65 UB |
12798 | (define_insn_and_split "*vec_extractv4si_zext_mem" |
12799 | [(set (match_operand:DI 0 "register_operand" "=x,r") | |
12800 | (zero_extend:DI | |
12801 | (vec_select:SI | |
12802 | (match_operand:V4SI 1 "memory_operand" "o,o") | |
12803 | (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))] | |
12804 | "TARGET_64BIT && TARGET_SSE" | |
12805 | "#" | |
12806 | "&& reload_completed" | |
12807 | [(set (match_dup 0) (zero_extend:DI (match_dup 1)))] | |
12808 | { | |
12809 | operands[1] = adjust_address (operands[1], SImode, INTVAL (operands[2]) * 4); | |
12810 | }) | |
12811 | ||
aad61732 | 12812 | (define_insn "*vec_extractv2di_1" |
f75e6a51 | 12813 | [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,m,x,x,x,x,r") |
c49c0c39 | 12814 | (vec_select:DI |
f75e6a51 | 12815 | (match_operand:V2DI 1 "nonimmediate_operand" "x ,x,0,x,x,o,o") |
c49c0c39 | 12816 | (parallel [(const_int 1)])))] |
3095685e | 12817 | "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))" |
c49c0c39 | 12818 | "@ |
f75e6a51 | 12819 | %vpextrq\t{$1, %1, %0|%0, %1, 1} |
1ee8b298 | 12820 | %vmovhps\t{%1, %0|%0, %1} |
77315816 | 12821 | psrldq\t{$8, %0|%0, 8} |
1ee8b298 | 12822 | vpsrldq\t{$8, %1, %0|%0, %1, 8} |
4d9cab74 | 12823 | movhlps\t{%1, %0|%0, %1} |
3095685e UB |
12824 | # |
12825 | #" | |
f75e6a51 UB |
12826 | [(set_attr "isa" "x64_sse4,*,sse2_noavx,avx,noavx,*,x64") |
12827 | (set_attr "type" "sselog1,ssemov,sseishft1,sseishft1,ssemov,ssemov,imov") | |
12828 | (set_attr "length_immediate" "1,*,1,1,*,*,*") | |
f75e6a51 UB |
12829 | (set_attr "prefix_rex" "1,*,*,*,*,*,*") |
12830 | (set_attr "prefix_extra" "1,*,*,*,*,*,*") | |
12831 | (set_attr "prefix" "maybe_vex,maybe_vex,orig,vex,orig,*,*") | |
12832 | (set_attr "mode" "TI,V2SF,TI,TI,V4SF,DI,DI")]) | |
3095685e UB |
12833 | |
12834 | (define_split | |
e61e7d28 UB |
12835 | [(set (match_operand:<ssescalarmode> 0 "register_operand") |
12836 | (vec_select:<ssescalarmode> | |
12837 | (match_operand:VI_128 1 "memory_operand") | |
12838 | (parallel | |
12839 | [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))] | |
3095685e UB |
12840 | "TARGET_SSE && reload_completed" |
12841 | [(set (match_dup 0) (match_dup 1))] | |
e61e7d28 UB |
12842 | { |
12843 | int offs = INTVAL (operands[2]) * GET_MODE_SIZE (<ssescalarmode>mode); | |
12844 | ||
12845 | operands[1] = adjust_address (operands[1], <ssescalarmode>mode, offs); | |
12846 | }) | |
c49c0c39 | 12847 | |
fbf524de JJ |
12848 | ;; Turn SImode or DImode extraction from arbitrary SSE/AVX/AVX512F |
12849 | ;; vector modes into vec_extract*. | |
12850 | (define_split | |
12851 | [(set (match_operand:SWI48x 0 "nonimmediate_operand") | |
12852 | (match_operand:SWI48x 1 "register_operand"))] | |
12853 | "can_create_pseudo_p () | |
12854 | && GET_CODE (operands[1]) == SUBREG | |
12855 | && REG_P (SUBREG_REG (operands[1])) | |
12856 | && (GET_MODE_CLASS (GET_MODE (SUBREG_REG (operands[1]))) == MODE_VECTOR_INT | |
12857 | || (GET_MODE_CLASS (GET_MODE (SUBREG_REG (operands[1]))) | |
12858 | == MODE_VECTOR_FLOAT)) | |
12859 | && SUBREG_BYTE (operands[1]) == 0 | |
12860 | && TARGET_SSE | |
12861 | && (GET_MODE_SIZE (GET_MODE (SUBREG_REG (operands[1]))) == 16 | |
12862 | || (GET_MODE_SIZE (GET_MODE (SUBREG_REG (operands[1]))) == 32 | |
12863 | && TARGET_AVX) | |
12864 | || (GET_MODE_SIZE (GET_MODE (SUBREG_REG (operands[1]))) == 64 | |
12865 | && TARGET_AVX512F)) | |
12866 | && (<MODE>mode == SImode || TARGET_64BIT || MEM_P (operands[0]))" | |
12867 | [(set (match_dup 0) (vec_select:SWI48x (match_dup 1) | |
12868 | (parallel [(const_int 0)])))] | |
12869 | { | |
12870 | rtx tmp; | |
12871 | operands[1] = SUBREG_REG (operands[1]); | |
12872 | switch (GET_MODE_SIZE (GET_MODE (operands[1]))) | |
12873 | { | |
12874 | case 64: | |
12875 | if (<MODE>mode == SImode) | |
12876 | { | |
12877 | tmp = gen_reg_rtx (V8SImode); | |
12878 | emit_insn (gen_vec_extract_lo_v16si (tmp, | |
12879 | gen_lowpart (V16SImode, | |
12880 | operands[1]))); | |
12881 | } | |
12882 | else | |
12883 | { | |
12884 | tmp = gen_reg_rtx (V4DImode); | |
12885 | emit_insn (gen_vec_extract_lo_v8di (tmp, | |
12886 | gen_lowpart (V8DImode, | |
12887 | operands[1]))); | |
12888 | } | |
12889 | operands[1] = tmp; | |
12890 | /* FALLTHRU */ | |
12891 | case 32: | |
12892 | tmp = gen_reg_rtx (<ssevecmode>mode); | |
12893 | if (<MODE>mode == SImode) | |
12894 | emit_insn (gen_vec_extract_lo_v8si (tmp, gen_lowpart (V8SImode, | |
12895 | operands[1]))); | |
12896 | else | |
12897 | emit_insn (gen_vec_extract_lo_v4di (tmp, gen_lowpart (V4DImode, | |
12898 | operands[1]))); | |
12899 | operands[1] = tmp; | |
12900 | break; | |
12901 | case 16: | |
12902 | operands[1] = gen_lowpart (<ssevecmode>mode, operands[1]); | |
12903 | break; | |
12904 | } | |
12905 | }) | |
12906 | ||
fcc9fe1e | 12907 | (define_insn "*vec_concatv2si_sse4_1" |
ee768d85 UB |
12908 | [(set (match_operand:V2SI 0 "register_operand" |
12909 | "=Yr,*x,x, Yr,*x,x, x, *y,*y") | |
fcc9fe1e | 12910 | (vec_concat:V2SI |
ee768d85 UB |
12911 | (match_operand:SI 1 "nonimmediate_operand" |
12912 | " 0, 0,x, 0,0, x,rm, 0,rm") | |
12913 | (match_operand:SI 2 "vector_move_operand" | |
12914 | " rm,rm,rm,Yr,*x,x, C,*ym, C")))] | |
12915 | "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))" | |
fcc9fe1e | 12916 | "@ |
45392c76 | 12917 | pinsrd\t{$1, %2, %0|%0, %2, 1} |
aad61732 UB |
12918 | pinsrd\t{$1, %2, %0|%0, %2, 1} |
12919 | vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1} | |
6784c6e0 | 12920 | punpckldq\t{%2, %0|%0, %2} |
45392c76 | 12921 | punpckldq\t{%2, %0|%0, %2} |
1ee8b298 UB |
12922 | vpunpckldq\t{%2, %1, %0|%0, %1, %2} |
12923 | %vmovd\t{%1, %0|%0, %1} | |
6784c6e0 UB |
12924 | punpckldq\t{%2, %0|%0, %2} |
12925 | movd\t{%1, %0|%0, %1}" | |
45392c76 IE |
12926 | [(set_attr "isa" "noavx,noavx,avx,noavx,noavx,avx,*,*,*") |
12927 | (set_attr "type" "sselog,sselog,sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov") | |
12928 | (set_attr "prefix_extra" "1,1,1,*,*,*,*,*,*") | |
12929 | (set_attr "length_immediate" "1,1,1,*,*,*,*,*,*") | |
12930 | (set_attr "prefix" "orig,orig,vex,orig,orig,vex,maybe_vex,orig,orig") | |
12931 | (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,DI,DI")]) | |
fcc9fe1e | 12932 | |
eb701deb RH |
12933 | ;; ??? In theory we can match memory for the MMX alternative, but allowing |
12934 | ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE | |
12935 | ;; alternatives pretty much forces the MMX alternative to be chosen. | |
3c21604f UB |
12936 | (define_insn "*vec_concatv2si" |
12937 | [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,x,x,*y,*y") | |
eb701deb | 12938 | (vec_concat:V2SI |
3c21604f UB |
12939 | (match_operand:SI 1 "nonimmediate_operand" " 0,rm,rm,0,m, 0,*rm") |
12940 | (match_operand:SI 2 "reg_or_0_operand" " x,C ,C, x,C,*y,C")))] | |
12941 | "TARGET_SSE && !TARGET_SSE4_1" | |
eb701deb RH |
12942 | "@ |
12943 | punpckldq\t{%2, %0|%0, %2} | |
12944 | movd\t{%1, %0|%0, %1} | |
3c21604f | 12945 | movd\t{%1, %0|%0, %1} |
eb701deb RH |
12946 | unpcklps\t{%2, %0|%0, %2} |
12947 | movss\t{%1, %0|%0, %1} | |
12948 | punpckldq\t{%2, %0|%0, %2} | |
12949 | movd\t{%1, %0|%0, %1}" | |
3c21604f UB |
12950 | [(set_attr "isa" "sse2,sse2,sse2,*,*,*,*") |
12951 | (set_attr "type" "sselog,ssemov,mmxmov,sselog,ssemov,mmxcvt,mmxmov") | |
12952 | (set_attr "mode" "TI,TI,DI,V4SF,SF,DI,DI")]) | |
eb701deb | 12953 | |
fb55d62e | 12954 | (define_insn "*vec_concatv4si" |
a02f398d | 12955 | [(set (match_operand:V4SI 0 "register_operand" "=x,x,x,x,x") |
95879c72 | 12956 | (vec_concat:V4SI |
a02f398d UB |
12957 | (match_operand:V2SI 1 "register_operand" " 0,x,0,0,x") |
12958 | (match_operand:V2SI 2 "nonimmediate_operand" " x,x,x,m,m")))] | |
eb701deb RH |
12959 | "TARGET_SSE" |
12960 | "@ | |
12961 | punpcklqdq\t{%2, %0|%0, %2} | |
fb55d62e | 12962 | vpunpcklqdq\t{%2, %1, %0|%0, %1, %2} |
eb701deb | 12963 | movlhps\t{%2, %0|%0, %2} |
eabb5f48 UB |
12964 | movhps\t{%2, %0|%0, %q2} |
12965 | vmovhps\t{%2, %1, %0|%0, %1, %q2}" | |
a02f398d | 12966 | [(set_attr "isa" "sse2_noavx,avx,noavx,noavx,avx") |
fb55d62e UB |
12967 | (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov") |
12968 | (set_attr "prefix" "orig,vex,orig,orig,vex") | |
12969 | (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")]) | |
eb701deb | 12970 | |
843b6915 | 12971 | ;; movd instead of movq is required to handle broken assemblers. |
3c21604f | 12972 | (define_insn "vec_concatv2di" |
1ee8b298 | 12973 | [(set (match_operand:V2DI 0 "register_operand" |
45392c76 | 12974 | "=Yr,*x,x ,Yi,x ,!x,x,x,x,x,x") |
6784c6e0 | 12975 | (vec_concat:V2DI |
1ee8b298 | 12976 | (match_operand:DI 1 "nonimmediate_operand" |
45392c76 | 12977 | " 0, 0,x ,r ,xm,*y,0,x,0,0,x") |
1ee8b298 | 12978 | (match_operand:DI 2 "vector_move_operand" |
45392c76 | 12979 | "*rm,rm,rm,C ,C ,C ,x,x,x,m,m")))] |
3c21604f | 12980 | "TARGET_SSE" |
6784c6e0 | 12981 | "@ |
45392c76 | 12982 | pinsrq\t{$1, %2, %0|%0, %2, 1} |
aad61732 UB |
12983 | pinsrq\t{$1, %2, %0|%0, %2, 1} |
12984 | vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1} | |
13a26a7d | 12985 | * return HAVE_AS_IX86_INTERUNIT_MOVQ ? \"%vmovq\t{%1, %0|%0, %1}\" : \"%vmovd\t{%1, %0|%0, %1}\"; |
3c21604f | 12986 | %vmovq\t{%1, %0|%0, %1} |
6784c6e0 UB |
12987 | movq2dq\t{%1, %0|%0, %1} |
12988 | punpcklqdq\t{%2, %0|%0, %2} | |
1ee8b298 | 12989 | vpunpcklqdq\t{%2, %1, %0|%0, %1, %2} |
3c21604f | 12990 | movlhps\t{%2, %0|%0, %2} |
1ee8b298 UB |
12991 | movhps\t{%2, %0|%0, %2} |
12992 | vmovhps\t{%2, %1, %0|%0, %1, %2}" | |
45392c76 | 12993 | [(set_attr "isa" "x64_sse4_noavx,x64_sse4_noavx,x64_avx,x64,sse2,sse2,sse2_noavx,avx,noavx,noavx,avx") |
aad61732 UB |
12994 | (set (attr "type") |
12995 | (if_then_else | |
45392c76 | 12996 | (eq_attr "alternative" "0,1,2,6,7") |
aad61732 UB |
12997 | (const_string "sselog") |
12998 | (const_string "ssemov"))) | |
45392c76 IE |
12999 | (set_attr "prefix_rex" "1,1,1,1,*,*,*,*,*,*,*") |
13000 | (set_attr "prefix_extra" "1,1,1,*,*,*,*,*,*,*,*") | |
13001 | (set_attr "length_immediate" "1,1,1,*,*,*,*,*,*,*,*") | |
13002 | (set_attr "prefix" "orig,orig,vex,maybe_vex,maybe_vex,orig,orig,vex,orig,orig,vex") | |
13003 | (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")]) | |
1ee8b298 | 13004 | |
8dfb9f16 | 13005 | (define_expand "vec_unpacks_lo_<mode>" |
82e86dc6 | 13006 | [(match_operand:<sseunpackmode> 0 "register_operand") |
3bdf6340 | 13007 | (match_operand:VI124_AVX512F 1 "register_operand")] |
89d67cca | 13008 | "TARGET_SSE2" |
7b532118 | 13009 | "ix86_expand_sse_unpack (operands[0], operands[1], false, false); DONE;") |
89d67cca | 13010 | |
8dfb9f16 | 13011 | (define_expand "vec_unpacks_hi_<mode>" |
82e86dc6 | 13012 | [(match_operand:<sseunpackmode> 0 "register_operand") |
3bdf6340 | 13013 | (match_operand:VI124_AVX512F 1 "register_operand")] |
89d67cca | 13014 | "TARGET_SSE2" |
7b532118 | 13015 | "ix86_expand_sse_unpack (operands[0], operands[1], false, true); DONE;") |
89d67cca | 13016 | |
8dfb9f16 | 13017 | (define_expand "vec_unpacku_lo_<mode>" |
82e86dc6 | 13018 | [(match_operand:<sseunpackmode> 0 "register_operand") |
3bdf6340 | 13019 | (match_operand:VI124_AVX512F 1 "register_operand")] |
89d67cca | 13020 | "TARGET_SSE2" |
7b532118 | 13021 | "ix86_expand_sse_unpack (operands[0], operands[1], true, false); DONE;") |
89d67cca | 13022 | |
8dfb9f16 | 13023 | (define_expand "vec_unpacku_hi_<mode>" |
82e86dc6 | 13024 | [(match_operand:<sseunpackmode> 0 "register_operand") |
3bdf6340 | 13025 | (match_operand:VI124_AVX512F 1 "register_operand")] |
89d67cca | 13026 | "TARGET_SSE2" |
7b532118 | 13027 | "ix86_expand_sse_unpack (operands[0], operands[1], true, true); DONE;") |
89d67cca | 13028 | |
ef719a44 RH |
13029 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
13030 | ;; | |
35fd3193 | 13031 | ;; Miscellaneous |
ef719a44 RH |
13032 | ;; |
13033 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
13034 | ||
c9b17fa5 | 13035 | (define_expand "<sse2_avx2>_uavg<mode>3<mask_name>" |
880ab4be AT |
13036 | [(set (match_operand:VI12_AVX2 0 "register_operand") |
13037 | (truncate:VI12_AVX2 | |
13038 | (lshiftrt:<ssedoublemode> | |
13039 | (plus:<ssedoublemode> | |
13040 | (plus:<ssedoublemode> | |
13041 | (zero_extend:<ssedoublemode> | |
13042 | (match_operand:VI12_AVX2 1 "nonimmediate_operand")) | |
13043 | (zero_extend:<ssedoublemode> | |
13044 | (match_operand:VI12_AVX2 2 "nonimmediate_operand"))) | |
c9b17fa5 | 13045 | (match_dup <mask_expand_op3>)) |
ffbaf337 | 13046 | (const_int 1))))] |
c9b17fa5 | 13047 | "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>" |
880ab4be | 13048 | { |
c9b17fa5 AI |
13049 | rtx tmp; |
13050 | if (<mask_applied>) | |
13051 | tmp = operands[3]; | |
880ab4be AT |
13052 | operands[3] = CONST1_RTX(<MODE>mode); |
13053 | ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands); | |
c9b17fa5 AI |
13054 | |
13055 | if (<mask_applied>) | |
13056 | { | |
13057 | operands[5] = operands[3]; | |
13058 | operands[3] = tmp; | |
13059 | } | |
880ab4be | 13060 | }) |
977e83a3 | 13061 | |
c9b17fa5 AI |
13062 | (define_insn "*<sse2_avx2>_uavg<mode>3<mask_name>" |
13063 | [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v") | |
880ab4be AT |
13064 | (truncate:VI12_AVX2 |
13065 | (lshiftrt:<ssedoublemode> | |
13066 | (plus:<ssedoublemode> | |
13067 | (plus:<ssedoublemode> | |
13068 | (zero_extend:<ssedoublemode> | |
c9b17fa5 | 13069 | (match_operand:VI12_AVX2 1 "nonimmediate_operand" "%0,v")) |
880ab4be | 13070 | (zero_extend:<ssedoublemode> |
c9b17fa5 AI |
13071 | (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,vm"))) |
13072 | (match_operand:VI12_AVX2 <mask_expand_op3> "const1_operand")) | |
ef719a44 | 13073 | (const_int 1))))] |
c9b17fa5 AI |
13074 | "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition> |
13075 | && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)" | |
81b1e7eb | 13076 | "@ |
880ab4be | 13077 | pavg<ssemodesuffix>\t{%2, %0|%0, %2} |
c9b17fa5 | 13078 | vpavg<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
81b1e7eb UB |
13079 | [(set_attr "isa" "noavx,avx") |
13080 | (set_attr "type" "sseiadd") | |
13081 | (set_attr "prefix_data16" "1,*") | |
c9b17fa5 | 13082 | (set_attr "prefix" "orig,<mask_prefix>") |
880ab4be | 13083 | (set_attr "mode" "<sseinsnmode>")]) |
ef719a44 | 13084 | |
4f3f76e6 | 13085 | ;; The correct representation for this is absolutely enormous, and |
ef719a44 | 13086 | ;; surely not generally useful. |
977e83a3 | 13087 | (define_insn "<sse2_avx2>_psadbw" |
44f59829 AI |
13088 | [(set (match_operand:VI8_AVX2_AVX512BW 0 "register_operand" "=x,v") |
13089 | (unspec:VI8_AVX2_AVX512BW | |
13090 | [(match_operand:<ssebytemode> 1 "register_operand" "0,v") | |
13091 | (match_operand:<ssebytemode> 2 "nonimmediate_operand" "xm,vm")] | |
8861ba4d | 13092 | UNSPEC_PSADBW))] |
ef719a44 | 13093 | "TARGET_SSE2" |
81b1e7eb UB |
13094 | "@ |
13095 | psadbw\t{%2, %0|%0, %2} | |
13096 | vpsadbw\t{%2, %1, %0|%0, %1, %2}" | |
13097 | [(set_attr "isa" "noavx,avx") | |
13098 | (set_attr "type" "sseiadd") | |
b6837b94 | 13099 | (set_attr "atom_unit" "simul") |
81b1e7eb | 13100 | (set_attr "prefix_data16" "1,*") |
44f59829 | 13101 | (set_attr "prefix" "orig,maybe_evex") |
977e83a3 | 13102 | (set_attr "mode" "<sseinsnmode>")]) |
ef719a44 | 13103 | |
cbb734aa | 13104 | (define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>" |
95879c72 L |
13105 | [(set (match_operand:SI 0 "register_operand" "=r") |
13106 | (unspec:SI | |
b86f6e9e | 13107 | [(match_operand:VF_128_256 1 "register_operand" "x")] |
95879c72 | 13108 | UNSPEC_MOVMSK))] |
6bec6c98 | 13109 | "TARGET_SSE" |
1c154a23 | 13110 | "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}" |
b6837b94 | 13111 | [(set_attr "type" "ssemov") |
95879c72 | 13112 | (set_attr "prefix" "maybe_vex") |
85845bb9 | 13113 | (set_attr "mode" "<MODE>")]) |
ef719a44 | 13114 | |
977e83a3 KY |
13115 | (define_insn "avx2_pmovmskb" |
13116 | [(set (match_operand:SI 0 "register_operand" "=r") | |
13117 | (unspec:SI [(match_operand:V32QI 1 "register_operand" "x")] | |
13118 | UNSPEC_MOVMSK))] | |
13119 | "TARGET_AVX2" | |
13120 | "vpmovmskb\t{%1, %0|%0, %1}" | |
13121 | [(set_attr "type" "ssemov") | |
13122 | (set_attr "prefix" "vex") | |
13123 | (set_attr "mode" "DI")]) | |
13124 | ||
ef719a44 RH |
13125 | (define_insn "sse2_pmovmskb" |
13126 | [(set (match_operand:SI 0 "register_operand" "=r") | |
13127 | (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")] | |
13128 | UNSPEC_MOVMSK))] | |
13129 | "TARGET_SSE2" | |
95879c72 | 13130 | "%vpmovmskb\t{%1, %0|%0, %1}" |
b6837b94 | 13131 | [(set_attr "type" "ssemov") |
10e4d956 | 13132 | (set_attr "prefix_data16" "1") |
95879c72 | 13133 | (set_attr "prefix" "maybe_vex") |
10e4d956 | 13134 | (set_attr "mode" "SI")]) |
ef719a44 RH |
13135 | |
13136 | (define_expand "sse2_maskmovdqu" | |
82e86dc6 UB |
13137 | [(set (match_operand:V16QI 0 "memory_operand") |
13138 | (unspec:V16QI [(match_operand:V16QI 1 "register_operand") | |
13139 | (match_operand:V16QI 2 "register_operand") | |
ef719a44 RH |
13140 | (match_dup 0)] |
13141 | UNSPEC_MASKMOV))] | |
a427621f | 13142 | "TARGET_SSE2") |
ef719a44 RH |
13143 | |
13144 | (define_insn "*sse2_maskmovdqu" | |
f60c2554 | 13145 | [(set (mem:V16QI (match_operand:P 0 "register_operand" "D")) |
ef719a44 RH |
13146 | (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x") |
13147 | (match_operand:V16QI 2 "register_operand" "x") | |
13148 | (mem:V16QI (match_dup 0))] | |
13149 | UNSPEC_MASKMOV))] | |
f60c2554 | 13150 | "TARGET_SSE2" |
061eff6d UB |
13151 | { |
13152 | /* We can't use %^ here due to ASM_OUTPUT_OPCODE processing | |
13153 | that requires %v to be at the beginning of the opcode name. */ | |
13154 | if (Pmode != word_mode) | |
13155 | fputs ("\taddr32", asm_out_file); | |
13156 | return "%vmaskmovdqu\t{%2, %1|%1, %2}"; | |
13157 | } | |
b6837b94 | 13158 | [(set_attr "type" "ssemov") |
10e4d956 | 13159 | (set_attr "prefix_data16" "1") |
061eff6d UB |
13160 | (set (attr "length_address") |
13161 | (symbol_ref ("Pmode != word_mode"))) | |
725fd454 JJ |
13162 | ;; The implicit %rdi operand confuses default length_vex computation. |
13163 | (set (attr "length_vex") | |
f60c2554 | 13164 | (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))"))) |
95879c72 | 13165 | (set_attr "prefix" "maybe_vex") |
ef719a44 RH |
13166 | (set_attr "mode" "TI")]) |
13167 | ||
80e8bb90 RH |
13168 | (define_insn "sse_ldmxcsr" |
13169 | [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")] | |
13170 | UNSPECV_LDMXCSR)] | |
13171 | "TARGET_SSE" | |
95879c72 | 13172 | "%vldmxcsr\t%0" |
80e8bb90 | 13173 | [(set_attr "type" "sse") |
b6837b94 | 13174 | (set_attr "atom_sse_attr" "mxcsr") |
95879c72 | 13175 | (set_attr "prefix" "maybe_vex") |
80e8bb90 RH |
13176 | (set_attr "memory" "load")]) |
13177 | ||
13178 | (define_insn "sse_stmxcsr" | |
13179 | [(set (match_operand:SI 0 "memory_operand" "=m") | |
13180 | (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))] | |
13181 | "TARGET_SSE" | |
95879c72 | 13182 | "%vstmxcsr\t%0" |
80e8bb90 | 13183 | [(set_attr "type" "sse") |
b6837b94 | 13184 | (set_attr "atom_sse_attr" "mxcsr") |
95879c72 | 13185 | (set_attr "prefix" "maybe_vex") |
80e8bb90 RH |
13186 | (set_attr "memory" "store")]) |
13187 | ||
ef719a44 RH |
13188 | (define_insn "sse2_clflush" |
13189 | [(unspec_volatile [(match_operand 0 "address_operand" "p")] | |
13190 | UNSPECV_CLFLUSH)] | |
13191 | "TARGET_SSE2" | |
13192 | "clflush\t%a0" | |
13193 | [(set_attr "type" "sse") | |
b6837b94 | 13194 | (set_attr "atom_sse_attr" "fence") |
ef719a44 RH |
13195 | (set_attr "memory" "unknown")]) |
13196 | ||
ef719a44 RH |
13197 | |
13198 | (define_insn "sse3_mwait" | |
13199 | [(unspec_volatile [(match_operand:SI 0 "register_operand" "a") | |
13200 | (match_operand:SI 1 "register_operand" "c")] | |
13201 | UNSPECV_MWAIT)] | |
13202 | "TARGET_SSE3" | |
67a4b391 L |
13203 | ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used. |
13204 | ;; Since 32bit register operands are implicitly zero extended to 64bit, | |
13205 | ;; we only need to set up 32bit registers. | |
13206 | "mwait" | |
ef719a44 RH |
13207 | [(set_attr "length" "3")]) |
13208 | ||
061eff6d | 13209 | (define_insn "sse3_monitor_<mode>" |
986b6423 | 13210 | [(unspec_volatile [(match_operand:P 0 "register_operand" "a") |
67a4b391 L |
13211 | (match_operand:SI 1 "register_operand" "c") |
13212 | (match_operand:SI 2 "register_operand" "d")] | |
13213 | UNSPECV_MONITOR)] | |
061eff6d | 13214 | "TARGET_SSE3" |
67a4b391 L |
13215 | ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in |
13216 | ;; RCX and RDX are used. Since 32bit register operands are implicitly | |
13217 | ;; zero extended to 64bit, we only need to set up 32bit registers. | |
061eff6d UB |
13218 | "%^monitor" |
13219 | [(set (attr "length") | |
13220 | (symbol_ref ("(Pmode != word_mode) + 3")))]) | |
b1875f52 | 13221 | |
85845bb9 UB |
13222 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
13223 | ;; | |
13224 | ;; SSSE3 instructions | |
13225 | ;; | |
13226 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
13227 | ||
8861ba4d | 13228 | (define_code_iterator ssse3_plusminus [plus ss_plus minus ss_minus]) |
b1875f52 | 13229 | |
8861ba4d | 13230 | (define_insn "avx2_ph<plusminus_mnemonic>wv16hi3" |
977e83a3 KY |
13231 | [(set (match_operand:V16HI 0 "register_operand" "=x") |
13232 | (vec_concat:V16HI | |
13233 | (vec_concat:V8HI | |
13234 | (vec_concat:V4HI | |
13235 | (vec_concat:V2HI | |
8861ba4d | 13236 | (ssse3_plusminus:HI |
977e83a3 KY |
13237 | (vec_select:HI |
13238 | (match_operand:V16HI 1 "register_operand" "x") | |
13239 | (parallel [(const_int 0)])) | |
13240 | (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) | |
8861ba4d | 13241 | (ssse3_plusminus:HI |
977e83a3 KY |
13242 | (vec_select:HI (match_dup 1) (parallel [(const_int 2)])) |
13243 | (vec_select:HI (match_dup 1) (parallel [(const_int 3)])))) | |
13244 | (vec_concat:V2HI | |
8861ba4d | 13245 | (ssse3_plusminus:HI |
977e83a3 KY |
13246 | (vec_select:HI (match_dup 1) (parallel [(const_int 4)])) |
13247 | (vec_select:HI (match_dup 1) (parallel [(const_int 5)]))) | |
8861ba4d | 13248 | (ssse3_plusminus:HI |
977e83a3 KY |
13249 | (vec_select:HI (match_dup 1) (parallel [(const_int 6)])) |
13250 | (vec_select:HI (match_dup 1) (parallel [(const_int 7)]))))) | |
13251 | (vec_concat:V4HI | |
13252 | (vec_concat:V2HI | |
8861ba4d | 13253 | (ssse3_plusminus:HI |
977e83a3 KY |
13254 | (vec_select:HI (match_dup 1) (parallel [(const_int 8)])) |
13255 | (vec_select:HI (match_dup 1) (parallel [(const_int 9)]))) | |
8861ba4d | 13256 | (ssse3_plusminus:HI |
977e83a3 KY |
13257 | (vec_select:HI (match_dup 1) (parallel [(const_int 10)])) |
13258 | (vec_select:HI (match_dup 1) (parallel [(const_int 11)])))) | |
13259 | (vec_concat:V2HI | |
8861ba4d | 13260 | (ssse3_plusminus:HI |
977e83a3 KY |
13261 | (vec_select:HI (match_dup 1) (parallel [(const_int 12)])) |
13262 | (vec_select:HI (match_dup 1) (parallel [(const_int 13)]))) | |
8861ba4d | 13263 | (ssse3_plusminus:HI |
977e83a3 KY |
13264 | (vec_select:HI (match_dup 1) (parallel [(const_int 14)])) |
13265 | (vec_select:HI (match_dup 1) (parallel [(const_int 15)])))))) | |
13266 | (vec_concat:V8HI | |
13267 | (vec_concat:V4HI | |
13268 | (vec_concat:V2HI | |
8861ba4d | 13269 | (ssse3_plusminus:HI |
977e83a3 KY |
13270 | (vec_select:HI |
13271 | (match_operand:V16HI 2 "nonimmediate_operand" "xm") | |
13272 | (parallel [(const_int 0)])) | |
13273 | (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))) | |
8861ba4d | 13274 | (ssse3_plusminus:HI |
977e83a3 KY |
13275 | (vec_select:HI (match_dup 2) (parallel [(const_int 2)])) |
13276 | (vec_select:HI (match_dup 2) (parallel [(const_int 3)])))) | |
13277 | (vec_concat:V2HI | |
8861ba4d | 13278 | (ssse3_plusminus:HI |
977e83a3 KY |
13279 | (vec_select:HI (match_dup 2) (parallel [(const_int 4)])) |
13280 | (vec_select:HI (match_dup 2) (parallel [(const_int 5)]))) | |
8861ba4d | 13281 | (ssse3_plusminus:HI |
977e83a3 KY |
13282 | (vec_select:HI (match_dup 2) (parallel [(const_int 6)])) |
13283 | (vec_select:HI (match_dup 2) (parallel [(const_int 7)]))))) | |
13284 | (vec_concat:V4HI | |
13285 | (vec_concat:V2HI | |
8861ba4d | 13286 | (ssse3_plusminus:HI |
977e83a3 KY |
13287 | (vec_select:HI (match_dup 2) (parallel [(const_int 8)])) |
13288 | (vec_select:HI (match_dup 2) (parallel [(const_int 9)]))) | |
8861ba4d | 13289 | (ssse3_plusminus:HI |
977e83a3 KY |
13290 | (vec_select:HI (match_dup 2) (parallel [(const_int 10)])) |
13291 | (vec_select:HI (match_dup 2) (parallel [(const_int 11)])))) | |
13292 | (vec_concat:V2HI | |
8861ba4d | 13293 | (ssse3_plusminus:HI |
977e83a3 KY |
13294 | (vec_select:HI (match_dup 2) (parallel [(const_int 12)])) |
13295 | (vec_select:HI (match_dup 2) (parallel [(const_int 13)]))) | |
8861ba4d | 13296 | (ssse3_plusminus:HI |
977e83a3 KY |
13297 | (vec_select:HI (match_dup 2) (parallel [(const_int 14)])) |
13298 | (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))] | |
13299 | "TARGET_AVX2" | |
8861ba4d | 13300 | "vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}" |
977e83a3 KY |
13301 | [(set_attr "type" "sseiadd") |
13302 | (set_attr "prefix_extra" "1") | |
13303 | (set_attr "prefix" "vex") | |
13304 | (set_attr "mode" "OI")]) | |
13305 | ||
8861ba4d | 13306 | (define_insn "ssse3_ph<plusminus_mnemonic>wv8hi3" |
81b1e7eb | 13307 | [(set (match_operand:V8HI 0 "register_operand" "=x,x") |
b1875f52 L |
13308 | (vec_concat:V8HI |
13309 | (vec_concat:V4HI | |
13310 | (vec_concat:V2HI | |
8861ba4d | 13311 | (ssse3_plusminus:HI |
b1875f52 | 13312 | (vec_select:HI |
81b1e7eb | 13313 | (match_operand:V8HI 1 "register_operand" "0,x") |
b1875f52 L |
13314 | (parallel [(const_int 0)])) |
13315 | (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) | |
8861ba4d | 13316 | (ssse3_plusminus:HI |
b1875f52 L |
13317 | (vec_select:HI (match_dup 1) (parallel [(const_int 2)])) |
13318 | (vec_select:HI (match_dup 1) (parallel [(const_int 3)])))) | |
13319 | (vec_concat:V2HI | |
8861ba4d | 13320 | (ssse3_plusminus:HI |
b1875f52 L |
13321 | (vec_select:HI (match_dup 1) (parallel [(const_int 4)])) |
13322 | (vec_select:HI (match_dup 1) (parallel [(const_int 5)]))) | |
8861ba4d | 13323 | (ssse3_plusminus:HI |
b1875f52 L |
13324 | (vec_select:HI (match_dup 1) (parallel [(const_int 6)])) |
13325 | (vec_select:HI (match_dup 1) (parallel [(const_int 7)]))))) | |
13326 | (vec_concat:V4HI | |
13327 | (vec_concat:V2HI | |
8861ba4d | 13328 | (ssse3_plusminus:HI |
b1875f52 | 13329 | (vec_select:HI |
81b1e7eb | 13330 | (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm") |
b1875f52 L |
13331 | (parallel [(const_int 0)])) |
13332 | (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))) | |
8861ba4d | 13333 | (ssse3_plusminus:HI |
b1875f52 L |
13334 | (vec_select:HI (match_dup 2) (parallel [(const_int 2)])) |
13335 | (vec_select:HI (match_dup 2) (parallel [(const_int 3)])))) | |
13336 | (vec_concat:V2HI | |
8861ba4d | 13337 | (ssse3_plusminus:HI |
b1875f52 L |
13338 | (vec_select:HI (match_dup 2) (parallel [(const_int 4)])) |
13339 | (vec_select:HI (match_dup 2) (parallel [(const_int 5)]))) | |
8861ba4d | 13340 | (ssse3_plusminus:HI |
b1875f52 L |
13341 | (vec_select:HI (match_dup 2) (parallel [(const_int 6)])) |
13342 | (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))] | |
13343 | "TARGET_SSSE3" | |
81b1e7eb | 13344 | "@ |
8861ba4d UB |
13345 | ph<plusminus_mnemonic>w\t{%2, %0|%0, %2} |
13346 | vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}" | |
81b1e7eb UB |
13347 | [(set_attr "isa" "noavx,avx") |
13348 | (set_attr "type" "sseiadd") | |
b6837b94 | 13349 | (set_attr "atom_unit" "complex") |
81b1e7eb | 13350 | (set_attr "prefix_data16" "1,*") |
10e4d956 | 13351 | (set_attr "prefix_extra" "1") |
81b1e7eb | 13352 | (set_attr "prefix" "orig,vex") |
b1875f52 L |
13353 | (set_attr "mode" "TI")]) |
13354 | ||
8861ba4d | 13355 | (define_insn "ssse3_ph<plusminus_mnemonic>wv4hi3" |
b1875f52 L |
13356 | [(set (match_operand:V4HI 0 "register_operand" "=y") |
13357 | (vec_concat:V4HI | |
13358 | (vec_concat:V2HI | |
8861ba4d | 13359 | (ssse3_plusminus:HI |
b1875f52 L |
13360 | (vec_select:HI |
13361 | (match_operand:V4HI 1 "register_operand" "0") | |
13362 | (parallel [(const_int 0)])) | |
13363 | (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) | |
8861ba4d | 13364 | (ssse3_plusminus:HI |
b1875f52 L |
13365 | (vec_select:HI (match_dup 1) (parallel [(const_int 2)])) |
13366 | (vec_select:HI (match_dup 1) (parallel [(const_int 3)])))) | |
13367 | (vec_concat:V2HI | |
8861ba4d | 13368 | (ssse3_plusminus:HI |
b1875f52 L |
13369 | (vec_select:HI |
13370 | (match_operand:V4HI 2 "nonimmediate_operand" "ym") | |
13371 | (parallel [(const_int 0)])) | |
13372 | (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))) | |
8861ba4d | 13373 | (ssse3_plusminus:HI |
b1875f52 L |
13374 | (vec_select:HI (match_dup 2) (parallel [(const_int 2)])) |
13375 | (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))] | |
13376 | "TARGET_SSSE3" | |
8861ba4d | 13377 | "ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}" |
b1875f52 | 13378 | [(set_attr "type" "sseiadd") |
b6837b94 | 13379 | (set_attr "atom_unit" "complex") |
10e4d956 | 13380 | (set_attr "prefix_extra" "1") |
725fd454 | 13381 | (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) |
b1875f52 L |
13382 | (set_attr "mode" "DI")]) |
13383 | ||
8861ba4d | 13384 | (define_insn "avx2_ph<plusminus_mnemonic>dv8si3" |
977e83a3 KY |
13385 | [(set (match_operand:V8SI 0 "register_operand" "=x") |
13386 | (vec_concat:V8SI | |
13387 | (vec_concat:V4SI | |
13388 | (vec_concat:V2SI | |
8861ba4d | 13389 | (plusminus:SI |
977e83a3 KY |
13390 | (vec_select:SI |
13391 | (match_operand:V8SI 1 "register_operand" "x") | |
13392 | (parallel [(const_int 0)])) | |
13393 | (vec_select:SI (match_dup 1) (parallel [(const_int 1)]))) | |
8861ba4d | 13394 | (plusminus:SI |
977e83a3 KY |
13395 | (vec_select:SI (match_dup 1) (parallel [(const_int 2)])) |
13396 | (vec_select:SI (match_dup 1) (parallel [(const_int 3)])))) | |
13397 | (vec_concat:V2SI | |
8861ba4d | 13398 | (plusminus:SI |
977e83a3 KY |
13399 | (vec_select:SI (match_dup 1) (parallel [(const_int 4)])) |
13400 | (vec_select:SI (match_dup 1) (parallel [(const_int 5)]))) | |
8861ba4d | 13401 | (plusminus:SI |
977e83a3 KY |
13402 | (vec_select:SI (match_dup 1) (parallel [(const_int 6)])) |
13403 | (vec_select:SI (match_dup 1) (parallel [(const_int 7)]))))) | |
13404 | (vec_concat:V4SI | |
13405 | (vec_concat:V2SI | |
8861ba4d | 13406 | (plusminus:SI |
977e83a3 KY |
13407 | (vec_select:SI |
13408 | (match_operand:V8SI 2 "nonimmediate_operand" "xm") | |
13409 | (parallel [(const_int 0)])) | |
13410 | (vec_select:SI (match_dup 2) (parallel [(const_int 1)]))) | |
8861ba4d | 13411 | (plusminus:SI |
977e83a3 KY |
13412 | (vec_select:SI (match_dup 2) (parallel [(const_int 2)])) |
13413 | (vec_select:SI (match_dup 2) (parallel [(const_int 3)])))) | |
13414 | (vec_concat:V2SI | |
8861ba4d | 13415 | (plusminus:SI |
977e83a3 KY |
13416 | (vec_select:SI (match_dup 2) (parallel [(const_int 4)])) |
13417 | (vec_select:SI (match_dup 2) (parallel [(const_int 5)]))) | |
8861ba4d | 13418 | (plusminus:SI |
977e83a3 KY |
13419 | (vec_select:SI (match_dup 2) (parallel [(const_int 6)])) |
13420 | (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))] | |
13421 | "TARGET_AVX2" | |
8861ba4d | 13422 | "vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}" |
977e83a3 KY |
13423 | [(set_attr "type" "sseiadd") |
13424 | (set_attr "prefix_extra" "1") | |
13425 | (set_attr "prefix" "vex") | |
13426 | (set_attr "mode" "OI")]) | |
13427 | ||
8861ba4d | 13428 | (define_insn "ssse3_ph<plusminus_mnemonic>dv4si3" |
81b1e7eb | 13429 | [(set (match_operand:V4SI 0 "register_operand" "=x,x") |
b1875f52 L |
13430 | (vec_concat:V4SI |
13431 | (vec_concat:V2SI | |
8861ba4d | 13432 | (plusminus:SI |
b1875f52 | 13433 | (vec_select:SI |
81b1e7eb | 13434 | (match_operand:V4SI 1 "register_operand" "0,x") |
b1875f52 L |
13435 | (parallel [(const_int 0)])) |
13436 | (vec_select:SI (match_dup 1) (parallel [(const_int 1)]))) | |
8861ba4d | 13437 | (plusminus:SI |
b1875f52 L |
13438 | (vec_select:SI (match_dup 1) (parallel [(const_int 2)])) |
13439 | (vec_select:SI (match_dup 1) (parallel [(const_int 3)])))) | |
13440 | (vec_concat:V2SI | |
8861ba4d | 13441 | (plusminus:SI |
b1875f52 | 13442 | (vec_select:SI |
81b1e7eb | 13443 | (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm") |
b1875f52 L |
13444 | (parallel [(const_int 0)])) |
13445 | (vec_select:SI (match_dup 2) (parallel [(const_int 1)]))) | |
8861ba4d | 13446 | (plusminus:SI |
b1875f52 L |
13447 | (vec_select:SI (match_dup 2) (parallel [(const_int 2)])) |
13448 | (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))] | |
13449 | "TARGET_SSSE3" | |
81b1e7eb | 13450 | "@ |
8861ba4d UB |
13451 | ph<plusminus_mnemonic>d\t{%2, %0|%0, %2} |
13452 | vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}" | |
81b1e7eb UB |
13453 | [(set_attr "isa" "noavx,avx") |
13454 | (set_attr "type" "sseiadd") | |
b6837b94 | 13455 | (set_attr "atom_unit" "complex") |
81b1e7eb | 13456 | (set_attr "prefix_data16" "1,*") |
10e4d956 | 13457 | (set_attr "prefix_extra" "1") |
81b1e7eb | 13458 | (set_attr "prefix" "orig,vex") |
b1875f52 L |
13459 | (set_attr "mode" "TI")]) |
13460 | ||
8861ba4d | 13461 | (define_insn "ssse3_ph<plusminus_mnemonic>dv2si3" |
b1875f52 L |
13462 | [(set (match_operand:V2SI 0 "register_operand" "=y") |
13463 | (vec_concat:V2SI | |
8861ba4d | 13464 | (plusminus:SI |
b1875f52 L |
13465 | (vec_select:SI |
13466 | (match_operand:V2SI 1 "register_operand" "0") | |
13467 | (parallel [(const_int 0)])) | |
13468 | (vec_select:SI (match_dup 1) (parallel [(const_int 1)]))) | |
8861ba4d | 13469 | (plusminus:SI |
b1875f52 L |
13470 | (vec_select:SI |
13471 | (match_operand:V2SI 2 "nonimmediate_operand" "ym") | |
13472 | (parallel [(const_int 0)])) | |
13473 | (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))] | |
13474 | "TARGET_SSSE3" | |
8861ba4d | 13475 | "ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}" |
b1875f52 | 13476 | [(set_attr "type" "sseiadd") |
b6837b94 | 13477 | (set_attr "atom_unit" "complex") |
95879c72 | 13478 | (set_attr "prefix_extra" "1") |
725fd454 | 13479 | (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) |
95879c72 L |
13480 | (set_attr "mode" "DI")]) |
13481 | ||
977e83a3 KY |
13482 | (define_insn "avx2_pmaddubsw256" |
13483 | [(set (match_operand:V16HI 0 "register_operand" "=x") | |
13484 | (ss_plus:V16HI | |
13485 | (mult:V16HI | |
13486 | (zero_extend:V16HI | |
13487 | (vec_select:V16QI | |
13488 | (match_operand:V32QI 1 "register_operand" "x") | |
608dccd7 UB |
13489 | (parallel [(const_int 0) (const_int 2) |
13490 | (const_int 4) (const_int 6) | |
13491 | (const_int 8) (const_int 10) | |
13492 | (const_int 12) (const_int 14) | |
13493 | (const_int 16) (const_int 18) | |
13494 | (const_int 20) (const_int 22) | |
13495 | (const_int 24) (const_int 26) | |
13496 | (const_int 28) (const_int 30)]))) | |
977e83a3 KY |
13497 | (sign_extend:V16HI |
13498 | (vec_select:V16QI | |
13499 | (match_operand:V32QI 2 "nonimmediate_operand" "xm") | |
608dccd7 UB |
13500 | (parallel [(const_int 0) (const_int 2) |
13501 | (const_int 4) (const_int 6) | |
13502 | (const_int 8) (const_int 10) | |
13503 | (const_int 12) (const_int 14) | |
13504 | (const_int 16) (const_int 18) | |
13505 | (const_int 20) (const_int 22) | |
13506 | (const_int 24) (const_int 26) | |
13507 | (const_int 28) (const_int 30)])))) | |
977e83a3 KY |
13508 | (mult:V16HI |
13509 | (zero_extend:V16HI | |
13510 | (vec_select:V16QI (match_dup 1) | |
608dccd7 UB |
13511 | (parallel [(const_int 1) (const_int 3) |
13512 | (const_int 5) (const_int 7) | |
13513 | (const_int 9) (const_int 11) | |
13514 | (const_int 13) (const_int 15) | |
13515 | (const_int 17) (const_int 19) | |
13516 | (const_int 21) (const_int 23) | |
13517 | (const_int 25) (const_int 27) | |
13518 | (const_int 29) (const_int 31)]))) | |
977e83a3 KY |
13519 | (sign_extend:V16HI |
13520 | (vec_select:V16QI (match_dup 2) | |
608dccd7 UB |
13521 | (parallel [(const_int 1) (const_int 3) |
13522 | (const_int 5) (const_int 7) | |
13523 | (const_int 9) (const_int 11) | |
13524 | (const_int 13) (const_int 15) | |
13525 | (const_int 17) (const_int 19) | |
13526 | (const_int 21) (const_int 23) | |
13527 | (const_int 25) (const_int 27) | |
13528 | (const_int 29) (const_int 31)]))))))] | |
977e83a3 KY |
13529 | "TARGET_AVX2" |
13530 | "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}" | |
13531 | [(set_attr "type" "sseiadd") | |
13532 | (set_attr "prefix_extra" "1") | |
13533 | (set_attr "prefix" "vex") | |
13534 | (set_attr "mode" "OI")]) | |
13535 | ||
5f64b496 AI |
13536 | ;; The correct representation for this is absolutely enormous, and |
13537 | ;; surely not generally useful. | |
13538 | (define_insn "avx512bw_pmaddubsw512<mode><mask_name>" | |
13539 | [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v") | |
13540 | (unspec:VI2_AVX512VL | |
13541 | [(match_operand:<dbpsadbwmode> 1 "register_operand" "v") | |
13542 | (match_operand:<dbpsadbwmode> 2 "nonimmediate_operand" "vm")] | |
13543 | UNSPEC_PMADDUBSW512))] | |
13544 | "TARGET_AVX512BW" | |
13545 | "vpmaddubsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"; | |
13546 | [(set_attr "type" "sseiadd") | |
13547 | (set_attr "prefix" "evex") | |
13548 | (set_attr "mode" "XI")]) | |
13549 | ||
cf25c309 AI |
13550 | (define_insn "avx512bw_umulhrswv32hi3<mask_name>" |
13551 | [(set (match_operand:V32HI 0 "register_operand" "=v") | |
13552 | (truncate:V32HI | |
13553 | (lshiftrt:V32SI | |
13554 | (plus:V32SI | |
13555 | (lshiftrt:V32SI | |
13556 | (mult:V32SI | |
13557 | (sign_extend:V32SI | |
13558 | (match_operand:V32HI 1 "nonimmediate_operand" "%v")) | |
13559 | (sign_extend:V32SI | |
13560 | (match_operand:V32HI 2 "nonimmediate_operand" "vm"))) | |
13561 | (const_int 14)) | |
13562 | (const_vector:V32HI [(const_int 1) (const_int 1) | |
13563 | (const_int 1) (const_int 1) | |
13564 | (const_int 1) (const_int 1) | |
13565 | (const_int 1) (const_int 1) | |
13566 | (const_int 1) (const_int 1) | |
13567 | (const_int 1) (const_int 1) | |
13568 | (const_int 1) (const_int 1) | |
13569 | (const_int 1) (const_int 1) | |
13570 | (const_int 1) (const_int 1) | |
13571 | (const_int 1) (const_int 1) | |
13572 | (const_int 1) (const_int 1) | |
13573 | (const_int 1) (const_int 1) | |
13574 | (const_int 1) (const_int 1) | |
13575 | (const_int 1) (const_int 1) | |
13576 | (const_int 1) (const_int 1) | |
13577 | (const_int 1) (const_int 1)])) | |
13578 | (const_int 1))))] | |
13579 | "TARGET_AVX512BW" | |
13580 | "vpmulhrsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" | |
13581 | [(set_attr "type" "sseimul") | |
13582 | (set_attr "prefix" "evex") | |
13583 | (set_attr "mode" "XI")]) | |
13584 | ||
1b667c82 | 13585 | (define_insn "ssse3_pmaddubsw128" |
81b1e7eb | 13586 | [(set (match_operand:V8HI 0 "register_operand" "=x,x") |
b1875f52 L |
13587 | (ss_plus:V8HI |
13588 | (mult:V8HI | |
13589 | (zero_extend:V8HI | |
dfee1406 | 13590 | (vec_select:V8QI |
81b1e7eb | 13591 | (match_operand:V16QI 1 "register_operand" "0,x") |
608dccd7 UB |
13592 | (parallel [(const_int 0) (const_int 2) |
13593 | (const_int 4) (const_int 6) | |
13594 | (const_int 8) (const_int 10) | |
13595 | (const_int 12) (const_int 14)]))) | |
b1875f52 L |
13596 | (sign_extend:V8HI |
13597 | (vec_select:V8QI | |
81b1e7eb | 13598 | (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm") |
608dccd7 UB |
13599 | (parallel [(const_int 0) (const_int 2) |
13600 | (const_int 4) (const_int 6) | |
13601 | (const_int 8) (const_int 10) | |
13602 | (const_int 12) (const_int 14)])))) | |
b1875f52 L |
13603 | (mult:V8HI |
13604 | (zero_extend:V8HI | |
dfee1406 | 13605 | (vec_select:V8QI (match_dup 1) |
608dccd7 UB |
13606 | (parallel [(const_int 1) (const_int 3) |
13607 | (const_int 5) (const_int 7) | |
13608 | (const_int 9) (const_int 11) | |
13609 | (const_int 13) (const_int 15)]))) | |
b1875f52 | 13610 | (sign_extend:V8HI |
dfee1406 | 13611 | (vec_select:V8QI (match_dup 2) |
608dccd7 UB |
13612 | (parallel [(const_int 1) (const_int 3) |
13613 | (const_int 5) (const_int 7) | |
13614 | (const_int 9) (const_int 11) | |
13615 | (const_int 13) (const_int 15)]))))))] | |
b1875f52 | 13616 | "TARGET_SSSE3" |
81b1e7eb UB |
13617 | "@ |
13618 | pmaddubsw\t{%2, %0|%0, %2} | |
13619 | vpmaddubsw\t{%2, %1, %0|%0, %1, %2}" | |
13620 | [(set_attr "isa" "noavx,avx") | |
13621 | (set_attr "type" "sseiadd") | |
b6837b94 | 13622 | (set_attr "atom_unit" "simul") |
81b1e7eb | 13623 | (set_attr "prefix_data16" "1,*") |
10e4d956 | 13624 | (set_attr "prefix_extra" "1") |
81b1e7eb | 13625 | (set_attr "prefix" "orig,vex") |
b1875f52 L |
13626 | (set_attr "mode" "TI")]) |
13627 | ||
1b667c82 | 13628 | (define_insn "ssse3_pmaddubsw" |
b1875f52 L |
13629 | [(set (match_operand:V4HI 0 "register_operand" "=y") |
13630 | (ss_plus:V4HI | |
13631 | (mult:V4HI | |
13632 | (zero_extend:V4HI | |
13633 | (vec_select:V4QI | |
ffbaf337 | 13634 | (match_operand:V8QI 1 "register_operand" "0") |
608dccd7 UB |
13635 | (parallel [(const_int 0) (const_int 2) |
13636 | (const_int 4) (const_int 6)]))) | |
b1875f52 L |
13637 | (sign_extend:V4HI |
13638 | (vec_select:V4QI | |
13639 | (match_operand:V8QI 2 "nonimmediate_operand" "ym") | |
608dccd7 UB |
13640 | (parallel [(const_int 0) (const_int 2) |
13641 | (const_int 4) (const_int 6)])))) | |
b1875f52 L |
13642 | (mult:V4HI |
13643 | (zero_extend:V4HI | |
dfee1406 | 13644 | (vec_select:V4QI (match_dup 1) |
608dccd7 UB |
13645 | (parallel [(const_int 1) (const_int 3) |
13646 | (const_int 5) (const_int 7)]))) | |
b1875f52 | 13647 | (sign_extend:V4HI |
dfee1406 | 13648 | (vec_select:V4QI (match_dup 2) |
608dccd7 UB |
13649 | (parallel [(const_int 1) (const_int 3) |
13650 | (const_int 5) (const_int 7)]))))))] | |
b1875f52 L |
13651 | "TARGET_SSSE3" |
13652 | "pmaddubsw\t{%2, %0|%0, %2}" | |
13653 | [(set_attr "type" "sseiadd") | |
b6837b94 | 13654 | (set_attr "atom_unit" "simul") |
10e4d956 | 13655 | (set_attr "prefix_extra" "1") |
977e83a3 KY |
13656 | (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) |
13657 | (set_attr "mode" "DI")]) | |
13658 | ||
880ab4be AT |
13659 | (define_mode_iterator PMULHRSW |
13660 | [V4HI V8HI (V16HI "TARGET_AVX2")]) | |
13661 | ||
cf25c309 AI |
13662 | (define_expand "<ssse3_avx2>_pmulhrsw<mode>3_mask" |
13663 | [(set (match_operand:PMULHRSW 0 "register_operand") | |
13664 | (vec_merge:PMULHRSW | |
13665 | (truncate:PMULHRSW | |
13666 | (lshiftrt:<ssedoublemode> | |
13667 | (plus:<ssedoublemode> | |
13668 | (lshiftrt:<ssedoublemode> | |
13669 | (mult:<ssedoublemode> | |
13670 | (sign_extend:<ssedoublemode> | |
13671 | (match_operand:PMULHRSW 1 "nonimmediate_operand")) | |
13672 | (sign_extend:<ssedoublemode> | |
13673 | (match_operand:PMULHRSW 2 "nonimmediate_operand"))) | |
13674 | (const_int 14)) | |
13675 | (match_dup 5)) | |
13676 | (const_int 1))) | |
13677 | (match_operand:PMULHRSW 3 "register_operand") | |
13678 | (match_operand:<avx512fmaskmode> 4 "register_operand")))] | |
13679 | "TARGET_AVX512BW && TARGET_AVX512VL" | |
13680 | { | |
13681 | operands[5] = CONST1_RTX(<MODE>mode); | |
13682 | ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands); | |
13683 | }) | |
13684 | ||
880ab4be AT |
13685 | (define_expand "<ssse3_avx2>_pmulhrsw<mode>3" |
13686 | [(set (match_operand:PMULHRSW 0 "register_operand") | |
13687 | (truncate:PMULHRSW | |
13688 | (lshiftrt:<ssedoublemode> | |
13689 | (plus:<ssedoublemode> | |
13690 | (lshiftrt:<ssedoublemode> | |
13691 | (mult:<ssedoublemode> | |
13692 | (sign_extend:<ssedoublemode> | |
13693 | (match_operand:PMULHRSW 1 "nonimmediate_operand")) | |
13694 | (sign_extend:<ssedoublemode> | |
13695 | (match_operand:PMULHRSW 2 "nonimmediate_operand"))) | |
977e83a3 | 13696 | (const_int 14)) |
880ab4be | 13697 | (match_dup 3)) |
977e83a3 KY |
13698 | (const_int 1))))] |
13699 | "TARGET_AVX2" | |
880ab4be AT |
13700 | { |
13701 | operands[3] = CONST1_RTX(<MODE>mode); | |
13702 | ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands); | |
13703 | }) | |
977e83a3 | 13704 | |
ed3e611e AI |
13705 | (define_insn "*<ssse3_avx2>_pmulhrsw<mode>3<mask_name>" |
13706 | [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v") | |
e7d8fc6c AT |
13707 | (truncate:VI2_AVX2 |
13708 | (lshiftrt:<ssedoublemode> | |
13709 | (plus:<ssedoublemode> | |
13710 | (lshiftrt:<ssedoublemode> | |
13711 | (mult:<ssedoublemode> | |
13712 | (sign_extend:<ssedoublemode> | |
ed3e611e | 13713 | (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,v")) |
e7d8fc6c | 13714 | (sign_extend:<ssedoublemode> |
ed3e611e | 13715 | (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,vm"))) |
b1875f52 | 13716 | (const_int 14)) |
e7d8fc6c | 13717 | (match_operand:VI2_AVX2 3 "const1_operand")) |
b1875f52 | 13718 | (const_int 1))))] |
ed3e611e AI |
13719 | "TARGET_SSSE3 && <mask_mode512bit_condition> && <mask_avx512bw_condition> |
13720 | && ix86_binary_operator_ok (MULT, <MODE>mode, operands)" | |
81b1e7eb UB |
13721 | "@ |
13722 | pmulhrsw\t{%2, %0|%0, %2} | |
ed3e611e | 13723 | vpmulhrsw\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}" |
81b1e7eb UB |
13724 | [(set_attr "isa" "noavx,avx") |
13725 | (set_attr "type" "sseimul") | |
13726 | (set_attr "prefix_data16" "1,*") | |
10e4d956 | 13727 | (set_attr "prefix_extra" "1") |
ed3e611e | 13728 | (set_attr "prefix" "orig,maybe_evex") |
e7d8fc6c | 13729 | (set_attr "mode" "<sseinsnmode>")]) |
b1875f52 | 13730 | |
ffbaf337 | 13731 | (define_insn "*ssse3_pmulhrswv4hi3" |
b1875f52 L |
13732 | [(set (match_operand:V4HI 0 "register_operand" "=y") |
13733 | (truncate:V4HI | |
13734 | (lshiftrt:V4SI | |
13735 | (plus:V4SI | |
13736 | (lshiftrt:V4SI | |
13737 | (mult:V4SI | |
13738 | (sign_extend:V4SI | |
13739 | (match_operand:V4HI 1 "nonimmediate_operand" "%0")) | |
13740 | (sign_extend:V4SI | |
13741 | (match_operand:V4HI 2 "nonimmediate_operand" "ym"))) | |
13742 | (const_int 14)) | |
880ab4be | 13743 | (match_operand:V4HI 3 "const1_operand")) |
b1875f52 L |
13744 | (const_int 1))))] |
13745 | "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)" | |
13746 | "pmulhrsw\t{%2, %0|%0, %2}" | |
13747 | [(set_attr "type" "sseimul") | |
10e4d956 | 13748 | (set_attr "prefix_extra" "1") |
725fd454 | 13749 | (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) |
b1875f52 L |
13750 | (set_attr "mode" "DI")]) |
13751 | ||
f5db965f IT |
13752 | (define_insn "<ssse3_avx2>_pshufb<mode>3<mask_name>" |
13753 | [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,v") | |
13754 | (unspec:VI1_AVX512 | |
13755 | [(match_operand:VI1_AVX512 1 "register_operand" "0,v") | |
13756 | (match_operand:VI1_AVX512 2 "nonimmediate_operand" "xm,vm")] | |
8861ba4d | 13757 | UNSPEC_PSHUFB))] |
f5db965f | 13758 | "TARGET_SSSE3 && <mask_mode512bit_condition> && <mask_avx512bw_condition>" |
81b1e7eb UB |
13759 | "@ |
13760 | pshufb\t{%2, %0|%0, %2} | |
f5db965f | 13761 | vpshufb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
81b1e7eb UB |
13762 | [(set_attr "isa" "noavx,avx") |
13763 | (set_attr "type" "sselog1") | |
13764 | (set_attr "prefix_data16" "1,*") | |
10e4d956 | 13765 | (set_attr "prefix_extra" "1") |
f5db965f | 13766 | (set_attr "prefix" "orig,maybe_evex") |
01284895 | 13767 | (set_attr "btver2_decode" "vector,vector") |
977e83a3 | 13768 | (set_attr "mode" "<sseinsnmode>")]) |
b1875f52 L |
13769 | |
13770 | (define_insn "ssse3_pshufbv8qi3" | |
13771 | [(set (match_operand:V8QI 0 "register_operand" "=y") | |
13772 | (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0") | |
13773 | (match_operand:V8QI 2 "nonimmediate_operand" "ym")] | |
d6023b50 | 13774 | UNSPEC_PSHUFB))] |
b1875f52 L |
13775 | "TARGET_SSSE3" |
13776 | "pshufb\t{%2, %0|%0, %2}"; | |
13777 | [(set_attr "type" "sselog1") | |
10e4d956 | 13778 | (set_attr "prefix_extra" "1") |
725fd454 | 13779 | (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) |
b1875f52 L |
13780 | (set_attr "mode" "DI")]) |
13781 | ||
977e83a3 KY |
13782 | (define_insn "<ssse3_avx2>_psign<mode>3" |
13783 | [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x") | |
13784 | (unspec:VI124_AVX2 | |
13785 | [(match_operand:VI124_AVX2 1 "register_operand" "0,x") | |
13786 | (match_operand:VI124_AVX2 2 "nonimmediate_operand" "xm,xm")] | |
d6023b50 | 13787 | UNSPEC_PSIGN))] |
b1875f52 | 13788 | "TARGET_SSSE3" |
81b1e7eb | 13789 | "@ |
cbb734aa UB |
13790 | psign<ssemodesuffix>\t{%2, %0|%0, %2} |
13791 | vpsign<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" | |
81b1e7eb UB |
13792 | [(set_attr "isa" "noavx,avx") |
13793 | (set_attr "type" "sselog1") | |
13794 | (set_attr "prefix_data16" "1,*") | |
10e4d956 | 13795 | (set_attr "prefix_extra" "1") |
81b1e7eb | 13796 | (set_attr "prefix" "orig,vex") |
977e83a3 | 13797 | (set_attr "mode" "<sseinsnmode>")]) |
b1875f52 L |
13798 | |
13799 | (define_insn "ssse3_psign<mode>3" | |
13800 | [(set (match_operand:MMXMODEI 0 "register_operand" "=y") | |
d6023b50 UB |
13801 | (unspec:MMXMODEI |
13802 | [(match_operand:MMXMODEI 1 "register_operand" "0") | |
13803 | (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")] | |
13804 | UNSPEC_PSIGN))] | |
b1875f52 L |
13805 | "TARGET_SSSE3" |
13806 | "psign<mmxvecsize>\t{%2, %0|%0, %2}"; | |
13807 | [(set_attr "type" "sselog1") | |
10e4d956 | 13808 | (set_attr "prefix_extra" "1") |
725fd454 | 13809 | (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) |
b1875f52 L |
13810 | (set_attr "mode" "DI")]) |
13811 | ||
b99ba39a | 13812 | (define_insn "<ssse3_avx2>_palignr<mode>_mask" |
6afcac32 IT |
13813 | [(set (match_operand:VI1_AVX512 0 "register_operand" "=v") |
13814 | (vec_merge:VI1_AVX512 | |
13815 | (unspec:VI1_AVX512 | |
13816 | [(match_operand:VI1_AVX512 1 "register_operand" "v") | |
13817 | (match_operand:VI1_AVX512 2 "nonimmediate_operand" "vm") | |
b99ba39a AI |
13818 | (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")] |
13819 | UNSPEC_PALIGNR) | |
6afcac32 | 13820 | (match_operand:VI1_AVX512 4 "vector_move_operand" "0C") |
b99ba39a AI |
13821 | (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))] |
13822 | "TARGET_AVX512BW && (<MODE_SIZE> == 64 || TARGET_AVX512VL)" | |
13823 | { | |
13824 | operands[3] = GEN_INT (INTVAL (operands[3]) / 8); | |
13825 | return "vpalignr\t{%3, %2, %1, %0%{%5%}%N4|%0%{%5%}%N4, %1, %2, %3}"; | |
13826 | } | |
13827 | [(set_attr "type" "sseishft") | |
13828 | (set_attr "atom_unit" "sishuf") | |
13829 | (set_attr "prefix_extra" "1") | |
13830 | (set_attr "length_immediate" "1") | |
13831 | (set_attr "prefix" "evex") | |
13832 | (set_attr "mode" "<sseinsnmode>")]) | |
13833 | ||
977e83a3 | 13834 | (define_insn "<ssse3_avx2>_palignr<mode>" |
b99ba39a | 13835 | [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,v") |
8861ba4d | 13836 | (unspec:SSESCALARMODE |
b99ba39a AI |
13837 | [(match_operand:SSESCALARMODE 1 "register_operand" "0,v") |
13838 | (match_operand:SSESCALARMODE 2 "nonimmediate_operand" "xm,vm") | |
8861ba4d UB |
13839 | (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")] |
13840 | UNSPEC_PALIGNR))] | |
b1875f52 L |
13841 | "TARGET_SSSE3" |
13842 | { | |
13843 | operands[3] = GEN_INT (INTVAL (operands[3]) / 8); | |
81b1e7eb UB |
13844 | |
13845 | switch (which_alternative) | |
13846 | { | |
13847 | case 0: | |
13848 | return "palignr\t{%3, %2, %0|%0, %2, %3}"; | |
13849 | case 1: | |
13850 | return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}"; | |
13851 | default: | |
13852 | gcc_unreachable (); | |
13853 | } | |
b1875f52 | 13854 | } |
81b1e7eb UB |
13855 | [(set_attr "isa" "noavx,avx") |
13856 | (set_attr "type" "sseishft") | |
b6837b94 | 13857 | (set_attr "atom_unit" "sishuf") |
81b1e7eb | 13858 | (set_attr "prefix_data16" "1,*") |
10e4d956 | 13859 | (set_attr "prefix_extra" "1") |
725fd454 | 13860 | (set_attr "length_immediate" "1") |
81b1e7eb | 13861 | (set_attr "prefix" "orig,vex") |
977e83a3 | 13862 | (set_attr "mode" "<sseinsnmode>")]) |
b1875f52 L |
13863 | |
13864 | (define_insn "ssse3_palignrdi" | |
13865 | [(set (match_operand:DI 0 "register_operand" "=y") | |
13866 | (unspec:DI [(match_operand:DI 1 "register_operand" "0") | |
13867 | (match_operand:DI 2 "nonimmediate_operand" "ym") | |
13868 | (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")] | |
d6023b50 | 13869 | UNSPEC_PALIGNR))] |
b1875f52 L |
13870 | "TARGET_SSSE3" |
13871 | { | |
13872 | operands[3] = GEN_INT (INTVAL (operands[3]) / 8); | |
13873 | return "palignr\t{%3, %2, %0|%0, %2, %3}"; | |
13874 | } | |
13875 | [(set_attr "type" "sseishft") | |
b6837b94 | 13876 | (set_attr "atom_unit" "sishuf") |
10e4d956 | 13877 | (set_attr "prefix_extra" "1") |
725fd454 JJ |
13878 | (set_attr "length_immediate" "1") |
13879 | (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) | |
b1875f52 L |
13880 | (set_attr "mode" "DI")]) |
13881 | ||
e650a568 KY |
13882 | ;; Mode iterator to handle singularity w/ absence of V2DI and V4DI |
13883 | ;; modes for abs instruction on pre AVX-512 targets. | |
13884 | (define_mode_iterator VI1248_AVX512VL_AVX512BW | |
13885 | [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI | |
13886 | (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI | |
13887 | (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI | |
13888 | (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")]) | |
13889 | ||
700e2919 | 13890 | (define_insn "*abs<mode>2" |
e650a568 KY |
13891 | [(set (match_operand:VI1248_AVX512VL_AVX512BW 0 "register_operand" "=v") |
13892 | (abs:VI1248_AVX512VL_AVX512BW | |
13893 | (match_operand:VI1248_AVX512VL_AVX512BW 1 "nonimmediate_operand" "vm")))] | |
700e2919 AI |
13894 | "TARGET_SSSE3" |
13895 | "%vpabs<ssemodesuffix>\t{%1, %0|%0, %1}" | |
b1875f52 | 13896 | [(set_attr "type" "sselog1") |
10e4d956 L |
13897 | (set_attr "prefix_data16" "1") |
13898 | (set_attr "prefix_extra" "1") | |
95879c72 | 13899 | (set_attr "prefix" "maybe_vex") |
977e83a3 | 13900 | (set_attr "mode" "<sseinsnmode>")]) |
b1875f52 | 13901 | |
700e2919 AI |
13902 | (define_insn "abs<mode>2_mask" |
13903 | [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v") | |
13904 | (vec_merge:VI48_AVX512VL | |
13905 | (abs:VI48_AVX512VL | |
13906 | (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")) | |
13907 | (match_operand:VI48_AVX512VL 2 "vector_move_operand" "0C") | |
13908 | (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))] | |
13909 | "TARGET_AVX512F" | |
13910 | "vpabs<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" | |
13911 | [(set_attr "type" "sselog1") | |
13912 | (set_attr "prefix" "evex") | |
13913 | (set_attr "mode" "<sseinsnmode>")]) | |
13914 | ||
13915 | (define_insn "abs<mode>2_mask" | |
13916 | [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v") | |
13917 | (vec_merge:VI12_AVX512VL | |
13918 | (abs:VI12_AVX512VL | |
13919 | (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "vm")) | |
13920 | (match_operand:VI12_AVX512VL 2 "vector_move_operand" "0C") | |
13921 | (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))] | |
13922 | "TARGET_AVX512BW" | |
13923 | "vpabs<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" | |
13924 | [(set_attr "type" "sselog1") | |
13925 | (set_attr "prefix" "evex") | |
13926 | (set_attr "mode" "<sseinsnmode>")]) | |
13927 | ||
8f24613d | 13928 | (define_expand "abs<mode>2" |
e650a568 KY |
13929 | [(set (match_operand:VI1248_AVX512VL_AVX512BW 0 "register_operand") |
13930 | (abs:VI1248_AVX512VL_AVX512BW | |
13931 | (match_operand:VI1248_AVX512VL_AVX512BW 1 "nonimmediate_operand")))] | |
8f24613d CH |
13932 | "TARGET_SSE2" |
13933 | { | |
13934 | if (!TARGET_SSSE3) | |
13935 | { | |
13936 | ix86_expand_sse2_abs (operands[0], operands[1]); | |
13937 | DONE; | |
13938 | } | |
13939 | }) | |
13940 | ||
b1875f52 L |
13941 | (define_insn "abs<mode>2" |
13942 | [(set (match_operand:MMXMODEI 0 "register_operand" "=y") | |
81b1e7eb UB |
13943 | (abs:MMXMODEI |
13944 | (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))] | |
b1875f52 L |
13945 | "TARGET_SSSE3" |
13946 | "pabs<mmxvecsize>\t{%1, %0|%0, %1}"; | |
13947 | [(set_attr "type" "sselog1") | |
725fd454 | 13948 | (set_attr "prefix_rep" "0") |
10e4d956 | 13949 | (set_attr "prefix_extra" "1") |
725fd454 | 13950 | (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) |
b1875f52 | 13951 | (set_attr "mode" "DI")]) |
21efb4d4 HJ |
13952 | |
13953 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
13954 | ;; | |
13955 | ;; AMD SSE4A instructions | |
13956 | ;; | |
13957 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
13958 | ||
85845bb9 UB |
13959 | (define_insn "sse4a_movnt<mode>" |
13960 | [(set (match_operand:MODEF 0 "memory_operand" "=m") | |
13961 | (unspec:MODEF | |
13962 | [(match_operand:MODEF 1 "register_operand" "x")] | |
977e83a3 | 13963 | UNSPEC_MOVNT))] |
21efb4d4 | 13964 | "TARGET_SSE4A" |
cbb734aa | 13965 | "movnt<ssemodesuffix>\t{%1, %0|%0, %1}" |
21efb4d4 | 13966 | [(set_attr "type" "ssemov") |
85845bb9 | 13967 | (set_attr "mode" "<MODE>")]) |
21efb4d4 | 13968 | |
85845bb9 UB |
13969 | (define_insn "sse4a_vmmovnt<mode>" |
13970 | [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m") | |
13971 | (unspec:<ssescalarmode> | |
13972 | [(vec_select:<ssescalarmode> | |
6bec6c98 | 13973 | (match_operand:VF_128 1 "register_operand" "x") |
85845bb9 UB |
13974 | (parallel [(const_int 0)]))] |
13975 | UNSPEC_MOVNT))] | |
21efb4d4 | 13976 | "TARGET_SSE4A" |
1c154a23 | 13977 | "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}" |
21efb4d4 | 13978 | [(set_attr "type" "ssemov") |
85845bb9 | 13979 | (set_attr "mode" "<ssescalarmode>")]) |
21efb4d4 HJ |
13980 | |
13981 | (define_insn "sse4a_extrqi" | |
13982 | [(set (match_operand:V2DI 0 "register_operand" "=x") | |
977e83a3 | 13983 | (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0") |
82e86dc6 UB |
13984 | (match_operand 2 "const_0_to_255_operand") |
13985 | (match_operand 3 "const_0_to_255_operand")] | |
977e83a3 | 13986 | UNSPEC_EXTRQI))] |
21efb4d4 HJ |
13987 | "TARGET_SSE4A" |
13988 | "extrq\t{%3, %2, %0|%0, %2, %3}" | |
13989 | [(set_attr "type" "sse") | |
10e4d956 | 13990 | (set_attr "prefix_data16" "1") |
725fd454 | 13991 | (set_attr "length_immediate" "2") |
21efb4d4 HJ |
13992 | (set_attr "mode" "TI")]) |
13993 | ||
13994 | (define_insn "sse4a_extrq" | |
13995 | [(set (match_operand:V2DI 0 "register_operand" "=x") | |
977e83a3 KY |
13996 | (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0") |
13997 | (match_operand:V16QI 2 "register_operand" "x")] | |
13998 | UNSPEC_EXTRQ))] | |
21efb4d4 HJ |
13999 | "TARGET_SSE4A" |
14000 | "extrq\t{%2, %0|%0, %2}" | |
14001 | [(set_attr "type" "sse") | |
10e4d956 | 14002 | (set_attr "prefix_data16" "1") |
21efb4d4 HJ |
14003 | (set_attr "mode" "TI")]) |
14004 | ||
14005 | (define_insn "sse4a_insertqi" | |
14006 | [(set (match_operand:V2DI 0 "register_operand" "=x") | |
977e83a3 KY |
14007 | (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0") |
14008 | (match_operand:V2DI 2 "register_operand" "x") | |
82e86dc6 UB |
14009 | (match_operand 3 "const_0_to_255_operand") |
14010 | (match_operand 4 "const_0_to_255_operand")] | |
977e83a3 | 14011 | UNSPEC_INSERTQI))] |
21efb4d4 HJ |
14012 | "TARGET_SSE4A" |
14013 | "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}" | |
14014 | [(set_attr "type" "sseins") | |
725fd454 | 14015 | (set_attr "prefix_data16" "0") |
10e4d956 | 14016 | (set_attr "prefix_rep" "1") |
725fd454 | 14017 | (set_attr "length_immediate" "2") |
21efb4d4 HJ |
14018 | (set_attr "mode" "TI")]) |
14019 | ||
14020 | (define_insn "sse4a_insertq" | |
14021 | [(set (match_operand:V2DI 0 "register_operand" "=x") | |
977e83a3 KY |
14022 | (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0") |
14023 | (match_operand:V2DI 2 "register_operand" "x")] | |
14024 | UNSPEC_INSERTQ))] | |
21efb4d4 HJ |
14025 | "TARGET_SSE4A" |
14026 | "insertq\t{%2, %0|%0, %2}" | |
14027 | [(set_attr "type" "sseins") | |
725fd454 | 14028 | (set_attr "prefix_data16" "0") |
10e4d956 | 14029 | (set_attr "prefix_rep" "1") |
21efb4d4 | 14030 | (set_attr "mode" "TI")]) |
9a5cee02 L |
14031 | |
14032 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
14033 | ;; | |
14034 | ;; Intel SSE4.1 instructions | |
14035 | ;; | |
14036 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
14037 | ||
8b994297 AI |
14038 | ;; Mapping of immediate bits for blend instructions |
14039 | (define_mode_attr blendbits | |
14040 | [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")]) | |
14041 | ||
cbb734aa | 14042 | (define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>" |
45392c76 | 14043 | [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x") |
b86f6e9e | 14044 | (vec_merge:VF_128_256 |
45392c76 IE |
14045 | (match_operand:VF_128_256 2 "nonimmediate_operand" "Yrm,*xm,xm") |
14046 | (match_operand:VF_128_256 1 "register_operand" "0,0,x") | |
82e86dc6 | 14047 | (match_operand:SI 3 "const_0_to_<blendbits>_operand")))] |
c96b4102 | 14048 | "TARGET_SSE4_1" |
5e60198b | 14049 | "@ |
45392c76 | 14050 | blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} |
5e60198b UB |
14051 | blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} |
14052 | vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" | |
45392c76 | 14053 | [(set_attr "isa" "noavx,noavx,avx") |
5e60198b | 14054 | (set_attr "type" "ssemov") |
725fd454 | 14055 | (set_attr "length_immediate" "1") |
45392c76 | 14056 | (set_attr "prefix_data16" "1,1,*") |
5e60198b | 14057 | (set_attr "prefix_extra" "1") |
45392c76 | 14058 | (set_attr "prefix" "orig,orig,vex") |
85845bb9 | 14059 | (set_attr "mode" "<MODE>")]) |
9a5cee02 | 14060 | |
cbb734aa | 14061 | (define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>" |
45392c76 | 14062 | [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x") |
b86f6e9e | 14063 | (unspec:VF_128_256 |
45392c76 IE |
14064 | [(match_operand:VF_128_256 1 "register_operand" "0,0,x") |
14065 | (match_operand:VF_128_256 2 "nonimmediate_operand" "Yrm,*xm,xm") | |
14066 | (match_operand:VF_128_256 3 "register_operand" "Yz,Yz,x")] | |
85845bb9 | 14067 | UNSPEC_BLENDV))] |
9a5cee02 | 14068 | "TARGET_SSE4_1" |
5e60198b | 14069 | "@ |
45392c76 | 14070 | blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} |
5e60198b UB |
14071 | blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} |
14072 | vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" | |
45392c76 | 14073 | [(set_attr "isa" "noavx,noavx,avx") |
5e60198b UB |
14074 | (set_attr "type" "ssemov") |
14075 | (set_attr "length_immediate" "1") | |
45392c76 | 14076 | (set_attr "prefix_data16" "1,1,*") |
9a5cee02 | 14077 | (set_attr "prefix_extra" "1") |
45392c76 IE |
14078 | (set_attr "prefix" "orig,orig,vex") |
14079 | (set_attr "btver2_decode" "vector,vector,vector") | |
85845bb9 | 14080 | (set_attr "mode" "<MODE>")]) |
9a5cee02 | 14081 | |
cbb734aa | 14082 | (define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>" |
45392c76 | 14083 | [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x") |
b86f6e9e | 14084 | (unspec:VF_128_256 |
45392c76 IE |
14085 | [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,x") |
14086 | (match_operand:VF_128_256 2 "nonimmediate_operand" "Yrm,*xm,xm") | |
14087 | (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")] | |
85845bb9 | 14088 | UNSPEC_DP))] |
9a5cee02 | 14089 | "TARGET_SSE4_1" |
5e60198b | 14090 | "@ |
45392c76 | 14091 | dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} |
5e60198b UB |
14092 | dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} |
14093 | vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" | |
45392c76 | 14094 | [(set_attr "isa" "noavx,noavx,avx") |
5e60198b | 14095 | (set_attr "type" "ssemul") |
725fd454 | 14096 | (set_attr "length_immediate" "1") |
45392c76 | 14097 | (set_attr "prefix_data16" "1,1,*") |
5e60198b | 14098 | (set_attr "prefix_extra" "1") |
45392c76 IE |
14099 | (set_attr "prefix" "orig,orig,vex") |
14100 | (set_attr "btver2_decode" "vector,vector,vector") | |
85845bb9 | 14101 | (set_attr "mode" "<MODE>")]) |
9a5cee02 | 14102 | |
8b994297 AI |
14103 | ;; Mode attribute used by `vmovntdqa' pattern |
14104 | (define_mode_attr vi8_sse4_1_avx2_avx512 | |
14105 | [(V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512f")]) | |
14106 | ||
14107 | (define_insn "<vi8_sse4_1_avx2_avx512>_movntdqa" | |
45392c76 IE |
14108 | [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand" "=Yr,*x, v") |
14109 | (unspec:VI8_AVX2_AVX512F [(match_operand:VI8_AVX2_AVX512F 1 "memory_operand" "m, m, m")] | |
9a5cee02 L |
14110 | UNSPEC_MOVNTDQA))] |
14111 | "TARGET_SSE4_1" | |
95879c72 | 14112 | "%vmovntdqa\t{%1, %0|%0, %1}" |
b6837b94 | 14113 | [(set_attr "type" "ssemov") |
45392c76 IE |
14114 | (set_attr "prefix_extra" "1,1,*") |
14115 | (set_attr "prefix" "maybe_vex,maybe_vex,evex") | |
977e83a3 | 14116 | (set_attr "mode" "<sseinsnmode>")]) |
95879c72 | 14117 | |
977e83a3 | 14118 | (define_insn "<sse4_1_avx2>_mpsadbw" |
45392c76 | 14119 | [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x") |
8861ba4d | 14120 | (unspec:VI1_AVX2 |
45392c76 IE |
14121 | [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x") |
14122 | (match_operand:VI1_AVX2 2 "nonimmediate_operand" "Yrm,*xm,xm") | |
14123 | (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")] | |
8861ba4d | 14124 | UNSPEC_MPSADBW))] |
9a5cee02 | 14125 | "TARGET_SSE4_1" |
5e60198b | 14126 | "@ |
45392c76 | 14127 | mpsadbw\t{%3, %2, %0|%0, %2, %3} |
5e60198b UB |
14128 | mpsadbw\t{%3, %2, %0|%0, %2, %3} |
14129 | vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}" | |
45392c76 | 14130 | [(set_attr "isa" "noavx,noavx,avx") |
5e60198b | 14131 | (set_attr "type" "sselog1") |
725fd454 | 14132 | (set_attr "length_immediate" "1") |
725fd454 | 14133 | (set_attr "prefix_extra" "1") |
45392c76 IE |
14134 | (set_attr "prefix" "orig,orig,vex") |
14135 | (set_attr "btver2_decode" "vector,vector,vector") | |
977e83a3 KY |
14136 | (set_attr "mode" "<sseinsnmode>")]) |
14137 | ||
ed3e611e | 14138 | (define_insn "<sse4_1_avx2>_packusdw<mask_name>" |
45392c76 | 14139 | [(set (match_operand:VI2_AVX2 0 "register_operand" "=Yr,*x,v") |
ed3e611e AI |
14140 | (vec_concat:VI2_AVX2 |
14141 | (us_truncate:<ssehalfvecmode> | |
45392c76 | 14142 | (match_operand:<sseunpackmode> 1 "register_operand" "0,0,v")) |
ed3e611e | 14143 | (us_truncate:<ssehalfvecmode> |
45392c76 | 14144 | (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "Yrm,*xm,vm"))))] |
ed3e611e | 14145 | "TARGET_SSE4_1 && <mask_mode512bit_condition> && <mask_avx512bw_condition>" |
5e60198b | 14146 | "@ |
45392c76 | 14147 | packusdw\t{%2, %0|%0, %2} |
5e60198b | 14148 | packusdw\t{%2, %0|%0, %2} |
ed3e611e | 14149 | vpackusdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
45392c76 | 14150 | [(set_attr "isa" "noavx,noavx,avx") |
5e60198b | 14151 | (set_attr "type" "sselog") |
725fd454 | 14152 | (set_attr "prefix_extra" "1") |
45392c76 | 14153 | (set_attr "prefix" "orig,orig,maybe_evex") |
ed3e611e | 14154 | (set_attr "mode" "<sseinsnmode>")]) |
95879c72 | 14155 | |
977e83a3 | 14156 | (define_insn "<sse4_1_avx2>_pblendvb" |
45392c76 | 14157 | [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x") |
977e83a3 | 14158 | (unspec:VI1_AVX2 |
45392c76 IE |
14159 | [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x") |
14160 | (match_operand:VI1_AVX2 2 "nonimmediate_operand" "Yrm,*xm,xm") | |
14161 | (match_operand:VI1_AVX2 3 "register_operand" "Yz,Yz,x")] | |
5e60198b | 14162 | UNSPEC_BLENDV))] |
9a5cee02 | 14163 | "TARGET_SSE4_1" |
5e60198b | 14164 | "@ |
45392c76 | 14165 | pblendvb\t{%3, %2, %0|%0, %2, %3} |
5e60198b UB |
14166 | pblendvb\t{%3, %2, %0|%0, %2, %3} |
14167 | vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}" | |
45392c76 | 14168 | [(set_attr "isa" "noavx,noavx,avx") |
5e60198b | 14169 | (set_attr "type" "ssemov") |
725fd454 | 14170 | (set_attr "prefix_extra" "1") |
45392c76 IE |
14171 | (set_attr "length_immediate" "*,*,1") |
14172 | (set_attr "prefix" "orig,orig,vex") | |
14173 | (set_attr "btver2_decode" "vector,vector,vector") | |
977e83a3 | 14174 | (set_attr "mode" "<sseinsnmode>")]) |
95879c72 | 14175 | |
96d86115 | 14176 | (define_insn "sse4_1_pblendw" |
45392c76 | 14177 | [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,x") |
96d86115 | 14178 | (vec_merge:V8HI |
45392c76 IE |
14179 | (match_operand:V8HI 2 "nonimmediate_operand" "Yrm,*xm,xm") |
14180 | (match_operand:V8HI 1 "register_operand" "0,0,x") | |
14181 | (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")))] | |
9a5cee02 | 14182 | "TARGET_SSE4_1" |
5e60198b | 14183 | "@ |
45392c76 | 14184 | pblendw\t{%3, %2, %0|%0, %2, %3} |
5e60198b UB |
14185 | pblendw\t{%3, %2, %0|%0, %2, %3} |
14186 | vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}" | |
45392c76 | 14187 | [(set_attr "isa" "noavx,noavx,avx") |
5e60198b | 14188 | (set_attr "type" "ssemov") |
9a5cee02 | 14189 | (set_attr "prefix_extra" "1") |
725fd454 | 14190 | (set_attr "length_immediate" "1") |
45392c76 | 14191 | (set_attr "prefix" "orig,orig,vex") |
96d86115 RH |
14192 | (set_attr "mode" "TI")]) |
14193 | ||
14194 | ;; The builtin uses an 8-bit immediate. Expand that. | |
14195 | (define_expand "avx2_pblendw" | |
82e86dc6 | 14196 | [(set (match_operand:V16HI 0 "register_operand") |
96d86115 | 14197 | (vec_merge:V16HI |
82e86dc6 UB |
14198 | (match_operand:V16HI 2 "nonimmediate_operand") |
14199 | (match_operand:V16HI 1 "register_operand") | |
14200 | (match_operand:SI 3 "const_0_to_255_operand")))] | |
96d86115 RH |
14201 | "TARGET_AVX2" |
14202 | { | |
14203 | HOST_WIDE_INT val = INTVAL (operands[3]) & 0xff; | |
14204 | operands[3] = GEN_INT (val << 8 | val); | |
14205 | }) | |
14206 | ||
14207 | (define_insn "*avx2_pblendw" | |
14208 | [(set (match_operand:V16HI 0 "register_operand" "=x") | |
14209 | (vec_merge:V16HI | |
14210 | (match_operand:V16HI 2 "nonimmediate_operand" "xm") | |
14211 | (match_operand:V16HI 1 "register_operand" "x") | |
14212 | (match_operand:SI 3 "avx2_pblendw_operand" "n")))] | |
524857ec | 14213 | "TARGET_AVX2" |
96d86115 RH |
14214 | { |
14215 | operands[3] = GEN_INT (INTVAL (operands[3]) & 0xff); | |
14216 | return "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"; | |
14217 | } | |
14218 | [(set_attr "type" "ssemov") | |
14219 | (set_attr "prefix_extra" "1") | |
14220 | (set_attr "length_immediate" "1") | |
14221 | (set_attr "prefix" "vex") | |
14222 | (set_attr "mode" "OI")]) | |
977e83a3 KY |
14223 | |
;; Doubleword blend with an 8-bit immediate selector (vpblendd).
(define_insn "avx2_pblendd<mode>"
  [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
	(vec_merge:VI4_AVX2
	  (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")
	  (match_operand:VI4_AVX2 1 "register_operand" "x")
	  (match_operand:SI 3 "const_0_to_255_operand" "n")))]
  "TARGET_AVX2"
  "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])
9a5cee02 L |
14237 | |
;; Horizontal minimum of unsigned words (phminposuw).
(define_insn "sse4_1_phminposuw"
  [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x")
	(unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "Yrm,*xm")]
		     UNSPEC_PHMINPOSUW))]
  "TARGET_SSE4_1"
  "%vphminposuw\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
14248 | ||
f95dcc81 AI |
;; Sign/zero extension, byte -> word (vpmovsxbw / vpmovzxbw).

(define_insn "avx2_<code>v16qiv16hi2<mask_name>"
  [(set (match_operand:V16HI 0 "register_operand" "=v")
	(any_extend:V16HI
	  (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
  "TARGET_AVX2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
  "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "OI")])

(define_insn "avx512bw_<code>v32qiv32hi2<mask_name>"
  [(set (match_operand:V32HI 0 "register_operand" "=v")
	(any_extend:V32HI
	  (match_operand:V32QI 1 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512BW"
  "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])

;; 128-bit form: only the low 8 bytes of the source are extended.
(define_insn "sse4_1_<code>v8qiv8hi2<mask_name>"
  [(set (match_operand:V8HI 0 "register_operand" "=Yr,*v")
	(any_extend:V8HI
	  (vec_select:V8QI
	    (match_operand:V16QI 1 "nonimmediate_operand" "Yrm,*vm")
	    (parallel [(const_int 0) (const_int 1)
		       (const_int 2) (const_int 3)
		       (const_int 4) (const_int 5)
		       (const_int 6) (const_int 7)]))))]
  "TARGET_SSE4_1 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
  "%vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
  [(set_attr "type" "ssemov")
   (set_attr "ssememalign" "64")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
14287 | ||
;; Sign/zero extension, byte -> doubleword (vpmovsxbd / vpmovzxbd).

(define_insn "<mask_codefor>avx512f_<code>v16qiv16si2<mask_name>"
  [(set (match_operand:V16SI 0 "register_operand" "=v")
	(any_extend:V16SI
	  (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512F"
  "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])

;; 256-bit form: only the low 8 bytes of the source are extended.
(define_insn "avx2_<code>v8qiv8si2<mask_name>"
  [(set (match_operand:V8SI 0 "register_operand" "=v")
	(any_extend:V8SI
	  (vec_select:V8QI
	    (match_operand:V16QI 1 "nonimmediate_operand" "vm")
	    (parallel [(const_int 0) (const_int 1)
		       (const_int 2) (const_int 3)
		       (const_int 4) (const_int 5)
		       (const_int 6) (const_int 7)]))))]
  "TARGET_AVX2 && <mask_avx512vl_condition>"
  "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "OI")])

;; 128-bit form: only the low 4 bytes of the source are extended.
(define_insn "sse4_1_<code>v4qiv4si2<mask_name>"
  [(set (match_operand:V4SI 0 "register_operand" "=Yr,*v")
	(any_extend:V4SI
	  (vec_select:V4QI
	    (match_operand:V16QI 1 "nonimmediate_operand" "Yrm,*vm")
	    (parallel [(const_int 0) (const_int 1)
		       (const_int 2) (const_int 3)]))))]
  "TARGET_SSE4_1 && <mask_avx512vl_condition>"
  "%vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
  [(set_attr "type" "ssemov")
   (set_attr "ssememalign" "32")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
14328 | ||
;; Sign/zero extension, word -> doubleword (vpmovsxwd / vpmovzxwd).

(define_insn "avx512f_<code>v16hiv16si2<mask_name>"
  [(set (match_operand:V16SI 0 "register_operand" "=v")
	(any_extend:V16SI
	  (match_operand:V16HI 1 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512F"
  "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])

(define_insn "avx2_<code>v8hiv8si2<mask_name>"
  [(set (match_operand:V8SI 0 "register_operand" "=v")
	(any_extend:V8SI
	  (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
  "TARGET_AVX2 && <mask_avx512vl_condition>"
  "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "OI")])

;; 128-bit form: only the low 4 words of the source are extended.
(define_insn "sse4_1_<code>v4hiv4si2<mask_name>"
  [(set (match_operand:V4SI 0 "register_operand" "=Yr,*v")
	(any_extend:V4SI
	  (vec_select:V4HI
	    (match_operand:V8HI 1 "nonimmediate_operand" "Yrm,*vm")
	    (parallel [(const_int 0) (const_int 1)
		       (const_int 2) (const_int 3)]))))]
  "TARGET_SSE4_1 && <mask_avx512vl_condition>"
  "%vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
  [(set_attr "type" "ssemov")
   (set_attr "ssememalign" "64")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
14364 | ||
;; Sign/zero extension, byte -> quadword (vpmovsxbq / vpmovzxbq).

;; Only the low 8 bytes of the source are extended.
(define_insn "avx512f_<code>v8qiv8di2<mask_name>"
  [(set (match_operand:V8DI 0 "register_operand" "=v")
	(any_extend:V8DI
	  (vec_select:V8QI
	    (match_operand:V16QI 1 "nonimmediate_operand" "vm")
	    (parallel [(const_int 0) (const_int 1)
		       (const_int 2) (const_int 3)
		       (const_int 4) (const_int 5)
		       (const_int 6) (const_int 7)]))))]
  "TARGET_AVX512F"
  "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])

;; Only the low 4 bytes of the source are extended.
(define_insn "avx2_<code>v4qiv4di2<mask_name>"
  [(set (match_operand:V4DI 0 "register_operand" "=v")
	(any_extend:V4DI
	  (vec_select:V4QI
	    (match_operand:V16QI 1 "nonimmediate_operand" "vm")
	    (parallel [(const_int 0) (const_int 1)
		       (const_int 2) (const_int 3)]))))]
  "TARGET_AVX2 && <mask_avx512vl_condition>"
  "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "OI")])

;; Only the low 2 bytes of the source are extended.
(define_insn "sse4_1_<code>v2qiv2di2<mask_name>"
  [(set (match_operand:V2DI 0 "register_operand" "=Yr,*v")
	(any_extend:V2DI
	  (vec_select:V2QI
	    (match_operand:V16QI 1 "nonimmediate_operand" "Yrm,*vm")
	    (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE4_1 && <mask_avx512vl_condition>"
  "%vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %w1}"
  [(set_attr "type" "ssemov")
   (set_attr "ssememalign" "16")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
14407 | ||
;; Sign/zero extension, word -> quadword (vpmovsxwq / vpmovzxwq).

(define_insn "avx512f_<code>v8hiv8di2<mask_name>"
  [(set (match_operand:V8DI 0 "register_operand" "=v")
	(any_extend:V8DI
	  (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512F"
  "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])

;; Only the low 4 words of the source are extended.
(define_insn "avx2_<code>v4hiv4di2<mask_name>"
  [(set (match_operand:V4DI 0 "register_operand" "=v")
	(any_extend:V4DI
	  (vec_select:V4HI
	    (match_operand:V8HI 1 "nonimmediate_operand" "vm")
	    (parallel [(const_int 0) (const_int 1)
		       (const_int 2) (const_int 3)]))))]
  "TARGET_AVX2 && <mask_avx512vl_condition>"
  "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "OI")])

;; Only the low 2 words of the source are extended.
(define_insn "sse4_1_<code>v2hiv2di2<mask_name>"
  [(set (match_operand:V2DI 0 "register_operand" "=Yr,*v")
	(any_extend:V2DI
	  (vec_select:V2HI
	    (match_operand:V8HI 1 "nonimmediate_operand" "Yrm,*vm")
	    (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE4_1 && <mask_avx512vl_condition>"
  "%vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
  [(set_attr "type" "ssemov")
   (set_attr "ssememalign" "32")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
14445 | ||
;; Sign/zero extension, doubleword -> quadword (vpmovsxdq / vpmovzxdq).

(define_insn "avx512f_<code>v8siv8di2<mask_name>"
  [(set (match_operand:V8DI 0 "register_operand" "=v")
	(any_extend:V8DI
	  (match_operand:V8SI 1 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512F"
  "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])

(define_insn "avx2_<code>v4siv4di2<mask_name>"
  [(set (match_operand:V4DI 0 "register_operand" "=v")
	(any_extend:V4DI
	  (match_operand:V4SI 1 "nonimmediate_operand" "vm")))]
  "TARGET_AVX2 && <mask_avx512vl_condition>"
  "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "maybe_evex")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "OI")])

;; Only the low 2 doublewords of the source are extended.
(define_insn "sse4_1_<code>v2siv2di2<mask_name>"
  [(set (match_operand:V2DI 0 "register_operand" "=Yr,*v")
	(any_extend:V2DI
	  (vec_select:V2SI
	    (match_operand:V4SI 1 "nonimmediate_operand" "Yrm,*vm")
	    (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE4_1 && <mask_avx512vl_condition>"
  "%vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
  [(set_attr "type" "ssemov")
   (set_attr "ssememalign" "64")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
14480 | ||
95879c72 L |
;; ptestps/ptestpd are very similar to comiss and ucomiss when
;; setting FLAGS_REG.  But it is not a really compare instruction.
(define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>"
  [(set (reg:CC FLAGS_REG)
	(unspec:CC [(match_operand:VF_128_256 0 "register_operand" "x")
		    (match_operand:VF_128_256 1 "nonimmediate_operand" "xm")]
		   UNSPEC_VTESTP))]
  "TARGET_AVX"
  "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecomi")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<MODE>")])
14494 | ||
9a5cee02 L |
;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
;; But it is not a really compare instruction.
(define_insn "avx_ptest256"
  [(set (reg:CC FLAGS_REG)
	(unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
		    (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
		   UNSPEC_PTEST))]
  "TARGET_AVX"
  "vptest\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecomi")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "btver2_decode" "vector")
   (set_attr "mode" "OI")])

(define_insn "sse4_1_ptest"
  [(set (reg:CC FLAGS_REG)
	(unspec:CC [(match_operand:V2DI 0 "register_operand" "Yr,*x")
		    (match_operand:V2DI 1 "nonimmediate_operand" "Yrm,*xm")]
		   UNSPEC_PTEST))]
  "TARGET_SSE4_1"
  "%vptest\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecomi")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
14521 | ||
;; Packed FP rounding with explicit rounding-mode immediate (roundps/roundpd).
(define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
  [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x")
	(unspec:VF_128_256
	  [(match_operand:VF_128_256 1 "nonimmediate_operand" "Yrm,*xm")
	   (match_operand:SI 2 "const_0_to_15_operand" "n,n")]
	  UNSPEC_ROUND))]
  "TARGET_ROUND"
  "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "ssecvt")
   ;; Legacy encoding needs the 0x66 prefix; VEX encodes it implicitly.
   (set (attr "prefix_data16")
     (if_then_else
       (match_test "TARGET_AVX")
	 (const_string "*")
	 (const_string "1")))
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<MODE>")])
14540 | ||
;; Round, then convert the result to signed integers (truncating).
(define_expand "<sse4_1>_round<ssemodesuffix>_sfix<avxsizesuffix>"
  [(match_operand:<sseintvecmode> 0 "register_operand")
   (match_operand:VF1_128_256 1 "nonimmediate_operand")
   (match_operand:SI 2 "const_0_to_15_operand")]
  "TARGET_ROUND"
{
  rtx tmp = gen_reg_rtx (<MODE>mode);

  emit_insn
    (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp, operands[1],
						       operands[2]));
  emit_insn
    (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
  DONE;
})
14556 | ||
ec5e777c AI |
;; AVX-512 has no 512-bit roundpd; implement it via vrndscalepd.
(define_expand "avx512f_roundpd512"
  [(match_operand:V8DF 0 "register_operand")
   (match_operand:V8DF 1 "nonimmediate_operand")
   (match_operand:SI 2 "const_0_to_15_operand")]
  "TARGET_AVX512F"
{
  emit_insn (gen_avx512f_rndscalev8df (operands[0], operands[1], operands[2]));
  DONE;
})
14566 | ||
;; Round two DF vectors and pack the truncated results into one SI vector.
(define_expand "<sse4_1>_round<ssemodesuffix>_vec_pack_sfix<avxsizesuffix>"
  [(match_operand:<ssepackfltmode> 0 "register_operand")
   (match_operand:VF2 1 "nonimmediate_operand")
   (match_operand:VF2 2 "nonimmediate_operand")
   (match_operand:SI 3 "const_0_to_15_operand")]
  "TARGET_ROUND"
{
  rtx tmp0, tmp1;

  /* With 256-bit AVX available (and preferred), concatenate the two
     V2DF inputs and do one 256-bit round + convert instead of two
     128-bit rounds plus a pack.  */
  if (<MODE>mode == V2DFmode
      && TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
    {
      rtx tmp2 = gen_reg_rtx (V4DFmode);

      tmp0 = gen_reg_rtx (V4DFmode);
      tmp1 = force_reg (V2DFmode, operands[1]);

      emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
      emit_insn (gen_avx_roundpd256 (tmp2, tmp0, operands[3]));
      emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
    }
  else
    {
      tmp0 = gen_reg_rtx (<MODE>mode);
      tmp1 = gen_reg_rtx (<MODE>mode);

      emit_insn
       (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp0, operands[1],
							  operands[3]));
      emit_insn
       (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp1, operands[2],
							  operands[3]));
      emit_insn
       (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
    }
  DONE;
})
14604 | ||
;; Scalar rounding (roundss/roundsd): round the low element of operand 2,
;; upper elements are taken from operand 1.
(define_insn "sse4_1_round<ssescalarmodesuffix>"
  [(set (match_operand:VF_128 0 "register_operand" "=Yr,*x,x")
	(vec_merge:VF_128
	  (unspec:VF_128
	    [(match_operand:VF_128 2 "register_operand" "Yr,*x,x")
	     (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")]
	    UNSPEC_ROUND)
	  (match_operand:VF_128 1 "register_operand" "0,0,x")
	  (const_int 1)))]
  "TARGET_ROUND"
  "@
   round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
   round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
   vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "ssecvt")
   (set_attr "length_immediate" "1")
   (set_attr "prefix_data16" "1,1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,orig,vex")
   (set_attr "mode" "<MODE>")])
06f4e35d | 14626 | |
bbeb5beb UB |
;; Round-to-nearest-away-from-zero, emulated as trunc (x + copysign
;; (nextafter (0.5, 0.0), x)); only valid when traps are disabled.
(define_expand "round<mode>2"
  [(set (match_dup 4)
	(plus:VF
	  (match_operand:VF 1 "register_operand")
	  (match_dup 3)))
   (set (match_operand:VF 0 "register_operand")
	(unspec:VF
	  [(match_dup 4) (match_dup 5)]
	  UNSPEC_ROUND))]
  "TARGET_ROUND && !flag_trapping_math"
{
  machine_mode scalar_mode;
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;
  rtx half, vec_half;

  scalar_mode = GET_MODE_INNER (<MODE>mode);

  /* load nextafter (0.5, 0.0); adding exactly 0.5 would round
     halfway cases the wrong way after the truncation.  */
  fmt = REAL_MODE_FORMAT (scalar_mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1, scalar_mode);
  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
  half = const_double_from_real_value (pred_half, scalar_mode);

  vec_half = ix86_build_const_vector (<MODE>mode, true, half);
  vec_half = force_reg (<MODE>mode, vec_half);

  operands[3] = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_copysign<mode>3 (operands[3], vec_half, operands[1]));

  operands[4] = gen_reg_rtx (<MODE>mode);
  operands[5] = GEN_INT (ROUND_TRUNC);
})
14660 | ||
;; round, then truncating conversion to a signed integer vector.
(define_expand "round<mode>2_sfix"
  [(match_operand:<sseintvecmode> 0 "register_operand")
   (match_operand:VF1_128_256 1 "register_operand")]
  "TARGET_ROUND && !flag_trapping_math"
{
  rtx tmp = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_round<mode>2 (tmp, operands[1]));

  emit_insn
    (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
  DONE;
})
14674 | ||
;; round two DF vectors and pack the truncated results into one SI vector.
(define_expand "round<mode>2_vec_pack_sfix"
  [(match_operand:<ssepackfltmode> 0 "register_operand")
   (match_operand:VF2 1 "register_operand")
   (match_operand:VF2 2 "register_operand")]
  "TARGET_ROUND && !flag_trapping_math"
{
  rtx tmp0, tmp1;

  /* With 256-bit AVX available (and preferred), concatenate the two
     V2DF inputs and round/convert once at 256 bits.  */
  if (<MODE>mode == V2DFmode
      && TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
    {
      rtx tmp2 = gen_reg_rtx (V4DFmode);

      tmp0 = gen_reg_rtx (V4DFmode);
      tmp1 = force_reg (V2DFmode, operands[1]);

      emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
      emit_insn (gen_roundv4df2 (tmp2, tmp0));
      emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
    }
  else
    {
      tmp0 = gen_reg_rtx (<MODE>mode);
      tmp1 = gen_reg_rtx (<MODE>mode);

      emit_insn (gen_round<mode>2 (tmp0, operands[1]));
      emit_insn (gen_round<mode>2 (tmp1, operands[2]));

      emit_insn
	(gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
    }
  DONE;
})
14708 | ||
06f4e35d L |
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Intel SSE4.2 string/text processing instructions
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14714 | ||
;; Combined pcmpestr pattern producing all three results (ECX index,
;; XMM0 mask, flags).  Split into the single-result patterns, emitting
;; only those whose outputs are actually used.
(define_insn_and_split "sse4_2_pcmpestr"
  [(set (match_operand:SI 0 "register_operand" "=c,c")
	(unspec:SI
	  [(match_operand:V16QI 2 "register_operand" "x,x")
	   (match_operand:SI 3 "register_operand" "a,a")
	   (match_operand:V16QI 4 "nonimmediate_operand" "x,m")
	   (match_operand:SI 5 "register_operand" "d,d")
	   (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
	  UNSPEC_PCMPESTR))
   (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
	(unspec:V16QI
	  [(match_dup 2)
	   (match_dup 3)
	   (match_dup 4)
	   (match_dup 5)
	   (match_dup 6)]
	  UNSPEC_PCMPESTR))
   (set (reg:CC FLAGS_REG)
	(unspec:CC
	  [(match_dup 2)
	   (match_dup 3)
	   (match_dup 4)
	   (match_dup 5)
	   (match_dup 6)]
	  UNSPEC_PCMPESTR))]
  "TARGET_SSE4_2
   && can_create_pseudo_p ()"
  "#"
  "&& 1"
  [(const_int 0)]
{
  /* A result register is live unless it carries a REG_UNUSED note.  */
  int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
  int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
  int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);

  if (ecx)
    emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
				     operands[3], operands[4],
				     operands[5], operands[6]));
  if (xmm0)
    emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
				     operands[3], operands[4],
				     operands[5], operands[6]));
  /* pcmpestri/pcmpestrm set the flags anyway; a separate flags-only
     pattern is needed only when neither result register is live.  */
  if (flags && !(ecx || xmm0))
    emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
					   operands[2], operands[3],
					   operands[4], operands[5],
					   operands[6]));
  if (!(flags || ecx || xmm0))
    emit_note (NOTE_INSN_DELETED);

  DONE;
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "ssememalign" "8")
   (set_attr "length_immediate" "1")
   (set_attr "memory" "none,load")
   (set_attr "mode" "TI")])
14775 | ||
b86da593 UB |
;; As sse4_2_pcmpestr, but with the memory operand wrapped in
;; UNSPEC_LOADU (unaligned load folded into the string compare).
(define_insn_and_split "*sse4_2_pcmpestr_unaligned"
  [(set (match_operand:SI 0 "register_operand" "=c")
	(unspec:SI
	  [(match_operand:V16QI 2 "register_operand" "x")
	   (match_operand:SI 3 "register_operand" "a")
	   (unspec:V16QI
	     [(match_operand:V16QI 4 "memory_operand" "m")]
	     UNSPEC_LOADU)
	   (match_operand:SI 5 "register_operand" "d")
	   (match_operand:SI 6 "const_0_to_255_operand" "n")]
	  UNSPEC_PCMPESTR))
   (set (match_operand:V16QI 1 "register_operand" "=Yz")
	(unspec:V16QI
	  [(match_dup 2)
	   (match_dup 3)
	   (unspec:V16QI [(match_dup 4)] UNSPEC_LOADU)
	   (match_dup 5)
	   (match_dup 6)]
	  UNSPEC_PCMPESTR))
   (set (reg:CC FLAGS_REG)
	(unspec:CC
	  [(match_dup 2)
	   (match_dup 3)
	   (unspec:V16QI [(match_dup 4)] UNSPEC_LOADU)
	   (match_dup 5)
	   (match_dup 6)]
	  UNSPEC_PCMPESTR))]
  "TARGET_SSE4_2
   && can_create_pseudo_p ()"
  "#"
  "&& 1"
  [(const_int 0)]
{
  /* A result register is live unless it carries a REG_UNUSED note.  */
  int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
  int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
  int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);

  if (ecx)
    emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
				     operands[3], operands[4],
				     operands[5], operands[6]));
  if (xmm0)
    emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
				     operands[3], operands[4],
				     operands[5], operands[6]));
  if (flags && !(ecx || xmm0))
    emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
					   operands[2], operands[3],
					   operands[4], operands[5],
					   operands[6]));
  if (!(flags || ecx || xmm0))
    emit_note (NOTE_INSN_DELETED);

  DONE;
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "ssememalign" "8")
   (set_attr "length_immediate" "1")
   (set_attr "memory" "load")
   (set_attr "mode" "TI")])
14838 | ||
06f4e35d L |
;; Explicit-length string compare returning an index in ECX plus flags.
(define_insn "sse4_2_pcmpestri"
  [(set (match_operand:SI 0 "register_operand" "=c,c")
	(unspec:SI
	  [(match_operand:V16QI 1 "register_operand" "x,x")
	   (match_operand:SI 2 "register_operand" "a,a")
	   (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
	   (match_operand:SI 4 "register_operand" "d,d")
	   (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
	  UNSPEC_PCMPESTR))
   (set (reg:CC FLAGS_REG)
	(unspec:CC
	  [(match_dup 1)
	   (match_dup 2)
	   (match_dup 3)
	   (match_dup 4)
	   (match_dup 5)]
	  UNSPEC_PCMPESTR))]
  "TARGET_SSE4_2"
  "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "ssememalign" "8")
   (set_attr "length_immediate" "1")
   (set_attr "btver2_decode" "vector")
   (set_attr "memory" "none,load")
   (set_attr "mode" "TI")])
14867 | ||
;; Explicit-length string compare returning a mask in XMM0 plus flags.
(define_insn "sse4_2_pcmpestrm"
  [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
	(unspec:V16QI
	  [(match_operand:V16QI 1 "register_operand" "x,x")
	   (match_operand:SI 2 "register_operand" "a,a")
	   (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
	   (match_operand:SI 4 "register_operand" "d,d")
	   (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
	  UNSPEC_PCMPESTR))
   (set (reg:CC FLAGS_REG)
	(unspec:CC
	  [(match_dup 1)
	   (match_dup 2)
	   (match_dup 3)
	   (match_dup 4)
	   (match_dup 5)]
	  UNSPEC_PCMPESTR))]
  "TARGET_SSE4_2"
  "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "ssememalign" "8")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "btver2_decode" "vector")
   (set_attr "memory" "none,load")
   (set_attr "mode" "TI")])
14896 | ||
;; Flags-only form: either pcmpestrm or pcmpestri may be emitted, with
;; the unused result register (XMM0 or ECX) clobbered.
(define_insn "sse4_2_pcmpestr_cconly"
  [(set (reg:CC FLAGS_REG)
	(unspec:CC
	  [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
	   (match_operand:SI 3 "register_operand" "a,a,a,a")
	   (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
	   (match_operand:SI 5 "register_operand" "d,d,d,d")
	   (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
	  UNSPEC_PCMPESTR))
   (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
   (clobber (match_scratch:SI    1 "= X, X,c,c"))]
  "TARGET_SSE4_2"
  "@
   %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
   %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
   %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
   %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "ssememalign" "8")
   (set_attr "length_immediate" "1")
   (set_attr "memory" "none,load,none,load")
   (set_attr "btver2_decode" "vector,vector,vector,vector")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
14923 | ||
;; Combined implicit-length pcmpistr pattern that nominally produces all
;; three results (index in ecx, mask in xmm0, EFLAGS).  It is never emitted
;; as-is ("#"); the split inspects REG_UNUSED notes to see which results
;; are actually live and emits only the cheapest real instruction(s) —
;; pcmpistri, pcmpistrm, the flags-only variant, or nothing at all.
(define_insn_and_split "sse4_2_pcmpistr"
  [(set (match_operand:SI 0 "register_operand" "=c,c")
	(unspec:SI
	  [(match_operand:V16QI 2 "register_operand" "x,x")
	   (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
	   (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
	  UNSPEC_PCMPISTR))
   (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
	(unspec:V16QI
	  [(match_dup 2)
	   (match_dup 3)
	   (match_dup 4)]
	  UNSPEC_PCMPISTR))
   (set (reg:CC FLAGS_REG)
	(unspec:CC
	  [(match_dup 2)
	   (match_dup 3)
	   (match_dup 4)]
	  UNSPEC_PCMPISTR))]
  "TARGET_SSE4_2
   && can_create_pseudo_p ()"
  "#"
  "&& 1"
  [(const_int 0)]
{
  /* A result register is needed iff it carries no REG_UNUSED note.  */
  int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
  int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
  int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);

  if (ecx)
    emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
				     operands[3], operands[4]));
  if (xmm0)
    emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
				     operands[3], operands[4]));
  /* pcmpistri/pcmpistrm above already set the flags as a side effect.  */
  if (flags && !(ecx || xmm0))
    emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
					   operands[2], operands[3],
					   operands[4]));
  /* Keep the splitter from emitting an empty sequence.  */
  if (!(flags || ecx || xmm0))
    emit_note (NOTE_INSN_DELETED);

  DONE;
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "ssememalign" "8")
   (set_attr "length_immediate" "1")
   (set_attr "memory" "none,load")
   (set_attr "mode" "TI")])
;; Same splitting strategy as sse4_2_pcmpistr, but matching a memory operand
;; wrapped in UNSPEC_LOADU, so a combined unaligned-load + pcmpistr can be
;; recognized and folded into the instruction's own memory operand
;; (pcmpistr has no alignment requirement; cf. ssememalign "8").
(define_insn_and_split "*sse4_2_pcmpistr_unaligned"
  [(set (match_operand:SI 0 "register_operand" "=c")
	(unspec:SI
	  [(match_operand:V16QI 2 "register_operand" "x")
	   (unspec:V16QI
	     [(match_operand:V16QI 3 "memory_operand" "m")]
	     UNSPEC_LOADU)
	   (match_operand:SI 4 "const_0_to_255_operand" "n")]
	  UNSPEC_PCMPISTR))
   (set (match_operand:V16QI 1 "register_operand" "=Yz")
	(unspec:V16QI
	  [(match_dup 2)
	   (unspec:V16QI [(match_dup 3)] UNSPEC_LOADU)
	   (match_dup 4)]
	  UNSPEC_PCMPISTR))
   (set (reg:CC FLAGS_REG)
	(unspec:CC
	  [(match_dup 2)
	   (unspec:V16QI [(match_dup 3)] UNSPEC_LOADU)
	   (match_dup 4)]
	  UNSPEC_PCMPISTR))]
  "TARGET_SSE4_2
   && can_create_pseudo_p ()"
  "#"
  "&& 1"
  [(const_int 0)]
{
  /* A result register is needed iff it carries no REG_UNUSED note.  */
  int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
  int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
  int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);

  if (ecx)
    emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
				     operands[3], operands[4]));
  if (xmm0)
    emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
				     operands[3], operands[4]));
  if (flags && !(ecx || xmm0))
    emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
					   operands[2], operands[3],
					   operands[4]));
  if (!(flags || ecx || xmm0))
    emit_note (NOTE_INSN_DELETED);

  DONE;
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "ssememalign" "8")
   (set_attr "length_immediate" "1")
   (set_attr "memory" "load")
   (set_attr "mode" "TI")])
;; Implicit-length string compare producing an index: result is pinned to
;; ecx (constraint "c"); EFLAGS is set from the same comparison.
(define_insn "sse4_2_pcmpistri"
  [(set (match_operand:SI 0 "register_operand" "=c,c")
	(unspec:SI
	  [(match_operand:V16QI 1 "register_operand" "x,x")
	   (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
	   (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
	  UNSPEC_PCMPISTR))
   (set (reg:CC FLAGS_REG)
	(unspec:CC
	  [(match_dup 1)
	   (match_dup 2)
	   (match_dup 3)]
	  UNSPEC_PCMPISTR))]
  "TARGET_SSE4_2"
  "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "ssememalign" "8")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "memory" "none,load")
   (set_attr "btver2_decode" "vector")
   (set_attr "mode" "TI")])
;; Implicit-length string compare producing a mask: result is pinned to
;; xmm0 (constraint "Yz"); EFLAGS is set from the same comparison.
(define_insn "sse4_2_pcmpistrm"
  [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
	(unspec:V16QI
	  [(match_operand:V16QI 1 "register_operand" "x,x")
	   (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
	   (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
	  UNSPEC_PCMPISTR))
   (set (reg:CC FLAGS_REG)
	(unspec:CC
	  [(match_dup 1)
	   (match_dup 2)
	   (match_dup 3)]
	  UNSPEC_PCMPISTR))]
  "TARGET_SSE4_2"
  "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "ssememalign" "8")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "memory" "none,load")
   (set_attr "btver2_decode" "vector")
   (set_attr "mode" "TI")])
;; pcmpistr variant used when only EFLAGS is live: alternatives 0-1 emit
;; pcmpistrm (clobbering xmm0), alternatives 2-3 emit pcmpistri (clobbering
;; ecx); both mnemonics set the flags identically.
(define_insn "sse4_2_pcmpistr_cconly"
  [(set (reg:CC FLAGS_REG)
	(unspec:CC
	  [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
	   (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
	   (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
	  UNSPEC_PCMPISTR))
   (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
   (clobber (match_scratch:SI 1 "= X, X,c,c"))]
  "TARGET_SSE4_2"
  "@
   %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
   %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
   %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
   %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "ssememalign" "8")
   (set_attr "length_immediate" "1")
   (set_attr "memory" "none,load,none,load")
   (set_attr "prefix" "maybe_vex")
   (set_attr "btver2_decode" "vector,vector,vector,vector")
   (set_attr "mode" "TI")])
;; Packed float variants
;; Maps the index-vector mode of a gather/scatter prefetch to the SF memory
;; mode it touches: 8 x DI indices -> V8SF, 16 x SI indices -> V16SF.
(define_mode_attr GATHER_SCATTER_SF_MEM_MODE
  [(V8DI "V8SF") (V16SI "V16SF")])
;; Expander for the AVX-512PF single-precision gather-prefetch builtin.
;; Operand 4 selects the hint (2 or 3, see the insn patterns below);
;; operand 5 is filled in with the VSIB address unspec.
(define_expand "avx512pf_gatherpf<mode>sf"
  [(unspec
     [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
      (mem:<GATHER_SCATTER_SF_MEM_MODE>
	(match_par_dup 5
	  [(match_operand 2 "vsib_address_operand")
	   (match_operand:VI48_512 1 "register_operand")
	   (match_operand:SI 3 "const1248_operand")]))
      (match_operand:SI 4 "const_2_to_3_operand")]
     UNSPEC_GATHER_PREFETCH)]
  "TARGET_AVX512PF"
{
  /* Build the base+index+scale VSIB address for match_par_dup 5.  */
  operands[5]
    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
					operands[3]), UNSPEC_VSIBADDR);
})
e711dffd | 15126 | (define_insn "*avx512pf_gatherpf<mode>sf_mask" |
0fe65b75 | 15127 | [(unspec |
be792bce | 15128 | [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk") |
e711dffd | 15129 | (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator" |
0fe65b75 | 15130 | [(unspec:P |
65e95828 | 15131 | [(match_operand:P 2 "vsib_address_operand" "Tv") |
0fe65b75 AI |
15132 | (match_operand:VI48_512 1 "register_operand" "v") |
15133 | (match_operand:SI 3 "const1248_operand" "n")] | |
15134 | UNSPEC_VSIBADDR)]) | |
22c8aab3 | 15135 | (match_operand:SI 4 "const_2_to_3_operand" "n")] |
0fe65b75 AI |
15136 | UNSPEC_GATHER_PREFETCH)] |
15137 | "TARGET_AVX512PF" | |
15138 | { | |
15139 | switch (INTVAL (operands[4])) | |
15140 | { | |
22c8aab3 | 15141 | case 3: |
cf73ee60 KY |
15142 | return "vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}"; |
15143 | case 2: | |
0fe65b75 AI |
15144 | return "vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}"; |
15145 | default: | |
15146 | gcc_unreachable (); | |
15147 | } | |
15148 | } | |
15149 | [(set_attr "type" "sse") | |
15150 | (set_attr "prefix" "evex") | |
15151 | (set_attr "mode" "XI")]) | |
15152 | ||
e711dffd | 15153 | (define_insn "*avx512pf_gatherpf<mode>sf" |
0fe65b75 AI |
15154 | [(unspec |
15155 | [(const_int -1) | |
e711dffd | 15156 | (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 4 "vsib_mem_operator" |
0fe65b75 | 15157 | [(unspec:P |
65e95828 | 15158 | [(match_operand:P 1 "vsib_address_operand" "Tv") |
0fe65b75 AI |
15159 | (match_operand:VI48_512 0 "register_operand" "v") |
15160 | (match_operand:SI 2 "const1248_operand" "n")] | |
15161 | UNSPEC_VSIBADDR)]) | |
22c8aab3 | 15162 | (match_operand:SI 3 "const_2_to_3_operand" "n")] |
0fe65b75 AI |
15163 | UNSPEC_GATHER_PREFETCH)] |
15164 | "TARGET_AVX512PF" | |
15165 | { | |
15166 | switch (INTVAL (operands[3])) | |
15167 | { | |
22c8aab3 | 15168 | case 3: |
cf73ee60 KY |
15169 | return "vgatherpf0<ssemodesuffix>ps\t{%4|%4}"; |
15170 | case 2: | |
0fe65b75 AI |
15171 | return "vgatherpf1<ssemodesuffix>ps\t{%4|%4}"; |
15172 | default: | |
15173 | gcc_unreachable (); | |
15174 | } | |
15175 | } | |
15176 | [(set_attr "type" "sse") | |
15177 | (set_attr "prefix" "evex") | |
15178 | (set_attr "mode" "XI")]) | |
15179 | ||
;; Packed double variants
;; Expander for the AVX-512PF double-precision gather-prefetch builtin;
;; memory mode is fixed at V8DF.
(define_expand "avx512pf_gatherpf<mode>df"
  [(unspec
     [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
      (mem:V8DF
	(match_par_dup 5
	  [(match_operand 2 "vsib_address_operand")
	   (match_operand:VI4_256_8_512 1 "register_operand")
	   (match_operand:SI 3 "const1248_operand")]))
      (match_operand:SI 4 "const_2_to_3_operand")]
     UNSPEC_GATHER_PREFETCH)]
  "TARGET_AVX512PF"
{
  /* Build the base+index+scale VSIB address for match_par_dup 5.  */
  operands[5]
    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
					operands[3]), UNSPEC_VSIBADDR);
})
;; Masked DF gather prefetch: hint 3 -> vgatherpf0 (L1), hint 2 ->
;; vgatherpf1 (L2).
(define_insn "*avx512pf_gatherpf<mode>df_mask"
  [(unspec
     [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
      (match_operator:V8DF 5 "vsib_mem_operator"
	[(unspec:P
	   [(match_operand:P 2 "vsib_address_operand" "Tv")
	    (match_operand:VI4_256_8_512 1 "register_operand" "v")
	    (match_operand:SI 3 "const1248_operand" "n")]
	   UNSPEC_VSIBADDR)])
      (match_operand:SI 4 "const_2_to_3_operand" "n")]
     UNSPEC_GATHER_PREFETCH)]
  "TARGET_AVX512PF"
{
  switch (INTVAL (operands[4]))
    {
    case 3:
      return "vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
    case 2:
      return "vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "sse")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
;; Unmasked DF gather prefetch (constant -1 mask enables all elements).
(define_insn "*avx512pf_gatherpf<mode>df"
  [(unspec
     [(const_int -1)
      (match_operator:V8DF 4 "vsib_mem_operator"
	[(unspec:P
	   [(match_operand:P 1 "vsib_address_operand" "Tv")
	    (match_operand:VI4_256_8_512 0 "register_operand" "v")
	    (match_operand:SI 2 "const1248_operand" "n")]
	   UNSPEC_VSIBADDR)])
      (match_operand:SI 3 "const_2_to_3_operand" "n")]
     UNSPEC_GATHER_PREFETCH)]
  "TARGET_AVX512PF"
{
  switch (INTVAL (operands[3]))
    {
    case 3:
      return "vgatherpf0<ssemodesuffix>pd\t{%4|%4}";
    case 2:
      return "vgatherpf1<ssemodesuffix>pd\t{%4|%4}";
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "sse")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
15252 | ;; Packed float variants | |
15253 | (define_expand "avx512pf_scatterpf<mode>sf" | |
0fe65b75 AI |
15254 | [(unspec |
15255 | [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand") | |
e711dffd | 15256 | (mem:<GATHER_SCATTER_SF_MEM_MODE> |
0fe65b75 AI |
15257 | (match_par_dup 5 |
15258 | [(match_operand 2 "vsib_address_operand") | |
15259 | (match_operand:VI48_512 1 "register_operand") | |
15260 | (match_operand:SI 3 "const1248_operand")])) | |
66b03f81 | 15261 | (match_operand:SI 4 "const2367_operand")] |
0fe65b75 AI |
15262 | UNSPEC_SCATTER_PREFETCH)] |
15263 | "TARGET_AVX512PF" | |
15264 | { | |
15265 | operands[5] | |
15266 | = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1], | |
15267 | operands[3]), UNSPEC_VSIBADDR); | |
15268 | }) | |
15269 | ||
e711dffd | 15270 | (define_insn "*avx512pf_scatterpf<mode>sf_mask" |
0fe65b75 | 15271 | [(unspec |
be792bce | 15272 | [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk") |
e711dffd | 15273 | (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator" |
0fe65b75 | 15274 | [(unspec:P |
65e95828 | 15275 | [(match_operand:P 2 "vsib_address_operand" "Tv") |
0fe65b75 AI |
15276 | (match_operand:VI48_512 1 "register_operand" "v") |
15277 | (match_operand:SI 3 "const1248_operand" "n")] | |
15278 | UNSPEC_VSIBADDR)]) | |
66b03f81 | 15279 | (match_operand:SI 4 "const2367_operand" "n")] |
0fe65b75 AI |
15280 | UNSPEC_SCATTER_PREFETCH)] |
15281 | "TARGET_AVX512PF" | |
15282 | { | |
15283 | switch (INTVAL (operands[4])) | |
15284 | { | |
22c8aab3 | 15285 | case 3: |
66b03f81 | 15286 | case 7: |
cf73ee60 KY |
15287 | return "vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}"; |
15288 | case 2: | |
15289 | case 6: | |
0fe65b75 AI |
15290 | return "vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}"; |
15291 | default: | |
15292 | gcc_unreachable (); | |
15293 | } | |
15294 | } | |
15295 | [(set_attr "type" "sse") | |
15296 | (set_attr "prefix" "evex") | |
15297 | (set_attr "mode" "XI")]) | |
15298 | ||
e711dffd | 15299 | (define_insn "*avx512pf_scatterpf<mode>sf" |
0fe65b75 AI |
15300 | [(unspec |
15301 | [(const_int -1) | |
e711dffd | 15302 | (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 4 "vsib_mem_operator" |
0fe65b75 | 15303 | [(unspec:P |
65e95828 | 15304 | [(match_operand:P 1 "vsib_address_operand" "Tv") |
0fe65b75 AI |
15305 | (match_operand:VI48_512 0 "register_operand" "v") |
15306 | (match_operand:SI 2 "const1248_operand" "n")] | |
15307 | UNSPEC_VSIBADDR)]) | |
66b03f81 | 15308 | (match_operand:SI 3 "const2367_operand" "n")] |
0fe65b75 AI |
15309 | UNSPEC_SCATTER_PREFETCH)] |
15310 | "TARGET_AVX512PF" | |
15311 | { | |
15312 | switch (INTVAL (operands[3])) | |
15313 | { | |
22c8aab3 | 15314 | case 3: |
66b03f81 | 15315 | case 7: |
cf73ee60 KY |
15316 | return "vscatterpf0<ssemodesuffix>ps\t{%4|%4}"; |
15317 | case 2: | |
15318 | case 6: | |
0fe65b75 AI |
15319 | return "vscatterpf1<ssemodesuffix>ps\t{%4|%4}"; |
15320 | default: | |
15321 | gcc_unreachable (); | |
15322 | } | |
15323 | } | |
15324 | [(set_attr "type" "sse") | |
15325 | (set_attr "prefix" "evex") | |
15326 | (set_attr "mode" "XI")]) | |
15327 | ||
;; Packed double variants
;; Expander for the AVX-512PF double-precision scatter-prefetch builtin;
;; memory mode is fixed at V8DF.
(define_expand "avx512pf_scatterpf<mode>df"
  [(unspec
     [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
      (mem:V8DF
	(match_par_dup 5
	  [(match_operand 2 "vsib_address_operand")
	   (match_operand:VI4_256_8_512 1 "register_operand")
	   (match_operand:SI 3 "const1248_operand")]))
      (match_operand:SI 4 "const2367_operand")]
     UNSPEC_SCATTER_PREFETCH)]
  "TARGET_AVX512PF"
{
  /* Build the base+index+scale VSIB address for match_par_dup 5.  */
  operands[5]
    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
					operands[3]), UNSPEC_VSIBADDR);
})
;; Masked DF scatter prefetch: hints 3/7 -> vscatterpf0, hints 2/6 ->
;; vscatterpf1.
(define_insn "*avx512pf_scatterpf<mode>df_mask"
  [(unspec
     [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
      (match_operator:V8DF 5 "vsib_mem_operator"
	[(unspec:P
	   [(match_operand:P 2 "vsib_address_operand" "Tv")
	    (match_operand:VI4_256_8_512 1 "register_operand" "v")
	    (match_operand:SI 3 "const1248_operand" "n")]
	   UNSPEC_VSIBADDR)])
      (match_operand:SI 4 "const2367_operand" "n")]
     UNSPEC_SCATTER_PREFETCH)]
  "TARGET_AVX512PF"
{
  switch (INTVAL (operands[4]))
    {
    case 3:
    case 7:
      return "vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
    case 2:
    case 6:
      return "vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "sse")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
;; Unmasked DF scatter prefetch (constant -1 mask enables all elements).
(define_insn "*avx512pf_scatterpf<mode>df"
  [(unspec
     [(const_int -1)
      (match_operator:V8DF 4 "vsib_mem_operator"
	[(unspec:P
	   [(match_operand:P 1 "vsib_address_operand" "Tv")
	    (match_operand:VI4_256_8_512 0 "register_operand" "v")
	    (match_operand:SI 2 "const1248_operand" "n")]
	   UNSPEC_VSIBADDR)])
      (match_operand:SI 3 "const2367_operand" "n")]
     UNSPEC_SCATTER_PREFETCH)]
  "TARGET_AVX512PF"
{
  switch (INTVAL (operands[3]))
    {
    case 3:
    case 7:
      return "vscatterpf0<ssemodesuffix>pd\t{%4|%4}";
    case 2:
    case 6:
      return "vscatterpf1<ssemodesuffix>pd\t{%4|%4}";
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "sse")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
c56a42b9 | 15404 | (define_insn "avx512er_exp2<mode><mask_name><round_saeonly_name>" |
afb4ac68 AI |
15405 | [(set (match_operand:VF_512 0 "register_operand" "=v") |
15406 | (unspec:VF_512 | |
c56a42b9 | 15407 | [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")] |
afb4ac68 AI |
15408 | UNSPEC_EXP2))] |
15409 | "TARGET_AVX512ER" | |
c56a42b9 | 15410 | "vexp2<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}" |
afb4ac68 | 15411 | [(set_attr "prefix" "evex") |
df62b4af | 15412 | (set_attr "type" "sse") |
afb4ac68 AI |
15413 | (set_attr "mode" "<MODE>")]) |
15414 | ||
c56a42b9 | 15415 | (define_insn "<mask_codefor>avx512er_rcp28<mode><mask_name><round_saeonly_name>" |
afb4ac68 AI |
15416 | [(set (match_operand:VF_512 0 "register_operand" "=v") |
15417 | (unspec:VF_512 | |
c56a42b9 | 15418 | [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")] |
afb4ac68 AI |
15419 | UNSPEC_RCP28))] |
15420 | "TARGET_AVX512ER" | |
c56a42b9 | 15421 | "vrcp28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}" |
afb4ac68 | 15422 | [(set_attr "prefix" "evex") |
df62b4af | 15423 | (set_attr "type" "sse") |
afb4ac68 AI |
15424 | (set_attr "mode" "<MODE>")]) |
15425 | ||
;; Scalar vrcp28ss/vrcp28sd: the approximation replaces element 0
;; (vec_merge with mask (const_int 1)); the upper elements are taken
;; from operand 2.
(define_insn "avx512er_vmrcp28<mode><round_saeonly_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=v")
	(vec_merge:VF_128
	  (unspec:VF_128
	    [(match_operand:VF_128 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
	    UNSPEC_RCP28)
	  (match_operand:VF_128 2 "register_operand" "v")
	  (const_int 1)))]
  "TARGET_AVX512ER"
  "vrcp28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0, %2, %1<round_saeonly_op3>}"
  [(set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "type" "sse")
   (set_attr "mode" "<MODE>")])
15441 | (define_insn "<mask_codefor>avx512er_rsqrt28<mode><mask_name><round_saeonly_name>" | |
afb4ac68 AI |
15442 | [(set (match_operand:VF_512 0 "register_operand" "=v") |
15443 | (unspec:VF_512 | |
c56a42b9 | 15444 | [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")] |
afb4ac68 AI |
15445 | UNSPEC_RSQRT28))] |
15446 | "TARGET_AVX512ER" | |
c56a42b9 | 15447 | "vrsqrt28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}" |
afb4ac68 | 15448 | [(set_attr "prefix" "evex") |
df62b4af | 15449 | (set_attr "type" "sse") |
afb4ac68 AI |
15450 | (set_attr "mode" "<MODE>")]) |
15451 | ||
;; Scalar vrsqrt28ss/vrsqrt28sd: approximation replaces element 0 only;
;; upper elements come from operand 2.
(define_insn "avx512er_vmrsqrt28<mode><round_saeonly_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=v")
	(vec_merge:VF_128
	  (unspec:VF_128
	    [(match_operand:VF_128 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
	    UNSPEC_RSQRT28)
	  (match_operand:VF_128 2 "register_operand" "v")
	  (const_int 1)))]
  "TARGET_AVX512ER"
  "vrsqrt28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0, %2, %1<round_saeonly_op3>}"
  [(set_attr "length_immediate" "1")
   (set_attr "type" "sse")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; XOP instructions
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Iterate the multiply-accumulate patterns over wrapping (plus) and
;; signed-saturating (ss_plus) accumulation; the code attributes select
;; the matching mnemonic fragment ("macs"/"macss", "madcs"/"madcss").
(define_code_iterator xop_plus [plus ss_plus])

(define_code_attr macs [(plus "macs") (ss_plus "macss")])
(define_code_attr madcs [(plus "madcs") (ss_plus "madcss")])
;; XOP parallel integer multiply/add instructions.

;; vpmacs[s]ww/vpmacs[s]dd: elementwise multiply of operands 1 and 2
;; accumulated into operand 3 (wrapping or saturating via xop_plus).
(define_insn "xop_p<macs><ssemodesuffix><ssemodesuffix>"
  [(set (match_operand:VI24_128 0 "register_operand" "=x")
	(xop_plus:VI24_128
	 (mult:VI24_128
	  (match_operand:VI24_128 1 "nonimmediate_operand" "%x")
	  (match_operand:VI24_128 2 "nonimmediate_operand" "xm"))
	 (match_operand:VI24_128 3 "register_operand" "x")))]
  "TARGET_XOP"
  "vp<macs><ssemodesuffix><ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
;; vpmacs[s]dql: multiply the low (even-indexed) signed dwords of operands
;; 1 and 2 to qwords and accumulate into operand 3.
(define_insn "xop_p<macs>dql"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
	(xop_plus:V2DI
	 (mult:V2DI
	  (sign_extend:V2DI
	   (vec_select:V2SI
	    (match_operand:V4SI 1 "nonimmediate_operand" "%x")
	    (parallel [(const_int 0) (const_int 2)])))
	  (sign_extend:V2DI
	   (vec_select:V2SI
	    (match_operand:V4SI 2 "nonimmediate_operand" "xm")
	    (parallel [(const_int 0) (const_int 2)]))))
	 (match_operand:V2DI 3 "register_operand" "x")))]
  "TARGET_XOP"
  "vp<macs>dql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
;; vpmacs[s]dqh: same as the dql pattern, but multiplying the high
;; (odd-indexed) signed dwords.
(define_insn "xop_p<macs>dqh"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
	(xop_plus:V2DI
	 (mult:V2DI
	  (sign_extend:V2DI
	   (vec_select:V2SI
	    (match_operand:V4SI 1 "nonimmediate_operand" "%x")
	    (parallel [(const_int 1) (const_int 3)])))
	  (sign_extend:V2DI
	   (vec_select:V2SI
	    (match_operand:V4SI 2 "nonimmediate_operand" "xm")
	    (parallel [(const_int 1) (const_int 3)]))))
	 (match_operand:V2DI 3 "register_operand" "x")))]
  "TARGET_XOP"
  "vp<macs>dqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
;; XOP parallel integer multiply/add instructions for the intrinisics
;; vpmacs[s]wd: multiply the odd-indexed signed words of operands 1 and 2
;; to dwords and accumulate into operand 3.
(define_insn "xop_p<macs>wd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
	(xop_plus:V4SI
	 (mult:V4SI
	  (sign_extend:V4SI
	   (vec_select:V4HI
	    (match_operand:V8HI 1 "nonimmediate_operand" "%x")
	    (parallel [(const_int 1) (const_int 3)
		       (const_int 5) (const_int 7)])))
	  (sign_extend:V4SI
	   (vec_select:V4HI
	    (match_operand:V8HI 2 "nonimmediate_operand" "xm")
	    (parallel [(const_int 1) (const_int 3)
		       (const_int 5) (const_int 7)]))))
	 (match_operand:V4SI 3 "register_operand" "x")))]
  "TARGET_XOP"
  "vp<macs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
;; vpmadcs[s]wd: multiply-add of adjacent word pairs — even-indexed
;; products plus odd-indexed products — then accumulate into operand 3.
(define_insn "xop_p<madcs>wd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
	(xop_plus:V4SI
	 (plus:V4SI
	  (mult:V4SI
	   (sign_extend:V4SI
	    (vec_select:V4HI
	     (match_operand:V8HI 1 "nonimmediate_operand" "%x")
	     (parallel [(const_int 0) (const_int 2)
			(const_int 4) (const_int 6)])))
	   (sign_extend:V4SI
	    (vec_select:V4HI
	     (match_operand:V8HI 2 "nonimmediate_operand" "xm")
	     (parallel [(const_int 0) (const_int 2)
			(const_int 4) (const_int 6)]))))
	  (mult:V4SI
	   (sign_extend:V4SI
	    (vec_select:V4HI
	     (match_dup 1)
	     (parallel [(const_int 1) (const_int 3)
			(const_int 5) (const_int 7)])))
	   (sign_extend:V4SI
	    (vec_select:V4HI
	     (match_dup 2)
	     (parallel [(const_int 1) (const_int 3)
			(const_int 5) (const_int 7)])))))
	 (match_operand:V4SI 3 "register_operand" "x")))]
  "TARGET_XOP"
  "vp<madcs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])
15581 | ;; XOP parallel XMM conditional moves | |
6bec6c98 UB |
15582 | (define_insn "xop_pcmov_<mode><avxsizesuffix>" |
15583 | [(set (match_operand:V 0 "register_operand" "=x,x") | |
15584 | (if_then_else:V | |
15585 | (match_operand:V 3 "nonimmediate_operand" "x,m") | |
bd352290 UB |
15586 | (match_operand:V 1 "register_operand" "x,x") |
15587 | (match_operand:V 2 "nonimmediate_operand" "xm,x")))] | |
4926bb1d | 15588 | "TARGET_XOP" |
1a62cb3b | 15589 | "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}" |
43a8b705 HJ |
15590 | [(set_attr "type" "sse4arg")]) |
15591 | ||
15592 | ;; XOP horizontal add/subtract instructions | |
8861ba4d | 15593 | (define_insn "xop_phadd<u>bw" |
43a8b705 HJ |
15594 | [(set (match_operand:V8HI 0 "register_operand" "=x") |
15595 | (plus:V8HI | |
8861ba4d | 15596 | (any_extend:V8HI |
43a8b705 HJ |
15597 | (vec_select:V8QI |
15598 | (match_operand:V16QI 1 "nonimmediate_operand" "xm") | |
608dccd7 UB |
15599 | (parallel [(const_int 0) (const_int 2) |
15600 | (const_int 4) (const_int 6) | |
15601 | (const_int 8) (const_int 10) | |
15602 | (const_int 12) (const_int 14)]))) | |
8861ba4d | 15603 | (any_extend:V8HI |
43a8b705 HJ |
15604 | (vec_select:V8QI |
15605 | (match_dup 1) | |
608dccd7 UB |
15606 | (parallel [(const_int 1) (const_int 3) |
15607 | (const_int 5) (const_int 7) | |
15608 | (const_int 9) (const_int 11) | |
15609 | (const_int 13) (const_int 15)])))))] | |
43a8b705 | 15610 | "TARGET_XOP" |
8861ba4d | 15611 | "vphadd<u>bw\t{%1, %0|%0, %1}" |
43a8b705 HJ |
15612 | [(set_attr "type" "sseiadd1")]) |
15613 | ||
;; vphadd[u]bd: horizontally add each group of four consecutive bytes of
;; operand 1 (extended via any_extend) into a dword.
(define_insn "xop_phadd<u>bd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
	(plus:V4SI
	 (plus:V4SI
	  (any_extend:V4SI
	   (vec_select:V4QI
	    (match_operand:V16QI 1 "nonimmediate_operand" "xm")
	    (parallel [(const_int 0) (const_int 4)
		       (const_int 8) (const_int 12)])))
	  (any_extend:V4SI
	   (vec_select:V4QI
	    (match_dup 1)
	    (parallel [(const_int 1) (const_int 5)
		       (const_int 9) (const_int 13)]))))
	 (plus:V4SI
	  (any_extend:V4SI
	   (vec_select:V4QI
	    (match_dup 1)
	    (parallel [(const_int 2) (const_int 6)
		       (const_int 10) (const_int 14)])))
	  (any_extend:V4SI
	   (vec_select:V4QI
	    (match_dup 1)
	    (parallel [(const_int 3) (const_int 7)
		       (const_int 11) (const_int 15)]))))))]
  "TARGET_XOP"
  "vphadd<u>bd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])
8861ba4d | 15643 | (define_insn "xop_phadd<u>bq" |
43a8b705 HJ |
15644 | [(set (match_operand:V2DI 0 "register_operand" "=x") |
15645 | (plus:V2DI | |
15646 | (plus:V2DI | |
15647 | (plus:V2DI | |
8861ba4d | 15648 | (any_extend:V2DI |
43a8b705 HJ |
15649 | (vec_select:V2QI |
15650 | (match_operand:V16QI 1 "nonimmediate_operand" "xm") | |
920ac804 | 15651 | (parallel [(const_int 0) (const_int 8)]))) |
8861ba4d | 15652 | (any_extend:V2DI |
43a8b705 HJ |
15653 | (vec_select:V2QI |
15654 | (match_dup 1) | |
920ac804 | 15655 | (parallel [(const_int 1) (const_int 9)])))) |
43a8b705 | 15656 | (plus:V2DI |
8861ba4d | 15657 | (any_extend:V2DI |
43a8b705 HJ |
15658 | (vec_select:V2QI |
15659 | (match_dup 1) | |
920ac804 | 15660 | (parallel [(const_int 2) (const_int 10)]))) |
8861ba4d | 15661 | (any_extend:V2DI |
43a8b705 HJ |
15662 | (vec_select:V2QI |
15663 | (match_dup 1) | |
920ac804 | 15664 | (parallel [(const_int 3) (const_int 11)]))))) |
43a8b705 HJ |
15665 | (plus:V2DI |
15666 | (plus:V2DI | |
8861ba4d | 15667 | (any_extend:V2DI |
43a8b705 HJ |
15668 | (vec_select:V2QI |
15669 | (match_dup 1) | |
920ac804 | 15670 | (parallel [(const_int 4) (const_int 12)]))) |
8861ba4d | 15671 | (any_extend:V2DI |
43a8b705 HJ |
15672 | (vec_select:V2QI |
15673 | (match_dup 1) | |
920ac804 | 15674 | (parallel [(const_int 5) (const_int 13)])))) |
43a8b705 | 15675 | (plus:V2DI |
8861ba4d | 15676 | (any_extend:V2DI |
43a8b705 HJ |
15677 | (vec_select:V2QI |
15678 | (match_dup 1) | |
920ac804 | 15679 | (parallel [(const_int 6) (const_int 14)]))) |
8861ba4d | 15680 | (any_extend:V2DI |
43a8b705 HJ |
15681 | (vec_select:V2QI |
15682 | (match_dup 1) | |
920ac804 | 15683 | (parallel [(const_int 7) (const_int 15)])))))))] |
43a8b705 | 15684 | "TARGET_XOP" |
8861ba4d | 15685 | "vphadd<u>bq\t{%1, %0|%0, %1}" |
43a8b705 HJ |
15686 | [(set_attr "type" "sseiadd1")]) |
15687 | ||
8861ba4d | 15688 | (define_insn "xop_phadd<u>wd" |
43a8b705 HJ |
15689 | [(set (match_operand:V4SI 0 "register_operand" "=x") |
15690 | (plus:V4SI | |
8861ba4d | 15691 | (any_extend:V4SI |
43a8b705 HJ |
15692 | (vec_select:V4HI |
15693 | (match_operand:V8HI 1 "nonimmediate_operand" "xm") | |
608dccd7 UB |
15694 | (parallel [(const_int 0) (const_int 2) |
15695 | (const_int 4) (const_int 6)]))) | |
8861ba4d | 15696 | (any_extend:V4SI |
43a8b705 HJ |
15697 | (vec_select:V4HI |
15698 | (match_dup 1) | |
608dccd7 UB |
15699 | (parallel [(const_int 1) (const_int 3) |
15700 | (const_int 5) (const_int 7)])))))] | |
43a8b705 | 15701 | "TARGET_XOP" |
8861ba4d | 15702 | "vphadd<u>wd\t{%1, %0|%0, %1}" |
43a8b705 HJ |
15703 | [(set_attr "type" "sseiadd1")]) |
15704 | ||
8861ba4d | 15705 | (define_insn "xop_phadd<u>wq" |
43a8b705 HJ |
15706 | [(set (match_operand:V2DI 0 "register_operand" "=x") |
15707 | (plus:V2DI | |
15708 | (plus:V2DI | |
8861ba4d | 15709 | (any_extend:V2DI |
43a8b705 HJ |
15710 | (vec_select:V2HI |
15711 | (match_operand:V8HI 1 "nonimmediate_operand" "xm") | |
608dccd7 | 15712 | (parallel [(const_int 0) (const_int 4)]))) |
8861ba4d | 15713 | (any_extend:V2DI |
43a8b705 HJ |
15714 | (vec_select:V2HI |
15715 | (match_dup 1) | |
608dccd7 | 15716 | (parallel [(const_int 1) (const_int 5)])))) |
43a8b705 | 15717 | (plus:V2DI |
8861ba4d | 15718 | (any_extend:V2DI |
43a8b705 HJ |
15719 | (vec_select:V2HI |
15720 | (match_dup 1) | |
608dccd7 | 15721 | (parallel [(const_int 2) (const_int 6)]))) |
8861ba4d | 15722 | (any_extend:V2DI |
43a8b705 HJ |
15723 | (vec_select:V2HI |
15724 | (match_dup 1) | |
608dccd7 | 15725 | (parallel [(const_int 3) (const_int 7)]))))))] |
43a8b705 | 15726 | "TARGET_XOP" |
8861ba4d | 15727 | "vphadd<u>wq\t{%1, %0|%0, %1}" |
43a8b705 HJ |
15728 | [(set_attr "type" "sseiadd1")]) |
15729 | ||
8861ba4d | 15730 | (define_insn "xop_phadd<u>dq" |
43a8b705 HJ |
15731 | [(set (match_operand:V2DI 0 "register_operand" "=x") |
15732 | (plus:V2DI | |
8861ba4d | 15733 | (any_extend:V2DI |
43a8b705 HJ |
15734 | (vec_select:V2SI |
15735 | (match_operand:V4SI 1 "nonimmediate_operand" "xm") | |
608dccd7 | 15736 | (parallel [(const_int 0) (const_int 2)]))) |
8861ba4d | 15737 | (any_extend:V2DI |
43a8b705 HJ |
15738 | (vec_select:V2SI |
15739 | (match_dup 1) | |
608dccd7 | 15740 | (parallel [(const_int 1) (const_int 3)])))))] |
43a8b705 | 15741 | "TARGET_XOP" |
8861ba4d | 15742 | "vphadd<u>dq\t{%1, %0|%0, %1}" |
43a8b705 HJ |
15743 | [(set_attr "type" "sseiadd1")]) |
15744 | ||
15745 | (define_insn "xop_phsubbw" | |
15746 | [(set (match_operand:V8HI 0 "register_operand" "=x") | |
15747 | (minus:V8HI | |
15748 | (sign_extend:V8HI | |
15749 | (vec_select:V8QI | |
15750 | (match_operand:V16QI 1 "nonimmediate_operand" "xm") | |
608dccd7 UB |
15751 | (parallel [(const_int 0) (const_int 2) |
15752 | (const_int 4) (const_int 6) | |
15753 | (const_int 8) (const_int 10) | |
15754 | (const_int 12) (const_int 14)]))) | |
43a8b705 HJ |
15755 | (sign_extend:V8HI |
15756 | (vec_select:V8QI | |
15757 | (match_dup 1) | |
608dccd7 UB |
15758 | (parallel [(const_int 1) (const_int 3) |
15759 | (const_int 5) (const_int 7) | |
15760 | (const_int 9) (const_int 11) | |
15761 | (const_int 13) (const_int 15)])))))] | |
43a8b705 HJ |
15762 | "TARGET_XOP" |
15763 | "vphsubbw\t{%1, %0|%0, %1}" | |
15764 | [(set_attr "type" "sseiadd1")]) | |
15765 | ||
15766 | (define_insn "xop_phsubwd" | |
15767 | [(set (match_operand:V4SI 0 "register_operand" "=x") | |
15768 | (minus:V4SI | |
15769 | (sign_extend:V4SI | |
15770 | (vec_select:V4HI | |
15771 | (match_operand:V8HI 1 "nonimmediate_operand" "xm") | |
608dccd7 UB |
15772 | (parallel [(const_int 0) (const_int 2) |
15773 | (const_int 4) (const_int 6)]))) | |
43a8b705 HJ |
15774 | (sign_extend:V4SI |
15775 | (vec_select:V4HI | |
15776 | (match_dup 1) | |
608dccd7 UB |
15777 | (parallel [(const_int 1) (const_int 3) |
15778 | (const_int 5) (const_int 7)])))))] | |
43a8b705 HJ |
15779 | "TARGET_XOP" |
15780 | "vphsubwd\t{%1, %0|%0, %1}" | |
15781 | [(set_attr "type" "sseiadd1")]) | |
15782 | ||
15783 | (define_insn "xop_phsubdq" | |
15784 | [(set (match_operand:V2DI 0 "register_operand" "=x") | |
15785 | (minus:V2DI | |
15786 | (sign_extend:V2DI | |
15787 | (vec_select:V2SI | |
15788 | (match_operand:V4SI 1 "nonimmediate_operand" "xm") | |
608dccd7 | 15789 | (parallel [(const_int 0) (const_int 2)]))) |
43a8b705 HJ |
15790 | (sign_extend:V2DI |
15791 | (vec_select:V2SI | |
15792 | (match_dup 1) | |
608dccd7 | 15793 | (parallel [(const_int 1) (const_int 3)])))))] |
43a8b705 HJ |
15794 | "TARGET_XOP" |
15795 | "vphsubdq\t{%1, %0|%0, %1}" | |
15796 | [(set_attr "type" "sseiadd1")]) | |
15797 | ||
15798 | ;; XOP permute instructions | |
15799 | (define_insn "xop_pperm" | |
4926bb1d | 15800 | [(set (match_operand:V16QI 0 "register_operand" "=x,x") |
43a8b705 | 15801 | (unspec:V16QI |
4926bb1d SP |
15802 | [(match_operand:V16QI 1 "register_operand" "x,x") |
15803 | (match_operand:V16QI 2 "nonimmediate_operand" "x,m") | |
15804 | (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")] | |
43a8b705 | 15805 | UNSPEC_XOP_PERMUTE))] |
4926bb1d | 15806 | "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))" |
43a8b705 HJ |
15807 | "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}" |
15808 | [(set_attr "type" "sse4arg") | |
15809 | (set_attr "mode" "TI")]) | |
15810 | ||
15811 | ;; XOP pack instructions that combine two vectors into a smaller vector | |
15812 | (define_insn "xop_pperm_pack_v2di_v4si" | |
4926bb1d | 15813 | [(set (match_operand:V4SI 0 "register_operand" "=x,x") |
43a8b705 HJ |
15814 | (vec_concat:V4SI |
15815 | (truncate:V2SI | |
4926bb1d | 15816 | (match_operand:V2DI 1 "register_operand" "x,x")) |
43a8b705 | 15817 | (truncate:V2SI |
4926bb1d SP |
15818 | (match_operand:V2DI 2 "nonimmediate_operand" "x,m")))) |
15819 | (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))] | |
15820 | "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))" | |
43a8b705 HJ |
15821 | "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}" |
15822 | [(set_attr "type" "sse4arg") | |
15823 | (set_attr "mode" "TI")]) | |
15824 | ||
15825 | (define_insn "xop_pperm_pack_v4si_v8hi" | |
4926bb1d | 15826 | [(set (match_operand:V8HI 0 "register_operand" "=x,x") |
43a8b705 HJ |
15827 | (vec_concat:V8HI |
15828 | (truncate:V4HI | |
4926bb1d | 15829 | (match_operand:V4SI 1 "register_operand" "x,x")) |
43a8b705 | 15830 | (truncate:V4HI |
4926bb1d SP |
15831 | (match_operand:V4SI 2 "nonimmediate_operand" "x,m")))) |
15832 | (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))] | |
15833 | "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))" | |
43a8b705 HJ |
15834 | "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}" |
15835 | [(set_attr "type" "sse4arg") | |
15836 | (set_attr "mode" "TI")]) | |
15837 | ||
15838 | (define_insn "xop_pperm_pack_v8hi_v16qi" | |
4926bb1d | 15839 | [(set (match_operand:V16QI 0 "register_operand" "=x,x") |
43a8b705 HJ |
15840 | (vec_concat:V16QI |
15841 | (truncate:V8QI | |
4926bb1d | 15842 | (match_operand:V8HI 1 "register_operand" "x,x")) |
43a8b705 | 15843 | (truncate:V8QI |
4926bb1d SP |
15844 | (match_operand:V8HI 2 "nonimmediate_operand" "x,m")))) |
15845 | (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))] | |
15846 | "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))" | |
43a8b705 HJ |
15847 | "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}" |
15848 | [(set_attr "type" "sse4arg") | |
15849 | (set_attr "mode" "TI")]) | |
15850 | ||
15851 | ;; XOP packed rotate instructions | |
15852 | (define_expand "rotl<mode>3" | |
82e86dc6 | 15853 | [(set (match_operand:VI_128 0 "register_operand") |
6bec6c98 | 15854 | (rotate:VI_128 |
82e86dc6 | 15855 | (match_operand:VI_128 1 "nonimmediate_operand") |
43a8b705 HJ |
15856 | (match_operand:SI 2 "general_operand")))] |
15857 | "TARGET_XOP" | |
15858 | { | |
15859 | /* If we were given a scalar, convert it to parallel */ | |
15860 | if (! const_0_to_<sserotatemax>_operand (operands[2], SImode)) | |
15861 | { | |
15862 | rtvec vs = rtvec_alloc (<ssescalarnum>); | |
15863 | rtx par = gen_rtx_PARALLEL (<MODE>mode, vs); | |
15864 | rtx reg = gen_reg_rtx (<MODE>mode); | |
15865 | rtx op2 = operands[2]; | |
15866 | int i; | |
15867 | ||
15868 | if (GET_MODE (op2) != <ssescalarmode>mode) | |
977e83a3 | 15869 | { |
43a8b705 HJ |
15870 | op2 = gen_reg_rtx (<ssescalarmode>mode); |
15871 | convert_move (op2, operands[2], false); | |
15872 | } | |
15873 | ||
15874 | for (i = 0; i < <ssescalarnum>; i++) | |
15875 | RTVEC_ELT (vs, i) = op2; | |
15876 | ||
15877 | emit_insn (gen_vec_init<mode> (reg, par)); | |
15878 | emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg)); | |
15879 | DONE; | |
15880 | } | |
15881 | }) | |
15882 | ||
15883 | (define_expand "rotr<mode>3" | |
82e86dc6 | 15884 | [(set (match_operand:VI_128 0 "register_operand") |
6bec6c98 | 15885 | (rotatert:VI_128 |
82e86dc6 | 15886 | (match_operand:VI_128 1 "nonimmediate_operand") |
43a8b705 HJ |
15887 | (match_operand:SI 2 "general_operand")))] |
15888 | "TARGET_XOP" | |
15889 | { | |
15890 | /* If we were given a scalar, convert it to parallel */ | |
15891 | if (! const_0_to_<sserotatemax>_operand (operands[2], SImode)) | |
15892 | { | |
15893 | rtvec vs = rtvec_alloc (<ssescalarnum>); | |
15894 | rtx par = gen_rtx_PARALLEL (<MODE>mode, vs); | |
15895 | rtx neg = gen_reg_rtx (<MODE>mode); | |
15896 | rtx reg = gen_reg_rtx (<MODE>mode); | |
15897 | rtx op2 = operands[2]; | |
15898 | int i; | |
15899 | ||
15900 | if (GET_MODE (op2) != <ssescalarmode>mode) | |
977e83a3 | 15901 | { |
43a8b705 HJ |
15902 | op2 = gen_reg_rtx (<ssescalarmode>mode); |
15903 | convert_move (op2, operands[2], false); | |
15904 | } | |
15905 | ||
15906 | for (i = 0; i < <ssescalarnum>; i++) | |
15907 | RTVEC_ELT (vs, i) = op2; | |
15908 | ||
15909 | emit_insn (gen_vec_init<mode> (reg, par)); | |
15910 | emit_insn (gen_neg<mode>2 (neg, reg)); | |
15911 | emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg)); | |
15912 | DONE; | |
15913 | } | |
15914 | }) | |
15915 | ||
15916 | (define_insn "xop_rotl<mode>3" | |
6bec6c98 UB |
15917 | [(set (match_operand:VI_128 0 "register_operand" "=x") |
15918 | (rotate:VI_128 | |
15919 | (match_operand:VI_128 1 "nonimmediate_operand" "xm") | |
43a8b705 HJ |
15920 | (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))] |
15921 | "TARGET_XOP" | |
cbb734aa | 15922 | "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" |
43a8b705 HJ |
15923 | [(set_attr "type" "sseishft") |
15924 | (set_attr "length_immediate" "1") | |
15925 | (set_attr "mode" "TI")]) | |
15926 | ||
15927 | (define_insn "xop_rotr<mode>3" | |
6bec6c98 UB |
15928 | [(set (match_operand:VI_128 0 "register_operand" "=x") |
15929 | (rotatert:VI_128 | |
15930 | (match_operand:VI_128 1 "nonimmediate_operand" "xm") | |
43a8b705 HJ |
15931 | (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))] |
15932 | "TARGET_XOP" | |
15933 | { | |
ba8011e6 JJ |
15934 | operands[3] |
15935 | = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2])); | |
cbb734aa | 15936 | return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\"; |
43a8b705 HJ |
15937 | } |
15938 | [(set_attr "type" "sseishft") | |
15939 | (set_attr "length_immediate" "1") | |
15940 | (set_attr "mode" "TI")]) | |
15941 | ||
15942 | (define_expand "vrotr<mode>3" | |
82e86dc6 UB |
15943 | [(match_operand:VI_128 0 "register_operand") |
15944 | (match_operand:VI_128 1 "register_operand") | |
15945 | (match_operand:VI_128 2 "register_operand")] | |
43a8b705 HJ |
15946 | "TARGET_XOP" |
15947 | { | |
15948 | rtx reg = gen_reg_rtx (<MODE>mode); | |
15949 | emit_insn (gen_neg<mode>2 (reg, operands[2])); | |
15950 | emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg)); | |
15951 | DONE; | |
15952 | }) | |
15953 | ||
15954 | (define_expand "vrotl<mode>3" | |
82e86dc6 UB |
15955 | [(match_operand:VI_128 0 "register_operand") |
15956 | (match_operand:VI_128 1 "register_operand") | |
15957 | (match_operand:VI_128 2 "register_operand")] | |
43a8b705 HJ |
15958 | "TARGET_XOP" |
15959 | { | |
15960 | emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2])); | |
15961 | DONE; | |
15962 | }) | |
15963 | ||
15964 | (define_insn "xop_vrotl<mode>3" | |
6bec6c98 UB |
15965 | [(set (match_operand:VI_128 0 "register_operand" "=x,x") |
15966 | (if_then_else:VI_128 | |
15967 | (ge:VI_128 | |
15968 | (match_operand:VI_128 2 "nonimmediate_operand" "x,m") | |
43a8b705 | 15969 | (const_int 0)) |
6bec6c98 UB |
15970 | (rotate:VI_128 |
15971 | (match_operand:VI_128 1 "nonimmediate_operand" "xm,x") | |
43a8b705 | 15972 | (match_dup 2)) |
6bec6c98 | 15973 | (rotatert:VI_128 |
43a8b705 | 15974 | (match_dup 1) |
6bec6c98 | 15975 | (neg:VI_128 (match_dup 2)))))] |
4926bb1d | 15976 | "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))" |
cbb734aa | 15977 | "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" |
43a8b705 HJ |
15978 | [(set_attr "type" "sseishft") |
15979 | (set_attr "prefix_data16" "0") | |
15980 | (set_attr "prefix_extra" "2") | |
15981 | (set_attr "mode" "TI")]) | |
15982 | ||
15983 | ;; XOP packed shift instructions. | |
43a8b705 | 15984 | (define_expand "vlshr<mode>3" |
82e86dc6 | 15985 | [(set (match_operand:VI12_128 0 "register_operand") |
c4ab64c6 | 15986 | (lshiftrt:VI12_128 |
82e86dc6 UB |
15987 | (match_operand:VI12_128 1 "register_operand") |
15988 | (match_operand:VI12_128 2 "nonimmediate_operand")))] | |
43a8b705 HJ |
15989 | "TARGET_XOP" |
15990 | { | |
15991 | rtx neg = gen_reg_rtx (<MODE>mode); | |
15992 | emit_insn (gen_neg<mode>2 (neg, operands[2])); | |
b99f906a | 15993 | emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg)); |
43a8b705 HJ |
15994 | DONE; |
15995 | }) | |
15996 | ||
ee3b466d | 15997 | (define_expand "vlshr<mode>3" |
82e86dc6 | 15998 | [(set (match_operand:VI48_128 0 "register_operand") |
ee3b466d | 15999 | (lshiftrt:VI48_128 |
82e86dc6 UB |
16000 | (match_operand:VI48_128 1 "register_operand") |
16001 | (match_operand:VI48_128 2 "nonimmediate_operand")))] | |
ee3b466d JJ |
16002 | "TARGET_AVX2 || TARGET_XOP" |
16003 | { | |
16004 | if (!TARGET_AVX2) | |
16005 | { | |
16006 | rtx neg = gen_reg_rtx (<MODE>mode); | |
16007 | emit_insn (gen_neg<mode>2 (neg, operands[2])); | |
b99f906a | 16008 | emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg)); |
ee3b466d JJ |
16009 | DONE; |
16010 | } | |
16011 | }) | |
16012 | ||
0fe65b75 AI |
16013 | (define_expand "vlshr<mode>3" |
16014 | [(set (match_operand:VI48_512 0 "register_operand") | |
16015 | (lshiftrt:VI48_512 | |
16016 | (match_operand:VI48_512 1 "register_operand") | |
16017 | (match_operand:VI48_512 2 "nonimmediate_operand")))] | |
16018 | "TARGET_AVX512F") | |
16019 | ||
ee3b466d | 16020 | (define_expand "vlshr<mode>3" |
82e86dc6 | 16021 | [(set (match_operand:VI48_256 0 "register_operand") |
ee3b466d | 16022 | (lshiftrt:VI48_256 |
82e86dc6 UB |
16023 | (match_operand:VI48_256 1 "register_operand") |
16024 | (match_operand:VI48_256 2 "nonimmediate_operand")))] | |
ee3b466d JJ |
16025 | "TARGET_AVX2") |
16026 | ||
b92883d6 IT |
16027 | (define_expand "vashrv8hi3<mask_name>" |
16028 | [(set (match_operand:V8HI 0 "register_operand") | |
16029 | (ashiftrt:V8HI | |
16030 | (match_operand:V8HI 1 "register_operand") | |
16031 | (match_operand:V8HI 2 "nonimmediate_operand")))] | |
06ba0585 | 16032 | "TARGET_XOP || (TARGET_AVX512BW && TARGET_AVX512VL)" |
43a8b705 | 16033 | { |
06ba0585 AI |
16034 | if (TARGET_XOP) |
16035 | { | |
b92883d6 IT |
16036 | rtx neg = gen_reg_rtx (V8HImode); |
16037 | emit_insn (gen_negv8hi2 (neg, operands[2])); | |
16038 | emit_insn (gen_xop_shav8hi3 (operands[0], operands[1], neg)); | |
06ba0585 AI |
16039 | DONE; |
16040 | } | |
16041 | }) | |
16042 | ||
b92883d6 IT |
16043 | (define_expand "vashrv16qi3" |
16044 | [(set (match_operand:V16QI 0 "register_operand") | |
16045 | (ashiftrt:V16QI | |
16046 | (match_operand:V16QI 1 "register_operand") | |
16047 | (match_operand:V16QI 2 "nonimmediate_operand")))] | |
16048 | "TARGET_XOP" | |
16049 | { | |
16050 | rtx neg = gen_reg_rtx (V16QImode); | |
16051 | emit_insn (gen_negv16qi2 (neg, operands[2])); | |
16052 | emit_insn (gen_xop_shav16qi3 (operands[0], operands[1], neg)); | |
16053 | DONE; | |
16054 | }) | |
16055 | ||
06ba0585 AI |
16056 | (define_expand "vashrv2di3<mask_name>" |
16057 | [(set (match_operand:V2DI 0 "register_operand") | |
16058 | (ashiftrt:V2DI | |
16059 | (match_operand:V2DI 1 "register_operand") | |
16060 | (match_operand:V2DI 2 "nonimmediate_operand")))] | |
16061 | "TARGET_XOP || TARGET_AVX512VL" | |
16062 | { | |
16063 | if (TARGET_XOP) | |
16064 | { | |
16065 | rtx neg = gen_reg_rtx (V2DImode); | |
16066 | emit_insn (gen_negv2di2 (neg, operands[2])); | |
16067 | emit_insn (gen_xop_shav2di3 (operands[0], operands[1], neg)); | |
16068 | DONE; | |
16069 | } | |
43a8b705 HJ |
16070 | }) |
16071 | ||
ee3b466d | 16072 | (define_expand "vashrv4si3" |
82e86dc6 UB |
16073 | [(set (match_operand:V4SI 0 "register_operand") |
16074 | (ashiftrt:V4SI (match_operand:V4SI 1 "register_operand") | |
16075 | (match_operand:V4SI 2 "nonimmediate_operand")))] | |
ee3b466d JJ |
16076 | "TARGET_AVX2 || TARGET_XOP" |
16077 | { | |
16078 | if (!TARGET_AVX2) | |
16079 | { | |
16080 | rtx neg = gen_reg_rtx (V4SImode); | |
16081 | emit_insn (gen_negv4si2 (neg, operands[2])); | |
b99f906a | 16082 | emit_insn (gen_xop_shav4si3 (operands[0], operands[1], neg)); |
ee3b466d JJ |
16083 | DONE; |
16084 | } | |
16085 | }) | |
16086 | ||
b868b7ca AI |
16087 | (define_expand "vashrv16si3" |
16088 | [(set (match_operand:V16SI 0 "register_operand") | |
16089 | (ashiftrt:V16SI (match_operand:V16SI 1 "register_operand") | |
16090 | (match_operand:V16SI 2 "nonimmediate_operand")))] | |
16091 | "TARGET_AVX512F") | |
16092 | ||
ee3b466d | 16093 | (define_expand "vashrv8si3" |
82e86dc6 UB |
16094 | [(set (match_operand:V8SI 0 "register_operand") |
16095 | (ashiftrt:V8SI (match_operand:V8SI 1 "register_operand") | |
16096 | (match_operand:V8SI 2 "nonimmediate_operand")))] | |
ee3b466d JJ |
16097 | "TARGET_AVX2") |
16098 | ||
43a8b705 | 16099 | (define_expand "vashl<mode>3" |
82e86dc6 | 16100 | [(set (match_operand:VI12_128 0 "register_operand") |
c4ab64c6 | 16101 | (ashift:VI12_128 |
82e86dc6 UB |
16102 | (match_operand:VI12_128 1 "register_operand") |
16103 | (match_operand:VI12_128 2 "nonimmediate_operand")))] | |
43a8b705 HJ |
16104 | "TARGET_XOP" |
16105 | { | |
b99f906a | 16106 | emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2])); |
43a8b705 HJ |
16107 | DONE; |
16108 | }) | |
16109 | ||
ee3b466d | 16110 | (define_expand "vashl<mode>3" |
82e86dc6 | 16111 | [(set (match_operand:VI48_128 0 "register_operand") |
ee3b466d | 16112 | (ashift:VI48_128 |
82e86dc6 UB |
16113 | (match_operand:VI48_128 1 "register_operand") |
16114 | (match_operand:VI48_128 2 "nonimmediate_operand")))] | |
ee3b466d JJ |
16115 | "TARGET_AVX2 || TARGET_XOP" |
16116 | { | |
16117 | if (!TARGET_AVX2) | |
16118 | { | |
16119 | operands[2] = force_reg (<MODE>mode, operands[2]); | |
b99f906a | 16120 | emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2])); |
ee3b466d JJ |
16121 | DONE; |
16122 | } | |
16123 | }) | |
16124 | ||
0fe65b75 AI |
16125 | (define_expand "vashl<mode>3" |
16126 | [(set (match_operand:VI48_512 0 "register_operand") | |
16127 | (ashift:VI48_512 | |
16128 | (match_operand:VI48_512 1 "register_operand") | |
16129 | (match_operand:VI48_512 2 "nonimmediate_operand")))] | |
16130 | "TARGET_AVX512F") | |
16131 | ||
ee3b466d | 16132 | (define_expand "vashl<mode>3" |
82e86dc6 | 16133 | [(set (match_operand:VI48_256 0 "register_operand") |
ee3b466d | 16134 | (ashift:VI48_256 |
82e86dc6 UB |
16135 | (match_operand:VI48_256 1 "register_operand") |
16136 | (match_operand:VI48_256 2 "nonimmediate_operand")))] | |
ee3b466d JJ |
16137 | "TARGET_AVX2") |
16138 | ||
b99f906a | 16139 | (define_insn "xop_sha<mode>3" |
6bec6c98 UB |
16140 | [(set (match_operand:VI_128 0 "register_operand" "=x,x") |
16141 | (if_then_else:VI_128 | |
16142 | (ge:VI_128 | |
16143 | (match_operand:VI_128 2 "nonimmediate_operand" "x,m") | |
43a8b705 | 16144 | (const_int 0)) |
6bec6c98 UB |
16145 | (ashift:VI_128 |
16146 | (match_operand:VI_128 1 "nonimmediate_operand" "xm,x") | |
43a8b705 | 16147 | (match_dup 2)) |
6bec6c98 | 16148 | (ashiftrt:VI_128 |
43a8b705 | 16149 | (match_dup 1) |
6bec6c98 | 16150 | (neg:VI_128 (match_dup 2)))))] |
4926bb1d | 16151 | "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))" |
cbb734aa | 16152 | "vpsha<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" |
43a8b705 HJ |
16153 | [(set_attr "type" "sseishft") |
16154 | (set_attr "prefix_data16" "0") | |
16155 | (set_attr "prefix_extra" "2") | |
16156 | (set_attr "mode" "TI")]) | |
16157 | ||
b99f906a | 16158 | (define_insn "xop_shl<mode>3" |
6bec6c98 UB |
16159 | [(set (match_operand:VI_128 0 "register_operand" "=x,x") |
16160 | (if_then_else:VI_128 | |
16161 | (ge:VI_128 | |
16162 | (match_operand:VI_128 2 "nonimmediate_operand" "x,m") | |
43a8b705 | 16163 | (const_int 0)) |
6bec6c98 UB |
16164 | (ashift:VI_128 |
16165 | (match_operand:VI_128 1 "nonimmediate_operand" "xm,x") | |
43a8b705 | 16166 | (match_dup 2)) |
6bec6c98 | 16167 | (lshiftrt:VI_128 |
43a8b705 | 16168 | (match_dup 1) |
6bec6c98 | 16169 | (neg:VI_128 (match_dup 2)))))] |
4926bb1d | 16170 | "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))" |
cbb734aa | 16171 | "vpshl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" |
43a8b705 HJ |
16172 | [(set_attr "type" "sseishft") |
16173 | (set_attr "prefix_data16" "0") | |
16174 | (set_attr "prefix_extra" "2") | |
16175 | (set_attr "mode" "TI")]) | |
16176 | ||
2d542a9f | 16177 | (define_expand "<shift_insn><mode>3" |
f5db965f IT |
16178 | [(set (match_operand:VI1_AVX512 0 "register_operand") |
16179 | (any_shift:VI1_AVX512 | |
16180 | (match_operand:VI1_AVX512 1 "register_operand") | |
82e86dc6 | 16181 | (match_operand:SI 2 "nonmemory_operand")))] |
2d542a9f | 16182 | "TARGET_SSE2" |
43a8b705 | 16183 | { |
2d542a9f RH |
16184 | if (TARGET_XOP && <MODE>mode == V16QImode) |
16185 | { | |
16186 | bool negate = false; | |
16187 | rtx (*gen) (rtx, rtx, rtx); | |
16188 | rtx tmp, par; | |
16189 | int i; | |
f327a48e | 16190 | |
2d542a9f RH |
16191 | if (<CODE> != ASHIFT) |
16192 | { | |
16193 | if (CONST_INT_P (operands[2])) | |
16194 | operands[2] = GEN_INT (-INTVAL (operands[2])); | |
16195 | else | |
16196 | negate = true; | |
16197 | } | |
16198 | par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16)); | |
16199 | for (i = 0; i < 16; i++) | |
16200 | XVECEXP (par, 0, i) = operands[2]; | |
43a8b705 | 16201 | |
2d542a9f RH |
16202 | tmp = gen_reg_rtx (V16QImode); |
16203 | emit_insn (gen_vec_initv16qi (tmp, par)); | |
43a8b705 | 16204 | |
2d542a9f RH |
16205 | if (negate) |
16206 | emit_insn (gen_negv16qi2 (tmp, tmp)); | |
f327a48e | 16207 | |
2d542a9f RH |
16208 | gen = (<CODE> == LSHIFTRT ? gen_xop_shlv16qi3 : gen_xop_shav16qi3); |
16209 | emit_insn (gen (operands[0], operands[1], tmp)); | |
16210 | } | |
43a8b705 | 16211 | else |
2d542a9f | 16212 | ix86_expand_vecop_qihi (<CODE>, operands[0], operands[1], operands[2]); |
43a8b705 HJ |
16213 | DONE; |
16214 | }) | |
16215 | ||
16216 | (define_expand "ashrv2di3" | |
82e86dc6 | 16217 | [(set (match_operand:V2DI 0 "register_operand") |
1162730f | 16218 | (ashiftrt:V2DI |
82e86dc6 UB |
16219 | (match_operand:V2DI 1 "register_operand") |
16220 | (match_operand:DI 2 "nonmemory_operand")))] | |
28e9a294 | 16221 | "TARGET_XOP || TARGET_AVX512VL" |
43a8b705 | 16222 | { |
28e9a294 AI |
16223 | if (!TARGET_AVX512VL) |
16224 | { | |
16225 | rtx reg = gen_reg_rtx (V2DImode); | |
16226 | rtx par; | |
16227 | bool negate = false; | |
16228 | int i; | |
43a8b705 | 16229 | |
28e9a294 AI |
16230 | if (CONST_INT_P (operands[2])) |
16231 | operands[2] = GEN_INT (-INTVAL (operands[2])); | |
16232 | else | |
16233 | negate = true; | |
f327a48e | 16234 | |
28e9a294 AI |
16235 | par = gen_rtx_PARALLEL (V2DImode, rtvec_alloc (2)); |
16236 | for (i = 0; i < 2; i++) | |
16237 | XVECEXP (par, 0, i) = operands[2]; | |
43a8b705 | 16238 | |
28e9a294 | 16239 | emit_insn (gen_vec_initv2di (reg, par)); |
f327a48e | 16240 | |
28e9a294 AI |
16241 | if (negate) |
16242 | emit_insn (gen_negv2di2 (reg, reg)); | |
f327a48e | 16243 | |
28e9a294 AI |
16244 | emit_insn (gen_xop_shav2di3 (operands[0], operands[1], reg)); |
16245 | DONE; | |
16246 | } | |
43a8b705 HJ |
16247 | }) |
16248 | ||
16249 | ;; XOP FRCZ support | |
43a8b705 | 16250 | (define_insn "xop_frcz<mode>2" |
89509419 RH |
16251 | [(set (match_operand:FMAMODE 0 "register_operand" "=x") |
16252 | (unspec:FMAMODE | |
16253 | [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")] | |
43a8b705 HJ |
16254 | UNSPEC_FRCZ))] |
16255 | "TARGET_XOP" | |
1c154a23 | 16256 | "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}" |
43a8b705 HJ |
16257 | [(set_attr "type" "ssecvt1") |
16258 | (set_attr "mode" "<MODE>")]) | |
16259 | ||
89509419 | 16260 | (define_expand "xop_vmfrcz<mode>2" |
6bec6c98 UB |
16261 | [(set (match_operand:VF_128 0 "register_operand") |
16262 | (vec_merge:VF_128 | |
16263 | (unspec:VF_128 | |
16264 | [(match_operand:VF_128 1 "nonimmediate_operand")] | |
43a8b705 | 16265 | UNSPEC_FRCZ) |
1287ae50 | 16266 | (match_dup 2) |
43a8b705 HJ |
16267 | (const_int 1)))] |
16268 | "TARGET_XOP" | |
1287ae50 | 16269 | "operands[2] = CONST0_RTX (<MODE>mode);") |
43a8b705 | 16270 | |
b84acf54 | 16271 | (define_insn "*xop_vmfrcz<mode>2" |
6bec6c98 UB |
16272 | [(set (match_operand:VF_128 0 "register_operand" "=x") |
16273 | (vec_merge:VF_128 | |
16274 | (unspec:VF_128 | |
16275 | [(match_operand:VF_128 1 "nonimmediate_operand" "xm")] | |
89509419 | 16276 | UNSPEC_FRCZ) |
6bec6c98 | 16277 | (match_operand:VF_128 2 "const0_operand") |
89509419 | 16278 | (const_int 1)))] |
43a8b705 | 16279 | "TARGET_XOP" |
eabb5f48 | 16280 | "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}" |
43a8b705 HJ |
16281 | [(set_attr "type" "ssecvt1") |
16282 | (set_attr "mode" "<MODE>")]) | |
16283 | ||
16284 | (define_insn "xop_maskcmp<mode>3" | |
6bec6c98 UB |
16285 | [(set (match_operand:VI_128 0 "register_operand" "=x") |
16286 | (match_operator:VI_128 1 "ix86_comparison_int_operator" | |
16287 | [(match_operand:VI_128 2 "register_operand" "x") | |
16288 | (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))] | |
43a8b705 | 16289 | "TARGET_XOP" |
cbb734aa | 16290 | "vpcom%Y1<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}" |
43a8b705 HJ |
16291 | [(set_attr "type" "sse4arg") |
16292 | (set_attr "prefix_data16" "0") | |
16293 | (set_attr "prefix_rep" "0") | |
16294 | (set_attr "prefix_extra" "2") | |
16295 | (set_attr "length_immediate" "1") | |
16296 | (set_attr "mode" "TI")]) | |
16297 | ||
16298 | (define_insn "xop_maskcmp_uns<mode>3" | |
6bec6c98 UB |
16299 | [(set (match_operand:VI_128 0 "register_operand" "=x") |
16300 | (match_operator:VI_128 1 "ix86_comparison_uns_operator" | |
16301 | [(match_operand:VI_128 2 "register_operand" "x") | |
16302 | (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))] | |
43a8b705 | 16303 | "TARGET_XOP" |
cbb734aa | 16304 | "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}" |
43a8b705 HJ |
16305 | [(set_attr "type" "ssecmp") |
16306 | (set_attr "prefix_data16" "0") | |
16307 | (set_attr "prefix_rep" "0") | |
16308 | (set_attr "prefix_extra" "2") | |
16309 | (set_attr "length_immediate" "1") | |
16310 | (set_attr "mode" "TI")]) | |
16311 | ||
16312 | ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ* | |
16313 | ;; and pcomneu* not to be converted to the signed ones in case somebody needs | |
16314 | ;; the exact instruction generated for the intrinsic. | |
16315 | (define_insn "xop_maskcmp_uns2<mode>3" | |
6bec6c98 UB |
16316 | [(set (match_operand:VI_128 0 "register_operand" "=x") |
16317 | (unspec:VI_128 | |
16318 | [(match_operator:VI_128 1 "ix86_comparison_uns_operator" | |
16319 | [(match_operand:VI_128 2 "register_operand" "x") | |
16320 | (match_operand:VI_128 3 "nonimmediate_operand" "xm")])] | |
43a8b705 HJ |
16321 | UNSPEC_XOP_UNSIGNED_CMP))] |
16322 | "TARGET_XOP" | |
cbb734aa | 16323 | "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}" |
43a8b705 HJ |
16324 | [(set_attr "type" "ssecmp") |
16325 | (set_attr "prefix_data16" "0") | |
16326 | (set_attr "prefix_extra" "2") | |
16327 | (set_attr "length_immediate" "1") | |
16328 | (set_attr "mode" "TI")]) | |
16329 | ||
16330 | ;; Pcomtrue and pcomfalse support. These are useless instructions, but are | |
16331 | ;; being added here to be complete. | |
16332 | (define_insn "xop_pcom_tf<mode>3" | |
6bec6c98 UB |
16333 | [(set (match_operand:VI_128 0 "register_operand" "=x") |
16334 | (unspec:VI_128 | |
16335 | [(match_operand:VI_128 1 "register_operand" "x") | |
16336 | (match_operand:VI_128 2 "nonimmediate_operand" "xm") | |
43a8b705 HJ |
16337 | (match_operand:SI 3 "const_int_operand" "n")] |
16338 | UNSPEC_XOP_TRUEFALSE))] | |
16339 | "TARGET_XOP" | |
16340 | { | |
16341 | return ((INTVAL (operands[3]) != 0) | |
cbb734aa UB |
16342 | ? "vpcomtrue<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" |
16343 | : "vpcomfalse<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"); | |
43a8b705 HJ |
16344 | } |
16345 | [(set_attr "type" "ssecmp") | |
16346 | (set_attr "prefix_data16" "0") | |
16347 | (set_attr "prefix_extra" "2") | |
16348 | (set_attr "length_immediate" "1") | |
16349 | (set_attr "mode" "TI")]) | |
16350 | ||
02edd2f6 | 16351 | (define_insn "xop_vpermil2<mode>3" |
b86f6e9e AI |
16352 | [(set (match_operand:VF_128_256 0 "register_operand" "=x") |
16353 | (unspec:VF_128_256 | |
16354 | [(match_operand:VF_128_256 1 "register_operand" "x") | |
16355 | (match_operand:VF_128_256 2 "nonimmediate_operand" "%x") | |
cbb734aa | 16356 | (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "xm") |
02edd2f6 SP |
16357 | (match_operand:SI 4 "const_0_to_3_operand" "n")] |
16358 | UNSPEC_VPERMIL2))] | |
16359 | "TARGET_XOP" | |
1c154a23 | 16360 | "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}" |
02edd2f6 SP |
16361 | [(set_attr "type" "sse4arg") |
16362 | (set_attr "length_immediate" "1") | |
16363 | (set_attr "mode" "<MODE>")]) | |
16364 | ||
43a8b705 | 16365 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
95879c72 | 16366 | |
8b96a312 | 16367 | (define_insn "aesenc" |
5e60198b UB |
16368 | [(set (match_operand:V2DI 0 "register_operand" "=x,x") |
16369 | (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x") | |
16370 | (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")] | |
8b96a312 L |
16371 | UNSPEC_AESENC))] |
16372 | "TARGET_AES" | |
5e60198b UB |
16373 | "@ |
16374 | aesenc\t{%2, %0|%0, %2} | |
16375 | vaesenc\t{%2, %1, %0|%0, %1, %2}" | |
16376 | [(set_attr "isa" "noavx,avx") | |
16377 | (set_attr "type" "sselog1") | |
725fd454 | 16378 | (set_attr "prefix_extra" "1") |
5e60198b | 16379 | (set_attr "prefix" "orig,vex") |
01284895 | 16380 | (set_attr "btver2_decode" "double,double") |
95879c72 L |
16381 | (set_attr "mode" "TI")]) |
16382 | ||
8b96a312 | 16383 | (define_insn "aesenclast" |
5e60198b UB |
16384 | [(set (match_operand:V2DI 0 "register_operand" "=x,x") |
16385 | (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x") | |
16386 | (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")] | |
8b96a312 L |
16387 | UNSPEC_AESENCLAST))] |
16388 | "TARGET_AES" | |
5e60198b UB |
16389 | "@ |
16390 | aesenclast\t{%2, %0|%0, %2} | |
16391 | vaesenclast\t{%2, %1, %0|%0, %1, %2}" | |
16392 | [(set_attr "isa" "noavx,avx") | |
16393 | (set_attr "type" "sselog1") | |
725fd454 | 16394 | (set_attr "prefix_extra" "1") |
5e60198b | 16395 | (set_attr "prefix" "orig,vex") |
01284895 | 16396 | (set_attr "btver2_decode" "double,double") |
95879c72 L |
16397 | (set_attr "mode" "TI")]) |
16398 | ||
8b96a312 | 16399 | (define_insn "aesdec" |
5e60198b UB |
16400 | [(set (match_operand:V2DI 0 "register_operand" "=x,x") |
16401 | (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x") | |
16402 | (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")] | |
8b96a312 L |
16403 | UNSPEC_AESDEC))] |
16404 | "TARGET_AES" | |
5e60198b UB |
16405 | "@ |
16406 | aesdec\t{%2, %0|%0, %2} | |
16407 | vaesdec\t{%2, %1, %0|%0, %1, %2}" | |
16408 | [(set_attr "isa" "noavx,avx") | |
16409 | (set_attr "type" "sselog1") | |
725fd454 | 16410 | (set_attr "prefix_extra" "1") |
5e60198b | 16411 | (set_attr "prefix" "orig,vex") |
01284895 | 16412 | (set_attr "btver2_decode" "double,double") |
95879c72 L |
16413 | (set_attr "mode" "TI")]) |
16414 | ||
8b96a312 | 16415 | (define_insn "aesdeclast" |
5e60198b UB |
16416 | [(set (match_operand:V2DI 0 "register_operand" "=x,x") |
16417 | (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x") | |
16418 | (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")] | |
8b96a312 L |
16419 | UNSPEC_AESDECLAST))] |
16420 | "TARGET_AES" | |
5e60198b UB |
16421 | "@ |
16422 | aesdeclast\t{%2, %0|%0, %2} | |
16423 | vaesdeclast\t{%2, %1, %0|%0, %1, %2}" | |
16424 | [(set_attr "isa" "noavx,avx") | |
16425 | (set_attr "type" "sselog1") | |
8b96a312 | 16426 | (set_attr "prefix_extra" "1") |
5e60198b | 16427 | (set_attr "prefix" "orig,vex") |
01284895 | 16428 | (set_attr "btver2_decode" "double,double") |
8b96a312 L |
16429 | (set_attr "mode" "TI")]) |
16430 | ||
16431 | (define_insn "aesimc" | |
16432 | [(set (match_operand:V2DI 0 "register_operand" "=x") | |
16433 | (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")] | |
16434 | UNSPEC_AESIMC))] | |
16435 | "TARGET_AES" | |
95879c72 | 16436 | "%vaesimc\t{%1, %0|%0, %1}" |
8b96a312 L |
16437 | [(set_attr "type" "sselog1") |
16438 | (set_attr "prefix_extra" "1") | |
95879c72 | 16439 | (set_attr "prefix" "maybe_vex") |
8b96a312 L |
16440 | (set_attr "mode" "TI")]) |
16441 | ||
16442 | (define_insn "aeskeygenassist" | |
16443 | [(set (match_operand:V2DI 0 "register_operand" "=x") | |
16444 | (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm") | |
16445 | (match_operand:SI 2 "const_0_to_255_operand" "n")] | |
16446 | UNSPEC_AESKEYGENASSIST))] | |
16447 | "TARGET_AES" | |
95879c72 | 16448 | "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}" |
8b96a312 L |
16449 | [(set_attr "type" "sselog1") |
16450 | (set_attr "prefix_extra" "1") | |
725fd454 | 16451 | (set_attr "length_immediate" "1") |
95879c72 | 16452 | (set_attr "prefix" "maybe_vex") |
8b96a312 L |
16453 | (set_attr "mode" "TI")]) |
16454 | ||
16455 | (define_insn "pclmulqdq" | |
5e60198b UB |
16456 | [(set (match_operand:V2DI 0 "register_operand" "=x,x") |
16457 | (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x") | |
16458 | (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm") | |
16459 | (match_operand:SI 3 "const_0_to_255_operand" "n,n")] | |
8b96a312 L |
16460 | UNSPEC_PCLMUL))] |
16461 | "TARGET_PCLMUL" | |
5e60198b UB |
16462 | "@ |
16463 | pclmulqdq\t{%3, %2, %0|%0, %2, %3} | |
16464 | vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}" | |
16465 | [(set_attr "isa" "noavx,avx") | |
16466 | (set_attr "type" "sselog1") | |
8b96a312 | 16467 | (set_attr "prefix_extra" "1") |
725fd454 | 16468 | (set_attr "length_immediate" "1") |
5e60198b | 16469 | (set_attr "prefix" "orig,vex") |
8b96a312 | 16470 | (set_attr "mode" "TI")]) |
95879c72 L |
16471 | |
16472 | (define_expand "avx_vzeroall" | |
16473 | [(match_par_dup 0 [(const_int 0)])] | |
16474 | "TARGET_AVX" | |
16475 | { | |
16476 | int nregs = TARGET_64BIT ? 16 : 8; | |
16477 | int regno; | |
16478 | ||
16479 | operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1)); | |
16480 | ||
16481 | XVECEXP (operands[0], 0, 0) | |
16482 | = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx), | |
16483 | UNSPECV_VZEROALL); | |
16484 | ||
16485 | for (regno = 0; regno < nregs; regno++) | |
16486 | XVECEXP (operands[0], 0, regno + 1) | |
f7df4a84 | 16487 | = gen_rtx_SET (gen_rtx_REG (V8SImode, SSE_REGNO (regno)), |
95879c72 L |
16488 | CONST0_RTX (V8SImode)); |
16489 | }) | |
16490 | ||
16491 | (define_insn "*avx_vzeroall" | |
16492 | [(match_parallel 0 "vzeroall_operation" | |
85b1d1bd | 16493 | [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])] |
95879c72 L |
16494 | "TARGET_AVX" |
16495 | "vzeroall" | |
16496 | [(set_attr "type" "sse") | |
725fd454 | 16497 | (set_attr "modrm" "0") |
95879c72 L |
16498 | (set_attr "memory" "none") |
16499 | (set_attr "prefix" "vex") | |
01284895 | 16500 | (set_attr "btver2_decode" "vector") |
95879c72 L |
16501 | (set_attr "mode" "OI")]) |
16502 | ||
2767a7f2 L |
16503 | ;; Clear the upper 128bits of AVX registers, equivalent to a NOP |
16504 | ;; if the upper 128bits are unused. | |
16505 | (define_insn "avx_vzeroupper" | |
ff97910d | 16506 | [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)] |
85b1d1bd | 16507 | "TARGET_AVX" |
95879c72 L |
16508 | "vzeroupper" |
16509 | [(set_attr "type" "sse") | |
725fd454 | 16510 | (set_attr "modrm" "0") |
95879c72 L |
16511 | (set_attr "memory" "none") |
16512 | (set_attr "prefix" "vex") | |
01284895 | 16513 | (set_attr "btver2_decode" "vector") |
95879c72 L |
16514 | (set_attr "mode" "OI")]) |
16515 | ||
977e83a3 KY |
16516 | (define_insn "avx2_pbroadcast<mode>" |
16517 | [(set (match_operand:VI 0 "register_operand" "=x") | |
16518 | (vec_duplicate:VI | |
16519 | (vec_select:<ssescalarmode> | |
a9ccbba2 | 16520 | (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "xm") |
977e83a3 KY |
16521 | (parallel [(const_int 0)]))))] |
16522 | "TARGET_AVX2" | |
eabb5f48 | 16523 | "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}" |
977e83a3 KY |
16524 | [(set_attr "type" "ssemov") |
16525 | (set_attr "prefix_extra" "1") | |
16526 | (set_attr "prefix" "vex") | |
16527 | (set_attr "mode" "<sseinsnmode>")]) | |
16528 | ||
6945a32e | 16529 | (define_insn "avx2_pbroadcast<mode>_1" |
eabb5f48 | 16530 | [(set (match_operand:VI_256 0 "register_operand" "=x,x") |
6945a32e JJ |
16531 | (vec_duplicate:VI_256 |
16532 | (vec_select:<ssescalarmode> | |
eabb5f48 | 16533 | (match_operand:VI_256 1 "nonimmediate_operand" "m,x") |
6945a32e JJ |
16534 | (parallel [(const_int 0)]))))] |
16535 | "TARGET_AVX2" | |
eabb5f48 UB |
16536 | "@ |
16537 | vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1} | |
16538 | vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}" | |
6945a32e JJ |
16539 | [(set_attr "type" "ssemov") |
16540 | (set_attr "prefix_extra" "1") | |
16541 | (set_attr "prefix" "vex") | |
16542 | (set_attr "mode" "<sseinsnmode>")]) | |
16543 | ||
cf92ae7f | 16544 | (define_insn "<avx2_avx512>_permvar<mode><mask_name>" |
3c87b77b AI |
16545 | [(set (match_operand:VI48F_256_512 0 "register_operand" "=v") |
16546 | (unspec:VI48F_256_512 | |
16547 | [(match_operand:VI48F_256_512 1 "nonimmediate_operand" "vm") | |
16548 | (match_operand:<sseintvecmode> 2 "register_operand" "v")] | |
2ff5ea2d | 16549 | UNSPEC_VPERMVAR))] |
47490470 AI |
16550 | "TARGET_AVX2 && <mask_mode512bit_condition>" |
16551 | "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}" | |
977e83a3 | 16552 | [(set_attr "type" "sselog") |
47490470 | 16553 | (set_attr "prefix" "<mask_prefix2>") |
3c87b77b | 16554 | (set_attr "mode" "<sseinsnmode>")]) |
977e83a3 | 16555 | |
3dcc8af5 IT |
16556 | (define_insn "<avx512>_permvar<mode><mask_name>" |
16557 | [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v") | |
16558 | (unspec:VI1_AVX512VL | |
16559 | [(match_operand:VI1_AVX512VL 1 "nonimmediate_operand" "vm") | |
16560 | (match_operand:<sseintvecmode> 2 "register_operand" "v")] | |
16561 | UNSPEC_VPERMVAR))] | |
16562 | "TARGET_AVX512VBMI && <mask_mode512bit_condition>" | |
16563 | "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}" | |
16564 | [(set_attr "type" "sselog") | |
16565 | (set_attr "prefix" "<mask_prefix2>") | |
16566 | (set_attr "mode" "<sseinsnmode>")]) | |
16567 | ||
cf92ae7f AI |
16568 | (define_insn "<avx512>_permvar<mode><mask_name>" |
16569 | [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v") | |
16570 | (unspec:VI2_AVX512VL | |
16571 | [(match_operand:VI2_AVX512VL 1 "nonimmediate_operand" "vm") | |
16572 | (match_operand:<sseintvecmode> 2 "register_operand" "v")] | |
16573 | UNSPEC_VPERMVAR))] | |
16574 | "TARGET_AVX512BW && <mask_mode512bit_condition>" | |
16575 | "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}" | |
16576 | [(set_attr "type" "sselog") | |
16577 | (set_attr "prefix" "<mask_prefix2>") | |
16578 | (set_attr "mode" "<sseinsnmode>")]) | |
16579 | ||
e2a2165d | 16580 | (define_expand "<avx2_avx512>_perm<mode>" |
16821545 AI |
16581 | [(match_operand:VI8F_256_512 0 "register_operand") |
16582 | (match_operand:VI8F_256_512 1 "nonimmediate_operand") | |
82e86dc6 | 16583 | (match_operand:SI 2 "const_0_to_255_operand")] |
0c7189ae JJ |
16584 | "TARGET_AVX2" |
16585 | { | |
16586 | int mask = INTVAL (operands[2]); | |
e2a2165d | 16587 | emit_insn (gen_<avx2_avx512>_perm<mode>_1 (operands[0], operands[1], |
47490470 AI |
16588 | GEN_INT ((mask >> 0) & 3), |
16589 | GEN_INT ((mask >> 2) & 3), | |
16590 | GEN_INT ((mask >> 4) & 3), | |
16591 | GEN_INT ((mask >> 6) & 3))); | |
16592 | DONE; | |
16593 | }) | |
16594 | ||
e2a2165d AI |
16595 | (define_expand "<avx512>_perm<mode>_mask" |
16596 | [(match_operand:VI8F_256_512 0 "register_operand") | |
16597 | (match_operand:VI8F_256_512 1 "nonimmediate_operand") | |
47490470 | 16598 | (match_operand:SI 2 "const_0_to_255_operand") |
e2a2165d | 16599 | (match_operand:VI8F_256_512 3 "vector_move_operand") |
47490470 AI |
16600 | (match_operand:<avx512fmaskmode> 4 "register_operand")] |
16601 | "TARGET_AVX512F" | |
16602 | { | |
16603 | int mask = INTVAL (operands[2]); | |
e2a2165d | 16604 | emit_insn (gen_<avx2_avx512>_perm<mode>_1_mask (operands[0], operands[1], |
47490470 AI |
16605 | GEN_INT ((mask >> 0) & 3), |
16606 | GEN_INT ((mask >> 2) & 3), | |
16607 | GEN_INT ((mask >> 4) & 3), | |
16608 | GEN_INT ((mask >> 6) & 3), | |
16609 | operands[3], operands[4])); | |
0c7189ae JJ |
16610 | DONE; |
16611 | }) | |
16612 | ||
e2a2165d | 16613 | (define_insn "<avx2_avx512>_perm<mode>_1<mask_name>" |
16821545 AI |
16614 | [(set (match_operand:VI8F_256_512 0 "register_operand" "=v") |
16615 | (vec_select:VI8F_256_512 | |
16616 | (match_operand:VI8F_256_512 1 "nonimmediate_operand" "vm") | |
82e86dc6 UB |
16617 | (parallel [(match_operand 2 "const_0_to_3_operand") |
16618 | (match_operand 3 "const_0_to_3_operand") | |
16619 | (match_operand 4 "const_0_to_3_operand") | |
16620 | (match_operand 5 "const_0_to_3_operand")])))] | |
47490470 | 16621 | "TARGET_AVX2 && <mask_mode512bit_condition>" |
0c7189ae JJ |
16622 | { |
16623 | int mask = 0; | |
16624 | mask |= INTVAL (operands[2]) << 0; | |
16625 | mask |= INTVAL (operands[3]) << 2; | |
16626 | mask |= INTVAL (operands[4]) << 4; | |
16627 | mask |= INTVAL (operands[5]) << 6; | |
16628 | operands[2] = GEN_INT (mask); | |
47490470 | 16629 | return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}"; |
0c7189ae | 16630 | } |
977e83a3 | 16631 | [(set_attr "type" "sselog") |
47490470 | 16632 | (set_attr "prefix" "<mask_prefix2>") |
b8227739 | 16633 | (set_attr "mode" "<sseinsnmode>")]) |
977e83a3 KY |
16634 | |
16635 | (define_insn "avx2_permv2ti" | |
16636 | [(set (match_operand:V4DI 0 "register_operand" "=x") | |
16637 | (unspec:V4DI | |
16638 | [(match_operand:V4DI 1 "register_operand" "x") | |
0c7189ae | 16639 | (match_operand:V4DI 2 "nonimmediate_operand" "xm") |
977e83a3 KY |
16640 | (match_operand:SI 3 "const_0_to_255_operand" "n")] |
16641 | UNSPEC_VPERMTI))] | |
16642 | "TARGET_AVX2" | |
16643 | "vperm2i128\t{%3, %2, %1, %0|%0, %1, %2, %3}" | |
16644 | [(set_attr "type" "sselog") | |
16645 | (set_attr "prefix" "vex") | |
16646 | (set_attr "mode" "OI")]) | |
16647 | ||
16648 | (define_insn "avx2_vec_dupv4df" | |
16649 | [(set (match_operand:V4DF 0 "register_operand" "=x") | |
16650 | (vec_duplicate:V4DF | |
16651 | (vec_select:DF | |
16652 | (match_operand:V2DF 1 "register_operand" "x") | |
16653 | (parallel [(const_int 0)]))))] | |
16654 | "TARGET_AVX2" | |
16655 | "vbroadcastsd\t{%1, %0|%0, %1}" | |
16656 | [(set_attr "type" "sselog1") | |
16657 | (set_attr "prefix" "vex") | |
16658 | (set_attr "mode" "V4DF")]) | |
16659 | ||
b92883d6 IT |
16660 | (define_insn "<avx512>_vec_dup<mode>_1" |
16661 | [(set (match_operand:VI_AVX512BW 0 "register_operand" "=v,v") | |
16662 | (vec_duplicate:VI_AVX512BW | |
16663 | (vec_select:VI_AVX512BW | |
16664 | (match_operand:VI_AVX512BW 1 "nonimmediate_operand" "v,m") | |
16665 | (parallel [(const_int 0)]))))] | |
16666 | "TARGET_AVX512F" | |
16667 | "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1} | |
16668 | vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}" | |
16669 | [(set_attr "type" "ssemov") | |
16670 | (set_attr "prefix" "evex") | |
16671 | (set_attr "mode" "<sseinsnmode>")]) | |
16672 | ||
51e14b05 AI |
16673 | (define_insn "<avx512>_vec_dup<mode><mask_name>" |
16674 | [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v") | |
16675 | (vec_duplicate:V48_AVX512VL | |
ab931c71 AI |
16676 | (vec_select:<ssescalarmode> |
16677 | (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm") | |
16678 | (parallel [(const_int 0)]))))] | |
16679 | "TARGET_AVX512F" | |
47490470 | 16680 | "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" |
ab931c71 AI |
16681 | [(set_attr "type" "ssemov") |
16682 | (set_attr "prefix" "evex") | |
16683 | (set_attr "mode" "<sseinsnmode>")]) | |
16684 | ||
51e14b05 AI |
16685 | (define_insn "<avx512>_vec_dup<mode><mask_name>" |
16686 | [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v") | |
16687 | (vec_duplicate:VI12_AVX512VL | |
16688 | (vec_select:<ssescalarmode> | |
16689 | (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm") | |
16690 | (parallel [(const_int 0)]))))] | |
16691 | "TARGET_AVX512BW" | |
16692 | "vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" | |
16693 | [(set_attr "type" "ssemov") | |
16694 | (set_attr "prefix" "evex") | |
16695 | (set_attr "mode" "<sseinsnmode>")]) | |
16696 | ||
47490470 | 16697 | (define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>" |
2e2206fa AI |
16698 | [(set (match_operand:V16FI 0 "register_operand" "=v,v") |
16699 | (vec_duplicate:V16FI | |
16700 | (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))] | |
16701 | "TARGET_AVX512F" | |
16702 | "@ | |
47490470 AI |
16703 | vshuf<shuffletype>32x4\t{$0x0, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x0} |
16704 | vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" | |
2e2206fa AI |
16705 | [(set_attr "type" "ssemov") |
16706 | (set_attr "prefix" "evex") | |
16707 | (set_attr "mode" "<sseinsnmode>")]) | |
16708 | ||
47490470 | 16709 | (define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>" |
2e2206fa AI |
16710 | [(set (match_operand:V8FI 0 "register_operand" "=v,v") |
16711 | (vec_duplicate:V8FI | |
16712 | (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))] | |
16713 | "TARGET_AVX512F" | |
16714 | "@ | |
47490470 AI |
16715 | vshuf<shuffletype>64x2\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44} |
16716 | vbroadcast<shuffletype>64x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" | |
2e2206fa AI |
16717 | [(set_attr "type" "ssemov") |
16718 | (set_attr "prefix" "evex") | |
16719 | (set_attr "mode" "<sseinsnmode>")]) | |
16720 | ||
51e14b05 | 16721 | (define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>" |
092444af | 16722 | [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v") |
51e14b05 | 16723 | (vec_duplicate:VI12_AVX512VL |
092444af | 16724 | (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))] |
51e14b05 | 16725 | "TARGET_AVX512BW" |
092444af JJ |
16726 | "@ |
16727 | vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1} | |
16728 | vpbroadcast<bcstscalarsuff>\t{%k1, %0<mask_operand2>|%0<mask_operand2>, %k1}" | |
51e14b05 AI |
16729 | [(set_attr "type" "ssemov") |
16730 | (set_attr "prefix" "evex") | |
16731 | (set_attr "mode" "<sseinsnmode>")]) | |
16732 | ||
16733 | (define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>" | |
092444af | 16734 | [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v") |
51e14b05 | 16735 | (vec_duplicate:V48_AVX512VL |
092444af | 16736 | (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))] |
ab931c71 | 16737 | "TARGET_AVX512F" |
47490470 | 16738 | "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" |
ab931c71 AI |
16739 | [(set_attr "type" "ssemov") |
16740 | (set_attr "prefix" "evex") | |
092444af JJ |
16741 | (set_attr "mode" "<sseinsnmode>") |
16742 | (set (attr "enabled") | |
16743 | (if_then_else (eq_attr "alternative" "1") | |
16744 | (symbol_ref "GET_MODE_CLASS (<ssescalarmode>mode) == MODE_INT | |
16745 | && (<ssescalarmode>mode != DImode || TARGET_64BIT)") | |
16746 | (const_int 1)))]) | |
ab931c71 | 16747 | |
092444af JJ |
16748 | (define_insn "vec_dupv4sf" |
16749 | [(set (match_operand:V4SF 0 "register_operand" "=x,x,x") | |
16750 | (vec_duplicate:V4SF | |
16751 | (match_operand:SF 1 "nonimmediate_operand" "x,m,0")))] | |
16752 | "TARGET_SSE" | |
16753 | "@ | |
16754 | vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0} | |
16755 | vbroadcastss\t{%1, %0|%0, %1} | |
16756 | shufps\t{$0, %0, %0|%0, %0, 0}" | |
16757 | [(set_attr "isa" "avx,avx,noavx") | |
16758 | (set_attr "type" "sseshuf1,ssemov,sseshuf1") | |
16759 | (set_attr "length_immediate" "1,0,1") | |
16760 | (set_attr "prefix_extra" "0,1,*") | |
16761 | (set_attr "prefix" "vex,vex,orig") | |
16762 | (set_attr "mode" "V4SF")]) | |
16763 | ||
16764 | (define_insn "*vec_dupv4si" | |
16765 | [(set (match_operand:V4SI 0 "register_operand" "=x,x,x") | |
16766 | (vec_duplicate:V4SI | |
16767 | (match_operand:SI 1 "nonimmediate_operand" " x,m,0")))] | |
16768 | "TARGET_SSE" | |
16769 | "@ | |
16770 | %vpshufd\t{$0, %1, %0|%0, %1, 0} | |
16771 | vbroadcastss\t{%1, %0|%0, %1} | |
16772 | shufps\t{$0, %0, %0|%0, %0, 0}" | |
16773 | [(set_attr "isa" "sse2,avx,noavx") | |
16774 | (set_attr "type" "sselog1,ssemov,sselog1") | |
16775 | (set_attr "length_immediate" "1,0,1") | |
16776 | (set_attr "prefix_extra" "0,1,*") | |
16777 | (set_attr "prefix" "maybe_vex,vex,orig") | |
16778 | (set_attr "mode" "TI,V4SF,V4SF")]) | |
16779 | ||
16780 | (define_insn "*vec_dupv2di" | |
16781 | [(set (match_operand:V2DI 0 "register_operand" "=x,x,x,x") | |
16782 | (vec_duplicate:V2DI | |
16783 | (match_operand:DI 1 "nonimmediate_operand" " 0,x,m,0")))] | |
16784 | "TARGET_SSE" | |
16785 | "@ | |
16786 | punpcklqdq\t%0, %0 | |
16787 | vpunpcklqdq\t{%d1, %0|%0, %d1} | |
16788 | %vmovddup\t{%1, %0|%0, %1} | |
16789 | movlhps\t%0, %0" | |
16790 | [(set_attr "isa" "sse2_noavx,avx,sse3,noavx") | |
16791 | (set_attr "type" "sselog1,sselog1,sselog1,ssemov") | |
16792 | (set_attr "prefix" "orig,vex,maybe_vex,orig") | |
16793 | (set_attr "mode" "TI,TI,DF,V4SF")]) | |
51e14b05 | 16794 | |
977e83a3 KY |
16795 | (define_insn "avx2_vbroadcasti128_<mode>" |
16796 | [(set (match_operand:VI_256 0 "register_operand" "=x") | |
16797 | (vec_concat:VI_256 | |
16798 | (match_operand:<ssehalfvecmode> 1 "memory_operand" "m") | |
16799 | (match_dup 1)))] | |
16800 | "TARGET_AVX2" | |
16801 | "vbroadcasti128\t{%1, %0|%0, %1}" | |
16802 | [(set_attr "type" "ssemov") | |
16803 | (set_attr "prefix_extra" "1") | |
16804 | (set_attr "prefix" "vex") | |
16805 | (set_attr "mode" "OI")]) | |
16806 | ||
7d9f1cd2 JJ |
16807 | ;; Modes handled by AVX vec_dup patterns. |
16808 | (define_mode_iterator AVX_VEC_DUP_MODE | |
16809 | [V8SI V8SF V4DI V4DF]) | |
16810 | ;; Modes handled by AVX2 vec_dup patterns. | |
16811 | (define_mode_iterator AVX2_VEC_DUP_MODE | |
16812 | [V32QI V16QI V16HI V8HI V8SI V4SI]) | |
16813 | ||
16814 | (define_insn "*vec_dup<mode>" | |
a0d8720a | 16815 | [(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand" "=x,x,Yi") |
7d9f1cd2 | 16816 | (vec_duplicate:AVX2_VEC_DUP_MODE |
d1457701 | 16817 | (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,x,$r")))] |
7d9f1cd2 JJ |
16818 | "TARGET_AVX2" |
16819 | "@ | |
16820 | v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1} | |
16821 | v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1} | |
16822 | #" | |
16823 | [(set_attr "type" "ssemov") | |
16824 | (set_attr "prefix_extra" "1") | |
16825 | (set_attr "prefix" "maybe_evex") | |
16826 | (set_attr "mode" "<sseinsnmode>")]) | |
16827 | ||
16828 | (define_insn "vec_dup<mode>" | |
16829 | [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x,v,x") | |
16830 | (vec_duplicate:AVX_VEC_DUP_MODE | |
16831 | (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,m,v,?x")))] | |
16832 | "TARGET_AVX" | |
16833 | "@ | |
16834 | v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1} | |
16835 | vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1} | |
16836 | v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1} | |
16837 | #" | |
16838 | [(set_attr "type" "ssemov") | |
16839 | (set_attr "prefix_extra" "1") | |
16840 | (set_attr "prefix" "maybe_evex") | |
16841 | (set_attr "isa" "avx2,noavx2,avx2,noavx2") | |
16842 | (set_attr "mode" "<sseinsnmode>,V8SF,<sseinsnmode>,V8SF")]) | |
16843 | ||
16844 | (define_split | |
16845 | [(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand") | |
16846 | (vec_duplicate:AVX2_VEC_DUP_MODE | |
16847 | (match_operand:<ssescalarmode> 1 "register_operand")))] | |
092444af JJ |
16848 | "TARGET_AVX2 |
16849 | /* Disable this splitter if avx512vl_vec_dup_gprv*[qhs]i insn is | |
16850 | available, because then we can broadcast from GPRs directly. | |
16851 | For V*[QH]I modes it requires both -mavx512vl and -mavx512bw, | |
16852 | for V*SI mode it requires just -mavx512vl. */ | |
16853 | && !(TARGET_AVX512VL | |
16854 | && (TARGET_AVX512BW || <ssescalarmode>mode == SImode)) | |
16855 | && reload_completed && GENERAL_REG_P (operands[1])" | |
7d9f1cd2 JJ |
16856 | [(const_int 0)] |
16857 | { | |
16858 | emit_insn (gen_vec_setv4si_0 (gen_lowpart (V4SImode, operands[0]), | |
16859 | CONST0_RTX (V4SImode), | |
16860 | gen_lowpart (SImode, operands[1]))); | |
16861 | emit_insn (gen_avx2_pbroadcast<mode> (operands[0], | |
16862 | gen_lowpart (<ssexmmmode>mode, | |
16863 | operands[0]))); | |
16864 | DONE; | |
16865 | }) | |
16866 | ||
8dfb9f16 | 16867 | (define_split |
82e86dc6 | 16868 | [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand") |
9ee65b55 | 16869 | (vec_duplicate:AVX_VEC_DUP_MODE |
82e86dc6 | 16870 | (match_operand:<ssescalarmode> 1 "register_operand")))] |
6945a32e | 16871 | "TARGET_AVX && !TARGET_AVX2 && reload_completed" |
9ee65b55 UB |
16872 | [(set (match_dup 2) |
16873 | (vec_duplicate:<ssehalfvecmode> (match_dup 1))) | |
16874 | (set (match_dup 0) | |
16875 | (vec_concat:AVX_VEC_DUP_MODE (match_dup 2) (match_dup 2)))] | |
cbb734aa | 16876 | "operands[2] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[0]));") |
8dfb9f16 | 16877 | |
5e04b3b6 | 16878 | (define_insn "avx_vbroadcastf128_<mode>" |
6bec6c98 UB |
16879 | [(set (match_operand:V_256 0 "register_operand" "=x,x,x") |
16880 | (vec_concat:V_256 | |
cbb734aa | 16881 | (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x") |
5e04b3b6 RH |
16882 | (match_dup 1)))] |
16883 | "TARGET_AVX" | |
16884 | "@ | |
1db4406e JJ |
16885 | vbroadcast<i128>\t{%1, %0|%0, %1} |
16886 | vinsert<i128>\t{$1, %1, %0, %0|%0, %0, %1, 1} | |
16887 | vperm2<i128>\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}" | |
5e04b3b6 RH |
16888 | [(set_attr "type" "ssemov,sselog1,sselog1") |
16889 | (set_attr "prefix_extra" "1") | |
16890 | (set_attr "length_immediate" "0,1,1") | |
16891 | (set_attr "prefix" "vex") | |
1db4406e | 16892 | (set_attr "mode" "<sseinsnmode>")]) |
5e04b3b6 | 16893 | |
698ea04f AI |
16894 | ;; For broadcast[i|f]32x2. Yes there is no v4sf version, only v4si. |
16895 | (define_mode_iterator VI4F_BRCST32x2 | |
16896 | [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL") | |
16897 | V16SF (V8SF "TARGET_AVX512VL")]) | |
16898 | ||
16899 | (define_mode_attr 64x2mode | |
16900 | [(V8DF "V2DF") (V8DI "V2DI") (V4DI "V2DI") (V4DF "V2DF")]) | |
16901 | ||
16902 | (define_mode_attr 32x2mode | |
16903 | [(V16SF "V2SF") (V16SI "V2SI") (V8SI "V2SI") | |
16904 | (V8SF "V2SF") (V4SI "V2SI")]) | |
16905 | ||
16906 | (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>" | |
16907 | [(set (match_operand:VI4F_BRCST32x2 0 "register_operand" "=v") | |
16908 | (vec_duplicate:VI4F_BRCST32x2 | |
16909 | (vec_select:<32x2mode> | |
16910 | (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm") | |
16911 | (parallel [(const_int 0) (const_int 1)]))))] | |
16912 | "TARGET_AVX512DQ" | |
16913 | "vbroadcast<shuffletype>32x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" | |
16914 | [(set_attr "type" "ssemov") | |
16915 | (set_attr "prefix_extra" "1") | |
16916 | (set_attr "prefix" "evex") | |
16917 | (set_attr "mode" "<sseinsnmode>")]) | |
16918 | ||
16919 | (define_insn "<mask_codefor>avx512vl_broadcast<mode><mask_name>_1" | |
16920 | [(set (match_operand:VI4F_256 0 "register_operand" "=v,v") | |
16921 | (vec_duplicate:VI4F_256 | |
16922 | (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))] | |
16923 | "TARGET_AVX512VL" | |
16924 | "@ | |
16925 | vshuf<shuffletype>32x4\t{$0x0, %t1, %t1, %0<mask_operand2>|%0<mask_operand2>, %t1, %t1, 0x0} | |
16926 | vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" | |
16927 | [(set_attr "type" "ssemov") | |
16928 | (set_attr "prefix_extra" "1") | |
16929 | (set_attr "prefix" "evex") | |
16930 | (set_attr "mode" "<sseinsnmode>")]) | |
16931 | ||
16932 | (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1" | |
16933 | [(set (match_operand:V16FI 0 "register_operand" "=v,v") | |
16934 | (vec_duplicate:V16FI | |
16935 | (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))] | |
16936 | "TARGET_AVX512DQ" | |
16937 | "@ | |
16938 | vshuf<shuffletype>32x4\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44} | |
16939 | vbroadcast<shuffletype>32x8\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" | |
16940 | [(set_attr "type" "ssemov") | |
16941 | (set_attr "prefix_extra" "1") | |
16942 | (set_attr "prefix" "evex") | |
16943 | (set_attr "mode" "<sseinsnmode>")]) | |
16944 | ||
16945 | ;; For broadcast[i|f]64x2 | |
16946 | (define_mode_iterator VI8F_BRCST64x2 | |
16947 | [V8DI V8DF (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")]) | |
16948 | ||
16949 | (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1" | |
16950 | [(set (match_operand:VI8F_BRCST64x2 0 "register_operand" "=v,v") | |
16951 | (vec_duplicate:VI8F_BRCST64x2 | |
16952 | (match_operand:<64x2mode> 1 "nonimmediate_operand" "v,m")))] | |
16953 | "TARGET_AVX512DQ" | |
16954 | "@ | |
4854de0d | 16955 | vshuf<shuffletype>64x2\t{$0x0, %<concat_tg_mode>1, %<concat_tg_mode>1, %0<mask_operand2>|%0<mask_operand2>, %<concat_tg_mode>1, %<concat_tg_mode>1, 0x0} |
698ea04f AI |
16956 | vbroadcast<shuffletype>64x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" |
16957 | [(set_attr "type" "ssemov") | |
16958 | (set_attr "prefix_extra" "1") | |
16959 | (set_attr "prefix" "evex") | |
16960 | (set_attr "mode" "<sseinsnmode>")]) | |
16961 | ||
98725d44 AI |
16962 | (define_insn "avx512cd_maskb_vec_dup<mode>" |
16963 | [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v") | |
16964 | (vec_duplicate:VI8_AVX512VL | |
c003c6d6 | 16965 | (zero_extend:DI |
be792bce | 16966 | (match_operand:QI 1 "register_operand" "Yk"))))] |
c003c6d6 AI |
16967 | "TARGET_AVX512CD" |
16968 | "vpbroadcastmb2q\t{%1, %0|%0, %1}" | |
16969 | [(set_attr "type" "mskmov") | |
16970 | (set_attr "prefix" "evex") | |
16971 | (set_attr "mode" "XI")]) | |
16972 | ||
21c924ac AI |
16973 | (define_insn "avx512cd_maskw_vec_dup<mode>" |
16974 | [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v") | |
16975 | (vec_duplicate:VI4_AVX512VL | |
c003c6d6 | 16976 | (zero_extend:SI |
be792bce | 16977 | (match_operand:HI 1 "register_operand" "Yk"))))] |
c003c6d6 AI |
16978 | "TARGET_AVX512CD" |
16979 | "vpbroadcastmw2d\t{%1, %0|%0, %1}" | |
16980 | [(set_attr "type" "mskmov") | |
16981 | (set_attr "prefix" "evex") | |
16982 | (set_attr "mode" "XI")]) | |
16983 | ||
5e04b3b6 RH |
16984 | ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm. |
16985 | ;; If it so happens that the input is in memory, use vbroadcast. | |
16986 | ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128). | |
16987 | (define_insn "*avx_vperm_broadcast_v4sf" | |
16988 | [(set (match_operand:V4SF 0 "register_operand" "=x,x,x") | |
16989 | (vec_select:V4SF | |
16990 | (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x") | |
16991 | (match_parallel 2 "avx_vbroadcast_operand" | |
16992 | [(match_operand 3 "const_int_operand" "C,n,n")])))] | |
16993 | "TARGET_AVX" | |
16994 | { | |
16995 | int elt = INTVAL (operands[3]); | |
16996 | switch (which_alternative) | |
16997 | { | |
16998 | case 0: | |
16999 | case 1: | |
17000 | operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4); | |
eabb5f48 | 17001 | return "vbroadcastss\t{%1, %0|%0, %k1}"; |
5e04b3b6 RH |
17002 | case 2: |
17003 | operands[2] = GEN_INT (elt * 0x55); | |
17004 | return "vpermilps\t{%2, %1, %0|%0, %1, %2}"; | |
17005 | default: | |
17006 | gcc_unreachable (); | |
17007 | } | |
17008 | } | |
17009 | [(set_attr "type" "ssemov,ssemov,sselog1") | |
17010 | (set_attr "prefix_extra" "1") | |
17011 | (set_attr "length_immediate" "0,0,1") | |
17012 | (set_attr "prefix" "vex") | |
17013 | (set_attr "mode" "SF,SF,V4SF")]) | |
17014 | ||
17015 | (define_insn_and_split "*avx_vperm_broadcast_<mode>" | |
6bec6c98 UB |
17016 | [(set (match_operand:VF_256 0 "register_operand" "=x,x,x") |
17017 | (vec_select:VF_256 | |
17018 | (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?x") | |
5e04b3b6 RH |
17019 | (match_parallel 2 "avx_vbroadcast_operand" |
17020 | [(match_operand 3 "const_int_operand" "C,n,n")])))] | |
17021 | "TARGET_AVX" | |
17022 | "#" | |
6945a32e | 17023 | "&& reload_completed && (<MODE>mode != V4DFmode || !TARGET_AVX2)" |
6bec6c98 | 17024 | [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))] |
5e04b3b6 RH |
17025 | { |
17026 | rtx op0 = operands[0], op1 = operands[1]; | |
17027 | int elt = INTVAL (operands[3]); | |
17028 | ||
17029 | if (REG_P (op1)) | |
17030 | { | |
17031 | int mask; | |
17032 | ||
6945a32e JJ |
17033 | if (TARGET_AVX2 && elt == 0) |
17034 | { | |
17035 | emit_insn (gen_vec_dup<mode> (op0, gen_lowpart (<ssescalarmode>mode, | |
17036 | op1))); | |
17037 | DONE; | |
17038 | } | |
17039 | ||
5e04b3b6 RH |
17040 | /* Shuffle element we care about into all elements of the 128-bit lane. |
17041 | The other lane gets shuffled too, but we don't care. */ | |
17042 | if (<MODE>mode == V4DFmode) | |
17043 | mask = (elt & 1 ? 15 : 0); | |
17044 | else | |
17045 | mask = (elt & 3) * 0x55; | |
17046 | emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask))); | |
17047 | ||
17048 | /* Shuffle the lane we care about into both lanes of the dest. */ | |
17049 | mask = (elt / (<ssescalarnum> / 2)) * 0x11; | |
17050 | emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask))); | |
17051 | DONE; | |
17052 | } | |
17053 | ||
0b013847 UB |
17054 | operands[1] = adjust_address (op1, <ssescalarmode>mode, |
17055 | elt * GET_MODE_SIZE (<ssescalarmode>mode)); | |
5e04b3b6 RH |
17056 | }) |
17057 | ||
47490470 | 17058 | (define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>" |
82e86dc6 | 17059 | [(set (match_operand:VF2 0 "register_operand") |
6bec6c98 | 17060 | (vec_select:VF2 |
82e86dc6 UB |
17061 | (match_operand:VF2 1 "nonimmediate_operand") |
17062 | (match_operand:SI 2 "const_0_to_255_operand")))] | |
47490470 | 17063 | "TARGET_AVX && <mask_mode512bit_condition>" |
8a67ca92 RH |
17064 | { |
17065 | int mask = INTVAL (operands[2]); | |
17066 | rtx perm[<ssescalarnum>]; | |
17067 | ||
ec5e777c AI |
17068 | int i; |
17069 | for (i = 0; i < <ssescalarnum>; i = i + 2) | |
8a67ca92 | 17070 | { |
ec5e777c AI |
17071 | perm[i] = GEN_INT (((mask >> i) & 1) + i); |
17072 | perm[i + 1] = GEN_INT (((mask >> (i + 1)) & 1) + i); | |
8a67ca92 RH |
17073 | } |
17074 | ||
17075 | operands[2] | |
17076 | = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm)); | |
17077 | }) | |
17078 | ||
47490470 | 17079 | (define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>" |
82e86dc6 | 17080 | [(set (match_operand:VF1 0 "register_operand") |
6bec6c98 | 17081 | (vec_select:VF1 |
82e86dc6 UB |
17082 | (match_operand:VF1 1 "nonimmediate_operand") |
17083 | (match_operand:SI 2 "const_0_to_255_operand")))] | |
47490470 | 17084 | "TARGET_AVX && <mask_mode512bit_condition>" |
8a67ca92 RH |
17085 | { |
17086 | int mask = INTVAL (operands[2]); | |
17087 | rtx perm[<ssescalarnum>]; | |
17088 | ||
a9ccbba2 AI |
17089 | int i; |
17090 | for (i = 0; i < <ssescalarnum>; i = i + 4) | |
8a67ca92 | 17091 | { |
a9ccbba2 AI |
17092 | perm[i] = GEN_INT (((mask >> 0) & 3) + i); |
17093 | perm[i + 1] = GEN_INT (((mask >> 2) & 3) + i); | |
17094 | perm[i + 2] = GEN_INT (((mask >> 4) & 3) + i); | |
17095 | perm[i + 3] = GEN_INT (((mask >> 6) & 3) + i); | |
8a67ca92 RH |
17096 | } |
17097 | ||
17098 | operands[2] | |
17099 | = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm)); | |
17100 | }) | |
17101 | ||
47490470 | 17102 | (define_insn "*<sse2_avx_avx512f>_vpermilp<mode><mask_name>" |
3f97cb0b | 17103 | [(set (match_operand:VF 0 "register_operand" "=v") |
6bec6c98 | 17104 | (vec_select:VF |
3f97cb0b | 17105 | (match_operand:VF 1 "nonimmediate_operand" "vm") |
200eb7d2 | 17106 | (match_parallel 2 "" |
82e86dc6 | 17107 | [(match_operand 3 "const_int_operand")])))] |
47490470 | 17108 | "TARGET_AVX && <mask_mode512bit_condition> |
200eb7d2 | 17109 | && avx_vpermilp_parallel (operands[2], <MODE>mode)" |
8a67ca92 RH |
17110 | { |
17111 | int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1; | |
17112 | operands[2] = GEN_INT (mask); | |
47490470 | 17113 | return "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"; |
8a67ca92 | 17114 | } |
95879c72 | 17115 | [(set_attr "type" "sselog") |
725fd454 JJ |
17116 | (set_attr "prefix_extra" "1") |
17117 | (set_attr "length_immediate" "1") | |
47490470 | 17118 | (set_attr "prefix" "<mask_prefix>") |
b86f6e9e | 17119 | (set_attr "mode" "<sseinsnmode>")]) |
95879c72 | 17120 | |
47490470 | 17121 | (define_insn "<sse2_avx_avx512f>_vpermilvar<mode>3<mask_name>" |
3f97cb0b | 17122 | [(set (match_operand:VF 0 "register_operand" "=v") |
6bec6c98 | 17123 | (unspec:VF |
3f97cb0b AI |
17124 | [(match_operand:VF 1 "register_operand" "v") |
17125 | (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "vm")] | |
95879c72 | 17126 | UNSPEC_VPERMIL))] |
47490470 AI |
17127 | "TARGET_AVX && <mask_mode512bit_condition>" |
17128 | "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" | |
95879c72 | 17129 | [(set_attr "type" "sselog") |
725fd454 | 17130 | (set_attr "prefix_extra" "1") |
01284895 | 17131 | (set_attr "btver2_decode" "vector") |
47490470 | 17132 | (set_attr "prefix" "<mask_prefix>") |
b86f6e9e AI |
17133 | (set_attr "mode" "<sseinsnmode>")]) |
17134 | ||
c883e5fb AI |
17135 | (define_expand "<avx512>_vpermi2var<mode>3_maskz" |
17136 | [(match_operand:VI48F 0 "register_operand" "=v") | |
17137 | (match_operand:VI48F 1 "register_operand" "v") | |
8b08db1e | 17138 | (match_operand:<sseintvecmode> 2 "register_operand" "0") |
c883e5fb | 17139 | (match_operand:VI48F 3 "nonimmediate_operand" "vm") |
be792bce | 17140 | (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")] |
8b08db1e AI |
17141 | "TARGET_AVX512F" |
17142 | { | |
c883e5fb | 17143 | emit_insn (gen_<avx512>_vpermi2var<mode>3_maskz_1 ( |
8b08db1e AI |
17144 | operands[0], operands[1], operands[2], operands[3], |
17145 | CONST0_RTX (<MODE>mode), operands[4])); | |
17146 | DONE; | |
17147 | }) | |
17148 | ||
3dcc8af5 IT |
17149 | (define_expand "<avx512>_vpermi2var<mode>3_maskz" |
17150 | [(match_operand:VI1_AVX512VL 0 "register_operand") | |
17151 | (match_operand:VI1_AVX512VL 1 "register_operand") | |
17152 | (match_operand:<sseintvecmode> 2 "register_operand") | |
17153 | (match_operand:VI1_AVX512VL 3 "nonimmediate_operand") | |
17154 | (match_operand:<avx512fmaskmode> 4 "register_operand")] | |
17155 | "TARGET_AVX512VBMI" | |
17156 | { | |
17157 | emit_insn (gen_<avx512>_vpermi2var<mode>3_maskz_1 ( | |
17158 | operands[0], operands[1], operands[2], operands[3], | |
17159 | CONST0_RTX (<MODE>mode), operands[4])); | |
17160 | DONE; | |
17161 | }) | |
17162 | ||
c883e5fb AI |
17163 | (define_expand "<avx512>_vpermi2var<mode>3_maskz" |
17164 | [(match_operand:VI2_AVX512VL 0 "register_operand" "=v") | |
17165 | (match_operand:VI2_AVX512VL 1 "register_operand" "v") | |
17166 | (match_operand:<sseintvecmode> 2 "register_operand" "0") | |
17167 | (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm") | |
17168 | (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")] | |
17169 | "TARGET_AVX512BW" | |
17170 | { | |
17171 | emit_insn (gen_<avx512>_vpermi2var<mode>3_maskz_1 ( | |
17172 | operands[0], operands[1], operands[2], operands[3], | |
17173 | CONST0_RTX (<MODE>mode), operands[4])); | |
17174 | DONE; | |
17175 | }) | |
17176 | ||
17177 | (define_insn "<avx512>_vpermi2var<mode>3<sd_maskz_name>" | |
17178 | [(set (match_operand:VI48F 0 "register_operand" "=v") | |
17179 | (unspec:VI48F | |
17180 | [(match_operand:VI48F 1 "register_operand" "v") | |
ab931c71 | 17181 | (match_operand:<sseintvecmode> 2 "register_operand" "0") |
c883e5fb | 17182 | (match_operand:VI48F 3 "nonimmediate_operand" "vm")] |
ab931c71 AI |
17183 | UNSPEC_VPERMI2))] |
17184 | "TARGET_AVX512F" | |
8b08db1e | 17185 | "vpermi2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}" |
ab931c71 AI |
17186 | [(set_attr "type" "sselog") |
17187 | (set_attr "prefix" "evex") | |
17188 | (set_attr "mode" "<sseinsnmode>")]) | |
17189 | ||
3dcc8af5 IT |
17190 | (define_insn "<avx512>_vpermi2var<mode>3<sd_maskz_name>" |
17191 | [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v") | |
17192 | (unspec:VI1_AVX512VL | |
17193 | [(match_operand:VI1_AVX512VL 1 "register_operand" "v") | |
17194 | (match_operand:<sseintvecmode> 2 "register_operand" "0") | |
17195 | (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")] | |
17196 | UNSPEC_VPERMI2))] | |
17197 | "TARGET_AVX512VBMI" | |
17198 | "vpermi2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}" | |
17199 | [(set_attr "type" "sselog") | |
17200 | (set_attr "prefix" "evex") | |
17201 | (set_attr "mode" "<sseinsnmode>")]) | |
17202 | ||
c883e5fb AI |
17203 | (define_insn "<avx512>_vpermi2var<mode>3<sd_maskz_name>" |
17204 | [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v") | |
17205 | (unspec:VI2_AVX512VL | |
17206 | [(match_operand:VI2_AVX512VL 1 "register_operand" "v") | |
17207 | (match_operand:<sseintvecmode> 2 "register_operand" "0") | |
17208 | (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")] | |
17209 | UNSPEC_VPERMI2))] | |
17210 | "TARGET_AVX512BW" | |
17211 | "vpermi2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}" | |
17212 | [(set_attr "type" "sselog") | |
17213 | (set_attr "prefix" "evex") | |
17214 | (set_attr "mode" "<sseinsnmode>")]) | |
17215 | ||
17216 | (define_insn "<avx512>_vpermi2var<mode>3_mask" | |
17217 | [(set (match_operand:VI48F 0 "register_operand" "=v") | |
17218 | (vec_merge:VI48F | |
17219 | (unspec:VI48F | |
17220 | [(match_operand:VI48F 1 "register_operand" "v") | |
47490470 | 17221 | (match_operand:<sseintvecmode> 2 "register_operand" "0") |
c883e5fb | 17222 | (match_operand:VI48F 3 "nonimmediate_operand" "vm")] |
47490470 AI |
17223 | UNSPEC_VPERMI2_MASK) |
17224 | (match_dup 0) | |
be792bce | 17225 | (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] |
47490470 AI |
17226 | "TARGET_AVX512F" |
17227 | "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}" | |
17228 | [(set_attr "type" "sselog") | |
17229 | (set_attr "prefix" "evex") | |
17230 | (set_attr "mode" "<sseinsnmode>")]) | |
17231 | ||
3dcc8af5 IT |
17232 | (define_insn "<avx512>_vpermi2var<mode>3_mask" |
17233 | [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v") | |
17234 | (vec_merge:VI1_AVX512VL | |
17235 | (unspec:VI1_AVX512VL | |
17236 | [(match_operand:VI1_AVX512VL 1 "register_operand" "v") | |
17237 | (match_operand:<sseintvecmode> 2 "register_operand" "0") | |
17238 | (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")] | |
17239 | UNSPEC_VPERMI2_MASK) | |
17240 | (match_dup 0) | |
17241 | (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] | |
17242 | "TARGET_AVX512VBMI" | |
17243 | "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}" | |
17244 | [(set_attr "type" "sselog") | |
17245 | (set_attr "prefix" "evex") | |
17246 | (set_attr "mode" "<sseinsnmode>")]) | |
17247 | ||
c883e5fb AI |
17248 | (define_insn "<avx512>_vpermi2var<mode>3_mask" |
17249 | [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v") | |
17250 | (vec_merge:VI2_AVX512VL | |
17251 | (unspec:VI2_AVX512VL | |
17252 | [(match_operand:VI2_AVX512VL 1 "register_operand" "v") | |
17253 | (match_operand:<sseintvecmode> 2 "register_operand" "0") | |
17254 | (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")] | |
17255 | UNSPEC_VPERMI2_MASK) | |
17256 | (match_dup 0) | |
17257 | (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] | |
17258 | "TARGET_AVX512BW" | |
17259 | "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}" | |
17260 | [(set_attr "type" "sselog") | |
17261 | (set_attr "prefix" "evex") | |
17262 | (set_attr "mode" "<sseinsnmode>")]) | |
17263 | ||
17264 | (define_expand "<avx512>_vpermt2var<mode>3_maskz" | |
17265 | [(match_operand:VI48F 0 "register_operand" "=v") | |
8b08db1e | 17266 | (match_operand:<sseintvecmode> 1 "register_operand" "v") |
c883e5fb AI |
17267 | (match_operand:VI48F 2 "register_operand" "0") |
17268 | (match_operand:VI48F 3 "nonimmediate_operand" "vm") | |
be792bce | 17269 | (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")] |
8b08db1e AI |
17270 | "TARGET_AVX512F" |
17271 | { | |
c883e5fb | 17272 | emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 ( |
8b08db1e AI |
17273 | operands[0], operands[1], operands[2], operands[3], |
17274 | CONST0_RTX (<MODE>mode), operands[4])); | |
17275 | DONE; | |
17276 | }) | |
17277 | ||
3dcc8af5 IT |
17278 | (define_expand "<avx512>_vpermt2var<mode>3_maskz" |
17279 | [(match_operand:VI1_AVX512VL 0 "register_operand" "=v") | |
17280 | (match_operand:<sseintvecmode> 1 "register_operand" "v") | |
17281 | (match_operand:VI1_AVX512VL 2 "register_operand" "0") | |
17282 | (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm") | |
17283 | (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")] | |
17284 | "TARGET_AVX512VBMI" | |
17285 | { | |
17286 | emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 ( | |
17287 | operands[0], operands[1], operands[2], operands[3], | |
17288 | CONST0_RTX (<MODE>mode), operands[4])); | |
17289 | DONE; | |
17290 | }) | |
17291 | ||
c883e5fb AI |
17292 | (define_expand "<avx512>_vpermt2var<mode>3_maskz" |
17293 | [(match_operand:VI2_AVX512VL 0 "register_operand" "=v") | |
17294 | (match_operand:<sseintvecmode> 1 "register_operand" "v") | |
17295 | (match_operand:VI2_AVX512VL 2 "register_operand" "0") | |
17296 | (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm") | |
17297 | (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")] | |
17298 | "TARGET_AVX512BW" | |
17299 | { | |
17300 | emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 ( | |
17301 | operands[0], operands[1], operands[2], operands[3], | |
17302 | CONST0_RTX (<MODE>mode), operands[4])); | |
17303 | DONE; | |
17304 | }) | |
17305 | ||
17306 | (define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>" | |
17307 | [(set (match_operand:VI48F 0 "register_operand" "=v") | |
17308 | (unspec:VI48F | |
ab931c71 | 17309 | [(match_operand:<sseintvecmode> 1 "register_operand" "v") |
c883e5fb AI |
17310 | (match_operand:VI48F 2 "register_operand" "0") |
17311 | (match_operand:VI48F 3 "nonimmediate_operand" "vm")] | |
ab931c71 AI |
17312 | UNSPEC_VPERMT2))] |
17313 | "TARGET_AVX512F" | |
8b08db1e | 17314 | "vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}" |
ab931c71 AI |
17315 | [(set_attr "type" "sselog") |
17316 | (set_attr "prefix" "evex") | |
17317 | (set_attr "mode" "<sseinsnmode>")]) | |
95879c72 | 17318 | |
3dcc8af5 IT |
17319 | (define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>" |
17320 | [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v") | |
17321 | (unspec:VI1_AVX512VL | |
17322 | [(match_operand:<sseintvecmode> 1 "register_operand" "v") | |
17323 | (match_operand:VI1_AVX512VL 2 "register_operand" "0") | |
17324 | (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")] | |
17325 | UNSPEC_VPERMT2))] | |
17326 | "TARGET_AVX512VBMI" | |
17327 | "vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}" | |
17328 | [(set_attr "type" "sselog") | |
17329 | (set_attr "prefix" "evex") | |
17330 | (set_attr "mode" "<sseinsnmode>")]) | |
17331 | ||
c883e5fb AI |
17332 | (define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>" |
17333 | [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v") | |
17334 | (unspec:VI2_AVX512VL | |
17335 | [(match_operand:<sseintvecmode> 1 "register_operand" "v") | |
17336 | (match_operand:VI2_AVX512VL 2 "register_operand" "0") | |
17337 | (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")] | |
17338 | UNSPEC_VPERMT2))] | |
17339 | "TARGET_AVX512BW" | |
17340 | "vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}" | |
17341 | [(set_attr "type" "sselog") | |
17342 | (set_attr "prefix" "evex") | |
17343 | (set_attr "mode" "<sseinsnmode>")]) | |
17344 | ||
17345 | (define_insn "<avx512>_vpermt2var<mode>3_mask" | |
17346 | [(set (match_operand:VI48F 0 "register_operand" "=v") | |
17347 | (vec_merge:VI48F | |
17348 | (unspec:VI48F | |
47490470 | 17349 | [(match_operand:<sseintvecmode> 1 "register_operand" "v") |
c883e5fb AI |
17350 | (match_operand:VI48F 2 "register_operand" "0") |
17351 | (match_operand:VI48F 3 "nonimmediate_operand" "vm")] | |
47490470 AI |
17352 | UNSPEC_VPERMT2) |
17353 | (match_dup 2) | |
be792bce | 17354 | (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] |
47490470 AI |
17355 | "TARGET_AVX512F" |
17356 | "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}" | |
17357 | [(set_attr "type" "sselog") | |
17358 | (set_attr "prefix" "evex") | |
17359 | (set_attr "mode" "<sseinsnmode>")]) | |
17360 | ||
3dcc8af5 IT |
17361 | (define_insn "<avx512>_vpermt2var<mode>3_mask" |
17362 | [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v") | |
17363 | (vec_merge:VI1_AVX512VL | |
17364 | (unspec:VI1_AVX512VL | |
17365 | [(match_operand:<sseintvecmode> 1 "register_operand" "v") | |
17366 | (match_operand:VI1_AVX512VL 2 "register_operand" "0") | |
17367 | (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")] | |
17368 | UNSPEC_VPERMT2) | |
17369 | (match_dup 2) | |
17370 | (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] | |
17371 | "TARGET_AVX512VBMI" | |
17372 | "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}" | |
17373 | [(set_attr "type" "sselog") | |
17374 | (set_attr "prefix" "evex") | |
17375 | (set_attr "mode" "<sseinsnmode>")]) | |
17376 | ||
c883e5fb AI |
17377 | (define_insn "<avx512>_vpermt2var<mode>3_mask" |
17378 | [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v") | |
17379 | (vec_merge:VI2_AVX512VL | |
17380 | (unspec:VI2_AVX512VL | |
17381 | [(match_operand:<sseintvecmode> 1 "register_operand" "v") | |
17382 | (match_operand:VI2_AVX512VL 2 "register_operand" "0") | |
17383 | (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")] | |
17384 | UNSPEC_VPERMT2) | |
17385 | (match_dup 2) | |
17386 | (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] | |
17387 | "TARGET_AVX512BW" | |
17388 | "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}" | |
17389 | [(set_attr "type" "sselog") | |
17390 | (set_attr "prefix" "evex") | |
17391 | (set_attr "mode" "<sseinsnmode>")]) | |
17392 | ||
ca659f6e | 17393 | (define_expand "avx_vperm2f128<mode>3" |
82e86dc6 | 17394 | [(set (match_operand:AVX256MODE2P 0 "register_operand") |
ca659f6e | 17395 | (unspec:AVX256MODE2P |
82e86dc6 UB |
17396 | [(match_operand:AVX256MODE2P 1 "register_operand") |
17397 | (match_operand:AVX256MODE2P 2 "nonimmediate_operand") | |
17398 | (match_operand:SI 3 "const_0_to_255_operand")] | |
ca659f6e RH |
17399 | UNSPEC_VPERMIL2F128))] |
17400 | "TARGET_AVX" | |
17401 | { | |
a1b5171b | 17402 | int mask = INTVAL (operands[3]); |
ca659f6e RH |
17403 | if ((mask & 0x88) == 0) |
17404 | { | |
17405 | rtx perm[<ssescalarnum>], t1, t2; | |
17406 | int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2; | |
17407 | ||
17408 | base = (mask & 3) * nelt2; | |
17409 | for (i = 0; i < nelt2; ++i) | |
17410 | perm[i] = GEN_INT (base + i); | |
17411 | ||
17412 | base = ((mask >> 4) & 3) * nelt2; | |
17413 | for (i = 0; i < nelt2; ++i) | |
17414 | perm[i + nelt2] = GEN_INT (base + i); | |
17415 | ||
cbb734aa | 17416 | t2 = gen_rtx_VEC_CONCAT (<ssedoublevecmode>mode, |
ca659f6e RH |
17417 | operands[1], operands[2]); |
17418 | t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm)); | |
17419 | t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1); | |
f7df4a84 | 17420 | t2 = gen_rtx_SET (operands[0], t2); |
ca659f6e RH |
17421 | emit_insn (t2); |
17422 | DONE; | |
17423 | } | |
17424 | }) | |
17425 | ||
17426 | ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which | |
17427 | ;; means that in order to represent this properly in rtl we'd have to | |
17428 | ;; nest *another* vec_concat with a zero operand and do the select from | |
17429 | ;; a 4x wide vector. That doesn't seem very nice. | |
17430 | (define_insn "*avx_vperm2f128<mode>_full" | |
95879c72 L |
17431 | [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x") |
17432 | (unspec:AVX256MODE2P | |
17433 | [(match_operand:AVX256MODE2P 1 "register_operand" "x") | |
17434 | (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm") | |
17435 | (match_operand:SI 3 "const_0_to_255_operand" "n")] | |
17436 | UNSPEC_VPERMIL2F128))] | |
17437 | "TARGET_AVX" | |
1db4406e | 17438 | "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}" |
95879c72 | 17439 | [(set_attr "type" "sselog") |
725fd454 JJ |
17440 | (set_attr "prefix_extra" "1") |
17441 | (set_attr "length_immediate" "1") | |
95879c72 | 17442 | (set_attr "prefix" "vex") |
1db4406e | 17443 | (set_attr "mode" "<sseinsnmode>")]) |
95879c72 | 17444 | |
ca659f6e RH |
17445 | (define_insn "*avx_vperm2f128<mode>_nozero" |
17446 | [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x") | |
17447 | (vec_select:AVX256MODE2P | |
cbb734aa | 17448 | (vec_concat:<ssedoublevecmode> |
ca659f6e RH |
17449 | (match_operand:AVX256MODE2P 1 "register_operand" "x") |
17450 | (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")) | |
200eb7d2 | 17451 | (match_parallel 3 "" |
82e86dc6 | 17452 | [(match_operand 4 "const_int_operand")])))] |
200eb7d2 UB |
17453 | "TARGET_AVX |
17454 | && avx_vperm2f128_parallel (operands[3], <MODE>mode)" | |
ca659f6e RH |
17455 | { |
17456 | int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1; | |
5d54daac JJ |
17457 | if (mask == 0x12) |
17458 | return "vinsert<i128>\t{$0, %x2, %1, %0|%0, %1, %x2, 0}"; | |
17459 | if (mask == 0x20) | |
17460 | return "vinsert<i128>\t{$1, %x2, %1, %0|%0, %1, %x2, 1}"; | |
ca659f6e | 17461 | operands[3] = GEN_INT (mask); |
1db4406e | 17462 | return "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}"; |
ca659f6e RH |
17463 | } |
17464 | [(set_attr "type" "sselog") | |
17465 | (set_attr "prefix_extra" "1") | |
17466 | (set_attr "length_immediate" "1") | |
17467 | (set_attr "prefix" "vex") | |
1db4406e | 17468 | (set_attr "mode" "<sseinsnmode>")]) |
ca659f6e | 17469 | |
edbb0749 ES |
17470 | (define_insn "*ssse3_palignr<mode>_perm" |
17471 | [(set (match_operand:V_128 0 "register_operand" "=x,x") | |
17472 | (vec_select:V_128 | |
17473 | (match_operand:V_128 1 "register_operand" "0,x") | |
17474 | (match_parallel 2 "palignr_operand" | |
17475 | [(match_operand 3 "const_int_operand" "n, n")])))] | |
17476 | "TARGET_SSSE3" | |
17477 | { | |
ef4bddc2 | 17478 | machine_mode imode = GET_MODE_INNER (GET_MODE (operands[0])); |
edbb0749 ES |
17479 | operands[2] = GEN_INT (INTVAL (operands[3]) * GET_MODE_SIZE (imode)); |
17480 | ||
17481 | switch (which_alternative) | |
17482 | { | |
17483 | case 0: | |
17484 | return "palignr\t{%2, %1, %0|%0, %1, %2}"; | |
17485 | case 1: | |
17486 | return "vpalignr\t{%2, %1, %1, %0|%0, %1, %1, %2}"; | |
17487 | default: | |
17488 | gcc_unreachable (); | |
17489 | } | |
17490 | } | |
17491 | [(set_attr "isa" "noavx,avx") | |
17492 | (set_attr "type" "sseishft") | |
17493 | (set_attr "atom_unit" "sishuf") | |
17494 | (set_attr "prefix_data16" "1,*") | |
17495 | (set_attr "prefix_extra" "1") | |
17496 | (set_attr "length_immediate" "1") | |
17497 | (set_attr "prefix" "orig,vex")]) | |
17498 | ||
d0337ddc AI |
17499 | (define_expand "avx512vl_vinsert<mode>" |
17500 | [(match_operand:VI48F_256 0 "register_operand") | |
17501 | (match_operand:VI48F_256 1 "register_operand") | |
17502 | (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand") | |
17503 | (match_operand:SI 3 "const_0_to_1_operand") | |
17504 | (match_operand:VI48F_256 4 "register_operand") | |
17505 | (match_operand:<avx512fmaskmode> 5 "register_operand")] | |
17506 | "TARGET_AVX512VL" | |
17507 | { | |
17508 | rtx (*insn)(rtx, rtx, rtx, rtx, rtx); | |
17509 | ||
17510 | switch (INTVAL (operands[3])) | |
17511 | { | |
17512 | case 0: | |
17513 | insn = gen_vec_set_lo_<mode>_mask; | |
17514 | break; | |
17515 | case 1: | |
17516 | insn = gen_vec_set_hi_<mode>_mask; | |
17517 | break; | |
17518 | default: | |
17519 | gcc_unreachable (); | |
17520 | } | |
17521 | ||
17522 | emit_insn (insn (operands[0], operands[1], operands[2], operands[4], | |
17523 | operands[5])); | |
17524 | DONE; | |
17525 | }) | |
17526 | ||
95879c72 | 17527 | (define_expand "avx_vinsertf128<mode>" |
82e86dc6 UB |
17528 | [(match_operand:V_256 0 "register_operand") |
17529 | (match_operand:V_256 1 "register_operand") | |
17530 | (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand") | |
17531 | (match_operand:SI 3 "const_0_to_1_operand")] | |
95879c72 L |
17532 | "TARGET_AVX" |
17533 | { | |
16cc4440 UB |
17534 | rtx (*insn)(rtx, rtx, rtx); |
17535 | ||
95879c72 L |
17536 | switch (INTVAL (operands[3])) |
17537 | { | |
17538 | case 0: | |
16cc4440 | 17539 | insn = gen_vec_set_lo_<mode>; |
95879c72 L |
17540 | break; |
17541 | case 1: | |
16cc4440 | 17542 | insn = gen_vec_set_hi_<mode>; |
95879c72 L |
17543 | break; |
17544 | default: | |
17545 | gcc_unreachable (); | |
17546 | } | |
16cc4440 UB |
17547 | |
17548 | emit_insn (insn (operands[0], operands[1], operands[2])); | |
95879c72 L |
17549 | DONE; |
17550 | }) | |
17551 | ||
d0337ddc AI |
17552 | (define_insn "vec_set_lo_<mode><mask_name>" |
17553 | [(set (match_operand:VI8F_256 0 "register_operand" "=v") | |
6bec6c98 | 17554 | (vec_concat:VI8F_256 |
d0337ddc | 17555 | (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm") |
cbb734aa | 17556 | (vec_select:<ssehalfvecmode> |
d0337ddc | 17557 | (match_operand:VI8F_256 1 "register_operand" "v") |
95879c72 L |
17558 | (parallel [(const_int 2) (const_int 3)]))))] |
17559 | "TARGET_AVX" | |
d0337ddc AI |
17560 | { |
17561 | if (TARGET_AVX512VL) | |
17562 | return "vinsert<shuffletype>64x2\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}"; | |
17563 | else | |
17564 | return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"; | |
17565 | } | |
95879c72 | 17566 | [(set_attr "type" "sselog") |
725fd454 JJ |
17567 | (set_attr "prefix_extra" "1") |
17568 | (set_attr "length_immediate" "1") | |
95879c72 | 17569 | (set_attr "prefix" "vex") |
1db4406e | 17570 | (set_attr "mode" "<sseinsnmode>")]) |
95879c72 | 17571 | |
d0337ddc AI |
17572 | (define_insn "vec_set_hi_<mode><mask_name>" |
17573 | [(set (match_operand:VI8F_256 0 "register_operand" "=v") | |
6bec6c98 | 17574 | (vec_concat:VI8F_256 |
cbb734aa | 17575 | (vec_select:<ssehalfvecmode> |
d0337ddc | 17576 | (match_operand:VI8F_256 1 "register_operand" "v") |
95879c72 | 17577 | (parallel [(const_int 0) (const_int 1)])) |
d0337ddc | 17578 | (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))] |
95879c72 | 17579 | "TARGET_AVX" |
d0337ddc AI |
17580 | { |
17581 | if (TARGET_AVX512VL) | |
17582 | return "vinsert<shuffletype>64x2\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}"; | |
17583 | else | |
17584 | return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"; | |
17585 | } | |
95879c72 | 17586 | [(set_attr "type" "sselog") |
725fd454 JJ |
17587 | (set_attr "prefix_extra" "1") |
17588 | (set_attr "length_immediate" "1") | |
95879c72 | 17589 | (set_attr "prefix" "vex") |
1db4406e | 17590 | (set_attr "mode" "<sseinsnmode>")]) |
95879c72 | 17591 | |
d0337ddc AI |
17592 | (define_insn "vec_set_lo_<mode><mask_name>" |
17593 | [(set (match_operand:VI4F_256 0 "register_operand" "=v") | |
6bec6c98 | 17594 | (vec_concat:VI4F_256 |
d0337ddc | 17595 | (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm") |
cbb734aa | 17596 | (vec_select:<ssehalfvecmode> |
d0337ddc | 17597 | (match_operand:VI4F_256 1 "register_operand" "v") |
95879c72 L |
17598 | (parallel [(const_int 4) (const_int 5) |
17599 | (const_int 6) (const_int 7)]))))] | |
17600 | "TARGET_AVX" | |
d0337ddc AI |
17601 | { |
17602 | if (TARGET_AVX512VL) | |
17603 | return "vinsert<shuffletype>32x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}"; | |
17604 | else | |
17605 | return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"; | |
17606 | } | |
95879c72 | 17607 | [(set_attr "type" "sselog") |
725fd454 JJ |
17608 | (set_attr "prefix_extra" "1") |
17609 | (set_attr "length_immediate" "1") | |
95879c72 | 17610 | (set_attr "prefix" "vex") |
1db4406e | 17611 | (set_attr "mode" "<sseinsnmode>")]) |
95879c72 | 17612 | |
d0337ddc AI |
17613 | (define_insn "vec_set_hi_<mode><mask_name>" |
17614 | [(set (match_operand:VI4F_256 0 "register_operand" "=v") | |
6bec6c98 | 17615 | (vec_concat:VI4F_256 |
cbb734aa | 17616 | (vec_select:<ssehalfvecmode> |
d0337ddc | 17617 | (match_operand:VI4F_256 1 "register_operand" "v") |
95879c72 L |
17618 | (parallel [(const_int 0) (const_int 1) |
17619 | (const_int 2) (const_int 3)])) | |
d0337ddc | 17620 | (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))] |
95879c72 | 17621 | "TARGET_AVX" |
d0337ddc AI |
17622 | { |
17623 | if (TARGET_AVX512VL) | |
17624 | return "vinsert<shuffletype>32x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}"; | |
17625 | else | |
17626 | return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"; | |
17627 | } | |
95879c72 | 17628 | [(set_attr "type" "sselog") |
725fd454 JJ |
17629 | (set_attr "prefix_extra" "1") |
17630 | (set_attr "length_immediate" "1") | |
95879c72 | 17631 | (set_attr "prefix" "vex") |
1db4406e | 17632 | (set_attr "mode" "<sseinsnmode>")]) |
95879c72 L |
17633 | |
17634 | (define_insn "vec_set_lo_v16hi" | |
17635 | [(set (match_operand:V16HI 0 "register_operand" "=x") | |
17636 | (vec_concat:V16HI | |
17637 | (match_operand:V8HI 2 "nonimmediate_operand" "xm") | |
17638 | (vec_select:V8HI | |
17639 | (match_operand:V16HI 1 "register_operand" "x") | |
17640 | (parallel [(const_int 8) (const_int 9) | |
17641 | (const_int 10) (const_int 11) | |
17642 | (const_int 12) (const_int 13) | |
17643 | (const_int 14) (const_int 15)]))))] | |
17644 | "TARGET_AVX" | |
1db4406e | 17645 | "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}" |
95879c72 | 17646 | [(set_attr "type" "sselog") |
725fd454 JJ |
17647 | (set_attr "prefix_extra" "1") |
17648 | (set_attr "length_immediate" "1") | |
95879c72 | 17649 | (set_attr "prefix" "vex") |
1db4406e | 17650 | (set_attr "mode" "OI")]) |
95879c72 L |
17651 | |
17652 | (define_insn "vec_set_hi_v16hi" | |
17653 | [(set (match_operand:V16HI 0 "register_operand" "=x") | |
17654 | (vec_concat:V16HI | |
17655 | (vec_select:V8HI | |
17656 | (match_operand:V16HI 1 "register_operand" "x") | |
17657 | (parallel [(const_int 0) (const_int 1) | |
17658 | (const_int 2) (const_int 3) | |
17659 | (const_int 4) (const_int 5) | |
17660 | (const_int 6) (const_int 7)])) | |
17661 | (match_operand:V8HI 2 "nonimmediate_operand" "xm")))] | |
17662 | "TARGET_AVX" | |
1db4406e | 17663 | "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}" |
95879c72 | 17664 | [(set_attr "type" "sselog") |
725fd454 JJ |
17665 | (set_attr "prefix_extra" "1") |
17666 | (set_attr "length_immediate" "1") | |
95879c72 | 17667 | (set_attr "prefix" "vex") |
1db4406e | 17668 | (set_attr "mode" "OI")]) |
95879c72 L |
17669 | |
17670 | (define_insn "vec_set_lo_v32qi" | |
17671 | [(set (match_operand:V32QI 0 "register_operand" "=x") | |
17672 | (vec_concat:V32QI | |
17673 | (match_operand:V16QI 2 "nonimmediate_operand" "xm") | |
17674 | (vec_select:V16QI | |
17675 | (match_operand:V32QI 1 "register_operand" "x") | |
17676 | (parallel [(const_int 16) (const_int 17) | |
17677 | (const_int 18) (const_int 19) | |
17678 | (const_int 20) (const_int 21) | |
17679 | (const_int 22) (const_int 23) | |
17680 | (const_int 24) (const_int 25) | |
17681 | (const_int 26) (const_int 27) | |
17682 | (const_int 28) (const_int 29) | |
17683 | (const_int 30) (const_int 31)]))))] | |
17684 | "TARGET_AVX" | |
1db4406e | 17685 | "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}" |
95879c72 | 17686 | [(set_attr "type" "sselog") |
725fd454 JJ |
17687 | (set_attr "prefix_extra" "1") |
17688 | (set_attr "length_immediate" "1") | |
95879c72 | 17689 | (set_attr "prefix" "vex") |
1db4406e | 17690 | (set_attr "mode" "OI")]) |
95879c72 L |
17691 | |
17692 | (define_insn "vec_set_hi_v32qi" | |
17693 | [(set (match_operand:V32QI 0 "register_operand" "=x") | |
17694 | (vec_concat:V32QI | |
17695 | (vec_select:V16QI | |
17696 | (match_operand:V32QI 1 "register_operand" "x") | |
17697 | (parallel [(const_int 0) (const_int 1) | |
17698 | (const_int 2) (const_int 3) | |
17699 | (const_int 4) (const_int 5) | |
17700 | (const_int 6) (const_int 7) | |
17701 | (const_int 8) (const_int 9) | |
17702 | (const_int 10) (const_int 11) | |
17703 | (const_int 12) (const_int 13) | |
17704 | (const_int 14) (const_int 15)])) | |
17705 | (match_operand:V16QI 2 "nonimmediate_operand" "xm")))] | |
17706 | "TARGET_AVX" | |
1db4406e | 17707 | "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}" |
95879c72 | 17708 | [(set_attr "type" "sselog") |
725fd454 JJ |
17709 | (set_attr "prefix_extra" "1") |
17710 | (set_attr "length_immediate" "1") | |
95879c72 | 17711 | (set_attr "prefix" "vex") |
1db4406e | 17712 | (set_attr "mode" "OI")]) |
95879c72 | 17713 | |
7b45b87f UB |
17714 | (define_insn "<avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>" |
17715 | [(set (match_operand:V48_AVX2 0 "register_operand" "=x") | |
977e83a3 | 17716 | (unspec:V48_AVX2 |
7b45b87f UB |
17717 | [(match_operand:<sseintvecmode> 2 "register_operand" "x") |
17718 | (match_operand:V48_AVX2 1 "memory_operand" "m")] | |
fe646a69 UB |
17719 | UNSPEC_MASKMOV))] |
17720 | "TARGET_AVX" | |
7b45b87f | 17721 | "v<sseintprefix>maskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}" |
977e83a3 KY |
17722 | [(set_attr "type" "sselog1") |
17723 | (set_attr "prefix_extra" "1") | |
17724 | (set_attr "prefix" "vex") | |
01284895 | 17725 | (set_attr "btver2_decode" "vector") |
977e83a3 KY |
17726 | (set_attr "mode" "<sseinsnmode>")]) |
17727 | ||
7b45b87f | 17728 | (define_insn "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>" |
e4ecb922 | 17729 | [(set (match_operand:V48_AVX2 0 "memory_operand" "+m") |
7b45b87f | 17730 | (unspec:V48_AVX2 |
fe646a69 | 17731 | [(match_operand:<sseintvecmode> 1 "register_operand" "x") |
7b45b87f | 17732 | (match_operand:V48_AVX2 2 "register_operand" "x") |
f60c2554 UB |
17733 | (match_dup 0)] |
17734 | UNSPEC_MASKMOV))] | |
fe646a69 | 17735 | "TARGET_AVX" |
7b45b87f | 17736 | "v<sseintprefix>maskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" |
95879c72 | 17737 | [(set_attr "type" "sselog1") |
725fd454 | 17738 | (set_attr "prefix_extra" "1") |
95879c72 | 17739 | (set_attr "prefix" "vex") |
01284895 | 17740 | (set_attr "btver2_decode" "vector") |
7b45b87f | 17741 | (set_attr "mode" "<sseinsnmode>")]) |
95879c72 | 17742 | |
5ce9450f JJ |
17743 | (define_expand "maskload<mode>" |
17744 | [(set (match_operand:V48_AVX2 0 "register_operand") | |
17745 | (unspec:V48_AVX2 | |
17746 | [(match_operand:<sseintvecmode> 2 "register_operand") | |
17747 | (match_operand:V48_AVX2 1 "memory_operand")] | |
17748 | UNSPEC_MASKMOV))] | |
17749 | "TARGET_AVX") | |
17750 | ||
17751 | (define_expand "maskstore<mode>" | |
17752 | [(set (match_operand:V48_AVX2 0 "memory_operand") | |
17753 | (unspec:V48_AVX2 | |
17754 | [(match_operand:<sseintvecmode> 2 "register_operand") | |
17755 | (match_operand:V48_AVX2 1 "register_operand") | |
17756 | (match_dup 0)] | |
17757 | UNSPEC_MASKMOV))] | |
17758 | "TARGET_AVX") | |
17759 | ||
cd7c6bc5 | 17760 | (define_insn_and_split "avx_<castmode><avxsizesuffix>_<castmode>" |
9b2133cd | 17761 | [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m") |
95879c72 | 17762 | (unspec:AVX256MODE2P |
cbb734aa | 17763 | [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")] |
95879c72 L |
17764 | UNSPEC_CAST))] |
17765 | "TARGET_AVX" | |
9b2133cd L |
17766 | "#" |
17767 | "&& reload_completed" | |
17768 | [(const_int 0)] | |
95879c72 | 17769 | { |
31f9eb59 | 17770 | rtx op0 = operands[0]; |
9b2133cd | 17771 | rtx op1 = operands[1]; |
31f9eb59 L |
17772 | if (REG_P (op0)) |
17773 | op0 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op0)); | |
6cf9eb27 | 17774 | else |
9b2133cd | 17775 | op1 = gen_rtx_REG (<MODE>mode, REGNO (op1)); |
31f9eb59 | 17776 | emit_move_insn (op0, op1); |
9b2133cd L |
17777 | DONE; |
17778 | }) | |
95879c72 L |
17779 | |
17780 | (define_expand "vec_init<mode>" | |
82e86dc6 UB |
17781 | [(match_operand:V_256 0 "register_operand") |
17782 | (match_operand 1)] | |
95879c72 L |
17783 | "TARGET_AVX" |
17784 | { | |
17785 | ix86_expand_vector_init (false, operands[0], operands[1]); | |
17786 | DONE; | |
17787 | }) | |
17788 | ||
ab931c71 | 17789 | (define_expand "vec_init<mode>" |
bf584ca0 | 17790 | [(match_operand:VF48_I1248 0 "register_operand") |
ab931c71 AI |
17791 | (match_operand 1)] |
17792 | "TARGET_AVX512F" | |
17793 | { | |
17794 | ix86_expand_vector_init (false, operands[0], operands[1]); | |
17795 | DONE; | |
17796 | }) | |
17797 | ||
cf92ae7f | 17798 | (define_insn "<avx2_avx512>_ashrv<mode><mask_name>" |
21c924ac AI |
17799 | [(set (match_operand:VI48_AVX512F_AVX512VL 0 "register_operand" "=v") |
17800 | (ashiftrt:VI48_AVX512F_AVX512VL | |
17801 | (match_operand:VI48_AVX512F_AVX512VL 1 "register_operand" "v") | |
17802 | (match_operand:VI48_AVX512F_AVX512VL 2 "nonimmediate_operand" "vm")))] | |
47490470 AI |
17803 | "TARGET_AVX2 && <mask_mode512bit_condition>" |
17804 | "vpsrav<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" | |
977e83a3 | 17805 | [(set_attr "type" "sseishft") |
5348cff8 | 17806 | (set_attr "prefix" "maybe_evex") |
ee3b466d | 17807 | (set_attr "mode" "<sseinsnmode>")]) |
977e83a3 | 17808 | |
cf92ae7f | 17809 | (define_insn "<avx2_avx512>_ashrv<mode><mask_name>" |
21c924ac AI |
17810 | [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v") |
17811 | (ashiftrt:VI2_AVX512VL | |
17812 | (match_operand:VI2_AVX512VL 1 "register_operand" "v") | |
17813 | (match_operand:VI2_AVX512VL 2 "nonimmediate_operand" "vm")))] | |
17814 | "TARGET_AVX512BW" | |
17815 | "vpsravw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" | |
17816 | [(set_attr "type" "sseishft") | |
17817 | (set_attr "prefix" "maybe_evex") | |
17818 | (set_attr "mode" "<sseinsnmode>")]) | |
17819 | ||
cf92ae7f | 17820 | (define_insn "<avx2_avx512>_<shift_insn>v<mode><mask_name>" |
38f4b550 AI |
17821 | [(set (match_operand:VI48_AVX512F 0 "register_operand" "=v") |
17822 | (any_lshift:VI48_AVX512F | |
17823 | (match_operand:VI48_AVX512F 1 "register_operand" "v") | |
17824 | (match_operand:VI48_AVX512F 2 "nonimmediate_operand" "vm")))] | |
47490470 AI |
17825 | "TARGET_AVX2 && <mask_mode512bit_condition>" |
17826 | "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" | |
977e83a3 | 17827 | [(set_attr "type" "sseishft") |
5348cff8 | 17828 | (set_attr "prefix" "maybe_evex") |
977e83a3 | 17829 | (set_attr "mode" "<sseinsnmode>")]) |
38f4b550 | 17830 | |
cf92ae7f | 17831 | (define_insn "<avx2_avx512>_<shift_insn>v<mode><mask_name>" |
38f4b550 AI |
17832 | [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v") |
17833 | (any_lshift:VI2_AVX512VL | |
17834 | (match_operand:VI2_AVX512VL 1 "register_operand" "v") | |
17835 | (match_operand:VI2_AVX512VL 2 "nonimmediate_operand" "vm")))] | |
17836 | "TARGET_AVX512BW" | |
17837 | "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" | |
17838 | [(set_attr "type" "sseishft") | |
17839 | (set_attr "prefix" "maybe_evex") | |
17840 | (set_attr "mode" "<sseinsnmode>")]) | |
977e83a3 | 17841 | |
44167383 | 17842 | (define_insn "avx_vec_concat<mode>" |
ec5e777c AI |
17843 | [(set (match_operand:V_256_512 0 "register_operand" "=x,x") |
17844 | (vec_concat:V_256_512 | |
cbb734aa UB |
17845 | (match_operand:<ssehalfvecmode> 1 "register_operand" "x,x") |
17846 | (match_operand:<ssehalfvecmode> 2 "vector_move_operand" "xm,C")))] | |
95879c72 L |
17847 | "TARGET_AVX" |
17848 | { | |
17849 | switch (which_alternative) | |
17850 | { | |
17851 | case 0: | |
ec5e777c | 17852 | return "vinsert<i128>\t{$0x1, %2, %<concat_tg_mode>1, %0|%0, %<concat_tg_mode>1, %2, 0x1}"; |
95879c72 L |
17853 | case 1: |
17854 | switch (get_attr_mode (insn)) | |
977e83a3 | 17855 | { |
ec5e777c AI |
17856 | case MODE_V16SF: |
17857 | return "vmovaps\t{%1, %t0|%t0, %1}"; | |
17858 | case MODE_V8DF: | |
17859 | return "vmovapd\t{%1, %t0|%t0, %1}"; | |
95879c72 L |
17860 | case MODE_V8SF: |
17861 | return "vmovaps\t{%1, %x0|%x0, %1}"; | |
17862 | case MODE_V4DF: | |
17863 | return "vmovapd\t{%1, %x0|%x0, %1}"; | |
ec5e777c AI |
17864 | case MODE_XI: |
17865 | return "vmovdqa\t{%1, %t0|%t0, %1}"; | |
17866 | case MODE_OI: | |
95879c72 | 17867 | return "vmovdqa\t{%1, %x0|%x0, %1}"; |
ec5e777c AI |
17868 | default: |
17869 | gcc_unreachable (); | |
95879c72 L |
17870 | } |
17871 | default: | |
17872 | gcc_unreachable (); | |
17873 | } | |
17874 | } | |
17875 | [(set_attr "type" "sselog,ssemov") | |
725fd454 JJ |
17876 | (set_attr "prefix_extra" "1,*") |
17877 | (set_attr "length_immediate" "1,*") | |
ec5e777c | 17878 | (set_attr "prefix" "maybe_evex") |
cbb734aa | 17879 | (set_attr "mode" "<sseinsnmode>")]) |
4ee89d5f | 17880 | |
b570c6dd AI |
17881 | (define_insn "vcvtph2ps<mask_name>" |
17882 | [(set (match_operand:V4SF 0 "register_operand" "=v") | |
4ee89d5f | 17883 | (vec_select:V4SF |
b570c6dd | 17884 | (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "v")] |
4ee89d5f L |
17885 | UNSPEC_VCVTPH2PS) |
17886 | (parallel [(const_int 0) (const_int 1) | |
0a2818d5 | 17887 | (const_int 2) (const_int 3)])))] |
b570c6dd AI |
17888 | "TARGET_F16C || TARGET_AVX512VL" |
17889 | "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" | |
4ee89d5f | 17890 | [(set_attr "type" "ssecvt") |
b570c6dd | 17891 | (set_attr "prefix" "maybe_evex") |
4ee89d5f L |
17892 | (set_attr "mode" "V4SF")]) |
17893 | ||
b570c6dd AI |
17894 | (define_insn "*vcvtph2ps_load<mask_name>" |
17895 | [(set (match_operand:V4SF 0 "register_operand" "=v") | |
4ee89d5f L |
17896 | (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")] |
17897 | UNSPEC_VCVTPH2PS))] | |
b570c6dd AI |
17898 | "TARGET_F16C || TARGET_AVX512VL" |
17899 | "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" | |
4ee89d5f L |
17900 | [(set_attr "type" "ssecvt") |
17901 | (set_attr "prefix" "vex") | |
17902 | (set_attr "mode" "V8SF")]) | |
17903 | ||
b570c6dd AI |
17904 | (define_insn "vcvtph2ps256<mask_name>" |
17905 | [(set (match_operand:V8SF 0 "register_operand" "=v") | |
17906 | (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "vm")] | |
4ee89d5f | 17907 | UNSPEC_VCVTPH2PS))] |
b570c6dd AI |
17908 | "TARGET_F16C || TARGET_AVX512VL" |
17909 | "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" | |
4ee89d5f L |
17910 | [(set_attr "type" "ssecvt") |
17911 | (set_attr "prefix" "vex") | |
01284895 | 17912 | (set_attr "btver2_decode" "double") |
4ee89d5f L |
17913 | (set_attr "mode" "V8SF")]) |
17914 | ||
8a6ef760 | 17915 | (define_insn "<mask_codefor>avx512f_vcvtph2ps512<mask_name><round_saeonly_name>" |
c003c6d6 | 17916 | [(set (match_operand:V16SF 0 "register_operand" "=v") |
47490470 | 17917 | (unspec:V16SF |
8a6ef760 | 17918 | [(match_operand:V16HI 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")] |
47490470 | 17919 | UNSPEC_VCVTPH2PS))] |
c003c6d6 | 17920 | "TARGET_AVX512F" |
8a6ef760 | 17921 | "vcvtph2ps\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}" |
c003c6d6 AI |
17922 | [(set_attr "type" "ssecvt") |
17923 | (set_attr "prefix" "evex") | |
17924 | (set_attr "mode" "V16SF")]) | |
17925 | ||
b570c6dd AI |
17926 | (define_expand "vcvtps2ph_mask" |
17927 | [(set (match_operand:V8HI 0 "register_operand") | |
17928 | (vec_merge:V8HI | |
17929 | (vec_concat:V8HI | |
17930 | (unspec:V4HI [(match_operand:V4SF 1 "register_operand") | |
17931 | (match_operand:SI 2 "const_0_to_255_operand")] | |
17932 | UNSPEC_VCVTPS2PH) | |
17933 | (match_dup 5)) | |
17934 | (match_operand:V8HI 3 "vector_move_operand") | |
17935 | (match_operand:QI 4 "register_operand")))] | |
17936 | "TARGET_AVX512VL" | |
17937 | "operands[5] = CONST0_RTX (V4HImode);") | |
17938 | ||
4ee89d5f | 17939 | (define_expand "vcvtps2ph" |
82e86dc6 | 17940 | [(set (match_operand:V8HI 0 "register_operand") |
4ee89d5f | 17941 | (vec_concat:V8HI |
82e86dc6 UB |
17942 | (unspec:V4HI [(match_operand:V4SF 1 "register_operand") |
17943 | (match_operand:SI 2 "const_0_to_255_operand")] | |
4ee89d5f L |
17944 | UNSPEC_VCVTPS2PH) |
17945 | (match_dup 3)))] | |
17946 | "TARGET_F16C" | |
17947 | "operands[3] = CONST0_RTX (V4HImode);") | |
17948 | ||
b570c6dd AI |
17949 | (define_insn "*vcvtps2ph<mask_name>" |
17950 | [(set (match_operand:V8HI 0 "register_operand" "=v") | |
4ee89d5f | 17951 | (vec_concat:V8HI |
b570c6dd | 17952 | (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "v") |
c96b4102 | 17953 | (match_operand:SI 2 "const_0_to_255_operand" "N")] |
4ee89d5f | 17954 | UNSPEC_VCVTPS2PH) |
82e86dc6 | 17955 | (match_operand:V4HI 3 "const0_operand")))] |
0774c160 | 17956 | "(TARGET_F16C || TARGET_AVX512VL) && <mask_avx512vl_condition>" |
b570c6dd | 17957 | "vcvtps2ph\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}" |
4ee89d5f | 17958 | [(set_attr "type" "ssecvt") |
b570c6dd | 17959 | (set_attr "prefix" "maybe_evex") |
4ee89d5f L |
17960 | (set_attr "mode" "V4SF")]) |
17961 | ||
b570c6dd | 17962 | (define_insn "*vcvtps2ph_store<mask_name>" |
4ee89d5f L |
17963 | [(set (match_operand:V4HI 0 "memory_operand" "=m") |
17964 | (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x") | |
c96b4102 | 17965 | (match_operand:SI 2 "const_0_to_255_operand" "N")] |
4ee89d5f | 17966 | UNSPEC_VCVTPS2PH))] |
b570c6dd AI |
17967 | "TARGET_F16C || TARGET_AVX512VL" |
17968 | "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" | |
4ee89d5f | 17969 | [(set_attr "type" "ssecvt") |
b570c6dd | 17970 | (set_attr "prefix" "maybe_evex") |
4ee89d5f L |
17971 | (set_attr "mode" "V4SF")]) |
17972 | ||
b570c6dd | 17973 | (define_insn "vcvtps2ph256<mask_name>" |
4ee89d5f L |
17974 | [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm") |
17975 | (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "x") | |
c96b4102 | 17976 | (match_operand:SI 2 "const_0_to_255_operand" "N")] |
4ee89d5f | 17977 | UNSPEC_VCVTPS2PH))] |
b570c6dd AI |
17978 | "TARGET_F16C || TARGET_AVX512VL" |
17979 | "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" | |
4ee89d5f | 17980 | [(set_attr "type" "ssecvt") |
b570c6dd | 17981 | (set_attr "prefix" "maybe_evex") |
01284895 | 17982 | (set_attr "btver2_decode" "vector") |
4ee89d5f | 17983 | (set_attr "mode" "V8SF")]) |
977e83a3 | 17984 | |
47490470 | 17985 | (define_insn "<mask_codefor>avx512f_vcvtps2ph512<mask_name>" |
c003c6d6 | 17986 | [(set (match_operand:V16HI 0 "nonimmediate_operand" "=vm") |
47490470 AI |
17987 | (unspec:V16HI |
17988 | [(match_operand:V16SF 1 "register_operand" "v") | |
17989 | (match_operand:SI 2 "const_0_to_255_operand" "N")] | |
17990 | UNSPEC_VCVTPS2PH))] | |
c003c6d6 | 17991 | "TARGET_AVX512F" |
47490470 | 17992 | "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" |
c003c6d6 AI |
17993 | [(set_attr "type" "ssecvt") |
17994 | (set_attr "prefix" "evex") | |
17995 | (set_attr "mode" "V16SF")]) | |
17996 | ||
977e83a3 KY |
17997 | ;; For gather* insn patterns |
17998 | (define_mode_iterator VEC_GATHER_MODE | |
17999 | [V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF]) | |
aec7ae7d | 18000 | (define_mode_attr VEC_GATHER_IDXSI |
ab931c71 AI |
18001 | [(V2DI "V4SI") (V4DI "V4SI") (V8DI "V8SI") |
18002 | (V2DF "V4SI") (V4DF "V4SI") (V8DF "V8SI") | |
18003 | (V4SI "V4SI") (V8SI "V8SI") (V16SI "V16SI") | |
18004 | (V4SF "V4SI") (V8SF "V8SI") (V16SF "V16SI")]) | |
18005 | ||
aec7ae7d | 18006 | (define_mode_attr VEC_GATHER_IDXDI |
ab931c71 AI |
18007 | [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI") |
18008 | (V2DF "V2DI") (V4DF "V4DI") (V8DF "V8DI") | |
18009 | (V4SI "V2DI") (V8SI "V4DI") (V16SI "V8DI") | |
18010 | (V4SF "V2DI") (V8SF "V4DI") (V16SF "V8DI")]) | |
18011 | ||
aec7ae7d | 18012 | (define_mode_attr VEC_GATHER_SRCDI |
ab931c71 AI |
18013 | [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI") |
18014 | (V2DF "V2DF") (V4DF "V4DF") (V8DF "V8DF") | |
18015 | (V4SI "V4SI") (V8SI "V4SI") (V16SI "V8SI") | |
18016 | (V4SF "V4SF") (V8SF "V4SF") (V16SF "V8SF")]) | |
977e83a3 KY |
18017 | |
18018 | (define_expand "avx2_gathersi<mode>" | |
82e86dc6 | 18019 | [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand") |
9d901b0e | 18020 | (unspec:VEC_GATHER_MODE |
82e86dc6 | 18021 | [(match_operand:VEC_GATHER_MODE 1 "register_operand") |
e43451aa JJ |
18022 | (mem:<ssescalarmode> |
18023 | (match_par_dup 7 | |
82e86dc6 | 18024 | [(match_operand 2 "vsib_address_operand") |
aec7ae7d | 18025 | (match_operand:<VEC_GATHER_IDXSI> |
82e86dc6 UB |
18026 | 3 "register_operand") |
18027 | (match_operand:SI 5 "const1248_operand ")])) | |
9d901b0e | 18028 | (mem:BLK (scratch)) |
82e86dc6 | 18029 | (match_operand:VEC_GATHER_MODE 4 "register_operand")] |
9d901b0e | 18030 | UNSPEC_GATHER)) |
82e86dc6 | 18031 | (clobber (match_scratch:VEC_GATHER_MODE 6))])] |
e43451aa JJ |
18032 | "TARGET_AVX2" |
18033 | { | |
18034 | operands[7] | |
18035 | = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3], | |
18036 | operands[5]), UNSPEC_VSIBADDR); | |
18037 | }) | |
977e83a3 KY |
18038 | |
18039 | (define_insn "*avx2_gathersi<mode>" | |
9d901b0e | 18040 | [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x") |
977e83a3 | 18041 | (unspec:VEC_GATHER_MODE |
9d901b0e | 18042 | [(match_operand:VEC_GATHER_MODE 2 "register_operand" "0") |
e43451aa JJ |
18043 | (match_operator:<ssescalarmode> 7 "vsib_mem_operator" |
18044 | [(unspec:P | |
65e95828 | 18045 | [(match_operand:P 3 "vsib_address_operand" "Tv") |
aec7ae7d | 18046 | (match_operand:<VEC_GATHER_IDXSI> 4 "register_operand" "x") |
e43451aa JJ |
18047 | (match_operand:SI 6 "const1248_operand" "n")] |
18048 | UNSPEC_VSIBADDR)]) | |
9d901b0e | 18049 | (mem:BLK (scratch)) |
e43451aa | 18050 | (match_operand:VEC_GATHER_MODE 5 "register_operand" "1")] |
9d901b0e JJ |
18051 | UNSPEC_GATHER)) |
18052 | (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))] | |
977e83a3 | 18053 | "TARGET_AVX2" |
e43451aa | 18054 | "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}" |
977e83a3 KY |
18055 | [(set_attr "type" "ssemov") |
18056 | (set_attr "prefix" "vex") | |
18057 | (set_attr "mode" "<sseinsnmode>")]) | |
18058 | ||
da80a646 JJ |
18059 | (define_insn "*avx2_gathersi<mode>_2" |
18060 | [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x") | |
18061 | (unspec:VEC_GATHER_MODE | |
18062 | [(pc) | |
18063 | (match_operator:<ssescalarmode> 6 "vsib_mem_operator" | |
18064 | [(unspec:P | |
65e95828 | 18065 | [(match_operand:P 2 "vsib_address_operand" "Tv") |
da80a646 JJ |
18066 | (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "x") |
18067 | (match_operand:SI 5 "const1248_operand" "n")] | |
18068 | UNSPEC_VSIBADDR)]) | |
18069 | (mem:BLK (scratch)) | |
18070 | (match_operand:VEC_GATHER_MODE 4 "register_operand" "1")] | |
18071 | UNSPEC_GATHER)) | |
18072 | (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))] | |
18073 | "TARGET_AVX2" | |
18074 | "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}" | |
18075 | [(set_attr "type" "ssemov") | |
18076 | (set_attr "prefix" "vex") | |
18077 | (set_attr "mode" "<sseinsnmode>")]) | |
18078 | ||
977e83a3 | 18079 | (define_expand "avx2_gatherdi<mode>" |
82e86dc6 | 18080 | [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand") |
9d901b0e | 18081 | (unspec:VEC_GATHER_MODE |
82e86dc6 | 18082 | [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand") |
e43451aa JJ |
18083 | (mem:<ssescalarmode> |
18084 | (match_par_dup 7 | |
82e86dc6 | 18085 | [(match_operand 2 "vsib_address_operand") |
aec7ae7d | 18086 | (match_operand:<VEC_GATHER_IDXDI> |
82e86dc6 UB |
18087 | 3 "register_operand") |
18088 | (match_operand:SI 5 "const1248_operand ")])) | |
9d901b0e | 18089 | (mem:BLK (scratch)) |
aec7ae7d | 18090 | (match_operand:<VEC_GATHER_SRCDI> |
82e86dc6 | 18091 | 4 "register_operand")] |
9d901b0e | 18092 | UNSPEC_GATHER)) |
82e86dc6 | 18093 | (clobber (match_scratch:VEC_GATHER_MODE 6))])] |
e43451aa JJ |
18094 | "TARGET_AVX2" |
18095 | { | |
18096 | operands[7] | |
18097 | = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3], | |
18098 | operands[5]), UNSPEC_VSIBADDR); | |
18099 | }) | |
977e83a3 KY |
18100 | |
18101 | (define_insn "*avx2_gatherdi<mode>" | |
aec7ae7d JJ |
18102 | [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x") |
18103 | (unspec:VEC_GATHER_MODE | |
18104 | [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0") | |
e43451aa JJ |
18105 | (match_operator:<ssescalarmode> 7 "vsib_mem_operator" |
18106 | [(unspec:P | |
65e95828 | 18107 | [(match_operand:P 3 "vsib_address_operand" "Tv") |
aec7ae7d | 18108 | (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x") |
e43451aa JJ |
18109 | (match_operand:SI 6 "const1248_operand" "n")] |
18110 | UNSPEC_VSIBADDR)]) | |
9d901b0e | 18111 | (mem:BLK (scratch)) |
aec7ae7d | 18112 | (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")] |
9d901b0e | 18113 | UNSPEC_GATHER)) |
aec7ae7d | 18114 | (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))] |
977e83a3 | 18115 | "TARGET_AVX2" |
aec7ae7d | 18116 | "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}" |
977e83a3 KY |
18117 | [(set_attr "type" "ssemov") |
18118 | (set_attr "prefix" "vex") | |
18119 | (set_attr "mode" "<sseinsnmode>")]) | |
da80a646 JJ |
18120 | |
18121 | (define_insn "*avx2_gatherdi<mode>_2" | |
18122 | [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x") | |
18123 | (unspec:VEC_GATHER_MODE | |
18124 | [(pc) | |
18125 | (match_operator:<ssescalarmode> 6 "vsib_mem_operator" | |
18126 | [(unspec:P | |
65e95828 | 18127 | [(match_operand:P 2 "vsib_address_operand" "Tv") |
da80a646 JJ |
18128 | (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x") |
18129 | (match_operand:SI 5 "const1248_operand" "n")] | |
18130 | UNSPEC_VSIBADDR)]) | |
18131 | (mem:BLK (scratch)) | |
18132 | (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")] | |
18133 | UNSPEC_GATHER)) | |
18134 | (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))] | |
18135 | "TARGET_AVX2" | |
18136 | { | |
18137 | if (<MODE>mode != <VEC_GATHER_SRCDI>mode) | |
18138 | return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}"; | |
18139 | return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"; | |
18140 | } | |
18141 | [(set_attr "type" "ssemov") | |
18142 | (set_attr "prefix" "vex") | |
18143 | (set_attr "mode" "<sseinsnmode>")]) | |
06046046 JJ |
18144 | |
18145 | (define_insn "*avx2_gatherdi<mode>_3" | |
18146 | [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x") | |
18147 | (vec_select:<VEC_GATHER_SRCDI> | |
18148 | (unspec:VI4F_256 | |
18149 | [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0") | |
18150 | (match_operator:<ssescalarmode> 7 "vsib_mem_operator" | |
18151 | [(unspec:P | |
65e95828 | 18152 | [(match_operand:P 3 "vsib_address_operand" "Tv") |
06046046 JJ |
18153 | (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x") |
18154 | (match_operand:SI 6 "const1248_operand" "n")] | |
18155 | UNSPEC_VSIBADDR)]) | |
18156 | (mem:BLK (scratch)) | |
18157 | (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")] | |
18158 | UNSPEC_GATHER) | |
18159 | (parallel [(const_int 0) (const_int 1) | |
18160 | (const_int 2) (const_int 3)]))) | |
18161 | (clobber (match_scratch:VI4F_256 1 "=&x"))] | |
18162 | "TARGET_AVX2" | |
18163 | "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}" | |
18164 | [(set_attr "type" "ssemov") | |
18165 | (set_attr "prefix" "vex") | |
18166 | (set_attr "mode" "<sseinsnmode>")]) | |
18167 | ||
18168 | (define_insn "*avx2_gatherdi<mode>_4" | |
18169 | [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x") | |
18170 | (vec_select:<VEC_GATHER_SRCDI> | |
18171 | (unspec:VI4F_256 | |
18172 | [(pc) | |
18173 | (match_operator:<ssescalarmode> 6 "vsib_mem_operator" | |
18174 | [(unspec:P | |
65e95828 | 18175 | [(match_operand:P 2 "vsib_address_operand" "Tv") |
06046046 JJ |
18176 | (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x") |
18177 | (match_operand:SI 5 "const1248_operand" "n")] | |
18178 | UNSPEC_VSIBADDR)]) | |
18179 | (mem:BLK (scratch)) | |
18180 | (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")] | |
18181 | UNSPEC_GATHER) | |
18182 | (parallel [(const_int 0) (const_int 1) | |
18183 | (const_int 2) (const_int 3)]))) | |
18184 | (clobber (match_scratch:VI4F_256 1 "=&x"))] | |
18185 | "TARGET_AVX2" | |
18186 | "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}" | |
18187 | [(set_attr "type" "ssemov") | |
18188 | (set_attr "prefix" "vex") | |
18189 | (set_attr "mode" "<sseinsnmode>")]) | |
ab931c71 | 18190 | |
be746da1 AI |
18191 | (define_expand "<avx512>_gathersi<mode>" |
18192 | [(parallel [(set (match_operand:VI48F 0 "register_operand") | |
18193 | (unspec:VI48F | |
18194 | [(match_operand:VI48F 1 "register_operand") | |
ab931c71 AI |
18195 | (match_operand:<avx512fmaskmode> 4 "register_operand") |
18196 | (mem:<ssescalarmode> | |
18197 | (match_par_dup 6 | |
18198 | [(match_operand 2 "vsib_address_operand") | |
18199 | (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand") | |
18200 | (match_operand:SI 5 "const1248_operand")]))] | |
18201 | UNSPEC_GATHER)) | |
18202 | (clobber (match_scratch:<avx512fmaskmode> 7))])] | |
18203 | "TARGET_AVX512F" | |
18204 | { | |
18205 | operands[6] | |
18206 | = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3], | |
18207 | operands[5]), UNSPEC_VSIBADDR); | |
18208 | }) | |
18209 | ||
18210 | (define_insn "*avx512f_gathersi<mode>" | |
be746da1 AI |
18211 | [(set (match_operand:VI48F 0 "register_operand" "=&v") |
18212 | (unspec:VI48F | |
18213 | [(match_operand:VI48F 1 "register_operand" "0") | |
ab931c71 AI |
18214 | (match_operand:<avx512fmaskmode> 7 "register_operand" "2") |
18215 | (match_operator:<ssescalarmode> 6 "vsib_mem_operator" | |
18216 | [(unspec:P | |
65e95828 | 18217 | [(match_operand:P 4 "vsib_address_operand" "Tv") |
ab931c71 AI |
18218 | (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "v") |
18219 | (match_operand:SI 5 "const1248_operand" "n")] | |
18220 | UNSPEC_VSIBADDR)])] | |
18221 | UNSPEC_GATHER)) | |
be792bce | 18222 | (clobber (match_scratch:<avx512fmaskmode> 2 "=&Yk"))] |
ab931c71 AI |
18223 | "TARGET_AVX512F" |
18224 | "v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %g6}" | |
18225 | [(set_attr "type" "ssemov") | |
18226 | (set_attr "prefix" "evex") | |
18227 | (set_attr "mode" "<sseinsnmode>")]) | |
18228 | ||
18229 | (define_insn "*avx512f_gathersi<mode>_2" | |
be746da1 AI |
18230 | [(set (match_operand:VI48F 0 "register_operand" "=&v") |
18231 | (unspec:VI48F | |
ab931c71 AI |
18232 | [(pc) |
18233 | (match_operand:<avx512fmaskmode> 6 "register_operand" "1") | |
18234 | (match_operator:<ssescalarmode> 5 "vsib_mem_operator" | |
18235 | [(unspec:P | |
65e95828 | 18236 | [(match_operand:P 3 "vsib_address_operand" "Tv") |
ab931c71 AI |
18237 | (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v") |
18238 | (match_operand:SI 4 "const1248_operand" "n")] | |
18239 | UNSPEC_VSIBADDR)])] | |
18240 | UNSPEC_GATHER)) | |
be792bce | 18241 | (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))] |
ab931c71 AI |
18242 | "TARGET_AVX512F" |
18243 | "v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %g5}" | |
18244 | [(set_attr "type" "ssemov") | |
18245 | (set_attr "prefix" "evex") | |
18246 | (set_attr "mode" "<sseinsnmode>")]) | |
18247 | ||
18248 | ||
be746da1 AI |
18249 | (define_expand "<avx512>_gatherdi<mode>" |
18250 | [(parallel [(set (match_operand:VI48F 0 "register_operand") | |
18251 | (unspec:VI48F | |
ab931c71 AI |
18252 | [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand") |
18253 | (match_operand:QI 4 "register_operand") | |
18254 | (mem:<ssescalarmode> | |
18255 | (match_par_dup 6 | |
18256 | [(match_operand 2 "vsib_address_operand") | |
18257 | (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand") | |
18258 | (match_operand:SI 5 "const1248_operand")]))] | |
18259 | UNSPEC_GATHER)) | |
18260 | (clobber (match_scratch:QI 7))])] | |
18261 | "TARGET_AVX512F" | |
18262 | { | |
18263 | operands[6] | |
18264 | = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3], | |
18265 | operands[5]), UNSPEC_VSIBADDR); | |
18266 | }) | |
18267 | ||
18268 | (define_insn "*avx512f_gatherdi<mode>" | |
be746da1 AI |
18269 | [(set (match_operand:VI48F 0 "register_operand" "=&v") |
18270 | (unspec:VI48F | |
ab931c71 AI |
18271 | [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand" "0") |
18272 | (match_operand:QI 7 "register_operand" "2") | |
18273 | (match_operator:<ssescalarmode> 6 "vsib_mem_operator" | |
18274 | [(unspec:P | |
65e95828 | 18275 | [(match_operand:P 4 "vsib_address_operand" "Tv") |
ab931c71 AI |
18276 | (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "v") |
18277 | (match_operand:SI 5 "const1248_operand" "n")] | |
18278 | UNSPEC_VSIBADDR)])] | |
18279 | UNSPEC_GATHER)) | |
be792bce | 18280 | (clobber (match_scratch:QI 2 "=&Yk"))] |
ab931c71 AI |
18281 | "TARGET_AVX512F" |
18282 | "v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %g6}" | |
18283 | [(set_attr "type" "ssemov") | |
18284 | (set_attr "prefix" "evex") | |
18285 | (set_attr "mode" "<sseinsnmode>")]) | |
18286 | ||
18287 | (define_insn "*avx512f_gatherdi<mode>_2" | |
be746da1 AI |
18288 | [(set (match_operand:VI48F 0 "register_operand" "=&v") |
18289 | (unspec:VI48F | |
ab931c71 AI |
18290 | [(pc) |
18291 | (match_operand:QI 6 "register_operand" "1") | |
18292 | (match_operator:<ssescalarmode> 5 "vsib_mem_operator" | |
18293 | [(unspec:P | |
65e95828 | 18294 | [(match_operand:P 3 "vsib_address_operand" "Tv") |
ab931c71 AI |
18295 | (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v") |
18296 | (match_operand:SI 4 "const1248_operand" "n")] | |
18297 | UNSPEC_VSIBADDR)])] | |
18298 | UNSPEC_GATHER)) | |
be792bce | 18299 | (clobber (match_scratch:QI 1 "=&Yk"))] |
ab931c71 AI |
18300 | "TARGET_AVX512F" |
18301 | { | |
18302 | if (<MODE>mode != <VEC_GATHER_SRCDI>mode) | |
be746da1 | 18303 | { |
f2864cc4 | 18304 | if (<MODE_SIZE> != 64) |
be746da1 AI |
18305 | return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %x0%{%1%}|%t0%{%1%}, %g5}"; |
18306 | else | |
18307 | return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %g5}"; | |
18308 | } | |
ab931c71 AI |
18309 | return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %g5}"; |
18310 | } | |
18311 | [(set_attr "type" "ssemov") | |
18312 | (set_attr "prefix" "evex") | |
18313 | (set_attr "mode" "<sseinsnmode>")]) | |
18314 | ||
be746da1 AI |
18315 | (define_expand "<avx512>_scattersi<mode>" |
18316 | [(parallel [(set (mem:VI48F | |
ab931c71 AI |
18317 | (match_par_dup 5 |
18318 | [(match_operand 0 "vsib_address_operand") | |
18319 | (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand") | |
18320 | (match_operand:SI 4 "const1248_operand")])) | |
be746da1 | 18321 | (unspec:VI48F |
ab931c71 | 18322 | [(match_operand:<avx512fmaskmode> 1 "register_operand") |
be746da1 | 18323 | (match_operand:VI48F 3 "register_operand")] |
ab931c71 AI |
18324 | UNSPEC_SCATTER)) |
18325 | (clobber (match_scratch:<avx512fmaskmode> 6))])] | |
18326 | "TARGET_AVX512F" | |
18327 | { | |
18328 | operands[5] | |
18329 | = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2], | |
18330 | operands[4]), UNSPEC_VSIBADDR); | |
18331 | }) | |
18332 | ||
;; Matcher for 32-bit-index scatter.  Operand 6 is constrained ("1") to
;; be the same register as the clobbered scratch 1: the scatter insn
;; consumes (and zeroes) its mask register, so the pattern both uses it
;; as input and earlyclobbers it.
(define_insn "*avx512f_scattersi<mode>"
  [(set (match_operator:VI48F 5 "vsib_mem_operator"
	  [(unspec:P
	     [(match_operand:P 0 "vsib_address_operand" "Tv")
	      (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
	      (match_operand:SI 4 "const1248_operand" "n")]
	     UNSPEC_VSIBADDR)])
	(unspec:VI48F
	  [(match_operand:<avx512fmaskmode> 6 "register_operand" "1")
	   (match_operand:VI48F 3 "register_operand" "v")]
	  UNSPEC_SCATTER))
   (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
  "TARGET_AVX512F"
  "v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%5%{%1%}, %3}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
18350 | ||
be746da1 AI |
;; Expander for 64-bit-index scatter stores (vscatterq{d,q,ps,pd}).
;; Mirrors the SI-index expander above but uses a DI index vector; the
;; mask is QImode because at most 8 elements are addressed with DI
;; indices.
(define_expand "<avx512>_scatterdi<mode>"
  [(parallel [(set (mem:VI48F
		     (match_par_dup 5
		       [(match_operand 0 "vsib_address_operand")
			(match_operand:<VEC_GATHER_IDXDI> 2 "register_operand")
			(match_operand:SI 4 "const1248_operand")]))
	      (unspec:VI48F
		[(match_operand:QI 1 "register_operand")
		 (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand")]
		UNSPEC_SCATTER))
	      (clobber (match_scratch:QI 6))])]
  "TARGET_AVX512F"
{
  ;; Build the VSIB (base + index * scale) address unspec for operand 5.
  operands[5]
    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
					operands[4]), UNSPEC_VSIBADDR);
})
18368 | ||
;; Matcher for 64-bit-index scatter.  As with the SI variant, operand 6
;; is tied to the clobbered mask scratch 1 because the instruction
;; zeroes its mask register while executing.
(define_insn "*avx512f_scatterdi<mode>"
  [(set (match_operator:VI48F 5 "vsib_mem_operator"
	  [(unspec:P
	     [(match_operand:P 0 "vsib_address_operand" "Tv")
	      (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
	      (match_operand:SI 4 "const1248_operand" "n")]
	     UNSPEC_VSIBADDR)])
	(unspec:VI48F
	  [(match_operand:QI 6 "register_operand" "1")
	   (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand" "v")]
	  UNSPEC_SCATTER))
   (clobber (match_scratch:QI 1 "=&Yk"))]
  "TARGET_AVX512F"
  "v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%5%{%1%}, %3}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
0fe65b75 | 18386 | |
f7be73c8 AI |
;; Masked compress to register: pack the mask-selected elements of
;; operand 1 into the low positions of operand 0.  Operand 2 ("0C") is
;; the merge source — either the old destination value or zero
;; (zero-masking); %N2 prints the {z} modifier when it is const0.
(define_insn "<avx512>_compress<mode>_mask"
  [(set (match_operand:VI48F 0 "register_operand" "=v")
	(unspec:VI48F
	  [(match_operand:VI48F 1 "register_operand" "v")
	   (match_operand:VI48F 2 "vector_move_operand" "0C")
	   (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")]
	  UNSPEC_COMPRESS))]
  "TARGET_AVX512F"
  "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
18399 | ||
f7be73c8 AI |
;; Masked compress-store: pack the mask-selected elements of operand 1
;; into contiguous memory at operand 0; bytes beyond the packed run are
;; left untouched, which is why the old memory contents appear as an
;; input via match_dup 0.
;; Fix: operand 1 used constraint "x" (legacy SSE regs xmm0-15 only);
;; this is an EVEX-encoded AVX-512 insn that can take xmm16-31, and the
;; sibling compress-to-register pattern already uses "v".  Use "v" so
;; the register allocator is not needlessly restricted.
(define_insn "<avx512>_compressstore<mode>_mask"
  [(set (match_operand:VI48F 0 "memory_operand" "=m")
	(unspec:VI48F
	  [(match_operand:VI48F 1 "register_operand" "v")
	   (match_dup 0)
	   (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")]
	  UNSPEC_COMPRESS_STORE))]
  "TARGET_AVX512F"
  "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "memory" "store")
   (set_attr "mode" "<sseinsnmode>")])
18413 | ||
f7be73c8 AI |
;; Zero-masking expand: same as the merge-masking insn below, but the
;; merge source (operand 2) is forced to the zero vector by the
;; preparation statement.
(define_expand "<avx512>_expand<mode>_maskz"
  [(set (match_operand:VI48F 0 "register_operand")
	(unspec:VI48F
	  [(match_operand:VI48F 1 "nonimmediate_operand")
	   (match_operand:VI48F 2 "vector_move_operand")
	   (match_operand:<avx512fmaskmode> 3 "register_operand")]
	  UNSPEC_EXPAND))]
  "TARGET_AVX512F"
  "operands[2] = CONST0_RTX (<MODE>mode);")
18423 | ||
f7be73c8 AI |
;; Masked expand: scatter the low packed elements of operand 1 into the
;; mask-selected positions of operand 0; unselected positions take the
;; merge source (operand 2, old value or zero).  Two alternatives:
;; register source and memory (load-form) source.
(define_insn "<avx512>_expand<mode>_mask"
  [(set (match_operand:VI48F 0 "register_operand" "=v,v")
	(unspec:VI48F
	  [(match_operand:VI48F 1 "nonimmediate_operand" "v,m")
	   (match_operand:VI48F 2 "vector_move_operand" "0C,0C")
	   (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")]
	  UNSPEC_EXPAND))]
  "TARGET_AVX512F"
  "v<sseintprefix>expand<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "memory" "none,load")
   (set_attr "mode" "<sseinsnmode>")])
18437 | ||
b9826286 AI |
;; AVX512DQ VRANGEPS/VRANGEPD: per-element min/max/absolute selection
;; controlled by immediate operand 3 (0..15).  Supports masking and
;; SAE via the <mask_name>/<round_saeonly_name> subst machinery.
(define_insn "avx512dq_rangep<mode><mask_name><round_saeonly_name>"
  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
	(unspec:VF_AVX512VL
	  [(match_operand:VF_AVX512VL 1 "register_operand" "v")
	   (match_operand:VF_AVX512VL 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
	   (match_operand:SI 3 "const_0_to_15_operand")]
	  UNSPEC_RANGE))]
  "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>"
  "vrange<ssemodesuffix>\t{<round_saeonly_mask_op4>%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3<round_saeonly_mask_op4>}"
  [(set_attr "type" "sse")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])
18450 | ||
;; Scalar VRANGESS/VRANGESD: range operation on the low element only;
;; the vec_merge with (const_int 1) copies the upper elements from
;; operand 1.
(define_insn "avx512dq_ranges<mode><round_saeonly_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=v")
	(vec_merge:VF_128
	  (unspec:VF_128
	    [(match_operand:VF_128 1 "register_operand" "v")
	     (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
	     (match_operand:SI 3 "const_0_to_15_operand")]
	    UNSPEC_RANGE)
	  (match_dup 1)
	  (const_int 1)))]
  "TARGET_AVX512DQ"
  "vrange<ssescalarmodesuffix>\t{<round_saeonly_op4>%3, %2, %1, %0|%0, %1, %2, %3<round_saeonly_op4>}"
  [(set_attr "type" "sse")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])
18466 | ||
;; AVX512DQ VFPCLASSPS/VFPCLASSPD: test each element against the set of
;; FP classes selected by immediate operand 2, producing a mask result.
;; Fix: dropped the stray ";" that followed the output-template string
;; (harmless — ";" opens a comment in .md files — but it was an
;; accidental leftover, repeated on several nearby patterns).
(define_insn "avx512dq_fpclass<mode><mask_scalar_merge_name>"
  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
	(unspec:<avx512fmaskmode>
	  [(match_operand:VF_AVX512VL 1 "register_operand" "v")
	   (match_operand:QI 2 "const_0_to_255_operand" "n")]
	  UNSPEC_FPCLASS))]
  "TARGET_AVX512DQ"
  "vfpclass<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
  [(set_attr "type" "sse")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])
18479 | ||
;; Scalar VFPCLASSSS/VFPCLASSSD: classify only the low element; the
;; (and ... (const_int 1)) keeps just mask bit 0.
;; Fix: dropped the stray ";" after the output-template string.
(define_insn "avx512dq_vmfpclass<mode>"
  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
	(and:<avx512fmaskmode>
	  (unspec:<avx512fmaskmode>
	    [(match_operand:VF_128 1 "register_operand" "v")
	     (match_operand:QI 2 "const_0_to_255_operand" "n")]
	    UNSPEC_FPCLASS)
	  (const_int 1)))]
  "TARGET_AVX512DQ"
  "vfpclass<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sse")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])
18494 | ||
b040ded3 AI |
;; VGETMANTPS/VGETMANTPD: extract the normalized mantissa of each
;; element; immediate operand 2 (0..15) selects the normalization
;; interval and sign control.  Masking and SAE come via substs.
;; Fix: dropped the stray ";" after the output-template string.
(define_insn "<avx512>_getmant<mode><mask_name><round_saeonly_name>"
  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
	(unspec:VF_AVX512VL
	  [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>")
	   (match_operand:SI 2 "const_0_to_15_operand")]
	  UNSPEC_GETMANT))]
  "TARGET_AVX512F"
  "vgetmant<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])
18505 | ||
;; Scalar VGETMANTSS/VGETMANTSD: mantissa extraction on the low element
;; only; upper elements are copied from operand 1 via the vec_merge.
;; Fix: dropped the stray ";" after the output-template string.
(define_insn "avx512f_vgetmant<mode><round_saeonly_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=v")
	(vec_merge:VF_128
	  (unspec:VF_128
	    [(match_operand:VF_128 1 "register_operand" "v")
	     (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
	     (match_operand:SI 3 "const_0_to_15_operand")]
	    UNSPEC_GETMANT)
	  (match_dup 1)
	  (const_int 1)))]
  "TARGET_AVX512F"
  "vgetmant<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<ssescalarmode>")])
18520 | ||
5f64b496 AI |
;; The correct representation for this is absolutely enormous, and
;; surely not generally useful.
;; VDBPSADBW: double-block packed sum of absolute byte differences,
;; with immediate operand 3 controlling the dword shuffle of operand 2.
;; NOTE(review): the (set_attr "isa" "avx") looks redundant/misleading
;; for an AVX512BW-only EVEX insn — confirm whether it was intended.
(define_insn "<mask_codefor>avx512bw_dbpsadbw<mode><mask_name>"
  [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
	(unspec:VI2_AVX512VL
	  [(match_operand:<dbpsadbwmode> 1 "register_operand" "v")
	   (match_operand:<dbpsadbwmode> 2 "nonimmediate_operand" "vm")
	   (match_operand:SI 3 "const_0_to_255_operand")]
	  UNSPEC_DBPSADBW))]
  "TARGET_AVX512BW"
  "vdbpsadbw\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}"
  [(set_attr "isa" "avx")
   (set_attr "type" "sselog1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
18537 | ||
;; AVX512CD VPLZCNTD/VPLZCNTQ: per-element count-leading-zeros, mapped
;; directly onto the generic clz RTX so the vectorizer can use it.
(define_insn "clz<mode>2<mask_name>"
  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
	(clz:VI48_AVX512VL
	  (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512CD"
  "vplzcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "sse")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
18547 | ||
;; AVX512CD VPCONFLICTD/VPCONFLICTQ: for each element, a bitmask of
;; preceding elements that hold the same value (conflict detection for
;; scatter vectorization).
(define_insn "<mask_codefor>conflict<mode><mask_name>"
  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
	(unspec:VI48_AVX512VL
	  [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")]
	  UNSPEC_CONFLICT))]
  "TARGET_AVX512CD"
  "vpconflict<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "sse")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
c1618f82 AI |
18558 | |
;; SHA1MSG1: first intermediate step of SHA-1 message-schedule
;; computation (operand 1 is also the destination, constraint "0").
(define_insn "sha1msg1"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
	(unspec:V4SI
	  [(match_operand:V4SI 1 "register_operand" "0")
	   (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
	  UNSPEC_SHA1MSG1))]
  "TARGET_SHA"
  "sha1msg1\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "TI")])
18569 | ||
;; SHA1MSG2: final step of the SHA-1 message-schedule computation.
(define_insn "sha1msg2"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
	(unspec:V4SI
	  [(match_operand:V4SI 1 "register_operand" "0")
	   (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
	  UNSPEC_SHA1MSG2))]
  "TARGET_SHA"
  "sha1msg2\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "TI")])
18580 | ||
;; SHA1NEXTE: compute the SHA-1 state variable E after four rounds.
(define_insn "sha1nexte"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
	(unspec:V4SI
	  [(match_operand:V4SI 1 "register_operand" "0")
	   (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
	  UNSPEC_SHA1NEXTE))]
  "TARGET_SHA"
  "sha1nexte\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "TI")])
18591 | ||
;; SHA1RNDS4: perform four SHA-1 rounds; immediate operand 3 (0..3)
;; selects the round function/constant group, hence length_immediate 1.
(define_insn "sha1rnds4"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
	(unspec:V4SI
	  [(match_operand:V4SI 1 "register_operand" "0")
	   (match_operand:V4SI 2 "nonimmediate_operand" "xm")
	   (match_operand:SI 3 "const_0_to_3_operand" "n")]
	  UNSPEC_SHA1RNDS4))]
  "TARGET_SHA"
  "sha1rnds4\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "sselog1")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
18604 | ||
;; SHA256MSG1: first intermediate step of SHA-256 message-schedule
;; computation.
(define_insn "sha256msg1"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
	(unspec:V4SI
	  [(match_operand:V4SI 1 "register_operand" "0")
	   (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
	  UNSPEC_SHA256MSG1))]
  "TARGET_SHA"
  "sha256msg1\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "TI")])
18615 | ||
;; SHA256MSG2: final step of the SHA-256 message-schedule computation.
(define_insn "sha256msg2"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
	(unspec:V4SI
	  [(match_operand:V4SI 1 "register_operand" "0")
	   (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
	  UNSPEC_SHA256MSG2))]
  "TARGET_SHA"
  "sha256msg2\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "TI")])
18626 | ||
;; SHA256RNDS2: perform two SHA-256 rounds.  Operand 3 is the round-key
;; operand, fixed by constraint "Yz" to the implicit %xmm0 register the
;; hardware requires.
;; Fix: removed (set_attr "length_immediate" "1") — the instruction
;; encodes NO immediate byte (its third source is the implicit %xmm0,
;; not an imm8), so the attribute overstated the computed insn length.
(define_insn "sha256rnds2"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
	(unspec:V4SI
	  [(match_operand:V4SI 1 "register_operand" "0")
	   (match_operand:V4SI 2 "nonimmediate_operand" "xm")
	   (match_operand:V4SI 3 "register_operand" "Yz")]
	  UNSPEC_SHA256RNDS2))]
  "TARGET_SHA"
  "sha256rnds2\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "TI")])
275be1da IT |
18639 | |
;; Cast between a 512-bit vector and its quarter-width (128-bit) mode;
;; a no-op at the hardware level.  After reload it splits to a plain
;; move between the overlapping register halves (or a memory move for
;; the "m" alternative).
(define_insn_and_split "avx512f_<castmode><avxsizesuffix>_<castmode>"
  [(set (match_operand:AVX512MODE2P 0 "nonimmediate_operand" "=x,m")
	(unspec:AVX512MODE2P
	  [(match_operand:<ssequartermode> 1 "nonimmediate_operand" "xm,x")]
	  UNSPEC_CAST))]
  "TARGET_AVX512F"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  ;; Retarget whichever side is a register to the other side's mode so
  ;; the emitted move is mode-consistent; hard regnos are valid here
  ;; because reload has completed.
  if (REG_P (op0))
    op0 = gen_rtx_REG (<ssequartermode>mode, REGNO (op0));
  else
    op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
  emit_move_insn (op0, op1);
  DONE;
})
18659 | ||
;; Cast between a 512-bit vector and its half-width (256-bit) mode;
;; identical structure to the quarter-mode cast above.
(define_insn_and_split "avx512f_<castmode><avxsizesuffix>_256<castmode>"
  [(set (match_operand:AVX512MODE2P 0 "nonimmediate_operand" "=x,m")
	(unspec:AVX512MODE2P
	  [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
	  UNSPEC_CAST))]
  "TARGET_AVX512F"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  ;; As above: rewrite the register side into the matching mode, then
  ;; emit a plain move.
  if (REG_P (op0))
    op0 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op0));
  else
    op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
  emit_move_insn (op0, op1);
  DONE;
})
4190ea38 IT |
18679 | |
;; Iterator over the two AVX512IFMA 52-bit multiply-add unspecs
;; (low/high halves of the 104-bit product).
(define_int_iterator VPMADD52
	[UNSPEC_VPMADD52LUQ
	 UNSPEC_VPMADD52HUQ])

;; Mnemonic suffix ("luq"/"huq") corresponding to each VPMADD52 unspec.
(define_int_attr vpmadd52type
  [(UNSPEC_VPMADD52LUQ "luq") (UNSPEC_VPMADD52HUQ "huq")])
18686 | ||
;; Zero-masking expander for vpmadd52huq: forwards to the _maskz_1
;; pattern with a zero merge vector.
;; NOTE(review): "vpamdd52" (letters transposed from "vpmadd52") is the
;; established pattern name referenced by the builtin tables; renaming
;; it would break those callers, so it is kept as-is.
(define_expand "vpamdd52huq<mode>_maskz"
  [(match_operand:VI8_AVX512VL 0 "register_operand")
   (match_operand:VI8_AVX512VL 1 "register_operand")
   (match_operand:VI8_AVX512VL 2 "register_operand")
   (match_operand:VI8_AVX512VL 3 "nonimmediate_operand")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512IFMA"
{
  emit_insn (gen_vpamdd52huq<mode>_maskz_1 (
    operands[0], operands[1], operands[2], operands[3],
    CONST0_RTX (<MODE>mode), operands[4]));
  DONE;
})
18700 | ||
;; Zero-masking expander for vpmadd52luq; mirror of the huq expander
;; above (same transposed-letter legacy name).
(define_expand "vpamdd52luq<mode>_maskz"
  [(match_operand:VI8_AVX512VL 0 "register_operand")
   (match_operand:VI8_AVX512VL 1 "register_operand")
   (match_operand:VI8_AVX512VL 2 "register_operand")
   (match_operand:VI8_AVX512VL 3 "nonimmediate_operand")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512IFMA"
{
  emit_insn (gen_vpamdd52luq<mode>_maskz_1 (
    operands[0], operands[1], operands[2], operands[3],
    CONST0_RTX (<MODE>mode), operands[4]));
  DONE;
})
18714 | ||
;; AVX512IFMA VPMADD52{L,H}UQ: accumulate the low/high half of the
;; 52x52-bit product of operands 2 and 3 into operand 1 (tied to the
;; destination by constraint "0").
(define_insn "vpamdd52<vpmadd52type><mode><sd_maskz_name>"
  [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
	(unspec:VI8_AVX512VL
	  [(match_operand:VI8_AVX512VL 1 "register_operand" "0")
	   (match_operand:VI8_AVX512VL 2 "register_operand" "v")
	   (match_operand:VI8_AVX512VL 3 "nonimmediate_operand" "vm")]
	  VPMADD52))]
  "TARGET_AVX512IFMA"
  "vpmadd52<vpmadd52type>\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
18727 | ||
;; Merge-masking variant of vpmadd52: lanes cleared in mask operand 4
;; keep the accumulator value (match_dup 1).
(define_insn "vpamdd52<vpmadd52type><mode>_mask"
  [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
	(vec_merge:VI8_AVX512VL
	  (unspec:VI8_AVX512VL
	    [(match_operand:VI8_AVX512VL 1 "register_operand" "0")
	     (match_operand:VI8_AVX512VL 2 "register_operand" "v")
	     (match_operand:VI8_AVX512VL 3 "nonimmediate_operand" "vm")]
	    VPMADD52)
	  (match_dup 1)
	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512IFMA"
  "vpmadd52<vpmadd52type>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
18743 | ||
3dcc8af5 IT |
;; AVX512VBMI VPMULTISHIFTQB: for each byte of the destination, select
;; an unaligned 8-bit field from the corresponding qword of operand 2,
;; at the bit offset given by the matching control byte in operand 1.
(define_insn "vpmultishiftqb<mode><mask_name>"
  [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
	(unspec:VI1_AVX512VL
	  [(match_operand:VI1_AVX512VL 1 "register_operand" "v")
	   (match_operand:VI1_AVX512VL 2 "nonimmediate_operand" "vm")]
	  UNSPEC_VPMULTISHIFT))]
  "TARGET_AVX512VBMI"
  "vpmultishiftqb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])